{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 230403,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.6598585247993469,
      "learning_rate": 0.0005999999999721121,
      "loss": 10.3943,
      "step": 1
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6612103581428528,
      "learning_rate": 0.0005999999998884487,
      "loss": 10.3691,
      "step": 2
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5913391709327698,
      "learning_rate": 0.0005999999997490094,
      "loss": 10.3413,
      "step": 3
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5654894709587097,
      "learning_rate": 0.0005999999995537946,
      "loss": 10.3173,
      "step": 4
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6505258679389954,
      "learning_rate": 0.0005999999993028042,
      "loss": 10.2832,
      "step": 5
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7493694424629211,
      "learning_rate": 0.000599999998996038,
      "loss": 10.2409,
      "step": 6
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7680990695953369,
      "learning_rate": 0.0005999999986334962,
      "loss": 10.2266,
      "step": 7
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8414767980575562,
      "learning_rate": 0.0005999999982151787,
      "loss": 10.1742,
      "step": 8
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.027188777923584,
      "learning_rate": 0.0005999999977410855,
      "loss": 10.0745,
      "step": 9
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9435938000679016,
      "learning_rate": 0.0005999999972112169,
      "loss": 10.0635,
      "step": 10
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0193685293197632,
      "learning_rate": 0.0005999999966255724,
      "loss": 9.9666,
      "step": 11
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9588853120803833,
      "learning_rate": 0.0005999999959841522,
      "loss": 9.9409,
      "step": 12
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.086281180381775,
      "learning_rate": 0.0005999999952869564,
      "loss": 9.8376,
      "step": 13
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9215701222419739,
      "learning_rate": 0.0005999999945339849,
      "loss": 9.8307,
      "step": 14
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9918307065963745,
      "learning_rate": 0.0005999999937252379,
      "loss": 9.7638,
      "step": 15
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0389151573181152,
      "learning_rate": 0.0005999999928607151,
      "loss": 9.7027,
      "step": 16
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9174723029136658,
      "learning_rate": 0.0005999999919404167,
      "loss": 9.6849,
      "step": 17
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9601191878318787,
      "learning_rate": 0.0005999999909643426,
      "loss": 9.6191,
      "step": 18
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9423014521598816,
      "learning_rate": 0.0005999999899324929,
      "loss": 9.5591,
      "step": 19
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9709948301315308,
      "learning_rate": 0.0005999999888448674,
      "loss": 9.5038,
      "step": 20
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8601668477058411,
      "learning_rate": 0.0005999999877014663,
      "loss": 9.4834,
      "step": 21
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0100762844085693,
      "learning_rate": 0.0005999999865022896,
      "loss": 9.3787,
      "step": 22
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9448301196098328,
      "learning_rate": 0.0005999999852473372,
      "loss": 9.3652,
      "step": 23
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0423016548156738,
      "learning_rate": 0.0005999999839366092,
      "loss": 9.2837,
      "step": 24
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0264683961868286,
      "learning_rate": 0.0005999999825701055,
      "loss": 9.245,
      "step": 25
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9286175966262817,
      "learning_rate": 0.0005999999811478261,
      "loss": 9.2287,
      "step": 26
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9515063166618347,
      "learning_rate": 0.000599999979669771,
      "loss": 9.128,
      "step": 27
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0779500007629395,
      "learning_rate": 0.0005999999781359403,
      "loss": 9.0627,
      "step": 28
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.065516710281372,
      "learning_rate": 0.000599999976546334,
      "loss": 9.0055,
      "step": 29
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9599165320396423,
      "learning_rate": 0.000599999974900952,
      "loss": 8.9679,
      "step": 30
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0839635133743286,
      "learning_rate": 0.0005999999731997943,
      "loss": 8.8838,
      "step": 31
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.056505560874939,
      "learning_rate": 0.000599999971442861,
      "loss": 8.8472,
      "step": 32
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0371930599212646,
      "learning_rate": 0.0005999999696301519,
      "loss": 8.7885,
      "step": 33
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9491535425186157,
      "learning_rate": 0.0005999999677616674,
      "loss": 8.783,
      "step": 34
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8823418021202087,
      "learning_rate": 0.000599999965837407,
      "loss": 8.7835,
      "step": 35
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9009343981742859,
      "learning_rate": 0.000599999963857371,
      "loss": 8.7026,
      "step": 36
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.962884247303009,
      "learning_rate": 0.0005999999618215595,
      "loss": 8.6296,
      "step": 37
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9905882477760315,
      "learning_rate": 0.0005999999597299722,
      "loss": 8.5626,
      "step": 38
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9840935468673706,
      "learning_rate": 0.0005999999575826093,
      "loss": 8.5115,
      "step": 39
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9249621033668518,
      "learning_rate": 0.0005999999553794707,
      "loss": 8.5445,
      "step": 40
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.029619812965393,
      "learning_rate": 0.0005999999531205564,
      "loss": 8.3769,
      "step": 41
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0050315856933594,
      "learning_rate": 0.0005999999508058666,
      "loss": 8.3455,
      "step": 42
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0113599300384521,
      "learning_rate": 0.000599999948435401,
      "loss": 8.3543,
      "step": 43
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9991005063056946,
      "learning_rate": 0.0005999999460091598,
      "loss": 8.2674,
      "step": 44
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0417594909667969,
      "learning_rate": 0.0005999999435271431,
      "loss": 8.2064,
      "step": 45
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.055893063545227,
      "learning_rate": 0.0005999999409893505,
      "loss": 8.1106,
      "step": 46
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0250859260559082,
      "learning_rate": 0.0005999999383957823,
      "loss": 8.1506,
      "step": 47
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0533592700958252,
      "learning_rate": 0.0005999999357464385,
      "loss": 8.0668,
      "step": 48
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0209156274795532,
      "learning_rate": 0.000599999933041319,
      "loss": 8.0483,
      "step": 49
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0372531414031982,
      "learning_rate": 0.0005999999302804239,
      "loss": 7.9728,
      "step": 50
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0559591054916382,
      "learning_rate": 0.0005999999274637532,
      "loss": 7.9545,
      "step": 51
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.064501404762268,
      "learning_rate": 0.0005999999245913068,
      "loss": 7.8467,
      "step": 52
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0521150827407837,
      "learning_rate": 0.0005999999216630848,
      "loss": 7.8545,
      "step": 53
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9825447201728821,
      "learning_rate": 0.000599999918679087,
      "loss": 7.8253,
      "step": 54
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0372531414031982,
      "learning_rate": 0.0005999999156393138,
      "loss": 7.7737,
      "step": 55
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0567843914031982,
      "learning_rate": 0.0005999999125437647,
      "loss": 7.7471,
      "step": 56
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1001375913619995,
      "learning_rate": 0.0005999999093924401,
      "loss": 7.6144,
      "step": 57
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9409846067428589,
      "learning_rate": 0.0005999999061853398,
      "loss": 7.7032,
      "step": 58
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9859527349472046,
      "learning_rate": 0.0005999999029224638,
      "loss": 7.6407,
      "step": 59
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0107359886169434,
      "learning_rate": 0.0005999998996038123,
      "loss": 7.4747,
      "step": 60
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9767577648162842,
      "learning_rate": 0.000599999896229385,
      "loss": 7.5863,
      "step": 61
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1464706659317017,
      "learning_rate": 0.0005999998927991822,
      "loss": 7.4367,
      "step": 62
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9296828508377075,
      "learning_rate": 0.0005999998893132037,
      "loss": 7.5042,
      "step": 63
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9529527425765991,
      "learning_rate": 0.0005999998857714495,
      "loss": 7.4747,
      "step": 64
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.031238317489624,
      "learning_rate": 0.0005999998821739197,
      "loss": 7.4121,
      "step": 65
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8993680477142334,
      "learning_rate": 0.0005999998785206143,
      "loss": 7.4909,
      "step": 66
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0721580982208252,
      "learning_rate": 0.0005999998748115331,
      "loss": 7.2172,
      "step": 67
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8471489548683167,
      "learning_rate": 0.0005999998710466765,
      "loss": 7.358,
      "step": 68
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0891550779342651,
      "learning_rate": 0.0005999998672260442,
      "loss": 7.2497,
      "step": 69
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9380143284797668,
      "learning_rate": 0.0005999998633496361,
      "loss": 7.4137,
      "step": 70
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9526144862174988,
      "learning_rate": 0.0005999998594174525,
      "loss": 7.2646,
      "step": 71
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1072875261306763,
      "learning_rate": 0.0005999998554294932,
      "loss": 7.1451,
      "step": 72
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9134693145751953,
      "learning_rate": 0.0005999998513857583,
      "loss": 7.2566,
      "step": 73
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9111964702606201,
      "learning_rate": 0.0005999998472862478,
      "loss": 7.1314,
      "step": 74
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9465875625610352,
      "learning_rate": 0.0005999998431309616,
      "loss": 7.0608,
      "step": 75
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9106792211532593,
      "learning_rate": 0.0005999998389198997,
      "loss": 7.0692,
      "step": 76
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9722570776939392,
      "learning_rate": 0.0005999998346530623,
      "loss": 6.9842,
      "step": 77
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8728375434875488,
      "learning_rate": 0.0005999998303304493,
      "loss": 7.0626,
      "step": 78
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8778724670410156,
      "learning_rate": 0.0005999998259520606,
      "loss": 7.0316,
      "step": 79
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.972653865814209,
      "learning_rate": 0.0005999998215178963,
      "loss": 6.8678,
      "step": 80
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.0130351781845093,
      "learning_rate": 0.0005999998170279563,
      "loss": 6.8074,
      "step": 81
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9315195083618164,
      "learning_rate": 0.0005999998124822407,
      "loss": 6.9973,
      "step": 82
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.927353024482727,
      "learning_rate": 0.0005999998078807494,
      "loss": 6.8908,
      "step": 83
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8770304322242737,
      "learning_rate": 0.0005999998032234826,
      "loss": 6.8946,
      "step": 84
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.849486231803894,
      "learning_rate": 0.0005999997985104401,
      "loss": 6.8958,
      "step": 85
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8522434830665588,
      "learning_rate": 0.0005999997937416221,
      "loss": 6.9075,
      "step": 86
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8345043063163757,
      "learning_rate": 0.0005999997889170283,
      "loss": 6.7639,
      "step": 87
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7919420599937439,
      "learning_rate": 0.000599999784036659,
      "loss": 6.9424,
      "step": 88
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9264858365058899,
      "learning_rate": 0.000599999779100514,
      "loss": 6.704,
      "step": 89
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8987029194831848,
      "learning_rate": 0.0005999997741085935,
      "loss": 6.6725,
      "step": 90
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8521320223808289,
      "learning_rate": 0.0005999997690608971,
      "loss": 6.8187,
      "step": 91
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7459168434143066,
      "learning_rate": 0.0005999997639574254,
      "loss": 6.8492,
      "step": 92
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8649438619613647,
      "learning_rate": 0.0005999997587981779,
      "loss": 6.6437,
      "step": 93
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8667361736297607,
      "learning_rate": 0.0005999997535831548,
      "loss": 6.5632,
      "step": 94
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9127094149589539,
      "learning_rate": 0.0005999997483123561,
      "loss": 6.5092,
      "step": 95
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8351328372955322,
      "learning_rate": 0.0005999997429857819,
      "loss": 6.6241,
      "step": 96
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7684814929962158,
      "learning_rate": 0.0005999997376034318,
      "loss": 6.676,
      "step": 97
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8317620754241943,
      "learning_rate": 0.0005999997321653063,
      "loss": 6.4917,
      "step": 98
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7059003114700317,
      "learning_rate": 0.0005999997266714053,
      "loss": 6.5185,
      "step": 99
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.849994957447052,
      "learning_rate": 0.0005999997211217285,
      "loss": 6.491,
      "step": 100
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7043529152870178,
      "learning_rate": 0.0005999997155162762,
      "loss": 6.6964,
      "step": 101
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8318377137184143,
      "learning_rate": 0.0005999997098550482,
      "loss": 6.4772,
      "step": 102
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7279092073440552,
      "learning_rate": 0.0005999997041380445,
      "loss": 6.4495,
      "step": 103
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6518151164054871,
      "learning_rate": 0.0005999996983652653,
      "loss": 6.6566,
      "step": 104
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8711919188499451,
      "learning_rate": 0.0005999996925367105,
      "loss": 6.5632,
      "step": 105
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8647792935371399,
      "learning_rate": 0.0005999996866523801,
      "loss": 6.3735,
      "step": 106
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6094046831130981,
      "learning_rate": 0.0005999996807122741,
      "loss": 6.654,
      "step": 107
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6571348905563354,
      "learning_rate": 0.0005999996747163924,
      "loss": 6.5522,
      "step": 108
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7280174493789673,
      "learning_rate": 0.0005999996686647353,
      "loss": 6.5401,
      "step": 109
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6053960919380188,
      "learning_rate": 0.0005999996625573025,
      "loss": 6.5451,
      "step": 110
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6921632289886475,
      "learning_rate": 0.0005999996563940941,
      "loss": 6.3976,
      "step": 111
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7514722347259521,
      "learning_rate": 0.00059999965017511,
      "loss": 6.342,
      "step": 112
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6617632508277893,
      "learning_rate": 0.0005999996439003504,
      "loss": 6.4492,
      "step": 113
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8846542239189148,
      "learning_rate": 0.0005999996375698151,
      "loss": 6.1731,
      "step": 114
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5812987685203552,
      "learning_rate": 0.0005999996311835044,
      "loss": 6.5065,
      "step": 115
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4923478662967682,
      "learning_rate": 0.0005999996247414179,
      "loss": 6.5739,
      "step": 116
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6977080702781677,
      "learning_rate": 0.0005999996182435559,
      "loss": 6.1946,
      "step": 117
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6450197100639343,
      "learning_rate": 0.0005999996116899184,
      "loss": 6.3491,
      "step": 118
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1321666240692139,
      "learning_rate": 0.0005999996050805051,
      "loss": 6.6236,
      "step": 119
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5946992635726929,
      "learning_rate": 0.0005999995984153164,
      "loss": 6.465,
      "step": 120
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5655304789543152,
      "learning_rate": 0.000599999591694352,
      "loss": 6.2788,
      "step": 121
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.45994871854782104,
      "learning_rate": 0.0005999995849176121,
      "loss": 6.5311,
      "step": 122
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5516977310180664,
      "learning_rate": 0.0005999995780850965,
      "loss": 6.5221,
      "step": 123
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5784500241279602,
      "learning_rate": 0.0005999995711968054,
      "loss": 6.4657,
      "step": 124
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6322802305221558,
      "learning_rate": 0.0005999995642527388,
      "loss": 6.2564,
      "step": 125
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5629352927207947,
      "learning_rate": 0.0005999995572528964,
      "loss": 6.3483,
      "step": 126
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6105032563209534,
      "learning_rate": 0.0005999995501972786,
      "loss": 6.2387,
      "step": 127
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5480929613113403,
      "learning_rate": 0.0005999995430858851,
      "loss": 6.3626,
      "step": 128
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6170626878738403,
      "learning_rate": 0.0005999995359187161,
      "loss": 6.1241,
      "step": 129
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5544778108596802,
      "learning_rate": 0.0005999995286957716,
      "loss": 6.2758,
      "step": 130
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.630067765712738,
      "learning_rate": 0.0005999995214170514,
      "loss": 6.2802,
      "step": 131
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5922379493713379,
      "learning_rate": 0.0005999995140825556,
      "loss": 6.3123,
      "step": 132
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.727023720741272,
      "learning_rate": 0.0005999995066922843,
      "loss": 6.2257,
      "step": 133
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7470083236694336,
      "learning_rate": 0.0005999994992462374,
      "loss": 6.2482,
      "step": 134
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6260114312171936,
      "learning_rate": 0.000599999491744415,
      "loss": 6.0673,
      "step": 135
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7691227793693542,
      "learning_rate": 0.0005999994841868169,
      "loss": 6.1494,
      "step": 136
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5581669211387634,
      "learning_rate": 0.0005999994765734433,
      "loss": 6.3572,
      "step": 137
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7852990031242371,
      "learning_rate": 0.0005999994689042941,
      "loss": 5.9739,
      "step": 138
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5753302574157715,
      "learning_rate": 0.0005999994611793695,
      "loss": 6.031,
      "step": 139
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7206483483314514,
      "learning_rate": 0.0005999994533986691,
      "loss": 6.073,
      "step": 140
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5503004789352417,
      "learning_rate": 0.0005999994455621933,
      "loss": 6.2838,
      "step": 141
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6881694197654724,
      "learning_rate": 0.0005999994376699418,
      "loss": 5.9602,
      "step": 142
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5312840938568115,
      "learning_rate": 0.0005999994297219149,
      "loss": 6.3527,
      "step": 143
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.46943041682243347,
      "learning_rate": 0.0005999994217181124,
      "loss": 6.3545,
      "step": 144
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5709737539291382,
      "learning_rate": 0.0005999994136585343,
      "loss": 6.0719,
      "step": 145
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.619803786277771,
      "learning_rate": 0.0005999994055431807,
      "loss": 6.2794,
      "step": 146
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5878353118896484,
      "learning_rate": 0.0005999993973720514,
      "loss": 6.0864,
      "step": 147
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5580340623855591,
      "learning_rate": 0.0005999993891451467,
      "loss": 5.9862,
      "step": 148
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6522189378738403,
      "learning_rate": 0.0005999993808624664,
      "loss": 5.9643,
      "step": 149
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6347594857215881,
      "learning_rate": 0.0005999993725240106,
      "loss": 6.0852,
      "step": 150
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5291317105293274,
      "learning_rate": 0.0005999993641297792,
      "loss": 6.1787,
      "step": 151
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6728364825248718,
      "learning_rate": 0.0005999993556797724,
      "loss": 6.1998,
      "step": 152
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.533880889415741,
      "learning_rate": 0.0005999993471739899,
      "loss": 5.9525,
      "step": 153
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5821530222892761,
      "learning_rate": 0.0005999993386124319,
      "loss": 6.049,
      "step": 154
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.532331645488739,
      "learning_rate": 0.0005999993299950982,
      "loss": 5.9889,
      "step": 155
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6450454592704773,
      "learning_rate": 0.0005999993213219892,
      "loss": 5.9803,
      "step": 156
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.47155046463012695,
      "learning_rate": 0.0005999993125931045,
      "loss": 6.2029,
      "step": 157
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.547124445438385,
      "learning_rate": 0.0005999993038084444,
      "loss": 6.1918,
      "step": 158
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5347194075584412,
      "learning_rate": 0.0005999992949680087,
      "loss": 6.1073,
      "step": 159
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5505983829498291,
      "learning_rate": 0.0005999992860717975,
      "loss": 6.1356,
      "step": 160
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5834269523620605,
      "learning_rate": 0.0005999992771198108,
      "loss": 5.8883,
      "step": 161
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6285741925239563,
      "learning_rate": 0.0005999992681120486,
      "loss": 5.8089,
      "step": 162
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5529022216796875,
      "learning_rate": 0.0005999992590485107,
      "loss": 6.2193,
      "step": 163
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5468406677246094,
      "learning_rate": 0.0005999992499291973,
      "loss": 6.2333,
      "step": 164
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5431697964668274,
      "learning_rate": 0.0005999992407541084,
      "loss": 5.8756,
      "step": 165
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6134865283966064,
      "learning_rate": 0.0005999992315232441,
      "loss": 5.8366,
      "step": 166
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.512790858745575,
      "learning_rate": 0.0005999992222366042,
      "loss": 6.1761,
      "step": 167
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5985870361328125,
      "learning_rate": 0.0005999992128941888,
      "loss": 5.9446,
      "step": 168
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4996299743652344,
      "learning_rate": 0.0005999992034959978,
      "loss": 5.9006,
      "step": 169
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5484021902084351,
      "learning_rate": 0.0005999991940420313,
      "loss": 5.833,
      "step": 170
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5155349969863892,
      "learning_rate": 0.0005999991845322893,
      "loss": 6.2096,
      "step": 171
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5195517539978027,
      "learning_rate": 0.0005999991749667719,
      "loss": 5.8973,
      "step": 172
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5191429853439331,
      "learning_rate": 0.0005999991653454789,
      "loss": 5.9487,
      "step": 173
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5161901116371155,
      "learning_rate": 0.0005999991556684104,
      "loss": 6.0615,
      "step": 174
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5010809898376465,
      "learning_rate": 0.0005999991459355664,
      "loss": 5.9589,
      "step": 175
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5294127464294434,
      "learning_rate": 0.000599999136146947,
      "loss": 6.1201,
      "step": 176
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5608318448066711,
      "learning_rate": 0.000599999126302552,
      "loss": 5.9295,
      "step": 177
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6796548962593079,
      "learning_rate": 0.0005999991164023814,
      "loss": 5.7496,
      "step": 178
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.651563286781311,
      "learning_rate": 0.0005999991064464354,
      "loss": 5.9865,
      "step": 179
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6485670208930969,
      "learning_rate": 0.0005999990964347139,
      "loss": 5.8523,
      "step": 180
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5549309253692627,
      "learning_rate": 0.0005999990863672169,
      "loss": 5.8409,
      "step": 181
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5910934805870056,
      "learning_rate": 0.0005999990762439445,
      "loss": 5.8071,
      "step": 182
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5005125999450684,
      "learning_rate": 0.0005999990660648964,
      "loss": 6.1483,
      "step": 183
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5697265267372131,
      "learning_rate": 0.0005999990558300729,
      "loss": 5.7916,
      "step": 184
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6187750101089478,
      "learning_rate": 0.000599999045539474,
      "loss": 5.6757,
      "step": 185
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5886824727058411,
      "learning_rate": 0.0005999990351930996,
      "loss": 5.8051,
      "step": 186
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6022560596466064,
      "learning_rate": 0.0005999990247909496,
      "loss": 5.743,
      "step": 187
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5228410363197327,
      "learning_rate": 0.0005999990143330243,
      "loss": 5.9529,
      "step": 188
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5995274186134338,
      "learning_rate": 0.0005999990038193234,
      "loss": 5.8258,
      "step": 189
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5654438138008118,
      "learning_rate": 0.000599998993249847,
      "loss": 5.706,
      "step": 190
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6238942742347717,
      "learning_rate": 0.0005999989826245951,
      "loss": 5.6142,
      "step": 191
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.49269378185272217,
      "learning_rate": 0.0005999989719435679,
      "loss": 5.751,
      "step": 192
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4987429976463318,
      "learning_rate": 0.000599998961206765,
      "loss": 6.0862,
      "step": 193
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5320373177528381,
      "learning_rate": 0.0005999989504141867,
      "loss": 5.808,
      "step": 194
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5114519000053406,
      "learning_rate": 0.000599998939565833,
      "loss": 5.8458,
      "step": 195
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5472970008850098,
      "learning_rate": 0.0005999989286617039,
      "loss": 5.5764,
      "step": 196
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5374953746795654,
      "learning_rate": 0.0005999989177017992,
      "loss": 5.8235,
      "step": 197
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6834536790847778,
      "learning_rate": 0.000599998906686119,
      "loss": 5.6808,
      "step": 198
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6153894662857056,
      "learning_rate": 0.0005999988956146636,
      "loss": 5.6632,
      "step": 199
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6808043122291565,
      "learning_rate": 0.0005999988844874325,
      "loss": 5.7041,
      "step": 200
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.618071973323822,
      "learning_rate": 0.0005999988733044261,
      "loss": 5.5187,
      "step": 201
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6863864660263062,
      "learning_rate": 0.000599998862065644,
      "loss": 5.7587,
      "step": 202
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6017519235610962,
      "learning_rate": 0.0005999988507710866,
      "loss": 5.5211,
      "step": 203
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6277284026145935,
      "learning_rate": 0.0005999988394207537,
      "loss": 5.6438,
      "step": 204
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6990332007408142,
      "learning_rate": 0.0005999988280146455,
      "loss": 5.4815,
      "step": 205
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5716874003410339,
      "learning_rate": 0.0005999988165527618,
      "loss": 5.5884,
      "step": 206
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7090599536895752,
      "learning_rate": 0.0005999988050351026,
      "loss": 5.6824,
      "step": 207
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5974349975585938,
      "learning_rate": 0.000599998793461668,
      "loss": 5.6022,
      "step": 208
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5545417666435242,
      "learning_rate": 0.000599998781832458,
      "loss": 5.3647,
      "step": 209
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.684478759765625,
      "learning_rate": 0.0005999987701474725,
      "loss": 5.4681,
      "step": 210
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5034512877464294,
      "learning_rate": 0.0005999987584067115,
      "loss": 5.5565,
      "step": 211
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5745574831962585,
      "learning_rate": 0.0005999987466101752,
      "loss": 5.7364,
      "step": 212
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6445351839065552,
      "learning_rate": 0.0005999987347578633,
      "loss": 5.4596,
      "step": 213
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5751500725746155,
      "learning_rate": 0.0005999987228497762,
      "loss": 5.5782,
      "step": 214
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6850988268852234,
      "learning_rate": 0.0005999987108859135,
      "loss": 5.4783,
      "step": 215
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.666841983795166,
      "learning_rate": 0.0005999986988662754,
      "loss": 5.3632,
      "step": 216
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6066020131111145,
      "learning_rate": 0.0005999986867908619,
      "loss": 5.4944,
      "step": 217
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6069178581237793,
      "learning_rate": 0.0005999986746596731,
      "loss": 5.7571,
      "step": 218
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5759275555610657,
      "learning_rate": 0.0005999986624727088,
      "loss": 5.5489,
      "step": 219
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7268794178962708,
      "learning_rate": 0.000599998650229969,
      "loss": 5.3506,
      "step": 220
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5577214360237122,
      "learning_rate": 0.0005999986379314539,
      "loss": 5.6068,
      "step": 221
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5385726094245911,
      "learning_rate": 0.0005999986255771632,
      "loss": 5.5362,
      "step": 222
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6918811798095703,
      "learning_rate": 0.0005999986131670973,
      "loss": 5.4784,
      "step": 223
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6033471822738647,
      "learning_rate": 0.0005999986007012559,
      "loss": 5.5448,
      "step": 224
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6563103795051575,
      "learning_rate": 0.0005999985881796392,
      "loss": 5.1666,
      "step": 225
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7446143627166748,
      "learning_rate": 0.000599998575602247,
      "loss": 5.5058,
      "step": 226
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4986528158187866,
      "learning_rate": 0.0005999985629690795,
      "loss": 5.6093,
      "step": 227
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.518952488899231,
      "learning_rate": 0.0005999985502801365,
      "loss": 5.3663,
      "step": 228
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6506614089012146,
      "learning_rate": 0.0005999985375354181,
      "loss": 5.5655,
      "step": 229
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6193224191665649,
      "learning_rate": 0.0005999985247349244,
      "loss": 5.3474,
      "step": 230
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4652129113674164,
      "learning_rate": 0.0005999985118786552,
      "loss": 5.5291,
      "step": 231
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6144859194755554,
      "learning_rate": 0.0005999984989666107,
      "loss": 5.3248,
      "step": 232
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6087594032287598,
      "learning_rate": 0.0005999984859987907,
      "loss": 5.5623,
      "step": 233
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5506737232208252,
      "learning_rate": 0.0005999984729751955,
      "loss": 5.4616,
      "step": 234
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5109699368476868,
      "learning_rate": 0.0005999984598958248,
      "loss": 5.5581,
      "step": 235
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5486552715301514,
      "learning_rate": 0.0005999984467606788,
      "loss": 5.1721,
      "step": 236
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6525230407714844,
      "learning_rate": 0.0005999984335697573,
      "loss": 5.5541,
      "step": 237
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.517174243927002,
      "learning_rate": 0.0005999984203230606,
      "loss": 5.3921,
      "step": 238
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5399962663650513,
      "learning_rate": 0.0005999984070205884,
      "loss": 5.4543,
      "step": 239
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6151449680328369,
      "learning_rate": 0.0005999983936623408,
      "loss": 5.4011,
      "step": 240
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.48419317603111267,
      "learning_rate": 0.000599998380248318,
      "loss": 5.6012,
      "step": 241
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5774736404418945,
      "learning_rate": 0.0005999983667785196,
      "loss": 5.468,
      "step": 242
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5309909582138062,
      "learning_rate": 0.000599998353252946,
      "loss": 5.3812,
      "step": 243
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.550531804561615,
      "learning_rate": 0.0005999983396715971,
      "loss": 5.317,
      "step": 244
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5490381717681885,
      "learning_rate": 0.0005999983260344728,
      "loss": 5.4203,
      "step": 245
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6783135533332825,
      "learning_rate": 0.0005999983123415731,
      "loss": 5.2838,
      "step": 246
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5345268249511719,
      "learning_rate": 0.000599998298592898,
      "loss": 5.1149,
      "step": 247
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5725337862968445,
      "learning_rate": 0.0005999982847884477,
      "loss": 5.4448,
      "step": 248
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.68817138671875,
      "learning_rate": 0.0005999982709282219,
      "loss": 5.0489,
      "step": 249
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5322050452232361,
      "learning_rate": 0.0005999982570122209,
      "loss": 5.3954,
      "step": 250
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5169098377227783,
      "learning_rate": 0.0005999982430404445,
      "loss": 5.3187,
      "step": 251
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4687575101852417,
      "learning_rate": 0.0005999982290128928,
      "loss": 5.549,
      "step": 252
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5794438123703003,
      "learning_rate": 0.0005999982149295657,
      "loss": 5.4128,
      "step": 253
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4915308952331543,
      "learning_rate": 0.0005999982007904633,
      "loss": 5.3985,
      "step": 254
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6412003636360168,
      "learning_rate": 0.0005999981865955856,
      "loss": 5.0168,
      "step": 255
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.48708075284957886,
      "learning_rate": 0.0005999981723449325,
      "loss": 5.4627,
      "step": 256
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5161681771278381,
      "learning_rate": 0.0005999981580385041,
      "loss": 5.0319,
      "step": 257
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5803518891334534,
      "learning_rate": 0.0005999981436763004,
      "loss": 5.2546,
      "step": 258
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6046208739280701,
      "learning_rate": 0.0005999981292583214,
      "loss": 5.1224,
      "step": 259
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5863410234451294,
      "learning_rate": 0.0005999981147845671,
      "loss": 4.9366,
      "step": 260
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8501752614974976,
      "learning_rate": 0.0005999981002550374,
      "loss": 5.178,
      "step": 261
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.510800302028656,
      "learning_rate": 0.0005999980856697324,
      "loss": 5.1975,
      "step": 262
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6316835284233093,
      "learning_rate": 0.0005999980710286523,
      "loss": 5.6064,
      "step": 263
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5530745983123779,
      "learning_rate": 0.0005999980563317967,
      "loss": 5.3095,
      "step": 264
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6074588894844055,
      "learning_rate": 0.0005999980415791658,
      "loss": 5.371,
      "step": 265
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4676899313926697,
      "learning_rate": 0.0005999980267707597,
      "loss": 5.6309,
      "step": 266
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6583795547485352,
      "learning_rate": 0.0005999980119065782,
      "loss": 5.3963,
      "step": 267
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5646474957466125,
      "learning_rate": 0.0005999979969866214,
      "loss": 5.2546,
      "step": 268
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.569118082523346,
      "learning_rate": 0.0005999979820108893,
      "loss": 5.2515,
      "step": 269
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5112742185592651,
      "learning_rate": 0.000599997966979382,
      "loss": 5.6041,
      "step": 270
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5401200652122498,
      "learning_rate": 0.0005999979518920994,
      "loss": 5.4926,
      "step": 271
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.664263129234314,
      "learning_rate": 0.0005999979367490415,
      "loss": 5.2047,
      "step": 272
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5247555375099182,
      "learning_rate": 0.0005999979215502083,
      "loss": 5.2927,
      "step": 273
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.4451077878475189,
      "learning_rate": 0.0005999979062955999,
      "loss": 5.3695,
      "step": 274
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6503798365592957,
      "learning_rate": 0.0005999978909852162,
      "loss": 5.1539,
      "step": 275
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5431292653083801,
      "learning_rate": 0.0005999978756190572,
      "loss": 5.4303,
      "step": 276
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.48480987548828125,
      "learning_rate": 0.000599997860197123,
      "loss": 5.143,
      "step": 277
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5256018042564392,
      "learning_rate": 0.0005999978447194133,
      "loss": 5.1244,
      "step": 278
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5803864002227783,
      "learning_rate": 0.0005999978291859285,
      "loss": 4.9829,
      "step": 279
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5797622799873352,
      "learning_rate": 0.0005999978135966685,
      "loss": 5.2996,
      "step": 280
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.583515465259552,
      "learning_rate": 0.0005999977979516333,
      "loss": 5.38,
      "step": 281
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5296593308448792,
      "learning_rate": 0.0005999977822508226,
      "loss": 5.2291,
      "step": 282
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.49422600865364075,
      "learning_rate": 0.0005999977664942368,
      "loss": 5.3024,
      "step": 283
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5592920184135437,
      "learning_rate": 0.0005999977506818758,
      "loss": 5.2494,
      "step": 284
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6210225224494934,
      "learning_rate": 0.0005999977348137395,
      "loss": 5.2375,
      "step": 285
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5075207948684692,
      "learning_rate": 0.0005999977188898279,
      "loss": 5.281,
      "step": 286
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7904847264289856,
      "learning_rate": 0.0005999977029101411,
      "loss": 5.082,
      "step": 287
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5973087549209595,
      "learning_rate": 0.0005999976868746791,
      "loss": 5.3837,
      "step": 288
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6574702262878418,
      "learning_rate": 0.0005999976707834418,
      "loss": 5.1424,
      "step": 289
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5289576649665833,
      "learning_rate": 0.0005999976546364293,
      "loss": 5.1459,
      "step": 290
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5553220510482788,
      "learning_rate": 0.0005999976384336416,
      "loss": 5.2318,
      "step": 291
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5345301032066345,
      "learning_rate": 0.0005999976221750787,
      "loss": 5.2178,
      "step": 292
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6124873757362366,
      "learning_rate": 0.0005999976058607406,
      "loss": 5.0663,
      "step": 293
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5497334003448486,
      "learning_rate": 0.0005999975894906272,
      "loss": 5.1192,
      "step": 294
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5469390153884888,
      "learning_rate": 0.0005999975730647386,
      "loss": 5.2465,
      "step": 295
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7857360243797302,
      "learning_rate": 0.0005999975565830748,
      "loss": 5.1285,
      "step": 296
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6508238911628723,
      "learning_rate": 0.0005999975400456357,
      "loss": 4.6162,
      "step": 297
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5813226699829102,
      "learning_rate": 0.0005999975234524215,
      "loss": 5.2126,
      "step": 298
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5786598920822144,
      "learning_rate": 0.0005999975068034321,
      "loss": 5.3117,
      "step": 299
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5811485648155212,
      "learning_rate": 0.0005999974900986675,
      "loss": 5.0565,
      "step": 300
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6233729124069214,
      "learning_rate": 0.0005999974733381277,
      "loss": 5.0907,
      "step": 301
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5672184824943542,
      "learning_rate": 0.0005999974565218127,
      "loss": 5.2265,
      "step": 302
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5008927583694458,
      "learning_rate": 0.0005999974396497225,
      "loss": 5.3543,
      "step": 303
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5618913769721985,
      "learning_rate": 0.0005999974227218571,
      "loss": 5.0929,
      "step": 304
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6154097318649292,
      "learning_rate": 0.0005999974057382166,
      "loss": 5.1129,
      "step": 305
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5778329372406006,
      "learning_rate": 0.0005999973886988008,
      "loss": 5.474,
      "step": 306
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5737485885620117,
      "learning_rate": 0.0005999973716036098,
      "loss": 5.0034,
      "step": 307
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6262807846069336,
      "learning_rate": 0.0005999973544526437,
      "loss": 5.0073,
      "step": 308
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6267098188400269,
      "learning_rate": 0.0005999973372459024,
      "loss": 5.2483,
      "step": 309
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5537711381912231,
      "learning_rate": 0.0005999973199833859,
      "loss": 5.1585,
      "step": 310
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6709228157997131,
      "learning_rate": 0.0005999973026650944,
      "loss": 5.0372,
      "step": 311
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5624245405197144,
      "learning_rate": 0.0005999972852910277,
      "loss": 4.9478,
      "step": 312
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6722320318222046,
      "learning_rate": 0.0005999972678611857,
      "loss": 5.1258,
      "step": 313
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5983895659446716,
      "learning_rate": 0.0005999972503755686,
      "loss": 5.1215,
      "step": 314
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5284023880958557,
      "learning_rate": 0.0005999972328341764,
      "loss": 5.1325,
      "step": 315
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6462795734405518,
      "learning_rate": 0.0005999972152370091,
      "loss": 5.0386,
      "step": 316
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6079012155532837,
      "learning_rate": 0.0005999971975840665,
      "loss": 5.2896,
      "step": 317
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6083092093467712,
      "learning_rate": 0.0005999971798753488,
      "loss": 4.963,
      "step": 318
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6902062296867371,
      "learning_rate": 0.000599997162110856,
      "loss": 5.0028,
      "step": 319
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6416361927986145,
      "learning_rate": 0.0005999971442905881,
      "loss": 4.9006,
      "step": 320
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6454106569290161,
      "learning_rate": 0.0005999971264145451,
      "loss": 5.0281,
      "step": 321
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6689263582229614,
      "learning_rate": 0.0005999971084827268,
      "loss": 4.9983,
      "step": 322
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6324429512023926,
      "learning_rate": 0.0005999970904951334,
      "loss": 4.9492,
      "step": 323
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5647947788238525,
      "learning_rate": 0.0005999970724517651,
      "loss": 5.268,
      "step": 324
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6545076966285706,
      "learning_rate": 0.0005999970543526215,
      "loss": 5.2321,
      "step": 325
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5033161044120789,
      "learning_rate": 0.0005999970361977027,
      "loss": 5.3302,
      "step": 326
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7206606864929199,
      "learning_rate": 0.000599997017987009,
      "loss": 4.8089,
      "step": 327
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7211560606956482,
      "learning_rate": 0.0005999969997205402,
      "loss": 5.1428,
      "step": 328
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7040310502052307,
      "learning_rate": 0.0005999969813982961,
      "loss": 5.2794,
      "step": 329
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5884260535240173,
      "learning_rate": 0.000599996963020277,
      "loss": 5.0741,
      "step": 330
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6079684495925903,
      "learning_rate": 0.0005999969445864827,
      "loss": 4.9287,
      "step": 331
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7995448708534241,
      "learning_rate": 0.0005999969260969135,
      "loss": 4.8497,
      "step": 332
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5514424443244934,
      "learning_rate": 0.000599996907551569,
      "loss": 5.042,
      "step": 333
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7686806917190552,
      "learning_rate": 0.0005999968889504496,
      "loss": 4.8891,
      "step": 334
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.711251974105835,
      "learning_rate": 0.000599996870293555,
      "loss": 4.7397,
      "step": 335
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5689890384674072,
      "learning_rate": 0.0005999968515808853,
      "loss": 5.1088,
      "step": 336
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7838135361671448,
      "learning_rate": 0.0005999968328124406,
      "loss": 4.8421,
      "step": 337
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7193905711174011,
      "learning_rate": 0.0005999968139882209,
      "loss": 5.0263,
      "step": 338
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6744477152824402,
      "learning_rate": 0.000599996795108226,
      "loss": 4.7864,
      "step": 339
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7814791798591614,
      "learning_rate": 0.000599996776172456,
      "loss": 5.1507,
      "step": 340
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7388622164726257,
      "learning_rate": 0.0005999967571809111,
      "loss": 5.1668,
      "step": 341
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6291711926460266,
      "learning_rate": 0.0005999967381335909,
      "loss": 4.8745,
      "step": 342
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6067249178886414,
      "learning_rate": 0.0005999967190304958,
      "loss": 5.0031,
      "step": 343
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7112147212028503,
      "learning_rate": 0.0005999966998716256,
      "loss": 4.9399,
      "step": 344
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6820663213729858,
      "learning_rate": 0.0005999966806569804,
      "loss": 5.101,
      "step": 345
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5935093760490417,
      "learning_rate": 0.0005999966613865602,
      "loss": 4.6359,
      "step": 346
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6988240480422974,
      "learning_rate": 0.0005999966420603648,
      "loss": 4.8949,
      "step": 347
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6069332361221313,
      "learning_rate": 0.0005999966226783945,
      "loss": 4.9411,
      "step": 348
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8334916234016418,
      "learning_rate": 0.0005999966032406492,
      "loss": 4.9814,
      "step": 349
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6758489608764648,
      "learning_rate": 0.0005999965837471286,
      "loss": 5.0626,
      "step": 350
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6612181663513184,
      "learning_rate": 0.0005999965641978333,
      "loss": 4.9507,
      "step": 351
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7162958383560181,
      "learning_rate": 0.0005999965445927628,
      "loss": 4.7923,
      "step": 352
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6535177826881409,
      "learning_rate": 0.0005999965249319174,
      "loss": 5.2402,
      "step": 353
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8019972443580627,
      "learning_rate": 0.0005999965052152969,
      "loss": 4.8383,
      "step": 354
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6224954724311829,
      "learning_rate": 0.0005999964854429014,
      "loss": 5.0676,
      "step": 355
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6699715256690979,
      "learning_rate": 0.0005999964656147308,
      "loss": 4.9835,
      "step": 356
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6140308380126953,
      "learning_rate": 0.0005999964457307853,
      "loss": 5.1332,
      "step": 357
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6332080364227295,
      "learning_rate": 0.0005999964257910647,
      "loss": 4.8534,
      "step": 358
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7715905904769897,
      "learning_rate": 0.0005999964057955692,
      "loss": 4.8963,
      "step": 359
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.649645209312439,
      "learning_rate": 0.0005999963857442987,
      "loss": 4.9344,
      "step": 360
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7259341478347778,
      "learning_rate": 0.0005999963656372532,
      "loss": 4.63,
      "step": 361
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7096255421638489,
      "learning_rate": 0.0005999963454744327,
      "loss": 5.1338,
      "step": 362
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.738047182559967,
      "learning_rate": 0.0005999963252558371,
      "loss": 5.0942,
      "step": 363
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7350514531135559,
      "learning_rate": 0.0005999963049814666,
      "loss": 4.962,
      "step": 364
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7327950596809387,
      "learning_rate": 0.0005999962846513213,
      "loss": 4.8599,
      "step": 365
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6999970078468323,
      "learning_rate": 0.0005999962642654008,
      "loss": 5.0412,
      "step": 366
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9565870761871338,
      "learning_rate": 0.0005999962438237055,
      "loss": 4.895,
      "step": 367
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7980199456214905,
      "learning_rate": 0.0005999962233262351,
      "loss": 4.8972,
      "step": 368
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6971856355667114,
      "learning_rate": 0.0005999962027729898,
      "loss": 4.6994,
      "step": 369
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.623756468296051,
      "learning_rate": 0.0005999961821639696,
      "loss": 4.8778,
      "step": 370
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6149433851242065,
      "learning_rate": 0.0005999961614991743,
      "loss": 5.0095,
      "step": 371
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7269710302352905,
      "learning_rate": 0.0005999961407786042,
      "loss": 5.0703,
      "step": 372
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6682495474815369,
      "learning_rate": 0.000599996120002259,
      "loss": 4.9221,
      "step": 373
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6614115834236145,
      "learning_rate": 0.000599996099170139,
      "loss": 4.7739,
      "step": 374
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5765252113342285,
      "learning_rate": 0.000599996078282244,
      "loss": 4.8086,
      "step": 375
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7086232900619507,
      "learning_rate": 0.0005999960573385741,
      "loss": 4.8852,
      "step": 376
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8295721411705017,
      "learning_rate": 0.0005999960363391292,
      "loss": 4.6326,
      "step": 377
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.5734622478485107,
      "learning_rate": 0.0005999960152839094,
      "loss": 4.651,
      "step": 378
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7242814302444458,
      "learning_rate": 0.0005999959941729148,
      "loss": 4.8673,
      "step": 379
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7822167277336121,
      "learning_rate": 0.000599995973006145,
      "loss": 4.9329,
      "step": 380
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6343212723731995,
      "learning_rate": 0.0005999959517836005,
      "loss": 4.8902,
      "step": 381
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7329263091087341,
      "learning_rate": 0.0005999959305052811,
      "loss": 4.6968,
      "step": 382
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.7800689935684204,
      "learning_rate": 0.0005999959091711867,
      "loss": 4.657,
      "step": 383
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.6332276463508606,
      "learning_rate": 0.0005999958877813174,
      "loss": 4.8985,
      "step": 384
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7798433899879456,
      "learning_rate": 0.0005999958663356733,
      "loss": 4.5709,
      "step": 385
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6531031131744385,
      "learning_rate": 0.0005999958448342543,
      "loss": 5.1305,
      "step": 386
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7903844118118286,
      "learning_rate": 0.0005999958232770603,
      "loss": 4.8583,
      "step": 387
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.670809268951416,
      "learning_rate": 0.0005999958016640915,
      "loss": 4.8457,
      "step": 388
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7485822439193726,
      "learning_rate": 0.0005999957799953478,
      "loss": 5.071,
      "step": 389
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6287962794303894,
      "learning_rate": 0.0005999957582708292,
      "loss": 4.5694,
      "step": 390
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6717586517333984,
      "learning_rate": 0.0005999957364905357,
      "loss": 4.8713,
      "step": 391
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.768893301486969,
      "learning_rate": 0.0005999957146544674,
      "loss": 5.1366,
      "step": 392
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.850506067276001,
      "learning_rate": 0.0005999956927626243,
      "loss": 4.7447,
      "step": 393
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8041247725486755,
      "learning_rate": 0.0005999956708150062,
      "loss": 4.8131,
      "step": 394
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7948045134544373,
      "learning_rate": 0.0005999956488116132,
      "loss": 5.01,
      "step": 395
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7612506151199341,
      "learning_rate": 0.0005999956267524455,
      "loss": 4.8163,
      "step": 396
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6271497011184692,
      "learning_rate": 0.0005999956046375028,
      "loss": 4.8404,
      "step": 397
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7496037483215332,
      "learning_rate": 0.0005999955824667854,
      "loss": 4.9718,
      "step": 398
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6704515218734741,
      "learning_rate": 0.000599995560240293,
      "loss": 4.9755,
      "step": 399
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8331307172775269,
      "learning_rate": 0.0005999955379580259,
      "loss": 4.7988,
      "step": 400
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6760748028755188,
      "learning_rate": 0.000599995515619984,
      "loss": 4.7869,
      "step": 401
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5770251154899597,
      "learning_rate": 0.0005999954932261671,
      "loss": 4.7883,
      "step": 402
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.642539918422699,
      "learning_rate": 0.0005999954707765755,
      "loss": 4.6686,
      "step": 403
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7028787136077881,
      "learning_rate": 0.000599995448271209,
      "loss": 4.8011,
      "step": 404
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7261245846748352,
      "learning_rate": 0.0005999954257100678,
      "loss": 4.6946,
      "step": 405
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5537574887275696,
      "learning_rate": 0.0005999954030931517,
      "loss": 4.665,
      "step": 406
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7512525320053101,
      "learning_rate": 0.0005999953804204607,
      "loss": 4.8615,
      "step": 407
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6203027367591858,
      "learning_rate": 0.000599995357691995,
      "loss": 4.8343,
      "step": 408
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7878710627555847,
      "learning_rate": 0.0005999953349077546,
      "loss": 4.9273,
      "step": 409
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5291107296943665,
      "learning_rate": 0.0005999953120677392,
      "loss": 5.0822,
      "step": 410
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8786430954933167,
      "learning_rate": 0.0005999952891719493,
      "loss": 4.7314,
      "step": 411
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6895014047622681,
      "learning_rate": 0.0005999952662203844,
      "loss": 4.7556,
      "step": 412
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6502274870872498,
      "learning_rate": 0.0005999952432130447,
      "loss": 4.7272,
      "step": 413
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.792969286441803,
      "learning_rate": 0.0005999952201499302,
      "loss": 4.7406,
      "step": 414
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6938273906707764,
      "learning_rate": 0.000599995197031041,
      "loss": 4.9632,
      "step": 415
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6356906294822693,
      "learning_rate": 0.0005999951738563771,
      "loss": 4.6118,
      "step": 416
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7556868195533752,
      "learning_rate": 0.0005999951506259383,
      "loss": 4.5777,
      "step": 417
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.682034432888031,
      "learning_rate": 0.0005999951273397248,
      "loss": 4.714,
      "step": 418
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6531345248222351,
      "learning_rate": 0.0005999951039977365,
      "loss": 4.8119,
      "step": 419
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.863615870475769,
      "learning_rate": 0.0005999950805999735,
      "loss": 4.8717,
      "step": 420
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7635592222213745,
      "learning_rate": 0.0005999950571464358,
      "loss": 4.582,
      "step": 421
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8945791721343994,
      "learning_rate": 0.0005999950336371232,
      "loss": 4.7149,
      "step": 422
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.703259289264679,
      "learning_rate": 0.0005999950100720361,
      "loss": 4.6831,
      "step": 423
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7808760404586792,
      "learning_rate": 0.000599994986451174,
      "loss": 4.7084,
      "step": 424
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6790940761566162,
      "learning_rate": 0.0005999949627745374,
      "loss": 4.8918,
      "step": 425
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7660654187202454,
      "learning_rate": 0.0005999949390421258,
      "loss": 4.7524,
      "step": 426
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7320963740348816,
      "learning_rate": 0.0005999949152539398,
      "loss": 4.751,
      "step": 427
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8664911985397339,
      "learning_rate": 0.0005999948914099788,
      "loss": 4.7187,
      "step": 428
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7422569394111633,
      "learning_rate": 0.0005999948675102433,
      "loss": 4.6721,
      "step": 429
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8259596824645996,
      "learning_rate": 0.0005999948435547329,
      "loss": 4.9039,
      "step": 430
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7874965071678162,
      "learning_rate": 0.0005999948195434479,
      "loss": 4.5993,
      "step": 431
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7812538146972656,
      "learning_rate": 0.0005999947954763883,
      "loss": 4.6183,
      "step": 432
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8790844082832336,
      "learning_rate": 0.0005999947713535539,
      "loss": 4.6224,
      "step": 433
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6809853911399841,
      "learning_rate": 0.0005999947471749447,
      "loss": 4.8815,
      "step": 434
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7847221493721008,
      "learning_rate": 0.000599994722940561,
      "loss": 4.5929,
      "step": 435
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8024842739105225,
      "learning_rate": 0.0005999946986504025,
      "loss": 4.8808,
      "step": 436
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8715197443962097,
      "learning_rate": 0.0005999946743044693,
      "loss": 4.5713,
      "step": 437
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6194332838058472,
      "learning_rate": 0.0005999946499027616,
      "loss": 4.8703,
      "step": 438
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8924667239189148,
      "learning_rate": 0.0005999946254452791,
      "loss": 4.7013,
      "step": 439
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7645755410194397,
      "learning_rate": 0.0005999946009320219,
      "loss": 4.5969,
      "step": 440
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8842922449111938,
      "learning_rate": 0.0005999945763629902,
      "loss": 4.7211,
      "step": 441
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6709338426589966,
      "learning_rate": 0.0005999945517381837,
      "loss": 4.6137,
      "step": 442
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7006518840789795,
      "learning_rate": 0.0005999945270576026,
      "loss": 4.6363,
      "step": 443
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7254226803779602,
      "learning_rate": 0.0005999945023212468,
      "loss": 4.7022,
      "step": 444
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8744111061096191,
      "learning_rate": 0.0005999944775291165,
      "loss": 4.802,
      "step": 445
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7515599727630615,
      "learning_rate": 0.0005999944526812114,
      "loss": 4.7846,
      "step": 446
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7021082639694214,
      "learning_rate": 0.0005999944277775318,
      "loss": 4.8422,
      "step": 447
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7308422327041626,
      "learning_rate": 0.0005999944028180775,
      "loss": 4.8374,
      "step": 448
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.126850962638855,
      "learning_rate": 0.0005999943778028485,
      "loss": 4.7404,
      "step": 449
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2268168926239014,
      "learning_rate": 0.0005999943527318451,
      "loss": 4.2572,
      "step": 450
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.768912672996521,
      "learning_rate": 0.000599994327605067,
      "loss": 4.7299,
      "step": 451
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8274908065795898,
      "learning_rate": 0.0005999943024225142,
      "loss": 4.6489,
      "step": 452
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7840928435325623,
      "learning_rate": 0.0005999942771841868,
      "loss": 4.492,
      "step": 453
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9391815066337585,
      "learning_rate": 0.0005999942518900848,
      "loss": 4.9184,
      "step": 454
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.82708740234375,
      "learning_rate": 0.0005999942265402083,
      "loss": 4.8594,
      "step": 455
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9273870587348938,
      "learning_rate": 0.0005999942011345572,
      "loss": 4.465,
      "step": 456
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8598252534866333,
      "learning_rate": 0.0005999941756731315,
      "loss": 4.4938,
      "step": 457
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8903417587280273,
      "learning_rate": 0.0005999941501559312,
      "loss": 4.4781,
      "step": 458
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8562648892402649,
      "learning_rate": 0.0005999941245829563,
      "loss": 4.5462,
      "step": 459
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8789373636245728,
      "learning_rate": 0.0005999940989542069,
      "loss": 4.7335,
      "step": 460
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7078028917312622,
      "learning_rate": 0.0005999940732696829,
      "loss": 4.6933,
      "step": 461
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7885638475418091,
      "learning_rate": 0.0005999940475293844,
      "loss": 4.568,
      "step": 462
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7481369376182556,
      "learning_rate": 0.0005999940217333112,
      "loss": 4.7702,
      "step": 463
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8778972625732422,
      "learning_rate": 0.0005999939958814635,
      "loss": 4.5337,
      "step": 464
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.012869954109192,
      "learning_rate": 0.0005999939699738413,
      "loss": 4.2558,
      "step": 465
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9412649273872375,
      "learning_rate": 0.0005999939440104445,
      "loss": 4.5796,
      "step": 466
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7755536437034607,
      "learning_rate": 0.0005999939179912731,
      "loss": 4.5812,
      "step": 467
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8599278330802917,
      "learning_rate": 0.0005999938919163274,
      "loss": 4.4515,
      "step": 468
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8034870028495789,
      "learning_rate": 0.000599993865785607,
      "loss": 4.7243,
      "step": 469
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9666523337364197,
      "learning_rate": 0.0005999938395991122,
      "loss": 4.7991,
      "step": 470
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8418998718261719,
      "learning_rate": 0.0005999938133568428,
      "loss": 4.6558,
      "step": 471
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8761516809463501,
      "learning_rate": 0.0005999937870587988,
      "loss": 4.45,
      "step": 472
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8002101182937622,
      "learning_rate": 0.0005999937607049804,
      "loss": 4.6396,
      "step": 473
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0074703693389893,
      "learning_rate": 0.0005999937342953875,
      "loss": 4.6495,
      "step": 474
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.859565258026123,
      "learning_rate": 0.0005999937078300199,
      "loss": 4.2999,
      "step": 475
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7739835977554321,
      "learning_rate": 0.0005999936813088781,
      "loss": 4.5105,
      "step": 476
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8392801880836487,
      "learning_rate": 0.0005999936547319617,
      "loss": 4.635,
      "step": 477
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7482221722602844,
      "learning_rate": 0.0005999936280992708,
      "loss": 4.9959,
      "step": 478
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8280874490737915,
      "learning_rate": 0.0005999936014108054,
      "loss": 4.7255,
      "step": 479
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8061417937278748,
      "learning_rate": 0.0005999935746665656,
      "loss": 4.7874,
      "step": 480
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8048090934753418,
      "learning_rate": 0.0005999935478665513,
      "loss": 4.5038,
      "step": 481
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7549205422401428,
      "learning_rate": 0.0005999935210107625,
      "loss": 4.655,
      "step": 482
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7529839277267456,
      "learning_rate": 0.0005999934940991993,
      "loss": 4.7623,
      "step": 483
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.01051664352417,
      "learning_rate": 0.0005999934671318617,
      "loss": 4.4377,
      "step": 484
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6610145568847656,
      "learning_rate": 0.0005999934401087495,
      "loss": 4.57,
      "step": 485
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8584699034690857,
      "learning_rate": 0.0005999934130298629,
      "loss": 4.4449,
      "step": 486
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8650259375572205,
      "learning_rate": 0.0005999933858952018,
      "loss": 4.6652,
      "step": 487
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7542089223861694,
      "learning_rate": 0.0005999933587047665,
      "loss": 4.478,
      "step": 488
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6964097619056702,
      "learning_rate": 0.0005999933314585565,
      "loss": 4.7223,
      "step": 489
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8430588245391846,
      "learning_rate": 0.0005999933041565723,
      "loss": 4.5806,
      "step": 490
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0598688125610352,
      "learning_rate": 0.0005999932767988135,
      "loss": 4.1139,
      "step": 491
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8240185976028442,
      "learning_rate": 0.0005999932493852804,
      "loss": 4.7162,
      "step": 492
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8239328861236572,
      "learning_rate": 0.0005999932219159728,
      "loss": 4.6251,
      "step": 493
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9573094248771667,
      "learning_rate": 0.0005999931943908909,
      "loss": 4.531,
      "step": 494
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0714921951293945,
      "learning_rate": 0.0005999931668100346,
      "loss": 4.5291,
      "step": 495
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.772596001625061,
      "learning_rate": 0.0005999931391734039,
      "loss": 4.6671,
      "step": 496
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9610496163368225,
      "learning_rate": 0.0005999931114809987,
      "loss": 4.6762,
      "step": 497
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.095420479774475,
      "learning_rate": 0.0005999930837328192,
      "loss": 4.4161,
      "step": 498
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8929791450500488,
      "learning_rate": 0.0005999930559288653,
      "loss": 4.8105,
      "step": 499
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9331988096237183,
      "learning_rate": 0.0005999930280691371,
      "loss": 4.5048,
      "step": 500
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9967983365058899,
      "learning_rate": 0.0005999930001536344,
      "loss": 4.666,
      "step": 501
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7405862212181091,
      "learning_rate": 0.0005999929721823576,
      "loss": 4.8221,
      "step": 502
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0988017320632935,
      "learning_rate": 0.0005999929441553063,
      "loss": 4.5541,
      "step": 503
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0989478826522827,
      "learning_rate": 0.0005999929160724805,
      "loss": 4.199,
      "step": 504
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8939803242683411,
      "learning_rate": 0.0005999928879338804,
      "loss": 4.5771,
      "step": 505
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9277324080467224,
      "learning_rate": 0.0005999928597395061,
      "loss": 4.653,
      "step": 506
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7759469151496887,
      "learning_rate": 0.0005999928314893574,
      "loss": 4.4609,
      "step": 507
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9509010910987854,
      "learning_rate": 0.0005999928031834343,
      "loss": 4.8576,
      "step": 508
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8633671998977661,
      "learning_rate": 0.000599992774821737,
      "loss": 4.7074,
      "step": 509
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9558639526367188,
      "learning_rate": 0.0005999927464042654,
      "loss": 4.5833,
      "step": 510
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1534693241119385,
      "learning_rate": 0.0005999927179310194,
      "loss": 4.7715,
      "step": 511
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9215481281280518,
      "learning_rate": 0.000599992689401999,
      "loss": 4.6025,
      "step": 512
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9774754047393799,
      "learning_rate": 0.0005999926608172044,
      "loss": 4.5098,
      "step": 513
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8766469955444336,
      "learning_rate": 0.0005999926321766355,
      "loss": 4.2704,
      "step": 514
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7705366611480713,
      "learning_rate": 0.0005999926034802923,
      "loss": 4.8202,
      "step": 515
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8489000201225281,
      "learning_rate": 0.0005999925747281748,
      "loss": 4.4196,
      "step": 516
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8590890169143677,
      "learning_rate": 0.000599992545920283,
      "loss": 4.689,
      "step": 517
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8715841770172119,
      "learning_rate": 0.0005999925170566171,
      "loss": 4.236,
      "step": 518
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9159227013587952,
      "learning_rate": 0.0005999924881371767,
      "loss": 4.5644,
      "step": 519
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7948566675186157,
      "learning_rate": 0.0005999924591619621,
      "loss": 4.6779,
      "step": 520
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1999262571334839,
      "learning_rate": 0.0005999924301309733,
      "loss": 4.2647,
      "step": 521
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8620937466621399,
      "learning_rate": 0.0005999924010442101,
      "loss": 4.5821,
      "step": 522
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.034691333770752,
      "learning_rate": 0.0005999923719016728,
      "loss": 4.4123,
      "step": 523
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8793874979019165,
      "learning_rate": 0.0005999923427033612,
      "loss": 4.2484,
      "step": 524
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8343753218650818,
      "learning_rate": 0.0005999923134492754,
      "loss": 4.6621,
      "step": 525
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0085440874099731,
      "learning_rate": 0.0005999922841394153,
      "loss": 4.5367,
      "step": 526
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9121794700622559,
      "learning_rate": 0.000599992254773781,
      "loss": 4.4148,
      "step": 527
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9690906405448914,
      "learning_rate": 0.0005999922253523724,
      "loss": 4.5281,
      "step": 528
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8809514045715332,
      "learning_rate": 0.0005999921958751897,
      "loss": 4.6877,
      "step": 529
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9151937365531921,
      "learning_rate": 0.0005999921663422328,
      "loss": 4.7929,
      "step": 530
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0568535327911377,
      "learning_rate": 0.0005999921367535016,
      "loss": 4.5583,
      "step": 531
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0048294067382812,
      "learning_rate": 0.0005999921071089962,
      "loss": 4.5676,
      "step": 532
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8333326578140259,
      "learning_rate": 0.0005999920774087166,
      "loss": 4.6442,
      "step": 533
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.952647864818573,
      "learning_rate": 0.0005999920476526629,
      "loss": 4.8914,
      "step": 534
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9694944620132446,
      "learning_rate": 0.000599992017840835,
      "loss": 4.6978,
      "step": 535
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.822319746017456,
      "learning_rate": 0.0005999919879732329,
      "loss": 4.7123,
      "step": 536
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8463312983512878,
      "learning_rate": 0.0005999919580498567,
      "loss": 4.7088,
      "step": 537
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7455915808677673,
      "learning_rate": 0.0005999919280707061,
      "loss": 4.5745,
      "step": 538
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8983517289161682,
      "learning_rate": 0.0005999918980357816,
      "loss": 4.6456,
      "step": 539
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9216125011444092,
      "learning_rate": 0.0005999918679450828,
      "loss": 4.3207,
      "step": 540
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9283474087715149,
      "learning_rate": 0.0005999918377986098,
      "loss": 4.8551,
      "step": 541
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8243228197097778,
      "learning_rate": 0.0005999918075963629,
      "loss": 4.2874,
      "step": 542
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9559061527252197,
      "learning_rate": 0.0005999917773383416,
      "loss": 4.7513,
      "step": 543
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9478698968887329,
      "learning_rate": 0.0005999917470245462,
      "loss": 4.2616,
      "step": 544
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.828840434551239,
      "learning_rate": 0.0005999917166549768,
      "loss": 4.2822,
      "step": 545
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6524748802185059,
      "learning_rate": 0.0005999916862296333,
      "loss": 4.4577,
      "step": 546
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6433447003364563,
      "learning_rate": 0.0005999916557485155,
      "loss": 4.7786,
      "step": 547
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8358259201049805,
      "learning_rate": 0.0005999916252116237,
      "loss": 4.5399,
      "step": 548
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8275769948959351,
      "learning_rate": 0.0005999915946189577,
      "loss": 4.4602,
      "step": 549
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9578084349632263,
      "learning_rate": 0.0005999915639705178,
      "loss": 4.7551,
      "step": 550
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8507407307624817,
      "learning_rate": 0.0005999915332663036,
      "loss": 4.044,
      "step": 551
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6686766743659973,
      "learning_rate": 0.0005999915025063153,
      "loss": 4.2813,
      "step": 552
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8280956149101257,
      "learning_rate": 0.0005999914716905532,
      "loss": 4.2344,
      "step": 553
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9245989918708801,
      "learning_rate": 0.0005999914408190167,
      "loss": 4.3026,
      "step": 554
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8040440678596497,
      "learning_rate": 0.0005999914098917063,
      "loss": 4.4505,
      "step": 555
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.817800760269165,
      "learning_rate": 0.0005999913789086218,
      "loss": 4.6768,
      "step": 556
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9071686267852783,
      "learning_rate": 0.0005999913478697633,
      "loss": 4.2132,
      "step": 557
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.654220461845398,
      "learning_rate": 0.0005999913167751307,
      "loss": 4.796,
      "step": 558
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8951728343963623,
      "learning_rate": 0.000599991285624724,
      "loss": 4.3098,
      "step": 559
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8816576600074768,
      "learning_rate": 0.0005999912544185432,
      "loss": 4.4086,
      "step": 560
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7976811528205872,
      "learning_rate": 0.0005999912231565885,
      "loss": 4.2725,
      "step": 561
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9743579626083374,
      "learning_rate": 0.0005999911918388597,
      "loss": 4.5103,
      "step": 562
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2741212844848633,
      "learning_rate": 0.0005999911604653568,
      "loss": 4.5346,
      "step": 563
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8237372636795044,
      "learning_rate": 0.0005999911290360801,
      "loss": 4.4158,
      "step": 564
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0191340446472168,
      "learning_rate": 0.0005999910975510292,
      "loss": 4.59,
      "step": 565
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.944908857345581,
      "learning_rate": 0.0005999910660102044,
      "loss": 4.434,
      "step": 566
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7892240881919861,
      "learning_rate": 0.0005999910344136056,
      "loss": 4.6364,
      "step": 567
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2377464771270752,
      "learning_rate": 0.0005999910027612328,
      "loss": 4.5701,
      "step": 568
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8924896717071533,
      "learning_rate": 0.0005999909710530859,
      "loss": 4.5177,
      "step": 569
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8839147686958313,
      "learning_rate": 0.000599990939289165,
      "loss": 4.625,
      "step": 570
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9827186465263367,
      "learning_rate": 0.0005999909074694702,
      "loss": 4.6903,
      "step": 571
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8953799605369568,
      "learning_rate": 0.0005999908755940015,
      "loss": 4.4217,
      "step": 572
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7298597097396851,
      "learning_rate": 0.0005999908436627587,
      "loss": 4.6226,
      "step": 573
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8373403549194336,
      "learning_rate": 0.0005999908116757421,
      "loss": 4.3977,
      "step": 574
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.149680256843567,
      "learning_rate": 0.0005999907796329514,
      "loss": 4.2909,
      "step": 575
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8739494681358337,
      "learning_rate": 0.0005999907475343868,
      "loss": 4.4583,
      "step": 576
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1329748630523682,
      "learning_rate": 0.0005999907153800483,
      "loss": 4.054,
      "step": 577
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1000847816467285,
      "learning_rate": 0.0005999906831699359,
      "loss": 4.4739,
      "step": 578
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8742541670799255,
      "learning_rate": 0.0005999906509040493,
      "loss": 4.5128,
      "step": 579
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.910304069519043,
      "learning_rate": 0.0005999906185823891,
      "loss": 4.6226,
      "step": 580
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9549867510795593,
      "learning_rate": 0.0005999905862049548,
      "loss": 4.4167,
      "step": 581
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9427266120910645,
      "learning_rate": 0.0005999905537717466,
      "loss": 4.437,
      "step": 582
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0623327493667603,
      "learning_rate": 0.0005999905212827646,
      "loss": 4.7077,
      "step": 583
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9099586606025696,
      "learning_rate": 0.0005999904887380086,
      "loss": 4.558,
      "step": 584
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7424724698066711,
      "learning_rate": 0.0005999904561374787,
      "loss": 4.5935,
      "step": 585
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9310063123703003,
      "learning_rate": 0.0005999904234811749,
      "loss": 4.8369,
      "step": 586
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5592507123947144,
      "learning_rate": 0.0005999903907690973,
      "loss": 4.5265,
      "step": 587
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8452212810516357,
      "learning_rate": 0.0005999903580012458,
      "loss": 4.6646,
      "step": 588
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1192076206207275,
      "learning_rate": 0.0005999903251776204,
      "loss": 4.2622,
      "step": 589
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1558549404144287,
      "learning_rate": 0.0005999902922982212,
      "loss": 4.3733,
      "step": 590
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1880565881729126,
      "learning_rate": 0.0005999902593630481,
      "loss": 4.5521,
      "step": 591
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8178834915161133,
      "learning_rate": 0.0005999902263721011,
      "loss": 4.6944,
      "step": 592
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7403278350830078,
      "learning_rate": 0.0005999901933253802,
      "loss": 4.2917,
      "step": 593
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1651467084884644,
      "learning_rate": 0.0005999901602228856,
      "loss": 4.7059,
      "step": 594
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.898216724395752,
      "learning_rate": 0.0005999901270646172,
      "loss": 4.3159,
      "step": 595
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9315022826194763,
      "learning_rate": 0.0005999900938505749,
      "loss": 4.5154,
      "step": 596
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0715879201889038,
      "learning_rate": 0.0005999900605807587,
      "loss": 4.5986,
      "step": 597
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9363990426063538,
      "learning_rate": 0.0005999900272551688,
      "loss": 4.3793,
      "step": 598
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7772135138511658,
      "learning_rate": 0.000599989993873805,
      "loss": 4.5298,
      "step": 599
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.387259840965271,
      "learning_rate": 0.0005999899604366675,
      "loss": 4.5107,
      "step": 600
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5348929166793823,
      "learning_rate": 0.0005999899269437561,
      "loss": 4.397,
      "step": 601
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1433173418045044,
      "learning_rate": 0.000599989893395071,
      "loss": 4.1637,
      "step": 602
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8495518565177917,
      "learning_rate": 0.0005999898597906121,
      "loss": 4.503,
      "step": 603
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.14055597782135,
      "learning_rate": 0.0005999898261303794,
      "loss": 4.488,
      "step": 604
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8774970769882202,
      "learning_rate": 0.0005999897924143729,
      "loss": 4.4486,
      "step": 605
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1274988651275635,
      "learning_rate": 0.0005999897586425926,
      "loss": 4.368,
      "step": 606
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9563073515892029,
      "learning_rate": 0.0005999897248150387,
      "loss": 4.7483,
      "step": 607
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8305675983428955,
      "learning_rate": 0.0005999896909317109,
      "loss": 4.4706,
      "step": 608
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0186108350753784,
      "learning_rate": 0.0005999896569926094,
      "loss": 4.3457,
      "step": 609
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9694738388061523,
      "learning_rate": 0.0005999896229977342,
      "loss": 4.2014,
      "step": 610
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1248701810836792,
      "learning_rate": 0.0005999895889470852,
      "loss": 4.1196,
      "step": 611
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9749529957771301,
      "learning_rate": 0.0005999895548406624,
      "loss": 4.5228,
      "step": 612
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9965658783912659,
      "learning_rate": 0.000599989520678466,
      "loss": 4.2197,
      "step": 613
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8293508887290955,
      "learning_rate": 0.000599989486460496,
      "loss": 4.3965,
      "step": 614
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2947657108306885,
      "learning_rate": 0.0005999894521867521,
      "loss": 4.6129,
      "step": 615
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9965646266937256,
      "learning_rate": 0.0005999894178572346,
      "loss": 4.3095,
      "step": 616
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8920789361000061,
      "learning_rate": 0.0005999893834719433,
      "loss": 4.4681,
      "step": 617
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8873271942138672,
      "learning_rate": 0.0005999893490308784,
      "loss": 4.5998,
      "step": 618
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8986464142799377,
      "learning_rate": 0.0005999893145340398,
      "loss": 4.4147,
      "step": 619
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2024760246276855,
      "learning_rate": 0.0005999892799814275,
      "loss": 4.3534,
      "step": 620
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3723316192626953,
      "learning_rate": 0.0005999892453730415,
      "loss": 4.4072,
      "step": 621
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1817477941513062,
      "learning_rate": 0.000599989210708882,
      "loss": 4.2654,
      "step": 622
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9550788998603821,
      "learning_rate": 0.0005999891759889487,
      "loss": 4.4278,
      "step": 623
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3096418380737305,
      "learning_rate": 0.0005999891412132417,
      "loss": 4.1389,
      "step": 624
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.587257742881775,
      "learning_rate": 0.0005999891063817611,
      "loss": 4.3926,
      "step": 625
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8372544050216675,
      "learning_rate": 0.0005999890714945069,
      "loss": 4.3046,
      "step": 626
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8815043568611145,
      "learning_rate": 0.0005999890365514791,
      "loss": 4.3624,
      "step": 627
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.289283275604248,
      "learning_rate": 0.0005999890015526776,
      "loss": 4.1326,
      "step": 628
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9448599815368652,
      "learning_rate": 0.0005999889664981026,
      "loss": 4.3937,
      "step": 629
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.861036479473114,
      "learning_rate": 0.0005999889313877539,
      "loss": 4.1466,
      "step": 630
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9138443470001221,
      "learning_rate": 0.0005999888962216316,
      "loss": 4.3883,
      "step": 631
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8633536100387573,
      "learning_rate": 0.0005999888609997357,
      "loss": 4.4679,
      "step": 632
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0337368249893188,
      "learning_rate": 0.0005999888257220662,
      "loss": 4.6333,
      "step": 633
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.754109263420105,
      "learning_rate": 0.0005999887903886231,
      "loss": 4.3199,
      "step": 634
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8602330684661865,
      "learning_rate": 0.0005999887549994065,
      "loss": 4.4704,
      "step": 635
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8386890888214111,
      "learning_rate": 0.0005999887195544161,
      "loss": 4.483,
      "step": 636
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3159222602844238,
      "learning_rate": 0.0005999886840536523,
      "loss": 4.1177,
      "step": 637
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9933037757873535,
      "learning_rate": 0.000599988648497115,
      "loss": 4.4172,
      "step": 638
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.833453357219696,
      "learning_rate": 0.0005999886128848041,
      "loss": 4.3278,
      "step": 639
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1272623538970947,
      "learning_rate": 0.0005999885772167197,
      "loss": 4.6927,
      "step": 640
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8798514008522034,
      "learning_rate": 0.0005999885414928617,
      "loss": 4.2854,
      "step": 641
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1554378271102905,
      "learning_rate": 0.0005999885057132303,
      "loss": 4.303,
      "step": 642
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5103455781936646,
      "learning_rate": 0.0005999884698778252,
      "loss": 4.3119,
      "step": 643
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.179171085357666,
      "learning_rate": 0.0005999884339866465,
      "loss": 4.3753,
      "step": 644
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9111053943634033,
      "learning_rate": 0.0005999883980396945,
      "loss": 4.39,
      "step": 645
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0399707555770874,
      "learning_rate": 0.000599988362036969,
      "loss": 4.6522,
      "step": 646
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9585803151130676,
      "learning_rate": 0.0005999883259784698,
      "loss": 4.2814,
      "step": 647
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0470560789108276,
      "learning_rate": 0.0005999882898641973,
      "loss": 4.215,
      "step": 648
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9134689569473267,
      "learning_rate": 0.0005999882536941512,
      "loss": 4.4672,
      "step": 649
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9192173480987549,
      "learning_rate": 0.0005999882174683316,
      "loss": 4.4695,
      "step": 650
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9533227682113647,
      "learning_rate": 0.0005999881811867387,
      "loss": 4.3705,
      "step": 651
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9995131492614746,
      "learning_rate": 0.0005999881448493721,
      "loss": 4.4385,
      "step": 652
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9615848660469055,
      "learning_rate": 0.0005999881084562321,
      "loss": 4.3962,
      "step": 653
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1020441055297852,
      "learning_rate": 0.0005999880720073188,
      "loss": 4.448,
      "step": 654
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8898390531539917,
      "learning_rate": 0.000599988035502632,
      "loss": 4.206,
      "step": 655
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.16994047164917,
      "learning_rate": 0.0005999879989421717,
      "loss": 4.3617,
      "step": 656
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.101820707321167,
      "learning_rate": 0.000599987962325938,
      "loss": 4.5828,
      "step": 657
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8774620294570923,
      "learning_rate": 0.0005999879256539309,
      "loss": 4.4141,
      "step": 658
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0143343210220337,
      "learning_rate": 0.0005999878889261503,
      "loss": 4.1784,
      "step": 659
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8544144630432129,
      "learning_rate": 0.0005999878521425964,
      "loss": 4.5309,
      "step": 660
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9096851348876953,
      "learning_rate": 0.000599987815303269,
      "loss": 4.4141,
      "step": 661
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9010453820228577,
      "learning_rate": 0.0005999877784081682,
      "loss": 4.2757,
      "step": 662
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8772140741348267,
      "learning_rate": 0.0005999877414572941,
      "loss": 4.4615,
      "step": 663
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.052130937576294,
      "learning_rate": 0.0005999877044506464,
      "loss": 4.3346,
      "step": 664
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9526869654655457,
      "learning_rate": 0.0005999876673882256,
      "loss": 4.3092,
      "step": 665
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8971355557441711,
      "learning_rate": 0.0005999876302700313,
      "loss": 4.3052,
      "step": 666
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9597886204719543,
      "learning_rate": 0.0005999875930960638,
      "loss": 4.5195,
      "step": 667
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9276748895645142,
      "learning_rate": 0.0005999875558663227,
      "loss": 4.3147,
      "step": 668
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1769193410873413,
      "learning_rate": 0.0005999875185808082,
      "loss": 4.5093,
      "step": 669
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4679960012435913,
      "learning_rate": 0.0005999874812395206,
      "loss": 4.4693,
      "step": 670
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0024752616882324,
      "learning_rate": 0.0005999874438424596,
      "loss": 4.3356,
      "step": 671
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9065581560134888,
      "learning_rate": 0.0005999874063896253,
      "loss": 4.284,
      "step": 672
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0658379793167114,
      "learning_rate": 0.0005999873688810176,
      "loss": 4.2642,
      "step": 673
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0444165468215942,
      "learning_rate": 0.0005999873313166365,
      "loss": 4.3831,
      "step": 674
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7910222411155701,
      "learning_rate": 0.0005999872936964822,
      "loss": 4.1849,
      "step": 675
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.138417363166809,
      "learning_rate": 0.0005999872560205547,
      "loss": 4.1853,
      "step": 676
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.288425087928772,
      "learning_rate": 0.0005999872182888537,
      "loss": 4.2301,
      "step": 677
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0904042720794678,
      "learning_rate": 0.0005999871805013795,
      "loss": 4.1594,
      "step": 678
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1239700317382812,
      "learning_rate": 0.0005999871426581321,
      "loss": 4.0785,
      "step": 679
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0132896900177002,
      "learning_rate": 0.0005999871047591114,
      "loss": 4.4227,
      "step": 680
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.345286250114441,
      "learning_rate": 0.0005999870668043174,
      "loss": 4.2211,
      "step": 681
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2029772996902466,
      "learning_rate": 0.00059998702879375,
      "loss": 4.3017,
      "step": 682
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8853662014007568,
      "learning_rate": 0.0005999869907274096,
      "loss": 4.0874,
      "step": 683
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9065176248550415,
      "learning_rate": 0.0005999869526052958,
      "loss": 4.3391,
      "step": 684
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2398664951324463,
      "learning_rate": 0.0005999869144274088,
      "loss": 4.2919,
      "step": 685
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9283202886581421,
      "learning_rate": 0.0005999868761937486,
      "loss": 4.2774,
      "step": 686
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9797019362449646,
      "learning_rate": 0.0005999868379043151,
      "loss": 4.6241,
      "step": 687
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.420243501663208,
      "learning_rate": 0.0005999867995591085,
      "loss": 4.0923,
      "step": 688
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8636838793754578,
      "learning_rate": 0.0005999867611581287,
      "loss": 4.2233,
      "step": 689
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9818089008331299,
      "learning_rate": 0.0005999867227013756,
      "loss": 4.3958,
      "step": 690
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.971804141998291,
      "learning_rate": 0.0005999866841888493,
      "loss": 4.3903,
      "step": 691
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0784728527069092,
      "learning_rate": 0.0005999866456205498,
      "loss": 4.1269,
      "step": 692
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8968290686607361,
      "learning_rate": 0.0005999866069964772,
      "loss": 4.3333,
      "step": 693
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0867565870285034,
      "learning_rate": 0.0005999865683166314,
      "loss": 4.2118,
      "step": 694
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9423485994338989,
      "learning_rate": 0.0005999865295810125,
      "loss": 4.5445,
      "step": 695
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9387860894203186,
      "learning_rate": 0.0005999864907896203,
      "loss": 4.2796,
      "step": 696
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0095558166503906,
      "learning_rate": 0.0005999864519424551,
      "loss": 4.3857,
      "step": 697
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4069997072219849,
      "learning_rate": 0.0005999864130395166,
      "loss": 4.0665,
      "step": 698
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9447919130325317,
      "learning_rate": 0.000599986374080805,
      "loss": 4.2326,
      "step": 699
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.449066162109375,
      "learning_rate": 0.0005999863350663202,
      "loss": 4.173,
      "step": 700
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8422912359237671,
      "learning_rate": 0.0005999862959960626,
      "loss": 4.2125,
      "step": 701
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1337933540344238,
      "learning_rate": 0.0005999862568700316,
      "loss": 4.2678,
      "step": 702
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3317394256591797,
      "learning_rate": 0.0005999862176882275,
      "loss": 4.2908,
      "step": 703
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.348730444908142,
      "learning_rate": 0.0005999861784506504,
      "loss": 4.6389,
      "step": 704
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0641939640045166,
      "learning_rate": 0.0005999861391573002,
      "loss": 4.3873,
      "step": 705
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1461834907531738,
      "learning_rate": 0.0005999860998081769,
      "loss": 4.2761,
      "step": 706
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4823085069656372,
      "learning_rate": 0.0005999860604032805,
      "loss": 4.3305,
      "step": 707
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3253448009490967,
      "learning_rate": 0.000599986020942611,
      "loss": 4.4659,
      "step": 708
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1989637613296509,
      "learning_rate": 0.0005999859814261685,
      "loss": 4.1665,
      "step": 709
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4824997186660767,
      "learning_rate": 0.0005999859418539528,
      "loss": 4.1268,
      "step": 710
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1365575790405273,
      "learning_rate": 0.0005999859022259642,
      "loss": 4.2346,
      "step": 711
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3520772457122803,
      "learning_rate": 0.0005999858625422026,
      "loss": 4.226,
      "step": 712
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0677931308746338,
      "learning_rate": 0.0005999858228026678,
      "loss": 4.5178,
      "step": 713
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1939613819122314,
      "learning_rate": 0.0005999857830073602,
      "loss": 4.3965,
      "step": 714
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.861965000629425,
      "learning_rate": 0.0005999857431562793,
      "loss": 4.3714,
      "step": 715
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0037181377410889,
      "learning_rate": 0.0005999857032494255,
      "loss": 4.4572,
      "step": 716
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9748342633247375,
      "learning_rate": 0.0005999856632867988,
      "loss": 4.5517,
      "step": 717
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0315260887145996,
      "learning_rate": 0.0005999856232683991,
      "loss": 4.1767,
      "step": 718
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4887491464614868,
      "learning_rate": 0.0005999855831942262,
      "loss": 4.2695,
      "step": 719
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0197396278381348,
      "learning_rate": 0.0005999855430642804,
      "loss": 4.3647,
      "step": 720
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1082388162612915,
      "learning_rate": 0.0005999855028785617,
      "loss": 4.3249,
      "step": 721
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0022088289260864,
      "learning_rate": 0.0005999854626370701,
      "loss": 4.3465,
      "step": 722
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2216880321502686,
      "learning_rate": 0.0005999854223398054,
      "loss": 3.9983,
      "step": 723
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.492913007736206,
      "learning_rate": 0.0005999853819867678,
      "loss": 4.2756,
      "step": 724
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.204506516456604,
      "learning_rate": 0.0005999853415779573,
      "loss": 4.4456,
      "step": 725
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2984172105789185,
      "learning_rate": 0.0005999853011133739,
      "loss": 4.3832,
      "step": 726
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5266731977462769,
      "learning_rate": 0.0005999852605930174,
      "loss": 4.3144,
      "step": 727
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1553086042404175,
      "learning_rate": 0.000599985220016888,
      "loss": 4.4712,
      "step": 728
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4719105958938599,
      "learning_rate": 0.0005999851793849857,
      "loss": 4.0635,
      "step": 729
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2946734428405762,
      "learning_rate": 0.0005999851386973105,
      "loss": 4.329,
      "step": 730
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2395011186599731,
      "learning_rate": 0.0005999850979538625,
      "loss": 4.3473,
      "step": 731
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3514631986618042,
      "learning_rate": 0.0005999850571546415,
      "loss": 4.2455,
      "step": 732
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0079611539840698,
      "learning_rate": 0.0005999850162996478,
      "loss": 4.4164,
      "step": 733
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0399683713912964,
      "learning_rate": 0.000599984975388881,
      "loss": 4.2344,
      "step": 734
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0057166814804077,
      "learning_rate": 0.0005999849344223415,
      "loss": 4.5382,
      "step": 735
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4228826761245728,
      "learning_rate": 0.000599984893400029,
      "loss": 4.0883,
      "step": 736
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0592870712280273,
      "learning_rate": 0.0005999848523219436,
      "loss": 4.1859,
      "step": 737
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0148169994354248,
      "learning_rate": 0.0005999848111880856,
      "loss": 4.4304,
      "step": 738
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0621085166931152,
      "learning_rate": 0.0005999847699984546,
      "loss": 4.1018,
      "step": 739
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9836688041687012,
      "learning_rate": 0.0005999847287530507,
      "loss": 4.3181,
      "step": 740
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.881971538066864,
      "learning_rate": 0.0005999846874518741,
      "loss": 4.3739,
      "step": 741
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0681785345077515,
      "learning_rate": 0.0005999846460949247,
      "loss": 3.9595,
      "step": 742
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1353563070297241,
      "learning_rate": 0.0005999846046822023,
      "loss": 4.0144,
      "step": 743
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9638946652412415,
      "learning_rate": 0.0005999845632137072,
      "loss": 4.3318,
      "step": 744
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0910060405731201,
      "learning_rate": 0.0005999845216894394,
      "loss": 4.4992,
      "step": 745
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0795600414276123,
      "learning_rate": 0.0005999844801093988,
      "loss": 4.243,
      "step": 746
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.02906334400177,
      "learning_rate": 0.0005999844384735854,
      "loss": 4.4777,
      "step": 747
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3440594673156738,
      "learning_rate": 0.0005999843967819992,
      "loss": 4.3472,
      "step": 748
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5304569005966187,
      "learning_rate": 0.0005999843550346402,
      "loss": 4.3508,
      "step": 749
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1025817394256592,
      "learning_rate": 0.0005999843132315085,
      "loss": 4.3943,
      "step": 750
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3203904628753662,
      "learning_rate": 0.000599984271372604,
      "loss": 4.1751,
      "step": 751
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2325094938278198,
      "learning_rate": 0.0005999842294579269,
      "loss": 4.4367,
      "step": 752
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.095508337020874,
      "learning_rate": 0.000599984187487477,
      "loss": 4.2183,
      "step": 753
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9449789524078369,
      "learning_rate": 0.0005999841454612542,
      "loss": 4.38,
      "step": 754
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9775968790054321,
      "learning_rate": 0.0005999841033792588,
      "loss": 4.0762,
      "step": 755
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1247162818908691,
      "learning_rate": 0.0005999840612414907,
      "loss": 4.123,
      "step": 756
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1377472877502441,
      "learning_rate": 0.0005999840190479501,
      "loss": 3.9843,
      "step": 757
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.252478003501892,
      "learning_rate": 0.0005999839767986367,
      "loss": 4.289,
      "step": 758
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.998268723487854,
      "learning_rate": 0.0005999839344935505,
      "loss": 4.5308,
      "step": 759
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.234990119934082,
      "learning_rate": 0.0005999838921326916,
      "loss": 4.3367,
      "step": 760
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2961374521255493,
      "learning_rate": 0.0005999838497160601,
      "loss": 4.3149,
      "step": 761
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4913946390151978,
      "learning_rate": 0.000599983807243656,
      "loss": 4.1385,
      "step": 762
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5849907398223877,
      "learning_rate": 0.0005999837647154792,
      "loss": 4.013,
      "step": 763
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0236458778381348,
      "learning_rate": 0.0005999837221315297,
      "loss": 4.4926,
      "step": 764
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2731454372406006,
      "learning_rate": 0.0005999836794918076,
      "loss": 4.4367,
      "step": 765
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2701852321624756,
      "learning_rate": 0.000599983636796313,
      "loss": 4.2433,
      "step": 766
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3676085472106934,
      "learning_rate": 0.0005999835940450456,
      "loss": 4.3022,
      "step": 767
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6721678972244263,
      "learning_rate": 0.0005999835512380057,
      "loss": 3.9803,
      "step": 768
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5582524538040161,
      "learning_rate": 0.0005999835083751931,
      "loss": 4.4155,
      "step": 769
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2175053358078003,
      "learning_rate": 0.0005999834654566079,
      "loss": 4.0621,
      "step": 770
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0694634914398193,
      "learning_rate": 0.0005999834224822503,
      "loss": 3.9229,
      "step": 771
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1561070680618286,
      "learning_rate": 0.0005999833794521199,
      "loss": 3.9753,
      "step": 772
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2698801755905151,
      "learning_rate": 0.000599983336366217,
      "loss": 4.2742,
      "step": 773
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4201995134353638,
      "learning_rate": 0.0005999832932245415,
      "loss": 4.3513,
      "step": 774
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5106213092803955,
      "learning_rate": 0.0005999832500270935,
      "loss": 4.2179,
      "step": 775
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8888958692550659,
      "learning_rate": 0.0005999832067738729,
      "loss": 4.2204,
      "step": 776
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0225588083267212,
      "learning_rate": 0.0005999831634648799,
      "loss": 4.2348,
      "step": 777
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4119391441345215,
      "learning_rate": 0.0005999831201001142,
      "loss": 4.0873,
      "step": 778
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1109387874603271,
      "learning_rate": 0.0005999830766795761,
      "loss": 4.1662,
      "step": 779
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3385168313980103,
      "learning_rate": 0.0005999830332032653,
      "loss": 4.2947,
      "step": 780
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1416734457015991,
      "learning_rate": 0.0005999829896711821,
      "loss": 4.2758,
      "step": 781
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0696049928665161,
      "learning_rate": 0.0005999829460833265,
      "loss": 4.5435,
      "step": 782
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.364275574684143,
      "learning_rate": 0.0005999829024396984,
      "loss": 4.156,
      "step": 783
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0516586303710938,
      "learning_rate": 0.0005999828587402976,
      "loss": 4.2709,
      "step": 784
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3048981428146362,
      "learning_rate": 0.0005999828149851245,
      "loss": 4.0658,
      "step": 785
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3682096004486084,
      "learning_rate": 0.0005999827711741789,
      "loss": 4.1952,
      "step": 786
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0293742418289185,
      "learning_rate": 0.0005999827273074609,
      "loss": 4.0448,
      "step": 787
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1942517757415771,
      "learning_rate": 0.0005999826833849703,
      "loss": 4.1502,
      "step": 788
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3155136108398438,
      "learning_rate": 0.0005999826394067074,
      "loss": 4.1687,
      "step": 789
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1810914278030396,
      "learning_rate": 0.0005999825953726719,
      "loss": 4.195,
      "step": 790
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4874335527420044,
      "learning_rate": 0.0005999825512828641,
      "loss": 4.0544,
      "step": 791
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3079533576965332,
      "learning_rate": 0.0005999825071372839,
      "loss": 4.0347,
      "step": 792
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1060888767242432,
      "learning_rate": 0.0005999824629359313,
      "loss": 4.2205,
      "step": 793
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3484710454940796,
      "learning_rate": 0.0005999824186788062,
      "loss": 4.0384,
      "step": 794
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.148471713066101,
      "learning_rate": 0.0005999823743659088,
      "loss": 4.3364,
      "step": 795
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.34870445728302,
      "learning_rate": 0.0005999823299972389,
      "loss": 4.106,
      "step": 796
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2965104579925537,
      "learning_rate": 0.0005999822855727967,
      "loss": 4.1501,
      "step": 797
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4675357341766357,
      "learning_rate": 0.0005999822410925821,
      "loss": 4.146,
      "step": 798
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4928306341171265,
      "learning_rate": 0.0005999821965565952,
      "loss": 4.1709,
      "step": 799
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1669402122497559,
      "learning_rate": 0.0005999821519648359,
      "loss": 4.1344,
      "step": 800
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1587882041931152,
      "learning_rate": 0.0005999821073173042,
      "loss": 4.4461,
      "step": 801
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3657629489898682,
      "learning_rate": 0.0005999820626140002,
      "loss": 4.4775,
      "step": 802
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8838892579078674,
      "learning_rate": 0.0005999820178549239,
      "loss": 4.2073,
      "step": 803
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1085094213485718,
      "learning_rate": 0.0005999819730400754,
      "loss": 4.3236,
      "step": 804
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3050603866577148,
      "learning_rate": 0.0005999819281694544,
      "loss": 4.2342,
      "step": 805
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.108824610710144,
      "learning_rate": 0.0005999818832430611,
      "loss": 4.0631,
      "step": 806
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2679710388183594,
      "learning_rate": 0.0005999818382608956,
      "loss": 3.9575,
      "step": 807
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4764665365219116,
      "learning_rate": 0.0005999817932229578,
      "loss": 4.309,
      "step": 808
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3238970041275024,
      "learning_rate": 0.0005999817481292478,
      "loss": 4.3317,
      "step": 809
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2872687578201294,
      "learning_rate": 0.0005999817029797653,
      "loss": 4.3625,
      "step": 810
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8848921060562134,
      "learning_rate": 0.0005999816577745107,
      "loss": 4.4058,
      "step": 811
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4029192924499512,
      "learning_rate": 0.0005999816125134838,
      "loss": 4.2208,
      "step": 812
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2394931316375732,
      "learning_rate": 0.0005999815671966847,
      "loss": 3.7166,
      "step": 813
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2435065507888794,
      "learning_rate": 0.0005999815218241133,
      "loss": 3.952,
      "step": 814
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3792634010314941,
      "learning_rate": 0.0005999814763957697,
      "loss": 3.9499,
      "step": 815
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9710925817489624,
      "learning_rate": 0.0005999814309116539,
      "loss": 4.2584,
      "step": 816
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.578568458557129,
      "learning_rate": 0.0005999813853717659,
      "loss": 4.3213,
      "step": 817
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3893029689788818,
      "learning_rate": 0.0005999813397761057,
      "loss": 4.0962,
      "step": 818
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9883338212966919,
      "learning_rate": 0.0005999812941246733,
      "loss": 3.6111,
      "step": 819
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.134035587310791,
      "learning_rate": 0.0005999812484174686,
      "loss": 4.4089,
      "step": 820
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1618579626083374,
      "learning_rate": 0.0005999812026544919,
      "loss": 4.2607,
      "step": 821
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8752887845039368,
      "learning_rate": 0.0005999811568357429,
      "loss": 4.2832,
      "step": 822
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7742079496383667,
      "learning_rate": 0.0005999811109612218,
      "loss": 4.1074,
      "step": 823
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8289721012115479,
      "learning_rate": 0.0005999810650309286,
      "loss": 4.1495,
      "step": 824
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0910427570343018,
      "learning_rate": 0.0005999810190448632,
      "loss": 3.8162,
      "step": 825
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6023950576782227,
      "learning_rate": 0.0005999809730030257,
      "loss": 3.7807,
      "step": 826
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.19118070602417,
      "learning_rate": 0.000599980926905416,
      "loss": 4.3939,
      "step": 827
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9561187624931335,
      "learning_rate": 0.0005999808807520342,
      "loss": 4.3528,
      "step": 828
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.644852876663208,
      "learning_rate": 0.0005999808345428804,
      "loss": 4.0047,
      "step": 829
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7747362852096558,
      "learning_rate": 0.0005999807882779544,
      "loss": 4.1544,
      "step": 830
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0067683458328247,
      "learning_rate": 0.0005999807419572562,
      "loss": 4.2489,
      "step": 831
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1256729364395142,
      "learning_rate": 0.0005999806955807861,
      "loss": 4.4019,
      "step": 832
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1265594959259033,
      "learning_rate": 0.0005999806491485439,
      "loss": 4.1668,
      "step": 833
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9816911220550537,
      "learning_rate": 0.0005999806026605295,
      "loss": 4.1436,
      "step": 834
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5315842628479004,
      "learning_rate": 0.0005999805561167432,
      "loss": 3.9886,
      "step": 835
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0916097164154053,
      "learning_rate": 0.0005999805095171848,
      "loss": 3.9848,
      "step": 836
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9963884353637695,
      "learning_rate": 0.0005999804628618544,
      "loss": 4.058,
      "step": 837
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9908468127250671,
      "learning_rate": 0.0005999804161507519,
      "loss": 4.1894,
      "step": 838
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1506688594818115,
      "learning_rate": 0.0005999803693838774,
      "loss": 4.3246,
      "step": 839
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9362369775772095,
      "learning_rate": 0.0005999803225612308,
      "loss": 4.2563,
      "step": 840
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3325568437576294,
      "learning_rate": 0.0005999802756828124,
      "loss": 4.2036,
      "step": 841
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1762089729309082,
      "learning_rate": 0.0005999802287486219,
      "loss": 4.1434,
      "step": 842
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3048102855682373,
      "learning_rate": 0.0005999801817586594,
      "loss": 4.211,
      "step": 843
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1520251035690308,
      "learning_rate": 0.0005999801347129249,
      "loss": 3.945,
      "step": 844
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.924634337425232,
      "learning_rate": 0.0005999800876114185,
      "loss": 4.2027,
      "step": 845
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2326117753982544,
      "learning_rate": 0.0005999800404541401,
      "loss": 3.9732,
      "step": 846
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6579657793045044,
      "learning_rate": 0.0005999799932410897,
      "loss": 4.1178,
      "step": 847
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6928269863128662,
      "learning_rate": 0.0005999799459722674,
      "loss": 3.995,
      "step": 848
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0493255853652954,
      "learning_rate": 0.0005999798986476731,
      "loss": 4.2449,
      "step": 849
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1087874174118042,
      "learning_rate": 0.000599979851267307,
      "loss": 4.0897,
      "step": 850
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0201117992401123,
      "learning_rate": 0.000599979803831169,
      "loss": 4.0517,
      "step": 851
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1544899940490723,
      "learning_rate": 0.000599979756339259,
      "loss": 4.3613,
      "step": 852
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0148776769638062,
      "learning_rate": 0.0005999797087915771,
      "loss": 4.0347,
      "step": 853
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3690946102142334,
      "learning_rate": 0.0005999796611881233,
      "loss": 4.1332,
      "step": 854
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.147220492362976,
      "learning_rate": 0.0005999796135288977,
      "loss": 3.9553,
      "step": 855
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1170858144760132,
      "learning_rate": 0.0005999795658139001,
      "loss": 4.0975,
      "step": 856
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.079317569732666,
      "learning_rate": 0.0005999795180431307,
      "loss": 4.259,
      "step": 857
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3223329782485962,
      "learning_rate": 0.0005999794702165895,
      "loss": 4.0046,
      "step": 858
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0681017637252808,
      "learning_rate": 0.0005999794223342764,
      "loss": 4.0012,
      "step": 859
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1748477220535278,
      "learning_rate": 0.0005999793743961915,
      "loss": 4.1624,
      "step": 860
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1111329793930054,
      "learning_rate": 0.0005999793264023347,
      "loss": 4.1125,
      "step": 861
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9713415503501892,
      "learning_rate": 0.0005999792783527062,
      "loss": 4.145,
      "step": 862
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.148250937461853,
      "learning_rate": 0.0005999792302473058,
      "loss": 4.4137,
      "step": 863
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.413458228111267,
      "learning_rate": 0.0005999791820861335,
      "loss": 4.179,
      "step": 864
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1419931650161743,
      "learning_rate": 0.0005999791338691895,
      "loss": 4.3497,
      "step": 865
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.565930724143982,
      "learning_rate": 0.0005999790855964739,
      "loss": 4.0125,
      "step": 866
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3281596899032593,
      "learning_rate": 0.0005999790372679863,
      "loss": 4.2291,
      "step": 867
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5009974241256714,
      "learning_rate": 0.000599978988883727,
      "loss": 4.0393,
      "step": 868
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1829487085342407,
      "learning_rate": 0.0005999789404436959,
      "loss": 4.173,
      "step": 869
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3527933359146118,
      "learning_rate": 0.0005999788919478931,
      "loss": 4.0311,
      "step": 870
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3346525430679321,
      "learning_rate": 0.0005999788433963186,
      "loss": 4.1721,
      "step": 871
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3789167404174805,
      "learning_rate": 0.0005999787947889723,
      "loss": 4.0723,
      "step": 872
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.388742208480835,
      "learning_rate": 0.0005999787461258543,
      "loss": 4.2907,
      "step": 873
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9739202260971069,
      "learning_rate": 0.0005999786974069646,
      "loss": 4.0595,
      "step": 874
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4328598976135254,
      "learning_rate": 0.0005999786486323032,
      "loss": 3.9858,
      "step": 875
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7156784534454346,
      "learning_rate": 0.0005999785998018701,
      "loss": 4.075,
      "step": 876
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2845200300216675,
      "learning_rate": 0.0005999785509156653,
      "loss": 4.0248,
      "step": 877
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.1596126556396484,
      "learning_rate": 0.0005999785019736888,
      "loss": 4.1517,
      "step": 878
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.8021059036254883,
      "learning_rate": 0.0005999784529759406,
      "loss": 4.1384,
      "step": 879
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4345219135284424,
      "learning_rate": 0.0005999784039224209,
      "loss": 4.0362,
      "step": 880
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.473473072052002,
      "learning_rate": 0.0005999783548131294,
      "loss": 4.0137,
      "step": 881
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.1960108280181885,
      "learning_rate": 0.0005999783056480664,
      "loss": 3.8313,
      "step": 882
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8276270627975464,
      "learning_rate": 0.0005999782564272317,
      "loss": 4.094,
      "step": 883
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.290514349937439,
      "learning_rate": 0.0005999782071506254,
      "loss": 3.8652,
      "step": 884
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1006076335906982,
      "learning_rate": 0.0005999781578182474,
      "loss": 4.3563,
      "step": 885
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5399025678634644,
      "learning_rate": 0.0005999781084300979,
      "loss": 4.0045,
      "step": 886
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.07807457447052,
      "learning_rate": 0.0005999780589861768,
      "loss": 4.0883,
      "step": 887
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.065604329109192,
      "learning_rate": 0.0005999780094864841,
      "loss": 4.2569,
      "step": 888
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6503665447235107,
      "learning_rate": 0.0005999779599310198,
      "loss": 4.2055,
      "step": 889
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4452908039093018,
      "learning_rate": 0.0005999779103197839,
      "loss": 3.8402,
      "step": 890
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5504199266433716,
      "learning_rate": 0.0005999778606527765,
      "loss": 3.6958,
      "step": 891
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6706852912902832,
      "learning_rate": 0.0005999778109299975,
      "loss": 4.5318,
      "step": 892
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.19556725025177,
      "learning_rate": 0.000599977761151447,
      "loss": 3.9788,
      "step": 893
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2238719463348389,
      "learning_rate": 0.000599977711317125,
      "loss": 3.9343,
      "step": 894
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7938786745071411,
      "learning_rate": 0.0005999776614270314,
      "loss": 4.0134,
      "step": 895
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4960471391677856,
      "learning_rate": 0.0005999776114811664,
      "loss": 4.0992,
      "step": 896
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3744802474975586,
      "learning_rate": 0.0005999775614795299,
      "loss": 4.0639,
      "step": 897
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6056445837020874,
      "learning_rate": 0.0005999775114221219,
      "loss": 4.3171,
      "step": 898
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3829383850097656,
      "learning_rate": 0.0005999774613089423,
      "loss": 4.3701,
      "step": 899
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.429186224937439,
      "learning_rate": 0.0005999774111399913,
      "loss": 4.066,
      "step": 900
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2972497940063477,
      "learning_rate": 0.0005999773609152688,
      "loss": 4.1821,
      "step": 901
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.348758578300476,
      "learning_rate": 0.0005999773106347749,
      "loss": 4.302,
      "step": 902
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1208908557891846,
      "learning_rate": 0.0005999772602985097,
      "loss": 4.2814,
      "step": 903
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.260133981704712,
      "learning_rate": 0.0005999772099064728,
      "loss": 4.2325,
      "step": 904
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2720448970794678,
      "learning_rate": 0.0005999771594586646,
      "loss": 4.3785,
      "step": 905
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.071150302886963,
      "learning_rate": 0.000599977108955085,
      "loss": 4.3932,
      "step": 906
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0443753004074097,
      "learning_rate": 0.0005999770583957339,
      "loss": 4.184,
      "step": 907
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4390076398849487,
      "learning_rate": 0.0005999770077806115,
      "loss": 4.0423,
      "step": 908
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3344634771347046,
      "learning_rate": 0.0005999769571097177,
      "loss": 3.9387,
      "step": 909
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1014701128005981,
      "learning_rate": 0.0005999769063830524,
      "loss": 4.0016,
      "step": 910
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7082774639129639,
      "learning_rate": 0.0005999768556006159,
      "loss": 4.1468,
      "step": 911
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2825883626937866,
      "learning_rate": 0.0005999768047624079,
      "loss": 3.9606,
      "step": 912
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9836254715919495,
      "learning_rate": 0.0005999767538684286,
      "loss": 4.1777,
      "step": 913
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2426323890686035,
      "learning_rate": 0.000599976702918678,
      "loss": 4.2258,
      "step": 914
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3294215202331543,
      "learning_rate": 0.0005999766519131561,
      "loss": 4.2109,
      "step": 915
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0313000679016113,
      "learning_rate": 0.0005999766008518627,
      "loss": 4.2897,
      "step": 916
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0739781856536865,
      "learning_rate": 0.0005999765497347982,
      "loss": 4.106,
      "step": 917
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4904574155807495,
      "learning_rate": 0.0005999764985619622,
      "loss": 3.9352,
      "step": 918
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1746082305908203,
      "learning_rate": 0.000599976447333355,
      "loss": 4.0659,
      "step": 919
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1929454803466797,
      "learning_rate": 0.0005999763960489766,
      "loss": 4.1117,
      "step": 920
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2759301662445068,
      "learning_rate": 0.0005999763447088269,
      "loss": 4.2136,
      "step": 921
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3897722959518433,
      "learning_rate": 0.0005999762933129059,
      "loss": 4.2156,
      "step": 922
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.099137306213379,
      "learning_rate": 0.0005999762418612135,
      "loss": 3.9786,
      "step": 923
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0546146631240845,
      "learning_rate": 0.00059997619035375,
      "loss": 4.1609,
      "step": 924
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1608548164367676,
      "learning_rate": 0.0005999761387905152,
      "loss": 4.0902,
      "step": 925
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2632286548614502,
      "learning_rate": 0.0005999760871715093,
      "loss": 3.9403,
      "step": 926
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.9570282697677612,
      "learning_rate": 0.0005999760354967322,
      "loss": 4.0002,
      "step": 927
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0492998361587524,
      "learning_rate": 0.0005999759837661837,
      "loss": 4.1691,
      "step": 928
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3958897590637207,
      "learning_rate": 0.0005999759319798641,
      "loss": 4.1557,
      "step": 929
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3969334363937378,
      "learning_rate": 0.0005999758801377733,
      "loss": 3.9744,
      "step": 930
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.184960126876831,
      "learning_rate": 0.0005999758282399113,
      "loss": 3.8969,
      "step": 931
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6898680925369263,
      "learning_rate": 0.0005999757762862782,
      "loss": 3.7928,
      "step": 932
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1313824653625488,
      "learning_rate": 0.0005999757242768738,
      "loss": 4.1307,
      "step": 933
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1723705530166626,
      "learning_rate": 0.0005999756722116985,
      "loss": 4.1196,
      "step": 934
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1285607814788818,
      "learning_rate": 0.0005999756200907518,
      "loss": 4.1062,
      "step": 935
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.283133864402771,
      "learning_rate": 0.0005999755679140341,
      "loss": 4.0103,
      "step": 936
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4757970571517944,
      "learning_rate": 0.0005999755156815453,
      "loss": 3.9893,
      "step": 937
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.076963186264038,
      "learning_rate": 0.0005999754633932854,
      "loss": 4.1101,
      "step": 938
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3642293214797974,
      "learning_rate": 0.0005999754110492543,
      "loss": 4.0425,
      "step": 939
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1723978519439697,
      "learning_rate": 0.0005999753586494521,
      "loss": 4.171,
      "step": 940
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9593654274940491,
      "learning_rate": 0.0005999753061938789,
      "loss": 4.1327,
      "step": 941
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2159348726272583,
      "learning_rate": 0.0005999752536825346,
      "loss": 4.1962,
      "step": 942
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6073155403137207,
      "learning_rate": 0.0005999752011154193,
      "loss": 4.138,
      "step": 943
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0044364929199219,
      "learning_rate": 0.0005999751484925328,
      "loss": 4.0597,
      "step": 944
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4496055841445923,
      "learning_rate": 0.0005999750958138754,
      "loss": 4.2758,
      "step": 945
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.1636886596679688,
      "learning_rate": 0.000599975043079447,
      "loss": 4.2586,
      "step": 946
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5156505107879639,
      "learning_rate": 0.0005999749902892475,
      "loss": 4.2818,
      "step": 947
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8798291683197021,
      "learning_rate": 0.000599974937443277,
      "loss": 4.1186,
      "step": 948
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8307112455368042,
      "learning_rate": 0.0005999748845415354,
      "loss": 4.0156,
      "step": 949
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4826500415802002,
      "learning_rate": 0.0005999748315840229,
      "loss": 4.0775,
      "step": 950
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.9180431365966797,
      "learning_rate": 0.0005999747785707394,
      "loss": 4.098,
      "step": 951
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.088343620300293,
      "learning_rate": 0.000599974725501685,
      "loss": 3.8196,
      "step": 952
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1693508625030518,
      "learning_rate": 0.0005999746723768595,
      "loss": 4.0726,
      "step": 953
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.400305151939392,
      "learning_rate": 0.0005999746191962631,
      "loss": 3.908,
      "step": 954
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.363007664680481,
      "learning_rate": 0.0005999745659598958,
      "loss": 3.8056,
      "step": 955
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1465635299682617,
      "learning_rate": 0.0005999745126677575,
      "loss": 3.7448,
      "step": 956
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2048956155776978,
      "learning_rate": 0.0005999744593198483,
      "loss": 4.1205,
      "step": 957
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2786682844161987,
      "learning_rate": 0.0005999744059161682,
      "loss": 4.3452,
      "step": 958
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.293429970741272,
      "learning_rate": 0.0005999743524567172,
      "loss": 4.0755,
      "step": 959
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1238994598388672,
      "learning_rate": 0.0005999742989414952,
      "loss": 4.0533,
      "step": 960
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.9837514758110046,
      "learning_rate": 0.0005999742453705025,
      "loss": 4.4041,
      "step": 961
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7375779151916504,
      "learning_rate": 0.0005999741917437388,
      "loss": 4.0044,
      "step": 962
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1931339502334595,
      "learning_rate": 0.0005999741380612043,
      "loss": 4.0078,
      "step": 963
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3981928825378418,
      "learning_rate": 0.0005999740843228988,
      "loss": 3.9988,
      "step": 964
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.295425534248352,
      "learning_rate": 0.0005999740305288226,
      "loss": 3.8662,
      "step": 965
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.783758282661438,
      "learning_rate": 0.0005999739766789756,
      "loss": 3.9622,
      "step": 966
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4469032287597656,
      "learning_rate": 0.0005999739227733577,
      "loss": 4.1327,
      "step": 967
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3070361614227295,
      "learning_rate": 0.0005999738688119689,
      "loss": 3.7996,
      "step": 968
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3728104829788208,
      "learning_rate": 0.0005999738147948095,
      "loss": 3.9083,
      "step": 969
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0785468816757202,
      "learning_rate": 0.0005999737607218791,
      "loss": 3.8486,
      "step": 970
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6971173286437988,
      "learning_rate": 0.000599973706593178,
      "loss": 4.1854,
      "step": 971
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3118994235992432,
      "learning_rate": 0.0005999736524087061,
      "loss": 3.9458,
      "step": 972
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6541939973831177,
      "learning_rate": 0.0005999735981684636,
      "loss": 4.3228,
      "step": 973
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.253670334815979,
      "learning_rate": 0.0005999735438724502,
      "loss": 3.9175,
      "step": 974
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.170491099357605,
      "learning_rate": 0.000599973489520666,
      "loss": 3.8592,
      "step": 975
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7588543891906738,
      "learning_rate": 0.0005999734351131112,
      "loss": 3.9742,
      "step": 976
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2723175287246704,
      "learning_rate": 0.0005999733806497856,
      "loss": 3.955,
      "step": 977
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1871225833892822,
      "learning_rate": 0.0005999733261306893,
      "loss": 3.9316,
      "step": 978
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4525976181030273,
      "learning_rate": 0.0005999732715558224,
      "loss": 4.1531,
      "step": 979
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6382571458816528,
      "learning_rate": 0.0005999732169251847,
      "loss": 4.0544,
      "step": 980
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5213210582733154,
      "learning_rate": 0.0005999731622387763,
      "loss": 3.8412,
      "step": 981
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.003610610961914,
      "learning_rate": 0.0005999731074965972,
      "loss": 4.2053,
      "step": 982
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3407154083251953,
      "learning_rate": 0.0005999730526986475,
      "loss": 4.1694,
      "step": 983
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.506328821182251,
      "learning_rate": 0.0005999729978449271,
      "loss": 4.031,
      "step": 984
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3445664644241333,
      "learning_rate": 0.000599972942935436,
      "loss": 4.0342,
      "step": 985
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1318999528884888,
      "learning_rate": 0.0005999728879701744,
      "loss": 4.1012,
      "step": 986
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.286981463432312,
      "learning_rate": 0.0005999728329491421,
      "loss": 4.1728,
      "step": 987
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2389307022094727,
      "learning_rate": 0.0005999727778723393,
      "loss": 4.1577,
      "step": 988
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1412636041641235,
      "learning_rate": 0.0005999727227397658,
      "loss": 4.1259,
      "step": 989
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0785161256790161,
      "learning_rate": 0.0005999726675514217,
      "loss": 4.1099,
      "step": 990
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0077285766601562,
      "learning_rate": 0.0005999726123073071,
      "loss": 4.1075,
      "step": 991
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3630589246749878,
      "learning_rate": 0.0005999725570074218,
      "loss": 4.2433,
      "step": 992
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0958654880523682,
      "learning_rate": 0.0005999725016517661,
      "loss": 4.0328,
      "step": 993
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3477281332015991,
      "learning_rate": 0.0005999724462403398,
      "loss": 4.1171,
      "step": 994
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7137393951416016,
      "learning_rate": 0.0005999723907731428,
      "loss": 4.073,
      "step": 995
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2700790166854858,
      "learning_rate": 0.0005999723352501754,
      "loss": 4.0882,
      "step": 996
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.0090980529785156,
      "learning_rate": 0.0005999722796714375,
      "loss": 4.1267,
      "step": 997
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5293374061584473,
      "learning_rate": 0.0005999722240369291,
      "loss": 4.0554,
      "step": 998
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4934582710266113,
      "learning_rate": 0.0005999721683466501,
      "loss": 4.0936,
      "step": 999
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8585678339004517,
      "learning_rate": 0.0005999721126006008,
      "loss": 4.1706,
      "step": 1000
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2613989114761353,
      "learning_rate": 0.0005999720567987809,
      "loss": 4.0278,
      "step": 1001
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4118105173110962,
      "learning_rate": 0.0005999720009411905,
      "loss": 4.0779,
      "step": 1002
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4686634540557861,
      "learning_rate": 0.0005999719450278296,
      "loss": 3.9817,
      "step": 1003
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3526281118392944,
      "learning_rate": 0.0005999718890586983,
      "loss": 3.9021,
      "step": 1004
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.526209831237793,
      "learning_rate": 0.0005999718330337966,
      "loss": 3.8073,
      "step": 1005
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5240472555160522,
      "learning_rate": 0.0005999717769531245,
      "loss": 3.9676,
      "step": 1006
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5102871656417847,
      "learning_rate": 0.000599971720816682,
      "loss": 3.9128,
      "step": 1007
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.9631938934326172,
      "learning_rate": 0.000599971664624469,
      "loss": 4.1074,
      "step": 1008
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1076414585113525,
      "learning_rate": 0.0005999716083764857,
      "loss": 3.9742,
      "step": 1009
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6778866052627563,
      "learning_rate": 0.000599971552072732,
      "loss": 3.8685,
      "step": 1010
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6955722570419312,
      "learning_rate": 0.0005999714957132078,
      "loss": 3.7082,
      "step": 1011
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3969889879226685,
      "learning_rate": 0.0005999714392979133,
      "loss": 4.0228,
      "step": 1012
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.348717212677002,
      "learning_rate": 0.0005999713828268485,
      "loss": 4.0804,
      "step": 1013
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.090578317642212,
      "learning_rate": 0.0005999713263000133,
      "loss": 4.1327,
      "step": 1014
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.187783122062683,
      "learning_rate": 0.0005999712697174078,
      "loss": 4.1354,
      "step": 1015
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6511459350585938,
      "learning_rate": 0.000599971213079032,
      "loss": 4.0192,
      "step": 1016
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4763469696044922,
      "learning_rate": 0.0005999711563848859,
      "loss": 4.336,
      "step": 1017
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1813799142837524,
      "learning_rate": 0.0005999710996349695,
      "loss": 4.0198,
      "step": 1018
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.207628607749939,
      "learning_rate": 0.0005999710428292828,
      "loss": 4.0314,
      "step": 1019
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4730167388916016,
      "learning_rate": 0.0005999709859678257,
      "loss": 4.0679,
      "step": 1020
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3156050443649292,
      "learning_rate": 0.0005999709290505985,
      "loss": 4.218,
      "step": 1021
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.7220098972320557,
      "learning_rate": 0.0005999708720776009,
      "loss": 3.8783,
      "step": 1022
    },
    {
      "epoch": 0.01,
      "grad_norm": 3.1976866722106934,
      "learning_rate": 0.0005999708150488331,
      "loss": 3.9753,
      "step": 1023
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.315751075744629,
      "learning_rate": 0.0005999707579642952,
      "loss": 4.1664,
      "step": 1024
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4075267314910889,
      "learning_rate": 0.0005999707008239869,
      "loss": 3.8351,
      "step": 1025
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8607069253921509,
      "learning_rate": 0.0005999706436279085,
      "loss": 4.0731,
      "step": 1026
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5197981595993042,
      "learning_rate": 0.0005999705863760598,
      "loss": 3.6909,
      "step": 1027
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3887896537780762,
      "learning_rate": 0.000599970529068441,
      "loss": 4.0436,
      "step": 1028
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.740310788154602,
      "learning_rate": 0.0005999704717050519,
      "loss": 3.842,
      "step": 1029
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0905425548553467,
      "learning_rate": 0.0005999704142858927,
      "loss": 4.2625,
      "step": 1030
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6321312189102173,
      "learning_rate": 0.0005999703568109633,
      "loss": 4.0406,
      "step": 1031
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7852466106414795,
      "learning_rate": 0.0005999702992802638,
      "loss": 4.0858,
      "step": 1032
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8330737352371216,
      "learning_rate": 0.0005999702416937941,
      "loss": 4.2876,
      "step": 1033
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0832555294036865,
      "learning_rate": 0.0005999701840515543,
      "loss": 4.1466,
      "step": 1034
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1138741970062256,
      "learning_rate": 0.0005999701263535444,
      "loss": 4.2134,
      "step": 1035
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1931934356689453,
      "learning_rate": 0.0005999700685997644,
      "loss": 3.9879,
      "step": 1036
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2090057134628296,
      "learning_rate": 0.0005999700107902142,
      "loss": 4.1206,
      "step": 1037
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.199830174446106,
      "learning_rate": 0.000599969952924894,
      "loss": 4.1238,
      "step": 1038
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3952128887176514,
      "learning_rate": 0.0005999698950038037,
      "loss": 3.8046,
      "step": 1039
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1183598041534424,
      "learning_rate": 0.0005999698370269434,
      "loss": 4.1921,
      "step": 1040
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5668630599975586,
      "learning_rate": 0.000599969778994313,
      "loss": 4.2067,
      "step": 1041
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.281068205833435,
      "learning_rate": 0.0005999697209059125,
      "loss": 3.9849,
      "step": 1042
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1868826150894165,
      "learning_rate": 0.0005999696627617421,
      "loss": 3.841,
      "step": 1043
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3405860662460327,
      "learning_rate": 0.0005999696045618016,
      "loss": 4.072,
      "step": 1044
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.123281478881836,
      "learning_rate": 0.0005999695463060911,
      "loss": 4.0444,
      "step": 1045
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4399868249893188,
      "learning_rate": 0.0005999694879946104,
      "loss": 3.957,
      "step": 1046
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8797765970230103,
      "learning_rate": 0.00059996942962736,
      "loss": 4.0461,
      "step": 1047
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6462422609329224,
      "learning_rate": 0.0005999693712043395,
      "loss": 3.9266,
      "step": 1048
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5583590269088745,
      "learning_rate": 0.000599969312725549,
      "loss": 3.9366,
      "step": 1049
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5810173749923706,
      "learning_rate": 0.0005999692541909886,
      "loss": 4.1456,
      "step": 1050
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.493187665939331,
      "learning_rate": 0.0005999691956006582,
      "loss": 3.9504,
      "step": 1051
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.055952787399292,
      "learning_rate": 0.0005999691369545579,
      "loss": 3.9969,
      "step": 1052
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3399937152862549,
      "learning_rate": 0.0005999690782526878,
      "loss": 4.0574,
      "step": 1053
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.0053606033325195,
      "learning_rate": 0.0005999690194950476,
      "loss": 4.0741,
      "step": 1054
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.0494906902313232,
      "learning_rate": 0.0005999689606816375,
      "loss": 4.0345,
      "step": 1055
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4791560173034668,
      "learning_rate": 0.0005999689018124576,
      "loss": 4.1752,
      "step": 1056
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.4388506412506104,
      "learning_rate": 0.0005999688428875078,
      "loss": 3.848,
      "step": 1057
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.3769724369049072,
      "learning_rate": 0.0005999687839067881,
      "loss": 4.1544,
      "step": 1058
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2287931442260742,
      "learning_rate": 0.0005999687248702985,
      "loss": 3.8834,
      "step": 1059
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.554445743560791,
      "learning_rate": 0.0005999686657780391,
      "loss": 3.7306,
      "step": 1060
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3734946250915527,
      "learning_rate": 0.00059996860663001,
      "loss": 4.0204,
      "step": 1061
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3576947450637817,
      "learning_rate": 0.0005999685474262109,
      "loss": 3.8861,
      "step": 1062
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4677767753601074,
      "learning_rate": 0.0005999684881666421,
      "loss": 4.0208,
      "step": 1063
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7544867992401123,
      "learning_rate": 0.0005999684288513033,
      "loss": 3.7458,
      "step": 1064
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0805786848068237,
      "learning_rate": 0.0005999683694801948,
      "loss": 3.9154,
      "step": 1065
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0957525968551636,
      "learning_rate": 0.0005999683100533167,
      "loss": 3.8137,
      "step": 1066
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0719045400619507,
      "learning_rate": 0.0005999682505706686,
      "loss": 3.913,
      "step": 1067
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2010799646377563,
      "learning_rate": 0.0005999681910322509,
      "loss": 3.7575,
      "step": 1068
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.538406491279602,
      "learning_rate": 0.0005999681314380632,
      "loss": 4.1316,
      "step": 1069
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3745225667953491,
      "learning_rate": 0.000599968071788106,
      "loss": 4.1106,
      "step": 1070
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1329004764556885,
      "learning_rate": 0.0005999680120823789,
      "loss": 4.3525,
      "step": 1071
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2801613807678223,
      "learning_rate": 0.0005999679523208823,
      "loss": 3.9257,
      "step": 1072
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.90149986743927,
      "learning_rate": 0.0005999678925036159,
      "loss": 3.8242,
      "step": 1073
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2229669094085693,
      "learning_rate": 0.0005999678326305798,
      "loss": 3.9565,
      "step": 1074
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.8429434299468994,
      "learning_rate": 0.000599967772701774,
      "loss": 3.8188,
      "step": 1075
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7893885374069214,
      "learning_rate": 0.0005999677127171985,
      "loss": 3.6783,
      "step": 1076
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.1115968227386475,
      "learning_rate": 0.0005999676526768534,
      "loss": 3.8555,
      "step": 1077
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.0534658432006836,
      "learning_rate": 0.0005999675925807386,
      "loss": 4.0807,
      "step": 1078
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6341338157653809,
      "learning_rate": 0.0005999675324288542,
      "loss": 3.8518,
      "step": 1079
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3843828439712524,
      "learning_rate": 0.0005999674722212002,
      "loss": 4.1413,
      "step": 1080
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.680357813835144,
      "learning_rate": 0.0005999674119577765,
      "loss": 4.1619,
      "step": 1081
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.178976058959961,
      "learning_rate": 0.0005999673516385832,
      "loss": 4.0958,
      "step": 1082
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1408071517944336,
      "learning_rate": 0.0005999672912636204,
      "loss": 3.9001,
      "step": 1083
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8838272094726562,
      "learning_rate": 0.000599967230832888,
      "loss": 3.9575,
      "step": 1084
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7227249145507812,
      "learning_rate": 0.000599967170346386,
      "loss": 4.1029,
      "step": 1085
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1466610431671143,
      "learning_rate": 0.0005999671098041144,
      "loss": 4.3187,
      "step": 1086
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.38974130153656,
      "learning_rate": 0.0005999670492060733,
      "loss": 3.5998,
      "step": 1087
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3847514390945435,
      "learning_rate": 0.0005999669885522627,
      "loss": 4.0776,
      "step": 1088
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0634597539901733,
      "learning_rate": 0.0005999669278426825,
      "loss": 3.8673,
      "step": 1089
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.846268653869629,
      "learning_rate": 0.0005999668670773327,
      "loss": 4.0349,
      "step": 1090
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.177964687347412,
      "learning_rate": 0.0005999668062562135,
      "loss": 4.0251,
      "step": 1091
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3682188987731934,
      "learning_rate": 0.000599966745379325,
      "loss": 4.0708,
      "step": 1092
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.204297423362732,
      "learning_rate": 0.0005999666844466667,
      "loss": 4.2553,
      "step": 1093
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1381713151931763,
      "learning_rate": 0.0005999666234582391,
      "loss": 3.9602,
      "step": 1094
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1530882120132446,
      "learning_rate": 0.000599966562414042,
      "loss": 3.6698,
      "step": 1095
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.9031054973602295,
      "learning_rate": 0.0005999665013140755,
      "loss": 3.8535,
      "step": 1096
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4647289514541626,
      "learning_rate": 0.0005999664401583395,
      "loss": 3.7294,
      "step": 1097
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4345146417617798,
      "learning_rate": 0.0005999663789468342,
      "loss": 3.7005,
      "step": 1098
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.492530107498169,
      "learning_rate": 0.0005999663176795594,
      "loss": 3.8679,
      "step": 1099
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2775630950927734,
      "learning_rate": 0.0005999662563565151,
      "loss": 4.0242,
      "step": 1100
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2498139142990112,
      "learning_rate": 0.0005999661949777015,
      "loss": 3.7242,
      "step": 1101
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6928374767303467,
      "learning_rate": 0.0005999661335431186,
      "loss": 4.1995,
      "step": 1102
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4833323955535889,
      "learning_rate": 0.0005999660720527662,
      "loss": 3.9763,
      "step": 1103
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.224677562713623,
      "learning_rate": 0.0005999660105066444,
      "loss": 3.8342,
      "step": 1104
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.1278862953186035,
      "learning_rate": 0.0005999659489047534,
      "loss": 3.9713,
      "step": 1105
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3023943901062012,
      "learning_rate": 0.0005999658872470931,
      "loss": 3.7026,
      "step": 1106
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.485850214958191,
      "learning_rate": 0.0005999658255336634,
      "loss": 4.1583,
      "step": 1107
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.492279529571533,
      "learning_rate": 0.0005999657637644644,
      "loss": 3.9262,
      "step": 1108
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2621426582336426,
      "learning_rate": 0.0005999657019394962,
      "loss": 4.0041,
      "step": 1109
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7514182329177856,
      "learning_rate": 0.0005999656400587585,
      "loss": 4.0644,
      "step": 1110
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8644969463348389,
      "learning_rate": 0.0005999655781222516,
      "loss": 4.0326,
      "step": 1111
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2964731454849243,
      "learning_rate": 0.0005999655161299755,
      "loss": 4.0674,
      "step": 1112
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4328097105026245,
      "learning_rate": 0.0005999654540819302,
      "loss": 3.8839,
      "step": 1113
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.008943796157837,
      "learning_rate": 0.0005999653919781156,
      "loss": 3.9846,
      "step": 1114
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.726683497428894,
      "learning_rate": 0.0005999653298185317,
      "loss": 4.2263,
      "step": 1115
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3770439624786377,
      "learning_rate": 0.0005999652676031787,
      "loss": 3.9614,
      "step": 1116
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.31278920173645,
      "learning_rate": 0.0005999652053320564,
      "loss": 3.9524,
      "step": 1117
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4480698108673096,
      "learning_rate": 0.0005999651430051649,
      "loss": 3.9887,
      "step": 1118
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5091664791107178,
      "learning_rate": 0.0005999650806225043,
      "loss": 3.9276,
      "step": 1119
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.7399005889892578,
      "learning_rate": 0.0005999650181840745,
      "loss": 4.0918,
      "step": 1120
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2213547229766846,
      "learning_rate": 0.0005999649556898755,
      "loss": 4.1616,
      "step": 1121
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.045698404312134,
      "learning_rate": 0.0005999648931399073,
      "loss": 4.2229,
      "step": 1122
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.158742070198059,
      "learning_rate": 0.0005999648305341701,
      "loss": 4.0027,
      "step": 1123
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4555498361587524,
      "learning_rate": 0.0005999647678726637,
      "loss": 4.0862,
      "step": 1124
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.9360065460205078,
      "learning_rate": 0.0005999647051553882,
      "loss": 3.6459,
      "step": 1125
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.171913981437683,
      "learning_rate": 0.0005999646423823437,
      "loss": 4.1534,
      "step": 1126
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6889266967773438,
      "learning_rate": 0.00059996457955353,
      "loss": 3.9175,
      "step": 1127
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.8241572380065918,
      "learning_rate": 0.0005999645166689472,
      "loss": 4.2027,
      "step": 1128
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2594634294509888,
      "learning_rate": 0.0005999644537285953,
      "loss": 3.9688,
      "step": 1129
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4647449254989624,
      "learning_rate": 0.0005999643907324745,
      "loss": 3.8633,
      "step": 1130
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1547797918319702,
      "learning_rate": 0.0005999643276805846,
      "loss": 4.3116,
      "step": 1131
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5713974237442017,
      "learning_rate": 0.0005999642645729257,
      "loss": 4.161,
      "step": 1132
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3171221017837524,
      "learning_rate": 0.0005999642014094976,
      "loss": 3.8094,
      "step": 1133
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2902761697769165,
      "learning_rate": 0.0005999641381903006,
      "loss": 3.9126,
      "step": 1134
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1132203340530396,
      "learning_rate": 0.0005999640749153347,
      "loss": 3.8352,
      "step": 1135
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4939205646514893,
      "learning_rate": 0.0005999640115845997,
      "loss": 4.0062,
      "step": 1136
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.482264757156372,
      "learning_rate": 0.0005999639481980958,
      "loss": 3.9022,
      "step": 1137
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3361432552337646,
      "learning_rate": 0.0005999638847558229,
      "loss": 4.004,
      "step": 1138
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5131969451904297,
      "learning_rate": 0.0005999638212577811,
      "loss": 3.9552,
      "step": 1139
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4545917510986328,
      "learning_rate": 0.0005999637577039703,
      "loss": 3.7817,
      "step": 1140
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.6087957620620728,
      "learning_rate": 0.0005999636940943906,
      "loss": 3.8291,
      "step": 1141
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2219109535217285,
      "learning_rate": 0.000599963630429042,
      "loss": 3.585,
      "step": 1142
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4421519041061401,
      "learning_rate": 0.0005999635667079244,
      "loss": 3.9366,
      "step": 1143
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2140485048294067,
      "learning_rate": 0.0005999635029310381,
      "loss": 4.1311,
      "step": 1144
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5438801050186157,
      "learning_rate": 0.0005999634390983828,
      "loss": 3.7086,
      "step": 1145
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.2203882932662964,
      "learning_rate": 0.0005999633752099586,
      "loss": 4.0293,
      "step": 1146
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.1902503967285156,
      "learning_rate": 0.0005999633112657657,
      "loss": 4.031,
      "step": 1147
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.4694896936416626,
      "learning_rate": 0.0005999632472658038,
      "loss": 3.9549,
      "step": 1148
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.3931528329849243,
      "learning_rate": 0.0005999631832100731,
      "loss": 3.8682,
      "step": 1149
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.770009994506836,
      "learning_rate": 0.0005999631190985737,
      "loss": 4.1848,
      "step": 1150
    },
    {
      "epoch": 0.01,
      "grad_norm": 2.167397975921631,
      "learning_rate": 0.0005999630549313054,
      "loss": 4.0129,
      "step": 1151
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.5011537075042725,
      "learning_rate": 0.0005999629907082681,
      "loss": 3.8047,
      "step": 1152
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1287834644317627,
      "learning_rate": 0.0005999629264294623,
      "loss": 3.7413,
      "step": 1153
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.085282325744629,
      "learning_rate": 0.0005999628620948877,
      "loss": 3.8095,
      "step": 1154
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2973668575286865,
      "learning_rate": 0.0005999627977045443,
      "loss": 3.7896,
      "step": 1155
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.628904104232788,
      "learning_rate": 0.0005999627332584322,
      "loss": 3.9127,
      "step": 1156
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4836221933364868,
      "learning_rate": 0.0005999626687565513,
      "loss": 3.9159,
      "step": 1157
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.325506567955017,
      "learning_rate": 0.0005999626041989016,
      "loss": 4.0747,
      "step": 1158
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5578267574310303,
      "learning_rate": 0.0005999625395854833,
      "loss": 3.5827,
      "step": 1159
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.592403769493103,
      "learning_rate": 0.0005999624749162963,
      "loss": 4.175,
      "step": 1160
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.849236249923706,
      "learning_rate": 0.0005999624101913406,
      "loss": 3.9845,
      "step": 1161
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1055653095245361,
      "learning_rate": 0.0005999623454106162,
      "loss": 3.9676,
      "step": 1162
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.507803201675415,
      "learning_rate": 0.0005999622805741232,
      "loss": 4.0063,
      "step": 1163
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7686961889266968,
      "learning_rate": 0.0005999622156818616,
      "loss": 3.8,
      "step": 1164
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2846685647964478,
      "learning_rate": 0.0005999621507338312,
      "loss": 4.0706,
      "step": 1165
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4786572456359863,
      "learning_rate": 0.0005999620857300322,
      "loss": 4.139,
      "step": 1166
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1625256538391113,
      "learning_rate": 0.0005999620206704647,
      "loss": 4.0871,
      "step": 1167
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.233964204788208,
      "learning_rate": 0.0005999619555551285,
      "loss": 3.4755,
      "step": 1168
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5961992740631104,
      "learning_rate": 0.0005999618903840238,
      "loss": 3.8956,
      "step": 1169
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.177688717842102,
      "learning_rate": 0.0005999618251571504,
      "loss": 4.1643,
      "step": 1170
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4869580268859863,
      "learning_rate": 0.0005999617598745086,
      "loss": 3.7325,
      "step": 1171
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3673725128173828,
      "learning_rate": 0.0005999616945360981,
      "loss": 3.8819,
      "step": 1172
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2879279851913452,
      "learning_rate": 0.0005999616291419191,
      "loss": 3.7771,
      "step": 1173
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3031877279281616,
      "learning_rate": 0.0005999615636919716,
      "loss": 3.8358,
      "step": 1174
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3258520364761353,
      "learning_rate": 0.0005999614981862556,
      "loss": 4.1201,
      "step": 1175
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8484050035476685,
      "learning_rate": 0.000599961432624771,
      "loss": 3.8344,
      "step": 1176
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5885939598083496,
      "learning_rate": 0.0005999613670075179,
      "loss": 3.7933,
      "step": 1177
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4231839179992676,
      "learning_rate": 0.0005999613013344964,
      "loss": 4.157,
      "step": 1178
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6326863765716553,
      "learning_rate": 0.0005999612356057065,
      "loss": 3.9548,
      "step": 1179
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4767141342163086,
      "learning_rate": 0.000599961169821148,
      "loss": 3.829,
      "step": 1180
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.771743893623352,
      "learning_rate": 0.0005999611039808211,
      "loss": 4.0162,
      "step": 1181
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6056169271469116,
      "learning_rate": 0.0005999610380847258,
      "loss": 4.0768,
      "step": 1182
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7206065654754639,
      "learning_rate": 0.000599960972132862,
      "loss": 4.0447,
      "step": 1183
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9887174367904663,
      "learning_rate": 0.0005999609061252298,
      "loss": 3.7588,
      "step": 1184
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.008103847503662,
      "learning_rate": 0.0005999608400618294,
      "loss": 3.6643,
      "step": 1185
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2789371013641357,
      "learning_rate": 0.0005999607739426605,
      "loss": 3.8598,
      "step": 1186
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.473154306411743,
      "learning_rate": 0.0005999607077677232,
      "loss": 3.9572,
      "step": 1187
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7355620861053467,
      "learning_rate": 0.0005999606415370175,
      "loss": 3.986,
      "step": 1188
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1604371070861816,
      "learning_rate": 0.0005999605752505436,
      "loss": 3.99,
      "step": 1189
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6115851402282715,
      "learning_rate": 0.0005999605089083012,
      "loss": 3.6644,
      "step": 1190
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.5307507514953613,
      "learning_rate": 0.0005999604425102905,
      "loss": 4.0236,
      "step": 1191
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.858112096786499,
      "learning_rate": 0.0005999603760565116,
      "loss": 3.976,
      "step": 1192
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3539025783538818,
      "learning_rate": 0.0005999603095469643,
      "loss": 4.0648,
      "step": 1193
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.323823928833008,
      "learning_rate": 0.0005999602429816488,
      "loss": 3.9896,
      "step": 1194
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5529706478118896,
      "learning_rate": 0.000599960176360565,
      "loss": 3.8126,
      "step": 1195
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3758838176727295,
      "learning_rate": 0.0005999601096837129,
      "loss": 3.79,
      "step": 1196
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.046384334564209,
      "learning_rate": 0.0005999600429510926,
      "loss": 3.8008,
      "step": 1197
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.884263515472412,
      "learning_rate": 0.0005999599761627041,
      "loss": 4.2279,
      "step": 1198
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2770692110061646,
      "learning_rate": 0.0005999599093185473,
      "loss": 3.9493,
      "step": 1199
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.834109306335449,
      "learning_rate": 0.0005999598424186222,
      "loss": 3.8688,
      "step": 1200
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1542361974716187,
      "learning_rate": 0.0005999597754629292,
      "loss": 4.1173,
      "step": 1201
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4600430727005005,
      "learning_rate": 0.0005999597084514678,
      "loss": 3.8738,
      "step": 1202
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1537046432495117,
      "learning_rate": 0.0005999596413842382,
      "loss": 4.0028,
      "step": 1203
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.708264946937561,
      "learning_rate": 0.0005999595742612405,
      "loss": 4.0916,
      "step": 1204
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.90341317653656,
      "learning_rate": 0.0005999595070824747,
      "loss": 3.8733,
      "step": 1205
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.404264211654663,
      "learning_rate": 0.0005999594398479408,
      "loss": 4.0578,
      "step": 1206
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4864566326141357,
      "learning_rate": 0.0005999593725576387,
      "loss": 3.8491,
      "step": 1207
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1687724590301514,
      "learning_rate": 0.0005999593052115684,
      "loss": 4.0758,
      "step": 1208
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3767646551132202,
      "learning_rate": 0.0005999592378097301,
      "loss": 4.1599,
      "step": 1209
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.961228132247925,
      "learning_rate": 0.0005999591703521237,
      "loss": 3.7617,
      "step": 1210
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.668182611465454,
      "learning_rate": 0.0005999591028387493,
      "loss": 3.8786,
      "step": 1211
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0973575115203857,
      "learning_rate": 0.0005999590352696067,
      "loss": 3.9331,
      "step": 1212
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5127404928207397,
      "learning_rate": 0.0005999589676446962,
      "loss": 4.1511,
      "step": 1213
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8383878469467163,
      "learning_rate": 0.0005999588999640176,
      "loss": 3.5788,
      "step": 1214
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5243197679519653,
      "learning_rate": 0.000599958832227571,
      "loss": 3.9966,
      "step": 1215
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2981127500534058,
      "learning_rate": 0.0005999587644353563,
      "loss": 4.1216,
      "step": 1216
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4416530132293701,
      "learning_rate": 0.0005999586965873738,
      "loss": 3.7754,
      "step": 1217
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6190638542175293,
      "learning_rate": 0.0005999586286836231,
      "loss": 3.9725,
      "step": 1218
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1426509618759155,
      "learning_rate": 0.0005999585607241046,
      "loss": 3.8917,
      "step": 1219
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4814884662628174,
      "learning_rate": 0.000599958492708818,
      "loss": 3.9061,
      "step": 1220
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.299984335899353,
      "learning_rate": 0.0005999584246377636,
      "loss": 3.9097,
      "step": 1221
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6295580863952637,
      "learning_rate": 0.0005999583565109411,
      "loss": 3.8266,
      "step": 1222
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7339212894439697,
      "learning_rate": 0.0005999582883283507,
      "loss": 3.9714,
      "step": 1223
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5557750463485718,
      "learning_rate": 0.0005999582200899926,
      "loss": 3.9639,
      "step": 1224
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.232996940612793,
      "learning_rate": 0.0005999581517958664,
      "loss": 3.9891,
      "step": 1225
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3193167448043823,
      "learning_rate": 0.0005999580834459723,
      "loss": 3.8106,
      "step": 1226
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2598496675491333,
      "learning_rate": 0.0005999580150403105,
      "loss": 3.9231,
      "step": 1227
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5294818878173828,
      "learning_rate": 0.0005999579465788806,
      "loss": 3.8943,
      "step": 1228
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.246505856513977,
      "learning_rate": 0.0005999578780616832,
      "loss": 3.9623,
      "step": 1229
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7631319761276245,
      "learning_rate": 0.0005999578094887177,
      "loss": 4.0634,
      "step": 1230
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0656898021698,
      "learning_rate": 0.0005999577408599844,
      "loss": 3.989,
      "step": 1231
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9102736711502075,
      "learning_rate": 0.0005999576721754834,
      "loss": 3.7778,
      "step": 1232
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.330323576927185,
      "learning_rate": 0.0005999576034352145,
      "loss": 3.989,
      "step": 1233
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4405652284622192,
      "learning_rate": 0.0005999575346391779,
      "loss": 3.8435,
      "step": 1234
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5477392673492432,
      "learning_rate": 0.0005999574657873735,
      "loss": 4.1095,
      "step": 1235
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4264885187149048,
      "learning_rate": 0.0005999573968798014,
      "loss": 3.881,
      "step": 1236
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2778860330581665,
      "learning_rate": 0.0005999573279164615,
      "loss": 4.0671,
      "step": 1237
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9761375188827515,
      "learning_rate": 0.0005999572588973539,
      "loss": 3.9954,
      "step": 1238
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7054511308670044,
      "learning_rate": 0.0005999571898224785,
      "loss": 4.0496,
      "step": 1239
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9351255893707275,
      "learning_rate": 0.0005999571206918355,
      "loss": 3.7488,
      "step": 1240
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5657764673233032,
      "learning_rate": 0.0005999570515054247,
      "loss": 3.93,
      "step": 1241
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4855351448059082,
      "learning_rate": 0.0005999569822632463,
      "loss": 4.0086,
      "step": 1242
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.348985195159912,
      "learning_rate": 0.0005999569129653003,
      "loss": 3.637,
      "step": 1243
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4672337770462036,
      "learning_rate": 0.0005999568436115865,
      "loss": 3.9507,
      "step": 1244
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5230839252471924,
      "learning_rate": 0.0005999567742021051,
      "loss": 3.8916,
      "step": 1245
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5460786819458008,
      "learning_rate": 0.0005999567047368561,
      "loss": 4.1162,
      "step": 1246
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.380397081375122,
      "learning_rate": 0.0005999566352158396,
      "loss": 4.175,
      "step": 1247
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9556553363800049,
      "learning_rate": 0.0005999565656390554,
      "loss": 3.7131,
      "step": 1248
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4823024272918701,
      "learning_rate": 0.0005999564960065036,
      "loss": 3.9835,
      "step": 1249
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.210249662399292,
      "learning_rate": 0.0005999564263181841,
      "loss": 3.7686,
      "step": 1250
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.614180564880371,
      "learning_rate": 0.0005999563565740972,
      "loss": 3.8327,
      "step": 1251
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2632108926773071,
      "learning_rate": 0.0005999562867742428,
      "loss": 3.9511,
      "step": 1252
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2022862434387207,
      "learning_rate": 0.0005999562169186208,
      "loss": 3.8528,
      "step": 1253
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.213356614112854,
      "learning_rate": 0.0005999561470072313,
      "loss": 3.9458,
      "step": 1254
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.396217703819275,
      "learning_rate": 0.0005999560770400742,
      "loss": 3.8138,
      "step": 1255
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1769285202026367,
      "learning_rate": 0.0005999560070171496,
      "loss": 3.8864,
      "step": 1256
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.888184905052185,
      "learning_rate": 0.0005999559369384576,
      "loss": 4.0702,
      "step": 1257
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3788397312164307,
      "learning_rate": 0.0005999558668039982,
      "loss": 3.9296,
      "step": 1258
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6313161849975586,
      "learning_rate": 0.0005999557966137711,
      "loss": 3.909,
      "step": 1259
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.247462511062622,
      "learning_rate": 0.0005999557263677767,
      "loss": 3.9933,
      "step": 1260
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2575716972351074,
      "learning_rate": 0.0005999556560660149,
      "loss": 4.0414,
      "step": 1261
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.413115382194519,
      "learning_rate": 0.0005999555857084856,
      "loss": 3.8536,
      "step": 1262
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3530197143554688,
      "learning_rate": 0.000599955515295189,
      "loss": 4.0865,
      "step": 1263
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.621538758277893,
      "learning_rate": 0.0005999554448261249,
      "loss": 4.0292,
      "step": 1264
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.209702491760254,
      "learning_rate": 0.0005999553743012935,
      "loss": 3.8702,
      "step": 1265
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9169628620147705,
      "learning_rate": 0.0005999553037206948,
      "loss": 3.76,
      "step": 1266
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.02172589302063,
      "learning_rate": 0.0005999552330843285,
      "loss": 3.7224,
      "step": 1267
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8765429258346558,
      "learning_rate": 0.0005999551623921951,
      "loss": 3.8599,
      "step": 1268
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8715049028396606,
      "learning_rate": 0.0005999550916442943,
      "loss": 3.8566,
      "step": 1269
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7598448991775513,
      "learning_rate": 0.0005999550208406261,
      "loss": 3.7234,
      "step": 1270
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.650696873664856,
      "learning_rate": 0.0005999549499811907,
      "loss": 3.7922,
      "step": 1271
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5036342144012451,
      "learning_rate": 0.000599954879065988,
      "loss": 3.8738,
      "step": 1272
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3356608152389526,
      "learning_rate": 0.000599954808095018,
      "loss": 3.5907,
      "step": 1273
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2097845077514648,
      "learning_rate": 0.0005999547370682807,
      "loss": 3.8074,
      "step": 1274
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4786063432693481,
      "learning_rate": 0.0005999546659857762,
      "loss": 3.7338,
      "step": 1275
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.687751293182373,
      "learning_rate": 0.0005999545948475045,
      "loss": 3.8006,
      "step": 1276
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.311020016670227,
      "learning_rate": 0.0005999545236534655,
      "loss": 3.9855,
      "step": 1277
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3097739219665527,
      "learning_rate": 0.0005999544524036593,
      "loss": 3.8786,
      "step": 1278
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4448919296264648,
      "learning_rate": 0.000599954381098086,
      "loss": 3.5627,
      "step": 1279
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.349810004234314,
      "learning_rate": 0.0005999543097367455,
      "loss": 3.8961,
      "step": 1280
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4125679731369019,
      "learning_rate": 0.0005999542383196378,
      "loss": 3.939,
      "step": 1281
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2367451190948486,
      "learning_rate": 0.0005999541668467628,
      "loss": 3.8246,
      "step": 1282
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4740040302276611,
      "learning_rate": 0.0005999540953181209,
      "loss": 3.9415,
      "step": 1283
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4072238206863403,
      "learning_rate": 0.0005999540237337117,
      "loss": 4.0274,
      "step": 1284
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4973899126052856,
      "learning_rate": 0.0005999539520935355,
      "loss": 3.917,
      "step": 1285
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8118938207626343,
      "learning_rate": 0.0005999538803975923,
      "loss": 3.8581,
      "step": 1286
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3477059602737427,
      "learning_rate": 0.0005999538086458818,
      "loss": 3.6344,
      "step": 1287
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3517433404922485,
      "learning_rate": 0.0005999537368384042,
      "loss": 3.7361,
      "step": 1288
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.732473611831665,
      "learning_rate": 0.0005999536649751596,
      "loss": 3.853,
      "step": 1289
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7440756559371948,
      "learning_rate": 0.000599953593056148,
      "loss": 3.8107,
      "step": 1290
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.776795744895935,
      "learning_rate": 0.0005999535210813693,
      "loss": 3.6974,
      "step": 1291
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.205211877822876,
      "learning_rate": 0.0005999534490508236,
      "loss": 3.9663,
      "step": 1292
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.623571753501892,
      "learning_rate": 0.000599953376964511,
      "loss": 4.0711,
      "step": 1293
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3699135780334473,
      "learning_rate": 0.0005999533048224314,
      "loss": 4.0643,
      "step": 1294
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.132636547088623,
      "learning_rate": 0.0005999532326245846,
      "loss": 3.9095,
      "step": 1295
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4065748453140259,
      "learning_rate": 0.0005999531603709709,
      "loss": 3.9563,
      "step": 1296
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9179997444152832,
      "learning_rate": 0.0005999530880615904,
      "loss": 3.7791,
      "step": 1297
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6410226821899414,
      "learning_rate": 0.0005999530156964428,
      "loss": 3.835,
      "step": 1298
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3195874691009521,
      "learning_rate": 0.0005999529432755284,
      "loss": 3.8804,
      "step": 1299
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3276047706604004,
      "learning_rate": 0.000599952870798847,
      "loss": 3.9072,
      "step": 1300
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6211369037628174,
      "learning_rate": 0.0005999527982663988,
      "loss": 3.877,
      "step": 1301
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7934401035308838,
      "learning_rate": 0.0005999527256781836,
      "loss": 3.7275,
      "step": 1302
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.263920545578003,
      "learning_rate": 0.0005999526530342015,
      "loss": 3.9374,
      "step": 1303
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.342544674873352,
      "learning_rate": 0.0005999525803344526,
      "loss": 4.2901,
      "step": 1304
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.005554437637329,
      "learning_rate": 0.000599952507578937,
      "loss": 3.7964,
      "step": 1305
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3861435651779175,
      "learning_rate": 0.0005999524347676543,
      "loss": 3.9992,
      "step": 1306
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.784662961959839,
      "learning_rate": 0.000599952361900605,
      "loss": 3.8617,
      "step": 1307
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4109129905700684,
      "learning_rate": 0.0005999522889777888,
      "loss": 3.9068,
      "step": 1308
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9539390802383423,
      "learning_rate": 0.0005999522159992058,
      "loss": 3.9486,
      "step": 1309
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7132253646850586,
      "learning_rate": 0.0005999521429648559,
      "loss": 4.0362,
      "step": 1310
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.341463327407837,
      "learning_rate": 0.0005999520698747395,
      "loss": 3.9172,
      "step": 1311
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.295224666595459,
      "learning_rate": 0.0005999519967288562,
      "loss": 3.7406,
      "step": 1312
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1428334712982178,
      "learning_rate": 0.0005999519235272062,
      "loss": 3.8816,
      "step": 1313
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2229825258255005,
      "learning_rate": 0.0005999518502697894,
      "loss": 4.01,
      "step": 1314
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2765700817108154,
      "learning_rate": 0.000599951776956606,
      "loss": 3.7935,
      "step": 1315
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.570006847381592,
      "learning_rate": 0.0005999517035876557,
      "loss": 4.0121,
      "step": 1316
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5975291728973389,
      "learning_rate": 0.0005999516301629389,
      "loss": 3.903,
      "step": 1317
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.659740686416626,
      "learning_rate": 0.0005999515566824555,
      "loss": 3.714,
      "step": 1318
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5899157524108887,
      "learning_rate": 0.0005999514831462053,
      "loss": 3.9138,
      "step": 1319
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8528610467910767,
      "learning_rate": 0.0005999514095541885,
      "loss": 4.03,
      "step": 1320
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.310991644859314,
      "learning_rate": 0.000599951335906405,
      "loss": 3.8645,
      "step": 1321
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.127944231033325,
      "learning_rate": 0.000599951262202855,
      "loss": 3.8526,
      "step": 1322
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3550796508789062,
      "learning_rate": 0.0005999511884435383,
      "loss": 3.7884,
      "step": 1323
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7601464986801147,
      "learning_rate": 0.000599951114628455,
      "loss": 3.6217,
      "step": 1324
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.093257188796997,
      "learning_rate": 0.0005999510407576052,
      "loss": 3.7217,
      "step": 1325
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.63364839553833,
      "learning_rate": 0.0005999509668309889,
      "loss": 3.8377,
      "step": 1326
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5762797594070435,
      "learning_rate": 0.000599950892848606,
      "loss": 4.0049,
      "step": 1327
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.250444173812866,
      "learning_rate": 0.0005999508188104564,
      "loss": 3.6903,
      "step": 1328
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0999374389648438,
      "learning_rate": 0.0005999507447165404,
      "loss": 3.9889,
      "step": 1329
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2314705848693848,
      "learning_rate": 0.000599950670566858,
      "loss": 3.859,
      "step": 1330
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.708535075187683,
      "learning_rate": 0.0005999505963614091,
      "loss": 3.8521,
      "step": 1331
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1776901483535767,
      "learning_rate": 0.0005999505221001936,
      "loss": 3.8635,
      "step": 1332
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.422752022743225,
      "learning_rate": 0.0005999504477832117,
      "loss": 3.8195,
      "step": 1333
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0711745023727417,
      "learning_rate": 0.0005999503734104633,
      "loss": 3.9377,
      "step": 1334
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3347864151000977,
      "learning_rate": 0.0005999502989819485,
      "loss": 3.9079,
      "step": 1335
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.284844160079956,
      "learning_rate": 0.0005999502244976672,
      "loss": 3.8253,
      "step": 1336
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.542069435119629,
      "learning_rate": 0.0005999501499576196,
      "loss": 3.6986,
      "step": 1337
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3785032033920288,
      "learning_rate": 0.0005999500753618056,
      "loss": 3.7371,
      "step": 1338
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.103405714035034,
      "learning_rate": 0.0005999500007102252,
      "loss": 3.9216,
      "step": 1339
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9202451705932617,
      "learning_rate": 0.0005999499260028783,
      "loss": 3.9093,
      "step": 1340
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.0656592845916748,
      "learning_rate": 0.0005999498512397652,
      "loss": 3.766,
      "step": 1341
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0122432708740234,
      "learning_rate": 0.0005999497764208858,
      "loss": 3.8296,
      "step": 1342
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2488527297973633,
      "learning_rate": 0.00059994970154624,
      "loss": 3.6366,
      "step": 1343
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6409310102462769,
      "learning_rate": 0.0005999496266158279,
      "loss": 3.7835,
      "step": 1344
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8391120433807373,
      "learning_rate": 0.0005999495516296494,
      "loss": 4.0303,
      "step": 1345
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6898475885391235,
      "learning_rate": 0.0005999494765877048,
      "loss": 3.8445,
      "step": 1346
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9113147258758545,
      "learning_rate": 0.0005999494014899938,
      "loss": 3.7234,
      "step": 1347
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1415491104125977,
      "learning_rate": 0.0005999493263365166,
      "loss": 3.6315,
      "step": 1348
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.8545427322387695,
      "learning_rate": 0.0005999492511272732,
      "loss": 4.171,
      "step": 1349
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.800150990486145,
      "learning_rate": 0.0005999491758622635,
      "loss": 4.1365,
      "step": 1350
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4299414157867432,
      "learning_rate": 0.0005999491005414876,
      "loss": 3.6712,
      "step": 1351
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2685062885284424,
      "learning_rate": 0.0005999490251649454,
      "loss": 4.1666,
      "step": 1352
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4264602661132812,
      "learning_rate": 0.0005999489497326373,
      "loss": 3.821,
      "step": 1353
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3848531246185303,
      "learning_rate": 0.0005999488742445627,
      "loss": 3.7419,
      "step": 1354
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4224244356155396,
      "learning_rate": 0.0005999487987007221,
      "loss": 3.9151,
      "step": 1355
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.207418441772461,
      "learning_rate": 0.0005999487231011154,
      "loss": 4.2162,
      "step": 1356
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5628960132598877,
      "learning_rate": 0.0005999486474457425,
      "loss": 4.0243,
      "step": 1357
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1695914268493652,
      "learning_rate": 0.0005999485717346035,
      "loss": 4.1165,
      "step": 1358
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4873242378234863,
      "learning_rate": 0.0005999484959676983,
      "loss": 3.6068,
      "step": 1359
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.209367036819458,
      "learning_rate": 0.0005999484201450272,
      "loss": 3.7,
      "step": 1360
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1836020946502686,
      "learning_rate": 0.00059994834426659,
      "loss": 3.9898,
      "step": 1361
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5169345140457153,
      "learning_rate": 0.0005999482683323866,
      "loss": 3.8013,
      "step": 1362
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2953643798828125,
      "learning_rate": 0.0005999481923424172,
      "loss": 3.6415,
      "step": 1363
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.396227478981018,
      "learning_rate": 0.0005999481162966818,
      "loss": 3.6981,
      "step": 1364
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4928046464920044,
      "learning_rate": 0.0005999480401951804,
      "loss": 3.9765,
      "step": 1365
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5937665700912476,
      "learning_rate": 0.000599947964037913,
      "loss": 3.5343,
      "step": 1366
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.61673104763031,
      "learning_rate": 0.0005999478878248796,
      "loss": 3.5251,
      "step": 1367
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1541424989700317,
      "learning_rate": 0.0005999478115560802,
      "loss": 3.9971,
      "step": 1368
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8819575309753418,
      "learning_rate": 0.0005999477352315149,
      "loss": 3.6208,
      "step": 1369
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.166659355163574,
      "learning_rate": 0.0005999476588511837,
      "loss": 3.9838,
      "step": 1370
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.191579818725586,
      "learning_rate": 0.0005999475824150864,
      "loss": 3.7613,
      "step": 1371
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6556458473205566,
      "learning_rate": 0.0005999475059232232,
      "loss": 3.8749,
      "step": 1372
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5129358768463135,
      "learning_rate": 0.0005999474293755942,
      "loss": 3.6026,
      "step": 1373
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5726157426834106,
      "learning_rate": 0.0005999473527721992,
      "loss": 3.8826,
      "step": 1374
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4725196361541748,
      "learning_rate": 0.0005999472761130384,
      "loss": 4.0044,
      "step": 1375
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2242039442062378,
      "learning_rate": 0.0005999471993981117,
      "loss": 3.637,
      "step": 1376
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1215697526931763,
      "learning_rate": 0.0005999471226274193,
      "loss": 3.6769,
      "step": 1377
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6264967918395996,
      "learning_rate": 0.0005999470458009609,
      "loss": 3.8376,
      "step": 1378
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3540544509887695,
      "learning_rate": 0.0005999469689187367,
      "loss": 3.8605,
      "step": 1379
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.440946340560913,
      "learning_rate": 0.0005999468919807468,
      "loss": 4.0029,
      "step": 1380
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6561349630355835,
      "learning_rate": 0.000599946814986991,
      "loss": 3.9668,
      "step": 1381
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4930304288864136,
      "learning_rate": 0.0005999467379374696,
      "loss": 3.78,
      "step": 1382
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4022436141967773,
      "learning_rate": 0.0005999466608321821,
      "loss": 3.8083,
      "step": 1383
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5693917274475098,
      "learning_rate": 0.0005999465836711292,
      "loss": 3.9181,
      "step": 1384
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2197315692901611,
      "learning_rate": 0.0005999465064543104,
      "loss": 4.0124,
      "step": 1385
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0155513286590576,
      "learning_rate": 0.0005999464291817259,
      "loss": 3.605,
      "step": 1386
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4477559328079224,
      "learning_rate": 0.0005999463518533757,
      "loss": 3.6117,
      "step": 1387
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5314264297485352,
      "learning_rate": 0.0005999462744692599,
      "loss": 3.9395,
      "step": 1388
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3475847244262695,
      "learning_rate": 0.0005999461970293782,
      "loss": 3.7152,
      "step": 1389
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4608367681503296,
      "learning_rate": 0.0005999461195337311,
      "loss": 3.9347,
      "step": 1390
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3490526676177979,
      "learning_rate": 0.0005999460419823183,
      "loss": 3.9594,
      "step": 1391
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4676358699798584,
      "learning_rate": 0.0005999459643751397,
      "loss": 3.9549,
      "step": 1392
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6857765913009644,
      "learning_rate": 0.0005999458867121957,
      "loss": 3.8738,
      "step": 1393
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4482070207595825,
      "learning_rate": 0.000599945808993486,
      "loss": 3.6659,
      "step": 1394
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2163288593292236,
      "learning_rate": 0.0005999457312190106,
      "loss": 3.6094,
      "step": 1395
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3429937362670898,
      "learning_rate": 0.0005999456533887698,
      "loss": 4.3681,
      "step": 1396
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8446980714797974,
      "learning_rate": 0.0005999455755027633,
      "loss": 3.7706,
      "step": 1397
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6069203615188599,
      "learning_rate": 0.0005999454975609914,
      "loss": 3.7991,
      "step": 1398
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5382437705993652,
      "learning_rate": 0.0005999454195634539,
      "loss": 3.7688,
      "step": 1399
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3653706312179565,
      "learning_rate": 0.0005999453415101508,
      "loss": 3.8348,
      "step": 1400
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3243328332901,
      "learning_rate": 0.0005999452634010824,
      "loss": 3.7651,
      "step": 1401
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4817276000976562,
      "learning_rate": 0.0005999451852362484,
      "loss": 3.9045,
      "step": 1402
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.69315242767334,
      "learning_rate": 0.0005999451070156489,
      "loss": 3.9264,
      "step": 1403
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.3945629596710205,
      "learning_rate": 0.0005999450287392839,
      "loss": 3.668,
      "step": 1404
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3095178604125977,
      "learning_rate": 0.0005999449504071535,
      "loss": 4.0086,
      "step": 1405
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2701467275619507,
      "learning_rate": 0.0005999448720192577,
      "loss": 3.838,
      "step": 1406
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5951063632965088,
      "learning_rate": 0.0005999447935755966,
      "loss": 3.6763,
      "step": 1407
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.507702350616455,
      "learning_rate": 0.00059994471507617,
      "loss": 3.7087,
      "step": 1408
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7408475875854492,
      "learning_rate": 0.0005999446365209779,
      "loss": 3.8326,
      "step": 1409
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8204718828201294,
      "learning_rate": 0.0005999445579100205,
      "loss": 3.9134,
      "step": 1410
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5875285863876343,
      "learning_rate": 0.0005999444792432979,
      "loss": 3.7473,
      "step": 1411
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4629868268966675,
      "learning_rate": 0.0005999444005208098,
      "loss": 3.7438,
      "step": 1412
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.280311107635498,
      "learning_rate": 0.0005999443217425564,
      "loss": 3.8144,
      "step": 1413
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9318445920944214,
      "learning_rate": 0.0005999442429085376,
      "loss": 4.0071,
      "step": 1414
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9700117111206055,
      "learning_rate": 0.0005999441640187536,
      "loss": 3.6595,
      "step": 1415
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.129054307937622,
      "learning_rate": 0.0005999440850732044,
      "loss": 3.6333,
      "step": 1416
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.167478322982788,
      "learning_rate": 0.0005999440060718899,
      "loss": 3.973,
      "step": 1417
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.5811641216278076,
      "learning_rate": 0.0005999439270148101,
      "loss": 3.8926,
      "step": 1418
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.591228485107422,
      "learning_rate": 0.000599943847901965,
      "loss": 3.8963,
      "step": 1419
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2745722532272339,
      "learning_rate": 0.0005999437687333548,
      "loss": 3.5727,
      "step": 1420
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.542879343032837,
      "learning_rate": 0.0005999436895089792,
      "loss": 3.9164,
      "step": 1421
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.703585147857666,
      "learning_rate": 0.0005999436102288386,
      "loss": 3.8569,
      "step": 1422
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2042752504348755,
      "learning_rate": 0.0005999435308929328,
      "loss": 3.8297,
      "step": 1423
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5666083097457886,
      "learning_rate": 0.0005999434515012618,
      "loss": 3.9714,
      "step": 1424
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4131876230239868,
      "learning_rate": 0.0005999433720538256,
      "loss": 3.9631,
      "step": 1425
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9758750200271606,
      "learning_rate": 0.0005999432925506243,
      "loss": 3.7314,
      "step": 1426
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.540627360343933,
      "learning_rate": 0.0005999432129916579,
      "loss": 3.6535,
      "step": 1427
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9068174362182617,
      "learning_rate": 0.0005999431333769264,
      "loss": 3.9456,
      "step": 1428
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5985920429229736,
      "learning_rate": 0.0005999430537064297,
      "loss": 3.9035,
      "step": 1429
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6290645599365234,
      "learning_rate": 0.0005999429739801681,
      "loss": 3.8823,
      "step": 1430
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4190350770950317,
      "learning_rate": 0.0005999428941981413,
      "loss": 3.6901,
      "step": 1431
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.446548581123352,
      "learning_rate": 0.0005999428143603496,
      "loss": 4.137,
      "step": 1432
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8394370079040527,
      "learning_rate": 0.0005999427344667927,
      "loss": 3.8023,
      "step": 1433
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6474770307540894,
      "learning_rate": 0.0005999426545174708,
      "loss": 4.064,
      "step": 1434
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8815255165100098,
      "learning_rate": 0.0005999425745123841,
      "loss": 3.9533,
      "step": 1435
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5187731981277466,
      "learning_rate": 0.0005999424944515322,
      "loss": 3.8196,
      "step": 1436
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6158363819122314,
      "learning_rate": 0.0005999424143349155,
      "loss": 3.8699,
      "step": 1437
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3238246440887451,
      "learning_rate": 0.0005999423341625337,
      "loss": 3.7437,
      "step": 1438
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.716906189918518,
      "learning_rate": 0.000599942253934387,
      "loss": 3.8048,
      "step": 1439
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5333194732666016,
      "learning_rate": 0.0005999421736504753,
      "loss": 3.7695,
      "step": 1440
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.430274248123169,
      "learning_rate": 0.0005999420933107988,
      "loss": 4.061,
      "step": 1441
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.426292061805725,
      "learning_rate": 0.0005999420129153574,
      "loss": 3.7997,
      "step": 1442
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3510278463363647,
      "learning_rate": 0.000599941932464151,
      "loss": 3.8688,
      "step": 1443
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.386627435684204,
      "learning_rate": 0.0005999418519571799,
      "loss": 4.0306,
      "step": 1444
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3838502168655396,
      "learning_rate": 0.0005999417713944439,
      "loss": 3.9482,
      "step": 1445
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2797679901123047,
      "learning_rate": 0.000599941690775943,
      "loss": 3.631,
      "step": 1446
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6962825059890747,
      "learning_rate": 0.0005999416101016773,
      "loss": 3.8221,
      "step": 1447
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9560893774032593,
      "learning_rate": 0.0005999415293716468,
      "loss": 4.1287,
      "step": 1448
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8781540393829346,
      "learning_rate": 0.0005999414485858515,
      "loss": 3.713,
      "step": 1449
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.613623023033142,
      "learning_rate": 0.0005999413677442914,
      "loss": 3.6728,
      "step": 1450
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2640984058380127,
      "learning_rate": 0.0005999412868469666,
      "loss": 3.9716,
      "step": 1451
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2627432346343994,
      "learning_rate": 0.000599941205893877,
      "loss": 3.8129,
      "step": 1452
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.5126895904541016,
      "learning_rate": 0.0005999411248850227,
      "loss": 3.7705,
      "step": 1453
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.316299557685852,
      "learning_rate": 0.0005999410438204037,
      "loss": 3.6813,
      "step": 1454
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9264758825302124,
      "learning_rate": 0.0005999409627000199,
      "loss": 3.7192,
      "step": 1455
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4482948780059814,
      "learning_rate": 0.0005999408815238716,
      "loss": 3.9687,
      "step": 1456
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3501473665237427,
      "learning_rate": 0.0005999408002919584,
      "loss": 3.6806,
      "step": 1457
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4324774742126465,
      "learning_rate": 0.0005999407190042808,
      "loss": 4.4091,
      "step": 1458
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8368076086044312,
      "learning_rate": 0.0005999406376608384,
      "loss": 3.8797,
      "step": 1459
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5287598371505737,
      "learning_rate": 0.0005999405562616314,
      "loss": 3.9308,
      "step": 1460
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9304380416870117,
      "learning_rate": 0.0005999404748066597,
      "loss": 3.7069,
      "step": 1461
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3036158084869385,
      "learning_rate": 0.0005999403932959235,
      "loss": 3.8952,
      "step": 1462
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8082549571990967,
      "learning_rate": 0.0005999403117294227,
      "loss": 3.8911,
      "step": 1463
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6990259885787964,
      "learning_rate": 0.0005999402301071573,
      "loss": 4.047,
      "step": 1464
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.243363857269287,
      "learning_rate": 0.0005999401484291274,
      "loss": 3.737,
      "step": 1465
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7200745344161987,
      "learning_rate": 0.0005999400666953329,
      "loss": 3.9887,
      "step": 1466
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9456326961517334,
      "learning_rate": 0.000599939984905774,
      "loss": 3.5556,
      "step": 1467
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.064577341079712,
      "learning_rate": 0.0005999399030604505,
      "loss": 3.8357,
      "step": 1468
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4636871814727783,
      "learning_rate": 0.0005999398211593625,
      "loss": 3.7551,
      "step": 1469
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6886636018753052,
      "learning_rate": 0.00059993973920251,
      "loss": 4.0481,
      "step": 1470
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6756497621536255,
      "learning_rate": 0.0005999396571898932,
      "loss": 3.8538,
      "step": 1471
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.466468334197998,
      "learning_rate": 0.0005999395751215118,
      "loss": 3.8583,
      "step": 1472
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.58999502658844,
      "learning_rate": 0.0005999394929973661,
      "loss": 4.1891,
      "step": 1473
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.312880039215088,
      "learning_rate": 0.0005999394108174559,
      "loss": 3.808,
      "step": 1474
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5821031332015991,
      "learning_rate": 0.0005999393285817813,
      "loss": 3.8573,
      "step": 1475
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6084388494491577,
      "learning_rate": 0.0005999392462903424,
      "loss": 3.3766,
      "step": 1476
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5336687564849854,
      "learning_rate": 0.000599939163943139,
      "loss": 3.4398,
      "step": 1477
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2890453338623047,
      "learning_rate": 0.0005999390815401714,
      "loss": 3.7128,
      "step": 1478
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.615027904510498,
      "learning_rate": 0.0005999389990814393,
      "loss": 3.765,
      "step": 1479
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.881161689758301,
      "learning_rate": 0.000599938916566943,
      "loss": 4.0002,
      "step": 1480
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5613833665847778,
      "learning_rate": 0.0005999388339966823,
      "loss": 3.7029,
      "step": 1481
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1752548217773438,
      "learning_rate": 0.0005999387513706574,
      "loss": 3.746,
      "step": 1482
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.240114212036133,
      "learning_rate": 0.0005999386686888681,
      "loss": 4.007,
      "step": 1483
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9063173532485962,
      "learning_rate": 0.0005999385859513147,
      "loss": 3.9716,
      "step": 1484
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9958841800689697,
      "learning_rate": 0.000599938503157997,
      "loss": 3.6757,
      "step": 1485
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.950273871421814,
      "learning_rate": 0.0005999384203089149,
      "loss": 3.6998,
      "step": 1486
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.517439365386963,
      "learning_rate": 0.0005999383374040687,
      "loss": 3.8505,
      "step": 1487
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2286192178726196,
      "learning_rate": 0.0005999382544434584,
      "loss": 3.6831,
      "step": 1488
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9324753284454346,
      "learning_rate": 0.000599938171427084,
      "loss": 3.6383,
      "step": 1489
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.668357014656067,
      "learning_rate": 0.0005999380883549452,
      "loss": 3.5873,
      "step": 1490
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.48249089717865,
      "learning_rate": 0.0005999380052270424,
      "loss": 3.7163,
      "step": 1491
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.592873454093933,
      "learning_rate": 0.0005999379220433753,
      "loss": 3.984,
      "step": 1492
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.560469388961792,
      "learning_rate": 0.0005999378388039442,
      "loss": 3.7256,
      "step": 1493
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.643409252166748,
      "learning_rate": 0.0005999377555087489,
      "loss": 3.9554,
      "step": 1494
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9819737672805786,
      "learning_rate": 0.0005999376721577896,
      "loss": 3.8487,
      "step": 1495
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.5675220489501953,
      "learning_rate": 0.0005999375887510663,
      "loss": 3.712,
      "step": 1496
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3775304555892944,
      "learning_rate": 0.0005999375052885788,
      "loss": 3.8411,
      "step": 1497
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9445409774780273,
      "learning_rate": 0.0005999374217703273,
      "loss": 3.5866,
      "step": 1498
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.950575351715088,
      "learning_rate": 0.0005999373381963117,
      "loss": 3.8031,
      "step": 1499
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4599814414978027,
      "learning_rate": 0.0005999372545665322,
      "loss": 3.6067,
      "step": 1500
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.700814127922058,
      "learning_rate": 0.0005999371708809887,
      "loss": 3.6927,
      "step": 1501
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.377209186553955,
      "learning_rate": 0.0005999370871396811,
      "loss": 3.5962,
      "step": 1502
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1489386558532715,
      "learning_rate": 0.0005999370033426095,
      "loss": 3.4167,
      "step": 1503
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5988847017288208,
      "learning_rate": 0.0005999369194897742,
      "loss": 3.9098,
      "step": 1504
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6545802354812622,
      "learning_rate": 0.0005999368355811748,
      "loss": 3.732,
      "step": 1505
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.082942247390747,
      "learning_rate": 0.0005999367516168116,
      "loss": 3.6803,
      "step": 1506
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2455767393112183,
      "learning_rate": 0.0005999366675966842,
      "loss": 4.0306,
      "step": 1507
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9743852615356445,
      "learning_rate": 0.0005999365835207932,
      "loss": 3.6025,
      "step": 1508
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.577705979347229,
      "learning_rate": 0.0005999364993891382,
      "loss": 3.8425,
      "step": 1509
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4480812549591064,
      "learning_rate": 0.0005999364152017195,
      "loss": 3.762,
      "step": 1510
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.296999216079712,
      "learning_rate": 0.0005999363309585368,
      "loss": 3.9096,
      "step": 1511
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9503862857818604,
      "learning_rate": 0.0005999362466595902,
      "loss": 3.8224,
      "step": 1512
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6796027421951294,
      "learning_rate": 0.00059993616230488,
      "loss": 3.7594,
      "step": 1513
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.466723918914795,
      "learning_rate": 0.0005999360778944059,
      "loss": 3.6936,
      "step": 1514
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7266819477081299,
      "learning_rate": 0.0005999359934281681,
      "loss": 3.9673,
      "step": 1515
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4012738466262817,
      "learning_rate": 0.0005999359089061664,
      "loss": 3.8211,
      "step": 1516
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4057258367538452,
      "learning_rate": 0.000599935824328401,
      "loss": 3.8048,
      "step": 1517
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3674938678741455,
      "learning_rate": 0.000599935739694872,
      "loss": 3.7092,
      "step": 1518
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7169854640960693,
      "learning_rate": 0.0005999356550055791,
      "loss": 3.8701,
      "step": 1519
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4764840602874756,
      "learning_rate": 0.0005999355702605226,
      "loss": 3.715,
      "step": 1520
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6566661596298218,
      "learning_rate": 0.0005999354854597024,
      "loss": 4.0371,
      "step": 1521
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.678862452507019,
      "learning_rate": 0.0005999354006031185,
      "loss": 3.8476,
      "step": 1522
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4755326509475708,
      "learning_rate": 0.000599935315690771,
      "loss": 3.7934,
      "step": 1523
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3784868717193604,
      "learning_rate": 0.0005999352307226597,
      "loss": 3.8136,
      "step": 1524
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.592911958694458,
      "learning_rate": 0.000599935145698785,
      "loss": 3.5109,
      "step": 1525
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5574867725372314,
      "learning_rate": 0.0005999350606191465,
      "loss": 3.8178,
      "step": 1526
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2936257123947144,
      "learning_rate": 0.0005999349754837447,
      "loss": 3.8113,
      "step": 1527
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5651142597198486,
      "learning_rate": 0.000599934890292579,
      "loss": 3.7784,
      "step": 1528
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.597350835800171,
      "learning_rate": 0.0005999348050456499,
      "loss": 3.6765,
      "step": 1529
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.481068730354309,
      "learning_rate": 0.0005999347197429573,
      "loss": 3.8133,
      "step": 1530
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7881498336791992,
      "learning_rate": 0.0005999346343845011,
      "loss": 3.9233,
      "step": 1531
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.5929033756256104,
      "learning_rate": 0.0005999345489702814,
      "loss": 3.5966,
      "step": 1532
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5360361337661743,
      "learning_rate": 0.0005999344635002981,
      "loss": 3.7773,
      "step": 1533
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.093735456466675,
      "learning_rate": 0.0005999343779745516,
      "loss": 3.9415,
      "step": 1534
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4846998453140259,
      "learning_rate": 0.0005999342923930414,
      "loss": 3.9411,
      "step": 1535
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.726632833480835,
      "learning_rate": 0.0005999342067557679,
      "loss": 3.7447,
      "step": 1536
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.470919132232666,
      "learning_rate": 0.0005999341210627308,
      "loss": 3.6694,
      "step": 1537
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.059112310409546,
      "learning_rate": 0.0005999340353139304,
      "loss": 3.8021,
      "step": 1538
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8162089586257935,
      "learning_rate": 0.0005999339495093666,
      "loss": 3.7495,
      "step": 1539
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4627954959869385,
      "learning_rate": 0.0005999338636490394,
      "loss": 3.8701,
      "step": 1540
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.6543707847595215,
      "learning_rate": 0.0005999337777329489,
      "loss": 3.8228,
      "step": 1541
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8954790830612183,
      "learning_rate": 0.0005999336917610949,
      "loss": 3.8676,
      "step": 1542
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2628895044326782,
      "learning_rate": 0.0005999336057334777,
      "loss": 3.9141,
      "step": 1543
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8710051774978638,
      "learning_rate": 0.0005999335196500971,
      "loss": 3.6361,
      "step": 1544
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.532654047012329,
      "learning_rate": 0.0005999334335109531,
      "loss": 3.6424,
      "step": 1545
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8603498935699463,
      "learning_rate": 0.000599933347316046,
      "loss": 3.8149,
      "step": 1546
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6143115758895874,
      "learning_rate": 0.0005999332610653757,
      "loss": 3.6796,
      "step": 1547
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7986036539077759,
      "learning_rate": 0.0005999331747589419,
      "loss": 3.8825,
      "step": 1548
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.526252269744873,
      "learning_rate": 0.000599933088396745,
      "loss": 3.8282,
      "step": 1549
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6213157176971436,
      "learning_rate": 0.0005999330019787848,
      "loss": 3.656,
      "step": 1550
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.499215602874756,
      "learning_rate": 0.0005999329155050615,
      "loss": 3.8329,
      "step": 1551
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3793559074401855,
      "learning_rate": 0.0005999328289755749,
      "loss": 4.0534,
      "step": 1552
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6136149168014526,
      "learning_rate": 0.0005999327423903253,
      "loss": 3.675,
      "step": 1553
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.603865385055542,
      "learning_rate": 0.0005999326557493123,
      "loss": 3.8342,
      "step": 1554
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.633265733718872,
      "learning_rate": 0.0005999325690525363,
      "loss": 3.8809,
      "step": 1555
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.579660415649414,
      "learning_rate": 0.0005999324822999972,
      "loss": 3.8972,
      "step": 1556
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4740890264511108,
      "learning_rate": 0.0005999323954916949,
      "loss": 3.8005,
      "step": 1557
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.505765676498413,
      "learning_rate": 0.0005999323086276295,
      "loss": 3.5574,
      "step": 1558
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.734588623046875,
      "learning_rate": 0.0005999322217078011,
      "loss": 3.7621,
      "step": 1559
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6202985048294067,
      "learning_rate": 0.0005999321347322096,
      "loss": 3.7107,
      "step": 1560
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4222272634506226,
      "learning_rate": 0.000599932047700855,
      "loss": 3.8017,
      "step": 1561
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8120843172073364,
      "learning_rate": 0.0005999319606137375,
      "loss": 4.003,
      "step": 1562
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6843774318695068,
      "learning_rate": 0.0005999318734708569,
      "loss": 3.8239,
      "step": 1563
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8068674802780151,
      "learning_rate": 0.0005999317862722134,
      "loss": 3.865,
      "step": 1564
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9060423374176025,
      "learning_rate": 0.0005999316990178067,
      "loss": 3.4699,
      "step": 1565
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5313900709152222,
      "learning_rate": 0.0005999316117076373,
      "loss": 3.9307,
      "step": 1566
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9796712398529053,
      "learning_rate": 0.0005999315243417048,
      "loss": 3.6797,
      "step": 1567
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3924633264541626,
      "learning_rate": 0.0005999314369200094,
      "loss": 3.9869,
      "step": 1568
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7108932733535767,
      "learning_rate": 0.000599931349442551,
      "loss": 3.9323,
      "step": 1569
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6527509689331055,
      "learning_rate": 0.0005999312619093299,
      "loss": 3.6804,
      "step": 1570
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.710565209388733,
      "learning_rate": 0.0005999311743203458,
      "loss": 3.732,
      "step": 1571
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.635500431060791,
      "learning_rate": 0.0005999310866755987,
      "loss": 3.5026,
      "step": 1572
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.614216685295105,
      "learning_rate": 0.0005999309989750889,
      "loss": 3.9114,
      "step": 1573
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4694833755493164,
      "learning_rate": 0.0005999309112188163,
      "loss": 3.9082,
      "step": 1574
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6291249990463257,
      "learning_rate": 0.0005999308234067807,
      "loss": 3.8501,
      "step": 1575
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0534534454345703,
      "learning_rate": 0.0005999307355389825,
      "loss": 3.6462,
      "step": 1576
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.421776533126831,
      "learning_rate": 0.0005999306476154214,
      "loss": 3.8116,
      "step": 1577
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4745858907699585,
      "learning_rate": 0.0005999305596360976,
      "loss": 4.2059,
      "step": 1578
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5706104040145874,
      "learning_rate": 0.0005999304716010111,
      "loss": 3.9722,
      "step": 1579
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3262739181518555,
      "learning_rate": 0.0005999303835101618,
      "loss": 3.8122,
      "step": 1580
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.447888970375061,
      "learning_rate": 0.0005999302953635497,
      "loss": 3.7739,
      "step": 1581
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6862527132034302,
      "learning_rate": 0.000599930207161175,
      "loss": 3.8051,
      "step": 1582
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4377844333648682,
      "learning_rate": 0.0005999301189030375,
      "loss": 3.4015,
      "step": 1583
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7131659984588623,
      "learning_rate": 0.0005999300305891375,
      "loss": 3.9664,
      "step": 1584
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3481967449188232,
      "learning_rate": 0.0005999299422194748,
      "loss": 3.6817,
      "step": 1585
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2435109615325928,
      "learning_rate": 0.0005999298537940494,
      "loss": 3.824,
      "step": 1586
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.427996277809143,
      "learning_rate": 0.0005999297653128613,
      "loss": 3.8974,
      "step": 1587
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8419491052627563,
      "learning_rate": 0.0005999296767759106,
      "loss": 3.9328,
      "step": 1588
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2409175634384155,
      "learning_rate": 0.0005999295881831975,
      "loss": 3.6423,
      "step": 1589
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5319768190383911,
      "learning_rate": 0.0005999294995347217,
      "loss": 3.6231,
      "step": 1590
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5779627561569214,
      "learning_rate": 0.0005999294108304834,
      "loss": 3.8218,
      "step": 1591
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2921075820922852,
      "learning_rate": 0.0005999293220704824,
      "loss": 3.8817,
      "step": 1592
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.509301781654358,
      "learning_rate": 0.0005999292332547191,
      "loss": 3.8014,
      "step": 1593
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.757768154144287,
      "learning_rate": 0.0005999291443831931,
      "loss": 3.7257,
      "step": 1594
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.852593183517456,
      "learning_rate": 0.0005999290554559049,
      "loss": 3.5022,
      "step": 1595
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4069067239761353,
      "learning_rate": 0.0005999289664728539,
      "loss": 3.5335,
      "step": 1596
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7573386430740356,
      "learning_rate": 0.0005999288774340407,
      "loss": 3.7575,
      "step": 1597
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.612714409828186,
      "learning_rate": 0.0005999287883394649,
      "loss": 4.0073,
      "step": 1598
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.278671145439148,
      "learning_rate": 0.0005999286991891266,
      "loss": 3.8503,
      "step": 1599
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1463667154312134,
      "learning_rate": 0.0005999286099830261,
      "loss": 3.9187,
      "step": 1600
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7953288555145264,
      "learning_rate": 0.0005999285207211631,
      "loss": 3.8203,
      "step": 1601
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.712222695350647,
      "learning_rate": 0.0005999284314035377,
      "loss": 3.9941,
      "step": 1602
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3513761758804321,
      "learning_rate": 0.00059992834203015,
      "loss": 3.9467,
      "step": 1603
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0765042304992676,
      "learning_rate": 0.000599928252601,
      "loss": 3.6259,
      "step": 1604
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4119023084640503,
      "learning_rate": 0.0005999281631160876,
      "loss": 4.2179,
      "step": 1605
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6108804941177368,
      "learning_rate": 0.0005999280735754129,
      "loss": 3.6604,
      "step": 1606
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0805652141571045,
      "learning_rate": 0.0005999279839789759,
      "loss": 3.6216,
      "step": 1607
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4959020614624023,
      "learning_rate": 0.0005999278943267767,
      "loss": 3.6991,
      "step": 1608
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0300960540771484,
      "learning_rate": 0.0005999278046188151,
      "loss": 3.6231,
      "step": 1609
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1650819778442383,
      "learning_rate": 0.0005999277148550913,
      "loss": 3.8145,
      "step": 1610
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.751741409301758,
      "learning_rate": 0.0005999276250356055,
      "loss": 3.7202,
      "step": 1611
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.226222038269043,
      "learning_rate": 0.0005999275351603573,
      "loss": 3.5741,
      "step": 1612
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4818228483200073,
      "learning_rate": 0.000599927445229347,
      "loss": 3.6132,
      "step": 1613
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.860178232192993,
      "learning_rate": 0.0005999273552425744,
      "loss": 3.6108,
      "step": 1614
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.205947160720825,
      "learning_rate": 0.0005999272652000397,
      "loss": 3.7552,
      "step": 1615
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3602609634399414,
      "learning_rate": 0.0005999271751017429,
      "loss": 3.73,
      "step": 1616
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0527291297912598,
      "learning_rate": 0.0005999270849476839,
      "loss": 3.4525,
      "step": 1617
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.7367117404937744,
      "learning_rate": 0.0005999269947378628,
      "loss": 3.5767,
      "step": 1618
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2483338117599487,
      "learning_rate": 0.0005999269044722797,
      "loss": 3.9727,
      "step": 1619
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.748323678970337,
      "learning_rate": 0.0005999268141509344,
      "loss": 3.7987,
      "step": 1620
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7810593843460083,
      "learning_rate": 0.0005999267237738271,
      "loss": 3.5059,
      "step": 1621
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5170631408691406,
      "learning_rate": 0.0005999266333409578,
      "loss": 3.8504,
      "step": 1622
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.279518723487854,
      "learning_rate": 0.0005999265428523264,
      "loss": 3.6301,
      "step": 1623
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5357450246810913,
      "learning_rate": 0.000599926452307933,
      "loss": 3.9805,
      "step": 1624
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5664035081863403,
      "learning_rate": 0.0005999263617077777,
      "loss": 3.7602,
      "step": 1625
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.372627854347229,
      "learning_rate": 0.0005999262710518604,
      "loss": 3.9433,
      "step": 1626
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3666518926620483,
      "learning_rate": 0.0005999261803401811,
      "loss": 3.9945,
      "step": 1627
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2922544479370117,
      "learning_rate": 0.00059992608957274,
      "loss": 3.903,
      "step": 1628
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7866313457489014,
      "learning_rate": 0.0005999259987495368,
      "loss": 3.5964,
      "step": 1629
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3611125946044922,
      "learning_rate": 0.0005999259078705718,
      "loss": 3.7469,
      "step": 1630
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5265412330627441,
      "learning_rate": 0.0005999258169358449,
      "loss": 3.7051,
      "step": 1631
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2034541368484497,
      "learning_rate": 0.0005999257259453561,
      "loss": 3.7621,
      "step": 1632
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.615676760673523,
      "learning_rate": 0.0005999256348991054,
      "loss": 3.9113,
      "step": 1633
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7520811557769775,
      "learning_rate": 0.0005999255437970929,
      "loss": 3.7494,
      "step": 1634
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7774298191070557,
      "learning_rate": 0.0005999254526393187,
      "loss": 3.6063,
      "step": 1635
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4866386651992798,
      "learning_rate": 0.0005999253614257826,
      "loss": 3.9198,
      "step": 1636
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6580848693847656,
      "learning_rate": 0.0005999252701564846,
      "loss": 3.912,
      "step": 1637
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8662467002868652,
      "learning_rate": 0.000599925178831425,
      "loss": 3.6047,
      "step": 1638
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8753594160079956,
      "learning_rate": 0.0005999250874506036,
      "loss": 3.8818,
      "step": 1639
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9187099933624268,
      "learning_rate": 0.0005999249960140204,
      "loss": 3.9435,
      "step": 1640
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7516928911209106,
      "learning_rate": 0.0005999249045216755,
      "loss": 3.6037,
      "step": 1641
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.884382724761963,
      "learning_rate": 0.0005999248129735689,
      "loss": 3.7313,
      "step": 1642
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8700225353240967,
      "learning_rate": 0.0005999247213697008,
      "loss": 3.7003,
      "step": 1643
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7853246927261353,
      "learning_rate": 0.0005999246297100708,
      "loss": 4.0841,
      "step": 1644
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0216078758239746,
      "learning_rate": 0.0005999245379946792,
      "loss": 3.8038,
      "step": 1645
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.988162875175476,
      "learning_rate": 0.0005999244462235261,
      "loss": 3.7497,
      "step": 1646
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8724565505981445,
      "learning_rate": 0.0005999243543966113,
      "loss": 3.7781,
      "step": 1647
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4084874391555786,
      "learning_rate": 0.0005999242625139348,
      "loss": 3.8551,
      "step": 1648
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3270153999328613,
      "learning_rate": 0.0005999241705754968,
      "loss": 3.7367,
      "step": 1649
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4809815883636475,
      "learning_rate": 0.0005999240785812972,
      "loss": 3.6517,
      "step": 1650
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8385868072509766,
      "learning_rate": 0.000599923986531336,
      "loss": 3.7367,
      "step": 1651
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.527233839035034,
      "learning_rate": 0.0005999238944256134,
      "loss": 3.7798,
      "step": 1652
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3549187183380127,
      "learning_rate": 0.0005999238022641293,
      "loss": 3.7053,
      "step": 1653
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7826437950134277,
      "learning_rate": 0.0005999237100468836,
      "loss": 3.633,
      "step": 1654
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5214616060256958,
      "learning_rate": 0.0005999236177738765,
      "loss": 3.6741,
      "step": 1655
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4680817127227783,
      "learning_rate": 0.0005999235254451079,
      "loss": 3.8709,
      "step": 1656
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4732017517089844,
      "learning_rate": 0.0005999234330605779,
      "loss": 3.4578,
      "step": 1657
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4129493236541748,
      "learning_rate": 0.0005999233406202864,
      "loss": 4.0242,
      "step": 1658
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7536592483520508,
      "learning_rate": 0.0005999232481242335,
      "loss": 3.8941,
      "step": 1659
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5982723236083984,
      "learning_rate": 0.0005999231555724192,
      "loss": 3.6419,
      "step": 1660
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.137123942375183,
      "learning_rate": 0.0005999230629648436,
      "loss": 3.8328,
      "step": 1661
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.503524899482727,
      "learning_rate": 0.0005999229703015066,
      "loss": 3.8057,
      "step": 1662
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8527659177780151,
      "learning_rate": 0.0005999228775824082,
      "loss": 3.8499,
      "step": 1663
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4507482051849365,
      "learning_rate": 0.0005999227848075485,
      "loss": 3.6385,
      "step": 1664
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5883697271347046,
      "learning_rate": 0.0005999226919769276,
      "loss": 3.9332,
      "step": 1665
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7780548334121704,
      "learning_rate": 0.0005999225990905453,
      "loss": 3.8504,
      "step": 1666
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5524027347564697,
      "learning_rate": 0.0005999225061484017,
      "loss": 3.9382,
      "step": 1667
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.015354633331299,
      "learning_rate": 0.000599922413150497,
      "loss": 3.5485,
      "step": 1668
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4784955978393555,
      "learning_rate": 0.0005999223200968309,
      "loss": 3.8533,
      "step": 1669
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2470886707305908,
      "learning_rate": 0.0005999222269874036,
      "loss": 3.8123,
      "step": 1670
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3091672658920288,
      "learning_rate": 0.0005999221338222153,
      "loss": 3.8422,
      "step": 1671
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4859874248504639,
      "learning_rate": 0.0005999220406012655,
      "loss": 3.6098,
      "step": 1672
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9359065294265747,
      "learning_rate": 0.0005999219473245547,
      "loss": 3.7213,
      "step": 1673
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.364098072052002,
      "learning_rate": 0.0005999218539920828,
      "loss": 3.6815,
      "step": 1674
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5693774223327637,
      "learning_rate": 0.0005999217606038497,
      "loss": 3.4577,
      "step": 1675
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8615728616714478,
      "learning_rate": 0.0005999216671598555,
      "loss": 3.7364,
      "step": 1676
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7975642681121826,
      "learning_rate": 0.0005999215736601002,
      "loss": 3.9212,
      "step": 1677
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1610419750213623,
      "learning_rate": 0.0005999214801045838,
      "loss": 4.0965,
      "step": 1678
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7511848211288452,
      "learning_rate": 0.0005999213864933062,
      "loss": 3.8486,
      "step": 1679
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2231066226959229,
      "learning_rate": 0.0005999212928262677,
      "loss": 3.71,
      "step": 1680
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4534168243408203,
      "learning_rate": 0.0005999211991034682,
      "loss": 3.5892,
      "step": 1681
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3898180723190308,
      "learning_rate": 0.0005999211053249077,
      "loss": 3.9203,
      "step": 1682
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4140077829360962,
      "learning_rate": 0.0005999210114905862,
      "loss": 3.9502,
      "step": 1683
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3649158477783203,
      "learning_rate": 0.0005999209176005035,
      "loss": 3.7144,
      "step": 1684
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3305643796920776,
      "learning_rate": 0.0005999208236546601,
      "loss": 3.9076,
      "step": 1685
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.478837013244629,
      "learning_rate": 0.0005999207296530556,
      "loss": 3.6255,
      "step": 1686
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3351057767868042,
      "learning_rate": 0.0005999206355956903,
      "loss": 3.764,
      "step": 1687
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5999279022216797,
      "learning_rate": 0.0005999205414825641,
      "loss": 3.5502,
      "step": 1688
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9603407382965088,
      "learning_rate": 0.0005999204473136769,
      "loss": 3.5601,
      "step": 1689
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9437062740325928,
      "learning_rate": 0.0005999203530890289,
      "loss": 3.519,
      "step": 1690
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.329283356666565,
      "learning_rate": 0.0005999202588086199,
      "loss": 3.8703,
      "step": 1691
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.5345139503479004,
      "learning_rate": 0.0005999201644724503,
      "loss": 3.4705,
      "step": 1692
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.506025791168213,
      "learning_rate": 0.0005999200700805197,
      "loss": 3.7972,
      "step": 1693
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8614225387573242,
      "learning_rate": 0.0005999199756328284,
      "loss": 3.5783,
      "step": 1694
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8253320455551147,
      "learning_rate": 0.0005999198811293762,
      "loss": 3.9801,
      "step": 1695
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1910158395767212,
      "learning_rate": 0.0005999197865701634,
      "loss": 3.8125,
      "step": 1696
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9311274290084839,
      "learning_rate": 0.0005999196919551897,
      "loss": 3.5627,
      "step": 1697
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3437378406524658,
      "learning_rate": 0.0005999195972844554,
      "loss": 3.7674,
      "step": 1698
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8303358554840088,
      "learning_rate": 0.0005999195025579603,
      "loss": 3.8267,
      "step": 1699
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.5635430812835693,
      "learning_rate": 0.0005999194077757045,
      "loss": 3.6855,
      "step": 1700
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.55633544921875,
      "learning_rate": 0.000599919312937688,
      "loss": 3.7926,
      "step": 1701
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.46162748336792,
      "learning_rate": 0.0005999192180439109,
      "loss": 3.5463,
      "step": 1702
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3307547569274902,
      "learning_rate": 0.0005999191230943732,
      "loss": 3.6613,
      "step": 1703
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.430441975593567,
      "learning_rate": 0.0005999190280890748,
      "loss": 3.667,
      "step": 1704
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7821608781814575,
      "learning_rate": 0.0005999189330280157,
      "loss": 3.9355,
      "step": 1705
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4180113077163696,
      "learning_rate": 0.0005999188379111961,
      "loss": 3.8754,
      "step": 1706
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.485353708267212,
      "learning_rate": 0.0005999187427386159,
      "loss": 3.6716,
      "step": 1707
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4674291610717773,
      "learning_rate": 0.0005999186475102752,
      "loss": 3.7166,
      "step": 1708
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.536673665046692,
      "learning_rate": 0.0005999185522261739,
      "loss": 3.9249,
      "step": 1709
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4603352546691895,
      "learning_rate": 0.0005999184568863122,
      "loss": 3.601,
      "step": 1710
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3743590116500854,
      "learning_rate": 0.0005999183614906899,
      "loss": 3.5381,
      "step": 1711
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4033875465393066,
      "learning_rate": 0.0005999182660393071,
      "loss": 3.7115,
      "step": 1712
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6742626428604126,
      "learning_rate": 0.000599918170532164,
      "loss": 3.7677,
      "step": 1713
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6030471324920654,
      "learning_rate": 0.0005999180749692602,
      "loss": 3.6913,
      "step": 1714
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2381370067596436,
      "learning_rate": 0.0005999179793505961,
      "loss": 3.4595,
      "step": 1715
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8469468355178833,
      "learning_rate": 0.0005999178836761715,
      "loss": 3.8949,
      "step": 1716
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.344785690307617,
      "learning_rate": 0.0005999177879459866,
      "loss": 3.4421,
      "step": 1717
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.271775245666504,
      "learning_rate": 0.0005999176921600414,
      "loss": 3.6868,
      "step": 1718
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3935472965240479,
      "learning_rate": 0.0005999175963183357,
      "loss": 3.7957,
      "step": 1719
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3331218957901,
      "learning_rate": 0.0005999175004208696,
      "loss": 3.7943,
      "step": 1720
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.108751058578491,
      "learning_rate": 0.0005999174044676433,
      "loss": 3.5913,
      "step": 1721
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3289613723754883,
      "learning_rate": 0.0005999173084586567,
      "loss": 3.5373,
      "step": 1722
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5592401027679443,
      "learning_rate": 0.0005999172123939097,
      "loss": 4.0718,
      "step": 1723
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5973560810089111,
      "learning_rate": 0.0005999171162734025,
      "loss": 3.8278,
      "step": 1724
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4663200378417969,
      "learning_rate": 0.000599917020097135,
      "loss": 3.546,
      "step": 1725
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1716164350509644,
      "learning_rate": 0.0005999169238651074,
      "loss": 3.8662,
      "step": 1726
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1981300115585327,
      "learning_rate": 0.0005999168275773194,
      "loss": 3.8944,
      "step": 1727
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5658437013626099,
      "learning_rate": 0.0005999167312337714,
      "loss": 3.912,
      "step": 1728
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.273137092590332,
      "learning_rate": 0.000599916634834463,
      "loss": 3.9393,
      "step": 1729
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7382227182388306,
      "learning_rate": 0.0005999165383793946,
      "loss": 3.6089,
      "step": 1730
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5188791751861572,
      "learning_rate": 0.0005999164418685661,
      "loss": 3.6449,
      "step": 1731
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.463043212890625,
      "learning_rate": 0.0005999163453019774,
      "loss": 3.9599,
      "step": 1732
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.581972599029541,
      "learning_rate": 0.0005999162486796285,
      "loss": 3.6187,
      "step": 1733
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4528931379318237,
      "learning_rate": 0.0005999161520015196,
      "loss": 3.5213,
      "step": 1734
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.093472480773926,
      "learning_rate": 0.0005999160552676505,
      "loss": 3.503,
      "step": 1735
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.938482403755188,
      "learning_rate": 0.0005999159584780215,
      "loss": 3.6557,
      "step": 1736
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6047604084014893,
      "learning_rate": 0.0005999158616326323,
      "loss": 3.6497,
      "step": 1737
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.599106788635254,
      "learning_rate": 0.0005999157647314832,
      "loss": 3.7615,
      "step": 1738
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6478792428970337,
      "learning_rate": 0.000599915667774574,
      "loss": 3.8649,
      "step": 1739
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4797122478485107,
      "learning_rate": 0.000599915570761905,
      "loss": 3.6716,
      "step": 1740
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0275890827178955,
      "learning_rate": 0.000599915473693476,
      "loss": 3.6337,
      "step": 1741
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.045520067214966,
      "learning_rate": 0.000599915376569287,
      "loss": 3.7507,
      "step": 1742
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6334824562072754,
      "learning_rate": 0.000599915279389338,
      "loss": 3.783,
      "step": 1743
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.6604909896850586,
      "learning_rate": 0.0005999151821536292,
      "loss": 3.5374,
      "step": 1744
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3001060485839844,
      "learning_rate": 0.0005999150848621604,
      "loss": 3.64,
      "step": 1745
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.573193907737732,
      "learning_rate": 0.0005999149875149317,
      "loss": 3.7399,
      "step": 1746
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6332300901412964,
      "learning_rate": 0.0005999148901119432,
      "loss": 3.8454,
      "step": 1747
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3708791732788086,
      "learning_rate": 0.000599914792653195,
      "loss": 3.8186,
      "step": 1748
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2478585243225098,
      "learning_rate": 0.0005999146951386868,
      "loss": 3.5383,
      "step": 1749
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6378897428512573,
      "learning_rate": 0.0005999145975684189,
      "loss": 3.6074,
      "step": 1750
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6550943851470947,
      "learning_rate": 0.0005999144999423911,
      "loss": 3.5443,
      "step": 1751
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6960842609405518,
      "learning_rate": 0.0005999144022606036,
      "loss": 3.5733,
      "step": 1752
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5212218761444092,
      "learning_rate": 0.0005999143045230564,
      "loss": 3.5801,
      "step": 1753
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0481531620025635,
      "learning_rate": 0.0005999142067297493,
      "loss": 3.7628,
      "step": 1754
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.512390375137329,
      "learning_rate": 0.0005999141088806827,
      "loss": 3.5822,
      "step": 1755
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.045638084411621,
      "learning_rate": 0.0005999140109758563,
      "loss": 3.6646,
      "step": 1756
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3805128335952759,
      "learning_rate": 0.0005999139130152703,
      "loss": 4.0865,
      "step": 1757
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8620636463165283,
      "learning_rate": 0.0005999138149989246,
      "loss": 3.6276,
      "step": 1758
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8647992610931396,
      "learning_rate": 0.0005999137169268191,
      "loss": 3.5846,
      "step": 1759
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.620977759361267,
      "learning_rate": 0.0005999136187989542,
      "loss": 3.6239,
      "step": 1760
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.9424209594726562,
      "learning_rate": 0.0005999135206153296,
      "loss": 3.5677,
      "step": 1761
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6738396883010864,
      "learning_rate": 0.0005999134223759455,
      "loss": 3.6787,
      "step": 1762
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4873740673065186,
      "learning_rate": 0.0005999133240808017,
      "loss": 3.7201,
      "step": 1763
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7443416118621826,
      "learning_rate": 0.0005999132257298984,
      "loss": 3.5984,
      "step": 1764
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7494900226593018,
      "learning_rate": 0.0005999131273232356,
      "loss": 3.688,
      "step": 1765
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3433212041854858,
      "learning_rate": 0.0005999130288608132,
      "loss": 3.4539,
      "step": 1766
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9814376831054688,
      "learning_rate": 0.0005999129303426315,
      "loss": 3.7408,
      "step": 1767
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.524832010269165,
      "learning_rate": 0.0005999128317686901,
      "loss": 3.77,
      "step": 1768
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7121553421020508,
      "learning_rate": 0.0005999127331389894,
      "loss": 3.7553,
      "step": 1769
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4460532665252686,
      "learning_rate": 0.0005999126344535292,
      "loss": 3.7675,
      "step": 1770
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4328663349151611,
      "learning_rate": 0.0005999125357123097,
      "loss": 3.6856,
      "step": 1771
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2877473831176758,
      "learning_rate": 0.0005999124369153306,
      "loss": 3.8012,
      "step": 1772
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7051838636398315,
      "learning_rate": 0.0005999123380625922,
      "loss": 3.8024,
      "step": 1773
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2424900531768799,
      "learning_rate": 0.0005999122391540944,
      "loss": 3.8597,
      "step": 1774
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5840665102005005,
      "learning_rate": 0.0005999121401898374,
      "loss": 3.8584,
      "step": 1775
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2297914028167725,
      "learning_rate": 0.0005999120411698209,
      "loss": 3.6061,
      "step": 1776
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6031197309494019,
      "learning_rate": 0.0005999119420940452,
      "loss": 3.5772,
      "step": 1777
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5016555786132812,
      "learning_rate": 0.0005999118429625101,
      "loss": 3.8888,
      "step": 1778
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.723203420639038,
      "learning_rate": 0.0005999117437752159,
      "loss": 3.8281,
      "step": 1779
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5865216255187988,
      "learning_rate": 0.0005999116445321623,
      "loss": 3.6665,
      "step": 1780
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6792103052139282,
      "learning_rate": 0.0005999115452333494,
      "loss": 3.481,
      "step": 1781
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9016591310501099,
      "learning_rate": 0.0005999114458787774,
      "loss": 3.7362,
      "step": 1782
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4038031101226807,
      "learning_rate": 0.0005999113464684462,
      "loss": 3.5102,
      "step": 1783
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5682625770568848,
      "learning_rate": 0.0005999112470023558,
      "loss": 3.4906,
      "step": 1784
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3727974891662598,
      "learning_rate": 0.0005999111474805063,
      "loss": 3.6992,
      "step": 1785
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8365514278411865,
      "learning_rate": 0.0005999110479028975,
      "loss": 3.6499,
      "step": 1786
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6929618120193481,
      "learning_rate": 0.0005999109482695297,
      "loss": 3.8667,
      "step": 1787
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7771880626678467,
      "learning_rate": 0.0005999108485804028,
      "loss": 3.7733,
      "step": 1788
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6089115142822266,
      "learning_rate": 0.0005999107488355169,
      "loss": 3.6635,
      "step": 1789
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.263582706451416,
      "learning_rate": 0.0005999106490348717,
      "loss": 3.8623,
      "step": 1790
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7361594438552856,
      "learning_rate": 0.0005999105491784675,
      "loss": 3.6398,
      "step": 1791
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.539049744606018,
      "learning_rate": 0.0005999104492663043,
      "loss": 3.6113,
      "step": 1792
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2368078231811523,
      "learning_rate": 0.0005999103492983823,
      "loss": 3.5818,
      "step": 1793
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4284435510635376,
      "learning_rate": 0.0005999102492747011,
      "loss": 3.5307,
      "step": 1794
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3946031332015991,
      "learning_rate": 0.0005999101491952608,
      "loss": 3.6183,
      "step": 1795
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.207211494445801,
      "learning_rate": 0.0005999100490600617,
      "loss": 3.7091,
      "step": 1796
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3608391284942627,
      "learning_rate": 0.0005999099488691037,
      "loss": 3.7066,
      "step": 1797
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.5734059810638428,
      "learning_rate": 0.0005999098486223867,
      "loss": 3.7379,
      "step": 1798
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9282134771347046,
      "learning_rate": 0.0005999097483199108,
      "loss": 3.8403,
      "step": 1799
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7040585279464722,
      "learning_rate": 0.0005999096479616761,
      "loss": 3.6617,
      "step": 1800
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4928617477416992,
      "learning_rate": 0.0005999095475476825,
      "loss": 3.9835,
      "step": 1801
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.7206075191497803,
      "learning_rate": 0.00059990944707793,
      "loss": 3.7482,
      "step": 1802
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1255600452423096,
      "learning_rate": 0.0005999093465524186,
      "loss": 3.7633,
      "step": 1803
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4627262353897095,
      "learning_rate": 0.0005999092459711485,
      "loss": 3.5632,
      "step": 1804
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0699963569641113,
      "learning_rate": 0.0005999091453341197,
      "loss": 3.5966,
      "step": 1805
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.978441834449768,
      "learning_rate": 0.000599909044641332,
      "loss": 3.5433,
      "step": 1806
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4390157461166382,
      "learning_rate": 0.0005999089438927856,
      "loss": 3.5343,
      "step": 1807
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6780511140823364,
      "learning_rate": 0.0005999088430884803,
      "loss": 4.2122,
      "step": 1808
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.635258436203003,
      "learning_rate": 0.0005999087422284165,
      "loss": 3.7082,
      "step": 1809
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4682472944259644,
      "learning_rate": 0.000599908641312594,
      "loss": 3.8071,
      "step": 1810
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.638043761253357,
      "learning_rate": 0.0005999085403410127,
      "loss": 3.5486,
      "step": 1811
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8203940391540527,
      "learning_rate": 0.0005999084393136728,
      "loss": 3.7153,
      "step": 1812
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3732255697250366,
      "learning_rate": 0.0005999083382305743,
      "loss": 3.8678,
      "step": 1813
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7846975326538086,
      "learning_rate": 0.000599908237091717,
      "loss": 3.7129,
      "step": 1814
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.21321439743042,
      "learning_rate": 0.0005999081358971013,
      "loss": 3.7157,
      "step": 1815
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.623835563659668,
      "learning_rate": 0.000599908034646727,
      "loss": 3.7612,
      "step": 1816
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1976654529571533,
      "learning_rate": 0.000599907933340594,
      "loss": 3.8732,
      "step": 1817
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4517722129821777,
      "learning_rate": 0.0005999078319787026,
      "loss": 3.7322,
      "step": 1818
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5877946615219116,
      "learning_rate": 0.0005999077305610526,
      "loss": 3.6841,
      "step": 1819
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7763547897338867,
      "learning_rate": 0.0005999076290876441,
      "loss": 3.6711,
      "step": 1820
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.91829514503479,
      "learning_rate": 0.0005999075275584772,
      "loss": 3.5094,
      "step": 1821
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.1499680280685425,
      "learning_rate": 0.0005999074259735517,
      "loss": 3.538,
      "step": 1822
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4453907012939453,
      "learning_rate": 0.0005999073243328677,
      "loss": 3.5762,
      "step": 1823
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7589486837387085,
      "learning_rate": 0.0005999072226364255,
      "loss": 3.5533,
      "step": 1824
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.302225351333618,
      "learning_rate": 0.0005999071208842246,
      "loss": 3.5587,
      "step": 1825
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2986918687820435,
      "learning_rate": 0.0005999070190762655,
      "loss": 3.9315,
      "step": 1826
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1404035091400146,
      "learning_rate": 0.000599906917212548,
      "loss": 3.777,
      "step": 1827
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3544607162475586,
      "learning_rate": 0.0005999068152930721,
      "loss": 3.5216,
      "step": 1828
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9794572591781616,
      "learning_rate": 0.0005999067133178378,
      "loss": 3.8445,
      "step": 1829
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5473135709762573,
      "learning_rate": 0.0005999066112868453,
      "loss": 3.7608,
      "step": 1830
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4068129062652588,
      "learning_rate": 0.0005999065092000945,
      "loss": 3.8697,
      "step": 1831
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9566736221313477,
      "learning_rate": 0.0005999064070575853,
      "loss": 3.6427,
      "step": 1832
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6787055730819702,
      "learning_rate": 0.000599906304859318,
      "loss": 3.8699,
      "step": 1833
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.460810661315918,
      "learning_rate": 0.0005999062026052923,
      "loss": 3.7068,
      "step": 1834
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4195125102996826,
      "learning_rate": 0.0005999061002955085,
      "loss": 3.6963,
      "step": 1835
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7245063781738281,
      "learning_rate": 0.0005999059979299664,
      "loss": 3.5036,
      "step": 1836
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5806418657302856,
      "learning_rate": 0.0005999058955086661,
      "loss": 3.8083,
      "step": 1837
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4575119018554688,
      "learning_rate": 0.0005999057930316078,
      "loss": 3.8343,
      "step": 1838
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7941946983337402,
      "learning_rate": 0.0005999056904987913,
      "loss": 3.7158,
      "step": 1839
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4809132814407349,
      "learning_rate": 0.0005999055879102165,
      "loss": 3.8001,
      "step": 1840
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.1276915073394775,
      "learning_rate": 0.0005999054852658838,
      "loss": 3.6768,
      "step": 1841
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3452603816986084,
      "learning_rate": 0.0005999053825657928,
      "loss": 3.9386,
      "step": 1842
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4009631872177124,
      "learning_rate": 0.0005999052798099438,
      "loss": 3.9744,
      "step": 1843
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0498909950256348,
      "learning_rate": 0.0005999051769983369,
      "loss": 3.5026,
      "step": 1844
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5754296779632568,
      "learning_rate": 0.0005999050741309718,
      "loss": 3.4421,
      "step": 1845
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7317051887512207,
      "learning_rate": 0.0005999049712078487,
      "loss": 3.7482,
      "step": 1846
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.002222776412964,
      "learning_rate": 0.0005999048682289676,
      "loss": 3.9259,
      "step": 1847
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0154378414154053,
      "learning_rate": 0.0005999047651943286,
      "loss": 3.606,
      "step": 1848
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.72527277469635,
      "learning_rate": 0.0005999046621039316,
      "loss": 3.9538,
      "step": 1849
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5186691284179688,
      "learning_rate": 0.0005999045589577767,
      "loss": 3.7727,
      "step": 1850
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3558857440948486,
      "learning_rate": 0.0005999044557558639,
      "loss": 3.7031,
      "step": 1851
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.293955087661743,
      "learning_rate": 0.0005999043524981931,
      "loss": 3.6382,
      "step": 1852
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9035578966140747,
      "learning_rate": 0.0005999042491847646,
      "loss": 3.5415,
      "step": 1853
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7580325603485107,
      "learning_rate": 0.000599904145815578,
      "loss": 3.562,
      "step": 1854
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.021365165710449,
      "learning_rate": 0.0005999040423906337,
      "loss": 3.5392,
      "step": 1855
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.15154767036438,
      "learning_rate": 0.0005999039389099315,
      "loss": 3.5581,
      "step": 1856
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6136361360549927,
      "learning_rate": 0.0005999038353734716,
      "loss": 3.8322,
      "step": 1857
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.90420401096344,
      "learning_rate": 0.0005999037317812538,
      "loss": 3.7026,
      "step": 1858
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.672975778579712,
      "learning_rate": 0.0005999036281332783,
      "loss": 3.5835,
      "step": 1859
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6667441129684448,
      "learning_rate": 0.0005999035244295451,
      "loss": 3.5375,
      "step": 1860
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3552829027175903,
      "learning_rate": 0.0005999034206700541,
      "loss": 3.5594,
      "step": 1861
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4328112602233887,
      "learning_rate": 0.0005999033168548055,
      "loss": 3.8968,
      "step": 1862
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.656153917312622,
      "learning_rate": 0.000599903212983799,
      "loss": 3.6376,
      "step": 1863
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.657843828201294,
      "learning_rate": 0.0005999031090570349,
      "loss": 3.872,
      "step": 1864
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8153791427612305,
      "learning_rate": 0.0005999030050745133,
      "loss": 3.7735,
      "step": 1865
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3617987632751465,
      "learning_rate": 0.000599902901036234,
      "loss": 3.5439,
      "step": 1866
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4945690631866455,
      "learning_rate": 0.000599902796942197,
      "loss": 3.5601,
      "step": 1867
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9191471338272095,
      "learning_rate": 0.0005999026927924026,
      "loss": 3.5075,
      "step": 1868
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.5905747413635254,
      "learning_rate": 0.0005999025885868504,
      "loss": 3.6276,
      "step": 1869
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4599076509475708,
      "learning_rate": 0.0005999024843255409,
      "loss": 3.5047,
      "step": 1870
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.540889263153076,
      "learning_rate": 0.0005999023800084736,
      "loss": 3.6106,
      "step": 1871
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.6404908895492554,
      "learning_rate": 0.000599902275635649,
      "loss": 3.7029,
      "step": 1872
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3829013109207153,
      "learning_rate": 0.0005999021712070668,
      "loss": 3.8439,
      "step": 1873
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.0294079780578613,
      "learning_rate": 0.0005999020667227271,
      "loss": 3.7588,
      "step": 1874
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3719139099121094,
      "learning_rate": 0.0005999019621826299,
      "loss": 3.5911,
      "step": 1875
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.60398268699646,
      "learning_rate": 0.0005999018575867754,
      "loss": 3.6493,
      "step": 1876
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.104818820953369,
      "learning_rate": 0.0005999017529351636,
      "loss": 3.7573,
      "step": 1877
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.955393075942993,
      "learning_rate": 0.000599901648227794,
      "loss": 3.5521,
      "step": 1878
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.2645862102508545,
      "learning_rate": 0.0005999015434646674,
      "loss": 3.9006,
      "step": 1879
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4719822406768799,
      "learning_rate": 0.0005999014386457832,
      "loss": 3.7946,
      "step": 1880
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.2920265197753906,
      "learning_rate": 0.0005999013337711418,
      "loss": 3.6955,
      "step": 1881
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.6402668952941895,
      "learning_rate": 0.000599901228840743,
      "loss": 3.591,
      "step": 1882
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.141850709915161,
      "learning_rate": 0.000599901123854587,
      "loss": 3.5202,
      "step": 1883
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.06925106048584,
      "learning_rate": 0.0005999010188126737,
      "loss": 3.6955,
      "step": 1884
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.9665896892547607,
      "learning_rate": 0.0005999009137150031,
      "loss": 3.6844,
      "step": 1885
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.862461805343628,
      "learning_rate": 0.0005999008085615752,
      "loss": 3.7004,
      "step": 1886
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3115426301956177,
      "learning_rate": 0.0005999007033523901,
      "loss": 3.5124,
      "step": 1887
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.761502265930176,
      "learning_rate": 0.000599900598087448,
      "loss": 3.4183,
      "step": 1888
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.8236145973205566,
      "learning_rate": 0.0005999004927667484,
      "loss": 3.8211,
      "step": 1889
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.770970582962036,
      "learning_rate": 0.0005999003873902918,
      "loss": 3.5498,
      "step": 1890
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4879528284072876,
      "learning_rate": 0.0005999002819580781,
      "loss": 3.7261,
      "step": 1891
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.5492899417877197,
      "learning_rate": 0.0005999001764701073,
      "loss": 3.4757,
      "step": 1892
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.8392021656036377,
      "learning_rate": 0.0005999000709263792,
      "loss": 3.5905,
      "step": 1893
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8891198635101318,
      "learning_rate": 0.0005998999653268942,
      "loss": 3.7413,
      "step": 1894
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8850833177566528,
      "learning_rate": 0.0005998998596716521,
      "loss": 4.0385,
      "step": 1895
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.09382963180542,
      "learning_rate": 0.0005998997539606529,
      "loss": 3.7791,
      "step": 1896
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3464787006378174,
      "learning_rate": 0.0005998996481938968,
      "loss": 3.3927,
      "step": 1897
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.870481014251709,
      "learning_rate": 0.0005998995423713835,
      "loss": 3.6967,
      "step": 1898
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.4733588695526123,
      "learning_rate": 0.0005998994364931133,
      "loss": 3.6234,
      "step": 1899
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.8407459259033203,
      "learning_rate": 0.0005998993305590862,
      "loss": 3.6856,
      "step": 1900
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.7810375690460205,
      "learning_rate": 0.000599899224569302,
      "loss": 3.6269,
      "step": 1901
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.826534390449524,
      "learning_rate": 0.000599899118523761,
      "loss": 3.767,
      "step": 1902
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.9541800022125244,
      "learning_rate": 0.0005998990124224631,
      "loss": 3.679,
      "step": 1903
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.272453784942627,
      "learning_rate": 0.0005998989062654083,
      "loss": 3.6241,
      "step": 1904
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.676974892616272,
      "learning_rate": 0.0005998988000525966,
      "loss": 3.7544,
      "step": 1905
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8056080341339111,
      "learning_rate": 0.000599898693784028,
      "loss": 3.6584,
      "step": 1906
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.3586180210113525,
      "learning_rate": 0.0005998985874597027,
      "loss": 3.7108,
      "step": 1907
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7930446863174438,
      "learning_rate": 0.0005998984810796204,
      "loss": 3.7197,
      "step": 1908
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4799107313156128,
      "learning_rate": 0.0005998983746437816,
      "loss": 3.7417,
      "step": 1909
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8746531009674072,
      "learning_rate": 0.0005998982681521858,
      "loss": 3.6114,
      "step": 1910
    },
    {
      "epoch": 0.02,
      "grad_norm": 2.3836421966552734,
      "learning_rate": 0.0005998981616048333,
      "loss": 3.6557,
      "step": 1911
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7682952880859375,
      "learning_rate": 0.0005998980550017242,
      "loss": 3.6471,
      "step": 1912
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.422993779182434,
      "learning_rate": 0.0005998979483428582,
      "loss": 3.5135,
      "step": 1913
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.2287012338638306,
      "learning_rate": 0.0005998978416282356,
      "loss": 3.8583,
      "step": 1914
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7789231538772583,
      "learning_rate": 0.0005998977348578563,
      "loss": 3.6368,
      "step": 1915
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.7425721883773804,
      "learning_rate": 0.0005998976280317204,
      "loss": 3.6649,
      "step": 1916
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.3045722246170044,
      "learning_rate": 0.0005998975211498278,
      "loss": 3.6768,
      "step": 1917
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4896165132522583,
      "learning_rate": 0.0005998974142121786,
      "loss": 3.6211,
      "step": 1918
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.4686719179153442,
      "learning_rate": 0.0005998973072187729,
      "loss": 3.6017,
      "step": 1919
    },
    {
      "epoch": 0.02,
      "grad_norm": 1.8638960123062134,
      "learning_rate": 0.0005998972001696105,
      "loss": 3.9282,
      "step": 1920
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.443198800086975,
      "learning_rate": 0.0005998970930646916,
      "loss": 3.6807,
      "step": 1921
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3339167833328247,
      "learning_rate": 0.0005998969859040162,
      "loss": 3.6653,
      "step": 1922
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.218509554862976,
      "learning_rate": 0.0005998968786875843,
      "loss": 3.6684,
      "step": 1923
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8146610260009766,
      "learning_rate": 0.0005998967714153959,
      "loss": 3.5601,
      "step": 1924
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.632616639137268,
      "learning_rate": 0.0005998966640874511,
      "loss": 3.729,
      "step": 1925
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4411653280258179,
      "learning_rate": 0.0005998965567037497,
      "loss": 3.9993,
      "step": 1926
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1227221488952637,
      "learning_rate": 0.0005998964492642919,
      "loss": 3.4698,
      "step": 1927
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6221455335617065,
      "learning_rate": 0.0005998963417690778,
      "loss": 3.7995,
      "step": 1928
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4880012273788452,
      "learning_rate": 0.0005998962342181072,
      "loss": 3.8524,
      "step": 1929
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0287327766418457,
      "learning_rate": 0.0005998961266113803,
      "loss": 3.6729,
      "step": 1930
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.355573296546936,
      "learning_rate": 0.000599896018948897,
      "loss": 3.5973,
      "step": 1931
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9863187074661255,
      "learning_rate": 0.0005998959112306574,
      "loss": 3.5818,
      "step": 1932
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.657027244567871,
      "learning_rate": 0.0005998958034566614,
      "loss": 3.4528,
      "step": 1933
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6569743156433105,
      "learning_rate": 0.0005998956956269092,
      "loss": 3.7085,
      "step": 1934
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7621337175369263,
      "learning_rate": 0.0005998955877414007,
      "loss": 3.611,
      "step": 1935
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.55948805809021,
      "learning_rate": 0.000599895479800136,
      "loss": 3.7632,
      "step": 1936
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7965025901794434,
      "learning_rate": 0.000599895371803115,
      "loss": 3.669,
      "step": 1937
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4747426509857178,
      "learning_rate": 0.0005998952637503378,
      "loss": 4.0753,
      "step": 1938
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3880841732025146,
      "learning_rate": 0.0005998951556418045,
      "loss": 3.8722,
      "step": 1939
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7663648128509521,
      "learning_rate": 0.0005998950474775149,
      "loss": 3.7139,
      "step": 1940
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7330970764160156,
      "learning_rate": 0.0005998949392574692,
      "loss": 3.5285,
      "step": 1941
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6909385919570923,
      "learning_rate": 0.0005998948309816674,
      "loss": 3.8428,
      "step": 1942
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6498862504959106,
      "learning_rate": 0.0005998947226501095,
      "loss": 3.8921,
      "step": 1943
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2314939498901367,
      "learning_rate": 0.0005998946142627954,
      "loss": 3.7808,
      "step": 1944
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4693683385849,
      "learning_rate": 0.0005998945058197253,
      "loss": 3.6997,
      "step": 1945
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4741013050079346,
      "learning_rate": 0.0005998943973208992,
      "loss": 3.4982,
      "step": 1946
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.737250566482544,
      "learning_rate": 0.0005998942887663171,
      "loss": 3.3685,
      "step": 1947
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4470276832580566,
      "learning_rate": 0.0005998941801559789,
      "loss": 3.5322,
      "step": 1948
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.894762635231018,
      "learning_rate": 0.0005998940714898847,
      "loss": 3.9166,
      "step": 1949
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5405782461166382,
      "learning_rate": 0.0005998939627680346,
      "loss": 3.4949,
      "step": 1950
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2116782665252686,
      "learning_rate": 0.0005998938539904284,
      "loss": 3.2519,
      "step": 1951
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3905428647994995,
      "learning_rate": 0.0005998937451570665,
      "loss": 3.7635,
      "step": 1952
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.420893907546997,
      "learning_rate": 0.0005998936362679485,
      "loss": 3.6823,
      "step": 1953
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4574638605117798,
      "learning_rate": 0.0005998935273230747,
      "loss": 3.9098,
      "step": 1954
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.837804913520813,
      "learning_rate": 0.000599893418322445,
      "loss": 3.4481,
      "step": 1955
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.651178240776062,
      "learning_rate": 0.0005998933092660595,
      "loss": 3.5796,
      "step": 1956
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8459166288375854,
      "learning_rate": 0.0005998932001539182,
      "loss": 3.6732,
      "step": 1957
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5152490139007568,
      "learning_rate": 0.0005998930909860209,
      "loss": 3.7673,
      "step": 1958
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.299884557723999,
      "learning_rate": 0.000599892981762368,
      "loss": 3.5119,
      "step": 1959
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7828831672668457,
      "learning_rate": 0.0005998928724829592,
      "loss": 3.4256,
      "step": 1960
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6701043844223022,
      "learning_rate": 0.0005998927631477948,
      "loss": 3.5096,
      "step": 1961
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7653988599777222,
      "learning_rate": 0.0005998926537568747,
      "loss": 3.8224,
      "step": 1962
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6829780340194702,
      "learning_rate": 0.0005998925443101988,
      "loss": 3.7116,
      "step": 1963
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5050948858261108,
      "learning_rate": 0.0005998924348077671,
      "loss": 3.558,
      "step": 1964
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5064876079559326,
      "learning_rate": 0.0005998923252495799,
      "loss": 3.6308,
      "step": 1965
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.589798092842102,
      "learning_rate": 0.0005998922156356369,
      "loss": 3.7676,
      "step": 1966
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0848655700683594,
      "learning_rate": 0.0005998921059659385,
      "loss": 3.8181,
      "step": 1967
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7984343767166138,
      "learning_rate": 0.0005998919962404844,
      "loss": 3.7375,
      "step": 1968
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.726387858390808,
      "learning_rate": 0.0005998918864592747,
      "loss": 3.4744,
      "step": 1969
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1200473308563232,
      "learning_rate": 0.0005998917766223094,
      "loss": 3.5855,
      "step": 1970
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.6727213859558105,
      "learning_rate": 0.0005998916667295886,
      "loss": 3.4827,
      "step": 1971
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8436161279678345,
      "learning_rate": 0.0005998915567811123,
      "loss": 3.5529,
      "step": 1972
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1806492805480957,
      "learning_rate": 0.0005998914467768804,
      "loss": 3.7204,
      "step": 1973
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6911276578903198,
      "learning_rate": 0.0005998913367168932,
      "loss": 3.3954,
      "step": 1974
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3608084917068481,
      "learning_rate": 0.0005998912266011505,
      "loss": 3.7234,
      "step": 1975
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5003411769866943,
      "learning_rate": 0.0005998911164296522,
      "loss": 3.6298,
      "step": 1976
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6444990634918213,
      "learning_rate": 0.0005998910062023986,
      "loss": 3.4205,
      "step": 1977
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4582842588424683,
      "learning_rate": 0.0005998908959193895,
      "loss": 3.6621,
      "step": 1978
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7473970651626587,
      "learning_rate": 0.0005998907855806251,
      "loss": 3.6223,
      "step": 1979
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.219076156616211,
      "learning_rate": 0.0005998906751861054,
      "loss": 3.527,
      "step": 1980
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.508943796157837,
      "learning_rate": 0.0005998905647358303,
      "loss": 3.59,
      "step": 1981
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3806663751602173,
      "learning_rate": 0.0005998904542297999,
      "loss": 3.6296,
      "step": 1982
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9274550676345825,
      "learning_rate": 0.0005998903436680142,
      "loss": 3.7494,
      "step": 1983
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4925354719161987,
      "learning_rate": 0.0005998902330504732,
      "loss": 3.7414,
      "step": 1984
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5115774869918823,
      "learning_rate": 0.0005998901223771769,
      "loss": 3.6822,
      "step": 1985
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4224963188171387,
      "learning_rate": 0.0005998900116481254,
      "loss": 3.708,
      "step": 1986
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7584048509597778,
      "learning_rate": 0.0005998899008633188,
      "loss": 3.6839,
      "step": 1987
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.8086864948272705,
      "learning_rate": 0.000599889790022757,
      "loss": 3.6401,
      "step": 1988
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3854631185531616,
      "learning_rate": 0.0005998896791264399,
      "loss": 4.066,
      "step": 1989
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4085620641708374,
      "learning_rate": 0.0005998895681743676,
      "loss": 3.8819,
      "step": 1990
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7703896760940552,
      "learning_rate": 0.0005998894571665403,
      "loss": 3.644,
      "step": 1991
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6906729936599731,
      "learning_rate": 0.0005998893461029579,
      "loss": 3.7715,
      "step": 1992
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.677276849746704,
      "learning_rate": 0.0005998892349836203,
      "loss": 3.9932,
      "step": 1993
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2131457328796387,
      "learning_rate": 0.0005998891238085277,
      "loss": 3.6351,
      "step": 1994
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1945199966430664,
      "learning_rate": 0.00059988901257768,
      "loss": 3.3998,
      "step": 1995
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6888047456741333,
      "learning_rate": 0.0005998889012910774,
      "loss": 3.5743,
      "step": 1996
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.645776629447937,
      "learning_rate": 0.0005998887899487196,
      "loss": 3.4939,
      "step": 1997
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1324150562286377,
      "learning_rate": 0.000599888678550607,
      "loss": 3.6063,
      "step": 1998
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6154372692108154,
      "learning_rate": 0.0005998885670967393,
      "loss": 3.7131,
      "step": 1999
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8422495126724243,
      "learning_rate": 0.0005998884555871167,
      "loss": 3.734,
      "step": 2000
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.789037823677063,
      "learning_rate": 0.0005998883440217393,
      "loss": 3.8297,
      "step": 2001
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.637280225753784,
      "learning_rate": 0.0005998882324006068,
      "loss": 3.7472,
      "step": 2002
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8682221174240112,
      "learning_rate": 0.0005998881207237195,
      "loss": 3.7986,
      "step": 2003
    },
    {
      "epoch": 0.03,
      "grad_norm": 4.832018852233887,
      "learning_rate": 0.0005998880089910773,
      "loss": 3.7006,
      "step": 2004
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6818053722381592,
      "learning_rate": 0.0005998878972026803,
      "loss": 3.556,
      "step": 2005
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5117943286895752,
      "learning_rate": 0.0005998877853585284,
      "loss": 3.5799,
      "step": 2006
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.537055492401123,
      "learning_rate": 0.0005998876734586218,
      "loss": 3.744,
      "step": 2007
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.250654935836792,
      "learning_rate": 0.0005998875615029604,
      "loss": 3.6213,
      "step": 2008
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3488703966140747,
      "learning_rate": 0.0005998874494915443,
      "loss": 3.8234,
      "step": 2009
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7243199348449707,
      "learning_rate": 0.0005998873374243734,
      "loss": 3.8791,
      "step": 2010
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2527196407318115,
      "learning_rate": 0.0005998872253014478,
      "loss": 3.8512,
      "step": 2011
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3656296730041504,
      "learning_rate": 0.0005998871131227675,
      "loss": 3.6887,
      "step": 2012
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.521893858909607,
      "learning_rate": 0.0005998870008883324,
      "loss": 3.5492,
      "step": 2013
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.348037838935852,
      "learning_rate": 0.0005998868885981428,
      "loss": 3.5357,
      "step": 2014
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2303121089935303,
      "learning_rate": 0.0005998867762521985,
      "loss": 3.5069,
      "step": 2015
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4170578718185425,
      "learning_rate": 0.0005998866638504996,
      "loss": 3.8269,
      "step": 2016
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.420331597328186,
      "learning_rate": 0.0005998865513930461,
      "loss": 3.5816,
      "step": 2017
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.437221884727478,
      "learning_rate": 0.0005998864388798381,
      "loss": 3.6774,
      "step": 2018
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3781578540802002,
      "learning_rate": 0.0005998863263108755,
      "loss": 3.5904,
      "step": 2019
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3893359899520874,
      "learning_rate": 0.0005998862136861583,
      "loss": 3.5224,
      "step": 2020
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3745425939559937,
      "learning_rate": 0.0005998861010056867,
      "loss": 3.645,
      "step": 2021
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6864752769470215,
      "learning_rate": 0.0005998859882694605,
      "loss": 3.5349,
      "step": 2022
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7479567527770996,
      "learning_rate": 0.00059988587547748,
      "loss": 3.865,
      "step": 2023
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5217658281326294,
      "learning_rate": 0.0005998857626297449,
      "loss": 3.469,
      "step": 2024
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2763211727142334,
      "learning_rate": 0.0005998856497262554,
      "loss": 3.5534,
      "step": 2025
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4895350933074951,
      "learning_rate": 0.0005998855367670116,
      "loss": 3.6825,
      "step": 2026
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4309110641479492,
      "learning_rate": 0.0005998854237520134,
      "loss": 3.7308,
      "step": 2027
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.154907464981079,
      "learning_rate": 0.0005998853106812608,
      "loss": 3.4601,
      "step": 2028
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5451358556747437,
      "learning_rate": 0.0005998851975547538,
      "loss": 3.6138,
      "step": 2029
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3952710628509521,
      "learning_rate": 0.0005998850843724925,
      "loss": 3.4712,
      "step": 2030
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5950355529785156,
      "learning_rate": 0.0005998849711344768,
      "loss": 3.5192,
      "step": 2031
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7674438953399658,
      "learning_rate": 0.0005998848578407071,
      "loss": 3.6302,
      "step": 2032
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5939462184906006,
      "learning_rate": 0.000599884744491183,
      "loss": 3.8396,
      "step": 2033
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.309105634689331,
      "learning_rate": 0.0005998846310859046,
      "loss": 3.4515,
      "step": 2034
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.0053341388702393,
      "learning_rate": 0.000599884517624872,
      "loss": 3.4322,
      "step": 2035
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5623539686203003,
      "learning_rate": 0.0005998844041080853,
      "loss": 3.4982,
      "step": 2036
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.142820358276367,
      "learning_rate": 0.0005998842905355444,
      "loss": 3.7263,
      "step": 2037
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0706448554992676,
      "learning_rate": 0.0005998841769072493,
      "loss": 3.6918,
      "step": 2038
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5234901905059814,
      "learning_rate": 0.0005998840632232001,
      "loss": 3.4588,
      "step": 2039
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9971779584884644,
      "learning_rate": 0.0005998839494833968,
      "loss": 3.5711,
      "step": 2040
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1542413234710693,
      "learning_rate": 0.0005998838356878394,
      "loss": 3.5744,
      "step": 2041
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8865922689437866,
      "learning_rate": 0.000599883721836528,
      "loss": 3.6276,
      "step": 2042
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5972373485565186,
      "learning_rate": 0.0005998836079294625,
      "loss": 3.8323,
      "step": 2043
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.505590558052063,
      "learning_rate": 0.0005998834939666429,
      "loss": 3.7296,
      "step": 2044
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4759466648101807,
      "learning_rate": 0.0005998833799480695,
      "loss": 3.4376,
      "step": 2045
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4051508903503418,
      "learning_rate": 0.0005998832658737419,
      "loss": 3.6267,
      "step": 2046
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8770744800567627,
      "learning_rate": 0.0005998831517436604,
      "loss": 3.6006,
      "step": 2047
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3049654960632324,
      "learning_rate": 0.0005998830375578251,
      "loss": 3.889,
      "step": 2048
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.867844581604004,
      "learning_rate": 0.0005998829233162357,
      "loss": 3.7482,
      "step": 2049
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5265758037567139,
      "learning_rate": 0.0005998828090188925,
      "loss": 3.6348,
      "step": 2050
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3777679204940796,
      "learning_rate": 0.0005998826946657954,
      "loss": 3.619,
      "step": 2051
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7734018564224243,
      "learning_rate": 0.0005998825802569444,
      "loss": 3.5499,
      "step": 2052
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.528625726699829,
      "learning_rate": 0.0005998824657923397,
      "loss": 3.4587,
      "step": 2053
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.414129376411438,
      "learning_rate": 0.0005998823512719812,
      "loss": 3.9199,
      "step": 2054
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8035776615142822,
      "learning_rate": 0.0005998822366958688,
      "loss": 3.7237,
      "step": 2055
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.625170350074768,
      "learning_rate": 0.0005998821220640026,
      "loss": 3.9515,
      "step": 2056
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4242185354232788,
      "learning_rate": 0.0005998820073763826,
      "loss": 3.8489,
      "step": 2057
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.162391185760498,
      "learning_rate": 0.0005998818926330091,
      "loss": 3.7086,
      "step": 2058
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.610994577407837,
      "learning_rate": 0.0005998817778338817,
      "loss": 3.5401,
      "step": 2059
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3435766696929932,
      "learning_rate": 0.0005998816629790007,
      "loss": 3.8572,
      "step": 2060
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9600238800048828,
      "learning_rate": 0.0005998815480683661,
      "loss": 4.008,
      "step": 2061
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.471670150756836,
      "learning_rate": 0.0005998814331019777,
      "loss": 3.6529,
      "step": 2062
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.885367751121521,
      "learning_rate": 0.0005998813180798358,
      "loss": 3.6016,
      "step": 2063
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7189431190490723,
      "learning_rate": 0.0005998812030019403,
      "loss": 3.623,
      "step": 2064
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8321528434753418,
      "learning_rate": 0.0005998810878682912,
      "loss": 3.7045,
      "step": 2065
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5840718746185303,
      "learning_rate": 0.0005998809726788885,
      "loss": 3.507,
      "step": 2066
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4803611040115356,
      "learning_rate": 0.0005998808574337324,
      "loss": 3.604,
      "step": 2067
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.529719829559326,
      "learning_rate": 0.0005998807421328227,
      "loss": 3.5882,
      "step": 2068
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.655806303024292,
      "learning_rate": 0.0005998806267761595,
      "loss": 3.8696,
      "step": 2069
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6941019296646118,
      "learning_rate": 0.0005998805113637428,
      "loss": 3.8406,
      "step": 2070
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3190038204193115,
      "learning_rate": 0.0005998803958955727,
      "loss": 3.6888,
      "step": 2071
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3210809230804443,
      "learning_rate": 0.0005998802803716491,
      "loss": 3.6144,
      "step": 2072
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3594688177108765,
      "learning_rate": 0.0005998801647919722,
      "loss": 3.6099,
      "step": 2073
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.418267011642456,
      "learning_rate": 0.0005998800491565419,
      "loss": 3.6801,
      "step": 2074
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.414000153541565,
      "learning_rate": 0.0005998799334653582,
      "loss": 3.6235,
      "step": 2075
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8307911157608032,
      "learning_rate": 0.0005998798177184212,
      "loss": 3.7501,
      "step": 2076
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3156919479370117,
      "learning_rate": 0.0005998797019157308,
      "loss": 3.8703,
      "step": 2077
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.687819480895996,
      "learning_rate": 0.0005998795860572871,
      "loss": 3.5238,
      "step": 2078
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3884062767028809,
      "learning_rate": 0.0005998794701430903,
      "loss": 3.4441,
      "step": 2079
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.858708143234253,
      "learning_rate": 0.00059987935417314,
      "loss": 3.6929,
      "step": 2080
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.309860110282898,
      "learning_rate": 0.0005998792381474367,
      "loss": 3.8308,
      "step": 2081
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5619982481002808,
      "learning_rate": 0.00059987912206598,
      "loss": 3.6903,
      "step": 2082
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6084611415863037,
      "learning_rate": 0.0005998790059287703,
      "loss": 3.5127,
      "step": 2083
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5328854322433472,
      "learning_rate": 0.0005998788897358073,
      "loss": 3.4336,
      "step": 2084
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6028575897216797,
      "learning_rate": 0.0005998787734870911,
      "loss": 3.8774,
      "step": 2085
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.478498101234436,
      "learning_rate": 0.0005998786571826219,
      "loss": 3.8127,
      "step": 2086
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.1126887798309326,
      "learning_rate": 0.0005998785408223994,
      "loss": 3.7381,
      "step": 2087
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4053586721420288,
      "learning_rate": 0.000599878424406424,
      "loss": 3.5736,
      "step": 2088
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6316832304000854,
      "learning_rate": 0.0005998783079346955,
      "loss": 3.7164,
      "step": 2089
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4514168500900269,
      "learning_rate": 0.0005998781914072141,
      "loss": 3.4973,
      "step": 2090
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.008733034133911,
      "learning_rate": 0.0005998780748239795,
      "loss": 3.4275,
      "step": 2091
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6054900884628296,
      "learning_rate": 0.0005998779581849919,
      "loss": 3.6062,
      "step": 2092
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5833148956298828,
      "learning_rate": 0.0005998778414902515,
      "loss": 3.7445,
      "step": 2093
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5003974437713623,
      "learning_rate": 0.000599877724739758,
      "loss": 3.6088,
      "step": 2094
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5902361869812012,
      "learning_rate": 0.0005998776079335117,
      "loss": 3.7112,
      "step": 2095
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.909171223640442,
      "learning_rate": 0.0005998774910715123,
      "loss": 3.5535,
      "step": 2096
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7701836824417114,
      "learning_rate": 0.00059987737415376,
      "loss": 3.3845,
      "step": 2097
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8520704507827759,
      "learning_rate": 0.000599877257180255,
      "loss": 3.5232,
      "step": 2098
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6093977689743042,
      "learning_rate": 0.0005998771401509971,
      "loss": 3.4621,
      "step": 2099
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7202131748199463,
      "learning_rate": 0.0005998770230659864,
      "loss": 3.6298,
      "step": 2100
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.257957696914673,
      "learning_rate": 0.0005998769059252229,
      "loss": 3.7312,
      "step": 2101
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.694109320640564,
      "learning_rate": 0.0005998767887287066,
      "loss": 3.6742,
      "step": 2102
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.093811273574829,
      "learning_rate": 0.0005998766714764376,
      "loss": 3.7867,
      "step": 2103
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7521874904632568,
      "learning_rate": 0.0005998765541684157,
      "loss": 3.6559,
      "step": 2104
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.722640037536621,
      "learning_rate": 0.0005998764368046413,
      "loss": 3.6811,
      "step": 2105
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.457780122756958,
      "learning_rate": 0.0005998763193851141,
      "loss": 3.2992,
      "step": 2106
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.438133955001831,
      "learning_rate": 0.0005998762019098343,
      "loss": 3.969,
      "step": 2107
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4246906042099,
      "learning_rate": 0.0005998760843788017,
      "loss": 3.394,
      "step": 2108
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6616766452789307,
      "learning_rate": 0.0005998759667920166,
      "loss": 3.7518,
      "step": 2109
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8088079690933228,
      "learning_rate": 0.000599875849149479,
      "loss": 3.4466,
      "step": 2110
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5670703649520874,
      "learning_rate": 0.0005998757314511886,
      "loss": 3.4605,
      "step": 2111
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.496730089187622,
      "learning_rate": 0.0005998756136971457,
      "loss": 3.7385,
      "step": 2112
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.507476806640625,
      "learning_rate": 0.0005998754958873503,
      "loss": 3.6132,
      "step": 2113
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1090540885925293,
      "learning_rate": 0.0005998753780218024,
      "loss": 3.7915,
      "step": 2114
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8381139039993286,
      "learning_rate": 0.000599875260100502,
      "loss": 3.7633,
      "step": 2115
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1859757900238037,
      "learning_rate": 0.0005998751421234491,
      "loss": 3.8693,
      "step": 2116
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.314470887184143,
      "learning_rate": 0.0005998750240906437,
      "loss": 3.6168,
      "step": 2117
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.549623727798462,
      "learning_rate": 0.000599874906002086,
      "loss": 3.682,
      "step": 2118
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7462103366851807,
      "learning_rate": 0.0005998747878577759,
      "loss": 3.2707,
      "step": 2119
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.453573226928711,
      "learning_rate": 0.0005998746696577133,
      "loss": 3.6613,
      "step": 2120
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6760344505310059,
      "learning_rate": 0.0005998745514018983,
      "loss": 3.4435,
      "step": 2121
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8763539791107178,
      "learning_rate": 0.0005998744330903311,
      "loss": 3.5926,
      "step": 2122
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6667143106460571,
      "learning_rate": 0.0005998743147230116,
      "loss": 3.6919,
      "step": 2123
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1217470169067383,
      "learning_rate": 0.0005998741962999397,
      "loss": 3.2416,
      "step": 2124
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6834278106689453,
      "learning_rate": 0.0005998740778211156,
      "loss": 3.8248,
      "step": 2125
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6244486570358276,
      "learning_rate": 0.0005998739592865392,
      "loss": 3.7405,
      "step": 2126
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4909552335739136,
      "learning_rate": 0.0005998738406962106,
      "loss": 3.5368,
      "step": 2127
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.161877155303955,
      "learning_rate": 0.0005998737220501298,
      "loss": 3.5978,
      "step": 2128
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5291874408721924,
      "learning_rate": 0.0005998736033482967,
      "loss": 3.7376,
      "step": 2129
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4837639331817627,
      "learning_rate": 0.0005998734845907114,
      "loss": 3.5628,
      "step": 2130
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.021448850631714,
      "learning_rate": 0.0005998733657773742,
      "loss": 3.4894,
      "step": 2131
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.836639404296875,
      "learning_rate": 0.0005998732469082847,
      "loss": 3.8186,
      "step": 2132
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6179804801940918,
      "learning_rate": 0.0005998731279834432,
      "loss": 3.7692,
      "step": 2133
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7977814674377441,
      "learning_rate": 0.0005998730090028496,
      "loss": 3.7931,
      "step": 2134
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0552117824554443,
      "learning_rate": 0.0005998728899665039,
      "loss": 3.4709,
      "step": 2135
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5485789775848389,
      "learning_rate": 0.0005998727708744062,
      "loss": 3.4388,
      "step": 2136
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.276848077774048,
      "learning_rate": 0.0005998726517265565,
      "loss": 3.4467,
      "step": 2137
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.469168186187744,
      "learning_rate": 0.0005998725325229549,
      "loss": 3.5199,
      "step": 2138
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5588308572769165,
      "learning_rate": 0.0005998724132636012,
      "loss": 3.6621,
      "step": 2139
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9656912088394165,
      "learning_rate": 0.0005998722939484956,
      "loss": 3.6643,
      "step": 2140
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4877334833145142,
      "learning_rate": 0.0005998721745776381,
      "loss": 3.5742,
      "step": 2141
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9654786586761475,
      "learning_rate": 0.0005998720551510287,
      "loss": 3.5627,
      "step": 2142
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6867048740386963,
      "learning_rate": 0.0005998719356686674,
      "loss": 3.4413,
      "step": 2143
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9227840900421143,
      "learning_rate": 0.0005998718161305543,
      "loss": 3.566,
      "step": 2144
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8313058614730835,
      "learning_rate": 0.0005998716965366893,
      "loss": 3.653,
      "step": 2145
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.823584794998169,
      "learning_rate": 0.0005998715768870725,
      "loss": 3.4593,
      "step": 2146
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7461841106414795,
      "learning_rate": 0.0005998714571817038,
      "loss": 3.6048,
      "step": 2147
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.907063603401184,
      "learning_rate": 0.0005998713374205835,
      "loss": 3.7196,
      "step": 2148
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1418325901031494,
      "learning_rate": 0.0005998712176037115,
      "loss": 3.7022,
      "step": 2149
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8613998889923096,
      "learning_rate": 0.0005998710977310876,
      "loss": 3.6472,
      "step": 2150
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1491971015930176,
      "learning_rate": 0.0005998709778027121,
      "loss": 3.4322,
      "step": 2151
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.817254662513733,
      "learning_rate": 0.000599870857818585,
      "loss": 3.6017,
      "step": 2152
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8241193294525146,
      "learning_rate": 0.0005998707377787063,
      "loss": 3.4994,
      "step": 2153
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7052886486053467,
      "learning_rate": 0.0005998706176830758,
      "loss": 3.5254,
      "step": 2154
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.838470697402954,
      "learning_rate": 0.0005998704975316938,
      "loss": 3.6953,
      "step": 2155
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0259857177734375,
      "learning_rate": 0.00059987037732456,
      "loss": 3.6411,
      "step": 2156
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3096460103988647,
      "learning_rate": 0.0005998702570616749,
      "loss": 3.6547,
      "step": 2157
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5612778663635254,
      "learning_rate": 0.0005998701367430381,
      "loss": 3.6825,
      "step": 2158
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7297849655151367,
      "learning_rate": 0.0005998700163686498,
      "loss": 3.5173,
      "step": 2159
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6232930421829224,
      "learning_rate": 0.0005998698959385101,
      "loss": 3.2609,
      "step": 2160
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.747239351272583,
      "learning_rate": 0.0005998697754526188,
      "loss": 3.6679,
      "step": 2161
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7949955463409424,
      "learning_rate": 0.0005998696549109761,
      "loss": 3.4679,
      "step": 2162
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.447849750518799,
      "learning_rate": 0.000599869534313582,
      "loss": 3.4495,
      "step": 2163
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5908381938934326,
      "learning_rate": 0.0005998694136604365,
      "loss": 3.5145,
      "step": 2164
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8841921091079712,
      "learning_rate": 0.0005998692929515396,
      "loss": 3.8069,
      "step": 2165
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4254429340362549,
      "learning_rate": 0.0005998691721868913,
      "loss": 3.8021,
      "step": 2166
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3183538913726807,
      "learning_rate": 0.0005998690513664917,
      "loss": 3.8353,
      "step": 2167
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6661624908447266,
      "learning_rate": 0.0005998689304903407,
      "loss": 3.7927,
      "step": 2168
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.4540953636169434,
      "learning_rate": 0.0005998688095584386,
      "loss": 3.6338,
      "step": 2169
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.042335033416748,
      "learning_rate": 0.000599868688570785,
      "loss": 3.4826,
      "step": 2170
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9175500869750977,
      "learning_rate": 0.0005998685675273803,
      "loss": 3.9193,
      "step": 2171
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9967896938323975,
      "learning_rate": 0.0005998684464282243,
      "loss": 3.7438,
      "step": 2172
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3847832679748535,
      "learning_rate": 0.0005998683252733171,
      "loss": 3.6492,
      "step": 2173
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7949873208999634,
      "learning_rate": 0.0005998682040626587,
      "loss": 3.5243,
      "step": 2174
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.2884013652801514,
      "learning_rate": 0.0005998680827962492,
      "loss": 3.6596,
      "step": 2175
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.8596549034118652,
      "learning_rate": 0.0005998679614740885,
      "loss": 3.6903,
      "step": 2176
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7144038677215576,
      "learning_rate": 0.0005998678400961768,
      "loss": 3.7915,
      "step": 2177
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9347976446151733,
      "learning_rate": 0.000599867718662514,
      "loss": 3.77,
      "step": 2178
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.194782257080078,
      "learning_rate": 0.0005998675971731,
      "loss": 3.6259,
      "step": 2179
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7466790676116943,
      "learning_rate": 0.0005998674756279351,
      "loss": 3.556,
      "step": 2180
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.559831142425537,
      "learning_rate": 0.0005998673540270191,
      "loss": 3.6115,
      "step": 2181
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2314584255218506,
      "learning_rate": 0.000599867232370352,
      "loss": 3.6942,
      "step": 2182
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5849950313568115,
      "learning_rate": 0.000599867110657934,
      "loss": 3.52,
      "step": 2183
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3397176265716553,
      "learning_rate": 0.0005998669888897651,
      "loss": 3.6579,
      "step": 2184
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.358030080795288,
      "learning_rate": 0.0005998668670658452,
      "loss": 3.6494,
      "step": 2185
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.508974552154541,
      "learning_rate": 0.0005998667451861744,
      "loss": 3.6605,
      "step": 2186
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.273937463760376,
      "learning_rate": 0.0005998666232507528,
      "loss": 3.5328,
      "step": 2187
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.466110110282898,
      "learning_rate": 0.0005998665012595802,
      "loss": 3.7699,
      "step": 2188
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8802672624588013,
      "learning_rate": 0.0005998663792126568,
      "loss": 3.6298,
      "step": 2189
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3517556190490723,
      "learning_rate": 0.0005998662571099826,
      "loss": 3.5094,
      "step": 2190
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5704799890518188,
      "learning_rate": 0.0005998661349515576,
      "loss": 3.7741,
      "step": 2191
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3287925720214844,
      "learning_rate": 0.0005998660127373818,
      "loss": 3.6799,
      "step": 2192
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.358039379119873,
      "learning_rate": 0.0005998658904674553,
      "loss": 3.7336,
      "step": 2193
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2982004880905151,
      "learning_rate": 0.000599865768141778,
      "loss": 3.4975,
      "step": 2194
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.143151044845581,
      "learning_rate": 0.0005998656457603501,
      "loss": 3.7139,
      "step": 2195
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.703787326812744,
      "learning_rate": 0.0005998655233231715,
      "loss": 3.7086,
      "step": 2196
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4855848550796509,
      "learning_rate": 0.0005998654008302421,
      "loss": 3.6787,
      "step": 2197
    },
    {
      "epoch": 0.03,
      "grad_norm": 4.044945240020752,
      "learning_rate": 0.0005998652782815622,
      "loss": 3.3195,
      "step": 2198
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.788984537124634,
      "learning_rate": 0.0005998651556771315,
      "loss": 3.579,
      "step": 2199
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.936952590942383,
      "learning_rate": 0.0005998650330169504,
      "loss": 3.8917,
      "step": 2200
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.890385627746582,
      "learning_rate": 0.0005998649103010187,
      "loss": 3.6505,
      "step": 2201
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.8293819427490234,
      "learning_rate": 0.0005998647875293364,
      "loss": 3.4132,
      "step": 2202
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.886373281478882,
      "learning_rate": 0.0005998646647019036,
      "loss": 3.6511,
      "step": 2203
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5959795713424683,
      "learning_rate": 0.0005998645418187204,
      "loss": 3.5623,
      "step": 2204
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.939221978187561,
      "learning_rate": 0.0005998644188797866,
      "loss": 3.7901,
      "step": 2205
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7565033435821533,
      "learning_rate": 0.0005998642958851023,
      "loss": 3.7388,
      "step": 2206
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5343793630599976,
      "learning_rate": 0.0005998641728346677,
      "loss": 3.595,
      "step": 2207
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5296951532363892,
      "learning_rate": 0.0005998640497284826,
      "loss": 3.6111,
      "step": 2208
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.986085295677185,
      "learning_rate": 0.0005998639265665471,
      "loss": 3.6455,
      "step": 2209
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0449154376983643,
      "learning_rate": 0.0005998638033488612,
      "loss": 3.7136,
      "step": 2210
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0463104248046875,
      "learning_rate": 0.0005998636800754252,
      "loss": 3.6532,
      "step": 2211
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0923824310302734,
      "learning_rate": 0.0005998635567462386,
      "loss": 3.6535,
      "step": 2212
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.512807846069336,
      "learning_rate": 0.0005998634333613018,
      "loss": 3.6874,
      "step": 2213
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9262181520462036,
      "learning_rate": 0.0005998633099206147,
      "loss": 3.584,
      "step": 2214
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3618724346160889,
      "learning_rate": 0.0005998631864241775,
      "loss": 3.8276,
      "step": 2215
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.589659333229065,
      "learning_rate": 0.0005998630628719899,
      "loss": 3.6257,
      "step": 2216
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9516834020614624,
      "learning_rate": 0.0005998629392640522,
      "loss": 3.4984,
      "step": 2217
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.663378357887268,
      "learning_rate": 0.0005998628156003643,
      "loss": 3.5255,
      "step": 2218
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4222358465194702,
      "learning_rate": 0.0005998626918809262,
      "loss": 3.7811,
      "step": 2219
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5162198543548584,
      "learning_rate": 0.0005998625681057381,
      "loss": 3.5899,
      "step": 2220
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9067128896713257,
      "learning_rate": 0.0005998624442747996,
      "loss": 3.5838,
      "step": 2221
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3305292129516602,
      "learning_rate": 0.0005998623203881112,
      "loss": 3.6844,
      "step": 2222
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6305983066558838,
      "learning_rate": 0.0005998621964456728,
      "loss": 3.4482,
      "step": 2223
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8162131309509277,
      "learning_rate": 0.0005998620724474843,
      "loss": 3.5267,
      "step": 2224
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2857080698013306,
      "learning_rate": 0.0005998619483935457,
      "loss": 3.6033,
      "step": 2225
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.85219144821167,
      "learning_rate": 0.0005998618242838571,
      "loss": 3.711,
      "step": 2226
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7771581411361694,
      "learning_rate": 0.0005998617001184187,
      "loss": 3.5127,
      "step": 2227
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9438024759292603,
      "learning_rate": 0.0005998615758972302,
      "loss": 3.32,
      "step": 2228
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.353377342224121,
      "learning_rate": 0.0005998614516202918,
      "loss": 3.4619,
      "step": 2229
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3701910972595215,
      "learning_rate": 0.0005998613272876035,
      "loss": 3.7566,
      "step": 2230
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7801852226257324,
      "learning_rate": 0.0005998612028991654,
      "loss": 3.5253,
      "step": 2231
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9464279413223267,
      "learning_rate": 0.0005998610784549774,
      "loss": 3.5851,
      "step": 2232
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4614306688308716,
      "learning_rate": 0.0005998609539550394,
      "loss": 3.7732,
      "step": 2233
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.209261655807495,
      "learning_rate": 0.0005998608293993518,
      "loss": 3.6535,
      "step": 2234
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6723350286483765,
      "learning_rate": 0.0005998607047879144,
      "loss": 3.5493,
      "step": 2235
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5251070261001587,
      "learning_rate": 0.0005998605801207271,
      "loss": 3.7335,
      "step": 2236
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4202994108200073,
      "learning_rate": 0.0005998604553977901,
      "loss": 3.7282,
      "step": 2237
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4772816896438599,
      "learning_rate": 0.0005998603306191034,
      "loss": 3.8066,
      "step": 2238
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5914756059646606,
      "learning_rate": 0.000599860205784667,
      "loss": 3.7875,
      "step": 2239
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5810086727142334,
      "learning_rate": 0.0005998600808944809,
      "loss": 3.4893,
      "step": 2240
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7746354341506958,
      "learning_rate": 0.0005998599559485453,
      "loss": 3.5034,
      "step": 2241
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1527907848358154,
      "learning_rate": 0.0005998598309468599,
      "loss": 3.6013,
      "step": 2242
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7182142734527588,
      "learning_rate": 0.000599859705889425,
      "loss": 3.6495,
      "step": 2243
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.283928394317627,
      "learning_rate": 0.0005998595807762405,
      "loss": 3.6462,
      "step": 2244
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5231412649154663,
      "learning_rate": 0.0005998594556073063,
      "loss": 3.402,
      "step": 2245
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3995943069458008,
      "learning_rate": 0.0005998593303826228,
      "loss": 3.4784,
      "step": 2246
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6090352535247803,
      "learning_rate": 0.0005998592051021897,
      "loss": 3.4045,
      "step": 2247
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9477248191833496,
      "learning_rate": 0.0005998590797660071,
      "loss": 3.4716,
      "step": 2248
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3293163776397705,
      "learning_rate": 0.0005998589543740751,
      "loss": 3.7832,
      "step": 2249
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.142704486846924,
      "learning_rate": 0.0005998588289263934,
      "loss": 3.5255,
      "step": 2250
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6198235750198364,
      "learning_rate": 0.0005998587034229627,
      "loss": 3.7614,
      "step": 2251
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9774152040481567,
      "learning_rate": 0.0005998585778637822,
      "loss": 3.853,
      "step": 2252
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.747896671295166,
      "learning_rate": 0.0005998584522488526,
      "loss": 3.584,
      "step": 2253
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6527953147888184,
      "learning_rate": 0.0005998583265781736,
      "loss": 3.699,
      "step": 2254
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5904878377914429,
      "learning_rate": 0.0005998582008517452,
      "loss": 3.7134,
      "step": 2255
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8544135093688965,
      "learning_rate": 0.0005998580750695676,
      "loss": 3.5948,
      "step": 2256
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9175653457641602,
      "learning_rate": 0.0005998579492316405,
      "loss": 3.8113,
      "step": 2257
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0731184482574463,
      "learning_rate": 0.0005998578233379644,
      "loss": 3.52,
      "step": 2258
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5823322534561157,
      "learning_rate": 0.000599857697388539,
      "loss": 3.6718,
      "step": 2259
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5849664211273193,
      "learning_rate": 0.0005998575713833643,
      "loss": 3.5221,
      "step": 2260
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.0376405715942383,
      "learning_rate": 0.0005998574453224406,
      "loss": 3.5232,
      "step": 2261
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7325150966644287,
      "learning_rate": 0.0005998573192057676,
      "loss": 3.682,
      "step": 2262
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.163790464401245,
      "learning_rate": 0.0005998571930333455,
      "loss": 3.5159,
      "step": 2263
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.015857458114624,
      "learning_rate": 0.0005998570668051743,
      "loss": 3.6367,
      "step": 2264
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.494083046913147,
      "learning_rate": 0.000599856940521254,
      "loss": 3.6041,
      "step": 2265
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.009566068649292,
      "learning_rate": 0.0005998568141815846,
      "loss": 3.4534,
      "step": 2266
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3613321781158447,
      "learning_rate": 0.0005998566877861662,
      "loss": 3.6384,
      "step": 2267
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.493741512298584,
      "learning_rate": 0.0005998565613349988,
      "loss": 3.532,
      "step": 2268
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3989431858062744,
      "learning_rate": 0.0005998564348280825,
      "loss": 3.4315,
      "step": 2269
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6034760475158691,
      "learning_rate": 0.000599856308265417,
      "loss": 3.5586,
      "step": 2270
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4937005043029785,
      "learning_rate": 0.0005998561816470026,
      "loss": 3.449,
      "step": 2271
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1695258617401123,
      "learning_rate": 0.0005998560549728394,
      "loss": 3.6281,
      "step": 2272
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.332765817642212,
      "learning_rate": 0.0005998559282429272,
      "loss": 3.3029,
      "step": 2273
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4244751930236816,
      "learning_rate": 0.0005998558014572662,
      "loss": 3.6178,
      "step": 2274
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6834994554519653,
      "learning_rate": 0.0005998556746158563,
      "loss": 3.5856,
      "step": 2275
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.437053680419922,
      "learning_rate": 0.0005998555477186975,
      "loss": 3.7065,
      "step": 2276
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7329102754592896,
      "learning_rate": 0.00059985542076579,
      "loss": 3.7248,
      "step": 2277
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6156609058380127,
      "learning_rate": 0.0005998552937571337,
      "loss": 3.2996,
      "step": 2278
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.125013828277588,
      "learning_rate": 0.0005998551666927286,
      "loss": 3.7618,
      "step": 2279
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.532663345336914,
      "learning_rate": 0.0005998550395725749,
      "loss": 3.5353,
      "step": 2280
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3009282350540161,
      "learning_rate": 0.0005998549123966722,
      "loss": 3.5562,
      "step": 2281
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.313472032546997,
      "learning_rate": 0.000599854785165021,
      "loss": 3.4874,
      "step": 2282
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7610366344451904,
      "learning_rate": 0.0005998546578776212,
      "loss": 3.7233,
      "step": 2283
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7538503408432007,
      "learning_rate": 0.0005998545305344727,
      "loss": 3.8466,
      "step": 2284
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7364206314086914,
      "learning_rate": 0.0005998544031355755,
      "loss": 3.3057,
      "step": 2285
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.41473126411438,
      "learning_rate": 0.0005998542756809298,
      "loss": 3.6427,
      "step": 2286
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1442785263061523,
      "learning_rate": 0.0005998541481705354,
      "loss": 3.7254,
      "step": 2287
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.188930034637451,
      "learning_rate": 0.0005998540206043927,
      "loss": 3.4876,
      "step": 2288
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.526971697807312,
      "learning_rate": 0.0005998538929825013,
      "loss": 3.5379,
      "step": 2289
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1611573696136475,
      "learning_rate": 0.0005998537653048614,
      "loss": 3.7215,
      "step": 2290
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5811705589294434,
      "learning_rate": 0.0005998536375714732,
      "loss": 3.5456,
      "step": 2291
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6034586429595947,
      "learning_rate": 0.0005998535097823363,
      "loss": 3.6305,
      "step": 2292
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.949308156967163,
      "learning_rate": 0.0005998533819374511,
      "loss": 3.4632,
      "step": 2293
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4361623525619507,
      "learning_rate": 0.0005998532540368174,
      "loss": 3.4659,
      "step": 2294
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2968772649765015,
      "learning_rate": 0.0005998531260804355,
      "loss": 3.8354,
      "step": 2295
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.450793743133545,
      "learning_rate": 0.0005998529980683051,
      "loss": 3.4052,
      "step": 2296
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5432548522949219,
      "learning_rate": 0.0005998528700004264,
      "loss": 3.8473,
      "step": 2297
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6548664569854736,
      "learning_rate": 0.0005998527418767994,
      "loss": 3.5962,
      "step": 2298
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.19722843170166,
      "learning_rate": 0.0005998526136974242,
      "loss": 3.3512,
      "step": 2299
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9762914180755615,
      "learning_rate": 0.0005998524854623006,
      "loss": 3.2607,
      "step": 2300
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7798254489898682,
      "learning_rate": 0.0005998523571714287,
      "loss": 3.7162,
      "step": 2301
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.351310133934021,
      "learning_rate": 0.0005998522288248087,
      "loss": 3.7128,
      "step": 2302
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.670798420906067,
      "learning_rate": 0.0005998521004224405,
      "loss": 3.5449,
      "step": 2303
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.817313551902771,
      "learning_rate": 0.0005998519719643242,
      "loss": 3.5652,
      "step": 2304
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.200735330581665,
      "learning_rate": 0.0005998518434504597,
      "loss": 3.5414,
      "step": 2305
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6683673858642578,
      "learning_rate": 0.0005998517148808471,
      "loss": 3.4589,
      "step": 2306
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6843760013580322,
      "learning_rate": 0.0005998515862554864,
      "loss": 3.8391,
      "step": 2307
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9350162744522095,
      "learning_rate": 0.0005998514575743776,
      "loss": 3.3712,
      "step": 2308
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8347935676574707,
      "learning_rate": 0.0005998513288375208,
      "loss": 3.2918,
      "step": 2309
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5171821117401123,
      "learning_rate": 0.000599851200044916,
      "loss": 3.7034,
      "step": 2310
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4361932277679443,
      "learning_rate": 0.0005998510711965631,
      "loss": 3.6825,
      "step": 2311
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6349427700042725,
      "learning_rate": 0.0005998509422924622,
      "loss": 3.8341,
      "step": 2312
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6819416284561157,
      "learning_rate": 0.0005998508133326136,
      "loss": 3.3542,
      "step": 2313
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4555197954177856,
      "learning_rate": 0.0005998506843170169,
      "loss": 3.4857,
      "step": 2314
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0150773525238037,
      "learning_rate": 0.0005998505552456722,
      "loss": 3.5567,
      "step": 2315
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.716606616973877,
      "learning_rate": 0.0005998504261185797,
      "loss": 3.7277,
      "step": 2316
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.636770486831665,
      "learning_rate": 0.0005998502969357394,
      "loss": 3.5252,
      "step": 2317
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5488197803497314,
      "learning_rate": 0.0005998501676971513,
      "loss": 3.7018,
      "step": 2318
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.483055591583252,
      "learning_rate": 0.0005998500384028153,
      "loss": 3.4047,
      "step": 2319
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0036516189575195,
      "learning_rate": 0.0005998499090527315,
      "loss": 3.58,
      "step": 2320
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7558326721191406,
      "learning_rate": 0.0005998497796469,
      "loss": 3.6972,
      "step": 2321
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6247341632843018,
      "learning_rate": 0.0005998496501853208,
      "loss": 3.6006,
      "step": 2322
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6791552305221558,
      "learning_rate": 0.0005998495206679938,
      "loss": 3.737,
      "step": 2323
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8705195188522339,
      "learning_rate": 0.0005998493910949192,
      "loss": 3.521,
      "step": 2324
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.198638677597046,
      "learning_rate": 0.0005998492614660969,
      "loss": 3.5137,
      "step": 2325
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.714043378829956,
      "learning_rate": 0.0005998491317815269,
      "loss": 3.5757,
      "step": 2326
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8697775602340698,
      "learning_rate": 0.0005998490020412094,
      "loss": 3.6433,
      "step": 2327
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0972983837127686,
      "learning_rate": 0.0005998488722451442,
      "loss": 3.3075,
      "step": 2328
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9072295427322388,
      "learning_rate": 0.0005998487423933315,
      "loss": 3.5645,
      "step": 2329
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.765855073928833,
      "learning_rate": 0.0005998486124857714,
      "loss": 3.5792,
      "step": 2330
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9282782077789307,
      "learning_rate": 0.0005998484825224637,
      "loss": 3.709,
      "step": 2331
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6854698657989502,
      "learning_rate": 0.0005998483525034083,
      "loss": 3.7769,
      "step": 2332
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8020706176757812,
      "learning_rate": 0.0005998482224286055,
      "loss": 3.6027,
      "step": 2333
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.73321533203125,
      "learning_rate": 0.0005998480922980554,
      "loss": 3.752,
      "step": 2334
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.808640718460083,
      "learning_rate": 0.0005998479621117578,
      "loss": 3.6712,
      "step": 2335
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0711238384246826,
      "learning_rate": 0.0005998478318697129,
      "loss": 3.6721,
      "step": 2336
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.419350266456604,
      "learning_rate": 0.0005998477015719204,
      "loss": 3.3552,
      "step": 2337
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8220564126968384,
      "learning_rate": 0.0005998475712183808,
      "loss": 3.5906,
      "step": 2338
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3826138973236084,
      "learning_rate": 0.0005998474408090938,
      "loss": 3.3888,
      "step": 2339
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5762401819229126,
      "learning_rate": 0.0005998473103440594,
      "loss": 4.0776,
      "step": 2340
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5815188884735107,
      "learning_rate": 0.0005998471798232777,
      "loss": 3.54,
      "step": 2341
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5058742761611938,
      "learning_rate": 0.000599847049246749,
      "loss": 3.5546,
      "step": 2342
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5023586750030518,
      "learning_rate": 0.0005998469186144729,
      "loss": 3.5585,
      "step": 2343
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6139671802520752,
      "learning_rate": 0.0005998467879264496,
      "loss": 3.6019,
      "step": 2344
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8196940422058105,
      "learning_rate": 0.0005998466571826791,
      "loss": 3.6124,
      "step": 2345
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6986905336380005,
      "learning_rate": 0.0005998465263831615,
      "loss": 3.3964,
      "step": 2346
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8701462745666504,
      "learning_rate": 0.0005998463955278967,
      "loss": 3.4438,
      "step": 2347
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5534203052520752,
      "learning_rate": 0.0005998462646168848,
      "loss": 3.7711,
      "step": 2348
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8763197660446167,
      "learning_rate": 0.0005998461336501259,
      "loss": 3.9202,
      "step": 2349
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9934781789779663,
      "learning_rate": 0.00059984600262762,
      "loss": 3.7164,
      "step": 2350
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6964870691299438,
      "learning_rate": 0.000599845871549367,
      "loss": 3.2341,
      "step": 2351
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.4852778911590576,
      "learning_rate": 0.0005998457404153669,
      "loss": 3.5172,
      "step": 2352
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8324040174484253,
      "learning_rate": 0.0005998456092256199,
      "loss": 3.6338,
      "step": 2353
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5417444705963135,
      "learning_rate": 0.000599845477980126,
      "loss": 3.8255,
      "step": 2354
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.18780779838562,
      "learning_rate": 0.000599845346678885,
      "loss": 3.6317,
      "step": 2355
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5403255224227905,
      "learning_rate": 0.0005998452153218972,
      "loss": 3.688,
      "step": 2356
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6723551750183105,
      "learning_rate": 0.0005998450839091625,
      "loss": 3.5038,
      "step": 2357
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.456764578819275,
      "learning_rate": 0.000599844952440681,
      "loss": 3.5768,
      "step": 2358
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5099431276321411,
      "learning_rate": 0.0005998448209164525,
      "loss": 3.5206,
      "step": 2359
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9006500244140625,
      "learning_rate": 0.0005998446893364773,
      "loss": 3.6737,
      "step": 2360
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3603235483169556,
      "learning_rate": 0.0005998445577007554,
      "loss": 3.5492,
      "step": 2361
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.352927803993225,
      "learning_rate": 0.0005998444260092866,
      "loss": 3.6189,
      "step": 2362
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5509003400802612,
      "learning_rate": 0.0005998442942620711,
      "loss": 3.4118,
      "step": 2363
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5362293720245361,
      "learning_rate": 0.0005998441624591089,
      "loss": 3.8195,
      "step": 2364
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.387641429901123,
      "learning_rate": 0.0005998440306004,
      "loss": 3.6272,
      "step": 2365
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8521089553833008,
      "learning_rate": 0.0005998438986859444,
      "loss": 3.6688,
      "step": 2366
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5202319622039795,
      "learning_rate": 0.0005998437667157422,
      "loss": 3.4717,
      "step": 2367
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.968909740447998,
      "learning_rate": 0.0005998436346897934,
      "loss": 3.6842,
      "step": 2368
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.859313726425171,
      "learning_rate": 0.000599843502608098,
      "loss": 3.3588,
      "step": 2369
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4331740140914917,
      "learning_rate": 0.000599843370470656,
      "loss": 3.6585,
      "step": 2370
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.755150318145752,
      "learning_rate": 0.0005998432382774675,
      "loss": 3.6024,
      "step": 2371
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.868198871612549,
      "learning_rate": 0.0005998431060285325,
      "loss": 3.5681,
      "step": 2372
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7513501644134521,
      "learning_rate": 0.0005998429737238509,
      "loss": 3.2354,
      "step": 2373
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4930944442749023,
      "learning_rate": 0.0005998428413634229,
      "loss": 3.4131,
      "step": 2374
    },
    {
      "epoch": 0.03,
      "grad_norm": 4.130383491516113,
      "learning_rate": 0.0005998427089472484,
      "loss": 3.2698,
      "step": 2375
    },
    {
      "epoch": 0.03,
      "grad_norm": 4.312041282653809,
      "learning_rate": 0.0005998425764753276,
      "loss": 3.4467,
      "step": 2376
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.475738763809204,
      "learning_rate": 0.0005998424439476604,
      "loss": 3.704,
      "step": 2377
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.196011781692505,
      "learning_rate": 0.0005998423113642466,
      "loss": 3.6714,
      "step": 2378
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.234797239303589,
      "learning_rate": 0.0005998421787250867,
      "loss": 3.9167,
      "step": 2379
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.1461808681488037,
      "learning_rate": 0.0005998420460301804,
      "loss": 3.4097,
      "step": 2380
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0438523292541504,
      "learning_rate": 0.0005998419132795278,
      "loss": 3.3953,
      "step": 2381
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9941954612731934,
      "learning_rate": 0.0005998417804731289,
      "loss": 3.6209,
      "step": 2382
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.4096741676330566,
      "learning_rate": 0.0005998416476109838,
      "loss": 3.783,
      "step": 2383
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9955923557281494,
      "learning_rate": 0.0005998415146930924,
      "loss": 3.4768,
      "step": 2384
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.305274248123169,
      "learning_rate": 0.0005998413817194549,
      "loss": 3.5491,
      "step": 2385
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.306875467300415,
      "learning_rate": 0.0005998412486900712,
      "loss": 3.6761,
      "step": 2386
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.861534595489502,
      "learning_rate": 0.0005998411156049414,
      "loss": 3.9828,
      "step": 2387
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4808143377304077,
      "learning_rate": 0.0005998409824640654,
      "loss": 3.4935,
      "step": 2388
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6477361917495728,
      "learning_rate": 0.0005998408492674433,
      "loss": 3.6788,
      "step": 2389
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.04056715965271,
      "learning_rate": 0.0005998407160150752,
      "loss": 3.6963,
      "step": 2390
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.749303936958313,
      "learning_rate": 0.000599840582706961,
      "loss": 3.7768,
      "step": 2391
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7966454029083252,
      "learning_rate": 0.0005998404493431007,
      "loss": 3.6634,
      "step": 2392
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5115342140197754,
      "learning_rate": 0.0005998403159234945,
      "loss": 3.462,
      "step": 2393
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.827335834503174,
      "learning_rate": 0.0005998401824481423,
      "loss": 3.5413,
      "step": 2394
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3659254312515259,
      "learning_rate": 0.0005998400489170442,
      "loss": 3.5604,
      "step": 2395
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.993459701538086,
      "learning_rate": 0.0005998399153302001,
      "loss": 3.7318,
      "step": 2396
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.3553194999694824,
      "learning_rate": 0.0005998397816876101,
      "loss": 3.6348,
      "step": 2397
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.418084144592285,
      "learning_rate": 0.0005998396479892742,
      "loss": 3.4841,
      "step": 2398
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3403083086013794,
      "learning_rate": 0.0005998395142351925,
      "loss": 3.3214,
      "step": 2399
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2371113300323486,
      "learning_rate": 0.000599839380425365,
      "loss": 3.6357,
      "step": 2400
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.583630084991455,
      "learning_rate": 0.0005998392465597917,
      "loss": 3.7536,
      "step": 2401
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.450555682182312,
      "learning_rate": 0.0005998391126384726,
      "loss": 3.8551,
      "step": 2402
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0203893184661865,
      "learning_rate": 0.0005998389786614078,
      "loss": 3.5152,
      "step": 2403
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7189562320709229,
      "learning_rate": 0.0005998388446285971,
      "loss": 3.5047,
      "step": 2404
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5984461307525635,
      "learning_rate": 0.0005998387105400409,
      "loss": 3.7305,
      "step": 2405
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.136204481124878,
      "learning_rate": 0.0005998385763957389,
      "loss": 3.4922,
      "step": 2406
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.320160388946533,
      "learning_rate": 0.0005998384421956913,
      "loss": 3.4156,
      "step": 2407
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2471373081207275,
      "learning_rate": 0.0005998383079398981,
      "loss": 3.4876,
      "step": 2408
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5593839883804321,
      "learning_rate": 0.0005998381736283592,
      "loss": 3.7553,
      "step": 2409
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.7252919673919678,
      "learning_rate": 0.0005998380392610749,
      "loss": 3.76,
      "step": 2410
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4492324590682983,
      "learning_rate": 0.0005998379048380449,
      "loss": 3.6617,
      "step": 2411
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6682318449020386,
      "learning_rate": 0.0005998377703592694,
      "loss": 3.5436,
      "step": 2412
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4559657573699951,
      "learning_rate": 0.0005998376358247484,
      "loss": 3.4425,
      "step": 2413
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4083542823791504,
      "learning_rate": 0.0005998375012344819,
      "loss": 3.3498,
      "step": 2414
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5141748189926147,
      "learning_rate": 0.00059983736658847,
      "loss": 3.7071,
      "step": 2415
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5383669137954712,
      "learning_rate": 0.0005998372318867126,
      "loss": 3.5773,
      "step": 2416
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7514843940734863,
      "learning_rate": 0.0005998370971292098,
      "loss": 3.2835,
      "step": 2417
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.551520824432373,
      "learning_rate": 0.0005998369623159618,
      "loss": 3.6504,
      "step": 2418
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3029295206069946,
      "learning_rate": 0.0005998368274469683,
      "loss": 3.6688,
      "step": 2419
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.416594386100769,
      "learning_rate": 0.0005998366925222295,
      "loss": 3.7323,
      "step": 2420
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7801105976104736,
      "learning_rate": 0.0005998365575417454,
      "loss": 3.7667,
      "step": 2421
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.657944679260254,
      "learning_rate": 0.000599836422505516,
      "loss": 3.3487,
      "step": 2422
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5098403692245483,
      "learning_rate": 0.0005998362874135414,
      "loss": 3.5036,
      "step": 2423
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5155737400054932,
      "learning_rate": 0.0005998361522658214,
      "loss": 3.5688,
      "step": 2424
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4567840099334717,
      "learning_rate": 0.0005998360170623565,
      "loss": 3.8531,
      "step": 2425
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2705756425857544,
      "learning_rate": 0.0005998358818031462,
      "loss": 3.4603,
      "step": 2426
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5415887832641602,
      "learning_rate": 0.0005998357464881908,
      "loss": 3.6894,
      "step": 2427
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6819297075271606,
      "learning_rate": 0.0005998356111174903,
      "loss": 3.7225,
      "step": 2428
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8581408262252808,
      "learning_rate": 0.0005998354756910448,
      "loss": 3.5245,
      "step": 2429
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7421352863311768,
      "learning_rate": 0.0005998353402088541,
      "loss": 3.4782,
      "step": 2430
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7529925107955933,
      "learning_rate": 0.0005998352046709183,
      "loss": 3.7061,
      "step": 2431
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3776581287384033,
      "learning_rate": 0.0005998350690772376,
      "loss": 3.489,
      "step": 2432
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4610812664031982,
      "learning_rate": 0.0005998349334278118,
      "loss": 3.6613,
      "step": 2433
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9631173610687256,
      "learning_rate": 0.000599834797722641,
      "loss": 3.766,
      "step": 2434
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9203259944915771,
      "learning_rate": 0.0005998346619617254,
      "loss": 3.2033,
      "step": 2435
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6692293882369995,
      "learning_rate": 0.0005998345261450647,
      "loss": 3.7439,
      "step": 2436
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.062270402908325,
      "learning_rate": 0.0005998343902726593,
      "loss": 3.6929,
      "step": 2437
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7842986583709717,
      "learning_rate": 0.0005998342543445089,
      "loss": 3.4326,
      "step": 2438
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.352242946624756,
      "learning_rate": 0.0005998341183606137,
      "loss": 3.6082,
      "step": 2439
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4887887239456177,
      "learning_rate": 0.0005998339823209737,
      "loss": 3.4147,
      "step": 2440
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3278062343597412,
      "learning_rate": 0.0005998338462255889,
      "loss": 3.7826,
      "step": 2441
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.556074857711792,
      "learning_rate": 0.0005998337100744592,
      "loss": 3.5234,
      "step": 2442
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5050917863845825,
      "learning_rate": 0.0005998335738675849,
      "loss": 3.7677,
      "step": 2443
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9767181873321533,
      "learning_rate": 0.0005998334376049657,
      "loss": 3.5164,
      "step": 2444
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1066806316375732,
      "learning_rate": 0.000599833301286602,
      "loss": 3.544,
      "step": 2445
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9790756702423096,
      "learning_rate": 0.0005998331649124936,
      "loss": 3.5113,
      "step": 2446
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5845921039581299,
      "learning_rate": 0.0005998330284826405,
      "loss": 3.4711,
      "step": 2447
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5466710329055786,
      "learning_rate": 0.0005998328919970428,
      "loss": 3.7533,
      "step": 2448
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.834974765777588,
      "learning_rate": 0.0005998327554557004,
      "loss": 3.5915,
      "step": 2449
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.1156182289123535,
      "learning_rate": 0.0005998326188586136,
      "loss": 3.7106,
      "step": 2450
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2169651985168457,
      "learning_rate": 0.0005998324822057822,
      "loss": 3.3642,
      "step": 2451
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5468593835830688,
      "learning_rate": 0.0005998323454972063,
      "loss": 3.5679,
      "step": 2452
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4490784406661987,
      "learning_rate": 0.0005998322087328858,
      "loss": 3.5492,
      "step": 2453
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5035263299942017,
      "learning_rate": 0.000599832071912821,
      "loss": 3.5665,
      "step": 2454
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.376648426055908,
      "learning_rate": 0.0005998319350370116,
      "loss": 3.5563,
      "step": 2455
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4823722839355469,
      "learning_rate": 0.0005998317981054578,
      "loss": 3.4891,
      "step": 2456
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4997040033340454,
      "learning_rate": 0.0005998316611181597,
      "loss": 3.4928,
      "step": 2457
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9446430206298828,
      "learning_rate": 0.0005998315240751172,
      "loss": 3.6234,
      "step": 2458
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.64900803565979,
      "learning_rate": 0.0005998313869763303,
      "loss": 3.6277,
      "step": 2459
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3992369174957275,
      "learning_rate": 0.0005998312498217992,
      "loss": 3.7463,
      "step": 2460
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3023653030395508,
      "learning_rate": 0.0005998311126115237,
      "loss": 3.7071,
      "step": 2461
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3333210945129395,
      "learning_rate": 0.000599830975345504,
      "loss": 3.5058,
      "step": 2462
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3307448625564575,
      "learning_rate": 0.00059983083802374,
      "loss": 3.6835,
      "step": 2463
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.237863779067993,
      "learning_rate": 0.0005998307006462319,
      "loss": 3.5818,
      "step": 2464
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6930423974990845,
      "learning_rate": 0.0005998305632129795,
      "loss": 3.5521,
      "step": 2465
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.621351718902588,
      "learning_rate": 0.0005998304257239831,
      "loss": 3.4527,
      "step": 2466
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4684053659439087,
      "learning_rate": 0.0005998302881792424,
      "loss": 3.3234,
      "step": 2467
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3885546922683716,
      "learning_rate": 0.0005998301505787577,
      "loss": 3.8295,
      "step": 2468
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7950574159622192,
      "learning_rate": 0.0005998300129225287,
      "loss": 3.7629,
      "step": 2469
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.899481177330017,
      "learning_rate": 0.0005998298752105559,
      "loss": 3.6592,
      "step": 2470
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5081217288970947,
      "learning_rate": 0.000599829737442839,
      "loss": 3.6637,
      "step": 2471
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8815255165100098,
      "learning_rate": 0.000599829599619378,
      "loss": 3.3449,
      "step": 2472
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5723886489868164,
      "learning_rate": 0.0005998294617401731,
      "loss": 3.4997,
      "step": 2473
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7274504899978638,
      "learning_rate": 0.0005998293238052241,
      "loss": 3.7379,
      "step": 2474
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5385215282440186,
      "learning_rate": 0.0005998291858145314,
      "loss": 3.6524,
      "step": 2475
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7194703817367554,
      "learning_rate": 0.0005998290477680946,
      "loss": 3.6634,
      "step": 2476
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3674283027648926,
      "learning_rate": 0.000599828909665914,
      "loss": 3.4171,
      "step": 2477
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.401915431022644,
      "learning_rate": 0.0005998287715079895,
      "loss": 3.5026,
      "step": 2478
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5854352712631226,
      "learning_rate": 0.0005998286332943212,
      "loss": 3.6092,
      "step": 2479
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4904639720916748,
      "learning_rate": 0.0005998284950249093,
      "loss": 3.5858,
      "step": 2480
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7299731969833374,
      "learning_rate": 0.0005998283566997533,
      "loss": 3.5677,
      "step": 2481
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3622291088104248,
      "learning_rate": 0.0005998282183188537,
      "loss": 3.6751,
      "step": 2482
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6870744228363037,
      "learning_rate": 0.0005998280798822103,
      "loss": 3.3546,
      "step": 2483
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4347840547561646,
      "learning_rate": 0.0005998279413898233,
      "loss": 3.3457,
      "step": 2484
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4897454977035522,
      "learning_rate": 0.0005998278028416926,
      "loss": 3.3588,
      "step": 2485
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.384896993637085,
      "learning_rate": 0.0005998276642378182,
      "loss": 3.6315,
      "step": 2486
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6024283170700073,
      "learning_rate": 0.0005998275255782003,
      "loss": 3.6265,
      "step": 2487
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7525086402893066,
      "learning_rate": 0.0005998273868628387,
      "loss": 3.6507,
      "step": 2488
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3811572790145874,
      "learning_rate": 0.0005998272480917335,
      "loss": 3.5418,
      "step": 2489
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.478245496749878,
      "learning_rate": 0.0005998271092648848,
      "loss": 3.636,
      "step": 2490
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7654420137405396,
      "learning_rate": 0.0005998269703822926,
      "loss": 3.4845,
      "step": 2491
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7539325952529907,
      "learning_rate": 0.0005998268314439569,
      "loss": 3.8734,
      "step": 2492
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2557162046432495,
      "learning_rate": 0.0005998266924498777,
      "loss": 3.5171,
      "step": 2493
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5723316669464111,
      "learning_rate": 0.0005998265534000551,
      "loss": 3.3745,
      "step": 2494
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.774559736251831,
      "learning_rate": 0.000599826414294489,
      "loss": 3.486,
      "step": 2495
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6264978647232056,
      "learning_rate": 0.0005998262751331796,
      "loss": 3.9442,
      "step": 2496
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4779975414276123,
      "learning_rate": 0.0005998261359161268,
      "loss": 3.3239,
      "step": 2497
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6110968589782715,
      "learning_rate": 0.0005998259966433307,
      "loss": 3.5992,
      "step": 2498
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5058602094650269,
      "learning_rate": 0.0005998258573147913,
      "loss": 3.5535,
      "step": 2499
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7568117380142212,
      "learning_rate": 0.0005998257179305085,
      "loss": 3.4393,
      "step": 2500
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8508673906326294,
      "learning_rate": 0.0005998255784904826,
      "loss": 3.8807,
      "step": 2501
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.916268229484558,
      "learning_rate": 0.0005998254389947134,
      "loss": 3.6304,
      "step": 2502
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6596925258636475,
      "learning_rate": 0.0005998252994432009,
      "loss": 3.6966,
      "step": 2503
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7082154750823975,
      "learning_rate": 0.0005998251598359453,
      "loss": 3.5368,
      "step": 2504
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.567818522453308,
      "learning_rate": 0.0005998250201729465,
      "loss": 3.5385,
      "step": 2505
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.962435245513916,
      "learning_rate": 0.0005998248804542047,
      "loss": 3.6297,
      "step": 2506
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5223891735076904,
      "learning_rate": 0.0005998247406797196,
      "loss": 3.5258,
      "step": 2507
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.721511721611023,
      "learning_rate": 0.0005998246008494915,
      "loss": 3.3198,
      "step": 2508
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.5026021003723145,
      "learning_rate": 0.0005998244609635204,
      "loss": 3.5748,
      "step": 2509
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.029623031616211,
      "learning_rate": 0.0005998243210218062,
      "loss": 3.5815,
      "step": 2510
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5672141313552856,
      "learning_rate": 0.000599824181024349,
      "loss": 3.7002,
      "step": 2511
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9372175931930542,
      "learning_rate": 0.0005998240409711489,
      "loss": 3.5004,
      "step": 2512
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8968753814697266,
      "learning_rate": 0.0005998239008622058,
      "loss": 3.6155,
      "step": 2513
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8253673315048218,
      "learning_rate": 0.0005998237606975197,
      "loss": 3.4729,
      "step": 2514
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1616859436035156,
      "learning_rate": 0.0005998236204770909,
      "loss": 3.5485,
      "step": 2515
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5213589668273926,
      "learning_rate": 0.0005998234802009192,
      "loss": 3.7416,
      "step": 2516
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5740376710891724,
      "learning_rate": 0.0005998233398690046,
      "loss": 3.6868,
      "step": 2517
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8337442874908447,
      "learning_rate": 0.000599823199481347,
      "loss": 3.6156,
      "step": 2518
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.686981439590454,
      "learning_rate": 0.0005998230590379468,
      "loss": 3.4973,
      "step": 2519
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4920654296875,
      "learning_rate": 0.0005998229185388038,
      "loss": 3.4122,
      "step": 2520
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6279677152633667,
      "learning_rate": 0.000599822777983918,
      "loss": 3.5925,
      "step": 2521
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.636725902557373,
      "learning_rate": 0.0005998226373732896,
      "loss": 3.5603,
      "step": 2522
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7628941535949707,
      "learning_rate": 0.0005998224967069185,
      "loss": 3.665,
      "step": 2523
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7854262590408325,
      "learning_rate": 0.0005998223559848046,
      "loss": 3.4692,
      "step": 2524
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9603959321975708,
      "learning_rate": 0.0005998222152069483,
      "loss": 3.7349,
      "step": 2525
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4528756141662598,
      "learning_rate": 0.0005998220743733492,
      "loss": 3.7223,
      "step": 2526
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9327569007873535,
      "learning_rate": 0.0005998219334840075,
      "loss": 3.499,
      "step": 2527
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.327956199645996,
      "learning_rate": 0.0005998217925389233,
      "loss": 3.7631,
      "step": 2528
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.4506173133850098,
      "learning_rate": 0.0005998216515380966,
      "loss": 3.4196,
      "step": 2529
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5646350383758545,
      "learning_rate": 0.0005998215104815274,
      "loss": 3.7342,
      "step": 2530
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.440090298652649,
      "learning_rate": 0.0005998213693692157,
      "loss": 3.597,
      "step": 2531
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.140681266784668,
      "learning_rate": 0.0005998212282011616,
      "loss": 3.6224,
      "step": 2532
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6207265853881836,
      "learning_rate": 0.0005998210869773649,
      "loss": 3.7617,
      "step": 2533
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9362179040908813,
      "learning_rate": 0.000599820945697826,
      "loss": 3.2188,
      "step": 2534
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.654672861099243,
      "learning_rate": 0.0005998208043625446,
      "loss": 3.43,
      "step": 2535
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6265718936920166,
      "learning_rate": 0.0005998206629715209,
      "loss": 3.6514,
      "step": 2536
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8467293977737427,
      "learning_rate": 0.0005998205215247548,
      "loss": 3.6302,
      "step": 2537
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0752713680267334,
      "learning_rate": 0.0005998203800222466,
      "loss": 3.5631,
      "step": 2538
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.715677261352539,
      "learning_rate": 0.000599820238463996,
      "loss": 3.4138,
      "step": 2539
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.97469162940979,
      "learning_rate": 0.0005998200968500032,
      "loss": 3.7643,
      "step": 2540
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5274388790130615,
      "learning_rate": 0.0005998199551802681,
      "loss": 3.2663,
      "step": 2541
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.893606185913086,
      "learning_rate": 0.0005998198134547909,
      "loss": 3.0765,
      "step": 2542
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9537464380264282,
      "learning_rate": 0.0005998196716735715,
      "loss": 3.5805,
      "step": 2543
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8215523958206177,
      "learning_rate": 0.0005998195298366099,
      "loss": 3.4194,
      "step": 2544
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6742111444473267,
      "learning_rate": 0.0005998193879439063,
      "loss": 3.6579,
      "step": 2545
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1688954830169678,
      "learning_rate": 0.0005998192459954606,
      "loss": 3.5895,
      "step": 2546
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.317683458328247,
      "learning_rate": 0.0005998191039912728,
      "loss": 3.7004,
      "step": 2547
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6621944904327393,
      "learning_rate": 0.000599818961931343,
      "loss": 3.815,
      "step": 2548
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.183992862701416,
      "learning_rate": 0.0005998188198156712,
      "loss": 3.4551,
      "step": 2549
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4726252555847168,
      "learning_rate": 0.0005998186776442573,
      "loss": 3.6545,
      "step": 2550
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6038113832473755,
      "learning_rate": 0.0005998185354171016,
      "loss": 3.611,
      "step": 2551
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.150374412536621,
      "learning_rate": 0.0005998183931342039,
      "loss": 3.3788,
      "step": 2552
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5284487009048462,
      "learning_rate": 0.0005998182507955644,
      "loss": 3.4133,
      "step": 2553
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6394470930099487,
      "learning_rate": 0.0005998181084011829,
      "loss": 3.4018,
      "step": 2554
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6667962074279785,
      "learning_rate": 0.0005998179659510596,
      "loss": 3.3867,
      "step": 2555
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4496630430221558,
      "learning_rate": 0.0005998178234451945,
      "loss": 3.6927,
      "step": 2556
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3079502582550049,
      "learning_rate": 0.0005998176808835876,
      "loss": 3.3927,
      "step": 2557
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5341700315475464,
      "learning_rate": 0.000599817538266239,
      "loss": 3.7079,
      "step": 2558
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0866973400115967,
      "learning_rate": 0.0005998173955931485,
      "loss": 3.4203,
      "step": 2559
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6071819067001343,
      "learning_rate": 0.0005998172528643164,
      "loss": 3.6052,
      "step": 2560
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3670387268066406,
      "learning_rate": 0.0005998171100797426,
      "loss": 3.3879,
      "step": 2561
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.651547908782959,
      "learning_rate": 0.0005998169672394272,
      "loss": 3.709,
      "step": 2562
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5342586040496826,
      "learning_rate": 0.00059981682434337,
      "loss": 3.4756,
      "step": 2563
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5644304752349854,
      "learning_rate": 0.0005998166813915713,
      "loss": 3.5319,
      "step": 2564
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9017102718353271,
      "learning_rate": 0.000599816538384031,
      "loss": 3.5133,
      "step": 2565
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6391969919204712,
      "learning_rate": 0.0005998163953207491,
      "loss": 3.6506,
      "step": 2566
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9485615491867065,
      "learning_rate": 0.0005998162522017257,
      "loss": 3.4822,
      "step": 2567
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2563087940216064,
      "learning_rate": 0.0005998161090269609,
      "loss": 3.6782,
      "step": 2568
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3850657939910889,
      "learning_rate": 0.0005998159657964546,
      "loss": 3.613,
      "step": 2569
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.041579008102417,
      "learning_rate": 0.0005998158225102068,
      "loss": 3.4041,
      "step": 2570
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0106613636016846,
      "learning_rate": 0.0005998156791682176,
      "loss": 3.6719,
      "step": 2571
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4716283082962036,
      "learning_rate": 0.000599815535770487,
      "loss": 3.5412,
      "step": 2572
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.430304527282715,
      "learning_rate": 0.000599815392317015,
      "loss": 3.523,
      "step": 2573
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6306538581848145,
      "learning_rate": 0.0005998152488078016,
      "loss": 3.3313,
      "step": 2574
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4713400602340698,
      "learning_rate": 0.0005998151052428471,
      "loss": 3.5432,
      "step": 2575
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9046404361724854,
      "learning_rate": 0.0005998149616221511,
      "loss": 3.9839,
      "step": 2576
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6762094497680664,
      "learning_rate": 0.0005998148179457139,
      "loss": 3.5789,
      "step": 2577
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.515798568725586,
      "learning_rate": 0.0005998146742135354,
      "loss": 3.3462,
      "step": 2578
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.219679355621338,
      "learning_rate": 0.0005998145304256159,
      "loss": 3.3363,
      "step": 2579
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3277390003204346,
      "learning_rate": 0.0005998143865819551,
      "loss": 3.5457,
      "step": 2580
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5550960302352905,
      "learning_rate": 0.0005998142426825531,
      "loss": 3.5141,
      "step": 2581
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9039878845214844,
      "learning_rate": 0.0005998140987274101,
      "loss": 3.5477,
      "step": 2582
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.679219365119934,
      "learning_rate": 0.0005998139547165259,
      "loss": 3.3129,
      "step": 2583
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3546844720840454,
      "learning_rate": 0.0005998138106499005,
      "loss": 3.5127,
      "step": 2584
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5685516595840454,
      "learning_rate": 0.0005998136665275343,
      "loss": 3.7197,
      "step": 2585
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9385639429092407,
      "learning_rate": 0.000599813522349427,
      "loss": 3.3561,
      "step": 2586
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9992866516113281,
      "learning_rate": 0.0005998133781155786,
      "loss": 3.5858,
      "step": 2587
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.154751777648926,
      "learning_rate": 0.0005998132338259893,
      "loss": 3.4592,
      "step": 2588
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.4296138286590576,
      "learning_rate": 0.0005998130894806591,
      "loss": 3.7107,
      "step": 2589
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8288217782974243,
      "learning_rate": 0.000599812945079588,
      "loss": 3.4955,
      "step": 2590
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.2240073680877686,
      "learning_rate": 0.000599812800622776,
      "loss": 3.5893,
      "step": 2591
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.6207377910614014,
      "learning_rate": 0.0005998126561102231,
      "loss": 3.8175,
      "step": 2592
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1616008281707764,
      "learning_rate": 0.0005998125115419294,
      "loss": 3.6707,
      "step": 2593
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.502863883972168,
      "learning_rate": 0.0005998123669178949,
      "loss": 3.5222,
      "step": 2594
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.646665334701538,
      "learning_rate": 0.0005998122222381195,
      "loss": 3.6431,
      "step": 2595
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.303816556930542,
      "learning_rate": 0.0005998120775026035,
      "loss": 3.4802,
      "step": 2596
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9152113199234009,
      "learning_rate": 0.0005998119327113467,
      "loss": 3.4485,
      "step": 2597
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.918368935585022,
      "learning_rate": 0.0005998117878643493,
      "loss": 3.7863,
      "step": 2598
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5948739051818848,
      "learning_rate": 0.0005998116429616112,
      "loss": 3.7189,
      "step": 2599
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.2788118124008179,
      "learning_rate": 0.0005998114980031323,
      "loss": 3.5887,
      "step": 2600
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7578673362731934,
      "learning_rate": 0.000599811352988913,
      "loss": 3.516,
      "step": 2601
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0731823444366455,
      "learning_rate": 0.0005998112079189529,
      "loss": 3.7586,
      "step": 2602
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.649137020111084,
      "learning_rate": 0.0005998110627932524,
      "loss": 3.7825,
      "step": 2603
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2543880939483643,
      "learning_rate": 0.0005998109176118113,
      "loss": 3.4629,
      "step": 2604
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7821693420410156,
      "learning_rate": 0.0005998107723746297,
      "loss": 3.5141,
      "step": 2605
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.12992262840271,
      "learning_rate": 0.0005998106270817076,
      "loss": 3.4804,
      "step": 2606
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.266228437423706,
      "learning_rate": 0.000599810481733045,
      "loss": 3.5368,
      "step": 2607
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.916365385055542,
      "learning_rate": 0.0005998103363286421,
      "loss": 3.5351,
      "step": 2608
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7588238716125488,
      "learning_rate": 0.0005998101908684987,
      "loss": 3.5094,
      "step": 2609
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9193955659866333,
      "learning_rate": 0.000599810045352615,
      "loss": 3.4864,
      "step": 2610
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6909462213516235,
      "learning_rate": 0.0005998098997809909,
      "loss": 3.4792,
      "step": 2611
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7592790126800537,
      "learning_rate": 0.0005998097541536265,
      "loss": 3.5707,
      "step": 2612
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5407452583312988,
      "learning_rate": 0.0005998096084705218,
      "loss": 3.3881,
      "step": 2613
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4829643964767456,
      "learning_rate": 0.0005998094627316768,
      "loss": 3.7,
      "step": 2614
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8663909435272217,
      "learning_rate": 0.0005998093169370917,
      "loss": 3.572,
      "step": 2615
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6815402507781982,
      "learning_rate": 0.0005998091710867663,
      "loss": 3.5356,
      "step": 2616
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5849133729934692,
      "learning_rate": 0.0005998090251807006,
      "loss": 3.4751,
      "step": 2617
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7904456853866577,
      "learning_rate": 0.0005998088792188948,
      "loss": 3.5918,
      "step": 2618
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.645114779472351,
      "learning_rate": 0.000599808733201349,
      "loss": 3.553,
      "step": 2619
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4759724140167236,
      "learning_rate": 0.000599808587128063,
      "loss": 3.7123,
      "step": 2620
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3947906494140625,
      "learning_rate": 0.0005998084409990369,
      "loss": 3.4038,
      "step": 2621
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5919389724731445,
      "learning_rate": 0.0005998082948142708,
      "loss": 3.521,
      "step": 2622
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5254034996032715,
      "learning_rate": 0.0005998081485737647,
      "loss": 3.5302,
      "step": 2623
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5044922828674316,
      "learning_rate": 0.0005998080022775185,
      "loss": 3.2455,
      "step": 2624
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.4821290969848633,
      "learning_rate": 0.0005998078559255324,
      "loss": 3.7886,
      "step": 2625
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7952429056167603,
      "learning_rate": 0.0005998077095178064,
      "loss": 3.2251,
      "step": 2626
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9698501825332642,
      "learning_rate": 0.0005998075630543405,
      "loss": 3.2209,
      "step": 2627
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7487003803253174,
      "learning_rate": 0.0005998074165351347,
      "loss": 3.6536,
      "step": 2628
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.8809499740600586,
      "learning_rate": 0.0005998072699601889,
      "loss": 3.4604,
      "step": 2629
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9170981645584106,
      "learning_rate": 0.0005998071233295035,
      "loss": 3.4504,
      "step": 2630
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.447819471359253,
      "learning_rate": 0.0005998069766430781,
      "loss": 3.5213,
      "step": 2631
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.21478271484375,
      "learning_rate": 0.000599806829900913,
      "loss": 3.4996,
      "step": 2632
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.043365478515625,
      "learning_rate": 0.0005998066831030081,
      "loss": 3.472,
      "step": 2633
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.423148274421692,
      "learning_rate": 0.0005998065362493636,
      "loss": 3.5141,
      "step": 2634
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3480353355407715,
      "learning_rate": 0.0005998063893399793,
      "loss": 3.247,
      "step": 2635
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.2001402378082275,
      "learning_rate": 0.0005998062423748554,
      "loss": 3.4138,
      "step": 2636
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5771913528442383,
      "learning_rate": 0.0005998060953539918,
      "loss": 3.6792,
      "step": 2637
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.549104928970337,
      "learning_rate": 0.0005998059482773886,
      "loss": 3.5005,
      "step": 2638
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1336517333984375,
      "learning_rate": 0.0005998058011450458,
      "loss": 3.2984,
      "step": 2639
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6511751413345337,
      "learning_rate": 0.0005998056539569635,
      "loss": 3.3983,
      "step": 2640
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.3069145679473877,
      "learning_rate": 0.0005998055067131416,
      "loss": 3.6076,
      "step": 2641
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1820785999298096,
      "learning_rate": 0.0005998053594135802,
      "loss": 3.5219,
      "step": 2642
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.3464207649230957,
      "learning_rate": 0.0005998052120582795,
      "loss": 3.5069,
      "step": 2643
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.886108160018921,
      "learning_rate": 0.0005998050646472392,
      "loss": 3.4421,
      "step": 2644
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.2946174144744873,
      "learning_rate": 0.0005998049171804594,
      "loss": 3.5102,
      "step": 2645
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.5913262367248535,
      "learning_rate": 0.0005998047696579404,
      "loss": 3.5874,
      "step": 2646
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.020643472671509,
      "learning_rate": 0.0005998046220796819,
      "loss": 3.421,
      "step": 2647
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.1990675926208496,
      "learning_rate": 0.000599804474445684,
      "loss": 3.57,
      "step": 2648
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7296373844146729,
      "learning_rate": 0.000599804326755947,
      "loss": 3.6019,
      "step": 2649
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.419721007347107,
      "learning_rate": 0.0005998041790104706,
      "loss": 3.3732,
      "step": 2650
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.135218620300293,
      "learning_rate": 0.0005998040312092549,
      "loss": 3.353,
      "step": 2651
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.571657180786133,
      "learning_rate": 0.0005998038833523,
      "loss": 3.7355,
      "step": 2652
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7396785020828247,
      "learning_rate": 0.0005998037354396059,
      "loss": 3.5202,
      "step": 2653
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7622849941253662,
      "learning_rate": 0.0005998035874711726,
      "loss": 3.5975,
      "step": 2654
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.823114275932312,
      "learning_rate": 0.0005998034394470003,
      "loss": 3.1577,
      "step": 2655
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.851538896560669,
      "learning_rate": 0.0005998032913670888,
      "loss": 3.5691,
      "step": 2656
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.227953314781189,
      "learning_rate": 0.0005998031432314382,
      "loss": 3.431,
      "step": 2657
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.4842569828033447,
      "learning_rate": 0.0005998029950400485,
      "loss": 3.1064,
      "step": 2658
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7069510221481323,
      "learning_rate": 0.0005998028467929197,
      "loss": 3.7742,
      "step": 2659
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7572343349456787,
      "learning_rate": 0.000599802698490052,
      "loss": 3.527,
      "step": 2660
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9701344966888428,
      "learning_rate": 0.0005998025501314454,
      "loss": 3.4288,
      "step": 2661
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7655236721038818,
      "learning_rate": 0.0005998024017170997,
      "loss": 3.4103,
      "step": 2662
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6313183307647705,
      "learning_rate": 0.000599802253247015,
      "loss": 3.5113,
      "step": 2663
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.7712783813476562,
      "learning_rate": 0.0005998021047211915,
      "loss": 3.5942,
      "step": 2664
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5137773752212524,
      "learning_rate": 0.0005998019561396292,
      "loss": 3.3379,
      "step": 2665
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9051603078842163,
      "learning_rate": 0.0005998018075023281,
      "loss": 3.3854,
      "step": 2666
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.3266055583953857,
      "learning_rate": 0.000599801658809288,
      "loss": 3.3408,
      "step": 2667
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6758368015289307,
      "learning_rate": 0.0005998015100605093,
      "loss": 3.5481,
      "step": 2668
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5539178848266602,
      "learning_rate": 0.0005998013612559917,
      "loss": 3.3064,
      "step": 2669
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5601696968078613,
      "learning_rate": 0.0005998012123957354,
      "loss": 3.6905,
      "step": 2670
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.528117060661316,
      "learning_rate": 0.0005998010634797404,
      "loss": 3.4609,
      "step": 2671
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6651076078414917,
      "learning_rate": 0.0005998009145080068,
      "loss": 3.6063,
      "step": 2672
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.0442402362823486,
      "learning_rate": 0.0005998007654805344,
      "loss": 3.6015,
      "step": 2673
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.599885940551758,
      "learning_rate": 0.0005998006163973235,
      "loss": 3.8087,
      "step": 2674
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.201054573059082,
      "learning_rate": 0.000599800467258374,
      "loss": 3.3957,
      "step": 2675
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.944038987159729,
      "learning_rate": 0.0005998003180636858,
      "loss": 3.4051,
      "step": 2676
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6651339530944824,
      "learning_rate": 0.0005998001688132592,
      "loss": 3.5975,
      "step": 2677
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.114738702774048,
      "learning_rate": 0.000599800019507094,
      "loss": 3.7012,
      "step": 2678
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.937853217124939,
      "learning_rate": 0.0005997998701451904,
      "loss": 3.4958,
      "step": 2679
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.911759614944458,
      "learning_rate": 0.0005997997207275483,
      "loss": 3.5034,
      "step": 2680
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.486263394355774,
      "learning_rate": 0.0005997995712541678,
      "loss": 3.4959,
      "step": 2681
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6438714265823364,
      "learning_rate": 0.0005997994217250489,
      "loss": 3.4657,
      "step": 2682
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.9391790628433228,
      "learning_rate": 0.0005997992721401916,
      "loss": 3.3882,
      "step": 2683
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.6800317764282227,
      "learning_rate": 0.000599799122499596,
      "loss": 3.5016,
      "step": 2684
    },
    {
      "epoch": 0.03,
      "grad_norm": 3.240701913833618,
      "learning_rate": 0.000599798972803262,
      "loss": 3.5717,
      "step": 2685
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.5961376428604126,
      "learning_rate": 0.0005997988230511898,
      "loss": 3.5084,
      "step": 2686
    },
    {
      "epoch": 0.03,
      "grad_norm": 1.487091302871704,
      "learning_rate": 0.0005997986732433793,
      "loss": 3.4555,
      "step": 2687
    },
    {
      "epoch": 0.03,
      "grad_norm": 2.787256956100464,
      "learning_rate": 0.0005997985233798306,
      "loss": 3.5405,
      "step": 2688
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.192012071609497,
      "learning_rate": 0.0005997983734605437,
      "loss": 3.4488,
      "step": 2689
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4817897081375122,
      "learning_rate": 0.0005997982234855186,
      "loss": 3.5187,
      "step": 2690
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.186966896057129,
      "learning_rate": 0.0005997980734547553,
      "loss": 3.6539,
      "step": 2691
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4701180458068848,
      "learning_rate": 0.0005997979233682539,
      "loss": 3.516,
      "step": 2692
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4964741468429565,
      "learning_rate": 0.0005997977732260145,
      "loss": 3.444,
      "step": 2693
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6141515970230103,
      "learning_rate": 0.0005997976230280369,
      "loss": 3.5159,
      "step": 2694
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6177988052368164,
      "learning_rate": 0.0005997974727743213,
      "loss": 3.7574,
      "step": 2695
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4812254905700684,
      "learning_rate": 0.0005997973224648677,
      "loss": 3.5006,
      "step": 2696
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5063308477401733,
      "learning_rate": 0.0005997971720996762,
      "loss": 3.3632,
      "step": 2697
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5213217735290527,
      "learning_rate": 0.0005997970216787467,
      "loss": 3.672,
      "step": 2698
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.507565975189209,
      "learning_rate": 0.0005997968712020792,
      "loss": 3.5649,
      "step": 2699
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4691545963287354,
      "learning_rate": 0.0005997967206696739,
      "loss": 3.7259,
      "step": 2700
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.9165358543395996,
      "learning_rate": 0.0005997965700815307,
      "loss": 3.6256,
      "step": 2701
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.341524362564087,
      "learning_rate": 0.0005997964194376496,
      "loss": 3.882,
      "step": 2702
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1936777830123901,
      "learning_rate": 0.0005997962687380307,
      "loss": 3.7119,
      "step": 2703
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7867616415023804,
      "learning_rate": 0.0005997961179826741,
      "loss": 3.6125,
      "step": 2704
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2955527305603027,
      "learning_rate": 0.0005997959671715797,
      "loss": 3.4405,
      "step": 2705
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8047016859054565,
      "learning_rate": 0.0005997958163047476,
      "loss": 3.3554,
      "step": 2706
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9108502864837646,
      "learning_rate": 0.0005997956653821777,
      "loss": 3.3205,
      "step": 2707
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7531832456588745,
      "learning_rate": 0.0005997955144038703,
      "loss": 3.709,
      "step": 2708
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8288087844848633,
      "learning_rate": 0.0005997953633698251,
      "loss": 3.4518,
      "step": 2709
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.984584927558899,
      "learning_rate": 0.0005997952122800423,
      "loss": 3.4448,
      "step": 2710
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8091797828674316,
      "learning_rate": 0.0005997950611345219,
      "loss": 3.4065,
      "step": 2711
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2727468013763428,
      "learning_rate": 0.000599794909933264,
      "loss": 3.5885,
      "step": 2712
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.058391809463501,
      "learning_rate": 0.0005997947586762686,
      "loss": 3.4368,
      "step": 2713
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.275445580482483,
      "learning_rate": 0.0005997946073635356,
      "loss": 3.6341,
      "step": 2714
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5636852979660034,
      "learning_rate": 0.0005997944559950651,
      "loss": 3.5551,
      "step": 2715
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3497875928878784,
      "learning_rate": 0.0005997943045708573,
      "loss": 3.6348,
      "step": 2716
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3404197692871094,
      "learning_rate": 0.000599794153090912,
      "loss": 3.6673,
      "step": 2717
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.073185682296753,
      "learning_rate": 0.0005997940015552292,
      "loss": 3.4341,
      "step": 2718
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7975239753723145,
      "learning_rate": 0.0005997938499638092,
      "loss": 3.723,
      "step": 2719
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3136911392211914,
      "learning_rate": 0.0005997936983166517,
      "loss": 3.3566,
      "step": 2720
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.650493860244751,
      "learning_rate": 0.0005997935466137571,
      "loss": 3.6139,
      "step": 2721
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5143364667892456,
      "learning_rate": 0.0005997933948551251,
      "loss": 3.6605,
      "step": 2722
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.000157356262207,
      "learning_rate": 0.0005997932430407559,
      "loss": 3.5772,
      "step": 2723
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3793787956237793,
      "learning_rate": 0.0005997930911706493,
      "loss": 3.4596,
      "step": 2724
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6471363306045532,
      "learning_rate": 0.0005997929392448057,
      "loss": 3.6665,
      "step": 2725
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.519994854927063,
      "learning_rate": 0.0005997927872632249,
      "loss": 3.5471,
      "step": 2726
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4988046884536743,
      "learning_rate": 0.000599792635225907,
      "loss": 3.5977,
      "step": 2727
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.364457130432129,
      "learning_rate": 0.0005997924831328519,
      "loss": 3.2664,
      "step": 2728
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5399975776672363,
      "learning_rate": 0.0005997923309840597,
      "loss": 3.4646,
      "step": 2729
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6627861261367798,
      "learning_rate": 0.0005997921787795306,
      "loss": 3.4635,
      "step": 2730
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4221526384353638,
      "learning_rate": 0.0005997920265192642,
      "loss": 3.5301,
      "step": 2731
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8305717706680298,
      "learning_rate": 0.0005997918742032611,
      "loss": 3.4634,
      "step": 2732
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4442321062088013,
      "learning_rate": 0.0005997917218315209,
      "loss": 3.541,
      "step": 2733
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6056782007217407,
      "learning_rate": 0.0005997915694040438,
      "loss": 3.3309,
      "step": 2734
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.513891339302063,
      "learning_rate": 0.0005997914169208297,
      "loss": 3.4411,
      "step": 2735
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.42020583152771,
      "learning_rate": 0.0005997912643818788,
      "loss": 3.6131,
      "step": 2736
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.718540072441101,
      "learning_rate": 0.0005997911117871911,
      "loss": 3.6022,
      "step": 2737
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1397268772125244,
      "learning_rate": 0.0005997909591367664,
      "loss": 3.351,
      "step": 2738
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.864949107170105,
      "learning_rate": 0.000599790806430605,
      "loss": 3.3465,
      "step": 2739
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6097233295440674,
      "learning_rate": 0.0005997906536687068,
      "loss": 3.2695,
      "step": 2740
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3733298778533936,
      "learning_rate": 0.0005997905008510719,
      "loss": 3.5726,
      "step": 2741
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.619396686553955,
      "learning_rate": 0.0005997903479777003,
      "loss": 3.3028,
      "step": 2742
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.495255947113037,
      "learning_rate": 0.000599790195048592,
      "loss": 3.3395,
      "step": 2743
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8303064107894897,
      "learning_rate": 0.0005997900420637469,
      "loss": 3.5179,
      "step": 2744
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0840537548065186,
      "learning_rate": 0.0005997898890231654,
      "loss": 3.3031,
      "step": 2745
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4934208393096924,
      "learning_rate": 0.0005997897359268471,
      "loss": 3.588,
      "step": 2746
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.866912841796875,
      "learning_rate": 0.0005997895827747924,
      "loss": 3.7729,
      "step": 2747
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9308955669403076,
      "learning_rate": 0.0005997894295670011,
      "loss": 3.7957,
      "step": 2748
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.856309413909912,
      "learning_rate": 0.0005997892763034733,
      "loss": 3.6127,
      "step": 2749
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.604703664779663,
      "learning_rate": 0.000599789122984209,
      "loss": 3.4966,
      "step": 2750
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.777725338935852,
      "learning_rate": 0.0005997889696092081,
      "loss": 3.7192,
      "step": 2751
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8200411796569824,
      "learning_rate": 0.0005997888161784709,
      "loss": 3.5026,
      "step": 2752
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.867670774459839,
      "learning_rate": 0.0005997886626919974,
      "loss": 3.4895,
      "step": 2753
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5719239711761475,
      "learning_rate": 0.0005997885091497874,
      "loss": 3.3865,
      "step": 2754
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.656063437461853,
      "learning_rate": 0.0005997883555518411,
      "loss": 3.6243,
      "step": 2755
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5914206504821777,
      "learning_rate": 0.0005997882018981585,
      "loss": 3.7405,
      "step": 2756
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4580084085464478,
      "learning_rate": 0.0005997880481887395,
      "loss": 3.3552,
      "step": 2757
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7405532598495483,
      "learning_rate": 0.0005997878944235843,
      "loss": 3.4242,
      "step": 2758
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6539474725723267,
      "learning_rate": 0.000599787740602693,
      "loss": 3.4401,
      "step": 2759
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.99358332157135,
      "learning_rate": 0.0005997875867260654,
      "loss": 3.588,
      "step": 2760
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.452357292175293,
      "learning_rate": 0.0005997874327937015,
      "loss": 3.5053,
      "step": 2761
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4777133464813232,
      "learning_rate": 0.0005997872788056017,
      "loss": 3.7128,
      "step": 2762
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6310738325119019,
      "learning_rate": 0.0005997871247617657,
      "loss": 3.6297,
      "step": 2763
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7219696044921875,
      "learning_rate": 0.0005997869706621936,
      "loss": 3.6442,
      "step": 2764
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9501711130142212,
      "learning_rate": 0.0005997868165068854,
      "loss": 3.7954,
      "step": 2765
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3043951988220215,
      "learning_rate": 0.0005997866622958412,
      "loss": 3.6723,
      "step": 2766
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6158785820007324,
      "learning_rate": 0.0005997865080290609,
      "loss": 3.5862,
      "step": 2767
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9744195938110352,
      "learning_rate": 0.0005997863537065448,
      "loss": 3.6156,
      "step": 2768
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9175790548324585,
      "learning_rate": 0.0005997861993282926,
      "loss": 3.7164,
      "step": 2769
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2892119884490967,
      "learning_rate": 0.0005997860448943047,
      "loss": 3.3548,
      "step": 2770
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.80465030670166,
      "learning_rate": 0.0005997858904045807,
      "loss": 3.3791,
      "step": 2771
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6797544956207275,
      "learning_rate": 0.000599785735859121,
      "loss": 3.3724,
      "step": 2772
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.861963272094727,
      "learning_rate": 0.0005997855812579254,
      "loss": 3.3455,
      "step": 2773
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4937777519226074,
      "learning_rate": 0.000599785426600994,
      "loss": 3.4553,
      "step": 2774
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.4170713424682617,
      "learning_rate": 0.0005997852718883269,
      "loss": 3.5369,
      "step": 2775
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.9793083667755127,
      "learning_rate": 0.0005997851171199239,
      "loss": 3.6587,
      "step": 2776
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7016884088516235,
      "learning_rate": 0.0005997849622957853,
      "loss": 3.3642,
      "step": 2777
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.707017183303833,
      "learning_rate": 0.0005997848074159111,
      "loss": 3.5424,
      "step": 2778
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9485317468643188,
      "learning_rate": 0.0005997846524803011,
      "loss": 3.5118,
      "step": 2779
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7193330526351929,
      "learning_rate": 0.0005997844974889555,
      "loss": 3.5206,
      "step": 2780
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.542805552482605,
      "learning_rate": 0.0005997843424418744,
      "loss": 3.2506,
      "step": 2781
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.760937213897705,
      "learning_rate": 0.0005997841873390576,
      "loss": 3.5485,
      "step": 2782
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2904040813446045,
      "learning_rate": 0.0005997840321805053,
      "loss": 3.6497,
      "step": 2783
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2571496963500977,
      "learning_rate": 0.0005997838769662175,
      "loss": 3.469,
      "step": 2784
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.247666358947754,
      "learning_rate": 0.0005997837216961942,
      "loss": 3.5044,
      "step": 2785
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5008914470672607,
      "learning_rate": 0.0005997835663704356,
      "loss": 3.405,
      "step": 2786
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.800171971321106,
      "learning_rate": 0.0005997834109889414,
      "loss": 3.8301,
      "step": 2787
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9743094444274902,
      "learning_rate": 0.0005997832555517118,
      "loss": 3.5966,
      "step": 2788
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7406928539276123,
      "learning_rate": 0.000599783100058747,
      "loss": 3.4209,
      "step": 2789
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2509292364120483,
      "learning_rate": 0.0005997829445100467,
      "loss": 3.6172,
      "step": 2790
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6010702848434448,
      "learning_rate": 0.0005997827889056112,
      "loss": 3.4476,
      "step": 2791
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2011616230010986,
      "learning_rate": 0.0005997826332454403,
      "loss": 3.4897,
      "step": 2792
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6014161109924316,
      "learning_rate": 0.0005997824775295342,
      "loss": 3.6032,
      "step": 2793
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7340561151504517,
      "learning_rate": 0.0005997823217578929,
      "loss": 3.6753,
      "step": 2794
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4611811637878418,
      "learning_rate": 0.0005997821659305164,
      "loss": 3.4716,
      "step": 2795
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5814703702926636,
      "learning_rate": 0.0005997820100474047,
      "loss": 3.3364,
      "step": 2796
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.789929986000061,
      "learning_rate": 0.000599781854108558,
      "loss": 3.4513,
      "step": 2797
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.519911766052246,
      "learning_rate": 0.0005997816981139761,
      "loss": 3.5799,
      "step": 2798
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.751554012298584,
      "learning_rate": 0.0005997815420636591,
      "loss": 3.5532,
      "step": 2799
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7360951900482178,
      "learning_rate": 0.0005997813859576071,
      "loss": 3.4633,
      "step": 2800
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6400066614151,
      "learning_rate": 0.00059978122979582,
      "loss": 3.5901,
      "step": 2801
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5277645587921143,
      "learning_rate": 0.0005997810735782979,
      "loss": 3.343,
      "step": 2802
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.414904236793518,
      "learning_rate": 0.000599780917305041,
      "loss": 3.5995,
      "step": 2803
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2704997062683105,
      "learning_rate": 0.0005997807609760491,
      "loss": 3.2467,
      "step": 2804
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4911142587661743,
      "learning_rate": 0.0005997806045913223,
      "loss": 3.4066,
      "step": 2805
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.549559473991394,
      "learning_rate": 0.0005997804481508606,
      "loss": 3.6615,
      "step": 2806
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.275177240371704,
      "learning_rate": 0.000599780291654664,
      "loss": 3.6756,
      "step": 2807
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0659713745117188,
      "learning_rate": 0.0005997801351027326,
      "loss": 3.4293,
      "step": 2808
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9645071029663086,
      "learning_rate": 0.0005997799784950665,
      "loss": 3.524,
      "step": 2809
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.132622480392456,
      "learning_rate": 0.0005997798218316656,
      "loss": 3.5481,
      "step": 2810
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.919875144958496,
      "learning_rate": 0.00059977966511253,
      "loss": 3.6553,
      "step": 2811
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.470381259918213,
      "learning_rate": 0.0005997795083376596,
      "loss": 3.7489,
      "step": 2812
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5987814664840698,
      "learning_rate": 0.0005997793515070546,
      "loss": 3.4375,
      "step": 2813
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.945728063583374,
      "learning_rate": 0.000599779194620715,
      "loss": 3.5792,
      "step": 2814
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.0049262046813965,
      "learning_rate": 0.0005997790376786407,
      "loss": 3.3277,
      "step": 2815
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.2596545219421387,
      "learning_rate": 0.0005997788806808319,
      "loss": 3.6076,
      "step": 2816
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.305406093597412,
      "learning_rate": 0.0005997787236272885,
      "loss": 3.4515,
      "step": 2817
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.0474085807800293,
      "learning_rate": 0.0005997785665180106,
      "loss": 3.5052,
      "step": 2818
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3805713653564453,
      "learning_rate": 0.0005997784093529981,
      "loss": 3.2294,
      "step": 2819
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1464420557022095,
      "learning_rate": 0.0005997782521322513,
      "loss": 3.6228,
      "step": 2820
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.761865496635437,
      "learning_rate": 0.00059977809485577,
      "loss": 3.3854,
      "step": 2821
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.830412745475769,
      "learning_rate": 0.0005997779375235544,
      "loss": 3.3981,
      "step": 2822
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5574431419372559,
      "learning_rate": 0.0005997777801356042,
      "loss": 3.2237,
      "step": 2823
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7212165594100952,
      "learning_rate": 0.0005997776226919199,
      "loss": 3.2432,
      "step": 2824
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8394675254821777,
      "learning_rate": 0.000599777465192501,
      "loss": 3.4597,
      "step": 2825
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6205756664276123,
      "learning_rate": 0.000599777307637348,
      "loss": 3.4688,
      "step": 2826
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.828803539276123,
      "learning_rate": 0.0005997771500264607,
      "loss": 3.6768,
      "step": 2827
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4954818487167358,
      "learning_rate": 0.0005997769923598392,
      "loss": 3.6577,
      "step": 2828
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7263000011444092,
      "learning_rate": 0.0005997768346374835,
      "loss": 3.4242,
      "step": 2829
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.021986484527588,
      "learning_rate": 0.0005997766768593935,
      "loss": 3.5657,
      "step": 2830
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5002092123031616,
      "learning_rate": 0.0005997765190255695,
      "loss": 3.5018,
      "step": 2831
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8984413146972656,
      "learning_rate": 0.0005997763611360113,
      "loss": 3.5866,
      "step": 2832
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4642378091812134,
      "learning_rate": 0.000599776203190719,
      "loss": 3.3079,
      "step": 2833
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8204452991485596,
      "learning_rate": 0.0005997760451896927,
      "loss": 3.5727,
      "step": 2834
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2820467948913574,
      "learning_rate": 0.0005997758871329324,
      "loss": 3.6442,
      "step": 2835
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.806713342666626,
      "learning_rate": 0.0005997757290204381,
      "loss": 3.4243,
      "step": 2836
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4083675146102905,
      "learning_rate": 0.0005997755708522099,
      "loss": 3.7524,
      "step": 2837
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.282060980796814,
      "learning_rate": 0.0005997754126282475,
      "loss": 3.3275,
      "step": 2838
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6073017120361328,
      "learning_rate": 0.0005997752543485514,
      "loss": 3.5171,
      "step": 2839
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0750579833984375,
      "learning_rate": 0.0005997750960131214,
      "loss": 3.5941,
      "step": 2840
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4322547912597656,
      "learning_rate": 0.0005997749376219576,
      "loss": 3.631,
      "step": 2841
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.705043315887451,
      "learning_rate": 0.0005997747791750598,
      "loss": 3.5379,
      "step": 2842
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5860838890075684,
      "learning_rate": 0.0005997746206724284,
      "loss": 3.5405,
      "step": 2843
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4238157272338867,
      "learning_rate": 0.0005997744621140631,
      "loss": 3.5936,
      "step": 2844
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.576833486557007,
      "learning_rate": 0.0005997743034999642,
      "loss": 3.4012,
      "step": 2845
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5976024866104126,
      "learning_rate": 0.0005997741448301316,
      "loss": 3.5467,
      "step": 2846
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7237935066223145,
      "learning_rate": 0.0005997739861045651,
      "loss": 3.6367,
      "step": 2847
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8092615604400635,
      "learning_rate": 0.0005997738273232652,
      "loss": 3.5419,
      "step": 2848
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7683547735214233,
      "learning_rate": 0.0005997736684862316,
      "loss": 3.261,
      "step": 2849
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.737741231918335,
      "learning_rate": 0.0005997735095934644,
      "loss": 3.6739,
      "step": 2850
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.077183723449707,
      "learning_rate": 0.0005997733506449636,
      "loss": 3.5589,
      "step": 2851
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7320754528045654,
      "learning_rate": 0.0005997731916407295,
      "loss": 3.6755,
      "step": 2852
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.438655376434326,
      "learning_rate": 0.0005997730325807617,
      "loss": 3.3568,
      "step": 2853
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.178356170654297,
      "learning_rate": 0.0005997728734650604,
      "loss": 3.1829,
      "step": 2854
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5012048482894897,
      "learning_rate": 0.0005997727142936258,
      "loss": 3.6281,
      "step": 2855
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9979608058929443,
      "learning_rate": 0.0005997725550664578,
      "loss": 3.5439,
      "step": 2856
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.213482856750488,
      "learning_rate": 0.0005997723957835564,
      "loss": 3.3959,
      "step": 2857
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.781968593597412,
      "learning_rate": 0.0005997722364449215,
      "loss": 3.7329,
      "step": 2858
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.593557357788086,
      "learning_rate": 0.0005997720770505535,
      "loss": 3.6112,
      "step": 2859
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8678793907165527,
      "learning_rate": 0.000599771917600452,
      "loss": 3.435,
      "step": 2860
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7094696760177612,
      "learning_rate": 0.0005997717580946174,
      "loss": 3.6319,
      "step": 2861
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9328932762145996,
      "learning_rate": 0.0005997715985330495,
      "loss": 3.5,
      "step": 2862
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5883828401565552,
      "learning_rate": 0.0005997714389157485,
      "loss": 3.7283,
      "step": 2863
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8554134368896484,
      "learning_rate": 0.0005997712792427142,
      "loss": 3.7286,
      "step": 2864
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6525962352752686,
      "learning_rate": 0.0005997711195139468,
      "loss": 3.3224,
      "step": 2865
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.901357650756836,
      "learning_rate": 0.0005997709597294463,
      "loss": 3.5783,
      "step": 2866
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6211594343185425,
      "learning_rate": 0.0005997707998892128,
      "loss": 3.4943,
      "step": 2867
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8780070543289185,
      "learning_rate": 0.0005997706399932461,
      "loss": 3.6134,
      "step": 2868
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0685601234436035,
      "learning_rate": 0.0005997704800415465,
      "loss": 3.4279,
      "step": 2869
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6137288808822632,
      "learning_rate": 0.0005997703200341138,
      "loss": 3.5677,
      "step": 2870
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.244835138320923,
      "learning_rate": 0.0005997701599709483,
      "loss": 3.6021,
      "step": 2871
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6888548135757446,
      "learning_rate": 0.0005997699998520497,
      "loss": 3.5643,
      "step": 2872
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5085526704788208,
      "learning_rate": 0.0005997698396774183,
      "loss": 3.6812,
      "step": 2873
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.373037815093994,
      "learning_rate": 0.000599769679447054,
      "loss": 3.275,
      "step": 2874
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8799508810043335,
      "learning_rate": 0.0005997695191609568,
      "loss": 3.4206,
      "step": 2875
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5290451049804688,
      "learning_rate": 0.0005997693588191269,
      "loss": 3.5233,
      "step": 2876
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7828359603881836,
      "learning_rate": 0.0005997691984215641,
      "loss": 3.611,
      "step": 2877
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.724141240119934,
      "learning_rate": 0.0005997690379682687,
      "loss": 3.549,
      "step": 2878
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.381116271018982,
      "learning_rate": 0.0005997688774592404,
      "loss": 3.5238,
      "step": 2879
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.833856225013733,
      "learning_rate": 0.0005997687168944795,
      "loss": 3.5463,
      "step": 2880
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5580254793167114,
      "learning_rate": 0.000599768556273986,
      "loss": 3.4099,
      "step": 2881
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8869467973709106,
      "learning_rate": 0.0005997683955977598,
      "loss": 3.2832,
      "step": 2882
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.003523349761963,
      "learning_rate": 0.0005997682348658009,
      "loss": 3.4856,
      "step": 2883
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7882554531097412,
      "learning_rate": 0.0005997680740781095,
      "loss": 3.515,
      "step": 2884
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5710914134979248,
      "learning_rate": 0.0005997679132346856,
      "loss": 3.6216,
      "step": 2885
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.69269597530365,
      "learning_rate": 0.0005997677523355292,
      "loss": 3.5411,
      "step": 2886
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.422812581062317,
      "learning_rate": 0.0005997675913806404,
      "loss": 3.6619,
      "step": 2887
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8845869302749634,
      "learning_rate": 0.000599767430370019,
      "loss": 3.5018,
      "step": 2888
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3806657791137695,
      "learning_rate": 0.0005997672693036652,
      "loss": 3.6412,
      "step": 2889
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.511683702468872,
      "learning_rate": 0.0005997671081815789,
      "loss": 3.6007,
      "step": 2890
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6984003782272339,
      "learning_rate": 0.0005997669470037604,
      "loss": 3.809,
      "step": 2891
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8114116191864014,
      "learning_rate": 0.0005997667857702095,
      "loss": 3.4023,
      "step": 2892
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7414718866348267,
      "learning_rate": 0.0005997666244809263,
      "loss": 3.5082,
      "step": 2893
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5236002206802368,
      "learning_rate": 0.0005997664631359109,
      "loss": 3.3421,
      "step": 2894
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.41373348236084,
      "learning_rate": 0.0005997663017351632,
      "loss": 3.4547,
      "step": 2895
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4631484746932983,
      "learning_rate": 0.0005997661402786833,
      "loss": 3.6364,
      "step": 2896
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6188180446624756,
      "learning_rate": 0.0005997659787664711,
      "loss": 3.5209,
      "step": 2897
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9715770483016968,
      "learning_rate": 0.0005997658171985269,
      "loss": 3.3979,
      "step": 2898
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.431388258934021,
      "learning_rate": 0.0005997656555748504,
      "loss": 3.3662,
      "step": 2899
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3245307207107544,
      "learning_rate": 0.000599765493895442,
      "loss": 3.5123,
      "step": 2900
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7601978778839111,
      "learning_rate": 0.0005997653321603014,
      "loss": 3.6558,
      "step": 2901
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6025865077972412,
      "learning_rate": 0.0005997651703694288,
      "loss": 3.5192,
      "step": 2902
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7825261354446411,
      "learning_rate": 0.0005997650085228243,
      "loss": 3.4234,
      "step": 2903
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7975585460662842,
      "learning_rate": 0.0005997648466204877,
      "loss": 3.4968,
      "step": 2904
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.206230401992798,
      "learning_rate": 0.0005997646846624191,
      "loss": 3.5612,
      "step": 2905
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.126595973968506,
      "learning_rate": 0.0005997645226486187,
      "loss": 3.4813,
      "step": 2906
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8007476329803467,
      "learning_rate": 0.0005997643605790865,
      "loss": 3.4241,
      "step": 2907
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.514513373374939,
      "learning_rate": 0.0005997641984538223,
      "loss": 3.4415,
      "step": 2908
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8813663721084595,
      "learning_rate": 0.0005997640362728263,
      "loss": 3.7335,
      "step": 2909
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.673189401626587,
      "learning_rate": 0.0005997638740360986,
      "loss": 3.3159,
      "step": 2910
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.261936664581299,
      "learning_rate": 0.0005997637117436391,
      "loss": 3.5644,
      "step": 2911
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.154181718826294,
      "learning_rate": 0.0005997635493954478,
      "loss": 3.5842,
      "step": 2912
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4277713298797607,
      "learning_rate": 0.0005997633869915248,
      "loss": 3.5143,
      "step": 2913
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.376861333847046,
      "learning_rate": 0.0005997632245318702,
      "loss": 3.3797,
      "step": 2914
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6156660318374634,
      "learning_rate": 0.000599763062016484,
      "loss": 4.051,
      "step": 2915
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8258860111236572,
      "learning_rate": 0.0005997628994453661,
      "loss": 3.3352,
      "step": 2916
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5018761157989502,
      "learning_rate": 0.0005997627368185166,
      "loss": 3.5458,
      "step": 2917
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.106407880783081,
      "learning_rate": 0.0005997625741359356,
      "loss": 3.379,
      "step": 2918
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.098435163497925,
      "learning_rate": 0.000599762411397623,
      "loss": 3.4665,
      "step": 2919
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7203973531723022,
      "learning_rate": 0.000599762248603579,
      "loss": 3.4251,
      "step": 2920
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4575610160827637,
      "learning_rate": 0.0005997620857538035,
      "loss": 3.5455,
      "step": 2921
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3661653995513916,
      "learning_rate": 0.0005997619228482966,
      "loss": 3.8677,
      "step": 2922
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5271658897399902,
      "learning_rate": 0.0005997617598870582,
      "loss": 3.367,
      "step": 2923
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0357437133789062,
      "learning_rate": 0.0005997615968700885,
      "loss": 3.4211,
      "step": 2924
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6626766920089722,
      "learning_rate": 0.0005997614337973875,
      "loss": 3.5651,
      "step": 2925
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4978337287902832,
      "learning_rate": 0.0005997612706689552,
      "loss": 3.5515,
      "step": 2926
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.2072832584381104,
      "learning_rate": 0.0005997611074847916,
      "loss": 3.4669,
      "step": 2927
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6869866847991943,
      "learning_rate": 0.0005997609442448968,
      "loss": 3.6361,
      "step": 2928
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9177407026290894,
      "learning_rate": 0.0005997607809492706,
      "loss": 3.6003,
      "step": 2929
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.467380166053772,
      "learning_rate": 0.0005997606175979133,
      "loss": 3.5097,
      "step": 2930
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.732478380203247,
      "learning_rate": 0.0005997604541908249,
      "loss": 3.5924,
      "step": 2931
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8486608266830444,
      "learning_rate": 0.0005997602907280053,
      "loss": 3.5159,
      "step": 2932
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.114819288253784,
      "learning_rate": 0.0005997601272094546,
      "loss": 3.3354,
      "step": 2933
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9087387323379517,
      "learning_rate": 0.0005997599636351729,
      "loss": 3.3968,
      "step": 2934
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.203228235244751,
      "learning_rate": 0.0005997598000051603,
      "loss": 3.4338,
      "step": 2935
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5982252359390259,
      "learning_rate": 0.0005997596363194165,
      "loss": 3.4828,
      "step": 2936
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8707493543624878,
      "learning_rate": 0.0005997594725779417,
      "loss": 3.3499,
      "step": 2937
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5872395038604736,
      "learning_rate": 0.0005997593087807361,
      "loss": 3.5122,
      "step": 2938
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7769825458526611,
      "learning_rate": 0.0005997591449277994,
      "loss": 3.7297,
      "step": 2939
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4045606851577759,
      "learning_rate": 0.000599758981019132,
      "loss": 3.5473,
      "step": 2940
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.777321696281433,
      "learning_rate": 0.0005997588170547336,
      "loss": 3.8457,
      "step": 2941
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.510056495666504,
      "learning_rate": 0.0005997586530346045,
      "loss": 3.6648,
      "step": 2942
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8311494588851929,
      "learning_rate": 0.0005997584889587445,
      "loss": 3.2363,
      "step": 2943
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.450128436088562,
      "learning_rate": 0.0005997583248271538,
      "loss": 3.4769,
      "step": 2944
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.590936541557312,
      "learning_rate": 0.0005997581606398324,
      "loss": 3.6735,
      "step": 2945
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.82721745967865,
      "learning_rate": 0.0005997579963967802,
      "loss": 3.6046,
      "step": 2946
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.379290223121643,
      "learning_rate": 0.0005997578320979974,
      "loss": 3.1114,
      "step": 2947
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.799588441848755,
      "learning_rate": 0.000599757667743484,
      "loss": 3.6911,
      "step": 2948
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6852244138717651,
      "learning_rate": 0.00059975750333324,
      "loss": 3.4195,
      "step": 2949
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7767295837402344,
      "learning_rate": 0.0005997573388672652,
      "loss": 3.546,
      "step": 2950
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0400450229644775,
      "learning_rate": 0.0005997571743455601,
      "loss": 3.358,
      "step": 2951
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3132400512695312,
      "learning_rate": 0.0005997570097681244,
      "loss": 3.5829,
      "step": 2952
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4512386322021484,
      "learning_rate": 0.0005997568451349582,
      "loss": 3.5355,
      "step": 2953
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8435190916061401,
      "learning_rate": 0.0005997566804460616,
      "loss": 3.6816,
      "step": 2954
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9715489149093628,
      "learning_rate": 0.0005997565157014346,
      "loss": 3.5263,
      "step": 2955
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6832987070083618,
      "learning_rate": 0.0005997563509010771,
      "loss": 3.445,
      "step": 2956
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7793997526168823,
      "learning_rate": 0.0005997561860449892,
      "loss": 3.3539,
      "step": 2957
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6843430995941162,
      "learning_rate": 0.0005997560211331711,
      "loss": 3.5725,
      "step": 2958
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.706480622291565,
      "learning_rate": 0.0005997558561656227,
      "loss": 3.4677,
      "step": 2959
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4164023399353027,
      "learning_rate": 0.0005997556911423439,
      "loss": 3.4842,
      "step": 2960
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7361506223678589,
      "learning_rate": 0.0005997555260633349,
      "loss": 3.4996,
      "step": 2961
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8093663454055786,
      "learning_rate": 0.0005997553609285957,
      "loss": 3.4296,
      "step": 2962
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6685824394226074,
      "learning_rate": 0.0005997551957381263,
      "loss": 3.5664,
      "step": 2963
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5715028047561646,
      "learning_rate": 0.0005997550304919269,
      "loss": 3.42,
      "step": 2964
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5290000438690186,
      "learning_rate": 0.0005997548651899972,
      "loss": 3.3708,
      "step": 2965
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4670076370239258,
      "learning_rate": 0.0005997546998323375,
      "loss": 3.3968,
      "step": 2966
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.53077232837677,
      "learning_rate": 0.0005997545344189477,
      "loss": 3.4548,
      "step": 2967
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.814154028892517,
      "learning_rate": 0.0005997543689498279,
      "loss": 3.704,
      "step": 2968
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.513310432434082,
      "learning_rate": 0.0005997542034249781,
      "loss": 3.423,
      "step": 2969
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4870446920394897,
      "learning_rate": 0.0005997540378443984,
      "loss": 3.6568,
      "step": 2970
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3229575157165527,
      "learning_rate": 0.0005997538722080887,
      "loss": 3.3123,
      "step": 2971
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4426047801971436,
      "learning_rate": 0.0005997537065160491,
      "loss": 3.5923,
      "step": 2972
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5824135541915894,
      "learning_rate": 0.0005997535407682796,
      "loss": 3.5116,
      "step": 2973
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7187907695770264,
      "learning_rate": 0.0005997533749647803,
      "loss": 3.6796,
      "step": 2974
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6321144104003906,
      "learning_rate": 0.0005997532091055512,
      "loss": 3.4321,
      "step": 2975
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.561172366142273,
      "learning_rate": 0.0005997530431905922,
      "loss": 3.574,
      "step": 2976
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5264167785644531,
      "learning_rate": 0.0005997528772199035,
      "loss": 3.5622,
      "step": 2977
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.0616161823272705,
      "learning_rate": 0.0005997527111934852,
      "loss": 3.4369,
      "step": 2978
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6786351203918457,
      "learning_rate": 0.0005997525451113372,
      "loss": 3.5344,
      "step": 2979
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1298346519470215,
      "learning_rate": 0.0005997523789734594,
      "loss": 3.4615,
      "step": 2980
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6346757411956787,
      "learning_rate": 0.0005997522127798522,
      "loss": 3.3489,
      "step": 2981
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5647732019424438,
      "learning_rate": 0.0005997520465305153,
      "loss": 3.3956,
      "step": 2982
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8109614849090576,
      "learning_rate": 0.0005997518802254487,
      "loss": 3.4235,
      "step": 2983
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.673298716545105,
      "learning_rate": 0.0005997517138646527,
      "loss": 3.4607,
      "step": 2984
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7911218404769897,
      "learning_rate": 0.0005997515474481271,
      "loss": 3.3138,
      "step": 2985
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7456351518630981,
      "learning_rate": 0.0005997513809758721,
      "loss": 3.4097,
      "step": 2986
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1371405124664307,
      "learning_rate": 0.0005997512144478877,
      "loss": 3.7673,
      "step": 2987
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0382332801818848,
      "learning_rate": 0.0005997510478641738,
      "loss": 3.5783,
      "step": 2988
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0741751194000244,
      "learning_rate": 0.0005997508812247306,
      "loss": 3.4277,
      "step": 2989
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8858294486999512,
      "learning_rate": 0.0005997507145295579,
      "loss": 3.4577,
      "step": 2990
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.0435640811920166,
      "learning_rate": 0.0005997505477786561,
      "loss": 3.312,
      "step": 2991
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.268948554992676,
      "learning_rate": 0.0005997503809720249,
      "loss": 3.4842,
      "step": 2992
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5977529287338257,
      "learning_rate": 0.0005997502141096645,
      "loss": 3.704,
      "step": 2993
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4369847774505615,
      "learning_rate": 0.0005997500471915748,
      "loss": 3.1444,
      "step": 2994
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5232982635498047,
      "learning_rate": 0.000599749880217756,
      "loss": 3.4221,
      "step": 2995
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.308969020843506,
      "learning_rate": 0.000599749713188208,
      "loss": 3.4298,
      "step": 2996
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8393914699554443,
      "learning_rate": 0.0005997495461029308,
      "loss": 3.6273,
      "step": 2997
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.156843900680542,
      "learning_rate": 0.0005997493789619246,
      "loss": 3.5959,
      "step": 2998
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8909684419631958,
      "learning_rate": 0.0005997492117651893,
      "loss": 3.4632,
      "step": 2999
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4566255807876587,
      "learning_rate": 0.0005997490445127249,
      "loss": 3.7021,
      "step": 3000
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8775097131729126,
      "learning_rate": 0.0005997488772045316,
      "loss": 3.3511,
      "step": 3001
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3865861892700195,
      "learning_rate": 0.0005997487098406092,
      "loss": 3.5198,
      "step": 3002
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7885322570800781,
      "learning_rate": 0.0005997485424209581,
      "loss": 3.2476,
      "step": 3003
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8019258975982666,
      "learning_rate": 0.0005997483749455778,
      "loss": 3.6012,
      "step": 3004
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7813408374786377,
      "learning_rate": 0.0005997482074144687,
      "loss": 3.2817,
      "step": 3005
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.1881396770477295,
      "learning_rate": 0.0005997480398276308,
      "loss": 3.4692,
      "step": 3006
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.344059467315674,
      "learning_rate": 0.0005997478721850642,
      "loss": 3.3587,
      "step": 3007
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8666269779205322,
      "learning_rate": 0.0005997477044867686,
      "loss": 3.3796,
      "step": 3008
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8639163970947266,
      "learning_rate": 0.0005997475367327444,
      "loss": 3.6016,
      "step": 3009
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.369767427444458,
      "learning_rate": 0.0005997473689229914,
      "loss": 3.4841,
      "step": 3010
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.481142520904541,
      "learning_rate": 0.0005997472010575096,
      "loss": 3.336,
      "step": 3011
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.743923544883728,
      "learning_rate": 0.0005997470331362993,
      "loss": 3.2547,
      "step": 3012
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8105546236038208,
      "learning_rate": 0.0005997468651593603,
      "loss": 3.37,
      "step": 3013
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.217355728149414,
      "learning_rate": 0.0005997466971266927,
      "loss": 3.5027,
      "step": 3014
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7140816450119019,
      "learning_rate": 0.0005997465290382965,
      "loss": 3.1941,
      "step": 3015
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3228261470794678,
      "learning_rate": 0.0005997463608941719,
      "loss": 3.8104,
      "step": 3016
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.018759250640869,
      "learning_rate": 0.0005997461926943187,
      "loss": 3.4641,
      "step": 3017
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3079293966293335,
      "learning_rate": 0.000599746024438737,
      "loss": 3.2927,
      "step": 3018
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.864505410194397,
      "learning_rate": 0.0005997458561274268,
      "loss": 3.5905,
      "step": 3019
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.312253713607788,
      "learning_rate": 0.0005997456877603883,
      "loss": 3.3828,
      "step": 3020
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5661242008209229,
      "learning_rate": 0.0005997455193376215,
      "loss": 3.3082,
      "step": 3021
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3373981714248657,
      "learning_rate": 0.0005997453508591262,
      "loss": 3.5811,
      "step": 3022
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8321950435638428,
      "learning_rate": 0.0005997451823249025,
      "loss": 3.4696,
      "step": 3023
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8845746517181396,
      "learning_rate": 0.0005997450137349506,
      "loss": 3.4101,
      "step": 3024
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.590988278388977,
      "learning_rate": 0.0005997448450892705,
      "loss": 3.6555,
      "step": 3025
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3461623191833496,
      "learning_rate": 0.0005997446763878622,
      "loss": 3.4613,
      "step": 3026
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8200820684432983,
      "learning_rate": 0.0005997445076307257,
      "loss": 3.4719,
      "step": 3027
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.300194263458252,
      "learning_rate": 0.000599744338817861,
      "loss": 3.45,
      "step": 3028
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6597710847854614,
      "learning_rate": 0.0005997441699492681,
      "loss": 3.4072,
      "step": 3029
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.95018470287323,
      "learning_rate": 0.0005997440010249472,
      "loss": 3.4577,
      "step": 3030
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5461431741714478,
      "learning_rate": 0.0005997438320448982,
      "loss": 3.5943,
      "step": 3031
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6143394708633423,
      "learning_rate": 0.0005997436630091211,
      "loss": 3.5722,
      "step": 3032
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.479683876037598,
      "learning_rate": 0.0005997434939176161,
      "loss": 3.2509,
      "step": 3033
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9723237752914429,
      "learning_rate": 0.0005997433247703831,
      "loss": 3.6826,
      "step": 3034
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6237717866897583,
      "learning_rate": 0.0005997431555674221,
      "loss": 3.3755,
      "step": 3035
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1878621578216553,
      "learning_rate": 0.0005997429863087333,
      "loss": 3.4511,
      "step": 3036
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2694976329803467,
      "learning_rate": 0.0005997428169943165,
      "loss": 3.1773,
      "step": 3037
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7098119258880615,
      "learning_rate": 0.0005997426476241719,
      "loss": 3.5725,
      "step": 3038
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5034167766571045,
      "learning_rate": 0.0005997424781982994,
      "loss": 3.5803,
      "step": 3039
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7114644050598145,
      "learning_rate": 0.0005997423087166992,
      "loss": 3.4729,
      "step": 3040
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9863622188568115,
      "learning_rate": 0.0005997421391793714,
      "loss": 3.326,
      "step": 3041
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8777546882629395,
      "learning_rate": 0.0005997419695863156,
      "loss": 3.5985,
      "step": 3042
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6008894443511963,
      "learning_rate": 0.0005997417999375322,
      "loss": 3.5755,
      "step": 3043
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.821616530418396,
      "learning_rate": 0.0005997416302330213,
      "loss": 3.4888,
      "step": 3044
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0491042137145996,
      "learning_rate": 0.0005997414604727825,
      "loss": 3.6617,
      "step": 3045
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9373708963394165,
      "learning_rate": 0.0005997412906568163,
      "loss": 3.3665,
      "step": 3046
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5323318243026733,
      "learning_rate": 0.0005997411207851226,
      "loss": 3.299,
      "step": 3047
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.307602882385254,
      "learning_rate": 0.0005997409508577013,
      "loss": 3.6337,
      "step": 3048
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.484583616256714,
      "learning_rate": 0.0005997407808745523,
      "loss": 3.4622,
      "step": 3049
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6542800664901733,
      "learning_rate": 0.0005997406108356761,
      "loss": 3.4435,
      "step": 3050
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5932390689849854,
      "learning_rate": 0.0005997404407410723,
      "loss": 3.4766,
      "step": 3051
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4966773986816406,
      "learning_rate": 0.0005997402705907411,
      "loss": 3.2047,
      "step": 3052
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5894945859909058,
      "learning_rate": 0.0005997401003846825,
      "loss": 3.5994,
      "step": 3053
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7990859746932983,
      "learning_rate": 0.0005997399301228966,
      "loss": 3.6935,
      "step": 3054
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4144595861434937,
      "learning_rate": 0.0005997397598053835,
      "loss": 3.4796,
      "step": 3055
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.445778489112854,
      "learning_rate": 0.000599739589432143,
      "loss": 3.3613,
      "step": 3056
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.193928837776184,
      "learning_rate": 0.0005997394190031753,
      "loss": 3.4359,
      "step": 3057
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9405173063278198,
      "learning_rate": 0.0005997392485184804,
      "loss": 3.3324,
      "step": 3058
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7010499238967896,
      "learning_rate": 0.0005997390779780582,
      "loss": 3.1264,
      "step": 3059
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5991148948669434,
      "learning_rate": 0.000599738907381909,
      "loss": 3.3834,
      "step": 3060
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4839212894439697,
      "learning_rate": 0.0005997387367300327,
      "loss": 3.5035,
      "step": 3061
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8006116151809692,
      "learning_rate": 0.0005997385660224292,
      "loss": 3.7225,
      "step": 3062
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7882819175720215,
      "learning_rate": 0.0005997383952590986,
      "loss": 3.5254,
      "step": 3063
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1222753524780273,
      "learning_rate": 0.000599738224440041,
      "loss": 3.4241,
      "step": 3064
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5229096412658691,
      "learning_rate": 0.0005997380535652564,
      "loss": 3.4439,
      "step": 3065
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7904616594314575,
      "learning_rate": 0.000599737882634745,
      "loss": 3.5711,
      "step": 3066
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8078594207763672,
      "learning_rate": 0.0005997377116485065,
      "loss": 3.5207,
      "step": 3067
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5754297971725464,
      "learning_rate": 0.0005997375406065412,
      "loss": 3.626,
      "step": 3068
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0824501514434814,
      "learning_rate": 0.0005997373695088489,
      "loss": 3.4654,
      "step": 3069
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8570492267608643,
      "learning_rate": 0.0005997371983554299,
      "loss": 3.3019,
      "step": 3070
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8948966264724731,
      "learning_rate": 0.0005997370271462841,
      "loss": 3.5196,
      "step": 3071
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6128082275390625,
      "learning_rate": 0.0005997368558814114,
      "loss": 3.4524,
      "step": 3072
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5900166034698486,
      "learning_rate": 0.0005997366845608121,
      "loss": 3.0648,
      "step": 3073
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.745251178741455,
      "learning_rate": 0.000599736513184486,
      "loss": 3.6907,
      "step": 3074
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0865705013275146,
      "learning_rate": 0.0005997363417524333,
      "loss": 3.7324,
      "step": 3075
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9248933792114258,
      "learning_rate": 0.000599736170264654,
      "loss": 3.324,
      "step": 3076
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1571671962738037,
      "learning_rate": 0.0005997359987211479,
      "loss": 3.4549,
      "step": 3077
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8587803840637207,
      "learning_rate": 0.0005997358271219153,
      "loss": 3.2828,
      "step": 3078
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5382992029190063,
      "learning_rate": 0.0005997356554669562,
      "loss": 3.5726,
      "step": 3079
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.350069999694824,
      "learning_rate": 0.0005997354837562706,
      "loss": 3.5574,
      "step": 3080
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.899951219558716,
      "learning_rate": 0.0005997353119898584,
      "loss": 3.2455,
      "step": 3081
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2252750396728516,
      "learning_rate": 0.0005997351401677199,
      "loss": 3.2073,
      "step": 3082
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.806465744972229,
      "learning_rate": 0.0005997349682898549,
      "loss": 3.295,
      "step": 3083
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1390221118927,
      "learning_rate": 0.0005997347963562635,
      "loss": 3.5421,
      "step": 3084
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7749323844909668,
      "learning_rate": 0.0005997346243669457,
      "loss": 3.3442,
      "step": 3085
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5318251848220825,
      "learning_rate": 0.0005997344523219016,
      "loss": 3.482,
      "step": 3086
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8040956258773804,
      "learning_rate": 0.0005997342802211313,
      "loss": 3.7569,
      "step": 3087
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3499542474746704,
      "learning_rate": 0.0005997341080646346,
      "loss": 3.4099,
      "step": 3088
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2202255725860596,
      "learning_rate": 0.0005997339358524118,
      "loss": 3.3176,
      "step": 3089
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4463067054748535,
      "learning_rate": 0.0005997337635844628,
      "loss": 3.4507,
      "step": 3090
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.619847297668457,
      "learning_rate": 0.0005997335912607875,
      "loss": 3.4752,
      "step": 3091
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9709144830703735,
      "learning_rate": 0.0005997334188813861,
      "loss": 3.7448,
      "step": 3092
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.403959035873413,
      "learning_rate": 0.0005997332464462588,
      "loss": 3.433,
      "step": 3093
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9474605321884155,
      "learning_rate": 0.0005997330739554052,
      "loss": 3.5659,
      "step": 3094
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.490371823310852,
      "learning_rate": 0.0005997329014088257,
      "loss": 3.6063,
      "step": 3095
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7576217651367188,
      "learning_rate": 0.0005997327288065201,
      "loss": 3.2347,
      "step": 3096
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4306448698043823,
      "learning_rate": 0.0005997325561484885,
      "loss": 3.5377,
      "step": 3097
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0046348571777344,
      "learning_rate": 0.0005997323834347311,
      "loss": 3.5123,
      "step": 3098
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7790899276733398,
      "learning_rate": 0.0005997322106652477,
      "loss": 3.3818,
      "step": 3099
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.1369457244873047,
      "learning_rate": 0.0005997320378400385,
      "loss": 3.336,
      "step": 3100
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9103004932403564,
      "learning_rate": 0.0005997318649591034,
      "loss": 3.5521,
      "step": 3101
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.373542070388794,
      "learning_rate": 0.0005997316920224424,
      "loss": 3.675,
      "step": 3102
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5310511589050293,
      "learning_rate": 0.0005997315190300557,
      "loss": 3.5892,
      "step": 3103
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5644978284835815,
      "learning_rate": 0.0005997313459819434,
      "loss": 3.4494,
      "step": 3104
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8671436309814453,
      "learning_rate": 0.0005997311728781052,
      "loss": 3.2081,
      "step": 3105
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4400050640106201,
      "learning_rate": 0.0005997309997185414,
      "loss": 3.7282,
      "step": 3106
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6254760026931763,
      "learning_rate": 0.0005997308265032518,
      "loss": 3.26,
      "step": 3107
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.197772741317749,
      "learning_rate": 0.0005997306532322368,
      "loss": 3.1586,
      "step": 3108
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.139058828353882,
      "learning_rate": 0.0005997304799054961,
      "loss": 3.5496,
      "step": 3109
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2004568576812744,
      "learning_rate": 0.0005997303065230299,
      "loss": 3.2783,
      "step": 3110
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5416548252105713,
      "learning_rate": 0.0005997301330848381,
      "loss": 3.5664,
      "step": 3111
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6895328760147095,
      "learning_rate": 0.0005997299595909208,
      "loss": 3.5646,
      "step": 3112
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3363516330718994,
      "learning_rate": 0.0005997297860412781,
      "loss": 3.4123,
      "step": 3113
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.521017074584961,
      "learning_rate": 0.00059972961243591,
      "loss": 3.4323,
      "step": 3114
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9034216403961182,
      "learning_rate": 0.0005997294387748165,
      "loss": 3.5347,
      "step": 3115
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.0164060592651367,
      "learning_rate": 0.0005997292650579976,
      "loss": 3.3923,
      "step": 3116
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0574615001678467,
      "learning_rate": 0.0005997290912854534,
      "loss": 3.645,
      "step": 3117
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3965518474578857,
      "learning_rate": 0.0005997289174571839,
      "loss": 3.5043,
      "step": 3118
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6070754528045654,
      "learning_rate": 0.000599728743573189,
      "loss": 3.4114,
      "step": 3119
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2812033891677856,
      "learning_rate": 0.0005997285696334691,
      "loss": 3.5741,
      "step": 3120
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4262943267822266,
      "learning_rate": 0.0005997283956380239,
      "loss": 3.4733,
      "step": 3121
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.824780821800232,
      "learning_rate": 0.0005997282215868535,
      "loss": 3.4059,
      "step": 3122
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8525935411453247,
      "learning_rate": 0.000599728047479958,
      "loss": 3.5519,
      "step": 3123
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3564608097076416,
      "learning_rate": 0.0005997278733173374,
      "loss": 3.6786,
      "step": 3124
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8076852560043335,
      "learning_rate": 0.0005997276990989917,
      "loss": 3.4658,
      "step": 3125
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9326456785202026,
      "learning_rate": 0.000599727524824921,
      "loss": 3.4343,
      "step": 3126
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8708887100219727,
      "learning_rate": 0.0005997273504951253,
      "loss": 3.5681,
      "step": 3127
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6937994956970215,
      "learning_rate": 0.0005997271761096045,
      "loss": 3.4643,
      "step": 3128
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8496904373168945,
      "learning_rate": 0.0005997270016683589,
      "loss": 3.5507,
      "step": 3129
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6512811183929443,
      "learning_rate": 0.0005997268271713884,
      "loss": 3.0881,
      "step": 3130
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6576260328292847,
      "learning_rate": 0.000599726652618693,
      "loss": 3.7261,
      "step": 3131
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.559287428855896,
      "learning_rate": 0.0005997264780102727,
      "loss": 3.6688,
      "step": 3132
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9746917486190796,
      "learning_rate": 0.0005997263033461277,
      "loss": 3.266,
      "step": 3133
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.768396019935608,
      "learning_rate": 0.0005997261286262578,
      "loss": 3.6017,
      "step": 3134
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.791387915611267,
      "learning_rate": 0.0005997259538506632,
      "loss": 3.4311,
      "step": 3135
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.085716962814331,
      "learning_rate": 0.0005997257790193439,
      "loss": 3.4009,
      "step": 3136
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6145272254943848,
      "learning_rate": 0.0005997256041322999,
      "loss": 3.4636,
      "step": 3137
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4334083795547485,
      "learning_rate": 0.0005997254291895313,
      "loss": 3.4687,
      "step": 3138
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4535558223724365,
      "learning_rate": 0.0005997252541910381,
      "loss": 3.2405,
      "step": 3139
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.482710838317871,
      "learning_rate": 0.0005997250791368202,
      "loss": 3.6328,
      "step": 3140
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.66441011428833,
      "learning_rate": 0.0005997249040268779,
      "loss": 3.3142,
      "step": 3141
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9983547925949097,
      "learning_rate": 0.000599724728861211,
      "loss": 3.4957,
      "step": 3142
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4213356971740723,
      "learning_rate": 0.0005997245536398196,
      "loss": 3.4566,
      "step": 3143
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.528503656387329,
      "learning_rate": 0.0005997243783627037,
      "loss": 3.557,
      "step": 3144
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8954670429229736,
      "learning_rate": 0.0005997242030298635,
      "loss": 3.5024,
      "step": 3145
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.574546217918396,
      "learning_rate": 0.0005997240276412988,
      "loss": 3.6716,
      "step": 3146
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.19869327545166,
      "learning_rate": 0.0005997238521970099,
      "loss": 3.4945,
      "step": 3147
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.016141176223755,
      "learning_rate": 0.0005997236766969965,
      "loss": 3.5829,
      "step": 3148
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1302547454833984,
      "learning_rate": 0.000599723501141259,
      "loss": 3.3874,
      "step": 3149
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.467217206954956,
      "learning_rate": 0.0005997233255297972,
      "loss": 3.4391,
      "step": 3150
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5067968368530273,
      "learning_rate": 0.000599723149862611,
      "loss": 3.6794,
      "step": 3151
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.705108642578125,
      "learning_rate": 0.0005997229741397008,
      "loss": 3.4324,
      "step": 3152
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3814635276794434,
      "learning_rate": 0.0005997227983610663,
      "loss": 3.6092,
      "step": 3153
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6755133867263794,
      "learning_rate": 0.0005997226225267077,
      "loss": 3.2954,
      "step": 3154
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.268024206161499,
      "learning_rate": 0.000599722446636625,
      "loss": 3.4675,
      "step": 3155
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6226449012756348,
      "learning_rate": 0.0005997222706908183,
      "loss": 3.5522,
      "step": 3156
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.952595591545105,
      "learning_rate": 0.0005997220946892876,
      "loss": 3.4037,
      "step": 3157
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.596642017364502,
      "learning_rate": 0.0005997219186320329,
      "loss": 3.5161,
      "step": 3158
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.477177619934082,
      "learning_rate": 0.0005997217425190542,
      "loss": 3.4772,
      "step": 3159
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6450142860412598,
      "learning_rate": 0.0005997215663503515,
      "loss": 3.444,
      "step": 3160
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3984103202819824,
      "learning_rate": 0.0005997213901259251,
      "loss": 3.4243,
      "step": 3161
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4560115337371826,
      "learning_rate": 0.0005997212138457746,
      "loss": 3.3156,
      "step": 3162
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.482320785522461,
      "learning_rate": 0.0005997210375099004,
      "loss": 3.6057,
      "step": 3163
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9324315786361694,
      "learning_rate": 0.0005997208611183024,
      "loss": 3.4865,
      "step": 3164
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.630824089050293,
      "learning_rate": 0.0005997206846709806,
      "loss": 3.5003,
      "step": 3165
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4637666940689087,
      "learning_rate": 0.000599720508167935,
      "loss": 3.5512,
      "step": 3166
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8373116254806519,
      "learning_rate": 0.0005997203316091659,
      "loss": 3.6932,
      "step": 3167
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.635352373123169,
      "learning_rate": 0.0005997201549946729,
      "loss": 3.4076,
      "step": 3168
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.959681749343872,
      "learning_rate": 0.0005997199783244565,
      "loss": 3.4801,
      "step": 3169
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.084505796432495,
      "learning_rate": 0.0005997198015985164,
      "loss": 3.412,
      "step": 3170
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5151679515838623,
      "learning_rate": 0.0005997196248168526,
      "loss": 3.3235,
      "step": 3171
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7529877424240112,
      "learning_rate": 0.0005997194479794653,
      "loss": 3.5909,
      "step": 3172
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.391749858856201,
      "learning_rate": 0.0005997192710863547,
      "loss": 3.7217,
      "step": 3173
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.217839241027832,
      "learning_rate": 0.0005997190941375204,
      "loss": 3.4822,
      "step": 3174
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4232902526855469,
      "learning_rate": 0.0005997189171329628,
      "loss": 3.3909,
      "step": 3175
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.427750825881958,
      "learning_rate": 0.0005997187400726818,
      "loss": 3.4631,
      "step": 3176
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.559389591217041,
      "learning_rate": 0.0005997185629566773,
      "loss": 3.4791,
      "step": 3177
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9491395950317383,
      "learning_rate": 0.0005997183857849497,
      "loss": 3.5257,
      "step": 3178
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8951616287231445,
      "learning_rate": 0.0005997182085574986,
      "loss": 3.5372,
      "step": 3179
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.535327911376953,
      "learning_rate": 0.0005997180312743243,
      "loss": 3.6146,
      "step": 3180
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.446151614189148,
      "learning_rate": 0.0005997178539354266,
      "loss": 3.7072,
      "step": 3181
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8465535640716553,
      "learning_rate": 0.0005997176765408058,
      "loss": 3.5361,
      "step": 3182
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.9431564807891846,
      "learning_rate": 0.000599717499090462,
      "loss": 3.6542,
      "step": 3183
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4711577892303467,
      "learning_rate": 0.0005997173215843949,
      "loss": 3.4051,
      "step": 3184
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8383283615112305,
      "learning_rate": 0.0005997171440226047,
      "loss": 3.4966,
      "step": 3185
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.1187453269958496,
      "learning_rate": 0.0005997169664050914,
      "loss": 3.4614,
      "step": 3186
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1961538791656494,
      "learning_rate": 0.0005997167887318551,
      "loss": 2.9949,
      "step": 3187
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3834540843963623,
      "learning_rate": 0.0005997166110028958,
      "loss": 3.5323,
      "step": 3188
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.522657871246338,
      "learning_rate": 0.0005997164332182135,
      "loss": 3.489,
      "step": 3189
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7749089002609253,
      "learning_rate": 0.0005997162553778082,
      "loss": 3.5323,
      "step": 3190
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.632157802581787,
      "learning_rate": 0.0005997160774816801,
      "loss": 3.4947,
      "step": 3191
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.35823130607605,
      "learning_rate": 0.000599715899529829,
      "loss": 3.1875,
      "step": 3192
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6437957286834717,
      "learning_rate": 0.0005997157215222552,
      "loss": 3.2908,
      "step": 3193
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9551528692245483,
      "learning_rate": 0.0005997155434589584,
      "loss": 3.4974,
      "step": 3194
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2870382070541382,
      "learning_rate": 0.0005997153653399391,
      "loss": 3.7474,
      "step": 3195
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0536727905273438,
      "learning_rate": 0.0005997151871651968,
      "loss": 3.3224,
      "step": 3196
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8680384159088135,
      "learning_rate": 0.0005997150089347319,
      "loss": 3.4961,
      "step": 3197
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6655901670455933,
      "learning_rate": 0.0005997148306485443,
      "loss": 3.4134,
      "step": 3198
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8937275409698486,
      "learning_rate": 0.0005997146523066342,
      "loss": 3.3579,
      "step": 3199
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.681121587753296,
      "learning_rate": 0.0005997144739090012,
      "loss": 3.3834,
      "step": 3200
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5701638460159302,
      "learning_rate": 0.0005997142954556458,
      "loss": 3.6084,
      "step": 3201
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3707525730133057,
      "learning_rate": 0.0005997141169465679,
      "loss": 3.2161,
      "step": 3202
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6335831880569458,
      "learning_rate": 0.0005997139383817673,
      "loss": 3.2176,
      "step": 3203
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7018567323684692,
      "learning_rate": 0.0005997137597612444,
      "loss": 3.3867,
      "step": 3204
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7194087505340576,
      "learning_rate": 0.000599713581084999,
      "loss": 3.1563,
      "step": 3205
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7963593006134033,
      "learning_rate": 0.0005997134023530312,
      "loss": 3.3884,
      "step": 3206
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1116766929626465,
      "learning_rate": 0.0005997132235653409,
      "loss": 3.4097,
      "step": 3207
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.23608136177063,
      "learning_rate": 0.0005997130447219284,
      "loss": 3.4822,
      "step": 3208
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.768381953239441,
      "learning_rate": 0.0005997128658227936,
      "loss": 3.4066,
      "step": 3209
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.58351731300354,
      "learning_rate": 0.0005997126868679364,
      "loss": 3.5319,
      "step": 3210
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6910700798034668,
      "learning_rate": 0.000599712507857357,
      "loss": 3.4838,
      "step": 3211
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7537221908569336,
      "learning_rate": 0.0005997123287910555,
      "loss": 3.4882,
      "step": 3212
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5975375175476074,
      "learning_rate": 0.0005997121496690317,
      "loss": 3.5047,
      "step": 3213
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7212865352630615,
      "learning_rate": 0.0005997119704912858,
      "loss": 3.5591,
      "step": 3214
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6258842945098877,
      "learning_rate": 0.0005997117912578177,
      "loss": 3.477,
      "step": 3215
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8011691570281982,
      "learning_rate": 0.0005997116119686276,
      "loss": 3.4463,
      "step": 3216
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8940188884735107,
      "learning_rate": 0.0005997114326237154,
      "loss": 3.5654,
      "step": 3217
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8361790180206299,
      "learning_rate": 0.0005997112532230813,
      "loss": 3.4915,
      "step": 3218
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.618277907371521,
      "learning_rate": 0.0005997110737667252,
      "loss": 3.7199,
      "step": 3219
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7601113319396973,
      "learning_rate": 0.000599710894254647,
      "loss": 3.3978,
      "step": 3220
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5574839115142822,
      "learning_rate": 0.0005997107146868471,
      "loss": 3.217,
      "step": 3221
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4210917949676514,
      "learning_rate": 0.0005997105350633252,
      "loss": 3.4363,
      "step": 3222
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5955438613891602,
      "learning_rate": 0.0005997103553840814,
      "loss": 3.5509,
      "step": 3223
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.56328284740448,
      "learning_rate": 0.0005997101756491158,
      "loss": 3.5465,
      "step": 3224
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3071415424346924,
      "learning_rate": 0.0005997099958584285,
      "loss": 3.3091,
      "step": 3225
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4400572776794434,
      "learning_rate": 0.0005997098160120195,
      "loss": 3.5707,
      "step": 3226
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6091586351394653,
      "learning_rate": 0.0005997096361098886,
      "loss": 3.4737,
      "step": 3227
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7991136312484741,
      "learning_rate": 0.0005997094561520362,
      "loss": 3.5394,
      "step": 3228
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6978315114974976,
      "learning_rate": 0.0005997092761384622,
      "loss": 3.5768,
      "step": 3229
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4310983419418335,
      "learning_rate": 0.0005997090960691664,
      "loss": 3.4417,
      "step": 3230
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3889168500900269,
      "learning_rate": 0.0005997089159441491,
      "loss": 3.5933,
      "step": 3231
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.471002459526062,
      "learning_rate": 0.0005997087357634103,
      "loss": 3.3342,
      "step": 3232
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5354018211364746,
      "learning_rate": 0.00059970855552695,
      "loss": 3.5273,
      "step": 3233
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2920997142791748,
      "learning_rate": 0.0005997083752347681,
      "loss": 3.5293,
      "step": 3234
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1751482486724854,
      "learning_rate": 0.0005997081948868648,
      "loss": 3.3803,
      "step": 3235
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.684476375579834,
      "learning_rate": 0.0005997080144832401,
      "loss": 3.2891,
      "step": 3236
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.701219916343689,
      "learning_rate": 0.0005997078340238941,
      "loss": 3.4087,
      "step": 3237
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7656519412994385,
      "learning_rate": 0.0005997076535088267,
      "loss": 3.4058,
      "step": 3238
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5346908569335938,
      "learning_rate": 0.000599707472938038,
      "loss": 3.4878,
      "step": 3239
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7706451416015625,
      "learning_rate": 0.000599707292311528,
      "loss": 3.3991,
      "step": 3240
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.9496917724609375,
      "learning_rate": 0.0005997071116292967,
      "loss": 3.3637,
      "step": 3241
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1395015716552734,
      "learning_rate": 0.0005997069308913443,
      "loss": 3.3978,
      "step": 3242
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7284828424453735,
      "learning_rate": 0.0005997067500976707,
      "loss": 3.5704,
      "step": 3243
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.646323800086975,
      "learning_rate": 0.0005997065692482759,
      "loss": 3.3763,
      "step": 3244
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3669984340667725,
      "learning_rate": 0.00059970638834316,
      "loss": 3.3407,
      "step": 3245
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5639740228652954,
      "learning_rate": 0.0005997062073823231,
      "loss": 3.6758,
      "step": 3246
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6105268001556396,
      "learning_rate": 0.000599706026365765,
      "loss": 3.5898,
      "step": 3247
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8713994026184082,
      "learning_rate": 0.000599705845293486,
      "loss": 3.3403,
      "step": 3248
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.817119836807251,
      "learning_rate": 0.0005997056641654861,
      "loss": 3.208,
      "step": 3249
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9649930000305176,
      "learning_rate": 0.0005997054829817651,
      "loss": 3.5824,
      "step": 3250
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4379756450653076,
      "learning_rate": 0.0005997053017423234,
      "loss": 3.1285,
      "step": 3251
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9773218631744385,
      "learning_rate": 0.0005997051204471607,
      "loss": 3.6194,
      "step": 3252
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.705935001373291,
      "learning_rate": 0.0005997049390962771,
      "loss": 3.2527,
      "step": 3253
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.473297119140625,
      "learning_rate": 0.0005997047576896728,
      "loss": 3.6461,
      "step": 3254
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.620368480682373,
      "learning_rate": 0.0005997045762273477,
      "loss": 3.3025,
      "step": 3255
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.004748821258545,
      "learning_rate": 0.0005997043947093018,
      "loss": 3.4014,
      "step": 3256
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9084477424621582,
      "learning_rate": 0.0005997042131355352,
      "loss": 3.5986,
      "step": 3257
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.412597894668579,
      "learning_rate": 0.000599704031506048,
      "loss": 3.3677,
      "step": 3258
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.959251880645752,
      "learning_rate": 0.0005997038498208403,
      "loss": 3.4675,
      "step": 3259
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8065780401229858,
      "learning_rate": 0.0005997036680799117,
      "loss": 3.4407,
      "step": 3260
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0252907276153564,
      "learning_rate": 0.0005997034862832627,
      "loss": 3.5047,
      "step": 3261
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.9543399810791016,
      "learning_rate": 0.0005997033044308932,
      "loss": 3.2354,
      "step": 3262
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.638178825378418,
      "learning_rate": 0.0005997031225228031,
      "loss": 3.6223,
      "step": 3263
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9996941089630127,
      "learning_rate": 0.0005997029405589926,
      "loss": 3.5827,
      "step": 3264
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2885868549346924,
      "learning_rate": 0.0005997027585394615,
      "loss": 3.7407,
      "step": 3265
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5295408964157104,
      "learning_rate": 0.0005997025764642103,
      "loss": 3.5794,
      "step": 3266
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.353987455368042,
      "learning_rate": 0.0005997023943332385,
      "loss": 3.5285,
      "step": 3267
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5346226692199707,
      "learning_rate": 0.0005997022121465463,
      "loss": 3.1292,
      "step": 3268
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.3837275505065918,
      "learning_rate": 0.000599702029904134,
      "loss": 3.3115,
      "step": 3269
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.204287052154541,
      "learning_rate": 0.0005997018476060013,
      "loss": 3.3886,
      "step": 3270
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6243207454681396,
      "learning_rate": 0.0005997016652521485,
      "loss": 3.5979,
      "step": 3271
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5734214782714844,
      "learning_rate": 0.0005997014828425755,
      "loss": 3.5349,
      "step": 3272
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.465801239013672,
      "learning_rate": 0.0005997013003772822,
      "loss": 3.6036,
      "step": 3273
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6296846866607666,
      "learning_rate": 0.0005997011178562689,
      "loss": 3.2964,
      "step": 3274
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.048781156539917,
      "learning_rate": 0.0005997009352795353,
      "loss": 3.2726,
      "step": 3275
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.3071563243865967,
      "learning_rate": 0.000599700752647082,
      "loss": 3.4967,
      "step": 3276
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.9715685844421387,
      "learning_rate": 0.0005997005699589084,
      "loss": 3.2447,
      "step": 3277
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0662949085235596,
      "learning_rate": 0.0005997003872150148,
      "loss": 3.4483,
      "step": 3278
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.890480637550354,
      "learning_rate": 0.0005997002044154014,
      "loss": 3.4618,
      "step": 3279
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.672559976577759,
      "learning_rate": 0.000599700021560068,
      "loss": 3.3852,
      "step": 3280
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.5912110805511475,
      "learning_rate": 0.0005996998386490147,
      "loss": 3.6255,
      "step": 3281
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7730262279510498,
      "learning_rate": 0.0005996996556822416,
      "loss": 3.5084,
      "step": 3282
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7359251976013184,
      "learning_rate": 0.0005996994726597486,
      "loss": 3.5915,
      "step": 3283
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.8555874824523926,
      "learning_rate": 0.0005996992895815357,
      "loss": 3.4899,
      "step": 3284
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7547533512115479,
      "learning_rate": 0.0005996991064476032,
      "loss": 3.5168,
      "step": 3285
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7567931413650513,
      "learning_rate": 0.000599698923257951,
      "loss": 3.6648,
      "step": 3286
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.57087779045105,
      "learning_rate": 0.0005996987400125791,
      "loss": 3.3796,
      "step": 3287
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6470394134521484,
      "learning_rate": 0.0005996985567114874,
      "loss": 3.4344,
      "step": 3288
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.137331247329712,
      "learning_rate": 0.0005996983733546762,
      "loss": 3.705,
      "step": 3289
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.635277032852173,
      "learning_rate": 0.0005996981899421454,
      "loss": 3.491,
      "step": 3290
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5395628213882446,
      "learning_rate": 0.0005996980064738952,
      "loss": 3.5932,
      "step": 3291
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5671409368515015,
      "learning_rate": 0.0005996978229499253,
      "loss": 3.4058,
      "step": 3292
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.962345004081726,
      "learning_rate": 0.000599697639370236,
      "loss": 3.4032,
      "step": 3293
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5973222255706787,
      "learning_rate": 0.000599697455734827,
      "loss": 3.5139,
      "step": 3294
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9448740482330322,
      "learning_rate": 0.0005996972720436988,
      "loss": 3.5967,
      "step": 3295
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6038013696670532,
      "learning_rate": 0.0005996970882968512,
      "loss": 3.65,
      "step": 3296
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5546281337738037,
      "learning_rate": 0.0005996969044942843,
      "loss": 3.4846,
      "step": 3297
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.256617546081543,
      "learning_rate": 0.0005996967206359981,
      "loss": 3.4344,
      "step": 3298
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7557282447814941,
      "learning_rate": 0.0005996965367219926,
      "loss": 3.5166,
      "step": 3299
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.402879238128662,
      "learning_rate": 0.0005996963527522679,
      "loss": 3.4345,
      "step": 3300
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.058725595474243,
      "learning_rate": 0.0005996961687268238,
      "loss": 3.4872,
      "step": 3301
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5674859285354614,
      "learning_rate": 0.0005996959846456607,
      "loss": 3.5802,
      "step": 3302
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.560943126678467,
      "learning_rate": 0.0005996958005087784,
      "loss": 3.5342,
      "step": 3303
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3525390625,
      "learning_rate": 0.0005996956163161769,
      "loss": 3.3669,
      "step": 3304
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0846426486968994,
      "learning_rate": 0.0005996954320678566,
      "loss": 3.208,
      "step": 3305
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9815523624420166,
      "learning_rate": 0.000599695247763817,
      "loss": 3.3368,
      "step": 3306
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5612525939941406,
      "learning_rate": 0.0005996950634040585,
      "loss": 3.3815,
      "step": 3307
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5746701955795288,
      "learning_rate": 0.000599694878988581,
      "loss": 3.306,
      "step": 3308
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7176618576049805,
      "learning_rate": 0.0005996946945173846,
      "loss": 3.433,
      "step": 3309
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.355083703994751,
      "learning_rate": 0.0005996945099904692,
      "loss": 3.4761,
      "step": 3310
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.779554843902588,
      "learning_rate": 0.0005996943254078351,
      "loss": 3.8024,
      "step": 3311
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.093310594558716,
      "learning_rate": 0.0005996941407694821,
      "loss": 3.3489,
      "step": 3312
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.684490203857422,
      "learning_rate": 0.0005996939560754103,
      "loss": 3.5439,
      "step": 3313
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9998955726623535,
      "learning_rate": 0.0005996937713256197,
      "loss": 3.3882,
      "step": 3314
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6530052423477173,
      "learning_rate": 0.0005996935865201105,
      "loss": 3.6508,
      "step": 3315
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.433673858642578,
      "learning_rate": 0.0005996934016588825,
      "loss": 3.3066,
      "step": 3316
    },
    {
      "epoch": 0.04,
      "grad_norm": 4.618602752685547,
      "learning_rate": 0.0005996932167419357,
      "loss": 3.5879,
      "step": 3317
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.855460286140442,
      "learning_rate": 0.0005996930317692705,
      "loss": 3.5887,
      "step": 3318
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8331692218780518,
      "learning_rate": 0.0005996928467408866,
      "loss": 3.5026,
      "step": 3319
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5621142387390137,
      "learning_rate": 0.0005996926616567841,
      "loss": 3.5947,
      "step": 3320
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9181398153305054,
      "learning_rate": 0.0005996924765169631,
      "loss": 3.6585,
      "step": 3321
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.792014241218567,
      "learning_rate": 0.0005996922913214237,
      "loss": 3.4342,
      "step": 3322
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.215852737426758,
      "learning_rate": 0.0005996921060701659,
      "loss": 3.3499,
      "step": 3323
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3395516872406006,
      "learning_rate": 0.0005996919207631895,
      "loss": 3.5908,
      "step": 3324
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8417127132415771,
      "learning_rate": 0.0005996917354004948,
      "loss": 3.6478,
      "step": 3325
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.462588906288147,
      "learning_rate": 0.0005996915499820818,
      "loss": 3.3149,
      "step": 3326
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4790589809417725,
      "learning_rate": 0.0005996913645079504,
      "loss": 3.5787,
      "step": 3327
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0039823055267334,
      "learning_rate": 0.0005996911789781007,
      "loss": 3.5027,
      "step": 3328
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5039582252502441,
      "learning_rate": 0.0005996909933925328,
      "loss": 3.2205,
      "step": 3329
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8392951488494873,
      "learning_rate": 0.0005996908077512467,
      "loss": 3.3882,
      "step": 3330
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.1513376235961914,
      "learning_rate": 0.0005996906220542425,
      "loss": 3.2974,
      "step": 3331
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.86055326461792,
      "learning_rate": 0.0005996904363015199,
      "loss": 3.3616,
      "step": 3332
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8546576499938965,
      "learning_rate": 0.0005996902504930793,
      "loss": 3.384,
      "step": 3333
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.2640395164489746,
      "learning_rate": 0.0005996900646289208,
      "loss": 3.7289,
      "step": 3334
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.606109619140625,
      "learning_rate": 0.0005996898787090442,
      "loss": 3.5814,
      "step": 3335
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2927639484405518,
      "learning_rate": 0.0005996896927334494,
      "loss": 3.2283,
      "step": 3336
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8630433082580566,
      "learning_rate": 0.0005996895067021368,
      "loss": 3.382,
      "step": 3337
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.2466628551483154,
      "learning_rate": 0.0005996893206151062,
      "loss": 3.5434,
      "step": 3338
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7876944541931152,
      "learning_rate": 0.0005996891344723579,
      "loss": 3.4526,
      "step": 3339
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.1510188579559326,
      "learning_rate": 0.0005996889482738914,
      "loss": 3.2794,
      "step": 3340
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.6732168197631836,
      "learning_rate": 0.0005996887620197072,
      "loss": 3.5056,
      "step": 3341
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.7830636501312256,
      "learning_rate": 0.0005996885757098054,
      "loss": 3.5933,
      "step": 3342
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.154961109161377,
      "learning_rate": 0.0005996883893441855,
      "loss": 3.3124,
      "step": 3343
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.225634813308716,
      "learning_rate": 0.000599688202922848,
      "loss": 3.3063,
      "step": 3344
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4073665142059326,
      "learning_rate": 0.0005996880164457929,
      "loss": 3.7682,
      "step": 3345
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.44697642326355,
      "learning_rate": 0.00059968782991302,
      "loss": 3.3332,
      "step": 3346
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5991301536560059,
      "learning_rate": 0.0005996876433245297,
      "loss": 3.4592,
      "step": 3347
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.4278504848480225,
      "learning_rate": 0.0005996874566803216,
      "loss": 3.355,
      "step": 3348
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.600088357925415,
      "learning_rate": 0.0005996872699803961,
      "loss": 3.5276,
      "step": 3349
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.65085506439209,
      "learning_rate": 0.0005996870832247529,
      "loss": 3.6856,
      "step": 3350
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.503862977027893,
      "learning_rate": 0.0005996868964133923,
      "loss": 3.3415,
      "step": 3351
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.8352062702178955,
      "learning_rate": 0.0005996867095463143,
      "loss": 3.2633,
      "step": 3352
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.6062605381011963,
      "learning_rate": 0.0005996865226235188,
      "loss": 3.6366,
      "step": 3353
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3044416904449463,
      "learning_rate": 0.0005996863356450059,
      "loss": 3.3207,
      "step": 3354
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5670585632324219,
      "learning_rate": 0.0005996861486107758,
      "loss": 3.7618,
      "step": 3355
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.602651357650757,
      "learning_rate": 0.0005996859615208283,
      "loss": 3.3056,
      "step": 3356
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7544859647750854,
      "learning_rate": 0.0005996857743751635,
      "loss": 3.7585,
      "step": 3357
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7195767164230347,
      "learning_rate": 0.0005996855871737815,
      "loss": 3.5806,
      "step": 3358
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.245812177658081,
      "learning_rate": 0.0005996853999166822,
      "loss": 3.2094,
      "step": 3359
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.5561184883117676,
      "learning_rate": 0.0005996852126038659,
      "loss": 3.2569,
      "step": 3360
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5967103242874146,
      "learning_rate": 0.0005996850252353324,
      "loss": 3.5311,
      "step": 3361
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.726908802986145,
      "learning_rate": 0.0005996848378110816,
      "loss": 3.4299,
      "step": 3362
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.104724884033203,
      "learning_rate": 0.0005996846503311139,
      "loss": 3.4175,
      "step": 3363
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.426845908164978,
      "learning_rate": 0.0005996844627954292,
      "loss": 3.4678,
      "step": 3364
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5494498014450073,
      "learning_rate": 0.0005996842752040275,
      "loss": 3.4668,
      "step": 3365
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6335338354110718,
      "learning_rate": 0.0005996840875569087,
      "loss": 3.423,
      "step": 3366
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5973327159881592,
      "learning_rate": 0.0005996838998540731,
      "loss": 3.3128,
      "step": 3367
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7705615758895874,
      "learning_rate": 0.0005996837120955205,
      "loss": 3.1261,
      "step": 3368
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7053245306015015,
      "learning_rate": 0.0005996835242812512,
      "loss": 3.1989,
      "step": 3369
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.650291085243225,
      "learning_rate": 0.000599683336411265,
      "loss": 3.4455,
      "step": 3370
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6422531604766846,
      "learning_rate": 0.000599683148485562,
      "loss": 3.5792,
      "step": 3371
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7677220106124878,
      "learning_rate": 0.0005996829605041422,
      "loss": 3.5092,
      "step": 3372
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4895188808441162,
      "learning_rate": 0.0005996827724670057,
      "loss": 3.7368,
      "step": 3373
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5837311744689941,
      "learning_rate": 0.0005996825843741525,
      "loss": 3.5112,
      "step": 3374
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.037552833557129,
      "learning_rate": 0.0005996823962255828,
      "loss": 3.2386,
      "step": 3375
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6750608682632446,
      "learning_rate": 0.0005996822080212963,
      "loss": 3.427,
      "step": 3376
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.733400821685791,
      "learning_rate": 0.0005996820197612932,
      "loss": 3.5129,
      "step": 3377
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5825155973434448,
      "learning_rate": 0.0005996818314455738,
      "loss": 3.4852,
      "step": 3378
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4993634223937988,
      "learning_rate": 0.0005996816430741377,
      "loss": 3.251,
      "step": 3379
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5865874290466309,
      "learning_rate": 0.0005996814546469852,
      "loss": 3.552,
      "step": 3380
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.542153239250183,
      "learning_rate": 0.0005996812661641162,
      "loss": 3.6885,
      "step": 3381
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6367124319076538,
      "learning_rate": 0.0005996810776255308,
      "loss": 3.6135,
      "step": 3382
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.357571840286255,
      "learning_rate": 0.0005996808890312291,
      "loss": 3.399,
      "step": 3383
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4388128519058228,
      "learning_rate": 0.000599680700381211,
      "loss": 3.369,
      "step": 3384
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5931615829467773,
      "learning_rate": 0.0005996805116754766,
      "loss": 3.1317,
      "step": 3385
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3103270530700684,
      "learning_rate": 0.0005996803229140259,
      "loss": 3.4644,
      "step": 3386
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7414097785949707,
      "learning_rate": 0.0005996801340968591,
      "loss": 3.5022,
      "step": 3387
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.622571349143982,
      "learning_rate": 0.000599679945223976,
      "loss": 3.2869,
      "step": 3388
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4574776887893677,
      "learning_rate": 0.0005996797562953768,
      "loss": 3.4427,
      "step": 3389
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9744257926940918,
      "learning_rate": 0.0005996795673110614,
      "loss": 3.6259,
      "step": 3390
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9785722494125366,
      "learning_rate": 0.00059967937827103,
      "loss": 3.5199,
      "step": 3391
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.498526692390442,
      "learning_rate": 0.0005996791891752825,
      "loss": 3.5275,
      "step": 3392
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4353222846984863,
      "learning_rate": 0.000599679000023819,
      "loss": 3.7677,
      "step": 3393
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.541588544845581,
      "learning_rate": 0.0005996788108166395,
      "loss": 3.3295,
      "step": 3394
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9117770195007324,
      "learning_rate": 0.000599678621553744,
      "loss": 3.6561,
      "step": 3395
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8526742458343506,
      "learning_rate": 0.0005996784322351328,
      "loss": 3.448,
      "step": 3396
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1882398128509521,
      "learning_rate": 0.0005996782428608055,
      "loss": 3.324,
      "step": 3397
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7251287698745728,
      "learning_rate": 0.0005996780534307625,
      "loss": 3.5131,
      "step": 3398
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.397253155708313,
      "learning_rate": 0.0005996778639450036,
      "loss": 3.4117,
      "step": 3399
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6672561168670654,
      "learning_rate": 0.0005996776744035289,
      "loss": 3.5459,
      "step": 3400
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9752837419509888,
      "learning_rate": 0.0005996774848063385,
      "loss": 3.5993,
      "step": 3401
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5694916248321533,
      "learning_rate": 0.0005996772951534325,
      "loss": 3.5119,
      "step": 3402
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.781905174255371,
      "learning_rate": 0.0005996771054448107,
      "loss": 3.5245,
      "step": 3403
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5808736085891724,
      "learning_rate": 0.0005996769156804732,
      "loss": 3.4825,
      "step": 3404
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7626559734344482,
      "learning_rate": 0.0005996767258604202,
      "loss": 3.5015,
      "step": 3405
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.588441252708435,
      "learning_rate": 0.0005996765359846518,
      "loss": 3.5299,
      "step": 3406
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.2835127115249634,
      "learning_rate": 0.0005996763460531677,
      "loss": 3.3034,
      "step": 3407
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.9216573238372803,
      "learning_rate": 0.0005996761560659681,
      "loss": 3.257,
      "step": 3408
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6741020679473877,
      "learning_rate": 0.0005996759660230531,
      "loss": 3.4563,
      "step": 3409
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6583783626556396,
      "learning_rate": 0.0005996757759244226,
      "loss": 3.2713,
      "step": 3410
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.544858455657959,
      "learning_rate": 0.0005996755857700769,
      "loss": 3.6565,
      "step": 3411
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6221843957901,
      "learning_rate": 0.0005996753955600158,
      "loss": 3.6518,
      "step": 3412
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4991869926452637,
      "learning_rate": 0.0005996752052942393,
      "loss": 3.3267,
      "step": 3413
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4247726202011108,
      "learning_rate": 0.0005996750149727476,
      "loss": 3.4735,
      "step": 3414
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6320537328720093,
      "learning_rate": 0.0005996748245955405,
      "loss": 3.5116,
      "step": 3415
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.427319884300232,
      "learning_rate": 0.0005996746341626184,
      "loss": 3.6395,
      "step": 3416
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4904088973999023,
      "learning_rate": 0.0005996744436739809,
      "loss": 3.6568,
      "step": 3417
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8088774681091309,
      "learning_rate": 0.0005996742531296284,
      "loss": 3.2505,
      "step": 3418
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.725355863571167,
      "learning_rate": 0.0005996740625295608,
      "loss": 3.3377,
      "step": 3419
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8990181684494019,
      "learning_rate": 0.0005996738718737781,
      "loss": 3.4057,
      "step": 3420
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.157036066055298,
      "learning_rate": 0.0005996736811622804,
      "loss": 3.425,
      "step": 3421
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.76958167552948,
      "learning_rate": 0.0005996734903950678,
      "loss": 3.6988,
      "step": 3422
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.577261209487915,
      "learning_rate": 0.0005996732995721401,
      "loss": 3.5059,
      "step": 3423
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5995837450027466,
      "learning_rate": 0.0005996731086934976,
      "loss": 3.2757,
      "step": 3424
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.457998275756836,
      "learning_rate": 0.00059967291775914,
      "loss": 3.188,
      "step": 3425
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.581814169883728,
      "learning_rate": 0.0005996727267690678,
      "loss": 3.3292,
      "step": 3426
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.4214686155319214,
      "learning_rate": 0.0005996725357232806,
      "loss": 3.3548,
      "step": 3427
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6105728149414062,
      "learning_rate": 0.0005996723446217787,
      "loss": 3.6476,
      "step": 3428
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6503664255142212,
      "learning_rate": 0.000599672153464562,
      "loss": 3.3189,
      "step": 3429
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6122380495071411,
      "learning_rate": 0.0005996719622516307,
      "loss": 3.1752,
      "step": 3430
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6891894340515137,
      "learning_rate": 0.0005996717709829846,
      "loss": 3.4349,
      "step": 3431
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6038308143615723,
      "learning_rate": 0.0005996715796586239,
      "loss": 3.3593,
      "step": 3432
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.142972707748413,
      "learning_rate": 0.0005996713882785487,
      "loss": 3.4022,
      "step": 3433
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9650081396102905,
      "learning_rate": 0.0005996711968427589,
      "loss": 3.6363,
      "step": 3434
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8043501377105713,
      "learning_rate": 0.0005996710053512545,
      "loss": 3.5142,
      "step": 3435
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6703717708587646,
      "learning_rate": 0.0005996708138040355,
      "loss": 3.4055,
      "step": 3436
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.868636131286621,
      "learning_rate": 0.0005996706222011021,
      "loss": 3.3229,
      "step": 3437
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9779192209243774,
      "learning_rate": 0.0005996704305424544,
      "loss": 3.4697,
      "step": 3438
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9621024131774902,
      "learning_rate": 0.0005996702388280922,
      "loss": 3.6191,
      "step": 3439
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3964600563049316,
      "learning_rate": 0.0005996700470580158,
      "loss": 3.4592,
      "step": 3440
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.0992467403411865,
      "learning_rate": 0.0005996698552322249,
      "loss": 3.4207,
      "step": 3441
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.9820634126663208,
      "learning_rate": 0.0005996696633507198,
      "loss": 3.3565,
      "step": 3442
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8769242763519287,
      "learning_rate": 0.0005996694714135004,
      "loss": 3.5486,
      "step": 3443
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.3489575386047363,
      "learning_rate": 0.0005996692794205668,
      "loss": 3.3943,
      "step": 3444
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.287076234817505,
      "learning_rate": 0.0005996690873719191,
      "loss": 3.2516,
      "step": 3445
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7472130060195923,
      "learning_rate": 0.0005996688952675572,
      "loss": 3.1873,
      "step": 3446
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.332444667816162,
      "learning_rate": 0.0005996687031074812,
      "loss": 3.5688,
      "step": 3447
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5279958248138428,
      "learning_rate": 0.0005996685108916911,
      "loss": 3.622,
      "step": 3448
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5408649444580078,
      "learning_rate": 0.0005996683186201871,
      "loss": 3.2543,
      "step": 3449
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.923180341720581,
      "learning_rate": 0.000599668126292969,
      "loss": 3.3122,
      "step": 3450
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.7310378551483154,
      "learning_rate": 0.0005996679339100369,
      "loss": 3.4755,
      "step": 3451
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.7135980129241943,
      "learning_rate": 0.0005996677414713909,
      "loss": 3.6425,
      "step": 3452
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.5172662734985352,
      "learning_rate": 0.0005996675489770311,
      "loss": 3.5972,
      "step": 3453
    },
    {
      "epoch": 0.04,
      "grad_norm": 2.682859420776367,
      "learning_rate": 0.0005996673564269573,
      "loss": 3.3619,
      "step": 3454
    },
    {
      "epoch": 0.04,
      "grad_norm": 3.0090548992156982,
      "learning_rate": 0.0005996671638211698,
      "loss": 3.5058,
      "step": 3455
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.6153696775436401,
      "learning_rate": 0.0005996669711596684,
      "loss": 3.5604,
      "step": 3456
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.981560468673706,
      "learning_rate": 0.0005996667784424534,
      "loss": 3.4101,
      "step": 3457
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.067861795425415,
      "learning_rate": 0.0005996665856695246,
      "loss": 3.4472,
      "step": 3458
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6533317565917969,
      "learning_rate": 0.0005996663928408821,
      "loss": 3.5778,
      "step": 3459
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.540280818939209,
      "learning_rate": 0.0005996661999565261,
      "loss": 3.4262,
      "step": 3460
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3698394298553467,
      "learning_rate": 0.0005996660070164564,
      "loss": 3.5757,
      "step": 3461
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4826722145080566,
      "learning_rate": 0.0005996658140206731,
      "loss": 3.3261,
      "step": 3462
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4659903049468994,
      "learning_rate": 0.0005996656209691763,
      "loss": 3.7307,
      "step": 3463
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5147652626037598,
      "learning_rate": 0.000599665427861966,
      "loss": 3.4649,
      "step": 3464
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5845283269882202,
      "learning_rate": 0.0005996652346990422,
      "loss": 3.5785,
      "step": 3465
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9699286222457886,
      "learning_rate": 0.000599665041480405,
      "loss": 3.3601,
      "step": 3466
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5879008769989014,
      "learning_rate": 0.0005996648482060545,
      "loss": 3.5799,
      "step": 3467
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5721309185028076,
      "learning_rate": 0.0005996646548759906,
      "loss": 3.6808,
      "step": 3468
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.552269697189331,
      "learning_rate": 0.0005996644614902134,
      "loss": 3.5545,
      "step": 3469
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8152235746383667,
      "learning_rate": 0.0005996642680487228,
      "loss": 3.6485,
      "step": 3470
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2823879718780518,
      "learning_rate": 0.0005996640745515191,
      "loss": 3.4589,
      "step": 3471
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6358338594436646,
      "learning_rate": 0.0005996638809986021,
      "loss": 3.4687,
      "step": 3472
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9010419845581055,
      "learning_rate": 0.000599663687389972,
      "loss": 3.3707,
      "step": 3473
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5990558862686157,
      "learning_rate": 0.0005996634937256287,
      "loss": 3.4475,
      "step": 3474
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.096207857131958,
      "learning_rate": 0.0005996633000055724,
      "loss": 3.4668,
      "step": 3475
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3951491117477417,
      "learning_rate": 0.000599663106229803,
      "loss": 3.4868,
      "step": 3476
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7483391761779785,
      "learning_rate": 0.0005996629123983205,
      "loss": 3.3993,
      "step": 3477
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5370804071426392,
      "learning_rate": 0.000599662718511125,
      "loss": 3.5729,
      "step": 3478
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7882351875305176,
      "learning_rate": 0.0005996625245682166,
      "loss": 3.6487,
      "step": 3479
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6945523023605347,
      "learning_rate": 0.0005996623305695953,
      "loss": 3.362,
      "step": 3480
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8527662754058838,
      "learning_rate": 0.0005996621365152611,
      "loss": 3.4248,
      "step": 3481
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.575934648513794,
      "learning_rate": 0.000599661942405214,
      "loss": 3.3467,
      "step": 3482
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5771170854568481,
      "learning_rate": 0.0005996617482394542,
      "loss": 3.6068,
      "step": 3483
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6040947437286377,
      "learning_rate": 0.0005996615540179815,
      "loss": 3.1994,
      "step": 3484
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.269592523574829,
      "learning_rate": 0.0005996613597407961,
      "loss": 3.5182,
      "step": 3485
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.796090006828308,
      "learning_rate": 0.000599661165407898,
      "loss": 3.4164,
      "step": 3486
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5261757373809814,
      "learning_rate": 0.0005996609710192872,
      "loss": 3.2441,
      "step": 3487
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3126718997955322,
      "learning_rate": 0.0005996607765749637,
      "loss": 3.5446,
      "step": 3488
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.501778244972229,
      "learning_rate": 0.0005996605820749279,
      "loss": 3.4139,
      "step": 3489
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7655134201049805,
      "learning_rate": 0.0005996603875191793,
      "loss": 3.241,
      "step": 3490
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.008537769317627,
      "learning_rate": 0.0005996601929077181,
      "loss": 3.4898,
      "step": 3491
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4490342140197754,
      "learning_rate": 0.0005996599982405446,
      "loss": 3.3999,
      "step": 3492
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.778580665588379,
      "learning_rate": 0.0005996598035176587,
      "loss": 3.2523,
      "step": 3493
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6347984075546265,
      "learning_rate": 0.0005996596087390602,
      "loss": 3.3984,
      "step": 3494
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8622221946716309,
      "learning_rate": 0.0005996594139047493,
      "loss": 3.3485,
      "step": 3495
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7286723852157593,
      "learning_rate": 0.0005996592190147264,
      "loss": 3.2922,
      "step": 3496
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.89644455909729,
      "learning_rate": 0.0005996590240689908,
      "loss": 3.3863,
      "step": 3497
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3296741247177124,
      "learning_rate": 0.0005996588290675431,
      "loss": 3.4288,
      "step": 3498
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2387290000915527,
      "learning_rate": 0.0005996586340103831,
      "loss": 3.6686,
      "step": 3499
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5060205459594727,
      "learning_rate": 0.0005996584388975111,
      "loss": 3.5077,
      "step": 3500
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.474469542503357,
      "learning_rate": 0.0005996582437289268,
      "loss": 3.5186,
      "step": 3501
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5043691396713257,
      "learning_rate": 0.0005996580485046303,
      "loss": 3.5967,
      "step": 3502
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6802096366882324,
      "learning_rate": 0.0005996578532246218,
      "loss": 3.4479,
      "step": 3503
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.32891047000885,
      "learning_rate": 0.0005996576578889012,
      "loss": 3.7307,
      "step": 3504
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4213508367538452,
      "learning_rate": 0.0005996574624974687,
      "loss": 3.4957,
      "step": 3505
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.683434009552002,
      "learning_rate": 0.0005996572670503241,
      "loss": 3.6303,
      "step": 3506
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.387934684753418,
      "learning_rate": 0.0005996570715474676,
      "loss": 3.4987,
      "step": 3507
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.128911256790161,
      "learning_rate": 0.0005996568759888992,
      "loss": 3.4419,
      "step": 3508
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0143861770629883,
      "learning_rate": 0.0005996566803746189,
      "loss": 3.3145,
      "step": 3509
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0290327072143555,
      "learning_rate": 0.0005996564847046268,
      "loss": 3.4131,
      "step": 3510
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9955039024353027,
      "learning_rate": 0.0005996562889789229,
      "loss": 3.6391,
      "step": 3511
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9728726148605347,
      "learning_rate": 0.0005996560931975072,
      "loss": 3.6881,
      "step": 3512
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6715672016143799,
      "learning_rate": 0.0005996558973603798,
      "loss": 3.5724,
      "step": 3513
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7048734426498413,
      "learning_rate": 0.0005996557014675408,
      "loss": 3.4389,
      "step": 3514
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1984047889709473,
      "learning_rate": 0.00059965550551899,
      "loss": 3.4138,
      "step": 3515
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1518495082855225,
      "learning_rate": 0.0005996553095147277,
      "loss": 3.5853,
      "step": 3516
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0106358528137207,
      "learning_rate": 0.0005996551134547538,
      "loss": 3.0838,
      "step": 3517
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7755184173583984,
      "learning_rate": 0.0005996549173390682,
      "loss": 3.644,
      "step": 3518
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7636245489120483,
      "learning_rate": 0.0005996547211676713,
      "loss": 3.6769,
      "step": 3519
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0659210681915283,
      "learning_rate": 0.0005996545249405628,
      "loss": 3.5997,
      "step": 3520
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1982197761535645,
      "learning_rate": 0.000599654328657743,
      "loss": 3.4546,
      "step": 3521
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4506514072418213,
      "learning_rate": 0.0005996541323192117,
      "loss": 3.4277,
      "step": 3522
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.292051076889038,
      "learning_rate": 0.000599653935924969,
      "loss": 3.3299,
      "step": 3523
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8119882345199585,
      "learning_rate": 0.0005996537394750152,
      "loss": 3.3926,
      "step": 3524
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.648671269416809,
      "learning_rate": 0.0005996535429693498,
      "loss": 3.6085,
      "step": 3525
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4829586744308472,
      "learning_rate": 0.0005996533464079734,
      "loss": 3.5479,
      "step": 3526
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4006625413894653,
      "learning_rate": 0.0005996531497908857,
      "loss": 3.2927,
      "step": 3527
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7025810480117798,
      "learning_rate": 0.0005996529531180868,
      "loss": 3.2619,
      "step": 3528
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4180405139923096,
      "learning_rate": 0.0005996527563895767,
      "loss": 3.2705,
      "step": 3529
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4844379425048828,
      "learning_rate": 0.0005996525596053558,
      "loss": 3.4441,
      "step": 3530
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.467976689338684,
      "learning_rate": 0.0005996523627654235,
      "loss": 3.4744,
      "step": 3531
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5716755390167236,
      "learning_rate": 0.0005996521658697803,
      "loss": 3.3446,
      "step": 3532
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5217149257659912,
      "learning_rate": 0.000599651968918426,
      "loss": 3.5857,
      "step": 3533
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6370071172714233,
      "learning_rate": 0.0005996517719113609,
      "loss": 3.5572,
      "step": 3534
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.275305986404419,
      "learning_rate": 0.0005996515748485848,
      "loss": 3.3733,
      "step": 3535
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.234586477279663,
      "learning_rate": 0.0005996513777300978,
      "loss": 3.3289,
      "step": 3536
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7669445276260376,
      "learning_rate": 0.0005996511805559,
      "loss": 3.5754,
      "step": 3537
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.955550193786621,
      "learning_rate": 0.0005996509833259913,
      "loss": 3.4369,
      "step": 3538
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.0510613918304443,
      "learning_rate": 0.0005996507860403719,
      "loss": 3.325,
      "step": 3539
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5254932641983032,
      "learning_rate": 0.0005996505886990417,
      "loss": 3.3787,
      "step": 3540
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1437461376190186,
      "learning_rate": 0.000599650391302001,
      "loss": 3.4706,
      "step": 3541
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4758143424987793,
      "learning_rate": 0.0005996501938492493,
      "loss": 3.2756,
      "step": 3542
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.697172999382019,
      "learning_rate": 0.0005996499963407873,
      "loss": 3.5767,
      "step": 3543
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6920431852340698,
      "learning_rate": 0.0005996497987766145,
      "loss": 3.2335,
      "step": 3544
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6164071559906006,
      "learning_rate": 0.0005996496011567312,
      "loss": 3.2379,
      "step": 3545
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7034096717834473,
      "learning_rate": 0.0005996494034811374,
      "loss": 3.6097,
      "step": 3546
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7790184020996094,
      "learning_rate": 0.0005996492057498332,
      "loss": 3.5668,
      "step": 3547
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8581000566482544,
      "learning_rate": 0.0005996490079628184,
      "loss": 3.5092,
      "step": 3548
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6925160884857178,
      "learning_rate": 0.0005996488101200933,
      "loss": 3.4307,
      "step": 3549
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8080435991287231,
      "learning_rate": 0.0005996486122216579,
      "loss": 3.3246,
      "step": 3550
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7475740909576416,
      "learning_rate": 0.0005996484142675119,
      "loss": 3.4734,
      "step": 3551
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6153442859649658,
      "learning_rate": 0.0005996482162576558,
      "loss": 3.2899,
      "step": 3552
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4729337692260742,
      "learning_rate": 0.0005996480181920895,
      "loss": 3.4,
      "step": 3553
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0427303314208984,
      "learning_rate": 0.0005996478200708128,
      "loss": 3.1961,
      "step": 3554
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5667318105697632,
      "learning_rate": 0.0005996476218938261,
      "loss": 3.7691,
      "step": 3555
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4663255214691162,
      "learning_rate": 0.0005996474236611291,
      "loss": 3.3278,
      "step": 3556
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7361217737197876,
      "learning_rate": 0.000599647225372722,
      "loss": 3.3935,
      "step": 3557
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3071582317352295,
      "learning_rate": 0.0005996470270286049,
      "loss": 3.4196,
      "step": 3558
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6522457599639893,
      "learning_rate": 0.0005996468286287777,
      "loss": 3.3704,
      "step": 3559
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.17773175239563,
      "learning_rate": 0.0005996466301732405,
      "loss": 3.5158,
      "step": 3560
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7788482904434204,
      "learning_rate": 0.0005996464316619934,
      "loss": 3.2214,
      "step": 3561
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.080545425415039,
      "learning_rate": 0.0005996462330950363,
      "loss": 3.401,
      "step": 3562
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5015008449554443,
      "learning_rate": 0.0005996460344723693,
      "loss": 3.3685,
      "step": 3563
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.920544147491455,
      "learning_rate": 0.0005996458357939925,
      "loss": 3.387,
      "step": 3564
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9195125102996826,
      "learning_rate": 0.0005996456370599059,
      "loss": 3.306,
      "step": 3565
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5291246175765991,
      "learning_rate": 0.0005996454382701094,
      "loss": 3.3035,
      "step": 3566
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9045448303222656,
      "learning_rate": 0.0005996452394246033,
      "loss": 3.2834,
      "step": 3567
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6693729162216187,
      "learning_rate": 0.0005996450405233875,
      "loss": 3.2254,
      "step": 3568
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9581843614578247,
      "learning_rate": 0.0005996448415664619,
      "loss": 3.0522,
      "step": 3569
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5782606601715088,
      "learning_rate": 0.0005996446425538267,
      "loss": 3.5396,
      "step": 3570
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4396758079528809,
      "learning_rate": 0.0005996444434854819,
      "loss": 3.5245,
      "step": 3571
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7800806760787964,
      "learning_rate": 0.0005996442443614277,
      "loss": 3.5142,
      "step": 3572
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7983678579330444,
      "learning_rate": 0.0005996440451816638,
      "loss": 3.5614,
      "step": 3573
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6589398384094238,
      "learning_rate": 0.0005996438459461906,
      "loss": 3.3533,
      "step": 3574
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0663962364196777,
      "learning_rate": 0.0005996436466550077,
      "loss": 3.5795,
      "step": 3575
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6791024208068848,
      "learning_rate": 0.0005996434473081155,
      "loss": 3.2763,
      "step": 3576
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6720085144042969,
      "learning_rate": 0.000599643247905514,
      "loss": 3.4114,
      "step": 3577
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3605250120162964,
      "learning_rate": 0.0005996430484472031,
      "loss": 3.3911,
      "step": 3578
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7243492603302002,
      "learning_rate": 0.0005996428489331829,
      "loss": 3.2398,
      "step": 3579
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6291979551315308,
      "learning_rate": 0.0005996426493634535,
      "loss": 3.5724,
      "step": 3580
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6452182531356812,
      "learning_rate": 0.0005996424497380148,
      "loss": 3.57,
      "step": 3581
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.559695243835449,
      "learning_rate": 0.000599642250056867,
      "loss": 3.6181,
      "step": 3582
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5124452114105225,
      "learning_rate": 0.0005996420503200099,
      "loss": 3.0562,
      "step": 3583
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.285872220993042,
      "learning_rate": 0.0005996418505274438,
      "loss": 3.4838,
      "step": 3584
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5389035940170288,
      "learning_rate": 0.0005996416506791686,
      "loss": 3.5063,
      "step": 3585
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.785214900970459,
      "learning_rate": 0.0005996414507751844,
      "loss": 3.1865,
      "step": 3586
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.121683359146118,
      "learning_rate": 0.0005996412508154911,
      "loss": 3.2014,
      "step": 3587
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3699867725372314,
      "learning_rate": 0.0005996410508000889,
      "loss": 3.3866,
      "step": 3588
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6609327793121338,
      "learning_rate": 0.0005996408507289777,
      "loss": 3.4917,
      "step": 3589
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6112538576126099,
      "learning_rate": 0.0005996406506021578,
      "loss": 3.3456,
      "step": 3590
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7342090606689453,
      "learning_rate": 0.0005996404504196288,
      "loss": 3.4696,
      "step": 3591
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8414764404296875,
      "learning_rate": 0.0005996402501813911,
      "loss": 3.1504,
      "step": 3592
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8814918994903564,
      "learning_rate": 0.0005996400498874447,
      "loss": 3.3447,
      "step": 3593
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6659990549087524,
      "learning_rate": 0.0005996398495377894,
      "loss": 3.3991,
      "step": 3594
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7162435054779053,
      "learning_rate": 0.0005996396491324256,
      "loss": 3.6977,
      "step": 3595
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7678909301757812,
      "learning_rate": 0.000599639448671353,
      "loss": 3.3434,
      "step": 3596
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3857499361038208,
      "learning_rate": 0.0005996392481545718,
      "loss": 3.5107,
      "step": 3597
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6010608673095703,
      "learning_rate": 0.000599639047582082,
      "loss": 3.4906,
      "step": 3598
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.263145685195923,
      "learning_rate": 0.0005996388469538835,
      "loss": 3.3744,
      "step": 3599
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.522040843963623,
      "learning_rate": 0.0005996386462699767,
      "loss": 3.3961,
      "step": 3600
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4434003829956055,
      "learning_rate": 0.0005996384455303613,
      "loss": 3.2082,
      "step": 3601
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0711116790771484,
      "learning_rate": 0.0005996382447350376,
      "loss": 3.2284,
      "step": 3602
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6549712419509888,
      "learning_rate": 0.0005996380438840054,
      "loss": 3.1759,
      "step": 3603
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.1724038124084473,
      "learning_rate": 0.0005996378429772648,
      "loss": 3.3024,
      "step": 3604
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2459471225738525,
      "learning_rate": 0.0005996376420148159,
      "loss": 3.6535,
      "step": 3605
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7355492115020752,
      "learning_rate": 0.0005996374409966587,
      "loss": 3.2594,
      "step": 3606
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.387397050857544,
      "learning_rate": 0.0005996372399227931,
      "loss": 3.4967,
      "step": 3607
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6851115226745605,
      "learning_rate": 0.0005996370387932194,
      "loss": 3.2119,
      "step": 3608
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5764014720916748,
      "learning_rate": 0.0005996368376079376,
      "loss": 3.2695,
      "step": 3609
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0849905014038086,
      "learning_rate": 0.0005996366363669476,
      "loss": 3.4241,
      "step": 3610
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0421741008758545,
      "learning_rate": 0.0005996364350702495,
      "loss": 3.3132,
      "step": 3611
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6897327899932861,
      "learning_rate": 0.0005996362337178433,
      "loss": 3.4134,
      "step": 3612
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.497756838798523,
      "learning_rate": 0.0005996360323097291,
      "loss": 3.3744,
      "step": 3613
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.7557363510131836,
      "learning_rate": 0.0005996358308459068,
      "loss": 3.5553,
      "step": 3614
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5199501514434814,
      "learning_rate": 0.0005996356293263767,
      "loss": 3.421,
      "step": 3615
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2997968196868896,
      "learning_rate": 0.0005996354277511386,
      "loss": 3.3609,
      "step": 3616
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0493898391723633,
      "learning_rate": 0.0005996352261201926,
      "loss": 3.507,
      "step": 3617
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6725938320159912,
      "learning_rate": 0.0005996350244335389,
      "loss": 3.2755,
      "step": 3618
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.358370304107666,
      "learning_rate": 0.0005996348226911773,
      "loss": 3.6107,
      "step": 3619
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.166022777557373,
      "learning_rate": 0.0005996346208931078,
      "loss": 3.4507,
      "step": 3620
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2520534992218018,
      "learning_rate": 0.0005996344190393307,
      "loss": 3.4887,
      "step": 3621
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.571791887283325,
      "learning_rate": 0.0005996342171298459,
      "loss": 3.3484,
      "step": 3622
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8924801349639893,
      "learning_rate": 0.0005996340151646534,
      "loss": 3.4703,
      "step": 3623
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6426007747650146,
      "learning_rate": 0.0005996338131437533,
      "loss": 3.3832,
      "step": 3624
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.8437142372131348,
      "learning_rate": 0.0005996336110671456,
      "loss": 3.3919,
      "step": 3625
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8259594440460205,
      "learning_rate": 0.0005996334089348305,
      "loss": 3.3685,
      "step": 3626
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4234681129455566,
      "learning_rate": 0.0005996332067468077,
      "loss": 3.5836,
      "step": 3627
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.452061891555786,
      "learning_rate": 0.0005996330045030775,
      "loss": 3.5669,
      "step": 3628
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.744657278060913,
      "learning_rate": 0.0005996328022036399,
      "loss": 3.3999,
      "step": 3629
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.602217435836792,
      "learning_rate": 0.0005996325998484949,
      "loss": 3.5988,
      "step": 3630
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.485661506652832,
      "learning_rate": 0.0005996323974376425,
      "loss": 3.4189,
      "step": 3631
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6621956825256348,
      "learning_rate": 0.0005996321949710828,
      "loss": 3.4834,
      "step": 3632
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3908953666687012,
      "learning_rate": 0.0005996319924488158,
      "loss": 3.5189,
      "step": 3633
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0575804710388184,
      "learning_rate": 0.0005996317898708416,
      "loss": 3.3075,
      "step": 3634
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8362131118774414,
      "learning_rate": 0.0005996315872371601,
      "loss": 3.361,
      "step": 3635
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.49907648563385,
      "learning_rate": 0.0005996313845477715,
      "loss": 3.4623,
      "step": 3636
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.068911552429199,
      "learning_rate": 0.0005996311818026759,
      "loss": 3.6515,
      "step": 3637
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.927405595779419,
      "learning_rate": 0.0005996309790018731,
      "loss": 3.4947,
      "step": 3638
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5982547998428345,
      "learning_rate": 0.0005996307761453632,
      "loss": 3.383,
      "step": 3639
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.154710531234741,
      "learning_rate": 0.0005996305732331464,
      "loss": 3.0848,
      "step": 3640
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3912630081176758,
      "learning_rate": 0.0005996303702652224,
      "loss": 3.4042,
      "step": 3641
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9853416681289673,
      "learning_rate": 0.0005996301672415916,
      "loss": 3.2978,
      "step": 3642
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3407320976257324,
      "learning_rate": 0.0005996299641622539,
      "loss": 3.5189,
      "step": 3643
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4151734113693237,
      "learning_rate": 0.0005996297610272093,
      "loss": 3.2438,
      "step": 3644
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.776718020439148,
      "learning_rate": 0.000599629557836458,
      "loss": 3.2167,
      "step": 3645
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.746920108795166,
      "learning_rate": 0.0005996293545899998,
      "loss": 3.416,
      "step": 3646
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7153548002243042,
      "learning_rate": 0.0005996291512878348,
      "loss": 3.3727,
      "step": 3647
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9258060455322266,
      "learning_rate": 0.0005996289479299631,
      "loss": 3.4104,
      "step": 3648
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.50199556350708,
      "learning_rate": 0.0005996287445163847,
      "loss": 3.7086,
      "step": 3649
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7961770296096802,
      "learning_rate": 0.0005996285410470999,
      "loss": 3.4267,
      "step": 3650
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6676539182662964,
      "learning_rate": 0.0005996283375221082,
      "loss": 3.5145,
      "step": 3651
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.951524257659912,
      "learning_rate": 0.00059962813394141,
      "loss": 3.5214,
      "step": 3652
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8931761980056763,
      "learning_rate": 0.0005996279303050054,
      "loss": 3.426,
      "step": 3653
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3389663696289062,
      "learning_rate": 0.0005996277266128942,
      "loss": 3.3572,
      "step": 3654
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6312966346740723,
      "learning_rate": 0.0005996275228650766,
      "loss": 3.5595,
      "step": 3655
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5356093645095825,
      "learning_rate": 0.0005996273190615525,
      "loss": 3.4038,
      "step": 3656
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4361131191253662,
      "learning_rate": 0.0005996271152023222,
      "loss": 3.2255,
      "step": 3657
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4331912994384766,
      "learning_rate": 0.0005996269112873854,
      "loss": 3.0901,
      "step": 3658
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9946019649505615,
      "learning_rate": 0.0005996267073167424,
      "loss": 3.5157,
      "step": 3659
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7984576225280762,
      "learning_rate": 0.0005996265032903931,
      "loss": 3.4641,
      "step": 3660
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.189272403717041,
      "learning_rate": 0.0005996262992083376,
      "loss": 3.5142,
      "step": 3661
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.647972822189331,
      "learning_rate": 0.0005996260950705759,
      "loss": 3.5038,
      "step": 3662
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.571300745010376,
      "learning_rate": 0.0005996258908771081,
      "loss": 3.3182,
      "step": 3663
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.091398000717163,
      "learning_rate": 0.0005996256866279341,
      "loss": 3.3813,
      "step": 3664
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7500417232513428,
      "learning_rate": 0.000599625482323054,
      "loss": 3.4412,
      "step": 3665
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6831464767456055,
      "learning_rate": 0.000599625277962468,
      "loss": 3.215,
      "step": 3666
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0016350746154785,
      "learning_rate": 0.0005996250735461759,
      "loss": 3.6802,
      "step": 3667
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2476654052734375,
      "learning_rate": 0.000599624869074178,
      "loss": 3.6858,
      "step": 3668
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7654889822006226,
      "learning_rate": 0.000599624664546474,
      "loss": 3.4707,
      "step": 3669
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.542493462562561,
      "learning_rate": 0.0005996244599630642,
      "loss": 3.4634,
      "step": 3670
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.146754503250122,
      "learning_rate": 0.0005996242553239485,
      "loss": 3.3589,
      "step": 3671
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9116662740707397,
      "learning_rate": 0.0005996240506291271,
      "loss": 3.3789,
      "step": 3672
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6079081296920776,
      "learning_rate": 0.0005996238458785998,
      "loss": 3.4525,
      "step": 3673
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0618789196014404,
      "learning_rate": 0.0005996236410723668,
      "loss": 3.4065,
      "step": 3674
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6606485843658447,
      "learning_rate": 0.0005996234362104282,
      "loss": 3.358,
      "step": 3675
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6327852010726929,
      "learning_rate": 0.0005996232312927838,
      "loss": 3.2348,
      "step": 3676
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0904719829559326,
      "learning_rate": 0.000599623026319434,
      "loss": 3.4656,
      "step": 3677
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8847898244857788,
      "learning_rate": 0.0005996228212903785,
      "loss": 3.4397,
      "step": 3678
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.713456392288208,
      "learning_rate": 0.0005996226162056174,
      "loss": 3.4362,
      "step": 3679
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3224730491638184,
      "learning_rate": 0.0005996224110651508,
      "loss": 3.2483,
      "step": 3680
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.1485111713409424,
      "learning_rate": 0.0005996222058689789,
      "loss": 3.3395,
      "step": 3681
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.552274227142334,
      "learning_rate": 0.0005996220006171015,
      "loss": 3.2106,
      "step": 3682
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.754181981086731,
      "learning_rate": 0.0005996217953095186,
      "loss": 3.4424,
      "step": 3683
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.152456283569336,
      "learning_rate": 0.0005996215899462305,
      "loss": 3.2791,
      "step": 3684
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8774447441101074,
      "learning_rate": 0.0005996213845272371,
      "loss": 3.5308,
      "step": 3685
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.795036554336548,
      "learning_rate": 0.0005996211790525384,
      "loss": 3.5025,
      "step": 3686
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.823746919631958,
      "learning_rate": 0.0005996209735221344,
      "loss": 3.2709,
      "step": 3687
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.043553113937378,
      "learning_rate": 0.0005996207679360252,
      "loss": 3.2535,
      "step": 3688
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.657045841217041,
      "learning_rate": 0.0005996205622942109,
      "loss": 3.2766,
      "step": 3689
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6037023067474365,
      "learning_rate": 0.0005996203565966914,
      "loss": 3.6592,
      "step": 3690
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.836798071861267,
      "learning_rate": 0.0005996201508434669,
      "loss": 3.2059,
      "step": 3691
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9232815504074097,
      "learning_rate": 0.0005996199450345374,
      "loss": 3.6398,
      "step": 3692
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1149234771728516,
      "learning_rate": 0.0005996197391699028,
      "loss": 3.4761,
      "step": 3693
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7674907445907593,
      "learning_rate": 0.0005996195332495633,
      "loss": 3.2041,
      "step": 3694
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6280161142349243,
      "learning_rate": 0.0005996193272735189,
      "loss": 3.4944,
      "step": 3695
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9321335554122925,
      "learning_rate": 0.0005996191212417696,
      "loss": 3.5343,
      "step": 3696
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7685613632202148,
      "learning_rate": 0.0005996189151543153,
      "loss": 3.3946,
      "step": 3697
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7770936489105225,
      "learning_rate": 0.0005996187090111563,
      "loss": 3.3658,
      "step": 3698
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5921742916107178,
      "learning_rate": 0.0005996185028122926,
      "loss": 3.6037,
      "step": 3699
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8440004587173462,
      "learning_rate": 0.000599618296557724,
      "loss": 3.3822,
      "step": 3700
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7858383655548096,
      "learning_rate": 0.0005996180902474508,
      "loss": 3.2359,
      "step": 3701
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5190534591674805,
      "learning_rate": 0.0005996178838814729,
      "loss": 3.388,
      "step": 3702
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.564650058746338,
      "learning_rate": 0.0005996176774597905,
      "loss": 3.6444,
      "step": 3703
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6066051721572876,
      "learning_rate": 0.0005996174709824033,
      "loss": 3.4088,
      "step": 3704
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3888850212097168,
      "learning_rate": 0.0005996172644493117,
      "loss": 3.6296,
      "step": 3705
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.55264151096344,
      "learning_rate": 0.0005996170578605156,
      "loss": 3.3442,
      "step": 3706
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3854780197143555,
      "learning_rate": 0.0005996168512160151,
      "loss": 3.2527,
      "step": 3707
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.557162880897522,
      "learning_rate": 0.00059961664451581,
      "loss": 3.4241,
      "step": 3708
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.309951066970825,
      "learning_rate": 0.0005996164377599007,
      "loss": 3.209,
      "step": 3709
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8286845684051514,
      "learning_rate": 0.0005996162309482868,
      "loss": 3.2226,
      "step": 3710
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.137925386428833,
      "learning_rate": 0.0005996160240809687,
      "loss": 3.5736,
      "step": 3711
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9881229400634766,
      "learning_rate": 0.0005996158171579465,
      "loss": 3.4313,
      "step": 3712
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2031052112579346,
      "learning_rate": 0.0005996156101792199,
      "loss": 3.5616,
      "step": 3713
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1790223121643066,
      "learning_rate": 0.0005996154031447891,
      "loss": 3.4738,
      "step": 3714
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8365858793258667,
      "learning_rate": 0.0005996151960546542,
      "loss": 3.3895,
      "step": 3715
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6257168054580688,
      "learning_rate": 0.0005996149889088151,
      "loss": 3.1901,
      "step": 3716
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.566584825515747,
      "learning_rate": 0.000599614781707272,
      "loss": 3.6218,
      "step": 3717
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2042365074157715,
      "learning_rate": 0.0005996145744500248,
      "loss": 3.8954,
      "step": 3718
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.693203091621399,
      "learning_rate": 0.0005996143671370736,
      "loss": 3.1008,
      "step": 3719
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6602306365966797,
      "learning_rate": 0.0005996141597684185,
      "loss": 3.3587,
      "step": 3720
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.1080827713012695,
      "learning_rate": 0.0005996139523440594,
      "loss": 3.4281,
      "step": 3721
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.007159471511841,
      "learning_rate": 0.0005996137448639965,
      "loss": 3.2478,
      "step": 3722
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4872090816497803,
      "learning_rate": 0.0005996135373282296,
      "loss": 3.2025,
      "step": 3723
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.180875778198242,
      "learning_rate": 0.0005996133297367589,
      "loss": 3.4,
      "step": 3724
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9279332160949707,
      "learning_rate": 0.0005996131220895846,
      "loss": 3.3335,
      "step": 3725
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6265963315963745,
      "learning_rate": 0.0005996129143867065,
      "loss": 3.4565,
      "step": 3726
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3078157901763916,
      "learning_rate": 0.0005996127066281245,
      "loss": 3.3636,
      "step": 3727
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1478235721588135,
      "learning_rate": 0.0005996124988138391,
      "loss": 3.1,
      "step": 3728
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0868420600891113,
      "learning_rate": 0.00059961229094385,
      "loss": 3.2553,
      "step": 3729
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9448559284210205,
      "learning_rate": 0.0005996120830181573,
      "loss": 3.399,
      "step": 3730
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5379478931427,
      "learning_rate": 0.0005996118750367609,
      "loss": 3.3814,
      "step": 3731
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.509293794631958,
      "learning_rate": 0.0005996116669996613,
      "loss": 3.3242,
      "step": 3732
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.375615119934082,
      "learning_rate": 0.000599611458906858,
      "loss": 3.3034,
      "step": 3733
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.842774510383606,
      "learning_rate": 0.0005996112507583514,
      "loss": 3.4178,
      "step": 3734
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6126861572265625,
      "learning_rate": 0.0005996110425541414,
      "loss": 3.1094,
      "step": 3735
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5639171600341797,
      "learning_rate": 0.000599610834294228,
      "loss": 3.1681,
      "step": 3736
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1557517051696777,
      "learning_rate": 0.0005996106259786114,
      "loss": 3.6501,
      "step": 3737
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9790434837341309,
      "learning_rate": 0.0005996104176072914,
      "loss": 3.6348,
      "step": 3738
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.851961612701416,
      "learning_rate": 0.0005996102091802683,
      "loss": 3.5729,
      "step": 3739
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9147088527679443,
      "learning_rate": 0.0005996100006975418,
      "loss": 3.2148,
      "step": 3740
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8040779829025269,
      "learning_rate": 0.0005996097921591124,
      "loss": 3.3427,
      "step": 3741
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7792961597442627,
      "learning_rate": 0.0005996095835649796,
      "loss": 3.4251,
      "step": 3742
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.879693865776062,
      "learning_rate": 0.0005996093749151439,
      "loss": 3.3273,
      "step": 3743
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9576151371002197,
      "learning_rate": 0.0005996091662096052,
      "loss": 3.1289,
      "step": 3744
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4762085676193237,
      "learning_rate": 0.0005996089574483634,
      "loss": 3.3266,
      "step": 3745
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4544432163238525,
      "learning_rate": 0.0005996087486314187,
      "loss": 3.6394,
      "step": 3746
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2303965091705322,
      "learning_rate": 0.000599608539758771,
      "loss": 3.5028,
      "step": 3747
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9579061269760132,
      "learning_rate": 0.0005996083308304205,
      "loss": 3.6043,
      "step": 3748
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.241302251815796,
      "learning_rate": 0.0005996081218463672,
      "loss": 3.3109,
      "step": 3749
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.779170274734497,
      "learning_rate": 0.000599607912806611,
      "loss": 3.3888,
      "step": 3750
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5822279453277588,
      "learning_rate": 0.000599607703711152,
      "loss": 3.5722,
      "step": 3751
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.138751983642578,
      "learning_rate": 0.0005996074945599903,
      "loss": 3.4034,
      "step": 3752
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1404001712799072,
      "learning_rate": 0.000599607285353126,
      "loss": 3.3551,
      "step": 3753
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.673429250717163,
      "learning_rate": 0.000599607076090559,
      "loss": 3.2333,
      "step": 3754
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8247888088226318,
      "learning_rate": 0.0005996068667722894,
      "loss": 3.3149,
      "step": 3755
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6999711990356445,
      "learning_rate": 0.0005996066573983172,
      "loss": 3.3394,
      "step": 3756
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.34487783908844,
      "learning_rate": 0.0005996064479686425,
      "loss": 3.3285,
      "step": 3757
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9708245992660522,
      "learning_rate": 0.0005996062384832653,
      "loss": 3.5729,
      "step": 3758
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.761921763420105,
      "learning_rate": 0.0005996060289421856,
      "loss": 3.3263,
      "step": 3759
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.2835216522216797,
      "learning_rate": 0.0005996058193454035,
      "loss": 3.1399,
      "step": 3760
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6928935050964355,
      "learning_rate": 0.000599605609692919,
      "loss": 3.4323,
      "step": 3761
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7323135137557983,
      "learning_rate": 0.0005996053999847322,
      "loss": 3.4257,
      "step": 3762
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6847591400146484,
      "learning_rate": 0.0005996051902208431,
      "loss": 3.2902,
      "step": 3763
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.338913083076477,
      "learning_rate": 0.0005996049804012517,
      "loss": 3.5124,
      "step": 3764
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.398544192314148,
      "learning_rate": 0.0005996047705259581,
      "loss": 3.4461,
      "step": 3765
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8740344047546387,
      "learning_rate": 0.0005996045605949624,
      "loss": 3.339,
      "step": 3766
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.146892547607422,
      "learning_rate": 0.0005996043506082644,
      "loss": 3.1293,
      "step": 3767
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1041605472564697,
      "learning_rate": 0.0005996041405658644,
      "loss": 3.3129,
      "step": 3768
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6880075931549072,
      "learning_rate": 0.0005996039304677624,
      "loss": 3.275,
      "step": 3769
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5728340148925781,
      "learning_rate": 0.0005996037203139582,
      "loss": 3.3423,
      "step": 3770
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.06441593170166,
      "learning_rate": 0.0005996035101044522,
      "loss": 3.2328,
      "step": 3771
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.538189172744751,
      "learning_rate": 0.0005996032998392441,
      "loss": 3.7706,
      "step": 3772
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.097647190093994,
      "learning_rate": 0.0005996030895183342,
      "loss": 3.3238,
      "step": 3773
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7238976955413818,
      "learning_rate": 0.0005996028791417224,
      "loss": 3.3874,
      "step": 3774
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9603850841522217,
      "learning_rate": 0.0005996026687094087,
      "loss": 3.5353,
      "step": 3775
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.602482557296753,
      "learning_rate": 0.0005996024582213932,
      "loss": 3.4074,
      "step": 3776
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9649417400360107,
      "learning_rate": 0.0005996022476776761,
      "loss": 3.1383,
      "step": 3777
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.202176094055176,
      "learning_rate": 0.0005996020370782571,
      "loss": 3.3365,
      "step": 3778
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.560949683189392,
      "learning_rate": 0.0005996018264231366,
      "loss": 3.4626,
      "step": 3779
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2863893508911133,
      "learning_rate": 0.0005996016157123143,
      "loss": 3.5355,
      "step": 3780
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.517820954322815,
      "learning_rate": 0.0005996014049457905,
      "loss": 3.4873,
      "step": 3781
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5708526372909546,
      "learning_rate": 0.0005996011941235652,
      "loss": 3.4681,
      "step": 3782
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8724156618118286,
      "learning_rate": 0.0005996009832456383,
      "loss": 3.6529,
      "step": 3783
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.880131483078003,
      "learning_rate": 0.00059960077231201,
      "loss": 3.026,
      "step": 3784
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.798072099685669,
      "learning_rate": 0.0005996005613226801,
      "loss": 3.6677,
      "step": 3785
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8833497762680054,
      "learning_rate": 0.0005996003502776488,
      "loss": 3.5151,
      "step": 3786
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5529837608337402,
      "learning_rate": 0.0005996001391769163,
      "loss": 3.3746,
      "step": 3787
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.638427972793579,
      "learning_rate": 0.0005995999280204824,
      "loss": 3.358,
      "step": 3788
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8262042999267578,
      "learning_rate": 0.0005995997168083473,
      "loss": 3.2288,
      "step": 3789
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6010240316390991,
      "learning_rate": 0.0005995995055405109,
      "loss": 3.4579,
      "step": 3790
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.612370252609253,
      "learning_rate": 0.0005995992942169732,
      "loss": 3.5653,
      "step": 3791
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3090837001800537,
      "learning_rate": 0.0005995990828377344,
      "loss": 3.4831,
      "step": 3792
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4595553874969482,
      "learning_rate": 0.0005995988714027946,
      "loss": 3.559,
      "step": 3793
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4705908298492432,
      "learning_rate": 0.0005995986599121536,
      "loss": 3.3618,
      "step": 3794
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2372422218322754,
      "learning_rate": 0.0005995984483658115,
      "loss": 3.3826,
      "step": 3795
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3885878324508667,
      "learning_rate": 0.0005995982367637685,
      "loss": 3.494,
      "step": 3796
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4331374168395996,
      "learning_rate": 0.0005995980251060245,
      "loss": 3.2325,
      "step": 3797
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5182597637176514,
      "learning_rate": 0.0005995978133925795,
      "loss": 3.4778,
      "step": 3798
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6931062936782837,
      "learning_rate": 0.0005995976016234337,
      "loss": 3.2242,
      "step": 3799
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4690282344818115,
      "learning_rate": 0.0005995973897985871,
      "loss": 3.4194,
      "step": 3800
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5072746276855469,
      "learning_rate": 0.0005995971779180396,
      "loss": 3.3274,
      "step": 3801
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7129192352294922,
      "learning_rate": 0.0005995969659817913,
      "loss": 3.3744,
      "step": 3802
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3332765102386475,
      "learning_rate": 0.0005995967539898422,
      "loss": 3.3829,
      "step": 3803
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3833953142166138,
      "learning_rate": 0.0005995965419421926,
      "loss": 3.4459,
      "step": 3804
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6840771436691284,
      "learning_rate": 0.0005995963298388423,
      "loss": 3.4793,
      "step": 3805
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6866153478622437,
      "learning_rate": 0.0005995961176797914,
      "loss": 3.4796,
      "step": 3806
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7597893476486206,
      "learning_rate": 0.0005995959054650398,
      "loss": 3.4486,
      "step": 3807
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5823765993118286,
      "learning_rate": 0.0005995956931945877,
      "loss": 3.5013,
      "step": 3808
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3941712379455566,
      "learning_rate": 0.0005995954808684352,
      "loss": 3.5912,
      "step": 3809
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5646419525146484,
      "learning_rate": 0.0005995952684865822,
      "loss": 3.251,
      "step": 3810
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7561509609222412,
      "learning_rate": 0.0005995950560490288,
      "loss": 3.4986,
      "step": 3811
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8122754096984863,
      "learning_rate": 0.000599594843555775,
      "loss": 3.4059,
      "step": 3812
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6828629970550537,
      "learning_rate": 0.0005995946310068209,
      "loss": 3.4023,
      "step": 3813
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0664594173431396,
      "learning_rate": 0.0005995944184021665,
      "loss": 3.1822,
      "step": 3814
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8294187784194946,
      "learning_rate": 0.0005995942057418118,
      "loss": 3.498,
      "step": 3815
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4696640968322754,
      "learning_rate": 0.0005995939930257568,
      "loss": 3.3053,
      "step": 3816
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.5056090354919434,
      "learning_rate": 0.0005995937802540018,
      "loss": 3.4585,
      "step": 3817
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7434672117233276,
      "learning_rate": 0.0005995935674265465,
      "loss": 3.4094,
      "step": 3818
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1329596042633057,
      "learning_rate": 0.0005995933545433912,
      "loss": 3.3429,
      "step": 3819
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9198100566864014,
      "learning_rate": 0.0005995931416045358,
      "loss": 3.5032,
      "step": 3820
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8380811214447021,
      "learning_rate": 0.0005995929286099805,
      "loss": 3.4052,
      "step": 3821
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6783065795898438,
      "learning_rate": 0.000599592715559725,
      "loss": 3.4028,
      "step": 3822
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5368036031723022,
      "learning_rate": 0.0005995925024537697,
      "loss": 3.4994,
      "step": 3823
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6493345499038696,
      "learning_rate": 0.0005995922892921145,
      "loss": 3.3625,
      "step": 3824
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.262124538421631,
      "learning_rate": 0.0005995920760747594,
      "loss": 3.3746,
      "step": 3825
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5334073305130005,
      "learning_rate": 0.0005995918628017044,
      "loss": 3.3547,
      "step": 3826
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6138638257980347,
      "learning_rate": 0.0005995916494729497,
      "loss": 3.4463,
      "step": 3827
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7384567260742188,
      "learning_rate": 0.0005995914360884954,
      "loss": 3.5046,
      "step": 3828
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9354020357131958,
      "learning_rate": 0.0005995912226483412,
      "loss": 3.3608,
      "step": 3829
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5909228324890137,
      "learning_rate": 0.0005995910091524874,
      "loss": 3.3398,
      "step": 3830
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4863468408584595,
      "learning_rate": 0.000599590795600934,
      "loss": 3.2446,
      "step": 3831
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.141575574874878,
      "learning_rate": 0.000599590581993681,
      "loss": 3.5295,
      "step": 3832
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1157071590423584,
      "learning_rate": 0.0005995903683307283,
      "loss": 3.3139,
      "step": 3833
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6895453929901123,
      "learning_rate": 0.0005995901546120764,
      "loss": 3.3384,
      "step": 3834
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4464071989059448,
      "learning_rate": 0.0005995899408377249,
      "loss": 3.5727,
      "step": 3835
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0913708209991455,
      "learning_rate": 0.0005995897270076739,
      "loss": 3.066,
      "step": 3836
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0422232151031494,
      "learning_rate": 0.0005995895131219235,
      "loss": 3.4028,
      "step": 3837
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.159789562225342,
      "learning_rate": 0.0005995892991804739,
      "loss": 3.4097,
      "step": 3838
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8650150299072266,
      "learning_rate": 0.0005995890851833249,
      "loss": 3.2436,
      "step": 3839
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5532426834106445,
      "learning_rate": 0.0005995888711304766,
      "loss": 3.3028,
      "step": 3840
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8873661756515503,
      "learning_rate": 0.0005995886570219291,
      "loss": 3.2246,
      "step": 3841
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.726851463317871,
      "learning_rate": 0.0005995884428576825,
      "loss": 3.454,
      "step": 3842
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6022402048110962,
      "learning_rate": 0.0005995882286377366,
      "loss": 3.4175,
      "step": 3843
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8336008787155151,
      "learning_rate": 0.0005995880143620918,
      "loss": 3.4234,
      "step": 3844
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2788517475128174,
      "learning_rate": 0.0005995878000307478,
      "loss": 3.4824,
      "step": 3845
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.301968574523926,
      "learning_rate": 0.0005995875856437048,
      "loss": 3.4458,
      "step": 3846
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2492618560791016,
      "learning_rate": 0.0005995873712009627,
      "loss": 3.4665,
      "step": 3847
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6593424081802368,
      "learning_rate": 0.0005995871567025218,
      "loss": 3.1604,
      "step": 3848
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.700750708580017,
      "learning_rate": 0.0005995869421483819,
      "loss": 3.4996,
      "step": 3849
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.2230498790740967,
      "learning_rate": 0.0005995867275385431,
      "loss": 3.4593,
      "step": 3850
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3757450580596924,
      "learning_rate": 0.0005995865128730057,
      "loss": 3.3285,
      "step": 3851
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8233779668807983,
      "learning_rate": 0.0005995862981517693,
      "loss": 3.4148,
      "step": 3852
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8629082441329956,
      "learning_rate": 0.0005995860833748343,
      "loss": 3.2036,
      "step": 3853
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9158806800842285,
      "learning_rate": 0.0005995858685422004,
      "loss": 3.2797,
      "step": 3854
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.042891263961792,
      "learning_rate": 0.000599585653653868,
      "loss": 3.6736,
      "step": 3855
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.482771873474121,
      "learning_rate": 0.0005995854387098369,
      "loss": 3.5401,
      "step": 3856
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.012319564819336,
      "learning_rate": 0.0005995852237101072,
      "loss": 3.4036,
      "step": 3857
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.4832077026367188,
      "learning_rate": 0.000599585008654679,
      "loss": 3.2764,
      "step": 3858
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.847940444946289,
      "learning_rate": 0.0005995847935435522,
      "loss": 3.4455,
      "step": 3859
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4700042009353638,
      "learning_rate": 0.0005995845783767271,
      "loss": 3.3058,
      "step": 3860
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.6816554069519043,
      "learning_rate": 0.0005995843631542034,
      "loss": 3.445,
      "step": 3861
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.556382417678833,
      "learning_rate": 0.0005995841478759813,
      "loss": 3.4932,
      "step": 3862
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.736277461051941,
      "learning_rate": 0.000599583932542061,
      "loss": 3.5145,
      "step": 3863
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.594698905944824,
      "learning_rate": 0.0005995837171524424,
      "loss": 3.4184,
      "step": 3864
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1615989208221436,
      "learning_rate": 0.0005995835017071254,
      "loss": 3.5062,
      "step": 3865
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.805274248123169,
      "learning_rate": 0.0005995832862061102,
      "loss": 3.2455,
      "step": 3866
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6216273307800293,
      "learning_rate": 0.0005995830706493969,
      "loss": 3.5022,
      "step": 3867
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7329151630401611,
      "learning_rate": 0.0005995828550369853,
      "loss": 3.3677,
      "step": 3868
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.379277229309082,
      "learning_rate": 0.0005995826393688757,
      "loss": 3.4917,
      "step": 3869
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6583406925201416,
      "learning_rate": 0.0005995824236450681,
      "loss": 3.3222,
      "step": 3870
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5881025791168213,
      "learning_rate": 0.0005995822078655624,
      "loss": 3.3219,
      "step": 3871
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.703772783279419,
      "learning_rate": 0.0005995819920303586,
      "loss": 3.3566,
      "step": 3872
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8985908031463623,
      "learning_rate": 0.0005995817761394569,
      "loss": 3.4651,
      "step": 3873
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.656670331954956,
      "learning_rate": 0.0005995815601928575,
      "loss": 3.6254,
      "step": 3874
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8168766498565674,
      "learning_rate": 0.00059958134419056,
      "loss": 3.4519,
      "step": 3875
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.4963555335998535,
      "learning_rate": 0.0005995811281325647,
      "loss": 3.2388,
      "step": 3876
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.530403971672058,
      "learning_rate": 0.0005995809120188717,
      "loss": 3.6729,
      "step": 3877
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.148650646209717,
      "learning_rate": 0.000599580695849481,
      "loss": 3.3915,
      "step": 3878
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3830292224884033,
      "learning_rate": 0.0005995804796243925,
      "loss": 3.603,
      "step": 3879
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.00260591506958,
      "learning_rate": 0.0005995802633436063,
      "loss": 3.2364,
      "step": 3880
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5713849067687988,
      "learning_rate": 0.0005995800470071225,
      "loss": 3.5355,
      "step": 3881
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.827826499938965,
      "learning_rate": 0.0005995798306149412,
      "loss": 3.3148,
      "step": 3882
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.345243453979492,
      "learning_rate": 0.0005995796141670623,
      "loss": 3.3421,
      "step": 3883
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7348531484603882,
      "learning_rate": 0.0005995793976634858,
      "loss": 3.3185,
      "step": 3884
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4602763652801514,
      "learning_rate": 0.000599579181104212,
      "loss": 3.5908,
      "step": 3885
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.939225196838379,
      "learning_rate": 0.0005995789644892407,
      "loss": 3.409,
      "step": 3886
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7006243467330933,
      "learning_rate": 0.000599578747818572,
      "loss": 3.577,
      "step": 3887
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5122764110565186,
      "learning_rate": 0.0005995785310922059,
      "loss": 3.414,
      "step": 3888
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5880225896835327,
      "learning_rate": 0.0005995783143101426,
      "loss": 3.6749,
      "step": 3889
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7152128219604492,
      "learning_rate": 0.000599578097472382,
      "loss": 3.4189,
      "step": 3890
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.674674391746521,
      "learning_rate": 0.0005995778805789241,
      "loss": 3.5216,
      "step": 3891
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.413679361343384,
      "learning_rate": 0.0005995776636297691,
      "loss": 3.3695,
      "step": 3892
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.591854453086853,
      "learning_rate": 0.0005995774466249171,
      "loss": 3.35,
      "step": 3893
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7164429426193237,
      "learning_rate": 0.0005995772295643677,
      "loss": 3.1138,
      "step": 3894
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0549943447113037,
      "learning_rate": 0.0005995770124481214,
      "loss": 3.201,
      "step": 3895
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8778505325317383,
      "learning_rate": 0.0005995767952761781,
      "loss": 3.599,
      "step": 3896
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6904398202896118,
      "learning_rate": 0.0005995765780485378,
      "loss": 3.556,
      "step": 3897
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4737181663513184,
      "learning_rate": 0.0005995763607652005,
      "loss": 3.502,
      "step": 3898
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8385790586471558,
      "learning_rate": 0.0005995761434261663,
      "loss": 3.3133,
      "step": 3899
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7773116827011108,
      "learning_rate": 0.0005995759260314353,
      "loss": 3.1911,
      "step": 3900
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9444034099578857,
      "learning_rate": 0.0005995757085810075,
      "loss": 3.4415,
      "step": 3901
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8666610717773438,
      "learning_rate": 0.0005995754910748829,
      "loss": 3.627,
      "step": 3902
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5296785831451416,
      "learning_rate": 0.0005995752735130615,
      "loss": 3.3708,
      "step": 3903
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1419737339019775,
      "learning_rate": 0.0005995750558955435,
      "loss": 3.4469,
      "step": 3904
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4276878833770752,
      "learning_rate": 0.0005995748382223287,
      "loss": 3.1888,
      "step": 3905
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1043362617492676,
      "learning_rate": 0.0005995746204934174,
      "loss": 3.3147,
      "step": 3906
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4802074432373047,
      "learning_rate": 0.0005995744027088095,
      "loss": 3.278,
      "step": 3907
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.521366000175476,
      "learning_rate": 0.000599574184868505,
      "loss": 2.9636,
      "step": 3908
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.2304867506027222,
      "learning_rate": 0.0005995739669725042,
      "loss": 3.2879,
      "step": 3909
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1971006393432617,
      "learning_rate": 0.0005995737490208067,
      "loss": 3.3817,
      "step": 3910
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4073703289031982,
      "learning_rate": 0.0005995735310134129,
      "loss": 3.2145,
      "step": 3911
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6796438694000244,
      "learning_rate": 0.0005995733129503227,
      "loss": 3.3568,
      "step": 3912
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.016772747039795,
      "learning_rate": 0.0005995730948315361,
      "loss": 3.6966,
      "step": 3913
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5026509761810303,
      "learning_rate": 0.0005995728766570534,
      "loss": 3.3993,
      "step": 3914
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9240858554840088,
      "learning_rate": 0.0005995726584268743,
      "loss": 3.446,
      "step": 3915
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2426674365997314,
      "learning_rate": 0.0005995724401409991,
      "loss": 3.4811,
      "step": 3916
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.714681625366211,
      "learning_rate": 0.0005995722217994276,
      "loss": 3.1767,
      "step": 3917
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9207593202590942,
      "learning_rate": 0.00059957200340216,
      "loss": 3.519,
      "step": 3918
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8207281827926636,
      "learning_rate": 0.0005995717849491964,
      "loss": 3.554,
      "step": 3919
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4251543283462524,
      "learning_rate": 0.0005995715664405366,
      "loss": 3.3441,
      "step": 3920
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7257496118545532,
      "learning_rate": 0.0005995713478761809,
      "loss": 3.2852,
      "step": 3921
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.96334707736969,
      "learning_rate": 0.0005995711292561292,
      "loss": 3.1911,
      "step": 3922
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.370260238647461,
      "learning_rate": 0.0005995709105803816,
      "loss": 3.3978,
      "step": 3923
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.21596360206604,
      "learning_rate": 0.0005995706918489381,
      "loss": 3.6576,
      "step": 3924
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8993786573410034,
      "learning_rate": 0.0005995704730617987,
      "loss": 3.1109,
      "step": 3925
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.841160774230957,
      "learning_rate": 0.0005995702542189635,
      "loss": 3.4588,
      "step": 3926
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.742577075958252,
      "learning_rate": 0.0005995700353204327,
      "loss": 3.4565,
      "step": 3927
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.807978868484497,
      "learning_rate": 0.0005995698163662061,
      "loss": 3.1702,
      "step": 3928
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7919889688491821,
      "learning_rate": 0.0005995695973562837,
      "loss": 3.2041,
      "step": 3929
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9763484001159668,
      "learning_rate": 0.0005995693782906656,
      "loss": 3.6332,
      "step": 3930
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.745743751525879,
      "learning_rate": 0.0005995691591693521,
      "loss": 3.4882,
      "step": 3931
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5542477369308472,
      "learning_rate": 0.0005995689399923429,
      "loss": 3.1841,
      "step": 3932
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.546188235282898,
      "learning_rate": 0.0005995687207596383,
      "loss": 3.4478,
      "step": 3933
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8175017833709717,
      "learning_rate": 0.0005995685014712382,
      "loss": 3.4236,
      "step": 3934
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7686386108398438,
      "learning_rate": 0.0005995682821271426,
      "loss": 3.153,
      "step": 3935
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7841171026229858,
      "learning_rate": 0.0005995680627273516,
      "loss": 3.6416,
      "step": 3936
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.882180094718933,
      "learning_rate": 0.0005995678432718653,
      "loss": 3.1935,
      "step": 3937
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.811983346939087,
      "learning_rate": 0.0005995676237606836,
      "loss": 3.5315,
      "step": 3938
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5975761413574219,
      "learning_rate": 0.0005995674041938066,
      "loss": 3.3079,
      "step": 3939
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.10459566116333,
      "learning_rate": 0.0005995671845712344,
      "loss": 3.1244,
      "step": 3940
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3677055835723877,
      "learning_rate": 0.0005995669648929671,
      "loss": 3.4613,
      "step": 3941
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6833292245864868,
      "learning_rate": 0.0005995667451590046,
      "loss": 3.5728,
      "step": 3942
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.56862473487854,
      "learning_rate": 0.000599566525369347,
      "loss": 3.3732,
      "step": 3943
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3963313102722168,
      "learning_rate": 0.0005995663055239943,
      "loss": 3.3594,
      "step": 3944
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.793931245803833,
      "learning_rate": 0.0005995660856229466,
      "loss": 3.3235,
      "step": 3945
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.69840407371521,
      "learning_rate": 0.0005995658656662038,
      "loss": 3.4308,
      "step": 3946
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6602177619934082,
      "learning_rate": 0.0005995656456537662,
      "loss": 3.4825,
      "step": 3947
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4323842525482178,
      "learning_rate": 0.0005995654255856336,
      "loss": 3.416,
      "step": 3948
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8344008922576904,
      "learning_rate": 0.0005995652054618061,
      "loss": 3.3337,
      "step": 3949
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4730480909347534,
      "learning_rate": 0.000599564985282284,
      "loss": 3.4532,
      "step": 3950
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3329267501831055,
      "learning_rate": 0.0005995647650470667,
      "loss": 3.3814,
      "step": 3951
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.246251344680786,
      "learning_rate": 0.000599564544756155,
      "loss": 3.462,
      "step": 3952
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5208189487457275,
      "learning_rate": 0.0005995643244095485,
      "loss": 3.2451,
      "step": 3953
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.783958911895752,
      "learning_rate": 0.0005995641040072474,
      "loss": 3.4544,
      "step": 3954
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7272361516952515,
      "learning_rate": 0.0005995638835492516,
      "loss": 3.4574,
      "step": 3955
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8209545612335205,
      "learning_rate": 0.0005995636630355611,
      "loss": 3.7362,
      "step": 3956
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7887861728668213,
      "learning_rate": 0.0005995634424661762,
      "loss": 3.4955,
      "step": 3957
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.051138162612915,
      "learning_rate": 0.0005995632218410968,
      "loss": 3.436,
      "step": 3958
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8084570169448853,
      "learning_rate": 0.000599563001160323,
      "loss": 3.5188,
      "step": 3959
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.642693281173706,
      "learning_rate": 0.0005995627804238548,
      "loss": 3.525,
      "step": 3960
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4277757406234741,
      "learning_rate": 0.0005995625596316921,
      "loss": 3.2303,
      "step": 3961
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4118403196334839,
      "learning_rate": 0.0005995623387838351,
      "loss": 3.3029,
      "step": 3962
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7861262559890747,
      "learning_rate": 0.0005995621178802838,
      "loss": 3.2329,
      "step": 3963
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5798823833465576,
      "learning_rate": 0.0005995618969210383,
      "loss": 3.3719,
      "step": 3964
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.451263189315796,
      "learning_rate": 0.0005995616759060985,
      "loss": 3.6211,
      "step": 3965
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1460483074188232,
      "learning_rate": 0.0005995614548354645,
      "loss": 3.5524,
      "step": 3966
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4659266471862793,
      "learning_rate": 0.0005995612337091365,
      "loss": 3.41,
      "step": 3967
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7683123350143433,
      "learning_rate": 0.0005995610125271144,
      "loss": 3.4852,
      "step": 3968
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8765028715133667,
      "learning_rate": 0.0005995607912893982,
      "loss": 3.7361,
      "step": 3969
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.28867506980896,
      "learning_rate": 0.000599560569995988,
      "loss": 3.421,
      "step": 3970
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8955919742584229,
      "learning_rate": 0.0005995603486468839,
      "loss": 3.3054,
      "step": 3971
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8129932880401611,
      "learning_rate": 0.0005995601272420857,
      "loss": 3.181,
      "step": 3972
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.914357900619507,
      "learning_rate": 0.0005995599057815939,
      "loss": 3.6432,
      "step": 3973
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4912800788879395,
      "learning_rate": 0.0005995596842654081,
      "loss": 3.2748,
      "step": 3974
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6905287504196167,
      "learning_rate": 0.0005995594626935284,
      "loss": 3.5367,
      "step": 3975
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9780664443969727,
      "learning_rate": 0.000599559241065955,
      "loss": 3.4941,
      "step": 3976
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.720915675163269,
      "learning_rate": 0.000599559019382688,
      "loss": 3.3727,
      "step": 3977
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5460247993469238,
      "learning_rate": 0.0005995587976437271,
      "loss": 3.3423,
      "step": 3978
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7008353471755981,
      "learning_rate": 0.0005995585758490728,
      "loss": 3.6325,
      "step": 3979
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3712248802185059,
      "learning_rate": 0.0005995583539987249,
      "loss": 3.414,
      "step": 3980
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.549126386642456,
      "learning_rate": 0.0005995581320926833,
      "loss": 3.397,
      "step": 3981
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5325963497161865,
      "learning_rate": 0.0005995579101309483,
      "loss": 3.3722,
      "step": 3982
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4873130321502686,
      "learning_rate": 0.0005995576881135197,
      "loss": 3.3027,
      "step": 3983
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6813639402389526,
      "learning_rate": 0.0005995574660403978,
      "loss": 3.4157,
      "step": 3984
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3453742265701294,
      "learning_rate": 0.0005995572439115825,
      "loss": 3.2127,
      "step": 3985
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8944979906082153,
      "learning_rate": 0.0005995570217270738,
      "loss": 3.3597,
      "step": 3986
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3091394901275635,
      "learning_rate": 0.0005995567994868718,
      "loss": 3.3426,
      "step": 3987
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.98594331741333,
      "learning_rate": 0.0005995565771909765,
      "loss": 3.7925,
      "step": 3988
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.203211784362793,
      "learning_rate": 0.0005995563548393881,
      "loss": 3.1994,
      "step": 3989
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7428678274154663,
      "learning_rate": 0.0005995561324321064,
      "loss": 3.4209,
      "step": 3990
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3674026727676392,
      "learning_rate": 0.0005995559099691316,
      "loss": 3.45,
      "step": 3991
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.5314416885375977,
      "learning_rate": 0.0005995556874504637,
      "loss": 3.3714,
      "step": 3992
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8450117111206055,
      "learning_rate": 0.0005995554648761028,
      "loss": 3.4135,
      "step": 3993
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6262286901474,
      "learning_rate": 0.0005995552422460488,
      "loss": 3.2992,
      "step": 3994
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.439267873764038,
      "learning_rate": 0.0005995550195603018,
      "loss": 3.2626,
      "step": 3995
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.737808108329773,
      "learning_rate": 0.0005995547968188621,
      "loss": 3.3912,
      "step": 3996
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.253857374191284,
      "learning_rate": 0.0005995545740217293,
      "loss": 3.4515,
      "step": 3997
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.764765977859497,
      "learning_rate": 0.0005995543511689036,
      "loss": 3.5323,
      "step": 3998
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.893346905708313,
      "learning_rate": 0.0005995541282603852,
      "loss": 3.2122,
      "step": 3999
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5995118618011475,
      "learning_rate": 0.0005995539052961741,
      "loss": 3.4072,
      "step": 4000
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2843639850616455,
      "learning_rate": 0.0005995536822762702,
      "loss": 3.473,
      "step": 4001
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4626353979110718,
      "learning_rate": 0.0005995534592006735,
      "loss": 3.6188,
      "step": 4002
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.976065993309021,
      "learning_rate": 0.0005995532360693844,
      "loss": 3.7108,
      "step": 4003
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2635245323181152,
      "learning_rate": 0.0005995530128824025,
      "loss": 3.4048,
      "step": 4004
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6483579874038696,
      "learning_rate": 0.0005995527896397281,
      "loss": 3.2857,
      "step": 4005
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7006521224975586,
      "learning_rate": 0.0005995525663413612,
      "loss": 3.2828,
      "step": 4006
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5176466703414917,
      "learning_rate": 0.0005995523429873018,
      "loss": 3.7612,
      "step": 4007
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2868669033050537,
      "learning_rate": 0.00059955211957755,
      "loss": 3.5165,
      "step": 4008
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8929601907730103,
      "learning_rate": 0.0005995518961121058,
      "loss": 3.2864,
      "step": 4009
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7511274814605713,
      "learning_rate": 0.0005995516725909692,
      "loss": 3.6172,
      "step": 4010
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7695916891098022,
      "learning_rate": 0.0005995514490141404,
      "loss": 3.3066,
      "step": 4011
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9863650798797607,
      "learning_rate": 0.0005995512253816192,
      "loss": 3.335,
      "step": 4012
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0051393508911133,
      "learning_rate": 0.0005995510016934058,
      "loss": 3.2675,
      "step": 4013
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7257682085037231,
      "learning_rate": 0.0005995507779495002,
      "loss": 3.1782,
      "step": 4014
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6209700107574463,
      "learning_rate": 0.0005995505541499025,
      "loss": 3.4201,
      "step": 4015
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.000688314437866,
      "learning_rate": 0.0005995503302946126,
      "loss": 3.5413,
      "step": 4016
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6533794403076172,
      "learning_rate": 0.0005995501063836308,
      "loss": 3.6111,
      "step": 4017
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5800961256027222,
      "learning_rate": 0.0005995498824169569,
      "loss": 3.2087,
      "step": 4018
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7804123163223267,
      "learning_rate": 0.000599549658394591,
      "loss": 3.4301,
      "step": 4019
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6532293558120728,
      "learning_rate": 0.0005995494343165333,
      "loss": 3.2452,
      "step": 4020
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5474990606307983,
      "learning_rate": 0.0005995492101827835,
      "loss": 3.4613,
      "step": 4021
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.838608980178833,
      "learning_rate": 0.000599548985993342,
      "loss": 3.4867,
      "step": 4022
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.222135305404663,
      "learning_rate": 0.0005995487617482087,
      "loss": 3.4147,
      "step": 4023
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7737077474594116,
      "learning_rate": 0.0005995485374473836,
      "loss": 3.6601,
      "step": 4024
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9052999019622803,
      "learning_rate": 0.0005995483130908666,
      "loss": 3.3639,
      "step": 4025
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7261101007461548,
      "learning_rate": 0.000599548088678658,
      "loss": 3.2016,
      "step": 4026
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7786271572113037,
      "learning_rate": 0.0005995478642107579,
      "loss": 3.1357,
      "step": 4027
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0826222896575928,
      "learning_rate": 0.0005995476396871661,
      "loss": 3.4503,
      "step": 4028
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5979732275009155,
      "learning_rate": 0.0005995474151078828,
      "loss": 3.2702,
      "step": 4029
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5523629188537598,
      "learning_rate": 0.0005995471904729079,
      "loss": 3.4367,
      "step": 4030
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3501298427581787,
      "learning_rate": 0.0005995469657822416,
      "loss": 3.3652,
      "step": 4031
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.010526418685913,
      "learning_rate": 0.0005995467410358838,
      "loss": 3.4562,
      "step": 4032
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8604369163513184,
      "learning_rate": 0.0005995465162338346,
      "loss": 3.4563,
      "step": 4033
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.331646680831909,
      "learning_rate": 0.0005995462913760941,
      "loss": 3.4151,
      "step": 4034
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.841603398323059,
      "learning_rate": 0.0005995460664626623,
      "loss": 3.53,
      "step": 4035
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5610780715942383,
      "learning_rate": 0.0005995458414935392,
      "loss": 3.5149,
      "step": 4036
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8422574996948242,
      "learning_rate": 0.0005995456164687247,
      "loss": 3.4448,
      "step": 4037
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.792723536491394,
      "learning_rate": 0.0005995453913882193,
      "loss": 3.0877,
      "step": 4038
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6424579620361328,
      "learning_rate": 0.0005995451662520226,
      "loss": 3.4852,
      "step": 4039
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6122589111328125,
      "learning_rate": 0.0005995449410601348,
      "loss": 3.3629,
      "step": 4040
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2723124027252197,
      "learning_rate": 0.0005995447158125561,
      "loss": 3.2621,
      "step": 4041
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6124974489212036,
      "learning_rate": 0.0005995444905092861,
      "loss": 3.4833,
      "step": 4042
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7287276983261108,
      "learning_rate": 0.0005995442651503253,
      "loss": 3.4377,
      "step": 4043
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.100321054458618,
      "learning_rate": 0.0005995440397356736,
      "loss": 3.4185,
      "step": 4044
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.803545594215393,
      "learning_rate": 0.000599543814265331,
      "loss": 3.5595,
      "step": 4045
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7914496660232544,
      "learning_rate": 0.0005995435887392975,
      "loss": 3.331,
      "step": 4046
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6769500970840454,
      "learning_rate": 0.0005995433631575733,
      "loss": 3.0865,
      "step": 4047
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1139724254608154,
      "learning_rate": 0.0005995431375201581,
      "loss": 3.481,
      "step": 4048
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5639674663543701,
      "learning_rate": 0.0005995429118270524,
      "loss": 3.414,
      "step": 4049
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4860801696777344,
      "learning_rate": 0.000599542686078256,
      "loss": 3.2344,
      "step": 4050
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.844834804534912,
      "learning_rate": 0.0005995424602737689,
      "loss": 3.3639,
      "step": 4051
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5476446151733398,
      "learning_rate": 0.0005995422344135911,
      "loss": 3.1593,
      "step": 4052
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8988322019577026,
      "learning_rate": 0.0005995420084977228,
      "loss": 3.3549,
      "step": 4053
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7067774534225464,
      "learning_rate": 0.0005995417825261641,
      "loss": 3.3549,
      "step": 4054
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.657077670097351,
      "learning_rate": 0.0005995415564989149,
      "loss": 3.0193,
      "step": 4055
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9104007482528687,
      "learning_rate": 0.0005995413304159752,
      "loss": 3.2366,
      "step": 4056
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.174445867538452,
      "learning_rate": 0.0005995411042773451,
      "loss": 3.2935,
      "step": 4057
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5318609476089478,
      "learning_rate": 0.0005995408780830247,
      "loss": 3.1933,
      "step": 4058
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9419183731079102,
      "learning_rate": 0.000599540651833014,
      "loss": 3.2842,
      "step": 4059
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4334805011749268,
      "learning_rate": 0.000599540425527313,
      "loss": 3.3142,
      "step": 4060
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.733028531074524,
      "learning_rate": 0.0005995401991659218,
      "loss": 3.2304,
      "step": 4061
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5706274509429932,
      "learning_rate": 0.0005995399727488404,
      "loss": 3.2036,
      "step": 4062
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.804766058921814,
      "learning_rate": 0.0005995397462760689,
      "loss": 3.4346,
      "step": 4063
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7357990741729736,
      "learning_rate": 0.0005995395197476073,
      "loss": 3.4471,
      "step": 4064
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.947446346282959,
      "learning_rate": 0.0005995392931634557,
      "loss": 3.1953,
      "step": 4065
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.335951089859009,
      "learning_rate": 0.000599539066523614,
      "loss": 3.2506,
      "step": 4066
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.958301067352295,
      "learning_rate": 0.0005995388398280824,
      "loss": 3.5839,
      "step": 4067
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6226699352264404,
      "learning_rate": 0.0005995386130768609,
      "loss": 3.4283,
      "step": 4068
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.1808621883392334,
      "learning_rate": 0.0005995383862699494,
      "loss": 3.3451,
      "step": 4069
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.609157919883728,
      "learning_rate": 0.0005995381594073483,
      "loss": 3.426,
      "step": 4070
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5015060901641846,
      "learning_rate": 0.0005995379324890571,
      "loss": 3.4281,
      "step": 4071
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.176312208175659,
      "learning_rate": 0.0005995377055150763,
      "loss": 3.4403,
      "step": 4072
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5032750368118286,
      "learning_rate": 0.0005995374784854058,
      "loss": 3.4205,
      "step": 4073
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8850287199020386,
      "learning_rate": 0.0005995372514000456,
      "loss": 3.4338,
      "step": 4074
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7798166275024414,
      "learning_rate": 0.0005995370242589958,
      "loss": 3.4525,
      "step": 4075
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.350348711013794,
      "learning_rate": 0.0005995367970622563,
      "loss": 3.3881,
      "step": 4076
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0869016647338867,
      "learning_rate": 0.0005995365698098273,
      "loss": 3.4932,
      "step": 4077
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6938369274139404,
      "learning_rate": 0.0005995363425017089,
      "loss": 3.5493,
      "step": 4078
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9991834163665771,
      "learning_rate": 0.000599536115137901,
      "loss": 3.4356,
      "step": 4079
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.229457139968872,
      "learning_rate": 0.0005995358877184036,
      "loss": 3.2328,
      "step": 4080
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6289010047912598,
      "learning_rate": 0.0005995356602432169,
      "loss": 3.4094,
      "step": 4081
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.136737585067749,
      "learning_rate": 0.0005995354327123408,
      "loss": 3.3429,
      "step": 4082
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7900967597961426,
      "learning_rate": 0.0005995352051257754,
      "loss": 3.2938,
      "step": 4083
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5390615463256836,
      "learning_rate": 0.0005995349774835209,
      "loss": 3.46,
      "step": 4084
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5879063606262207,
      "learning_rate": 0.000599534749785577,
      "loss": 3.5092,
      "step": 4085
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.662537693977356,
      "learning_rate": 0.000599534522031944,
      "loss": 3.2531,
      "step": 4086
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.662896752357483,
      "learning_rate": 0.0005995342942226219,
      "loss": 3.6348,
      "step": 4087
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4337432384490967,
      "learning_rate": 0.0005995340663576108,
      "loss": 3.6266,
      "step": 4088
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.581976056098938,
      "learning_rate": 0.0005995338384369106,
      "loss": 3.169,
      "step": 4089
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6966140270233154,
      "learning_rate": 0.0005995336104605214,
      "loss": 3.5426,
      "step": 4090
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5192996263504028,
      "learning_rate": 0.0005995333824284433,
      "loss": 3.58,
      "step": 4091
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7296092510223389,
      "learning_rate": 0.0005995331543406762,
      "loss": 3.5405,
      "step": 4092
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.608123302459717,
      "learning_rate": 0.0005995329261972202,
      "loss": 3.4314,
      "step": 4093
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6166313886642456,
      "learning_rate": 0.0005995326979980755,
      "loss": 3.5707,
      "step": 4094
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9709473848342896,
      "learning_rate": 0.000599532469743242,
      "loss": 3.5296,
      "step": 4095
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8740863800048828,
      "learning_rate": 0.0005995322414327196,
      "loss": 3.3034,
      "step": 4096
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6640734672546387,
      "learning_rate": 0.0005995320130665087,
      "loss": 3.534,
      "step": 4097
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6275315284729004,
      "learning_rate": 0.0005995317846446091,
      "loss": 3.5908,
      "step": 4098
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.6195595264434814,
      "learning_rate": 0.0005995315561670208,
      "loss": 3.5568,
      "step": 4099
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.0616235733032227,
      "learning_rate": 0.000599531327633744,
      "loss": 3.2121,
      "step": 4100
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4500908851623535,
      "learning_rate": 0.0005995310990447786,
      "loss": 3.246,
      "step": 4101
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4156084060668945,
      "learning_rate": 0.0005995308704001248,
      "loss": 3.2633,
      "step": 4102
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9542293548583984,
      "learning_rate": 0.0005995306416997825,
      "loss": 3.4471,
      "step": 4103
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.469243049621582,
      "learning_rate": 0.0005995304129437519,
      "loss": 3.6809,
      "step": 4104
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.69242525100708,
      "learning_rate": 0.0005995301841320329,
      "loss": 3.5589,
      "step": 4105
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8718570470809937,
      "learning_rate": 0.0005995299552646254,
      "loss": 3.2963,
      "step": 4106
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1035187244415283,
      "learning_rate": 0.0005995297263415299,
      "loss": 3.2081,
      "step": 4107
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6023175716400146,
      "learning_rate": 0.000599529497362746,
      "loss": 3.5729,
      "step": 4108
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4861230850219727,
      "learning_rate": 0.000599529268328274,
      "loss": 3.1408,
      "step": 4109
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.990311861038208,
      "learning_rate": 0.0005995290392381139,
      "loss": 3.2601,
      "step": 4110
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8519184589385986,
      "learning_rate": 0.0005995288100922655,
      "loss": 3.2858,
      "step": 4111
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.825923204421997,
      "learning_rate": 0.0005995285808907292,
      "loss": 3.2332,
      "step": 4112
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.774980902671814,
      "learning_rate": 0.0005995283516335049,
      "loss": 3.2875,
      "step": 4113
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.635190486907959,
      "learning_rate": 0.0005995281223205925,
      "loss": 3.5756,
      "step": 4114
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.453508973121643,
      "learning_rate": 0.0005995278929519923,
      "loss": 3.516,
      "step": 4115
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3828508853912354,
      "learning_rate": 0.000599527663527704,
      "loss": 3.2612,
      "step": 4116
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7380341291427612,
      "learning_rate": 0.0005995274340477281,
      "loss": 3.3682,
      "step": 4117
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.574608564376831,
      "learning_rate": 0.0005995272045120643,
      "loss": 3.4032,
      "step": 4118
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3653976917266846,
      "learning_rate": 0.0005995269749207127,
      "loss": 3.3998,
      "step": 4119
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5532357692718506,
      "learning_rate": 0.0005995267452736734,
      "loss": 3.4681,
      "step": 4120
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8668618202209473,
      "learning_rate": 0.0005995265155709465,
      "loss": 3.2256,
      "step": 4121
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4979348182678223,
      "learning_rate": 0.0005995262858125318,
      "loss": 3.1456,
      "step": 4122
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6846290826797485,
      "learning_rate": 0.0005995260559984297,
      "loss": 3.7847,
      "step": 4123
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3207318782806396,
      "learning_rate": 0.00059952582612864,
      "loss": 3.5561,
      "step": 4124
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.306802988052368,
      "learning_rate": 0.0005995255962031626,
      "loss": 3.4397,
      "step": 4125
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6406582593917847,
      "learning_rate": 0.000599525366221998,
      "loss": 3.4147,
      "step": 4126
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7011678218841553,
      "learning_rate": 0.0005995251361851459,
      "loss": 3.3521,
      "step": 4127
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8272019624710083,
      "learning_rate": 0.0005995249060926063,
      "loss": 3.3733,
      "step": 4128
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.525174856185913,
      "learning_rate": 0.0005995246759443796,
      "loss": 3.4633,
      "step": 4129
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5707476139068604,
      "learning_rate": 0.0005995244457404654,
      "loss": 3.2855,
      "step": 4130
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.447530746459961,
      "learning_rate": 0.000599524215480864,
      "loss": 3.5855,
      "step": 4131
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7763391733169556,
      "learning_rate": 0.0005995239851655754,
      "loss": 3.2651,
      "step": 4132
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8947100639343262,
      "learning_rate": 0.0005995237547945997,
      "loss": 3.2502,
      "step": 4133
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.445290446281433,
      "learning_rate": 0.0005995235243679368,
      "loss": 3.3901,
      "step": 4134
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8882641792297363,
      "learning_rate": 0.0005995232938855869,
      "loss": 3.4759,
      "step": 4135
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8724039793014526,
      "learning_rate": 0.0005995230633475499,
      "loss": 3.3257,
      "step": 4136
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4908102750778198,
      "learning_rate": 0.000599522832753826,
      "loss": 3.621,
      "step": 4137
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.155456304550171,
      "learning_rate": 0.0005995226021044151,
      "loss": 3.6098,
      "step": 4138
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.2072558403015137,
      "learning_rate": 0.0005995223713993172,
      "loss": 3.4726,
      "step": 4139
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.3776782751083374,
      "learning_rate": 0.0005995221406385325,
      "loss": 3.6321,
      "step": 4140
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.801539897918701,
      "learning_rate": 0.000599521909822061,
      "loss": 3.2838,
      "step": 4141
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5251080989837646,
      "learning_rate": 0.0005995216789499028,
      "loss": 3.2099,
      "step": 4142
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4215648174285889,
      "learning_rate": 0.0005995214480220577,
      "loss": 3.3484,
      "step": 4143
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.683608889579773,
      "learning_rate": 0.0005995212170385261,
      "loss": 3.4505,
      "step": 4144
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.257963180541992,
      "learning_rate": 0.0005995209859993077,
      "loss": 3.372,
      "step": 4145
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4394572973251343,
      "learning_rate": 0.0005995207549044027,
      "loss": 3.1199,
      "step": 4146
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8057023286819458,
      "learning_rate": 0.0005995205237538112,
      "loss": 3.561,
      "step": 4147
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5784446001052856,
      "learning_rate": 0.0005995202925475332,
      "loss": 3.3253,
      "step": 4148
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7735518217086792,
      "learning_rate": 0.0005995200612855687,
      "loss": 3.3259,
      "step": 4149
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7042614221572876,
      "learning_rate": 0.0005995198299679177,
      "loss": 3.4563,
      "step": 4150
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4271113872528076,
      "learning_rate": 0.0005995195985945804,
      "loss": 3.3164,
      "step": 4151
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3604395389556885,
      "learning_rate": 0.0005995193671655567,
      "loss": 3.2634,
      "step": 4152
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.181626796722412,
      "learning_rate": 0.0005995191356808467,
      "loss": 3.4093,
      "step": 4153
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5633169412612915,
      "learning_rate": 0.0005995189041404505,
      "loss": 3.4152,
      "step": 4154
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.6983532905578613,
      "learning_rate": 0.000599518672544368,
      "loss": 3.1651,
      "step": 4155
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7611024379730225,
      "learning_rate": 0.0005995184408925994,
      "loss": 3.5766,
      "step": 4156
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5731844902038574,
      "learning_rate": 0.0005995182091851446,
      "loss": 3.3476,
      "step": 4157
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7485095262527466,
      "learning_rate": 0.0005995179774220037,
      "loss": 3.3333,
      "step": 4158
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7150726318359375,
      "learning_rate": 0.0005995177456031768,
      "loss": 3.243,
      "step": 4159
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9248830080032349,
      "learning_rate": 0.0005995175137286638,
      "loss": 3.3498,
      "step": 4160
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5204520225524902,
      "learning_rate": 0.000599517281798465,
      "loss": 3.4055,
      "step": 4161
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.151221990585327,
      "learning_rate": 0.0005995170498125802,
      "loss": 3.3868,
      "step": 4162
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1146063804626465,
      "learning_rate": 0.0005995168177710096,
      "loss": 3.3707,
      "step": 4163
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1406867504119873,
      "learning_rate": 0.0005995165856737531,
      "loss": 3.6359,
      "step": 4164
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5807290077209473,
      "learning_rate": 0.0005995163535208108,
      "loss": 3.3949,
      "step": 4165
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3196425437927246,
      "learning_rate": 0.0005995161213121828,
      "loss": 3.2169,
      "step": 4166
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7334040403366089,
      "learning_rate": 0.0005995158890478691,
      "loss": 3.5275,
      "step": 4167
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5832780599594116,
      "learning_rate": 0.0005995156567278697,
      "loss": 3.4226,
      "step": 4168
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6418858766555786,
      "learning_rate": 0.0005995154243521847,
      "loss": 3.4873,
      "step": 4169
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6095540523529053,
      "learning_rate": 0.0005995151919208141,
      "loss": 3.2563,
      "step": 4170
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4704641103744507,
      "learning_rate": 0.000599514959433758,
      "loss": 3.2862,
      "step": 4171
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9140501022338867,
      "learning_rate": 0.0005995147268910164,
      "loss": 3.4061,
      "step": 4172
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.457237720489502,
      "learning_rate": 0.0005995144942925894,
      "loss": 3.2906,
      "step": 4173
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4463305473327637,
      "learning_rate": 0.0005995142616384769,
      "loss": 3.2867,
      "step": 4174
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.156757354736328,
      "learning_rate": 0.0005995140289286792,
      "loss": 3.3343,
      "step": 4175
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7878175973892212,
      "learning_rate": 0.0005995137961631961,
      "loss": 3.4208,
      "step": 4176
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7280473709106445,
      "learning_rate": 0.0005995135633420277,
      "loss": 3.248,
      "step": 4177
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7411142587661743,
      "learning_rate": 0.0005995133304651742,
      "loss": 3.3992,
      "step": 4178
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.2776339054107666,
      "learning_rate": 0.0005995130975326354,
      "loss": 3.3918,
      "step": 4179
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6020448207855225,
      "learning_rate": 0.0005995128645444115,
      "loss": 3.3208,
      "step": 4180
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9805409908294678,
      "learning_rate": 0.0005995126315005025,
      "loss": 3.3303,
      "step": 4181
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.395688533782959,
      "learning_rate": 0.0005995123984009085,
      "loss": 3.3543,
      "step": 4182
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.877488374710083,
      "learning_rate": 0.0005995121652456294,
      "loss": 3.4186,
      "step": 4183
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7782933712005615,
      "learning_rate": 0.0005995119320346654,
      "loss": 3.4536,
      "step": 4184
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6380068063735962,
      "learning_rate": 0.0005995116987680165,
      "loss": 3.4877,
      "step": 4185
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7148000001907349,
      "learning_rate": 0.0005995114654456826,
      "loss": 3.4416,
      "step": 4186
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9608259201049805,
      "learning_rate": 0.000599511232067664,
      "loss": 3.3059,
      "step": 4187
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5285236835479736,
      "learning_rate": 0.0005995109986339605,
      "loss": 3.4216,
      "step": 4188
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7843127250671387,
      "learning_rate": 0.0005995107651445723,
      "loss": 3.2947,
      "step": 4189
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5837469100952148,
      "learning_rate": 0.0005995105315994994,
      "loss": 3.6015,
      "step": 4190
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9867641925811768,
      "learning_rate": 0.0005995102979987419,
      "loss": 3.3363,
      "step": 4191
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.4089558124542236,
      "learning_rate": 0.0005995100643422996,
      "loss": 3.5272,
      "step": 4192
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5341358184814453,
      "learning_rate": 0.000599509830630173,
      "loss": 3.2422,
      "step": 4193
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.619336724281311,
      "learning_rate": 0.0005995095968623616,
      "loss": 3.5268,
      "step": 4194
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3039746284484863,
      "learning_rate": 0.0005995093630388658,
      "loss": 3.1346,
      "step": 4195
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0674939155578613,
      "learning_rate": 0.0005995091291596857,
      "loss": 3.2764,
      "step": 4196
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.41253662109375,
      "learning_rate": 0.000599508895224821,
      "loss": 3.4692,
      "step": 4197
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.446376085281372,
      "learning_rate": 0.0005995086612342721,
      "loss": 3.5529,
      "step": 4198
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1115002632141113,
      "learning_rate": 0.0005995084271880387,
      "loss": 3.3947,
      "step": 4199
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.8955644369125366,
      "learning_rate": 0.0005995081930861212,
      "loss": 3.6048,
      "step": 4200
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6797102689743042,
      "learning_rate": 0.0005995079589285195,
      "loss": 3.5906,
      "step": 4201
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1041975021362305,
      "learning_rate": 0.0005995077247152335,
      "loss": 3.5337,
      "step": 4202
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7948582172393799,
      "learning_rate": 0.0005995074904462633,
      "loss": 3.3419,
      "step": 4203
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.6283520460128784,
      "learning_rate": 0.0005995072561216092,
      "loss": 3.5275,
      "step": 4204
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3466734886169434,
      "learning_rate": 0.0005995070217412709,
      "loss": 3.5485,
      "step": 4205
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.171415090560913,
      "learning_rate": 0.0005995067873052486,
      "loss": 3.3828,
      "step": 4206
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.4184465408325195,
      "learning_rate": 0.0005995065528135423,
      "loss": 3.5792,
      "step": 4207
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.9231101274490356,
      "learning_rate": 0.0005995063182661522,
      "loss": 3.4953,
      "step": 4208
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.7489426136016846,
      "learning_rate": 0.0005995060836630782,
      "loss": 3.1689,
      "step": 4209
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.5733184814453125,
      "learning_rate": 0.0005995058490043203,
      "loss": 3.4289,
      "step": 4210
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.923437237739563,
      "learning_rate": 0.0005995056142898786,
      "loss": 3.5933,
      "step": 4211
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.5428855419158936,
      "learning_rate": 0.0005995053795197533,
      "loss": 3.3054,
      "step": 4212
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0382444858551025,
      "learning_rate": 0.0005995051446939442,
      "loss": 3.3738,
      "step": 4213
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3926260471343994,
      "learning_rate": 0.0005995049098124513,
      "loss": 3.1437,
      "step": 4214
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.1408584117889404,
      "learning_rate": 0.000599504674875275,
      "loss": 3.2849,
      "step": 4215
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.077352285385132,
      "learning_rate": 0.0005995044398824149,
      "loss": 3.3791,
      "step": 4216
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.5184195041656494,
      "learning_rate": 0.0005995042048338715,
      "loss": 3.3313,
      "step": 4217
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.0051205158233643,
      "learning_rate": 0.0005995039697296445,
      "loss": 3.2668,
      "step": 4218
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.7595583200454712,
      "learning_rate": 0.0005995037345697342,
      "loss": 3.3382,
      "step": 4219
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.328573703765869,
      "learning_rate": 0.0005995034993541403,
      "loss": 3.0506,
      "step": 4220
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.056849241256714,
      "learning_rate": 0.0005995032640828631,
      "loss": 3.513,
      "step": 4221
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3314504623413086,
      "learning_rate": 0.0005995030287559026,
      "loss": 3.1838,
      "step": 4222
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.9124629497528076,
      "learning_rate": 0.0005995027933732589,
      "loss": 3.1448,
      "step": 4223
    },
    {
      "epoch": 0.05,
      "grad_norm": 2.3947901725769043,
      "learning_rate": 0.0005995025579349319,
      "loss": 3.472,
      "step": 4224
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8244956731796265,
      "learning_rate": 0.0005995023224409219,
      "loss": 3.4894,
      "step": 4225
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.9328036308288574,
      "learning_rate": 0.0005995020868912285,
      "loss": 3.2075,
      "step": 4226
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0937068462371826,
      "learning_rate": 0.0005995018512858521,
      "loss": 3.6406,
      "step": 4227
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7389856576919556,
      "learning_rate": 0.0005995016156247928,
      "loss": 3.4372,
      "step": 4228
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7171903848648071,
      "learning_rate": 0.0005995013799080503,
      "loss": 3.5362,
      "step": 4229
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1874313354492188,
      "learning_rate": 0.0005995011441356249,
      "loss": 3.4108,
      "step": 4230
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5608747005462646,
      "learning_rate": 0.0005995009083075166,
      "loss": 3.3027,
      "step": 4231
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0089571475982666,
      "learning_rate": 0.0005995006724237254,
      "loss": 3.0699,
      "step": 4232
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.975089192390442,
      "learning_rate": 0.0005995004364842514,
      "loss": 3.3584,
      "step": 4233
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.733008861541748,
      "learning_rate": 0.0005995002004890946,
      "loss": 3.5904,
      "step": 4234
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6225873231887817,
      "learning_rate": 0.0005994999644382551,
      "loss": 3.2536,
      "step": 4235
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3669497966766357,
      "learning_rate": 0.0005994997283317329,
      "loss": 3.1897,
      "step": 4236
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9429237842559814,
      "learning_rate": 0.0005994994921695281,
      "loss": 3.1722,
      "step": 4237
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.095244884490967,
      "learning_rate": 0.0005994992559516405,
      "loss": 3.3374,
      "step": 4238
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3725781440734863,
      "learning_rate": 0.0005994990196780705,
      "loss": 3.4431,
      "step": 4239
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2660419940948486,
      "learning_rate": 0.000599498783348818,
      "loss": 3.267,
      "step": 4240
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6840097904205322,
      "learning_rate": 0.0005994985469638829,
      "loss": 3.3222,
      "step": 4241
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4696130752563477,
      "learning_rate": 0.0005994983105232654,
      "loss": 3.3008,
      "step": 4242
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.339087963104248,
      "learning_rate": 0.0005994980740269655,
      "loss": 3.4457,
      "step": 4243
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3380085229873657,
      "learning_rate": 0.0005994978374749833,
      "loss": 3.3581,
      "step": 4244
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5306733846664429,
      "learning_rate": 0.0005994976008673187,
      "loss": 3.5702,
      "step": 4245
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.740813970565796,
      "learning_rate": 0.000599497364203972,
      "loss": 3.5073,
      "step": 4246
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4645293951034546,
      "learning_rate": 0.000599497127484943,
      "loss": 3.5163,
      "step": 4247
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9299615621566772,
      "learning_rate": 0.0005994968907102318,
      "loss": 3.4212,
      "step": 4248
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8040540218353271,
      "learning_rate": 0.0005994966538798385,
      "loss": 3.3208,
      "step": 4249
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.400558352470398,
      "learning_rate": 0.0005994964169937632,
      "loss": 3.3875,
      "step": 4250
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.217679262161255,
      "learning_rate": 0.0005994961800520058,
      "loss": 3.3842,
      "step": 4251
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5135875940322876,
      "learning_rate": 0.0005994959430545664,
      "loss": 3.3077,
      "step": 4252
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6794651746749878,
      "learning_rate": 0.000599495706001445,
      "loss": 3.4711,
      "step": 4253
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6359918117523193,
      "learning_rate": 0.0005994954688926417,
      "loss": 3.0705,
      "step": 4254
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6336400508880615,
      "learning_rate": 0.0005994952317281566,
      "loss": 3.4167,
      "step": 4255
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.54548978805542,
      "learning_rate": 0.0005994949945079897,
      "loss": 3.1879,
      "step": 4256
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.877371072769165,
      "learning_rate": 0.000599494757232141,
      "loss": 3.4508,
      "step": 4257
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9489529132843018,
      "learning_rate": 0.0005994945199006106,
      "loss": 3.5906,
      "step": 4258
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.853317379951477,
      "learning_rate": 0.0005994942825133985,
      "loss": 3.4535,
      "step": 4259
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.465181827545166,
      "learning_rate": 0.0005994940450705047,
      "loss": 3.5377,
      "step": 4260
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.626831293106079,
      "learning_rate": 0.0005994938075719295,
      "loss": 3.3186,
      "step": 4261
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9812979698181152,
      "learning_rate": 0.0005994935700176725,
      "loss": 3.4289,
      "step": 4262
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.635172963142395,
      "learning_rate": 0.0005994933324077341,
      "loss": 3.4629,
      "step": 4263
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6858080625534058,
      "learning_rate": 0.0005994930947421142,
      "loss": 3.1202,
      "step": 4264
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4232251644134521,
      "learning_rate": 0.0005994928570208129,
      "loss": 3.6443,
      "step": 4265
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.492836833000183,
      "learning_rate": 0.0005994926192438301,
      "loss": 3.311,
      "step": 4266
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4056822061538696,
      "learning_rate": 0.0005994923814111662,
      "loss": 3.4767,
      "step": 4267
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4182716608047485,
      "learning_rate": 0.0005994921435228209,
      "loss": 3.3123,
      "step": 4268
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7041231393814087,
      "learning_rate": 0.0005994919055787943,
      "loss": 3.0752,
      "step": 4269
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4991501569747925,
      "learning_rate": 0.0005994916675790866,
      "loss": 3.4219,
      "step": 4270
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7037595510482788,
      "learning_rate": 0.0005994914295236977,
      "loss": 3.3885,
      "step": 4271
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8323979377746582,
      "learning_rate": 0.0005994911914126278,
      "loss": 3.3002,
      "step": 4272
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4244662523269653,
      "learning_rate": 0.0005994909532458767,
      "loss": 3.4984,
      "step": 4273
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.472727060317993,
      "learning_rate": 0.0005994907150234447,
      "loss": 3.6176,
      "step": 4274
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.053762197494507,
      "learning_rate": 0.0005994904767453315,
      "loss": 3.4251,
      "step": 4275
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.085456132888794,
      "learning_rate": 0.0005994902384115375,
      "loss": 3.3103,
      "step": 4276
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.761460542678833,
      "learning_rate": 0.0005994900000220627,
      "loss": 3.2208,
      "step": 4277
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.792285442352295,
      "learning_rate": 0.0005994897615769069,
      "loss": 3.289,
      "step": 4278
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7604951858520508,
      "learning_rate": 0.0005994895230760703,
      "loss": 3.6722,
      "step": 4279
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8769594430923462,
      "learning_rate": 0.0005994892845195531,
      "loss": 3.1658,
      "step": 4280
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8226137161254883,
      "learning_rate": 0.0005994890459073552,
      "loss": 3.4292,
      "step": 4281
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4901180267333984,
      "learning_rate": 0.0005994888072394766,
      "loss": 3.1934,
      "step": 4282
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.391161561012268,
      "learning_rate": 0.0005994885685159172,
      "loss": 3.4797,
      "step": 4283
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2943079471588135,
      "learning_rate": 0.0005994883297366773,
      "loss": 3.1133,
      "step": 4284
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3247597217559814,
      "learning_rate": 0.000599488090901757,
      "loss": 3.3292,
      "step": 4285
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.638939142227173,
      "learning_rate": 0.0005994878520111561,
      "loss": 3.5272,
      "step": 4286
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.811019778251648,
      "learning_rate": 0.0005994876130648747,
      "loss": 3.5465,
      "step": 4287
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.771495819091797,
      "learning_rate": 0.0005994873740629129,
      "loss": 3.1711,
      "step": 4288
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3075623512268066,
      "learning_rate": 0.0005994871350052709,
      "loss": 3.1575,
      "step": 4289
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4484357833862305,
      "learning_rate": 0.0005994868958919485,
      "loss": 3.3912,
      "step": 4290
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4092025756835938,
      "learning_rate": 0.0005994866567229458,
      "loss": 3.4385,
      "step": 4291
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8674057722091675,
      "learning_rate": 0.0005994864174982629,
      "loss": 3.5145,
      "step": 4292
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5645931959152222,
      "learning_rate": 0.0005994861782178998,
      "loss": 3.3114,
      "step": 4293
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4062579870224,
      "learning_rate": 0.0005994859388818567,
      "loss": 3.3462,
      "step": 4294
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.174910306930542,
      "learning_rate": 0.0005994856994901334,
      "loss": 3.1232,
      "step": 4295
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6850579977035522,
      "learning_rate": 0.0005994854600427301,
      "loss": 3.1887,
      "step": 4296
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5306987762451172,
      "learning_rate": 0.0005994852205396467,
      "loss": 3.1544,
      "step": 4297
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5895217657089233,
      "learning_rate": 0.0005994849809808835,
      "loss": 3.2367,
      "step": 4298
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.750515341758728,
      "learning_rate": 0.0005994847413664403,
      "loss": 3.2689,
      "step": 4299
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.513614535331726,
      "learning_rate": 0.0005994845016963171,
      "loss": 3.598,
      "step": 4300
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2634432315826416,
      "learning_rate": 0.0005994842619705142,
      "loss": 3.3202,
      "step": 4301
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.330479145050049,
      "learning_rate": 0.0005994840221890316,
      "loss": 3.3274,
      "step": 4302
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.33135187625885,
      "learning_rate": 0.0005994837823518692,
      "loss": 3.3084,
      "step": 4303
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.329740524291992,
      "learning_rate": 0.0005994835424590271,
      "loss": 3.312,
      "step": 4304
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8184523582458496,
      "learning_rate": 0.0005994833025105054,
      "loss": 3.5825,
      "step": 4305
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5459754467010498,
      "learning_rate": 0.000599483062506304,
      "loss": 3.3846,
      "step": 4306
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.21795392036438,
      "learning_rate": 0.0005994828224464232,
      "loss": 3.3949,
      "step": 4307
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0756444931030273,
      "learning_rate": 0.0005994825823308627,
      "loss": 3.4213,
      "step": 4308
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6489418745040894,
      "learning_rate": 0.0005994823421596229,
      "loss": 3.4669,
      "step": 4309
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.478945732116699,
      "learning_rate": 0.0005994821019327036,
      "loss": 3.4531,
      "step": 4310
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.20212459564209,
      "learning_rate": 0.000599481861650105,
      "loss": 3.4812,
      "step": 4311
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.292532205581665,
      "learning_rate": 0.000599481621311827,
      "loss": 3.4758,
      "step": 4312
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.648698329925537,
      "learning_rate": 0.0005994813809178697,
      "loss": 3.0678,
      "step": 4313
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3505077362060547,
      "learning_rate": 0.0005994811404682332,
      "loss": 3.5583,
      "step": 4314
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5621358156204224,
      "learning_rate": 0.0005994808999629176,
      "loss": 3.5696,
      "step": 4315
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.574922800064087,
      "learning_rate": 0.0005994806594019226,
      "loss": 3.4848,
      "step": 4316
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5931568145751953,
      "learning_rate": 0.0005994804187852487,
      "loss": 3.6477,
      "step": 4317
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6841108798980713,
      "learning_rate": 0.0005994801781128956,
      "loss": 3.4031,
      "step": 4318
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6622079610824585,
      "learning_rate": 0.0005994799373848636,
      "loss": 3.3515,
      "step": 4319
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8809068202972412,
      "learning_rate": 0.0005994796966011525,
      "loss": 3.0548,
      "step": 4320
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6448280811309814,
      "learning_rate": 0.0005994794557617626,
      "loss": 3.2968,
      "step": 4321
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.856332778930664,
      "learning_rate": 0.0005994792148666936,
      "loss": 3.0665,
      "step": 4322
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.629695177078247,
      "learning_rate": 0.000599478973915946,
      "loss": 3.1597,
      "step": 4323
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8123865127563477,
      "learning_rate": 0.0005994787329095196,
      "loss": 3.2095,
      "step": 4324
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2213377952575684,
      "learning_rate": 0.0005994784918474143,
      "loss": 3.2042,
      "step": 4325
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.773513913154602,
      "learning_rate": 0.0005994782507296304,
      "loss": 3.3896,
      "step": 4326
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.134962797164917,
      "learning_rate": 0.0005994780095561679,
      "loss": 3.2428,
      "step": 4327
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6742143630981445,
      "learning_rate": 0.0005994777683270266,
      "loss": 3.403,
      "step": 4328
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.649906873703003,
      "learning_rate": 0.0005994775270422068,
      "loss": 3.5451,
      "step": 4329
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.816131591796875,
      "learning_rate": 0.0005994772857017086,
      "loss": 3.1992,
      "step": 4330
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7707891464233398,
      "learning_rate": 0.0005994770443055317,
      "loss": 3.3704,
      "step": 4331
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4365261793136597,
      "learning_rate": 0.0005994768028536766,
      "loss": 3.4181,
      "step": 4332
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.085890531539917,
      "learning_rate": 0.0005994765613461429,
      "loss": 3.2292,
      "step": 4333
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4016257524490356,
      "learning_rate": 0.000599476319782931,
      "loss": 3.3957,
      "step": 4334
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3986241817474365,
      "learning_rate": 0.0005994760781640407,
      "loss": 3.0437,
      "step": 4335
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5159919261932373,
      "learning_rate": 0.0005994758364894722,
      "loss": 3.4213,
      "step": 4336
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6339514255523682,
      "learning_rate": 0.0005994755947592254,
      "loss": 3.3228,
      "step": 4337
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6162244081497192,
      "learning_rate": 0.0005994753529733006,
      "loss": 3.2952,
      "step": 4338
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6669145822525024,
      "learning_rate": 0.0005994751111316975,
      "loss": 3.0421,
      "step": 4339
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.761446475982666,
      "learning_rate": 0.0005994748692344166,
      "loss": 3.3151,
      "step": 4340
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4791815280914307,
      "learning_rate": 0.0005994746272814574,
      "loss": 3.4417,
      "step": 4341
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8849811553955078,
      "learning_rate": 0.0005994743852728203,
      "loss": 3.2396,
      "step": 4342
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8945378065109253,
      "learning_rate": 0.0005994741432085053,
      "loss": 3.3062,
      "step": 4343
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6399505138397217,
      "learning_rate": 0.0005994739010885123,
      "loss": 3.3523,
      "step": 4344
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.02913236618042,
      "learning_rate": 0.0005994736589128415,
      "loss": 3.4673,
      "step": 4345
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6041840314865112,
      "learning_rate": 0.0005994734166814929,
      "loss": 3.3628,
      "step": 4346
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0075454711914062,
      "learning_rate": 0.0005994731743944665,
      "loss": 3.4402,
      "step": 4347
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3386026620864868,
      "learning_rate": 0.0005994729320517626,
      "loss": 3.5225,
      "step": 4348
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6558736562728882,
      "learning_rate": 0.0005994726896533807,
      "loss": 3.3749,
      "step": 4349
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5317327976226807,
      "learning_rate": 0.0005994724471993214,
      "loss": 3.0279,
      "step": 4350
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.666013479232788,
      "learning_rate": 0.0005994722046895844,
      "loss": 3.2911,
      "step": 4351
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.171159267425537,
      "learning_rate": 0.0005994719621241699,
      "loss": 3.3391,
      "step": 4352
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.329121470451355,
      "learning_rate": 0.000599471719503078,
      "loss": 3.4068,
      "step": 4353
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.552217721939087,
      "learning_rate": 0.0005994714768263085,
      "loss": 3.5412,
      "step": 4354
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1114614009857178,
      "learning_rate": 0.0005994712340938617,
      "loss": 3.1929,
      "step": 4355
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.722833514213562,
      "learning_rate": 0.0005994709913057375,
      "loss": 3.1074,
      "step": 4356
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.039559841156006,
      "learning_rate": 0.000599470748461936,
      "loss": 3.3566,
      "step": 4357
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5077533721923828,
      "learning_rate": 0.0005994705055624573,
      "loss": 3.2593,
      "step": 4358
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4674336910247803,
      "learning_rate": 0.0005994702626073013,
      "loss": 3.3447,
      "step": 4359
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4407129287719727,
      "learning_rate": 0.0005994700195964682,
      "loss": 3.3434,
      "step": 4360
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0194501876831055,
      "learning_rate": 0.0005994697765299579,
      "loss": 3.1467,
      "step": 4361
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9702911376953125,
      "learning_rate": 0.0005994695334077706,
      "loss": 3.334,
      "step": 4362
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.019775390625,
      "learning_rate": 0.0005994692902299061,
      "loss": 3.6033,
      "step": 4363
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0668177604675293,
      "learning_rate": 0.0005994690469963647,
      "loss": 3.4556,
      "step": 4364
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9291495084762573,
      "learning_rate": 0.0005994688037071463,
      "loss": 3.4662,
      "step": 4365
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4433536529541016,
      "learning_rate": 0.0005994685603622511,
      "loss": 3.4678,
      "step": 4366
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.074495792388916,
      "learning_rate": 0.0005994683169616789,
      "loss": 3.219,
      "step": 4367
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9377185106277466,
      "learning_rate": 0.0005994680735054301,
      "loss": 3.6767,
      "step": 4368
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4703179597854614,
      "learning_rate": 0.0005994678299935044,
      "loss": 3.5661,
      "step": 4369
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4551929235458374,
      "learning_rate": 0.000599467586425902,
      "loss": 3.4692,
      "step": 4370
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5101566314697266,
      "learning_rate": 0.0005994673428026229,
      "loss": 3.4768,
      "step": 4371
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7035044431686401,
      "learning_rate": 0.0005994670991236672,
      "loss": 3.5407,
      "step": 4372
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5409334897994995,
      "learning_rate": 0.0005994668553890349,
      "loss": 3.302,
      "step": 4373
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0166938304901123,
      "learning_rate": 0.0005994666115987261,
      "loss": 3.3348,
      "step": 4374
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6953874826431274,
      "learning_rate": 0.0005994663677527407,
      "loss": 3.4477,
      "step": 4375
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6121240854263306,
      "learning_rate": 0.0005994661238510789,
      "loss": 3.4097,
      "step": 4376
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4561848640441895,
      "learning_rate": 0.0005994658798937407,
      "loss": 3.2449,
      "step": 4377
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6270281076431274,
      "learning_rate": 0.0005994656358807261,
      "loss": 3.3727,
      "step": 4378
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.747753381729126,
      "learning_rate": 0.0005994653918120353,
      "loss": 3.1346,
      "step": 4379
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.514760971069336,
      "learning_rate": 0.0005994651476876682,
      "loss": 3.4875,
      "step": 4380
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8593145608901978,
      "learning_rate": 0.0005994649035076248,
      "loss": 3.2624,
      "step": 4381
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7945539951324463,
      "learning_rate": 0.0005994646592719053,
      "loss": 3.5359,
      "step": 4382
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4613672494888306,
      "learning_rate": 0.0005994644149805096,
      "loss": 3.4578,
      "step": 4383
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5559465885162354,
      "learning_rate": 0.0005994641706334378,
      "loss": 3.0726,
      "step": 4384
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.03397274017334,
      "learning_rate": 0.0005994639262306899,
      "loss": 3.4285,
      "step": 4385
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5866670608520508,
      "learning_rate": 0.0005994636817722661,
      "loss": 3.4466,
      "step": 4386
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7107163667678833,
      "learning_rate": 0.0005994634372581664,
      "loss": 3.3702,
      "step": 4387
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9346400499343872,
      "learning_rate": 0.0005994631926883907,
      "loss": 3.4418,
      "step": 4388
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.40089750289917,
      "learning_rate": 0.0005994629480629393,
      "loss": 3.3994,
      "step": 4389
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4547134637832642,
      "learning_rate": 0.0005994627033818119,
      "loss": 3.3651,
      "step": 4390
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4462999105453491,
      "learning_rate": 0.0005994624586450088,
      "loss": 3.4567,
      "step": 4391
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2559831142425537,
      "learning_rate": 0.00059946221385253,
      "loss": 3.2649,
      "step": 4392
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6044970750808716,
      "learning_rate": 0.0005994619690043755,
      "loss": 3.1609,
      "step": 4393
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8899883031845093,
      "learning_rate": 0.0005994617241005453,
      "loss": 3.4514,
      "step": 4394
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6023693084716797,
      "learning_rate": 0.0005994614791410395,
      "loss": 2.986,
      "step": 4395
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.048088312149048,
      "learning_rate": 0.0005994612341258583,
      "loss": 3.4942,
      "step": 4396
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8583327531814575,
      "learning_rate": 0.0005994609890550015,
      "loss": 3.2334,
      "step": 4397
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.722353219985962,
      "learning_rate": 0.0005994607439284693,
      "loss": 3.3481,
      "step": 4398
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.570928931236267,
      "learning_rate": 0.0005994604987462616,
      "loss": 3.3484,
      "step": 4399
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8716514110565186,
      "learning_rate": 0.0005994602535083787,
      "loss": 3.21,
      "step": 4400
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5175389051437378,
      "learning_rate": 0.0005994600082148204,
      "loss": 3.6076,
      "step": 4401
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6140377521514893,
      "learning_rate": 0.0005994597628655869,
      "loss": 3.3535,
      "step": 4402
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.24546217918396,
      "learning_rate": 0.000599459517460678,
      "loss": 3.5562,
      "step": 4403
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7354209423065186,
      "learning_rate": 0.0005994592720000941,
      "loss": 3.2383,
      "step": 4404
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9540727138519287,
      "learning_rate": 0.000599459026483835,
      "loss": 3.3486,
      "step": 4405
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6230777502059937,
      "learning_rate": 0.0005994587809119008,
      "loss": 3.3797,
      "step": 4406
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7510533332824707,
      "learning_rate": 0.0005994585352842916,
      "loss": 3.4732,
      "step": 4407
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.199061870574951,
      "learning_rate": 0.0005994582896010073,
      "loss": 3.3634,
      "step": 4408
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.624401569366455,
      "learning_rate": 0.0005994580438620482,
      "loss": 3.3863,
      "step": 4409
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7563276290893555,
      "learning_rate": 0.0005994577980674141,
      "loss": 3.4932,
      "step": 4410
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9599565267562866,
      "learning_rate": 0.0005994575522171052,
      "loss": 3.3981,
      "step": 4411
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7355631589889526,
      "learning_rate": 0.0005994573063111215,
      "loss": 3.3482,
      "step": 4412
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8869777917861938,
      "learning_rate": 0.0005994570603494629,
      "loss": 3.1214,
      "step": 4413
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.601495623588562,
      "learning_rate": 0.0005994568143321297,
      "loss": 3.5661,
      "step": 4414
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5585670471191406,
      "learning_rate": 0.0005994565682591219,
      "loss": 3.4749,
      "step": 4415
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6264598369598389,
      "learning_rate": 0.0005994563221304393,
      "loss": 3.3538,
      "step": 4416
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7674599885940552,
      "learning_rate": 0.0005994560759460822,
      "loss": 3.4486,
      "step": 4417
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4866607189178467,
      "learning_rate": 0.0005994558297060505,
      "loss": 3.5284,
      "step": 4418
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6212867498397827,
      "learning_rate": 0.0005994555834103444,
      "loss": 3.0314,
      "step": 4419
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.9776740074157715,
      "learning_rate": 0.0005994553370589639,
      "loss": 3.4858,
      "step": 4420
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3961780071258545,
      "learning_rate": 0.0005994550906519089,
      "loss": 3.2455,
      "step": 4421
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.020004987716675,
      "learning_rate": 0.0005994548441891795,
      "loss": 3.1755,
      "step": 4422
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4423105716705322,
      "learning_rate": 0.000599454597670776,
      "loss": 3.4737,
      "step": 4423
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3391435146331787,
      "learning_rate": 0.000599454351096698,
      "loss": 3.5387,
      "step": 4424
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3462257385253906,
      "learning_rate": 0.0005994541044669459,
      "loss": 3.5139,
      "step": 4425
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.505847454071045,
      "learning_rate": 0.0005994538577815195,
      "loss": 3.3421,
      "step": 4426
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7720515727996826,
      "learning_rate": 0.0005994536110404191,
      "loss": 3.6675,
      "step": 4427
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4834699630737305,
      "learning_rate": 0.0005994533642436447,
      "loss": 3.4419,
      "step": 4428
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.550214171409607,
      "learning_rate": 0.0005994531173911961,
      "loss": 3.393,
      "step": 4429
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1764650344848633,
      "learning_rate": 0.0005994528704830735,
      "loss": 3.4163,
      "step": 4430
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.9111642837524414,
      "learning_rate": 0.0005994526235192771,
      "loss": 3.6982,
      "step": 4431
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.086846351623535,
      "learning_rate": 0.0005994523764998068,
      "loss": 3.5034,
      "step": 4432
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9869388341903687,
      "learning_rate": 0.0005994521294246625,
      "loss": 3.2349,
      "step": 4433
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6009238958358765,
      "learning_rate": 0.0005994518822938446,
      "loss": 3.438,
      "step": 4434
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9604511260986328,
      "learning_rate": 0.0005994516351073528,
      "loss": 3.3453,
      "step": 4435
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.536247730255127,
      "learning_rate": 0.0005994513878651873,
      "loss": 3.4829,
      "step": 4436
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.206937551498413,
      "learning_rate": 0.0005994511405673482,
      "loss": 3.4345,
      "step": 4437
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6468851566314697,
      "learning_rate": 0.0005994508932138353,
      "loss": 3.5403,
      "step": 4438
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.228961706161499,
      "learning_rate": 0.0005994506458046491,
      "loss": 3.5075,
      "step": 4439
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.840543270111084,
      "learning_rate": 0.0005994503983397892,
      "loss": 3.5052,
      "step": 4440
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3603384494781494,
      "learning_rate": 0.0005994501508192558,
      "loss": 3.3298,
      "step": 4441
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3477452993392944,
      "learning_rate": 0.0005994499032430491,
      "loss": 3.258,
      "step": 4442
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3648850917816162,
      "learning_rate": 0.0005994496556111689,
      "loss": 3.2135,
      "step": 4443
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.664811372756958,
      "learning_rate": 0.0005994494079236154,
      "loss": 3.2359,
      "step": 4444
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7797428369522095,
      "learning_rate": 0.0005994491601803886,
      "loss": 3.5181,
      "step": 4445
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5469906330108643,
      "learning_rate": 0.0005994489123814885,
      "loss": 3.2295,
      "step": 4446
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7964063882827759,
      "learning_rate": 0.0005994486645269153,
      "loss": 3.384,
      "step": 4447
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4632811546325684,
      "learning_rate": 0.0005994484166166688,
      "loss": 3.3802,
      "step": 4448
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2710206508636475,
      "learning_rate": 0.0005994481686507493,
      "loss": 3.4901,
      "step": 4449
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0477027893066406,
      "learning_rate": 0.0005994479206291567,
      "loss": 3.4387,
      "step": 4450
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5342878103256226,
      "learning_rate": 0.000599447672551891,
      "loss": 3.3445,
      "step": 4451
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6780719757080078,
      "learning_rate": 0.0005994474244189525,
      "loss": 3.3134,
      "step": 4452
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0936267375946045,
      "learning_rate": 0.0005994471762303409,
      "loss": 3.1994,
      "step": 4453
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5549273490905762,
      "learning_rate": 0.0005994469279860565,
      "loss": 3.3431,
      "step": 4454
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6239447593688965,
      "learning_rate": 0.0005994466796860993,
      "loss": 3.6222,
      "step": 4455
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3707075119018555,
      "learning_rate": 0.0005994464313304693,
      "loss": 3.1535,
      "step": 4456
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6669328212738037,
      "learning_rate": 0.0005994461829191664,
      "loss": 3.4279,
      "step": 4457
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4369053840637207,
      "learning_rate": 0.000599445934452191,
      "loss": 3.3147,
      "step": 4458
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.5696706771850586,
      "learning_rate": 0.0005994456859295428,
      "loss": 3.2965,
      "step": 4459
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5419434309005737,
      "learning_rate": 0.0005994454373512221,
      "loss": 3.4873,
      "step": 4460
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7601699829101562,
      "learning_rate": 0.0005994451887172288,
      "loss": 3.3685,
      "step": 4461
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.621070623397827,
      "learning_rate": 0.0005994449400275629,
      "loss": 3.2694,
      "step": 4462
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.64604651927948,
      "learning_rate": 0.0005994446912822247,
      "loss": 3.3457,
      "step": 4463
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0350534915924072,
      "learning_rate": 0.000599444442481214,
      "loss": 3.7279,
      "step": 4464
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.3185946941375732,
      "learning_rate": 0.0005994441936245309,
      "loss": 3.2364,
      "step": 4465
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9482266902923584,
      "learning_rate": 0.0005994439447121755,
      "loss": 3.1914,
      "step": 4466
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2467732429504395,
      "learning_rate": 0.0005994436957441478,
      "loss": 3.4296,
      "step": 4467
    },
    {
      "epoch": 0.06,
      "grad_norm": 4.3912882804870605,
      "learning_rate": 0.0005994434467204478,
      "loss": 3.225,
      "step": 4468
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.17338490486145,
      "learning_rate": 0.0005994431976410758,
      "loss": 3.0932,
      "step": 4469
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8547838926315308,
      "learning_rate": 0.0005994429485060316,
      "loss": 3.3893,
      "step": 4470
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8801393508911133,
      "learning_rate": 0.0005994426993153151,
      "loss": 3.4465,
      "step": 4471
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9158681631088257,
      "learning_rate": 0.0005994424500689268,
      "loss": 3.5464,
      "step": 4472
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.814811110496521,
      "learning_rate": 0.0005994422007668664,
      "loss": 3.3125,
      "step": 4473
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5200951099395752,
      "learning_rate": 0.000599441951409134,
      "loss": 3.6563,
      "step": 4474
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7813506126403809,
      "learning_rate": 0.0005994417019957298,
      "loss": 3.3414,
      "step": 4475
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6610493659973145,
      "learning_rate": 0.0005994414525266537,
      "loss": 3.2517,
      "step": 4476
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5971072912216187,
      "learning_rate": 0.0005994412030019057,
      "loss": 3.4539,
      "step": 4477
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9231834411621094,
      "learning_rate": 0.0005994409534214859,
      "loss": 3.5935,
      "step": 4478
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7970796823501587,
      "learning_rate": 0.0005994407037853945,
      "loss": 3.4398,
      "step": 4479
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6191439628601074,
      "learning_rate": 0.0005994404540936314,
      "loss": 3.5751,
      "step": 4480
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.069927215576172,
      "learning_rate": 0.0005994402043461967,
      "loss": 3.6598,
      "step": 4481
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9444916248321533,
      "learning_rate": 0.0005994399545430902,
      "loss": 3.2333,
      "step": 4482
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8398687839508057,
      "learning_rate": 0.0005994397046843123,
      "loss": 3.1104,
      "step": 4483
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8868049383163452,
      "learning_rate": 0.0005994394547698629,
      "loss": 3.613,
      "step": 4484
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7966368198394775,
      "learning_rate": 0.0005994392047997421,
      "loss": 3.1534,
      "step": 4485
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8133183717727661,
      "learning_rate": 0.0005994389547739499,
      "loss": 3.4944,
      "step": 4486
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.3010811805725098,
      "learning_rate": 0.0005994387046924863,
      "loss": 3.2536,
      "step": 4487
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6304351091384888,
      "learning_rate": 0.0005994384545553515,
      "loss": 3.4663,
      "step": 4488
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1305019855499268,
      "learning_rate": 0.0005994382043625453,
      "loss": 3.4413,
      "step": 4489
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3955488204956055,
      "learning_rate": 0.0005994379541140679,
      "loss": 3.386,
      "step": 4490
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6941879987716675,
      "learning_rate": 0.0005994377038099194,
      "loss": 3.247,
      "step": 4491
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4656847715377808,
      "learning_rate": 0.0005994374534500997,
      "loss": 3.1444,
      "step": 4492
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5322356224060059,
      "learning_rate": 0.000599437203034609,
      "loss": 3.0468,
      "step": 4493
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6293249130249023,
      "learning_rate": 0.0005994369525634472,
      "loss": 3.3215,
      "step": 4494
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.476518154144287,
      "learning_rate": 0.0005994367020366145,
      "loss": 3.1497,
      "step": 4495
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.474570870399475,
      "learning_rate": 0.0005994364514541108,
      "loss": 3.3008,
      "step": 4496
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8830574750900269,
      "learning_rate": 0.0005994362008159362,
      "loss": 3.5541,
      "step": 4497
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5659663677215576,
      "learning_rate": 0.0005994359501220909,
      "loss": 3.5343,
      "step": 4498
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.383995771408081,
      "learning_rate": 0.0005994356993725747,
      "loss": 3.261,
      "step": 4499
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4057652950286865,
      "learning_rate": 0.0005994354485673877,
      "loss": 3.1918,
      "step": 4500
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5877747535705566,
      "learning_rate": 0.00059943519770653,
      "loss": 3.2936,
      "step": 4501
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9220268726348877,
      "learning_rate": 0.0005994349467900017,
      "loss": 3.4706,
      "step": 4502
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6446155309677124,
      "learning_rate": 0.0005994346958178028,
      "loss": 3.1299,
      "step": 4503
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.620145559310913,
      "learning_rate": 0.0005994344447899333,
      "loss": 3.4222,
      "step": 4504
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4931846857070923,
      "learning_rate": 0.0005994341937063933,
      "loss": 3.4068,
      "step": 4505
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8022764921188354,
      "learning_rate": 0.0005994339425671829,
      "loss": 3.0713,
      "step": 4506
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5094082355499268,
      "learning_rate": 0.000599433691372302,
      "loss": 3.4364,
      "step": 4507
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8679100275039673,
      "learning_rate": 0.0005994334401217507,
      "loss": 3.2262,
      "step": 4508
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.099297046661377,
      "learning_rate": 0.0005994331888155291,
      "loss": 3.2344,
      "step": 4509
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1718599796295166,
      "learning_rate": 0.0005994329374536372,
      "loss": 3.5949,
      "step": 4510
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7306748628616333,
      "learning_rate": 0.0005994326860360751,
      "loss": 3.3312,
      "step": 4511
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6034783124923706,
      "learning_rate": 0.0005994324345628429,
      "loss": 3.287,
      "step": 4512
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.1131210327148438,
      "learning_rate": 0.0005994321830339403,
      "loss": 3.3135,
      "step": 4513
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6815860271453857,
      "learning_rate": 0.0005994319314493678,
      "loss": 3.4124,
      "step": 4514
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3910901546478271,
      "learning_rate": 0.0005994316798091253,
      "loss": 3.327,
      "step": 4515
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5467557907104492,
      "learning_rate": 0.0005994314281132127,
      "loss": 3.3783,
      "step": 4516
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3906028270721436,
      "learning_rate": 0.0005994311763616301,
      "loss": 3.3614,
      "step": 4517
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3104935884475708,
      "learning_rate": 0.0005994309245543777,
      "loss": 3.4608,
      "step": 4518
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.708160400390625,
      "learning_rate": 0.0005994306726914555,
      "loss": 3.5208,
      "step": 4519
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.831813335418701,
      "learning_rate": 0.0005994304207728632,
      "loss": 3.3369,
      "step": 4520
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5084789991378784,
      "learning_rate": 0.0005994301687986014,
      "loss": 3.1042,
      "step": 4521
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8442177772521973,
      "learning_rate": 0.0005994299167686697,
      "loss": 3.2314,
      "step": 4522
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8153020143508911,
      "learning_rate": 0.0005994296646830684,
      "loss": 3.2678,
      "step": 4523
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4840809106826782,
      "learning_rate": 0.0005994294125417976,
      "loss": 3.4822,
      "step": 4524
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6931514739990234,
      "learning_rate": 0.000599429160344857,
      "loss": 3.2959,
      "step": 4525
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3617043495178223,
      "learning_rate": 0.0005994289080922471,
      "loss": 3.1828,
      "step": 4526
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8630743026733398,
      "learning_rate": 0.0005994286557839675,
      "loss": 3.3736,
      "step": 4527
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.017622232437134,
      "learning_rate": 0.0005994284034200186,
      "loss": 3.1182,
      "step": 4528
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.228715419769287,
      "learning_rate": 0.0005994281510004003,
      "loss": 3.3927,
      "step": 4529
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.508368968963623,
      "learning_rate": 0.0005994278985251125,
      "loss": 3.4148,
      "step": 4530
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.066250801086426,
      "learning_rate": 0.0005994276459941556,
      "loss": 3.4899,
      "step": 4531
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.14821195602417,
      "learning_rate": 0.0005994273934075294,
      "loss": 3.2358,
      "step": 4532
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9109041690826416,
      "learning_rate": 0.0005994271407652339,
      "loss": 3.4219,
      "step": 4533
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0708513259887695,
      "learning_rate": 0.0005994268880672693,
      "loss": 3.3509,
      "step": 4534
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.097205638885498,
      "learning_rate": 0.0005994266353136356,
      "loss": 3.4789,
      "step": 4535
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.648154377937317,
      "learning_rate": 0.0005994263825043328,
      "loss": 3.5181,
      "step": 4536
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5721569061279297,
      "learning_rate": 0.000599426129639361,
      "loss": 3.3673,
      "step": 4537
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.9717068672180176,
      "learning_rate": 0.0005994258767187203,
      "loss": 3.3281,
      "step": 4538
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5168330669403076,
      "learning_rate": 0.0005994256237424106,
      "loss": 3.311,
      "step": 4539
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6053333282470703,
      "learning_rate": 0.000599425370710432,
      "loss": 3.159,
      "step": 4540
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6522777080535889,
      "learning_rate": 0.0005994251176227847,
      "loss": 3.5078,
      "step": 4541
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9666879177093506,
      "learning_rate": 0.0005994248644794685,
      "loss": 3.4336,
      "step": 4542
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2321016788482666,
      "learning_rate": 0.0005994246112804836,
      "loss": 3.4263,
      "step": 4543
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7033088207244873,
      "learning_rate": 0.00059942435802583,
      "loss": 3.2077,
      "step": 4544
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6923775672912598,
      "learning_rate": 0.0005994241047155077,
      "loss": 3.3199,
      "step": 4545
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5060908794403076,
      "learning_rate": 0.0005994238513495169,
      "loss": 3.3842,
      "step": 4546
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.534805417060852,
      "learning_rate": 0.0005994235979278575,
      "loss": 3.2656,
      "step": 4547
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.647036075592041,
      "learning_rate": 0.0005994233444505297,
      "loss": 3.3072,
      "step": 4548
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.866492509841919,
      "learning_rate": 0.0005994230909175333,
      "loss": 3.2709,
      "step": 4549
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6743648052215576,
      "learning_rate": 0.0005994228373288686,
      "loss": 3.4427,
      "step": 4550
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.42806875705719,
      "learning_rate": 0.0005994225836845356,
      "loss": 3.3976,
      "step": 4551
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.225172996520996,
      "learning_rate": 0.0005994223299845341,
      "loss": 3.2192,
      "step": 4552
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4619934558868408,
      "learning_rate": 0.0005994220762288645,
      "loss": 3.4084,
      "step": 4553
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.273879289627075,
      "learning_rate": 0.0005994218224175266,
      "loss": 3.5591,
      "step": 4554
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8665839433670044,
      "learning_rate": 0.0005994215685505205,
      "loss": 3.1881,
      "step": 4555
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0278756618499756,
      "learning_rate": 0.0005994213146278465,
      "loss": 3.4847,
      "step": 4556
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7516217231750488,
      "learning_rate": 0.0005994210606495042,
      "loss": 3.2505,
      "step": 4557
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3352296352386475,
      "learning_rate": 0.0005994208066154938,
      "loss": 3.4181,
      "step": 4558
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6182312965393066,
      "learning_rate": 0.0005994205525258156,
      "loss": 3.2626,
      "step": 4559
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.198678970336914,
      "learning_rate": 0.0005994202983804696,
      "loss": 3.4586,
      "step": 4560
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.981727957725525,
      "learning_rate": 0.0005994200441794555,
      "loss": 3.3296,
      "step": 4561
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6120460033416748,
      "learning_rate": 0.0005994197899227736,
      "loss": 3.4111,
      "step": 4562
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5421202182769775,
      "learning_rate": 0.0005994195356104239,
      "loss": 3.2638,
      "step": 4563
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.1019227504730225,
      "learning_rate": 0.0005994192812424064,
      "loss": 3.3782,
      "step": 4564
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7852513790130615,
      "learning_rate": 0.0005994190268187212,
      "loss": 3.4294,
      "step": 4565
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9491844177246094,
      "learning_rate": 0.0005994187723393685,
      "loss": 3.2544,
      "step": 4566
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7603824138641357,
      "learning_rate": 0.0005994185178043481,
      "loss": 3.1191,
      "step": 4567
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6771492958068848,
      "learning_rate": 0.0005994182632136602,
      "loss": 3.2957,
      "step": 4568
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6811476945877075,
      "learning_rate": 0.0005994180085673047,
      "loss": 3.574,
      "step": 4569
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5599546432495117,
      "learning_rate": 0.0005994177538652818,
      "loss": 3.1607,
      "step": 4570
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2307815551757812,
      "learning_rate": 0.0005994174991075915,
      "loss": 3.4952,
      "step": 4571
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6013346910476685,
      "learning_rate": 0.0005994172442942338,
      "loss": 3.4705,
      "step": 4572
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4176312685012817,
      "learning_rate": 0.0005994169894252088,
      "loss": 3.4185,
      "step": 4573
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.16131329536438,
      "learning_rate": 0.0005994167345005166,
      "loss": 3.239,
      "step": 4574
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8031694889068604,
      "learning_rate": 0.0005994164795201571,
      "loss": 3.0813,
      "step": 4575
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.61943519115448,
      "learning_rate": 0.0005994162244841304,
      "loss": 3.3732,
      "step": 4576
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5776071548461914,
      "learning_rate": 0.0005994159693924366,
      "loss": 3.1843,
      "step": 4577
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.304657220840454,
      "learning_rate": 0.0005994157142450757,
      "loss": 3.3599,
      "step": 4578
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3437215089797974,
      "learning_rate": 0.0005994154590420478,
      "loss": 3.3684,
      "step": 4579
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.870524287223816,
      "learning_rate": 0.000599415203783353,
      "loss": 3.3645,
      "step": 4580
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7952014207839966,
      "learning_rate": 0.0005994149484689911,
      "loss": 3.2355,
      "step": 4581
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2009024620056152,
      "learning_rate": 0.0005994146930989624,
      "loss": 3.4124,
      "step": 4582
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7093992233276367,
      "learning_rate": 0.0005994144376732668,
      "loss": 3.2692,
      "step": 4583
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.311417579650879,
      "learning_rate": 0.0005994141821919045,
      "loss": 3.3819,
      "step": 4584
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7595754861831665,
      "learning_rate": 0.0005994139266548754,
      "loss": 3.369,
      "step": 4585
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8440102338790894,
      "learning_rate": 0.0005994136710621795,
      "loss": 3.2499,
      "step": 4586
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7834759950637817,
      "learning_rate": 0.0005994134154138171,
      "loss": 3.4827,
      "step": 4587
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5457355976104736,
      "learning_rate": 0.0005994131597097881,
      "loss": 3.3611,
      "step": 4588
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6391514539718628,
      "learning_rate": 0.0005994129039500923,
      "loss": 3.3041,
      "step": 4589
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5946013927459717,
      "learning_rate": 0.0005994126481347303,
      "loss": 3.4884,
      "step": 4590
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9998881816864014,
      "learning_rate": 0.0005994123922637016,
      "loss": 3.3495,
      "step": 4591
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6916561126708984,
      "learning_rate": 0.0005994121363370066,
      "loss": 3.4843,
      "step": 4592
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7259918451309204,
      "learning_rate": 0.0005994118803546453,
      "loss": 3.3098,
      "step": 4593
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8095673322677612,
      "learning_rate": 0.0005994116243166176,
      "loss": 3.1271,
      "step": 4594
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6806933879852295,
      "learning_rate": 0.0005994113682229236,
      "loss": 3.2513,
      "step": 4595
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1171152591705322,
      "learning_rate": 0.0005994111120735633,
      "loss": 3.3989,
      "step": 4596
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7868263721466064,
      "learning_rate": 0.0005994108558685369,
      "loss": 3.3603,
      "step": 4597
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6631358861923218,
      "learning_rate": 0.0005994105996078444,
      "loss": 3.2811,
      "step": 4598
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.722188115119934,
      "learning_rate": 0.0005994103432914858,
      "loss": 3.3596,
      "step": 4599
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5999820232391357,
      "learning_rate": 0.0005994100869194612,
      "loss": 3.3796,
      "step": 4600
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0500872135162354,
      "learning_rate": 0.0005994098304917706,
      "loss": 3.2474,
      "step": 4601
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.114617109298706,
      "learning_rate": 0.0005994095740084141,
      "loss": 3.5518,
      "step": 4602
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1282455921173096,
      "learning_rate": 0.0005994093174693916,
      "loss": 3.4319,
      "step": 4603
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7315245866775513,
      "learning_rate": 0.0005994090608747034,
      "loss": 3.5018,
      "step": 4604
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8169612884521484,
      "learning_rate": 0.0005994088042243492,
      "loss": 3.3337,
      "step": 4605
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6581746339797974,
      "learning_rate": 0.0005994085475183295,
      "loss": 3.4203,
      "step": 4606
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.845267653465271,
      "learning_rate": 0.0005994082907566439,
      "loss": 3.1483,
      "step": 4607
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9387456178665161,
      "learning_rate": 0.0005994080339392926,
      "loss": 3.4458,
      "step": 4608
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6245684623718262,
      "learning_rate": 0.0005994077770662758,
      "loss": 3.1649,
      "step": 4609
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.819957733154297,
      "learning_rate": 0.0005994075201375935,
      "loss": 3.4621,
      "step": 4610
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6490724086761475,
      "learning_rate": 0.0005994072631532457,
      "loss": 3.1487,
      "step": 4611
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.945177674293518,
      "learning_rate": 0.0005994070061132324,
      "loss": 3.1099,
      "step": 4612
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.706141710281372,
      "learning_rate": 0.0005994067490175535,
      "loss": 3.3779,
      "step": 4613
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.330232620239258,
      "learning_rate": 0.0005994064918662095,
      "loss": 3.2322,
      "step": 4614
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7155885696411133,
      "learning_rate": 0.0005994062346592001,
      "loss": 3.3984,
      "step": 4615
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7803435325622559,
      "learning_rate": 0.0005994059773965254,
      "loss": 3.4636,
      "step": 4616
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5260059833526611,
      "learning_rate": 0.0005994057200781855,
      "loss": 3.2206,
      "step": 4617
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6459357738494873,
      "learning_rate": 0.0005994054627041803,
      "loss": 3.1982,
      "step": 4618
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7637444734573364,
      "learning_rate": 0.0005994052052745101,
      "loss": 3.3161,
      "step": 4619
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4528766870498657,
      "learning_rate": 0.0005994049477891748,
      "loss": 3.3547,
      "step": 4620
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.850034236907959,
      "learning_rate": 0.0005994046902481745,
      "loss": 3.2087,
      "step": 4621
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7637262344360352,
      "learning_rate": 0.0005994044326515092,
      "loss": 3.5292,
      "step": 4622
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3706296682357788,
      "learning_rate": 0.0005994041749991789,
      "loss": 3.2447,
      "step": 4623
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7350492477416992,
      "learning_rate": 0.0005994039172911839,
      "loss": 3.1158,
      "step": 4624
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9609390497207642,
      "learning_rate": 0.0005994036595275238,
      "loss": 3.4934,
      "step": 4625
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8622812032699585,
      "learning_rate": 0.000599403401708199,
      "loss": 3.2198,
      "step": 4626
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2952888011932373,
      "learning_rate": 0.0005994031438332095,
      "loss": 3.4672,
      "step": 4627
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1745104789733887,
      "learning_rate": 0.0005994028859025554,
      "loss": 3.1938,
      "step": 4628
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.122053384780884,
      "learning_rate": 0.0005994026279162365,
      "loss": 3.095,
      "step": 4629
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7338626384735107,
      "learning_rate": 0.0005994023698742529,
      "loss": 3.3841,
      "step": 4630
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5949556827545166,
      "learning_rate": 0.000599402111776605,
      "loss": 3.3414,
      "step": 4631
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1011781692504883,
      "learning_rate": 0.0005994018536232924,
      "loss": 3.4894,
      "step": 4632
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.5771288871765137,
      "learning_rate": 0.0005994015954143156,
      "loss": 3.2718,
      "step": 4633
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3663742542266846,
      "learning_rate": 0.0005994013371496741,
      "loss": 3.2615,
      "step": 4634
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.287687301635742,
      "learning_rate": 0.0005994010788293684,
      "loss": 3.4259,
      "step": 4635
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.897855520248413,
      "learning_rate": 0.0005994008204533984,
      "loss": 3.1582,
      "step": 4636
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5015654563903809,
      "learning_rate": 0.000599400562021764,
      "loss": 3.4268,
      "step": 4637
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3545197248458862,
      "learning_rate": 0.0005994003035344655,
      "loss": 3.2319,
      "step": 4638
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.097578287124634,
      "learning_rate": 0.0005994000449915028,
      "loss": 3.3915,
      "step": 4639
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5386844873428345,
      "learning_rate": 0.000599399786392876,
      "loss": 3.1719,
      "step": 4640
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7831659317016602,
      "learning_rate": 0.0005993995277385851,
      "loss": 3.3134,
      "step": 4641
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.314674139022827,
      "learning_rate": 0.0005993992690286301,
      "loss": 3.3953,
      "step": 4642
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.237194061279297,
      "learning_rate": 0.0005993990102630113,
      "loss": 3.4685,
      "step": 4643
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.560716152191162,
      "learning_rate": 0.0005993987514417284,
      "loss": 3.3904,
      "step": 4644
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.883893370628357,
      "learning_rate": 0.0005993984925647817,
      "loss": 3.3012,
      "step": 4645
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.1470723152160645,
      "learning_rate": 0.0005993982336321712,
      "loss": 3.5652,
      "step": 4646
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.464369297027588,
      "learning_rate": 0.0005993979746438969,
      "loss": 3.4035,
      "step": 4647
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9074839353561401,
      "learning_rate": 0.0005993977155999588,
      "loss": 3.5173,
      "step": 4648
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0457751750946045,
      "learning_rate": 0.0005993974565003571,
      "loss": 3.3043,
      "step": 4649
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5889639854431152,
      "learning_rate": 0.0005993971973450918,
      "loss": 3.5739,
      "step": 4650
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.604130506515503,
      "learning_rate": 0.0005993969381341628,
      "loss": 3.0489,
      "step": 4651
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1288375854492188,
      "learning_rate": 0.0005993966788675703,
      "loss": 2.9872,
      "step": 4652
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5972001552581787,
      "learning_rate": 0.0005993964195453144,
      "loss": 3.2987,
      "step": 4653
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9712026119232178,
      "learning_rate": 0.0005993961601673948,
      "loss": 3.5101,
      "step": 4654
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8732398748397827,
      "learning_rate": 0.000599395900733812,
      "loss": 3.2818,
      "step": 4655
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0500926971435547,
      "learning_rate": 0.0005993956412445659,
      "loss": 3.4517,
      "step": 4656
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6912373304367065,
      "learning_rate": 0.0005993953816996563,
      "loss": 3.4002,
      "step": 4657
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8740105628967285,
      "learning_rate": 0.0005993951220990836,
      "loss": 3.5189,
      "step": 4658
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.407792568206787,
      "learning_rate": 0.0005993948624428477,
      "loss": 3.6882,
      "step": 4659
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7056314945220947,
      "learning_rate": 0.0005993946027309485,
      "loss": 3.1804,
      "step": 4660
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.134169340133667,
      "learning_rate": 0.0005993943429633864,
      "loss": 3.343,
      "step": 4661
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6260714530944824,
      "learning_rate": 0.000599394083140161,
      "loss": 3.2797,
      "step": 4662
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8586032390594482,
      "learning_rate": 0.0005993938232612727,
      "loss": 3.2943,
      "step": 4663
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.375706195831299,
      "learning_rate": 0.0005993935633267215,
      "loss": 3.3247,
      "step": 4664
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7510892152786255,
      "learning_rate": 0.0005993933033365073,
      "loss": 3.4288,
      "step": 4665
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4980930089950562,
      "learning_rate": 0.0005993930432906302,
      "loss": 3.438,
      "step": 4666
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.924323320388794,
      "learning_rate": 0.0005993927831890904,
      "loss": 3.5072,
      "step": 4667
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4543187618255615,
      "learning_rate": 0.0005993925230318878,
      "loss": 3.2461,
      "step": 4668
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6091177463531494,
      "learning_rate": 0.0005993922628190225,
      "loss": 3.3949,
      "step": 4669
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5747883319854736,
      "learning_rate": 0.0005993920025504945,
      "loss": 3.3432,
      "step": 4670
    },
    {
      "epoch": 0.06,
      "grad_norm": 4.739680767059326,
      "learning_rate": 0.0005993917422263039,
      "loss": 3.0082,
      "step": 4671
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7604260444641113,
      "learning_rate": 0.0005993914818464507,
      "loss": 3.5201,
      "step": 4672
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4511921405792236,
      "learning_rate": 0.0005993912214109348,
      "loss": 3.4931,
      "step": 4673
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.370635986328125,
      "learning_rate": 0.0005993909609197566,
      "loss": 3.4014,
      "step": 4674
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6384661197662354,
      "learning_rate": 0.000599390700372916,
      "loss": 3.1761,
      "step": 4675
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.187063694000244,
      "learning_rate": 0.000599390439770413,
      "loss": 3.4594,
      "step": 4676
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.598756194114685,
      "learning_rate": 0.0005993901791122476,
      "loss": 3.4901,
      "step": 4677
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6868133544921875,
      "learning_rate": 0.0005993899183984199,
      "loss": 3.3172,
      "step": 4678
    },
    {
      "epoch": 0.06,
      "grad_norm": 4.6399993896484375,
      "learning_rate": 0.00059938965762893,
      "loss": 3.2557,
      "step": 4679
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1693625450134277,
      "learning_rate": 0.000599389396803778,
      "loss": 3.4706,
      "step": 4680
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6978342533111572,
      "learning_rate": 0.0005993891359229637,
      "loss": 3.1781,
      "step": 4681
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5713250637054443,
      "learning_rate": 0.0005993888749864874,
      "loss": 3.2748,
      "step": 4682
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.9578325748443604,
      "learning_rate": 0.000599388613994349,
      "loss": 2.9873,
      "step": 4683
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.0172369480133057,
      "learning_rate": 0.0005993883529465487,
      "loss": 3.5243,
      "step": 4684
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.817117691040039,
      "learning_rate": 0.0005993880918430864,
      "loss": 3.3969,
      "step": 4685
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.744447946548462,
      "learning_rate": 0.0005993878306839622,
      "loss": 3.276,
      "step": 4686
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.4337873458862305,
      "learning_rate": 0.0005993875694691762,
      "loss": 3.2553,
      "step": 4687
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6280949115753174,
      "learning_rate": 0.0005993873081987283,
      "loss": 3.4131,
      "step": 4688
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7969379425048828,
      "learning_rate": 0.0005993870468726187,
      "loss": 3.3436,
      "step": 4689
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.001922845840454,
      "learning_rate": 0.0005993867854908473,
      "loss": 3.2772,
      "step": 4690
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7885031700134277,
      "learning_rate": 0.0005993865240534144,
      "loss": 3.2775,
      "step": 4691
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7706003189086914,
      "learning_rate": 0.0005993862625603199,
      "loss": 3.3391,
      "step": 4692
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.359786868095398,
      "learning_rate": 0.0005993860010115637,
      "loss": 3.2354,
      "step": 4693
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.880361795425415,
      "learning_rate": 0.0005993857394071461,
      "loss": 3.3726,
      "step": 4694
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.496460199356079,
      "learning_rate": 0.000599385477747067,
      "loss": 3.373,
      "step": 4695
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4950581789016724,
      "learning_rate": 0.0005993852160313265,
      "loss": 3.3785,
      "step": 4696
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7216349840164185,
      "learning_rate": 0.0005993849542599247,
      "loss": 3.1022,
      "step": 4697
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.590372085571289,
      "learning_rate": 0.0005993846924328615,
      "loss": 3.1926,
      "step": 4698
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7100948095321655,
      "learning_rate": 0.000599384430550137,
      "loss": 3.0741,
      "step": 4699
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9709758758544922,
      "learning_rate": 0.0005993841686117514,
      "loss": 3.3067,
      "step": 4700
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3085055351257324,
      "learning_rate": 0.0005993839066177045,
      "loss": 3.3252,
      "step": 4701
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.129819631576538,
      "learning_rate": 0.0005993836445679966,
      "loss": 3.3712,
      "step": 4702
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3558638095855713,
      "learning_rate": 0.0005993833824626276,
      "loss": 3.3722,
      "step": 4703
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4958136081695557,
      "learning_rate": 0.0005993831203015976,
      "loss": 3.3562,
      "step": 4704
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.599662184715271,
      "learning_rate": 0.0005993828580849065,
      "loss": 3.265,
      "step": 4705
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.690078020095825,
      "learning_rate": 0.0005993825958125545,
      "loss": 3.4437,
      "step": 4706
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6256821155548096,
      "learning_rate": 0.0005993823334845417,
      "loss": 3.544,
      "step": 4707
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2180984020233154,
      "learning_rate": 0.0005993820711008681,
      "loss": 3.3213,
      "step": 4708
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1856508255004883,
      "learning_rate": 0.0005993818086615337,
      "loss": 3.4225,
      "step": 4709
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6754635572433472,
      "learning_rate": 0.0005993815461665386,
      "loss": 3.3923,
      "step": 4710
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.2655030488967896,
      "learning_rate": 0.0005993812836158827,
      "loss": 3.0397,
      "step": 4711
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.395824432373047,
      "learning_rate": 0.0005993810210095662,
      "loss": 3.4016,
      "step": 4712
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.919543981552124,
      "learning_rate": 0.0005993807583475892,
      "loss": 3.4993,
      "step": 4713
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.740169882774353,
      "learning_rate": 0.0005993804956299516,
      "loss": 3.4957,
      "step": 4714
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.178563117980957,
      "learning_rate": 0.0005993802328566536,
      "loss": 3.2251,
      "step": 4715
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0015501976013184,
      "learning_rate": 0.000599379970027695,
      "loss": 3.2938,
      "step": 4716
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3516870737075806,
      "learning_rate": 0.0005993797071430761,
      "loss": 3.7289,
      "step": 4717
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6438710689544678,
      "learning_rate": 0.000599379444202797,
      "loss": 3.3161,
      "step": 4718
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5496777296066284,
      "learning_rate": 0.0005993791812068574,
      "loss": 3.3254,
      "step": 4719
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6124918460845947,
      "learning_rate": 0.0005993789181552577,
      "loss": 3.4731,
      "step": 4720
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4689836502075195,
      "learning_rate": 0.0005993786550479977,
      "loss": 3.4876,
      "step": 4721
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4581764936447144,
      "learning_rate": 0.0005993783918850776,
      "loss": 3.4912,
      "step": 4722
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8855572938919067,
      "learning_rate": 0.0005993781286664975,
      "loss": 3.4716,
      "step": 4723
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9976205825805664,
      "learning_rate": 0.0005993778653922572,
      "loss": 3.6737,
      "step": 4724
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2269105911254883,
      "learning_rate": 0.0005993776020623569,
      "loss": 3.5287,
      "step": 4725
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.385941505432129,
      "learning_rate": 0.0005993773386767968,
      "loss": 3.3276,
      "step": 4726
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9028645753860474,
      "learning_rate": 0.0005993770752355767,
      "loss": 3.4793,
      "step": 4727
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9127277135849,
      "learning_rate": 0.0005993768117386968,
      "loss": 3.0774,
      "step": 4728
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5225985050201416,
      "learning_rate": 0.0005993765481861569,
      "loss": 3.4209,
      "step": 4729
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6082087755203247,
      "learning_rate": 0.0005993762845779575,
      "loss": 3.2257,
      "step": 4730
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5821009874343872,
      "learning_rate": 0.0005993760209140983,
      "loss": 3.3785,
      "step": 4731
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7490209341049194,
      "learning_rate": 0.0005993757571945796,
      "loss": 3.2032,
      "step": 4732
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.580628514289856,
      "learning_rate": 0.000599375493419401,
      "loss": 3.3843,
      "step": 4733
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6072207689285278,
      "learning_rate": 0.0005993752295885631,
      "loss": 3.2803,
      "step": 4734
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.4364373683929443,
      "learning_rate": 0.0005993749657020656,
      "loss": 3.3159,
      "step": 4735
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.662405252456665,
      "learning_rate": 0.0005993747017599087,
      "loss": 3.4961,
      "step": 4736
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7352391481399536,
      "learning_rate": 0.0005993744377620923,
      "loss": 3.3901,
      "step": 4737
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6366020441055298,
      "learning_rate": 0.0005993741737086166,
      "loss": 3.587,
      "step": 4738
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3633780479431152,
      "learning_rate": 0.0005993739095994815,
      "loss": 3.2209,
      "step": 4739
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3880480527877808,
      "learning_rate": 0.0005993736454346873,
      "loss": 3.2869,
      "step": 4740
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.53001868724823,
      "learning_rate": 0.0005993733812142337,
      "loss": 3.3136,
      "step": 4741
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9097177982330322,
      "learning_rate": 0.000599373116938121,
      "loss": 3.3835,
      "step": 4742
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4696370363235474,
      "learning_rate": 0.0005993728526063493,
      "loss": 3.5202,
      "step": 4743
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8838955163955688,
      "learning_rate": 0.0005993725882189184,
      "loss": 3.2808,
      "step": 4744
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8726904392242432,
      "learning_rate": 0.0005993723237758286,
      "loss": 3.4871,
      "step": 4745
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6529556512832642,
      "learning_rate": 0.0005993720592770797,
      "loss": 3.4973,
      "step": 4746
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6434415578842163,
      "learning_rate": 0.0005993717947226719,
      "loss": 3.325,
      "step": 4747
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4303990602493286,
      "learning_rate": 0.0005993715301126054,
      "loss": 3.4933,
      "step": 4748
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7105648517608643,
      "learning_rate": 0.0005993712654468799,
      "loss": 3.3292,
      "step": 4749
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7538461685180664,
      "learning_rate": 0.0005993710007254957,
      "loss": 3.4385,
      "step": 4750
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7273281812667847,
      "learning_rate": 0.0005993707359484528,
      "loss": 3.4448,
      "step": 4751
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6718416213989258,
      "learning_rate": 0.0005993704711157511,
      "loss": 3.6302,
      "step": 4752
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5450760126113892,
      "learning_rate": 0.000599370206227391,
      "loss": 3.2777,
      "step": 4753
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4193956851959229,
      "learning_rate": 0.0005993699412833722,
      "loss": 3.2393,
      "step": 4754
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7957344055175781,
      "learning_rate": 0.0005993696762836948,
      "loss": 3.536,
      "step": 4755
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2330195903778076,
      "learning_rate": 0.0005993694112283591,
      "loss": 3.2047,
      "step": 4756
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1798665523529053,
      "learning_rate": 0.0005993691461173648,
      "loss": 3.2002,
      "step": 4757
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8782610893249512,
      "learning_rate": 0.0005993688809507123,
      "loss": 3.516,
      "step": 4758
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.853670358657837,
      "learning_rate": 0.0005993686157284014,
      "loss": 3.4474,
      "step": 4759
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5838191509246826,
      "learning_rate": 0.0005993683504504322,
      "loss": 3.2957,
      "step": 4760
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9085651636123657,
      "learning_rate": 0.0005993680851168049,
      "loss": 3.4038,
      "step": 4761
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.438489317893982,
      "learning_rate": 0.0005993678197275192,
      "loss": 3.2589,
      "step": 4762
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.039750576019287,
      "learning_rate": 0.0005993675542825755,
      "loss": 3.3972,
      "step": 4763
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5394874811172485,
      "learning_rate": 0.0005993672887819737,
      "loss": 3.2391,
      "step": 4764
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8587232828140259,
      "learning_rate": 0.0005993670232257138,
      "loss": 3.3059,
      "step": 4765
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6215897798538208,
      "learning_rate": 0.0005993667576137961,
      "loss": 3.3873,
      "step": 4766
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.391958236694336,
      "learning_rate": 0.0005993664919462202,
      "loss": 3.3704,
      "step": 4767
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8269789218902588,
      "learning_rate": 0.0005993662262229867,
      "loss": 3.2711,
      "step": 4768
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.960008144378662,
      "learning_rate": 0.0005993659604440953,
      "loss": 3.2585,
      "step": 4769
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.499354839324951,
      "learning_rate": 0.0005993656946095459,
      "loss": 3.4202,
      "step": 4770
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1347901821136475,
      "learning_rate": 0.000599365428719339,
      "loss": 3.5312,
      "step": 4771
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9340146780014038,
      "learning_rate": 0.0005993651627734743,
      "loss": 3.292,
      "step": 4772
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7641680240631104,
      "learning_rate": 0.0005993648967719521,
      "loss": 3.3541,
      "step": 4773
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4271944761276245,
      "learning_rate": 0.0005993646307147722,
      "loss": 3.2617,
      "step": 4774
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4572144746780396,
      "learning_rate": 0.0005993643646019347,
      "loss": 3.4985,
      "step": 4775
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5743259191513062,
      "learning_rate": 0.0005993640984334398,
      "loss": 3.3392,
      "step": 4776
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7742877006530762,
      "learning_rate": 0.0005993638322092875,
      "loss": 3.044,
      "step": 4777
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7849622964859009,
      "learning_rate": 0.0005993635659294779,
      "loss": 3.2082,
      "step": 4778
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.523438572883606,
      "learning_rate": 0.0005993632995940107,
      "loss": 3.2308,
      "step": 4779
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0422682762145996,
      "learning_rate": 0.0005993630332028863,
      "loss": 3.6396,
      "step": 4780
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6473302841186523,
      "learning_rate": 0.0005993627667561048,
      "loss": 3.2005,
      "step": 4781
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8395239114761353,
      "learning_rate": 0.000599362500253666,
      "loss": 3.333,
      "step": 4782
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.758003830909729,
      "learning_rate": 0.0005993622336955702,
      "loss": 3.5029,
      "step": 4783
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.996251106262207,
      "learning_rate": 0.000599361967081817,
      "loss": 3.262,
      "step": 4784
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7596595287322998,
      "learning_rate": 0.0005993617004124071,
      "loss": 3.0604,
      "step": 4785
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.445385456085205,
      "learning_rate": 0.00059936143368734,
      "loss": 3.3954,
      "step": 4786
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5451595783233643,
      "learning_rate": 0.000599361166906616,
      "loss": 3.4827,
      "step": 4787
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4111714363098145,
      "learning_rate": 0.0005993609000702352,
      "loss": 3.5501,
      "step": 4788
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1681737899780273,
      "learning_rate": 0.0005993606331781975,
      "loss": 3.441,
      "step": 4789
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.939799189567566,
      "learning_rate": 0.0005993603662305029,
      "loss": 3.1364,
      "step": 4790
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5629853010177612,
      "learning_rate": 0.0005993600992271517,
      "loss": 3.3667,
      "step": 4791
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.367405652999878,
      "learning_rate": 0.0005993598321681437,
      "loss": 3.433,
      "step": 4792
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5081630945205688,
      "learning_rate": 0.0005993595650534792,
      "loss": 3.3798,
      "step": 4793
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.2304255962371826,
      "learning_rate": 0.000599359297883158,
      "loss": 3.0401,
      "step": 4794
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.960168719291687,
      "learning_rate": 0.0005993590306571803,
      "loss": 3.3423,
      "step": 4795
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6654152870178223,
      "learning_rate": 0.000599358763375546,
      "loss": 3.3259,
      "step": 4796
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0894103050231934,
      "learning_rate": 0.0005993584960382554,
      "loss": 3.3029,
      "step": 4797
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.9850382804870605,
      "learning_rate": 0.0005993582286453083,
      "loss": 3.4394,
      "step": 4798
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9921871423721313,
      "learning_rate": 0.0005993579611967049,
      "loss": 3.3542,
      "step": 4799
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2329721450805664,
      "learning_rate": 0.0005993576936924452,
      "loss": 3.1417,
      "step": 4800
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.6784486770629883,
      "learning_rate": 0.0005993574261325292,
      "loss": 3.3782,
      "step": 4801
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.509455442428589,
      "learning_rate": 0.0005993571585169571,
      "loss": 3.2321,
      "step": 4802
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.598830223083496,
      "learning_rate": 0.0005993568908457288,
      "loss": 3.1989,
      "step": 4803
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5409486293792725,
      "learning_rate": 0.0005993566231188444,
      "loss": 3.4849,
      "step": 4804
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1028363704681396,
      "learning_rate": 0.000599356355336304,
      "loss": 3.4423,
      "step": 4805
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6124989986419678,
      "learning_rate": 0.0005993560874981075,
      "loss": 3.3309,
      "step": 4806
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7706905603408813,
      "learning_rate": 0.0005993558196042552,
      "loss": 3.3743,
      "step": 4807
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7952916622161865,
      "learning_rate": 0.0005993555516547469,
      "loss": 3.4966,
      "step": 4808
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4674553871154785,
      "learning_rate": 0.0005993552836495828,
      "loss": 3.2141,
      "step": 4809
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.4358766078948975,
      "learning_rate": 0.0005993550155887629,
      "loss": 3.5292,
      "step": 4810
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8983936309814453,
      "learning_rate": 0.0005993547474722872,
      "loss": 3.2256,
      "step": 4811
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5231378078460693,
      "learning_rate": 0.0005993544793001558,
      "loss": 3.3388,
      "step": 4812
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.55013108253479,
      "learning_rate": 0.0005993542110723688,
      "loss": 3.2591,
      "step": 4813
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.2696917057037354,
      "learning_rate": 0.0005993539427889262,
      "loss": 3.5567,
      "step": 4814
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3792834281921387,
      "learning_rate": 0.0005993536744498281,
      "loss": 3.2826,
      "step": 4815
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5617464780807495,
      "learning_rate": 0.0005993534060550744,
      "loss": 3.5399,
      "step": 4816
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.84639835357666,
      "learning_rate": 0.0005993531376046652,
      "loss": 3.2934,
      "step": 4817
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.1703524589538574,
      "learning_rate": 0.0005993528690986007,
      "loss": 3.4244,
      "step": 4818
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.500809669494629,
      "learning_rate": 0.0005993526005368809,
      "loss": 3.5095,
      "step": 4819
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.010267734527588,
      "learning_rate": 0.0005993523319195057,
      "loss": 3.25,
      "step": 4820
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.004640579223633,
      "learning_rate": 0.0005993520632464753,
      "loss": 3.1361,
      "step": 4821
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2630181312561035,
      "learning_rate": 0.0005993517945177897,
      "loss": 3.504,
      "step": 4822
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4423916339874268,
      "learning_rate": 0.0005993515257334489,
      "loss": 3.6166,
      "step": 4823
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7766320705413818,
      "learning_rate": 0.000599351256893453,
      "loss": 3.3881,
      "step": 4824
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.737894058227539,
      "learning_rate": 0.0005993509879978022,
      "loss": 3.4039,
      "step": 4825
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.687239408493042,
      "learning_rate": 0.0005993507190464963,
      "loss": 3.4399,
      "step": 4826
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.60397207736969,
      "learning_rate": 0.0005993504500395354,
      "loss": 3.2329,
      "step": 4827
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.494074821472168,
      "learning_rate": 0.0005993501809769197,
      "loss": 3.4525,
      "step": 4828
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7594062089920044,
      "learning_rate": 0.000599349911858649,
      "loss": 3.2801,
      "step": 4829
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6629265546798706,
      "learning_rate": 0.0005993496426847237,
      "loss": 3.1542,
      "step": 4830
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6154658794403076,
      "learning_rate": 0.0005993493734551435,
      "loss": 3.0803,
      "step": 4831
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8596923351287842,
      "learning_rate": 0.0005993491041699087,
      "loss": 3.0857,
      "step": 4832
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8718661069869995,
      "learning_rate": 0.0005993488348290192,
      "loss": 3.1681,
      "step": 4833
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.823007583618164,
      "learning_rate": 0.000599348565432475,
      "loss": 3.6776,
      "step": 4834
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5998120307922363,
      "learning_rate": 0.0005993482959802764,
      "loss": 3.3451,
      "step": 4835
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5989090204238892,
      "learning_rate": 0.0005993480264724233,
      "loss": 3.4358,
      "step": 4836
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7628806829452515,
      "learning_rate": 0.0005993477569089156,
      "loss": 3.1461,
      "step": 4837
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.920157551765442,
      "learning_rate": 0.0005993474872897537,
      "loss": 3.5963,
      "step": 4838
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2079010009765625,
      "learning_rate": 0.0005993472176149373,
      "loss": 3.2898,
      "step": 4839
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6421743631362915,
      "learning_rate": 0.0005993469478844667,
      "loss": 3.3991,
      "step": 4840
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9503767490386963,
      "learning_rate": 0.0005993466780983418,
      "loss": 3.0263,
      "step": 4841
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6376067399978638,
      "learning_rate": 0.0005993464082565627,
      "loss": 3.1882,
      "step": 4842
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.736498475074768,
      "learning_rate": 0.0005993461383591294,
      "loss": 3.5279,
      "step": 4843
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5563082695007324,
      "learning_rate": 0.0005993458684060421,
      "loss": 3.2983,
      "step": 4844
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2040534019470215,
      "learning_rate": 0.0005993455983973007,
      "loss": 3.0546,
      "step": 4845
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6190098524093628,
      "learning_rate": 0.0005993453283329053,
      "loss": 3.4903,
      "step": 4846
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2803428173065186,
      "learning_rate": 0.000599345058212856,
      "loss": 3.1321,
      "step": 4847
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6028296947479248,
      "learning_rate": 0.0005993447880371528,
      "loss": 3.2934,
      "step": 4848
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8520549535751343,
      "learning_rate": 0.0005993445178057958,
      "loss": 3.199,
      "step": 4849
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6018600463867188,
      "learning_rate": 0.0005993442475187849,
      "loss": 3.4537,
      "step": 4850
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5550802946090698,
      "learning_rate": 0.0005993439771761203,
      "loss": 3.2926,
      "step": 4851
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2409684658050537,
      "learning_rate": 0.0005993437067778019,
      "loss": 3.3068,
      "step": 4852
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7168858051300049,
      "learning_rate": 0.00059934343632383,
      "loss": 3.5044,
      "step": 4853
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.956504464149475,
      "learning_rate": 0.0005993431658142045,
      "loss": 3.3862,
      "step": 4854
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8485982418060303,
      "learning_rate": 0.0005993428952489253,
      "loss": 3.2165,
      "step": 4855
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.834302306175232,
      "learning_rate": 0.0005993426246279928,
      "loss": 3.4069,
      "step": 4856
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5676380395889282,
      "learning_rate": 0.0005993423539514067,
      "loss": 3.2484,
      "step": 4857
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.096975326538086,
      "learning_rate": 0.0005993420832191673,
      "loss": 3.4868,
      "step": 4858
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7257111072540283,
      "learning_rate": 0.0005993418124312746,
      "loss": 3.1729,
      "step": 4859
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.418915033340454,
      "learning_rate": 0.0005993415415877285,
      "loss": 3.3634,
      "step": 4860
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0236575603485107,
      "learning_rate": 0.0005993412706885292,
      "loss": 3.332,
      "step": 4861
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0510220527648926,
      "learning_rate": 0.0005993409997336767,
      "loss": 3.3229,
      "step": 4862
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7564016580581665,
      "learning_rate": 0.0005993407287231711,
      "loss": 3.4844,
      "step": 4863
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8588871955871582,
      "learning_rate": 0.0005993404576570123,
      "loss": 3.1448,
      "step": 4864
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9042643308639526,
      "learning_rate": 0.0005993401865352006,
      "loss": 3.4844,
      "step": 4865
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7528338432312012,
      "learning_rate": 0.0005993399153577357,
      "loss": 3.2723,
      "step": 4866
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.674111843109131,
      "learning_rate": 0.0005993396441246181,
      "loss": 3.1914,
      "step": 4867
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.342250347137451,
      "learning_rate": 0.0005993393728358474,
      "loss": 3.3399,
      "step": 4868
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.8454020023345947,
      "learning_rate": 0.000599339101491424,
      "loss": 3.3789,
      "step": 4869
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.305692195892334,
      "learning_rate": 0.0005993388300913478,
      "loss": 3.316,
      "step": 4870
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.332867383956909,
      "learning_rate": 0.0005993385586356188,
      "loss": 3.496,
      "step": 4871
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.204374074935913,
      "learning_rate": 0.0005993382871242372,
      "loss": 3.1553,
      "step": 4872
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1621317863464355,
      "learning_rate": 0.0005993380155572029,
      "loss": 3.2268,
      "step": 4873
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5600078105926514,
      "learning_rate": 0.000599337743934516,
      "loss": 3.5969,
      "step": 4874
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6437619924545288,
      "learning_rate": 0.0005993374722561766,
      "loss": 3.0706,
      "step": 4875
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.069430112838745,
      "learning_rate": 0.0005993372005221847,
      "loss": 3.4655,
      "step": 4876
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.75482177734375,
      "learning_rate": 0.0005993369287325403,
      "loss": 3.4135,
      "step": 4877
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5011887550354004,
      "learning_rate": 0.0005993366568872436,
      "loss": 3.429,
      "step": 4878
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.011472463607788,
      "learning_rate": 0.0005993363849862946,
      "loss": 3.2938,
      "step": 4879
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5998302698135376,
      "learning_rate": 0.0005993361130296931,
      "loss": 3.4088,
      "step": 4880
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7342902421951294,
      "learning_rate": 0.0005993358410174396,
      "loss": 3.1312,
      "step": 4881
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8842095136642456,
      "learning_rate": 0.0005993355689495339,
      "loss": 3.0205,
      "step": 4882
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1361780166625977,
      "learning_rate": 0.0005993352968259759,
      "loss": 3.2516,
      "step": 4883
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.362022876739502,
      "learning_rate": 0.000599335024646766,
      "loss": 3.156,
      "step": 4884
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4766669273376465,
      "learning_rate": 0.0005993347524119039,
      "loss": 3.5073,
      "step": 4885
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7272531986236572,
      "learning_rate": 0.00059933448012139,
      "loss": 3.172,
      "step": 4886
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6702544689178467,
      "learning_rate": 0.0005993342077752241,
      "loss": 3.4423,
      "step": 4887
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2286901473999023,
      "learning_rate": 0.0005993339353734063,
      "loss": 3.2416,
      "step": 4888
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2419683933258057,
      "learning_rate": 0.0005993336629159366,
      "loss": 3.3525,
      "step": 4889
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6044185161590576,
      "learning_rate": 0.0005993333904028153,
      "loss": 3.3831,
      "step": 4890
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.4660985469818115,
      "learning_rate": 0.0005993331178340421,
      "loss": 3.1665,
      "step": 4891
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.991951584815979,
      "learning_rate": 0.0005993328452096173,
      "loss": 3.428,
      "step": 4892
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5371949672698975,
      "learning_rate": 0.0005993325725295408,
      "loss": 3.5763,
      "step": 4893
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.757341980934143,
      "learning_rate": 0.0005993322997938128,
      "loss": 3.3074,
      "step": 4894
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.606481909751892,
      "learning_rate": 0.0005993320270024333,
      "loss": 3.3465,
      "step": 4895
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7892119884490967,
      "learning_rate": 0.0005993317541554023,
      "loss": 3.4845,
      "step": 4896
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6692118644714355,
      "learning_rate": 0.0005993314812527199,
      "loss": 3.5727,
      "step": 4897
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8736952543258667,
      "learning_rate": 0.000599331208294386,
      "loss": 3.2555,
      "step": 4898
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.25473952293396,
      "learning_rate": 0.000599330935280401,
      "loss": 3.2657,
      "step": 4899
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8544455766677856,
      "learning_rate": 0.0005993306622107645,
      "loss": 3.2501,
      "step": 4900
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5947225093841553,
      "learning_rate": 0.0005993303890854769,
      "loss": 3.0878,
      "step": 4901
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4091200828552246,
      "learning_rate": 0.0005993301159045382,
      "loss": 3.3642,
      "step": 4902
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8701573610305786,
      "learning_rate": 0.0005993298426679481,
      "loss": 3.5536,
      "step": 4903
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7116565704345703,
      "learning_rate": 0.0005993295693757072,
      "loss": 3.3948,
      "step": 4904
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8947135210037231,
      "learning_rate": 0.0005993292960278152,
      "loss": 3.3336,
      "step": 4905
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.5781471729278564,
      "learning_rate": 0.0005993290226242723,
      "loss": 3.2433,
      "step": 4906
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.629252314567566,
      "learning_rate": 0.0005993287491650784,
      "loss": 3.4207,
      "step": 4907
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.531976342201233,
      "learning_rate": 0.0005993284756502337,
      "loss": 3.3409,
      "step": 4908
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.776353120803833,
      "learning_rate": 0.0005993282020797381,
      "loss": 3.299,
      "step": 4909
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4468960762023926,
      "learning_rate": 0.0005993279284535918,
      "loss": 3.6303,
      "step": 4910
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7015990018844604,
      "learning_rate": 0.0005993276547717947,
      "loss": 3.3603,
      "step": 4911
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8780981302261353,
      "learning_rate": 0.0005993273810343471,
      "loss": 3.2848,
      "step": 4912
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6123266220092773,
      "learning_rate": 0.0005993271072412487,
      "loss": 3.3723,
      "step": 4913
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.475812554359436,
      "learning_rate": 0.0005993268333924998,
      "loss": 3.2406,
      "step": 4914
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7513759136199951,
      "learning_rate": 0.0005993265594881006,
      "loss": 3.3252,
      "step": 4915
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.266667604446411,
      "learning_rate": 0.0005993262855280507,
      "loss": 3.441,
      "step": 4916
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7260304689407349,
      "learning_rate": 0.0005993260115123505,
      "loss": 3.289,
      "step": 4917
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8782744407653809,
      "learning_rate": 0.0005993257374409998,
      "loss": 3.4273,
      "step": 4918
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1263811588287354,
      "learning_rate": 0.000599325463313999,
      "loss": 3.3393,
      "step": 4919
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8359934091567993,
      "learning_rate": 0.0005993251891313478,
      "loss": 3.3742,
      "step": 4920
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8436384201049805,
      "learning_rate": 0.0005993249148930465,
      "loss": 3.3756,
      "step": 4921
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.5333609580993652,
      "learning_rate": 0.000599324640599095,
      "loss": 3.2253,
      "step": 4922
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5569689273834229,
      "learning_rate": 0.0005993243662494933,
      "loss": 3.2051,
      "step": 4923
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5360442399978638,
      "learning_rate": 0.0005993240918442418,
      "loss": 3.5578,
      "step": 4924
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.634787678718567,
      "learning_rate": 0.0005993238173833401,
      "loss": 3.3256,
      "step": 4925
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.952994465827942,
      "learning_rate": 0.0005993235428667884,
      "loss": 3.3951,
      "step": 4926
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6944676637649536,
      "learning_rate": 0.000599323268294587,
      "loss": 3.2057,
      "step": 4927
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.6083266735076904,
      "learning_rate": 0.0005993229936667356,
      "loss": 3.1426,
      "step": 4928
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.9235751628875732,
      "learning_rate": 0.0005993227189832345,
      "loss": 3.379,
      "step": 4929
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7695778608322144,
      "learning_rate": 0.0005993224442440836,
      "loss": 3.2794,
      "step": 4930
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.4116146564483643,
      "learning_rate": 0.0005993221694492831,
      "loss": 3.4274,
      "step": 4931
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.7075014114379883,
      "learning_rate": 0.0005993218945988328,
      "loss": 3.3545,
      "step": 4932
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6636936664581299,
      "learning_rate": 0.000599321619692733,
      "loss": 3.4299,
      "step": 4933
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8820774555206299,
      "learning_rate": 0.0005993213447309837,
      "loss": 3.5308,
      "step": 4934
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.82940673828125,
      "learning_rate": 0.0005993210697135848,
      "loss": 3.4504,
      "step": 4935
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.909430742263794,
      "learning_rate": 0.0005993207946405365,
      "loss": 3.4517,
      "step": 4936
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6705021858215332,
      "learning_rate": 0.0005993205195118389,
      "loss": 3.6259,
      "step": 4937
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3733274936676025,
      "learning_rate": 0.0005993202443274919,
      "loss": 3.5002,
      "step": 4938
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2992630004882812,
      "learning_rate": 0.0005993199690874956,
      "loss": 3.123,
      "step": 4939
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.824730396270752,
      "learning_rate": 0.0005993196937918501,
      "loss": 3.4496,
      "step": 4940
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.646160364151001,
      "learning_rate": 0.0005993194184405554,
      "loss": 3.4542,
      "step": 4941
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.2613041400909424,
      "learning_rate": 0.0005993191430336116,
      "loss": 3.3355,
      "step": 4942
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.3798952102661133,
      "learning_rate": 0.0005993188675710187,
      "loss": 3.2174,
      "step": 4943
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.460404872894287,
      "learning_rate": 0.0005993185920527769,
      "loss": 3.4616,
      "step": 4944
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1295721530914307,
      "learning_rate": 0.0005993183164788859,
      "loss": 3.1902,
      "step": 4945
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5288126468658447,
      "learning_rate": 0.0005993180408493461,
      "loss": 3.1439,
      "step": 4946
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7440882921218872,
      "learning_rate": 0.0005993177651641573,
      "loss": 3.1179,
      "step": 4947
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.853959321975708,
      "learning_rate": 0.0005993174894233198,
      "loss": 3.4158,
      "step": 4948
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.02006459236145,
      "learning_rate": 0.0005993172136268336,
      "loss": 3.2733,
      "step": 4949
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1838152408599854,
      "learning_rate": 0.0005993169377746985,
      "loss": 3.2495,
      "step": 4950
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.2249250411987305,
      "learning_rate": 0.0005993166618669149,
      "loss": 3.3779,
      "step": 4951
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9768214225769043,
      "learning_rate": 0.0005993163859034825,
      "loss": 3.2639,
      "step": 4952
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.778729796409607,
      "learning_rate": 0.0005993161098844017,
      "loss": 3.5482,
      "step": 4953
    },
    {
      "epoch": 0.06,
      "grad_norm": 3.6360082626342773,
      "learning_rate": 0.0005993158338096722,
      "loss": 3.3962,
      "step": 4954
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4413483142852783,
      "learning_rate": 0.0005993155576792945,
      "loss": 3.1326,
      "step": 4955
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5865294933319092,
      "learning_rate": 0.0005993152814932682,
      "loss": 3.4777,
      "step": 4956
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.835672378540039,
      "learning_rate": 0.0005993150052515935,
      "loss": 2.9688,
      "step": 4957
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.848695993423462,
      "learning_rate": 0.0005993147289542706,
      "loss": 3.3403,
      "step": 4958
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.113210678100586,
      "learning_rate": 0.0005993144526012995,
      "loss": 3.2981,
      "step": 4959
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9524672031402588,
      "learning_rate": 0.00059931417619268,
      "loss": 3.4733,
      "step": 4960
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.067697048187256,
      "learning_rate": 0.0005993138997284125,
      "loss": 3.1624,
      "step": 4961
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5724797248840332,
      "learning_rate": 0.0005993136232084969,
      "loss": 3.3483,
      "step": 4962
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.058687925338745,
      "learning_rate": 0.0005993133466329332,
      "loss": 3.1663,
      "step": 4963
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5311552286148071,
      "learning_rate": 0.0005993130700017215,
      "loss": 3.4488,
      "step": 4964
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.562679409980774,
      "learning_rate": 0.0005993127933148618,
      "loss": 3.4256,
      "step": 4965
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6850334405899048,
      "learning_rate": 0.0005993125165723543,
      "loss": 3.3872,
      "step": 4966
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8601254224777222,
      "learning_rate": 0.0005993122397741988,
      "loss": 3.2208,
      "step": 4967
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4363523721694946,
      "learning_rate": 0.0005993119629203956,
      "loss": 3.325,
      "step": 4968
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6522516012191772,
      "learning_rate": 0.0005993116860109447,
      "loss": 3.3085,
      "step": 4969
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.45835280418396,
      "learning_rate": 0.0005993114090458461,
      "loss": 3.407,
      "step": 4970
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.5849556922912598,
      "learning_rate": 0.0005993111320250998,
      "loss": 3.3394,
      "step": 4971
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4662100076675415,
      "learning_rate": 0.0005993108549487059,
      "loss": 3.1793,
      "step": 4972
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6830400228500366,
      "learning_rate": 0.0005993105778166646,
      "loss": 3.343,
      "step": 4973
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8790684938430786,
      "learning_rate": 0.0005993103006289756,
      "loss": 3.1568,
      "step": 4974
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.125105142593384,
      "learning_rate": 0.0005993100233856394,
      "loss": 3.2236,
      "step": 4975
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3512605428695679,
      "learning_rate": 0.0005993097460866556,
      "loss": 3.45,
      "step": 4976
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.1421151161193848,
      "learning_rate": 0.0005993094687320246,
      "loss": 3.0788,
      "step": 4977
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.376549243927002,
      "learning_rate": 0.0005993091913217462,
      "loss": 3.4179,
      "step": 4978
    },
    {
      "epoch": 0.06,
      "grad_norm": 2.0485401153564453,
      "learning_rate": 0.0005993089138558208,
      "loss": 3.1464,
      "step": 4979
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.8484047651290894,
      "learning_rate": 0.000599308636334248,
      "loss": 3.1627,
      "step": 4980
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.297976016998291,
      "learning_rate": 0.0005993083587570282,
      "loss": 3.5826,
      "step": 4981
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.414995551109314,
      "learning_rate": 0.0005993080811241613,
      "loss": 3.3984,
      "step": 4982
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.4054621458053589,
      "learning_rate": 0.0005993078034356473,
      "loss": 3.0354,
      "step": 4983
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7592753171920776,
      "learning_rate": 0.0005993075256914864,
      "loss": 3.4507,
      "step": 4984
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.362308144569397,
      "learning_rate": 0.0005993072478916786,
      "loss": 3.4855,
      "step": 4985
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3308789730072021,
      "learning_rate": 0.0005993069700362238,
      "loss": 3.4166,
      "step": 4986
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.709760308265686,
      "learning_rate": 0.0005993066921251224,
      "loss": 3.2226,
      "step": 4987
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.6998358964920044,
      "learning_rate": 0.000599306414158374,
      "loss": 3.3333,
      "step": 4988
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7002620697021484,
      "learning_rate": 0.0005993061361359791,
      "loss": 3.5332,
      "step": 4989
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.7795883417129517,
      "learning_rate": 0.0005993058580579374,
      "loss": 3.4996,
      "step": 4990
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.9145941734313965,
      "learning_rate": 0.0005993055799242492,
      "loss": 3.1608,
      "step": 4991
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.3476829528808594,
      "learning_rate": 0.0005993053017349144,
      "loss": 3.2222,
      "step": 4992
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.9539947509765625,
      "learning_rate": 0.000599305023489933,
      "loss": 3.2824,
      "step": 4993
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.310356378555298,
      "learning_rate": 0.0005993047451893053,
      "loss": 3.37,
      "step": 4994
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.663503646850586,
      "learning_rate": 0.000599304466833031,
      "loss": 3.1401,
      "step": 4995
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3543097972869873,
      "learning_rate": 0.0005993041884211106,
      "loss": 3.2363,
      "step": 4996
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8958120346069336,
      "learning_rate": 0.0005993039099535438,
      "loss": 3.4902,
      "step": 4997
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0255730152130127,
      "learning_rate": 0.0005993036314303307,
      "loss": 3.4597,
      "step": 4998
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.704752802848816,
      "learning_rate": 0.0005993033528514714,
      "loss": 3.5262,
      "step": 4999
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2603466510772705,
      "learning_rate": 0.0005993030742169659,
      "loss": 3.4227,
      "step": 5000
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.374555826187134,
      "learning_rate": 0.0005993027955268145,
      "loss": 3.3771,
      "step": 5001
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9961488246917725,
      "learning_rate": 0.000599302516781017,
      "loss": 3.409,
      "step": 5002
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4333434104919434,
      "learning_rate": 0.0005993022379795735,
      "loss": 3.3043,
      "step": 5003
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7714895009994507,
      "learning_rate": 0.0005993019591224839,
      "loss": 3.3417,
      "step": 5004
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4159846305847168,
      "learning_rate": 0.0005993016802097486,
      "loss": 3.1719,
      "step": 5005
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2232322692871094,
      "learning_rate": 0.0005993014012413674,
      "loss": 3.3357,
      "step": 5006
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.363985300064087,
      "learning_rate": 0.0005993011222173404,
      "loss": 3.3271,
      "step": 5007
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5353907346725464,
      "learning_rate": 0.0005993008431376677,
      "loss": 3.3385,
      "step": 5008
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8809208869934082,
      "learning_rate": 0.0005993005640023494,
      "loss": 3.3681,
      "step": 5009
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0253400802612305,
      "learning_rate": 0.0005993002848113854,
      "loss": 3.4062,
      "step": 5010
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1216976642608643,
      "learning_rate": 0.0005993000055647758,
      "loss": 3.4139,
      "step": 5011
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.246737480163574,
      "learning_rate": 0.0005992997262625208,
      "loss": 3.3058,
      "step": 5012
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7662078142166138,
      "learning_rate": 0.0005992994469046202,
      "loss": 3.388,
      "step": 5013
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.915628433227539,
      "learning_rate": 0.0005992991674910743,
      "loss": 3.5927,
      "step": 5014
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6386350393295288,
      "learning_rate": 0.0005992988880218829,
      "loss": 3.3712,
      "step": 5015
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1961917877197266,
      "learning_rate": 0.0005992986084970462,
      "loss": 3.4385,
      "step": 5016
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7569653987884521,
      "learning_rate": 0.0005992983289165643,
      "loss": 3.3105,
      "step": 5017
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5624089241027832,
      "learning_rate": 0.0005992980492804372,
      "loss": 3.2502,
      "step": 5018
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3073220252990723,
      "learning_rate": 0.000599297769588665,
      "loss": 3.2763,
      "step": 5019
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4218297004699707,
      "learning_rate": 0.0005992974898412476,
      "loss": 3.1858,
      "step": 5020
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7435790300369263,
      "learning_rate": 0.0005992972100381851,
      "loss": 3.3875,
      "step": 5021
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.105431079864502,
      "learning_rate": 0.0005992969301794777,
      "loss": 3.1845,
      "step": 5022
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8995203971862793,
      "learning_rate": 0.0005992966502651253,
      "loss": 3.3027,
      "step": 5023
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6624069213867188,
      "learning_rate": 0.000599296370295128,
      "loss": 3.0608,
      "step": 5024
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.537373661994934,
      "learning_rate": 0.0005992960902694859,
      "loss": 3.277,
      "step": 5025
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8604564666748047,
      "learning_rate": 0.0005992958101881989,
      "loss": 3.3151,
      "step": 5026
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0209856033325195,
      "learning_rate": 0.0005992955300512673,
      "loss": 3.3493,
      "step": 5027
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.73942232131958,
      "learning_rate": 0.0005992952498586909,
      "loss": 3.1586,
      "step": 5028
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.608912467956543,
      "learning_rate": 0.0005992949696104699,
      "loss": 3.4359,
      "step": 5029
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1077826023101807,
      "learning_rate": 0.0005992946893066044,
      "loss": 3.2653,
      "step": 5030
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.585366129875183,
      "learning_rate": 0.0005992944089470943,
      "loss": 3.387,
      "step": 5031
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9858719110488892,
      "learning_rate": 0.0005992941285319398,
      "loss": 3.3242,
      "step": 5032
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9539488554000854,
      "learning_rate": 0.0005992938480611407,
      "loss": 3.3561,
      "step": 5033
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4073457717895508,
      "learning_rate": 0.0005992935675346974,
      "loss": 3.5721,
      "step": 5034
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7607752084732056,
      "learning_rate": 0.0005992932869526097,
      "loss": 3.3868,
      "step": 5035
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5157872438430786,
      "learning_rate": 0.0005992930063148776,
      "loss": 3.17,
      "step": 5036
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6967767477035522,
      "learning_rate": 0.0005992927256215014,
      "loss": 3.4947,
      "step": 5037
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4520105123519897,
      "learning_rate": 0.000599292444872481,
      "loss": 3.5534,
      "step": 5038
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.73292076587677,
      "learning_rate": 0.0005992921640678166,
      "loss": 3.2008,
      "step": 5039
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5198414325714111,
      "learning_rate": 0.0005992918832075081,
      "loss": 3.2951,
      "step": 5040
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5974034070968628,
      "learning_rate": 0.0005992916022915556,
      "loss": 3.3725,
      "step": 5041
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9840619564056396,
      "learning_rate": 0.000599291321319959,
      "loss": 3.4101,
      "step": 5042
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4647296667099,
      "learning_rate": 0.0005992910402927185,
      "loss": 3.6425,
      "step": 5043
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9528570175170898,
      "learning_rate": 0.0005992907592098343,
      "loss": 3.4032,
      "step": 5044
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.754764437675476,
      "learning_rate": 0.0005992904780713062,
      "loss": 3.0905,
      "step": 5045
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6231944561004639,
      "learning_rate": 0.0005992901968771343,
      "loss": 3.6115,
      "step": 5046
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.91944420337677,
      "learning_rate": 0.0005992899156273189,
      "loss": 3.3818,
      "step": 5047
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6083989143371582,
      "learning_rate": 0.0005992896343218596,
      "loss": 3.335,
      "step": 5048
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.506967306137085,
      "learning_rate": 0.0005992893529607569,
      "loss": 3.2743,
      "step": 5049
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0150816440582275,
      "learning_rate": 0.0005992890715440107,
      "loss": 3.3888,
      "step": 5050
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8184932470321655,
      "learning_rate": 0.0005992887900716209,
      "loss": 3.2519,
      "step": 5051
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6391679048538208,
      "learning_rate": 0.0005992885085435876,
      "loss": 3.4481,
      "step": 5052
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5073344707489014,
      "learning_rate": 0.000599288226959911,
      "loss": 3.2595,
      "step": 5053
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1004114151000977,
      "learning_rate": 0.0005992879453205911,
      "loss": 3.0715,
      "step": 5054
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.698891043663025,
      "learning_rate": 0.0005992876636256279,
      "loss": 3.2636,
      "step": 5055
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6530909538269043,
      "learning_rate": 0.0005992873818750214,
      "loss": 3.2511,
      "step": 5056
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.015092611312866,
      "learning_rate": 0.0005992871000687717,
      "loss": 3.2279,
      "step": 5057
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0465662479400635,
      "learning_rate": 0.0005992868182068791,
      "loss": 2.9825,
      "step": 5058
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5798827409744263,
      "learning_rate": 0.0005992865362893433,
      "loss": 3.2359,
      "step": 5059
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9789419174194336,
      "learning_rate": 0.0005992862543161644,
      "loss": 3.3899,
      "step": 5060
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7817773818969727,
      "learning_rate": 0.0005992859722873426,
      "loss": 3.2291,
      "step": 5061
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8955711126327515,
      "learning_rate": 0.0005992856902028779,
      "loss": 3.2113,
      "step": 5062
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9817873239517212,
      "learning_rate": 0.0005992854080627704,
      "loss": 3.1185,
      "step": 5063
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7362645864486694,
      "learning_rate": 0.00059928512586702,
      "loss": 3.2742,
      "step": 5064
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9035316705703735,
      "learning_rate": 0.0005992848436156268,
      "loss": 3.2261,
      "step": 5065
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7463597059249878,
      "learning_rate": 0.000599284561308591,
      "loss": 3.2154,
      "step": 5066
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5939749479293823,
      "learning_rate": 0.0005992842789459125,
      "loss": 3.2508,
      "step": 5067
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2390079498291016,
      "learning_rate": 0.0005992839965275916,
      "loss": 3.3056,
      "step": 5068
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6719253063201904,
      "learning_rate": 0.0005992837140536279,
      "loss": 3.359,
      "step": 5069
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5247547626495361,
      "learning_rate": 0.0005992834315240217,
      "loss": 3.3832,
      "step": 5070
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5715211629867554,
      "learning_rate": 0.0005992831489387732,
      "loss": 3.5567,
      "step": 5071
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6593022346496582,
      "learning_rate": 0.0005992828662978823,
      "loss": 3.4612,
      "step": 5072
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7747507095336914,
      "learning_rate": 0.000599282583601349,
      "loss": 3.4481,
      "step": 5073
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4876344203948975,
      "learning_rate": 0.0005992823008491735,
      "loss": 3.2613,
      "step": 5074
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9275230169296265,
      "learning_rate": 0.0005992820180413558,
      "loss": 3.4438,
      "step": 5075
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5997490882873535,
      "learning_rate": 0.0005992817351778958,
      "loss": 3.1289,
      "step": 5076
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.706903338432312,
      "learning_rate": 0.0005992814522587937,
      "loss": 3.2654,
      "step": 5077
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1122090816497803,
      "learning_rate": 0.0005992811692840496,
      "loss": 3.4318,
      "step": 5078
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5004324913024902,
      "learning_rate": 0.0005992808862536634,
      "loss": 3.214,
      "step": 5079
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9570406675338745,
      "learning_rate": 0.0005992806031676353,
      "loss": 3.194,
      "step": 5080
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5117563009262085,
      "learning_rate": 0.0005992803200259652,
      "loss": 3.2885,
      "step": 5081
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.757113218307495,
      "learning_rate": 0.0005992800368286533,
      "loss": 3.0773,
      "step": 5082
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5678653717041016,
      "learning_rate": 0.0005992797535756995,
      "loss": 3.3955,
      "step": 5083
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7363606691360474,
      "learning_rate": 0.0005992794702671041,
      "loss": 3.479,
      "step": 5084
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9632482528686523,
      "learning_rate": 0.000599279186902867,
      "loss": 3.5482,
      "step": 5085
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9578546285629272,
      "learning_rate": 0.0005992789034829881,
      "loss": 3.2592,
      "step": 5086
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3988978862762451,
      "learning_rate": 0.0005992786200074676,
      "loss": 3.3766,
      "step": 5087
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6906300783157349,
      "learning_rate": 0.0005992783364763058,
      "loss": 3.1609,
      "step": 5088
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.99418044090271,
      "learning_rate": 0.0005992780528895023,
      "loss": 3.1779,
      "step": 5089
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.258889675140381,
      "learning_rate": 0.0005992777692470574,
      "loss": 3.3023,
      "step": 5090
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5866161584854126,
      "learning_rate": 0.0005992774855489711,
      "loss": 3.1325,
      "step": 5091
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.839076042175293,
      "learning_rate": 0.0005992772017952434,
      "loss": 3.2131,
      "step": 5092
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8493366241455078,
      "learning_rate": 0.0005992769179858745,
      "loss": 3.3513,
      "step": 5093
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6174631118774414,
      "learning_rate": 0.0005992766341208645,
      "loss": 3.1269,
      "step": 5094
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7256014347076416,
      "learning_rate": 0.000599276350200213,
      "loss": 3.5313,
      "step": 5095
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6332511901855469,
      "learning_rate": 0.0005992760662239207,
      "loss": 3.1571,
      "step": 5096
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0085511207580566,
      "learning_rate": 0.0005992757821919872,
      "loss": 3.2948,
      "step": 5097
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.159285306930542,
      "learning_rate": 0.0005992754981044127,
      "loss": 3.3796,
      "step": 5098
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.082629919052124,
      "learning_rate": 0.0005992752139611972,
      "loss": 3.2097,
      "step": 5099
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8997219800949097,
      "learning_rate": 0.0005992749297623408,
      "loss": 3.1309,
      "step": 5100
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4529485702514648,
      "learning_rate": 0.0005992746455078436,
      "loss": 3.3234,
      "step": 5101
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6065011024475098,
      "learning_rate": 0.0005992743611977055,
      "loss": 3.3055,
      "step": 5102
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5042126178741455,
      "learning_rate": 0.0005992740768319268,
      "loss": 3.5167,
      "step": 5103
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4734176397323608,
      "learning_rate": 0.0005992737924105072,
      "loss": 3.4109,
      "step": 5104
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7013018131256104,
      "learning_rate": 0.000599273507933447,
      "loss": 3.2348,
      "step": 5105
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.989231824874878,
      "learning_rate": 0.0005992732234007462,
      "loss": 3.2692,
      "step": 5106
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4276775121688843,
      "learning_rate": 0.000599272938812405,
      "loss": 3.3292,
      "step": 5107
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4361896514892578,
      "learning_rate": 0.0005992726541684233,
      "loss": 3.3735,
      "step": 5108
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6947485208511353,
      "learning_rate": 0.000599272369468801,
      "loss": 3.3055,
      "step": 5109
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5511255264282227,
      "learning_rate": 0.0005992720847135385,
      "loss": 3.5345,
      "step": 5110
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.346853494644165,
      "learning_rate": 0.0005992717999026355,
      "loss": 3.233,
      "step": 5111
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3233163356781006,
      "learning_rate": 0.0005992715150360923,
      "loss": 3.3095,
      "step": 5112
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.112734079360962,
      "learning_rate": 0.0005992712301139089,
      "loss": 3.1216,
      "step": 5113
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6005326509475708,
      "learning_rate": 0.0005992709451360853,
      "loss": 3.2669,
      "step": 5114
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6553852558135986,
      "learning_rate": 0.0005992706601026216,
      "loss": 3.1439,
      "step": 5115
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.575533390045166,
      "learning_rate": 0.0005992703750135178,
      "loss": 3.2136,
      "step": 5116
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.518066167831421,
      "learning_rate": 0.0005992700898687739,
      "loss": 3.3787,
      "step": 5117
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.040813684463501,
      "learning_rate": 0.0005992698046683902,
      "loss": 3.3613,
      "step": 5118
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4072562456130981,
      "learning_rate": 0.0005992695194123666,
      "loss": 3.0067,
      "step": 5119
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4580868482589722,
      "learning_rate": 0.000599269234100703,
      "loss": 3.43,
      "step": 5120
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9568371772766113,
      "learning_rate": 0.0005992689487333998,
      "loss": 3.2252,
      "step": 5121
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5584182739257812,
      "learning_rate": 0.0005992686633104568,
      "loss": 3.3751,
      "step": 5122
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.146700620651245,
      "learning_rate": 0.000599268377831874,
      "loss": 3.2909,
      "step": 5123
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0337460041046143,
      "learning_rate": 0.0005992680922976516,
      "loss": 3.2566,
      "step": 5124
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5488531589508057,
      "learning_rate": 0.0005992678067077896,
      "loss": 3.4653,
      "step": 5125
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7115775346755981,
      "learning_rate": 0.000599267521062288,
      "loss": 3.3867,
      "step": 5126
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.663075566291809,
      "learning_rate": 0.0005992672353611471,
      "loss": 3.2992,
      "step": 5127
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.037008047103882,
      "learning_rate": 0.0005992669496043667,
      "loss": 3.1261,
      "step": 5128
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.898956060409546,
      "learning_rate": 0.0005992666637919469,
      "loss": 3.2915,
      "step": 5129
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4515032768249512,
      "learning_rate": 0.0005992663779238877,
      "loss": 3.2755,
      "step": 5130
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.497262954711914,
      "learning_rate": 0.0005992660920001894,
      "loss": 3.3632,
      "step": 5131
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5406938791275024,
      "learning_rate": 0.0005992658060208518,
      "loss": 3.3742,
      "step": 5132
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6217803955078125,
      "learning_rate": 0.000599265519985875,
      "loss": 3.397,
      "step": 5133
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.444238305091858,
      "learning_rate": 0.000599265233895259,
      "loss": 3.4854,
      "step": 5134
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5909523963928223,
      "learning_rate": 0.0005992649477490042,
      "loss": 3.3955,
      "step": 5135
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.37247896194458,
      "learning_rate": 0.0005992646615471102,
      "loss": 3.2002,
      "step": 5136
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.590891718864441,
      "learning_rate": 0.0005992643752895774,
      "loss": 3.2793,
      "step": 5137
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7921526432037354,
      "learning_rate": 0.0005992640889764056,
      "loss": 3.038,
      "step": 5138
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9851337671279907,
      "learning_rate": 0.000599263802607595,
      "loss": 3.2165,
      "step": 5139
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5387539863586426,
      "learning_rate": 0.0005992635161831457,
      "loss": 3.2804,
      "step": 5140
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4438222646713257,
      "learning_rate": 0.0005992632297030574,
      "loss": 3.2633,
      "step": 5141
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.063640594482422,
      "learning_rate": 0.0005992629431673305,
      "loss": 3.3072,
      "step": 5142
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4370927810668945,
      "learning_rate": 0.0005992626565759651,
      "loss": 3.2755,
      "step": 5143
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0068275928497314,
      "learning_rate": 0.0005992623699289611,
      "loss": 3.3918,
      "step": 5144
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.550052285194397,
      "learning_rate": 0.0005992620832263186,
      "loss": 3.2826,
      "step": 5145
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5664820671081543,
      "learning_rate": 0.0005992617964680375,
      "loss": 3.3884,
      "step": 5146
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5111300945281982,
      "learning_rate": 0.000599261509654118,
      "loss": 3.233,
      "step": 5147
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5917117595672607,
      "learning_rate": 0.0005992612227845602,
      "loss": 3.3218,
      "step": 5148
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1344449520111084,
      "learning_rate": 0.0005992609358593641,
      "loss": 3.4084,
      "step": 5149
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7271846532821655,
      "learning_rate": 0.0005992606488785297,
      "loss": 3.3859,
      "step": 5150
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.811652660369873,
      "learning_rate": 0.0005992603618420572,
      "loss": 3.2512,
      "step": 5151
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.889587640762329,
      "learning_rate": 0.0005992600747499464,
      "loss": 3.166,
      "step": 5152
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.022000789642334,
      "learning_rate": 0.0005992597876021976,
      "loss": 3.284,
      "step": 5153
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5401833057403564,
      "learning_rate": 0.0005992595003988107,
      "loss": 3.1709,
      "step": 5154
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.04540753364563,
      "learning_rate": 0.0005992592131397859,
      "loss": 3.153,
      "step": 5155
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0006325244903564,
      "learning_rate": 0.000599258925825123,
      "loss": 3.3726,
      "step": 5156
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7910345792770386,
      "learning_rate": 0.0005992586384548224,
      "loss": 3.3186,
      "step": 5157
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.224165201187134,
      "learning_rate": 0.0005992583510288839,
      "loss": 3.646,
      "step": 5158
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6734795570373535,
      "learning_rate": 0.0005992580635473076,
      "loss": 3.4743,
      "step": 5159
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8807778358459473,
      "learning_rate": 0.0005992577760100935,
      "loss": 3.2586,
      "step": 5160
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3082003593444824,
      "learning_rate": 0.0005992574884172418,
      "loss": 3.463,
      "step": 5161
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4963335990905762,
      "learning_rate": 0.0005992572007687526,
      "loss": 3.3312,
      "step": 5162
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.780546188354492,
      "learning_rate": 0.0005992569130646257,
      "loss": 3.2641,
      "step": 5163
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7927377223968506,
      "learning_rate": 0.0005992566253048613,
      "loss": 3.4239,
      "step": 5164
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9422502517700195,
      "learning_rate": 0.0005992563374894595,
      "loss": 3.547,
      "step": 5165
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4263689517974854,
      "learning_rate": 0.0005992560496184202,
      "loss": 3.4065,
      "step": 5166
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2680670022964478,
      "learning_rate": 0.0005992557616917437,
      "loss": 3.5042,
      "step": 5167
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3994553089141846,
      "learning_rate": 0.0005992554737094298,
      "loss": 3.243,
      "step": 5168
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1113641262054443,
      "learning_rate": 0.0005992551856714788,
      "loss": 3.3991,
      "step": 5169
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.841978907585144,
      "learning_rate": 0.0005992548975778904,
      "loss": 3.4742,
      "step": 5170
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6737589836120605,
      "learning_rate": 0.0005992546094286649,
      "loss": 3.1768,
      "step": 5171
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5986719131469727,
      "learning_rate": 0.0005992543212238025,
      "loss": 3.1894,
      "step": 5172
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.485889196395874,
      "learning_rate": 0.0005992540329633029,
      "loss": 3.3711,
      "step": 5173
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.87983238697052,
      "learning_rate": 0.0005992537446471663,
      "loss": 3.3341,
      "step": 5174
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.115807056427002,
      "learning_rate": 0.0005992534562753929,
      "loss": 3.1818,
      "step": 5175
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8628238439559937,
      "learning_rate": 0.0005992531678479826,
      "loss": 3.2741,
      "step": 5176
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.007276773452759,
      "learning_rate": 0.0005992528793649356,
      "loss": 3.2223,
      "step": 5177
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7686349153518677,
      "learning_rate": 0.0005992525908262516,
      "loss": 3.3966,
      "step": 5178
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8451998233795166,
      "learning_rate": 0.0005992523022319311,
      "loss": 3.4021,
      "step": 5179
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.659527540206909,
      "learning_rate": 0.0005992520135819737,
      "loss": 3.2338,
      "step": 5180
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8438563346862793,
      "learning_rate": 0.0005992517248763799,
      "loss": 3.506,
      "step": 5181
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3743462562561035,
      "learning_rate": 0.0005992514361151496,
      "loss": 3.3374,
      "step": 5182
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.284006118774414,
      "learning_rate": 0.0005992511472982827,
      "loss": 3.2028,
      "step": 5183
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.822617530822754,
      "learning_rate": 0.0005992508584257794,
      "loss": 3.6204,
      "step": 5184
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.798433542251587,
      "learning_rate": 0.0005992505694976398,
      "loss": 3.2035,
      "step": 5185
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.846307396888733,
      "learning_rate": 0.0005992502805138637,
      "loss": 3.346,
      "step": 5186
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9375953674316406,
      "learning_rate": 0.0005992499914744514,
      "loss": 3.2014,
      "step": 5187
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2634429931640625,
      "learning_rate": 0.0005992497023794028,
      "loss": 3.1643,
      "step": 5188
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.13437557220459,
      "learning_rate": 0.0005992494132287182,
      "loss": 3.2772,
      "step": 5189
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5663998126983643,
      "learning_rate": 0.0005992491240223974,
      "loss": 3.3482,
      "step": 5190
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7231465578079224,
      "learning_rate": 0.0005992488347604406,
      "loss": 3.6256,
      "step": 5191
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4684243202209473,
      "learning_rate": 0.0005992485454428477,
      "loss": 3.2631,
      "step": 5192
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7005033493041992,
      "learning_rate": 0.0005992482560696189,
      "loss": 3.4044,
      "step": 5193
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6480743885040283,
      "learning_rate": 0.0005992479666407542,
      "loss": 3.453,
      "step": 5194
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5531036853790283,
      "learning_rate": 0.0005992476771562536,
      "loss": 3.2907,
      "step": 5195
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7223188877105713,
      "learning_rate": 0.0005992473876161173,
      "loss": 3.3952,
      "step": 5196
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8438317775726318,
      "learning_rate": 0.0005992470980203451,
      "loss": 3.3315,
      "step": 5197
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.730527400970459,
      "learning_rate": 0.0005992468083689373,
      "loss": 3.1859,
      "step": 5198
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.238448143005371,
      "learning_rate": 0.0005992465186618939,
      "loss": 3.652,
      "step": 5199
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0329701900482178,
      "learning_rate": 0.000599246228899215,
      "loss": 3.3944,
      "step": 5200
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9569309949874878,
      "learning_rate": 0.0005992459390809005,
      "loss": 3.3385,
      "step": 5201
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.204981803894043,
      "learning_rate": 0.0005992456492069506,
      "loss": 3.3432,
      "step": 5202
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2210588455200195,
      "learning_rate": 0.0005992453592773651,
      "loss": 3.3864,
      "step": 5203
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4492506980895996,
      "learning_rate": 0.0005992450692921444,
      "loss": 3.2729,
      "step": 5204
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1779284477233887,
      "learning_rate": 0.0005992447792512882,
      "loss": 3.4324,
      "step": 5205
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.031473398208618,
      "learning_rate": 0.000599244489154797,
      "loss": 2.9676,
      "step": 5206
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4012292623519897,
      "learning_rate": 0.0005992441990026704,
      "loss": 3.352,
      "step": 5207
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4400012493133545,
      "learning_rate": 0.0005992439087949087,
      "loss": 3.498,
      "step": 5208
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.95061993598938,
      "learning_rate": 0.0005992436185315121,
      "loss": 3.3423,
      "step": 5209
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6210460662841797,
      "learning_rate": 0.0005992433282124803,
      "loss": 3.2286,
      "step": 5210
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6118948459625244,
      "learning_rate": 0.0005992430378378135,
      "loss": 3.6416,
      "step": 5211
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.721632957458496,
      "learning_rate": 0.0005992427474075118,
      "loss": 3.2082,
      "step": 5212
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3818583488464355,
      "learning_rate": 0.0005992424569215752,
      "loss": 3.625,
      "step": 5213
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1377601623535156,
      "learning_rate": 0.0005992421663800038,
      "loss": 3.3404,
      "step": 5214
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8499596118927002,
      "learning_rate": 0.0005992418757827977,
      "loss": 3.3771,
      "step": 5215
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7978826761245728,
      "learning_rate": 0.0005992415851299568,
      "loss": 3.3392,
      "step": 5216
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4532221555709839,
      "learning_rate": 0.0005992412944214813,
      "loss": 3.442,
      "step": 5217
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.056349277496338,
      "learning_rate": 0.0005992410036573711,
      "loss": 3.1241,
      "step": 5218
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.9574782848358154,
      "learning_rate": 0.0005992407128376264,
      "loss": 3.27,
      "step": 5219
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7885065078735352,
      "learning_rate": 0.0005992404219622472,
      "loss": 3.4533,
      "step": 5220
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1042728424072266,
      "learning_rate": 0.0005992401310312335,
      "loss": 3.4597,
      "step": 5221
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7356560230255127,
      "learning_rate": 0.0005992398400445855,
      "loss": 3.3747,
      "step": 5222
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.2401134967803955,
      "learning_rate": 0.0005992395490023033,
      "loss": 3.3922,
      "step": 5223
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0173182487487793,
      "learning_rate": 0.0005992392579043865,
      "loss": 3.2072,
      "step": 5224
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.070683002471924,
      "learning_rate": 0.0005992389667508356,
      "loss": 3.0185,
      "step": 5225
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5036875009536743,
      "learning_rate": 0.0005992386755416506,
      "loss": 3.4857,
      "step": 5226
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.191143274307251,
      "learning_rate": 0.0005992383842768315,
      "loss": 3.4383,
      "step": 5227
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0476672649383545,
      "learning_rate": 0.0005992380929563783,
      "loss": 3.1406,
      "step": 5228
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9006617069244385,
      "learning_rate": 0.000599237801580291,
      "loss": 3.3827,
      "step": 5229
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.966235876083374,
      "learning_rate": 0.0005992375101485698,
      "loss": 3.3007,
      "step": 5230
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6727967262268066,
      "learning_rate": 0.0005992372186612148,
      "loss": 3.2519,
      "step": 5231
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8404934406280518,
      "learning_rate": 0.0005992369271182258,
      "loss": 3.4239,
      "step": 5232
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6255134344100952,
      "learning_rate": 0.000599236635519603,
      "loss": 3.2225,
      "step": 5233
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.2575173377990723,
      "learning_rate": 0.0005992363438653466,
      "loss": 3.3126,
      "step": 5234
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3566417694091797,
      "learning_rate": 0.0005992360521554564,
      "loss": 3.0631,
      "step": 5235
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.297778606414795,
      "learning_rate": 0.0005992357603899327,
      "loss": 3.5365,
      "step": 5236
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8327876329421997,
      "learning_rate": 0.0005992354685687754,
      "loss": 3.3338,
      "step": 5237
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.879860758781433,
      "learning_rate": 0.0005992351766919845,
      "loss": 3.5563,
      "step": 5238
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4950283765792847,
      "learning_rate": 0.0005992348847595601,
      "loss": 3.2282,
      "step": 5239
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5543774366378784,
      "learning_rate": 0.0005992345927715024,
      "loss": 3.3254,
      "step": 5240
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7880513668060303,
      "learning_rate": 0.0005992343007278113,
      "loss": 3.599,
      "step": 5241
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.06101393699646,
      "learning_rate": 0.0005992340086284868,
      "loss": 3.402,
      "step": 5242
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6128512620925903,
      "learning_rate": 0.0005992337164735292,
      "loss": 3.2026,
      "step": 5243
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.524742603302002,
      "learning_rate": 0.0005992334242629383,
      "loss": 3.3948,
      "step": 5244
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.037315607070923,
      "learning_rate": 0.0005992331319967142,
      "loss": 3.4192,
      "step": 5245
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6065741777420044,
      "learning_rate": 0.0005992328396748572,
      "loss": 3.0776,
      "step": 5246
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7645632028579712,
      "learning_rate": 0.000599232547297367,
      "loss": 3.4172,
      "step": 5247
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4549129009246826,
      "learning_rate": 0.0005992322548642439,
      "loss": 3.238,
      "step": 5248
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.581616759300232,
      "learning_rate": 0.0005992319623754878,
      "loss": 3.1662,
      "step": 5249
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2876365184783936,
      "learning_rate": 0.000599231669831099,
      "loss": 3.3535,
      "step": 5250
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.759359359741211,
      "learning_rate": 0.0005992313772310772,
      "loss": 3.2158,
      "step": 5251
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4935791492462158,
      "learning_rate": 0.0005992310845754228,
      "loss": 3.3793,
      "step": 5252
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4519917964935303,
      "learning_rate": 0.0005992307918641355,
      "loss": 3.0447,
      "step": 5253
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6358017921447754,
      "learning_rate": 0.0005992304990972157,
      "loss": 3.3627,
      "step": 5254
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6355634927749634,
      "learning_rate": 0.0005992302062746632,
      "loss": 3.2914,
      "step": 5255
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5238487720489502,
      "learning_rate": 0.0005992299133964783,
      "loss": 3.4018,
      "step": 5256
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.922092914581299,
      "learning_rate": 0.0005992296204626608,
      "loss": 3.139,
      "step": 5257
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9575984477996826,
      "learning_rate": 0.0005992293274732109,
      "loss": 3.2402,
      "step": 5258
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9518142938613892,
      "learning_rate": 0.0005992290344281286,
      "loss": 3.5063,
      "step": 5259
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.739520311355591,
      "learning_rate": 0.0005992287413274139,
      "loss": 3.2841,
      "step": 5260
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7574756145477295,
      "learning_rate": 0.000599228448171067,
      "loss": 3.3019,
      "step": 5261
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.250666618347168,
      "learning_rate": 0.0005992281549590879,
      "loss": 3.4581,
      "step": 5262
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3216028213500977,
      "learning_rate": 0.0005992278616914767,
      "loss": 3.4722,
      "step": 5263
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6286343336105347,
      "learning_rate": 0.0005992275683682333,
      "loss": 3.327,
      "step": 5264
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.617769479751587,
      "learning_rate": 0.0005992272749893579,
      "loss": 3.3957,
      "step": 5265
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9428596496582031,
      "learning_rate": 0.0005992269815548504,
      "loss": 3.1908,
      "step": 5266
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6491570472717285,
      "learning_rate": 0.000599226688064711,
      "loss": 3.3227,
      "step": 5267
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0027406215667725,
      "learning_rate": 0.0005992263945189398,
      "loss": 3.3408,
      "step": 5268
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2573513984680176,
      "learning_rate": 0.0005992261009175367,
      "loss": 3.3278,
      "step": 5269
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.694169521331787,
      "learning_rate": 0.0005992258072605018,
      "loss": 3.2005,
      "step": 5270
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9257400035858154,
      "learning_rate": 0.0005992255135478352,
      "loss": 3.0057,
      "step": 5271
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.298384666442871,
      "learning_rate": 0.0005992252197795369,
      "loss": 3.331,
      "step": 5272
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.823022484779358,
      "learning_rate": 0.0005992249259556071,
      "loss": 3.2601,
      "step": 5273
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.321643352508545,
      "learning_rate": 0.0005992246320760456,
      "loss": 3.1276,
      "step": 5274
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5284016132354736,
      "learning_rate": 0.0005992243381408525,
      "loss": 3.2907,
      "step": 5275
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5023711919784546,
      "learning_rate": 0.0005992240441500282,
      "loss": 3.0902,
      "step": 5276
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3274481296539307,
      "learning_rate": 0.0005992237501035724,
      "loss": 3.4464,
      "step": 5277
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.875846266746521,
      "learning_rate": 0.0005992234560014852,
      "loss": 3.4299,
      "step": 5278
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5844494104385376,
      "learning_rate": 0.0005992231618437668,
      "loss": 3.1728,
      "step": 5279
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8619465827941895,
      "learning_rate": 0.0005992228676304171,
      "loss": 3.1142,
      "step": 5280
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2869890928268433,
      "learning_rate": 0.0005992225733614363,
      "loss": 3.4329,
      "step": 5281
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7750139236450195,
      "learning_rate": 0.0005992222790368242,
      "loss": 3.3004,
      "step": 5282
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6264430284500122,
      "learning_rate": 0.0005992219846565812,
      "loss": 3.3189,
      "step": 5283
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5787608623504639,
      "learning_rate": 0.0005992216902207071,
      "loss": 3.4527,
      "step": 5284
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7911005020141602,
      "learning_rate": 0.0005992213957292022,
      "loss": 3.5466,
      "step": 5285
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5963294506072998,
      "learning_rate": 0.0005992211011820661,
      "loss": 3.1918,
      "step": 5286
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9288655519485474,
      "learning_rate": 0.0005992208065792994,
      "loss": 3.3892,
      "step": 5287
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.416235089302063,
      "learning_rate": 0.0005992205119209019,
      "loss": 3.3673,
      "step": 5288
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.549776554107666,
      "learning_rate": 0.0005992202172068735,
      "loss": 3.527,
      "step": 5289
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8600274324417114,
      "learning_rate": 0.0005992199224372145,
      "loss": 3.1551,
      "step": 5290
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8782936334609985,
      "learning_rate": 0.0005992196276119249,
      "loss": 3.226,
      "step": 5291
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0188069343566895,
      "learning_rate": 0.0005992193327310047,
      "loss": 3.4416,
      "step": 5292
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.167863368988037,
      "learning_rate": 0.0005992190377944539,
      "loss": 3.1996,
      "step": 5293
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.643919825553894,
      "learning_rate": 0.0005992187428022726,
      "loss": 3.351,
      "step": 5294
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6390372514724731,
      "learning_rate": 0.000599218447754461,
      "loss": 3.4122,
      "step": 5295
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0952060222625732,
      "learning_rate": 0.0005992181526510191,
      "loss": 3.2909,
      "step": 5296
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4160261154174805,
      "learning_rate": 0.0005992178574919468,
      "loss": 3.4125,
      "step": 5297
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4945001602172852,
      "learning_rate": 0.0005992175622772443,
      "loss": 3.2755,
      "step": 5298
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8447239398956299,
      "learning_rate": 0.0005992172670069116,
      "loss": 3.3037,
      "step": 5299
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7176728248596191,
      "learning_rate": 0.0005992169716809487,
      "loss": 3.4525,
      "step": 5300
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5277981758117676,
      "learning_rate": 0.0005992166762993557,
      "loss": 3.1436,
      "step": 5301
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4840010404586792,
      "learning_rate": 0.0005992163808621328,
      "loss": 3.3515,
      "step": 5302
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.796047568321228,
      "learning_rate": 0.0005992160853692798,
      "loss": 3.2562,
      "step": 5303
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7216840982437134,
      "learning_rate": 0.0005992157898207969,
      "loss": 3.5996,
      "step": 5304
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4758862257003784,
      "learning_rate": 0.0005992154942166842,
      "loss": 3.2798,
      "step": 5305
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9290103912353516,
      "learning_rate": 0.0005992151985569417,
      "loss": 3.1877,
      "step": 5306
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1360204219818115,
      "learning_rate": 0.0005992149028415693,
      "loss": 3.3188,
      "step": 5307
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2073535919189453,
      "learning_rate": 0.0005992146070705674,
      "loss": 3.3777,
      "step": 5308
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6478891372680664,
      "learning_rate": 0.0005992143112439357,
      "loss": 3.5209,
      "step": 5309
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5160163640975952,
      "learning_rate": 0.0005992140153616746,
      "loss": 3.1823,
      "step": 5310
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1452889442443848,
      "learning_rate": 0.0005992137194237838,
      "loss": 3.154,
      "step": 5311
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7905107736587524,
      "learning_rate": 0.0005992134234302635,
      "loss": 3.3607,
      "step": 5312
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7641079425811768,
      "learning_rate": 0.0005992131273811139,
      "loss": 3.1918,
      "step": 5313
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9051645994186401,
      "learning_rate": 0.0005992128312763349,
      "loss": 3.2502,
      "step": 5314
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9432083368301392,
      "learning_rate": 0.0005992125351159265,
      "loss": 3.2907,
      "step": 5315
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5055662393569946,
      "learning_rate": 0.0005992122388998889,
      "loss": 3.0103,
      "step": 5316
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9064642190933228,
      "learning_rate": 0.0005992119426282221,
      "loss": 3.0404,
      "step": 5317
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6016173362731934,
      "learning_rate": 0.0005992116463009262,
      "loss": 3.2035,
      "step": 5318
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5527799129486084,
      "learning_rate": 0.0005992113499180011,
      "loss": 3.3698,
      "step": 5319
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4647637605667114,
      "learning_rate": 0.0005992110534794471,
      "loss": 3.3987,
      "step": 5320
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4874635934829712,
      "learning_rate": 0.000599210756985264,
      "loss": 3.1551,
      "step": 5321
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6011518239974976,
      "learning_rate": 0.000599210460435452,
      "loss": 3.2732,
      "step": 5322
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.732267141342163,
      "learning_rate": 0.0005992101638300112,
      "loss": 3.3016,
      "step": 5323
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3835524320602417,
      "learning_rate": 0.0005992098671689414,
      "loss": 3.3264,
      "step": 5324
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3421515226364136,
      "learning_rate": 0.000599209570452243,
      "loss": 3.2966,
      "step": 5325
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0001354217529297,
      "learning_rate": 0.0005992092736799159,
      "loss": 3.3992,
      "step": 5326
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6290818452835083,
      "learning_rate": 0.00059920897685196,
      "loss": 3.3222,
      "step": 5327
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8977996110916138,
      "learning_rate": 0.0005992086799683757,
      "loss": 3.5035,
      "step": 5328
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5150259733200073,
      "learning_rate": 0.0005992083830291627,
      "loss": 3.1582,
      "step": 5329
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5636913776397705,
      "learning_rate": 0.0005992080860343213,
      "loss": 3.3184,
      "step": 5330
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.84853196144104,
      "learning_rate": 0.0005992077889838515,
      "loss": 3.1313,
      "step": 5331
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4924592971801758,
      "learning_rate": 0.0005992074918777533,
      "loss": 3.3676,
      "step": 5332
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.7965750694274902,
      "learning_rate": 0.0005992071947160268,
      "loss": 3.5045,
      "step": 5333
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5305124521255493,
      "learning_rate": 0.0005992068974986719,
      "loss": 3.5731,
      "step": 5334
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5153131484985352,
      "learning_rate": 0.0005992066002256888,
      "loss": 3.2532,
      "step": 5335
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2922747135162354,
      "learning_rate": 0.0005992063028970777,
      "loss": 3.4424,
      "step": 5336
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3934314250946045,
      "learning_rate": 0.0005992060055128384,
      "loss": 3.6702,
      "step": 5337
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5672446489334106,
      "learning_rate": 0.000599205708072971,
      "loss": 3.3395,
      "step": 5338
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.201730489730835,
      "learning_rate": 0.0005992054105774757,
      "loss": 3.5865,
      "step": 5339
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.794203281402588,
      "learning_rate": 0.0005992051130263525,
      "loss": 3.3753,
      "step": 5340
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6350452899932861,
      "learning_rate": 0.0005992048154196014,
      "loss": 3.2452,
      "step": 5341
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.744500994682312,
      "learning_rate": 0.0005992045177572224,
      "loss": 3.1287,
      "step": 5342
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8163288831710815,
      "learning_rate": 0.0005992042200392157,
      "loss": 3.2336,
      "step": 5343
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1989734172821045,
      "learning_rate": 0.0005992039222655813,
      "loss": 3.2713,
      "step": 5344
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.816095232963562,
      "learning_rate": 0.0005992036244363191,
      "loss": 3.1428,
      "step": 5345
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6886004209518433,
      "learning_rate": 0.0005992033265514294,
      "loss": 3.3838,
      "step": 5346
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6401535272598267,
      "learning_rate": 0.0005992030286109122,
      "loss": 3.552,
      "step": 5347
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3003777265548706,
      "learning_rate": 0.0005992027306147675,
      "loss": 3.2889,
      "step": 5348
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7118878364562988,
      "learning_rate": 0.0005992024325629954,
      "loss": 3.286,
      "step": 5349
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.526995897293091,
      "learning_rate": 0.0005992021344555958,
      "loss": 3.4393,
      "step": 5350
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8602882623672485,
      "learning_rate": 0.000599201836292569,
      "loss": 3.5707,
      "step": 5351
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.617540121078491,
      "learning_rate": 0.0005992015380739148,
      "loss": 3.2501,
      "step": 5352
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6611104011535645,
      "learning_rate": 0.0005992012397996334,
      "loss": 3.4124,
      "step": 5353
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.311267137527466,
      "learning_rate": 0.0005992009414697249,
      "loss": 3.1766,
      "step": 5354
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0663108825683594,
      "learning_rate": 0.0005992006430841892,
      "loss": 3.1817,
      "step": 5355
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.7697055339813232,
      "learning_rate": 0.0005992003446430267,
      "loss": 3.4799,
      "step": 5356
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8962173461914062,
      "learning_rate": 0.000599200046146237,
      "loss": 3.5299,
      "step": 5357
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8917752504348755,
      "learning_rate": 0.0005991997475938204,
      "loss": 3.1173,
      "step": 5358
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7565739154815674,
      "learning_rate": 0.000599199448985777,
      "loss": 3.1305,
      "step": 5359
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7300728559494019,
      "learning_rate": 0.0005991991503221067,
      "loss": 3.2726,
      "step": 5360
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.864200234413147,
      "learning_rate": 0.0005991988516028095,
      "loss": 2.9434,
      "step": 5361
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5805153846740723,
      "learning_rate": 0.0005991985528278858,
      "loss": 3.0611,
      "step": 5362
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.459877610206604,
      "learning_rate": 0.0005991982539973353,
      "loss": 3.4842,
      "step": 5363
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7770397663116455,
      "learning_rate": 0.0005991979551111583,
      "loss": 3.2676,
      "step": 5364
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7247799634933472,
      "learning_rate": 0.0005991976561693547,
      "loss": 3.5313,
      "step": 5365
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1897220611572266,
      "learning_rate": 0.0005991973571719246,
      "loss": 3.3836,
      "step": 5366
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6732542514801025,
      "learning_rate": 0.0005991970581188681,
      "loss": 3.6175,
      "step": 5367
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9940484762191772,
      "learning_rate": 0.0005991967590101853,
      "loss": 3.4828,
      "step": 5368
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8934249877929688,
      "learning_rate": 0.0005991964598458761,
      "loss": 3.5251,
      "step": 5369
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0204567909240723,
      "learning_rate": 0.0005991961606259406,
      "loss": 3.3575,
      "step": 5370
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.947702407836914,
      "learning_rate": 0.0005991958613503788,
      "loss": 3.1189,
      "step": 5371
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2519187927246094,
      "learning_rate": 0.0005991955620191909,
      "loss": 3.3265,
      "step": 5372
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5899157524108887,
      "learning_rate": 0.000599195262632377,
      "loss": 3.1453,
      "step": 5373
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7082016468048096,
      "learning_rate": 0.0005991949631899369,
      "loss": 3.3931,
      "step": 5374
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0922820568084717,
      "learning_rate": 0.0005991946636918709,
      "loss": 3.3268,
      "step": 5375
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.485802173614502,
      "learning_rate": 0.000599194364138179,
      "loss": 3.2724,
      "step": 5376
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7622199058532715,
      "learning_rate": 0.0005991940645288611,
      "loss": 3.1265,
      "step": 5377
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.810718536376953,
      "learning_rate": 0.0005991937648639176,
      "loss": 3.4061,
      "step": 5378
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6876169443130493,
      "learning_rate": 0.0005991934651433481,
      "loss": 3.5801,
      "step": 5379
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.441377878189087,
      "learning_rate": 0.0005991931653671531,
      "loss": 3.3315,
      "step": 5380
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.525209903717041,
      "learning_rate": 0.0005991928655353322,
      "loss": 3.3424,
      "step": 5381
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.532414674758911,
      "learning_rate": 0.0005991925656478859,
      "loss": 3.2329,
      "step": 5382
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6633954048156738,
      "learning_rate": 0.0005991922657048139,
      "loss": 3.4589,
      "step": 5383
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9566413164138794,
      "learning_rate": 0.0005991919657061164,
      "loss": 3.2448,
      "step": 5384
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9251174926757812,
      "learning_rate": 0.0005991916656517936,
      "loss": 3.4578,
      "step": 5385
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7412769794464111,
      "learning_rate": 0.0005991913655418454,
      "loss": 3.3211,
      "step": 5386
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7042726278305054,
      "learning_rate": 0.0005991910653762719,
      "loss": 3.3566,
      "step": 5387
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5529415607452393,
      "learning_rate": 0.000599190765155073,
      "loss": 3.2895,
      "step": 5388
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6942800283432007,
      "learning_rate": 0.000599190464878249,
      "loss": 3.1202,
      "step": 5389
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.058432102203369,
      "learning_rate": 0.0005991901645457997,
      "loss": 3.2505,
      "step": 5390
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.147695541381836,
      "learning_rate": 0.0005991898641577255,
      "loss": 3.3666,
      "step": 5391
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.95732045173645,
      "learning_rate": 0.0005991895637140262,
      "loss": 3.1277,
      "step": 5392
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8551446199417114,
      "learning_rate": 0.0005991892632147019,
      "loss": 3.2319,
      "step": 5393
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0800280570983887,
      "learning_rate": 0.0005991889626597525,
      "loss": 3.3416,
      "step": 5394
    },
    {
      "epoch": 0.07,
      "grad_norm": 4.516450881958008,
      "learning_rate": 0.0005991886620491785,
      "loss": 3.2476,
      "step": 5395
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.846334934234619,
      "learning_rate": 0.0005991883613829795,
      "loss": 3.4816,
      "step": 5396
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.973711371421814,
      "learning_rate": 0.0005991880606611557,
      "loss": 3.4137,
      "step": 5397
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.7551655769348145,
      "learning_rate": 0.0005991877598837073,
      "loss": 3.2454,
      "step": 5398
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.360135793685913,
      "learning_rate": 0.0005991874590506343,
      "loss": 3.1131,
      "step": 5399
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.879185438156128,
      "learning_rate": 0.0005991871581619366,
      "loss": 3.505,
      "step": 5400
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6165037155151367,
      "learning_rate": 0.0005991868572176144,
      "loss": 3.3144,
      "step": 5401
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.9800031185150146,
      "learning_rate": 0.0005991865562176678,
      "loss": 3.1268,
      "step": 5402
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.55924129486084,
      "learning_rate": 0.0005991862551620966,
      "loss": 3.3041,
      "step": 5403
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1365513801574707,
      "learning_rate": 0.0005991859540509011,
      "loss": 3.3185,
      "step": 5404
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.41435706615448,
      "learning_rate": 0.0005991856528840812,
      "loss": 3.1191,
      "step": 5405
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6912994384765625,
      "learning_rate": 0.0005991853516616372,
      "loss": 3.2777,
      "step": 5406
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4632761478424072,
      "learning_rate": 0.000599185050383569,
      "loss": 3.0981,
      "step": 5407
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2086323499679565,
      "learning_rate": 0.0005991847490498765,
      "loss": 3.4518,
      "step": 5408
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6725635528564453,
      "learning_rate": 0.00059918444766056,
      "loss": 3.2844,
      "step": 5409
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8731516599655151,
      "learning_rate": 0.0005991841462156194,
      "loss": 3.508,
      "step": 5410
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6594668626785278,
      "learning_rate": 0.0005991838447150549,
      "loss": 2.9754,
      "step": 5411
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8553762435913086,
      "learning_rate": 0.0005991835431588665,
      "loss": 3.0985,
      "step": 5412
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1111440658569336,
      "learning_rate": 0.0005991832415470542,
      "loss": 3.2219,
      "step": 5413
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3738367557525635,
      "learning_rate": 0.000599182939879618,
      "loss": 3.4079,
      "step": 5414
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.691853642463684,
      "learning_rate": 0.0005991826381565582,
      "loss": 3.271,
      "step": 5415
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.2840850353240967,
      "learning_rate": 0.0005991823363778745,
      "loss": 3.3723,
      "step": 5416
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6283905506134033,
      "learning_rate": 0.0005991820345435673,
      "loss": 3.1111,
      "step": 5417
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6758995056152344,
      "learning_rate": 0.0005991817326536365,
      "loss": 3.1491,
      "step": 5418
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3262550830841064,
      "learning_rate": 0.0005991814307080821,
      "loss": 3.3995,
      "step": 5419
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.46459698677063,
      "learning_rate": 0.0005991811287069042,
      "loss": 3.3682,
      "step": 5420
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.969218373298645,
      "learning_rate": 0.000599180826650103,
      "loss": 3.264,
      "step": 5421
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0535824298858643,
      "learning_rate": 0.0005991805245376784,
      "loss": 3.3122,
      "step": 5422
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.960747718811035,
      "learning_rate": 0.0005991802223696305,
      "loss": 3.5729,
      "step": 5423
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3654751777648926,
      "learning_rate": 0.0005991799201459593,
      "loss": 3.1967,
      "step": 5424
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4752846956253052,
      "learning_rate": 0.000599179617866665,
      "loss": 3.3105,
      "step": 5425
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.039332628250122,
      "learning_rate": 0.0005991793155317475,
      "loss": 3.1083,
      "step": 5426
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2115211486816406,
      "learning_rate": 0.0005991790131412068,
      "loss": 3.3485,
      "step": 5427
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.744429111480713,
      "learning_rate": 0.0005991787106950432,
      "loss": 3.394,
      "step": 5428
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7682687044143677,
      "learning_rate": 0.0005991784081932567,
      "loss": 3.2951,
      "step": 5429
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5215916633605957,
      "learning_rate": 0.0005991781056358472,
      "loss": 3.5205,
      "step": 5430
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.368436098098755,
      "learning_rate": 0.0005991778030228148,
      "loss": 3.2278,
      "step": 5431
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.775315523147583,
      "learning_rate": 0.0005991775003541597,
      "loss": 3.3182,
      "step": 5432
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9699151515960693,
      "learning_rate": 0.0005991771976298818,
      "loss": 3.262,
      "step": 5433
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5908583402633667,
      "learning_rate": 0.0005991768948499812,
      "loss": 3.2106,
      "step": 5434
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0967295169830322,
      "learning_rate": 0.000599176592014458,
      "loss": 3.1398,
      "step": 5435
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7580626010894775,
      "learning_rate": 0.0005991762891233121,
      "loss": 3.0392,
      "step": 5436
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8574085235595703,
      "learning_rate": 0.0005991759861765438,
      "loss": 3.2064,
      "step": 5437
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8963454961776733,
      "learning_rate": 0.0005991756831741531,
      "loss": 3.5184,
      "step": 5438
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.676352858543396,
      "learning_rate": 0.0005991753801161399,
      "loss": 3.3804,
      "step": 5439
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8353497982025146,
      "learning_rate": 0.0005991750770025043,
      "loss": 3.2555,
      "step": 5440
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.633647918701172,
      "learning_rate": 0.0005991747738332465,
      "loss": 3.4732,
      "step": 5441
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8699493408203125,
      "learning_rate": 0.0005991744706083664,
      "loss": 3.2769,
      "step": 5442
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5148160457611084,
      "learning_rate": 0.0005991741673278642,
      "loss": 3.3291,
      "step": 5443
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8967013359069824,
      "learning_rate": 0.0005991738639917399,
      "loss": 3.417,
      "step": 5444
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4579241275787354,
      "learning_rate": 0.0005991735605999933,
      "loss": 3.2738,
      "step": 5445
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8286786079406738,
      "learning_rate": 0.0005991732571526249,
      "loss": 3.0437,
      "step": 5446
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.036888360977173,
      "learning_rate": 0.0005991729536496344,
      "loss": 3.1447,
      "step": 5447
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.268156051635742,
      "learning_rate": 0.0005991726500910222,
      "loss": 3.3033,
      "step": 5448
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7607544660568237,
      "learning_rate": 0.000599172346476788,
      "loss": 3.1723,
      "step": 5449
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5658856630325317,
      "learning_rate": 0.0005991720428069321,
      "loss": 3.6489,
      "step": 5450
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5130301713943481,
      "learning_rate": 0.0005991717390814544,
      "loss": 3.2464,
      "step": 5451
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8080730438232422,
      "learning_rate": 0.000599171435300355,
      "loss": 3.4725,
      "step": 5452
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.088508367538452,
      "learning_rate": 0.000599171131463634,
      "loss": 3.3345,
      "step": 5453
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5072144269943237,
      "learning_rate": 0.0005991708275712915,
      "loss": 3.2192,
      "step": 5454
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9500466585159302,
      "learning_rate": 0.0005991705236233276,
      "loss": 3.2528,
      "step": 5455
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3111493587493896,
      "learning_rate": 0.0005991702196197421,
      "loss": 3.2139,
      "step": 5456
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5828535556793213,
      "learning_rate": 0.0005991699155605352,
      "loss": 3.1486,
      "step": 5457
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.559265613555908,
      "learning_rate": 0.000599169611445707,
      "loss": 3.2042,
      "step": 5458
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6724722385406494,
      "learning_rate": 0.0005991693072752575,
      "loss": 3.1218,
      "step": 5459
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5947778224945068,
      "learning_rate": 0.0005991690030491869,
      "loss": 3.4007,
      "step": 5460
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4162521362304688,
      "learning_rate": 0.000599168698767495,
      "loss": 3.3197,
      "step": 5461
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.785935163497925,
      "learning_rate": 0.0005991683944301821,
      "loss": 3.109,
      "step": 5462
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6190502643585205,
      "learning_rate": 0.000599168090037248,
      "loss": 3.2202,
      "step": 5463
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3450162410736084,
      "learning_rate": 0.000599167785588693,
      "loss": 3.133,
      "step": 5464
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3111772537231445,
      "learning_rate": 0.000599167481084517,
      "loss": 2.8724,
      "step": 5465
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9580926895141602,
      "learning_rate": 0.0005991671765247201,
      "loss": 3.5003,
      "step": 5466
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.132521390914917,
      "learning_rate": 0.0005991668719093025,
      "loss": 3.2442,
      "step": 5467
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.362515449523926,
      "learning_rate": 0.000599166567238264,
      "loss": 3.1817,
      "step": 5468
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.792466878890991,
      "learning_rate": 0.0005991662625116049,
      "loss": 3.1548,
      "step": 5469
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4088085889816284,
      "learning_rate": 0.0005991659577293251,
      "loss": 3.4427,
      "step": 5470
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.54121470451355,
      "learning_rate": 0.0005991656528914248,
      "loss": 3.1522,
      "step": 5471
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.992490768432617,
      "learning_rate": 0.0005991653479979037,
      "loss": 3.2681,
      "step": 5472
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.771422028541565,
      "learning_rate": 0.0005991650430487624,
      "loss": 3.2108,
      "step": 5473
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6499406099319458,
      "learning_rate": 0.0005991647380440005,
      "loss": 3.4908,
      "step": 5474
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.9992072582244873,
      "learning_rate": 0.0005991644329836183,
      "loss": 3.4439,
      "step": 5475
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6227004528045654,
      "learning_rate": 0.0005991641278676158,
      "loss": 3.087,
      "step": 5476
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.678288221359253,
      "learning_rate": 0.0005991638226959929,
      "loss": 3.0776,
      "step": 5477
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0691077709198,
      "learning_rate": 0.0005991635174687499,
      "loss": 3.255,
      "step": 5478
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3656575679779053,
      "learning_rate": 0.0005991632121858868,
      "loss": 3.2144,
      "step": 5479
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.101686716079712,
      "learning_rate": 0.0005991629068474036,
      "loss": 3.1825,
      "step": 5480
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.889060139656067,
      "learning_rate": 0.0005991626014533002,
      "loss": 3.3282,
      "step": 5481
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0253219604492188,
      "learning_rate": 0.000599162296003577,
      "loss": 3.514,
      "step": 5482
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9506899118423462,
      "learning_rate": 0.0005991619904982339,
      "loss": 3.4481,
      "step": 5483
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5350501537322998,
      "learning_rate": 0.0005991616849372708,
      "loss": 3.1409,
      "step": 5484
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0854530334472656,
      "learning_rate": 0.000599161379320688,
      "loss": 3.1373,
      "step": 5485
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8431427478790283,
      "learning_rate": 0.0005991610736484855,
      "loss": 3.2161,
      "step": 5486
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.249498128890991,
      "learning_rate": 0.0005991607679206632,
      "loss": 3.4313,
      "step": 5487
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4019957780838013,
      "learning_rate": 0.0005991604621372213,
      "loss": 3.4918,
      "step": 5488
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8873505592346191,
      "learning_rate": 0.0005991601562981598,
      "loss": 3.5717,
      "step": 5489
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4845566749572754,
      "learning_rate": 0.0005991598504034789,
      "loss": 3.0217,
      "step": 5490
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3433122634887695,
      "learning_rate": 0.0005991595444531783,
      "loss": 3.494,
      "step": 5491
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.751561164855957,
      "learning_rate": 0.0005991592384472585,
      "loss": 3.237,
      "step": 5492
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5643043518066406,
      "learning_rate": 0.0005991589323857193,
      "loss": 3.4381,
      "step": 5493
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6871696710586548,
      "learning_rate": 0.0005991586262685608,
      "loss": 3.3256,
      "step": 5494
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.299734115600586,
      "learning_rate": 0.000599158320095783,
      "loss": 3.1448,
      "step": 5495
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6356573104858398,
      "learning_rate": 0.0005991580138673861,
      "loss": 3.3626,
      "step": 5496
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1342053413391113,
      "learning_rate": 0.00059915770758337,
      "loss": 3.3609,
      "step": 5497
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.727964997291565,
      "learning_rate": 0.000599157401243735,
      "loss": 3.4553,
      "step": 5498
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.154452085494995,
      "learning_rate": 0.0005991570948484809,
      "loss": 3.1249,
      "step": 5499
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8024145364761353,
      "learning_rate": 0.0005991567883976077,
      "loss": 3.2477,
      "step": 5500
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5924134254455566,
      "learning_rate": 0.0005991564818911159,
      "loss": 3.6001,
      "step": 5501
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6016169786453247,
      "learning_rate": 0.0005991561753290051,
      "loss": 3.3199,
      "step": 5502
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6794886589050293,
      "learning_rate": 0.0005991558687112754,
      "loss": 3.4748,
      "step": 5503
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5892906188964844,
      "learning_rate": 0.0005991555620379271,
      "loss": 3.273,
      "step": 5504
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.707746148109436,
      "learning_rate": 0.0005991552553089601,
      "loss": 3.244,
      "step": 5505
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4985471963882446,
      "learning_rate": 0.0005991549485243746,
      "loss": 3.1839,
      "step": 5506
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4556580781936646,
      "learning_rate": 0.0005991546416841705,
      "loss": 3.2417,
      "step": 5507
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6709463596343994,
      "learning_rate": 0.0005991543347883478,
      "loss": 3.6331,
      "step": 5508
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6694574356079102,
      "learning_rate": 0.0005991540278369068,
      "loss": 3.316,
      "step": 5509
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8236308097839355,
      "learning_rate": 0.0005991537208298474,
      "loss": 3.642,
      "step": 5510
    },
    {
      "epoch": 0.07,
      "grad_norm": 4.151618957519531,
      "learning_rate": 0.0005991534137671696,
      "loss": 3.2542,
      "step": 5511
    },
    {
      "epoch": 0.07,
      "grad_norm": 4.032834053039551,
      "learning_rate": 0.0005991531066488735,
      "loss": 3.1763,
      "step": 5512
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.71119225025177,
      "learning_rate": 0.0005991527994749593,
      "loss": 3.2747,
      "step": 5513
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0556344985961914,
      "learning_rate": 0.000599152492245427,
      "loss": 3.1205,
      "step": 5514
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.7522072792053223,
      "learning_rate": 0.0005991521849602764,
      "loss": 3.5703,
      "step": 5515
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8701293468475342,
      "learning_rate": 0.0005991518776195078,
      "loss": 3.1071,
      "step": 5516
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9103198051452637,
      "learning_rate": 0.0005991515702231213,
      "loss": 3.3456,
      "step": 5517
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2941763401031494,
      "learning_rate": 0.000599151262771117,
      "loss": 3.4151,
      "step": 5518
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1324617862701416,
      "learning_rate": 0.0005991509552634947,
      "loss": 3.2809,
      "step": 5519
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4897572994232178,
      "learning_rate": 0.0005991506477002545,
      "loss": 3.3155,
      "step": 5520
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.563925862312317,
      "learning_rate": 0.0005991503400813966,
      "loss": 3.3232,
      "step": 5521
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8606399297714233,
      "learning_rate": 0.0005991500324069211,
      "loss": 3.5656,
      "step": 5522
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1981277465820312,
      "learning_rate": 0.000599149724676828,
      "loss": 3.3093,
      "step": 5523
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.10835337638855,
      "learning_rate": 0.0005991494168911172,
      "loss": 3.2075,
      "step": 5524
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9795787334442139,
      "learning_rate": 0.0005991491090497889,
      "loss": 3.3347,
      "step": 5525
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6098442077636719,
      "learning_rate": 0.000599148801152843,
      "loss": 3.3221,
      "step": 5526
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4236888885498047,
      "learning_rate": 0.00059914849320028,
      "loss": 3.3759,
      "step": 5527
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7833322286605835,
      "learning_rate": 0.0005991481851920994,
      "loss": 3.3251,
      "step": 5528
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7459418773651123,
      "learning_rate": 0.0005991478771283016,
      "loss": 3.2709,
      "step": 5529
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5339465141296387,
      "learning_rate": 0.0005991475690088865,
      "loss": 2.9659,
      "step": 5530
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.609522819519043,
      "learning_rate": 0.0005991472608338544,
      "loss": 3.2784,
      "step": 5531
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3440831899642944,
      "learning_rate": 0.0005991469526032049,
      "loss": 3.2464,
      "step": 5532
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4099682569503784,
      "learning_rate": 0.0005991466443169385,
      "loss": 3.261,
      "step": 5533
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5726557970046997,
      "learning_rate": 0.000599146335975055,
      "loss": 3.2964,
      "step": 5534
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5947614908218384,
      "learning_rate": 0.0005991460275775548,
      "loss": 3.1508,
      "step": 5535
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4251056909561157,
      "learning_rate": 0.0005991457191244374,
      "loss": 3.2644,
      "step": 5536
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5267322063446045,
      "learning_rate": 0.0005991454106157033,
      "loss": 3.3242,
      "step": 5537
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.802345633506775,
      "learning_rate": 0.0005991451020513523,
      "loss": 3.2268,
      "step": 5538
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8294187784194946,
      "learning_rate": 0.0005991447934313847,
      "loss": 3.257,
      "step": 5539
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6266709566116333,
      "learning_rate": 0.0005991444847558005,
      "loss": 3.3145,
      "step": 5540
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.646876335144043,
      "learning_rate": 0.0005991441760245996,
      "loss": 3.7166,
      "step": 5541
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5721113681793213,
      "learning_rate": 0.0005991438672377821,
      "loss": 3.3124,
      "step": 5542
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8774158954620361,
      "learning_rate": 0.0005991435583953483,
      "loss": 3.557,
      "step": 5543
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3568894863128662,
      "learning_rate": 0.0005991432494972979,
      "loss": 3.5416,
      "step": 5544
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7438551187515259,
      "learning_rate": 0.0005991429405436311,
      "loss": 3.3699,
      "step": 5545
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6214863061904907,
      "learning_rate": 0.0005991426315343481,
      "loss": 3.3794,
      "step": 5546
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.588123083114624,
      "learning_rate": 0.0005991423224694488,
      "loss": 3.3392,
      "step": 5547
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5383734703063965,
      "learning_rate": 0.0005991420133489334,
      "loss": 3.4267,
      "step": 5548
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.073392152786255,
      "learning_rate": 0.0005991417041728016,
      "loss": 3.3901,
      "step": 5549
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8598802089691162,
      "learning_rate": 0.0005991413949410539,
      "loss": 3.4365,
      "step": 5550
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9843554496765137,
      "learning_rate": 0.0005991410856536901,
      "loss": 3.479,
      "step": 5551
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.9995439052581787,
      "learning_rate": 0.0005991407763107103,
      "loss": 3.1903,
      "step": 5552
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6713755130767822,
      "learning_rate": 0.0005991404669121146,
      "loss": 3.3736,
      "step": 5553
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4552854299545288,
      "learning_rate": 0.0005991401574579031,
      "loss": 3.2744,
      "step": 5554
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4976933002471924,
      "learning_rate": 0.0005991398479480757,
      "loss": 3.5608,
      "step": 5555
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9296914339065552,
      "learning_rate": 0.0005991395383826327,
      "loss": 3.1422,
      "step": 5556
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5630580186843872,
      "learning_rate": 0.000599139228761574,
      "loss": 3.0068,
      "step": 5557
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6730165481567383,
      "learning_rate": 0.0005991389190848996,
      "loss": 3.5208,
      "step": 5558
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6873351335525513,
      "learning_rate": 0.0005991386093526095,
      "loss": 3.4115,
      "step": 5559
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6307389736175537,
      "learning_rate": 0.000599138299564704,
      "loss": 3.3107,
      "step": 5560
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5937678813934326,
      "learning_rate": 0.0005991379897211831,
      "loss": 3.0994,
      "step": 5561
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.738219141960144,
      "learning_rate": 0.0005991376798220468,
      "loss": 3.2909,
      "step": 5562
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3626753091812134,
      "learning_rate": 0.0005991373698672951,
      "loss": 3.3961,
      "step": 5563
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9495776891708374,
      "learning_rate": 0.0005991370598569281,
      "loss": 3.3023,
      "step": 5564
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5393147468566895,
      "learning_rate": 0.0005991367497909459,
      "loss": 3.2511,
      "step": 5565
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.009932041168213,
      "learning_rate": 0.0005991364396693486,
      "loss": 3.1311,
      "step": 5566
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.620356321334839,
      "learning_rate": 0.0005991361294921361,
      "loss": 3.3157,
      "step": 5567
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5922839641571045,
      "learning_rate": 0.0005991358192593085,
      "loss": 3.4084,
      "step": 5568
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.728905439376831,
      "learning_rate": 0.000599135508970866,
      "loss": 3.2948,
      "step": 5569
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.5553057193756104,
      "learning_rate": 0.0005991351986268086,
      "loss": 3.1581,
      "step": 5570
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6880494356155396,
      "learning_rate": 0.0005991348882271362,
      "loss": 3.329,
      "step": 5571
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.832371711730957,
      "learning_rate": 0.000599134577771849,
      "loss": 3.4068,
      "step": 5572
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4482650756835938,
      "learning_rate": 0.0005991342672609471,
      "loss": 3.1286,
      "step": 5573
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5038779973983765,
      "learning_rate": 0.0005991339566944304,
      "loss": 3.7591,
      "step": 5574
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7076181173324585,
      "learning_rate": 0.0005991336460722992,
      "loss": 3.4528,
      "step": 5575
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.937245488166809,
      "learning_rate": 0.0005991333353945533,
      "loss": 3.2418,
      "step": 5576
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6507443189620972,
      "learning_rate": 0.000599133024661193,
      "loss": 3.5141,
      "step": 5577
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7607216835021973,
      "learning_rate": 0.000599132713872218,
      "loss": 3.098,
      "step": 5578
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6544532775878906,
      "learning_rate": 0.0005991324030276287,
      "loss": 3.3111,
      "step": 5579
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.695899486541748,
      "learning_rate": 0.0005991320921274251,
      "loss": 3.6094,
      "step": 5580
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.323582410812378,
      "learning_rate": 0.0005991317811716071,
      "loss": 3.2682,
      "step": 5581
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7190443277359009,
      "learning_rate": 0.0005991314701601749,
      "loss": 3.385,
      "step": 5582
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1397969722747803,
      "learning_rate": 0.0005991311590931286,
      "loss": 3.268,
      "step": 5583
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6159731149673462,
      "learning_rate": 0.000599130847970468,
      "loss": 3.3311,
      "step": 5584
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5212173461914062,
      "learning_rate": 0.0005991305367921933,
      "loss": 3.371,
      "step": 5585
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5882807970046997,
      "learning_rate": 0.0005991302255583048,
      "loss": 3.5648,
      "step": 5586
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9663496017456055,
      "learning_rate": 0.0005991299142688023,
      "loss": 3.4235,
      "step": 5587
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6701222658157349,
      "learning_rate": 0.0005991296029236857,
      "loss": 3.4379,
      "step": 5588
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4250528812408447,
      "learning_rate": 0.0005991292915229555,
      "loss": 3.681,
      "step": 5589
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.941402554512024,
      "learning_rate": 0.0005991289800666114,
      "loss": 3.3181,
      "step": 5590
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8958812952041626,
      "learning_rate": 0.0005991286685546536,
      "loss": 3.1501,
      "step": 5591
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5180336236953735,
      "learning_rate": 0.000599128356987082,
      "loss": 3.3143,
      "step": 5592
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.726063847541809,
      "learning_rate": 0.000599128045363897,
      "loss": 3.1992,
      "step": 5593
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.730278491973877,
      "learning_rate": 0.0005991277336850983,
      "loss": 3.3901,
      "step": 5594
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3992871046066284,
      "learning_rate": 0.0005991274219506862,
      "loss": 3.5685,
      "step": 5595
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.600952386856079,
      "learning_rate": 0.0005991271101606607,
      "loss": 3.3933,
      "step": 5596
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2452352046966553,
      "learning_rate": 0.0005991267983150218,
      "loss": 3.1779,
      "step": 5597
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4443047046661377,
      "learning_rate": 0.0005991264864137695,
      "loss": 3.6,
      "step": 5598
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.820818305015564,
      "learning_rate": 0.000599126174456904,
      "loss": 3.3023,
      "step": 5599
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7248915433883667,
      "learning_rate": 0.0005991258624444253,
      "loss": 3.2942,
      "step": 5600
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.559512734413147,
      "learning_rate": 0.0005991255503763334,
      "loss": 3.5674,
      "step": 5601
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8064290285110474,
      "learning_rate": 0.0005991252382526284,
      "loss": 3.4441,
      "step": 5602
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2591474056243896,
      "learning_rate": 0.0005991249260733105,
      "loss": 3.3192,
      "step": 5603
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7476376295089722,
      "learning_rate": 0.0005991246138383796,
      "loss": 3.3712,
      "step": 5604
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.232973098754883,
      "learning_rate": 0.0005991243015478357,
      "loss": 3.0898,
      "step": 5605
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7346149682998657,
      "learning_rate": 0.0005991239892016789,
      "loss": 3.5703,
      "step": 5606
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.068387746810913,
      "learning_rate": 0.0005991236767999094,
      "loss": 3.0343,
      "step": 5607
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0136005878448486,
      "learning_rate": 0.0005991233643425272,
      "loss": 3.3261,
      "step": 5608
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.767951250076294,
      "learning_rate": 0.0005991230518295322,
      "loss": 3.3402,
      "step": 5609
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.320777416229248,
      "learning_rate": 0.0005991227392609247,
      "loss": 3.2909,
      "step": 5610
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8415136337280273,
      "learning_rate": 0.0005991224266367046,
      "loss": 3.0286,
      "step": 5611
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8943172693252563,
      "learning_rate": 0.000599122113956872,
      "loss": 3.3773,
      "step": 5612
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6345654726028442,
      "learning_rate": 0.000599121801221427,
      "loss": 3.4282,
      "step": 5613
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9667115211486816,
      "learning_rate": 0.0005991214884303695,
      "loss": 3.1821,
      "step": 5614
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3040300607681274,
      "learning_rate": 0.0005991211755836997,
      "loss": 3.4798,
      "step": 5615
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9834874868392944,
      "learning_rate": 0.0005991208626814177,
      "loss": 3.2034,
      "step": 5616
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3703421354293823,
      "learning_rate": 0.0005991205497235234,
      "loss": 3.4751,
      "step": 5617
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3972327709197998,
      "learning_rate": 0.000599120236710017,
      "loss": 3.3181,
      "step": 5618
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6339266300201416,
      "learning_rate": 0.0005991199236408984,
      "loss": 3.2371,
      "step": 5619
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6491950750350952,
      "learning_rate": 0.0005991196105161678,
      "loss": 3.1045,
      "step": 5620
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4122653007507324,
      "learning_rate": 0.0005991192973358253,
      "loss": 3.2333,
      "step": 5621
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5800342559814453,
      "learning_rate": 0.0005991189840998709,
      "loss": 3.3428,
      "step": 5622
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6132423877716064,
      "learning_rate": 0.0005991186708083046,
      "loss": 3.4585,
      "step": 5623
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7032238245010376,
      "learning_rate": 0.0005991183574611263,
      "loss": 3.4168,
      "step": 5624
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7922457456588745,
      "learning_rate": 0.0005991180440583365,
      "loss": 3.2886,
      "step": 5625
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.879305362701416,
      "learning_rate": 0.0005991177305999349,
      "loss": 2.7818,
      "step": 5626
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7123396396636963,
      "learning_rate": 0.0005991174170859217,
      "loss": 3.5494,
      "step": 5627
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.648462176322937,
      "learning_rate": 0.0005991171035162969,
      "loss": 3.1314,
      "step": 5628
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8191132545471191,
      "learning_rate": 0.0005991167898910606,
      "loss": 3.2762,
      "step": 5629
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5846238136291504,
      "learning_rate": 0.0005991164762102128,
      "loss": 3.2547,
      "step": 5630
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8354052305221558,
      "learning_rate": 0.0005991161624737536,
      "loss": 3.1515,
      "step": 5631
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4157047271728516,
      "learning_rate": 0.0005991158486816831,
      "loss": 3.1901,
      "step": 5632
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.909264087677002,
      "learning_rate": 0.0005991155348340014,
      "loss": 3.4186,
      "step": 5633
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.318255662918091,
      "learning_rate": 0.0005991152209307083,
      "loss": 3.1246,
      "step": 5634
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1158459186553955,
      "learning_rate": 0.0005991149069718042,
      "loss": 3.308,
      "step": 5635
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6068618297576904,
      "learning_rate": 0.0005991145929572888,
      "loss": 3.2336,
      "step": 5636
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8042265176773071,
      "learning_rate": 0.0005991142788871626,
      "loss": 3.1723,
      "step": 5637
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8463181257247925,
      "learning_rate": 0.0005991139647614252,
      "loss": 3.3066,
      "step": 5638
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6732113361358643,
      "learning_rate": 0.000599113650580077,
      "loss": 3.0269,
      "step": 5639
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8843995332717896,
      "learning_rate": 0.0005991133363431178,
      "loss": 3.4734,
      "step": 5640
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5071581602096558,
      "learning_rate": 0.0005991130220505479,
      "loss": 3.0715,
      "step": 5641
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.732177495956421,
      "learning_rate": 0.0005991127077023673,
      "loss": 3.3473,
      "step": 5642
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9054595232009888,
      "learning_rate": 0.0005991123932985758,
      "loss": 3.4048,
      "step": 5643
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6398341655731201,
      "learning_rate": 0.0005991120788391737,
      "loss": 3.2866,
      "step": 5644
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5607625246047974,
      "learning_rate": 0.0005991117643241612,
      "loss": 3.202,
      "step": 5645
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.361471176147461,
      "learning_rate": 0.000599111449753538,
      "loss": 3.2698,
      "step": 5646
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.2410422563552856,
      "learning_rate": 0.0005991111351273044,
      "loss": 3.3093,
      "step": 5647
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6459399461746216,
      "learning_rate": 0.0005991108204454603,
      "loss": 3.0777,
      "step": 5648
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8649126291275024,
      "learning_rate": 0.000599110505708006,
      "loss": 3.2651,
      "step": 5649
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7117723226547241,
      "learning_rate": 0.0005991101909149413,
      "loss": 3.6337,
      "step": 5650
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7326276302337646,
      "learning_rate": 0.0005991098760662664,
      "loss": 3.1366,
      "step": 5651
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3014225959777832,
      "learning_rate": 0.0005991095611619814,
      "loss": 3.2038,
      "step": 5652
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7072404623031616,
      "learning_rate": 0.0005991092462020862,
      "loss": 3.2702,
      "step": 5653
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7521404027938843,
      "learning_rate": 0.000599108931186581,
      "loss": 3.3513,
      "step": 5654
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4137465953826904,
      "learning_rate": 0.0005991086161154658,
      "loss": 3.1718,
      "step": 5655
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7187405824661255,
      "learning_rate": 0.0005991083009887407,
      "loss": 3.4177,
      "step": 5656
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7280583381652832,
      "learning_rate": 0.0005991079858064056,
      "loss": 3.0267,
      "step": 5657
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8131011724472046,
      "learning_rate": 0.0005991076705684608,
      "loss": 2.9888,
      "step": 5658
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5276072025299072,
      "learning_rate": 0.0005991073552749062,
      "loss": 3.3279,
      "step": 5659
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6879256963729858,
      "learning_rate": 0.0005991070399257417,
      "loss": 3.3669,
      "step": 5660
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6172477006912231,
      "learning_rate": 0.0005991067245209678,
      "loss": 2.8308,
      "step": 5661
    },
    {
      "epoch": 0.07,
      "grad_norm": 4.900592803955078,
      "learning_rate": 0.0005991064090605843,
      "loss": 3.278,
      "step": 5662
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9973853826522827,
      "learning_rate": 0.0005991060935445912,
      "loss": 3.2455,
      "step": 5663
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.037081003189087,
      "learning_rate": 0.0005991057779729886,
      "loss": 3.5379,
      "step": 5664
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.258450984954834,
      "learning_rate": 0.0005991054623457766,
      "loss": 2.9869,
      "step": 5665
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9161454439163208,
      "learning_rate": 0.0005991051466629553,
      "loss": 2.9278,
      "step": 5666
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.530349612236023,
      "learning_rate": 0.0005991048309245248,
      "loss": 3.439,
      "step": 5667
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.4711244106292725,
      "learning_rate": 0.0005991045151304849,
      "loss": 3.4477,
      "step": 5668
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2897822856903076,
      "learning_rate": 0.0005991041992808359,
      "loss": 3.7588,
      "step": 5669
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.965659499168396,
      "learning_rate": 0.0005991038833755778,
      "loss": 3.1374,
      "step": 5670
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.975399136543274,
      "learning_rate": 0.0005991035674147106,
      "loss": 3.28,
      "step": 5671
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5692598819732666,
      "learning_rate": 0.0005991032513982344,
      "loss": 3.2977,
      "step": 5672
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8073956966400146,
      "learning_rate": 0.0005991029353261492,
      "loss": 3.1449,
      "step": 5673
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.375978946685791,
      "learning_rate": 0.0005991026191984552,
      "loss": 3.1281,
      "step": 5674
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8728504180908203,
      "learning_rate": 0.0005991023030151524,
      "loss": 2.9355,
      "step": 5675
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.413007974624634,
      "learning_rate": 0.0005991019867762408,
      "loss": 3.272,
      "step": 5676
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5872535705566406,
      "learning_rate": 0.0005991016704817205,
      "loss": 3.3853,
      "step": 5677
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.9438657760620117,
      "learning_rate": 0.0005991013541315916,
      "loss": 3.4547,
      "step": 5678
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7454288005828857,
      "learning_rate": 0.000599101037725854,
      "loss": 3.2464,
      "step": 5679
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.282219648361206,
      "learning_rate": 0.000599100721264508,
      "loss": 3.4097,
      "step": 5680
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.779552936553955,
      "learning_rate": 0.0005991004047475534,
      "loss": 3.308,
      "step": 5681
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.705509066581726,
      "learning_rate": 0.0005991000881749904,
      "loss": 3.0655,
      "step": 5682
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.63696026802063,
      "learning_rate": 0.0005990997715468191,
      "loss": 3.3242,
      "step": 5683
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8314553499221802,
      "learning_rate": 0.0005990994548630396,
      "loss": 3.2631,
      "step": 5684
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5097169876098633,
      "learning_rate": 0.0005990991381236518,
      "loss": 3.3342,
      "step": 5685
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.679392099380493,
      "learning_rate": 0.0005990988213286558,
      "loss": 3.423,
      "step": 5686
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.698312520980835,
      "learning_rate": 0.0005990985044780516,
      "loss": 3.2916,
      "step": 5687
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2772068977355957,
      "learning_rate": 0.0005990981875718394,
      "loss": 3.3717,
      "step": 5688
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.1780455112457275,
      "learning_rate": 0.0005990978706100193,
      "loss": 3.1699,
      "step": 5689
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6797031164169312,
      "learning_rate": 0.0005990975535925911,
      "loss": 3.2249,
      "step": 5690
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.22463059425354,
      "learning_rate": 0.0005990972365195552,
      "loss": 3.2087,
      "step": 5691
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.282470703125,
      "learning_rate": 0.0005990969193909113,
      "loss": 3.2336,
      "step": 5692
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3372650146484375,
      "learning_rate": 0.0005990966022066598,
      "loss": 3.2764,
      "step": 5693
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3850702047348022,
      "learning_rate": 0.0005990962849668004,
      "loss": 3.1136,
      "step": 5694
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8572824001312256,
      "learning_rate": 0.0005990959676713335,
      "loss": 3.4848,
      "step": 5695
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.542992353439331,
      "learning_rate": 0.000599095650320259,
      "loss": 3.0018,
      "step": 5696
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5343705415725708,
      "learning_rate": 0.000599095332913577,
      "loss": 3.026,
      "step": 5697
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3598228693008423,
      "learning_rate": 0.0005990950154512875,
      "loss": 2.9765,
      "step": 5698
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.6531643867492676,
      "learning_rate": 0.0005990946979333904,
      "loss": 3.3232,
      "step": 5699
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.525691270828247,
      "learning_rate": 0.0005990943803598863,
      "loss": 3.1561,
      "step": 5700
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7164291143417358,
      "learning_rate": 0.0005990940627307747,
      "loss": 3.329,
      "step": 5701
    },
    {
      "epoch": 0.07,
      "grad_norm": 4.154673099517822,
      "learning_rate": 0.0005990937450460559,
      "loss": 2.8472,
      "step": 5702
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8539446592330933,
      "learning_rate": 0.0005990934273057298,
      "loss": 3.0686,
      "step": 5703
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.046081781387329,
      "learning_rate": 0.0005990931095097968,
      "loss": 3.6225,
      "step": 5704
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.138538122177124,
      "learning_rate": 0.0005990927916582567,
      "loss": 3.1725,
      "step": 5705
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8110506534576416,
      "learning_rate": 0.0005990924737511095,
      "loss": 3.474,
      "step": 5706
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7437419891357422,
      "learning_rate": 0.0005990921557883554,
      "loss": 3.3952,
      "step": 5707
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.501441240310669,
      "learning_rate": 0.0005990918377699945,
      "loss": 3.1419,
      "step": 5708
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.1775670051574707,
      "learning_rate": 0.0005990915196960267,
      "loss": 3.2465,
      "step": 5709
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.7146472930908203,
      "learning_rate": 0.0005990912015664522,
      "loss": 3.3356,
      "step": 5710
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.355398416519165,
      "learning_rate": 0.000599090883381271,
      "loss": 3.2695,
      "step": 5711
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6342412233352661,
      "learning_rate": 0.0005990905651404832,
      "loss": 3.235,
      "step": 5712
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.013469934463501,
      "learning_rate": 0.0005990902468440886,
      "loss": 3.1221,
      "step": 5713
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.206434488296509,
      "learning_rate": 0.0005990899284920877,
      "loss": 3.2782,
      "step": 5714
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.225044012069702,
      "learning_rate": 0.0005990896100844802,
      "loss": 3.4557,
      "step": 5715
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4331659078598022,
      "learning_rate": 0.0005990892916212664,
      "loss": 3.1825,
      "step": 5716
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5759787559509277,
      "learning_rate": 0.0005990889731024462,
      "loss": 3.3373,
      "step": 5717
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0193543434143066,
      "learning_rate": 0.0005990886545280198,
      "loss": 3.1177,
      "step": 5718
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2789218425750732,
      "learning_rate": 0.0005990883358979871,
      "loss": 3.2453,
      "step": 5719
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7555359601974487,
      "learning_rate": 0.0005990880172123482,
      "loss": 3.1266,
      "step": 5720
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6251314878463745,
      "learning_rate": 0.0005990876984711032,
      "loss": 3.1955,
      "step": 5721
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3362228870391846,
      "learning_rate": 0.0005990873796742522,
      "loss": 3.6523,
      "step": 5722
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3872854709625244,
      "learning_rate": 0.0005990870608217952,
      "loss": 3.2743,
      "step": 5723
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.511719822883606,
      "learning_rate": 0.0005990867419137322,
      "loss": 3.0746,
      "step": 5724
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.747497320175171,
      "learning_rate": 0.0005990864229500635,
      "loss": 3.1651,
      "step": 5725
    },
    {
      "epoch": 0.07,
      "grad_norm": 4.035699844360352,
      "learning_rate": 0.0005990861039307888,
      "loss": 3.1107,
      "step": 5726
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6237404346466064,
      "learning_rate": 0.0005990857848559084,
      "loss": 3.0684,
      "step": 5727
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5011365413665771,
      "learning_rate": 0.0005990854657254223,
      "loss": 3.5091,
      "step": 5728
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0498046875,
      "learning_rate": 0.0005990851465393307,
      "loss": 3.113,
      "step": 5729
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.71028995513916,
      "learning_rate": 0.0005990848272976333,
      "loss": 3.1185,
      "step": 5730
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5634111166000366,
      "learning_rate": 0.0005990845080003305,
      "loss": 3.1674,
      "step": 5731
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.970594644546509,
      "learning_rate": 0.0005990841886474223,
      "loss": 3.1244,
      "step": 5732
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0761780738830566,
      "learning_rate": 0.0005990838692389086,
      "loss": 3.2795,
      "step": 5733
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.3270771503448486,
      "learning_rate": 0.0005990835497747896,
      "loss": 3.0849,
      "step": 5734
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3534529209136963,
      "learning_rate": 0.0005990832302550654,
      "loss": 3.2548,
      "step": 5735
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5737051963806152,
      "learning_rate": 0.000599082910679736,
      "loss": 3.0364,
      "step": 5736
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.4403516054153442,
      "learning_rate": 0.0005990825910488013,
      "loss": 3.3605,
      "step": 5737
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.5506224632263184,
      "learning_rate": 0.0005990822713622616,
      "loss": 3.0979,
      "step": 5738
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.2303757667541504,
      "learning_rate": 0.0005990819516201168,
      "loss": 3.1218,
      "step": 5739
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.7448391914367676,
      "learning_rate": 0.000599081631822367,
      "loss": 3.3545,
      "step": 5740
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.8498330116271973,
      "learning_rate": 0.0005990813119690124,
      "loss": 3.1428,
      "step": 5741
    },
    {
      "epoch": 0.07,
      "grad_norm": 3.0954954624176025,
      "learning_rate": 0.0005990809920600528,
      "loss": 3.2336,
      "step": 5742
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6389544010162354,
      "learning_rate": 0.0005990806720954885,
      "loss": 3.2882,
      "step": 5743
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.241114377975464,
      "learning_rate": 0.0005990803520753193,
      "loss": 3.2886,
      "step": 5744
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.223248243331909,
      "learning_rate": 0.0005990800319995456,
      "loss": 3.1974,
      "step": 5745
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6012448072433472,
      "learning_rate": 0.0005990797118681673,
      "loss": 3.311,
      "step": 5746
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.8414558172225952,
      "learning_rate": 0.0005990793916811843,
      "loss": 3.1415,
      "step": 5747
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6670939922332764,
      "learning_rate": 0.0005990790714385968,
      "loss": 3.1958,
      "step": 5748
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0807955265045166,
      "learning_rate": 0.0005990787511404049,
      "loss": 3.409,
      "step": 5749
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5444124937057495,
      "learning_rate": 0.0005990784307866086,
      "loss": 3.3776,
      "step": 5750
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6604700088500977,
      "learning_rate": 0.000599078110377208,
      "loss": 3.3473,
      "step": 5751
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.666534662246704,
      "learning_rate": 0.0005990777899122032,
      "loss": 3.544,
      "step": 5752
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.692327618598938,
      "learning_rate": 0.000599077469391594,
      "loss": 3.0486,
      "step": 5753
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.6535238027572632,
      "learning_rate": 0.0005990771488153808,
      "loss": 3.3544,
      "step": 5754
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.3304173946380615,
      "learning_rate": 0.0005990768281835634,
      "loss": 3.3981,
      "step": 5755
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.452748417854309,
      "learning_rate": 0.0005990765074961422,
      "loss": 3.0033,
      "step": 5756
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.2489173412323,
      "learning_rate": 0.0005990761867531168,
      "loss": 3.3718,
      "step": 5757
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.0788846015930176,
      "learning_rate": 0.0005990758659544876,
      "loss": 3.3056,
      "step": 5758
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.5998435020446777,
      "learning_rate": 0.0005990755451002545,
      "loss": 3.4047,
      "step": 5759
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.441999912261963,
      "learning_rate": 0.0005990752241904177,
      "loss": 3.167,
      "step": 5760
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.547119617462158,
      "learning_rate": 0.000599074903224977,
      "loss": 3.4866,
      "step": 5761
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4023451805114746,
      "learning_rate": 0.0005990745822039329,
      "loss": 3.5693,
      "step": 5762
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9176509380340576,
      "learning_rate": 0.0005990742611272848,
      "loss": 3.1383,
      "step": 5763
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7196667194366455,
      "learning_rate": 0.0005990739399950335,
      "loss": 3.3807,
      "step": 5764
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5863099098205566,
      "learning_rate": 0.0005990736188071786,
      "loss": 3.0616,
      "step": 5765
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8242970705032349,
      "learning_rate": 0.0005990732975637202,
      "loss": 3.403,
      "step": 5766
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3810782432556152,
      "learning_rate": 0.0005990729762646585,
      "loss": 3.2409,
      "step": 5767
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.697774648666382,
      "learning_rate": 0.0005990726549099934,
      "loss": 3.4182,
      "step": 5768
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.724381446838379,
      "learning_rate": 0.0005990723334997252,
      "loss": 3.3181,
      "step": 5769
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9544240236282349,
      "learning_rate": 0.0005990720120338536,
      "loss": 3.4914,
      "step": 5770
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.024571180343628,
      "learning_rate": 0.0005990716905123791,
      "loss": 3.2551,
      "step": 5771
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4099175930023193,
      "learning_rate": 0.0005990713689353013,
      "loss": 3.4178,
      "step": 5772
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.545215129852295,
      "learning_rate": 0.0005990710473026208,
      "loss": 3.0904,
      "step": 5773
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.039855480194092,
      "learning_rate": 0.0005990707256143371,
      "loss": 3.3954,
      "step": 5774
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.147923469543457,
      "learning_rate": 0.0005990704038704505,
      "loss": 3.1191,
      "step": 5775
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.670649528503418,
      "learning_rate": 0.0005990700820709612,
      "loss": 3.1354,
      "step": 5776
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9338021278381348,
      "learning_rate": 0.000599069760215869,
      "loss": 3.2218,
      "step": 5777
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.436995029449463,
      "learning_rate": 0.0005990694383051741,
      "loss": 3.3188,
      "step": 5778
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.425959348678589,
      "learning_rate": 0.0005990691163388766,
      "loss": 3.4425,
      "step": 5779
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6339199542999268,
      "learning_rate": 0.0005990687943169765,
      "loss": 3.2716,
      "step": 5780
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.275001049041748,
      "learning_rate": 0.0005990684722394739,
      "loss": 3.4126,
      "step": 5781
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.61147940158844,
      "learning_rate": 0.0005990681501063688,
      "loss": 3.5039,
      "step": 5782
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6454373598098755,
      "learning_rate": 0.0005990678279176612,
      "loss": 3.2542,
      "step": 5783
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.530503749847412,
      "learning_rate": 0.0005990675056733513,
      "loss": 3.3198,
      "step": 5784
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.720836877822876,
      "learning_rate": 0.0005990671833734392,
      "loss": 3.1164,
      "step": 5785
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7125837802886963,
      "learning_rate": 0.0005990668610179247,
      "loss": 3.3107,
      "step": 5786
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6603628396987915,
      "learning_rate": 0.0005990665386068082,
      "loss": 3.2406,
      "step": 5787
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7482784986495972,
      "learning_rate": 0.0005990662161400894,
      "loss": 3.506,
      "step": 5788
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8150842189788818,
      "learning_rate": 0.0005990658936177686,
      "loss": 3.3014,
      "step": 5789
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1122143268585205,
      "learning_rate": 0.0005990655710398459,
      "loss": 3.1586,
      "step": 5790
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0473484992980957,
      "learning_rate": 0.0005990652484063212,
      "loss": 3.3175,
      "step": 5791
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4517953395843506,
      "learning_rate": 0.0005990649257171947,
      "loss": 3.6938,
      "step": 5792
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4840129613876343,
      "learning_rate": 0.0005990646029724663,
      "loss": 3.2604,
      "step": 5793
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5980165004730225,
      "learning_rate": 0.0005990642801721361,
      "loss": 3.0715,
      "step": 5794
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.704164981842041,
      "learning_rate": 0.0005990639573162043,
      "loss": 3.2325,
      "step": 5795
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6026344299316406,
      "learning_rate": 0.0005990636344046709,
      "loss": 3.3674,
      "step": 5796
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.797590732574463,
      "learning_rate": 0.0005990633114375359,
      "loss": 3.1794,
      "step": 5797
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.967857837677002,
      "learning_rate": 0.0005990629884147993,
      "loss": 3.2535,
      "step": 5798
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6210914850234985,
      "learning_rate": 0.0005990626653364612,
      "loss": 3.1955,
      "step": 5799
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8795254230499268,
      "learning_rate": 0.0005990623422025219,
      "loss": 3.3106,
      "step": 5800
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6773877143859863,
      "learning_rate": 0.0005990620190129811,
      "loss": 3.2598,
      "step": 5801
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7369253635406494,
      "learning_rate": 0.0005990616957678391,
      "loss": 3.4149,
      "step": 5802
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7503018379211426,
      "learning_rate": 0.0005990613724670959,
      "loss": 3.1844,
      "step": 5803
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.424863338470459,
      "learning_rate": 0.0005990610491107515,
      "loss": 3.3422,
      "step": 5804
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.205976724624634,
      "learning_rate": 0.0005990607256988061,
      "loss": 3.284,
      "step": 5805
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.063018560409546,
      "learning_rate": 0.0005990604022312595,
      "loss": 3.2473,
      "step": 5806
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6988946199417114,
      "learning_rate": 0.000599060078708112,
      "loss": 2.9502,
      "step": 5807
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8896939754486084,
      "learning_rate": 0.0005990597551293636,
      "loss": 3.3396,
      "step": 5808
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1894280910491943,
      "learning_rate": 0.0005990594314950143,
      "loss": 3.3619,
      "step": 5809
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5797425508499146,
      "learning_rate": 0.0005990591078050642,
      "loss": 3.2005,
      "step": 5810
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.851118803024292,
      "learning_rate": 0.0005990587840595135,
      "loss": 3.5773,
      "step": 5811
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1859943866729736,
      "learning_rate": 0.0005990584602583621,
      "loss": 3.4358,
      "step": 5812
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4494630098342896,
      "learning_rate": 0.00059905813640161,
      "loss": 3.345,
      "step": 5813
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.604613184928894,
      "learning_rate": 0.0005990578124892574,
      "loss": 3.4108,
      "step": 5814
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8776440620422363,
      "learning_rate": 0.0005990574885213042,
      "loss": 3.5399,
      "step": 5815
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7073564529418945,
      "learning_rate": 0.0005990571644977508,
      "loss": 3.2133,
      "step": 5816
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8346514701843262,
      "learning_rate": 0.0005990568404185968,
      "loss": 3.1861,
      "step": 5817
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9682738780975342,
      "learning_rate": 0.0005990565162838425,
      "loss": 3.257,
      "step": 5818
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3792157173156738,
      "learning_rate": 0.0005990561920934881,
      "loss": 3.3526,
      "step": 5819
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8001863956451416,
      "learning_rate": 0.0005990558678475335,
      "loss": 3.0816,
      "step": 5820
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.637682557106018,
      "learning_rate": 0.0005990555435459787,
      "loss": 3.4416,
      "step": 5821
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7481613159179688,
      "learning_rate": 0.0005990552191888238,
      "loss": 3.2572,
      "step": 5822
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7312099933624268,
      "learning_rate": 0.0005990548947760689,
      "loss": 3.1314,
      "step": 5823
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4607963562011719,
      "learning_rate": 0.000599054570307714,
      "loss": 3.4239,
      "step": 5824
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7962347269058228,
      "learning_rate": 0.0005990542457837594,
      "loss": 3.3632,
      "step": 5825
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6403642892837524,
      "learning_rate": 0.0005990539212042048,
      "loss": 3.3917,
      "step": 5826
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.336385726928711,
      "learning_rate": 0.0005990535965690505,
      "loss": 3.343,
      "step": 5827
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7020323276519775,
      "learning_rate": 0.0005990532718782965,
      "loss": 2.8741,
      "step": 5828
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9773833751678467,
      "learning_rate": 0.0005990529471319427,
      "loss": 3.0713,
      "step": 5829
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4658616781234741,
      "learning_rate": 0.0005990526223299895,
      "loss": 3.2913,
      "step": 5830
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8472734689712524,
      "learning_rate": 0.0005990522974724366,
      "loss": 3.313,
      "step": 5831
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9140625,
      "learning_rate": 0.0005990519725592845,
      "loss": 3.3319,
      "step": 5832
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5560611486434937,
      "learning_rate": 0.0005990516475905328,
      "loss": 3.3063,
      "step": 5833
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.247126340866089,
      "learning_rate": 0.0005990513225661817,
      "loss": 3.3158,
      "step": 5834
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5125274658203125,
      "learning_rate": 0.0005990509974862315,
      "loss": 3.416,
      "step": 5835
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5512410402297974,
      "learning_rate": 0.0005990506723506819,
      "loss": 3.4925,
      "step": 5836
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3721140623092651,
      "learning_rate": 0.0005990503471595333,
      "loss": 3.4594,
      "step": 5837
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.558537483215332,
      "learning_rate": 0.0005990500219127854,
      "loss": 3.3994,
      "step": 5838
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.901210069656372,
      "learning_rate": 0.0005990496966104386,
      "loss": 3.1798,
      "step": 5839
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0125699043273926,
      "learning_rate": 0.0005990493712524928,
      "loss": 3.3821,
      "step": 5840
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8545253276824951,
      "learning_rate": 0.0005990490458389481,
      "loss": 3.1963,
      "step": 5841
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.845333218574524,
      "learning_rate": 0.0005990487203698044,
      "loss": 3.3927,
      "step": 5842
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2348506450653076,
      "learning_rate": 0.0005990483948450621,
      "loss": 3.253,
      "step": 5843
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8504388332366943,
      "learning_rate": 0.0005990480692647208,
      "loss": 3.1775,
      "step": 5844
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.761880874633789,
      "learning_rate": 0.0005990477436287811,
      "loss": 3.3293,
      "step": 5845
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2723381519317627,
      "learning_rate": 0.0005990474179372426,
      "loss": 3.2532,
      "step": 5846
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.115851879119873,
      "learning_rate": 0.0005990470921901056,
      "loss": 3.3285,
      "step": 5847
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.439030408859253,
      "learning_rate": 0.00059904676638737,
      "loss": 3.3003,
      "step": 5848
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7227765321731567,
      "learning_rate": 0.0005990464405290361,
      "loss": 3.3272,
      "step": 5849
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2107350826263428,
      "learning_rate": 0.0005990461146151038,
      "loss": 3.3081,
      "step": 5850
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7424566745758057,
      "learning_rate": 0.0005990457886455731,
      "loss": 3.2929,
      "step": 5851
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5881755352020264,
      "learning_rate": 0.0005990454626204442,
      "loss": 3.3003,
      "step": 5852
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6598358154296875,
      "learning_rate": 0.0005990451365397171,
      "loss": 3.4331,
      "step": 5853
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4766329526901245,
      "learning_rate": 0.0005990448104033919,
      "loss": 3.4209,
      "step": 5854
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6358834505081177,
      "learning_rate": 0.0005990444842114686,
      "loss": 3.3556,
      "step": 5855
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5219718217849731,
      "learning_rate": 0.0005990441579639472,
      "loss": 3.5336,
      "step": 5856
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4538031816482544,
      "learning_rate": 0.000599043831660828,
      "loss": 3.365,
      "step": 5857
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4501583576202393,
      "learning_rate": 0.0005990435053021109,
      "loss": 3.1311,
      "step": 5858
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.730178952217102,
      "learning_rate": 0.0005990431788877958,
      "loss": 3.303,
      "step": 5859
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4285211563110352,
      "learning_rate": 0.0005990428524178831,
      "loss": 3.3936,
      "step": 5860
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4239418506622314,
      "learning_rate": 0.0005990425258923726,
      "loss": 3.2991,
      "step": 5861
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8194618225097656,
      "learning_rate": 0.0005990421993112644,
      "loss": 3.3394,
      "step": 5862
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.895735740661621,
      "learning_rate": 0.0005990418726745587,
      "loss": 3.4758,
      "step": 5863
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2777671813964844,
      "learning_rate": 0.0005990415459822554,
      "loss": 3.2018,
      "step": 5864
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4644620418548584,
      "learning_rate": 0.0005990412192343548,
      "loss": 3.39,
      "step": 5865
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.596542477607727,
      "learning_rate": 0.0005990408924308565,
      "loss": 3.3189,
      "step": 5866
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.02473783493042,
      "learning_rate": 0.0005990405655717611,
      "loss": 3.0662,
      "step": 5867
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.206873655319214,
      "learning_rate": 0.0005990402386570683,
      "loss": 2.9679,
      "step": 5868
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4945528507232666,
      "learning_rate": 0.0005990399116867784,
      "loss": 3.416,
      "step": 5869
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8095779418945312,
      "learning_rate": 0.0005990395846608912,
      "loss": 3.249,
      "step": 5870
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8325245380401611,
      "learning_rate": 0.0005990392575794069,
      "loss": 3.4918,
      "step": 5871
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.552592158317566,
      "learning_rate": 0.0005990389304423256,
      "loss": 2.8911,
      "step": 5872
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1484901905059814,
      "learning_rate": 0.0005990386032496474,
      "loss": 3.2734,
      "step": 5873
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.083655595779419,
      "learning_rate": 0.0005990382760013721,
      "loss": 3.1679,
      "step": 5874
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7781903743743896,
      "learning_rate": 0.0005990379486975001,
      "loss": 3.2929,
      "step": 5875
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7305207252502441,
      "learning_rate": 0.0005990376213380312,
      "loss": 3.2107,
      "step": 5876
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9864062070846558,
      "learning_rate": 0.0005990372939229656,
      "loss": 3.2521,
      "step": 5877
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6139711141586304,
      "learning_rate": 0.0005990369664523033,
      "loss": 3.4262,
      "step": 5878
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6819570064544678,
      "learning_rate": 0.0005990366389260444,
      "loss": 3.3838,
      "step": 5879
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8033177852630615,
      "learning_rate": 0.000599036311344189,
      "loss": 3.2726,
      "step": 5880
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.896328330039978,
      "learning_rate": 0.000599035983706737,
      "loss": 3.355,
      "step": 5881
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3861474990844727,
      "learning_rate": 0.0005990356560136886,
      "loss": 3.3919,
      "step": 5882
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5564923286437988,
      "learning_rate": 0.0005990353282650439,
      "loss": 3.206,
      "step": 5883
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4870821237564087,
      "learning_rate": 0.0005990350004608028,
      "loss": 3.6296,
      "step": 5884
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.809990406036377,
      "learning_rate": 0.0005990346726009655,
      "loss": 3.2832,
      "step": 5885
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5967501401901245,
      "learning_rate": 0.000599034344685532,
      "loss": 3.2923,
      "step": 5886
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4441097974777222,
      "learning_rate": 0.0005990340167145023,
      "loss": 3.3037,
      "step": 5887
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.444704294204712,
      "learning_rate": 0.0005990336886878766,
      "loss": 3.0489,
      "step": 5888
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.347937822341919,
      "learning_rate": 0.0005990333606056549,
      "loss": 3.1403,
      "step": 5889
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7820614576339722,
      "learning_rate": 0.0005990330324678372,
      "loss": 3.2567,
      "step": 5890
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.8232624530792236,
      "learning_rate": 0.0005990327042744237,
      "loss": 3.1485,
      "step": 5891
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5952470302581787,
      "learning_rate": 0.0005990323760254144,
      "loss": 3.1292,
      "step": 5892
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5617109537124634,
      "learning_rate": 0.0005990320477208092,
      "loss": 3.1225,
      "step": 5893
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8254722356796265,
      "learning_rate": 0.0005990317193606084,
      "loss": 3.3666,
      "step": 5894
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6892120838165283,
      "learning_rate": 0.0005990313909448119,
      "loss": 3.4042,
      "step": 5895
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9648339748382568,
      "learning_rate": 0.0005990310624734199,
      "loss": 3.1787,
      "step": 5896
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9050569534301758,
      "learning_rate": 0.0005990307339464322,
      "loss": 3.2938,
      "step": 5897
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.433864116668701,
      "learning_rate": 0.0005990304053638493,
      "loss": 3.266,
      "step": 5898
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5347392559051514,
      "learning_rate": 0.0005990300767256708,
      "loss": 3.0501,
      "step": 5899
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7718464136123657,
      "learning_rate": 0.0005990297480318971,
      "loss": 3.5162,
      "step": 5900
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7198219299316406,
      "learning_rate": 0.000599029419282528,
      "loss": 2.8758,
      "step": 5901
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6035873889923096,
      "learning_rate": 0.0005990290904775636,
      "loss": 3.1618,
      "step": 5902
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7563214302062988,
      "learning_rate": 0.0005990287616170043,
      "loss": 3.5063,
      "step": 5903
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7704060077667236,
      "learning_rate": 0.0005990284327008498,
      "loss": 3.3315,
      "step": 5904
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5080591440200806,
      "learning_rate": 0.0005990281037291003,
      "loss": 3.3326,
      "step": 5905
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.628786325454712,
      "learning_rate": 0.0005990277747017558,
      "loss": 3.1346,
      "step": 5906
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.531800627708435,
      "learning_rate": 0.0005990274456188164,
      "loss": 3.1191,
      "step": 5907
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8327511548995972,
      "learning_rate": 0.0005990271164802821,
      "loss": 3.5035,
      "step": 5908
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8169392347335815,
      "learning_rate": 0.000599026787286153,
      "loss": 3.2112,
      "step": 5909
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4661450386047363,
      "learning_rate": 0.0005990264580364293,
      "loss": 3.6659,
      "step": 5910
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7598286867141724,
      "learning_rate": 0.000599026128731111,
      "loss": 3.0918,
      "step": 5911
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9682912826538086,
      "learning_rate": 0.0005990257993701978,
      "loss": 2.8867,
      "step": 5912
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.869135856628418,
      "learning_rate": 0.0005990254699536903,
      "loss": 3.1419,
      "step": 5913
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6307833194732666,
      "learning_rate": 0.0005990251404815883,
      "loss": 3.2011,
      "step": 5914
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.589003324508667,
      "learning_rate": 0.0005990248109538918,
      "loss": 3.3582,
      "step": 5915
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6570671796798706,
      "learning_rate": 0.0005990244813706009,
      "loss": 3.4726,
      "step": 5916
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6406201124191284,
      "learning_rate": 0.0005990241517317158,
      "loss": 3.3404,
      "step": 5917
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2358756065368652,
      "learning_rate": 0.0005990238220372365,
      "loss": 3.1792,
      "step": 5918
    },
    {
      "epoch": 0.08,
      "grad_norm": 4.828913688659668,
      "learning_rate": 0.0005990234922871629,
      "loss": 3.1763,
      "step": 5919
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1508851051330566,
      "learning_rate": 0.0005990231624814952,
      "loss": 3.094,
      "step": 5920
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.009369134902954,
      "learning_rate": 0.0005990228326202337,
      "loss": 3.3763,
      "step": 5921
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5249395370483398,
      "learning_rate": 0.0005990225027033779,
      "loss": 3.4542,
      "step": 5922
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5507721900939941,
      "learning_rate": 0.0005990221727309282,
      "loss": 3.2154,
      "step": 5923
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9514503479003906,
      "learning_rate": 0.0005990218427028848,
      "loss": 3.6629,
      "step": 5924
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7829638719558716,
      "learning_rate": 0.0005990215126192475,
      "loss": 3.297,
      "step": 5925
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.155489444732666,
      "learning_rate": 0.0005990211824800166,
      "loss": 3.3074,
      "step": 5926
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.208510160446167,
      "learning_rate": 0.0005990208522851919,
      "loss": 3.0325,
      "step": 5927
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.139460802078247,
      "learning_rate": 0.0005990205220347735,
      "loss": 3.3227,
      "step": 5928
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.549346685409546,
      "learning_rate": 0.0005990201917287617,
      "loss": 3.3498,
      "step": 5929
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1049106121063232,
      "learning_rate": 0.0005990198613671563,
      "loss": 3.327,
      "step": 5930
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5471112728118896,
      "learning_rate": 0.0005990195309499574,
      "loss": 3.2395,
      "step": 5931
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7647055387496948,
      "learning_rate": 0.0005990192004771653,
      "loss": 3.5471,
      "step": 5932
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4320110082626343,
      "learning_rate": 0.0005990188699487797,
      "loss": 3.4543,
      "step": 5933
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.235652208328247,
      "learning_rate": 0.000599018539364801,
      "loss": 3.3148,
      "step": 5934
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4405614137649536,
      "learning_rate": 0.000599018208725229,
      "loss": 3.2652,
      "step": 5935
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.384031057357788,
      "learning_rate": 0.000599017878030064,
      "loss": 3.2447,
      "step": 5936
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8880058526992798,
      "learning_rate": 0.0005990175472793057,
      "loss": 3.5152,
      "step": 5937
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8840420246124268,
      "learning_rate": 0.0005990172164729547,
      "loss": 3.4701,
      "step": 5938
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4735585451126099,
      "learning_rate": 0.0005990168856110106,
      "loss": 3.2753,
      "step": 5939
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.060049057006836,
      "learning_rate": 0.0005990165546934736,
      "loss": 3.0909,
      "step": 5940
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.020366907119751,
      "learning_rate": 0.0005990162237203437,
      "loss": 3.2138,
      "step": 5941
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4150440692901611,
      "learning_rate": 0.0005990158926916212,
      "loss": 3.5219,
      "step": 5942
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.271233081817627,
      "learning_rate": 0.000599015561607306,
      "loss": 3.346,
      "step": 5943
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.373525857925415,
      "learning_rate": 0.000599015230467398,
      "loss": 3.0882,
      "step": 5944
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5172311067581177,
      "learning_rate": 0.0005990148992718975,
      "loss": 3.2934,
      "step": 5945
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5519473552703857,
      "learning_rate": 0.0005990145680208046,
      "loss": 3.0581,
      "step": 5946
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.1105105876922607,
      "learning_rate": 0.0005990142367141192,
      "loss": 3.1287,
      "step": 5947
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.851321220397949,
      "learning_rate": 0.0005990139053518413,
      "loss": 3.197,
      "step": 5948
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7335740327835083,
      "learning_rate": 0.0005990135739339711,
      "loss": 3.3092,
      "step": 5949
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.723188638687134,
      "learning_rate": 0.0005990132424605087,
      "loss": 3.2258,
      "step": 5950
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.185647487640381,
      "learning_rate": 0.0005990129109314541,
      "loss": 3.2792,
      "step": 5951
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6240571737289429,
      "learning_rate": 0.0005990125793468073,
      "loss": 3.0568,
      "step": 5952
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.794965386390686,
      "learning_rate": 0.0005990122477065685,
      "loss": 3.5329,
      "step": 5953
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.3574395179748535,
      "learning_rate": 0.0005990119160107375,
      "loss": 3.3174,
      "step": 5954
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6953389644622803,
      "learning_rate": 0.0005990115842593146,
      "loss": 3.4041,
      "step": 5955
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3675904273986816,
      "learning_rate": 0.0005990112524522999,
      "loss": 3.0474,
      "step": 5956
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.063950777053833,
      "learning_rate": 0.0005990109205896934,
      "loss": 3.401,
      "step": 5957
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.531255841255188,
      "learning_rate": 0.000599010588671495,
      "loss": 3.2957,
      "step": 5958
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4663971662521362,
      "learning_rate": 0.000599010256697705,
      "loss": 3.3994,
      "step": 5959
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5325515270233154,
      "learning_rate": 0.0005990099246683232,
      "loss": 3.393,
      "step": 5960
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4185636043548584,
      "learning_rate": 0.0005990095925833499,
      "loss": 3.2387,
      "step": 5961
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.388742208480835,
      "learning_rate": 0.000599009260442785,
      "loss": 2.9851,
      "step": 5962
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.232365846633911,
      "learning_rate": 0.0005990089282466287,
      "loss": 3.5128,
      "step": 5963
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3985929489135742,
      "learning_rate": 0.000599008595994881,
      "loss": 3.3805,
      "step": 5964
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.452008605003357,
      "learning_rate": 0.000599008263687542,
      "loss": 3.0913,
      "step": 5965
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4606292247772217,
      "learning_rate": 0.0005990079313246116,
      "loss": 3.2405,
      "step": 5966
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0896339416503906,
      "learning_rate": 0.0005990075989060901,
      "loss": 3.2986,
      "step": 5967
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8078179359436035,
      "learning_rate": 0.0005990072664319774,
      "loss": 3.33,
      "step": 5968
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5463916063308716,
      "learning_rate": 0.0005990069339022736,
      "loss": 3.3627,
      "step": 5969
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6250544786453247,
      "learning_rate": 0.0005990066013169789,
      "loss": 3.4365,
      "step": 5970
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2983927726745605,
      "learning_rate": 0.000599006268676093,
      "loss": 3.3848,
      "step": 5971
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.746793508529663,
      "learning_rate": 0.0005990059359796163,
      "loss": 3.1429,
      "step": 5972
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.554559350013733,
      "learning_rate": 0.0005990056032275488,
      "loss": 3.211,
      "step": 5973
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0884652137756348,
      "learning_rate": 0.0005990052704198905,
      "loss": 3.0363,
      "step": 5974
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0125691890716553,
      "learning_rate": 0.0005990049375566414,
      "loss": 3.005,
      "step": 5975
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0505435466766357,
      "learning_rate": 0.0005990046046378016,
      "loss": 3.4308,
      "step": 5976
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6707154512405396,
      "learning_rate": 0.0005990042716633713,
      "loss": 3.3551,
      "step": 5977
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7209417819976807,
      "learning_rate": 0.0005990039386333504,
      "loss": 3.2194,
      "step": 5978
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2085952758789062,
      "learning_rate": 0.0005990036055477391,
      "loss": 3.1774,
      "step": 5979
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.920884609222412,
      "learning_rate": 0.0005990032724065373,
      "loss": 3.4295,
      "step": 5980
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0509307384490967,
      "learning_rate": 0.0005990029392097452,
      "loss": 3.2264,
      "step": 5981
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6587257385253906,
      "learning_rate": 0.0005990026059573628,
      "loss": 3.3325,
      "step": 5982
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8728196620941162,
      "learning_rate": 0.0005990022726493901,
      "loss": 3.4312,
      "step": 5983
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8547427654266357,
      "learning_rate": 0.0005990019392858273,
      "loss": 3.4967,
      "step": 5984
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7763749361038208,
      "learning_rate": 0.0005990016058666745,
      "loss": 3.2249,
      "step": 5985
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2100610733032227,
      "learning_rate": 0.0005990012723919314,
      "loss": 3.1236,
      "step": 5986
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.871447205543518,
      "learning_rate": 0.0005990009388615986,
      "loss": 3.3067,
      "step": 5987
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9726415872573853,
      "learning_rate": 0.0005990006052756757,
      "loss": 3.3216,
      "step": 5988
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5274144411087036,
      "learning_rate": 0.000599000271634163,
      "loss": 3.2934,
      "step": 5989
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5276427268981934,
      "learning_rate": 0.0005989999379370605,
      "loss": 3.5128,
      "step": 5990
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.538731575012207,
      "learning_rate": 0.0005989996041843684,
      "loss": 3.1331,
      "step": 5991
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7087582349777222,
      "learning_rate": 0.0005989992703760863,
      "loss": 3.2008,
      "step": 5992
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.71799635887146,
      "learning_rate": 0.0005989989365122149,
      "loss": 3.2351,
      "step": 5993
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.321755886077881,
      "learning_rate": 0.0005989986025927539,
      "loss": 3.3778,
      "step": 5994
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7257418632507324,
      "learning_rate": 0.0005989982686177033,
      "loss": 3.3482,
      "step": 5995
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.726840853691101,
      "learning_rate": 0.0005989979345870634,
      "loss": 3.1981,
      "step": 5996
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1247878074645996,
      "learning_rate": 0.000598997600500834,
      "loss": 3.2151,
      "step": 5997
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.344803810119629,
      "learning_rate": 0.0005989972663590154,
      "loss": 3.2647,
      "step": 5998
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.045518398284912,
      "learning_rate": 0.0005989969321616075,
      "loss": 3.215,
      "step": 5999
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6402194499969482,
      "learning_rate": 0.0005989965979086105,
      "loss": 3.1112,
      "step": 6000
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3286141157150269,
      "learning_rate": 0.0005989962636000244,
      "loss": 3.2499,
      "step": 6001
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.045315980911255,
      "learning_rate": 0.0005989959292358492,
      "loss": 3.2993,
      "step": 6002
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.432912826538086,
      "learning_rate": 0.0005989955948160851,
      "loss": 3.1112,
      "step": 6003
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5222293138504028,
      "learning_rate": 0.0005989952603407319,
      "loss": 3.1759,
      "step": 6004
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3941426277160645,
      "learning_rate": 0.0005989949258097899,
      "loss": 3.1142,
      "step": 6005
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1242268085479736,
      "learning_rate": 0.0005989945912232592,
      "loss": 3.1268,
      "step": 6006
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0612075328826904,
      "learning_rate": 0.0005989942565811397,
      "loss": 3.1747,
      "step": 6007
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1502904891967773,
      "learning_rate": 0.0005989939218834315,
      "loss": 3.3811,
      "step": 6008
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7001872062683105,
      "learning_rate": 0.0005989935871301347,
      "loss": 3.1512,
      "step": 6009
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.3921287059783936,
      "learning_rate": 0.0005989932523212493,
      "loss": 2.9414,
      "step": 6010
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4491686820983887,
      "learning_rate": 0.0005989929174567756,
      "loss": 3.0639,
      "step": 6011
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9639813899993896,
      "learning_rate": 0.0005989925825367133,
      "loss": 3.4632,
      "step": 6012
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1897265911102295,
      "learning_rate": 0.0005989922475610627,
      "loss": 3.4237,
      "step": 6013
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8766616582870483,
      "learning_rate": 0.0005989919125298237,
      "loss": 3.4717,
      "step": 6014
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4706109762191772,
      "learning_rate": 0.0005989915774429965,
      "loss": 3.3324,
      "step": 6015
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.190572738647461,
      "learning_rate": 0.0005989912423005812,
      "loss": 3.4429,
      "step": 6016
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.289116382598877,
      "learning_rate": 0.0005989909071025777,
      "loss": 3.1132,
      "step": 6017
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8968000411987305,
      "learning_rate": 0.0005989905718489862,
      "loss": 3.2801,
      "step": 6018
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2464699745178223,
      "learning_rate": 0.0005989902365398067,
      "loss": 3.3795,
      "step": 6019
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5202808380126953,
      "learning_rate": 0.0005989899011750393,
      "loss": 3.4302,
      "step": 6020
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9130345582962036,
      "learning_rate": 0.0005989895657546841,
      "loss": 3.1894,
      "step": 6021
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3999009132385254,
      "learning_rate": 0.0005989892302787408,
      "loss": 3.4962,
      "step": 6022
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.935405969619751,
      "learning_rate": 0.0005989888947472101,
      "loss": 3.4275,
      "step": 6023
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7538886070251465,
      "learning_rate": 0.0005989885591600916,
      "loss": 3.5555,
      "step": 6024
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3728713989257812,
      "learning_rate": 0.0005989882235173854,
      "loss": 3.1124,
      "step": 6025
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.037026882171631,
      "learning_rate": 0.0005989878878190917,
      "loss": 3.4298,
      "step": 6026
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.622466564178467,
      "learning_rate": 0.0005989875520652104,
      "loss": 3.3743,
      "step": 6027
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7061023712158203,
      "learning_rate": 0.0005989872162557419,
      "loss": 3.1583,
      "step": 6028
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.547526478767395,
      "learning_rate": 0.0005989868803906858,
      "loss": 3.2603,
      "step": 6029
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0797040462493896,
      "learning_rate": 0.0005989865444700425,
      "loss": 3.2678,
      "step": 6030
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.9093117713928223,
      "learning_rate": 0.0005989862084938121,
      "loss": 3.4431,
      "step": 6031
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.1098294258117676,
      "learning_rate": 0.0005989858724619943,
      "loss": 3.3508,
      "step": 6032
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.3004438877105713,
      "learning_rate": 0.0005989855363745895,
      "loss": 3.4926,
      "step": 6033
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9332592487335205,
      "learning_rate": 0.0005989852002315975,
      "loss": 3.3417,
      "step": 6034
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.337083578109741,
      "learning_rate": 0.0005989848640330186,
      "loss": 3.5106,
      "step": 6035
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.871378183364868,
      "learning_rate": 0.0005989845277788528,
      "loss": 3.1017,
      "step": 6036
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7227258682250977,
      "learning_rate": 0.0005989841914691002,
      "loss": 3.7655,
      "step": 6037
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.938454031944275,
      "learning_rate": 0.0005989838551037607,
      "loss": 3.1287,
      "step": 6038
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.204925775527954,
      "learning_rate": 0.0005989835186828345,
      "loss": 3.2303,
      "step": 6039
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.17887806892395,
      "learning_rate": 0.0005989831822063216,
      "loss": 3.4146,
      "step": 6040
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9129465818405151,
      "learning_rate": 0.000598982845674222,
      "loss": 3.293,
      "step": 6041
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0819568634033203,
      "learning_rate": 0.0005989825090865359,
      "loss": 3.1751,
      "step": 6042
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6566216945648193,
      "learning_rate": 0.0005989821724432634,
      "loss": 3.2936,
      "step": 6043
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5809403657913208,
      "learning_rate": 0.0005989818357444044,
      "loss": 3.4534,
      "step": 6044
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.563937783241272,
      "learning_rate": 0.000598981498989959,
      "loss": 3.5244,
      "step": 6045
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9114910364151,
      "learning_rate": 0.0005989811621799273,
      "loss": 3.2322,
      "step": 6046
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6228634119033813,
      "learning_rate": 0.0005989808253143094,
      "loss": 3.2708,
      "step": 6047
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4216217994689941,
      "learning_rate": 0.0005989804883931054,
      "loss": 3.3091,
      "step": 6048
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.752471685409546,
      "learning_rate": 0.0005989801514163152,
      "loss": 3.206,
      "step": 6049
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6337149143218994,
      "learning_rate": 0.0005989798143839389,
      "loss": 3.2698,
      "step": 6050
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.139784812927246,
      "learning_rate": 0.0005989794772959766,
      "loss": 3.5178,
      "step": 6051
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.084712505340576,
      "learning_rate": 0.0005989791401524284,
      "loss": 3.3522,
      "step": 6052
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0419206619262695,
      "learning_rate": 0.0005989788029532945,
      "loss": 3.1201,
      "step": 6053
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8381372690200806,
      "learning_rate": 0.0005989784656985745,
      "loss": 3.0224,
      "step": 6054
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6188387870788574,
      "learning_rate": 0.000598978128388269,
      "loss": 3.1424,
      "step": 6055
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.0424928665161133,
      "learning_rate": 0.0005989777910223777,
      "loss": 3.4276,
      "step": 6056
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.4681942462921143,
      "learning_rate": 0.0005989774536009008,
      "loss": 3.2724,
      "step": 6057
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7882355451583862,
      "learning_rate": 0.0005989771161238383,
      "loss": 3.5421,
      "step": 6058
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.238884687423706,
      "learning_rate": 0.0005989767785911904,
      "loss": 3.571,
      "step": 6059
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.300370693206787,
      "learning_rate": 0.0005989764410029571,
      "loss": 3.4568,
      "step": 6060
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.352461576461792,
      "learning_rate": 0.0005989761033591384,
      "loss": 3.2744,
      "step": 6061
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5896936655044556,
      "learning_rate": 0.0005989757656597344,
      "loss": 3.4405,
      "step": 6062
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.578331470489502,
      "learning_rate": 0.0005989754279047451,
      "loss": 3.1454,
      "step": 6063
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7716327905654907,
      "learning_rate": 0.0005989750900941707,
      "loss": 3.2948,
      "step": 6064
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.76816725730896,
      "learning_rate": 0.0005989747522280111,
      "loss": 3.1552,
      "step": 6065
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.1356377601623535,
      "learning_rate": 0.0005989744143062665,
      "loss": 3.3467,
      "step": 6066
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.433049440383911,
      "learning_rate": 0.0005989740763289369,
      "loss": 3.3945,
      "step": 6067
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.210815906524658,
      "learning_rate": 0.0005989737382960224,
      "loss": 3.1341,
      "step": 6068
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1378355026245117,
      "learning_rate": 0.000598973400207523,
      "loss": 3.2368,
      "step": 6069
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0515525341033936,
      "learning_rate": 0.0005989730620634388,
      "loss": 3.3586,
      "step": 6070
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.511695623397827,
      "learning_rate": 0.00059897272386377,
      "loss": 3.4323,
      "step": 6071
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8702921867370605,
      "learning_rate": 0.0005989723856085164,
      "loss": 3.2017,
      "step": 6072
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4679157733917236,
      "learning_rate": 0.0005989720472976782,
      "loss": 3.4988,
      "step": 6073
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7252951860427856,
      "learning_rate": 0.0005989717089312554,
      "loss": 3.2153,
      "step": 6074
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6672043800354004,
      "learning_rate": 0.0005989713705092482,
      "loss": 3.3064,
      "step": 6075
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5086194276809692,
      "learning_rate": 0.0005989710320316566,
      "loss": 3.298,
      "step": 6076
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.358506679534912,
      "learning_rate": 0.0005989706934984806,
      "loss": 3.2485,
      "step": 6077
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6164463758468628,
      "learning_rate": 0.0005989703549097203,
      "loss": 3.0966,
      "step": 6078
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.514082908630371,
      "learning_rate": 0.0005989700162653759,
      "loss": 3.3967,
      "step": 6079
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.506248950958252,
      "learning_rate": 0.0005989696775654471,
      "loss": 3.3405,
      "step": 6080
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7477339506149292,
      "learning_rate": 0.0005989693388099343,
      "loss": 3.3777,
      "step": 6081
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4517099857330322,
      "learning_rate": 0.0005989689999988375,
      "loss": 3.4139,
      "step": 6082
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7461313009262085,
      "learning_rate": 0.0005989686611321566,
      "loss": 3.2601,
      "step": 6083
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.555841088294983,
      "learning_rate": 0.000598968322209892,
      "loss": 3.1018,
      "step": 6084
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7859668731689453,
      "learning_rate": 0.0005989679832320433,
      "loss": 3.296,
      "step": 6085
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.826460361480713,
      "learning_rate": 0.0005989676441986109,
      "loss": 3.4168,
      "step": 6086
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4439129829406738,
      "learning_rate": 0.0005989673051095948,
      "loss": 3.4313,
      "step": 6087
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3970692157745361,
      "learning_rate": 0.000598966965964995,
      "loss": 3.5163,
      "step": 6088
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9915010929107666,
      "learning_rate": 0.0005989666267648116,
      "loss": 3.278,
      "step": 6089
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4911190271377563,
      "learning_rate": 0.0005989662875090447,
      "loss": 3.2902,
      "step": 6090
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5981452465057373,
      "learning_rate": 0.0005989659481976943,
      "loss": 3.4737,
      "step": 6091
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.045159339904785,
      "learning_rate": 0.0005989656088307605,
      "loss": 3.3465,
      "step": 6092
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9521573781967163,
      "learning_rate": 0.0005989652694082432,
      "loss": 3.2861,
      "step": 6093
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4601045846939087,
      "learning_rate": 0.0005989649299301429,
      "loss": 3.263,
      "step": 6094
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3910281658172607,
      "learning_rate": 0.0005989645903964592,
      "loss": 3.4949,
      "step": 6095
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.843106985092163,
      "learning_rate": 0.0005989642508071924,
      "loss": 3.1492,
      "step": 6096
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.416762113571167,
      "learning_rate": 0.0005989639111623425,
      "loss": 2.9819,
      "step": 6097
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9162569046020508,
      "learning_rate": 0.0005989635714619095,
      "loss": 3.4093,
      "step": 6098
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0681281089782715,
      "learning_rate": 0.0005989632317058935,
      "loss": 3.0711,
      "step": 6099
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2639333009719849,
      "learning_rate": 0.0005989628918942946,
      "loss": 3.5201,
      "step": 6100
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4618223905563354,
      "learning_rate": 0.000598962552027113,
      "loss": 3.4702,
      "step": 6101
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.853696346282959,
      "learning_rate": 0.0005989622121043485,
      "loss": 3.1146,
      "step": 6102
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.481282114982605,
      "learning_rate": 0.0005989618721260014,
      "loss": 3.1823,
      "step": 6103
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.80581533908844,
      "learning_rate": 0.0005989615320920714,
      "loss": 3.0701,
      "step": 6104
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.525136113166809,
      "learning_rate": 0.000598961192002559,
      "loss": 3.3261,
      "step": 6105
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4542815685272217,
      "learning_rate": 0.0005989608518574641,
      "loss": 3.2633,
      "step": 6106
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6946619749069214,
      "learning_rate": 0.0005989605116567867,
      "loss": 3.2861,
      "step": 6107
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7148607969284058,
      "learning_rate": 0.0005989601714005267,
      "loss": 3.382,
      "step": 6108
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5612527132034302,
      "learning_rate": 0.0005989598310886846,
      "loss": 3.0778,
      "step": 6109
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4166090488433838,
      "learning_rate": 0.0005989594907212601,
      "loss": 3.3361,
      "step": 6110
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.699866533279419,
      "learning_rate": 0.0005989591502982535,
      "loss": 3.2417,
      "step": 6111
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4979945421218872,
      "learning_rate": 0.0005989588098196647,
      "loss": 3.1885,
      "step": 6112
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.499717116355896,
      "learning_rate": 0.0005989584692854937,
      "loss": 3.102,
      "step": 6113
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1692235469818115,
      "learning_rate": 0.0005989581286957408,
      "loss": 3.2005,
      "step": 6114
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4731683731079102,
      "learning_rate": 0.000598957788050406,
      "loss": 3.3371,
      "step": 6115
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5318889617919922,
      "learning_rate": 0.0005989574473494893,
      "loss": 3.2343,
      "step": 6116
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.995384693145752,
      "learning_rate": 0.0005989571065929906,
      "loss": 3.4772,
      "step": 6117
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7057323455810547,
      "learning_rate": 0.0005989567657809101,
      "loss": 3.126,
      "step": 6118
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7462671995162964,
      "learning_rate": 0.0005989564249132481,
      "loss": 3.302,
      "step": 6119
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8249129056930542,
      "learning_rate": 0.0005989560839900044,
      "loss": 3.4439,
      "step": 6120
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4702106714248657,
      "learning_rate": 0.000598955743011179,
      "loss": 3.1156,
      "step": 6121
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5939151048660278,
      "learning_rate": 0.0005989554019767723,
      "loss": 3.1608,
      "step": 6122
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.405084490776062,
      "learning_rate": 0.000598955060886784,
      "loss": 3.1955,
      "step": 6123
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2320078611373901,
      "learning_rate": 0.0005989547197412143,
      "loss": 3.2035,
      "step": 6124
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.520810842514038,
      "learning_rate": 0.0005989543785400633,
      "loss": 3.1551,
      "step": 6125
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.590440034866333,
      "learning_rate": 0.000598954037283331,
      "loss": 3.5516,
      "step": 6126
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5260926485061646,
      "learning_rate": 0.0005989536959710176,
      "loss": 3.4085,
      "step": 6127
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.952163577079773,
      "learning_rate": 0.0005989533546031231,
      "loss": 3.1343,
      "step": 6128
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8578972816467285,
      "learning_rate": 0.0005989530131796474,
      "loss": 3.4296,
      "step": 6129
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.886856198310852,
      "learning_rate": 0.0005989526717005907,
      "loss": 3.2286,
      "step": 6130
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.394733190536499,
      "learning_rate": 0.0005989523301659531,
      "loss": 3.4103,
      "step": 6131
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1089394092559814,
      "learning_rate": 0.0005989519885757346,
      "loss": 3.1054,
      "step": 6132
    },
    {
      "epoch": 0.08,
      "grad_norm": 4.332274913787842,
      "learning_rate": 0.0005989516469299352,
      "loss": 3.1706,
      "step": 6133
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7077146768569946,
      "learning_rate": 0.0005989513052285552,
      "loss": 2.9655,
      "step": 6134
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3623886108398438,
      "learning_rate": 0.0005989509634715944,
      "loss": 3.4082,
      "step": 6135
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.709961175918579,
      "learning_rate": 0.000598950621659053,
      "loss": 3.2957,
      "step": 6136
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3797826766967773,
      "learning_rate": 0.0005989502797909311,
      "loss": 3.1282,
      "step": 6137
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.97977614402771,
      "learning_rate": 0.0005989499378672286,
      "loss": 2.9831,
      "step": 6138
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8665375709533691,
      "learning_rate": 0.0005989495958879457,
      "loss": 3.4255,
      "step": 6139
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7916759252548218,
      "learning_rate": 0.0005989492538530825,
      "loss": 3.2302,
      "step": 6140
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3674910068511963,
      "learning_rate": 0.000598948911762639,
      "loss": 3.3549,
      "step": 6141
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.349484443664551,
      "learning_rate": 0.0005989485696166152,
      "loss": 3.3613,
      "step": 6142
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.220961332321167,
      "learning_rate": 0.0005989482274150112,
      "loss": 3.45,
      "step": 6143
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6149444580078125,
      "learning_rate": 0.000598947885157827,
      "loss": 2.9232,
      "step": 6144
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9834898710250854,
      "learning_rate": 0.0005989475428450629,
      "loss": 3.4026,
      "step": 6145
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.695019006729126,
      "learning_rate": 0.0005989472004767186,
      "loss": 3.2623,
      "step": 6146
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7191989421844482,
      "learning_rate": 0.0005989468580527947,
      "loss": 3.342,
      "step": 6147
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.470622181892395,
      "learning_rate": 0.0005989465155732907,
      "loss": 3.1032,
      "step": 6148
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8810958862304688,
      "learning_rate": 0.000598946173038207,
      "loss": 3.2921,
      "step": 6149
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9936071634292603,
      "learning_rate": 0.0005989458304475434,
      "loss": 3.4332,
      "step": 6150
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7917733192443848,
      "learning_rate": 0.0005989454878013003,
      "loss": 3.1828,
      "step": 6151
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.330564260482788,
      "learning_rate": 0.0005989451450994775,
      "loss": 3.2185,
      "step": 6152
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8580052852630615,
      "learning_rate": 0.0005989448023420752,
      "loss": 3.1092,
      "step": 6153
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7823541164398193,
      "learning_rate": 0.0005989444595290934,
      "loss": 3.3974,
      "step": 6154
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8502177000045776,
      "learning_rate": 0.0005989441166605322,
      "loss": 3.1981,
      "step": 6155
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3456733226776123,
      "learning_rate": 0.0005989437737363916,
      "loss": 3.3602,
      "step": 6156
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4811344146728516,
      "learning_rate": 0.0005989434307566718,
      "loss": 3.3438,
      "step": 6157
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.418318271636963,
      "learning_rate": 0.0005989430877213727,
      "loss": 3.2815,
      "step": 6158
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3696013689041138,
      "learning_rate": 0.0005989427446304944,
      "loss": 3.3078,
      "step": 6159
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.9866318702697754,
      "learning_rate": 0.000598942401484037,
      "loss": 3.2307,
      "step": 6160
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7552645206451416,
      "learning_rate": 0.0005989420582820006,
      "loss": 3.6003,
      "step": 6161
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.766802191734314,
      "learning_rate": 0.0005989417150243853,
      "loss": 3.3943,
      "step": 6162
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.703344464302063,
      "learning_rate": 0.000598941371711191,
      "loss": 3.2277,
      "step": 6163
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9314724206924438,
      "learning_rate": 0.0005989410283424179,
      "loss": 3.2994,
      "step": 6164
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8848140239715576,
      "learning_rate": 0.000598940684918066,
      "loss": 3.4206,
      "step": 6165
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4473481178283691,
      "learning_rate": 0.0005989403414381354,
      "loss": 3.2696,
      "step": 6166
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4526655673980713,
      "learning_rate": 0.0005989399979026261,
      "loss": 2.978,
      "step": 6167
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7370338439941406,
      "learning_rate": 0.0005989396543115384,
      "loss": 3.3245,
      "step": 6168
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8585636615753174,
      "learning_rate": 0.000598939310664872,
      "loss": 3.2996,
      "step": 6169
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1291956901550293,
      "learning_rate": 0.0005989389669626272,
      "loss": 3.1954,
      "step": 6170
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.1448798179626465,
      "learning_rate": 0.000598938623204804,
      "loss": 3.3814,
      "step": 6171
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4103713035583496,
      "learning_rate": 0.0005989382793914023,
      "loss": 3.4585,
      "step": 6172
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.480867624282837,
      "learning_rate": 0.0005989379355224225,
      "loss": 3.1733,
      "step": 6173
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.001315116882324,
      "learning_rate": 0.0005989375915978645,
      "loss": 3.3983,
      "step": 6174
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8937734365463257,
      "learning_rate": 0.0005989372476177283,
      "loss": 3.1857,
      "step": 6175
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5199528932571411,
      "learning_rate": 0.000598936903582014,
      "loss": 3.2534,
      "step": 6176
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7511826753616333,
      "learning_rate": 0.0005989365594907218,
      "loss": 3.2239,
      "step": 6177
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3309452533721924,
      "learning_rate": 0.0005989362153438516,
      "loss": 3.2033,
      "step": 6178
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7174837589263916,
      "learning_rate": 0.0005989358711414035,
      "loss": 3.2696,
      "step": 6179
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9135173559188843,
      "learning_rate": 0.0005989355268833775,
      "loss": 3.3208,
      "step": 6180
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4173762798309326,
      "learning_rate": 0.0005989351825697738,
      "loss": 3.1886,
      "step": 6181
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7865166664123535,
      "learning_rate": 0.0005989348382005925,
      "loss": 3.4826,
      "step": 6182
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4347114562988281,
      "learning_rate": 0.0005989344937758336,
      "loss": 3.3154,
      "step": 6183
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6874512434005737,
      "learning_rate": 0.0005989341492954969,
      "loss": 3.2321,
      "step": 6184
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.0283584594726562,
      "learning_rate": 0.0005989338047595828,
      "loss": 3.3507,
      "step": 6185
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.380115270614624,
      "learning_rate": 0.0005989334601680914,
      "loss": 3.3142,
      "step": 6186
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4642452001571655,
      "learning_rate": 0.0005989331155210225,
      "loss": 3.3423,
      "step": 6187
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.193208932876587,
      "learning_rate": 0.0005989327708183763,
      "loss": 3.1773,
      "step": 6188
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5894922018051147,
      "learning_rate": 0.0005989324260601529,
      "loss": 3.331,
      "step": 6189
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3536252975463867,
      "learning_rate": 0.0005989320812463522,
      "loss": 3.2946,
      "step": 6190
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6045807600021362,
      "learning_rate": 0.0005989317363769745,
      "loss": 3.0861,
      "step": 6191
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5212476253509521,
      "learning_rate": 0.0005989313914520197,
      "loss": 3.2509,
      "step": 6192
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.679996967315674,
      "learning_rate": 0.000598931046471488,
      "loss": 3.3341,
      "step": 6193
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.011143922805786,
      "learning_rate": 0.0005989307014353792,
      "loss": 3.3047,
      "step": 6194
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.789752721786499,
      "learning_rate": 0.0005989303563436937,
      "loss": 3.33,
      "step": 6195
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7722288370132446,
      "learning_rate": 0.0005989300111964314,
      "loss": 3.1257,
      "step": 6196
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.566554307937622,
      "learning_rate": 0.0005989296659935923,
      "loss": 3.3821,
      "step": 6197
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1025681495666504,
      "learning_rate": 0.0005989293207351766,
      "loss": 3.308,
      "step": 6198
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1770670413970947,
      "learning_rate": 0.0005989289754211842,
      "loss": 3.3629,
      "step": 6199
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8548120260238647,
      "learning_rate": 0.0005989286300516152,
      "loss": 3.0361,
      "step": 6200
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.215402364730835,
      "learning_rate": 0.0005989282846264699,
      "loss": 3.2364,
      "step": 6201
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.109344005584717,
      "learning_rate": 0.0005989279391457481,
      "loss": 3.5413,
      "step": 6202
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.235229015350342,
      "learning_rate": 0.00059892759360945,
      "loss": 3.2692,
      "step": 6203
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7920440435409546,
      "learning_rate": 0.0005989272480175756,
      "loss": 3.1766,
      "step": 6204
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8240885734558105,
      "learning_rate": 0.000598926902370125,
      "loss": 3.2805,
      "step": 6205
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9893338680267334,
      "learning_rate": 0.0005989265566670982,
      "loss": 3.3345,
      "step": 6206
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.045875072479248,
      "learning_rate": 0.0005989262109084953,
      "loss": 3.4277,
      "step": 6207
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3808810710906982,
      "learning_rate": 0.0005989258650943164,
      "loss": 3.1684,
      "step": 6208
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.693745732307434,
      "learning_rate": 0.0005989255192245616,
      "loss": 3.2178,
      "step": 6209
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.407700538635254,
      "learning_rate": 0.0005989251732992308,
      "loss": 3.5038,
      "step": 6210
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8301819562911987,
      "learning_rate": 0.0005989248273183242,
      "loss": 3.5471,
      "step": 6211
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8794487714767456,
      "learning_rate": 0.0005989244812818419,
      "loss": 3.3668,
      "step": 6212
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.468986988067627,
      "learning_rate": 0.0005989241351897839,
      "loss": 3.0325,
      "step": 6213
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5635766983032227,
      "learning_rate": 0.0005989237890421501,
      "loss": 3.4314,
      "step": 6214
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.284860849380493,
      "learning_rate": 0.0005989234428389408,
      "loss": 3.2388,
      "step": 6215
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.018427848815918,
      "learning_rate": 0.0005989230965801561,
      "loss": 3.3997,
      "step": 6216
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.356032371520996,
      "learning_rate": 0.0005989227502657959,
      "loss": 3.3043,
      "step": 6217
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6157609224319458,
      "learning_rate": 0.0005989224038958602,
      "loss": 3.4642,
      "step": 6218
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6845479011535645,
      "learning_rate": 0.0005989220574703493,
      "loss": 3.3849,
      "step": 6219
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4733967781066895,
      "learning_rate": 0.0005989217109892631,
      "loss": 3.1518,
      "step": 6220
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5690639019012451,
      "learning_rate": 0.0005989213644526018,
      "loss": 3.337,
      "step": 6221
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5093560218811035,
      "learning_rate": 0.0005989210178603653,
      "loss": 3.2825,
      "step": 6222
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6111353635787964,
      "learning_rate": 0.0005989206712125537,
      "loss": 3.3712,
      "step": 6223
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8011937141418457,
      "learning_rate": 0.0005989203245091671,
      "loss": 3.1095,
      "step": 6224
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5901737213134766,
      "learning_rate": 0.0005989199777502056,
      "loss": 3.4149,
      "step": 6225
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3457696437835693,
      "learning_rate": 0.0005989196309356693,
      "loss": 3.3094,
      "step": 6226
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0951714515686035,
      "learning_rate": 0.0005989192840655582,
      "loss": 3.4914,
      "step": 6227
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7953994274139404,
      "learning_rate": 0.0005989189371398723,
      "loss": 3.0268,
      "step": 6228
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7840542793273926,
      "learning_rate": 0.0005989185901586118,
      "loss": 3.2514,
      "step": 6229
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4204413890838623,
      "learning_rate": 0.0005989182431217766,
      "loss": 3.2976,
      "step": 6230
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.040092945098877,
      "learning_rate": 0.0005989178960293668,
      "loss": 3.5345,
      "step": 6231
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5558102130889893,
      "learning_rate": 0.0005989175488813826,
      "loss": 3.3673,
      "step": 6232
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4647750854492188,
      "learning_rate": 0.0005989172016778241,
      "loss": 3.4765,
      "step": 6233
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.425199031829834,
      "learning_rate": 0.0005989168544186911,
      "loss": 3.4278,
      "step": 6234
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7519662380218506,
      "learning_rate": 0.0005989165071039839,
      "loss": 3.1965,
      "step": 6235
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.747030735015869,
      "learning_rate": 0.0005989161597337024,
      "loss": 3.4872,
      "step": 6236
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4583795070648193,
      "learning_rate": 0.0005989158123078467,
      "loss": 3.1839,
      "step": 6237
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3707408905029297,
      "learning_rate": 0.000598915464826417,
      "loss": 3.1224,
      "step": 6238
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.292422294616699,
      "learning_rate": 0.0005989151172894133,
      "loss": 3.4775,
      "step": 6239
    },
    {
      "epoch": 0.08,
      "grad_norm": 4.401193618774414,
      "learning_rate": 0.0005989147696968357,
      "loss": 3.2054,
      "step": 6240
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.064685344696045,
      "learning_rate": 0.0005989144220486841,
      "loss": 3.38,
      "step": 6241
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.964258074760437,
      "learning_rate": 0.0005989140743449587,
      "loss": 3.2206,
      "step": 6242
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7993491888046265,
      "learning_rate": 0.0005989137265856594,
      "loss": 3.4802,
      "step": 6243
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.975529193878174,
      "learning_rate": 0.0005989133787707865,
      "loss": 3.3296,
      "step": 6244
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6722689867019653,
      "learning_rate": 0.0005989130309003399,
      "loss": 3.1795,
      "step": 6245
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7110724449157715,
      "learning_rate": 0.0005989126829743197,
      "loss": 3.2777,
      "step": 6246
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9231338500976562,
      "learning_rate": 0.0005989123349927262,
      "loss": 3.332,
      "step": 6247
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6874762773513794,
      "learning_rate": 0.0005989119869555592,
      "loss": 3.6376,
      "step": 6248
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5982463359832764,
      "learning_rate": 0.0005989116388628187,
      "loss": 3.1997,
      "step": 6249
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7880468368530273,
      "learning_rate": 0.0005989112907145049,
      "loss": 3.4138,
      "step": 6250
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7044637203216553,
      "learning_rate": 0.0005989109425106179,
      "loss": 3.2095,
      "step": 6251
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7123181819915771,
      "learning_rate": 0.0005989105942511576,
      "loss": 3.2875,
      "step": 6252
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9741380214691162,
      "learning_rate": 0.0005989102459361244,
      "loss": 3.1204,
      "step": 6253
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6308585405349731,
      "learning_rate": 0.0005989098975655179,
      "loss": 3.1084,
      "step": 6254
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7996987104415894,
      "learning_rate": 0.0005989095491393385,
      "loss": 3.3675,
      "step": 6255
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5194096565246582,
      "learning_rate": 0.0005989092006575863,
      "loss": 3.3518,
      "step": 6256
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.547947645187378,
      "learning_rate": 0.0005989088521202611,
      "loss": 3.3601,
      "step": 6257
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4427520036697388,
      "learning_rate": 0.0005989085035273631,
      "loss": 3.2428,
      "step": 6258
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5959863662719727,
      "learning_rate": 0.0005989081548788924,
      "loss": 3.4401,
      "step": 6259
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6330844163894653,
      "learning_rate": 0.000598907806174849,
      "loss": 3.4391,
      "step": 6260
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7099591493606567,
      "learning_rate": 0.0005989074574152331,
      "loss": 3.5478,
      "step": 6261
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.858909249305725,
      "learning_rate": 0.0005989071086000445,
      "loss": 3.2165,
      "step": 6262
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4728866815567017,
      "learning_rate": 0.0005989067597292836,
      "loss": 3.4917,
      "step": 6263
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5030758380889893,
      "learning_rate": 0.0005989064108029502,
      "loss": 3.2734,
      "step": 6264
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6217082738876343,
      "learning_rate": 0.0005989060618210443,
      "loss": 3.0523,
      "step": 6265
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.764083981513977,
      "learning_rate": 0.0005989057127835664,
      "loss": 3.2143,
      "step": 6266
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2487640380859375,
      "learning_rate": 0.0005989053636905162,
      "loss": 3.3924,
      "step": 6267
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9849027395248413,
      "learning_rate": 0.0005989050145418938,
      "loss": 3.2961,
      "step": 6268
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9590702056884766,
      "learning_rate": 0.0005989046653376993,
      "loss": 3.353,
      "step": 6269
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5653011798858643,
      "learning_rate": 0.0005989043160779329,
      "loss": 3.2389,
      "step": 6270
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.955863356590271,
      "learning_rate": 0.0005989039667625944,
      "loss": 3.2427,
      "step": 6271
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5516010522842407,
      "learning_rate": 0.000598903617391684,
      "loss": 3.3024,
      "step": 6272
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6455796957015991,
      "learning_rate": 0.0005989032679652018,
      "loss": 3.4029,
      "step": 6273
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6214120388031006,
      "learning_rate": 0.000598902918483148,
      "loss": 3.3481,
      "step": 6274
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3615994453430176,
      "learning_rate": 0.0005989025689455224,
      "loss": 3.313,
      "step": 6275
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3396239280700684,
      "learning_rate": 0.0005989022193523252,
      "loss": 3.2375,
      "step": 6276
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5918245315551758,
      "learning_rate": 0.0005989018697035563,
      "loss": 3.3519,
      "step": 6277
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.15901255607605,
      "learning_rate": 0.000598901519999216,
      "loss": 3.4804,
      "step": 6278
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5185041427612305,
      "learning_rate": 0.0005989011702393043,
      "loss": 3.1998,
      "step": 6279
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3879616260528564,
      "learning_rate": 0.0005989008204238212,
      "loss": 3.3338,
      "step": 6280
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8673804998397827,
      "learning_rate": 0.0005989004705527668,
      "loss": 3.2313,
      "step": 6281
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5008413791656494,
      "learning_rate": 0.0005989001206261411,
      "loss": 3.3809,
      "step": 6282
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6560114622116089,
      "learning_rate": 0.0005988997706439443,
      "loss": 3.1157,
      "step": 6283
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.309357762336731,
      "learning_rate": 0.0005988994206061764,
      "loss": 3.047,
      "step": 6284
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.174180507659912,
      "learning_rate": 0.0005988990705128374,
      "loss": 3.4618,
      "step": 6285
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8432120084762573,
      "learning_rate": 0.0005988987203639274,
      "loss": 3.4873,
      "step": 6286
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5428465604782104,
      "learning_rate": 0.0005988983701594464,
      "loss": 3.3464,
      "step": 6287
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9708311557769775,
      "learning_rate": 0.0005988980198993948,
      "loss": 3.3771,
      "step": 6288
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5425548553466797,
      "learning_rate": 0.0005988976695837723,
      "loss": 3.1005,
      "step": 6289
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4566117525100708,
      "learning_rate": 0.000598897319212579,
      "loss": 3.2115,
      "step": 6290
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6158335208892822,
      "learning_rate": 0.0005988969687858153,
      "loss": 3.2918,
      "step": 6291
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.639237642288208,
      "learning_rate": 0.0005988966183034807,
      "loss": 3.4487,
      "step": 6292
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5998048782348633,
      "learning_rate": 0.0005988962677655758,
      "loss": 3.204,
      "step": 6293
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4756450653076172,
      "learning_rate": 0.0005988959171721002,
      "loss": 3.4598,
      "step": 6294
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6748498678207397,
      "learning_rate": 0.0005988955665230544,
      "loss": 3.2063,
      "step": 6295
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9302681684494019,
      "learning_rate": 0.0005988952158184382,
      "loss": 3.3187,
      "step": 6296
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5145294666290283,
      "learning_rate": 0.0005988948650582518,
      "loss": 3.4143,
      "step": 6297
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8677008152008057,
      "learning_rate": 0.0005988945142424951,
      "loss": 3.2035,
      "step": 6298
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.047891139984131,
      "learning_rate": 0.0005988941633711684,
      "loss": 3.1021,
      "step": 6299
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3391833305358887,
      "learning_rate": 0.0005988938124442717,
      "loss": 3.1666,
      "step": 6300
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6620047092437744,
      "learning_rate": 0.0005988934614618047,
      "loss": 3.256,
      "step": 6301
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7690562009811401,
      "learning_rate": 0.000598893110423768,
      "loss": 3.3134,
      "step": 6302
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2561275959014893,
      "learning_rate": 0.0005988927593301613,
      "loss": 3.2774,
      "step": 6303
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3232576847076416,
      "learning_rate": 0.0005988924081809848,
      "loss": 3.1699,
      "step": 6304
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.668855905532837,
      "learning_rate": 0.0005988920569762387,
      "loss": 3.3358,
      "step": 6305
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4674923419952393,
      "learning_rate": 0.0005988917057159228,
      "loss": 3.0467,
      "step": 6306
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3851014375686646,
      "learning_rate": 0.0005988913544000374,
      "loss": 2.9587,
      "step": 6307
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4509248733520508,
      "learning_rate": 0.0005988910030285823,
      "loss": 3.3535,
      "step": 6308
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.747126579284668,
      "learning_rate": 0.0005988906516015578,
      "loss": 3.05,
      "step": 6309
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6632142066955566,
      "learning_rate": 0.000598890300118964,
      "loss": 3.188,
      "step": 6310
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7278920412063599,
      "learning_rate": 0.0005988899485808006,
      "loss": 3.3257,
      "step": 6311
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4186625480651855,
      "learning_rate": 0.0005988895969870681,
      "loss": 3.1535,
      "step": 6312
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5774635076522827,
      "learning_rate": 0.0005988892453377664,
      "loss": 2.8983,
      "step": 6313
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5405235290527344,
      "learning_rate": 0.0005988888936328954,
      "loss": 3.1714,
      "step": 6314
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6999053955078125,
      "learning_rate": 0.0005988885418724554,
      "loss": 3.2117,
      "step": 6315
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.441320538520813,
      "learning_rate": 0.0005988881900564464,
      "loss": 3.3212,
      "step": 6316
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.222081422805786,
      "learning_rate": 0.0005988878381848684,
      "loss": 3.249,
      "step": 6317
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7529925107955933,
      "learning_rate": 0.0005988874862577216,
      "loss": 2.9782,
      "step": 6318
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.437670350074768,
      "learning_rate": 0.0005988871342750058,
      "loss": 3.3746,
      "step": 6319
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.042222261428833,
      "learning_rate": 0.0005988867822367213,
      "loss": 3.0083,
      "step": 6320
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9328843355178833,
      "learning_rate": 0.0005988864301428682,
      "loss": 3.3306,
      "step": 6321
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.622745394706726,
      "learning_rate": 0.0005988860779934465,
      "loss": 3.4527,
      "step": 6322
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7379366159439087,
      "learning_rate": 0.0005988857257884561,
      "loss": 3.2219,
      "step": 6323
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7208613157272339,
      "learning_rate": 0.0005988853735278973,
      "loss": 3.3769,
      "step": 6324
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5763282775878906,
      "learning_rate": 0.0005988850212117699,
      "loss": 3.2591,
      "step": 6325
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6890227794647217,
      "learning_rate": 0.0005988846688400743,
      "loss": 3.0522,
      "step": 6326
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1714069843292236,
      "learning_rate": 0.0005988843164128104,
      "loss": 3.0429,
      "step": 6327
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4024925231933594,
      "learning_rate": 0.0005988839639299782,
      "loss": 3.0919,
      "step": 6328
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.005915880203247,
      "learning_rate": 0.0005988836113915778,
      "loss": 2.9844,
      "step": 6329
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7491874694824219,
      "learning_rate": 0.0005988832587976095,
      "loss": 3.1783,
      "step": 6330
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.736603021621704,
      "learning_rate": 0.0005988829061480729,
      "loss": 3.3858,
      "step": 6331
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0158512592315674,
      "learning_rate": 0.0005988825534429684,
      "loss": 3.1835,
      "step": 6332
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6080842018127441,
      "learning_rate": 0.0005988822006822961,
      "loss": 3.3015,
      "step": 6333
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6452386379241943,
      "learning_rate": 0.0005988818478660558,
      "loss": 3.4528,
      "step": 6334
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7730824947357178,
      "learning_rate": 0.0005988814949942478,
      "loss": 3.1262,
      "step": 6335
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7818914651870728,
      "learning_rate": 0.000598881142066872,
      "loss": 3.5063,
      "step": 6336
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4480220079421997,
      "learning_rate": 0.0005988807890839287,
      "loss": 3.4592,
      "step": 6337
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5881808996200562,
      "learning_rate": 0.0005988804360454179,
      "loss": 3.1692,
      "step": 6338
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.116882562637329,
      "learning_rate": 0.0005988800829513394,
      "loss": 3.2528,
      "step": 6339
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6994794607162476,
      "learning_rate": 0.0005988797298016934,
      "loss": 3.1049,
      "step": 6340
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.078249454498291,
      "learning_rate": 0.0005988793765964802,
      "loss": 3.3697,
      "step": 6341
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.493526577949524,
      "learning_rate": 0.0005988790233356996,
      "loss": 3.317,
      "step": 6342
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6951956748962402,
      "learning_rate": 0.0005988786700193517,
      "loss": 3.3816,
      "step": 6343
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.618016004562378,
      "learning_rate": 0.0005988783166474366,
      "loss": 3.1168,
      "step": 6344
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3286631107330322,
      "learning_rate": 0.0005988779632199545,
      "loss": 3.2633,
      "step": 6345
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7132291793823242,
      "learning_rate": 0.0005988776097369053,
      "loss": 3.0407,
      "step": 6346
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7155681848526,
      "learning_rate": 0.0005988772561982891,
      "loss": 3.2442,
      "step": 6347
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6728661060333252,
      "learning_rate": 0.0005988769026041059,
      "loss": 3.1875,
      "step": 6348
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8729631900787354,
      "learning_rate": 0.0005988765489543559,
      "loss": 3.1468,
      "step": 6349
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6580065488815308,
      "learning_rate": 0.0005988761952490391,
      "loss": 3.6751,
      "step": 6350
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.895207166671753,
      "learning_rate": 0.0005988758414881556,
      "loss": 3.2351,
      "step": 6351
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6699825525283813,
      "learning_rate": 0.0005988754876717054,
      "loss": 3.1228,
      "step": 6352
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6736104488372803,
      "learning_rate": 0.0005988751337996887,
      "loss": 3.0498,
      "step": 6353
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7695366144180298,
      "learning_rate": 0.0005988747798721054,
      "loss": 3.5114,
      "step": 6354
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9738132953643799,
      "learning_rate": 0.0005988744258889556,
      "loss": 3.0941,
      "step": 6355
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7275618314743042,
      "learning_rate": 0.0005988740718502395,
      "loss": 3.3376,
      "step": 6356
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.956013798713684,
      "learning_rate": 0.000598873717755957,
      "loss": 3.2466,
      "step": 6357
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6580010652542114,
      "learning_rate": 0.0005988733636061082,
      "loss": 3.4935,
      "step": 6358
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.2905855178833008,
      "learning_rate": 0.0005988730094006933,
      "loss": 3.3758,
      "step": 6359
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4429728984832764,
      "learning_rate": 0.0005988726551397122,
      "loss": 3.3638,
      "step": 6360
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0663928985595703,
      "learning_rate": 0.0005988723008231651,
      "loss": 3.3632,
      "step": 6361
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2676823139190674,
      "learning_rate": 0.0005988719464510519,
      "loss": 3.2518,
      "step": 6362
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5971916913986206,
      "learning_rate": 0.0005988715920233728,
      "loss": 3.2983,
      "step": 6363
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4055265188217163,
      "learning_rate": 0.0005988712375401278,
      "loss": 3.1653,
      "step": 6364
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.0597329139709473,
      "learning_rate": 0.0005988708830013171,
      "loss": 3.3273,
      "step": 6365
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6280317306518555,
      "learning_rate": 0.0005988705284069406,
      "loss": 3.2191,
      "step": 6366
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5527498722076416,
      "learning_rate": 0.0005988701737569984,
      "loss": 3.3112,
      "step": 6367
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2275173664093018,
      "learning_rate": 0.0005988698190514906,
      "loss": 3.3192,
      "step": 6368
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.650470733642578,
      "learning_rate": 0.0005988694642904173,
      "loss": 2.9934,
      "step": 6369
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.987453579902649,
      "learning_rate": 0.0005988691094737785,
      "loss": 3.3922,
      "step": 6370
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.171689987182617,
      "learning_rate": 0.0005988687546015743,
      "loss": 3.162,
      "step": 6371
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.113351345062256,
      "learning_rate": 0.0005988683996738047,
      "loss": 3.062,
      "step": 6372
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.3444929122924805,
      "learning_rate": 0.0005988680446904699,
      "loss": 3.3515,
      "step": 6373
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4883925914764404,
      "learning_rate": 0.0005988676896515698,
      "loss": 3.0443,
      "step": 6374
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.1116554737091064,
      "learning_rate": 0.0005988673345571047,
      "loss": 3.4363,
      "step": 6375
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8468962907791138,
      "learning_rate": 0.0005988669794070743,
      "loss": 3.3049,
      "step": 6376
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.347843885421753,
      "learning_rate": 0.0005988666242014791,
      "loss": 3.5467,
      "step": 6377
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8891469240188599,
      "learning_rate": 0.0005988662689403189,
      "loss": 3.2063,
      "step": 6378
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.046950578689575,
      "learning_rate": 0.0005988659136235937,
      "loss": 3.1191,
      "step": 6379
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5900098085403442,
      "learning_rate": 0.0005988655582513039,
      "loss": 3.1598,
      "step": 6380
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7576141357421875,
      "learning_rate": 0.0005988652028234492,
      "loss": 3.4024,
      "step": 6381
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5065255165100098,
      "learning_rate": 0.0005988648473400298,
      "loss": 3.3657,
      "step": 6382
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6984285116195679,
      "learning_rate": 0.0005988644918010458,
      "loss": 3.263,
      "step": 6383
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8977627754211426,
      "learning_rate": 0.0005988641362064972,
      "loss": 3.4612,
      "step": 6384
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6595966815948486,
      "learning_rate": 0.0005988637805563843,
      "loss": 3.0712,
      "step": 6385
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6883046627044678,
      "learning_rate": 0.0005988634248507068,
      "loss": 3.3369,
      "step": 6386
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5521830320358276,
      "learning_rate": 0.0005988630690894651,
      "loss": 3.458,
      "step": 6387
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8833603858947754,
      "learning_rate": 0.0005988627132726591,
      "loss": 3.4546,
      "step": 6388
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6200098991394043,
      "learning_rate": 0.0005988623574002888,
      "loss": 3.162,
      "step": 6389
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4323903322219849,
      "learning_rate": 0.0005988620014723543,
      "loss": 3.1126,
      "step": 6390
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9798824787139893,
      "learning_rate": 0.0005988616454888557,
      "loss": 3.2049,
      "step": 6391
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7644420862197876,
      "learning_rate": 0.0005988612894497932,
      "loss": 3.2317,
      "step": 6392
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.270883798599243,
      "learning_rate": 0.0005988609333551667,
      "loss": 3.0607,
      "step": 6393
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9029674530029297,
      "learning_rate": 0.0005988605772049763,
      "loss": 3.1776,
      "step": 6394
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.299220323562622,
      "learning_rate": 0.0005988602209992222,
      "loss": 3.3152,
      "step": 6395
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4635789394378662,
      "learning_rate": 0.0005988598647379042,
      "loss": 3.246,
      "step": 6396
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.717272400856018,
      "learning_rate": 0.0005988595084210225,
      "loss": 3.3475,
      "step": 6397
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7744337320327759,
      "learning_rate": 0.0005988591520485773,
      "loss": 3.422,
      "step": 6398
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6392594575881958,
      "learning_rate": 0.0005988587956205684,
      "loss": 3.1947,
      "step": 6399
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4818930625915527,
      "learning_rate": 0.000598858439136996,
      "loss": 3.1314,
      "step": 6400
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.523829460144043,
      "learning_rate": 0.0005988580825978603,
      "loss": 3.3508,
      "step": 6401
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9763765335083008,
      "learning_rate": 0.0005988577260031612,
      "loss": 3.0556,
      "step": 6402
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7435036897659302,
      "learning_rate": 0.0005988573693528987,
      "loss": 3.1048,
      "step": 6403
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7330882549285889,
      "learning_rate": 0.0005988570126470731,
      "loss": 3.0457,
      "step": 6404
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6868294477462769,
      "learning_rate": 0.0005988566558856843,
      "loss": 3.1134,
      "step": 6405
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3941489458084106,
      "learning_rate": 0.0005988562990687324,
      "loss": 3.1882,
      "step": 6406
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4074976444244385,
      "learning_rate": 0.0005988559421962174,
      "loss": 3.4561,
      "step": 6407
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5593898296356201,
      "learning_rate": 0.0005988555852681396,
      "loss": 3.1214,
      "step": 6408
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7179969549179077,
      "learning_rate": 0.0005988552282844989,
      "loss": 3.3774,
      "step": 6409
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5977182388305664,
      "learning_rate": 0.0005988548712452953,
      "loss": 3.4044,
      "step": 6410
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6585724353790283,
      "learning_rate": 0.0005988545141505289,
      "loss": 3.3225,
      "step": 6411
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7861777544021606,
      "learning_rate": 0.0005988541570001999,
      "loss": 3.434,
      "step": 6412
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.752290964126587,
      "learning_rate": 0.0005988537997943081,
      "loss": 3.4035,
      "step": 6413
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4500106573104858,
      "learning_rate": 0.0005988534425328539,
      "loss": 2.9611,
      "step": 6414
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8726493120193481,
      "learning_rate": 0.0005988530852158371,
      "loss": 3.2704,
      "step": 6415
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8017034530639648,
      "learning_rate": 0.000598852727843258,
      "loss": 3.1929,
      "step": 6416
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.772101640701294,
      "learning_rate": 0.0005988523704151164,
      "loss": 3.2132,
      "step": 6417
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6857644319534302,
      "learning_rate": 0.0005988520129314126,
      "loss": 3.0623,
      "step": 6418
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.622001051902771,
      "learning_rate": 0.0005988516553921465,
      "loss": 3.4357,
      "step": 6419
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8118062019348145,
      "learning_rate": 0.0005988512977973182,
      "loss": 3.2727,
      "step": 6420
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.627756357192993,
      "learning_rate": 0.000598850940146928,
      "loss": 3.1857,
      "step": 6421
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7388665676116943,
      "learning_rate": 0.0005988505824409756,
      "loss": 3.3097,
      "step": 6422
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0682153701782227,
      "learning_rate": 0.0005988502246794613,
      "loss": 3.27,
      "step": 6423
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6564769744873047,
      "learning_rate": 0.0005988498668623851,
      "loss": 3.2542,
      "step": 6424
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5552502870559692,
      "learning_rate": 0.000598849508989747,
      "loss": 3.3832,
      "step": 6425
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.414598822593689,
      "learning_rate": 0.0005988491510615472,
      "loss": 3.3696,
      "step": 6426
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4266901016235352,
      "learning_rate": 0.0005988487930777858,
      "loss": 3.2971,
      "step": 6427
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8945282697677612,
      "learning_rate": 0.0005988484350384626,
      "loss": 3.2693,
      "step": 6428
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.958879828453064,
      "learning_rate": 0.0005988480769435778,
      "loss": 3.2448,
      "step": 6429
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6822490692138672,
      "learning_rate": 0.0005988477187931316,
      "loss": 3.3956,
      "step": 6430
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.167853355407715,
      "learning_rate": 0.000598847360587124,
      "loss": 3.1438,
      "step": 6431
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9847489595413208,
      "learning_rate": 0.0005988470023255549,
      "loss": 3.1762,
      "step": 6432
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7806575298309326,
      "learning_rate": 0.0005988466440084246,
      "loss": 3.2444,
      "step": 6433
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3564904928207397,
      "learning_rate": 0.0005988462856357332,
      "loss": 3.0944,
      "step": 6434
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5927281379699707,
      "learning_rate": 0.0005988459272074804,
      "loss": 3.4579,
      "step": 6435
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.612729787826538,
      "learning_rate": 0.0005988455687236666,
      "loss": 3.4314,
      "step": 6436
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5827454328536987,
      "learning_rate": 0.0005988452101842917,
      "loss": 3.1988,
      "step": 6437
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9122616052627563,
      "learning_rate": 0.000598844851589356,
      "loss": 3.3351,
      "step": 6438
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7825242280960083,
      "learning_rate": 0.0005988444929388593,
      "loss": 3.2931,
      "step": 6439
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.755739450454712,
      "learning_rate": 0.0005988441342328017,
      "loss": 3.0966,
      "step": 6440
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8128421306610107,
      "learning_rate": 0.0005988437754711834,
      "loss": 3.1313,
      "step": 6441
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.534288763999939,
      "learning_rate": 0.0005988434166540044,
      "loss": 3.3118,
      "step": 6442
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4012691974639893,
      "learning_rate": 0.0005988430577812649,
      "loss": 3.2479,
      "step": 6443
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9169811010360718,
      "learning_rate": 0.0005988426988529646,
      "loss": 3.2796,
      "step": 6444
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9485372304916382,
      "learning_rate": 0.0005988423398691039,
      "loss": 3.2262,
      "step": 6445
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.855203866958618,
      "learning_rate": 0.0005988419808296828,
      "loss": 3.2144,
      "step": 6446
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6532702445983887,
      "learning_rate": 0.0005988416217347013,
      "loss": 3.2971,
      "step": 6447
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7184518575668335,
      "learning_rate": 0.0005988412625841594,
      "loss": 3.3823,
      "step": 6448
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1934614181518555,
      "learning_rate": 0.0005988409033780575,
      "loss": 3.3866,
      "step": 6449
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.6057260036468506,
      "learning_rate": 0.0005988405441163952,
      "loss": 3.1832,
      "step": 6450
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8546526432037354,
      "learning_rate": 0.0005988401847991729,
      "loss": 3.1051,
      "step": 6451
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.586909055709839,
      "learning_rate": 0.0005988398254263906,
      "loss": 3.3775,
      "step": 6452
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7750614881515503,
      "learning_rate": 0.0005988394659980483,
      "loss": 3.237,
      "step": 6453
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6374303102493286,
      "learning_rate": 0.0005988391065141461,
      "loss": 3.3785,
      "step": 6454
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5309622287750244,
      "learning_rate": 0.0005988387469746841,
      "loss": 3.3663,
      "step": 6455
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.498138666152954,
      "learning_rate": 0.0005988383873796624,
      "loss": 3.2202,
      "step": 6456
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.20607328414917,
      "learning_rate": 0.0005988380277290809,
      "loss": 3.111,
      "step": 6457
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0217556953430176,
      "learning_rate": 0.0005988376680229398,
      "loss": 3.296,
      "step": 6458
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5113602876663208,
      "learning_rate": 0.0005988373082612391,
      "loss": 3.4029,
      "step": 6459
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.629033327102661,
      "learning_rate": 0.000598836948443979,
      "loss": 3.2693,
      "step": 6460
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5853043794631958,
      "learning_rate": 0.0005988365885711594,
      "loss": 3.267,
      "step": 6461
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6089006662368774,
      "learning_rate": 0.0005988362286427804,
      "loss": 3.0899,
      "step": 6462
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5753902196884155,
      "learning_rate": 0.0005988358686588423,
      "loss": 3.129,
      "step": 6463
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3161982297897339,
      "learning_rate": 0.0005988355086193447,
      "loss": 3.2833,
      "step": 6464
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5585163831710815,
      "learning_rate": 0.0005988351485242881,
      "loss": 3.315,
      "step": 6465
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.317372441291809,
      "learning_rate": 0.0005988347883736724,
      "loss": 3.1822,
      "step": 6466
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.961430311203003,
      "learning_rate": 0.0005988344281674976,
      "loss": 3.2288,
      "step": 6467
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9529131650924683,
      "learning_rate": 0.0005988340679057638,
      "loss": 3.2203,
      "step": 6468
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1573755741119385,
      "learning_rate": 0.0005988337075884712,
      "loss": 3.5381,
      "step": 6469
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7063361406326294,
      "learning_rate": 0.0005988333472156198,
      "loss": 3.1284,
      "step": 6470
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.4403622150421143,
      "learning_rate": 0.0005988329867872096,
      "loss": 3.0222,
      "step": 6471
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.825242519378662,
      "learning_rate": 0.0005988326263032406,
      "loss": 3.3602,
      "step": 6472
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.450953245162964,
      "learning_rate": 0.0005988322657637131,
      "loss": 3.1777,
      "step": 6473
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8249918222427368,
      "learning_rate": 0.000598831905168627,
      "loss": 3.0236,
      "step": 6474
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8205525875091553,
      "learning_rate": 0.0005988315445179824,
      "loss": 3.2261,
      "step": 6475
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8901089429855347,
      "learning_rate": 0.0005988311838117795,
      "loss": 3.1643,
      "step": 6476
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7686728239059448,
      "learning_rate": 0.0005988308230500181,
      "loss": 3.2435,
      "step": 6477
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5939040184020996,
      "learning_rate": 0.0005988304622326984,
      "loss": 3.2872,
      "step": 6478
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.2522239685058594,
      "learning_rate": 0.0005988301013598206,
      "loss": 2.9996,
      "step": 6479
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6493581533432007,
      "learning_rate": 0.0005988297404313844,
      "loss": 3.1411,
      "step": 6480
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5722098350524902,
      "learning_rate": 0.0005988293794473903,
      "loss": 3.2085,
      "step": 6481
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9014168977737427,
      "learning_rate": 0.0005988290184078382,
      "loss": 3.3152,
      "step": 6482
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6593533754348755,
      "learning_rate": 0.0005988286573127282,
      "loss": 3.186,
      "step": 6483
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7622737884521484,
      "learning_rate": 0.0005988282961620602,
      "loss": 3.2814,
      "step": 6484
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4967710971832275,
      "learning_rate": 0.0005988279349558343,
      "loss": 3.3146,
      "step": 6485
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.665453314781189,
      "learning_rate": 0.0005988275736940508,
      "loss": 3.335,
      "step": 6486
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.9033918380737305,
      "learning_rate": 0.0005988272123767096,
      "loss": 3.5042,
      "step": 6487
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.4900975227355957,
      "learning_rate": 0.0005988268510038107,
      "loss": 3.3823,
      "step": 6488
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.543210744857788,
      "learning_rate": 0.0005988264895753542,
      "loss": 3.3462,
      "step": 6489
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.590188980102539,
      "learning_rate": 0.0005988261280913403,
      "loss": 3.4069,
      "step": 6490
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.5202114582061768,
      "learning_rate": 0.000598825766551769,
      "loss": 3.4423,
      "step": 6491
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6653008460998535,
      "learning_rate": 0.0005988254049566404,
      "loss": 3.3073,
      "step": 6492
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9699525833129883,
      "learning_rate": 0.0005988250433059544,
      "loss": 3.4217,
      "step": 6493
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8216896057128906,
      "learning_rate": 0.0005988246815997112,
      "loss": 3.3521,
      "step": 6494
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.598778486251831,
      "learning_rate": 0.0005988243198379109,
      "loss": 3.3306,
      "step": 6495
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.888081431388855,
      "learning_rate": 0.0005988239580205535,
      "loss": 3.2377,
      "step": 6496
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.430147171020508,
      "learning_rate": 0.000598823596147639,
      "loss": 3.1833,
      "step": 6497
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.740668535232544,
      "learning_rate": 0.0005988232342191677,
      "loss": 3.3506,
      "step": 6498
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.608227014541626,
      "learning_rate": 0.0005988228722351394,
      "loss": 3.2617,
      "step": 6499
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.017200469970703,
      "learning_rate": 0.0005988225101955543,
      "loss": 3.3739,
      "step": 6500
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.7960158586502075,
      "learning_rate": 0.0005988221481004125,
      "loss": 3.3651,
      "step": 6501
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9410110712051392,
      "learning_rate": 0.0005988217859497141,
      "loss": 3.5251,
      "step": 6502
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8456063270568848,
      "learning_rate": 0.000598821423743459,
      "loss": 3.2063,
      "step": 6503
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9196429252624512,
      "learning_rate": 0.0005988210614816473,
      "loss": 3.4577,
      "step": 6504
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.67510986328125,
      "learning_rate": 0.0005988206991642792,
      "loss": 2.9792,
      "step": 6505
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.6729462146759033,
      "learning_rate": 0.0005988203367913547,
      "loss": 3.0344,
      "step": 6506
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1061830520629883,
      "learning_rate": 0.0005988199743628738,
      "loss": 3.1154,
      "step": 6507
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5976130962371826,
      "learning_rate": 0.0005988196118788367,
      "loss": 3.3131,
      "step": 6508
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.871114730834961,
      "learning_rate": 0.0005988192493392434,
      "loss": 3.2648,
      "step": 6509
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.0210375785827637,
      "learning_rate": 0.0005988188867440938,
      "loss": 3.4153,
      "step": 6510
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.7552084922790527,
      "learning_rate": 0.0005988185240933882,
      "loss": 3.1509,
      "step": 6511
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.6999458074569702,
      "learning_rate": 0.0005988181613871267,
      "loss": 3.2434,
      "step": 6512
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.652158260345459,
      "learning_rate": 0.0005988177986253092,
      "loss": 3.225,
      "step": 6513
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.3663713932037354,
      "learning_rate": 0.0005988174358079358,
      "loss": 2.9526,
      "step": 6514
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.651877999305725,
      "learning_rate": 0.0005988170729350067,
      "loss": 3.318,
      "step": 6515
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.4586102962493896,
      "learning_rate": 0.0005988167100065217,
      "loss": 3.1677,
      "step": 6516
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.9037401676177979,
      "learning_rate": 0.0005988163470224812,
      "loss": 2.9756,
      "step": 6517
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5717566013336182,
      "learning_rate": 0.0005988159839828851,
      "loss": 3.2828,
      "step": 6518
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.8700522184371948,
      "learning_rate": 0.0005988156208877333,
      "loss": 3.1638,
      "step": 6519
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.0146796703338623,
      "learning_rate": 0.0005988152577370262,
      "loss": 3.2347,
      "step": 6520
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.9330227375030518,
      "learning_rate": 0.0005988148945307636,
      "loss": 3.2024,
      "step": 6521
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.5505354404449463,
      "learning_rate": 0.0005988145312689456,
      "loss": 3.1769,
      "step": 6522
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.50860857963562,
      "learning_rate": 0.0005988141679515726,
      "loss": 3.2477,
      "step": 6523
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.1446430683135986,
      "learning_rate": 0.0005988138045786442,
      "loss": 3.2995,
      "step": 6524
    },
    {
      "epoch": 0.08,
      "grad_norm": 3.217329502105713,
      "learning_rate": 0.0005988134411501607,
      "loss": 3.1081,
      "step": 6525
    },
    {
      "epoch": 0.08,
      "grad_norm": 1.3517553806304932,
      "learning_rate": 0.0005988130776661222,
      "loss": 3.384,
      "step": 6526
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.45680832862854,
      "learning_rate": 0.0005988127141265286,
      "loss": 3.2485,
      "step": 6527
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.879908561706543,
      "learning_rate": 0.0005988123505313802,
      "loss": 3.3018,
      "step": 6528
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9566514492034912,
      "learning_rate": 0.0005988119868806769,
      "loss": 3.3326,
      "step": 6529
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6017000675201416,
      "learning_rate": 0.0005988116231744188,
      "loss": 3.0661,
      "step": 6530
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5366857051849365,
      "learning_rate": 0.000598811259412606,
      "loss": 3.182,
      "step": 6531
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.596864938735962,
      "learning_rate": 0.0005988108955952386,
      "loss": 3.0614,
      "step": 6532
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3841876983642578,
      "learning_rate": 0.0005988105317223164,
      "loss": 3.4072,
      "step": 6533
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6698720455169678,
      "learning_rate": 0.0005988101677938399,
      "loss": 3.2186,
      "step": 6534
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4294464588165283,
      "learning_rate": 0.0005988098038098088,
      "loss": 3.1524,
      "step": 6535
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5421966314315796,
      "learning_rate": 0.0005988094397702234,
      "loss": 3.2257,
      "step": 6536
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5431770086288452,
      "learning_rate": 0.0005988090756750838,
      "loss": 3.0238,
      "step": 6537
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7026729583740234,
      "learning_rate": 0.0005988087115243897,
      "loss": 3.3677,
      "step": 6538
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5857874155044556,
      "learning_rate": 0.0005988083473181416,
      "loss": 3.3936,
      "step": 6539
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7607792615890503,
      "learning_rate": 0.0005988079830563394,
      "loss": 3.3259,
      "step": 6540
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7794920206069946,
      "learning_rate": 0.0005988076187389829,
      "loss": 3.3192,
      "step": 6541
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0633277893066406,
      "learning_rate": 0.0005988072543660727,
      "loss": 3.225,
      "step": 6542
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8355062007904053,
      "learning_rate": 0.0005988068899376085,
      "loss": 3.2565,
      "step": 6543
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9938679933547974,
      "learning_rate": 0.0005988065254535905,
      "loss": 3.0963,
      "step": 6544
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8572108745574951,
      "learning_rate": 0.0005988061609140187,
      "loss": 3.3079,
      "step": 6545
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6561622619628906,
      "learning_rate": 0.0005988057963188932,
      "loss": 3.1906,
      "step": 6546
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0338494777679443,
      "learning_rate": 0.000598805431668214,
      "loss": 3.1566,
      "step": 6547
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.185426950454712,
      "learning_rate": 0.0005988050669619813,
      "loss": 3.3665,
      "step": 6548
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.435256004333496,
      "learning_rate": 0.000598804702200195,
      "loss": 3.2783,
      "step": 6549
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.538145661354065,
      "learning_rate": 0.0005988043373828555,
      "loss": 3.3778,
      "step": 6550
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7571178674697876,
      "learning_rate": 0.0005988039725099624,
      "loss": 3.1904,
      "step": 6551
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.6970601081848145,
      "learning_rate": 0.0005988036075815161,
      "loss": 3.3051,
      "step": 6552
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9208381175994873,
      "learning_rate": 0.0005988032425975165,
      "loss": 3.462,
      "step": 6553
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4942854642868042,
      "learning_rate": 0.0005988028775579637,
      "loss": 3.3056,
      "step": 6554
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9073644876480103,
      "learning_rate": 0.000598802512462858,
      "loss": 3.414,
      "step": 6555
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.471529483795166,
      "learning_rate": 0.0005988021473121992,
      "loss": 3.2967,
      "step": 6556
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.295955181121826,
      "learning_rate": 0.0005988017821059873,
      "loss": 3.0511,
      "step": 6557
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0688376426696777,
      "learning_rate": 0.0005988014168442226,
      "loss": 3.3433,
      "step": 6558
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4706089496612549,
      "learning_rate": 0.0005988010515269051,
      "loss": 3.3233,
      "step": 6559
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.12626051902771,
      "learning_rate": 0.0005988006861540349,
      "loss": 3.0565,
      "step": 6560
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.0145955085754395,
      "learning_rate": 0.0005988003207256119,
      "loss": 3.4017,
      "step": 6561
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7131866216659546,
      "learning_rate": 0.0005987999552416362,
      "loss": 3.0238,
      "step": 6562
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.55531907081604,
      "learning_rate": 0.0005987995897021081,
      "loss": 3.4305,
      "step": 6563
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.4333882331848145,
      "learning_rate": 0.0005987992241070275,
      "loss": 3.2516,
      "step": 6564
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0184125900268555,
      "learning_rate": 0.0005987988584563945,
      "loss": 3.2558,
      "step": 6565
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5486584901809692,
      "learning_rate": 0.0005987984927502091,
      "loss": 3.104,
      "step": 6566
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.245249032974243,
      "learning_rate": 0.0005987981269884715,
      "loss": 3.3951,
      "step": 6567
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.639918565750122,
      "learning_rate": 0.0005987977611711816,
      "loss": 3.2564,
      "step": 6568
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7761154174804688,
      "learning_rate": 0.0005987973952983396,
      "loss": 3.2423,
      "step": 6569
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6406173706054688,
      "learning_rate": 0.0005987970293699455,
      "loss": 3.1254,
      "step": 6570
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.077099561691284,
      "learning_rate": 0.0005987966633859994,
      "loss": 3.2993,
      "step": 6571
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3266178369522095,
      "learning_rate": 0.0005987962973465013,
      "loss": 3.2235,
      "step": 6572
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.856854796409607,
      "learning_rate": 0.0005987959312514514,
      "loss": 3.3822,
      "step": 6573
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.8378031253814697,
      "learning_rate": 0.0005987955651008497,
      "loss": 3.2172,
      "step": 6574
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4901576042175293,
      "learning_rate": 0.0005987951988946962,
      "loss": 3.3038,
      "step": 6575
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7788804769515991,
      "learning_rate": 0.0005987948326329911,
      "loss": 3.448,
      "step": 6576
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7154701948165894,
      "learning_rate": 0.0005987944663157344,
      "loss": 3.4019,
      "step": 6577
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5368194580078125,
      "learning_rate": 0.0005987940999429262,
      "loss": 3.0699,
      "step": 6578
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4910712242126465,
      "learning_rate": 0.0005987937335145665,
      "loss": 3.2458,
      "step": 6579
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7058310508728027,
      "learning_rate": 0.0005987933670306554,
      "loss": 3.0845,
      "step": 6580
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4213101863861084,
      "learning_rate": 0.0005987930004911929,
      "loss": 3.1964,
      "step": 6581
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5222899913787842,
      "learning_rate": 0.0005987926338961792,
      "loss": 3.2307,
      "step": 6582
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9293466806411743,
      "learning_rate": 0.0005987922672456143,
      "loss": 3.343,
      "step": 6583
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5942375659942627,
      "learning_rate": 0.0005987919005394983,
      "loss": 3.1791,
      "step": 6584
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8429348468780518,
      "learning_rate": 0.0005987915337778313,
      "loss": 3.0764,
      "step": 6585
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.735917329788208,
      "learning_rate": 0.0005987911669606132,
      "loss": 3.4282,
      "step": 6586
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.9687113761901855,
      "learning_rate": 0.0005987908000878443,
      "loss": 3.4955,
      "step": 6587
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.6476378440856934,
      "learning_rate": 0.0005987904331595244,
      "loss": 3.1238,
      "step": 6588
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5470001697540283,
      "learning_rate": 0.0005987900661756538,
      "loss": 3.0494,
      "step": 6589
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.00661039352417,
      "learning_rate": 0.0005987896991362325,
      "loss": 3.236,
      "step": 6590
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2360196113586426,
      "learning_rate": 0.0005987893320412604,
      "loss": 3.5837,
      "step": 6591
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.03709077835083,
      "learning_rate": 0.000598788964890738,
      "loss": 3.2305,
      "step": 6592
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8756392002105713,
      "learning_rate": 0.0005987885976846649,
      "loss": 2.9807,
      "step": 6593
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4370296001434326,
      "learning_rate": 0.0005987882304230415,
      "loss": 3.1912,
      "step": 6594
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.294159412384033,
      "learning_rate": 0.0005987878631058676,
      "loss": 3.3421,
      "step": 6595
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6733675003051758,
      "learning_rate": 0.0005987874957331435,
      "loss": 3.4213,
      "step": 6596
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8902779817581177,
      "learning_rate": 0.000598787128304869,
      "loss": 3.2967,
      "step": 6597
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.6309750080108643,
      "learning_rate": 0.0005987867608210445,
      "loss": 3.3433,
      "step": 6598
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.943374752998352,
      "learning_rate": 0.0005987863932816698,
      "loss": 3.023,
      "step": 6599
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6177726984024048,
      "learning_rate": 0.0005987860256867451,
      "loss": 3.3385,
      "step": 6600
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.6882095336914062,
      "learning_rate": 0.0005987856580362706,
      "loss": 3.3668,
      "step": 6601
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5334644317626953,
      "learning_rate": 0.000598785290330246,
      "loss": 3.2184,
      "step": 6602
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.8606414794921875,
      "learning_rate": 0.0005987849225686715,
      "loss": 3.0709,
      "step": 6603
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4964503049850464,
      "learning_rate": 0.0005987845547515474,
      "loss": 3.304,
      "step": 6604
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.314610719680786,
      "learning_rate": 0.0005987841868788736,
      "loss": 3.2653,
      "step": 6605
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.735525131225586,
      "learning_rate": 0.0005987838189506502,
      "loss": 3.0956,
      "step": 6606
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9328022003173828,
      "learning_rate": 0.0005987834509668771,
      "loss": 3.0402,
      "step": 6607
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.632702350616455,
      "learning_rate": 0.0005987830829275548,
      "loss": 3.4114,
      "step": 6608
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.108774423599243,
      "learning_rate": 0.0005987827148326829,
      "loss": 3.2021,
      "step": 6609
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.804903507232666,
      "learning_rate": 0.0005987823466822616,
      "loss": 3.069,
      "step": 6610
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4483447074890137,
      "learning_rate": 0.000598781978476291,
      "loss": 3.1058,
      "step": 6611
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4784554243087769,
      "learning_rate": 0.0005987816102147713,
      "loss": 3.186,
      "step": 6612
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2624239921569824,
      "learning_rate": 0.0005987812418977024,
      "loss": 3.1145,
      "step": 6613
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.186087131500244,
      "learning_rate": 0.0005987808735250846,
      "loss": 3.5335,
      "step": 6614
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3589017391204834,
      "learning_rate": 0.0005987805050969176,
      "loss": 3.0678,
      "step": 6615
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2280263900756836,
      "learning_rate": 0.0005987801366132018,
      "loss": 3.3503,
      "step": 6616
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1968114376068115,
      "learning_rate": 0.0005987797680739369,
      "loss": 3.3282,
      "step": 6617
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9248440265655518,
      "learning_rate": 0.0005987793994791234,
      "loss": 3.3524,
      "step": 6618
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5197590589523315,
      "learning_rate": 0.0005987790308287611,
      "loss": 3.2807,
      "step": 6619
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1535863876342773,
      "learning_rate": 0.0005987786621228503,
      "loss": 3.2636,
      "step": 6620
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0642595291137695,
      "learning_rate": 0.0005987782933613907,
      "loss": 3.15,
      "step": 6621
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9149868488311768,
      "learning_rate": 0.0005987779245443826,
      "loss": 3.2471,
      "step": 6622
    },
    {
      "epoch": 0.09,
      "grad_norm": 4.410598278045654,
      "learning_rate": 0.000598777555671826,
      "loss": 2.9184,
      "step": 6623
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.235360622406006,
      "learning_rate": 0.0005987771867437212,
      "loss": 3.1408,
      "step": 6624
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6676908731460571,
      "learning_rate": 0.000598776817760068,
      "loss": 3.3382,
      "step": 6625
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5447921752929688,
      "learning_rate": 0.0005987764487208665,
      "loss": 3.3462,
      "step": 6626
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.676152229309082,
      "learning_rate": 0.0005987760796261169,
      "loss": 3.3193,
      "step": 6627
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8031646013259888,
      "learning_rate": 0.0005987757104758192,
      "loss": 3.0666,
      "step": 6628
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.922499179840088,
      "learning_rate": 0.0005987753412699733,
      "loss": 3.265,
      "step": 6629
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5617153644561768,
      "learning_rate": 0.0005987749720085796,
      "loss": 3.2912,
      "step": 6630
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6766990423202515,
      "learning_rate": 0.0005987746026916379,
      "loss": 3.2635,
      "step": 6631
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8296200037002563,
      "learning_rate": 0.0005987742333191483,
      "loss": 3.0988,
      "step": 6632
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.524916887283325,
      "learning_rate": 0.000598773863891111,
      "loss": 3.2798,
      "step": 6633
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3440616130828857,
      "learning_rate": 0.000598773494407526,
      "loss": 3.4497,
      "step": 6634
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6050328016281128,
      "learning_rate": 0.0005987731248683934,
      "loss": 3.1942,
      "step": 6635
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9802420139312744,
      "learning_rate": 0.0005987727552737132,
      "loss": 3.4492,
      "step": 6636
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7569102048873901,
      "learning_rate": 0.0005987723856234855,
      "loss": 3.0159,
      "step": 6637
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8567938804626465,
      "learning_rate": 0.0005987720159177104,
      "loss": 3.4789,
      "step": 6638
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5663201808929443,
      "learning_rate": 0.0005987716461563878,
      "loss": 3.3864,
      "step": 6639
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5497291088104248,
      "learning_rate": 0.0005987712763395181,
      "loss": 3.2914,
      "step": 6640
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7432122230529785,
      "learning_rate": 0.0005987709064671011,
      "loss": 2.9836,
      "step": 6641
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5506808757781982,
      "learning_rate": 0.000598770536539137,
      "loss": 3.2348,
      "step": 6642
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3551355600357056,
      "learning_rate": 0.0005987701665556258,
      "loss": 3.1168,
      "step": 6643
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6226087808609009,
      "learning_rate": 0.0005987697965165676,
      "loss": 3.2328,
      "step": 6644
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5555775165557861,
      "learning_rate": 0.0005987694264219624,
      "loss": 3.2739,
      "step": 6645
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.786598563194275,
      "learning_rate": 0.0005987690562718103,
      "loss": 2.8926,
      "step": 6646
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5933160781860352,
      "learning_rate": 0.0005987686860661115,
      "loss": 3.1956,
      "step": 6647
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5832417011260986,
      "learning_rate": 0.0005987683158048658,
      "loss": 3.3246,
      "step": 6648
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7661943435668945,
      "learning_rate": 0.0005987679454880736,
      "loss": 3.409,
      "step": 6649
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6248362064361572,
      "learning_rate": 0.0005987675751157348,
      "loss": 3.1258,
      "step": 6650
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4429538249969482,
      "learning_rate": 0.0005987672046878493,
      "loss": 3.3219,
      "step": 6651
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6588904857635498,
      "learning_rate": 0.0005987668342044175,
      "loss": 3.2202,
      "step": 6652
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7423381805419922,
      "learning_rate": 0.0005987664636654392,
      "loss": 3.3529,
      "step": 6653
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.091175079345703,
      "learning_rate": 0.0005987660930709147,
      "loss": 3.1244,
      "step": 6654
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.600038766860962,
      "learning_rate": 0.0005987657224208438,
      "loss": 3.2504,
      "step": 6655
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.739270567893982,
      "learning_rate": 0.0005987653517152268,
      "loss": 3.1497,
      "step": 6656
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7582584619522095,
      "learning_rate": 0.0005987649809540636,
      "loss": 3.2534,
      "step": 6657
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.902506947517395,
      "learning_rate": 0.0005987646101373545,
      "loss": 3.5376,
      "step": 6658
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6453166007995605,
      "learning_rate": 0.0005987642392650993,
      "loss": 3.3642,
      "step": 6659
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.583102226257324,
      "learning_rate": 0.0005987638683372982,
      "loss": 2.9551,
      "step": 6660
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.097731351852417,
      "learning_rate": 0.0005987634973539513,
      "loss": 3.3643,
      "step": 6661
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.014068603515625,
      "learning_rate": 0.0005987631263150586,
      "loss": 3.1199,
      "step": 6662
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.500150442123413,
      "learning_rate": 0.0005987627552206202,
      "loss": 3.3236,
      "step": 6663
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6754933595657349,
      "learning_rate": 0.0005987623840706362,
      "loss": 3.3093,
      "step": 6664
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4685338735580444,
      "learning_rate": 0.0005987620128651065,
      "loss": 3.1877,
      "step": 6665
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.039947748184204,
      "learning_rate": 0.0005987616416040314,
      "loss": 3.3757,
      "step": 6666
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3776307106018066,
      "learning_rate": 0.0005987612702874109,
      "loss": 2.9053,
      "step": 6667
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9807811975479126,
      "learning_rate": 0.000598760898915245,
      "loss": 3.4866,
      "step": 6668
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.781823754310608,
      "learning_rate": 0.0005987605274875338,
      "loss": 3.3238,
      "step": 6669
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4810398817062378,
      "learning_rate": 0.0005987601560042774,
      "loss": 3.3369,
      "step": 6670
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8085216283798218,
      "learning_rate": 0.0005987597844654759,
      "loss": 3.1091,
      "step": 6671
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1506690979003906,
      "learning_rate": 0.0005987594128711292,
      "loss": 3.3515,
      "step": 6672
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9649409055709839,
      "learning_rate": 0.0005987590412212376,
      "loss": 3.3026,
      "step": 6673
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9428644180297852,
      "learning_rate": 0.0005987586695158009,
      "loss": 3.1979,
      "step": 6674
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4636973142623901,
      "learning_rate": 0.0005987582977548194,
      "loss": 3.2922,
      "step": 6675
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4252065420150757,
      "learning_rate": 0.0005987579259382932,
      "loss": 3.0908,
      "step": 6676
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4787355661392212,
      "learning_rate": 0.0005987575540662222,
      "loss": 3.2253,
      "step": 6677
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4870591163635254,
      "learning_rate": 0.0005987571821386064,
      "loss": 2.9565,
      "step": 6678
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9881954193115234,
      "learning_rate": 0.0005987568101554461,
      "loss": 3.0611,
      "step": 6679
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5976675748825073,
      "learning_rate": 0.0005987564381167413,
      "loss": 3.4126,
      "step": 6680
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6836094856262207,
      "learning_rate": 0.000598756066022492,
      "loss": 3.1918,
      "step": 6681
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.68320631980896,
      "learning_rate": 0.0005987556938726983,
      "loss": 3.1628,
      "step": 6682
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6638779640197754,
      "learning_rate": 0.0005987553216673604,
      "loss": 3.2293,
      "step": 6683
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5982972383499146,
      "learning_rate": 0.0005987549494064782,
      "loss": 3.3493,
      "step": 6684
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7043776512145996,
      "learning_rate": 0.0005987545770900517,
      "loss": 3.3234,
      "step": 6685
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.6327314376831055,
      "learning_rate": 0.0005987542047180811,
      "loss": 3.277,
      "step": 6686
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8964886665344238,
      "learning_rate": 0.0005987538322905665,
      "loss": 3.1336,
      "step": 6687
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6318601369857788,
      "learning_rate": 0.000598753459807508,
      "loss": 3.2342,
      "step": 6688
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.702933669090271,
      "learning_rate": 0.0005987530872689055,
      "loss": 3.4703,
      "step": 6689
    },
    {
      "epoch": 0.09,
      "grad_norm": 4.083178520202637,
      "learning_rate": 0.0005987527146747592,
      "loss": 3.2216,
      "step": 6690
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7179367542266846,
      "learning_rate": 0.000598752342025069,
      "loss": 3.4603,
      "step": 6691
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4375840425491333,
      "learning_rate": 0.0005987519693198352,
      "loss": 3.0624,
      "step": 6692
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.102728843688965,
      "learning_rate": 0.0005987515965590578,
      "loss": 3.2753,
      "step": 6693
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.420516014099121,
      "learning_rate": 0.0005987512237427368,
      "loss": 3.0921,
      "step": 6694
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.225522041320801,
      "learning_rate": 0.0005987508508708723,
      "loss": 3.1758,
      "step": 6695
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3402036428451538,
      "learning_rate": 0.0005987504779434644,
      "loss": 3.3617,
      "step": 6696
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.031850576400757,
      "learning_rate": 0.0005987501049605133,
      "loss": 3.2116,
      "step": 6697
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4314815998077393,
      "learning_rate": 0.0005987497319220186,
      "loss": 3.3008,
      "step": 6698
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.590203881263733,
      "learning_rate": 0.000598749358827981,
      "loss": 3.4038,
      "step": 6699
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.195362091064453,
      "learning_rate": 0.0005987489856784001,
      "loss": 3.3745,
      "step": 6700
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0355935096740723,
      "learning_rate": 0.0005987486124732761,
      "loss": 3.2895,
      "step": 6701
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.407204270362854,
      "learning_rate": 0.0005987482392126091,
      "loss": 3.1866,
      "step": 6702
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8096507787704468,
      "learning_rate": 0.0005987478658963991,
      "loss": 3.1967,
      "step": 6703
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.663564682006836,
      "learning_rate": 0.0005987474925246465,
      "loss": 3.1255,
      "step": 6704
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4638993740081787,
      "learning_rate": 0.0005987471190973507,
      "loss": 3.0686,
      "step": 6705
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3468332290649414,
      "learning_rate": 0.0005987467456145126,
      "loss": 2.8992,
      "step": 6706
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7237801551818848,
      "learning_rate": 0.0005987463720761315,
      "loss": 3.0568,
      "step": 6707
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.728804349899292,
      "learning_rate": 0.000598745998482208,
      "loss": 2.9824,
      "step": 6708
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7160571813583374,
      "learning_rate": 0.0005987456248327419,
      "loss": 3.1723,
      "step": 6709
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6912384033203125,
      "learning_rate": 0.0005987452511277334,
      "loss": 3.4443,
      "step": 6710
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8194266557693481,
      "learning_rate": 0.0005987448773671825,
      "loss": 3.2222,
      "step": 6711
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6353322267532349,
      "learning_rate": 0.0005987445035510892,
      "loss": 3.2527,
      "step": 6712
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.948274612426758,
      "learning_rate": 0.0005987441296794538,
      "loss": 3.0705,
      "step": 6713
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.493990421295166,
      "learning_rate": 0.0005987437557522762,
      "loss": 3.0976,
      "step": 6714
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3905715942382812,
      "learning_rate": 0.0005987433817695564,
      "loss": 3.3907,
      "step": 6715
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6827346086502075,
      "learning_rate": 0.0005987430077312947,
      "loss": 3.1647,
      "step": 6716
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7715494632720947,
      "learning_rate": 0.0005987426336374909,
      "loss": 3.3932,
      "step": 6717
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7309397459030151,
      "learning_rate": 0.0005987422594881453,
      "loss": 3.2002,
      "step": 6718
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2390494346618652,
      "learning_rate": 0.0005987418852832578,
      "loss": 3.323,
      "step": 6719
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.272332191467285,
      "learning_rate": 0.0005987415110228285,
      "loss": 3.4014,
      "step": 6720
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5846583843231201,
      "learning_rate": 0.0005987411367068577,
      "loss": 3.0443,
      "step": 6721
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5431519746780396,
      "learning_rate": 0.0005987407623353452,
      "loss": 3.4076,
      "step": 6722
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.6100165843963623,
      "learning_rate": 0.0005987403879082912,
      "loss": 3.243,
      "step": 6723
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8852450847625732,
      "learning_rate": 0.0005987400134256956,
      "loss": 3.1194,
      "step": 6724
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7435386180877686,
      "learning_rate": 0.0005987396388875587,
      "loss": 3.2603,
      "step": 6725
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.849943161010742,
      "learning_rate": 0.0005987392642938803,
      "loss": 3.2642,
      "step": 6726
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.640299081802368,
      "learning_rate": 0.0005987388896446608,
      "loss": 3.2853,
      "step": 6727
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.588180661201477,
      "learning_rate": 0.0005987385149399,
      "loss": 3.4921,
      "step": 6728
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7956221103668213,
      "learning_rate": 0.0005987381401795981,
      "loss": 3.429,
      "step": 6729
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8822518587112427,
      "learning_rate": 0.0005987377653637552,
      "loss": 3.2951,
      "step": 6730
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3960416316986084,
      "learning_rate": 0.0005987373904923713,
      "loss": 3.147,
      "step": 6731
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.637684941291809,
      "learning_rate": 0.0005987370155654465,
      "loss": 3.3164,
      "step": 6732
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.504716157913208,
      "learning_rate": 0.0005987366405829808,
      "loss": 3.0889,
      "step": 6733
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.9629313945770264,
      "learning_rate": 0.0005987362655449742,
      "loss": 3.2285,
      "step": 6734
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3853317499160767,
      "learning_rate": 0.0005987358904514271,
      "loss": 3.5422,
      "step": 6735
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5432319641113281,
      "learning_rate": 0.0005987355153023393,
      "loss": 3.2371,
      "step": 6736
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1252803802490234,
      "learning_rate": 0.0005987351400977109,
      "loss": 3.37,
      "step": 6737
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4239277839660645,
      "learning_rate": 0.0005987347648375419,
      "loss": 3.2402,
      "step": 6738
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7360413074493408,
      "learning_rate": 0.0005987343895218326,
      "loss": 3.3722,
      "step": 6739
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.47773015499115,
      "learning_rate": 0.0005987340141505829,
      "loss": 3.4627,
      "step": 6740
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7932981252670288,
      "learning_rate": 0.0005987336387237929,
      "loss": 3.1626,
      "step": 6741
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.545605182647705,
      "learning_rate": 0.0005987332632414626,
      "loss": 3.3596,
      "step": 6742
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.430469036102295,
      "learning_rate": 0.0005987328877035923,
      "loss": 3.1335,
      "step": 6743
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6430411338806152,
      "learning_rate": 0.0005987325121101818,
      "loss": 3.388,
      "step": 6744
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.663717269897461,
      "learning_rate": 0.0005987321364612314,
      "loss": 2.9336,
      "step": 6745
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4287617206573486,
      "learning_rate": 0.0005987317607567409,
      "loss": 3.3091,
      "step": 6746
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.43936288356781,
      "learning_rate": 0.0005987313849967106,
      "loss": 3.3166,
      "step": 6747
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4131767749786377,
      "learning_rate": 0.0005987310091811406,
      "loss": 3.204,
      "step": 6748
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.169862747192383,
      "learning_rate": 0.0005987306333100307,
      "loss": 3.2573,
      "step": 6749
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.550553321838379,
      "learning_rate": 0.0005987302573833813,
      "loss": 3.4355,
      "step": 6750
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.316278338432312,
      "learning_rate": 0.0005987298814011922,
      "loss": 3.4079,
      "step": 6751
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7308926582336426,
      "learning_rate": 0.0005987295053634636,
      "loss": 3.1942,
      "step": 6752
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9561734199523926,
      "learning_rate": 0.0005987291292701956,
      "loss": 3.35,
      "step": 6753
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9713443517684937,
      "learning_rate": 0.0005987287531213881,
      "loss": 3.2455,
      "step": 6754
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4503638744354248,
      "learning_rate": 0.0005987283769170413,
      "loss": 3.1857,
      "step": 6755
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5107421875,
      "learning_rate": 0.0005987280006571554,
      "loss": 3.392,
      "step": 6756
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.092863082885742,
      "learning_rate": 0.0005987276243417302,
      "loss": 3.3623,
      "step": 6757
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.839901089668274,
      "learning_rate": 0.000598727247970766,
      "loss": 3.2426,
      "step": 6758
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8458276987075806,
      "learning_rate": 0.0005987268715442626,
      "loss": 3.283,
      "step": 6759
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.911454439163208,
      "learning_rate": 0.0005987264950622203,
      "loss": 3.2658,
      "step": 6760
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5154615640640259,
      "learning_rate": 0.0005987261185246392,
      "loss": 3.3531,
      "step": 6761
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9495797157287598,
      "learning_rate": 0.0005987257419315192,
      "loss": 3.0051,
      "step": 6762
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9792652130126953,
      "learning_rate": 0.0005987253652828605,
      "loss": 3.242,
      "step": 6763
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7048052549362183,
      "learning_rate": 0.0005987249885786631,
      "loss": 3.3422,
      "step": 6764
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0181844234466553,
      "learning_rate": 0.0005987246118189269,
      "loss": 3.141,
      "step": 6765
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6279033422470093,
      "learning_rate": 0.0005987242350036524,
      "loss": 3.3271,
      "step": 6766
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7668073177337646,
      "learning_rate": 0.0005987238581328393,
      "loss": 3.3283,
      "step": 6767
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5748178958892822,
      "learning_rate": 0.0005987234812064879,
      "loss": 3.3438,
      "step": 6768
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1256961822509766,
      "learning_rate": 0.000598723104224598,
      "loss": 3.0129,
      "step": 6769
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8552474975585938,
      "learning_rate": 0.00059872272718717,
      "loss": 3.3272,
      "step": 6770
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6099909543991089,
      "learning_rate": 0.0005987223500942037,
      "loss": 3.1434,
      "step": 6771
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6906322240829468,
      "learning_rate": 0.0005987219729456994,
      "loss": 3.3184,
      "step": 6772
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4534077644348145,
      "learning_rate": 0.0005987215957416569,
      "loss": 3.447,
      "step": 6773
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5466123819351196,
      "learning_rate": 0.0005987212184820765,
      "loss": 3.2035,
      "step": 6774
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6755825281143188,
      "learning_rate": 0.0005987208411669582,
      "loss": 3.2323,
      "step": 6775
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9018328189849854,
      "learning_rate": 0.000598720463796302,
      "loss": 3.1857,
      "step": 6776
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7322382926940918,
      "learning_rate": 0.0005987200863701081,
      "loss": 3.3471,
      "step": 6777
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0430126190185547,
      "learning_rate": 0.0005987197088883763,
      "loss": 3.4509,
      "step": 6778
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.530708074569702,
      "learning_rate": 0.0005987193313511071,
      "loss": 3.2437,
      "step": 6779
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4466097354888916,
      "learning_rate": 0.0005987189537583002,
      "loss": 3.0831,
      "step": 6780
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6238980293273926,
      "learning_rate": 0.0005987185761099559,
      "loss": 3.1114,
      "step": 6781
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.394771099090576,
      "learning_rate": 0.0005987181984060742,
      "loss": 3.3284,
      "step": 6782
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0404393672943115,
      "learning_rate": 0.000598717820646655,
      "loss": 3.432,
      "step": 6783
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8290181159973145,
      "learning_rate": 0.0005987174428316986,
      "loss": 3.282,
      "step": 6784
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4070193767547607,
      "learning_rate": 0.0005987170649612051,
      "loss": 3.305,
      "step": 6785
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.679306983947754,
      "learning_rate": 0.0005987166870351743,
      "loss": 3.2954,
      "step": 6786
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.375136613845825,
      "learning_rate": 0.0005987163090536065,
      "loss": 3.2927,
      "step": 6787
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.874977469444275,
      "learning_rate": 0.0005987159310165017,
      "loss": 3.2109,
      "step": 6788
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9637539386749268,
      "learning_rate": 0.0005987155529238599,
      "loss": 3.0339,
      "step": 6789
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2510132789611816,
      "learning_rate": 0.0005987151747756813,
      "loss": 3.1272,
      "step": 6790
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0228800773620605,
      "learning_rate": 0.0005987147965719659,
      "loss": 3.1922,
      "step": 6791
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.300142526626587,
      "learning_rate": 0.0005987144183127138,
      "loss": 3.1641,
      "step": 6792
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5591055154800415,
      "learning_rate": 0.000598714039997925,
      "loss": 3.3412,
      "step": 6793
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9415838718414307,
      "learning_rate": 0.0005987136616275997,
      "loss": 3.1476,
      "step": 6794
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5035005807876587,
      "learning_rate": 0.0005987132832017378,
      "loss": 3.2869,
      "step": 6795
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8064631223678589,
      "learning_rate": 0.0005987129047203395,
      "loss": 3.2286,
      "step": 6796
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5008641481399536,
      "learning_rate": 0.0005987125261834048,
      "loss": 3.2642,
      "step": 6797
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.458241581916809,
      "learning_rate": 0.0005987121475909339,
      "loss": 3.4788,
      "step": 6798
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8405001163482666,
      "learning_rate": 0.0005987117689429267,
      "loss": 3.1244,
      "step": 6799
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.751793622970581,
      "learning_rate": 0.0005987113902393834,
      "loss": 3.4487,
      "step": 6800
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4749908447265625,
      "learning_rate": 0.0005987110114803039,
      "loss": 3.1984,
      "step": 6801
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4399878978729248,
      "learning_rate": 0.0005987106326656885,
      "loss": 3.3879,
      "step": 6802
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6986124515533447,
      "learning_rate": 0.0005987102537955371,
      "loss": 3.1014,
      "step": 6803
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1936323642730713,
      "learning_rate": 0.0005987098748698499,
      "loss": 3.3782,
      "step": 6804
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9882351160049438,
      "learning_rate": 0.0005987094958886268,
      "loss": 3.1867,
      "step": 6805
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.914831519126892,
      "learning_rate": 0.0005987091168518679,
      "loss": 3.244,
      "step": 6806
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6499502658843994,
      "learning_rate": 0.0005987087377595735,
      "loss": 3.0697,
      "step": 6807
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3849127292633057,
      "learning_rate": 0.0005987083586117435,
      "loss": 3.1065,
      "step": 6808
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3787022829055786,
      "learning_rate": 0.0005987079794083778,
      "loss": 3.2129,
      "step": 6809
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0100717544555664,
      "learning_rate": 0.0005987076001494769,
      "loss": 3.1589,
      "step": 6810
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9595518112182617,
      "learning_rate": 0.0005987072208350404,
      "loss": 3.1225,
      "step": 6811
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8572889566421509,
      "learning_rate": 0.0005987068414650687,
      "loss": 3.1768,
      "step": 6812
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0425827503204346,
      "learning_rate": 0.0005987064620395617,
      "loss": 3.1517,
      "step": 6813
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8154088258743286,
      "learning_rate": 0.0005987060825585197,
      "loss": 3.4903,
      "step": 6814
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.74347984790802,
      "learning_rate": 0.0005987057030219424,
      "loss": 3.2515,
      "step": 6815
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7627602815628052,
      "learning_rate": 0.0005987053234298301,
      "loss": 3.3399,
      "step": 6816
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.1270852088928223,
      "learning_rate": 0.000598704943782183,
      "loss": 3.2851,
      "step": 6817
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7909388542175293,
      "learning_rate": 0.0005987045640790008,
      "loss": 3.4278,
      "step": 6818
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.338416814804077,
      "learning_rate": 0.0005987041843202839,
      "loss": 3.4994,
      "step": 6819
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5596837997436523,
      "learning_rate": 0.0005987038045060321,
      "loss": 3.1793,
      "step": 6820
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7364795207977295,
      "learning_rate": 0.0005987034246362459,
      "loss": 3.1557,
      "step": 6821
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7025363445281982,
      "learning_rate": 0.000598703044710925,
      "loss": 3.1993,
      "step": 6822
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8944064378738403,
      "learning_rate": 0.0005987026647300694,
      "loss": 3.2796,
      "step": 6823
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4632683992385864,
      "learning_rate": 0.0005987022846936794,
      "loss": 3.1204,
      "step": 6824
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6726641654968262,
      "learning_rate": 0.0005987019046017551,
      "loss": 3.2489,
      "step": 6825
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3448196649551392,
      "learning_rate": 0.0005987015244542965,
      "loss": 3.2939,
      "step": 6826
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3511474132537842,
      "learning_rate": 0.0005987011442513035,
      "loss": 3.5767,
      "step": 6827
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1173861026763916,
      "learning_rate": 0.0005987007639927765,
      "loss": 3.3716,
      "step": 6828
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3640894889831543,
      "learning_rate": 0.0005987003836787153,
      "loss": 3.2251,
      "step": 6829
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7198585271835327,
      "learning_rate": 0.00059870000330912,
      "loss": 3.2897,
      "step": 6830
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6522530317306519,
      "learning_rate": 0.0005986996228839908,
      "loss": 3.0798,
      "step": 6831
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9200468063354492,
      "learning_rate": 0.0005986992424033276,
      "loss": 3.2506,
      "step": 6832
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8795403242111206,
      "learning_rate": 0.0005986988618671307,
      "loss": 3.357,
      "step": 6833
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6518679857254028,
      "learning_rate": 0.0005986984812754,
      "loss": 2.9895,
      "step": 6834
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5461723804473877,
      "learning_rate": 0.0005986981006281356,
      "loss": 3.3587,
      "step": 6835
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9153153896331787,
      "learning_rate": 0.0005986977199253376,
      "loss": 3.5467,
      "step": 6836
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8893648386001587,
      "learning_rate": 0.000598697339167006,
      "loss": 3.2319,
      "step": 6837
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8940470218658447,
      "learning_rate": 0.000598696958353141,
      "loss": 3.2157,
      "step": 6838
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.747647762298584,
      "learning_rate": 0.0005986965774837426,
      "loss": 3.1901,
      "step": 6839
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6044753789901733,
      "learning_rate": 0.0005986961965588109,
      "loss": 3.0582,
      "step": 6840
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.386541724205017,
      "learning_rate": 0.0005986958155783459,
      "loss": 3.2643,
      "step": 6841
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6336311101913452,
      "learning_rate": 0.0005986954345423476,
      "loss": 3.2974,
      "step": 6842
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4780277013778687,
      "learning_rate": 0.0005986950534508164,
      "loss": 3.4812,
      "step": 6843
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4010205268859863,
      "learning_rate": 0.0005986946723037519,
      "loss": 3.202,
      "step": 6844
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.061833620071411,
      "learning_rate": 0.0005986942911011547,
      "loss": 3.2886,
      "step": 6845
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4348293542861938,
      "learning_rate": 0.0005986939098430243,
      "loss": 3.2006,
      "step": 6846
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.624484896659851,
      "learning_rate": 0.0005986935285293613,
      "loss": 3.2416,
      "step": 6847
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9845197200775146,
      "learning_rate": 0.0005986931471601654,
      "loss": 2.9678,
      "step": 6848
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.460214376449585,
      "learning_rate": 0.0005986927657354368,
      "loss": 3.3811,
      "step": 6849
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.321697473526001,
      "learning_rate": 0.0005986923842551757,
      "loss": 3.1873,
      "step": 6850
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4940316677093506,
      "learning_rate": 0.000598692002719382,
      "loss": 3.1901,
      "step": 6851
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.55850088596344,
      "learning_rate": 0.0005986916211280557,
      "loss": 3.0984,
      "step": 6852
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5297316312789917,
      "learning_rate": 0.0005986912394811971,
      "loss": 3.37,
      "step": 6853
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4217102527618408,
      "learning_rate": 0.0005986908577788061,
      "loss": 3.224,
      "step": 6854
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6949174404144287,
      "learning_rate": 0.0005986904760208829,
      "loss": 3.4314,
      "step": 6855
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.086632251739502,
      "learning_rate": 0.0005986900942074275,
      "loss": 3.1522,
      "step": 6856
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5834580659866333,
      "learning_rate": 0.0005986897123384399,
      "loss": 3.2838,
      "step": 6857
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7828211784362793,
      "learning_rate": 0.0005986893304139203,
      "loss": 3.2319,
      "step": 6858
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6410895586013794,
      "learning_rate": 0.0005986889484338688,
      "loss": 3.0072,
      "step": 6859
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6297873258590698,
      "learning_rate": 0.0005986885663982853,
      "loss": 3.2547,
      "step": 6860
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.065061092376709,
      "learning_rate": 0.0005986881843071699,
      "loss": 3.1883,
      "step": 6861
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4665886163711548,
      "learning_rate": 0.0005986878021605228,
      "loss": 3.0125,
      "step": 6862
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0600521564483643,
      "learning_rate": 0.0005986874199583439,
      "loss": 3.176,
      "step": 6863
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1528749465942383,
      "learning_rate": 0.0005986870377006334,
      "loss": 3.1488,
      "step": 6864
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4983689785003662,
      "learning_rate": 0.0005986866553873915,
      "loss": 3.2512,
      "step": 6865
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9095849990844727,
      "learning_rate": 0.000598686273018618,
      "loss": 3.1497,
      "step": 6866
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4149361848831177,
      "learning_rate": 0.000598685890594313,
      "loss": 3.2297,
      "step": 6867
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.120457649230957,
      "learning_rate": 0.0005986855081144768,
      "loss": 3.1552,
      "step": 6868
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.412825584411621,
      "learning_rate": 0.0005986851255791092,
      "loss": 3.1209,
      "step": 6869
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5065194368362427,
      "learning_rate": 0.0005986847429882105,
      "loss": 3.2888,
      "step": 6870
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6461334228515625,
      "learning_rate": 0.0005986843603417807,
      "loss": 3.5169,
      "step": 6871
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8717774152755737,
      "learning_rate": 0.0005986839776398197,
      "loss": 3.2369,
      "step": 6872
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0276598930358887,
      "learning_rate": 0.0005986835948823278,
      "loss": 3.4248,
      "step": 6873
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2557404041290283,
      "learning_rate": 0.0005986832120693049,
      "loss": 3.2867,
      "step": 6874
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5598487854003906,
      "learning_rate": 0.0005986828292007513,
      "loss": 3.1184,
      "step": 6875
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7948226928710938,
      "learning_rate": 0.0005986824462766668,
      "loss": 3.4288,
      "step": 6876
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.194118022918701,
      "learning_rate": 0.0005986820632970515,
      "loss": 3.1072,
      "step": 6877
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4051254987716675,
      "learning_rate": 0.0005986816802619058,
      "loss": 3.3606,
      "step": 6878
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.3434574604034424,
      "learning_rate": 0.0005986812971712295,
      "loss": 3.2238,
      "step": 6879
    },
    {
      "epoch": 0.09,
      "grad_norm": 4.9607696533203125,
      "learning_rate": 0.0005986809140250225,
      "loss": 3.0861,
      "step": 6880
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9619379043579102,
      "learning_rate": 0.0005986805308232853,
      "loss": 3.1844,
      "step": 6881
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5910969972610474,
      "learning_rate": 0.0005986801475660177,
      "loss": 3.2362,
      "step": 6882
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.2900617122650146,
      "learning_rate": 0.0005986797642532198,
      "loss": 3.2206,
      "step": 6883
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7081799507141113,
      "learning_rate": 0.0005986793808848917,
      "loss": 3.1885,
      "step": 6884
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.802490234375,
      "learning_rate": 0.0005986789974610334,
      "loss": 3.1907,
      "step": 6885
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5928456783294678,
      "learning_rate": 0.0005986786139816451,
      "loss": 3.3591,
      "step": 6886
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7261300086975098,
      "learning_rate": 0.0005986782304467269,
      "loss": 3.1733,
      "step": 6887
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1177561283111572,
      "learning_rate": 0.0005986778468562786,
      "loss": 3.2401,
      "step": 6888
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.868782877922058,
      "learning_rate": 0.0005986774632103006,
      "loss": 3.3957,
      "step": 6889
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8758323192596436,
      "learning_rate": 0.0005986770795087927,
      "loss": 3.2116,
      "step": 6890
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.431505560874939,
      "learning_rate": 0.0005986766957517552,
      "loss": 3.3327,
      "step": 6891
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5040901899337769,
      "learning_rate": 0.000598676311939188,
      "loss": 3.5105,
      "step": 6892
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5511497259140015,
      "learning_rate": 0.0005986759280710912,
      "loss": 3.0334,
      "step": 6893
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5094847679138184,
      "learning_rate": 0.000598675544147465,
      "loss": 3.3393,
      "step": 6894
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.645889401435852,
      "learning_rate": 0.0005986751601683093,
      "loss": 3.3398,
      "step": 6895
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4855785369873047,
      "learning_rate": 0.0005986747761336243,
      "loss": 3.2831,
      "step": 6896
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1530709266662598,
      "learning_rate": 0.00059867439204341,
      "loss": 3.2657,
      "step": 6897
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5791419744491577,
      "learning_rate": 0.0005986740078976665,
      "loss": 3.34,
      "step": 6898
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3159165382385254,
      "learning_rate": 0.0005986736236963939,
      "loss": 3.2108,
      "step": 6899
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3315348625183105,
      "learning_rate": 0.0005986732394395922,
      "loss": 3.2402,
      "step": 6900
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6213136911392212,
      "learning_rate": 0.0005986728551272614,
      "loss": 3.3549,
      "step": 6901
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7610447406768799,
      "learning_rate": 0.0005986724707594018,
      "loss": 3.2396,
      "step": 6902
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2561354637145996,
      "learning_rate": 0.0005986720863360133,
      "loss": 3.3359,
      "step": 6903
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.708860993385315,
      "learning_rate": 0.000598671701857096,
      "loss": 3.0595,
      "step": 6904
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7718452215194702,
      "learning_rate": 0.0005986713173226501,
      "loss": 2.9983,
      "step": 6905
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.78968346118927,
      "learning_rate": 0.0005986709327326755,
      "loss": 3.3942,
      "step": 6906
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4729448556900024,
      "learning_rate": 0.0005986705480871724,
      "loss": 3.0315,
      "step": 6907
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4613415002822876,
      "learning_rate": 0.0005986701633861408,
      "loss": 3.3106,
      "step": 6908
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.725775122642517,
      "learning_rate": 0.0005986697786295806,
      "loss": 3.3918,
      "step": 6909
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6491920948028564,
      "learning_rate": 0.0005986693938174922,
      "loss": 3.3361,
      "step": 6910
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8595250844955444,
      "learning_rate": 0.0005986690089498755,
      "loss": 3.4842,
      "step": 6911
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6329668760299683,
      "learning_rate": 0.0005986686240267307,
      "loss": 2.8914,
      "step": 6912
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7328555583953857,
      "learning_rate": 0.0005986682390480576,
      "loss": 3.2346,
      "step": 6913
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9499882459640503,
      "learning_rate": 0.0005986678540138565,
      "loss": 3.2833,
      "step": 6914
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9209082126617432,
      "learning_rate": 0.0005986674689241274,
      "loss": 3.2613,
      "step": 6915
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5412038564682007,
      "learning_rate": 0.0005986670837788703,
      "loss": 3.2858,
      "step": 6916
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.793363094329834,
      "learning_rate": 0.0005986666985780855,
      "loss": 3.2328,
      "step": 6917
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.278637409210205,
      "learning_rate": 0.0005986663133217729,
      "loss": 3.3158,
      "step": 6918
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.662173867225647,
      "learning_rate": 0.0005986659280099325,
      "loss": 3.2703,
      "step": 6919
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3957778215408325,
      "learning_rate": 0.0005986655426425646,
      "loss": 3.3168,
      "step": 6920
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.334140658378601,
      "learning_rate": 0.000598665157219669,
      "loss": 3.4445,
      "step": 6921
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.181455612182617,
      "learning_rate": 0.0005986647717412459,
      "loss": 3.4016,
      "step": 6922
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6167857646942139,
      "learning_rate": 0.0005986643862072956,
      "loss": 3.3591,
      "step": 6923
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9398553371429443,
      "learning_rate": 0.0005986640006178178,
      "loss": 3.2674,
      "step": 6924
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6178771257400513,
      "learning_rate": 0.0005986636149728126,
      "loss": 2.9734,
      "step": 6925
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3286490440368652,
      "learning_rate": 0.0005986632292722803,
      "loss": 3.2491,
      "step": 6926
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.652337670326233,
      "learning_rate": 0.000598662843516221,
      "loss": 3.1897,
      "step": 6927
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2823872566223145,
      "learning_rate": 0.0005986624577046345,
      "loss": 3.1795,
      "step": 6928
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7507472038269043,
      "learning_rate": 0.0005986620718375211,
      "loss": 3.2772,
      "step": 6929
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5678049325942993,
      "learning_rate": 0.0005986616859148807,
      "loss": 3.1866,
      "step": 6930
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5796815156936646,
      "learning_rate": 0.0005986612999367134,
      "loss": 3.0638,
      "step": 6931
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5347859859466553,
      "learning_rate": 0.0005986609139030194,
      "loss": 3.1435,
      "step": 6932
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7499877214431763,
      "learning_rate": 0.0005986605278137987,
      "loss": 3.3191,
      "step": 6933
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.427865982055664,
      "learning_rate": 0.0005986601416690515,
      "loss": 3.2048,
      "step": 6934
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.9774110317230225,
      "learning_rate": 0.0005986597554687776,
      "loss": 3.0929,
      "step": 6935
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.574958086013794,
      "learning_rate": 0.0005986593692129772,
      "loss": 3.117,
      "step": 6936
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.969818115234375,
      "learning_rate": 0.0005986589829016504,
      "loss": 3.0656,
      "step": 6937
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.104182481765747,
      "learning_rate": 0.0005986585965347972,
      "loss": 3.4224,
      "step": 6938
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.243140459060669,
      "learning_rate": 0.0005986582101124179,
      "loss": 3.1573,
      "step": 6939
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6275255680084229,
      "learning_rate": 0.0005986578236345123,
      "loss": 3.0051,
      "step": 6940
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7382419109344482,
      "learning_rate": 0.0005986574371010806,
      "loss": 3.036,
      "step": 6941
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9844204187393188,
      "learning_rate": 0.0005986570505121229,
      "loss": 3.3337,
      "step": 6942
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.090378999710083,
      "learning_rate": 0.0005986566638676391,
      "loss": 3.3143,
      "step": 6943
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.448089838027954,
      "learning_rate": 0.0005986562771676293,
      "loss": 3.0134,
      "step": 6944
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7256958484649658,
      "learning_rate": 0.0005986558904120939,
      "loss": 3.1204,
      "step": 6945
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.481578826904297,
      "learning_rate": 0.0005986555036010325,
      "loss": 3.2047,
      "step": 6946
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3286778926849365,
      "learning_rate": 0.0005986551167344456,
      "loss": 3.2911,
      "step": 6947
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7952172756195068,
      "learning_rate": 0.0005986547298123331,
      "loss": 2.9373,
      "step": 6948
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7867076396942139,
      "learning_rate": 0.0005986543428346948,
      "loss": 3.1356,
      "step": 6949
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2442283630371094,
      "learning_rate": 0.0005986539558015311,
      "loss": 3.1379,
      "step": 6950
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5494794845581055,
      "learning_rate": 0.0005986535687128422,
      "loss": 3.1152,
      "step": 6951
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9768551588058472,
      "learning_rate": 0.0005986531815686277,
      "loss": 3.2944,
      "step": 6952
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9370906352996826,
      "learning_rate": 0.0005986527943688881,
      "loss": 3.088,
      "step": 6953
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6884466409683228,
      "learning_rate": 0.0005986524071136233,
      "loss": 3.1808,
      "step": 6954
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7275292873382568,
      "learning_rate": 0.0005986520198028334,
      "loss": 3.2725,
      "step": 6955
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5293253660202026,
      "learning_rate": 0.0005986516324365184,
      "loss": 3.2866,
      "step": 6956
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7386754751205444,
      "learning_rate": 0.0005986512450146784,
      "loss": 3.2356,
      "step": 6957
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7854602336883545,
      "learning_rate": 0.0005986508575373136,
      "loss": 3.1792,
      "step": 6958
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6129686832427979,
      "learning_rate": 0.0005986504700044238,
      "loss": 3.1702,
      "step": 6959
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1925971508026123,
      "learning_rate": 0.0005986500824160094,
      "loss": 3.3876,
      "step": 6960
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7470896244049072,
      "learning_rate": 0.0005986496947720702,
      "loss": 3.2328,
      "step": 6961
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.534450888633728,
      "learning_rate": 0.0005986493070726064,
      "loss": 3.3635,
      "step": 6962
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6302201747894287,
      "learning_rate": 0.000598648919317618,
      "loss": 3.1525,
      "step": 6963
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6776155233383179,
      "learning_rate": 0.0005986485315071053,
      "loss": 3.0802,
      "step": 6964
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4728374481201172,
      "learning_rate": 0.0005986481436410681,
      "loss": 3.5256,
      "step": 6965
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5401825904846191,
      "learning_rate": 0.0005986477557195066,
      "loss": 3.3894,
      "step": 6966
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4439266920089722,
      "learning_rate": 0.0005986473677424208,
      "loss": 3.2966,
      "step": 6967
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5501924753189087,
      "learning_rate": 0.0005986469797098109,
      "loss": 3.2197,
      "step": 6968
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8093550205230713,
      "learning_rate": 0.0005986465916216769,
      "loss": 3.2574,
      "step": 6969
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3869168758392334,
      "learning_rate": 0.0005986462034780187,
      "loss": 3.0788,
      "step": 6970
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.80267596244812,
      "learning_rate": 0.0005986458152788366,
      "loss": 3.3883,
      "step": 6971
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3499650955200195,
      "learning_rate": 0.0005986454270241308,
      "loss": 3.3553,
      "step": 6972
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7440743446350098,
      "learning_rate": 0.0005986450387139009,
      "loss": 3.2606,
      "step": 6973
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5532116889953613,
      "learning_rate": 0.0005986446503481474,
      "loss": 3.5209,
      "step": 6974
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9076628684997559,
      "learning_rate": 0.0005986442619268701,
      "loss": 3.2893,
      "step": 6975
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.509555697441101,
      "learning_rate": 0.0005986438734500694,
      "loss": 3.1433,
      "step": 6976
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2823433876037598,
      "learning_rate": 0.000598643484917745,
      "loss": 3.5615,
      "step": 6977
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8439269065856934,
      "learning_rate": 0.0005986430963298971,
      "loss": 3.2237,
      "step": 6978
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4846616983413696,
      "learning_rate": 0.0005986427076865258,
      "loss": 3.2069,
      "step": 6979
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4487690925598145,
      "learning_rate": 0.0005986423189876314,
      "loss": 3.2055,
      "step": 6980
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8450573682785034,
      "learning_rate": 0.0005986419302332136,
      "loss": 3.2791,
      "step": 6981
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.330635905265808,
      "learning_rate": 0.0005986415414232728,
      "loss": 3.2223,
      "step": 6982
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6768535375595093,
      "learning_rate": 0.0005986411525578087,
      "loss": 3.342,
      "step": 6983
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.526678204536438,
      "learning_rate": 0.0005986407636368216,
      "loss": 3.2847,
      "step": 6984
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4644719362258911,
      "learning_rate": 0.0005986403746603116,
      "loss": 3.3503,
      "step": 6985
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4628549814224243,
      "learning_rate": 0.0005986399856282786,
      "loss": 3.1486,
      "step": 6986
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9764182567596436,
      "learning_rate": 0.000598639596540723,
      "loss": 2.9909,
      "step": 6987
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3970705270767212,
      "learning_rate": 0.0005986392073976445,
      "loss": 3.2799,
      "step": 6988
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1208200454711914,
      "learning_rate": 0.0005986388181990434,
      "loss": 3.0683,
      "step": 6989
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6945123672485352,
      "learning_rate": 0.0005986384289449197,
      "loss": 3.1151,
      "step": 6990
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5564701557159424,
      "learning_rate": 0.0005986380396352734,
      "loss": 2.8467,
      "step": 6991
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.509040594100952,
      "learning_rate": 0.0005986376502701048,
      "loss": 3.334,
      "step": 6992
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.472731113433838,
      "learning_rate": 0.0005986372608494137,
      "loss": 3.3512,
      "step": 6993
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.057374954223633,
      "learning_rate": 0.0005986368713732003,
      "loss": 3.2241,
      "step": 6994
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4905685186386108,
      "learning_rate": 0.0005986364818414648,
      "loss": 3.4258,
      "step": 6995
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.151357889175415,
      "learning_rate": 0.000598636092254207,
      "loss": 3.0469,
      "step": 6996
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5439249277114868,
      "learning_rate": 0.0005986357026114272,
      "loss": 3.2912,
      "step": 6997
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.091527223587036,
      "learning_rate": 0.0005986353129131253,
      "loss": 3.3447,
      "step": 6998
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.749751329421997,
      "learning_rate": 0.0005986349231593015,
      "loss": 3.278,
      "step": 6999
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8217703104019165,
      "learning_rate": 0.0005986345333499559,
      "loss": 3.3665,
      "step": 7000
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.933506727218628,
      "learning_rate": 0.0005986341434850884,
      "loss": 2.9789,
      "step": 7001
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3703396320343018,
      "learning_rate": 0.0005986337535646992,
      "loss": 3.3621,
      "step": 7002
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.863396167755127,
      "learning_rate": 0.0005986333635887884,
      "loss": 3.4085,
      "step": 7003
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.45928955078125,
      "learning_rate": 0.000598632973557356,
      "loss": 3.172,
      "step": 7004
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6852627992630005,
      "learning_rate": 0.0005986325834704021,
      "loss": 3.3623,
      "step": 7005
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4426665306091309,
      "learning_rate": 0.0005986321933279267,
      "loss": 3.1344,
      "step": 7006
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4302316904067993,
      "learning_rate": 0.0005986318031299299,
      "loss": 3.3447,
      "step": 7007
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.33360755443573,
      "learning_rate": 0.000598631412876412,
      "loss": 3.1954,
      "step": 7008
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6636654138565063,
      "learning_rate": 0.0005986310225673727,
      "loss": 3.1576,
      "step": 7009
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7324355840682983,
      "learning_rate": 0.0005986306322028124,
      "loss": 3.2295,
      "step": 7010
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5571959018707275,
      "learning_rate": 0.0005986302417827309,
      "loss": 3.1871,
      "step": 7011
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6801841259002686,
      "learning_rate": 0.0005986298513071285,
      "loss": 2.8754,
      "step": 7012
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5978351831436157,
      "learning_rate": 0.0005986294607760051,
      "loss": 3.2135,
      "step": 7013
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.56012761592865,
      "learning_rate": 0.0005986290701893609,
      "loss": 3.066,
      "step": 7014
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6906132698059082,
      "learning_rate": 0.0005986286795471959,
      "loss": 3.3466,
      "step": 7015
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.387501835823059,
      "learning_rate": 0.0005986282888495102,
      "loss": 3.3006,
      "step": 7016
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4712719917297363,
      "learning_rate": 0.0005986278980963038,
      "loss": 3.2714,
      "step": 7017
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.94281005859375,
      "learning_rate": 0.0005986275072875769,
      "loss": 3.2855,
      "step": 7018
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6504641771316528,
      "learning_rate": 0.0005986271164233295,
      "loss": 3.3639,
      "step": 7019
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8440909385681152,
      "learning_rate": 0.0005986267255035617,
      "loss": 3.0333,
      "step": 7020
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.879884123802185,
      "learning_rate": 0.0005986263345282735,
      "loss": 3.1857,
      "step": 7021
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2125179767608643,
      "learning_rate": 0.0005986259434974651,
      "loss": 3.3532,
      "step": 7022
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.407866358757019,
      "learning_rate": 0.0005986255524111365,
      "loss": 3.0862,
      "step": 7023
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5770704746246338,
      "learning_rate": 0.0005986251612692876,
      "loss": 3.1996,
      "step": 7024
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4468085765838623,
      "learning_rate": 0.0005986247700719188,
      "loss": 2.8506,
      "step": 7025
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6950639486312866,
      "learning_rate": 0.00059862437881903,
      "loss": 3.3528,
      "step": 7026
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.6631765365600586,
      "learning_rate": 0.0005986239875106214,
      "loss": 3.3883,
      "step": 7027
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7895500659942627,
      "learning_rate": 0.0005986235961466928,
      "loss": 3.0551,
      "step": 7028
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.938996434211731,
      "learning_rate": 0.0005986232047272444,
      "loss": 3.3143,
      "step": 7029
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7706665992736816,
      "learning_rate": 0.0005986228132522765,
      "loss": 3.2807,
      "step": 7030
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.88591468334198,
      "learning_rate": 0.0005986224217217889,
      "loss": 3.2459,
      "step": 7031
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.664617896080017,
      "learning_rate": 0.0005986220301357817,
      "loss": 3.3989,
      "step": 7032
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3913136720657349,
      "learning_rate": 0.0005986216384942551,
      "loss": 3.3771,
      "step": 7033
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4112708568573,
      "learning_rate": 0.000598621246797209,
      "loss": 3.1582,
      "step": 7034
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4357874393463135,
      "learning_rate": 0.0005986208550446437,
      "loss": 3.1014,
      "step": 7035
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4454652070999146,
      "learning_rate": 0.000598620463236559,
      "loss": 3.0708,
      "step": 7036
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3741962909698486,
      "learning_rate": 0.0005986200713729552,
      "loss": 3.2421,
      "step": 7037
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.482818603515625,
      "learning_rate": 0.0005986196794538324,
      "loss": 3.042,
      "step": 7038
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6748480796813965,
      "learning_rate": 0.0005986192874791904,
      "loss": 3.0992,
      "step": 7039
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8050637245178223,
      "learning_rate": 0.0005986188954490296,
      "loss": 3.2014,
      "step": 7040
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4816362857818604,
      "learning_rate": 0.0005986185033633498,
      "loss": 3.2069,
      "step": 7041
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4117887020111084,
      "learning_rate": 0.0005986181112221512,
      "loss": 3.418,
      "step": 7042
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.467164158821106,
      "learning_rate": 0.0005986177190254338,
      "loss": 3.4726,
      "step": 7043
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6652377843856812,
      "learning_rate": 0.0005986173267731978,
      "loss": 3.0582,
      "step": 7044
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6938010454177856,
      "learning_rate": 0.0005986169344654431,
      "loss": 3.132,
      "step": 7045
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.018160343170166,
      "learning_rate": 0.0005986165421021701,
      "loss": 3.0458,
      "step": 7046
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0130791664123535,
      "learning_rate": 0.0005986161496833784,
      "loss": 3.3752,
      "step": 7047
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6334172487258911,
      "learning_rate": 0.0005986157572090685,
      "loss": 3.2302,
      "step": 7048
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3632891178131104,
      "learning_rate": 0.0005986153646792402,
      "loss": 3.3053,
      "step": 7049
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4442120790481567,
      "learning_rate": 0.0005986149720938937,
      "loss": 3.3203,
      "step": 7050
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6366244554519653,
      "learning_rate": 0.000598614579453029,
      "loss": 3.1399,
      "step": 7051
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.557255744934082,
      "learning_rate": 0.0005986141867566462,
      "loss": 3.0901,
      "step": 7052
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6042964458465576,
      "learning_rate": 0.0005986137940047454,
      "loss": 3.2514,
      "step": 7053
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8990153074264526,
      "learning_rate": 0.0005986134011973268,
      "loss": 3.237,
      "step": 7054
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9889405965805054,
      "learning_rate": 0.00059861300833439,
      "loss": 3.35,
      "step": 7055
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8270303010940552,
      "learning_rate": 0.0005986126154159358,
      "loss": 3.223,
      "step": 7056
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.620161533355713,
      "learning_rate": 0.0005986122224419636,
      "loss": 3.2533,
      "step": 7057
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0401692390441895,
      "learning_rate": 0.0005986118294124739,
      "loss": 3.0338,
      "step": 7058
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6646636724472046,
      "learning_rate": 0.0005986114363274665,
      "loss": 3.0951,
      "step": 7059
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6136560440063477,
      "learning_rate": 0.0005986110431869417,
      "loss": 3.1768,
      "step": 7060
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6770333051681519,
      "learning_rate": 0.0005986106499908995,
      "loss": 2.9645,
      "step": 7061
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.70842707157135,
      "learning_rate": 0.0005986102567393398,
      "loss": 3.2708,
      "step": 7062
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.865988850593567,
      "learning_rate": 0.0005986098634322629,
      "loss": 3.2795,
      "step": 7063
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6930667161941528,
      "learning_rate": 0.0005986094700696688,
      "loss": 3.2796,
      "step": 7064
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7772338390350342,
      "learning_rate": 0.0005986090766515576,
      "loss": 3.1585,
      "step": 7065
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4341111183166504,
      "learning_rate": 0.0005986086831779293,
      "loss": 3.0778,
      "step": 7066
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7549668550491333,
      "learning_rate": 0.000598608289648784,
      "loss": 3.2309,
      "step": 7067
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6241611242294312,
      "learning_rate": 0.0005986078960641217,
      "loss": 3.4818,
      "step": 7068
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0108706951141357,
      "learning_rate": 0.0005986075024239428,
      "loss": 3.4343,
      "step": 7069
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4693148136138916,
      "learning_rate": 0.0005986071087282468,
      "loss": 3.1017,
      "step": 7070
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4986356496810913,
      "learning_rate": 0.0005986067149770343,
      "loss": 3.1743,
      "step": 7071
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3252168893814087,
      "learning_rate": 0.0005986063211703052,
      "loss": 3.0679,
      "step": 7072
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6992660760879517,
      "learning_rate": 0.0005986059273080594,
      "loss": 3.3806,
      "step": 7073
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5844776630401611,
      "learning_rate": 0.0005986055333902973,
      "loss": 3.5256,
      "step": 7074
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9544347524642944,
      "learning_rate": 0.0005986051394170186,
      "loss": 3.3223,
      "step": 7075
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7427752017974854,
      "learning_rate": 0.0005986047453882238,
      "loss": 3.411,
      "step": 7076
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5157116651535034,
      "learning_rate": 0.0005986043513039126,
      "loss": 3.0157,
      "step": 7077
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4606654644012451,
      "learning_rate": 0.0005986039571640851,
      "loss": 3.1392,
      "step": 7078
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5200108289718628,
      "learning_rate": 0.0005986035629687417,
      "loss": 3.2492,
      "step": 7079
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.839705228805542,
      "learning_rate": 0.0005986031687178822,
      "loss": 3.4709,
      "step": 7080
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.719279408454895,
      "learning_rate": 0.0005986027744115067,
      "loss": 3.2208,
      "step": 7081
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4024972915649414,
      "learning_rate": 0.0005986023800496153,
      "loss": 3.5224,
      "step": 7082
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8519291877746582,
      "learning_rate": 0.000598601985632208,
      "loss": 3.206,
      "step": 7083
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6480218172073364,
      "learning_rate": 0.0005986015911592852,
      "loss": 3.1364,
      "step": 7084
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7483023405075073,
      "learning_rate": 0.0005986011966308465,
      "loss": 3.2322,
      "step": 7085
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5607095956802368,
      "learning_rate": 0.0005986008020468924,
      "loss": 3.2799,
      "step": 7086
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5972485542297363,
      "learning_rate": 0.0005986004074074226,
      "loss": 3.1344,
      "step": 7087
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8630484342575073,
      "learning_rate": 0.0005986000127124374,
      "loss": 3.4178,
      "step": 7088
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4301353693008423,
      "learning_rate": 0.0005985996179619368,
      "loss": 3.0835,
      "step": 7089
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8172791004180908,
      "learning_rate": 0.0005985992231559209,
      "loss": 3.3431,
      "step": 7090
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.376281976699829,
      "learning_rate": 0.0005985988282943899,
      "loss": 3.1931,
      "step": 7091
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0498337745666504,
      "learning_rate": 0.0005985984333773436,
      "loss": 3.2945,
      "step": 7092
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5982449054718018,
      "learning_rate": 0.0005985980384047823,
      "loss": 3.2212,
      "step": 7093
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3933182954788208,
      "learning_rate": 0.0005985976433767058,
      "loss": 3.118,
      "step": 7094
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.300130605697632,
      "learning_rate": 0.0005985972482931146,
      "loss": 3.2682,
      "step": 7095
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.758688449859619,
      "learning_rate": 0.0005985968531540083,
      "loss": 3.4081,
      "step": 7096
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9233866930007935,
      "learning_rate": 0.0005985964579593874,
      "loss": 3.4085,
      "step": 7097
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1964268684387207,
      "learning_rate": 0.0005985960627092517,
      "loss": 3.2126,
      "step": 7098
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8500664234161377,
      "learning_rate": 0.0005985956674036014,
      "loss": 3.0783,
      "step": 7099
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.918904423713684,
      "learning_rate": 0.0005985952720424365,
      "loss": 3.2768,
      "step": 7100
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.30543053150177,
      "learning_rate": 0.0005985948766257572,
      "loss": 3.231,
      "step": 7101
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5621541738510132,
      "learning_rate": 0.0005985944811535633,
      "loss": 3.2953,
      "step": 7102
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8978792428970337,
      "learning_rate": 0.0005985940856258552,
      "loss": 3.3848,
      "step": 7103
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0481083393096924,
      "learning_rate": 0.0005985936900426326,
      "loss": 3.1899,
      "step": 7104
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.539944052696228,
      "learning_rate": 0.000598593294403896,
      "loss": 3.2481,
      "step": 7105
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.347973585128784,
      "learning_rate": 0.0005985928987096453,
      "loss": 3.1912,
      "step": 7106
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.548295021057129,
      "learning_rate": 0.0005985925029598805,
      "loss": 3.2073,
      "step": 7107
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8368885517120361,
      "learning_rate": 0.0005985921071546016,
      "loss": 3.4177,
      "step": 7108
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5065371990203857,
      "learning_rate": 0.0005985917112938089,
      "loss": 3.276,
      "step": 7109
    },
    {
      "epoch": 0.09,
      "grad_norm": 4.015728950500488,
      "learning_rate": 0.0005985913153775023,
      "loss": 3.1495,
      "step": 7110
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.537297248840332,
      "learning_rate": 0.000598590919405682,
      "loss": 3.3467,
      "step": 7111
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4410383701324463,
      "learning_rate": 0.000598590523378348,
      "loss": 3.3446,
      "step": 7112
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.198336124420166,
      "learning_rate": 0.0005985901272955004,
      "loss": 3.1759,
      "step": 7113
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.188748598098755,
      "learning_rate": 0.0005985897311571392,
      "loss": 3.475,
      "step": 7114
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8790441751480103,
      "learning_rate": 0.0005985893349632645,
      "loss": 3.2047,
      "step": 7115
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6753255128860474,
      "learning_rate": 0.0005985889387138765,
      "loss": 3.0117,
      "step": 7116
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.942516803741455,
      "learning_rate": 0.0005985885424089752,
      "loss": 3.2003,
      "step": 7117
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5849764347076416,
      "learning_rate": 0.0005985881460485606,
      "loss": 3.3408,
      "step": 7118
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.5169003009796143,
      "learning_rate": 0.0005985877496326328,
      "loss": 3.1248,
      "step": 7119
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.333082437515259,
      "learning_rate": 0.000598587353161192,
      "loss": 3.1657,
      "step": 7120
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4382314682006836,
      "learning_rate": 0.0005985869566342381,
      "loss": 3.2138,
      "step": 7121
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.567992925643921,
      "learning_rate": 0.0005985865600517713,
      "loss": 3.0983,
      "step": 7122
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6170728206634521,
      "learning_rate": 0.0005985861634137915,
      "loss": 3.4912,
      "step": 7123
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1833982467651367,
      "learning_rate": 0.000598585766720299,
      "loss": 3.0848,
      "step": 7124
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.63573157787323,
      "learning_rate": 0.0005985853699712937,
      "loss": 3.2504,
      "step": 7125
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6749528646469116,
      "learning_rate": 0.0005985849731667758,
      "loss": 3.3626,
      "step": 7126
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.6579596996307373,
      "learning_rate": 0.0005985845763067454,
      "loss": 3.1207,
      "step": 7127
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4942917823791504,
      "learning_rate": 0.0005985841793912024,
      "loss": 3.2131,
      "step": 7128
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7214300632476807,
      "learning_rate": 0.000598583782420147,
      "loss": 3.6332,
      "step": 7129
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6386040449142456,
      "learning_rate": 0.0005985833853935791,
      "loss": 3.3352,
      "step": 7130
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0281810760498047,
      "learning_rate": 0.000598582988311499,
      "loss": 2.7672,
      "step": 7131
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5453039407730103,
      "learning_rate": 0.0005985825911739068,
      "loss": 3.123,
      "step": 7132
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5721391439437866,
      "learning_rate": 0.0005985821939808023,
      "loss": 3.178,
      "step": 7133
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8664220571517944,
      "learning_rate": 0.0005985817967321858,
      "loss": 3.2645,
      "step": 7134
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.072584390640259,
      "learning_rate": 0.0005985813994280573,
      "loss": 3.5213,
      "step": 7135
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7257030010223389,
      "learning_rate": 0.0005985810020684169,
      "loss": 3.2993,
      "step": 7136
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.205066442489624,
      "learning_rate": 0.0005985806046532647,
      "loss": 3.2873,
      "step": 7137
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0336694717407227,
      "learning_rate": 0.0005985802071826005,
      "loss": 3.0907,
      "step": 7138
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.805962324142456,
      "learning_rate": 0.0005985798096564248,
      "loss": 3.2221,
      "step": 7139
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.339430332183838,
      "learning_rate": 0.0005985794120747375,
      "loss": 3.1439,
      "step": 7140
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1399054527282715,
      "learning_rate": 0.0005985790144375386,
      "loss": 3.3815,
      "step": 7141
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.891775131225586,
      "learning_rate": 0.0005985786167448283,
      "loss": 3.2308,
      "step": 7142
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6515547037124634,
      "learning_rate": 0.0005985782189966064,
      "loss": 3.3809,
      "step": 7143
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.765856981277466,
      "learning_rate": 0.0005985778211928733,
      "loss": 3.271,
      "step": 7144
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2778103351593018,
      "learning_rate": 0.000598577423333629,
      "loss": 3.4432,
      "step": 7145
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8723119497299194,
      "learning_rate": 0.0005985770254188735,
      "loss": 3.1245,
      "step": 7146
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7035601139068604,
      "learning_rate": 0.0005985766274486068,
      "loss": 3.3363,
      "step": 7147
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6382774114608765,
      "learning_rate": 0.0005985762294228291,
      "loss": 3.3828,
      "step": 7148
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3919264078140259,
      "learning_rate": 0.0005985758313415405,
      "loss": 3.0948,
      "step": 7149
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.537559151649475,
      "learning_rate": 0.000598575433204741,
      "loss": 3.3036,
      "step": 7150
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4635483026504517,
      "learning_rate": 0.0005985750350124307,
      "loss": 3.6466,
      "step": 7151
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7650105953216553,
      "learning_rate": 0.0005985746367646096,
      "loss": 3.0546,
      "step": 7152
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6054123640060425,
      "learning_rate": 0.0005985742384612779,
      "loss": 3.0129,
      "step": 7153
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4994789361953735,
      "learning_rate": 0.0005985738401024356,
      "loss": 3.0175,
      "step": 7154
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3099042177200317,
      "learning_rate": 0.0005985734416880828,
      "loss": 3.3545,
      "step": 7155
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3479652404785156,
      "learning_rate": 0.0005985730432182196,
      "loss": 3.1026,
      "step": 7156
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6285966634750366,
      "learning_rate": 0.000598572644692846,
      "loss": 3.228,
      "step": 7157
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.40518856048584,
      "learning_rate": 0.0005985722461119621,
      "loss": 3.1402,
      "step": 7158
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6854127645492554,
      "learning_rate": 0.0005985718474755679,
      "loss": 3.3061,
      "step": 7159
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6735832691192627,
      "learning_rate": 0.0005985714487836637,
      "loss": 3.3853,
      "step": 7160
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2763824462890625,
      "learning_rate": 0.0005985710500362493,
      "loss": 3.1886,
      "step": 7161
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7278244495391846,
      "learning_rate": 0.000598570651233325,
      "loss": 3.291,
      "step": 7162
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8126786947250366,
      "learning_rate": 0.0005985702523748907,
      "loss": 3.2257,
      "step": 7163
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.8608992099761963,
      "learning_rate": 0.0005985698534609466,
      "loss": 3.4222,
      "step": 7164
    },
    {
      "epoch": 0.09,
      "grad_norm": 3.6678242683410645,
      "learning_rate": 0.0005985694544914927,
      "loss": 3.4412,
      "step": 7165
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0241189002990723,
      "learning_rate": 0.0005985690554665291,
      "loss": 3.3219,
      "step": 7166
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4787050485610962,
      "learning_rate": 0.0005985686563860559,
      "loss": 3.2206,
      "step": 7167
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.042734146118164,
      "learning_rate": 0.0005985682572500733,
      "loss": 3.4148,
      "step": 7168
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.878680944442749,
      "learning_rate": 0.000598567858058581,
      "loss": 3.1946,
      "step": 7169
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6457455158233643,
      "learning_rate": 0.0005985674588115794,
      "loss": 3.173,
      "step": 7170
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8042055368423462,
      "learning_rate": 0.0005985670595090685,
      "loss": 3.1628,
      "step": 7171
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.328508973121643,
      "learning_rate": 0.0005985666601510482,
      "loss": 3.4775,
      "step": 7172
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5550665855407715,
      "learning_rate": 0.000598566260737519,
      "loss": 3.3815,
      "step": 7173
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.377692699432373,
      "learning_rate": 0.0005985658612684804,
      "loss": 3.1581,
      "step": 7174
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9101536273956299,
      "learning_rate": 0.0005985654617439329,
      "loss": 3.2693,
      "step": 7175
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.400472640991211,
      "learning_rate": 0.0005985650621638765,
      "loss": 3.0552,
      "step": 7176
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3678245544433594,
      "learning_rate": 0.0005985646625283112,
      "loss": 2.9844,
      "step": 7177
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6576182842254639,
      "learning_rate": 0.000598564262837237,
      "loss": 3.2289,
      "step": 7178
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.872410774230957,
      "learning_rate": 0.0005985638630906542,
      "loss": 3.4809,
      "step": 7179
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.649200677871704,
      "learning_rate": 0.0005985634632885626,
      "loss": 3.3346,
      "step": 7180
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.783562183380127,
      "learning_rate": 0.0005985630634309625,
      "loss": 3.3335,
      "step": 7181
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5028157234191895,
      "learning_rate": 0.0005985626635178538,
      "loss": 3.3891,
      "step": 7182
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3093676567077637,
      "learning_rate": 0.0005985622635492368,
      "loss": 2.9743,
      "step": 7183
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.369386911392212,
      "learning_rate": 0.0005985618635251113,
      "loss": 3.3197,
      "step": 7184
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6041178703308105,
      "learning_rate": 0.0005985614634454776,
      "loss": 3.4314,
      "step": 7185
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4039413928985596,
      "learning_rate": 0.0005985610633103357,
      "loss": 3.3778,
      "step": 7186
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7855428457260132,
      "learning_rate": 0.0005985606631196856,
      "loss": 3.3765,
      "step": 7187
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8376291990280151,
      "learning_rate": 0.0005985602628735275,
      "loss": 3.0845,
      "step": 7188
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.673779010772705,
      "learning_rate": 0.0005985598625718614,
      "loss": 3.0827,
      "step": 7189
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2862017154693604,
      "learning_rate": 0.0005985594622146873,
      "loss": 3.3803,
      "step": 7190
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.904351830482483,
      "learning_rate": 0.0005985590618020055,
      "loss": 3.388,
      "step": 7191
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.451443076133728,
      "learning_rate": 0.0005985586613338158,
      "loss": 3.1799,
      "step": 7192
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.667768955230713,
      "learning_rate": 0.0005985582608101185,
      "loss": 3.1947,
      "step": 7193
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2835187911987305,
      "learning_rate": 0.0005985578602309136,
      "loss": 3.4223,
      "step": 7194
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7674615383148193,
      "learning_rate": 0.0005985574595962012,
      "loss": 3.1951,
      "step": 7195
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3426527976989746,
      "learning_rate": 0.0005985570589059812,
      "loss": 3.3904,
      "step": 7196
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.2455880641937256,
      "learning_rate": 0.0005985566581602538,
      "loss": 3.2766,
      "step": 7197
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5080420970916748,
      "learning_rate": 0.0005985562573590192,
      "loss": 3.3829,
      "step": 7198
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9403890371322632,
      "learning_rate": 0.0005985558565022773,
      "loss": 2.9573,
      "step": 7199
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.833324670791626,
      "learning_rate": 0.0005985554555900281,
      "loss": 3.1799,
      "step": 7200
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6130621433258057,
      "learning_rate": 0.000598555054622272,
      "loss": 3.191,
      "step": 7201
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7520852088928223,
      "learning_rate": 0.0005985546535990088,
      "loss": 3.4655,
      "step": 7202
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7076054811477661,
      "learning_rate": 0.0005985542525202386,
      "loss": 3.1794,
      "step": 7203
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.605132818222046,
      "learning_rate": 0.0005985538513859617,
      "loss": 3.336,
      "step": 7204
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5685447454452515,
      "learning_rate": 0.0005985534501961779,
      "loss": 3.0038,
      "step": 7205
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0711967945098877,
      "learning_rate": 0.0005985530489508874,
      "loss": 3.1655,
      "step": 7206
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5108108520507812,
      "learning_rate": 0.0005985526476500902,
      "loss": 3.2642,
      "step": 7207
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.149420738220215,
      "learning_rate": 0.0005985522462937865,
      "loss": 3.0607,
      "step": 7208
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8531012535095215,
      "learning_rate": 0.0005985518448819761,
      "loss": 3.3067,
      "step": 7209
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1074278354644775,
      "learning_rate": 0.0005985514434146596,
      "loss": 3.3384,
      "step": 7210
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.749315619468689,
      "learning_rate": 0.0005985510418918365,
      "loss": 3.0347,
      "step": 7211
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.253563404083252,
      "learning_rate": 0.0005985506403135072,
      "loss": 3.0667,
      "step": 7212
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5760966539382935,
      "learning_rate": 0.0005985502386796717,
      "loss": 3.0791,
      "step": 7213
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5684731006622314,
      "learning_rate": 0.0005985498369903301,
      "loss": 3.4245,
      "step": 7214
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.3875951766967773,
      "learning_rate": 0.0005985494352454825,
      "loss": 3.2974,
      "step": 7215
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0478920936584473,
      "learning_rate": 0.0005985490334451288,
      "loss": 3.3503,
      "step": 7216
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6393357515335083,
      "learning_rate": 0.0005985486315892693,
      "loss": 3.186,
      "step": 7217
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.634427547454834,
      "learning_rate": 0.0005985482296779039,
      "loss": 3.0604,
      "step": 7218
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.521193027496338,
      "learning_rate": 0.0005985478277110328,
      "loss": 3.3887,
      "step": 7219
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.935245156288147,
      "learning_rate": 0.000598547425688656,
      "loss": 3.3326,
      "step": 7220
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7386022806167603,
      "learning_rate": 0.0005985470236107737,
      "loss": 3.2439,
      "step": 7221
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.635195016860962,
      "learning_rate": 0.0005985466214773857,
      "loss": 3.3052,
      "step": 7222
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.180752754211426,
      "learning_rate": 0.0005985462192884924,
      "loss": 3.3432,
      "step": 7223
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6787242889404297,
      "learning_rate": 0.0005985458170440935,
      "loss": 3.3506,
      "step": 7224
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5245914459228516,
      "learning_rate": 0.0005985454147441895,
      "loss": 3.118,
      "step": 7225
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7192531824111938,
      "learning_rate": 0.0005985450123887802,
      "loss": 3.1584,
      "step": 7226
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0409066677093506,
      "learning_rate": 0.0005985446099778658,
      "loss": 3.2708,
      "step": 7227
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7573953866958618,
      "learning_rate": 0.0005985442075114463,
      "loss": 3.2848,
      "step": 7228
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9151246547698975,
      "learning_rate": 0.0005985438049895218,
      "loss": 3.0376,
      "step": 7229
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4676125049591064,
      "learning_rate": 0.0005985434024120924,
      "loss": 3.1545,
      "step": 7230
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9914470911026,
      "learning_rate": 0.000598542999779158,
      "loss": 3.2681,
      "step": 7231
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5186684131622314,
      "learning_rate": 0.0005985425970907189,
      "loss": 3.2937,
      "step": 7232
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4561492204666138,
      "learning_rate": 0.0005985421943467751,
      "loss": 3.189,
      "step": 7233
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.795123338699341,
      "learning_rate": 0.0005985417915473268,
      "loss": 3.112,
      "step": 7234
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8824135065078735,
      "learning_rate": 0.0005985413886923737,
      "loss": 3.0106,
      "step": 7235
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.830774188041687,
      "learning_rate": 0.0005985409857819162,
      "loss": 3.3669,
      "step": 7236
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.844896912574768,
      "learning_rate": 0.0005985405828159544,
      "loss": 3.2048,
      "step": 7237
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.4654860496520996,
      "learning_rate": 0.0005985401797944882,
      "loss": 3.2523,
      "step": 7238
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.65301513671875,
      "learning_rate": 0.0005985397767175178,
      "loss": 3.1352,
      "step": 7239
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5391322374343872,
      "learning_rate": 0.0005985393735850431,
      "loss": 3.1647,
      "step": 7240
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1387922763824463,
      "learning_rate": 0.0005985389703970644,
      "loss": 3.4388,
      "step": 7241
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7102906703948975,
      "learning_rate": 0.0005985385671535816,
      "loss": 3.221,
      "step": 7242
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3494535684585571,
      "learning_rate": 0.000598538163854595,
      "loss": 3.191,
      "step": 7243
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.575590133666992,
      "learning_rate": 0.0005985377605001044,
      "loss": 3.2118,
      "step": 7244
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1908206939697266,
      "learning_rate": 0.00059853735709011,
      "loss": 3.2271,
      "step": 7245
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.475369930267334,
      "learning_rate": 0.0005985369536246119,
      "loss": 3.0823,
      "step": 7246
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.9209110736846924,
      "learning_rate": 0.0005985365501036101,
      "loss": 3.0881,
      "step": 7247
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.6111114025115967,
      "learning_rate": 0.0005985361465271048,
      "loss": 3.4412,
      "step": 7248
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.344008207321167,
      "learning_rate": 0.0005985357428950959,
      "loss": 3.2041,
      "step": 7249
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.3775323629379272,
      "learning_rate": 0.0005985353392075835,
      "loss": 3.0519,
      "step": 7250
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8664518594741821,
      "learning_rate": 0.000598534935464568,
      "loss": 3.1767,
      "step": 7251
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4824402332305908,
      "learning_rate": 0.000598534531666049,
      "loss": 3.2993,
      "step": 7252
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.0665464401245117,
      "learning_rate": 0.0005985341278120269,
      "loss": 3.592,
      "step": 7253
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5341646671295166,
      "learning_rate": 0.0005985337239025016,
      "loss": 3.3359,
      "step": 7254
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5332268476486206,
      "learning_rate": 0.0005985333199374733,
      "loss": 3.2387,
      "step": 7255
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.09657621383667,
      "learning_rate": 0.0005985329159169419,
      "loss": 2.9742,
      "step": 7256
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.497628688812256,
      "learning_rate": 0.0005985325118409078,
      "loss": 3.3037,
      "step": 7257
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8064624071121216,
      "learning_rate": 0.0005985321077093708,
      "loss": 3.1744,
      "step": 7258
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.795479655265808,
      "learning_rate": 0.000598531703522331,
      "loss": 3.3094,
      "step": 7259
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7693750858306885,
      "learning_rate": 0.0005985312992797885,
      "loss": 3.1777,
      "step": 7260
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7888777256011963,
      "learning_rate": 0.0005985308949817435,
      "loss": 3.3458,
      "step": 7261
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4395296573638916,
      "learning_rate": 0.0005985304906281958,
      "loss": 3.5202,
      "step": 7262
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.7375409603118896,
      "learning_rate": 0.0005985300862191458,
      "loss": 3.1294,
      "step": 7263
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7396923303604126,
      "learning_rate": 0.0005985296817545933,
      "loss": 3.2285,
      "step": 7264
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8324995040893555,
      "learning_rate": 0.0005985292772345386,
      "loss": 3.521,
      "step": 7265
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1533684730529785,
      "learning_rate": 0.0005985288726589816,
      "loss": 3.4047,
      "step": 7266
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.8701633214950562,
      "learning_rate": 0.0005985284680279226,
      "loss": 3.4579,
      "step": 7267
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.1154651641845703,
      "learning_rate": 0.0005985280633413614,
      "loss": 3.2919,
      "step": 7268
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4820749759674072,
      "learning_rate": 0.0005985276585992982,
      "loss": 3.4661,
      "step": 7269
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.737903118133545,
      "learning_rate": 0.0005985272538017332,
      "loss": 3.167,
      "step": 7270
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7180354595184326,
      "learning_rate": 0.0005985268489486661,
      "loss": 3.4059,
      "step": 7271
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5141397714614868,
      "learning_rate": 0.0005985264440400974,
      "loss": 3.4103,
      "step": 7272
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.618754267692566,
      "learning_rate": 0.000598526039076027,
      "loss": 3.202,
      "step": 7273
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.842395544052124,
      "learning_rate": 0.0005985256340564548,
      "loss": 3.0701,
      "step": 7274
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6677467823028564,
      "learning_rate": 0.0005985252289813812,
      "loss": 3.4571,
      "step": 7275
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6233705282211304,
      "learning_rate": 0.0005985248238508061,
      "loss": 3.3382,
      "step": 7276
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.502964973449707,
      "learning_rate": 0.0005985244186647296,
      "loss": 3.269,
      "step": 7277
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.458655834197998,
      "learning_rate": 0.0005985240134231518,
      "loss": 3.2957,
      "step": 7278
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4226195812225342,
      "learning_rate": 0.0005985236081260727,
      "loss": 3.2577,
      "step": 7279
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.556445837020874,
      "learning_rate": 0.0005985232027734926,
      "loss": 3.2599,
      "step": 7280
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5401947498321533,
      "learning_rate": 0.0005985227973654112,
      "loss": 3.3566,
      "step": 7281
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5769566297531128,
      "learning_rate": 0.0005985223919018288,
      "loss": 3.1496,
      "step": 7282
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.518600344657898,
      "learning_rate": 0.0005985219863827454,
      "loss": 3.1556,
      "step": 7283
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.637986660003662,
      "learning_rate": 0.0005985215808081613,
      "loss": 3.2601,
      "step": 7284
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.5248053073883057,
      "learning_rate": 0.0005985211751780763,
      "loss": 3.3818,
      "step": 7285
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.7077434062957764,
      "learning_rate": 0.0005985207694924906,
      "loss": 3.0721,
      "step": 7286
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4424147605895996,
      "learning_rate": 0.0005985203637514043,
      "loss": 3.2129,
      "step": 7287
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.905910611152649,
      "learning_rate": 0.0005985199579548173,
      "loss": 3.2362,
      "step": 7288
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.6752564907073975,
      "learning_rate": 0.0005985195521027299,
      "loss": 3.3501,
      "step": 7289
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.94889497756958,
      "learning_rate": 0.0005985191461951421,
      "loss": 3.2541,
      "step": 7290
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.291199207305908,
      "learning_rate": 0.0005985187402320539,
      "loss": 3.2076,
      "step": 7291
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.828161358833313,
      "learning_rate": 0.0005985183342134654,
      "loss": 3.0189,
      "step": 7292
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4473570585250854,
      "learning_rate": 0.0005985179281393768,
      "loss": 3.0587,
      "step": 7293
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.042874336242676,
      "learning_rate": 0.0005985175220097881,
      "loss": 2.9015,
      "step": 7294
    },
    {
      "epoch": 0.09,
      "grad_norm": 2.671312093734741,
      "learning_rate": 0.0005985171158246992,
      "loss": 3.1152,
      "step": 7295
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4816701412200928,
      "learning_rate": 0.0005985167095841104,
      "loss": 3.0219,
      "step": 7296
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2624082565307617,
      "learning_rate": 0.0005985163032880218,
      "loss": 3.4546,
      "step": 7297
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6452770233154297,
      "learning_rate": 0.0005985158969364333,
      "loss": 3.0709,
      "step": 7298
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5273133516311646,
      "learning_rate": 0.0005985154905293451,
      "loss": 3.1599,
      "step": 7299
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8199869394302368,
      "learning_rate": 0.0005985150840667572,
      "loss": 3.2742,
      "step": 7300
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2483863830566406,
      "learning_rate": 0.0005985146775486698,
      "loss": 3.2486,
      "step": 7301
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5536798238754272,
      "learning_rate": 0.0005985142709750827,
      "loss": 3.2898,
      "step": 7302
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5717240571975708,
      "learning_rate": 0.0005985138643459963,
      "loss": 3.3349,
      "step": 7303
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4957571029663086,
      "learning_rate": 0.0005985134576614106,
      "loss": 3.5371,
      "step": 7304
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9412113428115845,
      "learning_rate": 0.0005985130509213255,
      "loss": 3.3291,
      "step": 7305
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7317273616790771,
      "learning_rate": 0.0005985126441257412,
      "loss": 3.2161,
      "step": 7306
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.770830750465393,
      "learning_rate": 0.0005985122372746578,
      "loss": 3.3302,
      "step": 7307
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6777427196502686,
      "learning_rate": 0.0005985118303680753,
      "loss": 3.1634,
      "step": 7308
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8244878053665161,
      "learning_rate": 0.0005985114234059939,
      "loss": 3.3089,
      "step": 7309
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.598117709159851,
      "learning_rate": 0.0005985110163884134,
      "loss": 3.1595,
      "step": 7310
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9287396669387817,
      "learning_rate": 0.0005985106093153342,
      "loss": 3.184,
      "step": 7311
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4593404531478882,
      "learning_rate": 0.0005985102021867563,
      "loss": 3.1971,
      "step": 7312
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9854974746704102,
      "learning_rate": 0.0005985097950026797,
      "loss": 3.3579,
      "step": 7313
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8064701557159424,
      "learning_rate": 0.0005985093877631045,
      "loss": 2.9847,
      "step": 7314
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6260294914245605,
      "learning_rate": 0.0005985089804680306,
      "loss": 3.2214,
      "step": 7315
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9562907218933105,
      "learning_rate": 0.0005985085731174584,
      "loss": 3.0192,
      "step": 7316
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4057921171188354,
      "learning_rate": 0.0005985081657113878,
      "loss": 3.2811,
      "step": 7317
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1024398803710938,
      "learning_rate": 0.0005985077582498189,
      "loss": 3.3688,
      "step": 7318
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6259300708770752,
      "learning_rate": 0.0005985073507327518,
      "loss": 3.3663,
      "step": 7319
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5450351238250732,
      "learning_rate": 0.0005985069431601865,
      "loss": 3.3946,
      "step": 7320
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6146574020385742,
      "learning_rate": 0.000598506535532123,
      "loss": 3.3452,
      "step": 7321
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.391132116317749,
      "learning_rate": 0.0005985061278485618,
      "loss": 3.3434,
      "step": 7322
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0997493267059326,
      "learning_rate": 0.0005985057201095025,
      "loss": 3.3337,
      "step": 7323
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2008845806121826,
      "learning_rate": 0.0005985053123149453,
      "loss": 3.1328,
      "step": 7324
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8897109031677246,
      "learning_rate": 0.0005985049044648905,
      "loss": 3.2828,
      "step": 7325
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2073135375976562,
      "learning_rate": 0.0005985044965593378,
      "loss": 3.2751,
      "step": 7326
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1557352542877197,
      "learning_rate": 0.0005985040885982876,
      "loss": 3.1559,
      "step": 7327
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1525657176971436,
      "learning_rate": 0.0005985036805817397,
      "loss": 3.1491,
      "step": 7328
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2924115657806396,
      "learning_rate": 0.0005985032725096945,
      "loss": 3.4517,
      "step": 7329
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5227367877960205,
      "learning_rate": 0.0005985028643821519,
      "loss": 3.0293,
      "step": 7330
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6188586950302124,
      "learning_rate": 0.0005985024561991119,
      "loss": 3.4707,
      "step": 7331
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.149474859237671,
      "learning_rate": 0.0005985020479605749,
      "loss": 3.3676,
      "step": 7332
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.600029468536377,
      "learning_rate": 0.0005985016396665404,
      "loss": 3.3144,
      "step": 7333
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.121185064315796,
      "learning_rate": 0.000598501231317009,
      "loss": 3.077,
      "step": 7334
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5212076902389526,
      "learning_rate": 0.0005985008229119805,
      "loss": 3.1733,
      "step": 7335
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6264129877090454,
      "learning_rate": 0.0005985004144514551,
      "loss": 3.2782,
      "step": 7336
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5451922416687012,
      "learning_rate": 0.0005985000059354329,
      "loss": 3.3534,
      "step": 7337
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.041135549545288,
      "learning_rate": 0.0005984995973639138,
      "loss": 2.9174,
      "step": 7338
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.802457332611084,
      "learning_rate": 0.0005984991887368981,
      "loss": 3.2656,
      "step": 7339
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6423041820526123,
      "learning_rate": 0.0005984987800543857,
      "loss": 3.1725,
      "step": 7340
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8448700904846191,
      "learning_rate": 0.0005984983713163767,
      "loss": 3.4241,
      "step": 7341
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6233677864074707,
      "learning_rate": 0.0005984979625228713,
      "loss": 3.6016,
      "step": 7342
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4347293376922607,
      "learning_rate": 0.0005984975536738695,
      "loss": 3.3508,
      "step": 7343
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6473485231399536,
      "learning_rate": 0.0005984971447693712,
      "loss": 3.1076,
      "step": 7344
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9201691150665283,
      "learning_rate": 0.0005984967358093769,
      "loss": 3.4083,
      "step": 7345
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6622601747512817,
      "learning_rate": 0.0005984963267938863,
      "loss": 3.1845,
      "step": 7346
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3516885042190552,
      "learning_rate": 0.0005984959177228996,
      "loss": 3.2136,
      "step": 7347
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6318986415863037,
      "learning_rate": 0.0005984955085964168,
      "loss": 3.1764,
      "step": 7348
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4750362634658813,
      "learning_rate": 0.0005984950994144383,
      "loss": 3.2521,
      "step": 7349
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3604763746261597,
      "learning_rate": 0.0005984946901769636,
      "loss": 3.3546,
      "step": 7350
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7825193405151367,
      "learning_rate": 0.0005984942808839933,
      "loss": 3.1874,
      "step": 7351
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.738887071609497,
      "learning_rate": 0.0005984938715355273,
      "loss": 3.4236,
      "step": 7352
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9993312358856201,
      "learning_rate": 0.0005984934621315655,
      "loss": 3.192,
      "step": 7353
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5630402565002441,
      "learning_rate": 0.0005984930526721082,
      "loss": 2.9707,
      "step": 7354
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8804107904434204,
      "learning_rate": 0.0005984926431571554,
      "loss": 3.0921,
      "step": 7355
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4652868509292603,
      "learning_rate": 0.0005984922335867072,
      "loss": 3.2791,
      "step": 7356
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5195893049240112,
      "learning_rate": 0.0005984918239607638,
      "loss": 3.1497,
      "step": 7357
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0216023921966553,
      "learning_rate": 0.000598491414279325,
      "loss": 2.9979,
      "step": 7358
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6142468452453613,
      "learning_rate": 0.000598491004542391,
      "loss": 3.2965,
      "step": 7359
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7704476118087769,
      "learning_rate": 0.0005984905947499619,
      "loss": 3.1886,
      "step": 7360
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6379914283752441,
      "learning_rate": 0.0005984901849020377,
      "loss": 3.1451,
      "step": 7361
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.952728033065796,
      "learning_rate": 0.0005984897749986187,
      "loss": 3.0035,
      "step": 7362
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.504636764526367,
      "learning_rate": 0.0005984893650397046,
      "loss": 3.4133,
      "step": 7363
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.528998851776123,
      "learning_rate": 0.0005984889550252959,
      "loss": 3.2083,
      "step": 7364
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.906132459640503,
      "learning_rate": 0.0005984885449553923,
      "loss": 3.1196,
      "step": 7365
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.6340537071228027,
      "learning_rate": 0.0005984881348299943,
      "loss": 3.1852,
      "step": 7366
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.633286714553833,
      "learning_rate": 0.0005984877246491014,
      "loss": 3.077,
      "step": 7367
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.831486225128174,
      "learning_rate": 0.0005984873144127143,
      "loss": 3.1872,
      "step": 7368
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7930750846862793,
      "learning_rate": 0.0005984869041208325,
      "loss": 3.2385,
      "step": 7369
    },
    {
      "epoch": 0.1,
      "grad_norm": 4.837332725524902,
      "learning_rate": 0.0005984864937734565,
      "loss": 3.3732,
      "step": 7370
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9468497037887573,
      "learning_rate": 0.0005984860833705862,
      "loss": 3.2186,
      "step": 7371
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0291996002197266,
      "learning_rate": 0.0005984856729122217,
      "loss": 3.0479,
      "step": 7372
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.630683422088623,
      "learning_rate": 0.0005984852623983631,
      "loss": 3.2858,
      "step": 7373
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4027516841888428,
      "learning_rate": 0.0005984848518290105,
      "loss": 3.2323,
      "step": 7374
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2261974811553955,
      "learning_rate": 0.0005984844412041638,
      "loss": 3.28,
      "step": 7375
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0790863037109375,
      "learning_rate": 0.0005984840305238233,
      "loss": 3.1407,
      "step": 7376
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.4770519733428955,
      "learning_rate": 0.000598483619787989,
      "loss": 3.3616,
      "step": 7377
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.845452308654785,
      "learning_rate": 0.0005984832089966609,
      "loss": 3.5702,
      "step": 7378
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2162790298461914,
      "learning_rate": 0.0005984827981498392,
      "loss": 3.3146,
      "step": 7379
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6036317348480225,
      "learning_rate": 0.0005984823872475239,
      "loss": 3.4055,
      "step": 7380
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4630537033081055,
      "learning_rate": 0.000598481976289715,
      "loss": 3.1237,
      "step": 7381
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.257286548614502,
      "learning_rate": 0.0005984815652764127,
      "loss": 3.2793,
      "step": 7382
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0409703254699707,
      "learning_rate": 0.000598481154207617,
      "loss": 3.2825,
      "step": 7383
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8203433752059937,
      "learning_rate": 0.0005984807430833281,
      "loss": 3.172,
      "step": 7384
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.909628987312317,
      "learning_rate": 0.0005984803319035461,
      "loss": 3.0661,
      "step": 7385
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.118581771850586,
      "learning_rate": 0.0005984799206682707,
      "loss": 3.1131,
      "step": 7386
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4897871017456055,
      "learning_rate": 0.0005984795093775026,
      "loss": 3.1407,
      "step": 7387
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.027010679244995,
      "learning_rate": 0.0005984790980312412,
      "loss": 3.2072,
      "step": 7388
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.6584067344665527,
      "learning_rate": 0.0005984786866294872,
      "loss": 3.3818,
      "step": 7389
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.0456113815307617,
      "learning_rate": 0.0005984782751722402,
      "loss": 3.2544,
      "step": 7390
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5080386400222778,
      "learning_rate": 0.0005984778636595004,
      "loss": 3.0349,
      "step": 7391
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5836799144744873,
      "learning_rate": 0.000598477452091268,
      "loss": 3.2864,
      "step": 7392
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6216827630996704,
      "learning_rate": 0.000598477040467543,
      "loss": 3.3364,
      "step": 7393
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6977084875106812,
      "learning_rate": 0.0005984766287883255,
      "loss": 3.3014,
      "step": 7394
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7240954637527466,
      "learning_rate": 0.0005984762170536156,
      "loss": 3.2305,
      "step": 7395
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7989367246627808,
      "learning_rate": 0.0005984758052634132,
      "loss": 3.1371,
      "step": 7396
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.985111951828003,
      "learning_rate": 0.0005984753934177187,
      "loss": 3.2324,
      "step": 7397
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.677994966506958,
      "learning_rate": 0.0005984749815165319,
      "loss": 3.3851,
      "step": 7398
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.076505422592163,
      "learning_rate": 0.0005984745695598529,
      "loss": 3.2303,
      "step": 7399
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6445025205612183,
      "learning_rate": 0.0005984741575476819,
      "loss": 3.4243,
      "step": 7400
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.647817850112915,
      "learning_rate": 0.0005984737454800188,
      "loss": 3.3306,
      "step": 7401
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6236652135849,
      "learning_rate": 0.000598473333356864,
      "loss": 3.3666,
      "step": 7402
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9284528493881226,
      "learning_rate": 0.0005984729211782173,
      "loss": 3.1933,
      "step": 7403
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6086361408233643,
      "learning_rate": 0.0005984725089440788,
      "loss": 3.3383,
      "step": 7404
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6704813241958618,
      "learning_rate": 0.0005984720966544487,
      "loss": 3.1441,
      "step": 7405
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3533943891525269,
      "learning_rate": 0.000598471684309327,
      "loss": 3.3548,
      "step": 7406
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.581825852394104,
      "learning_rate": 0.0005984712719087137,
      "loss": 3.2099,
      "step": 7407
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6229979991912842,
      "learning_rate": 0.000598470859452609,
      "loss": 3.2582,
      "step": 7408
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4868146181106567,
      "learning_rate": 0.000598470446941013,
      "loss": 3.1437,
      "step": 7409
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6865721940994263,
      "learning_rate": 0.0005984700343739256,
      "loss": 3.0514,
      "step": 7410
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7077791690826416,
      "learning_rate": 0.0005984696217513471,
      "loss": 3.1694,
      "step": 7411
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3302531242370605,
      "learning_rate": 0.0005984692090732774,
      "loss": 3.2376,
      "step": 7412
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3606812953948975,
      "learning_rate": 0.0005984687963397166,
      "loss": 3.2702,
      "step": 7413
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6174211502075195,
      "learning_rate": 0.0005984683835506649,
      "loss": 3.451,
      "step": 7414
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9113914966583252,
      "learning_rate": 0.0005984679707061221,
      "loss": 3.3108,
      "step": 7415
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6791375875473022,
      "learning_rate": 0.0005984675578060887,
      "loss": 3.1623,
      "step": 7416
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6558541059494019,
      "learning_rate": 0.0005984671448505645,
      "loss": 3.0615,
      "step": 7417
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7957816123962402,
      "learning_rate": 0.0005984667318395495,
      "loss": 3.3011,
      "step": 7418
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7617361545562744,
      "learning_rate": 0.0005984663187730441,
      "loss": 3.5117,
      "step": 7419
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2620861530303955,
      "learning_rate": 0.000598465905651048,
      "loss": 3.1768,
      "step": 7420
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.682715654373169,
      "learning_rate": 0.0005984654924735616,
      "loss": 3.3663,
      "step": 7421
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5765023231506348,
      "learning_rate": 0.0005984650792405848,
      "loss": 3.4027,
      "step": 7422
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6899482011795044,
      "learning_rate": 0.0005984646659521176,
      "loss": 3.2716,
      "step": 7423
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5977040529251099,
      "learning_rate": 0.0005984642526081602,
      "loss": 3.3292,
      "step": 7424
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5598958730697632,
      "learning_rate": 0.0005984638392087128,
      "loss": 3.3544,
      "step": 7425
    },
    {
      "epoch": 0.1,
      "grad_norm": 4.303112030029297,
      "learning_rate": 0.0005984634257537752,
      "loss": 3.2853,
      "step": 7426
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7972877025604248,
      "learning_rate": 0.0005984630122433476,
      "loss": 3.4098,
      "step": 7427
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8138630390167236,
      "learning_rate": 0.0005984625986774301,
      "loss": 3.3373,
      "step": 7428
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.547573447227478,
      "learning_rate": 0.0005984621850560229,
      "loss": 3.2189,
      "step": 7429
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4866294860839844,
      "learning_rate": 0.0005984617713791259,
      "loss": 3.291,
      "step": 7430
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7016452550888062,
      "learning_rate": 0.0005984613576467392,
      "loss": 3.303,
      "step": 7431
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4815642833709717,
      "learning_rate": 0.0005984609438588629,
      "loss": 3.3317,
      "step": 7432
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9036692380905151,
      "learning_rate": 0.0005984605300154971,
      "loss": 3.2112,
      "step": 7433
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5289227962493896,
      "learning_rate": 0.0005984601161166418,
      "loss": 3.1916,
      "step": 7434
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9809993505477905,
      "learning_rate": 0.0005984597021622971,
      "loss": 3.0908,
      "step": 7435
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7177884578704834,
      "learning_rate": 0.0005984592881524632,
      "loss": 3.2751,
      "step": 7436
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7369318008422852,
      "learning_rate": 0.00059845887408714,
      "loss": 3.1583,
      "step": 7437
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8385411500930786,
      "learning_rate": 0.0005984584599663278,
      "loss": 3.0669,
      "step": 7438
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5400429964065552,
      "learning_rate": 0.0005984580457900265,
      "loss": 3.2352,
      "step": 7439
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5725220441818237,
      "learning_rate": 0.0005984576315582361,
      "loss": 3.4361,
      "step": 7440
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.46488618850708,
      "learning_rate": 0.0005984572172709568,
      "loss": 3.2889,
      "step": 7441
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6134215593338013,
      "learning_rate": 0.0005984568029281887,
      "loss": 3.1354,
      "step": 7442
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.648978590965271,
      "learning_rate": 0.0005984563885299319,
      "loss": 3.2104,
      "step": 7443
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.763791799545288,
      "learning_rate": 0.0005984559740761864,
      "loss": 3.317,
      "step": 7444
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5307525396347046,
      "learning_rate": 0.0005984555595669523,
      "loss": 3.372,
      "step": 7445
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.094912528991699,
      "learning_rate": 0.0005984551450022296,
      "loss": 3.3596,
      "step": 7446
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.863737940788269,
      "learning_rate": 0.0005984547303820185,
      "loss": 3.2878,
      "step": 7447
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.572685956954956,
      "learning_rate": 0.000598454315706319,
      "loss": 3.2684,
      "step": 7448
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7884225845336914,
      "learning_rate": 0.0005984539009751314,
      "loss": 3.1572,
      "step": 7449
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7147692441940308,
      "learning_rate": 0.0005984534861884555,
      "loss": 3.1653,
      "step": 7450
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.51955246925354,
      "learning_rate": 0.0005984530713462914,
      "loss": 3.2587,
      "step": 7451
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8594672679901123,
      "learning_rate": 0.0005984526564486392,
      "loss": 3.1388,
      "step": 7452
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3505882024765015,
      "learning_rate": 0.000598452241495499,
      "loss": 3.391,
      "step": 7453
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5353721380233765,
      "learning_rate": 0.000598451826486871,
      "loss": 3.0795,
      "step": 7454
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8043663501739502,
      "learning_rate": 0.0005984514114227551,
      "loss": 3.1493,
      "step": 7455
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.669707179069519,
      "learning_rate": 0.0005984509963031516,
      "loss": 3.3925,
      "step": 7456
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5171830654144287,
      "learning_rate": 0.0005984505811280602,
      "loss": 3.1091,
      "step": 7457
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.991669774055481,
      "learning_rate": 0.0005984501658974815,
      "loss": 3.3427,
      "step": 7458
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9476995468139648,
      "learning_rate": 0.000598449750611415,
      "loss": 3.1442,
      "step": 7459
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9674381017684937,
      "learning_rate": 0.0005984493352698612,
      "loss": 3.4005,
      "step": 7460
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6556336879730225,
      "learning_rate": 0.00059844891987282,
      "loss": 3.1888,
      "step": 7461
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.505255103111267,
      "learning_rate": 0.0005984485044202915,
      "loss": 3.2678,
      "step": 7462
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.488114356994629,
      "learning_rate": 0.0005984480889122757,
      "loss": 3.1375,
      "step": 7463
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.50434410572052,
      "learning_rate": 0.0005984476733487729,
      "loss": 3.3035,
      "step": 7464
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7229286432266235,
      "learning_rate": 0.000598447257729783,
      "loss": 3.1525,
      "step": 7465
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.571514368057251,
      "learning_rate": 0.0005984468420553061,
      "loss": 3.2652,
      "step": 7466
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6747106313705444,
      "learning_rate": 0.0005984464263253423,
      "loss": 3.2621,
      "step": 7467
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7801419496536255,
      "learning_rate": 0.0005984460105398917,
      "loss": 2.9731,
      "step": 7468
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5475633144378662,
      "learning_rate": 0.0005984455946989543,
      "loss": 3.2388,
      "step": 7469
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5474499464035034,
      "learning_rate": 0.0005984451788025304,
      "loss": 3.1486,
      "step": 7470
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.593790888786316,
      "learning_rate": 0.0005984447628506198,
      "loss": 3.0353,
      "step": 7471
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.112976551055908,
      "learning_rate": 0.0005984443468432226,
      "loss": 3.2311,
      "step": 7472
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5371878147125244,
      "learning_rate": 0.000598443930780339,
      "loss": 3.0789,
      "step": 7473
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6098064184188843,
      "learning_rate": 0.0005984435146619691,
      "loss": 3.3336,
      "step": 7474
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4613866806030273,
      "learning_rate": 0.0005984430984881129,
      "loss": 3.2567,
      "step": 7475
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4947330951690674,
      "learning_rate": 0.0005984426822587704,
      "loss": 3.1705,
      "step": 7476
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8780505657196045,
      "learning_rate": 0.0005984422659739419,
      "loss": 3.2543,
      "step": 7477
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2039947509765625,
      "learning_rate": 0.0005984418496336274,
      "loss": 3.3442,
      "step": 7478
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5409287214279175,
      "learning_rate": 0.0005984414332378268,
      "loss": 3.2881,
      "step": 7479
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6904746294021606,
      "learning_rate": 0.0005984410167865403,
      "loss": 3.4008,
      "step": 7480
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.505197286605835,
      "learning_rate": 0.0005984406002797681,
      "loss": 3.2632,
      "step": 7481
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.551223635673523,
      "learning_rate": 0.00059844018371751,
      "loss": 3.26,
      "step": 7482
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7768926620483398,
      "learning_rate": 0.0005984397670997664,
      "loss": 3.2747,
      "step": 7483
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.564286708831787,
      "learning_rate": 0.000598439350426537,
      "loss": 3.4964,
      "step": 7484
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4178240299224854,
      "learning_rate": 0.0005984389336978223,
      "loss": 3.3586,
      "step": 7485
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2844411134719849,
      "learning_rate": 0.0005984385169136221,
      "loss": 3.2988,
      "step": 7486
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4881583452224731,
      "learning_rate": 0.0005984381000739365,
      "loss": 3.1705,
      "step": 7487
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9518016576766968,
      "learning_rate": 0.0005984376831787657,
      "loss": 3.1489,
      "step": 7488
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.695951223373413,
      "learning_rate": 0.0005984372662281097,
      "loss": 3.1668,
      "step": 7489
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8063281774520874,
      "learning_rate": 0.0005984368492219684,
      "loss": 3.3802,
      "step": 7490
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5005640983581543,
      "learning_rate": 0.0005984364321603423,
      "loss": 3.452,
      "step": 7491
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2683063745498657,
      "learning_rate": 0.0005984360150432311,
      "loss": 3.1415,
      "step": 7492
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4992252588272095,
      "learning_rate": 0.0005984355978706351,
      "loss": 3.1875,
      "step": 7493
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6333461999893188,
      "learning_rate": 0.0005984351806425542,
      "loss": 3.0044,
      "step": 7494
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1255648136138916,
      "learning_rate": 0.0005984347633589886,
      "loss": 3.4184,
      "step": 7495
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0177724361419678,
      "learning_rate": 0.0005984343460199383,
      "loss": 3.3119,
      "step": 7496
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6893880367279053,
      "learning_rate": 0.0005984339286254035,
      "loss": 3.3062,
      "step": 7497
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.128300428390503,
      "learning_rate": 0.0005984335111753841,
      "loss": 3.1188,
      "step": 7498
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7626718282699585,
      "learning_rate": 0.0005984330936698803,
      "loss": 3.1551,
      "step": 7499
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8822338581085205,
      "learning_rate": 0.0005984326761088923,
      "loss": 3.2942,
      "step": 7500
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1468698978424072,
      "learning_rate": 0.0005984322584924198,
      "loss": 3.0817,
      "step": 7501
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2768630981445312,
      "learning_rate": 0.0005984318408204633,
      "loss": 3.1853,
      "step": 7502
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4904948472976685,
      "learning_rate": 0.0005984314230930227,
      "loss": 3.2034,
      "step": 7503
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8112375736236572,
      "learning_rate": 0.0005984310053100979,
      "loss": 3.3057,
      "step": 7504
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.663954496383667,
      "learning_rate": 0.0005984305874716892,
      "loss": 3.1488,
      "step": 7505
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.860222339630127,
      "learning_rate": 0.0005984301695777966,
      "loss": 3.2715,
      "step": 7506
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.5643815994262695,
      "learning_rate": 0.0005984297516284204,
      "loss": 3.3606,
      "step": 7507
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5294229984283447,
      "learning_rate": 0.0005984293336235602,
      "loss": 3.227,
      "step": 7508
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3671722412109375,
      "learning_rate": 0.0005984289155632165,
      "loss": 3.1764,
      "step": 7509
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.2370166778564453,
      "learning_rate": 0.0005984284974473893,
      "loss": 3.1251,
      "step": 7510
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.287947654724121,
      "learning_rate": 0.0005984280792760785,
      "loss": 3.2956,
      "step": 7511
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.47486412525177,
      "learning_rate": 0.0005984276610492843,
      "loss": 3.2612,
      "step": 7512
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.211883068084717,
      "learning_rate": 0.0005984272427670068,
      "loss": 2.8981,
      "step": 7513
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.60605788230896,
      "learning_rate": 0.000598426824429246,
      "loss": 3.2967,
      "step": 7514
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7266358137130737,
      "learning_rate": 0.000598426406036002,
      "loss": 3.4573,
      "step": 7515
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6765766143798828,
      "learning_rate": 0.000598425987587275,
      "loss": 3.0893,
      "step": 7516
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7854976654052734,
      "learning_rate": 0.0005984255690830648,
      "loss": 3.1747,
      "step": 7517
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7598748207092285,
      "learning_rate": 0.0005984251505233717,
      "loss": 3.4788,
      "step": 7518
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7057960033416748,
      "learning_rate": 0.0005984247319081959,
      "loss": 3.2894,
      "step": 7519
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.534685492515564,
      "learning_rate": 0.0005984243132375372,
      "loss": 3.4183,
      "step": 7520
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8699702024459839,
      "learning_rate": 0.0005984238945113958,
      "loss": 3.14,
      "step": 7521
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.318479061126709,
      "learning_rate": 0.0005984234757297717,
      "loss": 3.4098,
      "step": 7522
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3737789392471313,
      "learning_rate": 0.0005984230568926651,
      "loss": 3.0719,
      "step": 7523
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7897424697875977,
      "learning_rate": 0.000598422638000076,
      "loss": 3.0569,
      "step": 7524
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5552542209625244,
      "learning_rate": 0.0005984222190520045,
      "loss": 3.1088,
      "step": 7525
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3890974521636963,
      "learning_rate": 0.0005984218000484508,
      "loss": 3.3568,
      "step": 7526
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9506738185882568,
      "learning_rate": 0.0005984213809894146,
      "loss": 3.3505,
      "step": 7527
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9675657749176025,
      "learning_rate": 0.0005984209618748965,
      "loss": 3.3396,
      "step": 7528
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5929731130599976,
      "learning_rate": 0.0005984205427048961,
      "loss": 3.2049,
      "step": 7529
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8486393690109253,
      "learning_rate": 0.0005984201234794138,
      "loss": 3.2111,
      "step": 7530
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8308440446853638,
      "learning_rate": 0.0005984197041984495,
      "loss": 3.1559,
      "step": 7531
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9716295003890991,
      "learning_rate": 0.0005984192848620034,
      "loss": 3.2517,
      "step": 7532
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.877832293510437,
      "learning_rate": 0.0005984188654700756,
      "loss": 3.4021,
      "step": 7533
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4699609279632568,
      "learning_rate": 0.0005984184460226659,
      "loss": 3.1672,
      "step": 7534
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6938515901565552,
      "learning_rate": 0.0005984180265197747,
      "loss": 3.1496,
      "step": 7535
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5583552122116089,
      "learning_rate": 0.000598417606961402,
      "loss": 3.1515,
      "step": 7536
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6264742612838745,
      "learning_rate": 0.0005984171873475477,
      "loss": 3.1826,
      "step": 7537
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6813119649887085,
      "learning_rate": 0.0005984167676782122,
      "loss": 3.0668,
      "step": 7538
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8176535367965698,
      "learning_rate": 0.0005984163479533952,
      "loss": 3.0042,
      "step": 7539
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7945688962936401,
      "learning_rate": 0.000598415928173097,
      "loss": 3.2543,
      "step": 7540
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6913636922836304,
      "learning_rate": 0.0005984155083373177,
      "loss": 3.4943,
      "step": 7541
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5404229164123535,
      "learning_rate": 0.0005984150884460574,
      "loss": 3.1846,
      "step": 7542
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3309475183486938,
      "learning_rate": 0.000598414668499316,
      "loss": 3.1787,
      "step": 7543
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4860221147537231,
      "learning_rate": 0.0005984142484970937,
      "loss": 2.8189,
      "step": 7544
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7226972579956055,
      "learning_rate": 0.0005984138284393904,
      "loss": 3.2156,
      "step": 7545
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9139113426208496,
      "learning_rate": 0.0005984134083262066,
      "loss": 3.1445,
      "step": 7546
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.426166296005249,
      "learning_rate": 0.000598412988157542,
      "loss": 3.4592,
      "step": 7547
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7203211784362793,
      "learning_rate": 0.0005984125679333968,
      "loss": 3.2585,
      "step": 7548
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5464447736740112,
      "learning_rate": 0.000598412147653771,
      "loss": 3.3027,
      "step": 7549
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.392406940460205,
      "learning_rate": 0.0005984117273186648,
      "loss": 3.0673,
      "step": 7550
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.850028395652771,
      "learning_rate": 0.0005984113069280783,
      "loss": 3.1651,
      "step": 7551
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.147493839263916,
      "learning_rate": 0.0005984108864820115,
      "loss": 3.0999,
      "step": 7552
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4716897010803223,
      "learning_rate": 0.0005984104659804644,
      "loss": 3.2937,
      "step": 7553
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2180871963500977,
      "learning_rate": 0.0005984100454234372,
      "loss": 3.196,
      "step": 7554
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.572412371635437,
      "learning_rate": 0.0005984096248109299,
      "loss": 3.1636,
      "step": 7555
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.912428617477417,
      "learning_rate": 0.0005984092041429426,
      "loss": 2.9954,
      "step": 7556
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.047977924346924,
      "learning_rate": 0.0005984087834194755,
      "loss": 3.3153,
      "step": 7557
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5615687370300293,
      "learning_rate": 0.0005984083626405285,
      "loss": 3.0986,
      "step": 7558
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3289389610290527,
      "learning_rate": 0.0005984079418061018,
      "loss": 3.286,
      "step": 7559
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7628440856933594,
      "learning_rate": 0.0005984075209161954,
      "loss": 2.9386,
      "step": 7560
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7421973943710327,
      "learning_rate": 0.0005984070999708094,
      "loss": 2.91,
      "step": 7561
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.76430082321167,
      "learning_rate": 0.0005984066789699439,
      "loss": 3.3195,
      "step": 7562
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.49360990524292,
      "learning_rate": 0.000598406257913599,
      "loss": 3.2886,
      "step": 7563
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4511914253234863,
      "learning_rate": 0.0005984058368017747,
      "loss": 3.2281,
      "step": 7564
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3200395107269287,
      "learning_rate": 0.0005984054156344712,
      "loss": 3.0553,
      "step": 7565
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7132518291473389,
      "learning_rate": 0.0005984049944116884,
      "loss": 3.5121,
      "step": 7566
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8542386293411255,
      "learning_rate": 0.0005984045731334264,
      "loss": 3.3541,
      "step": 7567
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8694759607315063,
      "learning_rate": 0.0005984041517996855,
      "loss": 3.5009,
      "step": 7568
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.493582010269165,
      "learning_rate": 0.0005984037304104656,
      "loss": 3.0748,
      "step": 7569
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.5357093811035156,
      "learning_rate": 0.0005984033089657669,
      "loss": 3.2824,
      "step": 7570
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8327056169509888,
      "learning_rate": 0.0005984028874655892,
      "loss": 3.1816,
      "step": 7571
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3872884511947632,
      "learning_rate": 0.0005984024659099329,
      "loss": 3.1762,
      "step": 7572
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.899167537689209,
      "learning_rate": 0.000598402044298798,
      "loss": 3.1901,
      "step": 7573
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0962698459625244,
      "learning_rate": 0.0005984016226321844,
      "loss": 3.3344,
      "step": 7574
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5768356323242188,
      "learning_rate": 0.0005984012009100923,
      "loss": 3.2478,
      "step": 7575
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.396365165710449,
      "learning_rate": 0.000598400779132522,
      "loss": 3.2104,
      "step": 7576
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4072985649108887,
      "learning_rate": 0.000598400357299473,
      "loss": 3.2054,
      "step": 7577
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6315650939941406,
      "learning_rate": 0.000598399935410946,
      "loss": 3.2763,
      "step": 7578
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9157856702804565,
      "learning_rate": 0.0005983995134669408,
      "loss": 3.0887,
      "step": 7579
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4916017055511475,
      "learning_rate": 0.0005983990914674574,
      "loss": 3.3091,
      "step": 7580
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.756575345993042,
      "learning_rate": 0.0005983986694124961,
      "loss": 3.2171,
      "step": 7581
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6919931173324585,
      "learning_rate": 0.0005983982473020569,
      "loss": 3.1578,
      "step": 7582
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9587429761886597,
      "learning_rate": 0.0005983978251361396,
      "loss": 3.3216,
      "step": 7583
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.55849289894104,
      "learning_rate": 0.0005983974029147447,
      "loss": 3.1068,
      "step": 7584
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6223163604736328,
      "learning_rate": 0.0005983969806378721,
      "loss": 3.3091,
      "step": 7585
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.6989614963531494,
      "learning_rate": 0.0005983965583055217,
      "loss": 3.1382,
      "step": 7586
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6179674863815308,
      "learning_rate": 0.0005983961359176938,
      "loss": 3.1345,
      "step": 7587
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5941286087036133,
      "learning_rate": 0.0005983957134743886,
      "loss": 2.9846,
      "step": 7588
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6733990907669067,
      "learning_rate": 0.0005983952909756059,
      "loss": 3.1839,
      "step": 7589
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7398128509521484,
      "learning_rate": 0.0005983948684213458,
      "loss": 3.0778,
      "step": 7590
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9084382057189941,
      "learning_rate": 0.0005983944458116085,
      "loss": 3.1799,
      "step": 7591
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4908900260925293,
      "learning_rate": 0.0005983940231463941,
      "loss": 3.1531,
      "step": 7592
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7181527614593506,
      "learning_rate": 0.0005983936004257026,
      "loss": 3.1847,
      "step": 7593
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7685890197753906,
      "learning_rate": 0.000598393177649534,
      "loss": 3.2705,
      "step": 7594
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.952760934829712,
      "learning_rate": 0.0005983927548178885,
      "loss": 2.9444,
      "step": 7595
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.087061882019043,
      "learning_rate": 0.0005983923319307663,
      "loss": 3.2433,
      "step": 7596
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.652862310409546,
      "learning_rate": 0.0005983919089881672,
      "loss": 3.4829,
      "step": 7597
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.45672607421875,
      "learning_rate": 0.0005983914859900915,
      "loss": 3.4192,
      "step": 7598
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.246115207672119,
      "learning_rate": 0.0005983910629365391,
      "loss": 2.9366,
      "step": 7599
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7040748596191406,
      "learning_rate": 0.0005983906398275103,
      "loss": 3.3659,
      "step": 7600
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5655927658081055,
      "learning_rate": 0.0005983902166630048,
      "loss": 2.9805,
      "step": 7601
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.0873045921325684,
      "learning_rate": 0.0005983897934430231,
      "loss": 3.133,
      "step": 7602
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2482261657714844,
      "learning_rate": 0.0005983893701675652,
      "loss": 3.244,
      "step": 7603
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.541793704032898,
      "learning_rate": 0.0005983889468366308,
      "loss": 3.1897,
      "step": 7604
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.8264238834381104,
      "learning_rate": 0.0005983885234502205,
      "loss": 3.3819,
      "step": 7605
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.7343575954437256,
      "learning_rate": 0.000598388100008334,
      "loss": 3.0604,
      "step": 7606
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5732287168502808,
      "learning_rate": 0.0005983876765109717,
      "loss": 3.4176,
      "step": 7607
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5965735912323,
      "learning_rate": 0.0005983872529581334,
      "loss": 3.2945,
      "step": 7608
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.8939311504364014,
      "learning_rate": 0.0005983868293498193,
      "loss": 3.2096,
      "step": 7609
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.091610908508301,
      "learning_rate": 0.0005983864056860294,
      "loss": 3.1062,
      "step": 7610
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.90109384059906,
      "learning_rate": 0.0005983859819667638,
      "loss": 3.0773,
      "step": 7611
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.642565369606018,
      "learning_rate": 0.0005983855581920227,
      "loss": 3.6461,
      "step": 7612
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.081144094467163,
      "learning_rate": 0.0005983851343618061,
      "loss": 3.2534,
      "step": 7613
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9911136627197266,
      "learning_rate": 0.000598384710476114,
      "loss": 3.2787,
      "step": 7614
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.291447401046753,
      "learning_rate": 0.0005983842865349466,
      "loss": 3.037,
      "step": 7615
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4261205196380615,
      "learning_rate": 0.0005983838625383039,
      "loss": 3.0845,
      "step": 7616
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5131490230560303,
      "learning_rate": 0.000598383438486186,
      "loss": 2.9383,
      "step": 7617
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3771471977233887,
      "learning_rate": 0.000598383014378593,
      "loss": 3.1097,
      "step": 7618
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.190978527069092,
      "learning_rate": 0.000598382590215525,
      "loss": 3.1516,
      "step": 7619
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.681574821472168,
      "learning_rate": 0.0005983821659969821,
      "loss": 3.1909,
      "step": 7620
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4966329336166382,
      "learning_rate": 0.0005983817417229641,
      "loss": 3.2773,
      "step": 7621
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6187888383865356,
      "learning_rate": 0.0005983813173934715,
      "loss": 3.1996,
      "step": 7622
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3581953048706055,
      "learning_rate": 0.0005983808930085041,
      "loss": 3.2198,
      "step": 7623
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5495562553405762,
      "learning_rate": 0.0005983804685680621,
      "loss": 3.2368,
      "step": 7624
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3125474452972412,
      "learning_rate": 0.0005983800440721455,
      "loss": 3.3962,
      "step": 7625
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8486533164978027,
      "learning_rate": 0.0005983796195207545,
      "loss": 3.2193,
      "step": 7626
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.867370367050171,
      "learning_rate": 0.000598379194913889,
      "loss": 3.4133,
      "step": 7627
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7620705366134644,
      "learning_rate": 0.0005983787702515492,
      "loss": 3.2111,
      "step": 7628
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7169256210327148,
      "learning_rate": 0.0005983783455337352,
      "loss": 3.1805,
      "step": 7629
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1347291469573975,
      "learning_rate": 0.000598377920760447,
      "loss": 3.148,
      "step": 7630
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.818314552307129,
      "learning_rate": 0.0005983774959316847,
      "loss": 3.0966,
      "step": 7631
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1401188373565674,
      "learning_rate": 0.0005983770710474484,
      "loss": 3.2462,
      "step": 7632
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.356796979904175,
      "learning_rate": 0.0005983766461077382,
      "loss": 3.0917,
      "step": 7633
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.416937232017517,
      "learning_rate": 0.0005983762211125541,
      "loss": 3.2838,
      "step": 7634
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7334669828414917,
      "learning_rate": 0.0005983757960618963,
      "loss": 3.1175,
      "step": 7635
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.236754894256592,
      "learning_rate": 0.0005983753709557648,
      "loss": 3.0253,
      "step": 7636
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3339924812316895,
      "learning_rate": 0.0005983749457941595,
      "loss": 3.2061,
      "step": 7637
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6868916749954224,
      "learning_rate": 0.0005983745205770808,
      "loss": 3.2213,
      "step": 7638
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.6862449645996094,
      "learning_rate": 0.0005983740953045287,
      "loss": 3.2665,
      "step": 7639
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4164044857025146,
      "learning_rate": 0.0005983736699765031,
      "loss": 3.2275,
      "step": 7640
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.636905312538147,
      "learning_rate": 0.0005983732445930043,
      "loss": 3.4005,
      "step": 7641
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9052315950393677,
      "learning_rate": 0.0005983728191540323,
      "loss": 3.1034,
      "step": 7642
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.809748411178589,
      "learning_rate": 0.0005983723936595871,
      "loss": 3.4347,
      "step": 7643
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6775033473968506,
      "learning_rate": 0.0005983719681096689,
      "loss": 3.0358,
      "step": 7644
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.444741129875183,
      "learning_rate": 0.0005983715425042777,
      "loss": 3.3155,
      "step": 7645
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.4472718238830566,
      "learning_rate": 0.0005983711168434135,
      "loss": 3.1228,
      "step": 7646
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.338304042816162,
      "learning_rate": 0.0005983706911270766,
      "loss": 3.5625,
      "step": 7647
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6950457096099854,
      "learning_rate": 0.0005983702653552669,
      "loss": 3.1176,
      "step": 7648
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9932880401611328,
      "learning_rate": 0.0005983698395279845,
      "loss": 3.1619,
      "step": 7649
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4431512355804443,
      "learning_rate": 0.0005983694136452296,
      "loss": 3.2435,
      "step": 7650
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3208532333374023,
      "learning_rate": 0.0005983689877070022,
      "loss": 3.2161,
      "step": 7651
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3992246389389038,
      "learning_rate": 0.0005983685617133023,
      "loss": 3.1675,
      "step": 7652
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.1936795711517334,
      "learning_rate": 0.0005983681356641302,
      "loss": 3.1459,
      "step": 7653
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.124415159225464,
      "learning_rate": 0.0005983677095594856,
      "loss": 3.0555,
      "step": 7654
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.688660740852356,
      "learning_rate": 0.0005983672833993689,
      "loss": 3.0384,
      "step": 7655
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2280380725860596,
      "learning_rate": 0.0005983668571837801,
      "loss": 3.3659,
      "step": 7656
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0671756267547607,
      "learning_rate": 0.0005983664309127193,
      "loss": 3.1807,
      "step": 7657
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4784232378005981,
      "learning_rate": 0.0005983660045861865,
      "loss": 3.4379,
      "step": 7658
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.3710598945617676,
      "learning_rate": 0.0005983655782041819,
      "loss": 3.3342,
      "step": 7659
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3223373889923096,
      "learning_rate": 0.0005983651517667055,
      "loss": 3.2155,
      "step": 7660
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6997987031936646,
      "learning_rate": 0.0005983647252737574,
      "loss": 3.3632,
      "step": 7661
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1343472003936768,
      "learning_rate": 0.0005983642987253375,
      "loss": 3.2231,
      "step": 7662
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.1195127964019775,
      "learning_rate": 0.0005983638721214461,
      "loss": 3.3758,
      "step": 7663
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6206616163253784,
      "learning_rate": 0.0005983634454620833,
      "loss": 3.0973,
      "step": 7664
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6863582134246826,
      "learning_rate": 0.0005983630187472492,
      "loss": 3.2853,
      "step": 7665
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1197166442871094,
      "learning_rate": 0.0005983625919769436,
      "loss": 3.1532,
      "step": 7666
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1646769046783447,
      "learning_rate": 0.0005983621651511669,
      "loss": 3.3679,
      "step": 7667
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.615235447883606,
      "learning_rate": 0.0005983617382699189,
      "loss": 3.3748,
      "step": 7668
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.134803533554077,
      "learning_rate": 0.0005983613113331999,
      "loss": 3.3514,
      "step": 7669
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7957210540771484,
      "learning_rate": 0.0005983608843410099,
      "loss": 3.3862,
      "step": 7670
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.005641460418701,
      "learning_rate": 0.0005983604572933491,
      "loss": 2.9333,
      "step": 7671
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6393983364105225,
      "learning_rate": 0.0005983600301902172,
      "loss": 3.1076,
      "step": 7672
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.559230327606201,
      "learning_rate": 0.0005983596030316147,
      "loss": 3.1678,
      "step": 7673
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4847396612167358,
      "learning_rate": 0.0005983591758175415,
      "loss": 3.3634,
      "step": 7674
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2791625261306763,
      "learning_rate": 0.0005983587485479977,
      "loss": 3.1902,
      "step": 7675
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3559846878051758,
      "learning_rate": 0.0005983583212229834,
      "loss": 3.3679,
      "step": 7676
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9607696533203125,
      "learning_rate": 0.0005983578938424986,
      "loss": 3.3957,
      "step": 7677
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6101757287979126,
      "learning_rate": 0.0005983574664065435,
      "loss": 3.3778,
      "step": 7678
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.854044795036316,
      "learning_rate": 0.0005983570389151181,
      "loss": 3.1736,
      "step": 7679
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5477174520492554,
      "learning_rate": 0.0005983566113682224,
      "loss": 3.2087,
      "step": 7680
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4739328622817993,
      "learning_rate": 0.0005983561837658567,
      "loss": 3.1605,
      "step": 7681
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7172449827194214,
      "learning_rate": 0.0005983557561080208,
      "loss": 3.2653,
      "step": 7682
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.62131929397583,
      "learning_rate": 0.000598355328394715,
      "loss": 3.0429,
      "step": 7683
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6638381481170654,
      "learning_rate": 0.0005983549006259393,
      "loss": 3.4001,
      "step": 7684
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7914137840270996,
      "learning_rate": 0.0005983544728016939,
      "loss": 3.1604,
      "step": 7685
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.706065058708191,
      "learning_rate": 0.0005983540449219787,
      "loss": 3.2835,
      "step": 7686
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.697136640548706,
      "learning_rate": 0.0005983536169867938,
      "loss": 3.3778,
      "step": 7687
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7006133794784546,
      "learning_rate": 0.0005983531889961395,
      "loss": 3.2508,
      "step": 7688
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5565153360366821,
      "learning_rate": 0.0005983527609500156,
      "loss": 3.2885,
      "step": 7689
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.984686255455017,
      "learning_rate": 0.0005983523328484222,
      "loss": 3.2902,
      "step": 7690
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8429944515228271,
      "learning_rate": 0.0005983519046913596,
      "loss": 3.4843,
      "step": 7691
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9779309034347534,
      "learning_rate": 0.0005983514764788276,
      "loss": 3.3083,
      "step": 7692
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1036977767944336,
      "learning_rate": 0.0005983510482108266,
      "loss": 3.0645,
      "step": 7693
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5537924766540527,
      "learning_rate": 0.0005983506198873565,
      "loss": 3.0767,
      "step": 7694
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.811547040939331,
      "learning_rate": 0.0005983501915084173,
      "loss": 3.2293,
      "step": 7695
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9993622303009033,
      "learning_rate": 0.0005983497630740091,
      "loss": 3.0791,
      "step": 7696
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4493615627288818,
      "learning_rate": 0.0005983493345841322,
      "loss": 3.502,
      "step": 7697
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9400997161865234,
      "learning_rate": 0.0005983489060387865,
      "loss": 3.2442,
      "step": 7698
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9583009481430054,
      "learning_rate": 0.000598348477437972,
      "loss": 3.1569,
      "step": 7699
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3667511940002441,
      "learning_rate": 0.000598348048781689,
      "loss": 3.5668,
      "step": 7700
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8306330442428589,
      "learning_rate": 0.0005983476200699374,
      "loss": 3.0247,
      "step": 7701
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6756794452667236,
      "learning_rate": 0.0005983471913027174,
      "loss": 3.0978,
      "step": 7702
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5384941101074219,
      "learning_rate": 0.000598346762480029,
      "loss": 3.2816,
      "step": 7703
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6438485383987427,
      "learning_rate": 0.0005983463336018723,
      "loss": 3.3249,
      "step": 7704
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5995469093322754,
      "learning_rate": 0.0005983459046682475,
      "loss": 3.2196,
      "step": 7705
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7204031944274902,
      "learning_rate": 0.0005983454756791544,
      "loss": 3.3082,
      "step": 7706
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9365488290786743,
      "learning_rate": 0.0005983450466345932,
      "loss": 2.9867,
      "step": 7707
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.3852691650390625,
      "learning_rate": 0.0005983446175345642,
      "loss": 3.1016,
      "step": 7708
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4001681804656982,
      "learning_rate": 0.0005983441883790672,
      "loss": 3.226,
      "step": 7709
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.549088716506958,
      "learning_rate": 0.0005983437591681024,
      "loss": 3.2228,
      "step": 7710
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.475507140159607,
      "learning_rate": 0.0005983433299016699,
      "loss": 3.293,
      "step": 7711
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0799319744110107,
      "learning_rate": 0.0005983429005797697,
      "loss": 3.2107,
      "step": 7712
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.601696252822876,
      "learning_rate": 0.0005983424712024019,
      "loss": 3.2348,
      "step": 7713
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5587594509124756,
      "learning_rate": 0.0005983420417695667,
      "loss": 3.1575,
      "step": 7714
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3909615278244019,
      "learning_rate": 0.0005983416122812641,
      "loss": 3.0951,
      "step": 7715
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6332850456237793,
      "learning_rate": 0.000598341182737494,
      "loss": 3.2584,
      "step": 7716
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6425669193267822,
      "learning_rate": 0.0005983407531382568,
      "loss": 3.15,
      "step": 7717
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8384071588516235,
      "learning_rate": 0.0005983403234835524,
      "loss": 3.2836,
      "step": 7718
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9076802730560303,
      "learning_rate": 0.0005983398937733808,
      "loss": 3.0485,
      "step": 7719
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.942588448524475,
      "learning_rate": 0.0005983394640077422,
      "loss": 3.1708,
      "step": 7720
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.147541046142578,
      "learning_rate": 0.0005983390341866367,
      "loss": 3.3694,
      "step": 7721
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.599912166595459,
      "learning_rate": 0.0005983386043100644,
      "loss": 3.2189,
      "step": 7722
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.698805332183838,
      "learning_rate": 0.0005983381743780252,
      "loss": 3.0403,
      "step": 7723
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.584649920463562,
      "learning_rate": 0.0005983377443905194,
      "loss": 3.3759,
      "step": 7724
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1181259155273438,
      "learning_rate": 0.0005983373143475469,
      "loss": 3.025,
      "step": 7725
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9271117448806763,
      "learning_rate": 0.000598336884249108,
      "loss": 3.2536,
      "step": 7726
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9207544326782227,
      "learning_rate": 0.0005983364540952025,
      "loss": 3.1256,
      "step": 7727
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8413283824920654,
      "learning_rate": 0.0005983360238858307,
      "loss": 3.1081,
      "step": 7728
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.922386646270752,
      "learning_rate": 0.0005983355936209926,
      "loss": 3.1996,
      "step": 7729
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5750380754470825,
      "learning_rate": 0.0005983351633006882,
      "loss": 3.1417,
      "step": 7730
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.791895866394043,
      "learning_rate": 0.0005983347329249178,
      "loss": 3.148,
      "step": 7731
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.266664743423462,
      "learning_rate": 0.0005983343024936812,
      "loss": 3.196,
      "step": 7732
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6641162633895874,
      "learning_rate": 0.0005983338720069787,
      "loss": 3.4602,
      "step": 7733
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7957655191421509,
      "learning_rate": 0.0005983334414648103,
      "loss": 3.3645,
      "step": 7734
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5562691688537598,
      "learning_rate": 0.000598333010867176,
      "loss": 2.9958,
      "step": 7735
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7005434036254883,
      "learning_rate": 0.000598332580214076,
      "loss": 2.9941,
      "step": 7736
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7402530908584595,
      "learning_rate": 0.0005983321495055104,
      "loss": 2.9855,
      "step": 7737
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4804940223693848,
      "learning_rate": 0.0005983317187414792,
      "loss": 3.0945,
      "step": 7738
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5327515602111816,
      "learning_rate": 0.0005983312879219825,
      "loss": 3.0742,
      "step": 7739
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3818073272705078,
      "learning_rate": 0.0005983308570470204,
      "loss": 3.0695,
      "step": 7740
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.814405918121338,
      "learning_rate": 0.0005983304261165929,
      "loss": 3.4943,
      "step": 7741
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5181050300598145,
      "learning_rate": 0.0005983299951307002,
      "loss": 3.0667,
      "step": 7742
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.8112666606903076,
      "learning_rate": 0.0005983295640893423,
      "loss": 3.1059,
      "step": 7743
    },
    {
      "epoch": 0.1,
      "grad_norm": 4.173537254333496,
      "learning_rate": 0.0005983291329925193,
      "loss": 3.1352,
      "step": 7744
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7473331689834595,
      "learning_rate": 0.0005983287018402313,
      "loss": 3.3534,
      "step": 7745
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8165980577468872,
      "learning_rate": 0.0005983282706324783,
      "loss": 3.1699,
      "step": 7746
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5184366703033447,
      "learning_rate": 0.0005983278393692606,
      "loss": 3.1131,
      "step": 7747
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4273736476898193,
      "learning_rate": 0.0005983274080505779,
      "loss": 3.2277,
      "step": 7748
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5394861698150635,
      "learning_rate": 0.0005983269766764306,
      "loss": 3.3671,
      "step": 7749
    },
    {
      "epoch": 0.1,
      "grad_norm": 4.621050834655762,
      "learning_rate": 0.0005983265452468188,
      "loss": 3.1906,
      "step": 7750
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.086548328399658,
      "learning_rate": 0.0005983261137617424,
      "loss": 2.9696,
      "step": 7751
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7492831945419312,
      "learning_rate": 0.0005983256822212014,
      "loss": 3.0337,
      "step": 7752
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.99054753780365,
      "learning_rate": 0.0005983252506251962,
      "loss": 3.1329,
      "step": 7753
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5858956575393677,
      "learning_rate": 0.0005983248189737266,
      "loss": 3.1987,
      "step": 7754
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8490945100784302,
      "learning_rate": 0.0005983243872667928,
      "loss": 3.1601,
      "step": 7755
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5302656888961792,
      "learning_rate": 0.000598323955504395,
      "loss": 3.3306,
      "step": 7756
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3860838413238525,
      "learning_rate": 0.0005983235236865329,
      "loss": 3.3198,
      "step": 7757
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.079181432723999,
      "learning_rate": 0.0005983230918132071,
      "loss": 3.2826,
      "step": 7758
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5436666011810303,
      "learning_rate": 0.0005983226598844172,
      "loss": 3.2769,
      "step": 7759
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8199453353881836,
      "learning_rate": 0.0005983222279001636,
      "loss": 3.2299,
      "step": 7760
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6452223062515259,
      "learning_rate": 0.0005983217958604462,
      "loss": 2.9463,
      "step": 7761
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5609252452850342,
      "learning_rate": 0.0005983213637652652,
      "loss": 3.349,
      "step": 7762
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0998733043670654,
      "learning_rate": 0.0005983209316146205,
      "loss": 3.2032,
      "step": 7763
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.188382625579834,
      "learning_rate": 0.0005983204994085124,
      "loss": 3.0288,
      "step": 7764
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5779271125793457,
      "learning_rate": 0.000598320067146941,
      "loss": 3.144,
      "step": 7765
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6997534036636353,
      "learning_rate": 0.0005983196348299061,
      "loss": 3.4055,
      "step": 7766
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4377084970474243,
      "learning_rate": 0.000598319202457408,
      "loss": 3.3735,
      "step": 7767
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4066516160964966,
      "learning_rate": 0.0005983187700294467,
      "loss": 3.085,
      "step": 7768
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6921073198318481,
      "learning_rate": 0.0005983183375460223,
      "loss": 3.3842,
      "step": 7769
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7269004583358765,
      "learning_rate": 0.000598317905007135,
      "loss": 3.3621,
      "step": 7770
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8969603776931763,
      "learning_rate": 0.0005983174724127847,
      "loss": 3.2682,
      "step": 7771
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4814741611480713,
      "learning_rate": 0.0005983170397629716,
      "loss": 3.4528,
      "step": 7772
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.134178400039673,
      "learning_rate": 0.0005983166070576957,
      "loss": 3.2866,
      "step": 7773
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6781883239746094,
      "learning_rate": 0.0005983161742969571,
      "loss": 3.4502,
      "step": 7774
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.943671464920044,
      "learning_rate": 0.0005983157414807558,
      "loss": 3.0869,
      "step": 7775
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4356105327606201,
      "learning_rate": 0.0005983153086090921,
      "loss": 3.3225,
      "step": 7776
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.045722007751465,
      "learning_rate": 0.0005983148756819659,
      "loss": 3.2718,
      "step": 7777
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.842983603477478,
      "learning_rate": 0.0005983144426993773,
      "loss": 2.95,
      "step": 7778
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6326614618301392,
      "learning_rate": 0.0005983140096613265,
      "loss": 3.2541,
      "step": 7779
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3401621580123901,
      "learning_rate": 0.0005983135765678134,
      "loss": 3.4387,
      "step": 7780
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4920247793197632,
      "learning_rate": 0.0005983131434188384,
      "loss": 3.2345,
      "step": 7781
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7844500541687012,
      "learning_rate": 0.0005983127102144011,
      "loss": 3.1649,
      "step": 7782
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6145259141921997,
      "learning_rate": 0.0005983122769545019,
      "loss": 3.2795,
      "step": 7783
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5180110931396484,
      "learning_rate": 0.0005983118436391409,
      "loss": 3.3175,
      "step": 7784
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6250078678131104,
      "learning_rate": 0.000598311410268318,
      "loss": 3.1314,
      "step": 7785
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5616506338119507,
      "learning_rate": 0.0005983109768420335,
      "loss": 3.327,
      "step": 7786
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3679091930389404,
      "learning_rate": 0.0005983105433602872,
      "loss": 3.138,
      "step": 7787
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5565869808197021,
      "learning_rate": 0.0005983101098230795,
      "loss": 3.1842,
      "step": 7788
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7389602661132812,
      "learning_rate": 0.0005983096762304101,
      "loss": 3.2502,
      "step": 7789
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.644700050354004,
      "learning_rate": 0.0005983092425822796,
      "loss": 3.3921,
      "step": 7790
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4119126796722412,
      "learning_rate": 0.0005983088088786875,
      "loss": 3.2428,
      "step": 7791
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7316128015518188,
      "learning_rate": 0.0005983083751196342,
      "loss": 3.2131,
      "step": 7792
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4191218614578247,
      "learning_rate": 0.0005983079413051199,
      "loss": 3.0043,
      "step": 7793
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5383092164993286,
      "learning_rate": 0.0005983075074351444,
      "loss": 3.0723,
      "step": 7794
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7084953784942627,
      "learning_rate": 0.0005983070735097079,
      "loss": 3.1193,
      "step": 7795
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7133703231811523,
      "learning_rate": 0.0005983066395288106,
      "loss": 3.1926,
      "step": 7796
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3813918828964233,
      "learning_rate": 0.0005983062054924524,
      "loss": 3.427,
      "step": 7797
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6221303939819336,
      "learning_rate": 0.0005983057714006334,
      "loss": 3.332,
      "step": 7798
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8727245330810547,
      "learning_rate": 0.0005983053372533539,
      "loss": 3.2097,
      "step": 7799
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2083404064178467,
      "learning_rate": 0.0005983049030506136,
      "loss": 3.2389,
      "step": 7800
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4447944164276123,
      "learning_rate": 0.0005983044687924128,
      "loss": 3.255,
      "step": 7801
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.173546552658081,
      "learning_rate": 0.0005983040344787516,
      "loss": 3.2647,
      "step": 7802
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6518360376358032,
      "learning_rate": 0.0005983036001096302,
      "loss": 3.1729,
      "step": 7803
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3858838081359863,
      "learning_rate": 0.0005983031656850484,
      "loss": 3.2754,
      "step": 7804
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4889018535614014,
      "learning_rate": 0.0005983027312050064,
      "loss": 3.1358,
      "step": 7805
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5510518550872803,
      "learning_rate": 0.0005983022966695043,
      "loss": 2.8789,
      "step": 7806
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4958668947219849,
      "learning_rate": 0.0005983018620785422,
      "loss": 3.2956,
      "step": 7807
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.035794973373413,
      "learning_rate": 0.0005983014274321201,
      "loss": 3.4034,
      "step": 7808
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7269952297210693,
      "learning_rate": 0.0005983009927302381,
      "loss": 3.2246,
      "step": 7809
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9869731664657593,
      "learning_rate": 0.0005983005579728965,
      "loss": 3.2207,
      "step": 7810
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.051985263824463,
      "learning_rate": 0.000598300123160095,
      "loss": 3.3468,
      "step": 7811
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9309555292129517,
      "learning_rate": 0.000598299688291834,
      "loss": 3.3197,
      "step": 7812
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.501978874206543,
      "learning_rate": 0.0005982992533681133,
      "loss": 3.6155,
      "step": 7813
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.150240421295166,
      "learning_rate": 0.0005982988183889333,
      "loss": 3.1819,
      "step": 7814
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7879834175109863,
      "learning_rate": 0.0005982983833542939,
      "loss": 3.0803,
      "step": 7815
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8200623989105225,
      "learning_rate": 0.0005982979482641952,
      "loss": 3.0411,
      "step": 7816
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.131284236907959,
      "learning_rate": 0.0005982975131186371,
      "loss": 3.2404,
      "step": 7817
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2780462503433228,
      "learning_rate": 0.0005982970779176201,
      "loss": 3.2315,
      "step": 7818
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3763341903686523,
      "learning_rate": 0.0005982966426611439,
      "loss": 2.8886,
      "step": 7819
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3281246423721313,
      "learning_rate": 0.0005982962073492089,
      "loss": 3.2364,
      "step": 7820
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4180235862731934,
      "learning_rate": 0.0005982957719818148,
      "loss": 3.0421,
      "step": 7821
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.852939248085022,
      "learning_rate": 0.000598295336558962,
      "loss": 3.1644,
      "step": 7822
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3367395401000977,
      "learning_rate": 0.0005982949010806504,
      "loss": 3.2349,
      "step": 7823
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0082075595855713,
      "learning_rate": 0.0005982944655468802,
      "loss": 3.1154,
      "step": 7824
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5228731632232666,
      "learning_rate": 0.0005982940299576514,
      "loss": 2.9148,
      "step": 7825
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7059322595596313,
      "learning_rate": 0.0005982935943129641,
      "loss": 3.2931,
      "step": 7826
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8215779066085815,
      "learning_rate": 0.0005982931586128185,
      "loss": 3.2558,
      "step": 7827
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7054333686828613,
      "learning_rate": 0.0005982927228572143,
      "loss": 3.171,
      "step": 7828
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.047585964202881,
      "learning_rate": 0.0005982922870461521,
      "loss": 3.2331,
      "step": 7829
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9272358417510986,
      "learning_rate": 0.0005982918511796317,
      "loss": 3.2549,
      "step": 7830
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2255783081054688,
      "learning_rate": 0.0005982914152576532,
      "loss": 3.2538,
      "step": 7831
    },
    {
      "epoch": 0.1,
      "grad_norm": 4.930210113525391,
      "learning_rate": 0.0005982909792802166,
      "loss": 3.0205,
      "step": 7832
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1921980381011963,
      "learning_rate": 0.0005982905432473221,
      "loss": 3.1139,
      "step": 7833
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7355194091796875,
      "learning_rate": 0.0005982901071589699,
      "loss": 3.3582,
      "step": 7834
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2323827743530273,
      "learning_rate": 0.0005982896710151598,
      "loss": 3.0925,
      "step": 7835
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.8953280448913574,
      "learning_rate": 0.0005982892348158921,
      "loss": 3.4085,
      "step": 7836
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7287315130233765,
      "learning_rate": 0.0005982887985611667,
      "loss": 3.4944,
      "step": 7837
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7574936151504517,
      "learning_rate": 0.0005982883622509839,
      "loss": 3.0237,
      "step": 7838
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4688822031021118,
      "learning_rate": 0.0005982879258853436,
      "loss": 2.9592,
      "step": 7839
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.402207851409912,
      "learning_rate": 0.0005982874894642459,
      "loss": 3.2445,
      "step": 7840
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4656978845596313,
      "learning_rate": 0.000598287052987691,
      "loss": 3.3299,
      "step": 7841
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6451903581619263,
      "learning_rate": 0.0005982866164556789,
      "loss": 3.1348,
      "step": 7842
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.232165813446045,
      "learning_rate": 0.0005982861798682097,
      "loss": 3.4061,
      "step": 7843
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4482204914093018,
      "learning_rate": 0.0005982857432252833,
      "loss": 3.1435,
      "step": 7844
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.995794653892517,
      "learning_rate": 0.0005982853065269002,
      "loss": 3.4291,
      "step": 7845
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5001109838485718,
      "learning_rate": 0.0005982848697730601,
      "loss": 3.3907,
      "step": 7846
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9288008213043213,
      "learning_rate": 0.0005982844329637633,
      "loss": 3.209,
      "step": 7847
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4977582693099976,
      "learning_rate": 0.0005982839960990097,
      "loss": 3.4047,
      "step": 7848
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.373415231704712,
      "learning_rate": 0.0005982835591787994,
      "loss": 3.2591,
      "step": 7849
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2620726823806763,
      "learning_rate": 0.0005982831222031327,
      "loss": 3.2488,
      "step": 7850
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5501796007156372,
      "learning_rate": 0.0005982826851720095,
      "loss": 3.2426,
      "step": 7851
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6706887483596802,
      "learning_rate": 0.0005982822480854299,
      "loss": 3.0867,
      "step": 7852
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7182036638259888,
      "learning_rate": 0.000598281810943394,
      "loss": 3.1041,
      "step": 7853
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.831709623336792,
      "learning_rate": 0.0005982813737459019,
      "loss": 3.2824,
      "step": 7854
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4157339334487915,
      "learning_rate": 0.0005982809364929537,
      "loss": 3.1087,
      "step": 7855
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3302388191223145,
      "learning_rate": 0.0005982804991845494,
      "loss": 3.0414,
      "step": 7856
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5579192638397217,
      "learning_rate": 0.0005982800618206891,
      "loss": 3.4952,
      "step": 7857
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6923164129257202,
      "learning_rate": 0.0005982796244013729,
      "loss": 3.3059,
      "step": 7858
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.444756269454956,
      "learning_rate": 0.0005982791869266009,
      "loss": 3.1446,
      "step": 7859
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.8828518390655518,
      "learning_rate": 0.0005982787493963732,
      "loss": 2.997,
      "step": 7860
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.668066143989563,
      "learning_rate": 0.0005982783118106898,
      "loss": 3.2158,
      "step": 7861
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9595844745635986,
      "learning_rate": 0.0005982778741695509,
      "loss": 3.1289,
      "step": 7862
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.802901268005371,
      "learning_rate": 0.0005982774364729564,
      "loss": 3.1272,
      "step": 7863
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5225927829742432,
      "learning_rate": 0.0005982769987209067,
      "loss": 3.2478,
      "step": 7864
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5303194522857666,
      "learning_rate": 0.0005982765609134015,
      "loss": 3.2343,
      "step": 7865
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.330478549003601,
      "learning_rate": 0.0005982761230504412,
      "loss": 3.1076,
      "step": 7866
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7325992584228516,
      "learning_rate": 0.0005982756851320255,
      "loss": 3.0411,
      "step": 7867
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1520090103149414,
      "learning_rate": 0.0005982752471581549,
      "loss": 3.3249,
      "step": 7868
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2185449600219727,
      "learning_rate": 0.0005982748091288292,
      "loss": 3.1302,
      "step": 7869
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4725865125656128,
      "learning_rate": 0.0005982743710440488,
      "loss": 3.3606,
      "step": 7870
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6798572540283203,
      "learning_rate": 0.0005982739329038133,
      "loss": 3.2211,
      "step": 7871
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7519611120224,
      "learning_rate": 0.0005982734947081232,
      "loss": 3.2118,
      "step": 7872
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7626441717147827,
      "learning_rate": 0.0005982730564569784,
      "loss": 3.2368,
      "step": 7873
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8759474754333496,
      "learning_rate": 0.0005982726181503789,
      "loss": 3.0531,
      "step": 7874
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6699961423873901,
      "learning_rate": 0.000598272179788325,
      "loss": 3.2684,
      "step": 7875
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5362074375152588,
      "learning_rate": 0.0005982717413708167,
      "loss": 3.1812,
      "step": 7876
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.198497772216797,
      "learning_rate": 0.000598271302897854,
      "loss": 3.109,
      "step": 7877
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4439165592193604,
      "learning_rate": 0.0005982708643694371,
      "loss": 3.1855,
      "step": 7878
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7448663711547852,
      "learning_rate": 0.0005982704257855659,
      "loss": 3.1732,
      "step": 7879
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9649457931518555,
      "learning_rate": 0.0005982699871462406,
      "loss": 3.3871,
      "step": 7880
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7952921390533447,
      "learning_rate": 0.0005982695484514613,
      "loss": 3.3757,
      "step": 7881
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5413296222686768,
      "learning_rate": 0.0005982691097012282,
      "loss": 3.17,
      "step": 7882
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2364494800567627,
      "learning_rate": 0.000598268670895541,
      "loss": 3.3901,
      "step": 7883
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5159242153167725,
      "learning_rate": 0.0005982682320344003,
      "loss": 3.2696,
      "step": 7884
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6465461254119873,
      "learning_rate": 0.0005982677931178058,
      "loss": 3.201,
      "step": 7885
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3441450595855713,
      "learning_rate": 0.0005982673541457576,
      "loss": 3.1678,
      "step": 7886
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.840632677078247,
      "learning_rate": 0.0005982669151182559,
      "loss": 3.243,
      "step": 7887
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.393235445022583,
      "learning_rate": 0.0005982664760353008,
      "loss": 3.2753,
      "step": 7888
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.437261700630188,
      "learning_rate": 0.0005982660368968923,
      "loss": 3.3198,
      "step": 7889
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5707939863204956,
      "learning_rate": 0.0005982655977030305,
      "loss": 3.158,
      "step": 7890
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5697044134140015,
      "learning_rate": 0.0005982651584537155,
      "loss": 3.2912,
      "step": 7891
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5684391260147095,
      "learning_rate": 0.0005982647191489474,
      "loss": 3.4985,
      "step": 7892
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5721557140350342,
      "learning_rate": 0.0005982642797887262,
      "loss": 3.1647,
      "step": 7893
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.673636794090271,
      "learning_rate": 0.0005982638403730521,
      "loss": 3.133,
      "step": 7894
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6758540868759155,
      "learning_rate": 0.0005982634009019252,
      "loss": 3.3194,
      "step": 7895
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.388907551765442,
      "learning_rate": 0.0005982629613753453,
      "loss": 3.0737,
      "step": 7896
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5072107315063477,
      "learning_rate": 0.0005982625217933129,
      "loss": 3.2129,
      "step": 7897
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6118156909942627,
      "learning_rate": 0.0005982620821558277,
      "loss": 3.3092,
      "step": 7898
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.007462501525879,
      "learning_rate": 0.00059826164246289,
      "loss": 3.2935,
      "step": 7899
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.917857050895691,
      "learning_rate": 0.0005982612027144999,
      "loss": 3.3991,
      "step": 7900
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6615017652511597,
      "learning_rate": 0.0005982607629106574,
      "loss": 3.0704,
      "step": 7901
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.641127347946167,
      "learning_rate": 0.0005982603230513624,
      "loss": 3.034,
      "step": 7902
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4799697399139404,
      "learning_rate": 0.0005982598831366153,
      "loss": 3.1531,
      "step": 7903
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.0391533374786377,
      "learning_rate": 0.0005982594431664162,
      "loss": 3.1607,
      "step": 7904
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3333221673965454,
      "learning_rate": 0.0005982590031407648,
      "loss": 3.5489,
      "step": 7905
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2907309532165527,
      "learning_rate": 0.0005982585630596617,
      "loss": 3.0661,
      "step": 7906
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.594442367553711,
      "learning_rate": 0.0005982581229231065,
      "loss": 3.5502,
      "step": 7907
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.934984564781189,
      "learning_rate": 0.0005982576827310996,
      "loss": 3.2488,
      "step": 7908
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1300127506256104,
      "learning_rate": 0.0005982572424836408,
      "loss": 3.1651,
      "step": 7909
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.252972364425659,
      "learning_rate": 0.0005982568021807305,
      "loss": 3.2691,
      "step": 7910
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5623996257781982,
      "learning_rate": 0.0005982563618223685,
      "loss": 3.2302,
      "step": 7911
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7701835632324219,
      "learning_rate": 0.0005982559214085552,
      "loss": 3.0443,
      "step": 7912
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6089504957199097,
      "learning_rate": 0.0005982554809392902,
      "loss": 3.2305,
      "step": 7913
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5139044523239136,
      "learning_rate": 0.0005982550404145742,
      "loss": 3.2047,
      "step": 7914
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.615731120109558,
      "learning_rate": 0.0005982545998344067,
      "loss": 3.1902,
      "step": 7915
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.099008798599243,
      "learning_rate": 0.0005982541591987883,
      "loss": 3.2744,
      "step": 7916
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6038163900375366,
      "learning_rate": 0.0005982537185077186,
      "loss": 3.0807,
      "step": 7917
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4172405004501343,
      "learning_rate": 0.000598253277761198,
      "loss": 3.2155,
      "step": 7918
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6386343240737915,
      "learning_rate": 0.0005982528369592265,
      "loss": 3.2987,
      "step": 7919
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.2617405652999878,
      "learning_rate": 0.0005982523961018041,
      "loss": 3.4033,
      "step": 7920
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5931556224822998,
      "learning_rate": 0.0005982519551889309,
      "loss": 3.3048,
      "step": 7921
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4176363945007324,
      "learning_rate": 0.0005982515142206072,
      "loss": 3.2893,
      "step": 7922
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7188273668289185,
      "learning_rate": 0.0005982510731968328,
      "loss": 3.2657,
      "step": 7923
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3209800720214844,
      "learning_rate": 0.000598250632117608,
      "loss": 3.0347,
      "step": 7924
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9943218231201172,
      "learning_rate": 0.0005982501909829326,
      "loss": 2.9602,
      "step": 7925
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6946125030517578,
      "learning_rate": 0.0005982497497928071,
      "loss": 3.2469,
      "step": 7926
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7984153032302856,
      "learning_rate": 0.0005982493085472312,
      "loss": 3.3909,
      "step": 7927
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2898807525634766,
      "learning_rate": 0.0005982488672462051,
      "loss": 3.2633,
      "step": 7928
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7593138217926025,
      "learning_rate": 0.0005982484258897289,
      "loss": 3.4663,
      "step": 7929
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1213595867156982,
      "learning_rate": 0.0005982479844778028,
      "loss": 3.1056,
      "step": 7930
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6769771575927734,
      "learning_rate": 0.0005982475430104267,
      "loss": 3.0998,
      "step": 7931
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.408222198486328,
      "learning_rate": 0.0005982471014876007,
      "loss": 3.3332,
      "step": 7932
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.484285831451416,
      "learning_rate": 0.000598246659909325,
      "loss": 3.3761,
      "step": 7933
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.098966360092163,
      "learning_rate": 0.0005982462182755996,
      "loss": 3.1571,
      "step": 7934
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2204673290252686,
      "learning_rate": 0.0005982457765864246,
      "loss": 3.1053,
      "step": 7935
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8907380104064941,
      "learning_rate": 0.0005982453348418001,
      "loss": 3.1913,
      "step": 7936
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.011232376098633,
      "learning_rate": 0.000598244893041726,
      "loss": 3.1436,
      "step": 7937
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4853808879852295,
      "learning_rate": 0.0005982444511862027,
      "loss": 3.2464,
      "step": 7938
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.538154125213623,
      "learning_rate": 0.0005982440092752302,
      "loss": 3.2593,
      "step": 7939
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.909691333770752,
      "learning_rate": 0.0005982435673088084,
      "loss": 3.1552,
      "step": 7940
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3375636339187622,
      "learning_rate": 0.0005982431252869375,
      "loss": 2.8454,
      "step": 7941
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4060372114181519,
      "learning_rate": 0.0005982426832096176,
      "loss": 3.1982,
      "step": 7942
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.433936595916748,
      "learning_rate": 0.0005982422410768487,
      "loss": 3.1718,
      "step": 7943
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4724247455596924,
      "learning_rate": 0.000598241798888631,
      "loss": 2.9916,
      "step": 7944
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4798160791397095,
      "learning_rate": 0.0005982413566449645,
      "loss": 3.3392,
      "step": 7945
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6487175226211548,
      "learning_rate": 0.0005982409143458492,
      "loss": 3.1681,
      "step": 7946
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5543780326843262,
      "learning_rate": 0.0005982404719912853,
      "loss": 3.1389,
      "step": 7947
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4233720302581787,
      "learning_rate": 0.0005982400295812731,
      "loss": 3.3188,
      "step": 7948
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.355755090713501,
      "learning_rate": 0.0005982395871158122,
      "loss": 3.2816,
      "step": 7949
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.64120352268219,
      "learning_rate": 0.0005982391445949031,
      "loss": 3.3945,
      "step": 7950
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5508670806884766,
      "learning_rate": 0.0005982387020185457,
      "loss": 3.1584,
      "step": 7951
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5174897909164429,
      "learning_rate": 0.0005982382593867399,
      "loss": 3.3996,
      "step": 7952
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.322932243347168,
      "learning_rate": 0.0005982378166994862,
      "loss": 3.0385,
      "step": 7953
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7093560695648193,
      "learning_rate": 0.0005982373739567843,
      "loss": 3.1767,
      "step": 7954
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.642163634300232,
      "learning_rate": 0.0005982369311586346,
      "loss": 3.2257,
      "step": 7955
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0823163986206055,
      "learning_rate": 0.000598236488305037,
      "loss": 3.2392,
      "step": 7956
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.652634620666504,
      "learning_rate": 0.0005982360453959915,
      "loss": 3.1144,
      "step": 7957
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.717535138130188,
      "learning_rate": 0.0005982356024314983,
      "loss": 2.9818,
      "step": 7958
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.413830041885376,
      "learning_rate": 0.0005982351594115575,
      "loss": 3.3016,
      "step": 7959
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3298697471618652,
      "learning_rate": 0.0005982347163361691,
      "loss": 3.3246,
      "step": 7960
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7723159790039062,
      "learning_rate": 0.0005982342732053334,
      "loss": 3.265,
      "step": 7961
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7059358358383179,
      "learning_rate": 0.0005982338300190502,
      "loss": 3.4739,
      "step": 7962
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5369420051574707,
      "learning_rate": 0.0005982333867773197,
      "loss": 3.151,
      "step": 7963
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9750958681106567,
      "learning_rate": 0.0005982329434801421,
      "loss": 3.3287,
      "step": 7964
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7764915227890015,
      "learning_rate": 0.0005982325001275172,
      "loss": 2.9587,
      "step": 7965
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.6543729305267334,
      "learning_rate": 0.0005982320567194454,
      "loss": 3.3937,
      "step": 7966
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.261990785598755,
      "learning_rate": 0.0005982316132559265,
      "loss": 2.9537,
      "step": 7967
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.358677864074707,
      "learning_rate": 0.0005982311697369607,
      "loss": 3.2101,
      "step": 7968
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3522262573242188,
      "learning_rate": 0.0005982307261625482,
      "loss": 3.2503,
      "step": 7969
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9266988039016724,
      "learning_rate": 0.0005982302825326889,
      "loss": 3.0456,
      "step": 7970
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.7622764110565186,
      "learning_rate": 0.000598229838847383,
      "loss": 3.2415,
      "step": 7971
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3597443103790283,
      "learning_rate": 0.0005982293951066305,
      "loss": 2.9845,
      "step": 7972
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0720021724700928,
      "learning_rate": 0.0005982289513104316,
      "loss": 3.1256,
      "step": 7973
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.180980920791626,
      "learning_rate": 0.0005982285074587863,
      "loss": 3.2703,
      "step": 7974
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1818602085113525,
      "learning_rate": 0.0005982280635516946,
      "loss": 3.179,
      "step": 7975
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4304473400115967,
      "learning_rate": 0.0005982276195891568,
      "loss": 3.2051,
      "step": 7976
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2750306129455566,
      "learning_rate": 0.0005982271755711728,
      "loss": 3.1839,
      "step": 7977
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.41215443611145,
      "learning_rate": 0.0005982267314977428,
      "loss": 3.1818,
      "step": 7978
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4822983741760254,
      "learning_rate": 0.0005982262873688668,
      "loss": 3.1214,
      "step": 7979
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5843334197998047,
      "learning_rate": 0.0005982258431845447,
      "loss": 3.4055,
      "step": 7980
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.2545390129089355,
      "learning_rate": 0.000598225398944777,
      "loss": 3.0253,
      "step": 7981
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.516728401184082,
      "learning_rate": 0.0005982249546495636,
      "loss": 3.2958,
      "step": 7982
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5595043897628784,
      "learning_rate": 0.0005982245102989043,
      "loss": 3.269,
      "step": 7983
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.205314874649048,
      "learning_rate": 0.0005982240658927997,
      "loss": 3.1344,
      "step": 7984
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.6774723529815674,
      "learning_rate": 0.0005982236214312496,
      "loss": 3.3818,
      "step": 7985
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.445933222770691,
      "learning_rate": 0.0005982231769142539,
      "loss": 3.638,
      "step": 7986
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3649983406066895,
      "learning_rate": 0.0005982227323418131,
      "loss": 3.2667,
      "step": 7987
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2267045974731445,
      "learning_rate": 0.0005982222877139269,
      "loss": 2.978,
      "step": 7988
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4655852317810059,
      "learning_rate": 0.0005982218430305957,
      "loss": 3.3043,
      "step": 7989
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6210005283355713,
      "learning_rate": 0.0005982213982918192,
      "loss": 3.3698,
      "step": 7990
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.183650016784668,
      "learning_rate": 0.0005982209534975979,
      "loss": 3.1677,
      "step": 7991
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5903860330581665,
      "learning_rate": 0.0005982205086479316,
      "loss": 3.1509,
      "step": 7992
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4609265327453613,
      "learning_rate": 0.0005982200637428206,
      "loss": 3.0301,
      "step": 7993
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5657202005386353,
      "learning_rate": 0.0005982196187822648,
      "loss": 2.8902,
      "step": 7994
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4624154567718506,
      "learning_rate": 0.0005982191737662643,
      "loss": 3.1796,
      "step": 7995
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4046577215194702,
      "learning_rate": 0.0005982187286948193,
      "loss": 3.1593,
      "step": 7996
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.302583694458008,
      "learning_rate": 0.0005982182835679297,
      "loss": 3.3419,
      "step": 7997
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8268593549728394,
      "learning_rate": 0.0005982178383855958,
      "loss": 2.8887,
      "step": 7998
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2680153846740723,
      "learning_rate": 0.0005982173931478175,
      "loss": 3.2463,
      "step": 7999
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9734580516815186,
      "learning_rate": 0.0005982169478545951,
      "loss": 3.262,
      "step": 8000
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6481432914733887,
      "learning_rate": 0.0005982165025059283,
      "loss": 3.3078,
      "step": 8001
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7205919027328491,
      "learning_rate": 0.0005982160571018177,
      "loss": 3.1371,
      "step": 8002
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.848770022392273,
      "learning_rate": 0.0005982156116422629,
      "loss": 3.3706,
      "step": 8003
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8783739805221558,
      "learning_rate": 0.0005982151661272642,
      "loss": 3.4079,
      "step": 8004
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1218183040618896,
      "learning_rate": 0.0005982147205568218,
      "loss": 3.4433,
      "step": 8005
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.9995815753936768,
      "learning_rate": 0.0005982142749309356,
      "loss": 3.135,
      "step": 8006
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5060240030288696,
      "learning_rate": 0.0005982138292496056,
      "loss": 3.3076,
      "step": 8007
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.408589482307434,
      "learning_rate": 0.0005982133835128322,
      "loss": 3.3759,
      "step": 8008
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8118737936019897,
      "learning_rate": 0.0005982129377206152,
      "loss": 3.2466,
      "step": 8009
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9473888874053955,
      "learning_rate": 0.0005982124918729549,
      "loss": 3.2975,
      "step": 8010
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.494667887687683,
      "learning_rate": 0.0005982120459698511,
      "loss": 3.2042,
      "step": 8011
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6538442373275757,
      "learning_rate": 0.0005982116000113042,
      "loss": 3.3755,
      "step": 8012
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.4001708030700684,
      "learning_rate": 0.000598211153997314,
      "loss": 3.452,
      "step": 8013
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7476028203964233,
      "learning_rate": 0.0005982107079278808,
      "loss": 3.2989,
      "step": 8014
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.931962251663208,
      "learning_rate": 0.0005982102618030046,
      "loss": 3.1219,
      "step": 8015
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.5914249420166016,
      "learning_rate": 0.0005982098156226854,
      "loss": 3.1514,
      "step": 8016
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6138668060302734,
      "learning_rate": 0.0005982093693869235,
      "loss": 3.2755,
      "step": 8017
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5768499374389648,
      "learning_rate": 0.0005982089230957188,
      "loss": 2.9353,
      "step": 8018
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8756535053253174,
      "learning_rate": 0.0005982084767490715,
      "loss": 3.0811,
      "step": 8019
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4381756782531738,
      "learning_rate": 0.0005982080303469815,
      "loss": 3.1964,
      "step": 8020
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.2444751262664795,
      "learning_rate": 0.000598207583889449,
      "loss": 3.0516,
      "step": 8021
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5187660455703735,
      "learning_rate": 0.0005982071373764741,
      "loss": 3.2125,
      "step": 8022
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.792586088180542,
      "learning_rate": 0.0005982066908080569,
      "loss": 3.2354,
      "step": 8023
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8964827060699463,
      "learning_rate": 0.0005982062441841975,
      "loss": 3.3395,
      "step": 8024
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.008190870285034,
      "learning_rate": 0.0005982057975048958,
      "loss": 3.1696,
      "step": 8025
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.565072774887085,
      "learning_rate": 0.0005982053507701521,
      "loss": 3.4269,
      "step": 8026
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5612483024597168,
      "learning_rate": 0.0005982049039799664,
      "loss": 3.2341,
      "step": 8027
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6661168336868286,
      "learning_rate": 0.0005982044571343387,
      "loss": 3.0964,
      "step": 8028
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5483232736587524,
      "learning_rate": 0.0005982040102332692,
      "loss": 2.9849,
      "step": 8029
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4038680791854858,
      "learning_rate": 0.000598203563276758,
      "loss": 3.1322,
      "step": 8030
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7559376955032349,
      "learning_rate": 0.000598203116264805,
      "loss": 2.9034,
      "step": 8031
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.976332426071167,
      "learning_rate": 0.0005982026691974105,
      "loss": 3.2088,
      "step": 8032
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.757897138595581,
      "learning_rate": 0.0005982022220745745,
      "loss": 3.2685,
      "step": 8033
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5123810768127441,
      "learning_rate": 0.0005982017748962971,
      "loss": 3.0341,
      "step": 8034
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6400550603866577,
      "learning_rate": 0.0005982013276625783,
      "loss": 3.1983,
      "step": 8035
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.535200595855713,
      "learning_rate": 0.0005982008803734183,
      "loss": 3.1079,
      "step": 8036
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3670326471328735,
      "learning_rate": 0.0005982004330288171,
      "loss": 3.2241,
      "step": 8037
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9011549949645996,
      "learning_rate": 0.0005981999856287748,
      "loss": 3.5346,
      "step": 8038
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6179066896438599,
      "learning_rate": 0.0005981995381732915,
      "loss": 3.0523,
      "step": 8039
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6628093719482422,
      "learning_rate": 0.0005981990906623673,
      "loss": 3.1449,
      "step": 8040
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.3414697647094727,
      "learning_rate": 0.0005981986430960022,
      "loss": 3.1652,
      "step": 8041
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8306101560592651,
      "learning_rate": 0.0005981981954741964,
      "loss": 3.4507,
      "step": 8042
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6503444910049438,
      "learning_rate": 0.0005981977477969499,
      "loss": 3.3311,
      "step": 8043
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5213457345962524,
      "learning_rate": 0.0005981973000642628,
      "loss": 3.2017,
      "step": 8044
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3833489418029785,
      "learning_rate": 0.0005981968522761352,
      "loss": 3.2554,
      "step": 8045
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.015047788619995,
      "learning_rate": 0.0005981964044325672,
      "loss": 3.1412,
      "step": 8046
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5876843929290771,
      "learning_rate": 0.0005981959565335589,
      "loss": 3.4728,
      "step": 8047
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.835892915725708,
      "learning_rate": 0.0005981955085791103,
      "loss": 3.1923,
      "step": 8048
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6569045782089233,
      "learning_rate": 0.0005981950605692214,
      "loss": 3.1742,
      "step": 8049
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6064540147781372,
      "learning_rate": 0.0005981946125038927,
      "loss": 2.9355,
      "step": 8050
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5369912385940552,
      "learning_rate": 0.0005981941643831238,
      "loss": 3.2906,
      "step": 8051
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6248507499694824,
      "learning_rate": 0.000598193716206915,
      "loss": 3.0916,
      "step": 8052
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.9375085830688477,
      "learning_rate": 0.0005981932679752662,
      "loss": 3.2614,
      "step": 8053
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.8287742137908936,
      "learning_rate": 0.0005981928196881779,
      "loss": 3.212,
      "step": 8054
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.6686086654663086,
      "learning_rate": 0.0005981923713456498,
      "loss": 3.2968,
      "step": 8055
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7903629541397095,
      "learning_rate": 0.0005981919229476821,
      "loss": 3.3244,
      "step": 8056
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.0031943321228027,
      "learning_rate": 0.0005981914744942749,
      "loss": 3.3565,
      "step": 8057
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.5814944505691528,
      "learning_rate": 0.0005981910259854284,
      "loss": 3.1715,
      "step": 8058
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.4890961647033691,
      "learning_rate": 0.0005981905774211424,
      "loss": 3.1374,
      "step": 8059
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7215558290481567,
      "learning_rate": 0.0005981901288014172,
      "loss": 3.1504,
      "step": 8060
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7144742012023926,
      "learning_rate": 0.0005981896801262529,
      "loss": 3.1596,
      "step": 8061
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.1545627117156982,
      "learning_rate": 0.0005981892313956493,
      "loss": 3.4346,
      "step": 8062
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.3341856002807617,
      "learning_rate": 0.0005981887826096069,
      "loss": 3.2533,
      "step": 8063
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.7244731187820435,
      "learning_rate": 0.0005981883337681256,
      "loss": 3.4462,
      "step": 8064
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0730984210968018,
      "learning_rate": 0.0005981878848712053,
      "loss": 3.1772,
      "step": 8065
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0314929485321045,
      "learning_rate": 0.0005981874359188464,
      "loss": 3.0947,
      "step": 8066
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7982698678970337,
      "learning_rate": 0.0005981869869110487,
      "loss": 3.2431,
      "step": 8067
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7695413827896118,
      "learning_rate": 0.0005981865378478124,
      "loss": 3.2282,
      "step": 8068
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.742507815361023,
      "learning_rate": 0.0005981860887291377,
      "loss": 3.254,
      "step": 8069
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7294281721115112,
      "learning_rate": 0.0005981856395550245,
      "loss": 3.2656,
      "step": 8070
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7704671621322632,
      "learning_rate": 0.0005981851903254729,
      "loss": 3.2364,
      "step": 8071
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4294683933258057,
      "learning_rate": 0.0005981847410404832,
      "loss": 3.5013,
      "step": 8072
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7174506187438965,
      "learning_rate": 0.0005981842917000553,
      "loss": 3.3464,
      "step": 8073
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6030433177948,
      "learning_rate": 0.0005981838423041892,
      "loss": 3.096,
      "step": 8074
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6537754535675049,
      "learning_rate": 0.0005981833928528852,
      "loss": 3.356,
      "step": 8075
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7893017530441284,
      "learning_rate": 0.0005981829433461432,
      "loss": 3.5411,
      "step": 8076
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.535106897354126,
      "learning_rate": 0.0005981824937839635,
      "loss": 3.0141,
      "step": 8077
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.23435115814209,
      "learning_rate": 0.0005981820441663459,
      "loss": 3.0169,
      "step": 8078
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.551869511604309,
      "learning_rate": 0.0005981815944932906,
      "loss": 3.2636,
      "step": 8079
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1602299213409424,
      "learning_rate": 0.0005981811447647978,
      "loss": 3.2571,
      "step": 8080
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.949330449104309,
      "learning_rate": 0.0005981806949808676,
      "loss": 3.1642,
      "step": 8081
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3989794254302979,
      "learning_rate": 0.0005981802451414998,
      "loss": 3.0177,
      "step": 8082
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.647759199142456,
      "learning_rate": 0.0005981797952466948,
      "loss": 3.3906,
      "step": 8083
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7490116357803345,
      "learning_rate": 0.0005981793452964524,
      "loss": 2.8584,
      "step": 8084
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6353756189346313,
      "learning_rate": 0.0005981788952907729,
      "loss": 3.2516,
      "step": 8085
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0255627632141113,
      "learning_rate": 0.0005981784452296563,
      "loss": 3.4007,
      "step": 8086
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0652832984924316,
      "learning_rate": 0.0005981779951131028,
      "loss": 3.353,
      "step": 8087
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5545530319213867,
      "learning_rate": 0.0005981775449411123,
      "loss": 3.2041,
      "step": 8088
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1230101585388184,
      "learning_rate": 0.000598177094713685,
      "loss": 3.136,
      "step": 8089
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9109710454940796,
      "learning_rate": 0.000598176644430821,
      "loss": 2.9447,
      "step": 8090
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3796018362045288,
      "learning_rate": 0.0005981761940925202,
      "loss": 3.2668,
      "step": 8091
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.880793571472168,
      "learning_rate": 0.0005981757436987828,
      "loss": 3.0396,
      "step": 8092
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6201473474502563,
      "learning_rate": 0.000598175293249609,
      "loss": 3.257,
      "step": 8093
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.65519380569458,
      "learning_rate": 0.0005981748427449987,
      "loss": 3.2557,
      "step": 8094
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6932648420333862,
      "learning_rate": 0.0005981743921849522,
      "loss": 3.0276,
      "step": 8095
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.356841802597046,
      "learning_rate": 0.0005981739415694693,
      "loss": 3.2191,
      "step": 8096
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8338862657546997,
      "learning_rate": 0.0005981734908985503,
      "loss": 3.0805,
      "step": 8097
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7642353773117065,
      "learning_rate": 0.0005981730401721952,
      "loss": 3.2856,
      "step": 8098
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.7231407165527344,
      "learning_rate": 0.0005981725893904042,
      "loss": 3.373,
      "step": 8099
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.583282709121704,
      "learning_rate": 0.0005981721385531772,
      "loss": 3.1938,
      "step": 8100
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2742438316345215,
      "learning_rate": 0.0005981716876605145,
      "loss": 3.2148,
      "step": 8101
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5130534172058105,
      "learning_rate": 0.0005981712367124159,
      "loss": 3.3399,
      "step": 8102
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6044679880142212,
      "learning_rate": 0.0005981707857088816,
      "loss": 3.2999,
      "step": 8103
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5435528755187988,
      "learning_rate": 0.0005981703346499118,
      "loss": 3.3468,
      "step": 8104
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.406123161315918,
      "learning_rate": 0.0005981698835355065,
      "loss": 3.3369,
      "step": 8105
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6823209524154663,
      "learning_rate": 0.0005981694323656659,
      "loss": 3.2416,
      "step": 8106
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9788504838943481,
      "learning_rate": 0.0005981689811403899,
      "loss": 3.3298,
      "step": 8107
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4386236667633057,
      "learning_rate": 0.0005981685298596786,
      "loss": 3.0891,
      "step": 8108
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7279797792434692,
      "learning_rate": 0.0005981680785235322,
      "loss": 3.1425,
      "step": 8109
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9302805662155151,
      "learning_rate": 0.0005981676271319507,
      "loss": 3.2943,
      "step": 8110
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.9504172801971436,
      "learning_rate": 0.0005981671756849343,
      "loss": 3.255,
      "step": 8111
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4433767795562744,
      "learning_rate": 0.0005981667241824829,
      "loss": 3.0848,
      "step": 8112
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9116889238357544,
      "learning_rate": 0.0005981662726245966,
      "loss": 3.1831,
      "step": 8113
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2886486053466797,
      "learning_rate": 0.0005981658210112757,
      "loss": 3.1582,
      "step": 8114
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0925989151000977,
      "learning_rate": 0.0005981653693425201,
      "loss": 3.457,
      "step": 8115
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7361650466918945,
      "learning_rate": 0.0005981649176183299,
      "loss": 3.2749,
      "step": 8116
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6165627241134644,
      "learning_rate": 0.0005981644658387053,
      "loss": 3.3138,
      "step": 8117
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.413593053817749,
      "learning_rate": 0.0005981640140036461,
      "loss": 3.1915,
      "step": 8118
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3984500169754028,
      "learning_rate": 0.0005981635621131528,
      "loss": 3.2586,
      "step": 8119
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5903737545013428,
      "learning_rate": 0.0005981631101672251,
      "loss": 3.2064,
      "step": 8120
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5905122756958008,
      "learning_rate": 0.0005981626581658635,
      "loss": 3.3606,
      "step": 8121
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4574273824691772,
      "learning_rate": 0.0005981622061090676,
      "loss": 3.5035,
      "step": 8122
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3102149963378906,
      "learning_rate": 0.0005981617539968378,
      "loss": 3.0567,
      "step": 8123
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2467983961105347,
      "learning_rate": 0.0005981613018291741,
      "loss": 3.4005,
      "step": 8124
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1432878971099854,
      "learning_rate": 0.0005981608496060765,
      "loss": 3.3144,
      "step": 8125
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.856293797492981,
      "learning_rate": 0.0005981603973275452,
      "loss": 3.2015,
      "step": 8126
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5012937784194946,
      "learning_rate": 0.0005981599449935804,
      "loss": 2.9191,
      "step": 8127
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5663468837738037,
      "learning_rate": 0.0005981594926041819,
      "loss": 3.0552,
      "step": 8128
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3529579639434814,
      "learning_rate": 0.0005981590401593499,
      "loss": 3.0175,
      "step": 8129
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4652018547058105,
      "learning_rate": 0.0005981585876590846,
      "loss": 3.5182,
      "step": 8130
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5361136198043823,
      "learning_rate": 0.0005981581351033859,
      "loss": 3.3808,
      "step": 8131
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8102126121520996,
      "learning_rate": 0.000598157682492254,
      "loss": 3.2545,
      "step": 8132
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.075324296951294,
      "learning_rate": 0.0005981572298256891,
      "loss": 3.1357,
      "step": 8133
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7565215826034546,
      "learning_rate": 0.0005981567771036909,
      "loss": 3.4701,
      "step": 8134
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.592752456665039,
      "learning_rate": 0.0005981563243262599,
      "loss": 2.9915,
      "step": 8135
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3948073387145996,
      "learning_rate": 0.000598155871493396,
      "loss": 3.435,
      "step": 8136
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8080180883407593,
      "learning_rate": 0.0005981554186050992,
      "loss": 3.219,
      "step": 8137
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4503690004348755,
      "learning_rate": 0.0005981549656613698,
      "loss": 3.2554,
      "step": 8138
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.70844566822052,
      "learning_rate": 0.0005981545126622078,
      "loss": 3.3232,
      "step": 8139
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.820220708847046,
      "learning_rate": 0.0005981540596076131,
      "loss": 3.1938,
      "step": 8140
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3993046283721924,
      "learning_rate": 0.000598153606497586,
      "loss": 3.4083,
      "step": 8141
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1741435527801514,
      "learning_rate": 0.0005981531533321264,
      "loss": 3.1432,
      "step": 8142
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9350018501281738,
      "learning_rate": 0.0005981527001112347,
      "loss": 3.272,
      "step": 8143
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4318046569824219,
      "learning_rate": 0.0005981522468349106,
      "loss": 3.0852,
      "step": 8144
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.177363395690918,
      "learning_rate": 0.0005981517935031545,
      "loss": 3.1999,
      "step": 8145
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.344388484954834,
      "learning_rate": 0.0005981513401159663,
      "loss": 3.4283,
      "step": 8146
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7948448657989502,
      "learning_rate": 0.0005981508866733462,
      "loss": 3.4309,
      "step": 8147
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.306950569152832,
      "learning_rate": 0.0005981504331752941,
      "loss": 3.0658,
      "step": 8148
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4393227100372314,
      "learning_rate": 0.0005981499796218103,
      "loss": 3.2127,
      "step": 8149
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9447948932647705,
      "learning_rate": 0.0005981495260128948,
      "loss": 3.2002,
      "step": 8150
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.352189779281616,
      "learning_rate": 0.0005981490723485476,
      "loss": 3.1598,
      "step": 8151
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6442186832427979,
      "learning_rate": 0.0005981486186287689,
      "loss": 3.0265,
      "step": 8152
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3935339450836182,
      "learning_rate": 0.0005981481648535587,
      "loss": 3.0842,
      "step": 8153
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3863568305969238,
      "learning_rate": 0.0005981477110229172,
      "loss": 3.3432,
      "step": 8154
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.637871503829956,
      "learning_rate": 0.0005981472571368444,
      "loss": 3.0869,
      "step": 8155
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6594953536987305,
      "learning_rate": 0.0005981468031953405,
      "loss": 2.9363,
      "step": 8156
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3862770795822144,
      "learning_rate": 0.0005981463491984052,
      "loss": 3.152,
      "step": 8157
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4962382316589355,
      "learning_rate": 0.0005981458951460391,
      "loss": 3.153,
      "step": 8158
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4822120666503906,
      "learning_rate": 0.000598145441038242,
      "loss": 3.1226,
      "step": 8159
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.110394239425659,
      "learning_rate": 0.000598144986875014,
      "loss": 3.2116,
      "step": 8160
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.479434609413147,
      "learning_rate": 0.0005981445326563552,
      "loss": 3.2368,
      "step": 8161
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.396977186203003,
      "learning_rate": 0.0005981440783822657,
      "loss": 3.2699,
      "step": 8162
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3151495456695557,
      "learning_rate": 0.0005981436240527457,
      "loss": 3.048,
      "step": 8163
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3368051052093506,
      "learning_rate": 0.0005981431696677951,
      "loss": 3.1296,
      "step": 8164
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.096628189086914,
      "learning_rate": 0.000598142715227414,
      "loss": 3.1219,
      "step": 8165
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.9532716274261475,
      "learning_rate": 0.0005981422607316026,
      "loss": 3.3445,
      "step": 8166
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8236160278320312,
      "learning_rate": 0.0005981418061803609,
      "loss": 3.481,
      "step": 8167
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.665232539176941,
      "learning_rate": 0.0005981413515736892,
      "loss": 3.2173,
      "step": 8168
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5748883485794067,
      "learning_rate": 0.0005981408969115871,
      "loss": 3.1585,
      "step": 8169
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3295505046844482,
      "learning_rate": 0.0005981404421940552,
      "loss": 3.4504,
      "step": 8170
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.873402714729309,
      "learning_rate": 0.0005981399874210932,
      "loss": 3.3663,
      "step": 8171
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6240710020065308,
      "learning_rate": 0.0005981395325927014,
      "loss": 3.045,
      "step": 8172
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.570986270904541,
      "learning_rate": 0.0005981390777088798,
      "loss": 3.3151,
      "step": 8173
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.480447769165039,
      "learning_rate": 0.0005981386227696286,
      "loss": 3.2992,
      "step": 8174
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6274527311325073,
      "learning_rate": 0.0005981381677749478,
      "loss": 3.1587,
      "step": 8175
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.752212405204773,
      "learning_rate": 0.0005981377127248374,
      "loss": 3.1873,
      "step": 8176
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7386173009872437,
      "learning_rate": 0.0005981372576192977,
      "loss": 3.1661,
      "step": 8177
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5047190189361572,
      "learning_rate": 0.0005981368024583286,
      "loss": 3.2397,
      "step": 8178
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.865807056427002,
      "learning_rate": 0.0005981363472419302,
      "loss": 3.1442,
      "step": 8179
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6580713987350464,
      "learning_rate": 0.0005981358919701027,
      "loss": 2.9918,
      "step": 8180
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6244597434997559,
      "learning_rate": 0.000598135436642846,
      "loss": 3.0268,
      "step": 8181
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.570395588874817,
      "learning_rate": 0.0005981349812601604,
      "loss": 3.0117,
      "step": 8182
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.010723829269409,
      "learning_rate": 0.0005981345258220458,
      "loss": 3.0088,
      "step": 8183
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.087480068206787,
      "learning_rate": 0.0005981340703285024,
      "loss": 2.9683,
      "step": 8184
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8441640138626099,
      "learning_rate": 0.0005981336147795303,
      "loss": 3.4139,
      "step": 8185
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.619061827659607,
      "learning_rate": 0.0005981331591751295,
      "loss": 3.0313,
      "step": 8186
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.792128086090088,
      "learning_rate": 0.0005981327035153,
      "loss": 3.2289,
      "step": 8187
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7112292051315308,
      "learning_rate": 0.0005981322478000421,
      "loss": 3.2439,
      "step": 8188
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4024205207824707,
      "learning_rate": 0.0005981317920293558,
      "loss": 2.9737,
      "step": 8189
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2081000804901123,
      "learning_rate": 0.0005981313362032413,
      "loss": 3.1336,
      "step": 8190
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6773080825805664,
      "learning_rate": 0.0005981308803216983,
      "loss": 3.3371,
      "step": 8191
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.405566930770874,
      "learning_rate": 0.0005981304243847272,
      "loss": 2.9336,
      "step": 8192
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2685229778289795,
      "learning_rate": 0.0005981299683923281,
      "loss": 3.2251,
      "step": 8193
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6793180704116821,
      "learning_rate": 0.000598129512344501,
      "loss": 3.3262,
      "step": 8194
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.419346570968628,
      "learning_rate": 0.000598129056241246,
      "loss": 3.2167,
      "step": 8195
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6508549451828003,
      "learning_rate": 0.0005981286000825631,
      "loss": 3.3799,
      "step": 8196
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.672754168510437,
      "learning_rate": 0.0005981281438684527,
      "loss": 3.0649,
      "step": 8197
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5914827585220337,
      "learning_rate": 0.0005981276875989144,
      "loss": 3.1171,
      "step": 8198
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3877413272857666,
      "learning_rate": 0.0005981272312739486,
      "loss": 3.0512,
      "step": 8199
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5152627229690552,
      "learning_rate": 0.0005981267748935554,
      "loss": 3.3401,
      "step": 8200
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1296212673187256,
      "learning_rate": 0.0005981263184577347,
      "loss": 3.1503,
      "step": 8201
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.87405264377594,
      "learning_rate": 0.0005981258619664868,
      "loss": 3.3603,
      "step": 8202
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.039102792739868,
      "learning_rate": 0.0005981254054198117,
      "loss": 3.1033,
      "step": 8203
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.575246810913086,
      "learning_rate": 0.0005981249488177093,
      "loss": 3.2726,
      "step": 8204
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7046349048614502,
      "learning_rate": 0.0005981244921601799,
      "loss": 3.2438,
      "step": 8205
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3890745639801025,
      "learning_rate": 0.0005981240354472236,
      "loss": 3.3468,
      "step": 8206
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8165091276168823,
      "learning_rate": 0.0005981235786788404,
      "loss": 3.1865,
      "step": 8207
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3357067108154297,
      "learning_rate": 0.0005981231218550303,
      "loss": 3.3735,
      "step": 8208
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.45697820186615,
      "learning_rate": 0.0005981226649757935,
      "loss": 3.3362,
      "step": 8209
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7893948554992676,
      "learning_rate": 0.0005981222080411302,
      "loss": 3.2188,
      "step": 8210
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.620957612991333,
      "learning_rate": 0.0005981217510510402,
      "loss": 3.2303,
      "step": 8211
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0370888710021973,
      "learning_rate": 0.0005981212940055238,
      "loss": 3.0957,
      "step": 8212
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6251137256622314,
      "learning_rate": 0.0005981208369045812,
      "loss": 3.3966,
      "step": 8213
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.4090359210968018,
      "learning_rate": 0.000598120379748212,
      "loss": 3.1099,
      "step": 8214
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.3415133953094482,
      "learning_rate": 0.0005981199225364168,
      "loss": 3.102,
      "step": 8215
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5045723915100098,
      "learning_rate": 0.0005981194652691953,
      "loss": 3.3055,
      "step": 8216
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4483554363250732,
      "learning_rate": 0.000598119007946548,
      "loss": 3.1142,
      "step": 8217
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.13769793510437,
      "learning_rate": 0.0005981185505684745,
      "loss": 3.2294,
      "step": 8218
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4232479333877563,
      "learning_rate": 0.0005981180931349753,
      "loss": 3.1816,
      "step": 8219
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.0253233909606934,
      "learning_rate": 0.0005981176356460503,
      "loss": 3.0071,
      "step": 8220
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.703332543373108,
      "learning_rate": 0.0005981171781016996,
      "loss": 3.2238,
      "step": 8221
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8699111938476562,
      "learning_rate": 0.0005981167205019233,
      "loss": 3.2481,
      "step": 8222
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.194258451461792,
      "learning_rate": 0.0005981162628467215,
      "loss": 3.4103,
      "step": 8223
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4649879932403564,
      "learning_rate": 0.0005981158051360942,
      "loss": 3.4997,
      "step": 8224
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7904119491577148,
      "learning_rate": 0.0005981153473700416,
      "loss": 3.1149,
      "step": 8225
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9581525325775146,
      "learning_rate": 0.0005981148895485636,
      "loss": 3.2267,
      "step": 8226
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.933578372001648,
      "learning_rate": 0.0005981144316716605,
      "loss": 3.1187,
      "step": 8227
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.007981300354004,
      "learning_rate": 0.0005981139737393323,
      "loss": 3.3105,
      "step": 8228
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5918587446212769,
      "learning_rate": 0.000598113515751579,
      "loss": 3.2451,
      "step": 8229
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9573862552642822,
      "learning_rate": 0.0005981130577084009,
      "loss": 3.4534,
      "step": 8230
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4767566919326782,
      "learning_rate": 0.0005981125996097979,
      "loss": 3.3353,
      "step": 8231
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6171364784240723,
      "learning_rate": 0.0005981121414557701,
      "loss": 3.3419,
      "step": 8232
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5311787128448486,
      "learning_rate": 0.0005981116832463177,
      "loss": 3.3894,
      "step": 8233
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7768901586532593,
      "learning_rate": 0.0005981112249814407,
      "loss": 3.1958,
      "step": 8234
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.52054762840271,
      "learning_rate": 0.0005981107666611392,
      "loss": 3.2977,
      "step": 8235
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7392016649246216,
      "learning_rate": 0.0005981103082854133,
      "loss": 3.1034,
      "step": 8236
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5008573532104492,
      "learning_rate": 0.000598109849854263,
      "loss": 3.2873,
      "step": 8237
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1753838062286377,
      "learning_rate": 0.0005981093913676885,
      "loss": 3.2199,
      "step": 8238
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.762044072151184,
      "learning_rate": 0.0005981089328256898,
      "loss": 3.3367,
      "step": 8239
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.843845248222351,
      "learning_rate": 0.000598108474228267,
      "loss": 3.5885,
      "step": 8240
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.972959280014038,
      "learning_rate": 0.0005981080155754203,
      "loss": 3.404,
      "step": 8241
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.2822368144989014,
      "learning_rate": 0.0005981075568671496,
      "loss": 3.0058,
      "step": 8242
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.277289390563965,
      "learning_rate": 0.0005981070981034551,
      "loss": 3.2232,
      "step": 8243
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.962791919708252,
      "learning_rate": 0.0005981066392843369,
      "loss": 3.114,
      "step": 8244
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.460543632507324,
      "learning_rate": 0.000598106180409795,
      "loss": 3.1487,
      "step": 8245
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3657867908477783,
      "learning_rate": 0.0005981057214798296,
      "loss": 3.2394,
      "step": 8246
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4486331939697266,
      "learning_rate": 0.0005981052624944406,
      "loss": 3.2083,
      "step": 8247
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8234484195709229,
      "learning_rate": 0.0005981048034536283,
      "loss": 3.1527,
      "step": 8248
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.017836093902588,
      "learning_rate": 0.0005981043443573928,
      "loss": 3.2126,
      "step": 8249
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.447577953338623,
      "learning_rate": 0.0005981038852057338,
      "loss": 3.0593,
      "step": 8250
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8880804777145386,
      "learning_rate": 0.0005981034259986518,
      "loss": 3.214,
      "step": 8251
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2131354808807373,
      "learning_rate": 0.0005981029667361469,
      "loss": 3.2398,
      "step": 8252
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.955196499824524,
      "learning_rate": 0.0005981025074182189,
      "loss": 3.3665,
      "step": 8253
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9998083114624023,
      "learning_rate": 0.0005981020480448679,
      "loss": 3.2556,
      "step": 8254
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7345279455184937,
      "learning_rate": 0.0005981015886160942,
      "loss": 3.1444,
      "step": 8255
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.910789132118225,
      "learning_rate": 0.0005981011291318979,
      "loss": 3.4472,
      "step": 8256
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3113934993743896,
      "learning_rate": 0.0005981006695922788,
      "loss": 3.0401,
      "step": 8257
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.331732988357544,
      "learning_rate": 0.0005981002099972372,
      "loss": 3.0783,
      "step": 8258
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7754738330841064,
      "learning_rate": 0.0005980997503467732,
      "loss": 3.1465,
      "step": 8259
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2114615440368652,
      "learning_rate": 0.0005980992906408867,
      "loss": 3.4233,
      "step": 8260
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0784058570861816,
      "learning_rate": 0.0005980988308795781,
      "loss": 3.2525,
      "step": 8261
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.6980140209198,
      "learning_rate": 0.0005980983710628473,
      "loss": 3.047,
      "step": 8262
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7567453384399414,
      "learning_rate": 0.0005980979111906942,
      "loss": 3.3334,
      "step": 8263
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0393621921539307,
      "learning_rate": 0.0005980974512631191,
      "loss": 3.2902,
      "step": 8264
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.401911497116089,
      "learning_rate": 0.0005980969912801222,
      "loss": 3.0611,
      "step": 8265
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5138953924179077,
      "learning_rate": 0.0005980965312417034,
      "loss": 3.2331,
      "step": 8266
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.448791742324829,
      "learning_rate": 0.0005980960711478628,
      "loss": 3.0839,
      "step": 8267
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.8687081336975098,
      "learning_rate": 0.0005980956109986005,
      "loss": 3.007,
      "step": 8268
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3797954320907593,
      "learning_rate": 0.0005980951507939166,
      "loss": 3.5178,
      "step": 8269
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3247219324111938,
      "learning_rate": 0.0005980946905338112,
      "loss": 3.3576,
      "step": 8270
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5164645910263062,
      "learning_rate": 0.0005980942302182844,
      "loss": 2.9935,
      "step": 8271
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9438308477401733,
      "learning_rate": 0.0005980937698473362,
      "loss": 3.4403,
      "step": 8272
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4906452894210815,
      "learning_rate": 0.0005980933094209668,
      "loss": 3.0511,
      "step": 8273
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8856536149978638,
      "learning_rate": 0.0005980928489391762,
      "loss": 3.0828,
      "step": 8274
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5377516746520996,
      "learning_rate": 0.0005980923884019644,
      "loss": 3.1898,
      "step": 8275
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6889433860778809,
      "learning_rate": 0.0005980919278093318,
      "loss": 3.0057,
      "step": 8276
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7545239925384521,
      "learning_rate": 0.0005980914671612781,
      "loss": 3.2086,
      "step": 8277
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7993626594543457,
      "learning_rate": 0.0005980910064578037,
      "loss": 3.0791,
      "step": 8278
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9931890964508057,
      "learning_rate": 0.0005980905456989085,
      "loss": 3.4493,
      "step": 8279
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7388283014297485,
      "learning_rate": 0.0005980900848845926,
      "loss": 3.351,
      "step": 8280
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.651418447494507,
      "learning_rate": 0.0005980896240148562,
      "loss": 3.1718,
      "step": 8281
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0098695755004883,
      "learning_rate": 0.0005980891630896993,
      "loss": 3.3669,
      "step": 8282
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.761478304862976,
      "learning_rate": 0.000598088702109122,
      "loss": 3.1293,
      "step": 8283
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.436445951461792,
      "learning_rate": 0.0005980882410731243,
      "loss": 3.2967,
      "step": 8284
    },
    {
      "epoch": 0.11,
      "grad_norm": 4.269468784332275,
      "learning_rate": 0.0005980877799817065,
      "loss": 3.0673,
      "step": 8285
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0870344638824463,
      "learning_rate": 0.0005980873188348684,
      "loss": 3.2501,
      "step": 8286
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9820244312286377,
      "learning_rate": 0.0005980868576326104,
      "loss": 3.6942,
      "step": 8287
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.319462537765503,
      "learning_rate": 0.0005980863963749324,
      "loss": 3.1158,
      "step": 8288
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3872878551483154,
      "learning_rate": 0.0005980859350618344,
      "loss": 3.1848,
      "step": 8289
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5430773496627808,
      "learning_rate": 0.0005980854736933167,
      "loss": 3.2716,
      "step": 8290
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0089871883392334,
      "learning_rate": 0.0005980850122693794,
      "loss": 3.5527,
      "step": 8291
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.763122797012329,
      "learning_rate": 0.0005980845507900221,
      "loss": 3.0627,
      "step": 8292
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.900491952896118,
      "learning_rate": 0.0005980840892552455,
      "loss": 3.247,
      "step": 8293
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4896670579910278,
      "learning_rate": 0.0005980836276650494,
      "loss": 3.1762,
      "step": 8294
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9948219060897827,
      "learning_rate": 0.000598083166019434,
      "loss": 3.6291,
      "step": 8295
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.243717908859253,
      "learning_rate": 0.0005980827043183993,
      "loss": 3.1655,
      "step": 8296
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6123030185699463,
      "learning_rate": 0.0005980822425619454,
      "loss": 3.2829,
      "step": 8297
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8842337131500244,
      "learning_rate": 0.0005980817807500723,
      "loss": 3.3977,
      "step": 8298
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.740399122238159,
      "learning_rate": 0.0005980813188827802,
      "loss": 3.2705,
      "step": 8299
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.989758014678955,
      "learning_rate": 0.0005980808569600692,
      "loss": 3.3292,
      "step": 8300
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.254479169845581,
      "learning_rate": 0.0005980803949819393,
      "loss": 3.0706,
      "step": 8301
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0598506927490234,
      "learning_rate": 0.0005980799329483906,
      "loss": 3.4797,
      "step": 8302
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9796116352081299,
      "learning_rate": 0.0005980794708594232,
      "loss": 3.1941,
      "step": 8303
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.635592460632324,
      "learning_rate": 0.0005980790087150373,
      "loss": 3.1452,
      "step": 8304
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.706407308578491,
      "learning_rate": 0.0005980785465152329,
      "loss": 3.066,
      "step": 8305
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.435034990310669,
      "learning_rate": 0.0005980780842600099,
      "loss": 3.3032,
      "step": 8306
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8825814723968506,
      "learning_rate": 0.0005980776219493687,
      "loss": 3.123,
      "step": 8307
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.476923942565918,
      "learning_rate": 0.0005980771595833093,
      "loss": 3.0885,
      "step": 8308
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.055480718612671,
      "learning_rate": 0.0005980766971618316,
      "loss": 3.2135,
      "step": 8309
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.210228681564331,
      "learning_rate": 0.0005980762346849358,
      "loss": 3.0065,
      "step": 8310
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.362955331802368,
      "learning_rate": 0.0005980757721526221,
      "loss": 3.2287,
      "step": 8311
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.770756483078003,
      "learning_rate": 0.0005980753095648904,
      "loss": 3.2053,
      "step": 8312
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6608067750930786,
      "learning_rate": 0.0005980748469217409,
      "loss": 3.2165,
      "step": 8313
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.329343557357788,
      "learning_rate": 0.0005980743842231737,
      "loss": 3.259,
      "step": 8314
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.246497631072998,
      "learning_rate": 0.0005980739214691888,
      "loss": 3.0934,
      "step": 8315
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6096739768981934,
      "learning_rate": 0.0005980734586597864,
      "loss": 3.4914,
      "step": 8316
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6112933158874512,
      "learning_rate": 0.0005980729957949665,
      "loss": 3.3408,
      "step": 8317
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.609330654144287,
      "learning_rate": 0.0005980725328747291,
      "loss": 3.0671,
      "step": 8318
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7233296632766724,
      "learning_rate": 0.0005980720698990745,
      "loss": 3.0747,
      "step": 8319
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8709094524383545,
      "learning_rate": 0.0005980716068680026,
      "loss": 3.2552,
      "step": 8320
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5159085988998413,
      "learning_rate": 0.0005980711437815136,
      "loss": 3.0921,
      "step": 8321
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6682325601577759,
      "learning_rate": 0.0005980706806396075,
      "loss": 3.2998,
      "step": 8322
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4614698886871338,
      "learning_rate": 0.0005980702174422846,
      "loss": 3.1283,
      "step": 8323
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.629728078842163,
      "learning_rate": 0.0005980697541895446,
      "loss": 3.2537,
      "step": 8324
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.524618148803711,
      "learning_rate": 0.0005980692908813879,
      "loss": 3.2971,
      "step": 8325
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7228986024856567,
      "learning_rate": 0.0005980688275178146,
      "loss": 3.1465,
      "step": 8326
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4545037746429443,
      "learning_rate": 0.0005980683640988245,
      "loss": 3.1989,
      "step": 8327
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7636260986328125,
      "learning_rate": 0.0005980679006244179,
      "loss": 3.5998,
      "step": 8328
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4030883312225342,
      "learning_rate": 0.000598067437094595,
      "loss": 3.1026,
      "step": 8329
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8507332801818848,
      "learning_rate": 0.0005980669735093556,
      "loss": 3.177,
      "step": 8330
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.278231382369995,
      "learning_rate": 0.0005980665098687,
      "loss": 3.2193,
      "step": 8331
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6411808729171753,
      "learning_rate": 0.0005980660461726281,
      "loss": 3.3678,
      "step": 8332
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.374002456665039,
      "learning_rate": 0.0005980655824211401,
      "loss": 3.0677,
      "step": 8333
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.624483346939087,
      "learning_rate": 0.0005980651186142362,
      "loss": 3.1399,
      "step": 8334
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4010648727416992,
      "learning_rate": 0.0005980646547519164,
      "loss": 3.362,
      "step": 8335
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5980496406555176,
      "learning_rate": 0.0005980641908341805,
      "loss": 3.0478,
      "step": 8336
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4082657098770142,
      "learning_rate": 0.0005980637268610291,
      "loss": 3.157,
      "step": 8337
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6829261779785156,
      "learning_rate": 0.0005980632628324619,
      "loss": 3.3819,
      "step": 8338
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6598767042160034,
      "learning_rate": 0.0005980627987484791,
      "loss": 3.331,
      "step": 8339
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.892957091331482,
      "learning_rate": 0.0005980623346090809,
      "loss": 3.0776,
      "step": 8340
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5592217445373535,
      "learning_rate": 0.0005980618704142672,
      "loss": 3.0773,
      "step": 8341
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4226371049880981,
      "learning_rate": 0.0005980614061640382,
      "loss": 3.091,
      "step": 8342
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3135254383087158,
      "learning_rate": 0.000598060941858394,
      "loss": 3.1876,
      "step": 8343
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.600111484527588,
      "learning_rate": 0.0005980604774973346,
      "loss": 3.3301,
      "step": 8344
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8290116786956787,
      "learning_rate": 0.0005980600130808602,
      "loss": 3.2405,
      "step": 8345
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5654147863388062,
      "learning_rate": 0.0005980595486089707,
      "loss": 2.9412,
      "step": 8346
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.437535285949707,
      "learning_rate": 0.0005980590840816663,
      "loss": 3.0432,
      "step": 8347
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.41706383228302,
      "learning_rate": 0.0005980586194989472,
      "loss": 3.1992,
      "step": 8348
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7323840856552124,
      "learning_rate": 0.0005980581548608132,
      "loss": 3.2028,
      "step": 8349
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6731549501419067,
      "learning_rate": 0.0005980576901672647,
      "loss": 3.0531,
      "step": 8350
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7621163129806519,
      "learning_rate": 0.0005980572254183017,
      "loss": 3.2607,
      "step": 8351
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8890674114227295,
      "learning_rate": 0.0005980567606139242,
      "loss": 3.1759,
      "step": 8352
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.653141736984253,
      "learning_rate": 0.0005980562957541323,
      "loss": 3.1438,
      "step": 8353
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5014532804489136,
      "learning_rate": 0.0005980558308389261,
      "loss": 3.242,
      "step": 8354
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8688479661941528,
      "learning_rate": 0.0005980553658683056,
      "loss": 3.4876,
      "step": 8355
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1477489471435547,
      "learning_rate": 0.0005980549008422712,
      "loss": 3.2893,
      "step": 8356
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.090587615966797,
      "learning_rate": 0.0005980544357608226,
      "loss": 3.4691,
      "step": 8357
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.714537501335144,
      "learning_rate": 0.0005980539706239601,
      "loss": 3.1653,
      "step": 8358
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4544703960418701,
      "learning_rate": 0.0005980535054316837,
      "loss": 2.986,
      "step": 8359
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7254366874694824,
      "learning_rate": 0.0005980530401839936,
      "loss": 3.0726,
      "step": 8360
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5469303131103516,
      "learning_rate": 0.0005980525748808897,
      "loss": 3.0658,
      "step": 8361
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.806388020515442,
      "learning_rate": 0.0005980521095223724,
      "loss": 3.4358,
      "step": 8362
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.598212718963623,
      "learning_rate": 0.0005980516441084413,
      "loss": 3.2376,
      "step": 8363
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4930177927017212,
      "learning_rate": 0.0005980511786390971,
      "loss": 3.0464,
      "step": 8364
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8039366006851196,
      "learning_rate": 0.0005980507131143393,
      "loss": 3.2826,
      "step": 8365
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8889456987380981,
      "learning_rate": 0.0005980502475341683,
      "loss": 3.1988,
      "step": 8366
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6967823505401611,
      "learning_rate": 0.0005980497818985842,
      "loss": 3.2622,
      "step": 8367
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0953640937805176,
      "learning_rate": 0.000598049316207587,
      "loss": 3.262,
      "step": 8368
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3926572799682617,
      "learning_rate": 0.0005980488504611769,
      "loss": 3.4427,
      "step": 8369
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.66684091091156,
      "learning_rate": 0.0005980483846593537,
      "loss": 3.2186,
      "step": 8370
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8561071157455444,
      "learning_rate": 0.0005980479188021178,
      "loss": 3.2567,
      "step": 8371
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8770129680633545,
      "learning_rate": 0.0005980474528894692,
      "loss": 3.1395,
      "step": 8372
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3573616743087769,
      "learning_rate": 0.0005980469869214079,
      "loss": 3.0664,
      "step": 8373
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.500769853591919,
      "learning_rate": 0.0005980465208979339,
      "loss": 2.9275,
      "step": 8374
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6998863220214844,
      "learning_rate": 0.0005980460548190476,
      "loss": 3.1706,
      "step": 8375
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.464158535003662,
      "learning_rate": 0.0005980455886847489,
      "loss": 3.5163,
      "step": 8376
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9462110996246338,
      "learning_rate": 0.0005980451224950379,
      "loss": 3.1084,
      "step": 8377
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6017329692840576,
      "learning_rate": 0.0005980446562499146,
      "loss": 3.0946,
      "step": 8378
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7420517206192017,
      "learning_rate": 0.0005980441899493792,
      "loss": 3.3558,
      "step": 8379
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5694187879562378,
      "learning_rate": 0.0005980437235934318,
      "loss": 3.1328,
      "step": 8380
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.579972743988037,
      "learning_rate": 0.0005980432571820724,
      "loss": 3.3599,
      "step": 8381
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5666344165802002,
      "learning_rate": 0.0005980427907153012,
      "loss": 3.1605,
      "step": 8382
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5491032600402832,
      "learning_rate": 0.0005980423241931182,
      "loss": 3.3311,
      "step": 8383
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1303021907806396,
      "learning_rate": 0.0005980418576155235,
      "loss": 3.1839,
      "step": 8384
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4111689329147339,
      "learning_rate": 0.0005980413909825172,
      "loss": 3.0919,
      "step": 8385
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4854578971862793,
      "learning_rate": 0.0005980409242940993,
      "loss": 2.714,
      "step": 8386
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5450135469436646,
      "learning_rate": 0.0005980404575502701,
      "loss": 3.2815,
      "step": 8387
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7375670671463013,
      "learning_rate": 0.0005980399907510295,
      "loss": 3.1371,
      "step": 8388
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5870176553726196,
      "learning_rate": 0.0005980395238963775,
      "loss": 3.3517,
      "step": 8389
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9848911762237549,
      "learning_rate": 0.0005980390569863145,
      "loss": 3.1135,
      "step": 8390
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5660520792007446,
      "learning_rate": 0.0005980385900208403,
      "loss": 3.0692,
      "step": 8391
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.51407790184021,
      "learning_rate": 0.0005980381229999552,
      "loss": 3.0966,
      "step": 8392
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4143028259277344,
      "learning_rate": 0.0005980376559236592,
      "loss": 3.3842,
      "step": 8393
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.603861927986145,
      "learning_rate": 0.0005980371887919524,
      "loss": 3.1554,
      "step": 8394
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8220019340515137,
      "learning_rate": 0.0005980367216048347,
      "loss": 3.1517,
      "step": 8395
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3341683149337769,
      "learning_rate": 0.0005980362543623064,
      "loss": 3.0405,
      "step": 8396
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4087207317352295,
      "learning_rate": 0.0005980357870643676,
      "loss": 3.2352,
      "step": 8397
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3366003036499023,
      "learning_rate": 0.0005980353197110183,
      "loss": 3.3309,
      "step": 8398
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.382741928100586,
      "learning_rate": 0.0005980348523022586,
      "loss": 3.0987,
      "step": 8399
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6689091920852661,
      "learning_rate": 0.0005980343848380886,
      "loss": 3.2383,
      "step": 8400
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7694599628448486,
      "learning_rate": 0.0005980339173185084,
      "loss": 3.2408,
      "step": 8401
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6694756746292114,
      "learning_rate": 0.0005980334497435179,
      "loss": 3.2547,
      "step": 8402
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4989973306655884,
      "learning_rate": 0.0005980329821131176,
      "loss": 3.2993,
      "step": 8403
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6531652212142944,
      "learning_rate": 0.0005980325144273071,
      "loss": 3.1372,
      "step": 8404
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.468602180480957,
      "learning_rate": 0.0005980320466860869,
      "loss": 3.0216,
      "step": 8405
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5625115633010864,
      "learning_rate": 0.0005980315788894569,
      "loss": 3.4439,
      "step": 8406
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8765647411346436,
      "learning_rate": 0.0005980311110374173,
      "loss": 3.0443,
      "step": 8407
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6512953042984009,
      "learning_rate": 0.000598030643129968,
      "loss": 3.3061,
      "step": 8408
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5585358142852783,
      "learning_rate": 0.000598030175167109,
      "loss": 3.259,
      "step": 8409
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.519584059715271,
      "learning_rate": 0.0005980297071488408,
      "loss": 3.1191,
      "step": 8410
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5508803129196167,
      "learning_rate": 0.0005980292390751631,
      "loss": 3.4822,
      "step": 8411
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.625688076019287,
      "learning_rate": 0.0005980287709460763,
      "loss": 2.9528,
      "step": 8412
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9799771308898926,
      "learning_rate": 0.0005980283027615801,
      "loss": 3.2675,
      "step": 8413
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.1345889568328857,
      "learning_rate": 0.0005980278345216751,
      "loss": 3.1371,
      "step": 8414
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.181990623474121,
      "learning_rate": 0.0005980273662263609,
      "loss": 3.4757,
      "step": 8415
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5021406412124634,
      "learning_rate": 0.0005980268978756379,
      "loss": 3.3883,
      "step": 8416
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4413564205169678,
      "learning_rate": 0.000598026429469506,
      "loss": 3.2171,
      "step": 8417
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5934298038482666,
      "learning_rate": 0.0005980259610079653,
      "loss": 3.3047,
      "step": 8418
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.845653533935547,
      "learning_rate": 0.000598025492491016,
      "loss": 3.2771,
      "step": 8419
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1639246940612793,
      "learning_rate": 0.0005980250239186581,
      "loss": 3.0987,
      "step": 8420
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6584945917129517,
      "learning_rate": 0.0005980245552908919,
      "loss": 2.9655,
      "step": 8421
    },
    {
      "epoch": 0.11,
      "grad_norm": 4.795461654663086,
      "learning_rate": 0.0005980240866077172,
      "loss": 3.0339,
      "step": 8422
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.219172716140747,
      "learning_rate": 0.0005980236178691342,
      "loss": 3.4539,
      "step": 8423
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.961395025253296,
      "learning_rate": 0.0005980231490751429,
      "loss": 3.0801,
      "step": 8424
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7586681842803955,
      "learning_rate": 0.0005980226802257435,
      "loss": 3.4095,
      "step": 8425
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.614513635635376,
      "learning_rate": 0.0005980222113209361,
      "loss": 2.9854,
      "step": 8426
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.3968722820281982,
      "learning_rate": 0.0005980217423607208,
      "loss": 3.0312,
      "step": 8427
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.209883451461792,
      "learning_rate": 0.0005980212733450975,
      "loss": 3.1882,
      "step": 8428
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5936245918273926,
      "learning_rate": 0.0005980208042740666,
      "loss": 3.3556,
      "step": 8429
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.292604446411133,
      "learning_rate": 0.0005980203351476279,
      "loss": 3.1861,
      "step": 8430
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6106724739074707,
      "learning_rate": 0.0005980198659657816,
      "loss": 3.1598,
      "step": 8431
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7233808040618896,
      "learning_rate": 0.0005980193967285278,
      "loss": 3.3342,
      "step": 8432
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0258476734161377,
      "learning_rate": 0.0005980189274358666,
      "loss": 3.0638,
      "step": 8433
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.057814836502075,
      "learning_rate": 0.0005980184580877978,
      "loss": 3.255,
      "step": 8434
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3650182485580444,
      "learning_rate": 0.0005980179886843221,
      "loss": 3.3197,
      "step": 8435
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4393056631088257,
      "learning_rate": 0.0005980175192254391,
      "loss": 3.1609,
      "step": 8436
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5320297479629517,
      "learning_rate": 0.000598017049711149,
      "loss": 3.0721,
      "step": 8437
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8489739894866943,
      "learning_rate": 0.0005980165801414518,
      "loss": 3.0272,
      "step": 8438
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6100025177001953,
      "learning_rate": 0.0005980161105163479,
      "loss": 3.2549,
      "step": 8439
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9140912294387817,
      "learning_rate": 0.0005980156408358369,
      "loss": 3.2146,
      "step": 8440
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3900319337844849,
      "learning_rate": 0.0005980151710999194,
      "loss": 3.1715,
      "step": 8441
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.844002366065979,
      "learning_rate": 0.0005980147013085952,
      "loss": 3.0672,
      "step": 8442
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7222766876220703,
      "learning_rate": 0.0005980142314618644,
      "loss": 3.2772,
      "step": 8443
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6113853454589844,
      "learning_rate": 0.0005980137615597272,
      "loss": 3.0291,
      "step": 8444
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3012446165084839,
      "learning_rate": 0.0005980132916021836,
      "loss": 3.0842,
      "step": 8445
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.56657075881958,
      "learning_rate": 0.0005980128215892336,
      "loss": 3.5616,
      "step": 8446
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7389289140701294,
      "learning_rate": 0.0005980123515208775,
      "loss": 3.3063,
      "step": 8447
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6968684196472168,
      "learning_rate": 0.0005980118813971153,
      "loss": 3.4599,
      "step": 8448
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.739119052886963,
      "learning_rate": 0.000598011411217947,
      "loss": 2.9713,
      "step": 8449
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7022855281829834,
      "learning_rate": 0.0005980109409833728,
      "loss": 3.181,
      "step": 8450
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3124430179595947,
      "learning_rate": 0.0005980104706933926,
      "loss": 3.3173,
      "step": 8451
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5382096767425537,
      "learning_rate": 0.0005980100003480068,
      "loss": 3.2101,
      "step": 8452
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6250146627426147,
      "learning_rate": 0.0005980095299472153,
      "loss": 3.154,
      "step": 8453
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5605876445770264,
      "learning_rate": 0.0005980090594910181,
      "loss": 3.6054,
      "step": 8454
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2986514568328857,
      "learning_rate": 0.0005980085889794155,
      "loss": 3.2512,
      "step": 8455
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7371774911880493,
      "learning_rate": 0.0005980081184124074,
      "loss": 3.2423,
      "step": 8456
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.547208547592163,
      "learning_rate": 0.000598007647789994,
      "loss": 3.4488,
      "step": 8457
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5490139722824097,
      "learning_rate": 0.0005980071771121753,
      "loss": 3.191,
      "step": 8458
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3255034685134888,
      "learning_rate": 0.0005980067063789515,
      "loss": 3.0562,
      "step": 8459
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5764195919036865,
      "learning_rate": 0.0005980062355903226,
      "loss": 3.3774,
      "step": 8460
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.046957015991211,
      "learning_rate": 0.0005980057647462887,
      "loss": 3.1677,
      "step": 8461
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9848778247833252,
      "learning_rate": 0.0005980052938468499,
      "loss": 3.0218,
      "step": 8462
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8507862091064453,
      "learning_rate": 0.0005980048228920065,
      "loss": 3.0963,
      "step": 8463
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8836276531219482,
      "learning_rate": 0.0005980043518817581,
      "loss": 3.2708,
      "step": 8464
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9913333654403687,
      "learning_rate": 0.0005980038808161052,
      "loss": 3.2925,
      "step": 8465
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6037955284118652,
      "learning_rate": 0.0005980034096950477,
      "loss": 3.1135,
      "step": 8466
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.277312994003296,
      "learning_rate": 0.0005980029385185857,
      "loss": 3.246,
      "step": 8467
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7937180995941162,
      "learning_rate": 0.0005980024672867194,
      "loss": 3.114,
      "step": 8468
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.415223479270935,
      "learning_rate": 0.0005980019959994488,
      "loss": 3.1727,
      "step": 8469
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3695155382156372,
      "learning_rate": 0.0005980015246567741,
      "loss": 3.2312,
      "step": 8470
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7054167985916138,
      "learning_rate": 0.0005980010532586951,
      "loss": 3.2478,
      "step": 8471
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.538191795349121,
      "learning_rate": 0.0005980005818052122,
      "loss": 3.365,
      "step": 8472
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.402003288269043,
      "learning_rate": 0.0005980001102963254,
      "loss": 3.1552,
      "step": 8473
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5484423637390137,
      "learning_rate": 0.0005979996387320346,
      "loss": 3.2837,
      "step": 8474
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3482295274734497,
      "learning_rate": 0.0005979991671123402,
      "loss": 3.2836,
      "step": 8475
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3596864938735962,
      "learning_rate": 0.0005979986954372421,
      "loss": 3.2934,
      "step": 8476
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5225815773010254,
      "learning_rate": 0.0005979982237067404,
      "loss": 3.3142,
      "step": 8477
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5140165090560913,
      "learning_rate": 0.0005979977519208351,
      "loss": 3.2277,
      "step": 8478
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4933686256408691,
      "learning_rate": 0.0005979972800795265,
      "loss": 3.1163,
      "step": 8479
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7193944454193115,
      "learning_rate": 0.0005979968081828146,
      "loss": 3.1622,
      "step": 8480
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4781970977783203,
      "learning_rate": 0.0005979963362306995,
      "loss": 3.1493,
      "step": 8481
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6575480699539185,
      "learning_rate": 0.0005979958642231811,
      "loss": 3.5558,
      "step": 8482
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5602773427963257,
      "learning_rate": 0.0005979953921602597,
      "loss": 3.3355,
      "step": 8483
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9684046506881714,
      "learning_rate": 0.0005979949200419355,
      "loss": 3.14,
      "step": 8484
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7103688716888428,
      "learning_rate": 0.0005979944478682083,
      "loss": 3.0844,
      "step": 8485
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2856340408325195,
      "learning_rate": 0.0005979939756390782,
      "loss": 3.3139,
      "step": 8486
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8917139768600464,
      "learning_rate": 0.0005979935033545456,
      "loss": 2.838,
      "step": 8487
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8915761709213257,
      "learning_rate": 0.0005979930310146102,
      "loss": 3.2171,
      "step": 8488
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5502122640609741,
      "learning_rate": 0.0005979925586192722,
      "loss": 3.1687,
      "step": 8489
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5425680875778198,
      "learning_rate": 0.000597992086168532,
      "loss": 3.2371,
      "step": 8490
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5803859233856201,
      "learning_rate": 0.0005979916136623893,
      "loss": 2.9463,
      "step": 8491
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.945021152496338,
      "learning_rate": 0.0005979911411008444,
      "loss": 3.3397,
      "step": 8492
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8131520748138428,
      "learning_rate": 0.0005979906684838973,
      "loss": 3.1305,
      "step": 8493
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4147547483444214,
      "learning_rate": 0.0005979901958115481,
      "loss": 3.5128,
      "step": 8494
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4737380743026733,
      "learning_rate": 0.0005979897230837969,
      "loss": 3.1138,
      "step": 8495
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6486222743988037,
      "learning_rate": 0.0005979892503006439,
      "loss": 3.2915,
      "step": 8496
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3890571594238281,
      "learning_rate": 0.0005979887774620889,
      "loss": 3.4055,
      "step": 8497
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7866836786270142,
      "learning_rate": 0.0005979883045681323,
      "loss": 3.3188,
      "step": 8498
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7433220148086548,
      "learning_rate": 0.000597987831618774,
      "loss": 3.4166,
      "step": 8499
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8861280679702759,
      "learning_rate": 0.000597987358614014,
      "loss": 3.1012,
      "step": 8500
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6516684293746948,
      "learning_rate": 0.0005979868855538527,
      "loss": 3.0572,
      "step": 8501
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4756641387939453,
      "learning_rate": 0.0005979864124382899,
      "loss": 3.0823,
      "step": 8502
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.059196949005127,
      "learning_rate": 0.0005979859392673259,
      "loss": 3.4088,
      "step": 8503
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.50235915184021,
      "learning_rate": 0.0005979854660409606,
      "loss": 2.863,
      "step": 8504
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.7169885635375977,
      "learning_rate": 0.0005979849927591943,
      "loss": 3.0865,
      "step": 8505
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1416773796081543,
      "learning_rate": 0.0005979845194220267,
      "loss": 3.1593,
      "step": 8506
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7670135498046875,
      "learning_rate": 0.0005979840460294584,
      "loss": 3.1505,
      "step": 8507
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7600164413452148,
      "learning_rate": 0.0005979835725814892,
      "loss": 3.1245,
      "step": 8508
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.380336046218872,
      "learning_rate": 0.0005979830990781193,
      "loss": 3.0449,
      "step": 8509
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4870225191116333,
      "learning_rate": 0.0005979826255193486,
      "loss": 2.9022,
      "step": 8510
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.657476544380188,
      "learning_rate": 0.0005979821519051773,
      "loss": 3.1971,
      "step": 8511
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1912829875946045,
      "learning_rate": 0.0005979816782356054,
      "loss": 2.9688,
      "step": 8512
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.115568161010742,
      "learning_rate": 0.0005979812045106333,
      "loss": 3.0075,
      "step": 8513
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5067461729049683,
      "learning_rate": 0.0005979807307302607,
      "loss": 3.3126,
      "step": 8514
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.889337420463562,
      "learning_rate": 0.000597980256894488,
      "loss": 3.4061,
      "step": 8515
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.640782356262207,
      "learning_rate": 0.0005979797830033151,
      "loss": 3.1383,
      "step": 8516
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.09922194480896,
      "learning_rate": 0.0005979793090567421,
      "loss": 3.2573,
      "step": 8517
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.682031273841858,
      "learning_rate": 0.0005979788350547691,
      "loss": 3.3891,
      "step": 8518
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5921859741210938,
      "learning_rate": 0.0005979783609973962,
      "loss": 3.1421,
      "step": 8519
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5021579265594482,
      "learning_rate": 0.0005979778868846237,
      "loss": 3.1966,
      "step": 8520
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5186975002288818,
      "learning_rate": 0.0005979774127164513,
      "loss": 3.2442,
      "step": 8521
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6999021768569946,
      "learning_rate": 0.0005979769384928792,
      "loss": 3.4137,
      "step": 8522
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.7080554962158203,
      "learning_rate": 0.0005979764642139077,
      "loss": 3.306,
      "step": 8523
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7696292400360107,
      "learning_rate": 0.0005979759898795368,
      "loss": 3.2326,
      "step": 8524
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8500051498413086,
      "learning_rate": 0.0005979755154897664,
      "loss": 3.1777,
      "step": 8525
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2836685180664062,
      "learning_rate": 0.0005979750410445967,
      "loss": 3.151,
      "step": 8526
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9424713850021362,
      "learning_rate": 0.000597974566544028,
      "loss": 3.1711,
      "step": 8527
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7696841955184937,
      "learning_rate": 0.00059797409198806,
      "loss": 3.3036,
      "step": 8528
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7249027490615845,
      "learning_rate": 0.0005979736173766931,
      "loss": 3.2905,
      "step": 8529
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8930507898330688,
      "learning_rate": 0.0005979731427099272,
      "loss": 3.0982,
      "step": 8530
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3752139806747437,
      "learning_rate": 0.0005979726679877627,
      "loss": 3.2415,
      "step": 8531
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7918132543563843,
      "learning_rate": 0.0005979721932101991,
      "loss": 3.1358,
      "step": 8532
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.536753535270691,
      "learning_rate": 0.0005979717183772371,
      "loss": 3.3072,
      "step": 8533
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7283669710159302,
      "learning_rate": 0.0005979712434888765,
      "loss": 3.4139,
      "step": 8534
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7623039484024048,
      "learning_rate": 0.0005979707685451174,
      "loss": 3.4069,
      "step": 8535
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6807628870010376,
      "learning_rate": 0.0005979702935459599,
      "loss": 3.43,
      "step": 8536
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6884217262268066,
      "learning_rate": 0.0005979698184914041,
      "loss": 3.2883,
      "step": 8537
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4627478122711182,
      "learning_rate": 0.0005979693433814501,
      "loss": 3.1266,
      "step": 8538
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7279242277145386,
      "learning_rate": 0.0005979688682160979,
      "loss": 3.2858,
      "step": 8539
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6105897426605225,
      "learning_rate": 0.0005979683929953478,
      "loss": 3.0416,
      "step": 8540
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8189663887023926,
      "learning_rate": 0.0005979679177191996,
      "loss": 3.3599,
      "step": 8541
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.536616563796997,
      "learning_rate": 0.0005979674423876537,
      "loss": 3.3926,
      "step": 8542
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.941680669784546,
      "learning_rate": 0.0005979669670007099,
      "loss": 3.0256,
      "step": 8543
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3867805004119873,
      "learning_rate": 0.0005979664915583685,
      "loss": 3.3051,
      "step": 8544
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1081271171569824,
      "learning_rate": 0.0005979660160606294,
      "loss": 3.2155,
      "step": 8545
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9532184600830078,
      "learning_rate": 0.0005979655405074928,
      "loss": 3.0849,
      "step": 8546
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6731313467025757,
      "learning_rate": 0.0005979650648989589,
      "loss": 3.1683,
      "step": 8547
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6694785356521606,
      "learning_rate": 0.0005979645892350276,
      "loss": 3.0827,
      "step": 8548
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.555026054382324,
      "learning_rate": 0.000597964113515699,
      "loss": 3.1936,
      "step": 8549
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.375991106033325,
      "learning_rate": 0.0005979636377409734,
      "loss": 3.0179,
      "step": 8550
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.125455856323242,
      "learning_rate": 0.0005979631619108507,
      "loss": 3.2938,
      "step": 8551
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9054869413375854,
      "learning_rate": 0.0005979626860253309,
      "loss": 3.1855,
      "step": 8552
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.606947660446167,
      "learning_rate": 0.0005979622100844142,
      "loss": 3.3516,
      "step": 8553
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9806947708129883,
      "learning_rate": 0.0005979617340881009,
      "loss": 3.1489,
      "step": 8554
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9414464235305786,
      "learning_rate": 0.0005979612580363907,
      "loss": 3.1788,
      "step": 8555
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8692444562911987,
      "learning_rate": 0.0005979607819292839,
      "loss": 3.2238,
      "step": 8556
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8481396436691284,
      "learning_rate": 0.0005979603057667806,
      "loss": 3.3003,
      "step": 8557
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9216985702514648,
      "learning_rate": 0.000597959829548881,
      "loss": 3.097,
      "step": 8558
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7582191228866577,
      "learning_rate": 0.0005979593532755848,
      "loss": 3.3842,
      "step": 8559
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4961940050125122,
      "learning_rate": 0.0005979588769468925,
      "loss": 3.3516,
      "step": 8560
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.080735206604004,
      "learning_rate": 0.0005979584005628041,
      "loss": 2.9764,
      "step": 8561
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3813964128494263,
      "learning_rate": 0.0005979579241233194,
      "loss": 3.1469,
      "step": 8562
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6725928783416748,
      "learning_rate": 0.0005979574476284387,
      "loss": 2.9644,
      "step": 8563
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.45408034324646,
      "learning_rate": 0.0005979569710781622,
      "loss": 3.147,
      "step": 8564
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6654680967330933,
      "learning_rate": 0.0005979564944724897,
      "loss": 3.1911,
      "step": 8565
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6747581958770752,
      "learning_rate": 0.0005979560178114217,
      "loss": 3.0431,
      "step": 8566
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3335163593292236,
      "learning_rate": 0.000597955541094958,
      "loss": 3.1391,
      "step": 8567
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6738691329956055,
      "learning_rate": 0.0005979550643230987,
      "loss": 3.2692,
      "step": 8568
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9030143022537231,
      "learning_rate": 0.0005979545874958438,
      "loss": 3.0008,
      "step": 8569
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8620884418487549,
      "learning_rate": 0.0005979541106131937,
      "loss": 3.5563,
      "step": 8570
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9192919731140137,
      "learning_rate": 0.0005979536336751482,
      "loss": 2.9159,
      "step": 8571
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.309270143508911,
      "learning_rate": 0.0005979531566817076,
      "loss": 3.0448,
      "step": 8572
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.501050353050232,
      "learning_rate": 0.0005979526796328718,
      "loss": 3.7286,
      "step": 8573
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.273139238357544,
      "learning_rate": 0.000597952202528641,
      "loss": 3.3363,
      "step": 8574
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4752495288848877,
      "learning_rate": 0.0005979517253690153,
      "loss": 3.1345,
      "step": 8575
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3953301906585693,
      "learning_rate": 0.0005979512481539946,
      "loss": 3.279,
      "step": 8576
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9074469804763794,
      "learning_rate": 0.0005979507708835793,
      "loss": 3.0827,
      "step": 8577
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6134648323059082,
      "learning_rate": 0.0005979502935577692,
      "loss": 3.198,
      "step": 8578
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3476072549819946,
      "learning_rate": 0.0005979498161765647,
      "loss": 3.0963,
      "step": 8579
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.413447380065918,
      "learning_rate": 0.0005979493387399655,
      "loss": 3.2185,
      "step": 8580
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5086613893508911,
      "learning_rate": 0.0005979488612479721,
      "loss": 3.2807,
      "step": 8581
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6627367734909058,
      "learning_rate": 0.0005979483837005842,
      "loss": 3.1445,
      "step": 8582
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5174556970596313,
      "learning_rate": 0.0005979479060978022,
      "loss": 3.172,
      "step": 8583
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9928107261657715,
      "learning_rate": 0.0005979474284396259,
      "loss": 3.217,
      "step": 8584
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4504058361053467,
      "learning_rate": 0.0005979469507260557,
      "loss": 3.149,
      "step": 8585
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8061209917068481,
      "learning_rate": 0.0005979464729570915,
      "loss": 3.255,
      "step": 8586
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7753701210021973,
      "learning_rate": 0.0005979459951327334,
      "loss": 3.2198,
      "step": 8587
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6895835399627686,
      "learning_rate": 0.0005979455172529816,
      "loss": 3.0192,
      "step": 8588
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5912835597991943,
      "learning_rate": 0.0005979450393178359,
      "loss": 3.4367,
      "step": 8589
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4494491815567017,
      "learning_rate": 0.0005979445613272967,
      "loss": 3.1271,
      "step": 8590
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7320780754089355,
      "learning_rate": 0.0005979440832813641,
      "loss": 3.0784,
      "step": 8591
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.558082938194275,
      "learning_rate": 0.0005979436051800379,
      "loss": 3.1986,
      "step": 8592
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.272104024887085,
      "learning_rate": 0.0005979431270233185,
      "loss": 2.8603,
      "step": 8593
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5400294065475464,
      "learning_rate": 0.0005979426488112058,
      "loss": 3.1472,
      "step": 8594
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4622905254364014,
      "learning_rate": 0.0005979421705436999,
      "loss": 3.061,
      "step": 8595
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7884989976882935,
      "learning_rate": 0.0005979416922208009,
      "loss": 3.0089,
      "step": 8596
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7509721517562866,
      "learning_rate": 0.000597941213842509,
      "loss": 3.2462,
      "step": 8597
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2439563274383545,
      "learning_rate": 0.0005979407354088242,
      "loss": 3.1286,
      "step": 8598
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0256614685058594,
      "learning_rate": 0.0005979402569197466,
      "loss": 2.8801,
      "step": 8599
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0952072143554688,
      "learning_rate": 0.0005979397783752761,
      "loss": 3.3365,
      "step": 8600
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5733429193496704,
      "learning_rate": 0.0005979392997754132,
      "loss": 3.2621,
      "step": 8601
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.703891396522522,
      "learning_rate": 0.0005979388211201577,
      "loss": 3.2202,
      "step": 8602
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.53935706615448,
      "learning_rate": 0.0005979383424095097,
      "loss": 3.4849,
      "step": 8603
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0274603366851807,
      "learning_rate": 0.0005979378636434693,
      "loss": 2.9335,
      "step": 8604
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5410704612731934,
      "learning_rate": 0.0005979373848220367,
      "loss": 3.3415,
      "step": 8605
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8580563068389893,
      "learning_rate": 0.0005979369059452119,
      "loss": 3.1802,
      "step": 8606
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.074173927307129,
      "learning_rate": 0.0005979364270129951,
      "loss": 3.3244,
      "step": 8607
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.966193675994873,
      "learning_rate": 0.0005979359480253862,
      "loss": 3.0836,
      "step": 8608
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9213215112686157,
      "learning_rate": 0.0005979354689823853,
      "loss": 3.0093,
      "step": 8609
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.132908821105957,
      "learning_rate": 0.0005979349898839928,
      "loss": 3.1937,
      "step": 8610
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9532774686813354,
      "learning_rate": 0.0005979345107302084,
      "loss": 3.4431,
      "step": 8611
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.926534652709961,
      "learning_rate": 0.0005979340315210324,
      "loss": 3.1198,
      "step": 8612
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.909624695777893,
      "learning_rate": 0.0005979335522564648,
      "loss": 3.2145,
      "step": 8613
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.074542284011841,
      "learning_rate": 0.0005979330729365057,
      "loss": 3.0376,
      "step": 8614
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8918135166168213,
      "learning_rate": 0.0005979325935611553,
      "loss": 2.9609,
      "step": 8615
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4879754781723022,
      "learning_rate": 0.0005979321141304135,
      "loss": 3.1436,
      "step": 8616
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.158022165298462,
      "learning_rate": 0.0005979316346442806,
      "loss": 3.0982,
      "step": 8617
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6727489233016968,
      "learning_rate": 0.0005979311551027565,
      "loss": 3.2923,
      "step": 8618
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5338433980941772,
      "learning_rate": 0.0005979306755058415,
      "loss": 3.5301,
      "step": 8619
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.792795181274414,
      "learning_rate": 0.0005979301958535354,
      "loss": 3.0792,
      "step": 8620
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5664777755737305,
      "learning_rate": 0.0005979297161458386,
      "loss": 3.0604,
      "step": 8621
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5578726530075073,
      "learning_rate": 0.0005979292363827509,
      "loss": 3.2025,
      "step": 8622
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5636333227157593,
      "learning_rate": 0.0005979287565642727,
      "loss": 3.1064,
      "step": 8623
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.1390864849090576,
      "learning_rate": 0.0005979282766904037,
      "loss": 3.2283,
      "step": 8624
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.0050644874572754,
      "learning_rate": 0.0005979277967611445,
      "loss": 3.0436,
      "step": 8625
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6608270406723022,
      "learning_rate": 0.0005979273167764947,
      "loss": 3.1165,
      "step": 8626
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.0290632247924805,
      "learning_rate": 0.0005979268367364546,
      "loss": 3.0984,
      "step": 8627
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4107155799865723,
      "learning_rate": 0.0005979263566410244,
      "loss": 3.2328,
      "step": 8628
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4388560056686401,
      "learning_rate": 0.0005979258764902039,
      "loss": 3.0479,
      "step": 8629
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3517215251922607,
      "learning_rate": 0.0005979253962839934,
      "loss": 3.1256,
      "step": 8630
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.102796792984009,
      "learning_rate": 0.0005979249160223929,
      "loss": 2.9408,
      "step": 8631
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5663551092147827,
      "learning_rate": 0.0005979244357054026,
      "loss": 3.093,
      "step": 8632
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8518972396850586,
      "learning_rate": 0.0005979239553330226,
      "loss": 3.4222,
      "step": 8633
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5553069114685059,
      "learning_rate": 0.0005979234749052528,
      "loss": 3.1704,
      "step": 8634
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6320708990097046,
      "learning_rate": 0.0005979229944220936,
      "loss": 3.4322,
      "step": 8635
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.420670986175537,
      "learning_rate": 0.0005979225138835446,
      "loss": 3.3077,
      "step": 8636
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.521714448928833,
      "learning_rate": 0.0005979220332896063,
      "loss": 3.1873,
      "step": 8637
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.415459394454956,
      "learning_rate": 0.0005979215526402788,
      "loss": 3.2879,
      "step": 8638
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3423798084259033,
      "learning_rate": 0.000597921071935562,
      "loss": 3.1066,
      "step": 8639
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8303556442260742,
      "learning_rate": 0.0005979205911754559,
      "loss": 3.1458,
      "step": 8640
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5075663328170776,
      "learning_rate": 0.0005979201103599608,
      "loss": 3.058,
      "step": 8641
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4924447536468506,
      "learning_rate": 0.0005979196294890768,
      "loss": 3.4141,
      "step": 8642
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6444097757339478,
      "learning_rate": 0.0005979191485628039,
      "loss": 3.3426,
      "step": 8643
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.709823727607727,
      "learning_rate": 0.0005979186675811421,
      "loss": 3.3509,
      "step": 8644
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5568469762802124,
      "learning_rate": 0.0005979181865440918,
      "loss": 3.2296,
      "step": 8645
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5185993909835815,
      "learning_rate": 0.0005979177054516527,
      "loss": 3.2576,
      "step": 8646
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6067384481430054,
      "learning_rate": 0.0005979172243038252,
      "loss": 3.0885,
      "step": 8647
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.658684253692627,
      "learning_rate": 0.0005979167431006092,
      "loss": 3.192,
      "step": 8648
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0728392601013184,
      "learning_rate": 0.0005979162618420049,
      "loss": 3.0083,
      "step": 8649
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8583227396011353,
      "learning_rate": 0.0005979157805280122,
      "loss": 3.2052,
      "step": 8650
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.092428684234619,
      "learning_rate": 0.0005979152991586314,
      "loss": 3.5629,
      "step": 8651
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.718709111213684,
      "learning_rate": 0.0005979148177338627,
      "loss": 3.1593,
      "step": 8652
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5823445320129395,
      "learning_rate": 0.0005979143362537058,
      "loss": 3.4192,
      "step": 8653
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3476430177688599,
      "learning_rate": 0.0005979138547181611,
      "loss": 3.1721,
      "step": 8654
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.781407356262207,
      "learning_rate": 0.0005979133731272286,
      "loss": 3.0738,
      "step": 8655
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7645349502563477,
      "learning_rate": 0.0005979128914809083,
      "loss": 3.0647,
      "step": 8656
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1247222423553467,
      "learning_rate": 0.0005979124097792004,
      "loss": 2.9845,
      "step": 8657
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5422338247299194,
      "learning_rate": 0.000597911928022105,
      "loss": 3.3832,
      "step": 8658
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6652076244354248,
      "learning_rate": 0.0005979114462096222,
      "loss": 3.1675,
      "step": 8659
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.529556393623352,
      "learning_rate": 0.0005979109643417519,
      "loss": 3.2603,
      "step": 8660
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.571232795715332,
      "learning_rate": 0.0005979104824184944,
      "loss": 3.5518,
      "step": 8661
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6653265953063965,
      "learning_rate": 0.0005979100004398497,
      "loss": 3.2848,
      "step": 8662
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.763492465019226,
      "learning_rate": 0.0005979095184058179,
      "loss": 3.0084,
      "step": 8663
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.807668685913086,
      "learning_rate": 0.0005979090363163991,
      "loss": 3.3312,
      "step": 8664
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.989999771118164,
      "learning_rate": 0.0005979085541715933,
      "loss": 3.2547,
      "step": 8665
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3256175518035889,
      "learning_rate": 0.0005979080719714009,
      "loss": 3.2734,
      "step": 8666
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4367237091064453,
      "learning_rate": 0.0005979075897158216,
      "loss": 3.179,
      "step": 8667
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8248591423034668,
      "learning_rate": 0.0005979071074048558,
      "loss": 3.147,
      "step": 8668
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5064845085144043,
      "learning_rate": 0.0005979066250385033,
      "loss": 3.2938,
      "step": 8669
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3210821151733398,
      "learning_rate": 0.0005979061426167644,
      "loss": 3.3058,
      "step": 8670
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5918052196502686,
      "learning_rate": 0.000597905660139639,
      "loss": 3.1889,
      "step": 8671
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5992093086242676,
      "learning_rate": 0.0005979051776071275,
      "loss": 3.2755,
      "step": 8672
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4395694732666016,
      "learning_rate": 0.0005979046950192297,
      "loss": 3.3845,
      "step": 8673
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2684240341186523,
      "learning_rate": 0.0005979042123759458,
      "loss": 3.1386,
      "step": 8674
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.536328673362732,
      "learning_rate": 0.0005979037296772758,
      "loss": 3.1617,
      "step": 8675
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4973032474517822,
      "learning_rate": 0.00059790324692322,
      "loss": 3.1527,
      "step": 8676
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6385669708251953,
      "learning_rate": 0.0005979027641137784,
      "loss": 3.3248,
      "step": 8677
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6037919521331787,
      "learning_rate": 0.000597902281248951,
      "loss": 3.2395,
      "step": 8678
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7135226726531982,
      "learning_rate": 0.0005979017983287379,
      "loss": 3.1597,
      "step": 8679
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0072035789489746,
      "learning_rate": 0.0005979013153531393,
      "loss": 3.2029,
      "step": 8680
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6139353513717651,
      "learning_rate": 0.0005979008323221552,
      "loss": 3.2891,
      "step": 8681
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.499106526374817,
      "learning_rate": 0.0005979003492357856,
      "loss": 3.4168,
      "step": 8682
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.441278100013733,
      "learning_rate": 0.0005978998660940308,
      "loss": 3.2146,
      "step": 8683
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5705633163452148,
      "learning_rate": 0.0005978993828968909,
      "loss": 3.3694,
      "step": 8684
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.757236123085022,
      "learning_rate": 0.0005978988996443657,
      "loss": 3.3534,
      "step": 8685
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.383592128753662,
      "learning_rate": 0.0005978984163364556,
      "loss": 3.2112,
      "step": 8686
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.384172797203064,
      "learning_rate": 0.0005978979329731604,
      "loss": 3.3159,
      "step": 8687
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7951152324676514,
      "learning_rate": 0.0005978974495544806,
      "loss": 3.4402,
      "step": 8688
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6323928833007812,
      "learning_rate": 0.0005978969660804159,
      "loss": 3.2518,
      "step": 8689
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5860871076583862,
      "learning_rate": 0.0005978964825509664,
      "loss": 3.217,
      "step": 8690
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.752223253250122,
      "learning_rate": 0.0005978959989661325,
      "loss": 3.3629,
      "step": 8691
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4038301706314087,
      "learning_rate": 0.0005978955153259141,
      "loss": 2.999,
      "step": 8692
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.722798228263855,
      "learning_rate": 0.0005978950316303112,
      "loss": 3.2913,
      "step": 8693
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.598104476928711,
      "learning_rate": 0.000597894547879324,
      "loss": 3.1724,
      "step": 8694
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.821851134300232,
      "learning_rate": 0.0005978940640729526,
      "loss": 2.9764,
      "step": 8695
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5944819450378418,
      "learning_rate": 0.0005978935802111972,
      "loss": 3.2277,
      "step": 8696
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.504010796546936,
      "learning_rate": 0.0005978930962940576,
      "loss": 3.2287,
      "step": 8697
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7188177108764648,
      "learning_rate": 0.0005978926123215341,
      "loss": 3.2967,
      "step": 8698
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.582775354385376,
      "learning_rate": 0.0005978921282936267,
      "loss": 3.4984,
      "step": 8699
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.795542597770691,
      "learning_rate": 0.0005978916442103357,
      "loss": 3.3406,
      "step": 8700
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5967525243759155,
      "learning_rate": 0.0005978911600716608,
      "loss": 3.1459,
      "step": 8701
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.253578782081604,
      "learning_rate": 0.0005978906758776025,
      "loss": 3.2675,
      "step": 8702
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.477527379989624,
      "learning_rate": 0.0005978901916281606,
      "loss": 3.0923,
      "step": 8703
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.462296724319458,
      "learning_rate": 0.0005978897073233353,
      "loss": 3.3157,
      "step": 8704
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8793742656707764,
      "learning_rate": 0.0005978892229631267,
      "loss": 3.1661,
      "step": 8705
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5642379522323608,
      "learning_rate": 0.0005978887385475348,
      "loss": 3.3372,
      "step": 8706
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5503607988357544,
      "learning_rate": 0.0005978882540765599,
      "loss": 3.3666,
      "step": 8707
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.737742304801941,
      "learning_rate": 0.0005978877695502017,
      "loss": 3.2918,
      "step": 8708
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.600427269935608,
      "learning_rate": 0.0005978872849684608,
      "loss": 3.3128,
      "step": 8709
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0429046154022217,
      "learning_rate": 0.000597886800331337,
      "loss": 3.4808,
      "step": 8710
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4856981039047241,
      "learning_rate": 0.0005978863156388303,
      "loss": 3.1472,
      "step": 8711
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5553920269012451,
      "learning_rate": 0.0005978858308909409,
      "loss": 3.3922,
      "step": 8712
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6178133487701416,
      "learning_rate": 0.000597885346087669,
      "loss": 3.2288,
      "step": 8713
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.752271294593811,
      "learning_rate": 0.0005978848612290146,
      "loss": 3.2126,
      "step": 8714
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5244085788726807,
      "learning_rate": 0.0005978843763149778,
      "loss": 2.9813,
      "step": 8715
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8289101123809814,
      "learning_rate": 0.0005978838913455586,
      "loss": 3.2745,
      "step": 8716
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.069822072982788,
      "learning_rate": 0.0005978834063207571,
      "loss": 2.9998,
      "step": 8717
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5811415910720825,
      "learning_rate": 0.0005978829212405735,
      "loss": 3.5591,
      "step": 8718
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.519225597381592,
      "learning_rate": 0.0005978824361050079,
      "loss": 2.9999,
      "step": 8719
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7361290454864502,
      "learning_rate": 0.0005978819509140604,
      "loss": 3.5475,
      "step": 8720
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6560934782028198,
      "learning_rate": 0.0005978814656677308,
      "loss": 3.5144,
      "step": 8721
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.3104476928710938,
      "learning_rate": 0.0005978809803660196,
      "loss": 3.0924,
      "step": 8722
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9501707553863525,
      "learning_rate": 0.0005978804950089266,
      "loss": 3.3631,
      "step": 8723
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8962377309799194,
      "learning_rate": 0.0005978800095964521,
      "loss": 3.0993,
      "step": 8724
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9062691926956177,
      "learning_rate": 0.0005978795241285959,
      "loss": 2.9351,
      "step": 8725
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3260544538497925,
      "learning_rate": 0.0005978790386053584,
      "loss": 3.3099,
      "step": 8726
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7064977884292603,
      "learning_rate": 0.0005978785530267396,
      "loss": 3.3506,
      "step": 8727
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5443323850631714,
      "learning_rate": 0.0005978780673927395,
      "loss": 3.1885,
      "step": 8728
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6549750566482544,
      "learning_rate": 0.0005978775817033581,
      "loss": 3.0445,
      "step": 8729
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7645457983016968,
      "learning_rate": 0.0005978770959585959,
      "loss": 3.1749,
      "step": 8730
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6151976585388184,
      "learning_rate": 0.0005978766101584526,
      "loss": 3.3906,
      "step": 8731
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.4308927059173584,
      "learning_rate": 0.0005978761243029284,
      "loss": 3.0748,
      "step": 8732
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9335081577301025,
      "learning_rate": 0.0005978756383920235,
      "loss": 3.2212,
      "step": 8733
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.631779670715332,
      "learning_rate": 0.0005978751524257378,
      "loss": 3.3052,
      "step": 8734
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.513466477394104,
      "learning_rate": 0.0005978746664040715,
      "loss": 3.0303,
      "step": 8735
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6129003763198853,
      "learning_rate": 0.0005978741803270247,
      "loss": 3.1668,
      "step": 8736
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1777896881103516,
      "learning_rate": 0.0005978736941945974,
      "loss": 3.3713,
      "step": 8737
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9803283214569092,
      "learning_rate": 0.0005978732080067898,
      "loss": 3.0433,
      "step": 8738
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.928847312927246,
      "learning_rate": 0.0005978727217636021,
      "loss": 3.2229,
      "step": 8739
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7009867429733276,
      "learning_rate": 0.000597872235465034,
      "loss": 3.0378,
      "step": 8740
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5536547899246216,
      "learning_rate": 0.0005978717491110859,
      "loss": 3.2545,
      "step": 8741
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4835536479949951,
      "learning_rate": 0.0005978712627017578,
      "loss": 3.4705,
      "step": 8742
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6340030431747437,
      "learning_rate": 0.0005978707762370499,
      "loss": 3.2435,
      "step": 8743
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5406330823898315,
      "learning_rate": 0.0005978702897169622,
      "loss": 3.3495,
      "step": 8744
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6020121574401855,
      "learning_rate": 0.0005978698031414947,
      "loss": 2.988,
      "step": 8745
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7993860244750977,
      "learning_rate": 0.0005978693165106477,
      "loss": 3.0269,
      "step": 8746
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.983752965927124,
      "learning_rate": 0.0005978688298244211,
      "loss": 3.1348,
      "step": 8747
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2138259410858154,
      "learning_rate": 0.0005978683430828151,
      "loss": 3.1986,
      "step": 8748
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.2956252098083496,
      "learning_rate": 0.0005978678562858297,
      "loss": 3.5238,
      "step": 8749
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.114671230316162,
      "learning_rate": 0.000597867369433465,
      "loss": 3.1933,
      "step": 8750
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.6303656101226807,
      "learning_rate": 0.0005978668825257212,
      "loss": 3.3293,
      "step": 8751
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.287850856781006,
      "learning_rate": 0.0005978663955625983,
      "loss": 3.2396,
      "step": 8752
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.5055861473083496,
      "learning_rate": 0.0005978659085440965,
      "loss": 3.2589,
      "step": 8753
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8000056743621826,
      "learning_rate": 0.0005978654214702157,
      "loss": 3.1177,
      "step": 8754
    },
    {
      "epoch": 0.11,
      "grad_norm": 4.273599624633789,
      "learning_rate": 0.0005978649343409561,
      "loss": 3.0647,
      "step": 8755
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.3823680877685547,
      "learning_rate": 0.0005978644471563178,
      "loss": 3.1776,
      "step": 8756
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.373729944229126,
      "learning_rate": 0.0005978639599163009,
      "loss": 3.004,
      "step": 8757
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5534087419509888,
      "learning_rate": 0.0005978634726209054,
      "loss": 3.138,
      "step": 8758
    },
    {
      "epoch": 0.11,
      "grad_norm": 3.045663356781006,
      "learning_rate": 0.0005978629852701315,
      "loss": 3.1773,
      "step": 8759
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1864824295043945,
      "learning_rate": 0.0005978624978639793,
      "loss": 3.0545,
      "step": 8760
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5951403379440308,
      "learning_rate": 0.0005978620104024489,
      "loss": 3.095,
      "step": 8761
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7742226123809814,
      "learning_rate": 0.0005978615228855401,
      "loss": 3.1078,
      "step": 8762
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7887494564056396,
      "learning_rate": 0.0005978610353132534,
      "loss": 3.1192,
      "step": 8763
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3609941005706787,
      "learning_rate": 0.0005978605476855887,
      "loss": 3.3865,
      "step": 8764
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5780538320541382,
      "learning_rate": 0.0005978600600025461,
      "loss": 3.2301,
      "step": 8765
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0918726921081543,
      "learning_rate": 0.0005978595722641256,
      "loss": 2.9447,
      "step": 8766
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4674558639526367,
      "learning_rate": 0.0005978590844703274,
      "loss": 3.1255,
      "step": 8767
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3020637035369873,
      "learning_rate": 0.0005978585966211516,
      "loss": 3.2074,
      "step": 8768
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3768820762634277,
      "learning_rate": 0.0005978581087165983,
      "loss": 3.3064,
      "step": 8769
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4610852003097534,
      "learning_rate": 0.0005978576207566676,
      "loss": 3.2948,
      "step": 8770
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6997467279434204,
      "learning_rate": 0.0005978571327413594,
      "loss": 2.8856,
      "step": 8771
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4887596368789673,
      "learning_rate": 0.0005978566446706741,
      "loss": 3.2316,
      "step": 8772
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7464570999145508,
      "learning_rate": 0.0005978561565446115,
      "loss": 3.1788,
      "step": 8773
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8821351528167725,
      "learning_rate": 0.0005978556683631719,
      "loss": 3.4101,
      "step": 8774
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5413880348205566,
      "learning_rate": 0.0005978551801263551,
      "loss": 3.1328,
      "step": 8775
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4561630487442017,
      "learning_rate": 0.0005978546918341617,
      "loss": 3.1276,
      "step": 8776
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.639912724494934,
      "learning_rate": 0.0005978542034865913,
      "loss": 3.1627,
      "step": 8777
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8108187913894653,
      "learning_rate": 0.0005978537150836443,
      "loss": 3.1297,
      "step": 8778
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.598929762840271,
      "learning_rate": 0.0005978532266253207,
      "loss": 3.1609,
      "step": 8779
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5330626964569092,
      "learning_rate": 0.0005978527381116204,
      "loss": 3.4337,
      "step": 8780
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.548093557357788,
      "learning_rate": 0.0005978522495425437,
      "loss": 3.1989,
      "step": 8781
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9093358516693115,
      "learning_rate": 0.0005978517609180906,
      "loss": 3.1206,
      "step": 8782
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.481589913368225,
      "learning_rate": 0.0005978512722382614,
      "loss": 3.407,
      "step": 8783
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.502819538116455,
      "learning_rate": 0.0005978507835030559,
      "loss": 3.3926,
      "step": 8784
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.0999503135681152,
      "learning_rate": 0.0005978502947124744,
      "loss": 2.9685,
      "step": 8785
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2379506826400757,
      "learning_rate": 0.0005978498058665169,
      "loss": 3.228,
      "step": 8786
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3390718698501587,
      "learning_rate": 0.0005978493169651834,
      "loss": 3.2735,
      "step": 8787
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.96080482006073,
      "learning_rate": 0.0005978488280084742,
      "loss": 3.219,
      "step": 8788
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.343445301055908,
      "learning_rate": 0.0005978483389963892,
      "loss": 3.1376,
      "step": 8789
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.412932276725769,
      "learning_rate": 0.0005978478499289285,
      "loss": 3.0746,
      "step": 8790
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6813558340072632,
      "learning_rate": 0.0005978473608060925,
      "loss": 2.9263,
      "step": 8791
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6227905750274658,
      "learning_rate": 0.0005978468716278808,
      "loss": 3.3418,
      "step": 8792
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6904386281967163,
      "learning_rate": 0.0005978463823942939,
      "loss": 3.3561,
      "step": 8793
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.369295358657837,
      "learning_rate": 0.0005978458931053317,
      "loss": 3.0215,
      "step": 8794
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5203115940093994,
      "learning_rate": 0.0005978454037609943,
      "loss": 3.1114,
      "step": 8795
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.65101158618927,
      "learning_rate": 0.0005978449143612818,
      "loss": 3.304,
      "step": 8796
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8152931928634644,
      "learning_rate": 0.0005978444249061945,
      "loss": 3.3367,
      "step": 8797
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3295782804489136,
      "learning_rate": 0.000597843935395732,
      "loss": 3.0684,
      "step": 8798
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8771789073944092,
      "learning_rate": 0.0005978434458298948,
      "loss": 3.0652,
      "step": 8799
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.413315773010254,
      "learning_rate": 0.000597842956208683,
      "loss": 3.0267,
      "step": 8800
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4906494617462158,
      "learning_rate": 0.0005978424665320965,
      "loss": 2.9429,
      "step": 8801
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5449552536010742,
      "learning_rate": 0.0005978419768001355,
      "loss": 3.1655,
      "step": 8802
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.1785926818847656,
      "learning_rate": 0.0005978414870128,
      "loss": 3.1512,
      "step": 8803
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4208178520202637,
      "learning_rate": 0.0005978409971700902,
      "loss": 3.179,
      "step": 8804
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8576632738113403,
      "learning_rate": 0.000597840507272006,
      "loss": 3.1667,
      "step": 8805
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7999730110168457,
      "learning_rate": 0.0005978400173185477,
      "loss": 3.1428,
      "step": 8806
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.966569185256958,
      "learning_rate": 0.0005978395273097154,
      "loss": 2.8976,
      "step": 8807
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5420160293579102,
      "learning_rate": 0.0005978390372455089,
      "loss": 3.1961,
      "step": 8808
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7380081415176392,
      "learning_rate": 0.0005978385471259287,
      "loss": 3.0607,
      "step": 8809
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.5614980459213257,
      "learning_rate": 0.0005978380569509746,
      "loss": 3.1304,
      "step": 8810
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.812380075454712,
      "learning_rate": 0.0005978375667206469,
      "loss": 3.2075,
      "step": 8811
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.4140344858169556,
      "learning_rate": 0.0005978370764349454,
      "loss": 3.2566,
      "step": 8812
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.5812971591949463,
      "learning_rate": 0.0005978365860938705,
      "loss": 3.1588,
      "step": 8813
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.15340256690979,
      "learning_rate": 0.0005978360956974221,
      "loss": 3.2363,
      "step": 8814
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6570194959640503,
      "learning_rate": 0.0005978356052456005,
      "loss": 3.1978,
      "step": 8815
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.712582588195801,
      "learning_rate": 0.0005978351147384054,
      "loss": 3.4994,
      "step": 8816
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.8186044692993164,
      "learning_rate": 0.0005978346241758373,
      "loss": 3.3815,
      "step": 8817
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6853152513504028,
      "learning_rate": 0.0005978341335578961,
      "loss": 3.049,
      "step": 8818
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.9267330169677734,
      "learning_rate": 0.0005978336428845818,
      "loss": 3.165,
      "step": 8819
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.909106492996216,
      "learning_rate": 0.0005978331521558947,
      "loss": 3.0939,
      "step": 8820
    },
    {
      "epoch": 0.11,
      "grad_norm": 2.1944284439086914,
      "learning_rate": 0.0005978326613718348,
      "loss": 3.0543,
      "step": 8821
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6934804916381836,
      "learning_rate": 0.0005978321705324021,
      "loss": 3.3788,
      "step": 8822
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9369999170303345,
      "learning_rate": 0.0005978316796375968,
      "loss": 3.4189,
      "step": 8823
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.9800750017166138,
      "learning_rate": 0.000597831188687419,
      "loss": 3.2778,
      "step": 8824
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6036815643310547,
      "learning_rate": 0.0005978306976818688,
      "loss": 3.2487,
      "step": 8825
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.2778946161270142,
      "learning_rate": 0.0005978302066209462,
      "loss": 3.1946,
      "step": 8826
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6944490671157837,
      "learning_rate": 0.0005978297155046513,
      "loss": 3.0779,
      "step": 8827
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.763629674911499,
      "learning_rate": 0.0005978292243329843,
      "loss": 3.2061,
      "step": 8828
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6585426330566406,
      "learning_rate": 0.0005978287331059452,
      "loss": 3.2898,
      "step": 8829
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.7204400300979614,
      "learning_rate": 0.0005978282418235341,
      "loss": 3.3143,
      "step": 8830
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.3932397365570068,
      "learning_rate": 0.0005978277504857512,
      "loss": 3.0438,
      "step": 8831
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.6989575624465942,
      "learning_rate": 0.0005978272590925964,
      "loss": 3.1226,
      "step": 8832
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7639573812484741,
      "learning_rate": 0.0005978267676440699,
      "loss": 3.1555,
      "step": 8833
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6411551237106323,
      "learning_rate": 0.0005978262761401719,
      "loss": 3.1493,
      "step": 8834
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.727489948272705,
      "learning_rate": 0.0005978257845809022,
      "loss": 3.2709,
      "step": 8835
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2378926277160645,
      "learning_rate": 0.0005978252929662612,
      "loss": 3.0594,
      "step": 8836
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4619128704071045,
      "learning_rate": 0.0005978248012962488,
      "loss": 3.0784,
      "step": 8837
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.229337453842163,
      "learning_rate": 0.0005978243095708652,
      "loss": 3.1062,
      "step": 8838
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6044155359268188,
      "learning_rate": 0.0005978238177901104,
      "loss": 3.0649,
      "step": 8839
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6942945718765259,
      "learning_rate": 0.0005978233259539844,
      "loss": 2.9417,
      "step": 8840
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.568068027496338,
      "learning_rate": 0.0005978228340624875,
      "loss": 3.3741,
      "step": 8841
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6219884157180786,
      "learning_rate": 0.0005978223421156198,
      "loss": 3.4418,
      "step": 8842
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7009303569793701,
      "learning_rate": 0.0005978218501133813,
      "loss": 3.0569,
      "step": 8843
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5113352537155151,
      "learning_rate": 0.000597821358055772,
      "loss": 3.1347,
      "step": 8844
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5220929384231567,
      "learning_rate": 0.0005978208659427922,
      "loss": 3.2654,
      "step": 8845
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.780970811843872,
      "learning_rate": 0.0005978203737744418,
      "loss": 3.297,
      "step": 8846
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.559761643409729,
      "learning_rate": 0.000597819881550721,
      "loss": 3.2412,
      "step": 8847
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4270291328430176,
      "learning_rate": 0.0005978193892716299,
      "loss": 3.0422,
      "step": 8848
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5747580528259277,
      "learning_rate": 0.0005978188969371685,
      "loss": 3.4054,
      "step": 8849
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9205727577209473,
      "learning_rate": 0.0005978184045473369,
      "loss": 3.3023,
      "step": 8850
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7851448059082031,
      "learning_rate": 0.0005978179121021353,
      "loss": 3.0805,
      "step": 8851
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5577081441879272,
      "learning_rate": 0.0005978174196015637,
      "loss": 3.357,
      "step": 8852
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7555838823318481,
      "learning_rate": 0.0005978169270456223,
      "loss": 3.1299,
      "step": 8853
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2967530488967896,
      "learning_rate": 0.000597816434434311,
      "loss": 3.1277,
      "step": 8854
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.328413248062134,
      "learning_rate": 0.00059781594176763,
      "loss": 3.459,
      "step": 8855
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6204349994659424,
      "learning_rate": 0.0005978154490455795,
      "loss": 3.0659,
      "step": 8856
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3952871561050415,
      "learning_rate": 0.0005978149562681595,
      "loss": 3.0876,
      "step": 8857
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7576754093170166,
      "learning_rate": 0.00059781446343537,
      "loss": 3.275,
      "step": 8858
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4995331764221191,
      "learning_rate": 0.0005978139705472112,
      "loss": 3.0774,
      "step": 8859
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.257042407989502,
      "learning_rate": 0.0005978134776036832,
      "loss": 3.3202,
      "step": 8860
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.708120584487915,
      "learning_rate": 0.0005978129846047859,
      "loss": 3.1902,
      "step": 8861
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6346815824508667,
      "learning_rate": 0.0005978124915505197,
      "loss": 3.224,
      "step": 8862
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6355104446411133,
      "learning_rate": 0.0005978119984408844,
      "loss": 3.005,
      "step": 8863
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7742055654525757,
      "learning_rate": 0.0005978115052758803,
      "loss": 2.9787,
      "step": 8864
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6808841228485107,
      "learning_rate": 0.0005978110120555075,
      "loss": 3.1458,
      "step": 8865
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.548452377319336,
      "learning_rate": 0.0005978105187797659,
      "loss": 3.4042,
      "step": 8866
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5157647132873535,
      "learning_rate": 0.0005978100254486557,
      "loss": 3.2087,
      "step": 8867
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7939729690551758,
      "learning_rate": 0.0005978095320621771,
      "loss": 3.1981,
      "step": 8868
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0155210494995117,
      "learning_rate": 0.00059780903862033,
      "loss": 3.3405,
      "step": 8869
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3464059829711914,
      "learning_rate": 0.0005978085451231146,
      "loss": 3.2048,
      "step": 8870
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6507328748703003,
      "learning_rate": 0.000597808051570531,
      "loss": 3.2347,
      "step": 8871
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4614613056182861,
      "learning_rate": 0.0005978075579625791,
      "loss": 3.1591,
      "step": 8872
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3126472234725952,
      "learning_rate": 0.0005978070642992594,
      "loss": 3.2873,
      "step": 8873
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2167434692382812,
      "learning_rate": 0.0005978065705805716,
      "loss": 3.2827,
      "step": 8874
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.575036883354187,
      "learning_rate": 0.0005978060768065159,
      "loss": 3.0932,
      "step": 8875
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5861726999282837,
      "learning_rate": 0.0005978055829770926,
      "loss": 3.3721,
      "step": 8876
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.857740640640259,
      "learning_rate": 0.0005978050890923015,
      "loss": 3.2545,
      "step": 8877
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6967825889587402,
      "learning_rate": 0.0005978045951521428,
      "loss": 3.1697,
      "step": 8878
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.109757661819458,
      "learning_rate": 0.0005978041011566167,
      "loss": 3.3013,
      "step": 8879
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.616899013519287,
      "learning_rate": 0.0005978036071057231,
      "loss": 3.2831,
      "step": 8880
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8630131483078003,
      "learning_rate": 0.0005978031129994622,
      "loss": 3.0808,
      "step": 8881
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.297147035598755,
      "learning_rate": 0.0005978026188378341,
      "loss": 3.2503,
      "step": 8882
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8799396753311157,
      "learning_rate": 0.0005978021246208387,
      "loss": 3.1021,
      "step": 8883
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7162104845046997,
      "learning_rate": 0.0005978016303484766,
      "loss": 3.0773,
      "step": 8884
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9474657773971558,
      "learning_rate": 0.0005978011360207473,
      "loss": 3.1995,
      "step": 8885
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7271764278411865,
      "learning_rate": 0.0005978006416376512,
      "loss": 3.0614,
      "step": 8886
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.68345308303833,
      "learning_rate": 0.0005978001471991885,
      "loss": 3.2811,
      "step": 8887
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6091423034667969,
      "learning_rate": 0.0005977996527053589,
      "loss": 3.0861,
      "step": 8888
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9757099151611328,
      "learning_rate": 0.0005977991581561628,
      "loss": 3.388,
      "step": 8889
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.0923478603363037,
      "learning_rate": 0.0005977986635516004,
      "loss": 3.0898,
      "step": 8890
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.688053011894226,
      "learning_rate": 0.0005977981688916713,
      "loss": 3.1748,
      "step": 8891
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5068210363388062,
      "learning_rate": 0.0005977976741763761,
      "loss": 3.0836,
      "step": 8892
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1115262508392334,
      "learning_rate": 0.0005977971794057146,
      "loss": 3.1598,
      "step": 8893
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7404853105545044,
      "learning_rate": 0.0005977966845796871,
      "loss": 3.0528,
      "step": 8894
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6415332555770874,
      "learning_rate": 0.0005977961896982935,
      "loss": 3.274,
      "step": 8895
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.327838182449341,
      "learning_rate": 0.0005977956947615339,
      "loss": 3.1782,
      "step": 8896
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.150538921356201,
      "learning_rate": 0.0005977951997694086,
      "loss": 2.9556,
      "step": 8897
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6156730651855469,
      "learning_rate": 0.0005977947047219174,
      "loss": 3.2937,
      "step": 8898
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.573077917098999,
      "learning_rate": 0.0005977942096190606,
      "loss": 3.0939,
      "step": 8899
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.3226699829101562,
      "learning_rate": 0.0005977937144608383,
      "loss": 2.9741,
      "step": 8900
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1025750637054443,
      "learning_rate": 0.0005977932192472505,
      "loss": 3.2785,
      "step": 8901
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5933074951171875,
      "learning_rate": 0.0005977927239782973,
      "loss": 3.1828,
      "step": 8902
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.4500296115875244,
      "learning_rate": 0.0005977922286539789,
      "loss": 3.2926,
      "step": 8903
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.3608791828155518,
      "learning_rate": 0.0005977917332742951,
      "loss": 3.2495,
      "step": 8904
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7577191591262817,
      "learning_rate": 0.0005977912378392463,
      "loss": 3.0833,
      "step": 8905
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8967379331588745,
      "learning_rate": 0.0005977907423488325,
      "loss": 3.1774,
      "step": 8906
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0972182750701904,
      "learning_rate": 0.0005977902468030538,
      "loss": 3.3738,
      "step": 8907
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.852405071258545,
      "learning_rate": 0.0005977897512019103,
      "loss": 3.2174,
      "step": 8908
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.610651731491089,
      "learning_rate": 0.0005977892555454019,
      "loss": 3.2748,
      "step": 8909
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5783225297927856,
      "learning_rate": 0.000597788759833529,
      "loss": 3.3405,
      "step": 8910
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9569531679153442,
      "learning_rate": 0.0005977882640662915,
      "loss": 3.1222,
      "step": 8911
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8757715225219727,
      "learning_rate": 0.0005977877682436896,
      "loss": 3.1316,
      "step": 8912
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8239259719848633,
      "learning_rate": 0.0005977872723657233,
      "loss": 3.3455,
      "step": 8913
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8927429914474487,
      "learning_rate": 0.0005977867764323928,
      "loss": 3.0994,
      "step": 8914
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6528677940368652,
      "learning_rate": 0.000597786280443698,
      "loss": 3.3494,
      "step": 8915
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4418354034423828,
      "learning_rate": 0.0005977857843996391,
      "loss": 3.2005,
      "step": 8916
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.413686752319336,
      "learning_rate": 0.0005977852883002161,
      "loss": 3.0981,
      "step": 8917
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.686165452003479,
      "learning_rate": 0.0005977847921454295,
      "loss": 3.0817,
      "step": 8918
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5603164434432983,
      "learning_rate": 0.0005977842959352789,
      "loss": 3.1352,
      "step": 8919
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5491341352462769,
      "learning_rate": 0.0005977837996697646,
      "loss": 3.5571,
      "step": 8920
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9123739004135132,
      "learning_rate": 0.0005977833033488867,
      "loss": 3.4189,
      "step": 8921
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8058823347091675,
      "learning_rate": 0.0005977828069726452,
      "loss": 3.3866,
      "step": 8922
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.14796781539917,
      "learning_rate": 0.0005977823105410403,
      "loss": 3.1151,
      "step": 8923
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4426854848861694,
      "learning_rate": 0.000597781814054072,
      "loss": 3.3542,
      "step": 8924
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.337999701499939,
      "learning_rate": 0.0005977813175117405,
      "loss": 3.2823,
      "step": 8925
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5014104843139648,
      "learning_rate": 0.0005977808209140458,
      "loss": 3.1687,
      "step": 8926
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7550158500671387,
      "learning_rate": 0.0005977803242609881,
      "loss": 3.4827,
      "step": 8927
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9511353969573975,
      "learning_rate": 0.0005977798275525674,
      "loss": 3.1672,
      "step": 8928
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8736406564712524,
      "learning_rate": 0.0005977793307887836,
      "loss": 3.3686,
      "step": 8929
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.924750804901123,
      "learning_rate": 0.0005977788339696371,
      "loss": 2.9935,
      "step": 8930
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.570920705795288,
      "learning_rate": 0.000597778337095128,
      "loss": 2.9248,
      "step": 8931
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.328796863555908,
      "learning_rate": 0.0005977778401652563,
      "loss": 3.4742,
      "step": 8932
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1444926261901855,
      "learning_rate": 0.000597777343180022,
      "loss": 3.2434,
      "step": 8933
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6963273286819458,
      "learning_rate": 0.0005977768461394253,
      "loss": 3.2278,
      "step": 8934
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1409027576446533,
      "learning_rate": 0.0005977763490434663,
      "loss": 3.1135,
      "step": 8935
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3791375160217285,
      "learning_rate": 0.000597775851892145,
      "loss": 3.5703,
      "step": 8936
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.501438856124878,
      "learning_rate": 0.0005977753546854616,
      "loss": 3.1084,
      "step": 8937
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4124016761779785,
      "learning_rate": 0.000597774857423416,
      "loss": 3.2315,
      "step": 8938
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1454079151153564,
      "learning_rate": 0.0005977743601060085,
      "loss": 3.0992,
      "step": 8939
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5337806940078735,
      "learning_rate": 0.0005977738627332392,
      "loss": 2.8348,
      "step": 8940
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7986630201339722,
      "learning_rate": 0.000597773365305108,
      "loss": 3.0037,
      "step": 8941
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7154474258422852,
      "learning_rate": 0.0005977728678216152,
      "loss": 3.0854,
      "step": 8942
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.89524507522583,
      "learning_rate": 0.0005977723702827608,
      "loss": 3.2267,
      "step": 8943
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.9345672130584717,
      "learning_rate": 0.0005977718726885449,
      "loss": 3.1818,
      "step": 8944
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6778523921966553,
      "learning_rate": 0.0005977713750389676,
      "loss": 3.2732,
      "step": 8945
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9145331382751465,
      "learning_rate": 0.000597770877334029,
      "loss": 2.9756,
      "step": 8946
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7047324180603027,
      "learning_rate": 0.0005977703795737291,
      "loss": 3.1971,
      "step": 8947
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8979299068450928,
      "learning_rate": 0.0005977698817580681,
      "loss": 3.2879,
      "step": 8948
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9958127737045288,
      "learning_rate": 0.000597769383887046,
      "loss": 2.86,
      "step": 8949
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4457240104675293,
      "learning_rate": 0.0005977688859606631,
      "loss": 3.0153,
      "step": 8950
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9662224054336548,
      "learning_rate": 0.0005977683879789192,
      "loss": 3.1467,
      "step": 8951
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4134589433670044,
      "learning_rate": 0.0005977678899418146,
      "loss": 3.3694,
      "step": 8952
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4443087577819824,
      "learning_rate": 0.0005977673918493492,
      "loss": 3.2151,
      "step": 8953
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4527570009231567,
      "learning_rate": 0.0005977668937015234,
      "loss": 3.1675,
      "step": 8954
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.393401861190796,
      "learning_rate": 0.000597766395498337,
      "loss": 3.0445,
      "step": 8955
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3302593231201172,
      "learning_rate": 0.0005977658972397903,
      "loss": 3.257,
      "step": 8956
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.76174795627594,
      "learning_rate": 0.0005977653989258832,
      "loss": 3.2133,
      "step": 8957
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.409965991973877,
      "learning_rate": 0.000597764900556616,
      "loss": 2.9715,
      "step": 8958
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2655797004699707,
      "learning_rate": 0.0005977644021319886,
      "loss": 3.3238,
      "step": 8959
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5253956317901611,
      "learning_rate": 0.0005977639036520012,
      "loss": 3.3504,
      "step": 8960
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8738019466400146,
      "learning_rate": 0.0005977634051166538,
      "loss": 3.3524,
      "step": 8961
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6166414022445679,
      "learning_rate": 0.0005977629065259467,
      "loss": 2.9951,
      "step": 8962
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9653345346450806,
      "learning_rate": 0.0005977624078798798,
      "loss": 2.893,
      "step": 8963
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6382869482040405,
      "learning_rate": 0.0005977619091784532,
      "loss": 3.3289,
      "step": 8964
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5033819675445557,
      "learning_rate": 0.0005977614104216671,
      "loss": 2.9687,
      "step": 8965
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9234946966171265,
      "learning_rate": 0.0005977609116095215,
      "loss": 3.1522,
      "step": 8966
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7891446352005005,
      "learning_rate": 0.0005977604127420165,
      "loss": 3.4459,
      "step": 8967
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.117619514465332,
      "learning_rate": 0.0005977599138191523,
      "loss": 3.1483,
      "step": 8968
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.440750241279602,
      "learning_rate": 0.0005977594148409289,
      "loss": 3.0808,
      "step": 8969
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0442304611206055,
      "learning_rate": 0.0005977589158073463,
      "loss": 3.169,
      "step": 8970
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6560330390930176,
      "learning_rate": 0.0005977584167184048,
      "loss": 2.9624,
      "step": 8971
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6957287788391113,
      "learning_rate": 0.0005977579175741043,
      "loss": 3.152,
      "step": 8972
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7974718809127808,
      "learning_rate": 0.000597757418374445,
      "loss": 3.2016,
      "step": 8973
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5129791498184204,
      "learning_rate": 0.0005977569191194271,
      "loss": 3.3197,
      "step": 8974
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7283412218093872,
      "learning_rate": 0.0005977564198090504,
      "loss": 3.1498,
      "step": 8975
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4830622673034668,
      "learning_rate": 0.0005977559204433153,
      "loss": 3.2532,
      "step": 8976
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.026406764984131,
      "learning_rate": 0.0005977554210222217,
      "loss": 3.3587,
      "step": 8977
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4947863817214966,
      "learning_rate": 0.0005977549215457697,
      "loss": 3.2929,
      "step": 8978
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9516483545303345,
      "learning_rate": 0.0005977544220139595,
      "loss": 3.3832,
      "step": 8979
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6488896608352661,
      "learning_rate": 0.0005977539224267911,
      "loss": 3.3063,
      "step": 8980
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7328155040740967,
      "learning_rate": 0.0005977534227842647,
      "loss": 3.1969,
      "step": 8981
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7395159006118774,
      "learning_rate": 0.0005977529230863802,
      "loss": 3.337,
      "step": 8982
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8594409227371216,
      "learning_rate": 0.0005977524233331379,
      "loss": 2.742,
      "step": 8983
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4759656190872192,
      "learning_rate": 0.0005977519235245378,
      "loss": 3.0977,
      "step": 8984
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6083332300186157,
      "learning_rate": 0.0005977514236605799,
      "loss": 3.0234,
      "step": 8985
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6581453084945679,
      "learning_rate": 0.0005977509237412644,
      "loss": 3.0712,
      "step": 8986
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6172840595245361,
      "learning_rate": 0.0005977504237665916,
      "loss": 3.1894,
      "step": 8987
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6790482997894287,
      "learning_rate": 0.000597749923736561,
      "loss": 3.0612,
      "step": 8988
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3685306310653687,
      "learning_rate": 0.0005977494236511733,
      "loss": 3.2281,
      "step": 8989
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.431623935699463,
      "learning_rate": 0.0005977489235104284,
      "loss": 3.1109,
      "step": 8990
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.827313780784607,
      "learning_rate": 0.0005977484233143264,
      "loss": 3.3171,
      "step": 8991
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6231908798217773,
      "learning_rate": 0.0005977479230628672,
      "loss": 3.3362,
      "step": 8992
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5568761825561523,
      "learning_rate": 0.0005977474227560511,
      "loss": 3.2316,
      "step": 8993
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7323789596557617,
      "learning_rate": 0.0005977469223938781,
      "loss": 2.9561,
      "step": 8994
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8682864904403687,
      "learning_rate": 0.0005977464219763484,
      "loss": 3.3573,
      "step": 8995
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3369477987289429,
      "learning_rate": 0.000597745921503462,
      "loss": 2.9661,
      "step": 8996
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6400092840194702,
      "learning_rate": 0.0005977454209752189,
      "loss": 3.0471,
      "step": 8997
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0012528896331787,
      "learning_rate": 0.0005977449203916195,
      "loss": 3.104,
      "step": 8998
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7642379999160767,
      "learning_rate": 0.0005977444197526635,
      "loss": 3.2241,
      "step": 8999
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5989035367965698,
      "learning_rate": 0.0005977439190583513,
      "loss": 3.2828,
      "step": 9000
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7670553922653198,
      "learning_rate": 0.0005977434183086828,
      "loss": 3.5385,
      "step": 9001
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6025032997131348,
      "learning_rate": 0.0005977429175036583,
      "loss": 3.1527,
      "step": 9002
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7631208896636963,
      "learning_rate": 0.0005977424166432777,
      "loss": 3.3644,
      "step": 9003
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9300966262817383,
      "learning_rate": 0.0005977419157275412,
      "loss": 3.4713,
      "step": 9004
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.677493929862976,
      "learning_rate": 0.0005977414147564489,
      "loss": 2.941,
      "step": 9005
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.538166880607605,
      "learning_rate": 0.0005977409137300008,
      "loss": 2.8576,
      "step": 9006
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4549946784973145,
      "learning_rate": 0.0005977404126481969,
      "loss": 3.1948,
      "step": 9007
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9136501550674438,
      "learning_rate": 0.0005977399115110377,
      "loss": 3.2587,
      "step": 9008
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.060443162918091,
      "learning_rate": 0.0005977394103185228,
      "loss": 3.2543,
      "step": 9009
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4803158044815063,
      "learning_rate": 0.0005977389090706526,
      "loss": 3.2692,
      "step": 9010
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2287328243255615,
      "learning_rate": 0.0005977384077674272,
      "loss": 3.2479,
      "step": 9011
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7353147268295288,
      "learning_rate": 0.0005977379064088465,
      "loss": 2.9199,
      "step": 9012
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.414039969444275,
      "learning_rate": 0.0005977374049949107,
      "loss": 3.3217,
      "step": 9013
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5154776573181152,
      "learning_rate": 0.0005977369035256201,
      "loss": 3.185,
      "step": 9014
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0428366661071777,
      "learning_rate": 0.0005977364020009744,
      "loss": 2.9898,
      "step": 9015
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7938222885131836,
      "learning_rate": 0.0005977359004209738,
      "loss": 3.5096,
      "step": 9016
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4459559917449951,
      "learning_rate": 0.0005977353987856187,
      "loss": 3.3993,
      "step": 9017
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6099642515182495,
      "learning_rate": 0.0005977348970949088,
      "loss": 3.411,
      "step": 9018
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3145201206207275,
      "learning_rate": 0.0005977343953488445,
      "loss": 3.3636,
      "step": 9019
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5319193601608276,
      "learning_rate": 0.0005977338935474256,
      "loss": 3.0972,
      "step": 9020
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5140535831451416,
      "learning_rate": 0.0005977333916906525,
      "loss": 3.1891,
      "step": 9021
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.528295636177063,
      "learning_rate": 0.0005977328897785251,
      "loss": 3.1908,
      "step": 9022
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3289422988891602,
      "learning_rate": 0.0005977323878110434,
      "loss": 3.351,
      "step": 9023
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.746767997741699,
      "learning_rate": 0.0005977318857882078,
      "loss": 3.2682,
      "step": 9024
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.446753978729248,
      "learning_rate": 0.0005977313837100182,
      "loss": 3.2426,
      "step": 9025
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8851721286773682,
      "learning_rate": 0.0005977308815764746,
      "loss": 3.1302,
      "step": 9026
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1290411949157715,
      "learning_rate": 0.0005977303793875773,
      "loss": 3.3145,
      "step": 9027
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3071345090866089,
      "learning_rate": 0.0005977298771433264,
      "loss": 3.2014,
      "step": 9028
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9283668994903564,
      "learning_rate": 0.0005977293748437217,
      "loss": 3.0998,
      "step": 9029
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5893895626068115,
      "learning_rate": 0.0005977288724887636,
      "loss": 3.2286,
      "step": 9030
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5008362531661987,
      "learning_rate": 0.000597728370078452,
      "loss": 3.066,
      "step": 9031
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5128638744354248,
      "learning_rate": 0.0005977278676127872,
      "loss": 3.0197,
      "step": 9032
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5176323652267456,
      "learning_rate": 0.000597727365091769,
      "loss": 3.3061,
      "step": 9033
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9597101211547852,
      "learning_rate": 0.0005977268625153978,
      "loss": 3.2708,
      "step": 9034
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4502615928649902,
      "learning_rate": 0.0005977263598836735,
      "loss": 3.3738,
      "step": 9035
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6807236671447754,
      "learning_rate": 0.0005977258571965964,
      "loss": 3.3754,
      "step": 9036
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7489824295043945,
      "learning_rate": 0.0005977253544541662,
      "loss": 3.1828,
      "step": 9037
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.912484884262085,
      "learning_rate": 0.0005977248516563833,
      "loss": 3.089,
      "step": 9038
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7514783143997192,
      "learning_rate": 0.0005977243488032479,
      "loss": 3.2049,
      "step": 9039
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.762134313583374,
      "learning_rate": 0.0005977238458947598,
      "loss": 3.3068,
      "step": 9040
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8297139406204224,
      "learning_rate": 0.0005977233429309192,
      "loss": 3.2014,
      "step": 9041
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7109025716781616,
      "learning_rate": 0.0005977228399117263,
      "loss": 3.0972,
      "step": 9042
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9087023735046387,
      "learning_rate": 0.0005977223368371809,
      "loss": 3.1877,
      "step": 9043
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8115203380584717,
      "learning_rate": 0.0005977218337072835,
      "loss": 2.8674,
      "step": 9044
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.650779128074646,
      "learning_rate": 0.0005977213305220338,
      "loss": 3.2346,
      "step": 9045
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5073137283325195,
      "learning_rate": 0.0005977208272814323,
      "loss": 3.3618,
      "step": 9046
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.131093978881836,
      "learning_rate": 0.0005977203239854788,
      "loss": 3.2478,
      "step": 9047
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.681050181388855,
      "learning_rate": 0.0005977198206341735,
      "loss": 3.1982,
      "step": 9048
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0846288204193115,
      "learning_rate": 0.0005977193172275164,
      "loss": 3.3417,
      "step": 9049
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.7770543098449707,
      "learning_rate": 0.0005977188137655077,
      "loss": 3.127,
      "step": 9050
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8831815719604492,
      "learning_rate": 0.0005977183102481474,
      "loss": 3.0828,
      "step": 9051
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9983346462249756,
      "learning_rate": 0.0005977178066754356,
      "loss": 3.0441,
      "step": 9052
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.7068939208984375,
      "learning_rate": 0.0005977173030473726,
      "loss": 3.03,
      "step": 9053
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6253504753112793,
      "learning_rate": 0.0005977167993639583,
      "loss": 3.2512,
      "step": 9054
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5612984895706177,
      "learning_rate": 0.0005977162956251928,
      "loss": 3.285,
      "step": 9055
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8260973691940308,
      "learning_rate": 0.0005977157918310762,
      "loss": 3.3008,
      "step": 9056
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8830668926239014,
      "learning_rate": 0.0005977152879816086,
      "loss": 3.1613,
      "step": 9057
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.550066590309143,
      "learning_rate": 0.0005977147840767901,
      "loss": 3.2337,
      "step": 9058
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1431565284729004,
      "learning_rate": 0.0005977142801166208,
      "loss": 3.44,
      "step": 9059
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5030914545059204,
      "learning_rate": 0.0005977137761011009,
      "loss": 3.1684,
      "step": 9060
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5137556791305542,
      "learning_rate": 0.0005977132720302302,
      "loss": 3.0722,
      "step": 9061
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5154762268066406,
      "learning_rate": 0.0005977127679040092,
      "loss": 3.1483,
      "step": 9062
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.489620327949524,
      "learning_rate": 0.0005977122637224377,
      "loss": 3.0371,
      "step": 9063
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.264540672302246,
      "learning_rate": 0.0005977117594855157,
      "loss": 3.0559,
      "step": 9064
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5805695056915283,
      "learning_rate": 0.0005977112551932436,
      "loss": 3.3987,
      "step": 9065
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4378663301467896,
      "learning_rate": 0.0005977107508456215,
      "loss": 3.0895,
      "step": 9066
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5183221101760864,
      "learning_rate": 0.0005977102464426492,
      "loss": 3.1526,
      "step": 9067
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3179384469985962,
      "learning_rate": 0.0005977097419843269,
      "loss": 3.2929,
      "step": 9068
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3582557439804077,
      "learning_rate": 0.0005977092374706548,
      "loss": 3.3959,
      "step": 9069
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.490556240081787,
      "learning_rate": 0.000597708732901633,
      "loss": 3.2489,
      "step": 9070
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.002248764038086,
      "learning_rate": 0.0005977082282772613,
      "loss": 3.1986,
      "step": 9071
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5863431692123413,
      "learning_rate": 0.0005977077235975402,
      "loss": 3.1786,
      "step": 9072
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5030392408370972,
      "learning_rate": 0.0005977072188624697,
      "loss": 3.2555,
      "step": 9073
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.667227029800415,
      "learning_rate": 0.0005977067140720497,
      "loss": 3.4061,
      "step": 9074
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.501500129699707,
      "learning_rate": 0.0005977062092262804,
      "loss": 3.359,
      "step": 9075
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3924612998962402,
      "learning_rate": 0.0005977057043251619,
      "loss": 3.3603,
      "step": 9076
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6782656908035278,
      "learning_rate": 0.0005977051993686943,
      "loss": 3.0647,
      "step": 9077
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7998970746994019,
      "learning_rate": 0.0005977046943568777,
      "loss": 3.0411,
      "step": 9078
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5063053369522095,
      "learning_rate": 0.0005977041892897122,
      "loss": 3.0783,
      "step": 9079
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4592783451080322,
      "learning_rate": 0.0005977036841671978,
      "loss": 3.3684,
      "step": 9080
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3679440021514893,
      "learning_rate": 0.0005977031789893346,
      "loss": 3.4326,
      "step": 9081
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3855738639831543,
      "learning_rate": 0.0005977026737561228,
      "loss": 2.7679,
      "step": 9082
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6785482168197632,
      "learning_rate": 0.0005977021684675626,
      "loss": 3.0251,
      "step": 9083
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.198776960372925,
      "learning_rate": 0.0005977016631236538,
      "loss": 2.8475,
      "step": 9084
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8753490447998047,
      "learning_rate": 0.0005977011577243967,
      "loss": 3.2968,
      "step": 9085
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5050055980682373,
      "learning_rate": 0.0005977006522697913,
      "loss": 3.0925,
      "step": 9086
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3043631315231323,
      "learning_rate": 0.0005977001467598378,
      "loss": 3.2615,
      "step": 9087
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7253546714782715,
      "learning_rate": 0.0005976996411945362,
      "loss": 3.2064,
      "step": 9088
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4489730596542358,
      "learning_rate": 0.0005976991355738866,
      "loss": 3.358,
      "step": 9089
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7866311073303223,
      "learning_rate": 0.0005976986298978891,
      "loss": 3.284,
      "step": 9090
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3825209140777588,
      "learning_rate": 0.0005976981241665437,
      "loss": 3.3752,
      "step": 9091
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5400817394256592,
      "learning_rate": 0.0005976976183798508,
      "loss": 3.1222,
      "step": 9092
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1746013164520264,
      "learning_rate": 0.0005976971125378101,
      "loss": 3.5198,
      "step": 9093
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.514627456665039,
      "learning_rate": 0.0005976966066404221,
      "loss": 3.3128,
      "step": 9094
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1902387142181396,
      "learning_rate": 0.0005976961006876866,
      "loss": 3.0338,
      "step": 9095
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.087761640548706,
      "learning_rate": 0.0005976955946796037,
      "loss": 2.9166,
      "step": 9096
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5599642992019653,
      "learning_rate": 0.0005976950886161736,
      "loss": 3.1077,
      "step": 9097
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6124380826950073,
      "learning_rate": 0.0005976945824973965,
      "loss": 3.0047,
      "step": 9098
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.710697889328003,
      "learning_rate": 0.0005976940763232722,
      "loss": 3.25,
      "step": 9099
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1928892135620117,
      "learning_rate": 0.000597693570093801,
      "loss": 3.123,
      "step": 9100
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.079742431640625,
      "learning_rate": 0.0005976930638089829,
      "loss": 3.2995,
      "step": 9101
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.958207130432129,
      "learning_rate": 0.0005976925574688181,
      "loss": 2.984,
      "step": 9102
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5230388641357422,
      "learning_rate": 0.0005976920510733066,
      "loss": 3.2595,
      "step": 9103
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7078702449798584,
      "learning_rate": 0.0005976915446224485,
      "loss": 3.2379,
      "step": 9104
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5871925354003906,
      "learning_rate": 0.000597691038116244,
      "loss": 3.5095,
      "step": 9105
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5307074785232544,
      "learning_rate": 0.0005976905315546931,
      "loss": 3.2874,
      "step": 9106
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3337104320526123,
      "learning_rate": 0.0005976900249377959,
      "loss": 3.1832,
      "step": 9107
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.702661395072937,
      "learning_rate": 0.0005976895182655524,
      "loss": 3.2157,
      "step": 9108
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.1618428230285645,
      "learning_rate": 0.000597689011537963,
      "loss": 3.1988,
      "step": 9109
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8029530048370361,
      "learning_rate": 0.0005976885047550275,
      "loss": 3.2649,
      "step": 9110
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7045066356658936,
      "learning_rate": 0.000597687997916746,
      "loss": 3.1893,
      "step": 9111
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.194394588470459,
      "learning_rate": 0.0005976874910231188,
      "loss": 3.0074,
      "step": 9112
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8008242845535278,
      "learning_rate": 0.0005976869840741459,
      "loss": 3.1317,
      "step": 9113
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2715561389923096,
      "learning_rate": 0.0005976864770698273,
      "loss": 3.1821,
      "step": 9114
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.8938071727752686,
      "learning_rate": 0.000597685970010163,
      "loss": 3.1165,
      "step": 9115
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6795361042022705,
      "learning_rate": 0.0005976854628951536,
      "loss": 3.4145,
      "step": 9116
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6029936075210571,
      "learning_rate": 0.0005976849557247986,
      "loss": 3.2852,
      "step": 9117
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9810168743133545,
      "learning_rate": 0.0005976844484990984,
      "loss": 3.2698,
      "step": 9118
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5665940046310425,
      "learning_rate": 0.0005976839412180532,
      "loss": 3.117,
      "step": 9119
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6032600402832031,
      "learning_rate": 0.0005976834338816628,
      "loss": 3.2518,
      "step": 9120
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5441609621047974,
      "learning_rate": 0.0005976829264899274,
      "loss": 3.4257,
      "step": 9121
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.867906332015991,
      "learning_rate": 0.0005976824190428472,
      "loss": 2.8586,
      "step": 9122
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.546113133430481,
      "learning_rate": 0.0005976819115404221,
      "loss": 3.2811,
      "step": 9123
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2919039726257324,
      "learning_rate": 0.0005976814039826524,
      "loss": 3.0744,
      "step": 9124
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4441466331481934,
      "learning_rate": 0.0005976808963695381,
      "loss": 3.2803,
      "step": 9125
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.767648696899414,
      "learning_rate": 0.0005976803887010793,
      "loss": 3.2258,
      "step": 9126
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6572120189666748,
      "learning_rate": 0.0005976798809772761,
      "loss": 3.3606,
      "step": 9127
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1330554485321045,
      "learning_rate": 0.0005976793731981286,
      "loss": 3.2322,
      "step": 9128
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5037956237792969,
      "learning_rate": 0.0005976788653636369,
      "loss": 3.4365,
      "step": 9129
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5101563930511475,
      "learning_rate": 0.000597678357473801,
      "loss": 3.2211,
      "step": 9130
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.775154948234558,
      "learning_rate": 0.0005976778495286212,
      "loss": 3.1786,
      "step": 9131
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7745428085327148,
      "learning_rate": 0.0005976773415280974,
      "loss": 3.1143,
      "step": 9132
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8386482000350952,
      "learning_rate": 0.0005976768334722298,
      "loss": 3.0971,
      "step": 9133
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5909473896026611,
      "learning_rate": 0.0005976763253610184,
      "loss": 3.2503,
      "step": 9134
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3110483884811401,
      "learning_rate": 0.0005976758171944633,
      "loss": 3.2369,
      "step": 9135
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9870615005493164,
      "learning_rate": 0.0005976753089725647,
      "loss": 3.1711,
      "step": 9136
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7189605236053467,
      "learning_rate": 0.0005976748006953227,
      "loss": 2.9294,
      "step": 9137
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.571057677268982,
      "learning_rate": 0.0005976742923627374,
      "loss": 3.1617,
      "step": 9138
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6811193227767944,
      "learning_rate": 0.0005976737839748087,
      "loss": 3.3079,
      "step": 9139
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7047514915466309,
      "learning_rate": 0.0005976732755315368,
      "loss": 2.9241,
      "step": 9140
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3721864223480225,
      "learning_rate": 0.0005976727670329218,
      "loss": 3.0804,
      "step": 9141
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3320131301879883,
      "learning_rate": 0.000597672258478964,
      "loss": 3.0922,
      "step": 9142
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0866758823394775,
      "learning_rate": 0.0005976717498696631,
      "loss": 2.8386,
      "step": 9143
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8852640390396118,
      "learning_rate": 0.0005976712412050195,
      "loss": 3.1744,
      "step": 9144
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5743813514709473,
      "learning_rate": 0.0005976707324850333,
      "loss": 3.2943,
      "step": 9145
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.281076192855835,
      "learning_rate": 0.0005976702237097043,
      "loss": 3.1367,
      "step": 9146
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.674851894378662,
      "learning_rate": 0.0005976697148790329,
      "loss": 3.4901,
      "step": 9147
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4355448484420776,
      "learning_rate": 0.0005976692059930191,
      "loss": 3.2497,
      "step": 9148
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0812478065490723,
      "learning_rate": 0.0005976686970516629,
      "loss": 3.1267,
      "step": 9149
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3579318523406982,
      "learning_rate": 0.0005976681880549645,
      "loss": 3.2856,
      "step": 9150
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.8179399967193604,
      "learning_rate": 0.000597667679002924,
      "loss": 3.3358,
      "step": 9151
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8302764892578125,
      "learning_rate": 0.0005976671698955414,
      "loss": 2.9184,
      "step": 9152
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.741868495941162,
      "learning_rate": 0.0005976666607328168,
      "loss": 3.1254,
      "step": 9153
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.045977830886841,
      "learning_rate": 0.0005976661515147505,
      "loss": 3.2203,
      "step": 9154
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5587137937545776,
      "learning_rate": 0.0005976656422413423,
      "loss": 3.3294,
      "step": 9155
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4460844993591309,
      "learning_rate": 0.0005976651329125925,
      "loss": 3.4751,
      "step": 9156
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5890040397644043,
      "learning_rate": 0.0005976646235285012,
      "loss": 3.0663,
      "step": 9157
    },
    {
      "epoch": 0.12,
      "grad_norm": 4.654388904571533,
      "learning_rate": 0.0005976641140890684,
      "loss": 2.8894,
      "step": 9158
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0925283432006836,
      "learning_rate": 0.0005976636045942941,
      "loss": 3.2092,
      "step": 9159
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6531312465667725,
      "learning_rate": 0.0005976630950441786,
      "loss": 2.8685,
      "step": 9160
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.7319421768188477,
      "learning_rate": 0.000597662585438722,
      "loss": 3.4242,
      "step": 9161
    },
    {
      "epoch": 0.12,
      "grad_norm": 4.054512977600098,
      "learning_rate": 0.0005976620757779242,
      "loss": 3.2396,
      "step": 9162
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.409480094909668,
      "learning_rate": 0.0005976615660617854,
      "loss": 3.1908,
      "step": 9163
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.4326765537261963,
      "learning_rate": 0.0005976610562903057,
      "loss": 3.2269,
      "step": 9164
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.068525791168213,
      "learning_rate": 0.0005976605464634852,
      "loss": 3.4788,
      "step": 9165
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.265720844268799,
      "learning_rate": 0.0005976600365813241,
      "loss": 3.208,
      "step": 9166
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.481698751449585,
      "learning_rate": 0.0005976595266438223,
      "loss": 3.3176,
      "step": 9167
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3395438194274902,
      "learning_rate": 0.0005976590166509798,
      "loss": 3.1748,
      "step": 9168
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5406159162521362,
      "learning_rate": 0.0005976585066027971,
      "loss": 3.3126,
      "step": 9169
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.525220513343811,
      "learning_rate": 0.000597657996499274,
      "loss": 3.2262,
      "step": 9170
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2271828651428223,
      "learning_rate": 0.0005976574863404107,
      "loss": 2.9592,
      "step": 9171
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8045686483383179,
      "learning_rate": 0.0005976569761262071,
      "loss": 3.3722,
      "step": 9172
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.682005524635315,
      "learning_rate": 0.0005976564658566636,
      "loss": 3.1841,
      "step": 9173
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.450305700302124,
      "learning_rate": 0.0005976559555317801,
      "loss": 3.5349,
      "step": 9174
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4475370645523071,
      "learning_rate": 0.0005976554451515567,
      "loss": 3.2563,
      "step": 9175
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5979465246200562,
      "learning_rate": 0.0005976549347159936,
      "loss": 3.2087,
      "step": 9176
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3890429735183716,
      "learning_rate": 0.0005976544242250908,
      "loss": 3.2493,
      "step": 9177
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5843892097473145,
      "learning_rate": 0.0005976539136788485,
      "loss": 3.2132,
      "step": 9178
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8497868776321411,
      "learning_rate": 0.0005976534030772665,
      "loss": 3.1092,
      "step": 9179
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5427031517028809,
      "learning_rate": 0.0005976528924203454,
      "loss": 3.3843,
      "step": 9180
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7591549158096313,
      "learning_rate": 0.0005976523817080848,
      "loss": 3.1919,
      "step": 9181
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.579874038696289,
      "learning_rate": 0.000597651870940485,
      "loss": 3.2202,
      "step": 9182
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6633610725402832,
      "learning_rate": 0.0005976513601175462,
      "loss": 3.0305,
      "step": 9183
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6593186855316162,
      "learning_rate": 0.0005976508492392683,
      "loss": 3.2626,
      "step": 9184
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0733633041381836,
      "learning_rate": 0.0005976503383056516,
      "loss": 3.0304,
      "step": 9185
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.112705945968628,
      "learning_rate": 0.0005976498273166961,
      "loss": 2.8702,
      "step": 9186
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4441412687301636,
      "learning_rate": 0.0005976493162724017,
      "loss": 3.1451,
      "step": 9187
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8980485200881958,
      "learning_rate": 0.0005976488051727688,
      "loss": 3.3133,
      "step": 9188
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6165252923965454,
      "learning_rate": 0.0005976482940177974,
      "loss": 3.2804,
      "step": 9189
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9833133220672607,
      "learning_rate": 0.0005976477828074875,
      "loss": 3.3511,
      "step": 9190
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9471392631530762,
      "learning_rate": 0.0005976472715418392,
      "loss": 3.2109,
      "step": 9191
    },
    {
      "epoch": 0.12,
      "grad_norm": 4.361034870147705,
      "learning_rate": 0.0005976467602208528,
      "loss": 3.0736,
      "step": 9192
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8297621011734009,
      "learning_rate": 0.0005976462488445283,
      "loss": 3.3397,
      "step": 9193
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0954947471618652,
      "learning_rate": 0.0005976457374128655,
      "loss": 3.2739,
      "step": 9194
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3786871433258057,
      "learning_rate": 0.0005976452259258649,
      "loss": 2.9226,
      "step": 9195
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4134353399276733,
      "learning_rate": 0.0005976447143835264,
      "loss": 3.1594,
      "step": 9196
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.778265953063965,
      "learning_rate": 0.0005976442027858501,
      "loss": 3.0245,
      "step": 9197
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7442845106124878,
      "learning_rate": 0.0005976436911328361,
      "loss": 3.0178,
      "step": 9198
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.38895583152771,
      "learning_rate": 0.0005976431794244846,
      "loss": 3.3372,
      "step": 9199
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.145965099334717,
      "learning_rate": 0.0005976426676607956,
      "loss": 2.9704,
      "step": 9200
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.443838357925415,
      "learning_rate": 0.0005976421558417692,
      "loss": 3.1112,
      "step": 9201
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.8574700355529785,
      "learning_rate": 0.0005976416439674054,
      "loss": 3.3718,
      "step": 9202
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9502464532852173,
      "learning_rate": 0.0005976411320377045,
      "loss": 2.9681,
      "step": 9203
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6183947324752808,
      "learning_rate": 0.0005976406200526666,
      "loss": 3.2714,
      "step": 9204
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3030874729156494,
      "learning_rate": 0.0005976401080122916,
      "loss": 3.2867,
      "step": 9205
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.4729177951812744,
      "learning_rate": 0.0005976395959165796,
      "loss": 3.1063,
      "step": 9206
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.681713581085205,
      "learning_rate": 0.0005976390837655308,
      "loss": 3.1823,
      "step": 9207
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.168879508972168,
      "learning_rate": 0.0005976385715591454,
      "loss": 3.3796,
      "step": 9208
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.670250177383423,
      "learning_rate": 0.0005976380592974233,
      "loss": 3.114,
      "step": 9209
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5254700183868408,
      "learning_rate": 0.0005976375469803647,
      "loss": 3.2504,
      "step": 9210
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6316279172897339,
      "learning_rate": 0.0005976370346079696,
      "loss": 3.2676,
      "step": 9211
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7607523202896118,
      "learning_rate": 0.0005976365221802382,
      "loss": 2.9956,
      "step": 9212
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7692190408706665,
      "learning_rate": 0.0005976360096971705,
      "loss": 3.3146,
      "step": 9213
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3778506517410278,
      "learning_rate": 0.0005976354971587667,
      "loss": 3.0844,
      "step": 9214
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4917428493499756,
      "learning_rate": 0.0005976349845650268,
      "loss": 3.3865,
      "step": 9215
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.676419734954834,
      "learning_rate": 0.000597634471915951,
      "loss": 3.27,
      "step": 9216
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.055576801300049,
      "learning_rate": 0.0005976339592115393,
      "loss": 3.2133,
      "step": 9217
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3903285264968872,
      "learning_rate": 0.0005976334464517919,
      "loss": 3.1795,
      "step": 9218
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5228873491287231,
      "learning_rate": 0.0005976329336367086,
      "loss": 3.1801,
      "step": 9219
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.654802680015564,
      "learning_rate": 0.0005976324207662899,
      "loss": 3.3378,
      "step": 9220
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6245425939559937,
      "learning_rate": 0.0005976319078405357,
      "loss": 3.2082,
      "step": 9221
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8019382953643799,
      "learning_rate": 0.0005976313948594461,
      "loss": 3.1419,
      "step": 9222
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3204946517944336,
      "learning_rate": 0.0005976308818230211,
      "loss": 3.2198,
      "step": 9223
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6011247634887695,
      "learning_rate": 0.0005976303687312611,
      "loss": 3.0595,
      "step": 9224
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.185448408126831,
      "learning_rate": 0.0005976298555841658,
      "loss": 3.1154,
      "step": 9225
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.457188606262207,
      "learning_rate": 0.0005976293423817356,
      "loss": 3.2428,
      "step": 9226
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.786205530166626,
      "learning_rate": 0.0005976288291239705,
      "loss": 3.4809,
      "step": 9227
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8304139375686646,
      "learning_rate": 0.0005976283158108706,
      "loss": 3.1188,
      "step": 9228
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6970411539077759,
      "learning_rate": 0.000597627802442436,
      "loss": 3.4409,
      "step": 9229
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.767377257347107,
      "learning_rate": 0.0005976272890186666,
      "loss": 3.1577,
      "step": 9230
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.602246046066284,
      "learning_rate": 0.0005976267755395628,
      "loss": 3.316,
      "step": 9231
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7982704639434814,
      "learning_rate": 0.0005976262620051245,
      "loss": 3.4075,
      "step": 9232
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6273107528686523,
      "learning_rate": 0.0005976257484153519,
      "loss": 3.0777,
      "step": 9233
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.230962038040161,
      "learning_rate": 0.0005976252347702451,
      "loss": 3.3578,
      "step": 9234
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.538459539413452,
      "learning_rate": 0.0005976247210698041,
      "loss": 3.1245,
      "step": 9235
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0179457664489746,
      "learning_rate": 0.0005976242073140291,
      "loss": 3.2611,
      "step": 9236
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6013001203536987,
      "learning_rate": 0.00059762369350292,
      "loss": 3.193,
      "step": 9237
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8140079975128174,
      "learning_rate": 0.0005976231796364772,
      "loss": 3.1585,
      "step": 9238
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5093934535980225,
      "learning_rate": 0.0005976226657147005,
      "loss": 3.1944,
      "step": 9239
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4337230920791626,
      "learning_rate": 0.0005976221517375902,
      "loss": 3.1754,
      "step": 9240
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.212477922439575,
      "learning_rate": 0.0005976216377051464,
      "loss": 3.0573,
      "step": 9241
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8428711891174316,
      "learning_rate": 0.000597621123617369,
      "loss": 3.2164,
      "step": 9242
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4450340270996094,
      "learning_rate": 0.0005976206094742582,
      "loss": 3.1657,
      "step": 9243
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7613427639007568,
      "learning_rate": 0.0005976200952758141,
      "loss": 3.2108,
      "step": 9244
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.259973168373108,
      "learning_rate": 0.000597619581022037,
      "loss": 3.1529,
      "step": 9245
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5603500604629517,
      "learning_rate": 0.0005976190667129266,
      "loss": 2.9291,
      "step": 9246
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5497992038726807,
      "learning_rate": 0.0005976185523484833,
      "loss": 3.4001,
      "step": 9247
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6980568170547485,
      "learning_rate": 0.000597618037928707,
      "loss": 3.0365,
      "step": 9248
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.9123120307922363,
      "learning_rate": 0.000597617523453598,
      "loss": 3.3345,
      "step": 9249
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5919190645217896,
      "learning_rate": 0.0005976170089231562,
      "loss": 2.9722,
      "step": 9250
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.557213544845581,
      "learning_rate": 0.0005976164943373818,
      "loss": 3.3306,
      "step": 9251
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.792794942855835,
      "learning_rate": 0.000597615979696275,
      "loss": 3.3039,
      "step": 9252
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6664677858352661,
      "learning_rate": 0.0005976154649998356,
      "loss": 3.112,
      "step": 9253
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.89485764503479,
      "learning_rate": 0.0005976149502480638,
      "loss": 3.2302,
      "step": 9254
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7975655794143677,
      "learning_rate": 0.00059761443544096,
      "loss": 3.1469,
      "step": 9255
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6155993938446045,
      "learning_rate": 0.000597613920578524,
      "loss": 3.0291,
      "step": 9256
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.976383090019226,
      "learning_rate": 0.0005976134056607558,
      "loss": 3.2928,
      "step": 9257
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2942163944244385,
      "learning_rate": 0.0005976128906876558,
      "loss": 3.1686,
      "step": 9258
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.418078899383545,
      "learning_rate": 0.0005976123756592239,
      "loss": 3.1885,
      "step": 9259
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6272717714309692,
      "learning_rate": 0.0005976118605754602,
      "loss": 3.3013,
      "step": 9260
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3842113018035889,
      "learning_rate": 0.0005976113454363649,
      "loss": 3.1362,
      "step": 9261
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.979308843612671,
      "learning_rate": 0.000597610830241938,
      "loss": 3.321,
      "step": 9262
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.718544840812683,
      "learning_rate": 0.0005976103149921796,
      "loss": 3.349,
      "step": 9263
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.101173162460327,
      "learning_rate": 0.0005976097996870899,
      "loss": 3.3198,
      "step": 9264
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1407735347747803,
      "learning_rate": 0.0005976092843266689,
      "loss": 3.428,
      "step": 9265
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.783363938331604,
      "learning_rate": 0.0005976087689109166,
      "loss": 3.3212,
      "step": 9266
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6584627628326416,
      "learning_rate": 0.0005976082534398334,
      "loss": 3.268,
      "step": 9267
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.302515745162964,
      "learning_rate": 0.0005976077379134192,
      "loss": 3.2522,
      "step": 9268
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.00506854057312,
      "learning_rate": 0.000597607222331674,
      "loss": 3.2446,
      "step": 9269
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6231039762496948,
      "learning_rate": 0.000597606706694598,
      "loss": 3.101,
      "step": 9270
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.361638069152832,
      "learning_rate": 0.0005976061910021914,
      "loss": 3.3343,
      "step": 9271
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0042905807495117,
      "learning_rate": 0.0005976056752544541,
      "loss": 3.1376,
      "step": 9272
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8771525621414185,
      "learning_rate": 0.0005976051594513862,
      "loss": 3.2539,
      "step": 9273
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4576995372772217,
      "learning_rate": 0.000597604643592988,
      "loss": 3.1205,
      "step": 9274
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.514111042022705,
      "learning_rate": 0.0005976041276792595,
      "loss": 3.0797,
      "step": 9275
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1920292377471924,
      "learning_rate": 0.0005976036117102008,
      "loss": 2.9802,
      "step": 9276
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5472939014434814,
      "learning_rate": 0.0005976030956858117,
      "loss": 3.3257,
      "step": 9277
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.753170132637024,
      "learning_rate": 0.0005976025796060929,
      "loss": 3.2702,
      "step": 9278
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1118624210357666,
      "learning_rate": 0.000597602063471044,
      "loss": 3.4775,
      "step": 9279
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.223489284515381,
      "learning_rate": 0.0005976015472806652,
      "loss": 3.4722,
      "step": 9280
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7226552963256836,
      "learning_rate": 0.0005976010310349568,
      "loss": 3.2222,
      "step": 9281
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.386655807495117,
      "learning_rate": 0.0005976005147339188,
      "loss": 3.0738,
      "step": 9282
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.860668420791626,
      "learning_rate": 0.0005975999983775511,
      "loss": 3.2797,
      "step": 9283
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5024927854537964,
      "learning_rate": 0.0005975994819658539,
      "loss": 3.1583,
      "step": 9284
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8149206638336182,
      "learning_rate": 0.0005975989654988274,
      "loss": 3.062,
      "step": 9285
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.07336688041687,
      "learning_rate": 0.0005975984489764719,
      "loss": 3.0337,
      "step": 9286
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.639134168624878,
      "learning_rate": 0.0005975979323987869,
      "loss": 3.0017,
      "step": 9287
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3961869478225708,
      "learning_rate": 0.0005975974157657729,
      "loss": 3.2269,
      "step": 9288
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7679624557495117,
      "learning_rate": 0.0005975968990774298,
      "loss": 3.4243,
      "step": 9289
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.16149640083313,
      "learning_rate": 0.0005975963823337581,
      "loss": 3.0139,
      "step": 9290
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4613655805587769,
      "learning_rate": 0.0005975958655347575,
      "loss": 2.9179,
      "step": 9291
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5067315101623535,
      "learning_rate": 0.0005975953486804282,
      "loss": 3.0237,
      "step": 9292
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6630923748016357,
      "learning_rate": 0.0005975948317707703,
      "loss": 3.0753,
      "step": 9293
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0004727840423584,
      "learning_rate": 0.0005975943148057839,
      "loss": 3.3238,
      "step": 9294
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0083117485046387,
      "learning_rate": 0.0005975937977854691,
      "loss": 3.2494,
      "step": 9295
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.826605796813965,
      "learning_rate": 0.0005975932807098259,
      "loss": 2.9436,
      "step": 9296
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6064099073410034,
      "learning_rate": 0.0005975927635788546,
      "loss": 3.3663,
      "step": 9297
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5509434938430786,
      "learning_rate": 0.0005975922463925552,
      "loss": 3.1432,
      "step": 9298
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4514763355255127,
      "learning_rate": 0.0005975917291509278,
      "loss": 2.9869,
      "step": 9299
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8028565645217896,
      "learning_rate": 0.0005975912118539724,
      "loss": 3.179,
      "step": 9300
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.196617364883423,
      "learning_rate": 0.0005975906945016893,
      "loss": 3.2575,
      "step": 9301
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.503991723060608,
      "learning_rate": 0.0005975901770940783,
      "loss": 3.3112,
      "step": 9302
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.110208511352539,
      "learning_rate": 0.0005975896596311397,
      "loss": 3.2107,
      "step": 9303
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.47183096408844,
      "learning_rate": 0.0005975891421128737,
      "loss": 3.1699,
      "step": 9304
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5191707611083984,
      "learning_rate": 0.0005975886245392801,
      "loss": 3.1952,
      "step": 9305
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5905953645706177,
      "learning_rate": 0.0005975881069103593,
      "loss": 3.2632,
      "step": 9306
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.46890127658844,
      "learning_rate": 0.0005975875892261112,
      "loss": 3.0905,
      "step": 9307
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1930060386657715,
      "learning_rate": 0.0005975870714865359,
      "loss": 3.1616,
      "step": 9308
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.351135492324829,
      "learning_rate": 0.0005975865536916336,
      "loss": 3.4104,
      "step": 9309
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5369418859481812,
      "learning_rate": 0.0005975860358414044,
      "loss": 2.9379,
      "step": 9310
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6660466194152832,
      "learning_rate": 0.0005975855179358482,
      "loss": 3.1708,
      "step": 9311
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7041977643966675,
      "learning_rate": 0.0005975849999749653,
      "loss": 3.2663,
      "step": 9312
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.441663146018982,
      "learning_rate": 0.0005975844819587556,
      "loss": 3.2141,
      "step": 9313
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7212207317352295,
      "learning_rate": 0.0005975839638872197,
      "loss": 3.2895,
      "step": 9314
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6067805290222168,
      "learning_rate": 0.000597583445760357,
      "loss": 3.3285,
      "step": 9315
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4044021368026733,
      "learning_rate": 0.000597582927578168,
      "loss": 3.3233,
      "step": 9316
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.4781651496887207,
      "learning_rate": 0.0005975824093406527,
      "loss": 3.2443,
      "step": 9317
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6247085332870483,
      "learning_rate": 0.0005975818910478112,
      "loss": 3.1254,
      "step": 9318
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5068434476852417,
      "learning_rate": 0.0005975813726996436,
      "loss": 3.3583,
      "step": 9319
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.681527018547058,
      "learning_rate": 0.0005975808542961501,
      "loss": 3.265,
      "step": 9320
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7313029766082764,
      "learning_rate": 0.0005975803358373307,
      "loss": 3.4099,
      "step": 9321
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5710161924362183,
      "learning_rate": 0.0005975798173231854,
      "loss": 3.1473,
      "step": 9322
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.6803572177886963,
      "learning_rate": 0.0005975792987537145,
      "loss": 2.9888,
      "step": 9323
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2879974842071533,
      "learning_rate": 0.0005975787801289179,
      "loss": 3.2064,
      "step": 9324
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3024070262908936,
      "learning_rate": 0.0005975782614487959,
      "loss": 2.9669,
      "step": 9325
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0389091968536377,
      "learning_rate": 0.0005975777427133484,
      "loss": 3.259,
      "step": 9326
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.9309563636779785,
      "learning_rate": 0.0005975772239225754,
      "loss": 3.0867,
      "step": 9327
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3874921798706055,
      "learning_rate": 0.0005975767050764775,
      "loss": 2.9817,
      "step": 9328
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5394362211227417,
      "learning_rate": 0.0005975761861750543,
      "loss": 3.4177,
      "step": 9329
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9834121465682983,
      "learning_rate": 0.0005975756672183061,
      "loss": 3.1492,
      "step": 9330
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5719351768493652,
      "learning_rate": 0.0005975751482062329,
      "loss": 3.3061,
      "step": 9331
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.389614224433899,
      "learning_rate": 0.000597574629138835,
      "loss": 2.9721,
      "step": 9332
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.026592969894409,
      "learning_rate": 0.0005975741100161122,
      "loss": 3.194,
      "step": 9333
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6148918867111206,
      "learning_rate": 0.0005975735908380649,
      "loss": 3.1798,
      "step": 9334
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.809999942779541,
      "learning_rate": 0.000597573071604693,
      "loss": 3.2565,
      "step": 9335
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4919477701187134,
      "learning_rate": 0.0005975725523159967,
      "loss": 3.1174,
      "step": 9336
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.423969030380249,
      "learning_rate": 0.0005975720329719759,
      "loss": 3.0713,
      "step": 9337
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5076165199279785,
      "learning_rate": 0.000597571513572631,
      "loss": 3.2742,
      "step": 9338
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9580931663513184,
      "learning_rate": 0.000597570994117962,
      "loss": 3.2966,
      "step": 9339
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.711614727973938,
      "learning_rate": 0.0005975704746079688,
      "loss": 3.0796,
      "step": 9340
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.644829511642456,
      "learning_rate": 0.0005975699550426516,
      "loss": 3.1895,
      "step": 9341
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6052899360656738,
      "learning_rate": 0.0005975694354220106,
      "loss": 3.0972,
      "step": 9342
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.497233510017395,
      "learning_rate": 0.0005975689157460458,
      "loss": 3.2751,
      "step": 9343
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5768529176712036,
      "learning_rate": 0.0005975683960147573,
      "loss": 3.2756,
      "step": 9344
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5027083158493042,
      "learning_rate": 0.0005975678762281452,
      "loss": 3.1868,
      "step": 9345
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5686297416687012,
      "learning_rate": 0.0005975673563862096,
      "loss": 3.1238,
      "step": 9346
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6895582675933838,
      "learning_rate": 0.0005975668364889508,
      "loss": 3.1694,
      "step": 9347
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5544893741607666,
      "learning_rate": 0.0005975663165363684,
      "loss": 3.0538,
      "step": 9348
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.58282732963562,
      "learning_rate": 0.0005975657965284631,
      "loss": 3.1651,
      "step": 9349
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4902887344360352,
      "learning_rate": 0.0005975652764652346,
      "loss": 3.1118,
      "step": 9350
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5659542083740234,
      "learning_rate": 0.000597564756346683,
      "loss": 3.1142,
      "step": 9351
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9011008739471436,
      "learning_rate": 0.0005975642361728086,
      "loss": 3.1817,
      "step": 9352
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1465723514556885,
      "learning_rate": 0.0005975637159436113,
      "loss": 3.1075,
      "step": 9353
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2845561504364014,
      "learning_rate": 0.0005975631956590914,
      "loss": 3.1228,
      "step": 9354
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7659814357757568,
      "learning_rate": 0.0005975626753192488,
      "loss": 3.5719,
      "step": 9355
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4104113578796387,
      "learning_rate": 0.0005975621549240838,
      "loss": 3.131,
      "step": 9356
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.849974274635315,
      "learning_rate": 0.0005975616344735963,
      "loss": 3.2494,
      "step": 9357
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3198609352111816,
      "learning_rate": 0.0005975611139677864,
      "loss": 3.1755,
      "step": 9358
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5673562288284302,
      "learning_rate": 0.0005975605934066543,
      "loss": 3.4875,
      "step": 9359
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.266686201095581,
      "learning_rate": 0.0005975600727902002,
      "loss": 3.3941,
      "step": 9360
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6703922748565674,
      "learning_rate": 0.000597559552118424,
      "loss": 3.0572,
      "step": 9361
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.959295630455017,
      "learning_rate": 0.0005975590313913257,
      "loss": 3.0202,
      "step": 9362
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4476945400238037,
      "learning_rate": 0.0005975585106089057,
      "loss": 3.0556,
      "step": 9363
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.175463914871216,
      "learning_rate": 0.000597557989771164,
      "loss": 3.2452,
      "step": 9364
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0309481620788574,
      "learning_rate": 0.0005975574688781007,
      "loss": 2.9948,
      "step": 9365
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7505186796188354,
      "learning_rate": 0.0005975569479297157,
      "loss": 3.2501,
      "step": 9366
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.468741536140442,
      "learning_rate": 0.0005975564269260093,
      "loss": 3.5235,
      "step": 9367
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.682943820953369,
      "learning_rate": 0.0005975559058669815,
      "loss": 3.0778,
      "step": 9368
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.905989170074463,
      "learning_rate": 0.0005975553847526326,
      "loss": 3.1225,
      "step": 9369
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6242164373397827,
      "learning_rate": 0.0005975548635829623,
      "loss": 3.2063,
      "step": 9370
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.07708740234375,
      "learning_rate": 0.0005975543423579711,
      "loss": 3.3528,
      "step": 9371
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.504987955093384,
      "learning_rate": 0.0005975538210776589,
      "loss": 3.1433,
      "step": 9372
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1556012630462646,
      "learning_rate": 0.0005975532997420258,
      "loss": 3.0871,
      "step": 9373
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8070769309997559,
      "learning_rate": 0.000597552778351072,
      "loss": 3.5873,
      "step": 9374
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8139454126358032,
      "learning_rate": 0.0005975522569047975,
      "loss": 3.2051,
      "step": 9375
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6006760597229004,
      "learning_rate": 0.0005975517354032024,
      "loss": 2.8899,
      "step": 9376
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4460121393203735,
      "learning_rate": 0.0005975512138462867,
      "loss": 3.1716,
      "step": 9377
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.724416971206665,
      "learning_rate": 0.0005975506922340508,
      "loss": 3.3137,
      "step": 9378
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9823153018951416,
      "learning_rate": 0.0005975501705664944,
      "loss": 3.1946,
      "step": 9379
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.713244080543518,
      "learning_rate": 0.000597549648843618,
      "loss": 3.4509,
      "step": 9380
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4732033014297485,
      "learning_rate": 0.0005975491270654214,
      "loss": 3.3167,
      "step": 9381
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.059666395187378,
      "learning_rate": 0.0005975486052319049,
      "loss": 3.2815,
      "step": 9382
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.4344215393066406,
      "learning_rate": 0.0005975480833430683,
      "loss": 3.0279,
      "step": 9383
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7321434020996094,
      "learning_rate": 0.0005975475613989121,
      "loss": 3.1365,
      "step": 9384
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.978532552719116,
      "learning_rate": 0.0005975470393994361,
      "loss": 3.3768,
      "step": 9385
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2198703289031982,
      "learning_rate": 0.0005975465173446406,
      "loss": 3.5628,
      "step": 9386
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7927486896514893,
      "learning_rate": 0.0005975459952345254,
      "loss": 3.2391,
      "step": 9387
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.096176862716675,
      "learning_rate": 0.0005975454730690908,
      "loss": 3.2895,
      "step": 9388
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3988871574401855,
      "learning_rate": 0.000597544950848337,
      "loss": 3.1359,
      "step": 9389
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.549839973449707,
      "learning_rate": 0.0005975444285722639,
      "loss": 2.9848,
      "step": 9390
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7053756713867188,
      "learning_rate": 0.0005975439062408718,
      "loss": 3.0894,
      "step": 9391
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.854525327682495,
      "learning_rate": 0.0005975433838541606,
      "loss": 3.2044,
      "step": 9392
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8695080280303955,
      "learning_rate": 0.0005975428614121304,
      "loss": 3.1191,
      "step": 9393
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4405386447906494,
      "learning_rate": 0.0005975423389147814,
      "loss": 3.2886,
      "step": 9394
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7840187549591064,
      "learning_rate": 0.0005975418163621137,
      "loss": 2.9978,
      "step": 9395
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.681305170059204,
      "learning_rate": 0.0005975412937541273,
      "loss": 3.1973,
      "step": 9396
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0560522079467773,
      "learning_rate": 0.0005975407710908223,
      "loss": 3.0846,
      "step": 9397
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7159250974655151,
      "learning_rate": 0.000597540248372199,
      "loss": 3.1124,
      "step": 9398
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.022372245788574,
      "learning_rate": 0.0005975397255982573,
      "loss": 3.2601,
      "step": 9399
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3737683296203613,
      "learning_rate": 0.0005975392027689974,
      "loss": 3.2429,
      "step": 9400
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4609694480895996,
      "learning_rate": 0.0005975386798844193,
      "loss": 3.2368,
      "step": 9401
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3646641969680786,
      "learning_rate": 0.000597538156944523,
      "loss": 3.1559,
      "step": 9402
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8707187175750732,
      "learning_rate": 0.0005975376339493089,
      "loss": 3.187,
      "step": 9403
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.7400095462799072,
      "learning_rate": 0.000597537110898777,
      "loss": 2.8343,
      "step": 9404
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9079301357269287,
      "learning_rate": 0.0005975365877929271,
      "loss": 3.1704,
      "step": 9405
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7618136405944824,
      "learning_rate": 0.0005975360646317598,
      "loss": 3.3017,
      "step": 9406
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9049513339996338,
      "learning_rate": 0.0005975355414152748,
      "loss": 2.751,
      "step": 9407
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9791289567947388,
      "learning_rate": 0.0005975350181434724,
      "loss": 3.1341,
      "step": 9408
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8904194831848145,
      "learning_rate": 0.0005975344948163524,
      "loss": 3.2043,
      "step": 9409
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2922074794769287,
      "learning_rate": 0.0005975339714339153,
      "loss": 3.0589,
      "step": 9410
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.250128746032715,
      "learning_rate": 0.000597533447996161,
      "loss": 3.0082,
      "step": 9411
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3954169750213623,
      "learning_rate": 0.0005975329245030897,
      "loss": 3.0858,
      "step": 9412
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4523546695709229,
      "learning_rate": 0.0005975324009547013,
      "loss": 3.1358,
      "step": 9413
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5517245531082153,
      "learning_rate": 0.000597531877350996,
      "loss": 3.0184,
      "step": 9414
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4426900148391724,
      "learning_rate": 0.0005975313536919739,
      "loss": 3.2078,
      "step": 9415
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6259361505508423,
      "learning_rate": 0.0005975308299776352,
      "loss": 3.0582,
      "step": 9416
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9596033096313477,
      "learning_rate": 0.0005975303062079797,
      "loss": 2.9198,
      "step": 9417
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.530042290687561,
      "learning_rate": 0.000597529782383008,
      "loss": 3.1267,
      "step": 9418
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.195831537246704,
      "learning_rate": 0.0005975292585027197,
      "loss": 3.1218,
      "step": 9419
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9580256938934326,
      "learning_rate": 0.0005975287345671151,
      "loss": 3.1718,
      "step": 9420
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5017101764678955,
      "learning_rate": 0.0005975282105761943,
      "loss": 3.0943,
      "step": 9421
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4512042999267578,
      "learning_rate": 0.0005975276865299576,
      "loss": 3.2894,
      "step": 9422
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8765919208526611,
      "learning_rate": 0.0005975271624284046,
      "loss": 3.3215,
      "step": 9423
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9903740882873535,
      "learning_rate": 0.0005975266382715358,
      "loss": 3.3778,
      "step": 9424
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.571171760559082,
      "learning_rate": 0.0005975261140593512,
      "loss": 3.4371,
      "step": 9425
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.07084059715271,
      "learning_rate": 0.0005975255897918508,
      "loss": 3.2025,
      "step": 9426
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6671921014785767,
      "learning_rate": 0.0005975250654690348,
      "loss": 3.1907,
      "step": 9427
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5281095504760742,
      "learning_rate": 0.0005975245410909034,
      "loss": 3.0291,
      "step": 9428
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4944539070129395,
      "learning_rate": 0.0005975240166574564,
      "loss": 3.51,
      "step": 9429
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9811204671859741,
      "learning_rate": 0.0005975234921686941,
      "loss": 3.1171,
      "step": 9430
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.683266282081604,
      "learning_rate": 0.0005975229676246167,
      "loss": 3.1528,
      "step": 9431
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.06235933303833,
      "learning_rate": 0.0005975224430252241,
      "loss": 3.2314,
      "step": 9432
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.536651611328125,
      "learning_rate": 0.0005975219183705164,
      "loss": 3.2399,
      "step": 9433
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6594513654708862,
      "learning_rate": 0.0005975213936604938,
      "loss": 3.2498,
      "step": 9434
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9460132122039795,
      "learning_rate": 0.0005975208688951563,
      "loss": 3.3037,
      "step": 9435
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6721720695495605,
      "learning_rate": 0.0005975203440745042,
      "loss": 3.3331,
      "step": 9436
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7163550853729248,
      "learning_rate": 0.0005975198191985374,
      "loss": 2.8417,
      "step": 9437
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5549414157867432,
      "learning_rate": 0.000597519294267256,
      "loss": 3.0948,
      "step": 9438
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5818378925323486,
      "learning_rate": 0.00059751876928066,
      "loss": 2.9971,
      "step": 9439
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9928594827651978,
      "learning_rate": 0.0005975182442387499,
      "loss": 3.015,
      "step": 9440
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6773327589035034,
      "learning_rate": 0.0005975177191415255,
      "loss": 3.4527,
      "step": 9441
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1761276721954346,
      "learning_rate": 0.0005975171939889868,
      "loss": 3.2454,
      "step": 9442
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9337561130523682,
      "learning_rate": 0.0005975166687811342,
      "loss": 3.0873,
      "step": 9443
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.122337579727173,
      "learning_rate": 0.0005975161435179675,
      "loss": 2.9863,
      "step": 9444
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.473414659500122,
      "learning_rate": 0.000597515618199487,
      "loss": 3.214,
      "step": 9445
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0427346229553223,
      "learning_rate": 0.0005975150928256926,
      "loss": 3.2543,
      "step": 9446
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.8208444118499756,
      "learning_rate": 0.0005975145673965847,
      "loss": 3.078,
      "step": 9447
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.440290927886963,
      "learning_rate": 0.0005975140419121632,
      "loss": 3.3814,
      "step": 9448
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.538870096206665,
      "learning_rate": 0.0005975135163724281,
      "loss": 3.1093,
      "step": 9449
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7004376649856567,
      "learning_rate": 0.0005975129907773799,
      "loss": 2.9327,
      "step": 9450
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5949515104293823,
      "learning_rate": 0.0005975124651270181,
      "loss": 3.0792,
      "step": 9451
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6921192407608032,
      "learning_rate": 0.0005975119394213432,
      "loss": 3.251,
      "step": 9452
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7266631126403809,
      "learning_rate": 0.0005975114136603553,
      "loss": 3.1867,
      "step": 9453
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.32875657081604,
      "learning_rate": 0.0005975108878440545,
      "loss": 3.143,
      "step": 9454
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.554115891456604,
      "learning_rate": 0.0005975103619724406,
      "loss": 3.0412,
      "step": 9455
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.372844696044922,
      "learning_rate": 0.000597509836045514,
      "loss": 3.1083,
      "step": 9456
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4314113855361938,
      "learning_rate": 0.0005975093100632747,
      "loss": 3.0593,
      "step": 9457
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7759568691253662,
      "learning_rate": 0.0005975087840257229,
      "loss": 3.0618,
      "step": 9458
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2862279415130615,
      "learning_rate": 0.0005975082579328585,
      "loss": 3.1891,
      "step": 9459
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.963423728942871,
      "learning_rate": 0.0005975077317846816,
      "loss": 3.1876,
      "step": 9460
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8758286237716675,
      "learning_rate": 0.0005975072055811926,
      "loss": 3.0407,
      "step": 9461
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.378460645675659,
      "learning_rate": 0.0005975066793223912,
      "loss": 3.3113,
      "step": 9462
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3591091632843018,
      "learning_rate": 0.000597506153008278,
      "loss": 2.9281,
      "step": 9463
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7496771812438965,
      "learning_rate": 0.0005975056266388524,
      "loss": 3.3519,
      "step": 9464
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3828344345092773,
      "learning_rate": 0.0005975051002141151,
      "loss": 3.2667,
      "step": 9465
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.755438208580017,
      "learning_rate": 0.000597504573734066,
      "loss": 3.3293,
      "step": 9466
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6947340965270996,
      "learning_rate": 0.000597504047198705,
      "loss": 2.9373,
      "step": 9467
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6763750314712524,
      "learning_rate": 0.0005975035206080327,
      "loss": 3.321,
      "step": 9468
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8231624364852905,
      "learning_rate": 0.0005975029939620486,
      "loss": 3.2687,
      "step": 9469
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9405003786087036,
      "learning_rate": 0.0005975024672607532,
      "loss": 3.1322,
      "step": 9470
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.814598560333252,
      "learning_rate": 0.0005975019405041464,
      "loss": 3.1489,
      "step": 9471
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.739620327949524,
      "learning_rate": 0.0005975014136922284,
      "loss": 3.1472,
      "step": 9472
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.467817783355713,
      "learning_rate": 0.0005975008868249994,
      "loss": 3.13,
      "step": 9473
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.989179253578186,
      "learning_rate": 0.0005975003599024592,
      "loss": 3.3938,
      "step": 9474
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8532687425613403,
      "learning_rate": 0.0005974998329246081,
      "loss": 3.193,
      "step": 9475
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6149227619171143,
      "learning_rate": 0.0005974993058914462,
      "loss": 2.8641,
      "step": 9476
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.340919017791748,
      "learning_rate": 0.0005974987788029736,
      "loss": 3.2777,
      "step": 9477
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.362609624862671,
      "learning_rate": 0.0005974982516591904,
      "loss": 3.3035,
      "step": 9478
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5863312482833862,
      "learning_rate": 0.0005974977244600965,
      "loss": 3.3287,
      "step": 9479
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6473240852355957,
      "learning_rate": 0.0005974971972056923,
      "loss": 3.4057,
      "step": 9480
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.749025583267212,
      "learning_rate": 0.0005974966698959777,
      "loss": 3.2167,
      "step": 9481
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.667066216468811,
      "learning_rate": 0.0005974961425309528,
      "loss": 3.1714,
      "step": 9482
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2935101985931396,
      "learning_rate": 0.0005974956151106178,
      "loss": 3.2583,
      "step": 9483
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4485948085784912,
      "learning_rate": 0.0005974950876349728,
      "loss": 3.2619,
      "step": 9484
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4862055778503418,
      "learning_rate": 0.0005974945601040177,
      "loss": 3.1736,
      "step": 9485
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7053693532943726,
      "learning_rate": 0.000597494032517753,
      "loss": 3.3347,
      "step": 9486
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5761467218399048,
      "learning_rate": 0.0005974935048761783,
      "loss": 3.2593,
      "step": 9487
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4126170873641968,
      "learning_rate": 0.000597492977179294,
      "loss": 3.2501,
      "step": 9488
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6947720050811768,
      "learning_rate": 0.0005974924494271003,
      "loss": 3.1383,
      "step": 9489
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6568320989608765,
      "learning_rate": 0.000597491921619597,
      "loss": 3.3278,
      "step": 9490
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.268676996231079,
      "learning_rate": 0.0005974913937567842,
      "loss": 3.1669,
      "step": 9491
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5998892784118652,
      "learning_rate": 0.0005974908658386624,
      "loss": 2.9871,
      "step": 9492
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.419384241104126,
      "learning_rate": 0.0005974903378652312,
      "loss": 3.1799,
      "step": 9493
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.839186906814575,
      "learning_rate": 0.000597489809836491,
      "loss": 2.9912,
      "step": 9494
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9569156169891357,
      "learning_rate": 0.000597489281752442,
      "loss": 3.0119,
      "step": 9495
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5488955974578857,
      "learning_rate": 0.0005974887536130839,
      "loss": 3.4214,
      "step": 9496
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3860552310943604,
      "learning_rate": 0.0005974882254184172,
      "loss": 3.0662,
      "step": 9497
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.797359585762024,
      "learning_rate": 0.0005974876971684417,
      "loss": 3.2381,
      "step": 9498
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7186520099639893,
      "learning_rate": 0.0005974871688631577,
      "loss": 3.1132,
      "step": 9499
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.257051944732666,
      "learning_rate": 0.0005974866405025652,
      "loss": 3.2729,
      "step": 9500
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6151785850524902,
      "learning_rate": 0.0005974861120866643,
      "loss": 3.1347,
      "step": 9501
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5389440059661865,
      "learning_rate": 0.0005974855836154551,
      "loss": 3.149,
      "step": 9502
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9773343801498413,
      "learning_rate": 0.0005974850550889378,
      "loss": 3.3781,
      "step": 9503
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5811415910720825,
      "learning_rate": 0.0005974845265071123,
      "loss": 3.3594,
      "step": 9504
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7187572717666626,
      "learning_rate": 0.0005974839978699789,
      "loss": 3.3555,
      "step": 9505
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5864362716674805,
      "learning_rate": 0.0005974834691775375,
      "loss": 3.36,
      "step": 9506
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8406028747558594,
      "learning_rate": 0.0005974829404297884,
      "loss": 3.282,
      "step": 9507
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.253049612045288,
      "learning_rate": 0.0005974824116267316,
      "loss": 3.3385,
      "step": 9508
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4769413471221924,
      "learning_rate": 0.0005974818827683672,
      "loss": 3.4135,
      "step": 9509
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.705963611602783,
      "learning_rate": 0.0005974813538546953,
      "loss": 3.0439,
      "step": 9510
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7764533758163452,
      "learning_rate": 0.000597480824885716,
      "loss": 3.44,
      "step": 9511
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.832198977470398,
      "learning_rate": 0.0005974802958614293,
      "loss": 3.1221,
      "step": 9512
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.797614574432373,
      "learning_rate": 0.0005974797667818356,
      "loss": 3.1234,
      "step": 9513
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7197258472442627,
      "learning_rate": 0.0005974792376469346,
      "loss": 2.9648,
      "step": 9514
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3307321071624756,
      "learning_rate": 0.0005974787084567267,
      "loss": 3.2921,
      "step": 9515
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.2344605922698975,
      "learning_rate": 0.0005974781792112119,
      "loss": 3.0517,
      "step": 9516
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8339073657989502,
      "learning_rate": 0.0005974776499103903,
      "loss": 3.1014,
      "step": 9517
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7995575666427612,
      "learning_rate": 0.0005974771205542619,
      "loss": 3.1143,
      "step": 9518
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5467665195465088,
      "learning_rate": 0.0005974765911428269,
      "loss": 3.2451,
      "step": 9519
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.739543080329895,
      "learning_rate": 0.0005974760616760855,
      "loss": 3.1832,
      "step": 9520
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5525354146957397,
      "learning_rate": 0.0005974755321540376,
      "loss": 3.4245,
      "step": 9521
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.0699784755706787,
      "learning_rate": 0.0005974750025766833,
      "loss": 2.9359,
      "step": 9522
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3820856809616089,
      "learning_rate": 0.0005974744729440229,
      "loss": 3.4033,
      "step": 9523
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7987525463104248,
      "learning_rate": 0.0005974739432560563,
      "loss": 3.1046,
      "step": 9524
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5101388692855835,
      "learning_rate": 0.0005974734135127838,
      "loss": 3.0762,
      "step": 9525
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.517785668373108,
      "learning_rate": 0.0005974728837142054,
      "loss": 3.305,
      "step": 9526
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4174985885620117,
      "learning_rate": 0.000597472353860321,
      "loss": 3.2681,
      "step": 9527
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9818363189697266,
      "learning_rate": 0.000597471823951131,
      "loss": 3.2137,
      "step": 9528
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.926033616065979,
      "learning_rate": 0.0005974712939866353,
      "loss": 3.6427,
      "step": 9529
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7786656618118286,
      "learning_rate": 0.0005974707639668341,
      "loss": 3.264,
      "step": 9530
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1264169216156006,
      "learning_rate": 0.0005974702338917275,
      "loss": 3.2608,
      "step": 9531
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.747636318206787,
      "learning_rate": 0.0005974697037613155,
      "loss": 3.1985,
      "step": 9532
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7540631294250488,
      "learning_rate": 0.0005974691735755983,
      "loss": 3.0181,
      "step": 9533
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.591404676437378,
      "learning_rate": 0.000597468643334576,
      "loss": 3.2085,
      "step": 9534
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6562368869781494,
      "learning_rate": 0.0005974681130382487,
      "loss": 3.0691,
      "step": 9535
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5031895637512207,
      "learning_rate": 0.0005974675826866164,
      "loss": 3.0583,
      "step": 9536
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6900749206542969,
      "learning_rate": 0.0005974670522796792,
      "loss": 3.223,
      "step": 9537
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5522496700286865,
      "learning_rate": 0.0005974665218174374,
      "loss": 3.0503,
      "step": 9538
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6988564729690552,
      "learning_rate": 0.0005974659912998908,
      "loss": 3.0857,
      "step": 9539
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9078700542449951,
      "learning_rate": 0.0005974654607270398,
      "loss": 3.1759,
      "step": 9540
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8640975952148438,
      "learning_rate": 0.0005974649300988843,
      "loss": 3.1915,
      "step": 9541
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.3000924587249756,
      "learning_rate": 0.0005974643994154245,
      "loss": 3.1241,
      "step": 9542
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.946211338043213,
      "learning_rate": 0.0005974638686766603,
      "loss": 3.2107,
      "step": 9543
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5194603204727173,
      "learning_rate": 0.0005974633378825921,
      "loss": 3.1377,
      "step": 9544
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.863854169845581,
      "learning_rate": 0.0005974628070332199,
      "loss": 3.2069,
      "step": 9545
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8641420602798462,
      "learning_rate": 0.0005974622761285435,
      "loss": 3.4125,
      "step": 9546
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7549569606781006,
      "learning_rate": 0.0005974617451685634,
      "loss": 3.442,
      "step": 9547
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7530443668365479,
      "learning_rate": 0.0005974612141532796,
      "loss": 3.1979,
      "step": 9548
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.827521562576294,
      "learning_rate": 0.0005974606830826922,
      "loss": 3.2394,
      "step": 9549
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5578912496566772,
      "learning_rate": 0.000597460151956801,
      "loss": 3.2501,
      "step": 9550
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1246414184570312,
      "learning_rate": 0.0005974596207756064,
      "loss": 3.097,
      "step": 9551
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3799638748168945,
      "learning_rate": 0.0005974590895391086,
      "loss": 3.3235,
      "step": 9552
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1902120113372803,
      "learning_rate": 0.0005974585582473076,
      "loss": 3.1308,
      "step": 9553
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9482735395431519,
      "learning_rate": 0.0005974580269002032,
      "loss": 3.1046,
      "step": 9554
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4927151203155518,
      "learning_rate": 0.0005974574954977958,
      "loss": 3.0384,
      "step": 9555
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7917181253433228,
      "learning_rate": 0.0005974569640400855,
      "loss": 3.527,
      "step": 9556
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3962706327438354,
      "learning_rate": 0.0005974564325270723,
      "loss": 3.2441,
      "step": 9557
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.490592122077942,
      "learning_rate": 0.0005974559009587564,
      "loss": 3.337,
      "step": 9558
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3895409107208252,
      "learning_rate": 0.0005974553693351378,
      "loss": 3.403,
      "step": 9559
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.053157329559326,
      "learning_rate": 0.0005974548376562166,
      "loss": 3.2155,
      "step": 9560
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.001676321029663,
      "learning_rate": 0.0005974543059219929,
      "loss": 3.2001,
      "step": 9561
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4973210096359253,
      "learning_rate": 0.0005974537741324669,
      "loss": 3.0932,
      "step": 9562
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.352812647819519,
      "learning_rate": 0.0005974532422876385,
      "loss": 3.1702,
      "step": 9563
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.542456865310669,
      "learning_rate": 0.0005974527103875081,
      "loss": 3.063,
      "step": 9564
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.65387761592865,
      "learning_rate": 0.0005974521784320756,
      "loss": 2.9745,
      "step": 9565
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9114657640457153,
      "learning_rate": 0.000597451646421341,
      "loss": 3.0404,
      "step": 9566
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3486651182174683,
      "learning_rate": 0.0005974511143553046,
      "loss": 3.1503,
      "step": 9567
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7657395601272583,
      "learning_rate": 0.0005974505822339664,
      "loss": 3.1854,
      "step": 9568
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8961567878723145,
      "learning_rate": 0.0005974500500573266,
      "loss": 3.0865,
      "step": 9569
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4512608051300049,
      "learning_rate": 0.0005974495178253851,
      "loss": 3.2715,
      "step": 9570
    },
    {
      "epoch": 0.12,
      "grad_norm": 3.206252336502075,
      "learning_rate": 0.0005974489855381423,
      "loss": 3.2114,
      "step": 9571
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4802411794662476,
      "learning_rate": 0.000597448453195598,
      "loss": 3.1486,
      "step": 9572
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.589687705039978,
      "learning_rate": 0.0005974479207977524,
      "loss": 3.01,
      "step": 9573
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.5419821739196777,
      "learning_rate": 0.0005974473883446056,
      "loss": 3.2927,
      "step": 9574
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.3289098739624023,
      "learning_rate": 0.0005974468558361578,
      "loss": 3.115,
      "step": 9575
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.743726372718811,
      "learning_rate": 0.000597446323272409,
      "loss": 3.2834,
      "step": 9576
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7553951740264893,
      "learning_rate": 0.0005974457906533592,
      "loss": 3.267,
      "step": 9577
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5589557886123657,
      "learning_rate": 0.0005974452579790088,
      "loss": 3.16,
      "step": 9578
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.631943941116333,
      "learning_rate": 0.0005974447252493576,
      "loss": 2.9138,
      "step": 9579
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8985143899917603,
      "learning_rate": 0.0005974441924644058,
      "loss": 3.2397,
      "step": 9580
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5651060342788696,
      "learning_rate": 0.0005974436596241535,
      "loss": 3.1586,
      "step": 9581
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.1663596630096436,
      "learning_rate": 0.0005974431267286008,
      "loss": 3.2984,
      "step": 9582
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4674173593521118,
      "learning_rate": 0.0005974425937777479,
      "loss": 3.2453,
      "step": 9583
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.570083737373352,
      "learning_rate": 0.0005974420607715947,
      "loss": 3.4413,
      "step": 9584
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.6360082626342773,
      "learning_rate": 0.0005974415277101415,
      "loss": 3.0839,
      "step": 9585
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.5022183656692505,
      "learning_rate": 0.0005974409945933883,
      "loss": 3.2962,
      "step": 9586
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.8417057991027832,
      "learning_rate": 0.0005974404614213351,
      "loss": 3.4731,
      "step": 9587
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7441710233688354,
      "learning_rate": 0.0005974399281939822,
      "loss": 3.2029,
      "step": 9588
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4283839464187622,
      "learning_rate": 0.0005974393949113296,
      "loss": 3.09,
      "step": 9589
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6054093837738037,
      "learning_rate": 0.0005974388615733774,
      "loss": 3.1554,
      "step": 9590
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.362007975578308,
      "learning_rate": 0.0005974383281801257,
      "loss": 3.2315,
      "step": 9591
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4603523015975952,
      "learning_rate": 0.0005974377947315745,
      "loss": 3.3134,
      "step": 9592
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.9906320571899414,
      "learning_rate": 0.0005974372612277242,
      "loss": 3.1714,
      "step": 9593
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.6942732334136963,
      "learning_rate": 0.0005974367276685745,
      "loss": 3.3878,
      "step": 9594
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.651634693145752,
      "learning_rate": 0.0005974361940541258,
      "loss": 3.27,
      "step": 9595
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.707340121269226,
      "learning_rate": 0.000597435660384378,
      "loss": 3.385,
      "step": 9596
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.322735071182251,
      "learning_rate": 0.0005974351266593315,
      "loss": 3.2594,
      "step": 9597
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.2693978548049927,
      "learning_rate": 0.0005974345928789859,
      "loss": 3.1455,
      "step": 9598
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.7006349563598633,
      "learning_rate": 0.0005974340590433419,
      "loss": 3.1229,
      "step": 9599
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.4500819444656372,
      "learning_rate": 0.0005974335251523991,
      "loss": 3.1099,
      "step": 9600
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5074049234390259,
      "learning_rate": 0.0005974329912061579,
      "loss": 3.2836,
      "step": 9601
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6433074474334717,
      "learning_rate": 0.0005974324572046182,
      "loss": 3.4032,
      "step": 9602
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.688876986503601,
      "learning_rate": 0.0005974319231477803,
      "loss": 3.1717,
      "step": 9603
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9592559337615967,
      "learning_rate": 0.000597431389035644,
      "loss": 3.1681,
      "step": 9604
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6286909580230713,
      "learning_rate": 0.0005974308548682097,
      "loss": 3.1698,
      "step": 9605
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6433759927749634,
      "learning_rate": 0.0005974303206454774,
      "loss": 3.0917,
      "step": 9606
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.150794744491577,
      "learning_rate": 0.0005974297863674473,
      "loss": 2.9914,
      "step": 9607
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8550376892089844,
      "learning_rate": 0.0005974292520341192,
      "loss": 3.0834,
      "step": 9608
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6053487062454224,
      "learning_rate": 0.0005974287176454935,
      "loss": 3.151,
      "step": 9609
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0078248977661133,
      "learning_rate": 0.0005974281832015701,
      "loss": 3.0027,
      "step": 9610
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.493072509765625,
      "learning_rate": 0.0005974276487023493,
      "loss": 3.1543,
      "step": 9611
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.614916205406189,
      "learning_rate": 0.000597427114147831,
      "loss": 3.1725,
      "step": 9612
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.861098289489746,
      "learning_rate": 0.0005974265795380154,
      "loss": 3.2443,
      "step": 9613
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5403683185577393,
      "learning_rate": 0.0005974260448729026,
      "loss": 3.0551,
      "step": 9614
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.791133999824524,
      "learning_rate": 0.0005974255101524927,
      "loss": 3.3777,
      "step": 9615
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6126399040222168,
      "learning_rate": 0.0005974249753767857,
      "loss": 3.0125,
      "step": 9616
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6702985763549805,
      "learning_rate": 0.0005974244405457819,
      "loss": 3.0692,
      "step": 9617
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.673439383506775,
      "learning_rate": 0.0005974239056594812,
      "loss": 3.1556,
      "step": 9618
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5131890773773193,
      "learning_rate": 0.0005974233707178838,
      "loss": 3.356,
      "step": 9619
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9571226835250854,
      "learning_rate": 0.0005974228357209898,
      "loss": 3.2161,
      "step": 9620
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.047928810119629,
      "learning_rate": 0.0005974223006687993,
      "loss": 3.167,
      "step": 9621
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.726336121559143,
      "learning_rate": 0.0005974217655613124,
      "loss": 3.2336,
      "step": 9622
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.207932472229004,
      "learning_rate": 0.0005974212303985291,
      "loss": 2.959,
      "step": 9623
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0679216384887695,
      "learning_rate": 0.0005974206951804495,
      "loss": 3.1593,
      "step": 9624
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9687739610671997,
      "learning_rate": 0.000597420159907074,
      "loss": 3.2024,
      "step": 9625
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3799831867218018,
      "learning_rate": 0.0005974196245784023,
      "loss": 3.241,
      "step": 9626
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.819779396057129,
      "learning_rate": 0.0005974190891944348,
      "loss": 3.3393,
      "step": 9627
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.04341197013855,
      "learning_rate": 0.0005974185537551714,
      "loss": 3.0813,
      "step": 9628
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6459407806396484,
      "learning_rate": 0.0005974180182606123,
      "loss": 3.5456,
      "step": 9629
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7115046977996826,
      "learning_rate": 0.0005974174827107576,
      "loss": 3.2603,
      "step": 9630
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.7294716835021973,
      "learning_rate": 0.0005974169471056073,
      "loss": 3.1205,
      "step": 9631
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.68563711643219,
      "learning_rate": 0.0005974164114451616,
      "loss": 2.8835,
      "step": 9632
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5923446416854858,
      "learning_rate": 0.0005974158757294206,
      "loss": 3.3405,
      "step": 9633
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.502642869949341,
      "learning_rate": 0.0005974153399583845,
      "loss": 3.2491,
      "step": 9634
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.667550802230835,
      "learning_rate": 0.000597414804132053,
      "loss": 2.9885,
      "step": 9635
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6658037900924683,
      "learning_rate": 0.0005974142682504268,
      "loss": 3.2464,
      "step": 9636
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.760530948638916,
      "learning_rate": 0.0005974137323135054,
      "loss": 3.4853,
      "step": 9637
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9209071397781372,
      "learning_rate": 0.0005974131963212893,
      "loss": 3.2618,
      "step": 9638
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.129491090774536,
      "learning_rate": 0.0005974126602737783,
      "loss": 3.3934,
      "step": 9639
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3176724910736084,
      "learning_rate": 0.0005974121241709728,
      "loss": 3.2305,
      "step": 9640
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.212909460067749,
      "learning_rate": 0.0005974115880128729,
      "loss": 3.1094,
      "step": 9641
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4370957612991333,
      "learning_rate": 0.0005974110517994785,
      "loss": 2.9133,
      "step": 9642
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5791962146759033,
      "learning_rate": 0.0005974105155307896,
      "loss": 3.353,
      "step": 9643
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6875605583190918,
      "learning_rate": 0.0005974099792068067,
      "loss": 3.1827,
      "step": 9644
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8252753019332886,
      "learning_rate": 0.0005974094428275296,
      "loss": 2.969,
      "step": 9645
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9721626043319702,
      "learning_rate": 0.0005974089063929584,
      "loss": 3.0802,
      "step": 9646
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5135773420333862,
      "learning_rate": 0.0005974083699030933,
      "loss": 3.0816,
      "step": 9647
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6210781335830688,
      "learning_rate": 0.0005974078333579344,
      "loss": 3.0544,
      "step": 9648
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.191329002380371,
      "learning_rate": 0.0005974072967574818,
      "loss": 2.9156,
      "step": 9649
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5266387462615967,
      "learning_rate": 0.0005974067601017354,
      "loss": 3.0655,
      "step": 9650
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.565875768661499,
      "learning_rate": 0.0005974062233906957,
      "loss": 3.1867,
      "step": 9651
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6573532819747925,
      "learning_rate": 0.0005974056866243625,
      "loss": 3.2234,
      "step": 9652
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.494255781173706,
      "learning_rate": 0.0005974051498027358,
      "loss": 3.4562,
      "step": 9653
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5286986827850342,
      "learning_rate": 0.0005974046129258161,
      "loss": 3.0508,
      "step": 9654
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7276256084442139,
      "learning_rate": 0.0005974040759936032,
      "loss": 3.1631,
      "step": 9655
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.893872857093811,
      "learning_rate": 0.0005974035390060972,
      "loss": 3.14,
      "step": 9656
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3889784812927246,
      "learning_rate": 0.0005974030019632983,
      "loss": 3.2834,
      "step": 9657
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.650899887084961,
      "learning_rate": 0.0005974024648652065,
      "loss": 3.1266,
      "step": 9658
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6923831701278687,
      "learning_rate": 0.000597401927711822,
      "loss": 3.1244,
      "step": 9659
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4139885902404785,
      "learning_rate": 0.000597401390503145,
      "loss": 3.0636,
      "step": 9660
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7761346101760864,
      "learning_rate": 0.0005974008532391753,
      "loss": 3.2166,
      "step": 9661
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1204986572265625,
      "learning_rate": 0.0005974003159199133,
      "loss": 3.149,
      "step": 9662
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4778847694396973,
      "learning_rate": 0.0005973997785453589,
      "loss": 3.128,
      "step": 9663
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1735494136810303,
      "learning_rate": 0.0005973992411155123,
      "loss": 3.3707,
      "step": 9664
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7972416877746582,
      "learning_rate": 0.0005973987036303735,
      "loss": 3.1455,
      "step": 9665
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.498949408531189,
      "learning_rate": 0.0005973981660899427,
      "loss": 2.97,
      "step": 9666
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.824529767036438,
      "learning_rate": 0.0005973976284942199,
      "loss": 3.4602,
      "step": 9667
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.187932252883911,
      "learning_rate": 0.0005973970908432054,
      "loss": 3.413,
      "step": 9668
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.0444562435150146,
      "learning_rate": 0.0005973965531368992,
      "loss": 3.0058,
      "step": 9669
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4383608102798462,
      "learning_rate": 0.0005973960153753012,
      "loss": 2.8614,
      "step": 9670
    },
    {
      "epoch": 0.13,
      "grad_norm": 4.366483211517334,
      "learning_rate": 0.0005973954775584118,
      "loss": 3.2526,
      "step": 9671
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.1160426139831543,
      "learning_rate": 0.0005973949396862309,
      "loss": 3.1962,
      "step": 9672
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8531455993652344,
      "learning_rate": 0.0005973944017587586,
      "loss": 3.3068,
      "step": 9673
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.945623517036438,
      "learning_rate": 0.0005973938637759952,
      "loss": 3.3103,
      "step": 9674
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.17032527923584,
      "learning_rate": 0.0005973933257379406,
      "loss": 3.1287,
      "step": 9675
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6631296873092651,
      "learning_rate": 0.000597392787644595,
      "loss": 3.0682,
      "step": 9676
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2892142534255981,
      "learning_rate": 0.0005973922494959585,
      "loss": 2.9218,
      "step": 9677
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.770105004310608,
      "learning_rate": 0.0005973917112920311,
      "loss": 2.905,
      "step": 9678
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.9093563556671143,
      "learning_rate": 0.000597391173032813,
      "loss": 3.0758,
      "step": 9679
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4158873558044434,
      "learning_rate": 0.0005973906347183042,
      "loss": 3.416,
      "step": 9680
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3037797212600708,
      "learning_rate": 0.000597390096348505,
      "loss": 3.2028,
      "step": 9681
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4497748613357544,
      "learning_rate": 0.0005973895579234152,
      "loss": 3.2185,
      "step": 9682
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3351564407348633,
      "learning_rate": 0.0005973890194430351,
      "loss": 3.3075,
      "step": 9683
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.349003791809082,
      "learning_rate": 0.0005973884809073648,
      "loss": 3.0617,
      "step": 9684
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3740999698638916,
      "learning_rate": 0.0005973879423164044,
      "loss": 3.2115,
      "step": 9685
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3971081972122192,
      "learning_rate": 0.000597387403670154,
      "loss": 3.1857,
      "step": 9686
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3674564361572266,
      "learning_rate": 0.0005973868649686137,
      "loss": 3.0867,
      "step": 9687
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8227546215057373,
      "learning_rate": 0.0005973863262117834,
      "loss": 3.2199,
      "step": 9688
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.036810874938965,
      "learning_rate": 0.0005973857873996635,
      "loss": 2.9975,
      "step": 9689
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4809658527374268,
      "learning_rate": 0.0005973852485322539,
      "loss": 3.3391,
      "step": 9690
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.256216049194336,
      "learning_rate": 0.0005973847096095548,
      "loss": 3.268,
      "step": 9691
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.102725028991699,
      "learning_rate": 0.0005973841706315662,
      "loss": 3.1546,
      "step": 9692
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6049785614013672,
      "learning_rate": 0.0005973836315982884,
      "loss": 3.3307,
      "step": 9693
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.643897294998169,
      "learning_rate": 0.0005973830925097213,
      "loss": 3.2632,
      "step": 9694
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2164957523345947,
      "learning_rate": 0.0005973825533658651,
      "loss": 3.1072,
      "step": 9695
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9860377311706543,
      "learning_rate": 0.0005973820141667198,
      "loss": 3.3073,
      "step": 9696
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.901402473449707,
      "learning_rate": 0.0005973814749122856,
      "loss": 3.4109,
      "step": 9697
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9914089441299438,
      "learning_rate": 0.0005973809356025625,
      "loss": 3.0776,
      "step": 9698
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6296911239624023,
      "learning_rate": 0.0005973803962375509,
      "loss": 3.156,
      "step": 9699
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4993332624435425,
      "learning_rate": 0.0005973798568172505,
      "loss": 3.2628,
      "step": 9700
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7883613109588623,
      "learning_rate": 0.0005973793173416616,
      "loss": 3.274,
      "step": 9701
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2145190238952637,
      "learning_rate": 0.0005973787778107843,
      "loss": 3.2259,
      "step": 9702
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6136224269866943,
      "learning_rate": 0.0005973782382246186,
      "loss": 3.3365,
      "step": 9703
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8371998071670532,
      "learning_rate": 0.0005973776985831647,
      "loss": 3.3814,
      "step": 9704
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9548194408416748,
      "learning_rate": 0.0005973771588864226,
      "loss": 3.23,
      "step": 9705
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6782402992248535,
      "learning_rate": 0.0005973766191343927,
      "loss": 3.4278,
      "step": 9706
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4910478591918945,
      "learning_rate": 0.0005973760793270747,
      "loss": 3.2193,
      "step": 9707
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3387831449508667,
      "learning_rate": 0.0005973755394644689,
      "loss": 3.2443,
      "step": 9708
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.201502561569214,
      "learning_rate": 0.0005973749995465753,
      "loss": 3.3144,
      "step": 9709
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.873420000076294,
      "learning_rate": 0.0005973744595733942,
      "loss": 3.1376,
      "step": 9710
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6352958679199219,
      "learning_rate": 0.0005973739195449254,
      "loss": 3.2535,
      "step": 9711
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4440712928771973,
      "learning_rate": 0.0005973733794611694,
      "loss": 3.0787,
      "step": 9712
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6493151187896729,
      "learning_rate": 0.000597372839322126,
      "loss": 3.2295,
      "step": 9713
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7122448682785034,
      "learning_rate": 0.0005973722991277953,
      "loss": 3.0584,
      "step": 9714
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.514350414276123,
      "learning_rate": 0.0005973717588781775,
      "loss": 3.1364,
      "step": 9715
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1119067668914795,
      "learning_rate": 0.0005973712185732727,
      "loss": 2.9622,
      "step": 9716
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3808062076568604,
      "learning_rate": 0.000597370678213081,
      "loss": 3.1715,
      "step": 9717
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.767424464225769,
      "learning_rate": 0.0005973701377976025,
      "loss": 3.4425,
      "step": 9718
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8501012325286865,
      "learning_rate": 0.0005973695973268372,
      "loss": 3.1093,
      "step": 9719
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.215458869934082,
      "learning_rate": 0.0005973690568007853,
      "loss": 2.939,
      "step": 9720
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0307536125183105,
      "learning_rate": 0.000597368516219447,
      "loss": 3.0824,
      "step": 9721
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5121492147445679,
      "learning_rate": 0.0005973679755828221,
      "loss": 3.3033,
      "step": 9722
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.848546028137207,
      "learning_rate": 0.000597367434890911,
      "loss": 3.3165,
      "step": 9723
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2297003269195557,
      "learning_rate": 0.0005973668941437136,
      "loss": 3.1235,
      "step": 9724
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1057193279266357,
      "learning_rate": 0.0005973663533412302,
      "loss": 2.9913,
      "step": 9725
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4595856666564941,
      "learning_rate": 0.0005973658124834607,
      "loss": 3.2102,
      "step": 9726
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.445258378982544,
      "learning_rate": 0.0005973652715704053,
      "loss": 3.2492,
      "step": 9727
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4046766757965088,
      "learning_rate": 0.000597364730602064,
      "loss": 3.2781,
      "step": 9728
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.314357042312622,
      "learning_rate": 0.000597364189578437,
      "loss": 3.05,
      "step": 9729
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.713329792022705,
      "learning_rate": 0.0005973636484995244,
      "loss": 3.1849,
      "step": 9730
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6053229570388794,
      "learning_rate": 0.0005973631073653263,
      "loss": 2.9127,
      "step": 9731
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4356331825256348,
      "learning_rate": 0.0005973625661758429,
      "loss": 3.1823,
      "step": 9732
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8693889379501343,
      "learning_rate": 0.0005973620249310741,
      "loss": 3.2362,
      "step": 9733
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2657402753829956,
      "learning_rate": 0.00059736148363102,
      "loss": 3.0598,
      "step": 9734
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5473710298538208,
      "learning_rate": 0.0005973609422756809,
      "loss": 3.2692,
      "step": 9735
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.745139718055725,
      "learning_rate": 0.0005973604008650568,
      "loss": 3.1216,
      "step": 9736
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6454174518585205,
      "learning_rate": 0.0005973598593991477,
      "loss": 3.3968,
      "step": 9737
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5418109893798828,
      "learning_rate": 0.0005973593178779538,
      "loss": 3.1064,
      "step": 9738
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8757143020629883,
      "learning_rate": 0.0005973587763014752,
      "loss": 3.1647,
      "step": 9739
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4688717126846313,
      "learning_rate": 0.000597358234669712,
      "loss": 3.0539,
      "step": 9740
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5520519018173218,
      "learning_rate": 0.0005973576929826643,
      "loss": 3.269,
      "step": 9741
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8622844219207764,
      "learning_rate": 0.0005973571512403322,
      "loss": 3.1449,
      "step": 9742
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6223219633102417,
      "learning_rate": 0.0005973566094427158,
      "loss": 3.1158,
      "step": 9743
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.145155191421509,
      "learning_rate": 0.0005973560675898152,
      "loss": 3.1774,
      "step": 9744
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6799235343933105,
      "learning_rate": 0.0005973555256816304,
      "loss": 3.2111,
      "step": 9745
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3247369527816772,
      "learning_rate": 0.0005973549837181616,
      "loss": 3.3252,
      "step": 9746
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.412555456161499,
      "learning_rate": 0.000597354441699409,
      "loss": 3.3982,
      "step": 9747
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.507339358329773,
      "learning_rate": 0.0005973538996253726,
      "loss": 3.2335,
      "step": 9748
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4031344652175903,
      "learning_rate": 0.0005973533574960524,
      "loss": 2.9893,
      "step": 9749
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.438307523727417,
      "learning_rate": 0.0005973528153114487,
      "loss": 2.9472,
      "step": 9750
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.584371566772461,
      "learning_rate": 0.0005973522730715615,
      "loss": 3.321,
      "step": 9751
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5236546993255615,
      "learning_rate": 0.0005973517307763908,
      "loss": 3.1653,
      "step": 9752
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6499402523040771,
      "learning_rate": 0.0005973511884259367,
      "loss": 2.6923,
      "step": 9753
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5481716394424438,
      "learning_rate": 0.0005973506460201996,
      "loss": 3.2603,
      "step": 9754
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6225754022598267,
      "learning_rate": 0.0005973501035591794,
      "loss": 3.2561,
      "step": 9755
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1291208267211914,
      "learning_rate": 0.0005973495610428762,
      "loss": 3.0065,
      "step": 9756
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.236203193664551,
      "learning_rate": 0.00059734901847129,
      "loss": 3.1986,
      "step": 9757
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3978004455566406,
      "learning_rate": 0.0005973484758444211,
      "loss": 3.2728,
      "step": 9758
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4070937633514404,
      "learning_rate": 0.0005973479331622695,
      "loss": 2.8745,
      "step": 9759
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7908411026000977,
      "learning_rate": 0.0005973473904248352,
      "loss": 2.946,
      "step": 9760
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3980050086975098,
      "learning_rate": 0.0005973468476321185,
      "loss": 2.8718,
      "step": 9761
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4201412200927734,
      "learning_rate": 0.0005973463047841194,
      "loss": 3.2611,
      "step": 9762
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.462066888809204,
      "learning_rate": 0.000597345761880838,
      "loss": 3.1347,
      "step": 9763
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.8039982318878174,
      "learning_rate": 0.0005973452189222744,
      "loss": 2.8774,
      "step": 9764
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4831544160842896,
      "learning_rate": 0.0005973446759084287,
      "loss": 3.0816,
      "step": 9765
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7942475080490112,
      "learning_rate": 0.0005973441328393011,
      "loss": 3.2482,
      "step": 9766
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.586289644241333,
      "learning_rate": 0.0005973435897148915,
      "loss": 3.0667,
      "step": 9767
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.678868293762207,
      "learning_rate": 0.0005973430465352001,
      "loss": 3.2567,
      "step": 9768
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3256494998931885,
      "learning_rate": 0.0005973425033002271,
      "loss": 3.2017,
      "step": 9769
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9462716579437256,
      "learning_rate": 0.0005973419600099725,
      "loss": 3.3529,
      "step": 9770
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5295640230178833,
      "learning_rate": 0.0005973414166644364,
      "loss": 3.0857,
      "step": 9771
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.50613272190094,
      "learning_rate": 0.0005973408732636189,
      "loss": 3.4078,
      "step": 9772
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6135993003845215,
      "learning_rate": 0.0005973403298075202,
      "loss": 3.3001,
      "step": 9773
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3013626337051392,
      "learning_rate": 0.0005973397862961402,
      "loss": 3.2497,
      "step": 9774
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.725277066230774,
      "learning_rate": 0.0005973392427294792,
      "loss": 3.2732,
      "step": 9775
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0284125804901123,
      "learning_rate": 0.0005973386991075373,
      "loss": 3.0714,
      "step": 9776
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5754202604293823,
      "learning_rate": 0.0005973381554303143,
      "loss": 3.2758,
      "step": 9777
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4551458358764648,
      "learning_rate": 0.0005973376116978107,
      "loss": 2.8869,
      "step": 9778
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3735827207565308,
      "learning_rate": 0.0005973370679100263,
      "loss": 3.124,
      "step": 9779
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5072752237319946,
      "learning_rate": 0.0005973365240669616,
      "loss": 3.2725,
      "step": 9780
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9071024656295776,
      "learning_rate": 0.000597335980168616,
      "loss": 3.3323,
      "step": 9781
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6432533264160156,
      "learning_rate": 0.0005973354362149903,
      "loss": 3.0783,
      "step": 9782
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4348770380020142,
      "learning_rate": 0.0005973348922060843,
      "loss": 3.4402,
      "step": 9783
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5806214809417725,
      "learning_rate": 0.0005973343481418982,
      "loss": 3.0991,
      "step": 9784
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3632934093475342,
      "learning_rate": 0.0005973338040224319,
      "loss": 3.0931,
      "step": 9785
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.643359661102295,
      "learning_rate": 0.0005973332598476857,
      "loss": 3.1943,
      "step": 9786
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3725591897964478,
      "learning_rate": 0.0005973327156176596,
      "loss": 3.4412,
      "step": 9787
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9530854225158691,
      "learning_rate": 0.0005973321713323538,
      "loss": 3.1288,
      "step": 9788
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3022937774658203,
      "learning_rate": 0.0005973316269917683,
      "loss": 3.2456,
      "step": 9789
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0365703105926514,
      "learning_rate": 0.0005973310825959032,
      "loss": 3.3056,
      "step": 9790
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4283902645111084,
      "learning_rate": 0.0005973305381447587,
      "loss": 3.4499,
      "step": 9791
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1724109649658203,
      "learning_rate": 0.0005973299936383349,
      "loss": 3.0855,
      "step": 9792
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.8957653045654297,
      "learning_rate": 0.0005973294490766316,
      "loss": 3.0747,
      "step": 9793
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5511436462402344,
      "learning_rate": 0.0005973289044596494,
      "loss": 3.3229,
      "step": 9794
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7442392110824585,
      "learning_rate": 0.000597328359787388,
      "loss": 2.9882,
      "step": 9795
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.560099720954895,
      "learning_rate": 0.0005973278150598477,
      "loss": 3.1581,
      "step": 9796
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4958711862564087,
      "learning_rate": 0.0005973272702770285,
      "loss": 3.2155,
      "step": 9797
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.946852684020996,
      "learning_rate": 0.0005973267254389305,
      "loss": 2.9689,
      "step": 9798
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4685684442520142,
      "learning_rate": 0.000597326180545554,
      "loss": 3.2814,
      "step": 9799
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5337868928909302,
      "learning_rate": 0.0005973256355968989,
      "loss": 3.1089,
      "step": 9800
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8200945854187012,
      "learning_rate": 0.0005973250905929653,
      "loss": 3.1007,
      "step": 9801
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4594368934631348,
      "learning_rate": 0.0005973245455337534,
      "loss": 3.2908,
      "step": 9802
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7886641025543213,
      "learning_rate": 0.0005973240004192632,
      "loss": 3.2087,
      "step": 9803
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3337690830230713,
      "learning_rate": 0.0005973234552494949,
      "loss": 2.981,
      "step": 9804
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4262685775756836,
      "learning_rate": 0.0005973229100244485,
      "loss": 3.4852,
      "step": 9805
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5936243534088135,
      "learning_rate": 0.0005973223647441243,
      "loss": 2.9966,
      "step": 9806
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6103687286376953,
      "learning_rate": 0.0005973218194085222,
      "loss": 3.2492,
      "step": 9807
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6631425619125366,
      "learning_rate": 0.0005973212740176423,
      "loss": 3.2766,
      "step": 9808
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6886012554168701,
      "learning_rate": 0.0005973207285714848,
      "loss": 2.9994,
      "step": 9809
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7349637746810913,
      "learning_rate": 0.0005973201830700497,
      "loss": 3.1195,
      "step": 9810
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4961953163146973,
      "learning_rate": 0.0005973196375133373,
      "loss": 3.4035,
      "step": 9811
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5987834930419922,
      "learning_rate": 0.0005973190919013474,
      "loss": 3.2202,
      "step": 9812
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.695497751235962,
      "learning_rate": 0.0005973185462340804,
      "loss": 3.361,
      "step": 9813
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4128135442733765,
      "learning_rate": 0.0005973180005115362,
      "loss": 3.3116,
      "step": 9814
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6935460567474365,
      "learning_rate": 0.0005973174547337151,
      "loss": 3.1253,
      "step": 9815
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.7647104263305664,
      "learning_rate": 0.000597316908900617,
      "loss": 2.9296,
      "step": 9816
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2955535650253296,
      "learning_rate": 0.000597316363012242,
      "loss": 3.0845,
      "step": 9817
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1066324710845947,
      "learning_rate": 0.0005973158170685904,
      "loss": 3.1,
      "step": 9818
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9561736583709717,
      "learning_rate": 0.000597315271069662,
      "loss": 3.2251,
      "step": 9819
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1232917308807373,
      "learning_rate": 0.0005973147250154572,
      "loss": 3.3465,
      "step": 9820
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.2549307346343994,
      "learning_rate": 0.000597314178905976,
      "loss": 3.3061,
      "step": 9821
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.93198823928833,
      "learning_rate": 0.0005973136327412184,
      "loss": 2.9214,
      "step": 9822
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.884753942489624,
      "learning_rate": 0.0005973130865211846,
      "loss": 3.0339,
      "step": 9823
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.357245445251465,
      "learning_rate": 0.0005973125402458747,
      "loss": 3.3673,
      "step": 9824
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4909323453903198,
      "learning_rate": 0.0005973119939152888,
      "loss": 3.3308,
      "step": 9825
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.381883978843689,
      "learning_rate": 0.000597311447529427,
      "loss": 3.0372,
      "step": 9826
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.757726788520813,
      "learning_rate": 0.0005973109010882894,
      "loss": 3.3296,
      "step": 9827
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.301474094390869,
      "learning_rate": 0.000597310354591876,
      "loss": 3.3765,
      "step": 9828
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.791121244430542,
      "learning_rate": 0.000597309808040187,
      "loss": 3.4882,
      "step": 9829
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5040946006774902,
      "learning_rate": 0.0005973092614332227,
      "loss": 3.0359,
      "step": 9830
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5348007678985596,
      "learning_rate": 0.0005973087147709828,
      "loss": 3.2385,
      "step": 9831
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.441200017929077,
      "learning_rate": 0.0005973081680534677,
      "loss": 2.9581,
      "step": 9832
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5984708070755005,
      "learning_rate": 0.0005973076212806773,
      "loss": 3.0074,
      "step": 9833
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.470487594604492,
      "learning_rate": 0.0005973070744526119,
      "loss": 3.2315,
      "step": 9834
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1106698513031006,
      "learning_rate": 0.0005973065275692714,
      "loss": 3.2717,
      "step": 9835
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.606444239616394,
      "learning_rate": 0.000597305980630656,
      "loss": 3.3679,
      "step": 9836
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5039854049682617,
      "learning_rate": 0.0005973054336367659,
      "loss": 3.2832,
      "step": 9837
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4508323669433594,
      "learning_rate": 0.0005973048865876011,
      "loss": 3.1151,
      "step": 9838
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3707606792449951,
      "learning_rate": 0.0005973043394831617,
      "loss": 3.3096,
      "step": 9839
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4191360473632812,
      "learning_rate": 0.0005973037923234478,
      "loss": 3.1222,
      "step": 9840
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4155125617980957,
      "learning_rate": 0.0005973032451084594,
      "loss": 3.354,
      "step": 9841
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4648687839508057,
      "learning_rate": 0.0005973026978381969,
      "loss": 3.142,
      "step": 9842
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7990269660949707,
      "learning_rate": 0.0005973021505126601,
      "loss": 3.0323,
      "step": 9843
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5636767148971558,
      "learning_rate": 0.0005973016031318492,
      "loss": 3.3316,
      "step": 9844
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9656257629394531,
      "learning_rate": 0.0005973010556957645,
      "loss": 3.3246,
      "step": 9845
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4681679010391235,
      "learning_rate": 0.0005973005082044057,
      "loss": 3.1249,
      "step": 9846
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7598974704742432,
      "learning_rate": 0.0005972999606577733,
      "loss": 3.2682,
      "step": 9847
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.515120506286621,
      "learning_rate": 0.0005972994130558671,
      "loss": 3.1412,
      "step": 9848
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5686918497085571,
      "learning_rate": 0.0005972988653986873,
      "loss": 3.2286,
      "step": 9849
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4106749296188354,
      "learning_rate": 0.0005972983176862342,
      "loss": 3.1483,
      "step": 9850
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4170608520507812,
      "learning_rate": 0.0005972977699185075,
      "loss": 3.1288,
      "step": 9851
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4966338872909546,
      "learning_rate": 0.0005972972220955078,
      "loss": 3.3011,
      "step": 9852
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5472519397735596,
      "learning_rate": 0.0005972966742172348,
      "loss": 3.0207,
      "step": 9853
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4167735576629639,
      "learning_rate": 0.0005972961262836887,
      "loss": 3.0729,
      "step": 9854
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5953418016433716,
      "learning_rate": 0.0005972955782948696,
      "loss": 3.1453,
      "step": 9855
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4805060625076294,
      "learning_rate": 0.0005972950302507779,
      "loss": 3.1641,
      "step": 9856
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5435962677001953,
      "learning_rate": 0.0005972944821514131,
      "loss": 3.0633,
      "step": 9857
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.416056513786316,
      "learning_rate": 0.0005972939339967759,
      "loss": 3.0211,
      "step": 9858
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.420111060142517,
      "learning_rate": 0.000597293385786866,
      "loss": 3.2186,
      "step": 9859
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.393084168434143,
      "learning_rate": 0.0005972928375216837,
      "loss": 3.2841,
      "step": 9860
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5458018779754639,
      "learning_rate": 0.0005972922892012291,
      "loss": 3.3326,
      "step": 9861
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.661439061164856,
      "learning_rate": 0.0005972917408255021,
      "loss": 3.0365,
      "step": 9862
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6888095140457153,
      "learning_rate": 0.000597291192394503,
      "loss": 3.1268,
      "step": 9863
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8813447952270508,
      "learning_rate": 0.000597290643908232,
      "loss": 3.232,
      "step": 9864
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.150413751602173,
      "learning_rate": 0.0005972900953666888,
      "loss": 3.3837,
      "step": 9865
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.1534101963043213,
      "learning_rate": 0.000597289546769874,
      "loss": 3.3167,
      "step": 9866
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.998193621635437,
      "learning_rate": 0.0005972889981177874,
      "loss": 3.317,
      "step": 9867
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.516396999359131,
      "learning_rate": 0.0005972884494104292,
      "loss": 3.0962,
      "step": 9868
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.728761911392212,
      "learning_rate": 0.0005972879006477993,
      "loss": 3.0687,
      "step": 9869
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1006197929382324,
      "learning_rate": 0.0005972873518298982,
      "loss": 3.1327,
      "step": 9870
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6579822301864624,
      "learning_rate": 0.0005972868029567256,
      "loss": 3.0994,
      "step": 9871
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.26249098777771,
      "learning_rate": 0.0005972862540282817,
      "loss": 3.1381,
      "step": 9872
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.4013168811798096,
      "learning_rate": 0.0005972857050445669,
      "loss": 3.1733,
      "step": 9873
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.193448781967163,
      "learning_rate": 0.000597285156005581,
      "loss": 3.0324,
      "step": 9874
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9288691282272339,
      "learning_rate": 0.0005972846069113242,
      "loss": 3.3189,
      "step": 9875
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.853905200958252,
      "learning_rate": 0.0005972840577617964,
      "loss": 3.2025,
      "step": 9876
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7816200256347656,
      "learning_rate": 0.000597283508556998,
      "loss": 3.097,
      "step": 9877
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4635677337646484,
      "learning_rate": 0.000597282959296929,
      "loss": 3.1823,
      "step": 9878
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6715267896652222,
      "learning_rate": 0.0005972824099815895,
      "loss": 3.2545,
      "step": 9879
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.883819103240967,
      "learning_rate": 0.0005972818606109796,
      "loss": 3.0012,
      "step": 9880
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2995285987854004,
      "learning_rate": 0.0005972813111850994,
      "loss": 3.3305,
      "step": 9881
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4749506711959839,
      "learning_rate": 0.0005972807617039489,
      "loss": 3.2328,
      "step": 9882
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.681314468383789,
      "learning_rate": 0.0005972802121675283,
      "loss": 2.9088,
      "step": 9883
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.785570740699768,
      "learning_rate": 0.0005972796625758378,
      "loss": 3.3586,
      "step": 9884
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.917758822441101,
      "learning_rate": 0.0005972791129288773,
      "loss": 3.3579,
      "step": 9885
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.517472267150879,
      "learning_rate": 0.000597278563226647,
      "loss": 3.0773,
      "step": 9886
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.854655385017395,
      "learning_rate": 0.000597278013469147,
      "loss": 3.3011,
      "step": 9887
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7883418798446655,
      "learning_rate": 0.0005972774636563775,
      "loss": 3.2651,
      "step": 9888
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3269850015640259,
      "learning_rate": 0.0005972769137883385,
      "loss": 3.2346,
      "step": 9889
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7369616031646729,
      "learning_rate": 0.0005972763638650301,
      "loss": 3.3233,
      "step": 9890
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3376212120056152,
      "learning_rate": 0.0005972758138864523,
      "loss": 3.1087,
      "step": 9891
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6620947122573853,
      "learning_rate": 0.0005972752638526054,
      "loss": 3.1071,
      "step": 9892
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5996354818344116,
      "learning_rate": 0.0005972747137634895,
      "loss": 3.3187,
      "step": 9893
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.684483528137207,
      "learning_rate": 0.0005972741636191044,
      "loss": 3.005,
      "step": 9894
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7221307754516602,
      "learning_rate": 0.0005972736134194506,
      "loss": 3.3281,
      "step": 9895
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4019349813461304,
      "learning_rate": 0.000597273063164528,
      "loss": 3.2152,
      "step": 9896
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.36745023727417,
      "learning_rate": 0.0005972725128543367,
      "loss": 3.1543,
      "step": 9897
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4830617904663086,
      "learning_rate": 0.0005972719624888768,
      "loss": 3.155,
      "step": 9898
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6550393104553223,
      "learning_rate": 0.0005972714120681485,
      "loss": 3.1658,
      "step": 9899
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.709790587425232,
      "learning_rate": 0.0005972708615921518,
      "loss": 3.4521,
      "step": 9900
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3636274337768555,
      "learning_rate": 0.0005972703110608868,
      "loss": 3.2164,
      "step": 9901
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.481756329536438,
      "learning_rate": 0.0005972697604743536,
      "loss": 3.5471,
      "step": 9902
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5680960416793823,
      "learning_rate": 0.0005972692098325525,
      "loss": 3.0908,
      "step": 9903
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8385086059570312,
      "learning_rate": 0.0005972686591354834,
      "loss": 2.9781,
      "step": 9904
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5377247333526611,
      "learning_rate": 0.0005972681083831464,
      "loss": 3.2221,
      "step": 9905
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6576322317123413,
      "learning_rate": 0.0005972675575755417,
      "loss": 3.378,
      "step": 9906
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.296401858329773,
      "learning_rate": 0.0005972670067126692,
      "loss": 3.3177,
      "step": 9907
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4078967571258545,
      "learning_rate": 0.0005972664557945292,
      "loss": 3.0842,
      "step": 9908
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.963869571685791,
      "learning_rate": 0.0005972659048211218,
      "loss": 3.1929,
      "step": 9909
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4681795835494995,
      "learning_rate": 0.0005972653537924471,
      "loss": 3.2658,
      "step": 9910
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5818555355072021,
      "learning_rate": 0.0005972648027085052,
      "loss": 2.9727,
      "step": 9911
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6033220291137695,
      "learning_rate": 0.000597264251569296,
      "loss": 3.1627,
      "step": 9912
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6674190759658813,
      "learning_rate": 0.0005972637003748199,
      "loss": 3.0856,
      "step": 9913
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.649326205253601,
      "learning_rate": 0.0005972631491250768,
      "loss": 3.2355,
      "step": 9914
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4049098491668701,
      "learning_rate": 0.0005972625978200669,
      "loss": 3.1977,
      "step": 9915
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1997432708740234,
      "learning_rate": 0.0005972620464597901,
      "loss": 3.0476,
      "step": 9916
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6528418064117432,
      "learning_rate": 0.0005972614950442469,
      "loss": 3.1962,
      "step": 9917
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.731757879257202,
      "learning_rate": 0.0005972609435734371,
      "loss": 2.9181,
      "step": 9918
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5822712182998657,
      "learning_rate": 0.0005972603920473609,
      "loss": 3.4599,
      "step": 9919
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5931012630462646,
      "learning_rate": 0.0005972598404660183,
      "loss": 3.5095,
      "step": 9920
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7954483032226562,
      "learning_rate": 0.0005972592888294095,
      "loss": 3.1295,
      "step": 9921
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6509175300598145,
      "learning_rate": 0.0005972587371375347,
      "loss": 3.1171,
      "step": 9922
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4759306907653809,
      "learning_rate": 0.0005972581853903938,
      "loss": 3.1223,
      "step": 9923
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7107455730438232,
      "learning_rate": 0.0005972576335879871,
      "loss": 3.1538,
      "step": 9924
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7680864334106445,
      "learning_rate": 0.0005972570817303145,
      "loss": 3.1112,
      "step": 9925
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5921423435211182,
      "learning_rate": 0.0005972565298173762,
      "loss": 3.3328,
      "step": 9926
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3554494380950928,
      "learning_rate": 0.0005972559778491723,
      "loss": 3.2425,
      "step": 9927
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8390600681304932,
      "learning_rate": 0.0005972554258257029,
      "loss": 3.0298,
      "step": 9928
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6698018312454224,
      "learning_rate": 0.0005972548737469681,
      "loss": 3.1617,
      "step": 9929
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2140223979949951,
      "learning_rate": 0.0005972543216129681,
      "loss": 3.2288,
      "step": 9930
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8052858114242554,
      "learning_rate": 0.0005972537694237029,
      "loss": 2.9972,
      "step": 9931
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.497342586517334,
      "learning_rate": 0.0005972532171791725,
      "loss": 3.3763,
      "step": 9932
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4487613439559937,
      "learning_rate": 0.0005972526648793771,
      "loss": 3.17,
      "step": 9933
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7872658967971802,
      "learning_rate": 0.0005972521125243168,
      "loss": 3.0171,
      "step": 9934
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.306156635284424,
      "learning_rate": 0.0005972515601139919,
      "loss": 3.2763,
      "step": 9935
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6846024990081787,
      "learning_rate": 0.0005972510076484022,
      "loss": 3.0189,
      "step": 9936
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.145383358001709,
      "learning_rate": 0.000597250455127548,
      "loss": 3.0397,
      "step": 9937
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5448077917099,
      "learning_rate": 0.0005972499025514292,
      "loss": 2.9686,
      "step": 9938
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4866055250167847,
      "learning_rate": 0.0005972493499200462,
      "loss": 3.0317,
      "step": 9939
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8384671211242676,
      "learning_rate": 0.0005972487972333987,
      "loss": 3.2015,
      "step": 9940
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7909395694732666,
      "learning_rate": 0.0005972482444914873,
      "loss": 3.2071,
      "step": 9941
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8927935361862183,
      "learning_rate": 0.0005972476916943117,
      "loss": 3.0135,
      "step": 9942
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7080069780349731,
      "learning_rate": 0.0005972471388418722,
      "loss": 3.2201,
      "step": 9943
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2841947078704834,
      "learning_rate": 0.0005972465859341688,
      "loss": 3.1593,
      "step": 9944
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8328502178192139,
      "learning_rate": 0.0005972460329712017,
      "loss": 3.1955,
      "step": 9945
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2002782821655273,
      "learning_rate": 0.0005972454799529709,
      "loss": 2.9374,
      "step": 9946
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.383514404296875,
      "learning_rate": 0.0005972449268794765,
      "loss": 3.16,
      "step": 9947
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0027124881744385,
      "learning_rate": 0.0005972443737507188,
      "loss": 3.2985,
      "step": 9948
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.2541632652282715,
      "learning_rate": 0.0005972438205666976,
      "loss": 3.3891,
      "step": 9949
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.763538122177124,
      "learning_rate": 0.0005972432673274133,
      "loss": 3.0606,
      "step": 9950
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5468333959579468,
      "learning_rate": 0.0005972427140328658,
      "loss": 3.3197,
      "step": 9951
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7445549964904785,
      "learning_rate": 0.0005972421606830553,
      "loss": 3.1776,
      "step": 9952
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7085431814193726,
      "learning_rate": 0.0005972416072779819,
      "loss": 3.2518,
      "step": 9953
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3995147943496704,
      "learning_rate": 0.0005972410538176456,
      "loss": 3.3214,
      "step": 9954
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.514597773551941,
      "learning_rate": 0.0005972405003020466,
      "loss": 3.2887,
      "step": 9955
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7309123277664185,
      "learning_rate": 0.000597239946731185,
      "loss": 3.1803,
      "step": 9956
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3318127393722534,
      "learning_rate": 0.0005972393931050609,
      "loss": 3.5519,
      "step": 9957
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.821163535118103,
      "learning_rate": 0.0005972388394236744,
      "loss": 3.4147,
      "step": 9958
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4664980173110962,
      "learning_rate": 0.0005972382856870255,
      "loss": 3.0691,
      "step": 9959
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6245217323303223,
      "learning_rate": 0.0005972377318951144,
      "loss": 3.1489,
      "step": 9960
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3398524522781372,
      "learning_rate": 0.0005972371780479413,
      "loss": 3.3967,
      "step": 9961
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7532222270965576,
      "learning_rate": 0.0005972366241455061,
      "loss": 3.2325,
      "step": 9962
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8148561716079712,
      "learning_rate": 0.000597236070187809,
      "loss": 3.185,
      "step": 9963
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0798373222351074,
      "learning_rate": 0.0005972355161748501,
      "loss": 3.2155,
      "step": 9964
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3526614904403687,
      "learning_rate": 0.0005972349621066297,
      "loss": 3.2395,
      "step": 9965
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5566880702972412,
      "learning_rate": 0.0005972344079831475,
      "loss": 3.0083,
      "step": 9966
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4490625858306885,
      "learning_rate": 0.0005972338538044038,
      "loss": 3.3339,
      "step": 9967
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4340465068817139,
      "learning_rate": 0.0005972332995703988,
      "loss": 3.1552,
      "step": 9968
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6781030893325806,
      "learning_rate": 0.0005972327452811324,
      "loss": 3.1429,
      "step": 9969
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3849310874938965,
      "learning_rate": 0.000597232190936605,
      "loss": 3.161,
      "step": 9970
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6454923152923584,
      "learning_rate": 0.0005972316365368165,
      "loss": 3.2241,
      "step": 9971
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.489911675453186,
      "learning_rate": 0.0005972310820817669,
      "loss": 3.0978,
      "step": 9972
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6289656162261963,
      "learning_rate": 0.0005972305275714566,
      "loss": 3.1823,
      "step": 9973
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5097838640213013,
      "learning_rate": 0.0005972299730058854,
      "loss": 3.2314,
      "step": 9974
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3845152854919434,
      "learning_rate": 0.0005972294183850536,
      "loss": 3.1348,
      "step": 9975
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4881857633590698,
      "learning_rate": 0.0005972288637089612,
      "loss": 3.1661,
      "step": 9976
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.592867136001587,
      "learning_rate": 0.0005972283089776084,
      "loss": 3.3627,
      "step": 9977
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.631935477256775,
      "learning_rate": 0.0005972277541909951,
      "loss": 2.9915,
      "step": 9978
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4507368803024292,
      "learning_rate": 0.0005972271993491217,
      "loss": 3.2651,
      "step": 9979
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7999844551086426,
      "learning_rate": 0.0005972266444519882,
      "loss": 3.2471,
      "step": 9980
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.625105619430542,
      "learning_rate": 0.0005972260894995945,
      "loss": 3.491,
      "step": 9981
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5279264450073242,
      "learning_rate": 0.0005972255344919409,
      "loss": 3.3298,
      "step": 9982
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.853163719177246,
      "learning_rate": 0.0005972249794290275,
      "loss": 3.1901,
      "step": 9983
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7351442575454712,
      "learning_rate": 0.0005972244243108543,
      "loss": 3.2448,
      "step": 9984
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6457029581069946,
      "learning_rate": 0.0005972238691374216,
      "loss": 3.4357,
      "step": 9985
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8424209356307983,
      "learning_rate": 0.0005972233139087293,
      "loss": 3.168,
      "step": 9986
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4989173412322998,
      "learning_rate": 0.0005972227586247775,
      "loss": 3.2407,
      "step": 9987
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4864451885223389,
      "learning_rate": 0.0005972222032855666,
      "loss": 3.2912,
      "step": 9988
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.54514741897583,
      "learning_rate": 0.0005972216478910962,
      "loss": 3.2674,
      "step": 9989
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.415473222732544,
      "learning_rate": 0.0005972210924413668,
      "loss": 3.2792,
      "step": 9990
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4578701257705688,
      "learning_rate": 0.0005972205369363784,
      "loss": 3.278,
      "step": 9991
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.381642460823059,
      "learning_rate": 0.0005972199813761311,
      "loss": 3.1672,
      "step": 9992
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6769696474075317,
      "learning_rate": 0.000597219425760625,
      "loss": 3.069,
      "step": 9993
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.632895827293396,
      "learning_rate": 0.0005972188700898602,
      "loss": 3.0255,
      "step": 9994
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.75045907497406,
      "learning_rate": 0.0005972183143638368,
      "loss": 3.1657,
      "step": 9995
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5662144422531128,
      "learning_rate": 0.000597217758582555,
      "loss": 2.9777,
      "step": 9996
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3435838222503662,
      "learning_rate": 0.0005972172027460146,
      "loss": 3.18,
      "step": 9997
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.305151104927063,
      "learning_rate": 0.000597216646854216,
      "loss": 3.161,
      "step": 9998
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5131169557571411,
      "learning_rate": 0.0005972160909071593,
      "loss": 3.0851,
      "step": 9999
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.214496374130249,
      "learning_rate": 0.0005972155349048445,
      "loss": 3.1233,
      "step": 10000
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.925205111503601,
      "learning_rate": 0.0005972149788472716,
      "loss": 3.2587,
      "step": 10001
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5231813192367554,
      "learning_rate": 0.0005972144227344409,
      "loss": 3.3718,
      "step": 10002
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.242102861404419,
      "learning_rate": 0.0005972138665663525,
      "loss": 3.1169,
      "step": 10003
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.07678484916687,
      "learning_rate": 0.0005972133103430063,
      "loss": 2.9368,
      "step": 10004
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4542790651321411,
      "learning_rate": 0.0005972127540644026,
      "loss": 3.2353,
      "step": 10005
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4655354022979736,
      "learning_rate": 0.0005972121977305414,
      "loss": 3.0337,
      "step": 10006
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.200155735015869,
      "learning_rate": 0.000597211641341423,
      "loss": 3.3207,
      "step": 10007
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6844466924667358,
      "learning_rate": 0.0005972110848970471,
      "loss": 3.2242,
      "step": 10008
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4749197959899902,
      "learning_rate": 0.0005972105283974142,
      "loss": 3.1477,
      "step": 10009
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9694650173187256,
      "learning_rate": 0.0005972099718425242,
      "loss": 3.1135,
      "step": 10010
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8229081630706787,
      "learning_rate": 0.0005972094152323773,
      "loss": 3.262,
      "step": 10011
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4892553091049194,
      "learning_rate": 0.0005972088585669736,
      "loss": 3.1557,
      "step": 10012
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5887233018875122,
      "learning_rate": 0.000597208301846313,
      "loss": 3.2726,
      "step": 10013
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4512499570846558,
      "learning_rate": 0.0005972077450703959,
      "loss": 3.0628,
      "step": 10014
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4386628866195679,
      "learning_rate": 0.0005972071882392223,
      "loss": 2.9249,
      "step": 10015
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3601714372634888,
      "learning_rate": 0.0005972066313527922,
      "loss": 3.1457,
      "step": 10016
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.466349482536316,
      "learning_rate": 0.0005972060744111057,
      "loss": 3.3379,
      "step": 10017
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0084211826324463,
      "learning_rate": 0.0005972055174141631,
      "loss": 3.1913,
      "step": 10018
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5817617177963257,
      "learning_rate": 0.0005972049603619644,
      "loss": 3.249,
      "step": 10019
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8584368228912354,
      "learning_rate": 0.0005972044032545095,
      "loss": 3.3981,
      "step": 10020
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4275059700012207,
      "learning_rate": 0.0005972038460917989,
      "loss": 3.312,
      "step": 10021
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4196850061416626,
      "learning_rate": 0.0005972032888738324,
      "loss": 3.1944,
      "step": 10022
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.572527527809143,
      "learning_rate": 0.0005972027316006102,
      "loss": 2.982,
      "step": 10023
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3554497957229614,
      "learning_rate": 0.0005972021742721324,
      "loss": 3.0741,
      "step": 10024
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6239545345306396,
      "learning_rate": 0.0005972016168883992,
      "loss": 3.3289,
      "step": 10025
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5621856451034546,
      "learning_rate": 0.0005972010594494106,
      "loss": 2.9953,
      "step": 10026
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6408575773239136,
      "learning_rate": 0.0005972005019551667,
      "loss": 3.2049,
      "step": 10027
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5716670751571655,
      "learning_rate": 0.0005971999444056675,
      "loss": 3.3489,
      "step": 10028
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.757696509361267,
      "learning_rate": 0.0005971993868009133,
      "loss": 3.2228,
      "step": 10029
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4739763736724854,
      "learning_rate": 0.000597198829140904,
      "loss": 3.1556,
      "step": 10030
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8027875423431396,
      "learning_rate": 0.0005971982714256401,
      "loss": 3.2994,
      "step": 10031
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.809694766998291,
      "learning_rate": 0.0005971977136551213,
      "loss": 3.3699,
      "step": 10032
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4305908679962158,
      "learning_rate": 0.0005971971558293476,
      "loss": 2.8799,
      "step": 10033
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3988511562347412,
      "learning_rate": 0.0005971965979483196,
      "loss": 3.3307,
      "step": 10034
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7452174425125122,
      "learning_rate": 0.0005971960400120371,
      "loss": 3.202,
      "step": 10035
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.415239930152893,
      "learning_rate": 0.0005971954820205002,
      "loss": 3.1953,
      "step": 10036
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4440497159957886,
      "learning_rate": 0.0005971949239737092,
      "loss": 2.9983,
      "step": 10037
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2338740825653076,
      "learning_rate": 0.0005971943658716638,
      "loss": 3.2893,
      "step": 10038
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5356403589248657,
      "learning_rate": 0.0005971938077143646,
      "loss": 3.072,
      "step": 10039
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5900752544403076,
      "learning_rate": 0.0005971932495018113,
      "loss": 2.9759,
      "step": 10040
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4158581495285034,
      "learning_rate": 0.0005971926912340043,
      "loss": 2.8432,
      "step": 10041
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7243857383728027,
      "learning_rate": 0.0005971921329109435,
      "loss": 3.214,
      "step": 10042
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7411798238754272,
      "learning_rate": 0.000597191574532629,
      "loss": 3.1875,
      "step": 10043
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2632405757904053,
      "learning_rate": 0.0005971910160990611,
      "loss": 3.0125,
      "step": 10044
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8098069429397583,
      "learning_rate": 0.0005971904576102397,
      "loss": 3.2609,
      "step": 10045
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3904800415039062,
      "learning_rate": 0.0005971898990661651,
      "loss": 3.1526,
      "step": 10046
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6567000150680542,
      "learning_rate": 0.000597189340466837,
      "loss": 3.2043,
      "step": 10047
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6213247776031494,
      "learning_rate": 0.000597188781812256,
      "loss": 3.1826,
      "step": 10048
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4484593868255615,
      "learning_rate": 0.0005971882231024221,
      "loss": 3.0923,
      "step": 10049
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8428809642791748,
      "learning_rate": 0.0005971876643373351,
      "loss": 2.9608,
      "step": 10050
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7162967920303345,
      "learning_rate": 0.0005971871055169954,
      "loss": 3.146,
      "step": 10051
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7184170484542847,
      "learning_rate": 0.000597186546641403,
      "loss": 3.1753,
      "step": 10052
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7292529344558716,
      "learning_rate": 0.0005971859877105581,
      "loss": 3.3432,
      "step": 10053
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7031975984573364,
      "learning_rate": 0.0005971854287244605,
      "loss": 3.4132,
      "step": 10054
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3834291696548462,
      "learning_rate": 0.0005971848696831106,
      "loss": 3.2634,
      "step": 10055
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6478595733642578,
      "learning_rate": 0.0005971843105865085,
      "loss": 3.337,
      "step": 10056
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7881157398223877,
      "learning_rate": 0.0005971837514346541,
      "loss": 2.847,
      "step": 10057
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5700068473815918,
      "learning_rate": 0.0005971831922275477,
      "loss": 3.1483,
      "step": 10058
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1675937175750732,
      "learning_rate": 0.0005971826329651895,
      "loss": 3.23,
      "step": 10059
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1395115852355957,
      "learning_rate": 0.0005971820736475793,
      "loss": 3.3536,
      "step": 10060
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5172637701034546,
      "learning_rate": 0.0005971815142747172,
      "loss": 2.9997,
      "step": 10061
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9928184747695923,
      "learning_rate": 0.0005971809548466037,
      "loss": 2.9068,
      "step": 10062
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.430969476699829,
      "learning_rate": 0.0005971803953632385,
      "loss": 3.228,
      "step": 10063
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6890230178833008,
      "learning_rate": 0.000597179835824622,
      "loss": 3.2746,
      "step": 10064
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3592325448989868,
      "learning_rate": 0.0005971792762307539,
      "loss": 3.2598,
      "step": 10065
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.177830219268799,
      "learning_rate": 0.0005971787165816347,
      "loss": 3.1819,
      "step": 10066
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5873920917510986,
      "learning_rate": 0.0005971781568772644,
      "loss": 3.0324,
      "step": 10067
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.656390905380249,
      "learning_rate": 0.000597177597117643,
      "loss": 3.2228,
      "step": 10068
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.86443293094635,
      "learning_rate": 0.0005971770373027707,
      "loss": 3.387,
      "step": 10069
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5847861766815186,
      "learning_rate": 0.0005971764774326476,
      "loss": 3.3104,
      "step": 10070
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2472662925720215,
      "learning_rate": 0.0005971759175072739,
      "loss": 3.2795,
      "step": 10071
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4932971000671387,
      "learning_rate": 0.0005971753575266494,
      "loss": 2.9984,
      "step": 10072
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1242835521698,
      "learning_rate": 0.0005971747974907744,
      "loss": 3.1002,
      "step": 10073
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7582943439483643,
      "learning_rate": 0.0005971742373996491,
      "loss": 3.3426,
      "step": 10074
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.608426094055176,
      "learning_rate": 0.0005971736772532734,
      "loss": 3.0843,
      "step": 10075
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.042126178741455,
      "learning_rate": 0.0005971731170516477,
      "loss": 2.9698,
      "step": 10076
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5262384414672852,
      "learning_rate": 0.0005971725567947717,
      "loss": 3.2203,
      "step": 10077
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7043174505233765,
      "learning_rate": 0.0005971719964826458,
      "loss": 3.0767,
      "step": 10078
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3172967433929443,
      "learning_rate": 0.00059717143611527,
      "loss": 3.3047,
      "step": 10079
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0213489532470703,
      "learning_rate": 0.0005971708756926443,
      "loss": 3.3388,
      "step": 10080
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.582754135131836,
      "learning_rate": 0.0005971703152147692,
      "loss": 3.0559,
      "step": 10081
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9540925025939941,
      "learning_rate": 0.0005971697546816443,
      "loss": 3.1449,
      "step": 10082
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8898776769638062,
      "learning_rate": 0.0005971691940932699,
      "loss": 2.9238,
      "step": 10083
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.596257209777832,
      "learning_rate": 0.0005971686334496463,
      "loss": 3.352,
      "step": 10084
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6332013607025146,
      "learning_rate": 0.0005971680727507734,
      "loss": 2.9807,
      "step": 10085
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9944640398025513,
      "learning_rate": 0.0005971675119966514,
      "loss": 3.2878,
      "step": 10086
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3681644201278687,
      "learning_rate": 0.0005971669511872802,
      "loss": 3.1875,
      "step": 10087
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6430848836898804,
      "learning_rate": 0.0005971663903226601,
      "loss": 3.1391,
      "step": 10088
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.528165578842163,
      "learning_rate": 0.0005971658294027913,
      "loss": 3.085,
      "step": 10089
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3077151775360107,
      "learning_rate": 0.0005971652684276736,
      "loss": 3.2216,
      "step": 10090
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8964539766311646,
      "learning_rate": 0.0005971647073973073,
      "loss": 3.0772,
      "step": 10091
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5028743743896484,
      "learning_rate": 0.0005971641463116925,
      "loss": 3.2777,
      "step": 10092
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6295385360717773,
      "learning_rate": 0.0005971635851708293,
      "loss": 3.3685,
      "step": 10093
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1171152591705322,
      "learning_rate": 0.0005971630239747177,
      "loss": 3.0974,
      "step": 10094
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0852839946746826,
      "learning_rate": 0.0005971624627233579,
      "loss": 3.2192,
      "step": 10095
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0707218647003174,
      "learning_rate": 0.0005971619014167501,
      "loss": 2.8648,
      "step": 10096
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9028693437576294,
      "learning_rate": 0.0005971613400548942,
      "loss": 3.2382,
      "step": 10097
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.408170223236084,
      "learning_rate": 0.0005971607786377903,
      "loss": 3.3081,
      "step": 10098
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5153911113739014,
      "learning_rate": 0.0005971602171654389,
      "loss": 3.2304,
      "step": 10099
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8071011304855347,
      "learning_rate": 0.0005971596556378395,
      "loss": 2.9995,
      "step": 10100
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3604252338409424,
      "learning_rate": 0.0005971590940549926,
      "loss": 3.0968,
      "step": 10101
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.444132924079895,
      "learning_rate": 0.0005971585324168983,
      "loss": 3.2946,
      "step": 10102
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2258095741271973,
      "learning_rate": 0.0005971579707235566,
      "loss": 3.3079,
      "step": 10103
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.163708209991455,
      "learning_rate": 0.0005971574089749676,
      "loss": 3.2629,
      "step": 10104
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6059907674789429,
      "learning_rate": 0.0005971568471711314,
      "loss": 2.988,
      "step": 10105
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7984215021133423,
      "learning_rate": 0.0005971562853120481,
      "loss": 3.0988,
      "step": 10106
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4563294649124146,
      "learning_rate": 0.0005971557233977179,
      "loss": 2.9645,
      "step": 10107
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7220757007598877,
      "learning_rate": 0.0005971551614281409,
      "loss": 3.2123,
      "step": 10108
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.720116376876831,
      "learning_rate": 0.000597154599403317,
      "loss": 3.1007,
      "step": 10109
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4715585708618164,
      "learning_rate": 0.0005971540373232465,
      "loss": 3.0743,
      "step": 10110
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3909006118774414,
      "learning_rate": 0.0005971534751879295,
      "loss": 3.19,
      "step": 10111
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3660918474197388,
      "learning_rate": 0.0005971529129973661,
      "loss": 3.3113,
      "step": 10112
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5607407093048096,
      "learning_rate": 0.0005971523507515563,
      "loss": 3.0987,
      "step": 10113
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3553853034973145,
      "learning_rate": 0.0005971517884505002,
      "loss": 3.2081,
      "step": 10114
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5797786712646484,
      "learning_rate": 0.0005971512260941981,
      "loss": 3.061,
      "step": 10115
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.587260365486145,
      "learning_rate": 0.0005971506636826499,
      "loss": 3.4217,
      "step": 10116
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7420070171356201,
      "learning_rate": 0.0005971501012158559,
      "loss": 3.2241,
      "step": 10117
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.915462613105774,
      "learning_rate": 0.0005971495386938158,
      "loss": 3.2202,
      "step": 10118
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0156238079071045,
      "learning_rate": 0.0005971489761165303,
      "loss": 3.2104,
      "step": 10119
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8570964336395264,
      "learning_rate": 0.0005971484134839991,
      "loss": 3.3421,
      "step": 10120
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9880748987197876,
      "learning_rate": 0.0005971478507962223,
      "loss": 3.1753,
      "step": 10121
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6040980815887451,
      "learning_rate": 0.0005971472880532002,
      "loss": 3.2398,
      "step": 10122
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5618358850479126,
      "learning_rate": 0.0005971467252549328,
      "loss": 3.0049,
      "step": 10123
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4347407817840576,
      "learning_rate": 0.0005971461624014202,
      "loss": 3.2331,
      "step": 10124
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5324004888534546,
      "learning_rate": 0.0005971455994926626,
      "loss": 3.3501,
      "step": 10125
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7184481620788574,
      "learning_rate": 0.00059714503652866,
      "loss": 3.1489,
      "step": 10126
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.419592022895813,
      "learning_rate": 0.0005971444735094124,
      "loss": 2.9285,
      "step": 10127
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3161795139312744,
      "learning_rate": 0.0005971439104349201,
      "loss": 3.1466,
      "step": 10128
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.446603775024414,
      "learning_rate": 0.0005971433473051832,
      "loss": 3.1711,
      "step": 10129
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6061086654663086,
      "learning_rate": 0.0005971427841202018,
      "loss": 3.1304,
      "step": 10130
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7128303050994873,
      "learning_rate": 0.0005971422208799758,
      "loss": 3.1354,
      "step": 10131
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6462795734405518,
      "learning_rate": 0.0005971416575845054,
      "loss": 3.288,
      "step": 10132
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5473657846450806,
      "learning_rate": 0.0005971410942337909,
      "loss": 3.078,
      "step": 10133
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.015775442123413,
      "learning_rate": 0.0005971405308278322,
      "loss": 3.0766,
      "step": 10134
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0082297325134277,
      "learning_rate": 0.0005971399673666295,
      "loss": 3.2464,
      "step": 10135
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5320268869400024,
      "learning_rate": 0.0005971394038501829,
      "loss": 3.1727,
      "step": 10136
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4955928325653076,
      "learning_rate": 0.0005971388402784924,
      "loss": 3.0326,
      "step": 10137
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8055070638656616,
      "learning_rate": 0.0005971382766515582,
      "loss": 2.9433,
      "step": 10138
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5113424062728882,
      "learning_rate": 0.0005971377129693804,
      "loss": 3.3784,
      "step": 10139
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.741231083869934,
      "learning_rate": 0.0005971371492319591,
      "loss": 3.0748,
      "step": 10140
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6757285594940186,
      "learning_rate": 0.0005971365854392943,
      "loss": 3.2262,
      "step": 10141
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5769879817962646,
      "learning_rate": 0.0005971360215913864,
      "loss": 3.1608,
      "step": 10142
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7926533222198486,
      "learning_rate": 0.0005971354576882351,
      "loss": 3.0527,
      "step": 10143
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7286635637283325,
      "learning_rate": 0.0005971348937298409,
      "loss": 3.2525,
      "step": 10144
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.396621584892273,
      "learning_rate": 0.0005971343297162036,
      "loss": 3.3738,
      "step": 10145
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8138172626495361,
      "learning_rate": 0.0005971337656473233,
      "loss": 3.034,
      "step": 10146
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.467015266418457,
      "learning_rate": 0.0005971332015232005,
      "loss": 3.3616,
      "step": 10147
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6253068447113037,
      "learning_rate": 0.0005971326373438348,
      "loss": 3.0375,
      "step": 10148
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.220579147338867,
      "learning_rate": 0.0005971320731092266,
      "loss": 3.0364,
      "step": 10149
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3979140520095825,
      "learning_rate": 0.000597131508819376,
      "loss": 3.1897,
      "step": 10150
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5899224281311035,
      "learning_rate": 0.0005971309444742829,
      "loss": 3.1307,
      "step": 10151
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2137057781219482,
      "learning_rate": 0.0005971303800739478,
      "loss": 3.1048,
      "step": 10152
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.426971435546875,
      "learning_rate": 0.0005971298156183703,
      "loss": 2.9258,
      "step": 10153
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.348381757736206,
      "learning_rate": 0.0005971292511075509,
      "loss": 3.2284,
      "step": 10154
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5366965532302856,
      "learning_rate": 0.0005971286865414896,
      "loss": 3.0614,
      "step": 10155
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4607962369918823,
      "learning_rate": 0.0005971281219201863,
      "loss": 3.1724,
      "step": 10156
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5664507150650024,
      "learning_rate": 0.0005971275572436415,
      "loss": 3.3635,
      "step": 10157
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3418121337890625,
      "learning_rate": 0.0005971269925118548,
      "loss": 3.3113,
      "step": 10158
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.530646562576294,
      "learning_rate": 0.0005971264277248268,
      "loss": 3.3821,
      "step": 10159
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2652407884597778,
      "learning_rate": 0.0005971258628825573,
      "loss": 3.2609,
      "step": 10160
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7889282703399658,
      "learning_rate": 0.0005971252979850466,
      "loss": 2.959,
      "step": 10161
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3717398643493652,
      "learning_rate": 0.0005971247330322945,
      "loss": 3.3478,
      "step": 10162
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5102864503860474,
      "learning_rate": 0.0005971241680243014,
      "loss": 3.0673,
      "step": 10163
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7377289533615112,
      "learning_rate": 0.0005971236029610673,
      "loss": 3.0848,
      "step": 10164
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.357032060623169,
      "learning_rate": 0.0005971230378425924,
      "loss": 3.0889,
      "step": 10165
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4518859386444092,
      "learning_rate": 0.0005971224726688765,
      "loss": 2.9072,
      "step": 10166
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.610706090927124,
      "learning_rate": 0.0005971219074399201,
      "loss": 3.336,
      "step": 10167
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4264062643051147,
      "learning_rate": 0.0005971213421557232,
      "loss": 3.2949,
      "step": 10168
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0006861686706543,
      "learning_rate": 0.0005971207768162857,
      "loss": 3.2889,
      "step": 10169
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3878569602966309,
      "learning_rate": 0.000597120211421608,
      "loss": 3.2019,
      "step": 10170
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.901974081993103,
      "learning_rate": 0.0005971196459716899,
      "loss": 3.2817,
      "step": 10171
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.357298493385315,
      "learning_rate": 0.0005971190804665316,
      "loss": 3.1405,
      "step": 10172
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.530911922454834,
      "learning_rate": 0.0005971185149061334,
      "loss": 3.1552,
      "step": 10173
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5225828886032104,
      "learning_rate": 0.0005971179492904952,
      "loss": 2.9439,
      "step": 10174
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.551089882850647,
      "learning_rate": 0.0005971173836196171,
      "loss": 3.228,
      "step": 10175
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3186548948287964,
      "learning_rate": 0.0005971168178934993,
      "loss": 3.0492,
      "step": 10176
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7230335474014282,
      "learning_rate": 0.0005971162521121419,
      "loss": 3.2673,
      "step": 10177
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3714840412139893,
      "learning_rate": 0.0005971156862755449,
      "loss": 2.9849,
      "step": 10178
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2548463344573975,
      "learning_rate": 0.0005971151203837086,
      "loss": 2.9102,
      "step": 10179
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6278159618377686,
      "learning_rate": 0.0005971145544366329,
      "loss": 3.2997,
      "step": 10180
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.677347183227539,
      "learning_rate": 0.000597113988434318,
      "loss": 3.3364,
      "step": 10181
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4540847539901733,
      "learning_rate": 0.0005971134223767642,
      "loss": 2.9785,
      "step": 10182
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.523666501045227,
      "learning_rate": 0.0005971128562639712,
      "loss": 3.1583,
      "step": 10183
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9928672313690186,
      "learning_rate": 0.0005971122900959394,
      "loss": 3.2555,
      "step": 10184
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.401561975479126,
      "learning_rate": 0.0005971117238726687,
      "loss": 3.0442,
      "step": 10185
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5876333713531494,
      "learning_rate": 0.0005971111575941595,
      "loss": 3.1312,
      "step": 10186
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.137662172317505,
      "learning_rate": 0.0005971105912604116,
      "loss": 3.1478,
      "step": 10187
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.517041802406311,
      "learning_rate": 0.0005971100248714252,
      "loss": 3.1281,
      "step": 10188
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6702725887298584,
      "learning_rate": 0.0005971094584272006,
      "loss": 3.1164,
      "step": 10189
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7070473432540894,
      "learning_rate": 0.0005971088919277376,
      "loss": 3.2193,
      "step": 10190
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9417693614959717,
      "learning_rate": 0.0005971083253730365,
      "loss": 3.3255,
      "step": 10191
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4537333250045776,
      "learning_rate": 0.0005971077587630973,
      "loss": 3.2721,
      "step": 10192
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8857035636901855,
      "learning_rate": 0.0005971071920979203,
      "loss": 3.1141,
      "step": 10193
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5999946594238281,
      "learning_rate": 0.0005971066253775053,
      "loss": 3.6576,
      "step": 10194
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4301055669784546,
      "learning_rate": 0.0005971060586018527,
      "loss": 3.2558,
      "step": 10195
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.334699273109436,
      "learning_rate": 0.0005971054917709624,
      "loss": 2.969,
      "step": 10196
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7164870500564575,
      "learning_rate": 0.0005971049248848346,
      "loss": 3.1316,
      "step": 10197
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.1872862577438354,
      "learning_rate": 0.0005971043579434695,
      "loss": 3.2056,
      "step": 10198
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4348747730255127,
      "learning_rate": 0.0005971037909468668,
      "loss": 3.1523,
      "step": 10199
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5267953872680664,
      "learning_rate": 0.0005971032238950272,
      "loss": 3.2667,
      "step": 10200
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4464983940124512,
      "learning_rate": 0.0005971026567879503,
      "loss": 3.1792,
      "step": 10201
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.00400972366333,
      "learning_rate": 0.0005971020896256365,
      "loss": 3.2449,
      "step": 10202
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5204216241836548,
      "learning_rate": 0.0005971015224080858,
      "loss": 3.4951,
      "step": 10203
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.429891347885132,
      "learning_rate": 0.0005971009551352984,
      "loss": 3.2524,
      "step": 10204
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5902997255325317,
      "learning_rate": 0.0005971003878072742,
      "loss": 3.3269,
      "step": 10205
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5817360877990723,
      "learning_rate": 0.0005970998204240134,
      "loss": 3.1368,
      "step": 10206
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7107815742492676,
      "learning_rate": 0.0005970992529855162,
      "loss": 3.2831,
      "step": 10207
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7316683530807495,
      "learning_rate": 0.0005970986854917827,
      "loss": 3.3809,
      "step": 10208
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.075021982192993,
      "learning_rate": 0.0005970981179428129,
      "loss": 3.4073,
      "step": 10209
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9371354579925537,
      "learning_rate": 0.0005970975503386069,
      "loss": 3.124,
      "step": 10210
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4659394025802612,
      "learning_rate": 0.0005970969826791649,
      "loss": 3.3811,
      "step": 10211
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8244521617889404,
      "learning_rate": 0.0005970964149644869,
      "loss": 3.1422,
      "step": 10212
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9043316841125488,
      "learning_rate": 0.0005970958471945732,
      "loss": 3.3638,
      "step": 10213
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5826889276504517,
      "learning_rate": 0.0005970952793694237,
      "loss": 3.289,
      "step": 10214
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.945934534072876,
      "learning_rate": 0.0005970947114890385,
      "loss": 3.2269,
      "step": 10215
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.101468324661255,
      "learning_rate": 0.0005970941435534179,
      "loss": 3.2706,
      "step": 10216
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.265836000442505,
      "learning_rate": 0.000597093575562562,
      "loss": 3.2104,
      "step": 10217
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7265660762786865,
      "learning_rate": 0.0005970930075164706,
      "loss": 3.191,
      "step": 10218
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7198556661605835,
      "learning_rate": 0.000597092439415144,
      "loss": 3.3212,
      "step": 10219
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.58771812915802,
      "learning_rate": 0.0005970918712585824,
      "loss": 3.3854,
      "step": 10220
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4796334505081177,
      "learning_rate": 0.0005970913030467857,
      "loss": 3.2566,
      "step": 10221
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.478744387626648,
      "learning_rate": 0.0005970907347797543,
      "loss": 3.3535,
      "step": 10222
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.216447591781616,
      "learning_rate": 0.000597090166457488,
      "loss": 3.339,
      "step": 10223
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.1530978679656982,
      "learning_rate": 0.0005970895980799871,
      "loss": 3.2356,
      "step": 10224
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7747368812561035,
      "learning_rate": 0.0005970890296472515,
      "loss": 2.9956,
      "step": 10225
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.400087594985962,
      "learning_rate": 0.0005970884611592815,
      "loss": 3.2805,
      "step": 10226
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.17830753326416,
      "learning_rate": 0.0005970878926160772,
      "loss": 3.2855,
      "step": 10227
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8685569763183594,
      "learning_rate": 0.0005970873240176386,
      "loss": 3.5913,
      "step": 10228
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.5510993003845215,
      "learning_rate": 0.000597086755363966,
      "loss": 3.2366,
      "step": 10229
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3119921684265137,
      "learning_rate": 0.0005970861866550592,
      "loss": 3.5021,
      "step": 10230
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8269376754760742,
      "learning_rate": 0.0005970856178909186,
      "loss": 3.0817,
      "step": 10231
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.379309892654419,
      "learning_rate": 0.000597085049071544,
      "loss": 3.0522,
      "step": 10232
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9750221967697144,
      "learning_rate": 0.0005970844801969358,
      "loss": 3.2363,
      "step": 10233
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7801315784454346,
      "learning_rate": 0.0005970839112670939,
      "loss": 3.3415,
      "step": 10234
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.532897710800171,
      "learning_rate": 0.0005970833422820186,
      "loss": 3.1347,
      "step": 10235
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7385170459747314,
      "learning_rate": 0.0005970827732417099,
      "loss": 3.2897,
      "step": 10236
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4930561780929565,
      "learning_rate": 0.0005970822041461678,
      "loss": 2.8436,
      "step": 10237
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8175222873687744,
      "learning_rate": 0.0005970816349953927,
      "loss": 3.3449,
      "step": 10238
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.358933448791504,
      "learning_rate": 0.0005970810657893843,
      "loss": 3.076,
      "step": 10239
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.237912178039551,
      "learning_rate": 0.000597080496528143,
      "loss": 2.8276,
      "step": 10240
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.912453055381775,
      "learning_rate": 0.0005970799272116689,
      "loss": 3.0836,
      "step": 10241
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8134926557540894,
      "learning_rate": 0.0005970793578399619,
      "loss": 3.3293,
      "step": 10242
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8443892002105713,
      "learning_rate": 0.0005970787884130223,
      "loss": 3.2999,
      "step": 10243
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6635733842849731,
      "learning_rate": 0.0005970782189308502,
      "loss": 3.0815,
      "step": 10244
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4892327785491943,
      "learning_rate": 0.0005970776493934457,
      "loss": 2.9913,
      "step": 10245
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.427078127861023,
      "learning_rate": 0.0005970770798008087,
      "loss": 3.2008,
      "step": 10246
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.644866704940796,
      "learning_rate": 0.0005970765101529395,
      "loss": 3.5233,
      "step": 10247
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.4626200199127197,
      "learning_rate": 0.0005970759404498382,
      "loss": 3.2088,
      "step": 10248
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6354514360427856,
      "learning_rate": 0.0005970753706915049,
      "loss": 3.4357,
      "step": 10249
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4778151512145996,
      "learning_rate": 0.0005970748008779397,
      "loss": 3.1099,
      "step": 10250
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9321671724319458,
      "learning_rate": 0.0005970742310091425,
      "loss": 3.4601,
      "step": 10251
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3896584510803223,
      "learning_rate": 0.0005970736610851138,
      "loss": 3.2688,
      "step": 10252
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4826077222824097,
      "learning_rate": 0.0005970730911058535,
      "loss": 3.4705,
      "step": 10253
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2852281332015991,
      "learning_rate": 0.0005970725210713617,
      "loss": 3.1183,
      "step": 10254
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2172787189483643,
      "learning_rate": 0.0005970719509816383,
      "loss": 3.0436,
      "step": 10255
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7374173402786255,
      "learning_rate": 0.0005970713808366838,
      "loss": 3.2521,
      "step": 10256
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3450558185577393,
      "learning_rate": 0.0005970708106364981,
      "loss": 3.1504,
      "step": 10257
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8576210737228394,
      "learning_rate": 0.0005970702403810814,
      "loss": 3.0567,
      "step": 10258
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3594547510147095,
      "learning_rate": 0.0005970696700704336,
      "loss": 2.9858,
      "step": 10259
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2331360578536987,
      "learning_rate": 0.0005970690997045549,
      "loss": 3.0296,
      "step": 10260
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.565793752670288,
      "learning_rate": 0.0005970685292834456,
      "loss": 3.2054,
      "step": 10261
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5292675495147705,
      "learning_rate": 0.0005970679588071056,
      "loss": 3.303,
      "step": 10262
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3816710710525513,
      "learning_rate": 0.000597067388275535,
      "loss": 3.2703,
      "step": 10263
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6675517559051514,
      "learning_rate": 0.0005970668176887342,
      "loss": 3.0005,
      "step": 10264
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5650712251663208,
      "learning_rate": 0.0005970662470467027,
      "loss": 3.335,
      "step": 10265
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3733597993850708,
      "learning_rate": 0.0005970656763494411,
      "loss": 3.2339,
      "step": 10266
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.386946439743042,
      "learning_rate": 0.0005970651055969495,
      "loss": 3.1967,
      "step": 10267
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3680745363235474,
      "learning_rate": 0.0005970645347892279,
      "loss": 3.1607,
      "step": 10268
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.8639721870422363,
      "learning_rate": 0.0005970639639262763,
      "loss": 3.1084,
      "step": 10269
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4491065740585327,
      "learning_rate": 0.0005970633930080949,
      "loss": 3.3083,
      "step": 10270
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.578798770904541,
      "learning_rate": 0.0005970628220346839,
      "loss": 3.3371,
      "step": 10271
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7581820487976074,
      "learning_rate": 0.0005970622510060432,
      "loss": 3.3118,
      "step": 10272
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4422756433486938,
      "learning_rate": 0.0005970616799221731,
      "loss": 3.1206,
      "step": 10273
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7859920263290405,
      "learning_rate": 0.0005970611087830737,
      "loss": 2.877,
      "step": 10274
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.406746506690979,
      "learning_rate": 0.0005970605375887448,
      "loss": 2.9622,
      "step": 10275
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.829084038734436,
      "learning_rate": 0.0005970599663391869,
      "loss": 3.2262,
      "step": 10276
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7089974880218506,
      "learning_rate": 0.0005970593950343999,
      "loss": 3.0047,
      "step": 10277
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8152066469192505,
      "learning_rate": 0.000597058823674384,
      "loss": 3.4461,
      "step": 10278
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6303982734680176,
      "learning_rate": 0.0005970582522591393,
      "loss": 2.972,
      "step": 10279
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2812113761901855,
      "learning_rate": 0.0005970576807886657,
      "loss": 3.0066,
      "step": 10280
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.011192798614502,
      "learning_rate": 0.0005970571092629636,
      "loss": 3.0307,
      "step": 10281
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.975733995437622,
      "learning_rate": 0.0005970565376820329,
      "loss": 2.8555,
      "step": 10282
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.527585029602051,
      "learning_rate": 0.0005970559660458738,
      "loss": 3.3406,
      "step": 10283
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.385047435760498,
      "learning_rate": 0.0005970553943544864,
      "loss": 3.0755,
      "step": 10284
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3168281316757202,
      "learning_rate": 0.0005970548226078709,
      "loss": 3.2824,
      "step": 10285
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5387918949127197,
      "learning_rate": 0.0005970542508060271,
      "loss": 3.2788,
      "step": 10286
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9619379043579102,
      "learning_rate": 0.0005970536789489554,
      "loss": 3.2713,
      "step": 10287
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.231663465499878,
      "learning_rate": 0.0005970531070366559,
      "loss": 3.2269,
      "step": 10288
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.403385877609253,
      "learning_rate": 0.0005970525350691285,
      "loss": 3.084,
      "step": 10289
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.9711499214172363,
      "learning_rate": 0.0005970519630463735,
      "loss": 3.1504,
      "step": 10290
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.061556100845337,
      "learning_rate": 0.0005970513909683908,
      "loss": 2.977,
      "step": 10291
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4403120279312134,
      "learning_rate": 0.0005970508188351808,
      "loss": 3.2793,
      "step": 10292
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7806686162948608,
      "learning_rate": 0.0005970502466467434,
      "loss": 3.1588,
      "step": 10293
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6921764612197876,
      "learning_rate": 0.0005970496744030787,
      "loss": 3.1607,
      "step": 10294
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5251185894012451,
      "learning_rate": 0.0005970491021041869,
      "loss": 3.5161,
      "step": 10295
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.399906039237976,
      "learning_rate": 0.0005970485297500681,
      "loss": 3.0065,
      "step": 10296
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7022343873977661,
      "learning_rate": 0.0005970479573407224,
      "loss": 3.2215,
      "step": 10297
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5765138864517212,
      "learning_rate": 0.0005970473848761497,
      "loss": 3.0248,
      "step": 10298
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.904990315437317,
      "learning_rate": 0.0005970468123563504,
      "loss": 3.1987,
      "step": 10299
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5795979499816895,
      "learning_rate": 0.0005970462397813245,
      "loss": 3.1346,
      "step": 10300
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.966654896736145,
      "learning_rate": 0.0005970456671510721,
      "loss": 3.2143,
      "step": 10301
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3689568042755127,
      "learning_rate": 0.0005970450944655933,
      "loss": 3.1498,
      "step": 10302
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6259407997131348,
      "learning_rate": 0.0005970445217248881,
      "loss": 3.2886,
      "step": 10303
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7060184478759766,
      "learning_rate": 0.0005970439489289569,
      "loss": 3.2528,
      "step": 10304
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4614747762680054,
      "learning_rate": 0.0005970433760777996,
      "loss": 3.0434,
      "step": 10305
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.781078577041626,
      "learning_rate": 0.0005970428031714162,
      "loss": 2.8092,
      "step": 10306
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4527596235275269,
      "learning_rate": 0.0005970422302098069,
      "loss": 3.0025,
      "step": 10307
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4266481399536133,
      "learning_rate": 0.000597041657192972,
      "loss": 3.2133,
      "step": 10308
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.2056796550750732,
      "learning_rate": 0.0005970410841209115,
      "loss": 3.1888,
      "step": 10309
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4096829891204834,
      "learning_rate": 0.0005970405109936253,
      "loss": 3.0205,
      "step": 10310
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4961588382720947,
      "learning_rate": 0.0005970399378111137,
      "loss": 3.4461,
      "step": 10311
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5089163780212402,
      "learning_rate": 0.0005970393645733766,
      "loss": 3.1137,
      "step": 10312
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.271845579147339,
      "learning_rate": 0.0005970387912804146,
      "loss": 3.2263,
      "step": 10313
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5485624074935913,
      "learning_rate": 0.0005970382179322273,
      "loss": 2.9188,
      "step": 10314
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.524315595626831,
      "learning_rate": 0.000597037644528815,
      "loss": 3.139,
      "step": 10315
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4990862607955933,
      "learning_rate": 0.0005970370710701779,
      "loss": 3.0898,
      "step": 10316
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5647237300872803,
      "learning_rate": 0.0005970364975563158,
      "loss": 3.1478,
      "step": 10317
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7268810272216797,
      "learning_rate": 0.0005970359239872292,
      "loss": 3.0808,
      "step": 10318
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.2817765474319458,
      "learning_rate": 0.0005970353503629179,
      "loss": 3.288,
      "step": 10319
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.533941388130188,
      "learning_rate": 0.0005970347766833822,
      "loss": 3.3619,
      "step": 10320
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5897634029388428,
      "learning_rate": 0.0005970342029486221,
      "loss": 3.0533,
      "step": 10321
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0934596061706543,
      "learning_rate": 0.0005970336291586377,
      "loss": 3.0758,
      "step": 10322
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5380791425704956,
      "learning_rate": 0.0005970330553134291,
      "loss": 3.1563,
      "step": 10323
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.578069806098938,
      "learning_rate": 0.0005970324814129967,
      "loss": 3.0772,
      "step": 10324
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6219494342803955,
      "learning_rate": 0.00059703190745734,
      "loss": 3.2071,
      "step": 10325
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.3196359872817993,
      "learning_rate": 0.0005970313334464596,
      "loss": 3.0565,
      "step": 10326
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5116703510284424,
      "learning_rate": 0.0005970307593803556,
      "loss": 3.134,
      "step": 10327
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.051532745361328,
      "learning_rate": 0.0005970301852590278,
      "loss": 3.2105,
      "step": 10328
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8220815658569336,
      "learning_rate": 0.0005970296110824765,
      "loss": 3.4365,
      "step": 10329
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4883922338485718,
      "learning_rate": 0.0005970290368507019,
      "loss": 3.0066,
      "step": 10330
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.7382450103759766,
      "learning_rate": 0.0005970284625637039,
      "loss": 3.3173,
      "step": 10331
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7291971445083618,
      "learning_rate": 0.0005970278882214828,
      "loss": 3.1543,
      "step": 10332
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6921647787094116,
      "learning_rate": 0.0005970273138240386,
      "loss": 3.4787,
      "step": 10333
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.752510666847229,
      "learning_rate": 0.0005970267393713713,
      "loss": 3.3406,
      "step": 10334
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.372890591621399,
      "learning_rate": 0.0005970261648634812,
      "loss": 3.1537,
      "step": 10335
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4841192960739136,
      "learning_rate": 0.0005970255903003683,
      "loss": 3.0226,
      "step": 10336
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.652987003326416,
      "learning_rate": 0.0005970250156820328,
      "loss": 3.431,
      "step": 10337
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6034133434295654,
      "learning_rate": 0.0005970244410084747,
      "loss": 3.1596,
      "step": 10338
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5371030569076538,
      "learning_rate": 0.0005970238662796942,
      "loss": 3.0889,
      "step": 10339
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4700219631195068,
      "learning_rate": 0.0005970232914956912,
      "loss": 3.049,
      "step": 10340
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5265979766845703,
      "learning_rate": 0.0005970227166564661,
      "loss": 3.161,
      "step": 10341
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.674570083618164,
      "learning_rate": 0.0005970221417620188,
      "loss": 3.2263,
      "step": 10342
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7950761318206787,
      "learning_rate": 0.0005970215668123494,
      "loss": 3.2784,
      "step": 10343
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.327129364013672,
      "learning_rate": 0.0005970209918074583,
      "loss": 3.1844,
      "step": 10344
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8602313995361328,
      "learning_rate": 0.0005970204167473453,
      "loss": 2.9314,
      "step": 10345
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6993693113327026,
      "learning_rate": 0.0005970198416320106,
      "loss": 3.2756,
      "step": 10346
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5912078619003296,
      "learning_rate": 0.0005970192664614542,
      "loss": 3.0143,
      "step": 10347
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8599966764450073,
      "learning_rate": 0.0005970186912356764,
      "loss": 3.1072,
      "step": 10348
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5755451917648315,
      "learning_rate": 0.0005970181159546773,
      "loss": 3.0741,
      "step": 10349
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.8769739866256714,
      "learning_rate": 0.0005970175406184568,
      "loss": 3.2051,
      "step": 10350
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.5607671737670898,
      "learning_rate": 0.0005970169652270152,
      "loss": 2.9997,
      "step": 10351
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6307897567749023,
      "learning_rate": 0.0005970163897803525,
      "loss": 3.2059,
      "step": 10352
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.784854769706726,
      "learning_rate": 0.0005970158142784689,
      "loss": 3.1741,
      "step": 10353
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.0915486812591553,
      "learning_rate": 0.0005970152387213644,
      "loss": 2.948,
      "step": 10354
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.628309965133667,
      "learning_rate": 0.0005970146631090392,
      "loss": 3.2449,
      "step": 10355
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.7588897943496704,
      "learning_rate": 0.0005970140874414934,
      "loss": 3.3247,
      "step": 10356
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4898483753204346,
      "learning_rate": 0.0005970135117187269,
      "loss": 3.1099,
      "step": 10357
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.6792876720428467,
      "learning_rate": 0.0005970129359407401,
      "loss": 3.0903,
      "step": 10358
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.673040509223938,
      "learning_rate": 0.000597012360107533,
      "loss": 3.2484,
      "step": 10359
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.472472071647644,
      "learning_rate": 0.0005970117842191057,
      "loss": 3.2292,
      "step": 10360
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4410808086395264,
      "learning_rate": 0.0005970112082754583,
      "loss": 3.0519,
      "step": 10361
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.6324230432510376,
      "learning_rate": 0.0005970106322765909,
      "loss": 3.2795,
      "step": 10362
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.9891458749771118,
      "learning_rate": 0.0005970100562225037,
      "loss": 3.1596,
      "step": 10363
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.3356149196624756,
      "learning_rate": 0.0005970094801131966,
      "loss": 2.9443,
      "step": 10364
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.030752658843994,
      "learning_rate": 0.0005970089039486698,
      "loss": 3.3424,
      "step": 10365
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.35246205329895,
      "learning_rate": 0.0005970083277289235,
      "loss": 3.2963,
      "step": 10366
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.4313313961029053,
      "learning_rate": 0.0005970077514539577,
      "loss": 3.0665,
      "step": 10367
    },
    {
      "epoch": 0.13,
      "grad_norm": 2.8878307342529297,
      "learning_rate": 0.0005970071751237728,
      "loss": 3.0818,
      "step": 10368
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6966404914855957,
      "learning_rate": 0.0005970065987383684,
      "loss": 3.3084,
      "step": 10369
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8052363395690918,
      "learning_rate": 0.0005970060222977449,
      "loss": 2.9273,
      "step": 10370
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4765146970748901,
      "learning_rate": 0.0005970054458019024,
      "loss": 3.3435,
      "step": 10371
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5578793287277222,
      "learning_rate": 0.000597004869250841,
      "loss": 3.0315,
      "step": 10372
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3677335977554321,
      "learning_rate": 0.0005970042926445608,
      "loss": 3.2443,
      "step": 10373
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.481796145439148,
      "learning_rate": 0.0005970037159830618,
      "loss": 2.9133,
      "step": 10374
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5923023223876953,
      "learning_rate": 0.0005970031392663444,
      "loss": 3.2284,
      "step": 10375
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5764975547790527,
      "learning_rate": 0.0005970025624944083,
      "loss": 3.2218,
      "step": 10376
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4391591548919678,
      "learning_rate": 0.0005970019856672539,
      "loss": 3.0328,
      "step": 10377
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4408483505249023,
      "learning_rate": 0.0005970014087848813,
      "loss": 3.2553,
      "step": 10378
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.741910696029663,
      "learning_rate": 0.0005970008318472904,
      "loss": 3.2279,
      "step": 10379
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.683486223220825,
      "learning_rate": 0.0005970002548544815,
      "loss": 3.045,
      "step": 10380
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6822686195373535,
      "learning_rate": 0.0005969996778064546,
      "loss": 3.2855,
      "step": 10381
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9653542041778564,
      "learning_rate": 0.00059699910070321,
      "loss": 3.1181,
      "step": 10382
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3630794286727905,
      "learning_rate": 0.0005969985235447474,
      "loss": 3.2823,
      "step": 10383
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7234885692596436,
      "learning_rate": 0.0005969979463310674,
      "loss": 3.5562,
      "step": 10384
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3925251960754395,
      "learning_rate": 0.0005969973690621699,
      "loss": 3.3445,
      "step": 10385
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5825155973434448,
      "learning_rate": 0.0005969967917380548,
      "loss": 2.9922,
      "step": 10386
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0493757724761963,
      "learning_rate": 0.0005969962143587225,
      "loss": 3.0933,
      "step": 10387
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6650772094726562,
      "learning_rate": 0.000596995636924173,
      "loss": 3.2631,
      "step": 10388
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6962226629257202,
      "learning_rate": 0.0005969950594344063,
      "loss": 3.1025,
      "step": 10389
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4866734743118286,
      "learning_rate": 0.0005969944818894228,
      "loss": 3.5469,
      "step": 10390
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.5170843601226807,
      "learning_rate": 0.0005969939042892222,
      "loss": 3.1055,
      "step": 10391
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7861803770065308,
      "learning_rate": 0.0005969933266338049,
      "loss": 3.1038,
      "step": 10392
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.7981302738189697,
      "learning_rate": 0.0005969927489231711,
      "loss": 3.1348,
      "step": 10393
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.65751051902771,
      "learning_rate": 0.0005969921711573206,
      "loss": 2.9993,
      "step": 10394
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4247097969055176,
      "learning_rate": 0.0005969915933362536,
      "loss": 3.2762,
      "step": 10395
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8546379804611206,
      "learning_rate": 0.0005969910154599703,
      "loss": 3.1061,
      "step": 10396
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0614917278289795,
      "learning_rate": 0.0005969904375284709,
      "loss": 3.3566,
      "step": 10397
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6623128652572632,
      "learning_rate": 0.0005969898595417552,
      "loss": 3.2887,
      "step": 10398
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0934317111968994,
      "learning_rate": 0.0005969892814998235,
      "loss": 3.1109,
      "step": 10399
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5512646436691284,
      "learning_rate": 0.0005969887034026759,
      "loss": 3.2136,
      "step": 10400
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7560704946517944,
      "learning_rate": 0.0005969881252503125,
      "loss": 3.4049,
      "step": 10401
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4831321239471436,
      "learning_rate": 0.0005969875470427335,
      "loss": 3.2546,
      "step": 10402
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6887273788452148,
      "learning_rate": 0.0005969869687799387,
      "loss": 3.1298,
      "step": 10403
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4199678897857666,
      "learning_rate": 0.0005969863904619286,
      "loss": 3.464,
      "step": 10404
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4209051132202148,
      "learning_rate": 0.000596985812088703,
      "loss": 3.1716,
      "step": 10405
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3398486375808716,
      "learning_rate": 0.0005969852336602622,
      "loss": 3.151,
      "step": 10406
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7363251447677612,
      "learning_rate": 0.0005969846551766063,
      "loss": 2.8599,
      "step": 10407
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6869499683380127,
      "learning_rate": 0.0005969840766377353,
      "loss": 3.4545,
      "step": 10408
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0414364337921143,
      "learning_rate": 0.0005969834980436493,
      "loss": 3.4655,
      "step": 10409
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.489272117614746,
      "learning_rate": 0.0005969829193943484,
      "loss": 3.0889,
      "step": 10410
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.579026460647583,
      "learning_rate": 0.0005969823406898329,
      "loss": 3.1446,
      "step": 10411
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.884738564491272,
      "learning_rate": 0.0005969817619301028,
      "loss": 3.0097,
      "step": 10412
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8906861543655396,
      "learning_rate": 0.0005969811831151581,
      "loss": 3.0714,
      "step": 10413
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5519425868988037,
      "learning_rate": 0.0005969806042449991,
      "loss": 3.4281,
      "step": 10414
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5125068426132202,
      "learning_rate": 0.0005969800253196257,
      "loss": 3.2534,
      "step": 10415
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.923904299736023,
      "learning_rate": 0.0005969794463390382,
      "loss": 3.2605,
      "step": 10416
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.505847215652466,
      "learning_rate": 0.0005969788673032365,
      "loss": 3.3843,
      "step": 10417
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1427040100097656,
      "learning_rate": 0.0005969782882122208,
      "loss": 3.1922,
      "step": 10418
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6752337217330933,
      "learning_rate": 0.0005969777090659912,
      "loss": 3.2065,
      "step": 10419
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.252810478210449,
      "learning_rate": 0.000596977129864548,
      "loss": 3.1148,
      "step": 10420
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5989717245101929,
      "learning_rate": 0.0005969765506078911,
      "loss": 3.2509,
      "step": 10421
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.016817092895508,
      "learning_rate": 0.0005969759712960206,
      "loss": 3.0108,
      "step": 10422
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2763400077819824,
      "learning_rate": 0.0005969753919289367,
      "loss": 3.2584,
      "step": 10423
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9622117280960083,
      "learning_rate": 0.0005969748125066395,
      "loss": 3.3253,
      "step": 10424
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9590004682540894,
      "learning_rate": 0.000596974233029129,
      "loss": 3.207,
      "step": 10425
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6886358261108398,
      "learning_rate": 0.0005969736534964055,
      "loss": 3.0404,
      "step": 10426
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5978810787200928,
      "learning_rate": 0.0005969730739084689,
      "loss": 3.2967,
      "step": 10427
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2877306938171387,
      "learning_rate": 0.0005969724942653194,
      "loss": 3.0497,
      "step": 10428
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2532973289489746,
      "learning_rate": 0.0005969719145669571,
      "loss": 3.1262,
      "step": 10429
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.776152491569519,
      "learning_rate": 0.0005969713348133822,
      "loss": 3.1744,
      "step": 10430
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5954710245132446,
      "learning_rate": 0.0005969707550045946,
      "loss": 3.3766,
      "step": 10431
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3747286796569824,
      "learning_rate": 0.0005969701751405946,
      "loss": 3.1665,
      "step": 10432
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5585647821426392,
      "learning_rate": 0.0005969695952213823,
      "loss": 3.0265,
      "step": 10433
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.078124523162842,
      "learning_rate": 0.0005969690152469576,
      "loss": 3.2106,
      "step": 10434
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6820064783096313,
      "learning_rate": 0.0005969684352173208,
      "loss": 3.1498,
      "step": 10435
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.865701913833618,
      "learning_rate": 0.000596967855132472,
      "loss": 3.1783,
      "step": 10436
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2082254886627197,
      "learning_rate": 0.0005969672749924113,
      "loss": 3.1974,
      "step": 10437
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7302223443984985,
      "learning_rate": 0.0005969666947971386,
      "loss": 3.1795,
      "step": 10438
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8944718837738037,
      "learning_rate": 0.0005969661145466545,
      "loss": 3.3054,
      "step": 10439
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5506417751312256,
      "learning_rate": 0.0005969655342409586,
      "loss": 3.1414,
      "step": 10440
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4128416776657104,
      "learning_rate": 0.0005969649538800512,
      "loss": 3.0174,
      "step": 10441
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.806291103363037,
      "learning_rate": 0.0005969643734639324,
      "loss": 2.946,
      "step": 10442
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7033522129058838,
      "learning_rate": 0.0005969637929926023,
      "loss": 3.3208,
      "step": 10443
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3939783573150635,
      "learning_rate": 0.0005969632124660612,
      "loss": 2.9858,
      "step": 10444
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4444435834884644,
      "learning_rate": 0.0005969626318843088,
      "loss": 3.118,
      "step": 10445
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1888463497161865,
      "learning_rate": 0.0005969620512473456,
      "loss": 3.2011,
      "step": 10446
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3500542640686035,
      "learning_rate": 0.0005969614705551716,
      "loss": 3.348,
      "step": 10447
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8756444454193115,
      "learning_rate": 0.0005969608898077867,
      "loss": 3.0513,
      "step": 10448
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.868181824684143,
      "learning_rate": 0.0005969603090051912,
      "loss": 3.1342,
      "step": 10449
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6521168947219849,
      "learning_rate": 0.0005969597281473851,
      "loss": 3.3423,
      "step": 10450
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3369053602218628,
      "learning_rate": 0.0005969591472343687,
      "loss": 2.8759,
      "step": 10451
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6117914915084839,
      "learning_rate": 0.000596958566266142,
      "loss": 3.2558,
      "step": 10452
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0292906761169434,
      "learning_rate": 0.000596957985242705,
      "loss": 3.2888,
      "step": 10453
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4335298538208008,
      "learning_rate": 0.000596957404164058,
      "loss": 3.1192,
      "step": 10454
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6305092573165894,
      "learning_rate": 0.000596956823030201,
      "loss": 3.0941,
      "step": 10455
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5286809206008911,
      "learning_rate": 0.0005969562418411341,
      "loss": 3.296,
      "step": 10456
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.652820348739624,
      "learning_rate": 0.0005969556605968574,
      "loss": 3.0047,
      "step": 10457
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6359782218933105,
      "learning_rate": 0.000596955079297371,
      "loss": 3.091,
      "step": 10458
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5417848825454712,
      "learning_rate": 0.0005969544979426752,
      "loss": 3.0384,
      "step": 10459
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5098940134048462,
      "learning_rate": 0.0005969539165327698,
      "loss": 3.2609,
      "step": 10460
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8585420846939087,
      "learning_rate": 0.0005969533350676552,
      "loss": 3.2502,
      "step": 10461
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3893071413040161,
      "learning_rate": 0.0005969527535473313,
      "loss": 3.1463,
      "step": 10462
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.898348331451416,
      "learning_rate": 0.0005969521719717982,
      "loss": 3.2263,
      "step": 10463
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.573864221572876,
      "learning_rate": 0.0005969515903410562,
      "loss": 3.3585,
      "step": 10464
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4181253910064697,
      "learning_rate": 0.0005969510086551052,
      "loss": 3.2479,
      "step": 10465
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7422187328338623,
      "learning_rate": 0.0005969504269139454,
      "loss": 3.293,
      "step": 10466
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4675182104110718,
      "learning_rate": 0.0005969498451175771,
      "loss": 3.0566,
      "step": 10467
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6258772611618042,
      "learning_rate": 0.000596949263266,
      "loss": 2.8378,
      "step": 10468
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.731754183769226,
      "learning_rate": 0.0005969486813592146,
      "loss": 3.3175,
      "step": 10469
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3961542844772339,
      "learning_rate": 0.0005969480993972206,
      "loss": 2.9901,
      "step": 10470
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1441538333892822,
      "learning_rate": 0.0005969475173800186,
      "loss": 3.4461,
      "step": 10471
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.422405481338501,
      "learning_rate": 0.0005969469353076084,
      "loss": 3.1667,
      "step": 10472
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9928879737854004,
      "learning_rate": 0.00059694635317999,
      "loss": 3.099,
      "step": 10473
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6663122177124023,
      "learning_rate": 0.0005969457709971638,
      "loss": 3.1863,
      "step": 10474
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4764833450317383,
      "learning_rate": 0.0005969451887591297,
      "loss": 2.8951,
      "step": 10475
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.418424129486084,
      "learning_rate": 0.000596944606465888,
      "loss": 3.2233,
      "step": 10476
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6413960456848145,
      "learning_rate": 0.0005969440241174386,
      "loss": 3.062,
      "step": 10477
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.329028606414795,
      "learning_rate": 0.0005969434417137817,
      "loss": 3.4234,
      "step": 10478
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6580543518066406,
      "learning_rate": 0.0005969428592549174,
      "loss": 3.2075,
      "step": 10479
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7675169706344604,
      "learning_rate": 0.0005969422767408459,
      "loss": 3.3087,
      "step": 10480
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.0127410888671875,
      "learning_rate": 0.0005969416941715672,
      "loss": 3.1158,
      "step": 10481
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6699563264846802,
      "learning_rate": 0.0005969411115470813,
      "loss": 3.4227,
      "step": 10482
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.061002016067505,
      "learning_rate": 0.0005969405288673886,
      "loss": 3.2741,
      "step": 10483
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.591725468635559,
      "learning_rate": 0.000596939946132489,
      "loss": 3.0576,
      "step": 10484
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.7349345684051514,
      "learning_rate": 0.0005969393633423825,
      "loss": 3.199,
      "step": 10485
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5410609245300293,
      "learning_rate": 0.0005969387804970696,
      "loss": 3.2624,
      "step": 10486
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4914690256118774,
      "learning_rate": 0.0005969381975965502,
      "loss": 3.0512,
      "step": 10487
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.8726041316986084,
      "learning_rate": 0.0005969376146408242,
      "loss": 3.0638,
      "step": 10488
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.622558355331421,
      "learning_rate": 0.000596937031629892,
      "loss": 3.0901,
      "step": 10489
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.349794626235962,
      "learning_rate": 0.0005969364485637536,
      "loss": 3.2064,
      "step": 10490
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0192761421203613,
      "learning_rate": 0.0005969358654424091,
      "loss": 3.3462,
      "step": 10491
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7140353918075562,
      "learning_rate": 0.0005969352822658585,
      "loss": 3.2766,
      "step": 10492
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9006606340408325,
      "learning_rate": 0.0005969346990341022,
      "loss": 3.1603,
      "step": 10493
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8980258703231812,
      "learning_rate": 0.0005969341157471401,
      "loss": 3.1154,
      "step": 10494
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.56866455078125,
      "learning_rate": 0.0005969335324049723,
      "loss": 3.0462,
      "step": 10495
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.22782826423645,
      "learning_rate": 0.000596932949007599,
      "loss": 3.0639,
      "step": 10496
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6506049633026123,
      "learning_rate": 0.0005969323655550202,
      "loss": 3.3225,
      "step": 10497
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3176392316818237,
      "learning_rate": 0.000596931782047236,
      "loss": 3.1986,
      "step": 10498
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6154619455337524,
      "learning_rate": 0.0005969311984842467,
      "loss": 3.2921,
      "step": 10499
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9821650981903076,
      "learning_rate": 0.0005969306148660522,
      "loss": 3.2342,
      "step": 10500
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5258427858352661,
      "learning_rate": 0.0005969300311926528,
      "loss": 3.4176,
      "step": 10501
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4021644592285156,
      "learning_rate": 0.0005969294474640484,
      "loss": 3.1813,
      "step": 10502
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2992286682128906,
      "learning_rate": 0.0005969288636802392,
      "loss": 3.0755,
      "step": 10503
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4977778196334839,
      "learning_rate": 0.0005969282798412254,
      "loss": 3.1989,
      "step": 10504
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5849521160125732,
      "learning_rate": 0.000596927695947007,
      "loss": 3.1202,
      "step": 10505
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.606579065322876,
      "learning_rate": 0.0005969271119975842,
      "loss": 3.1888,
      "step": 10506
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6255369186401367,
      "learning_rate": 0.000596926527992957,
      "loss": 3.0499,
      "step": 10507
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4242626428604126,
      "learning_rate": 0.0005969259439331255,
      "loss": 3.3331,
      "step": 10508
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8409241437911987,
      "learning_rate": 0.0005969253598180899,
      "loss": 3.343,
      "step": 10509
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0123403072357178,
      "learning_rate": 0.0005969247756478503,
      "loss": 3.3307,
      "step": 10510
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.75562584400177,
      "learning_rate": 0.0005969241914224067,
      "loss": 3.2356,
      "step": 10511
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9909648895263672,
      "learning_rate": 0.0005969236071417594,
      "loss": 3.2179,
      "step": 10512
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1224474906921387,
      "learning_rate": 0.0005969230228059083,
      "loss": 3.0128,
      "step": 10513
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3195959329605103,
      "learning_rate": 0.0005969224384148536,
      "loss": 3.1635,
      "step": 10514
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.366900682449341,
      "learning_rate": 0.0005969218539685954,
      "loss": 3.1289,
      "step": 10515
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.135287284851074,
      "learning_rate": 0.000596921269467134,
      "loss": 3.0771,
      "step": 10516
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.9860870838165283,
      "learning_rate": 0.0005969206849104692,
      "loss": 3.0963,
      "step": 10517
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.483724594116211,
      "learning_rate": 0.0005969201002986012,
      "loss": 3.0289,
      "step": 10518
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5080705881118774,
      "learning_rate": 0.0005969195156315301,
      "loss": 3.1129,
      "step": 10519
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8840152025222778,
      "learning_rate": 0.0005969189309092562,
      "loss": 3.1149,
      "step": 10520
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.832331895828247,
      "learning_rate": 0.0005969183461317794,
      "loss": 2.978,
      "step": 10521
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.391473650932312,
      "learning_rate": 0.0005969177612990999,
      "loss": 3.1741,
      "step": 10522
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6228153705596924,
      "learning_rate": 0.0005969171764112178,
      "loss": 3.1256,
      "step": 10523
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6172090768814087,
      "learning_rate": 0.0005969165914681331,
      "loss": 3.2117,
      "step": 10524
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2814373970031738,
      "learning_rate": 0.0005969160064698461,
      "loss": 3.0671,
      "step": 10525
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.669932246208191,
      "learning_rate": 0.0005969154214163568,
      "loss": 3.4496,
      "step": 10526
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.791722059249878,
      "learning_rate": 0.0005969148363076653,
      "loss": 2.9788,
      "step": 10527
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8485875129699707,
      "learning_rate": 0.0005969142511437717,
      "loss": 3.1445,
      "step": 10528
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5625594854354858,
      "learning_rate": 0.0005969136659246762,
      "loss": 3.0271,
      "step": 10529
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6952871084213257,
      "learning_rate": 0.0005969130806503788,
      "loss": 3.0092,
      "step": 10530
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4514240026474,
      "learning_rate": 0.0005969124953208796,
      "loss": 3.1448,
      "step": 10531
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.896296977996826,
      "learning_rate": 0.0005969119099361789,
      "loss": 2.9865,
      "step": 10532
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.51804780960083,
      "learning_rate": 0.0005969113244962765,
      "loss": 3.0731,
      "step": 10533
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8364641666412354,
      "learning_rate": 0.0005969107390011728,
      "loss": 3.2203,
      "step": 10534
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6125568151474,
      "learning_rate": 0.0005969101534508678,
      "loss": 3.2477,
      "step": 10535
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4280731678009033,
      "learning_rate": 0.0005969095678453615,
      "loss": 3.1717,
      "step": 10536
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3780841827392578,
      "learning_rate": 0.0005969089821846542,
      "loss": 3.2106,
      "step": 10537
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4496612548828125,
      "learning_rate": 0.0005969083964687459,
      "loss": 3.0332,
      "step": 10538
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0740950107574463,
      "learning_rate": 0.0005969078106976367,
      "loss": 3.1468,
      "step": 10539
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.678663730621338,
      "learning_rate": 0.0005969072248713267,
      "loss": 3.3132,
      "step": 10540
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.80879545211792,
      "learning_rate": 0.000596906638989816,
      "loss": 3.1594,
      "step": 10541
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5570930242538452,
      "learning_rate": 0.0005969060530531049,
      "loss": 3.2306,
      "step": 10542
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7205467224121094,
      "learning_rate": 0.0005969054670611933,
      "loss": 3.2027,
      "step": 10543
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4518561363220215,
      "learning_rate": 0.0005969048810140813,
      "loss": 3.0649,
      "step": 10544
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6677439212799072,
      "learning_rate": 0.0005969042949117691,
      "loss": 3.131,
      "step": 10545
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5392991304397583,
      "learning_rate": 0.000596903708754257,
      "loss": 3.2313,
      "step": 10546
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8718767166137695,
      "learning_rate": 0.0005969031225415447,
      "loss": 3.0866,
      "step": 10547
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4673599004745483,
      "learning_rate": 0.0005969025362736324,
      "loss": 3.06,
      "step": 10548
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5140092372894287,
      "learning_rate": 0.0005969019499505205,
      "loss": 3.0224,
      "step": 10549
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.458207607269287,
      "learning_rate": 0.0005969013635722088,
      "loss": 3.1306,
      "step": 10550
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.021472692489624,
      "learning_rate": 0.0005969007771386977,
      "loss": 3.1877,
      "step": 10551
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5279662609100342,
      "learning_rate": 0.000596900190649987,
      "loss": 3.1582,
      "step": 10552
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.156764268875122,
      "learning_rate": 0.000596899604106077,
      "loss": 2.8842,
      "step": 10553
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7086313962936401,
      "learning_rate": 0.0005968990175069678,
      "loss": 3.3299,
      "step": 10554
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6554187536239624,
      "learning_rate": 0.0005968984308526593,
      "loss": 3.3768,
      "step": 10555
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4952516555786133,
      "learning_rate": 0.000596897844143152,
      "loss": 3.1807,
      "step": 10556
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7586009502410889,
      "learning_rate": 0.0005968972573784456,
      "loss": 3.2623,
      "step": 10557
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8396145105361938,
      "learning_rate": 0.0005968966705585405,
      "loss": 2.9994,
      "step": 10558
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.470467448234558,
      "learning_rate": 0.0005968960836834367,
      "loss": 3.2072,
      "step": 10559
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4861305952072144,
      "learning_rate": 0.0005968954967531342,
      "loss": 3.3343,
      "step": 10560
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5828608274459839,
      "learning_rate": 0.0005968949097676335,
      "loss": 3.2847,
      "step": 10561
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6716954708099365,
      "learning_rate": 0.0005968943227269342,
      "loss": 3.217,
      "step": 10562
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5026626586914062,
      "learning_rate": 0.0005968937356310366,
      "loss": 3.1407,
      "step": 10563
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7537931203842163,
      "learning_rate": 0.0005968931484799409,
      "loss": 2.6864,
      "step": 10564
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.102530002593994,
      "learning_rate": 0.0005968925612736473,
      "loss": 2.8303,
      "step": 10565
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.847406029701233,
      "learning_rate": 0.0005968919740121557,
      "loss": 2.9276,
      "step": 10566
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.0916569232940674,
      "learning_rate": 0.0005968913866954661,
      "loss": 3.306,
      "step": 10567
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2583248615264893,
      "learning_rate": 0.000596890799323579,
      "loss": 3.3001,
      "step": 10568
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8476635217666626,
      "learning_rate": 0.0005968902118964943,
      "loss": 3.2528,
      "step": 10569
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.134167432785034,
      "learning_rate": 0.000596889624414212,
      "loss": 3.2257,
      "step": 10570
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1342251300811768,
      "learning_rate": 0.0005968890368767323,
      "loss": 3.3056,
      "step": 10571
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.427976131439209,
      "learning_rate": 0.0005968884492840553,
      "loss": 3.3182,
      "step": 10572
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.591983675956726,
      "learning_rate": 0.0005968878616361813,
      "loss": 3.1624,
      "step": 10573
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.436583161354065,
      "learning_rate": 0.0005968872739331102,
      "loss": 2.9125,
      "step": 10574
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5270155668258667,
      "learning_rate": 0.0005968866861748421,
      "loss": 2.8657,
      "step": 10575
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3914873600006104,
      "learning_rate": 0.0005968860983613771,
      "loss": 3.2882,
      "step": 10576
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5657910108566284,
      "learning_rate": 0.0005968855104927154,
      "loss": 3.1252,
      "step": 10577
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6359928846359253,
      "learning_rate": 0.0005968849225688571,
      "loss": 3.3221,
      "step": 10578
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9901286363601685,
      "learning_rate": 0.0005968843345898023,
      "loss": 3.1675,
      "step": 10579
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1050853729248047,
      "learning_rate": 0.0005968837465555512,
      "loss": 3.1188,
      "step": 10580
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.340066432952881,
      "learning_rate": 0.0005968831584661036,
      "loss": 3.1952,
      "step": 10581
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5691571235656738,
      "learning_rate": 0.00059688257032146,
      "loss": 3.1637,
      "step": 10582
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8916585445404053,
      "learning_rate": 0.0005968819821216202,
      "loss": 2.9301,
      "step": 10583
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7244431972503662,
      "learning_rate": 0.0005968813938665845,
      "loss": 3.2423,
      "step": 10584
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.764723300933838,
      "learning_rate": 0.0005968808055563529,
      "loss": 3.0871,
      "step": 10585
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9102917909622192,
      "learning_rate": 0.0005968802171909256,
      "loss": 2.9693,
      "step": 10586
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8164498805999756,
      "learning_rate": 0.0005968796287703027,
      "loss": 3.2793,
      "step": 10587
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3284173011779785,
      "learning_rate": 0.0005968790402944841,
      "loss": 3.1477,
      "step": 10588
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.686061143875122,
      "learning_rate": 0.0005968784517634703,
      "loss": 2.8876,
      "step": 10589
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.543187141418457,
      "learning_rate": 0.000596877863177261,
      "loss": 3.2067,
      "step": 10590
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3509548902511597,
      "learning_rate": 0.0005968772745358566,
      "loss": 2.9238,
      "step": 10591
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4645124673843384,
      "learning_rate": 0.0005968766858392571,
      "loss": 3.3719,
      "step": 10592
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7851402759552002,
      "learning_rate": 0.0005968760970874626,
      "loss": 3.24,
      "step": 10593
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.407775640487671,
      "learning_rate": 0.0005968755082804733,
      "loss": 3.3068,
      "step": 10594
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7994701862335205,
      "learning_rate": 0.0005968749194182892,
      "loss": 3.0454,
      "step": 10595
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0800771713256836,
      "learning_rate": 0.0005968743305009104,
      "loss": 3.1158,
      "step": 10596
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.463629126548767,
      "learning_rate": 0.0005968737415283371,
      "loss": 2.8643,
      "step": 10597
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9234023094177246,
      "learning_rate": 0.0005968731525005694,
      "loss": 3.2742,
      "step": 10598
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.016144037246704,
      "learning_rate": 0.0005968725634176073,
      "loss": 3.0566,
      "step": 10599
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.073030948638916,
      "learning_rate": 0.0005968719742794511,
      "loss": 3.0942,
      "step": 10600
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.429170846939087,
      "learning_rate": 0.0005968713850861007,
      "loss": 3.1226,
      "step": 10601
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3187990188598633,
      "learning_rate": 0.0005968707958375563,
      "loss": 3.2869,
      "step": 10602
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2462794780731201,
      "learning_rate": 0.000596870206533818,
      "loss": 3.1687,
      "step": 10603
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4975943565368652,
      "learning_rate": 0.000596869617174886,
      "loss": 3.4036,
      "step": 10604
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7809574604034424,
      "learning_rate": 0.0005968690277607604,
      "loss": 3.0674,
      "step": 10605
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.401450753211975,
      "learning_rate": 0.0005968684382914411,
      "loss": 3.4169,
      "step": 10606
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3962324857711792,
      "learning_rate": 0.0005968678487669284,
      "loss": 3.1689,
      "step": 10607
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6016125679016113,
      "learning_rate": 0.0005968672591872223,
      "loss": 3.054,
      "step": 10608
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.308079719543457,
      "learning_rate": 0.0005968666695523231,
      "loss": 3.1112,
      "step": 10609
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3736038208007812,
      "learning_rate": 0.0005968660798622308,
      "loss": 3.1738,
      "step": 10610
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4591604471206665,
      "learning_rate": 0.0005968654901169453,
      "loss": 3.1753,
      "step": 10611
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.704019546508789,
      "learning_rate": 0.000596864900316467,
      "loss": 3.062,
      "step": 10612
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4525326490402222,
      "learning_rate": 0.000596864310460796,
      "loss": 3.2532,
      "step": 10613
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9393115043640137,
      "learning_rate": 0.0005968637205499322,
      "loss": 3.0742,
      "step": 10614
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4773117303848267,
      "learning_rate": 0.0005968631305838758,
      "loss": 3.2535,
      "step": 10615
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4595035314559937,
      "learning_rate": 0.0005968625405626271,
      "loss": 3.2097,
      "step": 10616
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.539599895477295,
      "learning_rate": 0.000596861950486186,
      "loss": 3.1556,
      "step": 10617
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4473694562911987,
      "learning_rate": 0.0005968613603545526,
      "loss": 3.082,
      "step": 10618
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7676069736480713,
      "learning_rate": 0.0005968607701677271,
      "loss": 3.2832,
      "step": 10619
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.462068796157837,
      "learning_rate": 0.0005968601799257096,
      "loss": 3.0144,
      "step": 10620
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6453361511230469,
      "learning_rate": 0.0005968595896285001,
      "loss": 3.4031,
      "step": 10621
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.293907642364502,
      "learning_rate": 0.000596858999276099,
      "loss": 3.1738,
      "step": 10622
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2873318195343018,
      "learning_rate": 0.000596858408868506,
      "loss": 3.0712,
      "step": 10623
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5832918882369995,
      "learning_rate": 0.0005968578184057215,
      "loss": 2.8936,
      "step": 10624
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4940388202667236,
      "learning_rate": 0.0005968572278877455,
      "loss": 3.2372,
      "step": 10625
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4230883121490479,
      "learning_rate": 0.0005968566373145782,
      "loss": 3.3949,
      "step": 10626
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4991410970687866,
      "learning_rate": 0.0005968560466862196,
      "loss": 2.9891,
      "step": 10627
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5949950218200684,
      "learning_rate": 0.0005968554560026697,
      "loss": 3.1306,
      "step": 10628
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6999679803848267,
      "learning_rate": 0.000596854865263929,
      "loss": 3.0561,
      "step": 10629
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6622638702392578,
      "learning_rate": 0.0005968542744699974,
      "loss": 3.1766,
      "step": 10630
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6781729459762573,
      "learning_rate": 0.0005968536836208748,
      "loss": 3.0862,
      "step": 10631
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7897117137908936,
      "learning_rate": 0.0005968530927165616,
      "loss": 3.1023,
      "step": 10632
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4710075855255127,
      "learning_rate": 0.0005968525017570578,
      "loss": 2.9853,
      "step": 10633
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.627427339553833,
      "learning_rate": 0.0005968519107423635,
      "loss": 3.1413,
      "step": 10634
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7272217273712158,
      "learning_rate": 0.0005968513196724788,
      "loss": 3.2693,
      "step": 10635
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5851682424545288,
      "learning_rate": 0.0005968507285474039,
      "loss": 3.0989,
      "step": 10636
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9906260967254639,
      "learning_rate": 0.0005968501373671388,
      "loss": 3.2157,
      "step": 10637
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2970277070999146,
      "learning_rate": 0.0005968495461316837,
      "loss": 3.0057,
      "step": 10638
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5250622034072876,
      "learning_rate": 0.0005968489548410387,
      "loss": 3.0642,
      "step": 10639
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2043323516845703,
      "learning_rate": 0.0005968483634952037,
      "loss": 3.2072,
      "step": 10640
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5353264808654785,
      "learning_rate": 0.0005968477720941792,
      "loss": 3.2568,
      "step": 10641
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.44193434715271,
      "learning_rate": 0.0005968471806379649,
      "loss": 3.251,
      "step": 10642
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6924062967300415,
      "learning_rate": 0.0005968465891265613,
      "loss": 3.1244,
      "step": 10643
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.340104103088379,
      "learning_rate": 0.0005968459975599682,
      "loss": 3.328,
      "step": 10644
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8810268640518188,
      "learning_rate": 0.0005968454059381858,
      "loss": 3.1781,
      "step": 10645
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4742463827133179,
      "learning_rate": 0.0005968448142612144,
      "loss": 2.9644,
      "step": 10646
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7936584949493408,
      "learning_rate": 0.0005968442225290538,
      "loss": 3.1777,
      "step": 10647
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6317123174667358,
      "learning_rate": 0.0005968436307417042,
      "loss": 3.2118,
      "step": 10648
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3153754472732544,
      "learning_rate": 0.0005968430388991659,
      "loss": 3.0072,
      "step": 10649
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8883700370788574,
      "learning_rate": 0.0005968424470014389,
      "loss": 2.9292,
      "step": 10650
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4866290092468262,
      "learning_rate": 0.0005968418550485231,
      "loss": 3.1165,
      "step": 10651
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4820277690887451,
      "learning_rate": 0.000596841263040419,
      "loss": 3.0504,
      "step": 10652
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.7283506393432617,
      "learning_rate": 0.0005968406709771262,
      "loss": 3.119,
      "step": 10653
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.906725287437439,
      "learning_rate": 0.0005968400788586455,
      "loss": 3.2442,
      "step": 10654
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1202807426452637,
      "learning_rate": 0.0005968394866849764,
      "loss": 3.2171,
      "step": 10655
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9031585454940796,
      "learning_rate": 0.0005968388944561193,
      "loss": 3.198,
      "step": 10656
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8549447059631348,
      "learning_rate": 0.0005968383021720743,
      "loss": 3.1011,
      "step": 10657
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6509811878204346,
      "learning_rate": 0.0005968377098328413,
      "loss": 3.3509,
      "step": 10658
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6265461444854736,
      "learning_rate": 0.0005968371174384208,
      "loss": 3.1958,
      "step": 10659
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6851775646209717,
      "learning_rate": 0.0005968365249888124,
      "loss": 3.2558,
      "step": 10660
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.653681993484497,
      "learning_rate": 0.0005968359324840166,
      "loss": 3.0831,
      "step": 10661
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6114245653152466,
      "learning_rate": 0.0005968353399240335,
      "loss": 3.3656,
      "step": 10662
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.741617202758789,
      "learning_rate": 0.000596834747308863,
      "loss": 3.1054,
      "step": 10663
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4154999256134033,
      "learning_rate": 0.0005968341546385053,
      "loss": 3.1186,
      "step": 10664
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.953909158706665,
      "learning_rate": 0.0005968335619129606,
      "loss": 3.1984,
      "step": 10665
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5658968687057495,
      "learning_rate": 0.0005968329691322289,
      "loss": 3.1546,
      "step": 10666
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4066026210784912,
      "learning_rate": 0.0005968323762963103,
      "loss": 3.0945,
      "step": 10667
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3809067010879517,
      "learning_rate": 0.0005968317834052051,
      "loss": 3.2853,
      "step": 10668
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6356074810028076,
      "learning_rate": 0.0005968311904589131,
      "loss": 3.0768,
      "step": 10669
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5131691694259644,
      "learning_rate": 0.0005968305974574347,
      "loss": 3.2334,
      "step": 10670
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4531768560409546,
      "learning_rate": 0.0005968300044007698,
      "loss": 2.9254,
      "step": 10671
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6039506196975708,
      "learning_rate": 0.0005968294112889187,
      "loss": 3.3591,
      "step": 10672
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3796721696853638,
      "learning_rate": 0.0005968288181218814,
      "loss": 3.3218,
      "step": 10673
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7606000900268555,
      "learning_rate": 0.0005968282248996579,
      "loss": 2.9659,
      "step": 10674
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7636505365371704,
      "learning_rate": 0.0005968276316222486,
      "loss": 3.3024,
      "step": 10675
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1029891967773438,
      "learning_rate": 0.0005968270382896533,
      "loss": 3.0389,
      "step": 10676
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6635743379592896,
      "learning_rate": 0.0005968264449018723,
      "loss": 3.4472,
      "step": 10677
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4324522018432617,
      "learning_rate": 0.0005968258514589057,
      "loss": 2.9625,
      "step": 10678
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.531546950340271,
      "learning_rate": 0.0005968252579607535,
      "loss": 3.2268,
      "step": 10679
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.413948893547058,
      "learning_rate": 0.0005968246644074159,
      "loss": 3.1932,
      "step": 10680
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9226998090744019,
      "learning_rate": 0.000596824070798893,
      "loss": 3.1176,
      "step": 10681
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5953643321990967,
      "learning_rate": 0.0005968234771351849,
      "loss": 3.2204,
      "step": 10682
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6271958351135254,
      "learning_rate": 0.0005968228834162917,
      "loss": 3.0811,
      "step": 10683
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9576232433319092,
      "learning_rate": 0.0005968222896422136,
      "loss": 3.3248,
      "step": 10684
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.26397967338562,
      "learning_rate": 0.0005968216958129505,
      "loss": 2.9927,
      "step": 10685
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5647220611572266,
      "learning_rate": 0.0005968211019285027,
      "loss": 3.4605,
      "step": 10686
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8653215169906616,
      "learning_rate": 0.0005968205079888703,
      "loss": 3.1439,
      "step": 10687
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.1992568969726562,
      "learning_rate": 0.0005968199139940534,
      "loss": 3.1111,
      "step": 10688
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.7656729221343994,
      "learning_rate": 0.000596819319944052,
      "loss": 3.393,
      "step": 10689
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3270901441574097,
      "learning_rate": 0.0005968187258388663,
      "loss": 3.0732,
      "step": 10690
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.589927911758423,
      "learning_rate": 0.0005968181316784964,
      "loss": 3.4346,
      "step": 10691
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.4126956462860107,
      "learning_rate": 0.0005968175374629425,
      "loss": 3.2998,
      "step": 10692
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.602769136428833,
      "learning_rate": 0.0005968169431922044,
      "loss": 3.2775,
      "step": 10693
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.417233943939209,
      "learning_rate": 0.0005968163488662826,
      "loss": 2.9936,
      "step": 10694
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1944427490234375,
      "learning_rate": 0.0005968157544851769,
      "loss": 3.1705,
      "step": 10695
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2936782836914062,
      "learning_rate": 0.0005968151600488877,
      "loss": 3.0504,
      "step": 10696
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7922348976135254,
      "learning_rate": 0.0005968145655574149,
      "loss": 3.1454,
      "step": 10697
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4660298824310303,
      "learning_rate": 0.0005968139710107586,
      "loss": 3.2635,
      "step": 10698
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.403071403503418,
      "learning_rate": 0.0005968133764089191,
      "loss": 2.9029,
      "step": 10699
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5395896434783936,
      "learning_rate": 0.0005968127817518962,
      "loss": 3.0852,
      "step": 10700
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8424521684646606,
      "learning_rate": 0.0005968121870396904,
      "loss": 3.3582,
      "step": 10701
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.423285961151123,
      "learning_rate": 0.0005968115922723016,
      "loss": 3.1132,
      "step": 10702
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8834526538848877,
      "learning_rate": 0.0005968109974497298,
      "loss": 3.0808,
      "step": 10703
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0593628883361816,
      "learning_rate": 0.0005968104025719753,
      "loss": 3.1802,
      "step": 10704
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6087381839752197,
      "learning_rate": 0.0005968098076390381,
      "loss": 3.3199,
      "step": 10705
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8478349447250366,
      "learning_rate": 0.0005968092126509185,
      "loss": 3.1936,
      "step": 10706
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3496320247650146,
      "learning_rate": 0.0005968086176076163,
      "loss": 3.077,
      "step": 10707
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4431616067886353,
      "learning_rate": 0.0005968080225091318,
      "loss": 3.0832,
      "step": 10708
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4738177061080933,
      "learning_rate": 0.0005968074273554652,
      "loss": 3.0829,
      "step": 10709
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4944366216659546,
      "learning_rate": 0.0005968068321466164,
      "loss": 3.1669,
      "step": 10710
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.417107105255127,
      "learning_rate": 0.0005968062368825857,
      "loss": 3.1839,
      "step": 10711
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.8307363986968994,
      "learning_rate": 0.000596805641563373,
      "loss": 3.0175,
      "step": 10712
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.2977852821350098,
      "learning_rate": 0.0005968050461889786,
      "loss": 3.1933,
      "step": 10713
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4399476051330566,
      "learning_rate": 0.0005968044507594025,
      "loss": 3.3093,
      "step": 10714
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7651140689849854,
      "learning_rate": 0.0005968038552746449,
      "loss": 3.2877,
      "step": 10715
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2482495307922363,
      "learning_rate": 0.0005968032597347058,
      "loss": 3.056,
      "step": 10716
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6707842350006104,
      "learning_rate": 0.0005968026641395854,
      "loss": 3.2498,
      "step": 10717
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7358933687210083,
      "learning_rate": 0.0005968020684892838,
      "loss": 3.161,
      "step": 10718
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9755953550338745,
      "learning_rate": 0.0005968014727838009,
      "loss": 3.5153,
      "step": 10719
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.813952922821045,
      "learning_rate": 0.0005968008770231373,
      "loss": 3.3543,
      "step": 10720
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3666893243789673,
      "learning_rate": 0.0005968002812072927,
      "loss": 3.1794,
      "step": 10721
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7142020463943481,
      "learning_rate": 0.0005967996853362672,
      "loss": 2.8152,
      "step": 10722
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9586601257324219,
      "learning_rate": 0.0005967990894100612,
      "loss": 3.5222,
      "step": 10723
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5951100587844849,
      "learning_rate": 0.0005967984934286746,
      "loss": 3.2457,
      "step": 10724
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9095327854156494,
      "learning_rate": 0.0005967978973921076,
      "loss": 3.3241,
      "step": 10725
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9150155782699585,
      "learning_rate": 0.0005967973013003601,
      "loss": 3.1052,
      "step": 10726
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.595771312713623,
      "learning_rate": 0.0005967967051534326,
      "loss": 3.0932,
      "step": 10727
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8917371034622192,
      "learning_rate": 0.0005967961089513249,
      "loss": 3.1566,
      "step": 10728
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7598456144332886,
      "learning_rate": 0.0005967955126940372,
      "loss": 3.0394,
      "step": 10729
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4481372833251953,
      "learning_rate": 0.0005967949163815696,
      "loss": 3.2345,
      "step": 10730
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.047163486480713,
      "learning_rate": 0.0005967943200139221,
      "loss": 3.1663,
      "step": 10731
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7228126525878906,
      "learning_rate": 0.0005967937235910951,
      "loss": 3.0852,
      "step": 10732
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8187341690063477,
      "learning_rate": 0.0005967931271130886,
      "loss": 2.9963,
      "step": 10733
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9745759963989258,
      "learning_rate": 0.0005967925305799026,
      "loss": 2.9934,
      "step": 10734
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6965900659561157,
      "learning_rate": 0.0005967919339915372,
      "loss": 3.152,
      "step": 10735
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8864401578903198,
      "learning_rate": 0.0005967913373479925,
      "loss": 3.2768,
      "step": 10736
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4663817882537842,
      "learning_rate": 0.0005967907406492688,
      "loss": 3.2227,
      "step": 10737
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3579343557357788,
      "learning_rate": 0.0005967901438953661,
      "loss": 3.1448,
      "step": 10738
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7161849737167358,
      "learning_rate": 0.0005967895470862845,
      "loss": 3.4582,
      "step": 10739
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7580829858779907,
      "learning_rate": 0.0005967889502220241,
      "loss": 2.8635,
      "step": 10740
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4606869220733643,
      "learning_rate": 0.0005967883533025851,
      "loss": 3.0641,
      "step": 10741
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5998454093933105,
      "learning_rate": 0.0005967877563279675,
      "loss": 3.2887,
      "step": 10742
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.27437424659729,
      "learning_rate": 0.0005967871592981714,
      "loss": 3.0527,
      "step": 10743
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.828649640083313,
      "learning_rate": 0.000596786562213197,
      "loss": 3.046,
      "step": 10744
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9589132070541382,
      "learning_rate": 0.0005967859650730444,
      "loss": 3.1842,
      "step": 10745
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7756143808364868,
      "learning_rate": 0.0005967853678777137,
      "loss": 3.3701,
      "step": 10746
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7846903800964355,
      "learning_rate": 0.000596784770627205,
      "loss": 3.0804,
      "step": 10747
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.055901288986206,
      "learning_rate": 0.0005967841733215184,
      "loss": 3.1034,
      "step": 10748
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4986419677734375,
      "learning_rate": 0.000596783575960654,
      "loss": 3.2259,
      "step": 10749
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.039210319519043,
      "learning_rate": 0.0005967829785446119,
      "loss": 3.1304,
      "step": 10750
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.9467697143554688,
      "learning_rate": 0.0005967823810733923,
      "loss": 3.0035,
      "step": 10751
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.727658748626709,
      "learning_rate": 0.0005967817835469952,
      "loss": 3.2947,
      "step": 10752
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5872310400009155,
      "learning_rate": 0.0005967811859654208,
      "loss": 2.8062,
      "step": 10753
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.81196928024292,
      "learning_rate": 0.0005967805883286692,
      "loss": 3.0045,
      "step": 10754
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.9964778423309326,
      "learning_rate": 0.0005967799906367404,
      "loss": 3.2408,
      "step": 10755
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.959277629852295,
      "learning_rate": 0.0005967793928896346,
      "loss": 3.0304,
      "step": 10756
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6879713535308838,
      "learning_rate": 0.000596778795087352,
      "loss": 3.2416,
      "step": 10757
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.756371021270752,
      "learning_rate": 0.0005967781972298925,
      "loss": 3.1432,
      "step": 10758
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5427777767181396,
      "learning_rate": 0.0005967775993172565,
      "loss": 3.2303,
      "step": 10759
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.60245680809021,
      "learning_rate": 0.0005967770013494439,
      "loss": 2.7806,
      "step": 10760
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6465195417404175,
      "learning_rate": 0.0005967764033264547,
      "loss": 3.2317,
      "step": 10761
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.8876500129699707,
      "learning_rate": 0.0005967758052482894,
      "loss": 3.0747,
      "step": 10762
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.634814739227295,
      "learning_rate": 0.0005967752071149477,
      "loss": 3.5342,
      "step": 10763
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6700948476791382,
      "learning_rate": 0.0005967746089264299,
      "loss": 3.2624,
      "step": 10764
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5197341442108154,
      "learning_rate": 0.0005967740106827362,
      "loss": 3.2585,
      "step": 10765
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7364963293075562,
      "learning_rate": 0.0005967734123838665,
      "loss": 3.213,
      "step": 10766
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3340561389923096,
      "learning_rate": 0.0005967728140298211,
      "loss": 3.4476,
      "step": 10767
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3426159620285034,
      "learning_rate": 0.0005967722156205999,
      "loss": 3.2816,
      "step": 10768
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9507004022598267,
      "learning_rate": 0.0005967716171562033,
      "loss": 3.2896,
      "step": 10769
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3271907567977905,
      "learning_rate": 0.0005967710186366311,
      "loss": 2.9772,
      "step": 10770
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.304572582244873,
      "learning_rate": 0.0005967704200618838,
      "loss": 3.1338,
      "step": 10771
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9833744764328003,
      "learning_rate": 0.0005967698214319611,
      "loss": 3.118,
      "step": 10772
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9627412557601929,
      "learning_rate": 0.0005967692227468633,
      "loss": 3.319,
      "step": 10773
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4328505992889404,
      "learning_rate": 0.0005967686240065904,
      "loss": 3.4002,
      "step": 10774
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6818675994873047,
      "learning_rate": 0.0005967680252111429,
      "loss": 3.1121,
      "step": 10775
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3566687107086182,
      "learning_rate": 0.0005967674263605205,
      "loss": 3.0429,
      "step": 10776
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3268351554870605,
      "learning_rate": 0.0005967668274547233,
      "loss": 3.1325,
      "step": 10777
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4876844882965088,
      "learning_rate": 0.0005967662284937516,
      "loss": 3.3309,
      "step": 10778
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5387381315231323,
      "learning_rate": 0.0005967656294776055,
      "loss": 3.4514,
      "step": 10779
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3862935304641724,
      "learning_rate": 0.000596765030406285,
      "loss": 3.3965,
      "step": 10780
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7880537509918213,
      "learning_rate": 0.0005967644312797904,
      "loss": 3.1236,
      "step": 10781
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5781985521316528,
      "learning_rate": 0.0005967638320981217,
      "loss": 3.261,
      "step": 10782
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3327957391738892,
      "learning_rate": 0.0005967632328612788,
      "loss": 3.2381,
      "step": 10783
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.341803789138794,
      "learning_rate": 0.0005967626335692622,
      "loss": 3.1081,
      "step": 10784
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3373067378997803,
      "learning_rate": 0.0005967620342220717,
      "loss": 3.35,
      "step": 10785
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4955730438232422,
      "learning_rate": 0.0005967614348197077,
      "loss": 3.1261,
      "step": 10786
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5095940828323364,
      "learning_rate": 0.0005967608353621701,
      "loss": 3.1777,
      "step": 10787
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6642286777496338,
      "learning_rate": 0.0005967602358494589,
      "loss": 3.0899,
      "step": 10788
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4678575992584229,
      "learning_rate": 0.0005967596362815745,
      "loss": 3.1189,
      "step": 10789
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5768773555755615,
      "learning_rate": 0.0005967590366585168,
      "loss": 3.1115,
      "step": 10790
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6133625507354736,
      "learning_rate": 0.0005967584369802861,
      "loss": 3.2587,
      "step": 10791
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5747194290161133,
      "learning_rate": 0.0005967578372468823,
      "loss": 2.9963,
      "step": 10792
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6052907705307007,
      "learning_rate": 0.0005967572374583057,
      "loss": 3.2354,
      "step": 10793
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5777184963226318,
      "learning_rate": 0.0005967566376145564,
      "loss": 3.1753,
      "step": 10794
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4639555215835571,
      "learning_rate": 0.0005967560377156343,
      "loss": 3.2715,
      "step": 10795
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.141054630279541,
      "learning_rate": 0.0005967554377615397,
      "loss": 2.9477,
      "step": 10796
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5526777505874634,
      "learning_rate": 0.0005967548377522726,
      "loss": 3.1991,
      "step": 10797
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5287898778915405,
      "learning_rate": 0.0005967542376878333,
      "loss": 2.8499,
      "step": 10798
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5148131847381592,
      "learning_rate": 0.0005967536375682216,
      "loss": 3.0169,
      "step": 10799
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3603624105453491,
      "learning_rate": 0.000596753037393438,
      "loss": 3.1663,
      "step": 10800
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.89081609249115,
      "learning_rate": 0.0005967524371634824,
      "loss": 3.3337,
      "step": 10801
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5440558195114136,
      "learning_rate": 0.0005967518368783548,
      "loss": 3.1836,
      "step": 10802
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.490097999572754,
      "learning_rate": 0.0005967512365380554,
      "loss": 3.2581,
      "step": 10803
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.574713110923767,
      "learning_rate": 0.0005967506361425844,
      "loss": 3.4042,
      "step": 10804
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5530554056167603,
      "learning_rate": 0.0005967500356919419,
      "loss": 2.948,
      "step": 10805
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8944923877716064,
      "learning_rate": 0.000596749435186128,
      "loss": 3.2756,
      "step": 10806
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.76822030544281,
      "learning_rate": 0.0005967488346251427,
      "loss": 3.0024,
      "step": 10807
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6393537521362305,
      "learning_rate": 0.0005967482340089862,
      "loss": 2.9984,
      "step": 10808
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5302677154541016,
      "learning_rate": 0.0005967476333376586,
      "loss": 3.1238,
      "step": 10809
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5932354927062988,
      "learning_rate": 0.00059674703261116,
      "loss": 3.2013,
      "step": 10810
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4736080169677734,
      "learning_rate": 0.0005967464318294906,
      "loss": 3.3053,
      "step": 10811
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5838854312896729,
      "learning_rate": 0.0005967458309926503,
      "loss": 3.1255,
      "step": 10812
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.726989984512329,
      "learning_rate": 0.0005967452301006395,
      "loss": 3.0842,
      "step": 10813
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5534861087799072,
      "learning_rate": 0.000596744629153458,
      "loss": 3.1727,
      "step": 10814
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.094175100326538,
      "learning_rate": 0.0005967440281511062,
      "loss": 3.2266,
      "step": 10815
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5828750133514404,
      "learning_rate": 0.0005967434270935841,
      "loss": 3.1475,
      "step": 10816
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6537132263183594,
      "learning_rate": 0.0005967428259808917,
      "loss": 3.154,
      "step": 10817
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.518355369567871,
      "learning_rate": 0.0005967422248130293,
      "loss": 3.4463,
      "step": 10818
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3352785110473633,
      "learning_rate": 0.0005967416235899969,
      "loss": 3.2606,
      "step": 10819
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7106741666793823,
      "learning_rate": 0.0005967410223117946,
      "loss": 3.16,
      "step": 10820
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6503242254257202,
      "learning_rate": 0.0005967404209784225,
      "loss": 3.1133,
      "step": 10821
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.652758836746216,
      "learning_rate": 0.0005967398195898808,
      "loss": 3.2079,
      "step": 10822
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5232665538787842,
      "learning_rate": 0.0005967392181461695,
      "loss": 3.2852,
      "step": 10823
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6769464015960693,
      "learning_rate": 0.0005967386166472889,
      "loss": 2.9732,
      "step": 10824
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0890707969665527,
      "learning_rate": 0.0005967380150932389,
      "loss": 3.0994,
      "step": 10825
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3367962837219238,
      "learning_rate": 0.0005967374134840198,
      "loss": 3.2559,
      "step": 10826
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5757859945297241,
      "learning_rate": 0.0005967368118196315,
      "loss": 3.0942,
      "step": 10827
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.58820378780365,
      "learning_rate": 0.0005967362101000743,
      "loss": 3.1163,
      "step": 10828
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7346844673156738,
      "learning_rate": 0.0005967356083253482,
      "loss": 3.1118,
      "step": 10829
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6722064018249512,
      "learning_rate": 0.0005967350064954534,
      "loss": 3.1526,
      "step": 10830
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0415825843811035,
      "learning_rate": 0.0005967344046103898,
      "loss": 2.8932,
      "step": 10831
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.662459373474121,
      "learning_rate": 0.0005967338026701579,
      "loss": 3.1495,
      "step": 10832
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.145878553390503,
      "learning_rate": 0.0005967332006747575,
      "loss": 3.2023,
      "step": 10833
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5695923566818237,
      "learning_rate": 0.0005967325986241887,
      "loss": 3.2004,
      "step": 10834
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.718571424484253,
      "learning_rate": 0.0005967319965184518,
      "loss": 2.9558,
      "step": 10835
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5455106496810913,
      "learning_rate": 0.0005967313943575468,
      "loss": 3.0407,
      "step": 10836
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3288038969039917,
      "learning_rate": 0.0005967307921414739,
      "loss": 3.1681,
      "step": 10837
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5533766746520996,
      "learning_rate": 0.000596730189870233,
      "loss": 3.2046,
      "step": 10838
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4980919361114502,
      "learning_rate": 0.0005967295875438245,
      "loss": 3.0987,
      "step": 10839
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0366625785827637,
      "learning_rate": 0.0005967289851622483,
      "loss": 3.3457,
      "step": 10840
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7377183437347412,
      "learning_rate": 0.0005967283827255045,
      "loss": 3.0226,
      "step": 10841
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.916386604309082,
      "learning_rate": 0.0005967277802335935,
      "loss": 3.0934,
      "step": 10842
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8668054342269897,
      "learning_rate": 0.0005967271776865149,
      "loss": 3.3751,
      "step": 10843
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9443994760513306,
      "learning_rate": 0.0005967265750842694,
      "loss": 3.2782,
      "step": 10844
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5429936647415161,
      "learning_rate": 0.0005967259724268567,
      "loss": 3.3311,
      "step": 10845
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3801312446594238,
      "learning_rate": 0.0005967253697142771,
      "loss": 2.9852,
      "step": 10846
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1131749153137207,
      "learning_rate": 0.0005967247669465307,
      "loss": 3.0083,
      "step": 10847
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7891244888305664,
      "learning_rate": 0.0005967241641236174,
      "loss": 3.1152,
      "step": 10848
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3465638160705566,
      "learning_rate": 0.0005967235612455375,
      "loss": 3.141,
      "step": 10849
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5049901008605957,
      "learning_rate": 0.0005967229583122912,
      "loss": 2.9402,
      "step": 10850
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3854446411132812,
      "learning_rate": 0.0005967223553238786,
      "loss": 3.1832,
      "step": 10851
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.508599042892456,
      "learning_rate": 0.0005967217522802994,
      "loss": 3.1695,
      "step": 10852
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6136351823806763,
      "learning_rate": 0.0005967211491815542,
      "loss": 3.3376,
      "step": 10853
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.731426477432251,
      "learning_rate": 0.0005967205460276429,
      "loss": 3.1723,
      "step": 10854
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.723061203956604,
      "learning_rate": 0.0005967199428185657,
      "loss": 3.2685,
      "step": 10855
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6473441123962402,
      "learning_rate": 0.0005967193395543227,
      "loss": 3.1913,
      "step": 10856
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.057065725326538,
      "learning_rate": 0.0005967187362349138,
      "loss": 3.2239,
      "step": 10857
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4549587965011597,
      "learning_rate": 0.0005967181328603395,
      "loss": 3.2891,
      "step": 10858
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5403692722320557,
      "learning_rate": 0.0005967175294305996,
      "loss": 3.2039,
      "step": 10859
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3512405157089233,
      "learning_rate": 0.0005967169259456943,
      "loss": 3.0461,
      "step": 10860
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3323472738265991,
      "learning_rate": 0.0005967163224056236,
      "loss": 3.0663,
      "step": 10861
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.491850733757019,
      "learning_rate": 0.000596715718810388,
      "loss": 3.2594,
      "step": 10862
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.885986328125,
      "learning_rate": 0.0005967151151599872,
      "loss": 3.1469,
      "step": 10863
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4015705585479736,
      "learning_rate": 0.0005967145114544215,
      "loss": 3.0537,
      "step": 10864
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2590901851654053,
      "learning_rate": 0.0005967139076936908,
      "loss": 3.069,
      "step": 10865
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5308412313461304,
      "learning_rate": 0.0005967133038777956,
      "loss": 2.9943,
      "step": 10866
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.537360429763794,
      "learning_rate": 0.0005967127000067357,
      "loss": 3.4545,
      "step": 10867
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.489387273788452,
      "learning_rate": 0.0005967120960805114,
      "loss": 2.8762,
      "step": 10868
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7855756282806396,
      "learning_rate": 0.0005967114920991226,
      "loss": 3.2713,
      "step": 10869
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6896796226501465,
      "learning_rate": 0.0005967108880625696,
      "loss": 3.1904,
      "step": 10870
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9632039070129395,
      "learning_rate": 0.0005967102839708524,
      "loss": 3.3333,
      "step": 10871
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.807319402694702,
      "learning_rate": 0.0005967096798239713,
      "loss": 3.3007,
      "step": 10872
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9345207214355469,
      "learning_rate": 0.000596709075621926,
      "loss": 3.2711,
      "step": 10873
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8688286542892456,
      "learning_rate": 0.000596708471364717,
      "loss": 3.1211,
      "step": 10874
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8628230094909668,
      "learning_rate": 0.0005967078670523445,
      "loss": 3.2575,
      "step": 10875
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8501195907592773,
      "learning_rate": 0.0005967072626848082,
      "loss": 3.1293,
      "step": 10876
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4503718614578247,
      "learning_rate": 0.0005967066582621085,
      "loss": 2.9343,
      "step": 10877
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.492732286453247,
      "learning_rate": 0.0005967060537842453,
      "loss": 3.1622,
      "step": 10878
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0090725421905518,
      "learning_rate": 0.000596705449251219,
      "loss": 3.0667,
      "step": 10879
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4153823852539062,
      "learning_rate": 0.0005967048446630295,
      "loss": 3.1586,
      "step": 10880
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8719059228897095,
      "learning_rate": 0.0005967042400196769,
      "loss": 3.0829,
      "step": 10881
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0667943954467773,
      "learning_rate": 0.0005967036353211614,
      "loss": 3.1703,
      "step": 10882
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.658172369003296,
      "learning_rate": 0.0005967030305674832,
      "loss": 3.2592,
      "step": 10883
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5532246828079224,
      "learning_rate": 0.0005967024257586422,
      "loss": 3.2125,
      "step": 10884
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5891668796539307,
      "learning_rate": 0.0005967018208946387,
      "loss": 3.2421,
      "step": 10885
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7577364444732666,
      "learning_rate": 0.0005967012159754726,
      "loss": 3.15,
      "step": 10886
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.426289677619934,
      "learning_rate": 0.0005967006110011442,
      "loss": 3.1226,
      "step": 10887
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3853836059570312,
      "learning_rate": 0.0005967000059716536,
      "loss": 3.132,
      "step": 10888
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.360085964202881,
      "learning_rate": 0.0005966994008870009,
      "loss": 3.2008,
      "step": 10889
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8227871656417847,
      "learning_rate": 0.0005966987957471861,
      "loss": 3.0782,
      "step": 10890
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1057257652282715,
      "learning_rate": 0.0005966981905522095,
      "loss": 3.298,
      "step": 10891
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.586463212966919,
      "learning_rate": 0.000596697585302071,
      "loss": 3.1275,
      "step": 10892
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5864347219467163,
      "learning_rate": 0.0005966969799967708,
      "loss": 3.3549,
      "step": 10893
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.436779499053955,
      "learning_rate": 0.0005966963746363092,
      "loss": 3.1953,
      "step": 10894
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5099523067474365,
      "learning_rate": 0.000596695769220686,
      "loss": 3.2184,
      "step": 10895
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8906339406967163,
      "learning_rate": 0.0005966951637499015,
      "loss": 3.3923,
      "step": 10896
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1903018951416016,
      "learning_rate": 0.0005966945582239557,
      "loss": 3.141,
      "step": 10897
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5449696779251099,
      "learning_rate": 0.0005966939526428489,
      "loss": 3.2559,
      "step": 10898
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.696668863296509,
      "learning_rate": 0.0005966933470065809,
      "loss": 3.2514,
      "step": 10899
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1887028217315674,
      "learning_rate": 0.0005966927413151522,
      "loss": 3.2415,
      "step": 10900
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4459115266799927,
      "learning_rate": 0.0005966921355685627,
      "loss": 3.1476,
      "step": 10901
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8699408769607544,
      "learning_rate": 0.0005966915297668124,
      "loss": 3.1949,
      "step": 10902
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2007994651794434,
      "learning_rate": 0.0005966909239099016,
      "loss": 3.2329,
      "step": 10903
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0388200283050537,
      "learning_rate": 0.0005966903179978304,
      "loss": 3.3042,
      "step": 10904
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.799532175064087,
      "learning_rate": 0.0005966897120305988,
      "loss": 3.0086,
      "step": 10905
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8484231233596802,
      "learning_rate": 0.0005966891060082072,
      "loss": 3.334,
      "step": 10906
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9890129566192627,
      "learning_rate": 0.0005966884999306552,
      "loss": 2.9796,
      "step": 10907
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.382097601890564,
      "learning_rate": 0.0005966878937979434,
      "loss": 3.173,
      "step": 10908
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2208011150360107,
      "learning_rate": 0.0005966872876100717,
      "loss": 3.0906,
      "step": 10909
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7651258707046509,
      "learning_rate": 0.0005966866813670402,
      "loss": 2.8502,
      "step": 10910
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6125911474227905,
      "learning_rate": 0.000596686075068849,
      "loss": 2.9607,
      "step": 10911
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1348273754119873,
      "learning_rate": 0.0005966854687154982,
      "loss": 2.9701,
      "step": 10912
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8169358968734741,
      "learning_rate": 0.0005966848623069882,
      "loss": 3.2086,
      "step": 10913
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.500266432762146,
      "learning_rate": 0.0005966842558433186,
      "loss": 3.2856,
      "step": 10914
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7013176679611206,
      "learning_rate": 0.00059668364932449,
      "loss": 3.2728,
      "step": 10915
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9158295392990112,
      "learning_rate": 0.0005966830427505022,
      "loss": 3.0313,
      "step": 10916
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.586244821548462,
      "learning_rate": 0.0005966824361213556,
      "loss": 3.3043,
      "step": 10917
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4252835512161255,
      "learning_rate": 0.0005966818294370499,
      "loss": 3.0463,
      "step": 10918
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3998215198516846,
      "learning_rate": 0.0005966812226975856,
      "loss": 3.4002,
      "step": 10919
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3908721208572388,
      "learning_rate": 0.0005966806159029627,
      "loss": 3.2069,
      "step": 10920
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3673746585845947,
      "learning_rate": 0.0005966800090531811,
      "loss": 3.0702,
      "step": 10921
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8101099729537964,
      "learning_rate": 0.0005966794021482412,
      "loss": 3.0886,
      "step": 10922
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5628730058670044,
      "learning_rate": 0.000596678795188143,
      "loss": 2.9698,
      "step": 10923
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3743536472320557,
      "learning_rate": 0.0005966781881728866,
      "loss": 3.16,
      "step": 10924
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4883404970169067,
      "learning_rate": 0.000596677581102472,
      "loss": 3.2099,
      "step": 10925
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7009090185165405,
      "learning_rate": 0.0005966769739768997,
      "loss": 3.0747,
      "step": 10926
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.260472059249878,
      "learning_rate": 0.0005966763667961694,
      "loss": 3.2665,
      "step": 10927
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6792824268341064,
      "learning_rate": 0.0005966757595602813,
      "loss": 3.1376,
      "step": 10928
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2722851037979126,
      "learning_rate": 0.0005966751522692356,
      "loss": 3.1615,
      "step": 10929
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.658839225769043,
      "learning_rate": 0.0005966745449230324,
      "loss": 3.0274,
      "step": 10930
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4635900259017944,
      "learning_rate": 0.0005966739375216719,
      "loss": 3.1647,
      "step": 10931
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3673864603042603,
      "learning_rate": 0.000596673330065154,
      "loss": 3.3272,
      "step": 10932
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3151997327804565,
      "learning_rate": 0.0005966727225534791,
      "loss": 2.9059,
      "step": 10933
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7089951038360596,
      "learning_rate": 0.000596672114986647,
      "loss": 3.0747,
      "step": 10934
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6071019172668457,
      "learning_rate": 0.0005966715073646579,
      "loss": 3.2538,
      "step": 10935
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8278533220291138,
      "learning_rate": 0.0005966708996875121,
      "loss": 3.2489,
      "step": 10936
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4473978281021118,
      "learning_rate": 0.0005966702919552095,
      "loss": 3.1683,
      "step": 10937
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4852032661437988,
      "learning_rate": 0.0005966696841677503,
      "loss": 3.1937,
      "step": 10938
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.687203049659729,
      "learning_rate": 0.0005966690763251346,
      "loss": 2.9835,
      "step": 10939
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.574460506439209,
      "learning_rate": 0.0005966684684273625,
      "loss": 3.1156,
      "step": 10940
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8712365627288818,
      "learning_rate": 0.0005966678604744342,
      "loss": 3.2011,
      "step": 10941
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1432838439941406,
      "learning_rate": 0.0005966672524663496,
      "loss": 2.8973,
      "step": 10942
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4616622924804688,
      "learning_rate": 0.0005966666444031091,
      "loss": 3.1556,
      "step": 10943
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9964487552642822,
      "learning_rate": 0.0005966660362847127,
      "loss": 3.0635,
      "step": 10944
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.281092405319214,
      "learning_rate": 0.0005966654281111604,
      "loss": 3.352,
      "step": 10945
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9641187191009521,
      "learning_rate": 0.0005966648198824524,
      "loss": 3.0096,
      "step": 10946
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.458965539932251,
      "learning_rate": 0.0005966642115985888,
      "loss": 3.1022,
      "step": 10947
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7519060373306274,
      "learning_rate": 0.0005966636032595698,
      "loss": 3.0046,
      "step": 10948
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.706779956817627,
      "learning_rate": 0.0005966629948653954,
      "loss": 3.0312,
      "step": 10949
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.576489806175232,
      "learning_rate": 0.0005966623864160658,
      "loss": 3.2186,
      "step": 10950
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.558295249938965,
      "learning_rate": 0.0005966617779115809,
      "loss": 3.4093,
      "step": 10951
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.695420742034912,
      "learning_rate": 0.0005966611693519411,
      "loss": 3.1187,
      "step": 10952
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4950305223464966,
      "learning_rate": 0.0005966605607371463,
      "loss": 3.1511,
      "step": 10953
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.487926721572876,
      "learning_rate": 0.0005966599520671968,
      "loss": 3.0286,
      "step": 10954
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8697453737258911,
      "learning_rate": 0.0005966593433420926,
      "loss": 3.1048,
      "step": 10955
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.423862338066101,
      "learning_rate": 0.0005966587345618338,
      "loss": 3.1903,
      "step": 10956
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.6129684448242188,
      "learning_rate": 0.0005966581257264205,
      "loss": 3.1362,
      "step": 10957
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.110736131668091,
      "learning_rate": 0.000596657516835853,
      "loss": 3.4184,
      "step": 10958
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2490366697311401,
      "learning_rate": 0.0005966569078901311,
      "loss": 3.2214,
      "step": 10959
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.784999132156372,
      "learning_rate": 0.0005966562988892551,
      "loss": 3.0592,
      "step": 10960
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.8708574771881104,
      "learning_rate": 0.0005966556898332252,
      "loss": 3.0109,
      "step": 10961
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5170552730560303,
      "learning_rate": 0.0005966550807220413,
      "loss": 3.2367,
      "step": 10962
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7633017301559448,
      "learning_rate": 0.0005966544715557037,
      "loss": 3.4969,
      "step": 10963
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8935918807983398,
      "learning_rate": 0.0005966538623342124,
      "loss": 3.1909,
      "step": 10964
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.724721074104309,
      "learning_rate": 0.0005966532530575676,
      "loss": 3.199,
      "step": 10965
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6277179718017578,
      "learning_rate": 0.0005966526437257693,
      "loss": 2.9832,
      "step": 10966
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1906235218048096,
      "learning_rate": 0.0005966520343388177,
      "loss": 3.1,
      "step": 10967
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.0492496490478516,
      "learning_rate": 0.0005966514248967129,
      "loss": 3.2777,
      "step": 10968
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.622401237487793,
      "learning_rate": 0.000596650815399455,
      "loss": 2.7735,
      "step": 10969
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.218832492828369,
      "learning_rate": 0.0005966502058470441,
      "loss": 3.329,
      "step": 10970
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.472240447998047,
      "learning_rate": 0.0005966495962394803,
      "loss": 3.1331,
      "step": 10971
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2016401290893555,
      "learning_rate": 0.0005966489865767637,
      "loss": 3.4107,
      "step": 10972
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8256573677062988,
      "learning_rate": 0.0005966483768588946,
      "loss": 3.3787,
      "step": 10973
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.7545952796936035,
      "learning_rate": 0.0005966477670858729,
      "loss": 3.1957,
      "step": 10974
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7992990016937256,
      "learning_rate": 0.0005966471572576987,
      "loss": 3.0731,
      "step": 10975
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6551079750061035,
      "learning_rate": 0.0005966465473743722,
      "loss": 3.3184,
      "step": 10976
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8857284784317017,
      "learning_rate": 0.0005966459374358936,
      "loss": 3.1539,
      "step": 10977
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4861186742782593,
      "learning_rate": 0.0005966453274422629,
      "loss": 2.9404,
      "step": 10978
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5571799278259277,
      "learning_rate": 0.0005966447173934801,
      "loss": 3.1571,
      "step": 10979
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9369068145751953,
      "learning_rate": 0.0005966441072895456,
      "loss": 3.3431,
      "step": 10980
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9442338943481445,
      "learning_rate": 0.0005966434971304594,
      "loss": 3.2338,
      "step": 10981
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3512953519821167,
      "learning_rate": 0.0005966428869162214,
      "loss": 3.2183,
      "step": 10982
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.599509835243225,
      "learning_rate": 0.000596642276646832,
      "loss": 3.1958,
      "step": 10983
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1804845333099365,
      "learning_rate": 0.0005966416663222913,
      "loss": 2.8058,
      "step": 10984
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4644476175308228,
      "learning_rate": 0.0005966410559425992,
      "loss": 2.8082,
      "step": 10985
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4042694568634033,
      "learning_rate": 0.000596640445507756,
      "loss": 3.0824,
      "step": 10986
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.027797222137451,
      "learning_rate": 0.0005966398350177616,
      "loss": 3.2893,
      "step": 10987
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3607627153396606,
      "learning_rate": 0.0005966392244726163,
      "loss": 3.1893,
      "step": 10988
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.535223126411438,
      "learning_rate": 0.0005966386138723202,
      "loss": 3.1514,
      "step": 10989
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.656882643699646,
      "learning_rate": 0.0005966380032168733,
      "loss": 3.0302,
      "step": 10990
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4950201511383057,
      "learning_rate": 0.0005966373925062759,
      "loss": 3.0617,
      "step": 10991
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3857015371322632,
      "learning_rate": 0.0005966367817405281,
      "loss": 3.0121,
      "step": 10992
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3413975238800049,
      "learning_rate": 0.0005966361709196298,
      "loss": 3.1935,
      "step": 10993
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.215105414390564,
      "learning_rate": 0.0005966355600435812,
      "loss": 3.3372,
      "step": 10994
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.303173303604126,
      "learning_rate": 0.0005966349491123824,
      "loss": 3.176,
      "step": 10995
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.47264564037323,
      "learning_rate": 0.0005966343381260338,
      "loss": 3.1678,
      "step": 10996
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8862260580062866,
      "learning_rate": 0.000596633727084535,
      "loss": 3.2525,
      "step": 10997
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8177103996276855,
      "learning_rate": 0.0005966331159878866,
      "loss": 3.1025,
      "step": 10998
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.0213170051574707,
      "learning_rate": 0.0005966325048360884,
      "loss": 3.0542,
      "step": 10999
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3979182243347168,
      "learning_rate": 0.0005966318936291407,
      "loss": 3.1728,
      "step": 11000
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5543147325515747,
      "learning_rate": 0.0005966312823670435,
      "loss": 3.5205,
      "step": 11001
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.216294050216675,
      "learning_rate": 0.0005966306710497968,
      "loss": 3.2876,
      "step": 11002
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4205272197723389,
      "learning_rate": 0.000596630059677401,
      "loss": 3.1902,
      "step": 11003
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.555088758468628,
      "learning_rate": 0.0005966294482498561,
      "loss": 3.135,
      "step": 11004
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7290722131729126,
      "learning_rate": 0.0005966288367671622,
      "loss": 3.3151,
      "step": 11005
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4981486797332764,
      "learning_rate": 0.0005966282252293193,
      "loss": 3.1068,
      "step": 11006
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4649993181228638,
      "learning_rate": 0.0005966276136363276,
      "loss": 3.1254,
      "step": 11007
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6679668426513672,
      "learning_rate": 0.0005966270019881874,
      "loss": 3.1722,
      "step": 11008
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5928974151611328,
      "learning_rate": 0.0005966263902848984,
      "loss": 3.3324,
      "step": 11009
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.306138753890991,
      "learning_rate": 0.0005966257785264611,
      "loss": 3.1319,
      "step": 11010
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1260454654693604,
      "learning_rate": 0.0005966251667128754,
      "loss": 3.0969,
      "step": 11011
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7748662233352661,
      "learning_rate": 0.0005966245548441416,
      "loss": 3.4757,
      "step": 11012
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6726644039154053,
      "learning_rate": 0.0005966239429202596,
      "loss": 3.034,
      "step": 11013
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9294452667236328,
      "learning_rate": 0.0005966233309412296,
      "loss": 3.2272,
      "step": 11014
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3137547969818115,
      "learning_rate": 0.0005966227189070518,
      "loss": 3.331,
      "step": 11015
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4779893159866333,
      "learning_rate": 0.0005966221068177262,
      "loss": 3.0557,
      "step": 11016
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2519021034240723,
      "learning_rate": 0.000596621494673253,
      "loss": 3.0987,
      "step": 11017
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.735283613204956,
      "learning_rate": 0.0005966208824736321,
      "loss": 3.0893,
      "step": 11018
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.517235279083252,
      "learning_rate": 0.000596620270218864,
      "loss": 3.3664,
      "step": 11019
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.887994647026062,
      "learning_rate": 0.0005966196579089484,
      "loss": 3.1081,
      "step": 11020
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.2747013568878174,
      "learning_rate": 0.0005966190455438857,
      "loss": 3.185,
      "step": 11021
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1430413722991943,
      "learning_rate": 0.0005966184331236759,
      "loss": 3.1734,
      "step": 11022
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3145551681518555,
      "learning_rate": 0.0005966178206483192,
      "loss": 2.9019,
      "step": 11023
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6417038440704346,
      "learning_rate": 0.0005966172081178155,
      "loss": 3.0643,
      "step": 11024
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2987825870513916,
      "learning_rate": 0.0005966165955321653,
      "loss": 3.0885,
      "step": 11025
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9094072580337524,
      "learning_rate": 0.0005966159828913682,
      "loss": 3.2725,
      "step": 11026
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7241073846817017,
      "learning_rate": 0.0005966153701954248,
      "loss": 3.193,
      "step": 11027
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5281175374984741,
      "learning_rate": 0.0005966147574443349,
      "loss": 3.239,
      "step": 11028
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.657396674156189,
      "learning_rate": 0.0005966141446380987,
      "loss": 3.0154,
      "step": 11029
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8704969882965088,
      "learning_rate": 0.0005966135317767164,
      "loss": 3.0896,
      "step": 11030
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3263187408447266,
      "learning_rate": 0.000596612918860188,
      "loss": 2.9607,
      "step": 11031
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.240140914916992,
      "learning_rate": 0.0005966123058885137,
      "loss": 3.1033,
      "step": 11032
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8376610279083252,
      "learning_rate": 0.0005966116928616935,
      "loss": 3.4221,
      "step": 11033
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.582218050956726,
      "learning_rate": 0.0005966110797797276,
      "loss": 3.1929,
      "step": 11034
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.72071373462677,
      "learning_rate": 0.0005966104666426162,
      "loss": 3.2629,
      "step": 11035
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.7768185138702393,
      "learning_rate": 0.0005966098534503592,
      "loss": 3.2327,
      "step": 11036
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.9713549613952637,
      "learning_rate": 0.0005966092402029569,
      "loss": 3.3218,
      "step": 11037
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9216639995574951,
      "learning_rate": 0.0005966086269004094,
      "loss": 3.2603,
      "step": 11038
    },
    {
      "epoch": 0.14,
      "grad_norm": 3.1722614765167236,
      "learning_rate": 0.0005966080135427166,
      "loss": 3.1658,
      "step": 11039
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5111541748046875,
      "learning_rate": 0.000596607400129879,
      "loss": 3.2453,
      "step": 11040
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.99086594581604,
      "learning_rate": 0.0005966067866618963,
      "loss": 3.1319,
      "step": 11041
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5885584354400635,
      "learning_rate": 0.0005966061731387688,
      "loss": 3.2071,
      "step": 11042
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.6547844409942627,
      "learning_rate": 0.0005966055595604966,
      "loss": 3.2899,
      "step": 11043
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3564603328704834,
      "learning_rate": 0.00059660494592708,
      "loss": 3.3067,
      "step": 11044
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.1768110990524292,
      "learning_rate": 0.0005966043322385187,
      "loss": 3.2241,
      "step": 11045
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.399595022201538,
      "learning_rate": 0.0005966037184948131,
      "loss": 3.1091,
      "step": 11046
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.934126853942871,
      "learning_rate": 0.0005966031046959634,
      "loss": 3.1216,
      "step": 11047
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1055948734283447,
      "learning_rate": 0.0005966024908419695,
      "loss": 3.033,
      "step": 11048
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7312235832214355,
      "learning_rate": 0.0005966018769328318,
      "loss": 3.0869,
      "step": 11049
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6438878774642944,
      "learning_rate": 0.0005966012629685499,
      "loss": 3.2505,
      "step": 11050
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8798537254333496,
      "learning_rate": 0.0005966006489491244,
      "loss": 3.3597,
      "step": 11051
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.626333236694336,
      "learning_rate": 0.0005966000348745553,
      "loss": 3.1636,
      "step": 11052
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4752854108810425,
      "learning_rate": 0.0005965994207448425,
      "loss": 3.4512,
      "step": 11053
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.373749017715454,
      "learning_rate": 0.0005965988065599863,
      "loss": 3.1353,
      "step": 11054
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5331393480300903,
      "learning_rate": 0.0005965981923199868,
      "loss": 3.1052,
      "step": 11055
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4829425811767578,
      "learning_rate": 0.0005965975780248442,
      "loss": 3.0892,
      "step": 11056
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4695030450820923,
      "learning_rate": 0.0005965969636745584,
      "loss": 3.0579,
      "step": 11057
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4545239210128784,
      "learning_rate": 0.0005965963492691297,
      "loss": 3.0709,
      "step": 11058
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.417035460472107,
      "learning_rate": 0.000596595734808558,
      "loss": 3.245,
      "step": 11059
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4737236499786377,
      "learning_rate": 0.0005965951202928437,
      "loss": 3.4914,
      "step": 11060
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7440794706344604,
      "learning_rate": 0.0005965945057219867,
      "loss": 3.125,
      "step": 11061
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.345382571220398,
      "learning_rate": 0.0005965938910959871,
      "loss": 2.9283,
      "step": 11062
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6277978420257568,
      "learning_rate": 0.0005965932764148453,
      "loss": 3.1421,
      "step": 11063
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6961798667907715,
      "learning_rate": 0.0005965926616785611,
      "loss": 3.4933,
      "step": 11064
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5302554368972778,
      "learning_rate": 0.0005965920468871348,
      "loss": 3.209,
      "step": 11065
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.816169023513794,
      "learning_rate": 0.0005965914320405662,
      "loss": 3.2797,
      "step": 11066
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.474170446395874,
      "learning_rate": 0.0005965908171388559,
      "loss": 3.2299,
      "step": 11067
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.668485403060913,
      "learning_rate": 0.0005965902021820037,
      "loss": 3.1869,
      "step": 11068
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5097203254699707,
      "learning_rate": 0.0005965895871700098,
      "loss": 3.026,
      "step": 11069
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.880441427230835,
      "learning_rate": 0.0005965889721028742,
      "loss": 3.0203,
      "step": 11070
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.5308034420013428,
      "learning_rate": 0.0005965883569805972,
      "loss": 3.295,
      "step": 11071
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1639349460601807,
      "learning_rate": 0.0005965877418031789,
      "loss": 3.0383,
      "step": 11072
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5869100093841553,
      "learning_rate": 0.0005965871265706192,
      "loss": 3.0386,
      "step": 11073
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.822204828262329,
      "learning_rate": 0.0005965865112829183,
      "loss": 3.1949,
      "step": 11074
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8445992469787598,
      "learning_rate": 0.0005965858959400766,
      "loss": 3.2051,
      "step": 11075
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8985410928726196,
      "learning_rate": 0.0005965852805420937,
      "loss": 3.2257,
      "step": 11076
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4350979328155518,
      "learning_rate": 0.0005965846650889703,
      "loss": 3.0079,
      "step": 11077
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3435776233673096,
      "learning_rate": 0.000596584049580706,
      "loss": 3.1411,
      "step": 11078
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.058711528778076,
      "learning_rate": 0.0005965834340173013,
      "loss": 3.2631,
      "step": 11079
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4844639301300049,
      "learning_rate": 0.000596582818398756,
      "loss": 3.0884,
      "step": 11080
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7194401025772095,
      "learning_rate": 0.0005965822027250704,
      "loss": 3.0558,
      "step": 11081
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.197601318359375,
      "learning_rate": 0.0005965815869962446,
      "loss": 3.2232,
      "step": 11082
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.018576145172119,
      "learning_rate": 0.0005965809712122786,
      "loss": 3.562,
      "step": 11083
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6334164142608643,
      "learning_rate": 0.0005965803553731726,
      "loss": 2.7509,
      "step": 11084
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3014168739318848,
      "learning_rate": 0.0005965797394789268,
      "loss": 2.9617,
      "step": 11085
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.4210004806518555,
      "learning_rate": 0.0005965791235295412,
      "loss": 3.2053,
      "step": 11086
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4800958633422852,
      "learning_rate": 0.0005965785075250159,
      "loss": 3.2645,
      "step": 11087
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.037355899810791,
      "learning_rate": 0.000596577891465351,
      "loss": 2.9606,
      "step": 11088
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.176513910293579,
      "learning_rate": 0.0005965772753505468,
      "loss": 2.8936,
      "step": 11089
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.619869351387024,
      "learning_rate": 0.0005965766591806034,
      "loss": 3.1506,
      "step": 11090
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3020505905151367,
      "learning_rate": 0.0005965760429555205,
      "loss": 3.0073,
      "step": 11091
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6544400453567505,
      "learning_rate": 0.0005965754266752987,
      "loss": 3.1146,
      "step": 11092
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4248061180114746,
      "learning_rate": 0.0005965748103399378,
      "loss": 3.2751,
      "step": 11093
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.254472255706787,
      "learning_rate": 0.0005965741939494382,
      "loss": 3.3379,
      "step": 11094
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6733416318893433,
      "learning_rate": 0.0005965735775037999,
      "loss": 3.2448,
      "step": 11095
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.693076729774475,
      "learning_rate": 0.0005965729610030228,
      "loss": 3.352,
      "step": 11096
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4041064977645874,
      "learning_rate": 0.0005965723444471072,
      "loss": 3.2004,
      "step": 11097
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8491616249084473,
      "learning_rate": 0.0005965717278360533,
      "loss": 3.1823,
      "step": 11098
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.3815112113952637,
      "learning_rate": 0.000596571111169861,
      "loss": 3.2591,
      "step": 11099
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6627129316329956,
      "learning_rate": 0.0005965704944485307,
      "loss": 3.2358,
      "step": 11100
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.6468849182128906,
      "learning_rate": 0.0005965698776720622,
      "loss": 3.2909,
      "step": 11101
    },
    {
      "epoch": 0.14,
      "grad_norm": 2.1517603397369385,
      "learning_rate": 0.0005965692608404559,
      "loss": 3.0994,
      "step": 11102
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3494619131088257,
      "learning_rate": 0.0005965686439537117,
      "loss": 3.1609,
      "step": 11103
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3407561779022217,
      "learning_rate": 0.0005965680270118296,
      "loss": 3.0869,
      "step": 11104
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6780258417129517,
      "learning_rate": 0.0005965674100148102,
      "loss": 3.0964,
      "step": 11105
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6958369016647339,
      "learning_rate": 0.0005965667929626532,
      "loss": 3.258,
      "step": 11106
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4142554998397827,
      "learning_rate": 0.0005965661758553589,
      "loss": 3.1113,
      "step": 11107
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9934911727905273,
      "learning_rate": 0.0005965655586929273,
      "loss": 3.1823,
      "step": 11108
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9561783075332642,
      "learning_rate": 0.0005965649414753586,
      "loss": 3.338,
      "step": 11109
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4177852869033813,
      "learning_rate": 0.0005965643242026528,
      "loss": 3.2038,
      "step": 11110
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.845603108406067,
      "learning_rate": 0.0005965637068748101,
      "loss": 2.9705,
      "step": 11111
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8067878484725952,
      "learning_rate": 0.0005965630894918307,
      "loss": 3.1151,
      "step": 11112
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7442057132720947,
      "learning_rate": 0.0005965624720537145,
      "loss": 2.9892,
      "step": 11113
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9056198596954346,
      "learning_rate": 0.0005965618545604618,
      "loss": 2.8629,
      "step": 11114
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.9277089834213257,
      "learning_rate": 0.0005965612370120727,
      "loss": 2.9811,
      "step": 11115
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6282541751861572,
      "learning_rate": 0.0005965606194085472,
      "loss": 3.1353,
      "step": 11116
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5670416355133057,
      "learning_rate": 0.0005965600017498855,
      "loss": 3.3464,
      "step": 11117
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7860344648361206,
      "learning_rate": 0.0005965593840360877,
      "loss": 3.1652,
      "step": 11118
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4743019342422485,
      "learning_rate": 0.0005965587662671539,
      "loss": 3.2899,
      "step": 11119
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.7505888938903809,
      "learning_rate": 0.0005965581484430843,
      "loss": 3.154,
      "step": 11120
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.6629544496536255,
      "learning_rate": 0.0005965575305638789,
      "loss": 3.0926,
      "step": 11121
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.3378986120224,
      "learning_rate": 0.0005965569126295378,
      "loss": 3.2272,
      "step": 11122
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.791782021522522,
      "learning_rate": 0.0005965562946400612,
      "loss": 3.1118,
      "step": 11123
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8054336309432983,
      "learning_rate": 0.0005965556765954492,
      "loss": 3.3933,
      "step": 11124
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2075659036636353,
      "learning_rate": 0.0005965550584957019,
      "loss": 3.3493,
      "step": 11125
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4471814632415771,
      "learning_rate": 0.0005965544403408195,
      "loss": 3.2835,
      "step": 11126
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.2964146137237549,
      "learning_rate": 0.0005965538221308018,
      "loss": 3.1254,
      "step": 11127
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.487460970878601,
      "learning_rate": 0.0005965532038656494,
      "loss": 3.0719,
      "step": 11128
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.5086019039154053,
      "learning_rate": 0.0005965525855453621,
      "loss": 3.0925,
      "step": 11129
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.340880274772644,
      "learning_rate": 0.00059655196716994,
      "loss": 3.2661,
      "step": 11130
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.351602554321289,
      "learning_rate": 0.0005965513487393834,
      "loss": 2.9881,
      "step": 11131
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4533594846725464,
      "learning_rate": 0.0005965507302536922,
      "loss": 3.0131,
      "step": 11132
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.8432613611221313,
      "learning_rate": 0.0005965501117128667,
      "loss": 3.0354,
      "step": 11133
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.4208312034606934,
      "learning_rate": 0.0005965494931169068,
      "loss": 3.0873,
      "step": 11134
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.464749813079834,
      "learning_rate": 0.0005965488744658128,
      "loss": 3.1936,
      "step": 11135
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.748642921447754,
      "learning_rate": 0.0005965482557595849,
      "loss": 3.3232,
      "step": 11136
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.564605951309204,
      "learning_rate": 0.000596547636998223,
      "loss": 3.1735,
      "step": 11137
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4509239196777344,
      "learning_rate": 0.0005965470181817273,
      "loss": 3.2218,
      "step": 11138
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.742455244064331,
      "learning_rate": 0.0005965463993100979,
      "loss": 3.3302,
      "step": 11139
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8415418863296509,
      "learning_rate": 0.0005965457803833349,
      "loss": 3.2711,
      "step": 11140
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.7877793312072754,
      "learning_rate": 0.0005965451614014384,
      "loss": 2.9713,
      "step": 11141
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.4698069095611572,
      "learning_rate": 0.0005965445423644087,
      "loss": 3.1947,
      "step": 11142
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6089893579483032,
      "learning_rate": 0.0005965439232722457,
      "loss": 3.4685,
      "step": 11143
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8294544219970703,
      "learning_rate": 0.0005965433041249496,
      "loss": 3.2456,
      "step": 11144
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8009096384048462,
      "learning_rate": 0.0005965426849225204,
      "loss": 3.2581,
      "step": 11145
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9023463726043701,
      "learning_rate": 0.0005965420656649585,
      "loss": 3.3377,
      "step": 11146
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3890818357467651,
      "learning_rate": 0.0005965414463522637,
      "loss": 3.3377,
      "step": 11147
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4175124168395996,
      "learning_rate": 0.0005965408269844362,
      "loss": 3.2785,
      "step": 11148
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.693148136138916,
      "learning_rate": 0.0005965402075614764,
      "loss": 3.0728,
      "step": 11149
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1601309776306152,
      "learning_rate": 0.000596539588083384,
      "loss": 3.1001,
      "step": 11150
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7353429794311523,
      "learning_rate": 0.0005965389685501592,
      "loss": 3.0429,
      "step": 11151
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2361507415771484,
      "learning_rate": 0.0005965383489618025,
      "loss": 3.3893,
      "step": 11152
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2616448402404785,
      "learning_rate": 0.0005965377293183135,
      "loss": 3.1937,
      "step": 11153
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8012720346450806,
      "learning_rate": 0.0005965371096196925,
      "loss": 3.2636,
      "step": 11154
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7016850709915161,
      "learning_rate": 0.0005965364898659397,
      "loss": 3.3251,
      "step": 11155
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.657301902770996,
      "learning_rate": 0.0005965358700570552,
      "loss": 3.2276,
      "step": 11156
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.07934308052063,
      "learning_rate": 0.0005965352501930392,
      "loss": 3.1256,
      "step": 11157
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7910977602005005,
      "learning_rate": 0.0005965346302738916,
      "loss": 3.1434,
      "step": 11158
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6600295305252075,
      "learning_rate": 0.0005965340102996126,
      "loss": 3.0689,
      "step": 11159
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4824503660202026,
      "learning_rate": 0.0005965333902702023,
      "loss": 3.1521,
      "step": 11160
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.952500820159912,
      "learning_rate": 0.0005965327701856609,
      "loss": 2.9179,
      "step": 11161
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.163914203643799,
      "learning_rate": 0.0005965321500459884,
      "loss": 3.2074,
      "step": 11162
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.960322380065918,
      "learning_rate": 0.000596531529851185,
      "loss": 3.2956,
      "step": 11163
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1104559898376465,
      "learning_rate": 0.0005965309096012508,
      "loss": 2.9306,
      "step": 11164
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7090975046157837,
      "learning_rate": 0.000596530289296186,
      "loss": 3.0698,
      "step": 11165
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6842793226242065,
      "learning_rate": 0.0005965296689359905,
      "loss": 3.1173,
      "step": 11166
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6441067457199097,
      "learning_rate": 0.0005965290485206646,
      "loss": 3.1468,
      "step": 11167
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5003632307052612,
      "learning_rate": 0.0005965284280502084,
      "loss": 3.2875,
      "step": 11168
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6557528972625732,
      "learning_rate": 0.0005965278075246218,
      "loss": 3.3242,
      "step": 11169
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7724034786224365,
      "learning_rate": 0.0005965271869439053,
      "loss": 3.2754,
      "step": 11170
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5657413005828857,
      "learning_rate": 0.0005965265663080585,
      "loss": 3.2341,
      "step": 11171
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6168545484542847,
      "learning_rate": 0.000596525945617082,
      "loss": 3.2763,
      "step": 11172
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4923200607299805,
      "learning_rate": 0.0005965253248709758,
      "loss": 3.0776,
      "step": 11173
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8529685735702515,
      "learning_rate": 0.0005965247040697399,
      "loss": 2.9841,
      "step": 11174
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3954336643218994,
      "learning_rate": 0.0005965240832133744,
      "loss": 3.0401,
      "step": 11175
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.389178991317749,
      "learning_rate": 0.0005965234623018796,
      "loss": 3.1745,
      "step": 11176
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0789124965667725,
      "learning_rate": 0.0005965228413352553,
      "loss": 3.4249,
      "step": 11177
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.599794864654541,
      "learning_rate": 0.000596522220313502,
      "loss": 3.2015,
      "step": 11178
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4811402559280396,
      "learning_rate": 0.0005965215992366194,
      "loss": 3.4304,
      "step": 11179
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5942078828811646,
      "learning_rate": 0.000596520978104608,
      "loss": 3.1489,
      "step": 11180
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.208169460296631,
      "learning_rate": 0.0005965203569174676,
      "loss": 3.1125,
      "step": 11181
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2716256380081177,
      "learning_rate": 0.0005965197356751987,
      "loss": 3.1036,
      "step": 11182
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4577534198760986,
      "learning_rate": 0.0005965191143778011,
      "loss": 3.0742,
      "step": 11183
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4996176958084106,
      "learning_rate": 0.0005965184930252749,
      "loss": 2.9617,
      "step": 11184
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2928317785263062,
      "learning_rate": 0.0005965178716176204,
      "loss": 3.2103,
      "step": 11185
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.327061414718628,
      "learning_rate": 0.0005965172501548376,
      "loss": 3.076,
      "step": 11186
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4133018255233765,
      "learning_rate": 0.0005965166286369266,
      "loss": 3.3154,
      "step": 11187
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3831779956817627,
      "learning_rate": 0.0005965160070638876,
      "loss": 3.1305,
      "step": 11188
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3566434383392334,
      "learning_rate": 0.0005965153854357208,
      "loss": 3.1596,
      "step": 11189
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5553261041641235,
      "learning_rate": 0.000596514763752426,
      "loss": 2.901,
      "step": 11190
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6487103700637817,
      "learning_rate": 0.0005965141420140037,
      "loss": 3.0922,
      "step": 11191
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5460816621780396,
      "learning_rate": 0.0005965135202204536,
      "loss": 3.1446,
      "step": 11192
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0721206665039062,
      "learning_rate": 0.0005965128983717762,
      "loss": 3.1758,
      "step": 11193
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.41904878616333,
      "learning_rate": 0.0005965122764679715,
      "loss": 3.1619,
      "step": 11194
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3498752117156982,
      "learning_rate": 0.0005965116545090394,
      "loss": 3.0505,
      "step": 11195
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8040274381637573,
      "learning_rate": 0.0005965110324949803,
      "loss": 3.4558,
      "step": 11196
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.961720585823059,
      "learning_rate": 0.0005965104104257941,
      "loss": 2.9119,
      "step": 11197
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9969605207443237,
      "learning_rate": 0.0005965097883014811,
      "loss": 3.1993,
      "step": 11198
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3896369934082031,
      "learning_rate": 0.0005965091661220414,
      "loss": 3.2407,
      "step": 11199
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5649434328079224,
      "learning_rate": 0.0005965085438874749,
      "loss": 3.3178,
      "step": 11200
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.337955355644226,
      "learning_rate": 0.000596507921597782,
      "loss": 3.3239,
      "step": 11201
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7554610967636108,
      "learning_rate": 0.0005965072992529626,
      "loss": 3.1702,
      "step": 11202
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3956626653671265,
      "learning_rate": 0.0005965066768530169,
      "loss": 3.1642,
      "step": 11203
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6036460399627686,
      "learning_rate": 0.000596506054397945,
      "loss": 3.2764,
      "step": 11204
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3392407894134521,
      "learning_rate": 0.0005965054318877471,
      "loss": 3.1039,
      "step": 11205
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4875826835632324,
      "learning_rate": 0.0005965048093224232,
      "loss": 3.1094,
      "step": 11206
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.691925287246704,
      "learning_rate": 0.0005965041867019734,
      "loss": 3.2985,
      "step": 11207
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.788019061088562,
      "learning_rate": 0.0005965035640263979,
      "loss": 3.3059,
      "step": 11208
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2994182109832764,
      "learning_rate": 0.0005965029412956969,
      "loss": 3.1311,
      "step": 11209
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8987901210784912,
      "learning_rate": 0.0005965023185098703,
      "loss": 2.8282,
      "step": 11210
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8828606605529785,
      "learning_rate": 0.0005965016956689185,
      "loss": 3.3139,
      "step": 11211
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9937227964401245,
      "learning_rate": 0.0005965010727728412,
      "loss": 3.1349,
      "step": 11212
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9284064769744873,
      "learning_rate": 0.0005965004498216388,
      "loss": 3.2355,
      "step": 11213
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6490478515625,
      "learning_rate": 0.0005964998268153114,
      "loss": 3.3104,
      "step": 11214
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5625752210617065,
      "learning_rate": 0.0005964992037538591,
      "loss": 3.2849,
      "step": 11215
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.151613235473633,
      "learning_rate": 0.000596498580637282,
      "loss": 3.1245,
      "step": 11216
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.484724998474121,
      "learning_rate": 0.0005964979574655802,
      "loss": 3.2166,
      "step": 11217
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0192675590515137,
      "learning_rate": 0.0005964973342387538,
      "loss": 3.0524,
      "step": 11218
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6035497188568115,
      "learning_rate": 0.000596496710956803,
      "loss": 3.248,
      "step": 11219
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2566672563552856,
      "learning_rate": 0.000596496087619728,
      "loss": 3.1717,
      "step": 11220
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5535701513290405,
      "learning_rate": 0.0005964954642275285,
      "loss": 3.3046,
      "step": 11221
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.35859215259552,
      "learning_rate": 0.000596494840780205,
      "loss": 3.2138,
      "step": 11222
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.534407615661621,
      "learning_rate": 0.0005964942172777577,
      "loss": 3.3339,
      "step": 11223
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8100212812423706,
      "learning_rate": 0.0005964935937201863,
      "loss": 3.2454,
      "step": 11224
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.547107219696045,
      "learning_rate": 0.0005964929701074912,
      "loss": 3.0971,
      "step": 11225
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3103138208389282,
      "learning_rate": 0.0005964923464396725,
      "loss": 3.037,
      "step": 11226
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0563018321990967,
      "learning_rate": 0.0005964917227167303,
      "loss": 3.1588,
      "step": 11227
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2074029445648193,
      "learning_rate": 0.0005964910989386647,
      "loss": 2.9257,
      "step": 11228
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.437017798423767,
      "learning_rate": 0.0005964904751054757,
      "loss": 3.2638,
      "step": 11229
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3325724601745605,
      "learning_rate": 0.0005964898512171636,
      "loss": 3.0091,
      "step": 11230
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.48166823387146,
      "learning_rate": 0.0005964892272737283,
      "loss": 3.4669,
      "step": 11231
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5934562683105469,
      "learning_rate": 0.0005964886032751702,
      "loss": 3.1755,
      "step": 11232
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6551177501678467,
      "learning_rate": 0.0005964879792214893,
      "loss": 3.2284,
      "step": 11233
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.477583885192871,
      "learning_rate": 0.0005964873551126856,
      "loss": 3.1235,
      "step": 11234
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.457572340965271,
      "learning_rate": 0.0005964867309487593,
      "loss": 2.9537,
      "step": 11235
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.892798662185669,
      "learning_rate": 0.0005964861067297106,
      "loss": 3.0299,
      "step": 11236
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4454622268676758,
      "learning_rate": 0.0005964854824555395,
      "loss": 3.0718,
      "step": 11237
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.053719997406006,
      "learning_rate": 0.0005964848581262462,
      "loss": 3.1731,
      "step": 11238
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4684169292449951,
      "learning_rate": 0.0005964842337418307,
      "loss": 3.2645,
      "step": 11239
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6345082521438599,
      "learning_rate": 0.0005964836093022933,
      "loss": 3.4226,
      "step": 11240
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7190243005752563,
      "learning_rate": 0.0005964829848076338,
      "loss": 3.2802,
      "step": 11241
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5253993272781372,
      "learning_rate": 0.0005964823602578526,
      "loss": 3.2414,
      "step": 11242
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5963813066482544,
      "learning_rate": 0.0005964817356529498,
      "loss": 3.3664,
      "step": 11243
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.0722105503082275,
      "learning_rate": 0.0005964811109929254,
      "loss": 3.1068,
      "step": 11244
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8873919248580933,
      "learning_rate": 0.0005964804862777796,
      "loss": 3.0329,
      "step": 11245
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1756439208984375,
      "learning_rate": 0.0005964798615075124,
      "loss": 2.8998,
      "step": 11246
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8361084461212158,
      "learning_rate": 0.000596479236682124,
      "loss": 3.1912,
      "step": 11247
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.597425103187561,
      "learning_rate": 0.0005964786118016146,
      "loss": 3.1834,
      "step": 11248
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.765925645828247,
      "learning_rate": 0.0005964779868659842,
      "loss": 3.1921,
      "step": 11249
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3836405277252197,
      "learning_rate": 0.0005964773618752329,
      "loss": 3.2073,
      "step": 11250
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.631353735923767,
      "learning_rate": 0.000596476736829361,
      "loss": 3.2351,
      "step": 11251
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0487635135650635,
      "learning_rate": 0.0005964761117283683,
      "loss": 3.0345,
      "step": 11252
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.628828763961792,
      "learning_rate": 0.0005964754865722552,
      "loss": 2.9566,
      "step": 11253
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1282970905303955,
      "learning_rate": 0.0005964748613610216,
      "loss": 2.9156,
      "step": 11254
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2156214714050293,
      "learning_rate": 0.0005964742360946678,
      "loss": 3.0071,
      "step": 11255
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.192316770553589,
      "learning_rate": 0.0005964736107731939,
      "loss": 3.014,
      "step": 11256
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.563949704170227,
      "learning_rate": 0.0005964729853966,
      "loss": 3.1019,
      "step": 11257
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6625680923461914,
      "learning_rate": 0.0005964723599648859,
      "loss": 3.2951,
      "step": 11258
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5194487571716309,
      "learning_rate": 0.0005964717344780523,
      "loss": 3.0799,
      "step": 11259
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7866731882095337,
      "learning_rate": 0.0005964711089360989,
      "loss": 3.0273,
      "step": 11260
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.859175205230713,
      "learning_rate": 0.0005964704833390259,
      "loss": 3.4212,
      "step": 11261
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.413812518119812,
      "learning_rate": 0.0005964698576868334,
      "loss": 3.0932,
      "step": 11262
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1149158477783203,
      "learning_rate": 0.0005964692319795216,
      "loss": 3.3596,
      "step": 11263
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0952775478363037,
      "learning_rate": 0.0005964686062170906,
      "loss": 3.0808,
      "step": 11264
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5928875207901,
      "learning_rate": 0.0005964679803995405,
      "loss": 3.2465,
      "step": 11265
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.003220319747925,
      "learning_rate": 0.0005964673545268713,
      "loss": 3.2011,
      "step": 11266
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.852614164352417,
      "learning_rate": 0.0005964667285990833,
      "loss": 3.0318,
      "step": 11267
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5884019136428833,
      "learning_rate": 0.0005964661026161765,
      "loss": 3.2066,
      "step": 11268
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.927309274673462,
      "learning_rate": 0.0005964654765781512,
      "loss": 3.0571,
      "step": 11269
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.855344533920288,
      "learning_rate": 0.0005964648504850072,
      "loss": 3.0502,
      "step": 11270
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.841813564300537,
      "learning_rate": 0.0005964642243367449,
      "loss": 3.3872,
      "step": 11271
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2689504623413086,
      "learning_rate": 0.0005964635981333642,
      "loss": 3.1493,
      "step": 11272
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9896024465560913,
      "learning_rate": 0.0005964629718748654,
      "loss": 3.057,
      "step": 11273
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6969642639160156,
      "learning_rate": 0.0005964623455612485,
      "loss": 3.1416,
      "step": 11274
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.898777484893799,
      "learning_rate": 0.0005964617191925137,
      "loss": 2.9366,
      "step": 11275
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.826256513595581,
      "learning_rate": 0.000596461092768661,
      "loss": 3.1414,
      "step": 11276
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8171645402908325,
      "learning_rate": 0.0005964604662896907,
      "loss": 2.957,
      "step": 11277
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.080213785171509,
      "learning_rate": 0.0005964598397556028,
      "loss": 2.9352,
      "step": 11278
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5359605550765991,
      "learning_rate": 0.0005964592131663972,
      "loss": 3.1918,
      "step": 11279
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.590103030204773,
      "learning_rate": 0.0005964585865220744,
      "loss": 2.9928,
      "step": 11280
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4600332975387573,
      "learning_rate": 0.0005964579598226343,
      "loss": 3.4441,
      "step": 11281
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6281920671463013,
      "learning_rate": 0.0005964573330680771,
      "loss": 3.0474,
      "step": 11282
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.230759382247925,
      "learning_rate": 0.000596456706258403,
      "loss": 3.2443,
      "step": 11283
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5972856283187866,
      "learning_rate": 0.0005964560793936119,
      "loss": 3.5759,
      "step": 11284
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1042935848236084,
      "learning_rate": 0.000596455452473704,
      "loss": 3.2262,
      "step": 11285
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1435651779174805,
      "learning_rate": 0.0005964548254986795,
      "loss": 3.3349,
      "step": 11286
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.457359790802002,
      "learning_rate": 0.0005964541984685383,
      "loss": 3.3223,
      "step": 11287
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5878996849060059,
      "learning_rate": 0.0005964535713832808,
      "loss": 3.0252,
      "step": 11288
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3808327913284302,
      "learning_rate": 0.000596452944242907,
      "loss": 2.9439,
      "step": 11289
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.459019660949707,
      "learning_rate": 0.0005964523170474169,
      "loss": 3.179,
      "step": 11290
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2836050987243652,
      "learning_rate": 0.0005964516897968107,
      "loss": 3.2321,
      "step": 11291
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6490559577941895,
      "learning_rate": 0.0005964510624910887,
      "loss": 2.9901,
      "step": 11292
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2112544775009155,
      "learning_rate": 0.0005964504351302507,
      "loss": 3.288,
      "step": 11293
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.300941228866577,
      "learning_rate": 0.000596449807714297,
      "loss": 3.3262,
      "step": 11294
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3701527118682861,
      "learning_rate": 0.0005964491802432278,
      "loss": 3.3651,
      "step": 11295
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.747071385383606,
      "learning_rate": 0.000596448552717043,
      "loss": 3.2539,
      "step": 11296
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4354137182235718,
      "learning_rate": 0.0005964479251357428,
      "loss": 3.0127,
      "step": 11297
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.127583622932434,
      "learning_rate": 0.0005964472974993274,
      "loss": 2.9483,
      "step": 11298
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2269785404205322,
      "learning_rate": 0.0005964466698077968,
      "loss": 3.2156,
      "step": 11299
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.179417848587036,
      "learning_rate": 0.0005964460420611513,
      "loss": 3.2122,
      "step": 11300
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.149613380432129,
      "learning_rate": 0.0005964454142593906,
      "loss": 3.1317,
      "step": 11301
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6688694953918457,
      "learning_rate": 0.0005964447864025154,
      "loss": 3.1931,
      "step": 11302
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4372225999832153,
      "learning_rate": 0.0005964441584905253,
      "loss": 3.2312,
      "step": 11303
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4815360307693481,
      "learning_rate": 0.0005964435305234208,
      "loss": 3.1435,
      "step": 11304
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4665119647979736,
      "learning_rate": 0.0005964429025012017,
      "loss": 3.1374,
      "step": 11305
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4684927463531494,
      "learning_rate": 0.0005964422744238684,
      "loss": 3.2649,
      "step": 11306
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3121960163116455,
      "learning_rate": 0.0005964416462914208,
      "loss": 3.0294,
      "step": 11307
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5931167602539062,
      "learning_rate": 0.0005964410181038592,
      "loss": 3.228,
      "step": 11308
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.902071475982666,
      "learning_rate": 0.0005964403898611835,
      "loss": 3.1168,
      "step": 11309
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6939918994903564,
      "learning_rate": 0.000596439761563394,
      "loss": 3.0382,
      "step": 11310
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6098742485046387,
      "learning_rate": 0.0005964391332104907,
      "loss": 3.0532,
      "step": 11311
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5170153379440308,
      "learning_rate": 0.0005964385048024739,
      "loss": 2.9947,
      "step": 11312
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5145483016967773,
      "learning_rate": 0.0005964378763393434,
      "loss": 3.1393,
      "step": 11313
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6733976602554321,
      "learning_rate": 0.0005964372478210997,
      "loss": 3.5403,
      "step": 11314
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.423152208328247,
      "learning_rate": 0.0005964366192477426,
      "loss": 3.0194,
      "step": 11315
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5761207342147827,
      "learning_rate": 0.0005964359906192724,
      "loss": 3.0371,
      "step": 11316
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.441166639328003,
      "learning_rate": 0.0005964353619356891,
      "loss": 3.3299,
      "step": 11317
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5419001579284668,
      "learning_rate": 0.0005964347331969929,
      "loss": 3.2293,
      "step": 11318
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.015284299850464,
      "learning_rate": 0.0005964341044031839,
      "loss": 2.9664,
      "step": 11319
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4350773096084595,
      "learning_rate": 0.0005964334755542621,
      "loss": 2.8791,
      "step": 11320
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.482767939567566,
      "learning_rate": 0.0005964328466502278,
      "loss": 3.1035,
      "step": 11321
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4007768630981445,
      "learning_rate": 0.000596432217691081,
      "loss": 3.1076,
      "step": 11322
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2419391870498657,
      "learning_rate": 0.0005964315886768219,
      "loss": 2.9203,
      "step": 11323
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6606558561325073,
      "learning_rate": 0.0005964309596074506,
      "loss": 3.1154,
      "step": 11324
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0465290546417236,
      "learning_rate": 0.0005964303304829672,
      "loss": 3.0642,
      "step": 11325
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.590423583984375,
      "learning_rate": 0.0005964297013033716,
      "loss": 3.0399,
      "step": 11326
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.114391326904297,
      "learning_rate": 0.0005964290720686644,
      "loss": 2.8854,
      "step": 11327
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5015547275543213,
      "learning_rate": 0.0005964284427788454,
      "loss": 3.1828,
      "step": 11328
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4005255699157715,
      "learning_rate": 0.0005964278134339146,
      "loss": 3.3069,
      "step": 11329
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5216723680496216,
      "learning_rate": 0.0005964271840338723,
      "loss": 3.0855,
      "step": 11330
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7751797437667847,
      "learning_rate": 0.0005964265545787186,
      "loss": 3.1413,
      "step": 11331
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5609670877456665,
      "learning_rate": 0.0005964259250684537,
      "loss": 3.293,
      "step": 11332
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7365624904632568,
      "learning_rate": 0.0005964252955030776,
      "loss": 3.1452,
      "step": 11333
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8485114574432373,
      "learning_rate": 0.0005964246658825904,
      "loss": 3.0695,
      "step": 11334
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4056404829025269,
      "learning_rate": 0.0005964240362069923,
      "loss": 3.0743,
      "step": 11335
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.81381356716156,
      "learning_rate": 0.0005964234064762833,
      "loss": 3.4894,
      "step": 11336
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.532060146331787,
      "learning_rate": 0.0005964227766904637,
      "loss": 3.0781,
      "step": 11337
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3209172487258911,
      "learning_rate": 0.0005964221468495334,
      "loss": 3.0355,
      "step": 11338
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5450090169906616,
      "learning_rate": 0.0005964215169534927,
      "loss": 3.1283,
      "step": 11339
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5970908403396606,
      "learning_rate": 0.0005964208870023417,
      "loss": 3.3073,
      "step": 11340
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.807206392288208,
      "learning_rate": 0.0005964202569960804,
      "loss": 3.3875,
      "step": 11341
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.689369559288025,
      "learning_rate": 0.0005964196269347088,
      "loss": 3.2795,
      "step": 11342
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9338771104812622,
      "learning_rate": 0.0005964189968182275,
      "loss": 3.3167,
      "step": 11343
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3027607202529907,
      "learning_rate": 0.0005964183666466362,
      "loss": 3.2269,
      "step": 11344
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3184471130371094,
      "learning_rate": 0.000596417736419935,
      "loss": 3.3742,
      "step": 11345
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.155808687210083,
      "learning_rate": 0.0005964171061381242,
      "loss": 2.8864,
      "step": 11346
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.616131067276001,
      "learning_rate": 0.000596416475801204,
      "loss": 3.1367,
      "step": 11347
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5139031410217285,
      "learning_rate": 0.0005964158454091743,
      "loss": 3.1105,
      "step": 11348
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.211752414703369,
      "learning_rate": 0.0005964152149620352,
      "loss": 3.1179,
      "step": 11349
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8975406885147095,
      "learning_rate": 0.0005964145844597872,
      "loss": 3.2349,
      "step": 11350
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7486662864685059,
      "learning_rate": 0.0005964139539024299,
      "loss": 3.0585,
      "step": 11351
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3338292837142944,
      "learning_rate": 0.0005964133232899637,
      "loss": 3.1825,
      "step": 11352
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3257944583892822,
      "learning_rate": 0.0005964126926223886,
      "loss": 3.1633,
      "step": 11353
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5136514902114868,
      "learning_rate": 0.0005964120618997049,
      "loss": 3.3796,
      "step": 11354
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5489354133605957,
      "learning_rate": 0.0005964114311219126,
      "loss": 3.2966,
      "step": 11355
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.332861304283142,
      "learning_rate": 0.0005964108002890117,
      "loss": 3.3277,
      "step": 11356
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4324252605438232,
      "learning_rate": 0.0005964101694010026,
      "loss": 3.1654,
      "step": 11357
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3443084955215454,
      "learning_rate": 0.000596409538457885,
      "loss": 3.2426,
      "step": 11358
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3895411491394043,
      "learning_rate": 0.0005964089074596596,
      "loss": 2.9673,
      "step": 11359
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5414730310440063,
      "learning_rate": 0.000596408276406326,
      "loss": 3.2522,
      "step": 11360
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.323547124862671,
      "learning_rate": 0.0005964076452978845,
      "loss": 2.8953,
      "step": 11361
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4853549003601074,
      "learning_rate": 0.0005964070141343353,
      "loss": 3.0381,
      "step": 11362
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6759108304977417,
      "learning_rate": 0.0005964063829156784,
      "loss": 3.0154,
      "step": 11363
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.409510850906372,
      "learning_rate": 0.0005964057516419139,
      "loss": 3.0106,
      "step": 11364
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.306519865989685,
      "learning_rate": 0.000596405120313042,
      "loss": 3.2107,
      "step": 11365
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1734611988067627,
      "learning_rate": 0.000596404488929063,
      "loss": 3.0686,
      "step": 11366
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6121597290039062,
      "learning_rate": 0.0005964038574899766,
      "loss": 3.2964,
      "step": 11367
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5241763591766357,
      "learning_rate": 0.0005964032259957831,
      "loss": 3.1358,
      "step": 11368
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.995090365409851,
      "learning_rate": 0.0005964025944464827,
      "loss": 3.1864,
      "step": 11369
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8188365697860718,
      "learning_rate": 0.0005964019628420755,
      "loss": 3.1745,
      "step": 11370
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5059236288070679,
      "learning_rate": 0.0005964013311825616,
      "loss": 2.9442,
      "step": 11371
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4761853218078613,
      "learning_rate": 0.000596400699467941,
      "loss": 3.2402,
      "step": 11372
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.044567584991455,
      "learning_rate": 0.000596400067698214,
      "loss": 3.1711,
      "step": 11373
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.194204330444336,
      "learning_rate": 0.0005963994358733805,
      "loss": 3.2272,
      "step": 11374
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3575146198272705,
      "learning_rate": 0.000596398803993441,
      "loss": 3.3506,
      "step": 11375
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4110372066497803,
      "learning_rate": 0.0005963981720583951,
      "loss": 2.8962,
      "step": 11376
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1860666275024414,
      "learning_rate": 0.0005963975400682433,
      "loss": 2.9397,
      "step": 11377
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7891484498977661,
      "learning_rate": 0.0005963969080229857,
      "loss": 3.097,
      "step": 11378
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5185781717300415,
      "learning_rate": 0.0005963962759226221,
      "loss": 3.1316,
      "step": 11379
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9537440538406372,
      "learning_rate": 0.000596395643767153,
      "loss": 3.1571,
      "step": 11380
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4503062963485718,
      "learning_rate": 0.0005963950115565784,
      "loss": 3.0821,
      "step": 11381
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.282735586166382,
      "learning_rate": 0.0005963943792908982,
      "loss": 3.2695,
      "step": 11382
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8575609922409058,
      "learning_rate": 0.0005963937469701128,
      "loss": 3.248,
      "step": 11383
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3962925672531128,
      "learning_rate": 0.0005963931145942221,
      "loss": 3.1662,
      "step": 11384
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0382182598114014,
      "learning_rate": 0.0005963924821632264,
      "loss": 3.0724,
      "step": 11385
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8975942134857178,
      "learning_rate": 0.0005963918496771259,
      "loss": 3.1882,
      "step": 11386
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5435835123062134,
      "learning_rate": 0.0005963912171359203,
      "loss": 3.23,
      "step": 11387
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4089876413345337,
      "learning_rate": 0.0005963905845396101,
      "loss": 3.3481,
      "step": 11388
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3028966188430786,
      "learning_rate": 0.0005963899518881953,
      "loss": 3.0336,
      "step": 11389
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6438242197036743,
      "learning_rate": 0.000596389319181676,
      "loss": 3.1799,
      "step": 11390
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4204061031341553,
      "learning_rate": 0.0005963886864200524,
      "loss": 3.2401,
      "step": 11391
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4225490093231201,
      "learning_rate": 0.0005963880536033245,
      "loss": 3.0756,
      "step": 11392
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8392118215560913,
      "learning_rate": 0.0005963874207314924,
      "loss": 3.1104,
      "step": 11393
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6052387952804565,
      "learning_rate": 0.0005963867878045563,
      "loss": 3.1832,
      "step": 11394
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.025165557861328,
      "learning_rate": 0.0005963861548225163,
      "loss": 3.2157,
      "step": 11395
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5192192792892456,
      "learning_rate": 0.0005963855217853727,
      "loss": 3.1088,
      "step": 11396
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4714069366455078,
      "learning_rate": 0.0005963848886931252,
      "loss": 3.3097,
      "step": 11397
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5521879196166992,
      "learning_rate": 0.0005963842555457743,
      "loss": 3.1682,
      "step": 11398
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8465826511383057,
      "learning_rate": 0.0005963836223433198,
      "loss": 3.0544,
      "step": 11399
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5426000356674194,
      "learning_rate": 0.0005963829890857621,
      "loss": 3.0823,
      "step": 11400
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.47165048122406,
      "learning_rate": 0.0005963823557731012,
      "loss": 3.206,
      "step": 11401
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6369401216506958,
      "learning_rate": 0.0005963817224053373,
      "loss": 3.0691,
      "step": 11402
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6202372312545776,
      "learning_rate": 0.0005963810889824704,
      "loss": 3.1326,
      "step": 11403
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.034010410308838,
      "learning_rate": 0.0005963804555045006,
      "loss": 3.1936,
      "step": 11404
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.790083646774292,
      "learning_rate": 0.0005963798219714282,
      "loss": 3.3679,
      "step": 11405
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9121346473693848,
      "learning_rate": 0.0005963791883832531,
      "loss": 3.1093,
      "step": 11406
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.514217734336853,
      "learning_rate": 0.0005963785547399755,
      "loss": 2.7089,
      "step": 11407
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3611440658569336,
      "learning_rate": 0.0005963779210415956,
      "loss": 2.9565,
      "step": 11408
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0706777572631836,
      "learning_rate": 0.0005963772872881134,
      "loss": 3.2135,
      "step": 11409
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3268924951553345,
      "learning_rate": 0.000596376653479529,
      "loss": 3.2231,
      "step": 11410
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5258100032806396,
      "learning_rate": 0.0005963760196158427,
      "loss": 3.2428,
      "step": 11411
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.6862945556640625,
      "learning_rate": 0.0005963753856970545,
      "loss": 3.1611,
      "step": 11412
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.183889389038086,
      "learning_rate": 0.0005963747517231644,
      "loss": 3.2147,
      "step": 11413
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6569336652755737,
      "learning_rate": 0.0005963741176941728,
      "loss": 3.414,
      "step": 11414
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7500450611114502,
      "learning_rate": 0.0005963734836100796,
      "loss": 3.2642,
      "step": 11415
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5670523643493652,
      "learning_rate": 0.0005963728494708849,
      "loss": 3.4711,
      "step": 11416
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4533300399780273,
      "learning_rate": 0.0005963722152765889,
      "loss": 2.9261,
      "step": 11417
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7347100973129272,
      "learning_rate": 0.0005963715810271918,
      "loss": 3.2071,
      "step": 11418
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8014073371887207,
      "learning_rate": 0.0005963709467226936,
      "loss": 3.2393,
      "step": 11419
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6387611627578735,
      "learning_rate": 0.0005963703123630945,
      "loss": 3.1606,
      "step": 11420
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8320549726486206,
      "learning_rate": 0.0005963696779483945,
      "loss": 2.9044,
      "step": 11421
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9075758457183838,
      "learning_rate": 0.0005963690434785937,
      "loss": 3.1024,
      "step": 11422
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5010650157928467,
      "learning_rate": 0.0005963684089536924,
      "loss": 3.4218,
      "step": 11423
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.6245830059051514,
      "learning_rate": 0.0005963677743736906,
      "loss": 3.1013,
      "step": 11424
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.901158094406128,
      "learning_rate": 0.0005963671397385884,
      "loss": 3.3996,
      "step": 11425
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.896966576576233,
      "learning_rate": 0.000596366505048386,
      "loss": 3.2,
      "step": 11426
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3744651079177856,
      "learning_rate": 0.0005963658703030836,
      "loss": 3.166,
      "step": 11427
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5321671962738037,
      "learning_rate": 0.000596365235502681,
      "loss": 3.436,
      "step": 11428
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3072818517684937,
      "learning_rate": 0.0005963646006471785,
      "loss": 3.2079,
      "step": 11429
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5108189582824707,
      "learning_rate": 0.0005963639657365764,
      "loss": 3.1244,
      "step": 11430
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8269506692886353,
      "learning_rate": 0.0005963633307708745,
      "loss": 3.1635,
      "step": 11431
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5900565385818481,
      "learning_rate": 0.0005963626957500731,
      "loss": 3.0929,
      "step": 11432
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4652965068817139,
      "learning_rate": 0.0005963620606741723,
      "loss": 3.1692,
      "step": 11433
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3877339363098145,
      "learning_rate": 0.0005963614255431721,
      "loss": 2.9911,
      "step": 11434
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.805501937866211,
      "learning_rate": 0.0005963607903570729,
      "loss": 3.0633,
      "step": 11435
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6450766324996948,
      "learning_rate": 0.0005963601551158745,
      "loss": 3.1173,
      "step": 11436
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8651403188705444,
      "learning_rate": 0.0005963595198195772,
      "loss": 3.3132,
      "step": 11437
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.2789807319641113,
      "learning_rate": 0.000596358884468181,
      "loss": 3.1261,
      "step": 11438
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8277745246887207,
      "learning_rate": 0.0005963582490616862,
      "loss": 3.3038,
      "step": 11439
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8324029445648193,
      "learning_rate": 0.0005963576136000928,
      "loss": 3.1474,
      "step": 11440
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5568201541900635,
      "learning_rate": 0.0005963569780834008,
      "loss": 3.0139,
      "step": 11441
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8602828979492188,
      "learning_rate": 0.0005963563425116106,
      "loss": 3.0085,
      "step": 11442
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.955641746520996,
      "learning_rate": 0.000596355706884722,
      "loss": 2.9773,
      "step": 11443
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9121780395507812,
      "learning_rate": 0.0005963550712027353,
      "loss": 3.3227,
      "step": 11444
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.374058961868286,
      "learning_rate": 0.0005963544354656507,
      "loss": 3.0736,
      "step": 11445
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7500137090682983,
      "learning_rate": 0.0005963537996734682,
      "loss": 3.1112,
      "step": 11446
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5717421770095825,
      "learning_rate": 0.000596353163826188,
      "loss": 3.0829,
      "step": 11447
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.7993416786193848,
      "learning_rate": 0.00059635252792381,
      "loss": 2.9921,
      "step": 11448
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5431900024414062,
      "learning_rate": 0.0005963518919663345,
      "loss": 3.2333,
      "step": 11449
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5725398063659668,
      "learning_rate": 0.0005963512559537617,
      "loss": 3.1147,
      "step": 11450
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8462435007095337,
      "learning_rate": 0.0005963506198860915,
      "loss": 2.7917,
      "step": 11451
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7941399812698364,
      "learning_rate": 0.0005963499837633242,
      "loss": 2.9822,
      "step": 11452
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3302679061889648,
      "learning_rate": 0.0005963493475854598,
      "loss": 3.1508,
      "step": 11453
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.030454158782959,
      "learning_rate": 0.0005963487113524984,
      "loss": 3.0832,
      "step": 11454
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5762797594070435,
      "learning_rate": 0.0005963480750644402,
      "loss": 3.2642,
      "step": 11455
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4744130373001099,
      "learning_rate": 0.0005963474387212853,
      "loss": 3.1258,
      "step": 11456
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.746497631072998,
      "learning_rate": 0.000596346802323034,
      "loss": 3.4193,
      "step": 11457
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.275543451309204,
      "learning_rate": 0.000596346165869686,
      "loss": 3.0171,
      "step": 11458
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.655595302581787,
      "learning_rate": 0.0005963455293612418,
      "loss": 3.3665,
      "step": 11459
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5593411922454834,
      "learning_rate": 0.0005963448927977014,
      "loss": 3.139,
      "step": 11460
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.0767822265625,
      "learning_rate": 0.0005963442561790648,
      "loss": 2.9931,
      "step": 11461
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9170466661453247,
      "learning_rate": 0.0005963436195053322,
      "loss": 3.3119,
      "step": 11462
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7318884134292603,
      "learning_rate": 0.0005963429827765037,
      "loss": 3.162,
      "step": 11463
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.7374560832977295,
      "learning_rate": 0.0005963423459925795,
      "loss": 3.1944,
      "step": 11464
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9347379207611084,
      "learning_rate": 0.0005963417091535597,
      "loss": 3.1104,
      "step": 11465
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3602707386016846,
      "learning_rate": 0.0005963410722594442,
      "loss": 3.0991,
      "step": 11466
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5464361906051636,
      "learning_rate": 0.0005963404353102335,
      "loss": 3.2232,
      "step": 11467
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0405662059783936,
      "learning_rate": 0.0005963397983059274,
      "loss": 3.1859,
      "step": 11468
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4966585636138916,
      "learning_rate": 0.0005963391612465263,
      "loss": 3.2715,
      "step": 11469
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.613074541091919,
      "learning_rate": 0.00059633852413203,
      "loss": 3.0471,
      "step": 11470
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.38897705078125,
      "learning_rate": 0.0005963378869624389,
      "loss": 3.1809,
      "step": 11471
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.58259117603302,
      "learning_rate": 0.0005963372497377529,
      "loss": 3.2773,
      "step": 11472
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.376483678817749,
      "learning_rate": 0.0005963366124579722,
      "loss": 3.337,
      "step": 11473
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6166868209838867,
      "learning_rate": 0.000596335975123097,
      "loss": 3.0992,
      "step": 11474
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.563200831413269,
      "learning_rate": 0.0005963353377331273,
      "loss": 3.276,
      "step": 11475
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.671041965484619,
      "learning_rate": 0.0005963347002880632,
      "loss": 3.2186,
      "step": 11476
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8930360078811646,
      "learning_rate": 0.0005963340627879051,
      "loss": 3.3221,
      "step": 11477
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8936363458633423,
      "learning_rate": 0.0005963334252326527,
      "loss": 3.2436,
      "step": 11478
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8676692247390747,
      "learning_rate": 0.0005963327876223063,
      "loss": 3.4723,
      "step": 11479
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1338775157928467,
      "learning_rate": 0.0005963321499568662,
      "loss": 3.385,
      "step": 11480
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.719375729560852,
      "learning_rate": 0.0005963315122363323,
      "loss": 3.1166,
      "step": 11481
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5274903774261475,
      "learning_rate": 0.0005963308744607048,
      "loss": 3.1823,
      "step": 11482
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9195289611816406,
      "learning_rate": 0.0005963302366299838,
      "loss": 3.4557,
      "step": 11483
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4800374507904053,
      "learning_rate": 0.0005963295987441693,
      "loss": 3.2972,
      "step": 11484
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5166876316070557,
      "learning_rate": 0.0005963289608032616,
      "loss": 3.3023,
      "step": 11485
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3027195930480957,
      "learning_rate": 0.0005963283228072608,
      "loss": 3.2393,
      "step": 11486
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.081737995147705,
      "learning_rate": 0.0005963276847561669,
      "loss": 3.3279,
      "step": 11487
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0585532188415527,
      "learning_rate": 0.0005963270466499801,
      "loss": 3.2877,
      "step": 11488
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4814186096191406,
      "learning_rate": 0.0005963264084887005,
      "loss": 2.9927,
      "step": 11489
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.272413730621338,
      "learning_rate": 0.0005963257702723283,
      "loss": 3.1821,
      "step": 11490
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5226330757141113,
      "learning_rate": 0.0005963251320008636,
      "loss": 3.1851,
      "step": 11491
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.726741909980774,
      "learning_rate": 0.0005963244936743064,
      "loss": 3.2703,
      "step": 11492
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6802115440368652,
      "learning_rate": 0.0005963238552926569,
      "loss": 3.2577,
      "step": 11493
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.578887939453125,
      "learning_rate": 0.0005963232168559151,
      "loss": 3.1701,
      "step": 11494
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4775549173355103,
      "learning_rate": 0.0005963225783640813,
      "loss": 2.991,
      "step": 11495
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.970430850982666,
      "learning_rate": 0.0005963219398171554,
      "loss": 3.1696,
      "step": 11496
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.393442153930664,
      "learning_rate": 0.0005963213012151378,
      "loss": 3.3451,
      "step": 11497
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4815211296081543,
      "learning_rate": 0.0005963206625580285,
      "loss": 3.2598,
      "step": 11498
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.163637399673462,
      "learning_rate": 0.0005963200238458276,
      "loss": 3.016,
      "step": 11499
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.447511911392212,
      "learning_rate": 0.0005963193850785351,
      "loss": 3.2832,
      "step": 11500
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6311254501342773,
      "learning_rate": 0.0005963187462561514,
      "loss": 3.2035,
      "step": 11501
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4324086904525757,
      "learning_rate": 0.0005963181073786762,
      "loss": 3.2587,
      "step": 11502
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2964181900024414,
      "learning_rate": 0.0005963174684461101,
      "loss": 3.3552,
      "step": 11503
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.572087049484253,
      "learning_rate": 0.0005963168294584529,
      "loss": 3.1721,
      "step": 11504
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3902735710144043,
      "learning_rate": 0.0005963161904157047,
      "loss": 3.1123,
      "step": 11505
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.8705177307128906,
      "learning_rate": 0.000596315551317866,
      "loss": 3.1205,
      "step": 11506
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.74272882938385,
      "learning_rate": 0.0005963149121649364,
      "loss": 3.1978,
      "step": 11507
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4755353927612305,
      "learning_rate": 0.0005963142729569163,
      "loss": 3.2965,
      "step": 11508
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.200765371322632,
      "learning_rate": 0.0005963136336938059,
      "loss": 3.4405,
      "step": 11509
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8550173044204712,
      "learning_rate": 0.0005963129943756051,
      "loss": 3.0537,
      "step": 11510
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6624938249588013,
      "learning_rate": 0.0005963123550023142,
      "loss": 3.1283,
      "step": 11511
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.134756565093994,
      "learning_rate": 0.0005963117155739332,
      "loss": 3.3243,
      "step": 11512
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.694758892059326,
      "learning_rate": 0.0005963110760904622,
      "loss": 3.3276,
      "step": 11513
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.775063157081604,
      "learning_rate": 0.0005963104365519015,
      "loss": 3.2753,
      "step": 11514
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5483067035675049,
      "learning_rate": 0.000596309796958251,
      "loss": 3.0817,
      "step": 11515
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5093863010406494,
      "learning_rate": 0.000596309157309511,
      "loss": 3.0444,
      "step": 11516
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.773667573928833,
      "learning_rate": 0.0005963085176056814,
      "loss": 3.2511,
      "step": 11517
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9750347137451172,
      "learning_rate": 0.0005963078778467626,
      "loss": 2.982,
      "step": 11518
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6207245588302612,
      "learning_rate": 0.0005963072380327544,
      "loss": 3.2414,
      "step": 11519
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5081583261489868,
      "learning_rate": 0.0005963065981636573,
      "loss": 3.1147,
      "step": 11520
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8708453178405762,
      "learning_rate": 0.000596305958239471,
      "loss": 3.1955,
      "step": 11521
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.803626537322998,
      "learning_rate": 0.0005963053182601961,
      "loss": 3.0345,
      "step": 11522
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.451025366783142,
      "learning_rate": 0.0005963046782258322,
      "loss": 2.9003,
      "step": 11523
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5239094495773315,
      "learning_rate": 0.0005963040381363798,
      "loss": 3.0398,
      "step": 11524
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5733082294464111,
      "learning_rate": 0.0005963033979918388,
      "loss": 2.958,
      "step": 11525
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7760341167449951,
      "learning_rate": 0.0005963027577922095,
      "loss": 3.3097,
      "step": 11526
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.723878026008606,
      "learning_rate": 0.0005963021175374918,
      "loss": 2.9508,
      "step": 11527
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.673660159111023,
      "learning_rate": 0.000596301477227686,
      "loss": 3.1055,
      "step": 11528
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.067963123321533,
      "learning_rate": 0.0005963008368627922,
      "loss": 3.1024,
      "step": 11529
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6842162609100342,
      "learning_rate": 0.0005963001964428104,
      "loss": 3.1153,
      "step": 11530
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0257315635681152,
      "learning_rate": 0.0005962995559677408,
      "loss": 3.3047,
      "step": 11531
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7676856517791748,
      "learning_rate": 0.0005962989154375837,
      "loss": 3.1491,
      "step": 11532
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7463771104812622,
      "learning_rate": 0.0005962982748523389,
      "loss": 3.1737,
      "step": 11533
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.910783052444458,
      "learning_rate": 0.0005962976342120067,
      "loss": 3.2101,
      "step": 11534
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.1033430099487305,
      "learning_rate": 0.0005962969935165871,
      "loss": 3.3356,
      "step": 11535
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.2758567333221436,
      "learning_rate": 0.0005962963527660803,
      "loss": 3.2113,
      "step": 11536
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4877429008483887,
      "learning_rate": 0.0005962957119604865,
      "loss": 2.7726,
      "step": 11537
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.097344160079956,
      "learning_rate": 0.0005962950710998057,
      "loss": 3.165,
      "step": 11538
    },
    {
      "epoch": 0.15,
      "grad_norm": 4.06681489944458,
      "learning_rate": 0.0005962944301840381,
      "loss": 3.2161,
      "step": 11539
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.2946407794952393,
      "learning_rate": 0.0005962937892131836,
      "loss": 3.2738,
      "step": 11540
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4166176319122314,
      "learning_rate": 0.0005962931481872427,
      "loss": 3.2662,
      "step": 11541
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9816030263900757,
      "learning_rate": 0.0005962925071062152,
      "loss": 3.3312,
      "step": 11542
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.9417173862457275,
      "learning_rate": 0.0005962918659701013,
      "loss": 3.2531,
      "step": 11543
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.838448405265808,
      "learning_rate": 0.0005962912247789012,
      "loss": 3.207,
      "step": 11544
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0793187618255615,
      "learning_rate": 0.000596290583532615,
      "loss": 2.9931,
      "step": 11545
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3324891328811646,
      "learning_rate": 0.0005962899422312427,
      "loss": 3.106,
      "step": 11546
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.658935785293579,
      "learning_rate": 0.0005962893008747846,
      "loss": 3.0494,
      "step": 11547
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4580342769622803,
      "learning_rate": 0.0005962886594632407,
      "loss": 3.239,
      "step": 11548
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9277263879776,
      "learning_rate": 0.0005962880179966112,
      "loss": 3.0959,
      "step": 11549
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3847230672836304,
      "learning_rate": 0.0005962873764748961,
      "loss": 2.9238,
      "step": 11550
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.614062547683716,
      "learning_rate": 0.0005962867348980955,
      "loss": 3.5076,
      "step": 11551
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.190192222595215,
      "learning_rate": 0.0005962860932662097,
      "loss": 3.0272,
      "step": 11552
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0517613887786865,
      "learning_rate": 0.0005962854515792387,
      "loss": 3.2168,
      "step": 11553
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.357869267463684,
      "learning_rate": 0.0005962848098371827,
      "loss": 3.0733,
      "step": 11554
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8731043338775635,
      "learning_rate": 0.0005962841680400417,
      "loss": 3.1916,
      "step": 11555
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.7292439937591553,
      "learning_rate": 0.0005962835261878158,
      "loss": 3.0625,
      "step": 11556
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4990975856781006,
      "learning_rate": 0.0005962828842805053,
      "loss": 2.9977,
      "step": 11557
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8763114213943481,
      "learning_rate": 0.0005962822423181102,
      "loss": 3.4461,
      "step": 11558
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.859799385070801,
      "learning_rate": 0.0005962816003006306,
      "loss": 2.9694,
      "step": 11559
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.813166618347168,
      "learning_rate": 0.0005962809582280668,
      "loss": 3.4293,
      "step": 11560
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4905964136123657,
      "learning_rate": 0.0005962803161004188,
      "loss": 3.1221,
      "step": 11561
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.744067907333374,
      "learning_rate": 0.0005962796739176864,
      "loss": 3.1007,
      "step": 11562
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8734872341156006,
      "learning_rate": 0.0005962790316798703,
      "loss": 3.1414,
      "step": 11563
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.804792642593384,
      "learning_rate": 0.0005962783893869703,
      "loss": 3.3513,
      "step": 11564
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4329419136047363,
      "learning_rate": 0.0005962777470389863,
      "loss": 3.1842,
      "step": 11565
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3475531339645386,
      "learning_rate": 0.0005962771046359189,
      "loss": 3.2407,
      "step": 11566
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.402164340019226,
      "learning_rate": 0.000596276462177768,
      "loss": 3.4341,
      "step": 11567
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.396183729171753,
      "learning_rate": 0.0005962758196645337,
      "loss": 3.1851,
      "step": 11568
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5112615823745728,
      "learning_rate": 0.000596275177096216,
      "loss": 3.0194,
      "step": 11569
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.367539882659912,
      "learning_rate": 0.0005962745344728153,
      "loss": 3.2176,
      "step": 11570
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.51185142993927,
      "learning_rate": 0.0005962738917943314,
      "loss": 2.8739,
      "step": 11571
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2922252416610718,
      "learning_rate": 0.0005962732490607649,
      "loss": 3.0579,
      "step": 11572
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3992340564727783,
      "learning_rate": 0.0005962726062721153,
      "loss": 3.1624,
      "step": 11573
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4152586460113525,
      "learning_rate": 0.0005962719634283832,
      "loss": 3.089,
      "step": 11574
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4915096759796143,
      "learning_rate": 0.0005962713205295685,
      "loss": 3.3269,
      "step": 11575
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6744741201400757,
      "learning_rate": 0.0005962706775756714,
      "loss": 3.0483,
      "step": 11576
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.059925079345703,
      "learning_rate": 0.0005962700345666919,
      "loss": 3.1713,
      "step": 11577
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.50041925907135,
      "learning_rate": 0.0005962693915026303,
      "loss": 3.2643,
      "step": 11578
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3632419109344482,
      "learning_rate": 0.0005962687483834866,
      "loss": 3.1226,
      "step": 11579
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6383167505264282,
      "learning_rate": 0.000596268105209261,
      "loss": 3.3507,
      "step": 11580
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7307795286178589,
      "learning_rate": 0.0005962674619799535,
      "loss": 3.264,
      "step": 11581
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7356886863708496,
      "learning_rate": 0.0005962668186955643,
      "loss": 3.1086,
      "step": 11582
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4031732082366943,
      "learning_rate": 0.0005962661753560935,
      "loss": 3.291,
      "step": 11583
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6817208528518677,
      "learning_rate": 0.0005962655319615412,
      "loss": 3.3817,
      "step": 11584
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6537563800811768,
      "learning_rate": 0.0005962648885119076,
      "loss": 3.1326,
      "step": 11585
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.413422703742981,
      "learning_rate": 0.0005962642450071927,
      "loss": 3.3185,
      "step": 11586
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6441835165023804,
      "learning_rate": 0.0005962636014473967,
      "loss": 3.1623,
      "step": 11587
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2655694484710693,
      "learning_rate": 0.0005962629578325198,
      "loss": 3.0105,
      "step": 11588
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8017346858978271,
      "learning_rate": 0.0005962623141625619,
      "loss": 2.8655,
      "step": 11589
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6441023349761963,
      "learning_rate": 0.0005962616704375234,
      "loss": 3.1296,
      "step": 11590
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5610238313674927,
      "learning_rate": 0.0005962610266574041,
      "loss": 3.0047,
      "step": 11591
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5453693866729736,
      "learning_rate": 0.0005962603828222043,
      "loss": 3.1465,
      "step": 11592
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7418327331542969,
      "learning_rate": 0.0005962597389319241,
      "loss": 2.9648,
      "step": 11593
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5827804803848267,
      "learning_rate": 0.0005962590949865637,
      "loss": 3.1019,
      "step": 11594
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9644689559936523,
      "learning_rate": 0.000596258450986123,
      "loss": 3.3132,
      "step": 11595
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6034042835235596,
      "learning_rate": 0.0005962578069306025,
      "loss": 3.1508,
      "step": 11596
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2936564683914185,
      "learning_rate": 0.0005962571628200019,
      "loss": 3.1914,
      "step": 11597
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.161240816116333,
      "learning_rate": 0.0005962565186543215,
      "loss": 3.1153,
      "step": 11598
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.062986135482788,
      "learning_rate": 0.0005962558744335615,
      "loss": 3.2173,
      "step": 11599
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.283843994140625,
      "learning_rate": 0.0005962552301577219,
      "loss": 2.9359,
      "step": 11600
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.7030506134033203,
      "learning_rate": 0.0005962545858268029,
      "loss": 2.9957,
      "step": 11601
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4210031032562256,
      "learning_rate": 0.0005962539414408046,
      "loss": 2.943,
      "step": 11602
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8794575929641724,
      "learning_rate": 0.000596253296999727,
      "loss": 3.1273,
      "step": 11603
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.598811149597168,
      "learning_rate": 0.0005962526525035704,
      "loss": 3.004,
      "step": 11604
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.257927417755127,
      "learning_rate": 0.0005962520079523348,
      "loss": 3.3262,
      "step": 11605
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7871993780136108,
      "learning_rate": 0.0005962513633460204,
      "loss": 3.0659,
      "step": 11606
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2881743907928467,
      "learning_rate": 0.0005962507186846272,
      "loss": 3.2323,
      "step": 11607
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.671666145324707,
      "learning_rate": 0.0005962500739681555,
      "loss": 3.1575,
      "step": 11608
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.229705333709717,
      "learning_rate": 0.0005962494291966052,
      "loss": 3.1014,
      "step": 11609
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.45399010181427,
      "learning_rate": 0.0005962487843699767,
      "loss": 2.9709,
      "step": 11610
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7448017597198486,
      "learning_rate": 0.0005962481394882699,
      "loss": 3.3647,
      "step": 11611
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9629619121551514,
      "learning_rate": 0.0005962474945514849,
      "loss": 2.9471,
      "step": 11612
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2662057876586914,
      "learning_rate": 0.0005962468495596219,
      "loss": 3.3111,
      "step": 11613
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4518948793411255,
      "learning_rate": 0.000596246204512681,
      "loss": 3.1309,
      "step": 11614
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.463598370552063,
      "learning_rate": 0.0005962455594106625,
      "loss": 3.0796,
      "step": 11615
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8318641185760498,
      "learning_rate": 0.0005962449142535662,
      "loss": 3.3699,
      "step": 11616
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2406401634216309,
      "learning_rate": 0.0005962442690413925,
      "loss": 3.2118,
      "step": 11617
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5535422563552856,
      "learning_rate": 0.0005962436237741413,
      "loss": 3.2867,
      "step": 11618
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7359052896499634,
      "learning_rate": 0.0005962429784518129,
      "loss": 3.0927,
      "step": 11619
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5713765621185303,
      "learning_rate": 0.0005962423330744072,
      "loss": 3.0975,
      "step": 11620
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9299652576446533,
      "learning_rate": 0.0005962416876419246,
      "loss": 3.2403,
      "step": 11621
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.658292531967163,
      "learning_rate": 0.000596241042154365,
      "loss": 3.0165,
      "step": 11622
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.239283323287964,
      "learning_rate": 0.0005962403966117286,
      "loss": 3.2086,
      "step": 11623
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5932444334030151,
      "learning_rate": 0.0005962397510140156,
      "loss": 3.3867,
      "step": 11624
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.578011155128479,
      "learning_rate": 0.000596239105361226,
      "loss": 3.146,
      "step": 11625
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4367485046386719,
      "learning_rate": 0.0005962384596533599,
      "loss": 2.9654,
      "step": 11626
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8221772909164429,
      "learning_rate": 0.0005962378138904175,
      "loss": 3.1324,
      "step": 11627
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6556098461151123,
      "learning_rate": 0.000596237168072399,
      "loss": 2.972,
      "step": 11628
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6004267930984497,
      "learning_rate": 0.0005962365221993043,
      "loss": 3.3166,
      "step": 11629
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7333935499191284,
      "learning_rate": 0.0005962358762711338,
      "loss": 3.2618,
      "step": 11630
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5167617797851562,
      "learning_rate": 0.0005962352302878873,
      "loss": 2.9908,
      "step": 11631
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7779419422149658,
      "learning_rate": 0.000596234584249565,
      "loss": 3.1897,
      "step": 11632
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5045380592346191,
      "learning_rate": 0.0005962339381561672,
      "loss": 3.265,
      "step": 11633
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8526865243911743,
      "learning_rate": 0.0005962332920076939,
      "loss": 3.0857,
      "step": 11634
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0043563842773438,
      "learning_rate": 0.0005962326458041454,
      "loss": 3.1318,
      "step": 11635
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6764531135559082,
      "learning_rate": 0.0005962319995455215,
      "loss": 2.9808,
      "step": 11636
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5541293621063232,
      "learning_rate": 0.0005962313532318225,
      "loss": 3.203,
      "step": 11637
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8620902299880981,
      "learning_rate": 0.0005962307068630486,
      "loss": 3.1396,
      "step": 11638
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.506389856338501,
      "learning_rate": 0.0005962300604391997,
      "loss": 3.0847,
      "step": 11639
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2644832134246826,
      "learning_rate": 0.000596229413960276,
      "loss": 2.9264,
      "step": 11640
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8891096115112305,
      "learning_rate": 0.0005962287674262778,
      "loss": 3.2974,
      "step": 11641
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8461240530014038,
      "learning_rate": 0.0005962281208372049,
      "loss": 2.8801,
      "step": 11642
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7243891954421997,
      "learning_rate": 0.0005962274741930577,
      "loss": 3.2154,
      "step": 11643
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3742506504058838,
      "learning_rate": 0.0005962268274938363,
      "loss": 3.3024,
      "step": 11644
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.434420108795166,
      "learning_rate": 0.0005962261807395407,
      "loss": 2.9516,
      "step": 11645
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.459954023361206,
      "learning_rate": 0.000596225533930171,
      "loss": 3.2453,
      "step": 11646
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4690378904342651,
      "learning_rate": 0.0005962248870657274,
      "loss": 3.289,
      "step": 11647
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8680933713912964,
      "learning_rate": 0.00059622424014621,
      "loss": 3.3408,
      "step": 11648
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.1934822797775269,
      "learning_rate": 0.000596223593171619,
      "loss": 3.1381,
      "step": 11649
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5565476417541504,
      "learning_rate": 0.0005962229461419544,
      "loss": 3.0025,
      "step": 11650
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.537692904472351,
      "learning_rate": 0.0005962222990572163,
      "loss": 3.1638,
      "step": 11651
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4757658243179321,
      "learning_rate": 0.000596221651917405,
      "loss": 3.2324,
      "step": 11652
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4620904922485352,
      "learning_rate": 0.0005962210047225203,
      "loss": 2.8679,
      "step": 11653
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6389110088348389,
      "learning_rate": 0.0005962203574725627,
      "loss": 3.0129,
      "step": 11654
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8439851999282837,
      "learning_rate": 0.000596219710167532,
      "loss": 3.3,
      "step": 11655
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.638495922088623,
      "learning_rate": 0.0005962190628074287,
      "loss": 3.2854,
      "step": 11656
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.8562400341033936,
      "learning_rate": 0.0005962184153922525,
      "loss": 3.2554,
      "step": 11657
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.6718554496765137,
      "learning_rate": 0.0005962177679220038,
      "loss": 2.9847,
      "step": 11658
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8832446336746216,
      "learning_rate": 0.0005962171203966825,
      "loss": 3.2434,
      "step": 11659
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6272790431976318,
      "learning_rate": 0.0005962164728162889,
      "loss": 3.2682,
      "step": 11660
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9079421758651733,
      "learning_rate": 0.0005962158251808232,
      "loss": 3.2916,
      "step": 11661
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.920051097869873,
      "learning_rate": 0.0005962151774902851,
      "loss": 3.1709,
      "step": 11662
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7226295471191406,
      "learning_rate": 0.0005962145297446753,
      "loss": 3.0574,
      "step": 11663
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.253448724746704,
      "learning_rate": 0.0005962138819439935,
      "loss": 3.3587,
      "step": 11664
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2993829250335693,
      "learning_rate": 0.00059621323408824,
      "loss": 3.1719,
      "step": 11665
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4259659051895142,
      "learning_rate": 0.0005962125861774148,
      "loss": 2.9687,
      "step": 11666
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6772887706756592,
      "learning_rate": 0.0005962119382115182,
      "loss": 3.4241,
      "step": 11667
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6398773193359375,
      "learning_rate": 0.00059621129019055,
      "loss": 3.2301,
      "step": 11668
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4537333250045776,
      "learning_rate": 0.0005962106421145107,
      "loss": 3.0747,
      "step": 11669
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5947809219360352,
      "learning_rate": 0.0005962099939834003,
      "loss": 2.9857,
      "step": 11670
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.624647855758667,
      "learning_rate": 0.0005962093457972187,
      "loss": 3.0477,
      "step": 11671
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3853099346160889,
      "learning_rate": 0.0005962086975559662,
      "loss": 3.0517,
      "step": 11672
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4035124778747559,
      "learning_rate": 0.000596208049259643,
      "loss": 3.1303,
      "step": 11673
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6429439783096313,
      "learning_rate": 0.0005962074009082492,
      "loss": 3.1959,
      "step": 11674
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4525389671325684,
      "learning_rate": 0.0005962067525017848,
      "loss": 3.1046,
      "step": 11675
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9998540878295898,
      "learning_rate": 0.0005962061040402499,
      "loss": 3.1338,
      "step": 11676
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3146244287490845,
      "learning_rate": 0.0005962054555236447,
      "loss": 3.1193,
      "step": 11677
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.448630452156067,
      "learning_rate": 0.0005962048069519694,
      "loss": 3.156,
      "step": 11678
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5229458808898926,
      "learning_rate": 0.0005962041583252238,
      "loss": 3.1785,
      "step": 11679
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2198967933654785,
      "learning_rate": 0.0005962035096434085,
      "loss": 3.3714,
      "step": 11680
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4767889976501465,
      "learning_rate": 0.0005962028609065234,
      "loss": 3.1229,
      "step": 11681
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6622326374053955,
      "learning_rate": 0.0005962022121145684,
      "loss": 2.9464,
      "step": 11682
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.527140498161316,
      "learning_rate": 0.000596201563267544,
      "loss": 3.4183,
      "step": 11683
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7018858194351196,
      "learning_rate": 0.0005962009143654501,
      "loss": 2.8482,
      "step": 11684
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4062503576278687,
      "learning_rate": 0.0005962002654082868,
      "loss": 3.1195,
      "step": 11685
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6510075330734253,
      "learning_rate": 0.0005961996163960543,
      "loss": 3.0459,
      "step": 11686
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.083282470703125,
      "learning_rate": 0.0005961989673287526,
      "loss": 3.0216,
      "step": 11687
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1058006286621094,
      "learning_rate": 0.0005961983182063822,
      "loss": 3.2676,
      "step": 11688
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5567030906677246,
      "learning_rate": 0.0005961976690289426,
      "loss": 3.3441,
      "step": 11689
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.709761142730713,
      "learning_rate": 0.0005961970197964345,
      "loss": 3.1364,
      "step": 11690
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.522758960723877,
      "learning_rate": 0.0005961963705088577,
      "loss": 3.1233,
      "step": 11691
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5690383911132812,
      "learning_rate": 0.0005961957211662125,
      "loss": 3.033,
      "step": 11692
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9855854511260986,
      "learning_rate": 0.0005961950717684988,
      "loss": 3.0598,
      "step": 11693
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3686846494674683,
      "learning_rate": 0.0005961944223157169,
      "loss": 3.3224,
      "step": 11694
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.327319860458374,
      "learning_rate": 0.0005961937728078668,
      "loss": 3.0219,
      "step": 11695
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.991794228553772,
      "learning_rate": 0.0005961931232449487,
      "loss": 3.126,
      "step": 11696
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.346692681312561,
      "learning_rate": 0.0005961924736269628,
      "loss": 2.9796,
      "step": 11697
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5522000789642334,
      "learning_rate": 0.000596191823953909,
      "loss": 3.0314,
      "step": 11698
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.524186611175537,
      "learning_rate": 0.0005961911742257877,
      "loss": 3.1863,
      "step": 11699
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4590795040130615,
      "learning_rate": 0.0005961905244425987,
      "loss": 3.4251,
      "step": 11700
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5938055515289307,
      "learning_rate": 0.0005961898746043424,
      "loss": 3.0471,
      "step": 11701
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8639382123947144,
      "learning_rate": 0.0005961892247110188,
      "loss": 3.2788,
      "step": 11702
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5043665170669556,
      "learning_rate": 0.000596188574762628,
      "loss": 3.3052,
      "step": 11703
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.945525050163269,
      "learning_rate": 0.0005961879247591701,
      "loss": 3.143,
      "step": 11704
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.000798463821411,
      "learning_rate": 0.0005961872747006455,
      "loss": 3.0029,
      "step": 11705
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0263965129852295,
      "learning_rate": 0.0005961866245870539,
      "loss": 3.1069,
      "step": 11706
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4351515769958496,
      "learning_rate": 0.0005961859744183957,
      "loss": 3.1816,
      "step": 11707
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0926144123077393,
      "learning_rate": 0.0005961853241946708,
      "loss": 3.1901,
      "step": 11708
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.620011329650879,
      "learning_rate": 0.0005961846739158796,
      "loss": 3.0974,
      "step": 11709
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.651517391204834,
      "learning_rate": 0.0005961840235820221,
      "loss": 3.0544,
      "step": 11710
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3383331298828125,
      "learning_rate": 0.0005961833731930983,
      "loss": 3.3725,
      "step": 11711
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3733739852905273,
      "learning_rate": 0.0005961827227491084,
      "loss": 3.0842,
      "step": 11712
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6582468748092651,
      "learning_rate": 0.0005961820722500526,
      "loss": 3.1869,
      "step": 11713
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.162236452102661,
      "learning_rate": 0.000596181421695931,
      "loss": 3.023,
      "step": 11714
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7044810056686401,
      "learning_rate": 0.0005961807710867436,
      "loss": 2.8618,
      "step": 11715
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6829874515533447,
      "learning_rate": 0.0005961801204224906,
      "loss": 3.3736,
      "step": 11716
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.648171305656433,
      "learning_rate": 0.0005961794697031723,
      "loss": 3.1211,
      "step": 11717
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.379406213760376,
      "learning_rate": 0.0005961788189287884,
      "loss": 3.2827,
      "step": 11718
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0233561992645264,
      "learning_rate": 0.0005961781680993395,
      "loss": 3.0524,
      "step": 11719
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.123950481414795,
      "learning_rate": 0.0005961775172148254,
      "loss": 3.1364,
      "step": 11720
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.953672170639038,
      "learning_rate": 0.0005961768662752461,
      "loss": 3.1551,
      "step": 11721
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.663743257522583,
      "learning_rate": 0.0005961762152806022,
      "loss": 3.0054,
      "step": 11722
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6003581285476685,
      "learning_rate": 0.0005961755642308935,
      "loss": 2.8661,
      "step": 11723
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0773766040802,
      "learning_rate": 0.0005961749131261201,
      "loss": 2.8843,
      "step": 11724
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.814300537109375,
      "learning_rate": 0.0005961742619662822,
      "loss": 2.9349,
      "step": 11725
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4523594379425049,
      "learning_rate": 0.0005961736107513799,
      "loss": 3.1118,
      "step": 11726
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.554386019706726,
      "learning_rate": 0.0005961729594814134,
      "loss": 3.3102,
      "step": 11727
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6367534399032593,
      "learning_rate": 0.0005961723081563827,
      "loss": 2.9666,
      "step": 11728
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5241215229034424,
      "learning_rate": 0.000596171656776288,
      "loss": 3.4319,
      "step": 11729
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4890366792678833,
      "learning_rate": 0.0005961710053411295,
      "loss": 3.4598,
      "step": 11730
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.36410391330719,
      "learning_rate": 0.000596170353850907,
      "loss": 3.1828,
      "step": 11731
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4817266464233398,
      "learning_rate": 0.000596169702305621,
      "loss": 3.1783,
      "step": 11732
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5233204364776611,
      "learning_rate": 0.0005961690507052715,
      "loss": 3.0715,
      "step": 11733
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4048529863357544,
      "learning_rate": 0.0005961683990498585,
      "loss": 3.149,
      "step": 11734
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2562968730926514,
      "learning_rate": 0.0005961677473393821,
      "loss": 2.993,
      "step": 11735
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6246658563613892,
      "learning_rate": 0.0005961670955738428,
      "loss": 3.5526,
      "step": 11736
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.084669351577759,
      "learning_rate": 0.0005961664437532402,
      "loss": 2.7264,
      "step": 11737
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.984802007675171,
      "learning_rate": 0.0005961657918775748,
      "loss": 3.273,
      "step": 11738
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6118556261062622,
      "learning_rate": 0.0005961651399468465,
      "loss": 3.1506,
      "step": 11739
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.955660104751587,
      "learning_rate": 0.0005961644879610555,
      "loss": 3.1634,
      "step": 11740
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9919633865356445,
      "learning_rate": 0.000596163835920202,
      "loss": 2.9692,
      "step": 11741
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5230684280395508,
      "learning_rate": 0.000596163183824286,
      "loss": 3.1843,
      "step": 11742
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2674280405044556,
      "learning_rate": 0.0005961625316733077,
      "loss": 2.865,
      "step": 11743
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.239483594894409,
      "learning_rate": 0.0005961618794672672,
      "loss": 3.2565,
      "step": 11744
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2731001377105713,
      "learning_rate": 0.0005961612272061647,
      "loss": 3.1384,
      "step": 11745
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4696868658065796,
      "learning_rate": 0.0005961605748900001,
      "loss": 2.8741,
      "step": 11746
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.8065850734710693,
      "learning_rate": 0.0005961599225187737,
      "loss": 3.1095,
      "step": 11747
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.3919293880462646,
      "learning_rate": 0.0005961592700924856,
      "loss": 3.1207,
      "step": 11748
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8966126441955566,
      "learning_rate": 0.0005961586176111359,
      "loss": 2.9564,
      "step": 11749
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3853483200073242,
      "learning_rate": 0.0005961579650747247,
      "loss": 3.0884,
      "step": 11750
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.006521224975586,
      "learning_rate": 0.0005961573124832522,
      "loss": 3.1327,
      "step": 11751
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.404892086982727,
      "learning_rate": 0.0005961566598367184,
      "loss": 2.9653,
      "step": 11752
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7542146444320679,
      "learning_rate": 0.0005961560071351235,
      "loss": 3.2071,
      "step": 11753
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.3971478939056396,
      "learning_rate": 0.0005961553543784676,
      "loss": 3.2127,
      "step": 11754
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0818521976470947,
      "learning_rate": 0.0005961547015667509,
      "loss": 3.1981,
      "step": 11755
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1977767944335938,
      "learning_rate": 0.0005961540486999733,
      "loss": 2.9364,
      "step": 11756
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1405675411224365,
      "learning_rate": 0.0005961533957781353,
      "loss": 3.0709,
      "step": 11757
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.397170066833496,
      "learning_rate": 0.0005961527428012366,
      "loss": 3.1023,
      "step": 11758
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.489823579788208,
      "learning_rate": 0.0005961520897692775,
      "loss": 3.3029,
      "step": 11759
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6149719953536987,
      "learning_rate": 0.0005961514366822582,
      "loss": 3.1122,
      "step": 11760
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9539663791656494,
      "learning_rate": 0.0005961507835401789,
      "loss": 3.2477,
      "step": 11761
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.02579665184021,
      "learning_rate": 0.0005961501303430394,
      "loss": 3.2672,
      "step": 11762
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4666210412979126,
      "learning_rate": 0.0005961494770908401,
      "loss": 3.2781,
      "step": 11763
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.298861026763916,
      "learning_rate": 0.000596148823783581,
      "loss": 2.9806,
      "step": 11764
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.245286464691162,
      "learning_rate": 0.0005961481704212622,
      "loss": 3.2028,
      "step": 11765
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8663641214370728,
      "learning_rate": 0.0005961475170038838,
      "loss": 3.2425,
      "step": 11766
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3919628858566284,
      "learning_rate": 0.0005961468635314461,
      "loss": 3.1819,
      "step": 11767
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8213685750961304,
      "learning_rate": 0.000596146210003949,
      "loss": 2.9362,
      "step": 11768
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4795708656311035,
      "learning_rate": 0.0005961455564213929,
      "loss": 3.0718,
      "step": 11769
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.35637366771698,
      "learning_rate": 0.0005961449027837776,
      "loss": 3.0079,
      "step": 11770
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.917015790939331,
      "learning_rate": 0.0005961442490911035,
      "loss": 3.2907,
      "step": 11771
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.922802448272705,
      "learning_rate": 0.0005961435953433705,
      "loss": 2.9239,
      "step": 11772
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8552290201187134,
      "learning_rate": 0.0005961429415405788,
      "loss": 3.1679,
      "step": 11773
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5434647798538208,
      "learning_rate": 0.0005961422876827286,
      "loss": 3.2102,
      "step": 11774
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0086302757263184,
      "learning_rate": 0.00059614163376982,
      "loss": 3.0954,
      "step": 11775
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9584629535675049,
      "learning_rate": 0.0005961409798018529,
      "loss": 3.098,
      "step": 11776
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6921117305755615,
      "learning_rate": 0.0005961403257788277,
      "loss": 3.1685,
      "step": 11777
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6353193521499634,
      "learning_rate": 0.0005961396717007445,
      "loss": 3.0661,
      "step": 11778
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5283300876617432,
      "learning_rate": 0.0005961390175676034,
      "loss": 3.0349,
      "step": 11779
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6071538925170898,
      "learning_rate": 0.0005961383633794042,
      "loss": 3.2043,
      "step": 11780
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3426159620285034,
      "learning_rate": 0.0005961377091361475,
      "loss": 3.1172,
      "step": 11781
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.272857666015625,
      "learning_rate": 0.0005961370548378331,
      "loss": 3.0646,
      "step": 11782
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6858136653900146,
      "learning_rate": 0.0005961364004844614,
      "loss": 3.1262,
      "step": 11783
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.548947811126709,
      "learning_rate": 0.0005961357460760321,
      "loss": 3.0383,
      "step": 11784
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5816441774368286,
      "learning_rate": 0.0005961350916125458,
      "loss": 3.1973,
      "step": 11785
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8539838790893555,
      "learning_rate": 0.0005961344370940023,
      "loss": 3.0798,
      "step": 11786
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.186354160308838,
      "learning_rate": 0.0005961337825204018,
      "loss": 3.329,
      "step": 11787
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4360452890396118,
      "learning_rate": 0.0005961331278917445,
      "loss": 3.2806,
      "step": 11788
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6431832313537598,
      "learning_rate": 0.0005961324732080305,
      "loss": 3.0624,
      "step": 11789
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5733017921447754,
      "learning_rate": 0.0005961318184692598,
      "loss": 3.152,
      "step": 11790
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5704777240753174,
      "learning_rate": 0.0005961311636754325,
      "loss": 3.4938,
      "step": 11791
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0265228748321533,
      "learning_rate": 0.000596130508826549,
      "loss": 3.2257,
      "step": 11792
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.634297490119934,
      "learning_rate": 0.0005961298539226092,
      "loss": 3.373,
      "step": 11793
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.569471836090088,
      "learning_rate": 0.0005961291989636132,
      "loss": 3.1045,
      "step": 11794
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8609315156936646,
      "learning_rate": 0.0005961285439495613,
      "loss": 3.1839,
      "step": 11795
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8569724559783936,
      "learning_rate": 0.0005961278888804534,
      "loss": 3.2202,
      "step": 11796
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5742449760437012,
      "learning_rate": 0.0005961272337562898,
      "loss": 3.3899,
      "step": 11797
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6275933980941772,
      "learning_rate": 0.0005961265785770706,
      "loss": 3.2677,
      "step": 11798
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9788469076156616,
      "learning_rate": 0.0005961259233427959,
      "loss": 3.2641,
      "step": 11799
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6879390478134155,
      "learning_rate": 0.0005961252680534657,
      "loss": 3.1411,
      "step": 11800
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0867698192596436,
      "learning_rate": 0.0005961246127090802,
      "loss": 3.4564,
      "step": 11801
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5214072465896606,
      "learning_rate": 0.0005961239573096396,
      "loss": 3.5272,
      "step": 11802
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7211222648620605,
      "learning_rate": 0.000596123301855144,
      "loss": 3.0783,
      "step": 11803
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5613617897033691,
      "learning_rate": 0.0005961226463455934,
      "loss": 3.0293,
      "step": 11804
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.4346158504486084,
      "learning_rate": 0.0005961219907809882,
      "loss": 3.1961,
      "step": 11805
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.274514675140381,
      "learning_rate": 0.0005961213351613282,
      "loss": 3.0149,
      "step": 11806
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3870607614517212,
      "learning_rate": 0.0005961206794866136,
      "loss": 3.155,
      "step": 11807
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.1432762145996094,
      "learning_rate": 0.0005961200237568446,
      "loss": 3.0779,
      "step": 11808
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0184617042541504,
      "learning_rate": 0.0005961193679720213,
      "loss": 3.1636,
      "step": 11809
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6211769580841064,
      "learning_rate": 0.0005961187121321439,
      "loss": 3.2261,
      "step": 11810
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.535572052001953,
      "learning_rate": 0.0005961180562372123,
      "loss": 3.0975,
      "step": 11811
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.612559199333191,
      "learning_rate": 0.0005961174002872269,
      "loss": 3.0787,
      "step": 11812
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8785401582717896,
      "learning_rate": 0.0005961167442821877,
      "loss": 3.1516,
      "step": 11813
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4522658586502075,
      "learning_rate": 0.0005961160882220948,
      "loss": 3.311,
      "step": 11814
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.641920328140259,
      "learning_rate": 0.0005961154321069482,
      "loss": 3.1925,
      "step": 11815
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.157949686050415,
      "learning_rate": 0.0005961147759367482,
      "loss": 2.9147,
      "step": 11816
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.417946219444275,
      "learning_rate": 0.000596114119711495,
      "loss": 3.129,
      "step": 11817
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8759829998016357,
      "learning_rate": 0.0005961134634311884,
      "loss": 3.1241,
      "step": 11818
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6611837148666382,
      "learning_rate": 0.0005961128070958289,
      "loss": 3.3005,
      "step": 11819
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.565146803855896,
      "learning_rate": 0.0005961121507054163,
      "loss": 3.2215,
      "step": 11820
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.0165910720825195,
      "learning_rate": 0.000596111494259951,
      "loss": 3.2833,
      "step": 11821
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.369890809059143,
      "learning_rate": 0.0005961108377594329,
      "loss": 2.9616,
      "step": 11822
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7480013370513916,
      "learning_rate": 0.0005961101812038622,
      "loss": 3.2963,
      "step": 11823
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3718335628509521,
      "learning_rate": 0.000596109524593239,
      "loss": 2.9826,
      "step": 11824
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5762288570404053,
      "learning_rate": 0.0005961088679275634,
      "loss": 3.2512,
      "step": 11825
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3944817781448364,
      "learning_rate": 0.0005961082112068358,
      "loss": 3.1683,
      "step": 11826
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.7388594150543213,
      "learning_rate": 0.0005961075544310559,
      "loss": 3.2646,
      "step": 11827
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4065275192260742,
      "learning_rate": 0.0005961068976002241,
      "loss": 3.0252,
      "step": 11828
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3865944147109985,
      "learning_rate": 0.0005961062407143404,
      "loss": 3.0273,
      "step": 11829
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5582510232925415,
      "learning_rate": 0.0005961055837734049,
      "loss": 3.11,
      "step": 11830
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6387819051742554,
      "learning_rate": 0.0005961049267774179,
      "loss": 3.1811,
      "step": 11831
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4345489740371704,
      "learning_rate": 0.0005961042697263793,
      "loss": 3.2586,
      "step": 11832
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.645528793334961,
      "learning_rate": 0.0005961036126202894,
      "loss": 3.3271,
      "step": 11833
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6674013137817383,
      "learning_rate": 0.0005961029554591483,
      "loss": 3.1238,
      "step": 11834
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4801549911499023,
      "learning_rate": 0.0005961022982429559,
      "loss": 3.2178,
      "step": 11835
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.181706190109253,
      "learning_rate": 0.0005961016409717126,
      "loss": 3.1725,
      "step": 11836
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7428522109985352,
      "learning_rate": 0.0005961009836454184,
      "loss": 3.1708,
      "step": 11837
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.029071807861328,
      "learning_rate": 0.0005961003262640735,
      "loss": 3.2291,
      "step": 11838
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5707621574401855,
      "learning_rate": 0.0005960996688276779,
      "loss": 3.2264,
      "step": 11839
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6957502365112305,
      "learning_rate": 0.0005960990113362318,
      "loss": 3.0473,
      "step": 11840
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.451078176498413,
      "learning_rate": 0.0005960983537897353,
      "loss": 3.3697,
      "step": 11841
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4162406921386719,
      "learning_rate": 0.0005960976961881886,
      "loss": 3.1079,
      "step": 11842
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8535184860229492,
      "learning_rate": 0.0005960970385315916,
      "loss": 3.3998,
      "step": 11843
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9867472648620605,
      "learning_rate": 0.0005960963808199446,
      "loss": 3.2627,
      "step": 11844
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.547176718711853,
      "learning_rate": 0.0005960957230532477,
      "loss": 3.1367,
      "step": 11845
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5909425020217896,
      "learning_rate": 0.0005960950652315011,
      "loss": 3.3138,
      "step": 11846
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1610350608825684,
      "learning_rate": 0.0005960944073547048,
      "loss": 3.122,
      "step": 11847
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4915801286697388,
      "learning_rate": 0.000596093749422859,
      "loss": 2.9939,
      "step": 11848
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.816760778427124,
      "learning_rate": 0.0005960930914359636,
      "loss": 3.016,
      "step": 11849
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9420415163040161,
      "learning_rate": 0.0005960924333940191,
      "loss": 3.0713,
      "step": 11850
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5538846254348755,
      "learning_rate": 0.0005960917752970255,
      "loss": 3.4414,
      "step": 11851
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5973832607269287,
      "learning_rate": 0.0005960911171449826,
      "loss": 3.2867,
      "step": 11852
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.306300401687622,
      "learning_rate": 0.0005960904589378909,
      "loss": 3.1712,
      "step": 11853
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8029921054840088,
      "learning_rate": 0.0005960898006757504,
      "loss": 3.2075,
      "step": 11854
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5330638885498047,
      "learning_rate": 0.0005960891423585612,
      "loss": 2.8931,
      "step": 11855
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.0898914337158203,
      "learning_rate": 0.0005960884839863235,
      "loss": 2.9862,
      "step": 11856
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2552974224090576,
      "learning_rate": 0.0005960878255590372,
      "loss": 2.9323,
      "step": 11857
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4237525463104248,
      "learning_rate": 0.0005960871670767027,
      "loss": 3.1469,
      "step": 11858
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.123399257659912,
      "learning_rate": 0.00059608650853932,
      "loss": 3.0715,
      "step": 11859
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.885822057723999,
      "learning_rate": 0.0005960858499468892,
      "loss": 2.8629,
      "step": 11860
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7585878372192383,
      "learning_rate": 0.0005960851912994104,
      "loss": 2.9296,
      "step": 11861
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7510313987731934,
      "learning_rate": 0.0005960845325968839,
      "loss": 3.3864,
      "step": 11862
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.2742598056793213,
      "learning_rate": 0.0005960838738393097,
      "loss": 3.0525,
      "step": 11863
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3364163637161255,
      "learning_rate": 0.0005960832150266878,
      "loss": 3.2802,
      "step": 11864
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6934564113616943,
      "learning_rate": 0.0005960825561590185,
      "loss": 2.9278,
      "step": 11865
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4147886037826538,
      "learning_rate": 0.0005960818972363019,
      "loss": 3.3281,
      "step": 11866
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.8800294399261475,
      "learning_rate": 0.000596081238258538,
      "loss": 3.0854,
      "step": 11867
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.920758605003357,
      "learning_rate": 0.0005960805792257271,
      "loss": 2.9465,
      "step": 11868
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.2951337099075317,
      "learning_rate": 0.0005960799201378692,
      "loss": 3.2457,
      "step": 11869
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4549249410629272,
      "learning_rate": 0.0005960792609949644,
      "loss": 3.1191,
      "step": 11870
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6922540664672852,
      "learning_rate": 0.000596078601797013,
      "loss": 3.1942,
      "step": 11871
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4432376623153687,
      "learning_rate": 0.0005960779425440148,
      "loss": 3.0044,
      "step": 11872
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6598691940307617,
      "learning_rate": 0.0005960772832359704,
      "loss": 2.8551,
      "step": 11873
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4725689888000488,
      "learning_rate": 0.0005960766238728794,
      "loss": 3.1156,
      "step": 11874
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.816042423248291,
      "learning_rate": 0.0005960759644547423,
      "loss": 3.315,
      "step": 11875
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5641405582427979,
      "learning_rate": 0.000596075304981559,
      "loss": 3.2657,
      "step": 11876
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4844788312911987,
      "learning_rate": 0.0005960746454533298,
      "loss": 3.2648,
      "step": 11877
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5099519491195679,
      "learning_rate": 0.0005960739858700546,
      "loss": 2.9705,
      "step": 11878
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5754863023757935,
      "learning_rate": 0.0005960733262317338,
      "loss": 3.024,
      "step": 11879
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.659348964691162,
      "learning_rate": 0.0005960726665383674,
      "loss": 3.306,
      "step": 11880
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.1628129482269287,
      "learning_rate": 0.0005960720067899553,
      "loss": 3.126,
      "step": 11881
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7478735446929932,
      "learning_rate": 0.000596071346986498,
      "loss": 2.8908,
      "step": 11882
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.906178593635559,
      "learning_rate": 0.0005960706871279953,
      "loss": 3.215,
      "step": 11883
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5490552186965942,
      "learning_rate": 0.0005960700272144477,
      "loss": 3.109,
      "step": 11884
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6477749347686768,
      "learning_rate": 0.000596069367245855,
      "loss": 3.0454,
      "step": 11885
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.5553536415100098,
      "learning_rate": 0.0005960687072222174,
      "loss": 3.061,
      "step": 11886
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7347304821014404,
      "learning_rate": 0.000596068047143535,
      "loss": 3.2809,
      "step": 11887
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.521093487739563,
      "learning_rate": 0.0005960673870098079,
      "loss": 3.3034,
      "step": 11888
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.510407567024231,
      "learning_rate": 0.0005960667268210364,
      "loss": 2.9967,
      "step": 11889
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5860997438430786,
      "learning_rate": 0.0005960660665772204,
      "loss": 3.1548,
      "step": 11890
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5189687013626099,
      "learning_rate": 0.0005960654062783604,
      "loss": 3.0691,
      "step": 11891
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.6054308414459229,
      "learning_rate": 0.000596064745924456,
      "loss": 3.0685,
      "step": 11892
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5940911769866943,
      "learning_rate": 0.0005960640855155075,
      "loss": 3.144,
      "step": 11893
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7437984943389893,
      "learning_rate": 0.0005960634250515152,
      "loss": 3.0944,
      "step": 11894
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.440773367881775,
      "learning_rate": 0.0005960627645324792,
      "loss": 3.1859,
      "step": 11895
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5161677598953247,
      "learning_rate": 0.0005960621039583995,
      "loss": 3.3543,
      "step": 11896
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3833348751068115,
      "learning_rate": 0.0005960614433292762,
      "loss": 3.1076,
      "step": 11897
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.3870761394500732,
      "learning_rate": 0.0005960607826451097,
      "loss": 2.8708,
      "step": 11898
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5939750671386719,
      "learning_rate": 0.0005960601219058996,
      "loss": 3.242,
      "step": 11899
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.540441632270813,
      "learning_rate": 0.0005960594611116466,
      "loss": 3.1662,
      "step": 11900
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.454132080078125,
      "learning_rate": 0.0005960588002623504,
      "loss": 3.1244,
      "step": 11901
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5400439500808716,
      "learning_rate": 0.0005960581393580114,
      "loss": 3.4208,
      "step": 11902
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.7116103172302246,
      "learning_rate": 0.0005960574783986295,
      "loss": 3.4066,
      "step": 11903
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.5837395191192627,
      "learning_rate": 0.0005960568173842049,
      "loss": 3.1919,
      "step": 11904
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8672517538070679,
      "learning_rate": 0.0005960561563147378,
      "loss": 3.0305,
      "step": 11905
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3404830694198608,
      "learning_rate": 0.0005960554951902284,
      "loss": 3.2029,
      "step": 11906
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3743042945861816,
      "learning_rate": 0.0005960548340106764,
      "loss": 3.1648,
      "step": 11907
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3960216045379639,
      "learning_rate": 0.0005960541727760826,
      "loss": 3.2926,
      "step": 11908
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2813711166381836,
      "learning_rate": 0.0005960535114864464,
      "loss": 3.0082,
      "step": 11909
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.024728298187256,
      "learning_rate": 0.0005960528501417684,
      "loss": 3.2178,
      "step": 11910
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.834848403930664,
      "learning_rate": 0.0005960521887420486,
      "loss": 2.8741,
      "step": 11911
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5385630130767822,
      "learning_rate": 0.0005960515272872871,
      "loss": 2.9036,
      "step": 11912
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7201706171035767,
      "learning_rate": 0.000596050865777484,
      "loss": 3.2453,
      "step": 11913
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5519381761550903,
      "learning_rate": 0.0005960502042126395,
      "loss": 3.2403,
      "step": 11914
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8458473682403564,
      "learning_rate": 0.0005960495425927537,
      "loss": 3.3617,
      "step": 11915
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9436978101730347,
      "learning_rate": 0.0005960488809178267,
      "loss": 3.01,
      "step": 11916
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.490852952003479,
      "learning_rate": 0.0005960482191878586,
      "loss": 3.2055,
      "step": 11917
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.600223183631897,
      "learning_rate": 0.0005960475574028495,
      "loss": 3.2327,
      "step": 11918
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3806209564208984,
      "learning_rate": 0.0005960468955627995,
      "loss": 3.2692,
      "step": 11919
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.397851586341858,
      "learning_rate": 0.000596046233667709,
      "loss": 3.4269,
      "step": 11920
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5900449752807617,
      "learning_rate": 0.0005960455717175778,
      "loss": 2.9278,
      "step": 11921
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6160082817077637,
      "learning_rate": 0.0005960449097124062,
      "loss": 3.041,
      "step": 11922
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5175461769104004,
      "learning_rate": 0.0005960442476521942,
      "loss": 3.2546,
      "step": 11923
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8725475072860718,
      "learning_rate": 0.000596043585536942,
      "loss": 2.9411,
      "step": 11924
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4613157510757446,
      "learning_rate": 0.0005960429233666497,
      "loss": 3.2869,
      "step": 11925
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.491548776626587,
      "learning_rate": 0.0005960422611413175,
      "loss": 3.2345,
      "step": 11926
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7756506204605103,
      "learning_rate": 0.0005960415988609454,
      "loss": 3.1771,
      "step": 11927
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4908487796783447,
      "learning_rate": 0.0005960409365255335,
      "loss": 3.2417,
      "step": 11928
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6297515630722046,
      "learning_rate": 0.0005960402741350821,
      "loss": 3.4133,
      "step": 11929
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7225873470306396,
      "learning_rate": 0.0005960396116895913,
      "loss": 3.1122,
      "step": 11930
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.731160283088684,
      "learning_rate": 0.000596038949189061,
      "loss": 3.1014,
      "step": 11931
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5839449167251587,
      "learning_rate": 0.0005960382866334915,
      "loss": 3.2184,
      "step": 11932
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.468470811843872,
      "learning_rate": 0.000596037624022883,
      "loss": 3.0261,
      "step": 11933
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3181617259979248,
      "learning_rate": 0.0005960369613572353,
      "loss": 3.121,
      "step": 11934
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.013359546661377,
      "learning_rate": 0.000596036298636549,
      "loss": 3.2681,
      "step": 11935
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5360658168792725,
      "learning_rate": 0.0005960356358608238,
      "loss": 3.2915,
      "step": 11936
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.506434679031372,
      "learning_rate": 0.0005960349730300601,
      "loss": 3.016,
      "step": 11937
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5895580053329468,
      "learning_rate": 0.0005960343101442578,
      "loss": 3.1863,
      "step": 11938
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4678007364273071,
      "learning_rate": 0.0005960336472034172,
      "loss": 3.0699,
      "step": 11939
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3800969123840332,
      "learning_rate": 0.0005960329842075383,
      "loss": 3.2275,
      "step": 11940
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.854392409324646,
      "learning_rate": 0.0005960323211566214,
      "loss": 3.2122,
      "step": 11941
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.101702928543091,
      "learning_rate": 0.0005960316580506663,
      "loss": 3.1608,
      "step": 11942
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0097134113311768,
      "learning_rate": 0.0005960309948896735,
      "loss": 3.4481,
      "step": 11943
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9942771196365356,
      "learning_rate": 0.0005960303316736429,
      "loss": 3.0678,
      "step": 11944
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5583819150924683,
      "learning_rate": 0.0005960296684025747,
      "loss": 3.1716,
      "step": 11945
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.372032403945923,
      "learning_rate": 0.0005960290050764688,
      "loss": 3.1185,
      "step": 11946
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6981931924819946,
      "learning_rate": 0.0005960283416953258,
      "loss": 3.2908,
      "step": 11947
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5637567043304443,
      "learning_rate": 0.0005960276782591454,
      "loss": 2.9882,
      "step": 11948
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6985137462615967,
      "learning_rate": 0.0005960270147679279,
      "loss": 3.1511,
      "step": 11949
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3853609561920166,
      "learning_rate": 0.0005960263512216734,
      "loss": 3.1573,
      "step": 11950
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5246320962905884,
      "learning_rate": 0.000596025687620382,
      "loss": 3.1079,
      "step": 11951
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3641277551651,
      "learning_rate": 0.0005960250239640539,
      "loss": 3.3385,
      "step": 11952
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4128525257110596,
      "learning_rate": 0.000596024360252689,
      "loss": 3.0227,
      "step": 11953
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.367576003074646,
      "learning_rate": 0.0005960236964862877,
      "loss": 3.0957,
      "step": 11954
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.393963098526001,
      "learning_rate": 0.00059602303266485,
      "loss": 3.3853,
      "step": 11955
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2418938875198364,
      "learning_rate": 0.000596022368788376,
      "loss": 3.1139,
      "step": 11956
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.629020094871521,
      "learning_rate": 0.0005960217048568659,
      "loss": 3.2868,
      "step": 11957
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.223685622215271,
      "learning_rate": 0.0005960210408703197,
      "loss": 3.3746,
      "step": 11958
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8392796516418457,
      "learning_rate": 0.0005960203768287376,
      "loss": 2.8566,
      "step": 11959
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7555972337722778,
      "learning_rate": 0.0005960197127321198,
      "loss": 3.2015,
      "step": 11960
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4703000783920288,
      "learning_rate": 0.0005960190485804663,
      "loss": 3.2296,
      "step": 11961
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.303415060043335,
      "learning_rate": 0.0005960183843737773,
      "loss": 3.0594,
      "step": 11962
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7602438926696777,
      "learning_rate": 0.0005960177201120529,
      "loss": 3.0231,
      "step": 11963
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8345524072647095,
      "learning_rate": 0.0005960170557952932,
      "loss": 3.1992,
      "step": 11964
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.182325839996338,
      "learning_rate": 0.0005960163914234984,
      "loss": 2.9033,
      "step": 11965
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6571524143218994,
      "learning_rate": 0.0005960157269966685,
      "loss": 3.4073,
      "step": 11966
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6012276411056519,
      "learning_rate": 0.0005960150625148036,
      "loss": 3.1558,
      "step": 11967
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3397690057754517,
      "learning_rate": 0.0005960143979779041,
      "loss": 3.0433,
      "step": 11968
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.464715600013733,
      "learning_rate": 0.0005960137333859698,
      "loss": 3.1129,
      "step": 11969
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6586806774139404,
      "learning_rate": 0.000596013068739001,
      "loss": 3.2574,
      "step": 11970
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2106029987335205,
      "learning_rate": 0.0005960124040369978,
      "loss": 2.9313,
      "step": 11971
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5769577026367188,
      "learning_rate": 0.0005960117392799603,
      "loss": 3.1182,
      "step": 11972
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5703872442245483,
      "learning_rate": 0.0005960110744678886,
      "loss": 2.8058,
      "step": 11973
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.478912591934204,
      "learning_rate": 0.000596010409600783,
      "loss": 3.1427,
      "step": 11974
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5071308612823486,
      "learning_rate": 0.0005960097446786433,
      "loss": 3.1207,
      "step": 11975
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3193538188934326,
      "learning_rate": 0.0005960090797014698,
      "loss": 3.0112,
      "step": 11976
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7398085594177246,
      "learning_rate": 0.0005960084146692627,
      "loss": 3.2507,
      "step": 11977
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.818116307258606,
      "learning_rate": 0.0005960077495820221,
      "loss": 3.2645,
      "step": 11978
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.454591989517212,
      "learning_rate": 0.0005960070844397479,
      "loss": 3.2315,
      "step": 11979
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2615463733673096,
      "learning_rate": 0.0005960064192424405,
      "loss": 3.0109,
      "step": 11980
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6040962934494019,
      "learning_rate": 0.0005960057539901,
      "loss": 2.9713,
      "step": 11981
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.714445948600769,
      "learning_rate": 0.0005960050886827262,
      "loss": 2.9718,
      "step": 11982
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.66794753074646,
      "learning_rate": 0.0005960044233203198,
      "loss": 3.3085,
      "step": 11983
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6273573637008667,
      "learning_rate": 0.0005960037579028803,
      "loss": 3.3581,
      "step": 11984
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7132658958435059,
      "learning_rate": 0.0005960030924304083,
      "loss": 3.0525,
      "step": 11985
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.609532356262207,
      "learning_rate": 0.0005960024269029036,
      "loss": 3.079,
      "step": 11986
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4207172393798828,
      "learning_rate": 0.0005960017613203665,
      "loss": 3.0497,
      "step": 11987
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6866724491119385,
      "learning_rate": 0.0005960010956827972,
      "loss": 2.8396,
      "step": 11988
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.107177734375,
      "learning_rate": 0.0005960004299901955,
      "loss": 3.1271,
      "step": 11989
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.899238109588623,
      "learning_rate": 0.0005959997642425619,
      "loss": 3.139,
      "step": 11990
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.829557180404663,
      "learning_rate": 0.0005959990984398964,
      "loss": 2.9533,
      "step": 11991
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.748659610748291,
      "learning_rate": 0.0005959984325821989,
      "loss": 3.1507,
      "step": 11992
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5828039646148682,
      "learning_rate": 0.0005959977666694698,
      "loss": 3.0908,
      "step": 11993
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7566512823104858,
      "learning_rate": 0.0005959971007017091,
      "loss": 3.0614,
      "step": 11994
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6068356037139893,
      "learning_rate": 0.000595996434678917,
      "loss": 3.0296,
      "step": 11995
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5524556636810303,
      "learning_rate": 0.0005959957686010936,
      "loss": 3.1626,
      "step": 11996
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4105743169784546,
      "learning_rate": 0.000595995102468239,
      "loss": 3.1714,
      "step": 11997
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7912710905075073,
      "learning_rate": 0.0005959944362803532,
      "loss": 3.3053,
      "step": 11998
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4004647731781006,
      "learning_rate": 0.0005959937700374364,
      "loss": 3.22,
      "step": 11999
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.586848258972168,
      "learning_rate": 0.000595993103739489,
      "loss": 3.3506,
      "step": 12000
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5257999897003174,
      "learning_rate": 0.0005959924373865108,
      "loss": 3.0901,
      "step": 12001
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.477579116821289,
      "learning_rate": 0.000595991770978502,
      "loss": 3.1018,
      "step": 12002
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5227391719818115,
      "learning_rate": 0.0005959911045154627,
      "loss": 3.3134,
      "step": 12003
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7412278652191162,
      "learning_rate": 0.0005959904379973931,
      "loss": 2.9267,
      "step": 12004
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7513577938079834,
      "learning_rate": 0.0005959897714242933,
      "loss": 3.2104,
      "step": 12005
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5812267065048218,
      "learning_rate": 0.0005959891047961634,
      "loss": 3.2826,
      "step": 12006
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.7646024227142334,
      "learning_rate": 0.0005959884381130035,
      "loss": 3.2255,
      "step": 12007
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.8752150535583496,
      "learning_rate": 0.0005959877713748139,
      "loss": 3.2983,
      "step": 12008
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.701685905456543,
      "learning_rate": 0.0005959871045815944,
      "loss": 2.9776,
      "step": 12009
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.462207555770874,
      "learning_rate": 0.0005959864377333455,
      "loss": 3.1965,
      "step": 12010
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4873239994049072,
      "learning_rate": 0.000595985770830067,
      "loss": 3.182,
      "step": 12011
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6534887552261353,
      "learning_rate": 0.0005959851038717593,
      "loss": 3.2281,
      "step": 12012
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4366300106048584,
      "learning_rate": 0.0005959844368584223,
      "loss": 3.3058,
      "step": 12013
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5106521844863892,
      "learning_rate": 0.0005959837697900562,
      "loss": 3.1774,
      "step": 12014
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2949395179748535,
      "learning_rate": 0.000595983102666661,
      "loss": 3.0436,
      "step": 12015
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5004853010177612,
      "learning_rate": 0.0005959824354882372,
      "loss": 3.1299,
      "step": 12016
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4930371046066284,
      "learning_rate": 0.0005959817682547845,
      "loss": 2.898,
      "step": 12017
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.212831735610962,
      "learning_rate": 0.0005959811009663033,
      "loss": 3.185,
      "step": 12018
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7441670894622803,
      "learning_rate": 0.0005959804336227936,
      "loss": 3.2565,
      "step": 12019
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8320950269699097,
      "learning_rate": 0.0005959797662242555,
      "loss": 3.1338,
      "step": 12020
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0379717350006104,
      "learning_rate": 0.0005959790987706892,
      "loss": 3.079,
      "step": 12021
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3602426052093506,
      "learning_rate": 0.0005959784312620948,
      "loss": 3.2024,
      "step": 12022
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0268983840942383,
      "learning_rate": 0.0005959777636984724,
      "loss": 3.1409,
      "step": 12023
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.851091980934143,
      "learning_rate": 0.0005959770960798222,
      "loss": 3.3803,
      "step": 12024
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.3351588249206543,
      "learning_rate": 0.0005959764284061442,
      "loss": 3.2612,
      "step": 12025
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.2337138652801514,
      "learning_rate": 0.0005959757606774387,
      "loss": 3.0803,
      "step": 12026
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3512918949127197,
      "learning_rate": 0.0005959750928937056,
      "loss": 3.1797,
      "step": 12027
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.224886894226074,
      "learning_rate": 0.0005959744250549452,
      "loss": 2.9004,
      "step": 12028
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9969055652618408,
      "learning_rate": 0.0005959737571611576,
      "loss": 3.0071,
      "step": 12029
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.738158106803894,
      "learning_rate": 0.0005959730892123429,
      "loss": 3.4619,
      "step": 12030
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.384033441543579,
      "learning_rate": 0.0005959724212085012,
      "loss": 3.0785,
      "step": 12031
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.146562099456787,
      "learning_rate": 0.0005959717531496325,
      "loss": 3.0444,
      "step": 12032
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.820969820022583,
      "learning_rate": 0.0005959710850357373,
      "loss": 3.2926,
      "step": 12033
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5416064262390137,
      "learning_rate": 0.0005959704168668153,
      "loss": 2.7451,
      "step": 12034
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2322347164154053,
      "learning_rate": 0.000595969748642867,
      "loss": 3.1867,
      "step": 12035
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.021831512451172,
      "learning_rate": 0.0005959690803638921,
      "loss": 2.8762,
      "step": 12036
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4948368072509766,
      "learning_rate": 0.0005959684120298911,
      "loss": 3.2199,
      "step": 12037
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0158798694610596,
      "learning_rate": 0.000595967743640864,
      "loss": 3.2984,
      "step": 12038
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9907177686691284,
      "learning_rate": 0.0005959670751968108,
      "loss": 3.3963,
      "step": 12039
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2755446434020996,
      "learning_rate": 0.0005959664066977319,
      "loss": 3.1123,
      "step": 12040
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.489632248878479,
      "learning_rate": 0.0005959657381436272,
      "loss": 3.2578,
      "step": 12041
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.110818386077881,
      "learning_rate": 0.0005959650695344968,
      "loss": 3.1063,
      "step": 12042
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6840388774871826,
      "learning_rate": 0.000595964400870341,
      "loss": 2.977,
      "step": 12043
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0251991748809814,
      "learning_rate": 0.0005959637321511597,
      "loss": 3.0532,
      "step": 12044
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5507205724716187,
      "learning_rate": 0.0005959630633769533,
      "loss": 3.3736,
      "step": 12045
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3749289512634277,
      "learning_rate": 0.0005959623945477217,
      "loss": 3.0243,
      "step": 12046
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6526780128479004,
      "learning_rate": 0.0005959617256634652,
      "loss": 2.8508,
      "step": 12047
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6893528699874878,
      "learning_rate": 0.0005959610567241837,
      "loss": 2.9711,
      "step": 12048
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6387542486190796,
      "learning_rate": 0.0005959603877298775,
      "loss": 3.08,
      "step": 12049
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8917101621627808,
      "learning_rate": 0.0005959597186805467,
      "loss": 2.9629,
      "step": 12050
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.859742283821106,
      "learning_rate": 0.0005959590495761914,
      "loss": 2.972,
      "step": 12051
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3582580089569092,
      "learning_rate": 0.0005959583804168117,
      "loss": 3.1528,
      "step": 12052
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8863774538040161,
      "learning_rate": 0.0005959577112024078,
      "loss": 3.1355,
      "step": 12053
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.991310715675354,
      "learning_rate": 0.0005959570419329797,
      "loss": 2.8955,
      "step": 12054
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.535528540611267,
      "learning_rate": 0.0005959563726085276,
      "loss": 3.2188,
      "step": 12055
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9173814058303833,
      "learning_rate": 0.0005959557032290517,
      "loss": 3.052,
      "step": 12056
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.752785325050354,
      "learning_rate": 0.000595955033794552,
      "loss": 3.437,
      "step": 12057
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6423949003219604,
      "learning_rate": 0.0005959543643050287,
      "loss": 3.0939,
      "step": 12058
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.329275369644165,
      "learning_rate": 0.0005959536947604819,
      "loss": 3.3026,
      "step": 12059
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3402940034866333,
      "learning_rate": 0.0005959530251609117,
      "loss": 3.3835,
      "step": 12060
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3910341262817383,
      "learning_rate": 0.0005959523555063181,
      "loss": 3.1999,
      "step": 12061
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7190660238265991,
      "learning_rate": 0.0005959516857967016,
      "loss": 3.0102,
      "step": 12062
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5372087955474854,
      "learning_rate": 0.0005959510160320621,
      "loss": 3.2159,
      "step": 12063
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4732496738433838,
      "learning_rate": 0.0005959503462123996,
      "loss": 3.1019,
      "step": 12064
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6293692588806152,
      "learning_rate": 0.0005959496763377144,
      "loss": 3.1693,
      "step": 12065
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7469489574432373,
      "learning_rate": 0.0005959490064080065,
      "loss": 3.1235,
      "step": 12066
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8051282167434692,
      "learning_rate": 0.0005959483364232762,
      "loss": 3.138,
      "step": 12067
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.761168360710144,
      "learning_rate": 0.0005959476663835234,
      "loss": 3.0938,
      "step": 12068
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3736789226531982,
      "learning_rate": 0.0005959469962887484,
      "loss": 3.231,
      "step": 12069
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.057316780090332,
      "learning_rate": 0.0005959463261389513,
      "loss": 2.9453,
      "step": 12070
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9189324378967285,
      "learning_rate": 0.0005959456559341321,
      "loss": 3.005,
      "step": 12071
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.591194748878479,
      "learning_rate": 0.0005959449856742912,
      "loss": 3.1477,
      "step": 12072
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.941949486732483,
      "learning_rate": 0.0005959443153594283,
      "loss": 3.1376,
      "step": 12073
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6751208305358887,
      "learning_rate": 0.0005959436449895439,
      "loss": 3.0707,
      "step": 12074
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6401965618133545,
      "learning_rate": 0.000595942974564638,
      "loss": 3.1922,
      "step": 12075
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7608897686004639,
      "learning_rate": 0.0005959423040847107,
      "loss": 2.7666,
      "step": 12076
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8395646810531616,
      "learning_rate": 0.0005959416335497621,
      "loss": 3.211,
      "step": 12077
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.87275230884552,
      "learning_rate": 0.0005959409629597924,
      "loss": 3.4355,
      "step": 12078
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6687028408050537,
      "learning_rate": 0.0005959402923148017,
      "loss": 3.1975,
      "step": 12079
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7943634986877441,
      "learning_rate": 0.00059593962161479,
      "loss": 3.2617,
      "step": 12080
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9304535388946533,
      "learning_rate": 0.0005959389508597578,
      "loss": 3.0902,
      "step": 12081
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4529677629470825,
      "learning_rate": 0.0005959382800497046,
      "loss": 3.0672,
      "step": 12082
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7076550722122192,
      "learning_rate": 0.0005959376091846311,
      "loss": 3.4528,
      "step": 12083
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7508026361465454,
      "learning_rate": 0.0005959369382645373,
      "loss": 3.1038,
      "step": 12084
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.661850094795227,
      "learning_rate": 0.000595936267289423,
      "loss": 3.2151,
      "step": 12085
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6762620210647583,
      "learning_rate": 0.0005959355962592888,
      "loss": 3.092,
      "step": 12086
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3916988372802734,
      "learning_rate": 0.0005959349251741345,
      "loss": 3.1906,
      "step": 12087
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.542770504951477,
      "learning_rate": 0.0005959342540339602,
      "loss": 3.2208,
      "step": 12088
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5673878192901611,
      "learning_rate": 0.0005959335828387663,
      "loss": 3.1266,
      "step": 12089
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.955461263656616,
      "learning_rate": 0.0005959329115885527,
      "loss": 3.1397,
      "step": 12090
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.023240327835083,
      "learning_rate": 0.0005959322402833195,
      "loss": 3.0637,
      "step": 12091
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7040880918502808,
      "learning_rate": 0.000595931568923067,
      "loss": 3.1026,
      "step": 12092
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.3912293910980225,
      "learning_rate": 0.0005959308975077952,
      "loss": 2.9318,
      "step": 12093
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.383296489715576,
      "learning_rate": 0.0005959302260375042,
      "loss": 3.3628,
      "step": 12094
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.339118480682373,
      "learning_rate": 0.0005959295545121943,
      "loss": 3.0082,
      "step": 12095
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3607978820800781,
      "learning_rate": 0.0005959288829318656,
      "loss": 2.9375,
      "step": 12096
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2941553592681885,
      "learning_rate": 0.000595928211296518,
      "loss": 3.1643,
      "step": 12097
    },
    {
      "epoch": 0.16,
      "grad_norm": 4.15679931640625,
      "learning_rate": 0.0005959275396061516,
      "loss": 3.2018,
      "step": 12098
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9473671913146973,
      "learning_rate": 0.0005959268678607669,
      "loss": 3.0701,
      "step": 12099
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.539757490158081,
      "learning_rate": 0.0005959261960603638,
      "loss": 2.8711,
      "step": 12100
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7396644353866577,
      "learning_rate": 0.0005959255242049424,
      "loss": 3.0596,
      "step": 12101
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.906822443008423,
      "learning_rate": 0.0005959248522945028,
      "loss": 2.9044,
      "step": 12102
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5183970928192139,
      "learning_rate": 0.0005959241803290453,
      "loss": 3.0818,
      "step": 12103
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.790665864944458,
      "learning_rate": 0.0005959235083085698,
      "loss": 3.0599,
      "step": 12104
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.926380157470703,
      "learning_rate": 0.0005959228362330766,
      "loss": 3.236,
      "step": 12105
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5824590921401978,
      "learning_rate": 0.0005959221641025658,
      "loss": 3.0452,
      "step": 12106
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8167651891708374,
      "learning_rate": 0.0005959214919170374,
      "loss": 3.2883,
      "step": 12107
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4621915817260742,
      "learning_rate": 0.0005959208196764916,
      "loss": 3.0014,
      "step": 12108
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5325069427490234,
      "learning_rate": 0.0005959201473809285,
      "loss": 3.1017,
      "step": 12109
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4327749013900757,
      "learning_rate": 0.0005959194750303484,
      "loss": 2.8671,
      "step": 12110
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0391788482666016,
      "learning_rate": 0.0005959188026247511,
      "loss": 3.0678,
      "step": 12111
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3461594581604004,
      "learning_rate": 0.000595918130164137,
      "loss": 3.0692,
      "step": 12112
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5952903032302856,
      "learning_rate": 0.0005959174576485063,
      "loss": 3.3427,
      "step": 12113
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9227290153503418,
      "learning_rate": 0.0005959167850778587,
      "loss": 2.9255,
      "step": 12114
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.408296823501587,
      "learning_rate": 0.0005959161124521947,
      "loss": 3.103,
      "step": 12115
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.54990553855896,
      "learning_rate": 0.0005959154397715144,
      "loss": 3.0518,
      "step": 12116
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.5990867614746094,
      "learning_rate": 0.0005959147670358176,
      "loss": 2.8534,
      "step": 12117
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.304731011390686,
      "learning_rate": 0.0005959140942451049,
      "loss": 3.0368,
      "step": 12118
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7742177248001099,
      "learning_rate": 0.000595913421399376,
      "loss": 2.9968,
      "step": 12119
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5693379640579224,
      "learning_rate": 0.0005959127484986313,
      "loss": 2.9944,
      "step": 12120
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7994080781936646,
      "learning_rate": 0.0005959120755428709,
      "loss": 3.0162,
      "step": 12121
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1336758136749268,
      "learning_rate": 0.0005959114025320946,
      "loss": 2.8801,
      "step": 12122
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4970667362213135,
      "learning_rate": 0.0005959107294663031,
      "loss": 3.0804,
      "step": 12123
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0864756107330322,
      "learning_rate": 0.000595910056345496,
      "loss": 3.2753,
      "step": 12124
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6058255434036255,
      "learning_rate": 0.0005959093831696738,
      "loss": 3.2776,
      "step": 12125
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4891457557678223,
      "learning_rate": 0.0005959087099388363,
      "loss": 3.358,
      "step": 12126
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.623437523841858,
      "learning_rate": 0.000595908036652984,
      "loss": 3.2378,
      "step": 12127
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9870694875717163,
      "learning_rate": 0.0005959073633121166,
      "loss": 3.0377,
      "step": 12128
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.787299394607544,
      "learning_rate": 0.0005959066899162345,
      "loss": 2.9327,
      "step": 12129
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5293055772781372,
      "learning_rate": 0.0005959060164653377,
      "loss": 3.1428,
      "step": 12130
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2821731567382812,
      "learning_rate": 0.0005959053429594265,
      "loss": 3.366,
      "step": 12131
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.887316346168518,
      "learning_rate": 0.0005959046693985008,
      "loss": 2.9816,
      "step": 12132
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4331213235855103,
      "learning_rate": 0.0005959039957825609,
      "loss": 2.9287,
      "step": 12133
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5923224687576294,
      "learning_rate": 0.0005959033221116069,
      "loss": 2.8809,
      "step": 12134
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4694383144378662,
      "learning_rate": 0.0005959026483856388,
      "loss": 3.2717,
      "step": 12135
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.707166075706482,
      "learning_rate": 0.0005959019746046569,
      "loss": 3.2859,
      "step": 12136
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.8383235931396484,
      "learning_rate": 0.0005959013007686611,
      "loss": 3.3765,
      "step": 12137
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.11386775970459,
      "learning_rate": 0.0005959006268776518,
      "loss": 3.3684,
      "step": 12138
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.785227656364441,
      "learning_rate": 0.0005958999529316289,
      "loss": 3.2253,
      "step": 12139
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8346503973007202,
      "learning_rate": 0.0005958992789305926,
      "loss": 3.1254,
      "step": 12140
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.18546724319458,
      "learning_rate": 0.0005958986048745432,
      "loss": 3.2378,
      "step": 12141
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.3221545219421387,
      "learning_rate": 0.0005958979307634806,
      "loss": 3.0739,
      "step": 12142
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5859733819961548,
      "learning_rate": 0.0005958972565974049,
      "loss": 3.2053,
      "step": 12143
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1213159561157227,
      "learning_rate": 0.0005958965823763164,
      "loss": 3.1777,
      "step": 12144
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5775753259658813,
      "learning_rate": 0.000595895908100215,
      "loss": 3.0465,
      "step": 12145
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6339318752288818,
      "learning_rate": 0.0005958952337691011,
      "loss": 3.0102,
      "step": 12146
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2991958856582642,
      "learning_rate": 0.0005958945593829746,
      "loss": 3.2421,
      "step": 12147
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.7566518783569336,
      "learning_rate": 0.0005958938849418357,
      "loss": 3.1403,
      "step": 12148
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.705971360206604,
      "learning_rate": 0.0005958932104456847,
      "loss": 3.159,
      "step": 12149
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7626055479049683,
      "learning_rate": 0.0005958925358945215,
      "loss": 2.9393,
      "step": 12150
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.481495976448059,
      "learning_rate": 0.0005958918612883463,
      "loss": 3.1108,
      "step": 12151
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.792827844619751,
      "learning_rate": 0.0005958911866271591,
      "loss": 3.269,
      "step": 12152
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6801278591156006,
      "learning_rate": 0.0005958905119109603,
      "loss": 3.2658,
      "step": 12153
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.685581922531128,
      "learning_rate": 0.0005958898371397498,
      "loss": 3.1963,
      "step": 12154
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4604822397232056,
      "learning_rate": 0.0005958891623135278,
      "loss": 3.2151,
      "step": 12155
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.154691696166992,
      "learning_rate": 0.0005958884874322944,
      "loss": 2.7911,
      "step": 12156
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4793606996536255,
      "learning_rate": 0.0005958878124960497,
      "loss": 3.3133,
      "step": 12157
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.377785086631775,
      "learning_rate": 0.000595887137504794,
      "loss": 3.0705,
      "step": 12158
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6045022010803223,
      "learning_rate": 0.0005958864624585272,
      "loss": 3.1439,
      "step": 12159
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5438233613967896,
      "learning_rate": 0.0005958857873572496,
      "loss": 3.1404,
      "step": 12160
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5537137985229492,
      "learning_rate": 0.0005958851122009612,
      "loss": 3.2424,
      "step": 12161
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4751715660095215,
      "learning_rate": 0.0005958844369896621,
      "loss": 3.414,
      "step": 12162
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6434603929519653,
      "learning_rate": 0.0005958837617233525,
      "loss": 3.0626,
      "step": 12163
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.124288558959961,
      "learning_rate": 0.0005958830864020327,
      "loss": 3.0812,
      "step": 12164
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4361158609390259,
      "learning_rate": 0.0005958824110257026,
      "loss": 3.0015,
      "step": 12165
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7273788452148438,
      "learning_rate": 0.0005958817355943623,
      "loss": 3.0137,
      "step": 12166
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3899109363555908,
      "learning_rate": 0.0005958810601080119,
      "loss": 3.1488,
      "step": 12167
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6167055368423462,
      "learning_rate": 0.0005958803845666518,
      "loss": 3.0686,
      "step": 12168
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8098496198654175,
      "learning_rate": 0.0005958797089702818,
      "loss": 3.2172,
      "step": 12169
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7747212648391724,
      "learning_rate": 0.0005958790333189023,
      "loss": 3.2889,
      "step": 12170
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5835500955581665,
      "learning_rate": 0.0005958783576125134,
      "loss": 3.137,
      "step": 12171
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7193610668182373,
      "learning_rate": 0.0005958776818511149,
      "loss": 3.1471,
      "step": 12172
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7165447473526,
      "learning_rate": 0.0005958770060347072,
      "loss": 3.1943,
      "step": 12173
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.15932297706604,
      "learning_rate": 0.0005958763301632904,
      "loss": 2.9772,
      "step": 12174
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.462313175201416,
      "learning_rate": 0.0005958756542368646,
      "loss": 3.149,
      "step": 12175
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.537002444267273,
      "learning_rate": 0.00059587497825543,
      "loss": 3.0984,
      "step": 12176
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6253578662872314,
      "learning_rate": 0.0005958743022189866,
      "loss": 3.138,
      "step": 12177
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5867457389831543,
      "learning_rate": 0.0005958736261275347,
      "loss": 3.3834,
      "step": 12178
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3971269130706787,
      "learning_rate": 0.0005958729499810742,
      "loss": 3.3868,
      "step": 12179
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7682400941848755,
      "learning_rate": 0.0005958722737796053,
      "loss": 3.25,
      "step": 12180
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7219294309616089,
      "learning_rate": 0.0005958715975231283,
      "loss": 3.0854,
      "step": 12181
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.804586410522461,
      "learning_rate": 0.0005958709212116431,
      "loss": 3.0046,
      "step": 12182
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4815475940704346,
      "learning_rate": 0.0005958702448451499,
      "loss": 3.2077,
      "step": 12183
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8051729202270508,
      "learning_rate": 0.0005958695684236487,
      "loss": 3.2049,
      "step": 12184
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5755505561828613,
      "learning_rate": 0.0005958688919471399,
      "loss": 3.192,
      "step": 12185
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.882340669631958,
      "learning_rate": 0.0005958682154156235,
      "loss": 3.3404,
      "step": 12186
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3556058406829834,
      "learning_rate": 0.0005958675388290996,
      "loss": 3.1641,
      "step": 12187
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9738465547561646,
      "learning_rate": 0.0005958668621875684,
      "loss": 3.0759,
      "step": 12188
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5544514656066895,
      "learning_rate": 0.0005958661854910299,
      "loss": 3.1665,
      "step": 12189
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4520771503448486,
      "learning_rate": 0.0005958655087394844,
      "loss": 3.2097,
      "step": 12190
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.860736846923828,
      "learning_rate": 0.0005958648319329317,
      "loss": 3.1802,
      "step": 12191
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6176170110702515,
      "learning_rate": 0.0005958641550713723,
      "loss": 3.1694,
      "step": 12192
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4670119285583496,
      "learning_rate": 0.0005958634781548061,
      "loss": 3.1846,
      "step": 12193
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4566608667373657,
      "learning_rate": 0.0005958628011832333,
      "loss": 3.2117,
      "step": 12194
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2292428016662598,
      "learning_rate": 0.000595862124156654,
      "loss": 3.2865,
      "step": 12195
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.605319857597351,
      "learning_rate": 0.0005958614470750684,
      "loss": 3.3809,
      "step": 12196
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5824103355407715,
      "learning_rate": 0.0005958607699384766,
      "loss": 3.1358,
      "step": 12197
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1588833332061768,
      "learning_rate": 0.0005958600927468787,
      "loss": 3.1829,
      "step": 12198
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.369490623474121,
      "learning_rate": 0.0005958594155002747,
      "loss": 3.1045,
      "step": 12199
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.416658639907837,
      "learning_rate": 0.000595858738198665,
      "loss": 3.4097,
      "step": 12200
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6170125007629395,
      "learning_rate": 0.0005958580608420495,
      "loss": 3.2805,
      "step": 12201
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.193134069442749,
      "learning_rate": 0.0005958573834304283,
      "loss": 3.0856,
      "step": 12202
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7754380702972412,
      "learning_rate": 0.0005958567059638018,
      "loss": 3.013,
      "step": 12203
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6485668420791626,
      "learning_rate": 0.0005958560284421699,
      "loss": 3.1152,
      "step": 12204
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2084803581237793,
      "learning_rate": 0.0005958553508655328,
      "loss": 3.0119,
      "step": 12205
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2825393676757812,
      "learning_rate": 0.0005958546732338906,
      "loss": 3.0552,
      "step": 12206
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5726863145828247,
      "learning_rate": 0.0005958539955472434,
      "loss": 3.0938,
      "step": 12207
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5834307670593262,
      "learning_rate": 0.0005958533178055913,
      "loss": 3.1715,
      "step": 12208
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8557987213134766,
      "learning_rate": 0.0005958526400089346,
      "loss": 3.3344,
      "step": 12209
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4250946044921875,
      "learning_rate": 0.0005958519621572732,
      "loss": 2.8421,
      "step": 12210
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5494874715805054,
      "learning_rate": 0.0005958512842506074,
      "loss": 3.3257,
      "step": 12211
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1728146076202393,
      "learning_rate": 0.0005958506062889372,
      "loss": 3.1702,
      "step": 12212
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4448614120483398,
      "learning_rate": 0.000595849928272263,
      "loss": 3.015,
      "step": 12213
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8281508684158325,
      "learning_rate": 0.0005958492502005845,
      "loss": 3.2109,
      "step": 12214
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5125504732131958,
      "learning_rate": 0.0005958485720739021,
      "loss": 3.0725,
      "step": 12215
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4143915176391602,
      "learning_rate": 0.0005958478938922158,
      "loss": 2.8027,
      "step": 12216
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7817853689193726,
      "learning_rate": 0.0005958472156555259,
      "loss": 3.218,
      "step": 12217
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7320448160171509,
      "learning_rate": 0.0005958465373638324,
      "loss": 3.2705,
      "step": 12218
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7615952491760254,
      "learning_rate": 0.0005958458590171353,
      "loss": 3.0016,
      "step": 12219
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7409216165542603,
      "learning_rate": 0.0005958451806154351,
      "loss": 3.2659,
      "step": 12220
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.799437165260315,
      "learning_rate": 0.0005958445021587316,
      "loss": 2.9624,
      "step": 12221
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.535711407661438,
      "learning_rate": 0.0005958438236470249,
      "loss": 3.1093,
      "step": 12222
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.748008370399475,
      "learning_rate": 0.0005958431450803154,
      "loss": 3.0315,
      "step": 12223
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6135250329971313,
      "learning_rate": 0.000595842466458603,
      "loss": 3.1792,
      "step": 12224
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2996772527694702,
      "learning_rate": 0.000595841787781888,
      "loss": 3.0656,
      "step": 12225
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3512687683105469,
      "learning_rate": 0.0005958411090501703,
      "loss": 3.2155,
      "step": 12226
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5316438674926758,
      "learning_rate": 0.0005958404302634502,
      "loss": 3.1574,
      "step": 12227
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5794227123260498,
      "learning_rate": 0.0005958397514217278,
      "loss": 3.1059,
      "step": 12228
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.419580340385437,
      "learning_rate": 0.0005958390725250032,
      "loss": 3.1292,
      "step": 12229
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4722737073898315,
      "learning_rate": 0.0005958383935732765,
      "loss": 3.204,
      "step": 12230
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.987137794494629,
      "learning_rate": 0.000595837714566548,
      "loss": 2.9459,
      "step": 12231
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8346683979034424,
      "learning_rate": 0.0005958370355048175,
      "loss": 3.1548,
      "step": 12232
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4843385219573975,
      "learning_rate": 0.0005958363563880854,
      "loss": 3.0855,
      "step": 12233
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7555900812149048,
      "learning_rate": 0.0005958356772163516,
      "loss": 3.0605,
      "step": 12234
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7324522733688354,
      "learning_rate": 0.0005958349979896166,
      "loss": 3.2196,
      "step": 12235
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5610514879226685,
      "learning_rate": 0.0005958343187078801,
      "loss": 2.9482,
      "step": 12236
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.705424189567566,
      "learning_rate": 0.0005958336393711425,
      "loss": 2.9428,
      "step": 12237
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.628945231437683,
      "learning_rate": 0.0005958329599794038,
      "loss": 3.106,
      "step": 12238
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4794729948043823,
      "learning_rate": 0.0005958322805326643,
      "loss": 3.1741,
      "step": 12239
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5201667547225952,
      "learning_rate": 0.0005958316010309238,
      "loss": 3.2744,
      "step": 12240
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4800598621368408,
      "learning_rate": 0.0005958309214741828,
      "loss": 3.1687,
      "step": 12241
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5138752460479736,
      "learning_rate": 0.0005958302418624411,
      "loss": 3.221,
      "step": 12242
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4585285186767578,
      "learning_rate": 0.000595829562195699,
      "loss": 3.1776,
      "step": 12243
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1482977867126465,
      "learning_rate": 0.0005958288824739566,
      "loss": 3.1525,
      "step": 12244
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.007718324661255,
      "learning_rate": 0.0005958282026972141,
      "loss": 3.0831,
      "step": 12245
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5285052061080933,
      "learning_rate": 0.0005958275228654715,
      "loss": 3.1731,
      "step": 12246
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6820034980773926,
      "learning_rate": 0.000595826842978729,
      "loss": 2.9557,
      "step": 12247
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3410091400146484,
      "learning_rate": 0.0005958261630369867,
      "loss": 3.3541,
      "step": 12248
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7237318754196167,
      "learning_rate": 0.0005958254830402448,
      "loss": 3.1764,
      "step": 12249
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8763757944107056,
      "learning_rate": 0.0005958248029885033,
      "loss": 3.1509,
      "step": 12250
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6254339218139648,
      "learning_rate": 0.0005958241228817623,
      "loss": 3.0056,
      "step": 12251
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.192883014678955,
      "learning_rate": 0.0005958234427200221,
      "loss": 3.0208,
      "step": 12252
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6506667137145996,
      "learning_rate": 0.0005958227625032827,
      "loss": 2.9449,
      "step": 12253
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.583817958831787,
      "learning_rate": 0.0005958220822315444,
      "loss": 3.2236,
      "step": 12254
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9756014347076416,
      "learning_rate": 0.000595821401904807,
      "loss": 3.0245,
      "step": 12255
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.64046311378479,
      "learning_rate": 0.0005958207215230709,
      "loss": 3.2874,
      "step": 12256
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9516092538833618,
      "learning_rate": 0.0005958200410863361,
      "loss": 3.1737,
      "step": 12257
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7393229007720947,
      "learning_rate": 0.0005958193605946028,
      "loss": 3.3191,
      "step": 12258
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5126631259918213,
      "learning_rate": 0.0005958186800478712,
      "loss": 3.2909,
      "step": 12259
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.21602725982666,
      "learning_rate": 0.0005958179994461412,
      "loss": 3.3534,
      "step": 12260
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3199394941329956,
      "learning_rate": 0.0005958173187894132,
      "loss": 3.1849,
      "step": 12261
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5657752752304077,
      "learning_rate": 0.0005958166380776869,
      "loss": 3.0905,
      "step": 12262
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5269577503204346,
      "learning_rate": 0.0005958159573109629,
      "loss": 3.1915,
      "step": 12263
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.423831820487976,
      "learning_rate": 0.0005958152764892411,
      "loss": 3.3191,
      "step": 12264
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5278736352920532,
      "learning_rate": 0.0005958145956125217,
      "loss": 3.1611,
      "step": 12265
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.206658363342285,
      "learning_rate": 0.0005958139146808047,
      "loss": 2.9829,
      "step": 12266
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.571139931678772,
      "learning_rate": 0.0005958132336940903,
      "loss": 3.0838,
      "step": 12267
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6619625091552734,
      "learning_rate": 0.0005958125526523786,
      "loss": 2.9808,
      "step": 12268
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.08663272857666,
      "learning_rate": 0.0005958118715556699,
      "loss": 3.0963,
      "step": 12269
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4262573719024658,
      "learning_rate": 0.0005958111904039641,
      "loss": 3.1792,
      "step": 12270
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.369842767715454,
      "learning_rate": 0.0005958105091972615,
      "loss": 3.1503,
      "step": 12271
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.678702712059021,
      "learning_rate": 0.000595809827935562,
      "loss": 3.0168,
      "step": 12272
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.781862735748291,
      "learning_rate": 0.000595809146618866,
      "loss": 3.0079,
      "step": 12273
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7277072668075562,
      "learning_rate": 0.0005958084652471735,
      "loss": 3.0796,
      "step": 12274
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4252276420593262,
      "learning_rate": 0.0005958077838204846,
      "loss": 2.9395,
      "step": 12275
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.274188995361328,
      "learning_rate": 0.0005958071023387994,
      "loss": 3.3748,
      "step": 12276
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5478202104568481,
      "learning_rate": 0.000595806420802118,
      "loss": 3.1446,
      "step": 12277
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7258713245391846,
      "learning_rate": 0.0005958057392104407,
      "loss": 2.837,
      "step": 12278
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.797851800918579,
      "learning_rate": 0.0005958050575637676,
      "loss": 3.142,
      "step": 12279
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.2237260341644287,
      "learning_rate": 0.0005958043758620987,
      "loss": 3.0678,
      "step": 12280
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.724990725517273,
      "learning_rate": 0.000595803694105434,
      "loss": 3.0933,
      "step": 12281
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.0286476612091064,
      "learning_rate": 0.0005958030122937742,
      "loss": 2.9332,
      "step": 12282
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.9143314361572266,
      "learning_rate": 0.0005958023304271187,
      "loss": 3.2535,
      "step": 12283
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5304315090179443,
      "learning_rate": 0.0005958016485054682,
      "loss": 3.0759,
      "step": 12284
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7389514446258545,
      "learning_rate": 0.0005958009665288224,
      "loss": 3.2302,
      "step": 12285
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1716666221618652,
      "learning_rate": 0.0005958002844971817,
      "loss": 3.1607,
      "step": 12286
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6315189599990845,
      "learning_rate": 0.0005957996024105461,
      "loss": 3.2035,
      "step": 12287
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3991999626159668,
      "learning_rate": 0.0005957989202689158,
      "loss": 3.1528,
      "step": 12288
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4569824934005737,
      "learning_rate": 0.0005957982380722909,
      "loss": 3.2325,
      "step": 12289
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5275413990020752,
      "learning_rate": 0.0005957975558206715,
      "loss": 3.022,
      "step": 12290
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6270790100097656,
      "learning_rate": 0.0005957968735140577,
      "loss": 3.188,
      "step": 12291
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.422048568725586,
      "learning_rate": 0.0005957961911524498,
      "loss": 3.0147,
      "step": 12292
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3827484846115112,
      "learning_rate": 0.0005957955087358478,
      "loss": 3.2246,
      "step": 12293
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.558222770690918,
      "learning_rate": 0.0005957948262642518,
      "loss": 3.0036,
      "step": 12294
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6043623685836792,
      "learning_rate": 0.0005957941437376619,
      "loss": 3.2348,
      "step": 12295
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1074419021606445,
      "learning_rate": 0.0005957934611560783,
      "loss": 3.0694,
      "step": 12296
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6250077486038208,
      "learning_rate": 0.0005957927785195011,
      "loss": 3.2127,
      "step": 12297
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3464411497116089,
      "learning_rate": 0.0005957920958279305,
      "loss": 3.0937,
      "step": 12298
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8190648555755615,
      "learning_rate": 0.0005957914130813666,
      "loss": 3.0975,
      "step": 12299
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.561180830001831,
      "learning_rate": 0.0005957907302798094,
      "loss": 3.0709,
      "step": 12300
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5628701448440552,
      "learning_rate": 0.0005957900474232591,
      "loss": 2.9843,
      "step": 12301
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3150463104248047,
      "learning_rate": 0.000595789364511716,
      "loss": 3.216,
      "step": 12302
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.6765263080596924,
      "learning_rate": 0.00059578868154518,
      "loss": 2.9808,
      "step": 12303
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.691396951675415,
      "learning_rate": 0.0005957879985236512,
      "loss": 3.1021,
      "step": 12304
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7501800060272217,
      "learning_rate": 0.00059578731544713,
      "loss": 3.1721,
      "step": 12305
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0573878288269043,
      "learning_rate": 0.0005957866323156162,
      "loss": 3.1797,
      "step": 12306
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3030149936676025,
      "learning_rate": 0.0005957859491291101,
      "loss": 2.895,
      "step": 12307
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1699678897857666,
      "learning_rate": 0.0005957852658876119,
      "loss": 3.0872,
      "step": 12308
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.692830204963684,
      "learning_rate": 0.0005957845825911215,
      "loss": 3.2819,
      "step": 12309
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8224949836730957,
      "learning_rate": 0.0005957838992396394,
      "loss": 3.1248,
      "step": 12310
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6223632097244263,
      "learning_rate": 0.0005957832158331653,
      "loss": 3.1496,
      "step": 12311
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6945600509643555,
      "learning_rate": 0.0005957825323716995,
      "loss": 2.9709,
      "step": 12312
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6899996995925903,
      "learning_rate": 0.0005957818488552422,
      "loss": 3.1717,
      "step": 12313
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5371516942977905,
      "learning_rate": 0.0005957811652837935,
      "loss": 3.3237,
      "step": 12314
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3651039600372314,
      "learning_rate": 0.0005957804816573534,
      "loss": 3.0178,
      "step": 12315
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7829382419586182,
      "learning_rate": 0.0005957797979759222,
      "loss": 3.1273,
      "step": 12316
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.881060004234314,
      "learning_rate": 0.0005957791142395,
      "loss": 3.3519,
      "step": 12317
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5505921840667725,
      "learning_rate": 0.0005957784304480867,
      "loss": 2.9895,
      "step": 12318
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5027209520339966,
      "learning_rate": 0.0005957777466016827,
      "loss": 3.3375,
      "step": 12319
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7942177057266235,
      "learning_rate": 0.0005957770627002881,
      "loss": 3.046,
      "step": 12320
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3213446140289307,
      "learning_rate": 0.000595776378743903,
      "loss": 3.3222,
      "step": 12321
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7748589515686035,
      "learning_rate": 0.0005957756947325274,
      "loss": 3.2121,
      "step": 12322
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.593808650970459,
      "learning_rate": 0.0005957750106661615,
      "loss": 3.2476,
      "step": 12323
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.350121259689331,
      "learning_rate": 0.0005957743265448054,
      "loss": 3.1387,
      "step": 12324
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.701015830039978,
      "learning_rate": 0.0005957736423684595,
      "loss": 3.2062,
      "step": 12325
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8082307577133179,
      "learning_rate": 0.0005957729581371235,
      "loss": 3.1571,
      "step": 12326
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5796067714691162,
      "learning_rate": 0.0005957722738507978,
      "loss": 3.1751,
      "step": 12327
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5530903339385986,
      "learning_rate": 0.0005957715895094823,
      "loss": 3.2126,
      "step": 12328
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6775621175765991,
      "learning_rate": 0.0005957709051131775,
      "loss": 3.3051,
      "step": 12329
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.524460792541504,
      "learning_rate": 0.0005957702206618831,
      "loss": 2.9593,
      "step": 12330
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9163706302642822,
      "learning_rate": 0.0005957695361555995,
      "loss": 3.0779,
      "step": 12331
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.747339129447937,
      "learning_rate": 0.0005957688515943268,
      "loss": 3.1269,
      "step": 12332
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5124911069869995,
      "learning_rate": 0.0005957681669780652,
      "loss": 3.1275,
      "step": 12333
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7231910228729248,
      "learning_rate": 0.0005957674823068145,
      "loss": 3.2493,
      "step": 12334
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.040874481201172,
      "learning_rate": 0.0005957667975805751,
      "loss": 3.3747,
      "step": 12335
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.81159508228302,
      "learning_rate": 0.0005957661127993471,
      "loss": 3.1034,
      "step": 12336
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8777174949645996,
      "learning_rate": 0.0005957654279631306,
      "loss": 3.1639,
      "step": 12337
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4904791116714478,
      "learning_rate": 0.0005957647430719258,
      "loss": 3.3542,
      "step": 12338
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6825141906738281,
      "learning_rate": 0.0005957640581257326,
      "loss": 2.9468,
      "step": 12339
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7323237657546997,
      "learning_rate": 0.0005957633731245514,
      "loss": 3.3501,
      "step": 12340
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5557764768600464,
      "learning_rate": 0.0005957626880683821,
      "loss": 3.1607,
      "step": 12341
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6453005075454712,
      "learning_rate": 0.0005957620029572251,
      "loss": 3.058,
      "step": 12342
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6784493923187256,
      "learning_rate": 0.0005957613177910801,
      "loss": 3.1321,
      "step": 12343
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.519343614578247,
      "learning_rate": 0.0005957606325699477,
      "loss": 3.1869,
      "step": 12344
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.518075942993164,
      "learning_rate": 0.0005957599472938278,
      "loss": 3.1248,
      "step": 12345
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5628986358642578,
      "learning_rate": 0.0005957592619627205,
      "loss": 3.0558,
      "step": 12346
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6471188068389893,
      "learning_rate": 0.0005957585765766259,
      "loss": 3.0907,
      "step": 12347
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5434514284133911,
      "learning_rate": 0.0005957578911355443,
      "loss": 2.9401,
      "step": 12348
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6707278490066528,
      "learning_rate": 0.0005957572056394758,
      "loss": 3.1796,
      "step": 12349
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4891302585601807,
      "learning_rate": 0.0005957565200884202,
      "loss": 3.3527,
      "step": 12350
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5049242973327637,
      "learning_rate": 0.0005957558344823781,
      "loss": 3.1228,
      "step": 12351
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4778162240982056,
      "learning_rate": 0.0005957551488213493,
      "loss": 3.3113,
      "step": 12352
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.912800669670105,
      "learning_rate": 0.000595754463105334,
      "loss": 3.2227,
      "step": 12353
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.522666096687317,
      "learning_rate": 0.0005957537773343325,
      "loss": 3.0998,
      "step": 12354
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9362833499908447,
      "learning_rate": 0.0005957530915083447,
      "loss": 3.2339,
      "step": 12355
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.074794054031372,
      "learning_rate": 0.0005957524056273708,
      "loss": 3.3324,
      "step": 12356
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7583943605422974,
      "learning_rate": 0.000595751719691411,
      "loss": 2.8916,
      "step": 12357
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9953246116638184,
      "learning_rate": 0.0005957510337004654,
      "loss": 3.2531,
      "step": 12358
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9733294248580933,
      "learning_rate": 0.000595750347654534,
      "loss": 3.1837,
      "step": 12359
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.554516077041626,
      "learning_rate": 0.0005957496615536171,
      "loss": 3.0299,
      "step": 12360
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.648543119430542,
      "learning_rate": 0.0005957489753977147,
      "loss": 3.1604,
      "step": 12361
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0386300086975098,
      "learning_rate": 0.000595748289186827,
      "loss": 3.1018,
      "step": 12362
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7427653074264526,
      "learning_rate": 0.0005957476029209541,
      "loss": 3.2062,
      "step": 12363
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.106865644454956,
      "learning_rate": 0.0005957469166000963,
      "loss": 2.9757,
      "step": 12364
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3656827211380005,
      "learning_rate": 0.0005957462302242535,
      "loss": 3.2794,
      "step": 12365
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.236724376678467,
      "learning_rate": 0.0005957455437934257,
      "loss": 3.299,
      "step": 12366
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3291736841201782,
      "learning_rate": 0.0005957448573076133,
      "loss": 3.2588,
      "step": 12367
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5464938879013062,
      "learning_rate": 0.0005957441707668164,
      "loss": 3.1892,
      "step": 12368
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5789456367492676,
      "learning_rate": 0.0005957434841710352,
      "loss": 3.3347,
      "step": 12369
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4471826553344727,
      "learning_rate": 0.0005957427975202696,
      "loss": 3.064,
      "step": 12370
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3404839038848877,
      "learning_rate": 0.0005957421108145197,
      "loss": 3.0622,
      "step": 12371
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7329939603805542,
      "learning_rate": 0.0005957414240537859,
      "loss": 2.9108,
      "step": 12372
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.83875572681427,
      "learning_rate": 0.0005957407372380681,
      "loss": 3.1322,
      "step": 12373
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8804818391799927,
      "learning_rate": 0.0005957400503673666,
      "loss": 3.2621,
      "step": 12374
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6632657051086426,
      "learning_rate": 0.0005957393634416815,
      "loss": 3.232,
      "step": 12375
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8571308851242065,
      "learning_rate": 0.0005957386764610127,
      "loss": 3.0851,
      "step": 12376
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4425230026245117,
      "learning_rate": 0.0005957379894253606,
      "loss": 3.0307,
      "step": 12377
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4178924560546875,
      "learning_rate": 0.0005957373023347252,
      "loss": 3.3354,
      "step": 12378
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7771174907684326,
      "learning_rate": 0.0005957366151891066,
      "loss": 3.019,
      "step": 12379
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3850317001342773,
      "learning_rate": 0.0005957359279885051,
      "loss": 3.3061,
      "step": 12380
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5033018589019775,
      "learning_rate": 0.0005957352407329207,
      "loss": 3.0785,
      "step": 12381
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.842735767364502,
      "learning_rate": 0.0005957345534223534,
      "loss": 3.1074,
      "step": 12382
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0075902938842773,
      "learning_rate": 0.0005957338660568036,
      "loss": 3.1173,
      "step": 12383
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.509217619895935,
      "learning_rate": 0.0005957331786362712,
      "loss": 3.0717,
      "step": 12384
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5602582693099976,
      "learning_rate": 0.0005957324911607565,
      "loss": 3.0564,
      "step": 12385
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6341012716293335,
      "learning_rate": 0.0005957318036302594,
      "loss": 2.974,
      "step": 12386
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8229693174362183,
      "learning_rate": 0.0005957311160447803,
      "loss": 3.0521,
      "step": 12387
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4852898120880127,
      "learning_rate": 0.0005957304284043192,
      "loss": 3.0106,
      "step": 12388
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9100786447525024,
      "learning_rate": 0.0005957297407088762,
      "loss": 3.1877,
      "step": 12389
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8011454343795776,
      "learning_rate": 0.0005957290529584515,
      "loss": 3.075,
      "step": 12390
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.459953784942627,
      "learning_rate": 0.0005957283651530452,
      "loss": 3.1091,
      "step": 12391
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.289303183555603,
      "learning_rate": 0.0005957276772926574,
      "loss": 3.2776,
      "step": 12392
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.723504900932312,
      "learning_rate": 0.0005957269893772882,
      "loss": 3.1746,
      "step": 12393
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7344375848770142,
      "learning_rate": 0.0005957263014069377,
      "loss": 3.0751,
      "step": 12394
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.417320728302002,
      "learning_rate": 0.0005957256133816062,
      "loss": 3.2589,
      "step": 12395
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6061807870864868,
      "learning_rate": 0.0005957249253012938,
      "loss": 3.1585,
      "step": 12396
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.544826865196228,
      "learning_rate": 0.0005957242371660003,
      "loss": 3.0366,
      "step": 12397
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3677823543548584,
      "learning_rate": 0.0005957235489757264,
      "loss": 3.1417,
      "step": 12398
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.236414909362793,
      "learning_rate": 0.0005957228607304717,
      "loss": 3.3144,
      "step": 12399
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4938243627548218,
      "learning_rate": 0.0005957221724302366,
      "loss": 3.2757,
      "step": 12400
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.7756288051605225,
      "learning_rate": 0.0005957214840750212,
      "loss": 3.0304,
      "step": 12401
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7639415264129639,
      "learning_rate": 0.0005957207956648255,
      "loss": 3.2321,
      "step": 12402
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4150657653808594,
      "learning_rate": 0.0005957201071996498,
      "loss": 3.0889,
      "step": 12403
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6521371603012085,
      "learning_rate": 0.0005957194186794942,
      "loss": 3.3833,
      "step": 12404
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.187671422958374,
      "learning_rate": 0.0005957187301043586,
      "loss": 2.8872,
      "step": 12405
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.996972918510437,
      "learning_rate": 0.0005957180414742435,
      "loss": 3.1929,
      "step": 12406
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.712078332901001,
      "learning_rate": 0.0005957173527891486,
      "loss": 3.1246,
      "step": 12407
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.600027561187744,
      "learning_rate": 0.0005957166640490746,
      "loss": 3.2873,
      "step": 12408
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.864917039871216,
      "learning_rate": 0.000595715975254021,
      "loss": 3.1077,
      "step": 12409
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4224746227264404,
      "learning_rate": 0.0005957152864039883,
      "loss": 3.2296,
      "step": 12410
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.544264316558838,
      "learning_rate": 0.0005957145974989765,
      "loss": 3.2379,
      "step": 12411
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5670368671417236,
      "learning_rate": 0.0005957139085389859,
      "loss": 3.3313,
      "step": 12412
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.417667031288147,
      "learning_rate": 0.0005957132195240164,
      "loss": 3.203,
      "step": 12413
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.429882526397705,
      "learning_rate": 0.0005957125304540683,
      "loss": 3.3776,
      "step": 12414
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8340034484863281,
      "learning_rate": 0.0005957118413291416,
      "loss": 3.1089,
      "step": 12415
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9897360801696777,
      "learning_rate": 0.0005957111521492365,
      "loss": 2.8783,
      "step": 12416
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7839428186416626,
      "learning_rate": 0.0005957104629143532,
      "loss": 2.9967,
      "step": 12417
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.902071475982666,
      "learning_rate": 0.0005957097736244917,
      "loss": 3.1027,
      "step": 12418
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1364853382110596,
      "learning_rate": 0.0005957090842796521,
      "loss": 3.0963,
      "step": 12419
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.8833751678466797,
      "learning_rate": 0.0005957083948798347,
      "loss": 3.3953,
      "step": 12420
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.691728353500366,
      "learning_rate": 0.0005957077054250394,
      "loss": 3.1835,
      "step": 12421
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.408365249633789,
      "learning_rate": 0.0005957070159152665,
      "loss": 3.264,
      "step": 12422
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.875623106956482,
      "learning_rate": 0.0005957063263505162,
      "loss": 3.2156,
      "step": 12423
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3797314167022705,
      "learning_rate": 0.0005957056367307883,
      "loss": 3.1201,
      "step": 12424
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0156025886535645,
      "learning_rate": 0.0005957049470560834,
      "loss": 3.1688,
      "step": 12425
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7457703351974487,
      "learning_rate": 0.0005957042573264011,
      "loss": 3.1906,
      "step": 12426
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.161919593811035,
      "learning_rate": 0.000595703567541742,
      "loss": 2.9321,
      "step": 12427
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.351860523223877,
      "learning_rate": 0.0005957028777021059,
      "loss": 3.189,
      "step": 12428
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8242909908294678,
      "learning_rate": 0.0005957021878074931,
      "loss": 3.1197,
      "step": 12429
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.036802291870117,
      "learning_rate": 0.0005957014978579037,
      "loss": 3.3077,
      "step": 12430
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.26064395904541,
      "learning_rate": 0.0005957008078533378,
      "loss": 2.9909,
      "step": 12431
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.469101905822754,
      "learning_rate": 0.0005957001177937956,
      "loss": 3.2642,
      "step": 12432
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3903695344924927,
      "learning_rate": 0.0005956994276792771,
      "loss": 3.2915,
      "step": 12433
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4832446575164795,
      "learning_rate": 0.0005956987375097825,
      "loss": 3.113,
      "step": 12434
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.127664089202881,
      "learning_rate": 0.0005956980472853119,
      "loss": 3.3706,
      "step": 12435
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5975934267044067,
      "learning_rate": 0.0005956973570058654,
      "loss": 3.1162,
      "step": 12436
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8582038879394531,
      "learning_rate": 0.0005956966666714433,
      "loss": 3.2413,
      "step": 12437
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.913603663444519,
      "learning_rate": 0.0005956959762820455,
      "loss": 3.1313,
      "step": 12438
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.701835036277771,
      "learning_rate": 0.0005956952858376724,
      "loss": 3.1569,
      "step": 12439
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7017796039581299,
      "learning_rate": 0.0005956945953383238,
      "loss": 3.1074,
      "step": 12440
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.816645860671997,
      "learning_rate": 0.000595693904784,
      "loss": 3.3282,
      "step": 12441
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.693204402923584,
      "learning_rate": 0.0005956932141747012,
      "loss": 2.8019,
      "step": 12442
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.388903260231018,
      "learning_rate": 0.0005956925235104275,
      "loss": 3.028,
      "step": 12443
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5276950597763062,
      "learning_rate": 0.0005956918327911789,
      "loss": 3.0891,
      "step": 12444
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7679064273834229,
      "learning_rate": 0.0005956911420169556,
      "loss": 3.1117,
      "step": 12445
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5669443607330322,
      "learning_rate": 0.0005956904511877577,
      "loss": 3.0612,
      "step": 12446
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5475172996520996,
      "learning_rate": 0.0005956897603035853,
      "loss": 3.0324,
      "step": 12447
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7877129316329956,
      "learning_rate": 0.0005956890693644388,
      "loss": 3.2618,
      "step": 12448
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3854447603225708,
      "learning_rate": 0.000595688378370318,
      "loss": 2.9943,
      "step": 12449
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3019373416900635,
      "learning_rate": 0.0005956876873212232,
      "loss": 3.0966,
      "step": 12450
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.533246636390686,
      "learning_rate": 0.0005956869962171545,
      "loss": 3.0247,
      "step": 12451
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2307950258255005,
      "learning_rate": 0.0005956863050581119,
      "loss": 3.0509,
      "step": 12452
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5546597242355347,
      "learning_rate": 0.0005956856138440957,
      "loss": 3.3363,
      "step": 12453
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3496520519256592,
      "learning_rate": 0.0005956849225751059,
      "loss": 3.1724,
      "step": 12454
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3741527795791626,
      "learning_rate": 0.0005956842312511428,
      "loss": 3.1173,
      "step": 12455
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5151491165161133,
      "learning_rate": 0.0005956835398722064,
      "loss": 3.3748,
      "step": 12456
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.47719144821167,
      "learning_rate": 0.0005956828484382968,
      "loss": 3.0457,
      "step": 12457
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4684686660766602,
      "learning_rate": 0.0005956821569494141,
      "loss": 3.2524,
      "step": 12458
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3836848735809326,
      "learning_rate": 0.0005956814654055586,
      "loss": 3.0412,
      "step": 12459
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6110337972640991,
      "learning_rate": 0.0005956807738067303,
      "loss": 3.3585,
      "step": 12460
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5608354806900024,
      "learning_rate": 0.0005956800821529294,
      "loss": 3.1556,
      "step": 12461
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6537647247314453,
      "learning_rate": 0.000595679390444156,
      "loss": 3.1263,
      "step": 12462
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4931575059890747,
      "learning_rate": 0.0005956786986804102,
      "loss": 3.1847,
      "step": 12463
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4707155227661133,
      "learning_rate": 0.0005956780068616922,
      "loss": 3.0831,
      "step": 12464
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2861058712005615,
      "learning_rate": 0.0005956773149880019,
      "loss": 3.3351,
      "step": 12465
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6253331899642944,
      "learning_rate": 0.0005956766230593398,
      "loss": 3.1652,
      "step": 12466
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6866471767425537,
      "learning_rate": 0.0005956759310757057,
      "loss": 3.4746,
      "step": 12467
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9929176568984985,
      "learning_rate": 0.0005956752390371,
      "loss": 3.2947,
      "step": 12468
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.837607502937317,
      "learning_rate": 0.0005956745469435226,
      "loss": 3.1894,
      "step": 12469
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4246915578842163,
      "learning_rate": 0.0005956738547949738,
      "loss": 2.8099,
      "step": 12470
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7537145614624023,
      "learning_rate": 0.0005956731625914534,
      "loss": 3.1666,
      "step": 12471
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6541671752929688,
      "learning_rate": 0.000595672470332962,
      "loss": 3.1538,
      "step": 12472
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6724358797073364,
      "learning_rate": 0.0005956717780194996,
      "loss": 3.2166,
      "step": 12473
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3497748374938965,
      "learning_rate": 0.0005956710856510661,
      "loss": 3.2759,
      "step": 12474
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.530811071395874,
      "learning_rate": 0.0005956703932276618,
      "loss": 3.2808,
      "step": 12475
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.918839931488037,
      "learning_rate": 0.0005956697007492867,
      "loss": 3.1015,
      "step": 12476
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4150398969650269,
      "learning_rate": 0.0005956690082159411,
      "loss": 3.1182,
      "step": 12477
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1020286083221436,
      "learning_rate": 0.0005956683156276251,
      "loss": 3.2416,
      "step": 12478
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8092122077941895,
      "learning_rate": 0.0005956676229843386,
      "loss": 3.0497,
      "step": 12479
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4787458181381226,
      "learning_rate": 0.0005956669302860821,
      "loss": 3.206,
      "step": 12480
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.649857521057129,
      "learning_rate": 0.0005956662375328555,
      "loss": 3.2369,
      "step": 12481
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4569158554077148,
      "learning_rate": 0.0005956655447246588,
      "loss": 3.2123,
      "step": 12482
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.878880262374878,
      "learning_rate": 0.0005956648518614925,
      "loss": 3.219,
      "step": 12483
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7350051403045654,
      "learning_rate": 0.0005956641589433564,
      "loss": 3.1576,
      "step": 12484
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1142334938049316,
      "learning_rate": 0.0005956634659702508,
      "loss": 3.2272,
      "step": 12485
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9816479682922363,
      "learning_rate": 0.0005956627729421758,
      "loss": 3.0245,
      "step": 12486
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9686741828918457,
      "learning_rate": 0.0005956620798591315,
      "loss": 3.2024,
      "step": 12487
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5601609945297241,
      "learning_rate": 0.000595661386721118,
      "loss": 3.2426,
      "step": 12488
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5043495893478394,
      "learning_rate": 0.0005956606935281354,
      "loss": 3.0521,
      "step": 12489
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.776275873184204,
      "learning_rate": 0.000595660000280184,
      "loss": 3.295,
      "step": 12490
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7186001539230347,
      "learning_rate": 0.0005956593069772639,
      "loss": 3.3426,
      "step": 12491
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.812922716140747,
      "learning_rate": 0.000595658613619375,
      "loss": 3.0189,
      "step": 12492
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.16377592086792,
      "learning_rate": 0.0005956579202065176,
      "loss": 2.8635,
      "step": 12493
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.974461317062378,
      "learning_rate": 0.0005956572267386919,
      "loss": 3.1248,
      "step": 12494
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.8472280502319336,
      "learning_rate": 0.0005956565332158979,
      "loss": 3.0866,
      "step": 12495
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5627803802490234,
      "learning_rate": 0.0005956558396381358,
      "loss": 3.2113,
      "step": 12496
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5679126977920532,
      "learning_rate": 0.0005956551460054056,
      "loss": 2.9938,
      "step": 12497
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7614370584487915,
      "learning_rate": 0.0005956544523177076,
      "loss": 3.002,
      "step": 12498
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7909088134765625,
      "learning_rate": 0.000595653758575042,
      "loss": 3.1042,
      "step": 12499
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4809435606002808,
      "learning_rate": 0.0005956530647774085,
      "loss": 2.9104,
      "step": 12500
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.665994167327881,
      "learning_rate": 0.0005956523709248077,
      "loss": 3.212,
      "step": 12501
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3071658611297607,
      "learning_rate": 0.0005956516770172395,
      "loss": 2.8995,
      "step": 12502
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6393487453460693,
      "learning_rate": 0.000595650983054704,
      "loss": 3.0818,
      "step": 12503
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9765547513961792,
      "learning_rate": 0.0005956502890372015,
      "loss": 3.0885,
      "step": 12504
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5817898511886597,
      "learning_rate": 0.0005956495949647321,
      "loss": 3.2879,
      "step": 12505
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.408114433288574,
      "learning_rate": 0.0005956489008372957,
      "loss": 2.9634,
      "step": 12506
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9632091522216797,
      "learning_rate": 0.0005956482066548926,
      "loss": 3.0198,
      "step": 12507
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4892969131469727,
      "learning_rate": 0.0005956475124175231,
      "loss": 3.0838,
      "step": 12508
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.2993364334106445,
      "learning_rate": 0.0005956468181251869,
      "loss": 3.1105,
      "step": 12509
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.170039176940918,
      "learning_rate": 0.0005956461237778846,
      "loss": 3.0481,
      "step": 12510
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8301315307617188,
      "learning_rate": 0.0005956454293756159,
      "loss": 3.2125,
      "step": 12511
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7434977293014526,
      "learning_rate": 0.0005956447349183812,
      "loss": 3.122,
      "step": 12512
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.151615619659424,
      "learning_rate": 0.0005956440404061806,
      "loss": 3.2171,
      "step": 12513
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.0049386024475098,
      "learning_rate": 0.0005956433458390142,
      "loss": 3.3559,
      "step": 12514
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.720933437347412,
      "learning_rate": 0.000595642651216882,
      "loss": 3.0964,
      "step": 12515
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0632991790771484,
      "learning_rate": 0.0005956419565397845,
      "loss": 3.3342,
      "step": 12516
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.1069915294647217,
      "learning_rate": 0.0005956412618077214,
      "loss": 3.1533,
      "step": 12517
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.435201644897461,
      "learning_rate": 0.0005956405670206929,
      "loss": 2.9343,
      "step": 12518
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.018731117248535,
      "learning_rate": 0.0005956398721786995,
      "loss": 3.2607,
      "step": 12519
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8191190958023071,
      "learning_rate": 0.000595639177281741,
      "loss": 3.0267,
      "step": 12520
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.5829737186431885,
      "learning_rate": 0.0005956384823298175,
      "loss": 2.9223,
      "step": 12521
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7312062978744507,
      "learning_rate": 0.0005956377873229293,
      "loss": 3.3416,
      "step": 12522
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.145925521850586,
      "learning_rate": 0.0005956370922610764,
      "loss": 3.0823,
      "step": 12523
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7454922199249268,
      "learning_rate": 0.000595636397144259,
      "loss": 3.0747,
      "step": 12524
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2792385816574097,
      "learning_rate": 0.0005956357019724772,
      "loss": 3.0481,
      "step": 12525
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6801090240478516,
      "learning_rate": 0.0005956350067457312,
      "loss": 3.1441,
      "step": 12526
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3662898540496826,
      "learning_rate": 0.000595634311464021,
      "loss": 3.1922,
      "step": 12527
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.073765277862549,
      "learning_rate": 0.0005956336161273469,
      "loss": 3.0153,
      "step": 12528
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5518771409988403,
      "learning_rate": 0.0005956329207357088,
      "loss": 3.1627,
      "step": 12529
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6637450456619263,
      "learning_rate": 0.000595632225289107,
      "loss": 3.2329,
      "step": 12530
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5305403470993042,
      "learning_rate": 0.0005956315297875416,
      "loss": 3.2298,
      "step": 12531
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5717518329620361,
      "learning_rate": 0.0005956308342310128,
      "loss": 3.089,
      "step": 12532
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4071508646011353,
      "learning_rate": 0.0005956301386195205,
      "loss": 3.0796,
      "step": 12533
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8894848823547363,
      "learning_rate": 0.0005956294429530651,
      "loss": 3.0217,
      "step": 12534
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8891544342041016,
      "learning_rate": 0.0005956287472316465,
      "loss": 3.0472,
      "step": 12535
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1694822311401367,
      "learning_rate": 0.000595628051455265,
      "loss": 3.1108,
      "step": 12536
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7595555782318115,
      "learning_rate": 0.0005956273556239208,
      "loss": 3.0166,
      "step": 12537
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3166017532348633,
      "learning_rate": 0.0005956266597376136,
      "loss": 3.1712,
      "step": 12538
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6879491806030273,
      "learning_rate": 0.000595625963796344,
      "loss": 2.9998,
      "step": 12539
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3743975162506104,
      "learning_rate": 0.000595625267800112,
      "loss": 3.0019,
      "step": 12540
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6505217552185059,
      "learning_rate": 0.0005956245717489176,
      "loss": 3.1635,
      "step": 12541
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6203113794326782,
      "learning_rate": 0.000595623875642761,
      "loss": 3.1967,
      "step": 12542
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9888676404953003,
      "learning_rate": 0.0005956231794816424,
      "loss": 3.025,
      "step": 12543
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7475848197937012,
      "learning_rate": 0.0005956224832655618,
      "loss": 3.0678,
      "step": 12544
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4900524616241455,
      "learning_rate": 0.0005956217869945194,
      "loss": 3.0681,
      "step": 12545
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.635964035987854,
      "learning_rate": 0.0005956210906685154,
      "loss": 3.3228,
      "step": 12546
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4870537519454956,
      "learning_rate": 0.0005956203942875498,
      "loss": 3.1538,
      "step": 12547
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6416407823562622,
      "learning_rate": 0.0005956196978516229,
      "loss": 2.9342,
      "step": 12548
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5265127420425415,
      "learning_rate": 0.0005956190013607346,
      "loss": 3.1632,
      "step": 12549
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.701888918876648,
      "learning_rate": 0.0005956183048148853,
      "loss": 3.1182,
      "step": 12550
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.658996343612671,
      "learning_rate": 0.0005956176082140747,
      "loss": 3.0971,
      "step": 12551
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5210609436035156,
      "learning_rate": 0.0005956169115583033,
      "loss": 3.2226,
      "step": 12552
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.839483380317688,
      "learning_rate": 0.0005956162148475714,
      "loss": 3.1136,
      "step": 12553
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.819859504699707,
      "learning_rate": 0.0005956155180818786,
      "loss": 3.0052,
      "step": 12554
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7389496564865112,
      "learning_rate": 0.0005956148212612253,
      "loss": 3.1114,
      "step": 12555
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5019686222076416,
      "learning_rate": 0.0005956141243856117,
      "loss": 3.1027,
      "step": 12556
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.444522500038147,
      "learning_rate": 0.0005956134274550379,
      "loss": 3.2175,
      "step": 12557
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.524811863899231,
      "learning_rate": 0.0005956127304695039,
      "loss": 3.1367,
      "step": 12558
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7247200012207031,
      "learning_rate": 0.0005956120334290099,
      "loss": 3.301,
      "step": 12559
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7560820579528809,
      "learning_rate": 0.0005956113363335561,
      "loss": 3.2521,
      "step": 12560
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4352458715438843,
      "learning_rate": 0.0005956106391831425,
      "loss": 3.3186,
      "step": 12561
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3471956253051758,
      "learning_rate": 0.0005956099419777694,
      "loss": 3.1439,
      "step": 12562
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.625998854637146,
      "learning_rate": 0.0005956092447174368,
      "loss": 3.2079,
      "step": 12563
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4771873950958252,
      "learning_rate": 0.0005956085474021448,
      "loss": 2.9791,
      "step": 12564
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.434861421585083,
      "learning_rate": 0.0005956078500318937,
      "loss": 2.924,
      "step": 12565
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5123169422149658,
      "learning_rate": 0.0005956071526066834,
      "loss": 3.3438,
      "step": 12566
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5647913217544556,
      "learning_rate": 0.0005956064551265143,
      "loss": 3.2557,
      "step": 12567
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6308358907699585,
      "learning_rate": 0.0005956057575913862,
      "loss": 3.2241,
      "step": 12568
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5917867422103882,
      "learning_rate": 0.0005956050600012995,
      "loss": 3.1222,
      "step": 12569
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5728658437728882,
      "learning_rate": 0.0005956043623562543,
      "loss": 3.031,
      "step": 12570
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5761991739273071,
      "learning_rate": 0.0005956036646562506,
      "loss": 3.0843,
      "step": 12571
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5894567966461182,
      "learning_rate": 0.0005956029669012885,
      "loss": 3.2109,
      "step": 12572
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4648327827453613,
      "learning_rate": 0.0005956022690913684,
      "loss": 3.1458,
      "step": 12573
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.709539532661438,
      "learning_rate": 0.0005956015712264902,
      "loss": 3.0082,
      "step": 12574
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.833335280418396,
      "learning_rate": 0.000595600873306654,
      "loss": 2.9777,
      "step": 12575
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.474873423576355,
      "learning_rate": 0.0005956001753318601,
      "loss": 3.4003,
      "step": 12576
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9479613304138184,
      "learning_rate": 0.0005955994773021087,
      "loss": 3.0678,
      "step": 12577
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5366358757019043,
      "learning_rate": 0.0005955987792173995,
      "loss": 3.0506,
      "step": 12578
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5198811292648315,
      "learning_rate": 0.0005955980810777331,
      "loss": 3.2601,
      "step": 12579
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6016887426376343,
      "learning_rate": 0.0005955973828831093,
      "loss": 3.156,
      "step": 12580
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3594248294830322,
      "learning_rate": 0.0005955966846335285,
      "loss": 3.2621,
      "step": 12581
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4760839939117432,
      "learning_rate": 0.0005955959863289906,
      "loss": 3.2479,
      "step": 12582
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.902755856513977,
      "learning_rate": 0.0005955952879694959,
      "loss": 3.1823,
      "step": 12583
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.314004421234131,
      "learning_rate": 0.0005955945895550444,
      "loss": 3.0017,
      "step": 12584
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.243788480758667,
      "learning_rate": 0.0005955938910856362,
      "loss": 3.2818,
      "step": 12585
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5374205112457275,
      "learning_rate": 0.0005955931925612717,
      "loss": 3.2758,
      "step": 12586
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.9192240238189697,
      "learning_rate": 0.0005955924939819508,
      "loss": 3.1453,
      "step": 12587
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8247747421264648,
      "learning_rate": 0.0005955917953476736,
      "loss": 3.0417,
      "step": 12588
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8827780485153198,
      "learning_rate": 0.0005955910966584403,
      "loss": 3.2036,
      "step": 12589
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0587525367736816,
      "learning_rate": 0.0005955903979142511,
      "loss": 2.8468,
      "step": 12590
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5088666677474976,
      "learning_rate": 0.0005955896991151061,
      "loss": 3.2459,
      "step": 12591
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2870668172836304,
      "learning_rate": 0.0005955890002610053,
      "loss": 3.1368,
      "step": 12592
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3636120557785034,
      "learning_rate": 0.0005955883013519489,
      "loss": 3.2799,
      "step": 12593
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4014875888824463,
      "learning_rate": 0.0005955876023879372,
      "loss": 2.7855,
      "step": 12594
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4432039260864258,
      "learning_rate": 0.0005955869033689701,
      "loss": 3.3371,
      "step": 12595
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4148900508880615,
      "learning_rate": 0.0005955862042950477,
      "loss": 3.089,
      "step": 12596
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.252100944519043,
      "learning_rate": 0.0005955855051661704,
      "loss": 3.0164,
      "step": 12597
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4266152381896973,
      "learning_rate": 0.0005955848059823382,
      "loss": 3.0321,
      "step": 12598
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2446051836013794,
      "learning_rate": 0.0005955841067435511,
      "loss": 3.2623,
      "step": 12599
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6116976737976074,
      "learning_rate": 0.0005955834074498095,
      "loss": 3.2727,
      "step": 12600
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4712270498275757,
      "learning_rate": 0.0005955827081011131,
      "loss": 3.0695,
      "step": 12601
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.391943097114563,
      "learning_rate": 0.0005955820086974626,
      "loss": 3.1374,
      "step": 12602
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7262821197509766,
      "learning_rate": 0.0005955813092388576,
      "loss": 2.8891,
      "step": 12603
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4676002264022827,
      "learning_rate": 0.0005955806097252985,
      "loss": 3.1439,
      "step": 12604
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9708421230316162,
      "learning_rate": 0.0005955799101567854,
      "loss": 3.2785,
      "step": 12605
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4463578462600708,
      "learning_rate": 0.0005955792105333184,
      "loss": 3.1771,
      "step": 12606
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4928582906723022,
      "learning_rate": 0.0005955785108548976,
      "loss": 3.2742,
      "step": 12607
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4886362552642822,
      "learning_rate": 0.0005955778111215233,
      "loss": 3.0335,
      "step": 12608
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0747532844543457,
      "learning_rate": 0.0005955771113331955,
      "loss": 3.326,
      "step": 12609
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.206202745437622,
      "learning_rate": 0.0005955764114899142,
      "loss": 3.1856,
      "step": 12610
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.584429383277893,
      "learning_rate": 0.0005955757115916797,
      "loss": 3.0755,
      "step": 12611
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.000241994857788,
      "learning_rate": 0.0005955750116384921,
      "loss": 3.104,
      "step": 12612
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6808232069015503,
      "learning_rate": 0.0005955743116303517,
      "loss": 3.0797,
      "step": 12613
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4902867078781128,
      "learning_rate": 0.0005955736115672583,
      "loss": 3.1353,
      "step": 12614
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6064420938491821,
      "learning_rate": 0.0005955729114492123,
      "loss": 2.9245,
      "step": 12615
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4582425355911255,
      "learning_rate": 0.0005955722112762136,
      "loss": 3.3792,
      "step": 12616
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3583251237869263,
      "learning_rate": 0.0005955715110482624,
      "loss": 3.0223,
      "step": 12617
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.454750657081604,
      "learning_rate": 0.000595570810765359,
      "loss": 3.1022,
      "step": 12618
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.786012053489685,
      "learning_rate": 0.0005955701104275033,
      "loss": 3.0288,
      "step": 12619
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.524124026298523,
      "learning_rate": 0.0005955694100346957,
      "loss": 3.2755,
      "step": 12620
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4408897161483765,
      "learning_rate": 0.000595568709586936,
      "loss": 3.1784,
      "step": 12621
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.70353102684021,
      "learning_rate": 0.0005955680090842246,
      "loss": 3.3304,
      "step": 12622
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.018578290939331,
      "learning_rate": 0.0005955673085265615,
      "loss": 3.255,
      "step": 12623
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5012141466140747,
      "learning_rate": 0.0005955666079139467,
      "loss": 2.9618,
      "step": 12624
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.919034242630005,
      "learning_rate": 0.0005955659072463807,
      "loss": 3.1007,
      "step": 12625
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.2915360927581787,
      "learning_rate": 0.0005955652065238634,
      "loss": 3.0073,
      "step": 12626
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.0869247913360596,
      "learning_rate": 0.0005955645057463948,
      "loss": 3.0269,
      "step": 12627
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.3087711334228516,
      "learning_rate": 0.0005955638049139753,
      "loss": 3.064,
      "step": 12628
    },
    {
      "epoch": 0.16,
      "grad_norm": 4.0484185218811035,
      "learning_rate": 0.0005955631040266048,
      "loss": 3.1458,
      "step": 12629
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4866673946380615,
      "learning_rate": 0.0005955624030842837,
      "loss": 3.0926,
      "step": 12630
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7480522394180298,
      "learning_rate": 0.0005955617020870119,
      "loss": 3.3832,
      "step": 12631
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.067664623260498,
      "learning_rate": 0.0005955610010347896,
      "loss": 3.0143,
      "step": 12632
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.545951247215271,
      "learning_rate": 0.0005955602999276169,
      "loss": 3.1516,
      "step": 12633
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2908852100372314,
      "learning_rate": 0.0005955595987654941,
      "loss": 3.22,
      "step": 12634
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4770771265029907,
      "learning_rate": 0.000595558897548421,
      "loss": 3.0763,
      "step": 12635
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4208076000213623,
      "learning_rate": 0.0005955581962763981,
      "loss": 3.1155,
      "step": 12636
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.0479421615600586,
      "learning_rate": 0.0005955574949494252,
      "loss": 3.053,
      "step": 12637
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7254530191421509,
      "learning_rate": 0.0005955567935675026,
      "loss": 3.3984,
      "step": 12638
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.49653959274292,
      "learning_rate": 0.0005955560921306305,
      "loss": 3.098,
      "step": 12639
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.125950813293457,
      "learning_rate": 0.000595555390638809,
      "loss": 2.8846,
      "step": 12640
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3603909015655518,
      "learning_rate": 0.000595554689092038,
      "loss": 3.1259,
      "step": 12641
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3194353580474854,
      "learning_rate": 0.0005955539874903178,
      "loss": 3.2253,
      "step": 12642
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3855085372924805,
      "learning_rate": 0.0005955532858336486,
      "loss": 3.0867,
      "step": 12643
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.8622689247131348,
      "learning_rate": 0.0005955525841220304,
      "loss": 3.1876,
      "step": 12644
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.811206340789795,
      "learning_rate": 0.0005955518823554636,
      "loss": 3.1242,
      "step": 12645
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4203503131866455,
      "learning_rate": 0.000595551180533948,
      "loss": 2.9752,
      "step": 12646
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5798683166503906,
      "learning_rate": 0.0005955504786574838,
      "loss": 3.2213,
      "step": 12647
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.5445096492767334,
      "learning_rate": 0.0005955497767260713,
      "loss": 3.081,
      "step": 12648
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.906723976135254,
      "learning_rate": 0.0005955490747397104,
      "loss": 3.1107,
      "step": 12649
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.4235928058624268,
      "learning_rate": 0.0005955483726984014,
      "loss": 3.2472,
      "step": 12650
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.592804193496704,
      "learning_rate": 0.0005955476706021444,
      "loss": 3.2008,
      "step": 12651
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7674850225448608,
      "learning_rate": 0.0005955469684509395,
      "loss": 3.1568,
      "step": 12652
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.982640266418457,
      "learning_rate": 0.0005955462662447868,
      "loss": 3.2226,
      "step": 12653
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4441026449203491,
      "learning_rate": 0.0005955455639836865,
      "loss": 3.1718,
      "step": 12654
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.240105390548706,
      "learning_rate": 0.0005955448616676388,
      "loss": 2.71,
      "step": 12655
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.938244342803955,
      "learning_rate": 0.0005955441592966435,
      "loss": 3.2079,
      "step": 12656
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6422603130340576,
      "learning_rate": 0.0005955434568707012,
      "loss": 2.9773,
      "step": 12657
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8966785669326782,
      "learning_rate": 0.0005955427543898116,
      "loss": 3.0,
      "step": 12658
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.354844570159912,
      "learning_rate": 0.0005955420518539751,
      "loss": 3.0978,
      "step": 12659
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4365214109420776,
      "learning_rate": 0.0005955413492631918,
      "loss": 3.2329,
      "step": 12660
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.655714988708496,
      "learning_rate": 0.0005955406466174617,
      "loss": 3.1437,
      "step": 12661
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5151972770690918,
      "learning_rate": 0.0005955399439167852,
      "loss": 3.1286,
      "step": 12662
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2277621030807495,
      "learning_rate": 0.000595539241161162,
      "loss": 3.1591,
      "step": 12663
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.5581932067871094,
      "learning_rate": 0.0005955385383505927,
      "loss": 3.0015,
      "step": 12664
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.3816975355148315,
      "learning_rate": 0.000595537835485077,
      "loss": 3.0849,
      "step": 12665
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7063847780227661,
      "learning_rate": 0.0005955371325646154,
      "loss": 3.2116,
      "step": 12666
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.7400134801864624,
      "learning_rate": 0.0005955364295892078,
      "loss": 2.9671,
      "step": 12667
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.1480565071105957,
      "learning_rate": 0.0005955357265588543,
      "loss": 3.2488,
      "step": 12668
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.9746599197387695,
      "learning_rate": 0.0005955350234735554,
      "loss": 3.1814,
      "step": 12669
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.6736751794815063,
      "learning_rate": 0.0005955343203333107,
      "loss": 3.4819,
      "step": 12670
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.8608644008636475,
      "learning_rate": 0.0005955336171381206,
      "loss": 3.257,
      "step": 12671
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.506425142288208,
      "learning_rate": 0.0005955329138879854,
      "loss": 2.8191,
      "step": 12672
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6960927248001099,
      "learning_rate": 0.0005955322105829049,
      "loss": 3.2839,
      "step": 12673
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4878137111663818,
      "learning_rate": 0.0005955315072228794,
      "loss": 3.1663,
      "step": 12674
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7648749351501465,
      "learning_rate": 0.0005955308038079092,
      "loss": 2.9383,
      "step": 12675
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6747711896896362,
      "learning_rate": 0.000595530100337994,
      "loss": 3.2219,
      "step": 12676
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8257850408554077,
      "learning_rate": 0.0005955293968131343,
      "loss": 2.9374,
      "step": 12677
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.558590054512024,
      "learning_rate": 0.00059552869323333,
      "loss": 3.3231,
      "step": 12678
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.592357873916626,
      "learning_rate": 0.0005955279895985815,
      "loss": 3.0867,
      "step": 12679
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6686046123504639,
      "learning_rate": 0.0005955272859088887,
      "loss": 2.8575,
      "step": 12680
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7237955331802368,
      "learning_rate": 0.0005955265821642518,
      "loss": 3.2894,
      "step": 12681
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4948840141296387,
      "learning_rate": 0.0005955258783646709,
      "loss": 3.1114,
      "step": 12682
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9719222784042358,
      "learning_rate": 0.0005955251745101461,
      "loss": 3.0804,
      "step": 12683
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6948368549346924,
      "learning_rate": 0.0005955244706006777,
      "loss": 3.263,
      "step": 12684
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6409239768981934,
      "learning_rate": 0.0005955237666362657,
      "loss": 3.0986,
      "step": 12685
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1159558296203613,
      "learning_rate": 0.0005955230626169103,
      "loss": 2.9311,
      "step": 12686
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.422372579574585,
      "learning_rate": 0.0005955223585426115,
      "loss": 3.1763,
      "step": 12687
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.2046220302581787,
      "learning_rate": 0.0005955216544133695,
      "loss": 3.0841,
      "step": 12688
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3551567792892456,
      "learning_rate": 0.0005955209502291845,
      "loss": 3.236,
      "step": 12689
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.07271146774292,
      "learning_rate": 0.0005955202459900565,
      "loss": 3.0913,
      "step": 12690
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3369014263153076,
      "learning_rate": 0.0005955195416959858,
      "loss": 3.27,
      "step": 12691
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3579280376434326,
      "learning_rate": 0.0005955188373469724,
      "loss": 3.1679,
      "step": 12692
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4146842956542969,
      "learning_rate": 0.0005955181329430165,
      "loss": 3.0586,
      "step": 12693
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3735909461975098,
      "learning_rate": 0.0005955174284841182,
      "loss": 3.1005,
      "step": 12694
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.2569477558135986,
      "learning_rate": 0.0005955167239702776,
      "loss": 3.3988,
      "step": 12695
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3994626998901367,
      "learning_rate": 0.000595516019401495,
      "loss": 3.3139,
      "step": 12696
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6504203081130981,
      "learning_rate": 0.0005955153147777702,
      "loss": 2.9232,
      "step": 12697
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9827696084976196,
      "learning_rate": 0.0005955146100991036,
      "loss": 3.2516,
      "step": 12698
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.3711206912994385,
      "learning_rate": 0.0005955139053654952,
      "loss": 3.2191,
      "step": 12699
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.370565176010132,
      "learning_rate": 0.0005955132005769453,
      "loss": 3.2887,
      "step": 12700
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.318591594696045,
      "learning_rate": 0.0005955124957334538,
      "loss": 3.1967,
      "step": 12701
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6911431550979614,
      "learning_rate": 0.000595511790835021,
      "loss": 3.1606,
      "step": 12702
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.636530637741089,
      "learning_rate": 0.000595511085881647,
      "loss": 3.0583,
      "step": 12703
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.315000057220459,
      "learning_rate": 0.000595510380873332,
      "loss": 2.8259,
      "step": 12704
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3369678258895874,
      "learning_rate": 0.0005955096758100759,
      "loss": 3.0635,
      "step": 12705
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.50345516204834,
      "learning_rate": 0.000595508970691879,
      "loss": 2.9585,
      "step": 12706
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6414940357208252,
      "learning_rate": 0.0005955082655187413,
      "loss": 3.2875,
      "step": 12707
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.741158127784729,
      "learning_rate": 0.0005955075602906633,
      "loss": 2.9374,
      "step": 12708
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6710373163223267,
      "learning_rate": 0.0005955068550076446,
      "loss": 3.2094,
      "step": 12709
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2064738273620605,
      "learning_rate": 0.0005955061496696858,
      "loss": 2.8716,
      "step": 12710
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9108682870864868,
      "learning_rate": 0.0005955054442767867,
      "loss": 3.4341,
      "step": 12711
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.700051188468933,
      "learning_rate": 0.0005955047388289475,
      "loss": 2.9842,
      "step": 12712
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.368950366973877,
      "learning_rate": 0.0005955040333261686,
      "loss": 2.8941,
      "step": 12713
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5445125102996826,
      "learning_rate": 0.0005955033277684498,
      "loss": 3.1418,
      "step": 12714
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4652204513549805,
      "learning_rate": 0.0005955026221557913,
      "loss": 3.2011,
      "step": 12715
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2912107706069946,
      "learning_rate": 0.0005955019164881934,
      "loss": 3.2543,
      "step": 12716
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5585665702819824,
      "learning_rate": 0.000595501210765656,
      "loss": 3.0833,
      "step": 12717
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0311317443847656,
      "learning_rate": 0.0005955005049881795,
      "loss": 3.0028,
      "step": 12718
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7392923831939697,
      "learning_rate": 0.0005954997991557638,
      "loss": 3.4465,
      "step": 12719
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5273194313049316,
      "learning_rate": 0.0005954990932684091,
      "loss": 3.5407,
      "step": 12720
    },
    {
      "epoch": 0.17,
      "grad_norm": 4.010763168334961,
      "learning_rate": 0.0005954983873261156,
      "loss": 2.8611,
      "step": 12721
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6539418697357178,
      "learning_rate": 0.0005954976813288832,
      "loss": 3.1048,
      "step": 12722
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.478570580482483,
      "learning_rate": 0.0005954969752767123,
      "loss": 3.008,
      "step": 12723
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5304737091064453,
      "learning_rate": 0.000595496269169603,
      "loss": 3.2994,
      "step": 12724
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5813357830047607,
      "learning_rate": 0.0005954955630075554,
      "loss": 3.1805,
      "step": 12725
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.449905276298523,
      "learning_rate": 0.0005954948567905694,
      "loss": 3.3307,
      "step": 12726
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.602217197418213,
      "learning_rate": 0.0005954941505186455,
      "loss": 3.1501,
      "step": 12727
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5904150009155273,
      "learning_rate": 0.0005954934441917836,
      "loss": 2.8488,
      "step": 12728
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.94479238986969,
      "learning_rate": 0.0005954927378099839,
      "loss": 3.1814,
      "step": 12729
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0166385173797607,
      "learning_rate": 0.0005954920313732465,
      "loss": 3.0751,
      "step": 12730
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6078157424926758,
      "learning_rate": 0.0005954913248815716,
      "loss": 3.0525,
      "step": 12731
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.691686987876892,
      "learning_rate": 0.0005954906183349592,
      "loss": 3.1332,
      "step": 12732
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2786189317703247,
      "learning_rate": 0.0005954899117334096,
      "loss": 3.2486,
      "step": 12733
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8450185060501099,
      "learning_rate": 0.0005954892050769229,
      "loss": 3.2912,
      "step": 12734
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.47782564163208,
      "learning_rate": 0.0005954884983654991,
      "loss": 3.0768,
      "step": 12735
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7790228128433228,
      "learning_rate": 0.0005954877915991384,
      "loss": 3.3844,
      "step": 12736
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.998333692550659,
      "learning_rate": 0.0005954870847778409,
      "loss": 2.9426,
      "step": 12737
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.852530002593994,
      "learning_rate": 0.0005954863779016068,
      "loss": 3.2306,
      "step": 12738
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7864503860473633,
      "learning_rate": 0.0005954856709704362,
      "loss": 3.1391,
      "step": 12739
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5838754177093506,
      "learning_rate": 0.0005954849639843293,
      "loss": 3.36,
      "step": 12740
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.335437059402466,
      "learning_rate": 0.0005954842569432862,
      "loss": 3.15,
      "step": 12741
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.006441116333008,
      "learning_rate": 0.0005954835498473068,
      "loss": 3.2231,
      "step": 12742
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6880056858062744,
      "learning_rate": 0.0005954828426963917,
      "loss": 2.9536,
      "step": 12743
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7028366327285767,
      "learning_rate": 0.0005954821354905406,
      "loss": 3.2455,
      "step": 12744
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.83763587474823,
      "learning_rate": 0.0005954814282297538,
      "loss": 3.1899,
      "step": 12745
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2477149963378906,
      "learning_rate": 0.0005954807209140315,
      "loss": 3.0962,
      "step": 12746
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4554084539413452,
      "learning_rate": 0.0005954800135433736,
      "loss": 3.0426,
      "step": 12747
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6378722190856934,
      "learning_rate": 0.0005954793061177805,
      "loss": 3.2001,
      "step": 12748
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.7561466693878174,
      "learning_rate": 0.0005954785986372522,
      "loss": 3.0575,
      "step": 12749
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3897035121917725,
      "learning_rate": 0.0005954778911017889,
      "loss": 3.1447,
      "step": 12750
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3290950059890747,
      "learning_rate": 0.0005954771835113906,
      "loss": 3.1237,
      "step": 12751
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.585937023162842,
      "learning_rate": 0.0005954764758660576,
      "loss": 3.0128,
      "step": 12752
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1633756160736084,
      "learning_rate": 0.0005954757681657898,
      "loss": 3.2891,
      "step": 12753
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.496658444404602,
      "learning_rate": 0.0005954750604105876,
      "loss": 3.4662,
      "step": 12754
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3951603174209595,
      "learning_rate": 0.0005954743526004509,
      "loss": 3.0992,
      "step": 12755
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5099246501922607,
      "learning_rate": 0.0005954736447353801,
      "loss": 3.1508,
      "step": 12756
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7065677642822266,
      "learning_rate": 0.0005954729368153752,
      "loss": 3.2919,
      "step": 12757
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3902677297592163,
      "learning_rate": 0.0005954722288404362,
      "loss": 3.0117,
      "step": 12758
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6194312572479248,
      "learning_rate": 0.0005954715208105633,
      "loss": 3.2859,
      "step": 12759
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6162729263305664,
      "learning_rate": 0.0005954708127257567,
      "loss": 3.1347,
      "step": 12760
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2541019916534424,
      "learning_rate": 0.0005954701045860165,
      "loss": 3.215,
      "step": 12761
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3789961338043213,
      "learning_rate": 0.0005954693963913428,
      "loss": 3.2823,
      "step": 12762
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.431803584098816,
      "learning_rate": 0.0005954686881417358,
      "loss": 3.1608,
      "step": 12763
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5730890035629272,
      "learning_rate": 0.0005954679798371956,
      "loss": 2.9666,
      "step": 12764
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6139743328094482,
      "learning_rate": 0.0005954672714777223,
      "loss": 2.9602,
      "step": 12765
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3484529256820679,
      "learning_rate": 0.000595466563063316,
      "loss": 3.159,
      "step": 12766
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.569140076637268,
      "learning_rate": 0.000595465854593977,
      "loss": 2.8995,
      "step": 12767
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5613465309143066,
      "learning_rate": 0.0005954651460697053,
      "loss": 3.0343,
      "step": 12768
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3727138042449951,
      "learning_rate": 0.0005954644374905009,
      "loss": 3.3435,
      "step": 12769
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.460181713104248,
      "learning_rate": 0.0005954637288563642,
      "loss": 2.9453,
      "step": 12770
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5618433952331543,
      "learning_rate": 0.0005954630201672954,
      "loss": 3.1411,
      "step": 12771
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5230478048324585,
      "learning_rate": 0.0005954623114232942,
      "loss": 3.0827,
      "step": 12772
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2536982297897339,
      "learning_rate": 0.000595461602624361,
      "loss": 3.2946,
      "step": 12773
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.389448881149292,
      "learning_rate": 0.0005954608937704961,
      "loss": 3.141,
      "step": 12774
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5430890321731567,
      "learning_rate": 0.0005954601848616992,
      "loss": 3.069,
      "step": 12775
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8539884090423584,
      "learning_rate": 0.0005954594758979708,
      "loss": 3.2284,
      "step": 12776
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5702887773513794,
      "learning_rate": 0.0005954587668793109,
      "loss": 3.2404,
      "step": 12777
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6136826276779175,
      "learning_rate": 0.0005954580578057196,
      "loss": 3.1749,
      "step": 12778
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.9203684329986572,
      "learning_rate": 0.0005954573486771971,
      "loss": 3.0523,
      "step": 12779
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5238919258117676,
      "learning_rate": 0.0005954566394937435,
      "loss": 3.0705,
      "step": 12780
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1704001426696777,
      "learning_rate": 0.0005954559302553589,
      "loss": 3.0159,
      "step": 12781
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5088765621185303,
      "learning_rate": 0.0005954552209620435,
      "loss": 3.0108,
      "step": 12782
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8176889419555664,
      "learning_rate": 0.0005954545116137975,
      "loss": 2.981,
      "step": 12783
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.761832356452942,
      "learning_rate": 0.0005954538022106208,
      "loss": 3.2427,
      "step": 12784
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4841874837875366,
      "learning_rate": 0.0005954530927525137,
      "loss": 2.9564,
      "step": 12785
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.548835039138794,
      "learning_rate": 0.0005954523832394762,
      "loss": 3.2223,
      "step": 12786
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3138275146484375,
      "learning_rate": 0.0005954516736715086,
      "loss": 2.9794,
      "step": 12787
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8540631532669067,
      "learning_rate": 0.000595450964048611,
      "loss": 3.1716,
      "step": 12788
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5229017734527588,
      "learning_rate": 0.0005954502543707834,
      "loss": 3.2935,
      "step": 12789
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3162178993225098,
      "learning_rate": 0.0005954495446380262,
      "loss": 3.1125,
      "step": 12790
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8452379703521729,
      "learning_rate": 0.0005954488348503391,
      "loss": 3.4786,
      "step": 12791
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1927106380462646,
      "learning_rate": 0.0005954481250077226,
      "loss": 3.1403,
      "step": 12792
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2934128046035767,
      "learning_rate": 0.0005954474151101767,
      "loss": 3.2633,
      "step": 12793
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.158378839492798,
      "learning_rate": 0.0005954467051577017,
      "loss": 3.0983,
      "step": 12794
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.55204439163208,
      "learning_rate": 0.0005954459951502973,
      "loss": 3.101,
      "step": 12795
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.412431240081787,
      "learning_rate": 0.0005954452850879641,
      "loss": 3.0074,
      "step": 12796
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4141370058059692,
      "learning_rate": 0.0005954445749707021,
      "loss": 3.204,
      "step": 12797
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6749017238616943,
      "learning_rate": 0.0005954438647985112,
      "loss": 3.1144,
      "step": 12798
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4967048168182373,
      "learning_rate": 0.0005954431545713918,
      "loss": 2.802,
      "step": 12799
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.361897349357605,
      "learning_rate": 0.0005954424442893439,
      "loss": 3.2569,
      "step": 12800
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6268850564956665,
      "learning_rate": 0.0005954417339523678,
      "loss": 3.1694,
      "step": 12801
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.614144206047058,
      "learning_rate": 0.0005954410235604634,
      "loss": 3.1484,
      "step": 12802
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6762993335723877,
      "learning_rate": 0.000595440313113631,
      "loss": 3.1069,
      "step": 12803
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.715055227279663,
      "learning_rate": 0.0005954396026118705,
      "loss": 3.0007,
      "step": 12804
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3254581689834595,
      "learning_rate": 0.0005954388920551823,
      "loss": 3.1995,
      "step": 12805
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4999433755874634,
      "learning_rate": 0.0005954381814435664,
      "loss": 3.1298,
      "step": 12806
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2809470891952515,
      "learning_rate": 0.0005954374707770231,
      "loss": 3.0933,
      "step": 12807
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9304670095443726,
      "learning_rate": 0.0005954367600555523,
      "loss": 3.2017,
      "step": 12808
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7844979763031006,
      "learning_rate": 0.0005954360492791543,
      "loss": 3.2048,
      "step": 12809
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5564509630203247,
      "learning_rate": 0.0005954353384478291,
      "loss": 3.3684,
      "step": 12810
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9107868671417236,
      "learning_rate": 0.0005954346275615769,
      "loss": 3.3,
      "step": 12811
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8227959871292114,
      "learning_rate": 0.0005954339166203978,
      "loss": 3.1223,
      "step": 12812
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9568042755126953,
      "learning_rate": 0.000595433205624292,
      "loss": 3.2883,
      "step": 12813
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6161737442016602,
      "learning_rate": 0.0005954324945732595,
      "loss": 2.8208,
      "step": 12814
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9766067266464233,
      "learning_rate": 0.0005954317834673006,
      "loss": 3.0643,
      "step": 12815
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0512278079986572,
      "learning_rate": 0.0005954310723064154,
      "loss": 3.1826,
      "step": 12816
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.461966872215271,
      "learning_rate": 0.0005954303610906039,
      "loss": 3.2203,
      "step": 12817
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2567952871322632,
      "learning_rate": 0.0005954296498198664,
      "loss": 2.96,
      "step": 12818
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4414293766021729,
      "learning_rate": 0.0005954289384942029,
      "loss": 3.1518,
      "step": 12819
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.9364376068115234,
      "learning_rate": 0.0005954282271136136,
      "loss": 3.1223,
      "step": 12820
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.728746771812439,
      "learning_rate": 0.0005954275156780986,
      "loss": 3.1913,
      "step": 12821
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.77386474609375,
      "learning_rate": 0.000595426804187658,
      "loss": 3.085,
      "step": 12822
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5548601150512695,
      "learning_rate": 0.0005954260926422921,
      "loss": 2.9779,
      "step": 12823
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.995072841644287,
      "learning_rate": 0.0005954253810420008,
      "loss": 3.004,
      "step": 12824
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9377552270889282,
      "learning_rate": 0.0005954246693867844,
      "loss": 2.8758,
      "step": 12825
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7444802522659302,
      "learning_rate": 0.000595423957676643,
      "loss": 2.9426,
      "step": 12826
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6810606718063354,
      "learning_rate": 0.0005954232459115767,
      "loss": 3.2433,
      "step": 12827
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6764986515045166,
      "learning_rate": 0.0005954225340915856,
      "loss": 3.0342,
      "step": 12828
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2743481397628784,
      "learning_rate": 0.0005954218222166699,
      "loss": 3.0772,
      "step": 12829
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5729471445083618,
      "learning_rate": 0.0005954211102868297,
      "loss": 3.2921,
      "step": 12830
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7257517576217651,
      "learning_rate": 0.0005954203983020651,
      "loss": 3.1024,
      "step": 12831
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5789223909378052,
      "learning_rate": 0.0005954196862623763,
      "loss": 3.1139,
      "step": 12832
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8698452711105347,
      "learning_rate": 0.0005954189741677634,
      "loss": 3.0424,
      "step": 12833
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4790266752243042,
      "learning_rate": 0.0005954182620182266,
      "loss": 2.9,
      "step": 12834
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7378634214401245,
      "learning_rate": 0.0005954175498137659,
      "loss": 2.8835,
      "step": 12835
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.621701955795288,
      "learning_rate": 0.0005954168375543815,
      "loss": 3.133,
      "step": 12836
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.154860734939575,
      "learning_rate": 0.0005954161252400736,
      "loss": 3.1692,
      "step": 12837
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5799390077590942,
      "learning_rate": 0.0005954154128708422,
      "loss": 3.3968,
      "step": 12838
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4025150537490845,
      "learning_rate": 0.0005954147004466876,
      "loss": 3.1618,
      "step": 12839
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7368446588516235,
      "learning_rate": 0.0005954139879676098,
      "loss": 3.0725,
      "step": 12840
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.318946361541748,
      "learning_rate": 0.0005954132754336089,
      "loss": 3.3168,
      "step": 12841
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8861056566238403,
      "learning_rate": 0.000595412562844685,
      "loss": 3.2245,
      "step": 12842
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2574849128723145,
      "learning_rate": 0.0005954118502008386,
      "loss": 3.3356,
      "step": 12843
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.9882054328918457,
      "learning_rate": 0.0005954111375020693,
      "loss": 3.2092,
      "step": 12844
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8290127515792847,
      "learning_rate": 0.0005954104247483775,
      "loss": 3.1312,
      "step": 12845
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7623661756515503,
      "learning_rate": 0.0005954097119397635,
      "loss": 3.147,
      "step": 12846
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.848958730697632,
      "learning_rate": 0.0005954089990762272,
      "loss": 2.9012,
      "step": 12847
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.486234188079834,
      "learning_rate": 0.0005954082861577688,
      "loss": 3.3088,
      "step": 12848
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2321425676345825,
      "learning_rate": 0.0005954075731843884,
      "loss": 3.104,
      "step": 12849
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4291287660598755,
      "learning_rate": 0.000595406860156086,
      "loss": 2.9415,
      "step": 12850
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.643768548965454,
      "learning_rate": 0.0005954061470728621,
      "loss": 3.4123,
      "step": 12851
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3481656312942505,
      "learning_rate": 0.0005954054339347166,
      "loss": 3.1921,
      "step": 12852
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5068384408950806,
      "learning_rate": 0.0005954047207416495,
      "loss": 3.3438,
      "step": 12853
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.486419916152954,
      "learning_rate": 0.0005954040074936612,
      "loss": 3.229,
      "step": 12854
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9493639469146729,
      "learning_rate": 0.0005954032941907518,
      "loss": 3.1925,
      "step": 12855
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.461034893989563,
      "learning_rate": 0.0005954025808329213,
      "loss": 3.0945,
      "step": 12856
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.826867699623108,
      "learning_rate": 0.0005954018674201698,
      "loss": 3.1927,
      "step": 12857
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4380550384521484,
      "learning_rate": 0.0005954011539524976,
      "loss": 3.1491,
      "step": 12858
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.084291696548462,
      "learning_rate": 0.0005954004404299047,
      "loss": 3.1223,
      "step": 12859
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.645494818687439,
      "learning_rate": 0.0005953997268523912,
      "loss": 3.0093,
      "step": 12860
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.548904299736023,
      "learning_rate": 0.0005953990132199575,
      "loss": 3.1806,
      "step": 12861
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.493502140045166,
      "learning_rate": 0.0005953982995326034,
      "loss": 2.9384,
      "step": 12862
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5716817378997803,
      "learning_rate": 0.0005953975857903293,
      "loss": 2.9656,
      "step": 12863
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6023763418197632,
      "learning_rate": 0.0005953968719931352,
      "loss": 3.2723,
      "step": 12864
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3821685314178467,
      "learning_rate": 0.0005953961581410212,
      "loss": 3.268,
      "step": 12865
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.575088381767273,
      "learning_rate": 0.0005953954442339875,
      "loss": 2.9607,
      "step": 12866
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0131189823150635,
      "learning_rate": 0.0005953947302720341,
      "loss": 2.9497,
      "step": 12867
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6782386302947998,
      "learning_rate": 0.0005953940162551613,
      "loss": 3.3333,
      "step": 12868
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.469845175743103,
      "learning_rate": 0.0005953933021833693,
      "loss": 3.3351,
      "step": 12869
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4968760013580322,
      "learning_rate": 0.0005953925880566581,
      "loss": 3.3037,
      "step": 12870
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1678261756896973,
      "learning_rate": 0.0005953918738750277,
      "loss": 3.1578,
      "step": 12871
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6218743324279785,
      "learning_rate": 0.0005953911596384785,
      "loss": 2.8121,
      "step": 12872
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9858894348144531,
      "learning_rate": 0.0005953904453470104,
      "loss": 3.3416,
      "step": 12873
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5441595315933228,
      "learning_rate": 0.0005953897310006236,
      "loss": 3.2651,
      "step": 12874
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4146943092346191,
      "learning_rate": 0.0005953890165993184,
      "loss": 3.1331,
      "step": 12875
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4533536434173584,
      "learning_rate": 0.0005953883021430947,
      "loss": 2.7639,
      "step": 12876
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.527376651763916,
      "learning_rate": 0.0005953875876319528,
      "loss": 3.0675,
      "step": 12877
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3046107292175293,
      "learning_rate": 0.0005953868730658929,
      "loss": 3.1225,
      "step": 12878
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.257921576499939,
      "learning_rate": 0.0005953861584449148,
      "loss": 2.8963,
      "step": 12879
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6743650436401367,
      "learning_rate": 0.0005953854437690189,
      "loss": 3.1469,
      "step": 12880
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7837088108062744,
      "learning_rate": 0.0005953847290382053,
      "loss": 3.2155,
      "step": 12881
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7786692380905151,
      "learning_rate": 0.0005953840142524741,
      "loss": 3.1337,
      "step": 12882
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9075584411621094,
      "learning_rate": 0.0005953832994118254,
      "loss": 3.0938,
      "step": 12883
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.505906105041504,
      "learning_rate": 0.0005953825845162594,
      "loss": 3.1589,
      "step": 12884
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.341732382774353,
      "learning_rate": 0.0005953818695657762,
      "loss": 3.2233,
      "step": 12885
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7154064178466797,
      "learning_rate": 0.0005953811545603759,
      "loss": 3.1925,
      "step": 12886
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8688879013061523,
      "learning_rate": 0.0005953804395000587,
      "loss": 3.2036,
      "step": 12887
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6195045709609985,
      "learning_rate": 0.0005953797243848247,
      "loss": 3.065,
      "step": 12888
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9224268198013306,
      "learning_rate": 0.000595379009214674,
      "loss": 3.0655,
      "step": 12889
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0886833667755127,
      "learning_rate": 0.0005953782939896069,
      "loss": 3.1889,
      "step": 12890
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.879629373550415,
      "learning_rate": 0.0005953775787096232,
      "loss": 3.1501,
      "step": 12891
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3799840211868286,
      "learning_rate": 0.0005953768633747234,
      "loss": 3.1824,
      "step": 12892
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7172013521194458,
      "learning_rate": 0.0005953761479849073,
      "loss": 3.0938,
      "step": 12893
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7052571773529053,
      "learning_rate": 0.0005953754325401753,
      "loss": 3.1179,
      "step": 12894
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.614654302597046,
      "learning_rate": 0.0005953747170405274,
      "loss": 3.1459,
      "step": 12895
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7706414461135864,
      "learning_rate": 0.0005953740014859639,
      "loss": 3.1919,
      "step": 12896
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.063861608505249,
      "learning_rate": 0.0005953732858764847,
      "loss": 3.0906,
      "step": 12897
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.160088539123535,
      "learning_rate": 0.0005953725702120899,
      "loss": 2.9103,
      "step": 12898
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5718673467636108,
      "learning_rate": 0.0005953718544927798,
      "loss": 3.0326,
      "step": 12899
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.965855598449707,
      "learning_rate": 0.0005953711387185547,
      "loss": 2.9322,
      "step": 12900
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.119380474090576,
      "learning_rate": 0.0005953704228894144,
      "loss": 3.3031,
      "step": 12901
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7079041004180908,
      "learning_rate": 0.0005953697070053592,
      "loss": 3.2516,
      "step": 12902
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9160196781158447,
      "learning_rate": 0.0005953689910663891,
      "loss": 3.2651,
      "step": 12903
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.429032802581787,
      "learning_rate": 0.0005953682750725045,
      "loss": 3.1713,
      "step": 12904
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.474744439125061,
      "learning_rate": 0.0005953675590237051,
      "loss": 3.2782,
      "step": 12905
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1149446964263916,
      "learning_rate": 0.0005953668429199915,
      "loss": 2.9039,
      "step": 12906
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.025623083114624,
      "learning_rate": 0.0005953661267613636,
      "loss": 3.209,
      "step": 12907
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.378564715385437,
      "learning_rate": 0.0005953654105478216,
      "loss": 3.1292,
      "step": 12908
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8908952474594116,
      "learning_rate": 0.0005953646942793655,
      "loss": 3.1807,
      "step": 12909
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.011032819747925,
      "learning_rate": 0.0005953639779559956,
      "loss": 3.2187,
      "step": 12910
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8307591676712036,
      "learning_rate": 0.0005953632615777119,
      "loss": 2.9953,
      "step": 12911
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2310373783111572,
      "learning_rate": 0.0005953625451445146,
      "loss": 3.1457,
      "step": 12912
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7470916509628296,
      "learning_rate": 0.0005953618286564038,
      "loss": 3.299,
      "step": 12913
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5315375328063965,
      "learning_rate": 0.0005953611121133798,
      "loss": 2.8779,
      "step": 12914
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.56754732131958,
      "learning_rate": 0.0005953603955154424,
      "loss": 3.3621,
      "step": 12915
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3597640991210938,
      "learning_rate": 0.0005953596788625921,
      "loss": 3.2986,
      "step": 12916
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3964985609054565,
      "learning_rate": 0.0005953589621548288,
      "loss": 3.0253,
      "step": 12917
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.51763916015625,
      "learning_rate": 0.0005953582453921526,
      "loss": 3.2996,
      "step": 12918
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4053821563720703,
      "learning_rate": 0.0005953575285745639,
      "loss": 3.149,
      "step": 12919
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3714373111724854,
      "learning_rate": 0.0005953568117020626,
      "loss": 3.0183,
      "step": 12920
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.906749963760376,
      "learning_rate": 0.0005953560947746489,
      "loss": 3.1145,
      "step": 12921
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4370800256729126,
      "learning_rate": 0.0005953553777923228,
      "loss": 3.1071,
      "step": 12922
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6511380672454834,
      "learning_rate": 0.0005953546607550846,
      "loss": 2.9166,
      "step": 12923
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6514047384262085,
      "learning_rate": 0.0005953539436629344,
      "loss": 3.3917,
      "step": 12924
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3541311025619507,
      "learning_rate": 0.0005953532265158724,
      "loss": 3.29,
      "step": 12925
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7147876024246216,
      "learning_rate": 0.0005953525093138986,
      "loss": 3.4922,
      "step": 12926
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4916309118270874,
      "learning_rate": 0.0005953517920570133,
      "loss": 3.3139,
      "step": 12927
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8500990867614746,
      "learning_rate": 0.0005953510747452165,
      "loss": 3.2145,
      "step": 12928
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.221811056137085,
      "learning_rate": 0.0005953503573785082,
      "loss": 2.9659,
      "step": 12929
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7699917554855347,
      "learning_rate": 0.0005953496399568888,
      "loss": 3.0593,
      "step": 12930
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0568153858184814,
      "learning_rate": 0.0005953489224803584,
      "loss": 3.0735,
      "step": 12931
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8177211284637451,
      "learning_rate": 0.000595348204948917,
      "loss": 2.8632,
      "step": 12932
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4304113388061523,
      "learning_rate": 0.0005953474873625648,
      "loss": 2.993,
      "step": 12933
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6362247467041016,
      "learning_rate": 0.0005953467697213018,
      "loss": 3.2812,
      "step": 12934
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.164818525314331,
      "learning_rate": 0.0005953460520251284,
      "loss": 3.1806,
      "step": 12935
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.269620656967163,
      "learning_rate": 0.0005953453342740446,
      "loss": 3.073,
      "step": 12936
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.3328025341033936,
      "learning_rate": 0.0005953446164680505,
      "loss": 3.2644,
      "step": 12937
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5330963134765625,
      "learning_rate": 0.0005953438986071463,
      "loss": 3.0101,
      "step": 12938
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6787736415863037,
      "learning_rate": 0.0005953431806913319,
      "loss": 2.989,
      "step": 12939
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.19608211517334,
      "learning_rate": 0.0005953424627206079,
      "loss": 3.1516,
      "step": 12940
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4705363512039185,
      "learning_rate": 0.000595341744694974,
      "loss": 3.2675,
      "step": 12941
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5644749402999878,
      "learning_rate": 0.0005953410266144305,
      "loss": 3.3385,
      "step": 12942
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1963582038879395,
      "learning_rate": 0.0005953403084789776,
      "loss": 3.3191,
      "step": 12943
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4062520265579224,
      "learning_rate": 0.0005953395902886153,
      "loss": 3.2421,
      "step": 12944
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4917192459106445,
      "learning_rate": 0.0005953388720433438,
      "loss": 3.2409,
      "step": 12945
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5433952808380127,
      "learning_rate": 0.0005953381537431632,
      "loss": 2.8817,
      "step": 12946
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5047221183776855,
      "learning_rate": 0.0005953374353880737,
      "loss": 3.0551,
      "step": 12947
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4901965856552124,
      "learning_rate": 0.0005953367169780755,
      "loss": 3.2864,
      "step": 12948
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6738770008087158,
      "learning_rate": 0.0005953359985131685,
      "loss": 3.0988,
      "step": 12949
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4955896139144897,
      "learning_rate": 0.000595335279993353,
      "loss": 3.1661,
      "step": 12950
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4745874404907227,
      "learning_rate": 0.000595334561418629,
      "loss": 3.3025,
      "step": 12951
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7228995561599731,
      "learning_rate": 0.0005953338427889969,
      "loss": 3.4223,
      "step": 12952
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4370890855789185,
      "learning_rate": 0.0005953331241044566,
      "loss": 3.0729,
      "step": 12953
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5938960313796997,
      "learning_rate": 0.0005953324053650082,
      "loss": 3.3411,
      "step": 12954
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.771909713745117,
      "learning_rate": 0.000595331686570652,
      "loss": 3.1493,
      "step": 12955
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6708283424377441,
      "learning_rate": 0.0005953309677213882,
      "loss": 3.221,
      "step": 12956
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.222980499267578,
      "learning_rate": 0.0005953302488172166,
      "loss": 3.1856,
      "step": 12957
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7061421871185303,
      "learning_rate": 0.0005953295298581376,
      "loss": 3.1267,
      "step": 12958
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.712458372116089,
      "learning_rate": 0.0005953288108441513,
      "loss": 3.1122,
      "step": 12959
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4930188655853271,
      "learning_rate": 0.0005953280917752578,
      "loss": 3.2346,
      "step": 12960
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0784411430358887,
      "learning_rate": 0.0005953273726514572,
      "loss": 3.1581,
      "step": 12961
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9563682079315186,
      "learning_rate": 0.0005953266534727497,
      "loss": 2.9312,
      "step": 12962
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6038211584091187,
      "learning_rate": 0.0005953259342391353,
      "loss": 2.9038,
      "step": 12963
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.528615951538086,
      "learning_rate": 0.0005953252149506143,
      "loss": 3.0192,
      "step": 12964
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3311710357666016,
      "learning_rate": 0.0005953244956071868,
      "loss": 3.236,
      "step": 12965
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6025457382202148,
      "learning_rate": 0.0005953237762088529,
      "loss": 3.2519,
      "step": 12966
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5976077318191528,
      "learning_rate": 0.0005953230567556127,
      "loss": 3.1977,
      "step": 12967
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9012373685836792,
      "learning_rate": 0.0005953223372474664,
      "loss": 3.1809,
      "step": 12968
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.756090521812439,
      "learning_rate": 0.000595321617684414,
      "loss": 3.0689,
      "step": 12969
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4271860122680664,
      "learning_rate": 0.0005953208980664559,
      "loss": 3.4006,
      "step": 12970
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6866339445114136,
      "learning_rate": 0.000595320178393592,
      "loss": 3.2271,
      "step": 12971
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.241757869720459,
      "learning_rate": 0.0005953194586658224,
      "loss": 2.8848,
      "step": 12972
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.882031798362732,
      "learning_rate": 0.0005953187388831475,
      "loss": 3.0429,
      "step": 12973
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9914337396621704,
      "learning_rate": 0.0005953180190455673,
      "loss": 2.9188,
      "step": 12974
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6682404279708862,
      "learning_rate": 0.0005953172991530818,
      "loss": 3.0862,
      "step": 12975
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.372866153717041,
      "learning_rate": 0.0005953165792056911,
      "loss": 3.2492,
      "step": 12976
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.414994239807129,
      "learning_rate": 0.0005953158592033956,
      "loss": 3.167,
      "step": 12977
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3384803533554077,
      "learning_rate": 0.0005953151391461954,
      "loss": 3.7078,
      "step": 12978
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9990018606185913,
      "learning_rate": 0.0005953144190340905,
      "loss": 3.3502,
      "step": 12979
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4724220037460327,
      "learning_rate": 0.000595313698867081,
      "loss": 2.9625,
      "step": 12980
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7813193798065186,
      "learning_rate": 0.0005953129786451672,
      "loss": 3.014,
      "step": 12981
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1806455850601196,
      "learning_rate": 0.0005953122583683491,
      "loss": 3.2718,
      "step": 12982
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4342344999313354,
      "learning_rate": 0.0005953115380366268,
      "loss": 3.1488,
      "step": 12983
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4919307231903076,
      "learning_rate": 0.0005953108176500006,
      "loss": 3.3376,
      "step": 12984
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4435436725616455,
      "learning_rate": 0.0005953100972084706,
      "loss": 3.1954,
      "step": 12985
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.562642216682434,
      "learning_rate": 0.0005953093767120368,
      "loss": 3.1518,
      "step": 12986
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4292978048324585,
      "learning_rate": 0.0005953086561606995,
      "loss": 3.1867,
      "step": 12987
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1428658962249756,
      "learning_rate": 0.0005953079355544587,
      "loss": 3.2701,
      "step": 12988
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4921396970748901,
      "learning_rate": 0.0005953072148933144,
      "loss": 3.1317,
      "step": 12989
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1631662845611572,
      "learning_rate": 0.0005953064941772671,
      "loss": 3.3561,
      "step": 12990
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9888718128204346,
      "learning_rate": 0.0005953057734063167,
      "loss": 3.1255,
      "step": 12991
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7310289144515991,
      "learning_rate": 0.0005953050525804634,
      "loss": 3.2157,
      "step": 12992
    },
    {
      "epoch": 0.17,
      "grad_norm": 4.374833583831787,
      "learning_rate": 0.0005953043316997074,
      "loss": 3.1508,
      "step": 12993
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.272311210632324,
      "learning_rate": 0.0005953036107640486,
      "loss": 3.2601,
      "step": 12994
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7219722270965576,
      "learning_rate": 0.0005953028897734873,
      "loss": 2.8987,
      "step": 12995
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6234428882598877,
      "learning_rate": 0.0005953021687280237,
      "loss": 3.051,
      "step": 12996
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.689513921737671,
      "learning_rate": 0.0005953014476276578,
      "loss": 3.2549,
      "step": 12997
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4694602489471436,
      "learning_rate": 0.0005953007264723897,
      "loss": 3.1629,
      "step": 12998
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.418416976928711,
      "learning_rate": 0.0005953000052622199,
      "loss": 3.0166,
      "step": 12999
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7595710754394531,
      "learning_rate": 0.000595299283997148,
      "loss": 3.2948,
      "step": 13000
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.282764196395874,
      "learning_rate": 0.0005952985626771745,
      "loss": 2.724,
      "step": 13001
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9819514751434326,
      "learning_rate": 0.0005952978413022994,
      "loss": 3.0861,
      "step": 13002
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5634011030197144,
      "learning_rate": 0.0005952971198725228,
      "loss": 3.2009,
      "step": 13003
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.203089952468872,
      "learning_rate": 0.000595296398387845,
      "loss": 3.0693,
      "step": 13004
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.9610438346862793,
      "learning_rate": 0.0005952956768482659,
      "loss": 3.0092,
      "step": 13005
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4846924543380737,
      "learning_rate": 0.0005952949552537858,
      "loss": 3.0797,
      "step": 13006
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4623923301696777,
      "learning_rate": 0.0005952942336044049,
      "loss": 3.3424,
      "step": 13007
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5183186531066895,
      "learning_rate": 0.0005952935119001231,
      "loss": 3.0656,
      "step": 13008
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.419830083847046,
      "learning_rate": 0.0005952927901409407,
      "loss": 3.0697,
      "step": 13009
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5792555809020996,
      "learning_rate": 0.0005952920683268578,
      "loss": 3.3718,
      "step": 13010
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1256942749023438,
      "learning_rate": 0.0005952913464578745,
      "loss": 3.0062,
      "step": 13011
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.052316188812256,
      "learning_rate": 0.0005952906245339911,
      "loss": 3.1649,
      "step": 13012
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8146271705627441,
      "learning_rate": 0.0005952899025552075,
      "loss": 2.9656,
      "step": 13013
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5828596353530884,
      "learning_rate": 0.0005952891805215239,
      "loss": 3.2433,
      "step": 13014
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4284669160842896,
      "learning_rate": 0.0005952884584329405,
      "loss": 2.9557,
      "step": 13015
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4579710960388184,
      "learning_rate": 0.0005952877362894574,
      "loss": 2.9779,
      "step": 13016
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4281907081604004,
      "learning_rate": 0.0005952870140910747,
      "loss": 2.9526,
      "step": 13017
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4524924755096436,
      "learning_rate": 0.0005952862918377926,
      "loss": 2.9581,
      "step": 13018
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5494678020477295,
      "learning_rate": 0.0005952855695296113,
      "loss": 3.1976,
      "step": 13019
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.571044921875,
      "learning_rate": 0.0005952848471665307,
      "loss": 3.1841,
      "step": 13020
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.691182017326355,
      "learning_rate": 0.0005952841247485511,
      "loss": 3.0771,
      "step": 13021
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7488571405410767,
      "learning_rate": 0.0005952834022756726,
      "loss": 3.0938,
      "step": 13022
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9801040887832642,
      "learning_rate": 0.0005952826797478954,
      "loss": 3.067,
      "step": 13023
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6049079895019531,
      "learning_rate": 0.0005952819571652196,
      "loss": 3.1316,
      "step": 13024
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0745344161987305,
      "learning_rate": 0.0005952812345276453,
      "loss": 3.4866,
      "step": 13025
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.390192747116089,
      "learning_rate": 0.0005952805118351726,
      "loss": 3.1036,
      "step": 13026
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4501631259918213,
      "learning_rate": 0.0005952797890878016,
      "loss": 3.3102,
      "step": 13027
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7464706897735596,
      "learning_rate": 0.0005952790662855326,
      "loss": 2.7705,
      "step": 13028
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4433581829071045,
      "learning_rate": 0.0005952783434283657,
      "loss": 3.1542,
      "step": 13029
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3451347351074219,
      "learning_rate": 0.0005952776205163008,
      "loss": 3.1478,
      "step": 13030
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4819127321243286,
      "learning_rate": 0.0005952768975493384,
      "loss": 3.0405,
      "step": 13031
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5578843355178833,
      "learning_rate": 0.0005952761745274783,
      "loss": 3.1982,
      "step": 13032
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.222445487976074,
      "learning_rate": 0.000595275451450721,
      "loss": 3.1039,
      "step": 13033
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.639174222946167,
      "learning_rate": 0.0005952747283190663,
      "loss": 3.0772,
      "step": 13034
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7287414073944092,
      "learning_rate": 0.0005952740051325144,
      "loss": 3.4026,
      "step": 13035
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.422898530960083,
      "learning_rate": 0.0005952732818910656,
      "loss": 3.1527,
      "step": 13036
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7006713151931763,
      "learning_rate": 0.0005952725585947198,
      "loss": 2.9912,
      "step": 13037
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6494485139846802,
      "learning_rate": 0.0005952718352434773,
      "loss": 3.225,
      "step": 13038
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.835188627243042,
      "learning_rate": 0.0005952711118373382,
      "loss": 3.1674,
      "step": 13039
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4547439813613892,
      "learning_rate": 0.0005952703883763028,
      "loss": 3.0765,
      "step": 13040
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5365880727767944,
      "learning_rate": 0.0005952696648603708,
      "loss": 2.9372,
      "step": 13041
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4170384407043457,
      "learning_rate": 0.0005952689412895427,
      "loss": 3.1138,
      "step": 13042
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.461394190788269,
      "learning_rate": 0.0005952682176638185,
      "loss": 3.07,
      "step": 13043
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.425012469291687,
      "learning_rate": 0.0005952674939831984,
      "loss": 3.0155,
      "step": 13044
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8295000791549683,
      "learning_rate": 0.0005952667702476825,
      "loss": 3.0495,
      "step": 13045
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6468511819839478,
      "learning_rate": 0.000595266046457271,
      "loss": 3.1663,
      "step": 13046
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.063372850418091,
      "learning_rate": 0.0005952653226119638,
      "loss": 3.2995,
      "step": 13047
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4130710363388062,
      "learning_rate": 0.0005952645987117613,
      "loss": 3.0098,
      "step": 13048
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.471008539199829,
      "learning_rate": 0.0005952638747566635,
      "loss": 3.0978,
      "step": 13049
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6067873239517212,
      "learning_rate": 0.0005952631507466705,
      "loss": 3.0147,
      "step": 13050
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4623756408691406,
      "learning_rate": 0.0005952624266817827,
      "loss": 3.3468,
      "step": 13051
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.681957721710205,
      "learning_rate": 0.000595261702562,
      "loss": 3.128,
      "step": 13052
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6007583141326904,
      "learning_rate": 0.0005952609783873225,
      "loss": 2.9767,
      "step": 13053
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8849953413009644,
      "learning_rate": 0.0005952602541577504,
      "loss": 3.1848,
      "step": 13054
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.321574091911316,
      "learning_rate": 0.000595259529873284,
      "loss": 3.0131,
      "step": 13055
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3149383068084717,
      "learning_rate": 0.0005952588055339231,
      "loss": 3.09,
      "step": 13056
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.659894585609436,
      "learning_rate": 0.0005952580811396681,
      "loss": 3.4886,
      "step": 13057
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4903452396392822,
      "learning_rate": 0.000595257356690519,
      "loss": 3.2322,
      "step": 13058
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.247616767883301,
      "learning_rate": 0.000595256632186476,
      "loss": 2.8073,
      "step": 13059
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8446184396743774,
      "learning_rate": 0.0005952559076275393,
      "loss": 3.2177,
      "step": 13060
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.291570782661438,
      "learning_rate": 0.0005952551830137088,
      "loss": 3.4517,
      "step": 13061
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4489866495132446,
      "learning_rate": 0.0005952544583449849,
      "loss": 3.0931,
      "step": 13062
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4027364253997803,
      "learning_rate": 0.0005952537336213677,
      "loss": 3.0004,
      "step": 13063
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.154080867767334,
      "learning_rate": 0.000595253008842857,
      "loss": 3.0583,
      "step": 13064
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8978322744369507,
      "learning_rate": 0.0005952522840094534,
      "loss": 3.1589,
      "step": 13065
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5595275163650513,
      "learning_rate": 0.0005952515591211568,
      "loss": 3.0932,
      "step": 13066
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.881589412689209,
      "learning_rate": 0.0005952508341779673,
      "loss": 3.1092,
      "step": 13067
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.449864387512207,
      "learning_rate": 0.0005952501091798852,
      "loss": 3.0826,
      "step": 13068
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7216740846633911,
      "learning_rate": 0.0005952493841269105,
      "loss": 3.1378,
      "step": 13069
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3362154960632324,
      "learning_rate": 0.0005952486590190432,
      "loss": 3.1049,
      "step": 13070
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5873044729232788,
      "learning_rate": 0.0005952479338562838,
      "loss": 3.0882,
      "step": 13071
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8737679719924927,
      "learning_rate": 0.0005952472086386321,
      "loss": 3.4791,
      "step": 13072
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.193892478942871,
      "learning_rate": 0.0005952464833660885,
      "loss": 3.0124,
      "step": 13073
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.0609066486358643,
      "learning_rate": 0.0005952457580386529,
      "loss": 3.1094,
      "step": 13074
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4538748264312744,
      "learning_rate": 0.0005952450326563257,
      "loss": 3.2873,
      "step": 13075
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7291356325149536,
      "learning_rate": 0.0005952443072191067,
      "loss": 2.8996,
      "step": 13076
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0060980319976807,
      "learning_rate": 0.0005952435817269962,
      "loss": 3.1929,
      "step": 13077
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6492836475372314,
      "learning_rate": 0.0005952428561799945,
      "loss": 3.1533,
      "step": 13078
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5599298477172852,
      "learning_rate": 0.0005952421305781014,
      "loss": 3.078,
      "step": 13079
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7245665788650513,
      "learning_rate": 0.0005952414049213173,
      "loss": 3.1171,
      "step": 13080
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0917763710021973,
      "learning_rate": 0.0005952406792096422,
      "loss": 3.193,
      "step": 13081
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.334183931350708,
      "learning_rate": 0.0005952399534430765,
      "loss": 2.9006,
      "step": 13082
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4139184951782227,
      "learning_rate": 0.0005952392276216199,
      "loss": 3.1881,
      "step": 13083
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8502380847930908,
      "learning_rate": 0.0005952385017452728,
      "loss": 3.2588,
      "step": 13084
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4292793273925781,
      "learning_rate": 0.0005952377758140353,
      "loss": 3.1624,
      "step": 13085
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6163499355316162,
      "learning_rate": 0.0005952370498279075,
      "loss": 3.0018,
      "step": 13086
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8547332286834717,
      "learning_rate": 0.0005952363237868896,
      "loss": 3.0949,
      "step": 13087
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5573068857192993,
      "learning_rate": 0.0005952355976909817,
      "loss": 3.1334,
      "step": 13088
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6905895471572876,
      "learning_rate": 0.0005952348715401839,
      "loss": 3.0469,
      "step": 13089
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5732109546661377,
      "learning_rate": 0.0005952341453344964,
      "loss": 2.9397,
      "step": 13090
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1891471147537231,
      "learning_rate": 0.0005952334190739193,
      "loss": 3.1771,
      "step": 13091
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.482404351234436,
      "learning_rate": 0.0005952326927584527,
      "loss": 3.1245,
      "step": 13092
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7150133848190308,
      "learning_rate": 0.0005952319663880967,
      "loss": 3.0594,
      "step": 13093
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.472411870956421,
      "learning_rate": 0.0005952312399628516,
      "loss": 3.1482,
      "step": 13094
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4629192352294922,
      "learning_rate": 0.0005952305134827174,
      "loss": 3.0009,
      "step": 13095
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.657314658164978,
      "learning_rate": 0.0005952297869476944,
      "loss": 2.9749,
      "step": 13096
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5237984657287598,
      "learning_rate": 0.0005952290603577825,
      "loss": 3.061,
      "step": 13097
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.161649703979492,
      "learning_rate": 0.0005952283337129819,
      "loss": 3.1075,
      "step": 13098
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4162681102752686,
      "learning_rate": 0.0005952276070132929,
      "loss": 3.0324,
      "step": 13099
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3270384073257446,
      "learning_rate": 0.0005952268802587153,
      "loss": 2.8916,
      "step": 13100
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8716381788253784,
      "learning_rate": 0.0005952261534492496,
      "loss": 3.1011,
      "step": 13101
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3299179077148438,
      "learning_rate": 0.0005952254265848959,
      "loss": 3.4927,
      "step": 13102
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6658649444580078,
      "learning_rate": 0.0005952246996656541,
      "loss": 3.4196,
      "step": 13103
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9612243175506592,
      "learning_rate": 0.0005952239726915245,
      "loss": 3.1463,
      "step": 13104
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.052016496658325,
      "learning_rate": 0.0005952232456625071,
      "loss": 3.3008,
      "step": 13105
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3209813833236694,
      "learning_rate": 0.0005952225185786021,
      "loss": 3.0691,
      "step": 13106
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.332491397857666,
      "learning_rate": 0.0005952217914398098,
      "loss": 3.0016,
      "step": 13107
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0200347900390625,
      "learning_rate": 0.0005952210642461302,
      "loss": 3.1268,
      "step": 13108
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8027695417404175,
      "learning_rate": 0.0005952203369975634,
      "loss": 3.3988,
      "step": 13109
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0862245559692383,
      "learning_rate": 0.0005952196096941095,
      "loss": 3.1526,
      "step": 13110
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.246112823486328,
      "learning_rate": 0.0005952188823357687,
      "loss": 2.9128,
      "step": 13111
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4602171182632446,
      "learning_rate": 0.0005952181549225412,
      "loss": 3.2715,
      "step": 13112
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6334649324417114,
      "learning_rate": 0.000595217427454427,
      "loss": 3.0963,
      "step": 13113
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8066827058792114,
      "learning_rate": 0.0005952166999314264,
      "loss": 3.1859,
      "step": 13114
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.513688087463379,
      "learning_rate": 0.0005952159723535393,
      "loss": 3.2955,
      "step": 13115
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.025477886199951,
      "learning_rate": 0.0005952152447207661,
      "loss": 2.8167,
      "step": 13116
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7090522050857544,
      "learning_rate": 0.0005952145170331068,
      "loss": 3.0451,
      "step": 13117
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4321199655532837,
      "learning_rate": 0.0005952137892905615,
      "loss": 3.0098,
      "step": 13118
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.597611427307129,
      "learning_rate": 0.0005952130614931305,
      "loss": 3.1198,
      "step": 13119
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9171682596206665,
      "learning_rate": 0.0005952123336408137,
      "loss": 2.9009,
      "step": 13120
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4591418504714966,
      "learning_rate": 0.0005952116057336114,
      "loss": 3.2792,
      "step": 13121
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.52202570438385,
      "learning_rate": 0.0005952108777715236,
      "loss": 3.2001,
      "step": 13122
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8543384075164795,
      "learning_rate": 0.0005952101497545507,
      "loss": 3.1638,
      "step": 13123
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7366797924041748,
      "learning_rate": 0.0005952094216826925,
      "loss": 3.0466,
      "step": 13124
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5277799367904663,
      "learning_rate": 0.0005952086935559494,
      "loss": 3.0421,
      "step": 13125
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.7950809001922607,
      "learning_rate": 0.0005952079653743214,
      "loss": 2.8179,
      "step": 13126
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0043163299560547,
      "learning_rate": 0.0005952072371378087,
      "loss": 3.0763,
      "step": 13127
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.302022933959961,
      "learning_rate": 0.0005952065088464113,
      "loss": 3.2254,
      "step": 13128
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9515163898468018,
      "learning_rate": 0.0005952057805001295,
      "loss": 3.4076,
      "step": 13129
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.9485461711883545,
      "learning_rate": 0.0005952050520989634,
      "loss": 2.9501,
      "step": 13130
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.221858263015747,
      "learning_rate": 0.0005952043236429131,
      "loss": 3.2516,
      "step": 13131
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5720818042755127,
      "learning_rate": 0.0005952035951319787,
      "loss": 3.1372,
      "step": 13132
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.743180751800537,
      "learning_rate": 0.0005952028665661604,
      "loss": 3.1786,
      "step": 13133
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.124854326248169,
      "learning_rate": 0.0005952021379454583,
      "loss": 2.9119,
      "step": 13134
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3864599466323853,
      "learning_rate": 0.0005952014092698726,
      "loss": 3.1524,
      "step": 13135
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.242089033126831,
      "learning_rate": 0.0005952006805394033,
      "loss": 3.1478,
      "step": 13136
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.260313034057617,
      "learning_rate": 0.0005951999517540507,
      "loss": 3.1823,
      "step": 13137
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9609146118164062,
      "learning_rate": 0.0005951992229138148,
      "loss": 3.2192,
      "step": 13138
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7650423049926758,
      "learning_rate": 0.0005951984940186959,
      "loss": 3.1605,
      "step": 13139
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.528265118598938,
      "learning_rate": 0.0005951977650686939,
      "loss": 3.2031,
      "step": 13140
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8188235759735107,
      "learning_rate": 0.0005951970360638092,
      "loss": 3.1046,
      "step": 13141
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.477772831916809,
      "learning_rate": 0.0005951963070040417,
      "loss": 3.2521,
      "step": 13142
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9967955350875854,
      "learning_rate": 0.0005951955778893916,
      "loss": 3.0077,
      "step": 13143
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.489219307899475,
      "learning_rate": 0.0005951948487198591,
      "loss": 3.2351,
      "step": 13144
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6412118673324585,
      "learning_rate": 0.0005951941194954444,
      "loss": 3.267,
      "step": 13145
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.961707353591919,
      "learning_rate": 0.0005951933902161474,
      "loss": 3.3536,
      "step": 13146
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8991502523422241,
      "learning_rate": 0.0005951926608819685,
      "loss": 3.0773,
      "step": 13147
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.120105028152466,
      "learning_rate": 0.0005951919314929077,
      "loss": 3.0179,
      "step": 13148
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.832014560699463,
      "learning_rate": 0.000595191202048965,
      "loss": 3.3473,
      "step": 13149
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4232808351516724,
      "learning_rate": 0.0005951904725501408,
      "loss": 2.977,
      "step": 13150
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7580403089523315,
      "learning_rate": 0.0005951897429964351,
      "loss": 2.912,
      "step": 13151
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.53993558883667,
      "learning_rate": 0.000595189013387848,
      "loss": 3.3448,
      "step": 13152
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7873449325561523,
      "learning_rate": 0.0005951882837243799,
      "loss": 3.2086,
      "step": 13153
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2504959106445312,
      "learning_rate": 0.0005951875540060305,
      "loss": 3.14,
      "step": 13154
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3880152702331543,
      "learning_rate": 0.0005951868242328002,
      "loss": 3.1187,
      "step": 13155
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6316041946411133,
      "learning_rate": 0.0005951860944046892,
      "loss": 3.1533,
      "step": 13156
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5328254699707031,
      "learning_rate": 0.0005951853645216975,
      "loss": 2.8091,
      "step": 13157
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3154218196868896,
      "learning_rate": 0.0005951846345838252,
      "loss": 3.0307,
      "step": 13158
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2006192207336426,
      "learning_rate": 0.0005951839045910725,
      "loss": 3.0566,
      "step": 13159
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4980926513671875,
      "learning_rate": 0.0005951831745434396,
      "loss": 2.8117,
      "step": 13160
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3712756633758545,
      "learning_rate": 0.0005951824444409265,
      "loss": 3.2363,
      "step": 13161
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2692499160766602,
      "learning_rate": 0.0005951817142835335,
      "loss": 3.0852,
      "step": 13162
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5956404209136963,
      "learning_rate": 0.0005951809840712607,
      "loss": 3.2307,
      "step": 13163
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.284471869468689,
      "learning_rate": 0.0005951802538041081,
      "loss": 3.0902,
      "step": 13164
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.212341070175171,
      "learning_rate": 0.0005951795234820759,
      "loss": 2.9903,
      "step": 13165
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.276255488395691,
      "learning_rate": 0.0005951787931051643,
      "loss": 3.2296,
      "step": 13166
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3656387329101562,
      "learning_rate": 0.0005951780626733732,
      "loss": 3.0714,
      "step": 13167
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5283520221710205,
      "learning_rate": 0.0005951773321867032,
      "loss": 3.1974,
      "step": 13168
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4764615297317505,
      "learning_rate": 0.000595176601645154,
      "loss": 3.0578,
      "step": 13169
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6259807348251343,
      "learning_rate": 0.000595175871048726,
      "loss": 2.9475,
      "step": 13170
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1881344318389893,
      "learning_rate": 0.0005951751403974193,
      "loss": 2.8892,
      "step": 13171
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4360771179199219,
      "learning_rate": 0.0005951744096912337,
      "loss": 3.0856,
      "step": 13172
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.850796103477478,
      "learning_rate": 0.0005951736789301697,
      "loss": 2.9527,
      "step": 13173
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7688267230987549,
      "learning_rate": 0.0005951729481142275,
      "loss": 3.2066,
      "step": 13174
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1051723957061768,
      "learning_rate": 0.000595172217243407,
      "loss": 2.923,
      "step": 13175
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.612197756767273,
      "learning_rate": 0.0005951714863177084,
      "loss": 3.1291,
      "step": 13176
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.554460883140564,
      "learning_rate": 0.0005951707553371318,
      "loss": 2.9288,
      "step": 13177
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6419601440429688,
      "learning_rate": 0.0005951700243016773,
      "loss": 3.3307,
      "step": 13178
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0407485961914062,
      "learning_rate": 0.0005951692932113454,
      "loss": 2.888,
      "step": 13179
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.862282633781433,
      "learning_rate": 0.0005951685620661358,
      "loss": 2.9773,
      "step": 13180
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5239845514297485,
      "learning_rate": 0.0005951678308660488,
      "loss": 3.275,
      "step": 13181
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2902858257293701,
      "learning_rate": 0.0005951670996110844,
      "loss": 3.2374,
      "step": 13182
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3934402465820312,
      "learning_rate": 0.0005951663683012431,
      "loss": 3.0895,
      "step": 13183
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2179242372512817,
      "learning_rate": 0.0005951656369365246,
      "loss": 3.3094,
      "step": 13184
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.539189100265503,
      "learning_rate": 0.0005951649055169294,
      "loss": 3.3524,
      "step": 13185
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5814799070358276,
      "learning_rate": 0.0005951641740424573,
      "loss": 3.155,
      "step": 13186
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7191511392593384,
      "learning_rate": 0.0005951634425131087,
      "loss": 3.5042,
      "step": 13187
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.592198371887207,
      "learning_rate": 0.0005951627109288837,
      "loss": 2.9973,
      "step": 13188
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5409438610076904,
      "learning_rate": 0.0005951619792897823,
      "loss": 3.3582,
      "step": 13189
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4381163120269775,
      "learning_rate": 0.0005951612475958047,
      "loss": 3.1779,
      "step": 13190
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6707431077957153,
      "learning_rate": 0.0005951605158469512,
      "loss": 3.1946,
      "step": 13191
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0587427616119385,
      "learning_rate": 0.0005951597840432217,
      "loss": 3.2652,
      "step": 13192
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.647987961769104,
      "learning_rate": 0.0005951590521846164,
      "loss": 2.9799,
      "step": 13193
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5632479190826416,
      "learning_rate": 0.0005951583202711354,
      "loss": 3.1089,
      "step": 13194
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8066447973251343,
      "learning_rate": 0.0005951575883027789,
      "loss": 3.1149,
      "step": 13195
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.909845232963562,
      "learning_rate": 0.0005951568562795472,
      "loss": 3.1619,
      "step": 13196
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5011497735977173,
      "learning_rate": 0.0005951561242014402,
      "loss": 3.0253,
      "step": 13197
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3145071268081665,
      "learning_rate": 0.000595155392068458,
      "loss": 3.3678,
      "step": 13198
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.365416407585144,
      "learning_rate": 0.000595154659880601,
      "loss": 3.11,
      "step": 13199
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7811717987060547,
      "learning_rate": 0.000595153927637869,
      "loss": 3.4008,
      "step": 13200
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.685939908027649,
      "learning_rate": 0.0005951531953402624,
      "loss": 3.1973,
      "step": 13201
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.860795497894287,
      "learning_rate": 0.0005951524629877813,
      "loss": 3.1364,
      "step": 13202
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5522888898849487,
      "learning_rate": 0.0005951517305804257,
      "loss": 3.3305,
      "step": 13203
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9118821620941162,
      "learning_rate": 0.0005951509981181959,
      "loss": 3.3235,
      "step": 13204
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6411221027374268,
      "learning_rate": 0.0005951502656010919,
      "loss": 3.1407,
      "step": 13205
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6638473272323608,
      "learning_rate": 0.0005951495330291139,
      "loss": 3.0827,
      "step": 13206
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8970855474472046,
      "learning_rate": 0.000595148800402262,
      "loss": 3.2269,
      "step": 13207
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7066456079483032,
      "learning_rate": 0.0005951480677205365,
      "loss": 3.3934,
      "step": 13208
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6701313257217407,
      "learning_rate": 0.0005951473349839374,
      "loss": 3.071,
      "step": 13209
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.037557601928711,
      "learning_rate": 0.0005951466021924647,
      "loss": 3.2397,
      "step": 13210
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6619807481765747,
      "learning_rate": 0.0005951458693461187,
      "loss": 2.8878,
      "step": 13211
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9008280038833618,
      "learning_rate": 0.0005951451364448995,
      "loss": 3.2742,
      "step": 13212
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.642180323600769,
      "learning_rate": 0.0005951444034888074,
      "loss": 3.1535,
      "step": 13213
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.850662350654602,
      "learning_rate": 0.0005951436704778422,
      "loss": 3.0618,
      "step": 13214
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0447239875793457,
      "learning_rate": 0.0005951429374120043,
      "loss": 3.3568,
      "step": 13215
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8087776899337769,
      "learning_rate": 0.0005951422042912937,
      "loss": 2.9257,
      "step": 13216
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9726430177688599,
      "learning_rate": 0.0005951414711157107,
      "loss": 2.9313,
      "step": 13217
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3851203918457031,
      "learning_rate": 0.0005951407378852553,
      "loss": 3.2647,
      "step": 13218
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5991584062576294,
      "learning_rate": 0.0005951400045999276,
      "loss": 2.9262,
      "step": 13219
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5565708875656128,
      "learning_rate": 0.0005951392712597279,
      "loss": 2.9301,
      "step": 13220
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5575820207595825,
      "learning_rate": 0.0005951385378646561,
      "loss": 3.1451,
      "step": 13221
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7633006572723389,
      "learning_rate": 0.0005951378044147126,
      "loss": 3.0003,
      "step": 13222
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1579577922821045,
      "learning_rate": 0.0005951370709098973,
      "loss": 2.9803,
      "step": 13223
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1724019050598145,
      "learning_rate": 0.0005951363373502106,
      "loss": 3.0279,
      "step": 13224
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.143697500228882,
      "learning_rate": 0.0005951356037356523,
      "loss": 3.4168,
      "step": 13225
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5498024225234985,
      "learning_rate": 0.0005951348700662228,
      "loss": 3.2039,
      "step": 13226
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.350363254547119,
      "learning_rate": 0.0005951341363419222,
      "loss": 3.3154,
      "step": 13227
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3676267862319946,
      "learning_rate": 0.0005951334025627506,
      "loss": 3.2813,
      "step": 13228
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.339402198791504,
      "learning_rate": 0.000595132668728708,
      "loss": 3.0356,
      "step": 13229
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.27737295627594,
      "learning_rate": 0.0005951319348397948,
      "loss": 3.2608,
      "step": 13230
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5595507621765137,
      "learning_rate": 0.000595131200896011,
      "loss": 3.2439,
      "step": 13231
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6057416200637817,
      "learning_rate": 0.0005951304668973566,
      "loss": 3.2732,
      "step": 13232
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2948720455169678,
      "learning_rate": 0.0005951297328438321,
      "loss": 3.0551,
      "step": 13233
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.420982837677002,
      "learning_rate": 0.0005951289987354373,
      "loss": 3.1142,
      "step": 13234
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.859066963195801,
      "learning_rate": 0.0005951282645721724,
      "loss": 3.2663,
      "step": 13235
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4674195051193237,
      "learning_rate": 0.0005951275303540376,
      "loss": 3.1916,
      "step": 13236
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.653523325920105,
      "learning_rate": 0.0005951267960810331,
      "loss": 3.0427,
      "step": 13237
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.515000343322754,
      "learning_rate": 0.0005951260617531589,
      "loss": 2.9468,
      "step": 13238
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5264184474945068,
      "learning_rate": 0.0005951253273704151,
      "loss": 3.1414,
      "step": 13239
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.9043405055999756,
      "learning_rate": 0.0005951245929328021,
      "loss": 2.8269,
      "step": 13240
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.621594786643982,
      "learning_rate": 0.0005951238584403197,
      "loss": 3.3621,
      "step": 13241
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1051571369171143,
      "learning_rate": 0.0005951231238929684,
      "loss": 3.2899,
      "step": 13242
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5182684659957886,
      "learning_rate": 0.000595122389290748,
      "loss": 3.4222,
      "step": 13243
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4425030946731567,
      "learning_rate": 0.0005951216546336588,
      "loss": 3.2006,
      "step": 13244
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5681229829788208,
      "learning_rate": 0.0005951209199217009,
      "loss": 3.1999,
      "step": 13245
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5410494804382324,
      "learning_rate": 0.0005951201851548745,
      "loss": 3.1997,
      "step": 13246
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3498296737670898,
      "learning_rate": 0.0005951194503331797,
      "loss": 3.1148,
      "step": 13247
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3570626974105835,
      "learning_rate": 0.0005951187154566165,
      "loss": 3.1418,
      "step": 13248
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2882051467895508,
      "learning_rate": 0.0005951179805251853,
      "loss": 3.1038,
      "step": 13249
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.516907811164856,
      "learning_rate": 0.0005951172455388861,
      "loss": 3.1624,
      "step": 13250
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5298874378204346,
      "learning_rate": 0.000595116510497719,
      "loss": 3.1861,
      "step": 13251
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5808627605438232,
      "learning_rate": 0.0005951157754016841,
      "loss": 3.1334,
      "step": 13252
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9503462314605713,
      "learning_rate": 0.0005951150402507817,
      "loss": 3.1154,
      "step": 13253
    },
    {
      "epoch": 0.17,
      "grad_norm": 4.011524677276611,
      "learning_rate": 0.0005951143050450117,
      "loss": 2.912,
      "step": 13254
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.025254249572754,
      "learning_rate": 0.0005951135697843745,
      "loss": 3.1554,
      "step": 13255
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.914830207824707,
      "learning_rate": 0.00059511283446887,
      "loss": 3.2143,
      "step": 13256
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.158989667892456,
      "learning_rate": 0.0005951120990984987,
      "loss": 3.2736,
      "step": 13257
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.7602593898773193,
      "learning_rate": 0.0005951113636732603,
      "loss": 3.1258,
      "step": 13258
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8345431089401245,
      "learning_rate": 0.0005951106281931551,
      "loss": 3.4406,
      "step": 13259
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2766895294189453,
      "learning_rate": 0.0005951098926581834,
      "loss": 2.9822,
      "step": 13260
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6068570613861084,
      "learning_rate": 0.0005951091570683451,
      "loss": 3.4006,
      "step": 13261
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.443406105041504,
      "learning_rate": 0.0005951084214236405,
      "loss": 3.2308,
      "step": 13262
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.72896409034729,
      "learning_rate": 0.0005951076857240696,
      "loss": 3.091,
      "step": 13263
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7899194955825806,
      "learning_rate": 0.0005951069499696327,
      "loss": 3.1779,
      "step": 13264
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4346773624420166,
      "learning_rate": 0.0005951062141603297,
      "loss": 3.1971,
      "step": 13265
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.274746894836426,
      "learning_rate": 0.0005951054782961611,
      "loss": 3.0768,
      "step": 13266
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3512258529663086,
      "learning_rate": 0.0005951047423771266,
      "loss": 3.2578,
      "step": 13267
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0020172595977783,
      "learning_rate": 0.0005951040064032267,
      "loss": 2.9885,
      "step": 13268
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.6170735359191895,
      "learning_rate": 0.0005951032703744614,
      "loss": 2.8757,
      "step": 13269
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6749573945999146,
      "learning_rate": 0.0005951025342908308,
      "loss": 3.2971,
      "step": 13270
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2369301319122314,
      "learning_rate": 0.000595101798152335,
      "loss": 3.0476,
      "step": 13271
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9694372415542603,
      "learning_rate": 0.0005951010619589742,
      "loss": 3.3014,
      "step": 13272
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.8855972290039062,
      "learning_rate": 0.0005951003257107486,
      "loss": 3.0875,
      "step": 13273
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9100024700164795,
      "learning_rate": 0.0005950995894076584,
      "loss": 3.174,
      "step": 13274
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5816736221313477,
      "learning_rate": 0.0005950988530497034,
      "loss": 3.2283,
      "step": 13275
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8614838123321533,
      "learning_rate": 0.0005950981166368841,
      "loss": 3.3067,
      "step": 13276
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.490370750427246,
      "learning_rate": 0.0005950973801692004,
      "loss": 3.0782,
      "step": 13277
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1718289852142334,
      "learning_rate": 0.0005950966436466526,
      "loss": 3.0733,
      "step": 13278
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3182672262191772,
      "learning_rate": 0.0005950959070692407,
      "loss": 2.9236,
      "step": 13279
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.73941969871521,
      "learning_rate": 0.0005950951704369649,
      "loss": 3.0452,
      "step": 13280
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3631590604782104,
      "learning_rate": 0.0005950944337498254,
      "loss": 3.0573,
      "step": 13281
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.616492509841919,
      "learning_rate": 0.0005950936970078222,
      "loss": 3.2876,
      "step": 13282
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1626336574554443,
      "learning_rate": 0.0005950929602109555,
      "loss": 3.1097,
      "step": 13283
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2632720470428467,
      "learning_rate": 0.0005950922233592255,
      "loss": 3.1275,
      "step": 13284
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9751908779144287,
      "learning_rate": 0.0005950914864526324,
      "loss": 3.0144,
      "step": 13285
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3645137548446655,
      "learning_rate": 0.0005950907494911761,
      "loss": 3.156,
      "step": 13286
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0070700645446777,
      "learning_rate": 0.0005950900124748568,
      "loss": 3.0093,
      "step": 13287
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7207218408584595,
      "learning_rate": 0.0005950892754036748,
      "loss": 3.2609,
      "step": 13288
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.348385214805603,
      "learning_rate": 0.0005950885382776301,
      "loss": 3.2528,
      "step": 13289
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6252652406692505,
      "learning_rate": 0.0005950878010967229,
      "loss": 3.0658,
      "step": 13290
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.229106903076172,
      "learning_rate": 0.0005950870638609533,
      "loss": 3.2358,
      "step": 13291
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4002586603164673,
      "learning_rate": 0.0005950863265703213,
      "loss": 2.9493,
      "step": 13292
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0403084754943848,
      "learning_rate": 0.0005950855892248274,
      "loss": 3.0811,
      "step": 13293
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6861002445220947,
      "learning_rate": 0.0005950848518244714,
      "loss": 3.084,
      "step": 13294
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0635929107666016,
      "learning_rate": 0.0005950841143692536,
      "loss": 2.9424,
      "step": 13295
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.467044472694397,
      "learning_rate": 0.000595083376859174,
      "loss": 2.9262,
      "step": 13296
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8945820331573486,
      "learning_rate": 0.000595082639294233,
      "loss": 3.3971,
      "step": 13297
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.482628583908081,
      "learning_rate": 0.0005950819016744304,
      "loss": 3.0557,
      "step": 13298
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2667385339736938,
      "learning_rate": 0.0005950811639997666,
      "loss": 3.3917,
      "step": 13299
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5884156227111816,
      "learning_rate": 0.0005950804262702415,
      "loss": 3.1377,
      "step": 13300
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6671698093414307,
      "learning_rate": 0.0005950796884858557,
      "loss": 3.3892,
      "step": 13301
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6057841777801514,
      "learning_rate": 0.0005950789506466087,
      "loss": 3.1868,
      "step": 13302
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2300834655761719,
      "learning_rate": 0.000595078212752501,
      "loss": 3.1699,
      "step": 13303
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.023932695388794,
      "learning_rate": 0.0005950774748035328,
      "loss": 3.1914,
      "step": 13304
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7975118160247803,
      "learning_rate": 0.0005950767367997041,
      "loss": 3.2518,
      "step": 13305
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6096221208572388,
      "learning_rate": 0.000595075998741015,
      "loss": 2.8897,
      "step": 13306
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5826513767242432,
      "learning_rate": 0.0005950752606274658,
      "loss": 3.3139,
      "step": 13307
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.040508985519409,
      "learning_rate": 0.0005950745224590565,
      "loss": 3.0416,
      "step": 13308
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3908131122589111,
      "learning_rate": 0.0005950737842357872,
      "loss": 2.954,
      "step": 13309
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2953085899353027,
      "learning_rate": 0.0005950730459576581,
      "loss": 3.265,
      "step": 13310
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4533016681671143,
      "learning_rate": 0.0005950723076246695,
      "loss": 2.8408,
      "step": 13311
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.0808168649673462,
      "learning_rate": 0.0005950715692368213,
      "loss": 3.2169,
      "step": 13312
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0321121215820312,
      "learning_rate": 0.0005950708307941138,
      "loss": 3.1467,
      "step": 13313
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2229195833206177,
      "learning_rate": 0.000595070092296547,
      "loss": 3.2776,
      "step": 13314
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.42863130569458,
      "learning_rate": 0.0005950693537441211,
      "loss": 2.9283,
      "step": 13315
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0133373737335205,
      "learning_rate": 0.0005950686151368362,
      "loss": 3.2721,
      "step": 13316
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4207133054733276,
      "learning_rate": 0.0005950678764746925,
      "loss": 3.0708,
      "step": 13317
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9251059293746948,
      "learning_rate": 0.0005950671377576901,
      "loss": 3.1144,
      "step": 13318
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9190293550491333,
      "learning_rate": 0.0005950663989858292,
      "loss": 2.8582,
      "step": 13319
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.706693410873413,
      "learning_rate": 0.0005950656601591099,
      "loss": 3.3795,
      "step": 13320
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1937990188598633,
      "learning_rate": 0.0005950649212775322,
      "loss": 3.0189,
      "step": 13321
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3518528938293457,
      "learning_rate": 0.0005950641823410965,
      "loss": 3.0554,
      "step": 13322
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3920857906341553,
      "learning_rate": 0.0005950634433498027,
      "loss": 2.8505,
      "step": 13323
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2392146587371826,
      "learning_rate": 0.0005950627043036511,
      "loss": 3.2858,
      "step": 13324
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4537725448608398,
      "learning_rate": 0.0005950619652026418,
      "loss": 3.1589,
      "step": 13325
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4315210580825806,
      "learning_rate": 0.0005950612260467749,
      "loss": 3.0102,
      "step": 13326
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6266140937805176,
      "learning_rate": 0.0005950604868360505,
      "loss": 3.303,
      "step": 13327
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3078200817108154,
      "learning_rate": 0.0005950597475704688,
      "loss": 3.1774,
      "step": 13328
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3904285430908203,
      "learning_rate": 0.0005950590082500299,
      "loss": 3.2386,
      "step": 13329
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5753610134124756,
      "learning_rate": 0.000595058268874734,
      "loss": 3.007,
      "step": 13330
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.462235927581787,
      "learning_rate": 0.0005950575294445811,
      "loss": 3.2519,
      "step": 13331
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4724050760269165,
      "learning_rate": 0.0005950567899595716,
      "loss": 3.1694,
      "step": 13332
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.763505220413208,
      "learning_rate": 0.0005950560504197054,
      "loss": 3.0984,
      "step": 13333
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.832242488861084,
      "learning_rate": 0.0005950553108249827,
      "loss": 3.0844,
      "step": 13334
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.59712815284729,
      "learning_rate": 0.0005950545711754035,
      "loss": 3.3738,
      "step": 13335
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4964025020599365,
      "learning_rate": 0.0005950538314709684,
      "loss": 3.1267,
      "step": 13336
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.544729471206665,
      "learning_rate": 0.000595053091711677,
      "loss": 3.1524,
      "step": 13337
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9216498136520386,
      "learning_rate": 0.0005950523518975297,
      "loss": 3.2928,
      "step": 13338
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5006221532821655,
      "learning_rate": 0.0005950516120285266,
      "loss": 2.9476,
      "step": 13339
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5874966382980347,
      "learning_rate": 0.0005950508721046678,
      "loss": 3.1435,
      "step": 13340
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6935107707977295,
      "learning_rate": 0.0005950501321259536,
      "loss": 3.2846,
      "step": 13341
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.504718542098999,
      "learning_rate": 0.0005950493920923839,
      "loss": 3.3809,
      "step": 13342
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.51287043094635,
      "learning_rate": 0.000595048652003959,
      "loss": 3.2444,
      "step": 13343
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0779528617858887,
      "learning_rate": 0.000595047911860679,
      "loss": 3.2237,
      "step": 13344
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.977658987045288,
      "learning_rate": 0.000595047171662544,
      "loss": 3.3644,
      "step": 13345
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4415605068206787,
      "learning_rate": 0.0005950464314095541,
      "loss": 3.2361,
      "step": 13346
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4591729640960693,
      "learning_rate": 0.0005950456911017096,
      "loss": 3.1641,
      "step": 13347
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2970104217529297,
      "learning_rate": 0.0005950449507390105,
      "loss": 3.0856,
      "step": 13348
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.685092568397522,
      "learning_rate": 0.000595044210321457,
      "loss": 3.0674,
      "step": 13349
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3882018327713013,
      "learning_rate": 0.0005950434698490492,
      "loss": 3.2763,
      "step": 13350
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9131743907928467,
      "learning_rate": 0.0005950427293217872,
      "loss": 3.2809,
      "step": 13351
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.6983351707458496,
      "learning_rate": 0.0005950419887396711,
      "loss": 3.1402,
      "step": 13352
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6625912189483643,
      "learning_rate": 0.0005950412481027013,
      "loss": 3.2438,
      "step": 13353
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5333993434906006,
      "learning_rate": 0.0005950405074108778,
      "loss": 3.0305,
      "step": 13354
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4156301021575928,
      "learning_rate": 0.0005950397666642006,
      "loss": 3.1939,
      "step": 13355
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6873764991760254,
      "learning_rate": 0.0005950390258626699,
      "loss": 3.0097,
      "step": 13356
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6757144927978516,
      "learning_rate": 0.0005950382850062858,
      "loss": 3.1758,
      "step": 13357
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5449579954147339,
      "learning_rate": 0.0005950375440950487,
      "loss": 3.2056,
      "step": 13358
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5490293502807617,
      "learning_rate": 0.0005950368031289585,
      "loss": 3.418,
      "step": 13359
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.7759222984313965,
      "learning_rate": 0.0005950360621080153,
      "loss": 3.0294,
      "step": 13360
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.555993676185608,
      "learning_rate": 0.0005950353210322195,
      "loss": 3.1091,
      "step": 13361
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5677359104156494,
      "learning_rate": 0.0005950345799015709,
      "loss": 3.1938,
      "step": 13362
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.291116237640381,
      "learning_rate": 0.0005950338387160699,
      "loss": 3.0484,
      "step": 13363
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.490846872329712,
      "learning_rate": 0.0005950330974757164,
      "loss": 3.0887,
      "step": 13364
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8701198101043701,
      "learning_rate": 0.0005950323561805109,
      "loss": 3.2152,
      "step": 13365
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0127663612365723,
      "learning_rate": 0.0005950316148304531,
      "loss": 3.1337,
      "step": 13366
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6410547494888306,
      "learning_rate": 0.0005950308734255436,
      "loss": 3.1595,
      "step": 13367
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3988343477249146,
      "learning_rate": 0.000595030131965782,
      "loss": 3.0819,
      "step": 13368
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.9160451889038086,
      "learning_rate": 0.000595029390451169,
      "loss": 3.0668,
      "step": 13369
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.969403624534607,
      "learning_rate": 0.0005950286488817043,
      "loss": 3.203,
      "step": 13370
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.60576331615448,
      "learning_rate": 0.0005950279072573883,
      "loss": 3.2794,
      "step": 13371
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5550743341445923,
      "learning_rate": 0.000595027165578221,
      "loss": 3.2953,
      "step": 13372
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5777254104614258,
      "learning_rate": 0.0005950264238442026,
      "loss": 3.2344,
      "step": 13373
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4108608961105347,
      "learning_rate": 0.0005950256820553331,
      "loss": 3.0341,
      "step": 13374
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.528418779373169,
      "learning_rate": 0.0005950249402116129,
      "loss": 3.0847,
      "step": 13375
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.625953197479248,
      "learning_rate": 0.0005950241983130419,
      "loss": 3.1878,
      "step": 13376
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.528348445892334,
      "learning_rate": 0.0005950234563596203,
      "loss": 2.8804,
      "step": 13377
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7443040609359741,
      "learning_rate": 0.0005950227143513484,
      "loss": 3.2872,
      "step": 13378
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5677322149276733,
      "learning_rate": 0.0005950219722882262,
      "loss": 3.4317,
      "step": 13379
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6003772020339966,
      "learning_rate": 0.0005950212301702538,
      "loss": 3.1457,
      "step": 13380
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.460569143295288,
      "learning_rate": 0.0005950204879974313,
      "loss": 3.1552,
      "step": 13381
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.935839056968689,
      "learning_rate": 0.000595019745769759,
      "loss": 3.1538,
      "step": 13382
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7799837589263916,
      "learning_rate": 0.000595019003487237,
      "loss": 3.0262,
      "step": 13383
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.6278347969055176,
      "learning_rate": 0.0005950182611498653,
      "loss": 3.1914,
      "step": 13384
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0890495777130127,
      "learning_rate": 0.0005950175187576442,
      "loss": 3.298,
      "step": 13385
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.769653558731079,
      "learning_rate": 0.0005950167763105738,
      "loss": 3.2037,
      "step": 13386
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5587654113769531,
      "learning_rate": 0.0005950160338086542,
      "loss": 3.1532,
      "step": 13387
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5370748043060303,
      "learning_rate": 0.0005950152912518855,
      "loss": 3.0503,
      "step": 13388
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.686083197593689,
      "learning_rate": 0.0005950145486402679,
      "loss": 3.1655,
      "step": 13389
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7762210369110107,
      "learning_rate": 0.0005950138059738016,
      "loss": 3.1513,
      "step": 13390
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4807394742965698,
      "learning_rate": 0.0005950130632524866,
      "loss": 3.1084,
      "step": 13391
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.244215965270996,
      "learning_rate": 0.0005950123204763231,
      "loss": 3.0958,
      "step": 13392
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7826817035675049,
      "learning_rate": 0.0005950115776453112,
      "loss": 3.2428,
      "step": 13393
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.473031997680664,
      "learning_rate": 0.0005950108347594512,
      "loss": 2.9845,
      "step": 13394
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7264759540557861,
      "learning_rate": 0.0005950100918187431,
      "loss": 3.2694,
      "step": 13395
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.894309639930725,
      "learning_rate": 0.000595009348823187,
      "loss": 3.3188,
      "step": 13396
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.703102946281433,
      "learning_rate": 0.0005950086057727831,
      "loss": 3.1992,
      "step": 13397
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.7300660610198975,
      "learning_rate": 0.0005950078626675315,
      "loss": 3.0393,
      "step": 13398
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.820838689804077,
      "learning_rate": 0.0005950071195074325,
      "loss": 3.1266,
      "step": 13399
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.856854200363159,
      "learning_rate": 0.000595006376292486,
      "loss": 3.2023,
      "step": 13400
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.028329372406006,
      "learning_rate": 0.0005950056330226922,
      "loss": 3.1155,
      "step": 13401
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.806127667427063,
      "learning_rate": 0.0005950048896980513,
      "loss": 3.1101,
      "step": 13402
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.026625394821167,
      "learning_rate": 0.0005950041463185635,
      "loss": 3.2913,
      "step": 13403
    },
    {
      "epoch": 0.17,
      "grad_norm": 6.1746320724487305,
      "learning_rate": 0.0005950034028842289,
      "loss": 3.0703,
      "step": 13404
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.3255507946014404,
      "learning_rate": 0.0005950026593950474,
      "loss": 3.2241,
      "step": 13405
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.9978482723236084,
      "learning_rate": 0.0005950019158510195,
      "loss": 3.1122,
      "step": 13406
    },
    {
      "epoch": 0.17,
      "grad_norm": 4.671538352966309,
      "learning_rate": 0.0005950011722521452,
      "loss": 3.0353,
      "step": 13407
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4346702098846436,
      "learning_rate": 0.0005950004285984245,
      "loss": 2.996,
      "step": 13408
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5837111473083496,
      "learning_rate": 0.0005949996848898577,
      "loss": 2.997,
      "step": 13409
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8608613014221191,
      "learning_rate": 0.0005949989411264449,
      "loss": 3.1623,
      "step": 13410
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5869734287261963,
      "learning_rate": 0.0005949981973081863,
      "loss": 2.9327,
      "step": 13411
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.2164571285247803,
      "learning_rate": 0.0005949974534350818,
      "loss": 3.1321,
      "step": 13412
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.181173324584961,
      "learning_rate": 0.0005949967095071317,
      "loss": 3.0694,
      "step": 13413
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8678375482559204,
      "learning_rate": 0.0005949959655243363,
      "loss": 3.2261,
      "step": 13414
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.159411668777466,
      "learning_rate": 0.0005949952214866956,
      "loss": 3.221,
      "step": 13415
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6912325620651245,
      "learning_rate": 0.0005949944773942096,
      "loss": 2.9047,
      "step": 13416
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4262341260910034,
      "learning_rate": 0.0005949937332468786,
      "loss": 3.1084,
      "step": 13417
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.56919002532959,
      "learning_rate": 0.0005949929890447027,
      "loss": 3.256,
      "step": 13418
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.5159080028533936,
      "learning_rate": 0.000594992244787682,
      "loss": 2.9751,
      "step": 13419
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.261873722076416,
      "learning_rate": 0.0005949915004758167,
      "loss": 3.1697,
      "step": 13420
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.0261387825012207,
      "learning_rate": 0.000594990756109107,
      "loss": 3.1469,
      "step": 13421
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.628835439682007,
      "learning_rate": 0.0005949900116875528,
      "loss": 3.2692,
      "step": 13422
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.1335415840148926,
      "learning_rate": 0.0005949892672111544,
      "loss": 3.0551,
      "step": 13423
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.677268385887146,
      "learning_rate": 0.000594988522679912,
      "loss": 3.0533,
      "step": 13424
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.1462111473083496,
      "learning_rate": 0.0005949877780938256,
      "loss": 2.8785,
      "step": 13425
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.043003797531128,
      "learning_rate": 0.0005949870334528956,
      "loss": 3.0119,
      "step": 13426
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4367988109588623,
      "learning_rate": 0.0005949862887571217,
      "loss": 3.3526,
      "step": 13427
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.6616806983947754,
      "learning_rate": 0.0005949855440065044,
      "loss": 3.1781,
      "step": 13428
    },
    {
      "epoch": 0.17,
      "grad_norm": 2.4757633209228516,
      "learning_rate": 0.0005949847992010436,
      "loss": 2.9246,
      "step": 13429
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4303786754608154,
      "learning_rate": 0.0005949840543407397,
      "loss": 3.2416,
      "step": 13430
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.3757500648498535,
      "learning_rate": 0.0005949833094255926,
      "loss": 3.1131,
      "step": 13431
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.8764415979385376,
      "learning_rate": 0.0005949825644556027,
      "loss": 3.2653,
      "step": 13432
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.342136025428772,
      "learning_rate": 0.0005949818194307697,
      "loss": 3.391,
      "step": 13433
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5901515483856201,
      "learning_rate": 0.0005949810743510941,
      "loss": 3.0452,
      "step": 13434
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.7632131576538086,
      "learning_rate": 0.0005949803292165761,
      "loss": 2.9347,
      "step": 13435
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.805615782737732,
      "learning_rate": 0.0005949795840272156,
      "loss": 2.9707,
      "step": 13436
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.4247914552688599,
      "learning_rate": 0.0005949788387830129,
      "loss": 3.1561,
      "step": 13437
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.2924585342407227,
      "learning_rate": 0.000594978093483968,
      "loss": 3.183,
      "step": 13438
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.927650809288025,
      "learning_rate": 0.0005949773481300811,
      "loss": 3.2354,
      "step": 13439
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.5402100086212158,
      "learning_rate": 0.0005949766027213522,
      "loss": 3.0614,
      "step": 13440
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2771739959716797,
      "learning_rate": 0.0005949758572577818,
      "loss": 3.0993,
      "step": 13441
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.073577880859375,
      "learning_rate": 0.0005949751117393697,
      "loss": 3.2573,
      "step": 13442
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6416549682617188,
      "learning_rate": 0.0005949743661661163,
      "loss": 3.0351,
      "step": 13443
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9252067804336548,
      "learning_rate": 0.0005949736205380214,
      "loss": 3.1358,
      "step": 13444
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0293188095092773,
      "learning_rate": 0.0005949728748550854,
      "loss": 3.2417,
      "step": 13445
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6091026067733765,
      "learning_rate": 0.0005949721291173085,
      "loss": 3.2117,
      "step": 13446
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.0865280628204346,
      "learning_rate": 0.0005949713833246906,
      "loss": 2.993,
      "step": 13447
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.611835241317749,
      "learning_rate": 0.0005949706374772319,
      "loss": 3.0299,
      "step": 13448
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7990853786468506,
      "learning_rate": 0.0005949698915749327,
      "loss": 3.2169,
      "step": 13449
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9014358520507812,
      "learning_rate": 0.0005949691456177931,
      "loss": 3.3465,
      "step": 13450
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0057575702667236,
      "learning_rate": 0.000594968399605813,
      "loss": 3.0891,
      "step": 13451
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8730189800262451,
      "learning_rate": 0.0005949676535389927,
      "loss": 3.0337,
      "step": 13452
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4505789279937744,
      "learning_rate": 0.0005949669074173325,
      "loss": 3.2379,
      "step": 13453
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7121466398239136,
      "learning_rate": 0.0005949661612408323,
      "loss": 3.3374,
      "step": 13454
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.675050139427185,
      "learning_rate": 0.0005949654150094922,
      "loss": 2.9499,
      "step": 13455
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5042152404785156,
      "learning_rate": 0.0005949646687233126,
      "loss": 3.1277,
      "step": 13456
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0774893760681152,
      "learning_rate": 0.0005949639223822935,
      "loss": 2.7253,
      "step": 13457
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6191141605377197,
      "learning_rate": 0.0005949631759864351,
      "loss": 3.082,
      "step": 13458
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7847684621810913,
      "learning_rate": 0.0005949624295357374,
      "loss": 3.1697,
      "step": 13459
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.3912746906280518,
      "learning_rate": 0.0005949616830302005,
      "loss": 3.1116,
      "step": 13460
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.9262187480926514,
      "learning_rate": 0.0005949609364698248,
      "loss": 3.0681,
      "step": 13461
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4551262855529785,
      "learning_rate": 0.0005949601898546103,
      "loss": 3.3619,
      "step": 13462
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6149922609329224,
      "learning_rate": 0.0005949594431845571,
      "loss": 3.1924,
      "step": 13463
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4216203689575195,
      "learning_rate": 0.0005949586964596654,
      "loss": 3.0531,
      "step": 13464
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5476939678192139,
      "learning_rate": 0.0005949579496799352,
      "loss": 3.2201,
      "step": 13465
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7616275548934937,
      "learning_rate": 0.0005949572028453668,
      "loss": 3.3064,
      "step": 13466
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5636417865753174,
      "learning_rate": 0.0005949564559559604,
      "loss": 3.2024,
      "step": 13467
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5513970851898193,
      "learning_rate": 0.000594955709011716,
      "loss": 3.063,
      "step": 13468
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4580284357070923,
      "learning_rate": 0.0005949549620126336,
      "loss": 3.3106,
      "step": 13469
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0949225425720215,
      "learning_rate": 0.0005949542149587137,
      "loss": 2.9974,
      "step": 13470
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.374842643737793,
      "learning_rate": 0.000594953467849956,
      "loss": 2.8269,
      "step": 13471
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.350549340248108,
      "learning_rate": 0.0005949527206863611,
      "loss": 3.1325,
      "step": 13472
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5189764499664307,
      "learning_rate": 0.0005949519734679288,
      "loss": 3.1084,
      "step": 13473
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2646172046661377,
      "learning_rate": 0.0005949512261946595,
      "loss": 3.3556,
      "step": 13474
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.9963736534118652,
      "learning_rate": 0.0005949504788665531,
      "loss": 2.9807,
      "step": 13475
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3593857288360596,
      "learning_rate": 0.0005949497314836099,
      "loss": 3.1378,
      "step": 13476
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.164529800415039,
      "learning_rate": 0.0005949489840458299,
      "loss": 3.235,
      "step": 13477
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.604172945022583,
      "learning_rate": 0.0005949482365532133,
      "loss": 3.1205,
      "step": 13478
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.744088888168335,
      "learning_rate": 0.0005949474890057603,
      "loss": 3.3144,
      "step": 13479
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5458239316940308,
      "learning_rate": 0.000594946741403471,
      "loss": 3.1805,
      "step": 13480
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4749481678009033,
      "learning_rate": 0.0005949459937463456,
      "loss": 3.2545,
      "step": 13481
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.775951862335205,
      "learning_rate": 0.0005949452460343841,
      "loss": 3.0138,
      "step": 13482
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7362970113754272,
      "learning_rate": 0.0005949444982675867,
      "loss": 3.0483,
      "step": 13483
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.045419692993164,
      "learning_rate": 0.0005949437504459537,
      "loss": 3.0495,
      "step": 13484
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.368807554244995,
      "learning_rate": 0.0005949430025694849,
      "loss": 2.9782,
      "step": 13485
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.785122275352478,
      "learning_rate": 0.0005949422546381807,
      "loss": 2.9239,
      "step": 13486
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7769827842712402,
      "learning_rate": 0.000594941506652041,
      "loss": 3.1909,
      "step": 13487
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.2801003456115723,
      "learning_rate": 0.0005949407586110663,
      "loss": 3.3068,
      "step": 13488
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.817240595817566,
      "learning_rate": 0.0005949400105152566,
      "loss": 3.0758,
      "step": 13489
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5385686159133911,
      "learning_rate": 0.0005949392623646119,
      "loss": 3.4137,
      "step": 13490
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.468379259109497,
      "learning_rate": 0.0005949385141591324,
      "loss": 3.1386,
      "step": 13491
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4456994533538818,
      "learning_rate": 0.0005949377658988182,
      "loss": 3.2068,
      "step": 13492
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.122807025909424,
      "learning_rate": 0.0005949370175836696,
      "loss": 3.2016,
      "step": 13493
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4346867799758911,
      "learning_rate": 0.0005949362692136866,
      "loss": 3.2978,
      "step": 13494
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.101422071456909,
      "learning_rate": 0.0005949355207888695,
      "loss": 2.8865,
      "step": 13495
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2792844772338867,
      "learning_rate": 0.0005949347723092181,
      "loss": 3.3644,
      "step": 13496
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.744200587272644,
      "learning_rate": 0.0005949340237747329,
      "loss": 3.1173,
      "step": 13497
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4826807975769043,
      "learning_rate": 0.0005949332751854139,
      "loss": 2.8993,
      "step": 13498
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9595870971679688,
      "learning_rate": 0.0005949325265412612,
      "loss": 2.9508,
      "step": 13499
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2217965126037598,
      "learning_rate": 0.000594931777842275,
      "loss": 3.1144,
      "step": 13500
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4616978168487549,
      "learning_rate": 0.0005949310290884554,
      "loss": 3.0835,
      "step": 13501
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3570359945297241,
      "learning_rate": 0.0005949302802798026,
      "loss": 2.9914,
      "step": 13502
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6014039516448975,
      "learning_rate": 0.0005949295314163166,
      "loss": 3.1384,
      "step": 13503
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2404582500457764,
      "learning_rate": 0.0005949287824979977,
      "loss": 3.1023,
      "step": 13504
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5424699783325195,
      "learning_rate": 0.000594928033524846,
      "loss": 3.2103,
      "step": 13505
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.45420241355896,
      "learning_rate": 0.0005949272844968616,
      "loss": 2.993,
      "step": 13506
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8541568517684937,
      "learning_rate": 0.0005949265354140446,
      "loss": 3.227,
      "step": 13507
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.404128909111023,
      "learning_rate": 0.0005949257862763952,
      "loss": 2.9747,
      "step": 13508
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7948614358901978,
      "learning_rate": 0.0005949250370839136,
      "loss": 2.9981,
      "step": 13509
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.629382610321045,
      "learning_rate": 0.0005949242878365996,
      "loss": 3.1014,
      "step": 13510
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9901854991912842,
      "learning_rate": 0.0005949235385344539,
      "loss": 3.1768,
      "step": 13511
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4546786546707153,
      "learning_rate": 0.0005949227891774763,
      "loss": 3.3042,
      "step": 13512
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3921363353729248,
      "learning_rate": 0.0005949220397656669,
      "loss": 3.1811,
      "step": 13513
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4417316913604736,
      "learning_rate": 0.000594921290299026,
      "loss": 2.8578,
      "step": 13514
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5269824266433716,
      "learning_rate": 0.0005949205407775537,
      "loss": 3.1998,
      "step": 13515
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9328348636627197,
      "learning_rate": 0.00059491979120125,
      "loss": 3.1828,
      "step": 13516
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.276707887649536,
      "learning_rate": 0.0005949190415701151,
      "loss": 3.1674,
      "step": 13517
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.162382125854492,
      "learning_rate": 0.0005949182918841494,
      "loss": 3.0995,
      "step": 13518
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4780975580215454,
      "learning_rate": 0.0005949175421433526,
      "loss": 3.2114,
      "step": 13519
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6412800550460815,
      "learning_rate": 0.0005949167923477252,
      "loss": 2.9052,
      "step": 13520
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4117149114608765,
      "learning_rate": 0.0005949160424972671,
      "loss": 3.0957,
      "step": 13521
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6450722217559814,
      "learning_rate": 0.0005949152925919785,
      "loss": 3.2514,
      "step": 13522
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5770820379257202,
      "learning_rate": 0.0005949145426318598,
      "loss": 3.1829,
      "step": 13523
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4509003162384033,
      "learning_rate": 0.0005949137926169109,
      "loss": 3.4363,
      "step": 13524
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6634916067123413,
      "learning_rate": 0.0005949130425471317,
      "loss": 3.3212,
      "step": 13525
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7264087200164795,
      "learning_rate": 0.0005949122924225228,
      "loss": 3.2039,
      "step": 13526
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4085235595703125,
      "learning_rate": 0.000594911542243084,
      "loss": 3.0833,
      "step": 13527
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6502748727798462,
      "learning_rate": 0.0005949107920088158,
      "loss": 3.4083,
      "step": 13528
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8150368928909302,
      "learning_rate": 0.000594910041719718,
      "loss": 3.2296,
      "step": 13529
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9519904851913452,
      "learning_rate": 0.0005949092913757908,
      "loss": 3.3029,
      "step": 13530
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4181544780731201,
      "learning_rate": 0.0005949085409770344,
      "loss": 3.2021,
      "step": 13531
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3489969968795776,
      "learning_rate": 0.000594907790523449,
      "loss": 3.316,
      "step": 13532
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6713618040084839,
      "learning_rate": 0.0005949070400150347,
      "loss": 3.2232,
      "step": 13533
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3384876251220703,
      "learning_rate": 0.0005949062894517915,
      "loss": 3.0155,
      "step": 13534
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2219656705856323,
      "learning_rate": 0.0005949055388337198,
      "loss": 3.1918,
      "step": 13535
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4485219717025757,
      "learning_rate": 0.0005949047881608194,
      "loss": 3.1729,
      "step": 13536
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8558995723724365,
      "learning_rate": 0.0005949040374330907,
      "loss": 3.1464,
      "step": 13537
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5651164054870605,
      "learning_rate": 0.0005949032866505339,
      "loss": 3.1606,
      "step": 13538
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5398873090744019,
      "learning_rate": 0.0005949025358131489,
      "loss": 3.215,
      "step": 13539
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7478370666503906,
      "learning_rate": 0.0005949017849209361,
      "loss": 2.9085,
      "step": 13540
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5855263471603394,
      "learning_rate": 0.0005949010339738953,
      "loss": 2.7522,
      "step": 13541
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7324416637420654,
      "learning_rate": 0.0005949002829720269,
      "loss": 2.9707,
      "step": 13542
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8865282535552979,
      "learning_rate": 0.000594899531915331,
      "loss": 3.1273,
      "step": 13543
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.704841136932373,
      "learning_rate": 0.0005948987808038076,
      "loss": 3.1769,
      "step": 13544
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5245200395584106,
      "learning_rate": 0.0005948980296374572,
      "loss": 2.8842,
      "step": 13545
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1230075359344482,
      "learning_rate": 0.0005948972784162794,
      "loss": 2.8348,
      "step": 13546
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7765074968338013,
      "learning_rate": 0.0005948965271402747,
      "loss": 3.3247,
      "step": 13547
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3602739572525024,
      "learning_rate": 0.0005948957758094433,
      "loss": 3.0139,
      "step": 13548
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.214895486831665,
      "learning_rate": 0.0005948950244237851,
      "loss": 3.4181,
      "step": 13549
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0820670127868652,
      "learning_rate": 0.0005948942729833005,
      "loss": 2.8256,
      "step": 13550
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7168588638305664,
      "learning_rate": 0.0005948935214879893,
      "loss": 3.4328,
      "step": 13551
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8340177536010742,
      "learning_rate": 0.0005948927699378518,
      "loss": 3.0549,
      "step": 13552
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.568499207496643,
      "learning_rate": 0.0005948920183328883,
      "loss": 3.1233,
      "step": 13553
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3718388080596924,
      "learning_rate": 0.0005948912666730988,
      "loss": 3.1279,
      "step": 13554
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.231844425201416,
      "learning_rate": 0.0005948905149584833,
      "loss": 3.2616,
      "step": 13555
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9272713661193848,
      "learning_rate": 0.0005948897631890423,
      "loss": 3.2685,
      "step": 13556
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5184357166290283,
      "learning_rate": 0.0005948890113647756,
      "loss": 3.1825,
      "step": 13557
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.671406865119934,
      "learning_rate": 0.0005948882594856835,
      "loss": 3.2589,
      "step": 13558
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7278918027877808,
      "learning_rate": 0.0005948875075517661,
      "loss": 3.2551,
      "step": 13559
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5204213857650757,
      "learning_rate": 0.0005948867555630236,
      "loss": 3.3539,
      "step": 13560
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5691523551940918,
      "learning_rate": 0.000594886003519456,
      "loss": 3.2667,
      "step": 13561
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3470734357833862,
      "learning_rate": 0.0005948852514210636,
      "loss": 3.1454,
      "step": 13562
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7808746099472046,
      "learning_rate": 0.0005948844992678464,
      "loss": 3.2536,
      "step": 13563
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2970056533813477,
      "learning_rate": 0.0005948837470598047,
      "loss": 3.2171,
      "step": 13564
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6009843349456787,
      "learning_rate": 0.0005948829947969384,
      "loss": 3.0229,
      "step": 13565
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.381303310394287,
      "learning_rate": 0.0005948822424792479,
      "loss": 3.0597,
      "step": 13566
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8810964822769165,
      "learning_rate": 0.0005948814901067332,
      "loss": 3.2055,
      "step": 13567
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.376665711402893,
      "learning_rate": 0.0005948807376793945,
      "loss": 3.0919,
      "step": 13568
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6224604845046997,
      "learning_rate": 0.0005948799851972317,
      "loss": 3.1578,
      "step": 13569
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3475964069366455,
      "learning_rate": 0.0005948792326602453,
      "loss": 3.2172,
      "step": 13570
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.495332956314087,
      "learning_rate": 0.0005948784800684354,
      "loss": 3.0959,
      "step": 13571
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3489054441452026,
      "learning_rate": 0.0005948777274218018,
      "loss": 3.0834,
      "step": 13572
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.401410698890686,
      "learning_rate": 0.0005948769747203451,
      "loss": 3.3085,
      "step": 13573
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6617729663848877,
      "learning_rate": 0.0005948762219640649,
      "loss": 3.2911,
      "step": 13574
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2183959484100342,
      "learning_rate": 0.0005948754691529618,
      "loss": 3.0112,
      "step": 13575
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.527121901512146,
      "learning_rate": 0.0005948747162870359,
      "loss": 3.0972,
      "step": 13576
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0361995697021484,
      "learning_rate": 0.0005948739633662871,
      "loss": 3.1905,
      "step": 13577
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.478853702545166,
      "learning_rate": 0.0005948732103907157,
      "loss": 3.1504,
      "step": 13578
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5396225452423096,
      "learning_rate": 0.0005948724573603218,
      "loss": 3.1467,
      "step": 13579
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.447540521621704,
      "learning_rate": 0.0005948717042751056,
      "loss": 3.1504,
      "step": 13580
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.427123785018921,
      "learning_rate": 0.0005948709511350671,
      "loss": 3.1624,
      "step": 13581
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5184626579284668,
      "learning_rate": 0.0005948701979402065,
      "loss": 3.1572,
      "step": 13582
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.512898564338684,
      "learning_rate": 0.000594869444690524,
      "loss": 2.9575,
      "step": 13583
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6567270755767822,
      "learning_rate": 0.0005948686913860197,
      "loss": 2.9276,
      "step": 13584
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2558039426803589,
      "learning_rate": 0.0005948679380266938,
      "loss": 3.3667,
      "step": 13585
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6516718864440918,
      "learning_rate": 0.0005948671846125464,
      "loss": 3.4651,
      "step": 13586
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.436879277229309,
      "learning_rate": 0.0005948664311435775,
      "loss": 3.3728,
      "step": 13587
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4308981895446777,
      "learning_rate": 0.0005948656776197875,
      "loss": 3.2019,
      "step": 13588
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6788432598114014,
      "learning_rate": 0.0005948649240411763,
      "loss": 3.2516,
      "step": 13589
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4524279832839966,
      "learning_rate": 0.0005948641704077442,
      "loss": 3.1057,
      "step": 13590
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4919931888580322,
      "learning_rate": 0.0005948634167194913,
      "loss": 3.1724,
      "step": 13591
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.580392599105835,
      "learning_rate": 0.0005948626629764176,
      "loss": 3.329,
      "step": 13592
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.678086519241333,
      "learning_rate": 0.0005948619091785235,
      "loss": 3.104,
      "step": 13593
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5384007692337036,
      "learning_rate": 0.000594861155325809,
      "loss": 3.335,
      "step": 13594
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5045572519302368,
      "learning_rate": 0.0005948604014182742,
      "loss": 3.1205,
      "step": 13595
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5535250902175903,
      "learning_rate": 0.0005948596474559193,
      "loss": 3.2376,
      "step": 13596
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3842365741729736,
      "learning_rate": 0.0005948588934387444,
      "loss": 3.1692,
      "step": 13597
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5300153493881226,
      "learning_rate": 0.0005948581393667497,
      "loss": 2.9843,
      "step": 13598
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1680994033813477,
      "learning_rate": 0.0005948573852399353,
      "loss": 2.9262,
      "step": 13599
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9822343587875366,
      "learning_rate": 0.0005948566310583013,
      "loss": 2.9765,
      "step": 13600
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4551033973693848,
      "learning_rate": 0.0005948558768218479,
      "loss": 3.0349,
      "step": 13601
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.380514144897461,
      "learning_rate": 0.0005948551225305753,
      "loss": 3.1975,
      "step": 13602
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.122055768966675,
      "learning_rate": 0.0005948543681844835,
      "loss": 3.3282,
      "step": 13603
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5769450664520264,
      "learning_rate": 0.0005948536137835727,
      "loss": 3.2435,
      "step": 13604
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.3557417392730713,
      "learning_rate": 0.000594852859327843,
      "loss": 2.9006,
      "step": 13605
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.067084789276123,
      "learning_rate": 0.0005948521048172948,
      "loss": 3.1374,
      "step": 13606
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7781521081924438,
      "learning_rate": 0.0005948513502519278,
      "loss": 3.4961,
      "step": 13607
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.542808175086975,
      "learning_rate": 0.0005948505956317426,
      "loss": 3.1241,
      "step": 13608
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.24186372756958,
      "learning_rate": 0.0005948498409567389,
      "loss": 2.9315,
      "step": 13609
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.5649149417877197,
      "learning_rate": 0.0005948490862269171,
      "loss": 3.1717,
      "step": 13610
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6908444166183472,
      "learning_rate": 0.0005948483314422773,
      "loss": 3.3013,
      "step": 13611
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5602619647979736,
      "learning_rate": 0.0005948475766028197,
      "loss": 3.1424,
      "step": 13612
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.418830394744873,
      "learning_rate": 0.0005948468217085442,
      "loss": 3.2892,
      "step": 13613
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.051938772201538,
      "learning_rate": 0.0005948460667594512,
      "loss": 3.2897,
      "step": 13614
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.977716326713562,
      "learning_rate": 0.0005948453117555407,
      "loss": 3.3117,
      "step": 13615
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.437885046005249,
      "learning_rate": 0.0005948445566968131,
      "loss": 3.1123,
      "step": 13616
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.7115085124969482,
      "learning_rate": 0.0005948438015832681,
      "loss": 3.1456,
      "step": 13617
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6897225379943848,
      "learning_rate": 0.0005948430464149062,
      "loss": 2.9725,
      "step": 13618
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7464247941970825,
      "learning_rate": 0.0005948422911917274,
      "loss": 3.2581,
      "step": 13619
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.034003496170044,
      "learning_rate": 0.0005948415359137318,
      "loss": 2.6729,
      "step": 13620
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.434274673461914,
      "learning_rate": 0.0005948407805809196,
      "loss": 3.1316,
      "step": 13621
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4597020149230957,
      "learning_rate": 0.0005948400251932909,
      "loss": 3.1159,
      "step": 13622
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4154220819473267,
      "learning_rate": 0.0005948392697508459,
      "loss": 3.2987,
      "step": 13623
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2839417457580566,
      "learning_rate": 0.0005948385142535848,
      "loss": 2.9407,
      "step": 13624
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.024162530899048,
      "learning_rate": 0.0005948377587015076,
      "loss": 3.1979,
      "step": 13625
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6538408994674683,
      "learning_rate": 0.0005948370030946145,
      "loss": 3.1355,
      "step": 13626
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.7362284660339355,
      "learning_rate": 0.0005948362474329056,
      "loss": 3.1724,
      "step": 13627
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8664311170578003,
      "learning_rate": 0.000594835491716381,
      "loss": 3.3208,
      "step": 13628
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8715236186981201,
      "learning_rate": 0.0005948347359450412,
      "loss": 3.1811,
      "step": 13629
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3510841131210327,
      "learning_rate": 0.0005948339801188857,
      "loss": 3.1723,
      "step": 13630
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1346168518066406,
      "learning_rate": 0.0005948332242379153,
      "loss": 3.1278,
      "step": 13631
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.725496768951416,
      "learning_rate": 0.0005948324683021297,
      "loss": 3.2237,
      "step": 13632
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4219356775283813,
      "learning_rate": 0.0005948317123115292,
      "loss": 3.2668,
      "step": 13633
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6404613256454468,
      "learning_rate": 0.0005948309562661138,
      "loss": 3.428,
      "step": 13634
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5167659521102905,
      "learning_rate": 0.0005948302001658839,
      "loss": 3.3051,
      "step": 13635
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.460601806640625,
      "learning_rate": 0.0005948294440108394,
      "loss": 3.3476,
      "step": 13636
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8318076133728027,
      "learning_rate": 0.0005948286878009806,
      "loss": 3.0481,
      "step": 13637
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.562860369682312,
      "learning_rate": 0.0005948279315363075,
      "loss": 3.402,
      "step": 13638
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6010771989822388,
      "learning_rate": 0.0005948271752168206,
      "loss": 3.4547,
      "step": 13639
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7790935039520264,
      "learning_rate": 0.0005948264188425195,
      "loss": 3.0903,
      "step": 13640
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7851310968399048,
      "learning_rate": 0.0005948256624134047,
      "loss": 3.1169,
      "step": 13641
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.235424518585205,
      "learning_rate": 0.0005948249059294763,
      "loss": 2.7788,
      "step": 13642
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6445281505584717,
      "learning_rate": 0.0005948241493907342,
      "loss": 3.2109,
      "step": 13643
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6198127269744873,
      "learning_rate": 0.0005948233927971789,
      "loss": 3.2318,
      "step": 13644
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3804352283477783,
      "learning_rate": 0.0005948226361488103,
      "loss": 3.484,
      "step": 13645
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5707210302352905,
      "learning_rate": 0.0005948218794456286,
      "loss": 2.9973,
      "step": 13646
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6669334173202515,
      "learning_rate": 0.0005948211226876339,
      "loss": 3.15,
      "step": 13647
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9601161479949951,
      "learning_rate": 0.0005948203658748264,
      "loss": 3.1964,
      "step": 13648
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5775068998336792,
      "learning_rate": 0.0005948196090072063,
      "loss": 3.2965,
      "step": 13649
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3272539377212524,
      "learning_rate": 0.0005948188520847736,
      "loss": 2.889,
      "step": 13650
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5205028057098389,
      "learning_rate": 0.0005948180951075286,
      "loss": 3.4148,
      "step": 13651
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4746956825256348,
      "learning_rate": 0.0005948173380754714,
      "loss": 3.3913,
      "step": 13652
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4633675813674927,
      "learning_rate": 0.0005948165809886018,
      "loss": 3.2785,
      "step": 13653
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4883664846420288,
      "learning_rate": 0.0005948158238469205,
      "loss": 3.1581,
      "step": 13654
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7282886505126953,
      "learning_rate": 0.0005948150666504273,
      "loss": 3.3573,
      "step": 13655
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4556525945663452,
      "learning_rate": 0.0005948143093991224,
      "loss": 3.1326,
      "step": 13656
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8811578750610352,
      "learning_rate": 0.0005948135520930059,
      "loss": 3.0064,
      "step": 13657
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4361968040466309,
      "learning_rate": 0.0005948127947320781,
      "loss": 3.334,
      "step": 13658
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.191033363342285,
      "learning_rate": 0.000594812037316339,
      "loss": 3.3219,
      "step": 13659
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9589476585388184,
      "learning_rate": 0.0005948112798457888,
      "loss": 3.208,
      "step": 13660
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3900541067123413,
      "learning_rate": 0.0005948105223204277,
      "loss": 3.0911,
      "step": 13661
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.570535659790039,
      "learning_rate": 0.0005948097647402556,
      "loss": 3.137,
      "step": 13662
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5641789436340332,
      "learning_rate": 0.0005948090071052728,
      "loss": 3.1419,
      "step": 13663
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3769422769546509,
      "learning_rate": 0.0005948082494154795,
      "loss": 2.9957,
      "step": 13664
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6481389999389648,
      "learning_rate": 0.0005948074916708758,
      "loss": 3.1291,
      "step": 13665
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0044310092926025,
      "learning_rate": 0.0005948067338714618,
      "loss": 2.8023,
      "step": 13666
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6340750455856323,
      "learning_rate": 0.0005948059760172376,
      "loss": 3.3821,
      "step": 13667
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5428259372711182,
      "learning_rate": 0.0005948052181082035,
      "loss": 3.0596,
      "step": 13668
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.508253812789917,
      "learning_rate": 0.0005948044601443595,
      "loss": 3.2235,
      "step": 13669
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.496214509010315,
      "learning_rate": 0.0005948037021257058,
      "loss": 3.1487,
      "step": 13670
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4684432744979858,
      "learning_rate": 0.0005948029440522425,
      "loss": 3.144,
      "step": 13671
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4157216548919678,
      "learning_rate": 0.0005948021859239698,
      "loss": 3.0286,
      "step": 13672
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4211865663528442,
      "learning_rate": 0.0005948014277408878,
      "loss": 2.9999,
      "step": 13673
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.471448540687561,
      "learning_rate": 0.0005948006695029966,
      "loss": 3.0092,
      "step": 13674
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4578728675842285,
      "learning_rate": 0.0005947999112102964,
      "loss": 3.3979,
      "step": 13675
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9472001791000366,
      "learning_rate": 0.0005947991528627874,
      "loss": 3.1727,
      "step": 13676
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5359474420547485,
      "learning_rate": 0.0005947983944604696,
      "loss": 2.9835,
      "step": 13677
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5395992994308472,
      "learning_rate": 0.0005947976360033433,
      "loss": 3.3688,
      "step": 13678
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4867490530014038,
      "learning_rate": 0.0005947968774914086,
      "loss": 3.0028,
      "step": 13679
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6164659261703491,
      "learning_rate": 0.0005947961189246654,
      "loss": 3.2518,
      "step": 13680
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6218748092651367,
      "learning_rate": 0.0005947953603031141,
      "loss": 3.2334,
      "step": 13681
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8628523349761963,
      "learning_rate": 0.0005947946016267549,
      "loss": 3.2356,
      "step": 13682
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.352921485900879,
      "learning_rate": 0.0005947938428955878,
      "loss": 3.2103,
      "step": 13683
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0212454795837402,
      "learning_rate": 0.0005947930841096129,
      "loss": 2.9803,
      "step": 13684
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.091897964477539,
      "learning_rate": 0.0005947923252688303,
      "loss": 3.0417,
      "step": 13685
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.187312602996826,
      "learning_rate": 0.0005947915663732404,
      "loss": 3.1638,
      "step": 13686
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.683039903640747,
      "learning_rate": 0.0005947908074228431,
      "loss": 3.2841,
      "step": 13687
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6753172874450684,
      "learning_rate": 0.0005947900484176386,
      "loss": 2.9114,
      "step": 13688
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6773918867111206,
      "learning_rate": 0.0005947892893576271,
      "loss": 3.0433,
      "step": 13689
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5833168029785156,
      "learning_rate": 0.0005947885302428088,
      "loss": 3.3626,
      "step": 13690
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2993205785751343,
      "learning_rate": 0.0005947877710731837,
      "loss": 3.381,
      "step": 13691
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.75348961353302,
      "learning_rate": 0.0005947870118487518,
      "loss": 3.0616,
      "step": 13692
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9132996797561646,
      "learning_rate": 0.0005947862525695137,
      "loss": 2.8348,
      "step": 13693
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.733092188835144,
      "learning_rate": 0.0005947854932354692,
      "loss": 3.3294,
      "step": 13694
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.8200972080230713,
      "learning_rate": 0.0005947847338466185,
      "loss": 3.1385,
      "step": 13695
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2026944160461426,
      "learning_rate": 0.0005947839744029616,
      "loss": 2.9146,
      "step": 13696
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3881770372390747,
      "learning_rate": 0.000594783214904499,
      "loss": 2.9722,
      "step": 13697
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6996837854385376,
      "learning_rate": 0.0005947824553512305,
      "loss": 3.2038,
      "step": 13698
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9352141618728638,
      "learning_rate": 0.0005947816957431564,
      "loss": 2.9644,
      "step": 13699
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1718544960021973,
      "learning_rate": 0.0005947809360802769,
      "loss": 3.0841,
      "step": 13700
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4525654315948486,
      "learning_rate": 0.000594780176362592,
      "loss": 2.981,
      "step": 13701
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4305508136749268,
      "learning_rate": 0.000594779416590102,
      "loss": 2.8908,
      "step": 13702
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.7944977283477783,
      "learning_rate": 0.0005947786567628068,
      "loss": 3.1045,
      "step": 13703
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.273843765258789,
      "learning_rate": 0.0005947778968807067,
      "loss": 3.165,
      "step": 13704
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.455976724624634,
      "learning_rate": 0.0005947771369438019,
      "loss": 3.1185,
      "step": 13705
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.188782215118408,
      "learning_rate": 0.0005947763769520925,
      "loss": 3.0574,
      "step": 13706
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6751059293746948,
      "learning_rate": 0.0005947756169055785,
      "loss": 3.212,
      "step": 13707
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.140900135040283,
      "learning_rate": 0.0005947748568042602,
      "loss": 3.2004,
      "step": 13708
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6703901290893555,
      "learning_rate": 0.0005947740966481376,
      "loss": 3.0974,
      "step": 13709
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5103338956832886,
      "learning_rate": 0.0005947733364372109,
      "loss": 3.2894,
      "step": 13710
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3728079795837402,
      "learning_rate": 0.0005947725761714805,
      "loss": 3.1168,
      "step": 13711
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2055773735046387,
      "learning_rate": 0.0005947718158509462,
      "loss": 2.9291,
      "step": 13712
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5354511737823486,
      "learning_rate": 0.0005947710554756083,
      "loss": 3.087,
      "step": 13713
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4990296363830566,
      "learning_rate": 0.0005947702950454668,
      "loss": 3.2794,
      "step": 13714
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6328966617584229,
      "learning_rate": 0.0005947695345605219,
      "loss": 2.8671,
      "step": 13715
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.648850440979004,
      "learning_rate": 0.0005947687740207739,
      "loss": 3.4261,
      "step": 13716
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6278434991836548,
      "learning_rate": 0.0005947680134262228,
      "loss": 3.2418,
      "step": 13717
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0324881076812744,
      "learning_rate": 0.0005947672527768687,
      "loss": 3.0472,
      "step": 13718
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8571890592575073,
      "learning_rate": 0.0005947664920727118,
      "loss": 3.1522,
      "step": 13719
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.71615469455719,
      "learning_rate": 0.0005947657313137524,
      "loss": 3.2427,
      "step": 13720
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9011001586914062,
      "learning_rate": 0.0005947649704999904,
      "loss": 3.1021,
      "step": 13721
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.3667619228363037,
      "learning_rate": 0.000594764209631426,
      "loss": 2.9641,
      "step": 13722
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5123658180236816,
      "learning_rate": 0.0005947634487080593,
      "loss": 2.981,
      "step": 13723
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5919549465179443,
      "learning_rate": 0.0005947626877298906,
      "loss": 3.056,
      "step": 13724
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5852274894714355,
      "learning_rate": 0.0005947619266969199,
      "loss": 3.0474,
      "step": 13725
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3814289569854736,
      "learning_rate": 0.0005947611656091475,
      "loss": 3.2142,
      "step": 13726
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8274297714233398,
      "learning_rate": 0.0005947604044665732,
      "loss": 3.011,
      "step": 13727
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3984471559524536,
      "learning_rate": 0.0005947596432691977,
      "loss": 3.267,
      "step": 13728
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.832713007926941,
      "learning_rate": 0.0005947588820170206,
      "loss": 2.9762,
      "step": 13729
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.696047067642212,
      "learning_rate": 0.0005947581207100424,
      "loss": 3.0966,
      "step": 13730
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.549586534500122,
      "learning_rate": 0.000594757359348263,
      "loss": 3.0036,
      "step": 13731
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.999340057373047,
      "learning_rate": 0.0005947565979316827,
      "loss": 3.1363,
      "step": 13732
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6201308965682983,
      "learning_rate": 0.0005947558364603015,
      "loss": 3.0197,
      "step": 13733
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.337578296661377,
      "learning_rate": 0.0005947550749341197,
      "loss": 3.2413,
      "step": 13734
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.823235511779785,
      "learning_rate": 0.0005947543133531373,
      "loss": 3.0975,
      "step": 13735
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6210594177246094,
      "learning_rate": 0.0005947535517173546,
      "loss": 3.2114,
      "step": 13736
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5681384801864624,
      "learning_rate": 0.0005947527900267716,
      "loss": 3.1315,
      "step": 13737
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.568589448928833,
      "learning_rate": 0.0005947520282813886,
      "loss": 3.0323,
      "step": 13738
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9210890531539917,
      "learning_rate": 0.0005947512664812054,
      "loss": 2.8808,
      "step": 13739
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5231726169586182,
      "learning_rate": 0.0005947505046262224,
      "loss": 3.058,
      "step": 13740
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.082564353942871,
      "learning_rate": 0.0005947497427164399,
      "loss": 3.093,
      "step": 13741
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4135862588882446,
      "learning_rate": 0.0005947489807518577,
      "loss": 2.9157,
      "step": 13742
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7060433626174927,
      "learning_rate": 0.0005947482187324763,
      "loss": 3.1514,
      "step": 13743
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.629359245300293,
      "learning_rate": 0.0005947474566582955,
      "loss": 3.0372,
      "step": 13744
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.729122519493103,
      "learning_rate": 0.0005947466945293156,
      "loss": 3.2901,
      "step": 13745
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3709567785263062,
      "learning_rate": 0.0005947459323455368,
      "loss": 3.1879,
      "step": 13746
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3508800268173218,
      "learning_rate": 0.000594745170106959,
      "loss": 3.3026,
      "step": 13747
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3658522367477417,
      "learning_rate": 0.0005947444078135826,
      "loss": 3.1133,
      "step": 13748
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5057417154312134,
      "learning_rate": 0.0005947436454654077,
      "loss": 2.9941,
      "step": 13749
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4007817506790161,
      "learning_rate": 0.0005947428830624344,
      "loss": 3.167,
      "step": 13750
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7235993146896362,
      "learning_rate": 0.0005947421206046628,
      "loss": 3.1567,
      "step": 13751
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.581915020942688,
      "learning_rate": 0.0005947413580920931,
      "loss": 2.9768,
      "step": 13752
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.249276876449585,
      "learning_rate": 0.0005947405955247254,
      "loss": 2.9477,
      "step": 13753
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.463289976119995,
      "learning_rate": 0.0005947398329025598,
      "loss": 3.0395,
      "step": 13754
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8905595541000366,
      "learning_rate": 0.0005947390702255966,
      "loss": 3.0904,
      "step": 13755
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.527888774871826,
      "learning_rate": 0.0005947383074938357,
      "loss": 3.0429,
      "step": 13756
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8561419248580933,
      "learning_rate": 0.0005947375447072776,
      "loss": 3.1365,
      "step": 13757
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.261304259300232,
      "learning_rate": 0.000594736781865922,
      "loss": 3.0594,
      "step": 13758
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4212015867233276,
      "learning_rate": 0.0005947360189697693,
      "loss": 2.8921,
      "step": 13759
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5923347473144531,
      "learning_rate": 0.0005947352560188197,
      "loss": 3.1375,
      "step": 13760
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6108908653259277,
      "learning_rate": 0.0005947344930130733,
      "loss": 2.9593,
      "step": 13761
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4372223615646362,
      "learning_rate": 0.0005947337299525301,
      "loss": 3.1068,
      "step": 13762
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5306159257888794,
      "learning_rate": 0.0005947329668371903,
      "loss": 3.1094,
      "step": 13763
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4555714130401611,
      "learning_rate": 0.0005947322036670542,
      "loss": 3.0789,
      "step": 13764
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6289747953414917,
      "learning_rate": 0.0005947314404421218,
      "loss": 3.1391,
      "step": 13765
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.772884488105774,
      "learning_rate": 0.0005947306771623931,
      "loss": 2.9534,
      "step": 13766
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8808445930480957,
      "learning_rate": 0.0005947299138278686,
      "loss": 3.1895,
      "step": 13767
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6082991361618042,
      "learning_rate": 0.0005947291504385482,
      "loss": 3.0023,
      "step": 13768
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6481245756149292,
      "learning_rate": 0.000594728386994432,
      "loss": 3.0771,
      "step": 13769
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.781195878982544,
      "learning_rate": 0.0005947276234955203,
      "loss": 3.4282,
      "step": 13770
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.575601577758789,
      "learning_rate": 0.0005947268599418131,
      "loss": 3.1675,
      "step": 13771
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7869852781295776,
      "learning_rate": 0.0005947260963333108,
      "loss": 3.3869,
      "step": 13772
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.59783136844635,
      "learning_rate": 0.0005947253326700131,
      "loss": 3.3798,
      "step": 13773
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.935829997062683,
      "learning_rate": 0.0005947245689519205,
      "loss": 3.1443,
      "step": 13774
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4963035583496094,
      "learning_rate": 0.0005947238051790332,
      "loss": 3.0066,
      "step": 13775
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4574954509735107,
      "learning_rate": 0.0005947230413513511,
      "loss": 2.8987,
      "step": 13776
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6432009935379028,
      "learning_rate": 0.0005947222774688743,
      "loss": 3.1154,
      "step": 13777
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6389950513839722,
      "learning_rate": 0.0005947215135316031,
      "loss": 3.1664,
      "step": 13778
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6169822216033936,
      "learning_rate": 0.0005947207495395376,
      "loss": 3.0898,
      "step": 13779
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7148370742797852,
      "learning_rate": 0.0005947199854926781,
      "loss": 3.3197,
      "step": 13780
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4533965587615967,
      "learning_rate": 0.0005947192213910244,
      "loss": 2.9646,
      "step": 13781
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5257360935211182,
      "learning_rate": 0.000594718457234577,
      "loss": 3.2355,
      "step": 13782
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4219878911972046,
      "learning_rate": 0.0005947176930233358,
      "loss": 3.3306,
      "step": 13783
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.277515411376953,
      "learning_rate": 0.0005947169287573009,
      "loss": 3.1663,
      "step": 13784
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4781850576400757,
      "learning_rate": 0.0005947161644364727,
      "loss": 3.1251,
      "step": 13785
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5705863237380981,
      "learning_rate": 0.0005947154000608512,
      "loss": 3.0436,
      "step": 13786
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4738777875900269,
      "learning_rate": 0.0005947146356304365,
      "loss": 3.1795,
      "step": 13787
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.972184658050537,
      "learning_rate": 0.0005947138711452287,
      "loss": 3.445,
      "step": 13788
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.780014157295227,
      "learning_rate": 0.0005947131066052282,
      "loss": 3.1191,
      "step": 13789
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5697052478790283,
      "learning_rate": 0.0005947123420104349,
      "loss": 3.0572,
      "step": 13790
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1094765663146973,
      "learning_rate": 0.0005947115773608491,
      "loss": 3.0612,
      "step": 13791
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.159893751144409,
      "learning_rate": 0.0005947108126564707,
      "loss": 3.1849,
      "step": 13792
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6538105010986328,
      "learning_rate": 0.0005947100478973001,
      "loss": 3.1527,
      "step": 13793
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.861389398574829,
      "learning_rate": 0.0005947092830833373,
      "loss": 2.9386,
      "step": 13794
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4079548120498657,
      "learning_rate": 0.0005947085182145825,
      "loss": 3.2104,
      "step": 13795
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.488122820854187,
      "learning_rate": 0.0005947077532910358,
      "loss": 3.1031,
      "step": 13796
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.382276177406311,
      "learning_rate": 0.0005947069883126973,
      "loss": 3.0652,
      "step": 13797
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4901959896087646,
      "learning_rate": 0.0005947062232795672,
      "loss": 3.3753,
      "step": 13798
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.629349708557129,
      "learning_rate": 0.0005947054581916458,
      "loss": 3.0735,
      "step": 13799
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3487606048583984,
      "learning_rate": 0.0005947046930489331,
      "loss": 3.2893,
      "step": 13800
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.008300304412842,
      "learning_rate": 0.0005947039278514291,
      "loss": 3.1182,
      "step": 13801
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9007230997085571,
      "learning_rate": 0.0005947031625991342,
      "loss": 3.1027,
      "step": 13802
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.673891544342041,
      "learning_rate": 0.0005947023972920482,
      "loss": 3.4499,
      "step": 13803
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9742324352264404,
      "learning_rate": 0.0005947016319301717,
      "loss": 3.0508,
      "step": 13804
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4614633321762085,
      "learning_rate": 0.0005947008665135046,
      "loss": 3.1977,
      "step": 13805
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1823673248291016,
      "learning_rate": 0.0005947001010420469,
      "loss": 2.8485,
      "step": 13806
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6386984586715698,
      "learning_rate": 0.0005946993355157989,
      "loss": 3.3165,
      "step": 13807
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5110796689987183,
      "learning_rate": 0.0005946985699347608,
      "loss": 3.1369,
      "step": 13808
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8563933372497559,
      "learning_rate": 0.0005946978042989326,
      "loss": 3.3036,
      "step": 13809
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4986333847045898,
      "learning_rate": 0.0005946970386083146,
      "loss": 3.0273,
      "step": 13810
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5721514225006104,
      "learning_rate": 0.0005946962728629069,
      "loss": 3.18,
      "step": 13811
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3146545886993408,
      "learning_rate": 0.0005946955070627095,
      "loss": 3.0683,
      "step": 13812
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3732762336730957,
      "learning_rate": 0.0005946947412077226,
      "loss": 3.3786,
      "step": 13813
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.740606427192688,
      "learning_rate": 0.0005946939752979464,
      "loss": 2.8303,
      "step": 13814
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4155267477035522,
      "learning_rate": 0.0005946932093333812,
      "loss": 2.904,
      "step": 13815
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.032616376876831,
      "learning_rate": 0.0005946924433140269,
      "loss": 3.3625,
      "step": 13816
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9655240774154663,
      "learning_rate": 0.0005946916772398836,
      "loss": 3.28,
      "step": 13817
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6389166116714478,
      "learning_rate": 0.0005946909111109517,
      "loss": 3.2961,
      "step": 13818
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.025986433029175,
      "learning_rate": 0.0005946901449272311,
      "loss": 3.2,
      "step": 13819
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.0899155139923096,
      "learning_rate": 0.000594689378688722,
      "loss": 3.0274,
      "step": 13820
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7546390295028687,
      "learning_rate": 0.0005946886123954247,
      "loss": 3.051,
      "step": 13821
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4397910833358765,
      "learning_rate": 0.0005946878460473391,
      "loss": 3.2348,
      "step": 13822
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.113245725631714,
      "learning_rate": 0.0005946870796444655,
      "loss": 3.0865,
      "step": 13823
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.0549049377441406,
      "learning_rate": 0.000594686313186804,
      "loss": 3.0903,
      "step": 13824
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0097553730010986,
      "learning_rate": 0.0005946855466743548,
      "loss": 3.417,
      "step": 13825
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6479713916778564,
      "learning_rate": 0.0005946847801071181,
      "loss": 3.0703,
      "step": 13826
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4866702556610107,
      "learning_rate": 0.0005946840134850937,
      "loss": 3.1967,
      "step": 13827
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.7795560359954834,
      "learning_rate": 0.0005946832468082822,
      "loss": 3.1332,
      "step": 13828
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7264963388442993,
      "learning_rate": 0.0005946824800766834,
      "loss": 3.4091,
      "step": 13829
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9492381811141968,
      "learning_rate": 0.0005946817132902976,
      "loss": 3.1242,
      "step": 13830
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7553209066390991,
      "learning_rate": 0.0005946809464491249,
      "loss": 3.2022,
      "step": 13831
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.521620988845825,
      "learning_rate": 0.0005946801795531654,
      "loss": 3.2088,
      "step": 13832
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4706525802612305,
      "learning_rate": 0.0005946794126024193,
      "loss": 3.2093,
      "step": 13833
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.646544337272644,
      "learning_rate": 0.0005946786455968868,
      "loss": 3.1821,
      "step": 13834
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9484513998031616,
      "learning_rate": 0.000594677878536568,
      "loss": 3.0849,
      "step": 13835
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5791959762573242,
      "learning_rate": 0.0005946771114214629,
      "loss": 3.2272,
      "step": 13836
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5655401945114136,
      "learning_rate": 0.0005946763442515718,
      "loss": 3.0144,
      "step": 13837
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7218995094299316,
      "learning_rate": 0.0005946755770268949,
      "loss": 2.7514,
      "step": 13838
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5314688682556152,
      "learning_rate": 0.0005946748097474321,
      "loss": 2.9656,
      "step": 13839
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.392622470855713,
      "learning_rate": 0.0005946740424131839,
      "loss": 3.1517,
      "step": 13840
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.061431646347046,
      "learning_rate": 0.00059467327502415,
      "loss": 3.4072,
      "step": 13841
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6064506769180298,
      "learning_rate": 0.0005946725075803309,
      "loss": 3.0291,
      "step": 13842
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.923189640045166,
      "learning_rate": 0.0005946717400817267,
      "loss": 3.2644,
      "step": 13843
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.3593013286590576,
      "learning_rate": 0.0005946709725283372,
      "loss": 3.1248,
      "step": 13844
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0675899982452393,
      "learning_rate": 0.000594670204920163,
      "loss": 3.3952,
      "step": 13845
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6605890989303589,
      "learning_rate": 0.0005946694372572041,
      "loss": 3.206,
      "step": 13846
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.9729301929473877,
      "learning_rate": 0.0005946686695394605,
      "loss": 2.8753,
      "step": 13847
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.9835712909698486,
      "learning_rate": 0.0005946679017669324,
      "loss": 3.0337,
      "step": 13848
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5109022855758667,
      "learning_rate": 0.0005946671339396199,
      "loss": 3.1312,
      "step": 13849
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.4387385845184326,
      "learning_rate": 0.0005946663660575234,
      "loss": 3.383,
      "step": 13850
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.4960432052612305,
      "learning_rate": 0.0005946655981206428,
      "loss": 3.0543,
      "step": 13851
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.8340365886688232,
      "learning_rate": 0.0005946648301289783,
      "loss": 3.0277,
      "step": 13852
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.980054259300232,
      "learning_rate": 0.00059466406208253,
      "loss": 3.0823,
      "step": 13853
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7792376279830933,
      "learning_rate": 0.0005946632939812981,
      "loss": 3.0193,
      "step": 13854
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6389193534851074,
      "learning_rate": 0.0005946625258252828,
      "loss": 3.1391,
      "step": 13855
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5915558338165283,
      "learning_rate": 0.0005946617576144842,
      "loss": 3.1041,
      "step": 13856
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6138966083526611,
      "learning_rate": 0.0005946609893489023,
      "loss": 3.1019,
      "step": 13857
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3538018465042114,
      "learning_rate": 0.0005946602210285374,
      "loss": 3.0292,
      "step": 13858
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8056715726852417,
      "learning_rate": 0.0005946594526533896,
      "loss": 3.2159,
      "step": 13859
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6474926471710205,
      "learning_rate": 0.0005946586842234591,
      "loss": 3.2046,
      "step": 13860
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7470893859863281,
      "learning_rate": 0.0005946579157387459,
      "loss": 2.9723,
      "step": 13861
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7996253967285156,
      "learning_rate": 0.0005946571471992502,
      "loss": 3.1293,
      "step": 13862
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5785857439041138,
      "learning_rate": 0.0005946563786049723,
      "loss": 3.2815,
      "step": 13863
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4495232105255127,
      "learning_rate": 0.0005946556099559122,
      "loss": 3.0272,
      "step": 13864
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8537201881408691,
      "learning_rate": 0.00059465484125207,
      "loss": 3.1643,
      "step": 13865
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3750977516174316,
      "learning_rate": 0.0005946540724934459,
      "loss": 2.9189,
      "step": 13866
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.60016667842865,
      "learning_rate": 0.0005946533036800401,
      "loss": 3.1493,
      "step": 13867
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5178979635238647,
      "learning_rate": 0.0005946525348118527,
      "loss": 3.4168,
      "step": 13868
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.596166968345642,
      "learning_rate": 0.0005946517658888837,
      "loss": 3.0671,
      "step": 13869
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4749212265014648,
      "learning_rate": 0.0005946509969111336,
      "loss": 2.8479,
      "step": 13870
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2986998558044434,
      "learning_rate": 0.0005946502278786021,
      "loss": 2.832,
      "step": 13871
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9696173667907715,
      "learning_rate": 0.0005946494587912897,
      "loss": 3.2421,
      "step": 13872
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7012546062469482,
      "learning_rate": 0.0005946486896491964,
      "loss": 3.0508,
      "step": 13873
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.119537115097046,
      "learning_rate": 0.0005946479204523222,
      "loss": 3.147,
      "step": 13874
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6767727136611938,
      "learning_rate": 0.0005946471512006675,
      "loss": 3.0843,
      "step": 13875
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4263744354248047,
      "learning_rate": 0.0005946463818942324,
      "loss": 3.0185,
      "step": 13876
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2188321352005005,
      "learning_rate": 0.0005946456125330168,
      "loss": 3.1223,
      "step": 13877
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6738706827163696,
      "learning_rate": 0.0005946448431170212,
      "loss": 3.1855,
      "step": 13878
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4753162860870361,
      "learning_rate": 0.0005946440736462454,
      "loss": 3.3034,
      "step": 13879
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.41176176071167,
      "learning_rate": 0.0005946433041206897,
      "loss": 2.9973,
      "step": 13880
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3624038696289062,
      "learning_rate": 0.0005946425345403544,
      "loss": 3.4073,
      "step": 13881
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.1776173114776611,
      "learning_rate": 0.0005946417649052394,
      "loss": 3.2263,
      "step": 13882
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.221435308456421,
      "learning_rate": 0.000594640995215345,
      "loss": 2.8446,
      "step": 13883
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6222329139709473,
      "learning_rate": 0.0005946402254706712,
      "loss": 3.1296,
      "step": 13884
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.835084080696106,
      "learning_rate": 0.0005946394556712183,
      "loss": 3.0151,
      "step": 13885
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7695235013961792,
      "learning_rate": 0.0005946386858169863,
      "loss": 3.3067,
      "step": 13886
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4531075954437256,
      "learning_rate": 0.0005946379159079754,
      "loss": 3.1269,
      "step": 13887
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3630391359329224,
      "learning_rate": 0.0005946371459441857,
      "loss": 3.0255,
      "step": 13888
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4510674476623535,
      "learning_rate": 0.0005946363759256175,
      "loss": 3.0055,
      "step": 13889
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1178390979766846,
      "learning_rate": 0.0005946356058522708,
      "loss": 3.0952,
      "step": 13890
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4255847930908203,
      "learning_rate": 0.0005946348357241458,
      "loss": 3.1333,
      "step": 13891
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.497593879699707,
      "learning_rate": 0.0005946340655412427,
      "loss": 3.0616,
      "step": 13892
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5723257064819336,
      "learning_rate": 0.0005946332953035615,
      "loss": 3.0678,
      "step": 13893
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5118248462677002,
      "learning_rate": 0.0005946325250111023,
      "loss": 3.246,
      "step": 13894
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8531147241592407,
      "learning_rate": 0.0005946317546638654,
      "loss": 3.1059,
      "step": 13895
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8418605327606201,
      "learning_rate": 0.0005946309842618509,
      "loss": 3.1744,
      "step": 13896
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.8167266845703125,
      "learning_rate": 0.000594630213805059,
      "loss": 3.2216,
      "step": 13897
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.129276752471924,
      "learning_rate": 0.0005946294432934898,
      "loss": 3.2205,
      "step": 13898
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.920722246170044,
      "learning_rate": 0.0005946286727271433,
      "loss": 3.1859,
      "step": 13899
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.100661277770996,
      "learning_rate": 0.0005946279021060199,
      "loss": 3.137,
      "step": 13900
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7627248764038086,
      "learning_rate": 0.0005946271314301195,
      "loss": 3.1895,
      "step": 13901
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8386662006378174,
      "learning_rate": 0.0005946263606994423,
      "loss": 3.3004,
      "step": 13902
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.231520891189575,
      "learning_rate": 0.0005946255899139887,
      "loss": 3.2134,
      "step": 13903
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3784725666046143,
      "learning_rate": 0.0005946248190737586,
      "loss": 3.225,
      "step": 13904
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.796258568763733,
      "learning_rate": 0.0005946240481787521,
      "loss": 2.9392,
      "step": 13905
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7360599040985107,
      "learning_rate": 0.0005946232772289695,
      "loss": 3.2951,
      "step": 13906
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5497950315475464,
      "learning_rate": 0.0005946225062244109,
      "loss": 3.1056,
      "step": 13907
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4024618864059448,
      "learning_rate": 0.0005946217351650762,
      "loss": 3.2649,
      "step": 13908
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5698132514953613,
      "learning_rate": 0.0005946209640509658,
      "loss": 3.4766,
      "step": 13909
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0169079303741455,
      "learning_rate": 0.00059462019288208,
      "loss": 2.9622,
      "step": 13910
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5071814060211182,
      "learning_rate": 0.0005946194216584187,
      "loss": 2.8582,
      "step": 13911
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.125316858291626,
      "learning_rate": 0.000594618650379982,
      "loss": 3.0839,
      "step": 13912
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4348183870315552,
      "learning_rate": 0.0005946178790467701,
      "loss": 3.0689,
      "step": 13913
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8544771671295166,
      "learning_rate": 0.0005946171076587832,
      "loss": 3.1921,
      "step": 13914
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7587668895721436,
      "learning_rate": 0.0005946163362160216,
      "loss": 3.1029,
      "step": 13915
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.627687692642212,
      "learning_rate": 0.0005946155647184851,
      "loss": 3.3674,
      "step": 13916
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.274587631225586,
      "learning_rate": 0.000594614793166174,
      "loss": 2.9465,
      "step": 13917
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7039272785186768,
      "learning_rate": 0.0005946140215590885,
      "loss": 3.0461,
      "step": 13918
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8532084226608276,
      "learning_rate": 0.0005946132498972286,
      "loss": 3.2511,
      "step": 13919
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5618212223052979,
      "learning_rate": 0.0005946124781805946,
      "loss": 3.0372,
      "step": 13920
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.690630555152893,
      "learning_rate": 0.0005946117064091867,
      "loss": 3.3966,
      "step": 13921
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.115217447280884,
      "learning_rate": 0.0005946109345830047,
      "loss": 3.1306,
      "step": 13922
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5527271032333374,
      "learning_rate": 0.0005946101627020491,
      "loss": 3.1762,
      "step": 13923
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5108431577682495,
      "learning_rate": 0.0005946093907663199,
      "loss": 3.1301,
      "step": 13924
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.792855143547058,
      "learning_rate": 0.0005946086187758172,
      "loss": 3.1653,
      "step": 13925
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5341193675994873,
      "learning_rate": 0.0005946078467305412,
      "loss": 3.0215,
      "step": 13926
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4395442008972168,
      "learning_rate": 0.0005946070746304921,
      "loss": 3.1332,
      "step": 13927
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3395012617111206,
      "learning_rate": 0.00059460630247567,
      "loss": 3.2731,
      "step": 13928
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4417831897735596,
      "learning_rate": 0.000594605530266075,
      "loss": 3.2457,
      "step": 13929
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.481359839439392,
      "learning_rate": 0.0005946047580017072,
      "loss": 3.008,
      "step": 13930
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5257624387741089,
      "learning_rate": 0.0005946039856825669,
      "loss": 3.1627,
      "step": 13931
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5703082084655762,
      "learning_rate": 0.0005946032133086541,
      "loss": 2.9498,
      "step": 13932
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4370524883270264,
      "learning_rate": 0.000594602440879969,
      "loss": 3.3842,
      "step": 13933
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9317126274108887,
      "learning_rate": 0.0005946016683965118,
      "loss": 3.2405,
      "step": 13934
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.721246600151062,
      "learning_rate": 0.0005946008958582824,
      "loss": 3.179,
      "step": 13935
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.600029468536377,
      "learning_rate": 0.0005946001232652815,
      "loss": 3.0207,
      "step": 13936
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5754790306091309,
      "learning_rate": 0.0005945993506175086,
      "loss": 3.2618,
      "step": 13937
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1379687786102295,
      "learning_rate": 0.0005945985779149641,
      "loss": 3.16,
      "step": 13938
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.131446123123169,
      "learning_rate": 0.0005945978051576483,
      "loss": 3.1367,
      "step": 13939
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.775438666343689,
      "learning_rate": 0.0005945970323455611,
      "loss": 3.2171,
      "step": 13940
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0862505435943604,
      "learning_rate": 0.0005945962594787029,
      "loss": 3.2039,
      "step": 13941
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.694498300552368,
      "learning_rate": 0.0005945954865570736,
      "loss": 2.9896,
      "step": 13942
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.426198124885559,
      "learning_rate": 0.0005945947135806734,
      "loss": 3.1251,
      "step": 13943
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3964519500732422,
      "learning_rate": 0.0005945939405495025,
      "loss": 2.9191,
      "step": 13944
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.155317783355713,
      "learning_rate": 0.0005945931674635611,
      "loss": 3.0967,
      "step": 13945
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8161685466766357,
      "learning_rate": 0.0005945923943228492,
      "loss": 3.1781,
      "step": 13946
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7392821311950684,
      "learning_rate": 0.0005945916211273671,
      "loss": 3.0046,
      "step": 13947
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4704229831695557,
      "learning_rate": 0.0005945908478771147,
      "loss": 2.968,
      "step": 13948
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.624021053314209,
      "learning_rate": 0.0005945900745720924,
      "loss": 2.8743,
      "step": 13949
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.855658769607544,
      "learning_rate": 0.0005945893012123002,
      "loss": 3.1932,
      "step": 13950
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.408279538154602,
      "learning_rate": 0.0005945885277977384,
      "loss": 3.1391,
      "step": 13951
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5562855005264282,
      "learning_rate": 0.000594587754328407,
      "loss": 3.1603,
      "step": 13952
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8766329288482666,
      "learning_rate": 0.0005945869808043061,
      "loss": 2.9761,
      "step": 13953
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6384092569351196,
      "learning_rate": 0.000594586207225436,
      "loss": 3.161,
      "step": 13954
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6383295059204102,
      "learning_rate": 0.0005945854335917966,
      "loss": 2.9405,
      "step": 13955
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7222176790237427,
      "learning_rate": 0.0005945846599033883,
      "loss": 3.189,
      "step": 13956
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.860663890838623,
      "learning_rate": 0.0005945838861602113,
      "loss": 3.0301,
      "step": 13957
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4481580257415771,
      "learning_rate": 0.0005945831123622654,
      "loss": 3.2515,
      "step": 13958
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4583420753479004,
      "learning_rate": 0.000594582338509551,
      "loss": 3.1705,
      "step": 13959
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.427951693534851,
      "learning_rate": 0.0005945815646020682,
      "loss": 3.2403,
      "step": 13960
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.415532350540161,
      "learning_rate": 0.0005945807906398171,
      "loss": 2.7885,
      "step": 13961
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.849629282951355,
      "learning_rate": 0.0005945800166227978,
      "loss": 3.1129,
      "step": 13962
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6008869409561157,
      "learning_rate": 0.0005945792425510107,
      "loss": 3.31,
      "step": 13963
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.772256851196289,
      "learning_rate": 0.0005945784684244556,
      "loss": 3.1925,
      "step": 13964
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4812912940979004,
      "learning_rate": 0.0005945776942431328,
      "loss": 2.9105,
      "step": 13965
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3530031442642212,
      "learning_rate": 0.0005945769200070425,
      "loss": 3.1436,
      "step": 13966
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4595212936401367,
      "learning_rate": 0.0005945761457161849,
      "loss": 2.9613,
      "step": 13967
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.641028881072998,
      "learning_rate": 0.0005945753713705599,
      "loss": 3.0051,
      "step": 13968
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8187892436981201,
      "learning_rate": 0.0005945745969701678,
      "loss": 3.3989,
      "step": 13969
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7011704444885254,
      "learning_rate": 0.0005945738225150087,
      "loss": 3.4149,
      "step": 13970
    },
    {
      "epoch": 0.18,
      "grad_norm": 4.608952045440674,
      "learning_rate": 0.0005945730480050828,
      "loss": 2.9443,
      "step": 13971
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.7480335235595703,
      "learning_rate": 0.0005945722734403901,
      "loss": 3.1609,
      "step": 13972
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4585989713668823,
      "learning_rate": 0.0005945714988209311,
      "loss": 3.171,
      "step": 13973
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7002922296524048,
      "learning_rate": 0.0005945707241467055,
      "loss": 3.2618,
      "step": 13974
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.5431113243103027,
      "learning_rate": 0.0005945699494177137,
      "loss": 3.3684,
      "step": 13975
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6459282636642456,
      "learning_rate": 0.0005945691746339558,
      "loss": 3.2554,
      "step": 13976
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2935742139816284,
      "learning_rate": 0.0005945683997954319,
      "loss": 3.1624,
      "step": 13977
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.071406126022339,
      "learning_rate": 0.0005945676249021421,
      "loss": 3.0475,
      "step": 13978
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4152586460113525,
      "learning_rate": 0.0005945668499540868,
      "loss": 3.1643,
      "step": 13979
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4043996334075928,
      "learning_rate": 0.0005945660749512658,
      "loss": 3.1375,
      "step": 13980
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.802597165107727,
      "learning_rate": 0.0005945652998936796,
      "loss": 3.2487,
      "step": 13981
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4103405475616455,
      "learning_rate": 0.0005945645247813279,
      "loss": 2.9406,
      "step": 13982
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6219416856765747,
      "learning_rate": 0.0005945637496142113,
      "loss": 2.9357,
      "step": 13983
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7596240043640137,
      "learning_rate": 0.0005945629743923296,
      "loss": 3.1863,
      "step": 13984
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.552178144454956,
      "learning_rate": 0.0005945621991156831,
      "loss": 3.0892,
      "step": 13985
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4079759120941162,
      "learning_rate": 0.0005945614237842721,
      "loss": 3.3145,
      "step": 13986
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.742482304573059,
      "learning_rate": 0.0005945606483980963,
      "loss": 3.3414,
      "step": 13987
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5938899517059326,
      "learning_rate": 0.0005945598729571563,
      "loss": 3.1729,
      "step": 13988
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.370002031326294,
      "learning_rate": 0.000594559097461452,
      "loss": 3.1167,
      "step": 13989
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3882277011871338,
      "learning_rate": 0.0005945583219109836,
      "loss": 3.2444,
      "step": 13990
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5352197885513306,
      "learning_rate": 0.0005945575463057513,
      "loss": 2.87,
      "step": 13991
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3325822353363037,
      "learning_rate": 0.0005945567706457551,
      "loss": 2.9745,
      "step": 13992
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3591163158416748,
      "learning_rate": 0.0005945559949309953,
      "loss": 3.1748,
      "step": 13993
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5264614820480347,
      "learning_rate": 0.000594555219161472,
      "loss": 3.0614,
      "step": 13994
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.679551601409912,
      "learning_rate": 0.0005945544433371854,
      "loss": 3.2221,
      "step": 13995
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3109798431396484,
      "learning_rate": 0.0005945536674581354,
      "loss": 3.1838,
      "step": 13996
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0841236114501953,
      "learning_rate": 0.0005945528915243223,
      "loss": 3.0718,
      "step": 13997
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3773244619369507,
      "learning_rate": 0.0005945521155357464,
      "loss": 3.3002,
      "step": 13998
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5293854475021362,
      "learning_rate": 0.0005945513394924076,
      "loss": 3.1496,
      "step": 13999
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4613250494003296,
      "learning_rate": 0.0005945505633943063,
      "loss": 3.1941,
      "step": 14000
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5151662826538086,
      "learning_rate": 0.0005945497872414423,
      "loss": 3.0636,
      "step": 14001
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6500530242919922,
      "learning_rate": 0.0005945490110338159,
      "loss": 3.0464,
      "step": 14002
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.621999740600586,
      "learning_rate": 0.0005945482347714274,
      "loss": 3.0701,
      "step": 14003
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7440738677978516,
      "learning_rate": 0.0005945474584542769,
      "loss": 3.1283,
      "step": 14004
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5680439472198486,
      "learning_rate": 0.0005945466820823644,
      "loss": 3.053,
      "step": 14005
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4976716041564941,
      "learning_rate": 0.00059454590565569,
      "loss": 3.1597,
      "step": 14006
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6152877807617188,
      "learning_rate": 0.0005945451291742541,
      "loss": 3.187,
      "step": 14007
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.547211170196533,
      "learning_rate": 0.0005945443526380567,
      "loss": 3.1131,
      "step": 14008
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9055315256118774,
      "learning_rate": 0.0005945435760470978,
      "loss": 3.0996,
      "step": 14009
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8681540489196777,
      "learning_rate": 0.0005945427994013778,
      "loss": 3.1495,
      "step": 14010
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.351830244064331,
      "learning_rate": 0.0005945420227008967,
      "loss": 3.214,
      "step": 14011
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.484661340713501,
      "learning_rate": 0.0005945412459456547,
      "loss": 2.9762,
      "step": 14012
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4377518892288208,
      "learning_rate": 0.0005945404691356519,
      "loss": 2.9804,
      "step": 14013
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.3022027015686035,
      "learning_rate": 0.0005945396922708884,
      "loss": 3.1884,
      "step": 14014
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6677443981170654,
      "learning_rate": 0.0005945389153513646,
      "loss": 3.0844,
      "step": 14015
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.474295973777771,
      "learning_rate": 0.0005945381383770803,
      "loss": 3.3762,
      "step": 14016
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3670176267623901,
      "learning_rate": 0.0005945373613480359,
      "loss": 3.1563,
      "step": 14017
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7621229887008667,
      "learning_rate": 0.0005945365842642314,
      "loss": 3.0702,
      "step": 14018
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5832035541534424,
      "learning_rate": 0.000594535807125667,
      "loss": 3.2209,
      "step": 14019
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8275822401046753,
      "learning_rate": 0.0005945350299323427,
      "loss": 3.2922,
      "step": 14020
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7793430089950562,
      "learning_rate": 0.000594534252684259,
      "loss": 3.2607,
      "step": 14021
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7626211643218994,
      "learning_rate": 0.0005945334753814157,
      "loss": 3.1553,
      "step": 14022
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.254838228225708,
      "learning_rate": 0.0005945326980238131,
      "loss": 3.1432,
      "step": 14023
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.905786156654358,
      "learning_rate": 0.0005945319206114513,
      "loss": 3.1694,
      "step": 14024
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3926923274993896,
      "learning_rate": 0.0005945311431443304,
      "loss": 2.7525,
      "step": 14025
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4837348461151123,
      "learning_rate": 0.0005945303656224506,
      "loss": 2.9921,
      "step": 14026
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.145371437072754,
      "learning_rate": 0.0005945295880458121,
      "loss": 3.1827,
      "step": 14027
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4071980714797974,
      "learning_rate": 0.000594528810414415,
      "loss": 3.2791,
      "step": 14028
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6983743906021118,
      "learning_rate": 0.0005945280327282595,
      "loss": 3.0293,
      "step": 14029
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4885954856872559,
      "learning_rate": 0.0005945272549873456,
      "loss": 2.9763,
      "step": 14030
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6266809701919556,
      "learning_rate": 0.0005945264771916735,
      "loss": 2.8963,
      "step": 14031
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4907028675079346,
      "learning_rate": 0.0005945256993412434,
      "loss": 3.164,
      "step": 14032
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.899582028388977,
      "learning_rate": 0.0005945249214360554,
      "loss": 3.4409,
      "step": 14033
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5073351860046387,
      "learning_rate": 0.0005945241434761096,
      "loss": 3.0815,
      "step": 14034
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.608521819114685,
      "learning_rate": 0.0005945233654614063,
      "loss": 3.1846,
      "step": 14035
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2114884853363037,
      "learning_rate": 0.0005945225873919455,
      "loss": 3.3146,
      "step": 14036
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6580784320831299,
      "learning_rate": 0.0005945218092677274,
      "loss": 3.0647,
      "step": 14037
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7469258308410645,
      "learning_rate": 0.0005945210310887521,
      "loss": 3.2642,
      "step": 14038
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.4349749088287354,
      "learning_rate": 0.0005945202528550199,
      "loss": 3.2198,
      "step": 14039
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.0423665046691895,
      "learning_rate": 0.0005945194745665308,
      "loss": 3.2497,
      "step": 14040
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4723602533340454,
      "learning_rate": 0.0005945186962232849,
      "loss": 3.4696,
      "step": 14041
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7151260375976562,
      "learning_rate": 0.0005945179178252824,
      "loss": 2.9312,
      "step": 14042
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.25297212600708,
      "learning_rate": 0.0005945171393725234,
      "loss": 3.1166,
      "step": 14043
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4685540199279785,
      "learning_rate": 0.0005945163608650082,
      "loss": 3.2498,
      "step": 14044
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4465997219085693,
      "learning_rate": 0.000594515582302737,
      "loss": 2.9608,
      "step": 14045
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2538280487060547,
      "learning_rate": 0.0005945148036857095,
      "loss": 3.1729,
      "step": 14046
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.048973798751831,
      "learning_rate": 0.0005945140250139263,
      "loss": 3.299,
      "step": 14047
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6816800832748413,
      "learning_rate": 0.0005945132462873874,
      "loss": 3.1536,
      "step": 14048
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2874864339828491,
      "learning_rate": 0.0005945124675060929,
      "loss": 3.0906,
      "step": 14049
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.072500228881836,
      "learning_rate": 0.0005945116886700429,
      "loss": 2.9917,
      "step": 14050
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8727753162384033,
      "learning_rate": 0.0005945109097792376,
      "loss": 3.0094,
      "step": 14051
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3328098058700562,
      "learning_rate": 0.0005945101308336774,
      "loss": 2.9721,
      "step": 14052
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.654381513595581,
      "learning_rate": 0.000594509351833362,
      "loss": 3.1991,
      "step": 14053
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6779628992080688,
      "learning_rate": 0.0005945085727782918,
      "loss": 3.412,
      "step": 14054
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.356239080429077,
      "learning_rate": 0.0005945077936684669,
      "loss": 3.2493,
      "step": 14055
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5168089866638184,
      "learning_rate": 0.0005945070145038874,
      "loss": 2.7052,
      "step": 14056
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3524032831192017,
      "learning_rate": 0.0005945062352845535,
      "loss": 3.1317,
      "step": 14057
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9910045862197876,
      "learning_rate": 0.0005945054560104654,
      "loss": 3.1934,
      "step": 14058
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.622902512550354,
      "learning_rate": 0.0005945046766816232,
      "loss": 3.152,
      "step": 14059
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4306676387786865,
      "learning_rate": 0.0005945038972980269,
      "loss": 3.3466,
      "step": 14060
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8307918310165405,
      "learning_rate": 0.0005945031178596769,
      "loss": 3.2273,
      "step": 14061
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.048640012741089,
      "learning_rate": 0.000594502338366573,
      "loss": 3.1625,
      "step": 14062
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.49545156955719,
      "learning_rate": 0.0005945015588187157,
      "loss": 3.1157,
      "step": 14063
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.576873540878296,
      "learning_rate": 0.000594500779216105,
      "loss": 3.2531,
      "step": 14064
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8228670358657837,
      "learning_rate": 0.0005944999995587411,
      "loss": 3.237,
      "step": 14065
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5013710260391235,
      "learning_rate": 0.0005944992198466239,
      "loss": 3.0611,
      "step": 14066
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.60590660572052,
      "learning_rate": 0.0005944984400797539,
      "loss": 2.9057,
      "step": 14067
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.593168258666992,
      "learning_rate": 0.0005944976602581311,
      "loss": 3.0883,
      "step": 14068
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2417184114456177,
      "learning_rate": 0.0005944968803817556,
      "loss": 3.1923,
      "step": 14069
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3424121141433716,
      "learning_rate": 0.0005944961004506275,
      "loss": 3.1266,
      "step": 14070
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.226374626159668,
      "learning_rate": 0.0005944953204647471,
      "loss": 3.1146,
      "step": 14071
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3622469902038574,
      "learning_rate": 0.0005944945404241144,
      "loss": 3.1285,
      "step": 14072
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4534814357757568,
      "learning_rate": 0.0005944937603287296,
      "loss": 3.2119,
      "step": 14073
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6017714738845825,
      "learning_rate": 0.0005944929801785929,
      "loss": 3.3223,
      "step": 14074
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.073676586151123,
      "learning_rate": 0.0005944921999737043,
      "loss": 3.2333,
      "step": 14075
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.698381781578064,
      "learning_rate": 0.0005944914197140642,
      "loss": 3.1818,
      "step": 14076
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4864346981048584,
      "learning_rate": 0.0005944906393996725,
      "loss": 2.9883,
      "step": 14077
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.7048871517181396,
      "learning_rate": 0.0005944898590305295,
      "loss": 3.1758,
      "step": 14078
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8039194345474243,
      "learning_rate": 0.0005944890786066351,
      "loss": 3.2049,
      "step": 14079
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5162765979766846,
      "learning_rate": 0.0005944882981279897,
      "loss": 3.0748,
      "step": 14080
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9770466089248657,
      "learning_rate": 0.0005944875175945935,
      "loss": 3.1686,
      "step": 14081
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5828849077224731,
      "learning_rate": 0.0005944867370064463,
      "loss": 3.0631,
      "step": 14082
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.2881585359573364,
      "learning_rate": 0.0005944859563635486,
      "loss": 2.9899,
      "step": 14083
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5456899404525757,
      "learning_rate": 0.0005944851756659004,
      "loss": 3.2841,
      "step": 14084
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4312481880187988,
      "learning_rate": 0.0005944843949135018,
      "loss": 3.4003,
      "step": 14085
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7777645587921143,
      "learning_rate": 0.000594483614106353,
      "loss": 3.1763,
      "step": 14086
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.703008770942688,
      "learning_rate": 0.0005944828332444541,
      "loss": 3.0086,
      "step": 14087
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9980560541152954,
      "learning_rate": 0.0005944820523278053,
      "loss": 3.1865,
      "step": 14088
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5665838718414307,
      "learning_rate": 0.0005944812713564069,
      "loss": 3.3907,
      "step": 14089
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.506818175315857,
      "learning_rate": 0.0005944804903302586,
      "loss": 2.8074,
      "step": 14090
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8799631595611572,
      "learning_rate": 0.000594479709249361,
      "loss": 3.13,
      "step": 14091
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.484036445617676,
      "learning_rate": 0.000594478928113714,
      "loss": 3.0068,
      "step": 14092
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4688405990600586,
      "learning_rate": 0.0005944781469233178,
      "loss": 2.7961,
      "step": 14093
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9676203727722168,
      "learning_rate": 0.0005944773656781726,
      "loss": 3.1449,
      "step": 14094
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9851967096328735,
      "learning_rate": 0.0005944765843782784,
      "loss": 3.3326,
      "step": 14095
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5367120504379272,
      "learning_rate": 0.0005944758030236356,
      "loss": 3.2529,
      "step": 14096
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.564274311065674,
      "learning_rate": 0.0005944750216142439,
      "loss": 2.9727,
      "step": 14097
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7550323009490967,
      "learning_rate": 0.000594474240150104,
      "loss": 3.2546,
      "step": 14098
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6912568807601929,
      "learning_rate": 0.0005944734586312158,
      "loss": 3.1446,
      "step": 14099
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4326854944229126,
      "learning_rate": 0.0005944726770575793,
      "loss": 3.3279,
      "step": 14100
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.49216628074646,
      "learning_rate": 0.0005944718954291948,
      "loss": 3.1106,
      "step": 14101
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7177908420562744,
      "learning_rate": 0.0005944711137460624,
      "loss": 3.2414,
      "step": 14102
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7288411855697632,
      "learning_rate": 0.0005944703320081824,
      "loss": 3.0707,
      "step": 14103
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5908247232437134,
      "learning_rate": 0.0005944695502155547,
      "loss": 3.3071,
      "step": 14104
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7417501211166382,
      "learning_rate": 0.0005944687683681794,
      "loss": 3.0573,
      "step": 14105
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9965652227401733,
      "learning_rate": 0.0005944679864660571,
      "loss": 2.9578,
      "step": 14106
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.395886778831482,
      "learning_rate": 0.0005944672045091874,
      "loss": 3.2989,
      "step": 14107
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7612777948379517,
      "learning_rate": 0.0005944664224975708,
      "loss": 3.0857,
      "step": 14108
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7819948196411133,
      "learning_rate": 0.0005944656404312073,
      "loss": 3.2891,
      "step": 14109
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.918320894241333,
      "learning_rate": 0.0005944648583100971,
      "loss": 3.2541,
      "step": 14110
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.457464337348938,
      "learning_rate": 0.0005944640761342402,
      "loss": 3.3772,
      "step": 14111
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9262560606002808,
      "learning_rate": 0.000594463293903637,
      "loss": 3.1195,
      "step": 14112
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.52327036857605,
      "learning_rate": 0.0005944625116182876,
      "loss": 3.0296,
      "step": 14113
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.480509877204895,
      "learning_rate": 0.0005944617292781919,
      "loss": 3.1731,
      "step": 14114
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.836392402648926,
      "learning_rate": 0.0005944609468833503,
      "loss": 3.016,
      "step": 14115
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.849981427192688,
      "learning_rate": 0.0005944601644337627,
      "loss": 3.06,
      "step": 14116
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.435858726501465,
      "learning_rate": 0.0005944593819294297,
      "loss": 3.1395,
      "step": 14117
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6069929599761963,
      "learning_rate": 0.0005944585993703508,
      "loss": 3.0287,
      "step": 14118
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.2121286392211914,
      "learning_rate": 0.0005944578167565267,
      "loss": 3.1452,
      "step": 14119
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.6185874938964844,
      "learning_rate": 0.0005944570340879572,
      "loss": 3.0868,
      "step": 14120
    },
    {
      "epoch": 0.18,
      "grad_norm": 4.059117794036865,
      "learning_rate": 0.0005944562513646428,
      "loss": 3.0708,
      "step": 14121
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4126291275024414,
      "learning_rate": 0.0005944554685865833,
      "loss": 3.1291,
      "step": 14122
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8611377477645874,
      "learning_rate": 0.0005944546857537789,
      "loss": 3.1037,
      "step": 14123
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.559465169906616,
      "learning_rate": 0.0005944539028662299,
      "loss": 3.0554,
      "step": 14124
    },
    {
      "epoch": 0.18,
      "grad_norm": 5.884796142578125,
      "learning_rate": 0.0005944531199239363,
      "loss": 3.0893,
      "step": 14125
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.1566216945648193,
      "learning_rate": 0.0005944523369268984,
      "loss": 2.945,
      "step": 14126
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2401554584503174,
      "learning_rate": 0.0005944515538751161,
      "loss": 3.1224,
      "step": 14127
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5673012733459473,
      "learning_rate": 0.0005944507707685897,
      "loss": 3.047,
      "step": 14128
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.3230175971984863,
      "learning_rate": 0.0005944499876073194,
      "loss": 3.1475,
      "step": 14129
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8060485124588013,
      "learning_rate": 0.0005944492043913055,
      "loss": 3.3851,
      "step": 14130
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9536561965942383,
      "learning_rate": 0.0005944484211205477,
      "loss": 2.921,
      "step": 14131
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0717039108276367,
      "learning_rate": 0.0005944476377950465,
      "loss": 3.3427,
      "step": 14132
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8436120748519897,
      "learning_rate": 0.0005944468544148018,
      "loss": 3.2528,
      "step": 14133
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6838706731796265,
      "learning_rate": 0.0005944460709798139,
      "loss": 3.1714,
      "step": 14134
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6977949142456055,
      "learning_rate": 0.000594445287490083,
      "loss": 3.0074,
      "step": 14135
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4984512329101562,
      "learning_rate": 0.0005944445039456091,
      "loss": 3.1755,
      "step": 14136
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8307178020477295,
      "learning_rate": 0.0005944437203463924,
      "loss": 3.0269,
      "step": 14137
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6474179029464722,
      "learning_rate": 0.0005944429366924331,
      "loss": 3.2854,
      "step": 14138
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.465177297592163,
      "learning_rate": 0.0005944421529837312,
      "loss": 3.1674,
      "step": 14139
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.6117050647735596,
      "learning_rate": 0.0005944413692202871,
      "loss": 2.9949,
      "step": 14140
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4210336208343506,
      "learning_rate": 0.0005944405854021007,
      "loss": 3.3059,
      "step": 14141
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5917999744415283,
      "learning_rate": 0.0005944398015291722,
      "loss": 3.3459,
      "step": 14142
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8068808317184448,
      "learning_rate": 0.0005944390176015018,
      "loss": 3.168,
      "step": 14143
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.116353988647461,
      "learning_rate": 0.0005944382336190897,
      "loss": 2.865,
      "step": 14144
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.067965269088745,
      "learning_rate": 0.000594437449581936,
      "loss": 3.329,
      "step": 14145
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.390775203704834,
      "learning_rate": 0.0005944366654900406,
      "loss": 3.1216,
      "step": 14146
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4095523357391357,
      "learning_rate": 0.0005944358813434041,
      "loss": 3.2494,
      "step": 14147
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.7886850833892822,
      "learning_rate": 0.0005944350971420263,
      "loss": 3.2131,
      "step": 14148
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9462751150131226,
      "learning_rate": 0.0005944343128859075,
      "loss": 3.004,
      "step": 14149
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3820775747299194,
      "learning_rate": 0.0005944335285750479,
      "loss": 3.0281,
      "step": 14150
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4778413772583008,
      "learning_rate": 0.0005944327442094474,
      "loss": 3.0263,
      "step": 14151
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0762135982513428,
      "learning_rate": 0.0005944319597891063,
      "loss": 2.8956,
      "step": 14152
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4575554132461548,
      "learning_rate": 0.0005944311753140248,
      "loss": 3.0831,
      "step": 14153
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4527448415756226,
      "learning_rate": 0.0005944303907842029,
      "loss": 3.4328,
      "step": 14154
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5414429903030396,
      "learning_rate": 0.000594429606199641,
      "loss": 3.0424,
      "step": 14155
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8375506401062012,
      "learning_rate": 0.0005944288215603389,
      "loss": 3.2419,
      "step": 14156
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6601572036743164,
      "learning_rate": 0.000594428036866297,
      "loss": 3.2363,
      "step": 14157
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.935644268989563,
      "learning_rate": 0.0005944272521175154,
      "loss": 3.0697,
      "step": 14158
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5753825902938843,
      "learning_rate": 0.0005944264673139941,
      "loss": 3.2733,
      "step": 14159
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5874228477478027,
      "learning_rate": 0.0005944256824557335,
      "loss": 2.8804,
      "step": 14160
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8317804336547852,
      "learning_rate": 0.0005944248975427336,
      "loss": 3.2574,
      "step": 14161
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4005074501037598,
      "learning_rate": 0.0005944241125749944,
      "loss": 3.3595,
      "step": 14162
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6645965576171875,
      "learning_rate": 0.0005944233275525163,
      "loss": 3.2111,
      "step": 14163
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.4351284503936768,
      "learning_rate": 0.0005944225424752994,
      "loss": 3.319,
      "step": 14164
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6787396669387817,
      "learning_rate": 0.0005944217573433438,
      "loss": 3.174,
      "step": 14165
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4219914674758911,
      "learning_rate": 0.0005944209721566496,
      "loss": 2.9707,
      "step": 14166
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.510582447052002,
      "learning_rate": 0.0005944201869152169,
      "loss": 3.2953,
      "step": 14167
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.272603988647461,
      "learning_rate": 0.0005944194016190461,
      "loss": 3.4253,
      "step": 14168
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4046335220336914,
      "learning_rate": 0.000594418616268137,
      "loss": 2.9773,
      "step": 14169
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8184618949890137,
      "learning_rate": 0.00059441783086249,
      "loss": 3.0815,
      "step": 14170
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6023467779159546,
      "learning_rate": 0.0005944170454021051,
      "loss": 3.2005,
      "step": 14171
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7024822235107422,
      "learning_rate": 0.0005944162598869827,
      "loss": 3.1284,
      "step": 14172
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5839858055114746,
      "learning_rate": 0.0005944154743171226,
      "loss": 3.3842,
      "step": 14173
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5610783100128174,
      "learning_rate": 0.0005944146886925251,
      "loss": 3.1312,
      "step": 14174
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3075950145721436,
      "learning_rate": 0.0005944139030131904,
      "loss": 2.9692,
      "step": 14175
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.2111148834228516,
      "learning_rate": 0.0005944131172791186,
      "loss": 3.0285,
      "step": 14176
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5240991115570068,
      "learning_rate": 0.0005944123314903098,
      "loss": 3.106,
      "step": 14177
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4499000310897827,
      "learning_rate": 0.0005944115456467643,
      "loss": 3.235,
      "step": 14178
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4681282043457031,
      "learning_rate": 0.000594410759748482,
      "loss": 3.1815,
      "step": 14179
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.607102394104004,
      "learning_rate": 0.0005944099737954633,
      "loss": 3.1997,
      "step": 14180
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5555577278137207,
      "learning_rate": 0.0005944091877877082,
      "loss": 3.2097,
      "step": 14181
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.5133845806121826,
      "learning_rate": 0.0005944084017252167,
      "loss": 3.029,
      "step": 14182
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.016481399536133,
      "learning_rate": 0.0005944076156079893,
      "loss": 3.0587,
      "step": 14183
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5245771408081055,
      "learning_rate": 0.000594406829436026,
      "loss": 2.9785,
      "step": 14184
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9377530813217163,
      "learning_rate": 0.0005944060432093269,
      "loss": 3.0165,
      "step": 14185
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.4932384490966797,
      "learning_rate": 0.000594405256927892,
      "loss": 3.1912,
      "step": 14186
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5678750276565552,
      "learning_rate": 0.0005944044705917218,
      "loss": 3.2587,
      "step": 14187
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6998035907745361,
      "learning_rate": 0.0005944036842008162,
      "loss": 3.1616,
      "step": 14188
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5111284255981445,
      "learning_rate": 0.0005944028977551753,
      "loss": 3.3853,
      "step": 14189
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5060373544692993,
      "learning_rate": 0.0005944021112547994,
      "loss": 3.085,
      "step": 14190
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.689071536064148,
      "learning_rate": 0.0005944013246996886,
      "loss": 2.954,
      "step": 14191
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3269799947738647,
      "learning_rate": 0.000594400538089843,
      "loss": 3.0828,
      "step": 14192
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3704129457473755,
      "learning_rate": 0.0005943997514252628,
      "loss": 3.1705,
      "step": 14193
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3140063285827637,
      "learning_rate": 0.0005943989647059483,
      "loss": 3.1924,
      "step": 14194
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3626857995986938,
      "learning_rate": 0.0005943981779318992,
      "loss": 3.1679,
      "step": 14195
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.513511061668396,
      "learning_rate": 0.000594397391103116,
      "loss": 3.3662,
      "step": 14196
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.717632532119751,
      "learning_rate": 0.0005943966042195989,
      "loss": 2.9687,
      "step": 14197
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.496127963066101,
      "learning_rate": 0.0005943958172813478,
      "loss": 3.0946,
      "step": 14198
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3609782457351685,
      "learning_rate": 0.000594395030288363,
      "loss": 3.3223,
      "step": 14199
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.6715044975280762,
      "learning_rate": 0.0005943942432406446,
      "loss": 3.248,
      "step": 14200
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7624351978302002,
      "learning_rate": 0.0005943934561381927,
      "loss": 3.0126,
      "step": 14201
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.5796403884887695,
      "learning_rate": 0.0005943926689810075,
      "loss": 2.9949,
      "step": 14202
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.7357573509216309,
      "learning_rate": 0.0005943918817690893,
      "loss": 3.0008,
      "step": 14203
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.3815200328826904,
      "learning_rate": 0.0005943910945024379,
      "loss": 3.3376,
      "step": 14204
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.253494143486023,
      "learning_rate": 0.0005943903071810538,
      "loss": 3.0946,
      "step": 14205
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.8095128536224365,
      "learning_rate": 0.0005943895198049369,
      "loss": 2.8757,
      "step": 14206
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.9803967475891113,
      "learning_rate": 0.0005943887323740874,
      "loss": 2.959,
      "step": 14207
    },
    {
      "epoch": 0.18,
      "grad_norm": 2.0792107582092285,
      "learning_rate": 0.0005943879448885055,
      "loss": 2.9784,
      "step": 14208
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.494868755340576,
      "learning_rate": 0.0005943871573481914,
      "loss": 3.0581,
      "step": 14209
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.393056869506836,
      "learning_rate": 0.000594386369753145,
      "loss": 3.0771,
      "step": 14210
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.115607500076294,
      "learning_rate": 0.0005943855821033668,
      "loss": 2.9328,
      "step": 14211
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.708319067955017,
      "learning_rate": 0.0005943847943988566,
      "loss": 2.9613,
      "step": 14212
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6156316995620728,
      "learning_rate": 0.0005943840066396148,
      "loss": 3.2912,
      "step": 14213
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.029101848602295,
      "learning_rate": 0.0005943832188256415,
      "loss": 3.2362,
      "step": 14214
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1184606552124023,
      "learning_rate": 0.0005943824309569368,
      "loss": 3.3047,
      "step": 14215
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.907413363456726,
      "learning_rate": 0.0005943816430335007,
      "loss": 3.1944,
      "step": 14216
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6841671466827393,
      "learning_rate": 0.0005943808550553337,
      "loss": 3.0047,
      "step": 14217
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4405665397644043,
      "learning_rate": 0.0005943800670224356,
      "loss": 3.0196,
      "step": 14218
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5212961435317993,
      "learning_rate": 0.0005943792789348067,
      "loss": 3.0552,
      "step": 14219
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.399278998374939,
      "learning_rate": 0.0005943784907924473,
      "loss": 3.0114,
      "step": 14220
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.416001558303833,
      "learning_rate": 0.0005943777025953571,
      "loss": 3.0926,
      "step": 14221
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3719004392623901,
      "learning_rate": 0.0005943769143435367,
      "loss": 3.0495,
      "step": 14222
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6468040943145752,
      "learning_rate": 0.000594376126036986,
      "loss": 3.1083,
      "step": 14223
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8678685426712036,
      "learning_rate": 0.0005943753376757054,
      "loss": 2.7311,
      "step": 14224
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4452866315841675,
      "learning_rate": 0.0005943745492596946,
      "loss": 3.3114,
      "step": 14225
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1699793338775635,
      "learning_rate": 0.0005943737607889542,
      "loss": 2.9039,
      "step": 14226
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.881042957305908,
      "learning_rate": 0.0005943729722634842,
      "loss": 3.078,
      "step": 14227
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5368940830230713,
      "learning_rate": 0.0005943721836832845,
      "loss": 3.1813,
      "step": 14228
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5395718812942505,
      "learning_rate": 0.0005943713950483557,
      "loss": 3.1772,
      "step": 14229
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.944226026535034,
      "learning_rate": 0.0005943706063586975,
      "loss": 2.9573,
      "step": 14230
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6626067161560059,
      "learning_rate": 0.0005943698176143103,
      "loss": 2.9946,
      "step": 14231
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9725836515426636,
      "learning_rate": 0.0005943690288151943,
      "loss": 3.0837,
      "step": 14232
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3887367248535156,
      "learning_rate": 0.0005943682399613495,
      "loss": 2.9295,
      "step": 14233
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5983959436416626,
      "learning_rate": 0.000594367451052776,
      "loss": 3.1747,
      "step": 14234
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.599834680557251,
      "learning_rate": 0.0005943666620894741,
      "loss": 2.8841,
      "step": 14235
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4419788122177124,
      "learning_rate": 0.0005943658730714438,
      "loss": 3.008,
      "step": 14236
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5946452617645264,
      "learning_rate": 0.0005943650839986855,
      "loss": 3.1585,
      "step": 14237
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4450554847717285,
      "learning_rate": 0.0005943642948711991,
      "loss": 3.0909,
      "step": 14238
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5546590089797974,
      "learning_rate": 0.0005943635056889847,
      "loss": 3.2527,
      "step": 14239
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7824331521987915,
      "learning_rate": 0.0005943627164520426,
      "loss": 3.2237,
      "step": 14240
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0041909217834473,
      "learning_rate": 0.0005943619271603731,
      "loss": 3.2074,
      "step": 14241
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3679149150848389,
      "learning_rate": 0.000594361137813976,
      "loss": 3.0791,
      "step": 14242
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.308899164199829,
      "learning_rate": 0.0005943603484128517,
      "loss": 2.9766,
      "step": 14243
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7672200202941895,
      "learning_rate": 0.0005943595589570001,
      "loss": 3.132,
      "step": 14244
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.613122582435608,
      "learning_rate": 0.0005943587694464216,
      "loss": 3.1723,
      "step": 14245
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8642195463180542,
      "learning_rate": 0.0005943579798811162,
      "loss": 3.0435,
      "step": 14246
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.41072678565979,
      "learning_rate": 0.0005943571902610842,
      "loss": 3.0198,
      "step": 14247
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6296641826629639,
      "learning_rate": 0.0005943564005863256,
      "loss": 3.1028,
      "step": 14248
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.582921028137207,
      "learning_rate": 0.0005943556108568406,
      "loss": 3.1414,
      "step": 14249
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6344226598739624,
      "learning_rate": 0.0005943548210726292,
      "loss": 3.2245,
      "step": 14250
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5705268383026123,
      "learning_rate": 0.0005943540312336919,
      "loss": 3.0191,
      "step": 14251
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5195646286010742,
      "learning_rate": 0.0005943532413400285,
      "loss": 3.0415,
      "step": 14252
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4978749752044678,
      "learning_rate": 0.0005943524513916393,
      "loss": 3.1477,
      "step": 14253
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.832459568977356,
      "learning_rate": 0.0005943516613885244,
      "loss": 2.9851,
      "step": 14254
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8272143602371216,
      "learning_rate": 0.0005943508713306839,
      "loss": 3.1163,
      "step": 14255
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.243863344192505,
      "learning_rate": 0.0005943500812181182,
      "loss": 3.2143,
      "step": 14256
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.406098484992981,
      "learning_rate": 0.0005943492910508271,
      "loss": 3.0104,
      "step": 14257
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7457250356674194,
      "learning_rate": 0.000594348500828811,
      "loss": 2.7429,
      "step": 14258
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9201298952102661,
      "learning_rate": 0.0005943477105520699,
      "loss": 3.0964,
      "step": 14259
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5998382568359375,
      "learning_rate": 0.0005943469202206041,
      "loss": 3.3066,
      "step": 14260
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6186326742172241,
      "learning_rate": 0.0005943461298344136,
      "loss": 3.188,
      "step": 14261
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8423649072647095,
      "learning_rate": 0.0005943453393934986,
      "loss": 3.1655,
      "step": 14262
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7892823219299316,
      "learning_rate": 0.0005943445488978593,
      "loss": 3.2518,
      "step": 14263
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6085971593856812,
      "learning_rate": 0.0005943437583474957,
      "loss": 3.2528,
      "step": 14264
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.714404582977295,
      "learning_rate": 0.000594342967742408,
      "loss": 3.4356,
      "step": 14265
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4072444438934326,
      "learning_rate": 0.0005943421770825964,
      "loss": 2.9586,
      "step": 14266
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5135369300842285,
      "learning_rate": 0.0005943413863680611,
      "loss": 2.9384,
      "step": 14267
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6354836225509644,
      "learning_rate": 0.0005943405955988022,
      "loss": 3.1539,
      "step": 14268
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3273237943649292,
      "learning_rate": 0.0005943398047748197,
      "loss": 3.2499,
      "step": 14269
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4700826406478882,
      "learning_rate": 0.000594339013896114,
      "loss": 3.3144,
      "step": 14270
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3749231100082397,
      "learning_rate": 0.0005943382229626851,
      "loss": 3.1517,
      "step": 14271
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.074258804321289,
      "learning_rate": 0.0005943374319745332,
      "loss": 2.8255,
      "step": 14272
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3957157135009766,
      "learning_rate": 0.0005943366409316583,
      "loss": 3.1329,
      "step": 14273
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.612350583076477,
      "learning_rate": 0.0005943358498340607,
      "loss": 3.0736,
      "step": 14274
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6284370422363281,
      "learning_rate": 0.0005943350586817407,
      "loss": 3.3937,
      "step": 14275
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4502015113830566,
      "learning_rate": 0.0005943342674746981,
      "loss": 3.1518,
      "step": 14276
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4982939958572388,
      "learning_rate": 0.0005943334762129332,
      "loss": 3.1658,
      "step": 14277
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.401765823364258,
      "learning_rate": 0.000594332684896446,
      "loss": 2.9353,
      "step": 14278
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3672878742218018,
      "learning_rate": 0.000594331893525237,
      "loss": 3.0979,
      "step": 14279
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5619393587112427,
      "learning_rate": 0.0005943311020993061,
      "loss": 3.0761,
      "step": 14280
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.568455934524536,
      "learning_rate": 0.0005943303106186535,
      "loss": 3.1879,
      "step": 14281
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.696373462677002,
      "learning_rate": 0.0005943295190832794,
      "loss": 3.2601,
      "step": 14282
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3613479137420654,
      "learning_rate": 0.0005943287274931838,
      "loss": 2.9511,
      "step": 14283
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5476518869400024,
      "learning_rate": 0.0005943279358483669,
      "loss": 3.1861,
      "step": 14284
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.46543288230896,
      "learning_rate": 0.000594327144148829,
      "loss": 2.9133,
      "step": 14285
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.9351305961608887,
      "learning_rate": 0.00059432635239457,
      "loss": 3.2053,
      "step": 14286
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8109130859375,
      "learning_rate": 0.0005943255605855902,
      "loss": 3.298,
      "step": 14287
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.948240876197815,
      "learning_rate": 0.0005943247687218898,
      "loss": 3.0594,
      "step": 14288
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.375439405441284,
      "learning_rate": 0.0005943239768034688,
      "loss": 2.8963,
      "step": 14289
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8906983137130737,
      "learning_rate": 0.0005943231848303275,
      "loss": 3.1139,
      "step": 14290
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7751837968826294,
      "learning_rate": 0.0005943223928024659,
      "loss": 3.1219,
      "step": 14291
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2630019187927246,
      "learning_rate": 0.0005943216007198842,
      "loss": 3.0695,
      "step": 14292
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5242326259613037,
      "learning_rate": 0.0005943208085825826,
      "loss": 3.0767,
      "step": 14293
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.31154727935791,
      "learning_rate": 0.0005943200163905612,
      "loss": 3.1698,
      "step": 14294
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9381663799285889,
      "learning_rate": 0.0005943192241438201,
      "loss": 3.1325,
      "step": 14295
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4856553077697754,
      "learning_rate": 0.0005943184318423595,
      "loss": 3.1771,
      "step": 14296
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5302698612213135,
      "learning_rate": 0.0005943176394861796,
      "loss": 3.0702,
      "step": 14297
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8501181602478027,
      "learning_rate": 0.0005943168470752805,
      "loss": 2.9633,
      "step": 14298
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4589697122573853,
      "learning_rate": 0.0005943160546096623,
      "loss": 3.0542,
      "step": 14299
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.8722152709960938,
      "learning_rate": 0.0005943152620893252,
      "loss": 3.1373,
      "step": 14300
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1954963207244873,
      "learning_rate": 0.0005943144695142693,
      "loss": 2.9864,
      "step": 14301
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5887340307235718,
      "learning_rate": 0.0005943136768844948,
      "loss": 3.064,
      "step": 14302
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5048809051513672,
      "learning_rate": 0.000594312884200002,
      "loss": 3.3268,
      "step": 14303
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8832882642745972,
      "learning_rate": 0.0005943120914607907,
      "loss": 3.2994,
      "step": 14304
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.565532922744751,
      "learning_rate": 0.0005943112986668612,
      "loss": 2.9549,
      "step": 14305
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0049691200256348,
      "learning_rate": 0.0005943105058182138,
      "loss": 2.9844,
      "step": 14306
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4521790742874146,
      "learning_rate": 0.0005943097129148485,
      "loss": 3.087,
      "step": 14307
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.9741859436035156,
      "learning_rate": 0.0005943089199567654,
      "loss": 3.0722,
      "step": 14308
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.1623482704162598,
      "learning_rate": 0.0005943081269439648,
      "loss": 2.9799,
      "step": 14309
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0987067222595215,
      "learning_rate": 0.0005943073338764468,
      "loss": 3.1457,
      "step": 14310
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6376065015792847,
      "learning_rate": 0.0005943065407542113,
      "loss": 3.2192,
      "step": 14311
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.959280014038086,
      "learning_rate": 0.0005943057475772588,
      "loss": 3.1607,
      "step": 14312
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.354602575302124,
      "learning_rate": 0.0005943049543455892,
      "loss": 3.0757,
      "step": 14313
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7152819633483887,
      "learning_rate": 0.000594304161059203,
      "loss": 3.3121,
      "step": 14314
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8355398178100586,
      "learning_rate": 0.0005943033677180999,
      "loss": 3.0142,
      "step": 14315
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1146533489227295,
      "learning_rate": 0.0005943025743222803,
      "loss": 2.9981,
      "step": 14316
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.2446374893188477,
      "learning_rate": 0.0005943017808717442,
      "loss": 3.1787,
      "step": 14317
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4651691913604736,
      "learning_rate": 0.000594300987366492,
      "loss": 3.0978,
      "step": 14318
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4603608846664429,
      "learning_rate": 0.0005943001938065236,
      "loss": 2.9366,
      "step": 14319
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.595485806465149,
      "learning_rate": 0.0005942994001918393,
      "loss": 3.0162,
      "step": 14320
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.0712554454803467,
      "learning_rate": 0.0005942986065224391,
      "loss": 2.9402,
      "step": 14321
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6906849145889282,
      "learning_rate": 0.0005942978127983234,
      "loss": 3.2842,
      "step": 14322
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3462467193603516,
      "learning_rate": 0.000594297019019492,
      "loss": 3.3563,
      "step": 14323
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5587769746780396,
      "learning_rate": 0.0005942962251859452,
      "loss": 3.2449,
      "step": 14324
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0719873905181885,
      "learning_rate": 0.0005942954312976833,
      "loss": 3.0582,
      "step": 14325
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.593675136566162,
      "learning_rate": 0.0005942946373547063,
      "loss": 3.0026,
      "step": 14326
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.400246024131775,
      "learning_rate": 0.0005942938433570144,
      "loss": 3.1939,
      "step": 14327
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4606168270111084,
      "learning_rate": 0.0005942930493046077,
      "loss": 3.4237,
      "step": 14328
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9872450828552246,
      "learning_rate": 0.0005942922551974864,
      "loss": 3.058,
      "step": 14329
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.975390076637268,
      "learning_rate": 0.0005942914610356505,
      "loss": 2.9884,
      "step": 14330
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.483350396156311,
      "learning_rate": 0.0005942906668191003,
      "loss": 3.0278,
      "step": 14331
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.195505142211914,
      "learning_rate": 0.000594289872547836,
      "loss": 3.2133,
      "step": 14332
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2157363891601562,
      "learning_rate": 0.0005942890782218576,
      "loss": 3.1984,
      "step": 14333
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4054081439971924,
      "learning_rate": 0.0005942882838411653,
      "loss": 3.2199,
      "step": 14334
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4462817907333374,
      "learning_rate": 0.0005942874894057592,
      "loss": 3.2402,
      "step": 14335
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.823799967765808,
      "learning_rate": 0.0005942866949156395,
      "loss": 3.1393,
      "step": 14336
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.447409749031067,
      "learning_rate": 0.0005942859003708065,
      "loss": 3.1599,
      "step": 14337
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.715472459793091,
      "learning_rate": 0.0005942851057712601,
      "loss": 3.008,
      "step": 14338
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3761777877807617,
      "learning_rate": 0.0005942843111170005,
      "loss": 3.1832,
      "step": 14339
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.651664972305298,
      "learning_rate": 0.000594283516408028,
      "loss": 3.1855,
      "step": 14340
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.408172607421875,
      "learning_rate": 0.0005942827216443425,
      "loss": 3.2067,
      "step": 14341
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6187069416046143,
      "learning_rate": 0.0005942819268259443,
      "loss": 3.4265,
      "step": 14342
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.428179144859314,
      "learning_rate": 0.0005942811319528336,
      "loss": 3.0957,
      "step": 14343
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.576627492904663,
      "learning_rate": 0.0005942803370250106,
      "loss": 2.8211,
      "step": 14344
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.800043821334839,
      "learning_rate": 0.0005942795420424752,
      "loss": 3.2682,
      "step": 14345
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.450218915939331,
      "learning_rate": 0.0005942787470052277,
      "loss": 2.9117,
      "step": 14346
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7137562036514282,
      "learning_rate": 0.0005942779519132682,
      "loss": 3.1985,
      "step": 14347
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.826474666595459,
      "learning_rate": 0.0005942771567665968,
      "loss": 3.2122,
      "step": 14348
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.040572166442871,
      "learning_rate": 0.0005942763615652139,
      "loss": 3.214,
      "step": 14349
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4830323457717896,
      "learning_rate": 0.0005942755663091193,
      "loss": 3.1375,
      "step": 14350
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.867392659187317,
      "learning_rate": 0.0005942747709983135,
      "loss": 3.2101,
      "step": 14351
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9965742826461792,
      "learning_rate": 0.0005942739756327963,
      "loss": 2.8626,
      "step": 14352
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4742119312286377,
      "learning_rate": 0.000594273180212568,
      "loss": 3.1498,
      "step": 14353
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.197561740875244,
      "learning_rate": 0.000594272384737629,
      "loss": 2.862,
      "step": 14354
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8588833808898926,
      "learning_rate": 0.0005942715892079789,
      "loss": 3.1182,
      "step": 14355
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4329159259796143,
      "learning_rate": 0.0005942707936236184,
      "loss": 3.1308,
      "step": 14356
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.012449264526367,
      "learning_rate": 0.0005942699979845472,
      "loss": 3.3332,
      "step": 14357
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7610020637512207,
      "learning_rate": 0.0005942692022907659,
      "loss": 3.1467,
      "step": 14358
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.326859951019287,
      "learning_rate": 0.0005942684065422742,
      "loss": 2.9591,
      "step": 14359
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6865410804748535,
      "learning_rate": 0.0005942676107390725,
      "loss": 3.0467,
      "step": 14360
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8074158430099487,
      "learning_rate": 0.0005942668148811609,
      "loss": 2.9885,
      "step": 14361
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.647938847541809,
      "learning_rate": 0.0005942660189685395,
      "loss": 3.2695,
      "step": 14362
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.48399019241333,
      "learning_rate": 0.0005942652230012085,
      "loss": 3.193,
      "step": 14363
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.153348922729492,
      "learning_rate": 0.0005942644269791681,
      "loss": 3.2674,
      "step": 14364
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3719358444213867,
      "learning_rate": 0.0005942636309024183,
      "loss": 3.0189,
      "step": 14365
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8022929430007935,
      "learning_rate": 0.0005942628347709595,
      "loss": 2.8569,
      "step": 14366
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.578911304473877,
      "learning_rate": 0.0005942620385847915,
      "loss": 3.0857,
      "step": 14367
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0262184143066406,
      "learning_rate": 0.0005942612423439149,
      "loss": 3.2609,
      "step": 14368
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.666884183883667,
      "learning_rate": 0.0005942604460483293,
      "loss": 3.1355,
      "step": 14369
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.108055353164673,
      "learning_rate": 0.0005942596496980353,
      "loss": 3.1703,
      "step": 14370
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.126569986343384,
      "learning_rate": 0.0005942588532930328,
      "loss": 3.3439,
      "step": 14371
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5320689678192139,
      "learning_rate": 0.000594258056833322,
      "loss": 3.0158,
      "step": 14372
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.215867280960083,
      "learning_rate": 0.0005942572603189031,
      "loss": 3.0246,
      "step": 14373
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7683780193328857,
      "learning_rate": 0.0005942564637497762,
      "loss": 3.1269,
      "step": 14374
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5586256980895996,
      "learning_rate": 0.0005942556671259415,
      "loss": 3.1994,
      "step": 14375
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7585484981536865,
      "learning_rate": 0.0005942548704473991,
      "loss": 2.8668,
      "step": 14376
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.637129306793213,
      "learning_rate": 0.0005942540737141492,
      "loss": 3.1512,
      "step": 14377
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.322737216949463,
      "learning_rate": 0.0005942532769261918,
      "loss": 3.2569,
      "step": 14378
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4620275497436523,
      "learning_rate": 0.0005942524800835274,
      "loss": 2.9013,
      "step": 14379
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6104625463485718,
      "learning_rate": 0.0005942516831861557,
      "loss": 2.9545,
      "step": 14380
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9087492227554321,
      "learning_rate": 0.0005942508862340771,
      "loss": 3.1541,
      "step": 14381
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.933221697807312,
      "learning_rate": 0.0005942500892272917,
      "loss": 2.9502,
      "step": 14382
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7476253509521484,
      "learning_rate": 0.0005942492921657996,
      "loss": 3.1023,
      "step": 14383
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5093613862991333,
      "learning_rate": 0.0005942484950496012,
      "loss": 3.2511,
      "step": 14384
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0030267238616943,
      "learning_rate": 0.0005942476978786964,
      "loss": 2.9941,
      "step": 14385
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6606730222702026,
      "learning_rate": 0.0005942469006530853,
      "loss": 3.204,
      "step": 14386
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.530114769935608,
      "learning_rate": 0.0005942461033727681,
      "loss": 3.1774,
      "step": 14387
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.506292700767517,
      "learning_rate": 0.0005942453060377451,
      "loss": 2.9686,
      "step": 14388
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.545729160308838,
      "learning_rate": 0.0005942445086480164,
      "loss": 3.1499,
      "step": 14389
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.477669358253479,
      "learning_rate": 0.0005942437112035819,
      "loss": 3.2005,
      "step": 14390
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.170776844024658,
      "learning_rate": 0.000594242913704442,
      "loss": 3.1881,
      "step": 14391
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6262116432189941,
      "learning_rate": 0.0005942421161505969,
      "loss": 3.2429,
      "step": 14392
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.810219407081604,
      "learning_rate": 0.0005942413185420465,
      "loss": 3.4215,
      "step": 14393
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4554252624511719,
      "learning_rate": 0.0005942405208787912,
      "loss": 3.0908,
      "step": 14394
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.532155990600586,
      "learning_rate": 0.000594239723160831,
      "loss": 3.1451,
      "step": 14395
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.697373628616333,
      "learning_rate": 0.000594238925388166,
      "loss": 3.1826,
      "step": 14396
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8233082294464111,
      "learning_rate": 0.0005942381275607965,
      "loss": 3.1453,
      "step": 14397
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4897143840789795,
      "learning_rate": 0.0005942373296787226,
      "loss": 2.7678,
      "step": 14398
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7773520946502686,
      "learning_rate": 0.0005942365317419445,
      "loss": 3.0031,
      "step": 14399
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8235490322113037,
      "learning_rate": 0.0005942357337504621,
      "loss": 3.2571,
      "step": 14400
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5389642715454102,
      "learning_rate": 0.0005942349357042758,
      "loss": 3.2349,
      "step": 14401
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6007035970687866,
      "learning_rate": 0.0005942341376033857,
      "loss": 3.2711,
      "step": 14402
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5902730226516724,
      "learning_rate": 0.0005942333394477919,
      "loss": 3.1977,
      "step": 14403
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4126139879226685,
      "learning_rate": 0.0005942325412374946,
      "loss": 3.2025,
      "step": 14404
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5090465545654297,
      "learning_rate": 0.0005942317429724938,
      "loss": 3.3189,
      "step": 14405
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.424443006515503,
      "learning_rate": 0.0005942309446527899,
      "loss": 3.0441,
      "step": 14406
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5085638761520386,
      "learning_rate": 0.0005942301462783828,
      "loss": 2.9104,
      "step": 14407
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5302588939666748,
      "learning_rate": 0.0005942293478492729,
      "loss": 2.8696,
      "step": 14408
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5607889890670776,
      "learning_rate": 0.0005942285493654601,
      "loss": 3.3361,
      "step": 14409
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.681989312171936,
      "learning_rate": 0.0005942277508269446,
      "loss": 3.0689,
      "step": 14410
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7711234092712402,
      "learning_rate": 0.0005942269522337268,
      "loss": 2.9831,
      "step": 14411
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9186632633209229,
      "learning_rate": 0.0005942261535858064,
      "loss": 3.1221,
      "step": 14412
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.675197720527649,
      "learning_rate": 0.000594225354883184,
      "loss": 3.2536,
      "step": 14413
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.060504674911499,
      "learning_rate": 0.0005942245561258595,
      "loss": 3.0018,
      "step": 14414
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.550004243850708,
      "learning_rate": 0.0005942237573138331,
      "loss": 2.9653,
      "step": 14415
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6926761865615845,
      "learning_rate": 0.0005942229584471049,
      "loss": 3.1371,
      "step": 14416
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6752034425735474,
      "learning_rate": 0.0005942221595256753,
      "loss": 2.8727,
      "step": 14417
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9091399908065796,
      "learning_rate": 0.000594221360549544,
      "loss": 3.1713,
      "step": 14418
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.008760452270508,
      "learning_rate": 0.0005942205615187115,
      "loss": 3.1944,
      "step": 14419
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.469219446182251,
      "learning_rate": 0.0005942197624331778,
      "loss": 3.2215,
      "step": 14420
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.462079644203186,
      "learning_rate": 0.0005942189632929432,
      "loss": 3.3636,
      "step": 14421
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.802010416984558,
      "learning_rate": 0.0005942181640980076,
      "loss": 3.048,
      "step": 14422
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.12231183052063,
      "learning_rate": 0.0005942173648483714,
      "loss": 3.0976,
      "step": 14423
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9658249616622925,
      "learning_rate": 0.0005942165655440346,
      "loss": 2.8125,
      "step": 14424
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7038516998291016,
      "learning_rate": 0.0005942157661849974,
      "loss": 3.1935,
      "step": 14425
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4141768217086792,
      "learning_rate": 0.00059421496677126,
      "loss": 2.9479,
      "step": 14426
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3637198209762573,
      "learning_rate": 0.0005942141673028223,
      "loss": 3.2242,
      "step": 14427
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5275225639343262,
      "learning_rate": 0.0005942133677796848,
      "loss": 2.9381,
      "step": 14428
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.613145112991333,
      "learning_rate": 0.0005942125682018473,
      "loss": 3.0132,
      "step": 14429
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6746253967285156,
      "learning_rate": 0.0005942117685693103,
      "loss": 3.181,
      "step": 14430
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5896332263946533,
      "learning_rate": 0.0005942109688820737,
      "loss": 2.9928,
      "step": 14431
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.990745186805725,
      "learning_rate": 0.0005942101691401378,
      "loss": 2.966,
      "step": 14432
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6840711832046509,
      "learning_rate": 0.0005942093693435027,
      "loss": 3.002,
      "step": 14433
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1021573543548584,
      "learning_rate": 0.0005942085694921684,
      "loss": 3.0064,
      "step": 14434
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7018637657165527,
      "learning_rate": 0.0005942077695861352,
      "loss": 3.144,
      "step": 14435
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4101648330688477,
      "learning_rate": 0.0005942069696254033,
      "loss": 2.8824,
      "step": 14436
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3402382135391235,
      "learning_rate": 0.0005942061696099727,
      "loss": 3.2038,
      "step": 14437
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9055291414260864,
      "learning_rate": 0.0005942053695398436,
      "loss": 3.1565,
      "step": 14438
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7387481927871704,
      "learning_rate": 0.0005942045694150163,
      "loss": 3.1307,
      "step": 14439
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6528024673461914,
      "learning_rate": 0.0005942037692354907,
      "loss": 3.1858,
      "step": 14440
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4860576391220093,
      "learning_rate": 0.000594202969001267,
      "loss": 3.1878,
      "step": 14441
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4314662218093872,
      "learning_rate": 0.0005942021687123456,
      "loss": 3.0771,
      "step": 14442
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.581303596496582,
      "learning_rate": 0.0005942013683687264,
      "loss": 3.3536,
      "step": 14443
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.835514783859253,
      "learning_rate": 0.0005942005679704096,
      "loss": 3.0571,
      "step": 14444
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5161144733428955,
      "learning_rate": 0.0005941997675173952,
      "loss": 3.234,
      "step": 14445
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7249157428741455,
      "learning_rate": 0.0005941989670096837,
      "loss": 3.1428,
      "step": 14446
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.654823899269104,
      "learning_rate": 0.0005941981664472751,
      "loss": 3.2155,
      "step": 14447
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4716216325759888,
      "learning_rate": 0.0005941973658301694,
      "loss": 3.0495,
      "step": 14448
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5565353631973267,
      "learning_rate": 0.0005941965651583668,
      "loss": 3.1338,
      "step": 14449
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.402901530265808,
      "learning_rate": 0.0005941957644318676,
      "loss": 3.2743,
      "step": 14450
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.451310157775879,
      "learning_rate": 0.0005941949636506718,
      "loss": 3.2675,
      "step": 14451
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5416069030761719,
      "learning_rate": 0.0005941941628147797,
      "loss": 2.9567,
      "step": 14452
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6313422918319702,
      "learning_rate": 0.0005941933619241913,
      "loss": 3.1436,
      "step": 14453
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6496175527572632,
      "learning_rate": 0.0005941925609789068,
      "loss": 2.7984,
      "step": 14454
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6252139806747437,
      "learning_rate": 0.0005941917599789264,
      "loss": 3.2324,
      "step": 14455
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6783816814422607,
      "learning_rate": 0.00059419095892425,
      "loss": 2.9219,
      "step": 14456
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.869015097618103,
      "learning_rate": 0.0005941901578148781,
      "loss": 3.3914,
      "step": 14457
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6354821920394897,
      "learning_rate": 0.0005941893566508107,
      "loss": 3.1607,
      "step": 14458
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2013638019561768,
      "learning_rate": 0.000594188555432048,
      "loss": 3.0254,
      "step": 14459
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8161014318466187,
      "learning_rate": 0.00059418775415859,
      "loss": 3.0701,
      "step": 14460
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.053565740585327,
      "learning_rate": 0.000594186952830437,
      "loss": 3.3082,
      "step": 14461
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.264247179031372,
      "learning_rate": 0.000594186151447589,
      "loss": 3.1145,
      "step": 14462
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.731776475906372,
      "learning_rate": 0.0005941853500100462,
      "loss": 3.1326,
      "step": 14463
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.551544189453125,
      "learning_rate": 0.000594184548517809,
      "loss": 3.263,
      "step": 14464
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.300783157348633,
      "learning_rate": 0.0005941837469708772,
      "loss": 2.9528,
      "step": 14465
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4409962892532349,
      "learning_rate": 0.000594182945369251,
      "loss": 3.2929,
      "step": 14466
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6924573183059692,
      "learning_rate": 0.0005941821437129308,
      "loss": 3.2542,
      "step": 14467
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.962068796157837,
      "learning_rate": 0.0005941813420019166,
      "loss": 3.062,
      "step": 14468
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.53717839717865,
      "learning_rate": 0.0005941805402362084,
      "loss": 3.2376,
      "step": 14469
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3084276914596558,
      "learning_rate": 0.0005941797384158066,
      "loss": 3.1896,
      "step": 14470
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4682819843292236,
      "learning_rate": 0.0005941789365407112,
      "loss": 3.085,
      "step": 14471
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5378303527832031,
      "learning_rate": 0.0005941781346109223,
      "loss": 3.1693,
      "step": 14472
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.636068344116211,
      "learning_rate": 0.0005941773326264403,
      "loss": 3.0857,
      "step": 14473
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5360195636749268,
      "learning_rate": 0.0005941765305872651,
      "loss": 3.4551,
      "step": 14474
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2417361736297607,
      "learning_rate": 0.0005941757284933969,
      "loss": 3.0545,
      "step": 14475
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6070853471755981,
      "learning_rate": 0.000594174926344836,
      "loss": 3.1988,
      "step": 14476
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.470959186553955,
      "learning_rate": 0.0005941741241415823,
      "loss": 2.9529,
      "step": 14477
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3499974012374878,
      "learning_rate": 0.000594173321883636,
      "loss": 3.2408,
      "step": 14478
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8520534038543701,
      "learning_rate": 0.0005941725195709975,
      "loss": 3.0677,
      "step": 14479
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5266873836517334,
      "learning_rate": 0.0005941717172036667,
      "loss": 3.1997,
      "step": 14480
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3307104110717773,
      "learning_rate": 0.0005941709147816438,
      "loss": 3.3033,
      "step": 14481
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7008323669433594,
      "learning_rate": 0.0005941701123049289,
      "loss": 3.2981,
      "step": 14482
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7778581380844116,
      "learning_rate": 0.0005941693097735224,
      "loss": 3.1991,
      "step": 14483
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5216233730316162,
      "learning_rate": 0.0005941685071874242,
      "loss": 3.2632,
      "step": 14484
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.9906423091888428,
      "learning_rate": 0.0005941677045466345,
      "loss": 3.1076,
      "step": 14485
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8631715774536133,
      "learning_rate": 0.0005941669018511536,
      "loss": 3.3379,
      "step": 14486
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5814590454101562,
      "learning_rate": 0.0005941660991009814,
      "loss": 3.0695,
      "step": 14487
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6865956783294678,
      "learning_rate": 0.0005941652962961181,
      "loss": 3.1213,
      "step": 14488
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7504947185516357,
      "learning_rate": 0.000594164493436564,
      "loss": 3.1525,
      "step": 14489
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9014108180999756,
      "learning_rate": 0.0005941636905223191,
      "loss": 3.0714,
      "step": 14490
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4241677522659302,
      "learning_rate": 0.0005941628875533838,
      "loss": 3.1831,
      "step": 14491
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.806829810142517,
      "learning_rate": 0.0005941620845297579,
      "loss": 3.2873,
      "step": 14492
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.8067195415496826,
      "learning_rate": 0.0005941612814514419,
      "loss": 3.2134,
      "step": 14493
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3166002035140991,
      "learning_rate": 0.0005941604783184355,
      "loss": 3.0292,
      "step": 14494
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4010965824127197,
      "learning_rate": 0.0005941596751307392,
      "loss": 3.355,
      "step": 14495
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6678659915924072,
      "learning_rate": 0.0005941588718883532,
      "loss": 3.091,
      "step": 14496
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9513955116271973,
      "learning_rate": 0.0005941580685912774,
      "loss": 2.9572,
      "step": 14497
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.304294228553772,
      "learning_rate": 0.0005941572652395121,
      "loss": 3.4386,
      "step": 14498
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.86388897895813,
      "learning_rate": 0.0005941564618330573,
      "loss": 3.0876,
      "step": 14499
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.463324785232544,
      "learning_rate": 0.0005941556583719134,
      "loss": 3.0896,
      "step": 14500
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4057174921035767,
      "learning_rate": 0.0005941548548560804,
      "loss": 2.9129,
      "step": 14501
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3870428800582886,
      "learning_rate": 0.0005941540512855584,
      "loss": 3.0484,
      "step": 14502
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9899811744689941,
      "learning_rate": 0.0005941532476603477,
      "loss": 2.8361,
      "step": 14503
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0441317558288574,
      "learning_rate": 0.0005941524439804482,
      "loss": 3.2662,
      "step": 14504
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9052754640579224,
      "learning_rate": 0.0005941516402458604,
      "loss": 3.1634,
      "step": 14505
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6516749858856201,
      "learning_rate": 0.0005941508364565841,
      "loss": 3.2333,
      "step": 14506
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3284883499145508,
      "learning_rate": 0.0005941500326126196,
      "loss": 3.0639,
      "step": 14507
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.43901789188385,
      "learning_rate": 0.0005941492287139672,
      "loss": 3.0492,
      "step": 14508
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4939725399017334,
      "learning_rate": 0.0005941484247606268,
      "loss": 3.2675,
      "step": 14509
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.135986566543579,
      "learning_rate": 0.0005941476207525988,
      "loss": 3.3191,
      "step": 14510
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4234381914138794,
      "learning_rate": 0.000594146816689883,
      "loss": 2.9319,
      "step": 14511
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.927708625793457,
      "learning_rate": 0.0005941460125724798,
      "loss": 3.2055,
      "step": 14512
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8890033960342407,
      "learning_rate": 0.0005941452084003894,
      "loss": 3.0927,
      "step": 14513
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.410285234451294,
      "learning_rate": 0.0005941444041736119,
      "loss": 3.2092,
      "step": 14514
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1818244457244873,
      "learning_rate": 0.0005941435998921472,
      "loss": 3.1971,
      "step": 14515
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3619173765182495,
      "learning_rate": 0.0005941427955559958,
      "loss": 3.0803,
      "step": 14516
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4606531858444214,
      "learning_rate": 0.0005941419911651576,
      "loss": 2.8998,
      "step": 14517
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.641670823097229,
      "learning_rate": 0.000594141186719633,
      "loss": 3.1752,
      "step": 14518
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.610243320465088,
      "learning_rate": 0.0005941403822194219,
      "loss": 3.2084,
      "step": 14519
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5787631273269653,
      "learning_rate": 0.0005941395776645245,
      "loss": 3.0117,
      "step": 14520
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4657362699508667,
      "learning_rate": 0.0005941387730549412,
      "loss": 3.174,
      "step": 14521
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5248159170150757,
      "learning_rate": 0.0005941379683906717,
      "loss": 3.321,
      "step": 14522
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.63005793094635,
      "learning_rate": 0.0005941371636717165,
      "loss": 3.075,
      "step": 14523
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6018532514572144,
      "learning_rate": 0.0005941363588980757,
      "loss": 2.9332,
      "step": 14524
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.792684316635132,
      "learning_rate": 0.0005941355540697493,
      "loss": 3.1279,
      "step": 14525
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8723341226577759,
      "learning_rate": 0.0005941347491867377,
      "loss": 3.1156,
      "step": 14526
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7458668947219849,
      "learning_rate": 0.0005941339442490408,
      "loss": 2.7718,
      "step": 14527
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.610006332397461,
      "learning_rate": 0.0005941331392566588,
      "loss": 3.1185,
      "step": 14528
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5114344358444214,
      "learning_rate": 0.000594132334209592,
      "loss": 3.2314,
      "step": 14529
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7188128232955933,
      "learning_rate": 0.0005941315291078404,
      "loss": 3.1664,
      "step": 14530
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5265847444534302,
      "learning_rate": 0.0005941307239514041,
      "loss": 3.1916,
      "step": 14531
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5588008165359497,
      "learning_rate": 0.0005941299187402835,
      "loss": 2.8452,
      "step": 14532
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8887724876403809,
      "learning_rate": 0.0005941291134744785,
      "loss": 3.1685,
      "step": 14533
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5870985984802246,
      "learning_rate": 0.0005941283081539894,
      "loss": 3.2961,
      "step": 14534
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7052758932113647,
      "learning_rate": 0.0005941275027788163,
      "loss": 3.0988,
      "step": 14535
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.8092691898345947,
      "learning_rate": 0.0005941266973489593,
      "loss": 3.1995,
      "step": 14536
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5308594703674316,
      "learning_rate": 0.0005941258918644187,
      "loss": 3.2347,
      "step": 14537
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6681796312332153,
      "learning_rate": 0.0005941250863251944,
      "loss": 3.3336,
      "step": 14538
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1689298152923584,
      "learning_rate": 0.0005941242807312868,
      "loss": 3.0953,
      "step": 14539
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7731965780258179,
      "learning_rate": 0.0005941234750826958,
      "loss": 3.2674,
      "step": 14540
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9381532669067383,
      "learning_rate": 0.0005941226693794219,
      "loss": 3.1514,
      "step": 14541
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7436749935150146,
      "learning_rate": 0.0005941218636214649,
      "loss": 3.1287,
      "step": 14542
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4687944650650024,
      "learning_rate": 0.0005941210578088251,
      "loss": 3.1252,
      "step": 14543
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.597177267074585,
      "learning_rate": 0.0005941202519415028,
      "loss": 3.1676,
      "step": 14544
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4048539400100708,
      "learning_rate": 0.0005941194460194978,
      "loss": 3.2893,
      "step": 14545
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7979588508605957,
      "learning_rate": 0.0005941186400428106,
      "loss": 3.1783,
      "step": 14546
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5258677005767822,
      "learning_rate": 0.000594117834011441,
      "loss": 3.1624,
      "step": 14547
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6315340995788574,
      "learning_rate": 0.0005941170279253894,
      "loss": 3.0726,
      "step": 14548
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6127758026123047,
      "learning_rate": 0.000594116221784656,
      "loss": 3.2926,
      "step": 14549
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6319700479507446,
      "learning_rate": 0.0005941154155892407,
      "loss": 3.1309,
      "step": 14550
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.015843152999878,
      "learning_rate": 0.000594114609339144,
      "loss": 2.8343,
      "step": 14551
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5608221292495728,
      "learning_rate": 0.0005941138030343657,
      "loss": 3.1158,
      "step": 14552
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6793981790542603,
      "learning_rate": 0.0005941129966749061,
      "loss": 3.066,
      "step": 14553
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0341737270355225,
      "learning_rate": 0.0005941121902607653,
      "loss": 3.101,
      "step": 14554
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5994728803634644,
      "learning_rate": 0.0005941113837919436,
      "loss": 3.2609,
      "step": 14555
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.13081431388855,
      "learning_rate": 0.000594110577268441,
      "loss": 3.1113,
      "step": 14556
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8738651275634766,
      "learning_rate": 0.0005941097706902576,
      "loss": 3.2191,
      "step": 14557
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2991373538970947,
      "learning_rate": 0.0005941089640573938,
      "loss": 3.0986,
      "step": 14558
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.265390157699585,
      "learning_rate": 0.0005941081573698494,
      "loss": 3.3363,
      "step": 14559
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5288504362106323,
      "learning_rate": 0.0005941073506276249,
      "loss": 3.3937,
      "step": 14560
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3556907176971436,
      "learning_rate": 0.0005941065438307203,
      "loss": 3.0406,
      "step": 14561
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6261597871780396,
      "learning_rate": 0.0005941057369791356,
      "loss": 3.1903,
      "step": 14562
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6570062637329102,
      "learning_rate": 0.0005941049300728712,
      "loss": 3.2475,
      "step": 14563
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1339430809020996,
      "learning_rate": 0.0005941041231119271,
      "loss": 2.932,
      "step": 14564
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7709356546401978,
      "learning_rate": 0.0005941033160963036,
      "loss": 3.186,
      "step": 14565
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5552313327789307,
      "learning_rate": 0.0005941025090260006,
      "loss": 3.0863,
      "step": 14566
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2750822305679321,
      "learning_rate": 0.0005941017019010184,
      "loss": 3.042,
      "step": 14567
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2455509901046753,
      "learning_rate": 0.0005941008947213573,
      "loss": 3.3127,
      "step": 14568
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2605476379394531,
      "learning_rate": 0.0005941000874870172,
      "loss": 3.1315,
      "step": 14569
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6675304174423218,
      "learning_rate": 0.0005940992801979982,
      "loss": 3.1979,
      "step": 14570
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3358489274978638,
      "learning_rate": 0.0005940984728543008,
      "loss": 2.8336,
      "step": 14571
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.13230299949646,
      "learning_rate": 0.0005940976654559248,
      "loss": 2.8577,
      "step": 14572
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.515434980392456,
      "learning_rate": 0.0005940968580028706,
      "loss": 3.0984,
      "step": 14573
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.5448994636535645,
      "learning_rate": 0.0005940960504951381,
      "loss": 3.373,
      "step": 14574
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.382355809211731,
      "learning_rate": 0.0005940952429327278,
      "loss": 3.1315,
      "step": 14575
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6553819179534912,
      "learning_rate": 0.0005940944353156395,
      "loss": 3.1522,
      "step": 14576
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.401530146598816,
      "learning_rate": 0.0005940936276438735,
      "loss": 2.9915,
      "step": 14577
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7067958116531372,
      "learning_rate": 0.00059409281991743,
      "loss": 3.4177,
      "step": 14578
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0743165016174316,
      "learning_rate": 0.000594092012136309,
      "loss": 3.1008,
      "step": 14579
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3599940538406372,
      "learning_rate": 0.0005940912043005108,
      "loss": 3.2273,
      "step": 14580
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6315628290176392,
      "learning_rate": 0.0005940903964100356,
      "loss": 3.12,
      "step": 14581
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.440406084060669,
      "learning_rate": 0.0005940895884648833,
      "loss": 3.0488,
      "step": 14582
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3614574670791626,
      "learning_rate": 0.0005940887804650541,
      "loss": 2.9753,
      "step": 14583
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.383072853088379,
      "learning_rate": 0.0005940879724105484,
      "loss": 3.1916,
      "step": 14584
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2173075675964355,
      "learning_rate": 0.0005940871643013662,
      "loss": 3.0642,
      "step": 14585
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.537809133529663,
      "learning_rate": 0.0005940863561375077,
      "loss": 3.0026,
      "step": 14586
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4157226085662842,
      "learning_rate": 0.0005940855479189729,
      "loss": 3.3031,
      "step": 14587
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.0981316566467285,
      "learning_rate": 0.0005940847396457621,
      "loss": 3.1163,
      "step": 14588
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.305772304534912,
      "learning_rate": 0.0005940839313178753,
      "loss": 3.1287,
      "step": 14589
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2190024852752686,
      "learning_rate": 0.0005940831229353128,
      "loss": 3.3555,
      "step": 14590
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2463066577911377,
      "learning_rate": 0.0005940823144980747,
      "loss": 3.2528,
      "step": 14591
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6165621280670166,
      "learning_rate": 0.0005940815060061611,
      "loss": 3.0908,
      "step": 14592
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2568321228027344,
      "learning_rate": 0.0005940806974595723,
      "loss": 3.3407,
      "step": 14593
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.390999436378479,
      "learning_rate": 0.0005940798888583082,
      "loss": 3.2152,
      "step": 14594
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.613393545150757,
      "learning_rate": 0.0005940790802023692,
      "loss": 2.9371,
      "step": 14595
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.240402936935425,
      "learning_rate": 0.0005940782714917553,
      "loss": 3.1401,
      "step": 14596
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0996806621551514,
      "learning_rate": 0.0005940774627264668,
      "loss": 3.2123,
      "step": 14597
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4433175325393677,
      "learning_rate": 0.0005940766539065036,
      "loss": 2.9743,
      "step": 14598
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.699476957321167,
      "learning_rate": 0.0005940758450318661,
      "loss": 3.1574,
      "step": 14599
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3491902351379395,
      "learning_rate": 0.0005940750361025543,
      "loss": 3.1473,
      "step": 14600
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.320967435836792,
      "learning_rate": 0.0005940742271185685,
      "loss": 2.8463,
      "step": 14601
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.452639102935791,
      "learning_rate": 0.0005940734180799087,
      "loss": 2.8335,
      "step": 14602
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5158940553665161,
      "learning_rate": 0.0005940726089865751,
      "loss": 2.9368,
      "step": 14603
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6560865640640259,
      "learning_rate": 0.0005940717998385678,
      "loss": 3.1268,
      "step": 14604
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3420023918151855,
      "learning_rate": 0.0005940709906358869,
      "loss": 3.2637,
      "step": 14605
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6632106304168701,
      "learning_rate": 0.0005940701813785329,
      "loss": 3.2899,
      "step": 14606
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4170082807540894,
      "learning_rate": 0.0005940693720665056,
      "loss": 3.2687,
      "step": 14607
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8492532968521118,
      "learning_rate": 0.0005940685626998052,
      "loss": 3.4703,
      "step": 14608
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.753618597984314,
      "learning_rate": 0.0005940677532784319,
      "loss": 2.9485,
      "step": 14609
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.692360758781433,
      "learning_rate": 0.0005940669438023859,
      "loss": 3.1368,
      "step": 14610
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1587042808532715,
      "learning_rate": 0.0005940661342716673,
      "loss": 3.2356,
      "step": 14611
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0898940563201904,
      "learning_rate": 0.0005940653246862762,
      "loss": 3.2278,
      "step": 14612
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.695129632949829,
      "learning_rate": 0.000594064515046213,
      "loss": 3.2163,
      "step": 14613
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5840281248092651,
      "learning_rate": 0.0005940637053514775,
      "loss": 3.3049,
      "step": 14614
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8713511228561401,
      "learning_rate": 0.00059406289560207,
      "loss": 3.1773,
      "step": 14615
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.620898962020874,
      "learning_rate": 0.0005940620857979907,
      "loss": 2.9784,
      "step": 14616
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7259963750839233,
      "learning_rate": 0.0005940612759392397,
      "loss": 3.0131,
      "step": 14617
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5301622152328491,
      "learning_rate": 0.0005940604660258171,
      "loss": 3.053,
      "step": 14618
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4276626110076904,
      "learning_rate": 0.0005940596560577231,
      "loss": 3.2508,
      "step": 14619
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3898305892944336,
      "learning_rate": 0.0005940588460349579,
      "loss": 3.0699,
      "step": 14620
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9879215955734253,
      "learning_rate": 0.0005940580359575216,
      "loss": 3.2234,
      "step": 14621
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.476729154586792,
      "learning_rate": 0.0005940572258254144,
      "loss": 3.3755,
      "step": 14622
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5382288694381714,
      "learning_rate": 0.0005940564156386364,
      "loss": 3.2143,
      "step": 14623
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7878339290618896,
      "learning_rate": 0.0005940556053971878,
      "loss": 3.0909,
      "step": 14624
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4649485349655151,
      "learning_rate": 0.0005940547951010686,
      "loss": 3.3393,
      "step": 14625
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7955533266067505,
      "learning_rate": 0.0005940539847502791,
      "loss": 2.8947,
      "step": 14626
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6517071723937988,
      "learning_rate": 0.0005940531743448195,
      "loss": 3.0344,
      "step": 14627
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.79165518283844,
      "learning_rate": 0.0005940523638846898,
      "loss": 3.101,
      "step": 14628
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8960351943969727,
      "learning_rate": 0.0005940515533698902,
      "loss": 3.1475,
      "step": 14629
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4893735647201538,
      "learning_rate": 0.0005940507428004209,
      "loss": 3.4062,
      "step": 14630
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.713845133781433,
      "learning_rate": 0.000594049932176282,
      "loss": 2.9827,
      "step": 14631
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.050431728363037,
      "learning_rate": 0.0005940491214974737,
      "loss": 3.0182,
      "step": 14632
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7276124954223633,
      "learning_rate": 0.000594048310763996,
      "loss": 3.1465,
      "step": 14633
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4423255920410156,
      "learning_rate": 0.0005940474999758493,
      "loss": 2.9959,
      "step": 14634
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4615637063980103,
      "learning_rate": 0.0005940466891330335,
      "loss": 3.019,
      "step": 14635
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8012555837631226,
      "learning_rate": 0.000594045878235549,
      "loss": 3.1732,
      "step": 14636
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.663901925086975,
      "learning_rate": 0.0005940450672833957,
      "loss": 3.0822,
      "step": 14637
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6499638557434082,
      "learning_rate": 0.000594044256276574,
      "loss": 3.2789,
      "step": 14638
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4937613010406494,
      "learning_rate": 0.0005940434452150838,
      "loss": 2.9844,
      "step": 14639
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3385121822357178,
      "learning_rate": 0.0005940426340989254,
      "loss": 3.1688,
      "step": 14640
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6086952686309814,
      "learning_rate": 0.000594041822928099,
      "loss": 3.0852,
      "step": 14641
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.639990210533142,
      "learning_rate": 0.0005940410117026047,
      "loss": 3.0602,
      "step": 14642
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3100693225860596,
      "learning_rate": 0.0005940402004224425,
      "loss": 3.1775,
      "step": 14643
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4283937215805054,
      "learning_rate": 0.0005940393890876127,
      "loss": 3.1441,
      "step": 14644
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5870206356048584,
      "learning_rate": 0.0005940385776981155,
      "loss": 3.1118,
      "step": 14645
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.605690360069275,
      "learning_rate": 0.000594037766253951,
      "loss": 3.2162,
      "step": 14646
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.960479736328125,
      "learning_rate": 0.0005940369547551192,
      "loss": 2.6808,
      "step": 14647
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.237729549407959,
      "learning_rate": 0.0005940361432016205,
      "loss": 3.2994,
      "step": 14648
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.523730993270874,
      "learning_rate": 0.0005940353315934549,
      "loss": 3.259,
      "step": 14649
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.2645962238311768,
      "learning_rate": 0.0005940345199306224,
      "loss": 3.0471,
      "step": 14650
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.023970127105713,
      "learning_rate": 0.0005940337082131236,
      "loss": 2.9669,
      "step": 14651
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3063814640045166,
      "learning_rate": 0.0005940328964409583,
      "loss": 3.1829,
      "step": 14652
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9915610551834106,
      "learning_rate": 0.0005940320846141267,
      "loss": 3.0297,
      "step": 14653
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.8005480766296387,
      "learning_rate": 0.000594031272732629,
      "loss": 3.3095,
      "step": 14654
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.732222318649292,
      "learning_rate": 0.0005940304607964653,
      "loss": 2.9592,
      "step": 14655
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.375288963317871,
      "learning_rate": 0.0005940296488056358,
      "loss": 2.8136,
      "step": 14656
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.857755422592163,
      "learning_rate": 0.0005940288367601406,
      "loss": 3.3468,
      "step": 14657
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.291701316833496,
      "learning_rate": 0.00059402802465998,
      "loss": 3.0859,
      "step": 14658
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.655125379562378,
      "learning_rate": 0.000594027212505154,
      "loss": 3.0941,
      "step": 14659
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7092889547348022,
      "learning_rate": 0.0005940264002956627,
      "loss": 3.0903,
      "step": 14660
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7919996976852417,
      "learning_rate": 0.0005940255880315065,
      "loss": 2.8893,
      "step": 14661
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8889106512069702,
      "learning_rate": 0.0005940247757126852,
      "loss": 3.1973,
      "step": 14662
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7086833715438843,
      "learning_rate": 0.0005940239633391993,
      "loss": 3.2113,
      "step": 14663
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1069042682647705,
      "learning_rate": 0.0005940231509110488,
      "loss": 2.973,
      "step": 14664
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7354590892791748,
      "learning_rate": 0.0005940223384282338,
      "loss": 2.7792,
      "step": 14665
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.7373476028442383,
      "learning_rate": 0.0005940215258907545,
      "loss": 3.1878,
      "step": 14666
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4168492555618286,
      "learning_rate": 0.0005940207132986109,
      "loss": 3.0964,
      "step": 14667
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3877174854278564,
      "learning_rate": 0.0005940199006518035,
      "loss": 3.0945,
      "step": 14668
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5410138368606567,
      "learning_rate": 0.0005940190879503322,
      "loss": 3.0832,
      "step": 14669
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3972946405410767,
      "learning_rate": 0.0005940182751941973,
      "loss": 3.2243,
      "step": 14670
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.93723464012146,
      "learning_rate": 0.0005940174623833987,
      "loss": 3.2112,
      "step": 14671
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.629183292388916,
      "learning_rate": 0.0005940166495179367,
      "loss": 3.2499,
      "step": 14672
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5704989433288574,
      "learning_rate": 0.0005940158365978115,
      "loss": 3.1908,
      "step": 14673
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.00068736076355,
      "learning_rate": 0.0005940150236230232,
      "loss": 3.1858,
      "step": 14674
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.561028003692627,
      "learning_rate": 0.0005940142105935719,
      "loss": 3.1423,
      "step": 14675
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7161319255828857,
      "learning_rate": 0.0005940133975094579,
      "loss": 2.8363,
      "step": 14676
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.483465313911438,
      "learning_rate": 0.0005940125843706813,
      "loss": 3.3498,
      "step": 14677
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6362004280090332,
      "learning_rate": 0.000594011771177242,
      "loss": 2.9782,
      "step": 14678
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8394393920898438,
      "learning_rate": 0.0005940109579291406,
      "loss": 3.1656,
      "step": 14679
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5851683616638184,
      "learning_rate": 0.0005940101446263769,
      "loss": 3.2181,
      "step": 14680
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6045936346054077,
      "learning_rate": 0.0005940093312689512,
      "loss": 2.9618,
      "step": 14681
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.601473331451416,
      "learning_rate": 0.0005940085178568637,
      "loss": 3.3089,
      "step": 14682
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5954065322875977,
      "learning_rate": 0.0005940077043901142,
      "loss": 2.9996,
      "step": 14683
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5083062648773193,
      "learning_rate": 0.0005940068908687034,
      "loss": 3.0207,
      "step": 14684
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.523571729660034,
      "learning_rate": 0.000594006077292631,
      "loss": 3.1312,
      "step": 14685
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.532860517501831,
      "learning_rate": 0.0005940052636618973,
      "loss": 2.8842,
      "step": 14686
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6714487075805664,
      "learning_rate": 0.0005940044499765026,
      "loss": 3.2,
      "step": 14687
    },
    {
      "epoch": 0.19,
      "grad_norm": 5.54368257522583,
      "learning_rate": 0.0005940036362364468,
      "loss": 3.1283,
      "step": 14688
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2214958667755127,
      "learning_rate": 0.0005940028224417303,
      "loss": 3.2669,
      "step": 14689
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.316105604171753,
      "learning_rate": 0.000594002008592353,
      "loss": 3.164,
      "step": 14690
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5726921558380127,
      "learning_rate": 0.0005940011946883153,
      "loss": 3.32,
      "step": 14691
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.4562885761260986,
      "learning_rate": 0.0005940003807296172,
      "loss": 3.0943,
      "step": 14692
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.515751838684082,
      "learning_rate": 0.0005939995667162589,
      "loss": 2.9971,
      "step": 14693
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8442798852920532,
      "learning_rate": 0.0005939987526482405,
      "loss": 3.095,
      "step": 14694
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.384290933609009,
      "learning_rate": 0.0005939979385255621,
      "loss": 3.0303,
      "step": 14695
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8408019542694092,
      "learning_rate": 0.0005939971243482241,
      "loss": 3.2431,
      "step": 14696
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3410909175872803,
      "learning_rate": 0.0005939963101162263,
      "loss": 3.0736,
      "step": 14697
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.121157646179199,
      "learning_rate": 0.0005939954958295691,
      "loss": 2.9886,
      "step": 14698
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6162848472595215,
      "learning_rate": 0.0005939946814882526,
      "loss": 3.298,
      "step": 14699
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.623914361000061,
      "learning_rate": 0.0005939938670922769,
      "loss": 3.1081,
      "step": 14700
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5219166278839111,
      "learning_rate": 0.0005939930526416423,
      "loss": 2.9433,
      "step": 14701
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.279478073120117,
      "learning_rate": 0.0005939922381363487,
      "loss": 3.0462,
      "step": 14702
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.3858816623687744,
      "learning_rate": 0.0005939914235763965,
      "loss": 3.0176,
      "step": 14703
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4920042753219604,
      "learning_rate": 0.0005939906089617857,
      "loss": 3.1704,
      "step": 14704
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4789236783981323,
      "learning_rate": 0.0005939897942925166,
      "loss": 2.9766,
      "step": 14705
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9518998861312866,
      "learning_rate": 0.0005939889795685892,
      "loss": 3.0049,
      "step": 14706
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.9782660007476807,
      "learning_rate": 0.0005939881647900035,
      "loss": 3.1555,
      "step": 14707
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4397544860839844,
      "learning_rate": 0.0005939873499567601,
      "loss": 3.0937,
      "step": 14708
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.753566026687622,
      "learning_rate": 0.0005939865350688588,
      "loss": 3.0581,
      "step": 14709
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.614760637283325,
      "learning_rate": 0.0005939857201263,
      "loss": 3.0979,
      "step": 14710
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1571764945983887,
      "learning_rate": 0.0005939849051290834,
      "loss": 2.9034,
      "step": 14711
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.01198410987854,
      "learning_rate": 0.0005939840900772097,
      "loss": 2.9699,
      "step": 14712
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.516106605529785,
      "learning_rate": 0.0005939832749706787,
      "loss": 2.8737,
      "step": 14713
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7624635696411133,
      "learning_rate": 0.0005939824598094907,
      "loss": 2.8083,
      "step": 14714
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6373751163482666,
      "learning_rate": 0.0005939816445936458,
      "loss": 3.0463,
      "step": 14715
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5005968809127808,
      "learning_rate": 0.0005939808293231441,
      "loss": 3.1934,
      "step": 14716
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.7104969024658203,
      "learning_rate": 0.000593980013997986,
      "loss": 3.1911,
      "step": 14717
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.303460955619812,
      "learning_rate": 0.0005939791986181713,
      "loss": 2.9648,
      "step": 14718
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.423054575920105,
      "learning_rate": 0.0005939783831837004,
      "loss": 3.1744,
      "step": 14719
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.18603777885437,
      "learning_rate": 0.0005939775676945733,
      "loss": 3.3166,
      "step": 14720
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2693394422531128,
      "learning_rate": 0.0005939767521507902,
      "loss": 2.9774,
      "step": 14721
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.388098955154419,
      "learning_rate": 0.0005939759365523515,
      "loss": 3.1512,
      "step": 14722
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2664623260498047,
      "learning_rate": 0.0005939751208992568,
      "loss": 3.1475,
      "step": 14723
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6021742820739746,
      "learning_rate": 0.0005939743051915068,
      "loss": 3.1524,
      "step": 14724
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.738925576210022,
      "learning_rate": 0.0005939734894291013,
      "loss": 3.1721,
      "step": 14725
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.073483943939209,
      "learning_rate": 0.0005939726736120407,
      "loss": 3.2349,
      "step": 14726
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.665074110031128,
      "learning_rate": 0.0005939718577403249,
      "loss": 3.1237,
      "step": 14727
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5979139804840088,
      "learning_rate": 0.0005939710418139543,
      "loss": 3.3139,
      "step": 14728
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.819090723991394,
      "learning_rate": 0.0005939702258329289,
      "loss": 3.3032,
      "step": 14729
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.48199462890625,
      "learning_rate": 0.0005939694097972488,
      "loss": 3.115,
      "step": 14730
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4444924592971802,
      "learning_rate": 0.0005939685937069144,
      "loss": 3.182,
      "step": 14731
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8899774551391602,
      "learning_rate": 0.0005939677775619256,
      "loss": 3.0071,
      "step": 14732
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9484193325042725,
      "learning_rate": 0.0005939669613622826,
      "loss": 3.1965,
      "step": 14733
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5364160537719727,
      "learning_rate": 0.0005939661451079857,
      "loss": 3.0317,
      "step": 14734
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.326270580291748,
      "learning_rate": 0.0005939653287990348,
      "loss": 3.2232,
      "step": 14735
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.296060800552368,
      "learning_rate": 0.0005939645124354303,
      "loss": 2.9697,
      "step": 14736
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.8457834720611572,
      "learning_rate": 0.0005939636960171722,
      "loss": 3.0132,
      "step": 14737
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.742287278175354,
      "learning_rate": 0.0005939628795442607,
      "loss": 2.9866,
      "step": 14738
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5908550024032593,
      "learning_rate": 0.000593962063016696,
      "loss": 3.0141,
      "step": 14739
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.7636196613311768,
      "learning_rate": 0.0005939612464344781,
      "loss": 3.2204,
      "step": 14740
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1169590950012207,
      "learning_rate": 0.0005939604297976075,
      "loss": 3.0933,
      "step": 14741
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.32584810256958,
      "learning_rate": 0.0005939596131060838,
      "loss": 3.3448,
      "step": 14742
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0211150646209717,
      "learning_rate": 0.0005939587963599077,
      "loss": 3.1904,
      "step": 14743
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2162587642669678,
      "learning_rate": 0.000593957979559079,
      "loss": 3.0766,
      "step": 14744
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6239807605743408,
      "learning_rate": 0.0005939571627035979,
      "loss": 3.276,
      "step": 14745
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8329402208328247,
      "learning_rate": 0.0005939563457934647,
      "loss": 2.9023,
      "step": 14746
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6258214712142944,
      "learning_rate": 0.0005939555288286794,
      "loss": 3.0835,
      "step": 14747
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6161216497421265,
      "learning_rate": 0.0005939547118092424,
      "loss": 3.3425,
      "step": 14748
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9400355815887451,
      "learning_rate": 0.0005939538947351535,
      "loss": 3.1033,
      "step": 14749
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.426002025604248,
      "learning_rate": 0.0005939530776064131,
      "loss": 3.1899,
      "step": 14750
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4372674226760864,
      "learning_rate": 0.0005939522604230212,
      "loss": 3.0382,
      "step": 14751
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4536772966384888,
      "learning_rate": 0.0005939514431849781,
      "loss": 3.369,
      "step": 14752
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4468419551849365,
      "learning_rate": 0.0005939506258922837,
      "loss": 2.9411,
      "step": 14753
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6132417917251587,
      "learning_rate": 0.0005939498085449386,
      "loss": 3.1142,
      "step": 14754
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.636636734008789,
      "learning_rate": 0.0005939489911429425,
      "loss": 3.0542,
      "step": 14755
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.661134123802185,
      "learning_rate": 0.0005939481736862958,
      "loss": 3.0242,
      "step": 14756
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3848422765731812,
      "learning_rate": 0.0005939473561749985,
      "loss": 3.0412,
      "step": 14757
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4253066778182983,
      "learning_rate": 0.000593946538609051,
      "loss": 3.1443,
      "step": 14758
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8533579111099243,
      "learning_rate": 0.0005939457209884532,
      "loss": 3.1927,
      "step": 14759
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6918365955352783,
      "learning_rate": 0.0005939449033132053,
      "loss": 3.1799,
      "step": 14760
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6306406259536743,
      "learning_rate": 0.0005939440855833076,
      "loss": 3.0576,
      "step": 14761
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7225875854492188,
      "learning_rate": 0.00059394326779876,
      "loss": 3.1081,
      "step": 14762
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5343002080917358,
      "learning_rate": 0.0005939424499595629,
      "loss": 2.8917,
      "step": 14763
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8290481567382812,
      "learning_rate": 0.0005939416320657163,
      "loss": 2.9831,
      "step": 14764
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5207858085632324,
      "learning_rate": 0.0005939408141172205,
      "loss": 3.179,
      "step": 14765
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.946090579032898,
      "learning_rate": 0.0005939399961140755,
      "loss": 3.1101,
      "step": 14766
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3855150938034058,
      "learning_rate": 0.0005939391780562814,
      "loss": 3.3947,
      "step": 14767
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0260276794433594,
      "learning_rate": 0.0005939383599438386,
      "loss": 3.2517,
      "step": 14768
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5009998083114624,
      "learning_rate": 0.0005939375417767471,
      "loss": 3.3371,
      "step": 14769
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9103118181228638,
      "learning_rate": 0.000593936723555007,
      "loss": 3.3958,
      "step": 14770
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4408540725708008,
      "learning_rate": 0.0005939359052786185,
      "loss": 3.2744,
      "step": 14771
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.3896291255950928,
      "learning_rate": 0.0005939350869475818,
      "loss": 3.1107,
      "step": 14772
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9920856952667236,
      "learning_rate": 0.0005939342685618971,
      "loss": 3.0015,
      "step": 14773
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5870014429092407,
      "learning_rate": 0.0005939334501215643,
      "loss": 2.9934,
      "step": 14774
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5706312656402588,
      "learning_rate": 0.0005939326316265839,
      "loss": 3.1446,
      "step": 14775
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7629855871200562,
      "learning_rate": 0.0005939318130769558,
      "loss": 3.3158,
      "step": 14776
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6314921379089355,
      "learning_rate": 0.0005939309944726802,
      "loss": 3.0362,
      "step": 14777
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4774527549743652,
      "learning_rate": 0.0005939301758137573,
      "loss": 2.9784,
      "step": 14778
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4191771745681763,
      "learning_rate": 0.0005939293571001872,
      "loss": 3.153,
      "step": 14779
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4519420862197876,
      "learning_rate": 0.0005939285383319701,
      "loss": 3.251,
      "step": 14780
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3211021423339844,
      "learning_rate": 0.0005939277195091061,
      "loss": 3.107,
      "step": 14781
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3753834962844849,
      "learning_rate": 0.0005939269006315956,
      "loss": 3.0997,
      "step": 14782
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4842946529388428,
      "learning_rate": 0.0005939260816994383,
      "loss": 3.278,
      "step": 14783
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7262959480285645,
      "learning_rate": 0.0005939252627126347,
      "loss": 3.322,
      "step": 14784
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4558106660842896,
      "learning_rate": 0.0005939244436711849,
      "loss": 3.1673,
      "step": 14785
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3318843841552734,
      "learning_rate": 0.0005939236245750888,
      "loss": 3.1986,
      "step": 14786
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5474131107330322,
      "learning_rate": 0.0005939228054243469,
      "loss": 3.0206,
      "step": 14787
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7449346780776978,
      "learning_rate": 0.0005939219862189591,
      "loss": 3.3301,
      "step": 14788
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0055127143859863,
      "learning_rate": 0.0005939211669589258,
      "loss": 3.2267,
      "step": 14789
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.59077787399292,
      "learning_rate": 0.0005939203476442469,
      "loss": 3.2664,
      "step": 14790
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.001594305038452,
      "learning_rate": 0.0005939195282749227,
      "loss": 3.0855,
      "step": 14791
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7927879095077515,
      "learning_rate": 0.0005939187088509533,
      "loss": 3.0438,
      "step": 14792
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7211679220199585,
      "learning_rate": 0.0005939178893723389,
      "loss": 3.4532,
      "step": 14793
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4079742431640625,
      "learning_rate": 0.0005939170698390795,
      "loss": 2.9955,
      "step": 14794
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4723360538482666,
      "learning_rate": 0.0005939162502511755,
      "loss": 3.4242,
      "step": 14795
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1957240104675293,
      "learning_rate": 0.0005939154306086269,
      "loss": 3.0254,
      "step": 14796
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.724566102027893,
      "learning_rate": 0.0005939146109114338,
      "loss": 3.3247,
      "step": 14797
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8885248899459839,
      "learning_rate": 0.0005939137911595966,
      "loss": 3.1868,
      "step": 14798
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4089597463607788,
      "learning_rate": 0.0005939129713531151,
      "loss": 3.1756,
      "step": 14799
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.751572608947754,
      "learning_rate": 0.0005939121514919897,
      "loss": 3.0187,
      "step": 14800
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5084149837493896,
      "learning_rate": 0.0005939113315762204,
      "loss": 3.2075,
      "step": 14801
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9495627880096436,
      "learning_rate": 0.0005939105116058074,
      "loss": 2.893,
      "step": 14802
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5711405277252197,
      "learning_rate": 0.0005939096915807511,
      "loss": 2.9104,
      "step": 14803
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.958557367324829,
      "learning_rate": 0.0005939088715010513,
      "loss": 3.3698,
      "step": 14804
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.9455440044403076,
      "learning_rate": 0.0005939080513667084,
      "loss": 3.1744,
      "step": 14805
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5919800996780396,
      "learning_rate": 0.0005939072311777223,
      "loss": 3.1229,
      "step": 14806
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5125367641448975,
      "learning_rate": 0.0005939064109340933,
      "loss": 3.1681,
      "step": 14807
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.491189479827881,
      "learning_rate": 0.0005939055906358217,
      "loss": 2.9541,
      "step": 14808
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.054871082305908,
      "learning_rate": 0.0005939047702829074,
      "loss": 2.9619,
      "step": 14809
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5955482721328735,
      "learning_rate": 0.0005939039498753506,
      "loss": 3.3647,
      "step": 14810
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.271190881729126,
      "learning_rate": 0.0005939031294131516,
      "loss": 3.1629,
      "step": 14811
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7009660005569458,
      "learning_rate": 0.0005939023088963105,
      "loss": 3.1011,
      "step": 14812
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4907571077346802,
      "learning_rate": 0.0005939014883248274,
      "loss": 2.9511,
      "step": 14813
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7228120565414429,
      "learning_rate": 0.0005939006676987024,
      "loss": 3.132,
      "step": 14814
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2735148668289185,
      "learning_rate": 0.0005938998470179357,
      "loss": 3.5112,
      "step": 14815
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.2915679216384888,
      "learning_rate": 0.0005938990262825275,
      "loss": 3.2221,
      "step": 14816
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9944225549697876,
      "learning_rate": 0.0005938982054924779,
      "loss": 3.1231,
      "step": 14817
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9631586074829102,
      "learning_rate": 0.0005938973846477872,
      "loss": 3.236,
      "step": 14818
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8192861080169678,
      "learning_rate": 0.0005938965637484553,
      "loss": 3.1114,
      "step": 14819
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5508129596710205,
      "learning_rate": 0.0005938957427944825,
      "loss": 3.1416,
      "step": 14820
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8455302715301514,
      "learning_rate": 0.0005938949217858689,
      "loss": 3.1777,
      "step": 14821
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9699312448501587,
      "learning_rate": 0.0005938941007226148,
      "loss": 2.8987,
      "step": 14822
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6442745923995972,
      "learning_rate": 0.0005938932796047202,
      "loss": 3.2898,
      "step": 14823
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.071690797805786,
      "learning_rate": 0.0005938924584321852,
      "loss": 3.0744,
      "step": 14824
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1226634979248047,
      "learning_rate": 0.0005938916372050101,
      "loss": 3.0154,
      "step": 14825
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7951489686965942,
      "learning_rate": 0.000593890815923195,
      "loss": 3.0018,
      "step": 14826
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1539974212646484,
      "learning_rate": 0.00059388999458674,
      "loss": 3.3109,
      "step": 14827
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4092845916748047,
      "learning_rate": 0.0005938891731956454,
      "loss": 3.1786,
      "step": 14828
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4767801761627197,
      "learning_rate": 0.0005938883517499112,
      "loss": 3.112,
      "step": 14829
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6641979217529297,
      "learning_rate": 0.0005938875302495377,
      "loss": 3.2434,
      "step": 14830
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7109793424606323,
      "learning_rate": 0.0005938867086945249,
      "loss": 3.3388,
      "step": 14831
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5235742330551147,
      "learning_rate": 0.000593885887084873,
      "loss": 3.3724,
      "step": 14832
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3284062147140503,
      "learning_rate": 0.0005938850654205821,
      "loss": 3.1743,
      "step": 14833
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6173489093780518,
      "learning_rate": 0.0005938842437016525,
      "loss": 3.3079,
      "step": 14834
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.794407367706299,
      "learning_rate": 0.0005938834219280843,
      "loss": 3.2298,
      "step": 14835
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.965750217437744,
      "learning_rate": 0.0005938826000998775,
      "loss": 3.3853,
      "step": 14836
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9592310190200806,
      "learning_rate": 0.0005938817782170325,
      "loss": 3.355,
      "step": 14837
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5857874155044556,
      "learning_rate": 0.0005938809562795492,
      "loss": 3.2896,
      "step": 14838
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.0308170318603516,
      "learning_rate": 0.000593880134287428,
      "loss": 3.2016,
      "step": 14839
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9736833572387695,
      "learning_rate": 0.0005938793122406689,
      "loss": 3.3266,
      "step": 14840
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6668341159820557,
      "learning_rate": 0.0005938784901392721,
      "loss": 3.1453,
      "step": 14841
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.730080485343933,
      "learning_rate": 0.0005938776679832378,
      "loss": 3.3143,
      "step": 14842
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6541883945465088,
      "learning_rate": 0.0005938768457725659,
      "loss": 3.3238,
      "step": 14843
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6650733947753906,
      "learning_rate": 0.000593876023507257,
      "loss": 3.4798,
      "step": 14844
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1730399131774902,
      "learning_rate": 0.0005938752011873108,
      "loss": 3.05,
      "step": 14845
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.629083514213562,
      "learning_rate": 0.0005938743788127278,
      "loss": 3.101,
      "step": 14846
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5687261819839478,
      "learning_rate": 0.0005938735563835079,
      "loss": 3.0724,
      "step": 14847
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6002533435821533,
      "learning_rate": 0.0005938727338996514,
      "loss": 2.9978,
      "step": 14848
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.007692575454712,
      "learning_rate": 0.0005938719113611585,
      "loss": 3.081,
      "step": 14849
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7610052824020386,
      "learning_rate": 0.0005938710887680291,
      "loss": 3.2882,
      "step": 14850
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4891993999481201,
      "learning_rate": 0.0005938702661202636,
      "loss": 3.1954,
      "step": 14851
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.313960075378418,
      "learning_rate": 0.000593869443417862,
      "loss": 2.9481,
      "step": 14852
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.4665119647979736,
      "learning_rate": 0.0005938686206608246,
      "loss": 3.155,
      "step": 14853
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4255508184432983,
      "learning_rate": 0.0005938677978491514,
      "loss": 3.1505,
      "step": 14854
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9530645608901978,
      "learning_rate": 0.0005938669749828427,
      "loss": 3.2132,
      "step": 14855
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8886606693267822,
      "learning_rate": 0.0005938661520618985,
      "loss": 3.1625,
      "step": 14856
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.460974097251892,
      "learning_rate": 0.0005938653290863192,
      "loss": 3.3334,
      "step": 14857
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5625072717666626,
      "learning_rate": 0.0005938645060561046,
      "loss": 3.1291,
      "step": 14858
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5978825092315674,
      "learning_rate": 0.0005938636829712552,
      "loss": 3.0296,
      "step": 14859
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.587902545928955,
      "learning_rate": 0.0005938628598317709,
      "loss": 3.1969,
      "step": 14860
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3924635648727417,
      "learning_rate": 0.0005938620366376519,
      "loss": 3.2823,
      "step": 14861
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8457180261611938,
      "learning_rate": 0.0005938612133888986,
      "loss": 3.0087,
      "step": 14862
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3120828866958618,
      "learning_rate": 0.0005938603900855108,
      "loss": 3.062,
      "step": 14863
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4823265075683594,
      "learning_rate": 0.0005938595667274888,
      "loss": 3.184,
      "step": 14864
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5290101766586304,
      "learning_rate": 0.0005938587433148328,
      "loss": 3.257,
      "step": 14865
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.1798118352890015,
      "learning_rate": 0.0005938579198475429,
      "loss": 3.0567,
      "step": 14866
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.330548882484436,
      "learning_rate": 0.0005938570963256193,
      "loss": 3.02,
      "step": 14867
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7278757095336914,
      "learning_rate": 0.0005938562727490621,
      "loss": 3.473,
      "step": 14868
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6559531688690186,
      "learning_rate": 0.0005938554491178714,
      "loss": 3.19,
      "step": 14869
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3094514608383179,
      "learning_rate": 0.0005938546254320475,
      "loss": 3.2425,
      "step": 14870
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7534713745117188,
      "learning_rate": 0.0005938538016915905,
      "loss": 3.1266,
      "step": 14871
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5414245128631592,
      "learning_rate": 0.0005938529778965005,
      "loss": 3.3651,
      "step": 14872
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9329254627227783,
      "learning_rate": 0.0005938521540467777,
      "loss": 3.0595,
      "step": 14873
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6519404649734497,
      "learning_rate": 0.0005938513301424221,
      "loss": 3.143,
      "step": 14874
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3055455684661865,
      "learning_rate": 0.0005938505061834341,
      "loss": 2.6595,
      "step": 14875
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5503665208816528,
      "learning_rate": 0.0005938496821698138,
      "loss": 2.9881,
      "step": 14876
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4910911321640015,
      "learning_rate": 0.0005938488581015613,
      "loss": 3.1298,
      "step": 14877
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6926820278167725,
      "learning_rate": 0.0005938480339786767,
      "loss": 3.1658,
      "step": 14878
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8901569843292236,
      "learning_rate": 0.0005938472098011602,
      "loss": 3.2953,
      "step": 14879
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4006247520446777,
      "learning_rate": 0.0005938463855690119,
      "loss": 3.2481,
      "step": 14880
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3477662801742554,
      "learning_rate": 0.0005938455612822321,
      "loss": 2.9886,
      "step": 14881
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.423369288444519,
      "learning_rate": 0.0005938447369408208,
      "loss": 3.1174,
      "step": 14882
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8420608043670654,
      "learning_rate": 0.0005938439125447783,
      "loss": 3.2926,
      "step": 14883
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7235333919525146,
      "learning_rate": 0.0005938430880941046,
      "loss": 3.1913,
      "step": 14884
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4005862474441528,
      "learning_rate": 0.0005938422635888,
      "loss": 3.2086,
      "step": 14885
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.493238091468811,
      "learning_rate": 0.0005938414390288644,
      "loss": 3.2481,
      "step": 14886
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3770686388015747,
      "learning_rate": 0.0005938406144142983,
      "loss": 3.2298,
      "step": 14887
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.592507243156433,
      "learning_rate": 0.0005938397897451016,
      "loss": 3.2365,
      "step": 14888
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.623777151107788,
      "learning_rate": 0.0005938389650212745,
      "loss": 2.9941,
      "step": 14889
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5090787410736084,
      "learning_rate": 0.0005938381402428173,
      "loss": 3.1131,
      "step": 14890
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5711307525634766,
      "learning_rate": 0.0005938373154097301,
      "loss": 3.053,
      "step": 14891
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.568385124206543,
      "learning_rate": 0.0005938364905220129,
      "loss": 3.1042,
      "step": 14892
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3964529037475586,
      "learning_rate": 0.000593835665579666,
      "loss": 3.3517,
      "step": 14893
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4797970056533813,
      "learning_rate": 0.0005938348405826894,
      "loss": 3.1181,
      "step": 14894
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4971426725387573,
      "learning_rate": 0.0005938340155310834,
      "loss": 3.3056,
      "step": 14895
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5146156549453735,
      "learning_rate": 0.0005938331904248481,
      "loss": 3.0815,
      "step": 14896
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.224407911300659,
      "learning_rate": 0.0005938323652639837,
      "loss": 2.9858,
      "step": 14897
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.546592950820923,
      "learning_rate": 0.0005938315400484903,
      "loss": 2.9037,
      "step": 14898
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.470183253288269,
      "learning_rate": 0.0005938307147783681,
      "loss": 2.9725,
      "step": 14899
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.813824415206909,
      "learning_rate": 0.0005938298894536171,
      "loss": 2.9087,
      "step": 14900
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3818329572677612,
      "learning_rate": 0.0005938290640742377,
      "loss": 3.2612,
      "step": 14901
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3729286193847656,
      "learning_rate": 0.0005938282386402299,
      "loss": 2.953,
      "step": 14902
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.111229181289673,
      "learning_rate": 0.0005938274131515939,
      "loss": 3.2402,
      "step": 14903
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.2711892127990723,
      "learning_rate": 0.0005938265876083298,
      "loss": 3.2016,
      "step": 14904
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.695152759552002,
      "learning_rate": 0.0005938257620104378,
      "loss": 2.8243,
      "step": 14905
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.689063549041748,
      "learning_rate": 0.0005938249363579181,
      "loss": 3.0428,
      "step": 14906
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.040559768676758,
      "learning_rate": 0.0005938241106507707,
      "loss": 3.1388,
      "step": 14907
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.008396863937378,
      "learning_rate": 0.000593823284888996,
      "loss": 3.0945,
      "step": 14908
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1155498027801514,
      "learning_rate": 0.0005938224590725939,
      "loss": 3.1203,
      "step": 14909
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3460382223129272,
      "learning_rate": 0.0005938216332015647,
      "loss": 3.2158,
      "step": 14910
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5898303985595703,
      "learning_rate": 0.0005938208072759085,
      "loss": 3.2757,
      "step": 14911
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5563595294952393,
      "learning_rate": 0.0005938199812956254,
      "loss": 3.0296,
      "step": 14912
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.602152109146118,
      "learning_rate": 0.0005938191552607156,
      "loss": 3.3037,
      "step": 14913
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7905185222625732,
      "learning_rate": 0.0005938183291711793,
      "loss": 3.2872,
      "step": 14914
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1815407276153564,
      "learning_rate": 0.0005938175030270167,
      "loss": 3.0657,
      "step": 14915
    },
    {
      "epoch": 0.19,
      "grad_norm": 3.0852999687194824,
      "learning_rate": 0.0005938166768282278,
      "loss": 3.261,
      "step": 14916
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7577846050262451,
      "learning_rate": 0.0005938158505748129,
      "loss": 3.159,
      "step": 14917
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7344406843185425,
      "learning_rate": 0.0005938150242667721,
      "loss": 3.2559,
      "step": 14918
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.7900149822235107,
      "learning_rate": 0.0005938141979041056,
      "loss": 3.0511,
      "step": 14919
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5751597881317139,
      "learning_rate": 0.0005938133714868133,
      "loss": 3.1713,
      "step": 14920
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4691181182861328,
      "learning_rate": 0.0005938125450148956,
      "loss": 3.1241,
      "step": 14921
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6253515481948853,
      "learning_rate": 0.0005938117184883528,
      "loss": 3.2709,
      "step": 14922
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5794564485549927,
      "learning_rate": 0.0005938108919071847,
      "loss": 3.222,
      "step": 14923
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.296074390411377,
      "learning_rate": 0.0005938100652713915,
      "loss": 3.1013,
      "step": 14924
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7198123931884766,
      "learning_rate": 0.0005938092385809737,
      "loss": 3.1074,
      "step": 14925
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8489950895309448,
      "learning_rate": 0.000593808411835931,
      "loss": 2.9189,
      "step": 14926
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3346960544586182,
      "learning_rate": 0.0005938075850362638,
      "loss": 3.1539,
      "step": 14927
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.809795618057251,
      "learning_rate": 0.0005938067581819724,
      "loss": 3.1137,
      "step": 14928
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9254248142242432,
      "learning_rate": 0.0005938059312730567,
      "loss": 2.9271,
      "step": 14929
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6820507049560547,
      "learning_rate": 0.0005938051043095168,
      "loss": 3.3846,
      "step": 14930
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.529947280883789,
      "learning_rate": 0.0005938042772913532,
      "loss": 3.0889,
      "step": 14931
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6647288799285889,
      "learning_rate": 0.0005938034502185656,
      "loss": 3.1408,
      "step": 14932
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8424758911132812,
      "learning_rate": 0.0005938026230911546,
      "loss": 3.098,
      "step": 14933
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6166046857833862,
      "learning_rate": 0.00059380179590912,
      "loss": 3.3035,
      "step": 14934
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7118154764175415,
      "learning_rate": 0.0005938009686724622,
      "loss": 3.1424,
      "step": 14935
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4415311813354492,
      "learning_rate": 0.0005938001413811812,
      "loss": 3.174,
      "step": 14936
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4514575004577637,
      "learning_rate": 0.0005937993140352772,
      "loss": 3.2556,
      "step": 14937
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.1054108142852783,
      "learning_rate": 0.0005937984866347505,
      "loss": 3.1684,
      "step": 14938
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5693774223327637,
      "learning_rate": 0.000593797659179601,
      "loss": 2.9524,
      "step": 14939
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5148069858551025,
      "learning_rate": 0.0005937968316698289,
      "loss": 3.1796,
      "step": 14940
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6625850200653076,
      "learning_rate": 0.0005937960041054345,
      "loss": 3.3098,
      "step": 14941
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3331588506698608,
      "learning_rate": 0.0005937951764864179,
      "loss": 3.2711,
      "step": 14942
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.827863097190857,
      "learning_rate": 0.0005937943488127791,
      "loss": 3.2167,
      "step": 14943
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.418007493019104,
      "learning_rate": 0.0005937935210845185,
      "loss": 3.0782,
      "step": 14944
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.190931797027588,
      "learning_rate": 0.0005937926933016361,
      "loss": 3.2532,
      "step": 14945
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.911847472190857,
      "learning_rate": 0.0005937918654641321,
      "loss": 2.9719,
      "step": 14946
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.742546796798706,
      "learning_rate": 0.0005937910375720068,
      "loss": 3.2766,
      "step": 14947
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.309910774230957,
      "learning_rate": 0.0005937902096252599,
      "loss": 3.041,
      "step": 14948
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.6392459869384766,
      "learning_rate": 0.0005937893816238922,
      "loss": 3.3595,
      "step": 14949
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5345494747161865,
      "learning_rate": 0.0005937885535679033,
      "loss": 2.8949,
      "step": 14950
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.9344935417175293,
      "learning_rate": 0.0005937877254572935,
      "loss": 3.2374,
      "step": 14951
    },
    {
      "epoch": 0.19,
      "grad_norm": 2.5431435108184814,
      "learning_rate": 0.0005937868972920631,
      "loss": 2.9953,
      "step": 14952
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8437854051589966,
      "learning_rate": 0.0005937860690722123,
      "loss": 3.1019,
      "step": 14953
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.605376958847046,
      "learning_rate": 0.0005937852407977409,
      "loss": 3.353,
      "step": 14954
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.597421646118164,
      "learning_rate": 0.0005937844124686494,
      "loss": 3.0393,
      "step": 14955
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8261783123016357,
      "learning_rate": 0.0005937835840849377,
      "loss": 2.9433,
      "step": 14956
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.564260721206665,
      "learning_rate": 0.0005937827556466062,
      "loss": 3.2037,
      "step": 14957
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4361997842788696,
      "learning_rate": 0.000593781927153655,
      "loss": 3.2921,
      "step": 14958
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4147740602493286,
      "learning_rate": 0.0005937810986060842,
      "loss": 3.0048,
      "step": 14959
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4389499425888062,
      "learning_rate": 0.0005937802700038938,
      "loss": 3.038,
      "step": 14960
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5354926586151123,
      "learning_rate": 0.0005937794413470842,
      "loss": 3.0621,
      "step": 14961
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5025010108947754,
      "learning_rate": 0.0005937786126356554,
      "loss": 3.3522,
      "step": 14962
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3984705209732056,
      "learning_rate": 0.0005937777838696077,
      "loss": 2.9833,
      "step": 14963
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5112268924713135,
      "learning_rate": 0.000593776955048941,
      "loss": 3.1264,
      "step": 14964
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5430793762207031,
      "learning_rate": 0.0005937761261736559,
      "loss": 3.2876,
      "step": 14965
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4040203094482422,
      "learning_rate": 0.000593775297243752,
      "loss": 3.1036,
      "step": 14966
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.7647885084152222,
      "learning_rate": 0.0005937744682592298,
      "loss": 3.5021,
      "step": 14967
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.4528402090072632,
      "learning_rate": 0.0005937736392200894,
      "loss": 3.1615,
      "step": 14968
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5241222381591797,
      "learning_rate": 0.000593772810126331,
      "loss": 3.0892,
      "step": 14969
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.212895393371582,
      "learning_rate": 0.0005937719809779547,
      "loss": 2.9508,
      "step": 14970
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.587853193283081,
      "learning_rate": 0.0005937711517749605,
      "loss": 3.0164,
      "step": 14971
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3767215013504028,
      "learning_rate": 0.0005937703225173488,
      "loss": 2.9335,
      "step": 14972
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.3362890481948853,
      "learning_rate": 0.0005937694932051196,
      "loss": 3.057,
      "step": 14973
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6001765727996826,
      "learning_rate": 0.0005937686638382731,
      "loss": 3.3485,
      "step": 14974
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.6765480041503906,
      "learning_rate": 0.0005937678344168096,
      "loss": 3.3602,
      "step": 14975
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.5263187885284424,
      "learning_rate": 0.000593767004940729,
      "loss": 3.1301,
      "step": 14976
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8504226207733154,
      "learning_rate": 0.0005937661754100315,
      "loss": 3.2101,
      "step": 14977
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6932802200317383,
      "learning_rate": 0.0005937653458247174,
      "loss": 3.0533,
      "step": 14978
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7093302011489868,
      "learning_rate": 0.0005937645161847867,
      "loss": 3.0445,
      "step": 14979
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5936497449874878,
      "learning_rate": 0.0005937636864902398,
      "loss": 3.0631,
      "step": 14980
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5534149408340454,
      "learning_rate": 0.0005937628567410766,
      "loss": 3.0092,
      "step": 14981
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4271557331085205,
      "learning_rate": 0.0005937620269372973,
      "loss": 3.1998,
      "step": 14982
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6587271690368652,
      "learning_rate": 0.0005937611970789021,
      "loss": 3.0349,
      "step": 14983
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5518089532852173,
      "learning_rate": 0.0005937603671658913,
      "loss": 2.9766,
      "step": 14984
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2309772968292236,
      "learning_rate": 0.0005937595371982647,
      "loss": 3.071,
      "step": 14985
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.577109694480896,
      "learning_rate": 0.0005937587071760227,
      "loss": 3.1338,
      "step": 14986
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.504019856452942,
      "learning_rate": 0.0005937578770991655,
      "loss": 3.021,
      "step": 14987
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4372289180755615,
      "learning_rate": 0.0005937570469676931,
      "loss": 3.1206,
      "step": 14988
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3629796504974365,
      "learning_rate": 0.0005937562167816057,
      "loss": 3.0349,
      "step": 14989
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.1842930316925049,
      "learning_rate": 0.0005937553865409035,
      "loss": 3.1786,
      "step": 14990
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8676313161849976,
      "learning_rate": 0.0005937545562455866,
      "loss": 3.5058,
      "step": 14991
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.8157575130462646,
      "learning_rate": 0.0005937537258956552,
      "loss": 3.2227,
      "step": 14992
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8888827562332153,
      "learning_rate": 0.0005937528954911096,
      "loss": 3.3302,
      "step": 14993
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4704487323760986,
      "learning_rate": 0.0005937520650319496,
      "loss": 3.231,
      "step": 14994
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.62911856174469,
      "learning_rate": 0.0005937512345181756,
      "loss": 2.8468,
      "step": 14995
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2989581823349,
      "learning_rate": 0.0005937504039497877,
      "loss": 3.2667,
      "step": 14996
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3025182485580444,
      "learning_rate": 0.000593749573326786,
      "loss": 3.1386,
      "step": 14997
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.422670602798462,
      "learning_rate": 0.0005937487426491707,
      "loss": 3.2427,
      "step": 14998
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.989649772644043,
      "learning_rate": 0.0005937479119169421,
      "loss": 3.2275,
      "step": 14999
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4851608276367188,
      "learning_rate": 0.0005937470811301001,
      "loss": 3.3219,
      "step": 15000
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4575166702270508,
      "learning_rate": 0.000593746250288645,
      "loss": 3.1835,
      "step": 15001
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.605485200881958,
      "learning_rate": 0.0005937454193925769,
      "loss": 3.0449,
      "step": 15002
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4873862266540527,
      "learning_rate": 0.0005937445884418961,
      "loss": 3.3338,
      "step": 15003
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.749659538269043,
      "learning_rate": 0.0005937437574366025,
      "loss": 3.0078,
      "step": 15004
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5565353631973267,
      "learning_rate": 0.0005937429263766965,
      "loss": 3.1736,
      "step": 15005
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4408522844314575,
      "learning_rate": 0.0005937420952621781,
      "loss": 3.0097,
      "step": 15006
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4770931005477905,
      "learning_rate": 0.0005937412640930475,
      "loss": 3.335,
      "step": 15007
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3803372383117676,
      "learning_rate": 0.0005937404328693048,
      "loss": 3.0519,
      "step": 15008
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1654186248779297,
      "learning_rate": 0.0005937396015909503,
      "loss": 3.305,
      "step": 15009
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4879639148712158,
      "learning_rate": 0.000593738770257984,
      "loss": 3.1048,
      "step": 15010
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3596985340118408,
      "learning_rate": 0.0005937379388704061,
      "loss": 3.0631,
      "step": 15011
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.485254168510437,
      "learning_rate": 0.0005937371074282168,
      "loss": 3.2703,
      "step": 15012
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8412063121795654,
      "learning_rate": 0.0005937362759314163,
      "loss": 3.3185,
      "step": 15013
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5628691911697388,
      "learning_rate": 0.0005937354443800045,
      "loss": 3.1577,
      "step": 15014
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7084850072860718,
      "learning_rate": 0.0005937346127739819,
      "loss": 3.2428,
      "step": 15015
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7929906845092773,
      "learning_rate": 0.0005937337811133484,
      "loss": 3.2629,
      "step": 15016
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6268264055252075,
      "learning_rate": 0.0005937329493981043,
      "loss": 3.323,
      "step": 15017
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4487870931625366,
      "learning_rate": 0.0005937321176282497,
      "loss": 3.217,
      "step": 15018
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.120056629180908,
      "learning_rate": 0.0005937312858037848,
      "loss": 3.2553,
      "step": 15019
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5874521732330322,
      "learning_rate": 0.0005937304539247096,
      "loss": 3.047,
      "step": 15020
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5053586959838867,
      "learning_rate": 0.0005937296219910244,
      "loss": 3.026,
      "step": 15021
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6525765657424927,
      "learning_rate": 0.0005937287900027294,
      "loss": 3.1333,
      "step": 15022
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6509928703308105,
      "learning_rate": 0.0005937279579598246,
      "loss": 3.102,
      "step": 15023
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3727350234985352,
      "learning_rate": 0.0005937271258623102,
      "loss": 3.2673,
      "step": 15024
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5836557149887085,
      "learning_rate": 0.0005937262937101864,
      "loss": 3.3169,
      "step": 15025
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8433252573013306,
      "learning_rate": 0.0005937254615034534,
      "loss": 3.199,
      "step": 15026
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.637755274772644,
      "learning_rate": 0.0005937246292421112,
      "loss": 2.9572,
      "step": 15027
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6130934953689575,
      "learning_rate": 0.0005937237969261601,
      "loss": 3.1627,
      "step": 15028
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6714355945587158,
      "learning_rate": 0.0005937229645556002,
      "loss": 3.0092,
      "step": 15029
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5105382204055786,
      "learning_rate": 0.0005937221321304317,
      "loss": 3.2403,
      "step": 15030
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7117996215820312,
      "learning_rate": 0.0005937212996506546,
      "loss": 2.9144,
      "step": 15031
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4359062910079956,
      "learning_rate": 0.0005937204671162693,
      "loss": 3.0977,
      "step": 15032
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7552826404571533,
      "learning_rate": 0.0005937196345272757,
      "loss": 3.4868,
      "step": 15033
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.549424171447754,
      "learning_rate": 0.0005937188018836742,
      "loss": 3.0921,
      "step": 15034
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2497546672821045,
      "learning_rate": 0.0005937179691854647,
      "loss": 2.9266,
      "step": 15035
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5928720235824585,
      "learning_rate": 0.0005937171364326476,
      "loss": 3.0453,
      "step": 15036
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6227993965148926,
      "learning_rate": 0.0005937163036252229,
      "loss": 3.1867,
      "step": 15037
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.3135859966278076,
      "learning_rate": 0.0005937154707631908,
      "loss": 3.1326,
      "step": 15038
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7573519945144653,
      "learning_rate": 0.0005937146378465514,
      "loss": 3.2458,
      "step": 15039
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5393145084381104,
      "learning_rate": 0.0005937138048753049,
      "loss": 3.0267,
      "step": 15040
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9239228963851929,
      "learning_rate": 0.0005937129718494515,
      "loss": 3.2354,
      "step": 15041
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1257667541503906,
      "learning_rate": 0.0005937121387689912,
      "loss": 3.0793,
      "step": 15042
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4792274236679077,
      "learning_rate": 0.0005937113056339244,
      "loss": 3.4878,
      "step": 15043
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7499679327011108,
      "learning_rate": 0.0005937104724442513,
      "loss": 2.9981,
      "step": 15044
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6200354099273682,
      "learning_rate": 0.0005937096391999715,
      "loss": 3.2719,
      "step": 15045
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.573456883430481,
      "learning_rate": 0.0005937088059010857,
      "loss": 2.9687,
      "step": 15046
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7642515897750854,
      "learning_rate": 0.000593707972547594,
      "loss": 3.0824,
      "step": 15047
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6337980031967163,
      "learning_rate": 0.0005937071391394963,
      "loss": 2.8606,
      "step": 15048
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4060323238372803,
      "learning_rate": 0.000593706305676793,
      "loss": 3.2672,
      "step": 15049
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.45207679271698,
      "learning_rate": 0.0005937054721594841,
      "loss": 3.1477,
      "step": 15050
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4910950660705566,
      "learning_rate": 0.0005937046385875698,
      "loss": 3.1103,
      "step": 15051
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0878331661224365,
      "learning_rate": 0.0005937038049610502,
      "loss": 3.3005,
      "step": 15052
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.282888412475586,
      "learning_rate": 0.0005937029712799256,
      "loss": 3.2293,
      "step": 15053
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4664881229400635,
      "learning_rate": 0.0005937021375441961,
      "loss": 2.9881,
      "step": 15054
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.39100182056427,
      "learning_rate": 0.0005937013037538618,
      "loss": 3.2457,
      "step": 15055
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.308977484703064,
      "learning_rate": 0.0005937004699089228,
      "loss": 3.1208,
      "step": 15056
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.396914005279541,
      "learning_rate": 0.0005936996360093794,
      "loss": 3.3598,
      "step": 15057
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.429006576538086,
      "learning_rate": 0.0005936988020552318,
      "loss": 3.0778,
      "step": 15058
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3753539323806763,
      "learning_rate": 0.0005936979680464799,
      "loss": 3.2525,
      "step": 15059
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4510509967803955,
      "learning_rate": 0.0005936971339831241,
      "loss": 3.1038,
      "step": 15060
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6986397504806519,
      "learning_rate": 0.0005936962998651644,
      "loss": 3.1268,
      "step": 15061
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5681939125061035,
      "learning_rate": 0.000593695465692601,
      "loss": 3.2262,
      "step": 15062
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7425190210342407,
      "learning_rate": 0.0005936946314654342,
      "loss": 3.157,
      "step": 15063
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6139487028121948,
      "learning_rate": 0.0005936937971836638,
      "loss": 3.0266,
      "step": 15064
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2664377689361572,
      "learning_rate": 0.0005936929628472903,
      "loss": 3.0749,
      "step": 15065
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7809922695159912,
      "learning_rate": 0.0005936921284563138,
      "loss": 3.1525,
      "step": 15066
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0006918907165527,
      "learning_rate": 0.0005936912940107343,
      "loss": 3.1106,
      "step": 15067
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5367026329040527,
      "learning_rate": 0.000593690459510552,
      "loss": 3.1551,
      "step": 15068
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.106508493423462,
      "learning_rate": 0.0005936896249557673,
      "loss": 3.1728,
      "step": 15069
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.3235697746276855,
      "learning_rate": 0.00059368879034638,
      "loss": 3.1155,
      "step": 15070
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0167744159698486,
      "learning_rate": 0.0005936879556823904,
      "loss": 3.0177,
      "step": 15071
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5070693492889404,
      "learning_rate": 0.0005936871209637987,
      "loss": 2.9006,
      "step": 15072
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.188502311706543,
      "learning_rate": 0.000593686286190605,
      "loss": 3.2365,
      "step": 15073
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.825456142425537,
      "learning_rate": 0.0005936854513628096,
      "loss": 3.0071,
      "step": 15074
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4451230764389038,
      "learning_rate": 0.0005936846164804123,
      "loss": 3.0879,
      "step": 15075
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.804462194442749,
      "learning_rate": 0.0005936837815434138,
      "loss": 3.1773,
      "step": 15076
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.6064608097076416,
      "learning_rate": 0.0005936829465518137,
      "loss": 3.2023,
      "step": 15077
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.3157408237457275,
      "learning_rate": 0.0005936821115056124,
      "loss": 2.9863,
      "step": 15078
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5194746255874634,
      "learning_rate": 0.00059368127640481,
      "loss": 3.0918,
      "step": 15079
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.648454427719116,
      "learning_rate": 0.0005936804412494069,
      "loss": 2.8977,
      "step": 15080
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0423285961151123,
      "learning_rate": 0.0005936796060394031,
      "loss": 3.2317,
      "step": 15081
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8310712575912476,
      "learning_rate": 0.0005936787707747985,
      "loss": 3.2464,
      "step": 15082
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8856406211853027,
      "learning_rate": 0.0005936779354555935,
      "loss": 2.9065,
      "step": 15083
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2199442386627197,
      "learning_rate": 0.0005936771000817884,
      "loss": 3.391,
      "step": 15084
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.584946870803833,
      "learning_rate": 0.000593676264653383,
      "loss": 3.0032,
      "step": 15085
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3168946504592896,
      "learning_rate": 0.0005936754291703777,
      "loss": 3.2163,
      "step": 15086
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3220878839492798,
      "learning_rate": 0.0005936745936327726,
      "loss": 3.0766,
      "step": 15087
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5933712720870972,
      "learning_rate": 0.0005936737580405679,
      "loss": 3.0414,
      "step": 15088
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6130462884902954,
      "learning_rate": 0.0005936729223937636,
      "loss": 3.1248,
      "step": 15089
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5841542482376099,
      "learning_rate": 0.0005936720866923601,
      "loss": 3.1728,
      "step": 15090
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5949889421463013,
      "learning_rate": 0.0005936712509363573,
      "loss": 3.2509,
      "step": 15091
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4035109281539917,
      "learning_rate": 0.0005936704151257555,
      "loss": 3.2159,
      "step": 15092
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7622185945510864,
      "learning_rate": 0.0005936695792605549,
      "loss": 3.3848,
      "step": 15093
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5619322061538696,
      "learning_rate": 0.0005936687433407555,
      "loss": 3.1829,
      "step": 15094
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.504457950592041,
      "learning_rate": 0.0005936679073663576,
      "loss": 3.1245,
      "step": 15095
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0757710933685303,
      "learning_rate": 0.0005936670713373611,
      "loss": 2.9964,
      "step": 15096
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.671321153640747,
      "learning_rate": 0.0005936662352537666,
      "loss": 2.9608,
      "step": 15097
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6372761726379395,
      "learning_rate": 0.0005936653991155739,
      "loss": 3.2597,
      "step": 15098
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.619189977645874,
      "learning_rate": 0.0005936645629227832,
      "loss": 3.0982,
      "step": 15099
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.039358377456665,
      "learning_rate": 0.0005936637266753948,
      "loss": 3.3142,
      "step": 15100
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8188111782073975,
      "learning_rate": 0.0005936628903734087,
      "loss": 3.2063,
      "step": 15101
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.010101556777954,
      "learning_rate": 0.0005936620540168252,
      "loss": 3.018,
      "step": 15102
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.464073657989502,
      "learning_rate": 0.0005936612176056443,
      "loss": 3.1012,
      "step": 15103
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3543866872787476,
      "learning_rate": 0.0005936603811398663,
      "loss": 3.2699,
      "step": 15104
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.559232234954834,
      "learning_rate": 0.0005936595446194914,
      "loss": 3.083,
      "step": 15105
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2728158235549927,
      "learning_rate": 0.0005936587080445194,
      "loss": 3.0967,
      "step": 15106
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6246768236160278,
      "learning_rate": 0.0005936578714149509,
      "loss": 3.096,
      "step": 15107
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2739195823669434,
      "learning_rate": 0.0005936570347307858,
      "loss": 3.372,
      "step": 15108
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6851849555969238,
      "learning_rate": 0.0005936561979920243,
      "loss": 3.0486,
      "step": 15109
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9158153533935547,
      "learning_rate": 0.0005936553611986665,
      "loss": 3.1623,
      "step": 15110
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3923910856246948,
      "learning_rate": 0.0005936545243507127,
      "loss": 3.1533,
      "step": 15111
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3653969764709473,
      "learning_rate": 0.000593653687448163,
      "loss": 3.1784,
      "step": 15112
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.648116111755371,
      "learning_rate": 0.0005936528504910175,
      "loss": 3.2574,
      "step": 15113
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6572215557098389,
      "learning_rate": 0.0005936520134792765,
      "loss": 3.1288,
      "step": 15114
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.363384485244751,
      "learning_rate": 0.0005936511764129399,
      "loss": 3.3693,
      "step": 15115
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7066752910614014,
      "learning_rate": 0.000593650339292008,
      "loss": 3.4065,
      "step": 15116
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7628753185272217,
      "learning_rate": 0.000593649502116481,
      "loss": 3.2184,
      "step": 15117
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5653860569000244,
      "learning_rate": 0.0005936486648863591,
      "loss": 3.0023,
      "step": 15118
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7314739227294922,
      "learning_rate": 0.0005936478276016423,
      "loss": 3.2428,
      "step": 15119
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3238343000411987,
      "learning_rate": 0.0005936469902623309,
      "loss": 2.9489,
      "step": 15120
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4452786445617676,
      "learning_rate": 0.0005936461528684248,
      "loss": 3.0597,
      "step": 15121
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5339076519012451,
      "learning_rate": 0.0005936453154199245,
      "loss": 2.9623,
      "step": 15122
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4466453790664673,
      "learning_rate": 0.0005936444779168299,
      "loss": 2.985,
      "step": 15123
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5176328420639038,
      "learning_rate": 0.0005936436403591413,
      "loss": 3.2083,
      "step": 15124
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.090510129928589,
      "learning_rate": 0.0005936428027468587,
      "loss": 3.016,
      "step": 15125
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5418511629104614,
      "learning_rate": 0.0005936419650799824,
      "loss": 3.0983,
      "step": 15126
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.3909034729003906,
      "learning_rate": 0.0005936411273585126,
      "loss": 3.2241,
      "step": 15127
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7709819078445435,
      "learning_rate": 0.0005936402895824494,
      "loss": 3.0084,
      "step": 15128
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3398246765136719,
      "learning_rate": 0.0005936394517517928,
      "loss": 3.0153,
      "step": 15129
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8497437238693237,
      "learning_rate": 0.0005936386138665432,
      "loss": 2.9587,
      "step": 15130
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8840097188949585,
      "learning_rate": 0.0005936377759267005,
      "loss": 2.9323,
      "step": 15131
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6851731538772583,
      "learning_rate": 0.0005936369379322651,
      "loss": 3.0285,
      "step": 15132
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.7818338871002197,
      "learning_rate": 0.0005936360998832371,
      "loss": 3.0181,
      "step": 15133
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1313185691833496,
      "learning_rate": 0.0005936352617796166,
      "loss": 3.2306,
      "step": 15134
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3679146766662598,
      "learning_rate": 0.0005936344236214036,
      "loss": 3.4904,
      "step": 15135
    },
    {
      "epoch": 0.2,
      "grad_norm": 4.0328497886657715,
      "learning_rate": 0.0005936335854085986,
      "loss": 2.7606,
      "step": 15136
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.835384726524353,
      "learning_rate": 0.0005936327471412014,
      "loss": 3.03,
      "step": 15137
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6039186716079712,
      "learning_rate": 0.0005936319088192124,
      "loss": 3.1734,
      "step": 15138
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.6839535236358643,
      "learning_rate": 0.0005936310704426318,
      "loss": 3.0239,
      "step": 15139
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.015052080154419,
      "learning_rate": 0.0005936302320114596,
      "loss": 3.0321,
      "step": 15140
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7657135725021362,
      "learning_rate": 0.0005936293935256958,
      "loss": 3.1215,
      "step": 15141
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5613808631896973,
      "learning_rate": 0.0005936285549853409,
      "loss": 3.1105,
      "step": 15142
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3143465518951416,
      "learning_rate": 0.0005936277163903949,
      "loss": 2.9133,
      "step": 15143
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8084540367126465,
      "learning_rate": 0.0005936268777408581,
      "loss": 3.313,
      "step": 15144
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5689514875411987,
      "learning_rate": 0.0005936260390367303,
      "loss": 3.2291,
      "step": 15145
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.298512578010559,
      "learning_rate": 0.000593625200278012,
      "loss": 3.2667,
      "step": 15146
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.871017336845398,
      "learning_rate": 0.0005936243614647032,
      "loss": 3.0913,
      "step": 15147
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.003143310546875,
      "learning_rate": 0.0005936235225968041,
      "loss": 3.1548,
      "step": 15148
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4025459289550781,
      "learning_rate": 0.0005936226836743148,
      "loss": 3.2316,
      "step": 15149
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7457491159439087,
      "learning_rate": 0.0005936218446972356,
      "loss": 3.1255,
      "step": 15150
    },
    {
      "epoch": 0.2,
      "grad_norm": 4.634922027587891,
      "learning_rate": 0.0005936210056655664,
      "loss": 3.1742,
      "step": 15151
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.7136175632476807,
      "learning_rate": 0.0005936201665793076,
      "loss": 3.0198,
      "step": 15152
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.428795337677002,
      "learning_rate": 0.0005936193274384594,
      "loss": 3.247,
      "step": 15153
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.702043890953064,
      "learning_rate": 0.0005936184882430216,
      "loss": 3.1116,
      "step": 15154
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.632704019546509,
      "learning_rate": 0.0005936176489929946,
      "loss": 3.3342,
      "step": 15155
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4643911123275757,
      "learning_rate": 0.0005936168096883787,
      "loss": 3.115,
      "step": 15156
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.800276517868042,
      "learning_rate": 0.0005936159703291738,
      "loss": 2.7522,
      "step": 15157
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.103961229324341,
      "learning_rate": 0.0005936151309153801,
      "loss": 2.895,
      "step": 15158
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3388280868530273,
      "learning_rate": 0.000593614291446998,
      "loss": 3.1523,
      "step": 15159
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5696039199829102,
      "learning_rate": 0.0005936134519240272,
      "loss": 2.9322,
      "step": 15160
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4486769437789917,
      "learning_rate": 0.0005936126123464682,
      "loss": 3.1605,
      "step": 15161
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6915780305862427,
      "learning_rate": 0.0005936117727143212,
      "loss": 2.65,
      "step": 15162
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5531893968582153,
      "learning_rate": 0.0005936109330275861,
      "loss": 3.0478,
      "step": 15163
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6856437921524048,
      "learning_rate": 0.0005936100932862632,
      "loss": 3.1505,
      "step": 15164
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4729136228561401,
      "learning_rate": 0.0005936092534903528,
      "loss": 2.8584,
      "step": 15165
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3023731708526611,
      "learning_rate": 0.0005936084136398547,
      "loss": 3.0844,
      "step": 15166
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7417516708374023,
      "learning_rate": 0.0005936075737347693,
      "loss": 3.0971,
      "step": 15167
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4297269582748413,
      "learning_rate": 0.0005936067337750967,
      "loss": 3.1921,
      "step": 15168
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.468930959701538,
      "learning_rate": 0.0005936058937608371,
      "loss": 3.2467,
      "step": 15169
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5515012741088867,
      "learning_rate": 0.0005936050536919906,
      "loss": 2.7605,
      "step": 15170
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5435283184051514,
      "learning_rate": 0.0005936042135685573,
      "loss": 2.9349,
      "step": 15171
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2622153759002686,
      "learning_rate": 0.0005936033733905376,
      "loss": 3.2567,
      "step": 15172
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4186047315597534,
      "learning_rate": 0.0005936025331579315,
      "loss": 3.4635,
      "step": 15173
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.451432466506958,
      "learning_rate": 0.000593601692870739,
      "loss": 3.27,
      "step": 15174
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4727504253387451,
      "learning_rate": 0.0005936008525289605,
      "loss": 3.2931,
      "step": 15175
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6757134199142456,
      "learning_rate": 0.0005936000121325959,
      "loss": 3.2104,
      "step": 15176
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5397783517837524,
      "learning_rate": 0.0005935991716816458,
      "loss": 3.1304,
      "step": 15177
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8068511486053467,
      "learning_rate": 0.0005935983311761098,
      "loss": 3.0452,
      "step": 15178
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4688271284103394,
      "learning_rate": 0.0005935974906159886,
      "loss": 3.1976,
      "step": 15179
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2704906463623047,
      "learning_rate": 0.0005935966500012818,
      "loss": 3.089,
      "step": 15180
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9814754724502563,
      "learning_rate": 0.00059359580933199,
      "loss": 3.0185,
      "step": 15181
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5001777410507202,
      "learning_rate": 0.0005935949686081132,
      "loss": 2.9168,
      "step": 15182
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7524003982543945,
      "learning_rate": 0.0005935941278296515,
      "loss": 3.1954,
      "step": 15183
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4994220733642578,
      "learning_rate": 0.0005935932869966051,
      "loss": 3.1388,
      "step": 15184
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4193592071533203,
      "learning_rate": 0.0005935924461089742,
      "loss": 3.0316,
      "step": 15185
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7514538764953613,
      "learning_rate": 0.0005935916051667589,
      "loss": 3.0712,
      "step": 15186
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.734724521636963,
      "learning_rate": 0.0005935907641699595,
      "loss": 3.2835,
      "step": 15187
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5965830087661743,
      "learning_rate": 0.0005935899231185759,
      "loss": 3.0923,
      "step": 15188
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7228630781173706,
      "learning_rate": 0.0005935890820126083,
      "loss": 3.0005,
      "step": 15189
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3551685810089111,
      "learning_rate": 0.0005935882408520572,
      "loss": 3.2036,
      "step": 15190
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4770561456680298,
      "learning_rate": 0.0005935873996369223,
      "loss": 3.2229,
      "step": 15191
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5136207342147827,
      "learning_rate": 0.0005935865583672041,
      "loss": 3.0806,
      "step": 15192
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7977542877197266,
      "learning_rate": 0.0005935857170429026,
      "loss": 3.1314,
      "step": 15193
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6483882665634155,
      "learning_rate": 0.0005935848756640178,
      "loss": 3.054,
      "step": 15194
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3465949296951294,
      "learning_rate": 0.0005935840342305502,
      "loss": 3.3682,
      "step": 15195
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0370001792907715,
      "learning_rate": 0.0005935831927424997,
      "loss": 3.1317,
      "step": 15196
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.460820198059082,
      "learning_rate": 0.0005935823511998667,
      "loss": 3.0086,
      "step": 15197
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.888739824295044,
      "learning_rate": 0.000593581509602651,
      "loss": 3.1992,
      "step": 15198
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5211350917816162,
      "learning_rate": 0.0005935806679508531,
      "loss": 3.2849,
      "step": 15199
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6580151319503784,
      "learning_rate": 0.0005935798262444729,
      "loss": 2.984,
      "step": 15200
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.472444772720337,
      "learning_rate": 0.0005935789844835107,
      "loss": 3.2748,
      "step": 15201
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4061425924301147,
      "learning_rate": 0.0005935781426679666,
      "loss": 3.1047,
      "step": 15202
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.672924280166626,
      "learning_rate": 0.0005935773007978408,
      "loss": 3.2705,
      "step": 15203
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5157521963119507,
      "learning_rate": 0.0005935764588731336,
      "loss": 3.0211,
      "step": 15204
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4845820665359497,
      "learning_rate": 0.0005935756168938448,
      "loss": 2.9306,
      "step": 15205
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.488364338874817,
      "learning_rate": 0.0005935747748599748,
      "loss": 3.1619,
      "step": 15206
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3318895101547241,
      "learning_rate": 0.0005935739327715237,
      "loss": 2.9769,
      "step": 15207
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5704329013824463,
      "learning_rate": 0.0005935730906284917,
      "loss": 3.2805,
      "step": 15208
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.387057900428772,
      "learning_rate": 0.0005935722484308788,
      "loss": 3.1254,
      "step": 15209
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.340935230255127,
      "learning_rate": 0.0005935714061786855,
      "loss": 3.2008,
      "step": 15210
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.610520601272583,
      "learning_rate": 0.0005935705638719116,
      "loss": 3.0063,
      "step": 15211
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.446162462234497,
      "learning_rate": 0.0005935697215105573,
      "loss": 3.3235,
      "step": 15212
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1081230640411377,
      "learning_rate": 0.000593568879094623,
      "loss": 2.9397,
      "step": 15213
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4792773723602295,
      "learning_rate": 0.0005935680366241087,
      "loss": 3.1448,
      "step": 15214
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6988732814788818,
      "learning_rate": 0.0005935671940990145,
      "loss": 3.2054,
      "step": 15215
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.898886799812317,
      "learning_rate": 0.0005935663515193406,
      "loss": 2.9757,
      "step": 15216
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.441100835800171,
      "learning_rate": 0.0005935655088850873,
      "loss": 3.0974,
      "step": 15217
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9936268329620361,
      "learning_rate": 0.0005935646661962544,
      "loss": 3.0349,
      "step": 15218
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9183803796768188,
      "learning_rate": 0.0005935638234528426,
      "loss": 3.2215,
      "step": 15219
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3336271047592163,
      "learning_rate": 0.0005935629806548514,
      "loss": 3.2527,
      "step": 15220
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8752505779266357,
      "learning_rate": 0.0005935621378022816,
      "loss": 3.0374,
      "step": 15221
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4076284170150757,
      "learning_rate": 0.0005935612948951329,
      "loss": 3.251,
      "step": 15222
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4247013330459595,
      "learning_rate": 0.0005935604519334057,
      "loss": 3.0313,
      "step": 15223
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3904107809066772,
      "learning_rate": 0.0005935596089170999,
      "loss": 3.2778,
      "step": 15224
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.514520287513733,
      "learning_rate": 0.000593558765846216,
      "loss": 3.1235,
      "step": 15225
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5233335494995117,
      "learning_rate": 0.000593557922720754,
      "loss": 3.0853,
      "step": 15226
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5035834312438965,
      "learning_rate": 0.0005935570795407138,
      "loss": 3.3932,
      "step": 15227
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.336977005004883,
      "learning_rate": 0.000593556236306096,
      "loss": 3.2419,
      "step": 15228
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.6579554080963135,
      "learning_rate": 0.0005935553930169006,
      "loss": 2.9571,
      "step": 15229
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6392478942871094,
      "learning_rate": 0.0005935545496731275,
      "loss": 3.0954,
      "step": 15230
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5868849754333496,
      "learning_rate": 0.0005935537062747772,
      "loss": 3.2835,
      "step": 15231
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5020077228546143,
      "learning_rate": 0.0005935528628218498,
      "loss": 3.0201,
      "step": 15232
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.492523431777954,
      "learning_rate": 0.0005935520193143453,
      "loss": 3.0768,
      "step": 15233
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5388000011444092,
      "learning_rate": 0.0005935511757522639,
      "loss": 3.0078,
      "step": 15234
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.917170763015747,
      "learning_rate": 0.0005935503321356058,
      "loss": 3.2105,
      "step": 15235
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.585463047027588,
      "learning_rate": 0.0005935494884643712,
      "loss": 2.9785,
      "step": 15236
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5900511741638184,
      "learning_rate": 0.0005935486447385602,
      "loss": 2.9283,
      "step": 15237
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7898932695388794,
      "learning_rate": 0.000593547800958173,
      "loss": 3.1001,
      "step": 15238
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4361107349395752,
      "learning_rate": 0.0005935469571232097,
      "loss": 3.1623,
      "step": 15239
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3464797735214233,
      "learning_rate": 0.0005935461132336703,
      "loss": 2.9773,
      "step": 15240
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5467373132705688,
      "learning_rate": 0.0005935452692895554,
      "loss": 2.96,
      "step": 15241
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5339646339416504,
      "learning_rate": 0.0005935444252908648,
      "loss": 3.2849,
      "step": 15242
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8973779678344727,
      "learning_rate": 0.0005935435812375987,
      "loss": 3.2332,
      "step": 15243
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8473902940750122,
      "learning_rate": 0.0005935427371297574,
      "loss": 3.2458,
      "step": 15244
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9549494981765747,
      "learning_rate": 0.0005935418929673409,
      "loss": 3.1689,
      "step": 15245
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5898725986480713,
      "learning_rate": 0.0005935410487503494,
      "loss": 2.9984,
      "step": 15246
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3870913982391357,
      "learning_rate": 0.0005935402044787831,
      "loss": 3.23,
      "step": 15247
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6869118213653564,
      "learning_rate": 0.0005935393601526422,
      "loss": 3.3044,
      "step": 15248
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4993565082550049,
      "learning_rate": 0.0005935385157719267,
      "loss": 3.1514,
      "step": 15249
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.395547866821289,
      "learning_rate": 0.0005935376713366369,
      "loss": 3.2725,
      "step": 15250
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7383606433868408,
      "learning_rate": 0.0005935368268467729,
      "loss": 3.1955,
      "step": 15251
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6614415645599365,
      "learning_rate": 0.0005935359823023348,
      "loss": 2.7926,
      "step": 15252
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.105863571166992,
      "learning_rate": 0.0005935351377033229,
      "loss": 3.1111,
      "step": 15253
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5628021955490112,
      "learning_rate": 0.0005935342930497372,
      "loss": 3.1477,
      "step": 15254
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2914865016937256,
      "learning_rate": 0.0005935334483415781,
      "loss": 3.0381,
      "step": 15255
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5517441034317017,
      "learning_rate": 0.0005935326035788455,
      "loss": 2.9405,
      "step": 15256
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2916046380996704,
      "learning_rate": 0.0005935317587615395,
      "loss": 3.049,
      "step": 15257
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7502833604812622,
      "learning_rate": 0.0005935309138896607,
      "loss": 3.063,
      "step": 15258
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4034420251846313,
      "learning_rate": 0.0005935300689632087,
      "loss": 3.1581,
      "step": 15259
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8171007633209229,
      "learning_rate": 0.000593529223982184,
      "loss": 3.2992,
      "step": 15260
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.590118169784546,
      "learning_rate": 0.0005935283789465867,
      "loss": 3.2455,
      "step": 15261
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4269330501556396,
      "learning_rate": 0.0005935275338564169,
      "loss": 3.2305,
      "step": 15262
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4829902648925781,
      "learning_rate": 0.0005935266887116747,
      "loss": 3.3446,
      "step": 15263
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.458655595779419,
      "learning_rate": 0.0005935258435123606,
      "loss": 3.109,
      "step": 15264
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6634138822555542,
      "learning_rate": 0.0005935249982584744,
      "loss": 2.9825,
      "step": 15265
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.606459140777588,
      "learning_rate": 0.0005935241529500163,
      "loss": 3.0357,
      "step": 15266
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.663874864578247,
      "learning_rate": 0.0005935233075869866,
      "loss": 3.2408,
      "step": 15267
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2125260829925537,
      "learning_rate": 0.0005935224621693853,
      "loss": 3.3506,
      "step": 15268
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6959377527236938,
      "learning_rate": 0.0005935216166972127,
      "loss": 3.3818,
      "step": 15269
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8317670822143555,
      "learning_rate": 0.0005935207711704688,
      "loss": 3.0479,
      "step": 15270
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6356723308563232,
      "learning_rate": 0.0005935199255891539,
      "loss": 3.4816,
      "step": 15271
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.633176326751709,
      "learning_rate": 0.0005935190799532681,
      "loss": 3.1191,
      "step": 15272
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.454667091369629,
      "learning_rate": 0.0005935182342628115,
      "loss": 3.2317,
      "step": 15273
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4791218042373657,
      "learning_rate": 0.0005935173885177845,
      "loss": 3.0767,
      "step": 15274
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0623981952667236,
      "learning_rate": 0.0005935165427181869,
      "loss": 3.2456,
      "step": 15275
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6786729097366333,
      "learning_rate": 0.0005935156968640192,
      "loss": 2.9189,
      "step": 15276
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4031907320022583,
      "learning_rate": 0.0005935148509552811,
      "loss": 3.4605,
      "step": 15277
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6350762844085693,
      "learning_rate": 0.0005935140049919734,
      "loss": 3.1019,
      "step": 15278
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.253663182258606,
      "learning_rate": 0.0005935131589740957,
      "loss": 3.1622,
      "step": 15279
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3119556903839111,
      "learning_rate": 0.0005935123129016483,
      "loss": 3.2219,
      "step": 15280
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5151972770690918,
      "learning_rate": 0.0005935114667746316,
      "loss": 2.8378,
      "step": 15281
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4358619451522827,
      "learning_rate": 0.0005935106205930455,
      "loss": 3.0588,
      "step": 15282
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7305854558944702,
      "learning_rate": 0.0005935097743568902,
      "loss": 3.3909,
      "step": 15283
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3355920314788818,
      "learning_rate": 0.000593508928066166,
      "loss": 3.2014,
      "step": 15284
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.408165693283081,
      "learning_rate": 0.0005935080817208728,
      "loss": 3.3043,
      "step": 15285
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5074009895324707,
      "learning_rate": 0.000593507235321011,
      "loss": 3.3731,
      "step": 15286
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.280897617340088,
      "learning_rate": 0.0005935063888665807,
      "loss": 3.1576,
      "step": 15287
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.273177146911621,
      "learning_rate": 0.0005935055423575819,
      "loss": 3.3159,
      "step": 15288
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7624965906143188,
      "learning_rate": 0.000593504695794015,
      "loss": 3.2071,
      "step": 15289
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9930580854415894,
      "learning_rate": 0.00059350384917588,
      "loss": 3.2721,
      "step": 15290
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5358346700668335,
      "learning_rate": 0.0005935030025031771,
      "loss": 3.3222,
      "step": 15291
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8152841329574585,
      "learning_rate": 0.0005935021557759065,
      "loss": 2.995,
      "step": 15292
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.393463373184204,
      "learning_rate": 0.0005935013089940683,
      "loss": 3.3266,
      "step": 15293
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4533441066741943,
      "learning_rate": 0.0005935004621576625,
      "loss": 3.0845,
      "step": 15294
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.607642412185669,
      "learning_rate": 0.0005934996152666895,
      "loss": 3.1336,
      "step": 15295
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4380662441253662,
      "learning_rate": 0.0005934987683211495,
      "loss": 3.045,
      "step": 15296
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5567494630813599,
      "learning_rate": 0.0005934979213210424,
      "loss": 3.2503,
      "step": 15297
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5055532455444336,
      "learning_rate": 0.0005934970742663685,
      "loss": 3.1648,
      "step": 15298
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7669416666030884,
      "learning_rate": 0.0005934962271571281,
      "loss": 3.1789,
      "step": 15299
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7292261123657227,
      "learning_rate": 0.0005934953799933211,
      "loss": 3.2542,
      "step": 15300
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8035894632339478,
      "learning_rate": 0.0005934945327749477,
      "loss": 2.9772,
      "step": 15301
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.596901297569275,
      "learning_rate": 0.0005934936855020082,
      "loss": 2.999,
      "step": 15302
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2004778385162354,
      "learning_rate": 0.0005934928381745027,
      "loss": 2.9287,
      "step": 15303
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.20363712310791,
      "learning_rate": 0.0005934919907924314,
      "loss": 3.4471,
      "step": 15304
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4903935194015503,
      "learning_rate": 0.0005934911433557943,
      "loss": 3.411,
      "step": 15305
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6172312498092651,
      "learning_rate": 0.0005934902958645917,
      "loss": 3.1123,
      "step": 15306
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.907591462135315,
      "learning_rate": 0.0005934894483188237,
      "loss": 2.8576,
      "step": 15307
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4424093961715698,
      "learning_rate": 0.0005934886007184904,
      "loss": 3.1307,
      "step": 15308
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.624932050704956,
      "learning_rate": 0.0005934877530635922,
      "loss": 3.2085,
      "step": 15309
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3932361602783203,
      "learning_rate": 0.0005934869053541291,
      "loss": 3.1011,
      "step": 15310
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5726649761199951,
      "learning_rate": 0.000593486057590101,
      "loss": 3.1352,
      "step": 15311
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7566415071487427,
      "learning_rate": 0.0005934852097715085,
      "loss": 3.2046,
      "step": 15312
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2365037202835083,
      "learning_rate": 0.0005934843618983515,
      "loss": 3.1502,
      "step": 15313
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7376550436019897,
      "learning_rate": 0.0005934835139706302,
      "loss": 3.1238,
      "step": 15314
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5990087985992432,
      "learning_rate": 0.0005934826659883448,
      "loss": 2.9853,
      "step": 15315
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4146674871444702,
      "learning_rate": 0.0005934818179514954,
      "loss": 3.0746,
      "step": 15316
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5184062719345093,
      "learning_rate": 0.0005934809698600823,
      "loss": 3.2222,
      "step": 15317
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3486926555633545,
      "learning_rate": 0.0005934801217141053,
      "loss": 3.1673,
      "step": 15318
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.84736967086792,
      "learning_rate": 0.000593479273513565,
      "loss": 2.9971,
      "step": 15319
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4151612520217896,
      "learning_rate": 0.0005934784252584614,
      "loss": 3.109,
      "step": 15320
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4543908834457397,
      "learning_rate": 0.0005934775769487945,
      "loss": 3.3435,
      "step": 15321
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0597474575042725,
      "learning_rate": 0.0005934767285845646,
      "loss": 3.0532,
      "step": 15322
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.315429925918579,
      "learning_rate": 0.0005934758801657719,
      "loss": 3.1659,
      "step": 15323
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3675727844238281,
      "learning_rate": 0.0005934750316924165,
      "loss": 2.9481,
      "step": 15324
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7166730165481567,
      "learning_rate": 0.0005934741831644984,
      "loss": 3.033,
      "step": 15325
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5479817390441895,
      "learning_rate": 0.0005934733345820181,
      "loss": 3.0695,
      "step": 15326
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.301405429840088,
      "learning_rate": 0.0005934724859449755,
      "loss": 3.0689,
      "step": 15327
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6698517799377441,
      "learning_rate": 0.0005934716372533708,
      "loss": 3.0121,
      "step": 15328
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8596736192703247,
      "learning_rate": 0.0005934707885072042,
      "loss": 2.859,
      "step": 15329
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4858629703521729,
      "learning_rate": 0.0005934699397064759,
      "loss": 3.2259,
      "step": 15330
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3946565389633179,
      "learning_rate": 0.0005934690908511858,
      "loss": 3.1988,
      "step": 15331
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8166711330413818,
      "learning_rate": 0.0005934682419413345,
      "loss": 3.1122,
      "step": 15332
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5033756494522095,
      "learning_rate": 0.0005934673929769218,
      "loss": 3.0367,
      "step": 15333
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.0242786407470703,
      "learning_rate": 0.000593466543957948,
      "loss": 3.0037,
      "step": 15334
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4578697681427002,
      "learning_rate": 0.0005934656948844132,
      "loss": 2.9538,
      "step": 15335
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7610220909118652,
      "learning_rate": 0.0005934648457563176,
      "loss": 3.1147,
      "step": 15336
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2131595611572266,
      "learning_rate": 0.0005934639965736614,
      "loss": 3.0447,
      "step": 15337
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4312819242477417,
      "learning_rate": 0.0005934631473364445,
      "loss": 2.9944,
      "step": 15338
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9595228433609009,
      "learning_rate": 0.0005934622980446675,
      "loss": 3.1301,
      "step": 15339
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5153907537460327,
      "learning_rate": 0.0005934614486983302,
      "loss": 3.262,
      "step": 15340
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4000180959701538,
      "learning_rate": 0.0005934605992974328,
      "loss": 3.0137,
      "step": 15341
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.900897741317749,
      "learning_rate": 0.0005934597498419757,
      "loss": 3.1846,
      "step": 15342
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4929122924804688,
      "learning_rate": 0.0005934589003319589,
      "loss": 3.2505,
      "step": 15343
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.845285415649414,
      "learning_rate": 0.0005934580507673824,
      "loss": 3.136,
      "step": 15344
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2780274152755737,
      "learning_rate": 0.0005934572011482466,
      "loss": 3.0395,
      "step": 15345
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3488941192626953,
      "learning_rate": 0.0005934563514745515,
      "loss": 3.0674,
      "step": 15346
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.7564778327941895,
      "learning_rate": 0.0005934555017462975,
      "loss": 2.9519,
      "step": 15347
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4913454055786133,
      "learning_rate": 0.0005934546519634844,
      "loss": 3.1232,
      "step": 15348
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5753365755081177,
      "learning_rate": 0.0005934538021261127,
      "loss": 3.3552,
      "step": 15349
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3912241458892822,
      "learning_rate": 0.0005934529522341823,
      "loss": 3.1376,
      "step": 15350
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5897005796432495,
      "learning_rate": 0.0005934521022876935,
      "loss": 3.3116,
      "step": 15351
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4792284965515137,
      "learning_rate": 0.0005934512522866463,
      "loss": 3.0335,
      "step": 15352
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.155745029449463,
      "learning_rate": 0.000593450402231041,
      "loss": 3.2591,
      "step": 15353
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2101597785949707,
      "learning_rate": 0.0005934495521208778,
      "loss": 3.2218,
      "step": 15354
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.503357172012329,
      "learning_rate": 0.0005934487019561568,
      "loss": 3.1702,
      "step": 15355
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4895052909851074,
      "learning_rate": 0.0005934478517368782,
      "loss": 3.3659,
      "step": 15356
    },
    {
      "epoch": 0.2,
      "grad_norm": 4.103610038757324,
      "learning_rate": 0.0005934470014630419,
      "loss": 3.1214,
      "step": 15357
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.086272716522217,
      "learning_rate": 0.0005934461511346485,
      "loss": 3.028,
      "step": 15358
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8856706619262695,
      "learning_rate": 0.0005934453007516977,
      "loss": 2.9727,
      "step": 15359
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.969718098640442,
      "learning_rate": 0.00059344445031419,
      "loss": 3.2331,
      "step": 15360
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4357750415802,
      "learning_rate": 0.0005934435998221255,
      "loss": 2.917,
      "step": 15361
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4210560321807861,
      "learning_rate": 0.0005934427492755042,
      "loss": 3.2009,
      "step": 15362
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1551268100738525,
      "learning_rate": 0.0005934418986743263,
      "loss": 2.9595,
      "step": 15363
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.241969585418701,
      "learning_rate": 0.0005934410480185921,
      "loss": 3.2137,
      "step": 15364
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9964616298675537,
      "learning_rate": 0.0005934401973083017,
      "loss": 2.9599,
      "step": 15365
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6375086307525635,
      "learning_rate": 0.0005934393465434551,
      "loss": 3.022,
      "step": 15366
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.186415433883667,
      "learning_rate": 0.0005934384957240527,
      "loss": 2.9037,
      "step": 15367
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3393081426620483,
      "learning_rate": 0.0005934376448500945,
      "loss": 3.0811,
      "step": 15368
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7613701820373535,
      "learning_rate": 0.0005934367939215807,
      "loss": 2.9167,
      "step": 15369
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8564567565917969,
      "learning_rate": 0.0005934359429385115,
      "loss": 3.1927,
      "step": 15370
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4011203050613403,
      "learning_rate": 0.000593435091900887,
      "loss": 3.1714,
      "step": 15371
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4043045043945312,
      "learning_rate": 0.0005934342408087074,
      "loss": 3.0807,
      "step": 15372
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3228366374969482,
      "learning_rate": 0.0005934333896619728,
      "loss": 2.9511,
      "step": 15373
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6071059703826904,
      "learning_rate": 0.0005934325384606833,
      "loss": 3.0329,
      "step": 15374
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9942256212234497,
      "learning_rate": 0.0005934316872048394,
      "loss": 3.2008,
      "step": 15375
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.745229959487915,
      "learning_rate": 0.0005934308358944408,
      "loss": 2.8644,
      "step": 15376
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2729873657226562,
      "learning_rate": 0.000593429984529488,
      "loss": 3.324,
      "step": 15377
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6742455959320068,
      "learning_rate": 0.000593429133109981,
      "loss": 3.2294,
      "step": 15378
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6647135019302368,
      "learning_rate": 0.0005934282816359199,
      "loss": 3.3427,
      "step": 15379
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3914730548858643,
      "learning_rate": 0.0005934274301073049,
      "loss": 3.326,
      "step": 15380
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4704432487487793,
      "learning_rate": 0.0005934265785241363,
      "loss": 2.8635,
      "step": 15381
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5893248319625854,
      "learning_rate": 0.0005934257268864142,
      "loss": 3.257,
      "step": 15382
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8181681632995605,
      "learning_rate": 0.0005934248751941387,
      "loss": 3.0648,
      "step": 15383
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2400295734405518,
      "learning_rate": 0.0005934240234473099,
      "loss": 2.9737,
      "step": 15384
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8234291076660156,
      "learning_rate": 0.0005934231716459282,
      "loss": 3.2014,
      "step": 15385
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.765544056892395,
      "learning_rate": 0.0005934223197899934,
      "loss": 3.3572,
      "step": 15386
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.892917513847351,
      "learning_rate": 0.000593421467879506,
      "loss": 3.212,
      "step": 15387
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4638612270355225,
      "learning_rate": 0.0005934206159144658,
      "loss": 3.2134,
      "step": 15388
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6869909763336182,
      "learning_rate": 0.0005934197638948733,
      "loss": 2.9225,
      "step": 15389
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5840340852737427,
      "learning_rate": 0.0005934189118207287,
      "loss": 3.2193,
      "step": 15390
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6149811744689941,
      "learning_rate": 0.0005934180596920317,
      "loss": 3.2053,
      "step": 15391
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.139580488204956,
      "learning_rate": 0.0005934172075087829,
      "loss": 3.1928,
      "step": 15392
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.215381622314453,
      "learning_rate": 0.0005934163552709822,
      "loss": 2.9628,
      "step": 15393
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7236322164535522,
      "learning_rate": 0.00059341550297863,
      "loss": 3.2135,
      "step": 15394
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.7690370082855225,
      "learning_rate": 0.0005934146506317262,
      "loss": 3.0605,
      "step": 15395
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0306713581085205,
      "learning_rate": 0.0005934137982302712,
      "loss": 2.8896,
      "step": 15396
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6355153322219849,
      "learning_rate": 0.000593412945774265,
      "loss": 3.0424,
      "step": 15397
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7265982627868652,
      "learning_rate": 0.0005934120932637076,
      "loss": 3.0598,
      "step": 15398
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6153889894485474,
      "learning_rate": 0.0005934112406985996,
      "loss": 2.9591,
      "step": 15399
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.9332170486450195,
      "learning_rate": 0.0005934103880789408,
      "loss": 3.098,
      "step": 15400
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0114595890045166,
      "learning_rate": 0.0005934095354047314,
      "loss": 3.1834,
      "step": 15401
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0745770931243896,
      "learning_rate": 0.0005934086826759718,
      "loss": 3.1119,
      "step": 15402
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7875139713287354,
      "learning_rate": 0.0005934078298926619,
      "loss": 3.1375,
      "step": 15403
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7653193473815918,
      "learning_rate": 0.000593406977054802,
      "loss": 3.126,
      "step": 15404
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7129801511764526,
      "learning_rate": 0.0005934061241623922,
      "loss": 3.0144,
      "step": 15405
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7820199728012085,
      "learning_rate": 0.0005934052712154326,
      "loss": 2.9864,
      "step": 15406
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6979470252990723,
      "learning_rate": 0.0005934044182139234,
      "loss": 3.0824,
      "step": 15407
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2523436546325684,
      "learning_rate": 0.0005934035651578648,
      "loss": 3.1274,
      "step": 15408
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.0140511989593506,
      "learning_rate": 0.000593402712047257,
      "loss": 3.2684,
      "step": 15409
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9917913675308228,
      "learning_rate": 0.0005934018588821,
      "loss": 3.2878,
      "step": 15410
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7031903266906738,
      "learning_rate": 0.0005934010056623942,
      "loss": 3.3217,
      "step": 15411
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.589133858680725,
      "learning_rate": 0.0005934001523881394,
      "loss": 2.9683,
      "step": 15412
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.3214266300201416,
      "learning_rate": 0.0005933992990593362,
      "loss": 3.2832,
      "step": 15413
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7298531532287598,
      "learning_rate": 0.0005933984456759845,
      "loss": 3.234,
      "step": 15414
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2956888675689697,
      "learning_rate": 0.0005933975922380844,
      "loss": 3.1084,
      "step": 15415
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9773521423339844,
      "learning_rate": 0.0005933967387456361,
      "loss": 3.1763,
      "step": 15416
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2663382291793823,
      "learning_rate": 0.00059339588519864,
      "loss": 3.0058,
      "step": 15417
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5617444515228271,
      "learning_rate": 0.0005933950315970959,
      "loss": 2.7967,
      "step": 15418
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.274733543395996,
      "learning_rate": 0.0005933941779410042,
      "loss": 3.0266,
      "step": 15419
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4202486276626587,
      "learning_rate": 0.0005933933242303649,
      "loss": 3.0463,
      "step": 15420
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3426774740219116,
      "learning_rate": 0.0005933924704651783,
      "loss": 3.0038,
      "step": 15421
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7376985549926758,
      "learning_rate": 0.0005933916166454445,
      "loss": 3.1975,
      "step": 15422
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8115441799163818,
      "learning_rate": 0.0005933907627711637,
      "loss": 2.983,
      "step": 15423
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4275225400924683,
      "learning_rate": 0.000593389908842336,
      "loss": 3.1491,
      "step": 15424
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8470675945281982,
      "learning_rate": 0.0005933890548589615,
      "loss": 3.3315,
      "step": 15425
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.481649875640869,
      "learning_rate": 0.0005933882008210405,
      "loss": 3.0602,
      "step": 15426
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4285976886749268,
      "learning_rate": 0.0005933873467285731,
      "loss": 3.098,
      "step": 15427
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8121122121810913,
      "learning_rate": 0.0005933864925815594,
      "loss": 2.9958,
      "step": 15428
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.802188515663147,
      "learning_rate": 0.0005933856383799997,
      "loss": 3.0652,
      "step": 15429
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7883743047714233,
      "learning_rate": 0.000593384784123894,
      "loss": 3.1914,
      "step": 15430
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.587702751159668,
      "learning_rate": 0.0005933839298132425,
      "loss": 3.3417,
      "step": 15431
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5040050745010376,
      "learning_rate": 0.0005933830754480456,
      "loss": 3.2812,
      "step": 15432
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.282564401626587,
      "learning_rate": 0.000593382221028303,
      "loss": 2.8574,
      "step": 15433
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.223437547683716,
      "learning_rate": 0.0005933813665540153,
      "loss": 3.1134,
      "step": 15434
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.7423863410949707,
      "learning_rate": 0.0005933805120251824,
      "loss": 3.131,
      "step": 15435
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8082081079483032,
      "learning_rate": 0.0005933796574418044,
      "loss": 3.1215,
      "step": 15436
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9255578517913818,
      "learning_rate": 0.0005933788028038817,
      "loss": 3.1858,
      "step": 15437
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5876331329345703,
      "learning_rate": 0.0005933779481114143,
      "loss": 3.0539,
      "step": 15438
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5054681301116943,
      "learning_rate": 0.0005933770933644024,
      "loss": 3.0582,
      "step": 15439
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5912294387817383,
      "learning_rate": 0.0005933762385628463,
      "loss": 3.3187,
      "step": 15440
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6896926164627075,
      "learning_rate": 0.0005933753837067458,
      "loss": 3.1433,
      "step": 15441
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7351568937301636,
      "learning_rate": 0.0005933745287961014,
      "loss": 3.0892,
      "step": 15442
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6515382528305054,
      "learning_rate": 0.0005933736738309132,
      "loss": 3.1125,
      "step": 15443
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7991163730621338,
      "learning_rate": 0.0005933728188111812,
      "loss": 3.1908,
      "step": 15444
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.526133418083191,
      "learning_rate": 0.0005933719637369057,
      "loss": 3.2786,
      "step": 15445
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5950961112976074,
      "learning_rate": 0.0005933711086080867,
      "loss": 3.0837,
      "step": 15446
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4086573123931885,
      "learning_rate": 0.0005933702534247246,
      "loss": 3.1288,
      "step": 15447
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.526944398880005,
      "learning_rate": 0.0005933693981868194,
      "loss": 2.9752,
      "step": 15448
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5817019939422607,
      "learning_rate": 0.0005933685428943712,
      "loss": 3.2123,
      "step": 15449
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3930790424346924,
      "learning_rate": 0.0005933676875473804,
      "loss": 3.3345,
      "step": 15450
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.630568504333496,
      "learning_rate": 0.0005933668321458469,
      "loss": 2.9491,
      "step": 15451
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.668181896209717,
      "learning_rate": 0.000593365976689771,
      "loss": 2.9698,
      "step": 15452
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7688244581222534,
      "learning_rate": 0.0005933651211791529,
      "loss": 3.1483,
      "step": 15453
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.273223400115967,
      "learning_rate": 0.0005933642656139926,
      "loss": 3.1076,
      "step": 15454
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.688646078109741,
      "learning_rate": 0.0005933634099942904,
      "loss": 3.0568,
      "step": 15455
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.647305965423584,
      "learning_rate": 0.0005933625543200464,
      "loss": 3.2813,
      "step": 15456
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6401373147964478,
      "learning_rate": 0.0005933616985912607,
      "loss": 2.9839,
      "step": 15457
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.555614709854126,
      "learning_rate": 0.0005933608428079336,
      "loss": 3.0954,
      "step": 15458
    },
    {
      "epoch": 0.2,
      "grad_norm": 4.4286370277404785,
      "learning_rate": 0.000593359986970065,
      "loss": 3.0837,
      "step": 15459
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.7748773097991943,
      "learning_rate": 0.0005933591310776555,
      "loss": 2.9146,
      "step": 15460
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4803062677383423,
      "learning_rate": 0.0005933582751307049,
      "loss": 3.226,
      "step": 15461
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.204617977142334,
      "learning_rate": 0.0005933574191292134,
      "loss": 3.0897,
      "step": 15462
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.4994189739227295,
      "learning_rate": 0.0005933565630731813,
      "loss": 3.2149,
      "step": 15463
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5353946685791016,
      "learning_rate": 0.0005933557069626085,
      "loss": 3.2297,
      "step": 15464
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7341086864471436,
      "learning_rate": 0.0005933548507974955,
      "loss": 3.2447,
      "step": 15465
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6292533874511719,
      "learning_rate": 0.0005933539945778423,
      "loss": 3.118,
      "step": 15466
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9973453283309937,
      "learning_rate": 0.000593353138303649,
      "loss": 2.9902,
      "step": 15467
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4414352178573608,
      "learning_rate": 0.0005933522819749159,
      "loss": 2.9588,
      "step": 15468
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.608107089996338,
      "learning_rate": 0.000593351425591643,
      "loss": 3.1724,
      "step": 15469
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3438712358474731,
      "learning_rate": 0.0005933505691538306,
      "loss": 3.1738,
      "step": 15470
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4918792247772217,
      "learning_rate": 0.0005933497126614787,
      "loss": 3.1569,
      "step": 15471
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3994019031524658,
      "learning_rate": 0.0005933488561145876,
      "loss": 3.2406,
      "step": 15472
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.992616891860962,
      "learning_rate": 0.0005933479995131574,
      "loss": 2.9638,
      "step": 15473
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7528904676437378,
      "learning_rate": 0.0005933471428571882,
      "loss": 3.1757,
      "step": 15474
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1895060539245605,
      "learning_rate": 0.0005933462861466804,
      "loss": 2.9241,
      "step": 15475
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.049375295639038,
      "learning_rate": 0.0005933454293816339,
      "loss": 2.9155,
      "step": 15476
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1610569953918457,
      "learning_rate": 0.0005933445725620489,
      "loss": 3.1864,
      "step": 15477
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.700047254562378,
      "learning_rate": 0.0005933437156879257,
      "loss": 3.2022,
      "step": 15478
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.2956268787384033,
      "learning_rate": 0.0005933428587592642,
      "loss": 3.2877,
      "step": 15479
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7039339542388916,
      "learning_rate": 0.0005933420017760648,
      "loss": 3.1775,
      "step": 15480
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.807348370552063,
      "learning_rate": 0.0005933411447383276,
      "loss": 2.8819,
      "step": 15481
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7298383712768555,
      "learning_rate": 0.0005933402876460528,
      "loss": 2.9515,
      "step": 15482
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6297084093093872,
      "learning_rate": 0.0005933394304992404,
      "loss": 2.8841,
      "step": 15483
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.365112066268921,
      "learning_rate": 0.0005933385732978907,
      "loss": 3.2252,
      "step": 15484
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.274219036102295,
      "learning_rate": 0.000593337716042004,
      "loss": 3.2135,
      "step": 15485
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4617199897766113,
      "learning_rate": 0.0005933368587315801,
      "loss": 3.0327,
      "step": 15486
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6338367462158203,
      "learning_rate": 0.0005933360013666194,
      "loss": 3.3648,
      "step": 15487
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3867672681808472,
      "learning_rate": 0.0005933351439471219,
      "loss": 3.3958,
      "step": 15488
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4706590175628662,
      "learning_rate": 0.0005933342864730879,
      "loss": 3.0725,
      "step": 15489
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.046464443206787,
      "learning_rate": 0.0005933334289445175,
      "loss": 3.212,
      "step": 15490
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0571038722991943,
      "learning_rate": 0.000593332571361411,
      "loss": 3.3285,
      "step": 15491
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8295724391937256,
      "learning_rate": 0.0005933317137237683,
      "loss": 3.1592,
      "step": 15492
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4094882011413574,
      "learning_rate": 0.0005933308560315898,
      "loss": 3.1708,
      "step": 15493
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.858285903930664,
      "learning_rate": 0.0005933299982848755,
      "loss": 3.1203,
      "step": 15494
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9436078071594238,
      "learning_rate": 0.0005933291404836256,
      "loss": 3.3204,
      "step": 15495
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3721835613250732,
      "learning_rate": 0.0005933282826278404,
      "loss": 3.1071,
      "step": 15496
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2446985244750977,
      "learning_rate": 0.0005933274247175198,
      "loss": 3.0162,
      "step": 15497
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4521095752716064,
      "learning_rate": 0.0005933265667526641,
      "loss": 2.9456,
      "step": 15498
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3845727443695068,
      "learning_rate": 0.0005933257087332735,
      "loss": 3.2872,
      "step": 15499
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2685627937316895,
      "learning_rate": 0.0005933248506593482,
      "loss": 3.1181,
      "step": 15500
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4034209251403809,
      "learning_rate": 0.0005933239925308881,
      "loss": 3.0172,
      "step": 15501
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6520440578460693,
      "learning_rate": 0.0005933231343478937,
      "loss": 3.3005,
      "step": 15502
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5344815254211426,
      "learning_rate": 0.0005933222761103649,
      "loss": 3.1582,
      "step": 15503
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6507402658462524,
      "learning_rate": 0.0005933214178183018,
      "loss": 3.2376,
      "step": 15504
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4763323068618774,
      "learning_rate": 0.000593320559471705,
      "loss": 3.12,
      "step": 15505
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6023386716842651,
      "learning_rate": 0.0005933197010705742,
      "loss": 2.7902,
      "step": 15506
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.026210308074951,
      "learning_rate": 0.0005933188426149098,
      "loss": 2.8138,
      "step": 15507
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6528053283691406,
      "learning_rate": 0.0005933179841047119,
      "loss": 3.2925,
      "step": 15508
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6967343091964722,
      "learning_rate": 0.0005933171255399805,
      "loss": 3.1464,
      "step": 15509
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.847743034362793,
      "learning_rate": 0.0005933162669207161,
      "loss": 3.141,
      "step": 15510
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.514190912246704,
      "learning_rate": 0.0005933154082469186,
      "loss": 3.2078,
      "step": 15511
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5882855653762817,
      "learning_rate": 0.0005933145495185882,
      "loss": 2.9821,
      "step": 15512
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.678460955619812,
      "learning_rate": 0.0005933136907357252,
      "loss": 3.2733,
      "step": 15513
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6782246828079224,
      "learning_rate": 0.0005933128318983294,
      "loss": 3.2668,
      "step": 15514
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2575619220733643,
      "learning_rate": 0.0005933119730064014,
      "loss": 3.0953,
      "step": 15515
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5012290477752686,
      "learning_rate": 0.0005933111140599411,
      "loss": 3.1921,
      "step": 15516
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.506563425064087,
      "learning_rate": 0.0005933102550589488,
      "loss": 2.9473,
      "step": 15517
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.596295952796936,
      "learning_rate": 0.0005933093960034245,
      "loss": 3.3013,
      "step": 15518
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.545594573020935,
      "learning_rate": 0.0005933085368933685,
      "loss": 2.8214,
      "step": 15519
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5080960988998413,
      "learning_rate": 0.0005933076777287809,
      "loss": 3.2352,
      "step": 15520
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.719142436981201,
      "learning_rate": 0.0005933068185096619,
      "loss": 3.2663,
      "step": 15521
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5354994535446167,
      "learning_rate": 0.0005933059592360116,
      "loss": 3.2852,
      "step": 15522
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3552926778793335,
      "learning_rate": 0.0005933050999078302,
      "loss": 3.2629,
      "step": 15523
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.480635643005371,
      "learning_rate": 0.0005933042405251177,
      "loss": 2.8859,
      "step": 15524
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.6702163219451904,
      "learning_rate": 0.0005933033810878746,
      "loss": 3.2023,
      "step": 15525
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6372731924057007,
      "learning_rate": 0.0005933025215961007,
      "loss": 3.2433,
      "step": 15526
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5413522720336914,
      "learning_rate": 0.0005933016620497965,
      "loss": 3.1561,
      "step": 15527
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.470068097114563,
      "learning_rate": 0.0005933008024489619,
      "loss": 3.2701,
      "step": 15528
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.582606554031372,
      "learning_rate": 0.0005932999427935971,
      "loss": 3.2221,
      "step": 15529
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7637335062026978,
      "learning_rate": 0.0005932990830837024,
      "loss": 3.0108,
      "step": 15530
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5529673099517822,
      "learning_rate": 0.0005932982233192778,
      "loss": 3.1735,
      "step": 15531
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4209271669387817,
      "learning_rate": 0.0005932973635003236,
      "loss": 3.3554,
      "step": 15532
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5592520236968994,
      "learning_rate": 0.0005932965036268398,
      "loss": 2.9058,
      "step": 15533
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.2464919090270996,
      "learning_rate": 0.0005932956436988266,
      "loss": 3.1455,
      "step": 15534
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2560229301452637,
      "learning_rate": 0.0005932947837162842,
      "loss": 3.0748,
      "step": 15535
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4747347831726074,
      "learning_rate": 0.0005932939236792129,
      "loss": 3.1279,
      "step": 15536
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9127602577209473,
      "learning_rate": 0.0005932930635876127,
      "loss": 3.1425,
      "step": 15537
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8923704624176025,
      "learning_rate": 0.0005932922034414837,
      "loss": 3.1979,
      "step": 15538
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.59945809841156,
      "learning_rate": 0.0005932913432408262,
      "loss": 3.1217,
      "step": 15539
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6378955841064453,
      "learning_rate": 0.0005932904829856402,
      "loss": 3.1379,
      "step": 15540
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4901610612869263,
      "learning_rate": 0.0005932896226759261,
      "loss": 3.3033,
      "step": 15541
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8252304792404175,
      "learning_rate": 0.0005932887623116838,
      "loss": 3.2154,
      "step": 15542
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.517336130142212,
      "learning_rate": 0.0005932879018929137,
      "loss": 3.0574,
      "step": 15543
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4180138111114502,
      "learning_rate": 0.0005932870414196159,
      "loss": 3.0473,
      "step": 15544
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.371255397796631,
      "learning_rate": 0.0005932861808917903,
      "loss": 3.237,
      "step": 15545
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8306961059570312,
      "learning_rate": 0.0005932853203094374,
      "loss": 2.8989,
      "step": 15546
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8585678339004517,
      "learning_rate": 0.0005932844596725571,
      "loss": 2.8144,
      "step": 15547
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7082842588424683,
      "learning_rate": 0.0005932835989811498,
      "loss": 2.8738,
      "step": 15548
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.676454782485962,
      "learning_rate": 0.0005932827382352156,
      "loss": 3.0726,
      "step": 15549
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9230108261108398,
      "learning_rate": 0.0005932818774347545,
      "loss": 3.223,
      "step": 15550
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6236860752105713,
      "learning_rate": 0.0005932810165797668,
      "loss": 2.8881,
      "step": 15551
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.657637357711792,
      "learning_rate": 0.0005932801556702526,
      "loss": 3.1546,
      "step": 15552
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4566057920455933,
      "learning_rate": 0.000593279294706212,
      "loss": 3.2709,
      "step": 15553
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5383903980255127,
      "learning_rate": 0.0005932784336876453,
      "loss": 3.1086,
      "step": 15554
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5645875930786133,
      "learning_rate": 0.0005932775726145527,
      "loss": 3.3505,
      "step": 15555
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.020366907119751,
      "learning_rate": 0.0005932767114869342,
      "loss": 2.9283,
      "step": 15556
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5377167463302612,
      "learning_rate": 0.00059327585030479,
      "loss": 3.1145,
      "step": 15557
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6149028539657593,
      "learning_rate": 0.0005932749890681202,
      "loss": 3.2415,
      "step": 15558
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5684893131256104,
      "learning_rate": 0.0005932741277769252,
      "loss": 3.276,
      "step": 15559
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.743926763534546,
      "learning_rate": 0.000593273266431205,
      "loss": 2.8891,
      "step": 15560
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.611029863357544,
      "learning_rate": 0.0005932724050309597,
      "loss": 3.0219,
      "step": 15561
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0679354667663574,
      "learning_rate": 0.0005932715435761895,
      "loss": 3.0738,
      "step": 15562
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9773460626602173,
      "learning_rate": 0.0005932706820668946,
      "loss": 3.1567,
      "step": 15563
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2274270057678223,
      "learning_rate": 0.0005932698205030752,
      "loss": 3.2602,
      "step": 15564
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.87214994430542,
      "learning_rate": 0.0005932689588847313,
      "loss": 3.1955,
      "step": 15565
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.02717924118042,
      "learning_rate": 0.0005932680972118632,
      "loss": 3.2677,
      "step": 15566
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.543981671333313,
      "learning_rate": 0.0005932672354844711,
      "loss": 3.2256,
      "step": 15567
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5218104124069214,
      "learning_rate": 0.0005932663737025551,
      "loss": 2.8298,
      "step": 15568
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6443439722061157,
      "learning_rate": 0.0005932655118661152,
      "loss": 3.243,
      "step": 15569
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4272211790084839,
      "learning_rate": 0.0005932646499751519,
      "loss": 3.2236,
      "step": 15570
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7010464668273926,
      "learning_rate": 0.000593263788029665,
      "loss": 2.9782,
      "step": 15571
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7014583349227905,
      "learning_rate": 0.0005932629260296549,
      "loss": 3.1586,
      "step": 15572
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3957161903381348,
      "learning_rate": 0.0005932620639751216,
      "loss": 2.9087,
      "step": 15573
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9211245775222778,
      "learning_rate": 0.0005932612018660654,
      "loss": 3.0776,
      "step": 15574
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.786071538925171,
      "learning_rate": 0.0005932603397024864,
      "loss": 3.1183,
      "step": 15575
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.49117112159729,
      "learning_rate": 0.0005932594774843849,
      "loss": 3.127,
      "step": 15576
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.401075839996338,
      "learning_rate": 0.0005932586152117607,
      "loss": 2.9847,
      "step": 15577
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5827716588974,
      "learning_rate": 0.0005932577528846144,
      "loss": 3.2451,
      "step": 15578
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.58168625831604,
      "learning_rate": 0.0005932568905029458,
      "loss": 3.3328,
      "step": 15579
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.273772954940796,
      "learning_rate": 0.0005932560280667553,
      "loss": 3.036,
      "step": 15580
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7393358945846558,
      "learning_rate": 0.0005932551655760429,
      "loss": 2.9,
      "step": 15581
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.428614616394043,
      "learning_rate": 0.0005932543030308089,
      "loss": 3.0817,
      "step": 15582
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5864728689193726,
      "learning_rate": 0.0005932534404310534,
      "loss": 3.1066,
      "step": 15583
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4203311204910278,
      "learning_rate": 0.0005932525777767764,
      "loss": 3.1323,
      "step": 15584
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0979726314544678,
      "learning_rate": 0.0005932517150679783,
      "loss": 3.2754,
      "step": 15585
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.751965284347534,
      "learning_rate": 0.0005932508523046592,
      "loss": 3.4067,
      "step": 15586
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6835471391677856,
      "learning_rate": 0.0005932499894868192,
      "loss": 2.9488,
      "step": 15587
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.510955810546875,
      "learning_rate": 0.0005932491266144585,
      "loss": 3.1788,
      "step": 15588
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7240573167800903,
      "learning_rate": 0.0005932482636875773,
      "loss": 3.1419,
      "step": 15589
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.68271005153656,
      "learning_rate": 0.0005932474007061757,
      "loss": 3.0703,
      "step": 15590
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4483743906021118,
      "learning_rate": 0.0005932465376702538,
      "loss": 3.3433,
      "step": 15591
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9267640113830566,
      "learning_rate": 0.0005932456745798118,
      "loss": 3.0002,
      "step": 15592
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6626076698303223,
      "learning_rate": 0.00059324481143485,
      "loss": 3.2796,
      "step": 15593
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4200716018676758,
      "learning_rate": 0.0005932439482353684,
      "loss": 3.1513,
      "step": 15594
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7384538650512695,
      "learning_rate": 0.0005932430849813672,
      "loss": 3.0815,
      "step": 15595
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.576378345489502,
      "learning_rate": 0.0005932422216728466,
      "loss": 3.295,
      "step": 15596
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7320795059204102,
      "learning_rate": 0.0005932413583098068,
      "loss": 2.943,
      "step": 15597
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5472404956817627,
      "learning_rate": 0.0005932404948922478,
      "loss": 3.1443,
      "step": 15598
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2275044918060303,
      "learning_rate": 0.0005932396314201699,
      "loss": 3.1976,
      "step": 15599
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5069491863250732,
      "learning_rate": 0.0005932387678935732,
      "loss": 2.9122,
      "step": 15600
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6793696880340576,
      "learning_rate": 0.000593237904312458,
      "loss": 3.2327,
      "step": 15601
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4669902324676514,
      "learning_rate": 0.0005932370406768241,
      "loss": 3.1567,
      "step": 15602
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4229446649551392,
      "learning_rate": 0.0005932361769866721,
      "loss": 3.0649,
      "step": 15603
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5607800483703613,
      "learning_rate": 0.0005932353132420019,
      "loss": 3.2182,
      "step": 15604
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.56742525100708,
      "learning_rate": 0.0005932344494428136,
      "loss": 2.9883,
      "step": 15605
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5238202810287476,
      "learning_rate": 0.0005932335855891077,
      "loss": 3.3701,
      "step": 15606
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4776363372802734,
      "learning_rate": 0.000593232721680884,
      "loss": 3.0762,
      "step": 15607
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4821350574493408,
      "learning_rate": 0.0005932318577181428,
      "loss": 2.8759,
      "step": 15608
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6004897356033325,
      "learning_rate": 0.0005932309937008843,
      "loss": 3.0579,
      "step": 15609
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3600751161575317,
      "learning_rate": 0.0005932301296291086,
      "loss": 3.0562,
      "step": 15610
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4219462871551514,
      "learning_rate": 0.0005932292655028159,
      "loss": 2.8508,
      "step": 15611
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.212606191635132,
      "learning_rate": 0.0005932284013220063,
      "loss": 3.1647,
      "step": 15612
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.940666913986206,
      "learning_rate": 0.0005932275370866801,
      "loss": 3.0152,
      "step": 15613
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4902801513671875,
      "learning_rate": 0.0005932266727968374,
      "loss": 3.2829,
      "step": 15614
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6692982912063599,
      "learning_rate": 0.0005932258084524782,
      "loss": 3.3299,
      "step": 15615
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.6889350414276123,
      "learning_rate": 0.0005932249440536028,
      "loss": 3.2865,
      "step": 15616
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2321555614471436,
      "learning_rate": 0.0005932240796002115,
      "loss": 3.1758,
      "step": 15617
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9210054874420166,
      "learning_rate": 0.000593223215092304,
      "loss": 3.1872,
      "step": 15618
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.062729597091675,
      "learning_rate": 0.0005932223505298811,
      "loss": 3.2619,
      "step": 15619
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9019865989685059,
      "learning_rate": 0.0005932214859129425,
      "loss": 3.1393,
      "step": 15620
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.507154941558838,
      "learning_rate": 0.0005932206212414884,
      "loss": 3.1101,
      "step": 15621
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.020639657974243,
      "learning_rate": 0.0005932197565155191,
      "loss": 2.9615,
      "step": 15622
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.3380916118621826,
      "learning_rate": 0.0005932188917350347,
      "loss": 3.0673,
      "step": 15623
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.564419984817505,
      "learning_rate": 0.0005932180269000354,
      "loss": 2.8149,
      "step": 15624
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.900710940361023,
      "learning_rate": 0.0005932171620105214,
      "loss": 3.1967,
      "step": 15625
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.2223856449127197,
      "learning_rate": 0.0005932162970664927,
      "loss": 2.8605,
      "step": 15626
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1865041255950928,
      "learning_rate": 0.0005932154320679497,
      "loss": 3.2484,
      "step": 15627
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7666574716567993,
      "learning_rate": 0.0005932145670148922,
      "loss": 3.2795,
      "step": 15628
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.1465792655944824,
      "learning_rate": 0.0005932137019073208,
      "loss": 3.0417,
      "step": 15629
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.890078544616699,
      "learning_rate": 0.0005932128367452352,
      "loss": 3.0776,
      "step": 15630
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9463213682174683,
      "learning_rate": 0.000593211971528636,
      "loss": 2.9334,
      "step": 15631
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8155657052993774,
      "learning_rate": 0.0005932111062575231,
      "loss": 2.9672,
      "step": 15632
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0412509441375732,
      "learning_rate": 0.0005932102409318967,
      "loss": 3.0943,
      "step": 15633
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.5265188217163086,
      "learning_rate": 0.0005932093755517569,
      "loss": 3.2047,
      "step": 15634
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.933622121810913,
      "learning_rate": 0.0005932085101171041,
      "loss": 3.1616,
      "step": 15635
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0448551177978516,
      "learning_rate": 0.0005932076446279383,
      "loss": 3.0135,
      "step": 15636
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.089586019515991,
      "learning_rate": 0.0005932067790842595,
      "loss": 2.9363,
      "step": 15637
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.229912519454956,
      "learning_rate": 0.0005932059134860682,
      "loss": 3.0594,
      "step": 15638
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.8996434211730957,
      "learning_rate": 0.0005932050478333643,
      "loss": 3.2598,
      "step": 15639
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.117366313934326,
      "learning_rate": 0.0005932041821261481,
      "loss": 3.6252,
      "step": 15640
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6213698387145996,
      "learning_rate": 0.0005932033163644196,
      "loss": 3.0547,
      "step": 15641
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4112768173217773,
      "learning_rate": 0.0005932024505481791,
      "loss": 3.0615,
      "step": 15642
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.285576581954956,
      "learning_rate": 0.0005932015846774268,
      "loss": 3.1241,
      "step": 15643
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.571292519569397,
      "learning_rate": 0.0005932007187521627,
      "loss": 3.094,
      "step": 15644
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8065392971038818,
      "learning_rate": 0.0005931998527723871,
      "loss": 2.9554,
      "step": 15645
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.302778959274292,
      "learning_rate": 0.0005931989867381002,
      "loss": 3.148,
      "step": 15646
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.058044910430908,
      "learning_rate": 0.0005931981206493019,
      "loss": 3.137,
      "step": 15647
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5482138395309448,
      "learning_rate": 0.0005931972545059927,
      "loss": 3.1467,
      "step": 15648
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7320598363876343,
      "learning_rate": 0.0005931963883081726,
      "loss": 3.0581,
      "step": 15649
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.460129976272583,
      "learning_rate": 0.0005931955220558416,
      "loss": 3.1703,
      "step": 15650
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3365287780761719,
      "learning_rate": 0.0005931946557490001,
      "loss": 3.0037,
      "step": 15651
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3775253295898438,
      "learning_rate": 0.0005931937893876481,
      "loss": 3.2171,
      "step": 15652
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5810613632202148,
      "learning_rate": 0.000593192922971786,
      "loss": 3.1353,
      "step": 15653
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3426796197891235,
      "learning_rate": 0.0005931920565014138,
      "loss": 3.3555,
      "step": 15654
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2789186239242554,
      "learning_rate": 0.0005931911899765315,
      "loss": 3.2034,
      "step": 15655
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.93148672580719,
      "learning_rate": 0.0005931903233971395,
      "loss": 2.9116,
      "step": 15656
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6368987560272217,
      "learning_rate": 0.0005931894567632378,
      "loss": 3.2878,
      "step": 15657
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2814594507217407,
      "learning_rate": 0.0005931885900748268,
      "loss": 2.9881,
      "step": 15658
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7642266750335693,
      "learning_rate": 0.0005931877233319064,
      "loss": 3.17,
      "step": 15659
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6190547943115234,
      "learning_rate": 0.000593186856534477,
      "loss": 3.1388,
      "step": 15660
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.921007752418518,
      "learning_rate": 0.0005931859896825385,
      "loss": 3.0403,
      "step": 15661
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.040236473083496,
      "learning_rate": 0.0005931851227760912,
      "loss": 3.0839,
      "step": 15662
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.679358720779419,
      "learning_rate": 0.0005931842558151353,
      "loss": 3.0522,
      "step": 15663
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9850422143936157,
      "learning_rate": 0.0005931833887996709,
      "loss": 3.3583,
      "step": 15664
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7368371486663818,
      "learning_rate": 0.0005931825217296981,
      "loss": 3.3564,
      "step": 15665
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.310817003250122,
      "learning_rate": 0.0005931816546052173,
      "loss": 3.1955,
      "step": 15666
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6852669715881348,
      "learning_rate": 0.0005931807874262284,
      "loss": 3.2134,
      "step": 15667
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8937878608703613,
      "learning_rate": 0.0005931799201927317,
      "loss": 3.3516,
      "step": 15668
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.52767813205719,
      "learning_rate": 0.0005931790529047271,
      "loss": 3.1177,
      "step": 15669
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6328160762786865,
      "learning_rate": 0.0005931781855622152,
      "loss": 3.3058,
      "step": 15670
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5272216796875,
      "learning_rate": 0.000593177318165196,
      "loss": 3.0622,
      "step": 15671
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3074597120285034,
      "learning_rate": 0.0005931764507136695,
      "loss": 3.0016,
      "step": 15672
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.679267406463623,
      "learning_rate": 0.0005931755832076359,
      "loss": 2.9334,
      "step": 15673
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3294411897659302,
      "learning_rate": 0.0005931747156470954,
      "loss": 3.1433,
      "step": 15674
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8358182907104492,
      "learning_rate": 0.0005931738480320483,
      "loss": 2.8416,
      "step": 15675
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7845360040664673,
      "learning_rate": 0.0005931729803624946,
      "loss": 3.1145,
      "step": 15676
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6198993921279907,
      "learning_rate": 0.0005931721126384346,
      "loss": 3.3205,
      "step": 15677
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4340519905090332,
      "learning_rate": 0.0005931712448598683,
      "loss": 3.3047,
      "step": 15678
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.759522795677185,
      "learning_rate": 0.0005931703770267959,
      "loss": 2.9495,
      "step": 15679
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4731818437576294,
      "learning_rate": 0.0005931695091392177,
      "loss": 3.2785,
      "step": 15680
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8337947130203247,
      "learning_rate": 0.0005931686411971336,
      "loss": 3.2429,
      "step": 15681
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7191723585128784,
      "learning_rate": 0.000593167773200544,
      "loss": 3.1662,
      "step": 15682
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2332545518875122,
      "learning_rate": 0.000593166905149449,
      "loss": 3.0401,
      "step": 15683
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.0526974201202393,
      "learning_rate": 0.0005931660370438487,
      "loss": 3.1127,
      "step": 15684
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4616769552230835,
      "learning_rate": 0.0005931651688837434,
      "loss": 3.1082,
      "step": 15685
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8892303705215454,
      "learning_rate": 0.000593164300669133,
      "loss": 2.8389,
      "step": 15686
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.004326820373535,
      "learning_rate": 0.0005931634324000179,
      "loss": 3.0932,
      "step": 15687
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5794453620910645,
      "learning_rate": 0.0005931625640763982,
      "loss": 2.9182,
      "step": 15688
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.754841923713684,
      "learning_rate": 0.000593161695698274,
      "loss": 3.404,
      "step": 15689
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3916022777557373,
      "learning_rate": 0.0005931608272656454,
      "loss": 3.0083,
      "step": 15690
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3640916347503662,
      "learning_rate": 0.0005931599587785129,
      "loss": 3.1431,
      "step": 15691
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.548351764678955,
      "learning_rate": 0.0005931590902368762,
      "loss": 3.1951,
      "step": 15692
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4636561870574951,
      "learning_rate": 0.0005931582216407359,
      "loss": 3.2655,
      "step": 15693
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.416868805885315,
      "learning_rate": 0.0005931573529900919,
      "loss": 3.3995,
      "step": 15694
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.730524778366089,
      "learning_rate": 0.0005931564842849445,
      "loss": 2.955,
      "step": 15695
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.617485761642456,
      "learning_rate": 0.0005931556155252935,
      "loss": 2.9264,
      "step": 15696
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.555420994758606,
      "learning_rate": 0.0005931547467111396,
      "loss": 3.3088,
      "step": 15697
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.790671706199646,
      "learning_rate": 0.0005931538778424825,
      "loss": 3.1939,
      "step": 15698
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.716874361038208,
      "learning_rate": 0.0005931530089193227,
      "loss": 3.1965,
      "step": 15699
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4715238809585571,
      "learning_rate": 0.0005931521399416602,
      "loss": 3.05,
      "step": 15700
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4615857601165771,
      "learning_rate": 0.0005931512709094951,
      "loss": 3.2094,
      "step": 15701
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7235954999923706,
      "learning_rate": 0.0005931504018228277,
      "loss": 3.1509,
      "step": 15702
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5952866077423096,
      "learning_rate": 0.0005931495326816582,
      "loss": 3.1828,
      "step": 15703
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7648179531097412,
      "learning_rate": 0.0005931486634859865,
      "loss": 3.0338,
      "step": 15704
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4860399961471558,
      "learning_rate": 0.0005931477942358131,
      "loss": 3.0614,
      "step": 15705
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9914343357086182,
      "learning_rate": 0.0005931469249311378,
      "loss": 2.9144,
      "step": 15706
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.515141248703003,
      "learning_rate": 0.0005931460555719611,
      "loss": 3.1979,
      "step": 15707
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8343466520309448,
      "learning_rate": 0.000593145186158283,
      "loss": 3.372,
      "step": 15708
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5111939907073975,
      "learning_rate": 0.0005931443166901036,
      "loss": 3.0884,
      "step": 15709
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5729773044586182,
      "learning_rate": 0.0005931434471674232,
      "loss": 3.1119,
      "step": 15710
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5848705768585205,
      "learning_rate": 0.0005931425775902419,
      "loss": 3.1966,
      "step": 15711
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5427610874176025,
      "learning_rate": 0.0005931417079585598,
      "loss": 3.0816,
      "step": 15712
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8532041311264038,
      "learning_rate": 0.0005931408382723772,
      "loss": 3.1511,
      "step": 15713
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.042161226272583,
      "learning_rate": 0.0005931399685316941,
      "loss": 2.9145,
      "step": 15714
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3922916650772095,
      "learning_rate": 0.0005931390987365108,
      "loss": 2.8834,
      "step": 15715
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4010345935821533,
      "learning_rate": 0.0005931382288868275,
      "loss": 2.7792,
      "step": 15716
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2159790992736816,
      "learning_rate": 0.0005931373589826442,
      "loss": 2.9593,
      "step": 15717
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.758232593536377,
      "learning_rate": 0.000593136489023961,
      "loss": 3.0527,
      "step": 15718
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6474847793579102,
      "learning_rate": 0.0005931356190107784,
      "loss": 3.1672,
      "step": 15719
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.675925850868225,
      "learning_rate": 0.0005931347489430962,
      "loss": 3.1842,
      "step": 15720
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3933839797973633,
      "learning_rate": 0.0005931338788209148,
      "loss": 3.2058,
      "step": 15721
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.7765451669692993,
      "learning_rate": 0.0005931330086442342,
      "loss": 3.2196,
      "step": 15722
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5778565406799316,
      "learning_rate": 0.0005931321384130547,
      "loss": 3.0109,
      "step": 15723
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.381411075592041,
      "learning_rate": 0.0005931312681273764,
      "loss": 3.0353,
      "step": 15724
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3176097869873047,
      "learning_rate": 0.0005931303977871995,
      "loss": 3.3258,
      "step": 15725
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.633086085319519,
      "learning_rate": 0.000593129527392524,
      "loss": 2.9737,
      "step": 15726
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3315213918685913,
      "learning_rate": 0.0005931286569433504,
      "loss": 3.5111,
      "step": 15727
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8896281719207764,
      "learning_rate": 0.0005931277864396785,
      "loss": 3.2647,
      "step": 15728
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.546234130859375,
      "learning_rate": 0.0005931269158815086,
      "loss": 3.1519,
      "step": 15729
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.6367661952972412,
      "learning_rate": 0.0005931260452688409,
      "loss": 3.3093,
      "step": 15730
    },
    {
      "epoch": 0.2,
      "grad_norm": 2.4648277759552,
      "learning_rate": 0.0005931251746016756,
      "loss": 2.8976,
      "step": 15731
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.9245209693908691,
      "learning_rate": 0.0005931243038800127,
      "loss": 3.3103,
      "step": 15732
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5414830446243286,
      "learning_rate": 0.0005931234331038525,
      "loss": 2.7807,
      "step": 15733
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.5427364110946655,
      "learning_rate": 0.0005931225622731951,
      "loss": 3.3215,
      "step": 15734
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3299243450164795,
      "learning_rate": 0.0005931216913880407,
      "loss": 3.1434,
      "step": 15735
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4056329727172852,
      "learning_rate": 0.0005931208204483895,
      "loss": 3.0368,
      "step": 15736
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.998306393623352,
      "learning_rate": 0.0005931199494542415,
      "loss": 2.8123,
      "step": 15737
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3886439800262451,
      "learning_rate": 0.000593119078405597,
      "loss": 3.1984,
      "step": 15738
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4405778646469116,
      "learning_rate": 0.0005931182073024561,
      "loss": 3.2237,
      "step": 15739
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.8040169477462769,
      "learning_rate": 0.0005931173361448191,
      "loss": 3.2534,
      "step": 15740
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2487821578979492,
      "learning_rate": 0.0005931164649326861,
      "loss": 3.056,
      "step": 15741
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.2640979290008545,
      "learning_rate": 0.000593115593666057,
      "loss": 3.1208,
      "step": 15742
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.3250539302825928,
      "learning_rate": 0.0005931147223449323,
      "loss": 3.0439,
      "step": 15743
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.4520809650421143,
      "learning_rate": 0.0005931138509693121,
      "loss": 2.964,
      "step": 15744
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6364705562591553,
      "learning_rate": 0.0005931129795391964,
      "loss": 3.1474,
      "step": 15745
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6244436502456665,
      "learning_rate": 0.0005931121080545855,
      "loss": 2.9447,
      "step": 15746
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4420021772384644,
      "learning_rate": 0.0005931112365154795,
      "loss": 3.1141,
      "step": 15747
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4721136093139648,
      "learning_rate": 0.0005931103649218786,
      "loss": 3.0327,
      "step": 15748
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.036738634109497,
      "learning_rate": 0.000593109493273783,
      "loss": 3.017,
      "step": 15749
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.499792218208313,
      "learning_rate": 0.0005931086215711927,
      "loss": 3.2727,
      "step": 15750
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7115744352340698,
      "learning_rate": 0.0005931077498141081,
      "loss": 2.9964,
      "step": 15751
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8664840459823608,
      "learning_rate": 0.0005931068780025291,
      "loss": 3.1064,
      "step": 15752
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4742863178253174,
      "learning_rate": 0.0005931060061364562,
      "loss": 2.8487,
      "step": 15753
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3239245414733887,
      "learning_rate": 0.0005931051342158893,
      "loss": 3.1191,
      "step": 15754
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7160688638687134,
      "learning_rate": 0.0005931042622408285,
      "loss": 3.024,
      "step": 15755
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2552740573883057,
      "learning_rate": 0.0005931033902112742,
      "loss": 3.205,
      "step": 15756
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.619866132736206,
      "learning_rate": 0.0005931025181272264,
      "loss": 3.0848,
      "step": 15757
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6434787511825562,
      "learning_rate": 0.0005931016459886852,
      "loss": 2.9908,
      "step": 15758
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0507473945617676,
      "learning_rate": 0.0005931007737956511,
      "loss": 2.9932,
      "step": 15759
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8088629245758057,
      "learning_rate": 0.0005930999015481238,
      "loss": 2.9455,
      "step": 15760
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3886505365371704,
      "learning_rate": 0.0005930990292461038,
      "loss": 3.2314,
      "step": 15761
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9971789121627808,
      "learning_rate": 0.0005930981568895912,
      "loss": 3.2301,
      "step": 15762
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2803187370300293,
      "learning_rate": 0.0005930972844785861,
      "loss": 3.2471,
      "step": 15763
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6478705406188965,
      "learning_rate": 0.0005930964120130888,
      "loss": 2.8291,
      "step": 15764
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.628793954849243,
      "learning_rate": 0.0005930955394930991,
      "loss": 2.8986,
      "step": 15765
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.111957311630249,
      "learning_rate": 0.0005930946669186175,
      "loss": 3.2119,
      "step": 15766
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0514204502105713,
      "learning_rate": 0.0005930937942896441,
      "loss": 2.9164,
      "step": 15767
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6662883758544922,
      "learning_rate": 0.000593092921606179,
      "loss": 2.9698,
      "step": 15768
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7795567512512207,
      "learning_rate": 0.0005930920488682224,
      "loss": 3.0996,
      "step": 15769
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8016936779022217,
      "learning_rate": 0.0005930911760757745,
      "loss": 3.0882,
      "step": 15770
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.7624731063842773,
      "learning_rate": 0.0005930903032288354,
      "loss": 3.0267,
      "step": 15771
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9734067916870117,
      "learning_rate": 0.0005930894303274053,
      "loss": 3.2241,
      "step": 15772
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.7749903202056885,
      "learning_rate": 0.0005930885573714842,
      "loss": 3.1699,
      "step": 15773
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7728843688964844,
      "learning_rate": 0.0005930876843610725,
      "loss": 2.9585,
      "step": 15774
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9160418510437012,
      "learning_rate": 0.0005930868112961704,
      "loss": 3.197,
      "step": 15775
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0245540142059326,
      "learning_rate": 0.0005930859381767778,
      "loss": 3.0553,
      "step": 15776
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5309720039367676,
      "learning_rate": 0.000593085065002895,
      "loss": 3.1867,
      "step": 15777
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8042402267456055,
      "learning_rate": 0.0005930841917745222,
      "loss": 3.0213,
      "step": 15778
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6854462623596191,
      "learning_rate": 0.0005930833184916594,
      "loss": 3.1127,
      "step": 15779
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2873259782791138,
      "learning_rate": 0.000593082445154307,
      "loss": 3.1274,
      "step": 15780
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.286820411682129,
      "learning_rate": 0.0005930815717624649,
      "loss": 3.1466,
      "step": 15781
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.732862949371338,
      "learning_rate": 0.0005930806983161336,
      "loss": 3.4183,
      "step": 15782
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.345273494720459,
      "learning_rate": 0.0005930798248153129,
      "loss": 2.9398,
      "step": 15783
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.51645028591156,
      "learning_rate": 0.0005930789512600032,
      "loss": 3.2869,
      "step": 15784
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5991730690002441,
      "learning_rate": 0.0005930780776502045,
      "loss": 3.1609,
      "step": 15785
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3584184646606445,
      "learning_rate": 0.0005930772039859173,
      "loss": 3.1436,
      "step": 15786
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2510000467300415,
      "learning_rate": 0.0005930763302671412,
      "loss": 3.1536,
      "step": 15787
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5249117612838745,
      "learning_rate": 0.0005930754564938769,
      "loss": 3.1773,
      "step": 15788
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7635741233825684,
      "learning_rate": 0.0005930745826661243,
      "loss": 3.2756,
      "step": 15789
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4612354040145874,
      "learning_rate": 0.0005930737087838835,
      "loss": 3.1983,
      "step": 15790
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.80327570438385,
      "learning_rate": 0.0005930728348471549,
      "loss": 3.09,
      "step": 15791
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2981919050216675,
      "learning_rate": 0.0005930719608559384,
      "loss": 2.9584,
      "step": 15792
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6610430479049683,
      "learning_rate": 0.0005930710868102344,
      "loss": 3.2085,
      "step": 15793
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3992559909820557,
      "learning_rate": 0.0005930702127100428,
      "loss": 2.9295,
      "step": 15794
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4741441011428833,
      "learning_rate": 0.0005930693385553641,
      "loss": 3.1273,
      "step": 15795
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2785664796829224,
      "learning_rate": 0.0005930684643461982,
      "loss": 3.3162,
      "step": 15796
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5746049880981445,
      "learning_rate": 0.0005930675900825453,
      "loss": 3.1968,
      "step": 15797
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7805860042572021,
      "learning_rate": 0.0005930667157644057,
      "loss": 3.1957,
      "step": 15798
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6855394840240479,
      "learning_rate": 0.0005930658413917794,
      "loss": 3.1989,
      "step": 15799
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8073140382766724,
      "learning_rate": 0.0005930649669646667,
      "loss": 3.205,
      "step": 15800
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3435509204864502,
      "learning_rate": 0.0005930640924830676,
      "loss": 3.3327,
      "step": 15801
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4994901418685913,
      "learning_rate": 0.0005930632179469823,
      "loss": 3.227,
      "step": 15802
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.151601791381836,
      "learning_rate": 0.0005930623433564111,
      "loss": 3.055,
      "step": 15803
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5684322118759155,
      "learning_rate": 0.0005930614687113541,
      "loss": 3.1653,
      "step": 15804
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3992201089859009,
      "learning_rate": 0.0005930605940118114,
      "loss": 2.8354,
      "step": 15805
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9772447347640991,
      "learning_rate": 0.0005930597192577833,
      "loss": 3.1923,
      "step": 15806
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6180927753448486,
      "learning_rate": 0.0005930588444492698,
      "loss": 3.1357,
      "step": 15807
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.629460334777832,
      "learning_rate": 0.000593057969586271,
      "loss": 3.2131,
      "step": 15808
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4738601446151733,
      "learning_rate": 0.0005930570946687874,
      "loss": 3.3029,
      "step": 15809
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.9347996711730957,
      "learning_rate": 0.0005930562196968188,
      "loss": 3.0788,
      "step": 15810
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.561171054840088,
      "learning_rate": 0.0005930553446703656,
      "loss": 2.8551,
      "step": 15811
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.40861177444458,
      "learning_rate": 0.0005930544695894279,
      "loss": 2.8942,
      "step": 15812
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.27016282081604,
      "learning_rate": 0.0005930535944540057,
      "loss": 2.9267,
      "step": 15813
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.214052200317383,
      "learning_rate": 0.0005930527192640995,
      "loss": 3.0993,
      "step": 15814
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6019443273544312,
      "learning_rate": 0.0005930518440197091,
      "loss": 3.0642,
      "step": 15815
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8237156867980957,
      "learning_rate": 0.0005930509687208349,
      "loss": 3.2787,
      "step": 15816
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3088393211364746,
      "learning_rate": 0.0005930500933674771,
      "loss": 2.9937,
      "step": 15817
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.217318296432495,
      "learning_rate": 0.0005930492179596356,
      "loss": 3.1224,
      "step": 15818
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.466018557548523,
      "learning_rate": 0.0005930483424973108,
      "loss": 3.0201,
      "step": 15819
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.338646173477173,
      "learning_rate": 0.0005930474669805027,
      "loss": 3.0632,
      "step": 15820
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.502552032470703,
      "learning_rate": 0.0005930465914092116,
      "loss": 3.1397,
      "step": 15821
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5123000144958496,
      "learning_rate": 0.0005930457157834376,
      "loss": 3.2116,
      "step": 15822
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5877636671066284,
      "learning_rate": 0.0005930448401031809,
      "loss": 3.0668,
      "step": 15823
    },
    {
      "epoch": 0.21,
      "grad_norm": 4.074073791503906,
      "learning_rate": 0.0005930439643684416,
      "loss": 3.1508,
      "step": 15824
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.385962963104248,
      "learning_rate": 0.0005930430885792199,
      "loss": 3.278,
      "step": 15825
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3523348569869995,
      "learning_rate": 0.000593042212735516,
      "loss": 3.1061,
      "step": 15826
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2753194570541382,
      "learning_rate": 0.0005930413368373299,
      "loss": 3.2497,
      "step": 15827
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9463164806365967,
      "learning_rate": 0.0005930404608846619,
      "loss": 3.1415,
      "step": 15828
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.914154291152954,
      "learning_rate": 0.0005930395848775123,
      "loss": 3.2736,
      "step": 15829
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6793056726455688,
      "learning_rate": 0.0005930387088158809,
      "loss": 3.0566,
      "step": 15830
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9658201932907104,
      "learning_rate": 0.0005930378326997683,
      "loss": 3.1851,
      "step": 15831
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.404621124267578,
      "learning_rate": 0.0005930369565291742,
      "loss": 3.1094,
      "step": 15832
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5885430574417114,
      "learning_rate": 0.0005930360803040992,
      "loss": 3.0391,
      "step": 15833
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.223240852355957,
      "learning_rate": 0.0005930352040245432,
      "loss": 3.1742,
      "step": 15834
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6751397848129272,
      "learning_rate": 0.0005930343276905064,
      "loss": 2.9815,
      "step": 15835
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8764008283615112,
      "learning_rate": 0.000593033451301989,
      "loss": 2.9714,
      "step": 15836
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3687623739242554,
      "learning_rate": 0.0005930325748589912,
      "loss": 3.2385,
      "step": 15837
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9013458490371704,
      "learning_rate": 0.000593031698361513,
      "loss": 3.1758,
      "step": 15838
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7695406675338745,
      "learning_rate": 0.0005930308218095548,
      "loss": 3.1221,
      "step": 15839
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4304994344711304,
      "learning_rate": 0.0005930299452031165,
      "loss": 3.0229,
      "step": 15840
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.294368028640747,
      "learning_rate": 0.0005930290685421985,
      "loss": 3.162,
      "step": 15841
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5459684133529663,
      "learning_rate": 0.0005930281918268009,
      "loss": 3.4369,
      "step": 15842
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4159467220306396,
      "learning_rate": 0.0005930273150569237,
      "loss": 3.3943,
      "step": 15843
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4545893669128418,
      "learning_rate": 0.0005930264382325673,
      "loss": 2.82,
      "step": 15844
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5312539339065552,
      "learning_rate": 0.0005930255613537317,
      "loss": 3.3001,
      "step": 15845
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7569186687469482,
      "learning_rate": 0.0005930246844204172,
      "loss": 3.1492,
      "step": 15846
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3960919380187988,
      "learning_rate": 0.0005930238074326237,
      "loss": 3.0684,
      "step": 15847
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6567169427871704,
      "learning_rate": 0.0005930229303903518,
      "loss": 3.0733,
      "step": 15848
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3588963747024536,
      "learning_rate": 0.0005930220532936012,
      "loss": 3.2436,
      "step": 15849
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8596054315567017,
      "learning_rate": 0.0005930211761423724,
      "loss": 3.2808,
      "step": 15850
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4020159244537354,
      "learning_rate": 0.0005930202989366652,
      "loss": 3.3633,
      "step": 15851
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5784975290298462,
      "learning_rate": 0.0005930194216764802,
      "loss": 2.8557,
      "step": 15852
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6971086263656616,
      "learning_rate": 0.0005930185443618173,
      "loss": 3.0289,
      "step": 15853
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7428715229034424,
      "learning_rate": 0.0005930176669926767,
      "loss": 3.09,
      "step": 15854
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5913183689117432,
      "learning_rate": 0.0005930167895690587,
      "loss": 3.1431,
      "step": 15855
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.427886486053467,
      "learning_rate": 0.0005930159120909633,
      "loss": 2.9769,
      "step": 15856
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5292487144470215,
      "learning_rate": 0.0005930150345583907,
      "loss": 2.9734,
      "step": 15857
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7950942516326904,
      "learning_rate": 0.000593014156971341,
      "loss": 3.1168,
      "step": 15858
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6614139080047607,
      "learning_rate": 0.0005930132793298145,
      "loss": 3.0645,
      "step": 15859
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7319002151489258,
      "learning_rate": 0.0005930124016338112,
      "loss": 3.0688,
      "step": 15860
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.563157081604004,
      "learning_rate": 0.0005930115238833315,
      "loss": 3.2602,
      "step": 15861
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6589372158050537,
      "learning_rate": 0.0005930106460783754,
      "loss": 2.9242,
      "step": 15862
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.749158263206482,
      "learning_rate": 0.0005930097682189431,
      "loss": 3.1083,
      "step": 15863
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.499961256980896,
      "learning_rate": 0.0005930088903050348,
      "loss": 2.9627,
      "step": 15864
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9738670587539673,
      "learning_rate": 0.0005930080123366505,
      "loss": 3.2164,
      "step": 15865
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6603416204452515,
      "learning_rate": 0.0005930071343137904,
      "loss": 2.9166,
      "step": 15866
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8441091775894165,
      "learning_rate": 0.000593006256236455,
      "loss": 3.1954,
      "step": 15867
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6338087320327759,
      "learning_rate": 0.000593005378104644,
      "loss": 3.1698,
      "step": 15868
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.111780881881714,
      "learning_rate": 0.0005930044999183579,
      "loss": 2.9486,
      "step": 15869
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5526607036590576,
      "learning_rate": 0.0005930036216775966,
      "loss": 3.2669,
      "step": 15870
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.637338399887085,
      "learning_rate": 0.0005930027433823606,
      "loss": 3.1782,
      "step": 15871
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4023983478546143,
      "learning_rate": 0.0005930018650326496,
      "loss": 3.246,
      "step": 15872
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.041074514389038,
      "learning_rate": 0.0005930009866284641,
      "loss": 3.2402,
      "step": 15873
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0506389141082764,
      "learning_rate": 0.0005930001081698043,
      "loss": 2.9091,
      "step": 15874
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9743599891662598,
      "learning_rate": 0.0005929992296566702,
      "loss": 3.0333,
      "step": 15875
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.859622836112976,
      "learning_rate": 0.000592998351089062,
      "loss": 2.9008,
      "step": 15876
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6268012523651123,
      "learning_rate": 0.0005929974724669799,
      "loss": 3.269,
      "step": 15877
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.852810025215149,
      "learning_rate": 0.0005929965937904239,
      "loss": 3.1057,
      "step": 15878
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5520727634429932,
      "learning_rate": 0.0005929957150593945,
      "loss": 3.1857,
      "step": 15879
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8577722311019897,
      "learning_rate": 0.0005929948362738915,
      "loss": 3.181,
      "step": 15880
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6354854106903076,
      "learning_rate": 0.0005929939574339153,
      "loss": 3.3015,
      "step": 15881
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6557856798171997,
      "learning_rate": 0.000592993078539466,
      "loss": 3.2385,
      "step": 15882
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4009079933166504,
      "learning_rate": 0.0005929921995905438,
      "loss": 2.998,
      "step": 15883
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7341654300689697,
      "learning_rate": 0.0005929913205871488,
      "loss": 3.1026,
      "step": 15884
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.282057762145996,
      "learning_rate": 0.0005929904415292811,
      "loss": 2.9224,
      "step": 15885
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6232548952102661,
      "learning_rate": 0.0005929895624169411,
      "loss": 3.2195,
      "step": 15886
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5816209316253662,
      "learning_rate": 0.0005929886832501287,
      "loss": 2.9995,
      "step": 15887
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8205260038375854,
      "learning_rate": 0.0005929878040288442,
      "loss": 3.1328,
      "step": 15888
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.07165265083313,
      "learning_rate": 0.0005929869247530877,
      "loss": 3.0625,
      "step": 15889
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6809461116790771,
      "learning_rate": 0.0005929860454228595,
      "loss": 2.9602,
      "step": 15890
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5094373226165771,
      "learning_rate": 0.0005929851660381595,
      "loss": 2.956,
      "step": 15891
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7526835203170776,
      "learning_rate": 0.0005929842865989881,
      "loss": 3.1174,
      "step": 15892
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3124412298202515,
      "learning_rate": 0.0005929834071053454,
      "loss": 3.188,
      "step": 15893
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8073785305023193,
      "learning_rate": 0.0005929825275572316,
      "loss": 3.126,
      "step": 15894
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.351745367050171,
      "learning_rate": 0.0005929816479546468,
      "loss": 3.0308,
      "step": 15895
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4365426301956177,
      "learning_rate": 0.000592980768297591,
      "loss": 2.9681,
      "step": 15896
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5453155040740967,
      "learning_rate": 0.0005929798885860647,
      "loss": 3.0387,
      "step": 15897
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7663853168487549,
      "learning_rate": 0.000592979008820068,
      "loss": 3.1694,
      "step": 15898
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5490024089813232,
      "learning_rate": 0.0005929781289996009,
      "loss": 2.9397,
      "step": 15899
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8243483304977417,
      "learning_rate": 0.0005929772491246636,
      "loss": 3.207,
      "step": 15900
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6099673509597778,
      "learning_rate": 0.0005929763691952564,
      "loss": 3.1019,
      "step": 15901
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5107616186141968,
      "learning_rate": 0.0005929754892113792,
      "loss": 3.0782,
      "step": 15902
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9063823223114014,
      "learning_rate": 0.0005929746091730325,
      "loss": 3.2655,
      "step": 15903
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9822540283203125,
      "learning_rate": 0.0005929737290802161,
      "loss": 3.3326,
      "step": 15904
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7798497676849365,
      "learning_rate": 0.0005929728489329305,
      "loss": 3.069,
      "step": 15905
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4748191833496094,
      "learning_rate": 0.0005929719687311757,
      "loss": 2.9963,
      "step": 15906
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2241995334625244,
      "learning_rate": 0.0005929710884749517,
      "loss": 3.1321,
      "step": 15907
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.729792833328247,
      "learning_rate": 0.0005929702081642591,
      "loss": 2.8752,
      "step": 15908
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6531641483306885,
      "learning_rate": 0.0005929693277990977,
      "loss": 3.2259,
      "step": 15909
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4141671657562256,
      "learning_rate": 0.0005929684473794678,
      "loss": 3.0267,
      "step": 15910
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2681821584701538,
      "learning_rate": 0.0005929675669053695,
      "loss": 3.3412,
      "step": 15911
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7359203100204468,
      "learning_rate": 0.0005929666863768029,
      "loss": 3.0035,
      "step": 15912
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.254863977432251,
      "learning_rate": 0.0005929658057937685,
      "loss": 2.9152,
      "step": 15913
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7613235712051392,
      "learning_rate": 0.0005929649251562661,
      "loss": 3.1167,
      "step": 15914
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2339839935302734,
      "learning_rate": 0.0005929640444642959,
      "loss": 2.9072,
      "step": 15915
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8557254076004028,
      "learning_rate": 0.0005929631637178582,
      "loss": 3.1299,
      "step": 15916
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2388832569122314,
      "learning_rate": 0.0005929622829169533,
      "loss": 2.9792,
      "step": 15917
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6260524988174438,
      "learning_rate": 0.000592961402061581,
      "loss": 3.1643,
      "step": 15918
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8607213497161865,
      "learning_rate": 0.0005929605211517417,
      "loss": 3.0207,
      "step": 15919
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.912997841835022,
      "learning_rate": 0.0005929596401874355,
      "loss": 3.6255,
      "step": 15920
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.4668705463409424,
      "learning_rate": 0.0005929587591686625,
      "loss": 2.9667,
      "step": 15921
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.504156231880188,
      "learning_rate": 0.0005929578780954231,
      "loss": 3.3405,
      "step": 15922
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5043954849243164,
      "learning_rate": 0.0005929569969677171,
      "loss": 2.9823,
      "step": 15923
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.83320951461792,
      "learning_rate": 0.0005929561157855451,
      "loss": 3.1181,
      "step": 15924
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.08152437210083,
      "learning_rate": 0.0005929552345489068,
      "loss": 2.9578,
      "step": 15925
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6176177263259888,
      "learning_rate": 0.0005929543532578027,
      "loss": 2.9211,
      "step": 15926
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2592225074768066,
      "learning_rate": 0.0005929534719122328,
      "loss": 3.1436,
      "step": 15927
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4974876642227173,
      "learning_rate": 0.0005929525905121974,
      "loss": 3.4356,
      "step": 15928
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5005251169204712,
      "learning_rate": 0.0005929517090576965,
      "loss": 2.9092,
      "step": 15929
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8171327114105225,
      "learning_rate": 0.0005929508275487305,
      "loss": 3.0946,
      "step": 15930
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5163865089416504,
      "learning_rate": 0.0005929499459852993,
      "loss": 3.1923,
      "step": 15931
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3132983446121216,
      "learning_rate": 0.0005929490643674032,
      "loss": 2.9233,
      "step": 15932
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5681300163269043,
      "learning_rate": 0.0005929481826950423,
      "loss": 3.2415,
      "step": 15933
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.937976598739624,
      "learning_rate": 0.0005929473009682167,
      "loss": 3.1054,
      "step": 15934
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0531492233276367,
      "learning_rate": 0.0005929464191869268,
      "loss": 3.2462,
      "step": 15935
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.308953046798706,
      "learning_rate": 0.0005929455373511727,
      "loss": 2.9045,
      "step": 15936
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.702866315841675,
      "learning_rate": 0.0005929446554609544,
      "loss": 2.8702,
      "step": 15937
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0537109375,
      "learning_rate": 0.0005929437735162722,
      "loss": 2.9474,
      "step": 15938
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8545660972595215,
      "learning_rate": 0.0005929428915171262,
      "loss": 3.1454,
      "step": 15939
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2604637145996094,
      "learning_rate": 0.0005929420094635166,
      "loss": 3.0257,
      "step": 15940
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5338499546051025,
      "learning_rate": 0.0005929411273554435,
      "loss": 2.9551,
      "step": 15941
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4392991065979004,
      "learning_rate": 0.0005929402451929071,
      "loss": 3.0766,
      "step": 15942
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5163631439208984,
      "learning_rate": 0.0005929393629759077,
      "loss": 3.0392,
      "step": 15943
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9515724182128906,
      "learning_rate": 0.0005929384807044452,
      "loss": 3.204,
      "step": 15944
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.263604998588562,
      "learning_rate": 0.00059293759837852,
      "loss": 2.9532,
      "step": 15945
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5597927570343018,
      "learning_rate": 0.0005929367159981322,
      "loss": 3.0588,
      "step": 15946
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.185263156890869,
      "learning_rate": 0.0005929358335632819,
      "loss": 3.1888,
      "step": 15947
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6199510097503662,
      "learning_rate": 0.0005929349510739693,
      "loss": 3.3428,
      "step": 15948
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.129042148590088,
      "learning_rate": 0.0005929340685301945,
      "loss": 3.013,
      "step": 15949
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9090285301208496,
      "learning_rate": 0.0005929331859319578,
      "loss": 3.3469,
      "step": 15950
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8467172384262085,
      "learning_rate": 0.0005929323032792592,
      "loss": 3.1789,
      "step": 15951
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.735962152481079,
      "learning_rate": 0.0005929314205720991,
      "loss": 3.1309,
      "step": 15952
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2655346393585205,
      "learning_rate": 0.0005929305378104774,
      "loss": 3.1353,
      "step": 15953
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4235516786575317,
      "learning_rate": 0.0005929296549943944,
      "loss": 3.2139,
      "step": 15954
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5857850313186646,
      "learning_rate": 0.0005929287721238503,
      "loss": 3.1298,
      "step": 15955
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4867775440216064,
      "learning_rate": 0.0005929278891988452,
      "loss": 3.1323,
      "step": 15956
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5686986446380615,
      "learning_rate": 0.0005929270062193792,
      "loss": 3.0401,
      "step": 15957
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.075251817703247,
      "learning_rate": 0.0005929261231854526,
      "loss": 3.1242,
      "step": 15958
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9375699758529663,
      "learning_rate": 0.0005929252400970655,
      "loss": 3.1086,
      "step": 15959
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.425675868988037,
      "learning_rate": 0.0005929243569542181,
      "loss": 3.2218,
      "step": 15960
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8022494316101074,
      "learning_rate": 0.0005929234737569105,
      "loss": 3.0051,
      "step": 15961
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.846709966659546,
      "learning_rate": 0.0005929225905051429,
      "loss": 3.0093,
      "step": 15962
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.552219271659851,
      "learning_rate": 0.0005929217071989155,
      "loss": 3.1293,
      "step": 15963
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5920404195785522,
      "learning_rate": 0.0005929208238382284,
      "loss": 2.9844,
      "step": 15964
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4255837202072144,
      "learning_rate": 0.0005929199404230818,
      "loss": 2.9541,
      "step": 15965
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.148934841156006,
      "learning_rate": 0.0005929190569534757,
      "loss": 3.3557,
      "step": 15966
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.599906086921692,
      "learning_rate": 0.0005929181734294106,
      "loss": 3.2443,
      "step": 15967
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0739457607269287,
      "learning_rate": 0.0005929172898508864,
      "loss": 3.1094,
      "step": 15968
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7464147806167603,
      "learning_rate": 0.0005929164062179034,
      "loss": 3.2095,
      "step": 15969
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5137302875518799,
      "learning_rate": 0.0005929155225304618,
      "loss": 3.0599,
      "step": 15970
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8957364559173584,
      "learning_rate": 0.0005929146387885615,
      "loss": 3.3239,
      "step": 15971
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4380747079849243,
      "learning_rate": 0.0005929137549922029,
      "loss": 2.9858,
      "step": 15972
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.51481294631958,
      "learning_rate": 0.0005929128711413862,
      "loss": 3.1046,
      "step": 15973
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5665377378463745,
      "learning_rate": 0.0005929119872361112,
      "loss": 2.9256,
      "step": 15974
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0872058868408203,
      "learning_rate": 0.0005929111032763786,
      "loss": 2.9063,
      "step": 15975
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.636362910270691,
      "learning_rate": 0.0005929102192621881,
      "loss": 3.211,
      "step": 15976
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5826399326324463,
      "learning_rate": 0.0005929093351935402,
      "loss": 3.2811,
      "step": 15977
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3483190536499023,
      "learning_rate": 0.000592908451070435,
      "loss": 3.0906,
      "step": 15978
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9146718978881836,
      "learning_rate": 0.0005929075668928724,
      "loss": 3.1044,
      "step": 15979
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1619789600372314,
      "learning_rate": 0.0005929066826608529,
      "loss": 2.9466,
      "step": 15980
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6581584215164185,
      "learning_rate": 0.0005929057983743764,
      "loss": 3.3004,
      "step": 15981
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.681557297706604,
      "learning_rate": 0.0005929049140334432,
      "loss": 3.2048,
      "step": 15982
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.7621283531188965,
      "learning_rate": 0.0005929040296380535,
      "loss": 3.0253,
      "step": 15983
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3739240169525146,
      "learning_rate": 0.0005929031451882074,
      "loss": 3.0371,
      "step": 15984
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4772619009017944,
      "learning_rate": 0.0005929022606839051,
      "loss": 3.2014,
      "step": 15985
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8254235982894897,
      "learning_rate": 0.0005929013761251467,
      "loss": 3.1414,
      "step": 15986
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1803297996520996,
      "learning_rate": 0.0005929004915119324,
      "loss": 3.1905,
      "step": 15987
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6305406093597412,
      "learning_rate": 0.0005928996068442624,
      "loss": 3.0581,
      "step": 15988
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.690467357635498,
      "learning_rate": 0.0005928987221221368,
      "loss": 2.9624,
      "step": 15989
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.917419672012329,
      "learning_rate": 0.0005928978373455558,
      "loss": 3.2194,
      "step": 15990
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2038216590881348,
      "learning_rate": 0.0005928969525145196,
      "loss": 3.2968,
      "step": 15991
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1204090118408203,
      "learning_rate": 0.0005928960676290283,
      "loss": 3.1648,
      "step": 15992
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5075594186782837,
      "learning_rate": 0.0005928951826890821,
      "loss": 3.3653,
      "step": 15993
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4826689958572388,
      "learning_rate": 0.0005928942976946812,
      "loss": 3.1308,
      "step": 15994
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.564496397972107,
      "learning_rate": 0.0005928934126458256,
      "loss": 2.9322,
      "step": 15995
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5682487487792969,
      "learning_rate": 0.0005928925275425158,
      "loss": 2.8876,
      "step": 15996
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3904201984405518,
      "learning_rate": 0.0005928916423847515,
      "loss": 3.186,
      "step": 15997
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.248958706855774,
      "learning_rate": 0.0005928907571725333,
      "loss": 2.9615,
      "step": 15998
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3110222816467285,
      "learning_rate": 0.0005928898719058611,
      "loss": 3.1449,
      "step": 15999
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8162027597427368,
      "learning_rate": 0.0005928889865847352,
      "loss": 3.0858,
      "step": 16000
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.254119396209717,
      "learning_rate": 0.0005928881012091556,
      "loss": 3.2411,
      "step": 16001
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.288161277770996,
      "learning_rate": 0.0005928872157791227,
      "loss": 3.0614,
      "step": 16002
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7524667978286743,
      "learning_rate": 0.0005928863302946364,
      "loss": 3.0038,
      "step": 16003
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.716949224472046,
      "learning_rate": 0.0005928854447556971,
      "loss": 3.1581,
      "step": 16004
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8908770084381104,
      "learning_rate": 0.0005928845591623049,
      "loss": 3.2153,
      "step": 16005
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.8404133319854736,
      "learning_rate": 0.0005928836735144599,
      "loss": 3.0222,
      "step": 16006
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6118046045303345,
      "learning_rate": 0.0005928827878121623,
      "loss": 3.2063,
      "step": 16007
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.9867513179779053,
      "learning_rate": 0.0005928819020554122,
      "loss": 3.0277,
      "step": 16008
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.9455456733703613,
      "learning_rate": 0.0005928810162442098,
      "loss": 3.2119,
      "step": 16009
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.040980339050293,
      "learning_rate": 0.0005928801303785553,
      "loss": 3.3013,
      "step": 16010
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5542688369750977,
      "learning_rate": 0.000592879244458449,
      "loss": 3.0828,
      "step": 16011
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.209137201309204,
      "learning_rate": 0.0005928783584838907,
      "loss": 2.8229,
      "step": 16012
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8337059020996094,
      "learning_rate": 0.000592877472454881,
      "loss": 2.8359,
      "step": 16013
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.28463876247406,
      "learning_rate": 0.0005928765863714198,
      "loss": 3.4279,
      "step": 16014
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3149523735046387,
      "learning_rate": 0.0005928757002335072,
      "loss": 3.1235,
      "step": 16015
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.041948080062866,
      "learning_rate": 0.0005928748140411436,
      "loss": 3.0885,
      "step": 16016
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.530340313911438,
      "learning_rate": 0.0005928739277943289,
      "loss": 3.1797,
      "step": 16017
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5211360454559326,
      "learning_rate": 0.0005928730414930636,
      "loss": 3.1159,
      "step": 16018
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3544557094573975,
      "learning_rate": 0.0005928721551373476,
      "loss": 3.1385,
      "step": 16019
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.911487102508545,
      "learning_rate": 0.0005928712687271812,
      "loss": 3.0603,
      "step": 16020
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.840691328048706,
      "learning_rate": 0.0005928703822625644,
      "loss": 2.894,
      "step": 16021
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5316343307495117,
      "learning_rate": 0.0005928694957434975,
      "loss": 2.9726,
      "step": 16022
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0876410007476807,
      "learning_rate": 0.0005928686091699806,
      "loss": 3.4821,
      "step": 16023
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8134219646453857,
      "learning_rate": 0.0005928677225420139,
      "loss": 3.1315,
      "step": 16024
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3565973043441772,
      "learning_rate": 0.0005928668358595975,
      "loss": 3.4473,
      "step": 16025
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.580275535583496,
      "learning_rate": 0.0005928659491227317,
      "loss": 3.0311,
      "step": 16026
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6282724142074585,
      "learning_rate": 0.0005928650623314166,
      "loss": 3.4042,
      "step": 16027
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.225208044052124,
      "learning_rate": 0.0005928641754856524,
      "loss": 3.0075,
      "step": 16028
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6136703491210938,
      "learning_rate": 0.0005928632885854392,
      "loss": 3.1863,
      "step": 16029
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0389978885650635,
      "learning_rate": 0.0005928624016307771,
      "loss": 3.1024,
      "step": 16030
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.113837718963623,
      "learning_rate": 0.0005928615146216664,
      "loss": 3.3381,
      "step": 16031
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.544926643371582,
      "learning_rate": 0.0005928606275581073,
      "loss": 3.0972,
      "step": 16032
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4031652212142944,
      "learning_rate": 0.0005928597404400998,
      "loss": 3.001,
      "step": 16033
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1623194217681885,
      "learning_rate": 0.0005928588532676441,
      "loss": 3.1072,
      "step": 16034
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2660071849823,
      "learning_rate": 0.0005928579660407405,
      "loss": 3.0736,
      "step": 16035
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.585572600364685,
      "learning_rate": 0.0005928570787593891,
      "loss": 3.2643,
      "step": 16036
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4662479162216187,
      "learning_rate": 0.00059285619142359,
      "loss": 3.0356,
      "step": 16037
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2416316270828247,
      "learning_rate": 0.0005928553040333433,
      "loss": 3.2351,
      "step": 16038
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4433468580245972,
      "learning_rate": 0.0005928544165886494,
      "loss": 3.1266,
      "step": 16039
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4418822526931763,
      "learning_rate": 0.0005928535290895083,
      "loss": 3.2674,
      "step": 16040
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3647286891937256,
      "learning_rate": 0.0005928526415359202,
      "loss": 3.452,
      "step": 16041
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6486413478851318,
      "learning_rate": 0.0005928517539278852,
      "loss": 2.895,
      "step": 16042
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7507351636886597,
      "learning_rate": 0.0005928508662654037,
      "loss": 2.9797,
      "step": 16043
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3533729314804077,
      "learning_rate": 0.0005928499785484755,
      "loss": 2.9524,
      "step": 16044
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3053297996520996,
      "learning_rate": 0.0005928490907771011,
      "loss": 3.0381,
      "step": 16045
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.341183066368103,
      "learning_rate": 0.0005928482029512804,
      "loss": 3.2023,
      "step": 16046
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7486180067062378,
      "learning_rate": 0.0005928473150710138,
      "loss": 3.0933,
      "step": 16047
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.740684151649475,
      "learning_rate": 0.0005928464271363013,
      "loss": 3.202,
      "step": 16048
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0719287395477295,
      "learning_rate": 0.0005928455391471432,
      "loss": 3.2775,
      "step": 16049
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8046613931655884,
      "learning_rate": 0.0005928446511035396,
      "loss": 3.2577,
      "step": 16050
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8596444129943848,
      "learning_rate": 0.0005928437630054905,
      "loss": 3.0949,
      "step": 16051
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5773333311080933,
      "learning_rate": 0.0005928428748529964,
      "loss": 3.1389,
      "step": 16052
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5122557878494263,
      "learning_rate": 0.0005928419866460572,
      "loss": 3.1579,
      "step": 16053
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6739212274551392,
      "learning_rate": 0.000592841098384673,
      "loss": 3.1548,
      "step": 16054
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.050065279006958,
      "learning_rate": 0.0005928402100688442,
      "loss": 2.9639,
      "step": 16055
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6907497644424438,
      "learning_rate": 0.0005928393216985711,
      "loss": 2.9183,
      "step": 16056
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.711857557296753,
      "learning_rate": 0.0005928384332738534,
      "loss": 3.0046,
      "step": 16057
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4778498411178589,
      "learning_rate": 0.0005928375447946916,
      "loss": 2.9839,
      "step": 16058
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3033145666122437,
      "learning_rate": 0.0005928366562610856,
      "loss": 3.3181,
      "step": 16059
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6288491487503052,
      "learning_rate": 0.0005928357676730359,
      "loss": 3.0904,
      "step": 16060
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.55734121799469,
      "learning_rate": 0.0005928348790305426,
      "loss": 3.1829,
      "step": 16061
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.46367347240448,
      "learning_rate": 0.0005928339903336056,
      "loss": 3.5295,
      "step": 16062
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7693780660629272,
      "learning_rate": 0.0005928331015822253,
      "loss": 3.0963,
      "step": 16063
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.636594533920288,
      "learning_rate": 0.0005928322127764018,
      "loss": 3.0411,
      "step": 16064
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5210368633270264,
      "learning_rate": 0.0005928313239161353,
      "loss": 2.9084,
      "step": 16065
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6146240234375,
      "learning_rate": 0.0005928304350014259,
      "loss": 2.7922,
      "step": 16066
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7184765338897705,
      "learning_rate": 0.0005928295460322737,
      "loss": 3.1151,
      "step": 16067
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.480058193206787,
      "learning_rate": 0.000592828657008679,
      "loss": 2.9667,
      "step": 16068
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2525264024734497,
      "learning_rate": 0.000592827767930642,
      "loss": 3.2436,
      "step": 16069
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6946909427642822,
      "learning_rate": 0.0005928268787981628,
      "loss": 3.3035,
      "step": 16070
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.377258062362671,
      "learning_rate": 0.0005928259896112414,
      "loss": 3.0137,
      "step": 16071
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.856141209602356,
      "learning_rate": 0.0005928251003698783,
      "loss": 3.1238,
      "step": 16072
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9032479524612427,
      "learning_rate": 0.0005928242110740734,
      "loss": 3.2463,
      "step": 16073
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0246410369873047,
      "learning_rate": 0.0005928233217238269,
      "loss": 3.2475,
      "step": 16074
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6991338729858398,
      "learning_rate": 0.0005928224323191392,
      "loss": 3.2921,
      "step": 16075
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4451863765716553,
      "learning_rate": 0.00059282154286001,
      "loss": 3.1127,
      "step": 16076
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.502961277961731,
      "learning_rate": 0.00059282065334644,
      "loss": 2.8827,
      "step": 16077
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1740919351577759,
      "learning_rate": 0.0005928197637784289,
      "loss": 3.0357,
      "step": 16078
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.0545973777770996,
      "learning_rate": 0.0005928188741559773,
      "loss": 3.0575,
      "step": 16079
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3227033615112305,
      "learning_rate": 0.000592817984479085,
      "loss": 3.2876,
      "step": 16080
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5328116416931152,
      "learning_rate": 0.0005928170947477523,
      "loss": 3.3694,
      "step": 16081
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.528179407119751,
      "learning_rate": 0.0005928162049619795,
      "loss": 3.305,
      "step": 16082
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7696846723556519,
      "learning_rate": 0.0005928153151217666,
      "loss": 3.1763,
      "step": 16083
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2356226444244385,
      "learning_rate": 0.0005928144252271137,
      "loss": 2.9299,
      "step": 16084
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2507606744766235,
      "learning_rate": 0.0005928135352780212,
      "loss": 2.8821,
      "step": 16085
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5659037828445435,
      "learning_rate": 0.0005928126452744891,
      "loss": 3.0116,
      "step": 16086
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3889342546463013,
      "learning_rate": 0.0005928117552165175,
      "loss": 3.2369,
      "step": 16087
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7340797185897827,
      "learning_rate": 0.0005928108651041068,
      "loss": 3.1242,
      "step": 16088
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3178491592407227,
      "learning_rate": 0.000592809974937257,
      "loss": 3.2544,
      "step": 16089
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6652885675430298,
      "learning_rate": 0.0005928090847159682,
      "loss": 2.9433,
      "step": 16090
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6857260465621948,
      "learning_rate": 0.0005928081944402408,
      "loss": 2.9989,
      "step": 16091
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3408440351486206,
      "learning_rate": 0.0005928073041100748,
      "loss": 2.9245,
      "step": 16092
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2859224081039429,
      "learning_rate": 0.0005928064137254704,
      "loss": 3.1968,
      "step": 16093
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.235527753829956,
      "learning_rate": 0.0005928055232864277,
      "loss": 3.079,
      "step": 16094
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6780897378921509,
      "learning_rate": 0.0005928046327929469,
      "loss": 3.0404,
      "step": 16095
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.748308777809143,
      "learning_rate": 0.0005928037422450284,
      "loss": 3.2605,
      "step": 16096
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9170318841934204,
      "learning_rate": 0.000592802851642672,
      "loss": 3.0673,
      "step": 16097
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.328239917755127,
      "learning_rate": 0.0005928019609858779,
      "loss": 3.1124,
      "step": 16098
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3854707479476929,
      "learning_rate": 0.0005928010702746466,
      "loss": 3.2257,
      "step": 16099
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.762516736984253,
      "learning_rate": 0.0005928001795089779,
      "loss": 3.2562,
      "step": 16100
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.8276712894439697,
      "learning_rate": 0.0005927992886888723,
      "loss": 3.1015,
      "step": 16101
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0952882766723633,
      "learning_rate": 0.0005927983978143296,
      "loss": 2.9112,
      "step": 16102
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3038454055786133,
      "learning_rate": 0.0005927975068853503,
      "loss": 3.0907,
      "step": 16103
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.353347063064575,
      "learning_rate": 0.0005927966159019343,
      "loss": 2.9721,
      "step": 16104
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.9995479583740234,
      "learning_rate": 0.0005927957248640819,
      "loss": 3.1865,
      "step": 16105
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9143447875976562,
      "learning_rate": 0.0005927948337717933,
      "loss": 3.1335,
      "step": 16106
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7237522602081299,
      "learning_rate": 0.0005927939426250686,
      "loss": 3.0379,
      "step": 16107
    },
    {
      "epoch": 0.21,
      "grad_norm": 4.008786678314209,
      "learning_rate": 0.000592793051423908,
      "loss": 3.0019,
      "step": 16108
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.451443672180176,
      "learning_rate": 0.0005927921601683115,
      "loss": 3.3023,
      "step": 16109
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4386261701583862,
      "learning_rate": 0.0005927912688582796,
      "loss": 3.0914,
      "step": 16110
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.600748062133789,
      "learning_rate": 0.0005927903774938122,
      "loss": 3.1691,
      "step": 16111
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0520944595336914,
      "learning_rate": 0.0005927894860749095,
      "loss": 3.026,
      "step": 16112
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.818544626235962,
      "learning_rate": 0.0005927885946015717,
      "loss": 3.4053,
      "step": 16113
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5774070024490356,
      "learning_rate": 0.000592787703073799,
      "loss": 3.1709,
      "step": 16114
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.715369701385498,
      "learning_rate": 0.0005927868114915917,
      "loss": 3.0219,
      "step": 16115
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.813490390777588,
      "learning_rate": 0.0005927859198549496,
      "loss": 3.14,
      "step": 16116
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.258812189102173,
      "learning_rate": 0.0005927850281638731,
      "loss": 3.3019,
      "step": 16117
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4720969200134277,
      "learning_rate": 0.0005927841364183625,
      "loss": 3.277,
      "step": 16118
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.873899221420288,
      "learning_rate": 0.0005927832446184176,
      "loss": 3.2386,
      "step": 16119
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8662010431289673,
      "learning_rate": 0.0005927823527640388,
      "loss": 3.1836,
      "step": 16120
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3777618408203125,
      "learning_rate": 0.0005927814608552263,
      "loss": 3.2701,
      "step": 16121
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6073765754699707,
      "learning_rate": 0.0005927805688919802,
      "loss": 3.2424,
      "step": 16122
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.704373836517334,
      "learning_rate": 0.0005927796768743006,
      "loss": 3.2155,
      "step": 16123
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.147423028945923,
      "learning_rate": 0.0005927787848021877,
      "loss": 3.1817,
      "step": 16124
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.502885103225708,
      "learning_rate": 0.0005927778926756419,
      "loss": 3.0792,
      "step": 16125
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5078809261322021,
      "learning_rate": 0.0005927770004946631,
      "loss": 3.1955,
      "step": 16126
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.095027446746826,
      "learning_rate": 0.0005927761082592513,
      "loss": 2.9839,
      "step": 16127
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5578733682632446,
      "learning_rate": 0.0005927752159694071,
      "loss": 3.1628,
      "step": 16128
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.634318470954895,
      "learning_rate": 0.0005927743236251305,
      "loss": 3.2666,
      "step": 16129
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4047857522964478,
      "learning_rate": 0.0005927734312264214,
      "loss": 2.8806,
      "step": 16130
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.9489099979400635,
      "learning_rate": 0.0005927725387732804,
      "loss": 3.0368,
      "step": 16131
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.551196813583374,
      "learning_rate": 0.0005927716462657074,
      "loss": 3.1532,
      "step": 16132
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7017076015472412,
      "learning_rate": 0.0005927707537037027,
      "loss": 3.0071,
      "step": 16133
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1922593116760254,
      "learning_rate": 0.0005927698610872662,
      "loss": 3.3113,
      "step": 16134
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.474259614944458,
      "learning_rate": 0.0005927689684163984,
      "loss": 3.5434,
      "step": 16135
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.114370346069336,
      "learning_rate": 0.0005927680756910992,
      "loss": 3.0282,
      "step": 16136
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7105509042739868,
      "learning_rate": 0.0005927671829113691,
      "loss": 3.0202,
      "step": 16137
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9766358137130737,
      "learning_rate": 0.0005927662900772079,
      "loss": 3.2346,
      "step": 16138
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.681147575378418,
      "learning_rate": 0.000592765397188616,
      "loss": 3.3841,
      "step": 16139
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.438947916030884,
      "learning_rate": 0.0005927645042455934,
      "loss": 3.2049,
      "step": 16140
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.619197964668274,
      "learning_rate": 0.0005927636112481404,
      "loss": 3.0187,
      "step": 16141
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6685084104537964,
      "learning_rate": 0.0005927627181962571,
      "loss": 3.334,
      "step": 16142
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.660388946533203,
      "learning_rate": 0.0005927618250899437,
      "loss": 2.8931,
      "step": 16143
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.8280489444732666,
      "learning_rate": 0.0005927609319292004,
      "loss": 3.1502,
      "step": 16144
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6271820068359375,
      "learning_rate": 0.0005927600387140272,
      "loss": 3.3161,
      "step": 16145
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.7768664360046387,
      "learning_rate": 0.0005927591454444244,
      "loss": 2.956,
      "step": 16146
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.890502691268921,
      "learning_rate": 0.0005927582521203923,
      "loss": 3.4479,
      "step": 16147
    },
    {
      "epoch": 0.21,
      "grad_norm": 4.555367469787598,
      "learning_rate": 0.0005927573587419308,
      "loss": 3.0933,
      "step": 16148
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.0378317832946777,
      "learning_rate": 0.0005927564653090402,
      "loss": 3.2469,
      "step": 16149
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5719810724258423,
      "learning_rate": 0.0005927555718217205,
      "loss": 3.3326,
      "step": 16150
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2136240005493164,
      "learning_rate": 0.0005927546782799723,
      "loss": 3.3204,
      "step": 16151
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0713295936584473,
      "learning_rate": 0.0005927537846837953,
      "loss": 3.1451,
      "step": 16152
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3631809949874878,
      "learning_rate": 0.0005927528910331898,
      "loss": 3.2264,
      "step": 16153
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6521767377853394,
      "learning_rate": 0.0005927519973281561,
      "loss": 3.2402,
      "step": 16154
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.0907700061798096,
      "learning_rate": 0.0005927511035686943,
      "loss": 2.9966,
      "step": 16155
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4087448120117188,
      "learning_rate": 0.0005927502097548045,
      "loss": 3.0282,
      "step": 16156
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.123800039291382,
      "learning_rate": 0.0005927493158864868,
      "loss": 3.1153,
      "step": 16157
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8587371110916138,
      "learning_rate": 0.0005927484219637417,
      "loss": 3.2928,
      "step": 16158
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4888423681259155,
      "learning_rate": 0.000592747527986569,
      "loss": 3.077,
      "step": 16159
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8781037330627441,
      "learning_rate": 0.000592746633954969,
      "loss": 3.1159,
      "step": 16160
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4078447818756104,
      "learning_rate": 0.0005927457398689419,
      "loss": 3.2368,
      "step": 16161
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5062960386276245,
      "learning_rate": 0.0005927448457284879,
      "loss": 3.0924,
      "step": 16162
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3428664207458496,
      "learning_rate": 0.0005927439515336071,
      "loss": 3.1889,
      "step": 16163
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.017204761505127,
      "learning_rate": 0.0005927430572842997,
      "loss": 3.1485,
      "step": 16164
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4037891626358032,
      "learning_rate": 0.0005927421629805656,
      "loss": 2.8976,
      "step": 16165
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7339508533477783,
      "learning_rate": 0.0005927412686224055,
      "loss": 2.9387,
      "step": 16166
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6529923677444458,
      "learning_rate": 0.0005927403742098192,
      "loss": 3.0329,
      "step": 16167
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4501228332519531,
      "learning_rate": 0.0005927394797428068,
      "loss": 3.0893,
      "step": 16168
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5580902099609375,
      "learning_rate": 0.0005927385852213687,
      "loss": 3.1714,
      "step": 16169
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.591035842895508,
      "learning_rate": 0.000592737690645505,
      "loss": 3.3488,
      "step": 16170
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4933700561523438,
      "learning_rate": 0.0005927367960152158,
      "loss": 3.105,
      "step": 16171
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5335712432861328,
      "learning_rate": 0.0005927359013305014,
      "loss": 3.0135,
      "step": 16172
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.564877510070801,
      "learning_rate": 0.0005927350065913617,
      "loss": 3.0205,
      "step": 16173
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6494044065475464,
      "learning_rate": 0.0005927341117977972,
      "loss": 3.0202,
      "step": 16174
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.614920735359192,
      "learning_rate": 0.0005927332169498077,
      "loss": 3.2972,
      "step": 16175
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5814783573150635,
      "learning_rate": 0.0005927323220473937,
      "loss": 3.3866,
      "step": 16176
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5324629545211792,
      "learning_rate": 0.0005927314270905552,
      "loss": 3.1403,
      "step": 16177
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.594778299331665,
      "learning_rate": 0.0005927305320792925,
      "loss": 3.1149,
      "step": 16178
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0968353748321533,
      "learning_rate": 0.0005927296370136056,
      "loss": 3.1162,
      "step": 16179
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6676585674285889,
      "learning_rate": 0.0005927287418934947,
      "loss": 3.0712,
      "step": 16180
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.664270281791687,
      "learning_rate": 0.0005927278467189601,
      "loss": 3.0993,
      "step": 16181
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5881903171539307,
      "learning_rate": 0.0005927269514900017,
      "loss": 3.1119,
      "step": 16182
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3938632011413574,
      "learning_rate": 0.00059272605620662,
      "loss": 3.1476,
      "step": 16183
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3001400232315063,
      "learning_rate": 0.0005927251608688149,
      "loss": 2.9618,
      "step": 16184
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.7736523151397705,
      "learning_rate": 0.0005927242654765867,
      "loss": 3.1589,
      "step": 16185
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2498226165771484,
      "learning_rate": 0.0005927233700299356,
      "loss": 3.1365,
      "step": 16186
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.502650499343872,
      "learning_rate": 0.0005927224745288617,
      "loss": 3.0415,
      "step": 16187
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8818849325180054,
      "learning_rate": 0.000592721578973365,
      "loss": 2.9365,
      "step": 16188
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5714694261550903,
      "learning_rate": 0.0005927206833634459,
      "loss": 2.9572,
      "step": 16189
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4903336763381958,
      "learning_rate": 0.0005927197876991045,
      "loss": 3.0464,
      "step": 16190
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3799008131027222,
      "learning_rate": 0.000592718891980341,
      "loss": 3.2216,
      "step": 16191
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6188437938690186,
      "learning_rate": 0.0005927179962071556,
      "loss": 3.1718,
      "step": 16192
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0960514545440674,
      "learning_rate": 0.0005927171003795483,
      "loss": 3.0385,
      "step": 16193
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4892311096191406,
      "learning_rate": 0.0005927162044975194,
      "loss": 3.1575,
      "step": 16194
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8187098503112793,
      "learning_rate": 0.000592715308561069,
      "loss": 3.0525,
      "step": 16195
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6066570281982422,
      "learning_rate": 0.0005927144125701973,
      "loss": 3.1702,
      "step": 16196
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5067880153656006,
      "learning_rate": 0.0005927135165249046,
      "loss": 2.9091,
      "step": 16197
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4518535137176514,
      "learning_rate": 0.0005927126204251908,
      "loss": 3.0439,
      "step": 16198
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6932947635650635,
      "learning_rate": 0.0005927117242710562,
      "loss": 2.9751,
      "step": 16199
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4161237478256226,
      "learning_rate": 0.000592710828062501,
      "loss": 3.2293,
      "step": 16200
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.939107060432434,
      "learning_rate": 0.0005927099317995253,
      "loss": 3.172,
      "step": 16201
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4054549932479858,
      "learning_rate": 0.0005927090354821294,
      "loss": 3.128,
      "step": 16202
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3844445943832397,
      "learning_rate": 0.0005927081391103132,
      "loss": 3.1203,
      "step": 16203
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.739844560623169,
      "learning_rate": 0.0005927072426840771,
      "loss": 3.2861,
      "step": 16204
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5281703472137451,
      "learning_rate": 0.0005927063462034212,
      "loss": 3.0902,
      "step": 16205
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8712867498397827,
      "learning_rate": 0.0005927054496683458,
      "loss": 2.8467,
      "step": 16206
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.448491096496582,
      "learning_rate": 0.0005927045530788507,
      "loss": 3.1399,
      "step": 16207
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6063530445098877,
      "learning_rate": 0.0005927036564349364,
      "loss": 2.9409,
      "step": 16208
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6041806936264038,
      "learning_rate": 0.000592702759736603,
      "loss": 3.1277,
      "step": 16209
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5335253477096558,
      "learning_rate": 0.0005927018629838506,
      "loss": 3.1911,
      "step": 16210
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9068928956985474,
      "learning_rate": 0.0005927009661766794,
      "loss": 3.1365,
      "step": 16211
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9418268203735352,
      "learning_rate": 0.0005927000693150896,
      "loss": 3.2744,
      "step": 16212
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7741414308547974,
      "learning_rate": 0.0005926991723990813,
      "loss": 2.8704,
      "step": 16213
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7155729532241821,
      "learning_rate": 0.0005926982754286547,
      "loss": 3.1113,
      "step": 16214
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8338583707809448,
      "learning_rate": 0.0005926973784038099,
      "loss": 3.0807,
      "step": 16215
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7274831533432007,
      "learning_rate": 0.0005926964813245473,
      "loss": 3.3943,
      "step": 16216
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.486877679824829,
      "learning_rate": 0.0005926955841908668,
      "loss": 3.2961,
      "step": 16217
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.071547746658325,
      "learning_rate": 0.0005926946870027686,
      "loss": 2.982,
      "step": 16218
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6046526432037354,
      "learning_rate": 0.000592693789760253,
      "loss": 3.1215,
      "step": 16219
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9345792531967163,
      "learning_rate": 0.0005926928924633201,
      "loss": 3.1682,
      "step": 16220
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7680295705795288,
      "learning_rate": 0.00059269199511197,
      "loss": 2.9243,
      "step": 16221
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7920929193496704,
      "learning_rate": 0.000592691097706203,
      "loss": 3.0401,
      "step": 16222
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4399527311325073,
      "learning_rate": 0.0005926902002460193,
      "loss": 2.9769,
      "step": 16223
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5231374502182007,
      "learning_rate": 0.0005926893027314188,
      "loss": 3.0107,
      "step": 16224
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6235228776931763,
      "learning_rate": 0.0005926884051624019,
      "loss": 2.9969,
      "step": 16225
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5673538446426392,
      "learning_rate": 0.0005926875075389687,
      "loss": 2.9988,
      "step": 16226
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.444207787513733,
      "learning_rate": 0.0005926866098611193,
      "loss": 3.1572,
      "step": 16227
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3677116632461548,
      "learning_rate": 0.0005926857121288541,
      "loss": 3.1215,
      "step": 16228
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.445754051208496,
      "learning_rate": 0.000592684814342173,
      "loss": 3.154,
      "step": 16229
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1223862171173096,
      "learning_rate": 0.0005926839165010762,
      "loss": 2.9998,
      "step": 16230
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8219612836837769,
      "learning_rate": 0.000592683018605564,
      "loss": 3.1735,
      "step": 16231
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4638460874557495,
      "learning_rate": 0.0005926821206556365,
      "loss": 2.9434,
      "step": 16232
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8999640941619873,
      "learning_rate": 0.0005926812226512938,
      "loss": 3.145,
      "step": 16233
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6056938171386719,
      "learning_rate": 0.0005926803245925363,
      "loss": 3.3011,
      "step": 16234
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3213006258010864,
      "learning_rate": 0.0005926794264793639,
      "loss": 3.3808,
      "step": 16235
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3932757377624512,
      "learning_rate": 0.0005926785283117769,
      "loss": 3.268,
      "step": 16236
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5599266290664673,
      "learning_rate": 0.0005926776300897754,
      "loss": 3.2889,
      "step": 16237
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6251002550125122,
      "learning_rate": 0.0005926767318133597,
      "loss": 3.149,
      "step": 16238
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.822980284690857,
      "learning_rate": 0.0005926758334825297,
      "loss": 3.0957,
      "step": 16239
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1611509323120117,
      "learning_rate": 0.0005926749350972858,
      "loss": 2.9851,
      "step": 16240
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4581220149993896,
      "learning_rate": 0.0005926740366576282,
      "loss": 3.1211,
      "step": 16241
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.826399326324463,
      "learning_rate": 0.0005926731381635569,
      "loss": 3.0317,
      "step": 16242
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.791285991668701,
      "learning_rate": 0.0005926722396150722,
      "loss": 3.094,
      "step": 16243
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.753035068511963,
      "learning_rate": 0.0005926713410121741,
      "loss": 3.2183,
      "step": 16244
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.411372184753418,
      "learning_rate": 0.0005926704423548629,
      "loss": 3.3715,
      "step": 16245
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4654390811920166,
      "learning_rate": 0.0005926695436431387,
      "loss": 3.0081,
      "step": 16246
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.673674464225769,
      "learning_rate": 0.0005926686448770019,
      "loss": 3.0682,
      "step": 16247
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7608126401901245,
      "learning_rate": 0.0005926677460564524,
      "loss": 3.0938,
      "step": 16248
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4744826555252075,
      "learning_rate": 0.0005926668471814903,
      "loss": 3.3085,
      "step": 16249
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7795970439910889,
      "learning_rate": 0.000592665948252116,
      "loss": 2.9996,
      "step": 16250
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4455630779266357,
      "learning_rate": 0.0005926650492683296,
      "loss": 3.2986,
      "step": 16251
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5755033493041992,
      "learning_rate": 0.0005926641502301312,
      "loss": 3.1708,
      "step": 16252
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3685516119003296,
      "learning_rate": 0.000592663251137521,
      "loss": 2.7841,
      "step": 16253
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.643231749534607,
      "learning_rate": 0.0005926623519904992,
      "loss": 3.1814,
      "step": 16254
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4702826738357544,
      "learning_rate": 0.000592661452789066,
      "loss": 3.0547,
      "step": 16255
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.530010461807251,
      "learning_rate": 0.0005926605535332214,
      "loss": 3.1103,
      "step": 16256
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.748308539390564,
      "learning_rate": 0.0005926596542229658,
      "loss": 3.2695,
      "step": 16257
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7188310623168945,
      "learning_rate": 0.0005926587548582992,
      "loss": 3.2553,
      "step": 16258
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.915377616882324,
      "learning_rate": 0.0005926578554392218,
      "loss": 3.251,
      "step": 16259
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9929981231689453,
      "learning_rate": 0.0005926569559657337,
      "loss": 3.2447,
      "step": 16260
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6482077836990356,
      "learning_rate": 0.0005926560564378353,
      "loss": 3.1642,
      "step": 16261
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6587871313095093,
      "learning_rate": 0.0005926551568555265,
      "loss": 3.311,
      "step": 16262
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5155221223831177,
      "learning_rate": 0.0005926542572188076,
      "loss": 3.0195,
      "step": 16263
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1451208591461182,
      "learning_rate": 0.0005926533575276787,
      "loss": 2.9817,
      "step": 16264
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5763816833496094,
      "learning_rate": 0.0005926524577821401,
      "loss": 3.1154,
      "step": 16265
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8347723484039307,
      "learning_rate": 0.000592651557982192,
      "loss": 3.2445,
      "step": 16266
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4572937488555908,
      "learning_rate": 0.0005926506581278343,
      "loss": 3.1022,
      "step": 16267
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4868346452713013,
      "learning_rate": 0.0005926497582190673,
      "loss": 3.1992,
      "step": 16268
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4910039901733398,
      "learning_rate": 0.0005926488582558911,
      "loss": 3.2057,
      "step": 16269
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6430602073669434,
      "learning_rate": 0.0005926479582383062,
      "loss": 3.1226,
      "step": 16270
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3228507041931152,
      "learning_rate": 0.0005926470581663124,
      "loss": 3.0578,
      "step": 16271
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4569264650344849,
      "learning_rate": 0.00059264615803991,
      "loss": 3.0164,
      "step": 16272
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4534862041473389,
      "learning_rate": 0.0005926452578590991,
      "loss": 3.0866,
      "step": 16273
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6333473920822144,
      "learning_rate": 0.00059264435762388,
      "loss": 3.0189,
      "step": 16274
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.501835584640503,
      "learning_rate": 0.0005926434573342527,
      "loss": 3.1313,
      "step": 16275
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5693261623382568,
      "learning_rate": 0.0005926425569902175,
      "loss": 2.9016,
      "step": 16276
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.006805896759033,
      "learning_rate": 0.0005926416565917746,
      "loss": 3.022,
      "step": 16277
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4611127376556396,
      "learning_rate": 0.000592640756138924,
      "loss": 3.1884,
      "step": 16278
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4707348346710205,
      "learning_rate": 0.000592639855631666,
      "loss": 2.9702,
      "step": 16279
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4014772176742554,
      "learning_rate": 0.0005926389550700008,
      "loss": 3.1071,
      "step": 16280
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.380684494972229,
      "learning_rate": 0.0005926380544539284,
      "loss": 3.0257,
      "step": 16281
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.082763671875,
      "learning_rate": 0.000592637153783449,
      "loss": 3.2486,
      "step": 16282
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2435688972473145,
      "learning_rate": 0.0005926362530585629,
      "loss": 2.9636,
      "step": 16283
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9354209899902344,
      "learning_rate": 0.0005926353522792702,
      "loss": 3.3158,
      "step": 16284
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6655384302139282,
      "learning_rate": 0.000592634451445571,
      "loss": 2.7539,
      "step": 16285
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.019636631011963,
      "learning_rate": 0.0005926335505574657,
      "loss": 3.258,
      "step": 16286
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9944608211517334,
      "learning_rate": 0.0005926326496149541,
      "loss": 3.2148,
      "step": 16287
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.0555975437164307,
      "learning_rate": 0.0005926317486180366,
      "loss": 3.026,
      "step": 16288
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4149168729782104,
      "learning_rate": 0.0005926308475667136,
      "loss": 3.2851,
      "step": 16289
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5307646989822388,
      "learning_rate": 0.0005926299464609847,
      "loss": 3.4365,
      "step": 16290
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5714104175567627,
      "learning_rate": 0.0005926290453008504,
      "loss": 3.0857,
      "step": 16291
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.8155148029327393,
      "learning_rate": 0.000592628144086311,
      "loss": 3.0472,
      "step": 16292
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2808516025543213,
      "learning_rate": 0.0005926272428173663,
      "loss": 3.1624,
      "step": 16293
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.063336133956909,
      "learning_rate": 0.0005926263414940168,
      "loss": 3.1646,
      "step": 16294
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.746495485305786,
      "learning_rate": 0.0005926254401162625,
      "loss": 3.0587,
      "step": 16295
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6078855991363525,
      "learning_rate": 0.0005926245386841037,
      "loss": 3.0751,
      "step": 16296
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7948521375656128,
      "learning_rate": 0.0005926236371975403,
      "loss": 3.0907,
      "step": 16297
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.078739881515503,
      "learning_rate": 0.0005926227356565729,
      "loss": 3.3479,
      "step": 16298
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7379478216171265,
      "learning_rate": 0.0005926218340612012,
      "loss": 3.2857,
      "step": 16299
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2917672395706177,
      "learning_rate": 0.0005926209324114256,
      "loss": 3.073,
      "step": 16300
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3728135824203491,
      "learning_rate": 0.0005926200307072463,
      "loss": 3.0687,
      "step": 16301
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6112899780273438,
      "learning_rate": 0.0005926191289486633,
      "loss": 3.1974,
      "step": 16302
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6299527883529663,
      "learning_rate": 0.000592618227135677,
      "loss": 3.04,
      "step": 16303
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.551517367362976,
      "learning_rate": 0.0005926173252682875,
      "loss": 3.198,
      "step": 16304
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1081438064575195,
      "learning_rate": 0.0005926164233464947,
      "loss": 3.0143,
      "step": 16305
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.173405647277832,
      "learning_rate": 0.0005926155213702992,
      "loss": 2.8691,
      "step": 16306
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.570338010787964,
      "learning_rate": 0.0005926146193397008,
      "loss": 2.9892,
      "step": 16307
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5571526288986206,
      "learning_rate": 0.0005926137172546998,
      "loss": 2.9989,
      "step": 16308
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.199253797531128,
      "learning_rate": 0.0005926128151152965,
      "loss": 3.0652,
      "step": 16309
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7530698776245117,
      "learning_rate": 0.000592611912921491,
      "loss": 3.1135,
      "step": 16310
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7946686744689941,
      "learning_rate": 0.0005926110106732833,
      "loss": 3.1526,
      "step": 16311
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8093578815460205,
      "learning_rate": 0.0005926101083706737,
      "loss": 2.914,
      "step": 16312
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.055283308029175,
      "learning_rate": 0.0005926092060136624,
      "loss": 3.4051,
      "step": 16313
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8306816816329956,
      "learning_rate": 0.0005926083036022496,
      "loss": 3.1715,
      "step": 16314
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3944646120071411,
      "learning_rate": 0.0005926074011364353,
      "loss": 3.0989,
      "step": 16315
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.776237964630127,
      "learning_rate": 0.0005926064986162197,
      "loss": 3.156,
      "step": 16316
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.026705503463745,
      "learning_rate": 0.0005926055960416032,
      "loss": 3.0284,
      "step": 16317
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7547338008880615,
      "learning_rate": 0.0005926046934125856,
      "loss": 3.3366,
      "step": 16318
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4324383735656738,
      "learning_rate": 0.0005926037907291673,
      "loss": 3.1169,
      "step": 16319
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.9792144298553467,
      "learning_rate": 0.0005926028879913485,
      "loss": 3.2591,
      "step": 16320
    },
    {
      "epoch": 0.21,
      "grad_norm": 4.154751777648926,
      "learning_rate": 0.0005926019851991293,
      "loss": 2.8138,
      "step": 16321
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.847984790802002,
      "learning_rate": 0.0005926010823525099,
      "loss": 3.0344,
      "step": 16322
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4071919918060303,
      "learning_rate": 0.0005926001794514904,
      "loss": 2.8769,
      "step": 16323
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.7202236652374268,
      "learning_rate": 0.000592599276496071,
      "loss": 3.1152,
      "step": 16324
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.460054636001587,
      "learning_rate": 0.0005925983734862519,
      "loss": 3.1521,
      "step": 16325
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1163718700408936,
      "learning_rate": 0.0005925974704220332,
      "loss": 3.2045,
      "step": 16326
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4763872623443604,
      "learning_rate": 0.0005925965673034151,
      "loss": 3.0381,
      "step": 16327
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.665103554725647,
      "learning_rate": 0.0005925956641303978,
      "loss": 3.4298,
      "step": 16328
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5083250999450684,
      "learning_rate": 0.0005925947609029815,
      "loss": 3.3591,
      "step": 16329
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3212435245513916,
      "learning_rate": 0.0005925938576211661,
      "loss": 3.0566,
      "step": 16330
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.69124436378479,
      "learning_rate": 0.0005925929542849521,
      "loss": 3.0978,
      "step": 16331
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6185014247894287,
      "learning_rate": 0.0005925920508943396,
      "loss": 2.928,
      "step": 16332
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4010424613952637,
      "learning_rate": 0.0005925911474493287,
      "loss": 2.9521,
      "step": 16333
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.392585277557373,
      "learning_rate": 0.0005925902439499197,
      "loss": 3.2761,
      "step": 16334
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2139618396759033,
      "learning_rate": 0.0005925893403961124,
      "loss": 3.3043,
      "step": 16335
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0688130855560303,
      "learning_rate": 0.0005925884367879073,
      "loss": 3.0667,
      "step": 16336
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5163908004760742,
      "learning_rate": 0.0005925875331253045,
      "loss": 2.9932,
      "step": 16337
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0686371326446533,
      "learning_rate": 0.0005925866294083042,
      "loss": 2.9973,
      "step": 16338
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8477375507354736,
      "learning_rate": 0.0005925857256369065,
      "loss": 3.3523,
      "step": 16339
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7422436475753784,
      "learning_rate": 0.0005925848218111116,
      "loss": 3.1527,
      "step": 16340
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4788154363632202,
      "learning_rate": 0.0005925839179309197,
      "loss": 3.2841,
      "step": 16341
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3287591934204102,
      "learning_rate": 0.0005925830139963308,
      "loss": 3.2211,
      "step": 16342
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5608465671539307,
      "learning_rate": 0.0005925821100073454,
      "loss": 3.2832,
      "step": 16343
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4967235326766968,
      "learning_rate": 0.0005925812059639632,
      "loss": 3.0187,
      "step": 16344
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4532619714736938,
      "learning_rate": 0.0005925803018661848,
      "loss": 3.0994,
      "step": 16345
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5383260250091553,
      "learning_rate": 0.0005925793977140101,
      "loss": 3.1023,
      "step": 16346
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.70363450050354,
      "learning_rate": 0.0005925784935074395,
      "loss": 3.346,
      "step": 16347
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4423487186431885,
      "learning_rate": 0.000592577589246473,
      "loss": 3.0559,
      "step": 16348
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5751336812973022,
      "learning_rate": 0.0005925766849311107,
      "loss": 2.9221,
      "step": 16349
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5955727100372314,
      "learning_rate": 0.000592575780561353,
      "loss": 2.8527,
      "step": 16350
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3451257944107056,
      "learning_rate": 0.0005925748761371998,
      "loss": 3.2245,
      "step": 16351
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.559261679649353,
      "learning_rate": 0.0005925739716586515,
      "loss": 3.0643,
      "step": 16352
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7488974332809448,
      "learning_rate": 0.0005925730671257082,
      "loss": 3.3505,
      "step": 16353
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5821269750595093,
      "learning_rate": 0.00059257216253837,
      "loss": 3.0217,
      "step": 16354
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7085201740264893,
      "learning_rate": 0.0005925712578966371,
      "loss": 2.9473,
      "step": 16355
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.022535562515259,
      "learning_rate": 0.0005925703532005097,
      "loss": 3.079,
      "step": 16356
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4365230798721313,
      "learning_rate": 0.000592569448449988,
      "loss": 3.3475,
      "step": 16357
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.62325119972229,
      "learning_rate": 0.000592568543645072,
      "loss": 3.1215,
      "step": 16358
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8303754329681396,
      "learning_rate": 0.000592567638785762,
      "loss": 2.8909,
      "step": 16359
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6311190128326416,
      "learning_rate": 0.0005925667338720583,
      "loss": 2.9425,
      "step": 16360
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.471882700920105,
      "learning_rate": 0.0005925658289039609,
      "loss": 2.8763,
      "step": 16361
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4359874725341797,
      "learning_rate": 0.0005925649238814698,
      "loss": 2.9726,
      "step": 16362
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.372427225112915,
      "learning_rate": 0.0005925640188045855,
      "loss": 3.3528,
      "step": 16363
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3350744247436523,
      "learning_rate": 0.0005925631136733079,
      "loss": 3.2274,
      "step": 16364
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8412325382232666,
      "learning_rate": 0.0005925622084876375,
      "loss": 3.0963,
      "step": 16365
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4398751258850098,
      "learning_rate": 0.0005925613032475741,
      "loss": 2.7414,
      "step": 16366
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0513930320739746,
      "learning_rate": 0.0005925603979531181,
      "loss": 2.8012,
      "step": 16367
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8892196416854858,
      "learning_rate": 0.0005925594926042696,
      "loss": 3.3299,
      "step": 16368
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6573517322540283,
      "learning_rate": 0.0005925585872010288,
      "loss": 3.2048,
      "step": 16369
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5032074451446533,
      "learning_rate": 0.0005925576817433957,
      "loss": 3.0557,
      "step": 16370
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3442914485931396,
      "learning_rate": 0.0005925567762313707,
      "loss": 3.0477,
      "step": 16371
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8638920783996582,
      "learning_rate": 0.0005925558706649539,
      "loss": 3.2662,
      "step": 16372
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.247616767883301,
      "learning_rate": 0.0005925549650441454,
      "loss": 2.9735,
      "step": 16373
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.980272054672241,
      "learning_rate": 0.0005925540593689454,
      "loss": 3.0509,
      "step": 16374
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.770754337310791,
      "learning_rate": 0.0005925531536393541,
      "loss": 3.4088,
      "step": 16375
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.82277250289917,
      "learning_rate": 0.0005925522478553716,
      "loss": 3.1985,
      "step": 16376
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.882694959640503,
      "learning_rate": 0.0005925513420169982,
      "loss": 3.0637,
      "step": 16377
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4593878984451294,
      "learning_rate": 0.000592550436124234,
      "loss": 3.2438,
      "step": 16378
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1141068935394287,
      "learning_rate": 0.000592549530177079,
      "loss": 2.9034,
      "step": 16379
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6645545959472656,
      "learning_rate": 0.0005925486241755337,
      "loss": 3.4014,
      "step": 16380
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.495678186416626,
      "learning_rate": 0.000592547718119598,
      "loss": 3.1526,
      "step": 16381
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5981557369232178,
      "learning_rate": 0.0005925468120092721,
      "loss": 3.3041,
      "step": 16382
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7046403884887695,
      "learning_rate": 0.0005925459058445563,
      "loss": 2.9631,
      "step": 16383
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.730177640914917,
      "learning_rate": 0.0005925449996254506,
      "loss": 2.9779,
      "step": 16384
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7606122493743896,
      "learning_rate": 0.0005925440933519555,
      "loss": 3.1342,
      "step": 16385
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4176454544067383,
      "learning_rate": 0.0005925431870240707,
      "loss": 3.1421,
      "step": 16386
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.167670726776123,
      "learning_rate": 0.0005925422806417966,
      "loss": 3.161,
      "step": 16387
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7891108989715576,
      "learning_rate": 0.0005925413742051335,
      "loss": 3.1166,
      "step": 16388
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.784700632095337,
      "learning_rate": 0.0005925404677140813,
      "loss": 3.0262,
      "step": 16389
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7737597227096558,
      "learning_rate": 0.0005925395611686404,
      "loss": 3.3297,
      "step": 16390
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4953835010528564,
      "learning_rate": 0.0005925386545688109,
      "loss": 3.2134,
      "step": 16391
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.555568814277649,
      "learning_rate": 0.0005925377479145929,
      "loss": 3.2253,
      "step": 16392
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9794024229049683,
      "learning_rate": 0.0005925368412059867,
      "loss": 2.9385,
      "step": 16393
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.836466908454895,
      "learning_rate": 0.0005925359344429922,
      "loss": 2.6649,
      "step": 16394
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.584611415863037,
      "learning_rate": 0.0005925350276256099,
      "loss": 3.3487,
      "step": 16395
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.3765788078308105,
      "learning_rate": 0.0005925341207538397,
      "loss": 3.2571,
      "step": 16396
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9378917217254639,
      "learning_rate": 0.0005925332138276819,
      "loss": 3.1358,
      "step": 16397
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6292599439620972,
      "learning_rate": 0.0005925323068471367,
      "loss": 3.1448,
      "step": 16398
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1467056274414062,
      "learning_rate": 0.0005925313998122042,
      "loss": 2.8497,
      "step": 16399
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.061323881149292,
      "learning_rate": 0.0005925304927228846,
      "loss": 3.0012,
      "step": 16400
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.5686140060424805,
      "learning_rate": 0.0005925295855791781,
      "loss": 3.3131,
      "step": 16401
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7113558053970337,
      "learning_rate": 0.0005925286783810847,
      "loss": 3.2531,
      "step": 16402
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0880236625671387,
      "learning_rate": 0.0005925277711286049,
      "loss": 3.3359,
      "step": 16403
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4648091793060303,
      "learning_rate": 0.0005925268638217384,
      "loss": 3.1048,
      "step": 16404
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5538395643234253,
      "learning_rate": 0.0005925259564604859,
      "loss": 3.0405,
      "step": 16405
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8132442235946655,
      "learning_rate": 0.0005925250490448472,
      "loss": 3.147,
      "step": 16406
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.48349928855896,
      "learning_rate": 0.0005925241415748225,
      "loss": 3.327,
      "step": 16407
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.1550185680389404,
      "learning_rate": 0.0005925232340504122,
      "loss": 3.3646,
      "step": 16408
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.504124402999878,
      "learning_rate": 0.0005925223264716161,
      "loss": 3.1306,
      "step": 16409
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5525169372558594,
      "learning_rate": 0.0005925214188384347,
      "loss": 2.9387,
      "step": 16410
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4241453409194946,
      "learning_rate": 0.000592520511150868,
      "loss": 3.2491,
      "step": 16411
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4051461219787598,
      "learning_rate": 0.0005925196034089163,
      "loss": 2.8155,
      "step": 16412
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.550018548965454,
      "learning_rate": 0.0005925186956125796,
      "loss": 3.1749,
      "step": 16413
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5700172185897827,
      "learning_rate": 0.0005925177877618582,
      "loss": 2.9458,
      "step": 16414
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5860389471054077,
      "learning_rate": 0.0005925168798567522,
      "loss": 3.2533,
      "step": 16415
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2349815368652344,
      "learning_rate": 0.0005925159718972618,
      "loss": 3.2605,
      "step": 16416
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.056962013244629,
      "learning_rate": 0.0005925150638833871,
      "loss": 3.034,
      "step": 16417
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.860937237739563,
      "learning_rate": 0.0005925141558151284,
      "loss": 3.1283,
      "step": 16418
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.2171895503997803,
      "learning_rate": 0.0005925132476924858,
      "loss": 3.2145,
      "step": 16419
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.382706642150879,
      "learning_rate": 0.0005925123395154594,
      "loss": 3.1875,
      "step": 16420
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7246425151824951,
      "learning_rate": 0.0005925114312840495,
      "loss": 3.3029,
      "step": 16421
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3252038955688477,
      "learning_rate": 0.0005925105229982562,
      "loss": 2.7449,
      "step": 16422
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7864711284637451,
      "learning_rate": 0.0005925096146580797,
      "loss": 3.0968,
      "step": 16423
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8764110803604126,
      "learning_rate": 0.00059250870626352,
      "loss": 3.1494,
      "step": 16424
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7722737789154053,
      "learning_rate": 0.0005925077978145776,
      "loss": 3.4846,
      "step": 16425
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5267105102539062,
      "learning_rate": 0.0005925068893112524,
      "loss": 3.1289,
      "step": 16426
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.385672688484192,
      "learning_rate": 0.0005925059807535446,
      "loss": 3.0938,
      "step": 16427
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4167110919952393,
      "learning_rate": 0.0005925050721414544,
      "loss": 2.9397,
      "step": 16428
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6700382232666016,
      "learning_rate": 0.0005925041634749821,
      "loss": 2.937,
      "step": 16429
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1875922679901123,
      "learning_rate": 0.0005925032547541277,
      "loss": 3.0191,
      "step": 16430
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.6215689182281494,
      "learning_rate": 0.0005925023459788914,
      "loss": 2.9273,
      "step": 16431
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9791165590286255,
      "learning_rate": 0.0005925014371492733,
      "loss": 3.1169,
      "step": 16432
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.594193696975708,
      "learning_rate": 0.0005925005282652738,
      "loss": 2.9462,
      "step": 16433
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.904282331466675,
      "learning_rate": 0.000592499619326893,
      "loss": 2.9458,
      "step": 16434
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8823442459106445,
      "learning_rate": 0.0005924987103341308,
      "loss": 2.9602,
      "step": 16435
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7535415887832642,
      "learning_rate": 0.0005924978012869877,
      "loss": 2.9326,
      "step": 16436
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.1364328861236572,
      "learning_rate": 0.0005924968921854637,
      "loss": 3.1046,
      "step": 16437
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.513929009437561,
      "learning_rate": 0.0005924959830295591,
      "loss": 3.1141,
      "step": 16438
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6904736757278442,
      "learning_rate": 0.0005924950738192738,
      "loss": 3.2382,
      "step": 16439
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7260109186172485,
      "learning_rate": 0.0005924941645546083,
      "loss": 3.2797,
      "step": 16440
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5697414875030518,
      "learning_rate": 0.0005924932552355624,
      "loss": 3.329,
      "step": 16441
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7155888080596924,
      "learning_rate": 0.0005924923458621367,
      "loss": 3.1598,
      "step": 16442
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8857457637786865,
      "learning_rate": 0.0005924914364343311,
      "loss": 3.176,
      "step": 16443
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6047245264053345,
      "learning_rate": 0.0005924905269521458,
      "loss": 3.1822,
      "step": 16444
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0891950130462646,
      "learning_rate": 0.000592489617415581,
      "loss": 2.8607,
      "step": 16445
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0001931190490723,
      "learning_rate": 0.0005924887078246367,
      "loss": 3.1631,
      "step": 16446
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6822723150253296,
      "learning_rate": 0.0005924877981793135,
      "loss": 3.0934,
      "step": 16447
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5145044326782227,
      "learning_rate": 0.0005924868884796112,
      "loss": 3.166,
      "step": 16448
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3803749084472656,
      "learning_rate": 0.00059248597872553,
      "loss": 3.2538,
      "step": 16449
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.630497932434082,
      "learning_rate": 0.0005924850689170702,
      "loss": 3.333,
      "step": 16450
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4911932945251465,
      "learning_rate": 0.0005924841590542318,
      "loss": 3.1723,
      "step": 16451
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.265282392501831,
      "learning_rate": 0.0005924832491370151,
      "loss": 3.1091,
      "step": 16452
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6919792890548706,
      "learning_rate": 0.0005924823391654204,
      "loss": 3.0415,
      "step": 16453
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5756263732910156,
      "learning_rate": 0.0005924814291394476,
      "loss": 3.0393,
      "step": 16454
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.754725694656372,
      "learning_rate": 0.000592480519059097,
      "loss": 3.1745,
      "step": 16455
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.439971685409546,
      "learning_rate": 0.0005924796089243687,
      "loss": 3.229,
      "step": 16456
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.239603042602539,
      "learning_rate": 0.0005924786987352629,
      "loss": 3.02,
      "step": 16457
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6971162557601929,
      "learning_rate": 0.00059247778849178,
      "loss": 2.9281,
      "step": 16458
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7397549152374268,
      "learning_rate": 0.0005924768781939197,
      "loss": 3.0885,
      "step": 16459
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6974585056304932,
      "learning_rate": 0.0005924759678416826,
      "loss": 3.1664,
      "step": 16460
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5891821384429932,
      "learning_rate": 0.0005924750574350686,
      "loss": 3.2041,
      "step": 16461
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5092835426330566,
      "learning_rate": 0.0005924741469740779,
      "loss": 2.9104,
      "step": 16462
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5173405408859253,
      "learning_rate": 0.000592473236458711,
      "loss": 3.0941,
      "step": 16463
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4670507907867432,
      "learning_rate": 0.0005924723258889676,
      "loss": 3.0767,
      "step": 16464
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.440523624420166,
      "learning_rate": 0.000592471415264848,
      "loss": 2.9969,
      "step": 16465
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5881483554840088,
      "learning_rate": 0.0005924705045863526,
      "loss": 3.2921,
      "step": 16466
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2867481708526611,
      "learning_rate": 0.0005924695938534813,
      "loss": 2.8882,
      "step": 16467
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3730374574661255,
      "learning_rate": 0.0005924686830662345,
      "loss": 3.0033,
      "step": 16468
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.3176918029785156,
      "learning_rate": 0.0005924677722246122,
      "loss": 3.3622,
      "step": 16469
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.544795274734497,
      "learning_rate": 0.0005924668613286147,
      "loss": 3.031,
      "step": 16470
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7556160688400269,
      "learning_rate": 0.000592465950378242,
      "loss": 2.9829,
      "step": 16471
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7803226709365845,
      "learning_rate": 0.0005924650393734943,
      "loss": 3.1469,
      "step": 16472
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4893293380737305,
      "learning_rate": 0.0005924641283143719,
      "loss": 2.9765,
      "step": 16473
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.746692180633545,
      "learning_rate": 0.0005924632172008749,
      "loss": 3.1668,
      "step": 16474
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.923268437385559,
      "learning_rate": 0.0005924623060330035,
      "loss": 3.1695,
      "step": 16475
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8593753576278687,
      "learning_rate": 0.0005924613948107578,
      "loss": 3.2326,
      "step": 16476
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6280790567398071,
      "learning_rate": 0.000592460483534138,
      "loss": 3.0716,
      "step": 16477
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6697227954864502,
      "learning_rate": 0.0005924595722031443,
      "loss": 3.0374,
      "step": 16478
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.412667989730835,
      "learning_rate": 0.0005924586608177767,
      "loss": 3.1497,
      "step": 16479
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.2974352836608887,
      "learning_rate": 0.0005924577493780358,
      "loss": 3.1828,
      "step": 16480
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5325515270233154,
      "learning_rate": 0.0005924568378839213,
      "loss": 2.8689,
      "step": 16481
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5183621644973755,
      "learning_rate": 0.0005924559263354335,
      "loss": 3.1457,
      "step": 16482
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5323344469070435,
      "learning_rate": 0.0005924550147325727,
      "loss": 3.1055,
      "step": 16483
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9571723937988281,
      "learning_rate": 0.000592454103075339,
      "loss": 2.9724,
      "step": 16484
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.037458658218384,
      "learning_rate": 0.0005924531913637325,
      "loss": 3.0527,
      "step": 16485
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4482463598251343,
      "learning_rate": 0.0005924522795977535,
      "loss": 3.0743,
      "step": 16486
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0246994495391846,
      "learning_rate": 0.0005924513677774021,
      "loss": 3.0662,
      "step": 16487
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.319568395614624,
      "learning_rate": 0.0005924504559026785,
      "loss": 3.0428,
      "step": 16488
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.418766975402832,
      "learning_rate": 0.0005924495439735828,
      "loss": 2.9969,
      "step": 16489
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8341065645217896,
      "learning_rate": 0.0005924486319901152,
      "loss": 3.2044,
      "step": 16490
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.7503790855407715,
      "learning_rate": 0.0005924477199522758,
      "loss": 3.0806,
      "step": 16491
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8265666961669922,
      "learning_rate": 0.000592446807860065,
      "loss": 3.1235,
      "step": 16492
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.466583251953125,
      "learning_rate": 0.0005924458957134827,
      "loss": 3.2011,
      "step": 16493
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9245046377182007,
      "learning_rate": 0.0005924449835125293,
      "loss": 3.1626,
      "step": 16494
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.785158395767212,
      "learning_rate": 0.0005924440712572047,
      "loss": 2.9695,
      "step": 16495
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.73210608959198,
      "learning_rate": 0.0005924431589475093,
      "loss": 3.2118,
      "step": 16496
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.845984697341919,
      "learning_rate": 0.0005924422465834432,
      "loss": 3.1379,
      "step": 16497
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.9999314546585083,
      "learning_rate": 0.0005924413341650066,
      "loss": 3.0423,
      "step": 16498
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.6302168369293213,
      "learning_rate": 0.0005924404216921997,
      "loss": 3.3028,
      "step": 16499
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.5289745330810547,
      "learning_rate": 0.0005924395091650225,
      "loss": 3.118,
      "step": 16500
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.4455008506774902,
      "learning_rate": 0.0005924385965834754,
      "loss": 3.0036,
      "step": 16501
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.8775763511657715,
      "learning_rate": 0.0005924376839475583,
      "loss": 3.0995,
      "step": 16502
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.168433666229248,
      "learning_rate": 0.0005924367712572715,
      "loss": 3.1225,
      "step": 16503
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.988391637802124,
      "learning_rate": 0.0005924358585126153,
      "loss": 3.237,
      "step": 16504
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0854687690734863,
      "learning_rate": 0.0005924349457135897,
      "loss": 2.6988,
      "step": 16505
    },
    {
      "epoch": 0.21,
      "grad_norm": 1.4811798334121704,
      "learning_rate": 0.0005924340328601949,
      "loss": 3.2107,
      "step": 16506
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.3940253257751465,
      "learning_rate": 0.0005924331199524311,
      "loss": 3.0196,
      "step": 16507
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.8066020011901855,
      "learning_rate": 0.0005924322069902984,
      "loss": 3.1848,
      "step": 16508
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.0063743591308594,
      "learning_rate": 0.0005924312939737972,
      "loss": 2.6963,
      "step": 16509
    },
    {
      "epoch": 0.21,
      "grad_norm": 3.0459985733032227,
      "learning_rate": 0.0005924303809029274,
      "loss": 3.0755,
      "step": 16510
    },
    {
      "epoch": 0.21,
      "grad_norm": 5.063216686248779,
      "learning_rate": 0.0005924294677776893,
      "loss": 3.2399,
      "step": 16511
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.448338747024536,
      "learning_rate": 0.0005924285545980829,
      "loss": 3.4937,
      "step": 16512
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8887382745742798,
      "learning_rate": 0.0005924276413641087,
      "loss": 3.3337,
      "step": 16513
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.690057396888733,
      "learning_rate": 0.0005924267280757666,
      "loss": 3.19,
      "step": 16514
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.929090976715088,
      "learning_rate": 0.0005924258147330569,
      "loss": 3.0854,
      "step": 16515
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.888128399848938,
      "learning_rate": 0.0005924249013359797,
      "loss": 2.9867,
      "step": 16516
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9443318843841553,
      "learning_rate": 0.0005924239878845352,
      "loss": 2.9648,
      "step": 16517
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5942621231079102,
      "learning_rate": 0.0005924230743787235,
      "loss": 3.1693,
      "step": 16518
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5089738368988037,
      "learning_rate": 0.0005924221608185449,
      "loss": 3.106,
      "step": 16519
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.234288215637207,
      "learning_rate": 0.0005924212472039994,
      "loss": 3.1872,
      "step": 16520
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4205750226974487,
      "learning_rate": 0.0005924203335350874,
      "loss": 3.2158,
      "step": 16521
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4696152210235596,
      "learning_rate": 0.0005924194198118089,
      "loss": 3.1092,
      "step": 16522
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.2140870094299316,
      "learning_rate": 0.000592418506034164,
      "loss": 3.2532,
      "step": 16523
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.429917097091675,
      "learning_rate": 0.0005924175922021531,
      "loss": 3.1227,
      "step": 16524
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3269283771514893,
      "learning_rate": 0.0005924166783157763,
      "loss": 3.2845,
      "step": 16525
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.975267767906189,
      "learning_rate": 0.0005924157643750336,
      "loss": 3.0249,
      "step": 16526
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9963741302490234,
      "learning_rate": 0.0005924148503799253,
      "loss": 3.1422,
      "step": 16527
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5655115842819214,
      "learning_rate": 0.0005924139363304516,
      "loss": 3.0086,
      "step": 16528
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5537687540054321,
      "learning_rate": 0.0005924130222266127,
      "loss": 3.1639,
      "step": 16529
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2138912677764893,
      "learning_rate": 0.0005924121080684086,
      "loss": 3.0399,
      "step": 16530
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8990113735198975,
      "learning_rate": 0.0005924111938558395,
      "loss": 3.4103,
      "step": 16531
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0004045963287354,
      "learning_rate": 0.0005924102795889058,
      "loss": 3.2693,
      "step": 16532
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4143339395523071,
      "learning_rate": 0.0005924093652676074,
      "loss": 3.2263,
      "step": 16533
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5165064334869385,
      "learning_rate": 0.0005924084508919447,
      "loss": 3.0963,
      "step": 16534
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3918230533599854,
      "learning_rate": 0.0005924075364619176,
      "loss": 3.2652,
      "step": 16535
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5301991701126099,
      "learning_rate": 0.0005924066219775265,
      "loss": 3.0723,
      "step": 16536
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4431188106536865,
      "learning_rate": 0.0005924057074387715,
      "loss": 2.9992,
      "step": 16537
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.759931206703186,
      "learning_rate": 0.0005924047928456528,
      "loss": 3.1087,
      "step": 16538
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7023111581802368,
      "learning_rate": 0.0005924038781981704,
      "loss": 3.3405,
      "step": 16539
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1208600997924805,
      "learning_rate": 0.0005924029634963246,
      "loss": 3.0789,
      "step": 16540
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2255984544754028,
      "learning_rate": 0.0005924020487401157,
      "loss": 3.1179,
      "step": 16541
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7811201810836792,
      "learning_rate": 0.0005924011339295437,
      "loss": 3.2172,
      "step": 16542
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3702409267425537,
      "learning_rate": 0.0005924002190646087,
      "loss": 3.17,
      "step": 16543
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5789165496826172,
      "learning_rate": 0.000592399304145311,
      "loss": 3.1182,
      "step": 16544
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8312151432037354,
      "learning_rate": 0.0005923983891716509,
      "loss": 2.9559,
      "step": 16545
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9042314291000366,
      "learning_rate": 0.0005923974741436284,
      "loss": 3.0202,
      "step": 16546
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.533659815788269,
      "learning_rate": 0.0005923965590612435,
      "loss": 3.1488,
      "step": 16547
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5730966329574585,
      "learning_rate": 0.0005923956439244967,
      "loss": 3.0322,
      "step": 16548
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5714412927627563,
      "learning_rate": 0.0005923947287333881,
      "loss": 3.1135,
      "step": 16549
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6783795356750488,
      "learning_rate": 0.0005923938134879176,
      "loss": 3.0964,
      "step": 16550
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7214570045471191,
      "learning_rate": 0.0005923928981880857,
      "loss": 3.1226,
      "step": 16551
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.089703321456909,
      "learning_rate": 0.0005923919828338925,
      "loss": 2.9724,
      "step": 16552
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4356292486190796,
      "learning_rate": 0.000592391067425338,
      "loss": 3.2895,
      "step": 16553
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6064444780349731,
      "learning_rate": 0.0005923901519624226,
      "loss": 3.12,
      "step": 16554
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6793849468231201,
      "learning_rate": 0.0005923892364451461,
      "loss": 3.0348,
      "step": 16555
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6778947114944458,
      "learning_rate": 0.0005923883208735092,
      "loss": 3.2034,
      "step": 16556
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2975221872329712,
      "learning_rate": 0.0005923874052475118,
      "loss": 3.2425,
      "step": 16557
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.360518455505371,
      "learning_rate": 0.0005923864895671538,
      "loss": 3.0508,
      "step": 16558
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.707795262336731,
      "learning_rate": 0.0005923855738324357,
      "loss": 2.9897,
      "step": 16559
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.180316925048828,
      "learning_rate": 0.0005923846580433578,
      "loss": 2.7892,
      "step": 16560
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4330037832260132,
      "learning_rate": 0.0005923837421999199,
      "loss": 2.9972,
      "step": 16561
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9895695447921753,
      "learning_rate": 0.0005923828263021224,
      "loss": 2.9507,
      "step": 16562
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6242437362670898,
      "learning_rate": 0.0005923819103499654,
      "loss": 3.0565,
      "step": 16563
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5845123529434204,
      "learning_rate": 0.0005923809943434491,
      "loss": 3.4386,
      "step": 16564
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.667851448059082,
      "learning_rate": 0.0005923800782825737,
      "loss": 3.3447,
      "step": 16565
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.656144380569458,
      "learning_rate": 0.0005923791621673393,
      "loss": 3.166,
      "step": 16566
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.30682635307312,
      "learning_rate": 0.000592378245997746,
      "loss": 3.081,
      "step": 16567
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5855578184127808,
      "learning_rate": 0.0005923773297737942,
      "loss": 2.8475,
      "step": 16568
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.996884822845459,
      "learning_rate": 0.0005923764134954838,
      "loss": 3.0852,
      "step": 16569
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.152667999267578,
      "learning_rate": 0.0005923754971628152,
      "loss": 3.2012,
      "step": 16570
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3965132236480713,
      "learning_rate": 0.0005923745807757885,
      "loss": 3.1558,
      "step": 16571
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.267578363418579,
      "learning_rate": 0.0005923736643344037,
      "loss": 3.0334,
      "step": 16572
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0080623626708984,
      "learning_rate": 0.0005923727478386612,
      "loss": 2.9295,
      "step": 16573
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7473739385604858,
      "learning_rate": 0.0005923718312885612,
      "loss": 3.2042,
      "step": 16574
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2459087371826172,
      "learning_rate": 0.0005923709146841036,
      "loss": 3.3441,
      "step": 16575
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.771788239479065,
      "learning_rate": 0.0005923699980252887,
      "loss": 3.1566,
      "step": 16576
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.178295850753784,
      "learning_rate": 0.0005923690813121168,
      "loss": 2.9257,
      "step": 16577
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5248819589614868,
      "learning_rate": 0.000592368164544588,
      "loss": 3.1178,
      "step": 16578
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2973064184188843,
      "learning_rate": 0.0005923672477227024,
      "loss": 3.2066,
      "step": 16579
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.597891092300415,
      "learning_rate": 0.0005923663308464602,
      "loss": 3.1299,
      "step": 16580
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.9078946113586426,
      "learning_rate": 0.0005923654139158616,
      "loss": 2.9985,
      "step": 16581
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7182300090789795,
      "learning_rate": 0.0005923644969309068,
      "loss": 2.9406,
      "step": 16582
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9984771013259888,
      "learning_rate": 0.0005923635798915958,
      "loss": 3.1228,
      "step": 16583
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.496877431869507,
      "learning_rate": 0.000592362662797929,
      "loss": 3.2227,
      "step": 16584
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3390820026397705,
      "learning_rate": 0.0005923617456499063,
      "loss": 3.1243,
      "step": 16585
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.481942892074585,
      "learning_rate": 0.0005923608284475281,
      "loss": 3.0334,
      "step": 16586
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8822200298309326,
      "learning_rate": 0.0005923599111907946,
      "loss": 3.1694,
      "step": 16587
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7132381200790405,
      "learning_rate": 0.0005923589938797058,
      "loss": 3.0527,
      "step": 16588
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3630508184432983,
      "learning_rate": 0.000592358076514262,
      "loss": 2.8946,
      "step": 16589
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.144070625305176,
      "learning_rate": 0.0005923571590944634,
      "loss": 3.1008,
      "step": 16590
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.332969903945923,
      "learning_rate": 0.0005923562416203099,
      "loss": 3.2966,
      "step": 16591
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3871347904205322,
      "learning_rate": 0.0005923553240918019,
      "loss": 2.9818,
      "step": 16592
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.170931816101074,
      "learning_rate": 0.0005923544065089395,
      "loss": 3.0038,
      "step": 16593
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9515910148620605,
      "learning_rate": 0.0005923534888717229,
      "loss": 2.9829,
      "step": 16594
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4445806741714478,
      "learning_rate": 0.0005923525711801523,
      "loss": 3.3626,
      "step": 16595
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2740602493286133,
      "learning_rate": 0.0005923516534342278,
      "loss": 3.1834,
      "step": 16596
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3633980751037598,
      "learning_rate": 0.0005923507356339498,
      "loss": 3.3136,
      "step": 16597
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3204485177993774,
      "learning_rate": 0.000592349817779318,
      "loss": 3.1888,
      "step": 16598
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3509732484817505,
      "learning_rate": 0.0005923488998703331,
      "loss": 3.1299,
      "step": 16599
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8103207349777222,
      "learning_rate": 0.0005923479819069949,
      "loss": 3.0053,
      "step": 16600
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6267848014831543,
      "learning_rate": 0.0005923470638893037,
      "loss": 3.0536,
      "step": 16601
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8577415943145752,
      "learning_rate": 0.0005923461458172596,
      "loss": 2.8517,
      "step": 16602
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5286657810211182,
      "learning_rate": 0.0005923452276908629,
      "loss": 3.071,
      "step": 16603
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.297800064086914,
      "learning_rate": 0.0005923443095101137,
      "loss": 2.961,
      "step": 16604
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8441822528839111,
      "learning_rate": 0.0005923433912750122,
      "loss": 3.14,
      "step": 16605
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5974010229110718,
      "learning_rate": 0.0005923424729855585,
      "loss": 2.9944,
      "step": 16606
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8001281023025513,
      "learning_rate": 0.0005923415546417529,
      "loss": 2.9761,
      "step": 16607
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6694014072418213,
      "learning_rate": 0.0005923406362435954,
      "loss": 3.143,
      "step": 16608
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.446789026260376,
      "learning_rate": 0.0005923397177910863,
      "loss": 2.8475,
      "step": 16609
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.641850471496582,
      "learning_rate": 0.0005923387992842257,
      "loss": 3.0728,
      "step": 16610
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5485022068023682,
      "learning_rate": 0.0005923378807230138,
      "loss": 3.1686,
      "step": 16611
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6091008186340332,
      "learning_rate": 0.0005923369621074507,
      "loss": 3.2265,
      "step": 16612
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8188698291778564,
      "learning_rate": 0.0005923360434375367,
      "loss": 3.1122,
      "step": 16613
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0697126388549805,
      "learning_rate": 0.000592335124713272,
      "loss": 3.1824,
      "step": 16614
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8344191312789917,
      "learning_rate": 0.0005923342059346566,
      "loss": 3.139,
      "step": 16615
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9222513437271118,
      "learning_rate": 0.0005923332871016907,
      "loss": 3.2605,
      "step": 16616
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8994998931884766,
      "learning_rate": 0.0005923323682143747,
      "loss": 3.1356,
      "step": 16617
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4302524328231812,
      "learning_rate": 0.0005923314492727084,
      "loss": 3.0918,
      "step": 16618
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5553460121154785,
      "learning_rate": 0.0005923305302766922,
      "loss": 2.9899,
      "step": 16619
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3384449481964111,
      "learning_rate": 0.0005923296112263263,
      "loss": 3.2304,
      "step": 16620
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8213746547698975,
      "learning_rate": 0.0005923286921216108,
      "loss": 3.1527,
      "step": 16621
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3355066776275635,
      "learning_rate": 0.0005923277729625459,
      "loss": 3.1169,
      "step": 16622
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6998207569122314,
      "learning_rate": 0.0005923268537491317,
      "loss": 3.1907,
      "step": 16623
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8503093719482422,
      "learning_rate": 0.0005923259344813685,
      "loss": 3.0718,
      "step": 16624
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.084721565246582,
      "learning_rate": 0.0005923250151592563,
      "loss": 3.2249,
      "step": 16625
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.843268871307373,
      "learning_rate": 0.0005923240957827953,
      "loss": 3.4805,
      "step": 16626
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.109628200531006,
      "learning_rate": 0.0005923231763519859,
      "loss": 3.0785,
      "step": 16627
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2601821422576904,
      "learning_rate": 0.000592322256866828,
      "loss": 3.0735,
      "step": 16628
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6017837524414062,
      "learning_rate": 0.0005923213373273219,
      "loss": 3.0041,
      "step": 16629
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6040486097335815,
      "learning_rate": 0.0005923204177334677,
      "loss": 2.8845,
      "step": 16630
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1744322776794434,
      "learning_rate": 0.0005923194980852656,
      "loss": 3.2416,
      "step": 16631
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.6934714317321777,
      "learning_rate": 0.000592318578382716,
      "loss": 2.9027,
      "step": 16632
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.699662446975708,
      "learning_rate": 0.0005923176586258187,
      "loss": 3.1101,
      "step": 16633
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0602867603302,
      "learning_rate": 0.000592316738814574,
      "loss": 3.0128,
      "step": 16634
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.973116397857666,
      "learning_rate": 0.0005923158189489821,
      "loss": 3.1332,
      "step": 16635
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4572826623916626,
      "learning_rate": 0.0005923148990290432,
      "loss": 3.1488,
      "step": 16636
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5095527172088623,
      "learning_rate": 0.0005923139790547575,
      "loss": 3.1097,
      "step": 16637
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5009335279464722,
      "learning_rate": 0.000592313059026125,
      "loss": 3.2199,
      "step": 16638
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6166152954101562,
      "learning_rate": 0.0005923121389431462,
      "loss": 3.1426,
      "step": 16639
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3224514722824097,
      "learning_rate": 0.0005923112188058208,
      "loss": 3.358,
      "step": 16640
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6092735528945923,
      "learning_rate": 0.0005923102986141493,
      "loss": 2.9256,
      "step": 16641
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4066739082336426,
      "learning_rate": 0.0005923093783681318,
      "loss": 3.2661,
      "step": 16642
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5560264587402344,
      "learning_rate": 0.0005923084580677686,
      "loss": 3.1736,
      "step": 16643
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.502659559249878,
      "learning_rate": 0.0005923075377130596,
      "loss": 3.13,
      "step": 16644
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2737730741500854,
      "learning_rate": 0.0005923066173040051,
      "loss": 2.8133,
      "step": 16645
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.443045973777771,
      "learning_rate": 0.0005923056968406054,
      "loss": 3.1931,
      "step": 16646
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7726954221725464,
      "learning_rate": 0.0005923047763228604,
      "loss": 3.222,
      "step": 16647
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.553381085395813,
      "learning_rate": 0.0005923038557507706,
      "loss": 3.075,
      "step": 16648
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6659725904464722,
      "learning_rate": 0.0005923029351243359,
      "loss": 3.122,
      "step": 16649
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0181097984313965,
      "learning_rate": 0.0005923020144435565,
      "loss": 3.1271,
      "step": 16650
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.63253653049469,
      "learning_rate": 0.0005923010937084328,
      "loss": 3.1409,
      "step": 16651
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8627839088439941,
      "learning_rate": 0.0005923001729189647,
      "loss": 3.188,
      "step": 16652
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9555960893630981,
      "learning_rate": 0.0005922992520751524,
      "loss": 3.3934,
      "step": 16653
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.980434775352478,
      "learning_rate": 0.0005922983311769963,
      "loss": 3.1544,
      "step": 16654
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5907700061798096,
      "learning_rate": 0.0005922974102244964,
      "loss": 2.9881,
      "step": 16655
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5932694673538208,
      "learning_rate": 0.0005922964892176528,
      "loss": 2.878,
      "step": 16656
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.386087417602539,
      "learning_rate": 0.0005922955681564658,
      "loss": 2.9944,
      "step": 16657
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.469958543777466,
      "learning_rate": 0.0005922946470409356,
      "loss": 2.8799,
      "step": 16658
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5960874557495117,
      "learning_rate": 0.0005922937258710623,
      "loss": 3.1271,
      "step": 16659
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7278169393539429,
      "learning_rate": 0.000592292804646846,
      "loss": 3.0735,
      "step": 16660
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.372699022293091,
      "learning_rate": 0.0005922918833682869,
      "loss": 2.9618,
      "step": 16661
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.213972806930542,
      "learning_rate": 0.0005922909620353855,
      "loss": 3.0911,
      "step": 16662
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8354060649871826,
      "learning_rate": 0.0005922900406481414,
      "loss": 2.9444,
      "step": 16663
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7859910726547241,
      "learning_rate": 0.0005922891192065552,
      "loss": 3.0065,
      "step": 16664
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5464518070220947,
      "learning_rate": 0.000592288197710627,
      "loss": 3.0317,
      "step": 16665
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0964038372039795,
      "learning_rate": 0.0005922872761603568,
      "loss": 2.986,
      "step": 16666
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7052972316741943,
      "learning_rate": 0.0005922863545557449,
      "loss": 3.3021,
      "step": 16667
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7733169794082642,
      "learning_rate": 0.0005922854328967915,
      "loss": 3.2198,
      "step": 16668
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.1043636798858643,
      "learning_rate": 0.0005922845111834966,
      "loss": 2.8376,
      "step": 16669
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9336804151535034,
      "learning_rate": 0.0005922835894158605,
      "loss": 3.0086,
      "step": 16670
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9850679636001587,
      "learning_rate": 0.0005922826675938836,
      "loss": 3.1656,
      "step": 16671
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0193378925323486,
      "learning_rate": 0.0005922817457175656,
      "loss": 3.2554,
      "step": 16672
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.934458613395691,
      "learning_rate": 0.000592280823786907,
      "loss": 3.2874,
      "step": 16673
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4229223728179932,
      "learning_rate": 0.0005922799018019078,
      "loss": 3.256,
      "step": 16674
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7476364374160767,
      "learning_rate": 0.0005922789797625683,
      "loss": 3.1241,
      "step": 16675
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8370404243469238,
      "learning_rate": 0.0005922780576688886,
      "loss": 3.0623,
      "step": 16676
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9847639799118042,
      "learning_rate": 0.0005922771355208689,
      "loss": 3.1222,
      "step": 16677
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.008692979812622,
      "learning_rate": 0.0005922762133185093,
      "loss": 2.9772,
      "step": 16678
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.1595447063446045,
      "learning_rate": 0.0005922752910618101,
      "loss": 2.9075,
      "step": 16679
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4479856491088867,
      "learning_rate": 0.0005922743687507715,
      "loss": 3.1117,
      "step": 16680
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7649248838424683,
      "learning_rate": 0.0005922734463853934,
      "loss": 2.9642,
      "step": 16681
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7786564826965332,
      "learning_rate": 0.0005922725239656762,
      "loss": 3.0003,
      "step": 16682
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.7189300060272217,
      "learning_rate": 0.0005922716014916202,
      "loss": 3.2224,
      "step": 16683
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5572879314422607,
      "learning_rate": 0.0005922706789632252,
      "loss": 3.1771,
      "step": 16684
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.939007043838501,
      "learning_rate": 0.0005922697563804916,
      "loss": 3.2963,
      "step": 16685
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.589788794517517,
      "learning_rate": 0.0005922688337434195,
      "loss": 3.1573,
      "step": 16686
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.451314091682434,
      "learning_rate": 0.0005922679110520092,
      "loss": 2.9828,
      "step": 16687
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4399181604385376,
      "learning_rate": 0.0005922669883062608,
      "loss": 3.2311,
      "step": 16688
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7048289775848389,
      "learning_rate": 0.0005922660655061744,
      "loss": 3.1092,
      "step": 16689
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.48790442943573,
      "learning_rate": 0.0005922651426517501,
      "loss": 3.1151,
      "step": 16690
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3194118738174438,
      "learning_rate": 0.0005922642197429884,
      "loss": 3.2398,
      "step": 16691
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.374124526977539,
      "learning_rate": 0.0005922632967798891,
      "loss": 3.0432,
      "step": 16692
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5168266296386719,
      "learning_rate": 0.0005922623737624526,
      "loss": 3.1143,
      "step": 16693
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5829201936721802,
      "learning_rate": 0.0005922614506906791,
      "loss": 3.0501,
      "step": 16694
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.452816367149353,
      "learning_rate": 0.0005922605275645685,
      "loss": 3.1459,
      "step": 16695
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.841410517692566,
      "learning_rate": 0.0005922596043841212,
      "loss": 3.056,
      "step": 16696
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3609453439712524,
      "learning_rate": 0.0005922586811493374,
      "loss": 3.2053,
      "step": 16697
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7957022190093994,
      "learning_rate": 0.0005922577578602172,
      "loss": 3.1018,
      "step": 16698
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.578739881515503,
      "learning_rate": 0.0005922568345167607,
      "loss": 3.1188,
      "step": 16699
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5809730291366577,
      "learning_rate": 0.0005922559111189681,
      "loss": 3.0177,
      "step": 16700
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3440093994140625,
      "learning_rate": 0.0005922549876668398,
      "loss": 3.2915,
      "step": 16701
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6049177646636963,
      "learning_rate": 0.0005922540641603755,
      "loss": 2.9244,
      "step": 16702
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.663867473602295,
      "learning_rate": 0.0005922531405995759,
      "loss": 3.3167,
      "step": 16703
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.687689185142517,
      "learning_rate": 0.0005922522169844408,
      "loss": 3.2142,
      "step": 16704
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4267560243606567,
      "learning_rate": 0.0005922512933149705,
      "loss": 3.0143,
      "step": 16705
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.139371871948242,
      "learning_rate": 0.0005922503695911652,
      "loss": 3.1711,
      "step": 16706
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.471903681755066,
      "learning_rate": 0.000592249445813025,
      "loss": 3.1058,
      "step": 16707
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4925520420074463,
      "learning_rate": 0.0005922485219805502,
      "loss": 3.1526,
      "step": 16708
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8599772453308105,
      "learning_rate": 0.0005922475980937408,
      "loss": 2.9398,
      "step": 16709
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.721923828125,
      "learning_rate": 0.0005922466741525971,
      "loss": 2.8994,
      "step": 16710
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5567830801010132,
      "learning_rate": 0.0005922457501571192,
      "loss": 3.0436,
      "step": 16711
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.624688982963562,
      "learning_rate": 0.0005922448261073073,
      "loss": 3.1861,
      "step": 16712
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.034200668334961,
      "learning_rate": 0.0005922439020031615,
      "loss": 2.8806,
      "step": 16713
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.997937560081482,
      "learning_rate": 0.0005922429778446822,
      "loss": 3.1363,
      "step": 16714
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3734595775604248,
      "learning_rate": 0.0005922420536318693,
      "loss": 3.2278,
      "step": 16715
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.486311435699463,
      "learning_rate": 0.0005922411293647231,
      "loss": 2.9694,
      "step": 16716
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3653565645217896,
      "learning_rate": 0.0005922402050432438,
      "loss": 3.1235,
      "step": 16717
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.537615418434143,
      "learning_rate": 0.0005922392806674315,
      "loss": 3.1218,
      "step": 16718
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2645829916000366,
      "learning_rate": 0.0005922383562372864,
      "loss": 3.1201,
      "step": 16719
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3888218402862549,
      "learning_rate": 0.0005922374317528087,
      "loss": 3.2306,
      "step": 16720
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7918418645858765,
      "learning_rate": 0.0005922365072139986,
      "loss": 3.1003,
      "step": 16721
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.967315435409546,
      "learning_rate": 0.0005922355826208561,
      "loss": 3.3508,
      "step": 16722
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5097801685333252,
      "learning_rate": 0.0005922346579733815,
      "loss": 3.4137,
      "step": 16723
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4439587593078613,
      "learning_rate": 0.000592233733271575,
      "loss": 3.2896,
      "step": 16724
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3771803379058838,
      "learning_rate": 0.0005922328085154367,
      "loss": 3.077,
      "step": 16725
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.646152973175049,
      "learning_rate": 0.0005922318837049668,
      "loss": 3.1949,
      "step": 16726
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9765729904174805,
      "learning_rate": 0.0005922309588401656,
      "loss": 3.121,
      "step": 16727
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8831613063812256,
      "learning_rate": 0.000592230033921033,
      "loss": 3.1352,
      "step": 16728
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.888546347618103,
      "learning_rate": 0.0005922291089475693,
      "loss": 3.0758,
      "step": 16729
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.499959111213684,
      "learning_rate": 0.0005922281839197748,
      "loss": 3.1782,
      "step": 16730
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6146056652069092,
      "learning_rate": 0.0005922272588376495,
      "loss": 2.9805,
      "step": 16731
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4409812688827515,
      "learning_rate": 0.0005922263337011937,
      "loss": 3.0977,
      "step": 16732
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5772984027862549,
      "learning_rate": 0.0005922254085104075,
      "loss": 3.3028,
      "step": 16733
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8637828826904297,
      "learning_rate": 0.000592224483265291,
      "loss": 3.1252,
      "step": 16734
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6137608289718628,
      "learning_rate": 0.0005922235579658445,
      "loss": 2.8247,
      "step": 16735
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.552415132522583,
      "learning_rate": 0.0005922226326120682,
      "loss": 3.0987,
      "step": 16736
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7454941272735596,
      "learning_rate": 0.000592221707203962,
      "loss": 3.1342,
      "step": 16737
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6229429244995117,
      "learning_rate": 0.0005922207817415264,
      "loss": 3.177,
      "step": 16738
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8614702224731445,
      "learning_rate": 0.0005922198562247615,
      "loss": 3.4037,
      "step": 16739
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.018840789794922,
      "learning_rate": 0.0005922189306536673,
      "loss": 3.0004,
      "step": 16740
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8858742713928223,
      "learning_rate": 0.0005922180050282442,
      "loss": 3.0994,
      "step": 16741
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5720009803771973,
      "learning_rate": 0.0005922170793484922,
      "loss": 3.1436,
      "step": 16742
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6146794557571411,
      "learning_rate": 0.0005922161536144116,
      "loss": 3.1906,
      "step": 16743
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8274073600769043,
      "learning_rate": 0.0005922152278260023,
      "loss": 3.0694,
      "step": 16744
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7256264686584473,
      "learning_rate": 0.0005922143019832649,
      "loss": 3.1848,
      "step": 16745
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6464821100234985,
      "learning_rate": 0.0005922133760861993,
      "loss": 3.1268,
      "step": 16746
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4934310913085938,
      "learning_rate": 0.0005922124501348056,
      "loss": 3.16,
      "step": 16747
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.485064148902893,
      "learning_rate": 0.000592211524129084,
      "loss": 3.2501,
      "step": 16748
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4278465509414673,
      "learning_rate": 0.0005922105980690351,
      "loss": 3.1693,
      "step": 16749
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7494715452194214,
      "learning_rate": 0.0005922096719546585,
      "loss": 3.3628,
      "step": 16750
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7173434495925903,
      "learning_rate": 0.0005922087457859546,
      "loss": 2.9484,
      "step": 16751
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8211206197738647,
      "learning_rate": 0.0005922078195629236,
      "loss": 3.1544,
      "step": 16752
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5768473148345947,
      "learning_rate": 0.0005922068932855657,
      "loss": 3.069,
      "step": 16753
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3167139291763306,
      "learning_rate": 0.0005922059669538811,
      "loss": 3.1794,
      "step": 16754
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5357766151428223,
      "learning_rate": 0.0005922050405678696,
      "loss": 2.9958,
      "step": 16755
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7517757415771484,
      "learning_rate": 0.0005922041141275319,
      "loss": 3.2328,
      "step": 16756
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6125462055206299,
      "learning_rate": 0.0005922031876328679,
      "loss": 3.2086,
      "step": 16757
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0790157318115234,
      "learning_rate": 0.0005922022610838778,
      "loss": 3.3428,
      "step": 16758
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8439810276031494,
      "learning_rate": 0.0005922013344805618,
      "loss": 3.3215,
      "step": 16759
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4353731870651245,
      "learning_rate": 0.0005922004078229202,
      "loss": 2.9962,
      "step": 16760
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8320000171661377,
      "learning_rate": 0.0005921994811109528,
      "loss": 3.1611,
      "step": 16761
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4645599126815796,
      "learning_rate": 0.0005921985543446601,
      "loss": 3.1379,
      "step": 16762
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3432605266571045,
      "learning_rate": 0.0005921976275240421,
      "loss": 3.2303,
      "step": 16763
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8578753471374512,
      "learning_rate": 0.0005921967006490991,
      "loss": 2.9829,
      "step": 16764
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2567782402038574,
      "learning_rate": 0.0005921957737198313,
      "loss": 3.0885,
      "step": 16765
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7211061716079712,
      "learning_rate": 0.0005921948467362387,
      "loss": 3.1999,
      "step": 16766
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.538155198097229,
      "learning_rate": 0.0005921939196983215,
      "loss": 2.9302,
      "step": 16767
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7215362787246704,
      "learning_rate": 0.00059219299260608,
      "loss": 3.1551,
      "step": 16768
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4956936836242676,
      "learning_rate": 0.0005921920654595143,
      "loss": 3.1087,
      "step": 16769
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7152305841445923,
      "learning_rate": 0.0005921911382586247,
      "loss": 3.3088,
      "step": 16770
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6308764219284058,
      "learning_rate": 0.0005921902110034111,
      "loss": 3.258,
      "step": 16771
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7531089782714844,
      "learning_rate": 0.0005921892836938737,
      "loss": 3.0631,
      "step": 16772
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7626962661743164,
      "learning_rate": 0.0005921883563300131,
      "loss": 3.2211,
      "step": 16773
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4160048961639404,
      "learning_rate": 0.000592187428911829,
      "loss": 3.1521,
      "step": 16774
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2854511737823486,
      "learning_rate": 0.0005921865014393219,
      "loss": 3.2112,
      "step": 16775
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.401714563369751,
      "learning_rate": 0.0005921855739124916,
      "loss": 3.3148,
      "step": 16776
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4106690883636475,
      "learning_rate": 0.0005921846463313386,
      "loss": 3.1677,
      "step": 16777
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7823656797409058,
      "learning_rate": 0.000592183718695863,
      "loss": 2.9901,
      "step": 16778
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9795560836791992,
      "learning_rate": 0.0005921827910060648,
      "loss": 3.1574,
      "step": 16779
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.625516653060913,
      "learning_rate": 0.0005921818632619444,
      "loss": 3.145,
      "step": 16780
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9164152145385742,
      "learning_rate": 0.0005921809354635018,
      "loss": 3.3155,
      "step": 16781
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.891940951347351,
      "learning_rate": 0.0005921800076107372,
      "loss": 3.295,
      "step": 16782
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4148463010787964,
      "learning_rate": 0.000592179079703651,
      "loss": 3.1679,
      "step": 16783
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8203999996185303,
      "learning_rate": 0.0005921781517422431,
      "loss": 2.9094,
      "step": 16784
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3191497325897217,
      "learning_rate": 0.0005921772237265138,
      "loss": 2.9424,
      "step": 16785
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.293136715888977,
      "learning_rate": 0.0005921762956564632,
      "loss": 2.8858,
      "step": 16786
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5103154182434082,
      "learning_rate": 0.0005921753675320916,
      "loss": 3.1836,
      "step": 16787
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.81968355178833,
      "learning_rate": 0.0005921744393533989,
      "loss": 2.9781,
      "step": 16788
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.666257381439209,
      "learning_rate": 0.0005921735111203856,
      "loss": 2.9868,
      "step": 16789
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.454820394515991,
      "learning_rate": 0.0005921725828330518,
      "loss": 3.0905,
      "step": 16790
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5873807668685913,
      "learning_rate": 0.0005921716544913974,
      "loss": 3.0593,
      "step": 16791
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3751842975616455,
      "learning_rate": 0.0005921707260954229,
      "loss": 3.1511,
      "step": 16792
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.713777780532837,
      "learning_rate": 0.0005921697976451283,
      "loss": 2.7997,
      "step": 16793
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3484680652618408,
      "learning_rate": 0.0005921688691405138,
      "loss": 2.8094,
      "step": 16794
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5562821626663208,
      "learning_rate": 0.0005921679405815796,
      "loss": 3.0225,
      "step": 16795
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7026455402374268,
      "learning_rate": 0.000592167011968326,
      "loss": 3.1763,
      "step": 16796
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.40083909034729,
      "learning_rate": 0.0005921660833007529,
      "loss": 3.2376,
      "step": 16797
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7442786693572998,
      "learning_rate": 0.0005921651545788606,
      "loss": 3.2272,
      "step": 16798
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6080251932144165,
      "learning_rate": 0.0005921642258026493,
      "loss": 3.1176,
      "step": 16799
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6338903903961182,
      "learning_rate": 0.0005921632969721192,
      "loss": 3.2124,
      "step": 16800
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7535868883132935,
      "learning_rate": 0.0005921623680872704,
      "loss": 3.1317,
      "step": 16801
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.151010036468506,
      "learning_rate": 0.0005921614391481032,
      "loss": 3.2429,
      "step": 16802
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5574415922164917,
      "learning_rate": 0.0005921605101546175,
      "loss": 3.0727,
      "step": 16803
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5545910596847534,
      "learning_rate": 0.0005921595811068137,
      "loss": 3.1202,
      "step": 16804
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5017669200897217,
      "learning_rate": 0.000592158652004692,
      "loss": 3.1785,
      "step": 16805
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5987826585769653,
      "learning_rate": 0.0005921577228482524,
      "loss": 3.0995,
      "step": 16806
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.30818772315979,
      "learning_rate": 0.0005921567936374953,
      "loss": 3.3079,
      "step": 16807
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4418723583221436,
      "learning_rate": 0.0005921558643724206,
      "loss": 3.2685,
      "step": 16808
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6507654190063477,
      "learning_rate": 0.0005921549350530286,
      "loss": 3.057,
      "step": 16809
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9461451768875122,
      "learning_rate": 0.0005921540056793195,
      "loss": 3.1379,
      "step": 16810
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9692683219909668,
      "learning_rate": 0.0005921530762512936,
      "loss": 3.0264,
      "step": 16811
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.378082513809204,
      "learning_rate": 0.0005921521467689508,
      "loss": 3.4172,
      "step": 16812
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2787450551986694,
      "learning_rate": 0.0005921512172322915,
      "loss": 3.107,
      "step": 16813
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.070211887359619,
      "learning_rate": 0.0005921502876413157,
      "loss": 3.0097,
      "step": 16814
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0465140342712402,
      "learning_rate": 0.0005921493579960236,
      "loss": 3.4254,
      "step": 16815
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9956858158111572,
      "learning_rate": 0.0005921484282964154,
      "loss": 3.1598,
      "step": 16816
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5384855270385742,
      "learning_rate": 0.0005921474985424914,
      "loss": 3.1421,
      "step": 16817
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2769172191619873,
      "learning_rate": 0.0005921465687342516,
      "loss": 3.2813,
      "step": 16818
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9942277669906616,
      "learning_rate": 0.0005921456388716963,
      "loss": 3.1375,
      "step": 16819
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3197381496429443,
      "learning_rate": 0.0005921447089548255,
      "loss": 3.1017,
      "step": 16820
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.316091775894165,
      "learning_rate": 0.0005921437789836395,
      "loss": 3.3109,
      "step": 16821
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4158833026885986,
      "learning_rate": 0.0005921428489581385,
      "loss": 3.0718,
      "step": 16822
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4266375303268433,
      "learning_rate": 0.0005921419188783226,
      "loss": 3.3677,
      "step": 16823
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.88838791847229,
      "learning_rate": 0.0005921409887441921,
      "loss": 3.3692,
      "step": 16824
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7255629301071167,
      "learning_rate": 0.0005921400585557471,
      "loss": 3.1567,
      "step": 16825
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8942042589187622,
      "learning_rate": 0.0005921391283129876,
      "loss": 3.1767,
      "step": 16826
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.976540207862854,
      "learning_rate": 0.000592138198015914,
      "loss": 3.3415,
      "step": 16827
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5410538911819458,
      "learning_rate": 0.0005921372676645264,
      "loss": 3.2461,
      "step": 16828
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6684240102767944,
      "learning_rate": 0.0005921363372588249,
      "loss": 3.3368,
      "step": 16829
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2483960390090942,
      "learning_rate": 0.0005921354067988098,
      "loss": 3.1108,
      "step": 16830
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9046605825424194,
      "learning_rate": 0.0005921344762844812,
      "loss": 2.8853,
      "step": 16831
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6127122640609741,
      "learning_rate": 0.0005921335457158393,
      "loss": 3.1332,
      "step": 16832
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3061004877090454,
      "learning_rate": 0.0005921326150928842,
      "loss": 3.0785,
      "step": 16833
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.435431718826294,
      "learning_rate": 0.0005921316844156162,
      "loss": 3.5008,
      "step": 16834
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.407565712928772,
      "learning_rate": 0.0005921307536840354,
      "loss": 3.1347,
      "step": 16835
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0164272785186768,
      "learning_rate": 0.0005921298228981419,
      "loss": 3.088,
      "step": 16836
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.310520887374878,
      "learning_rate": 0.000592128892057936,
      "loss": 2.8713,
      "step": 16837
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7396962642669678,
      "learning_rate": 0.000592127961163418,
      "loss": 3.1149,
      "step": 16838
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.484449863433838,
      "learning_rate": 0.0005921270302145877,
      "loss": 3.2348,
      "step": 16839
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.304839849472046,
      "learning_rate": 0.0005921260992114455,
      "loss": 3.2846,
      "step": 16840
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.850606918334961,
      "learning_rate": 0.0005921251681539915,
      "loss": 3.0928,
      "step": 16841
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.7676033973693848,
      "learning_rate": 0.000592124237042226,
      "loss": 2.6808,
      "step": 16842
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9796775579452515,
      "learning_rate": 0.000592123305876149,
      "loss": 3.1417,
      "step": 16843
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3560075759887695,
      "learning_rate": 0.0005921223746557609,
      "loss": 3.076,
      "step": 16844
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.036834239959717,
      "learning_rate": 0.0005921214433810616,
      "loss": 3.0033,
      "step": 16845
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2393195629119873,
      "learning_rate": 0.0005921205120520515,
      "loss": 3.1675,
      "step": 16846
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.400977373123169,
      "learning_rate": 0.0005921195806687307,
      "loss": 3.1749,
      "step": 16847
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9396824836730957,
      "learning_rate": 0.0005921186492310992,
      "loss": 3.1086,
      "step": 16848
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.18839955329895,
      "learning_rate": 0.0005921177177391575,
      "loss": 3.0551,
      "step": 16849
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5114694833755493,
      "learning_rate": 0.0005921167861929055,
      "loss": 3.2236,
      "step": 16850
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.227661609649658,
      "learning_rate": 0.0005921158545923436,
      "loss": 3.0537,
      "step": 16851
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.490509033203125,
      "learning_rate": 0.0005921149229374717,
      "loss": 3.0744,
      "step": 16852
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5896000862121582,
      "learning_rate": 0.0005921139912282901,
      "loss": 2.8333,
      "step": 16853
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0210278034210205,
      "learning_rate": 0.0005921130594647991,
      "loss": 2.9794,
      "step": 16854
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5993274450302124,
      "learning_rate": 0.0005921121276469988,
      "loss": 3.0777,
      "step": 16855
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3338521718978882,
      "learning_rate": 0.0005921111957748892,
      "loss": 3.1241,
      "step": 16856
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9743305444717407,
      "learning_rate": 0.0005921102638484707,
      "loss": 3.0288,
      "step": 16857
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6644389629364014,
      "learning_rate": 0.0005921093318677435,
      "loss": 3.1762,
      "step": 16858
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5301672220230103,
      "learning_rate": 0.0005921083998327076,
      "loss": 3.0456,
      "step": 16859
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3720457553863525,
      "learning_rate": 0.0005921074677433631,
      "loss": 3.2698,
      "step": 16860
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6555836200714111,
      "learning_rate": 0.0005921065355997103,
      "loss": 2.8343,
      "step": 16861
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3771708011627197,
      "learning_rate": 0.0005921056034017496,
      "loss": 3.1192,
      "step": 16862
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4238431453704834,
      "learning_rate": 0.0005921046711494808,
      "loss": 2.9998,
      "step": 16863
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3394633531570435,
      "learning_rate": 0.0005921037388429042,
      "loss": 3.4027,
      "step": 16864
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4937983751296997,
      "learning_rate": 0.0005921028064820201,
      "loss": 3.1499,
      "step": 16865
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3187098503112793,
      "learning_rate": 0.0005921018740668285,
      "loss": 2.959,
      "step": 16866
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.988160252571106,
      "learning_rate": 0.0005921009415973298,
      "loss": 3.2264,
      "step": 16867
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.427727222442627,
      "learning_rate": 0.0005921000090735239,
      "loss": 2.9944,
      "step": 16868
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.811335802078247,
      "learning_rate": 0.000592099076495411,
      "loss": 3.0604,
      "step": 16869
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5895588397979736,
      "learning_rate": 0.0005920981438629915,
      "loss": 3.2196,
      "step": 16870
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4440679550170898,
      "learning_rate": 0.0005920972111762654,
      "loss": 2.9912,
      "step": 16871
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7880158424377441,
      "learning_rate": 0.0005920962784352328,
      "loss": 3.1915,
      "step": 16872
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.255049705505371,
      "learning_rate": 0.0005920953456398941,
      "loss": 3.2275,
      "step": 16873
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4925593137741089,
      "learning_rate": 0.0005920944127902494,
      "loss": 3.2837,
      "step": 16874
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5572644472122192,
      "learning_rate": 0.0005920934798862989,
      "loss": 3.2271,
      "step": 16875
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6411455869674683,
      "learning_rate": 0.0005920925469280426,
      "loss": 3.3046,
      "step": 16876
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.209716796875,
      "learning_rate": 0.0005920916139154808,
      "loss": 3.1328,
      "step": 16877
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2520196437835693,
      "learning_rate": 0.0005920906808486137,
      "loss": 2.9922,
      "step": 16878
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3456900119781494,
      "learning_rate": 0.0005920897477274413,
      "loss": 3.1996,
      "step": 16879
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3794182538986206,
      "learning_rate": 0.0005920888145519639,
      "loss": 3.072,
      "step": 16880
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4279937744140625,
      "learning_rate": 0.0005920878813221818,
      "loss": 3.2355,
      "step": 16881
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3626245260238647,
      "learning_rate": 0.000592086948038095,
      "loss": 3.09,
      "step": 16882
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8321763277053833,
      "learning_rate": 0.0005920860146997038,
      "loss": 3.1776,
      "step": 16883
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.414857268333435,
      "learning_rate": 0.0005920850813070082,
      "loss": 3.0746,
      "step": 16884
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6156156063079834,
      "learning_rate": 0.0005920841478600085,
      "loss": 2.9233,
      "step": 16885
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4911534786224365,
      "learning_rate": 0.0005920832143587048,
      "loss": 3.1439,
      "step": 16886
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5580846071243286,
      "learning_rate": 0.0005920822808030973,
      "loss": 3.2249,
      "step": 16887
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2965586185455322,
      "learning_rate": 0.0005920813471931864,
      "loss": 2.9114,
      "step": 16888
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4335664510726929,
      "learning_rate": 0.0005920804135289718,
      "loss": 2.7638,
      "step": 16889
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.641548752784729,
      "learning_rate": 0.0005920794798104541,
      "loss": 3.2281,
      "step": 16890
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2755825519561768,
      "learning_rate": 0.0005920785460376333,
      "loss": 2.9444,
      "step": 16891
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.511952519416809,
      "learning_rate": 0.0005920776122105095,
      "loss": 3.0695,
      "step": 16892
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5123374462127686,
      "learning_rate": 0.000592076678329083,
      "loss": 3.1757,
      "step": 16893
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4255316257476807,
      "learning_rate": 0.0005920757443933539,
      "loss": 3.1761,
      "step": 16894
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3448199033737183,
      "learning_rate": 0.0005920748104033224,
      "loss": 3.2044,
      "step": 16895
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4815114736557007,
      "learning_rate": 0.0005920738763589888,
      "loss": 3.3916,
      "step": 16896
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3896716833114624,
      "learning_rate": 0.0005920729422603531,
      "loss": 3.1212,
      "step": 16897
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.453033208847046,
      "learning_rate": 0.0005920720081074155,
      "loss": 3.1714,
      "step": 16898
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.597687005996704,
      "learning_rate": 0.0005920710739001761,
      "loss": 3.0134,
      "step": 16899
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6062685251235962,
      "learning_rate": 0.0005920701396386353,
      "loss": 3.1904,
      "step": 16900
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.374252200126648,
      "learning_rate": 0.0005920692053227931,
      "loss": 2.941,
      "step": 16901
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4376637935638428,
      "learning_rate": 0.0005920682709526498,
      "loss": 2.883,
      "step": 16902
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5463931560516357,
      "learning_rate": 0.0005920673365282054,
      "loss": 2.8895,
      "step": 16903
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5619317293167114,
      "learning_rate": 0.0005920664020494602,
      "loss": 2.8924,
      "step": 16904
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0553252696990967,
      "learning_rate": 0.0005920654675164144,
      "loss": 3.1554,
      "step": 16905
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2450246810913086,
      "learning_rate": 0.0005920645329290681,
      "loss": 3.0806,
      "step": 16906
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2748135328292847,
      "learning_rate": 0.0005920635982874213,
      "loss": 3.171,
      "step": 16907
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6449023485183716,
      "learning_rate": 0.0005920626635914746,
      "loss": 3.0716,
      "step": 16908
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.83163321018219,
      "learning_rate": 0.0005920617288412279,
      "loss": 3.0624,
      "step": 16909
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5609337091445923,
      "learning_rate": 0.0005920607940366814,
      "loss": 3.2832,
      "step": 16910
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.582200288772583,
      "learning_rate": 0.0005920598591778351,
      "loss": 3.0667,
      "step": 16911
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7962266206741333,
      "learning_rate": 0.0005920589242646896,
      "loss": 3.2142,
      "step": 16912
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4990074634552002,
      "learning_rate": 0.0005920579892972448,
      "loss": 2.9992,
      "step": 16913
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0404000282287598,
      "learning_rate": 0.0005920570542755008,
      "loss": 3.1281,
      "step": 16914
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3554939031600952,
      "learning_rate": 0.0005920561191994579,
      "loss": 3.378,
      "step": 16915
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.623536229133606,
      "learning_rate": 0.0005920551840691164,
      "loss": 3.2624,
      "step": 16916
    },
    {
      "epoch": 0.22,
      "grad_norm": 4.200619220733643,
      "learning_rate": 0.0005920542488844761,
      "loss": 3.049,
      "step": 16917
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4229689836502075,
      "learning_rate": 0.0005920533136455375,
      "loss": 3.129,
      "step": 16918
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.795112133026123,
      "learning_rate": 0.0005920523783523008,
      "loss": 3.0451,
      "step": 16919
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7330824136734009,
      "learning_rate": 0.0005920514430047659,
      "loss": 2.8892,
      "step": 16920
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.606041669845581,
      "learning_rate": 0.0005920505076029332,
      "loss": 3.1833,
      "step": 16921
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.874685525894165,
      "learning_rate": 0.0005920495721468028,
      "loss": 3.1781,
      "step": 16922
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.53293776512146,
      "learning_rate": 0.0005920486366363748,
      "loss": 2.9539,
      "step": 16923
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.552201271057129,
      "learning_rate": 0.0005920477010716495,
      "loss": 3.2922,
      "step": 16924
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5822131633758545,
      "learning_rate": 0.000592046765452627,
      "loss": 3.2294,
      "step": 16925
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.953561782836914,
      "learning_rate": 0.0005920458297793075,
      "loss": 3.0522,
      "step": 16926
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.265796422958374,
      "learning_rate": 0.0005920448940516911,
      "loss": 3.0584,
      "step": 16927
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9434161186218262,
      "learning_rate": 0.0005920439582697781,
      "loss": 3.0384,
      "step": 16928
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.2003912925720215,
      "learning_rate": 0.0005920430224335687,
      "loss": 3.2318,
      "step": 16929
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0594284534454346,
      "learning_rate": 0.0005920420865430628,
      "loss": 3.2007,
      "step": 16930
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6341460943222046,
      "learning_rate": 0.0005920411505982609,
      "loss": 3.0765,
      "step": 16931
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8783727884292603,
      "learning_rate": 0.000592040214599163,
      "loss": 3.2168,
      "step": 16932
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.554078459739685,
      "learning_rate": 0.0005920392785457694,
      "loss": 3.1603,
      "step": 16933
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3665685653686523,
      "learning_rate": 0.0005920383424380801,
      "loss": 2.9844,
      "step": 16934
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8167030811309814,
      "learning_rate": 0.0005920374062760954,
      "loss": 3.0259,
      "step": 16935
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9647077322006226,
      "learning_rate": 0.0005920364700598153,
      "loss": 2.8632,
      "step": 16936
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.405478835105896,
      "learning_rate": 0.0005920355337892402,
      "loss": 3.1182,
      "step": 16937
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3954602479934692,
      "learning_rate": 0.0005920345974643703,
      "loss": 3.1685,
      "step": 16938
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5401362180709839,
      "learning_rate": 0.0005920336610852055,
      "loss": 2.8264,
      "step": 16939
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3129444122314453,
      "learning_rate": 0.0005920327246517462,
      "loss": 2.9936,
      "step": 16940
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.441462755203247,
      "learning_rate": 0.0005920317881639924,
      "loss": 2.9902,
      "step": 16941
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6755528450012207,
      "learning_rate": 0.0005920308516219445,
      "loss": 3.1812,
      "step": 16942
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5327861309051514,
      "learning_rate": 0.0005920299150256025,
      "loss": 3.0155,
      "step": 16943
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3052254915237427,
      "learning_rate": 0.0005920289783749666,
      "loss": 3.1566,
      "step": 16944
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3253759145736694,
      "learning_rate": 0.0005920280416700371,
      "loss": 2.7769,
      "step": 16945
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.097721576690674,
      "learning_rate": 0.0005920271049108139,
      "loss": 3.3341,
      "step": 16946
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.029639959335327,
      "learning_rate": 0.0005920261680972975,
      "loss": 3.0275,
      "step": 16947
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7374489307403564,
      "learning_rate": 0.000592025231229488,
      "loss": 3.2144,
      "step": 16948
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.841665029525757,
      "learning_rate": 0.0005920242943073853,
      "loss": 2.9732,
      "step": 16949
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.912048816680908,
      "learning_rate": 0.0005920233573309899,
      "loss": 3.0917,
      "step": 16950
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6828726530075073,
      "learning_rate": 0.0005920224203003018,
      "loss": 3.0869,
      "step": 16951
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.513989806175232,
      "learning_rate": 0.0005920214832153211,
      "loss": 3.0261,
      "step": 16952
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6164456605911255,
      "learning_rate": 0.0005920205460760483,
      "loss": 3.1495,
      "step": 16953
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2263338565826416,
      "learning_rate": 0.0005920196088824832,
      "loss": 3.2143,
      "step": 16954
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4153497219085693,
      "learning_rate": 0.0005920186716346263,
      "loss": 3.0541,
      "step": 16955
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.81244957447052,
      "learning_rate": 0.0005920177343324776,
      "loss": 3.0807,
      "step": 16956
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.068300485610962,
      "learning_rate": 0.0005920167969760371,
      "loss": 3.2393,
      "step": 16957
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.234151840209961,
      "learning_rate": 0.0005920158595653053,
      "loss": 3.327,
      "step": 16958
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.806039571762085,
      "learning_rate": 0.0005920149221002823,
      "loss": 3.0896,
      "step": 16959
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3879996538162231,
      "learning_rate": 0.0005920139845809681,
      "loss": 3.0506,
      "step": 16960
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3687249422073364,
      "learning_rate": 0.000592013047007363,
      "loss": 3.4549,
      "step": 16961
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9154709577560425,
      "learning_rate": 0.0005920121093794672,
      "loss": 2.965,
      "step": 16962
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4164130687713623,
      "learning_rate": 0.0005920111716972809,
      "loss": 3.1255,
      "step": 16963
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3903366327285767,
      "learning_rate": 0.0005920102339608042,
      "loss": 2.8926,
      "step": 16964
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8954180479049683,
      "learning_rate": 0.0005920092961700372,
      "loss": 3.2616,
      "step": 16965
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6786426305770874,
      "learning_rate": 0.0005920083583249801,
      "loss": 3.1392,
      "step": 16966
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5325429439544678,
      "learning_rate": 0.0005920074204256333,
      "loss": 3.1413,
      "step": 16967
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.456064224243164,
      "learning_rate": 0.0005920064824719967,
      "loss": 2.9382,
      "step": 16968
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.169625997543335,
      "learning_rate": 0.0005920055444640707,
      "loss": 3.3135,
      "step": 16969
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1936488151550293,
      "learning_rate": 0.0005920046064018552,
      "loss": 2.9491,
      "step": 16970
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6330511569976807,
      "learning_rate": 0.0005920036682853506,
      "loss": 3.1708,
      "step": 16971
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.0633888244628906,
      "learning_rate": 0.000592002730114557,
      "loss": 2.9715,
      "step": 16972
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.253584146499634,
      "learning_rate": 0.0005920017918894747,
      "loss": 3.1685,
      "step": 16973
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9255702495574951,
      "learning_rate": 0.0005920008536101036,
      "loss": 2.878,
      "step": 16974
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3656649589538574,
      "learning_rate": 0.0005919999152764441,
      "loss": 3.281,
      "step": 16975
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.453970193862915,
      "learning_rate": 0.0005919989768884962,
      "loss": 3.2261,
      "step": 16976
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0644710063934326,
      "learning_rate": 0.0005919980384462603,
      "loss": 2.9849,
      "step": 16977
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9117705821990967,
      "learning_rate": 0.0005919970999497363,
      "loss": 3.2531,
      "step": 16978
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1034181118011475,
      "learning_rate": 0.0005919961613989247,
      "loss": 3.0457,
      "step": 16979
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0353033542633057,
      "learning_rate": 0.0005919952227938255,
      "loss": 3.2133,
      "step": 16980
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6143414974212646,
      "learning_rate": 0.0005919942841344387,
      "loss": 3.0939,
      "step": 16981
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4011045694351196,
      "learning_rate": 0.0005919933454207649,
      "loss": 2.9606,
      "step": 16982
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0818068981170654,
      "learning_rate": 0.0005919924066528038,
      "loss": 2.8789,
      "step": 16983
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7256208658218384,
      "learning_rate": 0.0005919914678305559,
      "loss": 3.2671,
      "step": 16984
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.405864953994751,
      "learning_rate": 0.0005919905289540213,
      "loss": 2.9714,
      "step": 16985
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7001194953918457,
      "learning_rate": 0.0005919895900232001,
      "loss": 3.2227,
      "step": 16986
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.855128288269043,
      "learning_rate": 0.0005919886510380926,
      "loss": 2.9394,
      "step": 16987
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5750486850738525,
      "learning_rate": 0.0005919877119986987,
      "loss": 3.0607,
      "step": 16988
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4980504512786865,
      "learning_rate": 0.000591986772905019,
      "loss": 3.2214,
      "step": 16989
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1770095825195312,
      "learning_rate": 0.0005919858337570534,
      "loss": 3.0636,
      "step": 16990
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4234589338302612,
      "learning_rate": 0.000591984894554802,
      "loss": 3.0893,
      "step": 16991
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9157028198242188,
      "learning_rate": 0.0005919839552982652,
      "loss": 3.2221,
      "step": 16992
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4127061367034912,
      "learning_rate": 0.0005919830159874431,
      "loss": 2.939,
      "step": 16993
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.744917392730713,
      "learning_rate": 0.0005919820766223358,
      "loss": 2.9076,
      "step": 16994
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.482417345046997,
      "learning_rate": 0.0005919811372029434,
      "loss": 3.4473,
      "step": 16995
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4959967136383057,
      "learning_rate": 0.0005919801977292663,
      "loss": 3.0215,
      "step": 16996
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6922146081924438,
      "learning_rate": 0.0005919792582013046,
      "loss": 3.0334,
      "step": 16997
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5199114084243774,
      "learning_rate": 0.0005919783186190585,
      "loss": 3.1949,
      "step": 16998
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6478440761566162,
      "learning_rate": 0.000591977378982528,
      "loss": 3.2685,
      "step": 16999
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6768938302993774,
      "learning_rate": 0.0005919764392917134,
      "loss": 3.0168,
      "step": 17000
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.346006393432617,
      "learning_rate": 0.0005919754995466149,
      "loss": 3.4037,
      "step": 17001
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4266492128372192,
      "learning_rate": 0.0005919745597472327,
      "loss": 3.2397,
      "step": 17002
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3483874797821045,
      "learning_rate": 0.0005919736198935669,
      "loss": 3.2324,
      "step": 17003
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.461227297782898,
      "learning_rate": 0.0005919726799856176,
      "loss": 3.0319,
      "step": 17004
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5313037633895874,
      "learning_rate": 0.0005919717400233852,
      "loss": 3.1685,
      "step": 17005
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5987306833267212,
      "learning_rate": 0.0005919708000068696,
      "loss": 2.9426,
      "step": 17006
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7096346616744995,
      "learning_rate": 0.0005919698599360712,
      "loss": 3.0056,
      "step": 17007
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3963700532913208,
      "learning_rate": 0.0005919689198109901,
      "loss": 3.0996,
      "step": 17008
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1648597717285156,
      "learning_rate": 0.0005919679796316265,
      "loss": 3.0634,
      "step": 17009
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8311446905136108,
      "learning_rate": 0.0005919670393979803,
      "loss": 3.1434,
      "step": 17010
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5638738870620728,
      "learning_rate": 0.0005919660991100522,
      "loss": 3.0526,
      "step": 17011
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3194329738616943,
      "learning_rate": 0.000591965158767842,
      "loss": 3.1616,
      "step": 17012
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.759946584701538,
      "learning_rate": 0.0005919642183713499,
      "loss": 3.1115,
      "step": 17013
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5087718963623047,
      "learning_rate": 0.0005919632779205762,
      "loss": 2.9153,
      "step": 17014
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8704155683517456,
      "learning_rate": 0.000591962337415521,
      "loss": 3.4658,
      "step": 17015
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3921799659729004,
      "learning_rate": 0.0005919613968561845,
      "loss": 3.1093,
      "step": 17016
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.180872917175293,
      "learning_rate": 0.0005919604562425669,
      "loss": 3.0001,
      "step": 17017
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4832268953323364,
      "learning_rate": 0.0005919595155746683,
      "loss": 3.0976,
      "step": 17018
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5817571878433228,
      "learning_rate": 0.0005919585748524889,
      "loss": 2.8985,
      "step": 17019
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6045080423355103,
      "learning_rate": 0.0005919576340760289,
      "loss": 3.0209,
      "step": 17020
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3706979751586914,
      "learning_rate": 0.0005919566932452885,
      "loss": 3.1083,
      "step": 17021
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5298045873641968,
      "learning_rate": 0.0005919557523602679,
      "loss": 3.0371,
      "step": 17022
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6566529273986816,
      "learning_rate": 0.0005919548114209671,
      "loss": 3.0281,
      "step": 17023
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6694291830062866,
      "learning_rate": 0.0005919538704273864,
      "loss": 3.1184,
      "step": 17024
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.50905179977417,
      "learning_rate": 0.000591952929379526,
      "loss": 3.2921,
      "step": 17025
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0401220321655273,
      "learning_rate": 0.000591951988277386,
      "loss": 3.1693,
      "step": 17026
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5681586265563965,
      "learning_rate": 0.0005919510471209666,
      "loss": 3.0406,
      "step": 17027
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5340907573699951,
      "learning_rate": 0.000591950105910268,
      "loss": 2.9292,
      "step": 17028
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8132084608078003,
      "learning_rate": 0.0005919491646452904,
      "loss": 3.1312,
      "step": 17029
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6196197271347046,
      "learning_rate": 0.000591948223326034,
      "loss": 3.0179,
      "step": 17030
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.474969744682312,
      "learning_rate": 0.0005919472819524988,
      "loss": 3.2605,
      "step": 17031
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.752602219581604,
      "learning_rate": 0.0005919463405246852,
      "loss": 3.0548,
      "step": 17032
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6148393154144287,
      "learning_rate": 0.0005919453990425932,
      "loss": 3.2068,
      "step": 17033
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0908892154693604,
      "learning_rate": 0.000591944457506223,
      "loss": 2.869,
      "step": 17034
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7988284826278687,
      "learning_rate": 0.0005919435159155748,
      "loss": 3.372,
      "step": 17035
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6280734539031982,
      "learning_rate": 0.0005919425742706489,
      "loss": 3.0409,
      "step": 17036
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3233261108398438,
      "learning_rate": 0.0005919416325714452,
      "loss": 3.1721,
      "step": 17037
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3827465772628784,
      "learning_rate": 0.0005919406908179642,
      "loss": 3.0799,
      "step": 17038
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9245543479919434,
      "learning_rate": 0.0005919397490102058,
      "loss": 2.8169,
      "step": 17039
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4863604307174683,
      "learning_rate": 0.0005919388071481704,
      "loss": 3.4387,
      "step": 17040
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3765066862106323,
      "learning_rate": 0.000591937865231858,
      "loss": 3.2616,
      "step": 17041
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3093539476394653,
      "learning_rate": 0.0005919369232612689,
      "loss": 3.2049,
      "step": 17042
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5263087749481201,
      "learning_rate": 0.0005919359812364033,
      "loss": 3.3023,
      "step": 17043
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.331466794013977,
      "learning_rate": 0.000591935039157261,
      "loss": 3.1669,
      "step": 17044
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4065756797790527,
      "learning_rate": 0.0005919340970238426,
      "loss": 3.1889,
      "step": 17045
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7647885084152222,
      "learning_rate": 0.0005919331548361482,
      "loss": 2.9476,
      "step": 17046
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2082794904708862,
      "learning_rate": 0.0005919322125941779,
      "loss": 2.8599,
      "step": 17047
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.4299275875091553,
      "learning_rate": 0.0005919312702979319,
      "loss": 3.1343,
      "step": 17048
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7744420766830444,
      "learning_rate": 0.0005919303279474103,
      "loss": 3.1919,
      "step": 17049
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.843138337135315,
      "learning_rate": 0.0005919293855426134,
      "loss": 3.402,
      "step": 17050
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6022027730941772,
      "learning_rate": 0.0005919284430835412,
      "loss": 3.166,
      "step": 17051
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.556603193283081,
      "learning_rate": 0.0005919275005701941,
      "loss": 2.9996,
      "step": 17052
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3680974245071411,
      "learning_rate": 0.0005919265580025722,
      "loss": 3.2127,
      "step": 17053
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6263445615768433,
      "learning_rate": 0.0005919256153806757,
      "loss": 2.8861,
      "step": 17054
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.535919427871704,
      "learning_rate": 0.0005919246727045046,
      "loss": 3.1755,
      "step": 17055
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4602590799331665,
      "learning_rate": 0.0005919237299740591,
      "loss": 2.9434,
      "step": 17056
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4372265338897705,
      "learning_rate": 0.0005919227871893396,
      "loss": 2.9681,
      "step": 17057
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6711676120758057,
      "learning_rate": 0.0005919218443503462,
      "loss": 3.2382,
      "step": 17058
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.837446689605713,
      "learning_rate": 0.0005919209014570789,
      "loss": 3.2157,
      "step": 17059
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5364612340927124,
      "learning_rate": 0.000591919958509538,
      "loss": 3.3425,
      "step": 17060
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4880481958389282,
      "learning_rate": 0.0005919190155077238,
      "loss": 3.1243,
      "step": 17061
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4556251764297485,
      "learning_rate": 0.0005919180724516361,
      "loss": 2.9285,
      "step": 17062
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.590053677558899,
      "learning_rate": 0.0005919171293412756,
      "loss": 2.982,
      "step": 17063
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4946460723876953,
      "learning_rate": 0.000591916186176642,
      "loss": 2.9828,
      "step": 17064
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8238351345062256,
      "learning_rate": 0.0005919152429577358,
      "loss": 3.0686,
      "step": 17065
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7787063121795654,
      "learning_rate": 0.0005919142996845569,
      "loss": 2.964,
      "step": 17066
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.076650857925415,
      "learning_rate": 0.0005919133563571057,
      "loss": 3.2229,
      "step": 17067
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7709110975265503,
      "learning_rate": 0.0005919124129753823,
      "loss": 2.7657,
      "step": 17068
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5991101264953613,
      "learning_rate": 0.000591911469539387,
      "loss": 3.1334,
      "step": 17069
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5810564756393433,
      "learning_rate": 0.0005919105260491196,
      "loss": 3.0875,
      "step": 17070
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3207942247390747,
      "learning_rate": 0.0005919095825045806,
      "loss": 3.1435,
      "step": 17071
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7867648601531982,
      "learning_rate": 0.0005919086389057702,
      "loss": 2.9472,
      "step": 17072
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4721400737762451,
      "learning_rate": 0.0005919076952526885,
      "loss": 2.9844,
      "step": 17073
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.556397557258606,
      "learning_rate": 0.0005919067515453355,
      "loss": 3.1713,
      "step": 17074
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7161518335342407,
      "learning_rate": 0.0005919058077837115,
      "loss": 3.0147,
      "step": 17075
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4297055006027222,
      "learning_rate": 0.0005919048639678169,
      "loss": 2.9938,
      "step": 17076
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1654951572418213,
      "learning_rate": 0.0005919039200976516,
      "loss": 3.2385,
      "step": 17077
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5123584270477295,
      "learning_rate": 0.0005919029761732158,
      "loss": 2.9464,
      "step": 17078
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0347683429718018,
      "learning_rate": 0.0005919020321945097,
      "loss": 3.2868,
      "step": 17079
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6112751960754395,
      "learning_rate": 0.0005919010881615336,
      "loss": 3.3126,
      "step": 17080
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.440100073814392,
      "learning_rate": 0.0005919001440742875,
      "loss": 3.1517,
      "step": 17081
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7822544574737549,
      "learning_rate": 0.0005918991999327716,
      "loss": 3.1102,
      "step": 17082
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3993725776672363,
      "learning_rate": 0.0005918982557369863,
      "loss": 2.9249,
      "step": 17083
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8281763792037964,
      "learning_rate": 0.0005918973114869315,
      "loss": 3.0248,
      "step": 17084
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.417275905609131,
      "learning_rate": 0.0005918963671826074,
      "loss": 2.9005,
      "step": 17085
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.813281774520874,
      "learning_rate": 0.0005918954228240144,
      "loss": 3.0062,
      "step": 17086
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9067399501800537,
      "learning_rate": 0.0005918944784111524,
      "loss": 3.1928,
      "step": 17087
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6786892414093018,
      "learning_rate": 0.0005918935339440218,
      "loss": 3.3308,
      "step": 17088
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7752609252929688,
      "learning_rate": 0.0005918925894226227,
      "loss": 3.1888,
      "step": 17089
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.545586109161377,
      "learning_rate": 0.0005918916448469551,
      "loss": 3.1778,
      "step": 17090
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6485495567321777,
      "learning_rate": 0.0005918907002170195,
      "loss": 3.1475,
      "step": 17091
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.976637840270996,
      "learning_rate": 0.0005918897555328158,
      "loss": 3.2266,
      "step": 17092
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.0815608501434326,
      "learning_rate": 0.0005918888107943445,
      "loss": 2.8676,
      "step": 17093
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5745679140090942,
      "learning_rate": 0.0005918878660016053,
      "loss": 2.9885,
      "step": 17094
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.29205322265625,
      "learning_rate": 0.0005918869211545988,
      "loss": 3.238,
      "step": 17095
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.349407196044922,
      "learning_rate": 0.0005918859762533249,
      "loss": 3.0169,
      "step": 17096
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7200958728790283,
      "learning_rate": 0.0005918850312977839,
      "loss": 3.0088,
      "step": 17097
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8978291749954224,
      "learning_rate": 0.0005918840862879761,
      "loss": 3.0135,
      "step": 17098
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9443608522415161,
      "learning_rate": 0.0005918831412239015,
      "loss": 2.9424,
      "step": 17099
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4211633205413818,
      "learning_rate": 0.0005918821961055601,
      "loss": 3.074,
      "step": 17100
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3926738500595093,
      "learning_rate": 0.0005918812509329525,
      "loss": 3.1091,
      "step": 17101
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5817006826400757,
      "learning_rate": 0.0005918803057060786,
      "loss": 3.1133,
      "step": 17102
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7008520364761353,
      "learning_rate": 0.0005918793604249387,
      "loss": 3.0828,
      "step": 17103
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.410706639289856,
      "learning_rate": 0.000591878415089533,
      "loss": 3.0257,
      "step": 17104
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6885169744491577,
      "learning_rate": 0.0005918774696998613,
      "loss": 3.0671,
      "step": 17105
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6226181983947754,
      "learning_rate": 0.0005918765242559243,
      "loss": 3.1696,
      "step": 17106
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2715668678283691,
      "learning_rate": 0.0005918755787577219,
      "loss": 2.9959,
      "step": 17107
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.223617672920227,
      "learning_rate": 0.0005918746332052542,
      "loss": 3.099,
      "step": 17108
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4017218351364136,
      "learning_rate": 0.0005918736875985217,
      "loss": 3.2901,
      "step": 17109
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.5295920372009277,
      "learning_rate": 0.0005918727419375242,
      "loss": 3.2214,
      "step": 17110
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.559393048286438,
      "learning_rate": 0.0005918717962222621,
      "loss": 2.8588,
      "step": 17111
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7804895639419556,
      "learning_rate": 0.0005918708504527356,
      "loss": 2.9452,
      "step": 17112
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8103668689727783,
      "learning_rate": 0.0005918699046289446,
      "loss": 3.0637,
      "step": 17113
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7821102142333984,
      "learning_rate": 0.0005918689587508897,
      "loss": 3.0857,
      "step": 17114
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3339813947677612,
      "learning_rate": 0.0005918680128185708,
      "loss": 3.2024,
      "step": 17115
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9522634744644165,
      "learning_rate": 0.000591867066831988,
      "loss": 3.3336,
      "step": 17116
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8135151863098145,
      "learning_rate": 0.0005918661207911418,
      "loss": 2.8835,
      "step": 17117
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9559063911437988,
      "learning_rate": 0.000591865174696032,
      "loss": 2.9009,
      "step": 17118
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6677396297454834,
      "learning_rate": 0.000591864228546659,
      "loss": 3.0325,
      "step": 17119
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.9095306396484375,
      "learning_rate": 0.0005918632823430231,
      "loss": 3.0076,
      "step": 17120
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4958924055099487,
      "learning_rate": 0.0005918623360851241,
      "loss": 3.3531,
      "step": 17121
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8463921546936035,
      "learning_rate": 0.0005918613897729624,
      "loss": 3.2513,
      "step": 17122
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.064864158630371,
      "learning_rate": 0.0005918604434065382,
      "loss": 3.2977,
      "step": 17123
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4808870553970337,
      "learning_rate": 0.0005918594969858517,
      "loss": 3.216,
      "step": 17124
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.643702507019043,
      "learning_rate": 0.0005918585505109029,
      "loss": 2.9991,
      "step": 17125
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1717264652252197,
      "learning_rate": 0.0005918576039816921,
      "loss": 3.1195,
      "step": 17126
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4263397455215454,
      "learning_rate": 0.0005918566573982195,
      "loss": 3.0005,
      "step": 17127
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5307255983352661,
      "learning_rate": 0.0005918557107604853,
      "loss": 2.9887,
      "step": 17128
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7224986553192139,
      "learning_rate": 0.0005918547640684895,
      "loss": 3.3451,
      "step": 17129
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.380838394165039,
      "learning_rate": 0.0005918538173222324,
      "loss": 3.225,
      "step": 17130
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.749377965927124,
      "learning_rate": 0.0005918528705217143,
      "loss": 3.0374,
      "step": 17131
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5053579807281494,
      "learning_rate": 0.0005918519236669352,
      "loss": 3.021,
      "step": 17132
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4558143615722656,
      "learning_rate": 0.0005918509767578953,
      "loss": 3.3396,
      "step": 17133
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.621826171875,
      "learning_rate": 0.0005918500297945946,
      "loss": 3.1441,
      "step": 17134
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3164784908294678,
      "learning_rate": 0.0005918490827770337,
      "loss": 3.1095,
      "step": 17135
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6908050775527954,
      "learning_rate": 0.0005918481357052125,
      "loss": 3.1847,
      "step": 17136
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.219071865081787,
      "learning_rate": 0.0005918471885791312,
      "loss": 2.9543,
      "step": 17137
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3510645627975464,
      "learning_rate": 0.00059184624139879,
      "loss": 3.0668,
      "step": 17138
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.0744335651397705,
      "learning_rate": 0.0005918452941641891,
      "loss": 2.8286,
      "step": 17139
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2167341709136963,
      "learning_rate": 0.0005918443468753285,
      "loss": 3.124,
      "step": 17140
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6733790636062622,
      "learning_rate": 0.0005918433995322087,
      "loss": 3.2215,
      "step": 17141
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4328548908233643,
      "learning_rate": 0.0005918424521348297,
      "loss": 3.2194,
      "step": 17142
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8209763765335083,
      "learning_rate": 0.0005918415046831916,
      "loss": 3.1728,
      "step": 17143
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.498451590538025,
      "learning_rate": 0.0005918405571772947,
      "loss": 3.0745,
      "step": 17144
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.499693512916565,
      "learning_rate": 0.000591839609617139,
      "loss": 3.2491,
      "step": 17145
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4620563983917236,
      "learning_rate": 0.0005918386620027251,
      "loss": 3.1287,
      "step": 17146
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.531898856163025,
      "learning_rate": 0.0005918377143340526,
      "loss": 3.2381,
      "step": 17147
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.436322569847107,
      "learning_rate": 0.0005918367666111221,
      "loss": 3.1815,
      "step": 17148
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3934670686721802,
      "learning_rate": 0.0005918358188339336,
      "loss": 3.2195,
      "step": 17149
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1882065534591675,
      "learning_rate": 0.0005918348710024873,
      "loss": 2.8977,
      "step": 17150
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6387012004852295,
      "learning_rate": 0.0005918339231167834,
      "loss": 3.1344,
      "step": 17151
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3954029083251953,
      "learning_rate": 0.0005918329751768221,
      "loss": 3.0252,
      "step": 17152
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4560370445251465,
      "learning_rate": 0.0005918320271826035,
      "loss": 3.2745,
      "step": 17153
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.688616394996643,
      "learning_rate": 0.0005918310791341279,
      "loss": 3.238,
      "step": 17154
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5376893281936646,
      "learning_rate": 0.0005918301310313953,
      "loss": 3.152,
      "step": 17155
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9817804098129272,
      "learning_rate": 0.000591829182874406,
      "loss": 3.139,
      "step": 17156
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1109976768493652,
      "learning_rate": 0.0005918282346631602,
      "loss": 3.0204,
      "step": 17157
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9531543254852295,
      "learning_rate": 0.0005918272863976579,
      "loss": 3.1361,
      "step": 17158
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3860729932785034,
      "learning_rate": 0.0005918263380778995,
      "loss": 2.7742,
      "step": 17159
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.1300208568573,
      "learning_rate": 0.000591825389703885,
      "loss": 3.2769,
      "step": 17160
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4230073690414429,
      "learning_rate": 0.0005918244412756147,
      "loss": 3.3735,
      "step": 17161
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.6305112838745117,
      "learning_rate": 0.0005918234927930887,
      "loss": 3.1378,
      "step": 17162
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.428578495979309,
      "learning_rate": 0.0005918225442563073,
      "loss": 3.4798,
      "step": 17163
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8495186567306519,
      "learning_rate": 0.0005918215956652704,
      "loss": 3.0461,
      "step": 17164
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4327877759933472,
      "learning_rate": 0.0005918206470199786,
      "loss": 3.0069,
      "step": 17165
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4945049285888672,
      "learning_rate": 0.0005918196983204316,
      "loss": 3.3307,
      "step": 17166
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7771204710006714,
      "learning_rate": 0.0005918187495666298,
      "loss": 3.1614,
      "step": 17167
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.672764778137207,
      "learning_rate": 0.0005918178007585736,
      "loss": 2.9523,
      "step": 17168
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3883730173110962,
      "learning_rate": 0.0005918168518962628,
      "loss": 3.2353,
      "step": 17169
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7152129411697388,
      "learning_rate": 0.0005918159029796978,
      "loss": 3.2195,
      "step": 17170
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.593691349029541,
      "learning_rate": 0.0005918149540088787,
      "loss": 3.0957,
      "step": 17171
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2967298030853271,
      "learning_rate": 0.0005918140049838058,
      "loss": 3.2083,
      "step": 17172
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4689254760742188,
      "learning_rate": 0.000591813055904479,
      "loss": 3.0879,
      "step": 17173
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2866950035095215,
      "learning_rate": 0.0005918121067708987,
      "loss": 3.1627,
      "step": 17174
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.724016547203064,
      "learning_rate": 0.000591811157583065,
      "loss": 3.0859,
      "step": 17175
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5656342506408691,
      "learning_rate": 0.0005918102083409782,
      "loss": 3.161,
      "step": 17176
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9459315538406372,
      "learning_rate": 0.0005918092590446383,
      "loss": 3.0913,
      "step": 17177
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.633735179901123,
      "learning_rate": 0.0005918083096940456,
      "loss": 3.0988,
      "step": 17178
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5687673091888428,
      "learning_rate": 0.0005918073602892,
      "loss": 3.1622,
      "step": 17179
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.922724723815918,
      "learning_rate": 0.0005918064108301022,
      "loss": 3.1167,
      "step": 17180
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5476632118225098,
      "learning_rate": 0.000591805461316752,
      "loss": 3.149,
      "step": 17181
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.56820547580719,
      "learning_rate": 0.0005918045117491495,
      "loss": 2.9825,
      "step": 17182
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4905931949615479,
      "learning_rate": 0.0005918035621272951,
      "loss": 3.4364,
      "step": 17183
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4193353652954102,
      "learning_rate": 0.000591802612451189,
      "loss": 2.7837,
      "step": 17184
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.1751327514648438,
      "learning_rate": 0.0005918016627208313,
      "loss": 3.3519,
      "step": 17185
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.418501853942871,
      "learning_rate": 0.0005918007129362221,
      "loss": 3.1889,
      "step": 17186
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2832697629928589,
      "learning_rate": 0.0005917997630973616,
      "loss": 2.9956,
      "step": 17187
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.497217059135437,
      "learning_rate": 0.0005917988132042501,
      "loss": 3.2292,
      "step": 17188
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3336405754089355,
      "learning_rate": 0.0005917978632568877,
      "loss": 3.1504,
      "step": 17189
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6679705381393433,
      "learning_rate": 0.0005917969132552745,
      "loss": 3.2723,
      "step": 17190
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.703692078590393,
      "learning_rate": 0.0005917959631994107,
      "loss": 3.2473,
      "step": 17191
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4282904863357544,
      "learning_rate": 0.0005917950130892967,
      "loss": 3.3012,
      "step": 17192
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.3098392486572266,
      "learning_rate": 0.0005917940629249324,
      "loss": 2.8205,
      "step": 17193
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5577389001846313,
      "learning_rate": 0.000591793112706318,
      "loss": 3.2153,
      "step": 17194
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8247216939926147,
      "learning_rate": 0.0005917921624334539,
      "loss": 2.969,
      "step": 17195
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.88225519657135,
      "learning_rate": 0.00059179121210634,
      "loss": 3.0895,
      "step": 17196
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5501753091812134,
      "learning_rate": 0.0005917902617249767,
      "loss": 3.4661,
      "step": 17197
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6891478300094604,
      "learning_rate": 0.000591789311289364,
      "loss": 3.0747,
      "step": 17198
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7171179056167603,
      "learning_rate": 0.0005917883607995021,
      "loss": 3.0156,
      "step": 17199
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4032847881317139,
      "learning_rate": 0.0005917874102553914,
      "loss": 3.1432,
      "step": 17200
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.34684419631958,
      "learning_rate": 0.0005917864596570319,
      "loss": 3.2015,
      "step": 17201
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2546862363815308,
      "learning_rate": 0.0005917855090044237,
      "loss": 3.2158,
      "step": 17202
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3516778945922852,
      "learning_rate": 0.0005917845582975672,
      "loss": 3.1213,
      "step": 17203
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.693900465965271,
      "learning_rate": 0.0005917836075364622,
      "loss": 3.3649,
      "step": 17204
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.354849934577942,
      "learning_rate": 0.0005917826567211092,
      "loss": 2.9841,
      "step": 17205
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7567107677459717,
      "learning_rate": 0.0005917817058515085,
      "loss": 3.3304,
      "step": 17206
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8956841230392456,
      "learning_rate": 0.0005917807549276599,
      "loss": 3.1052,
      "step": 17207
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5341624021530151,
      "learning_rate": 0.0005917798039495638,
      "loss": 3.2177,
      "step": 17208
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4459202289581299,
      "learning_rate": 0.0005917788529172203,
      "loss": 3.1472,
      "step": 17209
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5044935941696167,
      "learning_rate": 0.0005917779018306296,
      "loss": 3.3411,
      "step": 17210
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.858833909034729,
      "learning_rate": 0.0005917769506897919,
      "loss": 3.1768,
      "step": 17211
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4025663137435913,
      "learning_rate": 0.0005917759994947074,
      "loss": 3.2279,
      "step": 17212
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3173946142196655,
      "learning_rate": 0.0005917750482453761,
      "loss": 3.2356,
      "step": 17213
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6698113679885864,
      "learning_rate": 0.0005917740969417984,
      "loss": 3.0856,
      "step": 17214
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4664111137390137,
      "learning_rate": 0.0005917731455839744,
      "loss": 3.098,
      "step": 17215
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6869845390319824,
      "learning_rate": 0.0005917721941719043,
      "loss": 3.4283,
      "step": 17216
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3660322427749634,
      "learning_rate": 0.0005917712427055882,
      "loss": 3.1546,
      "step": 17217
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4992750883102417,
      "learning_rate": 0.0005917702911850263,
      "loss": 3.0532,
      "step": 17218
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4915989637374878,
      "learning_rate": 0.0005917693396102188,
      "loss": 3.1353,
      "step": 17219
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5152511596679688,
      "learning_rate": 0.0005917683879811659,
      "loss": 3.3566,
      "step": 17220
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5661906003952026,
      "learning_rate": 0.0005917674362978677,
      "loss": 3.2672,
      "step": 17221
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7307027578353882,
      "learning_rate": 0.0005917664845603245,
      "loss": 3.0772,
      "step": 17222
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.601211667060852,
      "learning_rate": 0.0005917655327685363,
      "loss": 3.2315,
      "step": 17223
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5575737953186035,
      "learning_rate": 0.0005917645809225034,
      "loss": 3.0597,
      "step": 17224
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4496146440505981,
      "learning_rate": 0.0005917636290222259,
      "loss": 3.3169,
      "step": 17225
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5674076080322266,
      "learning_rate": 0.0005917626770677042,
      "loss": 2.9752,
      "step": 17226
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5869731903076172,
      "learning_rate": 0.0005917617250589382,
      "loss": 2.9318,
      "step": 17227
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3910940885543823,
      "learning_rate": 0.0005917607729959282,
      "loss": 3.1102,
      "step": 17228
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8678820133209229,
      "learning_rate": 0.0005917598208786745,
      "loss": 2.9418,
      "step": 17229
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.653791069984436,
      "learning_rate": 0.000591758868707177,
      "loss": 3.2259,
      "step": 17230
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3946280479431152,
      "learning_rate": 0.000591757916481436,
      "loss": 2.9781,
      "step": 17231
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.899325966835022,
      "learning_rate": 0.0005917569642014518,
      "loss": 3.4278,
      "step": 17232
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.191437005996704,
      "learning_rate": 0.0005917560118672245,
      "loss": 3.0727,
      "step": 17233
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9996418952941895,
      "learning_rate": 0.0005917550594787541,
      "loss": 2.9942,
      "step": 17234
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6695613861083984,
      "learning_rate": 0.000591754107036041,
      "loss": 3.0735,
      "step": 17235
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2831478118896484,
      "learning_rate": 0.0005917531545390853,
      "loss": 2.9764,
      "step": 17236
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.779529333114624,
      "learning_rate": 0.0005917522019878871,
      "loss": 3.0356,
      "step": 17237
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5135469436645508,
      "learning_rate": 0.0005917512493824468,
      "loss": 3.1608,
      "step": 17238
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5805763006210327,
      "learning_rate": 0.0005917502967227644,
      "loss": 3.2398,
      "step": 17239
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6313345432281494,
      "learning_rate": 0.0005917493440088401,
      "loss": 3.078,
      "step": 17240
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.714183807373047,
      "learning_rate": 0.000591748391240674,
      "loss": 3.1845,
      "step": 17241
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5697990655899048,
      "learning_rate": 0.0005917474384182664,
      "loss": 2.9306,
      "step": 17242
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6675382852554321,
      "learning_rate": 0.0005917464855416175,
      "loss": 2.8734,
      "step": 17243
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.565660834312439,
      "learning_rate": 0.0005917455326107275,
      "loss": 3.0785,
      "step": 17244
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.020502805709839,
      "learning_rate": 0.0005917445796255964,
      "loss": 3.1481,
      "step": 17245
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6196632385253906,
      "learning_rate": 0.0005917436265862245,
      "loss": 3.2661,
      "step": 17246
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6641048192977905,
      "learning_rate": 0.0005917426734926119,
      "loss": 3.1511,
      "step": 17247
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4272046089172363,
      "learning_rate": 0.0005917417203447588,
      "loss": 3.1528,
      "step": 17248
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5117216110229492,
      "learning_rate": 0.0005917407671426655,
      "loss": 3.1999,
      "step": 17249
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5099705457687378,
      "learning_rate": 0.0005917398138863321,
      "loss": 3.1375,
      "step": 17250
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4032423496246338,
      "learning_rate": 0.0005917388605757587,
      "loss": 3.1283,
      "step": 17251
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4558926820755005,
      "learning_rate": 0.0005917379072109455,
      "loss": 2.9306,
      "step": 17252
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.747505784034729,
      "learning_rate": 0.0005917369537918928,
      "loss": 2.9082,
      "step": 17253
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.3602328300476074,
      "learning_rate": 0.0005917360003186007,
      "loss": 2.9966,
      "step": 17254
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6571593284606934,
      "learning_rate": 0.0005917350467910693,
      "loss": 3.1746,
      "step": 17255
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5063000917434692,
      "learning_rate": 0.0005917340932092989,
      "loss": 3.0211,
      "step": 17256
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4216384887695312,
      "learning_rate": 0.0005917331395732896,
      "loss": 3.1775,
      "step": 17257
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.4419184923171997,
      "learning_rate": 0.0005917321858830417,
      "loss": 2.8758,
      "step": 17258
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.349490761756897,
      "learning_rate": 0.0005917312321385551,
      "loss": 3.3866,
      "step": 17259
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6950918436050415,
      "learning_rate": 0.0005917302783398303,
      "loss": 3.0118,
      "step": 17260
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.2017815113067627,
      "learning_rate": 0.0005917293244868672,
      "loss": 3.3032,
      "step": 17261
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6469157934188843,
      "learning_rate": 0.0005917283705796663,
      "loss": 3.0644,
      "step": 17262
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7311877012252808,
      "learning_rate": 0.0005917274166182274,
      "loss": 3.1123,
      "step": 17263
    },
    {
      "epoch": 0.22,
      "grad_norm": 4.016190528869629,
      "learning_rate": 0.000591726462602551,
      "loss": 2.9828,
      "step": 17264
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.6402082443237305,
      "learning_rate": 0.000591725508532637,
      "loss": 3.2689,
      "step": 17265
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9820713996887207,
      "learning_rate": 0.0005917245544084859,
      "loss": 3.3446,
      "step": 17266
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.372812032699585,
      "learning_rate": 0.0005917236002300976,
      "loss": 2.9191,
      "step": 17267
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.102323055267334,
      "learning_rate": 0.0005917226459974723,
      "loss": 3.0401,
      "step": 17268
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.987363338470459,
      "learning_rate": 0.0005917216917106105,
      "loss": 3.035,
      "step": 17269
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9213552474975586,
      "learning_rate": 0.0005917207373695119,
      "loss": 2.9589,
      "step": 17270
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.9689886569976807,
      "learning_rate": 0.000591719782974177,
      "loss": 3.0925,
      "step": 17271
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.1689679622650146,
      "learning_rate": 0.0005917188285246059,
      "loss": 3.3393,
      "step": 17272
    },
    {
      "epoch": 0.22,
      "grad_norm": 4.393572807312012,
      "learning_rate": 0.0005917178740207987,
      "loss": 2.9906,
      "step": 17273
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.5035759210586548,
      "learning_rate": 0.0005917169194627557,
      "loss": 2.9794,
      "step": 17274
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.6976585388183594,
      "learning_rate": 0.000591715964850477,
      "loss": 3.42,
      "step": 17275
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.4102416038513184,
      "learning_rate": 0.0005917150101839628,
      "loss": 3.1196,
      "step": 17276
    },
    {
      "epoch": 0.22,
      "grad_norm": 2.202735185623169,
      "learning_rate": 0.0005917140554632133,
      "loss": 3.3205,
      "step": 17277
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.708165168762207,
      "learning_rate": 0.0005917131006882285,
      "loss": 2.9156,
      "step": 17278
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.986480951309204,
      "learning_rate": 0.0005917121458590089,
      "loss": 3.1632,
      "step": 17279
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.8291029930114746,
      "learning_rate": 0.0005917111909755545,
      "loss": 3.1996,
      "step": 17280
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4751217365264893,
      "learning_rate": 0.0005917102360378653,
      "loss": 2.9716,
      "step": 17281
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5192629098892212,
      "learning_rate": 0.0005917092810459419,
      "loss": 3.1165,
      "step": 17282
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8667389154434204,
      "learning_rate": 0.0005917083259997841,
      "loss": 3.0837,
      "step": 17283
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6294270753860474,
      "learning_rate": 0.0005917073708993922,
      "loss": 3.0621,
      "step": 17284
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5707745552062988,
      "learning_rate": 0.0005917064157447664,
      "loss": 2.9572,
      "step": 17285
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6019017696380615,
      "learning_rate": 0.0005917054605359069,
      "loss": 3.0586,
      "step": 17286
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0930585861206055,
      "learning_rate": 0.0005917045052728139,
      "loss": 3.0547,
      "step": 17287
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9748210906982422,
      "learning_rate": 0.0005917035499554875,
      "loss": 2.7178,
      "step": 17288
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3694899082183838,
      "learning_rate": 0.0005917025945839278,
      "loss": 3.1374,
      "step": 17289
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7980656623840332,
      "learning_rate": 0.0005917016391581352,
      "loss": 3.0473,
      "step": 17290
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7054908275604248,
      "learning_rate": 0.0005917006836781097,
      "loss": 3.0467,
      "step": 17291
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3906996250152588,
      "learning_rate": 0.0005916997281438515,
      "loss": 3.2232,
      "step": 17292
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.783613920211792,
      "learning_rate": 0.0005916987725553609,
      "loss": 3.0489,
      "step": 17293
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4712713956832886,
      "learning_rate": 0.000591697816912638,
      "loss": 3.4033,
      "step": 17294
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3264409303665161,
      "learning_rate": 0.0005916968612156829,
      "loss": 3.0309,
      "step": 17295
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4487924575805664,
      "learning_rate": 0.000591695905464496,
      "loss": 3.0698,
      "step": 17296
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.494370698928833,
      "learning_rate": 0.0005916949496590771,
      "loss": 3.1147,
      "step": 17297
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5239593982696533,
      "learning_rate": 0.0005916939937994268,
      "loss": 3.163,
      "step": 17298
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5925531387329102,
      "learning_rate": 0.000591693037885545,
      "loss": 3.1059,
      "step": 17299
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8825880289077759,
      "learning_rate": 0.0005916920819174319,
      "loss": 3.2044,
      "step": 17300
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7209638357162476,
      "learning_rate": 0.0005916911258950878,
      "loss": 3.3401,
      "step": 17301
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4135568141937256,
      "learning_rate": 0.0005916901698185127,
      "loss": 3.3699,
      "step": 17302
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4341354370117188,
      "learning_rate": 0.0005916892136877071,
      "loss": 3.0324,
      "step": 17303
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.638229489326477,
      "learning_rate": 0.0005916882575026708,
      "loss": 3.3019,
      "step": 17304
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7293763160705566,
      "learning_rate": 0.0005916873012634042,
      "loss": 3.4899,
      "step": 17305
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5876420736312866,
      "learning_rate": 0.0005916863449699075,
      "loss": 3.2771,
      "step": 17306
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2292388677597046,
      "learning_rate": 0.0005916853886221808,
      "loss": 3.0412,
      "step": 17307
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6943995952606201,
      "learning_rate": 0.0005916844322202241,
      "loss": 3.2223,
      "step": 17308
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7522270679473877,
      "learning_rate": 0.0005916834757640379,
      "loss": 3.1075,
      "step": 17309
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7837998867034912,
      "learning_rate": 0.0005916825192536221,
      "loss": 3.0913,
      "step": 17310
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8850524425506592,
      "learning_rate": 0.0005916815626889773,
      "loss": 3.4014,
      "step": 17311
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5508919954299927,
      "learning_rate": 0.0005916806060701032,
      "loss": 3.3739,
      "step": 17312
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8819067478179932,
      "learning_rate": 0.000591679649397,
      "loss": 3.0435,
      "step": 17313
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0096707344055176,
      "learning_rate": 0.0005916786926696684,
      "loss": 3.1254,
      "step": 17314
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3117252588272095,
      "learning_rate": 0.000591677735888108,
      "loss": 3.0346,
      "step": 17315
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5611822605133057,
      "learning_rate": 0.0005916767790523193,
      "loss": 2.909,
      "step": 17316
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3964543342590332,
      "learning_rate": 0.0005916758221623023,
      "loss": 3.264,
      "step": 17317
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4800608158111572,
      "learning_rate": 0.0005916748652180574,
      "loss": 3.082,
      "step": 17318
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6946810483932495,
      "learning_rate": 0.0005916739082195845,
      "loss": 3.1598,
      "step": 17319
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9346743822097778,
      "learning_rate": 0.000591672951166884,
      "loss": 3.147,
      "step": 17320
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6179803609848022,
      "learning_rate": 0.0005916719940599559,
      "loss": 3.0905,
      "step": 17321
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6603354215621948,
      "learning_rate": 0.0005916710368988004,
      "loss": 3.2484,
      "step": 17322
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6582839488983154,
      "learning_rate": 0.0005916700796834179,
      "loss": 3.0822,
      "step": 17323
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.573279619216919,
      "learning_rate": 0.0005916691224138084,
      "loss": 3.3635,
      "step": 17324
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.225353479385376,
      "learning_rate": 0.0005916681650899721,
      "loss": 3.2014,
      "step": 17325
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.556204915046692,
      "learning_rate": 0.0005916672077119092,
      "loss": 3.0479,
      "step": 17326
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3724901676177979,
      "learning_rate": 0.0005916662502796198,
      "loss": 2.9786,
      "step": 17327
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3991936445236206,
      "learning_rate": 0.0005916652927931042,
      "loss": 3.1394,
      "step": 17328
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6982253789901733,
      "learning_rate": 0.0005916643352523625,
      "loss": 3.2382,
      "step": 17329
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.690515160560608,
      "learning_rate": 0.0005916633776573949,
      "loss": 3.1607,
      "step": 17330
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5690158605575562,
      "learning_rate": 0.0005916624200082015,
      "loss": 3.4404,
      "step": 17331
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3433245420455933,
      "learning_rate": 0.0005916614623047826,
      "loss": 3.1838,
      "step": 17332
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4359207153320312,
      "learning_rate": 0.0005916605045471384,
      "loss": 3.0665,
      "step": 17333
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4348466396331787,
      "learning_rate": 0.000591659546735269,
      "loss": 3.4921,
      "step": 17334
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.70407235622406,
      "learning_rate": 0.0005916585888691745,
      "loss": 3.2007,
      "step": 17335
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9028668403625488,
      "learning_rate": 0.0005916576309488551,
      "loss": 3.0525,
      "step": 17336
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8700188398361206,
      "learning_rate": 0.0005916566729743111,
      "loss": 2.8737,
      "step": 17337
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4875633716583252,
      "learning_rate": 0.0005916557149455427,
      "loss": 2.9777,
      "step": 17338
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7782396078109741,
      "learning_rate": 0.00059165475686255,
      "loss": 3.193,
      "step": 17339
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.463617205619812,
      "learning_rate": 0.0005916537987253331,
      "loss": 2.9503,
      "step": 17340
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.551024079322815,
      "learning_rate": 0.0005916528405338922,
      "loss": 2.9676,
      "step": 17341
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6141167879104614,
      "learning_rate": 0.0005916518822882277,
      "loss": 2.954,
      "step": 17342
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7066270112991333,
      "learning_rate": 0.0005916509239883395,
      "loss": 3.0105,
      "step": 17343
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7095903158187866,
      "learning_rate": 0.0005916499656342279,
      "loss": 3.1765,
      "step": 17344
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.803470492362976,
      "learning_rate": 0.000591649007225893,
      "loss": 3.0441,
      "step": 17345
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2477641105651855,
      "learning_rate": 0.0005916480487633352,
      "loss": 2.979,
      "step": 17346
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8101190328598022,
      "learning_rate": 0.0005916470902465544,
      "loss": 3.2179,
      "step": 17347
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.532077670097351,
      "learning_rate": 0.0005916461316755509,
      "loss": 3.1988,
      "step": 17348
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4451260566711426,
      "learning_rate": 0.000591645173050325,
      "loss": 3.1407,
      "step": 17349
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.646013617515564,
      "learning_rate": 0.0005916442143708766,
      "loss": 3.0858,
      "step": 17350
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.6278131008148193,
      "learning_rate": 0.0005916432556372062,
      "loss": 3.1075,
      "step": 17351
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.840846300125122,
      "learning_rate": 0.0005916422968493137,
      "loss": 3.2399,
      "step": 17352
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.306086540222168,
      "learning_rate": 0.0005916413380071993,
      "loss": 3.0787,
      "step": 17353
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8784984350204468,
      "learning_rate": 0.0005916403791108634,
      "loss": 3.0852,
      "step": 17354
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3719077110290527,
      "learning_rate": 0.0005916394201603061,
      "loss": 2.8893,
      "step": 17355
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4553009271621704,
      "learning_rate": 0.0005916384611555275,
      "loss": 3.1138,
      "step": 17356
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9701555967330933,
      "learning_rate": 0.0005916375020965278,
      "loss": 3.2835,
      "step": 17357
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6645129919052124,
      "learning_rate": 0.0005916365429833071,
      "loss": 3.0729,
      "step": 17358
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8347874879837036,
      "learning_rate": 0.0005916355838158657,
      "loss": 2.9837,
      "step": 17359
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1399893760681152,
      "learning_rate": 0.0005916346245942037,
      "loss": 3.2319,
      "step": 17360
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8518010377883911,
      "learning_rate": 0.0005916336653183214,
      "loss": 3.2919,
      "step": 17361
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2986944913864136,
      "learning_rate": 0.0005916327059882189,
      "loss": 3.1393,
      "step": 17362
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.339664340019226,
      "learning_rate": 0.0005916317466038963,
      "loss": 3.2847,
      "step": 17363
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.885960578918457,
      "learning_rate": 0.000591630787165354,
      "loss": 3.1896,
      "step": 17364
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5168055295944214,
      "learning_rate": 0.0005916298276725919,
      "loss": 3.1461,
      "step": 17365
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6002265214920044,
      "learning_rate": 0.0005916288681256103,
      "loss": 3.1555,
      "step": 17366
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6245921850204468,
      "learning_rate": 0.0005916279085244094,
      "loss": 3.2255,
      "step": 17367
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.662017822265625,
      "learning_rate": 0.0005916269488689894,
      "loss": 3.0891,
      "step": 17368
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.255137324333191,
      "learning_rate": 0.0005916259891593504,
      "loss": 3.0051,
      "step": 17369
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.947392225265503,
      "learning_rate": 0.0005916250293954927,
      "loss": 3.403,
      "step": 17370
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.490099310874939,
      "learning_rate": 0.0005916240695774163,
      "loss": 3.2359,
      "step": 17371
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4269517660140991,
      "learning_rate": 0.0005916231097051216,
      "loss": 3.1475,
      "step": 17372
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3393175601959229,
      "learning_rate": 0.0005916221497786086,
      "loss": 3.2101,
      "step": 17373
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5311954021453857,
      "learning_rate": 0.0005916211897978776,
      "loss": 3.1068,
      "step": 17374
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5214494466781616,
      "learning_rate": 0.0005916202297629286,
      "loss": 3.2702,
      "step": 17375
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3260047435760498,
      "learning_rate": 0.0005916192696737619,
      "loss": 3.2136,
      "step": 17376
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6178714036941528,
      "learning_rate": 0.0005916183095303778,
      "loss": 2.9661,
      "step": 17377
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7842427492141724,
      "learning_rate": 0.0005916173493327763,
      "loss": 3.129,
      "step": 17378
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6004457473754883,
      "learning_rate": 0.0005916163890809576,
      "loss": 2.9979,
      "step": 17379
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3083093166351318,
      "learning_rate": 0.0005916154287749219,
      "loss": 3.1585,
      "step": 17380
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.585892915725708,
      "learning_rate": 0.0005916144684146693,
      "loss": 3.0163,
      "step": 17381
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.379342794418335,
      "learning_rate": 0.0005916135080002002,
      "loss": 3.0489,
      "step": 17382
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7209023237228394,
      "learning_rate": 0.0005916125475315146,
      "loss": 3.2808,
      "step": 17383
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2550578117370605,
      "learning_rate": 0.0005916115870086127,
      "loss": 3.1002,
      "step": 17384
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3198258876800537,
      "learning_rate": 0.0005916106264314948,
      "loss": 3.0255,
      "step": 17385
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.820507764816284,
      "learning_rate": 0.000591609665800161,
      "loss": 3.1343,
      "step": 17386
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.626098155975342,
      "learning_rate": 0.0005916087051146113,
      "loss": 3.0769,
      "step": 17387
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5606656074523926,
      "learning_rate": 0.0005916077443748461,
      "loss": 2.9394,
      "step": 17388
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2609115839004517,
      "learning_rate": 0.0005916067835808655,
      "loss": 3.13,
      "step": 17389
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.265723705291748,
      "learning_rate": 0.0005916058227326698,
      "loss": 3.1367,
      "step": 17390
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3791961669921875,
      "learning_rate": 0.000591604861830259,
      "loss": 2.9492,
      "step": 17391
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7478033304214478,
      "learning_rate": 0.0005916039008736333,
      "loss": 3.2008,
      "step": 17392
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.690829038619995,
      "learning_rate": 0.000591602939862793,
      "loss": 3.0799,
      "step": 17393
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.405332088470459,
      "learning_rate": 0.0005916019787977381,
      "loss": 2.8994,
      "step": 17394
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1421566009521484,
      "learning_rate": 0.000591601017678469,
      "loss": 2.9917,
      "step": 17395
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1059608459472656,
      "learning_rate": 0.0005916000565049859,
      "loss": 3.0649,
      "step": 17396
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.8289144039154053,
      "learning_rate": 0.0005915990952772885,
      "loss": 3.1175,
      "step": 17397
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.6289544105529785,
      "learning_rate": 0.0005915981339953775,
      "loss": 3.2398,
      "step": 17398
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.714855670928955,
      "learning_rate": 0.0005915971726592529,
      "loss": 2.9179,
      "step": 17399
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4774583578109741,
      "learning_rate": 0.0005915962112689149,
      "loss": 3.2457,
      "step": 17400
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.367999792098999,
      "learning_rate": 0.0005915952498243638,
      "loss": 3.2648,
      "step": 17401
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4313619136810303,
      "learning_rate": 0.0005915942883255995,
      "loss": 3.0134,
      "step": 17402
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.598008155822754,
      "learning_rate": 0.0005915933267726222,
      "loss": 3.0437,
      "step": 17403
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.452197551727295,
      "learning_rate": 0.0005915923651654323,
      "loss": 3.1594,
      "step": 17404
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.585632085800171,
      "learning_rate": 0.0005915914035040299,
      "loss": 3.4023,
      "step": 17405
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5895391702651978,
      "learning_rate": 0.0005915904417884152,
      "loss": 3.3164,
      "step": 17406
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4296804666519165,
      "learning_rate": 0.0005915894800185882,
      "loss": 3.124,
      "step": 17407
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5347412824630737,
      "learning_rate": 0.0005915885181945492,
      "loss": 3.1041,
      "step": 17408
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.423977255821228,
      "learning_rate": 0.0005915875563162986,
      "loss": 3.1601,
      "step": 17409
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6520490646362305,
      "learning_rate": 0.0005915865943838362,
      "loss": 3.162,
      "step": 17410
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3636151552200317,
      "learning_rate": 0.0005915856323971624,
      "loss": 2.9376,
      "step": 17411
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3780810832977295,
      "learning_rate": 0.0005915846703562772,
      "loss": 3.2815,
      "step": 17412
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8119919300079346,
      "learning_rate": 0.0005915837082611812,
      "loss": 3.0292,
      "step": 17413
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8179553747177124,
      "learning_rate": 0.000591582746111874,
      "loss": 3.0684,
      "step": 17414
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5357471704483032,
      "learning_rate": 0.0005915817839083562,
      "loss": 3.2476,
      "step": 17415
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4002231359481812,
      "learning_rate": 0.0005915808216506277,
      "loss": 3.3768,
      "step": 17416
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2236394882202148,
      "learning_rate": 0.000591579859338689,
      "loss": 3.1151,
      "step": 17417
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4795477390289307,
      "learning_rate": 0.00059157889697254,
      "loss": 3.1823,
      "step": 17418
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4763423204421997,
      "learning_rate": 0.000591577934552181,
      "loss": 3.0212,
      "step": 17419
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7590490579605103,
      "learning_rate": 0.0005915769720776122,
      "loss": 2.9628,
      "step": 17420
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5084971189498901,
      "learning_rate": 0.0005915760095488337,
      "loss": 3.3619,
      "step": 17421
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4197099208831787,
      "learning_rate": 0.0005915750469658458,
      "loss": 3.2514,
      "step": 17422
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.623565673828125,
      "learning_rate": 0.0005915740843286485,
      "loss": 3.0516,
      "step": 17423
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1596012115478516,
      "learning_rate": 0.000591573121637242,
      "loss": 3.0602,
      "step": 17424
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.44714093208313,
      "learning_rate": 0.0005915721588916267,
      "loss": 3.0118,
      "step": 17425
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6016972064971924,
      "learning_rate": 0.0005915711960918026,
      "loss": 3.2635,
      "step": 17426
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1638567447662354,
      "learning_rate": 0.0005915702332377698,
      "loss": 3.204,
      "step": 17427
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8006731271743774,
      "learning_rate": 0.0005915692703295287,
      "loss": 3.1223,
      "step": 17428
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7795723676681519,
      "learning_rate": 0.0005915683073670794,
      "loss": 2.9436,
      "step": 17429
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7274960279464722,
      "learning_rate": 0.000591567344350422,
      "loss": 3.501,
      "step": 17430
    },
    {
      "epoch": 0.23,
      "grad_norm": 4.720673561096191,
      "learning_rate": 0.0005915663812795567,
      "loss": 2.9194,
      "step": 17431
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.6495795249938965,
      "learning_rate": 0.0005915654181544836,
      "loss": 3.0446,
      "step": 17432
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6366844177246094,
      "learning_rate": 0.0005915644549752032,
      "loss": 3.0757,
      "step": 17433
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.668713331222534,
      "learning_rate": 0.0005915634917417154,
      "loss": 3.1924,
      "step": 17434
    },
    {
      "epoch": 0.23,
      "grad_norm": 4.899190902709961,
      "learning_rate": 0.0005915625284540204,
      "loss": 3.2485,
      "step": 17435
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.3352317810058594,
      "learning_rate": 0.0005915615651121183,
      "loss": 3.187,
      "step": 17436
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7434070110321045,
      "learning_rate": 0.0005915606017160097,
      "loss": 3.156,
      "step": 17437
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.713975191116333,
      "learning_rate": 0.0005915596382656943,
      "loss": 3.1374,
      "step": 17438
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.7197837829589844,
      "learning_rate": 0.0005915586747611725,
      "loss": 3.1933,
      "step": 17439
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.0636773109436035,
      "learning_rate": 0.0005915577112024444,
      "loss": 2.8025,
      "step": 17440
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7129929065704346,
      "learning_rate": 0.0005915567475895102,
      "loss": 3.1249,
      "step": 17441
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.2377705574035645,
      "learning_rate": 0.0005915557839223702,
      "loss": 2.9686,
      "step": 17442
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.3213226795196533,
      "learning_rate": 0.0005915548202010244,
      "loss": 3.141,
      "step": 17443
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5263558626174927,
      "learning_rate": 0.000591553856425473,
      "loss": 3.117,
      "step": 17444
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.251042366027832,
      "learning_rate": 0.0005915528925957163,
      "loss": 3.3289,
      "step": 17445
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.179663896560669,
      "learning_rate": 0.0005915519287117545,
      "loss": 2.7277,
      "step": 17446
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2993457317352295,
      "learning_rate": 0.0005915509647735876,
      "loss": 3.0866,
      "step": 17447
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0736875534057617,
      "learning_rate": 0.0005915500007812158,
      "loss": 3.0346,
      "step": 17448
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7348873615264893,
      "learning_rate": 0.0005915490367346394,
      "loss": 2.9621,
      "step": 17449
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1089229583740234,
      "learning_rate": 0.0005915480726338587,
      "loss": 3.1592,
      "step": 17450
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1404407024383545,
      "learning_rate": 0.0005915471084788734,
      "loss": 3.152,
      "step": 17451
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7442957162857056,
      "learning_rate": 0.0005915461442696843,
      "loss": 3.0222,
      "step": 17452
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.7098491191864014,
      "learning_rate": 0.0005915451800062911,
      "loss": 3.1151,
      "step": 17453
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.673359990119934,
      "learning_rate": 0.0005915442156886942,
      "loss": 3.12,
      "step": 17454
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1895785331726074,
      "learning_rate": 0.0005915432513168937,
      "loss": 3.1194,
      "step": 17455
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.624281644821167,
      "learning_rate": 0.0005915422868908898,
      "loss": 3.1502,
      "step": 17456
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4778578281402588,
      "learning_rate": 0.0005915413224106827,
      "loss": 3.063,
      "step": 17457
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3222839832305908,
      "learning_rate": 0.0005915403578762724,
      "loss": 3.0897,
      "step": 17458
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.385117530822754,
      "learning_rate": 0.0005915393932876595,
      "loss": 3.1074,
      "step": 17459
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6782712936401367,
      "learning_rate": 0.0005915384286448439,
      "loss": 2.9822,
      "step": 17460
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2636116743087769,
      "learning_rate": 0.0005915374639478256,
      "loss": 3.1749,
      "step": 17461
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4010103940963745,
      "learning_rate": 0.0005915364991966052,
      "loss": 2.9894,
      "step": 17462
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3913698196411133,
      "learning_rate": 0.0005915355343911826,
      "loss": 2.9941,
      "step": 17463
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4210838079452515,
      "learning_rate": 0.000591534569531558,
      "loss": 3.1143,
      "step": 17464
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3028160333633423,
      "learning_rate": 0.0005915336046177316,
      "loss": 2.9904,
      "step": 17465
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7944183349609375,
      "learning_rate": 0.0005915326396497036,
      "loss": 3.2478,
      "step": 17466
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3852838277816772,
      "learning_rate": 0.0005915316746274743,
      "loss": 3.0495,
      "step": 17467
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.399474024772644,
      "learning_rate": 0.0005915307095510436,
      "loss": 2.9635,
      "step": 17468
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.974108338356018,
      "learning_rate": 0.000591529744420412,
      "loss": 3.1982,
      "step": 17469
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4008872509002686,
      "learning_rate": 0.0005915287792355794,
      "loss": 3.0975,
      "step": 17470
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.157424211502075,
      "learning_rate": 0.0005915278139965461,
      "loss": 2.9965,
      "step": 17471
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7812050580978394,
      "learning_rate": 0.0005915268487033123,
      "loss": 3.1932,
      "step": 17472
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.556447148323059,
      "learning_rate": 0.0005915258833558781,
      "loss": 3.2718,
      "step": 17473
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0310397148132324,
      "learning_rate": 0.0005915249179542438,
      "loss": 3.096,
      "step": 17474
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.683496117591858,
      "learning_rate": 0.0005915239524984096,
      "loss": 3.0363,
      "step": 17475
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4322402477264404,
      "learning_rate": 0.0005915229869883755,
      "loss": 2.9487,
      "step": 17476
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4100351333618164,
      "learning_rate": 0.0005915220214241418,
      "loss": 3.1674,
      "step": 17477
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4558523893356323,
      "learning_rate": 0.0005915210558057086,
      "loss": 3.2754,
      "step": 17478
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6329021453857422,
      "learning_rate": 0.0005915200901330761,
      "loss": 3.2908,
      "step": 17479
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0389811992645264,
      "learning_rate": 0.0005915191244062446,
      "loss": 3.15,
      "step": 17480
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4114185571670532,
      "learning_rate": 0.0005915181586252142,
      "loss": 3.4636,
      "step": 17481
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4148942232131958,
      "learning_rate": 0.000591517192789985,
      "loss": 3.208,
      "step": 17482
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4139947891235352,
      "learning_rate": 0.0005915162269005573,
      "loss": 3.0598,
      "step": 17483
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6353410482406616,
      "learning_rate": 0.0005915152609569312,
      "loss": 3.1135,
      "step": 17484
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0263075828552246,
      "learning_rate": 0.000591514294959107,
      "loss": 3.4284,
      "step": 17485
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.643641471862793,
      "learning_rate": 0.0005915133289070846,
      "loss": 2.9474,
      "step": 17486
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9605377912521362,
      "learning_rate": 0.0005915123628008645,
      "loss": 2.9281,
      "step": 17487
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4931787252426147,
      "learning_rate": 0.0005915113966404468,
      "loss": 3.2019,
      "step": 17488
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4355190992355347,
      "learning_rate": 0.0005915104304258315,
      "loss": 3.0413,
      "step": 17489
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6528100967407227,
      "learning_rate": 0.0005915094641570191,
      "loss": 3.2896,
      "step": 17490
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5391747951507568,
      "learning_rate": 0.0005915084978340094,
      "loss": 2.8749,
      "step": 17491
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.801281213760376,
      "learning_rate": 0.0005915075314568028,
      "loss": 3.0558,
      "step": 17492
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6116071939468384,
      "learning_rate": 0.0005915065650253995,
      "loss": 3.031,
      "step": 17493
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5256704092025757,
      "learning_rate": 0.0005915055985397996,
      "loss": 3.2001,
      "step": 17494
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7732919454574585,
      "learning_rate": 0.0005915046320000033,
      "loss": 3.0821,
      "step": 17495
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5085175037384033,
      "learning_rate": 0.0005915036654060108,
      "loss": 3.1729,
      "step": 17496
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7931454181671143,
      "learning_rate": 0.0005915026987578223,
      "loss": 3.2106,
      "step": 17497
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.440910816192627,
      "learning_rate": 0.0005915017320554378,
      "loss": 3.0553,
      "step": 17498
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4412294626235962,
      "learning_rate": 0.0005915007652988578,
      "loss": 3.2408,
      "step": 17499
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.408879280090332,
      "learning_rate": 0.0005914997984880823,
      "loss": 3.195,
      "step": 17500
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3884221315383911,
      "learning_rate": 0.0005914988316231114,
      "loss": 3.0615,
      "step": 17501
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8277093172073364,
      "learning_rate": 0.0005914978647039453,
      "loss": 3.0783,
      "step": 17502
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7775194644927979,
      "learning_rate": 0.0005914968977305844,
      "loss": 2.9777,
      "step": 17503
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5650815963745117,
      "learning_rate": 0.0005914959307030287,
      "loss": 3.2736,
      "step": 17504
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3538473844528198,
      "learning_rate": 0.0005914949636212783,
      "loss": 3.0032,
      "step": 17505
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5300908088684082,
      "learning_rate": 0.0005914939964853336,
      "loss": 3.2691,
      "step": 17506
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3182727098464966,
      "learning_rate": 0.0005914930292951945,
      "loss": 3.0871,
      "step": 17507
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5560986995697021,
      "learning_rate": 0.0005914920620508615,
      "loss": 3.1109,
      "step": 17508
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9981577396392822,
      "learning_rate": 0.0005914910947523346,
      "loss": 3.0809,
      "step": 17509
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5251485109329224,
      "learning_rate": 0.0005914901273996139,
      "loss": 3.0458,
      "step": 17510
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1192026138305664,
      "learning_rate": 0.0005914891599926998,
      "loss": 3.049,
      "step": 17511
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7134102582931519,
      "learning_rate": 0.0005914881925315924,
      "loss": 2.9915,
      "step": 17512
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5463414192199707,
      "learning_rate": 0.0005914872250162918,
      "loss": 3.0293,
      "step": 17513
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5247150659561157,
      "learning_rate": 0.0005914862574467982,
      "loss": 3.0084,
      "step": 17514
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5098639726638794,
      "learning_rate": 0.0005914852898231118,
      "loss": 3.0709,
      "step": 17515
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5183601379394531,
      "learning_rate": 0.0005914843221452328,
      "loss": 3.3122,
      "step": 17516
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2760848999023438,
      "learning_rate": 0.0005914833544131612,
      "loss": 3.3636,
      "step": 17517
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3301163911819458,
      "learning_rate": 0.0005914823866268975,
      "loss": 2.9789,
      "step": 17518
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5378347635269165,
      "learning_rate": 0.0005914814187864418,
      "loss": 3.171,
      "step": 17519
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.676900029182434,
      "learning_rate": 0.000591480450891794,
      "loss": 3.1128,
      "step": 17520
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6435601711273193,
      "learning_rate": 0.0005914794829429546,
      "loss": 2.8473,
      "step": 17521
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.525875210762024,
      "learning_rate": 0.0005914785149399237,
      "loss": 3.2484,
      "step": 17522
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7137640714645386,
      "learning_rate": 0.0005914775468827013,
      "loss": 3.1524,
      "step": 17523
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5417218208312988,
      "learning_rate": 0.0005914765787712879,
      "loss": 3.2039,
      "step": 17524
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.751268982887268,
      "learning_rate": 0.0005914756106056834,
      "loss": 3.0606,
      "step": 17525
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5961041450500488,
      "learning_rate": 0.0005914746423858881,
      "loss": 3.3102,
      "step": 17526
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.754165530204773,
      "learning_rate": 0.0005914736741119022,
      "loss": 3.0145,
      "step": 17527
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5169689655303955,
      "learning_rate": 0.0005914727057837257,
      "loss": 2.9276,
      "step": 17528
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6571370363235474,
      "learning_rate": 0.000591471737401359,
      "loss": 3.3159,
      "step": 17529
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5015028715133667,
      "learning_rate": 0.0005914707689648023,
      "loss": 2.9702,
      "step": 17530
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.389286994934082,
      "learning_rate": 0.0005914698004740556,
      "loss": 2.7955,
      "step": 17531
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.808857798576355,
      "learning_rate": 0.0005914688319291192,
      "loss": 3.4769,
      "step": 17532
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.387853980064392,
      "learning_rate": 0.0005914678633299931,
      "loss": 3.0906,
      "step": 17533
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8045467138290405,
      "learning_rate": 0.0005914668946766778,
      "loss": 3.1228,
      "step": 17534
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5690497159957886,
      "learning_rate": 0.0005914659259691733,
      "loss": 3.1393,
      "step": 17535
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4120917320251465,
      "learning_rate": 0.0005914649572074796,
      "loss": 2.9434,
      "step": 17536
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.41753351688385,
      "learning_rate": 0.0005914639883915972,
      "loss": 3.1786,
      "step": 17537
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.8608107566833496,
      "learning_rate": 0.0005914630195215262,
      "loss": 2.9734,
      "step": 17538
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4342750310897827,
      "learning_rate": 0.0005914620505972666,
      "loss": 3.0596,
      "step": 17539
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7234899997711182,
      "learning_rate": 0.0005914610816188187,
      "loss": 3.1401,
      "step": 17540
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9429728984832764,
      "learning_rate": 0.0005914601125861828,
      "loss": 3.0256,
      "step": 17541
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.887734293937683,
      "learning_rate": 0.0005914591434993589,
      "loss": 2.9679,
      "step": 17542
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5269639492034912,
      "learning_rate": 0.0005914581743583472,
      "loss": 3.0803,
      "step": 17543
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4135024547576904,
      "learning_rate": 0.000591457205163148,
      "loss": 3.1408,
      "step": 17544
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8678803443908691,
      "learning_rate": 0.0005914562359137614,
      "loss": 3.1474,
      "step": 17545
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5402792692184448,
      "learning_rate": 0.0005914552666101875,
      "loss": 3.1655,
      "step": 17546
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9068917036056519,
      "learning_rate": 0.0005914542972524266,
      "loss": 3.1511,
      "step": 17547
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5662497282028198,
      "learning_rate": 0.000591453327840479,
      "loss": 3.2056,
      "step": 17548
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.534403920173645,
      "learning_rate": 0.0005914523583743445,
      "loss": 3.0514,
      "step": 17549
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1830742359161377,
      "learning_rate": 0.0005914513888540235,
      "loss": 2.9983,
      "step": 17550
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5912649631500244,
      "learning_rate": 0.0005914504192795164,
      "loss": 3.2658,
      "step": 17551
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8292328119277954,
      "learning_rate": 0.0005914494496508229,
      "loss": 2.9811,
      "step": 17552
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5253965854644775,
      "learning_rate": 0.0005914484799679437,
      "loss": 3.1591,
      "step": 17553
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4867206811904907,
      "learning_rate": 0.0005914475102308786,
      "loss": 3.1558,
      "step": 17554
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3982040882110596,
      "learning_rate": 0.0005914465404396279,
      "loss": 3.2966,
      "step": 17555
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6497727632522583,
      "learning_rate": 0.0005914455705941918,
      "loss": 2.8525,
      "step": 17556
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1192822456359863,
      "learning_rate": 0.0005914446006945705,
      "loss": 2.9995,
      "step": 17557
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5288819074630737,
      "learning_rate": 0.0005914436307407642,
      "loss": 3.3874,
      "step": 17558
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5000934600830078,
      "learning_rate": 0.0005914426607327728,
      "loss": 3.2189,
      "step": 17559
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9295620918273926,
      "learning_rate": 0.0005914416906705969,
      "loss": 2.8806,
      "step": 17560
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4924391508102417,
      "learning_rate": 0.0005914407205542364,
      "loss": 3.34,
      "step": 17561
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9010939598083496,
      "learning_rate": 0.0005914397503836916,
      "loss": 3.3041,
      "step": 17562
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2578039169311523,
      "learning_rate": 0.0005914387801589627,
      "loss": 3.1026,
      "step": 17563
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9895516633987427,
      "learning_rate": 0.0005914378098800498,
      "loss": 2.9725,
      "step": 17564
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0459954738616943,
      "learning_rate": 0.0005914368395469531,
      "loss": 2.9784,
      "step": 17565
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1167376041412354,
      "learning_rate": 0.0005914358691596728,
      "loss": 3.2042,
      "step": 17566
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.9452767372131348,
      "learning_rate": 0.0005914348987182091,
      "loss": 3.0168,
      "step": 17567
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.718631625175476,
      "learning_rate": 0.000591433928222562,
      "loss": 3.0662,
      "step": 17568
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3986645936965942,
      "learning_rate": 0.000591432957672732,
      "loss": 3.1729,
      "step": 17569
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4371161460876465,
      "learning_rate": 0.0005914319870687191,
      "loss": 3.1929,
      "step": 17570
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8306995630264282,
      "learning_rate": 0.0005914310164105234,
      "loss": 3.4208,
      "step": 17571
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6840732097625732,
      "learning_rate": 0.0005914300456981452,
      "loss": 3.1609,
      "step": 17572
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0329763889312744,
      "learning_rate": 0.0005914290749315847,
      "loss": 2.7398,
      "step": 17573
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4910844564437866,
      "learning_rate": 0.000591428104110842,
      "loss": 2.9616,
      "step": 17574
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4852675199508667,
      "learning_rate": 0.0005914271332359174,
      "loss": 2.9492,
      "step": 17575
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.152195692062378,
      "learning_rate": 0.0005914261623068109,
      "loss": 2.8489,
      "step": 17576
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7673664093017578,
      "learning_rate": 0.0005914251913235228,
      "loss": 3.1075,
      "step": 17577
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1999784708023071,
      "learning_rate": 0.0005914242202860533,
      "loss": 3.1233,
      "step": 17578
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9548542499542236,
      "learning_rate": 0.0005914232491944024,
      "loss": 3.314,
      "step": 17579
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3074257373809814,
      "learning_rate": 0.0005914222780485706,
      "loss": 3.2936,
      "step": 17580
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8878370523452759,
      "learning_rate": 0.0005914213068485577,
      "loss": 3.3458,
      "step": 17581
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5223238468170166,
      "learning_rate": 0.0005914203355943642,
      "loss": 3.2862,
      "step": 17582
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.86544930934906,
      "learning_rate": 0.0005914193642859901,
      "loss": 3.1704,
      "step": 17583
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6395004987716675,
      "learning_rate": 0.0005914183929234358,
      "loss": 3.1256,
      "step": 17584
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9615705013275146,
      "learning_rate": 0.0005914174215067011,
      "loss": 2.9453,
      "step": 17585
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.516191840171814,
      "learning_rate": 0.0005914164500357865,
      "loss": 3.0976,
      "step": 17586
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7609500885009766,
      "learning_rate": 0.0005914154785106921,
      "loss": 3.1456,
      "step": 17587
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5488430261611938,
      "learning_rate": 0.0005914145069314179,
      "loss": 3.4906,
      "step": 17588
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4885255098342896,
      "learning_rate": 0.0005914135352979645,
      "loss": 3.1945,
      "step": 17589
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3275123834609985,
      "learning_rate": 0.0005914125636103316,
      "loss": 3.0988,
      "step": 17590
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4187872409820557,
      "learning_rate": 0.0005914115918685198,
      "loss": 2.8944,
      "step": 17591
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2910295724868774,
      "learning_rate": 0.0005914106200725289,
      "loss": 3.3353,
      "step": 17592
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3229553699493408,
      "learning_rate": 0.0005914096482223595,
      "loss": 3.1966,
      "step": 17593
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5615243911743164,
      "learning_rate": 0.0005914086763180113,
      "loss": 3.0695,
      "step": 17594
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.252935528755188,
      "learning_rate": 0.0005914077043594849,
      "loss": 3.263,
      "step": 17595
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3853123188018799,
      "learning_rate": 0.0005914067323467803,
      "loss": 3.2467,
      "step": 17596
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4920735359191895,
      "learning_rate": 0.0005914057602798976,
      "loss": 2.9306,
      "step": 17597
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3687894344329834,
      "learning_rate": 0.0005914047881588369,
      "loss": 2.9567,
      "step": 17598
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8313796520233154,
      "learning_rate": 0.0005914038159835988,
      "loss": 2.8964,
      "step": 17599
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5872315168380737,
      "learning_rate": 0.0005914028437541833,
      "loss": 3.0517,
      "step": 17600
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.662912368774414,
      "learning_rate": 0.0005914018714705903,
      "loss": 3.4181,
      "step": 17601
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.58597993850708,
      "learning_rate": 0.0005914008991328203,
      "loss": 2.9515,
      "step": 17602
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5158953666687012,
      "learning_rate": 0.0005913999267408733,
      "loss": 3.0987,
      "step": 17603
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0471532344818115,
      "learning_rate": 0.0005913989542947496,
      "loss": 3.0966,
      "step": 17604
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6445369720458984,
      "learning_rate": 0.0005913979817944495,
      "loss": 3.0979,
      "step": 17605
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7333704233169556,
      "learning_rate": 0.0005913970092399727,
      "loss": 3.1025,
      "step": 17606
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5177332162857056,
      "learning_rate": 0.0005913960366313198,
      "loss": 3.3799,
      "step": 17607
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5528260469436646,
      "learning_rate": 0.000591395063968491,
      "loss": 3.1815,
      "step": 17608
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.639866828918457,
      "learning_rate": 0.0005913940912514863,
      "loss": 3.0398,
      "step": 17609
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5548070669174194,
      "learning_rate": 0.0005913931184803059,
      "loss": 2.9735,
      "step": 17610
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8410738706588745,
      "learning_rate": 0.0005913921456549501,
      "loss": 3.3661,
      "step": 17611
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.448317527770996,
      "learning_rate": 0.0005913911727754188,
      "loss": 3.2311,
      "step": 17612
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.194368839263916,
      "learning_rate": 0.0005913901998417125,
      "loss": 2.9311,
      "step": 17613
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7903409004211426,
      "learning_rate": 0.0005913892268538313,
      "loss": 3.1649,
      "step": 17614
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8431488275527954,
      "learning_rate": 0.0005913882538117754,
      "loss": 3.0197,
      "step": 17615
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8779053688049316,
      "learning_rate": 0.0005913872807155449,
      "loss": 3.1538,
      "step": 17616
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5545167922973633,
      "learning_rate": 0.0005913863075651399,
      "loss": 3.0852,
      "step": 17617
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.440436840057373,
      "learning_rate": 0.0005913853343605607,
      "loss": 3.0773,
      "step": 17618
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8772070407867432,
      "learning_rate": 0.0005913843611018074,
      "loss": 3.1768,
      "step": 17619
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5857532024383545,
      "learning_rate": 0.0005913833877888803,
      "loss": 3.2499,
      "step": 17620
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7782684564590454,
      "learning_rate": 0.0005913824144217797,
      "loss": 3.1121,
      "step": 17621
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3940509557724,
      "learning_rate": 0.0005913814410005054,
      "loss": 3.1532,
      "step": 17622
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5039409399032593,
      "learning_rate": 0.0005913804675250579,
      "loss": 3.3652,
      "step": 17623
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7145124673843384,
      "learning_rate": 0.0005913794939954373,
      "loss": 3.1676,
      "step": 17624
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8884986639022827,
      "learning_rate": 0.0005913785204116436,
      "loss": 3.1213,
      "step": 17625
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5512722730636597,
      "learning_rate": 0.0005913775467736773,
      "loss": 3.1693,
      "step": 17626
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6469560861587524,
      "learning_rate": 0.0005913765730815383,
      "loss": 2.8539,
      "step": 17627
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7663021087646484,
      "learning_rate": 0.000591375599335227,
      "loss": 3.3785,
      "step": 17628
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6071804761886597,
      "learning_rate": 0.0005913746255347433,
      "loss": 3.3809,
      "step": 17629
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8150144815444946,
      "learning_rate": 0.0005913736516800877,
      "loss": 3.4571,
      "step": 17630
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6668494939804077,
      "learning_rate": 0.0005913726777712602,
      "loss": 3.1639,
      "step": 17631
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.570125937461853,
      "learning_rate": 0.0005913717038082611,
      "loss": 3.1145,
      "step": 17632
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8858033418655396,
      "learning_rate": 0.0005913707297910904,
      "loss": 3.0144,
      "step": 17633
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7007488012313843,
      "learning_rate": 0.0005913697557197483,
      "loss": 3.1112,
      "step": 17634
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.71310555934906,
      "learning_rate": 0.0005913687815942352,
      "loss": 2.9928,
      "step": 17635
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3734465837478638,
      "learning_rate": 0.0005913678074145511,
      "loss": 3.1014,
      "step": 17636
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6566975116729736,
      "learning_rate": 0.0005913668331806963,
      "loss": 2.9635,
      "step": 17637
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5211894512176514,
      "learning_rate": 0.0005913658588926708,
      "loss": 2.9971,
      "step": 17638
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4814629554748535,
      "learning_rate": 0.0005913648845504749,
      "loss": 3.1971,
      "step": 17639
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.8461501598358154,
      "learning_rate": 0.0005913639101541089,
      "loss": 2.9154,
      "step": 17640
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.439442753791809,
      "learning_rate": 0.0005913629357035727,
      "loss": 3.285,
      "step": 17641
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.304189443588257,
      "learning_rate": 0.0005913619611988666,
      "loss": 3.0107,
      "step": 17642
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.831799030303955,
      "learning_rate": 0.0005913609866399909,
      "loss": 3.0581,
      "step": 17643
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3575392961502075,
      "learning_rate": 0.0005913600120269457,
      "loss": 3.0448,
      "step": 17644
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8634628057479858,
      "learning_rate": 0.0005913590373597312,
      "loss": 3.1527,
      "step": 17645
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.404779076576233,
      "learning_rate": 0.0005913580626383475,
      "loss": 2.9391,
      "step": 17646
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.802832841873169,
      "learning_rate": 0.0005913570878627949,
      "loss": 3.0052,
      "step": 17647
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4334819316864014,
      "learning_rate": 0.0005913561130330734,
      "loss": 3.1259,
      "step": 17648
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4582608938217163,
      "learning_rate": 0.0005913551381491834,
      "loss": 3.161,
      "step": 17649
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.290471076965332,
      "learning_rate": 0.0005913541632111248,
      "loss": 3.2376,
      "step": 17650
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4729502201080322,
      "learning_rate": 0.0005913531882188982,
      "loss": 3.1311,
      "step": 17651
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8005998134613037,
      "learning_rate": 0.0005913522131725035,
      "loss": 3.1021,
      "step": 17652
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3541704416275024,
      "learning_rate": 0.0005913512380719408,
      "loss": 3.0795,
      "step": 17653
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5699987411499023,
      "learning_rate": 0.0005913502629172104,
      "loss": 3.2677,
      "step": 17654
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2860620021820068,
      "learning_rate": 0.0005913492877083127,
      "loss": 3.1159,
      "step": 17655
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3251925706863403,
      "learning_rate": 0.0005913483124452474,
      "loss": 2.9267,
      "step": 17656
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7148118019104004,
      "learning_rate": 0.0005913473371280152,
      "loss": 3.0607,
      "step": 17657
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.6988203525543213,
      "learning_rate": 0.0005913463617566158,
      "loss": 3.3199,
      "step": 17658
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.45099675655365,
      "learning_rate": 0.0005913453863310498,
      "loss": 3.228,
      "step": 17659
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3553094863891602,
      "learning_rate": 0.0005913444108513171,
      "loss": 2.9774,
      "step": 17660
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0276565551757812,
      "learning_rate": 0.000591343435317418,
      "loss": 2.7822,
      "step": 17661
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3970203399658203,
      "learning_rate": 0.0005913424597293526,
      "loss": 3.1594,
      "step": 17662
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6521172523498535,
      "learning_rate": 0.0005913414840871212,
      "loss": 2.9956,
      "step": 17663
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9236187934875488,
      "learning_rate": 0.0005913405083907239,
      "loss": 3.1248,
      "step": 17664
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2476509809494019,
      "learning_rate": 0.0005913395326401609,
      "loss": 2.9915,
      "step": 17665
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8617291450500488,
      "learning_rate": 0.0005913385568354324,
      "loss": 3.2644,
      "step": 17666
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5046783685684204,
      "learning_rate": 0.0005913375809765385,
      "loss": 3.1726,
      "step": 17667
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.929187297821045,
      "learning_rate": 0.0005913366050634795,
      "loss": 2.9178,
      "step": 17668
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4630908966064453,
      "learning_rate": 0.0005913356290962556,
      "loss": 3.2412,
      "step": 17669
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0952863693237305,
      "learning_rate": 0.0005913346530748668,
      "loss": 3.065,
      "step": 17670
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8373875617980957,
      "learning_rate": 0.0005913336769993134,
      "loss": 3.146,
      "step": 17671
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5845694541931152,
      "learning_rate": 0.0005913327008695955,
      "loss": 3.243,
      "step": 17672
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.722640037536621,
      "learning_rate": 0.0005913317246857135,
      "loss": 3.0296,
      "step": 17673
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9770225286483765,
      "learning_rate": 0.0005913307484476674,
      "loss": 2.997,
      "step": 17674
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9228326082229614,
      "learning_rate": 0.0005913297721554573,
      "loss": 3.3014,
      "step": 17675
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.781949520111084,
      "learning_rate": 0.0005913287958090836,
      "loss": 3.0672,
      "step": 17676
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.888444662094116,
      "learning_rate": 0.0005913278194085463,
      "loss": 3.1816,
      "step": 17677
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.137287139892578,
      "learning_rate": 0.0005913268429538457,
      "loss": 3.167,
      "step": 17678
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.961834669113159,
      "learning_rate": 0.000591325866444982,
      "loss": 3.0059,
      "step": 17679
    },
    {
      "epoch": 0.23,
      "grad_norm": 4.266450881958008,
      "learning_rate": 0.0005913248898819553,
      "loss": 3.0431,
      "step": 17680
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1520297527313232,
      "learning_rate": 0.0005913239132647657,
      "loss": 3.005,
      "step": 17681
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.476917028427124,
      "learning_rate": 0.0005913229365934136,
      "loss": 3.1243,
      "step": 17682
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.486100435256958,
      "learning_rate": 0.000591321959867899,
      "loss": 3.3387,
      "step": 17683
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.2595107555389404,
      "learning_rate": 0.0005913209830882222,
      "loss": 2.9547,
      "step": 17684
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.1080269813537598,
      "learning_rate": 0.0005913200062543833,
      "loss": 3.278,
      "step": 17685
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.366908073425293,
      "learning_rate": 0.0005913190293663825,
      "loss": 3.1464,
      "step": 17686
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4054393768310547,
      "learning_rate": 0.0005913180524242201,
      "loss": 3.1665,
      "step": 17687
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0601651668548584,
      "learning_rate": 0.000591317075427896,
      "loss": 3.2577,
      "step": 17688
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.9855613708496094,
      "learning_rate": 0.0005913160983774106,
      "loss": 3.1526,
      "step": 17689
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.655604362487793,
      "learning_rate": 0.0005913151212727642,
      "loss": 3.0271,
      "step": 17690
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.855455994606018,
      "learning_rate": 0.0005913141441139567,
      "loss": 3.0599,
      "step": 17691
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7260493040084839,
      "learning_rate": 0.0005913131669009885,
      "loss": 3.0754,
      "step": 17692
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1192071437835693,
      "learning_rate": 0.0005913121896338595,
      "loss": 3.1552,
      "step": 17693
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7685236930847168,
      "learning_rate": 0.0005913112123125702,
      "loss": 3.2887,
      "step": 17694
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7548682689666748,
      "learning_rate": 0.0005913102349371205,
      "loss": 3.0907,
      "step": 17695
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0346028804779053,
      "learning_rate": 0.0005913092575075108,
      "loss": 3.1443,
      "step": 17696
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5638259649276733,
      "learning_rate": 0.0005913082800237413,
      "loss": 3.067,
      "step": 17697
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4799302816390991,
      "learning_rate": 0.000591307302485812,
      "loss": 3.1924,
      "step": 17698
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.689170241355896,
      "learning_rate": 0.0005913063248937232,
      "loss": 3.0686,
      "step": 17699
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.111004590988159,
      "learning_rate": 0.000591305347247475,
      "loss": 3.3477,
      "step": 17700
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0680017471313477,
      "learning_rate": 0.0005913043695470677,
      "loss": 3.0852,
      "step": 17701
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7187196016311646,
      "learning_rate": 0.0005913033917925015,
      "loss": 3.2231,
      "step": 17702
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9160654544830322,
      "learning_rate": 0.0005913024139837764,
      "loss": 3.1622,
      "step": 17703
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8851425647735596,
      "learning_rate": 0.0005913014361208927,
      "loss": 3.0698,
      "step": 17704
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4071825742721558,
      "learning_rate": 0.0005913004582038505,
      "loss": 3.2434,
      "step": 17705
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.852718710899353,
      "learning_rate": 0.0005912994802326502,
      "loss": 3.0923,
      "step": 17706
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1908316612243652,
      "learning_rate": 0.0005912985022072917,
      "loss": 2.8233,
      "step": 17707
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9026027917861938,
      "learning_rate": 0.0005912975241277754,
      "loss": 3.0508,
      "step": 17708
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2999820709228516,
      "learning_rate": 0.0005912965459941013,
      "loss": 3.3552,
      "step": 17709
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2162039279937744,
      "learning_rate": 0.0005912955678062697,
      "loss": 2.994,
      "step": 17710
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3811579942703247,
      "learning_rate": 0.0005912945895642808,
      "loss": 3.036,
      "step": 17711
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4678159952163696,
      "learning_rate": 0.0005912936112681347,
      "loss": 3.261,
      "step": 17712
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4702754020690918,
      "learning_rate": 0.0005912926329178316,
      "loss": 3.0546,
      "step": 17713
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2750611305236816,
      "learning_rate": 0.0005912916545133718,
      "loss": 3.1681,
      "step": 17714
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7761632204055786,
      "learning_rate": 0.0005912906760547553,
      "loss": 2.9824,
      "step": 17715
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4717720746994019,
      "learning_rate": 0.0005912896975419823,
      "loss": 3.0171,
      "step": 17716
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.361383080482483,
      "learning_rate": 0.0005912887189750531,
      "loss": 3.1618,
      "step": 17717
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5303606986999512,
      "learning_rate": 0.0005912877403539679,
      "loss": 3.1952,
      "step": 17718
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.762526512145996,
      "learning_rate": 0.0005912867616787268,
      "loss": 3.0622,
      "step": 17719
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.304774522781372,
      "learning_rate": 0.0005912857829493299,
      "loss": 3.1602,
      "step": 17720
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8940045833587646,
      "learning_rate": 0.0005912848041657775,
      "loss": 3.0202,
      "step": 17721
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.059112310409546,
      "learning_rate": 0.0005912838253280698,
      "loss": 3.006,
      "step": 17722
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.355898380279541,
      "learning_rate": 0.0005912828464362069,
      "loss": 3.0276,
      "step": 17723
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0739166736602783,
      "learning_rate": 0.0005912818674901891,
      "loss": 3.1658,
      "step": 17724
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5824271440505981,
      "learning_rate": 0.0005912808884900164,
      "loss": 3.0502,
      "step": 17725
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5906012058258057,
      "learning_rate": 0.0005912799094356891,
      "loss": 3.0853,
      "step": 17726
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5060172080993652,
      "learning_rate": 0.0005912789303272074,
      "loss": 2.9016,
      "step": 17727
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.325780987739563,
      "learning_rate": 0.0005912779511645714,
      "loss": 3.3585,
      "step": 17728
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.310114860534668,
      "learning_rate": 0.0005912769719477814,
      "loss": 2.9795,
      "step": 17729
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8621041774749756,
      "learning_rate": 0.0005912759926768376,
      "loss": 3.07,
      "step": 17730
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.226209878921509,
      "learning_rate": 0.0005912750133517399,
      "loss": 3.2924,
      "step": 17731
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.484070897102356,
      "learning_rate": 0.0005912740339724888,
      "loss": 3.0659,
      "step": 17732
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4367507696151733,
      "learning_rate": 0.0005912730545390842,
      "loss": 2.7354,
      "step": 17733
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7551017999649048,
      "learning_rate": 0.0005912720750515266,
      "loss": 2.9529,
      "step": 17734
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.478370428085327,
      "learning_rate": 0.000591271095509816,
      "loss": 3.0907,
      "step": 17735
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6539080142974854,
      "learning_rate": 0.0005912701159139527,
      "loss": 3.0376,
      "step": 17736
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.602346658706665,
      "learning_rate": 0.0005912691362639366,
      "loss": 2.9714,
      "step": 17737
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5365504026412964,
      "learning_rate": 0.0005912681565597681,
      "loss": 3.3038,
      "step": 17738
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8942939043045044,
      "learning_rate": 0.0005912671768014474,
      "loss": 3.0486,
      "step": 17739
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4431381225585938,
      "learning_rate": 0.0005912661969889746,
      "loss": 3.0876,
      "step": 17740
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.052250623703003,
      "learning_rate": 0.0005912652171223499,
      "loss": 3.0394,
      "step": 17741
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4261481761932373,
      "learning_rate": 0.0005912642372015736,
      "loss": 3.0465,
      "step": 17742
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3648921251296997,
      "learning_rate": 0.0005912632572266458,
      "loss": 3.0859,
      "step": 17743
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3895701169967651,
      "learning_rate": 0.0005912622771975665,
      "loss": 3.038,
      "step": 17744
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8570219278335571,
      "learning_rate": 0.0005912612971143362,
      "loss": 3.502,
      "step": 17745
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.318428874015808,
      "learning_rate": 0.0005912603169769548,
      "loss": 3.2,
      "step": 17746
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2797236442565918,
      "learning_rate": 0.0005912593367854227,
      "loss": 3.1033,
      "step": 17747
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5118197202682495,
      "learning_rate": 0.00059125835653974,
      "loss": 3.0557,
      "step": 17748
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3933764696121216,
      "learning_rate": 0.0005912573762399068,
      "loss": 3.2834,
      "step": 17749
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.56878662109375,
      "learning_rate": 0.0005912563958859233,
      "loss": 3.4431,
      "step": 17750
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1134681701660156,
      "learning_rate": 0.0005912554154777898,
      "loss": 2.9734,
      "step": 17751
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6250357627868652,
      "learning_rate": 0.0005912544350155065,
      "loss": 3.1545,
      "step": 17752
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.082878828048706,
      "learning_rate": 0.0005912534544990735,
      "loss": 3.0486,
      "step": 17753
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8529030084609985,
      "learning_rate": 0.0005912524739284909,
      "loss": 3.2051,
      "step": 17754
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7047560214996338,
      "learning_rate": 0.000591251493303759,
      "loss": 3.0814,
      "step": 17755
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8358714580535889,
      "learning_rate": 0.000591250512624878,
      "loss": 3.042,
      "step": 17756
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.423811912536621,
      "learning_rate": 0.0005912495318918479,
      "loss": 2.9341,
      "step": 17757
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8917725086212158,
      "learning_rate": 0.0005912485511046691,
      "loss": 3.1528,
      "step": 17758
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0068247318267822,
      "learning_rate": 0.0005912475702633417,
      "loss": 2.999,
      "step": 17759
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9065829515457153,
      "learning_rate": 0.0005912465893678658,
      "loss": 3.0551,
      "step": 17760
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9494751691818237,
      "learning_rate": 0.0005912456084182418,
      "loss": 3.0686,
      "step": 17761
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.414137363433838,
      "learning_rate": 0.0005912446274144698,
      "loss": 2.9394,
      "step": 17762
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4767829179763794,
      "learning_rate": 0.0005912436463565496,
      "loss": 3.3338,
      "step": 17763
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5144426822662354,
      "learning_rate": 0.0005912426652444821,
      "loss": 2.9558,
      "step": 17764
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8366878032684326,
      "learning_rate": 0.0005912416840782669,
      "loss": 3.0523,
      "step": 17765
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.321687936782837,
      "learning_rate": 0.0005912407028579043,
      "loss": 2.8986,
      "step": 17766
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.578883409500122,
      "learning_rate": 0.0005912397215833946,
      "loss": 2.9567,
      "step": 17767
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1421607732772827,
      "learning_rate": 0.000591238740254738,
      "loss": 2.9931,
      "step": 17768
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7728904485702515,
      "learning_rate": 0.0005912377588719346,
      "loss": 3.007,
      "step": 17769
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5315970182418823,
      "learning_rate": 0.0005912367774349845,
      "loss": 3.0066,
      "step": 17770
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0276248455047607,
      "learning_rate": 0.0005912357959438881,
      "loss": 2.9442,
      "step": 17771
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5675357580184937,
      "learning_rate": 0.0005912348143986456,
      "loss": 3.1952,
      "step": 17772
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2507737874984741,
      "learning_rate": 0.0005912338327992568,
      "loss": 3.2542,
      "step": 17773
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3862168788909912,
      "learning_rate": 0.0005912328511457223,
      "loss": 3.2072,
      "step": 17774
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4692573547363281,
      "learning_rate": 0.000591231869438042,
      "loss": 3.045,
      "step": 17775
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4212428331375122,
      "learning_rate": 0.0005912308876762162,
      "loss": 3.3529,
      "step": 17776
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8808135986328125,
      "learning_rate": 0.0005912299058602451,
      "loss": 2.8626,
      "step": 17777
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.186948299407959,
      "learning_rate": 0.0005912289239901289,
      "loss": 3.1001,
      "step": 17778
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5148767232894897,
      "learning_rate": 0.0005912279420658677,
      "loss": 3.0613,
      "step": 17779
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2897896766662598,
      "learning_rate": 0.0005912269600874618,
      "loss": 2.9347,
      "step": 17780
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5367101430892944,
      "learning_rate": 0.0005912259780549113,
      "loss": 3.3038,
      "step": 17781
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4642536640167236,
      "learning_rate": 0.0005912249959682163,
      "loss": 3.068,
      "step": 17782
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.462701439857483,
      "learning_rate": 0.0005912240138273772,
      "loss": 3.1252,
      "step": 17783
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5027761459350586,
      "learning_rate": 0.000591223031632394,
      "loss": 3.0493,
      "step": 17784
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7889012098312378,
      "learning_rate": 0.000591222049383267,
      "loss": 3.4814,
      "step": 17785
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5656201839447021,
      "learning_rate": 0.0005912210670799962,
      "loss": 2.9435,
      "step": 17786
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3161731958389282,
      "learning_rate": 0.000591220084722582,
      "loss": 2.986,
      "step": 17787
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4958860874176025,
      "learning_rate": 0.0005912191023110244,
      "loss": 3.0964,
      "step": 17788
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5609068870544434,
      "learning_rate": 0.0005912181198453238,
      "loss": 3.172,
      "step": 17789
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4150309562683105,
      "learning_rate": 0.0005912171373254802,
      "loss": 3.1234,
      "step": 17790
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.1939101219177246,
      "learning_rate": 0.0005912161547514937,
      "loss": 3.2232,
      "step": 17791
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3963004350662231,
      "learning_rate": 0.0005912151721233648,
      "loss": 3.1575,
      "step": 17792
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.493819236755371,
      "learning_rate": 0.0005912141894410934,
      "loss": 2.9727,
      "step": 17793
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9773246049880981,
      "learning_rate": 0.0005912132067046799,
      "loss": 3.1401,
      "step": 17794
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.188964605331421,
      "learning_rate": 0.0005912122239141243,
      "loss": 3.0939,
      "step": 17795
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9055596590042114,
      "learning_rate": 0.0005912112410694268,
      "loss": 3.1613,
      "step": 17796
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8302048444747925,
      "learning_rate": 0.0005912102581705877,
      "loss": 3.0636,
      "step": 17797
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5550990104675293,
      "learning_rate": 0.000591209275217607,
      "loss": 2.8818,
      "step": 17798
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4008448123931885,
      "learning_rate": 0.0005912082922104852,
      "loss": 2.9649,
      "step": 17799
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6887286901474,
      "learning_rate": 0.0005912073091492221,
      "loss": 2.9683,
      "step": 17800
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.6512274742126465,
      "learning_rate": 0.0005912063260338183,
      "loss": 3.2221,
      "step": 17801
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4806652069091797,
      "learning_rate": 0.0005912053428642736,
      "loss": 3.2985,
      "step": 17802
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8589421510696411,
      "learning_rate": 0.0005912043596405883,
      "loss": 3.0657,
      "step": 17803
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0148696899414062,
      "learning_rate": 0.0005912033763627626,
      "loss": 2.9911,
      "step": 17804
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.4942777156829834,
      "learning_rate": 0.0005912023930307967,
      "loss": 3.2866,
      "step": 17805
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.4287455081939697,
      "learning_rate": 0.0005912014096446907,
      "loss": 3.0104,
      "step": 17806
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.812031626701355,
      "learning_rate": 0.0005912004262044451,
      "loss": 3.0028,
      "step": 17807
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1940133571624756,
      "learning_rate": 0.0005911994427100596,
      "loss": 2.9839,
      "step": 17808
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7062016725540161,
      "learning_rate": 0.0005911984591615348,
      "loss": 3.2831,
      "step": 17809
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.19319748878479,
      "learning_rate": 0.0005911974755588706,
      "loss": 3.5172,
      "step": 17810
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4999394416809082,
      "learning_rate": 0.0005911964919020674,
      "loss": 3.1649,
      "step": 17811
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5020276308059692,
      "learning_rate": 0.0005911955081911251,
      "loss": 3.3434,
      "step": 17812
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.518097162246704,
      "learning_rate": 0.0005911945244260442,
      "loss": 3.2138,
      "step": 17813
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6442660093307495,
      "learning_rate": 0.0005911935406068248,
      "loss": 3.0806,
      "step": 17814
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.541319489479065,
      "learning_rate": 0.0005911925567334669,
      "loss": 2.9343,
      "step": 17815
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5952543020248413,
      "learning_rate": 0.0005911915728059709,
      "loss": 3.091,
      "step": 17816
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.181645154953003,
      "learning_rate": 0.0005911905888243368,
      "loss": 3.0484,
      "step": 17817
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3443095684051514,
      "learning_rate": 0.0005911896047885649,
      "loss": 3.2497,
      "step": 17818
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1195669174194336,
      "learning_rate": 0.0005911886206986553,
      "loss": 3.1928,
      "step": 17819
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0810327529907227,
      "learning_rate": 0.0005911876365546084,
      "loss": 3.1584,
      "step": 17820
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6773101091384888,
      "learning_rate": 0.0005911866523564241,
      "loss": 3.2358,
      "step": 17821
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8421286344528198,
      "learning_rate": 0.0005911856681041027,
      "loss": 3.1499,
      "step": 17822
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6736606359481812,
      "learning_rate": 0.0005911846837976444,
      "loss": 3.0667,
      "step": 17823
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3729981184005737,
      "learning_rate": 0.0005911836994370494,
      "loss": 3.2265,
      "step": 17824
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8468903303146362,
      "learning_rate": 0.0005911827150223179,
      "loss": 3.0954,
      "step": 17825
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5202387571334839,
      "learning_rate": 0.00059118173055345,
      "loss": 3.0706,
      "step": 17826
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.887031078338623,
      "learning_rate": 0.0005911807460304458,
      "loss": 2.7129,
      "step": 17827
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.452592372894287,
      "learning_rate": 0.0005911797614533057,
      "loss": 2.9974,
      "step": 17828
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8251243829727173,
      "learning_rate": 0.0005911787768220298,
      "loss": 3.4625,
      "step": 17829
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.774375319480896,
      "learning_rate": 0.0005911777921366183,
      "loss": 3.1835,
      "step": 17830
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2790377140045166,
      "learning_rate": 0.0005911768073970713,
      "loss": 3.2227,
      "step": 17831
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7188137769699097,
      "learning_rate": 0.000591175822603389,
      "loss": 3.1101,
      "step": 17832
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.007676362991333,
      "learning_rate": 0.0005911748377555717,
      "loss": 2.8746,
      "step": 17833
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7248591184616089,
      "learning_rate": 0.0005911738528536194,
      "loss": 2.9184,
      "step": 17834
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2997606992721558,
      "learning_rate": 0.0005911728678975324,
      "loss": 3.1094,
      "step": 17835
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4767528772354126,
      "learning_rate": 0.000591171882887311,
      "loss": 3.2551,
      "step": 17836
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0604543685913086,
      "learning_rate": 0.0005911708978229552,
      "loss": 3.1805,
      "step": 17837
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7793018817901611,
      "learning_rate": 0.0005911699127044651,
      "loss": 2.9368,
      "step": 17838
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5976810455322266,
      "learning_rate": 0.0005911689275318411,
      "loss": 3.2609,
      "step": 17839
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0101075172424316,
      "learning_rate": 0.0005911679423050834,
      "loss": 3.2993,
      "step": 17840
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.260561227798462,
      "learning_rate": 0.000591166957024192,
      "loss": 2.9526,
      "step": 17841
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8527151346206665,
      "learning_rate": 0.0005911659716891671,
      "loss": 3.0431,
      "step": 17842
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8695017099380493,
      "learning_rate": 0.000591164986300009,
      "loss": 3.0638,
      "step": 17843
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6818593740463257,
      "learning_rate": 0.0005911640008567179,
      "loss": 3.1858,
      "step": 17844
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8269743919372559,
      "learning_rate": 0.0005911630153592938,
      "loss": 3.1489,
      "step": 17845
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2710219621658325,
      "learning_rate": 0.0005911620298077371,
      "loss": 3.3464,
      "step": 17846
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2769659757614136,
      "learning_rate": 0.0005911610442020479,
      "loss": 2.9502,
      "step": 17847
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.881415605545044,
      "learning_rate": 0.0005911600585422264,
      "loss": 3.196,
      "step": 17848
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8681772947311401,
      "learning_rate": 0.0005911590728282726,
      "loss": 3.0824,
      "step": 17849
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7056161165237427,
      "learning_rate": 0.0005911580870601869,
      "loss": 2.865,
      "step": 17850
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4154103994369507,
      "learning_rate": 0.0005911571012379694,
      "loss": 3.1667,
      "step": 17851
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.485060214996338,
      "learning_rate": 0.0005911561153616203,
      "loss": 2.9967,
      "step": 17852
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.372689723968506,
      "learning_rate": 0.0005911551294311397,
      "loss": 3.1319,
      "step": 17853
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2615150213241577,
      "learning_rate": 0.0005911541434465281,
      "loss": 2.892,
      "step": 17854
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6717835664749146,
      "learning_rate": 0.0005911531574077852,
      "loss": 3.0743,
      "step": 17855
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3691914081573486,
      "learning_rate": 0.0005911521713149115,
      "loss": 3.1465,
      "step": 17856
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7355000972747803,
      "learning_rate": 0.0005911511851679072,
      "loss": 3.12,
      "step": 17857
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2921980619430542,
      "learning_rate": 0.0005911501989667723,
      "loss": 3.1801,
      "step": 17858
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4273604154586792,
      "learning_rate": 0.0005911492127115072,
      "loss": 3.1579,
      "step": 17859
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3775378465652466,
      "learning_rate": 0.0005911482264021119,
      "loss": 3.3531,
      "step": 17860
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6180912256240845,
      "learning_rate": 0.0005911472400385866,
      "loss": 3.0902,
      "step": 17861
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4732900857925415,
      "learning_rate": 0.0005911462536209316,
      "loss": 3.1732,
      "step": 17862
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7071713209152222,
      "learning_rate": 0.000591145267149147,
      "loss": 3.0325,
      "step": 17863
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2837576866149902,
      "learning_rate": 0.000591144280623233,
      "loss": 3.1629,
      "step": 17864
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8688732385635376,
      "learning_rate": 0.0005911432940431898,
      "loss": 3.1173,
      "step": 17865
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6815569400787354,
      "learning_rate": 0.0005911423074090175,
      "loss": 2.9773,
      "step": 17866
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6789133548736572,
      "learning_rate": 0.0005911413207207164,
      "loss": 3.111,
      "step": 17867
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.511262893676758,
      "learning_rate": 0.0005911403339782866,
      "loss": 3.1221,
      "step": 17868
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3019633293151855,
      "learning_rate": 0.0005911393471817284,
      "loss": 3.2129,
      "step": 17869
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7374978065490723,
      "learning_rate": 0.0005911383603310418,
      "loss": 3.3583,
      "step": 17870
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6836962699890137,
      "learning_rate": 0.0005911373734262272,
      "loss": 2.9384,
      "step": 17871
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6951709985733032,
      "learning_rate": 0.0005911363864672845,
      "loss": 3.1289,
      "step": 17872
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4145032167434692,
      "learning_rate": 0.0005911353994542142,
      "loss": 3.1263,
      "step": 17873
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5831395387649536,
      "learning_rate": 0.0005911344123870162,
      "loss": 3.2542,
      "step": 17874
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5416827201843262,
      "learning_rate": 0.0005911334252656909,
      "loss": 3.3217,
      "step": 17875
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3360482454299927,
      "learning_rate": 0.0005911324380902384,
      "loss": 3.0812,
      "step": 17876
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.038675546646118,
      "learning_rate": 0.0005911314508606589,
      "loss": 2.9482,
      "step": 17877
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.568618655204773,
      "learning_rate": 0.0005911304635769526,
      "loss": 3.0535,
      "step": 17878
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5457159280776978,
      "learning_rate": 0.0005911294762391196,
      "loss": 2.854,
      "step": 17879
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.341633677482605,
      "learning_rate": 0.00059112848884716,
      "loss": 2.9663,
      "step": 17880
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4873658418655396,
      "learning_rate": 0.0005911275014010743,
      "loss": 2.9157,
      "step": 17881
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4185794591903687,
      "learning_rate": 0.0005911265139008624,
      "loss": 3.1138,
      "step": 17882
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4293240308761597,
      "learning_rate": 0.0005911255263465247,
      "loss": 2.9979,
      "step": 17883
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.459901213645935,
      "learning_rate": 0.0005911245387380613,
      "loss": 3.2705,
      "step": 17884
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5361099243164062,
      "learning_rate": 0.0005911235510754722,
      "loss": 3.1657,
      "step": 17885
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.55678391456604,
      "learning_rate": 0.0005911225633587578,
      "loss": 3.0145,
      "step": 17886
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2220356464385986,
      "learning_rate": 0.0005911215755879182,
      "loss": 3.0081,
      "step": 17887
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9353537559509277,
      "learning_rate": 0.0005911205877629536,
      "loss": 3.3419,
      "step": 17888
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7183266878128052,
      "learning_rate": 0.0005911195998838641,
      "loss": 2.8299,
      "step": 17889
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9120259284973145,
      "learning_rate": 0.0005911186119506502,
      "loss": 2.9431,
      "step": 17890
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5707995891571045,
      "learning_rate": 0.0005911176239633116,
      "loss": 3.0124,
      "step": 17891
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4064487218856812,
      "learning_rate": 0.000591116635921849,
      "loss": 3.2885,
      "step": 17892
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7999457120895386,
      "learning_rate": 0.0005911156478262622,
      "loss": 3.065,
      "step": 17893
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9461746215820312,
      "learning_rate": 0.0005911146596765515,
      "loss": 3.1955,
      "step": 17894
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4032196998596191,
      "learning_rate": 0.000591113671472717,
      "loss": 3.1978,
      "step": 17895
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.9949967861175537,
      "learning_rate": 0.0005911126832147592,
      "loss": 3.1086,
      "step": 17896
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5137548446655273,
      "learning_rate": 0.0005911116949026779,
      "loss": 3.0424,
      "step": 17897
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6210275888442993,
      "learning_rate": 0.0005911107065364734,
      "loss": 3.478,
      "step": 17898
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6674948930740356,
      "learning_rate": 0.0005911097181161459,
      "loss": 3.2654,
      "step": 17899
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1720876693725586,
      "learning_rate": 0.0005911087296416958,
      "loss": 3.1866,
      "step": 17900
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.208188533782959,
      "learning_rate": 0.0005911077411131229,
      "loss": 3.0484,
      "step": 17901
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5467007160186768,
      "learning_rate": 0.0005911067525304277,
      "loss": 3.0066,
      "step": 17902
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6559312343597412,
      "learning_rate": 0.0005911057638936102,
      "loss": 3.0868,
      "step": 17903
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.100130796432495,
      "learning_rate": 0.0005911047752026707,
      "loss": 2.8292,
      "step": 17904
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5851941108703613,
      "learning_rate": 0.0005911037864576094,
      "loss": 3.1935,
      "step": 17905
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4736580848693848,
      "learning_rate": 0.0005911027976584262,
      "loss": 2.967,
      "step": 17906
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4066553115844727,
      "learning_rate": 0.0005911018088051215,
      "loss": 2.9981,
      "step": 17907
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3235368728637695,
      "learning_rate": 0.0005911008198976956,
      "loss": 2.8967,
      "step": 17908
    },
    {
      "epoch": 0.23,
      "grad_norm": 3.0904860496520996,
      "learning_rate": 0.0005910998309361485,
      "loss": 3.156,
      "step": 17909
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9855486154556274,
      "learning_rate": 0.0005910988419204806,
      "loss": 3.141,
      "step": 17910
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4935551881790161,
      "learning_rate": 0.0005910978528506918,
      "loss": 3.2653,
      "step": 17911
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5769048929214478,
      "learning_rate": 0.0005910968637267824,
      "loss": 3.0536,
      "step": 17912
    },
    {
      "epoch": 0.23,
      "grad_norm": 4.0109148025512695,
      "learning_rate": 0.0005910958745487527,
      "loss": 3.1794,
      "step": 17913
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.179279088973999,
      "learning_rate": 0.0005910948853166027,
      "loss": 3.2259,
      "step": 17914
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.472851514816284,
      "learning_rate": 0.0005910938960303326,
      "loss": 3.1227,
      "step": 17915
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5052311420440674,
      "learning_rate": 0.0005910929066899427,
      "loss": 2.9378,
      "step": 17916
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8094065189361572,
      "learning_rate": 0.0005910919172954332,
      "loss": 2.9999,
      "step": 17917
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5059500932693481,
      "learning_rate": 0.0005910909278468042,
      "loss": 3.1947,
      "step": 17918
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1131882667541504,
      "learning_rate": 0.0005910899383440558,
      "loss": 3.1905,
      "step": 17919
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.117450475692749,
      "learning_rate": 0.0005910889487871884,
      "loss": 3.0401,
      "step": 17920
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1124088764190674,
      "learning_rate": 0.000591087959176202,
      "loss": 3.4826,
      "step": 17921
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.780641794204712,
      "learning_rate": 0.0005910869695110969,
      "loss": 3.0656,
      "step": 17922
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7186238765716553,
      "learning_rate": 0.0005910859797918733,
      "loss": 3.2338,
      "step": 17923
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.093339204788208,
      "learning_rate": 0.0005910849900185312,
      "loss": 3.1131,
      "step": 17924
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.241234302520752,
      "learning_rate": 0.0005910840001910709,
      "loss": 3.0926,
      "step": 17925
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4343101978302002,
      "learning_rate": 0.0005910830103094927,
      "loss": 3.0497,
      "step": 17926
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7988312244415283,
      "learning_rate": 0.0005910820203737966,
      "loss": 2.9773,
      "step": 17927
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8090718984603882,
      "learning_rate": 0.0005910810303839829,
      "loss": 3.2667,
      "step": 17928
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6656156778335571,
      "learning_rate": 0.0005910800403400517,
      "loss": 3.0149,
      "step": 17929
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3506563901901245,
      "learning_rate": 0.0005910790502420033,
      "loss": 3.1371,
      "step": 17930
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.636656641960144,
      "learning_rate": 0.0005910780600898377,
      "loss": 3.0568,
      "step": 17931
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9621919393539429,
      "learning_rate": 0.0005910770698835552,
      "loss": 3.14,
      "step": 17932
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2978622913360596,
      "learning_rate": 0.000591076079623156,
      "loss": 2.9496,
      "step": 17933
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8298072814941406,
      "learning_rate": 0.0005910750893086403,
      "loss": 3.1427,
      "step": 17934
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.785042643547058,
      "learning_rate": 0.0005910740989400082,
      "loss": 3.1468,
      "step": 17935
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.145630359649658,
      "learning_rate": 0.0005910731085172601,
      "loss": 2.8525,
      "step": 17936
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5124423503875732,
      "learning_rate": 0.0005910721180403958,
      "loss": 2.988,
      "step": 17937
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5304368734359741,
      "learning_rate": 0.0005910711275094158,
      "loss": 3.1462,
      "step": 17938
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0218522548675537,
      "learning_rate": 0.0005910701369243202,
      "loss": 2.8486,
      "step": 17939
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3077298402786255,
      "learning_rate": 0.0005910691462851091,
      "loss": 3.0245,
      "step": 17940
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2382619380950928,
      "learning_rate": 0.0005910681555917827,
      "loss": 3.1123,
      "step": 17941
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4440606832504272,
      "learning_rate": 0.0005910671648443413,
      "loss": 3.1571,
      "step": 17942
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.012882947921753,
      "learning_rate": 0.0005910661740427851,
      "loss": 3.2117,
      "step": 17943
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.418211579322815,
      "learning_rate": 0.0005910651831871141,
      "loss": 3.1596,
      "step": 17944
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7312508821487427,
      "learning_rate": 0.0005910641922773286,
      "loss": 3.2821,
      "step": 17945
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.346489667892456,
      "learning_rate": 0.0005910632013134289,
      "loss": 3.1557,
      "step": 17946
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8879575729370117,
      "learning_rate": 0.0005910622102954149,
      "loss": 3.1171,
      "step": 17947
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7598007917404175,
      "learning_rate": 0.000591061219223287,
      "loss": 3.1029,
      "step": 17948
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1169040203094482,
      "learning_rate": 0.0005910602280970453,
      "loss": 3.1544,
      "step": 17949
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.374953031539917,
      "learning_rate": 0.00059105923691669,
      "loss": 3.1456,
      "step": 17950
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0750339031219482,
      "learning_rate": 0.0005910582456822213,
      "loss": 3.0117,
      "step": 17951
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7675563097000122,
      "learning_rate": 0.0005910572543936395,
      "loss": 3.115,
      "step": 17952
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.9970438480377197,
      "learning_rate": 0.0005910562630509445,
      "loss": 3.2158,
      "step": 17953
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.389837384223938,
      "learning_rate": 0.0005910552716541367,
      "loss": 2.6549,
      "step": 17954
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2743289470672607,
      "learning_rate": 0.0005910542802032164,
      "loss": 2.8475,
      "step": 17955
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7707496881484985,
      "learning_rate": 0.0005910532886981833,
      "loss": 3.1714,
      "step": 17956
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3742141723632812,
      "learning_rate": 0.0005910522971390381,
      "loss": 3.1982,
      "step": 17957
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2939932346343994,
      "learning_rate": 0.0005910513055257808,
      "loss": 2.8969,
      "step": 17958
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6116470098495483,
      "learning_rate": 0.0005910503138584115,
      "loss": 3.3446,
      "step": 17959
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9161229133605957,
      "learning_rate": 0.0005910493221369304,
      "loss": 3.0004,
      "step": 17960
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4314607381820679,
      "learning_rate": 0.0005910483303613378,
      "loss": 3.0527,
      "step": 17961
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6140680313110352,
      "learning_rate": 0.0005910473385316339,
      "loss": 3.1788,
      "step": 17962
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.634182095527649,
      "learning_rate": 0.0005910463466478187,
      "loss": 3.1814,
      "step": 17963
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.37082040309906,
      "learning_rate": 0.0005910453547098924,
      "loss": 3.1749,
      "step": 17964
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5220710039138794,
      "learning_rate": 0.0005910443627178554,
      "loss": 2.865,
      "step": 17965
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3519073724746704,
      "learning_rate": 0.0005910433706717077,
      "loss": 3.2489,
      "step": 17966
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6950199604034424,
      "learning_rate": 0.0005910423785714496,
      "loss": 3.0663,
      "step": 17967
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5698814392089844,
      "learning_rate": 0.0005910413864170813,
      "loss": 3.146,
      "step": 17968
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4956814050674438,
      "learning_rate": 0.0005910403942086029,
      "loss": 3.1864,
      "step": 17969
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3435733318328857,
      "learning_rate": 0.0005910394019460145,
      "loss": 3.0312,
      "step": 17970
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6898146867752075,
      "learning_rate": 0.0005910384096293164,
      "loss": 3.1191,
      "step": 17971
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7468081712722778,
      "learning_rate": 0.0005910374172585088,
      "loss": 3.0565,
      "step": 17972
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7238200902938843,
      "learning_rate": 0.0005910364248335918,
      "loss": 3.4454,
      "step": 17973
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6447759866714478,
      "learning_rate": 0.0005910354323545656,
      "loss": 3.0854,
      "step": 17974
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8187620639801025,
      "learning_rate": 0.0005910344398214306,
      "loss": 3.1024,
      "step": 17975
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.702246069908142,
      "learning_rate": 0.0005910334472341866,
      "loss": 3.095,
      "step": 17976
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4017771482467651,
      "learning_rate": 0.0005910324545928341,
      "loss": 3.0776,
      "step": 17977
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4850382804870605,
      "learning_rate": 0.0005910314618973731,
      "loss": 3.0378,
      "step": 17978
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.273670196533203,
      "learning_rate": 0.0005910304691478039,
      "loss": 3.1115,
      "step": 17979
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6067448854446411,
      "learning_rate": 0.0005910294763441267,
      "loss": 3.2212,
      "step": 17980
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7165788412094116,
      "learning_rate": 0.0005910284834863415,
      "loss": 2.9008,
      "step": 17981
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5815553665161133,
      "learning_rate": 0.0005910274905744487,
      "loss": 3.1193,
      "step": 17982
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.2744195461273193,
      "learning_rate": 0.0005910264976084484,
      "loss": 3.0617,
      "step": 17983
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4763116836547852,
      "learning_rate": 0.0005910255045883407,
      "loss": 2.9881,
      "step": 17984
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.148789405822754,
      "learning_rate": 0.000591024511514126,
      "loss": 3.2572,
      "step": 17985
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.3040482997894287,
      "learning_rate": 0.0005910235183858043,
      "loss": 3.2457,
      "step": 17986
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6622729301452637,
      "learning_rate": 0.0005910225252033758,
      "loss": 3.0956,
      "step": 17987
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5647802352905273,
      "learning_rate": 0.0005910215319668408,
      "loss": 2.9637,
      "step": 17988
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4426981210708618,
      "learning_rate": 0.0005910205386761993,
      "loss": 3.0041,
      "step": 17989
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.540806531906128,
      "learning_rate": 0.0005910195453314516,
      "loss": 3.0386,
      "step": 17990
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.317145586013794,
      "learning_rate": 0.0005910185519325978,
      "loss": 3.3224,
      "step": 17991
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6094110012054443,
      "learning_rate": 0.0005910175584796383,
      "loss": 2.9975,
      "step": 17992
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1314914226531982,
      "learning_rate": 0.0005910165649725732,
      "loss": 3.1756,
      "step": 17993
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6896772384643555,
      "learning_rate": 0.0005910155714114024,
      "loss": 3.1935,
      "step": 17994
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7283889055252075,
      "learning_rate": 0.0005910145777961265,
      "loss": 3.1512,
      "step": 17995
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8120884895324707,
      "learning_rate": 0.0005910135841267454,
      "loss": 3.0033,
      "step": 17996
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.216372013092041,
      "learning_rate": 0.0005910125904032594,
      "loss": 3.338,
      "step": 17997
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.204512119293213,
      "learning_rate": 0.0005910115966256687,
      "loss": 2.8188,
      "step": 17998
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.830541729927063,
      "learning_rate": 0.0005910106027939735,
      "loss": 3.0371,
      "step": 17999
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.803885817527771,
      "learning_rate": 0.0005910096089081739,
      "loss": 3.1118,
      "step": 18000
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6881123781204224,
      "learning_rate": 0.00059100861496827,
      "loss": 2.9985,
      "step": 18001
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.550134301185608,
      "learning_rate": 0.0005910076209742622,
      "loss": 3.3999,
      "step": 18002
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6322335004806519,
      "learning_rate": 0.0005910066269261505,
      "loss": 2.9559,
      "step": 18003
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.396416425704956,
      "learning_rate": 0.0005910056328239353,
      "loss": 2.9919,
      "step": 18004
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8996542692184448,
      "learning_rate": 0.0005910046386676166,
      "loss": 3.0376,
      "step": 18005
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.421425223350525,
      "learning_rate": 0.0005910036444571947,
      "loss": 2.8846,
      "step": 18006
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6353998184204102,
      "learning_rate": 0.0005910026501926696,
      "loss": 3.0217,
      "step": 18007
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5025153160095215,
      "learning_rate": 0.0005910016558740418,
      "loss": 3.0779,
      "step": 18008
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.8727636337280273,
      "learning_rate": 0.0005910006615013112,
      "loss": 3.3853,
      "step": 18009
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.097410202026367,
      "learning_rate": 0.000590999667074478,
      "loss": 3.4394,
      "step": 18010
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4994176626205444,
      "learning_rate": 0.0005909986725935426,
      "loss": 3.2414,
      "step": 18011
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.625471830368042,
      "learning_rate": 0.0005909976780585051,
      "loss": 3.1665,
      "step": 18012
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.734060049057007,
      "learning_rate": 0.0005909966834693655,
      "loss": 2.8827,
      "step": 18013
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7131060361862183,
      "learning_rate": 0.0005909956888261242,
      "loss": 3.0666,
      "step": 18014
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7849092483520508,
      "learning_rate": 0.0005909946941287812,
      "loss": 3.3127,
      "step": 18015
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.859419107437134,
      "learning_rate": 0.0005909936993773369,
      "loss": 3.2329,
      "step": 18016
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.6573538780212402,
      "learning_rate": 0.0005909927045717914,
      "loss": 3.1618,
      "step": 18017
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3109793663024902,
      "learning_rate": 0.0005909917097121448,
      "loss": 3.0103,
      "step": 18018
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.4611051082611084,
      "learning_rate": 0.0005909907147983973,
      "loss": 3.2481,
      "step": 18019
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.466810464859009,
      "learning_rate": 0.0005909897198305492,
      "loss": 3.3101,
      "step": 18020
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5070736408233643,
      "learning_rate": 0.0005909887248086007,
      "loss": 3.0787,
      "step": 18021
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6468740701675415,
      "learning_rate": 0.0005909877297325518,
      "loss": 3.3193,
      "step": 18022
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9590601921081543,
      "learning_rate": 0.0005909867346024027,
      "loss": 3.1538,
      "step": 18023
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.5963480472564697,
      "learning_rate": 0.0005909857394181538,
      "loss": 3.1703,
      "step": 18024
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.53587806224823,
      "learning_rate": 0.0005909847441798051,
      "loss": 3.4391,
      "step": 18025
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.6256335973739624,
      "learning_rate": 0.000590983748887357,
      "loss": 3.0109,
      "step": 18026
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.1981821060180664,
      "learning_rate": 0.0005909827535408094,
      "loss": 3.1473,
      "step": 18027
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8447147607803345,
      "learning_rate": 0.0005909817581401626,
      "loss": 3.151,
      "step": 18028
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7340073585510254,
      "learning_rate": 0.0005909807626854168,
      "loss": 3.1925,
      "step": 18029
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5624186992645264,
      "learning_rate": 0.0005909797671765722,
      "loss": 3.1198,
      "step": 18030
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7695611715316772,
      "learning_rate": 0.000590978771613629,
      "loss": 3.4314,
      "step": 18031
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3967833518981934,
      "learning_rate": 0.0005909777759965873,
      "loss": 3.1692,
      "step": 18032
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9431016445159912,
      "learning_rate": 0.0005909767803254474,
      "loss": 3.0888,
      "step": 18033
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5439928770065308,
      "learning_rate": 0.0005909757846002093,
      "loss": 3.1491,
      "step": 18034
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.394210696220398,
      "learning_rate": 0.0005909747888208735,
      "loss": 3.2691,
      "step": 18035
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.68162202835083,
      "learning_rate": 0.0005909737929874399,
      "loss": 3.0716,
      "step": 18036
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9724549055099487,
      "learning_rate": 0.0005909727970999087,
      "loss": 3.2668,
      "step": 18037
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.8676066398620605,
      "learning_rate": 0.0005909718011582803,
      "loss": 3.2521,
      "step": 18038
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5071460008621216,
      "learning_rate": 0.0005909708051625547,
      "loss": 3.0403,
      "step": 18039
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5127335786819458,
      "learning_rate": 0.0005909698091127321,
      "loss": 3.2316,
      "step": 18040
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.4773716926574707,
      "learning_rate": 0.0005909688130088127,
      "loss": 3.2451,
      "step": 18041
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.354866862297058,
      "learning_rate": 0.0005909678168507968,
      "loss": 2.9756,
      "step": 18042
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.481070637702942,
      "learning_rate": 0.0005909668206386843,
      "loss": 3.1982,
      "step": 18043
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.3116552829742432,
      "learning_rate": 0.0005909658243724758,
      "loss": 3.2082,
      "step": 18044
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2637454271316528,
      "learning_rate": 0.0005909648280521712,
      "loss": 3.0542,
      "step": 18045
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.7354063987731934,
      "learning_rate": 0.0005909638316777707,
      "loss": 2.8236,
      "step": 18046
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.9049276113510132,
      "learning_rate": 0.0005909628352492746,
      "loss": 2.8981,
      "step": 18047
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.5411295890808105,
      "learning_rate": 0.0005909618387666829,
      "loss": 3.0223,
      "step": 18048
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2791361808776855,
      "learning_rate": 0.000590960842229996,
      "loss": 3.1385,
      "step": 18049
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.644290804862976,
      "learning_rate": 0.000590959845639214,
      "loss": 3.22,
      "step": 18050
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3042583465576172,
      "learning_rate": 0.000590958848994337,
      "loss": 3.3474,
      "step": 18051
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.393462061882019,
      "learning_rate": 0.0005909578522953653,
      "loss": 3.1596,
      "step": 18052
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4273407459259033,
      "learning_rate": 0.000590956855542299,
      "loss": 3.1453,
      "step": 18053
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5454905033111572,
      "learning_rate": 0.0005909558587351383,
      "loss": 2.994,
      "step": 18054
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4812105894088745,
      "learning_rate": 0.0005909548618738835,
      "loss": 3.0675,
      "step": 18055
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4709477424621582,
      "learning_rate": 0.0005909538649585347,
      "loss": 3.187,
      "step": 18056
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4035875797271729,
      "learning_rate": 0.000590952867989092,
      "loss": 3.3313,
      "step": 18057
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8082257509231567,
      "learning_rate": 0.0005909518709655558,
      "loss": 3.1359,
      "step": 18058
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6579349040985107,
      "learning_rate": 0.0005909508738879261,
      "loss": 3.0046,
      "step": 18059
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1573057174682617,
      "learning_rate": 0.000590949876756203,
      "loss": 3.0739,
      "step": 18060
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.453994870185852,
      "learning_rate": 0.000590948879570387,
      "loss": 3.1141,
      "step": 18061
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.33011531829834,
      "learning_rate": 0.0005909478823304781,
      "loss": 3.2703,
      "step": 18062
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.72090482711792,
      "learning_rate": 0.0005909468850364765,
      "loss": 3.0674,
      "step": 18063
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6378577947616577,
      "learning_rate": 0.0005909458876883823,
      "loss": 3.3154,
      "step": 18064
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.517422080039978,
      "learning_rate": 0.0005909448902861957,
      "loss": 3.0216,
      "step": 18065
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5183665752410889,
      "learning_rate": 0.0005909438928299171,
      "loss": 3.057,
      "step": 18066
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0425162315368652,
      "learning_rate": 0.0005909428953195465,
      "loss": 3.2806,
      "step": 18067
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7467552423477173,
      "learning_rate": 0.0005909418977550841,
      "loss": 3.1454,
      "step": 18068
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.325535535812378,
      "learning_rate": 0.0005909409001365302,
      "loss": 3.1901,
      "step": 18069
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4591485261917114,
      "learning_rate": 0.0005909399024638848,
      "loss": 3.0443,
      "step": 18070
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5492531061172485,
      "learning_rate": 0.0005909389047371482,
      "loss": 3.2051,
      "step": 18071
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.499477744102478,
      "learning_rate": 0.0005909379069563205,
      "loss": 3.1739,
      "step": 18072
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5647423267364502,
      "learning_rate": 0.0005909369091214021,
      "loss": 3.3052,
      "step": 18073
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5370774269104004,
      "learning_rate": 0.0005909359112323929,
      "loss": 2.9151,
      "step": 18074
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4587956666946411,
      "learning_rate": 0.0005909349132892932,
      "loss": 2.9546,
      "step": 18075
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6276447772979736,
      "learning_rate": 0.0005909339152921034,
      "loss": 3.1899,
      "step": 18076
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9047976732254028,
      "learning_rate": 0.0005909329172408234,
      "loss": 2.7805,
      "step": 18077
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6096456050872803,
      "learning_rate": 0.0005909319191354535,
      "loss": 3.3018,
      "step": 18078
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.928512454032898,
      "learning_rate": 0.0005909309209759937,
      "loss": 2.9017,
      "step": 18079
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4372477531433105,
      "learning_rate": 0.0005909299227624445,
      "loss": 3.0406,
      "step": 18080
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4963107109069824,
      "learning_rate": 0.000590928924494806,
      "loss": 3.0783,
      "step": 18081
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6957308053970337,
      "learning_rate": 0.0005909279261730781,
      "loss": 2.9887,
      "step": 18082
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.442696213722229,
      "learning_rate": 0.0005909269277972614,
      "loss": 3.3016,
      "step": 18083
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.628645658493042,
      "learning_rate": 0.0005909259293673558,
      "loss": 3.2221,
      "step": 18084
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.579641342163086,
      "learning_rate": 0.0005909249308833617,
      "loss": 3.0423,
      "step": 18085
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.643819808959961,
      "learning_rate": 0.000590923932345279,
      "loss": 2.9263,
      "step": 18086
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2003576755523682,
      "learning_rate": 0.0005909229337531082,
      "loss": 3.2142,
      "step": 18087
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3165398836135864,
      "learning_rate": 0.0005909219351068492,
      "loss": 3.1043,
      "step": 18088
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4596667289733887,
      "learning_rate": 0.0005909209364065023,
      "loss": 2.9842,
      "step": 18089
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5104142427444458,
      "learning_rate": 0.0005909199376520678,
      "loss": 3.0684,
      "step": 18090
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.283889055252075,
      "learning_rate": 0.0005909189388435458,
      "loss": 3.0055,
      "step": 18091
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.544520854949951,
      "learning_rate": 0.0005909179399809365,
      "loss": 2.8617,
      "step": 18092
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2771449089050293,
      "learning_rate": 0.00059091694106424,
      "loss": 3.1472,
      "step": 18093
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1239657402038574,
      "learning_rate": 0.0005909159420934565,
      "loss": 3.065,
      "step": 18094
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.216087579727173,
      "learning_rate": 0.0005909149430685863,
      "loss": 3.0713,
      "step": 18095
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.736531376838684,
      "learning_rate": 0.0005909139439896296,
      "loss": 2.7855,
      "step": 18096
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7663322687149048,
      "learning_rate": 0.0005909129448565864,
      "loss": 3.2607,
      "step": 18097
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.421806812286377,
      "learning_rate": 0.000590911945669457,
      "loss": 3.0229,
      "step": 18098
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8195923566818237,
      "learning_rate": 0.0005909109464282416,
      "loss": 3.155,
      "step": 18099
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6382040977478027,
      "learning_rate": 0.0005909099471329404,
      "loss": 3.0946,
      "step": 18100
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4010404348373413,
      "learning_rate": 0.0005909089477835535,
      "loss": 3.0051,
      "step": 18101
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.37593412399292,
      "learning_rate": 0.0005909079483800812,
      "loss": 3.2597,
      "step": 18102
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8005905151367188,
      "learning_rate": 0.0005909069489225234,
      "loss": 3.1655,
      "step": 18103
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.974688172340393,
      "learning_rate": 0.0005909059494108807,
      "loss": 2.957,
      "step": 18104
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7424933910369873,
      "learning_rate": 0.0005909049498451531,
      "loss": 2.9927,
      "step": 18105
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4011784791946411,
      "learning_rate": 0.0005909039502253407,
      "loss": 2.917,
      "step": 18106
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.957279682159424,
      "learning_rate": 0.0005909029505514439,
      "loss": 3.1595,
      "step": 18107
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.004723310470581,
      "learning_rate": 0.0005909019508234626,
      "loss": 3.0585,
      "step": 18108
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8594094514846802,
      "learning_rate": 0.0005909009510413972,
      "loss": 2.8206,
      "step": 18109
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5172200202941895,
      "learning_rate": 0.0005908999512052478,
      "loss": 3.1309,
      "step": 18110
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.939565420150757,
      "learning_rate": 0.0005908989513150147,
      "loss": 2.9528,
      "step": 18111
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0382792949676514,
      "learning_rate": 0.0005908979513706979,
      "loss": 3.1169,
      "step": 18112
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.546531915664673,
      "learning_rate": 0.0005908969513722977,
      "loss": 3.123,
      "step": 18113
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5966238975524902,
      "learning_rate": 0.0005908959513198143,
      "loss": 2.926,
      "step": 18114
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3602209091186523,
      "learning_rate": 0.0005908949512132477,
      "loss": 3.1685,
      "step": 18115
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6573364734649658,
      "learning_rate": 0.0005908939510525984,
      "loss": 2.9344,
      "step": 18116
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.245405673980713,
      "learning_rate": 0.0005908929508378664,
      "loss": 2.8742,
      "step": 18117
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.710587739944458,
      "learning_rate": 0.0005908919505690519,
      "loss": 3.2573,
      "step": 18118
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.690632939338684,
      "learning_rate": 0.000590890950246155,
      "loss": 3.351,
      "step": 18119
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8365756273269653,
      "learning_rate": 0.0005908899498691761,
      "loss": 3.1233,
      "step": 18120
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5477463006973267,
      "learning_rate": 0.0005908889494381153,
      "loss": 3.0177,
      "step": 18121
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3584721088409424,
      "learning_rate": 0.0005908879489529726,
      "loss": 3.0868,
      "step": 18122
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8808093070983887,
      "learning_rate": 0.0005908869484137485,
      "loss": 3.3532,
      "step": 18123
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7495681047439575,
      "learning_rate": 0.0005908859478204429,
      "loss": 3.127,
      "step": 18124
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.587611436843872,
      "learning_rate": 0.0005908849471730562,
      "loss": 3.1264,
      "step": 18125
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8020298480987549,
      "learning_rate": 0.0005908839464715884,
      "loss": 3.3099,
      "step": 18126
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7740521430969238,
      "learning_rate": 0.0005908829457160399,
      "loss": 3.0888,
      "step": 18127
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3168872594833374,
      "learning_rate": 0.0005908819449064106,
      "loss": 3.2742,
      "step": 18128
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4914398193359375,
      "learning_rate": 0.000590880944042701,
      "loss": 3.0493,
      "step": 18129
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.084695339202881,
      "learning_rate": 0.0005908799431249112,
      "loss": 3.2089,
      "step": 18130
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.372043490409851,
      "learning_rate": 0.0005908789421530412,
      "loss": 3.0461,
      "step": 18131
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3901331424713135,
      "learning_rate": 0.0005908779411270914,
      "loss": 3.2105,
      "step": 18132
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0509235858917236,
      "learning_rate": 0.0005908769400470618,
      "loss": 3.1487,
      "step": 18133
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5411089658737183,
      "learning_rate": 0.0005908759389129527,
      "loss": 3.2057,
      "step": 18134
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.7456629276275635,
      "learning_rate": 0.0005908749377247644,
      "loss": 3.0584,
      "step": 18135
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.39638090133667,
      "learning_rate": 0.0005908739364824968,
      "loss": 2.9774,
      "step": 18136
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6053187847137451,
      "learning_rate": 0.0005908729351861503,
      "loss": 3.247,
      "step": 18137
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5101183652877808,
      "learning_rate": 0.0005908719338357251,
      "loss": 2.9328,
      "step": 18138
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.70294189453125,
      "learning_rate": 0.0005908709324312212,
      "loss": 2.9499,
      "step": 18139
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3070130348205566,
      "learning_rate": 0.0005908699309726389,
      "loss": 3.1512,
      "step": 18140
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4418878555297852,
      "learning_rate": 0.0005908689294599785,
      "loss": 3.0368,
      "step": 18141
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.669593334197998,
      "learning_rate": 0.00059086792789324,
      "loss": 3.0698,
      "step": 18142
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.6641035079956055,
      "learning_rate": 0.0005908669262724238,
      "loss": 3.0942,
      "step": 18143
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2458527088165283,
      "learning_rate": 0.0005908659245975298,
      "loss": 2.8742,
      "step": 18144
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3353813886642456,
      "learning_rate": 0.0005908649228685582,
      "loss": 3.1346,
      "step": 18145
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.934772491455078,
      "learning_rate": 0.0005908639210855097,
      "loss": 3.0865,
      "step": 18146
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8942749500274658,
      "learning_rate": 0.0005908629192483838,
      "loss": 3.0796,
      "step": 18147
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8718267679214478,
      "learning_rate": 0.0005908619173571811,
      "loss": 3.0617,
      "step": 18148
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7470488548278809,
      "learning_rate": 0.0005908609154119017,
      "loss": 3.2196,
      "step": 18149
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.843581438064575,
      "learning_rate": 0.0005908599134125458,
      "loss": 3.0675,
      "step": 18150
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.568709373474121,
      "learning_rate": 0.0005908589113591134,
      "loss": 3.1304,
      "step": 18151
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7053112983703613,
      "learning_rate": 0.000590857909251605,
      "loss": 3.1777,
      "step": 18152
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9347426891326904,
      "learning_rate": 0.0005908569070900205,
      "loss": 2.8562,
      "step": 18153
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.053860664367676,
      "learning_rate": 0.0005908559048743603,
      "loss": 3.0054,
      "step": 18154
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7749022245407104,
      "learning_rate": 0.0005908549026046243,
      "loss": 2.8976,
      "step": 18155
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9290990829467773,
      "learning_rate": 0.000590853900280813,
      "loss": 3.0577,
      "step": 18156
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.197495460510254,
      "learning_rate": 0.0005908528979029266,
      "loss": 2.8751,
      "step": 18157
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6882177591323853,
      "learning_rate": 0.000590851895470965,
      "loss": 3.4713,
      "step": 18158
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3663352727890015,
      "learning_rate": 0.0005908508929849287,
      "loss": 3.2503,
      "step": 18159
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8957455158233643,
      "learning_rate": 0.0005908498904448177,
      "loss": 2.9693,
      "step": 18160
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2268853187561035,
      "learning_rate": 0.0005908488878506321,
      "loss": 3.2384,
      "step": 18161
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8496296405792236,
      "learning_rate": 0.0005908478852023723,
      "loss": 3.0458,
      "step": 18162
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6578223705291748,
      "learning_rate": 0.0005908468825000383,
      "loss": 3.2054,
      "step": 18163
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6020710468292236,
      "learning_rate": 0.0005908458797436304,
      "loss": 3.1759,
      "step": 18164
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4602491855621338,
      "learning_rate": 0.0005908448769331488,
      "loss": 3.161,
      "step": 18165
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7889198064804077,
      "learning_rate": 0.0005908438740685935,
      "loss": 3.1092,
      "step": 18166
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3378429412841797,
      "learning_rate": 0.000590842871149965,
      "loss": 3.043,
      "step": 18167
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7255232334136963,
      "learning_rate": 0.0005908418681772633,
      "loss": 2.925,
      "step": 18168
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.365854501724243,
      "learning_rate": 0.0005908408651504886,
      "loss": 3.3947,
      "step": 18169
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5419938564300537,
      "learning_rate": 0.000590839862069641,
      "loss": 3.076,
      "step": 18170
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3389257192611694,
      "learning_rate": 0.000590838858934721,
      "loss": 3.2619,
      "step": 18171
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8105367422103882,
      "learning_rate": 0.0005908378557457284,
      "loss": 3.3624,
      "step": 18172
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5514237880706787,
      "learning_rate": 0.0005908368525026635,
      "loss": 3.3517,
      "step": 18173
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9240800142288208,
      "learning_rate": 0.0005908358492055266,
      "loss": 3.0701,
      "step": 18174
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4178334474563599,
      "learning_rate": 0.0005908348458543179,
      "loss": 3.1721,
      "step": 18175
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9000829458236694,
      "learning_rate": 0.0005908338424490374,
      "loss": 3.2922,
      "step": 18176
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3665870428085327,
      "learning_rate": 0.0005908328389896855,
      "loss": 2.8659,
      "step": 18177
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5614612102508545,
      "learning_rate": 0.0005908318354762622,
      "loss": 2.9108,
      "step": 18178
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3389909267425537,
      "learning_rate": 0.0005908308319087679,
      "loss": 3.0814,
      "step": 18179
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7669926881790161,
      "learning_rate": 0.0005908298282872025,
      "loss": 3.1725,
      "step": 18180
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.231738805770874,
      "learning_rate": 0.0005908288246115664,
      "loss": 3.4182,
      "step": 18181
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6722029447555542,
      "learning_rate": 0.0005908278208818597,
      "loss": 2.9178,
      "step": 18182
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.000467538833618,
      "learning_rate": 0.0005908268170980827,
      "loss": 3.2102,
      "step": 18183
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.566969871520996,
      "learning_rate": 0.0005908258132602355,
      "loss": 3.0728,
      "step": 18184
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8064790964126587,
      "learning_rate": 0.0005908248093683182,
      "loss": 3.1852,
      "step": 18185
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9077355861663818,
      "learning_rate": 0.0005908238054223311,
      "loss": 3.4432,
      "step": 18186
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.505139946937561,
      "learning_rate": 0.0005908228014222744,
      "loss": 3.2084,
      "step": 18187
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.614061713218689,
      "learning_rate": 0.0005908217973681483,
      "loss": 3.236,
      "step": 18188
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4513261318206787,
      "learning_rate": 0.0005908207932599529,
      "loss": 2.9944,
      "step": 18189
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2966108322143555,
      "learning_rate": 0.0005908197890976884,
      "loss": 3.0032,
      "step": 18190
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6228312253952026,
      "learning_rate": 0.000590818784881355,
      "loss": 3.1815,
      "step": 18191
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1274068355560303,
      "learning_rate": 0.0005908177806109529,
      "loss": 3.0629,
      "step": 18192
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8735077381134033,
      "learning_rate": 0.0005908167762864823,
      "loss": 3.1396,
      "step": 18193
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6844899654388428,
      "learning_rate": 0.0005908157719079434,
      "loss": 3.191,
      "step": 18194
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7354048490524292,
      "learning_rate": 0.0005908147674753364,
      "loss": 3.0441,
      "step": 18195
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5706697702407837,
      "learning_rate": 0.0005908137629886613,
      "loss": 3.2727,
      "step": 18196
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8523964881896973,
      "learning_rate": 0.0005908127584479185,
      "loss": 3.0395,
      "step": 18197
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3153653144836426,
      "learning_rate": 0.0005908117538531082,
      "loss": 2.9554,
      "step": 18198
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5481314659118652,
      "learning_rate": 0.0005908107492042304,
      "loss": 3.345,
      "step": 18199
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7029114961624146,
      "learning_rate": 0.0005908097445012855,
      "loss": 3.0005,
      "step": 18200
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2212347984313965,
      "learning_rate": 0.0005908087397442735,
      "loss": 3.138,
      "step": 18201
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1285572052001953,
      "learning_rate": 0.0005908077349331948,
      "loss": 3.1898,
      "step": 18202
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.326399803161621,
      "learning_rate": 0.0005908067300680492,
      "loss": 3.2081,
      "step": 18203
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1807851791381836,
      "learning_rate": 0.0005908057251488373,
      "loss": 2.8056,
      "step": 18204
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.892460823059082,
      "learning_rate": 0.0005908047201755591,
      "loss": 3.0949,
      "step": 18205
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.541197657585144,
      "learning_rate": 0.0005908037151482148,
      "loss": 3.0649,
      "step": 18206
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0765185356140137,
      "learning_rate": 0.0005908027100668046,
      "loss": 2.9597,
      "step": 18207
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7725303173065186,
      "learning_rate": 0.0005908017049313287,
      "loss": 3.3609,
      "step": 18208
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6660925149917603,
      "learning_rate": 0.0005908006997417872,
      "loss": 2.9497,
      "step": 18209
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9313890933990479,
      "learning_rate": 0.0005907996944981806,
      "loss": 3.3181,
      "step": 18210
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9765335321426392,
      "learning_rate": 0.0005907986892005085,
      "loss": 3.1616,
      "step": 18211
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0732669830322266,
      "learning_rate": 0.0005907976838487718,
      "loss": 3.132,
      "step": 18212
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.108211040496826,
      "learning_rate": 0.0005907966784429701,
      "loss": 3.2981,
      "step": 18213
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0009100437164307,
      "learning_rate": 0.0005907956729831038,
      "loss": 2.9542,
      "step": 18214
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2615959644317627,
      "learning_rate": 0.0005907946674691731,
      "loss": 3.0314,
      "step": 18215
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5419318675994873,
      "learning_rate": 0.0005907936619011782,
      "loss": 3.2126,
      "step": 18216
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.972777009010315,
      "learning_rate": 0.0005907926562791193,
      "loss": 3.286,
      "step": 18217
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7306909561157227,
      "learning_rate": 0.0005907916506029966,
      "loss": 3.2153,
      "step": 18218
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.06721568107605,
      "learning_rate": 0.0005907906448728102,
      "loss": 3.0476,
      "step": 18219
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.530996322631836,
      "learning_rate": 0.0005907896390885603,
      "loss": 3.197,
      "step": 18220
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7420074939727783,
      "learning_rate": 0.0005907886332502471,
      "loss": 3.2726,
      "step": 18221
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4294133186340332,
      "learning_rate": 0.0005907876273578709,
      "loss": 3.1688,
      "step": 18222
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6828804016113281,
      "learning_rate": 0.0005907866214114317,
      "loss": 3.2154,
      "step": 18223
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2268106937408447,
      "learning_rate": 0.0005907856154109297,
      "loss": 3.059,
      "step": 18224
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.851110816001892,
      "learning_rate": 0.0005907846093563654,
      "loss": 3.217,
      "step": 18225
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0567586421966553,
      "learning_rate": 0.0005907836032477387,
      "loss": 3.065,
      "step": 18226
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7622545957565308,
      "learning_rate": 0.0005907825970850497,
      "loss": 2.9886,
      "step": 18227
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.833056926727295,
      "learning_rate": 0.0005907815908682987,
      "loss": 3.1659,
      "step": 18228
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.532370090484619,
      "learning_rate": 0.000590780584597486,
      "loss": 2.9477,
      "step": 18229
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5708155632019043,
      "learning_rate": 0.0005907795782726118,
      "loss": 2.9486,
      "step": 18230
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3934226036071777,
      "learning_rate": 0.000590778571893676,
      "loss": 3.1159,
      "step": 18231
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3552443981170654,
      "learning_rate": 0.000590777565460679,
      "loss": 3.1594,
      "step": 18232
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4651894569396973,
      "learning_rate": 0.0005907765589736211,
      "loss": 2.9328,
      "step": 18233
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7860066890716553,
      "learning_rate": 0.0005907755524325023,
      "loss": 3.1585,
      "step": 18234
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5367182493209839,
      "learning_rate": 0.0005907745458373228,
      "loss": 3.3545,
      "step": 18235
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.182257890701294,
      "learning_rate": 0.0005907735391880828,
      "loss": 3.0261,
      "step": 18236
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.740298867225647,
      "learning_rate": 0.0005907725324847827,
      "loss": 3.2624,
      "step": 18237
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.595017910003662,
      "learning_rate": 0.0005907715257274222,
      "loss": 3.1564,
      "step": 18238
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.938794493675232,
      "learning_rate": 0.000590770518916002,
      "loss": 2.9964,
      "step": 18239
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.027078151702881,
      "learning_rate": 0.000590769512050522,
      "loss": 3.0486,
      "step": 18240
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6872186660766602,
      "learning_rate": 0.0005907685051309825,
      "loss": 3.2934,
      "step": 18241
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.378339409828186,
      "learning_rate": 0.0005907674981573836,
      "loss": 3.2049,
      "step": 18242
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.748053550720215,
      "learning_rate": 0.0005907664911297255,
      "loss": 2.9428,
      "step": 18243
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8201186656951904,
      "learning_rate": 0.0005907654840480084,
      "loss": 3.4741,
      "step": 18244
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3836846351623535,
      "learning_rate": 0.0005907644769122326,
      "loss": 2.94,
      "step": 18245
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1162567138671875,
      "learning_rate": 0.0005907634697223982,
      "loss": 3.1847,
      "step": 18246
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.9657442569732666,
      "learning_rate": 0.0005907624624785054,
      "loss": 3.1539,
      "step": 18247
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7606850862503052,
      "learning_rate": 0.0005907614551805542,
      "loss": 3.204,
      "step": 18248
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.762631893157959,
      "learning_rate": 0.000590760447828545,
      "loss": 2.8927,
      "step": 18249
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6791131496429443,
      "learning_rate": 0.0005907594404224782,
      "loss": 3.5639,
      "step": 18250
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0024144649505615,
      "learning_rate": 0.0005907584329623535,
      "loss": 2.9613,
      "step": 18251
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3356176614761353,
      "learning_rate": 0.0005907574254481714,
      "loss": 3.0203,
      "step": 18252
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3218141794204712,
      "learning_rate": 0.0005907564178799319,
      "loss": 2.9691,
      "step": 18253
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1208367347717285,
      "learning_rate": 0.0005907554102576354,
      "loss": 2.9658,
      "step": 18254
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6335735321044922,
      "learning_rate": 0.0005907544025812819,
      "loss": 3.2442,
      "step": 18255
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7395265102386475,
      "learning_rate": 0.0005907533948508717,
      "loss": 3.0926,
      "step": 18256
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7119277715682983,
      "learning_rate": 0.000590752387066405,
      "loss": 2.9749,
      "step": 18257
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6242713928222656,
      "learning_rate": 0.000590751379227882,
      "loss": 3.0238,
      "step": 18258
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4444211721420288,
      "learning_rate": 0.0005907503713353027,
      "loss": 3.3341,
      "step": 18259
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4553948640823364,
      "learning_rate": 0.0005907493633886675,
      "loss": 3.0145,
      "step": 18260
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8093317747116089,
      "learning_rate": 0.0005907483553879764,
      "loss": 3.1223,
      "step": 18261
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9621593952178955,
      "learning_rate": 0.0005907473473332298,
      "loss": 3.086,
      "step": 18262
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4776110649108887,
      "learning_rate": 0.0005907463392244277,
      "loss": 3.1891,
      "step": 18263
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5110663175582886,
      "learning_rate": 0.0005907453310615705,
      "loss": 3.1807,
      "step": 18264
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7485233545303345,
      "learning_rate": 0.000590744322844658,
      "loss": 3.1428,
      "step": 18265
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3984243869781494,
      "learning_rate": 0.0005907433145736909,
      "loss": 3.1962,
      "step": 18266
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5947736501693726,
      "learning_rate": 0.000590742306248669,
      "loss": 3.2597,
      "step": 18267
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.452128529548645,
      "learning_rate": 0.0005907412978695926,
      "loss": 3.0642,
      "step": 18268
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9796627759933472,
      "learning_rate": 0.0005907402894364621,
      "loss": 3.2409,
      "step": 18269
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4027906656265259,
      "learning_rate": 0.0005907392809492773,
      "loss": 3.1231,
      "step": 18270
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7591136693954468,
      "learning_rate": 0.0005907382724080386,
      "loss": 2.9911,
      "step": 18271
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7582772970199585,
      "learning_rate": 0.0005907372638127462,
      "loss": 3.2649,
      "step": 18272
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.03678822517395,
      "learning_rate": 0.0005907362551634002,
      "loss": 3.1254,
      "step": 18273
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6965715885162354,
      "learning_rate": 0.000590735246460001,
      "loss": 3.1968,
      "step": 18274
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6337896585464478,
      "learning_rate": 0.0005907342377025485,
      "loss": 3.0975,
      "step": 18275
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.620460867881775,
      "learning_rate": 0.000590733228891043,
      "loss": 2.9164,
      "step": 18276
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.47713041305542,
      "learning_rate": 0.0005907322200254848,
      "loss": 3.1513,
      "step": 18277
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.477979302406311,
      "learning_rate": 0.0005907312111058738,
      "loss": 3.1408,
      "step": 18278
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3538260459899902,
      "learning_rate": 0.0005907302021322105,
      "loss": 3.1418,
      "step": 18279
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4935564994812012,
      "learning_rate": 0.000590729193104495,
      "loss": 3.0977,
      "step": 18280
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8361411094665527,
      "learning_rate": 0.0005907281840227275,
      "loss": 3.1373,
      "step": 18281
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.476954698562622,
      "learning_rate": 0.000590727174886908,
      "loss": 3.3624,
      "step": 18282
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.469967246055603,
      "learning_rate": 0.000590726165697037,
      "loss": 3.1235,
      "step": 18283
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7721731662750244,
      "learning_rate": 0.0005907251564531144,
      "loss": 3.5257,
      "step": 18284
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4888370037078857,
      "learning_rate": 0.0005907241471551405,
      "loss": 2.7433,
      "step": 18285
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3137339353561401,
      "learning_rate": 0.0005907231378031155,
      "loss": 3.4273,
      "step": 18286
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4461698532104492,
      "learning_rate": 0.0005907221283970397,
      "loss": 3.4088,
      "step": 18287
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0886762142181396,
      "learning_rate": 0.000590721118936913,
      "loss": 3.0797,
      "step": 18288
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3131616115570068,
      "learning_rate": 0.0005907201094227358,
      "loss": 3.344,
      "step": 18289
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1829442977905273,
      "learning_rate": 0.0005907190998545083,
      "loss": 3.1342,
      "step": 18290
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5408082008361816,
      "learning_rate": 0.0005907180902322306,
      "loss": 2.9295,
      "step": 18291
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5333610773086548,
      "learning_rate": 0.0005907170805559029,
      "loss": 2.9535,
      "step": 18292
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3958840370178223,
      "learning_rate": 0.0005907160708255255,
      "loss": 3.2247,
      "step": 18293
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8907742500305176,
      "learning_rate": 0.0005907150610410984,
      "loss": 2.9426,
      "step": 18294
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9004775285720825,
      "learning_rate": 0.0005907140512026219,
      "loss": 3.0774,
      "step": 18295
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3973891735076904,
      "learning_rate": 0.0005907130413100962,
      "loss": 3.3193,
      "step": 18296
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6035747528076172,
      "learning_rate": 0.0005907120313635214,
      "loss": 3.077,
      "step": 18297
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1291472911834717,
      "learning_rate": 0.0005907110213628978,
      "loss": 3.4353,
      "step": 18298
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2196292877197266,
      "learning_rate": 0.0005907100113082256,
      "loss": 3.0317,
      "step": 18299
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4859182834625244,
      "learning_rate": 0.0005907090011995048,
      "loss": 3.1613,
      "step": 18300
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7661494016647339,
      "learning_rate": 0.0005907079910367357,
      "loss": 3.3355,
      "step": 18301
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.857845425605774,
      "learning_rate": 0.0005907069808199185,
      "loss": 2.797,
      "step": 18302
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.324448585510254,
      "learning_rate": 0.0005907059705490535,
      "loss": 3.0913,
      "step": 18303
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3251070976257324,
      "learning_rate": 0.0005907049602241407,
      "loss": 3.4532,
      "step": 18304
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.283609390258789,
      "learning_rate": 0.0005907039498451804,
      "loss": 3.529,
      "step": 18305
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4920289516448975,
      "learning_rate": 0.0005907029394121727,
      "loss": 2.9694,
      "step": 18306
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3523136377334595,
      "learning_rate": 0.0005907019289251178,
      "loss": 3.3269,
      "step": 18307
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0136337280273438,
      "learning_rate": 0.000590700918384016,
      "loss": 3.3834,
      "step": 18308
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6717631816864014,
      "learning_rate": 0.0005906999077888674,
      "loss": 3.0114,
      "step": 18309
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6638296842575073,
      "learning_rate": 0.0005906988971396722,
      "loss": 3.143,
      "step": 18310
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7276691198349,
      "learning_rate": 0.0005906978864364305,
      "loss": 3.2321,
      "step": 18311
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7407935857772827,
      "learning_rate": 0.0005906968756791426,
      "loss": 3.046,
      "step": 18312
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4430497884750366,
      "learning_rate": 0.0005906958648678088,
      "loss": 3.1588,
      "step": 18313
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8266348838806152,
      "learning_rate": 0.000590694854002429,
      "loss": 3.1538,
      "step": 18314
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.169838786125183,
      "learning_rate": 0.0005906938430830037,
      "loss": 2.9966,
      "step": 18315
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5353862047195435,
      "learning_rate": 0.0005906928321095328,
      "loss": 3.2584,
      "step": 18316
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.477766990661621,
      "learning_rate": 0.0005906918210820166,
      "loss": 3.1634,
      "step": 18317
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6036186218261719,
      "learning_rate": 0.0005906908100004553,
      "loss": 2.9594,
      "step": 18318
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.089752435684204,
      "learning_rate": 0.0005906897988648492,
      "loss": 2.8194,
      "step": 18319
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3513069152832031,
      "learning_rate": 0.0005906887876751983,
      "loss": 3.089,
      "step": 18320
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3499202728271484,
      "learning_rate": 0.0005906877764315029,
      "loss": 3.077,
      "step": 18321
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2077412605285645,
      "learning_rate": 0.0005906867651337632,
      "loss": 3.1249,
      "step": 18322
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.446844220161438,
      "learning_rate": 0.0005906857537819792,
      "loss": 3.0467,
      "step": 18323
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4857386350631714,
      "learning_rate": 0.0005906847423761514,
      "loss": 3.2556,
      "step": 18324
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1837141513824463,
      "learning_rate": 0.0005906837309162797,
      "loss": 3.0727,
      "step": 18325
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5379419326782227,
      "learning_rate": 0.0005906827194023644,
      "loss": 3.0455,
      "step": 18326
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5951186418533325,
      "learning_rate": 0.0005906817078344058,
      "loss": 3.1163,
      "step": 18327
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8580210208892822,
      "learning_rate": 0.0005906806962124039,
      "loss": 3.1644,
      "step": 18328
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.179140329360962,
      "learning_rate": 0.000590679684536359,
      "loss": 3.3042,
      "step": 18329
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8490244150161743,
      "learning_rate": 0.0005906786728062713,
      "loss": 3.229,
      "step": 18330
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.651558518409729,
      "learning_rate": 0.0005906776610221407,
      "loss": 3.3503,
      "step": 18331
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.622988224029541,
      "learning_rate": 0.0005906766491839679,
      "loss": 3.217,
      "step": 18332
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7943034172058105,
      "learning_rate": 0.0005906756372917528,
      "loss": 3.0219,
      "step": 18333
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.329822540283203,
      "learning_rate": 0.0005906746253454956,
      "loss": 3.0147,
      "step": 18334
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5076218843460083,
      "learning_rate": 0.0005906736133451965,
      "loss": 2.9878,
      "step": 18335
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5300275087356567,
      "learning_rate": 0.0005906726012908556,
      "loss": 2.9678,
      "step": 18336
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4271645545959473,
      "learning_rate": 0.0005906715891824732,
      "loss": 3.1628,
      "step": 18337
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7510859966278076,
      "learning_rate": 0.0005906705770200495,
      "loss": 3.1971,
      "step": 18338
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5130027532577515,
      "learning_rate": 0.0005906695648035847,
      "loss": 3.0045,
      "step": 18339
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.043815851211548,
      "learning_rate": 0.0005906685525330789,
      "loss": 2.9874,
      "step": 18340
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6552680730819702,
      "learning_rate": 0.0005906675402085322,
      "loss": 3.3155,
      "step": 18341
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5359349250793457,
      "learning_rate": 0.0005906665278299451,
      "loss": 3.0167,
      "step": 18342
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9684832096099854,
      "learning_rate": 0.0005906655153973176,
      "loss": 3.1436,
      "step": 18343
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4060925245285034,
      "learning_rate": 0.0005906645029106498,
      "loss": 3.0123,
      "step": 18344
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3077728748321533,
      "learning_rate": 0.0005906634903699421,
      "loss": 3.1496,
      "step": 18345
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6443302631378174,
      "learning_rate": 0.0005906624777751945,
      "loss": 3.3072,
      "step": 18346
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.630185842514038,
      "learning_rate": 0.0005906614651264071,
      "loss": 3.3247,
      "step": 18347
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.819687843322754,
      "learning_rate": 0.0005906604524235805,
      "loss": 3.1576,
      "step": 18348
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.93665611743927,
      "learning_rate": 0.0005906594396667145,
      "loss": 3.026,
      "step": 18349
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5371158123016357,
      "learning_rate": 0.0005906584268558095,
      "loss": 3.2236,
      "step": 18350
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.758427381515503,
      "learning_rate": 0.0005906574139908655,
      "loss": 3.0394,
      "step": 18351
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.090568780899048,
      "learning_rate": 0.0005906564010718829,
      "loss": 3.2104,
      "step": 18352
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6023542881011963,
      "learning_rate": 0.0005906553880988618,
      "loss": 3.0627,
      "step": 18353
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7448800802230835,
      "learning_rate": 0.0005906543750718023,
      "loss": 3.3634,
      "step": 18354
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.673465609550476,
      "learning_rate": 0.0005906533619907048,
      "loss": 2.9633,
      "step": 18355
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3790959119796753,
      "learning_rate": 0.0005906523488555692,
      "loss": 3.0932,
      "step": 18356
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5108120441436768,
      "learning_rate": 0.0005906513356663959,
      "loss": 3.0018,
      "step": 18357
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2259151935577393,
      "learning_rate": 0.000590650322423185,
      "loss": 3.1629,
      "step": 18358
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4296343326568604,
      "learning_rate": 0.0005906493091259367,
      "loss": 2.8012,
      "step": 18359
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.427270770072937,
      "learning_rate": 0.0005906482957746512,
      "loss": 3.2704,
      "step": 18360
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5146783590316772,
      "learning_rate": 0.0005906472823693288,
      "loss": 3.1686,
      "step": 18361
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3297548294067383,
      "learning_rate": 0.0005906462689099696,
      "loss": 3.0378,
      "step": 18362
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.383297324180603,
      "learning_rate": 0.0005906452553965737,
      "loss": 3.2781,
      "step": 18363
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.342444658279419,
      "learning_rate": 0.0005906442418291413,
      "loss": 3.1629,
      "step": 18364
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.827039361000061,
      "learning_rate": 0.0005906432282076727,
      "loss": 3.1795,
      "step": 18365
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4631149768829346,
      "learning_rate": 0.0005906422145321681,
      "loss": 3.2829,
      "step": 18366
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2557988166809082,
      "learning_rate": 0.0005906412008026275,
      "loss": 3.015,
      "step": 18367
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4840171337127686,
      "learning_rate": 0.0005906401870190513,
      "loss": 3.0519,
      "step": 18368
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3434892892837524,
      "learning_rate": 0.0005906391731814397,
      "loss": 2.7727,
      "step": 18369
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3842620849609375,
      "learning_rate": 0.0005906381592897926,
      "loss": 2.9347,
      "step": 18370
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.838613748550415,
      "learning_rate": 0.0005906371453441105,
      "loss": 3.2491,
      "step": 18371
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7059125900268555,
      "learning_rate": 0.0005906361313443933,
      "loss": 3.223,
      "step": 18372
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3276174068450928,
      "learning_rate": 0.0005906351172906417,
      "loss": 3.0334,
      "step": 18373
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2816357612609863,
      "learning_rate": 0.0005906341031828552,
      "loss": 3.1603,
      "step": 18374
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.6769731044769287,
      "learning_rate": 0.0005906330890210346,
      "loss": 3.0982,
      "step": 18375
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6150059700012207,
      "learning_rate": 0.0005906320748051796,
      "loss": 2.9872,
      "step": 18376
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9092985391616821,
      "learning_rate": 0.0005906310605352907,
      "loss": 2.8935,
      "step": 18377
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.073894739151001,
      "learning_rate": 0.000590630046211368,
      "loss": 3.3651,
      "step": 18378
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6982381343841553,
      "learning_rate": 0.0005906290318334117,
      "loss": 3.2425,
      "step": 18379
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.010599136352539,
      "learning_rate": 0.000590628017401422,
      "loss": 3.0765,
      "step": 18380
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6515837907791138,
      "learning_rate": 0.000590627002915399,
      "loss": 3.2553,
      "step": 18381
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4494235515594482,
      "learning_rate": 0.0005906259883753429,
      "loss": 3.2046,
      "step": 18382
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4710835218429565,
      "learning_rate": 0.0005906249737812541,
      "loss": 3.0086,
      "step": 18383
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3888806104660034,
      "learning_rate": 0.0005906239591331326,
      "loss": 3.0625,
      "step": 18384
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3270479440689087,
      "learning_rate": 0.0005906229444309786,
      "loss": 3.1854,
      "step": 18385
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3234479427337646,
      "learning_rate": 0.0005906219296747923,
      "loss": 3.2494,
      "step": 18386
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4634287357330322,
      "learning_rate": 0.0005906209148645739,
      "loss": 3.2007,
      "step": 18387
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.442661166191101,
      "learning_rate": 0.0005906199000003235,
      "loss": 3.021,
      "step": 18388
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.636123776435852,
      "learning_rate": 0.0005906188850820414,
      "loss": 3.1187,
      "step": 18389
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.559048056602478,
      "learning_rate": 0.0005906178701097279,
      "loss": 3.0675,
      "step": 18390
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4904457330703735,
      "learning_rate": 0.0005906168550833829,
      "loss": 2.9262,
      "step": 18391
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.894089937210083,
      "learning_rate": 0.0005906158400030069,
      "loss": 3.0341,
      "step": 18392
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4832005500793457,
      "learning_rate": 0.0005906148248685998,
      "loss": 2.9317,
      "step": 18393
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8759390115737915,
      "learning_rate": 0.000590613809680162,
      "loss": 3.1492,
      "step": 18394
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3421117067337036,
      "learning_rate": 0.0005906127944376935,
      "loss": 2.887,
      "step": 18395
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8903030157089233,
      "learning_rate": 0.0005906117791411946,
      "loss": 3.4659,
      "step": 18396
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6165968179702759,
      "learning_rate": 0.0005906107637906657,
      "loss": 3.2102,
      "step": 18397
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1321794986724854,
      "learning_rate": 0.0005906097483861065,
      "loss": 3.1746,
      "step": 18398
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3344827890396118,
      "learning_rate": 0.0005906087329275176,
      "loss": 3.0882,
      "step": 18399
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3954949378967285,
      "learning_rate": 0.000590607717414899,
      "loss": 2.9454,
      "step": 18400
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8385621309280396,
      "learning_rate": 0.000590606701848251,
      "loss": 2.996,
      "step": 18401
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.942728042602539,
      "learning_rate": 0.0005906056862275737,
      "loss": 2.985,
      "step": 18402
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5739665031433105,
      "learning_rate": 0.0005906046705528674,
      "loss": 3.1349,
      "step": 18403
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0124270915985107,
      "learning_rate": 0.0005906036548241321,
      "loss": 3.1716,
      "step": 18404
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3201167583465576,
      "learning_rate": 0.000590602639041368,
      "loss": 3.1628,
      "step": 18405
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.569212794303894,
      "learning_rate": 0.0005906016232045756,
      "loss": 3.0571,
      "step": 18406
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7508745193481445,
      "learning_rate": 0.0005906006073137547,
      "loss": 3.0243,
      "step": 18407
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5730775594711304,
      "learning_rate": 0.0005905995913689058,
      "loss": 3.2325,
      "step": 18408
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1803739070892334,
      "learning_rate": 0.0005905985753700288,
      "loss": 3.1294,
      "step": 18409
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4036846160888672,
      "learning_rate": 0.0005905975593171241,
      "loss": 3.051,
      "step": 18410
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3421263694763184,
      "learning_rate": 0.0005905965432101919,
      "loss": 3.1227,
      "step": 18411
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4728953838348389,
      "learning_rate": 0.0005905955270492322,
      "loss": 3.1287,
      "step": 18412
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4615410566329956,
      "learning_rate": 0.0005905945108342454,
      "loss": 3.0221,
      "step": 18413
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.409530520439148,
      "learning_rate": 0.0005905934945652316,
      "loss": 3.3282,
      "step": 18414
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4284231662750244,
      "learning_rate": 0.0005905924782421909,
      "loss": 3.1792,
      "step": 18415
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2258960008621216,
      "learning_rate": 0.0005905914618651236,
      "loss": 3.0559,
      "step": 18416
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5362604856491089,
      "learning_rate": 0.0005905904454340299,
      "loss": 3.1135,
      "step": 18417
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.309003233909607,
      "learning_rate": 0.00059058942894891,
      "loss": 3.1243,
      "step": 18418
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6625885963439941,
      "learning_rate": 0.000590588412409764,
      "loss": 2.8318,
      "step": 18419
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9578949213027954,
      "learning_rate": 0.000590587395816592,
      "loss": 3.0579,
      "step": 18420
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4119911193847656,
      "learning_rate": 0.0005905863791693945,
      "loss": 3.0015,
      "step": 18421
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.55105459690094,
      "learning_rate": 0.0005905853624681714,
      "loss": 3.0062,
      "step": 18422
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.704167366027832,
      "learning_rate": 0.000590584345712923,
      "loss": 3.0345,
      "step": 18423
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5841814279556274,
      "learning_rate": 0.0005905833289036495,
      "loss": 2.9198,
      "step": 18424
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5438485145568848,
      "learning_rate": 0.0005905823120403511,
      "loss": 3.2728,
      "step": 18425
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7551546096801758,
      "learning_rate": 0.000590581295123028,
      "loss": 3.2218,
      "step": 18426
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4107438325881958,
      "learning_rate": 0.0005905802781516802,
      "loss": 3.2396,
      "step": 18427
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6524224281311035,
      "learning_rate": 0.0005905792611263082,
      "loss": 3.0527,
      "step": 18428
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5579650402069092,
      "learning_rate": 0.000590578244046912,
      "loss": 3.2905,
      "step": 18429
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8090784549713135,
      "learning_rate": 0.0005905772269134918,
      "loss": 3.2769,
      "step": 18430
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4487892389297485,
      "learning_rate": 0.0005905762097260477,
      "loss": 3.2136,
      "step": 18431
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.581400752067566,
      "learning_rate": 0.0005905751924845801,
      "loss": 3.0984,
      "step": 18432
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8048120737075806,
      "learning_rate": 0.0005905741751890892,
      "loss": 3.246,
      "step": 18433
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3809326887130737,
      "learning_rate": 0.0005905731578395749,
      "loss": 3.1557,
      "step": 18434
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6078310012817383,
      "learning_rate": 0.0005905721404360376,
      "loss": 3.1581,
      "step": 18435
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4736924171447754,
      "learning_rate": 0.0005905711229784776,
      "loss": 2.9771,
      "step": 18436
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.983246326446533,
      "learning_rate": 0.0005905701054668948,
      "loss": 3.1665,
      "step": 18437
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8745979070663452,
      "learning_rate": 0.0005905690879012895,
      "loss": 3.2337,
      "step": 18438
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.541292428970337,
      "learning_rate": 0.000590568070281662,
      "loss": 3.19,
      "step": 18439
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2202584743499756,
      "learning_rate": 0.0005905670526080124,
      "loss": 3.2505,
      "step": 18440
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.254434823989868,
      "learning_rate": 0.0005905660348803409,
      "loss": 3.06,
      "step": 18441
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.835589051246643,
      "learning_rate": 0.0005905650170986476,
      "loss": 3.031,
      "step": 18442
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6547489166259766,
      "learning_rate": 0.0005905639992629329,
      "loss": 3.1221,
      "step": 18443
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.848524570465088,
      "learning_rate": 0.0005905629813731969,
      "loss": 3.1104,
      "step": 18444
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.44718337059021,
      "learning_rate": 0.0005905619634294396,
      "loss": 3.0971,
      "step": 18445
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8872387409210205,
      "learning_rate": 0.0005905609454316614,
      "loss": 2.9692,
      "step": 18446
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.562848448753357,
      "learning_rate": 0.0005905599273798625,
      "loss": 3.0108,
      "step": 18447
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3625372648239136,
      "learning_rate": 0.000590558909274043,
      "loss": 2.68,
      "step": 18448
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5656355619430542,
      "learning_rate": 0.0005905578911142032,
      "loss": 3.249,
      "step": 18449
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4674018621444702,
      "learning_rate": 0.0005905568729003429,
      "loss": 3.3795,
      "step": 18450
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5794156789779663,
      "learning_rate": 0.0005905558546324629,
      "loss": 3.2955,
      "step": 18451
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4981417655944824,
      "learning_rate": 0.000590554836310563,
      "loss": 3.3626,
      "step": 18452
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.4702188968658447,
      "learning_rate": 0.0005905538179346435,
      "loss": 2.8698,
      "step": 18453
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5043293237686157,
      "learning_rate": 0.0005905527995047045,
      "loss": 3.2803,
      "step": 18454
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4949709177017212,
      "learning_rate": 0.0005905517810207462,
      "loss": 3.0232,
      "step": 18455
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6000372171401978,
      "learning_rate": 0.0005905507624827689,
      "loss": 3.1501,
      "step": 18456
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2744650840759277,
      "learning_rate": 0.0005905497438907727,
      "loss": 2.9114,
      "step": 18457
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5205228328704834,
      "learning_rate": 0.0005905487252447578,
      "loss": 2.8934,
      "step": 18458
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9497038125991821,
      "learning_rate": 0.0005905477065447246,
      "loss": 3.3581,
      "step": 18459
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.9966673851013184,
      "learning_rate": 0.0005905466877906729,
      "loss": 3.1208,
      "step": 18460
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.122497320175171,
      "learning_rate": 0.0005905456689826031,
      "loss": 3.3372,
      "step": 18461
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4362913370132446,
      "learning_rate": 0.0005905446501205154,
      "loss": 3.4008,
      "step": 18462
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0077881813049316,
      "learning_rate": 0.00059054363120441,
      "loss": 3.1273,
      "step": 18463
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4227547645568848,
      "learning_rate": 0.000590542612234287,
      "loss": 3.0954,
      "step": 18464
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5180094242095947,
      "learning_rate": 0.0005905415932101467,
      "loss": 3.3237,
      "step": 18465
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7494697570800781,
      "learning_rate": 0.0005905405741319892,
      "loss": 3.0015,
      "step": 18466
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1757848262786865,
      "learning_rate": 0.0005905395549998147,
      "loss": 3.0732,
      "step": 18467
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3827441930770874,
      "learning_rate": 0.0005905385358136235,
      "loss": 3.1862,
      "step": 18468
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5909775495529175,
      "learning_rate": 0.0005905375165734157,
      "loss": 3.2496,
      "step": 18469
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2748093605041504,
      "learning_rate": 0.0005905364972791914,
      "loss": 3.2665,
      "step": 18470
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.800442934036255,
      "learning_rate": 0.0005905354779309509,
      "loss": 3.2458,
      "step": 18471
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1177239418029785,
      "learning_rate": 0.0005905344585286944,
      "loss": 3.192,
      "step": 18472
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7595667839050293,
      "learning_rate": 0.000590533439072422,
      "loss": 3.2511,
      "step": 18473
    },
    {
      "epoch": 0.24,
      "grad_norm": 4.083392143249512,
      "learning_rate": 0.000590532419562134,
      "loss": 3.1294,
      "step": 18474
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.090350866317749,
      "learning_rate": 0.0005905313999978306,
      "loss": 3.1305,
      "step": 18475
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2310819625854492,
      "learning_rate": 0.0005905303803795119,
      "loss": 3.284,
      "step": 18476
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9695143699645996,
      "learning_rate": 0.000590529360707178,
      "loss": 2.932,
      "step": 18477
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5822360515594482,
      "learning_rate": 0.0005905283409808293,
      "loss": 2.9963,
      "step": 18478
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6678756475448608,
      "learning_rate": 0.0005905273212004659,
      "loss": 3.3863,
      "step": 18479
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4986647367477417,
      "learning_rate": 0.000590526301366088,
      "loss": 3.093,
      "step": 18480
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7260757684707642,
      "learning_rate": 0.0005905252814776958,
      "loss": 3.2434,
      "step": 18481
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.2654032707214355,
      "learning_rate": 0.0005905242615352893,
      "loss": 2.9826,
      "step": 18482
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1405105590820312,
      "learning_rate": 0.000590523241538869,
      "loss": 3.0835,
      "step": 18483
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9785948991775513,
      "learning_rate": 0.0005905222214884349,
      "loss": 3.0439,
      "step": 18484
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.568995714187622,
      "learning_rate": 0.0005905212013839873,
      "loss": 2.9731,
      "step": 18485
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4706356525421143,
      "learning_rate": 0.0005905201812255262,
      "loss": 3.2433,
      "step": 18486
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.289008378982544,
      "learning_rate": 0.000590519161013052,
      "loss": 3.0997,
      "step": 18487
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3873612880706787,
      "learning_rate": 0.0005905181407465648,
      "loss": 2.8662,
      "step": 18488
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.984179973602295,
      "learning_rate": 0.0005905171204260649,
      "loss": 2.9468,
      "step": 18489
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4731253385543823,
      "learning_rate": 0.0005905161000515522,
      "loss": 2.9181,
      "step": 18490
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7656155824661255,
      "learning_rate": 0.0005905150796230271,
      "loss": 3.1767,
      "step": 18491
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4972667694091797,
      "learning_rate": 0.0005905140591404898,
      "loss": 3.02,
      "step": 18492
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7013261318206787,
      "learning_rate": 0.0005905130386039405,
      "loss": 3.0177,
      "step": 18493
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7628332376480103,
      "learning_rate": 0.0005905120180133794,
      "loss": 3.1282,
      "step": 18494
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6306345462799072,
      "learning_rate": 0.0005905109973688065,
      "loss": 3.0545,
      "step": 18495
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5862841606140137,
      "learning_rate": 0.0005905099766702221,
      "loss": 3.0901,
      "step": 18496
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9577642679214478,
      "learning_rate": 0.0005905089559176265,
      "loss": 3.1236,
      "step": 18497
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4521607160568237,
      "learning_rate": 0.0005905079351110197,
      "loss": 3.559,
      "step": 18498
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3540358543395996,
      "learning_rate": 0.0005905069142504021,
      "loss": 3.1485,
      "step": 18499
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.753273606300354,
      "learning_rate": 0.0005905058933357738,
      "loss": 3.0265,
      "step": 18500
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6058228015899658,
      "learning_rate": 0.0005905048723671349,
      "loss": 3.1036,
      "step": 18501
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.797654151916504,
      "learning_rate": 0.0005905038513444856,
      "loss": 2.9984,
      "step": 18502
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8446204662323,
      "learning_rate": 0.0005905028302678263,
      "loss": 3.0598,
      "step": 18503
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8476670980453491,
      "learning_rate": 0.0005905018091371569,
      "loss": 3.1639,
      "step": 18504
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.257570266723633,
      "learning_rate": 0.0005905007879524779,
      "loss": 2.9492,
      "step": 18505
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.4415721893310547,
      "learning_rate": 0.0005904997667137892,
      "loss": 3.3785,
      "step": 18506
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.361701488494873,
      "learning_rate": 0.0005904987454210911,
      "loss": 3.0744,
      "step": 18507
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4850496053695679,
      "learning_rate": 0.0005904977240743838,
      "loss": 2.7876,
      "step": 18508
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.393864154815674,
      "learning_rate": 0.0005904967026736675,
      "loss": 3.2702,
      "step": 18509
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.7727980613708496,
      "learning_rate": 0.0005904956812189425,
      "loss": 3.0267,
      "step": 18510
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6421273946762085,
      "learning_rate": 0.0005904946597102088,
      "loss": 3.1691,
      "step": 18511
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2315707206726074,
      "learning_rate": 0.0005904936381474666,
      "loss": 3.1765,
      "step": 18512
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.1011335849761963,
      "learning_rate": 0.0005904926165307161,
      "loss": 3.1006,
      "step": 18513
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5544310808181763,
      "learning_rate": 0.0005904915948599576,
      "loss": 2.6213,
      "step": 18514
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.772621750831604,
      "learning_rate": 0.0005904905731351913,
      "loss": 2.9411,
      "step": 18515
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7239432334899902,
      "learning_rate": 0.0005904895513564173,
      "loss": 3.1291,
      "step": 18516
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8579767942428589,
      "learning_rate": 0.0005904885295236357,
      "loss": 3.1759,
      "step": 18517
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.760038137435913,
      "learning_rate": 0.0005904875076368468,
      "loss": 2.9815,
      "step": 18518
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.361846685409546,
      "learning_rate": 0.000590486485696051,
      "loss": 2.9321,
      "step": 18519
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4395006895065308,
      "learning_rate": 0.000590485463701248,
      "loss": 3.2379,
      "step": 18520
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4221340417861938,
      "learning_rate": 0.0005904844416524384,
      "loss": 3.3295,
      "step": 18521
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6546313762664795,
      "learning_rate": 0.0005904834195496223,
      "loss": 3.0415,
      "step": 18522
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5475003719329834,
      "learning_rate": 0.0005904823973927998,
      "loss": 2.9873,
      "step": 18523
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4170103073120117,
      "learning_rate": 0.0005904813751819711,
      "loss": 2.8905,
      "step": 18524
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.562576174736023,
      "learning_rate": 0.0005904803529171366,
      "loss": 3.141,
      "step": 18525
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5931313037872314,
      "learning_rate": 0.0005904793305982961,
      "loss": 2.8827,
      "step": 18526
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6000330448150635,
      "learning_rate": 0.0005904783082254501,
      "loss": 3.0228,
      "step": 18527
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3942383527755737,
      "learning_rate": 0.0005904772857985988,
      "loss": 3.4158,
      "step": 18528
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9375200271606445,
      "learning_rate": 0.0005904762633177422,
      "loss": 3.1519,
      "step": 18529
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.622148036956787,
      "learning_rate": 0.0005904752407828806,
      "loss": 3.1143,
      "step": 18530
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5314972400665283,
      "learning_rate": 0.0005904742181940141,
      "loss": 3.1854,
      "step": 18531
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7804160118103027,
      "learning_rate": 0.000590473195551143,
      "loss": 3.1901,
      "step": 18532
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6075315475463867,
      "learning_rate": 0.0005904721728542674,
      "loss": 3.042,
      "step": 18533
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4393314123153687,
      "learning_rate": 0.0005904711501033877,
      "loss": 2.9026,
      "step": 18534
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.6616978645324707,
      "learning_rate": 0.0005904701272985038,
      "loss": 2.9183,
      "step": 18535
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4273386001586914,
      "learning_rate": 0.0005904691044396161,
      "loss": 3.2181,
      "step": 18536
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6573529243469238,
      "learning_rate": 0.0005904680815267245,
      "loss": 2.9603,
      "step": 18537
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5249102115631104,
      "learning_rate": 0.0005904670585598296,
      "loss": 3.3214,
      "step": 18538
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.319387435913086,
      "learning_rate": 0.0005904660355389314,
      "loss": 2.9963,
      "step": 18539
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9144883155822754,
      "learning_rate": 0.00059046501246403,
      "loss": 3.2449,
      "step": 18540
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6984920501708984,
      "learning_rate": 0.0005904639893351258,
      "loss": 3.1206,
      "step": 18541
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.66423761844635,
      "learning_rate": 0.0005904629661522187,
      "loss": 3.4934,
      "step": 18542
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.048491954803467,
      "learning_rate": 0.0005904619429153091,
      "loss": 3.0839,
      "step": 18543
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6331862211227417,
      "learning_rate": 0.0005904609196243971,
      "loss": 3.1692,
      "step": 18544
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.473441243171692,
      "learning_rate": 0.0005904598962794831,
      "loss": 3.158,
      "step": 18545
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9041404724121094,
      "learning_rate": 0.000590458872880567,
      "loss": 2.8567,
      "step": 18546
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7658957242965698,
      "learning_rate": 0.0005904578494276491,
      "loss": 3.0547,
      "step": 18547
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4521231651306152,
      "learning_rate": 0.0005904568259207295,
      "loss": 2.9072,
      "step": 18548
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9216187000274658,
      "learning_rate": 0.0005904558023598087,
      "loss": 3.1157,
      "step": 18549
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6778539419174194,
      "learning_rate": 0.0005904547787448866,
      "loss": 3.1706,
      "step": 18550
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4028831720352173,
      "learning_rate": 0.0005904537550759635,
      "loss": 3.0516,
      "step": 18551
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4659655094146729,
      "learning_rate": 0.0005904527313530395,
      "loss": 2.7412,
      "step": 18552
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.973183512687683,
      "learning_rate": 0.0005904517075761149,
      "loss": 3.1554,
      "step": 18553
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5359623432159424,
      "learning_rate": 0.0005904506837451899,
      "loss": 3.0865,
      "step": 18554
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3801090717315674,
      "learning_rate": 0.0005904496598602646,
      "loss": 3.1528,
      "step": 18555
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7782784700393677,
      "learning_rate": 0.0005904486359213392,
      "loss": 3.1821,
      "step": 18556
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4525383710861206,
      "learning_rate": 0.0005904476119284139,
      "loss": 3.0515,
      "step": 18557
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8408722877502441,
      "learning_rate": 0.0005904465878814889,
      "loss": 3.1242,
      "step": 18558
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.584580421447754,
      "learning_rate": 0.0005904455637805644,
      "loss": 3.1088,
      "step": 18559
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4282381534576416,
      "learning_rate": 0.0005904445396256406,
      "loss": 3.2268,
      "step": 18560
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5670620203018188,
      "learning_rate": 0.0005904435154167176,
      "loss": 3.2039,
      "step": 18561
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8298412561416626,
      "learning_rate": 0.0005904424911537959,
      "loss": 2.9532,
      "step": 18562
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5326213836669922,
      "learning_rate": 0.0005904414668368752,
      "loss": 2.8063,
      "step": 18563
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1662871837615967,
      "learning_rate": 0.000590440442465956,
      "loss": 3.1668,
      "step": 18564
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3359289169311523,
      "learning_rate": 0.0005904394180410386,
      "loss": 3.0443,
      "step": 18565
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3280017375946045,
      "learning_rate": 0.000590438393562123,
      "loss": 2.9349,
      "step": 18566
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6880189180374146,
      "learning_rate": 0.0005904373690292093,
      "loss": 2.8194,
      "step": 18567
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4309744834899902,
      "learning_rate": 0.0005904363444422979,
      "loss": 3.1071,
      "step": 18568
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3600678443908691,
      "learning_rate": 0.0005904353198013888,
      "loss": 3.1323,
      "step": 18569
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6314986944198608,
      "learning_rate": 0.0005904342951064824,
      "loss": 3.14,
      "step": 18570
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5000218152999878,
      "learning_rate": 0.0005904332703575788,
      "loss": 3.1796,
      "step": 18571
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1178252696990967,
      "learning_rate": 0.000590432245554678,
      "loss": 3.2054,
      "step": 18572
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.476263165473938,
      "learning_rate": 0.0005904312206977805,
      "loss": 3.0779,
      "step": 18573
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2489914894104004,
      "learning_rate": 0.0005904301957868863,
      "loss": 2.886,
      "step": 18574
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7656471729278564,
      "learning_rate": 0.0005904291708219957,
      "loss": 3.1083,
      "step": 18575
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.611222743988037,
      "learning_rate": 0.0005904281458031088,
      "loss": 2.8462,
      "step": 18576
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.673746109008789,
      "learning_rate": 0.0005904271207302258,
      "loss": 3.3403,
      "step": 18577
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5505648851394653,
      "learning_rate": 0.000590426095603347,
      "loss": 3.0768,
      "step": 18578
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.08438777923584,
      "learning_rate": 0.0005904250704224723,
      "loss": 3.0734,
      "step": 18579
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5196012258529663,
      "learning_rate": 0.0005904240451876023,
      "loss": 3.1746,
      "step": 18580
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9471468925476074,
      "learning_rate": 0.000590423019898737,
      "loss": 3.1462,
      "step": 18581
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.766247034072876,
      "learning_rate": 0.0005904219945558765,
      "loss": 3.1395,
      "step": 18582
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5636723041534424,
      "learning_rate": 0.0005904209691590211,
      "loss": 3.2057,
      "step": 18583
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.145263433456421,
      "learning_rate": 0.000590419943708171,
      "loss": 2.9413,
      "step": 18584
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.334557056427002,
      "learning_rate": 0.0005904189182033262,
      "loss": 2.9215,
      "step": 18585
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4382117986679077,
      "learning_rate": 0.0005904178926444872,
      "loss": 3.2249,
      "step": 18586
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.606536626815796,
      "learning_rate": 0.0005904168670316539,
      "loss": 2.9689,
      "step": 18587
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.528076171875,
      "learning_rate": 0.0005904158413648267,
      "loss": 3.002,
      "step": 18588
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6275569200515747,
      "learning_rate": 0.0005904148156440057,
      "loss": 3.1018,
      "step": 18589
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.391615629196167,
      "learning_rate": 0.0005904137898691911,
      "loss": 3.192,
      "step": 18590
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0697920322418213,
      "learning_rate": 0.0005904127640403832,
      "loss": 3.2237,
      "step": 18591
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8559380769729614,
      "learning_rate": 0.0005904117381575818,
      "loss": 3.1805,
      "step": 18592
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.517813801765442,
      "learning_rate": 0.0005904107122207876,
      "loss": 3.1432,
      "step": 18593
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.633488416671753,
      "learning_rate": 0.0005904096862300005,
      "loss": 3.1696,
      "step": 18594
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8881428241729736,
      "learning_rate": 0.0005904086601852209,
      "loss": 3.2162,
      "step": 18595
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.039271116256714,
      "learning_rate": 0.0005904076340864486,
      "loss": 3.2698,
      "step": 18596
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.140120506286621,
      "learning_rate": 0.0005904066079336843,
      "loss": 3.0245,
      "step": 18597
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.208547353744507,
      "learning_rate": 0.0005904055817269278,
      "loss": 2.9328,
      "step": 18598
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.153329610824585,
      "learning_rate": 0.0005904045554661794,
      "loss": 3.2497,
      "step": 18599
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4186885356903076,
      "learning_rate": 0.0005904035291514393,
      "loss": 3.177,
      "step": 18600
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.343295097351074,
      "learning_rate": 0.0005904025027827078,
      "loss": 2.9698,
      "step": 18601
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.733168601989746,
      "learning_rate": 0.0005904014763599848,
      "loss": 3.3226,
      "step": 18602
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8147412538528442,
      "learning_rate": 0.0005904004498832709,
      "loss": 3.2148,
      "step": 18603
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2830456495285034,
      "learning_rate": 0.000590399423352566,
      "loss": 3.3695,
      "step": 18604
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.5915396213531494,
      "learning_rate": 0.0005903983967678703,
      "loss": 3.1382,
      "step": 18605
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3245041370391846,
      "learning_rate": 0.0005903973701291841,
      "loss": 3.3075,
      "step": 18606
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.6001434326171875,
      "learning_rate": 0.0005903963434365077,
      "loss": 3.2011,
      "step": 18607
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8590779304504395,
      "learning_rate": 0.000590395316689841,
      "loss": 2.9306,
      "step": 18608
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9959396123886108,
      "learning_rate": 0.0005903942898891843,
      "loss": 3.1797,
      "step": 18609
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5417368412017822,
      "learning_rate": 0.0005903932630345378,
      "loss": 2.9826,
      "step": 18610
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.35159170627594,
      "learning_rate": 0.0005903922361259018,
      "loss": 3.2125,
      "step": 18611
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4489713907241821,
      "learning_rate": 0.0005903912091632763,
      "loss": 3.2497,
      "step": 18612
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.214373230934143,
      "learning_rate": 0.0005903901821466617,
      "loss": 3.064,
      "step": 18613
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4937773942947388,
      "learning_rate": 0.000590389155076058,
      "loss": 3.3174,
      "step": 18614
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3748657703399658,
      "learning_rate": 0.0005903881279514655,
      "loss": 3.1715,
      "step": 18615
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6267297267913818,
      "learning_rate": 0.0005903871007728843,
      "loss": 3.1687,
      "step": 18616
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8304128646850586,
      "learning_rate": 0.0005903860735403147,
      "loss": 3.1602,
      "step": 18617
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8105148077011108,
      "learning_rate": 0.0005903850462537569,
      "loss": 3.2739,
      "step": 18618
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4906513690948486,
      "learning_rate": 0.0005903840189132111,
      "loss": 3.3636,
      "step": 18619
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.698495626449585,
      "learning_rate": 0.0005903829915186774,
      "loss": 3.2955,
      "step": 18620
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7612498998641968,
      "learning_rate": 0.0005903819640701559,
      "loss": 3.1591,
      "step": 18621
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7040843963623047,
      "learning_rate": 0.0005903809365676469,
      "loss": 2.8606,
      "step": 18622
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.835578441619873,
      "learning_rate": 0.0005903799090111508,
      "loss": 3.0301,
      "step": 18623
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0019798278808594,
      "learning_rate": 0.0005903788814006674,
      "loss": 3.3035,
      "step": 18624
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4205458164215088,
      "learning_rate": 0.0005903778537361971,
      "loss": 3.31,
      "step": 18625
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.171633720397949,
      "learning_rate": 0.0005903768260177403,
      "loss": 3.2094,
      "step": 18626
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3111796379089355,
      "learning_rate": 0.0005903757982452967,
      "loss": 3.1809,
      "step": 18627
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1474432945251465,
      "learning_rate": 0.0005903747704188669,
      "loss": 2.992,
      "step": 18628
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.069523334503174,
      "learning_rate": 0.0005903737425384509,
      "loss": 3.0018,
      "step": 18629
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.615394115447998,
      "learning_rate": 0.0005903727146040489,
      "loss": 3.1978,
      "step": 18630
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6558915376663208,
      "learning_rate": 0.0005903716866156613,
      "loss": 3.0605,
      "step": 18631
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.662969708442688,
      "learning_rate": 0.0005903706585732879,
      "loss": 3.2164,
      "step": 18632
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4008538722991943,
      "learning_rate": 0.0005903696304769292,
      "loss": 3.2864,
      "step": 18633
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.475474238395691,
      "learning_rate": 0.0005903686023265854,
      "loss": 3.0938,
      "step": 18634
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.96843421459198,
      "learning_rate": 0.0005903675741222564,
      "loss": 2.9924,
      "step": 18635
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8814735412597656,
      "learning_rate": 0.0005903665458639427,
      "loss": 3.2282,
      "step": 18636
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5205365419387817,
      "learning_rate": 0.0005903655175516445,
      "loss": 2.9663,
      "step": 18637
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5194859504699707,
      "learning_rate": 0.0005903644891853616,
      "loss": 3.3023,
      "step": 18638
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3890624046325684,
      "learning_rate": 0.0005903634607650947,
      "loss": 3.0411,
      "step": 18639
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.705159306526184,
      "learning_rate": 0.0005903624322908436,
      "loss": 3.1106,
      "step": 18640
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3371154069900513,
      "learning_rate": 0.0005903614037626088,
      "loss": 3.1227,
      "step": 18641
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4145402908325195,
      "learning_rate": 0.0005903603751803902,
      "loss": 3.1444,
      "step": 18642
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8073441982269287,
      "learning_rate": 0.0005903593465441881,
      "loss": 3.1922,
      "step": 18643
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.041217088699341,
      "learning_rate": 0.0005903583178540028,
      "loss": 3.0591,
      "step": 18644
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5935230255126953,
      "learning_rate": 0.0005903572891098344,
      "loss": 3.0224,
      "step": 18645
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3306782245635986,
      "learning_rate": 0.000590356260311683,
      "loss": 3.2003,
      "step": 18646
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8453307151794434,
      "learning_rate": 0.000590355231459549,
      "loss": 2.9654,
      "step": 18647
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5808172225952148,
      "learning_rate": 0.0005903542025534324,
      "loss": 3.2218,
      "step": 18648
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.362748384475708,
      "learning_rate": 0.0005903531735933336,
      "loss": 3.1677,
      "step": 18649
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4200292825698853,
      "learning_rate": 0.0005903521445792526,
      "loss": 3.141,
      "step": 18650
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4588779211044312,
      "learning_rate": 0.0005903511155111897,
      "loss": 2.9561,
      "step": 18651
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3252582550048828,
      "learning_rate": 0.000590350086389145,
      "loss": 3.0285,
      "step": 18652
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2550971508026123,
      "learning_rate": 0.0005903490572131187,
      "loss": 3.0416,
      "step": 18653
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6293344497680664,
      "learning_rate": 0.0005903480279831111,
      "loss": 3.2887,
      "step": 18654
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.4495816230773926,
      "learning_rate": 0.0005903469986991224,
      "loss": 3.1001,
      "step": 18655
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7340757846832275,
      "learning_rate": 0.0005903459693611525,
      "loss": 3.1941,
      "step": 18656
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7238929271697998,
      "learning_rate": 0.0005903449399692019,
      "loss": 2.9555,
      "step": 18657
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8031532764434814,
      "learning_rate": 0.0005903439105232706,
      "loss": 2.8192,
      "step": 18658
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4723336696624756,
      "learning_rate": 0.0005903428810233591,
      "loss": 3.1432,
      "step": 18659
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.069246768951416,
      "learning_rate": 0.0005903418514694672,
      "loss": 3.1145,
      "step": 18660
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.7624053955078125,
      "learning_rate": 0.0005903408218615953,
      "loss": 2.9493,
      "step": 18661
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6659491062164307,
      "learning_rate": 0.0005903397921997436,
      "loss": 3.117,
      "step": 18662
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.500387191772461,
      "learning_rate": 0.0005903387624839123,
      "loss": 3.0167,
      "step": 18663
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6625343561172485,
      "learning_rate": 0.0005903377327141014,
      "loss": 3.0952,
      "step": 18664
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5010292530059814,
      "learning_rate": 0.0005903367028903114,
      "loss": 2.9994,
      "step": 18665
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5659486055374146,
      "learning_rate": 0.0005903356730125422,
      "loss": 3.0703,
      "step": 18666
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3816769123077393,
      "learning_rate": 0.0005903346430807942,
      "loss": 3.1002,
      "step": 18667
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7108348608016968,
      "learning_rate": 0.0005903336130950674,
      "loss": 2.9175,
      "step": 18668
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6046825647354126,
      "learning_rate": 0.0005903325830553622,
      "loss": 3.0832,
      "step": 18669
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.344044804573059,
      "learning_rate": 0.0005903315529616788,
      "loss": 3.4214,
      "step": 18670
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7833184003829956,
      "learning_rate": 0.000590330522814017,
      "loss": 3.0624,
      "step": 18671
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4365862607955933,
      "learning_rate": 0.0005903294926123774,
      "loss": 3.1983,
      "step": 18672
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2483768463134766,
      "learning_rate": 0.0005903284623567602,
      "loss": 3.2341,
      "step": 18673
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1401302814483643,
      "learning_rate": 0.0005903274320471653,
      "loss": 2.9606,
      "step": 18674
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0938076972961426,
      "learning_rate": 0.0005903264016835931,
      "loss": 3.0498,
      "step": 18675
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.627701759338379,
      "learning_rate": 0.0005903253712660438,
      "loss": 3.1092,
      "step": 18676
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7018569707870483,
      "learning_rate": 0.0005903243407945174,
      "loss": 3.339,
      "step": 18677
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.400612711906433,
      "learning_rate": 0.0005903233102690143,
      "loss": 3.3142,
      "step": 18678
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4321763515472412,
      "learning_rate": 0.0005903222796895347,
      "loss": 2.9913,
      "step": 18679
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6513508558273315,
      "learning_rate": 0.0005903212490560786,
      "loss": 3.3367,
      "step": 18680
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8759167194366455,
      "learning_rate": 0.0005903202183686463,
      "loss": 3.2268,
      "step": 18681
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.645545482635498,
      "learning_rate": 0.000590319187627238,
      "loss": 3.0437,
      "step": 18682
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6703413724899292,
      "learning_rate": 0.0005903181568318539,
      "loss": 2.9369,
      "step": 18683
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0128471851348877,
      "learning_rate": 0.0005903171259824942,
      "loss": 3.1095,
      "step": 18684
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.262136220932007,
      "learning_rate": 0.000590316095079159,
      "loss": 3.3738,
      "step": 18685
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8419617414474487,
      "learning_rate": 0.0005903150641218486,
      "loss": 2.988,
      "step": 18686
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5454375743865967,
      "learning_rate": 0.0005903140331105631,
      "loss": 3.1509,
      "step": 18687
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5568723678588867,
      "learning_rate": 0.0005903130020453029,
      "loss": 2.9607,
      "step": 18688
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4832416772842407,
      "learning_rate": 0.0005903119709260679,
      "loss": 3.0955,
      "step": 18689
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5703498125076294,
      "learning_rate": 0.0005903109397528585,
      "loss": 3.2557,
      "step": 18690
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.676855444908142,
      "learning_rate": 0.0005903099085256747,
      "loss": 3.0903,
      "step": 18691
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7683932781219482,
      "learning_rate": 0.0005903088772445169,
      "loss": 3.1181,
      "step": 18692
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.393441915512085,
      "learning_rate": 0.0005903078459093852,
      "loss": 3.1763,
      "step": 18693
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6550400257110596,
      "learning_rate": 0.0005903068145202796,
      "loss": 2.9515,
      "step": 18694
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8428711891174316,
      "learning_rate": 0.0005903057830772007,
      "loss": 3.1913,
      "step": 18695
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6938366889953613,
      "learning_rate": 0.0005903047515801485,
      "loss": 3.181,
      "step": 18696
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3925285339355469,
      "learning_rate": 0.000590303720029123,
      "loss": 2.9944,
      "step": 18697
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3534202575683594,
      "learning_rate": 0.0005903026884241247,
      "loss": 3.0137,
      "step": 18698
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.295741319656372,
      "learning_rate": 0.0005903016567651535,
      "loss": 3.3074,
      "step": 18699
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.737428903579712,
      "learning_rate": 0.00059030062505221,
      "loss": 3.1156,
      "step": 18700
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5226041078567505,
      "learning_rate": 0.0005902995932852939,
      "loss": 2.9125,
      "step": 18701
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9926986694335938,
      "learning_rate": 0.0005902985614644057,
      "loss": 3.1743,
      "step": 18702
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.202387809753418,
      "learning_rate": 0.0005902975295895454,
      "loss": 3.0577,
      "step": 18703
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.404854655265808,
      "learning_rate": 0.0005902964976607134,
      "loss": 3.1491,
      "step": 18704
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.587270736694336,
      "learning_rate": 0.00059029546567791,
      "loss": 3.1081,
      "step": 18705
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4275312423706055,
      "learning_rate": 0.0005902944336411349,
      "loss": 3.1566,
      "step": 18706
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.316116213798523,
      "learning_rate": 0.0005902934015503887,
      "loss": 3.035,
      "step": 18707
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7243733406066895,
      "learning_rate": 0.0005902923694056716,
      "loss": 2.8987,
      "step": 18708
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4786629676818848,
      "learning_rate": 0.0005902913372069835,
      "loss": 3.2532,
      "step": 18709
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9837846755981445,
      "learning_rate": 0.0005902903049543249,
      "loss": 2.9645,
      "step": 18710
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6861398220062256,
      "learning_rate": 0.0005902892726476957,
      "loss": 2.9274,
      "step": 18711
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.392693281173706,
      "learning_rate": 0.0005902882402870962,
      "loss": 3.0005,
      "step": 18712
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.824069857597351,
      "learning_rate": 0.0005902872078725268,
      "loss": 2.9926,
      "step": 18713
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0736711025238037,
      "learning_rate": 0.0005902861754039874,
      "loss": 3.0548,
      "step": 18714
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9247448444366455,
      "learning_rate": 0.0005902851428814785,
      "loss": 3.0876,
      "step": 18715
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6207809448242188,
      "learning_rate": 0.000590284110305,
      "loss": 2.9364,
      "step": 18716
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.742447853088379,
      "learning_rate": 0.0005902830776745523,
      "loss": 3.1118,
      "step": 18717
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.806107521057129,
      "learning_rate": 0.0005902820449901353,
      "loss": 3.4354,
      "step": 18718
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.435776710510254,
      "learning_rate": 0.0005902810122517495,
      "loss": 3.1118,
      "step": 18719
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3821818828582764,
      "learning_rate": 0.000590279979459395,
      "loss": 3.1605,
      "step": 18720
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.8686325550079346,
      "learning_rate": 0.0005902789466130719,
      "loss": 3.2005,
      "step": 18721
    },
    {
      "epoch": 0.24,
      "grad_norm": 4.472350120544434,
      "learning_rate": 0.0005902779137127806,
      "loss": 3.0486,
      "step": 18722
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6496996879577637,
      "learning_rate": 0.000590276880758521,
      "loss": 3.0019,
      "step": 18723
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5579252243041992,
      "learning_rate": 0.0005902758477502935,
      "loss": 2.9961,
      "step": 18724
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.614454746246338,
      "learning_rate": 0.0005902748146880982,
      "loss": 2.9812,
      "step": 18725
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5076051950454712,
      "learning_rate": 0.0005902737815719355,
      "loss": 3.2598,
      "step": 18726
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0362980365753174,
      "learning_rate": 0.0005902727484018052,
      "loss": 3.1389,
      "step": 18727
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.465226650238037,
      "learning_rate": 0.0005902717151777079,
      "loss": 3.093,
      "step": 18728
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.1755974292755127,
      "learning_rate": 0.0005902706818996436,
      "loss": 3.0331,
      "step": 18729
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.1803497076034546,
      "learning_rate": 0.0005902696485676124,
      "loss": 3.1162,
      "step": 18730
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5235306024551392,
      "learning_rate": 0.0005902686151816145,
      "loss": 3.1583,
      "step": 18731
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6137306690216064,
      "learning_rate": 0.0005902675817416504,
      "loss": 3.0896,
      "step": 18732
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9420533180236816,
      "learning_rate": 0.00059026654824772,
      "loss": 3.1062,
      "step": 18733
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4155833721160889,
      "learning_rate": 0.0005902655146998235,
      "loss": 3.2253,
      "step": 18734
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4506837129592896,
      "learning_rate": 0.0005902644810979613,
      "loss": 2.8238,
      "step": 18735
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5625832080841064,
      "learning_rate": 0.0005902634474421334,
      "loss": 2.9856,
      "step": 18736
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8387361764907837,
      "learning_rate": 0.00059026241373234,
      "loss": 2.8762,
      "step": 18737
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5092519521713257,
      "learning_rate": 0.0005902613799685814,
      "loss": 3.026,
      "step": 18738
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5379841327667236,
      "learning_rate": 0.0005902603461508577,
      "loss": 3.2249,
      "step": 18739
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6573522090911865,
      "learning_rate": 0.0005902593122791691,
      "loss": 2.9715,
      "step": 18740
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3523427248001099,
      "learning_rate": 0.0005902582783535159,
      "loss": 3.0782,
      "step": 18741
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4382472038269043,
      "learning_rate": 0.0005902572443738981,
      "loss": 3.0326,
      "step": 18742
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.2078865766525269,
      "learning_rate": 0.0005902562103403161,
      "loss": 3.2377,
      "step": 18743
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9172520637512207,
      "learning_rate": 0.0005902551762527698,
      "loss": 3.1023,
      "step": 18744
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4836206436157227,
      "learning_rate": 0.0005902541421112597,
      "loss": 3.0823,
      "step": 18745
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.5999984741210938,
      "learning_rate": 0.0005902531079157859,
      "loss": 3.2953,
      "step": 18746
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.171461582183838,
      "learning_rate": 0.0005902520736663486,
      "loss": 2.9509,
      "step": 18747
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.3139266967773438,
      "learning_rate": 0.000590251039362948,
      "loss": 3.2456,
      "step": 18748
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.2277259826660156,
      "learning_rate": 0.0005902500050055841,
      "loss": 3.1644,
      "step": 18749
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.838803768157959,
      "learning_rate": 0.0005902489705942573,
      "loss": 3.1243,
      "step": 18750
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5253894329071045,
      "learning_rate": 0.0005902479361289678,
      "loss": 3.4279,
      "step": 18751
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.331183910369873,
      "learning_rate": 0.0005902469016097157,
      "loss": 3.1426,
      "step": 18752
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7204937934875488,
      "learning_rate": 0.0005902458670365011,
      "loss": 3.0713,
      "step": 18753
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3884690999984741,
      "learning_rate": 0.0005902448324093245,
      "loss": 3.1782,
      "step": 18754
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7119078636169434,
      "learning_rate": 0.0005902437977281858,
      "loss": 3.1407,
      "step": 18755
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7005906105041504,
      "learning_rate": 0.0005902427629930853,
      "loss": 3.0629,
      "step": 18756
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9527997970581055,
      "learning_rate": 0.0005902417282040233,
      "loss": 3.2643,
      "step": 18757
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7297930717468262,
      "learning_rate": 0.0005902406933609998,
      "loss": 3.1287,
      "step": 18758
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6282455921173096,
      "learning_rate": 0.000590239658464015,
      "loss": 3.2071,
      "step": 18759
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.435490369796753,
      "learning_rate": 0.0005902386235130693,
      "loss": 2.7897,
      "step": 18760
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7134114503860474,
      "learning_rate": 0.0005902375885081626,
      "loss": 3.0163,
      "step": 18761
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3757328987121582,
      "learning_rate": 0.0005902365534492955,
      "loss": 3.0595,
      "step": 18762
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.52203369140625,
      "learning_rate": 0.0005902355183364678,
      "loss": 3.1883,
      "step": 18763
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7606619596481323,
      "learning_rate": 0.0005902344831696797,
      "loss": 3.2477,
      "step": 18764
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4811787605285645,
      "learning_rate": 0.0005902334479489318,
      "loss": 3.2036,
      "step": 18765
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3781670331954956,
      "learning_rate": 0.0005902324126742239,
      "loss": 2.8807,
      "step": 18766
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4455360174179077,
      "learning_rate": 0.0005902313773455563,
      "loss": 3.0524,
      "step": 18767
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5501562356948853,
      "learning_rate": 0.0005902303419629292,
      "loss": 3.1775,
      "step": 18768
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.843011498451233,
      "learning_rate": 0.0005902293065263429,
      "loss": 3.0263,
      "step": 18769
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3445281982421875,
      "learning_rate": 0.0005902282710357974,
      "loss": 2.9251,
      "step": 18770
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.982091188430786,
      "learning_rate": 0.000590227235491293,
      "loss": 3.2813,
      "step": 18771
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9148011207580566,
      "learning_rate": 0.0005902261998928298,
      "loss": 3.011,
      "step": 18772
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.438581943511963,
      "learning_rate": 0.0005902251642404082,
      "loss": 3.0681,
      "step": 18773
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.265867233276367,
      "learning_rate": 0.0005902241285340282,
      "loss": 3.3192,
      "step": 18774
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.773403525352478,
      "learning_rate": 0.0005902230927736901,
      "loss": 3.224,
      "step": 18775
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0327212810516357,
      "learning_rate": 0.0005902220569593941,
      "loss": 2.9599,
      "step": 18776
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.482995629310608,
      "learning_rate": 0.0005902210210911402,
      "loss": 3.0575,
      "step": 18777
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8443471193313599,
      "learning_rate": 0.0005902199851689288,
      "loss": 3.0785,
      "step": 18778
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7506765127182007,
      "learning_rate": 0.00059021894919276,
      "loss": 2.9802,
      "step": 18779
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.592008352279663,
      "learning_rate": 0.000590217913162634,
      "loss": 2.7356,
      "step": 18780
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.774925947189331,
      "learning_rate": 0.0005902168770785511,
      "loss": 3.3225,
      "step": 18781
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5738511085510254,
      "learning_rate": 0.0005902158409405114,
      "loss": 3.1725,
      "step": 18782
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.498473048210144,
      "learning_rate": 0.000590214804748515,
      "loss": 2.943,
      "step": 18783
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3064539432525635,
      "learning_rate": 0.0005902137685025623,
      "loss": 2.9447,
      "step": 18784
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3955779075622559,
      "learning_rate": 0.0005902127322026533,
      "loss": 3.2659,
      "step": 18785
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7587023973464966,
      "learning_rate": 0.0005902116958487883,
      "loss": 3.1849,
      "step": 18786
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.154268503189087,
      "learning_rate": 0.0005902106594409674,
      "loss": 3.0755,
      "step": 18787
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8045647144317627,
      "learning_rate": 0.0005902096229791911,
      "loss": 3.1785,
      "step": 18788
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.0923044681549072,
      "learning_rate": 0.0005902085864634591,
      "loss": 3.1569,
      "step": 18789
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7114876508712769,
      "learning_rate": 0.0005902075498937719,
      "loss": 3.1865,
      "step": 18790
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4907240867614746,
      "learning_rate": 0.0005902065132701298,
      "loss": 3.1915,
      "step": 18791
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.301319122314453,
      "learning_rate": 0.0005902054765925326,
      "loss": 3.2901,
      "step": 18792
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.2604832649230957,
      "learning_rate": 0.0005902044398609809,
      "loss": 2.8421,
      "step": 18793
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6393232345581055,
      "learning_rate": 0.0005902034030754746,
      "loss": 3.3059,
      "step": 18794
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9907341003417969,
      "learning_rate": 0.000590202366236014,
      "loss": 3.12,
      "step": 18795
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.926444172859192,
      "learning_rate": 0.0005902013293425994,
      "loss": 3.1052,
      "step": 18796
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5181033611297607,
      "learning_rate": 0.0005902002923952308,
      "loss": 3.1661,
      "step": 18797
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.7534593343734741,
      "learning_rate": 0.0005901992553939085,
      "loss": 3.1161,
      "step": 18798
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.4259722232818604,
      "learning_rate": 0.0005901982183386328,
      "loss": 3.252,
      "step": 18799
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.5502909421920776,
      "learning_rate": 0.0005901971812294037,
      "loss": 3.2221,
      "step": 18800
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.415419578552246,
      "learning_rate": 0.0005901961440662214,
      "loss": 2.9829,
      "step": 18801
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.465233325958252,
      "learning_rate": 0.0005901951068490862,
      "loss": 2.9289,
      "step": 18802
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6199719905853271,
      "learning_rate": 0.0005901940695779983,
      "loss": 3.2086,
      "step": 18803
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.746688961982727,
      "learning_rate": 0.0005901930322529578,
      "loss": 3.132,
      "step": 18804
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4401216506958008,
      "learning_rate": 0.000590191994873965,
      "loss": 3.2106,
      "step": 18805
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.912420392036438,
      "learning_rate": 0.00059019095744102,
      "loss": 2.8196,
      "step": 18806
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3028112649917603,
      "learning_rate": 0.0005901899199541229,
      "loss": 3.0378,
      "step": 18807
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.369393229484558,
      "learning_rate": 0.0005901888824132741,
      "loss": 3.1133,
      "step": 18808
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.6328957080841064,
      "learning_rate": 0.0005901878448184737,
      "loss": 2.971,
      "step": 18809
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.9600684642791748,
      "learning_rate": 0.000590186807169722,
      "loss": 3.015,
      "step": 18810
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4455653429031372,
      "learning_rate": 0.000590185769467019,
      "loss": 3.1823,
      "step": 18811
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.6759850978851318,
      "learning_rate": 0.000590184731710365,
      "loss": 3.1148,
      "step": 18812
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.4115960597991943,
      "learning_rate": 0.0005901836938997601,
      "loss": 3.015,
      "step": 18813
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.8525463342666626,
      "learning_rate": 0.0005901826560352048,
      "loss": 3.1797,
      "step": 18814
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.282618761062622,
      "learning_rate": 0.0005901816181166988,
      "loss": 2.9041,
      "step": 18815
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.891264796257019,
      "learning_rate": 0.0005901805801442428,
      "loss": 3.2377,
      "step": 18816
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5941905975341797,
      "learning_rate": 0.0005901795421178367,
      "loss": 3.1936,
      "step": 18817
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.56526517868042,
      "learning_rate": 0.0005901785040374806,
      "loss": 3.2028,
      "step": 18818
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4723979234695435,
      "learning_rate": 0.0005901774659031749,
      "loss": 3.1778,
      "step": 18819
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0794456005096436,
      "learning_rate": 0.0005901764277149198,
      "loss": 2.8923,
      "step": 18820
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.869482398033142,
      "learning_rate": 0.0005901753894727154,
      "loss": 3.2132,
      "step": 18821
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8724462985992432,
      "learning_rate": 0.0005901743511765618,
      "loss": 2.9232,
      "step": 18822
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5729472637176514,
      "learning_rate": 0.0005901733128264595,
      "loss": 3.1119,
      "step": 18823
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0647103786468506,
      "learning_rate": 0.0005901722744224084,
      "loss": 3.0365,
      "step": 18824
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1086652278900146,
      "learning_rate": 0.0005901712359644088,
      "loss": 3.4903,
      "step": 18825
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0840513706207275,
      "learning_rate": 0.000590170197452461,
      "loss": 2.9775,
      "step": 18826
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.537519931793213,
      "learning_rate": 0.0005901691588865649,
      "loss": 3.16,
      "step": 18827
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8797694444656372,
      "learning_rate": 0.000590168120266721,
      "loss": 3.0012,
      "step": 18828
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7932562828063965,
      "learning_rate": 0.0005901670815929294,
      "loss": 3.0897,
      "step": 18829
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4022390842437744,
      "learning_rate": 0.0005901660428651901,
      "loss": 3.1972,
      "step": 18830
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5617212057113647,
      "learning_rate": 0.0005901650040835035,
      "loss": 2.7379,
      "step": 18831
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0132601261138916,
      "learning_rate": 0.0005901639652478699,
      "loss": 2.9972,
      "step": 18832
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6622196435928345,
      "learning_rate": 0.0005901629263582892,
      "loss": 3.2217,
      "step": 18833
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8495334386825562,
      "learning_rate": 0.0005901618874147618,
      "loss": 3.1636,
      "step": 18834
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0396571159362793,
      "learning_rate": 0.0005901608484172878,
      "loss": 3.2245,
      "step": 18835
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3260149955749512,
      "learning_rate": 0.0005901598093658673,
      "loss": 3.2651,
      "step": 18836
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8201419115066528,
      "learning_rate": 0.0005901587702605008,
      "loss": 3.1746,
      "step": 18837
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0376906394958496,
      "learning_rate": 0.0005901577311011883,
      "loss": 3.0289,
      "step": 18838
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5482629537582397,
      "learning_rate": 0.00059015669188793,
      "loss": 3.0788,
      "step": 18839
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5206438302993774,
      "learning_rate": 0.0005901556526207259,
      "loss": 3.136,
      "step": 18840
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6851773262023926,
      "learning_rate": 0.0005901546132995766,
      "loss": 3.1702,
      "step": 18841
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7915488481521606,
      "learning_rate": 0.000590153573924482,
      "loss": 2.8208,
      "step": 18842
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8711456060409546,
      "learning_rate": 0.0005901525344954423,
      "loss": 2.9596,
      "step": 18843
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1743013858795166,
      "learning_rate": 0.0005901514950124579,
      "loss": 3.0936,
      "step": 18844
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7159181833267212,
      "learning_rate": 0.0005901504554755288,
      "loss": 3.0741,
      "step": 18845
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.432828426361084,
      "learning_rate": 0.0005901494158846553,
      "loss": 3.2369,
      "step": 18846
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4746437072753906,
      "learning_rate": 0.0005901483762398374,
      "loss": 3.1115,
      "step": 18847
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6083362102508545,
      "learning_rate": 0.0005901473365410757,
      "loss": 3.1634,
      "step": 18848
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7209012508392334,
      "learning_rate": 0.0005901462967883699,
      "loss": 2.8916,
      "step": 18849
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7012181282043457,
      "learning_rate": 0.0005901452569817206,
      "loss": 2.8087,
      "step": 18850
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3487615585327148,
      "learning_rate": 0.0005901442171211276,
      "loss": 2.955,
      "step": 18851
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3222007751464844,
      "learning_rate": 0.0005901431772065914,
      "loss": 3.038,
      "step": 18852
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7574564218521118,
      "learning_rate": 0.0005901421372381121,
      "loss": 2.9672,
      "step": 18853
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5092624425888062,
      "learning_rate": 0.0005901410972156901,
      "loss": 3.2793,
      "step": 18854
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.538320302963257,
      "learning_rate": 0.0005901400571393252,
      "loss": 3.0804,
      "step": 18855
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7474998235702515,
      "learning_rate": 0.0005901390170090178,
      "loss": 2.9747,
      "step": 18856
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.152573823928833,
      "learning_rate": 0.0005901379768247681,
      "loss": 3.2583,
      "step": 18857
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7630186080932617,
      "learning_rate": 0.0005901369365865763,
      "loss": 3.1261,
      "step": 18858
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1277711391448975,
      "learning_rate": 0.0005901358962944425,
      "loss": 2.9187,
      "step": 18859
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5597399473190308,
      "learning_rate": 0.0005901348559483671,
      "loss": 3.0652,
      "step": 18860
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5697752237319946,
      "learning_rate": 0.0005901338155483499,
      "loss": 3.0368,
      "step": 18861
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.700252652168274,
      "learning_rate": 0.0005901327750943916,
      "loss": 2.9225,
      "step": 18862
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.793915033340454,
      "learning_rate": 0.000590131734586492,
      "loss": 3.032,
      "step": 18863
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6813688278198242,
      "learning_rate": 0.0005901306940246516,
      "loss": 3.1852,
      "step": 18864
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6274352073669434,
      "learning_rate": 0.0005901296534088702,
      "loss": 3.1723,
      "step": 18865
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8468186855316162,
      "learning_rate": 0.0005901286127391484,
      "loss": 3.0236,
      "step": 18866
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1802124977111816,
      "learning_rate": 0.000590127572015486,
      "loss": 2.9212,
      "step": 18867
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4254909753799438,
      "learning_rate": 0.0005901265312378837,
      "loss": 3.0839,
      "step": 18868
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7161809206008911,
      "learning_rate": 0.0005901254904063412,
      "loss": 3.1198,
      "step": 18869
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7599831819534302,
      "learning_rate": 0.0005901244495208589,
      "loss": 3.2699,
      "step": 18870
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4405981302261353,
      "learning_rate": 0.000590123408581437,
      "loss": 2.867,
      "step": 18871
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5672906637191772,
      "learning_rate": 0.0005901223675880759,
      "loss": 3.0236,
      "step": 18872
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.407910943031311,
      "learning_rate": 0.0005901213265407753,
      "loss": 3.2445,
      "step": 18873
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6626592874526978,
      "learning_rate": 0.0005901202854395357,
      "loss": 3.0675,
      "step": 18874
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4645283222198486,
      "learning_rate": 0.0005901192442843573,
      "loss": 2.8866,
      "step": 18875
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9753614664077759,
      "learning_rate": 0.0005901182030752403,
      "loss": 2.8667,
      "step": 18876
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.445084810256958,
      "learning_rate": 0.0005901171618121849,
      "loss": 3.3538,
      "step": 18877
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9045002460479736,
      "learning_rate": 0.0005901161204951911,
      "loss": 3.1779,
      "step": 18878
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4073641300201416,
      "learning_rate": 0.0005901150791242593,
      "loss": 3.1724,
      "step": 18879
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6553281545639038,
      "learning_rate": 0.0005901140376993897,
      "loss": 2.9092,
      "step": 18880
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5385721921920776,
      "learning_rate": 0.0005901129962205823,
      "loss": 3.1176,
      "step": 18881
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.7503607273101807,
      "learning_rate": 0.0005901119546878375,
      "loss": 3.1386,
      "step": 18882
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8397092819213867,
      "learning_rate": 0.0005901109131011554,
      "loss": 2.9428,
      "step": 18883
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6534006595611572,
      "learning_rate": 0.0005901098714605362,
      "loss": 3.1465,
      "step": 18884
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.0924222469329834,
      "learning_rate": 0.0005901088297659801,
      "loss": 2.9558,
      "step": 18885
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.359894275665283,
      "learning_rate": 0.0005901077880174873,
      "loss": 3.327,
      "step": 18886
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4076335430145264,
      "learning_rate": 0.000590106746215058,
      "loss": 3.2586,
      "step": 18887
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.383249521255493,
      "learning_rate": 0.0005901057043586923,
      "loss": 3.1462,
      "step": 18888
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.7419347763061523,
      "learning_rate": 0.0005901046624483906,
      "loss": 2.9653,
      "step": 18889
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3530193567276,
      "learning_rate": 0.0005901036204841528,
      "loss": 3.27,
      "step": 18890
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.576546549797058,
      "learning_rate": 0.0005901025784659793,
      "loss": 2.9956,
      "step": 18891
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8031457662582397,
      "learning_rate": 0.0005901015363938704,
      "loss": 3.1781,
      "step": 18892
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5748153924942017,
      "learning_rate": 0.0005901004942678261,
      "loss": 3.158,
      "step": 18893
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4480761289596558,
      "learning_rate": 0.0005900994520878466,
      "loss": 3.2103,
      "step": 18894
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.31712007522583,
      "learning_rate": 0.0005900984098539321,
      "loss": 3.0151,
      "step": 18895
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6415601968765259,
      "learning_rate": 0.0005900973675660829,
      "loss": 3.3298,
      "step": 18896
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5746217966079712,
      "learning_rate": 0.000590096325224299,
      "loss": 3.1226,
      "step": 18897
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3344985246658325,
      "learning_rate": 0.0005900952828285809,
      "loss": 3.0835,
      "step": 18898
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.526191234588623,
      "learning_rate": 0.0005900942403789285,
      "loss": 2.9946,
      "step": 18899
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4673024415969849,
      "learning_rate": 0.0005900931978753421,
      "loss": 2.9675,
      "step": 18900
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5164355039596558,
      "learning_rate": 0.0005900921553178219,
      "loss": 3.1877,
      "step": 18901
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.700935959815979,
      "learning_rate": 0.0005900911127063682,
      "loss": 2.91,
      "step": 18902
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8333097696304321,
      "learning_rate": 0.000590090070040981,
      "loss": 3.2135,
      "step": 18903
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4997986555099487,
      "learning_rate": 0.0005900890273216605,
      "loss": 2.942,
      "step": 18904
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4840000867843628,
      "learning_rate": 0.0005900879845484071,
      "loss": 3.1932,
      "step": 18905
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5881465673446655,
      "learning_rate": 0.0005900869417212207,
      "loss": 3.0254,
      "step": 18906
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7407352924346924,
      "learning_rate": 0.0005900858988401018,
      "loss": 2.9955,
      "step": 18907
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3084344863891602,
      "learning_rate": 0.0005900848559050506,
      "loss": 3.1718,
      "step": 18908
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4654213190078735,
      "learning_rate": 0.0005900838129160669,
      "loss": 3.0911,
      "step": 18909
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3401416540145874,
      "learning_rate": 0.0005900827698731513,
      "loss": 3.0277,
      "step": 18910
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7547439336776733,
      "learning_rate": 0.0005900817267763036,
      "loss": 2.9297,
      "step": 18911
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.576033353805542,
      "learning_rate": 0.0005900806836255244,
      "loss": 2.9984,
      "step": 18912
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6984833478927612,
      "learning_rate": 0.0005900796404208138,
      "loss": 3.143,
      "step": 18913
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7520456314086914,
      "learning_rate": 0.0005900785971621719,
      "loss": 3.088,
      "step": 18914
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4286569356918335,
      "learning_rate": 0.0005900775538495988,
      "loss": 3.1433,
      "step": 18915
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3463846445083618,
      "learning_rate": 0.0005900765104830948,
      "loss": 3.2885,
      "step": 18916
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.358207106590271,
      "learning_rate": 0.0005900754670626603,
      "loss": 3.2215,
      "step": 18917
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7064343690872192,
      "learning_rate": 0.0005900744235882951,
      "loss": 3.06,
      "step": 18918
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0995566844940186,
      "learning_rate": 0.0005900733800599996,
      "loss": 3.0327,
      "step": 18919
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.667945146560669,
      "learning_rate": 0.0005900723364777741,
      "loss": 3.1947,
      "step": 18920
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.503984212875366,
      "learning_rate": 0.0005900712928416186,
      "loss": 3.0379,
      "step": 18921
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7795391082763672,
      "learning_rate": 0.0005900702491515333,
      "loss": 3.4047,
      "step": 18922
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9793766736984253,
      "learning_rate": 0.0005900692054075186,
      "loss": 3.3355,
      "step": 18923
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1963813304901123,
      "learning_rate": 0.0005900681616095746,
      "loss": 2.9765,
      "step": 18924
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6618602275848389,
      "learning_rate": 0.0005900671177577013,
      "loss": 3.2851,
      "step": 18925
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8425102233886719,
      "learning_rate": 0.0005900660738518991,
      "loss": 3.2377,
      "step": 18926
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1976587772369385,
      "learning_rate": 0.0005900650298921681,
      "loss": 3.1393,
      "step": 18927
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4851685762405396,
      "learning_rate": 0.0005900639858785087,
      "loss": 3.2073,
      "step": 18928
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8701146841049194,
      "learning_rate": 0.0005900629418109207,
      "loss": 2.9432,
      "step": 18929
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.302924871444702,
      "learning_rate": 0.0005900618976894047,
      "loss": 2.9938,
      "step": 18930
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.382428765296936,
      "learning_rate": 0.0005900608535139606,
      "loss": 3.1298,
      "step": 18931
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.303274393081665,
      "learning_rate": 0.0005900598092845888,
      "loss": 3.1987,
      "step": 18932
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.16898775100708,
      "learning_rate": 0.0005900587650012894,
      "loss": 3.0936,
      "step": 18933
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.570639967918396,
      "learning_rate": 0.0005900577206640626,
      "loss": 3.291,
      "step": 18934
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1372385025024414,
      "learning_rate": 0.0005900566762729085,
      "loss": 3.0914,
      "step": 18935
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.271394729614258,
      "learning_rate": 0.0005900556318278275,
      "loss": 3.1493,
      "step": 18936
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1539146900177,
      "learning_rate": 0.0005900545873288196,
      "loss": 3.024,
      "step": 18937
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4295217990875244,
      "learning_rate": 0.0005900535427758851,
      "loss": 3.1534,
      "step": 18938
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.315059185028076,
      "learning_rate": 0.0005900524981690242,
      "loss": 2.9878,
      "step": 18939
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.19695782661438,
      "learning_rate": 0.000590051453508237,
      "loss": 3.0019,
      "step": 18940
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4618457555770874,
      "learning_rate": 0.0005900504087935238,
      "loss": 3.2127,
      "step": 18941
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.025582790374756,
      "learning_rate": 0.0005900493640248848,
      "loss": 3.1425,
      "step": 18942
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2108614444732666,
      "learning_rate": 0.0005900483192023201,
      "loss": 3.3231,
      "step": 18943
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7022982835769653,
      "learning_rate": 0.00059004727432583,
      "loss": 3.2355,
      "step": 18944
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.10837721824646,
      "learning_rate": 0.0005900462293954146,
      "loss": 3.2395,
      "step": 18945
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4051604270935059,
      "learning_rate": 0.0005900451844110741,
      "loss": 2.9714,
      "step": 18946
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4557608366012573,
      "learning_rate": 0.0005900441393728087,
      "loss": 2.8191,
      "step": 18947
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.647373914718628,
      "learning_rate": 0.0005900430942806187,
      "loss": 3.2585,
      "step": 18948
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5562962293624878,
      "learning_rate": 0.0005900420491345042,
      "loss": 3.2607,
      "step": 18949
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5271148681640625,
      "learning_rate": 0.0005900410039344654,
      "loss": 3.3506,
      "step": 18950
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.826937675476074,
      "learning_rate": 0.0005900399586805025,
      "loss": 3.1916,
      "step": 18951
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5327473878860474,
      "learning_rate": 0.0005900389133726157,
      "loss": 3.0076,
      "step": 18952
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5876237154006958,
      "learning_rate": 0.0005900378680108052,
      "loss": 3.1197,
      "step": 18953
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.30265474319458,
      "learning_rate": 0.0005900368225950711,
      "loss": 3.0496,
      "step": 18954
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.578299641609192,
      "learning_rate": 0.0005900357771254138,
      "loss": 2.9378,
      "step": 18955
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4038476943969727,
      "learning_rate": 0.0005900347316018333,
      "loss": 3.3162,
      "step": 18956
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1692585945129395,
      "learning_rate": 0.0005900336860243299,
      "loss": 2.9404,
      "step": 18957
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.409926414489746,
      "learning_rate": 0.0005900326403929037,
      "loss": 3.0952,
      "step": 18958
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4319729804992676,
      "learning_rate": 0.000590031594707555,
      "loss": 3.2572,
      "step": 18959
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4049813747406006,
      "learning_rate": 0.000590030548968284,
      "loss": 3.021,
      "step": 18960
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6582437753677368,
      "learning_rate": 0.0005900295031750908,
      "loss": 3.1846,
      "step": 18961
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.48876690864563,
      "learning_rate": 0.0005900284573279757,
      "loss": 3.2335,
      "step": 18962
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6906784772872925,
      "learning_rate": 0.0005900274114269387,
      "loss": 3.2071,
      "step": 18963
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.713698148727417,
      "learning_rate": 0.0005900263654719803,
      "loss": 3.2398,
      "step": 18964
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4090373516082764,
      "learning_rate": 0.0005900253194631004,
      "loss": 3.013,
      "step": 18965
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.289578914642334,
      "learning_rate": 0.0005900242734002993,
      "loss": 3.172,
      "step": 18966
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1639480590820312,
      "learning_rate": 0.0005900232272835773,
      "loss": 3.1608,
      "step": 18967
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4762358665466309,
      "learning_rate": 0.0005900221811129345,
      "loss": 3.1645,
      "step": 18968
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6754807233810425,
      "learning_rate": 0.000590021134888371,
      "loss": 2.9775,
      "step": 18969
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3731495141983032,
      "learning_rate": 0.0005900200886098872,
      "loss": 3.1671,
      "step": 18970
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2561168670654297,
      "learning_rate": 0.0005900190422774833,
      "loss": 2.8841,
      "step": 18971
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2166593074798584,
      "learning_rate": 0.0005900179958911591,
      "loss": 3.1161,
      "step": 18972
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4851058721542358,
      "learning_rate": 0.0005900169494509152,
      "loss": 3.4259,
      "step": 18973
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7653837203979492,
      "learning_rate": 0.0005900159029567517,
      "loss": 3.0203,
      "step": 18974
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1448655128479004,
      "learning_rate": 0.0005900148564086688,
      "loss": 2.8893,
      "step": 18975
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4710801839828491,
      "learning_rate": 0.0005900138098066666,
      "loss": 3.127,
      "step": 18976
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5163071155548096,
      "learning_rate": 0.0005900127631507455,
      "loss": 3.148,
      "step": 18977
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8575032949447632,
      "learning_rate": 0.0005900117164409055,
      "loss": 3.1402,
      "step": 18978
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.59531569480896,
      "learning_rate": 0.0005900106696771467,
      "loss": 3.063,
      "step": 18979
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4645239114761353,
      "learning_rate": 0.0005900096228594695,
      "loss": 3.2883,
      "step": 18980
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.753122329711914,
      "learning_rate": 0.0005900085759878741,
      "loss": 3.1658,
      "step": 18981
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.220920443534851,
      "learning_rate": 0.0005900075290623606,
      "loss": 3.3821,
      "step": 18982
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5171685218811035,
      "learning_rate": 0.0005900064820829293,
      "loss": 3.0878,
      "step": 18983
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5466891527175903,
      "learning_rate": 0.0005900054350495802,
      "loss": 3.0141,
      "step": 18984
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6161593198776245,
      "learning_rate": 0.0005900043879623137,
      "loss": 3.1379,
      "step": 18985
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5208715200424194,
      "learning_rate": 0.0005900033408211299,
      "loss": 3.0137,
      "step": 18986
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7508182525634766,
      "learning_rate": 0.0005900022936260289,
      "loss": 3.1714,
      "step": 18987
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8782981634140015,
      "learning_rate": 0.0005900012463770111,
      "loss": 3.0669,
      "step": 18988
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5857278108596802,
      "learning_rate": 0.0005900001990740766,
      "loss": 3.1332,
      "step": 18989
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6294368505477905,
      "learning_rate": 0.0005899991517172256,
      "loss": 2.7751,
      "step": 18990
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.488749623298645,
      "learning_rate": 0.0005899981043064582,
      "loss": 3.3499,
      "step": 18991
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9457571506500244,
      "learning_rate": 0.0005899970568417748,
      "loss": 3.2694,
      "step": 18992
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6577879190444946,
      "learning_rate": 0.0005899960093231753,
      "loss": 3.1743,
      "step": 18993
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5201966762542725,
      "learning_rate": 0.0005899949617506603,
      "loss": 3.1598,
      "step": 18994
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1037003993988037,
      "learning_rate": 0.0005899939141242296,
      "loss": 3.0012,
      "step": 18995
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4547789096832275,
      "learning_rate": 0.0005899928664438836,
      "loss": 3.0035,
      "step": 18996
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.307390809059143,
      "learning_rate": 0.0005899918187096223,
      "loss": 3.1005,
      "step": 18997
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.514845609664917,
      "learning_rate": 0.0005899907709214462,
      "loss": 3.3168,
      "step": 18998
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8130470514297485,
      "learning_rate": 0.0005899897230793554,
      "loss": 3.0466,
      "step": 18999
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.264436721801758,
      "learning_rate": 0.0005899886751833499,
      "loss": 3.0478,
      "step": 19000
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6967809200286865,
      "learning_rate": 0.0005899876272334302,
      "loss": 2.9324,
      "step": 19001
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3216426372528076,
      "learning_rate": 0.0005899865792295962,
      "loss": 3.0231,
      "step": 19002
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4324452877044678,
      "learning_rate": 0.0005899855311718482,
      "loss": 3.0826,
      "step": 19003
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2333743572235107,
      "learning_rate": 0.0005899844830601865,
      "loss": 3.0193,
      "step": 19004
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.814644455909729,
      "learning_rate": 0.0005899834348946111,
      "loss": 2.9676,
      "step": 19005
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5243664979934692,
      "learning_rate": 0.0005899823866751224,
      "loss": 3.311,
      "step": 19006
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6617848873138428,
      "learning_rate": 0.0005899813384017205,
      "loss": 3.2026,
      "step": 19007
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4872885942459106,
      "learning_rate": 0.0005899802900744055,
      "loss": 3.1559,
      "step": 19008
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6818815469741821,
      "learning_rate": 0.0005899792416931779,
      "loss": 3.1615,
      "step": 19009
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0688517093658447,
      "learning_rate": 0.0005899781932580375,
      "loss": 2.8842,
      "step": 19010
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2568453550338745,
      "learning_rate": 0.0005899771447689847,
      "loss": 3.2703,
      "step": 19011
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9443150758743286,
      "learning_rate": 0.0005899760962260196,
      "loss": 3.2571,
      "step": 19012
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.54794442653656,
      "learning_rate": 0.0005899750476291427,
      "loss": 2.8986,
      "step": 19013
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7539273500442505,
      "learning_rate": 0.0005899739989783539,
      "loss": 2.9653,
      "step": 19014
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5093754529953003,
      "learning_rate": 0.0005899729502736533,
      "loss": 3.0186,
      "step": 19015
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2685928344726562,
      "learning_rate": 0.0005899719015150413,
      "loss": 3.0649,
      "step": 19016
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6122097969055176,
      "learning_rate": 0.0005899708527025181,
      "loss": 2.9775,
      "step": 19017
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.842705488204956,
      "learning_rate": 0.0005899698038360838,
      "loss": 3.1552,
      "step": 19018
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6926931142807007,
      "learning_rate": 0.0005899687549157387,
      "loss": 3.0001,
      "step": 19019
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.472042441368103,
      "learning_rate": 0.0005899677059414831,
      "loss": 2.9938,
      "step": 19020
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8804795742034912,
      "learning_rate": 0.0005899666569133167,
      "loss": 3.2079,
      "step": 19021
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8123525381088257,
      "learning_rate": 0.0005899656078312402,
      "loss": 3.0232,
      "step": 19022
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7616090774536133,
      "learning_rate": 0.0005899645586952536,
      "loss": 3.0265,
      "step": 19023
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5493398904800415,
      "learning_rate": 0.0005899635095053572,
      "loss": 3.1197,
      "step": 19024
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.979792594909668,
      "learning_rate": 0.000589962460261551,
      "loss": 3.1054,
      "step": 19025
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.548150062561035,
      "learning_rate": 0.0005899614109638353,
      "loss": 3.022,
      "step": 19026
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2823379039764404,
      "learning_rate": 0.0005899603616122103,
      "loss": 3.0351,
      "step": 19027
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5271902084350586,
      "learning_rate": 0.0005899593122066762,
      "loss": 3.0925,
      "step": 19028
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7618273496627808,
      "learning_rate": 0.0005899582627472333,
      "loss": 3.0252,
      "step": 19029
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.708846092224121,
      "learning_rate": 0.0005899572132338816,
      "loss": 3.1886,
      "step": 19030
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4518730640411377,
      "learning_rate": 0.0005899561636666213,
      "loss": 2.9266,
      "step": 19031
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6963218450546265,
      "learning_rate": 0.0005899551140454527,
      "loss": 2.8278,
      "step": 19032
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6884701251983643,
      "learning_rate": 0.0005899540643703761,
      "loss": 3.1916,
      "step": 19033
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.845322847366333,
      "learning_rate": 0.0005899530146413916,
      "loss": 3.0768,
      "step": 19034
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.31674325466156,
      "learning_rate": 0.0005899519648584992,
      "loss": 3.0632,
      "step": 19035
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5156373977661133,
      "learning_rate": 0.0005899509150216993,
      "loss": 3.2543,
      "step": 19036
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6520203351974487,
      "learning_rate": 0.000589949865130992,
      "loss": 3.1602,
      "step": 19037
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4792953729629517,
      "learning_rate": 0.0005899488151863777,
      "loss": 2.9997,
      "step": 19038
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4586912393569946,
      "learning_rate": 0.0005899477651878563,
      "loss": 2.9116,
      "step": 19039
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.136366128921509,
      "learning_rate": 0.0005899467151354281,
      "loss": 3.2526,
      "step": 19040
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5515949726104736,
      "learning_rate": 0.0005899456650290935,
      "loss": 3.2136,
      "step": 19041
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7587813138961792,
      "learning_rate": 0.0005899446148688524,
      "loss": 3.3015,
      "step": 19042
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.512453317642212,
      "learning_rate": 0.0005899435646547052,
      "loss": 3.0753,
      "step": 19043
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0205276012420654,
      "learning_rate": 0.000589942514386652,
      "loss": 3.1022,
      "step": 19044
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2124019861221313,
      "learning_rate": 0.0005899414640646929,
      "loss": 3.0924,
      "step": 19045
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2363204956054688,
      "learning_rate": 0.0005899404136888284,
      "loss": 3.0331,
      "step": 19046
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.183596134185791,
      "learning_rate": 0.0005899393632590584,
      "loss": 3.236,
      "step": 19047
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9182387590408325,
      "learning_rate": 0.0005899383127753832,
      "loss": 3.2971,
      "step": 19048
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3599722385406494,
      "learning_rate": 0.000589937262237803,
      "loss": 3.4698,
      "step": 19049
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9926719665527344,
      "learning_rate": 0.000589936211646318,
      "loss": 3.2301,
      "step": 19050
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.702958345413208,
      "learning_rate": 0.0005899351610009284,
      "loss": 3.4024,
      "step": 19051
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3932300806045532,
      "learning_rate": 0.0005899341103016343,
      "loss": 2.9457,
      "step": 19052
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5230884552001953,
      "learning_rate": 0.0005899330595484361,
      "loss": 3.0605,
      "step": 19053
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.481642723083496,
      "learning_rate": 0.0005899320087413339,
      "loss": 3.0152,
      "step": 19054
    },
    {
      "epoch": 0.25,
      "grad_norm": 4.0345234870910645,
      "learning_rate": 0.0005899309578803277,
      "loss": 3.0533,
      "step": 19055
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.088613986968994,
      "learning_rate": 0.0005899299069654179,
      "loss": 3.1001,
      "step": 19056
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.714013695716858,
      "learning_rate": 0.0005899288559966047,
      "loss": 2.995,
      "step": 19057
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.611825942993164,
      "learning_rate": 0.0005899278049738883,
      "loss": 3.318,
      "step": 19058
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5786285400390625,
      "learning_rate": 0.0005899267538972688,
      "loss": 2.9724,
      "step": 19059
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0586087703704834,
      "learning_rate": 0.0005899257027667464,
      "loss": 3.2729,
      "step": 19060
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.532131314277649,
      "learning_rate": 0.0005899246515823214,
      "loss": 3.0449,
      "step": 19061
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4321541786193848,
      "learning_rate": 0.0005899236003439939,
      "loss": 3.1439,
      "step": 19062
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3144428730010986,
      "learning_rate": 0.0005899225490517642,
      "loss": 3.2822,
      "step": 19063
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2432602643966675,
      "learning_rate": 0.0005899214977056323,
      "loss": 2.8869,
      "step": 19064
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6293846368789673,
      "learning_rate": 0.0005899204463055986,
      "loss": 3.2887,
      "step": 19065
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4145296812057495,
      "learning_rate": 0.0005899193948516632,
      "loss": 3.2175,
      "step": 19066
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4087120294570923,
      "learning_rate": 0.0005899183433438264,
      "loss": 3.0827,
      "step": 19067
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2343976497650146,
      "learning_rate": 0.0005899172917820882,
      "loss": 3.0614,
      "step": 19068
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4531383514404297,
      "learning_rate": 0.0005899162401664489,
      "loss": 3.4444,
      "step": 19069
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.328577995300293,
      "learning_rate": 0.0005899151884969087,
      "loss": 3.3573,
      "step": 19070
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3965258598327637,
      "learning_rate": 0.0005899141367734679,
      "loss": 3.111,
      "step": 19071
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.53941810131073,
      "learning_rate": 0.0005899130849961265,
      "loss": 2.9901,
      "step": 19072
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.093057632446289,
      "learning_rate": 0.0005899120331648847,
      "loss": 3.162,
      "step": 19073
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4398263692855835,
      "learning_rate": 0.000589910981279743,
      "loss": 3.3449,
      "step": 19074
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5536640882492065,
      "learning_rate": 0.0005899099293407013,
      "loss": 3.2209,
      "step": 19075
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3645933866500854,
      "learning_rate": 0.0005899088773477598,
      "loss": 3.1306,
      "step": 19076
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3718644380569458,
      "learning_rate": 0.0005899078253009187,
      "loss": 3.2047,
      "step": 19077
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7715052366256714,
      "learning_rate": 0.0005899067732001784,
      "loss": 3.1385,
      "step": 19078
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7389012575149536,
      "learning_rate": 0.0005899057210455389,
      "loss": 2.9697,
      "step": 19079
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0033628940582275,
      "learning_rate": 0.0005899046688370005,
      "loss": 2.8677,
      "step": 19080
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5863243341445923,
      "learning_rate": 0.0005899036165745632,
      "loss": 3.131,
      "step": 19081
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.682348608970642,
      "learning_rate": 0.0005899025642582276,
      "loss": 3.2079,
      "step": 19082
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5559008121490479,
      "learning_rate": 0.0005899015118879935,
      "loss": 3.3973,
      "step": 19083
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7278577089309692,
      "learning_rate": 0.0005899004594638612,
      "loss": 3.0002,
      "step": 19084
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4699794054031372,
      "learning_rate": 0.0005898994069858311,
      "loss": 3.1724,
      "step": 19085
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7924927473068237,
      "learning_rate": 0.000589898354453903,
      "loss": 3.052,
      "step": 19086
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5945466756820679,
      "learning_rate": 0.0005898973018680774,
      "loss": 3.3642,
      "step": 19087
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.808978796005249,
      "learning_rate": 0.0005898962492283544,
      "loss": 3.171,
      "step": 19088
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.716002106666565,
      "learning_rate": 0.0005898951965347343,
      "loss": 3.1405,
      "step": 19089
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.526103138923645,
      "learning_rate": 0.0005898941437872171,
      "loss": 3.1595,
      "step": 19090
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2322832345962524,
      "learning_rate": 0.0005898930909858033,
      "loss": 3.1998,
      "step": 19091
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.770264148712158,
      "learning_rate": 0.0005898920381304928,
      "loss": 3.2298,
      "step": 19092
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8790322542190552,
      "learning_rate": 0.0005898909852212858,
      "loss": 3.0168,
      "step": 19093
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6993588209152222,
      "learning_rate": 0.0005898899322581827,
      "loss": 2.8905,
      "step": 19094
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5565071105957031,
      "learning_rate": 0.0005898888792411836,
      "loss": 3.1422,
      "step": 19095
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3490166664123535,
      "learning_rate": 0.0005898878261702886,
      "loss": 3.2984,
      "step": 19096
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5518786907196045,
      "learning_rate": 0.000589886773045498,
      "loss": 3.1954,
      "step": 19097
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6104986667633057,
      "learning_rate": 0.0005898857198668121,
      "loss": 2.7558,
      "step": 19098
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0664501190185547,
      "learning_rate": 0.0005898846666342308,
      "loss": 3.0208,
      "step": 19099
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.016284704208374,
      "learning_rate": 0.0005898836133477546,
      "loss": 3.3385,
      "step": 19100
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9181326627731323,
      "learning_rate": 0.0005898825600073834,
      "loss": 3.1641,
      "step": 19101
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.98838210105896,
      "learning_rate": 0.0005898815066131178,
      "loss": 3.1186,
      "step": 19102
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5179908275604248,
      "learning_rate": 0.0005898804531649576,
      "loss": 2.9703,
      "step": 19103
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.176481008529663,
      "learning_rate": 0.0005898793996629031,
      "loss": 2.8632,
      "step": 19104
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5038071870803833,
      "learning_rate": 0.0005898783461069547,
      "loss": 3.1311,
      "step": 19105
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4329521656036377,
      "learning_rate": 0.0005898772924971123,
      "loss": 3.09,
      "step": 19106
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.100236177444458,
      "learning_rate": 0.0005898762388333763,
      "loss": 2.7974,
      "step": 19107
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5061230659484863,
      "learning_rate": 0.0005898751851157469,
      "loss": 3.1953,
      "step": 19108
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.638003945350647,
      "learning_rate": 0.0005898741313442241,
      "loss": 3.2017,
      "step": 19109
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.452485203742981,
      "learning_rate": 0.0005898730775188083,
      "loss": 3.088,
      "step": 19110
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3666763305664062,
      "learning_rate": 0.0005898720236394996,
      "loss": 3.2334,
      "step": 19111
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.538150429725647,
      "learning_rate": 0.0005898709697062983,
      "loss": 3.083,
      "step": 19112
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2855658531188965,
      "learning_rate": 0.0005898699157192045,
      "loss": 3.0916,
      "step": 19113
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6395516395568848,
      "learning_rate": 0.0005898688616782183,
      "loss": 3.2069,
      "step": 19114
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4734042882919312,
      "learning_rate": 0.0005898678075833401,
      "loss": 2.8417,
      "step": 19115
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.114252805709839,
      "learning_rate": 0.0005898667534345699,
      "loss": 3.051,
      "step": 19116
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.8791024684906006,
      "learning_rate": 0.0005898656992319081,
      "loss": 3.1041,
      "step": 19117
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.491991400718689,
      "learning_rate": 0.0005898646449753549,
      "loss": 3.127,
      "step": 19118
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.593942403793335,
      "learning_rate": 0.0005898635906649103,
      "loss": 2.9358,
      "step": 19119
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2533152103424072,
      "learning_rate": 0.0005898625363005745,
      "loss": 3.0826,
      "step": 19120
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2660709619522095,
      "learning_rate": 0.0005898614818823477,
      "loss": 3.0902,
      "step": 19121
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3170889616012573,
      "learning_rate": 0.0005898604274102304,
      "loss": 3.1384,
      "step": 19122
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3779183626174927,
      "learning_rate": 0.0005898593728842225,
      "loss": 3.1211,
      "step": 19123
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5558037757873535,
      "learning_rate": 0.0005898583183043243,
      "loss": 3.2392,
      "step": 19124
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.218864679336548,
      "learning_rate": 0.0005898572636705359,
      "loss": 3.2629,
      "step": 19125
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.727622389793396,
      "learning_rate": 0.0005898562089828575,
      "loss": 3.2964,
      "step": 19126
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3806591033935547,
      "learning_rate": 0.0005898551542412895,
      "loss": 2.779,
      "step": 19127
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.259743332862854,
      "learning_rate": 0.0005898540994458319,
      "loss": 3.085,
      "step": 19128
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.565080165863037,
      "learning_rate": 0.000589853044596485,
      "loss": 2.9205,
      "step": 19129
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4294133186340332,
      "learning_rate": 0.0005898519896932489,
      "loss": 2.9072,
      "step": 19130
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3657021522521973,
      "learning_rate": 0.0005898509347361237,
      "loss": 3.1023,
      "step": 19131
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4222177267074585,
      "learning_rate": 0.0005898498797251101,
      "loss": 3.0944,
      "step": 19132
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.524219274520874,
      "learning_rate": 0.0005898488246602077,
      "loss": 3.1102,
      "step": 19133
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5144683122634888,
      "learning_rate": 0.0005898477695414169,
      "loss": 3.1965,
      "step": 19134
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1854079961776733,
      "learning_rate": 0.000589846714368738,
      "loss": 2.973,
      "step": 19135
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.01816725730896,
      "learning_rate": 0.000589845659142171,
      "loss": 3.3273,
      "step": 19136
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8147706985473633,
      "learning_rate": 0.0005898446038617163,
      "loss": 2.8864,
      "step": 19137
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4159423112869263,
      "learning_rate": 0.0005898435485273742,
      "loss": 3.0794,
      "step": 19138
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7630114555358887,
      "learning_rate": 0.0005898424931391445,
      "loss": 3.0158,
      "step": 19139
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4470744132995605,
      "learning_rate": 0.0005898414376970277,
      "loss": 2.9625,
      "step": 19140
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.688409686088562,
      "learning_rate": 0.0005898403822010239,
      "loss": 2.9284,
      "step": 19141
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4207091331481934,
      "learning_rate": 0.0005898393266511332,
      "loss": 3.1331,
      "step": 19142
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9355703592300415,
      "learning_rate": 0.000589838271047356,
      "loss": 2.8349,
      "step": 19143
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.238403797149658,
      "learning_rate": 0.0005898372153896924,
      "loss": 3.1577,
      "step": 19144
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4841430187225342,
      "learning_rate": 0.0005898361596781425,
      "loss": 3.1324,
      "step": 19145
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.310214877128601,
      "learning_rate": 0.0005898351039127067,
      "loss": 3.2319,
      "step": 19146
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2079776525497437,
      "learning_rate": 0.000589834048093385,
      "loss": 2.9201,
      "step": 19147
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8572801351547241,
      "learning_rate": 0.0005898329922201778,
      "loss": 3.078,
      "step": 19148
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6186550855636597,
      "learning_rate": 0.000589831936293085,
      "loss": 3.2682,
      "step": 19149
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8565114736557007,
      "learning_rate": 0.000589830880312107,
      "loss": 3.0383,
      "step": 19150
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7758152484893799,
      "learning_rate": 0.0005898298242772441,
      "loss": 2.8714,
      "step": 19151
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.014265537261963,
      "learning_rate": 0.0005898287681884962,
      "loss": 3.0625,
      "step": 19152
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7216720581054688,
      "learning_rate": 0.0005898277120458638,
      "loss": 3.0374,
      "step": 19153
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7039740085601807,
      "learning_rate": 0.0005898266558493468,
      "loss": 3.077,
      "step": 19154
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7124792337417603,
      "learning_rate": 0.0005898255995989458,
      "loss": 3.1149,
      "step": 19155
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7562224864959717,
      "learning_rate": 0.0005898245432946605,
      "loss": 3.1586,
      "step": 19156
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5913714170455933,
      "learning_rate": 0.0005898234869364915,
      "loss": 3.0651,
      "step": 19157
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8906046152114868,
      "learning_rate": 0.0005898224305244387,
      "loss": 3.1328,
      "step": 19158
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5335921049118042,
      "learning_rate": 0.0005898213740585026,
      "loss": 3.1274,
      "step": 19159
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3890246152877808,
      "learning_rate": 0.0005898203175386832,
      "loss": 3.1861,
      "step": 19160
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8627736568450928,
      "learning_rate": 0.0005898192609649806,
      "loss": 3.2664,
      "step": 19161
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.218057155609131,
      "learning_rate": 0.0005898182043373954,
      "loss": 3.0723,
      "step": 19162
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6232010126113892,
      "learning_rate": 0.0005898171476559274,
      "loss": 2.9244,
      "step": 19163
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.884013056755066,
      "learning_rate": 0.0005898160909205768,
      "loss": 3.029,
      "step": 19164
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5714337825775146,
      "learning_rate": 0.000589815034131344,
      "loss": 3.1671,
      "step": 19165
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.449101448059082,
      "learning_rate": 0.0005898139772882291,
      "loss": 3.0243,
      "step": 19166
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4872888326644897,
      "learning_rate": 0.0005898129203912324,
      "loss": 3.1359,
      "step": 19167
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8182460069656372,
      "learning_rate": 0.0005898118634403539,
      "loss": 3.2346,
      "step": 19168
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4457303285598755,
      "learning_rate": 0.0005898108064355939,
      "loss": 3.0757,
      "step": 19169
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1472389698028564,
      "learning_rate": 0.0005898097493769527,
      "loss": 3.1835,
      "step": 19170
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6546475887298584,
      "learning_rate": 0.0005898086922644303,
      "loss": 3.0858,
      "step": 19171
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2101306915283203,
      "learning_rate": 0.000589807635098027,
      "loss": 3.1808,
      "step": 19172
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4138346910476685,
      "learning_rate": 0.0005898065778777431,
      "loss": 3.2823,
      "step": 19173
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.426476240158081,
      "learning_rate": 0.0005898055206035786,
      "loss": 3.2079,
      "step": 19174
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5275869369506836,
      "learning_rate": 0.0005898044632755337,
      "loss": 2.9869,
      "step": 19175
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7172091007232666,
      "learning_rate": 0.0005898034058936088,
      "loss": 3.117,
      "step": 19176
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7918256521224976,
      "learning_rate": 0.000589802348457804,
      "loss": 2.9971,
      "step": 19177
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.40035343170166,
      "learning_rate": 0.0005898012909681193,
      "loss": 2.9458,
      "step": 19178
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.741014003753662,
      "learning_rate": 0.0005898002334245553,
      "loss": 2.8088,
      "step": 19179
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2128374576568604,
      "learning_rate": 0.0005897991758271118,
      "loss": 3.1988,
      "step": 19180
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2540476322174072,
      "learning_rate": 0.0005897981181757892,
      "loss": 3.0214,
      "step": 19181
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.921948790550232,
      "learning_rate": 0.0005897970604705877,
      "loss": 3.0985,
      "step": 19182
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.093841314315796,
      "learning_rate": 0.0005897960027115074,
      "loss": 3.0876,
      "step": 19183
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5139284133911133,
      "learning_rate": 0.0005897949448985485,
      "loss": 3.1433,
      "step": 19184
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.5315651893615723,
      "learning_rate": 0.0005897938870317115,
      "loss": 2.978,
      "step": 19185
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.284592390060425,
      "learning_rate": 0.0005897928291109961,
      "loss": 3.0147,
      "step": 19186
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4805645942687988,
      "learning_rate": 0.0005897917711364028,
      "loss": 3.0326,
      "step": 19187
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.132395029067993,
      "learning_rate": 0.0005897907131079318,
      "loss": 3.1888,
      "step": 19188
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6989251375198364,
      "learning_rate": 0.0005897896550255832,
      "loss": 2.9082,
      "step": 19189
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.424203634262085,
      "learning_rate": 0.0005897885968893572,
      "loss": 2.9717,
      "step": 19190
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6569907665252686,
      "learning_rate": 0.0005897875386992541,
      "loss": 3.0562,
      "step": 19191
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3932642936706543,
      "learning_rate": 0.000589786480455274,
      "loss": 3.3416,
      "step": 19192
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7271078824996948,
      "learning_rate": 0.0005897854221574171,
      "loss": 3.0323,
      "step": 19193
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5263065099716187,
      "learning_rate": 0.0005897843638056836,
      "loss": 3.1101,
      "step": 19194
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9989547729492188,
      "learning_rate": 0.0005897833054000738,
      "loss": 3.1236,
      "step": 19195
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6651482582092285,
      "learning_rate": 0.0005897822469405877,
      "loss": 3.1486,
      "step": 19196
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.199459433555603,
      "learning_rate": 0.0005897811884272257,
      "loss": 3.0757,
      "step": 19197
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5547256469726562,
      "learning_rate": 0.0005897801298599878,
      "loss": 3.0425,
      "step": 19198
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6321626901626587,
      "learning_rate": 0.0005897790712388745,
      "loss": 3.1405,
      "step": 19199
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4848123788833618,
      "learning_rate": 0.0005897780125638857,
      "loss": 3.0938,
      "step": 19200
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3153789043426514,
      "learning_rate": 0.0005897769538350216,
      "loss": 3.1386,
      "step": 19201
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9789319038391113,
      "learning_rate": 0.0005897758950522826,
      "loss": 2.8762,
      "step": 19202
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.360695242881775,
      "learning_rate": 0.0005897748362156688,
      "loss": 3.2435,
      "step": 19203
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4296824932098389,
      "learning_rate": 0.0005897737773251803,
      "loss": 3.2445,
      "step": 19204
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.396649956703186,
      "learning_rate": 0.0005897727183808174,
      "loss": 3.1098,
      "step": 19205
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5721139907836914,
      "learning_rate": 0.0005897716593825803,
      "loss": 3.0837,
      "step": 19206
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5728672742843628,
      "learning_rate": 0.0005897706003304692,
      "loss": 2.9775,
      "step": 19207
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1190907955169678,
      "learning_rate": 0.0005897695412244844,
      "loss": 3.1307,
      "step": 19208
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3146535158157349,
      "learning_rate": 0.0005897684820646258,
      "loss": 3.001,
      "step": 19209
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4517545700073242,
      "learning_rate": 0.0005897674228508938,
      "loss": 2.9122,
      "step": 19210
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4559245109558105,
      "learning_rate": 0.0005897663635832886,
      "loss": 3.3089,
      "step": 19211
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5952081680297852,
      "learning_rate": 0.0005897653042618103,
      "loss": 3.0712,
      "step": 19212
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4820787906646729,
      "learning_rate": 0.0005897642448864593,
      "loss": 3.2828,
      "step": 19213
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4633985757827759,
      "learning_rate": 0.0005897631854572355,
      "loss": 2.9356,
      "step": 19214
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4839493036270142,
      "learning_rate": 0.0005897621259741393,
      "loss": 2.9626,
      "step": 19215
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5899286270141602,
      "learning_rate": 0.0005897610664371708,
      "loss": 3.0547,
      "step": 19216
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4747097492218018,
      "learning_rate": 0.0005897600068463304,
      "loss": 3.2796,
      "step": 19217
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3432879447937012,
      "learning_rate": 0.000589758947201618,
      "loss": 3.3673,
      "step": 19218
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3429324626922607,
      "learning_rate": 0.0005897578875030341,
      "loss": 3.1511,
      "step": 19219
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.676015019416809,
      "learning_rate": 0.0005897568277505786,
      "loss": 3.2296,
      "step": 19220
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7016172409057617,
      "learning_rate": 0.0005897557679442519,
      "loss": 2.9865,
      "step": 19221
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6394001245498657,
      "learning_rate": 0.0005897547080840541,
      "loss": 2.8563,
      "step": 19222
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4751332998275757,
      "learning_rate": 0.0005897536481699855,
      "loss": 3.2433,
      "step": 19223
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4868576526641846,
      "learning_rate": 0.0005897525882020461,
      "loss": 2.9852,
      "step": 19224
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6386255025863647,
      "learning_rate": 0.0005897515281802363,
      "loss": 2.9784,
      "step": 19225
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6428425312042236,
      "learning_rate": 0.0005897504681045563,
      "loss": 3.2291,
      "step": 19226
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7275021076202393,
      "learning_rate": 0.0005897494079750061,
      "loss": 3.066,
      "step": 19227
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2936270236968994,
      "learning_rate": 0.000589748347791586,
      "loss": 2.9276,
      "step": 19228
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7538305521011353,
      "learning_rate": 0.0005897472875542963,
      "loss": 2.9841,
      "step": 19229
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.5101258754730225,
      "learning_rate": 0.0005897462272631371,
      "loss": 3.1094,
      "step": 19230
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5715668201446533,
      "learning_rate": 0.0005897451669181085,
      "loss": 3.0394,
      "step": 19231
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.31900155544281,
      "learning_rate": 0.0005897441065192109,
      "loss": 3.0939,
      "step": 19232
    },
    {
      "epoch": 0.25,
      "grad_norm": 4.161891937255859,
      "learning_rate": 0.0005897430460664444,
      "loss": 2.8771,
      "step": 19233
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5374345779418945,
      "learning_rate": 0.0005897419855598092,
      "loss": 3.2274,
      "step": 19234
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4624526500701904,
      "learning_rate": 0.0005897409249993054,
      "loss": 3.1287,
      "step": 19235
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6758564710617065,
      "learning_rate": 0.0005897398643849334,
      "loss": 3.3532,
      "step": 19236
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.414083480834961,
      "learning_rate": 0.0005897388037166933,
      "loss": 3.1623,
      "step": 19237
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5302921533584595,
      "learning_rate": 0.0005897377429945851,
      "loss": 3.2665,
      "step": 19238
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.176011562347412,
      "learning_rate": 0.0005897366822186095,
      "loss": 3.2283,
      "step": 19239
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4696369171142578,
      "learning_rate": 0.0005897356213887662,
      "loss": 3.2906,
      "step": 19240
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.701939344406128,
      "learning_rate": 0.0005897345605050555,
      "loss": 3.0282,
      "step": 19241
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.143089771270752,
      "learning_rate": 0.0005897334995674778,
      "loss": 2.9221,
      "step": 19242
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5285590887069702,
      "learning_rate": 0.0005897324385760332,
      "loss": 3.1119,
      "step": 19243
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.442229151725769,
      "learning_rate": 0.0005897313775307218,
      "loss": 2.8358,
      "step": 19244
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0853545665740967,
      "learning_rate": 0.0005897303164315438,
      "loss": 3.3547,
      "step": 19245
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.12060284614563,
      "learning_rate": 0.0005897292552784996,
      "loss": 3.2714,
      "step": 19246
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3838094472885132,
      "learning_rate": 0.0005897281940715893,
      "loss": 3.0171,
      "step": 19247
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3696391582489014,
      "learning_rate": 0.0005897271328108129,
      "loss": 3.466,
      "step": 19248
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7362560033798218,
      "learning_rate": 0.0005897260714961708,
      "loss": 3.0629,
      "step": 19249
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4725335836410522,
      "learning_rate": 0.0005897250101276632,
      "loss": 3.0652,
      "step": 19250
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8877545595169067,
      "learning_rate": 0.0005897239487052902,
      "loss": 3.0012,
      "step": 19251
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.424943208694458,
      "learning_rate": 0.0005897228872290522,
      "loss": 3.2681,
      "step": 19252
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3597723245620728,
      "learning_rate": 0.0005897218256989491,
      "loss": 3.1412,
      "step": 19253
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5176219940185547,
      "learning_rate": 0.0005897207641149812,
      "loss": 2.9757,
      "step": 19254
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3764783143997192,
      "learning_rate": 0.0005897197024771488,
      "loss": 3.3325,
      "step": 19255
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1264054775238037,
      "learning_rate": 0.0005897186407854521,
      "loss": 3.0733,
      "step": 19256
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4517639875411987,
      "learning_rate": 0.0005897175790398911,
      "loss": 3.1415,
      "step": 19257
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3488401174545288,
      "learning_rate": 0.0005897165172404663,
      "loss": 3.0156,
      "step": 19258
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3161659240722656,
      "learning_rate": 0.0005897154553871776,
      "loss": 2.9361,
      "step": 19259
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5101529359817505,
      "learning_rate": 0.0005897143934800253,
      "loss": 2.9401,
      "step": 19260
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1295695304870605,
      "learning_rate": 0.0005897133315190098,
      "loss": 3.0168,
      "step": 19261
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0432610511779785,
      "learning_rate": 0.000589712269504131,
      "loss": 3.1392,
      "step": 19262
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.6644177436828613,
      "learning_rate": 0.0005897112074353892,
      "loss": 2.9448,
      "step": 19263
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8363196849822998,
      "learning_rate": 0.0005897101453127847,
      "loss": 3.0296,
      "step": 19264
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4388656616210938,
      "learning_rate": 0.0005897090831363176,
      "loss": 3.1768,
      "step": 19265
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7408524751663208,
      "learning_rate": 0.0005897080209059881,
      "loss": 3.0309,
      "step": 19266
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.408733606338501,
      "learning_rate": 0.0005897069586217964,
      "loss": 2.99,
      "step": 19267
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3188694715499878,
      "learning_rate": 0.0005897058962837427,
      "loss": 3.2323,
      "step": 19268
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7003672122955322,
      "learning_rate": 0.0005897048338918273,
      "loss": 3.1749,
      "step": 19269
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6229891777038574,
      "learning_rate": 0.0005897037714460502,
      "loss": 3.0188,
      "step": 19270
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8589706420898438,
      "learning_rate": 0.0005897027089464116,
      "loss": 3.0805,
      "step": 19271
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6520333290100098,
      "learning_rate": 0.0005897016463929119,
      "loss": 3.007,
      "step": 19272
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5638346672058105,
      "learning_rate": 0.0005897005837855512,
      "loss": 3.0914,
      "step": 19273
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.403322696685791,
      "learning_rate": 0.0005896995211243297,
      "loss": 3.061,
      "step": 19274
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.50254225730896,
      "learning_rate": 0.0005896984584092477,
      "loss": 3.3,
      "step": 19275
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9043530225753784,
      "learning_rate": 0.0005896973956403052,
      "loss": 2.9314,
      "step": 19276
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.684106707572937,
      "learning_rate": 0.0005896963328175024,
      "loss": 3.1183,
      "step": 19277
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.508970260620117,
      "learning_rate": 0.0005896952699408396,
      "loss": 3.0372,
      "step": 19278
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.267030954360962,
      "learning_rate": 0.0005896942070103172,
      "loss": 3.0047,
      "step": 19279
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7428035736083984,
      "learning_rate": 0.0005896931440259349,
      "loss": 3.295,
      "step": 19280
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.7889597415924072,
      "learning_rate": 0.0005896920809876933,
      "loss": 3.1203,
      "step": 19281
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.8635644912719727,
      "learning_rate": 0.0005896910178955924,
      "loss": 3.156,
      "step": 19282
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.710176467895508,
      "learning_rate": 0.0005896899547496327,
      "loss": 2.9321,
      "step": 19283
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8662724494934082,
      "learning_rate": 0.0005896888915498139,
      "loss": 3.0144,
      "step": 19284
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.135667324066162,
      "learning_rate": 0.0005896878282961365,
      "loss": 3.0897,
      "step": 19285
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.6536028385162354,
      "learning_rate": 0.0005896867649886008,
      "loss": 3.2641,
      "step": 19286
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.1945149898529053,
      "learning_rate": 0.0005896857016272068,
      "loss": 3.1654,
      "step": 19287
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.515095829963684,
      "learning_rate": 0.0005896846382119547,
      "loss": 3.2566,
      "step": 19288
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9746447801589966,
      "learning_rate": 0.0005896835747428448,
      "loss": 3.2435,
      "step": 19289
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.437141180038452,
      "learning_rate": 0.0005896825112198773,
      "loss": 3.1816,
      "step": 19290
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2837822437286377,
      "learning_rate": 0.0005896814476430524,
      "loss": 3.2359,
      "step": 19291
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.82298743724823,
      "learning_rate": 0.0005896803840123702,
      "loss": 3.2641,
      "step": 19292
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8716627359390259,
      "learning_rate": 0.0005896793203278308,
      "loss": 2.9478,
      "step": 19293
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.789313793182373,
      "learning_rate": 0.0005896782565894348,
      "loss": 3.1338,
      "step": 19294
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0095348358154297,
      "learning_rate": 0.0005896771927971819,
      "loss": 3.2662,
      "step": 19295
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.404083013534546,
      "learning_rate": 0.0005896761289510726,
      "loss": 3.0126,
      "step": 19296
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5790258646011353,
      "learning_rate": 0.0005896750650511072,
      "loss": 3.1354,
      "step": 19297
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4932594299316406,
      "learning_rate": 0.0005896740010972856,
      "loss": 3.3816,
      "step": 19298
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1987106800079346,
      "learning_rate": 0.0005896729370896081,
      "loss": 3.0806,
      "step": 19299
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3835585117340088,
      "learning_rate": 0.000589671873028075,
      "loss": 3.1231,
      "step": 19300
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5969938039779663,
      "learning_rate": 0.0005896708089126864,
      "loss": 3.0999,
      "step": 19301
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.486515760421753,
      "learning_rate": 0.0005896697447434426,
      "loss": 2.9229,
      "step": 19302
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.369863986968994,
      "learning_rate": 0.0005896686805203436,
      "loss": 2.9477,
      "step": 19303
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4119848012924194,
      "learning_rate": 0.00058966761624339,
      "loss": 3.1962,
      "step": 19304
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6928154230117798,
      "learning_rate": 0.0005896665519125815,
      "loss": 2.949,
      "step": 19305
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.6397337913513184,
      "learning_rate": 0.0005896654875279185,
      "loss": 2.9014,
      "step": 19306
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6902093887329102,
      "learning_rate": 0.0005896644230894013,
      "loss": 2.8531,
      "step": 19307
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4334075450897217,
      "learning_rate": 0.0005896633585970299,
      "loss": 3.2133,
      "step": 19308
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6615217924118042,
      "learning_rate": 0.0005896622940508048,
      "loss": 3.1523,
      "step": 19309
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5273185968399048,
      "learning_rate": 0.0005896612294507259,
      "loss": 3.3267,
      "step": 19310
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5176823139190674,
      "learning_rate": 0.0005896601647967934,
      "loss": 3.2459,
      "step": 19311
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4281026124954224,
      "learning_rate": 0.0005896591000890078,
      "loss": 3.2044,
      "step": 19312
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5380405187606812,
      "learning_rate": 0.000589658035327369,
      "loss": 3.0574,
      "step": 19313
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4067915678024292,
      "learning_rate": 0.0005896569705118774,
      "loss": 2.7913,
      "step": 19314
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4786487817764282,
      "learning_rate": 0.000589655905642533,
      "loss": 3.1895,
      "step": 19315
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.687305212020874,
      "learning_rate": 0.0005896548407193362,
      "loss": 2.9142,
      "step": 19316
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6402521133422852,
      "learning_rate": 0.0005896537757422869,
      "loss": 3.1134,
      "step": 19317
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5743699073791504,
      "learning_rate": 0.0005896527107113858,
      "loss": 3.0296,
      "step": 19318
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9759447574615479,
      "learning_rate": 0.0005896516456266325,
      "loss": 2.9466,
      "step": 19319
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7229442596435547,
      "learning_rate": 0.0005896505804880276,
      "loss": 2.9318,
      "step": 19320
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7328855991363525,
      "learning_rate": 0.0005896495152955712,
      "loss": 3.182,
      "step": 19321
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.644179344177246,
      "learning_rate": 0.0005896484500492635,
      "loss": 3.0762,
      "step": 19322
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8902684450149536,
      "learning_rate": 0.0005896473847491046,
      "loss": 3.1973,
      "step": 19323
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4570024013519287,
      "learning_rate": 0.000589646319395095,
      "loss": 2.9652,
      "step": 19324
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7422505617141724,
      "learning_rate": 0.0005896452539872345,
      "loss": 3.0968,
      "step": 19325
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4883487224578857,
      "learning_rate": 0.0005896441885255235,
      "loss": 3.3011,
      "step": 19326
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.1947546005249023,
      "learning_rate": 0.0005896431230099622,
      "loss": 2.9187,
      "step": 19327
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.103872060775757,
      "learning_rate": 0.0005896420574405507,
      "loss": 3.0952,
      "step": 19328
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2876442670822144,
      "learning_rate": 0.0005896409918172894,
      "loss": 3.0055,
      "step": 19329
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7247657775878906,
      "learning_rate": 0.0005896399261401783,
      "loss": 3.1704,
      "step": 19330
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9516550302505493,
      "learning_rate": 0.0005896388604092178,
      "loss": 3.244,
      "step": 19331
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.010194778442383,
      "learning_rate": 0.0005896377946244078,
      "loss": 2.9876,
      "step": 19332
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5062594413757324,
      "learning_rate": 0.0005896367287857487,
      "loss": 3.031,
      "step": 19333
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1717429161071777,
      "learning_rate": 0.0005896356628932407,
      "loss": 3.1303,
      "step": 19334
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.979098916053772,
      "learning_rate": 0.000589634596946884,
      "loss": 3.1667,
      "step": 19335
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6552598476409912,
      "learning_rate": 0.0005896335309466788,
      "loss": 3.1203,
      "step": 19336
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4421358108520508,
      "learning_rate": 0.000589632464892625,
      "loss": 3.0207,
      "step": 19337
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.575028419494629,
      "learning_rate": 0.0005896313987847233,
      "loss": 3.1304,
      "step": 19338
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5160856246948242,
      "learning_rate": 0.0005896303326229736,
      "loss": 3.1313,
      "step": 19339
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6138296127319336,
      "learning_rate": 0.0005896292664073761,
      "loss": 3.1013,
      "step": 19340
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9103000164031982,
      "learning_rate": 0.0005896282001379311,
      "loss": 3.0861,
      "step": 19341
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.564461350440979,
      "learning_rate": 0.0005896271338146387,
      "loss": 2.9023,
      "step": 19342
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3086575269699097,
      "learning_rate": 0.0005896260674374993,
      "loss": 3.1945,
      "step": 19343
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4467337131500244,
      "learning_rate": 0.0005896250010065128,
      "loss": 3.151,
      "step": 19344
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.073714256286621,
      "learning_rate": 0.0005896239345216795,
      "loss": 3.1944,
      "step": 19345
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.200301170349121,
      "learning_rate": 0.0005896228679829998,
      "loss": 3.0594,
      "step": 19346
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9053443670272827,
      "learning_rate": 0.0005896218013904738,
      "loss": 3.1745,
      "step": 19347
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.7547576427459717,
      "learning_rate": 0.0005896207347441014,
      "loss": 3.16,
      "step": 19348
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.52624249458313,
      "learning_rate": 0.0005896196680438831,
      "loss": 3.0764,
      "step": 19349
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2342984676361084,
      "learning_rate": 0.0005896186012898191,
      "loss": 3.1631,
      "step": 19350
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5964795351028442,
      "learning_rate": 0.0005896175344819095,
      "loss": 3.168,
      "step": 19351
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.259934425354004,
      "learning_rate": 0.0005896164676201546,
      "loss": 3.0794,
      "step": 19352
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7666916847229004,
      "learning_rate": 0.0005896154007045544,
      "loss": 3.0219,
      "step": 19353
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.348029851913452,
      "learning_rate": 0.0005896143337351094,
      "loss": 2.9307,
      "step": 19354
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.348068118095398,
      "learning_rate": 0.0005896132667118195,
      "loss": 3.1524,
      "step": 19355
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3984178304672241,
      "learning_rate": 0.0005896121996346852,
      "loss": 2.9134,
      "step": 19356
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4312211275100708,
      "learning_rate": 0.0005896111325037063,
      "loss": 3.2961,
      "step": 19357
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.108175754547119,
      "learning_rate": 0.0005896100653188833,
      "loss": 2.8917,
      "step": 19358
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7493245601654053,
      "learning_rate": 0.0005896089980802164,
      "loss": 2.9424,
      "step": 19359
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.437476396560669,
      "learning_rate": 0.0005896079307877056,
      "loss": 3.1381,
      "step": 19360
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9877245426177979,
      "learning_rate": 0.0005896068634413513,
      "loss": 3.0892,
      "step": 19361
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.080047845840454,
      "learning_rate": 0.0005896057960411537,
      "loss": 3.1324,
      "step": 19362
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4063419103622437,
      "learning_rate": 0.0005896047285871128,
      "loss": 3.4379,
      "step": 19363
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5458736419677734,
      "learning_rate": 0.0005896036610792289,
      "loss": 3.0463,
      "step": 19364
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.69461727142334,
      "learning_rate": 0.0005896025935175022,
      "loss": 3.2637,
      "step": 19365
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0284178256988525,
      "learning_rate": 0.0005896015259019329,
      "loss": 2.8925,
      "step": 19366
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6327745914459229,
      "learning_rate": 0.0005896004582325214,
      "loss": 3.1943,
      "step": 19367
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8984863758087158,
      "learning_rate": 0.0005895993905092675,
      "loss": 3.175,
      "step": 19368
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.078624963760376,
      "learning_rate": 0.0005895983227321717,
      "loss": 3.1764,
      "step": 19369
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4618234634399414,
      "learning_rate": 0.0005895972549012341,
      "loss": 3.1625,
      "step": 19370
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.518689751625061,
      "learning_rate": 0.0005895961870164549,
      "loss": 3.2001,
      "step": 19371
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.345585823059082,
      "learning_rate": 0.0005895951190778342,
      "loss": 3.039,
      "step": 19372
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5676491260528564,
      "learning_rate": 0.0005895940510853724,
      "loss": 3.186,
      "step": 19373
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3941373825073242,
      "learning_rate": 0.0005895929830390696,
      "loss": 3.0,
      "step": 19374
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4360840320587158,
      "learning_rate": 0.000589591914938926,
      "loss": 2.9813,
      "step": 19375
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7679344415664673,
      "learning_rate": 0.0005895908467849418,
      "loss": 3.1209,
      "step": 19376
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7617381811141968,
      "learning_rate": 0.0005895897785771172,
      "loss": 3.1572,
      "step": 19377
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5751618146896362,
      "learning_rate": 0.0005895887103154524,
      "loss": 3.3121,
      "step": 19378
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4238600730895996,
      "learning_rate": 0.0005895876419999476,
      "loss": 3.1914,
      "step": 19379
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9399176836013794,
      "learning_rate": 0.0005895865736306029,
      "loss": 2.9743,
      "step": 19380
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.728778600692749,
      "learning_rate": 0.0005895855052074187,
      "loss": 2.9431,
      "step": 19381
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5605686902999878,
      "learning_rate": 0.000589584436730395,
      "loss": 3.1331,
      "step": 19382
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.7227609157562256,
      "learning_rate": 0.0005895833681995321,
      "loss": 3.1119,
      "step": 19383
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3980331420898438,
      "learning_rate": 0.0005895822996148302,
      "loss": 3.0562,
      "step": 19384
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4861195087432861,
      "learning_rate": 0.0005895812309762895,
      "loss": 2.961,
      "step": 19385
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4464282989501953,
      "learning_rate": 0.0005895801622839102,
      "loss": 3.0481,
      "step": 19386
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7886393070220947,
      "learning_rate": 0.0005895790935376924,
      "loss": 3.3256,
      "step": 19387
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8356729745864868,
      "learning_rate": 0.0005895780247376365,
      "loss": 3.1152,
      "step": 19388
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5898325443267822,
      "learning_rate": 0.0005895769558837425,
      "loss": 2.9262,
      "step": 19389
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4765913486480713,
      "learning_rate": 0.0005895758869760106,
      "loss": 2.797,
      "step": 19390
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9505754709243774,
      "learning_rate": 0.0005895748180144413,
      "loss": 2.9261,
      "step": 19391
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3844261169433594,
      "learning_rate": 0.0005895737489990344,
      "loss": 3.2203,
      "step": 19392
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8935091495513916,
      "learning_rate": 0.0005895726799297903,
      "loss": 3.0545,
      "step": 19393
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7429999113082886,
      "learning_rate": 0.0005895716108067091,
      "loss": 2.9385,
      "step": 19394
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9817448854446411,
      "learning_rate": 0.0005895705416297912,
      "loss": 3.1848,
      "step": 19395
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3904359340667725,
      "learning_rate": 0.0005895694723990366,
      "loss": 3.0481,
      "step": 19396
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.8931708335876465,
      "learning_rate": 0.0005895684031144456,
      "loss": 2.795,
      "step": 19397
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.067572832107544,
      "learning_rate": 0.0005895673337760185,
      "loss": 3.0369,
      "step": 19398
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9700126647949219,
      "learning_rate": 0.0005895662643837553,
      "loss": 3.0929,
      "step": 19399
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4371955394744873,
      "learning_rate": 0.0005895651949376561,
      "loss": 3.1804,
      "step": 19400
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4135866165161133,
      "learning_rate": 0.0005895641254377213,
      "loss": 3.2859,
      "step": 19401
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.421295404434204,
      "learning_rate": 0.0005895630558839512,
      "loss": 3.0656,
      "step": 19402
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.1510140895843506,
      "learning_rate": 0.0005895619862763458,
      "loss": 3.2356,
      "step": 19403
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6010977029800415,
      "learning_rate": 0.0005895609166149053,
      "loss": 3.3754,
      "step": 19404
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7882800102233887,
      "learning_rate": 0.0005895598468996301,
      "loss": 3.0401,
      "step": 19405
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3893334865570068,
      "learning_rate": 0.0005895587771305201,
      "loss": 3.2581,
      "step": 19406
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.735573649406433,
      "learning_rate": 0.0005895577073075757,
      "loss": 3.4601,
      "step": 19407
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4968034029006958,
      "learning_rate": 0.000589556637430797,
      "loss": 3.0981,
      "step": 19408
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4811702966690063,
      "learning_rate": 0.0005895555675001844,
      "loss": 3.3807,
      "step": 19409
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4408208131790161,
      "learning_rate": 0.000589554497515738,
      "loss": 3.2865,
      "step": 19410
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4019850492477417,
      "learning_rate": 0.0005895534274774578,
      "loss": 3.3155,
      "step": 19411
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3728382587432861,
      "learning_rate": 0.0005895523573853442,
      "loss": 3.2208,
      "step": 19412
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6991833448410034,
      "learning_rate": 0.0005895512872393973,
      "loss": 3.0336,
      "step": 19413
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1804447174072266,
      "learning_rate": 0.0005895502170396175,
      "loss": 3.0262,
      "step": 19414
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4827154874801636,
      "learning_rate": 0.0005895491467860047,
      "loss": 3.2275,
      "step": 19415
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3660049438476562,
      "learning_rate": 0.0005895480764785593,
      "loss": 3.3451,
      "step": 19416
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.587023377418518,
      "learning_rate": 0.0005895470061172814,
      "loss": 3.0755,
      "step": 19417
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9411598443984985,
      "learning_rate": 0.0005895459357021714,
      "loss": 3.3594,
      "step": 19418
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.410507321357727,
      "learning_rate": 0.0005895448652332291,
      "loss": 2.8377,
      "step": 19419
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3682268857955933,
      "learning_rate": 0.0005895437947104551,
      "loss": 3.1126,
      "step": 19420
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6547026634216309,
      "learning_rate": 0.0005895427241338494,
      "loss": 3.0542,
      "step": 19421
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4416314363479614,
      "learning_rate": 0.0005895416535034124,
      "loss": 3.2083,
      "step": 19422
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4903669357299805,
      "learning_rate": 0.0005895405828191439,
      "loss": 3.1768,
      "step": 19423
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4489365816116333,
      "learning_rate": 0.0005895395120810445,
      "loss": 3.0806,
      "step": 19424
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4218961000442505,
      "learning_rate": 0.0005895384412891142,
      "loss": 3.3808,
      "step": 19425
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.411144733428955,
      "learning_rate": 0.0005895373704433532,
      "loss": 3.4132,
      "step": 19426
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3421881198883057,
      "learning_rate": 0.0005895362995437617,
      "loss": 2.8707,
      "step": 19427
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4501551389694214,
      "learning_rate": 0.0005895352285903401,
      "loss": 2.9899,
      "step": 19428
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6818634271621704,
      "learning_rate": 0.0005895341575830884,
      "loss": 3.1509,
      "step": 19429
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4420281648635864,
      "learning_rate": 0.0005895330865220066,
      "loss": 3.0579,
      "step": 19430
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5143860578536987,
      "learning_rate": 0.0005895320154070953,
      "loss": 3.214,
      "step": 19431
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.63545560836792,
      "learning_rate": 0.0005895309442383545,
      "loss": 2.7608,
      "step": 19432
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5460467338562012,
      "learning_rate": 0.0005895298730157845,
      "loss": 3.1139,
      "step": 19433
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4581979513168335,
      "learning_rate": 0.0005895288017393855,
      "loss": 3.303,
      "step": 19434
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5220866203308105,
      "learning_rate": 0.0005895277304091575,
      "loss": 2.9584,
      "step": 19435
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2822682857513428,
      "learning_rate": 0.0005895266590251008,
      "loss": 3.2295,
      "step": 19436
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5895237922668457,
      "learning_rate": 0.0005895255875872158,
      "loss": 3.3812,
      "step": 19437
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6960222721099854,
      "learning_rate": 0.0005895245160955023,
      "loss": 3.0336,
      "step": 19438
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.684149146080017,
      "learning_rate": 0.0005895234445499609,
      "loss": 3.1395,
      "step": 19439
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8514745235443115,
      "learning_rate": 0.0005895223729505915,
      "loss": 3.2127,
      "step": 19440
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5227655172348022,
      "learning_rate": 0.0005895213012973946,
      "loss": 2.9044,
      "step": 19441
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4626754522323608,
      "learning_rate": 0.00058952022959037,
      "loss": 3.2627,
      "step": 19442
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3114173412322998,
      "learning_rate": 0.0005895191578295183,
      "loss": 3.2512,
      "step": 19443
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2328715324401855,
      "learning_rate": 0.0005895180860148394,
      "loss": 3.0524,
      "step": 19444
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5985567569732666,
      "learning_rate": 0.0005895170141463337,
      "loss": 3.1443,
      "step": 19445
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.64914870262146,
      "learning_rate": 0.0005895159422240014,
      "loss": 3.0693,
      "step": 19446
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5043915510177612,
      "learning_rate": 0.0005895148702478425,
      "loss": 3.072,
      "step": 19447
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.545539379119873,
      "learning_rate": 0.0005895137982178573,
      "loss": 3.319,
      "step": 19448
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0351924896240234,
      "learning_rate": 0.0005895127261340461,
      "loss": 3.1372,
      "step": 19449
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4129441976547241,
      "learning_rate": 0.000589511653996409,
      "loss": 3.1889,
      "step": 19450
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7159262895584106,
      "learning_rate": 0.0005895105818049461,
      "loss": 3.061,
      "step": 19451
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5260018110275269,
      "learning_rate": 0.0005895095095596579,
      "loss": 2.9408,
      "step": 19452
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.880748987197876,
      "learning_rate": 0.0005895084372605444,
      "loss": 3.0884,
      "step": 19453
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5241128206253052,
      "learning_rate": 0.0005895073649076057,
      "loss": 3.2363,
      "step": 19454
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0939042568206787,
      "learning_rate": 0.0005895062925008422,
      "loss": 3.0217,
      "step": 19455
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9441773891448975,
      "learning_rate": 0.0005895052200402541,
      "loss": 2.9865,
      "step": 19456
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4273756742477417,
      "learning_rate": 0.0005895041475258414,
      "loss": 3.0554,
      "step": 19457
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9687511920928955,
      "learning_rate": 0.0005895030749576043,
      "loss": 3.0032,
      "step": 19458
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.307649612426758,
      "learning_rate": 0.0005895020023355433,
      "loss": 3.1872,
      "step": 19459
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.707173466682434,
      "learning_rate": 0.0005895009296596584,
      "loss": 3.032,
      "step": 19460
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5083330869674683,
      "learning_rate": 0.0005894998569299497,
      "loss": 3.0612,
      "step": 19461
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.2821249961853027,
      "learning_rate": 0.0005894987841464175,
      "loss": 3.2318,
      "step": 19462
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8975242376327515,
      "learning_rate": 0.0005894977113090621,
      "loss": 3.0724,
      "step": 19463
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.388248085975647,
      "learning_rate": 0.0005894966384178837,
      "loss": 3.1801,
      "step": 19464
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.356257677078247,
      "learning_rate": 0.0005894955654728823,
      "loss": 2.9337,
      "step": 19465
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.7170748710632324,
      "learning_rate": 0.0005894944924740581,
      "loss": 2.9344,
      "step": 19466
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6233899593353271,
      "learning_rate": 0.0005894934194214116,
      "loss": 2.9553,
      "step": 19467
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3554611206054688,
      "learning_rate": 0.0005894923463149427,
      "loss": 3.1767,
      "step": 19468
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4482316970825195,
      "learning_rate": 0.0005894912731546517,
      "loss": 3.0837,
      "step": 19469
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.5346455574035645,
      "learning_rate": 0.0005894901999405389,
      "loss": 2.9493,
      "step": 19470
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8644342422485352,
      "learning_rate": 0.0005894891266726044,
      "loss": 3.2049,
      "step": 19471
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5233629941940308,
      "learning_rate": 0.0005894880533508484,
      "loss": 3.0811,
      "step": 19472
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8726789951324463,
      "learning_rate": 0.000589486979975271,
      "loss": 3.2976,
      "step": 19473
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0312187671661377,
      "learning_rate": 0.0005894859065458726,
      "loss": 3.2262,
      "step": 19474
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.526496410369873,
      "learning_rate": 0.0005894848330626533,
      "loss": 3.0632,
      "step": 19475
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3116250038146973,
      "learning_rate": 0.0005894837595256132,
      "loss": 3.3943,
      "step": 19476
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7435266971588135,
      "learning_rate": 0.0005894826859347527,
      "loss": 3.51,
      "step": 19477
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.532999038696289,
      "learning_rate": 0.0005894816122900719,
      "loss": 2.8773,
      "step": 19478
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7488794326782227,
      "learning_rate": 0.000589480538591571,
      "loss": 3.0368,
      "step": 19479
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3380626440048218,
      "learning_rate": 0.0005894794648392502,
      "loss": 3.0746,
      "step": 19480
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.338573694229126,
      "learning_rate": 0.0005894783910331097,
      "loss": 3.0394,
      "step": 19481
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6001473665237427,
      "learning_rate": 0.0005894773171731498,
      "loss": 3.1723,
      "step": 19482
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3852252960205078,
      "learning_rate": 0.0005894762432593705,
      "loss": 3.0946,
      "step": 19483
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7283539772033691,
      "learning_rate": 0.0005894751692917721,
      "loss": 3.2191,
      "step": 19484
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.4824650287628174,
      "learning_rate": 0.0005894740952703549,
      "loss": 2.9958,
      "step": 19485
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.964242696762085,
      "learning_rate": 0.0005894730211951188,
      "loss": 3.0988,
      "step": 19486
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2526428699493408,
      "learning_rate": 0.0005894719470660644,
      "loss": 3.1144,
      "step": 19487
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8634613752365112,
      "learning_rate": 0.0005894708728831915,
      "loss": 3.1265,
      "step": 19488
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9472585916519165,
      "learning_rate": 0.0005894697986465006,
      "loss": 3.1605,
      "step": 19489
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9108521938323975,
      "learning_rate": 0.0005894687243559919,
      "loss": 2.9038,
      "step": 19490
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4751886129379272,
      "learning_rate": 0.0005894676500116654,
      "loss": 2.9646,
      "step": 19491
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.810931444168091,
      "learning_rate": 0.0005894665756135214,
      "loss": 3.0359,
      "step": 19492
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.472135305404663,
      "learning_rate": 0.0005894655011615601,
      "loss": 3.1516,
      "step": 19493
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.193835973739624,
      "learning_rate": 0.0005894644266557817,
      "loss": 2.978,
      "step": 19494
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.429388165473938,
      "learning_rate": 0.0005894633520961864,
      "loss": 3.3586,
      "step": 19495
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.007575273513794,
      "learning_rate": 0.0005894622774827745,
      "loss": 2.8731,
      "step": 19496
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3582401275634766,
      "learning_rate": 0.000589461202815546,
      "loss": 3.0577,
      "step": 19497
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.031111717224121,
      "learning_rate": 0.0005894601280945011,
      "loss": 2.9394,
      "step": 19498
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.889717936515808,
      "learning_rate": 0.0005894590533196403,
      "loss": 2.9423,
      "step": 19499
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6889116764068604,
      "learning_rate": 0.0005894579784909634,
      "loss": 3.1823,
      "step": 19500
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.863573431968689,
      "learning_rate": 0.000589456903608471,
      "loss": 2.776,
      "step": 19501
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3042988777160645,
      "learning_rate": 0.000589455828672163,
      "loss": 3.0074,
      "step": 19502
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4818087816238403,
      "learning_rate": 0.0005894547536820397,
      "loss": 3.1505,
      "step": 19503
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.0223124027252197,
      "learning_rate": 0.0005894536786381013,
      "loss": 3.2503,
      "step": 19504
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6802771091461182,
      "learning_rate": 0.000589452603540348,
      "loss": 3.1527,
      "step": 19505
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.448764443397522,
      "learning_rate": 0.0005894515283887801,
      "loss": 3.036,
      "step": 19506
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.419956922531128,
      "learning_rate": 0.0005894504531833975,
      "loss": 3.2913,
      "step": 19507
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5882055759429932,
      "learning_rate": 0.0005894493779242008,
      "loss": 2.9839,
      "step": 19508
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5457172393798828,
      "learning_rate": 0.0005894483026111898,
      "loss": 3.2179,
      "step": 19509
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3977402448654175,
      "learning_rate": 0.0005894472272443649,
      "loss": 2.9817,
      "step": 19510
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7205450534820557,
      "learning_rate": 0.0005894461518237264,
      "loss": 3.1125,
      "step": 19511
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4681212902069092,
      "learning_rate": 0.0005894450763492744,
      "loss": 3.283,
      "step": 19512
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4496911764144897,
      "learning_rate": 0.0005894440008210091,
      "loss": 3.0888,
      "step": 19513
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.819547414779663,
      "learning_rate": 0.0005894429252389306,
      "loss": 3.2166,
      "step": 19514
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5057916641235352,
      "learning_rate": 0.0005894418496030393,
      "loss": 3.5719,
      "step": 19515
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.820252776145935,
      "learning_rate": 0.0005894407739133352,
      "loss": 3.2226,
      "step": 19516
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6199727058410645,
      "learning_rate": 0.0005894396981698187,
      "loss": 3.2897,
      "step": 19517
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.37799870967865,
      "learning_rate": 0.0005894386223724899,
      "loss": 2.954,
      "step": 19518
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5769845247268677,
      "learning_rate": 0.000589437546521349,
      "loss": 3.1945,
      "step": 19519
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6980204582214355,
      "learning_rate": 0.000589436470616396,
      "loss": 2.9004,
      "step": 19520
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6222625970840454,
      "learning_rate": 0.0005894353946576315,
      "loss": 3.25,
      "step": 19521
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5459362268447876,
      "learning_rate": 0.0005894343186450554,
      "loss": 3.1063,
      "step": 19522
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5427252054214478,
      "learning_rate": 0.0005894332425786681,
      "loss": 3.086,
      "step": 19523
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7065424919128418,
      "learning_rate": 0.0005894321664584697,
      "loss": 3.2051,
      "step": 19524
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2691614627838135,
      "learning_rate": 0.0005894310902844603,
      "loss": 3.2527,
      "step": 19525
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4374184608459473,
      "learning_rate": 0.0005894300140566402,
      "loss": 3.1342,
      "step": 19526
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5281180143356323,
      "learning_rate": 0.0005894289377750096,
      "loss": 3.1298,
      "step": 19527
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6951981782913208,
      "learning_rate": 0.0005894278614395687,
      "loss": 3.1948,
      "step": 19528
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8355408906936646,
      "learning_rate": 0.0005894267850503178,
      "loss": 3.2417,
      "step": 19529
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6321007013320923,
      "learning_rate": 0.0005894257086072569,
      "loss": 2.9285,
      "step": 19530
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4443714618682861,
      "learning_rate": 0.0005894246321103862,
      "loss": 3.0119,
      "step": 19531
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5882682800292969,
      "learning_rate": 0.0005894235555597061,
      "loss": 3.0467,
      "step": 19532
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5297033786773682,
      "learning_rate": 0.0005894224789552168,
      "loss": 2.9386,
      "step": 19533
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7753244638442993,
      "learning_rate": 0.0005894214022969183,
      "loss": 2.9919,
      "step": 19534
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.619094729423523,
      "learning_rate": 0.0005894203255848109,
      "loss": 3.3448,
      "step": 19535
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5482873916625977,
      "learning_rate": 0.0005894192488188948,
      "loss": 3.1557,
      "step": 19536
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5241831541061401,
      "learning_rate": 0.0005894181719991702,
      "loss": 3.1083,
      "step": 19537
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4282435178756714,
      "learning_rate": 0.0005894170951256374,
      "loss": 3.0677,
      "step": 19538
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5361028909683228,
      "learning_rate": 0.0005894160181982963,
      "loss": 3.0408,
      "step": 19539
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4260584115982056,
      "learning_rate": 0.0005894149412171474,
      "loss": 3.0679,
      "step": 19540
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.6057231426239014,
      "learning_rate": 0.0005894138641821908,
      "loss": 3.1347,
      "step": 19541
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.3620896339416504,
      "learning_rate": 0.0005894127870934267,
      "loss": 3.1088,
      "step": 19542
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.549112319946289,
      "learning_rate": 0.0005894117099508553,
      "loss": 3.2912,
      "step": 19543
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4388136863708496,
      "learning_rate": 0.0005894106327544768,
      "loss": 3.048,
      "step": 19544
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.7861993312835693,
      "learning_rate": 0.0005894095555042914,
      "loss": 3.2974,
      "step": 19545
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.199018716812134,
      "learning_rate": 0.0005894084782002994,
      "loss": 2.9788,
      "step": 19546
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5530424118041992,
      "learning_rate": 0.0005894074008425007,
      "loss": 3.0188,
      "step": 19547
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.7488508224487305,
      "learning_rate": 0.0005894063234308958,
      "loss": 3.0727,
      "step": 19548
    },
    {
      "epoch": 0.25,
      "grad_norm": 4.394057750701904,
      "learning_rate": 0.0005894052459654848,
      "loss": 2.8971,
      "step": 19549
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.000366449356079,
      "learning_rate": 0.0005894041684462679,
      "loss": 3.2773,
      "step": 19550
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7699686288833618,
      "learning_rate": 0.0005894030908732453,
      "loss": 3.2327,
      "step": 19551
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.9052348136901855,
      "learning_rate": 0.0005894020132464172,
      "loss": 3.0211,
      "step": 19552
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7800604104995728,
      "learning_rate": 0.0005894009355657838,
      "loss": 3.0114,
      "step": 19553
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5447828769683838,
      "learning_rate": 0.0005893998578313453,
      "loss": 3.2159,
      "step": 19554
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.173179864883423,
      "learning_rate": 0.0005893987800431019,
      "loss": 3.1005,
      "step": 19555
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.677566647529602,
      "learning_rate": 0.0005893977022010539,
      "loss": 3.0611,
      "step": 19556
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5700500011444092,
      "learning_rate": 0.0005893966243052013,
      "loss": 2.7947,
      "step": 19557
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7226195335388184,
      "learning_rate": 0.0005893955463555444,
      "loss": 3.2763,
      "step": 19558
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3921895027160645,
      "learning_rate": 0.0005893944683520835,
      "loss": 2.9908,
      "step": 19559
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.742901086807251,
      "learning_rate": 0.0005893933902948186,
      "loss": 3.1174,
      "step": 19560
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.433272361755371,
      "learning_rate": 0.0005893923121837501,
      "loss": 3.2042,
      "step": 19561
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3513089418411255,
      "learning_rate": 0.000589391234018878,
      "loss": 2.9611,
      "step": 19562
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.195503830909729,
      "learning_rate": 0.0005893901558002027,
      "loss": 2.8369,
      "step": 19563
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3962723016738892,
      "learning_rate": 0.0005893890775277242,
      "loss": 2.9502,
      "step": 19564
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3678942918777466,
      "learning_rate": 0.0005893879992014429,
      "loss": 3.0965,
      "step": 19565
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3475935459136963,
      "learning_rate": 0.0005893869208213588,
      "loss": 3.2326,
      "step": 19566
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5409735441207886,
      "learning_rate": 0.0005893858423874724,
      "loss": 2.8623,
      "step": 19567
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5506170988082886,
      "learning_rate": 0.0005893847638997836,
      "loss": 3.0572,
      "step": 19568
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3501890897750854,
      "learning_rate": 0.0005893836853582928,
      "loss": 2.9863,
      "step": 19569
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4243086576461792,
      "learning_rate": 0.0005893826067629999,
      "loss": 3.1539,
      "step": 19570
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5302562713623047,
      "learning_rate": 0.0005893815281139055,
      "loss": 3.0159,
      "step": 19571
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8405836820602417,
      "learning_rate": 0.0005893804494110095,
      "loss": 3.2029,
      "step": 19572
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3078835010528564,
      "learning_rate": 0.0005893793706543123,
      "loss": 3.0402,
      "step": 19573
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5830258131027222,
      "learning_rate": 0.000589378291843814,
      "loss": 3.2239,
      "step": 19574
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.4625532627105713,
      "learning_rate": 0.0005893772129795148,
      "loss": 3.1747,
      "step": 19575
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.2706400156021118,
      "learning_rate": 0.0005893761340614148,
      "loss": 3.2879,
      "step": 19576
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.45421302318573,
      "learning_rate": 0.0005893750550895144,
      "loss": 2.892,
      "step": 19577
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.1760802268981934,
      "learning_rate": 0.0005893739760638137,
      "loss": 3.2785,
      "step": 19578
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3973255157470703,
      "learning_rate": 0.000589372896984313,
      "loss": 3.1182,
      "step": 19579
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.3219544887542725,
      "learning_rate": 0.0005893718178510123,
      "loss": 3.2573,
      "step": 19580
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5178018808364868,
      "learning_rate": 0.000589370738663912,
      "loss": 3.1742,
      "step": 19581
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.932264804840088,
      "learning_rate": 0.0005893696594230121,
      "loss": 2.8506,
      "step": 19582
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.8366471529006958,
      "learning_rate": 0.0005893685801283131,
      "loss": 2.9666,
      "step": 19583
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.604835033416748,
      "learning_rate": 0.0005893675007798149,
      "loss": 3.1078,
      "step": 19584
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8917361497879028,
      "learning_rate": 0.0005893664213775179,
      "loss": 3.1788,
      "step": 19585
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.104651689529419,
      "learning_rate": 0.0005893653419214222,
      "loss": 3.0636,
      "step": 19586
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4694676399230957,
      "learning_rate": 0.0005893642624115279,
      "loss": 3.1448,
      "step": 19587
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6357853412628174,
      "learning_rate": 0.0005893631828478354,
      "loss": 2.8915,
      "step": 19588
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8016585111618042,
      "learning_rate": 0.0005893621032303449,
      "loss": 2.9892,
      "step": 19589
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.326375126838684,
      "learning_rate": 0.0005893610235590563,
      "loss": 3.1122,
      "step": 19590
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5555154085159302,
      "learning_rate": 0.0005893599438339703,
      "loss": 3.0253,
      "step": 19591
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4393014907836914,
      "learning_rate": 0.0005893588640550867,
      "loss": 3.3531,
      "step": 19592
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4951897859573364,
      "learning_rate": 0.0005893577842224059,
      "loss": 3.1282,
      "step": 19593
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2116012573242188,
      "learning_rate": 0.0005893567043359279,
      "loss": 3.0146,
      "step": 19594
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.695883274078369,
      "learning_rate": 0.0005893556243956531,
      "loss": 3.3795,
      "step": 19595
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6243456602096558,
      "learning_rate": 0.0005893545444015816,
      "loss": 3.1819,
      "step": 19596
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.371307373046875,
      "learning_rate": 0.0005893534643537136,
      "loss": 3.1074,
      "step": 19597
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8181095123291016,
      "learning_rate": 0.0005893523842520494,
      "loss": 2.9891,
      "step": 19598
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4168589115142822,
      "learning_rate": 0.0005893513040965891,
      "loss": 3.407,
      "step": 19599
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.659725308418274,
      "learning_rate": 0.0005893502238873328,
      "loss": 3.1124,
      "step": 19600
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5266591310501099,
      "learning_rate": 0.000589349143624281,
      "loss": 3.1577,
      "step": 19601
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5226271152496338,
      "learning_rate": 0.0005893480633074337,
      "loss": 2.8623,
      "step": 19602
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5140631198883057,
      "learning_rate": 0.0005893469829367911,
      "loss": 3.0809,
      "step": 19603
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2080670595169067,
      "learning_rate": 0.0005893459025123535,
      "loss": 3.323,
      "step": 19604
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4130642414093018,
      "learning_rate": 0.000589344822034121,
      "loss": 3.1256,
      "step": 19605
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5628008842468262,
      "learning_rate": 0.0005893437415020938,
      "loss": 2.9595,
      "step": 19606
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3128831386566162,
      "learning_rate": 0.0005893426609162722,
      "loss": 3.0072,
      "step": 19607
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7277714014053345,
      "learning_rate": 0.0005893415802766562,
      "loss": 3.1271,
      "step": 19608
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.869994044303894,
      "learning_rate": 0.0005893404995832463,
      "loss": 3.2209,
      "step": 19609
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6052188873291016,
      "learning_rate": 0.0005893394188360425,
      "loss": 3.3178,
      "step": 19610
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.366927146911621,
      "learning_rate": 0.000589338338035045,
      "loss": 3.151,
      "step": 19611
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7043713331222534,
      "learning_rate": 0.0005893372571802541,
      "loss": 2.8917,
      "step": 19612
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5053796768188477,
      "learning_rate": 0.0005893361762716699,
      "loss": 2.9426,
      "step": 19613
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4192147254943848,
      "learning_rate": 0.0005893350953092927,
      "loss": 3.3339,
      "step": 19614
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3926920890808105,
      "learning_rate": 0.0005893340142931225,
      "loss": 3.3386,
      "step": 19615
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.410740613937378,
      "learning_rate": 0.0005893329332231597,
      "loss": 3.2906,
      "step": 19616
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3663735389709473,
      "learning_rate": 0.0005893318520994045,
      "loss": 2.9428,
      "step": 19617
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.0555002689361572,
      "learning_rate": 0.0005893307709218571,
      "loss": 3.0755,
      "step": 19618
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.6673526763916016,
      "learning_rate": 0.0005893296896905175,
      "loss": 2.9534,
      "step": 19619
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5930503606796265,
      "learning_rate": 0.0005893286084053861,
      "loss": 3.0643,
      "step": 19620
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4614589214324951,
      "learning_rate": 0.0005893275270664631,
      "loss": 2.9643,
      "step": 19621
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4147109985351562,
      "learning_rate": 0.0005893264456737487,
      "loss": 3.0257,
      "step": 19622
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3778843879699707,
      "learning_rate": 0.0005893253642272429,
      "loss": 3.3576,
      "step": 19623
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6066076755523682,
      "learning_rate": 0.0005893242827269462,
      "loss": 3.1664,
      "step": 19624
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.09771466255188,
      "learning_rate": 0.0005893232011728585,
      "loss": 3.267,
      "step": 19625
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.603393793106079,
      "learning_rate": 0.0005893221195649803,
      "loss": 3.395,
      "step": 19626
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5434162616729736,
      "learning_rate": 0.0005893210379033116,
      "loss": 3.2116,
      "step": 19627
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2646158933639526,
      "learning_rate": 0.0005893199561878527,
      "loss": 3.0853,
      "step": 19628
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3792715072631836,
      "learning_rate": 0.0005893188744186037,
      "loss": 3.2549,
      "step": 19629
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3985847234725952,
      "learning_rate": 0.0005893177925955648,
      "loss": 3.1649,
      "step": 19630
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.09517765045166,
      "learning_rate": 0.0005893167107187364,
      "loss": 2.9288,
      "step": 19631
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2827993631362915,
      "learning_rate": 0.0005893156287881184,
      "loss": 3.1884,
      "step": 19632
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0669338703155518,
      "learning_rate": 0.0005893145468037113,
      "loss": 2.845,
      "step": 19633
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6930474042892456,
      "learning_rate": 0.0005893134647655152,
      "loss": 3.2268,
      "step": 19634
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.343654990196228,
      "learning_rate": 0.0005893123826735302,
      "loss": 3.2806,
      "step": 19635
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9242619276046753,
      "learning_rate": 0.0005893113005277566,
      "loss": 3.0688,
      "step": 19636
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3768125772476196,
      "learning_rate": 0.0005893102183281944,
      "loss": 3.1696,
      "step": 19637
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3558238744735718,
      "learning_rate": 0.0005893091360748441,
      "loss": 3.0745,
      "step": 19638
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.020806312561035,
      "learning_rate": 0.0005893080537677058,
      "loss": 3.4076,
      "step": 19639
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.185908794403076,
      "learning_rate": 0.0005893069714067796,
      "loss": 3.1226,
      "step": 19640
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7711920738220215,
      "learning_rate": 0.0005893058889920659,
      "loss": 2.9385,
      "step": 19641
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.226269006729126,
      "learning_rate": 0.0005893048065235646,
      "loss": 3.1833,
      "step": 19642
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.075377941131592,
      "learning_rate": 0.0005893037240012763,
      "loss": 3.2066,
      "step": 19643
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7064619064331055,
      "learning_rate": 0.0005893026414252008,
      "loss": 3.0631,
      "step": 19644
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.057121515274048,
      "learning_rate": 0.0005893015587953384,
      "loss": 2.9509,
      "step": 19645
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0359816551208496,
      "learning_rate": 0.0005893004761116895,
      "loss": 3.3682,
      "step": 19646
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5512667894363403,
      "learning_rate": 0.0005892993933742542,
      "loss": 2.8893,
      "step": 19647
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6691348552703857,
      "learning_rate": 0.0005892983105830326,
      "loss": 3.121,
      "step": 19648
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0000579357147217,
      "learning_rate": 0.0005892972277380252,
      "loss": 3.1444,
      "step": 19649
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.039233446121216,
      "learning_rate": 0.0005892961448392318,
      "loss": 3.0822,
      "step": 19650
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7465806007385254,
      "learning_rate": 0.0005892950618866528,
      "loss": 3.256,
      "step": 19651
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.079993963241577,
      "learning_rate": 0.0005892939788802883,
      "loss": 3.1778,
      "step": 19652
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3902937173843384,
      "learning_rate": 0.0005892928958201387,
      "loss": 3.2262,
      "step": 19653
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2542774677276611,
      "learning_rate": 0.000589291812706204,
      "loss": 3.1401,
      "step": 19654
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3559131622314453,
      "learning_rate": 0.0005892907295384847,
      "loss": 3.2102,
      "step": 19655
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4746967554092407,
      "learning_rate": 0.0005892896463169806,
      "loss": 3.0477,
      "step": 19656
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7697011232376099,
      "learning_rate": 0.0005892885630416921,
      "loss": 3.07,
      "step": 19657
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0635368824005127,
      "learning_rate": 0.0005892874797126195,
      "loss": 3.0638,
      "step": 19658
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9564355611801147,
      "learning_rate": 0.0005892863963297628,
      "loss": 3.1027,
      "step": 19659
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.796410083770752,
      "learning_rate": 0.0005892853128931225,
      "loss": 2.9406,
      "step": 19660
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4736589193344116,
      "learning_rate": 0.0005892842294026984,
      "loss": 2.9111,
      "step": 19661
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.9938838481903076,
      "learning_rate": 0.0005892831458584909,
      "loss": 2.9478,
      "step": 19662
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5268505811691284,
      "learning_rate": 0.0005892820622605002,
      "loss": 3.0312,
      "step": 19663
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4538233280181885,
      "learning_rate": 0.0005892809786087267,
      "loss": 3.2112,
      "step": 19664
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4696060419082642,
      "learning_rate": 0.0005892798949031702,
      "loss": 3.2438,
      "step": 19665
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8345435857772827,
      "learning_rate": 0.0005892788111438311,
      "loss": 2.9637,
      "step": 19666
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4864915609359741,
      "learning_rate": 0.0005892777273307097,
      "loss": 3.1268,
      "step": 19667
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5839775800704956,
      "learning_rate": 0.0005892766434638061,
      "loss": 3.1582,
      "step": 19668
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.120133399963379,
      "learning_rate": 0.0005892755595431205,
      "loss": 3.1049,
      "step": 19669
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3941469192504883,
      "learning_rate": 0.0005892744755686531,
      "loss": 3.2412,
      "step": 19670
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.711200475692749,
      "learning_rate": 0.0005892733915404041,
      "loss": 3.2653,
      "step": 19671
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4568346738815308,
      "learning_rate": 0.0005892723074583737,
      "loss": 3.0408,
      "step": 19672
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3193085193634033,
      "learning_rate": 0.0005892712233225622,
      "loss": 2.9776,
      "step": 19673
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.378991961479187,
      "learning_rate": 0.0005892701391329697,
      "loss": 2.8386,
      "step": 19674
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8472388982772827,
      "learning_rate": 0.0005892690548895964,
      "loss": 3.0238,
      "step": 19675
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6347765922546387,
      "learning_rate": 0.0005892679705924426,
      "loss": 3.0183,
      "step": 19676
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.453006386756897,
      "learning_rate": 0.0005892668862415082,
      "loss": 3.2621,
      "step": 19677
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2613253593444824,
      "learning_rate": 0.0005892658018367937,
      "loss": 3.0835,
      "step": 19678
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9113144874572754,
      "learning_rate": 0.0005892647173782994,
      "loss": 2.9513,
      "step": 19679
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3977336883544922,
      "learning_rate": 0.0005892636328660252,
      "loss": 3.2497,
      "step": 19680
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.334722638130188,
      "learning_rate": 0.0005892625482999715,
      "loss": 3.2395,
      "step": 19681
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.6148006916046143,
      "learning_rate": 0.0005892614636801383,
      "loss": 3.0085,
      "step": 19682
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.838723659515381,
      "learning_rate": 0.0005892603790065261,
      "loss": 3.1034,
      "step": 19683
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.899906039237976,
      "learning_rate": 0.0005892592942791349,
      "loss": 3.0678,
      "step": 19684
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.574528455734253,
      "learning_rate": 0.0005892582094979647,
      "loss": 3.0358,
      "step": 19685
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.8837859630584717,
      "learning_rate": 0.0005892571246630162,
      "loss": 3.3153,
      "step": 19686
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.403892755508423,
      "learning_rate": 0.0005892560397742894,
      "loss": 3.0807,
      "step": 19687
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5007498264312744,
      "learning_rate": 0.0005892549548317842,
      "loss": 3.2029,
      "step": 19688
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5452005863189697,
      "learning_rate": 0.0005892538698355011,
      "loss": 3.2159,
      "step": 19689
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.949601411819458,
      "learning_rate": 0.0005892527847854404,
      "loss": 3.089,
      "step": 19690
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2891488075256348,
      "learning_rate": 0.000589251699681602,
      "loss": 2.9439,
      "step": 19691
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4498984813690186,
      "learning_rate": 0.0005892506145239863,
      "loss": 3.1001,
      "step": 19692
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3593554496765137,
      "learning_rate": 0.0005892495293125935,
      "loss": 3.2134,
      "step": 19693
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.91567063331604,
      "learning_rate": 0.0005892484440474237,
      "loss": 3.1692,
      "step": 19694
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5663179159164429,
      "learning_rate": 0.0005892473587284771,
      "loss": 3.0744,
      "step": 19695
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5505555868148804,
      "learning_rate": 0.000589246273355754,
      "loss": 3.1254,
      "step": 19696
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1159565448760986,
      "learning_rate": 0.0005892451879292546,
      "loss": 3.0386,
      "step": 19697
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7203632593154907,
      "learning_rate": 0.000589244102448979,
      "loss": 3.1378,
      "step": 19698
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6565784215927124,
      "learning_rate": 0.0005892430169149275,
      "loss": 3.2251,
      "step": 19699
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.290062665939331,
      "learning_rate": 0.0005892419313271003,
      "loss": 2.8169,
      "step": 19700
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.467232584953308,
      "learning_rate": 0.0005892408456854975,
      "loss": 3.0987,
      "step": 19701
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4285138845443726,
      "learning_rate": 0.0005892397599901193,
      "loss": 2.9542,
      "step": 19702
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5115050077438354,
      "learning_rate": 0.0005892386742409662,
      "loss": 3.0561,
      "step": 19703
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.440234899520874,
      "learning_rate": 0.0005892375884380379,
      "loss": 2.9287,
      "step": 19704
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7007089853286743,
      "learning_rate": 0.0005892365025813351,
      "loss": 3.2865,
      "step": 19705
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3637564182281494,
      "learning_rate": 0.0005892354166708576,
      "loss": 2.995,
      "step": 19706
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3876703977584839,
      "learning_rate": 0.0005892343307066059,
      "loss": 3.1427,
      "step": 19707
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.527590274810791,
      "learning_rate": 0.0005892332446885799,
      "loss": 3.1292,
      "step": 19708
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4105178117752075,
      "learning_rate": 0.0005892321586167802,
      "loss": 3.4124,
      "step": 19709
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5790170431137085,
      "learning_rate": 0.0005892310724912067,
      "loss": 3.2359,
      "step": 19710
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.633899211883545,
      "learning_rate": 0.0005892299863118597,
      "loss": 2.9089,
      "step": 19711
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4031474590301514,
      "learning_rate": 0.0005892289000787393,
      "loss": 3.3695,
      "step": 19712
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2981904745101929,
      "learning_rate": 0.0005892278137918459,
      "loss": 2.7444,
      "step": 19713
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.224015235900879,
      "learning_rate": 0.0005892267274511796,
      "loss": 2.8664,
      "step": 19714
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5703380107879639,
      "learning_rate": 0.0005892256410567404,
      "loss": 3.1002,
      "step": 19715
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6922627687454224,
      "learning_rate": 0.0005892245546085289,
      "loss": 3.2965,
      "step": 19716
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5780525207519531,
      "learning_rate": 0.000589223468106545,
      "loss": 3.1629,
      "step": 19717
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6750328540802002,
      "learning_rate": 0.0005892223815507889,
      "loss": 3.0522,
      "step": 19718
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2360789775848389,
      "learning_rate": 0.0005892212949412611,
      "loss": 3.1388,
      "step": 19719
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7442632913589478,
      "learning_rate": 0.0005892202082779615,
      "loss": 3.2233,
      "step": 19720
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.323165774345398,
      "learning_rate": 0.0005892191215608905,
      "loss": 2.948,
      "step": 19721
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.41991126537323,
      "learning_rate": 0.0005892180347900481,
      "loss": 3.0915,
      "step": 19722
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7524745464324951,
      "learning_rate": 0.0005892169479654346,
      "loss": 3.2831,
      "step": 19723
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.424690842628479,
      "learning_rate": 0.0005892158610870502,
      "loss": 2.9623,
      "step": 19724
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5029383897781372,
      "learning_rate": 0.0005892147741548952,
      "loss": 3.0933,
      "step": 19725
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6351227760314941,
      "learning_rate": 0.0005892136871689696,
      "loss": 2.9027,
      "step": 19726
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7531129121780396,
      "learning_rate": 0.0005892126001292738,
      "loss": 3.1611,
      "step": 19727
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8629413843154907,
      "learning_rate": 0.000589211513035808,
      "loss": 3.2418,
      "step": 19728
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3548614978790283,
      "learning_rate": 0.0005892104258885722,
      "loss": 2.9244,
      "step": 19729
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4573895931243896,
      "learning_rate": 0.0005892093386875668,
      "loss": 3.4435,
      "step": 19730
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4032150506973267,
      "learning_rate": 0.0005892082514327918,
      "loss": 3.0831,
      "step": 19731
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.433401346206665,
      "learning_rate": 0.0005892071641242476,
      "loss": 3.1898,
      "step": 19732
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.606647253036499,
      "learning_rate": 0.0005892060767619344,
      "loss": 3.1883,
      "step": 19733
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4108989238739014,
      "learning_rate": 0.0005892049893458522,
      "loss": 3.5104,
      "step": 19734
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5534403324127197,
      "learning_rate": 0.0005892039018760013,
      "loss": 3.2205,
      "step": 19735
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6120808124542236,
      "learning_rate": 0.0005892028143523821,
      "loss": 3.1838,
      "step": 19736
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2545905113220215,
      "learning_rate": 0.0005892017267749945,
      "loss": 2.8602,
      "step": 19737
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7219339609146118,
      "learning_rate": 0.000589200639143839,
      "loss": 3.0106,
      "step": 19738
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6759427785873413,
      "learning_rate": 0.0005891995514589155,
      "loss": 3.4239,
      "step": 19739
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3275861740112305,
      "learning_rate": 0.0005891984637202244,
      "loss": 3.1056,
      "step": 19740
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7233054637908936,
      "learning_rate": 0.0005891973759277659,
      "loss": 2.9945,
      "step": 19741
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5310640335083008,
      "learning_rate": 0.00058919628808154,
      "loss": 2.9909,
      "step": 19742
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9714446067810059,
      "learning_rate": 0.0005891952001815472,
      "loss": 3.0588,
      "step": 19743
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7801237106323242,
      "learning_rate": 0.0005891941122277875,
      "loss": 3.0232,
      "step": 19744
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3950684070587158,
      "learning_rate": 0.0005891930242202611,
      "loss": 3.2941,
      "step": 19745
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9471601247787476,
      "learning_rate": 0.0005891919361589684,
      "loss": 3.1416,
      "step": 19746
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7922441959381104,
      "learning_rate": 0.0005891908480439093,
      "loss": 3.1138,
      "step": 19747
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4979872703552246,
      "learning_rate": 0.0005891897598750842,
      "loss": 3.0499,
      "step": 19748
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3098235130310059,
      "learning_rate": 0.0005891886716524933,
      "loss": 3.1983,
      "step": 19749
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.110795736312866,
      "learning_rate": 0.0005891875833761368,
      "loss": 3.1827,
      "step": 19750
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.251279592514038,
      "learning_rate": 0.0005891864950460148,
      "loss": 3.051,
      "step": 19751
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2843536138534546,
      "learning_rate": 0.0005891854066621276,
      "loss": 3.1197,
      "step": 19752
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9969489574432373,
      "learning_rate": 0.0005891843182244754,
      "loss": 3.2252,
      "step": 19753
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7996050119400024,
      "learning_rate": 0.0005891832297330584,
      "loss": 2.9304,
      "step": 19754
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.467332601547241,
      "learning_rate": 0.0005891821411878767,
      "loss": 2.9086,
      "step": 19755
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.007286310195923,
      "learning_rate": 0.0005891810525889306,
      "loss": 3.1802,
      "step": 19756
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2809886932373047,
      "learning_rate": 0.0005891799639362202,
      "loss": 3.2198,
      "step": 19757
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3412561416625977,
      "learning_rate": 0.000589178875229746,
      "loss": 3.1721,
      "step": 19758
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.8548591136932373,
      "learning_rate": 0.0005891777864695079,
      "loss": 2.9032,
      "step": 19759
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.0056912899017334,
      "learning_rate": 0.0005891766976555061,
      "loss": 3.1856,
      "step": 19760
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3833684921264648,
      "learning_rate": 0.000589175608787741,
      "loss": 3.0745,
      "step": 19761
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8734426498413086,
      "learning_rate": 0.0005891745198662126,
      "loss": 3.1004,
      "step": 19762
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7405900955200195,
      "learning_rate": 0.0005891734308909213,
      "loss": 3.0978,
      "step": 19763
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3567873239517212,
      "learning_rate": 0.000589172341861867,
      "loss": 3.3649,
      "step": 19764
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6391282081604004,
      "learning_rate": 0.0005891712527790504,
      "loss": 3.1186,
      "step": 19765
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3084192276000977,
      "learning_rate": 0.0005891701636424712,
      "loss": 3.073,
      "step": 19766
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6183726787567139,
      "learning_rate": 0.0005891690744521299,
      "loss": 2.9856,
      "step": 19767
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6928361654281616,
      "learning_rate": 0.0005891679852080265,
      "loss": 3.3155,
      "step": 19768
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.328601121902466,
      "learning_rate": 0.0005891668959101614,
      "loss": 3.0092,
      "step": 19769
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3954581022262573,
      "learning_rate": 0.0005891658065585347,
      "loss": 3.2608,
      "step": 19770
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0074026584625244,
      "learning_rate": 0.0005891647171531466,
      "loss": 3.0664,
      "step": 19771
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9628078937530518,
      "learning_rate": 0.0005891636276939974,
      "loss": 2.9766,
      "step": 19772
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3145228624343872,
      "learning_rate": 0.0005891625381810871,
      "loss": 3.1748,
      "step": 19773
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5319864749908447,
      "learning_rate": 0.0005891614486144161,
      "loss": 3.2147,
      "step": 19774
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8623393774032593,
      "learning_rate": 0.0005891603589939846,
      "loss": 2.9178,
      "step": 19775
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7781269550323486,
      "learning_rate": 0.0005891592693197926,
      "loss": 3.0368,
      "step": 19776
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8896350860595703,
      "learning_rate": 0.0005891581795918405,
      "loss": 2.9872,
      "step": 19777
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.574593186378479,
      "learning_rate": 0.0005891570898101285,
      "loss": 2.7126,
      "step": 19778
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4717273712158203,
      "learning_rate": 0.0005891559999746566,
      "loss": 3.0038,
      "step": 19779
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.158468246459961,
      "learning_rate": 0.0005891549100854251,
      "loss": 3.2483,
      "step": 19780
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.008495807647705,
      "learning_rate": 0.0005891538201424344,
      "loss": 2.9944,
      "step": 19781
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7028244733810425,
      "learning_rate": 0.0005891527301456845,
      "loss": 2.9544,
      "step": 19782
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4539462327957153,
      "learning_rate": 0.0005891516400951756,
      "loss": 2.7578,
      "step": 19783
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4693021774291992,
      "learning_rate": 0.000589150549990908,
      "loss": 3.0911,
      "step": 19784
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.099510431289673,
      "learning_rate": 0.0005891494598328819,
      "loss": 2.9558,
      "step": 19785
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7728779315948486,
      "learning_rate": 0.0005891483696210974,
      "loss": 3.3668,
      "step": 19786
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9786550998687744,
      "learning_rate": 0.0005891472793555548,
      "loss": 3.0811,
      "step": 19787
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2236567735671997,
      "learning_rate": 0.0005891461890362542,
      "loss": 3.0307,
      "step": 19788
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.558922290802002,
      "learning_rate": 0.0005891450986631959,
      "loss": 3.1775,
      "step": 19789
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.8527259826660156,
      "learning_rate": 0.0005891440082363801,
      "loss": 3.1496,
      "step": 19790
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9438389539718628,
      "learning_rate": 0.0005891429177558069,
      "loss": 3.169,
      "step": 19791
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.632695198059082,
      "learning_rate": 0.0005891418272214766,
      "loss": 3.1764,
      "step": 19792
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.114964485168457,
      "learning_rate": 0.0005891407366333894,
      "loss": 3.4859,
      "step": 19793
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.23504376411438,
      "learning_rate": 0.0005891396459915454,
      "loss": 3.0875,
      "step": 19794
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6582947969436646,
      "learning_rate": 0.000589138555295945,
      "loss": 3.0527,
      "step": 19795
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6091688871383667,
      "learning_rate": 0.0005891374645465881,
      "loss": 3.2013,
      "step": 19796
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.22087025642395,
      "learning_rate": 0.0005891363737434753,
      "loss": 3.1597,
      "step": 19797
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.6270334720611572,
      "learning_rate": 0.0005891352828866066,
      "loss": 3.0453,
      "step": 19798
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3929239511489868,
      "learning_rate": 0.000589134191975982,
      "loss": 3.2125,
      "step": 19799
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3718141317367554,
      "learning_rate": 0.000589133101011602,
      "loss": 3.0193,
      "step": 19800
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.497394561767578,
      "learning_rate": 0.0005891320099934668,
      "loss": 2.964,
      "step": 19801
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.332834482192993,
      "learning_rate": 0.0005891309189215763,
      "loss": 3.094,
      "step": 19802
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8066827058792114,
      "learning_rate": 0.0005891298277959311,
      "loss": 3.073,
      "step": 19803
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5049786567687988,
      "learning_rate": 0.000589128736616531,
      "loss": 3.1366,
      "step": 19804
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.655700445175171,
      "learning_rate": 0.0005891276453833766,
      "loss": 3.1855,
      "step": 19805
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8707447052001953,
      "learning_rate": 0.0005891265540964677,
      "loss": 3.1377,
      "step": 19806
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.532706618309021,
      "learning_rate": 0.0005891254627558048,
      "loss": 2.9956,
      "step": 19807
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2906428575515747,
      "learning_rate": 0.0005891243713613882,
      "loss": 3.2431,
      "step": 19808
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9515187740325928,
      "learning_rate": 0.0005891232799132178,
      "loss": 3.2176,
      "step": 19809
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.35227370262146,
      "learning_rate": 0.0005891221884112938,
      "loss": 3.0093,
      "step": 19810
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5032047033309937,
      "learning_rate": 0.0005891210968556167,
      "loss": 2.9431,
      "step": 19811
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.588573694229126,
      "learning_rate": 0.0005891200052461865,
      "loss": 3.0869,
      "step": 19812
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2277822494506836,
      "learning_rate": 0.0005891189135830033,
      "loss": 3.2279,
      "step": 19813
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1584699153900146,
      "learning_rate": 0.0005891178218660676,
      "loss": 3.037,
      "step": 19814
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.714462399482727,
      "learning_rate": 0.0005891167300953794,
      "loss": 3.0299,
      "step": 19815
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.398451566696167,
      "learning_rate": 0.0005891156382709389,
      "loss": 3.1241,
      "step": 19816
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4818154573440552,
      "learning_rate": 0.0005891145463927463,
      "loss": 2.9954,
      "step": 19817
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.556756615638733,
      "learning_rate": 0.0005891134544608019,
      "loss": 3.1495,
      "step": 19818
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.464005708694458,
      "learning_rate": 0.000589112362475106,
      "loss": 3.2208,
      "step": 19819
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4273802042007446,
      "learning_rate": 0.0005891112704356584,
      "loss": 3.1393,
      "step": 19820
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9492238759994507,
      "learning_rate": 0.0005891101783424597,
      "loss": 3.0039,
      "step": 19821
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.9307985305786133,
      "learning_rate": 0.0005891090861955099,
      "loss": 3.0831,
      "step": 19822
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4163647890090942,
      "learning_rate": 0.0005891079939948094,
      "loss": 3.204,
      "step": 19823
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7832345962524414,
      "learning_rate": 0.0005891069017403581,
      "loss": 2.8799,
      "step": 19824
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0825562477111816,
      "learning_rate": 0.0005891058094321565,
      "loss": 3.0952,
      "step": 19825
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9866167306900024,
      "learning_rate": 0.0005891047170702046,
      "loss": 3.3808,
      "step": 19826
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5668342113494873,
      "learning_rate": 0.0005891036246545027,
      "loss": 3.1195,
      "step": 19827
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7811733484268188,
      "learning_rate": 0.0005891025321850509,
      "loss": 3.0554,
      "step": 19828
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3200113773345947,
      "learning_rate": 0.0005891014396618495,
      "loss": 3.1855,
      "step": 19829
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4297693967819214,
      "learning_rate": 0.0005891003470848989,
      "loss": 3.151,
      "step": 19830
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4616425037384033,
      "learning_rate": 0.0005890992544541989,
      "loss": 3.1332,
      "step": 19831
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.526537299156189,
      "learning_rate": 0.0005890981617697499,
      "loss": 2.9851,
      "step": 19832
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7625371217727661,
      "learning_rate": 0.0005890970690315521,
      "loss": 3.2192,
      "step": 19833
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8115211725234985,
      "learning_rate": 0.0005890959762396056,
      "loss": 3.2249,
      "step": 19834
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4007459878921509,
      "learning_rate": 0.0005890948833939108,
      "loss": 2.8987,
      "step": 19835
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4787431955337524,
      "learning_rate": 0.0005890937904944679,
      "loss": 3.1397,
      "step": 19836
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.399190902709961,
      "learning_rate": 0.000589092697541277,
      "loss": 3.0243,
      "step": 19837
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2945740222930908,
      "learning_rate": 0.0005890916045343381,
      "loss": 3.1256,
      "step": 19838
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.551484227180481,
      "learning_rate": 0.0005890905114736517,
      "loss": 3.1933,
      "step": 19839
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5162110328674316,
      "learning_rate": 0.000589089418359218,
      "loss": 2.9228,
      "step": 19840
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6420314311981201,
      "learning_rate": 0.0005890883251910371,
      "loss": 3.0419,
      "step": 19841
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6596769094467163,
      "learning_rate": 0.0005890872319691091,
      "loss": 3.0641,
      "step": 19842
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2685350179672241,
      "learning_rate": 0.0005890861386934345,
      "loss": 3.2218,
      "step": 19843
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5241308212280273,
      "learning_rate": 0.0005890850453640134,
      "loss": 3.0025,
      "step": 19844
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6129461526870728,
      "learning_rate": 0.0005890839519808456,
      "loss": 3.1709,
      "step": 19845
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4713689088821411,
      "learning_rate": 0.0005890828585439319,
      "loss": 3.0978,
      "step": 19846
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5224201679229736,
      "learning_rate": 0.0005890817650532721,
      "loss": 2.9111,
      "step": 19847
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4755916595458984,
      "learning_rate": 0.0005890806715088667,
      "loss": 3.1102,
      "step": 19848
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9745351076126099,
      "learning_rate": 0.0005890795779107156,
      "loss": 3.1397,
      "step": 19849
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3494772911071777,
      "learning_rate": 0.0005890784842588193,
      "loss": 3.4319,
      "step": 19850
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8153457641601562,
      "learning_rate": 0.0005890773905531777,
      "loss": 3.1093,
      "step": 19851
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.525671124458313,
      "learning_rate": 0.0005890762967937912,
      "loss": 3.0594,
      "step": 19852
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8901689052581787,
      "learning_rate": 0.00058907520298066,
      "loss": 3.0772,
      "step": 19853
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.228721857070923,
      "learning_rate": 0.0005890741091137842,
      "loss": 3.1954,
      "step": 19854
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4783786535263062,
      "learning_rate": 0.0005890730151931641,
      "loss": 3.052,
      "step": 19855
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8612369298934937,
      "learning_rate": 0.0005890719212188,
      "loss": 3.2777,
      "step": 19856
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2230377197265625,
      "learning_rate": 0.0005890708271906917,
      "loss": 2.8836,
      "step": 19857
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7234066724777222,
      "learning_rate": 0.0005890697331088399,
      "loss": 3.1208,
      "step": 19858
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4596647024154663,
      "learning_rate": 0.0005890686389732444,
      "loss": 2.9555,
      "step": 19859
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5015276670455933,
      "learning_rate": 0.0005890675447839057,
      "loss": 3.3996,
      "step": 19860
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7961013317108154,
      "learning_rate": 0.000589066450540824,
      "loss": 3.179,
      "step": 19861
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9299167394638062,
      "learning_rate": 0.0005890653562439992,
      "loss": 2.8384,
      "step": 19862
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3668407201766968,
      "learning_rate": 0.0005890642618934317,
      "loss": 3.0396,
      "step": 19863
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.993536114692688,
      "learning_rate": 0.0005890631674891218,
      "loss": 3.1597,
      "step": 19864
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4694703817367554,
      "learning_rate": 0.0005890620730310696,
      "loss": 3.0283,
      "step": 19865
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5939997434616089,
      "learning_rate": 0.0005890609785192752,
      "loss": 3.088,
      "step": 19866
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.545515298843384,
      "learning_rate": 0.0005890598839537389,
      "loss": 3.1382,
      "step": 19867
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.605231761932373,
      "learning_rate": 0.000589058789334461,
      "loss": 3.0476,
      "step": 19868
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9771666526794434,
      "learning_rate": 0.0005890576946614417,
      "loss": 3.1494,
      "step": 19869
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.604355812072754,
      "learning_rate": 0.000589056599934681,
      "loss": 3.121,
      "step": 19870
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6916500329971313,
      "learning_rate": 0.0005890555051541792,
      "loss": 3.0758,
      "step": 19871
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5158315896987915,
      "learning_rate": 0.0005890544103199367,
      "loss": 3.0521,
      "step": 19872
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.426282286643982,
      "learning_rate": 0.0005890533154319533,
      "loss": 3.2036,
      "step": 19873
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0520858764648438,
      "learning_rate": 0.0005890522204902294,
      "loss": 2.9102,
      "step": 19874
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7410985231399536,
      "learning_rate": 0.0005890511254947655,
      "loss": 3.0599,
      "step": 19875
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.48280930519104,
      "learning_rate": 0.0005890500304455613,
      "loss": 2.8238,
      "step": 19876
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2663209438323975,
      "learning_rate": 0.0005890489353426174,
      "loss": 3.1556,
      "step": 19877
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.381575107574463,
      "learning_rate": 0.0005890478401859337,
      "loss": 3.1318,
      "step": 19878
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3441706895828247,
      "learning_rate": 0.0005890467449755107,
      "loss": 3.1147,
      "step": 19879
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4017724990844727,
      "learning_rate": 0.0005890456497113484,
      "loss": 3.246,
      "step": 19880
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6965657472610474,
      "learning_rate": 0.000589044554393447,
      "loss": 3.1698,
      "step": 19881
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3090217113494873,
      "learning_rate": 0.0005890434590218068,
      "loss": 2.9152,
      "step": 19882
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.388887882232666,
      "learning_rate": 0.000589042363596428,
      "loss": 3.2053,
      "step": 19883
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3046718835830688,
      "learning_rate": 0.0005890412681173107,
      "loss": 3.1494,
      "step": 19884
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3811086416244507,
      "learning_rate": 0.0005890401725844553,
      "loss": 3.1756,
      "step": 19885
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4565540552139282,
      "learning_rate": 0.0005890390769978617,
      "loss": 3.0256,
      "step": 19886
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.101318597793579,
      "learning_rate": 0.0005890379813575304,
      "loss": 3.3226,
      "step": 19887
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7501640319824219,
      "learning_rate": 0.0005890368856634614,
      "loss": 2.9622,
      "step": 19888
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4833788871765137,
      "learning_rate": 0.000589035789915655,
      "loss": 3.181,
      "step": 19889
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2911860942840576,
      "learning_rate": 0.0005890346941141116,
      "loss": 3.0052,
      "step": 19890
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7636913061141968,
      "learning_rate": 0.000589033598258831,
      "loss": 3.241,
      "step": 19891
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.500422716140747,
      "learning_rate": 0.0005890325023498136,
      "loss": 3.2327,
      "step": 19892
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.709793210029602,
      "learning_rate": 0.0005890314063870596,
      "loss": 2.9363,
      "step": 19893
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.286044716835022,
      "learning_rate": 0.0005890303103705692,
      "loss": 3.0424,
      "step": 19894
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.471503496170044,
      "learning_rate": 0.0005890292143003427,
      "loss": 3.0528,
      "step": 19895
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3960541486740112,
      "learning_rate": 0.0005890281181763802,
      "loss": 3.0894,
      "step": 19896
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3914920091629028,
      "learning_rate": 0.0005890270219986818,
      "loss": 3.0662,
      "step": 19897
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.584763765335083,
      "learning_rate": 0.0005890259257672479,
      "loss": 3.0684,
      "step": 19898
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6822175979614258,
      "learning_rate": 0.0005890248294820786,
      "loss": 2.9669,
      "step": 19899
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4476935863494873,
      "learning_rate": 0.0005890237331431741,
      "loss": 3.2642,
      "step": 19900
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4287770986557007,
      "learning_rate": 0.0005890226367505347,
      "loss": 3.1681,
      "step": 19901
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.37201988697052,
      "learning_rate": 0.0005890215403041606,
      "loss": 3.2229,
      "step": 19902
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4651333093643188,
      "learning_rate": 0.0005890204438040518,
      "loss": 2.9598,
      "step": 19903
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.994881510734558,
      "learning_rate": 0.0005890193472502087,
      "loss": 3.2075,
      "step": 19904
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7456746101379395,
      "learning_rate": 0.0005890182506426314,
      "loss": 3.0821,
      "step": 19905
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7493101358413696,
      "learning_rate": 0.0005890171539813203,
      "loss": 2.8829,
      "step": 19906
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4156222343444824,
      "learning_rate": 0.0005890160572662752,
      "loss": 3.2368,
      "step": 19907
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1154048442840576,
      "learning_rate": 0.0005890149604974968,
      "loss": 3.2011,
      "step": 19908
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9136995077133179,
      "learning_rate": 0.0005890138636749849,
      "loss": 2.9539,
      "step": 19909
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.813887596130371,
      "learning_rate": 0.00058901276679874,
      "loss": 3.2379,
      "step": 19910
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3290635347366333,
      "learning_rate": 0.0005890116698687621,
      "loss": 3.0675,
      "step": 19911
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3726438283920288,
      "learning_rate": 0.0005890105728850515,
      "loss": 2.8785,
      "step": 19912
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6427980661392212,
      "learning_rate": 0.0005890094758476084,
      "loss": 2.9927,
      "step": 19913
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5311123132705688,
      "learning_rate": 0.0005890083787564329,
      "loss": 2.8755,
      "step": 19914
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4019553661346436,
      "learning_rate": 0.0005890072816115253,
      "loss": 3.0579,
      "step": 19915
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7226641178131104,
      "learning_rate": 0.0005890061844128859,
      "loss": 3.1774,
      "step": 19916
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6167627573013306,
      "learning_rate": 0.0005890050871605147,
      "loss": 3.0552,
      "step": 19917
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.770088791847229,
      "learning_rate": 0.000589003989854412,
      "loss": 2.905,
      "step": 19918
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.364431619644165,
      "learning_rate": 0.0005890028924945781,
      "loss": 3.2508,
      "step": 19919
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7918143272399902,
      "learning_rate": 0.000589001795081013,
      "loss": 3.3023,
      "step": 19920
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.276480793952942,
      "learning_rate": 0.000589000697613717,
      "loss": 3.2253,
      "step": 19921
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.533641815185547,
      "learning_rate": 0.0005889996000926904,
      "loss": 3.3585,
      "step": 19922
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3688302040100098,
      "learning_rate": 0.0005889985025179332,
      "loss": 3.1394,
      "step": 19923
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.722490906715393,
      "learning_rate": 0.0005889974048894458,
      "loss": 3.067,
      "step": 19924
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.458460807800293,
      "learning_rate": 0.0005889963072072283,
      "loss": 3.0616,
      "step": 19925
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.572390079498291,
      "learning_rate": 0.0005889952094712811,
      "loss": 3.2616,
      "step": 19926
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.110698699951172,
      "learning_rate": 0.000588994111681604,
      "loss": 3.1467,
      "step": 19927
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.034534215927124,
      "learning_rate": 0.0005889930138381975,
      "loss": 3.1881,
      "step": 19928
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4997069835662842,
      "learning_rate": 0.0005889919159410618,
      "loss": 2.8708,
      "step": 19929
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.237313747406006,
      "learning_rate": 0.0005889908179901971,
      "loss": 3.2595,
      "step": 19930
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6580028533935547,
      "learning_rate": 0.0005889897199856034,
      "loss": 3.0123,
      "step": 19931
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5673892498016357,
      "learning_rate": 0.0005889886219272812,
      "loss": 2.9438,
      "step": 19932
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.360048532485962,
      "learning_rate": 0.0005889875238152304,
      "loss": 3.1188,
      "step": 19933
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5868573188781738,
      "learning_rate": 0.0005889864256494515,
      "loss": 3.1075,
      "step": 19934
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0778937339782715,
      "learning_rate": 0.0005889853274299445,
      "loss": 3.3774,
      "step": 19935
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6736811399459839,
      "learning_rate": 0.0005889842291567097,
      "loss": 3.182,
      "step": 19936
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6398212909698486,
      "learning_rate": 0.0005889831308297473,
      "loss": 3.065,
      "step": 19937
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.728071451187134,
      "learning_rate": 0.0005889820324490574,
      "loss": 3.4202,
      "step": 19938
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.527808666229248,
      "learning_rate": 0.0005889809340146404,
      "loss": 2.734,
      "step": 19939
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6056714057922363,
      "learning_rate": 0.0005889798355264963,
      "loss": 3.1205,
      "step": 19940
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.1538095474243164,
      "learning_rate": 0.0005889787369846254,
      "loss": 2.8844,
      "step": 19941
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.237309694290161,
      "learning_rate": 0.000588977638389028,
      "loss": 3.1285,
      "step": 19942
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3452575206756592,
      "learning_rate": 0.0005889765397397041,
      "loss": 2.8791,
      "step": 19943
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9624836444854736,
      "learning_rate": 0.000588975441036654,
      "loss": 3.0558,
      "step": 19944
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0065345764160156,
      "learning_rate": 0.000588974342279878,
      "loss": 3.1467,
      "step": 19945
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4173598289489746,
      "learning_rate": 0.0005889732434693761,
      "loss": 3.0114,
      "step": 19946
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.507960319519043,
      "learning_rate": 0.0005889721446051487,
      "loss": 3.0787,
      "step": 19947
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5651921033859253,
      "learning_rate": 0.0005889710456871959,
      "loss": 2.878,
      "step": 19948
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8014354705810547,
      "learning_rate": 0.0005889699467155179,
      "loss": 2.8275,
      "step": 19949
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.455676794052124,
      "learning_rate": 0.0005889688476901149,
      "loss": 3.2448,
      "step": 19950
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1310815811157227,
      "learning_rate": 0.0005889677486109872,
      "loss": 3.095,
      "step": 19951
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6723110675811768,
      "learning_rate": 0.000588966649478135,
      "loss": 3.0276,
      "step": 19952
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4999419450759888,
      "learning_rate": 0.0005889655502915583,
      "loss": 3.1794,
      "step": 19953
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3250590562820435,
      "learning_rate": 0.0005889644510512575,
      "loss": 3.0239,
      "step": 19954
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4418655633926392,
      "learning_rate": 0.0005889633517572329,
      "loss": 2.9737,
      "step": 19955
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5223749876022339,
      "learning_rate": 0.0005889622524094843,
      "loss": 2.91,
      "step": 19956
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5639021396636963,
      "learning_rate": 0.0005889611530080123,
      "loss": 3.0987,
      "step": 19957
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.637732744216919,
      "learning_rate": 0.000588960053552817,
      "loss": 3.2929,
      "step": 19958
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.525868535041809,
      "learning_rate": 0.0005889589540438987,
      "loss": 3.1606,
      "step": 19959
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4300901889801025,
      "learning_rate": 0.0005889578544812572,
      "loss": 2.8921,
      "step": 19960
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6691628694534302,
      "learning_rate": 0.0005889567548648931,
      "loss": 2.8164,
      "step": 19961
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5397454500198364,
      "learning_rate": 0.0005889556551948065,
      "loss": 2.8362,
      "step": 19962
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3209656476974487,
      "learning_rate": 0.0005889545554709976,
      "loss": 3.2087,
      "step": 19963
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7458827495574951,
      "learning_rate": 0.0005889534556934666,
      "loss": 2.8752,
      "step": 19964
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8261703252792358,
      "learning_rate": 0.0005889523558622136,
      "loss": 3.2095,
      "step": 19965
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6261438131332397,
      "learning_rate": 0.000588951255977239,
      "loss": 3.4448,
      "step": 19966
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5073134899139404,
      "learning_rate": 0.0005889501560385428,
      "loss": 3.1443,
      "step": 19967
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5577822923660278,
      "learning_rate": 0.0005889490560461254,
      "loss": 3.0989,
      "step": 19968
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3858637809753418,
      "learning_rate": 0.0005889479559999869,
      "loss": 3.0076,
      "step": 19969
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4735982418060303,
      "learning_rate": 0.0005889468559001276,
      "loss": 3.0631,
      "step": 19970
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4775015115737915,
      "learning_rate": 0.0005889457557465475,
      "loss": 2.8739,
      "step": 19971
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.855465054512024,
      "learning_rate": 0.000588944655539247,
      "loss": 3.0074,
      "step": 19972
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6068075895309448,
      "learning_rate": 0.0005889435552782262,
      "loss": 3.1057,
      "step": 19973
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4640616178512573,
      "learning_rate": 0.0005889424549634854,
      "loss": 2.9506,
      "step": 19974
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.476854681968689,
      "learning_rate": 0.0005889413545950246,
      "loss": 3.2835,
      "step": 19975
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3669261932373047,
      "learning_rate": 0.0005889402541728444,
      "loss": 3.0008,
      "step": 19976
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.736028790473938,
      "learning_rate": 0.0005889391536969445,
      "loss": 3.033,
      "step": 19977
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.384197473526001,
      "learning_rate": 0.0005889380531673255,
      "loss": 3.0888,
      "step": 19978
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6822971105575562,
      "learning_rate": 0.0005889369525839874,
      "loss": 3.2213,
      "step": 19979
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.412462830543518,
      "learning_rate": 0.0005889358519469304,
      "loss": 3.4707,
      "step": 19980
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3753656148910522,
      "learning_rate": 0.0005889347512561549,
      "loss": 2.9671,
      "step": 19981
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4202096462249756,
      "learning_rate": 0.000588933650511661,
      "loss": 2.8444,
      "step": 19982
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.693153738975525,
      "learning_rate": 0.0005889325497134487,
      "loss": 3.359,
      "step": 19983
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4578341245651245,
      "learning_rate": 0.0005889314488615185,
      "loss": 3.1674,
      "step": 19984
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.550324559211731,
      "learning_rate": 0.0005889303479558705,
      "loss": 3.4873,
      "step": 19985
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7725557088851929,
      "learning_rate": 0.0005889292469965049,
      "loss": 3.2928,
      "step": 19986
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.702799677848816,
      "learning_rate": 0.0005889281459834218,
      "loss": 3.4711,
      "step": 19987
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.585098385810852,
      "learning_rate": 0.0005889270449166217,
      "loss": 3.0537,
      "step": 19988
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.187204360961914,
      "learning_rate": 0.0005889259437961044,
      "loss": 3.2198,
      "step": 19989
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.9483561515808105,
      "learning_rate": 0.0005889248426218704,
      "loss": 3.1965,
      "step": 19990
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5542888641357422,
      "learning_rate": 0.0005889237413939199,
      "loss": 3.2113,
      "step": 19991
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6168562173843384,
      "learning_rate": 0.0005889226401122529,
      "loss": 3.0285,
      "step": 19992
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3672401905059814,
      "learning_rate": 0.0005889215387768698,
      "loss": 3.1123,
      "step": 19993
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7757362127304077,
      "learning_rate": 0.0005889204373877707,
      "loss": 3.1516,
      "step": 19994
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3897050619125366,
      "learning_rate": 0.0005889193359449558,
      "loss": 3.2477,
      "step": 19995
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7616174221038818,
      "learning_rate": 0.0005889182344484253,
      "loss": 3.0069,
      "step": 19996
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9835783243179321,
      "learning_rate": 0.0005889171328981797,
      "loss": 3.338,
      "step": 19997
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.128782033920288,
      "learning_rate": 0.0005889160312942186,
      "loss": 2.8968,
      "step": 19998
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.405798316001892,
      "learning_rate": 0.0005889149296365428,
      "loss": 3.1439,
      "step": 19999
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0539374351501465,
      "learning_rate": 0.0005889138279251522,
      "loss": 3.1832,
      "step": 20000
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9919670820236206,
      "learning_rate": 0.000588912726160047,
      "loss": 2.9412,
      "step": 20001
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4732805490493774,
      "learning_rate": 0.0005889116243412276,
      "loss": 3.208,
      "step": 20002
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8301045894622803,
      "learning_rate": 0.000588910522468694,
      "loss": 2.8743,
      "step": 20003
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7950536012649536,
      "learning_rate": 0.0005889094205424466,
      "loss": 2.9177,
      "step": 20004
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4101163148880005,
      "learning_rate": 0.0005889083185624852,
      "loss": 3.1064,
      "step": 20005
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7075190544128418,
      "learning_rate": 0.0005889072165288105,
      "loss": 3.0946,
      "step": 20006
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0443003177642822,
      "learning_rate": 0.0005889061144414225,
      "loss": 2.9127,
      "step": 20007
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5943628549575806,
      "learning_rate": 0.0005889050123003213,
      "loss": 3.1166,
      "step": 20008
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4233479499816895,
      "learning_rate": 0.0005889039101055072,
      "loss": 3.188,
      "step": 20009
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5893962383270264,
      "learning_rate": 0.0005889028078569805,
      "loss": 3.1541,
      "step": 20010
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4718042612075806,
      "learning_rate": 0.0005889017055547412,
      "loss": 3.0887,
      "step": 20011
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4074997901916504,
      "learning_rate": 0.0005889006031987898,
      "loss": 3.155,
      "step": 20012
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4586769342422485,
      "learning_rate": 0.0005888995007891262,
      "loss": 3.1967,
      "step": 20013
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7026753425598145,
      "learning_rate": 0.0005888983983257508,
      "loss": 3.1231,
      "step": 20014
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6632156372070312,
      "learning_rate": 0.0005888972958086637,
      "loss": 3.1179,
      "step": 20015
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.803117275238037,
      "learning_rate": 0.0005888961932378652,
      "loss": 3.2231,
      "step": 20016
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.665173053741455,
      "learning_rate": 0.0005888950906133553,
      "loss": 3.0405,
      "step": 20017
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7009525299072266,
      "learning_rate": 0.0005888939879351345,
      "loss": 3.2959,
      "step": 20018
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1909486055374146,
      "learning_rate": 0.0005888928852032028,
      "loss": 3.1195,
      "step": 20019
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4330655336380005,
      "learning_rate": 0.0005888917824175603,
      "loss": 3.0349,
      "step": 20020
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5739132165908813,
      "learning_rate": 0.0005888906795782075,
      "loss": 3.2478,
      "step": 20021
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4571253061294556,
      "learning_rate": 0.0005888895766851445,
      "loss": 3.1199,
      "step": 20022
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9791792631149292,
      "learning_rate": 0.0005888884737383715,
      "loss": 3.3142,
      "step": 20023
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5238959789276123,
      "learning_rate": 0.0005888873707378886,
      "loss": 3.155,
      "step": 20024
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5085933208465576,
      "learning_rate": 0.0005888862676836961,
      "loss": 3.1469,
      "step": 20025
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0347707271575928,
      "learning_rate": 0.0005888851645757942,
      "loss": 3.0231,
      "step": 20026
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5701558589935303,
      "learning_rate": 0.0005888840614141831,
      "loss": 2.9267,
      "step": 20027
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5315711498260498,
      "learning_rate": 0.000588882958198863,
      "loss": 3.0891,
      "step": 20028
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8242225646972656,
      "learning_rate": 0.0005888818549298341,
      "loss": 2.8553,
      "step": 20029
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7257747650146484,
      "learning_rate": 0.0005888807516070966,
      "loss": 3.0442,
      "step": 20030
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3808672428131104,
      "learning_rate": 0.0005888796482306507,
      "loss": 3.1598,
      "step": 20031
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4702330827713013,
      "learning_rate": 0.0005888785448004966,
      "loss": 2.7531,
      "step": 20032
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5038458108901978,
      "learning_rate": 0.0005888774413166346,
      "loss": 3.1601,
      "step": 20033
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5399137735366821,
      "learning_rate": 0.0005888763377790648,
      "loss": 3.3051,
      "step": 20034
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.745395541191101,
      "learning_rate": 0.0005888752341877875,
      "loss": 2.986,
      "step": 20035
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.47697114944458,
      "learning_rate": 0.0005888741305428028,
      "loss": 3.0778,
      "step": 20036
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3574039936065674,
      "learning_rate": 0.0005888730268441109,
      "loss": 3.0184,
      "step": 20037
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.470335841178894,
      "learning_rate": 0.000588871923091712,
      "loss": 3.2169,
      "step": 20038
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5615040063858032,
      "learning_rate": 0.0005888708192856065,
      "loss": 3.2644,
      "step": 20039
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7220221757888794,
      "learning_rate": 0.0005888697154257943,
      "loss": 2.9694,
      "step": 20040
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.607535481452942,
      "learning_rate": 0.000588868611512276,
      "loss": 2.9176,
      "step": 20041
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8690247535705566,
      "learning_rate": 0.0005888675075450514,
      "loss": 3.079,
      "step": 20042
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4923919439315796,
      "learning_rate": 0.000588866403524121,
      "loss": 3.1043,
      "step": 20043
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7316361665725708,
      "learning_rate": 0.0005888652994494847,
      "loss": 3.0529,
      "step": 20044
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4135798215866089,
      "learning_rate": 0.000588864195321143,
      "loss": 2.9901,
      "step": 20045
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3554798364639282,
      "learning_rate": 0.000588863091139096,
      "loss": 3.0597,
      "step": 20046
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2704153060913086,
      "learning_rate": 0.0005888619869033439,
      "loss": 3.0262,
      "step": 20047
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3995143175125122,
      "learning_rate": 0.0005888608826138868,
      "loss": 3.0658,
      "step": 20048
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4555132389068604,
      "learning_rate": 0.0005888597782707252,
      "loss": 3.0904,
      "step": 20049
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6437203884124756,
      "learning_rate": 0.000588858673873859,
      "loss": 3.3101,
      "step": 20050
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5022910833358765,
      "learning_rate": 0.0005888575694232885,
      "loss": 3.3029,
      "step": 20051
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.395093321800232,
      "learning_rate": 0.0005888564649190139,
      "loss": 3.1159,
      "step": 20052
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.28664231300354,
      "learning_rate": 0.0005888553603610355,
      "loss": 3.1892,
      "step": 20053
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3600854873657227,
      "learning_rate": 0.0005888542557493533,
      "loss": 3.1294,
      "step": 20054
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.42215895652771,
      "learning_rate": 0.0005888531510839678,
      "loss": 3.2326,
      "step": 20055
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4205267429351807,
      "learning_rate": 0.000588852046364879,
      "loss": 3.1187,
      "step": 20056
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.447220802307129,
      "learning_rate": 0.0005888509415920872,
      "loss": 2.9279,
      "step": 20057
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5539183616638184,
      "learning_rate": 0.0005888498367655925,
      "loss": 2.9418,
      "step": 20058
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.391408920288086,
      "learning_rate": 0.0005888487318853952,
      "loss": 3.4607,
      "step": 20059
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4506782293319702,
      "learning_rate": 0.0005888476269514954,
      "loss": 3.3129,
      "step": 20060
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5247178077697754,
      "learning_rate": 0.0005888465219638934,
      "loss": 3.0277,
      "step": 20061
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.085953712463379,
      "learning_rate": 0.0005888454169225894,
      "loss": 3.3177,
      "step": 20062
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1016530990600586,
      "learning_rate": 0.0005888443118275837,
      "loss": 3.0519,
      "step": 20063
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3394527435302734,
      "learning_rate": 0.0005888432066788762,
      "loss": 3.0798,
      "step": 20064
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4609017372131348,
      "learning_rate": 0.0005888421014764673,
      "loss": 3.0299,
      "step": 20065
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3752646446228027,
      "learning_rate": 0.0005888409962203573,
      "loss": 3.1977,
      "step": 20066
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4033896923065186,
      "learning_rate": 0.0005888398909105463,
      "loss": 3.0693,
      "step": 20067
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5199228525161743,
      "learning_rate": 0.0005888387855470345,
      "loss": 3.0058,
      "step": 20068
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5883663892745972,
      "learning_rate": 0.0005888376801298221,
      "loss": 3.1078,
      "step": 20069
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.484419584274292,
      "learning_rate": 0.0005888365746589094,
      "loss": 3.1956,
      "step": 20070
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2023990154266357,
      "learning_rate": 0.0005888354691342964,
      "loss": 3.151,
      "step": 20071
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.521756887435913,
      "learning_rate": 0.0005888343635559835,
      "loss": 3.011,
      "step": 20072
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.625143051147461,
      "learning_rate": 0.0005888332579239708,
      "loss": 3.0456,
      "step": 20073
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5097296237945557,
      "learning_rate": 0.0005888321522382586,
      "loss": 2.9718,
      "step": 20074
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4120415449142456,
      "learning_rate": 0.0005888310464988469,
      "loss": 2.9783,
      "step": 20075
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7649654150009155,
      "learning_rate": 0.0005888299407057363,
      "loss": 2.8529,
      "step": 20076
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.988266944885254,
      "learning_rate": 0.0005888288348589266,
      "loss": 3.2948,
      "step": 20077
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.7209389209747314,
      "learning_rate": 0.0005888277289584181,
      "loss": 2.9896,
      "step": 20078
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3818615674972534,
      "learning_rate": 0.0005888266230042113,
      "loss": 3.114,
      "step": 20079
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6156576871871948,
      "learning_rate": 0.0005888255169963061,
      "loss": 3.1618,
      "step": 20080
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.012755870819092,
      "learning_rate": 0.0005888244109347027,
      "loss": 3.0079,
      "step": 20081
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3958195447921753,
      "learning_rate": 0.0005888233048194014,
      "loss": 3.2067,
      "step": 20082
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0000011920928955,
      "learning_rate": 0.0005888221986504026,
      "loss": 3.1701,
      "step": 20083
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.382125973701477,
      "learning_rate": 0.000588821092427706,
      "loss": 3.0628,
      "step": 20084
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4189884662628174,
      "learning_rate": 0.0005888199861513123,
      "loss": 3.0969,
      "step": 20085
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9402893781661987,
      "learning_rate": 0.0005888188798212215,
      "loss": 2.9828,
      "step": 20086
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3189274072647095,
      "learning_rate": 0.0005888177734374336,
      "loss": 3.0158,
      "step": 20087
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2247871160507202,
      "learning_rate": 0.0005888166669999493,
      "loss": 3.162,
      "step": 20088
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5591367483139038,
      "learning_rate": 0.0005888155605087684,
      "loss": 3.4236,
      "step": 20089
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5491220951080322,
      "learning_rate": 0.0005888144539638913,
      "loss": 3.0058,
      "step": 20090
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3425556421279907,
      "learning_rate": 0.000588813347365318,
      "loss": 3.2152,
      "step": 20091
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3811599016189575,
      "learning_rate": 0.0005888122407130491,
      "loss": 2.9729,
      "step": 20092
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3840030431747437,
      "learning_rate": 0.0005888111340070842,
      "loss": 2.8746,
      "step": 20093
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3987773656845093,
      "learning_rate": 0.0005888100272474242,
      "loss": 3.2269,
      "step": 20094
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3250017166137695,
      "learning_rate": 0.0005888089204340688,
      "loss": 3.0853,
      "step": 20095
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0282108783721924,
      "learning_rate": 0.0005888078135670184,
      "loss": 3.024,
      "step": 20096
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4537707567214966,
      "learning_rate": 0.0005888067066462732,
      "loss": 3.1441,
      "step": 20097
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8513485193252563,
      "learning_rate": 0.0005888055996718333,
      "loss": 2.89,
      "step": 20098
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.869173288345337,
      "learning_rate": 0.000588804492643699,
      "loss": 3.5039,
      "step": 20099
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7945865392684937,
      "learning_rate": 0.0005888033855618706,
      "loss": 3.3665,
      "step": 20100
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5151495933532715,
      "learning_rate": 0.0005888022784263481,
      "loss": 3.4846,
      "step": 20101
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5783065557479858,
      "learning_rate": 0.0005888011712371319,
      "loss": 3.1626,
      "step": 20102
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1389331817626953,
      "learning_rate": 0.000588800063994222,
      "loss": 3.0193,
      "step": 20103
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.661785364151001,
      "learning_rate": 0.0005887989566976189,
      "loss": 2.9877,
      "step": 20104
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9909337759017944,
      "learning_rate": 0.0005887978493473224,
      "loss": 2.9781,
      "step": 20105
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.310037612915039,
      "learning_rate": 0.000588796741943333,
      "loss": 3.0122,
      "step": 20106
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.622868299484253,
      "learning_rate": 0.0005887956344856508,
      "loss": 2.8582,
      "step": 20107
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.869742512702942,
      "learning_rate": 0.0005887945269742762,
      "loss": 3.2071,
      "step": 20108
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.406947374343872,
      "learning_rate": 0.0005887934194092091,
      "loss": 3.1626,
      "step": 20109
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5484769344329834,
      "learning_rate": 0.0005887923117904499,
      "loss": 2.8773,
      "step": 20110
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5698283910751343,
      "learning_rate": 0.0005887912041179987,
      "loss": 3.2932,
      "step": 20111
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5905814170837402,
      "learning_rate": 0.0005887900963918558,
      "loss": 2.894,
      "step": 20112
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5220189094543457,
      "learning_rate": 0.0005887889886120214,
      "loss": 3.0544,
      "step": 20113
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.981519937515259,
      "learning_rate": 0.0005887878807784956,
      "loss": 3.0597,
      "step": 20114
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4797972440719604,
      "learning_rate": 0.0005887867728912788,
      "loss": 3.2274,
      "step": 20115
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5404934883117676,
      "learning_rate": 0.000588785664950371,
      "loss": 3.0695,
      "step": 20116
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4898861646652222,
      "learning_rate": 0.0005887845569557725,
      "loss": 3.1279,
      "step": 20117
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.508269190788269,
      "learning_rate": 0.0005887834489074835,
      "loss": 3.2167,
      "step": 20118
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.457247257232666,
      "learning_rate": 0.0005887823408055042,
      "loss": 3.2574,
      "step": 20119
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4672561883926392,
      "learning_rate": 0.0005887812326498349,
      "loss": 2.9154,
      "step": 20120
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2323719263076782,
      "learning_rate": 0.0005887801244404756,
      "loss": 3.1573,
      "step": 20121
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.516129732131958,
      "learning_rate": 0.0005887790161774267,
      "loss": 3.0354,
      "step": 20122
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.591703176498413,
      "learning_rate": 0.0005887779078606883,
      "loss": 3.1014,
      "step": 20123
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3797602653503418,
      "learning_rate": 0.0005887767994902606,
      "loss": 3.0651,
      "step": 20124
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.362005591392517,
      "learning_rate": 0.0005887756910661439,
      "loss": 3.3158,
      "step": 20125
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7707017660140991,
      "learning_rate": 0.0005887745825883384,
      "loss": 2.9724,
      "step": 20126
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.612046718597412,
      "learning_rate": 0.0005887734740568441,
      "loss": 3.1941,
      "step": 20127
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.5626676082611084,
      "learning_rate": 0.0005887723654716615,
      "loss": 3.31,
      "step": 20128
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.0628185272216797,
      "learning_rate": 0.0005887712568327906,
      "loss": 3.0834,
      "step": 20129
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9892531633377075,
      "learning_rate": 0.0005887701481402317,
      "loss": 3.134,
      "step": 20130
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5739238262176514,
      "learning_rate": 0.0005887690393939849,
      "loss": 3.1309,
      "step": 20131
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3553833961486816,
      "learning_rate": 0.0005887679305940506,
      "loss": 3.1747,
      "step": 20132
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0549230575561523,
      "learning_rate": 0.0005887668217404289,
      "loss": 3.1153,
      "step": 20133
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7305480241775513,
      "learning_rate": 0.0005887657128331199,
      "loss": 3.1453,
      "step": 20134
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5730669498443604,
      "learning_rate": 0.0005887646038721239,
      "loss": 3.0815,
      "step": 20135
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0529420375823975,
      "learning_rate": 0.0005887634948574411,
      "loss": 3.0072,
      "step": 20136
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4232457876205444,
      "learning_rate": 0.0005887623857890719,
      "loss": 2.9548,
      "step": 20137
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.094449043273926,
      "learning_rate": 0.000588761276667016,
      "loss": 2.9759,
      "step": 20138
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3087621927261353,
      "learning_rate": 0.0005887601674912742,
      "loss": 3.2767,
      "step": 20139
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.14708411693573,
      "learning_rate": 0.0005887590582618464,
      "loss": 2.9895,
      "step": 20140
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4857654571533203,
      "learning_rate": 0.0005887579489787328,
      "loss": 3.1036,
      "step": 20141
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3755552768707275,
      "learning_rate": 0.0005887568396419336,
      "loss": 3.2363,
      "step": 20142
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3041763305664062,
      "learning_rate": 0.0005887557302514491,
      "loss": 2.9766,
      "step": 20143
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.666716456413269,
      "learning_rate": 0.0005887546208072794,
      "loss": 2.9627,
      "step": 20144
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4177173376083374,
      "learning_rate": 0.0005887535113094248,
      "loss": 2.9286,
      "step": 20145
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.44803786277771,
      "learning_rate": 0.0005887524017578855,
      "loss": 3.2755,
      "step": 20146
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.7066450119018555,
      "learning_rate": 0.0005887512921526616,
      "loss": 3.0909,
      "step": 20147
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.866863250732422,
      "learning_rate": 0.0005887501824937535,
      "loss": 3.0427,
      "step": 20148
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.088413715362549,
      "learning_rate": 0.0005887490727811613,
      "loss": 3.0573,
      "step": 20149
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2168056964874268,
      "learning_rate": 0.0005887479630148851,
      "loss": 3.1598,
      "step": 20150
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4567675590515137,
      "learning_rate": 0.0005887468531949252,
      "loss": 3.2219,
      "step": 20151
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.6617016792297363,
      "learning_rate": 0.0005887457433212819,
      "loss": 3.1812,
      "step": 20152
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.32801353931427,
      "learning_rate": 0.0005887446333939552,
      "loss": 3.253,
      "step": 20153
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4679181575775146,
      "learning_rate": 0.0005887435234129456,
      "loss": 3.1457,
      "step": 20154
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6344025135040283,
      "learning_rate": 0.0005887424133782529,
      "loss": 3.1826,
      "step": 20155
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.387904167175293,
      "learning_rate": 0.0005887413032898776,
      "loss": 3.0998,
      "step": 20156
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4441983699798584,
      "learning_rate": 0.00058874019314782,
      "loss": 3.2611,
      "step": 20157
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5345150232315063,
      "learning_rate": 0.00058873908295208,
      "loss": 3.3165,
      "step": 20158
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2495155334472656,
      "learning_rate": 0.000588737972702658,
      "loss": 2.9429,
      "step": 20159
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4989207983016968,
      "learning_rate": 0.0005887368623995541,
      "loss": 3.0113,
      "step": 20160
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5956610441207886,
      "learning_rate": 0.0005887357520427687,
      "loss": 2.8777,
      "step": 20161
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.974759578704834,
      "learning_rate": 0.0005887346416323018,
      "loss": 3.1443,
      "step": 20162
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3410576581954956,
      "learning_rate": 0.0005887335311681537,
      "loss": 3.1486,
      "step": 20163
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8276798725128174,
      "learning_rate": 0.0005887324206503245,
      "loss": 3.1959,
      "step": 20164
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5644500255584717,
      "learning_rate": 0.0005887313100788145,
      "loss": 3.2138,
      "step": 20165
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7910525798797607,
      "learning_rate": 0.000588730199453624,
      "loss": 3.0204,
      "step": 20166
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5963884592056274,
      "learning_rate": 0.000588729088774753,
      "loss": 3.255,
      "step": 20167
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.539069414138794,
      "learning_rate": 0.0005887279780422018,
      "loss": 3.1986,
      "step": 20168
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9884679317474365,
      "learning_rate": 0.0005887268672559707,
      "loss": 2.9593,
      "step": 20169
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5413163900375366,
      "learning_rate": 0.0005887257564160598,
      "loss": 2.8066,
      "step": 20170
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6706714630126953,
      "learning_rate": 0.0005887246455224694,
      "loss": 3.2516,
      "step": 20171
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5019155740737915,
      "learning_rate": 0.0005887235345751996,
      "loss": 3.2221,
      "step": 20172
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4648061990737915,
      "learning_rate": 0.0005887224235742506,
      "loss": 3.1422,
      "step": 20173
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.515450358390808,
      "learning_rate": 0.0005887213125196228,
      "loss": 3.104,
      "step": 20174
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5631095170974731,
      "learning_rate": 0.000588720201411316,
      "loss": 3.3617,
      "step": 20175
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.40038001537323,
      "learning_rate": 0.0005887190902493308,
      "loss": 2.9556,
      "step": 20176
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.434037685394287,
      "learning_rate": 0.0005887179790336673,
      "loss": 3.1998,
      "step": 20177
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6063824892044067,
      "learning_rate": 0.0005887168677643257,
      "loss": 2.8575,
      "step": 20178
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6786472797393799,
      "learning_rate": 0.000588715756441306,
      "loss": 3.0536,
      "step": 20179
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8638269901275635,
      "learning_rate": 0.0005887146450646088,
      "loss": 3.0168,
      "step": 20180
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.204122304916382,
      "learning_rate": 0.0005887135336342341,
      "loss": 3.0328,
      "step": 20181
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5743091106414795,
      "learning_rate": 0.0005887124221501819,
      "loss": 3.1092,
      "step": 20182
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.207688331604004,
      "learning_rate": 0.0005887113106124528,
      "loss": 3.2705,
      "step": 20183
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8905134201049805,
      "learning_rate": 0.0005887101990210467,
      "loss": 3.1078,
      "step": 20184
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.123539447784424,
      "learning_rate": 0.000588709087375964,
      "loss": 2.9712,
      "step": 20185
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3537861108779907,
      "learning_rate": 0.0005887079756772048,
      "loss": 3.0486,
      "step": 20186
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7865716218948364,
      "learning_rate": 0.0005887068639247693,
      "loss": 2.8881,
      "step": 20187
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8172799348831177,
      "learning_rate": 0.0005887057521186577,
      "loss": 3.0078,
      "step": 20188
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5139786005020142,
      "learning_rate": 0.0005887046402588705,
      "loss": 3.0029,
      "step": 20189
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.732139229774475,
      "learning_rate": 0.0005887035283454075,
      "loss": 2.9909,
      "step": 20190
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6168216466903687,
      "learning_rate": 0.0005887024163782691,
      "loss": 3.2175,
      "step": 20191
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.368841290473938,
      "learning_rate": 0.0005887013043574553,
      "loss": 3.1991,
      "step": 20192
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6362638473510742,
      "learning_rate": 0.0005887001922829667,
      "loss": 3.0039,
      "step": 20193
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5780478715896606,
      "learning_rate": 0.0005886990801548032,
      "loss": 3.1276,
      "step": 20194
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3166675567626953,
      "learning_rate": 0.0005886979679729651,
      "loss": 3.2173,
      "step": 20195
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4269368648529053,
      "learning_rate": 0.0005886968557374526,
      "loss": 2.9787,
      "step": 20196
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6438710689544678,
      "learning_rate": 0.0005886957434482658,
      "loss": 3.0459,
      "step": 20197
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4767515659332275,
      "learning_rate": 0.0005886946311054051,
      "loss": 3.14,
      "step": 20198
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4793919324874878,
      "learning_rate": 0.0005886935187088706,
      "loss": 2.9744,
      "step": 20199
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.787865161895752,
      "learning_rate": 0.0005886924062586626,
      "loss": 2.9717,
      "step": 20200
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7878040075302124,
      "learning_rate": 0.0005886912937547812,
      "loss": 2.9505,
      "step": 20201
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9320926666259766,
      "learning_rate": 0.0005886901811972266,
      "loss": 3.0804,
      "step": 20202
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9554269313812256,
      "learning_rate": 0.0005886890685859991,
      "loss": 3.3407,
      "step": 20203
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5637848377227783,
      "learning_rate": 0.0005886879559210987,
      "loss": 3.2531,
      "step": 20204
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4498921632766724,
      "learning_rate": 0.000588686843202526,
      "loss": 3.0821,
      "step": 20205
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5368651151657104,
      "learning_rate": 0.0005886857304302809,
      "loss": 3.2304,
      "step": 20206
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8087693452835083,
      "learning_rate": 0.0005886846176043635,
      "loss": 3.0948,
      "step": 20207
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6121472120285034,
      "learning_rate": 0.0005886835047247742,
      "loss": 3.3049,
      "step": 20208
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4175094366073608,
      "learning_rate": 0.0005886823917915134,
      "loss": 3.0701,
      "step": 20209
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5734449625015259,
      "learning_rate": 0.000588681278804581,
      "loss": 3.1979,
      "step": 20210
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.987328290939331,
      "learning_rate": 0.0005886801657639773,
      "loss": 2.9648,
      "step": 20211
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.459729552268982,
      "learning_rate": 0.0005886790526697024,
      "loss": 3.0658,
      "step": 20212
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8906482458114624,
      "learning_rate": 0.0005886779395217567,
      "loss": 2.9799,
      "step": 20213
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.015242576599121,
      "learning_rate": 0.0005886768263201404,
      "loss": 3.2313,
      "step": 20214
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3843241930007935,
      "learning_rate": 0.0005886757130648536,
      "loss": 3.1078,
      "step": 20215
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7659857273101807,
      "learning_rate": 0.0005886745997558964,
      "loss": 3.277,
      "step": 20216
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.911975383758545,
      "learning_rate": 0.0005886734863932693,
      "loss": 3.0634,
      "step": 20217
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5750668048858643,
      "learning_rate": 0.0005886723729769722,
      "loss": 3.1747,
      "step": 20218
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6414161920547485,
      "learning_rate": 0.0005886712595070055,
      "loss": 3.2394,
      "step": 20219
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7436466217041016,
      "learning_rate": 0.0005886701459833695,
      "loss": 3.0023,
      "step": 20220
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5735422372817993,
      "learning_rate": 0.000588669032406064,
      "loss": 2.9394,
      "step": 20221
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5151878595352173,
      "learning_rate": 0.0005886679187750898,
      "loss": 3.1094,
      "step": 20222
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7472699880599976,
      "learning_rate": 0.0005886668050904466,
      "loss": 3.2521,
      "step": 20223
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5613012313842773,
      "learning_rate": 0.0005886656913521348,
      "loss": 3.1181,
      "step": 20224
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5199438333511353,
      "learning_rate": 0.0005886645775601547,
      "loss": 3.1546,
      "step": 20225
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.833552598953247,
      "learning_rate": 0.0005886634637145063,
      "loss": 3.1408,
      "step": 20226
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4722983837127686,
      "learning_rate": 0.0005886623498151899,
      "loss": 2.9316,
      "step": 20227
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7361353635787964,
      "learning_rate": 0.0005886612358622057,
      "loss": 3.0115,
      "step": 20228
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2728215456008911,
      "learning_rate": 0.000588660121855554,
      "loss": 3.0942,
      "step": 20229
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.462291955947876,
      "learning_rate": 0.000588659007795235,
      "loss": 2.977,
      "step": 20230
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5712153911590576,
      "learning_rate": 0.0005886578936812487,
      "loss": 3.0378,
      "step": 20231
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5873337984085083,
      "learning_rate": 0.0005886567795135956,
      "loss": 3.3482,
      "step": 20232
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6855441331863403,
      "learning_rate": 0.0005886556652922756,
      "loss": 3.1745,
      "step": 20233
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4859044551849365,
      "learning_rate": 0.0005886545510172892,
      "loss": 3.128,
      "step": 20234
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8156911134719849,
      "learning_rate": 0.0005886534366886364,
      "loss": 2.9747,
      "step": 20235
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.84409761428833,
      "learning_rate": 0.0005886523223063174,
      "loss": 3.1033,
      "step": 20236
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6734001636505127,
      "learning_rate": 0.0005886512078703326,
      "loss": 2.8628,
      "step": 20237
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8860259056091309,
      "learning_rate": 0.000588650093380682,
      "loss": 2.9606,
      "step": 20238
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.2752459049224854,
      "learning_rate": 0.000588648978837366,
      "loss": 3.3051,
      "step": 20239
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7179172039031982,
      "learning_rate": 0.0005886478642403847,
      "loss": 3.2083,
      "step": 20240
    },
    {
      "epoch": 0.26,
      "grad_norm": 4.7555341720581055,
      "learning_rate": 0.0005886467495897383,
      "loss": 3.0279,
      "step": 20241
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9940292835235596,
      "learning_rate": 0.0005886456348854269,
      "loss": 3.0031,
      "step": 20242
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.88200843334198,
      "learning_rate": 0.000588644520127451,
      "loss": 3.3435,
      "step": 20243
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.7681326866149902,
      "learning_rate": 0.0005886434053158106,
      "loss": 3.2076,
      "step": 20244
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9155752658843994,
      "learning_rate": 0.0005886422904505059,
      "loss": 3.0191,
      "step": 20245
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4597128629684448,
      "learning_rate": 0.0005886411755315371,
      "loss": 3.2879,
      "step": 20246
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.570676565170288,
      "learning_rate": 0.0005886400605589045,
      "loss": 3.1078,
      "step": 20247
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0702507495880127,
      "learning_rate": 0.0005886389455326083,
      "loss": 3.3058,
      "step": 20248
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7465636730194092,
      "learning_rate": 0.0005886378304526486,
      "loss": 3.2001,
      "step": 20249
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.226630449295044,
      "learning_rate": 0.0005886367153190258,
      "loss": 3.2242,
      "step": 20250
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2654688358306885,
      "learning_rate": 0.0005886356001317399,
      "loss": 3.0162,
      "step": 20251
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4475820064544678,
      "learning_rate": 0.0005886344848907912,
      "loss": 3.0823,
      "step": 20252
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5721361637115479,
      "learning_rate": 0.0005886333695961799,
      "loss": 3.0793,
      "step": 20253
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6469712257385254,
      "learning_rate": 0.0005886322542479062,
      "loss": 2.9053,
      "step": 20254
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3297327756881714,
      "learning_rate": 0.0005886311388459704,
      "loss": 3.1979,
      "step": 20255
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4835000038146973,
      "learning_rate": 0.0005886300233903725,
      "loss": 2.9767,
      "step": 20256
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4917081594467163,
      "learning_rate": 0.0005886289078811128,
      "loss": 2.9139,
      "step": 20257
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3945443630218506,
      "learning_rate": 0.0005886277923181916,
      "loss": 3.1648,
      "step": 20258
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4025928974151611,
      "learning_rate": 0.0005886266767016092,
      "loss": 3.2112,
      "step": 20259
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0257041454315186,
      "learning_rate": 0.0005886255610313654,
      "loss": 3.1956,
      "step": 20260
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5738754272460938,
      "learning_rate": 0.0005886244453074608,
      "loss": 3.3326,
      "step": 20261
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4175941944122314,
      "learning_rate": 0.0005886233295298955,
      "loss": 2.8629,
      "step": 20262
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6417624950408936,
      "learning_rate": 0.0005886222136986696,
      "loss": 3.1504,
      "step": 20263
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7085387706756592,
      "learning_rate": 0.0005886210978137834,
      "loss": 3.0223,
      "step": 20264
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.6735832691192627,
      "learning_rate": 0.0005886199818752371,
      "loss": 3.2003,
      "step": 20265
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.644694209098816,
      "learning_rate": 0.0005886188658830309,
      "loss": 2.9449,
      "step": 20266
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.741429090499878,
      "learning_rate": 0.0005886177498371651,
      "loss": 3.3228,
      "step": 20267
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.2575666904449463,
      "learning_rate": 0.0005886166337376395,
      "loss": 3.2888,
      "step": 20268
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7476770877838135,
      "learning_rate": 0.0005886155175844549,
      "loss": 3.1458,
      "step": 20269
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4228246212005615,
      "learning_rate": 0.0005886144013776112,
      "loss": 3.1291,
      "step": 20270
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0472261905670166,
      "learning_rate": 0.0005886132851171085,
      "loss": 3.219,
      "step": 20271
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.454437017440796,
      "learning_rate": 0.0005886121688029473,
      "loss": 2.9704,
      "step": 20272
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.0092270374298096,
      "learning_rate": 0.0005886110524351275,
      "loss": 2.9896,
      "step": 20273
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.398208498954773,
      "learning_rate": 0.0005886099360136495,
      "loss": 2.9804,
      "step": 20274
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.554722547531128,
      "learning_rate": 0.0005886088195385137,
      "loss": 3.2392,
      "step": 20275
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.294929027557373,
      "learning_rate": 0.0005886077030097198,
      "loss": 2.993,
      "step": 20276
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5478098392486572,
      "learning_rate": 0.0005886065864272683,
      "loss": 2.8413,
      "step": 20277
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.267547369003296,
      "learning_rate": 0.0005886054697911595,
      "loss": 3.0988,
      "step": 20278
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.001225471496582,
      "learning_rate": 0.0005886043531013935,
      "loss": 3.1543,
      "step": 20279
    },
    {
      "epoch": 0.26,
      "grad_norm": 5.698742866516113,
      "learning_rate": 0.0005886032363579703,
      "loss": 3.0099,
      "step": 20280
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4548099040985107,
      "learning_rate": 0.0005886021195608905,
      "loss": 3.0626,
      "step": 20281
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3942086696624756,
      "learning_rate": 0.0005886010027101542,
      "loss": 3.1207,
      "step": 20282
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1091883182525635,
      "learning_rate": 0.0005885998858057613,
      "loss": 3.0516,
      "step": 20283
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7265573740005493,
      "learning_rate": 0.0005885987688477122,
      "loss": 2.9364,
      "step": 20284
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.872806191444397,
      "learning_rate": 0.0005885976518360073,
      "loss": 3.1091,
      "step": 20285
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4677659273147583,
      "learning_rate": 0.0005885965347706466,
      "loss": 3.3494,
      "step": 20286
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8067176342010498,
      "learning_rate": 0.0005885954176516304,
      "loss": 3.1363,
      "step": 20287
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3745601177215576,
      "learning_rate": 0.0005885943004789589,
      "loss": 2.9647,
      "step": 20288
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.143975019454956,
      "learning_rate": 0.0005885931832526322,
      "loss": 3.0296,
      "step": 20289
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8559167385101318,
      "learning_rate": 0.0005885920659726506,
      "loss": 3.0424,
      "step": 20290
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1750893592834473,
      "learning_rate": 0.0005885909486390143,
      "loss": 2.9287,
      "step": 20291
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3671178817749023,
      "learning_rate": 0.0005885898312517234,
      "loss": 3.2232,
      "step": 20292
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5606213808059692,
      "learning_rate": 0.0005885887138107783,
      "loss": 2.9499,
      "step": 20293
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8071762323379517,
      "learning_rate": 0.0005885875963161792,
      "loss": 3.1214,
      "step": 20294
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3546262979507446,
      "learning_rate": 0.000588586478767926,
      "loss": 3.1019,
      "step": 20295
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5484769344329834,
      "learning_rate": 0.0005885853611660193,
      "loss": 3.0778,
      "step": 20296
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5312633514404297,
      "learning_rate": 0.000588584243510459,
      "loss": 3.316,
      "step": 20297
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5339277982711792,
      "learning_rate": 0.0005885831258012456,
      "loss": 3.0445,
      "step": 20298
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4526112079620361,
      "learning_rate": 0.000588582008038379,
      "loss": 2.9515,
      "step": 20299
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8263791799545288,
      "learning_rate": 0.0005885808902218596,
      "loss": 3.0439,
      "step": 20300
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8205246925354004,
      "learning_rate": 0.0005885797723516876,
      "loss": 3.1337,
      "step": 20301
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.901171326637268,
      "learning_rate": 0.0005885786544278631,
      "loss": 3.3385,
      "step": 20302
    },
    {
      "epoch": 0.26,
      "grad_norm": 3.1655373573303223,
      "learning_rate": 0.0005885775364503865,
      "loss": 3.0675,
      "step": 20303
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.5289146900177,
      "learning_rate": 0.0005885764184192577,
      "loss": 3.2778,
      "step": 20304
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5776901245117188,
      "learning_rate": 0.0005885753003344773,
      "loss": 3.0643,
      "step": 20305
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.87478506565094,
      "learning_rate": 0.0005885741821960453,
      "loss": 3.1467,
      "step": 20306
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.550865650177002,
      "learning_rate": 0.0005885730640039617,
      "loss": 3.1699,
      "step": 20307
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.294464111328125,
      "learning_rate": 0.0005885719457582271,
      "loss": 2.9371,
      "step": 20308
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7864779233932495,
      "learning_rate": 0.0005885708274588415,
      "loss": 3.0384,
      "step": 20309
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.6516456604003906,
      "learning_rate": 0.000588569709105805,
      "loss": 2.9911,
      "step": 20310
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9362177848815918,
      "learning_rate": 0.0005885685906991182,
      "loss": 2.9917,
      "step": 20311
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3173553943634033,
      "learning_rate": 0.000588567472238781,
      "loss": 2.8577,
      "step": 20312
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.971515417098999,
      "learning_rate": 0.0005885663537247935,
      "loss": 2.9788,
      "step": 20313
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9466179609298706,
      "learning_rate": 0.0005885652351571561,
      "loss": 3.0737,
      "step": 20314
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3642297983169556,
      "learning_rate": 0.000588564116535869,
      "loss": 3.0073,
      "step": 20315
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6736390590667725,
      "learning_rate": 0.0005885629978609324,
      "loss": 3.1698,
      "step": 20316
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.1921613216400146,
      "learning_rate": 0.0005885618791323465,
      "loss": 3.2852,
      "step": 20317
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0664610862731934,
      "learning_rate": 0.0005885607603501116,
      "loss": 3.2524,
      "step": 20318
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5450960397720337,
      "learning_rate": 0.0005885596415142276,
      "loss": 3.043,
      "step": 20319
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7479535341262817,
      "learning_rate": 0.000588558522624695,
      "loss": 2.8377,
      "step": 20320
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.9776201248168945,
      "learning_rate": 0.000588557403681514,
      "loss": 3.2296,
      "step": 20321
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8864277601242065,
      "learning_rate": 0.0005885562846846846,
      "loss": 3.0759,
      "step": 20322
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5617156028747559,
      "learning_rate": 0.0005885551656342072,
      "loss": 3.0323,
      "step": 20323
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0141611099243164,
      "learning_rate": 0.000588554046530082,
      "loss": 3.1197,
      "step": 20324
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6188732385635376,
      "learning_rate": 0.0005885529273723092,
      "loss": 3.0541,
      "step": 20325
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.05373215675354,
      "learning_rate": 0.0005885518081608888,
      "loss": 3.0622,
      "step": 20326
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4672925472259521,
      "learning_rate": 0.0005885506888958212,
      "loss": 3.1744,
      "step": 20327
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.1960948705673218,
      "learning_rate": 0.0005885495695771067,
      "loss": 2.9856,
      "step": 20328
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.8515084981918335,
      "learning_rate": 0.0005885484502047453,
      "loss": 2.9829,
      "step": 20329
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9541070461273193,
      "learning_rate": 0.0005885473307787372,
      "loss": 3.0018,
      "step": 20330
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.41145658493042,
      "learning_rate": 0.000588546211299083,
      "loss": 2.8196,
      "step": 20331
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.4792370796203613,
      "learning_rate": 0.0005885450917657824,
      "loss": 2.9496,
      "step": 20332
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.0628554821014404,
      "learning_rate": 0.0005885439721788358,
      "loss": 2.99,
      "step": 20333
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.588032603263855,
      "learning_rate": 0.0005885428525382435,
      "loss": 2.9624,
      "step": 20334
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5096862316131592,
      "learning_rate": 0.0005885417328440057,
      "loss": 2.9268,
      "step": 20335
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5050934553146362,
      "learning_rate": 0.0005885406130961224,
      "loss": 3.0569,
      "step": 20336
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3691036701202393,
      "learning_rate": 0.000588539493294594,
      "loss": 3.0044,
      "step": 20337
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7745534181594849,
      "learning_rate": 0.0005885383734394208,
      "loss": 3.0607,
      "step": 20338
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7188745737075806,
      "learning_rate": 0.0005885372535306026,
      "loss": 3.0452,
      "step": 20339
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7763928174972534,
      "learning_rate": 0.00058853613356814,
      "loss": 3.1164,
      "step": 20340
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3876343965530396,
      "learning_rate": 0.0005885350135520332,
      "loss": 3.1285,
      "step": 20341
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.7016994953155518,
      "learning_rate": 0.0005885338934822821,
      "loss": 2.9735,
      "step": 20342
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6187702417373657,
      "learning_rate": 0.0005885327733588872,
      "loss": 3.233,
      "step": 20343
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.5189708471298218,
      "learning_rate": 0.0005885316531818487,
      "loss": 3.2455,
      "step": 20344
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6979378461837769,
      "learning_rate": 0.0005885305329511666,
      "loss": 3.0465,
      "step": 20345
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3436321020126343,
      "learning_rate": 0.0005885294126668413,
      "loss": 2.8455,
      "step": 20346
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.4729236364364624,
      "learning_rate": 0.0005885282923288729,
      "loss": 3.007,
      "step": 20347
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3324435949325562,
      "learning_rate": 0.0005885271719372617,
      "loss": 3.0704,
      "step": 20348
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.6510872840881348,
      "learning_rate": 0.0005885260514920077,
      "loss": 3.0845,
      "step": 20349
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.3331571817398071,
      "learning_rate": 0.0005885249309931115,
      "loss": 3.172,
      "step": 20350
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.7829554080963135,
      "learning_rate": 0.0005885238104405728,
      "loss": 2.9599,
      "step": 20351
    },
    {
      "epoch": 0.26,
      "grad_norm": 1.9102615118026733,
      "learning_rate": 0.0005885226898343923,
      "loss": 3.0727,
      "step": 20352
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4500312805175781,
      "learning_rate": 0.0005885215691745699,
      "loss": 3.3488,
      "step": 20353
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4080426692962646,
      "learning_rate": 0.0005885204484611059,
      "loss": 3.0874,
      "step": 20354
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2812970876693726,
      "learning_rate": 0.0005885193276940004,
      "loss": 3.1316,
      "step": 20355
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2168291807174683,
      "learning_rate": 0.0005885182068732539,
      "loss": 2.7786,
      "step": 20356
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6169722080230713,
      "learning_rate": 0.0005885170859988663,
      "loss": 3.23,
      "step": 20357
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5975496768951416,
      "learning_rate": 0.0005885159650708379,
      "loss": 3.1434,
      "step": 20358
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4388854503631592,
      "learning_rate": 0.000588514844089169,
      "loss": 3.0795,
      "step": 20359
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3407487869262695,
      "learning_rate": 0.0005885137230538597,
      "loss": 3.121,
      "step": 20360
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3350735902786255,
      "learning_rate": 0.0005885126019649102,
      "loss": 3.1979,
      "step": 20361
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4331982135772705,
      "learning_rate": 0.0005885114808223209,
      "loss": 3.4816,
      "step": 20362
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3306989669799805,
      "learning_rate": 0.0005885103596260918,
      "loss": 3.0255,
      "step": 20363
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5118374824523926,
      "learning_rate": 0.0005885092383762232,
      "loss": 2.9776,
      "step": 20364
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4745160341262817,
      "learning_rate": 0.0005885081170727153,
      "loss": 2.8682,
      "step": 20365
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.720007061958313,
      "learning_rate": 0.0005885069957155683,
      "loss": 3.0743,
      "step": 20366
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.434006690979004,
      "learning_rate": 0.0005885058743047823,
      "loss": 2.9841,
      "step": 20367
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.429025650024414,
      "learning_rate": 0.0005885047528403577,
      "loss": 3.2593,
      "step": 20368
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5506548881530762,
      "learning_rate": 0.0005885036313222946,
      "loss": 3.0659,
      "step": 20369
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5642026662826538,
      "learning_rate": 0.0005885025097505933,
      "loss": 3.2209,
      "step": 20370
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3289785385131836,
      "learning_rate": 0.0005885013881252538,
      "loss": 3.2655,
      "step": 20371
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9775484800338745,
      "learning_rate": 0.0005885002664462765,
      "loss": 2.9868,
      "step": 20372
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5848616361618042,
      "learning_rate": 0.0005884991447136616,
      "loss": 3.1471,
      "step": 20373
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1852777004241943,
      "learning_rate": 0.0005884980229274093,
      "loss": 3.1948,
      "step": 20374
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.7210464477539062,
      "learning_rate": 0.0005884969010875197,
      "loss": 3.0961,
      "step": 20375
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7535744905471802,
      "learning_rate": 0.000588495779193993,
      "loss": 3.1994,
      "step": 20376
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.664313554763794,
      "learning_rate": 0.0005884946572468296,
      "loss": 3.4003,
      "step": 20377
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5447825193405151,
      "learning_rate": 0.0005884935352460296,
      "loss": 2.8593,
      "step": 20378
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3613754510879517,
      "learning_rate": 0.0005884924131915932,
      "loss": 3.1831,
      "step": 20379
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4792670011520386,
      "learning_rate": 0.0005884912910835206,
      "loss": 3.1699,
      "step": 20380
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1309571266174316,
      "learning_rate": 0.000588490168921812,
      "loss": 3.0312,
      "step": 20381
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5854427814483643,
      "learning_rate": 0.0005884890467064677,
      "loss": 3.2366,
      "step": 20382
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7529363632202148,
      "learning_rate": 0.0005884879244374878,
      "loss": 3.0791,
      "step": 20383
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4576374292373657,
      "learning_rate": 0.0005884868021148724,
      "loss": 3.293,
      "step": 20384
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5579769611358643,
      "learning_rate": 0.0005884856797386221,
      "loss": 3.1319,
      "step": 20385
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5645663738250732,
      "learning_rate": 0.0005884845573087368,
      "loss": 3.0991,
      "step": 20386
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6404362916946411,
      "learning_rate": 0.0005884834348252166,
      "loss": 2.9567,
      "step": 20387
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8468290567398071,
      "learning_rate": 0.0005884823122880621,
      "loss": 2.8681,
      "step": 20388
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.090017318725586,
      "learning_rate": 0.0005884811896972733,
      "loss": 2.9995,
      "step": 20389
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.676912546157837,
      "learning_rate": 0.0005884800670528502,
      "loss": 3.2391,
      "step": 20390
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7220087051391602,
      "learning_rate": 0.0005884789443547934,
      "loss": 3.1053,
      "step": 20391
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9551422595977783,
      "learning_rate": 0.0005884778216031028,
      "loss": 3.193,
      "step": 20392
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.98307466506958,
      "learning_rate": 0.0005884766987977789,
      "loss": 2.8466,
      "step": 20393
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6601059436798096,
      "learning_rate": 0.0005884755759388216,
      "loss": 3.2318,
      "step": 20394
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9200044870376587,
      "learning_rate": 0.0005884744530262312,
      "loss": 3.0224,
      "step": 20395
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8912136554718018,
      "learning_rate": 0.0005884733300600079,
      "loss": 3.3176,
      "step": 20396
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.7534239292144775,
      "learning_rate": 0.0005884722070401522,
      "loss": 3.0206,
      "step": 20397
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7750695943832397,
      "learning_rate": 0.0005884710839666639,
      "loss": 2.9029,
      "step": 20398
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.0167617797851562,
      "learning_rate": 0.0005884699608395434,
      "loss": 3.1081,
      "step": 20399
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.952223300933838,
      "learning_rate": 0.0005884688376587909,
      "loss": 3.2304,
      "step": 20400
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6439203023910522,
      "learning_rate": 0.0005884677144244066,
      "loss": 3.5574,
      "step": 20401
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8648135662078857,
      "learning_rate": 0.0005884665911363908,
      "loss": 2.9044,
      "step": 20402
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2980823516845703,
      "learning_rate": 0.0005884654677947435,
      "loss": 3.1897,
      "step": 20403
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0069949626922607,
      "learning_rate": 0.000588464344399465,
      "loss": 2.8071,
      "step": 20404
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1964441537857056,
      "learning_rate": 0.0005884632209505556,
      "loss": 3.0997,
      "step": 20405
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3838735818862915,
      "learning_rate": 0.0005884620974480155,
      "loss": 2.9316,
      "step": 20406
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4776341915130615,
      "learning_rate": 0.0005884609738918448,
      "loss": 3.1116,
      "step": 20407
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.572637915611267,
      "learning_rate": 0.0005884598502820437,
      "loss": 3.0045,
      "step": 20408
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2550297975540161,
      "learning_rate": 0.0005884587266186125,
      "loss": 3.1427,
      "step": 20409
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9670522212982178,
      "learning_rate": 0.0005884576029015514,
      "loss": 2.9668,
      "step": 20410
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3372126817703247,
      "learning_rate": 0.0005884564791308605,
      "loss": 3.3191,
      "step": 20411
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5160678625106812,
      "learning_rate": 0.0005884553553065403,
      "loss": 3.0156,
      "step": 20412
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.456561326980591,
      "learning_rate": 0.0005884542314285906,
      "loss": 3.1608,
      "step": 20413
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5182517766952515,
      "learning_rate": 0.0005884531074970119,
      "loss": 2.7982,
      "step": 20414
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5006355047225952,
      "learning_rate": 0.0005884519835118044,
      "loss": 3.064,
      "step": 20415
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4116348028182983,
      "learning_rate": 0.0005884508594729681,
      "loss": 3.1236,
      "step": 20416
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3684030771255493,
      "learning_rate": 0.0005884497353805033,
      "loss": 3.1669,
      "step": 20417
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3998639583587646,
      "learning_rate": 0.0005884486112344104,
      "loss": 3.2119,
      "step": 20418
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3008700609207153,
      "learning_rate": 0.0005884474870346894,
      "loss": 3.0811,
      "step": 20419
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3837031126022339,
      "learning_rate": 0.0005884463627813405,
      "loss": 2.6983,
      "step": 20420
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2719767093658447,
      "learning_rate": 0.000588445238474364,
      "loss": 3.2365,
      "step": 20421
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3909825086593628,
      "learning_rate": 0.0005884441141137602,
      "loss": 2.943,
      "step": 20422
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4088075160980225,
      "learning_rate": 0.0005884429896995291,
      "loss": 2.9931,
      "step": 20423
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2877857685089111,
      "learning_rate": 0.000588441865231671,
      "loss": 3.3473,
      "step": 20424
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.472533106803894,
      "learning_rate": 0.0005884407407101862,
      "loss": 3.0833,
      "step": 20425
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.2050974369049072,
      "learning_rate": 0.0005884396161350747,
      "loss": 3.1681,
      "step": 20426
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8311251401901245,
      "learning_rate": 0.0005884384915063369,
      "loss": 3.1153,
      "step": 20427
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7137048244476318,
      "learning_rate": 0.0005884373668239729,
      "loss": 2.9577,
      "step": 20428
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6558748483657837,
      "learning_rate": 0.000588436242087983,
      "loss": 2.9947,
      "step": 20429
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.347800374031067,
      "learning_rate": 0.0005884351172983673,
      "loss": 3.1173,
      "step": 20430
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.252985715866089,
      "learning_rate": 0.0005884339924551261,
      "loss": 3.084,
      "step": 20431
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6498987674713135,
      "learning_rate": 0.0005884328675582596,
      "loss": 2.9753,
      "step": 20432
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3254045248031616,
      "learning_rate": 0.0005884317426077679,
      "loss": 3.1319,
      "step": 20433
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.461708426475525,
      "learning_rate": 0.0005884306176036514,
      "loss": 3.2256,
      "step": 20434
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.481142520904541,
      "learning_rate": 0.0005884294925459101,
      "loss": 3.2406,
      "step": 20435
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.648965358734131,
      "learning_rate": 0.0005884283674345444,
      "loss": 3.0517,
      "step": 20436
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4813629388809204,
      "learning_rate": 0.0005884272422695543,
      "loss": 3.2165,
      "step": 20437
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.817213535308838,
      "learning_rate": 0.0005884261170509403,
      "loss": 3.0866,
      "step": 20438
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8695727586746216,
      "learning_rate": 0.0005884249917787024,
      "loss": 3.1063,
      "step": 20439
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9822113513946533,
      "learning_rate": 0.0005884238664528408,
      "loss": 3.053,
      "step": 20440
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5712213516235352,
      "learning_rate": 0.0005884227410733558,
      "loss": 3.2423,
      "step": 20441
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.297304630279541,
      "learning_rate": 0.0005884216156402474,
      "loss": 3.2793,
      "step": 20442
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8215044736862183,
      "learning_rate": 0.0005884204901535162,
      "loss": 2.9636,
      "step": 20443
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.368695616722107,
      "learning_rate": 0.0005884193646131621,
      "loss": 3.0018,
      "step": 20444
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.661807894706726,
      "learning_rate": 0.0005884182390191854,
      "loss": 3.1398,
      "step": 20445
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8532642126083374,
      "learning_rate": 0.0005884171133715864,
      "loss": 2.8891,
      "step": 20446
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1441543102264404,
      "learning_rate": 0.000588415987670365,
      "loss": 3.0994,
      "step": 20447
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1747872829437256,
      "learning_rate": 0.0005884148619155217,
      "loss": 3.1443,
      "step": 20448
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7558804750442505,
      "learning_rate": 0.0005884137361070568,
      "loss": 2.9822,
      "step": 20449
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7128185033798218,
      "learning_rate": 0.0005884126102449701,
      "loss": 3.1105,
      "step": 20450
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4775357246398926,
      "learning_rate": 0.0005884114843292623,
      "loss": 2.9399,
      "step": 20451
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5401780605316162,
      "learning_rate": 0.0005884103583599331,
      "loss": 3.1609,
      "step": 20452
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.342678427696228,
      "learning_rate": 0.0005884092323369831,
      "loss": 3.361,
      "step": 20453
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.575719952583313,
      "learning_rate": 0.0005884081062604124,
      "loss": 3.0436,
      "step": 20454
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5838518142700195,
      "learning_rate": 0.0005884069801302212,
      "loss": 3.0754,
      "step": 20455
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2124260663986206,
      "learning_rate": 0.0005884058539464096,
      "loss": 3.297,
      "step": 20456
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5826200246810913,
      "learning_rate": 0.0005884047277089781,
      "loss": 3.085,
      "step": 20457
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5661633014678955,
      "learning_rate": 0.0005884036014179266,
      "loss": 3.2245,
      "step": 20458
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8040475845336914,
      "learning_rate": 0.0005884024750732553,
      "loss": 3.0107,
      "step": 20459
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7753827571868896,
      "learning_rate": 0.0005884013486749647,
      "loss": 3.0703,
      "step": 20460
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4802651405334473,
      "learning_rate": 0.0005884002222230548,
      "loss": 3.2619,
      "step": 20461
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5737378597259521,
      "learning_rate": 0.0005883990957175259,
      "loss": 3.0253,
      "step": 20462
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3301085233688354,
      "learning_rate": 0.0005883979691583781,
      "loss": 3.1736,
      "step": 20463
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.594320297241211,
      "learning_rate": 0.0005883968425456117,
      "loss": 3.2275,
      "step": 20464
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.618004560470581,
      "learning_rate": 0.0005883957158792268,
      "loss": 3.1268,
      "step": 20465
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4544020891189575,
      "learning_rate": 0.0005883945891592238,
      "loss": 3.2058,
      "step": 20466
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7572031021118164,
      "learning_rate": 0.0005883934623856027,
      "loss": 2.9208,
      "step": 20467
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.575584888458252,
      "learning_rate": 0.000588392335558364,
      "loss": 3.1779,
      "step": 20468
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2878410816192627,
      "learning_rate": 0.0005883912086775075,
      "loss": 2.9276,
      "step": 20469
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9146524667739868,
      "learning_rate": 0.0005883900817430337,
      "loss": 3.1064,
      "step": 20470
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.2969281673431396,
      "learning_rate": 0.0005883889547549429,
      "loss": 3.0547,
      "step": 20471
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5514205694198608,
      "learning_rate": 0.000588387827713235,
      "loss": 3.1033,
      "step": 20472
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.631809711456299,
      "learning_rate": 0.0005883867006179104,
      "loss": 3.0612,
      "step": 20473
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.734442114830017,
      "learning_rate": 0.0005883855734689692,
      "loss": 3.0233,
      "step": 20474
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6290284395217896,
      "learning_rate": 0.0005883844462664117,
      "loss": 2.9342,
      "step": 20475
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4295082092285156,
      "learning_rate": 0.0005883833190102381,
      "loss": 3.0803,
      "step": 20476
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6246670484542847,
      "learning_rate": 0.0005883821917004486,
      "loss": 3.154,
      "step": 20477
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6902978420257568,
      "learning_rate": 0.0005883810643370434,
      "loss": 3.2437,
      "step": 20478
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6515995264053345,
      "learning_rate": 0.0005883799369200228,
      "loss": 3.1172,
      "step": 20479
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5986864566802979,
      "learning_rate": 0.0005883788094493868,
      "loss": 3.1718,
      "step": 20480
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5386912822723389,
      "learning_rate": 0.0005883776819251359,
      "loss": 3.098,
      "step": 20481
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5665017366409302,
      "learning_rate": 0.00058837655434727,
      "loss": 3.2467,
      "step": 20482
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.9279232025146484,
      "learning_rate": 0.0005883754267157895,
      "loss": 3.0647,
      "step": 20483
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4900411367416382,
      "learning_rate": 0.0005883742990306946,
      "loss": 3.2168,
      "step": 20484
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1421828269958496,
      "learning_rate": 0.0005883731712919854,
      "loss": 3.0667,
      "step": 20485
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.600248098373413,
      "learning_rate": 0.0005883720434996623,
      "loss": 3.207,
      "step": 20486
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6388310194015503,
      "learning_rate": 0.0005883709156537252,
      "loss": 3.1277,
      "step": 20487
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.780473232269287,
      "learning_rate": 0.0005883697877541747,
      "loss": 3.2623,
      "step": 20488
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7555532455444336,
      "learning_rate": 0.0005883686598010107,
      "loss": 3.3529,
      "step": 20489
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6632157564163208,
      "learning_rate": 0.0005883675317942336,
      "loss": 3.0885,
      "step": 20490
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.9911553859710693,
      "learning_rate": 0.0005883664037338433,
      "loss": 3.0628,
      "step": 20491
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8385342359542847,
      "learning_rate": 0.0005883652756198405,
      "loss": 2.7391,
      "step": 20492
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5516366958618164,
      "learning_rate": 0.0005883641474522252,
      "loss": 3.1066,
      "step": 20493
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3232896327972412,
      "learning_rate": 0.0005883630192309974,
      "loss": 2.9758,
      "step": 20494
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6907153129577637,
      "learning_rate": 0.0005883618909561574,
      "loss": 3.2944,
      "step": 20495
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5457448959350586,
      "learning_rate": 0.0005883607626277057,
      "loss": 3.231,
      "step": 20496
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6015055179595947,
      "learning_rate": 0.0005883596342456421,
      "loss": 3.0423,
      "step": 20497
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4928035736083984,
      "learning_rate": 0.0005883585058099671,
      "loss": 3.3378,
      "step": 20498
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5617890357971191,
      "learning_rate": 0.0005883573773206808,
      "loss": 3.0971,
      "step": 20499
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7344900369644165,
      "learning_rate": 0.0005883562487777835,
      "loss": 3.0842,
      "step": 20500
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.491599678993225,
      "learning_rate": 0.0005883551201812752,
      "loss": 3.3871,
      "step": 20501
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.2383053302764893,
      "learning_rate": 0.0005883539915311562,
      "loss": 3.0312,
      "step": 20502
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.61922025680542,
      "learning_rate": 0.0005883528628274269,
      "loss": 3.1458,
      "step": 20503
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8549160957336426,
      "learning_rate": 0.0005883517340700874,
      "loss": 3.1039,
      "step": 20504
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.501321792602539,
      "learning_rate": 0.0005883506052591377,
      "loss": 3.1525,
      "step": 20505
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.8364996910095215,
      "learning_rate": 0.0005883494763945782,
      "loss": 3.1375,
      "step": 20506
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5517257452011108,
      "learning_rate": 0.0005883483474764092,
      "loss": 3.2105,
      "step": 20507
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6639620065689087,
      "learning_rate": 0.0005883472185046308,
      "loss": 3.0498,
      "step": 20508
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4514728784561157,
      "learning_rate": 0.000588346089479243,
      "loss": 3.2865,
      "step": 20509
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.8068459033966064,
      "learning_rate": 0.0005883449604002464,
      "loss": 3.0831,
      "step": 20510
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5027827024459839,
      "learning_rate": 0.0005883438312676409,
      "loss": 2.9578,
      "step": 20511
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5864248275756836,
      "learning_rate": 0.000588342702081427,
      "loss": 3.045,
      "step": 20512
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5291352272033691,
      "learning_rate": 0.0005883415728416047,
      "loss": 3.1833,
      "step": 20513
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8204199075698853,
      "learning_rate": 0.0005883404435481742,
      "loss": 2.9826,
      "step": 20514
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.606811285018921,
      "learning_rate": 0.0005883393142011358,
      "loss": 3.1926,
      "step": 20515
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9921932220458984,
      "learning_rate": 0.0005883381848004897,
      "loss": 2.9369,
      "step": 20516
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.040159225463867,
      "learning_rate": 0.0005883370553462361,
      "loss": 2.9158,
      "step": 20517
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4522016048431396,
      "learning_rate": 0.0005883359258383752,
      "loss": 3.2112,
      "step": 20518
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6724791526794434,
      "learning_rate": 0.0005883347962769072,
      "loss": 2.9839,
      "step": 20519
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8568984270095825,
      "learning_rate": 0.0005883336666618323,
      "loss": 3.0184,
      "step": 20520
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6450855731964111,
      "learning_rate": 0.0005883325369931507,
      "loss": 3.0804,
      "step": 20521
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4830074310302734,
      "learning_rate": 0.0005883314072708626,
      "loss": 3.1047,
      "step": 20522
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4640425443649292,
      "learning_rate": 0.0005883302774949684,
      "loss": 3.1373,
      "step": 20523
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4409267902374268,
      "learning_rate": 0.0005883291476654681,
      "loss": 3.0757,
      "step": 20524
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7436102628707886,
      "learning_rate": 0.0005883280177823619,
      "loss": 3.1743,
      "step": 20525
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7809711694717407,
      "learning_rate": 0.0005883268878456501,
      "loss": 3.0216,
      "step": 20526
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5917644500732422,
      "learning_rate": 0.000588325757855333,
      "loss": 3.2079,
      "step": 20527
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8820626735687256,
      "learning_rate": 0.0005883246278114105,
      "loss": 3.339,
      "step": 20528
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.527557134628296,
      "learning_rate": 0.0005883234977138831,
      "loss": 3.2295,
      "step": 20529
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1286356449127197,
      "learning_rate": 0.0005883223675627509,
      "loss": 3.3375,
      "step": 20530
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5611803531646729,
      "learning_rate": 0.0005883212373580143,
      "loss": 3.2736,
      "step": 20531
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3901410102844238,
      "learning_rate": 0.0005883201070996731,
      "loss": 3.2728,
      "step": 20532
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0076773166656494,
      "learning_rate": 0.000588318976787728,
      "loss": 3.1368,
      "step": 20533
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4602622985839844,
      "learning_rate": 0.0005883178464221788,
      "loss": 3.1091,
      "step": 20534
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4182243347167969,
      "learning_rate": 0.0005883167160030258,
      "loss": 3.1539,
      "step": 20535
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1600961685180664,
      "learning_rate": 0.0005883155855302693,
      "loss": 3.1606,
      "step": 20536
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4172511100769043,
      "learning_rate": 0.0005883144550039096,
      "loss": 2.9327,
      "step": 20537
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.273963689804077,
      "learning_rate": 0.0005883133244239467,
      "loss": 3.339,
      "step": 20538
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6110708713531494,
      "learning_rate": 0.0005883121937903811,
      "loss": 3.2452,
      "step": 20539
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.421933650970459,
      "learning_rate": 0.0005883110631032126,
      "loss": 3.3916,
      "step": 20540
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7573734521865845,
      "learning_rate": 0.0005883099323624418,
      "loss": 3.5775,
      "step": 20541
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3831462860107422,
      "learning_rate": 0.0005883088015680687,
      "loss": 3.0985,
      "step": 20542
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4756243228912354,
      "learning_rate": 0.0005883076707200934,
      "loss": 3.3433,
      "step": 20543
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.419620156288147,
      "learning_rate": 0.0005883065398185164,
      "loss": 2.9192,
      "step": 20544
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1626496315002441,
      "learning_rate": 0.0005883054088633378,
      "loss": 3.0498,
      "step": 20545
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.441024899482727,
      "learning_rate": 0.0005883042778545577,
      "loss": 3.1294,
      "step": 20546
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4728153944015503,
      "learning_rate": 0.0005883031467921764,
      "loss": 3.0905,
      "step": 20547
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4394150972366333,
      "learning_rate": 0.0005883020156761941,
      "loss": 3.3382,
      "step": 20548
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.376024603843689,
      "learning_rate": 0.000588300884506611,
      "loss": 3.0574,
      "step": 20549
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4487800598144531,
      "learning_rate": 0.0005882997532834274,
      "loss": 3.1075,
      "step": 20550
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4980324506759644,
      "learning_rate": 0.0005882986220066435,
      "loss": 3.2595,
      "step": 20551
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.077732801437378,
      "learning_rate": 0.0005882974906762593,
      "loss": 3.0281,
      "step": 20552
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6742125749588013,
      "learning_rate": 0.0005882963592922752,
      "loss": 3.2685,
      "step": 20553
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5491390228271484,
      "learning_rate": 0.0005882952278546914,
      "loss": 2.9046,
      "step": 20554
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.2780940532684326,
      "learning_rate": 0.0005882940963635079,
      "loss": 3.1562,
      "step": 20555
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9958817958831787,
      "learning_rate": 0.0005882929648187253,
      "loss": 2.6068,
      "step": 20556
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.524962067604065,
      "learning_rate": 0.0005882918332203435,
      "loss": 2.7965,
      "step": 20557
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.150414228439331,
      "learning_rate": 0.0005882907015683629,
      "loss": 2.9998,
      "step": 20558
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8914095163345337,
      "learning_rate": 0.0005882895698627835,
      "loss": 3.2188,
      "step": 20559
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.466268539428711,
      "learning_rate": 0.0005882884381036057,
      "loss": 3.297,
      "step": 20560
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8546303510665894,
      "learning_rate": 0.0005882873062908296,
      "loss": 2.8451,
      "step": 20561
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9740159511566162,
      "learning_rate": 0.0005882861744244554,
      "loss": 3.0175,
      "step": 20562
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3873367309570312,
      "learning_rate": 0.0005882850425044834,
      "loss": 2.8419,
      "step": 20563
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.7148406505584717,
      "learning_rate": 0.0005882839105309138,
      "loss": 3.0624,
      "step": 20564
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0900838375091553,
      "learning_rate": 0.0005882827785037467,
      "loss": 2.97,
      "step": 20565
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4264471530914307,
      "learning_rate": 0.0005882816464229825,
      "loss": 3.1337,
      "step": 20566
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5216883420944214,
      "learning_rate": 0.0005882805142886212,
      "loss": 3.2318,
      "step": 20567
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.704971194267273,
      "learning_rate": 0.0005882793821006631,
      "loss": 3.1725,
      "step": 20568
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3316371440887451,
      "learning_rate": 0.0005882782498591085,
      "loss": 2.8565,
      "step": 20569
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3711751699447632,
      "learning_rate": 0.0005882771175639575,
      "loss": 3.0655,
      "step": 20570
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7162824869155884,
      "learning_rate": 0.0005882759852152104,
      "loss": 3.1249,
      "step": 20571
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3523768186569214,
      "learning_rate": 0.0005882748528128673,
      "loss": 3.1556,
      "step": 20572
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.196478843688965,
      "learning_rate": 0.0005882737203569285,
      "loss": 3.0822,
      "step": 20573
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4169117212295532,
      "learning_rate": 0.0005882725878473941,
      "loss": 3.2708,
      "step": 20574
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7103276252746582,
      "learning_rate": 0.0005882714552842644,
      "loss": 3.0708,
      "step": 20575
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.8380606174468994,
      "learning_rate": 0.0005882703226675396,
      "loss": 3.3443,
      "step": 20576
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4824740886688232,
      "learning_rate": 0.00058826918999722,
      "loss": 3.3472,
      "step": 20577
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0663838386535645,
      "learning_rate": 0.0005882680572733056,
      "loss": 2.9425,
      "step": 20578
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.44841730594635,
      "learning_rate": 0.0005882669244957968,
      "loss": 3.2737,
      "step": 20579
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5903263092041016,
      "learning_rate": 0.0005882657916646938,
      "loss": 3.0352,
      "step": 20580
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.587998390197754,
      "learning_rate": 0.0005882646587799966,
      "loss": 3.0717,
      "step": 20581
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4982795715332031,
      "learning_rate": 0.0005882635258417056,
      "loss": 2.9638,
      "step": 20582
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4992918968200684,
      "learning_rate": 0.000588262392849821,
      "loss": 3.0936,
      "step": 20583
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4951075315475464,
      "learning_rate": 0.0005882612598043429,
      "loss": 3.1329,
      "step": 20584
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.42200767993927,
      "learning_rate": 0.0005882601267052717,
      "loss": 3.2541,
      "step": 20585
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.2728705406188965,
      "learning_rate": 0.0005882589935526074,
      "loss": 3.1276,
      "step": 20586
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3684625625610352,
      "learning_rate": 0.0005882578603463504,
      "loss": 3.0905,
      "step": 20587
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5462924242019653,
      "learning_rate": 0.0005882567270865007,
      "loss": 3.2323,
      "step": 20588
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.613122582435608,
      "learning_rate": 0.0005882555937730588,
      "loss": 3.0924,
      "step": 20589
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.505560278892517,
      "learning_rate": 0.0005882544604060247,
      "loss": 3.0829,
      "step": 20590
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4347572326660156,
      "learning_rate": 0.0005882533269853987,
      "loss": 3.1827,
      "step": 20591
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3557642698287964,
      "learning_rate": 0.0005882521935111808,
      "loss": 2.863,
      "step": 20592
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8858131170272827,
      "learning_rate": 0.0005882510599833715,
      "loss": 2.8431,
      "step": 20593
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7349083423614502,
      "learning_rate": 0.0005882499264019708,
      "loss": 3.0869,
      "step": 20594
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.122774839401245,
      "learning_rate": 0.000588248792766979,
      "loss": 3.1085,
      "step": 20595
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0651743412017822,
      "learning_rate": 0.0005882476590783963,
      "loss": 3.2887,
      "step": 20596
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4475641250610352,
      "learning_rate": 0.000588246525336223,
      "loss": 3.0092,
      "step": 20597
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8045622110366821,
      "learning_rate": 0.0005882453915404592,
      "loss": 2.9144,
      "step": 20598
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6982736587524414,
      "learning_rate": 0.0005882442576911052,
      "loss": 3.0877,
      "step": 20599
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.519313931465149,
      "learning_rate": 0.0005882431237881612,
      "loss": 3.1021,
      "step": 20600
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.659701943397522,
      "learning_rate": 0.0005882419898316271,
      "loss": 3.0729,
      "step": 20601
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9171816110610962,
      "learning_rate": 0.0005882408558215036,
      "loss": 2.9879,
      "step": 20602
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5306264162063599,
      "learning_rate": 0.0005882397217577905,
      "loss": 3.2862,
      "step": 20603
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7099556922912598,
      "learning_rate": 0.0005882385876404884,
      "loss": 3.3742,
      "step": 20604
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7784676551818848,
      "learning_rate": 0.0005882374534695972,
      "loss": 3.1123,
      "step": 20605
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.217592716217041,
      "learning_rate": 0.0005882363192451172,
      "loss": 2.9261,
      "step": 20606
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.983532190322876,
      "learning_rate": 0.0005882351849670487,
      "loss": 3.0184,
      "step": 20607
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4033914804458618,
      "learning_rate": 0.0005882340506353918,
      "loss": 3.2918,
      "step": 20608
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.724151849746704,
      "learning_rate": 0.0005882329162501468,
      "loss": 3.2322,
      "step": 20609
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.2748608589172363,
      "learning_rate": 0.0005882317818113138,
      "loss": 3.3296,
      "step": 20610
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9631155729293823,
      "learning_rate": 0.000588230647318893,
      "loss": 3.2111,
      "step": 20611
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.342787504196167,
      "learning_rate": 0.0005882295127728848,
      "loss": 3.2258,
      "step": 20612
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.78954017162323,
      "learning_rate": 0.0005882283781732892,
      "loss": 3.1103,
      "step": 20613
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7682602405548096,
      "learning_rate": 0.0005882272435201066,
      "loss": 2.9352,
      "step": 20614
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2410470247268677,
      "learning_rate": 0.0005882261088133371,
      "loss": 3.4095,
      "step": 20615
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8075917959213257,
      "learning_rate": 0.0005882249740529809,
      "loss": 3.3276,
      "step": 20616
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7489571571350098,
      "learning_rate": 0.0005882238392390382,
      "loss": 3.1937,
      "step": 20617
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4901868104934692,
      "learning_rate": 0.0005882227043715093,
      "loss": 3.412,
      "step": 20618
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3649303913116455,
      "learning_rate": 0.0005882215694503943,
      "loss": 3.024,
      "step": 20619
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4320423603057861,
      "learning_rate": 0.0005882204344756935,
      "loss": 2.8515,
      "step": 20620
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4886518716812134,
      "learning_rate": 0.0005882192994474072,
      "loss": 3.1869,
      "step": 20621
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3773647546768188,
      "learning_rate": 0.0005882181643655352,
      "loss": 3.2858,
      "step": 20622
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6233834028244019,
      "learning_rate": 0.0005882170292300782,
      "loss": 2.9128,
      "step": 20623
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7907695770263672,
      "learning_rate": 0.0005882158940410363,
      "loss": 3.1309,
      "step": 20624
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3209404945373535,
      "learning_rate": 0.0005882147587984094,
      "loss": 3.3802,
      "step": 20625
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.651206612586975,
      "learning_rate": 0.0005882136235021981,
      "loss": 3.1498,
      "step": 20626
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7270785570144653,
      "learning_rate": 0.0005882124881524025,
      "loss": 3.2776,
      "step": 20627
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.547429084777832,
      "learning_rate": 0.0005882113527490225,
      "loss": 3.2982,
      "step": 20628
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0932271480560303,
      "learning_rate": 0.0005882102172920588,
      "loss": 2.8532,
      "step": 20629
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2681516408920288,
      "learning_rate": 0.0005882090817815114,
      "loss": 3.084,
      "step": 20630
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2724672555923462,
      "learning_rate": 0.0005882079462173804,
      "loss": 3.0219,
      "step": 20631
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.794499158859253,
      "learning_rate": 0.000588206810599666,
      "loss": 3.2998,
      "step": 20632
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6768600940704346,
      "learning_rate": 0.0005882056749283687,
      "loss": 3.1745,
      "step": 20633
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.236435651779175,
      "learning_rate": 0.0005882045392034884,
      "loss": 2.9479,
      "step": 20634
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1388301849365234,
      "learning_rate": 0.0005882034034250254,
      "loss": 3.0344,
      "step": 20635
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.789422631263733,
      "learning_rate": 0.0005882022675929801,
      "loss": 2.9356,
      "step": 20636
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3497366905212402,
      "learning_rate": 0.0005882011317073526,
      "loss": 2.9372,
      "step": 20637
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.3940646648406982,
      "learning_rate": 0.0005881999957681428,
      "loss": 2.8994,
      "step": 20638
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6600375175476074,
      "learning_rate": 0.0005881988597753513,
      "loss": 3.0453,
      "step": 20639
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.019724130630493,
      "learning_rate": 0.0005881977237289783,
      "loss": 3.187,
      "step": 20640
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8130412101745605,
      "learning_rate": 0.0005881965876290238,
      "loss": 2.9991,
      "step": 20641
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7710528373718262,
      "learning_rate": 0.0005881954514754882,
      "loss": 3.1094,
      "step": 20642
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4626398086547852,
      "learning_rate": 0.0005881943152683715,
      "loss": 3.2338,
      "step": 20643
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1943552494049072,
      "learning_rate": 0.0005881931790076741,
      "loss": 3.2297,
      "step": 20644
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.625880479812622,
      "learning_rate": 0.0005881920426933962,
      "loss": 2.9687,
      "step": 20645
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.45999014377594,
      "learning_rate": 0.0005881909063255379,
      "loss": 2.8921,
      "step": 20646
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6532301902770996,
      "learning_rate": 0.0005881897699040995,
      "loss": 2.9494,
      "step": 20647
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6543153524398804,
      "learning_rate": 0.0005881886334290811,
      "loss": 2.9708,
      "step": 20648
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6839138269424438,
      "learning_rate": 0.0005881874969004831,
      "loss": 3.1481,
      "step": 20649
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3473238945007324,
      "learning_rate": 0.0005881863603183057,
      "loss": 3.0633,
      "step": 20650
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8013815879821777,
      "learning_rate": 0.0005881852236825488,
      "loss": 3.1128,
      "step": 20651
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9539257287979126,
      "learning_rate": 0.0005881840869932128,
      "loss": 3.1259,
      "step": 20652
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3428977727890015,
      "learning_rate": 0.0005881829502502981,
      "loss": 2.9848,
      "step": 20653
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.851177453994751,
      "learning_rate": 0.0005881818134538048,
      "loss": 3.253,
      "step": 20654
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0165534019470215,
      "learning_rate": 0.0005881806766037329,
      "loss": 3.0846,
      "step": 20655
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.833941102027893,
      "learning_rate": 0.0005881795397000828,
      "loss": 3.2476,
      "step": 20656
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0101566314697266,
      "learning_rate": 0.0005881784027428547,
      "loss": 3.1224,
      "step": 20657
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7852975130081177,
      "learning_rate": 0.0005881772657320488,
      "loss": 2.8108,
      "step": 20658
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3427292108535767,
      "learning_rate": 0.0005881761286676653,
      "loss": 3.0188,
      "step": 20659
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3450207710266113,
      "learning_rate": 0.0005881749915497044,
      "loss": 2.8249,
      "step": 20660
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2514257431030273,
      "learning_rate": 0.0005881738543781664,
      "loss": 3.1141,
      "step": 20661
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.243459701538086,
      "learning_rate": 0.0005881727171530514,
      "loss": 3.085,
      "step": 20662
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4695756435394287,
      "learning_rate": 0.0005881715798743598,
      "loss": 3.0538,
      "step": 20663
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7382267713546753,
      "learning_rate": 0.0005881704425420914,
      "loss": 3.1664,
      "step": 20664
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5844309329986572,
      "learning_rate": 0.0005881693051562468,
      "loss": 3.403,
      "step": 20665
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4348247051239014,
      "learning_rate": 0.000588168167716826,
      "loss": 3.1255,
      "step": 20666
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4771196842193604,
      "learning_rate": 0.0005881670302238294,
      "loss": 3.1905,
      "step": 20667
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8302110433578491,
      "learning_rate": 0.0005881658926772572,
      "loss": 3.2082,
      "step": 20668
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7161438465118408,
      "learning_rate": 0.0005881647550771093,
      "loss": 3.0088,
      "step": 20669
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.01223087310791,
      "learning_rate": 0.0005881636174233863,
      "loss": 3.2295,
      "step": 20670
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.474241018295288,
      "learning_rate": 0.0005881624797160881,
      "loss": 2.9845,
      "step": 20671
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3525525331497192,
      "learning_rate": 0.0005881613419552153,
      "loss": 2.9657,
      "step": 20672
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2818586826324463,
      "learning_rate": 0.0005881602041407676,
      "loss": 3.256,
      "step": 20673
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6852591037750244,
      "learning_rate": 0.0005881590662727456,
      "loss": 3.0087,
      "step": 20674
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.798237919807434,
      "learning_rate": 0.0005881579283511493,
      "loss": 3.1753,
      "step": 20675
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8136160373687744,
      "learning_rate": 0.0005881567903759791,
      "loss": 3.2482,
      "step": 20676
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.547667145729065,
      "learning_rate": 0.0005881556523472351,
      "loss": 2.9691,
      "step": 20677
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4841175079345703,
      "learning_rate": 0.0005881545142649176,
      "loss": 2.9663,
      "step": 20678
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4458414316177368,
      "learning_rate": 0.0005881533761290266,
      "loss": 3.109,
      "step": 20679
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6656532287597656,
      "learning_rate": 0.0005881522379395625,
      "loss": 3.0353,
      "step": 20680
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.431252360343933,
      "learning_rate": 0.0005881510996965254,
      "loss": 3.2204,
      "step": 20681
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6220896244049072,
      "learning_rate": 0.0005881499613999157,
      "loss": 3.2265,
      "step": 20682
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7592270374298096,
      "learning_rate": 0.0005881488230497334,
      "loss": 3.0457,
      "step": 20683
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7466896772384644,
      "learning_rate": 0.0005881476846459788,
      "loss": 3.2062,
      "step": 20684
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9582782983779907,
      "learning_rate": 0.0005881465461886521,
      "loss": 3.1724,
      "step": 20685
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7282600402832031,
      "learning_rate": 0.0005881454076777535,
      "loss": 3.0289,
      "step": 20686
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5835378170013428,
      "learning_rate": 0.0005881442691132834,
      "loss": 2.9476,
      "step": 20687
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9317560195922852,
      "learning_rate": 0.0005881431304952416,
      "loss": 3.3349,
      "step": 20688
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4421778917312622,
      "learning_rate": 0.0005881419918236286,
      "loss": 2.9879,
      "step": 20689
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.337349534034729,
      "learning_rate": 0.0005881408530984447,
      "loss": 3.2488,
      "step": 20690
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3784127235412598,
      "learning_rate": 0.0005881397143196899,
      "loss": 3.2181,
      "step": 20691
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7552202939987183,
      "learning_rate": 0.0005881385754873645,
      "loss": 3.2927,
      "step": 20692
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8975359201431274,
      "learning_rate": 0.0005881374366014686,
      "loss": 3.0473,
      "step": 20693
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5679523944854736,
      "learning_rate": 0.0005881362976620026,
      "loss": 2.8014,
      "step": 20694
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8893074989318848,
      "learning_rate": 0.0005881351586689667,
      "loss": 3.0007,
      "step": 20695
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7441248893737793,
      "learning_rate": 0.000588134019622361,
      "loss": 3.0693,
      "step": 20696
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.873169183731079,
      "learning_rate": 0.0005881328805221855,
      "loss": 3.1482,
      "step": 20697
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6435177326202393,
      "learning_rate": 0.000588131741368441,
      "loss": 2.9537,
      "step": 20698
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.107961893081665,
      "learning_rate": 0.0005881306021611272,
      "loss": 3.0151,
      "step": 20699
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5466258525848389,
      "learning_rate": 0.0005881294629002446,
      "loss": 3.1105,
      "step": 20700
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7730966806411743,
      "learning_rate": 0.0005881283235857931,
      "loss": 2.973,
      "step": 20701
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1450674533843994,
      "learning_rate": 0.0005881271842177732,
      "loss": 2.937,
      "step": 20702
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6046031713485718,
      "learning_rate": 0.0005881260447961851,
      "loss": 3.1388,
      "step": 20703
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6295466423034668,
      "learning_rate": 0.0005881249053210288,
      "loss": 3.2585,
      "step": 20704
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0272769927978516,
      "learning_rate": 0.0005881237657923048,
      "loss": 2.9993,
      "step": 20705
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4007179737091064,
      "learning_rate": 0.0005881226262100129,
      "loss": 3.3137,
      "step": 20706
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6064808368682861,
      "learning_rate": 0.0005881214865741538,
      "loss": 3.2788,
      "step": 20707
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5866880416870117,
      "learning_rate": 0.0005881203468847273,
      "loss": 3.3128,
      "step": 20708
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.57013738155365,
      "learning_rate": 0.0005881192071417339,
      "loss": 2.9543,
      "step": 20709
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.61570143699646,
      "learning_rate": 0.0005881180673451737,
      "loss": 3.2561,
      "step": 20710
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3257155418395996,
      "learning_rate": 0.000588116927495047,
      "loss": 2.9066,
      "step": 20711
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.81028151512146,
      "learning_rate": 0.0005881157875913537,
      "loss": 3.1665,
      "step": 20712
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3551216125488281,
      "learning_rate": 0.0005881146476340944,
      "loss": 3.2802,
      "step": 20713
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6231993436813354,
      "learning_rate": 0.0005881135076232692,
      "loss": 2.9678,
      "step": 20714
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7712613344192505,
      "learning_rate": 0.0005881123675588781,
      "loss": 3.0037,
      "step": 20715
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.849113941192627,
      "learning_rate": 0.0005881112274409216,
      "loss": 3.3376,
      "step": 20716
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1552284955978394,
      "learning_rate": 0.0005881100872693997,
      "loss": 3.1799,
      "step": 20717
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2869584560394287,
      "learning_rate": 0.0005881089470443127,
      "loss": 2.9083,
      "step": 20718
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7949461936950684,
      "learning_rate": 0.0005881078067656608,
      "loss": 2.9937,
      "step": 20719
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6131799221038818,
      "learning_rate": 0.0005881066664334443,
      "loss": 3.2055,
      "step": 20720
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3411178588867188,
      "learning_rate": 0.0005881055260476632,
      "loss": 3.014,
      "step": 20721
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6826666593551636,
      "learning_rate": 0.000588104385608318,
      "loss": 3.3351,
      "step": 20722
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9133573770523071,
      "learning_rate": 0.0005881032451154086,
      "loss": 3.0353,
      "step": 20723
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3092081546783447,
      "learning_rate": 0.0005881021045689355,
      "loss": 3.162,
      "step": 20724
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.8987205028533936,
      "learning_rate": 0.0005881009639688988,
      "loss": 3.1083,
      "step": 20725
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.791797161102295,
      "learning_rate": 0.0005880998233152986,
      "loss": 3.1904,
      "step": 20726
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.590059518814087,
      "learning_rate": 0.0005880986826081352,
      "loss": 3.1922,
      "step": 20727
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7611628770828247,
      "learning_rate": 0.0005880975418474088,
      "loss": 3.0385,
      "step": 20728
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.47489595413208,
      "learning_rate": 0.0005880964010331197,
      "loss": 2.9849,
      "step": 20729
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.055342674255371,
      "learning_rate": 0.000588095260165268,
      "loss": 3.1069,
      "step": 20730
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5045222043991089,
      "learning_rate": 0.000588094119243854,
      "loss": 3.2738,
      "step": 20731
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6425492763519287,
      "learning_rate": 0.0005880929782688778,
      "loss": 3.0327,
      "step": 20732
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.358520746231079,
      "learning_rate": 0.0005880918372403398,
      "loss": 3.0672,
      "step": 20733
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7260981798171997,
      "learning_rate": 0.0005880906961582399,
      "loss": 3.2019,
      "step": 20734
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9803800582885742,
      "learning_rate": 0.0005880895550225787,
      "loss": 3.1371,
      "step": 20735
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6912175416946411,
      "learning_rate": 0.000588088413833356,
      "loss": 3.1636,
      "step": 20736
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9505047798156738,
      "learning_rate": 0.0005880872725905723,
      "loss": 3.106,
      "step": 20737
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7549402713775635,
      "learning_rate": 0.0005880861312942279,
      "loss": 3.2862,
      "step": 20738
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.517056703567505,
      "learning_rate": 0.0005880849899443227,
      "loss": 3.4877,
      "step": 20739
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.5816102027893066,
      "learning_rate": 0.000588083848540857,
      "loss": 2.9674,
      "step": 20740
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5298963785171509,
      "learning_rate": 0.0005880827070838312,
      "loss": 2.9042,
      "step": 20741
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7140882015228271,
      "learning_rate": 0.0005880815655732454,
      "loss": 3.2239,
      "step": 20742
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8845980167388916,
      "learning_rate": 0.0005880804240090998,
      "loss": 3.2233,
      "step": 20743
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3071486949920654,
      "learning_rate": 0.0005880792823913945,
      "loss": 3.14,
      "step": 20744
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.364001989364624,
      "learning_rate": 0.0005880781407201299,
      "loss": 2.8554,
      "step": 20745
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.533983826637268,
      "learning_rate": 0.0005880769989953062,
      "loss": 3.1344,
      "step": 20746
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.411041498184204,
      "learning_rate": 0.0005880758572169234,
      "loss": 3.298,
      "step": 20747
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4389286041259766,
      "learning_rate": 0.0005880747153849819,
      "loss": 3.1914,
      "step": 20748
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5772935152053833,
      "learning_rate": 0.0005880735734994819,
      "loss": 3.1076,
      "step": 20749
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7539640665054321,
      "learning_rate": 0.0005880724315604235,
      "loss": 3.0646,
      "step": 20750
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5592906475067139,
      "learning_rate": 0.0005880712895678071,
      "loss": 2.8817,
      "step": 20751
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2464032173156738,
      "learning_rate": 0.0005880701475216328,
      "loss": 2.9358,
      "step": 20752
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3089048862457275,
      "learning_rate": 0.0005880690054219009,
      "loss": 2.8945,
      "step": 20753
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4869533777236938,
      "learning_rate": 0.0005880678632686114,
      "loss": 3.3463,
      "step": 20754
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6240564584732056,
      "learning_rate": 0.0005880667210617647,
      "loss": 3.128,
      "step": 20755
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.522226095199585,
      "learning_rate": 0.0005880655788013609,
      "loss": 3.1285,
      "step": 20756
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6768429279327393,
      "learning_rate": 0.0005880644364874004,
      "loss": 3.0464,
      "step": 20757
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8654967546463013,
      "learning_rate": 0.0005880632941198832,
      "loss": 3.2323,
      "step": 20758
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0820400714874268,
      "learning_rate": 0.0005880621516988096,
      "loss": 2.9895,
      "step": 20759
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.768120527267456,
      "learning_rate": 0.0005880610092241798,
      "loss": 2.9733,
      "step": 20760
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9012970924377441,
      "learning_rate": 0.000588059866695994,
      "loss": 3.3183,
      "step": 20761
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.404642105102539,
      "learning_rate": 0.0005880587241142525,
      "loss": 3.1542,
      "step": 20762
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.453600525856018,
      "learning_rate": 0.0005880575814789554,
      "loss": 3.3248,
      "step": 20763
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3481550216674805,
      "learning_rate": 0.000588056438790103,
      "loss": 3.108,
      "step": 20764
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.827155590057373,
      "learning_rate": 0.0005880552960476955,
      "loss": 3.0382,
      "step": 20765
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5366567373275757,
      "learning_rate": 0.0005880541532517329,
      "loss": 2.8707,
      "step": 20766
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.285571336746216,
      "learning_rate": 0.0005880530104022158,
      "loss": 3.1289,
      "step": 20767
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.7156667709350586,
      "learning_rate": 0.0005880518674991442,
      "loss": 3.1735,
      "step": 20768
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8681660890579224,
      "learning_rate": 0.0005880507245425181,
      "loss": 3.2718,
      "step": 20769
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2967315912246704,
      "learning_rate": 0.0005880495815323381,
      "loss": 2.9466,
      "step": 20770
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7571438550949097,
      "learning_rate": 0.0005880484384686042,
      "loss": 3.1065,
      "step": 20771
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.889099359512329,
      "learning_rate": 0.0005880472953513167,
      "loss": 3.1204,
      "step": 20772
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9185450077056885,
      "learning_rate": 0.0005880461521804757,
      "loss": 2.9704,
      "step": 20773
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9288814067840576,
      "learning_rate": 0.0005880450089560817,
      "loss": 3.0873,
      "step": 20774
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.896416425704956,
      "learning_rate": 0.0005880438656781344,
      "loss": 3.1483,
      "step": 20775
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.066424608230591,
      "learning_rate": 0.0005880427223466345,
      "loss": 3.3753,
      "step": 20776
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4693107604980469,
      "learning_rate": 0.000588041578961582,
      "loss": 3.0824,
      "step": 20777
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.264698028564453,
      "learning_rate": 0.0005880404355229771,
      "loss": 2.9916,
      "step": 20778
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.484276533126831,
      "learning_rate": 0.0005880392920308199,
      "loss": 3.1543,
      "step": 20779
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0921432971954346,
      "learning_rate": 0.000588038148485111,
      "loss": 3.0686,
      "step": 20780
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9791616201400757,
      "learning_rate": 0.0005880370048858503,
      "loss": 3.0871,
      "step": 20781
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.915130376815796,
      "learning_rate": 0.000588035861233038,
      "loss": 3.1052,
      "step": 20782
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.352324366569519,
      "learning_rate": 0.0005880347175266746,
      "loss": 3.1322,
      "step": 20783
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3858351707458496,
      "learning_rate": 0.0005880335737667599,
      "loss": 2.9832,
      "step": 20784
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7091273069381714,
      "learning_rate": 0.0005880324299532943,
      "loss": 3.1505,
      "step": 20785
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4776610136032104,
      "learning_rate": 0.0005880312860862782,
      "loss": 2.903,
      "step": 20786
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4759678840637207,
      "learning_rate": 0.0005880301421657115,
      "loss": 3.2964,
      "step": 20787
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7321439981460571,
      "learning_rate": 0.0005880289981915946,
      "loss": 3.0963,
      "step": 20788
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4042760133743286,
      "learning_rate": 0.0005880278541639277,
      "loss": 3.1234,
      "step": 20789
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5167869329452515,
      "learning_rate": 0.000588026710082711,
      "loss": 3.025,
      "step": 20790
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6395933628082275,
      "learning_rate": 0.0005880255659479447,
      "loss": 3.087,
      "step": 20791
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5151935815811157,
      "learning_rate": 0.0005880244217596289,
      "loss": 3.2213,
      "step": 20792
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3992253541946411,
      "learning_rate": 0.0005880232775177641,
      "loss": 2.8482,
      "step": 20793
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2335689067840576,
      "learning_rate": 0.0005880221332223503,
      "loss": 3.0628,
      "step": 20794
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3687210083007812,
      "learning_rate": 0.0005880209888733877,
      "loss": 3.0245,
      "step": 20795
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8966609239578247,
      "learning_rate": 0.0005880198444708765,
      "loss": 3.2144,
      "step": 20796
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.312533974647522,
      "learning_rate": 0.000588018700014817,
      "loss": 3.0464,
      "step": 20797
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.16914701461792,
      "learning_rate": 0.0005880175555052095,
      "loss": 3.2325,
      "step": 20798
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8148897886276245,
      "learning_rate": 0.000588016410942054,
      "loss": 2.9317,
      "step": 20799
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4936662912368774,
      "learning_rate": 0.0005880152663253508,
      "loss": 3.307,
      "step": 20800
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.435584306716919,
      "learning_rate": 0.0005880141216551001,
      "loss": 3.0185,
      "step": 20801
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.330589771270752,
      "learning_rate": 0.0005880129769313022,
      "loss": 3.0658,
      "step": 20802
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7916457653045654,
      "learning_rate": 0.0005880118321539572,
      "loss": 3.0509,
      "step": 20803
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4422122240066528,
      "learning_rate": 0.0005880106873230655,
      "loss": 3.0664,
      "step": 20804
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.0781757831573486,
      "learning_rate": 0.0005880095424386271,
      "loss": 3.066,
      "step": 20805
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.568908929824829,
      "learning_rate": 0.0005880083975006422,
      "loss": 3.0572,
      "step": 20806
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5662312507629395,
      "learning_rate": 0.0005880072525091112,
      "loss": 2.8439,
      "step": 20807
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4169156551361084,
      "learning_rate": 0.0005880061074640342,
      "loss": 3.0047,
      "step": 20808
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3787915706634521,
      "learning_rate": 0.0005880049623654114,
      "loss": 3.31,
      "step": 20809
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4589282274246216,
      "learning_rate": 0.0005880038172132431,
      "loss": 3.2432,
      "step": 20810
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.747910737991333,
      "learning_rate": 0.0005880026720075294,
      "loss": 2.8817,
      "step": 20811
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0570695400238037,
      "learning_rate": 0.0005880015267482705,
      "loss": 3.1439,
      "step": 20812
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6072890758514404,
      "learning_rate": 0.0005880003814354669,
      "loss": 3.2205,
      "step": 20813
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.275371551513672,
      "learning_rate": 0.0005879992360691184,
      "loss": 3.3105,
      "step": 20814
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9831135272979736,
      "learning_rate": 0.0005879980906492254,
      "loss": 2.8915,
      "step": 20815
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5340907573699951,
      "learning_rate": 0.0005879969451757882,
      "loss": 3.3421,
      "step": 20816
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7133055925369263,
      "learning_rate": 0.0005879957996488069,
      "loss": 3.0014,
      "step": 20817
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1459438800811768,
      "learning_rate": 0.0005879946540682817,
      "loss": 3.0155,
      "step": 20818
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.378808856010437,
      "learning_rate": 0.000587993508434213,
      "loss": 3.1166,
      "step": 20819
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4561470746994019,
      "learning_rate": 0.0005879923627466007,
      "loss": 3.0659,
      "step": 20820
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.382736325263977,
      "learning_rate": 0.0005879912170054454,
      "loss": 3.2442,
      "step": 20821
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4025198221206665,
      "learning_rate": 0.0005879900712107469,
      "loss": 3.0621,
      "step": 20822
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.20282781124115,
      "learning_rate": 0.0005879889253625057,
      "loss": 3.1,
      "step": 20823
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4316306114196777,
      "learning_rate": 0.0005879877794607219,
      "loss": 3.1849,
      "step": 20824
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4748222827911377,
      "learning_rate": 0.0005879866335053957,
      "loss": 3.2377,
      "step": 20825
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7662675380706787,
      "learning_rate": 0.0005879854874965274,
      "loss": 3.1257,
      "step": 20826
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6257506608963013,
      "learning_rate": 0.000587984341434117,
      "loss": 3.1713,
      "step": 20827
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3312022686004639,
      "learning_rate": 0.0005879831953181651,
      "loss": 3.2266,
      "step": 20828
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.450435996055603,
      "learning_rate": 0.0005879820491486716,
      "loss": 3.2994,
      "step": 20829
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.021366834640503,
      "learning_rate": 0.0005879809029256369,
      "loss": 3.2691,
      "step": 20830
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6567927598953247,
      "learning_rate": 0.000587979756649061,
      "loss": 3.0951,
      "step": 20831
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7927398681640625,
      "learning_rate": 0.0005879786103189442,
      "loss": 3.0003,
      "step": 20832
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.491190195083618,
      "learning_rate": 0.0005879774639352867,
      "loss": 3.3533,
      "step": 20833
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.089035987854004,
      "learning_rate": 0.000587976317498089,
      "loss": 3.1499,
      "step": 20834
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.378955602645874,
      "learning_rate": 0.0005879751710073509,
      "loss": 3.1248,
      "step": 20835
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6940852403640747,
      "learning_rate": 0.0005879740244630726,
      "loss": 3.2309,
      "step": 20836
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.495995044708252,
      "learning_rate": 0.0005879728778652547,
      "loss": 3.078,
      "step": 20837
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.622900366783142,
      "learning_rate": 0.0005879717312138972,
      "loss": 2.997,
      "step": 20838
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7352700233459473,
      "learning_rate": 0.0005879705845090004,
      "loss": 3.0235,
      "step": 20839
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.075939893722534,
      "learning_rate": 0.0005879694377505643,
      "loss": 3.34,
      "step": 20840
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6083428859710693,
      "learning_rate": 0.0005879682909385893,
      "loss": 3.311,
      "step": 20841
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5077323913574219,
      "learning_rate": 0.0005879671440730755,
      "loss": 3.1883,
      "step": 20842
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.352442741394043,
      "learning_rate": 0.0005879659971540232,
      "loss": 2.9892,
      "step": 20843
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0597333908081055,
      "learning_rate": 0.0005879648501814326,
      "loss": 3.1093,
      "step": 20844
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9183146953582764,
      "learning_rate": 0.000587963703155304,
      "loss": 3.0285,
      "step": 20845
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6956353187561035,
      "learning_rate": 0.0005879625560756374,
      "loss": 3.1019,
      "step": 20846
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5506867170333862,
      "learning_rate": 0.0005879614089424331,
      "loss": 3.1425,
      "step": 20847
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.342140555381775,
      "learning_rate": 0.0005879602617556913,
      "loss": 3.0796,
      "step": 20848
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5040825605392456,
      "learning_rate": 0.0005879591145154123,
      "loss": 3.3522,
      "step": 20849
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.544050693511963,
      "learning_rate": 0.0005879579672215964,
      "loss": 3.1366,
      "step": 20850
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.582080364227295,
      "learning_rate": 0.0005879568198742435,
      "loss": 3.1556,
      "step": 20851
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5405681133270264,
      "learning_rate": 0.0005879556724733542,
      "loss": 3.2172,
      "step": 20852
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.32883358001709,
      "learning_rate": 0.0005879545250189282,
      "loss": 3.0765,
      "step": 20853
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6393840312957764,
      "learning_rate": 0.0005879533775109663,
      "loss": 3.2075,
      "step": 20854
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0721068382263184,
      "learning_rate": 0.0005879522299494682,
      "loss": 3.2808,
      "step": 20855
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5240784883499146,
      "learning_rate": 0.0005879510823344345,
      "loss": 3.0515,
      "step": 20856
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.411400079727173,
      "learning_rate": 0.0005879499346658653,
      "loss": 3.1552,
      "step": 20857
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3564422130584717,
      "learning_rate": 0.0005879487869437605,
      "loss": 3.3033,
      "step": 20858
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7277940511703491,
      "learning_rate": 0.0005879476391681208,
      "loss": 3.2786,
      "step": 20859
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.310588002204895,
      "learning_rate": 0.0005879464913389462,
      "loss": 3.1652,
      "step": 20860
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9934167861938477,
      "learning_rate": 0.0005879453434562368,
      "loss": 2.9222,
      "step": 20861
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9339836835861206,
      "learning_rate": 0.0005879441955199931,
      "loss": 3.032,
      "step": 20862
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.374956488609314,
      "learning_rate": 0.0005879430475302151,
      "loss": 3.0624,
      "step": 20863
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7730281352996826,
      "learning_rate": 0.000587941899486903,
      "loss": 2.9012,
      "step": 20864
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5480914115905762,
      "learning_rate": 0.0005879407513900571,
      "loss": 3.0288,
      "step": 20865
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3522841930389404,
      "learning_rate": 0.0005879396032396775,
      "loss": 3.1426,
      "step": 20866
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7436048984527588,
      "learning_rate": 0.0005879384550357645,
      "loss": 2.9875,
      "step": 20867
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3117176294326782,
      "learning_rate": 0.0005879373067783183,
      "loss": 3.2122,
      "step": 20868
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3775185346603394,
      "learning_rate": 0.0005879361584673391,
      "loss": 3.1109,
      "step": 20869
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4122014045715332,
      "learning_rate": 0.0005879350101028273,
      "loss": 3.2246,
      "step": 20870
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3940277099609375,
      "learning_rate": 0.0005879338616847829,
      "loss": 3.1232,
      "step": 20871
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3337877988815308,
      "learning_rate": 0.000587932713213206,
      "loss": 3.1993,
      "step": 20872
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4736615419387817,
      "learning_rate": 0.0005879315646880972,
      "loss": 3.1623,
      "step": 20873
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2097671031951904,
      "learning_rate": 0.0005879304161094562,
      "loss": 3.0213,
      "step": 20874
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3649519681930542,
      "learning_rate": 0.0005879292674772838,
      "loss": 3.1855,
      "step": 20875
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2498183250427246,
      "learning_rate": 0.0005879281187915798,
      "loss": 3.2262,
      "step": 20876
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3875219821929932,
      "learning_rate": 0.0005879269700523444,
      "loss": 2.8622,
      "step": 20877
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3183804750442505,
      "learning_rate": 0.000587925821259578,
      "loss": 2.9042,
      "step": 20878
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7804056406021118,
      "learning_rate": 0.0005879246724132808,
      "loss": 3.2751,
      "step": 20879
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.680285096168518,
      "learning_rate": 0.000587923523513453,
      "loss": 3.1814,
      "step": 20880
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3350911140441895,
      "learning_rate": 0.0005879223745600948,
      "loss": 3.0542,
      "step": 20881
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4734612703323364,
      "learning_rate": 0.0005879212255532064,
      "loss": 3.2286,
      "step": 20882
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4668477773666382,
      "learning_rate": 0.0005879200764927879,
      "loss": 3.1423,
      "step": 20883
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7280464172363281,
      "learning_rate": 0.0005879189273788396,
      "loss": 2.853,
      "step": 20884
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4979636669158936,
      "learning_rate": 0.0005879177782113619,
      "loss": 3.0419,
      "step": 20885
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4701701402664185,
      "learning_rate": 0.0005879166289903548,
      "loss": 3.0525,
      "step": 20886
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.259985089302063,
      "learning_rate": 0.0005879154797158184,
      "loss": 3.0963,
      "step": 20887
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0576207637786865,
      "learning_rate": 0.0005879143303877532,
      "loss": 3.216,
      "step": 20888
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.510278582572937,
      "learning_rate": 0.0005879131810061594,
      "loss": 3.0489,
      "step": 20889
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0078821182250977,
      "learning_rate": 0.000587912031571037,
      "loss": 2.9296,
      "step": 20890
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9098267555236816,
      "learning_rate": 0.0005879108820823863,
      "loss": 3.1686,
      "step": 20891
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3747056722640991,
      "learning_rate": 0.0005879097325402076,
      "loss": 2.9083,
      "step": 20892
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9981467723846436,
      "learning_rate": 0.000587908582944501,
      "loss": 3.2401,
      "step": 20893
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5275764465332031,
      "learning_rate": 0.0005879074332952668,
      "loss": 3.036,
      "step": 20894
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7390865087509155,
      "learning_rate": 0.0005879062835925051,
      "loss": 2.8237,
      "step": 20895
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6140716075897217,
      "learning_rate": 0.0005879051338362162,
      "loss": 3.1978,
      "step": 20896
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4063780307769775,
      "learning_rate": 0.0005879039840264003,
      "loss": 3.2952,
      "step": 20897
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.329954147338867,
      "learning_rate": 0.0005879028341630577,
      "loss": 2.8094,
      "step": 20898
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6763330698013306,
      "learning_rate": 0.0005879016842461883,
      "loss": 3.2273,
      "step": 20899
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2880688905715942,
      "learning_rate": 0.0005879005342757928,
      "loss": 2.8438,
      "step": 20900
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3332561254501343,
      "learning_rate": 0.0005878993842518711,
      "loss": 3.1265,
      "step": 20901
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4441972970962524,
      "learning_rate": 0.0005878982341744233,
      "loss": 3.2521,
      "step": 20902
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.439974069595337,
      "learning_rate": 0.00058789708404345,
      "loss": 3.0324,
      "step": 20903
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7291926145553589,
      "learning_rate": 0.0005878959338589511,
      "loss": 3.2322,
      "step": 20904
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6131588220596313,
      "learning_rate": 0.0005878947836209269,
      "loss": 3.0707,
      "step": 20905
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8547155857086182,
      "learning_rate": 0.0005878936333293777,
      "loss": 3.0914,
      "step": 20906
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5183099508285522,
      "learning_rate": 0.0005878924829843035,
      "loss": 3.1116,
      "step": 20907
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5194058418273926,
      "learning_rate": 0.0005878913325857049,
      "loss": 3.0625,
      "step": 20908
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6929908990859985,
      "learning_rate": 0.0005878901821335815,
      "loss": 3.0359,
      "step": 20909
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5485239028930664,
      "learning_rate": 0.0005878890316279342,
      "loss": 2.9228,
      "step": 20910
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2578855752944946,
      "learning_rate": 0.0005878878810687627,
      "loss": 3.0149,
      "step": 20911
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3514262437820435,
      "learning_rate": 0.0005878867304560676,
      "loss": 3.2172,
      "step": 20912
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8040854930877686,
      "learning_rate": 0.0005878855797898487,
      "loss": 3.2799,
      "step": 20913
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4595983028411865,
      "learning_rate": 0.0005878844290701066,
      "loss": 3.0968,
      "step": 20914
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7767897844314575,
      "learning_rate": 0.0005878832782968414,
      "loss": 3.4368,
      "step": 20915
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3168513774871826,
      "learning_rate": 0.0005878821274700531,
      "loss": 3.2576,
      "step": 20916
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6236053705215454,
      "learning_rate": 0.0005878809765897422,
      "loss": 3.2157,
      "step": 20917
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.403983473777771,
      "learning_rate": 0.0005878798256559087,
      "loss": 2.8799,
      "step": 20918
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.526594877243042,
      "learning_rate": 0.0005878786746685529,
      "loss": 3.015,
      "step": 20919
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4233084917068481,
      "learning_rate": 0.0005878775236276752,
      "loss": 3.1814,
      "step": 20920
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3006900548934937,
      "learning_rate": 0.0005878763725332754,
      "loss": 3.0261,
      "step": 20921
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.281651258468628,
      "learning_rate": 0.000587875221385354,
      "loss": 3.1841,
      "step": 20922
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2115874290466309,
      "learning_rate": 0.0005878740701839113,
      "loss": 3.056,
      "step": 20923
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7796708345413208,
      "learning_rate": 0.0005878729189289473,
      "loss": 3.2032,
      "step": 20924
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2878985404968262,
      "learning_rate": 0.0005878717676204622,
      "loss": 3.171,
      "step": 20925
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5029339790344238,
      "learning_rate": 0.0005878706162584564,
      "loss": 3.3226,
      "step": 20926
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3370399475097656,
      "learning_rate": 0.0005878694648429301,
      "loss": 2.8984,
      "step": 20927
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8020944595336914,
      "learning_rate": 0.0005878683133738832,
      "loss": 3.0999,
      "step": 20928
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5111923217773438,
      "learning_rate": 0.0005878671618513164,
      "loss": 3.1118,
      "step": 20929
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5629183053970337,
      "learning_rate": 0.0005878660102752295,
      "loss": 2.9987,
      "step": 20930
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.2986369132995605,
      "learning_rate": 0.0005878648586456228,
      "loss": 3.0967,
      "step": 20931
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.819372296333313,
      "learning_rate": 0.0005878637069624968,
      "loss": 3.1184,
      "step": 20932
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4406280517578125,
      "learning_rate": 0.0005878625552258514,
      "loss": 3.1142,
      "step": 20933
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8837984800338745,
      "learning_rate": 0.0005878614034356868,
      "loss": 3.1248,
      "step": 20934
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.623611569404602,
      "learning_rate": 0.0005878602515920036,
      "loss": 3.0948,
      "step": 20935
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4481191635131836,
      "learning_rate": 0.0005878590996948016,
      "loss": 2.9124,
      "step": 20936
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8279097080230713,
      "learning_rate": 0.000587857947744081,
      "loss": 3.4088,
      "step": 20937
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3467636108398438,
      "learning_rate": 0.0005878567957398424,
      "loss": 2.9369,
      "step": 20938
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7636934518814087,
      "learning_rate": 0.0005878556436820858,
      "loss": 2.9455,
      "step": 20939
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.952354073524475,
      "learning_rate": 0.0005878544915708113,
      "loss": 3.3432,
      "step": 20940
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.7958881855010986,
      "learning_rate": 0.0005878533394060192,
      "loss": 2.9025,
      "step": 20941
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4526169300079346,
      "learning_rate": 0.0005878521871877098,
      "loss": 2.915,
      "step": 20942
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.383708119392395,
      "learning_rate": 0.0005878510349158831,
      "loss": 3.1121,
      "step": 20943
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9862253665924072,
      "learning_rate": 0.0005878498825905397,
      "loss": 3.1376,
      "step": 20944
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.4220502376556396,
      "learning_rate": 0.0005878487302116794,
      "loss": 3.1693,
      "step": 20945
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7039926052093506,
      "learning_rate": 0.0005878475777793027,
      "loss": 3.013,
      "step": 20946
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5833981037139893,
      "learning_rate": 0.0005878464252934096,
      "loss": 3.2392,
      "step": 20947
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8508002758026123,
      "learning_rate": 0.0005878452727540004,
      "loss": 3.2113,
      "step": 20948
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4625329971313477,
      "learning_rate": 0.0005878441201610754,
      "loss": 3.0003,
      "step": 20949
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3564379215240479,
      "learning_rate": 0.0005878429675146348,
      "loss": 3.3936,
      "step": 20950
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2510993480682373,
      "learning_rate": 0.0005878418148146786,
      "loss": 3.1914,
      "step": 20951
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6118147373199463,
      "learning_rate": 0.0005878406620612072,
      "loss": 3.1465,
      "step": 20952
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5090718269348145,
      "learning_rate": 0.0005878395092542209,
      "loss": 3.0332,
      "step": 20953
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2764363288879395,
      "learning_rate": 0.0005878383563937198,
      "loss": 3.2001,
      "step": 20954
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8387755155563354,
      "learning_rate": 0.000587837203479704,
      "loss": 3.1166,
      "step": 20955
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4669526815414429,
      "learning_rate": 0.000587836050512174,
      "loss": 2.7736,
      "step": 20956
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4828033447265625,
      "learning_rate": 0.0005878348974911297,
      "loss": 3.162,
      "step": 20957
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7015106678009033,
      "learning_rate": 0.0005878337444165715,
      "loss": 3.181,
      "step": 20958
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2175383567810059,
      "learning_rate": 0.0005878325912884995,
      "loss": 3.1662,
      "step": 20959
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3546605110168457,
      "learning_rate": 0.0005878314381069141,
      "loss": 3.3014,
      "step": 20960
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.148730754852295,
      "learning_rate": 0.0005878302848718154,
      "loss": 2.919,
      "step": 20961
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.579257845878601,
      "learning_rate": 0.0005878291315832035,
      "loss": 3.0982,
      "step": 20962
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9145634174346924,
      "learning_rate": 0.0005878279782410788,
      "loss": 3.145,
      "step": 20963
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7512781620025635,
      "learning_rate": 0.0005878268248454416,
      "loss": 3.0227,
      "step": 20964
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0858826637268066,
      "learning_rate": 0.0005878256713962918,
      "loss": 2.9474,
      "step": 20965
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0205459594726562,
      "learning_rate": 0.0005878245178936297,
      "loss": 3.0818,
      "step": 20966
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5746110677719116,
      "learning_rate": 0.0005878233643374556,
      "loss": 3.343,
      "step": 20967
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1485414505004883,
      "learning_rate": 0.0005878222107277699,
      "loss": 2.9909,
      "step": 20968
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3586300611495972,
      "learning_rate": 0.0005878210570645724,
      "loss": 3.2518,
      "step": 20969
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.561074137687683,
      "learning_rate": 0.0005878199033478637,
      "loss": 3.0949,
      "step": 20970
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5959043502807617,
      "learning_rate": 0.0005878187495776438,
      "loss": 3.2201,
      "step": 20971
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7571821212768555,
      "learning_rate": 0.0005878175957539129,
      "loss": 3.2017,
      "step": 20972
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.917212724685669,
      "learning_rate": 0.0005878164418766713,
      "loss": 3.1158,
      "step": 20973
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9970252513885498,
      "learning_rate": 0.0005878152879459193,
      "loss": 3.0959,
      "step": 20974
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6140661239624023,
      "learning_rate": 0.0005878141339616569,
      "loss": 2.9592,
      "step": 20975
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.152360439300537,
      "learning_rate": 0.0005878129799238844,
      "loss": 3.2512,
      "step": 20976
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1847448348999023,
      "learning_rate": 0.0005878118258326021,
      "loss": 2.9475,
      "step": 20977
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.517306923866272,
      "learning_rate": 0.0005878106716878101,
      "loss": 3.2157,
      "step": 20978
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3595330715179443,
      "learning_rate": 0.0005878095174895086,
      "loss": 2.9917,
      "step": 20979
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6187734603881836,
      "learning_rate": 0.000587808363237698,
      "loss": 2.9605,
      "step": 20980
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7546284198760986,
      "learning_rate": 0.0005878072089323783,
      "loss": 2.9767,
      "step": 20981
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.961601734161377,
      "learning_rate": 0.00058780605457355,
      "loss": 3.2237,
      "step": 20982
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9137457609176636,
      "learning_rate": 0.0005878049001612129,
      "loss": 3.1178,
      "step": 20983
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5733810663223267,
      "learning_rate": 0.0005878037456953676,
      "loss": 3.2864,
      "step": 20984
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7440521717071533,
      "learning_rate": 0.000587802591176014,
      "loss": 2.9677,
      "step": 20985
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7840454578399658,
      "learning_rate": 0.0005878014366031525,
      "loss": 3.1808,
      "step": 20986
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3195838928222656,
      "learning_rate": 0.0005878002819767834,
      "loss": 3.5016,
      "step": 20987
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5985958576202393,
      "learning_rate": 0.0005877991272969066,
      "loss": 3.2891,
      "step": 20988
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6880366802215576,
      "learning_rate": 0.0005877979725635227,
      "loss": 3.1537,
      "step": 20989
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5636835098266602,
      "learning_rate": 0.0005877968177766316,
      "loss": 3.0886,
      "step": 20990
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6651709079742432,
      "learning_rate": 0.0005877956629362337,
      "loss": 3.0904,
      "step": 20991
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6870661973953247,
      "learning_rate": 0.000587794508042329,
      "loss": 2.9659,
      "step": 20992
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.2592906951904297,
      "learning_rate": 0.0005877933530949182,
      "loss": 3.2997,
      "step": 20993
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0146782398223877,
      "learning_rate": 0.0005877921980940008,
      "loss": 2.9593,
      "step": 20994
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.358413815498352,
      "learning_rate": 0.0005877910430395777,
      "loss": 3.0084,
      "step": 20995
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.594862461090088,
      "learning_rate": 0.0005877898879316486,
      "loss": 3.2931,
      "step": 20996
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8819823265075684,
      "learning_rate": 0.0005877887327702141,
      "loss": 3.0881,
      "step": 20997
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6104367971420288,
      "learning_rate": 0.0005877875775552741,
      "loss": 3.0231,
      "step": 20998
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3400057554244995,
      "learning_rate": 0.0005877864222868291,
      "loss": 2.8853,
      "step": 20999
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5677686929702759,
      "learning_rate": 0.0005877852669648791,
      "loss": 2.8721,
      "step": 21000
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.470645546913147,
      "learning_rate": 0.0005877841115894244,
      "loss": 3.1172,
      "step": 21001
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5362147092819214,
      "learning_rate": 0.0005877829561604652,
      "loss": 2.9074,
      "step": 21002
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8366285562515259,
      "learning_rate": 0.0005877818006780015,
      "loss": 3.3007,
      "step": 21003
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3960167169570923,
      "learning_rate": 0.0005877806451420341,
      "loss": 3.2513,
      "step": 21004
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3933703899383545,
      "learning_rate": 0.0005877794895525626,
      "loss": 3.4861,
      "step": 21005
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.288630247116089,
      "learning_rate": 0.0005877783339095875,
      "loss": 3.1685,
      "step": 21006
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.7751083374023438,
      "learning_rate": 0.000587777178213109,
      "loss": 3.2735,
      "step": 21007
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6660412549972534,
      "learning_rate": 0.0005877760224631273,
      "loss": 3.0103,
      "step": 21008
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0201363563537598,
      "learning_rate": 0.0005877748666596426,
      "loss": 2.9299,
      "step": 21009
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7082717418670654,
      "learning_rate": 0.0005877737108026552,
      "loss": 3.0257,
      "step": 21010
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5939801931381226,
      "learning_rate": 0.0005877725548921652,
      "loss": 3.2442,
      "step": 21011
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.531855583190918,
      "learning_rate": 0.0005877713989281728,
      "loss": 2.9321,
      "step": 21012
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.522555947303772,
      "learning_rate": 0.0005877702429106783,
      "loss": 3.1243,
      "step": 21013
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1035282611846924,
      "learning_rate": 0.0005877690868396819,
      "loss": 3.1517,
      "step": 21014
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1834895610809326,
      "learning_rate": 0.0005877679307151836,
      "loss": 2.88,
      "step": 21015
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7966707944869995,
      "learning_rate": 0.000587766774537184,
      "loss": 3.0825,
      "step": 21016
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5551080703735352,
      "learning_rate": 0.0005877656183056831,
      "loss": 3.0785,
      "step": 21017
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.6601457595825195,
      "learning_rate": 0.0005877644620206812,
      "loss": 2.9044,
      "step": 21018
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1966354846954346,
      "learning_rate": 0.0005877633056821784,
      "loss": 3.2723,
      "step": 21019
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.306676983833313,
      "learning_rate": 0.0005877621492901749,
      "loss": 3.3221,
      "step": 21020
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0389466285705566,
      "learning_rate": 0.000587760992844671,
      "loss": 3.1348,
      "step": 21021
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3706398010253906,
      "learning_rate": 0.000587759836345667,
      "loss": 3.3519,
      "step": 21022
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6589152812957764,
      "learning_rate": 0.0005877586797931629,
      "loss": 3.0645,
      "step": 21023
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.533443570137024,
      "learning_rate": 0.000587757523187159,
      "loss": 3.0531,
      "step": 21024
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.51276695728302,
      "learning_rate": 0.0005877563665276556,
      "loss": 3.1393,
      "step": 21025
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6989039182662964,
      "learning_rate": 0.0005877552098146529,
      "loss": 3.1189,
      "step": 21026
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4113428592681885,
      "learning_rate": 0.0005877540530481512,
      "loss": 3.2531,
      "step": 21027
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6438415050506592,
      "learning_rate": 0.0005877528962281504,
      "loss": 3.0921,
      "step": 21028
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3460915088653564,
      "learning_rate": 0.0005877517393546509,
      "loss": 3.1625,
      "step": 21029
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.04893159866333,
      "learning_rate": 0.0005877505824276529,
      "loss": 3.2848,
      "step": 21030
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5125203132629395,
      "learning_rate": 0.0005877494254471567,
      "loss": 3.204,
      "step": 21031
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5731923580169678,
      "learning_rate": 0.0005877482684131625,
      "loss": 3.1135,
      "step": 21032
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4981346130371094,
      "learning_rate": 0.0005877471113256705,
      "loss": 3.1004,
      "step": 21033
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5533745288848877,
      "learning_rate": 0.0005877459541846807,
      "loss": 3.1357,
      "step": 21034
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5662823915481567,
      "learning_rate": 0.0005877447969901936,
      "loss": 3.0612,
      "step": 21035
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.657075881958008,
      "learning_rate": 0.0005877436397422093,
      "loss": 3.181,
      "step": 21036
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5482959747314453,
      "learning_rate": 0.000587742482440728,
      "loss": 3.0352,
      "step": 21037
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1467740535736084,
      "learning_rate": 0.00058774132508575,
      "loss": 3.1885,
      "step": 21038
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.63430118560791,
      "learning_rate": 0.0005877401676772755,
      "loss": 3.1482,
      "step": 21039
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.1857948303222656,
      "learning_rate": 0.0005877390102153046,
      "loss": 3.1975,
      "step": 21040
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.660693883895874,
      "learning_rate": 0.0005877378526998376,
      "loss": 3.0532,
      "step": 21041
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.576164484024048,
      "learning_rate": 0.0005877366951308747,
      "loss": 3.1679,
      "step": 21042
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.030245065689087,
      "learning_rate": 0.0005877355375084161,
      "loss": 2.9715,
      "step": 21043
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8787717819213867,
      "learning_rate": 0.000587734379832462,
      "loss": 3.0023,
      "step": 21044
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6965113878250122,
      "learning_rate": 0.0005877332221030127,
      "loss": 2.9865,
      "step": 21045
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.96067476272583,
      "learning_rate": 0.0005877320643200684,
      "loss": 3.1518,
      "step": 21046
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.148888111114502,
      "learning_rate": 0.0005877309064836291,
      "loss": 3.2856,
      "step": 21047
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.685638427734375,
      "learning_rate": 0.0005877297485936953,
      "loss": 3.181,
      "step": 21048
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.69181489944458,
      "learning_rate": 0.0005877285906502672,
      "loss": 3.1445,
      "step": 21049
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5998315811157227,
      "learning_rate": 0.0005877274326533448,
      "loss": 3.1572,
      "step": 21050
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6586397886276245,
      "learning_rate": 0.0005877262746029285,
      "loss": 3.1489,
      "step": 21051
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.942370057106018,
      "learning_rate": 0.0005877251164990184,
      "loss": 2.9939,
      "step": 21052
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5618247985839844,
      "learning_rate": 0.0005877239583416149,
      "loss": 3.29,
      "step": 21053
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.523890256881714,
      "learning_rate": 0.000587722800130718,
      "loss": 2.9624,
      "step": 21054
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.643209934234619,
      "learning_rate": 0.000587721641866328,
      "loss": 2.8646,
      "step": 21055
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.9662036895751953,
      "learning_rate": 0.0005877204835484451,
      "loss": 3.1931,
      "step": 21056
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7852143049240112,
      "learning_rate": 0.0005877193251770695,
      "loss": 3.1016,
      "step": 21057
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.3447763919830322,
      "learning_rate": 0.0005877181667522016,
      "loss": 3.1398,
      "step": 21058
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5642025470733643,
      "learning_rate": 0.0005877170082738413,
      "loss": 2.9783,
      "step": 21059
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3429919481277466,
      "learning_rate": 0.0005877158497419891,
      "loss": 3.1618,
      "step": 21060
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3317922353744507,
      "learning_rate": 0.0005877146911566449,
      "loss": 3.1497,
      "step": 21061
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.4787747859954834,
      "learning_rate": 0.0005877135325178093,
      "loss": 2.9541,
      "step": 21062
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4037203788757324,
      "learning_rate": 0.0005877123738254822,
      "loss": 3.0336,
      "step": 21063
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4272278547286987,
      "learning_rate": 0.000587711215079664,
      "loss": 3.0631,
      "step": 21064
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.1763031482696533,
      "learning_rate": 0.0005877100562803549,
      "loss": 2.8449,
      "step": 21065
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9104576110839844,
      "learning_rate": 0.000587708897427555,
      "loss": 3.0941,
      "step": 21066
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.041604995727539,
      "learning_rate": 0.0005877077385212646,
      "loss": 3.0603,
      "step": 21067
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6732457876205444,
      "learning_rate": 0.0005877065795614839,
      "loss": 2.929,
      "step": 21068
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7570619583129883,
      "learning_rate": 0.0005877054205482131,
      "loss": 3.3101,
      "step": 21069
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.196197748184204,
      "learning_rate": 0.0005877042614814525,
      "loss": 3.2744,
      "step": 21070
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9350188970565796,
      "learning_rate": 0.0005877031023612022,
      "loss": 3.1178,
      "step": 21071
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4149855375289917,
      "learning_rate": 0.0005877019431874624,
      "loss": 3.0452,
      "step": 21072
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8643876314163208,
      "learning_rate": 0.0005877007839602335,
      "loss": 3.2436,
      "step": 21073
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.171769142150879,
      "learning_rate": 0.0005876996246795155,
      "loss": 3.0878,
      "step": 21074
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6124986410140991,
      "learning_rate": 0.0005876984653453087,
      "loss": 3.0877,
      "step": 21075
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9252218008041382,
      "learning_rate": 0.0005876973059576134,
      "loss": 2.9977,
      "step": 21076
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5658576488494873,
      "learning_rate": 0.0005876961465164297,
      "loss": 2.9765,
      "step": 21077
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5497318506240845,
      "learning_rate": 0.0005876949870217579,
      "loss": 2.9684,
      "step": 21078
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4425846338272095,
      "learning_rate": 0.0005876938274735981,
      "loss": 3.1428,
      "step": 21079
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4357578754425049,
      "learning_rate": 0.0005876926678719506,
      "loss": 3.198,
      "step": 21080
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.7894123792648315,
      "learning_rate": 0.0005876915082168156,
      "loss": 3.0272,
      "step": 21081
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.679803490638733,
      "learning_rate": 0.0005876903485081934,
      "loss": 2.9924,
      "step": 21082
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4797327518463135,
      "learning_rate": 0.000587689188746084,
      "loss": 2.837,
      "step": 21083
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3532944917678833,
      "learning_rate": 0.0005876880289304879,
      "loss": 3.2527,
      "step": 21084
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3412612676620483,
      "learning_rate": 0.000587686869061405,
      "loss": 3.3224,
      "step": 21085
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4761760234832764,
      "learning_rate": 0.0005876857091388358,
      "loss": 3.129,
      "step": 21086
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.373944878578186,
      "learning_rate": 0.0005876845491627803,
      "loss": 2.8376,
      "step": 21087
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3165494203567505,
      "learning_rate": 0.000587683389133239,
      "loss": 3.0626,
      "step": 21088
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5603162050247192,
      "learning_rate": 0.0005876822290502117,
      "loss": 2.8629,
      "step": 21089
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.440493106842041,
      "learning_rate": 0.000587681068913699,
      "loss": 2.9877,
      "step": 21090
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5610873699188232,
      "learning_rate": 0.0005876799087237009,
      "loss": 2.9239,
      "step": 21091
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5682694911956787,
      "learning_rate": 0.0005876787484802177,
      "loss": 3.131,
      "step": 21092
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9171111583709717,
      "learning_rate": 0.0005876775881832495,
      "loss": 2.893,
      "step": 21093
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.568630576133728,
      "learning_rate": 0.0005876764278327967,
      "loss": 3.2638,
      "step": 21094
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.521045684814453,
      "learning_rate": 0.0005876752674288594,
      "loss": 2.9771,
      "step": 21095
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5717902183532715,
      "learning_rate": 0.0005876741069714379,
      "loss": 3.2462,
      "step": 21096
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5674811601638794,
      "learning_rate": 0.0005876729464605323,
      "loss": 3.0748,
      "step": 21097
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8931293487548828,
      "learning_rate": 0.0005876717858961428,
      "loss": 3.0912,
      "step": 21098
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.335756540298462,
      "learning_rate": 0.0005876706252782697,
      "loss": 2.879,
      "step": 21099
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.9036318063735962,
      "learning_rate": 0.0005876694646069132,
      "loss": 3.0898,
      "step": 21100
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3854509592056274,
      "learning_rate": 0.0005876683038820737,
      "loss": 3.193,
      "step": 21101
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.875030755996704,
      "learning_rate": 0.000587667143103751,
      "loss": 3.2907,
      "step": 21102
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.293426275253296,
      "learning_rate": 0.0005876659822719457,
      "loss": 2.992,
      "step": 21103
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0875673294067383,
      "learning_rate": 0.0005876648213866578,
      "loss": 2.9813,
      "step": 21104
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.6164387464523315,
      "learning_rate": 0.0005876636604478876,
      "loss": 2.9271,
      "step": 21105
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.499396562576294,
      "learning_rate": 0.0005876624994556352,
      "loss": 2.8326,
      "step": 21106
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.8313241004943848,
      "learning_rate": 0.000587661338409901,
      "loss": 3.1414,
      "step": 21107
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.0787558555603027,
      "learning_rate": 0.0005876601773106852,
      "loss": 2.9707,
      "step": 21108
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4218776226043701,
      "learning_rate": 0.0005876590161579878,
      "loss": 2.9997,
      "step": 21109
    },
    {
      "epoch": 0.27,
      "grad_norm": 3.1174814701080322,
      "learning_rate": 0.0005876578549518092,
      "loss": 2.9165,
      "step": 21110
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.2771029472351074,
      "learning_rate": 0.0005876566936921496,
      "loss": 3.1551,
      "step": 21111
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.3669023513793945,
      "learning_rate": 0.0005876555323790092,
      "loss": 3.1455,
      "step": 21112
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4467520713806152,
      "learning_rate": 0.0005876543710123882,
      "loss": 2.9847,
      "step": 21113
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.918127179145813,
      "learning_rate": 0.0005876532095922868,
      "loss": 3.0551,
      "step": 21114
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.9094398021698,
      "learning_rate": 0.0005876520481187052,
      "loss": 3.0154,
      "step": 21115
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.374616265296936,
      "learning_rate": 0.0005876508865916437,
      "loss": 3.0888,
      "step": 21116
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.8263460397720337,
      "learning_rate": 0.0005876497250111026,
      "loss": 2.9092,
      "step": 21117
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.5874272584915161,
      "learning_rate": 0.0005876485633770818,
      "loss": 3.0058,
      "step": 21118
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.830458402633667,
      "learning_rate": 0.0005876474016895817,
      "loss": 3.0598,
      "step": 21119
    },
    {
      "epoch": 0.27,
      "grad_norm": 1.4293800592422485,
      "learning_rate": 0.0005876462399486026,
      "loss": 3.2455,
      "step": 21120
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.7348673343658447,
      "learning_rate": 0.0005876450781541446,
      "loss": 2.9977,
      "step": 21121
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6066420078277588,
      "learning_rate": 0.000587643916306208,
      "loss": 2.9756,
      "step": 21122
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3054752349853516,
      "learning_rate": 0.000587642754404793,
      "loss": 3.0185,
      "step": 21123
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8057817220687866,
      "learning_rate": 0.0005876415924498997,
      "loss": 3.1257,
      "step": 21124
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3383359909057617,
      "learning_rate": 0.0005876404304415284,
      "loss": 3.1859,
      "step": 21125
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2995362281799316,
      "learning_rate": 0.0005876392683796793,
      "loss": 2.5475,
      "step": 21126
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.306113600730896,
      "learning_rate": 0.0005876381062643527,
      "loss": 3.019,
      "step": 21127
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7288777828216553,
      "learning_rate": 0.0005876369440955487,
      "loss": 2.9509,
      "step": 21128
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.48797607421875,
      "learning_rate": 0.0005876357818732675,
      "loss": 3.0003,
      "step": 21129
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4718899726867676,
      "learning_rate": 0.0005876346195975095,
      "loss": 3.135,
      "step": 21130
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.571819543838501,
      "learning_rate": 0.0005876334572682747,
      "loss": 3.323,
      "step": 21131
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4027520418167114,
      "learning_rate": 0.0005876322948855635,
      "loss": 3.1791,
      "step": 21132
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8250797986984253,
      "learning_rate": 0.000587631132449376,
      "loss": 2.9536,
      "step": 21133
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8059886693954468,
      "learning_rate": 0.0005876299699597124,
      "loss": 3.236,
      "step": 21134
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5538487434387207,
      "learning_rate": 0.0005876288074165729,
      "loss": 3.1727,
      "step": 21135
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3666977882385254,
      "learning_rate": 0.0005876276448199579,
      "loss": 2.9451,
      "step": 21136
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8042776584625244,
      "learning_rate": 0.0005876264821698676,
      "loss": 3.2053,
      "step": 21137
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.371220588684082,
      "learning_rate": 0.0005876253194663019,
      "loss": 3.0216,
      "step": 21138
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7340521812438965,
      "learning_rate": 0.0005876241567092612,
      "loss": 3.0306,
      "step": 21139
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8884681463241577,
      "learning_rate": 0.0005876229938987458,
      "loss": 3.1175,
      "step": 21140
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3791308403015137,
      "learning_rate": 0.000587621831034756,
      "loss": 3.1171,
      "step": 21141
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0763022899627686,
      "learning_rate": 0.0005876206681172916,
      "loss": 2.9903,
      "step": 21142
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4628909826278687,
      "learning_rate": 0.0005876195051463533,
      "loss": 3.1032,
      "step": 21143
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5604019165039062,
      "learning_rate": 0.0005876183421219411,
      "loss": 3.1155,
      "step": 21144
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2724634408950806,
      "learning_rate": 0.0005876171790440551,
      "loss": 3.1282,
      "step": 21145
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.075795888900757,
      "learning_rate": 0.0005876160159126956,
      "loss": 3.1481,
      "step": 21146
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5217621326446533,
      "learning_rate": 0.000587614852727863,
      "loss": 3.2339,
      "step": 21147
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4288400411605835,
      "learning_rate": 0.0005876136894895574,
      "loss": 3.4063,
      "step": 21148
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2628068923950195,
      "learning_rate": 0.0005876125261977789,
      "loss": 3.0957,
      "step": 21149
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4343059062957764,
      "learning_rate": 0.0005876113628525278,
      "loss": 3.2125,
      "step": 21150
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.441421389579773,
      "learning_rate": 0.0005876101994538043,
      "loss": 3.0247,
      "step": 21151
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6417163610458374,
      "learning_rate": 0.0005876090360016087,
      "loss": 3.0303,
      "step": 21152
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7864422798156738,
      "learning_rate": 0.0005876078724959411,
      "loss": 3.1293,
      "step": 21153
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5594550371170044,
      "learning_rate": 0.0005876067089368018,
      "loss": 3.0882,
      "step": 21154
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.0057942867279053,
      "learning_rate": 0.0005876055453241909,
      "loss": 3.2125,
      "step": 21155
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.217068076133728,
      "learning_rate": 0.0005876043816581088,
      "loss": 3.0952,
      "step": 21156
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.739187240600586,
      "learning_rate": 0.0005876032179385556,
      "loss": 3.0762,
      "step": 21157
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5065035820007324,
      "learning_rate": 0.0005876020541655315,
      "loss": 3.1053,
      "step": 21158
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5397425889968872,
      "learning_rate": 0.0005876008903390367,
      "loss": 2.8741,
      "step": 21159
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9243093729019165,
      "learning_rate": 0.0005875997264590716,
      "loss": 3.0025,
      "step": 21160
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9164971113204956,
      "learning_rate": 0.0005875985625256361,
      "loss": 2.776,
      "step": 21161
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6453913450241089,
      "learning_rate": 0.0005875973985387308,
      "loss": 2.9266,
      "step": 21162
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3304060697555542,
      "learning_rate": 0.0005875962344983556,
      "loss": 3.1468,
      "step": 21163
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4802993535995483,
      "learning_rate": 0.0005875950704045109,
      "loss": 3.0407,
      "step": 21164
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8073259592056274,
      "learning_rate": 0.0005875939062571969,
      "loss": 3.3626,
      "step": 21165
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5813822746276855,
      "learning_rate": 0.0005875927420564136,
      "loss": 3.1861,
      "step": 21166
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.466875672340393,
      "learning_rate": 0.0005875915778021614,
      "loss": 3.1449,
      "step": 21167
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4105405807495117,
      "learning_rate": 0.0005875904134944406,
      "loss": 3.1422,
      "step": 21168
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8766863346099854,
      "learning_rate": 0.0005875892491332513,
      "loss": 3.0721,
      "step": 21169
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.535199522972107,
      "learning_rate": 0.0005875880847185937,
      "loss": 3.021,
      "step": 21170
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7415547370910645,
      "learning_rate": 0.000587586920250468,
      "loss": 3.0928,
      "step": 21171
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.697632908821106,
      "learning_rate": 0.0005875857557288744,
      "loss": 3.0228,
      "step": 21172
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6083275079727173,
      "learning_rate": 0.0005875845911538133,
      "loss": 3.2083,
      "step": 21173
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.684404969215393,
      "learning_rate": 0.0005875834265252848,
      "loss": 3.2362,
      "step": 21174
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4857901334762573,
      "learning_rate": 0.000587582261843289,
      "loss": 3.1172,
      "step": 21175
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.003328800201416,
      "learning_rate": 0.0005875810971078264,
      "loss": 3.1859,
      "step": 21176
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4129629135131836,
      "learning_rate": 0.000587579932318897,
      "loss": 3.0895,
      "step": 21177
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.824250340461731,
      "learning_rate": 0.000587578767476501,
      "loss": 3.1365,
      "step": 21178
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2770057916641235,
      "learning_rate": 0.0005875776025806386,
      "loss": 3.1194,
      "step": 21179
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.467814564704895,
      "learning_rate": 0.0005875764376313103,
      "loss": 2.9736,
      "step": 21180
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5210397243499756,
      "learning_rate": 0.000587575272628516,
      "loss": 3.2547,
      "step": 21181
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9701170921325684,
      "learning_rate": 0.000587574107572256,
      "loss": 3.2793,
      "step": 21182
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9956504106521606,
      "learning_rate": 0.0005875729424625306,
      "loss": 2.9804,
      "step": 21183
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5596727132797241,
      "learning_rate": 0.00058757177729934,
      "loss": 3.1439,
      "step": 21184
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9983901977539062,
      "learning_rate": 0.0005875706120826843,
      "loss": 3.2759,
      "step": 21185
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.551771640777588,
      "learning_rate": 0.0005875694468125638,
      "loss": 3.0656,
      "step": 21186
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2373580932617188,
      "learning_rate": 0.0005875682814889787,
      "loss": 3.1445,
      "step": 21187
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.1557867527008057,
      "learning_rate": 0.0005875671161119293,
      "loss": 2.9806,
      "step": 21188
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.7808876037597656,
      "learning_rate": 0.0005875659506814157,
      "loss": 3.0323,
      "step": 21189
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4508812427520752,
      "learning_rate": 0.0005875647851974382,
      "loss": 3.1691,
      "step": 21190
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.781563639640808,
      "learning_rate": 0.000587563619659997,
      "loss": 3.1259,
      "step": 21191
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0665318965911865,
      "learning_rate": 0.0005875624540690921,
      "loss": 3.1868,
      "step": 21192
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5439196825027466,
      "learning_rate": 0.0005875612884247241,
      "loss": 3.0517,
      "step": 21193
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4259282350540161,
      "learning_rate": 0.000587560122726893,
      "loss": 2.9372,
      "step": 21194
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5190542936325073,
      "learning_rate": 0.000587558956975599,
      "loss": 2.8457,
      "step": 21195
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1267526149749756,
      "learning_rate": 0.0005875577911708424,
      "loss": 3.1966,
      "step": 21196
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8636655807495117,
      "learning_rate": 0.0005875566253126233,
      "loss": 3.0535,
      "step": 21197
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7683367729187012,
      "learning_rate": 0.000587555459400942,
      "loss": 3.0075,
      "step": 21198
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0647153854370117,
      "learning_rate": 0.0005875542934357989,
      "loss": 3.3124,
      "step": 21199
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.743910551071167,
      "learning_rate": 0.0005875531274171939,
      "loss": 3.1681,
      "step": 21200
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.379058599472046,
      "learning_rate": 0.0005875519613451273,
      "loss": 3.087,
      "step": 21201
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1639292240142822,
      "learning_rate": 0.0005875507952195994,
      "loss": 2.9685,
      "step": 21202
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.508258819580078,
      "learning_rate": 0.0005875496290406104,
      "loss": 3.2708,
      "step": 21203
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.548210859298706,
      "learning_rate": 0.0005875484628081605,
      "loss": 3.1996,
      "step": 21204
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7360494136810303,
      "learning_rate": 0.0005875472965222499,
      "loss": 3.0952,
      "step": 21205
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8196362257003784,
      "learning_rate": 0.0005875461301828788,
      "loss": 3.0759,
      "step": 21206
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2895500659942627,
      "learning_rate": 0.0005875449637900474,
      "loss": 2.8233,
      "step": 21207
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4394798278808594,
      "learning_rate": 0.0005875437973437561,
      "loss": 3.0221,
      "step": 21208
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4579927921295166,
      "learning_rate": 0.0005875426308440049,
      "loss": 3.118,
      "step": 21209
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9601664543151855,
      "learning_rate": 0.0005875414642907941,
      "loss": 3.2125,
      "step": 21210
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6501805782318115,
      "learning_rate": 0.000587540297684124,
      "loss": 3.0875,
      "step": 21211
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6669282913208008,
      "learning_rate": 0.0005875391310239946,
      "loss": 3.4333,
      "step": 21212
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6934328079223633,
      "learning_rate": 0.0005875379643104063,
      "loss": 3.2193,
      "step": 21213
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5096006393432617,
      "learning_rate": 0.0005875367975433593,
      "loss": 3.0971,
      "step": 21214
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.45769202709198,
      "learning_rate": 0.0005875356307228538,
      "loss": 2.8624,
      "step": 21215
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5756162405014038,
      "learning_rate": 0.0005875344638488899,
      "loss": 3.3262,
      "step": 21216
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7995522022247314,
      "learning_rate": 0.0005875332969214679,
      "loss": 3.1625,
      "step": 21217
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5963006019592285,
      "learning_rate": 0.0005875321299405881,
      "loss": 3.0618,
      "step": 21218
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.582491159439087,
      "learning_rate": 0.0005875309629062507,
      "loss": 3.0873,
      "step": 21219
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7566535472869873,
      "learning_rate": 0.0005875297958184558,
      "loss": 2.9784,
      "step": 21220
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7308154106140137,
      "learning_rate": 0.0005875286286772036,
      "loss": 2.9433,
      "step": 21221
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2882181406021118,
      "learning_rate": 0.0005875274614824946,
      "loss": 2.8521,
      "step": 21222
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6749838590621948,
      "learning_rate": 0.0005875262942343286,
      "loss": 3.1915,
      "step": 21223
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5385239124298096,
      "learning_rate": 0.0005875251269327062,
      "loss": 3.3135,
      "step": 21224
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.473018765449524,
      "learning_rate": 0.0005875239595776275,
      "loss": 3.0398,
      "step": 21225
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6792436838150024,
      "learning_rate": 0.0005875227921690925,
      "loss": 3.1439,
      "step": 21226
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.495668411254883,
      "learning_rate": 0.0005875216247071016,
      "loss": 3.1487,
      "step": 21227
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5326887369155884,
      "learning_rate": 0.0005875204571916551,
      "loss": 2.9434,
      "step": 21228
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5776724815368652,
      "learning_rate": 0.0005875192896227531,
      "loss": 2.8672,
      "step": 21229
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.335363745689392,
      "learning_rate": 0.0005875181220003959,
      "loss": 2.9316,
      "step": 21230
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4441431760787964,
      "learning_rate": 0.0005875169543245835,
      "loss": 3.1154,
      "step": 21231
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4760429859161377,
      "learning_rate": 0.0005875157865953163,
      "loss": 3.129,
      "step": 21232
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2995309829711914,
      "learning_rate": 0.0005875146188125946,
      "loss": 2.9127,
      "step": 21233
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.395777702331543,
      "learning_rate": 0.0005875134509764184,
      "loss": 3.2445,
      "step": 21234
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6391268968582153,
      "learning_rate": 0.0005875122830867881,
      "loss": 3.0156,
      "step": 21235
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.488734483718872,
      "learning_rate": 0.0005875111151437037,
      "loss": 3.5189,
      "step": 21236
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6958200931549072,
      "learning_rate": 0.0005875099471471657,
      "loss": 3.2557,
      "step": 21237
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5449974536895752,
      "learning_rate": 0.0005875087790971741,
      "loss": 3.2139,
      "step": 21238
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.699381947517395,
      "learning_rate": 0.0005875076109937293,
      "loss": 3.0071,
      "step": 21239
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1237494945526123,
      "learning_rate": 0.0005875064428368312,
      "loss": 3.0108,
      "step": 21240
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.7126739025115967,
      "learning_rate": 0.0005875052746264804,
      "loss": 2.863,
      "step": 21241
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5419296026229858,
      "learning_rate": 0.0005875041063626768,
      "loss": 2.9611,
      "step": 21242
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8734053373336792,
      "learning_rate": 0.0005875029380454209,
      "loss": 3.2623,
      "step": 21243
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2950921058654785,
      "learning_rate": 0.0005875017696747128,
      "loss": 3.0412,
      "step": 21244
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5550740957260132,
      "learning_rate": 0.0005875006012505525,
      "loss": 2.9721,
      "step": 21245
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.820942997932434,
      "learning_rate": 0.0005874994327729406,
      "loss": 3.2473,
      "step": 21246
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6492302417755127,
      "learning_rate": 0.0005874982642418771,
      "loss": 3.1362,
      "step": 21247
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5529673099517822,
      "learning_rate": 0.0005874970956573621,
      "loss": 3.1281,
      "step": 21248
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3732677698135376,
      "learning_rate": 0.0005874959270193961,
      "loss": 3.028,
      "step": 21249
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2305266857147217,
      "learning_rate": 0.0005874947583279791,
      "loss": 3.2347,
      "step": 21250
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4156227111816406,
      "learning_rate": 0.0005874935895831115,
      "loss": 2.9094,
      "step": 21251
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3361876010894775,
      "learning_rate": 0.0005874924207847933,
      "loss": 3.2387,
      "step": 21252
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7988088130950928,
      "learning_rate": 0.000587491251933025,
      "loss": 3.2774,
      "step": 21253
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.819143295288086,
      "learning_rate": 0.0005874900830278065,
      "loss": 3.0787,
      "step": 21254
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6869242191314697,
      "learning_rate": 0.0005874889140691382,
      "loss": 3.2596,
      "step": 21255
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.744746208190918,
      "learning_rate": 0.0005874877450570203,
      "loss": 3.0145,
      "step": 21256
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7741758823394775,
      "learning_rate": 0.000587486575991453,
      "loss": 3.303,
      "step": 21257
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0882620811462402,
      "learning_rate": 0.0005874854068724365,
      "loss": 3.1408,
      "step": 21258
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.804197072982788,
      "learning_rate": 0.000587484237699971,
      "loss": 3.2552,
      "step": 21259
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.181764841079712,
      "learning_rate": 0.0005874830684740568,
      "loss": 3.0051,
      "step": 21260
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8522319793701172,
      "learning_rate": 0.0005874818991946941,
      "loss": 3.1388,
      "step": 21261
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4617106914520264,
      "learning_rate": 0.0005874807298618831,
      "loss": 3.058,
      "step": 21262
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1793272495269775,
      "learning_rate": 0.0005874795604756239,
      "loss": 2.8918,
      "step": 21263
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.9494264125823975,
      "learning_rate": 0.000587478391035917,
      "loss": 2.8716,
      "step": 21264
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.655423879623413,
      "learning_rate": 0.0005874772215427622,
      "loss": 3.1791,
      "step": 21265
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7020244598388672,
      "learning_rate": 0.0005874760519961602,
      "loss": 3.2718,
      "step": 21266
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5783860683441162,
      "learning_rate": 0.0005874748823961108,
      "loss": 3.1863,
      "step": 21267
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1977379322052,
      "learning_rate": 0.0005874737127426145,
      "loss": 3.2249,
      "step": 21268
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4411441087722778,
      "learning_rate": 0.0005874725430356714,
      "loss": 3.1382,
      "step": 21269
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5008167028427124,
      "learning_rate": 0.0005874713732752817,
      "loss": 3.1302,
      "step": 21270
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.298973560333252,
      "learning_rate": 0.0005874702034614456,
      "loss": 2.652,
      "step": 21271
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2106759548187256,
      "learning_rate": 0.0005874690335941633,
      "loss": 2.9775,
      "step": 21272
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6897062063217163,
      "learning_rate": 0.0005874678636734353,
      "loss": 2.9438,
      "step": 21273
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.461466908454895,
      "learning_rate": 0.0005874666936992615,
      "loss": 3.0434,
      "step": 21274
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3231420516967773,
      "learning_rate": 0.0005874655236716422,
      "loss": 3.06,
      "step": 21275
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2328356504440308,
      "learning_rate": 0.0005874643535905776,
      "loss": 3.2687,
      "step": 21276
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.198997139930725,
      "learning_rate": 0.0005874631834560679,
      "loss": 3.4049,
      "step": 21277
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.370277762413025,
      "learning_rate": 0.0005874620132681136,
      "loss": 3.2231,
      "step": 21278
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.071276903152466,
      "learning_rate": 0.0005874608430267145,
      "loss": 2.894,
      "step": 21279
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6981160640716553,
      "learning_rate": 0.000587459672731871,
      "loss": 2.8904,
      "step": 21280
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.883431315422058,
      "learning_rate": 0.0005874585023835834,
      "loss": 3.0386,
      "step": 21281
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.093451738357544,
      "learning_rate": 0.0005874573319818517,
      "loss": 2.912,
      "step": 21282
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.8442797660827637,
      "learning_rate": 0.0005874561615266765,
      "loss": 2.9759,
      "step": 21283
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.448952555656433,
      "learning_rate": 0.0005874549910180576,
      "loss": 3.2809,
      "step": 21284
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.947831392288208,
      "learning_rate": 0.0005874538204559954,
      "loss": 3.1725,
      "step": 21285
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5903023481369019,
      "learning_rate": 0.00058745264984049,
      "loss": 2.9913,
      "step": 21286
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.885402798652649,
      "learning_rate": 0.0005874514791715419,
      "loss": 3.1576,
      "step": 21287
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7427688837051392,
      "learning_rate": 0.0005874503084491511,
      "loss": 3.2213,
      "step": 21288
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.540237545967102,
      "learning_rate": 0.0005874491376733178,
      "loss": 3.1866,
      "step": 21289
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8364418745040894,
      "learning_rate": 0.0005874479668440424,
      "loss": 3.1619,
      "step": 21290
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.791190505027771,
      "learning_rate": 0.0005874467959613249,
      "loss": 2.8945,
      "step": 21291
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1857409477233887,
      "learning_rate": 0.0005874456250251657,
      "loss": 3.1047,
      "step": 21292
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7958036661148071,
      "learning_rate": 0.0005874444540355649,
      "loss": 2.9321,
      "step": 21293
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.725030541419983,
      "learning_rate": 0.0005874432829925227,
      "loss": 3.0986,
      "step": 21294
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.544547438621521,
      "learning_rate": 0.0005874421118960394,
      "loss": 3.2408,
      "step": 21295
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7819687128067017,
      "learning_rate": 0.0005874409407461152,
      "loss": 3.0882,
      "step": 21296
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5983271598815918,
      "learning_rate": 0.0005874397695427502,
      "loss": 3.3123,
      "step": 21297
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6827443838119507,
      "learning_rate": 0.0005874385982859449,
      "loss": 3.0871,
      "step": 21298
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6753000020980835,
      "learning_rate": 0.0005874374269756992,
      "loss": 3.0233,
      "step": 21299
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2841875553131104,
      "learning_rate": 0.0005874362556120135,
      "loss": 2.9827,
      "step": 21300
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2339880466461182,
      "learning_rate": 0.0005874350841948879,
      "loss": 3.1731,
      "step": 21301
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6176122426986694,
      "learning_rate": 0.0005874339127243228,
      "loss": 3.0067,
      "step": 21302
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4208312034606934,
      "learning_rate": 0.0005874327412003184,
      "loss": 3.1186,
      "step": 21303
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6119064092636108,
      "learning_rate": 0.0005874315696228747,
      "loss": 3.2078,
      "step": 21304
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6926242113113403,
      "learning_rate": 0.000587430397991992,
      "loss": 3.0373,
      "step": 21305
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5253187417984009,
      "learning_rate": 0.0005874292263076706,
      "loss": 3.1596,
      "step": 21306
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.580428957939148,
      "learning_rate": 0.0005874280545699107,
      "loss": 3.1829,
      "step": 21307
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3921290636062622,
      "learning_rate": 0.0005874268827787125,
      "loss": 3.4327,
      "step": 21308
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4968887567520142,
      "learning_rate": 0.0005874257109340762,
      "loss": 3.061,
      "step": 21309
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2534523010253906,
      "learning_rate": 0.000587424539036002,
      "loss": 3.1318,
      "step": 21310
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4587358236312866,
      "learning_rate": 0.0005874233670844903,
      "loss": 2.8573,
      "step": 21311
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.15059232711792,
      "learning_rate": 0.000587422195079541,
      "loss": 2.6243,
      "step": 21312
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5116112232208252,
      "learning_rate": 0.0005874210230211546,
      "loss": 3.2571,
      "step": 21313
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5852948427200317,
      "learning_rate": 0.0005874198509093311,
      "loss": 3.1533,
      "step": 21314
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3855102062225342,
      "learning_rate": 0.000587418678744071,
      "loss": 3.252,
      "step": 21315
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5562686920166016,
      "learning_rate": 0.0005874175065253742,
      "loss": 2.9463,
      "step": 21316
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5126781463623047,
      "learning_rate": 0.0005874163342532411,
      "loss": 3.1327,
      "step": 21317
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4246948957443237,
      "learning_rate": 0.0005874151619276719,
      "loss": 3.0563,
      "step": 21318
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4911361932754517,
      "learning_rate": 0.0005874139895486667,
      "loss": 2.9338,
      "step": 21319
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8555169105529785,
      "learning_rate": 0.0005874128171162259,
      "loss": 2.9425,
      "step": 21320
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4866507053375244,
      "learning_rate": 0.0005874116446303496,
      "loss": 3.0118,
      "step": 21321
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5077682733535767,
      "learning_rate": 0.000587410472091038,
      "loss": 3.3603,
      "step": 21322
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5791324377059937,
      "learning_rate": 0.0005874092994982915,
      "loss": 3.0343,
      "step": 21323
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6983718872070312,
      "learning_rate": 0.0005874081268521101,
      "loss": 3.1481,
      "step": 21324
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6693257093429565,
      "learning_rate": 0.0005874069541524942,
      "loss": 3.2663,
      "step": 21325
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5726646184921265,
      "learning_rate": 0.0005874057813994437,
      "loss": 2.8917,
      "step": 21326
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8223308324813843,
      "learning_rate": 0.0005874046085929593,
      "loss": 3.2831,
      "step": 21327
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.01220440864563,
      "learning_rate": 0.0005874034357330407,
      "loss": 3.2935,
      "step": 21328
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9630378484725952,
      "learning_rate": 0.0005874022628196885,
      "loss": 3.0649,
      "step": 21329
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5204967260360718,
      "learning_rate": 0.0005874010898529029,
      "loss": 2.7722,
      "step": 21330
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3763022422790527,
      "learning_rate": 0.0005873999168326839,
      "loss": 2.9955,
      "step": 21331
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5707494020462036,
      "learning_rate": 0.0005873987437590318,
      "loss": 3.2192,
      "step": 21332
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8403053283691406,
      "learning_rate": 0.000587397570631947,
      "loss": 2.9812,
      "step": 21333
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.272336721420288,
      "learning_rate": 0.0005873963974514295,
      "loss": 2.8932,
      "step": 21334
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1258561611175537,
      "learning_rate": 0.0005873952242174795,
      "loss": 2.9212,
      "step": 21335
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5635783672332764,
      "learning_rate": 0.0005873940509300974,
      "loss": 3.1975,
      "step": 21336
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4133027791976929,
      "learning_rate": 0.0005873928775892832,
      "loss": 2.9703,
      "step": 21337
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3744258880615234,
      "learning_rate": 0.0005873917041950374,
      "loss": 3.1927,
      "step": 21338
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.835593581199646,
      "learning_rate": 0.0005873905307473599,
      "loss": 3.2947,
      "step": 21339
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5966031551361084,
      "learning_rate": 0.0005873893572462511,
      "loss": 3.0847,
      "step": 21340
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5086793899536133,
      "learning_rate": 0.0005873881836917113,
      "loss": 2.8597,
      "step": 21341
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5560905933380127,
      "learning_rate": 0.0005873870100837406,
      "loss": 2.9201,
      "step": 21342
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8226405382156372,
      "learning_rate": 0.0005873858364223392,
      "loss": 2.9254,
      "step": 21343
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.712045431137085,
      "learning_rate": 0.0005873846627075073,
      "loss": 2.9647,
      "step": 21344
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.906343936920166,
      "learning_rate": 0.0005873834889392452,
      "loss": 3.2917,
      "step": 21345
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4070158004760742,
      "learning_rate": 0.000587382315117553,
      "loss": 3.2698,
      "step": 21346
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3048731088638306,
      "learning_rate": 0.000587381141242431,
      "loss": 3.0892,
      "step": 21347
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6078811883926392,
      "learning_rate": 0.0005873799673138797,
      "loss": 3.0417,
      "step": 21348
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5924090147018433,
      "learning_rate": 0.0005873787933318988,
      "loss": 2.9901,
      "step": 21349
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4924707412719727,
      "learning_rate": 0.0005873776192964887,
      "loss": 3.0144,
      "step": 21350
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4408516883850098,
      "learning_rate": 0.0005873764452076498,
      "loss": 2.8248,
      "step": 21351
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3556445837020874,
      "learning_rate": 0.0005873752710653823,
      "loss": 3.096,
      "step": 21352
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2514227628707886,
      "learning_rate": 0.0005873740968696861,
      "loss": 3.1409,
      "step": 21353
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.936462640762329,
      "learning_rate": 0.0005873729226205617,
      "loss": 3.1172,
      "step": 21354
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.57939612865448,
      "learning_rate": 0.0005873717483180093,
      "loss": 3.1643,
      "step": 21355
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4948166608810425,
      "learning_rate": 0.0005873705739620291,
      "loss": 2.9612,
      "step": 21356
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6733908653259277,
      "learning_rate": 0.0005873693995526212,
      "loss": 2.9871,
      "step": 21357
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5021244287490845,
      "learning_rate": 0.0005873682250897859,
      "loss": 2.6027,
      "step": 21358
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5622624158859253,
      "learning_rate": 0.0005873670505735234,
      "loss": 2.9815,
      "step": 21359
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5982950925827026,
      "learning_rate": 0.0005873658760038341,
      "loss": 3.2519,
      "step": 21360
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3764805793762207,
      "learning_rate": 0.0005873647013807179,
      "loss": 3.0698,
      "step": 21361
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1477603912353516,
      "learning_rate": 0.0005873635267041752,
      "loss": 3.1778,
      "step": 21362
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3068286180496216,
      "learning_rate": 0.0005873623519742062,
      "loss": 3.3496,
      "step": 21363
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5458605289459229,
      "learning_rate": 0.0005873611771908111,
      "loss": 2.8558,
      "step": 21364
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4512239694595337,
      "learning_rate": 0.0005873600023539903,
      "loss": 3.2043,
      "step": 21365
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4899317026138306,
      "learning_rate": 0.0005873588274637436,
      "loss": 3.1087,
      "step": 21366
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2209997177124023,
      "learning_rate": 0.0005873576525200716,
      "loss": 2.9109,
      "step": 21367
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6725341081619263,
      "learning_rate": 0.0005873564775229743,
      "loss": 3.2097,
      "step": 21368
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.752443790435791,
      "learning_rate": 0.0005873553024724522,
      "loss": 3.0055,
      "step": 21369
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9901738166809082,
      "learning_rate": 0.0005873541273685052,
      "loss": 3.113,
      "step": 21370
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.628697395324707,
      "learning_rate": 0.0005873529522111336,
      "loss": 3.1398,
      "step": 21371
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.6204466819763184,
      "learning_rate": 0.0005873517770003377,
      "loss": 3.2708,
      "step": 21372
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9001892805099487,
      "learning_rate": 0.0005873506017361178,
      "loss": 3.0796,
      "step": 21373
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6541094779968262,
      "learning_rate": 0.0005873494264184738,
      "loss": 3.2916,
      "step": 21374
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.401314377784729,
      "learning_rate": 0.0005873482510474063,
      "loss": 2.9858,
      "step": 21375
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.516697883605957,
      "learning_rate": 0.0005873470756229152,
      "loss": 3.1996,
      "step": 21376
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4928561449050903,
      "learning_rate": 0.0005873459001450008,
      "loss": 2.925,
      "step": 21377
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6469076871871948,
      "learning_rate": 0.0005873447246136635,
      "loss": 3.2255,
      "step": 21378
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.587882161140442,
      "learning_rate": 0.0005873435490289034,
      "loss": 3.1131,
      "step": 21379
    },
    {
      "epoch": 0.28,
      "grad_norm": 4.047749042510986,
      "learning_rate": 0.0005873423733907207,
      "loss": 3.0385,
      "step": 21380
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4550020694732666,
      "learning_rate": 0.0005873411976991157,
      "loss": 3.3212,
      "step": 21381
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9143167734146118,
      "learning_rate": 0.0005873400219540883,
      "loss": 3.0533,
      "step": 21382
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3996777534484863,
      "learning_rate": 0.0005873388461556391,
      "loss": 3.2394,
      "step": 21383
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8508837223052979,
      "learning_rate": 0.0005873376703037682,
      "loss": 3.0208,
      "step": 21384
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.509544014930725,
      "learning_rate": 0.0005873364943984759,
      "loss": 3.0763,
      "step": 21385
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5851820707321167,
      "learning_rate": 0.0005873353184397621,
      "loss": 3.0119,
      "step": 21386
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8557796478271484,
      "learning_rate": 0.0005873341424276274,
      "loss": 3.2124,
      "step": 21387
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3189291954040527,
      "learning_rate": 0.0005873329663620719,
      "loss": 3.3856,
      "step": 21388
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8071917295455933,
      "learning_rate": 0.0005873317902430958,
      "loss": 3.0764,
      "step": 21389
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.208588123321533,
      "learning_rate": 0.0005873306140706991,
      "loss": 3.1712,
      "step": 21390
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.592276692390442,
      "learning_rate": 0.0005873294378448822,
      "loss": 3.0179,
      "step": 21391
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.919075608253479,
      "learning_rate": 0.0005873282615656456,
      "loss": 3.0118,
      "step": 21392
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5429344177246094,
      "learning_rate": 0.0005873270852329891,
      "loss": 3.1184,
      "step": 21393
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6061036586761475,
      "learning_rate": 0.000587325908846913,
      "loss": 2.8358,
      "step": 21394
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8324217796325684,
      "learning_rate": 0.0005873247324074177,
      "loss": 3.1914,
      "step": 21395
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4497272968292236,
      "learning_rate": 0.0005873235559145033,
      "loss": 2.9891,
      "step": 21396
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9453034400939941,
      "learning_rate": 0.00058732237936817,
      "loss": 3.0274,
      "step": 21397
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4733577966690063,
      "learning_rate": 0.000587321202768418,
      "loss": 3.2881,
      "step": 21398
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7151095867156982,
      "learning_rate": 0.0005873200261152476,
      "loss": 3.1333,
      "step": 21399
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5409126281738281,
      "learning_rate": 0.000587318849408659,
      "loss": 3.2392,
      "step": 21400
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9111974239349365,
      "learning_rate": 0.0005873176726486523,
      "loss": 2.8778,
      "step": 21401
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0214619636535645,
      "learning_rate": 0.000587316495835228,
      "loss": 3.2394,
      "step": 21402
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3551363945007324,
      "learning_rate": 0.000587315318968386,
      "loss": 3.0557,
      "step": 21403
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4380759000778198,
      "learning_rate": 0.0005873141420481268,
      "loss": 3.0989,
      "step": 21404
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.213574767112732,
      "learning_rate": 0.0005873129650744504,
      "loss": 3.1462,
      "step": 21405
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.198807716369629,
      "learning_rate": 0.000587311788047357,
      "loss": 3.208,
      "step": 21406
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4519662857055664,
      "learning_rate": 0.0005873106109668471,
      "loss": 3.0196,
      "step": 21407
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.769654631614685,
      "learning_rate": 0.0005873094338329206,
      "loss": 3.0934,
      "step": 21408
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.02557373046875,
      "learning_rate": 0.0005873082566455779,
      "loss": 2.9086,
      "step": 21409
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4624983072280884,
      "learning_rate": 0.0005873070794048192,
      "loss": 3.0429,
      "step": 21410
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4081554412841797,
      "learning_rate": 0.0005873059021106446,
      "loss": 3.2469,
      "step": 21411
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.450099468231201,
      "learning_rate": 0.0005873047247630544,
      "loss": 3.2317,
      "step": 21412
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5091536045074463,
      "learning_rate": 0.0005873035473620489,
      "loss": 3.2609,
      "step": 21413
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6810094118118286,
      "learning_rate": 0.0005873023699076283,
      "loss": 3.0358,
      "step": 21414
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7059788703918457,
      "learning_rate": 0.0005873011923997926,
      "loss": 3.0887,
      "step": 21415
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.325941562652588,
      "learning_rate": 0.0005873000148385424,
      "loss": 2.8433,
      "step": 21416
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3425482511520386,
      "learning_rate": 0.0005872988372238776,
      "loss": 3.0337,
      "step": 21417
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7129077911376953,
      "learning_rate": 0.0005872976595557985,
      "loss": 3.0623,
      "step": 21418
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6975747346878052,
      "learning_rate": 0.0005872964818343054,
      "loss": 3.0496,
      "step": 21419
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9573400020599365,
      "learning_rate": 0.0005872953040593984,
      "loss": 2.9891,
      "step": 21420
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.520929217338562,
      "learning_rate": 0.0005872941262310778,
      "loss": 3.2346,
      "step": 21421
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.750702977180481,
      "learning_rate": 0.0005872929483493438,
      "loss": 2.9418,
      "step": 21422
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6069049835205078,
      "learning_rate": 0.0005872917704141966,
      "loss": 2.9572,
      "step": 21423
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6046313047409058,
      "learning_rate": 0.0005872905924256364,
      "loss": 3.1538,
      "step": 21424
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7784442901611328,
      "learning_rate": 0.0005872894143836635,
      "loss": 3.0441,
      "step": 21425
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5561256408691406,
      "learning_rate": 0.000587288236288278,
      "loss": 3.0519,
      "step": 21426
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5205657482147217,
      "learning_rate": 0.0005872870581394803,
      "loss": 3.0427,
      "step": 21427
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.514281153678894,
      "learning_rate": 0.0005872858799372706,
      "loss": 3.0011,
      "step": 21428
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8112400770187378,
      "learning_rate": 0.0005872847016816489,
      "loss": 3.0711,
      "step": 21429
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6275858879089355,
      "learning_rate": 0.0005872835233726155,
      "loss": 3.1514,
      "step": 21430
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.353621482849121,
      "learning_rate": 0.0005872823450101709,
      "loss": 3.2307,
      "step": 21431
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4751477241516113,
      "learning_rate": 0.0005872811665943149,
      "loss": 3.2102,
      "step": 21432
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4281333684921265,
      "learning_rate": 0.0005872799881250479,
      "loss": 3.3559,
      "step": 21433
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2846972942352295,
      "learning_rate": 0.0005872788096023702,
      "loss": 2.9257,
      "step": 21434
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5269365310668945,
      "learning_rate": 0.0005872776310262819,
      "loss": 2.9583,
      "step": 21435
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5317984819412231,
      "learning_rate": 0.0005872764523967832,
      "loss": 2.9766,
      "step": 21436
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.783708095550537,
      "learning_rate": 0.0005872752737138744,
      "loss": 2.8868,
      "step": 21437
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5403523445129395,
      "learning_rate": 0.0005872740949775559,
      "loss": 3.1544,
      "step": 21438
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.009983777999878,
      "learning_rate": 0.0005872729161878275,
      "loss": 3.2302,
      "step": 21439
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.980778455734253,
      "learning_rate": 0.0005872717373446897,
      "loss": 2.9784,
      "step": 21440
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4993162155151367,
      "learning_rate": 0.0005872705584481429,
      "loss": 3.1186,
      "step": 21441
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4353742599487305,
      "learning_rate": 0.0005872693794981869,
      "loss": 3.2541,
      "step": 21442
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7921215295791626,
      "learning_rate": 0.000587268200494822,
      "loss": 3.117,
      "step": 21443
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.792054295539856,
      "learning_rate": 0.0005872670214380486,
      "loss": 3.2227,
      "step": 21444
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5801159143447876,
      "learning_rate": 0.0005872658423278669,
      "loss": 2.9373,
      "step": 21445
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.080441474914551,
      "learning_rate": 0.0005872646631642769,
      "loss": 3.3132,
      "step": 21446
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8384342193603516,
      "learning_rate": 0.000587263483947279,
      "loss": 3.1594,
      "step": 21447
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5046201944351196,
      "learning_rate": 0.0005872623046768735,
      "loss": 2.9913,
      "step": 21448
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9633276462554932,
      "learning_rate": 0.0005872611253530605,
      "loss": 3.016,
      "step": 21449
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5541249513626099,
      "learning_rate": 0.0005872599459758402,
      "loss": 3.1016,
      "step": 21450
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.340159296989441,
      "learning_rate": 0.0005872587665452128,
      "loss": 3.1067,
      "step": 21451
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.644457459449768,
      "learning_rate": 0.0005872575870611787,
      "loss": 3.1032,
      "step": 21452
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4709439277648926,
      "learning_rate": 0.0005872564075237379,
      "loss": 3.0549,
      "step": 21453
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6814229488372803,
      "learning_rate": 0.0005872552279328908,
      "loss": 3.125,
      "step": 21454
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4502373933792114,
      "learning_rate": 0.0005872540482886375,
      "loss": 3.2807,
      "step": 21455
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.533596396446228,
      "learning_rate": 0.0005872528685909781,
      "loss": 3.0307,
      "step": 21456
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8070026636123657,
      "learning_rate": 0.0005872516888399132,
      "loss": 2.889,
      "step": 21457
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.549638271331787,
      "learning_rate": 0.0005872505090354427,
      "loss": 3.089,
      "step": 21458
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.503853678703308,
      "learning_rate": 0.0005872493291775668,
      "loss": 2.9476,
      "step": 21459
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9835561513900757,
      "learning_rate": 0.0005872481492662859,
      "loss": 3.232,
      "step": 21460
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7762916088104248,
      "learning_rate": 0.0005872469693016003,
      "loss": 3.3412,
      "step": 21461
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9675483703613281,
      "learning_rate": 0.0005872457892835098,
      "loss": 3.0344,
      "step": 21462
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3710761070251465,
      "learning_rate": 0.000587244609212015,
      "loss": 3.0018,
      "step": 21463
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3946220874786377,
      "learning_rate": 0.0005872434290871161,
      "loss": 3.2453,
      "step": 21464
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0117156505584717,
      "learning_rate": 0.0005872422489088131,
      "loss": 2.7983,
      "step": 21465
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4147688150405884,
      "learning_rate": 0.0005872410686771064,
      "loss": 2.9463,
      "step": 21466
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.660299301147461,
      "learning_rate": 0.0005872398883919961,
      "loss": 3.1665,
      "step": 21467
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.050532102584839,
      "learning_rate": 0.0005872387080534826,
      "loss": 3.213,
      "step": 21468
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4458775520324707,
      "learning_rate": 0.0005872375276615659,
      "loss": 3.1358,
      "step": 21469
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7897632122039795,
      "learning_rate": 0.0005872363472162464,
      "loss": 3.1667,
      "step": 21470
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6505322456359863,
      "learning_rate": 0.0005872351667175242,
      "loss": 3.0989,
      "step": 21471
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6752102375030518,
      "learning_rate": 0.0005872339861653995,
      "loss": 3.2264,
      "step": 21472
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5191854238510132,
      "learning_rate": 0.0005872328055598726,
      "loss": 2.9065,
      "step": 21473
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0059170722961426,
      "learning_rate": 0.0005872316249009438,
      "loss": 3.1779,
      "step": 21474
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5448590517044067,
      "learning_rate": 0.0005872304441886132,
      "loss": 3.2567,
      "step": 21475
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2836874723434448,
      "learning_rate": 0.0005872292634228809,
      "loss": 3.1845,
      "step": 21476
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2859834432601929,
      "learning_rate": 0.0005872280826037474,
      "loss": 3.1789,
      "step": 21477
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5482892990112305,
      "learning_rate": 0.0005872269017312127,
      "loss": 3.3331,
      "step": 21478
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5953975915908813,
      "learning_rate": 0.0005872257208052771,
      "loss": 3.0317,
      "step": 21479
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9147473573684692,
      "learning_rate": 0.0005872245398259409,
      "loss": 2.972,
      "step": 21480
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.254000425338745,
      "learning_rate": 0.0005872233587932042,
      "loss": 3.0958,
      "step": 21481
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.299425721168518,
      "learning_rate": 0.0005872221777070673,
      "loss": 3.0303,
      "step": 21482
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6833261251449585,
      "learning_rate": 0.0005872209965675303,
      "loss": 3.0992,
      "step": 21483
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3629798889160156,
      "learning_rate": 0.0005872198153745934,
      "loss": 3.1892,
      "step": 21484
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5317833423614502,
      "learning_rate": 0.0005872186341282571,
      "loss": 3.2179,
      "step": 21485
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5500885248184204,
      "learning_rate": 0.0005872174528285213,
      "loss": 3.0945,
      "step": 21486
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8014148473739624,
      "learning_rate": 0.0005872162714753864,
      "loss": 3.1529,
      "step": 21487
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4173681735992432,
      "learning_rate": 0.0005872150900688526,
      "loss": 3.2756,
      "step": 21488
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6032379865646362,
      "learning_rate": 0.00058721390860892,
      "loss": 2.9947,
      "step": 21489
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4170386791229248,
      "learning_rate": 0.0005872127270955891,
      "loss": 2.8221,
      "step": 21490
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.456400752067566,
      "learning_rate": 0.0005872115455288597,
      "loss": 3.1274,
      "step": 21491
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5399271249771118,
      "learning_rate": 0.0005872103639087325,
      "loss": 3.0805,
      "step": 21492
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2826464176177979,
      "learning_rate": 0.0005872091822352074,
      "loss": 2.7285,
      "step": 21493
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7109642028808594,
      "learning_rate": 0.0005872080005082846,
      "loss": 2.7987,
      "step": 21494
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7841240167617798,
      "learning_rate": 0.0005872068187279644,
      "loss": 3.0863,
      "step": 21495
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.562523603439331,
      "learning_rate": 0.0005872056368942471,
      "loss": 2.96,
      "step": 21496
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8203693628311157,
      "learning_rate": 0.0005872044550071328,
      "loss": 3.0751,
      "step": 21497
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0728023052215576,
      "learning_rate": 0.0005872032730666218,
      "loss": 3.1136,
      "step": 21498
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.471167802810669,
      "learning_rate": 0.0005872020910727144,
      "loss": 3.0959,
      "step": 21499
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6602095365524292,
      "learning_rate": 0.0005872009090254106,
      "loss": 3.0069,
      "step": 21500
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3947557210922241,
      "learning_rate": 0.0005871997269247107,
      "loss": 2.7854,
      "step": 21501
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.29380464553833,
      "learning_rate": 0.0005871985447706149,
      "loss": 3.0066,
      "step": 21502
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8437820672988892,
      "learning_rate": 0.0005871973625631236,
      "loss": 2.9878,
      "step": 21503
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4295942783355713,
      "learning_rate": 0.0005871961803022369,
      "loss": 3.6078,
      "step": 21504
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4894402027130127,
      "learning_rate": 0.000587194997987955,
      "loss": 2.8546,
      "step": 21505
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.040788412094116,
      "learning_rate": 0.000587193815620278,
      "loss": 2.8908,
      "step": 21506
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3286173343658447,
      "learning_rate": 0.0005871926331992063,
      "loss": 3.264,
      "step": 21507
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6071027517318726,
      "learning_rate": 0.0005871914507247401,
      "loss": 3.0265,
      "step": 21508
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2452309131622314,
      "learning_rate": 0.0005871902681968795,
      "loss": 3.4124,
      "step": 21509
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9246481657028198,
      "learning_rate": 0.000587189085615625,
      "loss": 3.4579,
      "step": 21510
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7729640007019043,
      "learning_rate": 0.0005871879029809764,
      "loss": 3.3135,
      "step": 21511
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.5914039611816406,
      "learning_rate": 0.0005871867202929344,
      "loss": 2.9297,
      "step": 21512
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.8451526165008545,
      "learning_rate": 0.0005871855375514988,
      "loss": 3.1155,
      "step": 21513
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.621579647064209,
      "learning_rate": 0.0005871843547566699,
      "loss": 3.0636,
      "step": 21514
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6616630554199219,
      "learning_rate": 0.0005871831719084482,
      "loss": 3.0376,
      "step": 21515
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.8007283210754395,
      "learning_rate": 0.0005871819890068337,
      "loss": 2.9554,
      "step": 21516
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3027093410491943,
      "learning_rate": 0.0005871808060518266,
      "loss": 3.2337,
      "step": 21517
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3792080879211426,
      "learning_rate": 0.0005871796230434271,
      "loss": 3.014,
      "step": 21518
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4213201999664307,
      "learning_rate": 0.0005871784399816356,
      "loss": 3.1355,
      "step": 21519
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.36738920211792,
      "learning_rate": 0.0005871772568664522,
      "loss": 2.9807,
      "step": 21520
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7901290655136108,
      "learning_rate": 0.000587176073697877,
      "loss": 3.3001,
      "step": 21521
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6192773580551147,
      "learning_rate": 0.0005871748904759104,
      "loss": 3.0494,
      "step": 21522
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.373792290687561,
      "learning_rate": 0.0005871737072005528,
      "loss": 3.0844,
      "step": 21523
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7683675289154053,
      "learning_rate": 0.000587172523871804,
      "loss": 2.9302,
      "step": 21524
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2956689596176147,
      "learning_rate": 0.0005871713404896643,
      "loss": 3.1518,
      "step": 21525
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3125907182693481,
      "learning_rate": 0.0005871701570541342,
      "loss": 2.8722,
      "step": 21526
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3502838611602783,
      "learning_rate": 0.0005871689735652136,
      "loss": 3.445,
      "step": 21527
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6986846923828125,
      "learning_rate": 0.0005871677900229029,
      "loss": 3.1188,
      "step": 21528
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9256892204284668,
      "learning_rate": 0.0005871666064272024,
      "loss": 3.38,
      "step": 21529
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.624838948249817,
      "learning_rate": 0.0005871654227781121,
      "loss": 3.0103,
      "step": 21530
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.6902666091918945,
      "learning_rate": 0.0005871642390756324,
      "loss": 3.0536,
      "step": 21531
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2888119220733643,
      "learning_rate": 0.0005871630553197633,
      "loss": 3.1714,
      "step": 21532
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.23156476020813,
      "learning_rate": 0.0005871618715105054,
      "loss": 3.201,
      "step": 21533
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.573262929916382,
      "learning_rate": 0.0005871606876478586,
      "loss": 3.0016,
      "step": 21534
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.543731689453125,
      "learning_rate": 0.000587159503731823,
      "loss": 3.0085,
      "step": 21535
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1468327045440674,
      "learning_rate": 0.0005871583197623992,
      "loss": 3.1781,
      "step": 21536
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2212345600128174,
      "learning_rate": 0.0005871571357395874,
      "loss": 2.9316,
      "step": 21537
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.825594186782837,
      "learning_rate": 0.0005871559516633874,
      "loss": 3.0175,
      "step": 21538
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5289708375930786,
      "learning_rate": 0.0005871547675337999,
      "loss": 3.0586,
      "step": 21539
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6088241338729858,
      "learning_rate": 0.0005871535833508248,
      "loss": 3.4059,
      "step": 21540
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3929669857025146,
      "learning_rate": 0.0005871523991144624,
      "loss": 3.1661,
      "step": 21541
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3205500841140747,
      "learning_rate": 0.000587151214824713,
      "loss": 3.0789,
      "step": 21542
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.190721273422241,
      "learning_rate": 0.0005871500304815767,
      "loss": 3.1537,
      "step": 21543
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6669658422470093,
      "learning_rate": 0.0005871488460850538,
      "loss": 3.0591,
      "step": 21544
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5389087200164795,
      "learning_rate": 0.0005871476616351447,
      "loss": 2.9564,
      "step": 21545
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2618366479873657,
      "learning_rate": 0.0005871464771318492,
      "loss": 3.1304,
      "step": 21546
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.479680061340332,
      "learning_rate": 0.0005871452925751679,
      "loss": 2.9562,
      "step": 21547
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7068114280700684,
      "learning_rate": 0.0005871441079651008,
      "loss": 2.9977,
      "step": 21548
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9508342742919922,
      "learning_rate": 0.0005871429233016482,
      "loss": 3.0769,
      "step": 21549
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6606045961380005,
      "learning_rate": 0.0005871417385848103,
      "loss": 2.8762,
      "step": 21550
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4854114055633545,
      "learning_rate": 0.0005871405538145873,
      "loss": 3.0791,
      "step": 21551
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5754698514938354,
      "learning_rate": 0.0005871393689909795,
      "loss": 3.0029,
      "step": 21552
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.00215482711792,
      "learning_rate": 0.000587138184113987,
      "loss": 2.9791,
      "step": 21553
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6946147680282593,
      "learning_rate": 0.0005871369991836102,
      "loss": 2.9983,
      "step": 21554
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7056715488433838,
      "learning_rate": 0.0005871358141998492,
      "loss": 3.3505,
      "step": 21555
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.535609483718872,
      "learning_rate": 0.0005871346291627041,
      "loss": 3.1317,
      "step": 21556
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4067670106887817,
      "learning_rate": 0.0005871334440721754,
      "loss": 3.3263,
      "step": 21557
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5949156284332275,
      "learning_rate": 0.000587132258928263,
      "loss": 3.14,
      "step": 21558
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2989166975021362,
      "learning_rate": 0.0005871310737309675,
      "loss": 3.2208,
      "step": 21559
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5762596130371094,
      "learning_rate": 0.0005871298884802888,
      "loss": 3.2012,
      "step": 21560
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8634577989578247,
      "learning_rate": 0.0005871287031762273,
      "loss": 3.2467,
      "step": 21561
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8152568340301514,
      "learning_rate": 0.0005871275178187831,
      "loss": 3.1764,
      "step": 21562
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.375504493713379,
      "learning_rate": 0.0005871263324079564,
      "loss": 3.0714,
      "step": 21563
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.932508111000061,
      "learning_rate": 0.0005871251469437476,
      "loss": 3.0255,
      "step": 21564
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8548176288604736,
      "learning_rate": 0.0005871239614261568,
      "loss": 3.2905,
      "step": 21565
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7488512992858887,
      "learning_rate": 0.0005871227758551843,
      "loss": 3.2469,
      "step": 21566
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6258208751678467,
      "learning_rate": 0.0005871215902308302,
      "loss": 3.0945,
      "step": 21567
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4181528091430664,
      "learning_rate": 0.0005871204045530947,
      "loss": 3.1646,
      "step": 21568
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.436511278152466,
      "learning_rate": 0.0005871192188219782,
      "loss": 3.1111,
      "step": 21569
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.058919668197632,
      "learning_rate": 0.0005871180330374808,
      "loss": 3.0578,
      "step": 21570
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8695424795150757,
      "learning_rate": 0.0005871168471996027,
      "loss": 3.2553,
      "step": 21571
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4600412845611572,
      "learning_rate": 0.0005871156613083443,
      "loss": 2.965,
      "step": 21572
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.707802414894104,
      "learning_rate": 0.0005871144753637054,
      "loss": 2.9823,
      "step": 21573
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.215214967727661,
      "learning_rate": 0.0005871132893656867,
      "loss": 2.8774,
      "step": 21574
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9109479188919067,
      "learning_rate": 0.0005871121033142882,
      "loss": 2.9211,
      "step": 21575
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3722319602966309,
      "learning_rate": 0.00058711091720951,
      "loss": 3.3482,
      "step": 21576
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.8506429195404053,
      "learning_rate": 0.0005871097310513526,
      "loss": 3.0515,
      "step": 21577
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3904099464416504,
      "learning_rate": 0.0005871085448398161,
      "loss": 3.0625,
      "step": 21578
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0041468143463135,
      "learning_rate": 0.0005871073585749007,
      "loss": 2.9945,
      "step": 21579
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6814854145050049,
      "learning_rate": 0.0005871061722566065,
      "loss": 3.1581,
      "step": 21580
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1262059211730957,
      "learning_rate": 0.000587104985884934,
      "loss": 3.0286,
      "step": 21581
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.547485828399658,
      "learning_rate": 0.0005871037994598831,
      "loss": 3.0333,
      "step": 21582
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6154999732971191,
      "learning_rate": 0.0005871026129814543,
      "loss": 3.0483,
      "step": 21583
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.234738349914551,
      "learning_rate": 0.0005871014264496477,
      "loss": 3.2759,
      "step": 21584
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.005309820175171,
      "learning_rate": 0.0005871002398644635,
      "loss": 3.088,
      "step": 21585
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7874782085418701,
      "learning_rate": 0.000587099053225902,
      "loss": 3.0387,
      "step": 21586
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5085031986236572,
      "learning_rate": 0.0005870978665339633,
      "loss": 3.1757,
      "step": 21587
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7911185026168823,
      "learning_rate": 0.0005870966797886476,
      "loss": 2.8694,
      "step": 21588
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4786804914474487,
      "learning_rate": 0.0005870954929899554,
      "loss": 3.2611,
      "step": 21589
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5376657247543335,
      "learning_rate": 0.0005870943061378867,
      "loss": 3.2448,
      "step": 21590
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2523659467697144,
      "learning_rate": 0.0005870931192324415,
      "loss": 2.9727,
      "step": 21591
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.6506268978118896,
      "learning_rate": 0.0005870919322736206,
      "loss": 3.0747,
      "step": 21592
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.996419906616211,
      "learning_rate": 0.0005870907452614237,
      "loss": 2.8976,
      "step": 21593
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3478230237960815,
      "learning_rate": 0.0005870895581958513,
      "loss": 3.2125,
      "step": 21594
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.83564031124115,
      "learning_rate": 0.0005870883710769034,
      "loss": 3.155,
      "step": 21595
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3871612548828125,
      "learning_rate": 0.0005870871839045805,
      "loss": 3.1461,
      "step": 21596
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4064258337020874,
      "learning_rate": 0.0005870859966788825,
      "loss": 3.1715,
      "step": 21597
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.9861419200897217,
      "learning_rate": 0.0005870848093998101,
      "loss": 3.0799,
      "step": 21598
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6183371543884277,
      "learning_rate": 0.0005870836220673629,
      "loss": 3.0018,
      "step": 21599
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3967771530151367,
      "learning_rate": 0.0005870824346815416,
      "loss": 2.9957,
      "step": 21600
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7755701541900635,
      "learning_rate": 0.0005870812472423462,
      "loss": 3.2298,
      "step": 21601
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.852014183998108,
      "learning_rate": 0.000587080059749777,
      "loss": 3.2445,
      "step": 21602
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4338957071304321,
      "learning_rate": 0.0005870788722038342,
      "loss": 3.0378,
      "step": 21603
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0964114665985107,
      "learning_rate": 0.000587077684604518,
      "loss": 3.0443,
      "step": 21604
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4041492938995361,
      "learning_rate": 0.0005870764969518286,
      "loss": 3.2941,
      "step": 21605
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1447210311889648,
      "learning_rate": 0.0005870753092457663,
      "loss": 3.2925,
      "step": 21606
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3032058477401733,
      "learning_rate": 0.0005870741214863313,
      "loss": 3.163,
      "step": 21607
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.031256914138794,
      "learning_rate": 0.0005870729336735238,
      "loss": 3.0193,
      "step": 21608
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.40227210521698,
      "learning_rate": 0.000587071745807344,
      "loss": 3.4177,
      "step": 21609
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.335692882537842,
      "learning_rate": 0.0005870705578877923,
      "loss": 3.0082,
      "step": 21610
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.463792085647583,
      "learning_rate": 0.0005870693699148686,
      "loss": 2.8338,
      "step": 21611
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4535353183746338,
      "learning_rate": 0.0005870681818885733,
      "loss": 3.2814,
      "step": 21612
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.462691307067871,
      "learning_rate": 0.0005870669938089066,
      "loss": 3.006,
      "step": 21613
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7157657146453857,
      "learning_rate": 0.0005870658056758687,
      "loss": 3.1942,
      "step": 21614
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7857688665390015,
      "learning_rate": 0.0005870646174894599,
      "loss": 3.1039,
      "step": 21615
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4985241889953613,
      "learning_rate": 0.0005870634292496804,
      "loss": 3.2046,
      "step": 21616
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7412103414535522,
      "learning_rate": 0.0005870622409565303,
      "loss": 2.9813,
      "step": 21617
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.863310694694519,
      "learning_rate": 0.0005870610526100099,
      "loss": 3.1077,
      "step": 21618
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8065218925476074,
      "learning_rate": 0.0005870598642101195,
      "loss": 3.1044,
      "step": 21619
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.219404935836792,
      "learning_rate": 0.0005870586757568593,
      "loss": 3.2223,
      "step": 21620
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9207035303115845,
      "learning_rate": 0.0005870574872502294,
      "loss": 3.3322,
      "step": 21621
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.713087797164917,
      "learning_rate": 0.0005870562986902301,
      "loss": 3.2031,
      "step": 21622
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.75002121925354,
      "learning_rate": 0.0005870551100768616,
      "loss": 2.9068,
      "step": 21623
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.755624771118164,
      "learning_rate": 0.0005870539214101242,
      "loss": 3.2262,
      "step": 21624
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.619645595550537,
      "learning_rate": 0.0005870527326900179,
      "loss": 3.0824,
      "step": 21625
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9403029680252075,
      "learning_rate": 0.0005870515439165433,
      "loss": 2.9985,
      "step": 21626
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.6337194442749023,
      "learning_rate": 0.0005870503550897003,
      "loss": 2.9862,
      "step": 21627
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.633493661880493,
      "learning_rate": 0.0005870491662094892,
      "loss": 3.1446,
      "step": 21628
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.486636757850647,
      "learning_rate": 0.0005870479772759103,
      "loss": 3.1106,
      "step": 21629
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.157113552093506,
      "learning_rate": 0.0005870467882889638,
      "loss": 2.8975,
      "step": 21630
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4634934663772583,
      "learning_rate": 0.0005870455992486497,
      "loss": 3.1489,
      "step": 21631
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6436415910720825,
      "learning_rate": 0.0005870444101549685,
      "loss": 3.2121,
      "step": 21632
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3410917520523071,
      "learning_rate": 0.0005870432210079204,
      "loss": 3.1388,
      "step": 21633
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.428615689277649,
      "learning_rate": 0.0005870420318075055,
      "loss": 3.1766,
      "step": 21634
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.7401864528656006,
      "learning_rate": 0.000587040842553724,
      "loss": 2.9252,
      "step": 21635
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4428744316101074,
      "learning_rate": 0.0005870396532465762,
      "loss": 3.0535,
      "step": 21636
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.794714331626892,
      "learning_rate": 0.0005870384638860624,
      "loss": 2.7517,
      "step": 21637
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.397696614265442,
      "learning_rate": 0.0005870372744721827,
      "loss": 2.9869,
      "step": 21638
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.359643578529358,
      "learning_rate": 0.0005870360850049373,
      "loss": 3.2731,
      "step": 21639
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2353979349136353,
      "learning_rate": 0.0005870348954843266,
      "loss": 3.2973,
      "step": 21640
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4637912511825562,
      "learning_rate": 0.0005870337059103505,
      "loss": 3.0039,
      "step": 21641
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5522308349609375,
      "learning_rate": 0.0005870325162830095,
      "loss": 3.0785,
      "step": 21642
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.587722659111023,
      "learning_rate": 0.0005870313266023038,
      "loss": 3.0168,
      "step": 21643
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7419465780258179,
      "learning_rate": 0.0005870301368682335,
      "loss": 3.1262,
      "step": 21644
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8768415451049805,
      "learning_rate": 0.0005870289470807988,
      "loss": 3.273,
      "step": 21645
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6110968589782715,
      "learning_rate": 0.0005870277572400001,
      "loss": 3.1427,
      "step": 21646
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.962511658668518,
      "learning_rate": 0.0005870265673458376,
      "loss": 2.7875,
      "step": 21647
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.496011734008789,
      "learning_rate": 0.0005870253773983113,
      "loss": 2.9584,
      "step": 21648
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7437744140625,
      "learning_rate": 0.0005870241873974217,
      "loss": 3.0407,
      "step": 21649
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.601770043373108,
      "learning_rate": 0.0005870229973431687,
      "loss": 3.0411,
      "step": 21650
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7881923913955688,
      "learning_rate": 0.0005870218072355529,
      "loss": 3.1869,
      "step": 21651
    },
    {
      "epoch": 0.28,
      "grad_norm": 4.236188888549805,
      "learning_rate": 0.0005870206170745743,
      "loss": 3.0752,
      "step": 21652
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3517754077911377,
      "learning_rate": 0.0005870194268602331,
      "loss": 2.8274,
      "step": 21653
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.630631923675537,
      "learning_rate": 0.0005870182365925294,
      "loss": 3.0545,
      "step": 21654
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.3589344024658203,
      "learning_rate": 0.0005870170462714639,
      "loss": 3.2393,
      "step": 21655
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.7597153186798096,
      "learning_rate": 0.0005870158558970363,
      "loss": 3.0702,
      "step": 21656
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7434130907058716,
      "learning_rate": 0.0005870146654692472,
      "loss": 3.3051,
      "step": 21657
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0627949237823486,
      "learning_rate": 0.0005870134749880965,
      "loss": 3.2346,
      "step": 21658
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.162508010864258,
      "learning_rate": 0.0005870122844535846,
      "loss": 3.0257,
      "step": 21659
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9713705778121948,
      "learning_rate": 0.0005870110938657118,
      "loss": 3.095,
      "step": 21660
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5436112880706787,
      "learning_rate": 0.0005870099032244782,
      "loss": 3.2985,
      "step": 21661
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4194579124450684,
      "learning_rate": 0.0005870087125298839,
      "loss": 2.9798,
      "step": 21662
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.549913167953491,
      "learning_rate": 0.0005870075217819294,
      "loss": 2.9868,
      "step": 21663
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.719805359840393,
      "learning_rate": 0.0005870063309806147,
      "loss": 3.0378,
      "step": 21664
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3356610536575317,
      "learning_rate": 0.0005870051401259402,
      "loss": 3.1208,
      "step": 21665
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5276379585266113,
      "learning_rate": 0.000587003949217906,
      "loss": 2.7864,
      "step": 21666
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5148359537124634,
      "learning_rate": 0.0005870027582565123,
      "loss": 3.3465,
      "step": 21667
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5408910512924194,
      "learning_rate": 0.0005870015672417594,
      "loss": 3.1609,
      "step": 21668
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.481251835823059,
      "learning_rate": 0.0005870003761736475,
      "loss": 3.1791,
      "step": 21669
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.145897388458252,
      "learning_rate": 0.0005869991850521768,
      "loss": 3.1527,
      "step": 21670
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.454195261001587,
      "learning_rate": 0.0005869979938773475,
      "loss": 3.2514,
      "step": 21671
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.254124641418457,
      "learning_rate": 0.0005869968026491598,
      "loss": 2.7464,
      "step": 21672
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6183584928512573,
      "learning_rate": 0.0005869956113676142,
      "loss": 2.9984,
      "step": 21673
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7863188982009888,
      "learning_rate": 0.0005869944200327105,
      "loss": 3.0498,
      "step": 21674
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.278738260269165,
      "learning_rate": 0.0005869932286444491,
      "loss": 3.1127,
      "step": 21675
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.19254469871521,
      "learning_rate": 0.0005869920372028304,
      "loss": 3.2179,
      "step": 21676
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4439079761505127,
      "learning_rate": 0.0005869908457078543,
      "loss": 3.0122,
      "step": 21677
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0844616889953613,
      "learning_rate": 0.0005869896541595214,
      "loss": 3.0389,
      "step": 21678
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7026052474975586,
      "learning_rate": 0.0005869884625578314,
      "loss": 3.1721,
      "step": 21679
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4346778392791748,
      "learning_rate": 0.0005869872709027851,
      "loss": 3.1206,
      "step": 21680
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3239848613739014,
      "learning_rate": 0.0005869860791943823,
      "loss": 2.9817,
      "step": 21681
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4835137128829956,
      "learning_rate": 0.0005869848874326235,
      "loss": 3.0454,
      "step": 21682
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8054405450820923,
      "learning_rate": 0.0005869836956175087,
      "loss": 2.9936,
      "step": 21683
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0747509002685547,
      "learning_rate": 0.0005869825037490382,
      "loss": 2.9265,
      "step": 21684
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6031039953231812,
      "learning_rate": 0.0005869813118272123,
      "loss": 3.1412,
      "step": 21685
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7746198177337646,
      "learning_rate": 0.0005869801198520312,
      "loss": 3.1429,
      "step": 21686
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.9657154083251953,
      "learning_rate": 0.0005869789278234949,
      "loss": 3.2261,
      "step": 21687
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8644118309020996,
      "learning_rate": 0.000586977735741604,
      "loss": 3.0027,
      "step": 21688
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7051196098327637,
      "learning_rate": 0.0005869765436063584,
      "loss": 3.0994,
      "step": 21689
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.522202491760254,
      "learning_rate": 0.0005869753514177584,
      "loss": 3.3171,
      "step": 21690
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5844024419784546,
      "learning_rate": 0.0005869741591758044,
      "loss": 3.1022,
      "step": 21691
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6201667785644531,
      "learning_rate": 0.0005869729668804963,
      "loss": 2.8877,
      "step": 21692
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3679720163345337,
      "learning_rate": 0.0005869717745318347,
      "loss": 3.1361,
      "step": 21693
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1522817611694336,
      "learning_rate": 0.0005869705821298195,
      "loss": 3.1435,
      "step": 21694
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4302762746810913,
      "learning_rate": 0.0005869693896744511,
      "loss": 3.0691,
      "step": 21695
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.473689079284668,
      "learning_rate": 0.0005869681971657297,
      "loss": 3.2072,
      "step": 21696
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5746945142745972,
      "learning_rate": 0.0005869670046036555,
      "loss": 3.4434,
      "step": 21697
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.813552737236023,
      "learning_rate": 0.0005869658119882288,
      "loss": 3.2814,
      "step": 21698
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5032272338867188,
      "learning_rate": 0.0005869646193194495,
      "loss": 2.8859,
      "step": 21699
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.304120659828186,
      "learning_rate": 0.0005869634265973183,
      "loss": 2.7746,
      "step": 21700
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1888779401779175,
      "learning_rate": 0.0005869622338218351,
      "loss": 3.1699,
      "step": 21701
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5853149890899658,
      "learning_rate": 0.0005869610409930001,
      "loss": 3.1485,
      "step": 21702
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.118436336517334,
      "learning_rate": 0.0005869598481108137,
      "loss": 3.1827,
      "step": 21703
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1035473346710205,
      "learning_rate": 0.0005869586551752761,
      "loss": 3.2253,
      "step": 21704
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.4579594135284424,
      "learning_rate": 0.0005869574621863873,
      "loss": 3.1151,
      "step": 21705
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1810569763183594,
      "learning_rate": 0.0005869562691441479,
      "loss": 3.0966,
      "step": 21706
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.27724289894104,
      "learning_rate": 0.0005869550760485578,
      "loss": 3.162,
      "step": 21707
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.9708104133605957,
      "learning_rate": 0.0005869538828996173,
      "loss": 2.9976,
      "step": 21708
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.8176517486572266,
      "learning_rate": 0.0005869526896973268,
      "loss": 3.0308,
      "step": 21709
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5005569458007812,
      "learning_rate": 0.0005869514964416863,
      "loss": 2.9644,
      "step": 21710
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4890754222869873,
      "learning_rate": 0.0005869503031326961,
      "loss": 3.0748,
      "step": 21711
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.973134994506836,
      "learning_rate": 0.0005869491097703563,
      "loss": 3.2039,
      "step": 21712
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3641688823699951,
      "learning_rate": 0.0005869479163546675,
      "loss": 3.293,
      "step": 21713
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5374979972839355,
      "learning_rate": 0.0005869467228856295,
      "loss": 3.1927,
      "step": 21714
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.412284255027771,
      "learning_rate": 0.0005869455293632427,
      "loss": 3.1551,
      "step": 21715
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7147982120513916,
      "learning_rate": 0.0005869443357875073,
      "loss": 3.1167,
      "step": 21716
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4672056436538696,
      "learning_rate": 0.0005869431421584235,
      "loss": 2.9978,
      "step": 21717
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4322766065597534,
      "learning_rate": 0.0005869419484759917,
      "loss": 2.8054,
      "step": 21718
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.270150065422058,
      "learning_rate": 0.0005869407547402118,
      "loss": 2.864,
      "step": 21719
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.323418140411377,
      "learning_rate": 0.0005869395609510843,
      "loss": 3.3714,
      "step": 21720
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5141595602035522,
      "learning_rate": 0.0005869383671086093,
      "loss": 3.121,
      "step": 21721
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.224496841430664,
      "learning_rate": 0.000586937173212787,
      "loss": 3.1863,
      "step": 21722
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5450387001037598,
      "learning_rate": 0.0005869359792636178,
      "loss": 3.2046,
      "step": 21723
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7763012647628784,
      "learning_rate": 0.0005869347852611016,
      "loss": 3.0031,
      "step": 21724
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.349247932434082,
      "learning_rate": 0.0005869335912052389,
      "loss": 3.0352,
      "step": 21725
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2164031267166138,
      "learning_rate": 0.0005869323970960299,
      "loss": 3.232,
      "step": 21726
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5480905771255493,
      "learning_rate": 0.0005869312029334747,
      "loss": 3.2774,
      "step": 21727
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3768435716629028,
      "learning_rate": 0.0005869300087175735,
      "loss": 2.7995,
      "step": 21728
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.220287799835205,
      "learning_rate": 0.0005869288144483266,
      "loss": 2.9041,
      "step": 21729
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.313826560974121,
      "learning_rate": 0.0005869276201257343,
      "loss": 3.0494,
      "step": 21730
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5769283771514893,
      "learning_rate": 0.0005869264257497967,
      "loss": 3.0986,
      "step": 21731
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7076692581176758,
      "learning_rate": 0.000586925231320514,
      "loss": 3.3668,
      "step": 21732
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.527720332145691,
      "learning_rate": 0.0005869240368378866,
      "loss": 3.1452,
      "step": 21733
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.56045663356781,
      "learning_rate": 0.0005869228423019146,
      "loss": 2.9598,
      "step": 21734
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5660927295684814,
      "learning_rate": 0.0005869216477125982,
      "loss": 3.1053,
      "step": 21735
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6336909532546997,
      "learning_rate": 0.0005869204530699376,
      "loss": 2.9416,
      "step": 21736
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.916642665863037,
      "learning_rate": 0.0005869192583739331,
      "loss": 3.0951,
      "step": 21737
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9489001035690308,
      "learning_rate": 0.0005869180636245848,
      "loss": 3.1416,
      "step": 21738
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.256349802017212,
      "learning_rate": 0.0005869168688218932,
      "loss": 3.3736,
      "step": 21739
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5532177686691284,
      "learning_rate": 0.0005869156739658582,
      "loss": 3.077,
      "step": 21740
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.722023606300354,
      "learning_rate": 0.0005869144790564803,
      "loss": 2.9146,
      "step": 21741
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2601622343063354,
      "learning_rate": 0.0005869132840937595,
      "loss": 3.0605,
      "step": 21742
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5788331031799316,
      "learning_rate": 0.000586912089077696,
      "loss": 3.1878,
      "step": 21743
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2643282413482666,
      "learning_rate": 0.0005869108940082902,
      "loss": 3.1618,
      "step": 21744
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4240647554397583,
      "learning_rate": 0.0005869096988855423,
      "loss": 3.1108,
      "step": 21745
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6596022844314575,
      "learning_rate": 0.0005869085037094524,
      "loss": 2.8884,
      "step": 21746
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2814701795578003,
      "learning_rate": 0.0005869073084800208,
      "loss": 3.2022,
      "step": 21747
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9974775314331055,
      "learning_rate": 0.0005869061131972478,
      "loss": 3.0147,
      "step": 21748
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8428702354431152,
      "learning_rate": 0.0005869049178611335,
      "loss": 3.1588,
      "step": 21749
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.596836805343628,
      "learning_rate": 0.0005869037224716781,
      "loss": 3.1541,
      "step": 21750
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6067442893981934,
      "learning_rate": 0.0005869025270288821,
      "loss": 3.2063,
      "step": 21751
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3115061521530151,
      "learning_rate": 0.0005869013315327452,
      "loss": 3.0877,
      "step": 21752
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8914563655853271,
      "learning_rate": 0.0005869001359832681,
      "loss": 3.0653,
      "step": 21753
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.633998155593872,
      "learning_rate": 0.0005868989403804508,
      "loss": 2.986,
      "step": 21754
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2048799991607666,
      "learning_rate": 0.0005868977447242936,
      "loss": 3.3746,
      "step": 21755
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6599587202072144,
      "learning_rate": 0.0005868965490147967,
      "loss": 3.265,
      "step": 21756
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.422586441040039,
      "learning_rate": 0.0005868953532519603,
      "loss": 3.0519,
      "step": 21757
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9315067529678345,
      "learning_rate": 0.0005868941574357846,
      "loss": 3.0843,
      "step": 21758
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.552010416984558,
      "learning_rate": 0.0005868929615662698,
      "loss": 3.06,
      "step": 21759
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6123799085617065,
      "learning_rate": 0.0005868917656434164,
      "loss": 3.0648,
      "step": 21760
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3988280296325684,
      "learning_rate": 0.0005868905696672242,
      "loss": 3.0528,
      "step": 21761
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4374676942825317,
      "learning_rate": 0.0005868893736376938,
      "loss": 3.1223,
      "step": 21762
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3322477340698242,
      "learning_rate": 0.000586888177554825,
      "loss": 3.0661,
      "step": 21763
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.641243815422058,
      "learning_rate": 0.0005868869814186186,
      "loss": 3.2009,
      "step": 21764
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7526049613952637,
      "learning_rate": 0.0005868857852290742,
      "loss": 2.8277,
      "step": 21765
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3813174962997437,
      "learning_rate": 0.0005868845889861925,
      "loss": 3.1369,
      "step": 21766
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.769718885421753,
      "learning_rate": 0.0005868833926899735,
      "loss": 3.1391,
      "step": 21767
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3753573894500732,
      "learning_rate": 0.0005868821963404175,
      "loss": 3.1311,
      "step": 21768
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5156216621398926,
      "learning_rate": 0.0005868809999375246,
      "loss": 2.9153,
      "step": 21769
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4978973865509033,
      "learning_rate": 0.0005868798034812951,
      "loss": 3.2879,
      "step": 21770
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3720026016235352,
      "learning_rate": 0.0005868786069717292,
      "loss": 3.3017,
      "step": 21771
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.676346778869629,
      "learning_rate": 0.0005868774104088272,
      "loss": 2.9838,
      "step": 21772
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4244978427886963,
      "learning_rate": 0.0005868762137925894,
      "loss": 3.2398,
      "step": 21773
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8248333930969238,
      "learning_rate": 0.0005868750171230157,
      "loss": 3.1276,
      "step": 21774
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4734662771224976,
      "learning_rate": 0.0005868738204001067,
      "loss": 3.2022,
      "step": 21775
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5828542709350586,
      "learning_rate": 0.0005868726236238623,
      "loss": 3.1131,
      "step": 21776
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9881327152252197,
      "learning_rate": 0.0005868714267942828,
      "loss": 3.1232,
      "step": 21777
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6539323329925537,
      "learning_rate": 0.0005868702299113688,
      "loss": 2.8644,
      "step": 21778
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9669339656829834,
      "learning_rate": 0.0005868690329751199,
      "loss": 3.327,
      "step": 21779
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5373640060424805,
      "learning_rate": 0.0005868678359855368,
      "loss": 3.0614,
      "step": 21780
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3655508756637573,
      "learning_rate": 0.0005868666389426194,
      "loss": 3.1243,
      "step": 21781
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3961447477340698,
      "learning_rate": 0.0005868654418463682,
      "loss": 3.2732,
      "step": 21782
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6027263402938843,
      "learning_rate": 0.0005868642446967832,
      "loss": 3.1554,
      "step": 21783
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4466626644134521,
      "learning_rate": 0.0005868630474938648,
      "loss": 3.355,
      "step": 21784
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7615355253219604,
      "learning_rate": 0.0005868618502376132,
      "loss": 2.9327,
      "step": 21785
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.329702377319336,
      "learning_rate": 0.0005868606529280285,
      "loss": 3.217,
      "step": 21786
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.655544638633728,
      "learning_rate": 0.000586859455565111,
      "loss": 2.8358,
      "step": 21787
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.1907963752746582,
      "learning_rate": 0.0005868582581488609,
      "loss": 2.8227,
      "step": 21788
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1665098667144775,
      "learning_rate": 0.0005868570606792784,
      "loss": 2.9737,
      "step": 21789
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7228621244430542,
      "learning_rate": 0.0005868558631563638,
      "loss": 3.2229,
      "step": 21790
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.98516845703125,
      "learning_rate": 0.0005868546655801173,
      "loss": 3.019,
      "step": 21791
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6133373975753784,
      "learning_rate": 0.000586853467950539,
      "loss": 3.3328,
      "step": 21792
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.909011960029602,
      "learning_rate": 0.0005868522702676294,
      "loss": 3.3685,
      "step": 21793
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.637361764907837,
      "learning_rate": 0.0005868510725313884,
      "loss": 3.071,
      "step": 21794
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4017740488052368,
      "learning_rate": 0.0005868498747418165,
      "loss": 3.0978,
      "step": 21795
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4524766206741333,
      "learning_rate": 0.0005868486768989137,
      "loss": 3.2047,
      "step": 21796
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4533538818359375,
      "learning_rate": 0.0005868474790026803,
      "loss": 3.0053,
      "step": 21797
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.454275131225586,
      "learning_rate": 0.0005868462810531166,
      "loss": 3.0495,
      "step": 21798
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.743109107017517,
      "learning_rate": 0.0005868450830502227,
      "loss": 3.2183,
      "step": 21799
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.03188419342041,
      "learning_rate": 0.000586843884993999,
      "loss": 3.1155,
      "step": 21800
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6897075176239014,
      "learning_rate": 0.0005868426868844454,
      "loss": 3.1267,
      "step": 21801
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5243043899536133,
      "learning_rate": 0.0005868414887215625,
      "loss": 3.1528,
      "step": 21802
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3834809064865112,
      "learning_rate": 0.0005868402905053504,
      "loss": 3.2227,
      "step": 21803
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.460060477256775,
      "learning_rate": 0.0005868390922358091,
      "loss": 2.9737,
      "step": 21804
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.628626823425293,
      "learning_rate": 0.000586837893912939,
      "loss": 3.0783,
      "step": 21805
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5735934972763062,
      "learning_rate": 0.0005868366955367405,
      "loss": 3.2102,
      "step": 21806
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.2815473079681396,
      "learning_rate": 0.0005868354971072135,
      "loss": 2.957,
      "step": 21807
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.843959093093872,
      "learning_rate": 0.0005868342986243585,
      "loss": 3.0396,
      "step": 21808
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7878977060317993,
      "learning_rate": 0.0005868331000881755,
      "loss": 3.1877,
      "step": 21809
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.3417229652404785,
      "learning_rate": 0.0005868319014986648,
      "loss": 3.1867,
      "step": 21810
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.433469295501709,
      "learning_rate": 0.0005868307028558266,
      "loss": 3.1863,
      "step": 21811
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4855982065200806,
      "learning_rate": 0.0005868295041596613,
      "loss": 3.0752,
      "step": 21812
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.713557243347168,
      "learning_rate": 0.0005868283054101689,
      "loss": 3.1098,
      "step": 21813
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5188969373703003,
      "learning_rate": 0.0005868271066073496,
      "loss": 3.2518,
      "step": 21814
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4515177011489868,
      "learning_rate": 0.0005868259077512039,
      "loss": 3.2076,
      "step": 21815
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5796059370040894,
      "learning_rate": 0.0005868247088417316,
      "loss": 3.0199,
      "step": 21816
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5709240436553955,
      "learning_rate": 0.0005868235098789334,
      "loss": 3.0083,
      "step": 21817
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.307589054107666,
      "learning_rate": 0.0005868223108628093,
      "loss": 3.1987,
      "step": 21818
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6223756074905396,
      "learning_rate": 0.0005868211117933595,
      "loss": 2.8474,
      "step": 21819
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.256110191345215,
      "learning_rate": 0.0005868199126705841,
      "loss": 3.1092,
      "step": 21820
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.317606210708618,
      "learning_rate": 0.0005868187134944836,
      "loss": 2.7513,
      "step": 21821
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3982070684432983,
      "learning_rate": 0.0005868175142650581,
      "loss": 3.0854,
      "step": 21822
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5946108102798462,
      "learning_rate": 0.0005868163149823077,
      "loss": 3.2276,
      "step": 21823
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6150472164154053,
      "learning_rate": 0.0005868151156462328,
      "loss": 3.2594,
      "step": 21824
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5441516637802124,
      "learning_rate": 0.0005868139162568336,
      "loss": 3.1839,
      "step": 21825
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.422339677810669,
      "learning_rate": 0.0005868127168141103,
      "loss": 3.3957,
      "step": 21826
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3095359802246094,
      "learning_rate": 0.000586811517318063,
      "loss": 3.0312,
      "step": 21827
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5098520517349243,
      "learning_rate": 0.000586810317768692,
      "loss": 3.0892,
      "step": 21828
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4901376962661743,
      "learning_rate": 0.0005868091181659975,
      "loss": 3.0929,
      "step": 21829
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4413210153579712,
      "learning_rate": 0.0005868079185099799,
      "loss": 3.3361,
      "step": 21830
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.4853274822235107,
      "learning_rate": 0.0005868067188006393,
      "loss": 3.0255,
      "step": 21831
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3551132678985596,
      "learning_rate": 0.0005868055190379759,
      "loss": 3.0753,
      "step": 21832
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9191340208053589,
      "learning_rate": 0.0005868043192219899,
      "loss": 3.0106,
      "step": 21833
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.345750331878662,
      "learning_rate": 0.0005868031193526816,
      "loss": 3.4874,
      "step": 21834
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.030683755874634,
      "learning_rate": 0.0005868019194300512,
      "loss": 3.0448,
      "step": 21835
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.018787384033203,
      "learning_rate": 0.0005868007194540988,
      "loss": 3.1213,
      "step": 21836
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8868860006332397,
      "learning_rate": 0.0005867995194248248,
      "loss": 3.1375,
      "step": 21837
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9067602157592773,
      "learning_rate": 0.0005867983193422293,
      "loss": 2.8882,
      "step": 21838
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6965407133102417,
      "learning_rate": 0.0005867971192063127,
      "loss": 3.1452,
      "step": 21839
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7651185989379883,
      "learning_rate": 0.0005867959190170751,
      "loss": 3.2868,
      "step": 21840
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6812373399734497,
      "learning_rate": 0.0005867947187745168,
      "loss": 3.1641,
      "step": 21841
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.5612599849700928,
      "learning_rate": 0.0005867935184786377,
      "loss": 3.1376,
      "step": 21842
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.328896999359131,
      "learning_rate": 0.0005867923181294384,
      "loss": 2.9545,
      "step": 21843
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7248867750167847,
      "learning_rate": 0.0005867911177269191,
      "loss": 3.067,
      "step": 21844
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4190195798873901,
      "learning_rate": 0.0005867899172710797,
      "loss": 3.1808,
      "step": 21845
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.3980188369750977,
      "learning_rate": 0.0005867887167619207,
      "loss": 2.9898,
      "step": 21846
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5570592880249023,
      "learning_rate": 0.0005867875161994424,
      "loss": 3.1528,
      "step": 21847
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4589014053344727,
      "learning_rate": 0.0005867863155836448,
      "loss": 3.0017,
      "step": 21848
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9604252576828003,
      "learning_rate": 0.0005867851149145282,
      "loss": 3.1076,
      "step": 21849
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2923328876495361,
      "learning_rate": 0.0005867839141920928,
      "loss": 3.0735,
      "step": 21850
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4192912578582764,
      "learning_rate": 0.0005867827134163389,
      "loss": 3.2615,
      "step": 21851
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5160138607025146,
      "learning_rate": 0.0005867815125872667,
      "loss": 3.5061,
      "step": 21852
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8112848997116089,
      "learning_rate": 0.0005867803117048764,
      "loss": 3.0532,
      "step": 21853
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4024478197097778,
      "learning_rate": 0.0005867791107691682,
      "loss": 3.3044,
      "step": 21854
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7152141332626343,
      "learning_rate": 0.0005867779097801423,
      "loss": 3.1447,
      "step": 21855
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.383474826812744,
      "learning_rate": 0.0005867767087377991,
      "loss": 3.0596,
      "step": 21856
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.9880716800689697,
      "learning_rate": 0.0005867755076421386,
      "loss": 3.1499,
      "step": 21857
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.007143259048462,
      "learning_rate": 0.0005867743064931611,
      "loss": 2.9648,
      "step": 21858
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.1379306316375732,
      "learning_rate": 0.000586773105290867,
      "loss": 2.9409,
      "step": 21859
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.328432321548462,
      "learning_rate": 0.0005867719040352563,
      "loss": 3.3247,
      "step": 21860
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.01108717918396,
      "learning_rate": 0.0005867707027263291,
      "loss": 3.1333,
      "step": 21861
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3721868991851807,
      "learning_rate": 0.000586769501364086,
      "loss": 2.8452,
      "step": 21862
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.733353614807129,
      "learning_rate": 0.000586768299948527,
      "loss": 3.0699,
      "step": 21863
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2725423574447632,
      "learning_rate": 0.0005867670984796525,
      "loss": 3.2668,
      "step": 21864
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.736987829208374,
      "learning_rate": 0.0005867658969574623,
      "loss": 3.0867,
      "step": 21865
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6890153884887695,
      "learning_rate": 0.000586764695381957,
      "loss": 3.1445,
      "step": 21866
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5747432708740234,
      "learning_rate": 0.0005867634937531369,
      "loss": 3.2283,
      "step": 21867
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.869952917098999,
      "learning_rate": 0.0005867622920710018,
      "loss": 3.0567,
      "step": 21868
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.8172242641448975,
      "learning_rate": 0.0005867610903355524,
      "loss": 3.2084,
      "step": 21869
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7911790609359741,
      "learning_rate": 0.0005867598885467886,
      "loss": 3.2462,
      "step": 21870
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7512831687927246,
      "learning_rate": 0.0005867586867047107,
      "loss": 3.369,
      "step": 21871
    },
    {
      "epoch": 0.28,
      "grad_norm": 3.4127824306488037,
      "learning_rate": 0.000586757484809319,
      "loss": 3.365,
      "step": 21872
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6421324014663696,
      "learning_rate": 0.0005867562828606136,
      "loss": 3.1543,
      "step": 21873
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.085488796234131,
      "learning_rate": 0.0005867550808585949,
      "loss": 3.0544,
      "step": 21874
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7832220792770386,
      "learning_rate": 0.000586753878803263,
      "loss": 2.9011,
      "step": 21875
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2845655679702759,
      "learning_rate": 0.000586752676694618,
      "loss": 2.9595,
      "step": 21876
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6115107536315918,
      "learning_rate": 0.0005867514745326605,
      "loss": 3.0314,
      "step": 21877
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5593270063400269,
      "learning_rate": 0.0005867502723173903,
      "loss": 2.9802,
      "step": 21878
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.6309250593185425,
      "learning_rate": 0.0005867490700488079,
      "loss": 3.2047,
      "step": 21879
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.3153191804885864,
      "learning_rate": 0.0005867478677269134,
      "loss": 3.1577,
      "step": 21880
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4933546781539917,
      "learning_rate": 0.0005867466653517071,
      "loss": 2.9973,
      "step": 21881
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.2217764854431152,
      "learning_rate": 0.0005867454629231891,
      "loss": 3.123,
      "step": 21882
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.0156798362731934,
      "learning_rate": 0.0005867442604413597,
      "loss": 2.9335,
      "step": 21883
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.5280380249023438,
      "learning_rate": 0.0005867430579062192,
      "loss": 3.246,
      "step": 21884
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.948504090309143,
      "learning_rate": 0.0005867418553177678,
      "loss": 3.3352,
      "step": 21885
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.3470633029937744,
      "learning_rate": 0.0005867406526760056,
      "loss": 3.0711,
      "step": 21886
    },
    {
      "epoch": 0.28,
      "grad_norm": 2.858560800552368,
      "learning_rate": 0.0005867394499809328,
      "loss": 3.2978,
      "step": 21887
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.4925440549850464,
      "learning_rate": 0.0005867382472325499,
      "loss": 2.8777,
      "step": 21888
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.653960704803467,
      "learning_rate": 0.0005867370444308568,
      "loss": 3.109,
      "step": 21889
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.122086763381958,
      "learning_rate": 0.0005867358415758539,
      "loss": 3.1441,
      "step": 21890
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.8359134197235107,
      "learning_rate": 0.0005867346386675416,
      "loss": 2.9644,
      "step": 21891
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5559033155441284,
      "learning_rate": 0.0005867334357059197,
      "loss": 3.1058,
      "step": 21892
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6954824924468994,
      "learning_rate": 0.0005867322326909887,
      "loss": 3.2624,
      "step": 21893
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.620194911956787,
      "learning_rate": 0.0005867310296227488,
      "loss": 3.0742,
      "step": 21894
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6226837635040283,
      "learning_rate": 0.0005867298265012002,
      "loss": 2.994,
      "step": 21895
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3281357288360596,
      "learning_rate": 0.000586728623326343,
      "loss": 3.1531,
      "step": 21896
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.086190223693848,
      "learning_rate": 0.0005867274200981776,
      "loss": 3.055,
      "step": 21897
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3754239082336426,
      "learning_rate": 0.0005867262168167042,
      "loss": 3.2053,
      "step": 21898
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5310050249099731,
      "learning_rate": 0.0005867250134819229,
      "loss": 3.3914,
      "step": 21899
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2020771503448486,
      "learning_rate": 0.0005867238100938341,
      "loss": 3.3389,
      "step": 21900
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5024242401123047,
      "learning_rate": 0.0005867226066524378,
      "loss": 3.19,
      "step": 21901
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3233436346054077,
      "learning_rate": 0.0005867214031577345,
      "loss": 3.2136,
      "step": 21902
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4767383337020874,
      "learning_rate": 0.0005867201996097243,
      "loss": 3.1165,
      "step": 21903
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3734462261199951,
      "learning_rate": 0.0005867189960084073,
      "loss": 3.1105,
      "step": 21904
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2791701555252075,
      "learning_rate": 0.0005867177923537839,
      "loss": 3.0078,
      "step": 21905
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9084932804107666,
      "learning_rate": 0.0005867165886458541,
      "loss": 3.2786,
      "step": 21906
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.293306827545166,
      "learning_rate": 0.0005867153848846184,
      "loss": 2.9605,
      "step": 21907
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5091986656188965,
      "learning_rate": 0.0005867141810700769,
      "loss": 3.0513,
      "step": 21908
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4138919115066528,
      "learning_rate": 0.0005867129772022298,
      "loss": 3.1844,
      "step": 21909
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2805278301239014,
      "learning_rate": 0.0005867117732810774,
      "loss": 2.9611,
      "step": 21910
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5708898305892944,
      "learning_rate": 0.0005867105693066197,
      "loss": 2.8302,
      "step": 21911
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2378402948379517,
      "learning_rate": 0.0005867093652788572,
      "loss": 3.1704,
      "step": 21912
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.570785641670227,
      "learning_rate": 0.0005867081611977901,
      "loss": 3.0827,
      "step": 21913
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4461631774902344,
      "learning_rate": 0.0005867069570634184,
      "loss": 3.3774,
      "step": 21914
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.297288417816162,
      "learning_rate": 0.0005867057528757425,
      "loss": 3.0066,
      "step": 21915
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4089410305023193,
      "learning_rate": 0.0005867045486347627,
      "loss": 3.0078,
      "step": 21916
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6161918640136719,
      "learning_rate": 0.000586703344340479,
      "loss": 3.0543,
      "step": 21917
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3238288164138794,
      "learning_rate": 0.0005867021399928918,
      "loss": 2.9973,
      "step": 21918
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3677215576171875,
      "learning_rate": 0.0005867009355920011,
      "loss": 3.3033,
      "step": 21919
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3424150943756104,
      "learning_rate": 0.0005866997311378075,
      "loss": 2.959,
      "step": 21920
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.721464991569519,
      "learning_rate": 0.000586698526630311,
      "loss": 3.0273,
      "step": 21921
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6993144750595093,
      "learning_rate": 0.0005866973220695116,
      "loss": 3.0586,
      "step": 21922
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3675097227096558,
      "learning_rate": 0.00058669611745541,
      "loss": 2.9396,
      "step": 21923
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5041704177856445,
      "learning_rate": 0.000586694912788006,
      "loss": 3.1657,
      "step": 21924
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.729988694190979,
      "learning_rate": 0.0005866937080673002,
      "loss": 3.4173,
      "step": 21925
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7444396018981934,
      "learning_rate": 0.0005866925032932924,
      "loss": 3.0475,
      "step": 21926
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5599737167358398,
      "learning_rate": 0.0005866912984659832,
      "loss": 3.2832,
      "step": 21927
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6441872119903564,
      "learning_rate": 0.0005866900935853727,
      "loss": 2.8529,
      "step": 21928
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.528994083404541,
      "learning_rate": 0.000586688888651461,
      "loss": 3.369,
      "step": 21929
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2806981801986694,
      "learning_rate": 0.0005866876836642485,
      "loss": 3.1007,
      "step": 21930
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.246657371520996,
      "learning_rate": 0.0005866864786237353,
      "loss": 3.2412,
      "step": 21931
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.9068446159362793,
      "learning_rate": 0.0005866852735299217,
      "loss": 3.0016,
      "step": 21932
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5531647205352783,
      "learning_rate": 0.000586684068382808,
      "loss": 3.2248,
      "step": 21933
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1373093128204346,
      "learning_rate": 0.0005866828631823941,
      "loss": 3.2691,
      "step": 21934
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7566070556640625,
      "learning_rate": 0.0005866816579286805,
      "loss": 3.06,
      "step": 21935
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7542752027511597,
      "learning_rate": 0.0005866804526216675,
      "loss": 3.2563,
      "step": 21936
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2413456439971924,
      "learning_rate": 0.000586679247261355,
      "loss": 3.2126,
      "step": 21937
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.804425597190857,
      "learning_rate": 0.0005866780418477436,
      "loss": 2.9854,
      "step": 21938
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.017068386077881,
      "learning_rate": 0.0005866768363808332,
      "loss": 3.2134,
      "step": 21939
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2271809577941895,
      "learning_rate": 0.0005866756308606243,
      "loss": 2.8966,
      "step": 21940
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.831050992012024,
      "learning_rate": 0.0005866744252871169,
      "loss": 3.0535,
      "step": 21941
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7466442584991455,
      "learning_rate": 0.0005866732196603113,
      "loss": 2.9669,
      "step": 21942
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5753273963928223,
      "learning_rate": 0.0005866720139802078,
      "loss": 2.9035,
      "step": 21943
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4715677499771118,
      "learning_rate": 0.0005866708082468066,
      "loss": 3.016,
      "step": 21944
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8101097345352173,
      "learning_rate": 0.0005866696024601078,
      "loss": 2.958,
      "step": 21945
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7709190845489502,
      "learning_rate": 0.0005866683966201117,
      "loss": 2.9353,
      "step": 21946
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.55294668674469,
      "learning_rate": 0.0005866671907268186,
      "loss": 3.1044,
      "step": 21947
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5067946910858154,
      "learning_rate": 0.0005866659847802285,
      "loss": 2.823,
      "step": 21948
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5028067827224731,
      "learning_rate": 0.000586664778780342,
      "loss": 3.0135,
      "step": 21949
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.64348566532135,
      "learning_rate": 0.000586663572727159,
      "loss": 3.2498,
      "step": 21950
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2933133840560913,
      "learning_rate": 0.0005866623666206798,
      "loss": 3.0141,
      "step": 21951
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3256561756134033,
      "learning_rate": 0.0005866611604609047,
      "loss": 3.1686,
      "step": 21952
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.520178198814392,
      "learning_rate": 0.0005866599542478339,
      "loss": 3.2269,
      "step": 21953
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5532649755477905,
      "learning_rate": 0.0005866587479814676,
      "loss": 3.1331,
      "step": 21954
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5681562423706055,
      "learning_rate": 0.000586657541661806,
      "loss": 2.7031,
      "step": 21955
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3211196660995483,
      "learning_rate": 0.0005866563352888493,
      "loss": 3.2019,
      "step": 21956
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.396201729774475,
      "learning_rate": 0.0005866551288625978,
      "loss": 3.0632,
      "step": 21957
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5743359327316284,
      "learning_rate": 0.0005866539223830517,
      "loss": 3.0538,
      "step": 21958
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5019311904907227,
      "learning_rate": 0.0005866527158502113,
      "loss": 3.16,
      "step": 21959
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4900827407836914,
      "learning_rate": 0.0005866515092640766,
      "loss": 3.0866,
      "step": 21960
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6932755708694458,
      "learning_rate": 0.0005866503026246482,
      "loss": 3.1914,
      "step": 21961
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5275920629501343,
      "learning_rate": 0.000586649095931926,
      "loss": 3.1403,
      "step": 21962
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3422707319259644,
      "learning_rate": 0.0005866478891859103,
      "loss": 3.1619,
      "step": 21963
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2972744703292847,
      "learning_rate": 0.0005866466823866012,
      "loss": 3.2399,
      "step": 21964
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6964125633239746,
      "learning_rate": 0.0005866454755339993,
      "loss": 2.8085,
      "step": 21965
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6272094249725342,
      "learning_rate": 0.0005866442686281046,
      "loss": 3.2768,
      "step": 21966
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.702105164527893,
      "learning_rate": 0.0005866430616689171,
      "loss": 3.3011,
      "step": 21967
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.03574800491333,
      "learning_rate": 0.0005866418546564375,
      "loss": 3.2762,
      "step": 21968
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7819565534591675,
      "learning_rate": 0.0005866406475906656,
      "loss": 3.2038,
      "step": 21969
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4533286094665527,
      "learning_rate": 0.0005866394404716018,
      "loss": 3.066,
      "step": 21970
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.817118763923645,
      "learning_rate": 0.0005866382332992464,
      "loss": 3.1693,
      "step": 21971
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.758707880973816,
      "learning_rate": 0.0005866370260735994,
      "loss": 2.8181,
      "step": 21972
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.498986005783081,
      "learning_rate": 0.0005866358187946614,
      "loss": 3.1939,
      "step": 21973
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4220941066741943,
      "learning_rate": 0.0005866346114624322,
      "loss": 3.11,
      "step": 21974
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.438714861869812,
      "learning_rate": 0.0005866334040769123,
      "loss": 3.2495,
      "step": 21975
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7643429040908813,
      "learning_rate": 0.0005866321966381017,
      "loss": 3.0156,
      "step": 21976
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5834078788757324,
      "learning_rate": 0.0005866309891460009,
      "loss": 3.0535,
      "step": 21977
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.313340425491333,
      "learning_rate": 0.0005866297816006099,
      "loss": 3.1839,
      "step": 21978
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.190889835357666,
      "learning_rate": 0.0005866285740019292,
      "loss": 3.1682,
      "step": 21979
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.471158027648926,
      "learning_rate": 0.0005866273663499587,
      "loss": 3.1013,
      "step": 21980
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0058882236480713,
      "learning_rate": 0.0005866261586446988,
      "loss": 3.158,
      "step": 21981
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4818592071533203,
      "learning_rate": 0.0005866249508861496,
      "loss": 3.5512,
      "step": 21982
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.146094799041748,
      "learning_rate": 0.0005866237430743115,
      "loss": 3.0395,
      "step": 21983
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.578480839729309,
      "learning_rate": 0.0005866225352091846,
      "loss": 2.9985,
      "step": 21984
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.632855772972107,
      "learning_rate": 0.0005866213272907691,
      "loss": 3.0042,
      "step": 21985
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2863737344741821,
      "learning_rate": 0.0005866201193190654,
      "loss": 3.2592,
      "step": 21986
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3251900672912598,
      "learning_rate": 0.0005866189112940735,
      "loss": 3.1493,
      "step": 21987
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7159878015518188,
      "learning_rate": 0.0005866177032157938,
      "loss": 2.8479,
      "step": 21988
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5202175378799438,
      "learning_rate": 0.0005866164950842265,
      "loss": 3.0611,
      "step": 21989
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6428213119506836,
      "learning_rate": 0.0005866152868993717,
      "loss": 3.0479,
      "step": 21990
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.30788254737854,
      "learning_rate": 0.0005866140786612297,
      "loss": 3.0636,
      "step": 21991
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9277338981628418,
      "learning_rate": 0.0005866128703698008,
      "loss": 3.314,
      "step": 21992
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8464027643203735,
      "learning_rate": 0.0005866116620250852,
      "loss": 3.1259,
      "step": 21993
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.526242733001709,
      "learning_rate": 0.0005866104536270829,
      "loss": 3.3094,
      "step": 21994
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.225043535232544,
      "learning_rate": 0.0005866092451757945,
      "loss": 3.1177,
      "step": 21995
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.006742238998413,
      "learning_rate": 0.00058660803667122,
      "loss": 3.1559,
      "step": 21996
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5271788835525513,
      "learning_rate": 0.0005866068281133595,
      "loss": 3.0673,
      "step": 21997
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5575146675109863,
      "learning_rate": 0.0005866056195022136,
      "loss": 3.4184,
      "step": 21998
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3534443378448486,
      "learning_rate": 0.0005866044108377822,
      "loss": 2.9142,
      "step": 21999
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.978231191635132,
      "learning_rate": 0.0005866032021200656,
      "loss": 3.034,
      "step": 22000
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6419641971588135,
      "learning_rate": 0.000586601993349064,
      "loss": 3.0509,
      "step": 22001
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.484309434890747,
      "learning_rate": 0.0005866007845247778,
      "loss": 3.0277,
      "step": 22002
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4591114521026611,
      "learning_rate": 0.000586599575647207,
      "loss": 3.0402,
      "step": 22003
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.711139440536499,
      "learning_rate": 0.0005865983667163521,
      "loss": 3.0471,
      "step": 22004
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5510709285736084,
      "learning_rate": 0.0005865971577322129,
      "loss": 3.105,
      "step": 22005
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6897529363632202,
      "learning_rate": 0.0005865959486947901,
      "loss": 3.2315,
      "step": 22006
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5896879434585571,
      "learning_rate": 0.0005865947396040837,
      "loss": 3.0685,
      "step": 22007
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3871251344680786,
      "learning_rate": 0.0005865935304600938,
      "loss": 3.0816,
      "step": 22008
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6441715955734253,
      "learning_rate": 0.0005865923212628207,
      "loss": 3.0228,
      "step": 22009
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3761366605758667,
      "learning_rate": 0.0005865911120122649,
      "loss": 3.1714,
      "step": 22010
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3652616739273071,
      "learning_rate": 0.0005865899027084263,
      "loss": 3.2673,
      "step": 22011
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2886658906936646,
      "learning_rate": 0.0005865886933513052,
      "loss": 3.0938,
      "step": 22012
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5505739450454712,
      "learning_rate": 0.0005865874839409019,
      "loss": 2.7809,
      "step": 22013
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5510494709014893,
      "learning_rate": 0.0005865862744772165,
      "loss": 2.9069,
      "step": 22014
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7050013542175293,
      "learning_rate": 0.0005865850649602493,
      "loss": 2.9736,
      "step": 22015
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9987921714782715,
      "learning_rate": 0.0005865838553900007,
      "loss": 2.9282,
      "step": 22016
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8348864316940308,
      "learning_rate": 0.0005865826457664706,
      "loss": 2.8247,
      "step": 22017
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5622906684875488,
      "learning_rate": 0.0005865814360896594,
      "loss": 3.1352,
      "step": 22018
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9246649742126465,
      "learning_rate": 0.0005865802263595672,
      "loss": 3.0363,
      "step": 22019
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.227755069732666,
      "learning_rate": 0.0005865790165761945,
      "loss": 2.9925,
      "step": 22020
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4437798261642456,
      "learning_rate": 0.0005865778067395413,
      "loss": 3.1377,
      "step": 22021
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4950556755065918,
      "learning_rate": 0.0005865765968496078,
      "loss": 3.0538,
      "step": 22022
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.348381757736206,
      "learning_rate": 0.0005865753869063944,
      "loss": 3.1537,
      "step": 22023
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5791486501693726,
      "learning_rate": 0.0005865741769099012,
      "loss": 2.9564,
      "step": 22024
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3942036628723145,
      "learning_rate": 0.0005865729668601284,
      "loss": 2.7402,
      "step": 22025
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0395777225494385,
      "learning_rate": 0.0005865717567570763,
      "loss": 3.1559,
      "step": 22026
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7314939498901367,
      "learning_rate": 0.000586570546600745,
      "loss": 3.1168,
      "step": 22027
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6630780696868896,
      "learning_rate": 0.000586569336391135,
      "loss": 3.042,
      "step": 22028
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7848291397094727,
      "learning_rate": 0.0005865681261282463,
      "loss": 3.107,
      "step": 22029
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.22163987159729,
      "learning_rate": 0.0005865669158120792,
      "loss": 3.0378,
      "step": 22030
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5136550664901733,
      "learning_rate": 0.0005865657054426339,
      "loss": 3.0867,
      "step": 22031
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7052991390228271,
      "learning_rate": 0.0005865644950199105,
      "loss": 3.0122,
      "step": 22032
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7427759170532227,
      "learning_rate": 0.0005865632845439095,
      "loss": 3.0798,
      "step": 22033
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5712902545928955,
      "learning_rate": 0.0005865620740146309,
      "loss": 3.3064,
      "step": 22034
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.332949161529541,
      "learning_rate": 0.000586560863432075,
      "loss": 3.1097,
      "step": 22035
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.005676746368408,
      "learning_rate": 0.000586559652796242,
      "loss": 3.1143,
      "step": 22036
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.012253522872925,
      "learning_rate": 0.0005865584421071323,
      "loss": 3.3653,
      "step": 22037
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4828208684921265,
      "learning_rate": 0.0005865572313647459,
      "loss": 3.0511,
      "step": 22038
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7878495454788208,
      "learning_rate": 0.000586556020569083,
      "loss": 3.0794,
      "step": 22039
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2793521881103516,
      "learning_rate": 0.0005865548097201441,
      "loss": 3.105,
      "step": 22040
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1708258390426636,
      "learning_rate": 0.000586553598817929,
      "loss": 3.1671,
      "step": 22041
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.699259877204895,
      "learning_rate": 0.0005865523878624384,
      "loss": 3.135,
      "step": 22042
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4217946529388428,
      "learning_rate": 0.0005865511768536723,
      "loss": 3.0937,
      "step": 22043
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7602055072784424,
      "learning_rate": 0.0005865499657916308,
      "loss": 3.2788,
      "step": 22044
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3186743259429932,
      "learning_rate": 0.0005865487546763143,
      "loss": 3.4048,
      "step": 22045
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.232614755630493,
      "learning_rate": 0.000586547543507723,
      "loss": 2.9885,
      "step": 22046
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8593169450759888,
      "learning_rate": 0.0005865463322858571,
      "loss": 3.2183,
      "step": 22047
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5607130527496338,
      "learning_rate": 0.0005865451210107168,
      "loss": 3.1897,
      "step": 22048
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6089904308319092,
      "learning_rate": 0.0005865439096823024,
      "loss": 2.857,
      "step": 22049
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9237209558486938,
      "learning_rate": 0.000586542698300614,
      "loss": 3.1121,
      "step": 22050
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2890747785568237,
      "learning_rate": 0.000586541486865652,
      "loss": 3.0562,
      "step": 22051
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6232391595840454,
      "learning_rate": 0.0005865402753774165,
      "loss": 2.9571,
      "step": 22052
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.048006296157837,
      "learning_rate": 0.0005865390638359078,
      "loss": 3.3466,
      "step": 22053
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4559228420257568,
      "learning_rate": 0.0005865378522411259,
      "loss": 3.168,
      "step": 22054
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3952170610427856,
      "learning_rate": 0.0005865366405930713,
      "loss": 3.2448,
      "step": 22055
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.355333685874939,
      "learning_rate": 0.0005865354288917442,
      "loss": 2.9811,
      "step": 22056
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3648754358291626,
      "learning_rate": 0.0005865342171371447,
      "loss": 2.912,
      "step": 22057
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6095263957977295,
      "learning_rate": 0.000586533005329273,
      "loss": 2.9169,
      "step": 22058
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3063586950302124,
      "learning_rate": 0.0005865317934681295,
      "loss": 3.0018,
      "step": 22059
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5742055177688599,
      "learning_rate": 0.0005865305815537144,
      "loss": 3.2413,
      "step": 22060
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6968834400177002,
      "learning_rate": 0.0005865293695860277,
      "loss": 3.3371,
      "step": 22061
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2611126899719238,
      "learning_rate": 0.0005865281575650699,
      "loss": 3.4176,
      "step": 22062
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6169042587280273,
      "learning_rate": 0.0005865269454908411,
      "loss": 3.0517,
      "step": 22063
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.644118547439575,
      "learning_rate": 0.0005865257333633415,
      "loss": 2.7893,
      "step": 22064
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7199774980545044,
      "learning_rate": 0.0005865245211825713,
      "loss": 3.1597,
      "step": 22065
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7390904426574707,
      "learning_rate": 0.0005865233089485309,
      "loss": 3.1471,
      "step": 22066
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5004998445510864,
      "learning_rate": 0.0005865220966612204,
      "loss": 3.2564,
      "step": 22067
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4937061071395874,
      "learning_rate": 0.0005865208843206399,
      "loss": 3.1078,
      "step": 22068
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6541908979415894,
      "learning_rate": 0.0005865196719267898,
      "loss": 3.0295,
      "step": 22069
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.874984860420227,
      "learning_rate": 0.0005865184594796704,
      "loss": 2.942,
      "step": 22070
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7632278203964233,
      "learning_rate": 0.0005865172469792817,
      "loss": 2.9336,
      "step": 22071
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.407604694366455,
      "learning_rate": 0.0005865160344256241,
      "loss": 3.2237,
      "step": 22072
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5247175693511963,
      "learning_rate": 0.0005865148218186977,
      "loss": 3.0516,
      "step": 22073
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2808349132537842,
      "learning_rate": 0.0005865136091585028,
      "loss": 3.1488,
      "step": 22074
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.779745578765869,
      "learning_rate": 0.0005865123964450396,
      "loss": 3.0769,
      "step": 22075
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7212973833084106,
      "learning_rate": 0.0005865111836783084,
      "loss": 2.8614,
      "step": 22076
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2636890411376953,
      "learning_rate": 0.0005865099708583094,
      "loss": 3.0697,
      "step": 22077
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2596981525421143,
      "learning_rate": 0.0005865087579850426,
      "loss": 3.2213,
      "step": 22078
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6445852518081665,
      "learning_rate": 0.0005865075450585086,
      "loss": 3.0063,
      "step": 22079
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7214176654815674,
      "learning_rate": 0.0005865063320787073,
      "loss": 3.1537,
      "step": 22080
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7135443687438965,
      "learning_rate": 0.0005865051190456392,
      "loss": 3.2435,
      "step": 22081
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8149935007095337,
      "learning_rate": 0.0005865039059593043,
      "loss": 3.1945,
      "step": 22082
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.369500160217285,
      "learning_rate": 0.0005865026928197029,
      "loss": 3.0639,
      "step": 22083
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.369506359100342,
      "learning_rate": 0.0005865014796268353,
      "loss": 3.02,
      "step": 22084
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7081661224365234,
      "learning_rate": 0.0005865002663807017,
      "loss": 3.0531,
      "step": 22085
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.285768985748291,
      "learning_rate": 0.0005864990530813022,
      "loss": 3.0546,
      "step": 22086
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6724157333374023,
      "learning_rate": 0.0005864978397286372,
      "loss": 2.8653,
      "step": 22087
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6486085653305054,
      "learning_rate": 0.0005864966263227067,
      "loss": 2.9931,
      "step": 22088
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.684851050376892,
      "learning_rate": 0.0005864954128635112,
      "loss": 3.0455,
      "step": 22089
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2769458293914795,
      "learning_rate": 0.0005864941993510508,
      "loss": 3.1007,
      "step": 22090
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6623634099960327,
      "learning_rate": 0.0005864929857853256,
      "loss": 2.9576,
      "step": 22091
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8394581079483032,
      "learning_rate": 0.0005864917721663362,
      "loss": 3.2459,
      "step": 22092
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.8651182651519775,
      "learning_rate": 0.0005864905584940823,
      "loss": 3.2995,
      "step": 22093
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.9940743446350098,
      "learning_rate": 0.0005864893447685645,
      "loss": 2.887,
      "step": 22094
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7298544645309448,
      "learning_rate": 0.000586488130989783,
      "loss": 2.9992,
      "step": 22095
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6423488855361938,
      "learning_rate": 0.0005864869171577379,
      "loss": 2.982,
      "step": 22096
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.808113932609558,
      "learning_rate": 0.0005864857032724293,
      "loss": 3.3354,
      "step": 22097
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.058225393295288,
      "learning_rate": 0.0005864844893338578,
      "loss": 2.9712,
      "step": 22098
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5530370473861694,
      "learning_rate": 0.0005864832753420233,
      "loss": 3.2659,
      "step": 22099
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6994763612747192,
      "learning_rate": 0.0005864820612969263,
      "loss": 3.155,
      "step": 22100
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.662593960762024,
      "learning_rate": 0.0005864808471985668,
      "loss": 2.8652,
      "step": 22101
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4168678522109985,
      "learning_rate": 0.000586479633046945,
      "loss": 3.0505,
      "step": 22102
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2600555419921875,
      "learning_rate": 0.0005864784188420614,
      "loss": 3.2987,
      "step": 22103
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5127122402191162,
      "learning_rate": 0.0005864772045839161,
      "loss": 3.2837,
      "step": 22104
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4742496013641357,
      "learning_rate": 0.0005864759902725091,
      "loss": 2.7905,
      "step": 22105
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7906566858291626,
      "learning_rate": 0.0005864747759078408,
      "loss": 3.2551,
      "step": 22106
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5144444704055786,
      "learning_rate": 0.0005864735614899115,
      "loss": 3.2714,
      "step": 22107
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6048004627227783,
      "learning_rate": 0.0005864723470187214,
      "loss": 3.1668,
      "step": 22108
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.66937518119812,
      "learning_rate": 0.0005864711324942706,
      "loss": 2.9366,
      "step": 22109
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4393202066421509,
      "learning_rate": 0.0005864699179165595,
      "loss": 3.1221,
      "step": 22110
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.221942663192749,
      "learning_rate": 0.0005864687032855882,
      "loss": 2.9312,
      "step": 22111
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3777365684509277,
      "learning_rate": 0.000586467488601357,
      "loss": 3.1688,
      "step": 22112
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8232877254486084,
      "learning_rate": 0.000586466273863866,
      "loss": 3.5016,
      "step": 22113
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4841610193252563,
      "learning_rate": 0.0005864650590731155,
      "loss": 3.0722,
      "step": 22114
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3795667886734009,
      "learning_rate": 0.0005864638442291059,
      "loss": 3.245,
      "step": 22115
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.656841993331909,
      "learning_rate": 0.000586462629331837,
      "loss": 3.0979,
      "step": 22116
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3479682207107544,
      "learning_rate": 0.0005864614143813095,
      "loss": 3.1676,
      "step": 22117
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5129621028900146,
      "learning_rate": 0.0005864601993775234,
      "loss": 3.1717,
      "step": 22118
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7907400131225586,
      "learning_rate": 0.0005864589843204788,
      "loss": 3.2802,
      "step": 22119
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4000780582427979,
      "learning_rate": 0.0005864577692101762,
      "loss": 3.1884,
      "step": 22120
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.442728042602539,
      "learning_rate": 0.0005864565540466156,
      "loss": 3.248,
      "step": 22121
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5663663148880005,
      "learning_rate": 0.0005864553388297975,
      "loss": 3.2812,
      "step": 22122
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6503700017929077,
      "learning_rate": 0.0005864541235597217,
      "loss": 2.9765,
      "step": 22123
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4656652212142944,
      "learning_rate": 0.0005864529082363888,
      "loss": 2.9831,
      "step": 22124
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4885717630386353,
      "learning_rate": 0.0005864516928597989,
      "loss": 3.0811,
      "step": 22125
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.673008918762207,
      "learning_rate": 0.0005864504774299523,
      "loss": 3.156,
      "step": 22126
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0785369873046875,
      "learning_rate": 0.000586449261946849,
      "loss": 3.0037,
      "step": 22127
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2712442874908447,
      "learning_rate": 0.0005864480464104894,
      "loss": 3.0118,
      "step": 22128
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4193483591079712,
      "learning_rate": 0.0005864468308208738,
      "loss": 2.9474,
      "step": 22129
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.634713888168335,
      "learning_rate": 0.0005864456151780023,
      "loss": 3.2533,
      "step": 22130
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.544219970703125,
      "learning_rate": 0.0005864443994818751,
      "loss": 3.0897,
      "step": 22131
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5606424808502197,
      "learning_rate": 0.0005864431837324925,
      "loss": 3.2636,
      "step": 22132
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.380615234375,
      "learning_rate": 0.0005864419679298547,
      "loss": 3.1621,
      "step": 22133
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2069716453552246,
      "learning_rate": 0.0005864407520739618,
      "loss": 3.1281,
      "step": 22134
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9866060018539429,
      "learning_rate": 0.0005864395361648144,
      "loss": 3.0495,
      "step": 22135
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.865919828414917,
      "learning_rate": 0.0005864383202024124,
      "loss": 3.1847,
      "step": 22136
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9051470756530762,
      "learning_rate": 0.000586437104186756,
      "loss": 3.0675,
      "step": 22137
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.097688913345337,
      "learning_rate": 0.0005864358881178457,
      "loss": 3.1489,
      "step": 22138
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.687438726425171,
      "learning_rate": 0.0005864346719956815,
      "loss": 3.4489,
      "step": 22139
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6922136545181274,
      "learning_rate": 0.0005864334558202636,
      "loss": 3.116,
      "step": 22140
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7217673063278198,
      "learning_rate": 0.0005864322395915923,
      "loss": 3.258,
      "step": 22141
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5070083141326904,
      "learning_rate": 0.000586431023309668,
      "loss": 2.7242,
      "step": 22142
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2606546878814697,
      "learning_rate": 0.0005864298069744908,
      "loss": 3.0037,
      "step": 22143
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2422631978988647,
      "learning_rate": 0.0005864285905860607,
      "loss": 2.9969,
      "step": 22144
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4582656621932983,
      "learning_rate": 0.0005864273741443782,
      "loss": 2.9523,
      "step": 22145
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.047145366668701,
      "learning_rate": 0.0005864261576494433,
      "loss": 3.3041,
      "step": 22146
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4140175580978394,
      "learning_rate": 0.0005864249411012567,
      "loss": 3.286,
      "step": 22147
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.475964307785034,
      "learning_rate": 0.000586423724499818,
      "loss": 3.1178,
      "step": 22148
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9093725681304932,
      "learning_rate": 0.0005864225078451279,
      "loss": 3.1666,
      "step": 22149
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5812898874282837,
      "learning_rate": 0.0005864212911371864,
      "loss": 3.0694,
      "step": 22150
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5993717908859253,
      "learning_rate": 0.0005864200743759938,
      "loss": 3.1215,
      "step": 22151
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.252779960632324,
      "learning_rate": 0.0005864188575615501,
      "loss": 3.1802,
      "step": 22152
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.169340133666992,
      "learning_rate": 0.0005864176406938559,
      "loss": 2.9811,
      "step": 22153
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6159939765930176,
      "learning_rate": 0.0005864164237729113,
      "loss": 3.1947,
      "step": 22154
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.165085792541504,
      "learning_rate": 0.0005864152067987164,
      "loss": 3.015,
      "step": 22155
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4903373718261719,
      "learning_rate": 0.0005864139897712715,
      "loss": 2.9601,
      "step": 22156
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1875346899032593,
      "learning_rate": 0.0005864127726905769,
      "loss": 3.1719,
      "step": 22157
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4247161149978638,
      "learning_rate": 0.0005864115555566327,
      "loss": 3.0704,
      "step": 22158
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6435621976852417,
      "learning_rate": 0.0005864103383694392,
      "loss": 3.0329,
      "step": 22159
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8065805435180664,
      "learning_rate": 0.0005864091211289966,
      "loss": 3.2818,
      "step": 22160
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6392236948013306,
      "learning_rate": 0.0005864079038353052,
      "loss": 3.1557,
      "step": 22161
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4081510305404663,
      "learning_rate": 0.0005864066864883651,
      "loss": 3.0344,
      "step": 22162
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3639744520187378,
      "learning_rate": 0.0005864054690881766,
      "loss": 3.1314,
      "step": 22163
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2110753059387207,
      "learning_rate": 0.00058640425163474,
      "loss": 3.2351,
      "step": 22164
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.503322720527649,
      "learning_rate": 0.0005864030341280553,
      "loss": 3.0716,
      "step": 22165
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5589516162872314,
      "learning_rate": 0.000586401816568123,
      "loss": 2.9922,
      "step": 22166
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7613844871520996,
      "learning_rate": 0.0005864005989549431,
      "loss": 3.2925,
      "step": 22167
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9331122636795044,
      "learning_rate": 0.000586399381288516,
      "loss": 3.0139,
      "step": 22168
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5055001974105835,
      "learning_rate": 0.0005863981635688418,
      "loss": 3.0902,
      "step": 22169
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8326495885849,
      "learning_rate": 0.0005863969457959207,
      "loss": 3.0494,
      "step": 22170
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6879358291625977,
      "learning_rate": 0.0005863957279697531,
      "loss": 3.0623,
      "step": 22171
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3431522846221924,
      "learning_rate": 0.0005863945100903391,
      "loss": 3.1387,
      "step": 22172
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4804633855819702,
      "learning_rate": 0.000586393292157679,
      "loss": 3.1073,
      "step": 22173
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.524644136428833,
      "learning_rate": 0.0005863920741717729,
      "loss": 3.28,
      "step": 22174
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5151290893554688,
      "learning_rate": 0.0005863908561326212,
      "loss": 3.2476,
      "step": 22175
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2054369449615479,
      "learning_rate": 0.0005863896380402239,
      "loss": 3.1307,
      "step": 22176
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8319238424301147,
      "learning_rate": 0.0005863884198945815,
      "loss": 2.799,
      "step": 22177
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4513779878616333,
      "learning_rate": 0.0005863872016956941,
      "loss": 3.2054,
      "step": 22178
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4232233762741089,
      "learning_rate": 0.0005863859834435618,
      "loss": 2.9898,
      "step": 22179
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3796247243881226,
      "learning_rate": 0.0005863847651381849,
      "loss": 3.1495,
      "step": 22180
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6168971061706543,
      "learning_rate": 0.0005863835467795638,
      "loss": 3.1975,
      "step": 22181
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6124407052993774,
      "learning_rate": 0.0005863823283676986,
      "loss": 3.0553,
      "step": 22182
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.788217306137085,
      "learning_rate": 0.0005863811099025894,
      "loss": 3.2923,
      "step": 22183
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.682502269744873,
      "learning_rate": 0.0005863798913842367,
      "loss": 2.9795,
      "step": 22184
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.427405595779419,
      "learning_rate": 0.0005863786728126405,
      "loss": 3.1024,
      "step": 22185
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.593953251838684,
      "learning_rate": 0.000586377454187801,
      "loss": 3.3964,
      "step": 22186
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.462554693222046,
      "learning_rate": 0.0005863762355097186,
      "loss": 2.906,
      "step": 22187
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.830434799194336,
      "learning_rate": 0.0005863750167783935,
      "loss": 3.0091,
      "step": 22188
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.9995176792144775,
      "learning_rate": 0.0005863737979938258,
      "loss": 2.9691,
      "step": 22189
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.460569381713867,
      "learning_rate": 0.0005863725791560158,
      "loss": 3.1552,
      "step": 22190
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6752008199691772,
      "learning_rate": 0.0005863713602649638,
      "loss": 3.2249,
      "step": 22191
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.9486732482910156,
      "learning_rate": 0.00058637014132067,
      "loss": 3.0521,
      "step": 22192
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8215597867965698,
      "learning_rate": 0.0005863689223231344,
      "loss": 3.0974,
      "step": 22193
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5541605949401855,
      "learning_rate": 0.0005863677032723576,
      "loss": 3.1159,
      "step": 22194
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.992577314376831,
      "learning_rate": 0.0005863664841683396,
      "loss": 3.1125,
      "step": 22195
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0473556518554688,
      "learning_rate": 0.0005863652650110806,
      "loss": 3.1649,
      "step": 22196
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.223893880844116,
      "learning_rate": 0.0005863640458005809,
      "loss": 3.2136,
      "step": 22197
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3965678215026855,
      "learning_rate": 0.0005863628265368408,
      "loss": 2.9562,
      "step": 22198
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9136344194412231,
      "learning_rate": 0.0005863616072198604,
      "loss": 3.0178,
      "step": 22199
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.370734453201294,
      "learning_rate": 0.00058636038784964,
      "loss": 3.1618,
      "step": 22200
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7101919651031494,
      "learning_rate": 0.0005863591684261796,
      "loss": 2.9886,
      "step": 22201
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.654327154159546,
      "learning_rate": 0.0005863579489494799,
      "loss": 3.0262,
      "step": 22202
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6366816759109497,
      "learning_rate": 0.0005863567294195407,
      "loss": 2.9547,
      "step": 22203
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.759602427482605,
      "learning_rate": 0.0005863555098363625,
      "loss": 3.1014,
      "step": 22204
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4829626083374023,
      "learning_rate": 0.0005863542901999452,
      "loss": 3.1243,
      "step": 22205
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5745511054992676,
      "learning_rate": 0.0005863530705102894,
      "loss": 3.0747,
      "step": 22206
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3062479496002197,
      "learning_rate": 0.0005863518507673952,
      "loss": 3.1908,
      "step": 22207
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4945476055145264,
      "learning_rate": 0.0005863506309712627,
      "loss": 3.059,
      "step": 22208
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3992464542388916,
      "learning_rate": 0.0005863494111218921,
      "loss": 3.0713,
      "step": 22209
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.292257308959961,
      "learning_rate": 0.0005863481912192839,
      "loss": 3.0045,
      "step": 22210
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5606728792190552,
      "learning_rate": 0.0005863469712634381,
      "loss": 3.0983,
      "step": 22211
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5778489112854004,
      "learning_rate": 0.000586345751254355,
      "loss": 2.9819,
      "step": 22212
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.429524302482605,
      "learning_rate": 0.0005863445311920347,
      "loss": 3.0552,
      "step": 22213
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.314394474029541,
      "learning_rate": 0.0005863433110764777,
      "loss": 2.7974,
      "step": 22214
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5900813341140747,
      "learning_rate": 0.0005863420909076842,
      "loss": 3.2961,
      "step": 22215
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5636476278305054,
      "learning_rate": 0.0005863408706856541,
      "loss": 3.037,
      "step": 22216
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.658737301826477,
      "learning_rate": 0.0005863396504103878,
      "loss": 3.1187,
      "step": 22217
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3201837539672852,
      "learning_rate": 0.0005863384300818856,
      "loss": 3.1376,
      "step": 22218
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8461858034133911,
      "learning_rate": 0.0005863372097001478,
      "loss": 3.2876,
      "step": 22219
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8909496068954468,
      "learning_rate": 0.0005863359892651744,
      "loss": 2.8522,
      "step": 22220
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5574755668640137,
      "learning_rate": 0.0005863347687769658,
      "loss": 2.8846,
      "step": 22221
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.357702374458313,
      "learning_rate": 0.0005863335482355221,
      "loss": 3.216,
      "step": 22222
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.520334243774414,
      "learning_rate": 0.0005863323276408436,
      "loss": 3.0296,
      "step": 22223
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.855175495147705,
      "learning_rate": 0.0005863311069929304,
      "loss": 3.0677,
      "step": 22224
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6755011081695557,
      "learning_rate": 0.000586329886291783,
      "loss": 2.9662,
      "step": 22225
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.1973092555999756,
      "learning_rate": 0.0005863286655374014,
      "loss": 2.9372,
      "step": 22226
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5433244705200195,
      "learning_rate": 0.000586327444729786,
      "loss": 3.3371,
      "step": 22227
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0747835636138916,
      "learning_rate": 0.0005863262238689369,
      "loss": 2.9406,
      "step": 22228
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.408539295196533,
      "learning_rate": 0.0005863250029548542,
      "loss": 2.9501,
      "step": 22229
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7284034490585327,
      "learning_rate": 0.0005863237819875384,
      "loss": 2.9607,
      "step": 22230
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2977635860443115,
      "learning_rate": 0.0005863225609669896,
      "loss": 3.0183,
      "step": 22231
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5286035537719727,
      "learning_rate": 0.000586321339893208,
      "loss": 2.9241,
      "step": 22232
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3774336576461792,
      "learning_rate": 0.0005863201187661939,
      "loss": 2.7934,
      "step": 22233
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.784439206123352,
      "learning_rate": 0.0005863188975859474,
      "loss": 3.2301,
      "step": 22234
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3132416009902954,
      "learning_rate": 0.0005863176763524689,
      "loss": 3.1277,
      "step": 22235
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4246833324432373,
      "learning_rate": 0.0005863164550657585,
      "loss": 3.0253,
      "step": 22236
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2974560260772705,
      "learning_rate": 0.0005863152337258165,
      "loss": 3.2481,
      "step": 22237
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4593232870101929,
      "learning_rate": 0.000586314012332643,
      "loss": 2.9331,
      "step": 22238
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.58592689037323,
      "learning_rate": 0.0005863127908862384,
      "loss": 3.1103,
      "step": 22239
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4322195053100586,
      "learning_rate": 0.0005863115693866028,
      "loss": 3.1103,
      "step": 22240
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.321105718612671,
      "learning_rate": 0.0005863103478337366,
      "loss": 3.276,
      "step": 22241
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.667114496231079,
      "learning_rate": 0.0005863091262276397,
      "loss": 2.8012,
      "step": 22242
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.68328058719635,
      "learning_rate": 0.0005863079045683127,
      "loss": 3.0725,
      "step": 22243
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3243049383163452,
      "learning_rate": 0.0005863066828557555,
      "loss": 3.0742,
      "step": 22244
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9873127937316895,
      "learning_rate": 0.0005863054610899686,
      "loss": 2.9668,
      "step": 22245
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.698447585105896,
      "learning_rate": 0.0005863042392709522,
      "loss": 3.082,
      "step": 22246
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2952346801757812,
      "learning_rate": 0.0005863030173987063,
      "loss": 2.8889,
      "step": 22247
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.976033329963684,
      "learning_rate": 0.0005863017954732312,
      "loss": 3.064,
      "step": 22248
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.733832597732544,
      "learning_rate": 0.0005863005734945274,
      "loss": 3.0673,
      "step": 22249
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1219522953033447,
      "learning_rate": 0.0005862993514625948,
      "loss": 3.219,
      "step": 22250
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.1950314044952393,
      "learning_rate": 0.0005862981293774336,
      "loss": 3.0212,
      "step": 22251
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8727028369903564,
      "learning_rate": 0.0005862969072390443,
      "loss": 3.0888,
      "step": 22252
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4975321292877197,
      "learning_rate": 0.0005862956850474271,
      "loss": 2.8626,
      "step": 22253
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.9681990146636963,
      "learning_rate": 0.0005862944628025821,
      "loss": 3.1029,
      "step": 22254
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7325117588043213,
      "learning_rate": 0.0005862932405045094,
      "loss": 3.1816,
      "step": 22255
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.805295467376709,
      "learning_rate": 0.0005862920181532095,
      "loss": 3.254,
      "step": 22256
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.474621057510376,
      "learning_rate": 0.0005862907957486826,
      "loss": 3.0994,
      "step": 22257
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.631319522857666,
      "learning_rate": 0.0005862895732909288,
      "loss": 3.0437,
      "step": 22258
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6656123399734497,
      "learning_rate": 0.0005862883507799482,
      "loss": 2.9084,
      "step": 22259
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5745248794555664,
      "learning_rate": 0.0005862871282157413,
      "loss": 3.2229,
      "step": 22260
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.185208797454834,
      "learning_rate": 0.0005862859055983082,
      "loss": 2.9687,
      "step": 22261
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9201078414916992,
      "learning_rate": 0.0005862846829276491,
      "loss": 3.0515,
      "step": 22262
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4329909086227417,
      "learning_rate": 0.0005862834602037644,
      "loss": 3.3346,
      "step": 22263
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4112827777862549,
      "learning_rate": 0.0005862822374266541,
      "loss": 3.1213,
      "step": 22264
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5403069257736206,
      "learning_rate": 0.0005862810145963185,
      "loss": 3.3492,
      "step": 22265
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6194195747375488,
      "learning_rate": 0.0005862797917127581,
      "loss": 3.1102,
      "step": 22266
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.499630093574524,
      "learning_rate": 0.0005862785687759726,
      "loss": 2.9048,
      "step": 22267
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8728115558624268,
      "learning_rate": 0.0005862773457859625,
      "loss": 3.1761,
      "step": 22268
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.356331706047058,
      "learning_rate": 0.0005862761227427282,
      "loss": 3.3008,
      "step": 22269
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.455525517463684,
      "learning_rate": 0.0005862748996462697,
      "loss": 3.2065,
      "step": 22270
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3510453701019287,
      "learning_rate": 0.0005862736764965874,
      "loss": 3.0637,
      "step": 22271
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.263027548789978,
      "learning_rate": 0.0005862724532936812,
      "loss": 3.1632,
      "step": 22272
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7257250547409058,
      "learning_rate": 0.0005862712300375517,
      "loss": 3.1375,
      "step": 22273
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4768379926681519,
      "learning_rate": 0.0005862700067281989,
      "loss": 3.3587,
      "step": 22274
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7022818326950073,
      "learning_rate": 0.0005862687833656233,
      "loss": 3.3917,
      "step": 22275
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6150262355804443,
      "learning_rate": 0.0005862675599498247,
      "loss": 3.0674,
      "step": 22276
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4622836112976074,
      "learning_rate": 0.0005862663364808036,
      "loss": 2.9927,
      "step": 22277
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4394595623016357,
      "learning_rate": 0.0005862651129585602,
      "loss": 3.133,
      "step": 22278
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7201972007751465,
      "learning_rate": 0.0005862638893830947,
      "loss": 3.0357,
      "step": 22279
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4301248788833618,
      "learning_rate": 0.0005862626657544074,
      "loss": 3.2123,
      "step": 22280
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4655873775482178,
      "learning_rate": 0.0005862614420724984,
      "loss": 3.2597,
      "step": 22281
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7639857530593872,
      "learning_rate": 0.0005862602183373681,
      "loss": 3.2015,
      "step": 22282
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3172410726547241,
      "learning_rate": 0.0005862589945490165,
      "loss": 3.2116,
      "step": 22283
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9727760553359985,
      "learning_rate": 0.000586257770707444,
      "loss": 3.0907,
      "step": 22284
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5039039850234985,
      "learning_rate": 0.0005862565468126508,
      "loss": 3.0269,
      "step": 22285
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3979289531707764,
      "learning_rate": 0.000586255322864637,
      "loss": 3.4046,
      "step": 22286
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7472379207611084,
      "learning_rate": 0.0005862540988634031,
      "loss": 3.2145,
      "step": 22287
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8271123170852661,
      "learning_rate": 0.000586252874808949,
      "loss": 2.91,
      "step": 22288
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2887051105499268,
      "learning_rate": 0.0005862516507012752,
      "loss": 3.04,
      "step": 22289
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4563575983047485,
      "learning_rate": 0.0005862504265403818,
      "loss": 3.2872,
      "step": 22290
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.509330153465271,
      "learning_rate": 0.000586249202326269,
      "loss": 3.2871,
      "step": 22291
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.44228994846344,
      "learning_rate": 0.000586247978058937,
      "loss": 3.0209,
      "step": 22292
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5346049070358276,
      "learning_rate": 0.0005862467537383862,
      "loss": 3.0158,
      "step": 22293
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.254366159439087,
      "learning_rate": 0.0005862455293646167,
      "loss": 3.0429,
      "step": 22294
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4693466424942017,
      "learning_rate": 0.0005862443049376287,
      "loss": 2.9028,
      "step": 22295
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0211551189422607,
      "learning_rate": 0.0005862430804574226,
      "loss": 3.2068,
      "step": 22296
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8191595077514648,
      "learning_rate": 0.0005862418559239985,
      "loss": 3.0088,
      "step": 22297
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.330764889717102,
      "learning_rate": 0.0005862406313373566,
      "loss": 2.9948,
      "step": 22298
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5671508312225342,
      "learning_rate": 0.0005862394066974971,
      "loss": 2.9481,
      "step": 22299
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.560996413230896,
      "learning_rate": 0.0005862381820044203,
      "loss": 3.2213,
      "step": 22300
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8042505979537964,
      "learning_rate": 0.0005862369572581265,
      "loss": 3.0137,
      "step": 22301
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.572190523147583,
      "learning_rate": 0.0005862357324586158,
      "loss": 3.143,
      "step": 22302
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4792156219482422,
      "learning_rate": 0.0005862345076058885,
      "loss": 3.1581,
      "step": 22303
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3315348625183105,
      "learning_rate": 0.0005862332826999447,
      "loss": 2.8631,
      "step": 22304
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4044969081878662,
      "learning_rate": 0.0005862320577407849,
      "loss": 3.0747,
      "step": 22305
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.609113335609436,
      "learning_rate": 0.000586230832728409,
      "loss": 3.1004,
      "step": 22306
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.440382719039917,
      "learning_rate": 0.0005862296076628175,
      "loss": 3.3287,
      "step": 22307
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4005999565124512,
      "learning_rate": 0.0005862283825440104,
      "loss": 3.2236,
      "step": 22308
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4954432249069214,
      "learning_rate": 0.0005862271573719882,
      "loss": 3.2451,
      "step": 22309
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5794395208358765,
      "learning_rate": 0.0005862259321467509,
      "loss": 3.152,
      "step": 22310
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.898097038269043,
      "learning_rate": 0.0005862247068682987,
      "loss": 3.044,
      "step": 22311
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.657180666923523,
      "learning_rate": 0.0005862234815366319,
      "loss": 2.8087,
      "step": 22312
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.666634202003479,
      "learning_rate": 0.0005862222561517509,
      "loss": 3.0404,
      "step": 22313
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7935630083084106,
      "learning_rate": 0.0005862210307136557,
      "loss": 3.101,
      "step": 22314
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.581918478012085,
      "learning_rate": 0.0005862198052223466,
      "loss": 3.079,
      "step": 22315
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4003881216049194,
      "learning_rate": 0.000586218579677824,
      "loss": 3.0593,
      "step": 22316
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.801698088645935,
      "learning_rate": 0.0005862173540800877,
      "loss": 3.1386,
      "step": 22317
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6230442523956299,
      "learning_rate": 0.0005862161284291384,
      "loss": 2.819,
      "step": 22318
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4470796585083008,
      "learning_rate": 0.0005862149027249761,
      "loss": 3.0596,
      "step": 22319
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.517656922340393,
      "learning_rate": 0.0005862136769676009,
      "loss": 3.0694,
      "step": 22320
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.1335086822509766,
      "learning_rate": 0.0005862124511570134,
      "loss": 3.3228,
      "step": 22321
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0144762992858887,
      "learning_rate": 0.0005862112252932133,
      "loss": 2.9826,
      "step": 22322
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.054959535598755,
      "learning_rate": 0.0005862099993762013,
      "loss": 2.965,
      "step": 22323
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.90170156955719,
      "learning_rate": 0.0005862087734059776,
      "loss": 2.9876,
      "step": 22324
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3714675903320312,
      "learning_rate": 0.000586207547382542,
      "loss": 3.0478,
      "step": 22325
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.34267258644104,
      "learning_rate": 0.0005862063213058952,
      "loss": 3.073,
      "step": 22326
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5601379871368408,
      "learning_rate": 0.0005862050951760372,
      "loss": 2.9836,
      "step": 22327
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.457015037536621,
      "learning_rate": 0.0005862038689929683,
      "loss": 2.9906,
      "step": 22328
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5355273485183716,
      "learning_rate": 0.0005862026427566887,
      "loss": 3.0576,
      "step": 22329
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2714476585388184,
      "learning_rate": 0.0005862014164671986,
      "loss": 3.2109,
      "step": 22330
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5411075353622437,
      "learning_rate": 0.0005862001901244981,
      "loss": 3.0708,
      "step": 22331
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7845711708068848,
      "learning_rate": 0.0005861989637285878,
      "loss": 3.0423,
      "step": 22332
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5599924325942993,
      "learning_rate": 0.0005861977372794676,
      "loss": 3.0309,
      "step": 22333
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.60735023021698,
      "learning_rate": 0.0005861965107771379,
      "loss": 3.0189,
      "step": 22334
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0984046459198,
      "learning_rate": 0.0005861952842215989,
      "loss": 2.9274,
      "step": 22335
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4399818181991577,
      "learning_rate": 0.0005861940576128507,
      "loss": 2.9692,
      "step": 22336
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.637274146080017,
      "learning_rate": 0.0005861928309508936,
      "loss": 3.0257,
      "step": 22337
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3837177753448486,
      "learning_rate": 0.000586191604235728,
      "loss": 3.0668,
      "step": 22338
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.434765338897705,
      "learning_rate": 0.0005861903774673538,
      "loss": 3.3024,
      "step": 22339
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4041979312896729,
      "learning_rate": 0.0005861891506457716,
      "loss": 3.1969,
      "step": 22340
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1801114082336426,
      "learning_rate": 0.0005861879237709813,
      "loss": 3.138,
      "step": 22341
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8279348611831665,
      "learning_rate": 0.0005861866968429833,
      "loss": 3.144,
      "step": 22342
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.392325758934021,
      "learning_rate": 0.0005861854698617778,
      "loss": 3.2197,
      "step": 22343
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.670243263244629,
      "learning_rate": 0.000586184242827365,
      "loss": 3.3744,
      "step": 22344
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.810718297958374,
      "learning_rate": 0.0005861830157397451,
      "loss": 3.0953,
      "step": 22345
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4987798929214478,
      "learning_rate": 0.0005861817885989184,
      "loss": 3.0877,
      "step": 22346
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.545654058456421,
      "learning_rate": 0.0005861805614048852,
      "loss": 3.1652,
      "step": 22347
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7501299381256104,
      "learning_rate": 0.0005861793341576456,
      "loss": 2.9548,
      "step": 22348
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6162605285644531,
      "learning_rate": 0.0005861781068571998,
      "loss": 2.9793,
      "step": 22349
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7829669713974,
      "learning_rate": 0.0005861768795035481,
      "loss": 3.1041,
      "step": 22350
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7033159732818604,
      "learning_rate": 0.0005861756520966906,
      "loss": 2.9203,
      "step": 22351
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.359065294265747,
      "learning_rate": 0.0005861744246366278,
      "loss": 3.1923,
      "step": 22352
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4389619827270508,
      "learning_rate": 0.0005861731971233598,
      "loss": 3.2214,
      "step": 22353
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2602813243865967,
      "learning_rate": 0.0005861719695568867,
      "loss": 3.0507,
      "step": 22354
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4803837537765503,
      "learning_rate": 0.0005861707419372089,
      "loss": 2.9681,
      "step": 22355
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3131966590881348,
      "learning_rate": 0.0005861695142643265,
      "loss": 2.8737,
      "step": 22356
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.789176344871521,
      "learning_rate": 0.0005861682865382398,
      "loss": 3.0469,
      "step": 22357
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4741463661193848,
      "learning_rate": 0.0005861670587589491,
      "loss": 2.926,
      "step": 22358
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3503787517547607,
      "learning_rate": 0.0005861658309264545,
      "loss": 3.0568,
      "step": 22359
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5087523460388184,
      "learning_rate": 0.0005861646030407561,
      "loss": 2.9999,
      "step": 22360
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.498901128768921,
      "learning_rate": 0.0005861633751018545,
      "loss": 3.0157,
      "step": 22361
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0796377658843994,
      "learning_rate": 0.0005861621471097497,
      "loss": 3.2984,
      "step": 22362
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.534245491027832,
      "learning_rate": 0.0005861609190644419,
      "loss": 2.8791,
      "step": 22363
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.632559895515442,
      "learning_rate": 0.0005861596909659314,
      "loss": 2.9559,
      "step": 22364
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2670836448669434,
      "learning_rate": 0.0005861584628142184,
      "loss": 2.9969,
      "step": 22365
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5719256401062012,
      "learning_rate": 0.0005861572346093031,
      "loss": 3.3571,
      "step": 22366
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7001874446868896,
      "learning_rate": 0.0005861560063511859,
      "loss": 3.3058,
      "step": 22367
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.71779203414917,
      "learning_rate": 0.0005861547780398669,
      "loss": 3.2059,
      "step": 22368
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2582712173461914,
      "learning_rate": 0.0005861535496753461,
      "loss": 3.1604,
      "step": 22369
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6685928106307983,
      "learning_rate": 0.0005861523212576242,
      "loss": 3.1414,
      "step": 22370
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.8453989028930664,
      "learning_rate": 0.0005861510927867011,
      "loss": 3.2612,
      "step": 22371
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.443143367767334,
      "learning_rate": 0.0005861498642625771,
      "loss": 3.1265,
      "step": 22372
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.288611888885498,
      "learning_rate": 0.0005861486356852524,
      "loss": 2.885,
      "step": 22373
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7336311340332031,
      "learning_rate": 0.0005861474070547274,
      "loss": 2.846,
      "step": 22374
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7183923721313477,
      "learning_rate": 0.000586146178371002,
      "loss": 3.2593,
      "step": 22375
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.4480950832366943,
      "learning_rate": 0.0005861449496340767,
      "loss": 3.1116,
      "step": 22376
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.298959970474243,
      "learning_rate": 0.0005861437208439516,
      "loss": 3.2084,
      "step": 22377
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4639394283294678,
      "learning_rate": 0.0005861424920006271,
      "loss": 2.7627,
      "step": 22378
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7218940258026123,
      "learning_rate": 0.0005861412631041033,
      "loss": 3.0575,
      "step": 22379
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.878098726272583,
      "learning_rate": 0.0005861400341543804,
      "loss": 3.1527,
      "step": 22380
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.898118257522583,
      "learning_rate": 0.0005861388051514586,
      "loss": 3.3446,
      "step": 22381
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5244038105010986,
      "learning_rate": 0.0005861375760953382,
      "loss": 3.1539,
      "step": 22382
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4534841775894165,
      "learning_rate": 0.0005861363469860196,
      "loss": 2.9805,
      "step": 22383
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.633030652999878,
      "learning_rate": 0.0005861351178235027,
      "loss": 3.0392,
      "step": 22384
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6171984672546387,
      "learning_rate": 0.0005861338886077878,
      "loss": 3.1046,
      "step": 22385
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7083532810211182,
      "learning_rate": 0.0005861326593388754,
      "loss": 2.9058,
      "step": 22386
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6603440046310425,
      "learning_rate": 0.0005861314300167655,
      "loss": 2.9904,
      "step": 22387
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.204224109649658,
      "learning_rate": 0.0005861302006414583,
      "loss": 3.1972,
      "step": 22388
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.513458251953125,
      "learning_rate": 0.0005861289712129541,
      "loss": 3.096,
      "step": 22389
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3856067657470703,
      "learning_rate": 0.0005861277417312531,
      "loss": 2.9482,
      "step": 22390
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7886402606964111,
      "learning_rate": 0.0005861265121963556,
      "loss": 3.05,
      "step": 22391
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4855871200561523,
      "learning_rate": 0.0005861252826082618,
      "loss": 3.2503,
      "step": 22392
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4171867370605469,
      "learning_rate": 0.0005861240529669719,
      "loss": 3.1989,
      "step": 22393
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.845268964767456,
      "learning_rate": 0.0005861228232724861,
      "loss": 3.1114,
      "step": 22394
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7024896144866943,
      "learning_rate": 0.0005861215935248048,
      "loss": 3.0917,
      "step": 22395
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5629082918167114,
      "learning_rate": 0.0005861203637239279,
      "loss": 3.1971,
      "step": 22396
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.6502416133880615,
      "learning_rate": 0.0005861191338698559,
      "loss": 3.1699,
      "step": 22397
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4223309755325317,
      "learning_rate": 0.000586117903962589,
      "loss": 3.0393,
      "step": 22398
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8190765380859375,
      "learning_rate": 0.0005861166740021272,
      "loss": 3.0344,
      "step": 22399
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3276722431182861,
      "learning_rate": 0.0005861154439884712,
      "loss": 3.148,
      "step": 22400
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4671568870544434,
      "learning_rate": 0.0005861142139216208,
      "loss": 3.2879,
      "step": 22401
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.056800603866577,
      "learning_rate": 0.0005861129838015763,
      "loss": 3.1458,
      "step": 22402
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.277198553085327,
      "learning_rate": 0.0005861117536283381,
      "loss": 2.956,
      "step": 22403
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.652753233909607,
      "learning_rate": 0.0005861105234019063,
      "loss": 3.2702,
      "step": 22404
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3177982568740845,
      "learning_rate": 0.000586109293122281,
      "loss": 3.0903,
      "step": 22405
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.500619888305664,
      "learning_rate": 0.0005861080627894628,
      "loss": 2.9922,
      "step": 22406
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5861930847167969,
      "learning_rate": 0.0005861068324034516,
      "loss": 3.0714,
      "step": 22407
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5083746910095215,
      "learning_rate": 0.0005861056019642477,
      "loss": 3.0612,
      "step": 22408
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.640012264251709,
      "learning_rate": 0.0005861043714718515,
      "loss": 2.8794,
      "step": 22409
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6251667737960815,
      "learning_rate": 0.000586103140926263,
      "loss": 3.0577,
      "step": 22410
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3503817319869995,
      "learning_rate": 0.0005861019103274826,
      "loss": 3.017,
      "step": 22411
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3998664617538452,
      "learning_rate": 0.0005861006796755103,
      "loss": 3.1918,
      "step": 22412
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3851921558380127,
      "learning_rate": 0.0005860994489703467,
      "loss": 3.3214,
      "step": 22413
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5970699787139893,
      "learning_rate": 0.0005860982182119917,
      "loss": 3.1858,
      "step": 22414
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4491218328475952,
      "learning_rate": 0.0005860969874004456,
      "loss": 3.0883,
      "step": 22415
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.368422031402588,
      "learning_rate": 0.0005860957565357088,
      "loss": 3.0625,
      "step": 22416
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4342083930969238,
      "learning_rate": 0.0005860945256177812,
      "loss": 3.0058,
      "step": 22417
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7606219053268433,
      "learning_rate": 0.0005860932946466633,
      "loss": 3.0111,
      "step": 22418
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3401542901992798,
      "learning_rate": 0.0005860920636223553,
      "loss": 3.3226,
      "step": 22419
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9434807300567627,
      "learning_rate": 0.0005860908325448574,
      "loss": 3.1767,
      "step": 22420
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4022923707962036,
      "learning_rate": 0.0005860896014141697,
      "loss": 3.1289,
      "step": 22421
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5805811882019043,
      "learning_rate": 0.0005860883702302927,
      "loss": 3.2493,
      "step": 22422
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4831595420837402,
      "learning_rate": 0.0005860871389932264,
      "loss": 3.2103,
      "step": 22423
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1915451288223267,
      "learning_rate": 0.0005860859077029711,
      "loss": 3.1973,
      "step": 22424
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3473494052886963,
      "learning_rate": 0.000586084676359527,
      "loss": 3.174,
      "step": 22425
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8013882637023926,
      "learning_rate": 0.0005860834449628945,
      "loss": 2.9416,
      "step": 22426
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7210954427719116,
      "learning_rate": 0.0005860822135130735,
      "loss": 3.019,
      "step": 22427
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7225056886672974,
      "learning_rate": 0.0005860809820100645,
      "loss": 3.0208,
      "step": 22428
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.559873104095459,
      "learning_rate": 0.0005860797504538677,
      "loss": 3.2428,
      "step": 22429
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7402138710021973,
      "learning_rate": 0.0005860785188444831,
      "loss": 3.1383,
      "step": 22430
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5573753118515015,
      "learning_rate": 0.0005860772871819113,
      "loss": 2.9175,
      "step": 22431
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.221067190170288,
      "learning_rate": 0.0005860760554661522,
      "loss": 3.1684,
      "step": 22432
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4527531862258911,
      "learning_rate": 0.0005860748236972063,
      "loss": 3.261,
      "step": 22433
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7308285236358643,
      "learning_rate": 0.0005860735918750736,
      "loss": 3.0076,
      "step": 22434
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.599520444869995,
      "learning_rate": 0.0005860723599997544,
      "loss": 3.1812,
      "step": 22435
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6615021228790283,
      "learning_rate": 0.0005860711280712489,
      "loss": 3.1092,
      "step": 22436
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3958559036254883,
      "learning_rate": 0.0005860698960895574,
      "loss": 2.8158,
      "step": 22437
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8346933126449585,
      "learning_rate": 0.0005860686640546802,
      "loss": 2.9222,
      "step": 22438
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3842036724090576,
      "learning_rate": 0.0005860674319666174,
      "loss": 3.159,
      "step": 22439
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1452395915985107,
      "learning_rate": 0.0005860661998253692,
      "loss": 3.1222,
      "step": 22440
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.168881416320801,
      "learning_rate": 0.0005860649676309361,
      "loss": 3.2436,
      "step": 22441
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6863571405410767,
      "learning_rate": 0.0005860637353833179,
      "loss": 3.2387,
      "step": 22442
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1252949237823486,
      "learning_rate": 0.0005860625030825151,
      "loss": 3.022,
      "step": 22443
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.552307367324829,
      "learning_rate": 0.0005860612707285278,
      "loss": 2.8724,
      "step": 22444
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7106540203094482,
      "learning_rate": 0.0005860600383213565,
      "loss": 3.0128,
      "step": 22445
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4581342935562134,
      "learning_rate": 0.0005860588058610012,
      "loss": 3.0605,
      "step": 22446
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7356281280517578,
      "learning_rate": 0.000586057573347462,
      "loss": 3.028,
      "step": 22447
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0625462532043457,
      "learning_rate": 0.0005860563407807395,
      "loss": 3.2083,
      "step": 22448
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4089837074279785,
      "learning_rate": 0.0005860551081608336,
      "loss": 3.0408,
      "step": 22449
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4710103273391724,
      "learning_rate": 0.0005860538754877447,
      "loss": 3.0294,
      "step": 22450
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.3939504623413086,
      "learning_rate": 0.000586052642761473,
      "loss": 3.1018,
      "step": 22451
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2620723247528076,
      "learning_rate": 0.0005860514099820187,
      "loss": 3.2261,
      "step": 22452
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7139064073562622,
      "learning_rate": 0.000586050177149382,
      "loss": 3.0536,
      "step": 22453
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.046992301940918,
      "learning_rate": 0.0005860489442635633,
      "loss": 3.2299,
      "step": 22454
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3534932136535645,
      "learning_rate": 0.0005860477113245625,
      "loss": 3.0008,
      "step": 22455
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3218128681182861,
      "learning_rate": 0.0005860464783323802,
      "loss": 3.2541,
      "step": 22456
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7141575813293457,
      "learning_rate": 0.0005860452452870164,
      "loss": 3.1667,
      "step": 22457
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5530245304107666,
      "learning_rate": 0.0005860440121884715,
      "loss": 3.2429,
      "step": 22458
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4229398965835571,
      "learning_rate": 0.0005860427790367454,
      "loss": 3.2206,
      "step": 22459
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2712924480438232,
      "learning_rate": 0.0005860415458318386,
      "loss": 3.2868,
      "step": 22460
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4177298545837402,
      "learning_rate": 0.0005860403125737514,
      "loss": 3.0499,
      "step": 22461
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.021230697631836,
      "learning_rate": 0.000586039079262484,
      "loss": 3.4241,
      "step": 22462
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5999082326889038,
      "learning_rate": 0.0005860378458980363,
      "loss": 3.2303,
      "step": 22463
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6555447578430176,
      "learning_rate": 0.0005860366124804088,
      "loss": 3.0151,
      "step": 22464
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6179282665252686,
      "learning_rate": 0.0005860353790096017,
      "loss": 3.1104,
      "step": 22465
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5969243049621582,
      "learning_rate": 0.0005860341454856152,
      "loss": 3.161,
      "step": 22466
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7127059698104858,
      "learning_rate": 0.0005860329119084497,
      "loss": 3.1093,
      "step": 22467
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.773926019668579,
      "learning_rate": 0.0005860316782781052,
      "loss": 3.097,
      "step": 22468
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6364637613296509,
      "learning_rate": 0.000586030444594582,
      "loss": 3.0965,
      "step": 22469
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9106405973434448,
      "learning_rate": 0.0005860292108578804,
      "loss": 3.177,
      "step": 22470
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7475385665893555,
      "learning_rate": 0.0005860279770680005,
      "loss": 2.9383,
      "step": 22471
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8537869453430176,
      "learning_rate": 0.0005860267432249425,
      "loss": 3.3067,
      "step": 22472
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9486185312271118,
      "learning_rate": 0.000586025509328707,
      "loss": 2.9408,
      "step": 22473
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0558619499206543,
      "learning_rate": 0.0005860242753792938,
      "loss": 2.8756,
      "step": 22474
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7139085531234741,
      "learning_rate": 0.0005860230413767034,
      "loss": 3.0948,
      "step": 22475
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0829408168792725,
      "learning_rate": 0.0005860218073209357,
      "loss": 3.1113,
      "step": 22476
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.749547004699707,
      "learning_rate": 0.0005860205732119912,
      "loss": 3.1627,
      "step": 22477
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1521048545837402,
      "learning_rate": 0.0005860193390498702,
      "loss": 2.9757,
      "step": 22478
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7700185775756836,
      "learning_rate": 0.0005860181048345728,
      "loss": 3.073,
      "step": 22479
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6593869924545288,
      "learning_rate": 0.0005860168705660991,
      "loss": 3.2826,
      "step": 22480
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4800657033920288,
      "learning_rate": 0.0005860156362444495,
      "loss": 3.2794,
      "step": 22481
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4640964269638062,
      "learning_rate": 0.0005860144018696243,
      "loss": 3.172,
      "step": 22482
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4223217964172363,
      "learning_rate": 0.0005860131674416236,
      "loss": 3.16,
      "step": 22483
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6063268184661865,
      "learning_rate": 0.0005860119329604476,
      "loss": 3.107,
      "step": 22484
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5941052436828613,
      "learning_rate": 0.0005860106984260966,
      "loss": 3.0903,
      "step": 22485
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3993403911590576,
      "learning_rate": 0.0005860094638385708,
      "loss": 3.2574,
      "step": 22486
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.087014675140381,
      "learning_rate": 0.0005860082291978705,
      "loss": 2.8602,
      "step": 22487
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5457379817962646,
      "learning_rate": 0.0005860069945039958,
      "loss": 3.1109,
      "step": 22488
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5890369415283203,
      "learning_rate": 0.000586005759756947,
      "loss": 3.3037,
      "step": 22489
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7650227546691895,
      "learning_rate": 0.0005860045249567244,
      "loss": 3.2361,
      "step": 22490
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1696529388427734,
      "learning_rate": 0.0005860032901033281,
      "loss": 3.0152,
      "step": 22491
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0072824954986572,
      "learning_rate": 0.0005860020551967584,
      "loss": 3.1832,
      "step": 22492
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0579118728637695,
      "learning_rate": 0.0005860008202370155,
      "loss": 2.5719,
      "step": 22493
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4155349731445312,
      "learning_rate": 0.0005859995852240997,
      "loss": 3.2703,
      "step": 22494
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1555442810058594,
      "learning_rate": 0.0005859983501580111,
      "loss": 2.9672,
      "step": 22495
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.688989520072937,
      "learning_rate": 0.0005859971150387501,
      "loss": 3.055,
      "step": 22496
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8035691976547241,
      "learning_rate": 0.0005859958798663167,
      "loss": 3.2639,
      "step": 22497
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5160229206085205,
      "learning_rate": 0.0005859946446407114,
      "loss": 3.0428,
      "step": 22498
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8677639961242676,
      "learning_rate": 0.0005859934093619344,
      "loss": 2.8818,
      "step": 22499
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9440205097198486,
      "learning_rate": 0.0005859921740299855,
      "loss": 2.8117,
      "step": 22500
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.786928415298462,
      "learning_rate": 0.0005859909386448655,
      "loss": 3.4192,
      "step": 22501
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6804616451263428,
      "learning_rate": 0.0005859897032065744,
      "loss": 3.397,
      "step": 22502
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5169063806533813,
      "learning_rate": 0.0005859884677151124,
      "loss": 3.3,
      "step": 22503
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4224613904953003,
      "learning_rate": 0.0005859872321704796,
      "loss": 3.1511,
      "step": 22504
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6901123523712158,
      "learning_rate": 0.0005859859965726764,
      "loss": 3.1055,
      "step": 22505
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.737966537475586,
      "learning_rate": 0.0005859847609217031,
      "loss": 3.0027,
      "step": 22506
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4346064329147339,
      "learning_rate": 0.0005859835252175599,
      "loss": 3.0833,
      "step": 22507
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2450302839279175,
      "learning_rate": 0.0005859822894602469,
      "loss": 3.1495,
      "step": 22508
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6712099313735962,
      "learning_rate": 0.0005859810536497643,
      "loss": 3.338,
      "step": 22509
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6280367374420166,
      "learning_rate": 0.0005859798177861124,
      "loss": 3.2264,
      "step": 22510
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5381224155426025,
      "learning_rate": 0.0005859785818692916,
      "loss": 3.1542,
      "step": 22511
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.524987816810608,
      "learning_rate": 0.0005859773458993019,
      "loss": 2.9669,
      "step": 22512
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8738811016082764,
      "learning_rate": 0.0005859761098761437,
      "loss": 3.0722,
      "step": 22513
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1574130058288574,
      "learning_rate": 0.0005859748737998171,
      "loss": 3.0215,
      "step": 22514
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3577172756195068,
      "learning_rate": 0.0005859736376703223,
      "loss": 3.0207,
      "step": 22515
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.464261531829834,
      "learning_rate": 0.0005859724014876597,
      "loss": 3.0528,
      "step": 22516
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4958691596984863,
      "learning_rate": 0.0005859711652518294,
      "loss": 3.1482,
      "step": 22517
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4185166358947754,
      "learning_rate": 0.0005859699289628317,
      "loss": 3.1273,
      "step": 22518
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8257508277893066,
      "learning_rate": 0.0005859686926206666,
      "loss": 3.1767,
      "step": 22519
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7215121984481812,
      "learning_rate": 0.0005859674562253347,
      "loss": 3.2714,
      "step": 22520
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8196135759353638,
      "learning_rate": 0.0005859662197768361,
      "loss": 3.0143,
      "step": 22521
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4831383228302002,
      "learning_rate": 0.0005859649832751708,
      "loss": 3.1035,
      "step": 22522
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.040548086166382,
      "learning_rate": 0.0005859637467203393,
      "loss": 3.3232,
      "step": 22523
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.209141492843628,
      "learning_rate": 0.0005859625101123418,
      "loss": 3.0028,
      "step": 22524
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4234683513641357,
      "learning_rate": 0.0005859612734511784,
      "loss": 3.0926,
      "step": 22525
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.227766513824463,
      "learning_rate": 0.0005859600367368494,
      "loss": 3.0539,
      "step": 22526
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.277771472930908,
      "learning_rate": 0.0005859587999693551,
      "loss": 3.1978,
      "step": 22527
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3805406093597412,
      "learning_rate": 0.0005859575631486957,
      "loss": 3.1038,
      "step": 22528
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.000155210494995,
      "learning_rate": 0.0005859563262748713,
      "loss": 3.1102,
      "step": 22529
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.854280948638916,
      "learning_rate": 0.0005859550893478822,
      "loss": 3.1241,
      "step": 22530
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4452568292617798,
      "learning_rate": 0.0005859538523677287,
      "loss": 3.2205,
      "step": 22531
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4430038928985596,
      "learning_rate": 0.0005859526153344109,
      "loss": 2.8675,
      "step": 22532
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.140007734298706,
      "learning_rate": 0.0005859513782479292,
      "loss": 2.9133,
      "step": 22533
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.584086537361145,
      "learning_rate": 0.0005859501411082837,
      "loss": 2.9601,
      "step": 22534
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4165154695510864,
      "learning_rate": 0.0005859489039154747,
      "loss": 3.1201,
      "step": 22535
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4762026071548462,
      "learning_rate": 0.0005859476666695023,
      "loss": 3.3805,
      "step": 22536
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.621760606765747,
      "learning_rate": 0.000585946429370367,
      "loss": 3.0312,
      "step": 22537
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4513647556304932,
      "learning_rate": 0.0005859451920180688,
      "loss": 3.185,
      "step": 22538
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5152406692504883,
      "learning_rate": 0.0005859439546126079,
      "loss": 3.0612,
      "step": 22539
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.691768169403076,
      "learning_rate": 0.0005859427171539847,
      "loss": 3.0443,
      "step": 22540
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.9991865158081055,
      "learning_rate": 0.0005859414796421993,
      "loss": 2.9437,
      "step": 22541
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.532168984413147,
      "learning_rate": 0.0005859402420772522,
      "loss": 3.0317,
      "step": 22542
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4250760078430176,
      "learning_rate": 0.0005859390044591432,
      "loss": 2.9302,
      "step": 22543
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5380425453186035,
      "learning_rate": 0.0005859377667878727,
      "loss": 3.1395,
      "step": 22544
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.606388807296753,
      "learning_rate": 0.0005859365290634411,
      "loss": 2.9496,
      "step": 22545
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6247116327285767,
      "learning_rate": 0.0005859352912858484,
      "loss": 2.8175,
      "step": 22546
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0464284420013428,
      "learning_rate": 0.000585934053455095,
      "loss": 3.1184,
      "step": 22547
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3596758842468262,
      "learning_rate": 0.0005859328155711811,
      "loss": 3.0077,
      "step": 22548
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4942998886108398,
      "learning_rate": 0.0005859315776341068,
      "loss": 3.1286,
      "step": 22549
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4431517124176025,
      "learning_rate": 0.0005859303396438725,
      "loss": 2.9355,
      "step": 22550
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7599576711654663,
      "learning_rate": 0.0005859291016004783,
      "loss": 3.1448,
      "step": 22551
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8162583112716675,
      "learning_rate": 0.0005859278635039245,
      "loss": 3.1773,
      "step": 22552
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.560697078704834,
      "learning_rate": 0.0005859266253542113,
      "loss": 3.0758,
      "step": 22553
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.621807336807251,
      "learning_rate": 0.0005859253871513389,
      "loss": 2.874,
      "step": 22554
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5061695575714111,
      "learning_rate": 0.0005859241488953076,
      "loss": 3.0641,
      "step": 22555
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8104140758514404,
      "learning_rate": 0.0005859229105861176,
      "loss": 3.0284,
      "step": 22556
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4338998794555664,
      "learning_rate": 0.0005859216722237693,
      "loss": 3.1736,
      "step": 22557
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5652780532836914,
      "learning_rate": 0.0005859204338082625,
      "loss": 3.2942,
      "step": 22558
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5250073671340942,
      "learning_rate": 0.0005859191953395979,
      "loss": 3.2195,
      "step": 22559
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7789337635040283,
      "learning_rate": 0.0005859179568177754,
      "loss": 3.2221,
      "step": 22560
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.393174171447754,
      "learning_rate": 0.0005859167182427953,
      "loss": 3.0686,
      "step": 22561
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.063081979751587,
      "learning_rate": 0.0005859154796146581,
      "loss": 3.0435,
      "step": 22562
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8427724838256836,
      "learning_rate": 0.0005859142409333636,
      "loss": 3.0919,
      "step": 22563
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.477089524269104,
      "learning_rate": 0.0005859130021989124,
      "loss": 3.1775,
      "step": 22564
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9343677759170532,
      "learning_rate": 0.0005859117634113046,
      "loss": 3.1629,
      "step": 22565
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6171464920043945,
      "learning_rate": 0.0005859105245705403,
      "loss": 3.2001,
      "step": 22566
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2655190229415894,
      "learning_rate": 0.0005859092856766197,
      "loss": 3.144,
      "step": 22567
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4311152696609497,
      "learning_rate": 0.0005859080467295434,
      "loss": 3.0831,
      "step": 22568
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5792839527130127,
      "learning_rate": 0.0005859068077293114,
      "loss": 3.2012,
      "step": 22569
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.365525245666504,
      "learning_rate": 0.0005859055686759238,
      "loss": 3.1031,
      "step": 22570
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6870477199554443,
      "learning_rate": 0.0005859043295693811,
      "loss": 3.0484,
      "step": 22571
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.305456519126892,
      "learning_rate": 0.0005859030904096832,
      "loss": 3.0662,
      "step": 22572
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8312145471572876,
      "learning_rate": 0.0005859018511968307,
      "loss": 3.5384,
      "step": 22573
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6072688102722168,
      "learning_rate": 0.0005859006119308235,
      "loss": 2.9932,
      "step": 22574
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2984164953231812,
      "learning_rate": 0.0005858993726116621,
      "loss": 3.2034,
      "step": 22575
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.345033884048462,
      "learning_rate": 0.0005858981332393465,
      "loss": 3.2798,
      "step": 22576
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5184977054595947,
      "learning_rate": 0.0005858968938138771,
      "loss": 2.9592,
      "step": 22577
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4395910501480103,
      "learning_rate": 0.0005858956543352541,
      "loss": 3.0858,
      "step": 22578
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2990412712097168,
      "learning_rate": 0.0005858944148034776,
      "loss": 3.1959,
      "step": 22579
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2719167470932007,
      "learning_rate": 0.0005858931752185481,
      "loss": 3.0792,
      "step": 22580
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.1872162818908691,
      "learning_rate": 0.0005858919355804655,
      "loss": 3.2012,
      "step": 22581
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.520493507385254,
      "learning_rate": 0.0005858906958892302,
      "loss": 3.0774,
      "step": 22582
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5279204845428467,
      "learning_rate": 0.0005858894561448425,
      "loss": 3.1956,
      "step": 22583
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.671975016593933,
      "learning_rate": 0.0005858882163473026,
      "loss": 2.9979,
      "step": 22584
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5232596397399902,
      "learning_rate": 0.0005858869764966105,
      "loss": 3.0342,
      "step": 22585
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6907200813293457,
      "learning_rate": 0.0005858857365927669,
      "loss": 3.0306,
      "step": 22586
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3843469619750977,
      "learning_rate": 0.0005858844966357715,
      "loss": 3.0364,
      "step": 22587
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5868868827819824,
      "learning_rate": 0.0005858832566256248,
      "loss": 3.1834,
      "step": 22588
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6724849939346313,
      "learning_rate": 0.0005858820165623271,
      "loss": 3.2895,
      "step": 22589
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6086888313293457,
      "learning_rate": 0.0005858807764458785,
      "loss": 3.3133,
      "step": 22590
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.860978364944458,
      "learning_rate": 0.0005858795362762793,
      "loss": 3.014,
      "step": 22591
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7647483348846436,
      "learning_rate": 0.0005858782960535296,
      "loss": 3.2783,
      "step": 22592
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.889557957649231,
      "learning_rate": 0.0005858770557776297,
      "loss": 3.211,
      "step": 22593
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3536957502365112,
      "learning_rate": 0.00058587581544858,
      "loss": 3.0034,
      "step": 22594
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6103217601776123,
      "learning_rate": 0.0005858745750663806,
      "loss": 3.1752,
      "step": 22595
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.8419839143753052,
      "learning_rate": 0.0005858733346310316,
      "loss": 3.1092,
      "step": 22596
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.918796420097351,
      "learning_rate": 0.0005858720941425335,
      "loss": 3.0384,
      "step": 22597
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6761928796768188,
      "learning_rate": 0.0005858708536008862,
      "loss": 3.1727,
      "step": 22598
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7526140213012695,
      "learning_rate": 0.0005858696130060903,
      "loss": 2.9505,
      "step": 22599
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6308581829071045,
      "learning_rate": 0.0005858683723581456,
      "loss": 3.1141,
      "step": 22600
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5669025182724,
      "learning_rate": 0.0005858671316570528,
      "loss": 3.0979,
      "step": 22601
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3104594945907593,
      "learning_rate": 0.0005858658909028118,
      "loss": 3.1948,
      "step": 22602
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4434860944747925,
      "learning_rate": 0.000585864650095423,
      "loss": 3.2494,
      "step": 22603
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4620572328567505,
      "learning_rate": 0.0005858634092348865,
      "loss": 2.7901,
      "step": 22604
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5526623725891113,
      "learning_rate": 0.0005858621683212026,
      "loss": 3.1386,
      "step": 22605
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.758785367012024,
      "learning_rate": 0.0005858609273543716,
      "loss": 3.1996,
      "step": 22606
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6968923807144165,
      "learning_rate": 0.0005858596863343936,
      "loss": 3.2511,
      "step": 22607
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.506367564201355,
      "learning_rate": 0.0005858584452612688,
      "loss": 3.1106,
      "step": 22608
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3817239999771118,
      "learning_rate": 0.0005858572041349976,
      "loss": 2.9544,
      "step": 22609
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.065378189086914,
      "learning_rate": 0.0005858559629555802,
      "loss": 3.2746,
      "step": 22610
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4970005750656128,
      "learning_rate": 0.0005858547217230167,
      "loss": 3.165,
      "step": 22611
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.9290636777877808,
      "learning_rate": 0.0005858534804373075,
      "loss": 3.0442,
      "step": 22612
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.716847538948059,
      "learning_rate": 0.0005858522390984527,
      "loss": 3.1349,
      "step": 22613
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2760089635849,
      "learning_rate": 0.0005858509977064525,
      "loss": 3.0122,
      "step": 22614
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5090782642364502,
      "learning_rate": 0.0005858497562613073,
      "loss": 3.3599,
      "step": 22615
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.2893788814544678,
      "learning_rate": 0.0005858485147630173,
      "loss": 3.2476,
      "step": 22616
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7042123079299927,
      "learning_rate": 0.0005858472732115825,
      "loss": 3.2002,
      "step": 22617
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5733383893966675,
      "learning_rate": 0.0005858460316070034,
      "loss": 3.2837,
      "step": 22618
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6421515941619873,
      "learning_rate": 0.00058584478994928,
      "loss": 2.977,
      "step": 22619
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3213979005813599,
      "learning_rate": 0.0005858435482384128,
      "loss": 3.0196,
      "step": 22620
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.397858738899231,
      "learning_rate": 0.0005858423064744018,
      "loss": 2.9746,
      "step": 22621
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7663174867630005,
      "learning_rate": 0.0005858410646572474,
      "loss": 2.957,
      "step": 22622
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.373016119003296,
      "learning_rate": 0.0005858398227869497,
      "loss": 3.0306,
      "step": 22623
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.232079029083252,
      "learning_rate": 0.000585838580863509,
      "loss": 3.2257,
      "step": 22624
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7592123746871948,
      "learning_rate": 0.0005858373388869254,
      "loss": 3.2145,
      "step": 22625
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.2586402893066406,
      "learning_rate": 0.0005858360968571994,
      "loss": 2.9587,
      "step": 22626
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.712407112121582,
      "learning_rate": 0.0005858348547743311,
      "loss": 3.1647,
      "step": 22627
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7183170318603516,
      "learning_rate": 0.0005858336126383206,
      "loss": 3.4539,
      "step": 22628
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.608801007270813,
      "learning_rate": 0.0005858323704491683,
      "loss": 2.9168,
      "step": 22629
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.1333117485046387,
      "learning_rate": 0.0005858311282068742,
      "loss": 3.1786,
      "step": 22630
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.7383852005004883,
      "learning_rate": 0.0005858298859114389,
      "loss": 3.1602,
      "step": 22631
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.1842470169067383,
      "learning_rate": 0.0005858286435628623,
      "loss": 3.0,
      "step": 22632
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6515284776687622,
      "learning_rate": 0.0005858274011611448,
      "loss": 3.2639,
      "step": 22633
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.9744760990142822,
      "learning_rate": 0.0005858261587062866,
      "loss": 3.036,
      "step": 22634
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.514678001403809,
      "learning_rate": 0.000585824916198288,
      "loss": 3.2343,
      "step": 22635
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0330233573913574,
      "learning_rate": 0.0005858236736371491,
      "loss": 3.2672,
      "step": 22636
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.3159440755844116,
      "learning_rate": 0.0005858224310228701,
      "loss": 3.1714,
      "step": 22637
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.4759461879730225,
      "learning_rate": 0.0005858211883554514,
      "loss": 2.8377,
      "step": 22638
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.3235392570495605,
      "learning_rate": 0.0005858199456348931,
      "loss": 3.0947,
      "step": 22639
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.448922634124756,
      "learning_rate": 0.0005858187028611956,
      "loss": 2.996,
      "step": 22640
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.604783296585083,
      "learning_rate": 0.0005858174600343588,
      "loss": 3.117,
      "step": 22641
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6663767099380493,
      "learning_rate": 0.0005858162171543833,
      "loss": 2.9513,
      "step": 22642
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.0882818698883057,
      "learning_rate": 0.000585814974221269,
      "loss": 3.0766,
      "step": 22643
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5913950204849243,
      "learning_rate": 0.0005858137312350165,
      "loss": 3.0939,
      "step": 22644
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.6954970359802246,
      "learning_rate": 0.0005858124881956258,
      "loss": 3.329,
      "step": 22645
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.936440348625183,
      "learning_rate": 0.000585811245103097,
      "loss": 3.0912,
      "step": 22646
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7767075300216675,
      "learning_rate": 0.0005858100019574306,
      "loss": 3.2237,
      "step": 22647
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.4717549085617065,
      "learning_rate": 0.0005858087587586267,
      "loss": 3.2218,
      "step": 22648
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7176475524902344,
      "learning_rate": 0.0005858075155066857,
      "loss": 2.8891,
      "step": 22649
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.254122734069824,
      "learning_rate": 0.0005858062722016075,
      "loss": 3.311,
      "step": 22650
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.600268602371216,
      "learning_rate": 0.0005858050288433927,
      "loss": 2.9799,
      "step": 22651
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.7112984657287598,
      "learning_rate": 0.0005858037854320412,
      "loss": 3.0793,
      "step": 22652
    },
    {
      "epoch": 0.29,
      "grad_norm": 3.5944180488586426,
      "learning_rate": 0.0005858025419675535,
      "loss": 2.9796,
      "step": 22653
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.9509212970733643,
      "learning_rate": 0.0005858012984499295,
      "loss": 2.848,
      "step": 22654
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.5146119594573975,
      "learning_rate": 0.0005858000548791698,
      "loss": 3.2169,
      "step": 22655
    },
    {
      "epoch": 0.29,
      "grad_norm": 1.5187737941741943,
      "learning_rate": 0.0005857988112552744,
      "loss": 3.175,
      "step": 22656
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8264682292938232,
      "learning_rate": 0.0005857975675782437,
      "loss": 3.0847,
      "step": 22657
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8201624155044556,
      "learning_rate": 0.0005857963238480779,
      "loss": 2.9318,
      "step": 22658
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7975417375564575,
      "learning_rate": 0.000585795080064777,
      "loss": 2.9956,
      "step": 22659
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.312736988067627,
      "learning_rate": 0.0005857938362283415,
      "loss": 3.0951,
      "step": 22660
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7078708410263062,
      "learning_rate": 0.0005857925923387715,
      "loss": 3.266,
      "step": 22661
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4649757146835327,
      "learning_rate": 0.0005857913483960672,
      "loss": 2.9992,
      "step": 22662
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6102144718170166,
      "learning_rate": 0.000585790104400229,
      "loss": 2.9966,
      "step": 22663
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.291712760925293,
      "learning_rate": 0.000585788860351257,
      "loss": 2.8961,
      "step": 22664
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3384413719177246,
      "learning_rate": 0.0005857876162491514,
      "loss": 2.9063,
      "step": 22665
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7291687726974487,
      "learning_rate": 0.0005857863720939126,
      "loss": 3.2107,
      "step": 22666
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3918209075927734,
      "learning_rate": 0.0005857851278855406,
      "loss": 2.9414,
      "step": 22667
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3995599746704102,
      "learning_rate": 0.0005857838836240358,
      "loss": 2.9384,
      "step": 22668
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2837127447128296,
      "learning_rate": 0.0005857826393093985,
      "loss": 3.0213,
      "step": 22669
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.458004355430603,
      "learning_rate": 0.0005857813949416285,
      "loss": 3.0415,
      "step": 22670
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1942973136901855,
      "learning_rate": 0.0005857801505207266,
      "loss": 3.2346,
      "step": 22671
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6290619373321533,
      "learning_rate": 0.0005857789060466927,
      "loss": 2.9935,
      "step": 22672
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9984592199325562,
      "learning_rate": 0.0005857776615195271,
      "loss": 2.8843,
      "step": 22673
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8667343854904175,
      "learning_rate": 0.0005857764169392301,
      "loss": 3.1392,
      "step": 22674
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4300087690353394,
      "learning_rate": 0.0005857751723058018,
      "loss": 3.1576,
      "step": 22675
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.589911937713623,
      "learning_rate": 0.0005857739276192425,
      "loss": 3.1391,
      "step": 22676
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9392892122268677,
      "learning_rate": 0.0005857726828795525,
      "loss": 3.0951,
      "step": 22677
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.869499921798706,
      "learning_rate": 0.0005857714380867319,
      "loss": 2.8839,
      "step": 22678
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7716093063354492,
      "learning_rate": 0.000585770193240781,
      "loss": 2.9903,
      "step": 22679
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.488936185836792,
      "learning_rate": 0.0005857689483417001,
      "loss": 3.0224,
      "step": 22680
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7065327167510986,
      "learning_rate": 0.0005857677033894893,
      "loss": 3.1576,
      "step": 22681
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5775260925292969,
      "learning_rate": 0.0005857664583841488,
      "loss": 3.2831,
      "step": 22682
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9278310537338257,
      "learning_rate": 0.0005857652133256791,
      "loss": 3.1902,
      "step": 22683
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.741569757461548,
      "learning_rate": 0.0005857639682140801,
      "loss": 3.1518,
      "step": 22684
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4758427143096924,
      "learning_rate": 0.0005857627230493524,
      "loss": 2.7796,
      "step": 22685
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.453514575958252,
      "learning_rate": 0.0005857614778314958,
      "loss": 3.0469,
      "step": 22686
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.2924489974975586,
      "learning_rate": 0.0005857602325605107,
      "loss": 3.0499,
      "step": 22687
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.735102891921997,
      "learning_rate": 0.0005857589872363976,
      "loss": 3.0204,
      "step": 22688
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.72641921043396,
      "learning_rate": 0.0005857577418591564,
      "loss": 3.1518,
      "step": 22689
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6330640316009521,
      "learning_rate": 0.0005857564964287874,
      "loss": 3.1893,
      "step": 22690
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.476137638092041,
      "learning_rate": 0.0005857552509452909,
      "loss": 3.1009,
      "step": 22691
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.442147731781006,
      "learning_rate": 0.0005857540054086671,
      "loss": 3.1664,
      "step": 22692
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4384307861328125,
      "learning_rate": 0.0005857527598189163,
      "loss": 3.286,
      "step": 22693
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4240118265151978,
      "learning_rate": 0.0005857515141760386,
      "loss": 3.2706,
      "step": 22694
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8212100267410278,
      "learning_rate": 0.0005857502684800343,
      "loss": 3.2124,
      "step": 22695
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4098702669143677,
      "learning_rate": 0.0005857490227309036,
      "loss": 3.1483,
      "step": 22696
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3751190900802612,
      "learning_rate": 0.0005857477769286468,
      "loss": 3.3024,
      "step": 22697
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6691296100616455,
      "learning_rate": 0.0005857465310732642,
      "loss": 3.1172,
      "step": 22698
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7015740871429443,
      "learning_rate": 0.0005857452851647558,
      "loss": 2.8252,
      "step": 22699
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2159066200256348,
      "learning_rate": 0.000585744039203122,
      "loss": 3.0286,
      "step": 22700
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.343165397644043,
      "learning_rate": 0.0005857427931883631,
      "loss": 3.2143,
      "step": 22701
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0098464488983154,
      "learning_rate": 0.000585741547120479,
      "loss": 2.9519,
      "step": 22702
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4991014003753662,
      "learning_rate": 0.0005857403009994703,
      "loss": 3.1247,
      "step": 22703
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.187980890274048,
      "learning_rate": 0.000585739054825337,
      "loss": 3.0049,
      "step": 22704
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9320498704910278,
      "learning_rate": 0.0005857378085980795,
      "loss": 3.0456,
      "step": 22705
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7737735509872437,
      "learning_rate": 0.0005857365623176979,
      "loss": 3.0429,
      "step": 22706
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5318888425827026,
      "learning_rate": 0.0005857353159841925,
      "loss": 3.2332,
      "step": 22707
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8558132648468018,
      "learning_rate": 0.0005857340695975635,
      "loss": 2.9341,
      "step": 22708
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7169910669326782,
      "learning_rate": 0.0005857328231578112,
      "loss": 3.152,
      "step": 22709
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.408132314682007,
      "learning_rate": 0.0005857315766649357,
      "loss": 3.0614,
      "step": 22710
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.695427656173706,
      "learning_rate": 0.0005857303301189372,
      "loss": 3.0852,
      "step": 22711
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3299471139907837,
      "learning_rate": 0.0005857290835198163,
      "loss": 3.124,
      "step": 22712
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7671771049499512,
      "learning_rate": 0.0005857278368675727,
      "loss": 2.7956,
      "step": 22713
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.1848584413528442,
      "learning_rate": 0.000585726590162207,
      "loss": 3.0548,
      "step": 22714
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.732771635055542,
      "learning_rate": 0.0005857253434037195,
      "loss": 3.2208,
      "step": 22715
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7346792221069336,
      "learning_rate": 0.0005857240965921101,
      "loss": 3.0679,
      "step": 22716
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.039735794067383,
      "learning_rate": 0.0005857228497273792,
      "loss": 3.1561,
      "step": 22717
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4164316654205322,
      "learning_rate": 0.000585721602809527,
      "loss": 3.3561,
      "step": 22718
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6164004802703857,
      "learning_rate": 0.0005857203558385539,
      "loss": 3.2125,
      "step": 22719
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5775208473205566,
      "learning_rate": 0.0005857191088144599,
      "loss": 3.0566,
      "step": 22720
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8258056640625,
      "learning_rate": 0.0005857178617372453,
      "loss": 2.9415,
      "step": 22721
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3998386859893799,
      "learning_rate": 0.0005857166146069103,
      "loss": 2.8748,
      "step": 22722
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5307385921478271,
      "learning_rate": 0.0005857153674234554,
      "loss": 2.9696,
      "step": 22723
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2891435623168945,
      "learning_rate": 0.0005857141201868805,
      "loss": 3.2421,
      "step": 22724
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6501178741455078,
      "learning_rate": 0.0005857128728971859,
      "loss": 3.0675,
      "step": 22725
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6198830604553223,
      "learning_rate": 0.0005857116255543719,
      "loss": 3.1792,
      "step": 22726
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6296186447143555,
      "learning_rate": 0.0005857103781584387,
      "loss": 2.7906,
      "step": 22727
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.888988971710205,
      "learning_rate": 0.0005857091307093866,
      "loss": 3.2061,
      "step": 22728
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7520278692245483,
      "learning_rate": 0.0005857078832072159,
      "loss": 3.1991,
      "step": 22729
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4022899866104126,
      "learning_rate": 0.0005857066356519266,
      "loss": 3.2382,
      "step": 22730
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7321362495422363,
      "learning_rate": 0.0005857053880435189,
      "loss": 2.8533,
      "step": 22731
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2972718477249146,
      "learning_rate": 0.0005857041403819933,
      "loss": 3.0894,
      "step": 22732
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4606784582138062,
      "learning_rate": 0.00058570289266735,
      "loss": 3.075,
      "step": 22733
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4529765844345093,
      "learning_rate": 0.000585701644899589,
      "loss": 3.0937,
      "step": 22734
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6751395463943481,
      "learning_rate": 0.0005857003970787107,
      "loss": 3.1389,
      "step": 22735
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2909172773361206,
      "learning_rate": 0.0005856991492047153,
      "loss": 2.9324,
      "step": 22736
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4421584606170654,
      "learning_rate": 0.0005856979012776032,
      "loss": 3.0329,
      "step": 22737
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6631826162338257,
      "learning_rate": 0.0005856966532973742,
      "loss": 3.168,
      "step": 22738
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.452599048614502,
      "learning_rate": 0.0005856954052640289,
      "loss": 2.9796,
      "step": 22739
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7294270992279053,
      "learning_rate": 0.0005856941571775675,
      "loss": 3.1538,
      "step": 22740
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.119068145751953,
      "learning_rate": 0.0005856929090379901,
      "loss": 3.0762,
      "step": 22741
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.034475803375244,
      "learning_rate": 0.000585691660845297,
      "loss": 3.1306,
      "step": 22742
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.347918748855591,
      "learning_rate": 0.0005856904125994885,
      "loss": 3.0174,
      "step": 22743
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4928570985794067,
      "learning_rate": 0.0005856891643005647,
      "loss": 2.9881,
      "step": 22744
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5074338912963867,
      "learning_rate": 0.0005856879159485259,
      "loss": 3.088,
      "step": 22745
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4402427673339844,
      "learning_rate": 0.0005856866675433722,
      "loss": 3.1318,
      "step": 22746
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.245898962020874,
      "learning_rate": 0.0005856854190851041,
      "loss": 3.0725,
      "step": 22747
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7895926237106323,
      "learning_rate": 0.0005856841705737218,
      "loss": 2.9535,
      "step": 22748
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.087128162384033,
      "learning_rate": 0.0005856829220092252,
      "loss": 3.083,
      "step": 22749
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.901197671890259,
      "learning_rate": 0.0005856816733916148,
      "loss": 3.2134,
      "step": 22750
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.925537347793579,
      "learning_rate": 0.0005856804247208909,
      "loss": 3.1631,
      "step": 22751
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.204874038696289,
      "learning_rate": 0.0005856791759970535,
      "loss": 3.3607,
      "step": 22752
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5123090744018555,
      "learning_rate": 0.000585677927220103,
      "loss": 2.9513,
      "step": 22753
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.31255841255188,
      "learning_rate": 0.0005856766783900395,
      "loss": 3.237,
      "step": 22754
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.262528896331787,
      "learning_rate": 0.0005856754295068635,
      "loss": 3.002,
      "step": 22755
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.448033332824707,
      "learning_rate": 0.0005856741805705749,
      "loss": 2.9884,
      "step": 22756
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8626312017440796,
      "learning_rate": 0.000585672931581174,
      "loss": 3.138,
      "step": 22757
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2586443424224854,
      "learning_rate": 0.0005856716825386613,
      "loss": 2.932,
      "step": 22758
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5210360288619995,
      "learning_rate": 0.0005856704334430366,
      "loss": 3.0185,
      "step": 22759
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8333626985549927,
      "learning_rate": 0.0005856691842943006,
      "loss": 3.0826,
      "step": 22760
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5267363786697388,
      "learning_rate": 0.0005856679350924532,
      "loss": 3.0375,
      "step": 22761
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6852396726608276,
      "learning_rate": 0.0005856666858374949,
      "loss": 3.2294,
      "step": 22762
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5615626573562622,
      "learning_rate": 0.0005856654365294255,
      "loss": 3.0639,
      "step": 22763
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.148515462875366,
      "learning_rate": 0.0005856641871682457,
      "loss": 3.0773,
      "step": 22764
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.835153341293335,
      "learning_rate": 0.0005856629377539555,
      "loss": 3.2747,
      "step": 22765
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5315881967544556,
      "learning_rate": 0.0005856616882865552,
      "loss": 3.1863,
      "step": 22766
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5365437269210815,
      "learning_rate": 0.0005856604387660449,
      "loss": 3.2006,
      "step": 22767
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6671366691589355,
      "learning_rate": 0.000585659189192425,
      "loss": 2.9339,
      "step": 22768
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3946853876113892,
      "learning_rate": 0.0005856579395656956,
      "loss": 3.2113,
      "step": 22769
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.5949435234069824,
      "learning_rate": 0.000585656689885857,
      "loss": 3.0848,
      "step": 22770
    },
    {
      "epoch": 0.3,
      "grad_norm": 4.014594554901123,
      "learning_rate": 0.0005856554401529095,
      "loss": 2.8268,
      "step": 22771
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4896352291107178,
      "learning_rate": 0.0005856541903668533,
      "loss": 2.7908,
      "step": 22772
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8941000699996948,
      "learning_rate": 0.0005856529405276884,
      "loss": 3.0898,
      "step": 22773
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.763607144355774,
      "learning_rate": 0.0005856516906354154,
      "loss": 3.0398,
      "step": 22774
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6585674285888672,
      "learning_rate": 0.0005856504406900343,
      "loss": 3.0747,
      "step": 22775
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9925144910812378,
      "learning_rate": 0.0005856491906915454,
      "loss": 3.2548,
      "step": 22776
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.853918433189392,
      "learning_rate": 0.000585647940639949,
      "loss": 2.9785,
      "step": 22777
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.468074083328247,
      "learning_rate": 0.0005856466905352451,
      "loss": 3.1875,
      "step": 22778
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.385016918182373,
      "learning_rate": 0.0005856454403774343,
      "loss": 3.3751,
      "step": 22779
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.392264723777771,
      "learning_rate": 0.0005856441901665164,
      "loss": 2.9954,
      "step": 22780
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6847376823425293,
      "learning_rate": 0.0005856429399024919,
      "loss": 2.9642,
      "step": 22781
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7020241022109985,
      "learning_rate": 0.0005856416895853611,
      "loss": 3.3622,
      "step": 22782
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6155829429626465,
      "learning_rate": 0.0005856404392151239,
      "loss": 3.294,
      "step": 22783
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8480379581451416,
      "learning_rate": 0.0005856391887917809,
      "loss": 3.0914,
      "step": 22784
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4631786346435547,
      "learning_rate": 0.0005856379383153323,
      "loss": 2.9039,
      "step": 22785
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.6263303756713867,
      "learning_rate": 0.000585636687785778,
      "loss": 2.9646,
      "step": 22786
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.67958402633667,
      "learning_rate": 0.0005856354372031186,
      "loss": 2.9729,
      "step": 22787
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7580108642578125,
      "learning_rate": 0.0005856341865673541,
      "loss": 3.1357,
      "step": 22788
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0483016967773438,
      "learning_rate": 0.0005856329358784848,
      "loss": 2.7643,
      "step": 22789
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4980924129486084,
      "learning_rate": 0.000585631685136511,
      "loss": 3.0759,
      "step": 22790
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2211482524871826,
      "learning_rate": 0.0005856304343414328,
      "loss": 3.2527,
      "step": 22791
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3947185277938843,
      "learning_rate": 0.0005856291834932506,
      "loss": 2.816,
      "step": 22792
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.262298822402954,
      "learning_rate": 0.0005856279325919646,
      "loss": 3.1492,
      "step": 22793
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.9114842414855957,
      "learning_rate": 0.0005856266816375748,
      "loss": 3.0673,
      "step": 22794
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.67548406124115,
      "learning_rate": 0.0005856254306300817,
      "loss": 3.2588,
      "step": 22795
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4188529253005981,
      "learning_rate": 0.0005856241795694855,
      "loss": 3.0831,
      "step": 22796
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.664565086364746,
      "learning_rate": 0.0005856229284557863,
      "loss": 3.2817,
      "step": 22797
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.148688793182373,
      "learning_rate": 0.0005856216772889844,
      "loss": 3.0357,
      "step": 22798
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.898438811302185,
      "learning_rate": 0.0005856204260690801,
      "loss": 2.9762,
      "step": 22799
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4631030559539795,
      "learning_rate": 0.0005856191747960735,
      "loss": 3.3736,
      "step": 22800
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4661240577697754,
      "learning_rate": 0.0005856179234699649,
      "loss": 3.0661,
      "step": 22801
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7733118534088135,
      "learning_rate": 0.0005856166720907545,
      "loss": 3.3191,
      "step": 22802
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8544206619262695,
      "learning_rate": 0.0005856154206584427,
      "loss": 2.9255,
      "step": 22803
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.340211033821106,
      "learning_rate": 0.0005856141691730295,
      "loss": 3.1176,
      "step": 22804
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.133744478225708,
      "learning_rate": 0.0005856129176345154,
      "loss": 3.0845,
      "step": 22805
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.773508310317993,
      "learning_rate": 0.0005856116660429003,
      "loss": 3.0508,
      "step": 22806
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.05391526222229,
      "learning_rate": 0.0005856104143981846,
      "loss": 2.895,
      "step": 22807
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3589365482330322,
      "learning_rate": 0.0005856091627003685,
      "loss": 2.8751,
      "step": 22808
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.084042549133301,
      "learning_rate": 0.0005856079109494524,
      "loss": 3.2759,
      "step": 22809
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.833523750305176,
      "learning_rate": 0.0005856066591454364,
      "loss": 3.1061,
      "step": 22810
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4696714878082275,
      "learning_rate": 0.0005856054072883206,
      "loss": 3.1557,
      "step": 22811
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.864311695098877,
      "learning_rate": 0.0005856041553781054,
      "loss": 2.893,
      "step": 22812
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7589415311813354,
      "learning_rate": 0.0005856029034147911,
      "loss": 3.1198,
      "step": 22813
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6323808431625366,
      "learning_rate": 0.0005856016513983777,
      "loss": 2.9942,
      "step": 22814
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2977675199508667,
      "learning_rate": 0.0005856003993288657,
      "loss": 3.1069,
      "step": 22815
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4929882287979126,
      "learning_rate": 0.000585599147206255,
      "loss": 3.2228,
      "step": 22816
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0213704109191895,
      "learning_rate": 0.0005855978950305461,
      "loss": 2.951,
      "step": 22817
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5663985013961792,
      "learning_rate": 0.0005855966428017392,
      "loss": 2.9668,
      "step": 22818
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4960941076278687,
      "learning_rate": 0.0005855953905198345,
      "loss": 3.1179,
      "step": 22819
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2703514099121094,
      "learning_rate": 0.0005855941381848322,
      "loss": 2.9743,
      "step": 22820
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7163569927215576,
      "learning_rate": 0.0005855928857967326,
      "loss": 3.0585,
      "step": 22821
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6594897508621216,
      "learning_rate": 0.0005855916333555359,
      "loss": 3.0805,
      "step": 22822
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5825231075286865,
      "learning_rate": 0.0005855903808612423,
      "loss": 3.2157,
      "step": 22823
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.705596685409546,
      "learning_rate": 0.000585589128313852,
      "loss": 3.18,
      "step": 22824
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4834576845169067,
      "learning_rate": 0.0005855878757133654,
      "loss": 3.0518,
      "step": 22825
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2478139400482178,
      "learning_rate": 0.0005855866230597825,
      "loss": 3.253,
      "step": 22826
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.2974870204925537,
      "learning_rate": 0.0005855853703531038,
      "loss": 2.979,
      "step": 22827
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4899744987487793,
      "learning_rate": 0.0005855841175933293,
      "loss": 2.8546,
      "step": 22828
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9571231603622437,
      "learning_rate": 0.0005855828647804593,
      "loss": 3.1361,
      "step": 22829
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.455348014831543,
      "learning_rate": 0.0005855816119144941,
      "loss": 2.955,
      "step": 22830
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.922257900238037,
      "learning_rate": 0.0005855803589954339,
      "loss": 3.2798,
      "step": 22831
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.497849941253662,
      "learning_rate": 0.0005855791060232789,
      "loss": 2.9672,
      "step": 22832
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.082791805267334,
      "learning_rate": 0.0005855778529980293,
      "loss": 3.2668,
      "step": 22833
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8225829601287842,
      "learning_rate": 0.0005855765999196854,
      "loss": 3.089,
      "step": 22834
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.067791700363159,
      "learning_rate": 0.0005855753467882475,
      "loss": 2.9682,
      "step": 22835
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.705200433731079,
      "learning_rate": 0.0005855740936037157,
      "loss": 3.1669,
      "step": 22836
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.406921625137329,
      "learning_rate": 0.0005855728403660902,
      "loss": 2.8159,
      "step": 22837
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.523664951324463,
      "learning_rate": 0.0005855715870753715,
      "loss": 3.1819,
      "step": 22838
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5737223625183105,
      "learning_rate": 0.0005855703337315595,
      "loss": 2.768,
      "step": 22839
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.949410080909729,
      "learning_rate": 0.0005855690803346547,
      "loss": 3.2385,
      "step": 22840
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4161421060562134,
      "learning_rate": 0.0005855678268846572,
      "loss": 3.1242,
      "step": 22841
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.404625654220581,
      "learning_rate": 0.0005855665733815671,
      "loss": 2.8276,
      "step": 22842
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.382908821105957,
      "learning_rate": 0.000585565319825385,
      "loss": 3.1163,
      "step": 22843
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8333020210266113,
      "learning_rate": 0.0005855640662161107,
      "loss": 3.0287,
      "step": 22844
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.345194697380066,
      "learning_rate": 0.0005855628125537447,
      "loss": 2.9886,
      "step": 22845
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9808746576309204,
      "learning_rate": 0.0005855615588382873,
      "loss": 2.8646,
      "step": 22846
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7381823062896729,
      "learning_rate": 0.0005855603050697386,
      "loss": 3.2424,
      "step": 22847
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.489536166191101,
      "learning_rate": 0.0005855590512480988,
      "loss": 2.9622,
      "step": 22848
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2894889116287231,
      "learning_rate": 0.0005855577973733682,
      "loss": 3.0334,
      "step": 22849
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7285035848617554,
      "learning_rate": 0.000585556543445547,
      "loss": 3.2091,
      "step": 22850
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3155229091644287,
      "learning_rate": 0.0005855552894646355,
      "loss": 3.1255,
      "step": 22851
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.806549072265625,
      "learning_rate": 0.0005855540354306338,
      "loss": 3.3332,
      "step": 22852
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.384142279624939,
      "learning_rate": 0.0005855527813435423,
      "loss": 3.1354,
      "step": 22853
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4197914600372314,
      "learning_rate": 0.0005855515272033611,
      "loss": 3.0787,
      "step": 22854
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5538793802261353,
      "learning_rate": 0.0005855502730100904,
      "loss": 3.0917,
      "step": 22855
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4087352752685547,
      "learning_rate": 0.0005855490187637307,
      "loss": 3.083,
      "step": 22856
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5377126932144165,
      "learning_rate": 0.0005855477644642819,
      "loss": 2.6975,
      "step": 22857
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5233066082000732,
      "learning_rate": 0.0005855465101117445,
      "loss": 3.0566,
      "step": 22858
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.643892765045166,
      "learning_rate": 0.0005855452557061184,
      "loss": 3.122,
      "step": 22859
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.439631700515747,
      "learning_rate": 0.0005855440012474042,
      "loss": 3.326,
      "step": 22860
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4797399044036865,
      "learning_rate": 0.0005855427467356019,
      "loss": 2.9815,
      "step": 22861
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.326497197151184,
      "learning_rate": 0.0005855414921707119,
      "loss": 3.0913,
      "step": 22862
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5581188201904297,
      "learning_rate": 0.0005855402375527344,
      "loss": 3.3765,
      "step": 22863
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6787991523742676,
      "learning_rate": 0.0005855389828816694,
      "loss": 3.1204,
      "step": 22864
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5820568799972534,
      "learning_rate": 0.0005855377281575175,
      "loss": 3.0503,
      "step": 22865
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2229788303375244,
      "learning_rate": 0.0005855364733802787,
      "loss": 2.9166,
      "step": 22866
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.511805534362793,
      "learning_rate": 0.0005855352185499532,
      "loss": 3.0333,
      "step": 22867
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3348705768585205,
      "learning_rate": 0.0005855339636665414,
      "loss": 3.0053,
      "step": 22868
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.58868408203125,
      "learning_rate": 0.0005855327087300433,
      "loss": 3.4245,
      "step": 22869
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7704429626464844,
      "learning_rate": 0.0005855314537404594,
      "loss": 3.3084,
      "step": 22870
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7291299104690552,
      "learning_rate": 0.0005855301986977898,
      "loss": 3.2514,
      "step": 22871
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.543692111968994,
      "learning_rate": 0.0005855289436020348,
      "loss": 3.1296,
      "step": 22872
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6026418209075928,
      "learning_rate": 0.0005855276884531945,
      "loss": 2.8161,
      "step": 22873
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.375761866569519,
      "learning_rate": 0.0005855264332512692,
      "loss": 3.0471,
      "step": 22874
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7167620658874512,
      "learning_rate": 0.0005855251779962592,
      "loss": 2.9638,
      "step": 22875
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7738125324249268,
      "learning_rate": 0.0005855239226881646,
      "loss": 2.8959,
      "step": 22876
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4296315908432007,
      "learning_rate": 0.0005855226673269858,
      "loss": 3.3892,
      "step": 22877
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.528671383857727,
      "learning_rate": 0.0005855214119127229,
      "loss": 3.1294,
      "step": 22878
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.128148078918457,
      "learning_rate": 0.0005855201564453761,
      "loss": 3.1285,
      "step": 22879
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.705678105354309,
      "learning_rate": 0.0005855189009249459,
      "loss": 3.1916,
      "step": 22880
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7800474166870117,
      "learning_rate": 0.0005855176453514323,
      "loss": 3.1192,
      "step": 22881
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3196797370910645,
      "learning_rate": 0.0005855163897248355,
      "loss": 3.0948,
      "step": 22882
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.223848819732666,
      "learning_rate": 0.0005855151340451558,
      "loss": 3.4266,
      "step": 22883
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.555238962173462,
      "learning_rate": 0.0005855138783123935,
      "loss": 3.2207,
      "step": 22884
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4875409603118896,
      "learning_rate": 0.0005855126225265488,
      "loss": 2.9766,
      "step": 22885
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.7812185287475586,
      "learning_rate": 0.0005855113666876219,
      "loss": 3.1107,
      "step": 22886
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3894386291503906,
      "learning_rate": 0.000585510110795613,
      "loss": 2.9457,
      "step": 22887
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.846224069595337,
      "learning_rate": 0.0005855088548505225,
      "loss": 2.9255,
      "step": 22888
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.748687744140625,
      "learning_rate": 0.0005855075988523504,
      "loss": 3.0866,
      "step": 22889
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6158658266067505,
      "learning_rate": 0.0005855063428010971,
      "loss": 3.2083,
      "step": 22890
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6455010175704956,
      "learning_rate": 0.0005855050866967628,
      "loss": 3.0545,
      "step": 22891
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9512327909469604,
      "learning_rate": 0.0005855038305393477,
      "loss": 2.9644,
      "step": 22892
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.185559034347534,
      "learning_rate": 0.000585502574328852,
      "loss": 3.2403,
      "step": 22893
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.505433201789856,
      "learning_rate": 0.0005855013180652759,
      "loss": 3.2725,
      "step": 22894
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8160334825515747,
      "learning_rate": 0.0005855000617486199,
      "loss": 2.9534,
      "step": 22895
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.836112380027771,
      "learning_rate": 0.000585498805378884,
      "loss": 3.2948,
      "step": 22896
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3264827728271484,
      "learning_rate": 0.0005854975489560684,
      "loss": 3.2059,
      "step": 22897
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3625683784484863,
      "learning_rate": 0.0005854962924801735,
      "loss": 2.9615,
      "step": 22898
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.559361219406128,
      "learning_rate": 0.0005854950359511994,
      "loss": 3.2417,
      "step": 22899
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4090001583099365,
      "learning_rate": 0.0005854937793691464,
      "loss": 3.2568,
      "step": 22900
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3843687772750854,
      "learning_rate": 0.0005854925227340148,
      "loss": 3.021,
      "step": 22901
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.512998342514038,
      "learning_rate": 0.0005854912660458046,
      "loss": 3.126,
      "step": 22902
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.931874394416809,
      "learning_rate": 0.0005854900093045163,
      "loss": 3.1642,
      "step": 22903
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.400023937225342,
      "learning_rate": 0.0005854887525101499,
      "loss": 2.9705,
      "step": 22904
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.786038875579834,
      "learning_rate": 0.0005854874956627059,
      "loss": 3.1421,
      "step": 22905
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.472300410270691,
      "learning_rate": 0.0005854862387621843,
      "loss": 3.2544,
      "step": 22906
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.440448522567749,
      "learning_rate": 0.0005854849818085854,
      "loss": 3.0272,
      "step": 22907
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.901570439338684,
      "learning_rate": 0.0005854837248019095,
      "loss": 3.0883,
      "step": 22908
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5689113140106201,
      "learning_rate": 0.0005854824677421569,
      "loss": 3.1254,
      "step": 22909
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7156602144241333,
      "learning_rate": 0.0005854812106293276,
      "loss": 3.1203,
      "step": 22910
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4568498134613037,
      "learning_rate": 0.0005854799534634218,
      "loss": 3.1518,
      "step": 22911
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.454120397567749,
      "learning_rate": 0.00058547869624444,
      "loss": 3.002,
      "step": 22912
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.253472089767456,
      "learning_rate": 0.0005854774389723825,
      "loss": 2.9539,
      "step": 22913
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6695528030395508,
      "learning_rate": 0.0005854761816472491,
      "loss": 2.9131,
      "step": 22914
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5231049060821533,
      "learning_rate": 0.0005854749242690404,
      "loss": 2.9004,
      "step": 22915
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7187789678573608,
      "learning_rate": 0.0005854736668377565,
      "loss": 2.926,
      "step": 22916
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4336841106414795,
      "learning_rate": 0.0005854724093533977,
      "loss": 3.2399,
      "step": 22917
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7920931577682495,
      "learning_rate": 0.0005854711518159643,
      "loss": 3.4183,
      "step": 22918
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9489524364471436,
      "learning_rate": 0.0005854698942254562,
      "loss": 3.2091,
      "step": 22919
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.31744384765625,
      "learning_rate": 0.000585468636581874,
      "loss": 2.9366,
      "step": 22920
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5190914869308472,
      "learning_rate": 0.0005854673788852177,
      "loss": 3.1041,
      "step": 22921
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7142717838287354,
      "learning_rate": 0.0005854661211354877,
      "loss": 2.8199,
      "step": 22922
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6362833976745605,
      "learning_rate": 0.0005854648633326842,
      "loss": 3.207,
      "step": 22923
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3139021396636963,
      "learning_rate": 0.0005854636054768073,
      "loss": 3.1098,
      "step": 22924
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.335660219192505,
      "learning_rate": 0.0005854623475678573,
      "loss": 3.0718,
      "step": 22925
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2252414226531982,
      "learning_rate": 0.0005854610896058346,
      "loss": 3.2698,
      "step": 22926
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4396601915359497,
      "learning_rate": 0.0005854598315907392,
      "loss": 3.0748,
      "step": 22927
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4755990505218506,
      "learning_rate": 0.0005854585735225714,
      "loss": 2.9305,
      "step": 22928
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5378752946853638,
      "learning_rate": 0.0005854573154013315,
      "loss": 3.2829,
      "step": 22929
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9008735418319702,
      "learning_rate": 0.0005854560572270197,
      "loss": 2.9778,
      "step": 22930
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5318512916564941,
      "learning_rate": 0.0005854547989996362,
      "loss": 3.17,
      "step": 22931
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.258315086364746,
      "learning_rate": 0.0005854535407191813,
      "loss": 3.0633,
      "step": 22932
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.328244924545288,
      "learning_rate": 0.0005854522823856552,
      "loss": 3.1604,
      "step": 22933
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8429573774337769,
      "learning_rate": 0.0005854510239990581,
      "loss": 3.2626,
      "step": 22934
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7889336347579956,
      "learning_rate": 0.0005854497655593903,
      "loss": 3.0115,
      "step": 22935
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.123215913772583,
      "learning_rate": 0.000585448507066652,
      "loss": 2.9949,
      "step": 22936
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6238396167755127,
      "learning_rate": 0.0005854472485208433,
      "loss": 3.3257,
      "step": 22937
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7011158466339111,
      "learning_rate": 0.0005854459899219648,
      "loss": 3.1844,
      "step": 22938
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4565982818603516,
      "learning_rate": 0.0005854447312700164,
      "loss": 3.2228,
      "step": 22939
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0338687896728516,
      "learning_rate": 0.0005854434725649985,
      "loss": 2.8848,
      "step": 22940
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.880315899848938,
      "learning_rate": 0.0005854422138069111,
      "loss": 3.0445,
      "step": 22941
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0349040031433105,
      "learning_rate": 0.0005854409549957547,
      "loss": 3.2505,
      "step": 22942
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4065041542053223,
      "learning_rate": 0.0005854396961315295,
      "loss": 3.0939,
      "step": 22943
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3112181425094604,
      "learning_rate": 0.0005854384372142356,
      "loss": 3.0645,
      "step": 22944
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4015130996704102,
      "learning_rate": 0.0005854371782438733,
      "loss": 3.2553,
      "step": 22945
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4812471866607666,
      "learning_rate": 0.0005854359192204429,
      "loss": 2.933,
      "step": 22946
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5910283327102661,
      "learning_rate": 0.0005854346601439445,
      "loss": 2.9416,
      "step": 22947
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.157076835632324,
      "learning_rate": 0.0005854334010143784,
      "loss": 2.9365,
      "step": 22948
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.867598533630371,
      "learning_rate": 0.0005854321418317449,
      "loss": 2.9477,
      "step": 22949
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7370827198028564,
      "learning_rate": 0.0005854308825960442,
      "loss": 3.2137,
      "step": 22950
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5251544713974,
      "learning_rate": 0.0005854296233072765,
      "loss": 3.1178,
      "step": 22951
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6703364849090576,
      "learning_rate": 0.000585428363965442,
      "loss": 3.0003,
      "step": 22952
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3636486530303955,
      "learning_rate": 0.000585427104570541,
      "loss": 3.0805,
      "step": 22953
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3957104682922363,
      "learning_rate": 0.0005854258451225737,
      "loss": 3.1266,
      "step": 22954
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3319966793060303,
      "learning_rate": 0.0005854245856215404,
      "loss": 3.2432,
      "step": 22955
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.440003514289856,
      "learning_rate": 0.0005854233260674412,
      "loss": 3.0441,
      "step": 22956
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2545310258865356,
      "learning_rate": 0.0005854220664602765,
      "loss": 2.7948,
      "step": 22957
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.536786437034607,
      "learning_rate": 0.0005854208068000464,
      "loss": 3.2411,
      "step": 22958
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5504440069198608,
      "learning_rate": 0.0005854195470867512,
      "loss": 3.2001,
      "step": 22959
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8128597736358643,
      "learning_rate": 0.0005854182873203911,
      "loss": 2.9946,
      "step": 22960
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3640516996383667,
      "learning_rate": 0.0005854170275009664,
      "loss": 3.1863,
      "step": 22961
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4291348457336426,
      "learning_rate": 0.0005854157676284772,
      "loss": 2.8949,
      "step": 22962
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.441986560821533,
      "learning_rate": 0.0005854145077029239,
      "loss": 3.0632,
      "step": 22963
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5157456398010254,
      "learning_rate": 0.0005854132477243066,
      "loss": 3.4328,
      "step": 22964
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.818488359451294,
      "learning_rate": 0.0005854119876926256,
      "loss": 3.1789,
      "step": 22965
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0351407527923584,
      "learning_rate": 0.0005854107276078811,
      "loss": 3.165,
      "step": 22966
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0058705806732178,
      "learning_rate": 0.0005854094674700735,
      "loss": 3.3915,
      "step": 22967
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.506561517715454,
      "learning_rate": 0.0005854082072792027,
      "loss": 3.0706,
      "step": 22968
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.008453607559204,
      "learning_rate": 0.0005854069470352692,
      "loss": 2.9677,
      "step": 22969
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4696242809295654,
      "learning_rate": 0.0005854056867382733,
      "loss": 2.9558,
      "step": 22970
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4860098361968994,
      "learning_rate": 0.000585404426388215,
      "loss": 3.3127,
      "step": 22971
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3724889755249023,
      "learning_rate": 0.0005854031659850946,
      "loss": 2.9263,
      "step": 22972
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5541987419128418,
      "learning_rate": 0.0005854019055289123,
      "loss": 2.8498,
      "step": 22973
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.393301010131836,
      "learning_rate": 0.0005854006450196685,
      "loss": 3.1023,
      "step": 22974
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7657270431518555,
      "learning_rate": 0.0005853993844573633,
      "loss": 2.9557,
      "step": 22975
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.369215488433838,
      "learning_rate": 0.000585398123841997,
      "loss": 2.8628,
      "step": 22976
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8091604709625244,
      "learning_rate": 0.0005853968631735698,
      "loss": 3.1211,
      "step": 22977
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5289779901504517,
      "learning_rate": 0.0005853956024520818,
      "loss": 2.9988,
      "step": 22978
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6072405576705933,
      "learning_rate": 0.0005853943416775335,
      "loss": 3.2246,
      "step": 22979
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9600212574005127,
      "learning_rate": 0.000585393080849925,
      "loss": 2.9566,
      "step": 22980
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.558959722518921,
      "learning_rate": 0.0005853918199692565,
      "loss": 2.9407,
      "step": 22981
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2281135320663452,
      "learning_rate": 0.0005853905590355282,
      "loss": 3.0742,
      "step": 22982
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6021082401275635,
      "learning_rate": 0.0005853892980487406,
      "loss": 3.1,
      "step": 22983
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.595889687538147,
      "learning_rate": 0.0005853880370088936,
      "loss": 3.2969,
      "step": 22984
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.1094186305999756,
      "learning_rate": 0.0005853867759159876,
      "loss": 3.0816,
      "step": 22985
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5786473751068115,
      "learning_rate": 0.0005853855147700228,
      "loss": 3.105,
      "step": 22986
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7630740404129028,
      "learning_rate": 0.0005853842535709996,
      "loss": 2.9979,
      "step": 22987
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9394901990890503,
      "learning_rate": 0.000585382992318918,
      "loss": 3.3415,
      "step": 22988
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.428162932395935,
      "learning_rate": 0.0005853817310137782,
      "loss": 2.9987,
      "step": 22989
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5012263059616089,
      "learning_rate": 0.0005853804696555806,
      "loss": 3.0299,
      "step": 22990
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.5962185859680176,
      "learning_rate": 0.0005853792082443255,
      "loss": 3.1275,
      "step": 22991
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.1666243076324463,
      "learning_rate": 0.0005853779467800129,
      "loss": 3.2221,
      "step": 22992
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7825913429260254,
      "learning_rate": 0.000585376685262643,
      "loss": 3.1116,
      "step": 22993
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.9934279918670654,
      "learning_rate": 0.0005853754236922165,
      "loss": 3.1601,
      "step": 22994
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9141229391098022,
      "learning_rate": 0.0005853741620687332,
      "loss": 3.182,
      "step": 22995
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.56540048122406,
      "learning_rate": 0.0005853729003921933,
      "loss": 2.9554,
      "step": 22996
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.641279935836792,
      "learning_rate": 0.0005853716386625974,
      "loss": 3.1198,
      "step": 22997
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4472579956054688,
      "learning_rate": 0.0005853703768799455,
      "loss": 3.1652,
      "step": 22998
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.591935634613037,
      "learning_rate": 0.0005853691150442378,
      "loss": 3.0774,
      "step": 22999
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5270226001739502,
      "learning_rate": 0.0005853678531554747,
      "loss": 3.1621,
      "step": 23000
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.149613380432129,
      "learning_rate": 0.0005853665912136562,
      "loss": 3.0568,
      "step": 23001
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.057055711746216,
      "learning_rate": 0.0005853653292187827,
      "loss": 3.0544,
      "step": 23002
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8159469366073608,
      "learning_rate": 0.0005853640671708544,
      "loss": 2.7921,
      "step": 23003
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5600359439849854,
      "learning_rate": 0.0005853628050698715,
      "loss": 3.2523,
      "step": 23004
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3538575172424316,
      "learning_rate": 0.0005853615429158343,
      "loss": 3.167,
      "step": 23005
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6931166648864746,
      "learning_rate": 0.000585360280708743,
      "loss": 3.1351,
      "step": 23006
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.314241886138916,
      "learning_rate": 0.0005853590184485978,
      "loss": 3.1641,
      "step": 23007
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4121885299682617,
      "learning_rate": 0.0005853577561353991,
      "loss": 3.1792,
      "step": 23008
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8032207489013672,
      "learning_rate": 0.000585356493769147,
      "loss": 2.9361,
      "step": 23009
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5396138429641724,
      "learning_rate": 0.0005853552313498416,
      "loss": 3.1299,
      "step": 23010
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8897278308868408,
      "learning_rate": 0.0005853539688774834,
      "loss": 2.9853,
      "step": 23011
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4289530515670776,
      "learning_rate": 0.0005853527063520723,
      "loss": 2.9991,
      "step": 23012
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4120306968688965,
      "learning_rate": 0.0005853514437736089,
      "loss": 3.1349,
      "step": 23013
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8337275981903076,
      "learning_rate": 0.0005853501811420934,
      "loss": 3.2348,
      "step": 23014
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0598888397216797,
      "learning_rate": 0.0005853489184575258,
      "loss": 3.0918,
      "step": 23015
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8864758014678955,
      "learning_rate": 0.0005853476557199064,
      "loss": 2.8707,
      "step": 23016
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6103097200393677,
      "learning_rate": 0.0005853463929292355,
      "loss": 2.9058,
      "step": 23017
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7963581085205078,
      "learning_rate": 0.0005853451300855134,
      "loss": 3.023,
      "step": 23018
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4759446382522583,
      "learning_rate": 0.0005853438671887401,
      "loss": 2.9364,
      "step": 23019
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.420322299003601,
      "learning_rate": 0.0005853426042389162,
      "loss": 2.8565,
      "step": 23020
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3110771179199219,
      "learning_rate": 0.0005853413412360416,
      "loss": 3.0478,
      "step": 23021
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.592179536819458,
      "learning_rate": 0.0005853400781801166,
      "loss": 3.1334,
      "step": 23022
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.475646734237671,
      "learning_rate": 0.0005853388150711416,
      "loss": 3.2664,
      "step": 23023
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6820943355560303,
      "learning_rate": 0.0005853375519091167,
      "loss": 2.9527,
      "step": 23024
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7670841217041016,
      "learning_rate": 0.0005853362886940422,
      "loss": 3.1534,
      "step": 23025
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.617917776107788,
      "learning_rate": 0.0005853350254259182,
      "loss": 3.1482,
      "step": 23026
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8408122062683105,
      "learning_rate": 0.0005853337621047451,
      "loss": 3.1847,
      "step": 23027
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6116816997528076,
      "learning_rate": 0.0005853324987305231,
      "loss": 3.0637,
      "step": 23028
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8520368337631226,
      "learning_rate": 0.0005853312353032523,
      "loss": 3.4018,
      "step": 23029
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2916539907455444,
      "learning_rate": 0.0005853299718229331,
      "loss": 3.1374,
      "step": 23030
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.512057900428772,
      "learning_rate": 0.0005853287082895656,
      "loss": 2.9252,
      "step": 23031
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7597002983093262,
      "learning_rate": 0.0005853274447031501,
      "loss": 3.1318,
      "step": 23032
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5767470598220825,
      "learning_rate": 0.0005853261810636871,
      "loss": 2.9783,
      "step": 23033
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6739014387130737,
      "learning_rate": 0.0005853249173711763,
      "loss": 2.8966,
      "step": 23034
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3376576900482178,
      "learning_rate": 0.0005853236536256184,
      "loss": 3.181,
      "step": 23035
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8479498624801636,
      "learning_rate": 0.0005853223898270133,
      "loss": 3.2062,
      "step": 23036
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4573442935943604,
      "learning_rate": 0.0005853211259753614,
      "loss": 3.2183,
      "step": 23037
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5412455797195435,
      "learning_rate": 0.000585319862070663,
      "loss": 2.9666,
      "step": 23038
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8841444253921509,
      "learning_rate": 0.0005853185981129182,
      "loss": 2.9456,
      "step": 23039
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3893688917160034,
      "learning_rate": 0.0005853173341021273,
      "loss": 2.8609,
      "step": 23040
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8720099925994873,
      "learning_rate": 0.0005853160700382905,
      "loss": 2.9617,
      "step": 23041
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.842549204826355,
      "learning_rate": 0.000585314805921408,
      "loss": 2.9983,
      "step": 23042
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9865741729736328,
      "learning_rate": 0.0005853135417514802,
      "loss": 2.983,
      "step": 23043
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5078201293945312,
      "learning_rate": 0.0005853122775285072,
      "loss": 3.1834,
      "step": 23044
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8818755149841309,
      "learning_rate": 0.0005853110132524892,
      "loss": 3.0591,
      "step": 23045
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5432064533233643,
      "learning_rate": 0.0005853097489234265,
      "loss": 3.0006,
      "step": 23046
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.698117733001709,
      "learning_rate": 0.0005853084845413194,
      "loss": 3.2125,
      "step": 23047
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0729751586914062,
      "learning_rate": 0.0005853072201061681,
      "loss": 3.1352,
      "step": 23048
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.76233971118927,
      "learning_rate": 0.0005853059556179727,
      "loss": 2.8615,
      "step": 23049
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8529307842254639,
      "learning_rate": 0.0005853046910767335,
      "loss": 3.2257,
      "step": 23050
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.014376640319824,
      "learning_rate": 0.0005853034264824509,
      "loss": 3.0254,
      "step": 23051
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2680094242095947,
      "learning_rate": 0.0005853021618351251,
      "loss": 3.199,
      "step": 23052
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2125437259674072,
      "learning_rate": 0.0005853008971347561,
      "loss": 3.0465,
      "step": 23053
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.325700044631958,
      "learning_rate": 0.0005852996323813442,
      "loss": 2.7432,
      "step": 23054
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.579354763031006,
      "learning_rate": 0.0005852983675748898,
      "loss": 3.1172,
      "step": 23055
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4852473735809326,
      "learning_rate": 0.0005852971027153931,
      "loss": 3.2064,
      "step": 23056
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4398527145385742,
      "learning_rate": 0.0005852958378028541,
      "loss": 3.0133,
      "step": 23057
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5594940185546875,
      "learning_rate": 0.0005852945728372734,
      "loss": 3.1557,
      "step": 23058
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.776181936264038,
      "learning_rate": 0.0005852933078186511,
      "loss": 3.0747,
      "step": 23059
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.69442081451416,
      "learning_rate": 0.0005852920427469872,
      "loss": 3.1115,
      "step": 23060
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5232101678848267,
      "learning_rate": 0.0005852907776222822,
      "loss": 3.1701,
      "step": 23061
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9091328382492065,
      "learning_rate": 0.0005852895124445363,
      "loss": 3.0812,
      "step": 23062
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.2945523262023926,
      "learning_rate": 0.0005852882472137496,
      "loss": 3.0925,
      "step": 23063
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.824892997741699,
      "learning_rate": 0.0005852869819299225,
      "loss": 3.0599,
      "step": 23064
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.8734593391418457,
      "learning_rate": 0.0005852857165930551,
      "loss": 2.9938,
      "step": 23065
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.427483081817627,
      "learning_rate": 0.0005852844512031479,
      "loss": 3.1093,
      "step": 23066
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.6101202964782715,
      "learning_rate": 0.0005852831857602008,
      "loss": 3.0182,
      "step": 23067
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.616399049758911,
      "learning_rate": 0.0005852819202642141,
      "loss": 3.1329,
      "step": 23068
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9185826778411865,
      "learning_rate": 0.0005852806547151882,
      "loss": 3.0164,
      "step": 23069
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3788570165634155,
      "learning_rate": 0.0005852793891131232,
      "loss": 3.0928,
      "step": 23070
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.866220235824585,
      "learning_rate": 0.0005852781234580194,
      "loss": 3.2742,
      "step": 23071
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.549462080001831,
      "learning_rate": 0.000585276857749877,
      "loss": 2.9995,
      "step": 23072
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0589425563812256,
      "learning_rate": 0.0005852755919886962,
      "loss": 3.1029,
      "step": 23073
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6156576871871948,
      "learning_rate": 0.0005852743261744774,
      "loss": 2.9649,
      "step": 23074
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5101203918457031,
      "learning_rate": 0.0005852730603072206,
      "loss": 2.9841,
      "step": 23075
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8320963382720947,
      "learning_rate": 0.0005852717943869262,
      "loss": 2.9459,
      "step": 23076
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7232354879379272,
      "learning_rate": 0.0005852705284135944,
      "loss": 2.9497,
      "step": 23077
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7313400506973267,
      "learning_rate": 0.0005852692623872255,
      "loss": 3.259,
      "step": 23078
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.691513180732727,
      "learning_rate": 0.0005852679963078194,
      "loss": 3.2653,
      "step": 23079
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8231362104415894,
      "learning_rate": 0.0005852667301753768,
      "loss": 3.0502,
      "step": 23080
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.42296302318573,
      "learning_rate": 0.0005852654639898978,
      "loss": 3.2973,
      "step": 23081
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8470377922058105,
      "learning_rate": 0.0005852641977513825,
      "loss": 3.1466,
      "step": 23082
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6820001602172852,
      "learning_rate": 0.0005852629314598312,
      "loss": 3.239,
      "step": 23083
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4588176012039185,
      "learning_rate": 0.000585261665115244,
      "loss": 3.0255,
      "step": 23084
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5173704624176025,
      "learning_rate": 0.0005852603987176214,
      "loss": 3.2192,
      "step": 23085
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7914074659347534,
      "learning_rate": 0.0005852591322669635,
      "loss": 3.2496,
      "step": 23086
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5427486896514893,
      "learning_rate": 0.0005852578657632705,
      "loss": 3.2657,
      "step": 23087
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.517602562904358,
      "learning_rate": 0.0005852565992065427,
      "loss": 2.9939,
      "step": 23088
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.039292097091675,
      "learning_rate": 0.0005852553325967803,
      "loss": 3.0641,
      "step": 23089
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4386379718780518,
      "learning_rate": 0.0005852540659339835,
      "loss": 2.91,
      "step": 23090
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3714884519577026,
      "learning_rate": 0.0005852527992181527,
      "loss": 3.0705,
      "step": 23091
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6017764806747437,
      "learning_rate": 0.000585251532449288,
      "loss": 3.1984,
      "step": 23092
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8676280975341797,
      "learning_rate": 0.0005852502656273896,
      "loss": 3.0138,
      "step": 23093
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2926543951034546,
      "learning_rate": 0.0005852489987524578,
      "loss": 3.1107,
      "step": 23094
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4220218658447266,
      "learning_rate": 0.0005852477318244928,
      "loss": 3.0816,
      "step": 23095
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.938632607460022,
      "learning_rate": 0.000585246464843495,
      "loss": 3.2183,
      "step": 23096
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4047088623046875,
      "learning_rate": 0.0005852451978094643,
      "loss": 3.0131,
      "step": 23097
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6658579111099243,
      "learning_rate": 0.0005852439307224013,
      "loss": 3.1978,
      "step": 23098
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5294404029846191,
      "learning_rate": 0.000585242663582306,
      "loss": 2.64,
      "step": 23099
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5904529094696045,
      "learning_rate": 0.0005852413963891787,
      "loss": 3.0319,
      "step": 23100
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5895901918411255,
      "learning_rate": 0.0005852401291430196,
      "loss": 3.0324,
      "step": 23101
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5055608749389648,
      "learning_rate": 0.000585238861843829,
      "loss": 2.9342,
      "step": 23102
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4444713592529297,
      "learning_rate": 0.0005852375944916073,
      "loss": 3.2463,
      "step": 23103
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.113480806350708,
      "learning_rate": 0.0005852363270863542,
      "loss": 2.6738,
      "step": 23104
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.442055106163025,
      "learning_rate": 0.0005852350596280705,
      "loss": 3.2856,
      "step": 23105
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.581004023551941,
      "learning_rate": 0.0005852337921167563,
      "loss": 3.0867,
      "step": 23106
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6261982917785645,
      "learning_rate": 0.0005852325245524116,
      "loss": 3.1021,
      "step": 23107
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6254950761795044,
      "learning_rate": 0.0005852312569350368,
      "loss": 3.0021,
      "step": 23108
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.561663269996643,
      "learning_rate": 0.0005852299892646321,
      "loss": 2.8578,
      "step": 23109
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9115325212478638,
      "learning_rate": 0.0005852287215411978,
      "loss": 2.977,
      "step": 23110
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5174779891967773,
      "learning_rate": 0.0005852274537647341,
      "loss": 3.2395,
      "step": 23111
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6418381929397583,
      "learning_rate": 0.0005852261859352413,
      "loss": 3.0554,
      "step": 23112
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4679657220840454,
      "learning_rate": 0.0005852249180527194,
      "loss": 3.1087,
      "step": 23113
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6297873258590698,
      "learning_rate": 0.000585223650117169,
      "loss": 3.0887,
      "step": 23114
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6294759511947632,
      "learning_rate": 0.0005852223821285901,
      "loss": 3.08,
      "step": 23115
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6282538175582886,
      "learning_rate": 0.0005852211140869829,
      "loss": 3.1434,
      "step": 23116
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.61078679561615,
      "learning_rate": 0.0005852198459923478,
      "loss": 2.9775,
      "step": 23117
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5443824529647827,
      "learning_rate": 0.0005852185778446848,
      "loss": 3.1096,
      "step": 23118
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6600395441055298,
      "learning_rate": 0.0005852173096439944,
      "loss": 2.9625,
      "step": 23119
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5509254932403564,
      "learning_rate": 0.0005852160413902766,
      "loss": 3.1794,
      "step": 23120
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.926658034324646,
      "learning_rate": 0.0005852147730835319,
      "loss": 3.0562,
      "step": 23121
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7573624849319458,
      "learning_rate": 0.0005852135047237603,
      "loss": 3.2123,
      "step": 23122
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5233972072601318,
      "learning_rate": 0.0005852122363109622,
      "loss": 3.505,
      "step": 23123
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.7755470275878906,
      "learning_rate": 0.0005852109678451377,
      "loss": 3.2979,
      "step": 23124
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.618288516998291,
      "learning_rate": 0.000585209699326287,
      "loss": 3.203,
      "step": 23125
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.260858178138733,
      "learning_rate": 0.0005852084307544106,
      "loss": 3.3757,
      "step": 23126
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4719111919403076,
      "learning_rate": 0.0005852071621295085,
      "loss": 3.2439,
      "step": 23127
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9062753915786743,
      "learning_rate": 0.000585205893451581,
      "loss": 2.8509,
      "step": 23128
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5384846925735474,
      "learning_rate": 0.0005852046247206284,
      "loss": 3.0082,
      "step": 23129
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5994200706481934,
      "learning_rate": 0.0005852033559366509,
      "loss": 2.9434,
      "step": 23130
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.821444034576416,
      "learning_rate": 0.0005852020870996486,
      "loss": 3.156,
      "step": 23131
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.463167190551758,
      "learning_rate": 0.0005852008182096219,
      "loss": 2.9818,
      "step": 23132
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.01349139213562,
      "learning_rate": 0.000585199549266571,
      "loss": 3.1378,
      "step": 23133
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.034827470779419,
      "learning_rate": 0.0005851982802704961,
      "loss": 2.9628,
      "step": 23134
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.907305121421814,
      "learning_rate": 0.0005851970112213975,
      "loss": 3.1831,
      "step": 23135
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4992691278457642,
      "learning_rate": 0.0005851957421192753,
      "loss": 3.1325,
      "step": 23136
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2246414422988892,
      "learning_rate": 0.0005851944729641299,
      "loss": 2.8597,
      "step": 23137
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.387593388557434,
      "learning_rate": 0.0005851932037559615,
      "loss": 3.0242,
      "step": 23138
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7211902141571045,
      "learning_rate": 0.0005851919344947702,
      "loss": 2.9534,
      "step": 23139
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3472354412078857,
      "learning_rate": 0.0005851906651805563,
      "loss": 3.2783,
      "step": 23140
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0365395545959473,
      "learning_rate": 0.0005851893958133201,
      "loss": 3.3714,
      "step": 23141
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4577243328094482,
      "learning_rate": 0.0005851881263930619,
      "loss": 3.331,
      "step": 23142
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8504716157913208,
      "learning_rate": 0.0005851868569197818,
      "loss": 3.1856,
      "step": 23143
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8136179447174072,
      "learning_rate": 0.0005851855873934801,
      "loss": 2.9923,
      "step": 23144
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4983251094818115,
      "learning_rate": 0.000585184317814157,
      "loss": 3.2648,
      "step": 23145
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2660422325134277,
      "learning_rate": 0.0005851830481818128,
      "loss": 3.1851,
      "step": 23146
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5794428586959839,
      "learning_rate": 0.0005851817784964476,
      "loss": 3.0848,
      "step": 23147
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.513272762298584,
      "learning_rate": 0.0005851805087580618,
      "loss": 3.079,
      "step": 23148
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5408669710159302,
      "learning_rate": 0.0005851792389666555,
      "loss": 2.9677,
      "step": 23149
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4352389574050903,
      "learning_rate": 0.0005851779691222291,
      "loss": 2.8529,
      "step": 23150
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3159304857254028,
      "learning_rate": 0.0005851766992247825,
      "loss": 3.0439,
      "step": 23151
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6552166938781738,
      "learning_rate": 0.0005851754292743165,
      "loss": 3.0474,
      "step": 23152
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5166651010513306,
      "learning_rate": 0.0005851741592708307,
      "loss": 3.0069,
      "step": 23153
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4760804176330566,
      "learning_rate": 0.0005851728892143259,
      "loss": 2.9936,
      "step": 23154
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.50717031955719,
      "learning_rate": 0.0005851716191048019,
      "loss": 3.1906,
      "step": 23155
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.475707769393921,
      "learning_rate": 0.0005851703489422592,
      "loss": 3.146,
      "step": 23156
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4548009634017944,
      "learning_rate": 0.0005851690787266978,
      "loss": 2.9003,
      "step": 23157
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8144240379333496,
      "learning_rate": 0.0005851678084581182,
      "loss": 3.1382,
      "step": 23158
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7353192567825317,
      "learning_rate": 0.0005851665381365205,
      "loss": 2.9769,
      "step": 23159
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3200074434280396,
      "learning_rate": 0.0005851652677619049,
      "loss": 3.1928,
      "step": 23160
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0398781299591064,
      "learning_rate": 0.0005851639973342718,
      "loss": 3.3423,
      "step": 23161
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5219686031341553,
      "learning_rate": 0.0005851627268536213,
      "loss": 3.11,
      "step": 23162
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2815921306610107,
      "learning_rate": 0.0005851614563199537,
      "loss": 3.1,
      "step": 23163
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.274943470954895,
      "learning_rate": 0.0005851601857332692,
      "loss": 2.8829,
      "step": 23164
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7692073583602905,
      "learning_rate": 0.000585158915093568,
      "loss": 3.1177,
      "step": 23165
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.78874933719635,
      "learning_rate": 0.0005851576444008504,
      "loss": 3.0147,
      "step": 23166
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3944694995880127,
      "learning_rate": 0.0005851563736551166,
      "loss": 3.0648,
      "step": 23167
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6606740951538086,
      "learning_rate": 0.0005851551028563667,
      "loss": 3.0258,
      "step": 23168
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0516135692596436,
      "learning_rate": 0.0005851538320046013,
      "loss": 3.0338,
      "step": 23169
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3386833667755127,
      "learning_rate": 0.0005851525610998203,
      "loss": 2.9106,
      "step": 23170
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7010533809661865,
      "learning_rate": 0.000585151290142024,
      "loss": 3.0611,
      "step": 23171
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4806220531463623,
      "learning_rate": 0.0005851500191312129,
      "loss": 3.1623,
      "step": 23172
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.256765127182007,
      "learning_rate": 0.0005851487480673869,
      "loss": 2.9615,
      "step": 23173
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8003071546554565,
      "learning_rate": 0.0005851474769505464,
      "loss": 3.2695,
      "step": 23174
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.36016845703125,
      "learning_rate": 0.0005851462057806916,
      "loss": 2.8114,
      "step": 23175
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.922835111618042,
      "learning_rate": 0.0005851449345578226,
      "loss": 2.8107,
      "step": 23176
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4990342855453491,
      "learning_rate": 0.00058514366328194,
      "loss": 3.1982,
      "step": 23177
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3395452499389648,
      "learning_rate": 0.0005851423919530437,
      "loss": 3.0662,
      "step": 23178
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6710690259933472,
      "learning_rate": 0.000585141120571134,
      "loss": 2.9158,
      "step": 23179
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4642199277877808,
      "learning_rate": 0.0005851398491362113,
      "loss": 3.1619,
      "step": 23180
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2026913166046143,
      "learning_rate": 0.0005851385776482756,
      "loss": 2.9934,
      "step": 23181
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4577531814575195,
      "learning_rate": 0.0005851373061073273,
      "loss": 2.9506,
      "step": 23182
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8085699081420898,
      "learning_rate": 0.0005851360345133667,
      "loss": 3.0805,
      "step": 23183
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.503964900970459,
      "learning_rate": 0.0005851347628663937,
      "loss": 3.0942,
      "step": 23184
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0533318519592285,
      "learning_rate": 0.0005851334911664089,
      "loss": 3.0574,
      "step": 23185
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.060971260070801,
      "learning_rate": 0.0005851322194134124,
      "loss": 3.0713,
      "step": 23186
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4314526319503784,
      "learning_rate": 0.0005851309476074044,
      "loss": 3.3391,
      "step": 23187
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6128190755844116,
      "learning_rate": 0.0005851296757483853,
      "loss": 3.2554,
      "step": 23188
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.779717206954956,
      "learning_rate": 0.0005851284038363551,
      "loss": 2.962,
      "step": 23189
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.2410545349121094,
      "learning_rate": 0.0005851271318713142,
      "loss": 2.9875,
      "step": 23190
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7368597984313965,
      "learning_rate": 0.0005851258598532627,
      "loss": 3.0514,
      "step": 23191
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.3757917881011963,
      "learning_rate": 0.000585124587782201,
      "loss": 3.0797,
      "step": 23192
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6574338674545288,
      "learning_rate": 0.0005851233156581292,
      "loss": 3.1449,
      "step": 23193
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5494186878204346,
      "learning_rate": 0.0005851220434810477,
      "loss": 3.1616,
      "step": 23194
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8458271026611328,
      "learning_rate": 0.0005851207712509566,
      "loss": 3.0362,
      "step": 23195
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3757686614990234,
      "learning_rate": 0.0005851194989678561,
      "loss": 2.905,
      "step": 23196
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.7708847522735596,
      "learning_rate": 0.0005851182266317464,
      "loss": 2.9762,
      "step": 23197
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6769356727600098,
      "learning_rate": 0.0005851169542426281,
      "loss": 3.1143,
      "step": 23198
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4481754302978516,
      "learning_rate": 0.0005851156818005009,
      "loss": 3.3147,
      "step": 23199
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6211597919464111,
      "learning_rate": 0.0005851144093053655,
      "loss": 3.1432,
      "step": 23200
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.340366005897522,
      "learning_rate": 0.0005851131367572219,
      "loss": 3.2424,
      "step": 23201
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.388308525085449,
      "learning_rate": 0.0005851118641560704,
      "loss": 3.1026,
      "step": 23202
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3832486867904663,
      "learning_rate": 0.0005851105915019113,
      "loss": 3.0063,
      "step": 23203
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4744340181350708,
      "learning_rate": 0.0005851093187947446,
      "loss": 3.063,
      "step": 23204
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.464870572090149,
      "learning_rate": 0.0005851080460345708,
      "loss": 2.9706,
      "step": 23205
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4038457870483398,
      "learning_rate": 0.00058510677322139,
      "loss": 3.042,
      "step": 23206
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6907316446304321,
      "learning_rate": 0.0005851055003552024,
      "loss": 3.0993,
      "step": 23207
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4185702800750732,
      "learning_rate": 0.0005851042274360084,
      "loss": 2.8437,
      "step": 23208
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7910163402557373,
      "learning_rate": 0.0005851029544638081,
      "loss": 3.1659,
      "step": 23209
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.36283540725708,
      "learning_rate": 0.0005851016814386018,
      "loss": 3.0918,
      "step": 23210
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4592629671096802,
      "learning_rate": 0.0005851004083603897,
      "loss": 3.2186,
      "step": 23211
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.200129985809326,
      "learning_rate": 0.0005850991352291721,
      "loss": 3.2236,
      "step": 23212
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.411388874053955,
      "learning_rate": 0.0005850978620449491,
      "loss": 3.2411,
      "step": 23213
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4951938390731812,
      "learning_rate": 0.0005850965888077211,
      "loss": 2.7742,
      "step": 23214
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7708314657211304,
      "learning_rate": 0.0005850953155174882,
      "loss": 3.243,
      "step": 23215
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5457652807235718,
      "learning_rate": 0.0005850940421742507,
      "loss": 3.0747,
      "step": 23216
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8208835124969482,
      "learning_rate": 0.000585092768778009,
      "loss": 3.1132,
      "step": 23217
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4684354066848755,
      "learning_rate": 0.000585091495328763,
      "loss": 3.2189,
      "step": 23218
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.539961814880371,
      "learning_rate": 0.0005850902218265131,
      "loss": 3.2591,
      "step": 23219
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.48146653175354,
      "learning_rate": 0.0005850889482712596,
      "loss": 3.0176,
      "step": 23220
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0160977840423584,
      "learning_rate": 0.0005850876746630027,
      "loss": 3.0393,
      "step": 23221
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.488250970840454,
      "learning_rate": 0.0005850864010017426,
      "loss": 3.1007,
      "step": 23222
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5112792253494263,
      "learning_rate": 0.0005850851272874795,
      "loss": 3.0704,
      "step": 23223
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6503595113754272,
      "learning_rate": 0.0005850838535202138,
      "loss": 3.2664,
      "step": 23224
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3605289459228516,
      "learning_rate": 0.0005850825796999456,
      "loss": 2.9574,
      "step": 23225
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4361766576766968,
      "learning_rate": 0.0005850813058266752,
      "loss": 3.2059,
      "step": 23226
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.5679221153259277,
      "learning_rate": 0.0005850800319004028,
      "loss": 3.1818,
      "step": 23227
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0885205268859863,
      "learning_rate": 0.0005850787579211285,
      "loss": 2.9161,
      "step": 23228
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.496204137802124,
      "learning_rate": 0.0005850774838888529,
      "loss": 3.061,
      "step": 23229
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8332170248031616,
      "learning_rate": 0.0005850762098035758,
      "loss": 3.0661,
      "step": 23230
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5852408409118652,
      "learning_rate": 0.0005850749356652977,
      "loss": 3.1435,
      "step": 23231
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.409320592880249,
      "learning_rate": 0.0005850736614740189,
      "loss": 3.1414,
      "step": 23232
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.408823847770691,
      "learning_rate": 0.0005850723872297394,
      "loss": 3.0372,
      "step": 23233
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4493554830551147,
      "learning_rate": 0.0005850711129324597,
      "loss": 3.2135,
      "step": 23234
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2974724769592285,
      "learning_rate": 0.0005850698385821797,
      "loss": 2.9886,
      "step": 23235
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9643745422363281,
      "learning_rate": 0.0005850685641789,
      "loss": 3.111,
      "step": 23236
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4952160120010376,
      "learning_rate": 0.0005850672897226206,
      "loss": 3.1281,
      "step": 23237
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7003289461135864,
      "learning_rate": 0.0005850660152133418,
      "loss": 2.7019,
      "step": 23238
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2411137819290161,
      "learning_rate": 0.000585064740651064,
      "loss": 2.9224,
      "step": 23239
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7511675357818604,
      "learning_rate": 0.0005850634660357871,
      "loss": 3.2994,
      "step": 23240
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8155275583267212,
      "learning_rate": 0.0005850621913675116,
      "loss": 3.1181,
      "step": 23241
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6233088970184326,
      "learning_rate": 0.0005850609166462376,
      "loss": 3.2622,
      "step": 23242
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.918426275253296,
      "learning_rate": 0.0005850596418719655,
      "loss": 3.1606,
      "step": 23243
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4141980409622192,
      "learning_rate": 0.0005850583670446954,
      "loss": 3.4084,
      "step": 23244
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5344483852386475,
      "learning_rate": 0.0005850570921644275,
      "loss": 2.85,
      "step": 23245
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2569639682769775,
      "learning_rate": 0.0005850558172311622,
      "loss": 3.0051,
      "step": 23246
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.505432367324829,
      "learning_rate": 0.0005850545422448996,
      "loss": 3.1876,
      "step": 23247
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5946427583694458,
      "learning_rate": 0.0005850532672056399,
      "loss": 2.9829,
      "step": 23248
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.78791344165802,
      "learning_rate": 0.0005850519921133835,
      "loss": 3.104,
      "step": 23249
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4980403184890747,
      "learning_rate": 0.0005850507169681305,
      "loss": 3.3685,
      "step": 23250
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.416883945465088,
      "learning_rate": 0.0005850494417698812,
      "loss": 3.3171,
      "step": 23251
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.443403959274292,
      "learning_rate": 0.0005850481665186359,
      "loss": 3.25,
      "step": 23252
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6870144605636597,
      "learning_rate": 0.0005850468912143947,
      "loss": 3.0638,
      "step": 23253
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5141232013702393,
      "learning_rate": 0.000585045615857158,
      "loss": 2.9215,
      "step": 23254
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0958547592163086,
      "learning_rate": 0.0005850443404469258,
      "loss": 3.1191,
      "step": 23255
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5443294048309326,
      "learning_rate": 0.0005850430649836986,
      "loss": 2.8251,
      "step": 23256
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.0608298778533936,
      "learning_rate": 0.0005850417894674764,
      "loss": 2.8644,
      "step": 23257
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8993513584136963,
      "learning_rate": 0.0005850405138982597,
      "loss": 3.0993,
      "step": 23258
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8161994218826294,
      "learning_rate": 0.0005850392382760484,
      "loss": 3.209,
      "step": 23259
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6683827638626099,
      "learning_rate": 0.0005850379626008431,
      "loss": 2.882,
      "step": 23260
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3093585968017578,
      "learning_rate": 0.0005850366868726437,
      "loss": 2.931,
      "step": 23261
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8533035516738892,
      "learning_rate": 0.0005850354110914507,
      "loss": 3.191,
      "step": 23262
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.552624225616455,
      "learning_rate": 0.0005850341352572642,
      "loss": 3.1008,
      "step": 23263
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2768909931182861,
      "learning_rate": 0.0005850328593700845,
      "loss": 3.1069,
      "step": 23264
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4317450523376465,
      "learning_rate": 0.0005850315834299118,
      "loss": 3.0141,
      "step": 23265
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.567374587059021,
      "learning_rate": 0.0005850303074367464,
      "loss": 2.9565,
      "step": 23266
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6700522899627686,
      "learning_rate": 0.0005850290313905883,
      "loss": 2.8964,
      "step": 23267
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.526324510574341,
      "learning_rate": 0.0005850277552914381,
      "loss": 2.9742,
      "step": 23268
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.625924825668335,
      "learning_rate": 0.0005850264791392958,
      "loss": 3.0343,
      "step": 23269
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7352672815322876,
      "learning_rate": 0.0005850252029341617,
      "loss": 3.1583,
      "step": 23270
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.841449499130249,
      "learning_rate": 0.0005850239266760361,
      "loss": 2.9928,
      "step": 23271
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4811551570892334,
      "learning_rate": 0.0005850226503649192,
      "loss": 3.1308,
      "step": 23272
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6802952289581299,
      "learning_rate": 0.000585021374000811,
      "loss": 3.15,
      "step": 23273
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5128681659698486,
      "learning_rate": 0.0005850200975837121,
      "loss": 3.0767,
      "step": 23274
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9455227851867676,
      "learning_rate": 0.0005850188211136226,
      "loss": 3.3188,
      "step": 23275
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4313974380493164,
      "learning_rate": 0.0005850175445905427,
      "loss": 3.1659,
      "step": 23276
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6136868000030518,
      "learning_rate": 0.0005850162680144725,
      "loss": 3.3055,
      "step": 23277
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7454861402511597,
      "learning_rate": 0.0005850149913854127,
      "loss": 2.9535,
      "step": 23278
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4873359203338623,
      "learning_rate": 0.000585013714703363,
      "loss": 3.1451,
      "step": 23279
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0763180255889893,
      "learning_rate": 0.0005850124379683239,
      "loss": 3.0286,
      "step": 23280
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.281569480895996,
      "learning_rate": 0.0005850111611802956,
      "loss": 2.9604,
      "step": 23281
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.274280071258545,
      "learning_rate": 0.0005850098843392785,
      "loss": 2.8107,
      "step": 23282
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3728666305541992,
      "learning_rate": 0.0005850086074452725,
      "loss": 3.1909,
      "step": 23283
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.496238112449646,
      "learning_rate": 0.0005850073304982782,
      "loss": 3.174,
      "step": 23284
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5375983715057373,
      "learning_rate": 0.0005850060534982954,
      "loss": 3.2098,
      "step": 23285
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.520566701889038,
      "learning_rate": 0.0005850047764453249,
      "loss": 3.125,
      "step": 23286
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.8599894046783447,
      "learning_rate": 0.0005850034993393664,
      "loss": 2.8915,
      "step": 23287
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.7706477642059326,
      "learning_rate": 0.0005850022221804205,
      "loss": 3.0128,
      "step": 23288
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5069475173950195,
      "learning_rate": 0.0005850009449684872,
      "loss": 3.2501,
      "step": 23289
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5252009630203247,
      "learning_rate": 0.0005849996677035669,
      "loss": 3.2108,
      "step": 23290
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5013341903686523,
      "learning_rate": 0.0005849983903856598,
      "loss": 3.2565,
      "step": 23291
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7717170715332031,
      "learning_rate": 0.000584997113014766,
      "loss": 3.0271,
      "step": 23292
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5230025053024292,
      "learning_rate": 0.000584995835590886,
      "loss": 3.1214,
      "step": 23293
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2576897144317627,
      "learning_rate": 0.0005849945581140199,
      "loss": 2.9691,
      "step": 23294
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.472449779510498,
      "learning_rate": 0.0005849932805841679,
      "loss": 3.0384,
      "step": 23295
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5493395328521729,
      "learning_rate": 0.0005849920030013302,
      "loss": 3.0151,
      "step": 23296
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9685832262039185,
      "learning_rate": 0.0005849907253655072,
      "loss": 2.9635,
      "step": 23297
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.629868984222412,
      "learning_rate": 0.0005849894476766989,
      "loss": 3.2263,
      "step": 23298
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8201661109924316,
      "learning_rate": 0.0005849881699349058,
      "loss": 2.9195,
      "step": 23299
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3012512922286987,
      "learning_rate": 0.0005849868921401281,
      "loss": 3.2089,
      "step": 23300
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.399101734161377,
      "learning_rate": 0.0005849856142923658,
      "loss": 2.8549,
      "step": 23301
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7421362400054932,
      "learning_rate": 0.0005849843363916194,
      "loss": 3.0641,
      "step": 23302
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7523967027664185,
      "learning_rate": 0.0005849830584378889,
      "loss": 3.0667,
      "step": 23303
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.523413062095642,
      "learning_rate": 0.0005849817804311748,
      "loss": 3.152,
      "step": 23304
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.638548731803894,
      "learning_rate": 0.0005849805023714772,
      "loss": 3.115,
      "step": 23305
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3610138893127441,
      "learning_rate": 0.0005849792242587963,
      "loss": 3.1145,
      "step": 23306
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3629533052444458,
      "learning_rate": 0.0005849779460931324,
      "loss": 3.1751,
      "step": 23307
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7828036546707153,
      "learning_rate": 0.0005849766678744857,
      "loss": 3.0903,
      "step": 23308
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4570320844650269,
      "learning_rate": 0.0005849753896028564,
      "loss": 3.068,
      "step": 23309
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.596929907798767,
      "learning_rate": 0.000584974111278245,
      "loss": 2.7781,
      "step": 23310
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9906620979309082,
      "learning_rate": 0.0005849728329006513,
      "loss": 3.1111,
      "step": 23311
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4548051357269287,
      "learning_rate": 0.000584971554470076,
      "loss": 3.1203,
      "step": 23312
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.464164137840271,
      "learning_rate": 0.0005849702759865189,
      "loss": 3.0176,
      "step": 23313
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7877081632614136,
      "learning_rate": 0.0005849689974499805,
      "loss": 2.9924,
      "step": 23314
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.339526653289795,
      "learning_rate": 0.0005849677188604611,
      "loss": 3.15,
      "step": 23315
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.2290000915527344,
      "learning_rate": 0.0005849664402179606,
      "loss": 2.9911,
      "step": 23316
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.001585006713867,
      "learning_rate": 0.0005849651615224796,
      "loss": 2.8676,
      "step": 23317
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5580909252166748,
      "learning_rate": 0.0005849638827740184,
      "loss": 3.1682,
      "step": 23318
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0578603744506836,
      "learning_rate": 0.0005849626039725768,
      "loss": 2.9074,
      "step": 23319
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6064187288284302,
      "learning_rate": 0.0005849613251181552,
      "loss": 3.048,
      "step": 23320
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.714161992073059,
      "learning_rate": 0.000584960046210754,
      "loss": 3.0059,
      "step": 23321
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5249674320220947,
      "learning_rate": 0.0005849587672503734,
      "loss": 3.1823,
      "step": 23322
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.334121823310852,
      "learning_rate": 0.0005849574882370135,
      "loss": 3.1584,
      "step": 23323
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.53439462184906,
      "learning_rate": 0.0005849562091706748,
      "loss": 3.1351,
      "step": 23324
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6102993488311768,
      "learning_rate": 0.000584954930051357,
      "loss": 3.0912,
      "step": 23325
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.454467535018921,
      "learning_rate": 0.0005849536508790611,
      "loss": 3.1135,
      "step": 23326
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8396413326263428,
      "learning_rate": 0.0005849523716537868,
      "loss": 3.1417,
      "step": 23327
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.658944845199585,
      "learning_rate": 0.0005849510923755344,
      "loss": 3.2377,
      "step": 23328
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.52499258518219,
      "learning_rate": 0.0005849498130443042,
      "loss": 3.2622,
      "step": 23329
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9285848140716553,
      "learning_rate": 0.0005849485336600965,
      "loss": 2.9315,
      "step": 23330
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3811278343200684,
      "learning_rate": 0.0005849472542229115,
      "loss": 3.1071,
      "step": 23331
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4757195711135864,
      "learning_rate": 0.0005849459747327495,
      "loss": 3.1121,
      "step": 23332
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3313064575195312,
      "learning_rate": 0.0005849446951896106,
      "loss": 3.0364,
      "step": 23333
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.665847659111023,
      "learning_rate": 0.000584943415593495,
      "loss": 2.8429,
      "step": 23334
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6486769914627075,
      "learning_rate": 0.0005849421359444032,
      "loss": 2.9675,
      "step": 23335
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5512323379516602,
      "learning_rate": 0.0005849408562423352,
      "loss": 3.1526,
      "step": 23336
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.359153389930725,
      "learning_rate": 0.0005849395764872913,
      "loss": 3.1921,
      "step": 23337
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5665103197097778,
      "learning_rate": 0.0005849382966792718,
      "loss": 2.9852,
      "step": 23338
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.370090126991272,
      "learning_rate": 0.0005849370168182769,
      "loss": 3.0626,
      "step": 23339
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4848504066467285,
      "learning_rate": 0.0005849357369043067,
      "loss": 3.2525,
      "step": 23340
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5108115673065186,
      "learning_rate": 0.0005849344569373617,
      "loss": 3.0682,
      "step": 23341
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.9919997453689575,
      "learning_rate": 0.000584933176917442,
      "loss": 3.0496,
      "step": 23342
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.676077127456665,
      "learning_rate": 0.0005849318968445478,
      "loss": 2.8596,
      "step": 23343
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8848375082015991,
      "learning_rate": 0.0005849306167186794,
      "loss": 2.9461,
      "step": 23344
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4402235746383667,
      "learning_rate": 0.000584929336539837,
      "loss": 3.1967,
      "step": 23345
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5065654516220093,
      "learning_rate": 0.0005849280563080209,
      "loss": 3.1427,
      "step": 23346
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5486364364624023,
      "learning_rate": 0.0005849267760232313,
      "loss": 3.1874,
      "step": 23347
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.845630168914795,
      "learning_rate": 0.0005849254956854684,
      "loss": 2.9999,
      "step": 23348
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5765149593353271,
      "learning_rate": 0.0005849242152947325,
      "loss": 3.1971,
      "step": 23349
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.993791937828064,
      "learning_rate": 0.0005849229348510237,
      "loss": 2.9311,
      "step": 23350
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5459214448928833,
      "learning_rate": 0.0005849216543543425,
      "loss": 2.9696,
      "step": 23351
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.48721444606781,
      "learning_rate": 0.000584920373804689,
      "loss": 3.0983,
      "step": 23352
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7429628372192383,
      "learning_rate": 0.0005849190932020633,
      "loss": 3.2154,
      "step": 23353
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.914523959159851,
      "learning_rate": 0.0005849178125464658,
      "loss": 3.0703,
      "step": 23354
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6532118320465088,
      "learning_rate": 0.0005849165318378966,
      "loss": 3.0319,
      "step": 23355
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.683411955833435,
      "learning_rate": 0.0005849152510763562,
      "loss": 3.1862,
      "step": 23356
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8396234512329102,
      "learning_rate": 0.0005849139702618447,
      "loss": 3.2056,
      "step": 23357
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.383017659187317,
      "learning_rate": 0.0005849126893943623,
      "loss": 3.0471,
      "step": 23358
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6379344463348389,
      "learning_rate": 0.0005849114084739091,
      "loss": 3.1632,
      "step": 23359
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6184574365615845,
      "learning_rate": 0.0005849101275004855,
      "loss": 2.9876,
      "step": 23360
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6524213552474976,
      "learning_rate": 0.0005849088464740919,
      "loss": 3.0131,
      "step": 23361
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.036822557449341,
      "learning_rate": 0.0005849075653947283,
      "loss": 3.1924,
      "step": 23362
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3615083694458008,
      "learning_rate": 0.000584906284262395,
      "loss": 3.442,
      "step": 23363
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.035160541534424,
      "learning_rate": 0.0005849050030770922,
      "loss": 3.0668,
      "step": 23364
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3261009454727173,
      "learning_rate": 0.0005849037218388202,
      "loss": 2.9677,
      "step": 23365
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6143302917480469,
      "learning_rate": 0.0005849024405475792,
      "loss": 3.0626,
      "step": 23366
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.260889768600464,
      "learning_rate": 0.0005849011592033695,
      "loss": 2.9476,
      "step": 23367
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.652222990989685,
      "learning_rate": 0.0005848998778061914,
      "loss": 3.1369,
      "step": 23368
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5395029783248901,
      "learning_rate": 0.0005848985963560448,
      "loss": 3.149,
      "step": 23369
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6523041725158691,
      "learning_rate": 0.0005848973148529304,
      "loss": 3.4756,
      "step": 23370
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5373954772949219,
      "learning_rate": 0.000584896033296848,
      "loss": 2.9314,
      "step": 23371
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6976778507232666,
      "learning_rate": 0.0005848947516877982,
      "loss": 3.0926,
      "step": 23372
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3912371397018433,
      "learning_rate": 0.0005848934700257811,
      "loss": 3.1621,
      "step": 23373
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.513405680656433,
      "learning_rate": 0.0005848921883107968,
      "loss": 3.0327,
      "step": 23374
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3388121128082275,
      "learning_rate": 0.0005848909065428457,
      "loss": 3.1547,
      "step": 23375
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5112457275390625,
      "learning_rate": 0.0005848896247219282,
      "loss": 3.1787,
      "step": 23376
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.137113094329834,
      "learning_rate": 0.000584888342848044,
      "loss": 3.3411,
      "step": 23377
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.407524824142456,
      "learning_rate": 0.0005848870609211938,
      "loss": 3.1287,
      "step": 23378
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4477516412734985,
      "learning_rate": 0.0005848857789413779,
      "loss": 2.9352,
      "step": 23379
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3577719926834106,
      "learning_rate": 0.0005848844969085962,
      "loss": 3.0352,
      "step": 23380
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6389617919921875,
      "learning_rate": 0.0005848832148228492,
      "loss": 3.0259,
      "step": 23381
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5312319993972778,
      "learning_rate": 0.0005848819326841368,
      "loss": 3.231,
      "step": 23382
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7488151788711548,
      "learning_rate": 0.0005848806504924598,
      "loss": 2.7992,
      "step": 23383
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6205692291259766,
      "learning_rate": 0.0005848793682478178,
      "loss": 3.2938,
      "step": 23384
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8062725067138672,
      "learning_rate": 0.0005848780859502116,
      "loss": 3.1148,
      "step": 23385
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7138737440109253,
      "learning_rate": 0.0005848768035996411,
      "loss": 2.9611,
      "step": 23386
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3813204765319824,
      "learning_rate": 0.0005848755211961066,
      "loss": 3.0773,
      "step": 23387
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4681395292282104,
      "learning_rate": 0.0005848742387396084,
      "loss": 3.1102,
      "step": 23388
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6651533842086792,
      "learning_rate": 0.0005848729562301466,
      "loss": 2.9751,
      "step": 23389
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6246618032455444,
      "learning_rate": 0.0005848716736677216,
      "loss": 3.1756,
      "step": 23390
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.396904468536377,
      "learning_rate": 0.0005848703910523336,
      "loss": 2.908,
      "step": 23391
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.490330457687378,
      "learning_rate": 0.0005848691083839828,
      "loss": 3.5219,
      "step": 23392
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.8133580684661865,
      "learning_rate": 0.0005848678256626694,
      "loss": 2.7782,
      "step": 23393
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.628906488418579,
      "learning_rate": 0.0005848665428883937,
      "loss": 3.0215,
      "step": 23394
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4811627864837646,
      "learning_rate": 0.000584865260061156,
      "loss": 3.2017,
      "step": 23395
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.40053129196167,
      "learning_rate": 0.0005848639771809564,
      "loss": 3.2574,
      "step": 23396
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5546526908874512,
      "learning_rate": 0.0005848626942477953,
      "loss": 2.9561,
      "step": 23397
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3259124755859375,
      "learning_rate": 0.0005848614112616728,
      "loss": 3.0095,
      "step": 23398
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3081828355789185,
      "learning_rate": 0.0005848601282225892,
      "loss": 3.2701,
      "step": 23399
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4421848058700562,
      "learning_rate": 0.0005848588451305447,
      "loss": 3.0859,
      "step": 23400
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3746325969696045,
      "learning_rate": 0.0005848575619855397,
      "loss": 3.0505,
      "step": 23401
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5899357795715332,
      "learning_rate": 0.0005848562787875741,
      "loss": 2.9028,
      "step": 23402
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5101377964019775,
      "learning_rate": 0.0005848549955366484,
      "loss": 3.1765,
      "step": 23403
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.7854045629501343,
      "learning_rate": 0.0005848537122327629,
      "loss": 2.9345,
      "step": 23404
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4146053791046143,
      "learning_rate": 0.0005848524288759176,
      "loss": 3.0115,
      "step": 23405
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4433106184005737,
      "learning_rate": 0.0005848511454661128,
      "loss": 3.064,
      "step": 23406
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5028527975082397,
      "learning_rate": 0.0005848498620033489,
      "loss": 3.3243,
      "step": 23407
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.509300947189331,
      "learning_rate": 0.0005848485784876261,
      "loss": 3.1674,
      "step": 23408
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4981688261032104,
      "learning_rate": 0.0005848472949189444,
      "loss": 3.0741,
      "step": 23409
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.672545313835144,
      "learning_rate": 0.0005848460112973043,
      "loss": 3.1921,
      "step": 23410
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.6412429809570312,
      "learning_rate": 0.000584844727622706,
      "loss": 3.3706,
      "step": 23411
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.656440258026123,
      "learning_rate": 0.0005848434438951496,
      "loss": 2.9331,
      "step": 23412
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.731872797012329,
      "learning_rate": 0.0005848421601146354,
      "loss": 2.9351,
      "step": 23413
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3400806188583374,
      "learning_rate": 0.0005848408762811638,
      "loss": 3.2178,
      "step": 23414
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5812780857086182,
      "learning_rate": 0.0005848395923947348,
      "loss": 3.2227,
      "step": 23415
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0751569271087646,
      "learning_rate": 0.0005848383084553488,
      "loss": 3.1572,
      "step": 23416
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0644209384918213,
      "learning_rate": 0.000584837024463006,
      "loss": 3.2083,
      "step": 23417
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.6879750490188599,
      "learning_rate": 0.0005848357404177064,
      "loss": 3.0242,
      "step": 23418
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4286445379257202,
      "learning_rate": 0.0005848344563194508,
      "loss": 2.8393,
      "step": 23419
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4428092241287231,
      "learning_rate": 0.0005848331721682389,
      "loss": 3.3177,
      "step": 23420
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.4904868602752686,
      "learning_rate": 0.0005848318879640712,
      "loss": 2.8998,
      "step": 23421
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.0349855422973633,
      "learning_rate": 0.0005848306037069478,
      "loss": 3.2578,
      "step": 23422
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.5668338537216187,
      "learning_rate": 0.000584829319396869,
      "loss": 3.0274,
      "step": 23423
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.3964214324951172,
      "learning_rate": 0.0005848280350338351,
      "loss": 3.0087,
      "step": 23424
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5430744886398315,
      "learning_rate": 0.0005848267506178463,
      "loss": 2.9105,
      "step": 23425
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.027719736099243,
      "learning_rate": 0.0005848254661489029,
      "loss": 3.0059,
      "step": 23426
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4520697593688965,
      "learning_rate": 0.000584824181627005,
      "loss": 2.9612,
      "step": 23427
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5997759103775024,
      "learning_rate": 0.0005848228970521529,
      "loss": 3.0762,
      "step": 23428
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4230644702911377,
      "learning_rate": 0.0005848216124243468,
      "loss": 3.2948,
      "step": 23429
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.52859628200531,
      "learning_rate": 0.000584820327743587,
      "loss": 3.0882,
      "step": 23430
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6724015474319458,
      "learning_rate": 0.0005848190430098739,
      "loss": 2.8713,
      "step": 23431
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5314372777938843,
      "learning_rate": 0.0005848177582232074,
      "loss": 3.124,
      "step": 23432
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4734134674072266,
      "learning_rate": 0.0005848164733835879,
      "loss": 2.8478,
      "step": 23433
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.64644455909729,
      "learning_rate": 0.0005848151884910157,
      "loss": 3.0836,
      "step": 23434
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3118330240249634,
      "learning_rate": 0.0005848139035454909,
      "loss": 2.8877,
      "step": 23435
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4042081832885742,
      "learning_rate": 0.0005848126185470139,
      "loss": 3.1291,
      "step": 23436
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1348323822021484,
      "learning_rate": 0.0005848113334955848,
      "loss": 2.8727,
      "step": 23437
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4030708074569702,
      "learning_rate": 0.000584810048391204,
      "loss": 3.1943,
      "step": 23438
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8771564960479736,
      "learning_rate": 0.0005848087632338715,
      "loss": 3.1746,
      "step": 23439
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.461409091949463,
      "learning_rate": 0.0005848074780235877,
      "loss": 3.0873,
      "step": 23440
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.90306556224823,
      "learning_rate": 0.0005848061927603529,
      "loss": 2.8811,
      "step": 23441
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.733855962753296,
      "learning_rate": 0.0005848049074441672,
      "loss": 2.9227,
      "step": 23442
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7045910358428955,
      "learning_rate": 0.0005848036220750309,
      "loss": 2.9624,
      "step": 23443
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5172405242919922,
      "learning_rate": 0.0005848023366529442,
      "loss": 3.1243,
      "step": 23444
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4963823556900024,
      "learning_rate": 0.0005848010511779074,
      "loss": 3.0653,
      "step": 23445
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4910987615585327,
      "learning_rate": 0.0005847997656499207,
      "loss": 3.2275,
      "step": 23446
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.431220531463623,
      "learning_rate": 0.0005847984800689843,
      "loss": 3.1763,
      "step": 23447
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7989884614944458,
      "learning_rate": 0.0005847971944350986,
      "loss": 2.9305,
      "step": 23448
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.732150673866272,
      "learning_rate": 0.0005847959087482637,
      "loss": 3.1819,
      "step": 23449
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.902875304222107,
      "learning_rate": 0.0005847946230084799,
      "loss": 2.9762,
      "step": 23450
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7845646142959595,
      "learning_rate": 0.0005847933372157472,
      "loss": 3.0679,
      "step": 23451
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.294213056564331,
      "learning_rate": 0.0005847920513700663,
      "loss": 3.1939,
      "step": 23452
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4877837896347046,
      "learning_rate": 0.000584790765471437,
      "loss": 3.0229,
      "step": 23453
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.273601770401001,
      "learning_rate": 0.0005847894795198597,
      "loss": 3.1191,
      "step": 23454
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5977249145507812,
      "learning_rate": 0.0005847881935153348,
      "loss": 2.9237,
      "step": 23455
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4072898626327515,
      "learning_rate": 0.0005847869074578623,
      "loss": 3.024,
      "step": 23456
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.673274278640747,
      "learning_rate": 0.0005847856213474427,
      "loss": 3.0307,
      "step": 23457
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3455644845962524,
      "learning_rate": 0.0005847843351840759,
      "loss": 2.9336,
      "step": 23458
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3641483783721924,
      "learning_rate": 0.0005847830489677623,
      "loss": 3.1139,
      "step": 23459
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4305999279022217,
      "learning_rate": 0.0005847817626985023,
      "loss": 3.1012,
      "step": 23460
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2816922664642334,
      "learning_rate": 0.0005847804763762959,
      "loss": 3.1615,
      "step": 23461
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4810006618499756,
      "learning_rate": 0.0005847791900011434,
      "loss": 2.9982,
      "step": 23462
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.964253306388855,
      "learning_rate": 0.0005847779035730452,
      "loss": 3.186,
      "step": 23463
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.056591510772705,
      "learning_rate": 0.0005847766170920013,
      "loss": 3.1361,
      "step": 23464
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2717355489730835,
      "learning_rate": 0.000584775330558012,
      "loss": 2.9177,
      "step": 23465
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0673582553863525,
      "learning_rate": 0.0005847740439710778,
      "loss": 3.0116,
      "step": 23466
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1100363731384277,
      "learning_rate": 0.0005847727573311985,
      "loss": 3.1303,
      "step": 23467
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6905121803283691,
      "learning_rate": 0.0005847714706383747,
      "loss": 3.0367,
      "step": 23468
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6558657884597778,
      "learning_rate": 0.0005847701838926065,
      "loss": 2.9267,
      "step": 23469
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.686497449874878,
      "learning_rate": 0.0005847688970938942,
      "loss": 3.2604,
      "step": 23470
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4544488191604614,
      "learning_rate": 0.0005847676102422379,
      "loss": 3.3629,
      "step": 23471
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8342969417572021,
      "learning_rate": 0.0005847663233376379,
      "loss": 3.0902,
      "step": 23472
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6910544633865356,
      "learning_rate": 0.0005847650363800946,
      "loss": 2.6773,
      "step": 23473
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4832227230072021,
      "learning_rate": 0.0005847637493696079,
      "loss": 3.2681,
      "step": 23474
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.561159372329712,
      "learning_rate": 0.0005847624623061784,
      "loss": 3.0738,
      "step": 23475
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8242484331130981,
      "learning_rate": 0.0005847611751898062,
      "loss": 3.196,
      "step": 23476
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6454676389694214,
      "learning_rate": 0.0005847598880204915,
      "loss": 3.0904,
      "step": 23477
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.090355396270752,
      "learning_rate": 0.0005847586007982344,
      "loss": 3.0652,
      "step": 23478
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4997332096099854,
      "learning_rate": 0.0005847573135230354,
      "loss": 3.2195,
      "step": 23479
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.869219422340393,
      "learning_rate": 0.0005847560261948947,
      "loss": 2.9264,
      "step": 23480
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.513159155845642,
      "learning_rate": 0.0005847547388138125,
      "loss": 3.1044,
      "step": 23481
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7580467462539673,
      "learning_rate": 0.0005847534513797889,
      "loss": 3.2463,
      "step": 23482
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9962462186813354,
      "learning_rate": 0.0005847521638928243,
      "loss": 3.0679,
      "step": 23483
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5716379880905151,
      "learning_rate": 0.000584750876352919,
      "loss": 2.9711,
      "step": 23484
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3633280992507935,
      "learning_rate": 0.000584749588760073,
      "loss": 2.8995,
      "step": 23485
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.120950222015381,
      "learning_rate": 0.0005847483011142868,
      "loss": 3.0134,
      "step": 23486
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8605672121047974,
      "learning_rate": 0.0005847470134155605,
      "loss": 2.909,
      "step": 23487
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9379136562347412,
      "learning_rate": 0.0005847457256638942,
      "loss": 3.0317,
      "step": 23488
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3918163776397705,
      "learning_rate": 0.0005847444378592884,
      "loss": 3.1397,
      "step": 23489
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6525324583053589,
      "learning_rate": 0.0005847431500017433,
      "loss": 3.1244,
      "step": 23490
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.935218095779419,
      "learning_rate": 0.0005847418620912589,
      "loss": 3.0403,
      "step": 23491
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.437040090560913,
      "learning_rate": 0.0005847405741278358,
      "loss": 3.131,
      "step": 23492
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8498457670211792,
      "learning_rate": 0.000584739286111474,
      "loss": 2.9227,
      "step": 23493
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4685719013214111,
      "learning_rate": 0.0005847379980421737,
      "loss": 3.2883,
      "step": 23494
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1358838081359863,
      "learning_rate": 0.0005847367099199354,
      "loss": 3.2191,
      "step": 23495
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2633068561553955,
      "learning_rate": 0.000584735421744759,
      "loss": 2.9442,
      "step": 23496
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.418631076812744,
      "learning_rate": 0.000584734133516645,
      "loss": 2.8236,
      "step": 23497
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.706292986869812,
      "learning_rate": 0.0005847328452355935,
      "loss": 3.1937,
      "step": 23498
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.6351494789123535,
      "learning_rate": 0.0005847315569016048,
      "loss": 3.0559,
      "step": 23499
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5386719703674316,
      "learning_rate": 0.0005847302685146791,
      "loss": 3.2347,
      "step": 23500
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4063889980316162,
      "learning_rate": 0.0005847289800748168,
      "loss": 2.8183,
      "step": 23501
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.2126073837280273,
      "learning_rate": 0.0005847276915820178,
      "loss": 2.9597,
      "step": 23502
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5740851163864136,
      "learning_rate": 0.0005847264030362828,
      "loss": 3.0695,
      "step": 23503
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7409220933914185,
      "learning_rate": 0.0005847251144376115,
      "loss": 3.1424,
      "step": 23504
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6817337274551392,
      "learning_rate": 0.0005847238257860047,
      "loss": 3.0946,
      "step": 23505
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6072522401809692,
      "learning_rate": 0.0005847225370814623,
      "loss": 3.0036,
      "step": 23506
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4028581380844116,
      "learning_rate": 0.0005847212483239845,
      "loss": 3.2525,
      "step": 23507
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2574833631515503,
      "learning_rate": 0.0005847199595135717,
      "loss": 3.2385,
      "step": 23508
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.685014009475708,
      "learning_rate": 0.0005847186706502241,
      "loss": 3.2277,
      "step": 23509
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.680608868598938,
      "learning_rate": 0.000584717381733942,
      "loss": 2.9661,
      "step": 23510
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.446860432624817,
      "learning_rate": 0.0005847160927647254,
      "loss": 2.9134,
      "step": 23511
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5406413078308105,
      "learning_rate": 0.0005847148037425747,
      "loss": 3.3254,
      "step": 23512
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4163451194763184,
      "learning_rate": 0.0005847135146674903,
      "loss": 2.9824,
      "step": 23513
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4229799509048462,
      "learning_rate": 0.0005847122255394721,
      "loss": 3.0972,
      "step": 23514
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4170781373977661,
      "learning_rate": 0.0005847109363585207,
      "loss": 3.0235,
      "step": 23515
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5260422229766846,
      "learning_rate": 0.0005847096471246361,
      "loss": 3.0911,
      "step": 23516
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5500309467315674,
      "learning_rate": 0.0005847083578378186,
      "loss": 3.3185,
      "step": 23517
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5289779901504517,
      "learning_rate": 0.0005847070684980684,
      "loss": 3.2538,
      "step": 23518
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.592661738395691,
      "learning_rate": 0.0005847057791053859,
      "loss": 3.1245,
      "step": 23519
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4602508544921875,
      "learning_rate": 0.0005847044896597711,
      "loss": 3.0064,
      "step": 23520
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4186160564422607,
      "learning_rate": 0.0005847032001612244,
      "loss": 2.9471,
      "step": 23521
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4462621212005615,
      "learning_rate": 0.000584701910609746,
      "loss": 3.2323,
      "step": 23522
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6418930292129517,
      "learning_rate": 0.0005847006210053361,
      "loss": 2.8867,
      "step": 23523
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6657179594039917,
      "learning_rate": 0.0005846993313479951,
      "loss": 3.0637,
      "step": 23524
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7291926145553589,
      "learning_rate": 0.000584698041637723,
      "loss": 3.3426,
      "step": 23525
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.457290530204773,
      "learning_rate": 0.0005846967518745202,
      "loss": 3.1616,
      "step": 23526
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6746093034744263,
      "learning_rate": 0.0005846954620583869,
      "loss": 3.3193,
      "step": 23527
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3265373706817627,
      "learning_rate": 0.0005846941721893233,
      "loss": 2.9812,
      "step": 23528
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.760442852973938,
      "learning_rate": 0.0005846928822673297,
      "loss": 3.1392,
      "step": 23529
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3691661357879639,
      "learning_rate": 0.0005846915922924063,
      "loss": 3.1638,
      "step": 23530
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6007747650146484,
      "learning_rate": 0.0005846903022645535,
      "loss": 3.1684,
      "step": 23531
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3630188703536987,
      "learning_rate": 0.0005846890121837712,
      "loss": 3.0247,
      "step": 23532
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.416929006576538,
      "learning_rate": 0.00058468772205006,
      "loss": 3.1119,
      "step": 23533
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5834730863571167,
      "learning_rate": 0.0005846864318634199,
      "loss": 3.2909,
      "step": 23534
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5372035503387451,
      "learning_rate": 0.0005846851416238513,
      "loss": 3.0095,
      "step": 23535
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4902868270874023,
      "learning_rate": 0.0005846838513313543,
      "loss": 3.1555,
      "step": 23536
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5493929386138916,
      "learning_rate": 0.0005846825609859291,
      "loss": 3.1034,
      "step": 23537
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6535167694091797,
      "learning_rate": 0.0005846812705875761,
      "loss": 3.0321,
      "step": 23538
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.000068426132202,
      "learning_rate": 0.0005846799801362955,
      "loss": 2.9074,
      "step": 23539
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0327906608581543,
      "learning_rate": 0.0005846786896320876,
      "loss": 2.8588,
      "step": 23540
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3747444152832031,
      "learning_rate": 0.0005846773990749525,
      "loss": 3.1906,
      "step": 23541
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.417053461074829,
      "learning_rate": 0.0005846761084648904,
      "loss": 3.1419,
      "step": 23542
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5116978883743286,
      "learning_rate": 0.0005846748178019018,
      "loss": 3.218,
      "step": 23543
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8531242609024048,
      "learning_rate": 0.0005846735270859867,
      "loss": 3.2398,
      "step": 23544
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1710312366485596,
      "learning_rate": 0.0005846722363171454,
      "loss": 3.1379,
      "step": 23545
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.66768479347229,
      "learning_rate": 0.0005846709454953781,
      "loss": 3.2679,
      "step": 23546
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4799325466156006,
      "learning_rate": 0.0005846696546206853,
      "loss": 3.0275,
      "step": 23547
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4971121549606323,
      "learning_rate": 0.0005846683636930669,
      "loss": 3.0616,
      "step": 23548
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.486236333847046,
      "learning_rate": 0.0005846670727125232,
      "loss": 2.9886,
      "step": 23549
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3937345743179321,
      "learning_rate": 0.0005846657816790547,
      "loss": 2.9399,
      "step": 23550
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4737497568130493,
      "learning_rate": 0.0005846644905926613,
      "loss": 3.1788,
      "step": 23551
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.935451626777649,
      "learning_rate": 0.0005846631994533434,
      "loss": 2.8628,
      "step": 23552
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.41916823387146,
      "learning_rate": 0.0005846619082611013,
      "loss": 2.977,
      "step": 23553
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.736923098564148,
      "learning_rate": 0.0005846606170159352,
      "loss": 3.2104,
      "step": 23554
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3503539562225342,
      "learning_rate": 0.0005846593257178452,
      "loss": 2.8017,
      "step": 23555
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8827011585235596,
      "learning_rate": 0.0005846580343668318,
      "loss": 2.9044,
      "step": 23556
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0853829383850098,
      "learning_rate": 0.0005846567429628951,
      "loss": 2.9248,
      "step": 23557
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2638007402420044,
      "learning_rate": 0.0005846554515060353,
      "loss": 3.2582,
      "step": 23558
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8193165063858032,
      "learning_rate": 0.0005846541599962526,
      "loss": 2.9543,
      "step": 23559
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0456762313842773,
      "learning_rate": 0.0005846528684335474,
      "loss": 3.1064,
      "step": 23560
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.676823616027832,
      "learning_rate": 0.0005846515768179198,
      "loss": 3.3486,
      "step": 23561
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7087973356246948,
      "learning_rate": 0.0005846502851493702,
      "loss": 3.0989,
      "step": 23562
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3827953338623047,
      "learning_rate": 0.0005846489934278985,
      "loss": 3.045,
      "step": 23563
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8851373195648193,
      "learning_rate": 0.0005846477016535055,
      "loss": 3.0522,
      "step": 23564
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.790321707725525,
      "learning_rate": 0.000584646409826191,
      "loss": 3.2199,
      "step": 23565
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5935057401657104,
      "learning_rate": 0.0005846451179459553,
      "loss": 2.9987,
      "step": 23566
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9761669635772705,
      "learning_rate": 0.0005846438260127988,
      "loss": 3.0393,
      "step": 23567
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.9568591117858887,
      "learning_rate": 0.0005846425340267215,
      "loss": 3.1101,
      "step": 23568
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4440521001815796,
      "learning_rate": 0.0005846412419877238,
      "loss": 3.1851,
      "step": 23569
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.231705665588379,
      "learning_rate": 0.0005846399498958061,
      "loss": 3.1039,
      "step": 23570
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.050055503845215,
      "learning_rate": 0.0005846386577509682,
      "loss": 3.1095,
      "step": 23571
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9545828104019165,
      "learning_rate": 0.0005846373655532108,
      "loss": 3.3492,
      "step": 23572
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1061007976531982,
      "learning_rate": 0.0005846360733025339,
      "loss": 3.0947,
      "step": 23573
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3799569606781006,
      "learning_rate": 0.0005846347809989376,
      "loss": 2.9778,
      "step": 23574
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3117568492889404,
      "learning_rate": 0.0005846334886424226,
      "loss": 3.1542,
      "step": 23575
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.745634913444519,
      "learning_rate": 0.0005846321962329887,
      "loss": 3.1872,
      "step": 23576
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7735223770141602,
      "learning_rate": 0.0005846309037706364,
      "loss": 3.3251,
      "step": 23577
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2125130891799927,
      "learning_rate": 0.0005846296112553658,
      "loss": 3.1337,
      "step": 23578
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6327840089797974,
      "learning_rate": 0.0005846283186871771,
      "loss": 3.1861,
      "step": 23579
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.180068254470825,
      "learning_rate": 0.0005846270260660707,
      "loss": 2.8861,
      "step": 23580
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5465532541275024,
      "learning_rate": 0.0005846257333920468,
      "loss": 3.0087,
      "step": 23581
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.0032060146331787,
      "learning_rate": 0.0005846244406651056,
      "loss": 2.839,
      "step": 23582
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.802121162414551,
      "learning_rate": 0.0005846231478852472,
      "loss": 3.0159,
      "step": 23583
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4227508306503296,
      "learning_rate": 0.0005846218550524721,
      "loss": 3.1126,
      "step": 23584
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8295093774795532,
      "learning_rate": 0.0005846205621667804,
      "loss": 2.8861,
      "step": 23585
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3285777568817139,
      "learning_rate": 0.0005846192692281723,
      "loss": 3.3285,
      "step": 23586
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5088876485824585,
      "learning_rate": 0.0005846179762366482,
      "loss": 3.2185,
      "step": 23587
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2652831077575684,
      "learning_rate": 0.0005846166831922083,
      "loss": 3.1155,
      "step": 23588
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4207310676574707,
      "learning_rate": 0.0005846153900948527,
      "loss": 3.0365,
      "step": 23589
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.184814214706421,
      "learning_rate": 0.0005846140969445817,
      "loss": 3.0775,
      "step": 23590
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.875908613204956,
      "learning_rate": 0.0005846128037413956,
      "loss": 3.1561,
      "step": 23591
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8239998817443848,
      "learning_rate": 0.0005846115104852946,
      "loss": 3.2448,
      "step": 23592
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0511462688446045,
      "learning_rate": 0.000584610217176279,
      "loss": 2.9138,
      "step": 23593
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8038034439086914,
      "learning_rate": 0.0005846089238143489,
      "loss": 3.0446,
      "step": 23594
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5171958208084106,
      "learning_rate": 0.0005846076303995047,
      "loss": 3.2121,
      "step": 23595
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.7599711418151855,
      "learning_rate": 0.0005846063369317465,
      "loss": 2.843,
      "step": 23596
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.861524820327759,
      "learning_rate": 0.0005846050434110747,
      "loss": 3.0759,
      "step": 23597
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.559158205986023,
      "learning_rate": 0.0005846037498374894,
      "loss": 3.2464,
      "step": 23598
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7036385536193848,
      "learning_rate": 0.0005846024562109909,
      "loss": 3.1825,
      "step": 23599
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9149531126022339,
      "learning_rate": 0.0005846011625315794,
      "loss": 3.1711,
      "step": 23600
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2947781085968018,
      "learning_rate": 0.0005845998687992552,
      "loss": 2.9583,
      "step": 23601
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4843050241470337,
      "learning_rate": 0.0005845985750140185,
      "loss": 2.7807,
      "step": 23602
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5299997329711914,
      "learning_rate": 0.0005845972811758695,
      "loss": 3.0968,
      "step": 23603
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3403030633926392,
      "learning_rate": 0.0005845959872848086,
      "loss": 3.4037,
      "step": 23604
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5568183660507202,
      "learning_rate": 0.0005845946933408359,
      "loss": 2.9332,
      "step": 23605
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4646414518356323,
      "learning_rate": 0.0005845933993439517,
      "loss": 3.1785,
      "step": 23606
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.1992186307907104,
      "learning_rate": 0.0005845921052941561,
      "loss": 2.9532,
      "step": 23607
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4592787027359009,
      "learning_rate": 0.0005845908111914496,
      "loss": 3.3761,
      "step": 23608
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5575631856918335,
      "learning_rate": 0.0005845895170358322,
      "loss": 3.2214,
      "step": 23609
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.63091242313385,
      "learning_rate": 0.0005845882228273043,
      "loss": 2.9109,
      "step": 23610
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4369722604751587,
      "learning_rate": 0.000584586928565866,
      "loss": 2.8906,
      "step": 23611
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6476008892059326,
      "learning_rate": 0.0005845856342515176,
      "loss": 3.1993,
      "step": 23612
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4865999221801758,
      "learning_rate": 0.0005845843398842595,
      "loss": 3.2585,
      "step": 23613
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5510380268096924,
      "learning_rate": 0.0005845830454640916,
      "loss": 3.1134,
      "step": 23614
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5256026983261108,
      "learning_rate": 0.0005845817509910145,
      "loss": 3.1707,
      "step": 23615
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5708109140396118,
      "learning_rate": 0.0005845804564650283,
      "loss": 3.0236,
      "step": 23616
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6035487651824951,
      "learning_rate": 0.0005845791618861332,
      "loss": 2.9456,
      "step": 23617
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2292488813400269,
      "learning_rate": 0.0005845778672543294,
      "loss": 2.9654,
      "step": 23618
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0511093139648438,
      "learning_rate": 0.0005845765725696172,
      "loss": 3.1274,
      "step": 23619
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5157043933868408,
      "learning_rate": 0.0005845752778319969,
      "loss": 3.0117,
      "step": 23620
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7247214317321777,
      "learning_rate": 0.0005845739830414687,
      "loss": 3.1996,
      "step": 23621
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.08083176612854,
      "learning_rate": 0.0005845726881980327,
      "loss": 3.1089,
      "step": 23622
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3990634679794312,
      "learning_rate": 0.0005845713933016894,
      "loss": 3.1918,
      "step": 23623
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3147083520889282,
      "learning_rate": 0.0005845700983524388,
      "loss": 2.9576,
      "step": 23624
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.610284447669983,
      "learning_rate": 0.0005845688033502813,
      "loss": 2.9663,
      "step": 23625
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.953338384628296,
      "learning_rate": 0.000584567508295217,
      "loss": 2.9659,
      "step": 23626
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6927659511566162,
      "learning_rate": 0.0005845662131872463,
      "loss": 3.1658,
      "step": 23627
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.490173101425171,
      "learning_rate": 0.0005845649180263694,
      "loss": 3.0586,
      "step": 23628
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2630852460861206,
      "learning_rate": 0.0005845636228125866,
      "loss": 3.0839,
      "step": 23629
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2594735622406006,
      "learning_rate": 0.0005845623275458979,
      "loss": 2.8275,
      "step": 23630
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5527981519699097,
      "learning_rate": 0.0005845610322263037,
      "loss": 3.4062,
      "step": 23631
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3287116289138794,
      "learning_rate": 0.0005845597368538043,
      "loss": 3.2702,
      "step": 23632
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5230712890625,
      "learning_rate": 0.0005845584414283999,
      "loss": 3.2858,
      "step": 23633
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4055067300796509,
      "learning_rate": 0.0005845571459500905,
      "loss": 3.3201,
      "step": 23634
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.393822193145752,
      "learning_rate": 0.0005845558504188768,
      "loss": 2.9698,
      "step": 23635
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7265264987945557,
      "learning_rate": 0.0005845545548347587,
      "loss": 3.1775,
      "step": 23636
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.68324875831604,
      "learning_rate": 0.0005845532591977365,
      "loss": 2.9703,
      "step": 23637
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.802496075630188,
      "learning_rate": 0.0005845519635078106,
      "loss": 2.8803,
      "step": 23638
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5960842370986938,
      "learning_rate": 0.000584550667764981,
      "loss": 3.1614,
      "step": 23639
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8646718263626099,
      "learning_rate": 0.0005845493719692481,
      "loss": 2.8812,
      "step": 23640
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2747106552124023,
      "learning_rate": 0.0005845480761206121,
      "loss": 2.9843,
      "step": 23641
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4289301633834839,
      "learning_rate": 0.0005845467802190733,
      "loss": 3.2484,
      "step": 23642
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.8682680130004883,
      "learning_rate": 0.0005845454842646318,
      "loss": 3.0884,
      "step": 23643
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.059464931488037,
      "learning_rate": 0.000584544188257288,
      "loss": 3.0966,
      "step": 23644
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.441601276397705,
      "learning_rate": 0.0005845428921970419,
      "loss": 2.8838,
      "step": 23645
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6106094121932983,
      "learning_rate": 0.000584541596083894,
      "loss": 3.2535,
      "step": 23646
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5425251722335815,
      "learning_rate": 0.0005845402999178445,
      "loss": 3.1745,
      "step": 23647
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.652506947517395,
      "learning_rate": 0.0005845390036988935,
      "loss": 3.1482,
      "step": 23648
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2712650299072266,
      "learning_rate": 0.0005845377074270414,
      "loss": 3.4451,
      "step": 23649
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2856523990631104,
      "learning_rate": 0.0005845364111022884,
      "loss": 2.6608,
      "step": 23650
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3680020570755005,
      "learning_rate": 0.0005845351147246346,
      "loss": 3.1338,
      "step": 23651
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9674837589263916,
      "learning_rate": 0.0005845338182940804,
      "loss": 2.9591,
      "step": 23652
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.60097336769104,
      "learning_rate": 0.0005845325218106261,
      "loss": 3.5014,
      "step": 23653
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4759503602981567,
      "learning_rate": 0.0005845312252742717,
      "loss": 3.1839,
      "step": 23654
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6779835224151611,
      "learning_rate": 0.0005845299286850175,
      "loss": 3.08,
      "step": 23655
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9857358932495117,
      "learning_rate": 0.000584528632042864,
      "loss": 3.0792,
      "step": 23656
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.885199785232544,
      "learning_rate": 0.0005845273353478112,
      "loss": 2.9656,
      "step": 23657
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2780733108520508,
      "learning_rate": 0.0005845260385998595,
      "loss": 3.1531,
      "step": 23658
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.325249433517456,
      "learning_rate": 0.0005845247417990087,
      "loss": 3.155,
      "step": 23659
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.637892723083496,
      "learning_rate": 0.0005845234449452597,
      "loss": 2.9779,
      "step": 23660
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.187089204788208,
      "learning_rate": 0.0005845221480386123,
      "loss": 2.9491,
      "step": 23661
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2733979225158691,
      "learning_rate": 0.0005845208510790669,
      "loss": 3.1909,
      "step": 23662
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.848436951637268,
      "learning_rate": 0.0005845195540666237,
      "loss": 3.1131,
      "step": 23663
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.3930583000183105,
      "learning_rate": 0.0005845182570012828,
      "loss": 3.4034,
      "step": 23664
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2358397245407104,
      "learning_rate": 0.0005845169598830448,
      "loss": 3.1321,
      "step": 23665
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5717681646347046,
      "learning_rate": 0.0005845156627119096,
      "loss": 3.2539,
      "step": 23666
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.279355764389038,
      "learning_rate": 0.0005845143654878775,
      "loss": 3.1943,
      "step": 23667
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4900684356689453,
      "learning_rate": 0.000584513068210949,
      "loss": 3.0621,
      "step": 23668
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.447914481163025,
      "learning_rate": 0.0005845117708811241,
      "loss": 3.1983,
      "step": 23669
    },
    {
      "epoch": 0.31,
      "grad_norm": 4.281434535980225,
      "learning_rate": 0.000584510473498403,
      "loss": 2.9984,
      "step": 23670
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.1018877029418945,
      "learning_rate": 0.0005845091760627862,
      "loss": 3.2524,
      "step": 23671
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7372173070907593,
      "learning_rate": 0.0005845078785742736,
      "loss": 3.0826,
      "step": 23672
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.218085527420044,
      "learning_rate": 0.0005845065810328656,
      "loss": 3.0712,
      "step": 23673
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.7483842372894287,
      "learning_rate": 0.0005845052834385626,
      "loss": 3.0753,
      "step": 23674
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.0345888137817383,
      "learning_rate": 0.0005845039857913647,
      "loss": 2.8528,
      "step": 23675
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.977393627166748,
      "learning_rate": 0.000584502688091272,
      "loss": 3.0159,
      "step": 23676
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2592389583587646,
      "learning_rate": 0.000584501390338285,
      "loss": 2.8759,
      "step": 23677
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.309170961380005,
      "learning_rate": 0.0005845000925324038,
      "loss": 3.0865,
      "step": 23678
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5174474716186523,
      "learning_rate": 0.0005844987946736286,
      "loss": 3.172,
      "step": 23679
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.250319004058838,
      "learning_rate": 0.0005844974967619597,
      "loss": 3.0853,
      "step": 23680
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.775978446006775,
      "learning_rate": 0.0005844961987973974,
      "loss": 2.8317,
      "step": 23681
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6341036558151245,
      "learning_rate": 0.0005844949007799418,
      "loss": 3.2026,
      "step": 23682
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2672231197357178,
      "learning_rate": 0.0005844936027095933,
      "loss": 3.2456,
      "step": 23683
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3151154518127441,
      "learning_rate": 0.0005844923045863521,
      "loss": 2.9794,
      "step": 23684
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6118559837341309,
      "learning_rate": 0.0005844910064102183,
      "loss": 2.9865,
      "step": 23685
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3634095191955566,
      "learning_rate": 0.0005844897081811924,
      "loss": 3.3085,
      "step": 23686
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6632331609725952,
      "learning_rate": 0.0005844884098992743,
      "loss": 3.0831,
      "step": 23687
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6561380624771118,
      "learning_rate": 0.0005844871115644647,
      "loss": 3.0835,
      "step": 23688
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3660589456558228,
      "learning_rate": 0.0005844858131767634,
      "loss": 3.247,
      "step": 23689
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7243576049804688,
      "learning_rate": 0.0005844845147361708,
      "loss": 3.0418,
      "step": 23690
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.038831949234009,
      "learning_rate": 0.0005844832162426873,
      "loss": 3.2016,
      "step": 23691
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.560518503189087,
      "learning_rate": 0.0005844819176963128,
      "loss": 3.0213,
      "step": 23692
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.55579674243927,
      "learning_rate": 0.0005844806190970478,
      "loss": 3.2152,
      "step": 23693
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6830615997314453,
      "learning_rate": 0.0005844793204448926,
      "loss": 3.2045,
      "step": 23694
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.584973931312561,
      "learning_rate": 0.0005844780217398473,
      "loss": 3.1406,
      "step": 23695
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5682390928268433,
      "learning_rate": 0.0005844767229819121,
      "loss": 2.9728,
      "step": 23696
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4562900066375732,
      "learning_rate": 0.0005844754241710872,
      "loss": 3.0742,
      "step": 23697
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2615844011306763,
      "learning_rate": 0.0005844741253073732,
      "loss": 2.8871,
      "step": 23698
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.124202013015747,
      "learning_rate": 0.00058447282639077,
      "loss": 3.3075,
      "step": 23699
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.708448052406311,
      "learning_rate": 0.0005844715274212778,
      "loss": 3.1749,
      "step": 23700
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.698240876197815,
      "learning_rate": 0.0005844702283988971,
      "loss": 3.114,
      "step": 23701
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.685386300086975,
      "learning_rate": 0.000584468929323628,
      "loss": 3.0218,
      "step": 23702
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6065912246704102,
      "learning_rate": 0.0005844676301954708,
      "loss": 3.4729,
      "step": 23703
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3486659526824951,
      "learning_rate": 0.0005844663310144256,
      "loss": 3.1014,
      "step": 23704
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.420112133026123,
      "learning_rate": 0.0005844650317804928,
      "loss": 2.9815,
      "step": 23705
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4558143615722656,
      "learning_rate": 0.0005844637324936725,
      "loss": 3.1818,
      "step": 23706
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.610515832901001,
      "learning_rate": 0.0005844624331539651,
      "loss": 3.0335,
      "step": 23707
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5419522523880005,
      "learning_rate": 0.0005844611337613707,
      "loss": 2.9918,
      "step": 23708
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6130986213684082,
      "learning_rate": 0.0005844598343158897,
      "loss": 3.1809,
      "step": 23709
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4456539154052734,
      "learning_rate": 0.0005844585348175221,
      "loss": 3.1633,
      "step": 23710
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4617934226989746,
      "learning_rate": 0.0005844572352662685,
      "loss": 2.9024,
      "step": 23711
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.433075428009033,
      "learning_rate": 0.0005844559356621287,
      "loss": 2.9969,
      "step": 23712
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3819893598556519,
      "learning_rate": 0.0005844546360051034,
      "loss": 3.126,
      "step": 23713
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8340452909469604,
      "learning_rate": 0.0005844533362951925,
      "loss": 2.7587,
      "step": 23714
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9021308422088623,
      "learning_rate": 0.0005844520365323963,
      "loss": 3.1267,
      "step": 23715
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4561374187469482,
      "learning_rate": 0.0005844507367167152,
      "loss": 3.1171,
      "step": 23716
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9434698820114136,
      "learning_rate": 0.0005844494368481492,
      "loss": 3.3185,
      "step": 23717
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3265860080718994,
      "learning_rate": 0.0005844481369266989,
      "loss": 3.1109,
      "step": 23718
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9810690879821777,
      "learning_rate": 0.0005844468369523642,
      "loss": 2.9997,
      "step": 23719
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.521531581878662,
      "learning_rate": 0.0005844455369251454,
      "loss": 2.9508,
      "step": 23720
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.7942421436309814,
      "learning_rate": 0.0005844442368450429,
      "loss": 3.0034,
      "step": 23721
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8970115184783936,
      "learning_rate": 0.0005844429367120568,
      "loss": 2.9723,
      "step": 23722
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6980334520339966,
      "learning_rate": 0.0005844416365261874,
      "loss": 3.2107,
      "step": 23723
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4721341133117676,
      "learning_rate": 0.000584440336287435,
      "loss": 3.0214,
      "step": 23724
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.087392807006836,
      "learning_rate": 0.0005844390359957996,
      "loss": 3.2258,
      "step": 23725
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4968063831329346,
      "learning_rate": 0.0005844377356512817,
      "loss": 3.0573,
      "step": 23726
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3941479921340942,
      "learning_rate": 0.0005844364352538815,
      "loss": 3.2209,
      "step": 23727
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3685741424560547,
      "learning_rate": 0.0005844351348035992,
      "loss": 3.0408,
      "step": 23728
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.733388662338257,
      "learning_rate": 0.000584433834300435,
      "loss": 3.02,
      "step": 23729
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.9897708892822266,
      "learning_rate": 0.0005844325337443893,
      "loss": 3.0062,
      "step": 23730
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2263580560684204,
      "learning_rate": 0.0005844312331354621,
      "loss": 3.0888,
      "step": 23731
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.096025228500366,
      "learning_rate": 0.0005844299324736537,
      "loss": 3.3302,
      "step": 23732
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.0043368339538574,
      "learning_rate": 0.0005844286317589645,
      "loss": 2.9946,
      "step": 23733
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.307901382446289,
      "learning_rate": 0.0005844273309913946,
      "loss": 3.0238,
      "step": 23734
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.59714674949646,
      "learning_rate": 0.0005844260301709443,
      "loss": 3.0843,
      "step": 23735
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2631092071533203,
      "learning_rate": 0.0005844247292976139,
      "loss": 2.9699,
      "step": 23736
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6573892831802368,
      "learning_rate": 0.0005844234283714035,
      "loss": 2.948,
      "step": 23737
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2708266973495483,
      "learning_rate": 0.0005844221273923134,
      "loss": 3.2616,
      "step": 23738
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6235424280166626,
      "learning_rate": 0.000584420826360344,
      "loss": 3.1257,
      "step": 23739
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8308794498443604,
      "learning_rate": 0.0005844195252754953,
      "loss": 2.8904,
      "step": 23740
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.541381597518921,
      "learning_rate": 0.0005844182241377676,
      "loss": 3.2425,
      "step": 23741
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0107626914978027,
      "learning_rate": 0.0005844169229471612,
      "loss": 2.9726,
      "step": 23742
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9158848524093628,
      "learning_rate": 0.0005844156217036764,
      "loss": 3.3497,
      "step": 23743
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6909228563308716,
      "learning_rate": 0.0005844143204073133,
      "loss": 3.0522,
      "step": 23744
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6122832298278809,
      "learning_rate": 0.0005844130190580722,
      "loss": 3.3213,
      "step": 23745
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3902922868728638,
      "learning_rate": 0.0005844117176559534,
      "loss": 3.2032,
      "step": 23746
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.154082775115967,
      "learning_rate": 0.0005844104162009571,
      "loss": 3.149,
      "step": 23747
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7895177602767944,
      "learning_rate": 0.0005844091146930835,
      "loss": 3.118,
      "step": 23748
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5011011362075806,
      "learning_rate": 0.000584407813132333,
      "loss": 3.2522,
      "step": 23749
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.586452007293701,
      "learning_rate": 0.0005844065115187055,
      "loss": 2.9638,
      "step": 23750
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4200057983398438,
      "learning_rate": 0.0005844052098522016,
      "loss": 3.0814,
      "step": 23751
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4787436723709106,
      "learning_rate": 0.0005844039081328213,
      "loss": 3.0317,
      "step": 23752
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4502147436141968,
      "learning_rate": 0.0005844026063605651,
      "loss": 3.393,
      "step": 23753
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.338313341140747,
      "learning_rate": 0.0005844013045354329,
      "loss": 3.0849,
      "step": 23754
    },
    {
      "epoch": 0.31,
      "grad_norm": 4.668621063232422,
      "learning_rate": 0.0005844000026574253,
      "loss": 3.066,
      "step": 23755
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4027572870254517,
      "learning_rate": 0.0005843987007265423,
      "loss": 2.8267,
      "step": 23756
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5892481803894043,
      "learning_rate": 0.0005843973987427842,
      "loss": 3.0939,
      "step": 23757
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6679412126541138,
      "learning_rate": 0.0005843960967061513,
      "loss": 3.2833,
      "step": 23758
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1860616207122803,
      "learning_rate": 0.0005843947946166437,
      "loss": 2.943,
      "step": 23759
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4390614032745361,
      "learning_rate": 0.0005843934924742618,
      "loss": 3.1123,
      "step": 23760
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1200449466705322,
      "learning_rate": 0.0005843921902790058,
      "loss": 3.3633,
      "step": 23761
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3978965282440186,
      "learning_rate": 0.0005843908880308759,
      "loss": 2.8658,
      "step": 23762
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9537241458892822,
      "learning_rate": 0.0005843895857298724,
      "loss": 2.9473,
      "step": 23763
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7707916498184204,
      "learning_rate": 0.0005843882833759954,
      "loss": 2.9724,
      "step": 23764
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.3756935596466064,
      "learning_rate": 0.0005843869809692454,
      "loss": 2.8225,
      "step": 23765
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8462005853652954,
      "learning_rate": 0.0005843856785096225,
      "loss": 2.9732,
      "step": 23766
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6089566946029663,
      "learning_rate": 0.0005843843759971268,
      "loss": 3.0636,
      "step": 23767
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.647374391555786,
      "learning_rate": 0.0005843830734317588,
      "loss": 3.3011,
      "step": 23768
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.42714524269104,
      "learning_rate": 0.0005843817708135185,
      "loss": 3.0715,
      "step": 23769
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2071890830993652,
      "learning_rate": 0.0005843804681424063,
      "loss": 2.8454,
      "step": 23770
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3564982414245605,
      "learning_rate": 0.0005843791654184224,
      "loss": 2.891,
      "step": 23771
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.559968113899231,
      "learning_rate": 0.000584377862641567,
      "loss": 2.9549,
      "step": 23772
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7691717147827148,
      "learning_rate": 0.0005843765598118405,
      "loss": 3.2757,
      "step": 23773
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2258782386779785,
      "learning_rate": 0.000584375256929243,
      "loss": 3.0918,
      "step": 23774
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6085830926895142,
      "learning_rate": 0.0005843739539937747,
      "loss": 3.0057,
      "step": 23775
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6777180433273315,
      "learning_rate": 0.000584372651005436,
      "loss": 3.4764,
      "step": 23776
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5343561172485352,
      "learning_rate": 0.0005843713479642271,
      "loss": 3.2221,
      "step": 23777
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4829562902450562,
      "learning_rate": 0.000584370044870148,
      "loss": 3.0355,
      "step": 23778
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1952953338623047,
      "learning_rate": 0.0005843687417231993,
      "loss": 2.9753,
      "step": 23779
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.572411298751831,
      "learning_rate": 0.000584367438523381,
      "loss": 2.9552,
      "step": 23780
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5686081647872925,
      "learning_rate": 0.0005843661352706935,
      "loss": 3.1089,
      "step": 23781
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0577614307403564,
      "learning_rate": 0.0005843648319651371,
      "loss": 2.9193,
      "step": 23782
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3315489292144775,
      "learning_rate": 0.0005843635286067115,
      "loss": 2.9561,
      "step": 23783
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.002534866333008,
      "learning_rate": 0.0005843622251954177,
      "loss": 3.0017,
      "step": 23784
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4784575700759888,
      "learning_rate": 0.0005843609217312556,
      "loss": 2.9583,
      "step": 23785
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5493321418762207,
      "learning_rate": 0.0005843596182142253,
      "loss": 3.1061,
      "step": 23786
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.6836373805999756,
      "learning_rate": 0.0005843583146443272,
      "loss": 3.0572,
      "step": 23787
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.6804752349853516,
      "learning_rate": 0.0005843570110215615,
      "loss": 3.2612,
      "step": 23788
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4068989753723145,
      "learning_rate": 0.0005843557073459285,
      "loss": 3.0251,
      "step": 23789
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.356816291809082,
      "learning_rate": 0.0005843544036174284,
      "loss": 3.0538,
      "step": 23790
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7193639278411865,
      "learning_rate": 0.0005843530998360614,
      "loss": 2.9989,
      "step": 23791
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5134674310684204,
      "learning_rate": 0.0005843517960018279,
      "loss": 3.0642,
      "step": 23792
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2896785736083984,
      "learning_rate": 0.000584350492114728,
      "loss": 2.9505,
      "step": 23793
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.253594994544983,
      "learning_rate": 0.0005843491881747619,
      "loss": 3.1342,
      "step": 23794
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.225187063217163,
      "learning_rate": 0.00058434788418193,
      "loss": 2.9053,
      "step": 23795
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.7506439685821533,
      "learning_rate": 0.0005843465801362324,
      "loss": 3.071,
      "step": 23796
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.41789972782135,
      "learning_rate": 0.0005843452760376694,
      "loss": 3.058,
      "step": 23797
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.291559934616089,
      "learning_rate": 0.0005843439718862413,
      "loss": 3.1087,
      "step": 23798
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.791097402572632,
      "learning_rate": 0.0005843426676819482,
      "loss": 2.8035,
      "step": 23799
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6777158975601196,
      "learning_rate": 0.0005843413634247906,
      "loss": 3.1833,
      "step": 23800
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8414913415908813,
      "learning_rate": 0.0005843400591147685,
      "loss": 2.8848,
      "step": 23801
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5837998390197754,
      "learning_rate": 0.0005843387547518821,
      "loss": 2.9753,
      "step": 23802
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.594481945037842,
      "learning_rate": 0.0005843374503361319,
      "loss": 3.2352,
      "step": 23803
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4551639556884766,
      "learning_rate": 0.0005843361458675179,
      "loss": 3.0048,
      "step": 23804
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4894225597381592,
      "learning_rate": 0.0005843348413460405,
      "loss": 2.8671,
      "step": 23805
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5144338607788086,
      "learning_rate": 0.0005843335367716999,
      "loss": 3.0604,
      "step": 23806
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.565917730331421,
      "learning_rate": 0.0005843322321444963,
      "loss": 3.0125,
      "step": 23807
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.360231399536133,
      "learning_rate": 0.0005843309274644299,
      "loss": 3.1415,
      "step": 23808
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.587059736251831,
      "learning_rate": 0.0005843296227315012,
      "loss": 3.0385,
      "step": 23809
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4463660717010498,
      "learning_rate": 0.0005843283179457101,
      "loss": 3.0713,
      "step": 23810
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.102538585662842,
      "learning_rate": 0.000584327013107057,
      "loss": 2.9203,
      "step": 23811
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8086837530136108,
      "learning_rate": 0.0005843257082155422,
      "loss": 3.2026,
      "step": 23812
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3558604717254639,
      "learning_rate": 0.0005843244032711659,
      "loss": 3.0723,
      "step": 23813
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8149579763412476,
      "learning_rate": 0.0005843230982739283,
      "loss": 2.7696,
      "step": 23814
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7909902334213257,
      "learning_rate": 0.0005843217932238297,
      "loss": 3.1409,
      "step": 23815
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3154608011245728,
      "learning_rate": 0.0005843204881208702,
      "loss": 2.9655,
      "step": 23816
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3488249778747559,
      "learning_rate": 0.0005843191829650503,
      "loss": 2.8661,
      "step": 23817
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.86342453956604,
      "learning_rate": 0.00058431787775637,
      "loss": 2.9838,
      "step": 23818
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4794847965240479,
      "learning_rate": 0.0005843165724948296,
      "loss": 3.0524,
      "step": 23819
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.862945795059204,
      "learning_rate": 0.0005843152671804294,
      "loss": 3.1386,
      "step": 23820
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3998651504516602,
      "learning_rate": 0.0005843139618131697,
      "loss": 3.1938,
      "step": 23821
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.818121075630188,
      "learning_rate": 0.0005843126563930507,
      "loss": 2.9546,
      "step": 23822
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.323806643486023,
      "learning_rate": 0.0005843113509200725,
      "loss": 3.0033,
      "step": 23823
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7578206062316895,
      "learning_rate": 0.0005843100453942356,
      "loss": 3.0947,
      "step": 23824
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2692142724990845,
      "learning_rate": 0.00058430873981554,
      "loss": 3.1433,
      "step": 23825
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9168014526367188,
      "learning_rate": 0.0005843074341839859,
      "loss": 2.9371,
      "step": 23826
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.298585295677185,
      "learning_rate": 0.0005843061284995739,
      "loss": 3.2551,
      "step": 23827
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6043965816497803,
      "learning_rate": 0.000584304822762304,
      "loss": 3.3129,
      "step": 23828
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2778637409210205,
      "learning_rate": 0.0005843035169721763,
      "loss": 3.2132,
      "step": 23829
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1995296478271484,
      "learning_rate": 0.0005843022111291914,
      "loss": 2.9879,
      "step": 23830
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9920848608016968,
      "learning_rate": 0.0005843009052333493,
      "loss": 3.1079,
      "step": 23831
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.051610231399536,
      "learning_rate": 0.0005842995992846503,
      "loss": 3.0801,
      "step": 23832
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.848483681678772,
      "learning_rate": 0.0005842982932830946,
      "loss": 2.8093,
      "step": 23833
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6227824687957764,
      "learning_rate": 0.0005842969872286824,
      "loss": 2.7527,
      "step": 23834
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.6811647415161133,
      "learning_rate": 0.0005842956811214141,
      "loss": 3.0568,
      "step": 23835
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1432130336761475,
      "learning_rate": 0.00058429437496129,
      "loss": 3.212,
      "step": 23836
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5342189073562622,
      "learning_rate": 0.00058429306874831,
      "loss": 3.1768,
      "step": 23837
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4098421335220337,
      "learning_rate": 0.0005842917624824746,
      "loss": 3.3866,
      "step": 23838
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.9156136512756348,
      "learning_rate": 0.000584290456163784,
      "loss": 2.8387,
      "step": 23839
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.544628620147705,
      "learning_rate": 0.0005842891497922385,
      "loss": 3.0433,
      "step": 23840
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9505960941314697,
      "learning_rate": 0.0005842878433678382,
      "loss": 3.0902,
      "step": 23841
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7996938228607178,
      "learning_rate": 0.0005842865368905835,
      "loss": 3.164,
      "step": 23842
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.3060288429260254,
      "learning_rate": 0.0005842852303604745,
      "loss": 3.3178,
      "step": 23843
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.6372344493865967,
      "learning_rate": 0.0005842839237775116,
      "loss": 2.9585,
      "step": 23844
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8835256099700928,
      "learning_rate": 0.0005842826171416949,
      "loss": 3.1829,
      "step": 23845
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.628279685974121,
      "learning_rate": 0.0005842813104530247,
      "loss": 3.0749,
      "step": 23846
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.588569164276123,
      "learning_rate": 0.0005842800037115012,
      "loss": 2.9991,
      "step": 23847
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9048676490783691,
      "learning_rate": 0.0005842786969171247,
      "loss": 3.2405,
      "step": 23848
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8053832054138184,
      "learning_rate": 0.0005842773900698956,
      "loss": 3.1507,
      "step": 23849
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8851243257522583,
      "learning_rate": 0.0005842760831698138,
      "loss": 3.2383,
      "step": 23850
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9382197856903076,
      "learning_rate": 0.0005842747762168796,
      "loss": 3.1214,
      "step": 23851
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.240724563598633,
      "learning_rate": 0.0005842734692110934,
      "loss": 3.2223,
      "step": 23852
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4680691957473755,
      "learning_rate": 0.0005842721621524556,
      "loss": 2.9598,
      "step": 23853
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.401243805885315,
      "learning_rate": 0.000584270855040966,
      "loss": 3.074,
      "step": 23854
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6365952491760254,
      "learning_rate": 0.0005842695478766253,
      "loss": 2.9809,
      "step": 23855
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2065589427948,
      "learning_rate": 0.0005842682406594333,
      "loss": 3.2123,
      "step": 23856
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.10099458694458,
      "learning_rate": 0.0005842669333893907,
      "loss": 2.8546,
      "step": 23857
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7654168605804443,
      "learning_rate": 0.0005842656260664974,
      "loss": 3.2776,
      "step": 23858
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.7966878414154053,
      "learning_rate": 0.0005842643186907537,
      "loss": 3.2241,
      "step": 23859
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.649686336517334,
      "learning_rate": 0.00058426301126216,
      "loss": 2.9668,
      "step": 23860
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.58436918258667,
      "learning_rate": 0.0005842617037807163,
      "loss": 3.133,
      "step": 23861
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2066004276275635,
      "learning_rate": 0.0005842603962464231,
      "loss": 3.1911,
      "step": 23862
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.538932204246521,
      "learning_rate": 0.0005842590886592805,
      "loss": 3.0854,
      "step": 23863
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1257688999176025,
      "learning_rate": 0.0005842577810192888,
      "loss": 3.0734,
      "step": 23864
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7451344728469849,
      "learning_rate": 0.0005842564733264481,
      "loss": 3.0955,
      "step": 23865
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9337998628616333,
      "learning_rate": 0.0005842551655807588,
      "loss": 3.2195,
      "step": 23866
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2828114032745361,
      "learning_rate": 0.0005842538577822212,
      "loss": 3.0297,
      "step": 23867
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4965327978134155,
      "learning_rate": 0.0005842525499308354,
      "loss": 3.0878,
      "step": 23868
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5119317770004272,
      "learning_rate": 0.0005842512420266016,
      "loss": 2.9943,
      "step": 23869
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3884053230285645,
      "learning_rate": 0.0005842499340695202,
      "loss": 2.9694,
      "step": 23870
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.473203182220459,
      "learning_rate": 0.0005842486260595914,
      "loss": 3.0413,
      "step": 23871
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.597683072090149,
      "learning_rate": 0.0005842473179968153,
      "loss": 3.0794,
      "step": 23872
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3215057849884033,
      "learning_rate": 0.0005842460098811924,
      "loss": 2.9119,
      "step": 23873
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5681711435317993,
      "learning_rate": 0.0005842447017127227,
      "loss": 3.1039,
      "step": 23874
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.629407286643982,
      "learning_rate": 0.0005842433934914066,
      "loss": 3.2819,
      "step": 23875
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.654690146446228,
      "learning_rate": 0.0005842420852172442,
      "loss": 2.9559,
      "step": 23876
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4262503385543823,
      "learning_rate": 0.0005842407768902359,
      "loss": 3.306,
      "step": 23877
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3001062870025635,
      "learning_rate": 0.0005842394685103819,
      "loss": 3.1114,
      "step": 23878
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4269412755966187,
      "learning_rate": 0.0005842381600776824,
      "loss": 2.8805,
      "step": 23879
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5414438247680664,
      "learning_rate": 0.0005842368515921376,
      "loss": 3.0711,
      "step": 23880
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.258291482925415,
      "learning_rate": 0.0005842355430537479,
      "loss": 2.8843,
      "step": 23881
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5798044204711914,
      "learning_rate": 0.0005842342344625134,
      "loss": 3.2335,
      "step": 23882
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.536777138710022,
      "learning_rate": 0.0005842329258184344,
      "loss": 3.1149,
      "step": 23883
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.906038522720337,
      "learning_rate": 0.0005842316171215112,
      "loss": 3.0812,
      "step": 23884
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5542616844177246,
      "learning_rate": 0.0005842303083717438,
      "loss": 3.1416,
      "step": 23885
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.146937370300293,
      "learning_rate": 0.0005842289995691327,
      "loss": 3.1045,
      "step": 23886
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6262716054916382,
      "learning_rate": 0.0005842276907136781,
      "loss": 3.0719,
      "step": 23887
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.349878191947937,
      "learning_rate": 0.0005842263818053801,
      "loss": 3.0597,
      "step": 23888
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4926385879516602,
      "learning_rate": 0.0005842250728442393,
      "loss": 3.0907,
      "step": 23889
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2005882263183594,
      "learning_rate": 0.0005842237638302554,
      "loss": 2.967,
      "step": 23890
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.490604281425476,
      "learning_rate": 0.0005842224547634292,
      "loss": 3.0496,
      "step": 23891
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3481032848358154,
      "learning_rate": 0.0005842211456437605,
      "loss": 3.1711,
      "step": 23892
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.71173894405365,
      "learning_rate": 0.0005842198364712498,
      "loss": 3.1396,
      "step": 23893
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.429478645324707,
      "learning_rate": 0.0005842185272458972,
      "loss": 3.0392,
      "step": 23894
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5131639242172241,
      "learning_rate": 0.0005842172179677031,
      "loss": 3.3655,
      "step": 23895
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2433662414550781,
      "learning_rate": 0.0005842159086366676,
      "loss": 3.1595,
      "step": 23896
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3292603492736816,
      "learning_rate": 0.0005842145992527909,
      "loss": 3.1855,
      "step": 23897
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0925588607788086,
      "learning_rate": 0.0005842132898160735,
      "loss": 3.2529,
      "step": 23898
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7655768394470215,
      "learning_rate": 0.0005842119803265154,
      "loss": 3.0808,
      "step": 23899
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5598156452178955,
      "learning_rate": 0.000584210670784117,
      "loss": 3.1741,
      "step": 23900
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.519015312194824,
      "learning_rate": 0.0005842093611888784,
      "loss": 3.0552,
      "step": 23901
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4109874963760376,
      "learning_rate": 0.0005842080515408001,
      "loss": 2.8532,
      "step": 23902
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7140411138534546,
      "learning_rate": 0.0005842067418398819,
      "loss": 3.0722,
      "step": 23903
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3639683723449707,
      "learning_rate": 0.0005842054320861244,
      "loss": 2.8339,
      "step": 23904
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8539702892303467,
      "learning_rate": 0.0005842041222795277,
      "loss": 3.0871,
      "step": 23905
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.446954369544983,
      "learning_rate": 0.0005842028124200923,
      "loss": 3.1205,
      "step": 23906
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.104368209838867,
      "learning_rate": 0.000584201502507818,
      "loss": 3.0488,
      "step": 23907
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8746449947357178,
      "learning_rate": 0.0005842001925427053,
      "loss": 3.0063,
      "step": 23908
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.394989013671875,
      "learning_rate": 0.0005841988825247545,
      "loss": 2.981,
      "step": 23909
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.429611086845398,
      "learning_rate": 0.0005841975724539657,
      "loss": 2.9436,
      "step": 23910
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9649851322174072,
      "learning_rate": 0.0005841962623303392,
      "loss": 3.0909,
      "step": 23911
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.558506965637207,
      "learning_rate": 0.0005841949521538753,
      "loss": 3.1904,
      "step": 23912
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.690864086151123,
      "learning_rate": 0.0005841936419245743,
      "loss": 3.0601,
      "step": 23913
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8650845289230347,
      "learning_rate": 0.0005841923316424361,
      "loss": 3.1697,
      "step": 23914
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7802428007125854,
      "learning_rate": 0.0005841910213074614,
      "loss": 3.0876,
      "step": 23915
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.118234872817993,
      "learning_rate": 0.0005841897109196501,
      "loss": 3.1441,
      "step": 23916
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.15018892288208,
      "learning_rate": 0.0005841884004790025,
      "loss": 3.0796,
      "step": 23917
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.890641450881958,
      "learning_rate": 0.000584187089985519,
      "loss": 3.385,
      "step": 23918
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.1794354915618896,
      "learning_rate": 0.0005841857794391998,
      "loss": 3.0536,
      "step": 23919
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3993592262268066,
      "learning_rate": 0.0005841844688400451,
      "loss": 3.0419,
      "step": 23920
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8116165399551392,
      "learning_rate": 0.000584183158188055,
      "loss": 2.9555,
      "step": 23921
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.076754093170166,
      "learning_rate": 0.0005841818474832301,
      "loss": 2.9573,
      "step": 23922
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1064162254333496,
      "learning_rate": 0.0005841805367255704,
      "loss": 3.3594,
      "step": 23923
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8816105127334595,
      "learning_rate": 0.000584179225915076,
      "loss": 3.0509,
      "step": 23924
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0051939487457275,
      "learning_rate": 0.0005841779150517473,
      "loss": 3.0457,
      "step": 23925
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4178824424743652,
      "learning_rate": 0.0005841766041355848,
      "loss": 3.1971,
      "step": 23926
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.31732439994812,
      "learning_rate": 0.0005841752931665884,
      "loss": 2.9152,
      "step": 23927
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6706668138504028,
      "learning_rate": 0.0005841739821447583,
      "loss": 3.1331,
      "step": 23928
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9236949682235718,
      "learning_rate": 0.0005841726710700951,
      "loss": 3.1527,
      "step": 23929
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7093596458435059,
      "learning_rate": 0.0005841713599425988,
      "loss": 3.0387,
      "step": 23930
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8045387268066406,
      "learning_rate": 0.0005841700487622696,
      "loss": 2.967,
      "step": 23931
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.94241464138031,
      "learning_rate": 0.0005841687375291079,
      "loss": 3.0311,
      "step": 23932
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2835776805877686,
      "learning_rate": 0.0005841674262431137,
      "loss": 3.1678,
      "step": 23933
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3715837001800537,
      "learning_rate": 0.0005841661149042876,
      "loss": 2.9909,
      "step": 23934
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.313668966293335,
      "learning_rate": 0.0005841648035126297,
      "loss": 2.9378,
      "step": 23935
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7573455572128296,
      "learning_rate": 0.00058416349206814,
      "loss": 2.7932,
      "step": 23936
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7681382894515991,
      "learning_rate": 0.0005841621805708191,
      "loss": 3.2331,
      "step": 23937
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6712186336517334,
      "learning_rate": 0.000584160869020667,
      "loss": 3.1118,
      "step": 23938
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4783873558044434,
      "learning_rate": 0.000584159557417684,
      "loss": 3.3521,
      "step": 23939
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.493527889251709,
      "learning_rate": 0.0005841582457618704,
      "loss": 2.8251,
      "step": 23940
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.649717926979065,
      "learning_rate": 0.0005841569340532265,
      "loss": 2.8976,
      "step": 23941
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6135905981063843,
      "learning_rate": 0.0005841556222917524,
      "loss": 3.2134,
      "step": 23942
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.585426926612854,
      "learning_rate": 0.0005841543104774484,
      "loss": 3.277,
      "step": 23943
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4442824125289917,
      "learning_rate": 0.0005841529986103148,
      "loss": 3.0754,
      "step": 23944
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5352493524551392,
      "learning_rate": 0.0005841516866903517,
      "loss": 3.1312,
      "step": 23945
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2398947477340698,
      "learning_rate": 0.0005841503747175596,
      "loss": 2.9492,
      "step": 23946
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2570971250534058,
      "learning_rate": 0.0005841490626919384,
      "loss": 3.0703,
      "step": 23947
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.772924780845642,
      "learning_rate": 0.0005841477506134886,
      "loss": 3.078,
      "step": 23948
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.43802809715271,
      "learning_rate": 0.0005841464384822104,
      "loss": 3.1428,
      "step": 23949
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.483686089515686,
      "learning_rate": 0.0005841451262981039,
      "loss": 3.0765,
      "step": 23950
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.526963710784912,
      "learning_rate": 0.0005841438140611696,
      "loss": 3.0345,
      "step": 23951
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6326955556869507,
      "learning_rate": 0.0005841425017714076,
      "loss": 3.3645,
      "step": 23952
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1040499210357666,
      "learning_rate": 0.000584141189428818,
      "loss": 2.9576,
      "step": 23953
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5704346895217896,
      "learning_rate": 0.0005841398770334013,
      "loss": 2.9128,
      "step": 23954
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6486984491348267,
      "learning_rate": 0.0005841385645851576,
      "loss": 2.8565,
      "step": 23955
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6901921033859253,
      "learning_rate": 0.0005841372520840872,
      "loss": 2.9699,
      "step": 23956
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4638181924819946,
      "learning_rate": 0.0005841359395301904,
      "loss": 2.8075,
      "step": 23957
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3228118419647217,
      "learning_rate": 0.0005841346269234672,
      "loss": 3.0681,
      "step": 23958
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9994438886642456,
      "learning_rate": 0.0005841333142639181,
      "loss": 3.092,
      "step": 23959
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9007136821746826,
      "learning_rate": 0.0005841320015515432,
      "loss": 3.1213,
      "step": 23960
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5760351419448853,
      "learning_rate": 0.0005841306887863428,
      "loss": 2.9583,
      "step": 23961
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4491084814071655,
      "learning_rate": 0.0005841293759683172,
      "loss": 3.1422,
      "step": 23962
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4435949325561523,
      "learning_rate": 0.0005841280630974667,
      "loss": 3.0747,
      "step": 23963
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9012887477874756,
      "learning_rate": 0.0005841267501737912,
      "loss": 2.9791,
      "step": 23964
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2708607912063599,
      "learning_rate": 0.0005841254371972912,
      "loss": 3.0582,
      "step": 23965
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.023577928543091,
      "learning_rate": 0.000584124124167967,
      "loss": 3.2395,
      "step": 23966
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4929920434951782,
      "learning_rate": 0.0005841228110858188,
      "loss": 2.874,
      "step": 23967
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4748281240463257,
      "learning_rate": 0.0005841214979508467,
      "loss": 2.9031,
      "step": 23968
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.79227614402771,
      "learning_rate": 0.0005841201847630511,
      "loss": 3.0439,
      "step": 23969
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5811851024627686,
      "learning_rate": 0.0005841188715224321,
      "loss": 3.124,
      "step": 23970
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.781095266342163,
      "learning_rate": 0.0005841175582289903,
      "loss": 2.9806,
      "step": 23971
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7153706550598145,
      "learning_rate": 0.0005841162448827255,
      "loss": 3.14,
      "step": 23972
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.486742615699768,
      "learning_rate": 0.0005841149314836381,
      "loss": 3.1965,
      "step": 23973
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5828502178192139,
      "learning_rate": 0.0005841136180317284,
      "loss": 2.9863,
      "step": 23974
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6684763431549072,
      "learning_rate": 0.0005841123045269966,
      "loss": 3.0833,
      "step": 23975
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4296848773956299,
      "learning_rate": 0.0005841109909694431,
      "loss": 3.2661,
      "step": 23976
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3990727663040161,
      "learning_rate": 0.0005841096773590678,
      "loss": 3.1439,
      "step": 23977
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.178809642791748,
      "learning_rate": 0.0005841083636958713,
      "loss": 3.3106,
      "step": 23978
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.391628384590149,
      "learning_rate": 0.0005841070499798536,
      "loss": 3.3065,
      "step": 23979
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4603642225265503,
      "learning_rate": 0.0005841057362110151,
      "loss": 3.1229,
      "step": 23980
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8672903776168823,
      "learning_rate": 0.0005841044223893559,
      "loss": 3.0803,
      "step": 23981
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4050482511520386,
      "learning_rate": 0.0005841031085148764,
      "loss": 2.969,
      "step": 23982
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1627743244171143,
      "learning_rate": 0.0005841017945875768,
      "loss": 2.9863,
      "step": 23983
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7799683809280396,
      "learning_rate": 0.0005841004806074573,
      "loss": 3.184,
      "step": 23984
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5198962688446045,
      "learning_rate": 0.000584099166574518,
      "loss": 3.2387,
      "step": 23985
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1217055320739746,
      "learning_rate": 0.0005840978524887595,
      "loss": 3.2536,
      "step": 23986
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.368134617805481,
      "learning_rate": 0.0005840965383501817,
      "loss": 3.3479,
      "step": 23987
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2782528400421143,
      "learning_rate": 0.000584095224158785,
      "loss": 3.0229,
      "step": 23988
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.432657241821289,
      "learning_rate": 0.0005840939099145697,
      "loss": 3.0248,
      "step": 23989
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7351799011230469,
      "learning_rate": 0.0005840925956175361,
      "loss": 3.1278,
      "step": 23990
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3618428707122803,
      "learning_rate": 0.0005840912812676841,
      "loss": 3.0189,
      "step": 23991
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7304896116256714,
      "learning_rate": 0.0005840899668650144,
      "loss": 3.0381,
      "step": 23992
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.415317177772522,
      "learning_rate": 0.0005840886524095269,
      "loss": 3.0977,
      "step": 23993
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7159303426742554,
      "learning_rate": 0.0005840873379012218,
      "loss": 3.029,
      "step": 23994
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3607392311096191,
      "learning_rate": 0.0005840860233400996,
      "loss": 3.1598,
      "step": 23995
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.2843637466430664,
      "learning_rate": 0.0005840847087261606,
      "loss": 3.1677,
      "step": 23996
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2941001653671265,
      "learning_rate": 0.0005840833940594047,
      "loss": 2.8948,
      "step": 23997
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3984328508377075,
      "learning_rate": 0.0005840820793398324,
      "loss": 2.9412,
      "step": 23998
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8927116394042969,
      "learning_rate": 0.0005840807645674438,
      "loss": 2.798,
      "step": 23999
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.897916078567505,
      "learning_rate": 0.0005840794497422394,
      "loss": 3.284,
      "step": 24000
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.8239777088165283,
      "learning_rate": 0.0005840781348642192,
      "loss": 3.2557,
      "step": 24001
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7129336595535278,
      "learning_rate": 0.0005840768199333834,
      "loss": 3.0851,
      "step": 24002
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4687163829803467,
      "learning_rate": 0.0005840755049497325,
      "loss": 3.0545,
      "step": 24003
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5296108722686768,
      "learning_rate": 0.0005840741899132666,
      "loss": 3.15,
      "step": 24004
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6442992687225342,
      "learning_rate": 0.0005840728748239859,
      "loss": 2.9642,
      "step": 24005
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6188724040985107,
      "learning_rate": 0.0005840715596818906,
      "loss": 3.1101,
      "step": 24006
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7906973361968994,
      "learning_rate": 0.0005840702444869811,
      "loss": 3.0663,
      "step": 24007
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.092972993850708,
      "learning_rate": 0.0005840689292392577,
      "loss": 3.0374,
      "step": 24008
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4521020650863647,
      "learning_rate": 0.0005840676139387205,
      "loss": 3.0894,
      "step": 24009
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4528123140335083,
      "learning_rate": 0.0005840662985853696,
      "loss": 3.3071,
      "step": 24010
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4239633083343506,
      "learning_rate": 0.0005840649831792055,
      "loss": 2.954,
      "step": 24011
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5431069135665894,
      "learning_rate": 0.0005840636677202283,
      "loss": 3.1816,
      "step": 24012
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6713777780532837,
      "learning_rate": 0.0005840623522084384,
      "loss": 2.833,
      "step": 24013
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.583070158958435,
      "learning_rate": 0.000584061036643836,
      "loss": 3.0602,
      "step": 24014
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5940049886703491,
      "learning_rate": 0.000584059721026421,
      "loss": 3.0167,
      "step": 24015
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9849690198898315,
      "learning_rate": 0.0005840584053561943,
      "loss": 3.1672,
      "step": 24016
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7056653499603271,
      "learning_rate": 0.0005840570896331555,
      "loss": 3.1734,
      "step": 24017
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6701881885528564,
      "learning_rate": 0.0005840557738573053,
      "loss": 2.9765,
      "step": 24018
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5271124839782715,
      "learning_rate": 0.0005840544580286436,
      "loss": 3.0806,
      "step": 24019
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6049872636795044,
      "learning_rate": 0.0005840531421471711,
      "loss": 2.8672,
      "step": 24020
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.0484330654144287,
      "learning_rate": 0.0005840518262128875,
      "loss": 3.2553,
      "step": 24021
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.482234239578247,
      "learning_rate": 0.0005840505102257934,
      "loss": 3.0687,
      "step": 24022
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.572843313217163,
      "learning_rate": 0.0005840491941858889,
      "loss": 3.0193,
      "step": 24023
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3790644407272339,
      "learning_rate": 0.0005840478780931743,
      "loss": 2.9372,
      "step": 24024
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.866869330406189,
      "learning_rate": 0.0005840465619476499,
      "loss": 3.0791,
      "step": 24025
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.069211959838867,
      "learning_rate": 0.0005840452457493158,
      "loss": 3.012,
      "step": 24026
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3386646509170532,
      "learning_rate": 0.0005840439294981725,
      "loss": 3.2939,
      "step": 24027
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5008982419967651,
      "learning_rate": 0.0005840426131942199,
      "loss": 2.9755,
      "step": 24028
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8304578065872192,
      "learning_rate": 0.0005840412968374585,
      "loss": 3.1307,
      "step": 24029
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5038821697235107,
      "learning_rate": 0.0005840399804278884,
      "loss": 3.38,
      "step": 24030
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.407636046409607,
      "learning_rate": 0.0005840386639655099,
      "loss": 3.1398,
      "step": 24031
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.715421676635742,
      "learning_rate": 0.0005840373474503233,
      "loss": 3.1497,
      "step": 24032
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2808667421340942,
      "learning_rate": 0.0005840360308823288,
      "loss": 3.129,
      "step": 24033
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.24812388420105,
      "learning_rate": 0.0005840347142615265,
      "loss": 3.1747,
      "step": 24034
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5845661163330078,
      "learning_rate": 0.000584033397587917,
      "loss": 3.1211,
      "step": 24035
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8857612609863281,
      "learning_rate": 0.0005840320808615002,
      "loss": 2.9242,
      "step": 24036
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4636385440826416,
      "learning_rate": 0.0005840307640822765,
      "loss": 3.064,
      "step": 24037
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.652039885520935,
      "learning_rate": 0.0005840294472502462,
      "loss": 3.0623,
      "step": 24038
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5769762992858887,
      "learning_rate": 0.0005840281303654093,
      "loss": 3.2699,
      "step": 24039
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2573496103286743,
      "learning_rate": 0.0005840268134277664,
      "loss": 3.1321,
      "step": 24040
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2337480783462524,
      "learning_rate": 0.0005840254964373174,
      "loss": 3.1783,
      "step": 24041
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.842471718788147,
      "learning_rate": 0.0005840241793940627,
      "loss": 3.2828,
      "step": 24042
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6621341705322266,
      "learning_rate": 0.0005840228622980025,
      "loss": 3.2967,
      "step": 24043
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4502991437911987,
      "learning_rate": 0.0005840215451491373,
      "loss": 3.1465,
      "step": 24044
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.713809609413147,
      "learning_rate": 0.000584020227947467,
      "loss": 3.1986,
      "step": 24045
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.283703565597534,
      "learning_rate": 0.000584018910692992,
      "loss": 3.0485,
      "step": 24046
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7038381099700928,
      "learning_rate": 0.0005840175933857126,
      "loss": 2.839,
      "step": 24047
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2747379541397095,
      "learning_rate": 0.0005840162760256288,
      "loss": 3.1179,
      "step": 24048
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.981296181678772,
      "learning_rate": 0.0005840149586127412,
      "loss": 3.1827,
      "step": 24049
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.985398769378662,
      "learning_rate": 0.0005840136411470496,
      "loss": 2.9885,
      "step": 24050
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6606886386871338,
      "learning_rate": 0.0005840123236285547,
      "loss": 2.9093,
      "step": 24051
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.085172414779663,
      "learning_rate": 0.0005840110060572566,
      "loss": 3.1498,
      "step": 24052
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0965638160705566,
      "learning_rate": 0.0005840096884331553,
      "loss": 3.0086,
      "step": 24053
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.535894751548767,
      "learning_rate": 0.0005840083707562514,
      "loss": 2.9477,
      "step": 24054
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.079568862915039,
      "learning_rate": 0.000584007053026545,
      "loss": 3.1368,
      "step": 24055
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.608018398284912,
      "learning_rate": 0.0005840057352440362,
      "loss": 3.1309,
      "step": 24056
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6541556119918823,
      "learning_rate": 0.0005840044174087254,
      "loss": 3.2496,
      "step": 24057
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3294224739074707,
      "learning_rate": 0.0005840030995206128,
      "loss": 3.3374,
      "step": 24058
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4267477989196777,
      "learning_rate": 0.0005840017815796988,
      "loss": 3.2783,
      "step": 24059
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5692006349563599,
      "learning_rate": 0.0005840004635859833,
      "loss": 3.0112,
      "step": 24060
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5522243976593018,
      "learning_rate": 0.000583999145539467,
      "loss": 2.9771,
      "step": 24061
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.06050968170166,
      "learning_rate": 0.0005839978274401497,
      "loss": 3.1221,
      "step": 24062
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.645161509513855,
      "learning_rate": 0.0005839965092880319,
      "loss": 2.9555,
      "step": 24063
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8604024648666382,
      "learning_rate": 0.0005839951910831138,
      "loss": 3.3322,
      "step": 24064
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9710659980773926,
      "learning_rate": 0.0005839938728253956,
      "loss": 2.7176,
      "step": 24065
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3337945938110352,
      "learning_rate": 0.0005839925545148776,
      "loss": 3.2036,
      "step": 24066
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3940331935882568,
      "learning_rate": 0.0005839912361515601,
      "loss": 3.2076,
      "step": 24067
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.605732798576355,
      "learning_rate": 0.0005839899177354431,
      "loss": 3.2888,
      "step": 24068
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6039341688156128,
      "learning_rate": 0.0005839885992665272,
      "loss": 3.0963,
      "step": 24069
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6221140623092651,
      "learning_rate": 0.0005839872807448124,
      "loss": 2.9664,
      "step": 24070
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5200589895248413,
      "learning_rate": 0.0005839859621702989,
      "loss": 3.2082,
      "step": 24071
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2882719039916992,
      "learning_rate": 0.0005839846435429871,
      "loss": 3.1941,
      "step": 24072
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6397050619125366,
      "learning_rate": 0.0005839833248628773,
      "loss": 2.9854,
      "step": 24073
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6288046836853027,
      "learning_rate": 0.0005839820061299695,
      "loss": 3.1131,
      "step": 24074
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5843085050582886,
      "learning_rate": 0.0005839806873442642,
      "loss": 2.9545,
      "step": 24075
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3942500352859497,
      "learning_rate": 0.0005839793685057614,
      "loss": 3.3264,
      "step": 24076
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8779276609420776,
      "learning_rate": 0.0005839780496144616,
      "loss": 2.8163,
      "step": 24077
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4112944602966309,
      "learning_rate": 0.0005839767306703649,
      "loss": 2.8703,
      "step": 24078
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9612932205200195,
      "learning_rate": 0.0005839754116734714,
      "loss": 3.057,
      "step": 24079
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.0437207221984863,
      "learning_rate": 0.0005839740926237816,
      "loss": 3.2084,
      "step": 24080
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.556380033493042,
      "learning_rate": 0.0005839727735212957,
      "loss": 3.1453,
      "step": 24081
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4045684337615967,
      "learning_rate": 0.000583971454366014,
      "loss": 3.1148,
      "step": 24082
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4221503734588623,
      "learning_rate": 0.0005839701351579364,
      "loss": 3.1318,
      "step": 24083
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7564436197280884,
      "learning_rate": 0.0005839688158970637,
      "loss": 3.2405,
      "step": 24084
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.625816822052002,
      "learning_rate": 0.0005839674965833956,
      "loss": 3.1528,
      "step": 24085
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3550392389297485,
      "learning_rate": 0.0005839661772169326,
      "loss": 3.0192,
      "step": 24086
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7797964811325073,
      "learning_rate": 0.000583964857797675,
      "loss": 3.1402,
      "step": 24087
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7654106616973877,
      "learning_rate": 0.000583963538325623,
      "loss": 3.0605,
      "step": 24088
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6899604797363281,
      "learning_rate": 0.0005839622188007768,
      "loss": 3.0851,
      "step": 24089
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.46133291721344,
      "learning_rate": 0.0005839608992231366,
      "loss": 3.3472,
      "step": 24090
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5071150064468384,
      "learning_rate": 0.0005839595795927027,
      "loss": 3.0112,
      "step": 24091
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7283353805541992,
      "learning_rate": 0.0005839582599094754,
      "loss": 3.0605,
      "step": 24092
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.675299882888794,
      "learning_rate": 0.0005839569401734549,
      "loss": 3.1564,
      "step": 24093
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4152369499206543,
      "learning_rate": 0.0005839556203846415,
      "loss": 3.098,
      "step": 24094
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5409421920776367,
      "learning_rate": 0.0005839543005430354,
      "loss": 3.0836,
      "step": 24095
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7116855382919312,
      "learning_rate": 0.0005839529806486367,
      "loss": 3.1527,
      "step": 24096
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5365948677062988,
      "learning_rate": 0.0005839516607014459,
      "loss": 3.2445,
      "step": 24097
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.689953088760376,
      "learning_rate": 0.000583950340701463,
      "loss": 3.0766,
      "step": 24098
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.109792947769165,
      "learning_rate": 0.0005839490206486884,
      "loss": 2.8433,
      "step": 24099
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4190196990966797,
      "learning_rate": 0.0005839477005431225,
      "loss": 3.157,
      "step": 24100
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7838753461837769,
      "learning_rate": 0.0005839463803847651,
      "loss": 3.1475,
      "step": 24101
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.6891238689422607,
      "learning_rate": 0.000583945060173617,
      "loss": 3.1216,
      "step": 24102
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6799960136413574,
      "learning_rate": 0.0005839437399096779,
      "loss": 3.1689,
      "step": 24103
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7299871444702148,
      "learning_rate": 0.0005839424195929483,
      "loss": 2.9209,
      "step": 24104
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.750974178314209,
      "learning_rate": 0.0005839410992234286,
      "loss": 3.1309,
      "step": 24105
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.510563373565674,
      "learning_rate": 0.0005839397788011188,
      "loss": 3.115,
      "step": 24106
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.247572183609009,
      "learning_rate": 0.0005839384583260193,
      "loss": 2.9454,
      "step": 24107
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.098546266555786,
      "learning_rate": 0.0005839371377981302,
      "loss": 2.9379,
      "step": 24108
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7222448587417603,
      "learning_rate": 0.0005839358172174519,
      "loss": 3.2508,
      "step": 24109
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.200428009033203,
      "learning_rate": 0.0005839344965839846,
      "loss": 2.8861,
      "step": 24110
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.386122465133667,
      "learning_rate": 0.0005839331758977284,
      "loss": 3.0152,
      "step": 24111
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9577966928482056,
      "learning_rate": 0.0005839318551586837,
      "loss": 2.9679,
      "step": 24112
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.52899169921875,
      "learning_rate": 0.0005839305343668507,
      "loss": 3.1179,
      "step": 24113
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8616935014724731,
      "learning_rate": 0.0005839292135222297,
      "loss": 3.1038,
      "step": 24114
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5181078910827637,
      "learning_rate": 0.0005839278926248209,
      "loss": 3.1183,
      "step": 24115
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2042953968048096,
      "learning_rate": 0.0005839265716746244,
      "loss": 2.7325,
      "step": 24116
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6355255842208862,
      "learning_rate": 0.0005839252506716408,
      "loss": 3.2402,
      "step": 24117
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.431241512298584,
      "learning_rate": 0.00058392392961587,
      "loss": 2.8666,
      "step": 24118
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3995816707611084,
      "learning_rate": 0.0005839226085073124,
      "loss": 3.1982,
      "step": 24119
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6988002061843872,
      "learning_rate": 0.0005839212873459684,
      "loss": 3.1251,
      "step": 24120
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.633711338043213,
      "learning_rate": 0.0005839199661318379,
      "loss": 3.24,
      "step": 24121
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4527106285095215,
      "learning_rate": 0.0005839186448649213,
      "loss": 2.9962,
      "step": 24122
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.663601279258728,
      "learning_rate": 0.0005839173235452189,
      "loss": 3.2087,
      "step": 24123
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7033277750015259,
      "learning_rate": 0.0005839160021727309,
      "loss": 3.1422,
      "step": 24124
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9157263040542603,
      "learning_rate": 0.0005839146807474577,
      "loss": 3.1023,
      "step": 24125
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2880690097808838,
      "learning_rate": 0.0005839133592693992,
      "loss": 2.9397,
      "step": 24126
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.8736810684204102,
      "learning_rate": 0.000583912037738556,
      "loss": 2.8991,
      "step": 24127
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5210298299789429,
      "learning_rate": 0.0005839107161549281,
      "loss": 3.351,
      "step": 24128
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.089613437652588,
      "learning_rate": 0.0005839093945185158,
      "loss": 3.0127,
      "step": 24129
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5698440074920654,
      "learning_rate": 0.0005839080728293195,
      "loss": 3.3796,
      "step": 24130
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3903964757919312,
      "learning_rate": 0.0005839067510873393,
      "loss": 2.6453,
      "step": 24131
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9415634870529175,
      "learning_rate": 0.0005839054292925754,
      "loss": 3.1789,
      "step": 24132
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.5873022079467773,
      "learning_rate": 0.0005839041074450282,
      "loss": 3.1799,
      "step": 24133
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.457021713256836,
      "learning_rate": 0.0005839027855446978,
      "loss": 3.2634,
      "step": 24134
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6121100187301636,
      "learning_rate": 0.0005839014635915846,
      "loss": 2.9909,
      "step": 24135
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.252190589904785,
      "learning_rate": 0.0005839001415856887,
      "loss": 3.0549,
      "step": 24136
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.453782081604004,
      "learning_rate": 0.0005838988195270104,
      "loss": 3.1178,
      "step": 24137
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.67051362991333,
      "learning_rate": 0.0005838974974155499,
      "loss": 3.0014,
      "step": 24138
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.6795082092285156,
      "learning_rate": 0.0005838961752513076,
      "loss": 3.1936,
      "step": 24139
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.915381669998169,
      "learning_rate": 0.0005838948530342837,
      "loss": 3.1771,
      "step": 24140
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.202751398086548,
      "learning_rate": 0.0005838935307644782,
      "loss": 3.084,
      "step": 24141
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5776348114013672,
      "learning_rate": 0.0005838922084418917,
      "loss": 3.2668,
      "step": 24142
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.786406993865967,
      "learning_rate": 0.0005838908860665242,
      "loss": 3.1059,
      "step": 24143
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.847938299179077,
      "learning_rate": 0.0005838895636383761,
      "loss": 3.099,
      "step": 24144
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9949488639831543,
      "learning_rate": 0.0005838882411574474,
      "loss": 2.9367,
      "step": 24145
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.1602306365966797,
      "learning_rate": 0.0005838869186237386,
      "loss": 2.9015,
      "step": 24146
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.7129430770874023,
      "learning_rate": 0.00058388559603725,
      "loss": 3.1798,
      "step": 24147
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.306757926940918,
      "learning_rate": 0.0005838842733979816,
      "loss": 2.7924,
      "step": 24148
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.446019172668457,
      "learning_rate": 0.0005838829507059338,
      "loss": 3.274,
      "step": 24149
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.700779438018799,
      "learning_rate": 0.0005838816279611066,
      "loss": 3.2914,
      "step": 24150
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.370983362197876,
      "learning_rate": 0.0005838803051635006,
      "loss": 2.8336,
      "step": 24151
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.699897289276123,
      "learning_rate": 0.0005838789823131159,
      "loss": 3.1118,
      "step": 24152
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.3919787406921387,
      "learning_rate": 0.0005838776594099528,
      "loss": 3.157,
      "step": 24153
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5787826776504517,
      "learning_rate": 0.0005838763364540113,
      "loss": 2.9497,
      "step": 24154
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.002854347229004,
      "learning_rate": 0.0005838750134452921,
      "loss": 3.2761,
      "step": 24155
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.866787314414978,
      "learning_rate": 0.0005838736903837949,
      "loss": 3.0924,
      "step": 24156
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.532056212425232,
      "learning_rate": 0.0005838723672695204,
      "loss": 3.2475,
      "step": 24157
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3001036643981934,
      "learning_rate": 0.0005838710441024686,
      "loss": 3.2892,
      "step": 24158
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2829488515853882,
      "learning_rate": 0.0005838697208826398,
      "loss": 3.0877,
      "step": 24159
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.417617917060852,
      "learning_rate": 0.0005838683976100343,
      "loss": 3.1834,
      "step": 24160
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.2960550785064697,
      "learning_rate": 0.0005838670742846523,
      "loss": 3.044,
      "step": 24161
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.803600788116455,
      "learning_rate": 0.000583865750906494,
      "loss": 2.9522,
      "step": 24162
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.052525281906128,
      "learning_rate": 0.0005838644274755597,
      "loss": 2.8032,
      "step": 24163
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4515941143035889,
      "learning_rate": 0.0005838631039918497,
      "loss": 3.2827,
      "step": 24164
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.30429744720459,
      "learning_rate": 0.0005838617804553642,
      "loss": 2.967,
      "step": 24165
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.3946003913879395,
      "learning_rate": 0.0005838604568661034,
      "loss": 3.0064,
      "step": 24166
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.483875036239624,
      "learning_rate": 0.0005838591332240677,
      "loss": 2.7847,
      "step": 24167
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.1227378845214844,
      "learning_rate": 0.0005838578095292571,
      "loss": 3.1353,
      "step": 24168
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.4599788188934326,
      "learning_rate": 0.0005838564857816719,
      "loss": 2.9893,
      "step": 24169
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.791734218597412,
      "learning_rate": 0.0005838551619813126,
      "loss": 2.978,
      "step": 24170
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5326415300369263,
      "learning_rate": 0.0005838538381281792,
      "loss": 3.1203,
      "step": 24171
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.910706877708435,
      "learning_rate": 0.000583852514222272,
      "loss": 3.3158,
      "step": 24172
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.8798983097076416,
      "learning_rate": 0.0005838511902635913,
      "loss": 3.1209,
      "step": 24173
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2271761894226074,
      "learning_rate": 0.0005838498662521372,
      "loss": 3.3163,
      "step": 24174
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6029157638549805,
      "learning_rate": 0.0005838485421879101,
      "loss": 2.7985,
      "step": 24175
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9263916015625,
      "learning_rate": 0.0005838472180709103,
      "loss": 3.0663,
      "step": 24176
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.757262110710144,
      "learning_rate": 0.0005838458939011378,
      "loss": 3.0174,
      "step": 24177
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5752958059310913,
      "learning_rate": 0.000583844569678593,
      "loss": 3.1969,
      "step": 24178
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.6267588138580322,
      "learning_rate": 0.0005838432454032763,
      "loss": 3.1915,
      "step": 24179
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9313828945159912,
      "learning_rate": 0.0005838419210751876,
      "loss": 3.3176,
      "step": 24180
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.552697777748108,
      "learning_rate": 0.0005838405966943274,
      "loss": 2.9912,
      "step": 24181
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.632215976715088,
      "learning_rate": 0.0005838392722606958,
      "loss": 3.0454,
      "step": 24182
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.9041839838027954,
      "learning_rate": 0.0005838379477742933,
      "loss": 3.1638,
      "step": 24183
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.123351812362671,
      "learning_rate": 0.0005838366232351199,
      "loss": 3.0873,
      "step": 24184
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.4016530513763428,
      "learning_rate": 0.0005838352986431758,
      "loss": 3.2119,
      "step": 24185
    },
    {
      "epoch": 0.31,
      "grad_norm": 2.2534267902374268,
      "learning_rate": 0.0005838339739984614,
      "loss": 2.7778,
      "step": 24186
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.336409091949463,
      "learning_rate": 0.000583832649300977,
      "loss": 3.212,
      "step": 24187
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.7716418504714966,
      "learning_rate": 0.0005838313245507227,
      "loss": 3.0221,
      "step": 24188
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.67629075050354,
      "learning_rate": 0.0005838299997476988,
      "loss": 3.3134,
      "step": 24189
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5731416940689087,
      "learning_rate": 0.0005838286748919056,
      "loss": 3.2265,
      "step": 24190
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5827362537384033,
      "learning_rate": 0.0005838273499833432,
      "loss": 3.3445,
      "step": 24191
    },
    {
      "epoch": 0.31,
      "grad_norm": 1.5148051977157593,
      "learning_rate": 0.000583826025022012,
      "loss": 2.8452,
      "step": 24192
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6212821006774902,
      "learning_rate": 0.000583824700007912,
      "loss": 3.159,
      "step": 24193
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7017011642456055,
      "learning_rate": 0.0005838233749410438,
      "loss": 2.9936,
      "step": 24194
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5288978815078735,
      "learning_rate": 0.0005838220498214075,
      "loss": 3.0268,
      "step": 24195
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5190110206604004,
      "learning_rate": 0.0005838207246490033,
      "loss": 2.9709,
      "step": 24196
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.181061029434204,
      "learning_rate": 0.0005838193994238314,
      "loss": 3.1715,
      "step": 24197
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.167151927947998,
      "learning_rate": 0.0005838180741458922,
      "loss": 3.2029,
      "step": 24198
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8575068712234497,
      "learning_rate": 0.0005838167488151859,
      "loss": 3.1047,
      "step": 24199
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3786633014678955,
      "learning_rate": 0.0005838154234317126,
      "loss": 3.0552,
      "step": 24200
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6277730464935303,
      "learning_rate": 0.0005838140979954727,
      "loss": 2.9869,
      "step": 24201
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7811323404312134,
      "learning_rate": 0.0005838127725064665,
      "loss": 2.9382,
      "step": 24202
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5782639980316162,
      "learning_rate": 0.000583811446964694,
      "loss": 3.0439,
      "step": 24203
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.954999566078186,
      "learning_rate": 0.0005838101213701556,
      "loss": 3.2837,
      "step": 24204
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8294837474822998,
      "learning_rate": 0.0005838087957228517,
      "loss": 3.0661,
      "step": 24205
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.546886682510376,
      "learning_rate": 0.0005838074700227822,
      "loss": 3.112,
      "step": 24206
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6427775621414185,
      "learning_rate": 0.0005838061442699476,
      "loss": 3.1152,
      "step": 24207
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.652155876159668,
      "learning_rate": 0.0005838048184643481,
      "loss": 3.1103,
      "step": 24208
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5691943168640137,
      "learning_rate": 0.0005838034926059839,
      "loss": 2.8317,
      "step": 24209
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.518876552581787,
      "learning_rate": 0.0005838021666948553,
      "loss": 3.3978,
      "step": 24210
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5858901739120483,
      "learning_rate": 0.0005838008407309625,
      "loss": 3.1206,
      "step": 24211
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6334905624389648,
      "learning_rate": 0.0005837995147143058,
      "loss": 2.8796,
      "step": 24212
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5863491296768188,
      "learning_rate": 0.0005837981886448854,
      "loss": 3.3483,
      "step": 24213
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5856596231460571,
      "learning_rate": 0.0005837968625227015,
      "loss": 2.9291,
      "step": 24214
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3194572925567627,
      "learning_rate": 0.0005837955363477544,
      "loss": 3.0893,
      "step": 24215
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6835463047027588,
      "learning_rate": 0.0005837942101200444,
      "loss": 3.0793,
      "step": 24216
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.894012451171875,
      "learning_rate": 0.0005837928838395717,
      "loss": 3.3579,
      "step": 24217
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.484768271446228,
      "learning_rate": 0.0005837915575063366,
      "loss": 2.8796,
      "step": 24218
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9139204025268555,
      "learning_rate": 0.0005837902311203393,
      "loss": 2.9974,
      "step": 24219
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3833876848220825,
      "learning_rate": 0.00058378890468158,
      "loss": 2.9861,
      "step": 24220
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.497324824333191,
      "learning_rate": 0.000583787578190059,
      "loss": 2.9944,
      "step": 24221
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.054537057876587,
      "learning_rate": 0.0005837862516457764,
      "loss": 3.1267,
      "step": 24222
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6797916889190674,
      "learning_rate": 0.0005837849250487327,
      "loss": 3.0191,
      "step": 24223
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7546645402908325,
      "learning_rate": 0.000583783598398928,
      "loss": 3.2364,
      "step": 24224
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4570462703704834,
      "learning_rate": 0.0005837822716963626,
      "loss": 3.154,
      "step": 24225
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.904876708984375,
      "learning_rate": 0.0005837809449410367,
      "loss": 3.1312,
      "step": 24226
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.363155722618103,
      "learning_rate": 0.0005837796181329505,
      "loss": 2.9774,
      "step": 24227
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4570480585098267,
      "learning_rate": 0.0005837782912721044,
      "loss": 3.0037,
      "step": 24228
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5302232503890991,
      "learning_rate": 0.0005837769643584985,
      "loss": 3.1597,
      "step": 24229
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.076181411743164,
      "learning_rate": 0.0005837756373921331,
      "loss": 3.0247,
      "step": 24230
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.643568992614746,
      "learning_rate": 0.0005837743103730086,
      "loss": 3.2038,
      "step": 24231
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9170057773590088,
      "learning_rate": 0.000583772983301125,
      "loss": 3.0991,
      "step": 24232
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.946191430091858,
      "learning_rate": 0.0005837716561764826,
      "loss": 3.1315,
      "step": 24233
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7740675210952759,
      "learning_rate": 0.0005837703289990817,
      "loss": 3.1056,
      "step": 24234
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2822191715240479,
      "learning_rate": 0.0005837690017689226,
      "loss": 3.1575,
      "step": 24235
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3273123502731323,
      "learning_rate": 0.0005837676744860055,
      "loss": 2.9894,
      "step": 24236
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4263379573822021,
      "learning_rate": 0.0005837663471503305,
      "loss": 3.0326,
      "step": 24237
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6203876733779907,
      "learning_rate": 0.000583765019761898,
      "loss": 3.2212,
      "step": 24238
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.8188185691833496,
      "learning_rate": 0.0005837636923207084,
      "loss": 3.0951,
      "step": 24239
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.567799687385559,
      "learning_rate": 0.0005837623648267616,
      "loss": 2.9336,
      "step": 24240
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6016325950622559,
      "learning_rate": 0.0005837610372800581,
      "loss": 2.9902,
      "step": 24241
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.527526617050171,
      "learning_rate": 0.0005837597096805981,
      "loss": 3.1937,
      "step": 24242
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4561972618103027,
      "learning_rate": 0.0005837583820283817,
      "loss": 2.9528,
      "step": 24243
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1939215660095215,
      "learning_rate": 0.0005837570543234093,
      "loss": 2.9401,
      "step": 24244
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4391884803771973,
      "learning_rate": 0.0005837557265656811,
      "loss": 3.0102,
      "step": 24245
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4833170175552368,
      "learning_rate": 0.0005837543987551975,
      "loss": 3.2602,
      "step": 24246
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4632093906402588,
      "learning_rate": 0.0005837530708919585,
      "loss": 3.3255,
      "step": 24247
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4655736684799194,
      "learning_rate": 0.0005837517429759645,
      "loss": 3.0103,
      "step": 24248
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3032456636428833,
      "learning_rate": 0.0005837504150072156,
      "loss": 3.2658,
      "step": 24249
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6500493288040161,
      "learning_rate": 0.0005837490869857122,
      "loss": 3.1748,
      "step": 24250
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6004538536071777,
      "learning_rate": 0.0005837477589114544,
      "loss": 3.0056,
      "step": 24251
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7486587762832642,
      "learning_rate": 0.0005837464307844425,
      "loss": 3.0783,
      "step": 24252
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3654844760894775,
      "learning_rate": 0.0005837451026046769,
      "loss": 3.18,
      "step": 24253
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3976083993911743,
      "learning_rate": 0.0005837437743721577,
      "loss": 3.2722,
      "step": 24254
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4293338060379028,
      "learning_rate": 0.0005837424460868854,
      "loss": 3.3081,
      "step": 24255
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7816768884658813,
      "learning_rate": 0.0005837411177488596,
      "loss": 2.9661,
      "step": 24256
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5209287405014038,
      "learning_rate": 0.0005837397893580813,
      "loss": 3.1452,
      "step": 24257
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9502501487731934,
      "learning_rate": 0.0005837384609145502,
      "loss": 3.1276,
      "step": 24258
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.471888542175293,
      "learning_rate": 0.0005837371324182669,
      "loss": 2.8348,
      "step": 24259
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6425597667694092,
      "learning_rate": 0.0005837358038692315,
      "loss": 2.8157,
      "step": 24260
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6536325216293335,
      "learning_rate": 0.0005837344752674441,
      "loss": 3.0988,
      "step": 24261
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.658759355545044,
      "learning_rate": 0.0005837331466129054,
      "loss": 2.9622,
      "step": 24262
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.044480085372925,
      "learning_rate": 0.0005837318179056151,
      "loss": 3.3083,
      "step": 24263
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.947291612625122,
      "learning_rate": 0.0005837304891455738,
      "loss": 2.8965,
      "step": 24264
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4175328016281128,
      "learning_rate": 0.0005837291603327816,
      "loss": 3.1601,
      "step": 24265
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8975034952163696,
      "learning_rate": 0.0005837278314672388,
      "loss": 3.2717,
      "step": 24266
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6085662841796875,
      "learning_rate": 0.0005837265025489457,
      "loss": 2.9362,
      "step": 24267
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3573496341705322,
      "learning_rate": 0.0005837251735779025,
      "loss": 3.079,
      "step": 24268
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.287287473678589,
      "learning_rate": 0.0005837238445541093,
      "loss": 3.0478,
      "step": 24269
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1436755657196045,
      "learning_rate": 0.0005837225154775666,
      "loss": 3.02,
      "step": 24270
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6874223947525024,
      "learning_rate": 0.0005837211863482745,
      "loss": 3.3457,
      "step": 24271
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7574278116226196,
      "learning_rate": 0.0005837198571662333,
      "loss": 3.2364,
      "step": 24272
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.684862494468689,
      "learning_rate": 0.0005837185279314432,
      "loss": 3.0307,
      "step": 24273
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.552073359489441,
      "learning_rate": 0.0005837171986439045,
      "loss": 3.1172,
      "step": 24274
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7153067588806152,
      "learning_rate": 0.0005837158693036173,
      "loss": 2.9443,
      "step": 24275
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4081058502197266,
      "learning_rate": 0.0005837145399105821,
      "loss": 3.027,
      "step": 24276
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.529038906097412,
      "learning_rate": 0.000583713210464799,
      "loss": 2.8244,
      "step": 24277
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3370649814605713,
      "learning_rate": 0.0005837118809662681,
      "loss": 3.1782,
      "step": 24278
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.171832323074341,
      "learning_rate": 0.00058371055141499,
      "loss": 3.1848,
      "step": 24279
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1604487895965576,
      "learning_rate": 0.0005837092218109647,
      "loss": 2.9818,
      "step": 24280
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4976142644882202,
      "learning_rate": 0.0005837078921541925,
      "loss": 3.1448,
      "step": 24281
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.561684489250183,
      "learning_rate": 0.0005837065624446735,
      "loss": 2.9916,
      "step": 24282
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6658241748809814,
      "learning_rate": 0.0005837052326824083,
      "loss": 2.9315,
      "step": 24283
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.621650218963623,
      "learning_rate": 0.0005837039028673969,
      "loss": 3.066,
      "step": 24284
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5012203454971313,
      "learning_rate": 0.0005837025729996395,
      "loss": 3.0876,
      "step": 24285
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4688410758972168,
      "learning_rate": 0.0005837012430791365,
      "loss": 3.164,
      "step": 24286
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7651292085647583,
      "learning_rate": 0.0005836999131058881,
      "loss": 2.999,
      "step": 24287
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4115867614746094,
      "learning_rate": 0.0005836985830798945,
      "loss": 3.2215,
      "step": 24288
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7244691848754883,
      "learning_rate": 0.0005836972530011559,
      "loss": 3.2472,
      "step": 24289
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2417283058166504,
      "learning_rate": 0.0005836959228696728,
      "loss": 2.8775,
      "step": 24290
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4734907150268555,
      "learning_rate": 0.000583694592685445,
      "loss": 2.942,
      "step": 24291
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8622294664382935,
      "learning_rate": 0.0005836932624484734,
      "loss": 2.9574,
      "step": 24292
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.525655508041382,
      "learning_rate": 0.0005836919321587576,
      "loss": 2.9995,
      "step": 24293
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4491268396377563,
      "learning_rate": 0.0005836906018162982,
      "loss": 3.3729,
      "step": 24294
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.667438268661499,
      "learning_rate": 0.0005836892714210953,
      "loss": 3.1535,
      "step": 24295
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9408905506134033,
      "learning_rate": 0.0005836879409731493,
      "loss": 3.0763,
      "step": 24296
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.377311110496521,
      "learning_rate": 0.0005836866104724603,
      "loss": 3.434,
      "step": 24297
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.473468542098999,
      "learning_rate": 0.0005836852799190286,
      "loss": 3.2538,
      "step": 24298
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4452403783798218,
      "learning_rate": 0.0005836839493128544,
      "loss": 2.9463,
      "step": 24299
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6893599033355713,
      "learning_rate": 0.0005836826186539381,
      "loss": 3.0385,
      "step": 24300
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.920586347579956,
      "learning_rate": 0.0005836812879422799,
      "loss": 2.661,
      "step": 24301
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.2000060081481934,
      "learning_rate": 0.0005836799571778799,
      "loss": 3.4498,
      "step": 24302
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.038348913192749,
      "learning_rate": 0.0005836786263607384,
      "loss": 3.317,
      "step": 24303
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4274439811706543,
      "learning_rate": 0.0005836772954908557,
      "loss": 3.2298,
      "step": 24304
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6183176040649414,
      "learning_rate": 0.0005836759645682321,
      "loss": 2.9359,
      "step": 24305
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8162163496017456,
      "learning_rate": 0.0005836746335928678,
      "loss": 3.1721,
      "step": 24306
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.540778398513794,
      "learning_rate": 0.0005836733025647629,
      "loss": 3.1199,
      "step": 24307
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.466717004776001,
      "learning_rate": 0.0005836719714839178,
      "loss": 2.9424,
      "step": 24308
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9404001235961914,
      "learning_rate": 0.0005836706403503329,
      "loss": 2.9741,
      "step": 24309
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.496506690979004,
      "learning_rate": 0.0005836693091640081,
      "loss": 3.0458,
      "step": 24310
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.369756817817688,
      "learning_rate": 0.0005836679779249439,
      "loss": 3.0616,
      "step": 24311
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.981833577156067,
      "learning_rate": 0.0005836666466331404,
      "loss": 2.884,
      "step": 24312
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4090975522994995,
      "learning_rate": 0.000583665315288598,
      "loss": 2.9865,
      "step": 24313
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3775231838226318,
      "learning_rate": 0.0005836639838913168,
      "loss": 2.9329,
      "step": 24314
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4054213762283325,
      "learning_rate": 0.0005836626524412972,
      "loss": 3.0712,
      "step": 24315
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3068876266479492,
      "learning_rate": 0.0005836613209385393,
      "loss": 3.0308,
      "step": 24316
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3713299036026,
      "learning_rate": 0.0005836599893830434,
      "loss": 3.2146,
      "step": 24317
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4288454055786133,
      "learning_rate": 0.0005836586577748097,
      "loss": 3.0155,
      "step": 24318
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8935317993164062,
      "learning_rate": 0.0005836573261138386,
      "loss": 3.1496,
      "step": 24319
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3070292472839355,
      "learning_rate": 0.0005836559944001303,
      "loss": 3.0144,
      "step": 24320
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.630077838897705,
      "learning_rate": 0.0005836546626336849,
      "loss": 2.9772,
      "step": 24321
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9153896570205688,
      "learning_rate": 0.0005836533308145026,
      "loss": 3.0831,
      "step": 24322
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5816007852554321,
      "learning_rate": 0.000583651998942584,
      "loss": 2.9044,
      "step": 24323
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5269782543182373,
      "learning_rate": 0.0005836506670179291,
      "loss": 2.9223,
      "step": 24324
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.493417501449585,
      "learning_rate": 0.0005836493350405382,
      "loss": 3.0543,
      "step": 24325
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.427481770515442,
      "learning_rate": 0.0005836480030104115,
      "loss": 3.2985,
      "step": 24326
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.789441704750061,
      "learning_rate": 0.0005836466709275491,
      "loss": 2.9034,
      "step": 24327
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5568569898605347,
      "learning_rate": 0.0005836453387919518,
      "loss": 3.0424,
      "step": 24328
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6636754274368286,
      "learning_rate": 0.0005836440066036192,
      "loss": 3.3374,
      "step": 24329
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.774137258529663,
      "learning_rate": 0.0005836426743625519,
      "loss": 3.0073,
      "step": 24330
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.326570749282837,
      "learning_rate": 0.00058364134206875,
      "loss": 3.0034,
      "step": 24331
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2055702209472656,
      "learning_rate": 0.000583640009722214,
      "loss": 3.0401,
      "step": 24332
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.1667027473449707,
      "learning_rate": 0.0005836386773229439,
      "loss": 3.0398,
      "step": 24333
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4819881916046143,
      "learning_rate": 0.00058363734487094,
      "loss": 3.1889,
      "step": 24334
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2804118394851685,
      "learning_rate": 0.0005836360123662025,
      "loss": 3.0651,
      "step": 24335
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8378517627716064,
      "learning_rate": 0.0005836346798087317,
      "loss": 2.9766,
      "step": 24336
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.626758337020874,
      "learning_rate": 0.000583633347198528,
      "loss": 3.2179,
      "step": 24337
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.909859538078308,
      "learning_rate": 0.0005836320145355914,
      "loss": 3.1803,
      "step": 24338
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5939574241638184,
      "learning_rate": 0.0005836306818199224,
      "loss": 3.1488,
      "step": 24339
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.363229513168335,
      "learning_rate": 0.0005836293490515209,
      "loss": 2.8274,
      "step": 24340
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7375110387802124,
      "learning_rate": 0.0005836280162303874,
      "loss": 3.1776,
      "step": 24341
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5994142293930054,
      "learning_rate": 0.0005836266833565222,
      "loss": 3.0051,
      "step": 24342
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2316161394119263,
      "learning_rate": 0.0005836253504299254,
      "loss": 3.1097,
      "step": 24343
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0413875579833984,
      "learning_rate": 0.0005836240174505972,
      "loss": 3.0918,
      "step": 24344
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.711186170578003,
      "learning_rate": 0.000583622684418538,
      "loss": 3.2084,
      "step": 24345
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0438730716705322,
      "learning_rate": 0.000583621351333748,
      "loss": 3.0719,
      "step": 24346
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.989624261856079,
      "learning_rate": 0.0005836200181962275,
      "loss": 2.9532,
      "step": 24347
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.4018521308898926,
      "learning_rate": 0.0005836186850059766,
      "loss": 3.3297,
      "step": 24348
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.63442325592041,
      "learning_rate": 0.0005836173517629957,
      "loss": 3.4293,
      "step": 24349
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8064725399017334,
      "learning_rate": 0.0005836160184672849,
      "loss": 3.1743,
      "step": 24350
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6279025077819824,
      "learning_rate": 0.0005836146851188446,
      "loss": 3.0571,
      "step": 24351
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.689793586730957,
      "learning_rate": 0.0005836133517176749,
      "loss": 3.126,
      "step": 24352
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6272375583648682,
      "learning_rate": 0.0005836120182637762,
      "loss": 2.9789,
      "step": 24353
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5889862775802612,
      "learning_rate": 0.0005836106847571486,
      "loss": 3.1402,
      "step": 24354
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0420777797698975,
      "learning_rate": 0.0005836093511977924,
      "loss": 3.0165,
      "step": 24355
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4194116592407227,
      "learning_rate": 0.0005836080175857081,
      "loss": 2.9535,
      "step": 24356
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5816584825515747,
      "learning_rate": 0.0005836066839208955,
      "loss": 3.1827,
      "step": 24357
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6005353927612305,
      "learning_rate": 0.0005836053502033551,
      "loss": 2.9842,
      "step": 24358
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0853569507598877,
      "learning_rate": 0.000583604016433087,
      "loss": 3.2045,
      "step": 24359
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7786245346069336,
      "learning_rate": 0.0005836026826100917,
      "loss": 2.9616,
      "step": 24360
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4351760149002075,
      "learning_rate": 0.0005836013487343694,
      "loss": 2.9734,
      "step": 24361
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3142662048339844,
      "learning_rate": 0.0005836000148059201,
      "loss": 2.9023,
      "step": 24362
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5983867645263672,
      "learning_rate": 0.0005835986808247442,
      "loss": 2.9652,
      "step": 24363
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4250280857086182,
      "learning_rate": 0.0005835973467908421,
      "loss": 3.1675,
      "step": 24364
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7280347347259521,
      "learning_rate": 0.0005835960127042137,
      "loss": 3.1076,
      "step": 24365
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7808316946029663,
      "learning_rate": 0.0005835946785648596,
      "loss": 3.1897,
      "step": 24366
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5764869451522827,
      "learning_rate": 0.0005835933443727798,
      "loss": 3.1559,
      "step": 24367
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7311674356460571,
      "learning_rate": 0.0005835920101279747,
      "loss": 3.109,
      "step": 24368
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.587129592895508,
      "learning_rate": 0.0005835906758304445,
      "loss": 3.0314,
      "step": 24369
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7949761152267456,
      "learning_rate": 0.0005835893414801894,
      "loss": 3.1738,
      "step": 24370
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4343178272247314,
      "learning_rate": 0.0005835880070772097,
      "loss": 3.125,
      "step": 24371
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8125097751617432,
      "learning_rate": 0.0005835866726215057,
      "loss": 3.2132,
      "step": 24372
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.135002374649048,
      "learning_rate": 0.0005835853381130774,
      "loss": 3.1627,
      "step": 24373
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5120590925216675,
      "learning_rate": 0.0005835840035519255,
      "loss": 3.296,
      "step": 24374
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4199429750442505,
      "learning_rate": 0.0005835826689380498,
      "loss": 3.2897,
      "step": 24375
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2167232036590576,
      "learning_rate": 0.0005835813342714507,
      "loss": 3.202,
      "step": 24376
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7830467224121094,
      "learning_rate": 0.0005835799995521287,
      "loss": 2.8787,
      "step": 24377
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9869682788848877,
      "learning_rate": 0.0005835786647800836,
      "loss": 2.8985,
      "step": 24378
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.599077582359314,
      "learning_rate": 0.0005835773299553158,
      "loss": 2.9329,
      "step": 24379
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9440901279449463,
      "learning_rate": 0.0005835759950778258,
      "loss": 3.157,
      "step": 24380
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7603355646133423,
      "learning_rate": 0.0005835746601476138,
      "loss": 3.1582,
      "step": 24381
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0352680683135986,
      "learning_rate": 0.0005835733251646797,
      "loss": 3.0686,
      "step": 24382
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.325105905532837,
      "learning_rate": 0.000583571990129024,
      "loss": 3.341,
      "step": 24383
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6455940008163452,
      "learning_rate": 0.000583570655040647,
      "loss": 2.9282,
      "step": 24384
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.49209725856781,
      "learning_rate": 0.0005835693198995487,
      "loss": 3.1246,
      "step": 24385
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.202559471130371,
      "learning_rate": 0.0005835679847057296,
      "loss": 3.0139,
      "step": 24386
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.376915454864502,
      "learning_rate": 0.00058356664945919,
      "loss": 3.1469,
      "step": 24387
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3841447830200195,
      "learning_rate": 0.0005835653141599298,
      "loss": 3.2557,
      "step": 24388
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.607896327972412,
      "learning_rate": 0.0005835639788079496,
      "loss": 3.1386,
      "step": 24389
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3864444494247437,
      "learning_rate": 0.0005835626434032493,
      "loss": 2.953,
      "step": 24390
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.923444390296936,
      "learning_rate": 0.0005835613079458296,
      "loss": 3.1554,
      "step": 24391
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.706156611442566,
      "learning_rate": 0.0005835599724356903,
      "loss": 3.0272,
      "step": 24392
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5424808263778687,
      "learning_rate": 0.000583558636872832,
      "loss": 3.063,
      "step": 24393
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6748586893081665,
      "learning_rate": 0.0005835573012572547,
      "loss": 3.0535,
      "step": 24394
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.608335018157959,
      "learning_rate": 0.0005835559655889588,
      "loss": 3.1051,
      "step": 24395
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8064403533935547,
      "learning_rate": 0.0005835546298679445,
      "loss": 2.8497,
      "step": 24396
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1669728755950928,
      "learning_rate": 0.0005835532940942119,
      "loss": 2.8803,
      "step": 24397
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5449434518814087,
      "learning_rate": 0.0005835519582677616,
      "loss": 3.1113,
      "step": 24398
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6076692342758179,
      "learning_rate": 0.0005835506223885935,
      "loss": 3.0819,
      "step": 24399
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.566166877746582,
      "learning_rate": 0.000583549286456708,
      "loss": 2.8991,
      "step": 24400
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4780431985855103,
      "learning_rate": 0.0005835479504721053,
      "loss": 3.0236,
      "step": 24401
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.586830973625183,
      "learning_rate": 0.0005835466144347858,
      "loss": 3.0808,
      "step": 24402
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6648532152175903,
      "learning_rate": 0.0005835452783447496,
      "loss": 3.1745,
      "step": 24403
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6638319492340088,
      "learning_rate": 0.0005835439422019969,
      "loss": 3.1721,
      "step": 24404
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0707075595855713,
      "learning_rate": 0.0005835426060065281,
      "loss": 3.1835,
      "step": 24405
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6971222162246704,
      "learning_rate": 0.0005835412697583434,
      "loss": 3.1188,
      "step": 24406
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.48299241065979,
      "learning_rate": 0.000583539933457443,
      "loss": 3.227,
      "step": 24407
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.720886468887329,
      "learning_rate": 0.0005835385971038272,
      "loss": 2.999,
      "step": 24408
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0160388946533203,
      "learning_rate": 0.0005835372606974961,
      "loss": 2.979,
      "step": 24409
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.104973793029785,
      "learning_rate": 0.0005835359242384501,
      "loss": 3.2083,
      "step": 24410
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4918593168258667,
      "learning_rate": 0.0005835345877266896,
      "loss": 3.0137,
      "step": 24411
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.563103199005127,
      "learning_rate": 0.0005835332511622145,
      "loss": 3.187,
      "step": 24412
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7417678833007812,
      "learning_rate": 0.0005835319145450253,
      "loss": 3.0854,
      "step": 24413
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7185635566711426,
      "learning_rate": 0.0005835305778751221,
      "loss": 2.795,
      "step": 24414
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.636720061302185,
      "learning_rate": 0.0005835292411525052,
      "loss": 2.9227,
      "step": 24415
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4660124778747559,
      "learning_rate": 0.000583527904377175,
      "loss": 3.1659,
      "step": 24416
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5747578144073486,
      "learning_rate": 0.0005835265675491315,
      "loss": 3.0741,
      "step": 24417
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4184645414352417,
      "learning_rate": 0.000583525230668375,
      "loss": 3.1731,
      "step": 24418
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5127360820770264,
      "learning_rate": 0.0005835238937349058,
      "loss": 3.2759,
      "step": 24419
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6767971515655518,
      "learning_rate": 0.0005835225567487243,
      "loss": 2.9225,
      "step": 24420
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.402923345565796,
      "learning_rate": 0.0005835212197098307,
      "loss": 3.0934,
      "step": 24421
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.256999135017395,
      "learning_rate": 0.0005835198826182249,
      "loss": 2.9145,
      "step": 24422
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3335444927215576,
      "learning_rate": 0.0005835185454739076,
      "loss": 2.9783,
      "step": 24423
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6547678709030151,
      "learning_rate": 0.0005835172082768786,
      "loss": 3.1388,
      "step": 24424
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9424551725387573,
      "learning_rate": 0.0005835158710271386,
      "loss": 3.3075,
      "step": 24425
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5827447175979614,
      "learning_rate": 0.0005835145337246875,
      "loss": 3.1508,
      "step": 24426
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6594280004501343,
      "learning_rate": 0.0005835131963695258,
      "loss": 3.1894,
      "step": 24427
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0588085651397705,
      "learning_rate": 0.0005835118589616536,
      "loss": 3.3382,
      "step": 24428
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3652461767196655,
      "learning_rate": 0.0005835105215010714,
      "loss": 2.9415,
      "step": 24429
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8101046085357666,
      "learning_rate": 0.000583509183987779,
      "loss": 3.2317,
      "step": 24430
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0560851097106934,
      "learning_rate": 0.000583507846421777,
      "loss": 3.1139,
      "step": 24431
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5857832431793213,
      "learning_rate": 0.0005835065088030655,
      "loss": 3.2744,
      "step": 24432
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.514479398727417,
      "learning_rate": 0.0005835051711316448,
      "loss": 3.1234,
      "step": 24433
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6246399879455566,
      "learning_rate": 0.0005835038334075151,
      "loss": 3.2966,
      "step": 24434
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9454474449157715,
      "learning_rate": 0.0005835024956306766,
      "loss": 3.0871,
      "step": 24435
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.450333595275879,
      "learning_rate": 0.0005835011578011299,
      "loss": 2.9378,
      "step": 24436
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5069776773452759,
      "learning_rate": 0.0005834998199188748,
      "loss": 3.2598,
      "step": 24437
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3953675031661987,
      "learning_rate": 0.0005834984819839118,
      "loss": 3.1368,
      "step": 24438
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9309766292572021,
      "learning_rate": 0.0005834971439962409,
      "loss": 3.0302,
      "step": 24439
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6943750381469727,
      "learning_rate": 0.0005834958059558628,
      "loss": 3.0685,
      "step": 24440
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0044612884521484,
      "learning_rate": 0.0005834944678627773,
      "loss": 3.158,
      "step": 24441
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3316049575805664,
      "learning_rate": 0.0005834931297169849,
      "loss": 3.1554,
      "step": 24442
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0012168884277344,
      "learning_rate": 0.0005834917915184857,
      "loss": 3.1324,
      "step": 24443
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7158113718032837,
      "learning_rate": 0.00058349045326728,
      "loss": 2.8907,
      "step": 24444
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6366682052612305,
      "learning_rate": 0.0005834891149633682,
      "loss": 2.9874,
      "step": 24445
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4148706197738647,
      "learning_rate": 0.0005834877766067504,
      "loss": 3.1029,
      "step": 24446
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.069352865219116,
      "learning_rate": 0.0005834864381974268,
      "loss": 3.2017,
      "step": 24447
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6635286808013916,
      "learning_rate": 0.0005834850997353979,
      "loss": 3.2043,
      "step": 24448
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3587123155593872,
      "learning_rate": 0.0005834837612206636,
      "loss": 3.5993,
      "step": 24449
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.497510313987732,
      "learning_rate": 0.0005834824226532243,
      "loss": 2.8342,
      "step": 24450
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.98966646194458,
      "learning_rate": 0.0005834810840330804,
      "loss": 3.2395,
      "step": 24451
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5779577493667603,
      "learning_rate": 0.0005834797453602319,
      "loss": 3.2498,
      "step": 24452
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5257956981658936,
      "learning_rate": 0.0005834784066346792,
      "loss": 3.0497,
      "step": 24453
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4045233726501465,
      "learning_rate": 0.0005834770678564225,
      "loss": 3.353,
      "step": 24454
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6431550979614258,
      "learning_rate": 0.000583475729025462,
      "loss": 3.1256,
      "step": 24455
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4690335988998413,
      "learning_rate": 0.0005834743901417982,
      "loss": 2.9754,
      "step": 24456
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.409609079360962,
      "learning_rate": 0.000583473051205431,
      "loss": 3.0682,
      "step": 24457
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4359359741210938,
      "learning_rate": 0.0005834717122163609,
      "loss": 3.0399,
      "step": 24458
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.431650996208191,
      "learning_rate": 0.000583470373174588,
      "loss": 3.0472,
      "step": 24459
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8612253665924072,
      "learning_rate": 0.0005834690340801125,
      "loss": 2.9578,
      "step": 24460
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5818582773208618,
      "learning_rate": 0.0005834676949329349,
      "loss": 3.4622,
      "step": 24461
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6741139888763428,
      "learning_rate": 0.0005834663557330554,
      "loss": 2.7795,
      "step": 24462
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.7911720275878906,
      "learning_rate": 0.000583465016480474,
      "loss": 2.9973,
      "step": 24463
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.017533540725708,
      "learning_rate": 0.000583463677175191,
      "loss": 3.0672,
      "step": 24464
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9975769519805908,
      "learning_rate": 0.000583462337817207,
      "loss": 2.9772,
      "step": 24465
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.401073932647705,
      "learning_rate": 0.0005834609984065219,
      "loss": 2.9892,
      "step": 24466
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9475021362304688,
      "learning_rate": 0.000583459658943136,
      "loss": 3.0476,
      "step": 24467
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6749757528305054,
      "learning_rate": 0.0005834583194270496,
      "loss": 3.1534,
      "step": 24468
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3059160709381104,
      "learning_rate": 0.000583456979858263,
      "loss": 3.3075,
      "step": 24469
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8370275497436523,
      "learning_rate": 0.0005834556402367764,
      "loss": 2.9036,
      "step": 24470
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5175079107284546,
      "learning_rate": 0.00058345430056259,
      "loss": 3.3791,
      "step": 24471
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9144240617752075,
      "learning_rate": 0.0005834529608357042,
      "loss": 3.4123,
      "step": 24472
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5419998168945312,
      "learning_rate": 0.0005834516210561191,
      "loss": 3.0956,
      "step": 24473
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4718199968338013,
      "learning_rate": 0.0005834502812238348,
      "loss": 3.0807,
      "step": 24474
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4705346822738647,
      "learning_rate": 0.000583448941338852,
      "loss": 3.0912,
      "step": 24475
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.30791974067688,
      "learning_rate": 0.0005834476014011706,
      "loss": 2.867,
      "step": 24476
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8379331827163696,
      "learning_rate": 0.0005834462614107909,
      "loss": 3.0005,
      "step": 24477
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9010090827941895,
      "learning_rate": 0.0005834449213677132,
      "loss": 3.0026,
      "step": 24478
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6338469982147217,
      "learning_rate": 0.0005834435812719378,
      "loss": 3.0514,
      "step": 24479
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.0723721981048584,
      "learning_rate": 0.0005834422411234648,
      "loss": 3.1205,
      "step": 24480
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8055574893951416,
      "learning_rate": 0.0005834409009222946,
      "loss": 3.2775,
      "step": 24481
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9473916292190552,
      "learning_rate": 0.0005834395606684273,
      "loss": 3.0974,
      "step": 24482
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7250088453292847,
      "learning_rate": 0.0005834382203618633,
      "loss": 3.0535,
      "step": 24483
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6164911985397339,
      "learning_rate": 0.0005834368800026029,
      "loss": 2.9699,
      "step": 24484
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.872396469116211,
      "learning_rate": 0.000583435539590646,
      "loss": 3.1239,
      "step": 24485
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.161863088607788,
      "learning_rate": 0.0005834341991259933,
      "loss": 2.8683,
      "step": 24486
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6653505563735962,
      "learning_rate": 0.0005834328586086447,
      "loss": 3.3568,
      "step": 24487
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3687652349472046,
      "learning_rate": 0.0005834315180386007,
      "loss": 3.2738,
      "step": 24488
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.325268030166626,
      "learning_rate": 0.0005834301774158613,
      "loss": 3.1819,
      "step": 24489
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7960481643676758,
      "learning_rate": 0.0005834288367404269,
      "loss": 2.9804,
      "step": 24490
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.07663893699646,
      "learning_rate": 0.0005834274960122978,
      "loss": 3.2367,
      "step": 24491
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2375489473342896,
      "learning_rate": 0.0005834261552314741,
      "loss": 3.0812,
      "step": 24492
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5885261297225952,
      "learning_rate": 0.0005834248143979562,
      "loss": 3.1716,
      "step": 24493
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6106568574905396,
      "learning_rate": 0.0005834234735117443,
      "loss": 3.1705,
      "step": 24494
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3518056869506836,
      "learning_rate": 0.0005834221325728384,
      "loss": 3.4175,
      "step": 24495
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5369073152542114,
      "learning_rate": 0.0005834207915812392,
      "loss": 2.8671,
      "step": 24496
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8961782455444336,
      "learning_rate": 0.0005834194505369467,
      "loss": 3.0588,
      "step": 24497
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5632060766220093,
      "learning_rate": 0.0005834181094399612,
      "loss": 3.3067,
      "step": 24498
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6692451238632202,
      "learning_rate": 0.0005834167682902829,
      "loss": 2.9082,
      "step": 24499
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7473002672195435,
      "learning_rate": 0.000583415427087912,
      "loss": 3.1521,
      "step": 24500
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2560099363327026,
      "learning_rate": 0.0005834140858328488,
      "loss": 3.1161,
      "step": 24501
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.749009609222412,
      "learning_rate": 0.0005834127445250937,
      "loss": 3.1427,
      "step": 24502
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.733888030052185,
      "learning_rate": 0.0005834114031646467,
      "loss": 3.1117,
      "step": 24503
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2223249673843384,
      "learning_rate": 0.0005834100617515082,
      "loss": 3.3712,
      "step": 24504
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6263808012008667,
      "learning_rate": 0.0005834087202856785,
      "loss": 3.0632,
      "step": 24505
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.173992395401001,
      "learning_rate": 0.0005834073787671577,
      "loss": 3.1206,
      "step": 24506
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.7696375846862793,
      "learning_rate": 0.0005834060371959462,
      "loss": 3.0594,
      "step": 24507
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5922490358352661,
      "learning_rate": 0.000583404695572044,
      "loss": 3.1482,
      "step": 24508
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.093878984451294,
      "learning_rate": 0.0005834033538954517,
      "loss": 2.922,
      "step": 24509
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3435940742492676,
      "learning_rate": 0.0005834020121661692,
      "loss": 2.9258,
      "step": 24510
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4133535623550415,
      "learning_rate": 0.000583400670384197,
      "loss": 3.1186,
      "step": 24511
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.349492073059082,
      "learning_rate": 0.0005833993285495353,
      "loss": 3.0302,
      "step": 24512
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.23309063911438,
      "learning_rate": 0.0005833979866621843,
      "loss": 3.0014,
      "step": 24513
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.44124436378479,
      "learning_rate": 0.0005833966447221442,
      "loss": 3.0831,
      "step": 24514
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6829489469528198,
      "learning_rate": 0.0005833953027294154,
      "loss": 2.8124,
      "step": 24515
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7648160457611084,
      "learning_rate": 0.000583393960683998,
      "loss": 3.3047,
      "step": 24516
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4568114280700684,
      "learning_rate": 0.0005833926185858923,
      "loss": 3.1457,
      "step": 24517
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.84922194480896,
      "learning_rate": 0.0005833912764350987,
      "loss": 2.9348,
      "step": 24518
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7493048906326294,
      "learning_rate": 0.0005833899342316172,
      "loss": 3.027,
      "step": 24519
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5569325685501099,
      "learning_rate": 0.0005833885919754482,
      "loss": 3.2557,
      "step": 24520
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4587984085083008,
      "learning_rate": 0.0005833872496665919,
      "loss": 3.1502,
      "step": 24521
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1018853187561035,
      "learning_rate": 0.0005833859073050485,
      "loss": 3.1548,
      "step": 24522
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.98497474193573,
      "learning_rate": 0.0005833845648908183,
      "loss": 2.9619,
      "step": 24523
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6371259689331055,
      "learning_rate": 0.0005833832224239016,
      "loss": 2.9854,
      "step": 24524
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4394805431365967,
      "learning_rate": 0.0005833818799042986,
      "loss": 3.1824,
      "step": 24525
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7457269430160522,
      "learning_rate": 0.0005833805373320097,
      "loss": 3.0306,
      "step": 24526
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8638362884521484,
      "learning_rate": 0.0005833791947070349,
      "loss": 2.9291,
      "step": 24527
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6019563674926758,
      "learning_rate": 0.0005833778520293745,
      "loss": 3.0825,
      "step": 24528
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9338176250457764,
      "learning_rate": 0.0005833765092990289,
      "loss": 2.9747,
      "step": 24529
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7562395334243774,
      "learning_rate": 0.0005833751665159982,
      "loss": 3.1086,
      "step": 24530
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9939496517181396,
      "learning_rate": 0.0005833738236802827,
      "loss": 3.1084,
      "step": 24531
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7417645454406738,
      "learning_rate": 0.0005833724807918827,
      "loss": 3.1401,
      "step": 24532
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9289865493774414,
      "learning_rate": 0.0005833711378507983,
      "loss": 2.8422,
      "step": 24533
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4622585773468018,
      "learning_rate": 0.00058336979485703,
      "loss": 3.0413,
      "step": 24534
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.442184567451477,
      "learning_rate": 0.0005833684518105779,
      "loss": 3.0927,
      "step": 24535
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4186875820159912,
      "learning_rate": 0.0005833671087114422,
      "loss": 2.8929,
      "step": 24536
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5088491439819336,
      "learning_rate": 0.0005833657655596233,
      "loss": 2.9684,
      "step": 24537
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7565563917160034,
      "learning_rate": 0.0005833644223551212,
      "loss": 2.9658,
      "step": 24538
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.44016432762146,
      "learning_rate": 0.0005833630790979364,
      "loss": 3.1433,
      "step": 24539
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0285825729370117,
      "learning_rate": 0.0005833617357880689,
      "loss": 2.8378,
      "step": 24540
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.930869698524475,
      "learning_rate": 0.0005833603924255194,
      "loss": 3.2236,
      "step": 24541
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3829798698425293,
      "learning_rate": 0.0005833590490102876,
      "loss": 2.9299,
      "step": 24542
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.99986732006073,
      "learning_rate": 0.0005833577055423741,
      "loss": 3.2827,
      "step": 24543
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7620660066604614,
      "learning_rate": 0.0005833563620217791,
      "loss": 3.1477,
      "step": 24544
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4342889785766602,
      "learning_rate": 0.0005833550184485027,
      "loss": 3.1087,
      "step": 24545
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4009239673614502,
      "learning_rate": 0.0005833536748225453,
      "loss": 3.1705,
      "step": 24546
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5287957191467285,
      "learning_rate": 0.0005833523311439071,
      "loss": 3.2732,
      "step": 24547
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.269536018371582,
      "learning_rate": 0.0005833509874125884,
      "loss": 3.2457,
      "step": 24548
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5418843030929565,
      "learning_rate": 0.0005833496436285894,
      "loss": 3.1533,
      "step": 24549
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4868464469909668,
      "learning_rate": 0.0005833482997919103,
      "loss": 3.2617,
      "step": 24550
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3623130321502686,
      "learning_rate": 0.0005833469559025513,
      "loss": 2.9637,
      "step": 24551
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4974030256271362,
      "learning_rate": 0.000583345611960513,
      "loss": 3.0745,
      "step": 24552
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5339672565460205,
      "learning_rate": 0.0005833442679657952,
      "loss": 3.3038,
      "step": 24553
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0204412937164307,
      "learning_rate": 0.0005833429239183984,
      "loss": 3.0406,
      "step": 24554
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9376780986785889,
      "learning_rate": 0.0005833415798183228,
      "loss": 2.9959,
      "step": 24555
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8079955577850342,
      "learning_rate": 0.0005833402356655687,
      "loss": 3.0165,
      "step": 24556
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6975845098495483,
      "learning_rate": 0.0005833388914601363,
      "loss": 3.1351,
      "step": 24557
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.335936427116394,
      "learning_rate": 0.0005833375472020258,
      "loss": 3.1748,
      "step": 24558
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5552035570144653,
      "learning_rate": 0.0005833362028912376,
      "loss": 2.8097,
      "step": 24559
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0189785957336426,
      "learning_rate": 0.0005833348585277718,
      "loss": 3.1422,
      "step": 24560
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.496411919593811,
      "learning_rate": 0.0005833335141116287,
      "loss": 3.2297,
      "step": 24561
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5275073051452637,
      "learning_rate": 0.0005833321696428085,
      "loss": 2.9261,
      "step": 24562
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4615408182144165,
      "learning_rate": 0.0005833308251213115,
      "loss": 3.0827,
      "step": 24563
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5621646642684937,
      "learning_rate": 0.000583329480547138,
      "loss": 3.0383,
      "step": 24564
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4704476594924927,
      "learning_rate": 0.0005833281359202882,
      "loss": 3.0322,
      "step": 24565
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.322916269302368,
      "learning_rate": 0.0005833267912407624,
      "loss": 2.9724,
      "step": 24566
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0026090145111084,
      "learning_rate": 0.0005833254465085607,
      "loss": 2.9655,
      "step": 24567
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8818671703338623,
      "learning_rate": 0.0005833241017236835,
      "loss": 3.2385,
      "step": 24568
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9335885047912598,
      "learning_rate": 0.0005833227568861311,
      "loss": 3.2737,
      "step": 24569
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.593887209892273,
      "learning_rate": 0.0005833214119959035,
      "loss": 3.1111,
      "step": 24570
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4917703866958618,
      "learning_rate": 0.0005833200670530011,
      "loss": 3.024,
      "step": 24571
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7651031017303467,
      "learning_rate": 0.0005833187220574241,
      "loss": 3.1339,
      "step": 24572
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4076684713363647,
      "learning_rate": 0.000583317377009173,
      "loss": 2.9855,
      "step": 24573
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5702712535858154,
      "learning_rate": 0.0005833160319082477,
      "loss": 3.0647,
      "step": 24574
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5968904495239258,
      "learning_rate": 0.0005833146867546487,
      "loss": 3.232,
      "step": 24575
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.384749412536621,
      "learning_rate": 0.0005833133415483761,
      "loss": 3.0117,
      "step": 24576
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.447881817817688,
      "learning_rate": 0.0005833119962894302,
      "loss": 3.009,
      "step": 24577
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5784499645233154,
      "learning_rate": 0.0005833106509778112,
      "loss": 3.0154,
      "step": 24578
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.637328028678894,
      "learning_rate": 0.0005833093056135196,
      "loss": 2.8764,
      "step": 24579
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.349503517150879,
      "learning_rate": 0.0005833079601965553,
      "loss": 2.9869,
      "step": 24580
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3348578214645386,
      "learning_rate": 0.0005833066147269187,
      "loss": 3.0187,
      "step": 24581
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4609694480895996,
      "learning_rate": 0.00058330526920461,
      "loss": 3.091,
      "step": 24582
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4972506761550903,
      "learning_rate": 0.0005833039236296297,
      "loss": 3.3223,
      "step": 24583
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7761021852493286,
      "learning_rate": 0.0005833025780019777,
      "loss": 3.1494,
      "step": 24584
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.015411853790283,
      "learning_rate": 0.0005833012323216544,
      "loss": 3.2832,
      "step": 24585
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0312607288360596,
      "learning_rate": 0.00058329988658866,
      "loss": 2.8752,
      "step": 24586
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5374385118484497,
      "learning_rate": 0.0005832985408029949,
      "loss": 3.2712,
      "step": 24587
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8439289331436157,
      "learning_rate": 0.0005832971949646592,
      "loss": 3.1643,
      "step": 24588
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.5707571506500244,
      "learning_rate": 0.0005832958490736532,
      "loss": 3.2851,
      "step": 24589
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0029118061065674,
      "learning_rate": 0.0005832945031299773,
      "loss": 3.1678,
      "step": 24590
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4399168491363525,
      "learning_rate": 0.0005832931571336315,
      "loss": 3.1323,
      "step": 24591
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3293778896331787,
      "learning_rate": 0.0005832918110846161,
      "loss": 3.0987,
      "step": 24592
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3282285928726196,
      "learning_rate": 0.0005832904649829315,
      "loss": 3.045,
      "step": 24593
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3261995315551758,
      "learning_rate": 0.0005832891188285778,
      "loss": 2.9524,
      "step": 24594
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4154342412948608,
      "learning_rate": 0.0005832877726215554,
      "loss": 3.0861,
      "step": 24595
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4912906885147095,
      "learning_rate": 0.0005832864263618644,
      "loss": 3.042,
      "step": 24596
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9945130348205566,
      "learning_rate": 0.000583285080049505,
      "loss": 3.1302,
      "step": 24597
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3386749029159546,
      "learning_rate": 0.0005832837336844776,
      "loss": 3.0493,
      "step": 24598
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6723271608352661,
      "learning_rate": 0.0005832823872667825,
      "loss": 3.0082,
      "step": 24599
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4026402235031128,
      "learning_rate": 0.0005832810407964198,
      "loss": 2.9298,
      "step": 24600
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8041880130767822,
      "learning_rate": 0.0005832796942733898,
      "loss": 3.0589,
      "step": 24601
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.466141700744629,
      "learning_rate": 0.0005832783476976928,
      "loss": 3.2084,
      "step": 24602
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7221035957336426,
      "learning_rate": 0.000583277001069329,
      "loss": 3.1347,
      "step": 24603
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.372949242591858,
      "learning_rate": 0.0005832756543882986,
      "loss": 3.197,
      "step": 24604
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2195730209350586,
      "learning_rate": 0.0005832743076546019,
      "loss": 3.3758,
      "step": 24605
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1912567615509033,
      "learning_rate": 0.0005832729608682393,
      "loss": 2.8728,
      "step": 24606
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.533074975013733,
      "learning_rate": 0.0005832716140292107,
      "loss": 3.0025,
      "step": 24607
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.297027111053467,
      "learning_rate": 0.0005832702671375167,
      "loss": 3.3621,
      "step": 24608
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.831273078918457,
      "learning_rate": 0.0005832689201931574,
      "loss": 3.0997,
      "step": 24609
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.8563947677612305,
      "learning_rate": 0.0005832675731961331,
      "loss": 3.2477,
      "step": 24610
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4134893417358398,
      "learning_rate": 0.0005832662261464438,
      "loss": 3.0579,
      "step": 24611
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.433993101119995,
      "learning_rate": 0.0005832648790440901,
      "loss": 3.109,
      "step": 24612
    },
    {
      "epoch": 0.32,
      "grad_norm": 4.02060604095459,
      "learning_rate": 0.0005832635318890722,
      "loss": 3.0496,
      "step": 24613
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4657375812530518,
      "learning_rate": 0.0005832621846813902,
      "loss": 3.0871,
      "step": 24614
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.031466245651245,
      "learning_rate": 0.0005832608374210445,
      "loss": 3.0196,
      "step": 24615
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.8195807933807373,
      "learning_rate": 0.000583259490108035,
      "loss": 2.9142,
      "step": 24616
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4322208166122437,
      "learning_rate": 0.0005832581427423624,
      "loss": 3.1727,
      "step": 24617
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0898756980895996,
      "learning_rate": 0.0005832567953240268,
      "loss": 3.1543,
      "step": 24618
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.847113847732544,
      "learning_rate": 0.0005832554478530283,
      "loss": 3.1048,
      "step": 24619
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5577853918075562,
      "learning_rate": 0.0005832541003293674,
      "loss": 3.0867,
      "step": 24620
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.642775058746338,
      "learning_rate": 0.0005832527527530441,
      "loss": 3.2034,
      "step": 24621
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7267628908157349,
      "learning_rate": 0.0005832514051240589,
      "loss": 3.2888,
      "step": 24622
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5970698595046997,
      "learning_rate": 0.0005832500574424117,
      "loss": 3.1848,
      "step": 24623
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3526723384857178,
      "learning_rate": 0.000583248709708103,
      "loss": 2.9084,
      "step": 24624
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5386582612991333,
      "learning_rate": 0.0005832473619211332,
      "loss": 3.2535,
      "step": 24625
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.621099829673767,
      "learning_rate": 0.0005832460140815023,
      "loss": 3.3021,
      "step": 24626
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.1854336261749268,
      "learning_rate": 0.0005832446661892106,
      "loss": 3.122,
      "step": 24627
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3855339288711548,
      "learning_rate": 0.0005832433182442583,
      "loss": 3.0586,
      "step": 24628
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7113066911697388,
      "learning_rate": 0.0005832419702466458,
      "loss": 2.9402,
      "step": 24629
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5098888874053955,
      "learning_rate": 0.0005832406221963733,
      "loss": 2.8881,
      "step": 24630
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4908950328826904,
      "learning_rate": 0.0005832392740934409,
      "loss": 3.1525,
      "step": 24631
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7684828042984009,
      "learning_rate": 0.0005832379259378491,
      "loss": 2.8814,
      "step": 24632
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4227476119995117,
      "learning_rate": 0.000583236577729598,
      "loss": 3.349,
      "step": 24633
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.679336428642273,
      "learning_rate": 0.0005832352294686878,
      "loss": 3.002,
      "step": 24634
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.539366602897644,
      "learning_rate": 0.0005832338811551189,
      "loss": 3.0495,
      "step": 24635
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1803483963012695,
      "learning_rate": 0.0005832325327888914,
      "loss": 3.0662,
      "step": 24636
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8082202672958374,
      "learning_rate": 0.0005832311843700057,
      "loss": 3.2497,
      "step": 24637
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3388323783874512,
      "learning_rate": 0.000583229835898462,
      "loss": 3.005,
      "step": 24638
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5291389226913452,
      "learning_rate": 0.0005832284873742604,
      "loss": 3.1591,
      "step": 24639
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5773717164993286,
      "learning_rate": 0.0005832271387974014,
      "loss": 3.4297,
      "step": 24640
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5722509622573853,
      "learning_rate": 0.000583225790167885,
      "loss": 3.007,
      "step": 24641
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.1210615634918213,
      "learning_rate": 0.0005832244414857117,
      "loss": 3.064,
      "step": 24642
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9902878999710083,
      "learning_rate": 0.0005832230927508816,
      "loss": 3.0305,
      "step": 24643
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.595869779586792,
      "learning_rate": 0.000583221743963395,
      "loss": 3.1359,
      "step": 24644
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9004952907562256,
      "learning_rate": 0.0005832203951232521,
      "loss": 3.4115,
      "step": 24645
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.490262031555176,
      "learning_rate": 0.0005832190462304532,
      "loss": 3.206,
      "step": 24646
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2629995346069336,
      "learning_rate": 0.0005832176972849985,
      "loss": 3.0731,
      "step": 24647
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9023149013519287,
      "learning_rate": 0.0005832163482868883,
      "loss": 3.3131,
      "step": 24648
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.905924916267395,
      "learning_rate": 0.0005832149992361229,
      "loss": 3.0999,
      "step": 24649
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4050962924957275,
      "learning_rate": 0.0005832136501327024,
      "loss": 3.1038,
      "step": 24650
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3980696201324463,
      "learning_rate": 0.0005832123009766271,
      "loss": 3.3163,
      "step": 24651
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5723870992660522,
      "learning_rate": 0.0005832109517678975,
      "loss": 3.232,
      "step": 24652
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0557515621185303,
      "learning_rate": 0.0005832096025065133,
      "loss": 3.2006,
      "step": 24653
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4134355783462524,
      "learning_rate": 0.0005832082531924753,
      "loss": 3.2393,
      "step": 24654
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5260595083236694,
      "learning_rate": 0.0005832069038257835,
      "loss": 3.0258,
      "step": 24655
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3361936807632446,
      "learning_rate": 0.0005832055544064382,
      "loss": 2.8774,
      "step": 24656
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9639114141464233,
      "learning_rate": 0.0005832042049344397,
      "loss": 3.2458,
      "step": 24657
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4318954944610596,
      "learning_rate": 0.0005832028554097881,
      "loss": 3.0099,
      "step": 24658
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3616777658462524,
      "learning_rate": 0.0005832015058324839,
      "loss": 3.1003,
      "step": 24659
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0923187732696533,
      "learning_rate": 0.000583200156202527,
      "loss": 3.0457,
      "step": 24660
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.827046513557434,
      "learning_rate": 0.000583198806519918,
      "loss": 3.0119,
      "step": 24661
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.419905185699463,
      "learning_rate": 0.0005831974567846568,
      "loss": 2.9906,
      "step": 24662
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8294792175292969,
      "learning_rate": 0.0005831961069967441,
      "loss": 3.1146,
      "step": 24663
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8864870071411133,
      "learning_rate": 0.0005831947571561796,
      "loss": 3.2703,
      "step": 24664
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.695343255996704,
      "learning_rate": 0.000583193407262964,
      "loss": 3.266,
      "step": 24665
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.773880124092102,
      "learning_rate": 0.0005831920573170975,
      "loss": 2.8887,
      "step": 24666
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.6342835426330566,
      "learning_rate": 0.0005831907073185801,
      "loss": 3.1019,
      "step": 24667
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.515984058380127,
      "learning_rate": 0.0005831893572674123,
      "loss": 2.9746,
      "step": 24668
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5336875915527344,
      "learning_rate": 0.0005831880071635943,
      "loss": 3.1205,
      "step": 24669
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.704673409461975,
      "learning_rate": 0.0005831866570071262,
      "loss": 2.6315,
      "step": 24670
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.570176839828491,
      "learning_rate": 0.0005831853067980083,
      "loss": 3.1374,
      "step": 24671
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.431450128555298,
      "learning_rate": 0.000583183956536241,
      "loss": 2.8372,
      "step": 24672
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4594535827636719,
      "learning_rate": 0.0005831826062218244,
      "loss": 3.0288,
      "step": 24673
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6827671527862549,
      "learning_rate": 0.0005831812558547589,
      "loss": 3.1683,
      "step": 24674
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7299705743789673,
      "learning_rate": 0.0005831799054350446,
      "loss": 2.9554,
      "step": 24675
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.800870656967163,
      "learning_rate": 0.0005831785549626818,
      "loss": 3.0797,
      "step": 24676
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.40201997756958,
      "learning_rate": 0.0005831772044376707,
      "loss": 2.9889,
      "step": 24677
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9434510469436646,
      "learning_rate": 0.0005831758538600118,
      "loss": 2.9683,
      "step": 24678
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4443902969360352,
      "learning_rate": 0.000583174503229705,
      "loss": 2.912,
      "step": 24679
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.496439814567566,
      "learning_rate": 0.0005831731525467508,
      "loss": 3.0244,
      "step": 24680
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.036125898361206,
      "learning_rate": 0.0005831718018111493,
      "loss": 2.9469,
      "step": 24681
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.687447428703308,
      "learning_rate": 0.0005831704510229009,
      "loss": 3.01,
      "step": 24682
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.0113918781280518,
      "learning_rate": 0.0005831691001820057,
      "loss": 3.1294,
      "step": 24683
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4664055109024048,
      "learning_rate": 0.0005831677492884641,
      "loss": 3.2197,
      "step": 24684
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7348648309707642,
      "learning_rate": 0.000583166398342276,
      "loss": 3.0764,
      "step": 24685
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.49080491065979,
      "learning_rate": 0.0005831650473434422,
      "loss": 2.8174,
      "step": 24686
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3283121585845947,
      "learning_rate": 0.0005831636962919626,
      "loss": 3.2414,
      "step": 24687
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3367353677749634,
      "learning_rate": 0.0005831623451878376,
      "loss": 3.1874,
      "step": 24688
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7507576942443848,
      "learning_rate": 0.0005831609940310673,
      "loss": 3.0278,
      "step": 24689
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.856794834136963,
      "learning_rate": 0.000583159642821652,
      "loss": 3.0066,
      "step": 24690
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6640064716339111,
      "learning_rate": 0.000583158291559592,
      "loss": 2.9162,
      "step": 24691
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.846655011177063,
      "learning_rate": 0.0005831569402448875,
      "loss": 3.1093,
      "step": 24692
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3560516834259033,
      "learning_rate": 0.0005831555888775389,
      "loss": 3.049,
      "step": 24693
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4434300661087036,
      "learning_rate": 0.0005831542374575461,
      "loss": 3.0455,
      "step": 24694
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4282740354537964,
      "learning_rate": 0.0005831528859849099,
      "loss": 2.9193,
      "step": 24695
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3258976936340332,
      "learning_rate": 0.0005831515344596299,
      "loss": 3.1957,
      "step": 24696
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5208542346954346,
      "learning_rate": 0.0005831501828817069,
      "loss": 2.9718,
      "step": 24697
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7373647689819336,
      "learning_rate": 0.0005831488312511409,
      "loss": 3.0882,
      "step": 24698
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3345144987106323,
      "learning_rate": 0.0005831474795679321,
      "loss": 3.2036,
      "step": 24699
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0737175941467285,
      "learning_rate": 0.000583146127832081,
      "loss": 3.2507,
      "step": 24700
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5220307111740112,
      "learning_rate": 0.0005831447760435875,
      "loss": 2.9824,
      "step": 24701
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6423687934875488,
      "learning_rate": 0.0005831434242024522,
      "loss": 3.2676,
      "step": 24702
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.442407488822937,
      "learning_rate": 0.0005831420723086751,
      "loss": 2.9566,
      "step": 24703
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.349503517150879,
      "learning_rate": 0.0005831407203622566,
      "loss": 3.1436,
      "step": 24704
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.084463119506836,
      "learning_rate": 0.0005831393683631968,
      "loss": 2.9875,
      "step": 24705
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3010085821151733,
      "learning_rate": 0.0005831380163114962,
      "loss": 3.4354,
      "step": 24706
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7834651470184326,
      "learning_rate": 0.0005831366642071548,
      "loss": 3.2626,
      "step": 24707
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.831430196762085,
      "learning_rate": 0.000583135312050173,
      "loss": 2.9985,
      "step": 24708
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2604706287384033,
      "learning_rate": 0.0005831339598405509,
      "loss": 3.0132,
      "step": 24709
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.629794716835022,
      "learning_rate": 0.0005831326075782889,
      "loss": 3.246,
      "step": 24710
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0903332233428955,
      "learning_rate": 0.0005831312552633872,
      "loss": 3.0246,
      "step": 24711
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4758269786834717,
      "learning_rate": 0.0005831299028958461,
      "loss": 2.8623,
      "step": 24712
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1639528274536133,
      "learning_rate": 0.0005831285504756658,
      "loss": 3.181,
      "step": 24713
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.327941656112671,
      "learning_rate": 0.0005831271980028466,
      "loss": 3.0871,
      "step": 24714
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0237340927124023,
      "learning_rate": 0.0005831258454773886,
      "loss": 3.1494,
      "step": 24715
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2684459686279297,
      "learning_rate": 0.0005831244928992922,
      "loss": 3.326,
      "step": 24716
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4667913913726807,
      "learning_rate": 0.0005831231402685577,
      "loss": 2.9769,
      "step": 24717
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.694905400276184,
      "learning_rate": 0.0005831217875851851,
      "loss": 3.0482,
      "step": 24718
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.603402853012085,
      "learning_rate": 0.0005831204348491749,
      "loss": 3.0563,
      "step": 24719
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2821063995361328,
      "learning_rate": 0.0005831190820605272,
      "loss": 3.281,
      "step": 24720
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1985466480255127,
      "learning_rate": 0.0005831177292192425,
      "loss": 3.0488,
      "step": 24721
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2368929386138916,
      "learning_rate": 0.0005831163763253207,
      "loss": 3.3188,
      "step": 24722
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1247289180755615,
      "learning_rate": 0.0005831150233787623,
      "loss": 3.0882,
      "step": 24723
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3186888694763184,
      "learning_rate": 0.0005831136703795675,
      "loss": 2.8662,
      "step": 24724
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6007037162780762,
      "learning_rate": 0.0005831123173277364,
      "loss": 3.1025,
      "step": 24725
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5098741054534912,
      "learning_rate": 0.0005831109642232696,
      "loss": 3.0934,
      "step": 24726
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.703296184539795,
      "learning_rate": 0.0005831096110661669,
      "loss": 2.8898,
      "step": 24727
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6418031454086304,
      "learning_rate": 0.0005831082578564289,
      "loss": 3.1219,
      "step": 24728
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0377025604248047,
      "learning_rate": 0.0005831069045940556,
      "loss": 2.8751,
      "step": 24729
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8169289827346802,
      "learning_rate": 0.0005831055512790475,
      "loss": 3.2313,
      "step": 24730
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5640584230422974,
      "learning_rate": 0.0005831041979114047,
      "loss": 2.7975,
      "step": 24731
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.094376564025879,
      "learning_rate": 0.0005831028444911275,
      "loss": 3.064,
      "step": 24732
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9413654804229736,
      "learning_rate": 0.0005831014910182161,
      "loss": 2.5854,
      "step": 24733
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7447237968444824,
      "learning_rate": 0.0005831001374926708,
      "loss": 2.967,
      "step": 24734
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.8250808715820312,
      "learning_rate": 0.0005830987839144919,
      "loss": 2.9294,
      "step": 24735
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.951080083847046,
      "learning_rate": 0.0005830974302836795,
      "loss": 3.167,
      "step": 24736
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4635003805160522,
      "learning_rate": 0.000583096076600234,
      "loss": 2.9799,
      "step": 24737
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6783467531204224,
      "learning_rate": 0.0005830947228641556,
      "loss": 3.1408,
      "step": 24738
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7809391021728516,
      "learning_rate": 0.0005830933690754445,
      "loss": 3.0091,
      "step": 24739
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3345303535461426,
      "learning_rate": 0.0005830920152341011,
      "loss": 2.9092,
      "step": 24740
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3995054960250854,
      "learning_rate": 0.0005830906613401254,
      "loss": 3.1093,
      "step": 24741
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.05973744392395,
      "learning_rate": 0.0005830893073935179,
      "loss": 3.0888,
      "step": 24742
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6930471658706665,
      "learning_rate": 0.0005830879533942788,
      "loss": 2.8709,
      "step": 24743
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8780815601348877,
      "learning_rate": 0.0005830865993424082,
      "loss": 3.2975,
      "step": 24744
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3942171335220337,
      "learning_rate": 0.0005830852452379065,
      "loss": 3.1057,
      "step": 24745
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.364880919456482,
      "learning_rate": 0.0005830838910807739,
      "loss": 3.1076,
      "step": 24746
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4241117238998413,
      "learning_rate": 0.0005830825368710107,
      "loss": 3.0186,
      "step": 24747
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7238452434539795,
      "learning_rate": 0.0005830811826086171,
      "loss": 3.0942,
      "step": 24748
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4331163167953491,
      "learning_rate": 0.0005830798282935933,
      "loss": 3.108,
      "step": 24749
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3536840677261353,
      "learning_rate": 0.0005830784739259397,
      "loss": 3.2605,
      "step": 24750
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.31026291847229,
      "learning_rate": 0.0005830771195056566,
      "loss": 2.9126,
      "step": 24751
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6042500734329224,
      "learning_rate": 0.0005830757650327438,
      "loss": 3.2581,
      "step": 24752
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4028867483139038,
      "learning_rate": 0.0005830744105072021,
      "loss": 3.1377,
      "step": 24753
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0164260864257812,
      "learning_rate": 0.0005830730559290314,
      "loss": 3.0439,
      "step": 24754
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6490111351013184,
      "learning_rate": 0.0005830717012982322,
      "loss": 3.0848,
      "step": 24755
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7648721933364868,
      "learning_rate": 0.0005830703466148044,
      "loss": 2.8603,
      "step": 24756
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4496861696243286,
      "learning_rate": 0.0005830689918787487,
      "loss": 2.7956,
      "step": 24757
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8397986888885498,
      "learning_rate": 0.0005830676370900651,
      "loss": 3.2799,
      "step": 24758
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.992166519165039,
      "learning_rate": 0.0005830662822487538,
      "loss": 2.9233,
      "step": 24759
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.833661675453186,
      "learning_rate": 0.0005830649273548152,
      "loss": 2.8721,
      "step": 24760
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.507327675819397,
      "learning_rate": 0.0005830635724082495,
      "loss": 3.1529,
      "step": 24761
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7340105772018433,
      "learning_rate": 0.0005830622174090569,
      "loss": 2.9385,
      "step": 24762
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4247850179672241,
      "learning_rate": 0.0005830608623572377,
      "loss": 2.8523,
      "step": 24763
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.367194175720215,
      "learning_rate": 0.0005830595072527921,
      "loss": 3.0091,
      "step": 24764
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.807572841644287,
      "learning_rate": 0.0005830581520957205,
      "loss": 3.0593,
      "step": 24765
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4548031091690063,
      "learning_rate": 0.000583056796886023,
      "loss": 2.9851,
      "step": 24766
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7745321989059448,
      "learning_rate": 0.0005830554416236998,
      "loss": 2.9585,
      "step": 24767
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.763317346572876,
      "learning_rate": 0.0005830540863087514,
      "loss": 3.2128,
      "step": 24768
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4278717041015625,
      "learning_rate": 0.0005830527309411778,
      "loss": 3.2584,
      "step": 24769
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4802075624465942,
      "learning_rate": 0.0005830513755209794,
      "loss": 3.1118,
      "step": 24770
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.801381230354309,
      "learning_rate": 0.0005830500200481564,
      "loss": 3.2202,
      "step": 24771
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2658743858337402,
      "learning_rate": 0.0005830486645227091,
      "loss": 3.2116,
      "step": 24772
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4825758934020996,
      "learning_rate": 0.0005830473089446376,
      "loss": 3.0137,
      "step": 24773
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0738937854766846,
      "learning_rate": 0.0005830459533139424,
      "loss": 3.0655,
      "step": 24774
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.088172197341919,
      "learning_rate": 0.0005830445976306235,
      "loss": 3.1945,
      "step": 24775
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9002745151519775,
      "learning_rate": 0.0005830432418946814,
      "loss": 2.9226,
      "step": 24776
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.619088053703308,
      "learning_rate": 0.0005830418861061161,
      "loss": 3.254,
      "step": 24777
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9694271087646484,
      "learning_rate": 0.0005830405302649279,
      "loss": 3.0793,
      "step": 24778
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4579010009765625,
      "learning_rate": 0.0005830391743711172,
      "loss": 2.9169,
      "step": 24779
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2558910846710205,
      "learning_rate": 0.0005830378184246843,
      "loss": 3.0952,
      "step": 24780
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6581683158874512,
      "learning_rate": 0.0005830364624256291,
      "loss": 3.1224,
      "step": 24781
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5531476736068726,
      "learning_rate": 0.0005830351063739523,
      "loss": 2.7927,
      "step": 24782
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3401745557785034,
      "learning_rate": 0.0005830337502696537,
      "loss": 3.0037,
      "step": 24783
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.658713936805725,
      "learning_rate": 0.0005830323941127339,
      "loss": 3.0588,
      "step": 24784
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.502427339553833,
      "learning_rate": 0.0005830310379031932,
      "loss": 3.1191,
      "step": 24785
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4179713726043701,
      "learning_rate": 0.0005830296816410315,
      "loss": 2.8352,
      "step": 24786
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8024871349334717,
      "learning_rate": 0.0005830283253262493,
      "loss": 2.9108,
      "step": 24787
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2780159711837769,
      "learning_rate": 0.0005830269689588467,
      "loss": 3.3075,
      "step": 24788
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4053537845611572,
      "learning_rate": 0.0005830256125388242,
      "loss": 3.0439,
      "step": 24789
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4168829917907715,
      "learning_rate": 0.0005830242560661817,
      "loss": 3.0763,
      "step": 24790
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5221326351165771,
      "learning_rate": 0.0005830228995409198,
      "loss": 3.081,
      "step": 24791
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7235606908798218,
      "learning_rate": 0.0005830215429630385,
      "loss": 3.1622,
      "step": 24792
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3345566987991333,
      "learning_rate": 0.0005830201863325382,
      "loss": 3.0355,
      "step": 24793
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3490577936172485,
      "learning_rate": 0.0005830188296494191,
      "loss": 3.0227,
      "step": 24794
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8292759656906128,
      "learning_rate": 0.0005830174729136814,
      "loss": 2.9532,
      "step": 24795
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6566567420959473,
      "learning_rate": 0.0005830161161253255,
      "loss": 2.8866,
      "step": 24796
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6984151601791382,
      "learning_rate": 0.0005830147592843516,
      "loss": 3.0133,
      "step": 24797
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3939111232757568,
      "learning_rate": 0.0005830134023907599,
      "loss": 3.2121,
      "step": 24798
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2667834758758545,
      "learning_rate": 0.0005830120454445506,
      "loss": 3.2219,
      "step": 24799
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7824584245681763,
      "learning_rate": 0.0005830106884457241,
      "loss": 2.8935,
      "step": 24800
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6789565086364746,
      "learning_rate": 0.0005830093313942804,
      "loss": 2.8847,
      "step": 24801
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2009419202804565,
      "learning_rate": 0.0005830079742902201,
      "loss": 3.1804,
      "step": 24802
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5910370349884033,
      "learning_rate": 0.0005830066171335432,
      "loss": 3.0062,
      "step": 24803
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4536701440811157,
      "learning_rate": 0.00058300525992425,
      "loss": 3.091,
      "step": 24804
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5584181547164917,
      "learning_rate": 0.0005830039026623408,
      "loss": 3.0091,
      "step": 24805
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3728842735290527,
      "learning_rate": 0.000583002545347816,
      "loss": 3.1313,
      "step": 24806
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4521605968475342,
      "learning_rate": 0.0005830011879806755,
      "loss": 2.922,
      "step": 24807
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4671249389648438,
      "learning_rate": 0.0005829998305609197,
      "loss": 3.0639,
      "step": 24808
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0730767250061035,
      "learning_rate": 0.000582998473088549,
      "loss": 3.0036,
      "step": 24809
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4908745288848877,
      "learning_rate": 0.0005829971155635634,
      "loss": 3.2878,
      "step": 24810
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6982612609863281,
      "learning_rate": 0.0005829957579859635,
      "loss": 2.9063,
      "step": 24811
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.895945429801941,
      "learning_rate": 0.0005829944003557493,
      "loss": 2.9527,
      "step": 24812
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4315690994262695,
      "learning_rate": 0.0005829930426729209,
      "loss": 2.9504,
      "step": 24813
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6314538717269897,
      "learning_rate": 0.000582991684937479,
      "loss": 2.9928,
      "step": 24814
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5651757717132568,
      "learning_rate": 0.0005829903271494235,
      "loss": 3.0108,
      "step": 24815
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.046027421951294,
      "learning_rate": 0.0005829889693087548,
      "loss": 3.3136,
      "step": 24816
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.041015148162842,
      "learning_rate": 0.0005829876114154729,
      "loss": 2.9966,
      "step": 24817
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0445430278778076,
      "learning_rate": 0.0005829862534695785,
      "loss": 3.1793,
      "step": 24818
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6887565851211548,
      "learning_rate": 0.0005829848954710716,
      "loss": 3.1089,
      "step": 24819
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7276841402053833,
      "learning_rate": 0.0005829835374199523,
      "loss": 2.9658,
      "step": 24820
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.80698823928833,
      "learning_rate": 0.0005829821793162212,
      "loss": 2.9278,
      "step": 24821
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5887278318405151,
      "learning_rate": 0.0005829808211598782,
      "loss": 3.2145,
      "step": 24822
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8954545259475708,
      "learning_rate": 0.0005829794629509239,
      "loss": 3.1437,
      "step": 24823
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.1276023387908936,
      "learning_rate": 0.0005829781046893583,
      "loss": 3.0336,
      "step": 24824
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5881670713424683,
      "learning_rate": 0.0005829767463751817,
      "loss": 2.9491,
      "step": 24825
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8442131280899048,
      "learning_rate": 0.0005829753880083944,
      "loss": 3.1312,
      "step": 24826
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.073700189590454,
      "learning_rate": 0.0005829740295889966,
      "loss": 2.8943,
      "step": 24827
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7437413930892944,
      "learning_rate": 0.0005829726711169887,
      "loss": 3.2315,
      "step": 24828
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5980453491210938,
      "learning_rate": 0.0005829713125923707,
      "loss": 3.0321,
      "step": 24829
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4763121604919434,
      "learning_rate": 0.0005829699540151431,
      "loss": 3.0179,
      "step": 24830
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.3405539989471436,
      "learning_rate": 0.000582968595385306,
      "loss": 2.9121,
      "step": 24831
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.208237409591675,
      "learning_rate": 0.0005829672367028597,
      "loss": 3.3884,
      "step": 24832
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.237306833267212,
      "learning_rate": 0.0005829658779678044,
      "loss": 3.1673,
      "step": 24833
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.615711212158203,
      "learning_rate": 0.0005829645191801404,
      "loss": 2.9276,
      "step": 24834
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0918867588043213,
      "learning_rate": 0.000582963160339868,
      "loss": 3.1642,
      "step": 24835
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8314392566680908,
      "learning_rate": 0.0005829618014469874,
      "loss": 2.9928,
      "step": 24836
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6988952159881592,
      "learning_rate": 0.000582960442501499,
      "loss": 3.0749,
      "step": 24837
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.7102904319763184,
      "learning_rate": 0.0005829590835034027,
      "loss": 3.1944,
      "step": 24838
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0912091732025146,
      "learning_rate": 0.000582957724452699,
      "loss": 2.8889,
      "step": 24839
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4350554943084717,
      "learning_rate": 0.0005829563653493883,
      "loss": 3.0621,
      "step": 24840
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.3238823413848877,
      "learning_rate": 0.0005829550061934705,
      "loss": 2.8928,
      "step": 24841
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6289652585983276,
      "learning_rate": 0.000582953646984946,
      "loss": 3.1102,
      "step": 24842
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5384138822555542,
      "learning_rate": 0.0005829522877238151,
      "loss": 2.9069,
      "step": 24843
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.361711025238037,
      "learning_rate": 0.0005829509284100781,
      "loss": 3.1531,
      "step": 24844
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.594041347503662,
      "learning_rate": 0.0005829495690437351,
      "loss": 3.0604,
      "step": 24845
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3183802366256714,
      "learning_rate": 0.0005829482096247865,
      "loss": 2.8955,
      "step": 24846
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7512480020523071,
      "learning_rate": 0.0005829468501532325,
      "loss": 2.7158,
      "step": 24847
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2468849420547485,
      "learning_rate": 0.0005829454906290733,
      "loss": 3.0793,
      "step": 24848
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4114842414855957,
      "learning_rate": 0.0005829441310523091,
      "loss": 3.0934,
      "step": 24849
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5518332719802856,
      "learning_rate": 0.0005829427714229404,
      "loss": 3.2324,
      "step": 24850
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3335562944412231,
      "learning_rate": 0.0005829414117409673,
      "loss": 3.1443,
      "step": 24851
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7805131673812866,
      "learning_rate": 0.00058294005200639,
      "loss": 3.1309,
      "step": 24852
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.381560206413269,
      "learning_rate": 0.0005829386922192087,
      "loss": 2.7797,
      "step": 24853
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.293745517730713,
      "learning_rate": 0.000582937332379424,
      "loss": 3.1651,
      "step": 24854
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1859798431396484,
      "learning_rate": 0.0005829359724870356,
      "loss": 3.1823,
      "step": 24855
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8451082706451416,
      "learning_rate": 0.0005829346125420442,
      "loss": 3.077,
      "step": 24856
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2838743925094604,
      "learning_rate": 0.00058293325254445,
      "loss": 3.2798,
      "step": 24857
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.58344566822052,
      "learning_rate": 0.0005829318924942531,
      "loss": 2.8909,
      "step": 24858
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7008862495422363,
      "learning_rate": 0.0005829305323914538,
      "loss": 3.2194,
      "step": 24859
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8735313415527344,
      "learning_rate": 0.0005829291722360524,
      "loss": 2.9631,
      "step": 24860
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9491360187530518,
      "learning_rate": 0.0005829278120280491,
      "loss": 3.0371,
      "step": 24861
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4229316711425781,
      "learning_rate": 0.0005829264517674443,
      "loss": 3.1193,
      "step": 24862
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7144724130630493,
      "learning_rate": 0.0005829250914542379,
      "loss": 2.8972,
      "step": 24863
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.752107858657837,
      "learning_rate": 0.0005829237310884306,
      "loss": 3.0282,
      "step": 24864
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6147648096084595,
      "learning_rate": 0.0005829223706700223,
      "loss": 3.0208,
      "step": 24865
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.2340328693389893,
      "learning_rate": 0.0005829210101990134,
      "loss": 3.3069,
      "step": 24866
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4201830625534058,
      "learning_rate": 0.0005829196496754042,
      "loss": 3.1418,
      "step": 24867
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.4214084148406982,
      "learning_rate": 0.000582918289099195,
      "loss": 2.6467,
      "step": 24868
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8481582403182983,
      "learning_rate": 0.0005829169284703858,
      "loss": 3.1091,
      "step": 24869
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.74758780002594,
      "learning_rate": 0.000582915567788977,
      "loss": 3.3002,
      "step": 24870
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.3256685733795166,
      "learning_rate": 0.000582914207054969,
      "loss": 3.071,
      "step": 24871
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8743740320205688,
      "learning_rate": 0.0005829128462683617,
      "loss": 3.2657,
      "step": 24872
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5650848150253296,
      "learning_rate": 0.0005829114854291557,
      "loss": 2.8826,
      "step": 24873
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6267086267471313,
      "learning_rate": 0.0005829101245373511,
      "loss": 2.8958,
      "step": 24874
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.388080358505249,
      "learning_rate": 0.0005829087635929481,
      "loss": 2.7234,
      "step": 24875
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5153050422668457,
      "learning_rate": 0.000582907402595947,
      "loss": 3.2642,
      "step": 24876
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.027705669403076,
      "learning_rate": 0.0005829060415463482,
      "loss": 3.3754,
      "step": 24877
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.012083053588867,
      "learning_rate": 0.0005829046804441518,
      "loss": 3.2695,
      "step": 24878
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.106273651123047,
      "learning_rate": 0.000582903319289358,
      "loss": 3.0304,
      "step": 24879
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.622279167175293,
      "learning_rate": 0.0005829019580819672,
      "loss": 2.9857,
      "step": 24880
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0799269676208496,
      "learning_rate": 0.0005829005968219796,
      "loss": 3.4047,
      "step": 24881
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7262766361236572,
      "learning_rate": 0.0005828992355093954,
      "loss": 3.07,
      "step": 24882
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6965283155441284,
      "learning_rate": 0.0005828978741442148,
      "loss": 3.0151,
      "step": 24883
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.401891827583313,
      "learning_rate": 0.0005828965127264383,
      "loss": 3.083,
      "step": 24884
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3726590871810913,
      "learning_rate": 0.000582895151256066,
      "loss": 2.9065,
      "step": 24885
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3677183389663696,
      "learning_rate": 0.000582893789733098,
      "loss": 3.0095,
      "step": 24886
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.93435800075531,
      "learning_rate": 0.0005828924281575348,
      "loss": 3.0948,
      "step": 24887
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.571376085281372,
      "learning_rate": 0.0005828910665293766,
      "loss": 3.0137,
      "step": 24888
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.483565330505371,
      "learning_rate": 0.0005828897048486235,
      "loss": 3.0691,
      "step": 24889
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.041273832321167,
      "learning_rate": 0.0005828883431152759,
      "loss": 3.0115,
      "step": 24890
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9644863605499268,
      "learning_rate": 0.0005828869813293341,
      "loss": 3.211,
      "step": 24891
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.486799716949463,
      "learning_rate": 0.0005828856194907983,
      "loss": 3.035,
      "step": 24892
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4955123662948608,
      "learning_rate": 0.0005828842575996686,
      "loss": 3.3865,
      "step": 24893
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.474128246307373,
      "learning_rate": 0.0005828828956559454,
      "loss": 2.9459,
      "step": 24894
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.471825122833252,
      "learning_rate": 0.0005828815336596289,
      "loss": 3.327,
      "step": 24895
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4256243705749512,
      "learning_rate": 0.0005828801716107195,
      "loss": 3.1824,
      "step": 24896
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7348064184188843,
      "learning_rate": 0.0005828788095092173,
      "loss": 3.2101,
      "step": 24897
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3696999549865723,
      "learning_rate": 0.0005828774473551225,
      "loss": 3.0661,
      "step": 24898
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7522958517074585,
      "learning_rate": 0.0005828760851484355,
      "loss": 3.1085,
      "step": 24899
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.51682448387146,
      "learning_rate": 0.0005828747228891565,
      "loss": 3.031,
      "step": 24900
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3391714096069336,
      "learning_rate": 0.0005828733605772857,
      "loss": 3.1019,
      "step": 24901
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0239999294281006,
      "learning_rate": 0.0005828719982128235,
      "loss": 3.0566,
      "step": 24902
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6532407999038696,
      "learning_rate": 0.00058287063579577,
      "loss": 3.2944,
      "step": 24903
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9044687747955322,
      "learning_rate": 0.0005828692733261255,
      "loss": 3.3758,
      "step": 24904
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0576117038726807,
      "learning_rate": 0.0005828679108038901,
      "loss": 2.957,
      "step": 24905
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6800340414047241,
      "learning_rate": 0.0005828665482290645,
      "loss": 3.1066,
      "step": 24906
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6420966386795044,
      "learning_rate": 0.0005828651856016485,
      "loss": 3.151,
      "step": 24907
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5275565385818481,
      "learning_rate": 0.0005828638229216426,
      "loss": 2.9463,
      "step": 24908
    },
    {
      "epoch": 0.32,
      "grad_norm": 3.0685746669769287,
      "learning_rate": 0.000582862460189047,
      "loss": 2.6929,
      "step": 24909
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7531195878982544,
      "learning_rate": 0.0005828610974038618,
      "loss": 2.983,
      "step": 24910
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3286560773849487,
      "learning_rate": 0.0005828597345660876,
      "loss": 3.3201,
      "step": 24911
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7012522220611572,
      "learning_rate": 0.0005828583716757243,
      "loss": 2.9899,
      "step": 24912
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8718005418777466,
      "learning_rate": 0.0005828570087327722,
      "loss": 3.1651,
      "step": 24913
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5447715520858765,
      "learning_rate": 0.0005828556457372318,
      "loss": 3.1794,
      "step": 24914
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7620891332626343,
      "learning_rate": 0.0005828542826891031,
      "loss": 3.006,
      "step": 24915
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3709839582443237,
      "learning_rate": 0.0005828529195883865,
      "loss": 3.0442,
      "step": 24916
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3274056911468506,
      "learning_rate": 0.0005828515564350822,
      "loss": 3.09,
      "step": 24917
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.333583950996399,
      "learning_rate": 0.0005828501932291905,
      "loss": 3.1588,
      "step": 24918
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5430761575698853,
      "learning_rate": 0.0005828488299707115,
      "loss": 3.075,
      "step": 24919
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.560924768447876,
      "learning_rate": 0.0005828474666596457,
      "loss": 3.2148,
      "step": 24920
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1697568893432617,
      "learning_rate": 0.000582846103295993,
      "loss": 3.1325,
      "step": 24921
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.069188356399536,
      "learning_rate": 0.000582844739879754,
      "loss": 3.1776,
      "step": 24922
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5739881992340088,
      "learning_rate": 0.0005828433764109288,
      "loss": 3.0613,
      "step": 24923
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8363535404205322,
      "learning_rate": 0.0005828420128895177,
      "loss": 3.1356,
      "step": 24924
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.512709379196167,
      "learning_rate": 0.0005828406493155209,
      "loss": 3.1167,
      "step": 24925
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3698875904083252,
      "learning_rate": 0.0005828392856889387,
      "loss": 2.9439,
      "step": 24926
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8700449466705322,
      "learning_rate": 0.0005828379220097714,
      "loss": 3.1672,
      "step": 24927
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8134065866470337,
      "learning_rate": 0.0005828365582780192,
      "loss": 3.239,
      "step": 24928
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.418412446975708,
      "learning_rate": 0.0005828351944936823,
      "loss": 3.1284,
      "step": 24929
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.9078184366226196,
      "learning_rate": 0.0005828338306567609,
      "loss": 2.9273,
      "step": 24930
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3952909708023071,
      "learning_rate": 0.0005828324667672554,
      "loss": 3.1891,
      "step": 24931
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.738786220550537,
      "learning_rate": 0.0005828311028251662,
      "loss": 3.4803,
      "step": 24932
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6998013257980347,
      "learning_rate": 0.0005828297388304931,
      "loss": 2.9173,
      "step": 24933
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5470407009124756,
      "learning_rate": 0.0005828283747832368,
      "loss": 2.8299,
      "step": 24934
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7535821199417114,
      "learning_rate": 0.0005828270106833973,
      "loss": 2.9944,
      "step": 24935
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7866259813308716,
      "learning_rate": 0.0005828256465309748,
      "loss": 3.235,
      "step": 24936
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2338650226593018,
      "learning_rate": 0.0005828242823259698,
      "loss": 3.2727,
      "step": 24937
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4381394386291504,
      "learning_rate": 0.0005828229180683824,
      "loss": 2.9163,
      "step": 24938
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8046605587005615,
      "learning_rate": 0.0005828215537582129,
      "loss": 2.8915,
      "step": 24939
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.0579733848571777,
      "learning_rate": 0.0005828201893954615,
      "loss": 3.0421,
      "step": 24940
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3556861877441406,
      "learning_rate": 0.0005828188249801286,
      "loss": 3.0772,
      "step": 24941
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.9141509532928467,
      "learning_rate": 0.0005828174605122142,
      "loss": 2.908,
      "step": 24942
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.2972067594528198,
      "learning_rate": 0.0005828160959917187,
      "loss": 2.9284,
      "step": 24943
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.443058967590332,
      "learning_rate": 0.0005828147314186424,
      "loss": 3.2847,
      "step": 24944
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.181002616882324,
      "learning_rate": 0.0005828133667929855,
      "loss": 3.0865,
      "step": 24945
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3069241046905518,
      "learning_rate": 0.0005828120021147483,
      "loss": 3.1749,
      "step": 24946
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4964336156845093,
      "learning_rate": 0.0005828106373839309,
      "loss": 3.1687,
      "step": 24947
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5852916240692139,
      "learning_rate": 0.0005828092726005338,
      "loss": 3.0623,
      "step": 24948
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.8355270624160767,
      "learning_rate": 0.0005828079077645571,
      "loss": 3.171,
      "step": 24949
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5694998502731323,
      "learning_rate": 0.0005828065428760009,
      "loss": 2.9056,
      "step": 24950
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.6218245029449463,
      "learning_rate": 0.0005828051779348658,
      "loss": 3.0661,
      "step": 24951
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.346889615058899,
      "learning_rate": 0.0005828038129411518,
      "loss": 3.0424,
      "step": 24952
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.7597312927246094,
      "learning_rate": 0.0005828024478948592,
      "loss": 3.1362,
      "step": 24953
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.3777198791503906,
      "learning_rate": 0.0005828010827959884,
      "loss": 3.075,
      "step": 24954
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.315022349357605,
      "learning_rate": 0.0005827997176445396,
      "loss": 2.901,
      "step": 24955
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.5357627868652344,
      "learning_rate": 0.0005827983524405127,
      "loss": 2.931,
      "step": 24956
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4911354780197144,
      "learning_rate": 0.0005827969871839084,
      "loss": 2.9609,
      "step": 24957
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.44620680809021,
      "learning_rate": 0.0005827956218747268,
      "loss": 3.1956,
      "step": 24958
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1337921619415283,
      "learning_rate": 0.0005827942565129683,
      "loss": 2.8959,
      "step": 24959
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.4201306104660034,
      "learning_rate": 0.0005827928910986329,
      "loss": 3.043,
      "step": 24960
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.380851984024048,
      "learning_rate": 0.0005827915256317209,
      "loss": 2.9982,
      "step": 24961
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.043149948120117,
      "learning_rate": 0.0005827901601122327,
      "loss": 3.1502,
      "step": 24962
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4277769327163696,
      "learning_rate": 0.0005827887945401685,
      "loss": 3.1046,
      "step": 24963
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.482398509979248,
      "learning_rate": 0.0005827874289155285,
      "loss": 2.9057,
      "step": 24964
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.260788679122925,
      "learning_rate": 0.0005827860632383129,
      "loss": 2.9516,
      "step": 24965
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3574599027633667,
      "learning_rate": 0.000582784697508522,
      "loss": 2.8634,
      "step": 24966
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.707385540008545,
      "learning_rate": 0.0005827833317261562,
      "loss": 2.8706,
      "step": 24967
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4451372623443604,
      "learning_rate": 0.0005827819658912156,
      "loss": 2.9158,
      "step": 24968
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6038939952850342,
      "learning_rate": 0.0005827806000037005,
      "loss": 3.1018,
      "step": 24969
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0287585258483887,
      "learning_rate": 0.0005827792340636111,
      "loss": 3.0528,
      "step": 24970
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3789517879486084,
      "learning_rate": 0.0005827778680709478,
      "loss": 3.1204,
      "step": 24971
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.249250888824463,
      "learning_rate": 0.0005827765020257107,
      "loss": 2.7095,
      "step": 24972
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.8175668716430664,
      "learning_rate": 0.0005827751359279001,
      "loss": 3.0334,
      "step": 24973
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9389441013336182,
      "learning_rate": 0.0005827737697775163,
      "loss": 2.8898,
      "step": 24974
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0252788066864014,
      "learning_rate": 0.0005827724035745596,
      "loss": 3.3863,
      "step": 24975
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4238672256469727,
      "learning_rate": 0.0005827710373190301,
      "loss": 3.0317,
      "step": 24976
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.3684890270233154,
      "learning_rate": 0.000582769671010928,
      "loss": 2.949,
      "step": 24977
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3581373691558838,
      "learning_rate": 0.0005827683046502538,
      "loss": 3.1225,
      "step": 24978
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.650971531867981,
      "learning_rate": 0.0005827669382370076,
      "loss": 3.053,
      "step": 24979
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3482253551483154,
      "learning_rate": 0.0005827655717711897,
      "loss": 3.2992,
      "step": 24980
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7722461223602295,
      "learning_rate": 0.0005827642052528004,
      "loss": 3.0814,
      "step": 24981
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.733483076095581,
      "learning_rate": 0.0005827628386818398,
      "loss": 2.9528,
      "step": 24982
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9780384302139282,
      "learning_rate": 0.0005827614720583083,
      "loss": 2.9622,
      "step": 24983
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3233726024627686,
      "learning_rate": 0.0005827601053822061,
      "loss": 3.0387,
      "step": 24984
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2279185056686401,
      "learning_rate": 0.0005827587386535335,
      "loss": 3.224,
      "step": 24985
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4818860292434692,
      "learning_rate": 0.0005827573718722907,
      "loss": 3.1328,
      "step": 24986
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.110567331314087,
      "learning_rate": 0.0005827560050384779,
      "loss": 3.1686,
      "step": 24987
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4833776950836182,
      "learning_rate": 0.0005827546381520954,
      "loss": 3.2819,
      "step": 24988
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.41472327709198,
      "learning_rate": 0.0005827532712131436,
      "loss": 3.2139,
      "step": 24989
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4276206493377686,
      "learning_rate": 0.0005827519042216225,
      "loss": 3.2006,
      "step": 24990
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0968844890594482,
      "learning_rate": 0.0005827505371775326,
      "loss": 2.9966,
      "step": 24991
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3215707540512085,
      "learning_rate": 0.000582749170080874,
      "loss": 3.0058,
      "step": 24992
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.509952187538147,
      "learning_rate": 0.000582747802931647,
      "loss": 3.1796,
      "step": 24993
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8074404001235962,
      "learning_rate": 0.0005827464357298518,
      "loss": 3.1118,
      "step": 24994
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8381550312042236,
      "learning_rate": 0.0005827450684754887,
      "loss": 3.1025,
      "step": 24995
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8466359376907349,
      "learning_rate": 0.000582743701168558,
      "loss": 2.7849,
      "step": 24996
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6272716522216797,
      "learning_rate": 0.0005827423338090598,
      "loss": 3.0662,
      "step": 24997
    },
    {
      "epoch": 0.33,
      "grad_norm": 4.080760478973389,
      "learning_rate": 0.0005827409663969946,
      "loss": 3.1896,
      "step": 24998
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3514328002929688,
      "learning_rate": 0.0005827395989323625,
      "loss": 3.0458,
      "step": 24999
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6279922723770142,
      "learning_rate": 0.0005827382314151637,
      "loss": 3.1895,
      "step": 25000
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1985294818878174,
      "learning_rate": 0.0005827368638453987,
      "loss": 3.0206,
      "step": 25001
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.686199426651001,
      "learning_rate": 0.0005827354962230674,
      "loss": 2.8924,
      "step": 25002
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6730936765670776,
      "learning_rate": 0.0005827341285481702,
      "loss": 3.1839,
      "step": 25003
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.457458734512329,
      "learning_rate": 0.0005827327608207075,
      "loss": 3.032,
      "step": 25004
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.935502052307129,
      "learning_rate": 0.0005827313930406794,
      "loss": 2.9576,
      "step": 25005
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.388719081878662,
      "learning_rate": 0.0005827300252080862,
      "loss": 3.2382,
      "step": 25006
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5060044527053833,
      "learning_rate": 0.0005827286573229281,
      "loss": 3.3614,
      "step": 25007
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6390964984893799,
      "learning_rate": 0.0005827272893852056,
      "loss": 3.2067,
      "step": 25008
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3481966257095337,
      "learning_rate": 0.0005827259213949186,
      "loss": 2.9497,
      "step": 25009
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4072643518447876,
      "learning_rate": 0.0005827245533520675,
      "loss": 2.9231,
      "step": 25010
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3599083423614502,
      "learning_rate": 0.0005827231852566526,
      "loss": 3.0469,
      "step": 25011
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4533952474594116,
      "learning_rate": 0.0005827218171086741,
      "loss": 2.9703,
      "step": 25012
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5151963233947754,
      "learning_rate": 0.0005827204489081324,
      "loss": 2.9799,
      "step": 25013
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.453021764755249,
      "learning_rate": 0.0005827190806550275,
      "loss": 3.0494,
      "step": 25014
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.682829737663269,
      "learning_rate": 0.0005827177123493599,
      "loss": 2.9427,
      "step": 25015
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9000489711761475,
      "learning_rate": 0.0005827163439911296,
      "loss": 2.8886,
      "step": 25016
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7788786888122559,
      "learning_rate": 0.000582714975580337,
      "loss": 2.9636,
      "step": 25017
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3906660079956055,
      "learning_rate": 0.0005827136071169824,
      "loss": 3.0227,
      "step": 25018
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0766053199768066,
      "learning_rate": 0.0005827122386010661,
      "loss": 2.8093,
      "step": 25019
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.585626244544983,
      "learning_rate": 0.0005827108700325881,
      "loss": 3.2233,
      "step": 25020
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4011248350143433,
      "learning_rate": 0.0005827095014115489,
      "loss": 2.9169,
      "step": 25021
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7051838636398315,
      "learning_rate": 0.0005827081327379487,
      "loss": 3.1522,
      "step": 25022
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5058737993240356,
      "learning_rate": 0.0005827067640117876,
      "loss": 3.2341,
      "step": 25023
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5768108367919922,
      "learning_rate": 0.0005827053952330662,
      "loss": 3.1745,
      "step": 25024
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4444369077682495,
      "learning_rate": 0.0005827040264017843,
      "loss": 3.0784,
      "step": 25025
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.613747239112854,
      "learning_rate": 0.0005827026575179425,
      "loss": 2.9581,
      "step": 25026
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5905107259750366,
      "learning_rate": 0.0005827012885815409,
      "loss": 3.0172,
      "step": 25027
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5535080432891846,
      "learning_rate": 0.0005826999195925798,
      "loss": 3.0634,
      "step": 25028
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.790288209915161,
      "learning_rate": 0.0005826985505510594,
      "loss": 2.8468,
      "step": 25029
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.623100757598877,
      "learning_rate": 0.00058269718145698,
      "loss": 3.224,
      "step": 25030
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8738919496536255,
      "learning_rate": 0.000582695812310342,
      "loss": 3.1312,
      "step": 25031
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5076687335968018,
      "learning_rate": 0.0005826944431111454,
      "loss": 3.2289,
      "step": 25032
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8491177558898926,
      "learning_rate": 0.0005826930738593905,
      "loss": 3.2171,
      "step": 25033
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4158662557601929,
      "learning_rate": 0.0005826917045550778,
      "loss": 3.1749,
      "step": 25034
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5985677242279053,
      "learning_rate": 0.0005826903351982073,
      "loss": 3.4039,
      "step": 25035
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8891583681106567,
      "learning_rate": 0.0005826889657887793,
      "loss": 3.2503,
      "step": 25036
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1787453889846802,
      "learning_rate": 0.0005826875963267941,
      "loss": 2.804,
      "step": 25037
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7605221271514893,
      "learning_rate": 0.0005826862268122519,
      "loss": 3.2581,
      "step": 25038
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4310321807861328,
      "learning_rate": 0.0005826848572451531,
      "loss": 2.7615,
      "step": 25039
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4268945455551147,
      "learning_rate": 0.0005826834876254978,
      "loss": 3.0873,
      "step": 25040
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.399242639541626,
      "learning_rate": 0.0005826821179532862,
      "loss": 3.1783,
      "step": 25041
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5571298599243164,
      "learning_rate": 0.0005826807482285188,
      "loss": 3.2724,
      "step": 25042
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4130107164382935,
      "learning_rate": 0.0005826793784511957,
      "loss": 3.0175,
      "step": 25043
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5400248765945435,
      "learning_rate": 0.0005826780086213171,
      "loss": 3.2203,
      "step": 25044
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5439311265945435,
      "learning_rate": 0.0005826766387388834,
      "loss": 3.009,
      "step": 25045
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.292929768562317,
      "learning_rate": 0.0005826752688038947,
      "loss": 3.0649,
      "step": 25046
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9425458908081055,
      "learning_rate": 0.0005826738988163513,
      "loss": 3.1539,
      "step": 25047
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7704442739486694,
      "learning_rate": 0.0005826725287762535,
      "loss": 3.1676,
      "step": 25048
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.60463285446167,
      "learning_rate": 0.0005826711586836016,
      "loss": 2.9888,
      "step": 25049
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8110723495483398,
      "learning_rate": 0.0005826697885383958,
      "loss": 3.0662,
      "step": 25050
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2020988464355469,
      "learning_rate": 0.0005826684183406364,
      "loss": 3.257,
      "step": 25051
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0536112785339355,
      "learning_rate": 0.0005826670480903235,
      "loss": 2.9311,
      "step": 25052
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5144444704055786,
      "learning_rate": 0.0005826656777874576,
      "loss": 2.7496,
      "step": 25053
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6530892848968506,
      "learning_rate": 0.0005826643074320387,
      "loss": 3.0247,
      "step": 25054
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7530654668807983,
      "learning_rate": 0.0005826629370240671,
      "loss": 3.2773,
      "step": 25055
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.38338303565979,
      "learning_rate": 0.0005826615665635431,
      "loss": 3.274,
      "step": 25056
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.776771903038025,
      "learning_rate": 0.0005826601960504672,
      "loss": 2.9295,
      "step": 25057
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.565673589706421,
      "learning_rate": 0.0005826588254848392,
      "loss": 3.1214,
      "step": 25058
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4530417919158936,
      "learning_rate": 0.0005826574548666597,
      "loss": 3.1362,
      "step": 25059
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.296067237854004,
      "learning_rate": 0.000582656084195929,
      "loss": 3.1104,
      "step": 25060
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4628963470458984,
      "learning_rate": 0.000582654713472647,
      "loss": 3.0096,
      "step": 25061
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6401299238204956,
      "learning_rate": 0.0005826533426968142,
      "loss": 2.9697,
      "step": 25062
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6942209005355835,
      "learning_rate": 0.0005826519718684308,
      "loss": 3.0862,
      "step": 25063
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6241209506988525,
      "learning_rate": 0.0005826506009874971,
      "loss": 2.6825,
      "step": 25064
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.421554684638977,
      "learning_rate": 0.0005826492300540132,
      "loss": 3.2324,
      "step": 25065
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5266118049621582,
      "learning_rate": 0.0005826478590679796,
      "loss": 3.1434,
      "step": 25066
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2544294595718384,
      "learning_rate": 0.0005826464880293964,
      "loss": 3.321,
      "step": 25067
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6677799224853516,
      "learning_rate": 0.0005826451169382639,
      "loss": 3.0462,
      "step": 25068
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.57256019115448,
      "learning_rate": 0.0005826437457945823,
      "loss": 2.8736,
      "step": 25069
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7374871969223022,
      "learning_rate": 0.000582642374598352,
      "loss": 3.2614,
      "step": 25070
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7581647634506226,
      "learning_rate": 0.0005826410033495731,
      "loss": 3.1886,
      "step": 25071
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.876746416091919,
      "learning_rate": 0.0005826396320482457,
      "loss": 3.1446,
      "step": 25072
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.641977548599243,
      "learning_rate": 0.0005826382606943705,
      "loss": 3.1588,
      "step": 25073
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.700478434562683,
      "learning_rate": 0.0005826368892879474,
      "loss": 3.0209,
      "step": 25074
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3002434968948364,
      "learning_rate": 0.0005826355178289767,
      "loss": 3.1144,
      "step": 25075
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6208306550979614,
      "learning_rate": 0.0005826341463174589,
      "loss": 3.2805,
      "step": 25076
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.099687099456787,
      "learning_rate": 0.000582632774753394,
      "loss": 2.799,
      "step": 25077
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.343700408935547,
      "learning_rate": 0.0005826314031367823,
      "loss": 3.0293,
      "step": 25078
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8130027055740356,
      "learning_rate": 0.0005826300314676241,
      "loss": 3.252,
      "step": 25079
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.785054922103882,
      "learning_rate": 0.0005826286597459196,
      "loss": 3.0483,
      "step": 25080
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1003408432006836,
      "learning_rate": 0.0005826272879716692,
      "loss": 3.053,
      "step": 25081
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1140241622924805,
      "learning_rate": 0.0005826259161448729,
      "loss": 3.245,
      "step": 25082
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.222905397415161,
      "learning_rate": 0.0005826245442655313,
      "loss": 3.0884,
      "step": 25083
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.60036301612854,
      "learning_rate": 0.0005826231723336443,
      "loss": 3.2316,
      "step": 25084
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6106209754943848,
      "learning_rate": 0.0005826218003492124,
      "loss": 3.2371,
      "step": 25085
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0329060554504395,
      "learning_rate": 0.0005826204283122358,
      "loss": 2.8986,
      "step": 25086
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5348838567733765,
      "learning_rate": 0.0005826190562227147,
      "loss": 2.8494,
      "step": 25087
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.731661319732666,
      "learning_rate": 0.0005826176840806493,
      "loss": 3.3229,
      "step": 25088
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.366036295890808,
      "learning_rate": 0.00058261631188604,
      "loss": 3.0416,
      "step": 25089
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9746278524398804,
      "learning_rate": 0.0005826149396388871,
      "loss": 2.9017,
      "step": 25090
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5013957023620605,
      "learning_rate": 0.0005826135673391907,
      "loss": 2.9002,
      "step": 25091
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5852439403533936,
      "learning_rate": 0.0005826121949869509,
      "loss": 2.9945,
      "step": 25092
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2783235311508179,
      "learning_rate": 0.0005826108225821683,
      "loss": 2.9325,
      "step": 25093
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.262887716293335,
      "learning_rate": 0.0005826094501248431,
      "loss": 2.92,
      "step": 25094
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.626323938369751,
      "learning_rate": 0.0005826080776149754,
      "loss": 3.0489,
      "step": 25095
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4879379272460938,
      "learning_rate": 0.0005826067050525654,
      "loss": 2.9183,
      "step": 25096
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3830682039260864,
      "learning_rate": 0.0005826053324376137,
      "loss": 2.9895,
      "step": 25097
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4661059379577637,
      "learning_rate": 0.0005826039597701202,
      "loss": 3.0101,
      "step": 25098
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.005282163619995,
      "learning_rate": 0.0005826025870500854,
      "loss": 3.0633,
      "step": 25099
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3781288862228394,
      "learning_rate": 0.0005826012142775092,
      "loss": 3.3674,
      "step": 25100
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2603979110717773,
      "learning_rate": 0.0005825998414523924,
      "loss": 2.9695,
      "step": 25101
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.511617660522461,
      "learning_rate": 0.0005825984685747347,
      "loss": 3.303,
      "step": 25102
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.563234567642212,
      "learning_rate": 0.0005825970956445368,
      "loss": 2.9981,
      "step": 25103
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5215246677398682,
      "learning_rate": 0.0005825957226617987,
      "loss": 3.1832,
      "step": 25104
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.091848134994507,
      "learning_rate": 0.0005825943496265207,
      "loss": 2.9164,
      "step": 25105
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5236358642578125,
      "learning_rate": 0.000582592976538703,
      "loss": 3.0532,
      "step": 25106
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1675503253936768,
      "learning_rate": 0.0005825916033983461,
      "loss": 3.2361,
      "step": 25107
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.825130820274353,
      "learning_rate": 0.00058259023020545,
      "loss": 3.2483,
      "step": 25108
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6416832208633423,
      "learning_rate": 0.000582588856960015,
      "loss": 3.0254,
      "step": 25109
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2639533281326294,
      "learning_rate": 0.0005825874836620415,
      "loss": 3.0793,
      "step": 25110
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5122064352035522,
      "learning_rate": 0.0005825861103115295,
      "loss": 3.0285,
      "step": 25111
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3318978548049927,
      "learning_rate": 0.0005825847369084796,
      "loss": 3.0082,
      "step": 25112
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6752115488052368,
      "learning_rate": 0.0005825833634528918,
      "loss": 3.1643,
      "step": 25113
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3277769088745117,
      "learning_rate": 0.0005825819899447663,
      "loss": 2.899,
      "step": 25114
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5863285064697266,
      "learning_rate": 0.0005825806163841037,
      "loss": 3.3624,
      "step": 25115
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.451310396194458,
      "learning_rate": 0.0005825792427709038,
      "loss": 3.2593,
      "step": 25116
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8609442710876465,
      "learning_rate": 0.0005825778691051672,
      "loss": 3.1217,
      "step": 25117
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3997429609298706,
      "learning_rate": 0.0005825764953868941,
      "loss": 3.003,
      "step": 25118
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4033608436584473,
      "learning_rate": 0.0005825751216160845,
      "loss": 3.3087,
      "step": 25119
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3760985136032104,
      "learning_rate": 0.0005825737477927389,
      "loss": 3.0944,
      "step": 25120
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4796041250228882,
      "learning_rate": 0.0005825723739168576,
      "loss": 3.0027,
      "step": 25121
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5431045293807983,
      "learning_rate": 0.0005825709999884408,
      "loss": 3.1941,
      "step": 25122
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5521228313446045,
      "learning_rate": 0.0005825696260074886,
      "loss": 3.0083,
      "step": 25123
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4311532974243164,
      "learning_rate": 0.0005825682519740014,
      "loss": 3.2309,
      "step": 25124
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3161609172821045,
      "learning_rate": 0.0005825668778879795,
      "loss": 2.7421,
      "step": 25125
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7261762619018555,
      "learning_rate": 0.000582565503749423,
      "loss": 3.2913,
      "step": 25126
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6116704940795898,
      "learning_rate": 0.0005825641295583323,
      "loss": 3.3346,
      "step": 25127
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8546401262283325,
      "learning_rate": 0.0005825627553147075,
      "loss": 3.0927,
      "step": 25128
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4716647863388062,
      "learning_rate": 0.0005825613810185491,
      "loss": 3.073,
      "step": 25129
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8150514364242554,
      "learning_rate": 0.000582560006669857,
      "loss": 3.2939,
      "step": 25130
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.838463306427002,
      "learning_rate": 0.0005825586322686318,
      "loss": 2.8846,
      "step": 25131
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4940274953842163,
      "learning_rate": 0.0005825572578148737,
      "loss": 3.0135,
      "step": 25132
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5945589542388916,
      "learning_rate": 0.0005825558833085828,
      "loss": 3.1858,
      "step": 25133
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8503237962722778,
      "learning_rate": 0.0005825545087497593,
      "loss": 3.0978,
      "step": 25134
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6984148025512695,
      "learning_rate": 0.0005825531341384037,
      "loss": 3.0613,
      "step": 25135
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6692672967910767,
      "learning_rate": 0.0005825517594745162,
      "loss": 3.2288,
      "step": 25136
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.543533444404602,
      "learning_rate": 0.0005825503847580968,
      "loss": 2.9694,
      "step": 25137
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7558578252792358,
      "learning_rate": 0.0005825490099891461,
      "loss": 3.0844,
      "step": 25138
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.618071436882019,
      "learning_rate": 0.0005825476351676642,
      "loss": 3.2461,
      "step": 25139
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3735404014587402,
      "learning_rate": 0.0005825462602936513,
      "loss": 3.2558,
      "step": 25140
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1779611110687256,
      "learning_rate": 0.0005825448853671078,
      "loss": 3.0427,
      "step": 25141
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1576353311538696,
      "learning_rate": 0.0005825435103880337,
      "loss": 3.1352,
      "step": 25142
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7277237176895142,
      "learning_rate": 0.0005825421353564296,
      "loss": 3.1079,
      "step": 25143
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.05495285987854,
      "learning_rate": 0.0005825407602722955,
      "loss": 3.0717,
      "step": 25144
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.578356146812439,
      "learning_rate": 0.0005825393851356318,
      "loss": 2.8652,
      "step": 25145
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.416257619857788,
      "learning_rate": 0.0005825380099464387,
      "loss": 3.0756,
      "step": 25146
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5925705432891846,
      "learning_rate": 0.0005825366347047163,
      "loss": 3.1752,
      "step": 25147
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.831089735031128,
      "learning_rate": 0.0005825352594104652,
      "loss": 3.002,
      "step": 25148
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8529536724090576,
      "learning_rate": 0.0005825338840636854,
      "loss": 3.031,
      "step": 25149
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.7739803791046143,
      "learning_rate": 0.0005825325086643772,
      "loss": 3.0929,
      "step": 25150
    },
    {
      "epoch": 0.33,
      "grad_norm": 5.987334728240967,
      "learning_rate": 0.0005825311332125409,
      "loss": 3.0768,
      "step": 25151
    },
    {
      "epoch": 0.33,
      "grad_norm": 4.611886978149414,
      "learning_rate": 0.0005825297577081766,
      "loss": 2.9841,
      "step": 25152
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.789595127105713,
      "learning_rate": 0.0005825283821512849,
      "loss": 3.1485,
      "step": 25153
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.047584056854248,
      "learning_rate": 0.0005825270065418657,
      "loss": 3.1105,
      "step": 25154
    },
    {
      "epoch": 0.33,
      "grad_norm": 4.688537120819092,
      "learning_rate": 0.0005825256308799195,
      "loss": 3.0451,
      "step": 25155
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.889951467514038,
      "learning_rate": 0.0005825242551654462,
      "loss": 2.9191,
      "step": 25156
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5399692058563232,
      "learning_rate": 0.0005825228793984467,
      "loss": 3.1634,
      "step": 25157
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4030778408050537,
      "learning_rate": 0.0005825215035789205,
      "loss": 3.0141,
      "step": 25158
    },
    {
      "epoch": 0.33,
      "grad_norm": 4.574312210083008,
      "learning_rate": 0.0005825201277068684,
      "loss": 3.1401,
      "step": 25159
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.735280990600586,
      "learning_rate": 0.0005825187517822905,
      "loss": 3.199,
      "step": 25160
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0924322605133057,
      "learning_rate": 0.0005825173758051869,
      "loss": 3.1952,
      "step": 25161
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6504440307617188,
      "learning_rate": 0.0005825159997755582,
      "loss": 2.9091,
      "step": 25162
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0333023071289062,
      "learning_rate": 0.0005825146236934042,
      "loss": 3.1566,
      "step": 25163
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.7201266288757324,
      "learning_rate": 0.0005825132475587255,
      "loss": 3.088,
      "step": 25164
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5460889339447021,
      "learning_rate": 0.0005825118713715222,
      "loss": 2.8705,
      "step": 25165
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3738685846328735,
      "learning_rate": 0.0005825104951317947,
      "loss": 3.2567,
      "step": 25166
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4477410316467285,
      "learning_rate": 0.0005825091188395432,
      "loss": 3.2255,
      "step": 25167
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.9860005378723145,
      "learning_rate": 0.0005825077424947678,
      "loss": 3.0782,
      "step": 25168
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5591422319412231,
      "learning_rate": 0.000582506366097469,
      "loss": 3.1297,
      "step": 25169
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3346688747406006,
      "learning_rate": 0.0005825049896476468,
      "loss": 3.1444,
      "step": 25170
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.51054048538208,
      "learning_rate": 0.0005825036131453017,
      "loss": 3.0386,
      "step": 25171
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.429334282875061,
      "learning_rate": 0.0005825022365904338,
      "loss": 3.0325,
      "step": 25172
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3440861701965332,
      "learning_rate": 0.0005825008599830433,
      "loss": 3.1965,
      "step": 25173
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4425710439682007,
      "learning_rate": 0.0005824994833231308,
      "loss": 3.0671,
      "step": 25174
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4124863147735596,
      "learning_rate": 0.0005824981066106962,
      "loss": 2.8829,
      "step": 25175
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3095121383666992,
      "learning_rate": 0.0005824967298457398,
      "loss": 3.2374,
      "step": 25176
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2810598611831665,
      "learning_rate": 0.0005824953530282619,
      "loss": 2.8422,
      "step": 25177
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6906918287277222,
      "learning_rate": 0.000582493976158263,
      "loss": 2.8936,
      "step": 25178
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5235605239868164,
      "learning_rate": 0.0005824925992357429,
      "loss": 2.8458,
      "step": 25179
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6397117376327515,
      "learning_rate": 0.0005824912222607022,
      "loss": 3.2132,
      "step": 25180
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1778311729431152,
      "learning_rate": 0.000582489845233141,
      "loss": 3.0427,
      "step": 25181
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.104362964630127,
      "learning_rate": 0.0005824884681530597,
      "loss": 3.2763,
      "step": 25182
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3964838981628418,
      "learning_rate": 0.0005824870910204583,
      "loss": 3.2726,
      "step": 25183
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3608834743499756,
      "learning_rate": 0.0005824857138353374,
      "loss": 3.0098,
      "step": 25184
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.177319288253784,
      "learning_rate": 0.000582484336597697,
      "loss": 3.2549,
      "step": 25185
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8333384990692139,
      "learning_rate": 0.0005824829593075374,
      "loss": 3.3474,
      "step": 25186
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8340873718261719,
      "learning_rate": 0.0005824815819648588,
      "loss": 2.8555,
      "step": 25187
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1719276905059814,
      "learning_rate": 0.0005824802045696618,
      "loss": 3.167,
      "step": 25188
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6572773456573486,
      "learning_rate": 0.0005824788271219461,
      "loss": 3.0128,
      "step": 25189
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6109814643859863,
      "learning_rate": 0.0005824774496217126,
      "loss": 3.1983,
      "step": 25190
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5030962228775024,
      "learning_rate": 0.0005824760720689609,
      "loss": 3.2907,
      "step": 25191
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3932119607925415,
      "learning_rate": 0.0005824746944636917,
      "loss": 3.134,
      "step": 25192
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7121669054031372,
      "learning_rate": 0.0005824733168059051,
      "loss": 3.1415,
      "step": 25193
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6192175149917603,
      "learning_rate": 0.0005824719390956014,
      "loss": 3.0423,
      "step": 25194
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9602595567703247,
      "learning_rate": 0.0005824705613327807,
      "loss": 3.1124,
      "step": 25195
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.989274740219116,
      "learning_rate": 0.0005824691835174436,
      "loss": 3.2638,
      "step": 25196
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.348767638206482,
      "learning_rate": 0.00058246780564959,
      "loss": 3.0543,
      "step": 25197
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4189388751983643,
      "learning_rate": 0.0005824664277292203,
      "loss": 2.7953,
      "step": 25198
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.946592330932617,
      "learning_rate": 0.0005824650497563348,
      "loss": 3.2342,
      "step": 25199
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7580307722091675,
      "learning_rate": 0.0005824636717309338,
      "loss": 2.8289,
      "step": 25200
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7782503366470337,
      "learning_rate": 0.0005824622936530174,
      "loss": 2.991,
      "step": 25201
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9812042713165283,
      "learning_rate": 0.0005824609155225859,
      "loss": 3.2994,
      "step": 25202
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4795633554458618,
      "learning_rate": 0.0005824595373396396,
      "loss": 2.9855,
      "step": 25203
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7092632055282593,
      "learning_rate": 0.0005824581591041787,
      "loss": 3.3298,
      "step": 25204
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4066531658172607,
      "learning_rate": 0.0005824567808162035,
      "loss": 3.2731,
      "step": 25205
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3877074718475342,
      "learning_rate": 0.0005824554024757144,
      "loss": 3.4115,
      "step": 25206
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3639898300170898,
      "learning_rate": 0.0005824540240827113,
      "loss": 3.0223,
      "step": 25207
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4042184352874756,
      "learning_rate": 0.0005824526456371949,
      "loss": 3.1092,
      "step": 25208
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.357877254486084,
      "learning_rate": 0.0005824512671391651,
      "loss": 3.1592,
      "step": 25209
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4203203916549683,
      "learning_rate": 0.0005824498885886222,
      "loss": 2.9387,
      "step": 25210
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.128619909286499,
      "learning_rate": 0.0005824485099855666,
      "loss": 3.037,
      "step": 25211
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4273372888565063,
      "learning_rate": 0.0005824471313299986,
      "loss": 3.009,
      "step": 25212
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.991841435432434,
      "learning_rate": 0.0005824457526219181,
      "loss": 2.8709,
      "step": 25213
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6359357833862305,
      "learning_rate": 0.0005824443738613258,
      "loss": 3.2036,
      "step": 25214
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5954586267471313,
      "learning_rate": 0.0005824429950482217,
      "loss": 3.1452,
      "step": 25215
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4260119199752808,
      "learning_rate": 0.0005824416161826062,
      "loss": 3.055,
      "step": 25216
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.90348219871521,
      "learning_rate": 0.0005824402372644794,
      "loss": 3.0115,
      "step": 25217
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6925922632217407,
      "learning_rate": 0.0005824388582938415,
      "loss": 2.9382,
      "step": 25218
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.66026771068573,
      "learning_rate": 0.0005824374792706931,
      "loss": 3.0465,
      "step": 25219
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4140139818191528,
      "learning_rate": 0.0005824361001950341,
      "loss": 3.233,
      "step": 25220
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5286240577697754,
      "learning_rate": 0.000582434721066865,
      "loss": 2.82,
      "step": 25221
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9019392728805542,
      "learning_rate": 0.0005824333418861859,
      "loss": 3.093,
      "step": 25222
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6848995685577393,
      "learning_rate": 0.000582431962652997,
      "loss": 3.0465,
      "step": 25223
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5971245765686035,
      "learning_rate": 0.0005824305833672988,
      "loss": 3.0616,
      "step": 25224
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1029646396636963,
      "learning_rate": 0.0005824292040290914,
      "loss": 2.9868,
      "step": 25225
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5903266668319702,
      "learning_rate": 0.0005824278246383751,
      "loss": 3.047,
      "step": 25226
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.496260643005371,
      "learning_rate": 0.0005824264451951501,
      "loss": 3.0043,
      "step": 25227
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.43471360206604,
      "learning_rate": 0.0005824250656994167,
      "loss": 3.0903,
      "step": 25228
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.3203001022338867,
      "learning_rate": 0.0005824236861511751,
      "loss": 3.019,
      "step": 25229
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9082361459732056,
      "learning_rate": 0.0005824223065504258,
      "loss": 3.0682,
      "step": 25230
    },
    {
      "epoch": 0.33,
      "grad_norm": 4.091917514801025,
      "learning_rate": 0.0005824209268971687,
      "loss": 3.0452,
      "step": 25231
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.4437365531921387,
      "learning_rate": 0.0005824195471914042,
      "loss": 2.987,
      "step": 25232
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.526493549346924,
      "learning_rate": 0.0005824181674331326,
      "loss": 2.9369,
      "step": 25233
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7410393953323364,
      "learning_rate": 0.0005824167876223542,
      "loss": 3.3253,
      "step": 25234
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9176143407821655,
      "learning_rate": 0.0005824154077590691,
      "loss": 3.0582,
      "step": 25235
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.920257806777954,
      "learning_rate": 0.0005824140278432777,
      "loss": 3.1108,
      "step": 25236
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.20346999168396,
      "learning_rate": 0.0005824126478749802,
      "loss": 3.0171,
      "step": 25237
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.45259952545166,
      "learning_rate": 0.0005824112678541768,
      "loss": 3.0821,
      "step": 25238
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.266901969909668,
      "learning_rate": 0.0005824098877808678,
      "loss": 3.1589,
      "step": 25239
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5733970403671265,
      "learning_rate": 0.0005824085076550536,
      "loss": 2.9993,
      "step": 25240
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.178781270980835,
      "learning_rate": 0.0005824071274767342,
      "loss": 3.157,
      "step": 25241
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.128554582595825,
      "learning_rate": 0.0005824057472459101,
      "loss": 2.8529,
      "step": 25242
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.865615963935852,
      "learning_rate": 0.0005824043669625813,
      "loss": 3.0276,
      "step": 25243
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7392709255218506,
      "learning_rate": 0.0005824029866267484,
      "loss": 3.0303,
      "step": 25244
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7872233390808105,
      "learning_rate": 0.0005824016062384112,
      "loss": 3.0301,
      "step": 25245
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6541211605072021,
      "learning_rate": 0.0005824002257975704,
      "loss": 3.0526,
      "step": 25246
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4210447072982788,
      "learning_rate": 0.0005823988453042261,
      "loss": 2.9877,
      "step": 25247
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.6798386573791504,
      "learning_rate": 0.0005823974647583783,
      "loss": 3.054,
      "step": 25248
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.602712869644165,
      "learning_rate": 0.0005823960841600277,
      "loss": 3.0911,
      "step": 25249
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4624733924865723,
      "learning_rate": 0.0005823947035091743,
      "loss": 3.1895,
      "step": 25250
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5640735626220703,
      "learning_rate": 0.0005823933228058183,
      "loss": 3.1592,
      "step": 25251
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8152903318405151,
      "learning_rate": 0.00058239194204996,
      "loss": 3.0912,
      "step": 25252
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9712989330291748,
      "learning_rate": 0.0005823905612415999,
      "loss": 2.9866,
      "step": 25253
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4983640909194946,
      "learning_rate": 0.0005823891803807379,
      "loss": 3.2286,
      "step": 25254
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.303030014038086,
      "learning_rate": 0.0005823877994673745,
      "loss": 3.1456,
      "step": 25255
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.9136881828308105,
      "learning_rate": 0.0005823864185015099,
      "loss": 2.9597,
      "step": 25256
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.352651357650757,
      "learning_rate": 0.0005823850374831442,
      "loss": 2.9259,
      "step": 25257
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.141871690750122,
      "learning_rate": 0.000582383656412278,
      "loss": 2.9392,
      "step": 25258
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.572762966156006,
      "learning_rate": 0.0005823822752889111,
      "loss": 2.9606,
      "step": 25259
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9320186376571655,
      "learning_rate": 0.0005823808941130442,
      "loss": 2.8485,
      "step": 25260
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.677125334739685,
      "learning_rate": 0.0005823795128846774,
      "loss": 3.0654,
      "step": 25261
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7257848978042603,
      "learning_rate": 0.0005823781316038108,
      "loss": 3.1974,
      "step": 25262
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5986294746398926,
      "learning_rate": 0.0005823767502704447,
      "loss": 3.0775,
      "step": 25263
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.319643497467041,
      "learning_rate": 0.0005823753688845796,
      "loss": 3.0755,
      "step": 25264
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8920307159423828,
      "learning_rate": 0.0005823739874462155,
      "loss": 3.042,
      "step": 25265
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.805284023284912,
      "learning_rate": 0.0005823726059553528,
      "loss": 2.9355,
      "step": 25266
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.6992998123168945,
      "learning_rate": 0.0005823712244119916,
      "loss": 3.0376,
      "step": 25267
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8479162454605103,
      "learning_rate": 0.0005823698428161323,
      "loss": 3.2742,
      "step": 25268
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9504659175872803,
      "learning_rate": 0.0005823684611677753,
      "loss": 3.2959,
      "step": 25269
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3578221797943115,
      "learning_rate": 0.0005823670794669204,
      "loss": 3.1872,
      "step": 25270
    },
    {
      "epoch": 0.33,
      "grad_norm": 5.223516464233398,
      "learning_rate": 0.0005823656977135683,
      "loss": 2.9258,
      "step": 25271
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5240135192871094,
      "learning_rate": 0.000582364315907719,
      "loss": 3.0008,
      "step": 25272
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.544743537902832,
      "learning_rate": 0.0005823629340493728,
      "loss": 3.162,
      "step": 25273
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7305846214294434,
      "learning_rate": 0.0005823615521385301,
      "loss": 3.2012,
      "step": 25274
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9096821546554565,
      "learning_rate": 0.0005823601701751911,
      "loss": 3.3199,
      "step": 25275
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4472293853759766,
      "learning_rate": 0.000582358788159356,
      "loss": 3.1042,
      "step": 25276
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.667357325553894,
      "learning_rate": 0.000582357406091025,
      "loss": 3.1648,
      "step": 25277
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2916580438613892,
      "learning_rate": 0.0005823560239701984,
      "loss": 3.2059,
      "step": 25278
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.675342321395874,
      "learning_rate": 0.0005823546417968765,
      "loss": 3.0256,
      "step": 25279
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.241539239883423,
      "learning_rate": 0.0005823532595710596,
      "loss": 3.0846,
      "step": 25280
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.753041982650757,
      "learning_rate": 0.0005823518772927478,
      "loss": 3.1165,
      "step": 25281
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4658381938934326,
      "learning_rate": 0.0005823504949619416,
      "loss": 3.3533,
      "step": 25282
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.2803845405578613,
      "learning_rate": 0.000582349112578641,
      "loss": 2.8638,
      "step": 25283
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.737436056137085,
      "learning_rate": 0.0005823477301428463,
      "loss": 2.9999,
      "step": 25284
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6790640354156494,
      "learning_rate": 0.0005823463476545581,
      "loss": 3.3056,
      "step": 25285
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4179335832595825,
      "learning_rate": 0.0005823449651137762,
      "loss": 3.2905,
      "step": 25286
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.6276233196258545,
      "learning_rate": 0.000582343582520501,
      "loss": 3.0329,
      "step": 25287
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8631839752197266,
      "learning_rate": 0.000582342199874733,
      "loss": 3.1178,
      "step": 25288
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4658920764923096,
      "learning_rate": 0.0005823408171764721,
      "loss": 3.1704,
      "step": 25289
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3443490266799927,
      "learning_rate": 0.0005823394344257187,
      "loss": 2.9474,
      "step": 25290
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9275180101394653,
      "learning_rate": 0.0005823380516224731,
      "loss": 3.0542,
      "step": 25291
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.547296404838562,
      "learning_rate": 0.0005823366687667355,
      "loss": 3.1548,
      "step": 25292
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5694279670715332,
      "learning_rate": 0.0005823352858585062,
      "loss": 3.1408,
      "step": 25293
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4434453248977661,
      "learning_rate": 0.0005823339028977855,
      "loss": 3.0967,
      "step": 25294
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5721181631088257,
      "learning_rate": 0.0005823325198845735,
      "loss": 3.0461,
      "step": 25295
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3968541622161865,
      "learning_rate": 0.0005823311368188706,
      "loss": 3.0428,
      "step": 25296
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6389352083206177,
      "learning_rate": 0.000582329753700677,
      "loss": 3.1962,
      "step": 25297
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2699946165084839,
      "learning_rate": 0.000582328370529993,
      "loss": 3.0517,
      "step": 25298
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3550482988357544,
      "learning_rate": 0.0005823269873068189,
      "loss": 2.913,
      "step": 25299
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6941097974777222,
      "learning_rate": 0.0005823256040311548,
      "loss": 3.1317,
      "step": 25300
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9028302431106567,
      "learning_rate": 0.000582324220703001,
      "loss": 3.2171,
      "step": 25301
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2427997589111328,
      "learning_rate": 0.0005823228373223579,
      "loss": 3.0398,
      "step": 25302
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6444059610366821,
      "learning_rate": 0.0005823214538892255,
      "loss": 2.9877,
      "step": 25303
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6492143869400024,
      "learning_rate": 0.0005823200704036043,
      "loss": 3.1763,
      "step": 25304
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.556376576423645,
      "learning_rate": 0.0005823186868654945,
      "loss": 3.1597,
      "step": 25305
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3158220052719116,
      "learning_rate": 0.0005823173032748962,
      "loss": 3.1771,
      "step": 25306
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5492424964904785,
      "learning_rate": 0.00058231591963181,
      "loss": 3.1846,
      "step": 25307
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.6956493854522705,
      "learning_rate": 0.0005823145359362358,
      "loss": 2.996,
      "step": 25308
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4182676076889038,
      "learning_rate": 0.0005823131521881741,
      "loss": 3.2546,
      "step": 25309
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3942209482192993,
      "learning_rate": 0.0005823117683876249,
      "loss": 2.983,
      "step": 25310
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5781185626983643,
      "learning_rate": 0.0005823103845345887,
      "loss": 2.9869,
      "step": 25311
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.109872579574585,
      "learning_rate": 0.0005823090006290657,
      "loss": 3.1332,
      "step": 25312
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8689582347869873,
      "learning_rate": 0.000582307616671056,
      "loss": 3.0512,
      "step": 25313
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.534684658050537,
      "learning_rate": 0.0005823062326605601,
      "loss": 2.9632,
      "step": 25314
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6361690759658813,
      "learning_rate": 0.0005823048485975781,
      "loss": 2.9963,
      "step": 25315
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.565324306488037,
      "learning_rate": 0.0005823034644821104,
      "loss": 3.08,
      "step": 25316
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.138173818588257,
      "learning_rate": 0.0005823020803141571,
      "loss": 3.0423,
      "step": 25317
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4160847663879395,
      "learning_rate": 0.0005823006960937185,
      "loss": 3.2659,
      "step": 25318
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9748868942260742,
      "learning_rate": 0.0005822993118207948,
      "loss": 2.909,
      "step": 25319
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8470966815948486,
      "learning_rate": 0.0005822979274953863,
      "loss": 3.1418,
      "step": 25320
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7111096382141113,
      "learning_rate": 0.0005822965431174935,
      "loss": 3.2681,
      "step": 25321
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.397352933883667,
      "learning_rate": 0.0005822951586871162,
      "loss": 3.0936,
      "step": 25322
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.03479266166687,
      "learning_rate": 0.0005822937742042551,
      "loss": 3.0411,
      "step": 25323
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7083691358566284,
      "learning_rate": 0.0005822923896689102,
      "loss": 3.1902,
      "step": 25324
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6920979022979736,
      "learning_rate": 0.0005822910050810818,
      "loss": 3.0758,
      "step": 25325
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.114265203475952,
      "learning_rate": 0.0005822896204407702,
      "loss": 3.2953,
      "step": 25326
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9137420654296875,
      "learning_rate": 0.0005822882357479755,
      "loss": 3.0112,
      "step": 25327
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4830515384674072,
      "learning_rate": 0.0005822868510026982,
      "loss": 3.1417,
      "step": 25328
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2830860614776611,
      "learning_rate": 0.0005822854662049385,
      "loss": 3.02,
      "step": 25329
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.793221354484558,
      "learning_rate": 0.0005822840813546965,
      "loss": 2.8153,
      "step": 25330
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5148042440414429,
      "learning_rate": 0.0005822826964519726,
      "loss": 3.1205,
      "step": 25331
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7745693922042847,
      "learning_rate": 0.0005822813114967671,
      "loss": 2.978,
      "step": 25332
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.053049325942993,
      "learning_rate": 0.00058227992648908,
      "loss": 3.0722,
      "step": 25333
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.732202172279358,
      "learning_rate": 0.0005822785414289119,
      "loss": 3.0751,
      "step": 25334
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.678945779800415,
      "learning_rate": 0.0005822771563162629,
      "loss": 2.9722,
      "step": 25335
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3908418416976929,
      "learning_rate": 0.0005822757711511331,
      "loss": 2.9707,
      "step": 25336
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5398378372192383,
      "learning_rate": 0.0005822743859335229,
      "loss": 2.9059,
      "step": 25337
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6642167568206787,
      "learning_rate": 0.0005822730006634326,
      "loss": 3.0212,
      "step": 25338
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.614335298538208,
      "learning_rate": 0.0005822716153408626,
      "loss": 3.0484,
      "step": 25339
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5411322116851807,
      "learning_rate": 0.0005822702299658127,
      "loss": 2.921,
      "step": 25340
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5729236602783203,
      "learning_rate": 0.0005822688445382836,
      "loss": 3.2169,
      "step": 25341
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7088768482208252,
      "learning_rate": 0.0005822674590582754,
      "loss": 3.166,
      "step": 25342
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8276225328445435,
      "learning_rate": 0.0005822660735257883,
      "loss": 3.0038,
      "step": 25343
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.926100492477417,
      "learning_rate": 0.0005822646879408227,
      "loss": 2.9555,
      "step": 25344
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4497212171554565,
      "learning_rate": 0.0005822633023033787,
      "loss": 3.2017,
      "step": 25345
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.582719326019287,
      "learning_rate": 0.0005822619166134565,
      "loss": 3.1698,
      "step": 25346
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3634182214736938,
      "learning_rate": 0.0005822605308710567,
      "loss": 3.1825,
      "step": 25347
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.891877293586731,
      "learning_rate": 0.0005822591450761793,
      "loss": 3.2763,
      "step": 25348
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5930417776107788,
      "learning_rate": 0.0005822577592288244,
      "loss": 3.0287,
      "step": 25349
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.546370267868042,
      "learning_rate": 0.0005822563733289926,
      "loss": 3.0575,
      "step": 25350
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6376174688339233,
      "learning_rate": 0.000582254987376684,
      "loss": 2.9942,
      "step": 25351
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5916359424591064,
      "learning_rate": 0.0005822536013718989,
      "loss": 2.9215,
      "step": 25352
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8473093509674072,
      "learning_rate": 0.0005822522153146374,
      "loss": 3.1277,
      "step": 25353
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6823928356170654,
      "learning_rate": 0.0005822508292049,
      "loss": 2.9379,
      "step": 25354
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9192250967025757,
      "learning_rate": 0.0005822494430426869,
      "loss": 3.3355,
      "step": 25355
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0677554607391357,
      "learning_rate": 0.0005822480568279982,
      "loss": 2.8726,
      "step": 25356
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5537209510803223,
      "learning_rate": 0.0005822466705608343,
      "loss": 3.1045,
      "step": 25357
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8884602785110474,
      "learning_rate": 0.0005822452842411954,
      "loss": 2.9956,
      "step": 25358
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5431389808654785,
      "learning_rate": 0.0005822438978690818,
      "loss": 3.3731,
      "step": 25359
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7834457159042358,
      "learning_rate": 0.0005822425114444937,
      "loss": 3.1882,
      "step": 25360
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4567811489105225,
      "learning_rate": 0.0005822411249674314,
      "loss": 3.1077,
      "step": 25361
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8349058628082275,
      "learning_rate": 0.0005822397384378952,
      "loss": 3.1125,
      "step": 25362
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4787002801895142,
      "learning_rate": 0.0005822383518558851,
      "loss": 3.2137,
      "step": 25363
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4832261800765991,
      "learning_rate": 0.0005822369652214018,
      "loss": 3.3098,
      "step": 25364
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8601648807525635,
      "learning_rate": 0.0005822355785344451,
      "loss": 3.0554,
      "step": 25365
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1172492504119873,
      "learning_rate": 0.0005822341917950157,
      "loss": 3.1915,
      "step": 25366
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.617329716682434,
      "learning_rate": 0.0005822328050031136,
      "loss": 3.1591,
      "step": 25367
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7105666399002075,
      "learning_rate": 0.000582231418158739,
      "loss": 3.0824,
      "step": 25368
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5973892211914062,
      "learning_rate": 0.0005822300312618923,
      "loss": 3.1156,
      "step": 25369
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6353198289871216,
      "learning_rate": 0.0005822286443125736,
      "loss": 3.2827,
      "step": 25370
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4640259742736816,
      "learning_rate": 0.0005822272573107834,
      "loss": 3.031,
      "step": 25371
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.626429557800293,
      "learning_rate": 0.0005822258702565218,
      "loss": 3.093,
      "step": 25372
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.405214786529541,
      "learning_rate": 0.0005822244831497891,
      "loss": 3.2777,
      "step": 25373
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.842966914176941,
      "learning_rate": 0.0005822230959905856,
      "loss": 2.9894,
      "step": 25374
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2936383485794067,
      "learning_rate": 0.0005822217087789114,
      "loss": 3.1354,
      "step": 25375
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5368810892105103,
      "learning_rate": 0.0005822203215147669,
      "loss": 3.1855,
      "step": 25376
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4790587425231934,
      "learning_rate": 0.0005822189341981522,
      "loss": 3.1875,
      "step": 25377
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5096100568771362,
      "learning_rate": 0.0005822175468290678,
      "loss": 3.2562,
      "step": 25378
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4801243543624878,
      "learning_rate": 0.0005822161594075138,
      "loss": 3.174,
      "step": 25379
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8997609615325928,
      "learning_rate": 0.0005822147719334905,
      "loss": 3.179,
      "step": 25380
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1619139909744263,
      "learning_rate": 0.0005822133844069981,
      "loss": 2.9347,
      "step": 25381
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7482155561447144,
      "learning_rate": 0.0005822119968280369,
      "loss": 2.7158,
      "step": 25382
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5655251741409302,
      "learning_rate": 0.0005822106091966072,
      "loss": 3.032,
      "step": 25383
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4113293886184692,
      "learning_rate": 0.0005822092215127092,
      "loss": 3.2983,
      "step": 25384
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5449013710021973,
      "learning_rate": 0.0005822078337763433,
      "loss": 3.0905,
      "step": 25385
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3890511989593506,
      "learning_rate": 0.0005822064459875094,
      "loss": 3.1473,
      "step": 25386
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3934974670410156,
      "learning_rate": 0.0005822050581462081,
      "loss": 3.2473,
      "step": 25387
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.123756170272827,
      "learning_rate": 0.0005822036702524396,
      "loss": 2.9763,
      "step": 25388
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3908333778381348,
      "learning_rate": 0.0005822022823062041,
      "loss": 3.1638,
      "step": 25389
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.272083044052124,
      "learning_rate": 0.0005822008943075019,
      "loss": 2.8668,
      "step": 25390
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.756430983543396,
      "learning_rate": 0.0005821995062563331,
      "loss": 3.11,
      "step": 25391
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4016298055648804,
      "learning_rate": 0.0005821981181526983,
      "loss": 3.0966,
      "step": 25392
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3544460535049438,
      "learning_rate": 0.0005821967299965973,
      "loss": 3.2507,
      "step": 25393
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.584354043006897,
      "learning_rate": 0.0005821953417880308,
      "loss": 3.072,
      "step": 25394
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0618598461151123,
      "learning_rate": 0.0005821939535269987,
      "loss": 2.9373,
      "step": 25395
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6838810443878174,
      "learning_rate": 0.0005821925652135016,
      "loss": 3.241,
      "step": 25396
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.167848587036133,
      "learning_rate": 0.0005821911768475393,
      "loss": 3.2918,
      "step": 25397
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.443273663520813,
      "learning_rate": 0.0005821897884291127,
      "loss": 3.1602,
      "step": 25398
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3775579929351807,
      "learning_rate": 0.0005821883999582215,
      "loss": 3.0276,
      "step": 25399
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5554265975952148,
      "learning_rate": 0.000582187011434866,
      "loss": 3.0975,
      "step": 25400
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5574047565460205,
      "learning_rate": 0.0005821856228590468,
      "loss": 3.0534,
      "step": 25401
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5152044296264648,
      "learning_rate": 0.0005821842342307639,
      "loss": 3.0943,
      "step": 25402
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5601372718811035,
      "learning_rate": 0.0005821828455500177,
      "loss": 3.2056,
      "step": 25403
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5180644989013672,
      "learning_rate": 0.0005821814568168083,
      "loss": 3.0382,
      "step": 25404
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9315555095672607,
      "learning_rate": 0.000582180068031136,
      "loss": 2.9712,
      "step": 25405
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4940731525421143,
      "learning_rate": 0.0005821786791930012,
      "loss": 3.0506,
      "step": 25406
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4427145719528198,
      "learning_rate": 0.000582177290302404,
      "loss": 2.8769,
      "step": 25407
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7511894702911377,
      "learning_rate": 0.0005821759013593448,
      "loss": 3.2884,
      "step": 25408
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5912667512893677,
      "learning_rate": 0.0005821745123638236,
      "loss": 3.1252,
      "step": 25409
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5023051500320435,
      "learning_rate": 0.000582173123315841,
      "loss": 3.3566,
      "step": 25410
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4847180843353271,
      "learning_rate": 0.000582171734215397,
      "loss": 3.06,
      "step": 25411
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4097923040390015,
      "learning_rate": 0.000582170345062492,
      "loss": 3.1375,
      "step": 25412
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.200705051422119,
      "learning_rate": 0.0005821689558571262,
      "loss": 3.0039,
      "step": 25413
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3005919456481934,
      "learning_rate": 0.0005821675665992999,
      "loss": 3.0256,
      "step": 25414
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5206986665725708,
      "learning_rate": 0.0005821661772890132,
      "loss": 2.9951,
      "step": 25415
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.640037178993225,
      "learning_rate": 0.0005821647879262666,
      "loss": 3.1588,
      "step": 25416
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6050901412963867,
      "learning_rate": 0.0005821633985110601,
      "loss": 2.7978,
      "step": 25417
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.582603096961975,
      "learning_rate": 0.0005821620090433942,
      "loss": 3.0693,
      "step": 25418
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5122873783111572,
      "learning_rate": 0.000582160619523269,
      "loss": 3.1361,
      "step": 25419
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5678857564926147,
      "learning_rate": 0.000582159229950685,
      "loss": 3.1542,
      "step": 25420
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.537843942642212,
      "learning_rate": 0.000582157840325642,
      "loss": 3.0933,
      "step": 25421
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.389259696006775,
      "learning_rate": 0.0005821564506481407,
      "loss": 3.1257,
      "step": 25422
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3010358810424805,
      "learning_rate": 0.0005821550609181812,
      "loss": 3.0232,
      "step": 25423
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7748441696166992,
      "learning_rate": 0.0005821536711357637,
      "loss": 3.113,
      "step": 25424
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7594382762908936,
      "learning_rate": 0.0005821522813008885,
      "loss": 3.0099,
      "step": 25425
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4461997747421265,
      "learning_rate": 0.0005821508914135558,
      "loss": 3.0994,
      "step": 25426
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4905343055725098,
      "learning_rate": 0.0005821495014737661,
      "loss": 2.9531,
      "step": 25427
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5376594066619873,
      "learning_rate": 0.0005821481114815193,
      "loss": 3.2395,
      "step": 25428
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.421396255493164,
      "learning_rate": 0.000582146721436816,
      "loss": 2.9864,
      "step": 25429
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5222290754318237,
      "learning_rate": 0.0005821453313396562,
      "loss": 2.879,
      "step": 25430
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8941650390625,
      "learning_rate": 0.0005821439411900402,
      "loss": 3.0763,
      "step": 25431
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.45453679561615,
      "learning_rate": 0.0005821425509879684,
      "loss": 3.0527,
      "step": 25432
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.57183039188385,
      "learning_rate": 0.000582141160733441,
      "loss": 3.1425,
      "step": 25433
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.746240258216858,
      "learning_rate": 0.0005821397704264582,
      "loss": 2.7931,
      "step": 25434
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.284821033477783,
      "learning_rate": 0.0005821383800670203,
      "loss": 3.1188,
      "step": 25435
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0262463092803955,
      "learning_rate": 0.0005821369896551275,
      "loss": 3.3416,
      "step": 25436
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.538493037223816,
      "learning_rate": 0.0005821355991907801,
      "loss": 3.0736,
      "step": 25437
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7697114944458008,
      "learning_rate": 0.0005821342086739785,
      "loss": 3.1327,
      "step": 25438
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5551958084106445,
      "learning_rate": 0.0005821328181047227,
      "loss": 2.925,
      "step": 25439
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7530763149261475,
      "learning_rate": 0.0005821314274830131,
      "loss": 2.9236,
      "step": 25440
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8849514722824097,
      "learning_rate": 0.00058213003680885,
      "loss": 3.1173,
      "step": 25441
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3579246997833252,
      "learning_rate": 0.0005821286460822335,
      "loss": 3.0609,
      "step": 25442
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5809296369552612,
      "learning_rate": 0.000582127255303164,
      "loss": 2.9937,
      "step": 25443
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.465260624885559,
      "learning_rate": 0.0005821258644716418,
      "loss": 3.0656,
      "step": 25444
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.010507822036743,
      "learning_rate": 0.0005821244735876671,
      "loss": 3.0851,
      "step": 25445
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.002885580062866,
      "learning_rate": 0.00058212308265124,
      "loss": 2.856,
      "step": 25446
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.928971767425537,
      "learning_rate": 0.000582121691662361,
      "loss": 3.0488,
      "step": 25447
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4766016006469727,
      "learning_rate": 0.0005821203006210301,
      "loss": 3.1868,
      "step": 25448
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3954637050628662,
      "learning_rate": 0.0005821189095272479,
      "loss": 3.1831,
      "step": 25449
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6100513935089111,
      "learning_rate": 0.0005821175183810143,
      "loss": 2.9865,
      "step": 25450
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4506961107254028,
      "learning_rate": 0.0005821161271823298,
      "loss": 2.982,
      "step": 25451
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.654294729232788,
      "learning_rate": 0.0005821147359311946,
      "loss": 3.0917,
      "step": 25452
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4553277492523193,
      "learning_rate": 0.0005821133446276089,
      "loss": 3.333,
      "step": 25453
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7738276720046997,
      "learning_rate": 0.000582111953271573,
      "loss": 3.2598,
      "step": 25454
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6420783996582031,
      "learning_rate": 0.0005821105618630872,
      "loss": 3.1239,
      "step": 25455
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4445219039916992,
      "learning_rate": 0.0005821091704021517,
      "loss": 3.0324,
      "step": 25456
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4514683485031128,
      "learning_rate": 0.0005821077788887669,
      "loss": 3.1101,
      "step": 25457
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3871641159057617,
      "learning_rate": 0.0005821063873229327,
      "loss": 2.7313,
      "step": 25458
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.817659616470337,
      "learning_rate": 0.0005821049957046499,
      "loss": 3.3182,
      "step": 25459
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.007739305496216,
      "learning_rate": 0.0005821036040339182,
      "loss": 3.2518,
      "step": 25460
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7033356428146362,
      "learning_rate": 0.0005821022123107382,
      "loss": 3.3354,
      "step": 25461
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7856054306030273,
      "learning_rate": 0.0005821008205351101,
      "loss": 3.0721,
      "step": 25462
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4029251337051392,
      "learning_rate": 0.0005820994287070342,
      "loss": 3.2829,
      "step": 25463
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0368247032165527,
      "learning_rate": 0.0005820980368265106,
      "loss": 3.2505,
      "step": 25464
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4306963682174683,
      "learning_rate": 0.0005820966448935395,
      "loss": 3.4211,
      "step": 25465
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.659827470779419,
      "learning_rate": 0.0005820952529081216,
      "loss": 3.1727,
      "step": 25466
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.608708381652832,
      "learning_rate": 0.0005820938608702567,
      "loss": 3.0975,
      "step": 25467
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8906538486480713,
      "learning_rate": 0.0005820924687799453,
      "loss": 2.9603,
      "step": 25468
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3095314502716064,
      "learning_rate": 0.0005820910766371876,
      "loss": 3.2238,
      "step": 25469
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5285917520523071,
      "learning_rate": 0.0005820896844419837,
      "loss": 3.0697,
      "step": 25470
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4017157554626465,
      "learning_rate": 0.0005820882921943341,
      "loss": 2.9232,
      "step": 25471
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5778716802597046,
      "learning_rate": 0.000582086899894239,
      "loss": 2.9584,
      "step": 25472
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4911153316497803,
      "learning_rate": 0.0005820855075416985,
      "loss": 2.8516,
      "step": 25473
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.043489694595337,
      "learning_rate": 0.0005820841151367131,
      "loss": 3.1287,
      "step": 25474
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8406188488006592,
      "learning_rate": 0.0005820827226792829,
      "loss": 2.9149,
      "step": 25475
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7857460975646973,
      "learning_rate": 0.0005820813301694081,
      "loss": 3.2371,
      "step": 25476
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7730858325958252,
      "learning_rate": 0.0005820799376070892,
      "loss": 3.1561,
      "step": 25477
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.176262617111206,
      "learning_rate": 0.0005820785449923262,
      "loss": 3.0844,
      "step": 25478
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.279952883720398,
      "learning_rate": 0.0005820771523251196,
      "loss": 2.9967,
      "step": 25479
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.2769548892974854,
      "learning_rate": 0.0005820757596054695,
      "loss": 3.0417,
      "step": 25480
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3287417888641357,
      "learning_rate": 0.0005820743668333761,
      "loss": 3.0459,
      "step": 25481
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3449585437774658,
      "learning_rate": 0.0005820729740088398,
      "loss": 2.893,
      "step": 25482
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.424309253692627,
      "learning_rate": 0.0005820715811318608,
      "loss": 2.9654,
      "step": 25483
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8334206342697144,
      "learning_rate": 0.0005820701882024394,
      "loss": 2.8583,
      "step": 25484
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0650901794433594,
      "learning_rate": 0.0005820687952205758,
      "loss": 2.9859,
      "step": 25485
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.518949031829834,
      "learning_rate": 0.0005820674021862703,
      "loss": 2.8828,
      "step": 25486
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.299008846282959,
      "learning_rate": 0.0005820660090995231,
      "loss": 3.2037,
      "step": 25487
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1138675212860107,
      "learning_rate": 0.0005820646159603346,
      "loss": 3.28,
      "step": 25488
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.841293215751648,
      "learning_rate": 0.0005820632227687049,
      "loss": 3.1899,
      "step": 25489
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5799168348312378,
      "learning_rate": 0.0005820618295246343,
      "loss": 2.9376,
      "step": 25490
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4986168146133423,
      "learning_rate": 0.0005820604362281232,
      "loss": 3.1358,
      "step": 25491
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3893554210662842,
      "learning_rate": 0.0005820590428791716,
      "loss": 3.0826,
      "step": 25492
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4526841640472412,
      "learning_rate": 0.00058205764947778,
      "loss": 3.4234,
      "step": 25493
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5179318189620972,
      "learning_rate": 0.0005820562560239485,
      "loss": 2.9705,
      "step": 25494
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.387056589126587,
      "learning_rate": 0.0005820548625176774,
      "loss": 2.9255,
      "step": 25495
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.287509799003601,
      "learning_rate": 0.0005820534689589671,
      "loss": 2.8793,
      "step": 25496
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2751985788345337,
      "learning_rate": 0.0005820520753478176,
      "loss": 2.8392,
      "step": 25497
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4578462839126587,
      "learning_rate": 0.0005820506816842293,
      "loss": 2.9026,
      "step": 25498
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7224743366241455,
      "learning_rate": 0.0005820492879682025,
      "loss": 3.0986,
      "step": 25499
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8786797523498535,
      "learning_rate": 0.0005820478941997374,
      "loss": 3.0073,
      "step": 25500
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4463270902633667,
      "learning_rate": 0.0005820465003788343,
      "loss": 3.3429,
      "step": 25501
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.430346727371216,
      "learning_rate": 0.0005820451065054934,
      "loss": 2.8325,
      "step": 25502
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.2350616455078125,
      "learning_rate": 0.000582043712579715,
      "loss": 2.9807,
      "step": 25503
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1857266426086426,
      "learning_rate": 0.0005820423186014994,
      "loss": 3.0414,
      "step": 25504
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4163880348205566,
      "learning_rate": 0.0005820409245708468,
      "loss": 3.4282,
      "step": 25505
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9933526515960693,
      "learning_rate": 0.0005820395304877574,
      "loss": 3.0366,
      "step": 25506
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.071941375732422,
      "learning_rate": 0.0005820381363522316,
      "loss": 2.8405,
      "step": 25507
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9102394580841064,
      "learning_rate": 0.0005820367421642696,
      "loss": 3.1075,
      "step": 25508
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2526944875717163,
      "learning_rate": 0.0005820353479238715,
      "loss": 3.1411,
      "step": 25509
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1910059452056885,
      "learning_rate": 0.0005820339536310378,
      "loss": 3.1391,
      "step": 25510
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1801782846450806,
      "learning_rate": 0.0005820325592857686,
      "loss": 2.865,
      "step": 25511
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1737070083618164,
      "learning_rate": 0.0005820311648880644,
      "loss": 2.9733,
      "step": 25512
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1546332836151123,
      "learning_rate": 0.0005820297704379251,
      "loss": 3.3382,
      "step": 25513
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.434238076210022,
      "learning_rate": 0.0005820283759353513,
      "loss": 3.2979,
      "step": 25514
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6897525787353516,
      "learning_rate": 0.000582026981380343,
      "loss": 2.9486,
      "step": 25515
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5773351192474365,
      "learning_rate": 0.0005820255867729005,
      "loss": 3.0044,
      "step": 25516
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8832793235778809,
      "learning_rate": 0.0005820241921130242,
      "loss": 3.0265,
      "step": 25517
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6363627910614014,
      "learning_rate": 0.0005820227974007142,
      "loss": 3.3442,
      "step": 25518
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1526589393615723,
      "learning_rate": 0.000582021402635971,
      "loss": 2.9488,
      "step": 25519
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4225778579711914,
      "learning_rate": 0.0005820200078187946,
      "loss": 3.0595,
      "step": 25520
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7828606367111206,
      "learning_rate": 0.0005820186129491853,
      "loss": 3.2088,
      "step": 25521
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.659400224685669,
      "learning_rate": 0.0005820172180271434,
      "loss": 3.1281,
      "step": 25522
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6550986766815186,
      "learning_rate": 0.0005820158230526694,
      "loss": 3.1939,
      "step": 25523
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5721681118011475,
      "learning_rate": 0.0005820144280257631,
      "loss": 3.3763,
      "step": 25524
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8193475008010864,
      "learning_rate": 0.000582013032946425,
      "loss": 2.9951,
      "step": 25525
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6927615404129028,
      "learning_rate": 0.0005820116378146556,
      "loss": 3.1425,
      "step": 25526
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.363320231437683,
      "learning_rate": 0.0005820102426304547,
      "loss": 3.1548,
      "step": 25527
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5660566091537476,
      "learning_rate": 0.0005820088473938228,
      "loss": 3.0644,
      "step": 25528
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4542739391326904,
      "learning_rate": 0.0005820074521047601,
      "loss": 3.1296,
      "step": 25529
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6300926208496094,
      "learning_rate": 0.000582006056763267,
      "loss": 2.9879,
      "step": 25530
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4448164701461792,
      "learning_rate": 0.0005820046613693436,
      "loss": 2.8823,
      "step": 25531
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2813724279403687,
      "learning_rate": 0.0005820032659229902,
      "loss": 3.0643,
      "step": 25532
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8624272346496582,
      "learning_rate": 0.0005820018704242072,
      "loss": 3.2237,
      "step": 25533
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7113933563232422,
      "learning_rate": 0.0005820004748729945,
      "loss": 3.1183,
      "step": 25534
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.36831533908844,
      "learning_rate": 0.0005819990792693527,
      "loss": 3.1772,
      "step": 25535
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4328722953796387,
      "learning_rate": 0.000581997683613282,
      "loss": 2.6875,
      "step": 25536
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.143425941467285,
      "learning_rate": 0.0005819962879047825,
      "loss": 3.2647,
      "step": 25537
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.626579999923706,
      "learning_rate": 0.0005819948921438546,
      "loss": 2.9775,
      "step": 25538
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5676915645599365,
      "learning_rate": 0.0005819934963304986,
      "loss": 2.9649,
      "step": 25539
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8613258600234985,
      "learning_rate": 0.0005819921004647146,
      "loss": 3.2169,
      "step": 25540
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3696448802948,
      "learning_rate": 0.000581990704546503,
      "loss": 2.9146,
      "step": 25541
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8046410083770752,
      "learning_rate": 0.000581989308575864,
      "loss": 2.8175,
      "step": 25542
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8546745777130127,
      "learning_rate": 0.0005819879125527978,
      "loss": 3.337,
      "step": 25543
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.246131181716919,
      "learning_rate": 0.0005819865164773048,
      "loss": 3.2142,
      "step": 25544
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.704197645187378,
      "learning_rate": 0.0005819851203493851,
      "loss": 3.0784,
      "step": 25545
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6265431642532349,
      "learning_rate": 0.0005819837241690391,
      "loss": 3.1342,
      "step": 25546
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9401241540908813,
      "learning_rate": 0.000581982327936267,
      "loss": 3.4163,
      "step": 25547
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5612026453018188,
      "learning_rate": 0.0005819809316510691,
      "loss": 3.0668,
      "step": 25548
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6235368251800537,
      "learning_rate": 0.0005819795353134455,
      "loss": 3.1372,
      "step": 25549
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5582307577133179,
      "learning_rate": 0.0005819781389233967,
      "loss": 3.1472,
      "step": 25550
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6670587062835693,
      "learning_rate": 0.0005819767424809228,
      "loss": 2.8178,
      "step": 25551
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4080121517181396,
      "learning_rate": 0.0005819753459860241,
      "loss": 3.1601,
      "step": 25552
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.538583755493164,
      "learning_rate": 0.0005819739494387009,
      "loss": 3.3011,
      "step": 25553
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6794496774673462,
      "learning_rate": 0.0005819725528389534,
      "loss": 3.0196,
      "step": 25554
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5885430574417114,
      "learning_rate": 0.0005819711561867819,
      "loss": 3.054,
      "step": 25555
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2255409955978394,
      "learning_rate": 0.0005819697594821866,
      "loss": 3.0355,
      "step": 25556
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5294523239135742,
      "learning_rate": 0.0005819683627251678,
      "loss": 3.1091,
      "step": 25557
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6361331939697266,
      "learning_rate": 0.0005819669659157257,
      "loss": 3.1562,
      "step": 25558
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.469498872756958,
      "learning_rate": 0.0005819655690538607,
      "loss": 3.3517,
      "step": 25559
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3738161325454712,
      "learning_rate": 0.0005819641721395729,
      "loss": 3.0577,
      "step": 25560
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9821748733520508,
      "learning_rate": 0.0005819627751728627,
      "loss": 3.1096,
      "step": 25561
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3389091491699219,
      "learning_rate": 0.0005819613781537304,
      "loss": 3.1524,
      "step": 25562
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1126134395599365,
      "learning_rate": 0.0005819599810821761,
      "loss": 3.2231,
      "step": 25563
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6513630151748657,
      "learning_rate": 0.0005819585839582,
      "loss": 3.2684,
      "step": 25564
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4223817586898804,
      "learning_rate": 0.0005819571867818025,
      "loss": 3.3249,
      "step": 25565
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9878188371658325,
      "learning_rate": 0.000581955789552984,
      "loss": 3.061,
      "step": 25566
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8847432136535645,
      "learning_rate": 0.0005819543922717444,
      "loss": 2.7896,
      "step": 25567
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9049774408340454,
      "learning_rate": 0.0005819529949380842,
      "loss": 3.0018,
      "step": 25568
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.508063554763794,
      "learning_rate": 0.0005819515975520037,
      "loss": 3.0357,
      "step": 25569
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3555127382278442,
      "learning_rate": 0.0005819502001135029,
      "loss": 3.1388,
      "step": 25570
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.4569923877716064,
      "learning_rate": 0.0005819488026225825,
      "loss": 3.1265,
      "step": 25571
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7368323802947998,
      "learning_rate": 0.0005819474050792424,
      "loss": 3.2622,
      "step": 25572
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8048707246780396,
      "learning_rate": 0.0005819460074834829,
      "loss": 3.0999,
      "step": 25573
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8616852760314941,
      "learning_rate": 0.0005819446098353043,
      "loss": 3.1521,
      "step": 25574
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4236445426940918,
      "learning_rate": 0.0005819432121347068,
      "loss": 3.2541,
      "step": 25575
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7019922733306885,
      "learning_rate": 0.000581941814381691,
      "loss": 3.0388,
      "step": 25576
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4910683631896973,
      "learning_rate": 0.0005819404165762566,
      "loss": 3.0796,
      "step": 25577
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.512925624847412,
      "learning_rate": 0.0005819390187184044,
      "loss": 2.9828,
      "step": 25578
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5004745721817017,
      "learning_rate": 0.0005819376208081343,
      "loss": 2.9881,
      "step": 25579
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4654152393341064,
      "learning_rate": 0.0005819362228454467,
      "loss": 3.2137,
      "step": 25580
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.288078546524048,
      "learning_rate": 0.0005819348248303418,
      "loss": 3.2661,
      "step": 25581
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5635151863098145,
      "learning_rate": 0.0005819334267628198,
      "loss": 3.0842,
      "step": 25582
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5573813915252686,
      "learning_rate": 0.0005819320286428812,
      "loss": 2.9295,
      "step": 25583
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.624881625175476,
      "learning_rate": 0.0005819306304705261,
      "loss": 3.188,
      "step": 25584
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.598339319229126,
      "learning_rate": 0.0005819292322457548,
      "loss": 3.2663,
      "step": 25585
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8350872993469238,
      "learning_rate": 0.0005819278339685674,
      "loss": 2.9728,
      "step": 25586
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9561251401901245,
      "learning_rate": 0.0005819264356389643,
      "loss": 3.1164,
      "step": 25587
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7723324298858643,
      "learning_rate": 0.0005819250372569459,
      "loss": 3.188,
      "step": 25588
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5595248937606812,
      "learning_rate": 0.0005819236388225123,
      "loss": 3.3218,
      "step": 25589
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6647348403930664,
      "learning_rate": 0.0005819222403356637,
      "loss": 3.1107,
      "step": 25590
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5631107091903687,
      "learning_rate": 0.0005819208417964004,
      "loss": 3.1312,
      "step": 25591
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6534305810928345,
      "learning_rate": 0.0005819194432047227,
      "loss": 3.1554,
      "step": 25592
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0139195919036865,
      "learning_rate": 0.000581918044560631,
      "loss": 2.9316,
      "step": 25593
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.665973424911499,
      "learning_rate": 0.0005819166458641253,
      "loss": 3.2417,
      "step": 25594
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8228826522827148,
      "learning_rate": 0.0005819152471152059,
      "loss": 2.9594,
      "step": 25595
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4400722980499268,
      "learning_rate": 0.0005819138483138733,
      "loss": 3.3526,
      "step": 25596
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.436561942100525,
      "learning_rate": 0.0005819124494601275,
      "loss": 3.1458,
      "step": 25597
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7157022953033447,
      "learning_rate": 0.0005819110505539688,
      "loss": 3.0752,
      "step": 25598
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8938102722167969,
      "learning_rate": 0.0005819096515953977,
      "loss": 3.1105,
      "step": 25599
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8934482336044312,
      "learning_rate": 0.0005819082525844141,
      "loss": 2.9957,
      "step": 25600
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7635406255722046,
      "learning_rate": 0.0005819068535210185,
      "loss": 2.8516,
      "step": 25601
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9777934551239014,
      "learning_rate": 0.0005819054544052111,
      "loss": 2.9047,
      "step": 25602
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7149590253829956,
      "learning_rate": 0.0005819040552369921,
      "loss": 3.0492,
      "step": 25603
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.784482717514038,
      "learning_rate": 0.0005819026560163618,
      "loss": 3.111,
      "step": 25604
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.7726476192474365,
      "learning_rate": 0.0005819012567433206,
      "loss": 2.9689,
      "step": 25605
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5363211631774902,
      "learning_rate": 0.0005818998574178686,
      "loss": 3.1872,
      "step": 25606
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4955940246582031,
      "learning_rate": 0.0005818984580400061,
      "loss": 3.0784,
      "step": 25607
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.172300338745117,
      "learning_rate": 0.0005818970586097332,
      "loss": 3.0581,
      "step": 25608
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.2643909454345703,
      "learning_rate": 0.0005818956591270505,
      "loss": 3.2245,
      "step": 25609
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7916470766067505,
      "learning_rate": 0.000581894259591958,
      "loss": 2.816,
      "step": 25610
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.887303352355957,
      "learning_rate": 0.0005818928600044561,
      "loss": 3.1845,
      "step": 25611
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.6508629322052,
      "learning_rate": 0.000581891460364545,
      "loss": 3.5221,
      "step": 25612
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.083909034729004,
      "learning_rate": 0.0005818900606722248,
      "loss": 3.0931,
      "step": 25613
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.441739797592163,
      "learning_rate": 0.000581888660927496,
      "loss": 2.8655,
      "step": 25614
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3168272972106934,
      "learning_rate": 0.0005818872611303588,
      "loss": 3.152,
      "step": 25615
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2162315845489502,
      "learning_rate": 0.0005818858612808135,
      "loss": 3.1116,
      "step": 25616
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4816880226135254,
      "learning_rate": 0.0005818844613788602,
      "loss": 3.1496,
      "step": 25617
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3605197668075562,
      "learning_rate": 0.0005818830614244992,
      "loss": 3.1773,
      "step": 25618
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.1801180839538574,
      "learning_rate": 0.0005818816614177309,
      "loss": 2.9177,
      "step": 25619
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5093581676483154,
      "learning_rate": 0.0005818802613585556,
      "loss": 3.1616,
      "step": 25620
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4375171661376953,
      "learning_rate": 0.0005818788612469732,
      "loss": 3.0421,
      "step": 25621
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1055617332458496,
      "learning_rate": 0.0005818774610829844,
      "loss": 3.1434,
      "step": 25622
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9201856851577759,
      "learning_rate": 0.0005818760608665891,
      "loss": 3.2339,
      "step": 25623
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.063836097717285,
      "learning_rate": 0.0005818746605977879,
      "loss": 3.0836,
      "step": 25624
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8216391801834106,
      "learning_rate": 0.0005818732602765807,
      "loss": 3.2606,
      "step": 25625
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4427934885025024,
      "learning_rate": 0.000581871859902968,
      "loss": 2.9742,
      "step": 25626
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.592421054840088,
      "learning_rate": 0.0005818704594769499,
      "loss": 3.3426,
      "step": 25627
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.469268560409546,
      "learning_rate": 0.000581869058998527,
      "loss": 2.9414,
      "step": 25628
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6904139518737793,
      "learning_rate": 0.0005818676584676991,
      "loss": 3.1723,
      "step": 25629
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.602799892425537,
      "learning_rate": 0.0005818662578844668,
      "loss": 3.1669,
      "step": 25630
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8028961420059204,
      "learning_rate": 0.0005818648572488303,
      "loss": 2.9752,
      "step": 25631
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4645339250564575,
      "learning_rate": 0.0005818634565607897,
      "loss": 2.9813,
      "step": 25632
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4004935026168823,
      "learning_rate": 0.0005818620558203454,
      "loss": 3.0323,
      "step": 25633
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3940396308898926,
      "learning_rate": 0.0005818606550274976,
      "loss": 3.1009,
      "step": 25634
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6667759418487549,
      "learning_rate": 0.0005818592541822465,
      "loss": 3.0779,
      "step": 25635
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.9167916774749756,
      "learning_rate": 0.0005818578532845927,
      "loss": 3.0155,
      "step": 25636
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4411804676055908,
      "learning_rate": 0.000581856452334536,
      "loss": 2.8267,
      "step": 25637
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.574729084968567,
      "learning_rate": 0.0005818550513320769,
      "loss": 3.1847,
      "step": 25638
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.867717981338501,
      "learning_rate": 0.0005818536502772156,
      "loss": 3.1915,
      "step": 25639
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.498262405395508,
      "learning_rate": 0.0005818522491699525,
      "loss": 2.9626,
      "step": 25640
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6183303594589233,
      "learning_rate": 0.0005818508480102876,
      "loss": 3.2008,
      "step": 25641
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6994788646697998,
      "learning_rate": 0.0005818494467982213,
      "loss": 3.1732,
      "step": 25642
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7632908821105957,
      "learning_rate": 0.000581848045533754,
      "loss": 3.2892,
      "step": 25643
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.370086908340454,
      "learning_rate": 0.0005818466442168858,
      "loss": 3.3099,
      "step": 25644
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1540236473083496,
      "learning_rate": 0.0005818452428476169,
      "loss": 3.2527,
      "step": 25645
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4800910949707031,
      "learning_rate": 0.0005818438414259477,
      "loss": 2.8921,
      "step": 25646
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3447744846343994,
      "learning_rate": 0.0005818424399518784,
      "loss": 3.1629,
      "step": 25647
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.700128436088562,
      "learning_rate": 0.0005818410384254094,
      "loss": 3.2053,
      "step": 25648
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5888700485229492,
      "learning_rate": 0.0005818396368465407,
      "loss": 3.1522,
      "step": 25649
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4675813913345337,
      "learning_rate": 0.0005818382352152727,
      "loss": 3.1224,
      "step": 25650
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3485313653945923,
      "learning_rate": 0.0005818368335316056,
      "loss": 3.1785,
      "step": 25651
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.657390832901001,
      "learning_rate": 0.0005818354317955398,
      "loss": 2.9228,
      "step": 25652
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.40529203414917,
      "learning_rate": 0.0005818340300070754,
      "loss": 3.154,
      "step": 25653
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6811195611953735,
      "learning_rate": 0.0005818326281662128,
      "loss": 2.846,
      "step": 25654
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5567939281463623,
      "learning_rate": 0.0005818312262729521,
      "loss": 2.8505,
      "step": 25655
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5502922534942627,
      "learning_rate": 0.0005818298243272938,
      "loss": 3.1554,
      "step": 25656
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6315805912017822,
      "learning_rate": 0.0005818284223292379,
      "loss": 3.3506,
      "step": 25657
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5257055759429932,
      "learning_rate": 0.0005818270202787849,
      "loss": 3.1386,
      "step": 25658
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.212048053741455,
      "learning_rate": 0.0005818256181759348,
      "loss": 3.1047,
      "step": 25659
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4383225440979004,
      "learning_rate": 0.0005818242160206881,
      "loss": 3.2548,
      "step": 25660
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4209128618240356,
      "learning_rate": 0.0005818228138130448,
      "loss": 3.1184,
      "step": 25661
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5387147665023804,
      "learning_rate": 0.0005818214115530055,
      "loss": 2.9779,
      "step": 25662
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5923349857330322,
      "learning_rate": 0.0005818200092405703,
      "loss": 3.1661,
      "step": 25663
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5103847980499268,
      "learning_rate": 0.0005818186068757392,
      "loss": 2.9902,
      "step": 25664
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.751966118812561,
      "learning_rate": 0.0005818172044585129,
      "loss": 3.1017,
      "step": 25665
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8411085605621338,
      "learning_rate": 0.0005818158019888914,
      "loss": 2.8396,
      "step": 25666
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7177826166152954,
      "learning_rate": 0.0005818143994668751,
      "loss": 3.1936,
      "step": 25667
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9285061359405518,
      "learning_rate": 0.0005818129968924641,
      "loss": 3.1804,
      "step": 25668
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.432483434677124,
      "learning_rate": 0.0005818115942656587,
      "loss": 2.9215,
      "step": 25669
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9741301536560059,
      "learning_rate": 0.0005818101915864594,
      "loss": 3.2241,
      "step": 25670
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.400615930557251,
      "learning_rate": 0.0005818087888548662,
      "loss": 3.2672,
      "step": 25671
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4686740636825562,
      "learning_rate": 0.0005818073860708793,
      "loss": 3.0862,
      "step": 25672
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5864448547363281,
      "learning_rate": 0.000581805983234499,
      "loss": 3.0861,
      "step": 25673
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7896379232406616,
      "learning_rate": 0.0005818045803457259,
      "loss": 3.1223,
      "step": 25674
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8129382133483887,
      "learning_rate": 0.0005818031774045599,
      "loss": 3.0549,
      "step": 25675
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8161646127700806,
      "learning_rate": 0.0005818017744110013,
      "loss": 2.8735,
      "step": 25676
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3600449562072754,
      "learning_rate": 0.0005818003713650505,
      "loss": 3.0317,
      "step": 25677
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.453683614730835,
      "learning_rate": 0.0005817989682667078,
      "loss": 3.0351,
      "step": 25678
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4140914678573608,
      "learning_rate": 0.0005817975651159732,
      "loss": 2.9171,
      "step": 25679
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.43557608127594,
      "learning_rate": 0.0005817961619128472,
      "loss": 3.0544,
      "step": 25680
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6369487047195435,
      "learning_rate": 0.0005817947586573299,
      "loss": 3.2553,
      "step": 25681
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4706671237945557,
      "learning_rate": 0.0005817933553494217,
      "loss": 3.0912,
      "step": 25682
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4427381753921509,
      "learning_rate": 0.0005817919519891227,
      "loss": 2.8715,
      "step": 25683
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6178474426269531,
      "learning_rate": 0.0005817905485764334,
      "loss": 3.1956,
      "step": 25684
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.2377946376800537,
      "learning_rate": 0.0005817891451113539,
      "loss": 3.1954,
      "step": 25685
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8815912008285522,
      "learning_rate": 0.0005817877415938843,
      "loss": 3.1275,
      "step": 25686
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4145867824554443,
      "learning_rate": 0.0005817863380240253,
      "loss": 2.9739,
      "step": 25687
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6859933137893677,
      "learning_rate": 0.0005817849344017768,
      "loss": 2.8887,
      "step": 25688
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9992446899414062,
      "learning_rate": 0.000581783530727139,
      "loss": 3.3258,
      "step": 25689
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7498773336410522,
      "learning_rate": 0.0005817821270001124,
      "loss": 3.0781,
      "step": 25690
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.9473955631256104,
      "learning_rate": 0.0005817807232206973,
      "loss": 3.0483,
      "step": 25691
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.51934015750885,
      "learning_rate": 0.0005817793193888938,
      "loss": 3.0881,
      "step": 25692
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9498423337936401,
      "learning_rate": 0.0005817779155047022,
      "loss": 2.8733,
      "step": 25693
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3919857740402222,
      "learning_rate": 0.0005817765115681227,
      "loss": 3.1635,
      "step": 25694
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.9395965337753296,
      "learning_rate": 0.0005817751075791557,
      "loss": 3.0719,
      "step": 25695
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6445344686508179,
      "learning_rate": 0.0005817737035378014,
      "loss": 3.0729,
      "step": 25696
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4745312929153442,
      "learning_rate": 0.00058177229944406,
      "loss": 3.0008,
      "step": 25697
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6743685007095337,
      "learning_rate": 0.0005817708952979318,
      "loss": 3.0913,
      "step": 25698
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4375824928283691,
      "learning_rate": 0.0005817694910994172,
      "loss": 3.1504,
      "step": 25699
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4679179191589355,
      "learning_rate": 0.0005817680868485161,
      "loss": 3.0321,
      "step": 25700
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5973683595657349,
      "learning_rate": 0.0005817666825452292,
      "loss": 2.9535,
      "step": 25701
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0375914573669434,
      "learning_rate": 0.0005817652781895565,
      "loss": 3.2958,
      "step": 25702
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3494858741760254,
      "learning_rate": 0.0005817638737814982,
      "loss": 3.0544,
      "step": 25703
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.5615906715393066,
      "learning_rate": 0.0005817624693210548,
      "loss": 3.1501,
      "step": 25704
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.27167010307312,
      "learning_rate": 0.0005817610648082265,
      "loss": 2.7966,
      "step": 25705
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7073479890823364,
      "learning_rate": 0.0005817596602430133,
      "loss": 3.1892,
      "step": 25706
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.115389585494995,
      "learning_rate": 0.0005817582556254157,
      "loss": 2.8204,
      "step": 25707
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.3901761770248413,
      "learning_rate": 0.000581756850955434,
      "loss": 2.7567,
      "step": 25708
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.206252336502075,
      "learning_rate": 0.0005817554462330684,
      "loss": 3.2125,
      "step": 25709
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6354058980941772,
      "learning_rate": 0.0005817540414583192,
      "loss": 3.3291,
      "step": 25710
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.971033811569214,
      "learning_rate": 0.0005817526366311865,
      "loss": 3.2249,
      "step": 25711
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.68288516998291,
      "learning_rate": 0.0005817512317516707,
      "loss": 3.0195,
      "step": 25712
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.2179036140441895,
      "learning_rate": 0.000581749826819772,
      "loss": 3.0599,
      "step": 25713
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.8073331117630005,
      "learning_rate": 0.0005817484218354907,
      "loss": 3.128,
      "step": 25714
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.9048562049865723,
      "learning_rate": 0.0005817470167988271,
      "loss": 3.0352,
      "step": 25715
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.7808940410614014,
      "learning_rate": 0.0005817456117097814,
      "loss": 3.139,
      "step": 25716
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.7888160943984985,
      "learning_rate": 0.0005817442065683537,
      "loss": 3.0867,
      "step": 25717
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.6939195394515991,
      "learning_rate": 0.0005817428013745447,
      "loss": 3.0136,
      "step": 25718
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.81524658203125,
      "learning_rate": 0.0005817413961283542,
      "loss": 2.9478,
      "step": 25719
    },
    {
      "epoch": 0.33,
      "grad_norm": 4.922640323638916,
      "learning_rate": 0.0005817399908297828,
      "loss": 2.9912,
      "step": 25720
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.103177309036255,
      "learning_rate": 0.0005817385854788305,
      "loss": 3.1152,
      "step": 25721
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.2200145721435547,
      "learning_rate": 0.0005817371800754977,
      "loss": 3.1635,
      "step": 25722
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.1709437370300293,
      "learning_rate": 0.0005817357746197847,
      "loss": 3.2393,
      "step": 25723
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.2854456901550293,
      "learning_rate": 0.0005817343691116917,
      "loss": 3.3061,
      "step": 25724
    },
    {
      "epoch": 0.33,
      "grad_norm": 1.4539819955825806,
      "learning_rate": 0.0005817329635512189,
      "loss": 3.1573,
      "step": 25725
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.1121389865875244,
      "learning_rate": 0.0005817315579383666,
      "loss": 3.2575,
      "step": 25726
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.3813564777374268,
      "learning_rate": 0.0005817301522731351,
      "loss": 3.2545,
      "step": 25727
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.0273244380950928,
      "learning_rate": 0.0005817287465555248,
      "loss": 3.0645,
      "step": 25728
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3971197605133057,
      "learning_rate": 0.0005817273407855356,
      "loss": 3.0211,
      "step": 25729
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5228246450424194,
      "learning_rate": 0.000581725934963168,
      "loss": 3.0916,
      "step": 25730
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2847099304199219,
      "learning_rate": 0.0005817245290884224,
      "loss": 3.0361,
      "step": 25731
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.232592225074768,
      "learning_rate": 0.0005817231231612988,
      "loss": 3.2191,
      "step": 25732
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.368806004524231,
      "learning_rate": 0.0005817217171817976,
      "loss": 2.949,
      "step": 25733
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6156022548675537,
      "learning_rate": 0.0005817203111499189,
      "loss": 3.1376,
      "step": 25734
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4689171314239502,
      "learning_rate": 0.000581718905065663,
      "loss": 3.1039,
      "step": 25735
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.861318826675415,
      "learning_rate": 0.0005817174989290304,
      "loss": 3.1571,
      "step": 25736
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5292789936065674,
      "learning_rate": 0.0005817160927400212,
      "loss": 3.0011,
      "step": 25737
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.716765880584717,
      "learning_rate": 0.0005817146864986356,
      "loss": 3.2501,
      "step": 25738
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3349021673202515,
      "learning_rate": 0.0005817132802048739,
      "loss": 2.8304,
      "step": 25739
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9849121570587158,
      "learning_rate": 0.0005817118738587365,
      "loss": 3.0104,
      "step": 25740
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2425754070281982,
      "learning_rate": 0.0005817104674602235,
      "loss": 2.9962,
      "step": 25741
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3422363996505737,
      "learning_rate": 0.0005817090610093351,
      "loss": 3.235,
      "step": 25742
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6199754476547241,
      "learning_rate": 0.0005817076545060717,
      "loss": 3.0243,
      "step": 25743
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4057269096374512,
      "learning_rate": 0.0005817062479504337,
      "loss": 3.3227,
      "step": 25744
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4943780899047852,
      "learning_rate": 0.000581704841342421,
      "loss": 3.1115,
      "step": 25745
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6754080057144165,
      "learning_rate": 0.0005817034346820341,
      "loss": 3.1608,
      "step": 25746
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5773861408233643,
      "learning_rate": 0.0005817020279692733,
      "loss": 3.1987,
      "step": 25747
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4516668319702148,
      "learning_rate": 0.0005817006212041387,
      "loss": 3.2836,
      "step": 25748
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.881972312927246,
      "learning_rate": 0.0005816992143866306,
      "loss": 3.2476,
      "step": 25749
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.656144380569458,
      "learning_rate": 0.0005816978075167494,
      "loss": 3.1352,
      "step": 25750
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.703902244567871,
      "learning_rate": 0.0005816964005944952,
      "loss": 2.8407,
      "step": 25751
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2940876483917236,
      "learning_rate": 0.0005816949936198684,
      "loss": 3.1884,
      "step": 25752
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4587336778640747,
      "learning_rate": 0.000581693586592869,
      "loss": 3.3305,
      "step": 25753
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4325907230377197,
      "learning_rate": 0.0005816921795134976,
      "loss": 3.0719,
      "step": 25754
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.936116099357605,
      "learning_rate": 0.0005816907723817542,
      "loss": 2.9739,
      "step": 25755
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3976985216140747,
      "learning_rate": 0.0005816893651976392,
      "loss": 2.974,
      "step": 25756
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.910392165184021,
      "learning_rate": 0.0005816879579611529,
      "loss": 3.1007,
      "step": 25757
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4269238710403442,
      "learning_rate": 0.0005816865506722955,
      "loss": 3.117,
      "step": 25758
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5767141580581665,
      "learning_rate": 0.0005816851433310671,
      "loss": 3.046,
      "step": 25759
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8300461769104004,
      "learning_rate": 0.0005816837359374682,
      "loss": 3.0827,
      "step": 25760
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6280357837677002,
      "learning_rate": 0.000581682328491499,
      "loss": 2.9808,
      "step": 25761
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7635009288787842,
      "learning_rate": 0.0005816809209931599,
      "loss": 3.0048,
      "step": 25762
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0169527530670166,
      "learning_rate": 0.0005816795134424508,
      "loss": 3.0669,
      "step": 25763
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7386747598648071,
      "learning_rate": 0.0005816781058393722,
      "loss": 3.3128,
      "step": 25764
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4218637943267822,
      "learning_rate": 0.0005816766981839242,
      "loss": 2.7886,
      "step": 25765
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.509169578552246,
      "learning_rate": 0.0005816752904761074,
      "loss": 3.1523,
      "step": 25766
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2727198600769043,
      "learning_rate": 0.0005816738827159217,
      "loss": 3.1813,
      "step": 25767
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4550403356552124,
      "learning_rate": 0.0005816724749033676,
      "loss": 3.2871,
      "step": 25768
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.559063196182251,
      "learning_rate": 0.0005816710670384452,
      "loss": 2.8365,
      "step": 25769
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5243948698043823,
      "learning_rate": 0.0005816696591211548,
      "loss": 3.1865,
      "step": 25770
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6081154346466064,
      "learning_rate": 0.0005816682511514969,
      "loss": 3.2866,
      "step": 25771
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5786939859390259,
      "learning_rate": 0.0005816668431294713,
      "loss": 3.2478,
      "step": 25772
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.155531644821167,
      "learning_rate": 0.0005816654350550786,
      "loss": 3.0758,
      "step": 25773
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5761382579803467,
      "learning_rate": 0.000581664026928319,
      "loss": 3.088,
      "step": 25774
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5184844732284546,
      "learning_rate": 0.0005816626187491928,
      "loss": 3.0433,
      "step": 25775
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4731318950653076,
      "learning_rate": 0.0005816612105177001,
      "loss": 3.0372,
      "step": 25776
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5025280714035034,
      "learning_rate": 0.0005816598022338413,
      "loss": 2.8961,
      "step": 25777
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.535328984260559,
      "learning_rate": 0.0005816583938976167,
      "loss": 2.8091,
      "step": 25778
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.414823055267334,
      "learning_rate": 0.0005816569855090264,
      "loss": 3.0924,
      "step": 25779
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.402501106262207,
      "learning_rate": 0.0005816555770680708,
      "loss": 3.0699,
      "step": 25780
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2982763051986694,
      "learning_rate": 0.0005816541685747501,
      "loss": 2.8291,
      "step": 25781
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3239223957061768,
      "learning_rate": 0.0005816527600290645,
      "loss": 3.0257,
      "step": 25782
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5793243646621704,
      "learning_rate": 0.0005816513514310144,
      "loss": 2.9568,
      "step": 25783
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8123807907104492,
      "learning_rate": 0.0005816499427806001,
      "loss": 3.0612,
      "step": 25784
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.816288948059082,
      "learning_rate": 0.0005816485340778216,
      "loss": 2.9791,
      "step": 25785
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.543462872505188,
      "learning_rate": 0.0005816471253226793,
      "loss": 3.2181,
      "step": 25786
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4961562156677246,
      "learning_rate": 0.0005816457165151736,
      "loss": 2.9613,
      "step": 25787
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5362120866775513,
      "learning_rate": 0.0005816443076553045,
      "loss": 3.1951,
      "step": 25788
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5328917503356934,
      "learning_rate": 0.0005816428987430726,
      "loss": 3.2526,
      "step": 25789
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.3111326694488525,
      "learning_rate": 0.0005816414897784779,
      "loss": 2.8865,
      "step": 25790
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2725300788879395,
      "learning_rate": 0.0005816400807615206,
      "loss": 3.1192,
      "step": 25791
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1180083751678467,
      "learning_rate": 0.0005816386716922013,
      "loss": 3.0884,
      "step": 25792
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7952004671096802,
      "learning_rate": 0.00058163726257052,
      "loss": 2.9166,
      "step": 25793
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6001131534576416,
      "learning_rate": 0.0005816358533964769,
      "loss": 3.1256,
      "step": 25794
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6386948823928833,
      "learning_rate": 0.0005816344441700724,
      "loss": 3.217,
      "step": 25795
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8968555927276611,
      "learning_rate": 0.0005816330348913069,
      "loss": 3.0563,
      "step": 25796
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.800851583480835,
      "learning_rate": 0.0005816316255601803,
      "loss": 3.0385,
      "step": 25797
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1043810844421387,
      "learning_rate": 0.0005816302161766932,
      "loss": 3.0287,
      "step": 25798
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.121244192123413,
      "learning_rate": 0.0005816288067408456,
      "loss": 3.2129,
      "step": 25799
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5449731349945068,
      "learning_rate": 0.000581627397252638,
      "loss": 3.03,
      "step": 25800
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6963335275650024,
      "learning_rate": 0.0005816259877120706,
      "loss": 2.9675,
      "step": 25801
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2911393642425537,
      "learning_rate": 0.0005816245781191435,
      "loss": 2.8062,
      "step": 25802
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7964293956756592,
      "learning_rate": 0.000581623168473857,
      "loss": 3.213,
      "step": 25803
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0708024501800537,
      "learning_rate": 0.0005816217587762116,
      "loss": 3.0504,
      "step": 25804
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.7842884063720703,
      "learning_rate": 0.0005816203490262074,
      "loss": 3.0647,
      "step": 25805
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6276636123657227,
      "learning_rate": 0.0005816189392238445,
      "loss": 3.2594,
      "step": 25806
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4212770462036133,
      "learning_rate": 0.0005816175293691235,
      "loss": 2.9082,
      "step": 25807
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.807727813720703,
      "learning_rate": 0.0005816161194620444,
      "loss": 3.1357,
      "step": 25808
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9224275350570679,
      "learning_rate": 0.0005816147095026075,
      "loss": 3.195,
      "step": 25809
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.297975778579712,
      "learning_rate": 0.0005816132994908133,
      "loss": 3.1895,
      "step": 25810
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.370062828063965,
      "learning_rate": 0.0005816118894266617,
      "loss": 3.0836,
      "step": 25811
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4490580558776855,
      "learning_rate": 0.0005816104793101532,
      "loss": 2.9366,
      "step": 25812
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.0686800479888916,
      "learning_rate": 0.0005816090691412879,
      "loss": 2.7812,
      "step": 25813
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5494977235794067,
      "learning_rate": 0.0005816076589200662,
      "loss": 3.179,
      "step": 25814
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.320081949234009,
      "learning_rate": 0.0005816062486464884,
      "loss": 3.1607,
      "step": 25815
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6100237369537354,
      "learning_rate": 0.0005816048383205547,
      "loss": 3.0982,
      "step": 25816
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1331422328948975,
      "learning_rate": 0.0005816034279422653,
      "loss": 3.191,
      "step": 25817
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6663883924484253,
      "learning_rate": 0.0005816020175116204,
      "loss": 2.9953,
      "step": 25818
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8915644884109497,
      "learning_rate": 0.0005816006070286204,
      "loss": 3.0474,
      "step": 25819
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6809232234954834,
      "learning_rate": 0.0005815991964932656,
      "loss": 3.0042,
      "step": 25820
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2314399480819702,
      "learning_rate": 0.0005815977859055562,
      "loss": 3.0443,
      "step": 25821
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6617767810821533,
      "learning_rate": 0.0005815963752654925,
      "loss": 2.997,
      "step": 25822
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.864516258239746,
      "learning_rate": 0.0005815949645730746,
      "loss": 2.9953,
      "step": 25823
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6454002857208252,
      "learning_rate": 0.0005815935538283029,
      "loss": 3.1656,
      "step": 25824
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.539406180381775,
      "learning_rate": 0.0005815921430311777,
      "loss": 2.9809,
      "step": 25825
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3213255405426025,
      "learning_rate": 0.0005815907321816992,
      "loss": 3.0945,
      "step": 25826
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6520020961761475,
      "learning_rate": 0.0005815893212798677,
      "loss": 3.1607,
      "step": 25827
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7961012125015259,
      "learning_rate": 0.0005815879103256834,
      "loss": 3.4524,
      "step": 25828
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3756994009017944,
      "learning_rate": 0.0005815864993191465,
      "loss": 3.0736,
      "step": 25829
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5249533653259277,
      "learning_rate": 0.0005815850882602574,
      "loss": 3.2505,
      "step": 25830
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.385432243347168,
      "learning_rate": 0.0005815836771490164,
      "loss": 3.1869,
      "step": 25831
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0509352684020996,
      "learning_rate": 0.0005815822659854237,
      "loss": 3.0647,
      "step": 25832
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.938239574432373,
      "learning_rate": 0.0005815808547694794,
      "loss": 2.9806,
      "step": 25833
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5733932256698608,
      "learning_rate": 0.000581579443501184,
      "loss": 3.0753,
      "step": 25834
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2851223945617676,
      "learning_rate": 0.0005815780321805376,
      "loss": 3.3939,
      "step": 25835
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3070579767227173,
      "learning_rate": 0.0005815766208075406,
      "loss": 3.1028,
      "step": 25836
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6063737869262695,
      "learning_rate": 0.0005815752093821931,
      "loss": 3.1451,
      "step": 25837
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7039577960968018,
      "learning_rate": 0.0005815737979044955,
      "loss": 3.0847,
      "step": 25838
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.119471549987793,
      "learning_rate": 0.000581572386374448,
      "loss": 2.9995,
      "step": 25839
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4661482572555542,
      "learning_rate": 0.0005815709747920509,
      "loss": 2.9718,
      "step": 25840
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6968019008636475,
      "learning_rate": 0.0005815695631573044,
      "loss": 3.0118,
      "step": 25841
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5957651138305664,
      "learning_rate": 0.000581568151470209,
      "loss": 3.1684,
      "step": 25842
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2037465572357178,
      "learning_rate": 0.0005815667397307645,
      "loss": 2.7417,
      "step": 25843
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.575409173965454,
      "learning_rate": 0.0005815653279389716,
      "loss": 3.1306,
      "step": 25844
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8606616258621216,
      "learning_rate": 0.0005815639160948303,
      "loss": 3.096,
      "step": 25845
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4493659734725952,
      "learning_rate": 0.000581562504198341,
      "loss": 3.0358,
      "step": 25846
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5742995738983154,
      "learning_rate": 0.0005815610922495039,
      "loss": 3.0608,
      "step": 25847
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.283771514892578,
      "learning_rate": 0.0005815596802483192,
      "loss": 3.1165,
      "step": 25848
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8377431631088257,
      "learning_rate": 0.0005815582681947874,
      "loss": 3.0233,
      "step": 25849
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4198172092437744,
      "learning_rate": 0.0005815568560889085,
      "loss": 3.1972,
      "step": 25850
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5787965059280396,
      "learning_rate": 0.0005815554439306829,
      "loss": 2.8762,
      "step": 25851
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.760510802268982,
      "learning_rate": 0.0005815540317201108,
      "loss": 3.2509,
      "step": 25852
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4018172025680542,
      "learning_rate": 0.0005815526194571925,
      "loss": 2.9449,
      "step": 25853
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.159147024154663,
      "learning_rate": 0.0005815512071419283,
      "loss": 3.0916,
      "step": 25854
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.825217604637146,
      "learning_rate": 0.0005815497947743183,
      "loss": 3.115,
      "step": 25855
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.642614722251892,
      "learning_rate": 0.000581548382354363,
      "loss": 3.1147,
      "step": 25856
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4390037059783936,
      "learning_rate": 0.0005815469698820625,
      "loss": 2.9633,
      "step": 25857
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5309782028198242,
      "learning_rate": 0.0005815455573574171,
      "loss": 3.0554,
      "step": 25858
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8836820125579834,
      "learning_rate": 0.000581544144780427,
      "loss": 3.0664,
      "step": 25859
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8618817329406738,
      "learning_rate": 0.0005815427321510927,
      "loss": 3.1036,
      "step": 25860
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4274135828018188,
      "learning_rate": 0.0005815413194694141,
      "loss": 3.0287,
      "step": 25861
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5151302814483643,
      "learning_rate": 0.0005815399067353918,
      "loss": 2.9231,
      "step": 25862
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1986560821533203,
      "learning_rate": 0.0005815384939490258,
      "loss": 3.1453,
      "step": 25863
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4634439945220947,
      "learning_rate": 0.0005815370811103165,
      "loss": 2.9895,
      "step": 25864
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6277471780776978,
      "learning_rate": 0.0005815356682192643,
      "loss": 3.0917,
      "step": 25865
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5279622077941895,
      "learning_rate": 0.0005815342552758691,
      "loss": 3.0767,
      "step": 25866
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0782506465911865,
      "learning_rate": 0.0005815328422801314,
      "loss": 3.4134,
      "step": 25867
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3180128335952759,
      "learning_rate": 0.0005815314292320515,
      "loss": 3.2603,
      "step": 25868
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3919235467910767,
      "learning_rate": 0.0005815300161316296,
      "loss": 2.9739,
      "step": 25869
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.491831064224243,
      "learning_rate": 0.0005815286029788659,
      "loss": 3.1063,
      "step": 25870
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1881465911865234,
      "learning_rate": 0.0005815271897737608,
      "loss": 3.1031,
      "step": 25871
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6580146551132202,
      "learning_rate": 0.0005815257765163145,
      "loss": 3.085,
      "step": 25872
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0202910900115967,
      "learning_rate": 0.0005815243632065272,
      "loss": 3.1637,
      "step": 25873
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1780335903167725,
      "learning_rate": 0.0005815229498443991,
      "loss": 2.8645,
      "step": 25874
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.326055884361267,
      "learning_rate": 0.0005815215364299306,
      "loss": 3.1187,
      "step": 25875
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6006876230239868,
      "learning_rate": 0.000581520122963122,
      "loss": 2.9695,
      "step": 25876
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8106800317764282,
      "learning_rate": 0.0005815187094439735,
      "loss": 3.1288,
      "step": 25877
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.360950231552124,
      "learning_rate": 0.0005815172958724853,
      "loss": 3.3906,
      "step": 25878
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6990381479263306,
      "learning_rate": 0.0005815158822486577,
      "loss": 3.1768,
      "step": 25879
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0673489570617676,
      "learning_rate": 0.0005815144685724911,
      "loss": 2.917,
      "step": 25880
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7158101797103882,
      "learning_rate": 0.0005815130548439856,
      "loss": 3.114,
      "step": 25881
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2689063549041748,
      "learning_rate": 0.0005815116410631414,
      "loss": 3.0188,
      "step": 25882
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.501974582672119,
      "learning_rate": 0.0005815102272299591,
      "loss": 3.069,
      "step": 25883
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.616091012954712,
      "learning_rate": 0.0005815088133444385,
      "loss": 3.1151,
      "step": 25884
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5983091592788696,
      "learning_rate": 0.0005815073994065803,
      "loss": 3.1501,
      "step": 25885
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7159044742584229,
      "learning_rate": 0.0005815059854163844,
      "loss": 3.1423,
      "step": 25886
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4528559446334839,
      "learning_rate": 0.0005815045713738513,
      "loss": 3.1334,
      "step": 25887
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6934548616409302,
      "learning_rate": 0.0005815031572789812,
      "loss": 3.0242,
      "step": 25888
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4553587436676025,
      "learning_rate": 0.0005815017431317742,
      "loss": 2.97,
      "step": 25889
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4309663772583008,
      "learning_rate": 0.0005815003289322309,
      "loss": 3.0644,
      "step": 25890
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.7182881832122803,
      "learning_rate": 0.0005814989146803513,
      "loss": 3.1016,
      "step": 25891
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6938382387161255,
      "learning_rate": 0.0005814975003761358,
      "loss": 3.11,
      "step": 25892
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7649152278900146,
      "learning_rate": 0.0005814960860195845,
      "loss": 3.0345,
      "step": 25893
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5640395879745483,
      "learning_rate": 0.0005814946716106978,
      "loss": 3.1794,
      "step": 25894
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4689112901687622,
      "learning_rate": 0.000581493257149476,
      "loss": 3.0908,
      "step": 25895
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8550366163253784,
      "learning_rate": 0.0005814918426359192,
      "loss": 2.7515,
      "step": 25896
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.13504958152771,
      "learning_rate": 0.0005814904280700277,
      "loss": 2.9428,
      "step": 25897
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4020708799362183,
      "learning_rate": 0.0005814890134518019,
      "loss": 3.2314,
      "step": 25898
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3835406303405762,
      "learning_rate": 0.0005814875987812419,
      "loss": 3.1261,
      "step": 25899
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9908393621444702,
      "learning_rate": 0.0005814861840583482,
      "loss": 2.7066,
      "step": 25900
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.678566813468933,
      "learning_rate": 0.0005814847692831208,
      "loss": 3.0456,
      "step": 25901
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5761581659317017,
      "learning_rate": 0.0005814833544555601,
      "loss": 2.6391,
      "step": 25902
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.372071623802185,
      "learning_rate": 0.0005814819395756663,
      "loss": 3.033,
      "step": 25903
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5295710563659668,
      "learning_rate": 0.0005814805246434398,
      "loss": 3.0226,
      "step": 25904
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5072938203811646,
      "learning_rate": 0.0005814791096588806,
      "loss": 3.1706,
      "step": 25905
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4845702648162842,
      "learning_rate": 0.0005814776946219892,
      "loss": 2.9236,
      "step": 25906
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.482417106628418,
      "learning_rate": 0.0005814762795327658,
      "loss": 3.174,
      "step": 25907
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5939713716506958,
      "learning_rate": 0.0005814748643912105,
      "loss": 3.1958,
      "step": 25908
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6398887634277344,
      "learning_rate": 0.000581473449197324,
      "loss": 3.0449,
      "step": 25909
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8385659456253052,
      "learning_rate": 0.000581472033951106,
      "loss": 3.1392,
      "step": 25910
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3808884620666504,
      "learning_rate": 0.0005814706186525572,
      "loss": 2.7831,
      "step": 25911
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9623368978500366,
      "learning_rate": 0.0005814692033016776,
      "loss": 3.0073,
      "step": 25912
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6858421564102173,
      "learning_rate": 0.0005814677878984676,
      "loss": 3.1804,
      "step": 25913
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3266537189483643,
      "learning_rate": 0.0005814663724429275,
      "loss": 3.2184,
      "step": 25914
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8193936347961426,
      "learning_rate": 0.0005814649569350573,
      "loss": 3.1526,
      "step": 25915
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.9522228240966797,
      "learning_rate": 0.0005814635413748576,
      "loss": 2.7469,
      "step": 25916
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2527241706848145,
      "learning_rate": 0.0005814621257623284,
      "loss": 2.9977,
      "step": 25917
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.696070671081543,
      "learning_rate": 0.0005814607100974702,
      "loss": 2.9805,
      "step": 25918
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4274758100509644,
      "learning_rate": 0.0005814592943802832,
      "loss": 3.054,
      "step": 25919
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6663744449615479,
      "learning_rate": 0.0005814578786107674,
      "loss": 3.0752,
      "step": 25920
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4513282775878906,
      "learning_rate": 0.0005814564627889234,
      "loss": 3.1878,
      "step": 25921
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3316330909729004,
      "learning_rate": 0.0005814550469147512,
      "loss": 3.2079,
      "step": 25922
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5734649896621704,
      "learning_rate": 0.0005814536309882513,
      "loss": 3.1394,
      "step": 25923
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6202068328857422,
      "learning_rate": 0.000581452215009424,
      "loss": 3.0526,
      "step": 25924
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2525604963302612,
      "learning_rate": 0.0005814507989782692,
      "loss": 2.8795,
      "step": 25925
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4732389450073242,
      "learning_rate": 0.0005814493828947874,
      "loss": 2.9401,
      "step": 25926
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8342831134796143,
      "learning_rate": 0.0005814479667589789,
      "loss": 3.1479,
      "step": 25927
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9437963962554932,
      "learning_rate": 0.0005814465505708439,
      "loss": 3.1634,
      "step": 25928
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0096383094787598,
      "learning_rate": 0.0005814451343303827,
      "loss": 2.9815,
      "step": 25929
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.415492534637451,
      "learning_rate": 0.0005814437180375955,
      "loss": 3.0105,
      "step": 25930
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.325238585472107,
      "learning_rate": 0.0005814423016924828,
      "loss": 3.1691,
      "step": 25931
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9568047523498535,
      "learning_rate": 0.0005814408852950443,
      "loss": 3.1879,
      "step": 25932
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5071542263031006,
      "learning_rate": 0.0005814394688452809,
      "loss": 3.2225,
      "step": 25933
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.703354835510254,
      "learning_rate": 0.0005814380523431925,
      "loss": 2.7772,
      "step": 25934
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.9017021656036377,
      "learning_rate": 0.0005814366357887794,
      "loss": 2.992,
      "step": 25935
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5738205909729004,
      "learning_rate": 0.000581435219182042,
      "loss": 2.9717,
      "step": 25936
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.110894203186035,
      "learning_rate": 0.0005814338025229804,
      "loss": 3.2193,
      "step": 25937
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9443891048431396,
      "learning_rate": 0.000581432385811595,
      "loss": 2.9949,
      "step": 25938
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.678371787071228,
      "learning_rate": 0.0005814309690478861,
      "loss": 3.0379,
      "step": 25939
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.362436294555664,
      "learning_rate": 0.0005814295522318537,
      "loss": 3.1609,
      "step": 25940
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1629891395568848,
      "learning_rate": 0.0005814281353634983,
      "loss": 3.0145,
      "step": 25941
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.7003064155578613,
      "learning_rate": 0.0005814267184428201,
      "loss": 2.9835,
      "step": 25942
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.526259422302246,
      "learning_rate": 0.0005814253014698194,
      "loss": 3.2104,
      "step": 25943
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.219785451889038,
      "learning_rate": 0.0005814238844444963,
      "loss": 3.1002,
      "step": 25944
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.66647469997406,
      "learning_rate": 0.0005814224673668513,
      "loss": 3.1846,
      "step": 25945
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5123893022537231,
      "learning_rate": 0.0005814210502368846,
      "loss": 2.9723,
      "step": 25946
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.354114294052124,
      "learning_rate": 0.0005814196330545964,
      "loss": 2.9698,
      "step": 25947
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0600802898406982,
      "learning_rate": 0.0005814182158199869,
      "loss": 3.19,
      "step": 25948
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6090903282165527,
      "learning_rate": 0.0005814167985330565,
      "loss": 3.0347,
      "step": 25949
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3504040241241455,
      "learning_rate": 0.0005814153811938053,
      "loss": 3.1894,
      "step": 25950
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6595369577407837,
      "learning_rate": 0.0005814139638022337,
      "loss": 3.0555,
      "step": 25951
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5170516967773438,
      "learning_rate": 0.0005814125463583419,
      "loss": 2.9712,
      "step": 25952
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.04172945022583,
      "learning_rate": 0.0005814111288621304,
      "loss": 2.8865,
      "step": 25953
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4169752597808838,
      "learning_rate": 0.0005814097113135991,
      "loss": 3.1071,
      "step": 25954
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.483417510986328,
      "learning_rate": 0.0005814082937127484,
      "loss": 2.97,
      "step": 25955
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4269884824752808,
      "learning_rate": 0.0005814068760595786,
      "loss": 2.9353,
      "step": 25956
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4444098472595215,
      "learning_rate": 0.00058140545835409,
      "loss": 2.9866,
      "step": 25957
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4818578958511353,
      "learning_rate": 0.0005814040405962828,
      "loss": 2.6293,
      "step": 25958
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.650355339050293,
      "learning_rate": 0.0005814026227861573,
      "loss": 3.2294,
      "step": 25959
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6127712726593018,
      "learning_rate": 0.0005814012049237137,
      "loss": 3.0386,
      "step": 25960
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4404488801956177,
      "learning_rate": 0.0005813997870089524,
      "loss": 2.8469,
      "step": 25961
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.105750322341919,
      "learning_rate": 0.0005813983690418735,
      "loss": 3.2012,
      "step": 25962
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.325180768966675,
      "learning_rate": 0.0005813969510224773,
      "loss": 3.2848,
      "step": 25963
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3865070343017578,
      "learning_rate": 0.000581395532950764,
      "loss": 3.159,
      "step": 25964
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.768810272216797,
      "learning_rate": 0.0005813941148267341,
      "loss": 2.767,
      "step": 25965
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4670556783676147,
      "learning_rate": 0.0005813926966503877,
      "loss": 3.1545,
      "step": 25966
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4726474285125732,
      "learning_rate": 0.0005813912784217251,
      "loss": 3.3892,
      "step": 25967
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.380785346031189,
      "learning_rate": 0.0005813898601407465,
      "loss": 3.062,
      "step": 25968
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6839572191238403,
      "learning_rate": 0.0005813884418074522,
      "loss": 2.8526,
      "step": 25969
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2114075422286987,
      "learning_rate": 0.0005813870234218425,
      "loss": 3.18,
      "step": 25970
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.40560781955719,
      "learning_rate": 0.0005813856049839177,
      "loss": 3.176,
      "step": 25971
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5521541833877563,
      "learning_rate": 0.000581384186493678,
      "loss": 2.8839,
      "step": 25972
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3689296245574951,
      "learning_rate": 0.0005813827679511237,
      "loss": 2.9958,
      "step": 25973
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.460275650024414,
      "learning_rate": 0.0005813813493562548,
      "loss": 3.1714,
      "step": 25974
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9125534296035767,
      "learning_rate": 0.000581379930709072,
      "loss": 3.0641,
      "step": 25975
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3210519552230835,
      "learning_rate": 0.0005813785120095754,
      "loss": 2.9971,
      "step": 25976
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8395729064941406,
      "learning_rate": 0.0005813770932577651,
      "loss": 3.1394,
      "step": 25977
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3243902921676636,
      "learning_rate": 0.0005813756744536415,
      "loss": 2.75,
      "step": 25978
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6173763275146484,
      "learning_rate": 0.0005813742555972049,
      "loss": 3.2128,
      "step": 25979
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5802216529846191,
      "learning_rate": 0.0005813728366884554,
      "loss": 2.8276,
      "step": 25980
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9298628568649292,
      "learning_rate": 0.0005813714177273936,
      "loss": 3.0612,
      "step": 25981
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3047219514846802,
      "learning_rate": 0.0005813699987140193,
      "loss": 3.0286,
      "step": 25982
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.200007915496826,
      "learning_rate": 0.0005813685796483332,
      "loss": 3.2413,
      "step": 25983
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4288902282714844,
      "learning_rate": 0.0005813671605303353,
      "loss": 3.0957,
      "step": 25984
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4996873140335083,
      "learning_rate": 0.0005813657413600259,
      "loss": 2.9732,
      "step": 25985
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3631888628005981,
      "learning_rate": 0.0005813643221374053,
      "loss": 3.2885,
      "step": 25986
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7561073303222656,
      "learning_rate": 0.0005813629028624738,
      "loss": 3.0753,
      "step": 25987
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3538057804107666,
      "learning_rate": 0.0005813614835352316,
      "loss": 3.1783,
      "step": 25988
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6784234046936035,
      "learning_rate": 0.0005813600641556791,
      "loss": 3.0573,
      "step": 25989
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7010389566421509,
      "learning_rate": 0.0005813586447238162,
      "loss": 2.9973,
      "step": 25990
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5518442392349243,
      "learning_rate": 0.0005813572252396436,
      "loss": 3.0663,
      "step": 25991
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3608347177505493,
      "learning_rate": 0.0005813558057031614,
      "loss": 3.0473,
      "step": 25992
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.620998740196228,
      "learning_rate": 0.0005813543861143698,
      "loss": 2.9358,
      "step": 25993
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5505110025405884,
      "learning_rate": 0.0005813529664732691,
      "loss": 3.2569,
      "step": 25994
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4563158750534058,
      "learning_rate": 0.0005813515467798596,
      "loss": 3.3041,
      "step": 25995
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7792235612869263,
      "learning_rate": 0.0005813501270341415,
      "loss": 3.0449,
      "step": 25996
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3200197219848633,
      "learning_rate": 0.000581348707236115,
      "loss": 3.2221,
      "step": 25997
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.761867880821228,
      "learning_rate": 0.0005813472873857806,
      "loss": 3.0909,
      "step": 25998
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.310313105583191,
      "learning_rate": 0.0005813458674831384,
      "loss": 3.1862,
      "step": 25999
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1024203300476074,
      "learning_rate": 0.0005813444475281887,
      "loss": 2.8243,
      "step": 26000
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1553070545196533,
      "learning_rate": 0.0005813430275209317,
      "loss": 3.1781,
      "step": 26001
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.1564501523971558,
      "learning_rate": 0.0005813416074613679,
      "loss": 2.9973,
      "step": 26002
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2906653881073,
      "learning_rate": 0.0005813401873494972,
      "loss": 2.8759,
      "step": 26003
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0815775394439697,
      "learning_rate": 0.0005813387671853201,
      "loss": 3.0002,
      "step": 26004
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6645472049713135,
      "learning_rate": 0.0005813373469688368,
      "loss": 2.9436,
      "step": 26005
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7535903453826904,
      "learning_rate": 0.0005813359267000476,
      "loss": 2.9409,
      "step": 26006
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.024956226348877,
      "learning_rate": 0.0005813345063789527,
      "loss": 3.3401,
      "step": 26007
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1470460891723633,
      "learning_rate": 0.0005813330860055525,
      "loss": 3.0395,
      "step": 26008
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6083288192749023,
      "learning_rate": 0.000581331665579847,
      "loss": 3.2041,
      "step": 26009
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7983064651489258,
      "learning_rate": 0.0005813302451018368,
      "loss": 3.2121,
      "step": 26010
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3358641862869263,
      "learning_rate": 0.0005813288245715219,
      "loss": 2.9393,
      "step": 26011
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.562272548675537,
      "learning_rate": 0.0005813274039889027,
      "loss": 3.0083,
      "step": 26012
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3605401515960693,
      "learning_rate": 0.0005813259833539795,
      "loss": 3.0774,
      "step": 26013
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4336496591567993,
      "learning_rate": 0.0005813245626667524,
      "loss": 3.0101,
      "step": 26014
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7053642272949219,
      "learning_rate": 0.0005813231419272217,
      "loss": 3.0661,
      "step": 26015
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6782299280166626,
      "learning_rate": 0.0005813217211353878,
      "loss": 3.0967,
      "step": 26016
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.500659465789795,
      "learning_rate": 0.0005813203002912509,
      "loss": 3.1186,
      "step": 26017
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2834820747375488,
      "learning_rate": 0.0005813188793948112,
      "loss": 2.9078,
      "step": 26018
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4583683013916016,
      "learning_rate": 0.000581317458446069,
      "loss": 3.0803,
      "step": 26019
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7555941343307495,
      "learning_rate": 0.0005813160374450246,
      "loss": 3.2975,
      "step": 26020
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8525973558425903,
      "learning_rate": 0.0005813146163916782,
      "loss": 3.1586,
      "step": 26021
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.354416847229004,
      "learning_rate": 0.0005813131952860303,
      "loss": 3.0183,
      "step": 26022
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7262933254241943,
      "learning_rate": 0.0005813117741280807,
      "loss": 2.99,
      "step": 26023
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.823765754699707,
      "learning_rate": 0.0005813103529178301,
      "loss": 3.103,
      "step": 26024
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.313066840171814,
      "learning_rate": 0.0005813089316552784,
      "loss": 3.0437,
      "step": 26025
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7192150354385376,
      "learning_rate": 0.0005813075103404263,
      "loss": 2.9009,
      "step": 26026
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7140625715255737,
      "learning_rate": 0.0005813060889732737,
      "loss": 3.0642,
      "step": 26027
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4310851097106934,
      "learning_rate": 0.000581304667553821,
      "loss": 2.8971,
      "step": 26028
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.66060471534729,
      "learning_rate": 0.0005813032460820684,
      "loss": 3.1386,
      "step": 26029
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.33213210105896,
      "learning_rate": 0.0005813018245580163,
      "loss": 3.2386,
      "step": 26030
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5568292140960693,
      "learning_rate": 0.0005813004029816648,
      "loss": 3.2372,
      "step": 26031
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.221963882446289,
      "learning_rate": 0.0005812989813530143,
      "loss": 3.3843,
      "step": 26032
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.8547518253326416,
      "learning_rate": 0.0005812975596720651,
      "loss": 3.1278,
      "step": 26033
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7132974863052368,
      "learning_rate": 0.0005812961379388172,
      "loss": 2.9528,
      "step": 26034
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6406779289245605,
      "learning_rate": 0.0005812947161532712,
      "loss": 3.02,
      "step": 26035
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3526976108551025,
      "learning_rate": 0.000581293294315427,
      "loss": 2.9422,
      "step": 26036
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3363953828811646,
      "learning_rate": 0.0005812918724252853,
      "loss": 3.2052,
      "step": 26037
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.514115810394287,
      "learning_rate": 0.0005812904504828459,
      "loss": 3.0521,
      "step": 26038
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8711868524551392,
      "learning_rate": 0.0005812890284881095,
      "loss": 2.7705,
      "step": 26039
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4389052391052246,
      "learning_rate": 0.0005812876064410761,
      "loss": 3.2282,
      "step": 26040
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5661523342132568,
      "learning_rate": 0.000581286184341746,
      "loss": 2.8566,
      "step": 26041
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6004875898361206,
      "learning_rate": 0.0005812847621901194,
      "loss": 2.9977,
      "step": 26042
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.922344923019409,
      "learning_rate": 0.0005812833399861968,
      "loss": 3.1564,
      "step": 26043
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9730706214904785,
      "learning_rate": 0.0005812819177299783,
      "loss": 2.8327,
      "step": 26044
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4857332706451416,
      "learning_rate": 0.000581280495421464,
      "loss": 3.2285,
      "step": 26045
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.086972236633301,
      "learning_rate": 0.0005812790730606546,
      "loss": 3.0125,
      "step": 26046
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4635932445526123,
      "learning_rate": 0.00058127765064755,
      "loss": 3.1754,
      "step": 26047
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.441932439804077,
      "learning_rate": 0.0005812762281821505,
      "loss": 2.9995,
      "step": 26048
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.08309268951416,
      "learning_rate": 0.0005812748056644565,
      "loss": 3.1899,
      "step": 26049
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6682138442993164,
      "learning_rate": 0.0005812733830944682,
      "loss": 3.0823,
      "step": 26050
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5562336444854736,
      "learning_rate": 0.0005812719604721859,
      "loss": 3.0921,
      "step": 26051
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.924623727798462,
      "learning_rate": 0.0005812705377976099,
      "loss": 3.1501,
      "step": 26052
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4472370147705078,
      "learning_rate": 0.0005812691150707402,
      "loss": 3.2349,
      "step": 26053
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5886777639389038,
      "learning_rate": 0.0005812676922915775,
      "loss": 2.8451,
      "step": 26054
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5109140872955322,
      "learning_rate": 0.0005812662694601216,
      "loss": 3.0138,
      "step": 26055
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5552418231964111,
      "learning_rate": 0.0005812648465763731,
      "loss": 2.9516,
      "step": 26056
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2196359634399414,
      "learning_rate": 0.0005812634236403322,
      "loss": 3.0855,
      "step": 26057
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5836150646209717,
      "learning_rate": 0.000581262000651999,
      "loss": 3.0828,
      "step": 26058
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4896972179412842,
      "learning_rate": 0.000581260577611374,
      "loss": 3.0018,
      "step": 26059
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7710846662521362,
      "learning_rate": 0.0005812591545184573,
      "loss": 2.8979,
      "step": 26060
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6459909677505493,
      "learning_rate": 0.0005812577313732491,
      "loss": 2.9242,
      "step": 26061
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4628379344940186,
      "learning_rate": 0.0005812563081757499,
      "loss": 3.1417,
      "step": 26062
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.436781644821167,
      "learning_rate": 0.00058125488492596,
      "loss": 3.1367,
      "step": 26063
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3631396293640137,
      "learning_rate": 0.0005812534616238793,
      "loss": 3.1182,
      "step": 26064
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4327894449234009,
      "learning_rate": 0.0005812520382695083,
      "loss": 3.3529,
      "step": 26065
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7865108251571655,
      "learning_rate": 0.0005812506148628472,
      "loss": 3.0931,
      "step": 26066
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4497482776641846,
      "learning_rate": 0.0005812491914038964,
      "loss": 3.0266,
      "step": 26067
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4216192960739136,
      "learning_rate": 0.000581247767892656,
      "loss": 3.0969,
      "step": 26068
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6548320055007935,
      "learning_rate": 0.0005812463443291264,
      "loss": 2.8541,
      "step": 26069
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5612653493881226,
      "learning_rate": 0.0005812449207133078,
      "loss": 2.8142,
      "step": 26070
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8970364332199097,
      "learning_rate": 0.0005812434970452004,
      "loss": 3.1076,
      "step": 26071
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8454289436340332,
      "learning_rate": 0.0005812420733248045,
      "loss": 2.9761,
      "step": 26072
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.315976619720459,
      "learning_rate": 0.0005812406495521204,
      "loss": 2.9927,
      "step": 26073
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5483930110931396,
      "learning_rate": 0.0005812392257271484,
      "loss": 2.9849,
      "step": 26074
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3911281824111938,
      "learning_rate": 0.0005812378018498888,
      "loss": 3.2976,
      "step": 26075
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7005211114883423,
      "learning_rate": 0.0005812363779203416,
      "loss": 3.0302,
      "step": 26076
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4358400106430054,
      "learning_rate": 0.0005812349539385073,
      "loss": 3.0644,
      "step": 26077
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4275026321411133,
      "learning_rate": 0.0005812335299043862,
      "loss": 3.3826,
      "step": 26078
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5216050148010254,
      "learning_rate": 0.0005812321058179784,
      "loss": 3.2632,
      "step": 26079
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.091430425643921,
      "learning_rate": 0.0005812306816792843,
      "loss": 2.8274,
      "step": 26080
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2758963108062744,
      "learning_rate": 0.0005812292574883041,
      "loss": 3.1446,
      "step": 26081
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6706033945083618,
      "learning_rate": 0.000581227833245038,
      "loss": 3.265,
      "step": 26082
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.609010934829712,
      "learning_rate": 0.0005812264089494864,
      "loss": 3.1487,
      "step": 26083
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.2726142406463623,
      "learning_rate": 0.0005812249846016494,
      "loss": 3.0428,
      "step": 26084
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6561925411224365,
      "learning_rate": 0.0005812235602015275,
      "loss": 3.0648,
      "step": 26085
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3121800422668457,
      "learning_rate": 0.0005812221357491208,
      "loss": 3.126,
      "step": 26086
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9339654445648193,
      "learning_rate": 0.0005812207112444296,
      "loss": 2.9863,
      "step": 26087
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.668071985244751,
      "learning_rate": 0.0005812192866874542,
      "loss": 3.0417,
      "step": 26088
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5245388746261597,
      "learning_rate": 0.0005812178620781947,
      "loss": 3.247,
      "step": 26089
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9286056756973267,
      "learning_rate": 0.0005812164374166516,
      "loss": 3.0659,
      "step": 26090
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.46183443069458,
      "learning_rate": 0.000581215012702825,
      "loss": 3.2604,
      "step": 26091
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5734829902648926,
      "learning_rate": 0.0005812135879367152,
      "loss": 2.9621,
      "step": 26092
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9191207885742188,
      "learning_rate": 0.0005812121631183226,
      "loss": 3.2297,
      "step": 26093
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7467010021209717,
      "learning_rate": 0.0005812107382476472,
      "loss": 3.1455,
      "step": 26094
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7922239303588867,
      "learning_rate": 0.0005812093133246895,
      "loss": 2.8629,
      "step": 26095
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4687297344207764,
      "learning_rate": 0.0005812078883494497,
      "loss": 3.356,
      "step": 26096
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6306616067886353,
      "learning_rate": 0.0005812064633219279,
      "loss": 3.3529,
      "step": 26097
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3811776638031006,
      "learning_rate": 0.0005812050382421247,
      "loss": 2.6996,
      "step": 26098
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5441192388534546,
      "learning_rate": 0.0005812036131100401,
      "loss": 3.0952,
      "step": 26099
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6904457807540894,
      "learning_rate": 0.0005812021879256744,
      "loss": 2.999,
      "step": 26100
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.584289312362671,
      "learning_rate": 0.000581200762689028,
      "loss": 3.2008,
      "step": 26101
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1988534927368164,
      "learning_rate": 0.0005811993374001009,
      "loss": 3.2335,
      "step": 26102
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4772708415985107,
      "learning_rate": 0.0005811979120588938,
      "loss": 2.862,
      "step": 26103
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.234668016433716,
      "learning_rate": 0.0005811964866654065,
      "loss": 2.9735,
      "step": 26104
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.92112398147583,
      "learning_rate": 0.0005811950612196394,
      "loss": 3.1866,
      "step": 26105
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.728564977645874,
      "learning_rate": 0.0005811936357215931,
      "loss": 3.3298,
      "step": 26106
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.184373378753662,
      "learning_rate": 0.0005811922101712673,
      "loss": 3.1806,
      "step": 26107
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.2930383682250977,
      "learning_rate": 0.0005811907845686628,
      "loss": 3.0194,
      "step": 26108
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8053488731384277,
      "learning_rate": 0.0005811893589137794,
      "loss": 3.3229,
      "step": 26109
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7377893924713135,
      "learning_rate": 0.0005811879332066178,
      "loss": 3.2178,
      "step": 26110
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4790918827056885,
      "learning_rate": 0.000581186507447178,
      "loss": 3.0549,
      "step": 26111
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4468286037445068,
      "learning_rate": 0.0005811850816354603,
      "loss": 3.2893,
      "step": 26112
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2275207042694092,
      "learning_rate": 0.000581183655771465,
      "loss": 3.1018,
      "step": 26113
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5481650829315186,
      "learning_rate": 0.0005811822298551922,
      "loss": 2.9784,
      "step": 26114
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.606353521347046,
      "learning_rate": 0.0005811808038866425,
      "loss": 3.1476,
      "step": 26115
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5934926271438599,
      "learning_rate": 0.000581179377865816,
      "loss": 3.1206,
      "step": 26116
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.73151433467865,
      "learning_rate": 0.0005811779517927128,
      "loss": 3.1917,
      "step": 26117
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.462815761566162,
      "learning_rate": 0.0005811765256673334,
      "loss": 3.1192,
      "step": 26118
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.417570948600769,
      "learning_rate": 0.000581175099489678,
      "loss": 3.2177,
      "step": 26119
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6320536136627197,
      "learning_rate": 0.0005811736732597467,
      "loss": 2.9897,
      "step": 26120
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9213584661483765,
      "learning_rate": 0.00058117224697754,
      "loss": 3.0885,
      "step": 26121
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4183186292648315,
      "learning_rate": 0.0005811708206430582,
      "loss": 2.9501,
      "step": 26122
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.594015121459961,
      "learning_rate": 0.0005811693942563013,
      "loss": 3.0801,
      "step": 26123
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7061853408813477,
      "learning_rate": 0.0005811679678172696,
      "loss": 3.1362,
      "step": 26124
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5540893077850342,
      "learning_rate": 0.0005811665413259637,
      "loss": 3.2191,
      "step": 26125
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5213940143585205,
      "learning_rate": 0.0005811651147823835,
      "loss": 2.8885,
      "step": 26126
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.038135051727295,
      "learning_rate": 0.0005811636881865295,
      "loss": 2.9724,
      "step": 26127
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.367976188659668,
      "learning_rate": 0.0005811622615384016,
      "loss": 2.9547,
      "step": 26128
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5513710975646973,
      "learning_rate": 0.0005811608348380006,
      "loss": 3.1334,
      "step": 26129
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8599340915679932,
      "learning_rate": 0.0005811594080853263,
      "loss": 3.1934,
      "step": 26130
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9536418914794922,
      "learning_rate": 0.0005811579812803793,
      "loss": 2.993,
      "step": 26131
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7854032516479492,
      "learning_rate": 0.0005811565544231597,
      "loss": 3.0594,
      "step": 26132
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1631574630737305,
      "learning_rate": 0.0005811551275136678,
      "loss": 2.9899,
      "step": 26133
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.476859450340271,
      "learning_rate": 0.0005811537005519038,
      "loss": 3.1367,
      "step": 26134
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.505372166633606,
      "learning_rate": 0.0005811522735378681,
      "loss": 3.0144,
      "step": 26135
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9299415349960327,
      "learning_rate": 0.0005811508464715608,
      "loss": 3.0553,
      "step": 26136
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3706985712051392,
      "learning_rate": 0.0005811494193529823,
      "loss": 2.9464,
      "step": 26137
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7231227159500122,
      "learning_rate": 0.0005811479921821327,
      "loss": 3.2273,
      "step": 26138
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6514816284179688,
      "learning_rate": 0.0005811465649590126,
      "loss": 2.9543,
      "step": 26139
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7102086544036865,
      "learning_rate": 0.0005811451376836219,
      "loss": 3.1536,
      "step": 26140
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.633407473564148,
      "learning_rate": 0.000581143710355961,
      "loss": 2.998,
      "step": 26141
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5527114868164062,
      "learning_rate": 0.0005811422829760303,
      "loss": 3.0544,
      "step": 26142
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5344752073287964,
      "learning_rate": 0.0005811408555438299,
      "loss": 2.9175,
      "step": 26143
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1982736587524414,
      "learning_rate": 0.00058113942805936,
      "loss": 3.0087,
      "step": 26144
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3296281099319458,
      "learning_rate": 0.0005811380005226209,
      "loss": 3.1005,
      "step": 26145
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8033301830291748,
      "learning_rate": 0.0005811365729336132,
      "loss": 3.0555,
      "step": 26146
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5164234638214111,
      "learning_rate": 0.0005811351452923367,
      "loss": 2.9437,
      "step": 26147
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3660224676132202,
      "learning_rate": 0.000581133717598792,
      "loss": 2.9414,
      "step": 26148
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5747216939926147,
      "learning_rate": 0.0005811322898529792,
      "loss": 2.8499,
      "step": 26149
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.311305046081543,
      "learning_rate": 0.0005811308620548985,
      "loss": 2.9307,
      "step": 26150
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9208215475082397,
      "learning_rate": 0.0005811294342045504,
      "loss": 2.9373,
      "step": 26151
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2572059631347656,
      "learning_rate": 0.000581128006301935,
      "loss": 3.0782,
      "step": 26152
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1542768478393555,
      "learning_rate": 0.0005811265783470526,
      "loss": 3.0964,
      "step": 26153
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.636831760406494,
      "learning_rate": 0.0005811251503399034,
      "loss": 3.0524,
      "step": 26154
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7154560089111328,
      "learning_rate": 0.0005811237222804878,
      "loss": 3.0531,
      "step": 26155
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6303057670593262,
      "learning_rate": 0.000581122294168806,
      "loss": 3.1973,
      "step": 26156
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4096221923828125,
      "learning_rate": 0.0005811208660048582,
      "loss": 3.1897,
      "step": 26157
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7068501710891724,
      "learning_rate": 0.0005811194377886447,
      "loss": 3.2414,
      "step": 26158
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3596452474594116,
      "learning_rate": 0.0005811180095201658,
      "loss": 3.4026,
      "step": 26159
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.447108268737793,
      "learning_rate": 0.0005811165811994218,
      "loss": 3.0974,
      "step": 26160
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8606001138687134,
      "learning_rate": 0.000581115152826413,
      "loss": 2.9984,
      "step": 26161
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5670937299728394,
      "learning_rate": 0.0005811137244011394,
      "loss": 3.1583,
      "step": 26162
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8079752922058105,
      "learning_rate": 0.0005811122959236017,
      "loss": 2.8616,
      "step": 26163
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7831718921661377,
      "learning_rate": 0.0005811108673937997,
      "loss": 2.9803,
      "step": 26164
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2168266773223877,
      "learning_rate": 0.0005811094388117339,
      "loss": 3.1052,
      "step": 26165
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.872150182723999,
      "learning_rate": 0.0005811080101774046,
      "loss": 3.2178,
      "step": 26166
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.673738956451416,
      "learning_rate": 0.0005811065814908121,
      "loss": 2.8004,
      "step": 26167
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4502439498901367,
      "learning_rate": 0.0005811051527519564,
      "loss": 2.8988,
      "step": 26168
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6968365907669067,
      "learning_rate": 0.0005811037239608381,
      "loss": 3.0878,
      "step": 26169
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4801616668701172,
      "learning_rate": 0.0005811022951174573,
      "loss": 2.9714,
      "step": 26170
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4581655263900757,
      "learning_rate": 0.0005811008662218142,
      "loss": 3.146,
      "step": 26171
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7442551851272583,
      "learning_rate": 0.0005810994372739092,
      "loss": 3.0729,
      "step": 26172
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3992364406585693,
      "learning_rate": 0.0005810980082737424,
      "loss": 3.0237,
      "step": 26173
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.218522310256958,
      "learning_rate": 0.0005810965792213143,
      "loss": 3.0829,
      "step": 26174
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6187735795974731,
      "learning_rate": 0.000581095150116625,
      "loss": 3.3719,
      "step": 26175
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6814217567443848,
      "learning_rate": 0.0005810937209596749,
      "loss": 3.0444,
      "step": 26176
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.696572780609131,
      "learning_rate": 0.000581092291750464,
      "loss": 3.2134,
      "step": 26177
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6235527992248535,
      "learning_rate": 0.0005810908624889928,
      "loss": 3.1383,
      "step": 26178
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.516693115234375,
      "learning_rate": 0.0005810894331752615,
      "loss": 3.1348,
      "step": 26179
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.697487235069275,
      "learning_rate": 0.0005810880038092705,
      "loss": 3.1649,
      "step": 26180
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1861419677734375,
      "learning_rate": 0.0005810865743910197,
      "loss": 3.2483,
      "step": 26181
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1101064682006836,
      "learning_rate": 0.0005810851449205097,
      "loss": 2.7455,
      "step": 26182
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.545690894126892,
      "learning_rate": 0.0005810837153977408,
      "loss": 3.0924,
      "step": 26183
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6795282363891602,
      "learning_rate": 0.000581082285822713,
      "loss": 3.1852,
      "step": 26184
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7621036767959595,
      "learning_rate": 0.0005810808561954266,
      "loss": 2.9663,
      "step": 26185
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1632211208343506,
      "learning_rate": 0.0005810794265158822,
      "loss": 2.6773,
      "step": 26186
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4212008714675903,
      "learning_rate": 0.0005810779967840796,
      "loss": 3.1608,
      "step": 26187
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.016512393951416,
      "learning_rate": 0.0005810765670000195,
      "loss": 3.0014,
      "step": 26188
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9794089794158936,
      "learning_rate": 0.0005810751371637017,
      "loss": 3.1541,
      "step": 26189
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6146539449691772,
      "learning_rate": 0.000581073707275127,
      "loss": 3.0561,
      "step": 26190
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2883522510528564,
      "learning_rate": 0.0005810722773342953,
      "loss": 2.9646,
      "step": 26191
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2700217962265015,
      "learning_rate": 0.0005810708473412069,
      "loss": 3.4017,
      "step": 26192
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.344907522201538,
      "learning_rate": 0.0005810694172958621,
      "loss": 3.2045,
      "step": 26193
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3089334964752197,
      "learning_rate": 0.0005810679871982613,
      "loss": 2.9863,
      "step": 26194
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6711777448654175,
      "learning_rate": 0.0005810665570484046,
      "loss": 2.7943,
      "step": 26195
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6642303466796875,
      "learning_rate": 0.0005810651268462923,
      "loss": 3.0198,
      "step": 26196
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.159653425216675,
      "learning_rate": 0.0005810636965919247,
      "loss": 3.0251,
      "step": 26197
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.529166579246521,
      "learning_rate": 0.0005810622662853021,
      "loss": 3.1208,
      "step": 26198
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3520208597183228,
      "learning_rate": 0.0005810608359264246,
      "loss": 2.9742,
      "step": 26199
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9548230171203613,
      "learning_rate": 0.0005810594055152927,
      "loss": 3.224,
      "step": 26200
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.599042534828186,
      "learning_rate": 0.0005810579750519066,
      "loss": 3.174,
      "step": 26201
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6189311742782593,
      "learning_rate": 0.0005810565445362664,
      "loss": 3.196,
      "step": 26202
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4237306118011475,
      "learning_rate": 0.0005810551139683725,
      "loss": 3.3135,
      "step": 26203
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.927612543106079,
      "learning_rate": 0.0005810536833482252,
      "loss": 2.8802,
      "step": 26204
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1314918994903564,
      "learning_rate": 0.0005810522526758247,
      "loss": 2.8414,
      "step": 26205
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.824448823928833,
      "learning_rate": 0.0005810508219511712,
      "loss": 2.9658,
      "step": 26206
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7910866737365723,
      "learning_rate": 0.0005810493911742651,
      "loss": 3.2509,
      "step": 26207
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.0268757343292236,
      "learning_rate": 0.0005810479603451067,
      "loss": 3.0906,
      "step": 26208
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0357372760772705,
      "learning_rate": 0.0005810465294636961,
      "loss": 3.0606,
      "step": 26209
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.448482632637024,
      "learning_rate": 0.0005810450985300337,
      "loss": 3.1628,
      "step": 26210
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.46886944770813,
      "learning_rate": 0.0005810436675441195,
      "loss": 3.1784,
      "step": 26211
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7037150859832764,
      "learning_rate": 0.0005810422365059542,
      "loss": 3.0499,
      "step": 26212
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.674482822418213,
      "learning_rate": 0.0005810408054155378,
      "loss": 2.8536,
      "step": 26213
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.037142038345337,
      "learning_rate": 0.0005810393742728706,
      "loss": 3.0988,
      "step": 26214
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0620415210723877,
      "learning_rate": 0.0005810379430779528,
      "loss": 3.174,
      "step": 26215
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0028762817382812,
      "learning_rate": 0.0005810365118307848,
      "loss": 2.9572,
      "step": 26216
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4343416690826416,
      "learning_rate": 0.0005810350805313669,
      "loss": 3.2826,
      "step": 26217
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5319336652755737,
      "learning_rate": 0.0005810336491796992,
      "loss": 3.3666,
      "step": 26218
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6458673477172852,
      "learning_rate": 0.000581032217775782,
      "loss": 3.058,
      "step": 26219
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5083163976669312,
      "learning_rate": 0.0005810307863196156,
      "loss": 3.1518,
      "step": 26220
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.828862190246582,
      "learning_rate": 0.0005810293548112004,
      "loss": 3.1222,
      "step": 26221
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4910858869552612,
      "learning_rate": 0.0005810279232505364,
      "loss": 3.1136,
      "step": 26222
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2464803457260132,
      "learning_rate": 0.0005810264916376241,
      "loss": 3.2251,
      "step": 26223
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5638591051101685,
      "learning_rate": 0.0005810250599724636,
      "loss": 3.0228,
      "step": 26224
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2420836687088013,
      "learning_rate": 0.0005810236282550552,
      "loss": 2.9973,
      "step": 26225
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.002937078475952,
      "learning_rate": 0.0005810221964853993,
      "loss": 3.5333,
      "step": 26226
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8482718467712402,
      "learning_rate": 0.0005810207646634961,
      "loss": 3.0809,
      "step": 26227
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.768721342086792,
      "learning_rate": 0.0005810193327893457,
      "loss": 3.0045,
      "step": 26228
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7743397951126099,
      "learning_rate": 0.0005810179008629486,
      "loss": 3.1976,
      "step": 26229
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.328990936279297,
      "learning_rate": 0.0005810164688843048,
      "loss": 2.9526,
      "step": 26230
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0364840030670166,
      "learning_rate": 0.0005810150368534149,
      "loss": 2.9285,
      "step": 26231
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5965882539749146,
      "learning_rate": 0.000581013604770279,
      "loss": 3.0656,
      "step": 26232
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8820481300354004,
      "learning_rate": 0.0005810121726348973,
      "loss": 3.096,
      "step": 26233
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3556525707244873,
      "learning_rate": 0.0005810107404472701,
      "loss": 3.1686,
      "step": 26234
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2420456409454346,
      "learning_rate": 0.0005810093082073978,
      "loss": 3.1992,
      "step": 26235
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6336501836776733,
      "learning_rate": 0.0005810078759152805,
      "loss": 3.1426,
      "step": 26236
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.523686170578003,
      "learning_rate": 0.0005810064435709186,
      "loss": 3.1611,
      "step": 26237
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.368013858795166,
      "learning_rate": 0.0005810050111743122,
      "loss": 3.2704,
      "step": 26238
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.380979061126709,
      "learning_rate": 0.0005810035787254618,
      "loss": 3.0304,
      "step": 26239
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9764726161956787,
      "learning_rate": 0.0005810021462243674,
      "loss": 3.094,
      "step": 26240
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.298210859298706,
      "learning_rate": 0.0005810007136710295,
      "loss": 3.1426,
      "step": 26241
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3973721265792847,
      "learning_rate": 0.0005809992810654483,
      "loss": 2.9757,
      "step": 26242
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5593210458755493,
      "learning_rate": 0.0005809978484076239,
      "loss": 3.3219,
      "step": 26243
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2274303436279297,
      "learning_rate": 0.0005809964156975566,
      "loss": 3.0253,
      "step": 26244
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.269834280014038,
      "learning_rate": 0.000580994982935247,
      "loss": 2.993,
      "step": 26245
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5393118858337402,
      "learning_rate": 0.000580993550120695,
      "loss": 3.0142,
      "step": 26246
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4840781688690186,
      "learning_rate": 0.000580992117253901,
      "loss": 3.3178,
      "step": 26247
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2587013244628906,
      "learning_rate": 0.0005809906843348653,
      "loss": 3.2979,
      "step": 26248
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.44508957862854,
      "learning_rate": 0.0005809892513635882,
      "loss": 2.8958,
      "step": 26249
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.579223394393921,
      "learning_rate": 0.0005809878183400697,
      "loss": 3.1634,
      "step": 26250
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.484108328819275,
      "learning_rate": 0.0005809863852643104,
      "loss": 3.1537,
      "step": 26251
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6232820749282837,
      "learning_rate": 0.0005809849521363104,
      "loss": 3.0911,
      "step": 26252
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.465309500694275,
      "learning_rate": 0.0005809835189560699,
      "loss": 2.9757,
      "step": 26253
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.481323480606079,
      "learning_rate": 0.0005809820857235894,
      "loss": 3.1681,
      "step": 26254
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9504717588424683,
      "learning_rate": 0.000580980652438869,
      "loss": 3.1983,
      "step": 26255
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0302929878234863,
      "learning_rate": 0.000580979219101909,
      "loss": 3.0586,
      "step": 26256
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5759608745574951,
      "learning_rate": 0.0005809777857127096,
      "loss": 3.0873,
      "step": 26257
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6353288888931274,
      "learning_rate": 0.0005809763522712712,
      "loss": 3.0616,
      "step": 26258
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5768547058105469,
      "learning_rate": 0.0005809749187775939,
      "loss": 3.1324,
      "step": 26259
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4115917682647705,
      "learning_rate": 0.000580973485231678,
      "loss": 3.1974,
      "step": 26260
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5563503503799438,
      "learning_rate": 0.000580972051633524,
      "loss": 3.1015,
      "step": 26261
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7586628198623657,
      "learning_rate": 0.0005809706179831319,
      "loss": 2.9471,
      "step": 26262
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1947760581970215,
      "learning_rate": 0.0005809691842805021,
      "loss": 2.9618,
      "step": 26263
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4123916625976562,
      "learning_rate": 0.0005809677505256349,
      "loss": 2.9351,
      "step": 26264
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7364544868469238,
      "learning_rate": 0.0005809663167185303,
      "loss": 3.2612,
      "step": 26265
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7046154737472534,
      "learning_rate": 0.0005809648828591889,
      "loss": 3.1965,
      "step": 26266
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.157722234725952,
      "learning_rate": 0.0005809634489476107,
      "loss": 3.1125,
      "step": 26267
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7974570989608765,
      "learning_rate": 0.0005809620149837962,
      "loss": 3.1526,
      "step": 26268
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5642441511154175,
      "learning_rate": 0.0005809605809677455,
      "loss": 3.3485,
      "step": 26269
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6045024394989014,
      "learning_rate": 0.000580959146899459,
      "loss": 3.2449,
      "step": 26270
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.429478883743286,
      "learning_rate": 0.0005809577127789368,
      "loss": 3.1071,
      "step": 26271
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6778730154037476,
      "learning_rate": 0.0005809562786061793,
      "loss": 3.0553,
      "step": 26272
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8843895196914673,
      "learning_rate": 0.0005809548443811867,
      "loss": 2.8637,
      "step": 26273
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.397505044937134,
      "learning_rate": 0.0005809534101039593,
      "loss": 3.2695,
      "step": 26274
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.132814645767212,
      "learning_rate": 0.0005809519757744974,
      "loss": 3.1041,
      "step": 26275
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.899833083152771,
      "learning_rate": 0.0005809505413928012,
      "loss": 2.9576,
      "step": 26276
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.3257603645324707,
      "learning_rate": 0.0005809491069588709,
      "loss": 3.0578,
      "step": 26277
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.901201009750366,
      "learning_rate": 0.000580947672472707,
      "loss": 2.7358,
      "step": 26278
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1554388999938965,
      "learning_rate": 0.0005809462379343096,
      "loss": 3.3837,
      "step": 26279
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0556068420410156,
      "learning_rate": 0.000580944803343679,
      "loss": 3.0965,
      "step": 26280
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2736690044403076,
      "learning_rate": 0.0005809433687008154,
      "loss": 3.1181,
      "step": 26281
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8273597955703735,
      "learning_rate": 0.0005809419340057191,
      "loss": 3.1066,
      "step": 26282
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3807965517044067,
      "learning_rate": 0.0005809404992583904,
      "loss": 2.9443,
      "step": 26283
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3209158182144165,
      "learning_rate": 0.0005809390644588297,
      "loss": 3.1887,
      "step": 26284
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8518519401550293,
      "learning_rate": 0.000580937629607037,
      "loss": 3.1258,
      "step": 26285
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4947123527526855,
      "learning_rate": 0.0005809361947030127,
      "loss": 2.9528,
      "step": 26286
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.43158757686615,
      "learning_rate": 0.000580934759746757,
      "loss": 3.0209,
      "step": 26287
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.761544108390808,
      "learning_rate": 0.0005809333247382703,
      "loss": 2.9986,
      "step": 26288
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3786512613296509,
      "learning_rate": 0.0005809318896775528,
      "loss": 2.6644,
      "step": 26289
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7417380809783936,
      "learning_rate": 0.0005809304545646047,
      "loss": 3.0824,
      "step": 26290
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5672093629837036,
      "learning_rate": 0.0005809290193994264,
      "loss": 2.9389,
      "step": 26291
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.671079635620117,
      "learning_rate": 0.000580927584182018,
      "loss": 3.2392,
      "step": 26292
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.76304292678833,
      "learning_rate": 0.00058092614891238,
      "loss": 2.9517,
      "step": 26293
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5757571458816528,
      "learning_rate": 0.0005809247135905124,
      "loss": 3.4299,
      "step": 26294
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.394939661026001,
      "learning_rate": 0.0005809232782164157,
      "loss": 3.2294,
      "step": 26295
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1612792015075684,
      "learning_rate": 0.00058092184279009,
      "loss": 2.9809,
      "step": 26296
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8824049234390259,
      "learning_rate": 0.0005809204073115356,
      "loss": 3.1668,
      "step": 26297
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.63404381275177,
      "learning_rate": 0.0005809189717807528,
      "loss": 3.1055,
      "step": 26298
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.890816330909729,
      "learning_rate": 0.0005809175361977418,
      "loss": 3.103,
      "step": 26299
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4795637130737305,
      "learning_rate": 0.0005809161005625031,
      "loss": 3.3575,
      "step": 26300
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3906430006027222,
      "learning_rate": 0.0005809146648750366,
      "loss": 3.2795,
      "step": 26301
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6690938472747803,
      "learning_rate": 0.0005809132291353429,
      "loss": 3.12,
      "step": 26302
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5196454524993896,
      "learning_rate": 0.0005809117933434221,
      "loss": 3.0841,
      "step": 26303
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.610277771949768,
      "learning_rate": 0.0005809103574992745,
      "loss": 3.0051,
      "step": 26304
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6040643453598022,
      "learning_rate": 0.0005809089216029003,
      "loss": 3.0005,
      "step": 26305
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.274205207824707,
      "learning_rate": 0.0005809074856543,
      "loss": 3.2325,
      "step": 26306
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4545420408248901,
      "learning_rate": 0.0005809060496534735,
      "loss": 2.8973,
      "step": 26307
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5818092823028564,
      "learning_rate": 0.0005809046136004214,
      "loss": 3.1261,
      "step": 26308
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5162233114242554,
      "learning_rate": 0.0005809031774951437,
      "loss": 3.2171,
      "step": 26309
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.505687713623047,
      "learning_rate": 0.0005809017413376409,
      "loss": 2.9585,
      "step": 26310
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4798551797866821,
      "learning_rate": 0.0005809003051279131,
      "loss": 3.3392,
      "step": 26311
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.302038550376892,
      "learning_rate": 0.0005808988688659607,
      "loss": 3.2274,
      "step": 26312
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5957446098327637,
      "learning_rate": 0.0005808974325517838,
      "loss": 3.2923,
      "step": 26313
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4489171504974365,
      "learning_rate": 0.0005808959961853828,
      "loss": 3.1141,
      "step": 26314
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8007609844207764,
      "learning_rate": 0.000580894559766758,
      "loss": 3.2477,
      "step": 26315
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4857510328292847,
      "learning_rate": 0.0005808931232959095,
      "loss": 3.0025,
      "step": 26316
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5597689151763916,
      "learning_rate": 0.0005808916867728378,
      "loss": 2.8501,
      "step": 26317
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.582427978515625,
      "learning_rate": 0.0005808902501975428,
      "loss": 3.193,
      "step": 26318
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4608573913574219,
      "learning_rate": 0.0005808888135700251,
      "loss": 3.1757,
      "step": 26319
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4337252378463745,
      "learning_rate": 0.000580887376890285,
      "loss": 3.2405,
      "step": 26320
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6710532903671265,
      "learning_rate": 0.0005808859401583226,
      "loss": 3.0154,
      "step": 26321
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.416824221611023,
      "learning_rate": 0.0005808845033741381,
      "loss": 2.988,
      "step": 26322
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.471505045890808,
      "learning_rate": 0.000580883066537732,
      "loss": 3.2133,
      "step": 26323
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3037515878677368,
      "learning_rate": 0.0005808816296491043,
      "loss": 3.1975,
      "step": 26324
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3838850259780884,
      "learning_rate": 0.0005808801927082555,
      "loss": 3.1497,
      "step": 26325
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.429409146308899,
      "learning_rate": 0.0005808787557151857,
      "loss": 3.0817,
      "step": 26326
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3981499671936035,
      "learning_rate": 0.0005808773186698953,
      "loss": 3.1277,
      "step": 26327
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4373085498809814,
      "learning_rate": 0.0005808758815723844,
      "loss": 3.0017,
      "step": 26328
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.646340012550354,
      "learning_rate": 0.0005808744444226535,
      "loss": 2.9376,
      "step": 26329
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2728158235549927,
      "learning_rate": 0.0005808730072207026,
      "loss": 3.2452,
      "step": 26330
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.366438388824463,
      "learning_rate": 0.0005808715699665323,
      "loss": 2.9921,
      "step": 26331
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6291431188583374,
      "learning_rate": 0.0005808701326601425,
      "loss": 2.9886,
      "step": 26332
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6256775856018066,
      "learning_rate": 0.0005808686953015337,
      "loss": 3.203,
      "step": 26333
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7435047626495361,
      "learning_rate": 0.0005808672578907061,
      "loss": 3.095,
      "step": 26334
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7704181671142578,
      "learning_rate": 0.00058086582042766,
      "loss": 2.9835,
      "step": 26335
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9379431009292603,
      "learning_rate": 0.0005808643829123956,
      "loss": 3.2152,
      "step": 26336
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0253963470458984,
      "learning_rate": 0.0005808629453449133,
      "loss": 3.1146,
      "step": 26337
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.458925485610962,
      "learning_rate": 0.0005808615077252131,
      "loss": 3.1657,
      "step": 26338
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.710312843322754,
      "learning_rate": 0.0005808600700532956,
      "loss": 3.0675,
      "step": 26339
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8549469709396362,
      "learning_rate": 0.000580858632329161,
      "loss": 2.9124,
      "step": 26340
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.6253771781921387,
      "learning_rate": 0.0005808571945528092,
      "loss": 2.8589,
      "step": 26341
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8627792596817017,
      "learning_rate": 0.0005808557567242409,
      "loss": 2.928,
      "step": 26342
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.5563302040100098,
      "learning_rate": 0.0005808543188434562,
      "loss": 3.1714,
      "step": 26343
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.1928060054779053,
      "learning_rate": 0.0005808528809104553,
      "loss": 2.9946,
      "step": 26344
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2831904888153076,
      "learning_rate": 0.0005808514429252388,
      "loss": 2.9607,
      "step": 26345
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7646976709365845,
      "learning_rate": 0.0005808500048878066,
      "loss": 3.1433,
      "step": 26346
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4725308418273926,
      "learning_rate": 0.0005808485667981589,
      "loss": 3.2483,
      "step": 26347
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4895153045654297,
      "learning_rate": 0.0005808471286562963,
      "loss": 3.1602,
      "step": 26348
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.423741102218628,
      "learning_rate": 0.000580845690462219,
      "loss": 3.2507,
      "step": 26349
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.057525396347046,
      "learning_rate": 0.000580844252215927,
      "loss": 3.273,
      "step": 26350
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4953796863555908,
      "learning_rate": 0.0005808428139174209,
      "loss": 3.2286,
      "step": 26351
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3883901834487915,
      "learning_rate": 0.0005808413755667007,
      "loss": 3.3218,
      "step": 26352
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5686622858047485,
      "learning_rate": 0.0005808399371637668,
      "loss": 3.2948,
      "step": 26353
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5582804679870605,
      "learning_rate": 0.0005808384987086195,
      "loss": 3.0559,
      "step": 26354
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9359239339828491,
      "learning_rate": 0.0005808370602012591,
      "loss": 3.1029,
      "step": 26355
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5293651819229126,
      "learning_rate": 0.0005808356216416856,
      "loss": 3.072,
      "step": 26356
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7536545991897583,
      "learning_rate": 0.0005808341830298996,
      "loss": 3.2066,
      "step": 26357
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6720075607299805,
      "learning_rate": 0.0005808327443659012,
      "loss": 3.209,
      "step": 26358
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3375489711761475,
      "learning_rate": 0.0005808313056496906,
      "loss": 3.0562,
      "step": 26359
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2297816276550293,
      "learning_rate": 0.0005808298668812683,
      "loss": 3.1992,
      "step": 26360
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.412394404411316,
      "learning_rate": 0.0005808284280606344,
      "loss": 3.1432,
      "step": 26361
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7619150876998901,
      "learning_rate": 0.0005808269891877892,
      "loss": 3.037,
      "step": 26362
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5412952899932861,
      "learning_rate": 0.0005808255502627329,
      "loss": 3.132,
      "step": 26363
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.624813199043274,
      "learning_rate": 0.0005808241112854658,
      "loss": 2.9741,
      "step": 26364
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4935415983200073,
      "learning_rate": 0.0005808226722559883,
      "loss": 3.0788,
      "step": 26365
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5564229488372803,
      "learning_rate": 0.0005808212331743004,
      "loss": 3.0138,
      "step": 26366
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5421528816223145,
      "learning_rate": 0.0005808197940404028,
      "loss": 3.157,
      "step": 26367
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6023125648498535,
      "learning_rate": 0.0005808183548542953,
      "loss": 2.9989,
      "step": 26368
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3850586414337158,
      "learning_rate": 0.0005808169156159784,
      "loss": 3.016,
      "step": 26369
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5228322744369507,
      "learning_rate": 0.0005808154763254523,
      "loss": 2.8223,
      "step": 26370
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3576050996780396,
      "learning_rate": 0.0005808140369827174,
      "loss": 3.0892,
      "step": 26371
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.605668544769287,
      "learning_rate": 0.0005808125975877739,
      "loss": 3.1447,
      "step": 26372
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.808235764503479,
      "learning_rate": 0.0005808111581406219,
      "loss": 3.2598,
      "step": 26373
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5358800888061523,
      "learning_rate": 0.0005808097186412618,
      "loss": 2.9769,
      "step": 26374
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6262760162353516,
      "learning_rate": 0.000580808279089694,
      "loss": 3.0955,
      "step": 26375
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5529427528381348,
      "learning_rate": 0.0005808068394859185,
      "loss": 3.0893,
      "step": 26376
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.129175901412964,
      "learning_rate": 0.0005808053998299358,
      "loss": 2.943,
      "step": 26377
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5295804738998413,
      "learning_rate": 0.0005808039601217461,
      "loss": 2.9751,
      "step": 26378
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4746053218841553,
      "learning_rate": 0.0005808025203613496,
      "loss": 3.1685,
      "step": 26379
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4685148000717163,
      "learning_rate": 0.0005808010805487465,
      "loss": 2.9769,
      "step": 26380
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6285500526428223,
      "learning_rate": 0.0005807996406839373,
      "loss": 2.924,
      "step": 26381
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4704285860061646,
      "learning_rate": 0.0005807982007669222,
      "loss": 2.9542,
      "step": 26382
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5041192770004272,
      "learning_rate": 0.0005807967607977014,
      "loss": 3.1683,
      "step": 26383
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5957374572753906,
      "learning_rate": 0.0005807953207762751,
      "loss": 2.9577,
      "step": 26384
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4115355014801025,
      "learning_rate": 0.0005807938807026436,
      "loss": 3.0106,
      "step": 26385
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3318915367126465,
      "learning_rate": 0.0005807924405768074,
      "loss": 3.0448,
      "step": 26386
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5114450454711914,
      "learning_rate": 0.0005807910003987665,
      "loss": 3.1974,
      "step": 26387
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.484760046005249,
      "learning_rate": 0.0005807895601685211,
      "loss": 3.1066,
      "step": 26388
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.371654987335205,
      "learning_rate": 0.0005807881198860718,
      "loss": 2.9053,
      "step": 26389
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.454430103302002,
      "learning_rate": 0.0005807866795514187,
      "loss": 3.3524,
      "step": 26390
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5437862873077393,
      "learning_rate": 0.000580785239164562,
      "loss": 2.8776,
      "step": 26391
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.498326063156128,
      "learning_rate": 0.0005807837987255019,
      "loss": 3.2195,
      "step": 26392
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.7357656955718994,
      "learning_rate": 0.000580782358234239,
      "loss": 2.8036,
      "step": 26393
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4325838088989258,
      "learning_rate": 0.0005807809176907733,
      "loss": 3.2031,
      "step": 26394
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3993127346038818,
      "learning_rate": 0.0005807794770951051,
      "loss": 2.923,
      "step": 26395
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6970163583755493,
      "learning_rate": 0.0005807780364472347,
      "loss": 3.1697,
      "step": 26396
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8189353942871094,
      "learning_rate": 0.0005807765957471623,
      "loss": 2.9942,
      "step": 26397
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4944608211517334,
      "learning_rate": 0.0005807751549948884,
      "loss": 2.9124,
      "step": 26398
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8721836805343628,
      "learning_rate": 0.000580773714190413,
      "loss": 2.6223,
      "step": 26399
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.2923331260681152,
      "learning_rate": 0.0005807722733337364,
      "loss": 2.8946,
      "step": 26400
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.2376495599746704,
      "learning_rate": 0.000580770832424859,
      "loss": 3.0818,
      "step": 26401
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3811447620391846,
      "learning_rate": 0.000580769391463781,
      "loss": 3.0,
      "step": 26402
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5799528360366821,
      "learning_rate": 0.0005807679504505026,
      "loss": 3.1625,
      "step": 26403
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6583222150802612,
      "learning_rate": 0.0005807665093850243,
      "loss": 3.2062,
      "step": 26404
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4215582609176636,
      "learning_rate": 0.000580765068267346,
      "loss": 3.0857,
      "step": 26405
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9069602489471436,
      "learning_rate": 0.0005807636270974683,
      "loss": 3.1654,
      "step": 26406
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4371670484542847,
      "learning_rate": 0.0005807621858753913,
      "loss": 2.9771,
      "step": 26407
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3925442695617676,
      "learning_rate": 0.0005807607446011154,
      "loss": 3.1447,
      "step": 26408
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8164288997650146,
      "learning_rate": 0.0005807593032746407,
      "loss": 3.1456,
      "step": 26409
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9684723615646362,
      "learning_rate": 0.0005807578618959676,
      "loss": 2.9352,
      "step": 26410
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.68477201461792,
      "learning_rate": 0.0005807564204650962,
      "loss": 3.2285,
      "step": 26411
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.202509641647339,
      "learning_rate": 0.0005807549789820269,
      "loss": 3.3005,
      "step": 26412
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.958413600921631,
      "learning_rate": 0.0005807535374467601,
      "loss": 2.9558,
      "step": 26413
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9591951370239258,
      "learning_rate": 0.0005807520958592958,
      "loss": 3.269,
      "step": 26414
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6088995933532715,
      "learning_rate": 0.0005807506542196344,
      "loss": 3.0588,
      "step": 26415
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4843753576278687,
      "learning_rate": 0.0005807492125277762,
      "loss": 2.8513,
      "step": 26416
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.4820609092712402,
      "learning_rate": 0.0005807477707837214,
      "loss": 3.1952,
      "step": 26417
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3594249486923218,
      "learning_rate": 0.0005807463289874702,
      "loss": 3.1047,
      "step": 26418
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.276090145111084,
      "learning_rate": 0.0005807448871390231,
      "loss": 2.8966,
      "step": 26419
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.0061488151550293,
      "learning_rate": 0.0005807434452383801,
      "loss": 2.8927,
      "step": 26420
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.042499303817749,
      "learning_rate": 0.0005807420032855416,
      "loss": 3.241,
      "step": 26421
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5175552368164062,
      "learning_rate": 0.0005807405612805079,
      "loss": 3.105,
      "step": 26422
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.8719091415405273,
      "learning_rate": 0.0005807391192232793,
      "loss": 3.05,
      "step": 26423
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.903489828109741,
      "learning_rate": 0.000580737677113856,
      "loss": 2.9211,
      "step": 26424
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.428865432739258,
      "learning_rate": 0.0005807362349522381,
      "loss": 2.9455,
      "step": 26425
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8190125226974487,
      "learning_rate": 0.0005807347927384262,
      "loss": 3.1585,
      "step": 26426
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.1232495307922363,
      "learning_rate": 0.0005807333504724203,
      "loss": 3.0187,
      "step": 26427
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.331049680709839,
      "learning_rate": 0.0005807319081542208,
      "loss": 2.9009,
      "step": 26428
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5539852380752563,
      "learning_rate": 0.000580730465783828,
      "loss": 3.2685,
      "step": 26429
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5960462093353271,
      "learning_rate": 0.000580729023361242,
      "loss": 2.9974,
      "step": 26430
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9309443235397339,
      "learning_rate": 0.0005807275808864633,
      "loss": 2.9186,
      "step": 26431
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4912192821502686,
      "learning_rate": 0.000580726138359492,
      "loss": 3.1737,
      "step": 26432
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.271828055381775,
      "learning_rate": 0.0005807246957803283,
      "loss": 3.0475,
      "step": 26433
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.0776853561401367,
      "learning_rate": 0.0005807232531489727,
      "loss": 2.9911,
      "step": 26434
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.5453014373779297,
      "learning_rate": 0.0005807218104654254,
      "loss": 2.998,
      "step": 26435
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3854833841323853,
      "learning_rate": 0.0005807203677296865,
      "loss": 2.7003,
      "step": 26436
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8345848321914673,
      "learning_rate": 0.0005807189249417565,
      "loss": 3.0767,
      "step": 26437
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.0831663608551025,
      "learning_rate": 0.0005807174821016353,
      "loss": 3.2545,
      "step": 26438
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1133382320404053,
      "learning_rate": 0.0005807160392093237,
      "loss": 3.1523,
      "step": 26439
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.278441071510315,
      "learning_rate": 0.0005807145962648216,
      "loss": 3.0729,
      "step": 26440
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6134788990020752,
      "learning_rate": 0.0005807131532681293,
      "loss": 3.2543,
      "step": 26441
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.417194128036499,
      "learning_rate": 0.0005807117102192472,
      "loss": 2.8586,
      "step": 26442
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.74711012840271,
      "learning_rate": 0.0005807102671181755,
      "loss": 3.0166,
      "step": 26443
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3857005834579468,
      "learning_rate": 0.0005807088239649144,
      "loss": 3.1623,
      "step": 26444
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5451672077178955,
      "learning_rate": 0.0005807073807594642,
      "loss": 3.0228,
      "step": 26445
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3013758659362793,
      "learning_rate": 0.0005807059375018252,
      "loss": 3.1094,
      "step": 26446
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5537687540054321,
      "learning_rate": 0.0005807044941919978,
      "loss": 3.0573,
      "step": 26447
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.6568548679351807,
      "learning_rate": 0.0005807030508299821,
      "loss": 2.929,
      "step": 26448
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.18727970123291,
      "learning_rate": 0.0005807016074157782,
      "loss": 3.1724,
      "step": 26449
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7052905559539795,
      "learning_rate": 0.0005807001639493868,
      "loss": 3.4433,
      "step": 26450
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.469515085220337,
      "learning_rate": 0.0005806987204308078,
      "loss": 2.8067,
      "step": 26451
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7795158624649048,
      "learning_rate": 0.0005806972768600418,
      "loss": 3.1981,
      "step": 26452
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6396616697311401,
      "learning_rate": 0.0005806958332370888,
      "loss": 3.0889,
      "step": 26453
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9549239873886108,
      "learning_rate": 0.0005806943895619491,
      "loss": 3.0308,
      "step": 26454
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9494513273239136,
      "learning_rate": 0.000580692945834623,
      "loss": 3.123,
      "step": 26455
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.68082594871521,
      "learning_rate": 0.0005806915020551109,
      "loss": 3.0178,
      "step": 26456
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5282299518585205,
      "learning_rate": 0.0005806900582234128,
      "loss": 3.1938,
      "step": 26457
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.92328679561615,
      "learning_rate": 0.0005806886143395291,
      "loss": 2.8213,
      "step": 26458
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.629426121711731,
      "learning_rate": 0.0005806871704034602,
      "loss": 3.2009,
      "step": 26459
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6077507734298706,
      "learning_rate": 0.0005806857264152063,
      "loss": 3.1599,
      "step": 26460
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.014779567718506,
      "learning_rate": 0.0005806842823747675,
      "loss": 2.9826,
      "step": 26461
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5993149280548096,
      "learning_rate": 0.0005806828382821442,
      "loss": 3.2584,
      "step": 26462
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1155896186828613,
      "learning_rate": 0.0005806813941373367,
      "loss": 2.9081,
      "step": 26463
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.759158968925476,
      "learning_rate": 0.0005806799499403452,
      "loss": 2.9069,
      "step": 26464
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7040235996246338,
      "learning_rate": 0.0005806785056911701,
      "loss": 3.2325,
      "step": 26465
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.967238426208496,
      "learning_rate": 0.0005806770613898115,
      "loss": 3.0451,
      "step": 26466
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.767628788948059,
      "learning_rate": 0.0005806756170362697,
      "loss": 3.1517,
      "step": 26467
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8732870817184448,
      "learning_rate": 0.000580674172630545,
      "loss": 3.1505,
      "step": 26468
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.313198447227478,
      "learning_rate": 0.0005806727281726377,
      "loss": 3.175,
      "step": 26469
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.735670566558838,
      "learning_rate": 0.0005806712836625481,
      "loss": 3.1191,
      "step": 26470
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.229027032852173,
      "learning_rate": 0.0005806698391002764,
      "loss": 2.8564,
      "step": 26471
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.843262791633606,
      "learning_rate": 0.0005806683944858228,
      "loss": 3.011,
      "step": 26472
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6285059452056885,
      "learning_rate": 0.0005806669498191877,
      "loss": 2.7932,
      "step": 26473
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4917784929275513,
      "learning_rate": 0.0005806655051003713,
      "loss": 2.9379,
      "step": 26474
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.489155888557434,
      "learning_rate": 0.0005806640603293738,
      "loss": 3.0411,
      "step": 26475
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.562627911567688,
      "learning_rate": 0.0005806626155061957,
      "loss": 2.9568,
      "step": 26476
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8283432722091675,
      "learning_rate": 0.0005806611706308372,
      "loss": 3.0712,
      "step": 26477
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6177016496658325,
      "learning_rate": 0.0005806597257032983,
      "loss": 2.8823,
      "step": 26478
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9346903562545776,
      "learning_rate": 0.0005806582807235795,
      "loss": 3.4516,
      "step": 26479
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.4541058540344238,
      "learning_rate": 0.000580656835691681,
      "loss": 3.1543,
      "step": 26480
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.8537698984146118,
      "learning_rate": 0.0005806553906076031,
      "loss": 3.0716,
      "step": 26481
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.74407160282135,
      "learning_rate": 0.0005806539454713462,
      "loss": 3.1451,
      "step": 26482
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.439159870147705,
      "learning_rate": 0.0005806525002829103,
      "loss": 2.9894,
      "step": 26483
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3952480554580688,
      "learning_rate": 0.0005806510550422959,
      "loss": 2.9466,
      "step": 26484
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3601243495941162,
      "learning_rate": 0.0005806496097495031,
      "loss": 3.0446,
      "step": 26485
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.580336332321167,
      "learning_rate": 0.0005806481644045322,
      "loss": 3.2043,
      "step": 26486
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.1685361862182617,
      "learning_rate": 0.0005806467190073837,
      "loss": 3.2545,
      "step": 26487
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6656662225723267,
      "learning_rate": 0.0005806452735580575,
      "loss": 2.9362,
      "step": 26488
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.435943841934204,
      "learning_rate": 0.000580643828056554,
      "loss": 3.0951,
      "step": 26489
    },
    {
      "epoch": 0.34,
      "grad_norm": 2.3535680770874023,
      "learning_rate": 0.0005806423825028736,
      "loss": 2.9977,
      "step": 26490
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5953097343444824,
      "learning_rate": 0.0005806409368970166,
      "loss": 3.0192,
      "step": 26491
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.5197619199752808,
      "learning_rate": 0.0005806394912389828,
      "loss": 3.1963,
      "step": 26492
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.7337543964385986,
      "learning_rate": 0.0005806380455287732,
      "loss": 3.0371,
      "step": 26493
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.3584139347076416,
      "learning_rate": 0.0005806365997663875,
      "loss": 2.9275,
      "step": 26494
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.6902357339859009,
      "learning_rate": 0.0005806351539518262,
      "loss": 3.1072,
      "step": 26495
    },
    {
      "epoch": 0.34,
      "grad_norm": 1.9204752445220947,
      "learning_rate": 0.0005806337080850896,
      "loss": 2.9887,
      "step": 26496
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5508307218551636,
      "learning_rate": 0.0005806322621661779,
      "loss": 3.0647,
      "step": 26497
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3444350957870483,
      "learning_rate": 0.0005806308161950913,
      "loss": 3.2163,
      "step": 26498
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2645317316055298,
      "learning_rate": 0.00058062937017183,
      "loss": 2.977,
      "step": 26499
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4399683475494385,
      "learning_rate": 0.0005806279240963946,
      "loss": 3.2581,
      "step": 26500
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6051263809204102,
      "learning_rate": 0.0005806264779687851,
      "loss": 3.1272,
      "step": 26501
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7823723554611206,
      "learning_rate": 0.0005806250317890019,
      "loss": 3.0402,
      "step": 26502
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5815969705581665,
      "learning_rate": 0.0005806235855570451,
      "loss": 3.0887,
      "step": 26503
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5455632209777832,
      "learning_rate": 0.0005806221392729151,
      "loss": 3.3586,
      "step": 26504
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4378368854522705,
      "learning_rate": 0.0005806206929366122,
      "loss": 3.3409,
      "step": 26505
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6285258531570435,
      "learning_rate": 0.0005806192465481366,
      "loss": 2.9384,
      "step": 26506
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2922272682189941,
      "learning_rate": 0.0005806178001074886,
      "loss": 3.1336,
      "step": 26507
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7460317611694336,
      "learning_rate": 0.0005806163536146684,
      "loss": 3.0625,
      "step": 26508
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.273118495941162,
      "learning_rate": 0.0005806149070696763,
      "loss": 3.0046,
      "step": 26509
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5644019842147827,
      "learning_rate": 0.0005806134604725127,
      "loss": 2.9884,
      "step": 26510
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8121057748794556,
      "learning_rate": 0.0005806120138231777,
      "loss": 3.1298,
      "step": 26511
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5656652450561523,
      "learning_rate": 0.0005806105671216716,
      "loss": 3.0599,
      "step": 26512
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3798620700836182,
      "learning_rate": 0.0005806091203679948,
      "loss": 3.0468,
      "step": 26513
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4893989562988281,
      "learning_rate": 0.0005806076735621474,
      "loss": 3.0765,
      "step": 26514
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3270375728607178,
      "learning_rate": 0.0005806062267041298,
      "loss": 3.135,
      "step": 26515
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.450877070426941,
      "learning_rate": 0.0005806047797939421,
      "loss": 3.078,
      "step": 26516
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.07293438911438,
      "learning_rate": 0.0005806033328315846,
      "loss": 2.9614,
      "step": 26517
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3223474025726318,
      "learning_rate": 0.0005806018858170579,
      "loss": 3.0398,
      "step": 26518
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7857089042663574,
      "learning_rate": 0.0005806004387503619,
      "loss": 3.2254,
      "step": 26519
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.508507251739502,
      "learning_rate": 0.0005805989916314968,
      "loss": 2.751,
      "step": 26520
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.486872673034668,
      "learning_rate": 0.0005805975444604633,
      "loss": 2.9692,
      "step": 26521
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7232962846755981,
      "learning_rate": 0.0005805960972372614,
      "loss": 3.1783,
      "step": 26522
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4526644945144653,
      "learning_rate": 0.0005805946499618913,
      "loss": 3.1662,
      "step": 26523
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1647312641143799,
      "learning_rate": 0.0005805932026343533,
      "loss": 2.9391,
      "step": 26524
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2962994575500488,
      "learning_rate": 0.0005805917552546478,
      "loss": 2.9766,
      "step": 26525
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9445081949234009,
      "learning_rate": 0.000580590307822775,
      "loss": 3.2221,
      "step": 26526
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6635401248931885,
      "learning_rate": 0.0005805888603387352,
      "loss": 2.9535,
      "step": 26527
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8009790182113647,
      "learning_rate": 0.0005805874128025285,
      "loss": 3.202,
      "step": 26528
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3388134241104126,
      "learning_rate": 0.0005805859652141556,
      "loss": 3.2314,
      "step": 26529
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5632429122924805,
      "learning_rate": 0.0005805845175736162,
      "loss": 3.2496,
      "step": 26530
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9416954517364502,
      "learning_rate": 0.0005805830698809109,
      "loss": 3.0924,
      "step": 26531
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5063602924346924,
      "learning_rate": 0.0005805816221360399,
      "loss": 3.0567,
      "step": 26532
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5174450874328613,
      "learning_rate": 0.0005805801743390035,
      "loss": 3.2136,
      "step": 26533
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.653700828552246,
      "learning_rate": 0.000580578726489802,
      "loss": 2.8475,
      "step": 26534
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8722339868545532,
      "learning_rate": 0.0005805772785884356,
      "loss": 2.9392,
      "step": 26535
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3450533151626587,
      "learning_rate": 0.0005805758306349046,
      "loss": 3.1425,
      "step": 26536
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.550635814666748,
      "learning_rate": 0.0005805743826292092,
      "loss": 3.0324,
      "step": 26537
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5929185152053833,
      "learning_rate": 0.0005805729345713497,
      "loss": 3.1506,
      "step": 26538
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6881139278411865,
      "learning_rate": 0.0005805714864613264,
      "loss": 3.2605,
      "step": 26539
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6637523174285889,
      "learning_rate": 0.0005805700382991397,
      "loss": 2.9461,
      "step": 26540
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8168439865112305,
      "learning_rate": 0.0005805685900847896,
      "loss": 3.334,
      "step": 26541
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3848679065704346,
      "learning_rate": 0.0005805671418182766,
      "loss": 2.9664,
      "step": 26542
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8542983531951904,
      "learning_rate": 0.0005805656934996007,
      "loss": 3.2463,
      "step": 26543
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2554543018341064,
      "learning_rate": 0.0005805642451287624,
      "loss": 2.8964,
      "step": 26544
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4471348524093628,
      "learning_rate": 0.0005805627967057621,
      "loss": 3.0201,
      "step": 26545
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5473023653030396,
      "learning_rate": 0.0005805613482305997,
      "loss": 3.0505,
      "step": 26546
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6786433458328247,
      "learning_rate": 0.0005805598997032757,
      "loss": 3.2454,
      "step": 26547
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5681229829788208,
      "learning_rate": 0.0005805584511237902,
      "loss": 3.1109,
      "step": 26548
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.378624677658081,
      "learning_rate": 0.0005805570024921439,
      "loss": 3.0936,
      "step": 26549
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4161133766174316,
      "learning_rate": 0.0005805555538083365,
      "loss": 3.1286,
      "step": 26550
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.49948251247406,
      "learning_rate": 0.0005805541050723686,
      "loss": 2.8377,
      "step": 26551
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7448821067810059,
      "learning_rate": 0.0005805526562842403,
      "loss": 3.1627,
      "step": 26552
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6420109272003174,
      "learning_rate": 0.000580551207443952,
      "loss": 3.0547,
      "step": 26553
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4244722127914429,
      "learning_rate": 0.0005805497585515041,
      "loss": 2.9576,
      "step": 26554
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7326582670211792,
      "learning_rate": 0.0005805483096068965,
      "loss": 3.0091,
      "step": 26555
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.650111198425293,
      "learning_rate": 0.0005805468606101299,
      "loss": 2.8169,
      "step": 26556
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8327581882476807,
      "learning_rate": 0.0005805454115612042,
      "loss": 2.9731,
      "step": 26557
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4588981866836548,
      "learning_rate": 0.0005805439624601197,
      "loss": 3.1157,
      "step": 26558
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6493513584136963,
      "learning_rate": 0.000580542513306877,
      "loss": 3.3045,
      "step": 26559
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.661760926246643,
      "learning_rate": 0.0005805410641014759,
      "loss": 3.2249,
      "step": 26560
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4872275590896606,
      "learning_rate": 0.0005805396148439172,
      "loss": 3.3124,
      "step": 26561
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4071966409683228,
      "learning_rate": 0.0005805381655342007,
      "loss": 3.001,
      "step": 26562
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4923924207687378,
      "learning_rate": 0.0005805367161723269,
      "loss": 3.0318,
      "step": 26563
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3778477907180786,
      "learning_rate": 0.0005805352667582961,
      "loss": 2.8778,
      "step": 26564
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8359626531600952,
      "learning_rate": 0.0005805338172921084,
      "loss": 3.0497,
      "step": 26565
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4378575086593628,
      "learning_rate": 0.0005805323677737643,
      "loss": 2.9791,
      "step": 26566
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7166703939437866,
      "learning_rate": 0.0005805309182032639,
      "loss": 3.3327,
      "step": 26567
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.147334098815918,
      "learning_rate": 0.0005805294685806074,
      "loss": 2.8958,
      "step": 26568
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7716312408447266,
      "learning_rate": 0.0005805280189057952,
      "loss": 2.939,
      "step": 26569
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5508755445480347,
      "learning_rate": 0.0005805265691788275,
      "loss": 3.2849,
      "step": 26570
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3542959690093994,
      "learning_rate": 0.0005805251193997048,
      "loss": 2.775,
      "step": 26571
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.956902027130127,
      "learning_rate": 0.0005805236695684271,
      "loss": 3.0229,
      "step": 26572
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.861964464187622,
      "learning_rate": 0.0005805222196849947,
      "loss": 2.9273,
      "step": 26573
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9823834896087646,
      "learning_rate": 0.0005805207697494079,
      "loss": 3.1708,
      "step": 26574
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5964370965957642,
      "learning_rate": 0.0005805193197616671,
      "loss": 3.1365,
      "step": 26575
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4206488132476807,
      "learning_rate": 0.0005805178697217724,
      "loss": 3.228,
      "step": 26576
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.6645617485046387,
      "learning_rate": 0.0005805164196297242,
      "loss": 2.9453,
      "step": 26577
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6556050777435303,
      "learning_rate": 0.0005805149694855225,
      "loss": 3.1161,
      "step": 26578
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.275148630142212,
      "learning_rate": 0.000580513519289168,
      "loss": 2.9852,
      "step": 26579
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.687260866165161,
      "learning_rate": 0.0005805120690406607,
      "loss": 3.065,
      "step": 26580
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.4068706035614014,
      "learning_rate": 0.0005805106187400009,
      "loss": 3.0378,
      "step": 26581
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.40136981010437,
      "learning_rate": 0.0005805091683871888,
      "loss": 3.0245,
      "step": 26582
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5784963369369507,
      "learning_rate": 0.0005805077179822249,
      "loss": 2.8025,
      "step": 26583
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6424001455307007,
      "learning_rate": 0.0005805062675251091,
      "loss": 3.1772,
      "step": 26584
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0417397022247314,
      "learning_rate": 0.0005805048170158422,
      "loss": 3.1793,
      "step": 26585
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1926425695419312,
      "learning_rate": 0.0005805033664544239,
      "loss": 3.1765,
      "step": 26586
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5418075323104858,
      "learning_rate": 0.0005805019158408548,
      "loss": 3.141,
      "step": 26587
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6593767404556274,
      "learning_rate": 0.000580500465175135,
      "loss": 2.7506,
      "step": 26588
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7975159883499146,
      "learning_rate": 0.0005804990144572651,
      "loss": 3.1293,
      "step": 26589
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.711079716682434,
      "learning_rate": 0.0005804975636872449,
      "loss": 2.7662,
      "step": 26590
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9935518503189087,
      "learning_rate": 0.000580496112865075,
      "loss": 3.0343,
      "step": 26591
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7162281274795532,
      "learning_rate": 0.0005804946619907557,
      "loss": 3.0791,
      "step": 26592
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2610167264938354,
      "learning_rate": 0.000580493211064287,
      "loss": 3.2366,
      "step": 26593
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6695729494094849,
      "learning_rate": 0.0005804917600856694,
      "loss": 3.1243,
      "step": 26594
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6316843032836914,
      "learning_rate": 0.000580490309054903,
      "loss": 3.1604,
      "step": 26595
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8189862966537476,
      "learning_rate": 0.0005804888579719883,
      "loss": 2.9335,
      "step": 26596
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5133402347564697,
      "learning_rate": 0.0005804874068369253,
      "loss": 2.945,
      "step": 26597
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.798190712928772,
      "learning_rate": 0.0005804859556497145,
      "loss": 3.1228,
      "step": 26598
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6697181463241577,
      "learning_rate": 0.000580484504410356,
      "loss": 2.9842,
      "step": 26599
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5150456428527832,
      "learning_rate": 0.0005804830531188502,
      "loss": 3.0309,
      "step": 26600
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.057340145111084,
      "learning_rate": 0.0005804816017751972,
      "loss": 2.9717,
      "step": 26601
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4031116962432861,
      "learning_rate": 0.0005804801503793973,
      "loss": 3.172,
      "step": 26602
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1274681091308594,
      "learning_rate": 0.000580478698931451,
      "loss": 3.0263,
      "step": 26603
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4033699035644531,
      "learning_rate": 0.0005804772474313583,
      "loss": 3.1204,
      "step": 26604
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4668326377868652,
      "learning_rate": 0.0005804757958791198,
      "loss": 2.9511,
      "step": 26605
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6980745792388916,
      "learning_rate": 0.0005804743442747353,
      "loss": 2.9592,
      "step": 26606
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.831394910812378,
      "learning_rate": 0.0005804728926182054,
      "loss": 3.1352,
      "step": 26607
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6985752582550049,
      "learning_rate": 0.0005804714409095304,
      "loss": 3.2083,
      "step": 26608
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4233627319335938,
      "learning_rate": 0.0005804699891487104,
      "loss": 2.8954,
      "step": 26609
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2616044282913208,
      "learning_rate": 0.0005804685373357458,
      "loss": 2.8621,
      "step": 26610
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1376266479492188,
      "learning_rate": 0.0005804670854706367,
      "loss": 2.9451,
      "step": 26611
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4734411239624023,
      "learning_rate": 0.0005804656335533835,
      "loss": 3.0516,
      "step": 26612
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.418011426925659,
      "learning_rate": 0.0005804641815839865,
      "loss": 3.0935,
      "step": 26613
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2005724906921387,
      "learning_rate": 0.0005804627295624458,
      "loss": 3.3286,
      "step": 26614
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5844106674194336,
      "learning_rate": 0.0005804612774887619,
      "loss": 3.0558,
      "step": 26615
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7622708082199097,
      "learning_rate": 0.0005804598253629349,
      "loss": 3.2246,
      "step": 26616
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0104269981384277,
      "learning_rate": 0.0005804583731849652,
      "loss": 3.1131,
      "step": 26617
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4100261926651,
      "learning_rate": 0.0005804569209548529,
      "loss": 3.1581,
      "step": 26618
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2304139137268066,
      "learning_rate": 0.0005804554686725984,
      "loss": 2.9569,
      "step": 26619
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3417656421661377,
      "learning_rate": 0.000580454016338202,
      "loss": 3.0627,
      "step": 26620
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9635136127471924,
      "learning_rate": 0.0005804525639516638,
      "loss": 3.1515,
      "step": 26621
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2949467897415161,
      "learning_rate": 0.0005804511115129841,
      "loss": 3.0471,
      "step": 26622
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6021077632904053,
      "learning_rate": 0.0005804496590221634,
      "loss": 3.0375,
      "step": 26623
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0312881469726562,
      "learning_rate": 0.0005804482064792018,
      "loss": 3.2133,
      "step": 26624
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0895256996154785,
      "learning_rate": 0.0005804467538840995,
      "loss": 3.0653,
      "step": 26625
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.620557188987732,
      "learning_rate": 0.0005804453012368569,
      "loss": 3.3616,
      "step": 26626
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4727190732955933,
      "learning_rate": 0.0005804438485374743,
      "loss": 2.9563,
      "step": 26627
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5148652791976929,
      "learning_rate": 0.0005804423957859518,
      "loss": 3.1249,
      "step": 26628
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2794770002365112,
      "learning_rate": 0.0005804409429822898,
      "loss": 3.1642,
      "step": 26629
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.355912685394287,
      "learning_rate": 0.0005804394901264885,
      "loss": 3.0403,
      "step": 26630
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6216553449630737,
      "learning_rate": 0.0005804380372185482,
      "loss": 2.9525,
      "step": 26631
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3728160858154297,
      "learning_rate": 0.0005804365842584693,
      "loss": 3.1217,
      "step": 26632
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5959891080856323,
      "learning_rate": 0.0005804351312462518,
      "loss": 2.9555,
      "step": 26633
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2970590591430664,
      "learning_rate": 0.0005804336781818962,
      "loss": 3.2035,
      "step": 26634
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.667230248451233,
      "learning_rate": 0.0005804322250654026,
      "loss": 2.9186,
      "step": 26635
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.540774345397949,
      "learning_rate": 0.0005804307718967715,
      "loss": 2.9273,
      "step": 26636
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4590096473693848,
      "learning_rate": 0.0005804293186760029,
      "loss": 3.0403,
      "step": 26637
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5621185302734375,
      "learning_rate": 0.0005804278654030972,
      "loss": 3.1264,
      "step": 26638
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3721224069595337,
      "learning_rate": 0.0005804264120780547,
      "loss": 3.5213,
      "step": 26639
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4793003797531128,
      "learning_rate": 0.0005804249587008756,
      "loss": 3.1724,
      "step": 26640
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5553812980651855,
      "learning_rate": 0.0005804235052715603,
      "loss": 2.9568,
      "step": 26641
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.53923761844635,
      "learning_rate": 0.0005804220517901088,
      "loss": 2.9238,
      "step": 26642
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3787604570388794,
      "learning_rate": 0.0005804205982565218,
      "loss": 2.8647,
      "step": 26643
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8369687795639038,
      "learning_rate": 0.0005804191446707992,
      "loss": 3.2161,
      "step": 26644
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9846041202545166,
      "learning_rate": 0.0005804176910329413,
      "loss": 3.0585,
      "step": 26645
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4921033382415771,
      "learning_rate": 0.0005804162373429486,
      "loss": 3.0452,
      "step": 26646
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.3524277210235596,
      "learning_rate": 0.0005804147836008211,
      "loss": 2.8897,
      "step": 26647
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0960166454315186,
      "learning_rate": 0.0005804133298065592,
      "loss": 3.0917,
      "step": 26648
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.655640721321106,
      "learning_rate": 0.0005804118759601632,
      "loss": 3.1422,
      "step": 26649
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8068060874938965,
      "learning_rate": 0.0005804104220616334,
      "loss": 3.2121,
      "step": 26650
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4790594577789307,
      "learning_rate": 0.00058040896811097,
      "loss": 3.0648,
      "step": 26651
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4735767841339111,
      "learning_rate": 0.0005804075141081733,
      "loss": 3.2748,
      "step": 26652
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7856303453445435,
      "learning_rate": 0.0005804060600532434,
      "loss": 2.919,
      "step": 26653
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6545430421829224,
      "learning_rate": 0.0005804046059461809,
      "loss": 3.2111,
      "step": 26654
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.228889226913452,
      "learning_rate": 0.0005804031517869858,
      "loss": 2.8786,
      "step": 26655
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5457829236984253,
      "learning_rate": 0.0005804016975756585,
      "loss": 3.1134,
      "step": 26656
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8894728422164917,
      "learning_rate": 0.0005804002433121992,
      "loss": 3.0002,
      "step": 26657
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5084776878356934,
      "learning_rate": 0.0005803987889966082,
      "loss": 3.206,
      "step": 26658
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0004355907440186,
      "learning_rate": 0.0005803973346288858,
      "loss": 3.2333,
      "step": 26659
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7095955610275269,
      "learning_rate": 0.0005803958802090322,
      "loss": 3.1545,
      "step": 26660
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.006392240524292,
      "learning_rate": 0.0005803944257370476,
      "loss": 2.9575,
      "step": 26661
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3550537824630737,
      "learning_rate": 0.0005803929712129327,
      "loss": 3.1821,
      "step": 26662
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.748202919960022,
      "learning_rate": 0.0005803915166366872,
      "loss": 3.032,
      "step": 26663
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9386487007141113,
      "learning_rate": 0.0005803900620083118,
      "loss": 3.0014,
      "step": 26664
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5071220397949219,
      "learning_rate": 0.0005803886073278064,
      "loss": 2.9861,
      "step": 26665
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4115066528320312,
      "learning_rate": 0.0005803871525951715,
      "loss": 3.0496,
      "step": 26666
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8853882551193237,
      "learning_rate": 0.0005803856978104074,
      "loss": 3.0821,
      "step": 26667
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5187382698059082,
      "learning_rate": 0.0005803842429735143,
      "loss": 3.0435,
      "step": 26668
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9778493642807007,
      "learning_rate": 0.0005803827880844926,
      "loss": 3.0703,
      "step": 26669
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5181052684783936,
      "learning_rate": 0.0005803813331433423,
      "loss": 3.1396,
      "step": 26670
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4665647745132446,
      "learning_rate": 0.0005803798781500638,
      "loss": 2.9871,
      "step": 26671
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5985264778137207,
      "learning_rate": 0.0005803784231046574,
      "loss": 3.0697,
      "step": 26672
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6508135795593262,
      "learning_rate": 0.0005803769680071234,
      "loss": 3.2408,
      "step": 26673
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3769943714141846,
      "learning_rate": 0.0005803755128574621,
      "loss": 3.2585,
      "step": 26674
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0026443004608154,
      "learning_rate": 0.0005803740576556735,
      "loss": 3.195,
      "step": 26675
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1277668476104736,
      "learning_rate": 0.0005803726024017581,
      "loss": 3.2934,
      "step": 26676
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4034485816955566,
      "learning_rate": 0.0005803711470957163,
      "loss": 3.1963,
      "step": 26677
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.221933126449585,
      "learning_rate": 0.0005803696917375482,
      "loss": 2.9666,
      "step": 26678
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5029383897781372,
      "learning_rate": 0.000580368236327254,
      "loss": 2.9173,
      "step": 26679
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.243553638458252,
      "learning_rate": 0.0005803667808648341,
      "loss": 3.2452,
      "step": 26680
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3586595058441162,
      "learning_rate": 0.0005803653253502887,
      "loss": 3.3076,
      "step": 26681
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.970317840576172,
      "learning_rate": 0.000580363869783618,
      "loss": 3.2121,
      "step": 26682
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5164265632629395,
      "learning_rate": 0.0005803624141648225,
      "loss": 3.1588,
      "step": 26683
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.039078950881958,
      "learning_rate": 0.0005803609584939023,
      "loss": 3.2177,
      "step": 26684
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.579876184463501,
      "learning_rate": 0.0005803595027708577,
      "loss": 3.2513,
      "step": 26685
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3824433088302612,
      "learning_rate": 0.000580358046995689,
      "loss": 3.0767,
      "step": 26686
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5631572008132935,
      "learning_rate": 0.0005803565911683964,
      "loss": 3.0963,
      "step": 26687
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4835500717163086,
      "learning_rate": 0.0005803551352889803,
      "loss": 3.3008,
      "step": 26688
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6434471607208252,
      "learning_rate": 0.0005803536793574408,
      "loss": 3.1437,
      "step": 26689
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4970972537994385,
      "learning_rate": 0.0005803522233737781,
      "loss": 3.4061,
      "step": 26690
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.551731586456299,
      "learning_rate": 0.0005803507673379929,
      "loss": 2.9267,
      "step": 26691
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.366493821144104,
      "learning_rate": 0.0005803493112500851,
      "loss": 3.1933,
      "step": 26692
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2309067249298096,
      "learning_rate": 0.0005803478551100551,
      "loss": 3.2184,
      "step": 26693
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.292348027229309,
      "learning_rate": 0.0005803463989179031,
      "loss": 3.1968,
      "step": 26694
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6604121923446655,
      "learning_rate": 0.0005803449426736295,
      "loss": 3.1723,
      "step": 26695
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.215512752532959,
      "learning_rate": 0.0005803434863772344,
      "loss": 3.0018,
      "step": 26696
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5129238367080688,
      "learning_rate": 0.0005803420300287182,
      "loss": 3.1523,
      "step": 26697
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3819866180419922,
      "learning_rate": 0.0005803405736280811,
      "loss": 2.9912,
      "step": 26698
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0577471256256104,
      "learning_rate": 0.0005803391171753234,
      "loss": 3.2125,
      "step": 26699
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4545141458511353,
      "learning_rate": 0.0005803376606704453,
      "loss": 3.3535,
      "step": 26700
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.498155117034912,
      "learning_rate": 0.0005803362041134473,
      "loss": 2.9993,
      "step": 26701
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8237451314926147,
      "learning_rate": 0.0005803347475043293,
      "loss": 3.014,
      "step": 26702
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1902801990509033,
      "learning_rate": 0.000580333290843092,
      "loss": 3.1313,
      "step": 26703
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3678184747695923,
      "learning_rate": 0.0005803318341297352,
      "loss": 3.085,
      "step": 26704
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.377596139907837,
      "learning_rate": 0.0005803303773642596,
      "loss": 2.92,
      "step": 26705
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6362977027893066,
      "learning_rate": 0.0005803289205466652,
      "loss": 3.1621,
      "step": 26706
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.850458025932312,
      "learning_rate": 0.0005803274636769525,
      "loss": 3.1262,
      "step": 26707
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4517550468444824,
      "learning_rate": 0.0005803260067551215,
      "loss": 3.1163,
      "step": 26708
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5562039613723755,
      "learning_rate": 0.0005803245497811726,
      "loss": 3.2167,
      "step": 26709
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.262168288230896,
      "learning_rate": 0.0005803230927551061,
      "loss": 3.031,
      "step": 26710
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5979573726654053,
      "learning_rate": 0.0005803216356769223,
      "loss": 3.2183,
      "step": 26711
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3842496871948242,
      "learning_rate": 0.0005803201785466214,
      "loss": 3.2731,
      "step": 26712
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7980787754058838,
      "learning_rate": 0.0005803187213642036,
      "loss": 3.0937,
      "step": 26713
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.926318883895874,
      "learning_rate": 0.0005803172641296694,
      "loss": 3.2223,
      "step": 26714
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6466302871704102,
      "learning_rate": 0.0005803158068430189,
      "loss": 3.1981,
      "step": 26715
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6924102306365967,
      "learning_rate": 0.0005803143495042523,
      "loss": 2.8654,
      "step": 26716
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.044395685195923,
      "learning_rate": 0.0005803128921133701,
      "loss": 3.148,
      "step": 26717
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4601880311965942,
      "learning_rate": 0.0005803114346703723,
      "loss": 3.1973,
      "step": 26718
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4523080587387085,
      "learning_rate": 0.0005803099771752594,
      "loss": 3.1408,
      "step": 26719
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7076183557510376,
      "learning_rate": 0.0005803085196280315,
      "loss": 2.9015,
      "step": 26720
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.748236060142517,
      "learning_rate": 0.000580307062028689,
      "loss": 2.9609,
      "step": 26721
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8933196067810059,
      "learning_rate": 0.0005803056043772322,
      "loss": 3.2607,
      "step": 26722
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2881321907043457,
      "learning_rate": 0.0005803041466736612,
      "loss": 3.1284,
      "step": 26723
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.978600263595581,
      "learning_rate": 0.0005803026889179765,
      "loss": 2.9035,
      "step": 26724
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3845593929290771,
      "learning_rate": 0.0005803012311101782,
      "loss": 3.256,
      "step": 26725
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6164450645446777,
      "learning_rate": 0.0005802997732502665,
      "loss": 3.26,
      "step": 26726
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.877685308456421,
      "learning_rate": 0.0005802983153382419,
      "loss": 3.3516,
      "step": 26727
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.300645112991333,
      "learning_rate": 0.0005802968573741044,
      "loss": 2.9118,
      "step": 26728
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7735167741775513,
      "learning_rate": 0.0005802953993578545,
      "loss": 3.3628,
      "step": 26729
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3086689710617065,
      "learning_rate": 0.0005802939412894924,
      "loss": 2.8281,
      "step": 26730
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4937254190444946,
      "learning_rate": 0.0005802924831690185,
      "loss": 3.0069,
      "step": 26731
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4158494472503662,
      "learning_rate": 0.0005802910249964327,
      "loss": 3.3454,
      "step": 26732
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.83235502243042,
      "learning_rate": 0.0005802895667717357,
      "loss": 3.025,
      "step": 26733
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5025335550308228,
      "learning_rate": 0.0005802881084949274,
      "loss": 2.9486,
      "step": 26734
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3495168685913086,
      "learning_rate": 0.0005802866501660084,
      "loss": 3.0172,
      "step": 26735
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6779001951217651,
      "learning_rate": 0.0005802851917849788,
      "loss": 3.1947,
      "step": 26736
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6077669858932495,
      "learning_rate": 0.0005802837333518389,
      "loss": 3.1576,
      "step": 26737
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.290568232536316,
      "learning_rate": 0.0005802822748665888,
      "loss": 2.9677,
      "step": 26738
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8069385290145874,
      "learning_rate": 0.0005802808163292291,
      "loss": 3.0655,
      "step": 26739
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.218132972717285,
      "learning_rate": 0.00058027935773976,
      "loss": 3.0648,
      "step": 26740
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6742740869522095,
      "learning_rate": 0.0005802778990981815,
      "loss": 3.02,
      "step": 26741
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4527668952941895,
      "learning_rate": 0.000580276440404494,
      "loss": 2.8557,
      "step": 26742
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.981074571609497,
      "learning_rate": 0.0005802749816586979,
      "loss": 3.0069,
      "step": 26743
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5599855184555054,
      "learning_rate": 0.0005802735228607934,
      "loss": 2.9634,
      "step": 26744
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4163838624954224,
      "learning_rate": 0.0005802720640107808,
      "loss": 2.6498,
      "step": 26745
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3246686458587646,
      "learning_rate": 0.0005802706051086603,
      "loss": 3.1481,
      "step": 26746
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9711408615112305,
      "learning_rate": 0.0005802691461544322,
      "loss": 2.9285,
      "step": 26747
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.547659993171692,
      "learning_rate": 0.0005802676871480968,
      "loss": 3.2876,
      "step": 26748
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.550089716911316,
      "learning_rate": 0.0005802662280896542,
      "loss": 3.0129,
      "step": 26749
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4975876808166504,
      "learning_rate": 0.000580264768979105,
      "loss": 2.7825,
      "step": 26750
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5023552179336548,
      "learning_rate": 0.0005802633098164492,
      "loss": 2.9503,
      "step": 26751
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.010265588760376,
      "learning_rate": 0.0005802618506016872,
      "loss": 2.9854,
      "step": 26752
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6837108135223389,
      "learning_rate": 0.0005802603913348192,
      "loss": 3.0536,
      "step": 26753
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.231213331222534,
      "learning_rate": 0.0005802589320158455,
      "loss": 3.2217,
      "step": 26754
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.801762342453003,
      "learning_rate": 0.0005802574726447665,
      "loss": 3.0675,
      "step": 26755
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7346926927566528,
      "learning_rate": 0.0005802560132215822,
      "loss": 3.0594,
      "step": 26756
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.391026496887207,
      "learning_rate": 0.0005802545537462931,
      "loss": 2.8291,
      "step": 26757
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9256073236465454,
      "learning_rate": 0.0005802530942188993,
      "loss": 3.1305,
      "step": 26758
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6164758205413818,
      "learning_rate": 0.0005802516346394012,
      "loss": 3.0819,
      "step": 26759
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5159717798233032,
      "learning_rate": 0.0005802501750077991,
      "loss": 3.1129,
      "step": 26760
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8387854099273682,
      "learning_rate": 0.000580248715324093,
      "loss": 3.1289,
      "step": 26761
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2025721073150635,
      "learning_rate": 0.0005802472555882836,
      "loss": 3.1044,
      "step": 26762
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6212646961212158,
      "learning_rate": 0.0005802457958003709,
      "loss": 3.0868,
      "step": 26763
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.7566397190093994,
      "learning_rate": 0.0005802443359603552,
      "loss": 2.9416,
      "step": 26764
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2029056549072266,
      "learning_rate": 0.0005802428760682366,
      "loss": 3.0369,
      "step": 26765
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3983997106552124,
      "learning_rate": 0.0005802414161240158,
      "loss": 3.0066,
      "step": 26766
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.559402585029602,
      "learning_rate": 0.0005802399561276927,
      "loss": 2.8787,
      "step": 26767
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.793497920036316,
      "learning_rate": 0.0005802384960792678,
      "loss": 3.1174,
      "step": 26768
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3836747407913208,
      "learning_rate": 0.0005802370359787412,
      "loss": 3.3858,
      "step": 26769
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4024900197982788,
      "learning_rate": 0.0005802355758261133,
      "loss": 2.9967,
      "step": 26770
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8235669136047363,
      "learning_rate": 0.0005802341156213843,
      "loss": 3.2273,
      "step": 26771
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.9801089763641357,
      "learning_rate": 0.0005802326553645545,
      "loss": 2.9799,
      "step": 26772
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3513810634613037,
      "learning_rate": 0.0005802311950556242,
      "loss": 2.9739,
      "step": 26773
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.287516713142395,
      "learning_rate": 0.0005802297346945935,
      "loss": 3.1655,
      "step": 26774
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4970123767852783,
      "learning_rate": 0.0005802282742814629,
      "loss": 2.9728,
      "step": 26775
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3579307794570923,
      "learning_rate": 0.0005802268138162325,
      "loss": 2.9319,
      "step": 26776
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6943014860153198,
      "learning_rate": 0.0005802253532989027,
      "loss": 3.1798,
      "step": 26777
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5411874055862427,
      "learning_rate": 0.0005802238927294737,
      "loss": 2.8814,
      "step": 26778
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4405478239059448,
      "learning_rate": 0.0005802224321079458,
      "loss": 2.8763,
      "step": 26779
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3297913074493408,
      "learning_rate": 0.0005802209714343193,
      "loss": 3.0213,
      "step": 26780
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9001398086547852,
      "learning_rate": 0.0005802195107085943,
      "loss": 3.1458,
      "step": 26781
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3498963117599487,
      "learning_rate": 0.0005802180499307714,
      "loss": 2.837,
      "step": 26782
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4857882261276245,
      "learning_rate": 0.0005802165891008505,
      "loss": 2.9663,
      "step": 26783
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3674753904342651,
      "learning_rate": 0.0005802151282188321,
      "loss": 2.8171,
      "step": 26784
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3209103345870972,
      "learning_rate": 0.0005802136672847164,
      "loss": 2.966,
      "step": 26785
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2595863342285156,
      "learning_rate": 0.0005802122062985038,
      "loss": 3.0023,
      "step": 26786
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1317925453186035,
      "learning_rate": 0.0005802107452601943,
      "loss": 3.1999,
      "step": 26787
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5785411596298218,
      "learning_rate": 0.0005802092841697884,
      "loss": 2.9985,
      "step": 26788
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6524546146392822,
      "learning_rate": 0.0005802078230272864,
      "loss": 3.7317,
      "step": 26789
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.459889531135559,
      "learning_rate": 0.0005802063618326884,
      "loss": 3.0206,
      "step": 26790
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4097721576690674,
      "learning_rate": 0.0005802049005859947,
      "loss": 2.9692,
      "step": 26791
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.248561978340149,
      "learning_rate": 0.0005802034392872057,
      "loss": 3.3403,
      "step": 26792
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.429584264755249,
      "learning_rate": 0.0005802019779363216,
      "loss": 3.1849,
      "step": 26793
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6177235841751099,
      "learning_rate": 0.0005802005165333426,
      "loss": 3.2067,
      "step": 26794
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6089476346969604,
      "learning_rate": 0.000580199055078269,
      "loss": 2.7961,
      "step": 26795
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3084183931350708,
      "learning_rate": 0.0005801975935711012,
      "loss": 3.0512,
      "step": 26796
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.716915249824524,
      "learning_rate": 0.0005801961320118393,
      "loss": 3.2128,
      "step": 26797
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3854267597198486,
      "learning_rate": 0.0005801946704004837,
      "loss": 2.8414,
      "step": 26798
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5494716167449951,
      "learning_rate": 0.0005801932087370346,
      "loss": 3.0894,
      "step": 26799
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.512626051902771,
      "learning_rate": 0.0005801917470214924,
      "loss": 3.3871,
      "step": 26800
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2409491539001465,
      "learning_rate": 0.0005801902852538572,
      "loss": 3.1626,
      "step": 26801
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3903206586837769,
      "learning_rate": 0.0005801888234341292,
      "loss": 2.8787,
      "step": 26802
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.276287317276001,
      "learning_rate": 0.000580187361562309,
      "loss": 3.3255,
      "step": 26803
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.841251254081726,
      "learning_rate": 0.0005801858996383964,
      "loss": 2.9278,
      "step": 26804
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6670241355895996,
      "learning_rate": 0.0005801844376623922,
      "loss": 3.0548,
      "step": 26805
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2868796586990356,
      "learning_rate": 0.0005801829756342964,
      "loss": 3.1742,
      "step": 26806
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4022401571273804,
      "learning_rate": 0.0005801815135541092,
      "loss": 3.1939,
      "step": 26807
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5153692960739136,
      "learning_rate": 0.0005801800514218311,
      "loss": 3.2176,
      "step": 26808
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4222995042800903,
      "learning_rate": 0.0005801785892374622,
      "loss": 2.9925,
      "step": 26809
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.972029685974121,
      "learning_rate": 0.0005801771270010028,
      "loss": 3.0101,
      "step": 26810
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7814379930496216,
      "learning_rate": 0.0005801756647124532,
      "loss": 3.179,
      "step": 26811
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.93814754486084,
      "learning_rate": 0.0005801742023718136,
      "loss": 2.931,
      "step": 26812
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.42984938621521,
      "learning_rate": 0.0005801727399790843,
      "loss": 2.9163,
      "step": 26813
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.158663034439087,
      "learning_rate": 0.0005801712775342656,
      "loss": 3.1165,
      "step": 26814
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.629958987236023,
      "learning_rate": 0.000580169815037358,
      "loss": 3.1184,
      "step": 26815
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.642540693283081,
      "learning_rate": 0.0005801683524883612,
      "loss": 2.8692,
      "step": 26816
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9570000171661377,
      "learning_rate": 0.0005801668898872761,
      "loss": 3.1487,
      "step": 26817
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.337527871131897,
      "learning_rate": 0.0005801654272341025,
      "loss": 3.3578,
      "step": 26818
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8106253147125244,
      "learning_rate": 0.000580163964528841,
      "loss": 3.2835,
      "step": 26819
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.11714506149292,
      "learning_rate": 0.0005801625017714916,
      "loss": 2.9075,
      "step": 26820
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4989347457885742,
      "learning_rate": 0.0005801610389620548,
      "loss": 3.3561,
      "step": 26821
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.482936143875122,
      "learning_rate": 0.0005801595761005307,
      "loss": 3.228,
      "step": 26822
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9158333539962769,
      "learning_rate": 0.0005801581131869197,
      "loss": 3.1363,
      "step": 26823
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.820374846458435,
      "learning_rate": 0.000580156650221222,
      "loss": 3.3719,
      "step": 26824
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.486244559288025,
      "learning_rate": 0.0005801551872034378,
      "loss": 3.112,
      "step": 26825
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7502846717834473,
      "learning_rate": 0.0005801537241335676,
      "loss": 3.0381,
      "step": 26826
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3031516075134277,
      "learning_rate": 0.0005801522610116114,
      "loss": 2.9213,
      "step": 26827
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3356738090515137,
      "learning_rate": 0.0005801507978375699,
      "loss": 3.2897,
      "step": 26828
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4301917552947998,
      "learning_rate": 0.0005801493346114428,
      "loss": 2.8764,
      "step": 26829
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3892878293991089,
      "learning_rate": 0.0005801478713332307,
      "loss": 3.1319,
      "step": 26830
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3425986766815186,
      "learning_rate": 0.0005801464080029338,
      "loss": 2.9351,
      "step": 26831
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2580556869506836,
      "learning_rate": 0.0005801449446205525,
      "loss": 3.2226,
      "step": 26832
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5070935487747192,
      "learning_rate": 0.0005801434811860868,
      "loss": 2.9678,
      "step": 26833
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2925294637680054,
      "learning_rate": 0.0005801420176995373,
      "loss": 3.1757,
      "step": 26834
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3213964700698853,
      "learning_rate": 0.000580140554160904,
      "loss": 3.4389,
      "step": 26835
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.469866394996643,
      "learning_rate": 0.0005801390905701873,
      "loss": 2.9931,
      "step": 26836
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.462197184562683,
      "learning_rate": 0.0005801376269273876,
      "loss": 2.9062,
      "step": 26837
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4428497552871704,
      "learning_rate": 0.0005801361632325049,
      "loss": 3.2581,
      "step": 26838
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.303539752960205,
      "learning_rate": 0.0005801346994855396,
      "loss": 3.1347,
      "step": 26839
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4298231601715088,
      "learning_rate": 0.0005801332356864921,
      "loss": 3.081,
      "step": 26840
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4823304414749146,
      "learning_rate": 0.0005801317718353623,
      "loss": 3.1865,
      "step": 26841
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3749949932098389,
      "learning_rate": 0.0005801303079321509,
      "loss": 3.1458,
      "step": 26842
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.562056303024292,
      "learning_rate": 0.000580128843976858,
      "loss": 3.1897,
      "step": 26843
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5281383991241455,
      "learning_rate": 0.0005801273799694837,
      "loss": 2.8765,
      "step": 26844
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.701407790184021,
      "learning_rate": 0.0005801259159100285,
      "loss": 3.1867,
      "step": 26845
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8010696172714233,
      "learning_rate": 0.0005801244517984926,
      "loss": 2.9791,
      "step": 26846
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5007047653198242,
      "learning_rate": 0.0005801229876348764,
      "loss": 2.9412,
      "step": 26847
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3324588537216187,
      "learning_rate": 0.0005801215234191799,
      "loss": 3.11,
      "step": 26848
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6406620740890503,
      "learning_rate": 0.0005801200591514037,
      "loss": 3.3408,
      "step": 26849
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3612700700759888,
      "learning_rate": 0.0005801185948315476,
      "loss": 2.9246,
      "step": 26850
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6782448291778564,
      "learning_rate": 0.0005801171304596124,
      "loss": 3.3146,
      "step": 26851
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9575165510177612,
      "learning_rate": 0.0005801156660355981,
      "loss": 3.087,
      "step": 26852
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.866462230682373,
      "learning_rate": 0.0005801142015595049,
      "loss": 3.2687,
      "step": 26853
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.971933364868164,
      "learning_rate": 0.0005801127370313333,
      "loss": 3.1358,
      "step": 26854
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7304911613464355,
      "learning_rate": 0.0005801112724510835,
      "loss": 2.7548,
      "step": 26855
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5780198574066162,
      "learning_rate": 0.0005801098078187557,
      "loss": 3.0331,
      "step": 26856
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5218985080718994,
      "learning_rate": 0.0005801083431343501,
      "loss": 3.1068,
      "step": 26857
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.839925765991211,
      "learning_rate": 0.0005801068783978671,
      "loss": 3.0894,
      "step": 26858
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.633310079574585,
      "learning_rate": 0.000580105413609307,
      "loss": 2.752,
      "step": 26859
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4425004720687866,
      "learning_rate": 0.00058010394876867,
      "loss": 3.1605,
      "step": 26860
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.2113866806030273,
      "learning_rate": 0.0005801024838759562,
      "loss": 3.1912,
      "step": 26861
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8268550634384155,
      "learning_rate": 0.0005801010189311663,
      "loss": 3.0519,
      "step": 26862
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9077774286270142,
      "learning_rate": 0.0005800995539343003,
      "loss": 3.1961,
      "step": 26863
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.528017520904541,
      "learning_rate": 0.0005800980888853585,
      "loss": 2.8732,
      "step": 26864
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4245059490203857,
      "learning_rate": 0.0005800966237843411,
      "loss": 3.1374,
      "step": 26865
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4582805633544922,
      "learning_rate": 0.0005800951586312485,
      "loss": 3.3108,
      "step": 26866
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3244800567626953,
      "learning_rate": 0.0005800936934260809,
      "loss": 3.2078,
      "step": 26867
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3493001461029053,
      "learning_rate": 0.0005800922281688387,
      "loss": 3.0597,
      "step": 26868
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9230138063430786,
      "learning_rate": 0.0005800907628595219,
      "loss": 3.2397,
      "step": 26869
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4067301750183105,
      "learning_rate": 0.000580089297498131,
      "loss": 3.044,
      "step": 26870
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3410181999206543,
      "learning_rate": 0.0005800878320846663,
      "loss": 3.0853,
      "step": 26871
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.184119462966919,
      "learning_rate": 0.0005800863666191279,
      "loss": 3.0152,
      "step": 26872
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.776365876197815,
      "learning_rate": 0.0005800849011015161,
      "loss": 3.2544,
      "step": 26873
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6557183265686035,
      "learning_rate": 0.0005800834355318314,
      "loss": 3.1548,
      "step": 26874
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4230955839157104,
      "learning_rate": 0.0005800819699100738,
      "loss": 3.2093,
      "step": 26875
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6659395694732666,
      "learning_rate": 0.0005800805042362436,
      "loss": 3.0365,
      "step": 26876
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.605074167251587,
      "learning_rate": 0.0005800790385103413,
      "loss": 3.0256,
      "step": 26877
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.604111909866333,
      "learning_rate": 0.000580077572732367,
      "loss": 3.2965,
      "step": 26878
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.398307204246521,
      "learning_rate": 0.0005800761069023209,
      "loss": 2.886,
      "step": 26879
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3565089702606201,
      "learning_rate": 0.0005800746410202035,
      "loss": 3.215,
      "step": 26880
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3422224521636963,
      "learning_rate": 0.0005800731750860148,
      "loss": 3.1608,
      "step": 26881
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8277086019515991,
      "learning_rate": 0.0005800717090997553,
      "loss": 2.8344,
      "step": 26882
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.490928888320923,
      "learning_rate": 0.0005800702430614251,
      "loss": 2.8678,
      "step": 26883
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.916244626045227,
      "learning_rate": 0.0005800687769710245,
      "loss": 3.1256,
      "step": 26884
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.031440019607544,
      "learning_rate": 0.0005800673108285541,
      "loss": 3.06,
      "step": 26885
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4737486839294434,
      "learning_rate": 0.0005800658446340136,
      "loss": 3.2244,
      "step": 26886
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5862587690353394,
      "learning_rate": 0.0005800643783874037,
      "loss": 2.9338,
      "step": 26887
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.1552419662475586,
      "learning_rate": 0.0005800629120887245,
      "loss": 3.1939,
      "step": 26888
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0574941635131836,
      "learning_rate": 0.0005800614457379765,
      "loss": 3.1209,
      "step": 26889
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5880978107452393,
      "learning_rate": 0.0005800599793351595,
      "loss": 3.2016,
      "step": 26890
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0624778270721436,
      "learning_rate": 0.0005800585128802742,
      "loss": 2.7491,
      "step": 26891
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8534342050552368,
      "learning_rate": 0.0005800570463733208,
      "loss": 2.8982,
      "step": 26892
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.368032693862915,
      "learning_rate": 0.0005800555798142994,
      "loss": 3.2966,
      "step": 26893
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6833255290985107,
      "learning_rate": 0.0005800541132032105,
      "loss": 3.1704,
      "step": 26894
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4672638177871704,
      "learning_rate": 0.0005800526465400542,
      "loss": 2.8585,
      "step": 26895
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.1187851428985596,
      "learning_rate": 0.0005800511798248309,
      "loss": 3.2006,
      "step": 26896
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9296821355819702,
      "learning_rate": 0.0005800497130575407,
      "loss": 3.0174,
      "step": 26897
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5212651491165161,
      "learning_rate": 0.0005800482462381839,
      "loss": 3.3586,
      "step": 26898
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.283625841140747,
      "learning_rate": 0.0005800467793667609,
      "loss": 3.2572,
      "step": 26899
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.7929933071136475,
      "learning_rate": 0.0005800453124432719,
      "loss": 3.2725,
      "step": 26900
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9523152112960815,
      "learning_rate": 0.0005800438454677174,
      "loss": 3.0841,
      "step": 26901
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7838767766952515,
      "learning_rate": 0.0005800423784400973,
      "loss": 3.4426,
      "step": 26902
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6548354625701904,
      "learning_rate": 0.0005800409113604119,
      "loss": 2.9425,
      "step": 26903
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4123200178146362,
      "learning_rate": 0.0005800394442286619,
      "loss": 3.1951,
      "step": 26904
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.969858169555664,
      "learning_rate": 0.0005800379770448471,
      "loss": 3.2028,
      "step": 26905
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9391075372695923,
      "learning_rate": 0.000580036509808968,
      "loss": 3.1857,
      "step": 26906
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.497004508972168,
      "learning_rate": 0.0005800350425210248,
      "loss": 3.225,
      "step": 26907
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.161543130874634,
      "learning_rate": 0.0005800335751810178,
      "loss": 3.1101,
      "step": 26908
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0049850940704346,
      "learning_rate": 0.0005800321077889472,
      "loss": 3.0586,
      "step": 26909
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.35341739654541,
      "learning_rate": 0.0005800306403448136,
      "loss": 3.146,
      "step": 26910
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.059256076812744,
      "learning_rate": 0.0005800291728486167,
      "loss": 3.0351,
      "step": 26911
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8647675514221191,
      "learning_rate": 0.0005800277053003573,
      "loss": 2.7893,
      "step": 26912
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7095626592636108,
      "learning_rate": 0.0005800262377000354,
      "loss": 3.2542,
      "step": 26913
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6125296354293823,
      "learning_rate": 0.0005800247700476512,
      "loss": 2.7266,
      "step": 26914
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5468864440917969,
      "learning_rate": 0.0005800233023432054,
      "loss": 3.008,
      "step": 26915
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.023730754852295,
      "learning_rate": 0.0005800218345866978,
      "loss": 3.126,
      "step": 26916
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8462179899215698,
      "learning_rate": 0.0005800203667781289,
      "loss": 2.9379,
      "step": 26917
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9966851472854614,
      "learning_rate": 0.0005800188989174989,
      "loss": 3.0674,
      "step": 26918
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1202144622802734,
      "learning_rate": 0.0005800174310048081,
      "loss": 3.0601,
      "step": 26919
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2329496145248413,
      "learning_rate": 0.0005800159630400568,
      "loss": 3.1252,
      "step": 26920
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5184223651885986,
      "learning_rate": 0.0005800144950232452,
      "loss": 3.3312,
      "step": 26921
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5866643190383911,
      "learning_rate": 0.0005800130269543736,
      "loss": 3.2449,
      "step": 26922
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8364436626434326,
      "learning_rate": 0.0005800115588334424,
      "loss": 3.1453,
      "step": 26923
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3964855670928955,
      "learning_rate": 0.0005800100906604517,
      "loss": 3.277,
      "step": 26924
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.516248941421509,
      "learning_rate": 0.0005800086224354019,
      "loss": 2.924,
      "step": 26925
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2926127910614014,
      "learning_rate": 0.0005800071541582931,
      "loss": 3.1887,
      "step": 26926
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3089779615402222,
      "learning_rate": 0.0005800056858291258,
      "loss": 3.2064,
      "step": 26927
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8743712902069092,
      "learning_rate": 0.0005800042174479001,
      "loss": 2.9209,
      "step": 26928
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5331100225448608,
      "learning_rate": 0.0005800027490146163,
      "loss": 3.173,
      "step": 26929
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4450993537902832,
      "learning_rate": 0.0005800012805292747,
      "loss": 2.8761,
      "step": 26930
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6355314254760742,
      "learning_rate": 0.0005799998119918756,
      "loss": 2.998,
      "step": 26931
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.218440294265747,
      "learning_rate": 0.0005799983434024193,
      "loss": 2.9045,
      "step": 26932
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6741293668746948,
      "learning_rate": 0.000579996874760906,
      "loss": 2.9801,
      "step": 26933
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8231732845306396,
      "learning_rate": 0.000579995406067336,
      "loss": 2.8375,
      "step": 26934
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.480974555015564,
      "learning_rate": 0.0005799939373217095,
      "loss": 3.0154,
      "step": 26935
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3755747079849243,
      "learning_rate": 0.000579992468524027,
      "loss": 2.8468,
      "step": 26936
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3329405784606934,
      "learning_rate": 0.0005799909996742885,
      "loss": 3.1084,
      "step": 26937
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.032496929168701,
      "learning_rate": 0.0005799895307724944,
      "loss": 3.1372,
      "step": 26938
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.85990571975708,
      "learning_rate": 0.000579988061818645,
      "loss": 2.9323,
      "step": 26939
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5068695545196533,
      "learning_rate": 0.0005799865928127404,
      "loss": 3.1433,
      "step": 26940
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3817261457443237,
      "learning_rate": 0.0005799851237547811,
      "loss": 2.9802,
      "step": 26941
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8315296173095703,
      "learning_rate": 0.0005799836546447673,
      "loss": 3.1402,
      "step": 26942
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7254223823547363,
      "learning_rate": 0.0005799821854826992,
      "loss": 3.2702,
      "step": 26943
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9832391738891602,
      "learning_rate": 0.0005799807162685771,
      "loss": 3.1547,
      "step": 26944
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3017585277557373,
      "learning_rate": 0.0005799792470024014,
      "loss": 3.0145,
      "step": 26945
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3059377670288086,
      "learning_rate": 0.0005799777776841722,
      "loss": 3.2393,
      "step": 26946
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6960768699645996,
      "learning_rate": 0.0005799763083138899,
      "loss": 3.0191,
      "step": 26947
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4162272214889526,
      "learning_rate": 0.0005799748388915548,
      "loss": 3.1571,
      "step": 26948
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5121347904205322,
      "learning_rate": 0.0005799733694171669,
      "loss": 2.9973,
      "step": 26949
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.488739252090454,
      "learning_rate": 0.0005799718998907268,
      "loss": 3.0538,
      "step": 26950
    },
    {
      "epoch": 0.35,
      "grad_norm": 4.535749435424805,
      "learning_rate": 0.0005799704303122346,
      "loss": 3.0344,
      "step": 26951
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4571717977523804,
      "learning_rate": 0.0005799689606816905,
      "loss": 3.0289,
      "step": 26952
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4930360317230225,
      "learning_rate": 0.000579967490999095,
      "loss": 3.0426,
      "step": 26953
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.128338575363159,
      "learning_rate": 0.0005799660212644482,
      "loss": 3.0722,
      "step": 26954
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6967424154281616,
      "learning_rate": 0.0005799645514777506,
      "loss": 3.2148,
      "step": 26955
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9786030054092407,
      "learning_rate": 0.0005799630816390022,
      "loss": 2.9798,
      "step": 26956
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6690887212753296,
      "learning_rate": 0.0005799616117482033,
      "loss": 3.1874,
      "step": 26957
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.107126235961914,
      "learning_rate": 0.0005799601418053544,
      "loss": 2.8748,
      "step": 26958
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7744296789169312,
      "learning_rate": 0.0005799586718104556,
      "loss": 3.0589,
      "step": 26959
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6439716815948486,
      "learning_rate": 0.000579957201763507,
      "loss": 3.1381,
      "step": 26960
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4120476245880127,
      "learning_rate": 0.0005799557316645092,
      "loss": 3.1325,
      "step": 26961
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5865654945373535,
      "learning_rate": 0.0005799542615134624,
      "loss": 2.8402,
      "step": 26962
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4732173681259155,
      "learning_rate": 0.0005799527913103668,
      "loss": 2.9657,
      "step": 26963
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6686233282089233,
      "learning_rate": 0.0005799513210552227,
      "loss": 3.1786,
      "step": 26964
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6463444232940674,
      "learning_rate": 0.0005799498507480303,
      "loss": 3.0437,
      "step": 26965
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6779451370239258,
      "learning_rate": 0.00057994838038879,
      "loss": 3.0036,
      "step": 26966
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4100497961044312,
      "learning_rate": 0.0005799469099775019,
      "loss": 3.0556,
      "step": 26967
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0836002826690674,
      "learning_rate": 0.0005799454395141665,
      "loss": 3.1508,
      "step": 26968
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9094294309616089,
      "learning_rate": 0.000579943968998784,
      "loss": 2.8499,
      "step": 26969
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.47370445728302,
      "learning_rate": 0.0005799424984313545,
      "loss": 3.1954,
      "step": 26970
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5227055549621582,
      "learning_rate": 0.0005799410278118784,
      "loss": 3.3452,
      "step": 26971
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8097654581069946,
      "learning_rate": 0.0005799395571403561,
      "loss": 3.1878,
      "step": 26972
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7683051824569702,
      "learning_rate": 0.0005799380864167877,
      "loss": 3.1464,
      "step": 26973
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7684874534606934,
      "learning_rate": 0.0005799366156411735,
      "loss": 3.0653,
      "step": 26974
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.085533380508423,
      "learning_rate": 0.0005799351448135139,
      "loss": 3.138,
      "step": 26975
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7274279594421387,
      "learning_rate": 0.0005799336739338089,
      "loss": 3.1293,
      "step": 26976
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8080679178237915,
      "learning_rate": 0.000579932203002059,
      "loss": 2.955,
      "step": 26977
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5475826263427734,
      "learning_rate": 0.0005799307320182645,
      "loss": 3.0185,
      "step": 26978
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9902101755142212,
      "learning_rate": 0.0005799292609824255,
      "loss": 3.2009,
      "step": 26979
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.913009524345398,
      "learning_rate": 0.0005799277898945424,
      "loss": 3.1179,
      "step": 26980
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4400954246520996,
      "learning_rate": 0.0005799263187546155,
      "loss": 3.2664,
      "step": 26981
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4967073202133179,
      "learning_rate": 0.000579924847562645,
      "loss": 3.28,
      "step": 26982
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.757432222366333,
      "learning_rate": 0.0005799233763186311,
      "loss": 3.0527,
      "step": 26983
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7916271686553955,
      "learning_rate": 0.0005799219050225743,
      "loss": 2.9283,
      "step": 26984
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6036473512649536,
      "learning_rate": 0.0005799204336744745,
      "loss": 2.9853,
      "step": 26985
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.3786213397979736,
      "learning_rate": 0.0005799189622743324,
      "loss": 3.1345,
      "step": 26986
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.580031156539917,
      "learning_rate": 0.0005799174908221481,
      "loss": 3.113,
      "step": 26987
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4674311876296997,
      "learning_rate": 0.0005799160193179218,
      "loss": 3.066,
      "step": 26988
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9046865701675415,
      "learning_rate": 0.0005799145477616538,
      "loss": 2.8451,
      "step": 26989
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8383103609085083,
      "learning_rate": 0.0005799130761533444,
      "loss": 3.0942,
      "step": 26990
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8008241653442383,
      "learning_rate": 0.0005799116044929939,
      "loss": 3.2249,
      "step": 26991
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1899425983428955,
      "learning_rate": 0.0005799101327806026,
      "loss": 3.0021,
      "step": 26992
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.075531244277954,
      "learning_rate": 0.0005799086610161706,
      "loss": 2.9546,
      "step": 26993
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.139477252960205,
      "learning_rate": 0.0005799071891996984,
      "loss": 2.7954,
      "step": 26994
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6535395383834839,
      "learning_rate": 0.0005799057173311861,
      "loss": 3.2667,
      "step": 26995
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2190749645233154,
      "learning_rate": 0.0005799042454106341,
      "loss": 2.8248,
      "step": 26996
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.8891210556030273,
      "learning_rate": 0.0005799027734380425,
      "loss": 3.2229,
      "step": 26997
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.080193519592285,
      "learning_rate": 0.0005799013014134119,
      "loss": 2.9226,
      "step": 26998
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.403110384941101,
      "learning_rate": 0.0005798998293367422,
      "loss": 2.9499,
      "step": 26999
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.822184443473816,
      "learning_rate": 0.0005798983572080339,
      "loss": 3.4488,
      "step": 27000
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0909152030944824,
      "learning_rate": 0.0005798968850272872,
      "loss": 3.2137,
      "step": 27001
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.315837860107422,
      "learning_rate": 0.0005798954127945023,
      "loss": 3.1246,
      "step": 27002
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.177816390991211,
      "learning_rate": 0.0005798939405096797,
      "loss": 3.0173,
      "step": 27003
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1262128353118896,
      "learning_rate": 0.0005798924681728195,
      "loss": 2.6878,
      "step": 27004
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.267106056213379,
      "learning_rate": 0.0005798909957839219,
      "loss": 2.7473,
      "step": 27005
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3096868991851807,
      "learning_rate": 0.0005798895233429874,
      "loss": 3.1063,
      "step": 27006
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1197776794433594,
      "learning_rate": 0.0005798880508500161,
      "loss": 3.1227,
      "step": 27007
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5189850330352783,
      "learning_rate": 0.0005798865783050084,
      "loss": 2.9922,
      "step": 27008
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3590900897979736,
      "learning_rate": 0.0005798851057079644,
      "loss": 3.0139,
      "step": 27009
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4718267917633057,
      "learning_rate": 0.0005798836330588846,
      "loss": 2.9343,
      "step": 27010
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.10105562210083,
      "learning_rate": 0.0005798821603577692,
      "loss": 3.0969,
      "step": 27011
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3236639499664307,
      "learning_rate": 0.0005798806876046182,
      "loss": 3.065,
      "step": 27012
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3950796127319336,
      "learning_rate": 0.0005798792147994321,
      "loss": 3.1295,
      "step": 27013
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4786890745162964,
      "learning_rate": 0.0005798777419422112,
      "loss": 3.1552,
      "step": 27014
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2527389526367188,
      "learning_rate": 0.000579876269032956,
      "loss": 3.0582,
      "step": 27015
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4130587577819824,
      "learning_rate": 0.0005798747960716662,
      "loss": 3.2077,
      "step": 27016
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5038421154022217,
      "learning_rate": 0.0005798733230583426,
      "loss": 2.9725,
      "step": 27017
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.467750072479248,
      "learning_rate": 0.0005798718499929852,
      "loss": 2.9922,
      "step": 27018
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5260579586029053,
      "learning_rate": 0.0005798703768755943,
      "loss": 3.1284,
      "step": 27019
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5522047281265259,
      "learning_rate": 0.0005798689037061702,
      "loss": 3.0624,
      "step": 27020
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4822918176651,
      "learning_rate": 0.0005798674304847133,
      "loss": 3.1737,
      "step": 27021
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9276413917541504,
      "learning_rate": 0.0005798659572112236,
      "loss": 3.2409,
      "step": 27022
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4389102458953857,
      "learning_rate": 0.0005798644838857017,
      "loss": 3.0083,
      "step": 27023
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7987903356552124,
      "learning_rate": 0.0005798630105081476,
      "loss": 3.2011,
      "step": 27024
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6127312183380127,
      "learning_rate": 0.0005798615370785617,
      "loss": 2.7548,
      "step": 27025
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4118093252182007,
      "learning_rate": 0.0005798600635969443,
      "loss": 3.3736,
      "step": 27026
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.59926438331604,
      "learning_rate": 0.0005798585900632956,
      "loss": 3.2955,
      "step": 27027
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.1996536254882812,
      "learning_rate": 0.000579857116477616,
      "loss": 2.9543,
      "step": 27028
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4500434398651123,
      "learning_rate": 0.0005798556428399055,
      "loss": 3.0633,
      "step": 27029
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6793781518936157,
      "learning_rate": 0.0005798541691501647,
      "loss": 3.0085,
      "step": 27030
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5883078575134277,
      "learning_rate": 0.0005798526954083937,
      "loss": 3.0648,
      "step": 27031
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4052757024765015,
      "learning_rate": 0.0005798512216145927,
      "loss": 3.1805,
      "step": 27032
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9474725723266602,
      "learning_rate": 0.0005798497477687622,
      "loss": 2.994,
      "step": 27033
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6247154474258423,
      "learning_rate": 0.0005798482738709023,
      "loss": 3.134,
      "step": 27034
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.602591872215271,
      "learning_rate": 0.0005798467999210133,
      "loss": 2.9547,
      "step": 27035
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.8198511600494385,
      "learning_rate": 0.0005798453259190955,
      "loss": 3.1858,
      "step": 27036
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5419211387634277,
      "learning_rate": 0.0005798438518651492,
      "loss": 3.3267,
      "step": 27037
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6150028705596924,
      "learning_rate": 0.0005798423777591746,
      "loss": 3.1814,
      "step": 27038
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.882865071296692,
      "learning_rate": 0.0005798409036011722,
      "loss": 3.0161,
      "step": 27039
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8108917474746704,
      "learning_rate": 0.0005798394293911419,
      "loss": 2.9768,
      "step": 27040
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7480242252349854,
      "learning_rate": 0.0005798379551290842,
      "loss": 3.0714,
      "step": 27041
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4634265899658203,
      "learning_rate": 0.0005798364808149993,
      "loss": 3.0975,
      "step": 27042
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.3750522136688232,
      "learning_rate": 0.0005798350064488877,
      "loss": 3.3165,
      "step": 27043
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.124483585357666,
      "learning_rate": 0.0005798335320307493,
      "loss": 2.9481,
      "step": 27044
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6560354232788086,
      "learning_rate": 0.0005798320575605846,
      "loss": 2.909,
      "step": 27045
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2435197830200195,
      "learning_rate": 0.0005798305830383939,
      "loss": 3.1121,
      "step": 27046
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5004279613494873,
      "learning_rate": 0.0005798291084641773,
      "loss": 3.216,
      "step": 27047
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5476034879684448,
      "learning_rate": 0.0005798276338379353,
      "loss": 2.8202,
      "step": 27048
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.14616322517395,
      "learning_rate": 0.0005798261591596681,
      "loss": 2.8182,
      "step": 27049
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6522040367126465,
      "learning_rate": 0.0005798246844293757,
      "loss": 3.084,
      "step": 27050
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.9035706520080566,
      "learning_rate": 0.0005798232096470589,
      "loss": 3.0515,
      "step": 27051
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3640813827514648,
      "learning_rate": 0.0005798217348127175,
      "loss": 3.1694,
      "step": 27052
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.244196653366089,
      "learning_rate": 0.0005798202599263521,
      "loss": 3.1763,
      "step": 27053
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.168186664581299,
      "learning_rate": 0.0005798187849879626,
      "loss": 3.0453,
      "step": 27054
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6523908376693726,
      "learning_rate": 0.0005798173099975498,
      "loss": 2.877,
      "step": 27055
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5956393480300903,
      "learning_rate": 0.0005798158349551134,
      "loss": 3.0266,
      "step": 27056
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.960676670074463,
      "learning_rate": 0.0005798143598606542,
      "loss": 3.1459,
      "step": 27057
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.491314172744751,
      "learning_rate": 0.000579812884714172,
      "loss": 3.0205,
      "step": 27058
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4398744106292725,
      "learning_rate": 0.0005798114095156675,
      "loss": 3.0367,
      "step": 27059
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.829488754272461,
      "learning_rate": 0.0005798099342651407,
      "loss": 3.0952,
      "step": 27060
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3937618732452393,
      "learning_rate": 0.0005798084589625919,
      "loss": 2.9581,
      "step": 27061
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.261034369468689,
      "learning_rate": 0.0005798069836080215,
      "loss": 3.186,
      "step": 27062
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.659142255783081,
      "learning_rate": 0.0005798055082014297,
      "loss": 3.4646,
      "step": 27063
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.2177183628082275,
      "learning_rate": 0.0005798040327428168,
      "loss": 3.1182,
      "step": 27064
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7641476392745972,
      "learning_rate": 0.000579802557232183,
      "loss": 2.9689,
      "step": 27065
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9658927917480469,
      "learning_rate": 0.0005798010816695286,
      "loss": 3.0342,
      "step": 27066
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.618597984313965,
      "learning_rate": 0.0005797996060548539,
      "loss": 3.1248,
      "step": 27067
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.39568293094635,
      "learning_rate": 0.0005797981303881591,
      "loss": 3.2028,
      "step": 27068
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3084522485733032,
      "learning_rate": 0.0005797966546694447,
      "loss": 2.9894,
      "step": 27069
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3778352737426758,
      "learning_rate": 0.0005797951788987108,
      "loss": 2.8699,
      "step": 27070
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2006964683532715,
      "learning_rate": 0.0005797937030759576,
      "loss": 2.9992,
      "step": 27071
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8876909017562866,
      "learning_rate": 0.0005797922272011855,
      "loss": 2.9136,
      "step": 27072
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7718793153762817,
      "learning_rate": 0.0005797907512743947,
      "loss": 2.9171,
      "step": 27073
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1577939987182617,
      "learning_rate": 0.0005797892752955856,
      "loss": 3.1868,
      "step": 27074
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.315166711807251,
      "learning_rate": 0.0005797877992647583,
      "loss": 2.9568,
      "step": 27075
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.568857192993164,
      "learning_rate": 0.0005797863231819133,
      "loss": 3.0915,
      "step": 27076
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5422327518463135,
      "learning_rate": 0.0005797848470470507,
      "loss": 3.112,
      "step": 27077
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5861369371414185,
      "learning_rate": 0.0005797833708601708,
      "loss": 3.4291,
      "step": 27078
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3093019723892212,
      "learning_rate": 0.0005797818946212739,
      "loss": 3.2088,
      "step": 27079
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5204648971557617,
      "learning_rate": 0.0005797804183303602,
      "loss": 3.057,
      "step": 27080
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4883999824523926,
      "learning_rate": 0.0005797789419874301,
      "loss": 2.8737,
      "step": 27081
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.352574110031128,
      "learning_rate": 0.0005797774655924838,
      "loss": 3.0449,
      "step": 27082
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4756070375442505,
      "learning_rate": 0.0005797759891455216,
      "loss": 3.1298,
      "step": 27083
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.961136817932129,
      "learning_rate": 0.0005797745126465437,
      "loss": 3.092,
      "step": 27084
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.420093536376953,
      "learning_rate": 0.0005797730360955506,
      "loss": 3.1356,
      "step": 27085
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1467318534851074,
      "learning_rate": 0.0005797715594925422,
      "loss": 2.8798,
      "step": 27086
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.964495897293091,
      "learning_rate": 0.0005797700828375191,
      "loss": 3.1538,
      "step": 27087
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.866501808166504,
      "learning_rate": 0.0005797686061304815,
      "loss": 3.2329,
      "step": 27088
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7266470193862915,
      "learning_rate": 0.0005797671293714295,
      "loss": 3.1108,
      "step": 27089
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5799766778945923,
      "learning_rate": 0.0005797656525603636,
      "loss": 3.1498,
      "step": 27090
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2428479194641113,
      "learning_rate": 0.000579764175697284,
      "loss": 2.964,
      "step": 27091
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.0821640491485596,
      "learning_rate": 0.0005797626987821909,
      "loss": 2.9995,
      "step": 27092
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6981247663497925,
      "learning_rate": 0.0005797612218150847,
      "loss": 2.8524,
      "step": 27093
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6279035806655884,
      "learning_rate": 0.0005797597447959656,
      "loss": 3.1313,
      "step": 27094
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7328578233718872,
      "learning_rate": 0.0005797582677248338,
      "loss": 3.2191,
      "step": 27095
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6334614753723145,
      "learning_rate": 0.0005797567906016898,
      "loss": 2.9739,
      "step": 27096
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4724539518356323,
      "learning_rate": 0.0005797553134265336,
      "loss": 2.8732,
      "step": 27097
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.722102165222168,
      "learning_rate": 0.0005797538361993656,
      "loss": 2.9656,
      "step": 27098
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4021451473236084,
      "learning_rate": 0.0005797523589201861,
      "loss": 3.1328,
      "step": 27099
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.521337866783142,
      "learning_rate": 0.0005797508815889955,
      "loss": 3.2404,
      "step": 27100
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.103736400604248,
      "learning_rate": 0.0005797494042057938,
      "loss": 3.3109,
      "step": 27101
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7455331087112427,
      "learning_rate": 0.0005797479267705814,
      "loss": 3.0016,
      "step": 27102
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3034369945526123,
      "learning_rate": 0.0005797464492833586,
      "loss": 3.219,
      "step": 27103
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6647855043411255,
      "learning_rate": 0.0005797449717441257,
      "loss": 3.1292,
      "step": 27104
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2808414697647095,
      "learning_rate": 0.0005797434941528829,
      "loss": 2.9531,
      "step": 27105
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4634559154510498,
      "learning_rate": 0.0005797420165096304,
      "loss": 3.3733,
      "step": 27106
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2340950965881348,
      "learning_rate": 0.0005797405388143687,
      "loss": 2.9868,
      "step": 27107
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6018929481506348,
      "learning_rate": 0.000579739061067098,
      "loss": 3.1243,
      "step": 27108
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.483036756515503,
      "learning_rate": 0.0005797375832678184,
      "loss": 2.9899,
      "step": 27109
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6957993507385254,
      "learning_rate": 0.0005797361054165303,
      "loss": 3.1361,
      "step": 27110
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6108791828155518,
      "learning_rate": 0.000579734627513234,
      "loss": 2.9539,
      "step": 27111
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7910640239715576,
      "learning_rate": 0.0005797331495579299,
      "loss": 3.0463,
      "step": 27112
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4768491983413696,
      "learning_rate": 0.000579731671550618,
      "loss": 3.1221,
      "step": 27113
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.295078754425049,
      "learning_rate": 0.0005797301934912987,
      "loss": 3.1035,
      "step": 27114
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.301415205001831,
      "learning_rate": 0.0005797287153799723,
      "loss": 2.9164,
      "step": 27115
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8582032918930054,
      "learning_rate": 0.000579727237216639,
      "loss": 3.069,
      "step": 27116
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.039358615875244,
      "learning_rate": 0.0005797257590012992,
      "loss": 2.8891,
      "step": 27117
    },
    {
      "epoch": 0.35,
      "grad_norm": 4.029885768890381,
      "learning_rate": 0.000579724280733953,
      "loss": 3.3474,
      "step": 27118
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6046440601348877,
      "learning_rate": 0.0005797228024146009,
      "loss": 3.1939,
      "step": 27119
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4605355262756348,
      "learning_rate": 0.0005797213240432429,
      "loss": 3.247,
      "step": 27120
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8218958377838135,
      "learning_rate": 0.0005797198456198796,
      "loss": 3.2618,
      "step": 27121
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.95302677154541,
      "learning_rate": 0.000579718367144511,
      "loss": 3.1036,
      "step": 27122
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4608681201934814,
      "learning_rate": 0.0005797168886171375,
      "loss": 2.9507,
      "step": 27123
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7797596454620361,
      "learning_rate": 0.0005797154100377593,
      "loss": 3.1686,
      "step": 27124
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2494733333587646,
      "learning_rate": 0.0005797139314063769,
      "loss": 3.2283,
      "step": 27125
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8520731925964355,
      "learning_rate": 0.0005797124527229902,
      "loss": 3.0702,
      "step": 27126
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4472402334213257,
      "learning_rate": 0.0005797109739875998,
      "loss": 3.1773,
      "step": 27127
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.735766887664795,
      "learning_rate": 0.0005797094952002058,
      "loss": 3.1298,
      "step": 27128
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.056493043899536,
      "learning_rate": 0.0005797080163608085,
      "loss": 2.9579,
      "step": 27129
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6323978900909424,
      "learning_rate": 0.0005797065374694082,
      "loss": 3.1369,
      "step": 27130
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5711079835891724,
      "learning_rate": 0.0005797050585260052,
      "loss": 3.1121,
      "step": 27131
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9901963472366333,
      "learning_rate": 0.0005797035795305997,
      "loss": 3.0541,
      "step": 27132
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5924938917160034,
      "learning_rate": 0.000579702100483192,
      "loss": 2.7944,
      "step": 27133
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6254379749298096,
      "learning_rate": 0.0005797006213837826,
      "loss": 3.2251,
      "step": 27134
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5606300830841064,
      "learning_rate": 0.0005796991422323714,
      "loss": 3.0519,
      "step": 27135
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8649802207946777,
      "learning_rate": 0.000579697663028959,
      "loss": 3.048,
      "step": 27136
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7517231702804565,
      "learning_rate": 0.0005796961837735453,
      "loss": 2.9809,
      "step": 27137
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4229310750961304,
      "learning_rate": 0.000579694704466131,
      "loss": 2.9373,
      "step": 27138
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.845330834388733,
      "learning_rate": 0.0005796932251067161,
      "loss": 3.1235,
      "step": 27139
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5111935138702393,
      "learning_rate": 0.0005796917456953009,
      "loss": 3.0881,
      "step": 27140
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6471353769302368,
      "learning_rate": 0.0005796902662318857,
      "loss": 3.2022,
      "step": 27141
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7932530641555786,
      "learning_rate": 0.0005796887867164709,
      "loss": 3.1336,
      "step": 27142
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.802170991897583,
      "learning_rate": 0.0005796873071490567,
      "loss": 3.0823,
      "step": 27143
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.1093506813049316,
      "learning_rate": 0.0005796858275296433,
      "loss": 3.0214,
      "step": 27144
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6718727350234985,
      "learning_rate": 0.000579684347858231,
      "loss": 2.902,
      "step": 27145
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6102341413497925,
      "learning_rate": 0.0005796828681348201,
      "loss": 3.1883,
      "step": 27146
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.799354076385498,
      "learning_rate": 0.000579681388359411,
      "loss": 2.9118,
      "step": 27147
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.769498586654663,
      "learning_rate": 0.0005796799085320037,
      "loss": 3.2344,
      "step": 27148
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.558464527130127,
      "learning_rate": 0.0005796784286525987,
      "loss": 3.1419,
      "step": 27149
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.053699016571045,
      "learning_rate": 0.0005796769487211961,
      "loss": 3.111,
      "step": 27150
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5799477100372314,
      "learning_rate": 0.0005796754687377963,
      "loss": 3.1437,
      "step": 27151
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6706550121307373,
      "learning_rate": 0.0005796739887023997,
      "loss": 3.0421,
      "step": 27152
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.506225347518921,
      "learning_rate": 0.0005796725086150063,
      "loss": 3.258,
      "step": 27153
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.435500144958496,
      "learning_rate": 0.0005796710284756165,
      "loss": 3.2313,
      "step": 27154
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3575681447982788,
      "learning_rate": 0.0005796695482842306,
      "loss": 3.1348,
      "step": 27155
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8470059633255005,
      "learning_rate": 0.0005796680680408489,
      "loss": 3.0184,
      "step": 27156
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3101222515106201,
      "learning_rate": 0.0005796665877454716,
      "loss": 3.2519,
      "step": 27157
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8770407438278198,
      "learning_rate": 0.0005796651073980989,
      "loss": 3.1996,
      "step": 27158
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.31734299659729,
      "learning_rate": 0.0005796636269987313,
      "loss": 3.147,
      "step": 27159
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.290152668952942,
      "learning_rate": 0.000579662146547369,
      "loss": 3.2876,
      "step": 27160
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4959698915481567,
      "learning_rate": 0.0005796606660440121,
      "loss": 2.9756,
      "step": 27161
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5934268236160278,
      "learning_rate": 0.000579659185488661,
      "loss": 2.9197,
      "step": 27162
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0927371978759766,
      "learning_rate": 0.000579657704881316,
      "loss": 2.7473,
      "step": 27163
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5719879865646362,
      "learning_rate": 0.0005796562242219774,
      "loss": 3.1269,
      "step": 27164
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4472640752792358,
      "learning_rate": 0.0005796547435106455,
      "loss": 3.0897,
      "step": 27165
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4310202598571777,
      "learning_rate": 0.0005796532627473203,
      "loss": 2.9327,
      "step": 27166
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4135087728500366,
      "learning_rate": 0.0005796517819320025,
      "loss": 2.9743,
      "step": 27167
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.247915506362915,
      "learning_rate": 0.000579650301064692,
      "loss": 3.0527,
      "step": 27168
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.802145004272461,
      "learning_rate": 0.0005796488201453892,
      "loss": 3.2066,
      "step": 27169
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.537781000137329,
      "learning_rate": 0.0005796473391740945,
      "loss": 2.8945,
      "step": 27170
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.319777488708496,
      "learning_rate": 0.0005796458581508081,
      "loss": 3.0306,
      "step": 27171
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7952265739440918,
      "learning_rate": 0.0005796443770755302,
      "loss": 3.0564,
      "step": 27172
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3789637088775635,
      "learning_rate": 0.0005796428959482612,
      "loss": 3.0058,
      "step": 27173
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3669779300689697,
      "learning_rate": 0.0005796414147690013,
      "loss": 2.832,
      "step": 27174
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.109661340713501,
      "learning_rate": 0.0005796399335377506,
      "loss": 3.1356,
      "step": 27175
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.91263747215271,
      "learning_rate": 0.0005796384522545097,
      "loss": 3.2189,
      "step": 27176
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3616833686828613,
      "learning_rate": 0.0005796369709192788,
      "loss": 3.1232,
      "step": 27177
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.499282956123352,
      "learning_rate": 0.000579635489532058,
      "loss": 2.9653,
      "step": 27178
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2191007137298584,
      "learning_rate": 0.0005796340080928478,
      "loss": 2.9398,
      "step": 27179
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7994375228881836,
      "learning_rate": 0.0005796325266016483,
      "loss": 2.8797,
      "step": 27180
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7531243562698364,
      "learning_rate": 0.0005796310450584597,
      "loss": 3.2537,
      "step": 27181
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.026487112045288,
      "learning_rate": 0.0005796295634632826,
      "loss": 2.8949,
      "step": 27182
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.586556911468506,
      "learning_rate": 0.000579628081816117,
      "loss": 3.165,
      "step": 27183
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6960593461990356,
      "learning_rate": 0.0005796266001169633,
      "loss": 2.8974,
      "step": 27184
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.6029109954833984,
      "learning_rate": 0.0005796251183658217,
      "loss": 2.883,
      "step": 27185
    },
    {
      "epoch": 0.35,
      "grad_norm": 4.161904335021973,
      "learning_rate": 0.0005796236365626925,
      "loss": 3.1046,
      "step": 27186
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9311299324035645,
      "learning_rate": 0.000579622154707576,
      "loss": 3.0639,
      "step": 27187
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6643153429031372,
      "learning_rate": 0.0005796206728004724,
      "loss": 3.0978,
      "step": 27188
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.118513584136963,
      "learning_rate": 0.0005796191908413823,
      "loss": 2.9742,
      "step": 27189
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.544179916381836,
      "learning_rate": 0.0005796177088303055,
      "loss": 3.2253,
      "step": 27190
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3639049530029297,
      "learning_rate": 0.0005796162267672425,
      "loss": 3.1147,
      "step": 27191
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4367344379425049,
      "learning_rate": 0.0005796147446521935,
      "loss": 2.9901,
      "step": 27192
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.515328049659729,
      "learning_rate": 0.0005796132624851589,
      "loss": 3.0556,
      "step": 27193
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.523113489151001,
      "learning_rate": 0.0005796117802661391,
      "loss": 3.1483,
      "step": 27194
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4061315059661865,
      "learning_rate": 0.0005796102979951339,
      "loss": 3.003,
      "step": 27195
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4777438640594482,
      "learning_rate": 0.0005796088156721441,
      "loss": 3.1603,
      "step": 27196
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3986327648162842,
      "learning_rate": 0.0005796073332971696,
      "loss": 2.9283,
      "step": 27197
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4054735898971558,
      "learning_rate": 0.0005796058508702108,
      "loss": 3.094,
      "step": 27198
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8000164031982422,
      "learning_rate": 0.0005796043683912681,
      "loss": 3.1363,
      "step": 27199
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.684024453163147,
      "learning_rate": 0.0005796028858603416,
      "loss": 3.1185,
      "step": 27200
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.836060643196106,
      "learning_rate": 0.0005796014032774316,
      "loss": 3.2067,
      "step": 27201
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.358210325241089,
      "learning_rate": 0.0005795999206425386,
      "loss": 3.2184,
      "step": 27202
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6985418796539307,
      "learning_rate": 0.0005795984379556626,
      "loss": 3.0996,
      "step": 27203
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5144474506378174,
      "learning_rate": 0.0005795969552168039,
      "loss": 3.1333,
      "step": 27204
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6934221982955933,
      "learning_rate": 0.000579595472425963,
      "loss": 3.2103,
      "step": 27205
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.688942551612854,
      "learning_rate": 0.0005795939895831398,
      "loss": 2.9868,
      "step": 27206
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3420027494430542,
      "learning_rate": 0.000579592506688335,
      "loss": 3.0057,
      "step": 27207
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.655086636543274,
      "learning_rate": 0.0005795910237415486,
      "loss": 3.1907,
      "step": 27208
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.72414231300354,
      "learning_rate": 0.000579589540742781,
      "loss": 3.2496,
      "step": 27209
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8097009658813477,
      "learning_rate": 0.0005795880576920324,
      "loss": 3.3606,
      "step": 27210
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7362045049667358,
      "learning_rate": 0.000579586574589303,
      "loss": 3.1111,
      "step": 27211
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.5696163177490234,
      "learning_rate": 0.0005795850914345934,
      "loss": 2.9693,
      "step": 27212
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4559624195098877,
      "learning_rate": 0.0005795836082279034,
      "loss": 2.8853,
      "step": 27213
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4515002965927124,
      "learning_rate": 0.0005795821249692337,
      "loss": 3.1056,
      "step": 27214
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8071701526641846,
      "learning_rate": 0.0005795806416585843,
      "loss": 3.0298,
      "step": 27215
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.2918028831481934,
      "learning_rate": 0.0005795791582959558,
      "loss": 3.2003,
      "step": 27216
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3678663969039917,
      "learning_rate": 0.000579577674881348,
      "loss": 3.2421,
      "step": 27217
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.470029592514038,
      "learning_rate": 0.0005795761914147616,
      "loss": 2.9825,
      "step": 27218
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5793434381484985,
      "learning_rate": 0.0005795747078961965,
      "loss": 3.2812,
      "step": 27219
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7041492462158203,
      "learning_rate": 0.0005795732243256534,
      "loss": 2.9363,
      "step": 27220
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.952627420425415,
      "learning_rate": 0.0005795717407031323,
      "loss": 3.0065,
      "step": 27221
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.49678635597229,
      "learning_rate": 0.0005795702570286335,
      "loss": 3.1356,
      "step": 27222
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2751541137695312,
      "learning_rate": 0.0005795687733021573,
      "loss": 3.0112,
      "step": 27223
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0844650268554688,
      "learning_rate": 0.0005795672895237041,
      "loss": 3.2702,
      "step": 27224
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8046817779541016,
      "learning_rate": 0.0005795658056932739,
      "loss": 3.0278,
      "step": 27225
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8452870845794678,
      "learning_rate": 0.0005795643218108672,
      "loss": 3.0573,
      "step": 27226
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6053210496902466,
      "learning_rate": 0.0005795628378764843,
      "loss": 3.4494,
      "step": 27227
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.855634331703186,
      "learning_rate": 0.0005795613538901252,
      "loss": 3.0306,
      "step": 27228
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3130356073379517,
      "learning_rate": 0.0005795598698517906,
      "loss": 2.9681,
      "step": 27229
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.93122398853302,
      "learning_rate": 0.0005795583857614805,
      "loss": 2.9491,
      "step": 27230
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5549005270004272,
      "learning_rate": 0.000579556901619195,
      "loss": 3.0928,
      "step": 27231
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5460313558578491,
      "learning_rate": 0.0005795554174249348,
      "loss": 3.1043,
      "step": 27232
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7937407493591309,
      "learning_rate": 0.0005795539331786999,
      "loss": 2.9914,
      "step": 27233
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.9745604991912842,
      "learning_rate": 0.0005795524488804906,
      "loss": 3.2414,
      "step": 27234
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4099698066711426,
      "learning_rate": 0.0005795509645303074,
      "loss": 3.3074,
      "step": 27235
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4388997554779053,
      "learning_rate": 0.0005795494801281503,
      "loss": 3.0729,
      "step": 27236
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5257362127304077,
      "learning_rate": 0.0005795479956740196,
      "loss": 3.2976,
      "step": 27237
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.6054044961929321,
      "learning_rate": 0.0005795465111679157,
      "loss": 2.8918,
      "step": 27238
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8307604789733887,
      "learning_rate": 0.0005795450266098389,
      "loss": 3.0028,
      "step": 27239
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.5822021961212158,
      "learning_rate": 0.0005795435419997894,
      "loss": 3.0647,
      "step": 27240
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8814724683761597,
      "learning_rate": 0.0005795420573377674,
      "loss": 3.0589,
      "step": 27241
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8117377758026123,
      "learning_rate": 0.0005795405726237733,
      "loss": 3.179,
      "step": 27242
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3571676015853882,
      "learning_rate": 0.0005795390878578072,
      "loss": 3.0704,
      "step": 27243
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0136313438415527,
      "learning_rate": 0.0005795376030398696,
      "loss": 3.0898,
      "step": 27244
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.3632311820983887,
      "learning_rate": 0.0005795361181699608,
      "loss": 2.9333,
      "step": 27245
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.2256479263305664,
      "learning_rate": 0.0005795346332480809,
      "loss": 3.1666,
      "step": 27246
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8453280925750732,
      "learning_rate": 0.0005795331482742302,
      "loss": 3.1976,
      "step": 27247
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.77455997467041,
      "learning_rate": 0.000579531663248409,
      "loss": 3.1349,
      "step": 27248
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.59391450881958,
      "learning_rate": 0.0005795301781706176,
      "loss": 2.9461,
      "step": 27249
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.0659327507019043,
      "learning_rate": 0.0005795286930408562,
      "loss": 3.257,
      "step": 27250
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.8831400871276855,
      "learning_rate": 0.0005795272078591251,
      "loss": 3.173,
      "step": 27251
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0737171173095703,
      "learning_rate": 0.0005795257226254248,
      "loss": 3.0892,
      "step": 27252
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.111083745956421,
      "learning_rate": 0.0005795242373397553,
      "loss": 2.9285,
      "step": 27253
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0565898418426514,
      "learning_rate": 0.0005795227520021169,
      "loss": 3.123,
      "step": 27254
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.4404401779174805,
      "learning_rate": 0.0005795212666125101,
      "loss": 3.0199,
      "step": 27255
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.3402554988861084,
      "learning_rate": 0.0005795197811709349,
      "loss": 3.152,
      "step": 27256
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.683154582977295,
      "learning_rate": 0.0005795182956773916,
      "loss": 2.7984,
      "step": 27257
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.425546407699585,
      "learning_rate": 0.0005795168101318808,
      "loss": 3.0693,
      "step": 27258
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.4944273233413696,
      "learning_rate": 0.0005795153245344025,
      "loss": 2.751,
      "step": 27259
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.3724939823150635,
      "learning_rate": 0.000579513838884957,
      "loss": 3.0182,
      "step": 27260
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.50215482711792,
      "learning_rate": 0.0005795123531835446,
      "loss": 3.0144,
      "step": 27261
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.7847142219543457,
      "learning_rate": 0.0005795108674301656,
      "loss": 3.1328,
      "step": 27262
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.526401400566101,
      "learning_rate": 0.0005795093816248201,
      "loss": 3.1145,
      "step": 27263
    },
    {
      "epoch": 0.35,
      "grad_norm": 1.420085072517395,
      "learning_rate": 0.0005795078957675087,
      "loss": 3.1028,
      "step": 27264
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5541996955871582,
      "learning_rate": 0.0005795064098582314,
      "loss": 3.3585,
      "step": 27265
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2333478927612305,
      "learning_rate": 0.0005795049238969886,
      "loss": 3.2134,
      "step": 27266
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.597025752067566,
      "learning_rate": 0.0005795034378837805,
      "loss": 2.9162,
      "step": 27267
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3460665941238403,
      "learning_rate": 0.0005795019518186076,
      "loss": 2.9613,
      "step": 27268
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4609891176223755,
      "learning_rate": 0.0005795004657014699,
      "loss": 2.7929,
      "step": 27269
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4277546405792236,
      "learning_rate": 0.0005794989795323679,
      "loss": 2.9954,
      "step": 27270
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4905718564987183,
      "learning_rate": 0.0005794974933113016,
      "loss": 3.2343,
      "step": 27271
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.454404354095459,
      "learning_rate": 0.0005794960070382716,
      "loss": 3.0065,
      "step": 27272
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4432989358901978,
      "learning_rate": 0.0005794945207132779,
      "loss": 3.0733,
      "step": 27273
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4788777828216553,
      "learning_rate": 0.0005794930343363209,
      "loss": 3.0122,
      "step": 27274
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7699285745620728,
      "learning_rate": 0.0005794915479074009,
      "loss": 3.2562,
      "step": 27275
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.983527183532715,
      "learning_rate": 0.0005794900614265182,
      "loss": 3.0787,
      "step": 27276
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4690524339675903,
      "learning_rate": 0.0005794885748936729,
      "loss": 3.119,
      "step": 27277
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.546647310256958,
      "learning_rate": 0.0005794870883088656,
      "loss": 3.3117,
      "step": 27278
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3695083856582642,
      "learning_rate": 0.0005794856016720961,
      "loss": 3.1436,
      "step": 27279
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3640543222427368,
      "learning_rate": 0.0005794841149833652,
      "loss": 3.1382,
      "step": 27280
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3889553546905518,
      "learning_rate": 0.0005794826282426728,
      "loss": 2.9304,
      "step": 27281
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4627619981765747,
      "learning_rate": 0.0005794811414500192,
      "loss": 3.1993,
      "step": 27282
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7947256565093994,
      "learning_rate": 0.000579479654605405,
      "loss": 2.9694,
      "step": 27283
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7651125192642212,
      "learning_rate": 0.0005794781677088301,
      "loss": 3.0238,
      "step": 27284
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.242628574371338,
      "learning_rate": 0.000579476680760295,
      "loss": 3.2431,
      "step": 27285
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2512519359588623,
      "learning_rate": 0.0005794751937598,
      "loss": 3.2271,
      "step": 27286
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7439650297164917,
      "learning_rate": 0.0005794737067073452,
      "loss": 3.1035,
      "step": 27287
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5844308137893677,
      "learning_rate": 0.0005794722196029309,
      "loss": 3.2329,
      "step": 27288
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1874048709869385,
      "learning_rate": 0.0005794707324465575,
      "loss": 3.4455,
      "step": 27289
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.391974687576294,
      "learning_rate": 0.0005794692452382252,
      "loss": 3.149,
      "step": 27290
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.341475009918213,
      "learning_rate": 0.0005794677579779343,
      "loss": 3.1489,
      "step": 27291
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0773863792419434,
      "learning_rate": 0.000579466270665685,
      "loss": 2.6997,
      "step": 27292
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4451026916503906,
      "learning_rate": 0.0005794647833014777,
      "loss": 3.1152,
      "step": 27293
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5955411195755005,
      "learning_rate": 0.0005794632958853127,
      "loss": 3.3068,
      "step": 27294
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4219475984573364,
      "learning_rate": 0.00057946180841719,
      "loss": 3.1994,
      "step": 27295
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1153051853179932,
      "learning_rate": 0.0005794603208971103,
      "loss": 2.7715,
      "step": 27296
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4063763618469238,
      "learning_rate": 0.0005794588333250736,
      "loss": 3.2991,
      "step": 27297
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6435834169387817,
      "learning_rate": 0.0005794573457010802,
      "loss": 3.0767,
      "step": 27298
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5805660486221313,
      "learning_rate": 0.0005794558580251303,
      "loss": 3.1276,
      "step": 27299
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9991517066955566,
      "learning_rate": 0.0005794543702972244,
      "loss": 3.1243,
      "step": 27300
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7660232782363892,
      "learning_rate": 0.0005794528825173626,
      "loss": 3.1608,
      "step": 27301
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4942275285720825,
      "learning_rate": 0.0005794513946855453,
      "loss": 3.077,
      "step": 27302
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6316659450531006,
      "learning_rate": 0.0005794499068017726,
      "loss": 3.3446,
      "step": 27303
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.924610137939453,
      "learning_rate": 0.000579448418866045,
      "loss": 2.9498,
      "step": 27304
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5902460813522339,
      "learning_rate": 0.0005794469308783626,
      "loss": 3.0493,
      "step": 27305
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6186127662658691,
      "learning_rate": 0.0005794454428387258,
      "loss": 3.0259,
      "step": 27306
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.696833848953247,
      "learning_rate": 0.0005794439547471348,
      "loss": 3.135,
      "step": 27307
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5945887565612793,
      "learning_rate": 0.0005794424666035899,
      "loss": 3.0561,
      "step": 27308
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3155368566513062,
      "learning_rate": 0.0005794409784080913,
      "loss": 3.1983,
      "step": 27309
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6626136302947998,
      "learning_rate": 0.0005794394901606394,
      "loss": 3.1514,
      "step": 27310
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5544945001602173,
      "learning_rate": 0.0005794380018612344,
      "loss": 2.9159,
      "step": 27311
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5728522539138794,
      "learning_rate": 0.0005794365135098766,
      "loss": 3.1338,
      "step": 27312
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.595544695854187,
      "learning_rate": 0.0005794350251065663,
      "loss": 3.0418,
      "step": 27313
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.823665738105774,
      "learning_rate": 0.0005794335366513038,
      "loss": 3.2067,
      "step": 27314
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.303954839706421,
      "learning_rate": 0.0005794320481440893,
      "loss": 3.0314,
      "step": 27315
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6962518692016602,
      "learning_rate": 0.000579430559584923,
      "loss": 3.1146,
      "step": 27316
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7931170463562012,
      "learning_rate": 0.0005794290709738054,
      "loss": 3.0083,
      "step": 27317
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.618914008140564,
      "learning_rate": 0.0005794275823107366,
      "loss": 3.052,
      "step": 27318
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.465030312538147,
      "learning_rate": 0.000579426093595717,
      "loss": 3.1538,
      "step": 27319
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.669722318649292,
      "learning_rate": 0.0005794246048287468,
      "loss": 3.0205,
      "step": 27320
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.521763563156128,
      "learning_rate": 0.0005794231160098262,
      "loss": 3.1201,
      "step": 27321
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6718320846557617,
      "learning_rate": 0.0005794216271389557,
      "loss": 3.0606,
      "step": 27322
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3970667123794556,
      "learning_rate": 0.0005794201382161353,
      "loss": 3.2868,
      "step": 27323
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.540758728981018,
      "learning_rate": 0.0005794186492413656,
      "loss": 3.0343,
      "step": 27324
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3238885402679443,
      "learning_rate": 0.0005794171602146465,
      "loss": 2.9098,
      "step": 27325
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5494606494903564,
      "learning_rate": 0.0005794156711359787,
      "loss": 3.052,
      "step": 27326
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.558443546295166,
      "learning_rate": 0.0005794141820053621,
      "loss": 2.9068,
      "step": 27327
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3612219095230103,
      "learning_rate": 0.0005794126928227971,
      "loss": 3.2643,
      "step": 27328
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5127270221710205,
      "learning_rate": 0.0005794112035882841,
      "loss": 3.0947,
      "step": 27329
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4632412195205688,
      "learning_rate": 0.0005794097143018233,
      "loss": 3.1901,
      "step": 27330
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.460065245628357,
      "learning_rate": 0.0005794082249634149,
      "loss": 3.0368,
      "step": 27331
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4782510995864868,
      "learning_rate": 0.0005794067355730593,
      "loss": 2.8642,
      "step": 27332
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.119950532913208,
      "learning_rate": 0.0005794052461307566,
      "loss": 3.0755,
      "step": 27333
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.638344645500183,
      "learning_rate": 0.0005794037566365073,
      "loss": 3.2295,
      "step": 27334
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4005205631256104,
      "learning_rate": 0.0005794022670903115,
      "loss": 2.9342,
      "step": 27335
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5652564764022827,
      "learning_rate": 0.0005794007774921695,
      "loss": 2.9176,
      "step": 27336
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5999233722686768,
      "learning_rate": 0.0005793992878420817,
      "loss": 3.0758,
      "step": 27337
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8021178245544434,
      "learning_rate": 0.0005793977981400484,
      "loss": 2.9902,
      "step": 27338
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3416695594787598,
      "learning_rate": 0.0005793963083860696,
      "loss": 3.2408,
      "step": 27339
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3215620517730713,
      "learning_rate": 0.0005793948185801458,
      "loss": 3.2864,
      "step": 27340
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3086152076721191,
      "learning_rate": 0.0005793933287222774,
      "loss": 2.9943,
      "step": 27341
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.538925290107727,
      "learning_rate": 0.0005793918388124642,
      "loss": 2.9673,
      "step": 27342
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4694230556488037,
      "learning_rate": 0.0005793903488507069,
      "loss": 3.2047,
      "step": 27343
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.389861583709717,
      "learning_rate": 0.0005793888588370058,
      "loss": 3.1019,
      "step": 27344
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.410369634628296,
      "learning_rate": 0.0005793873687713609,
      "loss": 2.8268,
      "step": 27345
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7698622941970825,
      "learning_rate": 0.0005793858786537726,
      "loss": 3.2017,
      "step": 27346
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2849013805389404,
      "learning_rate": 0.0005793843884842413,
      "loss": 2.9343,
      "step": 27347
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.297027349472046,
      "learning_rate": 0.000579382898262767,
      "loss": 3.1778,
      "step": 27348
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4149765968322754,
      "learning_rate": 0.0005793814079893502,
      "loss": 3.0128,
      "step": 27349
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5971553325653076,
      "learning_rate": 0.0005793799176639913,
      "loss": 3.0814,
      "step": 27350
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3042373657226562,
      "learning_rate": 0.0005793784272866903,
      "loss": 2.8842,
      "step": 27351
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5600779056549072,
      "learning_rate": 0.0005793769368574476,
      "loss": 3.0829,
      "step": 27352
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6946097612380981,
      "learning_rate": 0.0005793754463762633,
      "loss": 3.0798,
      "step": 27353
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6188809871673584,
      "learning_rate": 0.0005793739558431379,
      "loss": 2.9176,
      "step": 27354
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3279284238815308,
      "learning_rate": 0.0005793724652580717,
      "loss": 2.7533,
      "step": 27355
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4445104598999023,
      "learning_rate": 0.0005793709746210648,
      "loss": 2.9184,
      "step": 27356
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9178872108459473,
      "learning_rate": 0.0005793694839321176,
      "loss": 3.0212,
      "step": 27357
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3645604848861694,
      "learning_rate": 0.0005793679931912303,
      "loss": 3.1767,
      "step": 27358
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5819483995437622,
      "learning_rate": 0.0005793665023984033,
      "loss": 2.8959,
      "step": 27359
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3692395687103271,
      "learning_rate": 0.0005793650115536367,
      "loss": 3.2221,
      "step": 27360
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5441932678222656,
      "learning_rate": 0.0005793635206569309,
      "loss": 2.8764,
      "step": 27361
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.481079339981079,
      "learning_rate": 0.0005793620297082862,
      "loss": 3.0851,
      "step": 27362
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.426905632019043,
      "learning_rate": 0.0005793605387077027,
      "loss": 3.2015,
      "step": 27363
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.233738422393799,
      "learning_rate": 0.0005793590476551809,
      "loss": 2.8142,
      "step": 27364
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8303123712539673,
      "learning_rate": 0.000579357556550721,
      "loss": 3.2062,
      "step": 27365
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9691526889801025,
      "learning_rate": 0.0005793560653943233,
      "loss": 3.1114,
      "step": 27366
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.868512749671936,
      "learning_rate": 0.0005793545741859878,
      "loss": 2.9646,
      "step": 27367
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.129460334777832,
      "learning_rate": 0.0005793530829257152,
      "loss": 3.1723,
      "step": 27368
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5495814085006714,
      "learning_rate": 0.0005793515916135057,
      "loss": 2.9623,
      "step": 27369
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5830459594726562,
      "learning_rate": 0.0005793501002493592,
      "loss": 2.7603,
      "step": 27370
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1269659996032715,
      "learning_rate": 0.0005793486088332764,
      "loss": 2.8987,
      "step": 27371
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5288949012756348,
      "learning_rate": 0.0005793471173652574,
      "loss": 3.0153,
      "step": 27372
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5934219360351562,
      "learning_rate": 0.0005793456258453025,
      "loss": 2.893,
      "step": 27373
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7676692008972168,
      "learning_rate": 0.000579344134273412,
      "loss": 2.9985,
      "step": 27374
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3400312662124634,
      "learning_rate": 0.000579342642649586,
      "loss": 3.1279,
      "step": 27375
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6429834365844727,
      "learning_rate": 0.0005793411509738251,
      "loss": 3.0263,
      "step": 27376
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3253889083862305,
      "learning_rate": 0.0005793396592461294,
      "loss": 3.0809,
      "step": 27377
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4458647966384888,
      "learning_rate": 0.000579338167466499,
      "loss": 3.1388,
      "step": 27378
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6216715574264526,
      "learning_rate": 0.0005793366756349346,
      "loss": 2.908,
      "step": 27379
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.924267053604126,
      "learning_rate": 0.0005793351837514361,
      "loss": 2.8721,
      "step": 27380
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5654374361038208,
      "learning_rate": 0.0005793336918160039,
      "loss": 3.1649,
      "step": 27381
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.343017578125,
      "learning_rate": 0.0005793321998286383,
      "loss": 3.2218,
      "step": 27382
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1130542755126953,
      "learning_rate": 0.0005793307077893397,
      "loss": 3.1211,
      "step": 27383
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5841373205184937,
      "learning_rate": 0.0005793292156981082,
      "loss": 2.8562,
      "step": 27384
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9081860780715942,
      "learning_rate": 0.0005793277235549441,
      "loss": 3.0703,
      "step": 27385
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4544878005981445,
      "learning_rate": 0.0005793262313598476,
      "loss": 3.2601,
      "step": 27386
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.1108198165893555,
      "learning_rate": 0.0005793247391128192,
      "loss": 2.9744,
      "step": 27387
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5741275548934937,
      "learning_rate": 0.000579323246813859,
      "loss": 3.2234,
      "step": 27388
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8071775436401367,
      "learning_rate": 0.0005793217544629674,
      "loss": 3.1015,
      "step": 27389
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.430504560470581,
      "learning_rate": 0.0005793202620601447,
      "loss": 3.1489,
      "step": 27390
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8575773239135742,
      "learning_rate": 0.0005793187696053908,
      "loss": 2.9884,
      "step": 27391
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.628711462020874,
      "learning_rate": 0.0005793172770987065,
      "loss": 2.9217,
      "step": 27392
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6372095346450806,
      "learning_rate": 0.0005793157845400918,
      "loss": 2.7405,
      "step": 27393
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6247341632843018,
      "learning_rate": 0.000579314291929547,
      "loss": 3.0318,
      "step": 27394
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.627662181854248,
      "learning_rate": 0.0005793127992670725,
      "loss": 3.0142,
      "step": 27395
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2223961353302002,
      "learning_rate": 0.0005793113065526683,
      "loss": 3.0901,
      "step": 27396
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7031110525131226,
      "learning_rate": 0.0005793098137863349,
      "loss": 2.8868,
      "step": 27397
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.030097484588623,
      "learning_rate": 0.0005793083209680727,
      "loss": 3.134,
      "step": 27398
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3593881130218506,
      "learning_rate": 0.0005793068280978816,
      "loss": 3.0723,
      "step": 27399
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3882532119750977,
      "learning_rate": 0.0005793053351757622,
      "loss": 3.1785,
      "step": 27400
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.028127431869507,
      "learning_rate": 0.0005793038422017146,
      "loss": 3.055,
      "step": 27401
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.839089035987854,
      "learning_rate": 0.0005793023491757393,
      "loss": 3.0917,
      "step": 27402
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4022085666656494,
      "learning_rate": 0.0005793008560978362,
      "loss": 3.1367,
      "step": 27403
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7873400449752808,
      "learning_rate": 0.0005792993629680059,
      "loss": 2.946,
      "step": 27404
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4292546510696411,
      "learning_rate": 0.0005792978697862486,
      "loss": 3.1941,
      "step": 27405
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5453827381134033,
      "learning_rate": 0.0005792963765525645,
      "loss": 3.1558,
      "step": 27406
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3395094871520996,
      "learning_rate": 0.000579294883266954,
      "loss": 3.195,
      "step": 27407
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6520235538482666,
      "learning_rate": 0.0005792933899294172,
      "loss": 3.1992,
      "step": 27408
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6839075088500977,
      "learning_rate": 0.0005792918965399547,
      "loss": 3.3489,
      "step": 27409
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7483406066894531,
      "learning_rate": 0.0005792904030985665,
      "loss": 3.0406,
      "step": 27410
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.705528974533081,
      "learning_rate": 0.0005792889096052527,
      "loss": 3.1378,
      "step": 27411
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3438514471054077,
      "learning_rate": 0.0005792874160600141,
      "loss": 3.0132,
      "step": 27412
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3658506870269775,
      "learning_rate": 0.0005792859224628505,
      "loss": 2.9898,
      "step": 27413
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.3427343368530273,
      "learning_rate": 0.0005792844288137625,
      "loss": 3.1797,
      "step": 27414
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2372552156448364,
      "learning_rate": 0.0005792829351127502,
      "loss": 3.0325,
      "step": 27415
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.367794156074524,
      "learning_rate": 0.0005792814413598139,
      "loss": 2.8055,
      "step": 27416
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5219197273254395,
      "learning_rate": 0.000579279947554954,
      "loss": 2.9233,
      "step": 27417
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.280379056930542,
      "learning_rate": 0.0005792784536981706,
      "loss": 3.1499,
      "step": 27418
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.292516827583313,
      "learning_rate": 0.0005792769597894641,
      "loss": 3.1502,
      "step": 27419
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4090949296951294,
      "learning_rate": 0.0005792754658288348,
      "loss": 3.1731,
      "step": 27420
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5137648582458496,
      "learning_rate": 0.0005792739718162829,
      "loss": 3.3542,
      "step": 27421
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4716308116912842,
      "learning_rate": 0.0005792724777518085,
      "loss": 3.3279,
      "step": 27422
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.585166335105896,
      "learning_rate": 0.0005792709836354124,
      "loss": 2.8684,
      "step": 27423
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.433039665222168,
      "learning_rate": 0.0005792694894670943,
      "loss": 3.1666,
      "step": 27424
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4660449028015137,
      "learning_rate": 0.0005792679952468548,
      "loss": 3.2325,
      "step": 27425
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4399417638778687,
      "learning_rate": 0.0005792665009746942,
      "loss": 3.0886,
      "step": 27426
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4937036037445068,
      "learning_rate": 0.0005792650066506126,
      "loss": 2.9676,
      "step": 27427
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8316563367843628,
      "learning_rate": 0.0005792635122746103,
      "loss": 3.0093,
      "step": 27428
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3760453462600708,
      "learning_rate": 0.0005792620178466878,
      "loss": 3.3833,
      "step": 27429
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.9305946826934814,
      "learning_rate": 0.0005792605233668451,
      "loss": 3.2642,
      "step": 27430
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3955121040344238,
      "learning_rate": 0.0005792590288350827,
      "loss": 3.1536,
      "step": 27431
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.394709825515747,
      "learning_rate": 0.0005792575342514006,
      "loss": 3.0227,
      "step": 27432
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5462933778762817,
      "learning_rate": 0.0005792560396157994,
      "loss": 3.2207,
      "step": 27433
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.828952670097351,
      "learning_rate": 0.0005792545449282792,
      "loss": 3.0394,
      "step": 27434
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6604220867156982,
      "learning_rate": 0.0005792530501888403,
      "loss": 3.0566,
      "step": 27435
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8028234243392944,
      "learning_rate": 0.000579251555397483,
      "loss": 2.8412,
      "step": 27436
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6589133739471436,
      "learning_rate": 0.0005792500605542076,
      "loss": 3.0065,
      "step": 27437
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.346012592315674,
      "learning_rate": 0.0005792485656590143,
      "loss": 2.984,
      "step": 27438
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5776394605636597,
      "learning_rate": 0.0005792470707119033,
      "loss": 3.0158,
      "step": 27439
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6911695003509521,
      "learning_rate": 0.0005792455757128752,
      "loss": 3.246,
      "step": 27440
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3724744319915771,
      "learning_rate": 0.00057924408066193,
      "loss": 2.9809,
      "step": 27441
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9781888723373413,
      "learning_rate": 0.000579242585559068,
      "loss": 3.0325,
      "step": 27442
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.082083225250244,
      "learning_rate": 0.0005792410904042896,
      "loss": 2.9859,
      "step": 27443
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5084943771362305,
      "learning_rate": 0.0005792395951975949,
      "loss": 2.7133,
      "step": 27444
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.769771933555603,
      "learning_rate": 0.0005792380999389844,
      "loss": 3.05,
      "step": 27445
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1313207149505615,
      "learning_rate": 0.0005792366046284583,
      "loss": 2.9239,
      "step": 27446
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.849967122077942,
      "learning_rate": 0.0005792351092660168,
      "loss": 3.0014,
      "step": 27447
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.729995846748352,
      "learning_rate": 0.0005792336138516601,
      "loss": 3.0217,
      "step": 27448
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3336191177368164,
      "learning_rate": 0.0005792321183853888,
      "loss": 3.143,
      "step": 27449
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4339512586593628,
      "learning_rate": 0.0005792306228672028,
      "loss": 3.0077,
      "step": 27450
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6734118461608887,
      "learning_rate": 0.0005792291272971027,
      "loss": 3.1288,
      "step": 27451
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4438724517822266,
      "learning_rate": 0.0005792276316750886,
      "loss": 3.0287,
      "step": 27452
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4469401836395264,
      "learning_rate": 0.0005792261360011607,
      "loss": 3.0002,
      "step": 27453
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.477103590965271,
      "learning_rate": 0.0005792246402753196,
      "loss": 3.1555,
      "step": 27454
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.8915979862213135,
      "learning_rate": 0.0005792231444975652,
      "loss": 2.7499,
      "step": 27455
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5650862455368042,
      "learning_rate": 0.000579221648667898,
      "loss": 3.129,
      "step": 27456
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.800174355506897,
      "learning_rate": 0.0005792201527863182,
      "loss": 3.0695,
      "step": 27457
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.260585069656372,
      "learning_rate": 0.0005792186568528261,
      "loss": 3.1355,
      "step": 27458
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1890273094177246,
      "learning_rate": 0.000579217160867422,
      "loss": 3.2849,
      "step": 27459
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3675581216812134,
      "learning_rate": 0.0005792156648301062,
      "loss": 2.9302,
      "step": 27460
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2664756774902344,
      "learning_rate": 0.0005792141687408789,
      "loss": 3.0404,
      "step": 27461
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6228857040405273,
      "learning_rate": 0.0005792126725997404,
      "loss": 2.9717,
      "step": 27462
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5556883811950684,
      "learning_rate": 0.0005792111764066911,
      "loss": 2.9872,
      "step": 27463
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.545435905456543,
      "learning_rate": 0.000579209680161731,
      "loss": 3.2102,
      "step": 27464
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3928248882293701,
      "learning_rate": 0.0005792081838648606,
      "loss": 2.9606,
      "step": 27465
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.1670432090759277,
      "learning_rate": 0.0005792066875160802,
      "loss": 2.9015,
      "step": 27466
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4630930423736572,
      "learning_rate": 0.00057920519111539,
      "loss": 3.0766,
      "step": 27467
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6878961324691772,
      "learning_rate": 0.0005792036946627902,
      "loss": 3.0318,
      "step": 27468
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6964764595031738,
      "learning_rate": 0.0005792021981582812,
      "loss": 2.9757,
      "step": 27469
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9737274646759033,
      "learning_rate": 0.0005792007016018633,
      "loss": 3.2138,
      "step": 27470
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3495266437530518,
      "learning_rate": 0.0005791992049935367,
      "loss": 2.8586,
      "step": 27471
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.603936791419983,
      "learning_rate": 0.0005791977083333017,
      "loss": 3.0231,
      "step": 27472
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3583906888961792,
      "learning_rate": 0.0005791962116211585,
      "loss": 3.0255,
      "step": 27473
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.998136043548584,
      "learning_rate": 0.0005791947148571074,
      "loss": 2.9124,
      "step": 27474
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.8452701568603516,
      "learning_rate": 0.0005791932180411489,
      "loss": 3.18,
      "step": 27475
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3696116209030151,
      "learning_rate": 0.000579191721173283,
      "loss": 3.1826,
      "step": 27476
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2021007537841797,
      "learning_rate": 0.0005791902242535102,
      "loss": 2.9001,
      "step": 27477
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7784048318862915,
      "learning_rate": 0.0005791887272818306,
      "loss": 3.0668,
      "step": 27478
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6309453248977661,
      "learning_rate": 0.0005791872302582445,
      "loss": 3.2251,
      "step": 27479
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3203341960906982,
      "learning_rate": 0.0005791857331827522,
      "loss": 3.0908,
      "step": 27480
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3946179151535034,
      "learning_rate": 0.0005791842360553542,
      "loss": 3.0178,
      "step": 27481
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.304560661315918,
      "learning_rate": 0.0005791827388760504,
      "loss": 3.1244,
      "step": 27482
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3960819244384766,
      "learning_rate": 0.0005791812416448413,
      "loss": 2.9135,
      "step": 27483
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0535829067230225,
      "learning_rate": 0.0005791797443617272,
      "loss": 3.0753,
      "step": 27484
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8127790689468384,
      "learning_rate": 0.0005791782470267083,
      "loss": 3.0455,
      "step": 27485
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7297930717468262,
      "learning_rate": 0.0005791767496397847,
      "loss": 3.2016,
      "step": 27486
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4675941467285156,
      "learning_rate": 0.0005791752522009571,
      "loss": 3.0024,
      "step": 27487
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7764853239059448,
      "learning_rate": 0.0005791737547102255,
      "loss": 3.2229,
      "step": 27488
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3856488466262817,
      "learning_rate": 0.0005791722571675903,
      "loss": 3.1455,
      "step": 27489
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.644068956375122,
      "learning_rate": 0.0005791707595730515,
      "loss": 3.0469,
      "step": 27490
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8316892385482788,
      "learning_rate": 0.0005791692619266098,
      "loss": 3.1982,
      "step": 27491
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.816865086555481,
      "learning_rate": 0.0005791677642282651,
      "loss": 3.1561,
      "step": 27492
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8720113039016724,
      "learning_rate": 0.0005791662664780179,
      "loss": 3.0778,
      "step": 27493
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.855800747871399,
      "learning_rate": 0.0005791647686758685,
      "loss": 3.3215,
      "step": 27494
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4606002569198608,
      "learning_rate": 0.000579163270821817,
      "loss": 3.0883,
      "step": 27495
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5238192081451416,
      "learning_rate": 0.0005791617729158637,
      "loss": 3.234,
      "step": 27496
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9613820314407349,
      "learning_rate": 0.0005791602749580092,
      "loss": 2.9418,
      "step": 27497
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.533419370651245,
      "learning_rate": 0.0005791587769482533,
      "loss": 3.222,
      "step": 27498
    },
    {
      "epoch": 0.36,
      "grad_norm": 5.744696617126465,
      "learning_rate": 0.0005791572788865967,
      "loss": 2.9059,
      "step": 27499
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.741623044013977,
      "learning_rate": 0.0005791557807730394,
      "loss": 2.9935,
      "step": 27500
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.022925853729248,
      "learning_rate": 0.0005791542826075818,
      "loss": 3.1474,
      "step": 27501
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4314919710159302,
      "learning_rate": 0.000579152784390224,
      "loss": 3.1009,
      "step": 27502
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9366999864578247,
      "learning_rate": 0.0005791512861209666,
      "loss": 2.8946,
      "step": 27503
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6171109676361084,
      "learning_rate": 0.0005791497877998097,
      "loss": 3.0626,
      "step": 27504
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2540732622146606,
      "learning_rate": 0.0005791482894267535,
      "loss": 2.9362,
      "step": 27505
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.617079257965088,
      "learning_rate": 0.0005791467910017985,
      "loss": 2.936,
      "step": 27506
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.17685866355896,
      "learning_rate": 0.0005791452925249447,
      "loss": 2.8196,
      "step": 27507
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2419692277908325,
      "learning_rate": 0.0005791437939961926,
      "loss": 2.92,
      "step": 27508
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.371826410293579,
      "learning_rate": 0.0005791422954155423,
      "loss": 3.0211,
      "step": 27509
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6172020435333252,
      "learning_rate": 0.0005791407967829944,
      "loss": 3.3053,
      "step": 27510
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5440200567245483,
      "learning_rate": 0.0005791392980985488,
      "loss": 3.1416,
      "step": 27511
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3627914190292358,
      "learning_rate": 0.0005791377993622058,
      "loss": 3.2155,
      "step": 27512
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4107599258422852,
      "learning_rate": 0.0005791363005739661,
      "loss": 2.9246,
      "step": 27513
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3744685649871826,
      "learning_rate": 0.0005791348017338294,
      "loss": 2.9901,
      "step": 27514
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9396629333496094,
      "learning_rate": 0.0005791333028417965,
      "loss": 2.9199,
      "step": 27515
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6175261735916138,
      "learning_rate": 0.0005791318038978674,
      "loss": 3.0697,
      "step": 27516
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6252788305282593,
      "learning_rate": 0.0005791303049020424,
      "loss": 2.7843,
      "step": 27517
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5484492778778076,
      "learning_rate": 0.0005791288058543218,
      "loss": 3.2897,
      "step": 27518
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.52275812625885,
      "learning_rate": 0.0005791273067547059,
      "loss": 3.3023,
      "step": 27519
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7933354377746582,
      "learning_rate": 0.0005791258076031949,
      "loss": 2.8795,
      "step": 27520
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4637818336486816,
      "learning_rate": 0.0005791243083997893,
      "loss": 3.2389,
      "step": 27521
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1565194129943848,
      "learning_rate": 0.0005791228091444891,
      "loss": 3.0518,
      "step": 27522
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.097222328186035,
      "learning_rate": 0.0005791213098372946,
      "loss": 2.8202,
      "step": 27523
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3964353799819946,
      "learning_rate": 0.0005791198104782064,
      "loss": 3.0228,
      "step": 27524
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4669371843338013,
      "learning_rate": 0.0005791183110672245,
      "loss": 3.1349,
      "step": 27525
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.531874179840088,
      "learning_rate": 0.0005791168116043491,
      "loss": 3.0859,
      "step": 27526
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8350368738174438,
      "learning_rate": 0.0005791153120895808,
      "loss": 3.0257,
      "step": 27527
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.724084496498108,
      "learning_rate": 0.0005791138125229195,
      "loss": 2.982,
      "step": 27528
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5085043907165527,
      "learning_rate": 0.0005791123129043658,
      "loss": 2.9087,
      "step": 27529
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.117133617401123,
      "learning_rate": 0.00057911081323392,
      "loss": 3.1871,
      "step": 27530
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7928824424743652,
      "learning_rate": 0.0005791093135115819,
      "loss": 2.953,
      "step": 27531
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1129448413848877,
      "learning_rate": 0.0005791078137373524,
      "loss": 2.9739,
      "step": 27532
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.684540867805481,
      "learning_rate": 0.0005791063139112313,
      "loss": 3.1439,
      "step": 27533
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5879865884780884,
      "learning_rate": 0.0005791048140332192,
      "loss": 2.9192,
      "step": 27534
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7308194637298584,
      "learning_rate": 0.0005791033141033161,
      "loss": 3.1725,
      "step": 27535
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7266790866851807,
      "learning_rate": 0.0005791018141215226,
      "loss": 3.3823,
      "step": 27536
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.407489776611328,
      "learning_rate": 0.0005791003140878387,
      "loss": 2.9417,
      "step": 27537
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.710595726966858,
      "learning_rate": 0.0005790988140022648,
      "loss": 3.2084,
      "step": 27538
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.530557870864868,
      "learning_rate": 0.0005790973138648011,
      "loss": 3.118,
      "step": 27539
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.275167226791382,
      "learning_rate": 0.000579095813675448,
      "loss": 2.9829,
      "step": 27540
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0608909130096436,
      "learning_rate": 0.0005790943134342057,
      "loss": 2.9403,
      "step": 27541
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0214109420776367,
      "learning_rate": 0.0005790928131410746,
      "loss": 3.1327,
      "step": 27542
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.076878070831299,
      "learning_rate": 0.0005790913127960547,
      "loss": 3.2088,
      "step": 27543
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0085136890411377,
      "learning_rate": 0.0005790898123991466,
      "loss": 3.1366,
      "step": 27544
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4236317873001099,
      "learning_rate": 0.0005790883119503504,
      "loss": 3.0116,
      "step": 27545
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3619349002838135,
      "learning_rate": 0.0005790868114496664,
      "loss": 3.07,
      "step": 27546
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.799462080001831,
      "learning_rate": 0.0005790853108970949,
      "loss": 3.0818,
      "step": 27547
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5601110458374023,
      "learning_rate": 0.0005790838102926361,
      "loss": 3.3431,
      "step": 27548
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6033076047897339,
      "learning_rate": 0.0005790823096362904,
      "loss": 3.2449,
      "step": 27549
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3685004711151123,
      "learning_rate": 0.0005790808089280582,
      "loss": 2.7532,
      "step": 27550
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3699707984924316,
      "learning_rate": 0.0005790793081679394,
      "loss": 3.2065,
      "step": 27551
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.444422721862793,
      "learning_rate": 0.0005790778073559345,
      "loss": 3.2279,
      "step": 27552
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8899035453796387,
      "learning_rate": 0.0005790763064920439,
      "loss": 3.0824,
      "step": 27553
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4950830936431885,
      "learning_rate": 0.0005790748055762676,
      "loss": 3.0072,
      "step": 27554
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3151576519012451,
      "learning_rate": 0.0005790733046086061,
      "loss": 3.2814,
      "step": 27555
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1889703273773193,
      "learning_rate": 0.0005790718035890597,
      "loss": 3.012,
      "step": 27556
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.326399087905884,
      "learning_rate": 0.0005790703025176283,
      "loss": 3.1851,
      "step": 27557
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4938092231750488,
      "learning_rate": 0.0005790688013943126,
      "loss": 2.9696,
      "step": 27558
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5296027660369873,
      "learning_rate": 0.0005790673002191129,
      "loss": 3.1746,
      "step": 27559
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.784393548965454,
      "learning_rate": 0.0005790657989920291,
      "loss": 3.1099,
      "step": 27560
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.922452449798584,
      "learning_rate": 0.0005790642977130619,
      "loss": 2.9987,
      "step": 27561
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5196095705032349,
      "learning_rate": 0.0005790627963822112,
      "loss": 3.0415,
      "step": 27562
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3406331539154053,
      "learning_rate": 0.0005790612949994776,
      "loss": 3.192,
      "step": 27563
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.1463639736175537,
      "learning_rate": 0.0005790597935648611,
      "loss": 3.1405,
      "step": 27564
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.891847014427185,
      "learning_rate": 0.0005790582920783623,
      "loss": 3.2343,
      "step": 27565
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2762689590454102,
      "learning_rate": 0.0005790567905399811,
      "loss": 2.9969,
      "step": 27566
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4058585166931152,
      "learning_rate": 0.0005790552889497181,
      "loss": 3.0136,
      "step": 27567
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.898557424545288,
      "learning_rate": 0.0005790537873075735,
      "loss": 3.2219,
      "step": 27568
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.449209690093994,
      "learning_rate": 0.0005790522856135474,
      "loss": 3.5522,
      "step": 27569
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3913533687591553,
      "learning_rate": 0.0005790507838676403,
      "loss": 3.2064,
      "step": 27570
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4448049068450928,
      "learning_rate": 0.0005790492820698523,
      "loss": 3.1312,
      "step": 27571
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.29262375831604,
      "learning_rate": 0.0005790477802201839,
      "loss": 3.2022,
      "step": 27572
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.021085262298584,
      "learning_rate": 0.0005790462783186351,
      "loss": 3.0697,
      "step": 27573
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6442487239837646,
      "learning_rate": 0.0005790447763652064,
      "loss": 3.3379,
      "step": 27574
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7974984645843506,
      "learning_rate": 0.000579043274359898,
      "loss": 3.0741,
      "step": 27575
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.747951626777649,
      "learning_rate": 0.0005790417723027103,
      "loss": 3.3394,
      "step": 27576
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.915785551071167,
      "learning_rate": 0.0005790402701936434,
      "loss": 2.9709,
      "step": 27577
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3742237091064453,
      "learning_rate": 0.0005790387680326976,
      "loss": 3.1568,
      "step": 27578
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6440569162368774,
      "learning_rate": 0.0005790372658198732,
      "loss": 3.3699,
      "step": 27579
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2123303413391113,
      "learning_rate": 0.0005790357635551705,
      "loss": 3.0639,
      "step": 27580
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2959696054458618,
      "learning_rate": 0.0005790342612385899,
      "loss": 3.2973,
      "step": 27581
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8629339933395386,
      "learning_rate": 0.0005790327588701315,
      "loss": 3.1194,
      "step": 27582
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.7017483711242676,
      "learning_rate": 0.0005790312564497956,
      "loss": 3.2904,
      "step": 27583
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6397684812545776,
      "learning_rate": 0.0005790297539775825,
      "loss": 3.0892,
      "step": 27584
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7554214000701904,
      "learning_rate": 0.0005790282514534926,
      "loss": 2.8925,
      "step": 27585
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.37101411819458,
      "learning_rate": 0.0005790267488775261,
      "loss": 3.0615,
      "step": 27586
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4808375835418701,
      "learning_rate": 0.0005790252462496832,
      "loss": 3.1207,
      "step": 27587
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.614141821861267,
      "learning_rate": 0.0005790237435699643,
      "loss": 2.9945,
      "step": 27588
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.160524845123291,
      "learning_rate": 0.0005790222408383695,
      "loss": 3.1204,
      "step": 27589
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0285327434539795,
      "learning_rate": 0.0005790207380548993,
      "loss": 3.08,
      "step": 27590
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6074519157409668,
      "learning_rate": 0.0005790192352195539,
      "loss": 3.1979,
      "step": 27591
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9871200323104858,
      "learning_rate": 0.0005790177323323334,
      "loss": 3.0089,
      "step": 27592
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2339510917663574,
      "learning_rate": 0.0005790162293932384,
      "loss": 3.0831,
      "step": 27593
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3839532136917114,
      "learning_rate": 0.0005790147264022689,
      "loss": 3.0088,
      "step": 27594
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4795035123825073,
      "learning_rate": 0.0005790132233594253,
      "loss": 3.2596,
      "step": 27595
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4893773794174194,
      "learning_rate": 0.000579011720264708,
      "loss": 2.9791,
      "step": 27596
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9365335702896118,
      "learning_rate": 0.000579010217118117,
      "loss": 3.2458,
      "step": 27597
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4622917175292969,
      "learning_rate": 0.0005790087139196529,
      "loss": 3.2515,
      "step": 27598
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6423888206481934,
      "learning_rate": 0.0005790072106693156,
      "loss": 3.1045,
      "step": 27599
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9278148412704468,
      "learning_rate": 0.0005790057073671058,
      "loss": 2.9383,
      "step": 27600
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.806499719619751,
      "learning_rate": 0.0005790042040130234,
      "loss": 3.2568,
      "step": 27601
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2869833707809448,
      "learning_rate": 0.000579002700607069,
      "loss": 3.0989,
      "step": 27602
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9647012948989868,
      "learning_rate": 0.0005790011971492427,
      "loss": 2.9945,
      "step": 27603
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9606389999389648,
      "learning_rate": 0.0005789996936395447,
      "loss": 3.1512,
      "step": 27604
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1957569122314453,
      "learning_rate": 0.0005789981900779756,
      "loss": 3.0079,
      "step": 27605
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.529995322227478,
      "learning_rate": 0.0005789966864645353,
      "loss": 3.1453,
      "step": 27606
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.8446805477142334,
      "learning_rate": 0.0005789951827992242,
      "loss": 3.1757,
      "step": 27607
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9533354043960571,
      "learning_rate": 0.0005789936790820429,
      "loss": 3.1509,
      "step": 27608
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4218376874923706,
      "learning_rate": 0.0005789921753129911,
      "loss": 3.0466,
      "step": 27609
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.597529411315918,
      "learning_rate": 0.0005789906714920696,
      "loss": 3.3382,
      "step": 27610
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4486826658248901,
      "learning_rate": 0.0005789891676192783,
      "loss": 2.9714,
      "step": 27611
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5035499334335327,
      "learning_rate": 0.0005789876636946177,
      "loss": 3.1353,
      "step": 27612
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7330822944641113,
      "learning_rate": 0.0005789861597180881,
      "loss": 3.0746,
      "step": 27613
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7675838470458984,
      "learning_rate": 0.0005789846556896897,
      "loss": 3.0892,
      "step": 27614
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6169170141220093,
      "learning_rate": 0.0005789831516094228,
      "loss": 3.0948,
      "step": 27615
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1888821125030518,
      "learning_rate": 0.0005789816474772875,
      "loss": 2.8192,
      "step": 27616
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5592041015625,
      "learning_rate": 0.0005789801432932845,
      "loss": 2.9009,
      "step": 27617
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3576340675354004,
      "learning_rate": 0.0005789786390574136,
      "loss": 2.9478,
      "step": 27618
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5998421907424927,
      "learning_rate": 0.0005789771347696754,
      "loss": 2.963,
      "step": 27619
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.190234899520874,
      "learning_rate": 0.0005789756304300701,
      "loss": 2.9548,
      "step": 27620
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3891026973724365,
      "learning_rate": 0.000578974126038598,
      "loss": 2.9784,
      "step": 27621
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5625563859939575,
      "learning_rate": 0.0005789726215952592,
      "loss": 3.1025,
      "step": 27622
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5073481798171997,
      "learning_rate": 0.0005789711171000542,
      "loss": 3.2125,
      "step": 27623
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7243298292160034,
      "learning_rate": 0.0005789696125529832,
      "loss": 3.0334,
      "step": 27624
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6013966798782349,
      "learning_rate": 0.0005789681079540465,
      "loss": 3.014,
      "step": 27625
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.487700343132019,
      "learning_rate": 0.0005789666033032444,
      "loss": 2.9605,
      "step": 27626
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2783414125442505,
      "learning_rate": 0.0005789650986005771,
      "loss": 2.89,
      "step": 27627
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.8031165599823,
      "learning_rate": 0.0005789635938460449,
      "loss": 2.884,
      "step": 27628
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.7184855937957764,
      "learning_rate": 0.0005789620890396482,
      "loss": 2.9843,
      "step": 27629
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.480907678604126,
      "learning_rate": 0.0005789605841813871,
      "loss": 3.1654,
      "step": 27630
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.606719732284546,
      "learning_rate": 0.000578959079271262,
      "loss": 3.0366,
      "step": 27631
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.442042112350464,
      "learning_rate": 0.000578957574309273,
      "loss": 2.9873,
      "step": 27632
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8563746213912964,
      "learning_rate": 0.0005789560692954207,
      "loss": 3.26,
      "step": 27633
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4144784212112427,
      "learning_rate": 0.0005789545642297051,
      "loss": 3.1521,
      "step": 27634
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9344691038131714,
      "learning_rate": 0.0005789530591121266,
      "loss": 2.9106,
      "step": 27635
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6728243827819824,
      "learning_rate": 0.0005789515539426855,
      "loss": 3.0206,
      "step": 27636
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4260563850402832,
      "learning_rate": 0.000578950048721382,
      "loss": 3.1391,
      "step": 27637
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.263720154762268,
      "learning_rate": 0.0005789485434482165,
      "loss": 3.0524,
      "step": 27638
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9954838752746582,
      "learning_rate": 0.0005789470381231891,
      "loss": 3.0572,
      "step": 27639
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8732656240463257,
      "learning_rate": 0.0005789455327463002,
      "loss": 2.9636,
      "step": 27640
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5066113471984863,
      "learning_rate": 0.0005789440273175501,
      "loss": 2.9669,
      "step": 27641
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8740657567977905,
      "learning_rate": 0.000578942521836939,
      "loss": 3.1172,
      "step": 27642
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.29685640335083,
      "learning_rate": 0.0005789410163044672,
      "loss": 3.1906,
      "step": 27643
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3358664512634277,
      "learning_rate": 0.0005789395107201351,
      "loss": 3.1541,
      "step": 27644
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3907508850097656,
      "learning_rate": 0.0005789380050839428,
      "loss": 3.0253,
      "step": 27645
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8714996576309204,
      "learning_rate": 0.0005789364993958907,
      "loss": 3.0674,
      "step": 27646
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3569384813308716,
      "learning_rate": 0.000578934993655979,
      "loss": 2.9808,
      "step": 27647
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4860388040542603,
      "learning_rate": 0.000578933487864208,
      "loss": 2.964,
      "step": 27648
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9666513204574585,
      "learning_rate": 0.000578931982020578,
      "loss": 2.9327,
      "step": 27649
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3235630989074707,
      "learning_rate": 0.0005789304761250894,
      "loss": 3.1225,
      "step": 27650
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8453210592269897,
      "learning_rate": 0.0005789289701777422,
      "loss": 3.0152,
      "step": 27651
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4360381364822388,
      "learning_rate": 0.000578927464178537,
      "loss": 3.2849,
      "step": 27652
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.137498617172241,
      "learning_rate": 0.0005789259581274738,
      "loss": 3.1262,
      "step": 27653
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5254437923431396,
      "learning_rate": 0.0005789244520245531,
      "loss": 3.1926,
      "step": 27654
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3802694082260132,
      "learning_rate": 0.0005789229458697749,
      "loss": 3.2043,
      "step": 27655
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5157337188720703,
      "learning_rate": 0.0005789214396631398,
      "loss": 3.1175,
      "step": 27656
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.85751211643219,
      "learning_rate": 0.0005789199334046478,
      "loss": 3.0501,
      "step": 27657
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9410748481750488,
      "learning_rate": 0.0005789184270942995,
      "loss": 3.0109,
      "step": 27658
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9366756677627563,
      "learning_rate": 0.0005789169207320949,
      "loss": 2.9205,
      "step": 27659
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5315680503845215,
      "learning_rate": 0.0005789154143180345,
      "loss": 3.1056,
      "step": 27660
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9281344413757324,
      "learning_rate": 0.0005789139078521183,
      "loss": 2.8917,
      "step": 27661
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.572784662246704,
      "learning_rate": 0.0005789124013343467,
      "loss": 3.1335,
      "step": 27662
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7393109798431396,
      "learning_rate": 0.0005789108947647203,
      "loss": 3.0263,
      "step": 27663
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4536820650100708,
      "learning_rate": 0.0005789093881432389,
      "loss": 3.1839,
      "step": 27664
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8972829580307007,
      "learning_rate": 0.000578907881469903,
      "loss": 3.205,
      "step": 27665
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8164582252502441,
      "learning_rate": 0.0005789063747447128,
      "loss": 2.9869,
      "step": 27666
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4486364126205444,
      "learning_rate": 0.0005789048679676688,
      "loss": 3.1111,
      "step": 27667
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6278990507125854,
      "learning_rate": 0.000578903361138771,
      "loss": 3.2413,
      "step": 27668
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.772700548171997,
      "learning_rate": 0.0005789018542580198,
      "loss": 3.2519,
      "step": 27669
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5183148384094238,
      "learning_rate": 0.0005789003473254155,
      "loss": 3.2023,
      "step": 27670
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5916420221328735,
      "learning_rate": 0.0005788988403409584,
      "loss": 2.9843,
      "step": 27671
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5061182975769043,
      "learning_rate": 0.0005788973333046486,
      "loss": 3.0246,
      "step": 27672
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.45015549659729,
      "learning_rate": 0.0005788958262164867,
      "loss": 3.0494,
      "step": 27673
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.835170030593872,
      "learning_rate": 0.0005788943190764728,
      "loss": 3.0241,
      "step": 27674
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8792188167572021,
      "learning_rate": 0.0005788928118846071,
      "loss": 2.8974,
      "step": 27675
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6479504108428955,
      "learning_rate": 0.0005788913046408899,
      "loss": 3.2603,
      "step": 27676
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8577296733856201,
      "learning_rate": 0.0005788897973453217,
      "loss": 3.086,
      "step": 27677
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6868493556976318,
      "learning_rate": 0.0005788882899979025,
      "loss": 3.0836,
      "step": 27678
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7905434370040894,
      "learning_rate": 0.0005788867825986327,
      "loss": 2.7298,
      "step": 27679
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4785187244415283,
      "learning_rate": 0.0005788852751475126,
      "loss": 3.3416,
      "step": 27680
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6132025718688965,
      "learning_rate": 0.0005788837676445425,
      "loss": 3.0369,
      "step": 27681
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6931614875793457,
      "learning_rate": 0.0005788822600897226,
      "loss": 3.1055,
      "step": 27682
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5228371620178223,
      "learning_rate": 0.0005788807524830533,
      "loss": 3.1974,
      "step": 27683
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.1403717994689941,
      "learning_rate": 0.0005788792448245348,
      "loss": 2.9784,
      "step": 27684
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3624358177185059,
      "learning_rate": 0.0005788777371141673,
      "loss": 3.1383,
      "step": 27685
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5616062879562378,
      "learning_rate": 0.0005788762293519513,
      "loss": 3.1321,
      "step": 27686
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6242133378982544,
      "learning_rate": 0.0005788747215378868,
      "loss": 3.1488,
      "step": 27687
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9693893194198608,
      "learning_rate": 0.0005788732136719743,
      "loss": 2.8916,
      "step": 27688
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7685346603393555,
      "learning_rate": 0.000578871705754214,
      "loss": 3.1336,
      "step": 27689
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6227279901504517,
      "learning_rate": 0.0005788701977846061,
      "loss": 3.0717,
      "step": 27690
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4762723445892334,
      "learning_rate": 0.0005788686897631511,
      "loss": 3.1481,
      "step": 27691
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9591403007507324,
      "learning_rate": 0.000578867181689849,
      "loss": 2.8693,
      "step": 27692
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5963926315307617,
      "learning_rate": 0.0005788656735647003,
      "loss": 2.8062,
      "step": 27693
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3387444019317627,
      "learning_rate": 0.0005788641653877052,
      "loss": 3.277,
      "step": 27694
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7802414894104004,
      "learning_rate": 0.000578862657158864,
      "loss": 3.2003,
      "step": 27695
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4128555059432983,
      "learning_rate": 0.000578861148878177,
      "loss": 2.8729,
      "step": 27696
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3919315338134766,
      "learning_rate": 0.0005788596405456444,
      "loss": 3.2924,
      "step": 27697
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0528879165649414,
      "learning_rate": 0.0005788581321612665,
      "loss": 3.1053,
      "step": 27698
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8928287029266357,
      "learning_rate": 0.0005788566237250437,
      "loss": 3.1356,
      "step": 27699
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6046769618988037,
      "learning_rate": 0.0005788551152369762,
      "loss": 3.2161,
      "step": 27700
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.7129762172698975,
      "learning_rate": 0.0005788536066970642,
      "loss": 3.0489,
      "step": 27701
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.417062997817993,
      "learning_rate": 0.000578852098105308,
      "loss": 3.4643,
      "step": 27702
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.78390634059906,
      "learning_rate": 0.0005788505894617081,
      "loss": 2.9019,
      "step": 27703
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.141397714614868,
      "learning_rate": 0.0005788490807662644,
      "loss": 3.1762,
      "step": 27704
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.264244556427002,
      "learning_rate": 0.0005788475720189776,
      "loss": 3.0558,
      "step": 27705
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2774574756622314,
      "learning_rate": 0.0005788460632198476,
      "loss": 3.1351,
      "step": 27706
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.080491542816162,
      "learning_rate": 0.0005788445543688749,
      "loss": 3.2482,
      "step": 27707
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6533194780349731,
      "learning_rate": 0.0005788430454660597,
      "loss": 2.8676,
      "step": 27708
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.8775205612182617,
      "learning_rate": 0.0005788415365114024,
      "loss": 3.0479,
      "step": 27709
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.952528476715088,
      "learning_rate": 0.0005788400275049033,
      "loss": 3.1384,
      "step": 27710
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7042261362075806,
      "learning_rate": 0.0005788385184465623,
      "loss": 2.8684,
      "step": 27711
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4596763849258423,
      "learning_rate": 0.0005788370093363802,
      "loss": 3.1715,
      "step": 27712
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.2562880516052246,
      "learning_rate": 0.0005788355001743569,
      "loss": 2.9606,
      "step": 27713
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1248438358306885,
      "learning_rate": 0.0005788339909604929,
      "loss": 3.0019,
      "step": 27714
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.571171760559082,
      "learning_rate": 0.0005788324816947884,
      "loss": 3.0292,
      "step": 27715
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5039687156677246,
      "learning_rate": 0.0005788309723772436,
      "loss": 2.9093,
      "step": 27716
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.702673077583313,
      "learning_rate": 0.0005788294630078588,
      "loss": 3.0612,
      "step": 27717
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.623387336730957,
      "learning_rate": 0.0005788279535866345,
      "loss": 3.0041,
      "step": 27718
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.505456566810608,
      "learning_rate": 0.0005788264441135707,
      "loss": 3.1644,
      "step": 27719
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3201391696929932,
      "learning_rate": 0.0005788249345886679,
      "loss": 3.1369,
      "step": 27720
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5792925357818604,
      "learning_rate": 0.0005788234250119263,
      "loss": 3.138,
      "step": 27721
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3449397087097168,
      "learning_rate": 0.000578821915383346,
      "loss": 2.9516,
      "step": 27722
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7404077053070068,
      "learning_rate": 0.0005788204057029275,
      "loss": 3.2191,
      "step": 27723
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9493622779846191,
      "learning_rate": 0.0005788188959706712,
      "loss": 3.1027,
      "step": 27724
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.338828206062317,
      "learning_rate": 0.000578817386186577,
      "loss": 2.8832,
      "step": 27725
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3251227140426636,
      "learning_rate": 0.0005788158763506455,
      "loss": 2.9142,
      "step": 27726
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.5206637382507324,
      "learning_rate": 0.0005788143664628767,
      "loss": 3.0177,
      "step": 27727
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8697056770324707,
      "learning_rate": 0.0005788128565232712,
      "loss": 2.9813,
      "step": 27728
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.8307557106018066,
      "learning_rate": 0.0005788113465318291,
      "loss": 3.1377,
      "step": 27729
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.071777105331421,
      "learning_rate": 0.0005788098364885506,
      "loss": 2.823,
      "step": 27730
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0400912761688232,
      "learning_rate": 0.0005788083263934363,
      "loss": 3.0419,
      "step": 27731
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4301570653915405,
      "learning_rate": 0.0005788068162464862,
      "loss": 3.134,
      "step": 27732
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.1329574584960938,
      "learning_rate": 0.0005788053060477006,
      "loss": 3.0829,
      "step": 27733
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.453679323196411,
      "learning_rate": 0.0005788037957970797,
      "loss": 3.0088,
      "step": 27734
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2571494579315186,
      "learning_rate": 0.0005788022854946241,
      "loss": 3.0095,
      "step": 27735
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.748228669166565,
      "learning_rate": 0.0005788007751403338,
      "loss": 2.9584,
      "step": 27736
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.4166626930236816,
      "learning_rate": 0.0005787992647342092,
      "loss": 3.1444,
      "step": 27737
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8384405374526978,
      "learning_rate": 0.0005787977542762507,
      "loss": 3.0952,
      "step": 27738
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9800312519073486,
      "learning_rate": 0.0005787962437664582,
      "loss": 2.9481,
      "step": 27739
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5082520246505737,
      "learning_rate": 0.0005787947332048323,
      "loss": 3.0795,
      "step": 27740
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1534383296966553,
      "learning_rate": 0.0005787932225913733,
      "loss": 2.9544,
      "step": 27741
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3910059928894043,
      "learning_rate": 0.0005787917119260813,
      "loss": 2.8679,
      "step": 27742
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9266115427017212,
      "learning_rate": 0.0005787902012089565,
      "loss": 3.0572,
      "step": 27743
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5375874042510986,
      "learning_rate": 0.0005787886904399996,
      "loss": 3.0991,
      "step": 27744
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.728835105895996,
      "learning_rate": 0.0005787871796192106,
      "loss": 3.1616,
      "step": 27745
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.545122504234314,
      "learning_rate": 0.0005787856687465896,
      "loss": 3.0926,
      "step": 27746
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5582455396652222,
      "learning_rate": 0.0005787841578221371,
      "loss": 3.15,
      "step": 27747
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6673028469085693,
      "learning_rate": 0.0005787826468458535,
      "loss": 2.8526,
      "step": 27748
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0278971195220947,
      "learning_rate": 0.000578781135817739,
      "loss": 3.1482,
      "step": 27749
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5722706317901611,
      "learning_rate": 0.0005787796247377937,
      "loss": 3.2636,
      "step": 27750
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3308589458465576,
      "learning_rate": 0.0005787781136060179,
      "loss": 3.1939,
      "step": 27751
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1231532096862793,
      "learning_rate": 0.0005787766024224122,
      "loss": 3.1283,
      "step": 27752
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.421612024307251,
      "learning_rate": 0.0005787750911869766,
      "loss": 3.0629,
      "step": 27753
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9591925144195557,
      "learning_rate": 0.0005787735798997115,
      "loss": 3.1421,
      "step": 27754
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7419970035552979,
      "learning_rate": 0.0005787720685606169,
      "loss": 3.2477,
      "step": 27755
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.307178497314453,
      "learning_rate": 0.0005787705571696936,
      "loss": 2.9146,
      "step": 27756
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.453118085861206,
      "learning_rate": 0.0005787690457269414,
      "loss": 3.3895,
      "step": 27757
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4718142747879028,
      "learning_rate": 0.0005787675342323608,
      "loss": 3.1297,
      "step": 27758
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.8150126934051514,
      "learning_rate": 0.0005787660226859522,
      "loss": 3.1668,
      "step": 27759
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.486483097076416,
      "learning_rate": 0.0005787645110877155,
      "loss": 3.0714,
      "step": 27760
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.903176188468933,
      "learning_rate": 0.0005787629994376514,
      "loss": 3.1061,
      "step": 27761
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6151782274246216,
      "learning_rate": 0.0005787614877357598,
      "loss": 2.9267,
      "step": 27762
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7671788930892944,
      "learning_rate": 0.0005787599759820414,
      "loss": 3.1255,
      "step": 27763
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.101430654525757,
      "learning_rate": 0.0005787584641764961,
      "loss": 3.0758,
      "step": 27764
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.548837423324585,
      "learning_rate": 0.0005787569523191244,
      "loss": 3.1877,
      "step": 27765
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9142839908599854,
      "learning_rate": 0.0005787554404099265,
      "loss": 3.0812,
      "step": 27766
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.342463731765747,
      "learning_rate": 0.0005787539284489028,
      "loss": 2.8977,
      "step": 27767
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2780238389968872,
      "learning_rate": 0.0005787524164360534,
      "loss": 2.8321,
      "step": 27768
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.294348955154419,
      "learning_rate": 0.0005787509043713787,
      "loss": 3.2332,
      "step": 27769
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9913674592971802,
      "learning_rate": 0.0005787493922548789,
      "loss": 3.1012,
      "step": 27770
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1684045791625977,
      "learning_rate": 0.0005787478800865543,
      "loss": 2.9746,
      "step": 27771
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4743273258209229,
      "learning_rate": 0.0005787463678664053,
      "loss": 2.9922,
      "step": 27772
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3940229415893555,
      "learning_rate": 0.0005787448555944321,
      "loss": 3.2043,
      "step": 27773
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1835737228393555,
      "learning_rate": 0.0005787433432706348,
      "loss": 3.0483,
      "step": 27774
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5964856147766113,
      "learning_rate": 0.0005787418308950141,
      "loss": 2.9622,
      "step": 27775
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.585731029510498,
      "learning_rate": 0.0005787403184675698,
      "loss": 3.2219,
      "step": 27776
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5965925455093384,
      "learning_rate": 0.0005787388059883025,
      "loss": 3.1672,
      "step": 27777
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.488813877105713,
      "learning_rate": 0.0005787372934572124,
      "loss": 2.9154,
      "step": 27778
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6350584030151367,
      "learning_rate": 0.0005787357808742997,
      "loss": 3.0621,
      "step": 27779
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4170984029769897,
      "learning_rate": 0.0005787342682395649,
      "loss": 3.2964,
      "step": 27780
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4718389511108398,
      "learning_rate": 0.0005787327555530081,
      "loss": 3.0187,
      "step": 27781
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2742745876312256,
      "learning_rate": 0.0005787312428146296,
      "loss": 2.9815,
      "step": 27782
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.658321738243103,
      "learning_rate": 0.0005787297300244296,
      "loss": 2.9744,
      "step": 27783
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7837036848068237,
      "learning_rate": 0.0005787282171824086,
      "loss": 3.1274,
      "step": 27784
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6603925228118896,
      "learning_rate": 0.0005787267042885667,
      "loss": 3.021,
      "step": 27785
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.425577163696289,
      "learning_rate": 0.0005787251913429043,
      "loss": 2.9841,
      "step": 27786
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3463432788848877,
      "learning_rate": 0.0005787236783454216,
      "loss": 3.2257,
      "step": 27787
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.467771053314209,
      "learning_rate": 0.000578722165296119,
      "loss": 3.0171,
      "step": 27788
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2157485485076904,
      "learning_rate": 0.0005787206521949966,
      "loss": 3.237,
      "step": 27789
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4069594144821167,
      "learning_rate": 0.0005787191390420547,
      "loss": 2.995,
      "step": 27790
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.633122205734253,
      "learning_rate": 0.0005787176258372937,
      "loss": 3.0968,
      "step": 27791
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.176976203918457,
      "learning_rate": 0.0005787161125807139,
      "loss": 3.3832,
      "step": 27792
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4275590181350708,
      "learning_rate": 0.0005787145992723155,
      "loss": 3.1055,
      "step": 27793
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.546658992767334,
      "learning_rate": 0.0005787130859120987,
      "loss": 3.1046,
      "step": 27794
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5606698989868164,
      "learning_rate": 0.0005787115725000639,
      "loss": 2.8516,
      "step": 27795
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7111358642578125,
      "learning_rate": 0.0005787100590362114,
      "loss": 3.0437,
      "step": 27796
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4137773513793945,
      "learning_rate": 0.0005787085455205414,
      "loss": 3.0464,
      "step": 27797
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4650121927261353,
      "learning_rate": 0.0005787070319530544,
      "loss": 3.1414,
      "step": 27798
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.395588994026184,
      "learning_rate": 0.0005787055183337503,
      "loss": 2.8274,
      "step": 27799
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3071181774139404,
      "learning_rate": 0.0005787040046626296,
      "loss": 2.9361,
      "step": 27800
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3436986207962036,
      "learning_rate": 0.0005787024909396927,
      "loss": 2.751,
      "step": 27801
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0027883052825928,
      "learning_rate": 0.0005787009771649396,
      "loss": 3.1401,
      "step": 27802
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.58857262134552,
      "learning_rate": 0.0005786994633383707,
      "loss": 3.2825,
      "step": 27803
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8661340475082397,
      "learning_rate": 0.0005786979494599864,
      "loss": 3.0838,
      "step": 27804
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5147597789764404,
      "learning_rate": 0.0005786964355297869,
      "loss": 3.2949,
      "step": 27805
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8396329879760742,
      "learning_rate": 0.0005786949215477725,
      "loss": 2.9221,
      "step": 27806
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6597322225570679,
      "learning_rate": 0.0005786934075139434,
      "loss": 3.0985,
      "step": 27807
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.381516933441162,
      "learning_rate": 0.0005786918934282999,
      "loss": 3.1276,
      "step": 27808
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4082762002944946,
      "learning_rate": 0.0005786903792908424,
      "loss": 2.8731,
      "step": 27809
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5533220767974854,
      "learning_rate": 0.0005786888651015709,
      "loss": 3.141,
      "step": 27810
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9058637619018555,
      "learning_rate": 0.0005786873508604861,
      "loss": 2.9337,
      "step": 27811
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6979213953018188,
      "learning_rate": 0.0005786858365675881,
      "loss": 3.1043,
      "step": 27812
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3295786380767822,
      "learning_rate": 0.000578684322222877,
      "loss": 3.2269,
      "step": 27813
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0578811168670654,
      "learning_rate": 0.0005786828078263532,
      "loss": 2.8365,
      "step": 27814
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7059590816497803,
      "learning_rate": 0.0005786812933780171,
      "loss": 3.1623,
      "step": 27815
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.423531413078308,
      "learning_rate": 0.0005786797788778688,
      "loss": 3.0589,
      "step": 27816
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8702243566513062,
      "learning_rate": 0.0005786782643259088,
      "loss": 2.9244,
      "step": 27817
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3818039894104004,
      "learning_rate": 0.0005786767497221371,
      "loss": 3.0692,
      "step": 27818
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7032335996627808,
      "learning_rate": 0.0005786752350665542,
      "loss": 3.091,
      "step": 27819
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7621989250183105,
      "learning_rate": 0.0005786737203591603,
      "loss": 2.9727,
      "step": 27820
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7321051359176636,
      "learning_rate": 0.0005786722055999557,
      "loss": 3.0951,
      "step": 27821
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4524387121200562,
      "learning_rate": 0.0005786706907889407,
      "loss": 3.0573,
      "step": 27822
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3273723125457764,
      "learning_rate": 0.0005786691759261156,
      "loss": 3.1532,
      "step": 27823
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6853318214416504,
      "learning_rate": 0.0005786676610114805,
      "loss": 2.9464,
      "step": 27824
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8161782026290894,
      "learning_rate": 0.0005786661460450359,
      "loss": 3.0716,
      "step": 27825
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2289413213729858,
      "learning_rate": 0.000578664631026782,
      "loss": 2.9599,
      "step": 27826
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5508389472961426,
      "learning_rate": 0.0005786631159567191,
      "loss": 3.1951,
      "step": 27827
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.874819278717041,
      "learning_rate": 0.0005786616008348475,
      "loss": 3.1824,
      "step": 27828
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5114344358444214,
      "learning_rate": 0.0005786600856611674,
      "loss": 2.9885,
      "step": 27829
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6337865591049194,
      "learning_rate": 0.0005786585704356791,
      "loss": 3.2893,
      "step": 27830
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3027362823486328,
      "learning_rate": 0.000578657055158383,
      "loss": 3.0978,
      "step": 27831
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6587393283843994,
      "learning_rate": 0.0005786555398292792,
      "loss": 3.1651,
      "step": 27832
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4885177612304688,
      "learning_rate": 0.0005786540244483681,
      "loss": 3.047,
      "step": 27833
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8499248027801514,
      "learning_rate": 0.00057865250901565,
      "loss": 3.0876,
      "step": 27834
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.458509087562561,
      "learning_rate": 0.0005786509935311251,
      "loss": 3.2023,
      "step": 27835
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5203752517700195,
      "learning_rate": 0.0005786494779947938,
      "loss": 3.107,
      "step": 27836
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.413199543952942,
      "learning_rate": 0.0005786479624066562,
      "loss": 3.2595,
      "step": 27837
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2699768543243408,
      "learning_rate": 0.0005786464467667128,
      "loss": 3.1083,
      "step": 27838
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.44375479221344,
      "learning_rate": 0.0005786449310749636,
      "loss": 3.4497,
      "step": 27839
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5069864988327026,
      "learning_rate": 0.0005786434153314092,
      "loss": 2.9432,
      "step": 27840
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6403883695602417,
      "learning_rate": 0.0005786418995360497,
      "loss": 3.248,
      "step": 27841
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3664019107818604,
      "learning_rate": 0.0005786403836888853,
      "loss": 2.8582,
      "step": 27842
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4920024871826172,
      "learning_rate": 0.0005786388677899164,
      "loss": 3.0779,
      "step": 27843
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.139632225036621,
      "learning_rate": 0.0005786373518391434,
      "loss": 3.2985,
      "step": 27844
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8231337070465088,
      "learning_rate": 0.0005786358358365664,
      "loss": 3.0512,
      "step": 27845
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9523959159851074,
      "learning_rate": 0.0005786343197821857,
      "loss": 3.1466,
      "step": 27846
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.143857717514038,
      "learning_rate": 0.0005786328036760018,
      "loss": 3.0758,
      "step": 27847
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4415158033370972,
      "learning_rate": 0.0005786312875180146,
      "loss": 3.3819,
      "step": 27848
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6731303930282593,
      "learning_rate": 0.0005786297713082246,
      "loss": 3.1298,
      "step": 27849
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4852055311203003,
      "learning_rate": 0.0005786282550466321,
      "loss": 3.2081,
      "step": 27850
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4911760091781616,
      "learning_rate": 0.0005786267387332373,
      "loss": 2.9683,
      "step": 27851
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5844472646713257,
      "learning_rate": 0.0005786252223680406,
      "loss": 2.9014,
      "step": 27852
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.051161527633667,
      "learning_rate": 0.0005786237059510422,
      "loss": 3.1959,
      "step": 27853
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9183815717697144,
      "learning_rate": 0.0005786221894822424,
      "loss": 3.2049,
      "step": 27854
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3313173055648804,
      "learning_rate": 0.0005786206729616416,
      "loss": 3.1803,
      "step": 27855
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.330285906791687,
      "learning_rate": 0.0005786191563892397,
      "loss": 3.0202,
      "step": 27856
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.336710214614868,
      "learning_rate": 0.0005786176397650374,
      "loss": 3.0701,
      "step": 27857
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.4154610633850098,
      "learning_rate": 0.0005786161230890347,
      "loss": 3.1859,
      "step": 27858
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6155983209609985,
      "learning_rate": 0.0005786146063612321,
      "loss": 3.3201,
      "step": 27859
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.345854640007019,
      "learning_rate": 0.0005786130895816298,
      "loss": 2.853,
      "step": 27860
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.258579730987549,
      "learning_rate": 0.000578611572750228,
      "loss": 3.1934,
      "step": 27861
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.79877769947052,
      "learning_rate": 0.0005786100558670272,
      "loss": 2.8905,
      "step": 27862
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6812098026275635,
      "learning_rate": 0.0005786085389320274,
      "loss": 3.1604,
      "step": 27863
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4656903743743896,
      "learning_rate": 0.000578607021945229,
      "loss": 3.118,
      "step": 27864
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2043797969818115,
      "learning_rate": 0.0005786055049066323,
      "loss": 3.2318,
      "step": 27865
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.695739507675171,
      "learning_rate": 0.0005786039878162377,
      "loss": 2.9605,
      "step": 27866
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.344454288482666,
      "learning_rate": 0.0005786024706740453,
      "loss": 3.0168,
      "step": 27867
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3226561546325684,
      "learning_rate": 0.0005786009534800554,
      "loss": 3.2299,
      "step": 27868
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1671276092529297,
      "learning_rate": 0.0005785994362342683,
      "loss": 3.2296,
      "step": 27869
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3429774045944214,
      "learning_rate": 0.0005785979189366843,
      "loss": 3.1008,
      "step": 27870
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.813660979270935,
      "learning_rate": 0.0005785964015873038,
      "loss": 3.1186,
      "step": 27871
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6467608213424683,
      "learning_rate": 0.0005785948841861269,
      "loss": 3.0409,
      "step": 27872
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6240577697753906,
      "learning_rate": 0.0005785933667331538,
      "loss": 3.2937,
      "step": 27873
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.800382137298584,
      "learning_rate": 0.0005785918492283852,
      "loss": 3.2409,
      "step": 27874
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.612061619758606,
      "learning_rate": 0.000578590331671821,
      "loss": 2.9533,
      "step": 27875
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8457250595092773,
      "learning_rate": 0.0005785888140634615,
      "loss": 2.8751,
      "step": 27876
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6899759769439697,
      "learning_rate": 0.0005785872964033073,
      "loss": 3.3206,
      "step": 27877
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2282841205596924,
      "learning_rate": 0.0005785857786913583,
      "loss": 2.9567,
      "step": 27878
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3583364486694336,
      "learning_rate": 0.000578584260927615,
      "loss": 2.927,
      "step": 27879
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.899320602416992,
      "learning_rate": 0.0005785827431120776,
      "loss": 3.0407,
      "step": 27880
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1932783126831055,
      "learning_rate": 0.0005785812252447465,
      "loss": 3.0128,
      "step": 27881
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6646403074264526,
      "learning_rate": 0.0005785797073256218,
      "loss": 3.1285,
      "step": 27882
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8238322734832764,
      "learning_rate": 0.0005785781893547038,
      "loss": 2.949,
      "step": 27883
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3561968803405762,
      "learning_rate": 0.000578576671331993,
      "loss": 3.023,
      "step": 27884
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4802682399749756,
      "learning_rate": 0.0005785751532574895,
      "loss": 3.2308,
      "step": 27885
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5282245874404907,
      "learning_rate": 0.0005785736351311936,
      "loss": 2.9488,
      "step": 27886
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.507692575454712,
      "learning_rate": 0.0005785721169531056,
      "loss": 3.2711,
      "step": 27887
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8703924417495728,
      "learning_rate": 0.0005785705987232258,
      "loss": 3.194,
      "step": 27888
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6477831602096558,
      "learning_rate": 0.0005785690804415545,
      "loss": 3.4601,
      "step": 27889
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.159564971923828,
      "learning_rate": 0.0005785675621080918,
      "loss": 3.1061,
      "step": 27890
    },
    {
      "epoch": 0.36,
      "grad_norm": 4.14989709854126,
      "learning_rate": 0.0005785660437228383,
      "loss": 3.0794,
      "step": 27891
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.972632884979248,
      "learning_rate": 0.000578564525285794,
      "loss": 3.2594,
      "step": 27892
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2589855194091797,
      "learning_rate": 0.0005785630067969594,
      "loss": 2.8026,
      "step": 27893
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5133867263793945,
      "learning_rate": 0.0005785614882563346,
      "loss": 3.2277,
      "step": 27894
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.528233528137207,
      "learning_rate": 0.00057855996966392,
      "loss": 2.9375,
      "step": 27895
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6265180110931396,
      "learning_rate": 0.0005785584510197159,
      "loss": 3.1283,
      "step": 27896
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.546855092048645,
      "learning_rate": 0.0005785569323237224,
      "loss": 2.998,
      "step": 27897
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5037423372268677,
      "learning_rate": 0.00057855541357594,
      "loss": 2.9468,
      "step": 27898
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.523569107055664,
      "learning_rate": 0.0005785538947763689,
      "loss": 3.1931,
      "step": 27899
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3288065195083618,
      "learning_rate": 0.0005785523759250093,
      "loss": 3.1975,
      "step": 27900
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7762506008148193,
      "learning_rate": 0.0005785508570218616,
      "loss": 3.0609,
      "step": 27901
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8117773532867432,
      "learning_rate": 0.0005785493380669259,
      "loss": 3.0495,
      "step": 27902
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4114116430282593,
      "learning_rate": 0.0005785478190602028,
      "loss": 3.1826,
      "step": 27903
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.523505687713623,
      "learning_rate": 0.0005785463000016924,
      "loss": 2.8455,
      "step": 27904
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.648563265800476,
      "learning_rate": 0.0005785447808913949,
      "loss": 3.0582,
      "step": 27905
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4214417934417725,
      "learning_rate": 0.0005785432617293107,
      "loss": 3.358,
      "step": 27906
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5080528259277344,
      "learning_rate": 0.0005785417425154401,
      "loss": 3.1284,
      "step": 27907
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.060950756072998,
      "learning_rate": 0.0005785402232497832,
      "loss": 3.0687,
      "step": 27908
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3268648386001587,
      "learning_rate": 0.0005785387039323406,
      "loss": 3.2261,
      "step": 27909
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2733365297317505,
      "learning_rate": 0.0005785371845631123,
      "loss": 2.9484,
      "step": 27910
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9557850360870361,
      "learning_rate": 0.0005785356651420987,
      "loss": 3.0594,
      "step": 27911
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.5519425868988037,
      "learning_rate": 0.0005785341456693,
      "loss": 2.9595,
      "step": 27912
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.668565034866333,
      "learning_rate": 0.0005785326261447165,
      "loss": 3.2367,
      "step": 27913
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9934916496276855,
      "learning_rate": 0.0005785311065683486,
      "loss": 3.0938,
      "step": 27914
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7638109922409058,
      "learning_rate": 0.0005785295869401966,
      "loss": 3.4222,
      "step": 27915
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0637471675872803,
      "learning_rate": 0.0005785280672602605,
      "loss": 3.1892,
      "step": 27916
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8660144805908203,
      "learning_rate": 0.000578526547528541,
      "loss": 2.8177,
      "step": 27917
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3971164226531982,
      "learning_rate": 0.0005785250277450379,
      "loss": 3.3306,
      "step": 27918
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.6347365379333496,
      "learning_rate": 0.000578523507909752,
      "loss": 3.0628,
      "step": 27919
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4837865829467773,
      "learning_rate": 0.0005785219880226831,
      "loss": 3.1733,
      "step": 27920
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6216880083084106,
      "learning_rate": 0.0005785204680838319,
      "loss": 3.1941,
      "step": 27921
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6106247901916504,
      "learning_rate": 0.0005785189480931984,
      "loss": 3.3041,
      "step": 27922
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8015151023864746,
      "learning_rate": 0.0005785174280507829,
      "loss": 2.8876,
      "step": 27923
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6994272470474243,
      "learning_rate": 0.0005785159079565858,
      "loss": 3.0705,
      "step": 27924
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6542481184005737,
      "learning_rate": 0.0005785143878106073,
      "loss": 2.8967,
      "step": 27925
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.0380985736846924,
      "learning_rate": 0.0005785128676128479,
      "loss": 2.8965,
      "step": 27926
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1884164810180664,
      "learning_rate": 0.0005785113473633075,
      "loss": 3.1008,
      "step": 27927
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7232458591461182,
      "learning_rate": 0.0005785098270619867,
      "loss": 3.3067,
      "step": 27928
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.1259775161743164,
      "learning_rate": 0.0005785083067088855,
      "loss": 3.0821,
      "step": 27929
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9291155338287354,
      "learning_rate": 0.0005785067863040046,
      "loss": 3.4119,
      "step": 27930
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6810001134872437,
      "learning_rate": 0.0005785052658473438,
      "loss": 3.1667,
      "step": 27931
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.153489828109741,
      "learning_rate": 0.0005785037453389037,
      "loss": 3.1373,
      "step": 27932
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4137349128723145,
      "learning_rate": 0.0005785022247786847,
      "loss": 3.2393,
      "step": 27933
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2185965776443481,
      "learning_rate": 0.0005785007041666866,
      "loss": 3.148,
      "step": 27934
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4821199178695679,
      "learning_rate": 0.00057849918350291,
      "loss": 2.7219,
      "step": 27935
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3953988552093506,
      "learning_rate": 0.0005784976627873552,
      "loss": 3.2928,
      "step": 27936
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5601401329040527,
      "learning_rate": 0.0005784961420200224,
      "loss": 2.9335,
      "step": 27937
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6086865663528442,
      "learning_rate": 0.000578494621200912,
      "loss": 3.1624,
      "step": 27938
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.3314032554626465,
      "learning_rate": 0.0005784931003300241,
      "loss": 2.813,
      "step": 27939
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4203307628631592,
      "learning_rate": 0.0005784915794073591,
      "loss": 3.1372,
      "step": 27940
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.516632080078125,
      "learning_rate": 0.0005784900584329173,
      "loss": 2.8127,
      "step": 27941
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5207663774490356,
      "learning_rate": 0.0005784885374066988,
      "loss": 3.0881,
      "step": 27942
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3016064167022705,
      "learning_rate": 0.0005784870163287043,
      "loss": 3.279,
      "step": 27943
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4970943927764893,
      "learning_rate": 0.0005784854951989335,
      "loss": 3.4081,
      "step": 27944
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.506365180015564,
      "learning_rate": 0.0005784839740173871,
      "loss": 3.2116,
      "step": 27945
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.2684894800186157,
      "learning_rate": 0.0005784824527840654,
      "loss": 3.2987,
      "step": 27946
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3555952310562134,
      "learning_rate": 0.0005784809314989684,
      "loss": 3.1265,
      "step": 27947
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7086302042007446,
      "learning_rate": 0.0005784794101620966,
      "loss": 3.1019,
      "step": 27948
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8680559396743774,
      "learning_rate": 0.0005784778887734502,
      "loss": 2.931,
      "step": 27949
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.342070460319519,
      "learning_rate": 0.0005784763673330295,
      "loss": 3.1923,
      "step": 27950
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6546289920806885,
      "learning_rate": 0.0005784748458408348,
      "loss": 2.9501,
      "step": 27951
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.739747405052185,
      "learning_rate": 0.0005784733242968664,
      "loss": 2.79,
      "step": 27952
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0053482055664062,
      "learning_rate": 0.0005784718027011245,
      "loss": 3.0096,
      "step": 27953
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5331629514694214,
      "learning_rate": 0.0005784702810536094,
      "loss": 3.0991,
      "step": 27954
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6423276662826538,
      "learning_rate": 0.0005784687593543216,
      "loss": 2.8668,
      "step": 27955
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.391921043395996,
      "learning_rate": 0.000578467237603261,
      "loss": 3.2403,
      "step": 27956
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9961588382720947,
      "learning_rate": 0.0005784657158004283,
      "loss": 2.9608,
      "step": 27957
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4456433057785034,
      "learning_rate": 0.0005784641939458234,
      "loss": 3.0961,
      "step": 27958
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.653397560119629,
      "learning_rate": 0.0005784626720394467,
      "loss": 3.2624,
      "step": 27959
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.875614047050476,
      "learning_rate": 0.0005784611500812986,
      "loss": 3.1095,
      "step": 27960
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5015980005264282,
      "learning_rate": 0.0005784596280713793,
      "loss": 2.9598,
      "step": 27961
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3632036447525024,
      "learning_rate": 0.0005784581060096891,
      "loss": 2.6996,
      "step": 27962
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5175817012786865,
      "learning_rate": 0.0005784565838962283,
      "loss": 3.0253,
      "step": 27963
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.956771731376648,
      "learning_rate": 0.0005784550617309972,
      "loss": 3.2407,
      "step": 27964
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5610404014587402,
      "learning_rate": 0.000578453539513996,
      "loss": 3.2291,
      "step": 27965
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.831162452697754,
      "learning_rate": 0.0005784520172452251,
      "loss": 3.1084,
      "step": 27966
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9693598747253418,
      "learning_rate": 0.0005784504949246847,
      "loss": 3.1617,
      "step": 27967
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.904465436935425,
      "learning_rate": 0.000578448972552375,
      "loss": 3.0167,
      "step": 27968
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.514304518699646,
      "learning_rate": 0.0005784474501282965,
      "loss": 3.0328,
      "step": 27969
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5510928630828857,
      "learning_rate": 0.0005784459276524492,
      "loss": 3.0088,
      "step": 27970
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7331143617630005,
      "learning_rate": 0.0005784444051248338,
      "loss": 3.158,
      "step": 27971
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.11217999458313,
      "learning_rate": 0.0005784428825454501,
      "loss": 3.1072,
      "step": 27972
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.313891053199768,
      "learning_rate": 0.0005784413599142987,
      "loss": 2.9326,
      "step": 27973
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5239206552505493,
      "learning_rate": 0.0005784398372313797,
      "loss": 3.1092,
      "step": 27974
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5002402067184448,
      "learning_rate": 0.0005784383144966937,
      "loss": 2.9321,
      "step": 27975
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5634374618530273,
      "learning_rate": 0.0005784367917102406,
      "loss": 3.0588,
      "step": 27976
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5945004224777222,
      "learning_rate": 0.0005784352688720208,
      "loss": 3.2031,
      "step": 27977
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8481762409210205,
      "learning_rate": 0.0005784337459820347,
      "loss": 3.0214,
      "step": 27978
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4771950244903564,
      "learning_rate": 0.0005784322230402826,
      "loss": 3.1468,
      "step": 27979
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5124940872192383,
      "learning_rate": 0.0005784307000467646,
      "loss": 2.8822,
      "step": 27980
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6406991481781006,
      "learning_rate": 0.0005784291770014811,
      "loss": 3.2164,
      "step": 27981
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.436683177947998,
      "learning_rate": 0.0005784276539044323,
      "loss": 3.3134,
      "step": 27982
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.146771192550659,
      "learning_rate": 0.0005784261307556185,
      "loss": 2.7495,
      "step": 27983
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.423319935798645,
      "learning_rate": 0.0005784246075550401,
      "loss": 3.1675,
      "step": 27984
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9164419174194336,
      "learning_rate": 0.0005784230843026974,
      "loss": 2.9988,
      "step": 27985
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5799363851547241,
      "learning_rate": 0.0005784215609985904,
      "loss": 2.9849,
      "step": 27986
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5781762599945068,
      "learning_rate": 0.0005784200376427198,
      "loss": 2.9403,
      "step": 27987
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5510011911392212,
      "learning_rate": 0.0005784185142350855,
      "loss": 3.0156,
      "step": 27988
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5551633834838867,
      "learning_rate": 0.0005784169907756881,
      "loss": 3.3222,
      "step": 27989
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6802046298980713,
      "learning_rate": 0.0005784154672645275,
      "loss": 3.0987,
      "step": 27990
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3377143144607544,
      "learning_rate": 0.0005784139437016043,
      "loss": 2.9414,
      "step": 27991
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5167120695114136,
      "learning_rate": 0.0005784124200869187,
      "loss": 3.0193,
      "step": 27992
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9456990957260132,
      "learning_rate": 0.0005784108964204711,
      "loss": 3.0673,
      "step": 27993
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.0062057971954346,
      "learning_rate": 0.0005784093727022616,
      "loss": 3.3684,
      "step": 27994
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8474582433700562,
      "learning_rate": 0.0005784078489322904,
      "loss": 3.053,
      "step": 27995
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6645855903625488,
      "learning_rate": 0.000578406325110558,
      "loss": 2.9277,
      "step": 27996
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.876826047897339,
      "learning_rate": 0.0005784048012370647,
      "loss": 3.0575,
      "step": 27997
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9261318445205688,
      "learning_rate": 0.0005784032773118106,
      "loss": 3.0869,
      "step": 27998
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.7518563270568848,
      "learning_rate": 0.0005784017533347961,
      "loss": 3.4441,
      "step": 27999
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.8695024251937866,
      "learning_rate": 0.0005784002293060214,
      "loss": 2.8842,
      "step": 28000
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.578408122062683,
      "learning_rate": 0.000578398705225487,
      "loss": 3.0235,
      "step": 28001
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4001394510269165,
      "learning_rate": 0.0005783971810931929,
      "loss": 3.1651,
      "step": 28002
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.521825909614563,
      "learning_rate": 0.0005783956569091395,
      "loss": 3.0923,
      "step": 28003
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4969393014907837,
      "learning_rate": 0.0005783941326733272,
      "loss": 3.2754,
      "step": 28004
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9243003129959106,
      "learning_rate": 0.000578392608385756,
      "loss": 3.3462,
      "step": 28005
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3360967636108398,
      "learning_rate": 0.0005783910840464265,
      "loss": 2.9778,
      "step": 28006
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.845828056335449,
      "learning_rate": 0.0005783895596553388,
      "loss": 2.8579,
      "step": 28007
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3106024265289307,
      "learning_rate": 0.0005783880352124932,
      "loss": 3.3094,
      "step": 28008
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.015584707260132,
      "learning_rate": 0.00057838651071789,
      "loss": 3.1669,
      "step": 28009
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3712714910507202,
      "learning_rate": 0.0005783849861715296,
      "loss": 3.0555,
      "step": 28010
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4622193574905396,
      "learning_rate": 0.0005783834615734121,
      "loss": 3.27,
      "step": 28011
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5714389085769653,
      "learning_rate": 0.0005783819369235379,
      "loss": 3.008,
      "step": 28012
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.308777093887329,
      "learning_rate": 0.0005783804122219072,
      "loss": 3.2782,
      "step": 28013
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.3783783912658691,
      "learning_rate": 0.0005783788874685203,
      "loss": 3.1544,
      "step": 28014
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6659932136535645,
      "learning_rate": 0.0005783773626633776,
      "loss": 3.2914,
      "step": 28015
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.446618676185608,
      "learning_rate": 0.0005783758378064791,
      "loss": 3.2432,
      "step": 28016
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.412255048751831,
      "learning_rate": 0.0005783743128978256,
      "loss": 3.3034,
      "step": 28017
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4201862812042236,
      "learning_rate": 0.0005783727879374168,
      "loss": 3.0629,
      "step": 28018
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6832787990570068,
      "learning_rate": 0.0005783712629252533,
      "loss": 2.7948,
      "step": 28019
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4510785341262817,
      "learning_rate": 0.0005783697378613353,
      "loss": 3.1339,
      "step": 28020
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9679534435272217,
      "learning_rate": 0.0005783682127456632,
      "loss": 3.2196,
      "step": 28021
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4326372146606445,
      "learning_rate": 0.0005783666875782372,
      "loss": 3.2349,
      "step": 28022
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4772794246673584,
      "learning_rate": 0.0005783651623590575,
      "loss": 2.9214,
      "step": 28023
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4891357421875,
      "learning_rate": 0.0005783636370881245,
      "loss": 2.7886,
      "step": 28024
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.6548043489456177,
      "learning_rate": 0.0005783621117654385,
      "loss": 3.0925,
      "step": 28025
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.5181972980499268,
      "learning_rate": 0.0005783605863909995,
      "loss": 3.1593,
      "step": 28026
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.578306198120117,
      "learning_rate": 0.0005783590609648083,
      "loss": 2.8055,
      "step": 28027
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.462075114250183,
      "learning_rate": 0.0005783575354868647,
      "loss": 3.1411,
      "step": 28028
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.1880948543548584,
      "learning_rate": 0.0005783560099571692,
      "loss": 2.9803,
      "step": 28029
    },
    {
      "epoch": 0.36,
      "grad_norm": 2.2766525745391846,
      "learning_rate": 0.0005783544843757223,
      "loss": 3.0508,
      "step": 28030
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.4131581783294678,
      "learning_rate": 0.0005783529587425238,
      "loss": 3.0283,
      "step": 28031
    },
    {
      "epoch": 0.36,
      "grad_norm": 1.9089688062667847,
      "learning_rate": 0.0005783514330575743,
      "loss": 2.9548,
      "step": 28032
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.962039589881897,
      "learning_rate": 0.0005783499073208741,
      "loss": 3.2434,
      "step": 28033
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7064322233200073,
      "learning_rate": 0.0005783483815324233,
      "loss": 3.1869,
      "step": 28034
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3924407958984375,
      "learning_rate": 0.0005783468556922222,
      "loss": 3.0665,
      "step": 28035
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8504525423049927,
      "learning_rate": 0.0005783453298002713,
      "loss": 3.1981,
      "step": 28036
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3531454801559448,
      "learning_rate": 0.0005783438038565707,
      "loss": 3.1153,
      "step": 28037
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5936639308929443,
      "learning_rate": 0.0005783422778611207,
      "loss": 3.0651,
      "step": 28038
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5451645851135254,
      "learning_rate": 0.0005783407518139217,
      "loss": 2.9816,
      "step": 28039
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7644188404083252,
      "learning_rate": 0.0005783392257149738,
      "loss": 3.1687,
      "step": 28040
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1560757160186768,
      "learning_rate": 0.0005783376995642774,
      "loss": 3.1817,
      "step": 28041
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3411498069763184,
      "learning_rate": 0.0005783361733618329,
      "loss": 3.3047,
      "step": 28042
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3303053379058838,
      "learning_rate": 0.0005783346471076403,
      "loss": 3.281,
      "step": 28043
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8927816152572632,
      "learning_rate": 0.0005783331208017001,
      "loss": 2.885,
      "step": 28044
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2781943082809448,
      "learning_rate": 0.0005783315944440125,
      "loss": 3.1204,
      "step": 28045
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5614577531814575,
      "learning_rate": 0.0005783300680345778,
      "loss": 3.0669,
      "step": 28046
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6886590719223022,
      "learning_rate": 0.0005783285415733964,
      "loss": 2.9808,
      "step": 28047
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3394863605499268,
      "learning_rate": 0.0005783270150604683,
      "loss": 3.003,
      "step": 28048
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2611974477767944,
      "learning_rate": 0.000578325488495794,
      "loss": 3.3587,
      "step": 28049
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3200035095214844,
      "learning_rate": 0.0005783239618793738,
      "loss": 2.9538,
      "step": 28050
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4758859872817993,
      "learning_rate": 0.0005783224352112079,
      "loss": 3.1674,
      "step": 28051
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3907313346862793,
      "learning_rate": 0.0005783209084912965,
      "loss": 3.0969,
      "step": 28052
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4667259454727173,
      "learning_rate": 0.0005783193817196402,
      "loss": 3.1892,
      "step": 28053
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.186199426651001,
      "learning_rate": 0.0005783178548962389,
      "loss": 2.9394,
      "step": 28054
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4809597730636597,
      "learning_rate": 0.0005783163280210931,
      "loss": 3.2945,
      "step": 28055
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5090906620025635,
      "learning_rate": 0.000578314801094203,
      "loss": 3.2622,
      "step": 28056
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2787110805511475,
      "learning_rate": 0.0005783132741155691,
      "loss": 3.0899,
      "step": 28057
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.499876618385315,
      "learning_rate": 0.0005783117470851913,
      "loss": 3.303,
      "step": 28058
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5570145845413208,
      "learning_rate": 0.0005783102200030702,
      "loss": 3.1141,
      "step": 28059
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7021615505218506,
      "learning_rate": 0.000578308692869206,
      "loss": 3.0891,
      "step": 28060
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3498421907424927,
      "learning_rate": 0.0005783071656835989,
      "loss": 2.9974,
      "step": 28061
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1294896602630615,
      "learning_rate": 0.0005783056384462492,
      "loss": 2.9516,
      "step": 28062
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3662540912628174,
      "learning_rate": 0.0005783041111571573,
      "loss": 3.0182,
      "step": 28063
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.656559944152832,
      "learning_rate": 0.0005783025838163235,
      "loss": 3.0872,
      "step": 28064
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5101358890533447,
      "learning_rate": 0.0005783010564237479,
      "loss": 2.6685,
      "step": 28065
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5054882764816284,
      "learning_rate": 0.0005782995289794308,
      "loss": 2.9684,
      "step": 28066
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.326920747756958,
      "learning_rate": 0.0005782980014833727,
      "loss": 2.9722,
      "step": 28067
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4999696016311646,
      "learning_rate": 0.0005782964739355737,
      "loss": 3.257,
      "step": 28068
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5323829650878906,
      "learning_rate": 0.0005782949463360341,
      "loss": 2.8915,
      "step": 28069
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4701794385910034,
      "learning_rate": 0.0005782934186847542,
      "loss": 3.1613,
      "step": 28070
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4979122877120972,
      "learning_rate": 0.0005782918909817342,
      "loss": 2.9649,
      "step": 28071
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.264942169189453,
      "learning_rate": 0.0005782903632269747,
      "loss": 2.9586,
      "step": 28072
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.765079140663147,
      "learning_rate": 0.0005782888354204757,
      "loss": 2.9896,
      "step": 28073
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.097463607788086,
      "learning_rate": 0.0005782873075622375,
      "loss": 3.1093,
      "step": 28074
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.990093231201172,
      "learning_rate": 0.0005782857796522606,
      "loss": 3.2137,
      "step": 28075
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7748438119888306,
      "learning_rate": 0.0005782842516905449,
      "loss": 2.902,
      "step": 28076
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9126096963882446,
      "learning_rate": 0.0005782827236770911,
      "loss": 2.9972,
      "step": 28077
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6338058710098267,
      "learning_rate": 0.0005782811956118991,
      "loss": 3.1308,
      "step": 28078
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.7258095741271973,
      "learning_rate": 0.0005782796674949695,
      "loss": 3.2299,
      "step": 28079
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.341508388519287,
      "learning_rate": 0.0005782781393263024,
      "loss": 3.0839,
      "step": 28080
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.501614809036255,
      "learning_rate": 0.0005782766111058982,
      "loss": 3.1514,
      "step": 28081
    },
    {
      "epoch": 0.37,
      "grad_norm": 4.267585754394531,
      "learning_rate": 0.000578275082833757,
      "loss": 3.2473,
      "step": 28082
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.195154905319214,
      "learning_rate": 0.0005782735545098793,
      "loss": 3.1163,
      "step": 28083
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.531471848487854,
      "learning_rate": 0.0005782720261342653,
      "loss": 3.0889,
      "step": 28084
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.787505865097046,
      "learning_rate": 0.0005782704977069154,
      "loss": 3.2704,
      "step": 28085
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.50968337059021,
      "learning_rate": 0.0005782689692278296,
      "loss": 3.1368,
      "step": 28086
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5664658546447754,
      "learning_rate": 0.0005782674406970083,
      "loss": 3.0942,
      "step": 28087
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3329583406448364,
      "learning_rate": 0.0005782659121144519,
      "loss": 3.0419,
      "step": 28088
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5682650804519653,
      "learning_rate": 0.0005782643834801607,
      "loss": 3.1771,
      "step": 28089
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0563321113586426,
      "learning_rate": 0.0005782628547941347,
      "loss": 3.091,
      "step": 28090
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3883639574050903,
      "learning_rate": 0.0005782613260563747,
      "loss": 3.2962,
      "step": 28091
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7457727193832397,
      "learning_rate": 0.0005782597972668804,
      "loss": 3.2184,
      "step": 28092
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6330498456954956,
      "learning_rate": 0.0005782582684256525,
      "loss": 3.3095,
      "step": 28093
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7664200067520142,
      "learning_rate": 0.0005782567395326909,
      "loss": 3.0292,
      "step": 28094
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.181053638458252,
      "learning_rate": 0.0005782552105879963,
      "loss": 3.0088,
      "step": 28095
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.837077021598816,
      "learning_rate": 0.0005782536815915688,
      "loss": 2.8913,
      "step": 28096
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1744468212127686,
      "learning_rate": 0.0005782521525434087,
      "loss": 3.1116,
      "step": 28097
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7443201541900635,
      "learning_rate": 0.0005782506234435162,
      "loss": 3.1857,
      "step": 28098
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.380066156387329,
      "learning_rate": 0.0005782490942918916,
      "loss": 2.9393,
      "step": 28099
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6110854148864746,
      "learning_rate": 0.0005782475650885355,
      "loss": 3.3318,
      "step": 28100
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.9839766025543213,
      "learning_rate": 0.0005782460358334477,
      "loss": 3.1342,
      "step": 28101
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4562904834747314,
      "learning_rate": 0.0005782445065266287,
      "loss": 2.9389,
      "step": 28102
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.811401128768921,
      "learning_rate": 0.000578242977168079,
      "loss": 3.0877,
      "step": 28103
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.206608295440674,
      "learning_rate": 0.0005782414477577986,
      "loss": 3.2328,
      "step": 28104
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5789458751678467,
      "learning_rate": 0.0005782399182957876,
      "loss": 3.3575,
      "step": 28105
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0599889755249023,
      "learning_rate": 0.0005782383887820469,
      "loss": 2.8974,
      "step": 28106
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8006027936935425,
      "learning_rate": 0.0005782368592165762,
      "loss": 3.2365,
      "step": 28107
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.34389328956604,
      "learning_rate": 0.0005782353295993761,
      "loss": 3.1754,
      "step": 28108
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5445395708084106,
      "learning_rate": 0.0005782337999304468,
      "loss": 3.18,
      "step": 28109
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3828680515289307,
      "learning_rate": 0.0005782322702097886,
      "loss": 3.2688,
      "step": 28110
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4251313209533691,
      "learning_rate": 0.0005782307404374018,
      "loss": 3.069,
      "step": 28111
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.110952138900757,
      "learning_rate": 0.0005782292106132866,
      "loss": 3.2031,
      "step": 28112
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5749822854995728,
      "learning_rate": 0.0005782276807374434,
      "loss": 3.0319,
      "step": 28113
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.957763671875,
      "learning_rate": 0.0005782261508098723,
      "loss": 3.235,
      "step": 28114
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.903872489929199,
      "learning_rate": 0.0005782246208305737,
      "loss": 2.8976,
      "step": 28115
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.527921438217163,
      "learning_rate": 0.000578223090799548,
      "loss": 3.0438,
      "step": 28116
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5703372955322266,
      "learning_rate": 0.0005782215607167953,
      "loss": 3.0317,
      "step": 28117
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.31036114692688,
      "learning_rate": 0.0005782200305823161,
      "loss": 3.1122,
      "step": 28118
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4139182567596436,
      "learning_rate": 0.0005782185003961104,
      "loss": 2.7959,
      "step": 28119
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4814549684524536,
      "learning_rate": 0.0005782169701581787,
      "loss": 2.9559,
      "step": 28120
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6889220476150513,
      "learning_rate": 0.0005782154398685211,
      "loss": 3.0242,
      "step": 28121
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.504828453063965,
      "learning_rate": 0.0005782139095271381,
      "loss": 3.1361,
      "step": 28122
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.888389229774475,
      "learning_rate": 0.00057821237913403,
      "loss": 3.1062,
      "step": 28123
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6175556182861328,
      "learning_rate": 0.0005782108486891968,
      "loss": 3.0229,
      "step": 28124
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2566196918487549,
      "learning_rate": 0.000578209318192639,
      "loss": 2.9461,
      "step": 28125
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3479094505310059,
      "learning_rate": 0.0005782077876443568,
      "loss": 2.9143,
      "step": 28126
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5227692127227783,
      "learning_rate": 0.0005782062570443505,
      "loss": 3.0264,
      "step": 28127
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6920925378799438,
      "learning_rate": 0.0005782047263926204,
      "loss": 3.2754,
      "step": 28128
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4096165895462036,
      "learning_rate": 0.000578203195689167,
      "loss": 3.0473,
      "step": 28129
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5974488258361816,
      "learning_rate": 0.0005782016649339901,
      "loss": 2.909,
      "step": 28130
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.478442907333374,
      "learning_rate": 0.0005782001341270905,
      "loss": 3.0986,
      "step": 28131
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5153234004974365,
      "learning_rate": 0.0005781986032684681,
      "loss": 3.016,
      "step": 28132
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.774504542350769,
      "learning_rate": 0.0005781970723581233,
      "loss": 2.8808,
      "step": 28133
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7291088104248047,
      "learning_rate": 0.0005781955413960564,
      "loss": 3.0548,
      "step": 28134
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7520874738693237,
      "learning_rate": 0.0005781940103822677,
      "loss": 2.9931,
      "step": 28135
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2917338609695435,
      "learning_rate": 0.0005781924793167577,
      "loss": 3.0098,
      "step": 28136
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6468311548233032,
      "learning_rate": 0.0005781909481995263,
      "loss": 3.0024,
      "step": 28137
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1849985122680664,
      "learning_rate": 0.0005781894170305739,
      "loss": 3.2607,
      "step": 28138
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5088846683502197,
      "learning_rate": 0.0005781878858099009,
      "loss": 3.0707,
      "step": 28139
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8801618814468384,
      "learning_rate": 0.0005781863545375075,
      "loss": 3.3004,
      "step": 28140
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8590483665466309,
      "learning_rate": 0.000578184823213394,
      "loss": 2.9164,
      "step": 28141
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.648622751235962,
      "learning_rate": 0.0005781832918375608,
      "loss": 3.1211,
      "step": 28142
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5329785346984863,
      "learning_rate": 0.0005781817604100078,
      "loss": 3.1968,
      "step": 28143
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.534686803817749,
      "learning_rate": 0.0005781802289307359,
      "loss": 3.1883,
      "step": 28144
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3499759435653687,
      "learning_rate": 0.0005781786973997449,
      "loss": 3.071,
      "step": 28145
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5192468166351318,
      "learning_rate": 0.0005781771658170352,
      "loss": 3.081,
      "step": 28146
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7682645320892334,
      "learning_rate": 0.000578175634182607,
      "loss": 3.3488,
      "step": 28147
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3940491676330566,
      "learning_rate": 0.0005781741024964608,
      "loss": 3.1152,
      "step": 28148
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.482751727104187,
      "learning_rate": 0.0005781725707585969,
      "loss": 2.8828,
      "step": 28149
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4283082485198975,
      "learning_rate": 0.0005781710389690153,
      "loss": 3.1931,
      "step": 28150
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.367267370223999,
      "learning_rate": 0.0005781695071277167,
      "loss": 2.9981,
      "step": 28151
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.707470417022705,
      "learning_rate": 0.0005781679752347008,
      "loss": 3.1276,
      "step": 28152
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3733643293380737,
      "learning_rate": 0.0005781664432899685,
      "loss": 3.1523,
      "step": 28153
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4269158840179443,
      "learning_rate": 0.0005781649112935197,
      "loss": 3.168,
      "step": 28154
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0910274982452393,
      "learning_rate": 0.0005781633792453549,
      "loss": 3.018,
      "step": 28155
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3391586542129517,
      "learning_rate": 0.0005781618471454741,
      "loss": 3.0771,
      "step": 28156
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.508619785308838,
      "learning_rate": 0.0005781603149938779,
      "loss": 3.1556,
      "step": 28157
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6168309450149536,
      "learning_rate": 0.0005781587827905663,
      "loss": 2.861,
      "step": 28158
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.03438401222229,
      "learning_rate": 0.0005781572505355398,
      "loss": 3.1052,
      "step": 28159
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3520267009735107,
      "learning_rate": 0.0005781557182287986,
      "loss": 3.0261,
      "step": 28160
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3580663204193115,
      "learning_rate": 0.000578154185870343,
      "loss": 2.9292,
      "step": 28161
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8653420209884644,
      "learning_rate": 0.0005781526534601733,
      "loss": 3.2867,
      "step": 28162
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6344923973083496,
      "learning_rate": 0.0005781511209982898,
      "loss": 3.1682,
      "step": 28163
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6405001878738403,
      "learning_rate": 0.0005781495884846927,
      "loss": 2.896,
      "step": 28164
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.18906307220459,
      "learning_rate": 0.0005781480559193825,
      "loss": 3.2458,
      "step": 28165
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8925485610961914,
      "learning_rate": 0.0005781465233023592,
      "loss": 3.0376,
      "step": 28166
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.785567283630371,
      "learning_rate": 0.0005781449906336231,
      "loss": 3.1271,
      "step": 28167
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2941032648086548,
      "learning_rate": 0.0005781434579131749,
      "loss": 3.1442,
      "step": 28168
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9340065717697144,
      "learning_rate": 0.0005781419251410143,
      "loss": 3.153,
      "step": 28169
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3463172912597656,
      "learning_rate": 0.000578140392317142,
      "loss": 3.1254,
      "step": 28170
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.719056487083435,
      "learning_rate": 0.0005781388594415581,
      "loss": 3.1123,
      "step": 28171
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3275752067565918,
      "learning_rate": 0.0005781373265142629,
      "loss": 3.0906,
      "step": 28172
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.254732608795166,
      "learning_rate": 0.0005781357935352567,
      "loss": 3.1393,
      "step": 28173
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.448854923248291,
      "learning_rate": 0.00057813426050454,
      "loss": 3.1848,
      "step": 28174
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5571459531784058,
      "learning_rate": 0.0005781327274221128,
      "loss": 2.9971,
      "step": 28175
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7885228395462036,
      "learning_rate": 0.0005781311942879753,
      "loss": 2.8279,
      "step": 28176
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6008944511413574,
      "learning_rate": 0.0005781296611021282,
      "loss": 3.1947,
      "step": 28177
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.422107219696045,
      "learning_rate": 0.0005781281278645715,
      "loss": 2.8902,
      "step": 28178
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7060660123825073,
      "learning_rate": 0.0005781265945753054,
      "loss": 3.1375,
      "step": 28179
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.542608618736267,
      "learning_rate": 0.0005781250612343304,
      "loss": 3.0535,
      "step": 28180
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6249772310256958,
      "learning_rate": 0.0005781235278416467,
      "loss": 3.0522,
      "step": 28181
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6379321813583374,
      "learning_rate": 0.0005781219943972547,
      "loss": 3.3345,
      "step": 28182
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.155815362930298,
      "learning_rate": 0.0005781204609011544,
      "loss": 3.068,
      "step": 28183
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4952025413513184,
      "learning_rate": 0.0005781189273533462,
      "loss": 3.0585,
      "step": 28184
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4515972137451172,
      "learning_rate": 0.0005781173937538306,
      "loss": 3.0015,
      "step": 28185
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.624204397201538,
      "learning_rate": 0.0005781158601026078,
      "loss": 3.0811,
      "step": 28186
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4011049270629883,
      "learning_rate": 0.0005781143263996777,
      "loss": 3.1552,
      "step": 28187
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.590803861618042,
      "learning_rate": 0.0005781127926450412,
      "loss": 3.3263,
      "step": 28188
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.393054485321045,
      "learning_rate": 0.0005781112588386982,
      "loss": 2.9977,
      "step": 28189
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5871684551239014,
      "learning_rate": 0.000578109724980649,
      "loss": 3.1996,
      "step": 28190
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5924253463745117,
      "learning_rate": 0.0005781081910708939,
      "loss": 2.9536,
      "step": 28191
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8510351181030273,
      "learning_rate": 0.0005781066571094334,
      "loss": 3.1129,
      "step": 28192
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5546073913574219,
      "learning_rate": 0.0005781051230962675,
      "loss": 3.3946,
      "step": 28193
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.434779405593872,
      "learning_rate": 0.0005781035890313967,
      "loss": 2.9318,
      "step": 28194
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.308201789855957,
      "learning_rate": 0.0005781020549148212,
      "loss": 3.1843,
      "step": 28195
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2929726839065552,
      "learning_rate": 0.0005781005207465412,
      "loss": 3.1554,
      "step": 28196
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5186352729797363,
      "learning_rate": 0.000578098986526557,
      "loss": 3.1665,
      "step": 28197
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.244243621826172,
      "learning_rate": 0.000578097452254869,
      "loss": 3.5294,
      "step": 28198
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5126498937606812,
      "learning_rate": 0.0005780959179314775,
      "loss": 2.8955,
      "step": 28199
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.273708701133728,
      "learning_rate": 0.0005780943835563828,
      "loss": 3.2612,
      "step": 28200
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0407140254974365,
      "learning_rate": 0.000578092849129585,
      "loss": 3.2088,
      "step": 28201
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1407806873321533,
      "learning_rate": 0.0005780913146510845,
      "loss": 3.2279,
      "step": 28202
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5285959243774414,
      "learning_rate": 0.0005780897801208815,
      "loss": 3.0244,
      "step": 28203
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1277927160263062,
      "learning_rate": 0.0005780882455389765,
      "loss": 3.376,
      "step": 28204
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4297157526016235,
      "learning_rate": 0.0005780867109053695,
      "loss": 3.1974,
      "step": 28205
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.480338454246521,
      "learning_rate": 0.0005780851762200611,
      "loss": 3.1603,
      "step": 28206
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.699501395225525,
      "learning_rate": 0.0005780836414830512,
      "loss": 3.0893,
      "step": 28207
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5606356859207153,
      "learning_rate": 0.0005780821066943405,
      "loss": 3.0013,
      "step": 28208
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8672407865524292,
      "learning_rate": 0.0005780805718539289,
      "loss": 3.1671,
      "step": 28209
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.552884817123413,
      "learning_rate": 0.0005780790369618171,
      "loss": 3.0792,
      "step": 28210
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4035162925720215,
      "learning_rate": 0.0005780775020180049,
      "loss": 3.1606,
      "step": 28211
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6364619731903076,
      "learning_rate": 0.0005780759670224931,
      "loss": 3.112,
      "step": 28212
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5129905939102173,
      "learning_rate": 0.0005780744319752816,
      "loss": 3.1941,
      "step": 28213
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6795132160186768,
      "learning_rate": 0.0005780728968763708,
      "loss": 3.1527,
      "step": 28214
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.464614987373352,
      "learning_rate": 0.0005780713617257611,
      "loss": 2.7558,
      "step": 28215
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.626845359802246,
      "learning_rate": 0.0005780698265234526,
      "loss": 3.3865,
      "step": 28216
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.466444969177246,
      "learning_rate": 0.0005780682912694457,
      "loss": 3.0111,
      "step": 28217
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6584031581878662,
      "learning_rate": 0.0005780667559637405,
      "loss": 2.8075,
      "step": 28218
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5549886226654053,
      "learning_rate": 0.0005780652206063376,
      "loss": 3.1047,
      "step": 28219
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8520660400390625,
      "learning_rate": 0.0005780636851972372,
      "loss": 3.0248,
      "step": 28220
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4132646322250366,
      "learning_rate": 0.0005780621497364393,
      "loss": 3.3107,
      "step": 28221
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5317045450210571,
      "learning_rate": 0.0005780606142239446,
      "loss": 3.1965,
      "step": 28222
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7235451936721802,
      "learning_rate": 0.0005780590786597531,
      "loss": 3.3273,
      "step": 28223
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8999954462051392,
      "learning_rate": 0.0005780575430438652,
      "loss": 3.1515,
      "step": 28224
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1152830123901367,
      "learning_rate": 0.0005780560073762811,
      "loss": 2.8225,
      "step": 28225
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.418373703956604,
      "learning_rate": 0.0005780544716570012,
      "loss": 3.115,
      "step": 28226
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7086108922958374,
      "learning_rate": 0.0005780529358860256,
      "loss": 3.0343,
      "step": 28227
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6213535070419312,
      "learning_rate": 0.0005780514000633547,
      "loss": 3.0397,
      "step": 28228
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.578697919845581,
      "learning_rate": 0.0005780498641889889,
      "loss": 2.9242,
      "step": 28229
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5353654623031616,
      "learning_rate": 0.0005780483282629285,
      "loss": 3.0039,
      "step": 28230
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9145337343215942,
      "learning_rate": 0.0005780467922851735,
      "loss": 3.1116,
      "step": 28231
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.737642765045166,
      "learning_rate": 0.0005780452562557243,
      "loss": 3.0321,
      "step": 28232
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7274197340011597,
      "learning_rate": 0.0005780437201745814,
      "loss": 2.9053,
      "step": 28233
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3463106155395508,
      "learning_rate": 0.0005780421840417449,
      "loss": 3.2207,
      "step": 28234
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5435172319412231,
      "learning_rate": 0.0005780406478572151,
      "loss": 2.9744,
      "step": 28235
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7887489795684814,
      "learning_rate": 0.0005780391116209921,
      "loss": 3.0347,
      "step": 28236
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5315732955932617,
      "learning_rate": 0.0005780375753330766,
      "loss": 2.9169,
      "step": 28237
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4120533466339111,
      "learning_rate": 0.0005780360389934687,
      "loss": 3.2912,
      "step": 28238
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6582410335540771,
      "learning_rate": 0.0005780345026021685,
      "loss": 3.0037,
      "step": 28239
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3565351963043213,
      "learning_rate": 0.0005780329661591765,
      "loss": 3.1116,
      "step": 28240
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8722217082977295,
      "learning_rate": 0.0005780314296644929,
      "loss": 3.1957,
      "step": 28241
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5403209924697876,
      "learning_rate": 0.0005780298931181181,
      "loss": 3.0116,
      "step": 28242
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5651041269302368,
      "learning_rate": 0.0005780283565200522,
      "loss": 3.0104,
      "step": 28243
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.761110782623291,
      "learning_rate": 0.0005780268198702955,
      "loss": 2.8306,
      "step": 28244
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0330934524536133,
      "learning_rate": 0.0005780252831688485,
      "loss": 2.9478,
      "step": 28245
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5642417669296265,
      "learning_rate": 0.0005780237464157113,
      "loss": 3.022,
      "step": 28246
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.676311731338501,
      "learning_rate": 0.0005780222096108842,
      "loss": 2.8929,
      "step": 28247
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8330248594284058,
      "learning_rate": 0.0005780206727543677,
      "loss": 2.8903,
      "step": 28248
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7181854248046875,
      "learning_rate": 0.0005780191358461617,
      "loss": 2.9704,
      "step": 28249
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5413694381713867,
      "learning_rate": 0.0005780175988862668,
      "loss": 3.1858,
      "step": 28250
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1041080951690674,
      "learning_rate": 0.0005780160618746832,
      "loss": 3.2109,
      "step": 28251
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.478883981704712,
      "learning_rate": 0.0005780145248114111,
      "loss": 3.208,
      "step": 28252
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5260685682296753,
      "learning_rate": 0.0005780129876964509,
      "loss": 3.0727,
      "step": 28253
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8512840270996094,
      "learning_rate": 0.0005780114505298028,
      "loss": 3.2223,
      "step": 28254
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.320690393447876,
      "learning_rate": 0.0005780099133114671,
      "loss": 2.9362,
      "step": 28255
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6210519075393677,
      "learning_rate": 0.0005780083760414442,
      "loss": 3.0416,
      "step": 28256
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0801072120666504,
      "learning_rate": 0.0005780068387197342,
      "loss": 3.0529,
      "step": 28257
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3584953546524048,
      "learning_rate": 0.0005780053013463377,
      "loss": 3.2293,
      "step": 28258
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.630149483680725,
      "learning_rate": 0.0005780037639212545,
      "loss": 2.8728,
      "step": 28259
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4823367595672607,
      "learning_rate": 0.0005780022264444853,
      "loss": 3.1306,
      "step": 28260
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6592717170715332,
      "learning_rate": 0.0005780006889160301,
      "loss": 3.0735,
      "step": 28261
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3306832313537598,
      "learning_rate": 0.0005779991513358894,
      "loss": 3.1375,
      "step": 28262
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8516318798065186,
      "learning_rate": 0.0005779976137040635,
      "loss": 2.9851,
      "step": 28263
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.759849786758423,
      "learning_rate": 0.0005779960760205525,
      "loss": 2.9775,
      "step": 28264
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6073760986328125,
      "learning_rate": 0.0005779945382853568,
      "loss": 3.1131,
      "step": 28265
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7376469373703003,
      "learning_rate": 0.0005779930004984766,
      "loss": 3.2105,
      "step": 28266
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4541856050491333,
      "learning_rate": 0.0005779914626599124,
      "loss": 3.1984,
      "step": 28267
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7590389251708984,
      "learning_rate": 0.0005779899247696642,
      "loss": 3.1028,
      "step": 28268
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5623425245285034,
      "learning_rate": 0.0005779883868277326,
      "loss": 3.1743,
      "step": 28269
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.10154128074646,
      "learning_rate": 0.0005779868488341175,
      "loss": 2.9824,
      "step": 28270
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0313894748687744,
      "learning_rate": 0.0005779853107888195,
      "loss": 2.9211,
      "step": 28271
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.653788447380066,
      "learning_rate": 0.0005779837726918388,
      "loss": 3.1722,
      "step": 28272
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.1528472900390625,
      "learning_rate": 0.0005779822345431756,
      "loss": 2.9287,
      "step": 28273
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.3674795627593994,
      "learning_rate": 0.0005779806963428303,
      "loss": 2.9995,
      "step": 28274
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8368507623672485,
      "learning_rate": 0.0005779791580908032,
      "loss": 3.0417,
      "step": 28275
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4351325035095215,
      "learning_rate": 0.0005779776197870944,
      "loss": 3.3013,
      "step": 28276
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.989903688430786,
      "learning_rate": 0.0005779760814317044,
      "loss": 3.1623,
      "step": 28277
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7751048803329468,
      "learning_rate": 0.0005779745430246334,
      "loss": 3.1692,
      "step": 28278
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.0778372287750244,
      "learning_rate": 0.0005779730045658817,
      "loss": 2.8603,
      "step": 28279
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.039314031600952,
      "learning_rate": 0.0005779714660554495,
      "loss": 3.1082,
      "step": 28280
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9623222351074219,
      "learning_rate": 0.0005779699274933372,
      "loss": 3.1275,
      "step": 28281
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6959604024887085,
      "learning_rate": 0.000577968388879545,
      "loss": 3.2055,
      "step": 28282
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4642051458358765,
      "learning_rate": 0.0005779668502140733,
      "loss": 2.8975,
      "step": 28283
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.900840163230896,
      "learning_rate": 0.0005779653114969223,
      "loss": 2.7625,
      "step": 28284
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.369593620300293,
      "learning_rate": 0.0005779637727280924,
      "loss": 3.0853,
      "step": 28285
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4145525693893433,
      "learning_rate": 0.0005779622339075835,
      "loss": 2.9881,
      "step": 28286
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8202985525131226,
      "learning_rate": 0.0005779606950353964,
      "loss": 3.178,
      "step": 28287
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.381324291229248,
      "learning_rate": 0.0005779591561115311,
      "loss": 3.0062,
      "step": 28288
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5250335931777954,
      "learning_rate": 0.000577957617135988,
      "loss": 3.2387,
      "step": 28289
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8361799716949463,
      "learning_rate": 0.0005779560781087673,
      "loss": 3.2149,
      "step": 28290
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3233568668365479,
      "learning_rate": 0.0005779545390298692,
      "loss": 3.214,
      "step": 28291
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4068964719772339,
      "learning_rate": 0.0005779529998992942,
      "loss": 3.1477,
      "step": 28292
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4180572032928467,
      "learning_rate": 0.0005779514607170426,
      "loss": 3.0655,
      "step": 28293
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4605669975280762,
      "learning_rate": 0.0005779499214831143,
      "loss": 2.9945,
      "step": 28294
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5321012735366821,
      "learning_rate": 0.0005779483821975102,
      "loss": 2.9288,
      "step": 28295
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4317549467086792,
      "learning_rate": 0.00057794684286023,
      "loss": 3.089,
      "step": 28296
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7810686826705933,
      "learning_rate": 0.0005779453034712744,
      "loss": 2.9919,
      "step": 28297
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.286922574043274,
      "learning_rate": 0.0005779437640306435,
      "loss": 2.9265,
      "step": 28298
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.429919958114624,
      "learning_rate": 0.0005779422245383375,
      "loss": 2.8637,
      "step": 28299
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4166128635406494,
      "learning_rate": 0.0005779406849943568,
      "loss": 3.0636,
      "step": 28300
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.462590217590332,
      "learning_rate": 0.0005779391453987017,
      "loss": 3.1188,
      "step": 28301
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.121713399887085,
      "learning_rate": 0.0005779376057513725,
      "loss": 2.9026,
      "step": 28302
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4011727571487427,
      "learning_rate": 0.0005779360660523696,
      "loss": 3.149,
      "step": 28303
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3245693445205688,
      "learning_rate": 0.0005779345263016929,
      "loss": 3.11,
      "step": 28304
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.988987684249878,
      "learning_rate": 0.0005779329864993429,
      "loss": 3.2081,
      "step": 28305
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3829901218414307,
      "learning_rate": 0.0005779314466453201,
      "loss": 3.168,
      "step": 28306
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4479618072509766,
      "learning_rate": 0.0005779299067396245,
      "loss": 3.177,
      "step": 28307
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.9682044982910156,
      "learning_rate": 0.0005779283667822565,
      "loss": 3.0545,
      "step": 28308
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.975450873374939,
      "learning_rate": 0.0005779268267732165,
      "loss": 3.2788,
      "step": 28309
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.297369956970215,
      "learning_rate": 0.0005779252867125044,
      "loss": 3.245,
      "step": 28310
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.8115198612213135,
      "learning_rate": 0.000577923746600121,
      "loss": 2.8885,
      "step": 28311
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4398707151412964,
      "learning_rate": 0.0005779222064360661,
      "loss": 3.0299,
      "step": 28312
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8854295015335083,
      "learning_rate": 0.0005779206662203404,
      "loss": 3.2535,
      "step": 28313
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.495434284210205,
      "learning_rate": 0.0005779191259529438,
      "loss": 3.1634,
      "step": 28314
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6307824850082397,
      "learning_rate": 0.000577917585633877,
      "loss": 3.2776,
      "step": 28315
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5125080347061157,
      "learning_rate": 0.00057791604526314,
      "loss": 2.9775,
      "step": 28316
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2898807525634766,
      "learning_rate": 0.000577914504840733,
      "loss": 3.1386,
      "step": 28317
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3689309358596802,
      "learning_rate": 0.0005779129643666567,
      "loss": 2.9488,
      "step": 28318
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5312366485595703,
      "learning_rate": 0.000577911423840911,
      "loss": 3.0126,
      "step": 28319
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9280636310577393,
      "learning_rate": 0.0005779098832634963,
      "loss": 3.1394,
      "step": 28320
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8138959407806396,
      "learning_rate": 0.000577908342634413,
      "loss": 3.1578,
      "step": 28321
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7392616271972656,
      "learning_rate": 0.0005779068019536612,
      "loss": 3.3615,
      "step": 28322
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8891299962997437,
      "learning_rate": 0.0005779052612212413,
      "loss": 3.0986,
      "step": 28323
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3712867498397827,
      "learning_rate": 0.0005779037204371537,
      "loss": 3.259,
      "step": 28324
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.736623764038086,
      "learning_rate": 0.0005779021796013985,
      "loss": 3.2293,
      "step": 28325
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.728742003440857,
      "learning_rate": 0.0005779006387139758,
      "loss": 3.1973,
      "step": 28326
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2587467432022095,
      "learning_rate": 0.0005778990977748864,
      "loss": 3.1101,
      "step": 28327
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8914191722869873,
      "learning_rate": 0.0005778975567841303,
      "loss": 3.0923,
      "step": 28328
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5934107303619385,
      "learning_rate": 0.0005778960157417076,
      "loss": 3.1116,
      "step": 28329
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.145956039428711,
      "learning_rate": 0.0005778944746476189,
      "loss": 3.0835,
      "step": 28330
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.580249309539795,
      "learning_rate": 0.0005778929335018644,
      "loss": 3.0246,
      "step": 28331
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4581562280654907,
      "learning_rate": 0.0005778913923044443,
      "loss": 3.2861,
      "step": 28332
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4509832859039307,
      "learning_rate": 0.000577889851055359,
      "loss": 2.9715,
      "step": 28333
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6014487743377686,
      "learning_rate": 0.0005778883097546087,
      "loss": 3.1083,
      "step": 28334
    },
    {
      "epoch": 0.37,
      "grad_norm": 4.383281707763672,
      "learning_rate": 0.0005778867684021938,
      "loss": 2.8841,
      "step": 28335
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6048697233200073,
      "learning_rate": 0.0005778852269981143,
      "loss": 3.0566,
      "step": 28336
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6975080966949463,
      "learning_rate": 0.0005778836855423709,
      "loss": 3.0396,
      "step": 28337
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.082368850708008,
      "learning_rate": 0.0005778821440349636,
      "loss": 3.3011,
      "step": 28338
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8140844106674194,
      "learning_rate": 0.0005778806024758928,
      "loss": 3.1405,
      "step": 28339
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8651323318481445,
      "learning_rate": 0.0005778790608651586,
      "loss": 3.1894,
      "step": 28340
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2460227012634277,
      "learning_rate": 0.0005778775192027616,
      "loss": 3.0991,
      "step": 28341
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4278773069381714,
      "learning_rate": 0.0005778759774887019,
      "loss": 3.2342,
      "step": 28342
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4391587972640991,
      "learning_rate": 0.0005778744357229798,
      "loss": 3.1124,
      "step": 28343
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6499137878417969,
      "learning_rate": 0.0005778728939055956,
      "loss": 2.9185,
      "step": 28344
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9455126523971558,
      "learning_rate": 0.0005778713520365495,
      "loss": 3.0953,
      "step": 28345
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5831034183502197,
      "learning_rate": 0.0005778698101158419,
      "loss": 2.9896,
      "step": 28346
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8557629585266113,
      "learning_rate": 0.0005778682681434732,
      "loss": 3.0243,
      "step": 28347
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5440740585327148,
      "learning_rate": 0.0005778667261194434,
      "loss": 3.1206,
      "step": 28348
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4684370756149292,
      "learning_rate": 0.0005778651840437529,
      "loss": 2.7833,
      "step": 28349
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8601592779159546,
      "learning_rate": 0.0005778636419164022,
      "loss": 3.1041,
      "step": 28350
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.338832974433899,
      "learning_rate": 0.0005778620997373913,
      "loss": 3.2212,
      "step": 28351
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5544312000274658,
      "learning_rate": 0.0005778605575067205,
      "loss": 2.9598,
      "step": 28352
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.97524094581604,
      "learning_rate": 0.0005778590152243902,
      "loss": 3.0595,
      "step": 28353
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7328555583953857,
      "learning_rate": 0.0005778574728904008,
      "loss": 3.1069,
      "step": 28354
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.395738124847412,
      "learning_rate": 0.0005778559305047523,
      "loss": 3.3757,
      "step": 28355
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0587117671966553,
      "learning_rate": 0.0005778543880674453,
      "loss": 2.9645,
      "step": 28356
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4274423122406006,
      "learning_rate": 0.0005778528455784797,
      "loss": 3.0058,
      "step": 28357
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5062391757965088,
      "learning_rate": 0.0005778513030378562,
      "loss": 2.9652,
      "step": 28358
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6871048212051392,
      "learning_rate": 0.0005778497604455749,
      "loss": 3.1078,
      "step": 28359
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6235153675079346,
      "learning_rate": 0.0005778482178016359,
      "loss": 2.8837,
      "step": 28360
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3749593496322632,
      "learning_rate": 0.0005778466751060399,
      "loss": 2.8105,
      "step": 28361
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1509366035461426,
      "learning_rate": 0.0005778451323587869,
      "loss": 3.1783,
      "step": 28362
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.73878014087677,
      "learning_rate": 0.0005778435895598772,
      "loss": 3.057,
      "step": 28363
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8932240009307861,
      "learning_rate": 0.0005778420467093111,
      "loss": 3.058,
      "step": 28364
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6884143352508545,
      "learning_rate": 0.000577840503807089,
      "loss": 2.9647,
      "step": 28365
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.441527843475342,
      "learning_rate": 0.000577838960853211,
      "loss": 3.2865,
      "step": 28366
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5041069984436035,
      "learning_rate": 0.0005778374178476775,
      "loss": 3.2343,
      "step": 28367
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6865649223327637,
      "learning_rate": 0.0005778358747904889,
      "loss": 2.9337,
      "step": 28368
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0936334133148193,
      "learning_rate": 0.0005778343316816453,
      "loss": 2.8913,
      "step": 28369
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.708974838256836,
      "learning_rate": 0.0005778327885211469,
      "loss": 2.8731,
      "step": 28370
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5039775371551514,
      "learning_rate": 0.0005778312453089944,
      "loss": 3.3045,
      "step": 28371
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.485154390335083,
      "learning_rate": 0.0005778297020451878,
      "loss": 3.0394,
      "step": 28372
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4128957986831665,
      "learning_rate": 0.0005778281587297272,
      "loss": 3.1959,
      "step": 28373
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0191752910614014,
      "learning_rate": 0.0005778266153626133,
      "loss": 3.2109,
      "step": 28374
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.72109854221344,
      "learning_rate": 0.0005778250719438461,
      "loss": 3.0113,
      "step": 28375
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4059475660324097,
      "learning_rate": 0.0005778235284734259,
      "loss": 2.9977,
      "step": 28376
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.964127540588379,
      "learning_rate": 0.0005778219849513533,
      "loss": 2.9895,
      "step": 28377
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8168562650680542,
      "learning_rate": 0.0005778204413776282,
      "loss": 3.0677,
      "step": 28378
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4368432760238647,
      "learning_rate": 0.000577818897752251,
      "loss": 3.1011,
      "step": 28379
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6251442432403564,
      "learning_rate": 0.000577817354075222,
      "loss": 3.1451,
      "step": 28380
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0736513137817383,
      "learning_rate": 0.0005778158103465417,
      "loss": 3.1569,
      "step": 28381
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5775600671768188,
      "learning_rate": 0.00057781426656621,
      "loss": 2.9906,
      "step": 28382
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7700674533843994,
      "learning_rate": 0.0005778127227342274,
      "loss": 3.2202,
      "step": 28383
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.8113839626312256,
      "learning_rate": 0.0005778111788505943,
      "loss": 2.988,
      "step": 28384
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.472506523132324,
      "learning_rate": 0.0005778096349153107,
      "loss": 3.2935,
      "step": 28385
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0944437980651855,
      "learning_rate": 0.0005778080909283772,
      "loss": 3.0954,
      "step": 28386
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1450412273406982,
      "learning_rate": 0.0005778065468897939,
      "loss": 3.1116,
      "step": 28387
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9617068767547607,
      "learning_rate": 0.000577805002799561,
      "loss": 2.8076,
      "step": 28388
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3465657234191895,
      "learning_rate": 0.000577803458657679,
      "loss": 3.2716,
      "step": 28389
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.241434931755066,
      "learning_rate": 0.0005778019144641481,
      "loss": 3.3342,
      "step": 28390
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6792365312576294,
      "learning_rate": 0.0005778003702189685,
      "loss": 2.9263,
      "step": 28391
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9687649011611938,
      "learning_rate": 0.0005777988259221407,
      "loss": 3.0599,
      "step": 28392
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.210146427154541,
      "learning_rate": 0.0005777972815736648,
      "loss": 2.8384,
      "step": 28393
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.011014938354492,
      "learning_rate": 0.0005777957371735412,
      "loss": 3.0989,
      "step": 28394
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9647499322891235,
      "learning_rate": 0.0005777941927217699,
      "loss": 2.8544,
      "step": 28395
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3296234607696533,
      "learning_rate": 0.0005777926482183516,
      "loss": 3.0391,
      "step": 28396
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.846846580505371,
      "learning_rate": 0.0005777911036632863,
      "loss": 3.0719,
      "step": 28397
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1837754249572754,
      "learning_rate": 0.0005777895590565746,
      "loss": 3.1321,
      "step": 28398
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4222842454910278,
      "learning_rate": 0.0005777880143982163,
      "loss": 3.3704,
      "step": 28399
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8017804622650146,
      "learning_rate": 0.0005777864696882121,
      "loss": 3.2901,
      "step": 28400
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.492382526397705,
      "learning_rate": 0.0005777849249265621,
      "loss": 3.1272,
      "step": 28401
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4530295133590698,
      "learning_rate": 0.0005777833801132667,
      "loss": 3.1762,
      "step": 28402
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1019959449768066,
      "learning_rate": 0.0005777818352483262,
      "loss": 3.0755,
      "step": 28403
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4748287200927734,
      "learning_rate": 0.0005777802903317407,
      "loss": 3.1778,
      "step": 28404
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.393619179725647,
      "learning_rate": 0.0005777787453635106,
      "loss": 2.9361,
      "step": 28405
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.885486364364624,
      "learning_rate": 0.0005777772003436362,
      "loss": 3.3201,
      "step": 28406
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.723003625869751,
      "learning_rate": 0.0005777756552721179,
      "loss": 2.9108,
      "step": 28407
    },
    {
      "epoch": 0.37,
      "grad_norm": 4.879570484161377,
      "learning_rate": 0.0005777741101489557,
      "loss": 3.1512,
      "step": 28408
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.727937936782837,
      "learning_rate": 0.0005777725649741501,
      "loss": 3.3158,
      "step": 28409
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.053009271621704,
      "learning_rate": 0.0005777710197477013,
      "loss": 3.0772,
      "step": 28410
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3091819286346436,
      "learning_rate": 0.0005777694744696098,
      "loss": 3.0282,
      "step": 28411
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6658345460891724,
      "learning_rate": 0.0005777679291398755,
      "loss": 3.0353,
      "step": 28412
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.573249340057373,
      "learning_rate": 0.000577766383758499,
      "loss": 3.1263,
      "step": 28413
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3912400007247925,
      "learning_rate": 0.0005777648383254805,
      "loss": 3.1514,
      "step": 28414
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5812201499938965,
      "learning_rate": 0.0005777632928408202,
      "loss": 2.8987,
      "step": 28415
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4264867305755615,
      "learning_rate": 0.0005777617473045185,
      "loss": 3.281,
      "step": 28416
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.43509840965271,
      "learning_rate": 0.0005777602017165756,
      "loss": 2.8101,
      "step": 28417
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5660009384155273,
      "learning_rate": 0.0005777586560769919,
      "loss": 3.2457,
      "step": 28418
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9293094873428345,
      "learning_rate": 0.0005777571103857676,
      "loss": 3.0588,
      "step": 28419
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6146007776260376,
      "learning_rate": 0.000577755564642903,
      "loss": 2.9735,
      "step": 28420
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6036450862884521,
      "learning_rate": 0.0005777540188483984,
      "loss": 3.2075,
      "step": 28421
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.447538375854492,
      "learning_rate": 0.0005777524730022541,
      "loss": 3.1545,
      "step": 28422
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4260225296020508,
      "learning_rate": 0.0005777509271044704,
      "loss": 3.2458,
      "step": 28423
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.199988603591919,
      "learning_rate": 0.0005777493811550474,
      "loss": 3.1884,
      "step": 28424
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.40028977394104,
      "learning_rate": 0.0005777478351539858,
      "loss": 3.0656,
      "step": 28425
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4357601404190063,
      "learning_rate": 0.0005777462891012855,
      "loss": 2.8789,
      "step": 28426
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9876224994659424,
      "learning_rate": 0.0005777447429969469,
      "loss": 2.9853,
      "step": 28427
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.304834246635437,
      "learning_rate": 0.0005777431968409703,
      "loss": 3.2415,
      "step": 28428
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7165309190750122,
      "learning_rate": 0.0005777416506333561,
      "loss": 2.932,
      "step": 28429
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.401228904724121,
      "learning_rate": 0.0005777401043741044,
      "loss": 3.0927,
      "step": 28430
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3431164026260376,
      "learning_rate": 0.0005777385580632156,
      "loss": 3.2663,
      "step": 28431
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6487789154052734,
      "learning_rate": 0.0005777370117006899,
      "loss": 3.0779,
      "step": 28432
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6893686056137085,
      "learning_rate": 0.0005777354652865277,
      "loss": 2.9851,
      "step": 28433
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.1703108549118042,
      "learning_rate": 0.0005777339188207293,
      "loss": 3.0807,
      "step": 28434
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5227619409561157,
      "learning_rate": 0.0005777323723032948,
      "loss": 2.9438,
      "step": 28435
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4802467823028564,
      "learning_rate": 0.0005777308257342246,
      "loss": 3.0504,
      "step": 28436
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6937192678451538,
      "learning_rate": 0.0005777292791135192,
      "loss": 3.1415,
      "step": 28437
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.458856463432312,
      "learning_rate": 0.0005777277324411784,
      "loss": 3.0259,
      "step": 28438
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.433309555053711,
      "learning_rate": 0.000577726185717203,
      "loss": 3.1002,
      "step": 28439
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6405136585235596,
      "learning_rate": 0.000577724638941593,
      "loss": 2.8696,
      "step": 28440
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4671194553375244,
      "learning_rate": 0.0005777230921143487,
      "loss": 2.9123,
      "step": 28441
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7127597332000732,
      "learning_rate": 0.0005777215452354705,
      "loss": 3.1149,
      "step": 28442
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6272627115249634,
      "learning_rate": 0.0005777199983049585,
      "loss": 3.0028,
      "step": 28443
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7199138402938843,
      "learning_rate": 0.0005777184513228133,
      "loss": 3.0297,
      "step": 28444
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5024199485778809,
      "learning_rate": 0.0005777169042890349,
      "loss": 3.4316,
      "step": 28445
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3988003730773926,
      "learning_rate": 0.0005777153572036236,
      "loss": 3.1152,
      "step": 28446
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5951642990112305,
      "learning_rate": 0.0005777138100665799,
      "loss": 2.892,
      "step": 28447
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6812800168991089,
      "learning_rate": 0.0005777122628779039,
      "loss": 3.0515,
      "step": 28448
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6762335300445557,
      "learning_rate": 0.0005777107156375959,
      "loss": 3.1058,
      "step": 28449
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8632093667984009,
      "learning_rate": 0.0005777091683456562,
      "loss": 2.953,
      "step": 28450
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2870676517486572,
      "learning_rate": 0.0005777076210020853,
      "loss": 3.0344,
      "step": 28451
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4123172760009766,
      "learning_rate": 0.0005777060736068833,
      "loss": 3.1395,
      "step": 28452
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4713521003723145,
      "learning_rate": 0.0005777045261600504,
      "loss": 2.9483,
      "step": 28453
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.509143352508545,
      "learning_rate": 0.000577702978661587,
      "loss": 3.1961,
      "step": 28454
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6062734127044678,
      "learning_rate": 0.0005777014311114933,
      "loss": 3.1524,
      "step": 28455
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.9627373218536377,
      "learning_rate": 0.0005776998835097698,
      "loss": 3.1375,
      "step": 28456
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.7633309364318848,
      "learning_rate": 0.0005776983358564166,
      "loss": 3.0994,
      "step": 28457
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6008325815200806,
      "learning_rate": 0.000577696788151434,
      "loss": 3.0905,
      "step": 28458
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.235072135925293,
      "learning_rate": 0.0005776952403948225,
      "loss": 2.8035,
      "step": 28459
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.7300729751586914,
      "learning_rate": 0.0005776936925865819,
      "loss": 2.9663,
      "step": 28460
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1666440963745117,
      "learning_rate": 0.000577692144726713,
      "loss": 2.9176,
      "step": 28461
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5183863639831543,
      "learning_rate": 0.0005776905968152159,
      "loss": 2.8707,
      "step": 28462
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4556708335876465,
      "learning_rate": 0.0005776890488520908,
      "loss": 3.1588,
      "step": 28463
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0728468894958496,
      "learning_rate": 0.0005776875008373382,
      "loss": 3.0557,
      "step": 28464
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9014323949813843,
      "learning_rate": 0.0005776859527709581,
      "loss": 3.1392,
      "step": 28465
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5424742698669434,
      "learning_rate": 0.000577684404652951,
      "loss": 3.0025,
      "step": 28466
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6108059883117676,
      "learning_rate": 0.000577682856483317,
      "loss": 3.1129,
      "step": 28467
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1855897903442383,
      "learning_rate": 0.0005776813082620568,
      "loss": 2.9732,
      "step": 28468
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.75137460231781,
      "learning_rate": 0.0005776797599891702,
      "loss": 3.0079,
      "step": 28469
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6252144575119019,
      "learning_rate": 0.0005776782116646577,
      "loss": 3.058,
      "step": 28470
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6908276081085205,
      "learning_rate": 0.0005776766632885197,
      "loss": 2.9013,
      "step": 28471
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.589026689529419,
      "learning_rate": 0.0005776751148607563,
      "loss": 3.011,
      "step": 28472
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3467984199523926,
      "learning_rate": 0.0005776735663813678,
      "loss": 3.2487,
      "step": 28473
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3459091186523438,
      "learning_rate": 0.0005776720178503544,
      "loss": 3.2623,
      "step": 28474
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7805800437927246,
      "learning_rate": 0.0005776704692677168,
      "loss": 3.2962,
      "step": 28475
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9409316778182983,
      "learning_rate": 0.0005776689206334549,
      "loss": 3.3183,
      "step": 28476
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3752858638763428,
      "learning_rate": 0.0005776673719475692,
      "loss": 3.2356,
      "step": 28477
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5435287952423096,
      "learning_rate": 0.0005776658232100598,
      "loss": 3.0356,
      "step": 28478
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4522485733032227,
      "learning_rate": 0.000577664274420927,
      "loss": 3.1651,
      "step": 28479
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.8155078887939453,
      "learning_rate": 0.0005776627255801713,
      "loss": 2.9614,
      "step": 28480
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0769782066345215,
      "learning_rate": 0.0005776611766877929,
      "loss": 3.2594,
      "step": 28481
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6458922624588013,
      "learning_rate": 0.000577659627743792,
      "loss": 3.1346,
      "step": 28482
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4122202396392822,
      "learning_rate": 0.0005776580787481689,
      "loss": 3.2556,
      "step": 28483
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4994663000106812,
      "learning_rate": 0.0005776565297009239,
      "loss": 2.9895,
      "step": 28484
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4073328971862793,
      "learning_rate": 0.0005776549806020574,
      "loss": 3.0504,
      "step": 28485
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5728514194488525,
      "learning_rate": 0.0005776534314515695,
      "loss": 3.0437,
      "step": 28486
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.319661259651184,
      "learning_rate": 0.0005776518822494607,
      "loss": 3.121,
      "step": 28487
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.686840534210205,
      "learning_rate": 0.0005776503329957311,
      "loss": 3.1361,
      "step": 28488
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3722646236419678,
      "learning_rate": 0.000577648783690381,
      "loss": 2.8043,
      "step": 28489
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4879565238952637,
      "learning_rate": 0.0005776472343334108,
      "loss": 3.1943,
      "step": 28490
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.318576455116272,
      "learning_rate": 0.0005776456849248207,
      "loss": 3.0359,
      "step": 28491
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4054346084594727,
      "learning_rate": 0.0005776441354646112,
      "loss": 3.1199,
      "step": 28492
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3717585802078247,
      "learning_rate": 0.0005776425859527822,
      "loss": 2.8331,
      "step": 28493
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0435428619384766,
      "learning_rate": 0.0005776410363893344,
      "loss": 2.827,
      "step": 28494
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.7271552085876465,
      "learning_rate": 0.0005776394867742677,
      "loss": 3.3747,
      "step": 28495
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3605166673660278,
      "learning_rate": 0.0005776379371075827,
      "loss": 2.8899,
      "step": 28496
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3495988845825195,
      "learning_rate": 0.0005776363873892795,
      "loss": 3.0973,
      "step": 28497
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3114306926727295,
      "learning_rate": 0.0005776348376193585,
      "loss": 2.9821,
      "step": 28498
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.796524167060852,
      "learning_rate": 0.0005776332877978199,
      "loss": 3.0657,
      "step": 28499
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5780271291732788,
      "learning_rate": 0.0005776317379246641,
      "loss": 3.1816,
      "step": 28500
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3578133583068848,
      "learning_rate": 0.0005776301879998912,
      "loss": 2.832,
      "step": 28501
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0824363231658936,
      "learning_rate": 0.0005776286380235017,
      "loss": 3.0179,
      "step": 28502
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.512986660003662,
      "learning_rate": 0.0005776270879954958,
      "loss": 3.2366,
      "step": 28503
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.1662354469299316,
      "learning_rate": 0.0005776255379158737,
      "loss": 3.2501,
      "step": 28504
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8483126163482666,
      "learning_rate": 0.000577623987784636,
      "loss": 3.1614,
      "step": 28505
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3558619022369385,
      "learning_rate": 0.0005776224376017825,
      "loss": 3.1654,
      "step": 28506
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.300135850906372,
      "learning_rate": 0.0005776208873673139,
      "loss": 3.0072,
      "step": 28507
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7416937351226807,
      "learning_rate": 0.0005776193370812303,
      "loss": 3.4181,
      "step": 28508
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4733788967132568,
      "learning_rate": 0.000577617786743532,
      "loss": 2.9001,
      "step": 28509
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4009076356887817,
      "learning_rate": 0.0005776162363542193,
      "loss": 3.038,
      "step": 28510
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4521777629852295,
      "learning_rate": 0.0005776146859132926,
      "loss": 3.1231,
      "step": 28511
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5242748260498047,
      "learning_rate": 0.0005776131354207519,
      "loss": 3.1034,
      "step": 28512
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4418435096740723,
      "learning_rate": 0.0005776115848765978,
      "loss": 3.2261,
      "step": 28513
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7665512561798096,
      "learning_rate": 0.0005776100342808303,
      "loss": 3.0982,
      "step": 28514
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.904789686203003,
      "learning_rate": 0.0005776084836334502,
      "loss": 2.8533,
      "step": 28515
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0799777507781982,
      "learning_rate": 0.0005776069329344572,
      "loss": 3.2929,
      "step": 28516
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4076594114303589,
      "learning_rate": 0.0005776053821838519,
      "loss": 2.8969,
      "step": 28517
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9813199043273926,
      "learning_rate": 0.0005776038313816345,
      "loss": 3.0199,
      "step": 28518
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.3889026641845703,
      "learning_rate": 0.0005776022805278053,
      "loss": 2.9393,
      "step": 28519
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5949240922927856,
      "learning_rate": 0.0005776007296223646,
      "loss": 2.87,
      "step": 28520
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7855013608932495,
      "learning_rate": 0.0005775991786653127,
      "loss": 3.1115,
      "step": 28521
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4550622701644897,
      "learning_rate": 0.0005775976276566499,
      "loss": 3.3122,
      "step": 28522
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3914880752563477,
      "learning_rate": 0.0005775960765963764,
      "loss": 3.0859,
      "step": 28523
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.9017364978790283,
      "learning_rate": 0.0005775945254844926,
      "loss": 3.0205,
      "step": 28524
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2351641654968262,
      "learning_rate": 0.0005775929743209987,
      "loss": 2.992,
      "step": 28525
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5565484762191772,
      "learning_rate": 0.000577591423105895,
      "loss": 3.0955,
      "step": 28526
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.658353567123413,
      "learning_rate": 0.0005775898718391819,
      "loss": 3.1007,
      "step": 28527
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5482983589172363,
      "learning_rate": 0.0005775883205208595,
      "loss": 3.1063,
      "step": 28528
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8726754188537598,
      "learning_rate": 0.0005775867691509282,
      "loss": 3.1375,
      "step": 28529
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5932825803756714,
      "learning_rate": 0.0005775852177293884,
      "loss": 3.1505,
      "step": 28530
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.892579436302185,
      "learning_rate": 0.0005775836662562402,
      "loss": 3.3716,
      "step": 28531
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.322420597076416,
      "learning_rate": 0.000577582114731484,
      "loss": 3.244,
      "step": 28532
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4490007162094116,
      "learning_rate": 0.0005775805631551199,
      "loss": 3.4091,
      "step": 28533
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.332348108291626,
      "learning_rate": 0.0005775790115271484,
      "loss": 3.0679,
      "step": 28534
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6149334907531738,
      "learning_rate": 0.0005775774598475698,
      "loss": 2.7563,
      "step": 28535
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.873815894126892,
      "learning_rate": 0.0005775759081163842,
      "loss": 3.1547,
      "step": 28536
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3455991744995117,
      "learning_rate": 0.000577574356333592,
      "loss": 3.1504,
      "step": 28537
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.132279396057129,
      "learning_rate": 0.0005775728044991937,
      "loss": 2.9012,
      "step": 28538
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7070950269699097,
      "learning_rate": 0.0005775712526131891,
      "loss": 3.0418,
      "step": 28539
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2237451076507568,
      "learning_rate": 0.000577569700675579,
      "loss": 3.1883,
      "step": 28540
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5559626817703247,
      "learning_rate": 0.0005775681486863633,
      "loss": 3.4329,
      "step": 28541
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5535945892333984,
      "learning_rate": 0.0005775665966455425,
      "loss": 3.2562,
      "step": 28542
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6458752155303955,
      "learning_rate": 0.0005775650445531168,
      "loss": 2.9931,
      "step": 28543
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2513267993927,
      "learning_rate": 0.0005775634924090865,
      "loss": 3.2306,
      "step": 28544
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4676296710968018,
      "learning_rate": 0.0005775619402134519,
      "loss": 3.0684,
      "step": 28545
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.741896390914917,
      "learning_rate": 0.0005775603879662135,
      "loss": 3.1643,
      "step": 28546
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5822234153747559,
      "learning_rate": 0.0005775588356673711,
      "loss": 3.2128,
      "step": 28547
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.961440086364746,
      "learning_rate": 0.0005775572833169254,
      "loss": 2.9088,
      "step": 28548
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9267548322677612,
      "learning_rate": 0.0005775557309148766,
      "loss": 3.2314,
      "step": 28549
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4232313632965088,
      "learning_rate": 0.0005775541784612248,
      "loss": 3.1276,
      "step": 28550
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3984304666519165,
      "learning_rate": 0.0005775526259559706,
      "loss": 3.131,
      "step": 28551
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7506009340286255,
      "learning_rate": 0.0005775510733991141,
      "loss": 2.8513,
      "step": 28552
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4357300996780396,
      "learning_rate": 0.0005775495207906556,
      "loss": 2.9508,
      "step": 28553
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3644694089889526,
      "learning_rate": 0.0005775479681305953,
      "loss": 2.8937,
      "step": 28554
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2493622303009033,
      "learning_rate": 0.0005775464154189337,
      "loss": 2.9259,
      "step": 28555
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.087090015411377,
      "learning_rate": 0.000577544862655671,
      "loss": 3.1842,
      "step": 28556
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4699416160583496,
      "learning_rate": 0.0005775433098408074,
      "loss": 3.0072,
      "step": 28557
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7655993700027466,
      "learning_rate": 0.0005775417569743433,
      "loss": 3.2669,
      "step": 28558
    },
    {
      "epoch": 0.37,
      "grad_norm": 4.020618438720703,
      "learning_rate": 0.000577540204056279,
      "loss": 2.9891,
      "step": 28559
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4813957214355469,
      "learning_rate": 0.0005775386510866146,
      "loss": 3.3172,
      "step": 28560
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.352992057800293,
      "learning_rate": 0.0005775370980653507,
      "loss": 2.887,
      "step": 28561
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0133957862854004,
      "learning_rate": 0.0005775355449924873,
      "loss": 2.8268,
      "step": 28562
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9879826307296753,
      "learning_rate": 0.0005775339918680249,
      "loss": 3.286,
      "step": 28563
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6460824012756348,
      "learning_rate": 0.0005775324386919636,
      "loss": 3.0641,
      "step": 28564
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6768094301223755,
      "learning_rate": 0.0005775308854643038,
      "loss": 3.0477,
      "step": 28565
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1820755004882812,
      "learning_rate": 0.0005775293321850457,
      "loss": 3.0779,
      "step": 28566
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4626195430755615,
      "learning_rate": 0.0005775277788541898,
      "loss": 3.0644,
      "step": 28567
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.583206057548523,
      "learning_rate": 0.0005775262254717362,
      "loss": 3.2319,
      "step": 28568
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.408688545227051,
      "learning_rate": 0.0005775246720376852,
      "loss": 2.8271,
      "step": 28569
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.1885063648223877,
      "learning_rate": 0.0005775231185520371,
      "loss": 3.2152,
      "step": 28570
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9647670984268188,
      "learning_rate": 0.0005775215650147922,
      "loss": 2.9262,
      "step": 28571
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3160653114318848,
      "learning_rate": 0.0005775200114259509,
      "loss": 3.0812,
      "step": 28572
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.175253391265869,
      "learning_rate": 0.0005775184577855133,
      "loss": 3.247,
      "step": 28573
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.8857884407043457,
      "learning_rate": 0.0005775169040934798,
      "loss": 3.0012,
      "step": 28574
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8266403675079346,
      "learning_rate": 0.0005775153503498507,
      "loss": 2.9032,
      "step": 28575
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5870152711868286,
      "learning_rate": 0.0005775137965546263,
      "loss": 3.0826,
      "step": 28576
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.366952419281006,
      "learning_rate": 0.0005775122427078067,
      "loss": 3.2167,
      "step": 28577
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2164859771728516,
      "learning_rate": 0.0005775106888093926,
      "loss": 3.0234,
      "step": 28578
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.431186556816101,
      "learning_rate": 0.0005775091348593838,
      "loss": 2.9334,
      "step": 28579
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6694869995117188,
      "learning_rate": 0.0005775075808577809,
      "loss": 3.1329,
      "step": 28580
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.704145908355713,
      "learning_rate": 0.000577506026804584,
      "loss": 3.2133,
      "step": 28581
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1603901386260986,
      "learning_rate": 0.0005775044726997936,
      "loss": 3.1618,
      "step": 28582
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5360345840454102,
      "learning_rate": 0.0005775029185434099,
      "loss": 2.8322,
      "step": 28583
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.771022081375122,
      "learning_rate": 0.000577501364335433,
      "loss": 3.1007,
      "step": 28584
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.8433923721313477,
      "learning_rate": 0.0005774998100758636,
      "loss": 3.1698,
      "step": 28585
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.600138783454895,
      "learning_rate": 0.0005774982557647016,
      "loss": 3.0906,
      "step": 28586
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3668568134307861,
      "learning_rate": 0.0005774967014019474,
      "loss": 3.0386,
      "step": 28587
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.0734384059906006,
      "learning_rate": 0.0005774951469876014,
      "loss": 2.9054,
      "step": 28588
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.9010062217712402,
      "learning_rate": 0.0005774935925216638,
      "loss": 3.1879,
      "step": 28589
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0601730346679688,
      "learning_rate": 0.0005774920380041349,
      "loss": 2.8578,
      "step": 28590
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6534478664398193,
      "learning_rate": 0.000577490483435015,
      "loss": 3.2088,
      "step": 28591
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8803176879882812,
      "learning_rate": 0.0005774889288143044,
      "loss": 3.074,
      "step": 28592
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8689600229263306,
      "learning_rate": 0.0005774873741420033,
      "loss": 3.0928,
      "step": 28593
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.904618501663208,
      "learning_rate": 0.0005774858194181121,
      "loss": 3.151,
      "step": 28594
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4319981336593628,
      "learning_rate": 0.000577484264642631,
      "loss": 3.0367,
      "step": 28595
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.856079339981079,
      "learning_rate": 0.0005774827098155604,
      "loss": 2.7877,
      "step": 28596
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.5429232120513916,
      "learning_rate": 0.0005774811549369006,
      "loss": 3.3098,
      "step": 28597
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.7157609462738037,
      "learning_rate": 0.0005774796000066517,
      "loss": 3.2971,
      "step": 28598
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.579089641571045,
      "learning_rate": 0.0005774780450248141,
      "loss": 2.7973,
      "step": 28599
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1598010063171387,
      "learning_rate": 0.0005774764899913881,
      "loss": 3.26,
      "step": 28600
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7362399101257324,
      "learning_rate": 0.000577474934906374,
      "loss": 3.3005,
      "step": 28601
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3181527853012085,
      "learning_rate": 0.0005774733797697722,
      "loss": 3.0696,
      "step": 28602
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8023128509521484,
      "learning_rate": 0.0005774718245815827,
      "loss": 2.9727,
      "step": 28603
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.208293914794922,
      "learning_rate": 0.0005774702693418059,
      "loss": 3.1197,
      "step": 28604
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.505574345588684,
      "learning_rate": 0.0005774687140504423,
      "loss": 3.295,
      "step": 28605
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8973698616027832,
      "learning_rate": 0.0005774671587074919,
      "loss": 3.1643,
      "step": 28606
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.765575408935547,
      "learning_rate": 0.0005774656033129552,
      "loss": 2.982,
      "step": 28607
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7121726274490356,
      "learning_rate": 0.0005774640478668324,
      "loss": 2.7517,
      "step": 28608
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.480702519416809,
      "learning_rate": 0.0005774624923691238,
      "loss": 3.1809,
      "step": 28609
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.8278143405914307,
      "learning_rate": 0.0005774609368198296,
      "loss": 3.1133,
      "step": 28610
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9709899425506592,
      "learning_rate": 0.0005774593812189503,
      "loss": 3.2262,
      "step": 28611
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2714731693267822,
      "learning_rate": 0.000577457825566486,
      "loss": 3.0591,
      "step": 28612
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.510680913925171,
      "learning_rate": 0.000577456269862437,
      "loss": 2.893,
      "step": 28613
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.437521457672119,
      "learning_rate": 0.0005774547141068037,
      "loss": 3.1127,
      "step": 28614
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6596384048461914,
      "learning_rate": 0.0005774531582995863,
      "loss": 2.9587,
      "step": 28615
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4417308568954468,
      "learning_rate": 0.0005774516024407853,
      "loss": 3.0834,
      "step": 28616
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5359092950820923,
      "learning_rate": 0.0005774500465304006,
      "loss": 3.1152,
      "step": 28617
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.188713550567627,
      "learning_rate": 0.0005774484905684328,
      "loss": 3.0948,
      "step": 28618
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2535712718963623,
      "learning_rate": 0.000577446934554882,
      "loss": 2.9305,
      "step": 28619
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9409735202789307,
      "learning_rate": 0.0005774453784897485,
      "loss": 3.1161,
      "step": 28620
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5085158348083496,
      "learning_rate": 0.0005774438223730329,
      "loss": 3.0335,
      "step": 28621
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.4463393688201904,
      "learning_rate": 0.0005774422662047351,
      "loss": 3.4054,
      "step": 28622
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3820912837982178,
      "learning_rate": 0.0005774407099848557,
      "loss": 3.085,
      "step": 28623
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6074212789535522,
      "learning_rate": 0.0005774391537133947,
      "loss": 3.1144,
      "step": 28624
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5666041374206543,
      "learning_rate": 0.0005774375973903526,
      "loss": 3.1115,
      "step": 28625
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.3688042163848877,
      "learning_rate": 0.0005774360410157295,
      "loss": 2.9859,
      "step": 28626
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8599821329116821,
      "learning_rate": 0.0005774344845895259,
      "loss": 3.1142,
      "step": 28627
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5315829515457153,
      "learning_rate": 0.0005774329281117419,
      "loss": 2.9213,
      "step": 28628
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.385500192642212,
      "learning_rate": 0.0005774313715823779,
      "loss": 3.1522,
      "step": 28629
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9901922941207886,
      "learning_rate": 0.0005774298150014343,
      "loss": 3.0857,
      "step": 28630
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8279693126678467,
      "learning_rate": 0.0005774282583689112,
      "loss": 3.2064,
      "step": 28631
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4388740062713623,
      "learning_rate": 0.0005774267016848089,
      "loss": 3.1804,
      "step": 28632
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2366976737976074,
      "learning_rate": 0.0005774251449491277,
      "loss": 2.9499,
      "step": 28633
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.115406036376953,
      "learning_rate": 0.000577423588161868,
      "loss": 3.1487,
      "step": 28634
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5834012031555176,
      "learning_rate": 0.0005774220313230301,
      "loss": 3.0231,
      "step": 28635
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7503085136413574,
      "learning_rate": 0.000577420474432614,
      "loss": 3.0793,
      "step": 28636
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.23184871673584,
      "learning_rate": 0.0005774189174906203,
      "loss": 3.1071,
      "step": 28637
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8047959804534912,
      "learning_rate": 0.0005774173604970493,
      "loss": 3.0255,
      "step": 28638
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6972482204437256,
      "learning_rate": 0.000577415803451901,
      "loss": 3.3222,
      "step": 28639
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5290842056274414,
      "learning_rate": 0.000577414246355176,
      "loss": 3.1681,
      "step": 28640
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.57963228225708,
      "learning_rate": 0.0005774126892068743,
      "loss": 2.8264,
      "step": 28641
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3490568399429321,
      "learning_rate": 0.0005774111320069965,
      "loss": 3.033,
      "step": 28642
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.588965654373169,
      "learning_rate": 0.0005774095747555427,
      "loss": 3.0354,
      "step": 28643
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3745098114013672,
      "learning_rate": 0.0005774080174525132,
      "loss": 3.1376,
      "step": 28644
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9516911506652832,
      "learning_rate": 0.0005774064600979083,
      "loss": 3.0695,
      "step": 28645
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5826513767242432,
      "learning_rate": 0.0005774049026917283,
      "loss": 3.1555,
      "step": 28646
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6646647453308105,
      "learning_rate": 0.0005774033452339736,
      "loss": 2.9015,
      "step": 28647
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7115659713745117,
      "learning_rate": 0.0005774017877246443,
      "loss": 3.0433,
      "step": 28648
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.2525041103363037,
      "learning_rate": 0.0005774002301637407,
      "loss": 3.0303,
      "step": 28649
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2137356996536255,
      "learning_rate": 0.0005773986725512633,
      "loss": 3.1523,
      "step": 28650
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.7215967178344727,
      "learning_rate": 0.000577397114887212,
      "loss": 2.9514,
      "step": 28651
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7645915746688843,
      "learning_rate": 0.0005773955571715876,
      "loss": 2.7693,
      "step": 28652
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3403995037078857,
      "learning_rate": 0.00057739399940439,
      "loss": 2.9299,
      "step": 28653
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.340308427810669,
      "learning_rate": 0.0005773924415856197,
      "loss": 2.9728,
      "step": 28654
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6023788452148438,
      "learning_rate": 0.0005773908837152768,
      "loss": 2.9662,
      "step": 28655
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6546602249145508,
      "learning_rate": 0.0005773893257933619,
      "loss": 3.0268,
      "step": 28656
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6354477405548096,
      "learning_rate": 0.0005773877678198749,
      "loss": 3.3258,
      "step": 28657
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.134861946105957,
      "learning_rate": 0.0005773862097948163,
      "loss": 2.8437,
      "step": 28658
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7012782096862793,
      "learning_rate": 0.0005773846517181864,
      "loss": 3.2758,
      "step": 28659
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.597583293914795,
      "learning_rate": 0.0005773830935899855,
      "loss": 3.0536,
      "step": 28660
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.229139804840088,
      "learning_rate": 0.0005773815354102139,
      "loss": 2.9183,
      "step": 28661
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3762054443359375,
      "learning_rate": 0.0005773799771788716,
      "loss": 2.9154,
      "step": 28662
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7072135210037231,
      "learning_rate": 0.0005773784188959593,
      "loss": 2.9694,
      "step": 28663
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4644055366516113,
      "learning_rate": 0.0005773768605614771,
      "loss": 2.9906,
      "step": 28664
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5169823169708252,
      "learning_rate": 0.0005773753021754253,
      "loss": 3.2472,
      "step": 28665
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4948253631591797,
      "learning_rate": 0.0005773737437378041,
      "loss": 3.2534,
      "step": 28666
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5133216381072998,
      "learning_rate": 0.000577372185248614,
      "loss": 2.9693,
      "step": 28667
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.419281244277954,
      "learning_rate": 0.0005773706267078552,
      "loss": 2.9298,
      "step": 28668
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8374629020690918,
      "learning_rate": 0.0005773690681155279,
      "loss": 3.0493,
      "step": 28669
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.378894329071045,
      "learning_rate": 0.0005773675094716325,
      "loss": 3.1908,
      "step": 28670
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5427579879760742,
      "learning_rate": 0.0005773659507761692,
      "loss": 2.9836,
      "step": 28671
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5454084873199463,
      "learning_rate": 0.0005773643920291383,
      "loss": 2.8812,
      "step": 28672
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5699862241744995,
      "learning_rate": 0.0005773628332305402,
      "loss": 3.026,
      "step": 28673
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9108169078826904,
      "learning_rate": 0.0005773612743803751,
      "loss": 3.0354,
      "step": 28674
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8097481727600098,
      "learning_rate": 0.0005773597154786433,
      "loss": 3.1643,
      "step": 28675
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4055860042572021,
      "learning_rate": 0.000577358156525345,
      "loss": 2.7137,
      "step": 28676
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7038217782974243,
      "learning_rate": 0.0005773565975204807,
      "loss": 3.1055,
      "step": 28677
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7061396837234497,
      "learning_rate": 0.0005773550384640505,
      "loss": 3.0008,
      "step": 28678
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8429142236709595,
      "learning_rate": 0.0005773534793560548,
      "loss": 2.9606,
      "step": 28679
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3184112310409546,
      "learning_rate": 0.0005773519201964939,
      "loss": 2.8482,
      "step": 28680
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4509005546569824,
      "learning_rate": 0.000577350360985368,
      "loss": 3.1608,
      "step": 28681
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5325533151626587,
      "learning_rate": 0.0005773488017226774,
      "loss": 2.9529,
      "step": 28682
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.690800666809082,
      "learning_rate": 0.0005773472424084225,
      "loss": 3.22,
      "step": 28683
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.552950143814087,
      "learning_rate": 0.0005773456830426033,
      "loss": 3.1316,
      "step": 28684
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.943110466003418,
      "learning_rate": 0.0005773441236252206,
      "loss": 3.0009,
      "step": 28685
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4405372142791748,
      "learning_rate": 0.0005773425641562742,
      "loss": 3.1848,
      "step": 28686
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6050959825515747,
      "learning_rate": 0.0005773410046357646,
      "loss": 2.9373,
      "step": 28687
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9055463075637817,
      "learning_rate": 0.0005773394450636922,
      "loss": 3.3923,
      "step": 28688
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8054429292678833,
      "learning_rate": 0.000577337885440057,
      "loss": 3.2927,
      "step": 28689
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4978581666946411,
      "learning_rate": 0.0005773363257648595,
      "loss": 2.8811,
      "step": 28690
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7107527256011963,
      "learning_rate": 0.0005773347660381,
      "loss": 2.8523,
      "step": 28691
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4687050580978394,
      "learning_rate": 0.0005773332062597788,
      "loss": 3.0246,
      "step": 28692
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5590513944625854,
      "learning_rate": 0.000577331646429896,
      "loss": 3.1814,
      "step": 28693
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5390998125076294,
      "learning_rate": 0.000577330086548452,
      "loss": 2.9937,
      "step": 28694
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6158850193023682,
      "learning_rate": 0.0005773285266154471,
      "loss": 3.056,
      "step": 28695
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4219599962234497,
      "learning_rate": 0.0005773269666308816,
      "loss": 3.0114,
      "step": 28696
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.984786033630371,
      "learning_rate": 0.0005773254065947559,
      "loss": 2.9303,
      "step": 28697
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.401202917098999,
      "learning_rate": 0.00057732384650707,
      "loss": 3.0173,
      "step": 28698
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.624111294746399,
      "learning_rate": 0.0005773222863678244,
      "loss": 2.989,
      "step": 28699
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9968432188034058,
      "learning_rate": 0.0005773207261770194,
      "loss": 2.8638,
      "step": 28700
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.927513837814331,
      "learning_rate": 0.0005773191659346553,
      "loss": 3.0956,
      "step": 28701
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.138911247253418,
      "learning_rate": 0.0005773176056407323,
      "loss": 3.0128,
      "step": 28702
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.058426856994629,
      "learning_rate": 0.0005773160452952507,
      "loss": 3.1005,
      "step": 28703
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4564180374145508,
      "learning_rate": 0.0005773144848982108,
      "loss": 3.0764,
      "step": 28704
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9929957389831543,
      "learning_rate": 0.0005773129244496129,
      "loss": 3.0766,
      "step": 28705
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.898967981338501,
      "learning_rate": 0.0005773113639494572,
      "loss": 2.9593,
      "step": 28706
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5739277601242065,
      "learning_rate": 0.0005773098033977442,
      "loss": 3.0322,
      "step": 28707
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.290863275527954,
      "learning_rate": 0.0005773082427944741,
      "loss": 3.0685,
      "step": 28708
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.7265241146087646,
      "learning_rate": 0.0005773066821396472,
      "loss": 2.824,
      "step": 28709
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6062432527542114,
      "learning_rate": 0.0005773051214332636,
      "loss": 2.8231,
      "step": 28710
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5184847116470337,
      "learning_rate": 0.0005773035606753238,
      "loss": 2.9771,
      "step": 28711
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8303223848342896,
      "learning_rate": 0.000577301999865828,
      "loss": 3.2655,
      "step": 28712
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.731010675430298,
      "learning_rate": 0.0005773004390047765,
      "loss": 3.2603,
      "step": 28713
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9315050840377808,
      "learning_rate": 0.0005772988780921698,
      "loss": 3.2252,
      "step": 28714
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1596755981445312,
      "learning_rate": 0.0005772973171280079,
      "loss": 3.0518,
      "step": 28715
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.6277987957000732,
      "learning_rate": 0.0005772957561122912,
      "loss": 2.791,
      "step": 28716
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.904382348060608,
      "learning_rate": 0.0005772941950450199,
      "loss": 3.3069,
      "step": 28717
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.492477536201477,
      "learning_rate": 0.0005772926339261945,
      "loss": 2.8624,
      "step": 28718
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.008122444152832,
      "learning_rate": 0.000577291072755815,
      "loss": 3.0531,
      "step": 28719
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.05014705657959,
      "learning_rate": 0.000577289511533882,
      "loss": 3.1182,
      "step": 28720
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4166553020477295,
      "learning_rate": 0.0005772879502603957,
      "loss": 3.2247,
      "step": 28721
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.621950387954712,
      "learning_rate": 0.0005772863889353561,
      "loss": 3.0589,
      "step": 28722
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.524430274963379,
      "learning_rate": 0.0005772848275587639,
      "loss": 2.9056,
      "step": 28723
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6435517072677612,
      "learning_rate": 0.0005772832661306192,
      "loss": 2.9615,
      "step": 28724
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4281809329986572,
      "learning_rate": 0.0005772817046509224,
      "loss": 2.9965,
      "step": 28725
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7670838832855225,
      "learning_rate": 0.0005772801431196734,
      "loss": 3.1693,
      "step": 28726
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.0306642055511475,
      "learning_rate": 0.0005772785815368731,
      "loss": 3.0255,
      "step": 28727
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7252768278121948,
      "learning_rate": 0.0005772770199025213,
      "loss": 3.1525,
      "step": 28728
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9554588794708252,
      "learning_rate": 0.0005772754582166185,
      "loss": 3.2737,
      "step": 28729
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3849425315856934,
      "learning_rate": 0.000577273896479165,
      "loss": 3.0763,
      "step": 28730
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5693305730819702,
      "learning_rate": 0.0005772723346901611,
      "loss": 3.0472,
      "step": 28731
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4716262817382812,
      "learning_rate": 0.000577270772849607,
      "loss": 3.1535,
      "step": 28732
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.496410369873047,
      "learning_rate": 0.0005772692109575029,
      "loss": 3.242,
      "step": 28733
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9608056545257568,
      "learning_rate": 0.0005772676490138493,
      "loss": 3.2005,
      "step": 28734
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6542106866836548,
      "learning_rate": 0.0005772660870186465,
      "loss": 3.2133,
      "step": 28735
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9554951190948486,
      "learning_rate": 0.0005772645249718946,
      "loss": 2.9864,
      "step": 28736
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5217310190200806,
      "learning_rate": 0.0005772629628735939,
      "loss": 3.272,
      "step": 28737
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5299979448318481,
      "learning_rate": 0.000577261400723745,
      "loss": 3.0662,
      "step": 28738
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.696231484413147,
      "learning_rate": 0.0005772598385223479,
      "loss": 3.126,
      "step": 28739
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.300176978111267,
      "learning_rate": 0.000577258276269403,
      "loss": 3.0988,
      "step": 28740
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.379080891609192,
      "learning_rate": 0.0005772567139649105,
      "loss": 3.214,
      "step": 28741
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5862860679626465,
      "learning_rate": 0.0005772551516088707,
      "loss": 3.0637,
      "step": 28742
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5089932680130005,
      "learning_rate": 0.0005772535892012839,
      "loss": 2.9371,
      "step": 28743
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4881091117858887,
      "learning_rate": 0.0005772520267421505,
      "loss": 2.9603,
      "step": 28744
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.68015193939209,
      "learning_rate": 0.0005772504642314707,
      "loss": 3.1366,
      "step": 28745
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7129549980163574,
      "learning_rate": 0.0005772489016692448,
      "loss": 3.0465,
      "step": 28746
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.026806116104126,
      "learning_rate": 0.0005772473390554732,
      "loss": 2.9599,
      "step": 28747
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7030096054077148,
      "learning_rate": 0.0005772457763901559,
      "loss": 3.0396,
      "step": 28748
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6095086336135864,
      "learning_rate": 0.0005772442136732934,
      "loss": 3.1618,
      "step": 28749
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4197715520858765,
      "learning_rate": 0.0005772426509048861,
      "loss": 3.1913,
      "step": 28750
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6175767183303833,
      "learning_rate": 0.000577241088084934,
      "loss": 3.088,
      "step": 28751
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6489198207855225,
      "learning_rate": 0.0005772395252134377,
      "loss": 2.8976,
      "step": 28752
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4603877067565918,
      "learning_rate": 0.0005772379622903973,
      "loss": 3.0306,
      "step": 28753
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9646962881088257,
      "learning_rate": 0.0005772363993158131,
      "loss": 3.0184,
      "step": 28754
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.089730978012085,
      "learning_rate": 0.0005772348362896854,
      "loss": 3.1469,
      "step": 28755
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.821655511856079,
      "learning_rate": 0.0005772332732120147,
      "loss": 3.045,
      "step": 28756
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4636069536209106,
      "learning_rate": 0.0005772317100828008,
      "loss": 3.2522,
      "step": 28757
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5227621793746948,
      "learning_rate": 0.0005772301469020445,
      "loss": 3.1644,
      "step": 28758
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.1960394382476807,
      "learning_rate": 0.0005772285836697458,
      "loss": 3.1774,
      "step": 28759
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6050822734832764,
      "learning_rate": 0.000577227020385905,
      "loss": 3.2968,
      "step": 28760
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.2455774545669556,
      "learning_rate": 0.0005772254570505225,
      "loss": 3.0334,
      "step": 28761
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6017946004867554,
      "learning_rate": 0.0005772238936635987,
      "loss": 3.3284,
      "step": 28762
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.5674428939819336,
      "learning_rate": 0.0005772223302251338,
      "loss": 2.9784,
      "step": 28763
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4843164682388306,
      "learning_rate": 0.0005772207667351278,
      "loss": 2.9695,
      "step": 28764
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.940114140510559,
      "learning_rate": 0.0005772192031935813,
      "loss": 2.9765,
      "step": 28765
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4975011348724365,
      "learning_rate": 0.0005772176396004945,
      "loss": 3.0864,
      "step": 28766
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.77943754196167,
      "learning_rate": 0.0005772160759558678,
      "loss": 2.9249,
      "step": 28767
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7203689813613892,
      "learning_rate": 0.0005772145122597014,
      "loss": 2.896,
      "step": 28768
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.540712594985962,
      "learning_rate": 0.0005772129485119955,
      "loss": 3.0594,
      "step": 28769
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.036278247833252,
      "learning_rate": 0.0005772113847127505,
      "loss": 2.9375,
      "step": 28770
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.556645393371582,
      "learning_rate": 0.0005772098208619667,
      "loss": 2.7924,
      "step": 28771
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.545250415802002,
      "learning_rate": 0.0005772082569596443,
      "loss": 2.9426,
      "step": 28772
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4539614915847778,
      "learning_rate": 0.0005772066930057836,
      "loss": 3.0863,
      "step": 28773
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4491151571273804,
      "learning_rate": 0.0005772051290003851,
      "loss": 3.1035,
      "step": 28774
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.44058358669281,
      "learning_rate": 0.0005772035649434489,
      "loss": 2.9917,
      "step": 28775
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8667885065078735,
      "learning_rate": 0.0005772020008349753,
      "loss": 3.1658,
      "step": 28776
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9990208148956299,
      "learning_rate": 0.0005772004366749646,
      "loss": 3.2185,
      "step": 28777
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6412149667739868,
      "learning_rate": 0.0005771988724634171,
      "loss": 3.137,
      "step": 28778
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8198412656784058,
      "learning_rate": 0.0005771973082003331,
      "loss": 3.1781,
      "step": 28779
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.315018653869629,
      "learning_rate": 0.000577195743885713,
      "loss": 3.509,
      "step": 28780
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3649648427963257,
      "learning_rate": 0.0005771941795195568,
      "loss": 3.3075,
      "step": 28781
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8213077783584595,
      "learning_rate": 0.0005771926151018651,
      "loss": 3.297,
      "step": 28782
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4775370359420776,
      "learning_rate": 0.000577191050632638,
      "loss": 3.1309,
      "step": 28783
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.456702470779419,
      "learning_rate": 0.0005771894861118758,
      "loss": 3.0423,
      "step": 28784
    },
    {
      "epoch": 0.37,
      "grad_norm": 2.8915915489196777,
      "learning_rate": 0.0005771879215395789,
      "loss": 2.9111,
      "step": 28785
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6953424215316772,
      "learning_rate": 0.0005771863569157476,
      "loss": 3.1362,
      "step": 28786
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4629708528518677,
      "learning_rate": 0.000577184792240382,
      "loss": 3.1939,
      "step": 28787
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.828240156173706,
      "learning_rate": 0.0005771832275134827,
      "loss": 3.0052,
      "step": 28788
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.951239824295044,
      "learning_rate": 0.0005771816627350495,
      "loss": 2.9465,
      "step": 28789
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.484626293182373,
      "learning_rate": 0.0005771800979050833,
      "loss": 3.1241,
      "step": 28790
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.3750958442687988,
      "learning_rate": 0.000577178533023584,
      "loss": 2.9987,
      "step": 28791
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9636622667312622,
      "learning_rate": 0.0005771769680905519,
      "loss": 2.9914,
      "step": 28792
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.8968693017959595,
      "learning_rate": 0.0005771754031059874,
      "loss": 2.9763,
      "step": 28793
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9642254114151,
      "learning_rate": 0.0005771738380698907,
      "loss": 3.0518,
      "step": 28794
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.7403643131256104,
      "learning_rate": 0.0005771722729822623,
      "loss": 3.0236,
      "step": 28795
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4677635431289673,
      "learning_rate": 0.0005771707078431023,
      "loss": 3.1535,
      "step": 28796
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.9955191612243652,
      "learning_rate": 0.0005771691426524109,
      "loss": 3.3328,
      "step": 28797
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.319765329360962,
      "learning_rate": 0.0005771675774101887,
      "loss": 3.2575,
      "step": 28798
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.6466615200042725,
      "learning_rate": 0.0005771660121164357,
      "loss": 3.4044,
      "step": 28799
    },
    {
      "epoch": 0.37,
      "grad_norm": 1.4751449823379517,
      "learning_rate": 0.0005771644467711523,
      "loss": 3.0935,
      "step": 28800
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7609254121780396,
      "learning_rate": 0.000577162881374339,
      "loss": 3.1082,
      "step": 28801
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5185835361480713,
      "learning_rate": 0.0005771613159259956,
      "loss": 2.9504,
      "step": 28802
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3644475936889648,
      "learning_rate": 0.0005771597504261229,
      "loss": 3.1268,
      "step": 28803
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5544657707214355,
      "learning_rate": 0.0005771581848747208,
      "loss": 3.0197,
      "step": 28804
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4821465015411377,
      "learning_rate": 0.0005771566192717899,
      "loss": 3.1133,
      "step": 28805
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4241104125976562,
      "learning_rate": 0.0005771550536173303,
      "loss": 3.1099,
      "step": 28806
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1928439140319824,
      "learning_rate": 0.0005771534879113423,
      "loss": 3.0285,
      "step": 28807
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0546693801879883,
      "learning_rate": 0.0005771519221538263,
      "loss": 3.0732,
      "step": 28808
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7102923393249512,
      "learning_rate": 0.0005771503563447825,
      "loss": 2.8772,
      "step": 28809
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.537369728088379,
      "learning_rate": 0.0005771487904842112,
      "loss": 3.2338,
      "step": 28810
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7026360034942627,
      "learning_rate": 0.0005771472245721127,
      "loss": 2.9442,
      "step": 28811
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0280601978302,
      "learning_rate": 0.0005771456586084873,
      "loss": 2.9562,
      "step": 28812
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.59903085231781,
      "learning_rate": 0.0005771440925933354,
      "loss": 3.2094,
      "step": 28813
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4773534536361694,
      "learning_rate": 0.0005771425265266571,
      "loss": 3.0257,
      "step": 28814
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0608747005462646,
      "learning_rate": 0.0005771409604084528,
      "loss": 2.9809,
      "step": 28815
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7817294597625732,
      "learning_rate": 0.0005771393942387227,
      "loss": 3.1769,
      "step": 28816
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4931211471557617,
      "learning_rate": 0.0005771378280174672,
      "loss": 3.1751,
      "step": 28817
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5751854181289673,
      "learning_rate": 0.0005771362617446865,
      "loss": 3.1997,
      "step": 28818
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.28536856174469,
      "learning_rate": 0.0005771346954203811,
      "loss": 3.1228,
      "step": 28819
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.18679940700531,
      "learning_rate": 0.0005771331290445509,
      "loss": 3.0551,
      "step": 28820
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.209059476852417,
      "learning_rate": 0.0005771315626171966,
      "loss": 3.1659,
      "step": 28821
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5692120790481567,
      "learning_rate": 0.0005771299961383181,
      "loss": 3.3902,
      "step": 28822
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.677147626876831,
      "learning_rate": 0.0005771284296079161,
      "loss": 3.0829,
      "step": 28823
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5893850326538086,
      "learning_rate": 0.0005771268630259907,
      "loss": 2.9649,
      "step": 28824
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7612239122390747,
      "learning_rate": 0.0005771252963925421,
      "loss": 3.0981,
      "step": 28825
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3708113431930542,
      "learning_rate": 0.0005771237297075707,
      "loss": 2.9865,
      "step": 28826
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.655948281288147,
      "learning_rate": 0.0005771221629710767,
      "loss": 3.1628,
      "step": 28827
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.816570520401001,
      "learning_rate": 0.0005771205961830606,
      "loss": 3.0194,
      "step": 28828
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0007271766662598,
      "learning_rate": 0.0005771190293435224,
      "loss": 3.1614,
      "step": 28829
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6234395503997803,
      "learning_rate": 0.0005771174624524626,
      "loss": 3.0857,
      "step": 28830
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8726567029953003,
      "learning_rate": 0.0005771158955098815,
      "loss": 3.2425,
      "step": 28831
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.7802629470825195,
      "learning_rate": 0.0005771143285157792,
      "loss": 3.2912,
      "step": 28832
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6463708877563477,
      "learning_rate": 0.0005771127614701562,
      "loss": 2.8798,
      "step": 28833
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9797847270965576,
      "learning_rate": 0.0005771111943730127,
      "loss": 3.0278,
      "step": 28834
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.23034405708313,
      "learning_rate": 0.000577109627224349,
      "loss": 3.2723,
      "step": 28835
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.959919810295105,
      "learning_rate": 0.0005771080600241653,
      "loss": 2.9239,
      "step": 28836
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1510215997695923,
      "learning_rate": 0.0005771064927724621,
      "loss": 3.0786,
      "step": 28837
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5314505100250244,
      "learning_rate": 0.0005771049254692396,
      "loss": 3.0298,
      "step": 28838
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6351420879364014,
      "learning_rate": 0.0005771033581144979,
      "loss": 3.0503,
      "step": 28839
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4025388956069946,
      "learning_rate": 0.0005771017907082376,
      "loss": 3.0098,
      "step": 28840
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.375696897506714,
      "learning_rate": 0.0005771002232504588,
      "loss": 3.3531,
      "step": 28841
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.824950933456421,
      "learning_rate": 0.0005770986557411618,
      "loss": 3.2196,
      "step": 28842
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4376435279846191,
      "learning_rate": 0.000577097088180347,
      "loss": 3.0477,
      "step": 28843
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4873679876327515,
      "learning_rate": 0.0005770955205680146,
      "loss": 3.0537,
      "step": 28844
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2702525854110718,
      "learning_rate": 0.0005770939529041648,
      "loss": 3.0018,
      "step": 28845
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.89975905418396,
      "learning_rate": 0.0005770923851887983,
      "loss": 3.0949,
      "step": 28846
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5036379098892212,
      "learning_rate": 0.0005770908174219148,
      "loss": 3.0816,
      "step": 28847
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3452565670013428,
      "learning_rate": 0.000577089249603515,
      "loss": 3.0126,
      "step": 28848
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1538515090942383,
      "learning_rate": 0.000577087681733599,
      "loss": 2.9791,
      "step": 28849
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5589079856872559,
      "learning_rate": 0.0005770861138121672,
      "loss": 2.9943,
      "step": 28850
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4159948825836182,
      "learning_rate": 0.0005770845458392199,
      "loss": 3.0699,
      "step": 28851
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4778773784637451,
      "learning_rate": 0.0005770829778147574,
      "loss": 3.2167,
      "step": 28852
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9837889671325684,
      "learning_rate": 0.0005770814097387798,
      "loss": 3.0459,
      "step": 28853
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.281710147857666,
      "learning_rate": 0.0005770798416112877,
      "loss": 2.9377,
      "step": 28854
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.823997974395752,
      "learning_rate": 0.0005770782734322811,
      "loss": 2.7889,
      "step": 28855
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.192095994949341,
      "learning_rate": 0.0005770767052017605,
      "loss": 3.1981,
      "step": 28856
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4999207258224487,
      "learning_rate": 0.000577075136919726,
      "loss": 3.4402,
      "step": 28857
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.549972653388977,
      "learning_rate": 0.0005770735685861781,
      "loss": 3.0762,
      "step": 28858
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.007035255432129,
      "learning_rate": 0.000577072000201117,
      "loss": 2.8933,
      "step": 28859
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5166372060775757,
      "learning_rate": 0.000577070431764543,
      "loss": 3.2552,
      "step": 28860
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5287033319473267,
      "learning_rate": 0.0005770688632764563,
      "loss": 3.0531,
      "step": 28861
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.556357502937317,
      "learning_rate": 0.0005770672947368573,
      "loss": 3.3386,
      "step": 28862
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8296889066696167,
      "learning_rate": 0.0005770657261457463,
      "loss": 3.0834,
      "step": 28863
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8575055599212646,
      "learning_rate": 0.0005770641575031235,
      "loss": 2.972,
      "step": 28864
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.277752161026001,
      "learning_rate": 0.0005770625888089892,
      "loss": 3.123,
      "step": 28865
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6825265884399414,
      "learning_rate": 0.0005770610200633439,
      "loss": 3.1902,
      "step": 28866
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.582146167755127,
      "learning_rate": 0.0005770594512661876,
      "loss": 3.0544,
      "step": 28867
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3812763690948486,
      "learning_rate": 0.0005770578824175207,
      "loss": 3.2269,
      "step": 28868
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3906009197235107,
      "learning_rate": 0.0005770563135173437,
      "loss": 3.0951,
      "step": 28869
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5756046772003174,
      "learning_rate": 0.0005770547445656563,
      "loss": 3.0087,
      "step": 28870
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4007349014282227,
      "learning_rate": 0.0005770531755624595,
      "loss": 3.0416,
      "step": 28871
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7966415882110596,
      "learning_rate": 0.0005770516065077533,
      "loss": 3.0888,
      "step": 28872
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6000268459320068,
      "learning_rate": 0.0005770500374015379,
      "loss": 2.8548,
      "step": 28873
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3607885837554932,
      "learning_rate": 0.0005770484682438137,
      "loss": 3.0415,
      "step": 28874
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6156669855117798,
      "learning_rate": 0.0005770468990345809,
      "loss": 3.1519,
      "step": 28875
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6591931581497192,
      "learning_rate": 0.0005770453297738398,
      "loss": 3.1708,
      "step": 28876
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4672536849975586,
      "learning_rate": 0.0005770437604615909,
      "loss": 3.0577,
      "step": 28877
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6938222646713257,
      "learning_rate": 0.0005770421910978342,
      "loss": 2.9009,
      "step": 28878
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.710814118385315,
      "learning_rate": 0.0005770406216825702,
      "loss": 2.8309,
      "step": 28879
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7360246181488037,
      "learning_rate": 0.0005770390522157992,
      "loss": 3.163,
      "step": 28880
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3835670948028564,
      "learning_rate": 0.0005770374826975212,
      "loss": 3.3447,
      "step": 28881
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7803934812545776,
      "learning_rate": 0.000577035913127737,
      "loss": 2.9865,
      "step": 28882
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.503244638442993,
      "learning_rate": 0.0005770343435064463,
      "loss": 3.0515,
      "step": 28883
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.115363121032715,
      "learning_rate": 0.0005770327738336498,
      "loss": 3.1299,
      "step": 28884
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5812910795211792,
      "learning_rate": 0.0005770312041093477,
      "loss": 3.1797,
      "step": 28885
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.323814630508423,
      "learning_rate": 0.0005770296343335402,
      "loss": 2.9158,
      "step": 28886
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7146730422973633,
      "learning_rate": 0.0005770280645062278,
      "loss": 2.9721,
      "step": 28887
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.546921730041504,
      "learning_rate": 0.0005770264946274104,
      "loss": 2.8569,
      "step": 28888
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.9490842819213867,
      "learning_rate": 0.0005770249246970888,
      "loss": 3.0322,
      "step": 28889
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5119240283966064,
      "learning_rate": 0.000577023354715263,
      "loss": 2.9904,
      "step": 28890
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7548425197601318,
      "learning_rate": 0.0005770217846819333,
      "loss": 3.1203,
      "step": 28891
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.731417179107666,
      "learning_rate": 0.0005770202145970999,
      "loss": 2.924,
      "step": 28892
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0104668140411377,
      "learning_rate": 0.0005770186444607634,
      "loss": 3.0523,
      "step": 28893
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5481489896774292,
      "learning_rate": 0.0005770170742729237,
      "loss": 3.1527,
      "step": 28894
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.583603858947754,
      "learning_rate": 0.0005770155040335814,
      "loss": 3.0445,
      "step": 28895
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.674357295036316,
      "learning_rate": 0.0005770139337427368,
      "loss": 3.0856,
      "step": 28896
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.005733013153076,
      "learning_rate": 0.00057701236340039,
      "loss": 3.3152,
      "step": 28897
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9820637702941895,
      "learning_rate": 0.0005770107930065413,
      "loss": 3.035,
      "step": 28898
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.042754888534546,
      "learning_rate": 0.0005770092225611911,
      "loss": 3.1982,
      "step": 28899
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6441376209259033,
      "learning_rate": 0.0005770076520643397,
      "loss": 3.1833,
      "step": 28900
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7519099712371826,
      "learning_rate": 0.0005770060815159873,
      "loss": 3.1217,
      "step": 28901
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4460448026657104,
      "learning_rate": 0.0005770045109161343,
      "loss": 2.8572,
      "step": 28902
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8259526491165161,
      "learning_rate": 0.0005770029402647809,
      "loss": 2.8339,
      "step": 28903
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6075910329818726,
      "learning_rate": 0.0005770013695619274,
      "loss": 3.3671,
      "step": 28904
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5494087934494019,
      "learning_rate": 0.0005769997988075741,
      "loss": 3.0565,
      "step": 28905
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.291102409362793,
      "learning_rate": 0.0005769982280017215,
      "loss": 2.8217,
      "step": 28906
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.443856954574585,
      "learning_rate": 0.0005769966571443695,
      "loss": 2.9014,
      "step": 28907
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.76635479927063,
      "learning_rate": 0.0005769950862355186,
      "loss": 3.0264,
      "step": 28908
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3607982397079468,
      "learning_rate": 0.0005769935152751691,
      "loss": 2.8963,
      "step": 28909
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3578758239746094,
      "learning_rate": 0.0005769919442633213,
      "loss": 2.8947,
      "step": 28910
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.0557219982147217,
      "learning_rate": 0.0005769903731999755,
      "loss": 3.1338,
      "step": 28911
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5727912187576294,
      "learning_rate": 0.0005769888020851319,
      "loss": 3.1745,
      "step": 28912
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5572822093963623,
      "learning_rate": 0.0005769872309187909,
      "loss": 3.1418,
      "step": 28913
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4441949129104614,
      "learning_rate": 0.0005769856597009527,
      "loss": 3.074,
      "step": 28914
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9535242319107056,
      "learning_rate": 0.0005769840884316177,
      "loss": 2.9866,
      "step": 28915
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5436642169952393,
      "learning_rate": 0.0005769825171107861,
      "loss": 3.134,
      "step": 28916
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.8479344844818115,
      "learning_rate": 0.0005769809457384582,
      "loss": 3.2005,
      "step": 28917
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3219199180603027,
      "learning_rate": 0.0005769793743146343,
      "loss": 3.0223,
      "step": 28918
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.758781909942627,
      "learning_rate": 0.0005769778028393148,
      "loss": 3.0444,
      "step": 28919
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.542362928390503,
      "learning_rate": 0.0005769762313124998,
      "loss": 3.0686,
      "step": 28920
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.32948637008667,
      "learning_rate": 0.0005769746597341898,
      "loss": 3.3019,
      "step": 28921
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.779375433921814,
      "learning_rate": 0.000576973088104385,
      "loss": 3.1725,
      "step": 28922
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3291680812835693,
      "learning_rate": 0.0005769715164230854,
      "loss": 3.1231,
      "step": 28923
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5901960134506226,
      "learning_rate": 0.0005769699446902918,
      "loss": 3.0391,
      "step": 28924
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4688400030136108,
      "learning_rate": 0.0005769683729060042,
      "loss": 3.217,
      "step": 28925
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5232634544372559,
      "learning_rate": 0.0005769668010702229,
      "loss": 2.9418,
      "step": 28926
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5295988321304321,
      "learning_rate": 0.0005769652291829484,
      "loss": 3.0125,
      "step": 28927
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5562891960144043,
      "learning_rate": 0.0005769636572441806,
      "loss": 2.993,
      "step": 28928
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7526084184646606,
      "learning_rate": 0.0005769620852539203,
      "loss": 2.9122,
      "step": 28929
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4593325853347778,
      "learning_rate": 0.0005769605132121674,
      "loss": 3.3565,
      "step": 28930
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.74201238155365,
      "learning_rate": 0.0005769589411189223,
      "loss": 3.0903,
      "step": 28931
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.213927984237671,
      "learning_rate": 0.0005769573689741853,
      "loss": 2.7318,
      "step": 28932
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.614530324935913,
      "learning_rate": 0.0005769557967779566,
      "loss": 3.0703,
      "step": 28933
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.670770525932312,
      "learning_rate": 0.0005769542245302367,
      "loss": 3.1777,
      "step": 28934
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.847333550453186,
      "learning_rate": 0.0005769526522310258,
      "loss": 2.8885,
      "step": 28935
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5916510820388794,
      "learning_rate": 0.0005769510798803242,
      "loss": 2.8435,
      "step": 28936
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7570254802703857,
      "learning_rate": 0.0005769495074781321,
      "loss": 2.9778,
      "step": 28937
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8936799764633179,
      "learning_rate": 0.0005769479350244499,
      "loss": 2.9907,
      "step": 28938
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6169426441192627,
      "learning_rate": 0.0005769463625192778,
      "loss": 3.1428,
      "step": 28939
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2817471027374268,
      "learning_rate": 0.0005769447899626162,
      "loss": 2.922,
      "step": 28940
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8069372177124023,
      "learning_rate": 0.0005769432173544653,
      "loss": 3.1906,
      "step": 28941
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1899607181549072,
      "learning_rate": 0.0005769416446948254,
      "loss": 3.2633,
      "step": 28942
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.208728790283203,
      "learning_rate": 0.0005769400719836969,
      "loss": 3.2316,
      "step": 28943
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4138498306274414,
      "learning_rate": 0.0005769384992210799,
      "loss": 3.1296,
      "step": 28944
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6596636772155762,
      "learning_rate": 0.0005769369264069749,
      "loss": 2.9907,
      "step": 28945
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.406247138977051,
      "learning_rate": 0.0005769353535413821,
      "loss": 3.1139,
      "step": 28946
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.576719880104065,
      "learning_rate": 0.0005769337806243017,
      "loss": 3.124,
      "step": 28947
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4584351778030396,
      "learning_rate": 0.0005769322076557341,
      "loss": 2.9276,
      "step": 28948
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.59421706199646,
      "learning_rate": 0.0005769306346356796,
      "loss": 3.2071,
      "step": 28949
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7049120664596558,
      "learning_rate": 0.0005769290615641386,
      "loss": 3.3007,
      "step": 28950
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5066709518432617,
      "learning_rate": 0.0005769274884411111,
      "loss": 3.1026,
      "step": 28951
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2371699810028076,
      "learning_rate": 0.0005769259152665976,
      "loss": 3.0905,
      "step": 28952
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5024337768554688,
      "learning_rate": 0.0005769243420405984,
      "loss": 2.9135,
      "step": 28953
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.469064474105835,
      "learning_rate": 0.0005769227687631136,
      "loss": 3.048,
      "step": 28954
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.521010398864746,
      "learning_rate": 0.0005769211954341438,
      "loss": 3.1125,
      "step": 28955
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.891315221786499,
      "learning_rate": 0.0005769196220536889,
      "loss": 3.2157,
      "step": 28956
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5491647720336914,
      "learning_rate": 0.0005769180486217497,
      "loss": 2.9184,
      "step": 28957
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5992475748062134,
      "learning_rate": 0.0005769164751383259,
      "loss": 2.9098,
      "step": 28958
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2681573629379272,
      "learning_rate": 0.0005769149016034183,
      "loss": 2.8658,
      "step": 28959
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5840599536895752,
      "learning_rate": 0.0005769133280170269,
      "loss": 2.9806,
      "step": 28960
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.410617709159851,
      "learning_rate": 0.0005769117543791522,
      "loss": 2.9466,
      "step": 28961
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6975584030151367,
      "learning_rate": 0.0005769101806897943,
      "loss": 3.1851,
      "step": 28962
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4723877906799316,
      "learning_rate": 0.0005769086069489536,
      "loss": 3.1992,
      "step": 28963
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9050301313400269,
      "learning_rate": 0.0005769070331566303,
      "loss": 3.158,
      "step": 28964
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6640467643737793,
      "learning_rate": 0.0005769054593128248,
      "loss": 3.2656,
      "step": 28965
    },
    {
      "epoch": 0.38,
      "grad_norm": 4.071147918701172,
      "learning_rate": 0.0005769038854175374,
      "loss": 2.9944,
      "step": 28966
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.416278600692749,
      "learning_rate": 0.0005769023114707682,
      "loss": 3.0172,
      "step": 28967
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.459700345993042,
      "learning_rate": 0.0005769007374725178,
      "loss": 3.3633,
      "step": 28968
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8580411672592163,
      "learning_rate": 0.0005768991634227862,
      "loss": 3.0482,
      "step": 28969
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.398475170135498,
      "learning_rate": 0.0005768975893215739,
      "loss": 3.2237,
      "step": 28970
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3345667123794556,
      "learning_rate": 0.000576896015168881,
      "loss": 3.1471,
      "step": 28971
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3335402011871338,
      "learning_rate": 0.000576894440964708,
      "loss": 3.2212,
      "step": 28972
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2221641540527344,
      "learning_rate": 0.0005768928667090551,
      "loss": 3.1844,
      "step": 28973
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4619646072387695,
      "learning_rate": 0.0005768912924019226,
      "loss": 3.0878,
      "step": 28974
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.529769778251648,
      "learning_rate": 0.0005768897180433107,
      "loss": 3.0597,
      "step": 28975
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6886539459228516,
      "learning_rate": 0.0005768881436332199,
      "loss": 3.237,
      "step": 28976
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4253557920455933,
      "learning_rate": 0.0005768865691716502,
      "loss": 3.2586,
      "step": 28977
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6777634620666504,
      "learning_rate": 0.0005768849946586022,
      "loss": 3.1387,
      "step": 28978
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5834238529205322,
      "learning_rate": 0.000576883420094076,
      "loss": 3.0273,
      "step": 28979
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5136553049087524,
      "learning_rate": 0.0005768818454780719,
      "loss": 3.088,
      "step": 28980
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6215640306472778,
      "learning_rate": 0.0005768802708105903,
      "loss": 3.2,
      "step": 28981
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.605127215385437,
      "learning_rate": 0.0005768786960916315,
      "loss": 2.8985,
      "step": 28982
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9251230955123901,
      "learning_rate": 0.0005768771213211955,
      "loss": 3.0958,
      "step": 28983
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.760122537612915,
      "learning_rate": 0.0005768755464992831,
      "loss": 3.2102,
      "step": 28984
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2642285823822021,
      "learning_rate": 0.0005768739716258941,
      "loss": 3.1885,
      "step": 28985
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5749050378799438,
      "learning_rate": 0.0005768723967010291,
      "loss": 2.861,
      "step": 28986
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4217967987060547,
      "learning_rate": 0.0005768708217246882,
      "loss": 3.3142,
      "step": 28987
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8588558435440063,
      "learning_rate": 0.0005768692466968718,
      "loss": 3.1569,
      "step": 28988
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3662806749343872,
      "learning_rate": 0.0005768676716175803,
      "loss": 3.4145,
      "step": 28989
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7167062759399414,
      "learning_rate": 0.0005768660964868139,
      "loss": 3.1042,
      "step": 28990
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4770846366882324,
      "learning_rate": 0.0005768645213045727,
      "loss": 3.0522,
      "step": 28991
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.259429931640625,
      "learning_rate": 0.0005768629460708572,
      "loss": 3.2976,
      "step": 28992
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3693809509277344,
      "learning_rate": 0.0005768613707856677,
      "loss": 3.0966,
      "step": 28993
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.475528359413147,
      "learning_rate": 0.0005768597954490045,
      "loss": 3.1607,
      "step": 28994
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5132479667663574,
      "learning_rate": 0.0005768582200608677,
      "loss": 3.2069,
      "step": 28995
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3788520097732544,
      "learning_rate": 0.0005768566446212579,
      "loss": 3.0103,
      "step": 28996
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6506590843200684,
      "learning_rate": 0.0005768550691301751,
      "loss": 3.1914,
      "step": 28997
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9630446434020996,
      "learning_rate": 0.0005768534935876198,
      "loss": 3.0298,
      "step": 28998
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3942233324050903,
      "learning_rate": 0.0005768519179935921,
      "loss": 3.0974,
      "step": 28999
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.265629529953003,
      "learning_rate": 0.0005768503423480924,
      "loss": 2.9497,
      "step": 29000
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5674121379852295,
      "learning_rate": 0.0005768487666511211,
      "loss": 3.1437,
      "step": 29001
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.659466028213501,
      "learning_rate": 0.0005768471909026783,
      "loss": 3.1032,
      "step": 29002
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.620203733444214,
      "learning_rate": 0.0005768456151027645,
      "loss": 3.0397,
      "step": 29003
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5390490293502808,
      "learning_rate": 0.0005768440392513798,
      "loss": 3.1012,
      "step": 29004
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6492282152175903,
      "learning_rate": 0.0005768424633485244,
      "loss": 2.9358,
      "step": 29005
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.0171406269073486,
      "learning_rate": 0.000576840887394199,
      "loss": 2.9094,
      "step": 29006
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7263875007629395,
      "learning_rate": 0.0005768393113884036,
      "loss": 3.2976,
      "step": 29007
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2808525562286377,
      "learning_rate": 0.0005768377353311385,
      "loss": 2.9793,
      "step": 29008
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6541601419448853,
      "learning_rate": 0.000576836159222404,
      "loss": 3.008,
      "step": 29009
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4581046104431152,
      "learning_rate": 0.0005768345830622005,
      "loss": 3.084,
      "step": 29010
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5922402143478394,
      "learning_rate": 0.0005768330068505282,
      "loss": 3.1209,
      "step": 29011
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5637972354888916,
      "learning_rate": 0.0005768314305873876,
      "loss": 3.1803,
      "step": 29012
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.626192569732666,
      "learning_rate": 0.0005768298542727786,
      "loss": 3.1265,
      "step": 29013
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5780521631240845,
      "learning_rate": 0.0005768282779067017,
      "loss": 3.1971,
      "step": 29014
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9072082042694092,
      "learning_rate": 0.0005768267014891571,
      "loss": 3.1166,
      "step": 29015
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4624638557434082,
      "learning_rate": 0.0005768251250201455,
      "loss": 3.165,
      "step": 29016
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5155123472213745,
      "learning_rate": 0.0005768235484996667,
      "loss": 3.0636,
      "step": 29017
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3303600549697876,
      "learning_rate": 0.0005768219719277212,
      "loss": 3.2212,
      "step": 29018
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.640213131904602,
      "learning_rate": 0.0005768203953043093,
      "loss": 3.0368,
      "step": 29019
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9100195169448853,
      "learning_rate": 0.0005768188186294312,
      "loss": 2.9936,
      "step": 29020
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2988088130950928,
      "learning_rate": 0.0005768172419030873,
      "loss": 3.1347,
      "step": 29021
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2323319911956787,
      "learning_rate": 0.0005768156651252778,
      "loss": 2.9944,
      "step": 29022
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3734476566314697,
      "learning_rate": 0.0005768140882960031,
      "loss": 2.8632,
      "step": 29023
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3332332372665405,
      "learning_rate": 0.0005768125114152634,
      "loss": 3.0777,
      "step": 29024
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4891352653503418,
      "learning_rate": 0.000576810934483059,
      "loss": 3.1105,
      "step": 29025
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6487189531326294,
      "learning_rate": 0.0005768093574993903,
      "loss": 3.0239,
      "step": 29026
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5643136501312256,
      "learning_rate": 0.0005768077804642575,
      "loss": 3.0134,
      "step": 29027
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.822129726409912,
      "learning_rate": 0.0005768062033776609,
      "loss": 3.3064,
      "step": 29028
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6354416608810425,
      "learning_rate": 0.0005768046262396007,
      "loss": 3.1469,
      "step": 29029
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1174027919769287,
      "learning_rate": 0.0005768030490500774,
      "loss": 3.1376,
      "step": 29030
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4492359161376953,
      "learning_rate": 0.0005768014718090912,
      "loss": 2.7676,
      "step": 29031
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4325594902038574,
      "learning_rate": 0.0005767998945166424,
      "loss": 2.8056,
      "step": 29032
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.442248582839966,
      "learning_rate": 0.0005767983171727312,
      "loss": 3.3342,
      "step": 29033
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6680405139923096,
      "learning_rate": 0.0005767967397773579,
      "loss": 2.9594,
      "step": 29034
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4764328002929688,
      "learning_rate": 0.0005767951623305231,
      "loss": 3.1728,
      "step": 29035
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4453773498535156,
      "learning_rate": 0.0005767935848322267,
      "loss": 3.1031,
      "step": 29036
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.0542001724243164,
      "learning_rate": 0.0005767920072824692,
      "loss": 3.059,
      "step": 29037
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6246247291564941,
      "learning_rate": 0.0005767904296812507,
      "loss": 2.9054,
      "step": 29038
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.8203060626983643,
      "learning_rate": 0.0005767888520285718,
      "loss": 3.0216,
      "step": 29039
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.181274652481079,
      "learning_rate": 0.0005767872743244326,
      "loss": 3.0029,
      "step": 29040
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5823194980621338,
      "learning_rate": 0.0005767856965688333,
      "loss": 3.078,
      "step": 29041
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.324758529663086,
      "learning_rate": 0.0005767841187617744,
      "loss": 3.1384,
      "step": 29042
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9863442182540894,
      "learning_rate": 0.0005767825409032562,
      "loss": 3.0004,
      "step": 29043
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6195658445358276,
      "learning_rate": 0.0005767809629932788,
      "loss": 2.9513,
      "step": 29044
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6748863458633423,
      "learning_rate": 0.0005767793850318426,
      "loss": 3.1326,
      "step": 29045
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.021515130996704,
      "learning_rate": 0.0005767778070189479,
      "loss": 2.9948,
      "step": 29046
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.191135883331299,
      "learning_rate": 0.000576776228954595,
      "loss": 3.0619,
      "step": 29047
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.555982232093811,
      "learning_rate": 0.0005767746508387841,
      "loss": 3.2121,
      "step": 29048
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9495612382888794,
      "learning_rate": 0.0005767730726715155,
      "loss": 3.1573,
      "step": 29049
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5736491680145264,
      "learning_rate": 0.0005767714944527898,
      "loss": 3.0779,
      "step": 29050
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4357242584228516,
      "learning_rate": 0.0005767699161826069,
      "loss": 2.9014,
      "step": 29051
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7760883569717407,
      "learning_rate": 0.0005767683378609672,
      "loss": 2.9112,
      "step": 29052
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6389615535736084,
      "learning_rate": 0.0005767667594878712,
      "loss": 3.2289,
      "step": 29053
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5888595581054688,
      "learning_rate": 0.000576765181063319,
      "loss": 3.1039,
      "step": 29054
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4094908237457275,
      "learning_rate": 0.0005767636025873109,
      "loss": 3.0823,
      "step": 29055
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.890931248664856,
      "learning_rate": 0.0005767620240598472,
      "loss": 3.0854,
      "step": 29056
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1263864040374756,
      "learning_rate": 0.0005767604454809281,
      "loss": 3.3016,
      "step": 29057
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5541191101074219,
      "learning_rate": 0.0005767588668505543,
      "loss": 3.0491,
      "step": 29058
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.720919132232666,
      "learning_rate": 0.0005767572881687255,
      "loss": 3.1166,
      "step": 29059
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7153011560440063,
      "learning_rate": 0.0005767557094354424,
      "loss": 3.1467,
      "step": 29060
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2520711421966553,
      "learning_rate": 0.0005767541306507053,
      "loss": 3.0381,
      "step": 29061
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5982959270477295,
      "learning_rate": 0.0005767525518145143,
      "loss": 3.1784,
      "step": 29062
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.499287486076355,
      "learning_rate": 0.0005767509729268698,
      "loss": 3.0757,
      "step": 29063
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8410732746124268,
      "learning_rate": 0.0005767493939877721,
      "loss": 2.9261,
      "step": 29064
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2800183296203613,
      "learning_rate": 0.0005767478149972214,
      "loss": 2.8996,
      "step": 29065
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5738643407821655,
      "learning_rate": 0.000576746235955218,
      "loss": 3.1217,
      "step": 29066
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2397398948669434,
      "learning_rate": 0.0005767446568617623,
      "loss": 3.1382,
      "step": 29067
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.385742425918579,
      "learning_rate": 0.0005767430777168545,
      "loss": 3.0398,
      "step": 29068
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8537495136260986,
      "learning_rate": 0.000576741498520495,
      "loss": 3.0854,
      "step": 29069
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5561367273330688,
      "learning_rate": 0.0005767399192726841,
      "loss": 2.9458,
      "step": 29070
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7218589782714844,
      "learning_rate": 0.000576738339973422,
      "loss": 3.1342,
      "step": 29071
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4851518869400024,
      "learning_rate": 0.0005767367606227089,
      "loss": 3.2723,
      "step": 29072
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2256131172180176,
      "learning_rate": 0.0005767351812205453,
      "loss": 3.105,
      "step": 29073
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3851828575134277,
      "learning_rate": 0.0005767336017669314,
      "loss": 3.3714,
      "step": 29074
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.577079176902771,
      "learning_rate": 0.0005767320222618675,
      "loss": 3.146,
      "step": 29075
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.900162696838379,
      "learning_rate": 0.0005767304427053538,
      "loss": 3.3938,
      "step": 29076
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6120691299438477,
      "learning_rate": 0.000576728863097391,
      "loss": 3.2564,
      "step": 29077
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4315266609191895,
      "learning_rate": 0.0005767272834379787,
      "loss": 3.1289,
      "step": 29078
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8362627029418945,
      "learning_rate": 0.0005767257037271177,
      "loss": 2.9191,
      "step": 29079
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3820619583129883,
      "learning_rate": 0.0005767241239648083,
      "loss": 3.1149,
      "step": 29080
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6302955150604248,
      "learning_rate": 0.0005767225441510505,
      "loss": 3.099,
      "step": 29081
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3419941663742065,
      "learning_rate": 0.0005767209642858448,
      "loss": 2.9473,
      "step": 29082
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0009212493896484,
      "learning_rate": 0.0005767193843691915,
      "loss": 3.2721,
      "step": 29083
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5523921251296997,
      "learning_rate": 0.0005767178044010908,
      "loss": 3.0687,
      "step": 29084
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8610855340957642,
      "learning_rate": 0.0005767162243815431,
      "loss": 3.1497,
      "step": 29085
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.305213689804077,
      "learning_rate": 0.0005767146443105485,
      "loss": 2.9713,
      "step": 29086
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.618442416191101,
      "learning_rate": 0.0005767130641881076,
      "loss": 2.9861,
      "step": 29087
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7312054634094238,
      "learning_rate": 0.0005767114840142203,
      "loss": 3.1111,
      "step": 29088
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.7187297344207764,
      "learning_rate": 0.0005767099037888873,
      "loss": 2.9858,
      "step": 29089
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3904457092285156,
      "learning_rate": 0.0005767083235121086,
      "loss": 2.9969,
      "step": 29090
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.534138798713684,
      "learning_rate": 0.0005767067431838846,
      "loss": 2.8716,
      "step": 29091
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4151911735534668,
      "learning_rate": 0.0005767051628042157,
      "loss": 3.105,
      "step": 29092
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6488120555877686,
      "learning_rate": 0.000576703582373102,
      "loss": 2.8608,
      "step": 29093
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.373016834259033,
      "learning_rate": 0.0005767020018905438,
      "loss": 3.4247,
      "step": 29094
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5065468549728394,
      "learning_rate": 0.0005767004213565416,
      "loss": 3.0664,
      "step": 29095
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6663646697998047,
      "learning_rate": 0.0005766988407710956,
      "loss": 2.8313,
      "step": 29096
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.56573224067688,
      "learning_rate": 0.000576697260134206,
      "loss": 3.3038,
      "step": 29097
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.455597400665283,
      "learning_rate": 0.0005766956794458732,
      "loss": 2.9913,
      "step": 29098
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.742250680923462,
      "learning_rate": 0.0005766940987060973,
      "loss": 3.1441,
      "step": 29099
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4565187692642212,
      "learning_rate": 0.0005766925179148789,
      "loss": 3.3081,
      "step": 29100
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.716188907623291,
      "learning_rate": 0.0005766909370722181,
      "loss": 3.0232,
      "step": 29101
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.311472177505493,
      "learning_rate": 0.0005766893561781152,
      "loss": 3.0636,
      "step": 29102
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.474489688873291,
      "learning_rate": 0.0005766877752325706,
      "loss": 3.1402,
      "step": 29103
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6238776445388794,
      "learning_rate": 0.0005766861942355844,
      "loss": 3.1103,
      "step": 29104
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.291609525680542,
      "learning_rate": 0.0005766846131871571,
      "loss": 3.1031,
      "step": 29105
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9668614864349365,
      "learning_rate": 0.0005766830320872889,
      "loss": 3.0292,
      "step": 29106
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4739439487457275,
      "learning_rate": 0.0005766814509359801,
      "loss": 3.1147,
      "step": 29107
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2858493328094482,
      "learning_rate": 0.000576679869733231,
      "loss": 3.2538,
      "step": 29108
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2049126625061035,
      "learning_rate": 0.0005766782884790418,
      "loss": 2.9904,
      "step": 29109
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4837347269058228,
      "learning_rate": 0.000576676707173413,
      "loss": 2.8818,
      "step": 29110
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7504092454910278,
      "learning_rate": 0.0005766751258163448,
      "loss": 3.0618,
      "step": 29111
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.157670736312866,
      "learning_rate": 0.0005766735444078374,
      "loss": 3.0474,
      "step": 29112
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.616365671157837,
      "learning_rate": 0.0005766719629478911,
      "loss": 3.2538,
      "step": 29113
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3426706790924072,
      "learning_rate": 0.0005766703814365063,
      "loss": 2.966,
      "step": 29114
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.777079701423645,
      "learning_rate": 0.0005766687998736834,
      "loss": 3.3768,
      "step": 29115
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.906347632408142,
      "learning_rate": 0.0005766672182594224,
      "loss": 3.2228,
      "step": 29116
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4860061407089233,
      "learning_rate": 0.0005766656365937238,
      "loss": 3.1323,
      "step": 29117
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.658267855644226,
      "learning_rate": 0.0005766640548765877,
      "loss": 3.256,
      "step": 29118
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.577370285987854,
      "learning_rate": 0.0005766624731080147,
      "loss": 3.2438,
      "step": 29119
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3521111011505127,
      "learning_rate": 0.0005766608912880049,
      "loss": 2.985,
      "step": 29120
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3236041069030762,
      "learning_rate": 0.0005766593094165584,
      "loss": 2.936,
      "step": 29121
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5791842937469482,
      "learning_rate": 0.000576657727493676,
      "loss": 3.0207,
      "step": 29122
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5331637859344482,
      "learning_rate": 0.0005766561455193576,
      "loss": 3.1116,
      "step": 29123
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.714406967163086,
      "learning_rate": 0.0005766545634936036,
      "loss": 3.2893,
      "step": 29124
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4062836170196533,
      "learning_rate": 0.0005766529814164144,
      "loss": 2.9663,
      "step": 29125
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.88592529296875,
      "learning_rate": 0.00057665139928779,
      "loss": 3.1654,
      "step": 29126
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3949881792068481,
      "learning_rate": 0.000576649817107731,
      "loss": 3.0754,
      "step": 29127
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.572477102279663,
      "learning_rate": 0.0005766482348762375,
      "loss": 3.056,
      "step": 29128
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0277419090270996,
      "learning_rate": 0.00057664665259331,
      "loss": 3.1169,
      "step": 29129
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.803596019744873,
      "learning_rate": 0.0005766450702589486,
      "loss": 3.0348,
      "step": 29130
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.8001396656036377,
      "learning_rate": 0.0005766434878731536,
      "loss": 2.9968,
      "step": 29131
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.650862216949463,
      "learning_rate": 0.0005766419054359253,
      "loss": 3.0759,
      "step": 29132
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6237112283706665,
      "learning_rate": 0.0005766403229472641,
      "loss": 3.1423,
      "step": 29133
    },
    {
      "epoch": 0.38,
      "grad_norm": 4.004430294036865,
      "learning_rate": 0.0005766387404071703,
      "loss": 3.079,
      "step": 29134
    },
    {
      "epoch": 0.38,
      "grad_norm": 4.571741104125977,
      "learning_rate": 0.0005766371578156441,
      "loss": 3.0209,
      "step": 29135
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8450464010238647,
      "learning_rate": 0.0005766355751726859,
      "loss": 2.8565,
      "step": 29136
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8343796730041504,
      "learning_rate": 0.0005766339924782958,
      "loss": 2.8775,
      "step": 29137
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.74966561794281,
      "learning_rate": 0.0005766324097324744,
      "loss": 2.9341,
      "step": 29138
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8298686742782593,
      "learning_rate": 0.0005766308269352216,
      "loss": 3.1561,
      "step": 29139
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6963499784469604,
      "learning_rate": 0.0005766292440865381,
      "loss": 3.2764,
      "step": 29140
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4377950429916382,
      "learning_rate": 0.0005766276611864239,
      "loss": 3.0879,
      "step": 29141
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9933717250823975,
      "learning_rate": 0.0005766260782348794,
      "loss": 3.2255,
      "step": 29142
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6915056705474854,
      "learning_rate": 0.0005766244952319049,
      "loss": 3.1838,
      "step": 29143
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4802525043487549,
      "learning_rate": 0.0005766229121775008,
      "loss": 3.0897,
      "step": 29144
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.49519944190979,
      "learning_rate": 0.0005766213290716671,
      "loss": 3.2464,
      "step": 29145
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5800507068634033,
      "learning_rate": 0.0005766197459144042,
      "loss": 3.2995,
      "step": 29146
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.128446578979492,
      "learning_rate": 0.0005766181627057127,
      "loss": 3.1184,
      "step": 29147
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5089802742004395,
      "learning_rate": 0.0005766165794455926,
      "loss": 3.0527,
      "step": 29148
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3901561498641968,
      "learning_rate": 0.0005766149961340442,
      "loss": 2.9453,
      "step": 29149
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7812471389770508,
      "learning_rate": 0.0005766134127710679,
      "loss": 3.1382,
      "step": 29150
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.299946665763855,
      "learning_rate": 0.0005766118293566638,
      "loss": 3.0752,
      "step": 29151
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4996293783187866,
      "learning_rate": 0.0005766102458908325,
      "loss": 3.3733,
      "step": 29152
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5852993726730347,
      "learning_rate": 0.0005766086623735742,
      "loss": 3.2162,
      "step": 29153
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5718803405761719,
      "learning_rate": 0.000576607078804889,
      "loss": 2.8048,
      "step": 29154
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4296549558639526,
      "learning_rate": 0.0005766054951847773,
      "loss": 2.843,
      "step": 29155
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.773242712020874,
      "learning_rate": 0.0005766039115132394,
      "loss": 2.9836,
      "step": 29156
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4745525121688843,
      "learning_rate": 0.0005766023277902758,
      "loss": 3.162,
      "step": 29157
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6372836828231812,
      "learning_rate": 0.0005766007440158864,
      "loss": 3.0935,
      "step": 29158
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2789002656936646,
      "learning_rate": 0.0005765991601900718,
      "loss": 3.0397,
      "step": 29159
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4308805465698242,
      "learning_rate": 0.0005765975763128322,
      "loss": 3.1802,
      "step": 29160
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.693617820739746,
      "learning_rate": 0.0005765959923841678,
      "loss": 2.9994,
      "step": 29161
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.623291015625,
      "learning_rate": 0.000576594408404079,
      "loss": 3.2889,
      "step": 29162
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.637593388557434,
      "learning_rate": 0.0005765928243725662,
      "loss": 3.038,
      "step": 29163
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6100552082061768,
      "learning_rate": 0.0005765912402896294,
      "loss": 2.9806,
      "step": 29164
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3710030317306519,
      "learning_rate": 0.0005765896561552692,
      "loss": 2.8595,
      "step": 29165
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3811432123184204,
      "learning_rate": 0.0005765880719694858,
      "loss": 3.0498,
      "step": 29166
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.454136610031128,
      "learning_rate": 0.0005765864877322794,
      "loss": 3.0388,
      "step": 29167
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3630943298339844,
      "learning_rate": 0.0005765849034436503,
      "loss": 3.2125,
      "step": 29168
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5041451454162598,
      "learning_rate": 0.0005765833191035989,
      "loss": 3.0094,
      "step": 29169
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5266873836517334,
      "learning_rate": 0.0005765817347121254,
      "loss": 3.079,
      "step": 29170
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7418627738952637,
      "learning_rate": 0.0005765801502692301,
      "loss": 2.9739,
      "step": 29171
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9166240692138672,
      "learning_rate": 0.0005765785657749135,
      "loss": 3.1754,
      "step": 29172
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4261524677276611,
      "learning_rate": 0.0005765769812291756,
      "loss": 3.1824,
      "step": 29173
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.030836343765259,
      "learning_rate": 0.0005765753966320168,
      "loss": 3.0228,
      "step": 29174
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5233319997787476,
      "learning_rate": 0.0005765738119834373,
      "loss": 3.0381,
      "step": 29175
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5941729545593262,
      "learning_rate": 0.0005765722272834378,
      "loss": 2.9998,
      "step": 29176
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4374886751174927,
      "learning_rate": 0.000576570642532018,
      "loss": 3.2508,
      "step": 29177
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4259378910064697,
      "learning_rate": 0.0005765690577291787,
      "loss": 3.0186,
      "step": 29178
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1639323234558105,
      "learning_rate": 0.0005765674728749199,
      "loss": 3.1527,
      "step": 29179
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0158045291900635,
      "learning_rate": 0.0005765658879692421,
      "loss": 2.8683,
      "step": 29180
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3685760498046875,
      "learning_rate": 0.0005765643030121454,
      "loss": 2.9272,
      "step": 29181
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5735869407653809,
      "learning_rate": 0.0005765627180036302,
      "loss": 3.3292,
      "step": 29182
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.247420072555542,
      "learning_rate": 0.0005765611329436967,
      "loss": 3.205,
      "step": 29183
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2137529850006104,
      "learning_rate": 0.0005765595478323454,
      "loss": 3.18,
      "step": 29184
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6122074127197266,
      "learning_rate": 0.0005765579626695763,
      "loss": 3.3296,
      "step": 29185
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9955788850784302,
      "learning_rate": 0.00057655637745539,
      "loss": 3.2698,
      "step": 29186
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9333945512771606,
      "learning_rate": 0.0005765547921897865,
      "loss": 3.0233,
      "step": 29187
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3104808330535889,
      "learning_rate": 0.0005765532068727664,
      "loss": 3.0946,
      "step": 29188
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1904728412628174,
      "learning_rate": 0.0005765516215043297,
      "loss": 2.9313,
      "step": 29189
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.273015022277832,
      "learning_rate": 0.000576550036084477,
      "loss": 3.0051,
      "step": 29190
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3201992511749268,
      "learning_rate": 0.0005765484506132083,
      "loss": 3.0724,
      "step": 29191
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7396899461746216,
      "learning_rate": 0.000576546865090524,
      "loss": 3.1007,
      "step": 29192
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.757079005241394,
      "learning_rate": 0.0005765452795164245,
      "loss": 3.2105,
      "step": 29193
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6186753511428833,
      "learning_rate": 0.0005765436938909099,
      "loss": 3.0531,
      "step": 29194
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.0352537631988525,
      "learning_rate": 0.0005765421082139807,
      "loss": 2.9189,
      "step": 29195
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.571874976158142,
      "learning_rate": 0.0005765405224856371,
      "loss": 3.1495,
      "step": 29196
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.674731969833374,
      "learning_rate": 0.0005765389367058793,
      "loss": 3.0782,
      "step": 29197
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.494871735572815,
      "learning_rate": 0.0005765373508747078,
      "loss": 2.963,
      "step": 29198
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6834417581558228,
      "learning_rate": 0.0005765357649921228,
      "loss": 3.169,
      "step": 29199
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5364354848861694,
      "learning_rate": 0.0005765341790581245,
      "loss": 3.0146,
      "step": 29200
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.817945957183838,
      "learning_rate": 0.0005765325930727134,
      "loss": 2.9553,
      "step": 29201
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0686838626861572,
      "learning_rate": 0.0005765310070358896,
      "loss": 2.9356,
      "step": 29202
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.503533959388733,
      "learning_rate": 0.0005765294209476534,
      "loss": 3.0067,
      "step": 29203
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9625921249389648,
      "learning_rate": 0.0005765278348080051,
      "loss": 3.1752,
      "step": 29204
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.461892604827881,
      "learning_rate": 0.0005765262486169452,
      "loss": 2.9862,
      "step": 29205
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.745448350906372,
      "learning_rate": 0.0005765246623744739,
      "loss": 3.1837,
      "step": 29206
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6607691049575806,
      "learning_rate": 0.0005765230760805913,
      "loss": 3.07,
      "step": 29207
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.630038857460022,
      "learning_rate": 0.000576521489735298,
      "loss": 3.4558,
      "step": 29208
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7860058546066284,
      "learning_rate": 0.000576519903338594,
      "loss": 3.169,
      "step": 29209
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6738232374191284,
      "learning_rate": 0.0005765183168904797,
      "loss": 3.2457,
      "step": 29210
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5293142795562744,
      "learning_rate": 0.0005765167303909555,
      "loss": 2.736,
      "step": 29211
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3060939311981201,
      "learning_rate": 0.0005765151438400217,
      "loss": 2.6798,
      "step": 29212
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2392233610153198,
      "learning_rate": 0.0005765135572376784,
      "loss": 3.0141,
      "step": 29213
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5705029964447021,
      "learning_rate": 0.0005765119705839261,
      "loss": 3.0378,
      "step": 29214
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.685316562652588,
      "learning_rate": 0.0005765103838787648,
      "loss": 3.1557,
      "step": 29215
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5352799892425537,
      "learning_rate": 0.0005765087971221953,
      "loss": 3.269,
      "step": 29216
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5105600357055664,
      "learning_rate": 0.0005765072103142174,
      "loss": 2.9582,
      "step": 29217
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3770180940628052,
      "learning_rate": 0.0005765056234548316,
      "loss": 3.0922,
      "step": 29218
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4288990497589111,
      "learning_rate": 0.0005765040365440382,
      "loss": 3.1232,
      "step": 29219
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5891263484954834,
      "learning_rate": 0.0005765024495818375,
      "loss": 3.2034,
      "step": 29220
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7042316198349,
      "learning_rate": 0.0005765008625682298,
      "loss": 3.0564,
      "step": 29221
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3252774477005005,
      "learning_rate": 0.0005764992755032154,
      "loss": 3.2178,
      "step": 29222
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2906346321105957,
      "learning_rate": 0.0005764976883867944,
      "loss": 2.974,
      "step": 29223
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6741595268249512,
      "learning_rate": 0.0005764961012189674,
      "loss": 3.254,
      "step": 29224
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4754366874694824,
      "learning_rate": 0.0005764945139997345,
      "loss": 3.2452,
      "step": 29225
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5499486923217773,
      "learning_rate": 0.0005764929267290961,
      "loss": 3.0969,
      "step": 29226
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8615260124206543,
      "learning_rate": 0.0005764913394070524,
      "loss": 2.9945,
      "step": 29227
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1690850257873535,
      "learning_rate": 0.0005764897520336037,
      "loss": 3.2266,
      "step": 29228
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5675684213638306,
      "learning_rate": 0.0005764881646087505,
      "loss": 3.084,
      "step": 29229
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.462298035621643,
      "learning_rate": 0.0005764865771324928,
      "loss": 2.9352,
      "step": 29230
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6542726755142212,
      "learning_rate": 0.0005764849896048311,
      "loss": 3.0996,
      "step": 29231
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8381168842315674,
      "learning_rate": 0.0005764834020257655,
      "loss": 3.0054,
      "step": 29232
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7504584789276123,
      "learning_rate": 0.0005764818143952965,
      "loss": 3.1179,
      "step": 29233
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.694800853729248,
      "learning_rate": 0.0005764802267134243,
      "loss": 3.0212,
      "step": 29234
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3782122135162354,
      "learning_rate": 0.0005764786389801492,
      "loss": 3.0941,
      "step": 29235
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0161209106445312,
      "learning_rate": 0.0005764770511954716,
      "loss": 3.1615,
      "step": 29236
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.601750373840332,
      "learning_rate": 0.0005764754633593916,
      "loss": 3.158,
      "step": 29237
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0335569381713867,
      "learning_rate": 0.0005764738754719096,
      "loss": 3.0,
      "step": 29238
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2350497245788574,
      "learning_rate": 0.0005764722875330259,
      "loss": 3.02,
      "step": 29239
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2947583198547363,
      "learning_rate": 0.0005764706995427407,
      "loss": 3.0612,
      "step": 29240
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4041410684585571,
      "learning_rate": 0.0005764691115010545,
      "loss": 2.8406,
      "step": 29241
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9464545249938965,
      "learning_rate": 0.0005764675234079675,
      "loss": 3.1759,
      "step": 29242
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5867958068847656,
      "learning_rate": 0.0005764659352634798,
      "loss": 3.0142,
      "step": 29243
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3936095237731934,
      "learning_rate": 0.000576464347067592,
      "loss": 3.0597,
      "step": 29244
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8653640747070312,
      "learning_rate": 0.0005764627588203041,
      "loss": 3.1181,
      "step": 29245
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8109350204467773,
      "learning_rate": 0.0005764611705216167,
      "loss": 3.1588,
      "step": 29246
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.7053444385528564,
      "learning_rate": 0.0005764595821715299,
      "loss": 3.0726,
      "step": 29247
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7581171989440918,
      "learning_rate": 0.0005764579937700441,
      "loss": 2.8428,
      "step": 29248
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.75663423538208,
      "learning_rate": 0.0005764564053171595,
      "loss": 2.911,
      "step": 29249
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3726598024368286,
      "learning_rate": 0.0005764548168128765,
      "loss": 3.1114,
      "step": 29250
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5112628936767578,
      "learning_rate": 0.0005764532282571953,
      "loss": 3.0364,
      "step": 29251
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5152575969696045,
      "learning_rate": 0.0005764516396501161,
      "loss": 2.7972,
      "step": 29252
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1028506755828857,
      "learning_rate": 0.0005764500509916395,
      "loss": 3.1917,
      "step": 29253
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6261664628982544,
      "learning_rate": 0.0005764484622817655,
      "loss": 3.3906,
      "step": 29254
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4716545343399048,
      "learning_rate": 0.0005764468735204945,
      "loss": 2.9162,
      "step": 29255
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4306350946426392,
      "learning_rate": 0.000576445284707827,
      "loss": 3.1594,
      "step": 29256
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1578149795532227,
      "learning_rate": 0.0005764436958437629,
      "loss": 3.0065,
      "step": 29257
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4482280015945435,
      "learning_rate": 0.0005764421069283027,
      "loss": 3.0055,
      "step": 29258
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9120864868164062,
      "learning_rate": 0.0005764405179614468,
      "loss": 3.2778,
      "step": 29259
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.599900484085083,
      "learning_rate": 0.0005764389289431953,
      "loss": 3.2066,
      "step": 29260
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6874544620513916,
      "learning_rate": 0.0005764373398735486,
      "loss": 2.8514,
      "step": 29261
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2470546960830688,
      "learning_rate": 0.0005764357507525071,
      "loss": 2.9391,
      "step": 29262
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0157508850097656,
      "learning_rate": 0.0005764341615800708,
      "loss": 3.2032,
      "step": 29263
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1287667751312256,
      "learning_rate": 0.0005764325723562403,
      "loss": 2.9437,
      "step": 29264
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4031658172607422,
      "learning_rate": 0.0005764309830810157,
      "loss": 2.9367,
      "step": 29265
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2881871461868286,
      "learning_rate": 0.0005764293937543974,
      "loss": 2.9639,
      "step": 29266
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7455148696899414,
      "learning_rate": 0.0005764278043763857,
      "loss": 3.029,
      "step": 29267
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8098028898239136,
      "learning_rate": 0.0005764262149469808,
      "loss": 3.0932,
      "step": 29268
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1987664699554443,
      "learning_rate": 0.000576424625466183,
      "loss": 2.9956,
      "step": 29269
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4621835947036743,
      "learning_rate": 0.0005764230359339927,
      "loss": 3.083,
      "step": 29270
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3246533870697021,
      "learning_rate": 0.0005764214463504101,
      "loss": 3.1221,
      "step": 29271
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7529598474502563,
      "learning_rate": 0.0005764198567154356,
      "loss": 3.128,
      "step": 29272
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8435574769973755,
      "learning_rate": 0.0005764182670290694,
      "loss": 3.0632,
      "step": 29273
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.524596095085144,
      "learning_rate": 0.0005764166772913119,
      "loss": 2.931,
      "step": 29274
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.422942876815796,
      "learning_rate": 0.0005764150875021631,
      "loss": 2.9814,
      "step": 29275
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7598869800567627,
      "learning_rate": 0.0005764134976616237,
      "loss": 3.3615,
      "step": 29276
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.270117163658142,
      "learning_rate": 0.0005764119077696938,
      "loss": 3.2432,
      "step": 29277
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8704321384429932,
      "learning_rate": 0.0005764103178263737,
      "loss": 3.3665,
      "step": 29278
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.332434892654419,
      "learning_rate": 0.0005764087278316637,
      "loss": 3.0921,
      "step": 29279
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.730543613433838,
      "learning_rate": 0.000576407137785564,
      "loss": 2.978,
      "step": 29280
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7211381196975708,
      "learning_rate": 0.0005764055476880751,
      "loss": 3.0826,
      "step": 29281
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2474679946899414,
      "learning_rate": 0.0005764039575391972,
      "loss": 3.1396,
      "step": 29282
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.663320779800415,
      "learning_rate": 0.0005764023673389305,
      "loss": 3.234,
      "step": 29283
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5844844579696655,
      "learning_rate": 0.0005764007770872755,
      "loss": 2.9845,
      "step": 29284
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3480920791625977,
      "learning_rate": 0.0005763991867842324,
      "loss": 3.1085,
      "step": 29285
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.18894624710083,
      "learning_rate": 0.0005763975964298012,
      "loss": 3.2787,
      "step": 29286
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4560061693191528,
      "learning_rate": 0.0005763960060239828,
      "loss": 3.1383,
      "step": 29287
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4741259813308716,
      "learning_rate": 0.000576394415566777,
      "loss": 3.2507,
      "step": 29288
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4602187871932983,
      "learning_rate": 0.0005763928250581843,
      "loss": 3.115,
      "step": 29289
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3936527967453003,
      "learning_rate": 0.0005763912344982049,
      "loss": 3.4808,
      "step": 29290
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.581939935684204,
      "learning_rate": 0.0005763896438868392,
      "loss": 3.1873,
      "step": 29291
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.490074872970581,
      "learning_rate": 0.0005763880532240875,
      "loss": 3.0258,
      "step": 29292
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8543174266815186,
      "learning_rate": 0.00057638646250995,
      "loss": 3.2092,
      "step": 29293
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.486769914627075,
      "learning_rate": 0.000576384871744427,
      "loss": 3.055,
      "step": 29294
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5084401369094849,
      "learning_rate": 0.0005763832809275189,
      "loss": 3.2542,
      "step": 29295
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5970650911331177,
      "learning_rate": 0.0005763816900592258,
      "loss": 3.1154,
      "step": 29296
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4478421211242676,
      "learning_rate": 0.0005763800991395483,
      "loss": 3.1809,
      "step": 29297
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.575751543045044,
      "learning_rate": 0.0005763785081684865,
      "loss": 3.1747,
      "step": 29298
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3711520433425903,
      "learning_rate": 0.0005763769171460406,
      "loss": 3.0597,
      "step": 29299
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6422585248947144,
      "learning_rate": 0.0005763753260722111,
      "loss": 2.8471,
      "step": 29300
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.365551710128784,
      "learning_rate": 0.0005763737349469981,
      "loss": 3.0205,
      "step": 29301
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4962106943130493,
      "learning_rate": 0.0005763721437704022,
      "loss": 3.176,
      "step": 29302
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7403677701950073,
      "learning_rate": 0.0005763705525424234,
      "loss": 3.1606,
      "step": 29303
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6543385982513428,
      "learning_rate": 0.000576368961263062,
      "loss": 3.185,
      "step": 29304
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6872843503952026,
      "learning_rate": 0.0005763673699323185,
      "loss": 2.9703,
      "step": 29305
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.399391531944275,
      "learning_rate": 0.0005763657785501931,
      "loss": 3.2567,
      "step": 29306
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.467916488647461,
      "learning_rate": 0.0005763641871166861,
      "loss": 3.0492,
      "step": 29307
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4966344833374023,
      "learning_rate": 0.0005763625956317976,
      "loss": 3.0793,
      "step": 29308
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0955135822296143,
      "learning_rate": 0.0005763610040955283,
      "loss": 2.8117,
      "step": 29309
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9381613731384277,
      "learning_rate": 0.0005763594125078781,
      "loss": 3.1607,
      "step": 29310
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7239632606506348,
      "learning_rate": 0.0005763578208688476,
      "loss": 3.1504,
      "step": 29311
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2739131450653076,
      "learning_rate": 0.000576356229178437,
      "loss": 2.9894,
      "step": 29312
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5901938676834106,
      "learning_rate": 0.0005763546374366465,
      "loss": 3.2099,
      "step": 29313
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3595013618469238,
      "learning_rate": 0.0005763530456434763,
      "loss": 3.1818,
      "step": 29314
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5726364850997925,
      "learning_rate": 0.000576351453798927,
      "loss": 3.0595,
      "step": 29315
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6377400159835815,
      "learning_rate": 0.0005763498619029988,
      "loss": 3.2891,
      "step": 29316
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.351315975189209,
      "learning_rate": 0.0005763482699556919,
      "loss": 2.9741,
      "step": 29317
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.466567039489746,
      "learning_rate": 0.0005763466779570065,
      "loss": 3.292,
      "step": 29318
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.593479871749878,
      "learning_rate": 0.0005763450859069433,
      "loss": 3.1739,
      "step": 29319
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7540092468261719,
      "learning_rate": 0.0005763434938055021,
      "loss": 3.1868,
      "step": 29320
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7215718030929565,
      "learning_rate": 0.0005763419016526836,
      "loss": 3.0378,
      "step": 29321
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8183118104934692,
      "learning_rate": 0.0005763403094484878,
      "loss": 3.0854,
      "step": 29322
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6001672744750977,
      "learning_rate": 0.0005763387171929152,
      "loss": 3.1682,
      "step": 29323
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0105435848236084,
      "learning_rate": 0.0005763371248859659,
      "loss": 3.0766,
      "step": 29324
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.742261528968811,
      "learning_rate": 0.0005763355325276406,
      "loss": 3.1163,
      "step": 29325
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6008626222610474,
      "learning_rate": 0.000576333940117939,
      "loss": 3.0461,
      "step": 29326
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1604950428009033,
      "learning_rate": 0.0005763323476568618,
      "loss": 2.8896,
      "step": 29327
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6025769710540771,
      "learning_rate": 0.0005763307551444092,
      "loss": 2.8659,
      "step": 29328
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.7663464546203613,
      "learning_rate": 0.0005763291625805815,
      "loss": 3.0414,
      "step": 29329
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3597476482391357,
      "learning_rate": 0.000576327569965379,
      "loss": 2.9814,
      "step": 29330
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5439082384109497,
      "learning_rate": 0.0005763259772988019,
      "loss": 3.117,
      "step": 29331
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.884238839149475,
      "learning_rate": 0.0005763243845808507,
      "loss": 3.0108,
      "step": 29332
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.838549017906189,
      "learning_rate": 0.0005763227918115256,
      "loss": 3.1695,
      "step": 29333
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3606623411178589,
      "learning_rate": 0.0005763211989908268,
      "loss": 3.3502,
      "step": 29334
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4302973747253418,
      "learning_rate": 0.0005763196061187546,
      "loss": 2.9809,
      "step": 29335
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3497276306152344,
      "learning_rate": 0.0005763180131953096,
      "loss": 3.1119,
      "step": 29336
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6268656253814697,
      "learning_rate": 0.0005763164202204918,
      "loss": 3.221,
      "step": 29337
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8973249197006226,
      "learning_rate": 0.0005763148271943014,
      "loss": 3.2073,
      "step": 29338
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1039676666259766,
      "learning_rate": 0.0005763132341167389,
      "loss": 3.092,
      "step": 29339
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6146376132965088,
      "learning_rate": 0.0005763116409878046,
      "loss": 3.0076,
      "step": 29340
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4057366847991943,
      "learning_rate": 0.0005763100478074989,
      "loss": 2.8834,
      "step": 29341
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5509718656539917,
      "learning_rate": 0.0005763084545758217,
      "loss": 3.0829,
      "step": 29342
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8282238245010376,
      "learning_rate": 0.0005763068612927737,
      "loss": 3.3433,
      "step": 29343
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4682143926620483,
      "learning_rate": 0.0005763052679583549,
      "loss": 3.114,
      "step": 29344
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5043020248413086,
      "learning_rate": 0.0005763036745725659,
      "loss": 3.4075,
      "step": 29345
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.327939987182617,
      "learning_rate": 0.0005763020811354067,
      "loss": 3.0154,
      "step": 29346
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6549559831619263,
      "learning_rate": 0.0005763004876468778,
      "loss": 2.8408,
      "step": 29347
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1688475608825684,
      "learning_rate": 0.0005762988941069794,
      "loss": 2.9301,
      "step": 29348
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.7519328594207764,
      "learning_rate": 0.0005762973005157117,
      "loss": 3.0349,
      "step": 29349
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.654451608657837,
      "learning_rate": 0.0005762957068730754,
      "loss": 3.051,
      "step": 29350
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.419634461402893,
      "learning_rate": 0.0005762941131790703,
      "loss": 3.2601,
      "step": 29351
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.582980155944824,
      "learning_rate": 0.000576292519433697,
      "loss": 3.2398,
      "step": 29352
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.245331048965454,
      "learning_rate": 0.0005762909256369557,
      "loss": 2.9927,
      "step": 29353
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.1295998096466064,
      "learning_rate": 0.0005762893317888466,
      "loss": 2.9176,
      "step": 29354
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5507330894470215,
      "learning_rate": 0.0005762877378893702,
      "loss": 2.8891,
      "step": 29355
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3773276805877686,
      "learning_rate": 0.0005762861439385266,
      "loss": 2.9222,
      "step": 29356
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.8618204593658447,
      "learning_rate": 0.0005762845499363163,
      "loss": 3.0403,
      "step": 29357
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0245516300201416,
      "learning_rate": 0.0005762829558827394,
      "loss": 2.9318,
      "step": 29358
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5495916604995728,
      "learning_rate": 0.0005762813617777963,
      "loss": 3.0265,
      "step": 29359
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.495657444000244,
      "learning_rate": 0.0005762797676214873,
      "loss": 3.0929,
      "step": 29360
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.0983870029449463,
      "learning_rate": 0.0005762781734138127,
      "loss": 3.1238,
      "step": 29361
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0610499382019043,
      "learning_rate": 0.0005762765791547728,
      "loss": 3.0688,
      "step": 29362
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4628618955612183,
      "learning_rate": 0.0005762749848443678,
      "loss": 3.0709,
      "step": 29363
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.538886070251465,
      "learning_rate": 0.0005762733904825981,
      "loss": 3.3713,
      "step": 29364
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2991087436676025,
      "learning_rate": 0.0005762717960694639,
      "loss": 3.4274,
      "step": 29365
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3184970617294312,
      "learning_rate": 0.0005762702016049656,
      "loss": 3.0121,
      "step": 29366
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3559143543243408,
      "learning_rate": 0.0005762686070891035,
      "loss": 3.2049,
      "step": 29367
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7337692975997925,
      "learning_rate": 0.0005762670125218777,
      "loss": 3.3177,
      "step": 29368
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1208863258361816,
      "learning_rate": 0.0005762654179032889,
      "loss": 3.0555,
      "step": 29369
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4370784759521484,
      "learning_rate": 0.0005762638232333369,
      "loss": 3.1665,
      "step": 29370
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7096986770629883,
      "learning_rate": 0.0005762622285120224,
      "loss": 2.9354,
      "step": 29371
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.003770351409912,
      "learning_rate": 0.0005762606337393454,
      "loss": 3.2093,
      "step": 29372
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4717190265655518,
      "learning_rate": 0.0005762590389153065,
      "loss": 2.9966,
      "step": 29373
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4592558145523071,
      "learning_rate": 0.0005762574440399057,
      "loss": 3.1641,
      "step": 29374
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6727886199951172,
      "learning_rate": 0.0005762558491131434,
      "loss": 2.9013,
      "step": 29375
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6970462799072266,
      "learning_rate": 0.00057625425413502,
      "loss": 3.2046,
      "step": 29376
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.544565200805664,
      "learning_rate": 0.0005762526591055357,
      "loss": 3.1028,
      "step": 29377
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.524877667427063,
      "learning_rate": 0.0005762510640246909,
      "loss": 3.1103,
      "step": 29378
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.691930055618286,
      "learning_rate": 0.0005762494688924857,
      "loss": 3.1905,
      "step": 29379
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.270907402038574,
      "learning_rate": 0.0005762478737089205,
      "loss": 3.0056,
      "step": 29380
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.8758630752563477,
      "learning_rate": 0.0005762462784739957,
      "loss": 3.0182,
      "step": 29381
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4769006967544556,
      "learning_rate": 0.0005762446831877114,
      "loss": 3.2013,
      "step": 29382
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.452791929244995,
      "learning_rate": 0.0005762430878500681,
      "loss": 2.9547,
      "step": 29383
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3777085542678833,
      "learning_rate": 0.000576241492461066,
      "loss": 3.1828,
      "step": 29384
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.455335021018982,
      "learning_rate": 0.0005762398970207053,
      "loss": 3.1294,
      "step": 29385
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.693830966949463,
      "learning_rate": 0.0005762383015289864,
      "loss": 3.1072,
      "step": 29386
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4504430294036865,
      "learning_rate": 0.0005762367059859096,
      "loss": 3.0209,
      "step": 29387
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6660585403442383,
      "learning_rate": 0.0005762351103914752,
      "loss": 3.0301,
      "step": 29388
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.512534499168396,
      "learning_rate": 0.0005762335147456836,
      "loss": 3.0781,
      "step": 29389
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.972912311553955,
      "learning_rate": 0.0005762319190485347,
      "loss": 3.0389,
      "step": 29390
    },
    {
      "epoch": 0.38,
      "grad_norm": 4.7602763175964355,
      "learning_rate": 0.0005762303233000293,
      "loss": 2.9875,
      "step": 29391
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3670475482940674,
      "learning_rate": 0.0005762287275001674,
      "loss": 2.9924,
      "step": 29392
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8192445039749146,
      "learning_rate": 0.0005762271316489492,
      "loss": 3.087,
      "step": 29393
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.5768015384674072,
      "learning_rate": 0.0005762255357463755,
      "loss": 3.1542,
      "step": 29394
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.620267868041992,
      "learning_rate": 0.000576223939792446,
      "loss": 3.2381,
      "step": 29395
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5454492568969727,
      "learning_rate": 0.0005762223437871613,
      "loss": 3.0019,
      "step": 29396
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3233219385147095,
      "learning_rate": 0.0005762207477305217,
      "loss": 3.0111,
      "step": 29397
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6702384948730469,
      "learning_rate": 0.0005762191516225275,
      "loss": 2.9667,
      "step": 29398
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.8457272052764893,
      "learning_rate": 0.0005762175554631788,
      "loss": 3.1172,
      "step": 29399
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.470329523086548,
      "learning_rate": 0.0005762159592524761,
      "loss": 2.9364,
      "step": 29400
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.442203402519226,
      "learning_rate": 0.0005762143629904198,
      "loss": 3.0954,
      "step": 29401
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.109025239944458,
      "learning_rate": 0.0005762127666770098,
      "loss": 3.4046,
      "step": 29402
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5960240364074707,
      "learning_rate": 0.0005762111703122467,
      "loss": 3.0454,
      "step": 29403
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8241430521011353,
      "learning_rate": 0.0005762095738961308,
      "loss": 3.1768,
      "step": 29404
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6342699527740479,
      "learning_rate": 0.0005762079774286623,
      "loss": 3.0267,
      "step": 29405
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7553375959396362,
      "learning_rate": 0.0005762063809098414,
      "loss": 3.2535,
      "step": 29406
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6564218997955322,
      "learning_rate": 0.0005762047843396687,
      "loss": 3.1211,
      "step": 29407
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4470218420028687,
      "learning_rate": 0.0005762031877181443,
      "loss": 2.7478,
      "step": 29408
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4076874256134033,
      "learning_rate": 0.0005762015910452684,
      "loss": 3.1459,
      "step": 29409
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1868906021118164,
      "learning_rate": 0.0005761999943210415,
      "loss": 3.2246,
      "step": 29410
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.370524525642395,
      "learning_rate": 0.0005761983975454638,
      "loss": 2.9707,
      "step": 29411
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5413097143173218,
      "learning_rate": 0.0005761968007185356,
      "loss": 3.0491,
      "step": 29412
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6073174476623535,
      "learning_rate": 0.0005761952038402571,
      "loss": 3.0952,
      "step": 29413
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2602709531784058,
      "learning_rate": 0.0005761936069106289,
      "loss": 3.1025,
      "step": 29414
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3674252033233643,
      "learning_rate": 0.0005761920099296509,
      "loss": 2.8551,
      "step": 29415
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1743065118789673,
      "learning_rate": 0.0005761904128973236,
      "loss": 2.9793,
      "step": 29416
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2572929859161377,
      "learning_rate": 0.0005761888158136474,
      "loss": 3.1886,
      "step": 29417
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2589752674102783,
      "learning_rate": 0.0005761872186786224,
      "loss": 2.9053,
      "step": 29418
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.599916934967041,
      "learning_rate": 0.000576185621492249,
      "loss": 3.0714,
      "step": 29419
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6718839406967163,
      "learning_rate": 0.0005761840242545275,
      "loss": 2.9944,
      "step": 29420
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0415005683898926,
      "learning_rate": 0.0005761824269654583,
      "loss": 3.089,
      "step": 29421
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6410610675811768,
      "learning_rate": 0.0005761808296250413,
      "loss": 3.3069,
      "step": 29422
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5967737436294556,
      "learning_rate": 0.0005761792322332773,
      "loss": 3.0605,
      "step": 29423
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5728033781051636,
      "learning_rate": 0.0005761776347901663,
      "loss": 3.1256,
      "step": 29424
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7260973453521729,
      "learning_rate": 0.0005761760372957087,
      "loss": 3.0741,
      "step": 29425
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4639708995819092,
      "learning_rate": 0.0005761744397499047,
      "loss": 3.1367,
      "step": 29426
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5289790630340576,
      "learning_rate": 0.0005761728421527547,
      "loss": 2.8998,
      "step": 29427
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4287724494934082,
      "learning_rate": 0.0005761712445042588,
      "loss": 3.3344,
      "step": 29428
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5533334016799927,
      "learning_rate": 0.0005761696468044176,
      "loss": 3.0137,
      "step": 29429
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.787880301475525,
      "learning_rate": 0.0005761680490532313,
      "loss": 3.133,
      "step": 29430
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6704583168029785,
      "learning_rate": 0.0005761664512507001,
      "loss": 2.8814,
      "step": 29431
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5455864667892456,
      "learning_rate": 0.0005761648533968243,
      "loss": 2.8634,
      "step": 29432
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4882445335388184,
      "learning_rate": 0.0005761632554916043,
      "loss": 3.1066,
      "step": 29433
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6204556226730347,
      "learning_rate": 0.0005761616575350403,
      "loss": 3.2068,
      "step": 29434
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3955117464065552,
      "learning_rate": 0.0005761600595271326,
      "loss": 3.0026,
      "step": 29435
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.981422185897827,
      "learning_rate": 0.0005761584614678815,
      "loss": 2.8699,
      "step": 29436
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2476794719696045,
      "learning_rate": 0.0005761568633572874,
      "loss": 2.9609,
      "step": 29437
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7135041952133179,
      "learning_rate": 0.0005761552651953507,
      "loss": 3.4077,
      "step": 29438
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.6907193660736084,
      "learning_rate": 0.0005761536669820713,
      "loss": 3.0239,
      "step": 29439
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3027112483978271,
      "learning_rate": 0.0005761520687174497,
      "loss": 3.0442,
      "step": 29440
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.458693265914917,
      "learning_rate": 0.0005761504704014863,
      "loss": 3.1272,
      "step": 29441
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5870850086212158,
      "learning_rate": 0.0005761488720341813,
      "loss": 3.2326,
      "step": 29442
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8326846361160278,
      "learning_rate": 0.000576147273615535,
      "loss": 3.121,
      "step": 29443
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8739771842956543,
      "learning_rate": 0.0005761456751455478,
      "loss": 3.0834,
      "step": 29444
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6063700914382935,
      "learning_rate": 0.0005761440766242198,
      "loss": 3.0064,
      "step": 29445
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3457157611846924,
      "learning_rate": 0.0005761424780515514,
      "loss": 2.8045,
      "step": 29446
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4832013845443726,
      "learning_rate": 0.000576140879427543,
      "loss": 3.1566,
      "step": 29447
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7844232320785522,
      "learning_rate": 0.0005761392807521946,
      "loss": 2.9387,
      "step": 29448
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.482871651649475,
      "learning_rate": 0.0005761376820255069,
      "loss": 3.105,
      "step": 29449
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3947203159332275,
      "learning_rate": 0.0005761360832474799,
      "loss": 3.243,
      "step": 29450
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6743475198745728,
      "learning_rate": 0.000576134484418114,
      "loss": 3.1794,
      "step": 29451
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6597572565078735,
      "learning_rate": 0.0005761328855374095,
      "loss": 2.8746,
      "step": 29452
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.792827844619751,
      "learning_rate": 0.0005761312866053667,
      "loss": 3.3367,
      "step": 29453
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3619797229766846,
      "learning_rate": 0.0005761296876219858,
      "loss": 3.1403,
      "step": 29454
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.692157745361328,
      "learning_rate": 0.0005761280885872673,
      "loss": 3.1079,
      "step": 29455
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7311971187591553,
      "learning_rate": 0.0005761264895012112,
      "loss": 3.1224,
      "step": 29456
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5976712703704834,
      "learning_rate": 0.0005761248903638182,
      "loss": 3.1171,
      "step": 29457
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.490492105484009,
      "learning_rate": 0.0005761232911750883,
      "loss": 3.1317,
      "step": 29458
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5226737260818481,
      "learning_rate": 0.0005761216919350217,
      "loss": 3.1565,
      "step": 29459
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4031645059585571,
      "learning_rate": 0.0005761200926436191,
      "loss": 3.1007,
      "step": 29460
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9175723791122437,
      "learning_rate": 0.0005761184933008804,
      "loss": 3.3096,
      "step": 29461
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.911973237991333,
      "learning_rate": 0.0005761168939068061,
      "loss": 2.8088,
      "step": 29462
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9881751537322998,
      "learning_rate": 0.0005761152944613964,
      "loss": 2.8877,
      "step": 29463
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6028509140014648,
      "learning_rate": 0.0005761136949646519,
      "loss": 3.2372,
      "step": 29464
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7638556957244873,
      "learning_rate": 0.0005761120954165724,
      "loss": 3.0462,
      "step": 29465
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5195130109786987,
      "learning_rate": 0.0005761104958171585,
      "loss": 3.2136,
      "step": 29466
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2413907051086426,
      "learning_rate": 0.0005761088961664105,
      "loss": 3.3768,
      "step": 29467
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.868240237236023,
      "learning_rate": 0.0005761072964643286,
      "loss": 2.9531,
      "step": 29468
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.37190842628479,
      "learning_rate": 0.0005761056967109132,
      "loss": 2.9712,
      "step": 29469
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6281869411468506,
      "learning_rate": 0.0005761040969061645,
      "loss": 3.0502,
      "step": 29470
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.525221347808838,
      "learning_rate": 0.0005761024970500828,
      "loss": 3.176,
      "step": 29471
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3641650676727295,
      "learning_rate": 0.0005761008971426685,
      "loss": 2.8614,
      "step": 29472
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4392837285995483,
      "learning_rate": 0.0005760992971839217,
      "loss": 3.1183,
      "step": 29473
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.956325650215149,
      "learning_rate": 0.000576097697173843,
      "loss": 3.0533,
      "step": 29474
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5462311506271362,
      "learning_rate": 0.0005760960971124325,
      "loss": 3.0843,
      "step": 29475
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6649540662765503,
      "learning_rate": 0.0005760944969996905,
      "loss": 3.0221,
      "step": 29476
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5563137531280518,
      "learning_rate": 0.0005760928968356172,
      "loss": 3.3002,
      "step": 29477
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.79825496673584,
      "learning_rate": 0.0005760912966202131,
      "loss": 3.206,
      "step": 29478
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3658970594406128,
      "learning_rate": 0.0005760896963534786,
      "loss": 2.9901,
      "step": 29479
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4090811014175415,
      "learning_rate": 0.0005760880960354136,
      "loss": 3.2989,
      "step": 29480
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.06803560256958,
      "learning_rate": 0.0005760864956660186,
      "loss": 3.1318,
      "step": 29481
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.1982009410858154,
      "learning_rate": 0.000576084895245294,
      "loss": 3.0066,
      "step": 29482
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4095649719238281,
      "learning_rate": 0.00057608329477324,
      "loss": 2.8956,
      "step": 29483
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4739691019058228,
      "learning_rate": 0.0005760816942498568,
      "loss": 2.9181,
      "step": 29484
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3278732299804688,
      "learning_rate": 0.0005760800936751449,
      "loss": 2.8783,
      "step": 29485
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3971240520477295,
      "learning_rate": 0.0005760784930491045,
      "loss": 3.3066,
      "step": 29486
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8302348852157593,
      "learning_rate": 0.0005760768923717359,
      "loss": 3.0622,
      "step": 29487
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5464603900909424,
      "learning_rate": 0.0005760752916430394,
      "loss": 2.8681,
      "step": 29488
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.3869807720184326,
      "learning_rate": 0.0005760736908630152,
      "loss": 2.951,
      "step": 29489
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5290169715881348,
      "learning_rate": 0.0005760720900316638,
      "loss": 3.1704,
      "step": 29490
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.377034068107605,
      "learning_rate": 0.0005760704891489853,
      "loss": 3.0568,
      "step": 29491
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.875313401222229,
      "learning_rate": 0.0005760688882149802,
      "loss": 3.2875,
      "step": 29492
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.196786880493164,
      "learning_rate": 0.0005760672872296486,
      "loss": 3.0601,
      "step": 29493
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1960753202438354,
      "learning_rate": 0.0005760656861929908,
      "loss": 3.3284,
      "step": 29494
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5666276216506958,
      "learning_rate": 0.0005760640851050073,
      "loss": 2.8566,
      "step": 29495
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4009565114974976,
      "learning_rate": 0.0005760624839656983,
      "loss": 3.1414,
      "step": 29496
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.595118761062622,
      "learning_rate": 0.000576060882775064,
      "loss": 2.8314,
      "step": 29497
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6941007375717163,
      "learning_rate": 0.0005760592815331047,
      "loss": 3.2293,
      "step": 29498
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6428358554840088,
      "learning_rate": 0.0005760576802398209,
      "loss": 3.1989,
      "step": 29499
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6958717107772827,
      "learning_rate": 0.0005760560788952128,
      "loss": 3.307,
      "step": 29500
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.926223635673523,
      "learning_rate": 0.0005760544774992806,
      "loss": 3.2129,
      "step": 29501
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4314192533493042,
      "learning_rate": 0.0005760528760520246,
      "loss": 3.2766,
      "step": 29502
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3763928413391113,
      "learning_rate": 0.0005760512745534451,
      "loss": 3.0712,
      "step": 29503
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4807229042053223,
      "learning_rate": 0.0005760496730035426,
      "loss": 3.1195,
      "step": 29504
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.3316893577575684,
      "learning_rate": 0.0005760480714023172,
      "loss": 3.2056,
      "step": 29505
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6424455642700195,
      "learning_rate": 0.0005760464697497693,
      "loss": 3.073,
      "step": 29506
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4165548086166382,
      "learning_rate": 0.0005760448680458992,
      "loss": 3.1632,
      "step": 29507
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4992406368255615,
      "learning_rate": 0.0005760432662907071,
      "loss": 3.1463,
      "step": 29508
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.1782772541046143,
      "learning_rate": 0.0005760416644841933,
      "loss": 3.1564,
      "step": 29509
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7506787776947021,
      "learning_rate": 0.0005760400626263582,
      "loss": 3.1586,
      "step": 29510
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3026061058044434,
      "learning_rate": 0.0005760384607172019,
      "loss": 3.1221,
      "step": 29511
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4690929651260376,
      "learning_rate": 0.000576036858756725,
      "loss": 3.2889,
      "step": 29512
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6314753293991089,
      "learning_rate": 0.0005760352567449275,
      "loss": 3.1888,
      "step": 29513
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7897875308990479,
      "learning_rate": 0.00057603365468181,
      "loss": 2.9107,
      "step": 29514
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.618476390838623,
      "learning_rate": 0.0005760320525673724,
      "loss": 3.3036,
      "step": 29515
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7361971139907837,
      "learning_rate": 0.0005760304504016155,
      "loss": 2.9563,
      "step": 29516
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.939788579940796,
      "learning_rate": 0.0005760288481845392,
      "loss": 2.7811,
      "step": 29517
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3583623170852661,
      "learning_rate": 0.0005760272459161439,
      "loss": 2.7783,
      "step": 29518
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7516270875930786,
      "learning_rate": 0.0005760256435964299,
      "loss": 3.1657,
      "step": 29519
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1943435668945312,
      "learning_rate": 0.0005760240412253976,
      "loss": 3.1769,
      "step": 29520
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8059616088867188,
      "learning_rate": 0.0005760224388030473,
      "loss": 3.1329,
      "step": 29521
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.00065541267395,
      "learning_rate": 0.0005760208363293791,
      "loss": 3.1517,
      "step": 29522
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.05267596244812,
      "learning_rate": 0.0005760192338043934,
      "loss": 3.1774,
      "step": 29523
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.5162532329559326,
      "learning_rate": 0.0005760176312280906,
      "loss": 2.9742,
      "step": 29524
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7584742307662964,
      "learning_rate": 0.0005760160286004708,
      "loss": 3.1946,
      "step": 29525
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7048981189727783,
      "learning_rate": 0.0005760144259215345,
      "loss": 3.1212,
      "step": 29526
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0316431522369385,
      "learning_rate": 0.0005760128231912818,
      "loss": 3.0333,
      "step": 29527
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.229968786239624,
      "learning_rate": 0.0005760112204097132,
      "loss": 3.1202,
      "step": 29528
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.667042851448059,
      "learning_rate": 0.000576009617576829,
      "loss": 3.0348,
      "step": 29529
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.753796339035034,
      "learning_rate": 0.0005760080146926292,
      "loss": 3.009,
      "step": 29530
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.2514588832855225,
      "learning_rate": 0.0005760064117571144,
      "loss": 3.2486,
      "step": 29531
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.1108031272888184,
      "learning_rate": 0.0005760048087702849,
      "loss": 3.0953,
      "step": 29532
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.2367267608642578,
      "learning_rate": 0.0005760032057321407,
      "loss": 2.9944,
      "step": 29533
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.9028208255767822,
      "learning_rate": 0.0005760016026426823,
      "loss": 3.1518,
      "step": 29534
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6846046447753906,
      "learning_rate": 0.0005759999995019102,
      "loss": 3.1442,
      "step": 29535
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7698465585708618,
      "learning_rate": 0.0005759983963098244,
      "loss": 3.0927,
      "step": 29536
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3725472688674927,
      "learning_rate": 0.0005759967930664252,
      "loss": 3.047,
      "step": 29537
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8703550100326538,
      "learning_rate": 0.000575995189771713,
      "loss": 3.1879,
      "step": 29538
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.6742057800292969,
      "learning_rate": 0.0005759935864256882,
      "loss": 3.2387,
      "step": 29539
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4915269613265991,
      "learning_rate": 0.0005759919830283509,
      "loss": 3.3365,
      "step": 29540
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.0912742614746094,
      "learning_rate": 0.0005759903795797014,
      "loss": 3.2507,
      "step": 29541
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.799310326576233,
      "learning_rate": 0.0005759887760797402,
      "loss": 2.997,
      "step": 29542
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4309747219085693,
      "learning_rate": 0.0005759871725284675,
      "loss": 2.965,
      "step": 29543
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3182176351547241,
      "learning_rate": 0.0005759855689258834,
      "loss": 3.343,
      "step": 29544
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9808615446090698,
      "learning_rate": 0.0005759839652719886,
      "loss": 3.2067,
      "step": 29545
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.9021445512771606,
      "learning_rate": 0.000575982361566783,
      "loss": 3.3325,
      "step": 29546
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7111047506332397,
      "learning_rate": 0.0005759807578102671,
      "loss": 3.0381,
      "step": 29547
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3590927124023438,
      "learning_rate": 0.0005759791540024412,
      "loss": 3.0994,
      "step": 29548
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3357166051864624,
      "learning_rate": 0.0005759775501433057,
      "loss": 3.0847,
      "step": 29549
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.4944047927856445,
      "learning_rate": 0.0005759759462328605,
      "loss": 3.0541,
      "step": 29550
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.039914846420288,
      "learning_rate": 0.0005759743422711062,
      "loss": 3.2263,
      "step": 29551
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.352892518043518,
      "learning_rate": 0.0005759727382580432,
      "loss": 3.0667,
      "step": 29552
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.227123737335205,
      "learning_rate": 0.0005759711341936717,
      "loss": 3.0429,
      "step": 29553
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.1130383014678955,
      "learning_rate": 0.0005759695300779918,
      "loss": 3.3353,
      "step": 29554
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.640713095664978,
      "learning_rate": 0.0005759679259110039,
      "loss": 3.0391,
      "step": 29555
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.949000597000122,
      "learning_rate": 0.0005759663216927086,
      "loss": 2.9063,
      "step": 29556
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.252584934234619,
      "learning_rate": 0.0005759647174231058,
      "loss": 3.0588,
      "step": 29557
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.7868411540985107,
      "learning_rate": 0.000575963113102196,
      "loss": 3.1371,
      "step": 29558
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.3707480430603027,
      "learning_rate": 0.0005759615087299794,
      "loss": 3.0373,
      "step": 29559
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.235607624053955,
      "learning_rate": 0.0005759599043064563,
      "loss": 3.0868,
      "step": 29560
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.149993896484375,
      "learning_rate": 0.0005759582998316272,
      "loss": 2.9216,
      "step": 29561
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.31655752658844,
      "learning_rate": 0.0005759566953054922,
      "loss": 2.9737,
      "step": 29562
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.447478175163269,
      "learning_rate": 0.0005759550907280515,
      "loss": 2.9698,
      "step": 29563
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.4793570041656494,
      "learning_rate": 0.0005759534860993057,
      "loss": 3.0904,
      "step": 29564
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.5017002820968628,
      "learning_rate": 0.0005759518814192549,
      "loss": 3.1939,
      "step": 29565
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8311526775360107,
      "learning_rate": 0.0005759502766878994,
      "loss": 3.1222,
      "step": 29566
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.013306140899658,
      "learning_rate": 0.0005759486719052395,
      "loss": 2.871,
      "step": 29567
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.645927906036377,
      "learning_rate": 0.0005759470670712756,
      "loss": 3.0882,
      "step": 29568
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.015275478363037,
      "learning_rate": 0.0005759454621860079,
      "loss": 2.9645,
      "step": 29569
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6811058521270752,
      "learning_rate": 0.0005759438572494367,
      "loss": 2.9424,
      "step": 29570
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9297162294387817,
      "learning_rate": 0.0005759422522615624,
      "loss": 3.0784,
      "step": 29571
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9741401672363281,
      "learning_rate": 0.0005759406472223852,
      "loss": 2.8969,
      "step": 29572
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.062046527862549,
      "learning_rate": 0.0005759390421319054,
      "loss": 3.0256,
      "step": 29573
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5598042011260986,
      "learning_rate": 0.0005759374369901233,
      "loss": 2.9886,
      "step": 29574
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7408372163772583,
      "learning_rate": 0.0005759358317970393,
      "loss": 3.1709,
      "step": 29575
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3217471837997437,
      "learning_rate": 0.0005759342265526536,
      "loss": 3.1466,
      "step": 29576
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3976235389709473,
      "learning_rate": 0.0005759326212569666,
      "loss": 3.0631,
      "step": 29577
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5401524305343628,
      "learning_rate": 0.0005759310159099783,
      "loss": 3.0792,
      "step": 29578
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.415330171585083,
      "learning_rate": 0.0005759294105116894,
      "loss": 3.043,
      "step": 29579
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5659184455871582,
      "learning_rate": 0.0005759278050620999,
      "loss": 2.9128,
      "step": 29580
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5422550439834595,
      "learning_rate": 0.0005759261995612103,
      "loss": 3.1303,
      "step": 29581
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.008314371109009,
      "learning_rate": 0.0005759245940090207,
      "loss": 2.9589,
      "step": 29582
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2199273109436035,
      "learning_rate": 0.0005759229884055317,
      "loss": 3.0946,
      "step": 29583
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.399677276611328,
      "learning_rate": 0.0005759213827507432,
      "loss": 2.9757,
      "step": 29584
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5443538427352905,
      "learning_rate": 0.0005759197770446558,
      "loss": 3.1282,
      "step": 29585
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4030178785324097,
      "learning_rate": 0.0005759181712872698,
      "loss": 3.3027,
      "step": 29586
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5572445392608643,
      "learning_rate": 0.0005759165654785852,
      "loss": 2.9444,
      "step": 29587
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.430001974105835,
      "learning_rate": 0.0005759149596186026,
      "loss": 3.3006,
      "step": 29588
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3916943073272705,
      "learning_rate": 0.0005759133537073223,
      "loss": 2.9757,
      "step": 29589
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.451513409614563,
      "learning_rate": 0.0005759117477447444,
      "loss": 3.3958,
      "step": 29590
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3812943696975708,
      "learning_rate": 0.0005759101417308693,
      "loss": 3.0262,
      "step": 29591
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9320441484451294,
      "learning_rate": 0.0005759085356656972,
      "loss": 2.9509,
      "step": 29592
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2023742198944092,
      "learning_rate": 0.0005759069295492287,
      "loss": 3.1335,
      "step": 29593
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9780445098876953,
      "learning_rate": 0.0005759053233814637,
      "loss": 3.0968,
      "step": 29594
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5628482103347778,
      "learning_rate": 0.0005759037171624028,
      "loss": 2.9832,
      "step": 29595
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8527880907058716,
      "learning_rate": 0.0005759021108920462,
      "loss": 3.1693,
      "step": 29596
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8123928308486938,
      "learning_rate": 0.0005759005045703942,
      "loss": 3.0195,
      "step": 29597
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.043672561645508,
      "learning_rate": 0.000575898898197447,
      "loss": 3.2045,
      "step": 29598
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5888516902923584,
      "learning_rate": 0.000575897291773205,
      "loss": 3.0125,
      "step": 29599
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7436929941177368,
      "learning_rate": 0.0005758956852976685,
      "loss": 3.1194,
      "step": 29600
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.463554859161377,
      "learning_rate": 0.0005758940787708379,
      "loss": 3.2182,
      "step": 29601
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2763131856918335,
      "learning_rate": 0.0005758924721927133,
      "loss": 3.0521,
      "step": 29602
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.686281681060791,
      "learning_rate": 0.000575890865563295,
      "loss": 2.9154,
      "step": 29603
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.3589088916778564,
      "learning_rate": 0.0005758892588825836,
      "loss": 3.1658,
      "step": 29604
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.971902370452881,
      "learning_rate": 0.000575887652150579,
      "loss": 3.2526,
      "step": 29605
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3234751224517822,
      "learning_rate": 0.0005758860453672816,
      "loss": 3.2876,
      "step": 29606
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.064589023590088,
      "learning_rate": 0.000575884438532692,
      "loss": 2.9525,
      "step": 29607
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.269455909729004,
      "learning_rate": 0.0005758828316468101,
      "loss": 3.1174,
      "step": 29608
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.347383737564087,
      "learning_rate": 0.0005758812247096364,
      "loss": 2.9854,
      "step": 29609
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.161447525024414,
      "learning_rate": 0.0005758796177211713,
      "loss": 3.0874,
      "step": 29610
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0361363887786865,
      "learning_rate": 0.0005758780106814148,
      "loss": 2.7935,
      "step": 29611
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.7353017330169678,
      "learning_rate": 0.0005758764035903674,
      "loss": 3.2038,
      "step": 29612
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6429630517959595,
      "learning_rate": 0.0005758747964480295,
      "loss": 3.1428,
      "step": 29613
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8012681007385254,
      "learning_rate": 0.0005758731892544012,
      "loss": 3.1799,
      "step": 29614
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5268840789794922,
      "learning_rate": 0.0005758715820094829,
      "loss": 3.0814,
      "step": 29615
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.148238182067871,
      "learning_rate": 0.0005758699747132747,
      "loss": 2.9575,
      "step": 29616
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.023376703262329,
      "learning_rate": 0.0005758683673657772,
      "loss": 2.9819,
      "step": 29617
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4632681608200073,
      "learning_rate": 0.0005758667599669906,
      "loss": 3.1754,
      "step": 29618
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7789117097854614,
      "learning_rate": 0.0005758651525169151,
      "loss": 2.8923,
      "step": 29619
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8346785306930542,
      "learning_rate": 0.000575863545015551,
      "loss": 2.972,
      "step": 29620
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4192157983779907,
      "learning_rate": 0.0005758619374628988,
      "loss": 3.0946,
      "step": 29621
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5193158388137817,
      "learning_rate": 0.0005758603298589586,
      "loss": 3.0492,
      "step": 29622
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.477123737335205,
      "learning_rate": 0.0005758587222037307,
      "loss": 3.095,
      "step": 29623
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3841753005981445,
      "learning_rate": 0.0005758571144972155,
      "loss": 2.8952,
      "step": 29624
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.751505970954895,
      "learning_rate": 0.0005758555067394132,
      "loss": 2.9549,
      "step": 29625
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4943019151687622,
      "learning_rate": 0.0005758538989303243,
      "loss": 3.4262,
      "step": 29626
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.767629861831665,
      "learning_rate": 0.0005758522910699488,
      "loss": 2.9546,
      "step": 29627
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.891998052597046,
      "learning_rate": 0.0005758506831582872,
      "loss": 3.1237,
      "step": 29628
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2944788932800293,
      "learning_rate": 0.0005758490751953398,
      "loss": 3.0223,
      "step": 29629
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.586879849433899,
      "learning_rate": 0.0005758474671811068,
      "loss": 3.2066,
      "step": 29630
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5763471126556396,
      "learning_rate": 0.0005758458591155886,
      "loss": 2.99,
      "step": 29631
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5484036207199097,
      "learning_rate": 0.0005758442509987854,
      "loss": 3.1728,
      "step": 29632
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5039634704589844,
      "learning_rate": 0.0005758426428306975,
      "loss": 3.2746,
      "step": 29633
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.451426386833191,
      "learning_rate": 0.0005758410346113253,
      "loss": 2.9859,
      "step": 29634
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.382755994796753,
      "learning_rate": 0.0005758394263406691,
      "loss": 3.0003,
      "step": 29635
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5507121086120605,
      "learning_rate": 0.0005758378180187291,
      "loss": 2.9792,
      "step": 29636
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2758893966674805,
      "learning_rate": 0.0005758362096455056,
      "loss": 2.9058,
      "step": 29637
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7373393774032593,
      "learning_rate": 0.000575834601220999,
      "loss": 3.0913,
      "step": 29638
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5249890089035034,
      "learning_rate": 0.0005758329927452096,
      "loss": 3.1708,
      "step": 29639
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4035183191299438,
      "learning_rate": 0.0005758313842181375,
      "loss": 2.891,
      "step": 29640
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3398200273513794,
      "learning_rate": 0.0005758297756397832,
      "loss": 3.2562,
      "step": 29641
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9628239870071411,
      "learning_rate": 0.0005758281670101469,
      "loss": 3.1,
      "step": 29642
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5884166955947876,
      "learning_rate": 0.000575826558329229,
      "loss": 3.0952,
      "step": 29643
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5840116739273071,
      "learning_rate": 0.0005758249495970297,
      "loss": 3.0872,
      "step": 29644
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6935455799102783,
      "learning_rate": 0.0005758233408135493,
      "loss": 3.2069,
      "step": 29645
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8856804370880127,
      "learning_rate": 0.0005758217319787883,
      "loss": 2.9918,
      "step": 29646
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6717294454574585,
      "learning_rate": 0.0005758201230927466,
      "loss": 3.0564,
      "step": 29647
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.411325454711914,
      "learning_rate": 0.0005758185141554248,
      "loss": 3.0478,
      "step": 29648
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6511794328689575,
      "learning_rate": 0.0005758169051668233,
      "loss": 3.1293,
      "step": 29649
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.597527265548706,
      "learning_rate": 0.0005758152961269421,
      "loss": 3.3263,
      "step": 29650
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4505988359451294,
      "learning_rate": 0.0005758136870357817,
      "loss": 3.2378,
      "step": 29651
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.729355812072754,
      "learning_rate": 0.0005758120778933423,
      "loss": 3.0345,
      "step": 29652
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4060404300689697,
      "learning_rate": 0.0005758104686996241,
      "loss": 3.1611,
      "step": 29653
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6707288026809692,
      "learning_rate": 0.0005758088594546276,
      "loss": 3.0202,
      "step": 29654
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5905953645706177,
      "learning_rate": 0.000575807250158353,
      "loss": 3.0549,
      "step": 29655
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.550528883934021,
      "learning_rate": 0.0005758056408108007,
      "loss": 3.1551,
      "step": 29656
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4747899770736694,
      "learning_rate": 0.0005758040314119709,
      "loss": 2.8232,
      "step": 29657
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3350039720535278,
      "learning_rate": 0.0005758024219618639,
      "loss": 3.0444,
      "step": 29658
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.121022939682007,
      "learning_rate": 0.0005758008124604801,
      "loss": 3.1598,
      "step": 29659
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4511674642562866,
      "learning_rate": 0.0005757992029078197,
      "loss": 3.1811,
      "step": 29660
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.370147705078125,
      "learning_rate": 0.0005757975933038829,
      "loss": 3.2295,
      "step": 29661
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6918538808822632,
      "learning_rate": 0.0005757959836486703,
      "loss": 3.1716,
      "step": 29662
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.181486129760742,
      "learning_rate": 0.0005757943739421818,
      "loss": 3.145,
      "step": 29663
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4297003746032715,
      "learning_rate": 0.0005757927641844182,
      "loss": 2.9753,
      "step": 29664
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9239221811294556,
      "learning_rate": 0.0005757911543753793,
      "loss": 3.0984,
      "step": 29665
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6349968910217285,
      "learning_rate": 0.0005757895445150657,
      "loss": 3.2157,
      "step": 29666
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8610587120056152,
      "learning_rate": 0.0005757879346034776,
      "loss": 3.2586,
      "step": 29667
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2803927659988403,
      "learning_rate": 0.0005757863246406153,
      "loss": 3.2913,
      "step": 29668
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4074877500534058,
      "learning_rate": 0.0005757847146264792,
      "loss": 2.8872,
      "step": 29669
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5415675640106201,
      "learning_rate": 0.0005757831045610694,
      "loss": 3.0324,
      "step": 29670
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3062241077423096,
      "learning_rate": 0.0005757814944443864,
      "loss": 3.1217,
      "step": 29671
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.405320644378662,
      "learning_rate": 0.0005757798842764304,
      "loss": 3.1786,
      "step": 29672
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3691086769104004,
      "learning_rate": 0.0005757782740572017,
      "loss": 3.0328,
      "step": 29673
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7650268077850342,
      "learning_rate": 0.0005757766637867006,
      "loss": 3.1579,
      "step": 29674
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.359041690826416,
      "learning_rate": 0.0005757750534649275,
      "loss": 3.1758,
      "step": 29675
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5999237298965454,
      "learning_rate": 0.0005757734430918825,
      "loss": 3.0967,
      "step": 29676
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.788081407546997,
      "learning_rate": 0.000575771832667566,
      "loss": 3.1296,
      "step": 29677
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8019219636917114,
      "learning_rate": 0.0005757702221919784,
      "loss": 3.4395,
      "step": 29678
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5309325456619263,
      "learning_rate": 0.0005757686116651199,
      "loss": 2.8818,
      "step": 29679
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.20133900642395,
      "learning_rate": 0.0005757670010869908,
      "loss": 2.9805,
      "step": 29680
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9351692199707031,
      "learning_rate": 0.0005757653904575913,
      "loss": 3.169,
      "step": 29681
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3568592071533203,
      "learning_rate": 0.0005757637797769219,
      "loss": 3.2313,
      "step": 29682
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7973651885986328,
      "learning_rate": 0.0005757621690449828,
      "loss": 2.9243,
      "step": 29683
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4654568433761597,
      "learning_rate": 0.0005757605582617744,
      "loss": 3.2254,
      "step": 29684
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9264562129974365,
      "learning_rate": 0.0005757589474272968,
      "loss": 2.9921,
      "step": 29685
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4574878215789795,
      "learning_rate": 0.0005757573365415505,
      "loss": 3.2544,
      "step": 29686
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.579343557357788,
      "learning_rate": 0.0005757557256045355,
      "loss": 2.8959,
      "step": 29687
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.533522605895996,
      "learning_rate": 0.0005757541146162525,
      "loss": 3.0713,
      "step": 29688
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5845379829406738,
      "learning_rate": 0.0005757525035767015,
      "loss": 3.06,
      "step": 29689
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3636459112167358,
      "learning_rate": 0.0005757508924858829,
      "loss": 3.1607,
      "step": 29690
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6433950662612915,
      "learning_rate": 0.0005757492813437971,
      "loss": 3.1022,
      "step": 29691
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9852079153060913,
      "learning_rate": 0.0005757476701504443,
      "loss": 2.8876,
      "step": 29692
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.763449788093567,
      "learning_rate": 0.0005757460589058248,
      "loss": 2.9831,
      "step": 29693
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4660356044769287,
      "learning_rate": 0.0005757444476099388,
      "loss": 3.159,
      "step": 29694
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.560030221939087,
      "learning_rate": 0.0005757428362627867,
      "loss": 3.1385,
      "step": 29695
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.891618490219116,
      "learning_rate": 0.0005757412248643689,
      "loss": 2.9842,
      "step": 29696
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6171157360076904,
      "learning_rate": 0.0005757396134146855,
      "loss": 2.9874,
      "step": 29697
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.268064022064209,
      "learning_rate": 0.0005757380019137371,
      "loss": 2.902,
      "step": 29698
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1751201152801514,
      "learning_rate": 0.0005757363903615236,
      "loss": 3.1236,
      "step": 29699
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2107642889022827,
      "learning_rate": 0.0005757347787580455,
      "loss": 3.1005,
      "step": 29700
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3514728546142578,
      "learning_rate": 0.0005757331671033032,
      "loss": 3.0987,
      "step": 29701
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.537012815475464,
      "learning_rate": 0.000575731555397297,
      "loss": 3.2678,
      "step": 29702
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6742726564407349,
      "learning_rate": 0.0005757299436400269,
      "loss": 2.9602,
      "step": 29703
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9175232648849487,
      "learning_rate": 0.0005757283318314934,
      "loss": 3.1471,
      "step": 29704
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.8067071437835693,
      "learning_rate": 0.0005757267199716969,
      "loss": 2.9195,
      "step": 29705
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.421825647354126,
      "learning_rate": 0.0005757251080606376,
      "loss": 2.8961,
      "step": 29706
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6789220571517944,
      "learning_rate": 0.0005757234960983157,
      "loss": 2.9462,
      "step": 29707
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.654759407043457,
      "learning_rate": 0.0005757218840847317,
      "loss": 2.9951,
      "step": 29708
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9302443265914917,
      "learning_rate": 0.0005757202720198857,
      "loss": 2.8216,
      "step": 29709
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3828692436218262,
      "learning_rate": 0.0005757186599037781,
      "loss": 3.0601,
      "step": 29710
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2892234325408936,
      "learning_rate": 0.0005757170477364093,
      "loss": 3.0484,
      "step": 29711
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4810703992843628,
      "learning_rate": 0.0005757154355177795,
      "loss": 2.9886,
      "step": 29712
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7808003425598145,
      "learning_rate": 0.0005757138232478889,
      "loss": 2.9216,
      "step": 29713
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6825217008590698,
      "learning_rate": 0.0005757122109267379,
      "loss": 3.2974,
      "step": 29714
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.352413535118103,
      "learning_rate": 0.0005757105985543269,
      "loss": 2.9714,
      "step": 29715
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5287140607833862,
      "learning_rate": 0.0005757089861306559,
      "loss": 3.0351,
      "step": 29716
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8246303796768188,
      "learning_rate": 0.0005757073736557256,
      "loss": 2.9616,
      "step": 29717
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4227176904678345,
      "learning_rate": 0.000575705761129536,
      "loss": 3.0928,
      "step": 29718
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8012300729751587,
      "learning_rate": 0.0005757041485520875,
      "loss": 3.0816,
      "step": 29719
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.718420147895813,
      "learning_rate": 0.0005757025359233804,
      "loss": 3.1534,
      "step": 29720
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4536757469177246,
      "learning_rate": 0.000575700923243415,
      "loss": 3.0084,
      "step": 29721
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.64853835105896,
      "learning_rate": 0.0005756993105121916,
      "loss": 3.0004,
      "step": 29722
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2669402360916138,
      "learning_rate": 0.0005756976977297105,
      "loss": 3.2491,
      "step": 29723
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.163059711456299,
      "learning_rate": 0.000575696084895972,
      "loss": 3.0019,
      "step": 29724
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4886823892593384,
      "learning_rate": 0.0005756944720109764,
      "loss": 2.9712,
      "step": 29725
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9467021226882935,
      "learning_rate": 0.0005756928590747239,
      "loss": 2.9698,
      "step": 29726
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.990481972694397,
      "learning_rate": 0.000575691246087215,
      "loss": 2.9647,
      "step": 29727
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.749033808708191,
      "learning_rate": 0.0005756896330484498,
      "loss": 3.0312,
      "step": 29728
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9644666910171509,
      "learning_rate": 0.0005756880199584288,
      "loss": 2.8624,
      "step": 29729
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8637220859527588,
      "learning_rate": 0.0005756864068171521,
      "loss": 3.0451,
      "step": 29730
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.046724796295166,
      "learning_rate": 0.0005756847936246201,
      "loss": 3.0017,
      "step": 29731
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2155582904815674,
      "learning_rate": 0.0005756831803808332,
      "loss": 2.9983,
      "step": 29732
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8936355113983154,
      "learning_rate": 0.0005756815670857915,
      "loss": 3.0907,
      "step": 29733
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9032726287841797,
      "learning_rate": 0.0005756799537394954,
      "loss": 2.8897,
      "step": 29734
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.448306083679199,
      "learning_rate": 0.0005756783403419452,
      "loss": 2.9947,
      "step": 29735
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.6098082065582275,
      "learning_rate": 0.0005756767268931412,
      "loss": 3.1552,
      "step": 29736
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.625566244125366,
      "learning_rate": 0.0005756751133930837,
      "loss": 3.1216,
      "step": 29737
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8865522146224976,
      "learning_rate": 0.000575673499841773,
      "loss": 3.2183,
      "step": 29738
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.277867078781128,
      "learning_rate": 0.0005756718862392093,
      "loss": 3.1721,
      "step": 29739
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.2488834857940674,
      "learning_rate": 0.0005756702725853931,
      "loss": 3.1157,
      "step": 29740
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4021451473236084,
      "learning_rate": 0.0005756686588803245,
      "loss": 3.113,
      "step": 29741
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4567984342575073,
      "learning_rate": 0.000575667045124004,
      "loss": 3.1708,
      "step": 29742
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.711421251296997,
      "learning_rate": 0.0005756654313164317,
      "loss": 3.0867,
      "step": 29743
    },
    {
      "epoch": 0.39,
      "grad_norm": 4.264966011047363,
      "learning_rate": 0.0005756638174576081,
      "loss": 3.0104,
      "step": 29744
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.680316925048828,
      "learning_rate": 0.0005756622035475333,
      "loss": 3.2157,
      "step": 29745
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5527886152267456,
      "learning_rate": 0.0005756605895862077,
      "loss": 3.1158,
      "step": 29746
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.8352110385894775,
      "learning_rate": 0.0005756589755736317,
      "loss": 3.0813,
      "step": 29747
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.138990640640259,
      "learning_rate": 0.0005756573615098054,
      "loss": 3.0516,
      "step": 29748
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4149439334869385,
      "learning_rate": 0.0005756557473947292,
      "loss": 2.9887,
      "step": 29749
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9368335008621216,
      "learning_rate": 0.0005756541332284033,
      "loss": 2.9404,
      "step": 29750
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7767904996871948,
      "learning_rate": 0.0005756525190108282,
      "loss": 2.9465,
      "step": 29751
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2589969635009766,
      "learning_rate": 0.0005756509047420041,
      "loss": 3.4156,
      "step": 29752
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4925625324249268,
      "learning_rate": 0.0005756492904219313,
      "loss": 3.0971,
      "step": 29753
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7146129608154297,
      "learning_rate": 0.0005756476760506101,
      "loss": 2.9909,
      "step": 29754
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2829830646514893,
      "learning_rate": 0.0005756460616280407,
      "loss": 2.9124,
      "step": 29755
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5520433187484741,
      "learning_rate": 0.0005756444471542236,
      "loss": 2.7434,
      "step": 29756
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3122239112854004,
      "learning_rate": 0.0005756428326291589,
      "loss": 2.8104,
      "step": 29757
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6555992364883423,
      "learning_rate": 0.000575641218052847,
      "loss": 3.02,
      "step": 29758
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7252330780029297,
      "learning_rate": 0.0005756396034252883,
      "loss": 3.144,
      "step": 29759
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5348089933395386,
      "learning_rate": 0.0005756379887464831,
      "loss": 3.0176,
      "step": 29760
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3328287601470947,
      "learning_rate": 0.0005756363740164314,
      "loss": 3.2451,
      "step": 29761
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5495634078979492,
      "learning_rate": 0.0005756347592351336,
      "loss": 3.0794,
      "step": 29762
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.459335446357727,
      "learning_rate": 0.0005756331444025902,
      "loss": 3.2434,
      "step": 29763
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4286551475524902,
      "learning_rate": 0.0005756315295188015,
      "loss": 3.186,
      "step": 29764
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7194615602493286,
      "learning_rate": 0.0005756299145837676,
      "loss": 3.2259,
      "step": 29765
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6726274490356445,
      "learning_rate": 0.0005756282995974889,
      "loss": 3.1081,
      "step": 29766
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.885042428970337,
      "learning_rate": 0.0005756266845599657,
      "loss": 3.0231,
      "step": 29767
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.358565330505371,
      "learning_rate": 0.0005756250694711983,
      "loss": 3.079,
      "step": 29768
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9779396057128906,
      "learning_rate": 0.000575623454331187,
      "loss": 2.7501,
      "step": 29769
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2387028932571411,
      "learning_rate": 0.000575621839139932,
      "loss": 3.0132,
      "step": 29770
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4073834419250488,
      "learning_rate": 0.0005756202238974339,
      "loss": 3.0888,
      "step": 29771
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4992626905441284,
      "learning_rate": 0.0005756186086036927,
      "loss": 3.1121,
      "step": 29772
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.48391592502594,
      "learning_rate": 0.0005756169932587087,
      "loss": 2.8312,
      "step": 29773
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7579987049102783,
      "learning_rate": 0.0005756153778624824,
      "loss": 3.087,
      "step": 29774
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6280410289764404,
      "learning_rate": 0.0005756137624150139,
      "loss": 3.2512,
      "step": 29775
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4427576065063477,
      "learning_rate": 0.0005756121469163039,
      "loss": 3.1154,
      "step": 29776
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3524054288864136,
      "learning_rate": 0.000575610531366352,
      "loss": 3.2141,
      "step": 29777
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8968194723129272,
      "learning_rate": 0.0005756089157651591,
      "loss": 3.0406,
      "step": 29778
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6585479974746704,
      "learning_rate": 0.0005756073001127254,
      "loss": 2.9529,
      "step": 29779
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6894609928131104,
      "learning_rate": 0.0005756056844090509,
      "loss": 3.1256,
      "step": 29780
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8916116952896118,
      "learning_rate": 0.0005756040686541362,
      "loss": 3.102,
      "step": 29781
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8302689790725708,
      "learning_rate": 0.0005756024528479815,
      "loss": 3.0783,
      "step": 29782
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4346243143081665,
      "learning_rate": 0.0005756008369905871,
      "loss": 3.336,
      "step": 29783
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2529590129852295,
      "learning_rate": 0.0005755992210819532,
      "loss": 2.9755,
      "step": 29784
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.000875234603882,
      "learning_rate": 0.0005755976051220803,
      "loss": 2.9736,
      "step": 29785
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.759175181388855,
      "learning_rate": 0.0005755959891109686,
      "loss": 3.2479,
      "step": 29786
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1036319732666016,
      "learning_rate": 0.0005755943730486185,
      "loss": 3.13,
      "step": 29787
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8796722888946533,
      "learning_rate": 0.00057559275693503,
      "loss": 2.9754,
      "step": 29788
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.0079500675201416,
      "learning_rate": 0.0005755911407702037,
      "loss": 3.3347,
      "step": 29789
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8209916353225708,
      "learning_rate": 0.0005755895245541399,
      "loss": 3.3791,
      "step": 29790
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6150096654891968,
      "learning_rate": 0.0005755879082868387,
      "loss": 3.2062,
      "step": 29791
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3770649433135986,
      "learning_rate": 0.0005755862919683004,
      "loss": 3.1428,
      "step": 29792
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6723307371139526,
      "learning_rate": 0.0005755846755985256,
      "loss": 3.0411,
      "step": 29793
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4646995067596436,
      "learning_rate": 0.0005755830591775143,
      "loss": 3.251,
      "step": 29794
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3946099281311035,
      "learning_rate": 0.000575581442705267,
      "loss": 2.8522,
      "step": 29795
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3576573133468628,
      "learning_rate": 0.0005755798261817838,
      "loss": 3.0121,
      "step": 29796
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.898231029510498,
      "learning_rate": 0.0005755782096070651,
      "loss": 2.6118,
      "step": 29797
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.003934621810913,
      "learning_rate": 0.0005755765929811113,
      "loss": 3.0403,
      "step": 29798
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7077960968017578,
      "learning_rate": 0.0005755749763039226,
      "loss": 2.9787,
      "step": 29799
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7160089015960693,
      "learning_rate": 0.0005755733595754993,
      "loss": 3.0472,
      "step": 29800
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1388282775878906,
      "learning_rate": 0.0005755717427958416,
      "loss": 2.7678,
      "step": 29801
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.9597625732421875,
      "learning_rate": 0.0005755701259649501,
      "loss": 3.0832,
      "step": 29802
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4608291387557983,
      "learning_rate": 0.0005755685090828249,
      "loss": 3.0965,
      "step": 29803
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.7595129013061523,
      "learning_rate": 0.0005755668921494662,
      "loss": 2.9599,
      "step": 29804
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.396582841873169,
      "learning_rate": 0.0005755652751648744,
      "loss": 3.3339,
      "step": 29805
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5596776008605957,
      "learning_rate": 0.0005755636581290498,
      "loss": 3.021,
      "step": 29806
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.813018798828125,
      "learning_rate": 0.0005755620410419929,
      "loss": 3.1431,
      "step": 29807
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7871160507202148,
      "learning_rate": 0.0005755604239037036,
      "loss": 3.0807,
      "step": 29808
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9496651887893677,
      "learning_rate": 0.0005755588067141825,
      "loss": 3.0148,
      "step": 29809
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4913945198059082,
      "learning_rate": 0.0005755571894734299,
      "loss": 3.0149,
      "step": 29810
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.165109872817993,
      "learning_rate": 0.0005755555721814459,
      "loss": 3.1271,
      "step": 29811
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1832010746002197,
      "learning_rate": 0.000575553954838231,
      "loss": 2.8557,
      "step": 29812
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.940316081047058,
      "learning_rate": 0.0005755523374437853,
      "loss": 3.2288,
      "step": 29813
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6730948686599731,
      "learning_rate": 0.0005755507199981093,
      "loss": 3.2425,
      "step": 29814
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.470935106277466,
      "learning_rate": 0.0005755491025012031,
      "loss": 2.9754,
      "step": 29815
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3164143562316895,
      "learning_rate": 0.0005755474849530673,
      "loss": 3.1168,
      "step": 29816
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5190361738204956,
      "learning_rate": 0.0005755458673537019,
      "loss": 3.1129,
      "step": 29817
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0973658561706543,
      "learning_rate": 0.0005755442497031074,
      "loss": 3.1821,
      "step": 29818
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.154132604598999,
      "learning_rate": 0.0005755426320012841,
      "loss": 3.2234,
      "step": 29819
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0537774562835693,
      "learning_rate": 0.0005755410142482321,
      "loss": 3.0874,
      "step": 29820
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6070455312728882,
      "learning_rate": 0.0005755393964439518,
      "loss": 3.2313,
      "step": 29821
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.167994260787964,
      "learning_rate": 0.0005755377785884435,
      "loss": 2.8605,
      "step": 29822
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.786936044692993,
      "learning_rate": 0.0005755361606817076,
      "loss": 3.0302,
      "step": 29823
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.520216703414917,
      "learning_rate": 0.0005755345427237445,
      "loss": 2.9974,
      "step": 29824
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4685087203979492,
      "learning_rate": 0.0005755329247145541,
      "loss": 3.1172,
      "step": 29825
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.640766143798828,
      "learning_rate": 0.000575531306654137,
      "loss": 3.1119,
      "step": 29826
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.68822181224823,
      "learning_rate": 0.0005755296885424933,
      "loss": 3.0909,
      "step": 29827
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.9759345054626465,
      "learning_rate": 0.0005755280703796235,
      "loss": 3.1109,
      "step": 29828
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5958333015441895,
      "learning_rate": 0.0005755264521655279,
      "loss": 3.0491,
      "step": 29829
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5944271087646484,
      "learning_rate": 0.0005755248339002067,
      "loss": 3.0667,
      "step": 29830
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1012959480285645,
      "learning_rate": 0.0005755232155836603,
      "loss": 3.1753,
      "step": 29831
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4019436836242676,
      "learning_rate": 0.0005755215972158888,
      "loss": 3.0149,
      "step": 29832
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0200483798980713,
      "learning_rate": 0.0005755199787968927,
      "loss": 3.2693,
      "step": 29833
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5586986541748047,
      "learning_rate": 0.0005755183603266722,
      "loss": 2.8886,
      "step": 29834
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7923647165298462,
      "learning_rate": 0.0005755167418052277,
      "loss": 3.3126,
      "step": 29835
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.244628429412842,
      "learning_rate": 0.0005755151232325594,
      "loss": 3.1267,
      "step": 29836
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9872527122497559,
      "learning_rate": 0.0005755135046086676,
      "loss": 3.2198,
      "step": 29837
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.7660796642303467,
      "learning_rate": 0.0005755118859335527,
      "loss": 3.0853,
      "step": 29838
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6385583877563477,
      "learning_rate": 0.0005755102672072149,
      "loss": 3.0183,
      "step": 29839
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4113929271698,
      "learning_rate": 0.0005755086484296546,
      "loss": 3.0038,
      "step": 29840
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7084627151489258,
      "learning_rate": 0.000575507029600872,
      "loss": 3.1144,
      "step": 29841
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3211442232131958,
      "learning_rate": 0.0005755054107208675,
      "loss": 3.0136,
      "step": 29842
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.385830283164978,
      "learning_rate": 0.0005755037917896412,
      "loss": 2.816,
      "step": 29843
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.993013858795166,
      "learning_rate": 0.0005755021728071937,
      "loss": 3.1982,
      "step": 29844
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.092764139175415,
      "learning_rate": 0.000575500553773525,
      "loss": 2.9881,
      "step": 29845
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5926473140716553,
      "learning_rate": 0.0005754989346886357,
      "loss": 3.1198,
      "step": 29846
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.811445474624634,
      "learning_rate": 0.0005754973155525259,
      "loss": 3.3012,
      "step": 29847
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0733401775360107,
      "learning_rate": 0.0005754956963651958,
      "loss": 3.0615,
      "step": 29848
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0892786979675293,
      "learning_rate": 0.0005754940771266461,
      "loss": 3.0083,
      "step": 29849
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.9236648082733154,
      "learning_rate": 0.0005754924578368766,
      "loss": 2.929,
      "step": 29850
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.274430513381958,
      "learning_rate": 0.0005754908384958881,
      "loss": 3.1659,
      "step": 29851
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1310510635375977,
      "learning_rate": 0.0005754892191036804,
      "loss": 2.9244,
      "step": 29852
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.383036494255066,
      "learning_rate": 0.0005754875996602542,
      "loss": 3.1396,
      "step": 29853
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.279448986053467,
      "learning_rate": 0.0005754859801656097,
      "loss": 3.271,
      "step": 29854
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.750866174697876,
      "learning_rate": 0.0005754843606197472,
      "loss": 3.0152,
      "step": 29855
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6691392660140991,
      "learning_rate": 0.0005754827410226667,
      "loss": 3.105,
      "step": 29856
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.7115278244018555,
      "learning_rate": 0.000575481121374369,
      "loss": 2.9358,
      "step": 29857
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9364686012268066,
      "learning_rate": 0.000575479501674854,
      "loss": 2.8289,
      "step": 29858
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3508318662643433,
      "learning_rate": 0.0005754778819241222,
      "loss": 3.0809,
      "step": 29859
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2166333198547363,
      "learning_rate": 0.0005754762621221739,
      "loss": 2.8419,
      "step": 29860
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2668440341949463,
      "learning_rate": 0.0005754746422690094,
      "loss": 3.1508,
      "step": 29861
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.608472228050232,
      "learning_rate": 0.0005754730223646289,
      "loss": 3.1575,
      "step": 29862
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4790109395980835,
      "learning_rate": 0.0005754714024090328,
      "loss": 3.1161,
      "step": 29863
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4017829895019531,
      "learning_rate": 0.0005754697824022214,
      "loss": 3.1767,
      "step": 29864
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7409350872039795,
      "learning_rate": 0.0005754681623441948,
      "loss": 3.0444,
      "step": 29865
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.296152949333191,
      "learning_rate": 0.0005754665422349536,
      "loss": 3.0918,
      "step": 29866
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7369498014450073,
      "learning_rate": 0.000575464922074498,
      "loss": 2.9796,
      "step": 29867
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.798150897026062,
      "learning_rate": 0.0005754633018628283,
      "loss": 3.1682,
      "step": 29868
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5696525573730469,
      "learning_rate": 0.0005754616815999447,
      "loss": 3.0157,
      "step": 29869
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.62764573097229,
      "learning_rate": 0.0005754600612858476,
      "loss": 2.8222,
      "step": 29870
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4590530395507812,
      "learning_rate": 0.0005754584409205371,
      "loss": 3.2039,
      "step": 29871
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.344619035720825,
      "learning_rate": 0.0005754568205040139,
      "loss": 2.903,
      "step": 29872
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3396885395050049,
      "learning_rate": 0.000575455200036278,
      "loss": 3.2205,
      "step": 29873
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7710129022598267,
      "learning_rate": 0.0005754535795173298,
      "loss": 2.9127,
      "step": 29874
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3958920240402222,
      "learning_rate": 0.0005754519589471694,
      "loss": 3.0904,
      "step": 29875
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2477205991744995,
      "learning_rate": 0.0005754503383257975,
      "loss": 3.3642,
      "step": 29876
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6003127098083496,
      "learning_rate": 0.0005754487176532142,
      "loss": 3.1655,
      "step": 29877
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4544636011123657,
      "learning_rate": 0.0005754470969294198,
      "loss": 3.2597,
      "step": 29878
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7643035650253296,
      "learning_rate": 0.0005754454761544143,
      "loss": 2.7662,
      "step": 29879
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7951244115829468,
      "learning_rate": 0.0005754438553281986,
      "loss": 3.1065,
      "step": 29880
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.442986488342285,
      "learning_rate": 0.0005754422344507725,
      "loss": 2.8538,
      "step": 29881
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.584984302520752,
      "learning_rate": 0.0005754406135221365,
      "loss": 3.0727,
      "step": 29882
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5103435516357422,
      "learning_rate": 0.000575438992542291,
      "loss": 3.2417,
      "step": 29883
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.89182448387146,
      "learning_rate": 0.0005754373715112361,
      "loss": 3.0092,
      "step": 29884
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5766547918319702,
      "learning_rate": 0.0005754357504289722,
      "loss": 3.0389,
      "step": 29885
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4454991817474365,
      "learning_rate": 0.0005754341292954998,
      "loss": 2.9897,
      "step": 29886
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3732095956802368,
      "learning_rate": 0.0005754325081108188,
      "loss": 2.7829,
      "step": 29887
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2780656814575195,
      "learning_rate": 0.0005754308868749297,
      "loss": 3.1873,
      "step": 29888
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.523532748222351,
      "learning_rate": 0.0005754292655878328,
      "loss": 3.0269,
      "step": 29889
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5600223541259766,
      "learning_rate": 0.0005754276442495284,
      "loss": 3.1069,
      "step": 29890
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3864774703979492,
      "learning_rate": 0.0005754260228600168,
      "loss": 3.2213,
      "step": 29891
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.507697343826294,
      "learning_rate": 0.0005754244014192984,
      "loss": 3.124,
      "step": 29892
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7039213180541992,
      "learning_rate": 0.0005754227799273732,
      "loss": 3.0305,
      "step": 29893
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8990980386734009,
      "learning_rate": 0.0005754211583842419,
      "loss": 3.0294,
      "step": 29894
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4276962280273438,
      "learning_rate": 0.0005754195367899046,
      "loss": 2.9331,
      "step": 29895
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.755897045135498,
      "learning_rate": 0.0005754179151443615,
      "loss": 2.9982,
      "step": 29896
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8875854015350342,
      "learning_rate": 0.0005754162934476131,
      "loss": 3.1376,
      "step": 29897
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.830295205116272,
      "learning_rate": 0.0005754146716996595,
      "loss": 2.8477,
      "step": 29898
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4994425773620605,
      "learning_rate": 0.0005754130499005013,
      "loss": 3.1941,
      "step": 29899
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4712555408477783,
      "learning_rate": 0.0005754114280501385,
      "loss": 3.2403,
      "step": 29900
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.158409357070923,
      "learning_rate": 0.0005754098061485714,
      "loss": 2.9727,
      "step": 29901
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3820394277572632,
      "learning_rate": 0.0005754081841958007,
      "loss": 2.9814,
      "step": 29902
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.907549500465393,
      "learning_rate": 0.0005754065621918262,
      "loss": 2.8328,
      "step": 29903
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.977349042892456,
      "learning_rate": 0.0005754049401366484,
      "loss": 3.1029,
      "step": 29904
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2115892171859741,
      "learning_rate": 0.0005754033180302677,
      "loss": 3.0814,
      "step": 29905
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3575396537780762,
      "learning_rate": 0.0005754016958726844,
      "loss": 3.1141,
      "step": 29906
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4592729806900024,
      "learning_rate": 0.0005754000736638987,
      "loss": 2.9674,
      "step": 29907
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5658974647521973,
      "learning_rate": 0.0005753984514039109,
      "loss": 3.3273,
      "step": 29908
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6060513257980347,
      "learning_rate": 0.0005753968290927213,
      "loss": 3.0523,
      "step": 29909
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5127848386764526,
      "learning_rate": 0.0005753952067303301,
      "loss": 3.2031,
      "step": 29910
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.250314712524414,
      "learning_rate": 0.0005753935843167379,
      "loss": 3.1794,
      "step": 29911
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6156513690948486,
      "learning_rate": 0.0005753919618519448,
      "loss": 2.9977,
      "step": 29912
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4390816688537598,
      "learning_rate": 0.0005753903393359512,
      "loss": 2.9943,
      "step": 29913
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3844430446624756,
      "learning_rate": 0.0005753887167687573,
      "loss": 3.2545,
      "step": 29914
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8368685245513916,
      "learning_rate": 0.0005753870941503634,
      "loss": 2.94,
      "step": 29915
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4814174175262451,
      "learning_rate": 0.0005753854714807699,
      "loss": 3.2875,
      "step": 29916
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.705370545387268,
      "learning_rate": 0.000575383848759977,
      "loss": 3.0006,
      "step": 29917
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6154896020889282,
      "learning_rate": 0.000575382225987985,
      "loss": 2.9767,
      "step": 29918
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4474722146987915,
      "learning_rate": 0.0005753806031647943,
      "loss": 3.2587,
      "step": 29919
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8933360576629639,
      "learning_rate": 0.0005753789802904051,
      "loss": 3.3254,
      "step": 29920
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5092761516571045,
      "learning_rate": 0.0005753773573648178,
      "loss": 2.9724,
      "step": 29921
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3718363046646118,
      "learning_rate": 0.0005753757343880327,
      "loss": 2.896,
      "step": 29922
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.246512532234192,
      "learning_rate": 0.00057537411136005,
      "loss": 3.0978,
      "step": 29923
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3890290260314941,
      "learning_rate": 0.00057537248828087,
      "loss": 3.2398,
      "step": 29924
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7563964128494263,
      "learning_rate": 0.0005753708651504932,
      "loss": 3.1065,
      "step": 29925
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8639311790466309,
      "learning_rate": 0.0005753692419689197,
      "loss": 3.0543,
      "step": 29926
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.628354549407959,
      "learning_rate": 0.0005753676187361497,
      "loss": 2.9558,
      "step": 29927
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7522528171539307,
      "learning_rate": 0.0005753659954521839,
      "loss": 3.0575,
      "step": 29928
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.9085988998413086,
      "learning_rate": 0.0005753643721170222,
      "loss": 2.9312,
      "step": 29929
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.598996639251709,
      "learning_rate": 0.0005753627487306651,
      "loss": 3.0129,
      "step": 29930
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.614867925643921,
      "learning_rate": 0.0005753611252931129,
      "loss": 3.2696,
      "step": 29931
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.517486810684204,
      "learning_rate": 0.0005753595018043659,
      "loss": 2.9714,
      "step": 29932
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6810050010681152,
      "learning_rate": 0.0005753578782644243,
      "loss": 3.1811,
      "step": 29933
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.484851598739624,
      "learning_rate": 0.0005753562546732884,
      "loss": 2.9398,
      "step": 29934
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2609997987747192,
      "learning_rate": 0.0005753546310309587,
      "loss": 3.1445,
      "step": 29935
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5789638757705688,
      "learning_rate": 0.0005753530073374354,
      "loss": 3.1284,
      "step": 29936
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6620136499404907,
      "learning_rate": 0.0005753513835927188,
      "loss": 3.1527,
      "step": 29937
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.257420063018799,
      "learning_rate": 0.000575349759796809,
      "loss": 3.1923,
      "step": 29938
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6839815378189087,
      "learning_rate": 0.0005753481359497066,
      "loss": 3.0569,
      "step": 29939
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6269725561141968,
      "learning_rate": 0.0005753465120514117,
      "loss": 3.2966,
      "step": 29940
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7555044889450073,
      "learning_rate": 0.0005753448881019248,
      "loss": 3.1582,
      "step": 29941
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6012898683547974,
      "learning_rate": 0.000575343264101246,
      "loss": 3.3021,
      "step": 29942
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0835444927215576,
      "learning_rate": 0.0005753416400493757,
      "loss": 3.0621,
      "step": 29943
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.37662410736084,
      "learning_rate": 0.0005753400159463143,
      "loss": 3.1501,
      "step": 29944
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3219921588897705,
      "learning_rate": 0.000575338391792062,
      "loss": 2.9688,
      "step": 29945
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.946004033088684,
      "learning_rate": 0.0005753367675866189,
      "loss": 2.8634,
      "step": 29946
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5713711977005005,
      "learning_rate": 0.0005753351433299856,
      "loss": 3.3436,
      "step": 29947
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.008124589920044,
      "learning_rate": 0.0005753335190221624,
      "loss": 3.0885,
      "step": 29948
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4234185218811035,
      "learning_rate": 0.0005753318946631493,
      "loss": 3.0736,
      "step": 29949
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5008264780044556,
      "learning_rate": 0.0005753302702529471,
      "loss": 3.0487,
      "step": 29950
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.415350914001465,
      "learning_rate": 0.0005753286457915555,
      "loss": 2.9748,
      "step": 29951
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.6526784896850586,
      "learning_rate": 0.0005753270212789753,
      "loss": 3.1055,
      "step": 29952
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7541630268096924,
      "learning_rate": 0.0005753253967152065,
      "loss": 3.289,
      "step": 29953
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.52734375,
      "learning_rate": 0.0005753237721002496,
      "loss": 2.9944,
      "step": 29954
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.050562620162964,
      "learning_rate": 0.0005753221474341047,
      "loss": 3.2306,
      "step": 29955
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3087953329086304,
      "learning_rate": 0.0005753205227167723,
      "loss": 2.8521,
      "step": 29956
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.517539620399475,
      "learning_rate": 0.0005753188979482525,
      "loss": 3.194,
      "step": 29957
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7294167280197144,
      "learning_rate": 0.0005753172731285459,
      "loss": 3.1776,
      "step": 29958
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2649441957473755,
      "learning_rate": 0.0005753156482576525,
      "loss": 2.9836,
      "step": 29959
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5186189413070679,
      "learning_rate": 0.0005753140233355728,
      "loss": 3.003,
      "step": 29960
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4701287746429443,
      "learning_rate": 0.0005753123983623069,
      "loss": 3.0712,
      "step": 29961
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.491639494895935,
      "learning_rate": 0.0005753107733378554,
      "loss": 3.1523,
      "step": 29962
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4914406538009644,
      "learning_rate": 0.0005753091482622183,
      "loss": 3.2021,
      "step": 29963
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6490485668182373,
      "learning_rate": 0.0005753075231353962,
      "loss": 3.1954,
      "step": 29964
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.16754412651062,
      "learning_rate": 0.000575305897957389,
      "loss": 2.9764,
      "step": 29965
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4611361026763916,
      "learning_rate": 0.0005753042727281973,
      "loss": 3.1312,
      "step": 29966
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3608391284942627,
      "learning_rate": 0.0005753026474478215,
      "loss": 3.3251,
      "step": 29967
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7669483423233032,
      "learning_rate": 0.0005753010221162616,
      "loss": 3.1457,
      "step": 29968
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0025925636291504,
      "learning_rate": 0.0005752993967335181,
      "loss": 3.1515,
      "step": 29969
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1946988105773926,
      "learning_rate": 0.0005752977712995913,
      "loss": 2.7704,
      "step": 29970
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.60630464553833,
      "learning_rate": 0.0005752961458144812,
      "loss": 3.0968,
      "step": 29971
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7306166887283325,
      "learning_rate": 0.0005752945202781887,
      "loss": 3.3574,
      "step": 29972
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7596323490142822,
      "learning_rate": 0.0005752928946907135,
      "loss": 3.0147,
      "step": 29973
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5757489204406738,
      "learning_rate": 0.0005752912690520562,
      "loss": 3.1173,
      "step": 29974
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4638584852218628,
      "learning_rate": 0.0005752896433622171,
      "loss": 3.3553,
      "step": 29975
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3814599514007568,
      "learning_rate": 0.0005752880176211965,
      "loss": 3.0429,
      "step": 29976
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6952624320983887,
      "learning_rate": 0.0005752863918289945,
      "loss": 3.0178,
      "step": 29977
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7782686948776245,
      "learning_rate": 0.0005752847659856117,
      "loss": 3.0944,
      "step": 29978
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.990043044090271,
      "learning_rate": 0.0005752831400910482,
      "loss": 3.3127,
      "step": 29979
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5858196020126343,
      "learning_rate": 0.0005752815141453044,
      "loss": 3.0189,
      "step": 29980
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6436201333999634,
      "learning_rate": 0.0005752798881483806,
      "loss": 3.0215,
      "step": 29981
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9623820781707764,
      "learning_rate": 0.0005752782621002771,
      "loss": 3.2003,
      "step": 29982
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0352425575256348,
      "learning_rate": 0.0005752766360009941,
      "loss": 3.0793,
      "step": 29983
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7459166049957275,
      "learning_rate": 0.0005752750098505319,
      "loss": 2.9474,
      "step": 29984
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5329577922821045,
      "learning_rate": 0.000575273383648891,
      "loss": 3.1854,
      "step": 29985
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.343854546546936,
      "learning_rate": 0.0005752717573960716,
      "loss": 3.0556,
      "step": 29986
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.523024559020996,
      "learning_rate": 0.000575270131092074,
      "loss": 3.165,
      "step": 29987
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5370746850967407,
      "learning_rate": 0.0005752685047368983,
      "loss": 3.1879,
      "step": 29988
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7572300434112549,
      "learning_rate": 0.0005752668783305452,
      "loss": 3.272,
      "step": 29989
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3743969202041626,
      "learning_rate": 0.0005752652518730146,
      "loss": 3.2183,
      "step": 29990
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.387761116027832,
      "learning_rate": 0.0005752636253643071,
      "loss": 3.0036,
      "step": 29991
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5182995796203613,
      "learning_rate": 0.0005752619988044229,
      "loss": 2.9318,
      "step": 29992
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7037992477416992,
      "learning_rate": 0.0005752603721933624,
      "loss": 3.1543,
      "step": 29993
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4859490394592285,
      "learning_rate": 0.0005752587455311256,
      "loss": 2.9746,
      "step": 29994
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.625442385673523,
      "learning_rate": 0.0005752571188177131,
      "loss": 3.1037,
      "step": 29995
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6515530347824097,
      "learning_rate": 0.0005752554920531251,
      "loss": 3.0089,
      "step": 29996
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1842936277389526,
      "learning_rate": 0.000575253865237362,
      "loss": 3.0023,
      "step": 29997
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4976856708526611,
      "learning_rate": 0.0005752522383704239,
      "loss": 3.0996,
      "step": 29998
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7107822895050049,
      "learning_rate": 0.0005752506114523112,
      "loss": 3.0743,
      "step": 29999
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6315494775772095,
      "learning_rate": 0.0005752489844830243,
      "loss": 3.0524,
      "step": 30000
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5313434600830078,
      "learning_rate": 0.0005752473574625633,
      "loss": 3.078,
      "step": 30001
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5527907609939575,
      "learning_rate": 0.0005752457303909287,
      "loss": 3.3279,
      "step": 30002
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3721948862075806,
      "learning_rate": 0.0005752441032681208,
      "loss": 3.1608,
      "step": 30003
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.556400179862976,
      "learning_rate": 0.0005752424760941397,
      "loss": 3.0439,
      "step": 30004
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.928352952003479,
      "learning_rate": 0.0005752408488689858,
      "loss": 3.0764,
      "step": 30005
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6691327095031738,
      "learning_rate": 0.0005752392215926595,
      "loss": 3.0927,
      "step": 30006
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5629782676696777,
      "learning_rate": 0.0005752375942651611,
      "loss": 3.073,
      "step": 30007
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7049552202224731,
      "learning_rate": 0.0005752359668864907,
      "loss": 2.86,
      "step": 30008
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3030668497085571,
      "learning_rate": 0.0005752343394566489,
      "loss": 3.0927,
      "step": 30009
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4608896970748901,
      "learning_rate": 0.0005752327119756356,
      "loss": 3.2574,
      "step": 30010
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.664977788925171,
      "learning_rate": 0.0005752310844434515,
      "loss": 3.0525,
      "step": 30011
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5368677377700806,
      "learning_rate": 0.0005752294568600968,
      "loss": 3.0042,
      "step": 30012
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3376927375793457,
      "learning_rate": 0.0005752278292255717,
      "loss": 3.0133,
      "step": 30013
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2602741718292236,
      "learning_rate": 0.0005752262015398766,
      "loss": 3.3114,
      "step": 30014
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3273391723632812,
      "learning_rate": 0.0005752245738030116,
      "loss": 2.8243,
      "step": 30015
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.788434624671936,
      "learning_rate": 0.0005752229460149772,
      "loss": 2.9561,
      "step": 30016
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6503666639328003,
      "learning_rate": 0.0005752213181757737,
      "loss": 3.1305,
      "step": 30017
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.890687942504883,
      "learning_rate": 0.0005752196902854014,
      "loss": 2.9307,
      "step": 30018
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1156299114227295,
      "learning_rate": 0.0005752180623438605,
      "loss": 3.1263,
      "step": 30019
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8571823835372925,
      "learning_rate": 0.0005752164343511514,
      "loss": 3.259,
      "step": 30020
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.401508331298828,
      "learning_rate": 0.0005752148063072744,
      "loss": 3.3805,
      "step": 30021
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3232905864715576,
      "learning_rate": 0.0005752131782122298,
      "loss": 3.2797,
      "step": 30022
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3401036262512207,
      "learning_rate": 0.0005752115500660178,
      "loss": 3.1515,
      "step": 30023
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3604639768600464,
      "learning_rate": 0.0005752099218686387,
      "loss": 3.2287,
      "step": 30024
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0237064361572266,
      "learning_rate": 0.000575208293620093,
      "loss": 2.9543,
      "step": 30025
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5649869441986084,
      "learning_rate": 0.0005752066653203808,
      "loss": 3.0372,
      "step": 30026
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.19569993019104,
      "learning_rate": 0.0005752050369695026,
      "loss": 3.347,
      "step": 30027
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9439996480941772,
      "learning_rate": 0.0005752034085674586,
      "loss": 3.175,
      "step": 30028
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2976452112197876,
      "learning_rate": 0.0005752017801142489,
      "loss": 2.9665,
      "step": 30029
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3382307291030884,
      "learning_rate": 0.0005752001516098742,
      "loss": 2.9265,
      "step": 30030
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.8884358406066895,
      "learning_rate": 0.0005751985230543345,
      "loss": 3.0424,
      "step": 30031
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4971489906311035,
      "learning_rate": 0.0005751968944476303,
      "loss": 3.3337,
      "step": 30032
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.707059621810913,
      "learning_rate": 0.0005751952657897616,
      "loss": 2.9903,
      "step": 30033
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.881354808807373,
      "learning_rate": 0.0005751936370807291,
      "loss": 3.2176,
      "step": 30034
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3632994890213013,
      "learning_rate": 0.0005751920083205327,
      "loss": 3.3988,
      "step": 30035
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7447593212127686,
      "learning_rate": 0.0005751903795091731,
      "loss": 2.9521,
      "step": 30036
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4960418939590454,
      "learning_rate": 0.0005751887506466503,
      "loss": 3.2168,
      "step": 30037
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7043529748916626,
      "learning_rate": 0.0005751871217329649,
      "loss": 3.3719,
      "step": 30038
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.448472499847412,
      "learning_rate": 0.0005751854927681169,
      "loss": 2.9193,
      "step": 30039
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2058475017547607,
      "learning_rate": 0.0005751838637521067,
      "loss": 3.2369,
      "step": 30040
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5206553936004639,
      "learning_rate": 0.0005751822346849346,
      "loss": 3.2442,
      "step": 30041
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1164193153381348,
      "learning_rate": 0.0005751806055666009,
      "loss": 3.1596,
      "step": 30042
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6256133317947388,
      "learning_rate": 0.000575178976397106,
      "loss": 2.8878,
      "step": 30043
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7175730466842651,
      "learning_rate": 0.0005751773471764501,
      "loss": 2.9623,
      "step": 30044
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5801830291748047,
      "learning_rate": 0.0005751757179046336,
      "loss": 2.9132,
      "step": 30045
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4983997344970703,
      "learning_rate": 0.0005751740885816567,
      "loss": 3.2458,
      "step": 30046
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.367204189300537,
      "learning_rate": 0.0005751724592075198,
      "loss": 2.9114,
      "step": 30047
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6459239721298218,
      "learning_rate": 0.000575170829782223,
      "loss": 3.1461,
      "step": 30048
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5867670774459839,
      "learning_rate": 0.0005751692003057669,
      "loss": 2.7647,
      "step": 30049
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7208203077316284,
      "learning_rate": 0.0005751675707781516,
      "loss": 3.0046,
      "step": 30050
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.597622275352478,
      "learning_rate": 0.0005751659411993774,
      "loss": 3.2452,
      "step": 30051
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5278394222259521,
      "learning_rate": 0.0005751643115694448,
      "loss": 3.0922,
      "step": 30052
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9686241149902344,
      "learning_rate": 0.0005751626818883538,
      "loss": 2.9612,
      "step": 30053
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8265069723129272,
      "learning_rate": 0.0005751610521561049,
      "loss": 3.3957,
      "step": 30054
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5552289485931396,
      "learning_rate": 0.0005751594223726984,
      "loss": 3.2503,
      "step": 30055
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.6713039875030518,
      "learning_rate": 0.0005751577925381346,
      "loss": 2.9177,
      "step": 30056
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5745776891708374,
      "learning_rate": 0.0005751561626524138,
      "loss": 3.1614,
      "step": 30057
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4732391834259033,
      "learning_rate": 0.0005751545327155362,
      "loss": 3.2206,
      "step": 30058
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6775054931640625,
      "learning_rate": 0.0005751529027275022,
      "loss": 2.8731,
      "step": 30059
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.327576756477356,
      "learning_rate": 0.000575151272688312,
      "loss": 3.118,
      "step": 30060
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0243804454803467,
      "learning_rate": 0.0005751496425979661,
      "loss": 2.9251,
      "step": 30061
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8280513286590576,
      "learning_rate": 0.0005751480124564646,
      "loss": 3.1806,
      "step": 30062
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.001439094543457,
      "learning_rate": 0.000575146382263808,
      "loss": 2.9349,
      "step": 30063
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7459555864334106,
      "learning_rate": 0.0005751447520199965,
      "loss": 3.1398,
      "step": 30064
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.672336220741272,
      "learning_rate": 0.0005751431217250302,
      "loss": 2.8707,
      "step": 30065
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2299468517303467,
      "learning_rate": 0.0005751414913789097,
      "loss": 3.2339,
      "step": 30066
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9470044374465942,
      "learning_rate": 0.0005751398609816354,
      "loss": 2.8309,
      "step": 30067
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6202070713043213,
      "learning_rate": 0.0005751382305332071,
      "loss": 3.2522,
      "step": 30068
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4719451665878296,
      "learning_rate": 0.0005751366000336256,
      "loss": 2.8822,
      "step": 30069
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8040237426757812,
      "learning_rate": 0.0005751349694828909,
      "loss": 3.3468,
      "step": 30070
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6930102109909058,
      "learning_rate": 0.0005751333388810035,
      "loss": 2.9173,
      "step": 30071
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.78317928314209,
      "learning_rate": 0.0005751317082279636,
      "loss": 3.2637,
      "step": 30072
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7298076152801514,
      "learning_rate": 0.0005751300775237715,
      "loss": 3.2099,
      "step": 30073
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1557748317718506,
      "learning_rate": 0.0005751284467684275,
      "loss": 3.2705,
      "step": 30074
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.38253653049469,
      "learning_rate": 0.000575126815961932,
      "loss": 2.9815,
      "step": 30075
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.601474642753601,
      "learning_rate": 0.0005751251851042852,
      "loss": 3.0808,
      "step": 30076
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3969255685806274,
      "learning_rate": 0.0005751235541954874,
      "loss": 3.0603,
      "step": 30077
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6430546045303345,
      "learning_rate": 0.000575121923235539,
      "loss": 2.9881,
      "step": 30078
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.384279489517212,
      "learning_rate": 0.0005751202922244403,
      "loss": 3.0808,
      "step": 30079
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5952345132827759,
      "learning_rate": 0.0005751186611621914,
      "loss": 3.2579,
      "step": 30080
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5929774045944214,
      "learning_rate": 0.0005751170300487928,
      "loss": 3.1635,
      "step": 30081
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0321836471557617,
      "learning_rate": 0.0005751153988842447,
      "loss": 3.1189,
      "step": 30082
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.017113208770752,
      "learning_rate": 0.0005751137676685476,
      "loss": 3.0945,
      "step": 30083
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0383808612823486,
      "learning_rate": 0.0005751121364017015,
      "loss": 2.8925,
      "step": 30084
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9323594570159912,
      "learning_rate": 0.0005751105050837069,
      "loss": 3.0557,
      "step": 30085
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5858662128448486,
      "learning_rate": 0.0005751088737145641,
      "loss": 3.184,
      "step": 30086
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6604187488555908,
      "learning_rate": 0.0005751072422942733,
      "loss": 3.0202,
      "step": 30087
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3003027439117432,
      "learning_rate": 0.000575105610822835,
      "loss": 3.221,
      "step": 30088
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.436599612236023,
      "learning_rate": 0.0005751039793002494,
      "loss": 2.8975,
      "step": 30089
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.076399564743042,
      "learning_rate": 0.0005751023477265166,
      "loss": 3.0178,
      "step": 30090
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7115375995635986,
      "learning_rate": 0.0005751007161016372,
      "loss": 3.0675,
      "step": 30091
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5381041765213013,
      "learning_rate": 0.0005750990844256113,
      "loss": 3.296,
      "step": 30092
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5483146905899048,
      "learning_rate": 0.0005750974526984394,
      "loss": 3.0712,
      "step": 30093
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1631414890289307,
      "learning_rate": 0.0005750958209201216,
      "loss": 3.1153,
      "step": 30094
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5749647617340088,
      "learning_rate": 0.0005750941890906583,
      "loss": 3.1931,
      "step": 30095
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2077534198760986,
      "learning_rate": 0.0005750925572100498,
      "loss": 3.096,
      "step": 30096
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.9543991088867188,
      "learning_rate": 0.0005750909252782965,
      "loss": 3.1075,
      "step": 30097
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4111229181289673,
      "learning_rate": 0.0005750892932953985,
      "loss": 3.3009,
      "step": 30098
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.082418203353882,
      "learning_rate": 0.0005750876612613562,
      "loss": 3.0835,
      "step": 30099
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6910285949707031,
      "learning_rate": 0.00057508602917617,
      "loss": 3.1061,
      "step": 30100
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3405157327651978,
      "learning_rate": 0.00057508439703984,
      "loss": 3.0685,
      "step": 30101
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5048792362213135,
      "learning_rate": 0.0005750827648523666,
      "loss": 2.9331,
      "step": 30102
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.18198299407959,
      "learning_rate": 0.0005750811326137502,
      "loss": 3.091,
      "step": 30103
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6320246458053589,
      "learning_rate": 0.000575079500323991,
      "loss": 2.9744,
      "step": 30104
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5074464082717896,
      "learning_rate": 0.0005750778679830894,
      "loss": 3.0054,
      "step": 30105
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8630647659301758,
      "learning_rate": 0.0005750762355910456,
      "loss": 3.0106,
      "step": 30106
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1930723190307617,
      "learning_rate": 0.00057507460314786,
      "loss": 3.0485,
      "step": 30107
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6151419878005981,
      "learning_rate": 0.0005750729706535326,
      "loss": 2.9659,
      "step": 30108
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7556195259094238,
      "learning_rate": 0.0005750713381080641,
      "loss": 3.0439,
      "step": 30109
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6791399717330933,
      "learning_rate": 0.0005750697055114547,
      "loss": 3.0258,
      "step": 30110
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7954256534576416,
      "learning_rate": 0.0005750680728637045,
      "loss": 3.0114,
      "step": 30111
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6672841310501099,
      "learning_rate": 0.0005750664401648142,
      "loss": 3.3105,
      "step": 30112
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2143945693969727,
      "learning_rate": 0.0005750648074147836,
      "loss": 3.3971,
      "step": 30113
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.521968960762024,
      "learning_rate": 0.0005750631746136134,
      "loss": 3.2823,
      "step": 30114
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.517138123512268,
      "learning_rate": 0.0005750615417613038,
      "loss": 2.9409,
      "step": 30115
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5266684293746948,
      "learning_rate": 0.0005750599088578549,
      "loss": 2.8852,
      "step": 30116
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6517013311386108,
      "learning_rate": 0.0005750582759032673,
      "loss": 3.1866,
      "step": 30117
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1966437101364136,
      "learning_rate": 0.0005750566428975411,
      "loss": 3.0991,
      "step": 30118
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4036962985992432,
      "learning_rate": 0.0005750550098406767,
      "loss": 3.1033,
      "step": 30119
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5429068803787231,
      "learning_rate": 0.0005750533767326744,
      "loss": 2.9852,
      "step": 30120
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4308433532714844,
      "learning_rate": 0.0005750517435735345,
      "loss": 2.9656,
      "step": 30121
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.338617205619812,
      "learning_rate": 0.0005750501103632572,
      "loss": 2.8397,
      "step": 30122
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6068400144577026,
      "learning_rate": 0.0005750484771018429,
      "loss": 3.2522,
      "step": 30123
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2925431728363037,
      "learning_rate": 0.0005750468437892921,
      "loss": 3.0596,
      "step": 30124
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5285699367523193,
      "learning_rate": 0.0005750452104256047,
      "loss": 3.0353,
      "step": 30125
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9678866863250732,
      "learning_rate": 0.0005750435770107813,
      "loss": 3.0015,
      "step": 30126
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3889050483703613,
      "learning_rate": 0.000575041943544822,
      "loss": 3.0787,
      "step": 30127
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5517886877059937,
      "learning_rate": 0.0005750403100277273,
      "loss": 2.9672,
      "step": 30128
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3914332389831543,
      "learning_rate": 0.0005750386764594974,
      "loss": 3.077,
      "step": 30129
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9734851121902466,
      "learning_rate": 0.0005750370428401326,
      "loss": 3.1948,
      "step": 30130
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.727431297302246,
      "learning_rate": 0.0005750354091696333,
      "loss": 2.998,
      "step": 30131
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7051903009414673,
      "learning_rate": 0.0005750337754479996,
      "loss": 2.9395,
      "step": 30132
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9927014112472534,
      "learning_rate": 0.000575032141675232,
      "loss": 3.0213,
      "step": 30133
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6458461284637451,
      "learning_rate": 0.0005750305078513307,
      "loss": 3.1173,
      "step": 30134
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.475927710533142,
      "learning_rate": 0.0005750288739762961,
      "loss": 3.0516,
      "step": 30135
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2952179908752441,
      "learning_rate": 0.0005750272400501284,
      "loss": 3.3734,
      "step": 30136
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5026628971099854,
      "learning_rate": 0.0005750256060728279,
      "loss": 3.0261,
      "step": 30137
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.488094449043274,
      "learning_rate": 0.000575023972044395,
      "loss": 3.0059,
      "step": 30138
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2813761234283447,
      "learning_rate": 0.00057502233796483,
      "loss": 3.0764,
      "step": 30139
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4837983846664429,
      "learning_rate": 0.0005750207038341331,
      "loss": 3.0429,
      "step": 30140
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3988628387451172,
      "learning_rate": 0.0005750190696523046,
      "loss": 2.9809,
      "step": 30141
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4024914503097534,
      "learning_rate": 0.000575017435419345,
      "loss": 3.396,
      "step": 30142
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.983109712600708,
      "learning_rate": 0.0005750158011352544,
      "loss": 3.2067,
      "step": 30143
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3657891750335693,
      "learning_rate": 0.0005750141668000331,
      "loss": 3.1372,
      "step": 30144
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.047541618347168,
      "learning_rate": 0.0005750125324136816,
      "loss": 3.1692,
      "step": 30145
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8880794048309326,
      "learning_rate": 0.0005750108979762,
      "loss": 3.0789,
      "step": 30146
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3248648643493652,
      "learning_rate": 0.0005750092634875887,
      "loss": 3.268,
      "step": 30147
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.426224708557129,
      "learning_rate": 0.000575007628947848,
      "loss": 3.194,
      "step": 30148
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.708951950073242,
      "learning_rate": 0.0005750059943569781,
      "loss": 3.2958,
      "step": 30149
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.434139370918274,
      "learning_rate": 0.0005750043597149794,
      "loss": 3.3179,
      "step": 30150
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.1510488986968994,
      "learning_rate": 0.0005750027250218524,
      "loss": 3.0125,
      "step": 30151
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9501888751983643,
      "learning_rate": 0.000575001090277597,
      "loss": 3.1857,
      "step": 30152
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.986161947250366,
      "learning_rate": 0.0005749994554822139,
      "loss": 2.9589,
      "step": 30153
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3840352296829224,
      "learning_rate": 0.000574997820635703,
      "loss": 3.1391,
      "step": 30154
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.92831289768219,
      "learning_rate": 0.0005749961857380649,
      "loss": 3.227,
      "step": 30155
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.115020275115967,
      "learning_rate": 0.0005749945507892998,
      "loss": 3.0816,
      "step": 30156
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7693840265274048,
      "learning_rate": 0.000574992915789408,
      "loss": 3.2868,
      "step": 30157
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4119598865509033,
      "learning_rate": 0.00057499128073839,
      "loss": 3.0026,
      "step": 30158
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9865293502807617,
      "learning_rate": 0.0005749896456362458,
      "loss": 3.2622,
      "step": 30159
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.7517683506011963,
      "learning_rate": 0.0005749880104829758,
      "loss": 3.1841,
      "step": 30160
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6266157627105713,
      "learning_rate": 0.0005749863752785803,
      "loss": 3.2999,
      "step": 30161
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.708632469177246,
      "learning_rate": 0.0005749847400230597,
      "loss": 3.2551,
      "step": 30162
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.1927231550216675,
      "learning_rate": 0.0005749831047164143,
      "loss": 3.16,
      "step": 30163
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.443451166152954,
      "learning_rate": 0.0005749814693586443,
      "loss": 3.2193,
      "step": 30164
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8730145692825317,
      "learning_rate": 0.0005749798339497501,
      "loss": 3.0763,
      "step": 30165
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6331849098205566,
      "learning_rate": 0.0005749781984897319,
      "loss": 3.0535,
      "step": 30166
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7347387075424194,
      "learning_rate": 0.0005749765629785901,
      "loss": 2.6049,
      "step": 30167
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4159027338027954,
      "learning_rate": 0.000574974927416325,
      "loss": 2.949,
      "step": 30168
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.3942952156066895,
      "learning_rate": 0.0005749732918029367,
      "loss": 3.1866,
      "step": 30169
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2613435983657837,
      "learning_rate": 0.0005749716561384259,
      "loss": 3.0748,
      "step": 30170
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1491785049438477,
      "learning_rate": 0.0005749700204227925,
      "loss": 3.0903,
      "step": 30171
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4407211542129517,
      "learning_rate": 0.0005749683846560371,
      "loss": 3.4171,
      "step": 30172
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.06888747215271,
      "learning_rate": 0.0005749667488381599,
      "loss": 3.0541,
      "step": 30173
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3335646390914917,
      "learning_rate": 0.000574965112969161,
      "loss": 2.9075,
      "step": 30174
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.411556601524353,
      "learning_rate": 0.0005749634770490412,
      "loss": 2.9772,
      "step": 30175
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.730993628501892,
      "learning_rate": 0.0005749618410778002,
      "loss": 3.0845,
      "step": 30176
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5498065948486328,
      "learning_rate": 0.0005749602050554388,
      "loss": 3.0487,
      "step": 30177
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6652346849441528,
      "learning_rate": 0.000574958568981957,
      "loss": 3.0916,
      "step": 30178
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4013512134552002,
      "learning_rate": 0.0005749569328573552,
      "loss": 3.2986,
      "step": 30179
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3771913051605225,
      "learning_rate": 0.0005749552966816338,
      "loss": 2.9693,
      "step": 30180
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7249994277954102,
      "learning_rate": 0.000574953660454793,
      "loss": 2.9766,
      "step": 30181
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.512462854385376,
      "learning_rate": 0.0005749520241768332,
      "loss": 3.0674,
      "step": 30182
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4960875511169434,
      "learning_rate": 0.0005749503878477544,
      "loss": 3.3698,
      "step": 30183
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.600121259689331,
      "learning_rate": 0.0005749487514675574,
      "loss": 3.3052,
      "step": 30184
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6844075918197632,
      "learning_rate": 0.0005749471150362421,
      "loss": 2.981,
      "step": 30185
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7812747955322266,
      "learning_rate": 0.0005749454785538089,
      "loss": 3.0664,
      "step": 30186
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6982475519180298,
      "learning_rate": 0.0005749438420202583,
      "loss": 3.0704,
      "step": 30187
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3244531154632568,
      "learning_rate": 0.0005749422054355903,
      "loss": 2.619,
      "step": 30188
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5153534412384033,
      "learning_rate": 0.0005749405687998053,
      "loss": 2.9447,
      "step": 30189
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2052242755889893,
      "learning_rate": 0.0005749389321129038,
      "loss": 3.1319,
      "step": 30190
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7907055616378784,
      "learning_rate": 0.0005749372953748859,
      "loss": 3.0532,
      "step": 30191
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.10064697265625,
      "learning_rate": 0.0005749356585857521,
      "loss": 2.9907,
      "step": 30192
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.737783670425415,
      "learning_rate": 0.0005749340217455024,
      "loss": 3.2967,
      "step": 30193
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.64302396774292,
      "learning_rate": 0.0005749323848541374,
      "loss": 3.088,
      "step": 30194
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5580228567123413,
      "learning_rate": 0.0005749307479116572,
      "loss": 3.0487,
      "step": 30195
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.358309030532837,
      "learning_rate": 0.0005749291109180621,
      "loss": 2.9447,
      "step": 30196
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8475676774978638,
      "learning_rate": 0.0005749274738733527,
      "loss": 3.2072,
      "step": 30197
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5841155052185059,
      "learning_rate": 0.000574925836777529,
      "loss": 3.2481,
      "step": 30198
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5144304037094116,
      "learning_rate": 0.0005749241996305913,
      "loss": 2.9606,
      "step": 30199
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4952447414398193,
      "learning_rate": 0.0005749225624325401,
      "loss": 3.0277,
      "step": 30200
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8846728801727295,
      "learning_rate": 0.0005749209251833756,
      "loss": 3.3282,
      "step": 30201
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4817023277282715,
      "learning_rate": 0.000574919287883098,
      "loss": 2.7498,
      "step": 30202
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6974647045135498,
      "learning_rate": 0.0005749176505317079,
      "loss": 3.1644,
      "step": 30203
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3657631874084473,
      "learning_rate": 0.0005749160131292054,
      "loss": 3.112,
      "step": 30204
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5231139659881592,
      "learning_rate": 0.0005749143756755906,
      "loss": 2.8601,
      "step": 30205
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.050729274749756,
      "learning_rate": 0.0005749127381708643,
      "loss": 3.0672,
      "step": 30206
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5068851709365845,
      "learning_rate": 0.0005749111006150264,
      "loss": 2.909,
      "step": 30207
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3012356758117676,
      "learning_rate": 0.0005749094630080773,
      "loss": 3.053,
      "step": 30208
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.733062505722046,
      "learning_rate": 0.0005749078253500174,
      "loss": 3.2387,
      "step": 30209
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3901009559631348,
      "learning_rate": 0.0005749061876408469,
      "loss": 3.0575,
      "step": 30210
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6710790395736694,
      "learning_rate": 0.0005749045498805662,
      "loss": 3.1298,
      "step": 30211
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6098036766052246,
      "learning_rate": 0.0005749029120691756,
      "loss": 3.2694,
      "step": 30212
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.436346411705017,
      "learning_rate": 0.0005749012742066753,
      "loss": 3.1159,
      "step": 30213
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6044063568115234,
      "learning_rate": 0.0005748996362930657,
      "loss": 2.9523,
      "step": 30214
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6952999830245972,
      "learning_rate": 0.000574897998328347,
      "loss": 3.0537,
      "step": 30215
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5475472211837769,
      "learning_rate": 0.0005748963603125196,
      "loss": 2.9688,
      "step": 30216
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8464267253875732,
      "learning_rate": 0.0005748947222455838,
      "loss": 2.9566,
      "step": 30217
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.586644411087036,
      "learning_rate": 0.0005748930841275398,
      "loss": 3.1018,
      "step": 30218
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.537803292274475,
      "learning_rate": 0.0005748914459583882,
      "loss": 3.0723,
      "step": 30219
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6338484287261963,
      "learning_rate": 0.0005748898077381289,
      "loss": 2.8767,
      "step": 30220
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.456620693206787,
      "learning_rate": 0.0005748881694667624,
      "loss": 3.2015,
      "step": 30221
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.8852555751800537,
      "learning_rate": 0.0005748865311442891,
      "loss": 3.189,
      "step": 30222
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4959235191345215,
      "learning_rate": 0.0005748848927707091,
      "loss": 2.9534,
      "step": 30223
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1615512371063232,
      "learning_rate": 0.0005748832543460229,
      "loss": 2.9801,
      "step": 30224
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.711357593536377,
      "learning_rate": 0.0005748816158702306,
      "loss": 3.4297,
      "step": 30225
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4718077182769775,
      "learning_rate": 0.0005748799773433327,
      "loss": 3.2768,
      "step": 30226
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6089553833007812,
      "learning_rate": 0.0005748783387653294,
      "loss": 3.2183,
      "step": 30227
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.5075321197509766,
      "learning_rate": 0.0005748767001362211,
      "loss": 3.0632,
      "step": 30228
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4137569665908813,
      "learning_rate": 0.000574875061456008,
      "loss": 3.0327,
      "step": 30229
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.494143009185791,
      "learning_rate": 0.0005748734227246904,
      "loss": 3.0055,
      "step": 30230
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6577568054199219,
      "learning_rate": 0.0005748717839422687,
      "loss": 2.8446,
      "step": 30231
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3219082355499268,
      "learning_rate": 0.0005748701451087431,
      "loss": 3.0766,
      "step": 30232
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2347707748413086,
      "learning_rate": 0.0005748685062241139,
      "loss": 2.9873,
      "step": 30233
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4444446563720703,
      "learning_rate": 0.0005748668672883816,
      "loss": 3.0144,
      "step": 30234
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5376108884811401,
      "learning_rate": 0.0005748652283015461,
      "loss": 3.1521,
      "step": 30235
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.0386009216308594,
      "learning_rate": 0.0005748635892636083,
      "loss": 3.1552,
      "step": 30236
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7843319177627563,
      "learning_rate": 0.0005748619501745679,
      "loss": 3.0489,
      "step": 30237
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2693055868148804,
      "learning_rate": 0.0005748603110344256,
      "loss": 3.3604,
      "step": 30238
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5698350667953491,
      "learning_rate": 0.0005748586718431816,
      "loss": 3.0272,
      "step": 30239
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4455736875534058,
      "learning_rate": 0.0005748570326008361,
      "loss": 3.0741,
      "step": 30240
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.367879867553711,
      "learning_rate": 0.0005748553933073896,
      "loss": 2.9966,
      "step": 30241
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3887183666229248,
      "learning_rate": 0.0005748537539628422,
      "loss": 3.1516,
      "step": 30242
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9561880826950073,
      "learning_rate": 0.0005748521145671943,
      "loss": 2.961,
      "step": 30243
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8799909353256226,
      "learning_rate": 0.0005748504751204463,
      "loss": 3.2571,
      "step": 30244
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5921579599380493,
      "learning_rate": 0.0005748488356225983,
      "loss": 3.1448,
      "step": 30245
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9090473651885986,
      "learning_rate": 0.0005748471960736508,
      "loss": 3.2584,
      "step": 30246
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7084354162216187,
      "learning_rate": 0.000574845556473604,
      "loss": 2.9571,
      "step": 30247
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5684528350830078,
      "learning_rate": 0.0005748439168224581,
      "loss": 2.9391,
      "step": 30248
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.304556965827942,
      "learning_rate": 0.0005748422771202136,
      "loss": 2.83,
      "step": 30249
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.755194664001465,
      "learning_rate": 0.0005748406373668708,
      "loss": 3.0163,
      "step": 30250
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.7946221828460693,
      "learning_rate": 0.00057483899756243,
      "loss": 3.1198,
      "step": 30251
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3944844007492065,
      "learning_rate": 0.0005748373577068913,
      "loss": 3.1226,
      "step": 30252
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6111873388290405,
      "learning_rate": 0.0005748357178002551,
      "loss": 3.1736,
      "step": 30253
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.126136541366577,
      "learning_rate": 0.0005748340778425218,
      "loss": 3.3674,
      "step": 30254
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.779422402381897,
      "learning_rate": 0.0005748324378336917,
      "loss": 3.0561,
      "step": 30255
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2142839431762695,
      "learning_rate": 0.0005748307977737652,
      "loss": 3.0311,
      "step": 30256
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6122697591781616,
      "learning_rate": 0.0005748291576627421,
      "loss": 3.1357,
      "step": 30257
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.927112102508545,
      "learning_rate": 0.0005748275175006234,
      "loss": 3.0577,
      "step": 30258
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.177666187286377,
      "learning_rate": 0.0005748258772874089,
      "loss": 2.8212,
      "step": 30259
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9564182758331299,
      "learning_rate": 0.0005748242370230992,
      "loss": 2.9673,
      "step": 30260
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4202338457107544,
      "learning_rate": 0.0005748225967076944,
      "loss": 3.1353,
      "step": 30261
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5244487524032593,
      "learning_rate": 0.000574820956341195,
      "loss": 3.2649,
      "step": 30262
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4084590673446655,
      "learning_rate": 0.0005748193159236011,
      "loss": 3.0268,
      "step": 30263
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.999050498008728,
      "learning_rate": 0.0005748176754549131,
      "loss": 3.0317,
      "step": 30264
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5908582210540771,
      "learning_rate": 0.0005748160349351313,
      "loss": 3.1259,
      "step": 30265
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7706290483474731,
      "learning_rate": 0.0005748143943642561,
      "loss": 3.0874,
      "step": 30266
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4645848274230957,
      "learning_rate": 0.0005748127537422876,
      "loss": 3.0711,
      "step": 30267
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.611897349357605,
      "learning_rate": 0.0005748111130692263,
      "loss": 3.1548,
      "step": 30268
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.390885353088379,
      "learning_rate": 0.0005748094723450725,
      "loss": 3.204,
      "step": 30269
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4249792098999023,
      "learning_rate": 0.0005748078315698263,
      "loss": 3.0441,
      "step": 30270
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4302902221679688,
      "learning_rate": 0.0005748061907434881,
      "loss": 2.9801,
      "step": 30271
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.7086939811706543,
      "learning_rate": 0.0005748045498660584,
      "loss": 3.0814,
      "step": 30272
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7761003971099854,
      "learning_rate": 0.0005748029089375373,
      "loss": 3.1866,
      "step": 30273
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.2925565242767334,
      "learning_rate": 0.0005748012679579252,
      "loss": 3.258,
      "step": 30274
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.4299476146698,
      "learning_rate": 0.0005747996269272222,
      "loss": 3.2135,
      "step": 30275
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0298891067504883,
      "learning_rate": 0.0005747979858454288,
      "loss": 2.7916,
      "step": 30276
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.211179256439209,
      "learning_rate": 0.0005747963447125453,
      "loss": 2.9847,
      "step": 30277
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.409095525741577,
      "learning_rate": 0.000574794703528572,
      "loss": 2.8589,
      "step": 30278
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3689332008361816,
      "learning_rate": 0.0005747930622935092,
      "loss": 3.2615,
      "step": 30279
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3708122968673706,
      "learning_rate": 0.0005747914210073571,
      "loss": 3.052,
      "step": 30280
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.2422057390213013,
      "learning_rate": 0.0005747897796701161,
      "loss": 2.9983,
      "step": 30281
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.325444221496582,
      "learning_rate": 0.0005747881382817865,
      "loss": 3.0315,
      "step": 30282
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7137703895568848,
      "learning_rate": 0.0005747864968423687,
      "loss": 3.0814,
      "step": 30283
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3618860244750977,
      "learning_rate": 0.0005747848553518627,
      "loss": 3.0041,
      "step": 30284
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9343593120574951,
      "learning_rate": 0.0005747832138102691,
      "loss": 3.1174,
      "step": 30285
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.372111439704895,
      "learning_rate": 0.0005747815722175882,
      "loss": 3.0739,
      "step": 30286
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5649831295013428,
      "learning_rate": 0.0005747799305738202,
      "loss": 3.2114,
      "step": 30287
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3878633975982666,
      "learning_rate": 0.0005747782888789654,
      "loss": 2.9207,
      "step": 30288
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3590037822723389,
      "learning_rate": 0.0005747766471330242,
      "loss": 2.9335,
      "step": 30289
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5224521160125732,
      "learning_rate": 0.0005747750053359966,
      "loss": 2.9177,
      "step": 30290
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5577294826507568,
      "learning_rate": 0.0005747733634878833,
      "loss": 3.0912,
      "step": 30291
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5782440900802612,
      "learning_rate": 0.0005747717215886845,
      "loss": 3.0177,
      "step": 30292
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.608025312423706,
      "learning_rate": 0.0005747700796384004,
      "loss": 3.2276,
      "step": 30293
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.07350754737854,
      "learning_rate": 0.0005747684376370312,
      "loss": 3.1208,
      "step": 30294
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5016708374023438,
      "learning_rate": 0.0005747667955845777,
      "loss": 3.1881,
      "step": 30295
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5156328678131104,
      "learning_rate": 0.0005747651534810396,
      "loss": 3.0359,
      "step": 30296
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3292819261550903,
      "learning_rate": 0.0005747635113264176,
      "loss": 3.4234,
      "step": 30297
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8945212364196777,
      "learning_rate": 0.0005747618691207117,
      "loss": 2.8631,
      "step": 30298
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.720604658126831,
      "learning_rate": 0.0005747602268639225,
      "loss": 3.1783,
      "step": 30299
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.720194697380066,
      "learning_rate": 0.0005747585845560502,
      "loss": 3.0888,
      "step": 30300
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.104189157485962,
      "learning_rate": 0.000574756942197095,
      "loss": 3.1618,
      "step": 30301
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5797356367111206,
      "learning_rate": 0.0005747552997870575,
      "loss": 3.1664,
      "step": 30302
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.368524432182312,
      "learning_rate": 0.0005747536573259376,
      "loss": 3.1518,
      "step": 30303
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7980602979660034,
      "learning_rate": 0.000574752014813736,
      "loss": 3.1124,
      "step": 30304
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4791278839111328,
      "learning_rate": 0.0005747503722504526,
      "loss": 3.0334,
      "step": 30305
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4903898239135742,
      "learning_rate": 0.000574748729636088,
      "loss": 3.0113,
      "step": 30306
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4639755487442017,
      "learning_rate": 0.0005747470869706425,
      "loss": 3.1241,
      "step": 30307
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4606297016143799,
      "learning_rate": 0.0005747454442541162,
      "loss": 3.161,
      "step": 30308
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6467833518981934,
      "learning_rate": 0.0005747438014865096,
      "loss": 3.0678,
      "step": 30309
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8753281831741333,
      "learning_rate": 0.0005747421586678231,
      "loss": 2.9732,
      "step": 30310
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5034359693527222,
      "learning_rate": 0.0005747405157980566,
      "loss": 2.8934,
      "step": 30311
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4087285995483398,
      "learning_rate": 0.0005747388728772108,
      "loss": 3.1098,
      "step": 30312
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5018689632415771,
      "learning_rate": 0.0005747372299052858,
      "loss": 3.013,
      "step": 30313
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.4155619144439697,
      "learning_rate": 0.000574735586882282,
      "loss": 2.9156,
      "step": 30314
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.122786283493042,
      "learning_rate": 0.0005747339438081996,
      "loss": 2.9471,
      "step": 30315
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.527735948562622,
      "learning_rate": 0.000574732300683039,
      "loss": 3.2247,
      "step": 30316
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.1441781520843506,
      "learning_rate": 0.0005747306575068005,
      "loss": 3.0599,
      "step": 30317
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3961570262908936,
      "learning_rate": 0.0005747290142794843,
      "loss": 3.0903,
      "step": 30318
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3484383821487427,
      "learning_rate": 0.0005747273710010909,
      "loss": 3.1699,
      "step": 30319
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.0247979164123535,
      "learning_rate": 0.0005747257276716205,
      "loss": 3.0097,
      "step": 30320
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5236084461212158,
      "learning_rate": 0.0005747240842910733,
      "loss": 2.8269,
      "step": 30321
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.530847430229187,
      "learning_rate": 0.0005747224408594498,
      "loss": 2.8372,
      "step": 30322
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7393736839294434,
      "learning_rate": 0.0005747207973767503,
      "loss": 3.1383,
      "step": 30323
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.6190953254699707,
      "learning_rate": 0.0005747191538429748,
      "loss": 3.0394,
      "step": 30324
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3960644006729126,
      "learning_rate": 0.0005747175102581239,
      "loss": 2.9125,
      "step": 30325
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.7527269124984741,
      "learning_rate": 0.0005747158666221979,
      "loss": 3.231,
      "step": 30326
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5496537685394287,
      "learning_rate": 0.000574714222935197,
      "loss": 3.2978,
      "step": 30327
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.8658056259155273,
      "learning_rate": 0.0005747125791971215,
      "loss": 3.0962,
      "step": 30328
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.479079008102417,
      "learning_rate": 0.0005747109354079718,
      "loss": 3.1596,
      "step": 30329
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.255468726158142,
      "learning_rate": 0.0005747092915677481,
      "loss": 3.0913,
      "step": 30330
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.609833836555481,
      "learning_rate": 0.0005747076476764508,
      "loss": 3.0996,
      "step": 30331
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.423291563987732,
      "learning_rate": 0.0005747060037340801,
      "loss": 3.1873,
      "step": 30332
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.9880321025848389,
      "learning_rate": 0.0005747043597406365,
      "loss": 2.9194,
      "step": 30333
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.5949795246124268,
      "learning_rate": 0.0005747027156961201,
      "loss": 3.0739,
      "step": 30334
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.733816385269165,
      "learning_rate": 0.0005747010716005312,
      "loss": 2.9785,
      "step": 30335
    },
    {
      "epoch": 0.39,
      "grad_norm": 1.3664768934249878,
      "learning_rate": 0.0005746994274538703,
      "loss": 3.1452,
      "step": 30336
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4003245830535889,
      "learning_rate": 0.0005746977832561376,
      "loss": 2.9989,
      "step": 30337
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2740569114685059,
      "learning_rate": 0.0005746961390073333,
      "loss": 2.9599,
      "step": 30338
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.397676944732666,
      "learning_rate": 0.0005746944947074579,
      "loss": 3.1745,
      "step": 30339
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4616180658340454,
      "learning_rate": 0.0005746928503565115,
      "loss": 3.286,
      "step": 30340
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.469085454940796,
      "learning_rate": 0.0005746912059544946,
      "loss": 3.0218,
      "step": 30341
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.033581495285034,
      "learning_rate": 0.0005746895615014074,
      "loss": 3.1589,
      "step": 30342
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5221396684646606,
      "learning_rate": 0.0005746879169972501,
      "loss": 3.1902,
      "step": 30343
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6771396398544312,
      "learning_rate": 0.0005746862724420233,
      "loss": 2.6535,
      "step": 30344
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8546870946884155,
      "learning_rate": 0.0005746846278357271,
      "loss": 3.0973,
      "step": 30345
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6925246715545654,
      "learning_rate": 0.0005746829831783617,
      "loss": 2.9986,
      "step": 30346
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8860538005828857,
      "learning_rate": 0.0005746813384699277,
      "loss": 3.119,
      "step": 30347
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.492192506790161,
      "learning_rate": 0.0005746796937104253,
      "loss": 2.989,
      "step": 30348
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7089357376098633,
      "learning_rate": 0.0005746780488998547,
      "loss": 3.0781,
      "step": 30349
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2326997518539429,
      "learning_rate": 0.0005746764040382162,
      "loss": 3.1918,
      "step": 30350
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6783266067504883,
      "learning_rate": 0.0005746747591255102,
      "loss": 3.1154,
      "step": 30351
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.856346607208252,
      "learning_rate": 0.0005746731141617371,
      "loss": 3.2204,
      "step": 30352
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6389707326889038,
      "learning_rate": 0.0005746714691468969,
      "loss": 2.8439,
      "step": 30353
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.559509515762329,
      "learning_rate": 0.0005746698240809902,
      "loss": 3.1623,
      "step": 30354
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6701772212982178,
      "learning_rate": 0.0005746681789640172,
      "loss": 2.9522,
      "step": 30355
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5628132820129395,
      "learning_rate": 0.0005746665337959782,
      "loss": 2.9599,
      "step": 30356
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4290093183517456,
      "learning_rate": 0.0005746648885768733,
      "loss": 2.9051,
      "step": 30357
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5197336673736572,
      "learning_rate": 0.0005746632433067032,
      "loss": 3.1158,
      "step": 30358
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.467363715171814,
      "learning_rate": 0.0005746615979854681,
      "loss": 3.0362,
      "step": 30359
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8360568284988403,
      "learning_rate": 0.0005746599526131682,
      "loss": 3.0896,
      "step": 30360
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.430583119392395,
      "learning_rate": 0.0005746583071898037,
      "loss": 3.1883,
      "step": 30361
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5405255556106567,
      "learning_rate": 0.0005746566617153751,
      "loss": 2.7743,
      "step": 30362
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5238903760910034,
      "learning_rate": 0.0005746550161898826,
      "loss": 3.0624,
      "step": 30363
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.584611177444458,
      "learning_rate": 0.0005746533706133265,
      "loss": 3.1016,
      "step": 30364
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3845479488372803,
      "learning_rate": 0.0005746517249857073,
      "loss": 3.32,
      "step": 30365
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6198619604110718,
      "learning_rate": 0.0005746500793070251,
      "loss": 3.251,
      "step": 30366
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6149882078170776,
      "learning_rate": 0.0005746484335772804,
      "loss": 2.8829,
      "step": 30367
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5170016288757324,
      "learning_rate": 0.0005746467877964731,
      "loss": 3.1726,
      "step": 30368
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.236291527748108,
      "learning_rate": 0.000574645141964604,
      "loss": 3.0934,
      "step": 30369
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8712306022644043,
      "learning_rate": 0.000574643496081673,
      "loss": 3.0923,
      "step": 30370
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5652378797531128,
      "learning_rate": 0.0005746418501476807,
      "loss": 3.1603,
      "step": 30371
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6445404291152954,
      "learning_rate": 0.0005746402041626273,
      "loss": 3.1441,
      "step": 30372
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.781084418296814,
      "learning_rate": 0.000574638558126513,
      "loss": 2.9412,
      "step": 30373
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2102383375167847,
      "learning_rate": 0.0005746369120393384,
      "loss": 3.0564,
      "step": 30374
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3425650596618652,
      "learning_rate": 0.0005746352659011035,
      "loss": 3.2016,
      "step": 30375
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4939594268798828,
      "learning_rate": 0.0005746336197118087,
      "loss": 3.1414,
      "step": 30376
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.915685772895813,
      "learning_rate": 0.0005746319734714544,
      "loss": 3.1695,
      "step": 30377
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.369247555732727,
      "learning_rate": 0.0005746303271800408,
      "loss": 3.0662,
      "step": 30378
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7380821704864502,
      "learning_rate": 0.0005746286808375682,
      "loss": 3.2521,
      "step": 30379
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5629993677139282,
      "learning_rate": 0.0005746270344440369,
      "loss": 3.1519,
      "step": 30380
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8535414934158325,
      "learning_rate": 0.0005746253879994474,
      "loss": 3.1145,
      "step": 30381
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.533313274383545,
      "learning_rate": 0.0005746237415037998,
      "loss": 3.0289,
      "step": 30382
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.427237629890442,
      "learning_rate": 0.0005746220949570944,
      "loss": 3.0005,
      "step": 30383
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5118958950042725,
      "learning_rate": 0.0005746204483593316,
      "loss": 2.7627,
      "step": 30384
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6655724048614502,
      "learning_rate": 0.0005746188017105117,
      "loss": 3.1657,
      "step": 30385
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0060932636260986,
      "learning_rate": 0.0005746171550106349,
      "loss": 3.0856,
      "step": 30386
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8381699323654175,
      "learning_rate": 0.0005746155082597017,
      "loss": 3.1851,
      "step": 30387
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.778251051902771,
      "learning_rate": 0.0005746138614577123,
      "loss": 3.1926,
      "step": 30388
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6310443878173828,
      "learning_rate": 0.0005746122146046669,
      "loss": 3.1495,
      "step": 30389
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.881456732749939,
      "learning_rate": 0.000574610567700566,
      "loss": 3.0628,
      "step": 30390
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.45610511302948,
      "learning_rate": 0.0005746089207454096,
      "loss": 3.0752,
      "step": 30391
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.703951120376587,
      "learning_rate": 0.0005746072737391985,
      "loss": 3.3202,
      "step": 30392
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4307358264923096,
      "learning_rate": 0.0005746056266819326,
      "loss": 3.1644,
      "step": 30393
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.169782876968384,
      "learning_rate": 0.0005746039795736124,
      "loss": 2.8319,
      "step": 30394
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8334581851959229,
      "learning_rate": 0.000574602332414238,
      "loss": 2.9986,
      "step": 30395
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.8111214637756348,
      "learning_rate": 0.0005746006852038099,
      "loss": 3.1441,
      "step": 30396
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.678239107131958,
      "learning_rate": 0.0005745990379423284,
      "loss": 3.179,
      "step": 30397
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.322052001953125,
      "learning_rate": 0.0005745973906297936,
      "loss": 3.1747,
      "step": 30398
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.7799766063690186,
      "learning_rate": 0.0005745957432662061,
      "loss": 3.0266,
      "step": 30399
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.377641201019287,
      "learning_rate": 0.000574594095851566,
      "loss": 3.3546,
      "step": 30400
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7496436834335327,
      "learning_rate": 0.0005745924483858737,
      "loss": 3.0974,
      "step": 30401
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.852684736251831,
      "learning_rate": 0.0005745908008691295,
      "loss": 3.1665,
      "step": 30402
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6446031332015991,
      "learning_rate": 0.0005745891533013338,
      "loss": 3.0456,
      "step": 30403
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.49058198928833,
      "learning_rate": 0.0005745875056824867,
      "loss": 2.9394,
      "step": 30404
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6230779886245728,
      "learning_rate": 0.0005745858580125886,
      "loss": 2.9219,
      "step": 30405
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.513000965118408,
      "learning_rate": 0.0005745842102916397,
      "loss": 2.9949,
      "step": 30406
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.987522006034851,
      "learning_rate": 0.0005745825625196406,
      "loss": 3.0846,
      "step": 30407
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5332845449447632,
      "learning_rate": 0.0005745809146965912,
      "loss": 2.8048,
      "step": 30408
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9414148330688477,
      "learning_rate": 0.0005745792668224921,
      "loss": 2.857,
      "step": 30409
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.8825461864471436,
      "learning_rate": 0.0005745776188973437,
      "loss": 3.1496,
      "step": 30410
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.61603844165802,
      "learning_rate": 0.0005745759709211459,
      "loss": 2.8647,
      "step": 30411
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5991895198822021,
      "learning_rate": 0.0005745743228938994,
      "loss": 3.0615,
      "step": 30412
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3864920139312744,
      "learning_rate": 0.0005745726748156044,
      "loss": 3.2363,
      "step": 30413
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5210750102996826,
      "learning_rate": 0.0005745710266862609,
      "loss": 2.9861,
      "step": 30414
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.536165714263916,
      "learning_rate": 0.0005745693785058697,
      "loss": 3.168,
      "step": 30415
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1164278984069824,
      "learning_rate": 0.0005745677302744308,
      "loss": 2.7648,
      "step": 30416
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6712754964828491,
      "learning_rate": 0.0005745660819919446,
      "loss": 3.2493,
      "step": 30417
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9247324466705322,
      "learning_rate": 0.0005745644336584113,
      "loss": 2.9279,
      "step": 30418
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7481374740600586,
      "learning_rate": 0.0005745627852738313,
      "loss": 3.1589,
      "step": 30419
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0322985649108887,
      "learning_rate": 0.000574561136838205,
      "loss": 3.0411,
      "step": 30420
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.040475606918335,
      "learning_rate": 0.0005745594883515326,
      "loss": 3.0646,
      "step": 30421
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4717516899108887,
      "learning_rate": 0.0005745578398138143,
      "loss": 2.8506,
      "step": 30422
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.215390682220459,
      "learning_rate": 0.0005745561912250505,
      "loss": 3.084,
      "step": 30423
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6344279050827026,
      "learning_rate": 0.0005745545425852416,
      "loss": 3.2199,
      "step": 30424
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.885918378829956,
      "learning_rate": 0.0005745528938943879,
      "loss": 3.1443,
      "step": 30425
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3592472076416016,
      "learning_rate": 0.0005745512451524895,
      "loss": 2.981,
      "step": 30426
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6694343090057373,
      "learning_rate": 0.0005745495963595469,
      "loss": 2.8682,
      "step": 30427
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.256631851196289,
      "learning_rate": 0.0005745479475155603,
      "loss": 3.0824,
      "step": 30428
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5971753597259521,
      "learning_rate": 0.0005745462986205301,
      "loss": 3.0452,
      "step": 30429
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9593279361724854,
      "learning_rate": 0.0005745446496744566,
      "loss": 2.9748,
      "step": 30430
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3267745971679688,
      "learning_rate": 0.00057454300067734,
      "loss": 2.8317,
      "step": 30431
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.336701512336731,
      "learning_rate": 0.0005745413516291807,
      "loss": 3.0352,
      "step": 30432
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5053073167800903,
      "learning_rate": 0.000574539702529979,
      "loss": 3.418,
      "step": 30433
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5027832984924316,
      "learning_rate": 0.0005745380533797351,
      "loss": 3.0445,
      "step": 30434
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3382099866867065,
      "learning_rate": 0.0005745364041784494,
      "loss": 2.7884,
      "step": 30435
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.307876467704773,
      "learning_rate": 0.0005745347549261223,
      "loss": 3.0936,
      "step": 30436
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6891463994979858,
      "learning_rate": 0.0005745331056227539,
      "loss": 3.0088,
      "step": 30437
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4684494733810425,
      "learning_rate": 0.0005745314562683447,
      "loss": 3.1863,
      "step": 30438
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5169779062271118,
      "learning_rate": 0.0005745298068628949,
      "loss": 3.1409,
      "step": 30439
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5077100992202759,
      "learning_rate": 0.0005745281574064049,
      "loss": 3.2244,
      "step": 30440
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.007486343383789,
      "learning_rate": 0.0005745265078988749,
      "loss": 3.1995,
      "step": 30441
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.564527153968811,
      "learning_rate": 0.0005745248583403051,
      "loss": 3.2994,
      "step": 30442
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2064050436019897,
      "learning_rate": 0.000574523208730696,
      "loss": 3.1056,
      "step": 30443
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9487148523330688,
      "learning_rate": 0.0005745215590700479,
      "loss": 3.1127,
      "step": 30444
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.0115742683410645,
      "learning_rate": 0.0005745199093583611,
      "loss": 3.175,
      "step": 30445
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6187989711761475,
      "learning_rate": 0.0005745182595956358,
      "loss": 2.9182,
      "step": 30446
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9595884084701538,
      "learning_rate": 0.0005745166097818724,
      "loss": 3.0753,
      "step": 30447
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.527451753616333,
      "learning_rate": 0.0005745149599170711,
      "loss": 3.0302,
      "step": 30448
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5241446495056152,
      "learning_rate": 0.0005745133100012324,
      "loss": 3.0002,
      "step": 30449
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3516960144042969,
      "learning_rate": 0.0005745116600343564,
      "loss": 3.1089,
      "step": 30450
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8322103023529053,
      "learning_rate": 0.0005745100100164436,
      "loss": 3.2242,
      "step": 30451
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9750263690948486,
      "learning_rate": 0.0005745083599474942,
      "loss": 3.2076,
      "step": 30452
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0448436737060547,
      "learning_rate": 0.0005745067098275084,
      "loss": 2.8511,
      "step": 30453
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4033317565917969,
      "learning_rate": 0.0005745050596564868,
      "loss": 3.0436,
      "step": 30454
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5819945335388184,
      "learning_rate": 0.0005745034094344294,
      "loss": 3.2604,
      "step": 30455
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.427380323410034,
      "learning_rate": 0.0005745017591613366,
      "loss": 3.0395,
      "step": 30456
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4096592664718628,
      "learning_rate": 0.0005745001088372088,
      "loss": 3.0596,
      "step": 30457
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5146842002868652,
      "learning_rate": 0.0005744984584620463,
      "loss": 3.123,
      "step": 30458
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.769291639328003,
      "learning_rate": 0.0005744968080358492,
      "loss": 3.4302,
      "step": 30459
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.2826879024505615,
      "learning_rate": 0.0005744951575586181,
      "loss": 3.0049,
      "step": 30460
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6759724617004395,
      "learning_rate": 0.0005744935070303532,
      "loss": 2.9947,
      "step": 30461
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4928852319717407,
      "learning_rate": 0.0005744918564510546,
      "loss": 3.3023,
      "step": 30462
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3664402961730957,
      "learning_rate": 0.000574490205820723,
      "loss": 3.0243,
      "step": 30463
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7296584844589233,
      "learning_rate": 0.0005744885551393584,
      "loss": 3.0076,
      "step": 30464
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5833723545074463,
      "learning_rate": 0.0005744869044069611,
      "loss": 3.1722,
      "step": 30465
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.839037299156189,
      "learning_rate": 0.0005744852536235316,
      "loss": 3.1687,
      "step": 30466
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7063161134719849,
      "learning_rate": 0.0005744836027890702,
      "loss": 3.1176,
      "step": 30467
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.475812315940857,
      "learning_rate": 0.0005744819519035769,
      "loss": 3.1657,
      "step": 30468
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.542536973953247,
      "learning_rate": 0.0005744803009670523,
      "loss": 2.8882,
      "step": 30469
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6093952655792236,
      "learning_rate": 0.0005744786499794967,
      "loss": 3.0311,
      "step": 30470
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5534526109695435,
      "learning_rate": 0.0005744769989409103,
      "loss": 3.0949,
      "step": 30471
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.395433783531189,
      "learning_rate": 0.0005744753478512934,
      "loss": 2.8859,
      "step": 30472
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5485317707061768,
      "learning_rate": 0.0005744736967106465,
      "loss": 2.94,
      "step": 30473
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1765859127044678,
      "learning_rate": 0.0005744720455189696,
      "loss": 3.1583,
      "step": 30474
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.287180185317993,
      "learning_rate": 0.0005744703942762632,
      "loss": 3.1362,
      "step": 30475
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3910634517669678,
      "learning_rate": 0.0005744687429825277,
      "loss": 3.1101,
      "step": 30476
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3338682651519775,
      "learning_rate": 0.0005744670916377631,
      "loss": 2.9182,
      "step": 30477
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3326343297958374,
      "learning_rate": 0.00057446544024197,
      "loss": 3.1155,
      "step": 30478
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4173688888549805,
      "learning_rate": 0.0005744637887951485,
      "loss": 2.9534,
      "step": 30479
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.683869481086731,
      "learning_rate": 0.000574462137297299,
      "loss": 2.9461,
      "step": 30480
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7527382373809814,
      "learning_rate": 0.0005744604857484219,
      "loss": 3.1052,
      "step": 30481
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6265665292739868,
      "learning_rate": 0.0005744588341485174,
      "loss": 3.1635,
      "step": 30482
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6078171730041504,
      "learning_rate": 0.0005744571824975858,
      "loss": 3.1412,
      "step": 30483
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2796995639801025,
      "learning_rate": 0.0005744555307956274,
      "loss": 3.1588,
      "step": 30484
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6979997158050537,
      "learning_rate": 0.0005744538790426425,
      "loss": 3.3032,
      "step": 30485
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9710121154785156,
      "learning_rate": 0.0005744522272386316,
      "loss": 2.9852,
      "step": 30486
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.88457989692688,
      "learning_rate": 0.0005744505753835947,
      "loss": 3.1432,
      "step": 30487
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.353004217147827,
      "learning_rate": 0.0005744489234775324,
      "loss": 2.7777,
      "step": 30488
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5211296081542969,
      "learning_rate": 0.0005744472715204448,
      "loss": 2.9504,
      "step": 30489
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3815665245056152,
      "learning_rate": 0.0005744456195123322,
      "loss": 3.012,
      "step": 30490
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.402139186859131,
      "learning_rate": 0.000574443967453195,
      "loss": 3.2185,
      "step": 30491
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4084415435791016,
      "learning_rate": 0.0005744423153430336,
      "loss": 2.8558,
      "step": 30492
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6367734670639038,
      "learning_rate": 0.000574440663181848,
      "loss": 3.0918,
      "step": 30493
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6825872659683228,
      "learning_rate": 0.0005744390109696389,
      "loss": 3.1394,
      "step": 30494
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9476484060287476,
      "learning_rate": 0.0005744373587064063,
      "loss": 3.0272,
      "step": 30495
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3997094631195068,
      "learning_rate": 0.0005744357063921506,
      "loss": 3.0781,
      "step": 30496
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.32787024974823,
      "learning_rate": 0.0005744340540268721,
      "loss": 3.2704,
      "step": 30497
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.745403528213501,
      "learning_rate": 0.0005744324016105712,
      "loss": 3.1781,
      "step": 30498
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.0920612812042236,
      "learning_rate": 0.0005744307491432481,
      "loss": 3.1552,
      "step": 30499
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5538350343704224,
      "learning_rate": 0.0005744290966249033,
      "loss": 3.361,
      "step": 30500
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9140493869781494,
      "learning_rate": 0.0005744274440555368,
      "loss": 3.1568,
      "step": 30501
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.0955920219421387,
      "learning_rate": 0.000574425791435149,
      "loss": 2.9811,
      "step": 30502
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1788229942321777,
      "learning_rate": 0.0005744241387637403,
      "loss": 3.3745,
      "step": 30503
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.622140645980835,
      "learning_rate": 0.000574422486041311,
      "loss": 2.8861,
      "step": 30504
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.7032086849212646,
      "learning_rate": 0.0005744208332678615,
      "loss": 2.8919,
      "step": 30505
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.418882131576538,
      "learning_rate": 0.0005744191804433918,
      "loss": 3.2178,
      "step": 30506
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6397641897201538,
      "learning_rate": 0.0005744175275679025,
      "loss": 3.0875,
      "step": 30507
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0823147296905518,
      "learning_rate": 0.0005744158746413938,
      "loss": 3.1179,
      "step": 30508
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7237578630447388,
      "learning_rate": 0.000574414221663866,
      "loss": 2.9724,
      "step": 30509
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.726511001586914,
      "learning_rate": 0.0005744125686353193,
      "loss": 3.1603,
      "step": 30510
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5975087881088257,
      "learning_rate": 0.0005744109155557543,
      "loss": 2.8886,
      "step": 30511
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1493165493011475,
      "learning_rate": 0.0005744092624251709,
      "loss": 3.0713,
      "step": 30512
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.624516487121582,
      "learning_rate": 0.0005744076092435699,
      "loss": 3.1225,
      "step": 30513
    },
    {
      "epoch": 0.4,
      "grad_norm": 6.7633442878723145,
      "learning_rate": 0.0005744059560109512,
      "loss": 3.118,
      "step": 30514
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0921525955200195,
      "learning_rate": 0.0005744043027273154,
      "loss": 3.0944,
      "step": 30515
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4752867221832275,
      "learning_rate": 0.0005744026493926625,
      "loss": 2.9921,
      "step": 30516
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7608524560928345,
      "learning_rate": 0.000574400996006993,
      "loss": 2.9241,
      "step": 30517
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8635919094085693,
      "learning_rate": 0.0005743993425703072,
      "loss": 2.9098,
      "step": 30518
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5362012386322021,
      "learning_rate": 0.0005743976890826054,
      "loss": 3.101,
      "step": 30519
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.432320237159729,
      "learning_rate": 0.0005743960355438877,
      "loss": 3.1158,
      "step": 30520
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.490867257118225,
      "learning_rate": 0.0005743943819541548,
      "loss": 3.0741,
      "step": 30521
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.178772211074829,
      "learning_rate": 0.0005743927283134068,
      "loss": 3.0838,
      "step": 30522
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6867947578430176,
      "learning_rate": 0.000574391074621644,
      "loss": 2.9623,
      "step": 30523
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9296411275863647,
      "learning_rate": 0.0005743894208788667,
      "loss": 2.9545,
      "step": 30524
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9225913286209106,
      "learning_rate": 0.0005743877670850752,
      "loss": 3.1021,
      "step": 30525
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1891076564788818,
      "learning_rate": 0.0005743861132402699,
      "loss": 3.0756,
      "step": 30526
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.596171259880066,
      "learning_rate": 0.000574384459344451,
      "loss": 3.0499,
      "step": 30527
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.395716428756714,
      "learning_rate": 0.0005743828053976188,
      "loss": 2.93,
      "step": 30528
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2746059894561768,
      "learning_rate": 0.0005743811513997738,
      "loss": 3.351,
      "step": 30529
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.450168490409851,
      "learning_rate": 0.0005743794973509161,
      "loss": 2.9076,
      "step": 30530
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6181321144104004,
      "learning_rate": 0.000574377843251046,
      "loss": 3.0009,
      "step": 30531
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.8303873538970947,
      "learning_rate": 0.0005743761891001638,
      "loss": 2.9666,
      "step": 30532
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6257894039154053,
      "learning_rate": 0.0005743745348982701,
      "loss": 3.038,
      "step": 30533
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5257749557495117,
      "learning_rate": 0.0005743728806453649,
      "loss": 3.0534,
      "step": 30534
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.495126485824585,
      "learning_rate": 0.0005743712263414486,
      "loss": 2.9962,
      "step": 30535
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7015515565872192,
      "learning_rate": 0.0005743695719865215,
      "loss": 3.1108,
      "step": 30536
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5922820568084717,
      "learning_rate": 0.0005743679175805839,
      "loss": 3.185,
      "step": 30537
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.10249662399292,
      "learning_rate": 0.0005743662631236363,
      "loss": 3.1177,
      "step": 30538
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6050388813018799,
      "learning_rate": 0.0005743646086156786,
      "loss": 3.1757,
      "step": 30539
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8045945167541504,
      "learning_rate": 0.0005743629540567114,
      "loss": 3.2613,
      "step": 30540
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5110573768615723,
      "learning_rate": 0.000574361299446735,
      "loss": 2.9783,
      "step": 30541
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6313247680664062,
      "learning_rate": 0.0005743596447857497,
      "loss": 2.8689,
      "step": 30542
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7156972885131836,
      "learning_rate": 0.0005743579900737557,
      "loss": 3.1518,
      "step": 30543
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.757319688796997,
      "learning_rate": 0.0005743563353107533,
      "loss": 2.881,
      "step": 30544
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3888312578201294,
      "learning_rate": 0.000574354680496743,
      "loss": 3.1415,
      "step": 30545
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5519261360168457,
      "learning_rate": 0.0005743530256317249,
      "loss": 2.9303,
      "step": 30546
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.058931589126587,
      "learning_rate": 0.0005743513707156994,
      "loss": 2.9251,
      "step": 30547
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6458086967468262,
      "learning_rate": 0.0005743497157486669,
      "loss": 3.2345,
      "step": 30548
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.885326862335205,
      "learning_rate": 0.0005743480607306275,
      "loss": 3.1931,
      "step": 30549
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.215503215789795,
      "learning_rate": 0.0005743464056615817,
      "loss": 3.0448,
      "step": 30550
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5029491186141968,
      "learning_rate": 0.0005743447505415296,
      "loss": 2.9911,
      "step": 30551
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6576606035232544,
      "learning_rate": 0.0005743430953704718,
      "loss": 3.1779,
      "step": 30552
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0931994915008545,
      "learning_rate": 0.0005743414401484083,
      "loss": 3.006,
      "step": 30553
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8124232292175293,
      "learning_rate": 0.0005743397848753396,
      "loss": 3.3479,
      "step": 30554
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.477579116821289,
      "learning_rate": 0.000574338129551266,
      "loss": 2.9804,
      "step": 30555
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5199685096740723,
      "learning_rate": 0.0005743364741761876,
      "loss": 2.9872,
      "step": 30556
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4799892902374268,
      "learning_rate": 0.0005743348187501051,
      "loss": 3.197,
      "step": 30557
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2437045574188232,
      "learning_rate": 0.0005743331632730185,
      "loss": 2.9615,
      "step": 30558
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.707344889640808,
      "learning_rate": 0.0005743315077449281,
      "loss": 3.1029,
      "step": 30559
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0169732570648193,
      "learning_rate": 0.0005743298521658344,
      "loss": 3.1599,
      "step": 30560
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.479510545730591,
      "learning_rate": 0.0005743281965357375,
      "loss": 3.1875,
      "step": 30561
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.199591875076294,
      "learning_rate": 0.0005743265408546379,
      "loss": 2.977,
      "step": 30562
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6192620992660522,
      "learning_rate": 0.0005743248851225358,
      "loss": 3.3083,
      "step": 30563
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.184272050857544,
      "learning_rate": 0.0005743232293394314,
      "loss": 3.1681,
      "step": 30564
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6186028718948364,
      "learning_rate": 0.0005743215735053253,
      "loss": 3.0316,
      "step": 30565
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9055129289627075,
      "learning_rate": 0.0005743199176202176,
      "loss": 2.9651,
      "step": 30566
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4167293310165405,
      "learning_rate": 0.0005743182616841086,
      "loss": 3.2307,
      "step": 30567
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.846731185913086,
      "learning_rate": 0.0005743166056969987,
      "loss": 3.3171,
      "step": 30568
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6613166332244873,
      "learning_rate": 0.0005743149496588881,
      "loss": 3.4042,
      "step": 30569
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3165172338485718,
      "learning_rate": 0.0005743132935697772,
      "loss": 3.1725,
      "step": 30570
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.391519546508789,
      "learning_rate": 0.0005743116374296663,
      "loss": 3.0077,
      "step": 30571
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8134276866912842,
      "learning_rate": 0.0005743099812385557,
      "loss": 3.0528,
      "step": 30572
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.651487112045288,
      "learning_rate": 0.0005743083249964457,
      "loss": 3.0557,
      "step": 30573
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.568697452545166,
      "learning_rate": 0.0005743066687033366,
      "loss": 3.3311,
      "step": 30574
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5123155117034912,
      "learning_rate": 0.0005743050123592288,
      "loss": 3.1632,
      "step": 30575
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8656105995178223,
      "learning_rate": 0.0005743033559641223,
      "loss": 3.0842,
      "step": 30576
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1602470874786377,
      "learning_rate": 0.0005743016995180178,
      "loss": 3.3854,
      "step": 30577
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.452146530151367,
      "learning_rate": 0.0005743000430209153,
      "loss": 2.5977,
      "step": 30578
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5030536651611328,
      "learning_rate": 0.0005742983864728154,
      "loss": 2.9676,
      "step": 30579
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.996394157409668,
      "learning_rate": 0.0005742967298737182,
      "loss": 3.0793,
      "step": 30580
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3565704822540283,
      "learning_rate": 0.000574295073223624,
      "loss": 3.2132,
      "step": 30581
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2414851188659668,
      "learning_rate": 0.0005742934165225333,
      "loss": 2.9662,
      "step": 30582
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3677200078964233,
      "learning_rate": 0.0005742917597704462,
      "loss": 3.2971,
      "step": 30583
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.369245171546936,
      "learning_rate": 0.0005742901029673631,
      "loss": 3.0996,
      "step": 30584
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.459433913230896,
      "learning_rate": 0.0005742884461132842,
      "loss": 3.4072,
      "step": 30585
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4935481548309326,
      "learning_rate": 0.00057428678920821,
      "loss": 3.0045,
      "step": 30586
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3112330436706543,
      "learning_rate": 0.0005742851322521407,
      "loss": 2.9974,
      "step": 30587
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5493459701538086,
      "learning_rate": 0.0005742834752450765,
      "loss": 3.0786,
      "step": 30588
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5790736675262451,
      "learning_rate": 0.000574281818187018,
      "loss": 3.2442,
      "step": 30589
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3140459060668945,
      "learning_rate": 0.0005742801610779652,
      "loss": 3.2369,
      "step": 30590
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.395651936531067,
      "learning_rate": 0.0005742785039179186,
      "loss": 3.0287,
      "step": 30591
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9004870653152466,
      "learning_rate": 0.0005742768467068784,
      "loss": 2.5909,
      "step": 30592
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9202563762664795,
      "learning_rate": 0.0005742751894448451,
      "loss": 2.985,
      "step": 30593
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4218885898590088,
      "learning_rate": 0.0005742735321318187,
      "loss": 3.3378,
      "step": 30594
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.829479455947876,
      "learning_rate": 0.0005742718747677998,
      "loss": 3.1727,
      "step": 30595
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.578370451927185,
      "learning_rate": 0.0005742702173527885,
      "loss": 3.0789,
      "step": 30596
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8064440488815308,
      "learning_rate": 0.0005742685598867851,
      "loss": 3.1643,
      "step": 30597
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4709811210632324,
      "learning_rate": 0.0005742669023697901,
      "loss": 2.947,
      "step": 30598
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6387392282485962,
      "learning_rate": 0.0005742652448018037,
      "loss": 3.0936,
      "step": 30599
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9771960973739624,
      "learning_rate": 0.0005742635871828262,
      "loss": 2.8353,
      "step": 30600
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4590388536453247,
      "learning_rate": 0.000574261929512858,
      "loss": 3.1969,
      "step": 30601
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3126845359802246,
      "learning_rate": 0.0005742602717918993,
      "loss": 3.0866,
      "step": 30602
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3620880842208862,
      "learning_rate": 0.0005742586140199504,
      "loss": 3.2425,
      "step": 30603
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7531999349594116,
      "learning_rate": 0.0005742569561970116,
      "loss": 2.804,
      "step": 30604
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7432783842086792,
      "learning_rate": 0.0005742552983230834,
      "loss": 2.9773,
      "step": 30605
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8714146614074707,
      "learning_rate": 0.0005742536403981659,
      "loss": 3.075,
      "step": 30606
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.562410831451416,
      "learning_rate": 0.0005742519824222594,
      "loss": 3.0105,
      "step": 30607
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2710297107696533,
      "learning_rate": 0.0005742503243953644,
      "loss": 3.1201,
      "step": 30608
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.5147128105163574,
      "learning_rate": 0.000574248666317481,
      "loss": 3.2374,
      "step": 30609
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4469457864761353,
      "learning_rate": 0.0005742470081886096,
      "loss": 3.2487,
      "step": 30610
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.176500082015991,
      "learning_rate": 0.0005742453500087504,
      "loss": 3.3466,
      "step": 30611
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.381340742111206,
      "learning_rate": 0.0005742436917779041,
      "loss": 2.9903,
      "step": 30612
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5256896018981934,
      "learning_rate": 0.0005742420334960705,
      "loss": 3.165,
      "step": 30613
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8188389539718628,
      "learning_rate": 0.0005742403751632501,
      "loss": 3.0212,
      "step": 30614
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6737639904022217,
      "learning_rate": 0.0005742387167794433,
      "loss": 3.1814,
      "step": 30615
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.482834815979004,
      "learning_rate": 0.0005742370583446504,
      "loss": 3.083,
      "step": 30616
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6168817281723022,
      "learning_rate": 0.0005742353998588716,
      "loss": 2.9182,
      "step": 30617
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.354559063911438,
      "learning_rate": 0.0005742337413221073,
      "loss": 3.1269,
      "step": 30618
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0419187545776367,
      "learning_rate": 0.0005742320827343577,
      "loss": 3.0324,
      "step": 30619
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3391145467758179,
      "learning_rate": 0.0005742304240956233,
      "loss": 3.1837,
      "step": 30620
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.784077525138855,
      "learning_rate": 0.0005742287654059042,
      "loss": 3.1147,
      "step": 30621
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4188491106033325,
      "learning_rate": 0.0005742271066652008,
      "loss": 3.1764,
      "step": 30622
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.622922658920288,
      "learning_rate": 0.0005742254478735134,
      "loss": 3.1116,
      "step": 30623
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.283874273300171,
      "learning_rate": 0.0005742237890308423,
      "loss": 3.1012,
      "step": 30624
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0649056434631348,
      "learning_rate": 0.0005742221301371879,
      "loss": 3.1146,
      "step": 30625
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7930970191955566,
      "learning_rate": 0.0005742204711925503,
      "loss": 3.2083,
      "step": 30626
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8311809301376343,
      "learning_rate": 0.0005742188121969301,
      "loss": 2.9352,
      "step": 30627
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4799898862838745,
      "learning_rate": 0.0005742171531503273,
      "loss": 3.1864,
      "step": 30628
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6102598905563354,
      "learning_rate": 0.0005742154940527425,
      "loss": 3.0389,
      "step": 30629
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3049767017364502,
      "learning_rate": 0.0005742138349041757,
      "loss": 3.0886,
      "step": 30630
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3160128593444824,
      "learning_rate": 0.0005742121757046275,
      "loss": 3.2139,
      "step": 30631
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4762318134307861,
      "learning_rate": 0.000574210516454098,
      "loss": 3.1077,
      "step": 30632
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6112868785858154,
      "learning_rate": 0.0005742088571525877,
      "loss": 3.111,
      "step": 30633
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7725011110305786,
      "learning_rate": 0.0005742071978000967,
      "loss": 3.2371,
      "step": 30634
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4140387773513794,
      "learning_rate": 0.0005742055383966254,
      "loss": 2.8637,
      "step": 30635
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8646469116210938,
      "learning_rate": 0.0005742038789421742,
      "loss": 3.3201,
      "step": 30636
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8431094884872437,
      "learning_rate": 0.0005742022194367433,
      "loss": 2.9449,
      "step": 30637
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.586608648300171,
      "learning_rate": 0.0005742005598803329,
      "loss": 3.0925,
      "step": 30638
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4859002828598022,
      "learning_rate": 0.0005741989002729436,
      "loss": 2.9749,
      "step": 30639
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5105023384094238,
      "learning_rate": 0.0005741972406145754,
      "loss": 2.9797,
      "step": 30640
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4680191278457642,
      "learning_rate": 0.000574195580905229,
      "loss": 3.0072,
      "step": 30641
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5601246356964111,
      "learning_rate": 0.0005741939211449041,
      "loss": 3.1518,
      "step": 30642
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4441496133804321,
      "learning_rate": 0.0005741922613336017,
      "loss": 3.0755,
      "step": 30643
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4401843547821045,
      "learning_rate": 0.0005741906014713217,
      "loss": 3.0896,
      "step": 30644
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0945448875427246,
      "learning_rate": 0.0005741889415580645,
      "loss": 3.1241,
      "step": 30645
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.757361650466919,
      "learning_rate": 0.0005741872815938303,
      "loss": 3.1396,
      "step": 30646
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.025754928588867,
      "learning_rate": 0.0005741856215786196,
      "loss": 3.1398,
      "step": 30647
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.541852355003357,
      "learning_rate": 0.0005741839615124325,
      "loss": 3.0508,
      "step": 30648
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9024096727371216,
      "learning_rate": 0.0005741823013952695,
      "loss": 3.0733,
      "step": 30649
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.9459145069122314,
      "learning_rate": 0.0005741806412271308,
      "loss": 3.1055,
      "step": 30650
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7137181758880615,
      "learning_rate": 0.0005741789810080168,
      "loss": 3.3106,
      "step": 30651
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.297283411026001,
      "learning_rate": 0.0005741773207379278,
      "loss": 3.222,
      "step": 30652
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.7880666255950928,
      "learning_rate": 0.000574175660416864,
      "loss": 3.0483,
      "step": 30653
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7467173337936401,
      "learning_rate": 0.0005741740000448258,
      "loss": 2.7811,
      "step": 30654
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3230000734329224,
      "learning_rate": 0.0005741723396218134,
      "loss": 3.0826,
      "step": 30655
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.4443533420562744,
      "learning_rate": 0.0005741706791478272,
      "loss": 3.146,
      "step": 30656
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.868666648864746,
      "learning_rate": 0.0005741690186228676,
      "loss": 2.8993,
      "step": 30657
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4258676767349243,
      "learning_rate": 0.0005741673580469347,
      "loss": 3.0311,
      "step": 30658
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0389134883880615,
      "learning_rate": 0.0005741656974200289,
      "loss": 3.1916,
      "step": 30659
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.196791410446167,
      "learning_rate": 0.0005741640367421506,
      "loss": 3.1779,
      "step": 30660
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4305609464645386,
      "learning_rate": 0.0005741623760133,
      "loss": 3.043,
      "step": 30661
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7754051685333252,
      "learning_rate": 0.0005741607152334775,
      "loss": 3.1112,
      "step": 30662
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.847750663757324,
      "learning_rate": 0.0005741590544026833,
      "loss": 3.1603,
      "step": 30663
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.380152940750122,
      "learning_rate": 0.0005741573935209177,
      "loss": 2.8733,
      "step": 30664
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6556776762008667,
      "learning_rate": 0.0005741557325881811,
      "loss": 3.0566,
      "step": 30665
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.751698613166809,
      "learning_rate": 0.0005741540716044738,
      "loss": 2.7728,
      "step": 30666
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.533556580543518,
      "learning_rate": 0.000574152410569796,
      "loss": 3.0074,
      "step": 30667
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7225699424743652,
      "learning_rate": 0.0005741507494841482,
      "loss": 3.434,
      "step": 30668
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3589415550231934,
      "learning_rate": 0.0005741490883475305,
      "loss": 2.9016,
      "step": 30669
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.466256022453308,
      "learning_rate": 0.0005741474271599434,
      "loss": 3.0768,
      "step": 30670
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4389283657073975,
      "learning_rate": 0.000574145765921387,
      "loss": 2.9731,
      "step": 30671
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7646845579147339,
      "learning_rate": 0.0005741441046318618,
      "loss": 2.8635,
      "step": 30672
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5384979248046875,
      "learning_rate": 0.0005741424432913682,
      "loss": 3.0173,
      "step": 30673
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6767101287841797,
      "learning_rate": 0.0005741407818999062,
      "loss": 3.0281,
      "step": 30674
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.376081705093384,
      "learning_rate": 0.0005741391204574762,
      "loss": 2.9276,
      "step": 30675
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3913276195526123,
      "learning_rate": 0.0005741374589640786,
      "loss": 3.0704,
      "step": 30676
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.815830111503601,
      "learning_rate": 0.0005741357974197137,
      "loss": 3.1039,
      "step": 30677
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.687158226966858,
      "learning_rate": 0.0005741341358243817,
      "loss": 3.0983,
      "step": 30678
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7177131175994873,
      "learning_rate": 0.0005741324741780831,
      "loss": 3.0913,
      "step": 30679
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3049752712249756,
      "learning_rate": 0.000574130812480818,
      "loss": 3.1886,
      "step": 30680
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9429712295532227,
      "learning_rate": 0.0005741291507325868,
      "loss": 3.3227,
      "step": 30681
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.317602515220642,
      "learning_rate": 0.00057412748893339,
      "loss": 3.0812,
      "step": 30682
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9631178379058838,
      "learning_rate": 0.0005741258270832275,
      "loss": 3.0805,
      "step": 30683
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.4870829582214355,
      "learning_rate": 0.0005741241651821001,
      "loss": 2.8317,
      "step": 30684
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.391074776649475,
      "learning_rate": 0.0005741225032300075,
      "loss": 3.211,
      "step": 30685
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8267993927001953,
      "learning_rate": 0.0005741208412269506,
      "loss": 2.7936,
      "step": 30686
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5337722301483154,
      "learning_rate": 0.0005741191791729293,
      "loss": 3.0768,
      "step": 30687
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5623879432678223,
      "learning_rate": 0.0005741175170679442,
      "loss": 3.1093,
      "step": 30688
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5173217058181763,
      "learning_rate": 0.0005741158549119954,
      "loss": 3.0218,
      "step": 30689
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6005553007125854,
      "learning_rate": 0.0005741141927050834,
      "loss": 3.2892,
      "step": 30690
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.36929452419281,
      "learning_rate": 0.0005741125304472083,
      "loss": 3.0294,
      "step": 30691
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4704468250274658,
      "learning_rate": 0.0005741108681383706,
      "loss": 3.1977,
      "step": 30692
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.522276520729065,
      "learning_rate": 0.0005741092057785703,
      "loss": 2.8982,
      "step": 30693
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5349364280700684,
      "learning_rate": 0.0005741075433678081,
      "loss": 2.8691,
      "step": 30694
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3255387544631958,
      "learning_rate": 0.0005741058809060842,
      "loss": 3.1414,
      "step": 30695
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.718401312828064,
      "learning_rate": 0.0005741042183933987,
      "loss": 3.2196,
      "step": 30696
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.440504550933838,
      "learning_rate": 0.0005741025558297521,
      "loss": 2.8097,
      "step": 30697
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6737152338027954,
      "learning_rate": 0.0005741008932151447,
      "loss": 2.8857,
      "step": 30698
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6922420263290405,
      "learning_rate": 0.0005740992305495767,
      "loss": 3.0515,
      "step": 30699
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.51445472240448,
      "learning_rate": 0.0005740975678330485,
      "loss": 3.0966,
      "step": 30700
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5417410135269165,
      "learning_rate": 0.0005740959050655604,
      "loss": 2.9519,
      "step": 30701
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.843172311782837,
      "learning_rate": 0.0005740942422471127,
      "loss": 3.2464,
      "step": 30702
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5372244119644165,
      "learning_rate": 0.0005740925793777056,
      "loss": 3.2381,
      "step": 30703
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.47389554977417,
      "learning_rate": 0.0005740909164573397,
      "loss": 2.871,
      "step": 30704
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5341740846633911,
      "learning_rate": 0.000574089253486015,
      "loss": 3.0532,
      "step": 30705
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7015862464904785,
      "learning_rate": 0.000574087590463732,
      "loss": 3.0471,
      "step": 30706
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.491703987121582,
      "learning_rate": 0.000574085927390491,
      "loss": 2.7707,
      "step": 30707
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5915439128875732,
      "learning_rate": 0.0005740842642662921,
      "loss": 3.1761,
      "step": 30708
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4242910146713257,
      "learning_rate": 0.0005740826010911358,
      "loss": 3.0098,
      "step": 30709
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.375885009765625,
      "learning_rate": 0.0005740809378650224,
      "loss": 2.9473,
      "step": 30710
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.532185673713684,
      "learning_rate": 0.0005740792745879522,
      "loss": 3.1308,
      "step": 30711
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8479664325714111,
      "learning_rate": 0.0005740776112599255,
      "loss": 3.0061,
      "step": 30712
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5278196334838867,
      "learning_rate": 0.0005740759478809425,
      "loss": 3.2848,
      "step": 30713
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0817840099334717,
      "learning_rate": 0.0005740742844510037,
      "loss": 3.1872,
      "step": 30714
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6173126697540283,
      "learning_rate": 0.0005740726209701093,
      "loss": 3.0273,
      "step": 30715
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5159943103790283,
      "learning_rate": 0.0005740709574382596,
      "loss": 2.9546,
      "step": 30716
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0501925945281982,
      "learning_rate": 0.000574069293855455,
      "loss": 2.8934,
      "step": 30717
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4636152982711792,
      "learning_rate": 0.0005740676302216957,
      "loss": 3.1361,
      "step": 30718
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4367008209228516,
      "learning_rate": 0.000574065966536982,
      "loss": 3.0776,
      "step": 30719
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4411427974700928,
      "learning_rate": 0.0005740643028013143,
      "loss": 3.2674,
      "step": 30720
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4592005014419556,
      "learning_rate": 0.0005740626390146929,
      "loss": 3.0572,
      "step": 30721
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4957395792007446,
      "learning_rate": 0.0005740609751771181,
      "loss": 3.25,
      "step": 30722
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7317289113998413,
      "learning_rate": 0.0005740593112885902,
      "loss": 2.9579,
      "step": 30723
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7446660995483398,
      "learning_rate": 0.0005740576473491094,
      "loss": 3.0764,
      "step": 30724
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5836410522460938,
      "learning_rate": 0.0005740559833586762,
      "loss": 3.0341,
      "step": 30725
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.564010739326477,
      "learning_rate": 0.0005740543193172909,
      "loss": 2.9498,
      "step": 30726
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8756312131881714,
      "learning_rate": 0.0005740526552249535,
      "loss": 3.2177,
      "step": 30727
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9017937183380127,
      "learning_rate": 0.0005740509910816646,
      "loss": 3.1547,
      "step": 30728
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5132132768630981,
      "learning_rate": 0.0005740493268874246,
      "loss": 3.1197,
      "step": 30729
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6852728128433228,
      "learning_rate": 0.0005740476626422336,
      "loss": 2.8647,
      "step": 30730
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7421600818634033,
      "learning_rate": 0.0005740459983460919,
      "loss": 2.9842,
      "step": 30731
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5718703269958496,
      "learning_rate": 0.0005740443339989999,
      "loss": 2.9566,
      "step": 30732
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5808589458465576,
      "learning_rate": 0.0005740426696009579,
      "loss": 2.8921,
      "step": 30733
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8861899375915527,
      "learning_rate": 0.0005740410051519662,
      "loss": 3.223,
      "step": 30734
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3387774229049683,
      "learning_rate": 0.0005740393406520251,
      "loss": 3.469,
      "step": 30735
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.470125675201416,
      "learning_rate": 0.0005740376761011349,
      "loss": 2.9427,
      "step": 30736
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5353888273239136,
      "learning_rate": 0.0005740360114992959,
      "loss": 3.0537,
      "step": 30737
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4064487218856812,
      "learning_rate": 0.0005740343468465085,
      "loss": 3.1942,
      "step": 30738
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5853211879730225,
      "learning_rate": 0.0005740326821427729,
      "loss": 3.1662,
      "step": 30739
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2235419750213623,
      "learning_rate": 0.0005740310173880895,
      "loss": 2.9518,
      "step": 30740
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.500403881072998,
      "learning_rate": 0.0005740293525824586,
      "loss": 3.0539,
      "step": 30741
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.432115912437439,
      "learning_rate": 0.0005740276877258802,
      "loss": 2.6696,
      "step": 30742
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6331079006195068,
      "learning_rate": 0.0005740260228183551,
      "loss": 3.124,
      "step": 30743
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3222942352294922,
      "learning_rate": 0.0005740243578598833,
      "loss": 3.1069,
      "step": 30744
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3689266443252563,
      "learning_rate": 0.0005740226928504652,
      "loss": 2.9289,
      "step": 30745
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6626722812652588,
      "learning_rate": 0.0005740210277901014,
      "loss": 3.259,
      "step": 30746
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6789438724517822,
      "learning_rate": 0.0005740193626787916,
      "loss": 3.0793,
      "step": 30747
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.520094871520996,
      "learning_rate": 0.0005740176975165365,
      "loss": 3.327,
      "step": 30748
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0633459091186523,
      "learning_rate": 0.0005740160323033363,
      "loss": 3.0383,
      "step": 30749
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.725629210472107,
      "learning_rate": 0.0005740143670391914,
      "loss": 3.1334,
      "step": 30750
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.871095895767212,
      "learning_rate": 0.0005740127017241022,
      "loss": 3.1147,
      "step": 30751
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0409200191497803,
      "learning_rate": 0.0005740110363580686,
      "loss": 3.4821,
      "step": 30752
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2653915882110596,
      "learning_rate": 0.0005740093709410912,
      "loss": 3.2991,
      "step": 30753
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6893945932388306,
      "learning_rate": 0.0005740077054731705,
      "loss": 2.752,
      "step": 30754
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6294140815734863,
      "learning_rate": 0.0005740060399543064,
      "loss": 3.3181,
      "step": 30755
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.782705545425415,
      "learning_rate": 0.0005740043743844996,
      "loss": 3.0934,
      "step": 30756
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3171467781066895,
      "learning_rate": 0.0005740027087637499,
      "loss": 3.2766,
      "step": 30757
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.655661702156067,
      "learning_rate": 0.0005740010430920582,
      "loss": 3.1611,
      "step": 30758
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.933976411819458,
      "learning_rate": 0.0005739993773694245,
      "loss": 3.1267,
      "step": 30759
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3657472133636475,
      "learning_rate": 0.000573997711595849,
      "loss": 3.1389,
      "step": 30760
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6933138370513916,
      "learning_rate": 0.0005739960457713323,
      "loss": 3.041,
      "step": 30761
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.415198802947998,
      "learning_rate": 0.0005739943798958746,
      "loss": 3.1184,
      "step": 30762
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.717402696609497,
      "learning_rate": 0.000573992713969476,
      "loss": 2.9631,
      "step": 30763
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9180877208709717,
      "learning_rate": 0.0005739910479921371,
      "loss": 3.1175,
      "step": 30764
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6944910287857056,
      "learning_rate": 0.0005739893819638581,
      "loss": 2.7478,
      "step": 30765
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3239829540252686,
      "learning_rate": 0.0005739877158846392,
      "loss": 2.9782,
      "step": 30766
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.872941017150879,
      "learning_rate": 0.0005739860497544809,
      "loss": 2.8415,
      "step": 30767
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9675003290176392,
      "learning_rate": 0.0005739843835733834,
      "loss": 2.9478,
      "step": 30768
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.217477798461914,
      "learning_rate": 0.000573982717341347,
      "loss": 3.3665,
      "step": 30769
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2836284637451172,
      "learning_rate": 0.000573981051058372,
      "loss": 3.1003,
      "step": 30770
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.993178367614746,
      "learning_rate": 0.000573979384724459,
      "loss": 3.0077,
      "step": 30771
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4860557317733765,
      "learning_rate": 0.0005739777183396078,
      "loss": 3.0021,
      "step": 30772
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1197152137756348,
      "learning_rate": 0.000573976051903819,
      "loss": 3.1965,
      "step": 30773
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5638909339904785,
      "learning_rate": 0.000573974385417093,
      "loss": 3.2167,
      "step": 30774
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3623446226119995,
      "learning_rate": 0.0005739727188794299,
      "loss": 3.0051,
      "step": 30775
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.492436647415161,
      "learning_rate": 0.0005739710522908301,
      "loss": 3.001,
      "step": 30776
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2987630367279053,
      "learning_rate": 0.0005739693856512939,
      "loss": 2.9636,
      "step": 30777
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.802948236465454,
      "learning_rate": 0.0005739677189608217,
      "loss": 2.9528,
      "step": 30778
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4998483657836914,
      "learning_rate": 0.0005739660522194137,
      "loss": 3.2265,
      "step": 30779
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2575745582580566,
      "learning_rate": 0.0005739643854270702,
      "loss": 2.7718,
      "step": 30780
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5748542547225952,
      "learning_rate": 0.0005739627185837916,
      "loss": 2.9971,
      "step": 30781
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3734983205795288,
      "learning_rate": 0.0005739610516895781,
      "loss": 3.1847,
      "step": 30782
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4881443977355957,
      "learning_rate": 0.0005739593847444301,
      "loss": 3.2956,
      "step": 30783
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3455893993377686,
      "learning_rate": 0.0005739577177483478,
      "loss": 3.0398,
      "step": 30784
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7156469821929932,
      "learning_rate": 0.0005739560507013317,
      "loss": 3.1237,
      "step": 30785
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3817840814590454,
      "learning_rate": 0.000573954383603382,
      "loss": 3.1691,
      "step": 30786
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6873106956481934,
      "learning_rate": 0.000573952716454499,
      "loss": 3.0387,
      "step": 30787
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.285338044166565,
      "learning_rate": 0.000573951049254683,
      "loss": 3.2885,
      "step": 30788
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5014162063598633,
      "learning_rate": 0.0005739493820039343,
      "loss": 3.0932,
      "step": 30789
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.098064422607422,
      "learning_rate": 0.0005739477147022532,
      "loss": 3.0291,
      "step": 30790
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6120868921279907,
      "learning_rate": 0.0005739460473496401,
      "loss": 3.0043,
      "step": 30791
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.625544548034668,
      "learning_rate": 0.0005739443799460953,
      "loss": 3.1774,
      "step": 30792
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5092891454696655,
      "learning_rate": 0.0005739427124916191,
      "loss": 2.9875,
      "step": 30793
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7252647876739502,
      "learning_rate": 0.0005739410449862116,
      "loss": 2.8012,
      "step": 30794
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2227660417556763,
      "learning_rate": 0.0005739393774298735,
      "loss": 3.2263,
      "step": 30795
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4741241931915283,
      "learning_rate": 0.0005739377098226047,
      "loss": 3.1287,
      "step": 30796
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.537716269493103,
      "learning_rate": 0.0005739360421644058,
      "loss": 2.9401,
      "step": 30797
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7517476081848145,
      "learning_rate": 0.0005739343744552771,
      "loss": 3.0472,
      "step": 30798
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9283695220947266,
      "learning_rate": 0.0005739327066952187,
      "loss": 3.1314,
      "step": 30799
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4727795124053955,
      "learning_rate": 0.0005739310388842311,
      "loss": 3.2159,
      "step": 30800
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0426137447357178,
      "learning_rate": 0.0005739293710223146,
      "loss": 3.2977,
      "step": 30801
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4841476678848267,
      "learning_rate": 0.0005739277031094694,
      "loss": 3.432,
      "step": 30802
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9043108224868774,
      "learning_rate": 0.0005739260351456959,
      "loss": 3.2054,
      "step": 30803
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4884414672851562,
      "learning_rate": 0.0005739243671309943,
      "loss": 2.6283,
      "step": 30804
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.098379611968994,
      "learning_rate": 0.0005739226990653651,
      "loss": 2.9095,
      "step": 30805
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7577763795852661,
      "learning_rate": 0.0005739210309488084,
      "loss": 3.2082,
      "step": 30806
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3113926649093628,
      "learning_rate": 0.0005739193627813246,
      "loss": 2.9371,
      "step": 30807
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5984163284301758,
      "learning_rate": 0.0005739176945629141,
      "loss": 3.3409,
      "step": 30808
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.535144567489624,
      "learning_rate": 0.0005739160262935772,
      "loss": 3.2993,
      "step": 30809
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.448066234588623,
      "learning_rate": 0.000573914357973314,
      "loss": 3.0063,
      "step": 30810
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4845188856124878,
      "learning_rate": 0.000573912689602125,
      "loss": 3.0374,
      "step": 30811
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6801767349243164,
      "learning_rate": 0.0005739110211800104,
      "loss": 3.2327,
      "step": 30812
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.589111089706421,
      "learning_rate": 0.0005739093527069707,
      "loss": 3.0264,
      "step": 30813
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3712995052337646,
      "learning_rate": 0.000573907684183006,
      "loss": 2.8347,
      "step": 30814
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1361052989959717,
      "learning_rate": 0.0005739060156081166,
      "loss": 2.9522,
      "step": 30815
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5544089078903198,
      "learning_rate": 0.000573904346982303,
      "loss": 3.1098,
      "step": 30816
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9568294286727905,
      "learning_rate": 0.0005739026783055654,
      "loss": 2.9767,
      "step": 30817
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2456467151641846,
      "learning_rate": 0.0005739010095779042,
      "loss": 3.074,
      "step": 30818
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5330592393875122,
      "learning_rate": 0.0005738993407993195,
      "loss": 3.3102,
      "step": 30819
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6440213918685913,
      "learning_rate": 0.0005738976719698118,
      "loss": 3.0006,
      "step": 30820
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4752728939056396,
      "learning_rate": 0.0005738960030893815,
      "loss": 2.8099,
      "step": 30821
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7567607164382935,
      "learning_rate": 0.0005738943341580285,
      "loss": 3.1784,
      "step": 30822
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.809948444366455,
      "learning_rate": 0.0005738926651757536,
      "loss": 3.0433,
      "step": 30823
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3374382257461548,
      "learning_rate": 0.0005738909961425566,
      "loss": 2.8725,
      "step": 30824
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7647018432617188,
      "learning_rate": 0.0005738893270584383,
      "loss": 3.0266,
      "step": 30825
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7682733535766602,
      "learning_rate": 0.0005738876579233987,
      "loss": 3.0418,
      "step": 30826
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0374364852905273,
      "learning_rate": 0.0005738859887374384,
      "loss": 3.1018,
      "step": 30827
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4632562398910522,
      "learning_rate": 0.0005738843195005574,
      "loss": 2.9749,
      "step": 30828
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9338884353637695,
      "learning_rate": 0.0005738826502127561,
      "loss": 3.3144,
      "step": 30829
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8344507217407227,
      "learning_rate": 0.0005738809808740349,
      "loss": 3.0886,
      "step": 30830
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9265186786651611,
      "learning_rate": 0.0005738793114843941,
      "loss": 2.9632,
      "step": 30831
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7224769592285156,
      "learning_rate": 0.0005738776420438339,
      "loss": 3.0566,
      "step": 30832
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4907714128494263,
      "learning_rate": 0.0005738759725523547,
      "loss": 3.3013,
      "step": 30833
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3444020748138428,
      "learning_rate": 0.0005738743030099568,
      "loss": 3.1899,
      "step": 30834
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2547416687011719,
      "learning_rate": 0.0005738726334166404,
      "loss": 3.2595,
      "step": 30835
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8298792839050293,
      "learning_rate": 0.000573870963772406,
      "loss": 3.0648,
      "step": 30836
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7121798992156982,
      "learning_rate": 0.0005738692940772538,
      "loss": 2.8946,
      "step": 30837
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3240034580230713,
      "learning_rate": 0.0005738676243311841,
      "loss": 3.198,
      "step": 30838
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5615662336349487,
      "learning_rate": 0.0005738659545341974,
      "loss": 3.1112,
      "step": 30839
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6003870964050293,
      "learning_rate": 0.0005738642846862936,
      "loss": 3.0068,
      "step": 30840
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3453855514526367,
      "learning_rate": 0.0005738626147874734,
      "loss": 2.9909,
      "step": 30841
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4575644731521606,
      "learning_rate": 0.0005738609448377371,
      "loss": 3.1138,
      "step": 30842
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6205179691314697,
      "learning_rate": 0.0005738592748370845,
      "loss": 2.9304,
      "step": 30843
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1195626258850098,
      "learning_rate": 0.0005738576047855166,
      "loss": 2.969,
      "step": 30844
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5635164976119995,
      "learning_rate": 0.0005738559346830334,
      "loss": 3.0876,
      "step": 30845
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5175117254257202,
      "learning_rate": 0.0005738542645296351,
      "loss": 2.9298,
      "step": 30846
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4748166799545288,
      "learning_rate": 0.0005738525943253222,
      "loss": 3.1512,
      "step": 30847
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4992611408233643,
      "learning_rate": 0.0005738509240700948,
      "loss": 3.0036,
      "step": 30848
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2190182209014893,
      "learning_rate": 0.0005738492537639535,
      "loss": 3.1113,
      "step": 30849
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4933853149414062,
      "learning_rate": 0.0005738475834068984,
      "loss": 3.1876,
      "step": 30850
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.28230357170105,
      "learning_rate": 0.0005738459129989298,
      "loss": 3.0919,
      "step": 30851
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7036482095718384,
      "learning_rate": 0.000573844242540048,
      "loss": 3.0404,
      "step": 30852
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.169477701187134,
      "learning_rate": 0.0005738425720302536,
      "loss": 3.1532,
      "step": 30853
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.78497576713562,
      "learning_rate": 0.0005738409014695466,
      "loss": 3.0008,
      "step": 30854
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.350220203399658,
      "learning_rate": 0.0005738392308579274,
      "loss": 2.9186,
      "step": 30855
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2161810398101807,
      "learning_rate": 0.0005738375601953964,
      "loss": 3.1439,
      "step": 30856
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2174577713012695,
      "learning_rate": 0.0005738358894819538,
      "loss": 3.2931,
      "step": 30857
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8999555110931396,
      "learning_rate": 0.0005738342187175999,
      "loss": 3.0021,
      "step": 30858
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9186207056045532,
      "learning_rate": 0.0005738325479023351,
      "loss": 3.0217,
      "step": 30859
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9028452634811401,
      "learning_rate": 0.0005738308770361595,
      "loss": 2.8891,
      "step": 30860
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4743320941925049,
      "learning_rate": 0.0005738292061190737,
      "loss": 2.7794,
      "step": 30861
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.473876714706421,
      "learning_rate": 0.0005738275351510779,
      "loss": 2.9519,
      "step": 30862
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1343679428100586,
      "learning_rate": 0.0005738258641321724,
      "loss": 3.1908,
      "step": 30863
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4508225917816162,
      "learning_rate": 0.0005738241930623575,
      "loss": 3.0157,
      "step": 30864
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4683465957641602,
      "learning_rate": 0.0005738225219416335,
      "loss": 2.9904,
      "step": 30865
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4111623764038086,
      "learning_rate": 0.0005738208507700006,
      "loss": 3.1512,
      "step": 30866
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7838690280914307,
      "learning_rate": 0.0005738191795474593,
      "loss": 3.2006,
      "step": 30867
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5127861499786377,
      "learning_rate": 0.0005738175082740099,
      "loss": 2.8777,
      "step": 30868
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4233953952789307,
      "learning_rate": 0.0005738158369496527,
      "loss": 3.064,
      "step": 30869
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5323832035064697,
      "learning_rate": 0.0005738141655743879,
      "loss": 3.0647,
      "step": 30870
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3232654333114624,
      "learning_rate": 0.0005738124941482159,
      "loss": 3.0072,
      "step": 30871
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7814509868621826,
      "learning_rate": 0.000573810822671137,
      "loss": 3.0521,
      "step": 30872
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5173033475875854,
      "learning_rate": 0.0005738091511431514,
      "loss": 2.9139,
      "step": 30873
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5950753688812256,
      "learning_rate": 0.0005738074795642596,
      "loss": 3.0039,
      "step": 30874
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5915642976760864,
      "learning_rate": 0.0005738058079344618,
      "loss": 2.912,
      "step": 30875
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7691506147384644,
      "learning_rate": 0.0005738041362537583,
      "loss": 3.4034,
      "step": 30876
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8000829219818115,
      "learning_rate": 0.0005738024645221495,
      "loss": 2.9694,
      "step": 30877
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.478402614593506,
      "learning_rate": 0.0005738007927396356,
      "loss": 3.1587,
      "step": 30878
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6707998514175415,
      "learning_rate": 0.0005737991209062171,
      "loss": 3.1414,
      "step": 30879
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8709092140197754,
      "learning_rate": 0.0005737974490218939,
      "loss": 2.8422,
      "step": 30880
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4541540145874023,
      "learning_rate": 0.0005737957770866669,
      "loss": 3.0831,
      "step": 30881
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3731619119644165,
      "learning_rate": 0.0005737941051005359,
      "loss": 3.1705,
      "step": 30882
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4167927503585815,
      "learning_rate": 0.0005737924330635015,
      "loss": 2.9979,
      "step": 30883
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.736669898033142,
      "learning_rate": 0.0005737907609755638,
      "loss": 2.8699,
      "step": 30884
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6883487701416016,
      "learning_rate": 0.0005737890888367234,
      "loss": 3.0458,
      "step": 30885
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7573747634887695,
      "learning_rate": 0.0005737874166469803,
      "loss": 3.0512,
      "step": 30886
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5806896686553955,
      "learning_rate": 0.000573785744406335,
      "loss": 3.3761,
      "step": 30887
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1390342712402344,
      "learning_rate": 0.0005737840721147878,
      "loss": 3.3269,
      "step": 30888
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.134089469909668,
      "learning_rate": 0.0005737823997723389,
      "loss": 2.8583,
      "step": 30889
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6365584135055542,
      "learning_rate": 0.0005737807273789887,
      "loss": 2.8957,
      "step": 30890
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5276472568511963,
      "learning_rate": 0.0005737790549347375,
      "loss": 3.0352,
      "step": 30891
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8682767152786255,
      "learning_rate": 0.0005737773824395856,
      "loss": 3.2909,
      "step": 30892
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7932844161987305,
      "learning_rate": 0.0005737757098935334,
      "loss": 3.1692,
      "step": 30893
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.762678623199463,
      "learning_rate": 0.0005737740372965811,
      "loss": 3.013,
      "step": 30894
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8409483432769775,
      "learning_rate": 0.0005737723646487289,
      "loss": 3.3223,
      "step": 30895
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7318319082260132,
      "learning_rate": 0.0005737706919499774,
      "loss": 2.9998,
      "step": 30896
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4716594219207764,
      "learning_rate": 0.0005737690192003267,
      "loss": 3.1082,
      "step": 30897
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1428544521331787,
      "learning_rate": 0.0005737673463997772,
      "loss": 3.0242,
      "step": 30898
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.500174045562744,
      "learning_rate": 0.0005737656735483292,
      "loss": 2.8637,
      "step": 30899
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9300435781478882,
      "learning_rate": 0.000573764000645983,
      "loss": 2.9863,
      "step": 30900
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0683252811431885,
      "learning_rate": 0.0005737623276927388,
      "loss": 2.9426,
      "step": 30901
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6687111854553223,
      "learning_rate": 0.0005737606546885972,
      "loss": 2.9917,
      "step": 30902
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.15290904045105,
      "learning_rate": 0.000573758981633558,
      "loss": 3.2561,
      "step": 30903
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8651750087738037,
      "learning_rate": 0.0005737573085276221,
      "loss": 2.8929,
      "step": 30904
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6688848733901978,
      "learning_rate": 0.0005737556353707895,
      "loss": 2.85,
      "step": 30905
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6530979871749878,
      "learning_rate": 0.0005737539621630606,
      "loss": 2.9843,
      "step": 30906
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3703253269195557,
      "learning_rate": 0.0005737522889044356,
      "loss": 3.1017,
      "step": 30907
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2213351726531982,
      "learning_rate": 0.0005737506155949149,
      "loss": 3.5926,
      "step": 30908
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4494504928588867,
      "learning_rate": 0.0005737489422344988,
      "loss": 3.0514,
      "step": 30909
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6317188739776611,
      "learning_rate": 0.0005737472688231876,
      "loss": 3.0605,
      "step": 30910
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7859416007995605,
      "learning_rate": 0.0005737455953609816,
      "loss": 3.0474,
      "step": 30911
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3190338611602783,
      "learning_rate": 0.0005737439218478812,
      "loss": 3.0318,
      "step": 30912
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5193711519241333,
      "learning_rate": 0.0005737422482838865,
      "loss": 3.3383,
      "step": 30913
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4750690460205078,
      "learning_rate": 0.000573740574668998,
      "loss": 3.0986,
      "step": 30914
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4217545986175537,
      "learning_rate": 0.0005737389010032161,
      "loss": 2.7292,
      "step": 30915
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6196497678756714,
      "learning_rate": 0.0005737372272865408,
      "loss": 2.8986,
      "step": 30916
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7065699100494385,
      "learning_rate": 0.0005737355535189726,
      "loss": 3.2489,
      "step": 30917
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5426816940307617,
      "learning_rate": 0.0005737338797005119,
      "loss": 3.1524,
      "step": 30918
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5520188808441162,
      "learning_rate": 0.0005737322058311588,
      "loss": 2.8271,
      "step": 30919
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.736725091934204,
      "learning_rate": 0.0005737305319109138,
      "loss": 2.8646,
      "step": 30920
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5826036930084229,
      "learning_rate": 0.0005737288579397771,
      "loss": 2.9585,
      "step": 30921
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.330228567123413,
      "learning_rate": 0.000573727183917749,
      "loss": 3.1492,
      "step": 30922
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.487977147102356,
      "learning_rate": 0.0005737255098448298,
      "loss": 3.2812,
      "step": 30923
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4890058040618896,
      "learning_rate": 0.0005737238357210201,
      "loss": 3.0424,
      "step": 30924
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5111761093139648,
      "learning_rate": 0.0005737221615463198,
      "loss": 2.9252,
      "step": 30925
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4433205127716064,
      "learning_rate": 0.0005737204873207292,
      "loss": 3.2065,
      "step": 30926
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4339423179626465,
      "learning_rate": 0.0005737188130442491,
      "loss": 2.9581,
      "step": 30927
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7926807403564453,
      "learning_rate": 0.0005737171387168793,
      "loss": 3.218,
      "step": 30928
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.515643835067749,
      "learning_rate": 0.0005737154643386205,
      "loss": 3.1662,
      "step": 30929
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4249017238616943,
      "learning_rate": 0.0005737137899094728,
      "loss": 3.322,
      "step": 30930
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.525141716003418,
      "learning_rate": 0.0005737121154294364,
      "loss": 3.0359,
      "step": 30931
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5718770027160645,
      "learning_rate": 0.0005737104408985118,
      "loss": 3.2176,
      "step": 30932
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6872559785842896,
      "learning_rate": 0.0005737087663166994,
      "loss": 3.2056,
      "step": 30933
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.68263840675354,
      "learning_rate": 0.0005737070916839992,
      "loss": 3.1171,
      "step": 30934
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1050972938537598,
      "learning_rate": 0.0005737054170004117,
      "loss": 3.001,
      "step": 30935
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7927305698394775,
      "learning_rate": 0.0005737037422659374,
      "loss": 3.2635,
      "step": 30936
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3960487842559814,
      "learning_rate": 0.0005737020674805761,
      "loss": 2.9907,
      "step": 30937
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.025921106338501,
      "learning_rate": 0.0005737003926443285,
      "loss": 3.0219,
      "step": 30938
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.0151588916778564,
      "learning_rate": 0.000573698717757195,
      "loss": 3.1043,
      "step": 30939
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.440938115119934,
      "learning_rate": 0.0005736970428191757,
      "loss": 3.03,
      "step": 30940
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.55246901512146,
      "learning_rate": 0.0005736953678302708,
      "loss": 3.104,
      "step": 30941
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.2037715911865234,
      "learning_rate": 0.0005736936927904808,
      "loss": 3.1838,
      "step": 30942
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6487871408462524,
      "learning_rate": 0.0005736920176998061,
      "loss": 3.3199,
      "step": 30943
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4103020429611206,
      "learning_rate": 0.0005736903425582468,
      "loss": 3.2954,
      "step": 30944
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4639835357666016,
      "learning_rate": 0.0005736886673658034,
      "loss": 3.2987,
      "step": 30945
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6811628341674805,
      "learning_rate": 0.000573686992122476,
      "loss": 3.0023,
      "step": 30946
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.892441987991333,
      "learning_rate": 0.000573685316828265,
      "loss": 3.1484,
      "step": 30947
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.334006667137146,
      "learning_rate": 0.0005736836414831709,
      "loss": 3.2282,
      "step": 30948
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.59159255027771,
      "learning_rate": 0.0005736819660871937,
      "loss": 3.2862,
      "step": 30949
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3234903812408447,
      "learning_rate": 0.0005736802906403339,
      "loss": 2.9848,
      "step": 30950
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6830154657363892,
      "learning_rate": 0.0005736786151425917,
      "loss": 3.2317,
      "step": 30951
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.069253444671631,
      "learning_rate": 0.0005736769395939676,
      "loss": 3.1185,
      "step": 30952
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3464090824127197,
      "learning_rate": 0.0005736752639944618,
      "loss": 3.1534,
      "step": 30953
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5591768026351929,
      "learning_rate": 0.0005736735883440744,
      "loss": 3.124,
      "step": 30954
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3957242965698242,
      "learning_rate": 0.000573671912642806,
      "loss": 2.9518,
      "step": 30955
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5391929149627686,
      "learning_rate": 0.0005736702368906569,
      "loss": 2.9062,
      "step": 30956
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.8852524757385254,
      "learning_rate": 0.0005736685610876274,
      "loss": 2.9151,
      "step": 30957
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6204217672348022,
      "learning_rate": 0.0005736668852337176,
      "loss": 3.131,
      "step": 30958
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.509206771850586,
      "learning_rate": 0.000573665209328928,
      "loss": 3.0026,
      "step": 30959
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9496179819107056,
      "learning_rate": 0.0005736635333732589,
      "loss": 3.0727,
      "step": 30960
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7960777282714844,
      "learning_rate": 0.0005736618573667107,
      "loss": 2.9044,
      "step": 30961
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6234180927276611,
      "learning_rate": 0.0005736601813092834,
      "loss": 3.1263,
      "step": 30962
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8782528638839722,
      "learning_rate": 0.0005736585052009775,
      "loss": 2.7981,
      "step": 30963
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8513991832733154,
      "learning_rate": 0.0005736568290417934,
      "loss": 3.0733,
      "step": 30964
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.532118082046509,
      "learning_rate": 0.0005736551528317313,
      "loss": 3.0958,
      "step": 30965
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.851464033126831,
      "learning_rate": 0.0005736534765707917,
      "loss": 3.0308,
      "step": 30966
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7892130613327026,
      "learning_rate": 0.0005736518002589745,
      "loss": 3.1369,
      "step": 30967
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5542049407958984,
      "learning_rate": 0.0005736501238962804,
      "loss": 2.941,
      "step": 30968
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3920891284942627,
      "learning_rate": 0.0005736484474827095,
      "loss": 2.9032,
      "step": 30969
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4767590761184692,
      "learning_rate": 0.0005736467710182623,
      "loss": 3.3375,
      "step": 30970
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6573799848556519,
      "learning_rate": 0.0005736450945029389,
      "loss": 3.1302,
      "step": 30971
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4658502340316772,
      "learning_rate": 0.0005736434179367398,
      "loss": 3.1939,
      "step": 30972
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9705135822296143,
      "learning_rate": 0.0005736417413196652,
      "loss": 3.1657,
      "step": 30973
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0182394981384277,
      "learning_rate": 0.0005736400646517153,
      "loss": 3.2098,
      "step": 30974
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5363585948944092,
      "learning_rate": 0.0005736383879328908,
      "loss": 3.1255,
      "step": 30975
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7437975406646729,
      "learning_rate": 0.0005736367111631916,
      "loss": 3.1911,
      "step": 30976
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.658825635910034,
      "learning_rate": 0.0005736350343426181,
      "loss": 3.1124,
      "step": 30977
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9241167306900024,
      "learning_rate": 0.0005736333574711708,
      "loss": 2.8818,
      "step": 30978
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4766582250595093,
      "learning_rate": 0.0005736316805488499,
      "loss": 3.369,
      "step": 30979
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4399430751800537,
      "learning_rate": 0.0005736300035756556,
      "loss": 3.1287,
      "step": 30980
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.145231246948242,
      "learning_rate": 0.0005736283265515884,
      "loss": 3.0219,
      "step": 30981
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5412824153900146,
      "learning_rate": 0.0005736266494766486,
      "loss": 3.2633,
      "step": 30982
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5859829187393188,
      "learning_rate": 0.0005736249723508364,
      "loss": 2.8971,
      "step": 30983
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7843635082244873,
      "learning_rate": 0.000573623295174152,
      "loss": 3.1317,
      "step": 30984
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.578034520149231,
      "learning_rate": 0.000573621617946596,
      "loss": 3.2055,
      "step": 30985
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5046590566635132,
      "learning_rate": 0.0005736199406681685,
      "loss": 3.2296,
      "step": 30986
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7308995723724365,
      "learning_rate": 0.0005736182633388699,
      "loss": 3.0941,
      "step": 30987
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7520822286605835,
      "learning_rate": 0.0005736165859587006,
      "loss": 2.8065,
      "step": 30988
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2991644144058228,
      "learning_rate": 0.0005736149085276608,
      "loss": 3.176,
      "step": 30989
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7169595956802368,
      "learning_rate": 0.0005736132310457507,
      "loss": 3.0421,
      "step": 30990
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8681421279907227,
      "learning_rate": 0.0005736115535129708,
      "loss": 3.1588,
      "step": 30991
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7667241096496582,
      "learning_rate": 0.0005736098759293215,
      "loss": 2.9974,
      "step": 30992
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.354856014251709,
      "learning_rate": 0.0005736081982948026,
      "loss": 3.2048,
      "step": 30993
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9740548133850098,
      "learning_rate": 0.0005736065206094151,
      "loss": 2.9573,
      "step": 30994
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.373180627822876,
      "learning_rate": 0.0005736048428731589,
      "loss": 3.2624,
      "step": 30995
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.256022334098816,
      "learning_rate": 0.0005736031650860344,
      "loss": 2.8748,
      "step": 30996
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4922246932983398,
      "learning_rate": 0.0005736014872480418,
      "loss": 3.0795,
      "step": 30997
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.525172710418701,
      "learning_rate": 0.0005735998093591816,
      "loss": 3.0044,
      "step": 30998
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.957886815071106,
      "learning_rate": 0.000573598131419454,
      "loss": 3.0128,
      "step": 30999
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4959994554519653,
      "learning_rate": 0.0005735964534288595,
      "loss": 3.0369,
      "step": 31000
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2821872234344482,
      "learning_rate": 0.000573594775387398,
      "loss": 2.7222,
      "step": 31001
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8076049089431763,
      "learning_rate": 0.0005735930972950702,
      "loss": 3.1424,
      "step": 31002
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8123433589935303,
      "learning_rate": 0.0005735914191518763,
      "loss": 3.2879,
      "step": 31003
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8583753108978271,
      "learning_rate": 0.0005735897409578166,
      "loss": 3.1982,
      "step": 31004
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6261284351348877,
      "learning_rate": 0.0005735880627128913,
      "loss": 3.0076,
      "step": 31005
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.544287919998169,
      "learning_rate": 0.0005735863844171009,
      "loss": 2.6377,
      "step": 31006
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4182095527648926,
      "learning_rate": 0.0005735847060704455,
      "loss": 3.0711,
      "step": 31007
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.9324257373809814,
      "learning_rate": 0.0005735830276729257,
      "loss": 3.2945,
      "step": 31008
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4078922271728516,
      "learning_rate": 0.0005735813492245416,
      "loss": 3.1251,
      "step": 31009
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6390849351882935,
      "learning_rate": 0.0005735796707252935,
      "loss": 2.9087,
      "step": 31010
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5746426582336426,
      "learning_rate": 0.0005735779921751819,
      "loss": 3.0561,
      "step": 31011
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.388366222381592,
      "learning_rate": 0.0005735763135742069,
      "loss": 2.7128,
      "step": 31012
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5322188138961792,
      "learning_rate": 0.0005735746349223688,
      "loss": 3.3715,
      "step": 31013
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3704462051391602,
      "learning_rate": 0.0005735729562196683,
      "loss": 2.9913,
      "step": 31014
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.5732967853546143,
      "learning_rate": 0.0005735712774661051,
      "loss": 3.1277,
      "step": 31015
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.32943058013916,
      "learning_rate": 0.00057356959866168,
      "loss": 3.0698,
      "step": 31016
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4607316255569458,
      "learning_rate": 0.0005735679198063931,
      "loss": 3.0261,
      "step": 31017
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7782516479492188,
      "learning_rate": 0.0005735662409002448,
      "loss": 2.9353,
      "step": 31018
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.76619815826416,
      "learning_rate": 0.0005735645619432354,
      "loss": 2.9511,
      "step": 31019
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2379977703094482,
      "learning_rate": 0.0005735628829353651,
      "loss": 3.0,
      "step": 31020
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5564351081848145,
      "learning_rate": 0.0005735612038766344,
      "loss": 3.1852,
      "step": 31021
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.3871138095855713,
      "learning_rate": 0.0005735595247670435,
      "loss": 3.2312,
      "step": 31022
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.5692946910858154,
      "learning_rate": 0.0005735578456065927,
      "loss": 2.9373,
      "step": 31023
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9623390436172485,
      "learning_rate": 0.0005735561663952822,
      "loss": 3.1663,
      "step": 31024
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4022055864334106,
      "learning_rate": 0.0005735544871331126,
      "loss": 3.1584,
      "step": 31025
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.969107151031494,
      "learning_rate": 0.000573552807820084,
      "loss": 3.0581,
      "step": 31026
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.436715602874756,
      "learning_rate": 0.0005735511284561968,
      "loss": 2.9742,
      "step": 31027
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8783189058303833,
      "learning_rate": 0.0005735494490414512,
      "loss": 2.9802,
      "step": 31028
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8741785287857056,
      "learning_rate": 0.0005735477695758477,
      "loss": 2.742,
      "step": 31029
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6052159070968628,
      "learning_rate": 0.0005735460900593865,
      "loss": 3.2534,
      "step": 31030
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.4293785095214844,
      "learning_rate": 0.0005735444104920678,
      "loss": 3.0753,
      "step": 31031
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4826843738555908,
      "learning_rate": 0.0005735427308738921,
      "loss": 3.0565,
      "step": 31032
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5087766647338867,
      "learning_rate": 0.0005735410512048596,
      "loss": 3.2023,
      "step": 31033
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8674092292785645,
      "learning_rate": 0.0005735393714849708,
      "loss": 3.1198,
      "step": 31034
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4103376865386963,
      "learning_rate": 0.0005735376917142258,
      "loss": 3.4105,
      "step": 31035
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6729191541671753,
      "learning_rate": 0.0005735360118926249,
      "loss": 3.2553,
      "step": 31036
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9683328866958618,
      "learning_rate": 0.0005735343320201685,
      "loss": 3.1709,
      "step": 31037
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4089611768722534,
      "learning_rate": 0.000573532652096857,
      "loss": 2.9813,
      "step": 31038
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3662697076797485,
      "learning_rate": 0.0005735309721226906,
      "loss": 3.1158,
      "step": 31039
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3719700574874878,
      "learning_rate": 0.0005735292920976695,
      "loss": 2.9764,
      "step": 31040
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.207850456237793,
      "learning_rate": 0.0005735276120217942,
      "loss": 2.9789,
      "step": 31041
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7227098941802979,
      "learning_rate": 0.0005735259318950651,
      "loss": 2.954,
      "step": 31042
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4661622047424316,
      "learning_rate": 0.0005735242517174822,
      "loss": 3.1439,
      "step": 31043
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0442357063293457,
      "learning_rate": 0.000573522571489046,
      "loss": 3.0456,
      "step": 31044
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6782294511795044,
      "learning_rate": 0.0005735208912097568,
      "loss": 3.0043,
      "step": 31045
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8400580883026123,
      "learning_rate": 0.0005735192108796151,
      "loss": 3.1567,
      "step": 31046
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3953913450241089,
      "learning_rate": 0.0005735175304986208,
      "loss": 3.0766,
      "step": 31047
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4356939792633057,
      "learning_rate": 0.0005735158500667744,
      "loss": 3.4013,
      "step": 31048
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2676358222961426,
      "learning_rate": 0.0005735141695840763,
      "loss": 3.0365,
      "step": 31049
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.6198010444641113,
      "learning_rate": 0.0005735124890505267,
      "loss": 2.8573,
      "step": 31050
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.815953016281128,
      "learning_rate": 0.000573510808466126,
      "loss": 2.9425,
      "step": 31051
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.723135232925415,
      "learning_rate": 0.0005735091278308746,
      "loss": 3.1677,
      "step": 31052
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.8985157012939453,
      "learning_rate": 0.0005735074471447726,
      "loss": 3.076,
      "step": 31053
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.5699570178985596,
      "learning_rate": 0.0005735057664078203,
      "loss": 3.1099,
      "step": 31054
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0256354808807373,
      "learning_rate": 0.0005735040856200183,
      "loss": 3.0547,
      "step": 31055
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.935353994369507,
      "learning_rate": 0.0005735024047813667,
      "loss": 3.1445,
      "step": 31056
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8753412961959839,
      "learning_rate": 0.0005735007238918657,
      "loss": 2.9824,
      "step": 31057
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5493950843811035,
      "learning_rate": 0.0005734990429515159,
      "loss": 3.2334,
      "step": 31058
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6032551527023315,
      "learning_rate": 0.0005734973619603174,
      "loss": 3.3379,
      "step": 31059
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0785303115844727,
      "learning_rate": 0.0005734956809182706,
      "loss": 2.995,
      "step": 31060
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8507558107376099,
      "learning_rate": 0.0005734939998253757,
      "loss": 3.0949,
      "step": 31061
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5606433153152466,
      "learning_rate": 0.0005734923186816333,
      "loss": 3.2816,
      "step": 31062
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6535941362380981,
      "learning_rate": 0.0005734906374870433,
      "loss": 3.1662,
      "step": 31063
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.9088666439056396,
      "learning_rate": 0.0005734889562416063,
      "loss": 3.1631,
      "step": 31064
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5272278785705566,
      "learning_rate": 0.0005734872749453226,
      "loss": 3.4311,
      "step": 31065
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2491250038146973,
      "learning_rate": 0.0005734855935981923,
      "loss": 2.9536,
      "step": 31066
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.103972911834717,
      "learning_rate": 0.000573483912200216,
      "loss": 3.2572,
      "step": 31067
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.066572427749634,
      "learning_rate": 0.0005734822307513938,
      "loss": 3.0482,
      "step": 31068
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.7060647010803223,
      "learning_rate": 0.0005734805492517263,
      "loss": 3.1391,
      "step": 31069
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3286850452423096,
      "learning_rate": 0.0005734788677012134,
      "loss": 2.9532,
      "step": 31070
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.756069540977478,
      "learning_rate": 0.0005734771860998557,
      "loss": 3.0538,
      "step": 31071
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6017121076583862,
      "learning_rate": 0.0005734755044476533,
      "loss": 2.8291,
      "step": 31072
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3904234170913696,
      "learning_rate": 0.0005734738227446067,
      "loss": 3.0369,
      "step": 31073
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5692262649536133,
      "learning_rate": 0.0005734721409907163,
      "loss": 2.97,
      "step": 31074
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.007113456726074,
      "learning_rate": 0.0005734704591859821,
      "loss": 2.9386,
      "step": 31075
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.658801555633545,
      "learning_rate": 0.0005734687773304046,
      "loss": 3.2469,
      "step": 31076
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5780442953109741,
      "learning_rate": 0.0005734670954239842,
      "loss": 3.249,
      "step": 31077
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2072863578796387,
      "learning_rate": 0.0005734654134667209,
      "loss": 3.1361,
      "step": 31078
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.3477237224578857,
      "learning_rate": 0.0005734637314586154,
      "loss": 3.0575,
      "step": 31079
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2721385955810547,
      "learning_rate": 0.0005734620493996678,
      "loss": 3.0014,
      "step": 31080
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.389749526977539,
      "learning_rate": 0.0005734603672898784,
      "loss": 3.1692,
      "step": 31081
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.2128429412841797,
      "learning_rate": 0.0005734586851292477,
      "loss": 3.217,
      "step": 31082
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6893103122711182,
      "learning_rate": 0.0005734570029177757,
      "loss": 2.9442,
      "step": 31083
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.578954815864563,
      "learning_rate": 0.0005734553206554629,
      "loss": 3.1576,
      "step": 31084
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8527936935424805,
      "learning_rate": 0.0005734536383423096,
      "loss": 3.0614,
      "step": 31085
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.039271831512451,
      "learning_rate": 0.0005734519559783161,
      "loss": 3.1757,
      "step": 31086
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5608683824539185,
      "learning_rate": 0.0005734502735634828,
      "loss": 3.0499,
      "step": 31087
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0364017486572266,
      "learning_rate": 0.0005734485910978099,
      "loss": 3.0297,
      "step": 31088
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8439911603927612,
      "learning_rate": 0.0005734469085812977,
      "loss": 3.1282,
      "step": 31089
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5485401153564453,
      "learning_rate": 0.0005734452260139466,
      "loss": 3.0454,
      "step": 31090
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0590054988861084,
      "learning_rate": 0.0005734435433957569,
      "loss": 3.3712,
      "step": 31091
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.568157434463501,
      "learning_rate": 0.0005734418607267288,
      "loss": 2.923,
      "step": 31092
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.7670400142669678,
      "learning_rate": 0.0005734401780068627,
      "loss": 3.0265,
      "step": 31093
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.0044331550598145,
      "learning_rate": 0.000573438495236159,
      "loss": 3.1095,
      "step": 31094
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.0331389904022217,
      "learning_rate": 0.0005734368124146178,
      "loss": 3.1749,
      "step": 31095
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.4750771522521973,
      "learning_rate": 0.0005734351295422396,
      "loss": 3.2338,
      "step": 31096
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5191212892532349,
      "learning_rate": 0.0005734334466190247,
      "loss": 3.1316,
      "step": 31097
    },
    {
      "epoch": 0.4,
      "grad_norm": 3.9899942874908447,
      "learning_rate": 0.0005734317636449733,
      "loss": 3.2018,
      "step": 31098
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.1783416271209717,
      "learning_rate": 0.0005734300806200858,
      "loss": 3.1389,
      "step": 31099
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.614225149154663,
      "learning_rate": 0.0005734283975443624,
      "loss": 3.1297,
      "step": 31100
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.8034632205963135,
      "learning_rate": 0.0005734267144178036,
      "loss": 3.1463,
      "step": 31101
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.111595630645752,
      "learning_rate": 0.0005734250312404096,
      "loss": 2.9451,
      "step": 31102
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.6003501415252686,
      "learning_rate": 0.0005734233480121806,
      "loss": 3.1378,
      "step": 31103
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.503211259841919,
      "learning_rate": 0.0005734216647331172,
      "loss": 2.8793,
      "step": 31104
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2270898818969727,
      "learning_rate": 0.0005734199814032196,
      "loss": 3.0115,
      "step": 31105
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.278587579727173,
      "learning_rate": 0.0005734182980224879,
      "loss": 3.1255,
      "step": 31106
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3663196563720703,
      "learning_rate": 0.0005734166145909226,
      "loss": 3.2718,
      "step": 31107
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.712887167930603,
      "learning_rate": 0.000573414931108524,
      "loss": 3.4327,
      "step": 31108
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.501194715499878,
      "learning_rate": 0.0005734132475752925,
      "loss": 3.2313,
      "step": 31109
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.534690499305725,
      "learning_rate": 0.0005734115639912282,
      "loss": 3.0478,
      "step": 31110
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5099296569824219,
      "learning_rate": 0.0005734098803563315,
      "loss": 2.9916,
      "step": 31111
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2379413843154907,
      "learning_rate": 0.0005734081966706028,
      "loss": 2.962,
      "step": 31112
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4119569063186646,
      "learning_rate": 0.0005734065129340425,
      "loss": 3.3898,
      "step": 31113
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.383216142654419,
      "learning_rate": 0.0005734048291466507,
      "loss": 3.2264,
      "step": 31114
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.432167887687683,
      "learning_rate": 0.0005734031453084276,
      "loss": 3.0584,
      "step": 31115
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7184051275253296,
      "learning_rate": 0.0005734014614193738,
      "loss": 3.1359,
      "step": 31116
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6642080545425415,
      "learning_rate": 0.0005733997774794896,
      "loss": 3.157,
      "step": 31117
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3965864181518555,
      "learning_rate": 0.0005733980934887752,
      "loss": 3.1534,
      "step": 31118
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2593177556991577,
      "learning_rate": 0.0005733964094472309,
      "loss": 2.9122,
      "step": 31119
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4860458374023438,
      "learning_rate": 0.000573394725354857,
      "loss": 2.8629,
      "step": 31120
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8407955169677734,
      "learning_rate": 0.0005733930412116538,
      "loss": 3.0892,
      "step": 31121
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0086214542388916,
      "learning_rate": 0.0005733913570176219,
      "loss": 2.8792,
      "step": 31122
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6890974044799805,
      "learning_rate": 0.0005733896727727611,
      "loss": 3.0686,
      "step": 31123
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.930074691772461,
      "learning_rate": 0.0005733879884770722,
      "loss": 3.0566,
      "step": 31124
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5771152973175049,
      "learning_rate": 0.0005733863041305552,
      "loss": 3.2812,
      "step": 31125
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8244848251342773,
      "learning_rate": 0.0005733846197332106,
      "loss": 2.9581,
      "step": 31126
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6623742580413818,
      "learning_rate": 0.0005733829352850386,
      "loss": 3.0153,
      "step": 31127
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.369258165359497,
      "learning_rate": 0.0005733812507860396,
      "loss": 2.932,
      "step": 31128
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4003297090530396,
      "learning_rate": 0.0005733795662362139,
      "loss": 3.132,
      "step": 31129
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6839289665222168,
      "learning_rate": 0.0005733778816355617,
      "loss": 3.297,
      "step": 31130
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6444908380508423,
      "learning_rate": 0.0005733761969840833,
      "loss": 3.052,
      "step": 31131
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4855592250823975,
      "learning_rate": 0.0005733745122817793,
      "loss": 3.2459,
      "step": 31132
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0188984870910645,
      "learning_rate": 0.0005733728275286497,
      "loss": 3.0992,
      "step": 31133
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.346003532409668,
      "learning_rate": 0.0005733711427246951,
      "loss": 3.3012,
      "step": 31134
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2305314540863037,
      "learning_rate": 0.0005733694578699154,
      "loss": 2.8778,
      "step": 31135
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5868762731552124,
      "learning_rate": 0.0005733677729643112,
      "loss": 3.1832,
      "step": 31136
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5989420413970947,
      "learning_rate": 0.000573366088007883,
      "loss": 3.1331,
      "step": 31137
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.46443772315979,
      "learning_rate": 0.0005733644030006306,
      "loss": 3.151,
      "step": 31138
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4068621397018433,
      "learning_rate": 0.0005733627179425547,
      "loss": 3.0301,
      "step": 31139
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5970979928970337,
      "learning_rate": 0.0005733610328336556,
      "loss": 3.034,
      "step": 31140
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3369288444519043,
      "learning_rate": 0.0005733593476739334,
      "loss": 2.9704,
      "step": 31141
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7521806955337524,
      "learning_rate": 0.0005733576624633886,
      "loss": 2.8842,
      "step": 31142
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.391873836517334,
      "learning_rate": 0.0005733559772020215,
      "loss": 2.9268,
      "step": 31143
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4289641380310059,
      "learning_rate": 0.0005733542918898322,
      "loss": 3.0389,
      "step": 31144
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6764037609100342,
      "learning_rate": 0.0005733526065268213,
      "loss": 3.1494,
      "step": 31145
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3513197898864746,
      "learning_rate": 0.000573350921112989,
      "loss": 3.0986,
      "step": 31146
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7321699857711792,
      "learning_rate": 0.0005733492356483355,
      "loss": 2.9446,
      "step": 31147
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7054928541183472,
      "learning_rate": 0.0005733475501328613,
      "loss": 3.1323,
      "step": 31148
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5925942659378052,
      "learning_rate": 0.0005733458645665666,
      "loss": 2.8189,
      "step": 31149
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7172499895095825,
      "learning_rate": 0.0005733441789494519,
      "loss": 3.0751,
      "step": 31150
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8352863788604736,
      "learning_rate": 0.0005733424932815171,
      "loss": 3.3092,
      "step": 31151
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5808722972869873,
      "learning_rate": 0.000573340807562763,
      "loss": 3.2992,
      "step": 31152
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6002525091171265,
      "learning_rate": 0.0005733391217931894,
      "loss": 3.2159,
      "step": 31153
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4165613651275635,
      "learning_rate": 0.0005733374359727972,
      "loss": 3.0621,
      "step": 31154
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2999639511108398,
      "learning_rate": 0.0005733357501015862,
      "loss": 3.0106,
      "step": 31155
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.546524167060852,
      "learning_rate": 0.0005733340641795571,
      "loss": 2.9476,
      "step": 31156
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.764624834060669,
      "learning_rate": 0.00057333237820671,
      "loss": 3.3713,
      "step": 31157
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2150599956512451,
      "learning_rate": 0.0005733306921830452,
      "loss": 3.2111,
      "step": 31158
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9633735418319702,
      "learning_rate": 0.000573329006108563,
      "loss": 2.9642,
      "step": 31159
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9820218086242676,
      "learning_rate": 0.0005733273199832638,
      "loss": 3.1878,
      "step": 31160
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.467148780822754,
      "learning_rate": 0.000573325633807148,
      "loss": 2.8192,
      "step": 31161
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8751250505447388,
      "learning_rate": 0.0005733239475802157,
      "loss": 3.0001,
      "step": 31162
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6660716533660889,
      "learning_rate": 0.0005733222613024673,
      "loss": 2.8292,
      "step": 31163
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7256826162338257,
      "learning_rate": 0.0005733205749739032,
      "loss": 3.1905,
      "step": 31164
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.871398687362671,
      "learning_rate": 0.0005733188885945237,
      "loss": 3.1329,
      "step": 31165
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9723724126815796,
      "learning_rate": 0.0005733172021643291,
      "loss": 2.9535,
      "step": 31166
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6954631805419922,
      "learning_rate": 0.0005733155156833194,
      "loss": 3.3452,
      "step": 31167
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8129161596298218,
      "learning_rate": 0.0005733138291514954,
      "loss": 3.082,
      "step": 31168
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2151434421539307,
      "learning_rate": 0.0005733121425688572,
      "loss": 3.0166,
      "step": 31169
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5504765510559082,
      "learning_rate": 0.000573310455935405,
      "loss": 2.8433,
      "step": 31170
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3558831214904785,
      "learning_rate": 0.0005733087692511394,
      "loss": 3.3067,
      "step": 31171
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7674115896224976,
      "learning_rate": 0.0005733070825160604,
      "loss": 2.8966,
      "step": 31172
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.109144449234009,
      "learning_rate": 0.0005733053957301684,
      "loss": 3.072,
      "step": 31173
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6763044595718384,
      "learning_rate": 0.0005733037088934639,
      "loss": 3.0355,
      "step": 31174
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.65403413772583,
      "learning_rate": 0.000573302022005947,
      "loss": 3.3632,
      "step": 31175
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4340128898620605,
      "learning_rate": 0.0005733003350676182,
      "loss": 3.1311,
      "step": 31176
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0837795734405518,
      "learning_rate": 0.0005732986480784777,
      "loss": 2.929,
      "step": 31177
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2961761951446533,
      "learning_rate": 0.0005732969610385257,
      "loss": 3.2058,
      "step": 31178
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.912955403327942,
      "learning_rate": 0.0005732952739477626,
      "loss": 2.9614,
      "step": 31179
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.6256744861602783,
      "learning_rate": 0.0005732935868061889,
      "loss": 3.2054,
      "step": 31180
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5895713567733765,
      "learning_rate": 0.0005732918996138049,
      "loss": 3.0895,
      "step": 31181
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1470682621002197,
      "learning_rate": 0.0005732902123706104,
      "loss": 2.9093,
      "step": 31182
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6038215160369873,
      "learning_rate": 0.0005732885250766064,
      "loss": 3.1864,
      "step": 31183
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.796057105064392,
      "learning_rate": 0.0005732868377317928,
      "loss": 3.2299,
      "step": 31184
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.433689832687378,
      "learning_rate": 0.0005732851503361699,
      "loss": 2.9918,
      "step": 31185
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4042322635650635,
      "learning_rate": 0.0005732834628897382,
      "loss": 3.1736,
      "step": 31186
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.94307541847229,
      "learning_rate": 0.0005732817753924981,
      "loss": 2.8733,
      "step": 31187
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6084229946136475,
      "learning_rate": 0.0005732800878444496,
      "loss": 3.1802,
      "step": 31188
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8795878887176514,
      "learning_rate": 0.0005732784002455933,
      "loss": 3.1708,
      "step": 31189
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6833261251449585,
      "learning_rate": 0.0005732767125959293,
      "loss": 3.1609,
      "step": 31190
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6775728464126587,
      "learning_rate": 0.0005732750248954579,
      "loss": 2.943,
      "step": 31191
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.204928398132324,
      "learning_rate": 0.0005732733371441797,
      "loss": 3.2052,
      "step": 31192
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3401732444763184,
      "learning_rate": 0.0005732716493420946,
      "loss": 2.9288,
      "step": 31193
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8014543056488037,
      "learning_rate": 0.0005732699614892033,
      "loss": 2.9594,
      "step": 31194
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4297009706497192,
      "learning_rate": 0.000573268273585506,
      "loss": 3.1819,
      "step": 31195
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7160887718200684,
      "learning_rate": 0.0005732665856310029,
      "loss": 3.1813,
      "step": 31196
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.801312804222107,
      "learning_rate": 0.0005732648976256945,
      "loss": 3.2357,
      "step": 31197
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4749635457992554,
      "learning_rate": 0.0005732632095695808,
      "loss": 3.2809,
      "step": 31198
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1229751110076904,
      "learning_rate": 0.0005732615214626624,
      "loss": 3.3192,
      "step": 31199
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9441628456115723,
      "learning_rate": 0.0005732598333049395,
      "loss": 3.0022,
      "step": 31200
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3750615119934082,
      "learning_rate": 0.0005732581450964125,
      "loss": 2.9133,
      "step": 31201
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3420910835266113,
      "learning_rate": 0.0005732564568370817,
      "loss": 2.9558,
      "step": 31202
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4794418811798096,
      "learning_rate": 0.0005732547685269472,
      "loss": 3.154,
      "step": 31203
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.500996470451355,
      "learning_rate": 0.0005732530801660096,
      "loss": 3.3283,
      "step": 31204
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5138975381851196,
      "learning_rate": 0.0005732513917542691,
      "loss": 3.0117,
      "step": 31205
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3623138666152954,
      "learning_rate": 0.0005732497032917259,
      "loss": 3.0979,
      "step": 31206
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.610543966293335,
      "learning_rate": 0.0005732480147783805,
      "loss": 2.887,
      "step": 31207
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3904778957366943,
      "learning_rate": 0.0005732463262142332,
      "loss": 2.9199,
      "step": 31208
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4355260133743286,
      "learning_rate": 0.0005732446375992841,
      "loss": 3.1397,
      "step": 31209
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7689626216888428,
      "learning_rate": 0.0005732429489335338,
      "loss": 3.2198,
      "step": 31210
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.556570291519165,
      "learning_rate": 0.0005732412602169825,
      "loss": 3.0378,
      "step": 31211
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8778318166732788,
      "learning_rate": 0.0005732395714496303,
      "loss": 3.0848,
      "step": 31212
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3665632009506226,
      "learning_rate": 0.0005732378826314779,
      "loss": 3.1916,
      "step": 31213
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4577723741531372,
      "learning_rate": 0.0005732361937625253,
      "loss": 3.1974,
      "step": 31214
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6909853219985962,
      "learning_rate": 0.000573234504842773,
      "loss": 3.1625,
      "step": 31215
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9193881750106812,
      "learning_rate": 0.0005732328158722211,
      "loss": 3.1504,
      "step": 31216
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.627212643623352,
      "learning_rate": 0.0005732311268508702,
      "loss": 3.0664,
      "step": 31217
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5587353706359863,
      "learning_rate": 0.0005732294377787205,
      "loss": 2.9092,
      "step": 31218
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.421311616897583,
      "learning_rate": 0.0005732277486557723,
      "loss": 3.0822,
      "step": 31219
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7459086179733276,
      "learning_rate": 0.0005732260594820259,
      "loss": 3.2426,
      "step": 31220
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3856616020202637,
      "learning_rate": 0.0005732243702574815,
      "loss": 2.959,
      "step": 31221
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5927156209945679,
      "learning_rate": 0.0005732226809821396,
      "loss": 3.0939,
      "step": 31222
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5647053718566895,
      "learning_rate": 0.0005732209916560004,
      "loss": 3.1186,
      "step": 31223
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1591954231262207,
      "learning_rate": 0.0005732193022790643,
      "loss": 3.0056,
      "step": 31224
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5324976444244385,
      "learning_rate": 0.0005732176128513316,
      "loss": 3.0046,
      "step": 31225
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5685765743255615,
      "learning_rate": 0.0005732159233728026,
      "loss": 3.0858,
      "step": 31226
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.077235221862793,
      "learning_rate": 0.0005732142338434776,
      "loss": 2.8474,
      "step": 31227
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9825059175491333,
      "learning_rate": 0.0005732125442633568,
      "loss": 3.0971,
      "step": 31228
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9669064283370972,
      "learning_rate": 0.0005732108546324408,
      "loss": 3.209,
      "step": 31229
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0507285594940186,
      "learning_rate": 0.0005732091649507296,
      "loss": 3.148,
      "step": 31230
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.212498426437378,
      "learning_rate": 0.0005732074752182238,
      "loss": 3.3904,
      "step": 31231
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6506518125534058,
      "learning_rate": 0.0005732057854349235,
      "loss": 2.9245,
      "step": 31232
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5113269090652466,
      "learning_rate": 0.000573204095600829,
      "loss": 2.8468,
      "step": 31233
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.2200684547424316,
      "learning_rate": 0.0005732024057159409,
      "loss": 3.1371,
      "step": 31234
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3965320587158203,
      "learning_rate": 0.0005732007157802591,
      "loss": 3.3033,
      "step": 31235
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.528520941734314,
      "learning_rate": 0.0005731990257937842,
      "loss": 2.9327,
      "step": 31236
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.3237814903259277,
      "learning_rate": 0.0005731973357565166,
      "loss": 2.8832,
      "step": 31237
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.210951805114746,
      "learning_rate": 0.0005731956456684563,
      "loss": 2.9694,
      "step": 31238
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4761546850204468,
      "learning_rate": 0.0005731939555296037,
      "loss": 3.1826,
      "step": 31239
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9736266136169434,
      "learning_rate": 0.0005731922653399594,
      "loss": 2.8826,
      "step": 31240
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.389024496078491,
      "learning_rate": 0.0005731905750995233,
      "loss": 2.8863,
      "step": 31241
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4785441160202026,
      "learning_rate": 0.0005731888848082962,
      "loss": 2.8119,
      "step": 31242
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5480766296386719,
      "learning_rate": 0.0005731871944662778,
      "loss": 2.9918,
      "step": 31243
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5025179386138916,
      "learning_rate": 0.0005731855040734689,
      "loss": 3.2053,
      "step": 31244
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.553016424179077,
      "learning_rate": 0.0005731838136298696,
      "loss": 3.0094,
      "step": 31245
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8986068964004517,
      "learning_rate": 0.0005731821231354803,
      "loss": 3.1411,
      "step": 31246
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.8715295791625977,
      "learning_rate": 0.0005731804325903014,
      "loss": 3.0626,
      "step": 31247
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4923644065856934,
      "learning_rate": 0.000573178741994333,
      "loss": 3.1952,
      "step": 31248
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9999923706054688,
      "learning_rate": 0.0005731770513475755,
      "loss": 2.9496,
      "step": 31249
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4225430488586426,
      "learning_rate": 0.0005731753606500293,
      "loss": 3.1285,
      "step": 31250
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6473150253295898,
      "learning_rate": 0.0005731736699016946,
      "loss": 3.0437,
      "step": 31251
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.413131833076477,
      "learning_rate": 0.0005731719791025716,
      "loss": 3.1809,
      "step": 31252
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5282014608383179,
      "learning_rate": 0.000573170288252661,
      "loss": 3.2001,
      "step": 31253
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6161365509033203,
      "learning_rate": 0.0005731685973519628,
      "loss": 3.0627,
      "step": 31254
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.015270709991455,
      "learning_rate": 0.0005731669064004774,
      "loss": 2.9759,
      "step": 31255
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.8687853813171387,
      "learning_rate": 0.0005731652153982052,
      "loss": 3.2662,
      "step": 31256
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3696415424346924,
      "learning_rate": 0.0005731635243451463,
      "loss": 3.0367,
      "step": 31257
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.372241497039795,
      "learning_rate": 0.0005731618332413012,
      "loss": 3.0655,
      "step": 31258
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6285405158996582,
      "learning_rate": 0.0005731601420866701,
      "loss": 3.0303,
      "step": 31259
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1399824619293213,
      "learning_rate": 0.0005731584508812535,
      "loss": 3.1992,
      "step": 31260
    },
    {
      "epoch": 0.41,
      "grad_norm": 4.036249160766602,
      "learning_rate": 0.0005731567596250515,
      "loss": 2.8166,
      "step": 31261
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.863095998764038,
      "learning_rate": 0.0005731550683180645,
      "loss": 3.0375,
      "step": 31262
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5263478755950928,
      "learning_rate": 0.0005731533769602929,
      "loss": 3.0368,
      "step": 31263
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.0515544414520264,
      "learning_rate": 0.0005731516855517368,
      "loss": 3.0201,
      "step": 31264
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.6556556224823,
      "learning_rate": 0.0005731499940923967,
      "loss": 3.0461,
      "step": 31265
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.639562964439392,
      "learning_rate": 0.0005731483025822729,
      "loss": 3.4132,
      "step": 31266
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9674166440963745,
      "learning_rate": 0.0005731466110213656,
      "loss": 2.8748,
      "step": 31267
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.541902780532837,
      "learning_rate": 0.0005731449194096753,
      "loss": 3.0238,
      "step": 31268
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4541428089141846,
      "learning_rate": 0.0005731432277472022,
      "loss": 3.0597,
      "step": 31269
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6861350536346436,
      "learning_rate": 0.0005731415360339465,
      "loss": 2.9341,
      "step": 31270
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8567278385162354,
      "learning_rate": 0.0005731398442699087,
      "loss": 3.118,
      "step": 31271
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9914724826812744,
      "learning_rate": 0.0005731381524550891,
      "loss": 3.0616,
      "step": 31272
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2152974605560303,
      "learning_rate": 0.0005731364605894878,
      "loss": 2.9441,
      "step": 31273
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6581876277923584,
      "learning_rate": 0.0005731347686731054,
      "loss": 3.0181,
      "step": 31274
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6587235927581787,
      "learning_rate": 0.000573133076705942,
      "loss": 3.2535,
      "step": 31275
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4664933681488037,
      "learning_rate": 0.0005731313846879982,
      "loss": 3.0638,
      "step": 31276
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3624765872955322,
      "learning_rate": 0.000573129692619274,
      "loss": 3.0882,
      "step": 31277
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.657619595527649,
      "learning_rate": 0.0005731280004997697,
      "loss": 2.9667,
      "step": 31278
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7674694061279297,
      "learning_rate": 0.0005731263083294859,
      "loss": 3.2586,
      "step": 31279
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5793472528457642,
      "learning_rate": 0.0005731246161084228,
      "loss": 3.2639,
      "step": 31280
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6424455642700195,
      "learning_rate": 0.0005731229238365806,
      "loss": 3.1098,
      "step": 31281
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.050387382507324,
      "learning_rate": 0.0005731212315139597,
      "loss": 3.0908,
      "step": 31282
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5128473043441772,
      "learning_rate": 0.0005731195391405604,
      "loss": 2.9746,
      "step": 31283
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6777719259262085,
      "learning_rate": 0.0005731178467163831,
      "loss": 2.9962,
      "step": 31284
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2411997318267822,
      "learning_rate": 0.0005731161542414279,
      "loss": 2.9785,
      "step": 31285
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.997549057006836,
      "learning_rate": 0.0005731144617156953,
      "loss": 3.06,
      "step": 31286
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6219675540924072,
      "learning_rate": 0.0005731127691391857,
      "loss": 2.9667,
      "step": 31287
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3273556232452393,
      "learning_rate": 0.0005731110765118991,
      "loss": 2.9055,
      "step": 31288
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.1051700115203857,
      "learning_rate": 0.0005731093838338361,
      "loss": 3.0791,
      "step": 31289
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.961681842803955,
      "learning_rate": 0.0005731076911049969,
      "loss": 2.7907,
      "step": 31290
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.417466402053833,
      "learning_rate": 0.0005731059983253819,
      "loss": 2.9984,
      "step": 31291
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.900309681892395,
      "learning_rate": 0.0005731043054949911,
      "loss": 3.299,
      "step": 31292
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.562462568283081,
      "learning_rate": 0.0005731026126138253,
      "loss": 3.1104,
      "step": 31293
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.6940243244171143,
      "learning_rate": 0.0005731009196818845,
      "loss": 2.9663,
      "step": 31294
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6905308961868286,
      "learning_rate": 0.000573099226699169,
      "loss": 3.1028,
      "step": 31295
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3696333169937134,
      "learning_rate": 0.0005730975336656792,
      "loss": 3.1772,
      "step": 31296
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2319304943084717,
      "learning_rate": 0.0005730958405814155,
      "loss": 3.2127,
      "step": 31297
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.1594619750976562,
      "learning_rate": 0.0005730941474463781,
      "loss": 3.2393,
      "step": 31298
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.749997615814209,
      "learning_rate": 0.0005730924542605674,
      "loss": 3.1466,
      "step": 31299
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3272294998168945,
      "learning_rate": 0.0005730907610239835,
      "loss": 2.9224,
      "step": 31300
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8130300045013428,
      "learning_rate": 0.000573089067736627,
      "loss": 3.1442,
      "step": 31301
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4453470706939697,
      "learning_rate": 0.000573087374398498,
      "loss": 3.001,
      "step": 31302
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9937248229980469,
      "learning_rate": 0.000573085681009597,
      "loss": 2.982,
      "step": 31303
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.041879892349243,
      "learning_rate": 0.0005730839875699242,
      "loss": 3.2468,
      "step": 31304
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.72126042842865,
      "learning_rate": 0.0005730822940794799,
      "loss": 3.319,
      "step": 31305
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.990146279335022,
      "learning_rate": 0.0005730806005382644,
      "loss": 3.2185,
      "step": 31306
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3982566595077515,
      "learning_rate": 0.0005730789069462781,
      "loss": 3.0236,
      "step": 31307
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.343860387802124,
      "learning_rate": 0.0005730772133035213,
      "loss": 3.1487,
      "step": 31308
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0387227535247803,
      "learning_rate": 0.0005730755196099943,
      "loss": 3.118,
      "step": 31309
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.532640218734741,
      "learning_rate": 0.0005730738258656973,
      "loss": 3.0785,
      "step": 31310
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0215466022491455,
      "learning_rate": 0.0005730721320706309,
      "loss": 3.1142,
      "step": 31311
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4900637865066528,
      "learning_rate": 0.0005730704382247952,
      "loss": 3.2934,
      "step": 31312
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0957486629486084,
      "learning_rate": 0.0005730687443281905,
      "loss": 3.1754,
      "step": 31313
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6942089796066284,
      "learning_rate": 0.000573067050380817,
      "loss": 3.133,
      "step": 31314
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5518132448196411,
      "learning_rate": 0.0005730653563826754,
      "loss": 2.9759,
      "step": 31315
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7590324878692627,
      "learning_rate": 0.0005730636623337658,
      "loss": 3.0082,
      "step": 31316
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.940665364265442,
      "learning_rate": 0.0005730619682340884,
      "loss": 3.0295,
      "step": 31317
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6731079816818237,
      "learning_rate": 0.0005730602740836437,
      "loss": 3.3205,
      "step": 31318
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.681252121925354,
      "learning_rate": 0.0005730585798824319,
      "loss": 3.2268,
      "step": 31319
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3550925254821777,
      "learning_rate": 0.0005730568856304534,
      "loss": 3.1918,
      "step": 31320
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5547020435333252,
      "learning_rate": 0.0005730551913277084,
      "loss": 3.0548,
      "step": 31321
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6809488534927368,
      "learning_rate": 0.0005730534969741974,
      "loss": 3.2428,
      "step": 31322
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.380271553993225,
      "learning_rate": 0.0005730518025699204,
      "loss": 3.0897,
      "step": 31323
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7701143026351929,
      "learning_rate": 0.0005730501081148781,
      "loss": 3.1096,
      "step": 31324
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.547909140586853,
      "learning_rate": 0.0005730484136090707,
      "loss": 3.1623,
      "step": 31325
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3937759399414062,
      "learning_rate": 0.0005730467190524982,
      "loss": 3.0326,
      "step": 31326
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9112699031829834,
      "learning_rate": 0.0005730450244451613,
      "loss": 3.0723,
      "step": 31327
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2764261960983276,
      "learning_rate": 0.0005730433297870602,
      "loss": 2.8094,
      "step": 31328
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.550967812538147,
      "learning_rate": 0.000573041635078195,
      "loss": 2.9318,
      "step": 31329
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5906330347061157,
      "learning_rate": 0.0005730399403185664,
      "loss": 2.9826,
      "step": 31330
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2730145454406738,
      "learning_rate": 0.0005730382455081746,
      "loss": 3.0143,
      "step": 31331
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.45158851146698,
      "learning_rate": 0.0005730365506470196,
      "loss": 2.7899,
      "step": 31332
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4985140562057495,
      "learning_rate": 0.0005730348557351021,
      "loss": 3.1319,
      "step": 31333
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4793776273727417,
      "learning_rate": 0.0005730331607724222,
      "loss": 3.4751,
      "step": 31334
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3487269878387451,
      "learning_rate": 0.0005730314657589803,
      "loss": 3.0338,
      "step": 31335
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0442538261413574,
      "learning_rate": 0.0005730297706947767,
      "loss": 2.8118,
      "step": 31336
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.35968816280365,
      "learning_rate": 0.0005730280755798118,
      "loss": 3.2361,
      "step": 31337
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.580193042755127,
      "learning_rate": 0.0005730263804140857,
      "loss": 3.1391,
      "step": 31338
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3957921266555786,
      "learning_rate": 0.0005730246851975988,
      "loss": 3.1039,
      "step": 31339
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6086018085479736,
      "learning_rate": 0.0005730229899303516,
      "loss": 3.0156,
      "step": 31340
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.487369179725647,
      "learning_rate": 0.0005730212946123442,
      "loss": 2.8457,
      "step": 31341
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4265128374099731,
      "learning_rate": 0.000573019599243577,
      "loss": 3.1577,
      "step": 31342
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6990904808044434,
      "learning_rate": 0.0005730179038240504,
      "loss": 3.0706,
      "step": 31343
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7808078527450562,
      "learning_rate": 0.0005730162083537644,
      "loss": 3.0548,
      "step": 31344
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5992094278335571,
      "learning_rate": 0.0005730145128327197,
      "loss": 2.851,
      "step": 31345
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3904610872268677,
      "learning_rate": 0.0005730128172609165,
      "loss": 3.1714,
      "step": 31346
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5138417482376099,
      "learning_rate": 0.0005730111216383549,
      "loss": 3.1543,
      "step": 31347
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9195805788040161,
      "learning_rate": 0.0005730094259650355,
      "loss": 3.0065,
      "step": 31348
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.491755723953247,
      "learning_rate": 0.0005730077302409584,
      "loss": 3.0595,
      "step": 31349
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5589334964752197,
      "learning_rate": 0.000573006034466124,
      "loss": 3.1355,
      "step": 31350
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6380658149719238,
      "learning_rate": 0.0005730043386405327,
      "loss": 3.029,
      "step": 31351
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2917685508728027,
      "learning_rate": 0.0005730026427641847,
      "loss": 3.3447,
      "step": 31352
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2617623805999756,
      "learning_rate": 0.0005730009468370805,
      "loss": 3.2355,
      "step": 31353
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5179606676101685,
      "learning_rate": 0.00057299925085922,
      "loss": 3.1571,
      "step": 31354
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5833216905593872,
      "learning_rate": 0.000572997554830604,
      "loss": 3.036,
      "step": 31355
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1790473461151123,
      "learning_rate": 0.0005729958587512325,
      "loss": 2.9742,
      "step": 31356
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2699921131134033,
      "learning_rate": 0.000572994162621106,
      "loss": 3.1754,
      "step": 31357
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1004767417907715,
      "learning_rate": 0.0005729924664402247,
      "loss": 2.9682,
      "step": 31358
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8819119930267334,
      "learning_rate": 0.0005729907702085889,
      "loss": 2.9825,
      "step": 31359
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.672402024269104,
      "learning_rate": 0.0005729890739261989,
      "loss": 2.8294,
      "step": 31360
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6690844297409058,
      "learning_rate": 0.0005729873775930553,
      "loss": 2.9077,
      "step": 31361
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.8934855461120605,
      "learning_rate": 0.000572985681209158,
      "loss": 3.2011,
      "step": 31362
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6882354021072388,
      "learning_rate": 0.0005729839847745076,
      "loss": 3.1735,
      "step": 31363
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.480319619178772,
      "learning_rate": 0.0005729822882891043,
      "loss": 3.2984,
      "step": 31364
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5965702533721924,
      "learning_rate": 0.0005729805917529484,
      "loss": 2.9308,
      "step": 31365
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4414312839508057,
      "learning_rate": 0.0005729788951660403,
      "loss": 3.0476,
      "step": 31366
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5337413549423218,
      "learning_rate": 0.0005729771985283803,
      "loss": 3.0189,
      "step": 31367
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.369435429573059,
      "learning_rate": 0.0005729755018399687,
      "loss": 3.1124,
      "step": 31368
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4335265159606934,
      "learning_rate": 0.0005729738051008056,
      "loss": 3.1547,
      "step": 31369
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8790247440338135,
      "learning_rate": 0.0005729721083108917,
      "loss": 3.0838,
      "step": 31370
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3565117120742798,
      "learning_rate": 0.000572970411470227,
      "loss": 3.1746,
      "step": 31371
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.192492961883545,
      "learning_rate": 0.0005729687145788121,
      "loss": 3.1635,
      "step": 31372
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.74504816532135,
      "learning_rate": 0.0005729670176366471,
      "loss": 3.0809,
      "step": 31373
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5203969478607178,
      "learning_rate": 0.0005729653206437324,
      "loss": 2.9226,
      "step": 31374
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.417301058769226,
      "learning_rate": 0.0005729636236000683,
      "loss": 2.9209,
      "step": 31375
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0537095069885254,
      "learning_rate": 0.0005729619265056552,
      "loss": 3.2152,
      "step": 31376
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4056588411331177,
      "learning_rate": 0.0005729602293604931,
      "loss": 3.1806,
      "step": 31377
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3925349712371826,
      "learning_rate": 0.0005729585321645826,
      "loss": 2.9716,
      "step": 31378
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.9911630153656006,
      "learning_rate": 0.0005729568349179241,
      "loss": 3.1764,
      "step": 31379
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9331899881362915,
      "learning_rate": 0.0005729551376205177,
      "loss": 3.184,
      "step": 31380
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4407740831375122,
      "learning_rate": 0.0005729534402723637,
      "loss": 3.1684,
      "step": 31381
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3933361768722534,
      "learning_rate": 0.0005729517428734627,
      "loss": 2.9717,
      "step": 31382
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1827855110168457,
      "learning_rate": 0.0005729500454238147,
      "loss": 3.241,
      "step": 31383
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6388989686965942,
      "learning_rate": 0.0005729483479234201,
      "loss": 2.8562,
      "step": 31384
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.579086184501648,
      "learning_rate": 0.0005729466503722793,
      "loss": 3.1685,
      "step": 31385
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4216246604919434,
      "learning_rate": 0.0005729449527703925,
      "loss": 3.2899,
      "step": 31386
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.715330123901367,
      "learning_rate": 0.0005729432551177602,
      "loss": 3.248,
      "step": 31387
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.37942636013031,
      "learning_rate": 0.0005729415574143824,
      "loss": 3.0381,
      "step": 31388
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5500848293304443,
      "learning_rate": 0.0005729398596602598,
      "loss": 2.862,
      "step": 31389
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7394903898239136,
      "learning_rate": 0.0005729381618553924,
      "loss": 3.365,
      "step": 31390
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5117985010147095,
      "learning_rate": 0.0005729364639997807,
      "loss": 3.2717,
      "step": 31391
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6698200702667236,
      "learning_rate": 0.000572934766093425,
      "loss": 3.2416,
      "step": 31392
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.557463526725769,
      "learning_rate": 0.0005729330681363254,
      "loss": 3.1577,
      "step": 31393
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5112950801849365,
      "learning_rate": 0.0005729313701284826,
      "loss": 2.9086,
      "step": 31394
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4304600954055786,
      "learning_rate": 0.0005729296720698967,
      "loss": 2.9395,
      "step": 31395
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6521483659744263,
      "learning_rate": 0.000572927973960568,
      "loss": 2.9787,
      "step": 31396
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6151176691055298,
      "learning_rate": 0.0005729262758004968,
      "loss": 3.1289,
      "step": 31397
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6647270917892456,
      "learning_rate": 0.0005729245775896834,
      "loss": 3.0842,
      "step": 31398
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3027054071426392,
      "learning_rate": 0.0005729228793281282,
      "loss": 3.0942,
      "step": 31399
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2669802904129028,
      "learning_rate": 0.0005729211810158316,
      "loss": 3.1764,
      "step": 31400
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2781423330307007,
      "learning_rate": 0.0005729194826527936,
      "loss": 2.8006,
      "step": 31401
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.819385290145874,
      "learning_rate": 0.0005729177842390148,
      "loss": 3.2188,
      "step": 31402
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.82468843460083,
      "learning_rate": 0.0005729160857744955,
      "loss": 3.104,
      "step": 31403
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5129846334457397,
      "learning_rate": 0.0005729143872592359,
      "loss": 3.0541,
      "step": 31404
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3057408332824707,
      "learning_rate": 0.0005729126886932364,
      "loss": 3.189,
      "step": 31405
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5228811502456665,
      "learning_rate": 0.0005729109900764972,
      "loss": 3.0648,
      "step": 31406
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5606597661972046,
      "learning_rate": 0.0005729092914090187,
      "loss": 3.1999,
      "step": 31407
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5770142078399658,
      "learning_rate": 0.0005729075926908013,
      "loss": 3.1032,
      "step": 31408
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5626791715621948,
      "learning_rate": 0.0005729058939218451,
      "loss": 3.0803,
      "step": 31409
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5568499565124512,
      "learning_rate": 0.0005729041951021506,
      "loss": 3.0211,
      "step": 31410
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.391644835472107,
      "learning_rate": 0.0005729024962317182,
      "loss": 3.176,
      "step": 31411
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.508907437324524,
      "learning_rate": 0.0005729007973105479,
      "loss": 3.1008,
      "step": 31412
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4591662883758545,
      "learning_rate": 0.0005728990983386403,
      "loss": 3.2511,
      "step": 31413
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5326735973358154,
      "learning_rate": 0.0005728973993159955,
      "loss": 3.1084,
      "step": 31414
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3524271249771118,
      "learning_rate": 0.000572895700242614,
      "loss": 3.0384,
      "step": 31415
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5975645780563354,
      "learning_rate": 0.000572894001118496,
      "loss": 3.0603,
      "step": 31416
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.677772045135498,
      "learning_rate": 0.0005728923019436418,
      "loss": 3.0267,
      "step": 31417
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2657819986343384,
      "learning_rate": 0.0005728906027180518,
      "loss": 3.1126,
      "step": 31418
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7369493246078491,
      "learning_rate": 0.0005728889034417264,
      "loss": 3.1181,
      "step": 31419
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4163367748260498,
      "learning_rate": 0.0005728872041146657,
      "loss": 2.9996,
      "step": 31420
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.624141812324524,
      "learning_rate": 0.0005728855047368702,
      "loss": 3.0822,
      "step": 31421
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.186021089553833,
      "learning_rate": 0.00057288380530834,
      "loss": 3.1,
      "step": 31422
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3452335596084595,
      "learning_rate": 0.0005728821058290757,
      "loss": 3.2029,
      "step": 31423
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9685906171798706,
      "learning_rate": 0.0005728804062990773,
      "loss": 3.0218,
      "step": 31424
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.379498839378357,
      "learning_rate": 0.0005728787067183454,
      "loss": 3.1489,
      "step": 31425
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4494259357452393,
      "learning_rate": 0.0005728770070868802,
      "loss": 3.042,
      "step": 31426
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7094026803970337,
      "learning_rate": 0.0005728753074046819,
      "loss": 3.1488,
      "step": 31427
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6051169633865356,
      "learning_rate": 0.000572873607671751,
      "loss": 3.1443,
      "step": 31428
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7242810726165771,
      "learning_rate": 0.0005728719078880878,
      "loss": 3.141,
      "step": 31429
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.732712745666504,
      "learning_rate": 0.0005728702080536925,
      "loss": 2.9496,
      "step": 31430
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.37839674949646,
      "learning_rate": 0.0005728685081685655,
      "loss": 3.0667,
      "step": 31431
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3712056875228882,
      "learning_rate": 0.0005728668082327071,
      "loss": 3.0421,
      "step": 31432
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.47496497631073,
      "learning_rate": 0.0005728651082461176,
      "loss": 3.1341,
      "step": 31433
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.594963788986206,
      "learning_rate": 0.0005728634082087973,
      "loss": 3.0175,
      "step": 31434
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5386496782302856,
      "learning_rate": 0.0005728617081207466,
      "loss": 2.9997,
      "step": 31435
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.248408079147339,
      "learning_rate": 0.0005728600079819658,
      "loss": 2.9701,
      "step": 31436
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.316160798072815,
      "learning_rate": 0.000572858307792455,
      "loss": 2.9591,
      "step": 31437
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8258922100067139,
      "learning_rate": 0.0005728566075522148,
      "loss": 3.282,
      "step": 31438
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4494400024414062,
      "learning_rate": 0.0005728549072612454,
      "loss": 3.1377,
      "step": 31439
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9980807304382324,
      "learning_rate": 0.0005728532069195472,
      "loss": 3.1743,
      "step": 31440
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4545645713806152,
      "learning_rate": 0.0005728515065271203,
      "loss": 2.945,
      "step": 31441
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4598987102508545,
      "learning_rate": 0.0005728498060839652,
      "loss": 2.9322,
      "step": 31442
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.564688801765442,
      "learning_rate": 0.0005728481055900822,
      "loss": 3.239,
      "step": 31443
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.146939992904663,
      "learning_rate": 0.0005728464050454715,
      "loss": 2.9834,
      "step": 31444
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5725855827331543,
      "learning_rate": 0.0005728447044501336,
      "loss": 3.0737,
      "step": 31445
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.942413091659546,
      "learning_rate": 0.0005728430038040687,
      "loss": 3.0923,
      "step": 31446
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7275164127349854,
      "learning_rate": 0.0005728413031072771,
      "loss": 3.3402,
      "step": 31447
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3227224349975586,
      "learning_rate": 0.0005728396023597592,
      "loss": 3.0439,
      "step": 31448
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4429891109466553,
      "learning_rate": 0.0005728379015615152,
      "loss": 3.0968,
      "step": 31449
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6003299951553345,
      "learning_rate": 0.0005728362007125455,
      "loss": 3.0942,
      "step": 31450
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4827609062194824,
      "learning_rate": 0.0005728344998128504,
      "loss": 3.1094,
      "step": 31451
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.131866931915283,
      "learning_rate": 0.0005728327988624303,
      "loss": 3.2267,
      "step": 31452
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8475241661071777,
      "learning_rate": 0.0005728310978612854,
      "loss": 2.984,
      "step": 31453
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5019452571868896,
      "learning_rate": 0.000572829396809416,
      "loss": 3.2246,
      "step": 31454
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6995511054992676,
      "learning_rate": 0.0005728276957068225,
      "loss": 3.0517,
      "step": 31455
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6456515789031982,
      "learning_rate": 0.0005728259945535052,
      "loss": 3.2042,
      "step": 31456
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4924890995025635,
      "learning_rate": 0.0005728242933494644,
      "loss": 3.3666,
      "step": 31457
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.371147632598877,
      "learning_rate": 0.0005728225920947003,
      "loss": 2.9105,
      "step": 31458
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2247074842453003,
      "learning_rate": 0.0005728208907892134,
      "loss": 3.3024,
      "step": 31459
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4996601343154907,
      "learning_rate": 0.0005728191894330039,
      "loss": 3.1217,
      "step": 31460
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4854789972305298,
      "learning_rate": 0.0005728174880260722,
      "loss": 3.3958,
      "step": 31461
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0453217029571533,
      "learning_rate": 0.0005728157865684186,
      "loss": 2.8938,
      "step": 31462
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.59364652633667,
      "learning_rate": 0.0005728140850600435,
      "loss": 3.3807,
      "step": 31463
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3730621337890625,
      "learning_rate": 0.0005728123835009469,
      "loss": 3.1445,
      "step": 31464
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.464069366455078,
      "learning_rate": 0.0005728106818911295,
      "loss": 2.9635,
      "step": 31465
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5194170475006104,
      "learning_rate": 0.0005728089802305914,
      "loss": 3.1431,
      "step": 31466
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5958763360977173,
      "learning_rate": 0.000572807278519333,
      "loss": 2.9929,
      "step": 31467
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5313674211502075,
      "learning_rate": 0.0005728055767573545,
      "loss": 3.0696,
      "step": 31468
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2700344324111938,
      "learning_rate": 0.0005728038749446562,
      "loss": 2.948,
      "step": 31469
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6240748167037964,
      "learning_rate": 0.0005728021730812387,
      "loss": 2.9544,
      "step": 31470
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2694545984268188,
      "learning_rate": 0.000572800471167102,
      "loss": 3.1123,
      "step": 31471
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4159283638000488,
      "learning_rate": 0.0005727987692022466,
      "loss": 3.055,
      "step": 31472
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4033116102218628,
      "learning_rate": 0.0005727970671866728,
      "loss": 3.1057,
      "step": 31473
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5562595129013062,
      "learning_rate": 0.0005727953651203808,
      "loss": 3.1374,
      "step": 31474
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6077148914337158,
      "learning_rate": 0.000572793663003371,
      "loss": 3.1292,
      "step": 31475
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.181889295578003,
      "learning_rate": 0.0005727919608356438,
      "loss": 3.1872,
      "step": 31476
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4826998710632324,
      "learning_rate": 0.0005727902586171994,
      "loss": 3.1558,
      "step": 31477
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.00297212600708,
      "learning_rate": 0.000572788556348038,
      "loss": 2.9049,
      "step": 31478
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7692501544952393,
      "learning_rate": 0.0005727868540281601,
      "loss": 3.1103,
      "step": 31479
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5338572263717651,
      "learning_rate": 0.0005727851516575661,
      "loss": 2.7409,
      "step": 31480
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4062680006027222,
      "learning_rate": 0.000572783449236256,
      "loss": 3.0881,
      "step": 31481
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7290705442428589,
      "learning_rate": 0.0005727817467642305,
      "loss": 2.9161,
      "step": 31482
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.826377511024475,
      "learning_rate": 0.0005727800442414897,
      "loss": 3.0865,
      "step": 31483
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4575248956680298,
      "learning_rate": 0.0005727783416680338,
      "loss": 2.8324,
      "step": 31484
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6712191104888916,
      "learning_rate": 0.0005727766390438634,
      "loss": 3.2202,
      "step": 31485
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5038602352142334,
      "learning_rate": 0.0005727749363689786,
      "loss": 2.9439,
      "step": 31486
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3973115682601929,
      "learning_rate": 0.0005727732336433798,
      "loss": 3.3783,
      "step": 31487
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6732711791992188,
      "learning_rate": 0.0005727715308670673,
      "loss": 2.9816,
      "step": 31488
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6951801776885986,
      "learning_rate": 0.0005727698280400415,
      "loss": 2.9142,
      "step": 31489
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6802940368652344,
      "learning_rate": 0.0005727681251623026,
      "loss": 3.1072,
      "step": 31490
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.255631685256958,
      "learning_rate": 0.0005727664222338509,
      "loss": 3.3815,
      "step": 31491
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9108717441558838,
      "learning_rate": 0.0005727647192546868,
      "loss": 3.0952,
      "step": 31492
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3849478960037231,
      "learning_rate": 0.0005727630162248106,
      "loss": 3.1099,
      "step": 31493
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1735360622406006,
      "learning_rate": 0.0005727613131442225,
      "loss": 3.2538,
      "step": 31494
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8413752317428589,
      "learning_rate": 0.0005727596100129231,
      "loss": 2.9141,
      "step": 31495
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5434293746948242,
      "learning_rate": 0.0005727579068309125,
      "loss": 2.8802,
      "step": 31496
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3422088623046875,
      "learning_rate": 0.0005727562035981912,
      "loss": 3.0239,
      "step": 31497
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5810425281524658,
      "learning_rate": 0.0005727545003147591,
      "loss": 3.1411,
      "step": 31498
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.519448161125183,
      "learning_rate": 0.0005727527969806167,
      "loss": 2.9483,
      "step": 31499
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.782837986946106,
      "learning_rate": 0.0005727510935957647,
      "loss": 3.2049,
      "step": 31500
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.571653127670288,
      "learning_rate": 0.000572749390160203,
      "loss": 3.0506,
      "step": 31501
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7826480865478516,
      "learning_rate": 0.0005727476866739321,
      "loss": 3.004,
      "step": 31502
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5982352495193481,
      "learning_rate": 0.0005727459831369522,
      "loss": 3.0502,
      "step": 31503
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5597988367080688,
      "learning_rate": 0.0005727442795492638,
      "loss": 3.1525,
      "step": 31504
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8667149543762207,
      "learning_rate": 0.0005727425759108668,
      "loss": 3.0058,
      "step": 31505
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2816871404647827,
      "learning_rate": 0.000572740872221762,
      "loss": 2.9632,
      "step": 31506
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5111232995986938,
      "learning_rate": 0.0005727391684819496,
      "loss": 2.9974,
      "step": 31507
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1506502628326416,
      "learning_rate": 0.0005727374646914298,
      "loss": 3.1724,
      "step": 31508
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7767084836959839,
      "learning_rate": 0.0005727357608502028,
      "loss": 2.8951,
      "step": 31509
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7009124755859375,
      "learning_rate": 0.0005727340569582692,
      "loss": 3.4785,
      "step": 31510
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.480952501296997,
      "learning_rate": 0.0005727323530156293,
      "loss": 3.1634,
      "step": 31511
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.772855520248413,
      "learning_rate": 0.0005727306490222831,
      "loss": 3.2145,
      "step": 31512
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9225293397903442,
      "learning_rate": 0.0005727289449782312,
      "loss": 3.3021,
      "step": 31513
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4543014764785767,
      "learning_rate": 0.0005727272408834737,
      "loss": 3.1197,
      "step": 31514
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3675199747085571,
      "learning_rate": 0.0005727255367380113,
      "loss": 3.2,
      "step": 31515
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.544592022895813,
      "learning_rate": 0.0005727238325418441,
      "loss": 3.014,
      "step": 31516
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.482102394104004,
      "learning_rate": 0.0005727221282949721,
      "loss": 3.1721,
      "step": 31517
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.601933479309082,
      "learning_rate": 0.0005727204239973962,
      "loss": 3.2102,
      "step": 31518
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7712873220443726,
      "learning_rate": 0.0005727187196491163,
      "loss": 3.0667,
      "step": 31519
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.7593929767608643,
      "learning_rate": 0.0005727170152501328,
      "loss": 3.2636,
      "step": 31520
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4760559797286987,
      "learning_rate": 0.0005727153108004462,
      "loss": 2.9832,
      "step": 31521
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4903252124786377,
      "learning_rate": 0.0005727136063000565,
      "loss": 3.0984,
      "step": 31522
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4144822359085083,
      "learning_rate": 0.0005727119017489643,
      "loss": 3.1454,
      "step": 31523
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9651446342468262,
      "learning_rate": 0.0005727101971471697,
      "loss": 3.4651,
      "step": 31524
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8673911094665527,
      "learning_rate": 0.0005727084924946733,
      "loss": 3.2827,
      "step": 31525
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.376102089881897,
      "learning_rate": 0.000572706787791475,
      "loss": 2.9461,
      "step": 31526
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6658471822738647,
      "learning_rate": 0.0005727050830375756,
      "loss": 3.2205,
      "step": 31527
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0468368530273438,
      "learning_rate": 0.000572703378232975,
      "loss": 3.0296,
      "step": 31528
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6577972173690796,
      "learning_rate": 0.0005727016733776738,
      "loss": 2.962,
      "step": 31529
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5989594459533691,
      "learning_rate": 0.0005726999684716722,
      "loss": 3.0469,
      "step": 31530
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9534422159194946,
      "learning_rate": 0.0005726982635149705,
      "loss": 2.9517,
      "step": 31531
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6778334379196167,
      "learning_rate": 0.0005726965585075691,
      "loss": 2.8535,
      "step": 31532
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6853549480438232,
      "learning_rate": 0.0005726948534494683,
      "loss": 3.147,
      "step": 31533
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5161153078079224,
      "learning_rate": 0.0005726931483406683,
      "loss": 2.853,
      "step": 31534
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6697475910186768,
      "learning_rate": 0.0005726914431811695,
      "loss": 3.0806,
      "step": 31535
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5872784852981567,
      "learning_rate": 0.0005726897379709721,
      "loss": 3.3414,
      "step": 31536
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9029468297958374,
      "learning_rate": 0.0005726880327100767,
      "loss": 2.8911,
      "step": 31537
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.599751591682434,
      "learning_rate": 0.0005726863273984834,
      "loss": 2.9789,
      "step": 31538
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6165262460708618,
      "learning_rate": 0.0005726846220361926,
      "loss": 2.8665,
      "step": 31539
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7322546243667603,
      "learning_rate": 0.0005726829166232046,
      "loss": 3.2175,
      "step": 31540
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.347757339477539,
      "learning_rate": 0.0005726812111595196,
      "loss": 3.2025,
      "step": 31541
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7633064985275269,
      "learning_rate": 0.000572679505645138,
      "loss": 3.0303,
      "step": 31542
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9763872623443604,
      "learning_rate": 0.0005726778000800602,
      "loss": 2.9729,
      "step": 31543
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3017358779907227,
      "learning_rate": 0.0005726760944642864,
      "loss": 3.0611,
      "step": 31544
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.0435447692871094,
      "learning_rate": 0.000572674388797817,
      "loss": 3.0458,
      "step": 31545
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6419497728347778,
      "learning_rate": 0.0005726726830806522,
      "loss": 3.0028,
      "step": 31546
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.795305013656616,
      "learning_rate": 0.0005726709773127927,
      "loss": 3.1758,
      "step": 31547
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.23665189743042,
      "learning_rate": 0.0005726692714942383,
      "loss": 3.0253,
      "step": 31548
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.9219889640808105,
      "learning_rate": 0.0005726675656249895,
      "loss": 3.053,
      "step": 31549
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4664690494537354,
      "learning_rate": 0.0005726658597050467,
      "loss": 2.8983,
      "step": 31550
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4890682697296143,
      "learning_rate": 0.0005726641537344101,
      "loss": 3.1384,
      "step": 31551
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.7099058628082275,
      "learning_rate": 0.0005726624477130802,
      "loss": 3.1419,
      "step": 31552
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.406010627746582,
      "learning_rate": 0.0005726607416410572,
      "loss": 2.9876,
      "step": 31553
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6940438747406006,
      "learning_rate": 0.0005726590355183414,
      "loss": 3.0057,
      "step": 31554
    },
    {
      "epoch": 0.41,
      "grad_norm": 4.712591171264648,
      "learning_rate": 0.000572657329344933,
      "loss": 2.7648,
      "step": 31555
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3389930725097656,
      "learning_rate": 0.0005726556231208326,
      "loss": 3.062,
      "step": 31556
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7040064334869385,
      "learning_rate": 0.0005726539168460404,
      "loss": 3.1468,
      "step": 31557
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1532278060913086,
      "learning_rate": 0.0005726522105205566,
      "loss": 3.0213,
      "step": 31558
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5316729545593262,
      "learning_rate": 0.0005726505041443817,
      "loss": 3.1474,
      "step": 31559
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9099563360214233,
      "learning_rate": 0.0005726487977175158,
      "loss": 2.9741,
      "step": 31560
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.457350492477417,
      "learning_rate": 0.0005726470912399594,
      "loss": 2.9946,
      "step": 31561
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.323028326034546,
      "learning_rate": 0.0005726453847117128,
      "loss": 2.8826,
      "step": 31562
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6201729774475098,
      "learning_rate": 0.0005726436781327762,
      "loss": 2.7878,
      "step": 31563
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6171671152114868,
      "learning_rate": 0.0005726419715031501,
      "loss": 2.8318,
      "step": 31564
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6788111925125122,
      "learning_rate": 0.0005726402648228346,
      "loss": 3.2136,
      "step": 31565
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3266531229019165,
      "learning_rate": 0.0005726385580918301,
      "loss": 3.222,
      "step": 31566
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4800934791564941,
      "learning_rate": 0.000572636851310137,
      "loss": 3.0253,
      "step": 31567
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.742088794708252,
      "learning_rate": 0.0005726351444777557,
      "loss": 3.2589,
      "step": 31568
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5769174098968506,
      "learning_rate": 0.0005726334375946862,
      "loss": 3.1177,
      "step": 31569
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.33884334564209,
      "learning_rate": 0.0005726317306609291,
      "loss": 3.0223,
      "step": 31570
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5974622964859009,
      "learning_rate": 0.0005726300236764846,
      "loss": 3.3233,
      "step": 31571
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5154550075531006,
      "learning_rate": 0.0005726283166413529,
      "loss": 3.0444,
      "step": 31572
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.581376314163208,
      "learning_rate": 0.0005726266095555346,
      "loss": 3.1727,
      "step": 31573
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8095109462738037,
      "learning_rate": 0.0005726249024190299,
      "loss": 3.1272,
      "step": 31574
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.542132019996643,
      "learning_rate": 0.000572623195231839,
      "loss": 2.9283,
      "step": 31575
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.369041919708252,
      "learning_rate": 0.0005726214879939623,
      "loss": 3.0087,
      "step": 31576
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3612030744552612,
      "learning_rate": 0.0005726197807054002,
      "loss": 2.938,
      "step": 31577
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.978524088859558,
      "learning_rate": 0.0005726180733661528,
      "loss": 3.0099,
      "step": 31578
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3931751251220703,
      "learning_rate": 0.0005726163659762206,
      "loss": 3.078,
      "step": 31579
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2727203369140625,
      "learning_rate": 0.0005726146585356039,
      "loss": 2.8631,
      "step": 31580
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7264853715896606,
      "learning_rate": 0.000572612951044303,
      "loss": 3.1316,
      "step": 31581
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3756036758422852,
      "learning_rate": 0.0005726112435023182,
      "loss": 2.9901,
      "step": 31582
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5535014867782593,
      "learning_rate": 0.0005726095359096498,
      "loss": 3.0714,
      "step": 31583
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8733952045440674,
      "learning_rate": 0.0005726078282662981,
      "loss": 2.9006,
      "step": 31584
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.508529543876648,
      "learning_rate": 0.0005726061205722636,
      "loss": 3.1111,
      "step": 31585
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4045655727386475,
      "learning_rate": 0.0005726044128275463,
      "loss": 3.1337,
      "step": 31586
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.450631856918335,
      "learning_rate": 0.0005726027050321469,
      "loss": 3.193,
      "step": 31587
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3495829105377197,
      "learning_rate": 0.0005726009971860653,
      "loss": 2.9512,
      "step": 31588
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7993736267089844,
      "learning_rate": 0.0005725992892893022,
      "loss": 3.208,
      "step": 31589
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6215928792953491,
      "learning_rate": 0.0005725975813418577,
      "loss": 3.379,
      "step": 31590
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.844073534011841,
      "learning_rate": 0.0005725958733437321,
      "loss": 3.1998,
      "step": 31591
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4242995977401733,
      "learning_rate": 0.0005725941652949258,
      "loss": 3.3036,
      "step": 31592
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7416564226150513,
      "learning_rate": 0.0005725924571954391,
      "loss": 3.0472,
      "step": 31593
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6618531942367554,
      "learning_rate": 0.0005725907490452723,
      "loss": 3.2089,
      "step": 31594
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4795589447021484,
      "learning_rate": 0.0005725890408444258,
      "loss": 3.0897,
      "step": 31595
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6693631410598755,
      "learning_rate": 0.0005725873325928999,
      "loss": 2.9245,
      "step": 31596
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5094586610794067,
      "learning_rate": 0.0005725856242906948,
      "loss": 3.3714,
      "step": 31597
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3523814678192139,
      "learning_rate": 0.0005725839159378108,
      "loss": 2.8423,
      "step": 31598
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.766237497329712,
      "learning_rate": 0.0005725822075342483,
      "loss": 2.916,
      "step": 31599
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.514265298843384,
      "learning_rate": 0.0005725804990800077,
      "loss": 3.3176,
      "step": 31600
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4519755840301514,
      "learning_rate": 0.0005725787905750894,
      "loss": 3.3134,
      "step": 31601
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.909860372543335,
      "learning_rate": 0.0005725770820194933,
      "loss": 3.2416,
      "step": 31602
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6454508304595947,
      "learning_rate": 0.0005725753734132201,
      "loss": 3.0731,
      "step": 31603
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7130331993103027,
      "learning_rate": 0.00057257366475627,
      "loss": 2.8294,
      "step": 31604
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.569146990776062,
      "learning_rate": 0.0005725719560486433,
      "loss": 3.0347,
      "step": 31605
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4869310855865479,
      "learning_rate": 0.0005725702472903402,
      "loss": 3.2171,
      "step": 31606
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.381476879119873,
      "learning_rate": 0.0005725685384813614,
      "loss": 2.922,
      "step": 31607
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1534714698791504,
      "learning_rate": 0.0005725668296217068,
      "loss": 3.0926,
      "step": 31608
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4869461059570312,
      "learning_rate": 0.0005725651207113768,
      "loss": 3.1256,
      "step": 31609
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.617844581604004,
      "learning_rate": 0.000572563411750372,
      "loss": 3.117,
      "step": 31610
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5050897598266602,
      "learning_rate": 0.0005725617027386924,
      "loss": 3.1124,
      "step": 31611
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3949984312057495,
      "learning_rate": 0.0005725599936763384,
      "loss": 3.2596,
      "step": 31612
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3217190504074097,
      "learning_rate": 0.0005725582845633104,
      "loss": 3.1611,
      "step": 31613
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3325272798538208,
      "learning_rate": 0.0005725565753996086,
      "loss": 2.7538,
      "step": 31614
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.641531229019165,
      "learning_rate": 0.0005725548661852335,
      "loss": 2.9793,
      "step": 31615
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.199947714805603,
      "learning_rate": 0.0005725531569201852,
      "loss": 3.1206,
      "step": 31616
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5681827068328857,
      "learning_rate": 0.0005725514476044643,
      "loss": 3.0644,
      "step": 31617
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6745336055755615,
      "learning_rate": 0.0005725497382380708,
      "loss": 3.0299,
      "step": 31618
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3922021389007568,
      "learning_rate": 0.0005725480288210051,
      "loss": 3.2812,
      "step": 31619
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7836859226226807,
      "learning_rate": 0.0005725463193532677,
      "loss": 3.1848,
      "step": 31620
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9126039743423462,
      "learning_rate": 0.0005725446098348589,
      "loss": 3.2401,
      "step": 31621
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.470543622970581,
      "learning_rate": 0.0005725429002657787,
      "loss": 2.8848,
      "step": 31622
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.79986572265625,
      "learning_rate": 0.0005725411906460277,
      "loss": 3.0151,
      "step": 31623
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1724038124084473,
      "learning_rate": 0.0005725394809756061,
      "loss": 3.2906,
      "step": 31624
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9978440999984741,
      "learning_rate": 0.0005725377712545143,
      "loss": 3.2682,
      "step": 31625
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3697216510772705,
      "learning_rate": 0.0005725360614827526,
      "loss": 3.0919,
      "step": 31626
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.0155398845672607,
      "learning_rate": 0.0005725343516603213,
      "loss": 2.877,
      "step": 31627
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.025057315826416,
      "learning_rate": 0.0005725326417872206,
      "loss": 3.142,
      "step": 31628
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4250961542129517,
      "learning_rate": 0.0005725309318634512,
      "loss": 3.3389,
      "step": 31629
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4598345756530762,
      "learning_rate": 0.000572529221889013,
      "loss": 2.9271,
      "step": 31630
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2446391582489014,
      "learning_rate": 0.0005725275118639064,
      "loss": 3.0508,
      "step": 31631
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4426552057266235,
      "learning_rate": 0.0005725258017881319,
      "loss": 2.9459,
      "step": 31632
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4652519226074219,
      "learning_rate": 0.0005725240916616898,
      "loss": 2.8951,
      "step": 31633
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.668351173400879,
      "learning_rate": 0.0005725223814845801,
      "loss": 3.0604,
      "step": 31634
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.1641901731491089,
      "learning_rate": 0.0005725206712568035,
      "loss": 2.9885,
      "step": 31635
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5389667749404907,
      "learning_rate": 0.0005725189609783601,
      "loss": 3.2081,
      "step": 31636
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5045503377914429,
      "learning_rate": 0.0005725172506492503,
      "loss": 3.0966,
      "step": 31637
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3089118003845215,
      "learning_rate": 0.0005725155402694743,
      "loss": 3.2361,
      "step": 31638
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2667770385742188,
      "learning_rate": 0.0005725138298390326,
      "loss": 3.1901,
      "step": 31639
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3940870761871338,
      "learning_rate": 0.0005725121193579255,
      "loss": 3.0561,
      "step": 31640
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.3579230308532715,
      "learning_rate": 0.0005725104088261533,
      "loss": 3.2897,
      "step": 31641
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8247754573822021,
      "learning_rate": 0.0005725086982437161,
      "loss": 3.0016,
      "step": 31642
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3621646165847778,
      "learning_rate": 0.0005725069876106145,
      "loss": 3.1338,
      "step": 31643
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6187734603881836,
      "learning_rate": 0.0005725052769268487,
      "loss": 3.101,
      "step": 31644
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.378026008605957,
      "learning_rate": 0.000572503566192419,
      "loss": 3.0852,
      "step": 31645
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7886110544204712,
      "learning_rate": 0.0005725018554073257,
      "loss": 2.7934,
      "step": 31646
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5437849760055542,
      "learning_rate": 0.0005725001445715692,
      "loss": 3.1431,
      "step": 31647
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8020130395889282,
      "learning_rate": 0.0005724984336851498,
      "loss": 2.9844,
      "step": 31648
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.431465744972229,
      "learning_rate": 0.0005724967227480678,
      "loss": 2.9853,
      "step": 31649
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4559144973754883,
      "learning_rate": 0.0005724950117603235,
      "loss": 3.3597,
      "step": 31650
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.408452033996582,
      "learning_rate": 0.0005724933007219172,
      "loss": 3.0036,
      "step": 31651
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4088597297668457,
      "learning_rate": 0.0005724915896328493,
      "loss": 3.1398,
      "step": 31652
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.954831838607788,
      "learning_rate": 0.0005724898784931201,
      "loss": 3.1444,
      "step": 31653
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3977717161178589,
      "learning_rate": 0.0005724881673027297,
      "loss": 3.0365,
      "step": 31654
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.857851266860962,
      "learning_rate": 0.0005724864560616787,
      "loss": 3.0967,
      "step": 31655
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.422358512878418,
      "learning_rate": 0.0005724847447699675,
      "loss": 3.1602,
      "step": 31656
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7988841533660889,
      "learning_rate": 0.0005724830334275961,
      "loss": 3.0329,
      "step": 31657
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8501027822494507,
      "learning_rate": 0.0005724813220345649,
      "loss": 3.1745,
      "step": 31658
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6924502849578857,
      "learning_rate": 0.0005724796105908744,
      "loss": 3.2259,
      "step": 31659
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.628282904624939,
      "learning_rate": 0.0005724778990965247,
      "loss": 2.9503,
      "step": 31660
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.274025321006775,
      "learning_rate": 0.0005724761875515163,
      "loss": 3.2663,
      "step": 31661
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.368065595626831,
      "learning_rate": 0.0005724744759558494,
      "loss": 2.8222,
      "step": 31662
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2205649614334106,
      "learning_rate": 0.0005724727643095243,
      "loss": 2.9543,
      "step": 31663
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2273770570755005,
      "learning_rate": 0.0005724710526125415,
      "loss": 2.9546,
      "step": 31664
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9828308820724487,
      "learning_rate": 0.0005724693408649011,
      "loss": 2.941,
      "step": 31665
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.59541654586792,
      "learning_rate": 0.0005724676290666034,
      "loss": 2.9972,
      "step": 31666
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4384307861328125,
      "learning_rate": 0.000572465917217649,
      "loss": 2.9394,
      "step": 31667
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7345331907272339,
      "learning_rate": 0.000572464205318038,
      "loss": 3.1616,
      "step": 31668
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5734814405441284,
      "learning_rate": 0.0005724624933677707,
      "loss": 2.871,
      "step": 31669
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9785932302474976,
      "learning_rate": 0.0005724607813668475,
      "loss": 3.0214,
      "step": 31670
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0292139053344727,
      "learning_rate": 0.0005724590693152688,
      "loss": 3.0626,
      "step": 31671
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4412147998809814,
      "learning_rate": 0.0005724573572130346,
      "loss": 2.8082,
      "step": 31672
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.894164800643921,
      "learning_rate": 0.0005724556450601456,
      "loss": 3.1693,
      "step": 31673
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5043154954910278,
      "learning_rate": 0.000572453932856602,
      "loss": 3.0603,
      "step": 31674
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7827547788619995,
      "learning_rate": 0.000572452220602404,
      "loss": 3.218,
      "step": 31675
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.393513798713684,
      "learning_rate": 0.0005724505082975519,
      "loss": 3.1598,
      "step": 31676
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2020776271820068,
      "learning_rate": 0.0005724487959420462,
      "loss": 2.9732,
      "step": 31677
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.539025068283081,
      "learning_rate": 0.0005724470835358871,
      "loss": 3.3311,
      "step": 31678
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3291995525360107,
      "learning_rate": 0.000572445371079075,
      "loss": 3.1452,
      "step": 31679
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7742239236831665,
      "learning_rate": 0.0005724436585716101,
      "loss": 3.2109,
      "step": 31680
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4136388301849365,
      "learning_rate": 0.0005724419460134928,
      "loss": 3.1964,
      "step": 31681
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9444297552108765,
      "learning_rate": 0.0005724402334047234,
      "loss": 3.0378,
      "step": 31682
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.588850975036621,
      "learning_rate": 0.0005724385207453022,
      "loss": 3.1444,
      "step": 31683
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3678767681121826,
      "learning_rate": 0.0005724368080352296,
      "loss": 3.0431,
      "step": 31684
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6132956743240356,
      "learning_rate": 0.0005724350952745058,
      "loss": 2.9363,
      "step": 31685
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3060777187347412,
      "learning_rate": 0.0005724333824631312,
      "loss": 3.1287,
      "step": 31686
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4203976392745972,
      "learning_rate": 0.000572431669601106,
      "loss": 2.8465,
      "step": 31687
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6972377300262451,
      "learning_rate": 0.0005724299566884307,
      "loss": 3.2344,
      "step": 31688
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.8530526161193848,
      "learning_rate": 0.0005724282437251056,
      "loss": 3.1409,
      "step": 31689
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5860892534255981,
      "learning_rate": 0.0005724265307111308,
      "loss": 3.0066,
      "step": 31690
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7417194843292236,
      "learning_rate": 0.0005724248176465069,
      "loss": 3.0898,
      "step": 31691
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0139694213867188,
      "learning_rate": 0.0005724231045312339,
      "loss": 2.9834,
      "step": 31692
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.726453423500061,
      "learning_rate": 0.0005724213913653124,
      "loss": 2.9379,
      "step": 31693
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5520299673080444,
      "learning_rate": 0.0005724196781487427,
      "loss": 3.2174,
      "step": 31694
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5600066184997559,
      "learning_rate": 0.0005724179648815249,
      "loss": 2.9781,
      "step": 31695
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2764017581939697,
      "learning_rate": 0.0005724162515636595,
      "loss": 2.9897,
      "step": 31696
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8662781715393066,
      "learning_rate": 0.0005724145381951469,
      "loss": 3.0419,
      "step": 31697
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3706814050674438,
      "learning_rate": 0.0005724128247759871,
      "loss": 2.9295,
      "step": 31698
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6222208738327026,
      "learning_rate": 0.0005724111113061808,
      "loss": 2.8938,
      "step": 31699
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1397781372070312,
      "learning_rate": 0.000572409397785728,
      "loss": 2.948,
      "step": 31700
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0419580936431885,
      "learning_rate": 0.0005724076842146292,
      "loss": 3.0818,
      "step": 31701
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5166383981704712,
      "learning_rate": 0.0005724059705928847,
      "loss": 3.1596,
      "step": 31702
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5987749099731445,
      "learning_rate": 0.0005724042569204947,
      "loss": 3.1329,
      "step": 31703
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4109132289886475,
      "learning_rate": 0.0005724025431974597,
      "loss": 3.1145,
      "step": 31704
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3409267663955688,
      "learning_rate": 0.0005724008294237798,
      "loss": 2.8524,
      "step": 31705
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2474411725997925,
      "learning_rate": 0.0005723991155994556,
      "loss": 3.1128,
      "step": 31706
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.203617811203003,
      "learning_rate": 0.0005723974017244872,
      "loss": 3.1473,
      "step": 31707
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.2168092727661133,
      "learning_rate": 0.000572395687798875,
      "loss": 2.9287,
      "step": 31708
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5336194038391113,
      "learning_rate": 0.0005723939738226193,
      "loss": 2.9947,
      "step": 31709
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8809711933135986,
      "learning_rate": 0.0005723922597957203,
      "loss": 3.0585,
      "step": 31710
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.819183349609375,
      "learning_rate": 0.0005723905457181785,
      "loss": 2.8819,
      "step": 31711
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3645153045654297,
      "learning_rate": 0.0005723888315899942,
      "loss": 3.0882,
      "step": 31712
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.687285900115967,
      "learning_rate": 0.0005723871174111676,
      "loss": 2.9013,
      "step": 31713
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.8345413208007812,
      "learning_rate": 0.0005723854031816993,
      "loss": 2.8234,
      "step": 31714
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.577581763267517,
      "learning_rate": 0.0005723836889015892,
      "loss": 3.0367,
      "step": 31715
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8548372983932495,
      "learning_rate": 0.0005723819745708379,
      "loss": 3.1077,
      "step": 31716
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5819783210754395,
      "learning_rate": 0.0005723802601894457,
      "loss": 3.2118,
      "step": 31717
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4488284587860107,
      "learning_rate": 0.0005723785457574128,
      "loss": 3.0935,
      "step": 31718
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3411412239074707,
      "learning_rate": 0.0005723768312747396,
      "loss": 3.1627,
      "step": 31719
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4154343605041504,
      "learning_rate": 0.0005723751167414264,
      "loss": 2.8492,
      "step": 31720
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4740005731582642,
      "learning_rate": 0.0005723734021574735,
      "loss": 2.9384,
      "step": 31721
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6054786443710327,
      "learning_rate": 0.0005723716875228813,
      "loss": 3.2328,
      "step": 31722
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6436799764633179,
      "learning_rate": 0.00057236997283765,
      "loss": 2.9541,
      "step": 31723
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5689873695373535,
      "learning_rate": 0.00057236825810178,
      "loss": 3.0215,
      "step": 31724
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5211567878723145,
      "learning_rate": 0.0005723665433152717,
      "loss": 3.1253,
      "step": 31725
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.532712697982788,
      "learning_rate": 0.0005723648284781252,
      "loss": 2.8287,
      "step": 31726
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4473230838775635,
      "learning_rate": 0.0005723631135903409,
      "loss": 2.8235,
      "step": 31727
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6795361042022705,
      "learning_rate": 0.0005723613986519193,
      "loss": 3.1471,
      "step": 31728
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7442573308944702,
      "learning_rate": 0.0005723596836628605,
      "loss": 3.0455,
      "step": 31729
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2946866750717163,
      "learning_rate": 0.0005723579686231649,
      "loss": 3.0284,
      "step": 31730
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4913071393966675,
      "learning_rate": 0.0005723562535328327,
      "loss": 3.1718,
      "step": 31731
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6782646179199219,
      "learning_rate": 0.0005723545383918645,
      "loss": 2.9038,
      "step": 31732
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.420450448989868,
      "learning_rate": 0.0005723528232002604,
      "loss": 3.1047,
      "step": 31733
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3997271060943604,
      "learning_rate": 0.0005723511079580208,
      "loss": 3.0807,
      "step": 31734
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3805172443389893,
      "learning_rate": 0.0005723493926651458,
      "loss": 2.826,
      "step": 31735
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.839909553527832,
      "learning_rate": 0.0005723476773216361,
      "loss": 2.9925,
      "step": 31736
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5316691398620605,
      "learning_rate": 0.0005723459619274918,
      "loss": 3.1778,
      "step": 31737
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5895349979400635,
      "learning_rate": 0.0005723442464827133,
      "loss": 3.031,
      "step": 31738
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.330207109451294,
      "learning_rate": 0.0005723425309873008,
      "loss": 3.1886,
      "step": 31739
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8323619365692139,
      "learning_rate": 0.0005723408154412546,
      "loss": 2.859,
      "step": 31740
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.887758731842041,
      "learning_rate": 0.0005723390998445751,
      "loss": 3.0132,
      "step": 31741
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5488815307617188,
      "learning_rate": 0.0005723373841972627,
      "loss": 3.2239,
      "step": 31742
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9428269863128662,
      "learning_rate": 0.0005723356684993176,
      "loss": 2.9074,
      "step": 31743
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5654979944229126,
      "learning_rate": 0.0005723339527507403,
      "loss": 3.1021,
      "step": 31744
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.355804443359375,
      "learning_rate": 0.0005723322369515308,
      "loss": 3.315,
      "step": 31745
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6542021036148071,
      "learning_rate": 0.0005723305211016897,
      "loss": 2.9007,
      "step": 31746
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5818663835525513,
      "learning_rate": 0.0005723288052012171,
      "loss": 3.1256,
      "step": 31747
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.386655807495117,
      "learning_rate": 0.0005723270892501136,
      "loss": 2.9793,
      "step": 31748
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4151993989944458,
      "learning_rate": 0.0005723253732483792,
      "loss": 3.182,
      "step": 31749
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6751281023025513,
      "learning_rate": 0.0005723236571960145,
      "loss": 3.1063,
      "step": 31750
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.578946828842163,
      "learning_rate": 0.0005723219410930196,
      "loss": 3.2051,
      "step": 31751
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5035170316696167,
      "learning_rate": 0.000572320224939395,
      "loss": 3.231,
      "step": 31752
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4859557151794434,
      "learning_rate": 0.0005723185087351409,
      "loss": 3.0422,
      "step": 31753
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8128606081008911,
      "learning_rate": 0.0005723167924802575,
      "loss": 3.2688,
      "step": 31754
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4704539775848389,
      "learning_rate": 0.0005723150761747454,
      "loss": 3.031,
      "step": 31755
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4626038074493408,
      "learning_rate": 0.0005723133598186048,
      "loss": 3.0494,
      "step": 31756
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5553044080734253,
      "learning_rate": 0.000572311643411836,
      "loss": 2.9767,
      "step": 31757
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7581232786178589,
      "learning_rate": 0.0005723099269544393,
      "loss": 3.2492,
      "step": 31758
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5385797023773193,
      "learning_rate": 0.0005723082104464151,
      "loss": 3.0297,
      "step": 31759
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3372844457626343,
      "learning_rate": 0.0005723064938877636,
      "loss": 2.9153,
      "step": 31760
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4356486797332764,
      "learning_rate": 0.0005723047772784851,
      "loss": 3.2122,
      "step": 31761
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1214780807495117,
      "learning_rate": 0.0005723030606185802,
      "loss": 2.891,
      "step": 31762
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.76078462600708,
      "learning_rate": 0.000572301343908049,
      "loss": 3.111,
      "step": 31763
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5757354497909546,
      "learning_rate": 0.0005722996271468917,
      "loss": 3.0911,
      "step": 31764
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5206096172332764,
      "learning_rate": 0.0005722979103351088,
      "loss": 3.1451,
      "step": 31765
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3994226455688477,
      "learning_rate": 0.0005722961934727006,
      "loss": 3.0279,
      "step": 31766
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8899487257003784,
      "learning_rate": 0.0005722944765596674,
      "loss": 3.0804,
      "step": 31767
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7854480743408203,
      "learning_rate": 0.0005722927595960094,
      "loss": 3.1811,
      "step": 31768
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.788545846939087,
      "learning_rate": 0.0005722910425817273,
      "loss": 2.8876,
      "step": 31769
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6050047874450684,
      "learning_rate": 0.000572289325516821,
      "loss": 3.0718,
      "step": 31770
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2405970096588135,
      "learning_rate": 0.0005722876084012909,
      "loss": 3.1489,
      "step": 31771
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4207853078842163,
      "learning_rate": 0.0005722858912351374,
      "loss": 3.315,
      "step": 31772
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.804511547088623,
      "learning_rate": 0.000572284174018361,
      "loss": 2.9643,
      "step": 31773
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2963002920150757,
      "learning_rate": 0.0005722824567509616,
      "loss": 3.2676,
      "step": 31774
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4245336055755615,
      "learning_rate": 0.00057228073943294,
      "loss": 2.6769,
      "step": 31775
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6094331741333008,
      "learning_rate": 0.000572279022064296,
      "loss": 2.9378,
      "step": 31776
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7229695320129395,
      "learning_rate": 0.0005722773046450305,
      "loss": 3.0299,
      "step": 31777
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4110116958618164,
      "learning_rate": 0.0005722755871751432,
      "loss": 3.0183,
      "step": 31778
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.507574200630188,
      "learning_rate": 0.0005722738696546349,
      "loss": 3.0108,
      "step": 31779
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4794999361038208,
      "learning_rate": 0.0005722721520835057,
      "loss": 3.1267,
      "step": 31780
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.485181212425232,
      "learning_rate": 0.000572270434461756,
      "loss": 3.193,
      "step": 31781
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2820518016815186,
      "learning_rate": 0.0005722687167893861,
      "loss": 3.2915,
      "step": 31782
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1069095134735107,
      "learning_rate": 0.0005722669990663962,
      "loss": 3.0951,
      "step": 31783
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3988596200942993,
      "learning_rate": 0.0005722652812927869,
      "loss": 3.075,
      "step": 31784
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7370362281799316,
      "learning_rate": 0.0005722635634685582,
      "loss": 3.2265,
      "step": 31785
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.9878287315368652,
      "learning_rate": 0.0005722618455937106,
      "loss": 3.1287,
      "step": 31786
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1232376098632812,
      "learning_rate": 0.0005722601276682445,
      "loss": 2.8833,
      "step": 31787
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8757621049880981,
      "learning_rate": 0.0005722584096921598,
      "loss": 3.0843,
      "step": 31788
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.8381876945495605,
      "learning_rate": 0.0005722566916654574,
      "loss": 3.0762,
      "step": 31789
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.8561246395111084,
      "learning_rate": 0.0005722549735881373,
      "loss": 2.9684,
      "step": 31790
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.944190740585327,
      "learning_rate": 0.0005722532554601999,
      "loss": 3.098,
      "step": 31791
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7002578973770142,
      "learning_rate": 0.0005722515372816454,
      "loss": 2.9234,
      "step": 31792
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6379743814468384,
      "learning_rate": 0.0005722498190524741,
      "loss": 3.0154,
      "step": 31793
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.64856219291687,
      "learning_rate": 0.0005722481007726866,
      "loss": 3.2463,
      "step": 31794
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.000290870666504,
      "learning_rate": 0.0005722463824422829,
      "loss": 3.3308,
      "step": 31795
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.720943808555603,
      "learning_rate": 0.0005722446640612635,
      "loss": 3.1106,
      "step": 31796
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0835936069488525,
      "learning_rate": 0.0005722429456296287,
      "loss": 3.1088,
      "step": 31797
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9700816869735718,
      "learning_rate": 0.0005722412271473788,
      "loss": 3.2386,
      "step": 31798
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4392516613006592,
      "learning_rate": 0.0005722395086145141,
      "loss": 3.0805,
      "step": 31799
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0286941528320312,
      "learning_rate": 0.0005722377900310349,
      "loss": 3.2999,
      "step": 31800
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5767712593078613,
      "learning_rate": 0.0005722360713969417,
      "loss": 3.0822,
      "step": 31801
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.287021517753601,
      "learning_rate": 0.0005722343527122346,
      "loss": 3.2618,
      "step": 31802
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.357232928276062,
      "learning_rate": 0.0005722326339769139,
      "loss": 3.1999,
      "step": 31803
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.416564702987671,
      "learning_rate": 0.00057223091519098,
      "loss": 2.9725,
      "step": 31804
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.053863286972046,
      "learning_rate": 0.0005722291963544334,
      "loss": 3.17,
      "step": 31805
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7011817693710327,
      "learning_rate": 0.0005722274774672743,
      "loss": 3.2498,
      "step": 31806
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.314621925354004,
      "learning_rate": 0.0005722257585295028,
      "loss": 3.2802,
      "step": 31807
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.4992504119873047,
      "learning_rate": 0.0005722240395411194,
      "loss": 2.9239,
      "step": 31808
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6325228214263916,
      "learning_rate": 0.0005722223205021245,
      "loss": 3.1289,
      "step": 31809
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5448254346847534,
      "learning_rate": 0.0005722206014125182,
      "loss": 3.0725,
      "step": 31810
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6626020669937134,
      "learning_rate": 0.0005722188822723011,
      "loss": 2.8841,
      "step": 31811
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8090710639953613,
      "learning_rate": 0.0005722171630814733,
      "loss": 2.9377,
      "step": 31812
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9083446264266968,
      "learning_rate": 0.0005722154438400352,
      "loss": 3.0876,
      "step": 31813
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.146759033203125,
      "learning_rate": 0.0005722137245479871,
      "loss": 3.0811,
      "step": 31814
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4720202684402466,
      "learning_rate": 0.0005722120052053293,
      "loss": 3.163,
      "step": 31815
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1988463401794434,
      "learning_rate": 0.0005722102858120622,
      "loss": 3.1427,
      "step": 31816
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4496318101882935,
      "learning_rate": 0.0005722085663681861,
      "loss": 2.8637,
      "step": 31817
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.151458501815796,
      "learning_rate": 0.0005722068468737013,
      "loss": 2.9982,
      "step": 31818
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7791141271591187,
      "learning_rate": 0.000572205127328608,
      "loss": 3.028,
      "step": 31819
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.516162395477295,
      "learning_rate": 0.0005722034077329067,
      "loss": 3.0416,
      "step": 31820
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8268284797668457,
      "learning_rate": 0.0005722016880865976,
      "loss": 2.9755,
      "step": 31821
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.75715970993042,
      "learning_rate": 0.0005721999683896811,
      "loss": 2.9964,
      "step": 31822
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.547602891921997,
      "learning_rate": 0.0005721982486421575,
      "loss": 2.9008,
      "step": 31823
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4267607927322388,
      "learning_rate": 0.0005721965288440271,
      "loss": 3.0432,
      "step": 31824
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5629438161849976,
      "learning_rate": 0.0005721948089952903,
      "loss": 2.9446,
      "step": 31825
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8063992261886597,
      "learning_rate": 0.0005721930890959472,
      "loss": 3.0405,
      "step": 31826
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7334214448928833,
      "learning_rate": 0.0005721913691459983,
      "loss": 3.2957,
      "step": 31827
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6859561204910278,
      "learning_rate": 0.000572189649145444,
      "loss": 3.3096,
      "step": 31828
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.484877347946167,
      "learning_rate": 0.0005721879290942844,
      "loss": 3.3772,
      "step": 31829
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.147852659225464,
      "learning_rate": 0.0005721862089925199,
      "loss": 3.2577,
      "step": 31830
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.9997141361236572,
      "learning_rate": 0.000572184488840151,
      "loss": 3.2287,
      "step": 31831
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8988964557647705,
      "learning_rate": 0.0005721827686371777,
      "loss": 3.0737,
      "step": 31832
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6972954273223877,
      "learning_rate": 0.0005721810483836006,
      "loss": 3.3338,
      "step": 31833
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.6031503677368164,
      "learning_rate": 0.0005721793280794199,
      "loss": 3.2766,
      "step": 31834
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.063516855239868,
      "learning_rate": 0.0005721776077246359,
      "loss": 2.8459,
      "step": 31835
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.8874826431274414,
      "learning_rate": 0.000572175887319249,
      "loss": 3.1312,
      "step": 31836
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7644729614257812,
      "learning_rate": 0.0005721741668632593,
      "loss": 3.1177,
      "step": 31837
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.5408341884613037,
      "learning_rate": 0.0005721724463566674,
      "loss": 3.0132,
      "step": 31838
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.665224552154541,
      "learning_rate": 0.0005721707257994735,
      "loss": 3.2041,
      "step": 31839
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.630976915359497,
      "learning_rate": 0.0005721690051916779,
      "loss": 3.0417,
      "step": 31840
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5863354206085205,
      "learning_rate": 0.0005721672845332809,
      "loss": 3.1542,
      "step": 31841
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.0255930423736572,
      "learning_rate": 0.000572165563824283,
      "loss": 2.9979,
      "step": 31842
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.437668561935425,
      "learning_rate": 0.0005721638430646842,
      "loss": 3.0023,
      "step": 31843
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4263341426849365,
      "learning_rate": 0.0005721621222544852,
      "loss": 3.1391,
      "step": 31844
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.7687628269195557,
      "learning_rate": 0.000572160401393686,
      "loss": 3.1388,
      "step": 31845
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8479079008102417,
      "learning_rate": 0.000572158680482287,
      "loss": 2.9854,
      "step": 31846
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2907418012619019,
      "learning_rate": 0.0005721569595202887,
      "loss": 3.1339,
      "step": 31847
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4189903736114502,
      "learning_rate": 0.0005721552385076912,
      "loss": 2.9936,
      "step": 31848
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5255013704299927,
      "learning_rate": 0.0005721535174444949,
      "loss": 2.9041,
      "step": 31849
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6142874956130981,
      "learning_rate": 0.0005721517963307002,
      "loss": 3.0325,
      "step": 31850
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.41893470287323,
      "learning_rate": 0.0005721500751663073,
      "loss": 2.9076,
      "step": 31851
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6941245794296265,
      "learning_rate": 0.0005721483539513166,
      "loss": 3.0637,
      "step": 31852
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5170387029647827,
      "learning_rate": 0.0005721466326857283,
      "loss": 3.2233,
      "step": 31853
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.3829624652862549,
      "learning_rate": 0.0005721449113695428,
      "loss": 2.9444,
      "step": 31854
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2913668155670166,
      "learning_rate": 0.0005721431900027604,
      "loss": 2.862,
      "step": 31855
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8231866359710693,
      "learning_rate": 0.0005721414685853816,
      "loss": 2.8704,
      "step": 31856
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5722827911376953,
      "learning_rate": 0.0005721397471174064,
      "loss": 3.0965,
      "step": 31857
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6193348169326782,
      "learning_rate": 0.0005721380255988354,
      "loss": 3.0408,
      "step": 31858
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9379284381866455,
      "learning_rate": 0.0005721363040296688,
      "loss": 2.8988,
      "step": 31859
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.8961466550827026,
      "learning_rate": 0.0005721345824099068,
      "loss": 3.0709,
      "step": 31860
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.306938648223877,
      "learning_rate": 0.00057213286073955,
      "loss": 3.2774,
      "step": 31861
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5432460308074951,
      "learning_rate": 0.0005721311390185985,
      "loss": 3.1744,
      "step": 31862
    },
    {
      "epoch": 0.41,
      "grad_norm": 2.1567494869232178,
      "learning_rate": 0.0005721294172470527,
      "loss": 3.074,
      "step": 31863
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5390461683273315,
      "learning_rate": 0.0005721276954249128,
      "loss": 3.0761,
      "step": 31864
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.6546931266784668,
      "learning_rate": 0.0005721259735521793,
      "loss": 2.9671,
      "step": 31865
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4394047260284424,
      "learning_rate": 0.0005721242516288525,
      "loss": 3.0387,
      "step": 31866
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.7548600435256958,
      "learning_rate": 0.0005721225296549326,
      "loss": 3.2275,
      "step": 31867
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.628359317779541,
      "learning_rate": 0.0005721208076304201,
      "loss": 3.1152,
      "step": 31868
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.5000697374343872,
      "learning_rate": 0.0005721190855553151,
      "loss": 3.0109,
      "step": 31869
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.545945167541504,
      "learning_rate": 0.000572117363429618,
      "loss": 3.178,
      "step": 31870
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.2984299659729004,
      "learning_rate": 0.0005721156412533292,
      "loss": 3.1106,
      "step": 31871
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.4838827848434448,
      "learning_rate": 0.000572113919026449,
      "loss": 3.2118,
      "step": 31872
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.84055233001709,
      "learning_rate": 0.0005721121967489775,
      "loss": 3.2322,
      "step": 31873
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.591662883758545,
      "learning_rate": 0.0005721104744209153,
      "loss": 3.1087,
      "step": 31874
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.334366798400879,
      "learning_rate": 0.0005721087520422628,
      "loss": 3.2524,
      "step": 31875
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0402944087982178,
      "learning_rate": 0.00057210702961302,
      "loss": 3.3049,
      "step": 31876
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.554300546646118,
      "learning_rate": 0.0005721053071331874,
      "loss": 3.2237,
      "step": 31877
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.569211721420288,
      "learning_rate": 0.0005721035846027652,
      "loss": 3.2325,
      "step": 31878
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2854045629501343,
      "learning_rate": 0.000572101862021754,
      "loss": 3.0835,
      "step": 31879
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.8649044036865234,
      "learning_rate": 0.0005721001393901537,
      "loss": 3.3279,
      "step": 31880
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.455044984817505,
      "learning_rate": 0.000572098416707965,
      "loss": 3.051,
      "step": 31881
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3937757015228271,
      "learning_rate": 0.000572096693975188,
      "loss": 3.1353,
      "step": 31882
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5936484336853027,
      "learning_rate": 0.0005720949711918231,
      "loss": 3.1367,
      "step": 31883
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.650768756866455,
      "learning_rate": 0.0005720932483578706,
      "loss": 3.142,
      "step": 31884
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4582124948501587,
      "learning_rate": 0.0005720915254733309,
      "loss": 2.9034,
      "step": 31885
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0601277351379395,
      "learning_rate": 0.0005720898025382042,
      "loss": 3.2327,
      "step": 31886
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.847503662109375,
      "learning_rate": 0.0005720880795524908,
      "loss": 3.1272,
      "step": 31887
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4996308088302612,
      "learning_rate": 0.0005720863565161911,
      "loss": 3.2372,
      "step": 31888
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.988983154296875,
      "learning_rate": 0.0005720846334293055,
      "loss": 3.164,
      "step": 31889
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.883854866027832,
      "learning_rate": 0.0005720829102918341,
      "loss": 3.0198,
      "step": 31890
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4336602687835693,
      "learning_rate": 0.0005720811871037775,
      "loss": 3.2482,
      "step": 31891
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5516986846923828,
      "learning_rate": 0.0005720794638651358,
      "loss": 2.9847,
      "step": 31892
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.40347957611084,
      "learning_rate": 0.0005720777405759095,
      "loss": 3.2376,
      "step": 31893
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.228161334991455,
      "learning_rate": 0.0005720760172360986,
      "loss": 3.1954,
      "step": 31894
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.657836437225342,
      "learning_rate": 0.0005720742938457038,
      "loss": 3.3562,
      "step": 31895
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.537515640258789,
      "learning_rate": 0.0005720725704047253,
      "loss": 3.0217,
      "step": 31896
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5105096101760864,
      "learning_rate": 0.0005720708469131632,
      "loss": 3.1438,
      "step": 31897
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.207202434539795,
      "learning_rate": 0.0005720691233710181,
      "loss": 3.0129,
      "step": 31898
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2923572063446045,
      "learning_rate": 0.0005720673997782902,
      "loss": 3.0052,
      "step": 31899
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.595333456993103,
      "learning_rate": 0.0005720656761349798,
      "loss": 3.1052,
      "step": 31900
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.396976113319397,
      "learning_rate": 0.0005720639524410872,
      "loss": 3.1508,
      "step": 31901
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.11979341506958,
      "learning_rate": 0.000572062228696613,
      "loss": 2.9933,
      "step": 31902
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.368349313735962,
      "learning_rate": 0.0005720605049015571,
      "loss": 3.2915,
      "step": 31903
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3558779954910278,
      "learning_rate": 0.00057205878105592,
      "loss": 2.8345,
      "step": 31904
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.288727045059204,
      "learning_rate": 0.0005720570571597022,
      "loss": 3.3019,
      "step": 31905
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.697176456451416,
      "learning_rate": 0.0005720553332129037,
      "loss": 3.2486,
      "step": 31906
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6483222246170044,
      "learning_rate": 0.000572053609215525,
      "loss": 3.0894,
      "step": 31907
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7258864641189575,
      "learning_rate": 0.0005720518851675665,
      "loss": 3.1969,
      "step": 31908
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0746374130249023,
      "learning_rate": 0.0005720501610690283,
      "loss": 2.938,
      "step": 31909
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.107124090194702,
      "learning_rate": 0.000572048436919911,
      "loss": 2.9714,
      "step": 31910
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6067246198654175,
      "learning_rate": 0.0005720467127202145,
      "loss": 3.0941,
      "step": 31911
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.824245810508728,
      "learning_rate": 0.0005720449884699396,
      "loss": 2.9313,
      "step": 31912
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2874560356140137,
      "learning_rate": 0.0005720432641690863,
      "loss": 2.9962,
      "step": 31913
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5226850509643555,
      "learning_rate": 0.0005720415398176551,
      "loss": 2.9608,
      "step": 31914
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7847073078155518,
      "learning_rate": 0.0005720398154156461,
      "loss": 3.227,
      "step": 31915
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.66549813747406,
      "learning_rate": 0.00057203809096306,
      "loss": 2.9656,
      "step": 31916
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4308286905288696,
      "learning_rate": 0.0005720363664598967,
      "loss": 3.1677,
      "step": 31917
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5516748428344727,
      "learning_rate": 0.0005720346419061567,
      "loss": 3.1234,
      "step": 31918
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.459859848022461,
      "learning_rate": 0.0005720329173018404,
      "loss": 3.0727,
      "step": 31919
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7356168031692505,
      "learning_rate": 0.0005720311926469481,
      "loss": 2.9958,
      "step": 31920
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.282818555831909,
      "learning_rate": 0.00057202946794148,
      "loss": 3.2223,
      "step": 31921
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5244255065917969,
      "learning_rate": 0.0005720277431854364,
      "loss": 3.0689,
      "step": 31922
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3755426406860352,
      "learning_rate": 0.0005720260183788178,
      "loss": 2.9709,
      "step": 31923
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.519562840461731,
      "learning_rate": 0.0005720242935216245,
      "loss": 3.1469,
      "step": 31924
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.465346097946167,
      "learning_rate": 0.0005720225686138566,
      "loss": 3.0534,
      "step": 31925
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3670045137405396,
      "learning_rate": 0.0005720208436555146,
      "loss": 2.9682,
      "step": 31926
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4939230680465698,
      "learning_rate": 0.0005720191186465988,
      "loss": 2.8294,
      "step": 31927
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7461695671081543,
      "learning_rate": 0.0005720173935871095,
      "loss": 3.0685,
      "step": 31928
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5686278343200684,
      "learning_rate": 0.0005720156684770471,
      "loss": 3.0124,
      "step": 31929
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2639837265014648,
      "learning_rate": 0.0005720139433164119,
      "loss": 3.1676,
      "step": 31930
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3671517372131348,
      "learning_rate": 0.000572012218105204,
      "loss": 3.1815,
      "step": 31931
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.798161506652832,
      "learning_rate": 0.000572010492843424,
      "loss": 3.1091,
      "step": 31932
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.476468801498413,
      "learning_rate": 0.0005720087675310722,
      "loss": 2.8884,
      "step": 31933
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.44322669506073,
      "learning_rate": 0.0005720070421681487,
      "loss": 2.8144,
      "step": 31934
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6417819261550903,
      "learning_rate": 0.000572005316754654,
      "loss": 3.1497,
      "step": 31935
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2996059656143188,
      "learning_rate": 0.0005720035912905884,
      "loss": 3.1887,
      "step": 31936
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9305671453475952,
      "learning_rate": 0.0005720018657759521,
      "loss": 2.9915,
      "step": 31937
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.554444432258606,
      "learning_rate": 0.0005720001402107455,
      "loss": 3.2308,
      "step": 31938
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5548983812332153,
      "learning_rate": 0.0005719984145949691,
      "loss": 2.9888,
      "step": 31939
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4496924877166748,
      "learning_rate": 0.000571996688928623,
      "loss": 3.0658,
      "step": 31940
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5712356567382812,
      "learning_rate": 0.0005719949632117075,
      "loss": 2.9653,
      "step": 31941
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5641841888427734,
      "learning_rate": 0.0005719932374442232,
      "loss": 3.3061,
      "step": 31942
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4112229347229004,
      "learning_rate": 0.00057199151162617,
      "loss": 2.978,
      "step": 31943
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6324889659881592,
      "learning_rate": 0.0005719897857575485,
      "loss": 3.1127,
      "step": 31944
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4478060007095337,
      "learning_rate": 0.000571988059838359,
      "loss": 2.959,
      "step": 31945
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.478480339050293,
      "learning_rate": 0.0005719863338686018,
      "loss": 3.1965,
      "step": 31946
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5400694608688354,
      "learning_rate": 0.0005719846078482771,
      "loss": 3.052,
      "step": 31947
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0619113445281982,
      "learning_rate": 0.0005719828817773854,
      "loss": 3.1107,
      "step": 31948
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3482533693313599,
      "learning_rate": 0.0005719811556559271,
      "loss": 2.882,
      "step": 31949
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5397014617919922,
      "learning_rate": 0.0005719794294839022,
      "loss": 3.0244,
      "step": 31950
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6061428785324097,
      "learning_rate": 0.0005719777032613113,
      "loss": 3.2311,
      "step": 31951
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3553224802017212,
      "learning_rate": 0.0005719759769881544,
      "loss": 3.2667,
      "step": 31952
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8936210870742798,
      "learning_rate": 0.0005719742506644322,
      "loss": 3.0635,
      "step": 31953
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5899417400360107,
      "learning_rate": 0.0005719725242901448,
      "loss": 3.1216,
      "step": 31954
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3654810190200806,
      "learning_rate": 0.0005719707978652927,
      "loss": 3.052,
      "step": 31955
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3241050243377686,
      "learning_rate": 0.000571969071389876,
      "loss": 3.1074,
      "step": 31956
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.448626160621643,
      "learning_rate": 0.0005719673448638951,
      "loss": 3.1597,
      "step": 31957
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3281148672103882,
      "learning_rate": 0.0005719656182873503,
      "loss": 3.0124,
      "step": 31958
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9185940027236938,
      "learning_rate": 0.000571963891660242,
      "loss": 2.9749,
      "step": 31959
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4313545227050781,
      "learning_rate": 0.0005719621649825705,
      "loss": 3.1902,
      "step": 31960
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6225669384002686,
      "learning_rate": 0.0005719604382543361,
      "loss": 3.3106,
      "step": 31961
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5148203372955322,
      "learning_rate": 0.0005719587114755391,
      "loss": 3.3083,
      "step": 31962
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6199629306793213,
      "learning_rate": 0.0005719569846461798,
      "loss": 3.2429,
      "step": 31963
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5088952779769897,
      "learning_rate": 0.0005719552577662586,
      "loss": 3.1441,
      "step": 31964
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8153009414672852,
      "learning_rate": 0.0005719535308357758,
      "loss": 3.0344,
      "step": 31965
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4718693494796753,
      "learning_rate": 0.0005719518038547317,
      "loss": 3.0694,
      "step": 31966
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7111419439315796,
      "learning_rate": 0.0005719500768231265,
      "loss": 3.1552,
      "step": 31967
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4940898418426514,
      "learning_rate": 0.0005719483497409609,
      "loss": 2.8692,
      "step": 31968
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2862465381622314,
      "learning_rate": 0.0005719466226082349,
      "loss": 2.8526,
      "step": 31969
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.934723973274231,
      "learning_rate": 0.0005719448954249487,
      "loss": 2.7552,
      "step": 31970
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7523250579833984,
      "learning_rate": 0.000571943168191103,
      "loss": 2.8654,
      "step": 31971
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3101859092712402,
      "learning_rate": 0.0005719414409066978,
      "loss": 3.0914,
      "step": 31972
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.7326722145080566,
      "learning_rate": 0.0005719397135717336,
      "loss": 2.9164,
      "step": 31973
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.153848171234131,
      "learning_rate": 0.0005719379861862108,
      "loss": 2.9523,
      "step": 31974
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.58102285861969,
      "learning_rate": 0.0005719362587501294,
      "loss": 3.1211,
      "step": 31975
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.205410957336426,
      "learning_rate": 0.0005719345312634901,
      "loss": 3.0217,
      "step": 31976
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.535463809967041,
      "learning_rate": 0.0005719328037262929,
      "loss": 3.1706,
      "step": 31977
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7967090606689453,
      "learning_rate": 0.0005719310761385383,
      "loss": 2.9574,
      "step": 31978
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4318938255310059,
      "learning_rate": 0.0005719293485002266,
      "loss": 3.2258,
      "step": 31979
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8655120134353638,
      "learning_rate": 0.0005719276208113581,
      "loss": 3.0645,
      "step": 31980
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0636610984802246,
      "learning_rate": 0.0005719258930719331,
      "loss": 3.2445,
      "step": 31981
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.604819416999817,
      "learning_rate": 0.0005719241652819519,
      "loss": 2.8233,
      "step": 31982
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6567600965499878,
      "learning_rate": 0.0005719224374414149,
      "loss": 3.0588,
      "step": 31983
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4179774522781372,
      "learning_rate": 0.0005719207095503224,
      "loss": 3.0517,
      "step": 31984
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6313661336898804,
      "learning_rate": 0.0005719189816086747,
      "loss": 2.854,
      "step": 31985
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4420095682144165,
      "learning_rate": 0.0005719172536164721,
      "loss": 3.0857,
      "step": 31986
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.650125503540039,
      "learning_rate": 0.0005719155255737151,
      "loss": 3.0451,
      "step": 31987
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.24375581741333,
      "learning_rate": 0.0005719137974804037,
      "loss": 3.1639,
      "step": 31988
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5466055870056152,
      "learning_rate": 0.0005719120693365384,
      "loss": 3.2745,
      "step": 31989
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6809498071670532,
      "learning_rate": 0.0005719103411421197,
      "loss": 2.88,
      "step": 31990
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2965282201766968,
      "learning_rate": 0.0005719086128971475,
      "loss": 3.0395,
      "step": 31991
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2714667320251465,
      "learning_rate": 0.0005719068846016225,
      "loss": 3.1533,
      "step": 31992
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.080427885055542,
      "learning_rate": 0.0005719051562555448,
      "loss": 3.028,
      "step": 31993
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2976975440979004,
      "learning_rate": 0.0005719034278589149,
      "loss": 2.984,
      "step": 31994
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.198852777481079,
      "learning_rate": 0.0005719016994117329,
      "loss": 3.0453,
      "step": 31995
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9036535024642944,
      "learning_rate": 0.0005718999709139993,
      "loss": 3.1353,
      "step": 31996
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6873745918273926,
      "learning_rate": 0.0005718982423657143,
      "loss": 3.1075,
      "step": 31997
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.2535665035247803,
      "learning_rate": 0.0005718965137668783,
      "loss": 3.3268,
      "step": 31998
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4637820720672607,
      "learning_rate": 0.0005718947851174917,
      "loss": 3.1131,
      "step": 31999
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4507997035980225,
      "learning_rate": 0.0005718930564175547,
      "loss": 3.2625,
      "step": 32000
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2560347318649292,
      "learning_rate": 0.0005718913276670676,
      "loss": 3.2008,
      "step": 32001
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0728039741516113,
      "learning_rate": 0.0005718895988660308,
      "loss": 2.9635,
      "step": 32002
    },
    {
      "epoch": 0.42,
      "grad_norm": 4.383673191070557,
      "learning_rate": 0.0005718878700144446,
      "loss": 2.9543,
      "step": 32003
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.861046075820923,
      "learning_rate": 0.0005718861411123093,
      "loss": 3.246,
      "step": 32004
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7364336252212524,
      "learning_rate": 0.0005718844121596252,
      "loss": 3.0858,
      "step": 32005
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.117011547088623,
      "learning_rate": 0.0005718826831563927,
      "loss": 2.8724,
      "step": 32006
    },
    {
      "epoch": 0.42,
      "grad_norm": 4.496059894561768,
      "learning_rate": 0.000571880954102612,
      "loss": 3.0656,
      "step": 32007
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.699287176132202,
      "learning_rate": 0.0005718792249982835,
      "loss": 3.0982,
      "step": 32008
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7854548692703247,
      "learning_rate": 0.0005718774958434076,
      "loss": 2.8905,
      "step": 32009
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8617956638336182,
      "learning_rate": 0.0005718757666379845,
      "loss": 3.2337,
      "step": 32010
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.353269338607788,
      "learning_rate": 0.0005718740373820146,
      "loss": 2.9087,
      "step": 32011
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.1827890872955322,
      "learning_rate": 0.0005718723080754982,
      "loss": 3.0429,
      "step": 32012
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5030931234359741,
      "learning_rate": 0.0005718705787184356,
      "loss": 3.0551,
      "step": 32013
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7007509469985962,
      "learning_rate": 0.0005718688493108271,
      "loss": 2.9562,
      "step": 32014
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.7517077922821045,
      "learning_rate": 0.000571867119852673,
      "loss": 3.1624,
      "step": 32015
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9140878915786743,
      "learning_rate": 0.0005718653903439738,
      "loss": 3.1815,
      "step": 32016
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3101404905319214,
      "learning_rate": 0.0005718636607847296,
      "loss": 3.0312,
      "step": 32017
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.9898860454559326,
      "learning_rate": 0.0005718619311749408,
      "loss": 3.081,
      "step": 32018
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6329883337020874,
      "learning_rate": 0.0005718602015146078,
      "loss": 3.1069,
      "step": 32019
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6826035976409912,
      "learning_rate": 0.0005718584718037308,
      "loss": 3.1876,
      "step": 32020
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.131258726119995,
      "learning_rate": 0.0005718567420423101,
      "loss": 3.217,
      "step": 32021
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7648547887802124,
      "learning_rate": 0.0005718550122303463,
      "loss": 3.0799,
      "step": 32022
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.643011212348938,
      "learning_rate": 0.0005718532823678394,
      "loss": 3.0395,
      "step": 32023
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5196945667266846,
      "learning_rate": 0.0005718515524547899,
      "loss": 3.2496,
      "step": 32024
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5475428104400635,
      "learning_rate": 0.000571849822491198,
      "loss": 3.2636,
      "step": 32025
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6565521955490112,
      "learning_rate": 0.000571848092477064,
      "loss": 3.5421,
      "step": 32026
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4859622716903687,
      "learning_rate": 0.0005718463624123885,
      "loss": 3.1192,
      "step": 32027
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9707874059677124,
      "learning_rate": 0.0005718446322971716,
      "loss": 2.927,
      "step": 32028
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3854191303253174,
      "learning_rate": 0.0005718429021314135,
      "loss": 3.2635,
      "step": 32029
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3755840063095093,
      "learning_rate": 0.0005718411719151148,
      "loss": 3.0862,
      "step": 32030
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3829493522644043,
      "learning_rate": 0.0005718394416482757,
      "loss": 3.1825,
      "step": 32031
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4837456941604614,
      "learning_rate": 0.0005718377113308965,
      "loss": 3.132,
      "step": 32032
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2004799842834473,
      "learning_rate": 0.0005718359809629774,
      "loss": 3.0362,
      "step": 32033
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0850577354431152,
      "learning_rate": 0.0005718342505445191,
      "loss": 3.0277,
      "step": 32034
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5098224878311157,
      "learning_rate": 0.0005718325200755215,
      "loss": 3.2154,
      "step": 32035
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2517898082733154,
      "learning_rate": 0.0005718307895559852,
      "loss": 3.1693,
      "step": 32036
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4798427820205688,
      "learning_rate": 0.0005718290589859104,
      "loss": 3.2896,
      "step": 32037
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5424798727035522,
      "learning_rate": 0.0005718273283652974,
      "loss": 3.0711,
      "step": 32038
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5492304563522339,
      "learning_rate": 0.0005718255976941467,
      "loss": 3.2072,
      "step": 32039
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4529730081558228,
      "learning_rate": 0.0005718238669724584,
      "loss": 2.8306,
      "step": 32040
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6525874137878418,
      "learning_rate": 0.0005718221362002328,
      "loss": 3.1083,
      "step": 32041
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0933568477630615,
      "learning_rate": 0.0005718204053774705,
      "loss": 3.185,
      "step": 32042
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.970963954925537,
      "learning_rate": 0.0005718186745041717,
      "loss": 3.0925,
      "step": 32043
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2474586963653564,
      "learning_rate": 0.0005718169435803365,
      "loss": 2.9092,
      "step": 32044
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9263190031051636,
      "learning_rate": 0.0005718152126059654,
      "loss": 3.1805,
      "step": 32045
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.157066583633423,
      "learning_rate": 0.0005718134815810589,
      "loss": 3.0979,
      "step": 32046
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.891202926635742,
      "learning_rate": 0.000571811750505617,
      "loss": 3.011,
      "step": 32047
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5459465980529785,
      "learning_rate": 0.0005718100193796402,
      "loss": 3.02,
      "step": 32048
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.041774272918701,
      "learning_rate": 0.0005718082882031288,
      "loss": 3.0604,
      "step": 32049
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5286946296691895,
      "learning_rate": 0.0005718065569760831,
      "loss": 3.3764,
      "step": 32050
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1717896461486816,
      "learning_rate": 0.0005718048256985035,
      "loss": 2.9807,
      "step": 32051
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.615391969680786,
      "learning_rate": 0.0005718030943703902,
      "loss": 3.045,
      "step": 32052
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5996134281158447,
      "learning_rate": 0.0005718013629917435,
      "loss": 2.8463,
      "step": 32053
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5595780611038208,
      "learning_rate": 0.0005717996315625639,
      "loss": 3.173,
      "step": 32054
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9270389080047607,
      "learning_rate": 0.0005717979000828515,
      "loss": 2.8169,
      "step": 32055
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.1439926624298096,
      "learning_rate": 0.0005717961685526067,
      "loss": 3.2185,
      "step": 32056
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.436930537223816,
      "learning_rate": 0.0005717944369718301,
      "loss": 3.1063,
      "step": 32057
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8347063064575195,
      "learning_rate": 0.0005717927053405217,
      "loss": 3.1704,
      "step": 32058
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.443061113357544,
      "learning_rate": 0.0005717909736586818,
      "loss": 3.0411,
      "step": 32059
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.531327247619629,
      "learning_rate": 0.0005717892419263109,
      "loss": 2.8232,
      "step": 32060
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5153268575668335,
      "learning_rate": 0.0005717875101434092,
      "loss": 3.1157,
      "step": 32061
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3534857034683228,
      "learning_rate": 0.0005717857783099771,
      "loss": 3.0471,
      "step": 32062
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8109192848205566,
      "learning_rate": 0.0005717840464260149,
      "loss": 3.0768,
      "step": 32063
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0830392837524414,
      "learning_rate": 0.000571782314491523,
      "loss": 2.9756,
      "step": 32064
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.707188367843628,
      "learning_rate": 0.0005717805825065015,
      "loss": 3.3082,
      "step": 32065
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.818325400352478,
      "learning_rate": 0.0005717788504709509,
      "loss": 3.0018,
      "step": 32066
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8870230913162231,
      "learning_rate": 0.0005717771183848716,
      "loss": 2.9723,
      "step": 32067
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5909935235977173,
      "learning_rate": 0.0005717753862482636,
      "loss": 3.1062,
      "step": 32068
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.620347261428833,
      "learning_rate": 0.0005717736540611276,
      "loss": 3.0893,
      "step": 32069
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4535508155822754,
      "learning_rate": 0.0005717719218234636,
      "loss": 2.9805,
      "step": 32070
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.501154899597168,
      "learning_rate": 0.0005717701895352722,
      "loss": 3.0311,
      "step": 32071
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.861019253730774,
      "learning_rate": 0.0005717684571965535,
      "loss": 3.1278,
      "step": 32072
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.92694890499115,
      "learning_rate": 0.000571766724807308,
      "loss": 3.1458,
      "step": 32073
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3423068523406982,
      "learning_rate": 0.0005717649923675358,
      "loss": 2.9156,
      "step": 32074
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5311307907104492,
      "learning_rate": 0.0005717632598772374,
      "loss": 2.8437,
      "step": 32075
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2292847633361816,
      "learning_rate": 0.0005717615273364132,
      "loss": 2.8911,
      "step": 32076
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2190024852752686,
      "learning_rate": 0.0005717597947450633,
      "loss": 2.8494,
      "step": 32077
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.096731185913086,
      "learning_rate": 0.0005717580621031882,
      "loss": 2.9706,
      "step": 32078
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3270719051361084,
      "learning_rate": 0.0005717563294107881,
      "loss": 2.6761,
      "step": 32079
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3704326152801514,
      "learning_rate": 0.0005717545966678634,
      "loss": 3.2291,
      "step": 32080
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.6149065494537354,
      "learning_rate": 0.0005717528638744143,
      "loss": 2.9822,
      "step": 32081
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6262277364730835,
      "learning_rate": 0.0005717511310304413,
      "loss": 3.1927,
      "step": 32082
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9856231212615967,
      "learning_rate": 0.0005717493981359445,
      "loss": 3.0793,
      "step": 32083
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5973622798919678,
      "learning_rate": 0.0005717476651909245,
      "loss": 2.9489,
      "step": 32084
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3139569759368896,
      "learning_rate": 0.0005717459321953815,
      "loss": 3.2073,
      "step": 32085
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5559090375900269,
      "learning_rate": 0.0005717441991493158,
      "loss": 2.9801,
      "step": 32086
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.465074062347412,
      "learning_rate": 0.0005717424660527276,
      "loss": 2.8786,
      "step": 32087
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6367570161819458,
      "learning_rate": 0.0005717407329056174,
      "loss": 3.0714,
      "step": 32088
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.603310227394104,
      "learning_rate": 0.0005717389997079855,
      "loss": 3.1477,
      "step": 32089
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3675286769866943,
      "learning_rate": 0.0005717372664598322,
      "loss": 2.9978,
      "step": 32090
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7693729400634766,
      "learning_rate": 0.0005717355331611578,
      "loss": 2.8558,
      "step": 32091
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7948867082595825,
      "learning_rate": 0.0005717337998119626,
      "loss": 3.0973,
      "step": 32092
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3238686323165894,
      "learning_rate": 0.0005717320664122469,
      "loss": 2.8641,
      "step": 32093
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.515474319458008,
      "learning_rate": 0.0005717303329620112,
      "loss": 2.9665,
      "step": 32094
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.444214105606079,
      "learning_rate": 0.0005717285994612557,
      "loss": 3.0992,
      "step": 32095
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.302294373512268,
      "learning_rate": 0.0005717268659099806,
      "loss": 3.0182,
      "step": 32096
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4250839948654175,
      "learning_rate": 0.0005717251323081865,
      "loss": 3.1503,
      "step": 32097
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7059932947158813,
      "learning_rate": 0.0005717233986558734,
      "loss": 3.1192,
      "step": 32098
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.351828932762146,
      "learning_rate": 0.0005717216649530419,
      "loss": 3.2277,
      "step": 32099
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.825457215309143,
      "learning_rate": 0.0005717199311996922,
      "loss": 3.1063,
      "step": 32100
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0742998123168945,
      "learning_rate": 0.0005717181973958247,
      "loss": 2.9448,
      "step": 32101
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3351861238479614,
      "learning_rate": 0.0005717164635414395,
      "loss": 3.0624,
      "step": 32102
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.528959035873413,
      "learning_rate": 0.0005717147296365372,
      "loss": 3.2508,
      "step": 32103
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2828691005706787,
      "learning_rate": 0.000571712995681118,
      "loss": 3.037,
      "step": 32104
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4605156183242798,
      "learning_rate": 0.0005717112616751822,
      "loss": 3.1138,
      "step": 32105
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5864784717559814,
      "learning_rate": 0.0005717095276187301,
      "loss": 3.0578,
      "step": 32106
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4054752588272095,
      "learning_rate": 0.0005717077935117621,
      "loss": 2.8899,
      "step": 32107
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.363553524017334,
      "learning_rate": 0.0005717060593542786,
      "loss": 3.3472,
      "step": 32108
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.533974289894104,
      "learning_rate": 0.0005717043251462796,
      "loss": 2.9138,
      "step": 32109
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.578473448753357,
      "learning_rate": 0.0005717025908877659,
      "loss": 3.0317,
      "step": 32110
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5419100522994995,
      "learning_rate": 0.0005717008565787374,
      "loss": 3.0251,
      "step": 32111
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3447893857955933,
      "learning_rate": 0.0005716991222191947,
      "loss": 3.3367,
      "step": 32112
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4794371128082275,
      "learning_rate": 0.0005716973878091378,
      "loss": 3.0447,
      "step": 32113
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6392983198165894,
      "learning_rate": 0.0005716956533485673,
      "loss": 2.7916,
      "step": 32114
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2307679653167725,
      "learning_rate": 0.0005716939188374835,
      "loss": 3.1068,
      "step": 32115
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.393664002418518,
      "learning_rate": 0.0005716921842758866,
      "loss": 2.976,
      "step": 32116
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5015997886657715,
      "learning_rate": 0.000571690449663777,
      "loss": 3.1891,
      "step": 32117
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3333086967468262,
      "learning_rate": 0.0005716887150011552,
      "loss": 3.0179,
      "step": 32118
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.58935546875,
      "learning_rate": 0.0005716869802880212,
      "loss": 3.3717,
      "step": 32119
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1526505947113037,
      "learning_rate": 0.0005716852455243755,
      "loss": 3.1318,
      "step": 32120
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5554324388504028,
      "learning_rate": 0.0005716835107102184,
      "loss": 3.1726,
      "step": 32121
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6283106803894043,
      "learning_rate": 0.0005716817758455501,
      "loss": 3.1914,
      "step": 32122
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3165394067764282,
      "learning_rate": 0.000571680040930371,
      "loss": 3.1673,
      "step": 32123
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3721883296966553,
      "learning_rate": 0.0005716783059646816,
      "loss": 3.1136,
      "step": 32124
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4988678693771362,
      "learning_rate": 0.0005716765709484821,
      "loss": 2.848,
      "step": 32125
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6145895719528198,
      "learning_rate": 0.0005716748358817726,
      "loss": 3.0795,
      "step": 32126
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4105271100997925,
      "learning_rate": 0.0005716731007645538,
      "loss": 3.137,
      "step": 32127
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.621072769165039,
      "learning_rate": 0.0005716713655968258,
      "loss": 3.0138,
      "step": 32128
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4749406576156616,
      "learning_rate": 0.0005716696303785889,
      "loss": 2.9491,
      "step": 32129
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3529638051986694,
      "learning_rate": 0.0005716678951098435,
      "loss": 3.2553,
      "step": 32130
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.309752106666565,
      "learning_rate": 0.00057166615979059,
      "loss": 3.0732,
      "step": 32131
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5686434507369995,
      "learning_rate": 0.0005716644244208285,
      "loss": 3.1119,
      "step": 32132
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8082520961761475,
      "learning_rate": 0.0005716626890005595,
      "loss": 2.8679,
      "step": 32133
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4992811679840088,
      "learning_rate": 0.0005716609535297834,
      "loss": 2.9973,
      "step": 32134
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5514025688171387,
      "learning_rate": 0.0005716592180085003,
      "loss": 3.0974,
      "step": 32135
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5157573223114014,
      "learning_rate": 0.0005716574824367105,
      "loss": 3.238,
      "step": 32136
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5674899816513062,
      "learning_rate": 0.0005716557468144147,
      "loss": 3.0838,
      "step": 32137
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3781335353851318,
      "learning_rate": 0.0005716540111416128,
      "loss": 3.1217,
      "step": 32138
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.484169840812683,
      "learning_rate": 0.0005716522754183054,
      "loss": 3.2852,
      "step": 32139
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.471159815788269,
      "learning_rate": 0.0005716505396444927,
      "loss": 2.7929,
      "step": 32140
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6264568567276,
      "learning_rate": 0.0005716488038201749,
      "loss": 3.005,
      "step": 32141
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.282843828201294,
      "learning_rate": 0.0005716470679453526,
      "loss": 2.7734,
      "step": 32142
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7698458433151245,
      "learning_rate": 0.0005716453320200259,
      "loss": 3.1507,
      "step": 32143
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5502930879592896,
      "learning_rate": 0.0005716435960441953,
      "loss": 3.1047,
      "step": 32144
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9724321365356445,
      "learning_rate": 0.0005716418600178609,
      "loss": 3.2014,
      "step": 32145
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5361964702606201,
      "learning_rate": 0.0005716401239410232,
      "loss": 2.979,
      "step": 32146
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5598174333572388,
      "learning_rate": 0.0005716383878136825,
      "loss": 3.105,
      "step": 32147
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4300384521484375,
      "learning_rate": 0.0005716366516358391,
      "loss": 3.2339,
      "step": 32148
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5284024477005005,
      "learning_rate": 0.0005716349154074932,
      "loss": 3.1204,
      "step": 32149
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4809396266937256,
      "learning_rate": 0.0005716331791286454,
      "loss": 2.9922,
      "step": 32150
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.462951898574829,
      "learning_rate": 0.0005716314427992959,
      "loss": 2.9977,
      "step": 32151
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.757905125617981,
      "learning_rate": 0.0005716297064194448,
      "loss": 3.0987,
      "step": 32152
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3912986516952515,
      "learning_rate": 0.0005716279699890927,
      "loss": 3.0808,
      "step": 32153
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.608350396156311,
      "learning_rate": 0.0005716262335082399,
      "loss": 2.9722,
      "step": 32154
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5462381839752197,
      "learning_rate": 0.0005716244969768866,
      "loss": 2.9468,
      "step": 32155
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.367902398109436,
      "learning_rate": 0.0005716227603950331,
      "loss": 3.2233,
      "step": 32156
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4321104288101196,
      "learning_rate": 0.0005716210237626799,
      "loss": 2.8198,
      "step": 32157
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4955717325210571,
      "learning_rate": 0.0005716192870798271,
      "loss": 3.1198,
      "step": 32158
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3997544050216675,
      "learning_rate": 0.0005716175503464753,
      "loss": 3.286,
      "step": 32159
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2373669147491455,
      "learning_rate": 0.0005716158135626246,
      "loss": 3.2321,
      "step": 32160
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5275859832763672,
      "learning_rate": 0.0005716140767282754,
      "loss": 3.1034,
      "step": 32161
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.382174491882324,
      "learning_rate": 0.0005716123398434281,
      "loss": 2.9708,
      "step": 32162
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5671697854995728,
      "learning_rate": 0.0005716106029080829,
      "loss": 2.8519,
      "step": 32163
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5527231693267822,
      "learning_rate": 0.0005716088659222401,
      "loss": 2.901,
      "step": 32164
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6155149936676025,
      "learning_rate": 0.0005716071288859001,
      "loss": 2.9888,
      "step": 32165
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4971665143966675,
      "learning_rate": 0.0005716053917990632,
      "loss": 3.3139,
      "step": 32166
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3306307792663574,
      "learning_rate": 0.0005716036546617298,
      "loss": 3.2435,
      "step": 32167
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7597789764404297,
      "learning_rate": 0.0005716019174739,
      "loss": 2.8861,
      "step": 32168
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4593820571899414,
      "learning_rate": 0.0005716001802355745,
      "loss": 3.0604,
      "step": 32169
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7064297199249268,
      "learning_rate": 0.0005715984429467533,
      "loss": 3.1435,
      "step": 32170
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3734056949615479,
      "learning_rate": 0.0005715967056074367,
      "loss": 3.2504,
      "step": 32171
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5972057580947876,
      "learning_rate": 0.0005715949682176252,
      "loss": 2.9184,
      "step": 32172
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.164245843887329,
      "learning_rate": 0.0005715932307773191,
      "loss": 3.2667,
      "step": 32173
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3589317798614502,
      "learning_rate": 0.0005715914932865188,
      "loss": 3.1595,
      "step": 32174
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5109333992004395,
      "learning_rate": 0.0005715897557452244,
      "loss": 2.9708,
      "step": 32175
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2862848043441772,
      "learning_rate": 0.0005715880181534364,
      "loss": 2.952,
      "step": 32176
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5476837158203125,
      "learning_rate": 0.000571586280511155,
      "loss": 3.334,
      "step": 32177
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3886466026306152,
      "learning_rate": 0.0005715845428183805,
      "loss": 3.1246,
      "step": 32178
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5042003393173218,
      "learning_rate": 0.0005715828050751135,
      "loss": 3.075,
      "step": 32179
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1790030002593994,
      "learning_rate": 0.000571581067281354,
      "loss": 2.8281,
      "step": 32180
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6432874202728271,
      "learning_rate": 0.0005715793294371025,
      "loss": 3.0967,
      "step": 32181
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8535088300704956,
      "learning_rate": 0.0005715775915423593,
      "loss": 2.8487,
      "step": 32182
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6406956911087036,
      "learning_rate": 0.0005715758535971247,
      "loss": 3.1146,
      "step": 32183
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4772971868515015,
      "learning_rate": 0.0005715741156013989,
      "loss": 3.1024,
      "step": 32184
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4783127307891846,
      "learning_rate": 0.0005715723775551825,
      "loss": 3.0686,
      "step": 32185
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5789533853530884,
      "learning_rate": 0.0005715706394584756,
      "loss": 3.3251,
      "step": 32186
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5422807931900024,
      "learning_rate": 0.0005715689013112786,
      "loss": 2.9727,
      "step": 32187
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7660105228424072,
      "learning_rate": 0.0005715671631135918,
      "loss": 3.2098,
      "step": 32188
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.0044476985931396,
      "learning_rate": 0.0005715654248654155,
      "loss": 3.006,
      "step": 32189
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6633669137954712,
      "learning_rate": 0.0005715636865667502,
      "loss": 2.9982,
      "step": 32190
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9528286457061768,
      "learning_rate": 0.000571561948217596,
      "loss": 3.2107,
      "step": 32191
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3115609884262085,
      "learning_rate": 0.0005715602098179533,
      "loss": 3.0165,
      "step": 32192
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0658814907073975,
      "learning_rate": 0.0005715584713678224,
      "loss": 3.2979,
      "step": 32193
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4660216569900513,
      "learning_rate": 0.0005715567328672037,
      "loss": 3.0764,
      "step": 32194
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4879372119903564,
      "learning_rate": 0.0005715549943160975,
      "loss": 3.1157,
      "step": 32195
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.90006422996521,
      "learning_rate": 0.0005715532557145041,
      "loss": 2.955,
      "step": 32196
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6927108764648438,
      "learning_rate": 0.0005715515170624238,
      "loss": 3.078,
      "step": 32197
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5666744709014893,
      "learning_rate": 0.0005715497783598569,
      "loss": 2.9757,
      "step": 32198
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8279244899749756,
      "learning_rate": 0.0005715480396068038,
      "loss": 3.1877,
      "step": 32199
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.765763282775879,
      "learning_rate": 0.0005715463008032647,
      "loss": 3.1363,
      "step": 32200
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.579443097114563,
      "learning_rate": 0.0005715445619492403,
      "loss": 3.0539,
      "step": 32201
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3096317052841187,
      "learning_rate": 0.0005715428230447304,
      "loss": 3.2521,
      "step": 32202
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.413794994354248,
      "learning_rate": 0.0005715410840897356,
      "loss": 3.0528,
      "step": 32203
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.094470500946045,
      "learning_rate": 0.0005715393450842562,
      "loss": 2.7526,
      "step": 32204
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.274585247039795,
      "learning_rate": 0.0005715376060282925,
      "loss": 3.2158,
      "step": 32205
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6756247282028198,
      "learning_rate": 0.0005715358669218449,
      "loss": 3.0433,
      "step": 32206
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.011312246322632,
      "learning_rate": 0.0005715341277649135,
      "loss": 3.1118,
      "step": 32207
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8721559047698975,
      "learning_rate": 0.0005715323885574989,
      "loss": 3.1969,
      "step": 32208
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5982564687728882,
      "learning_rate": 0.0005715306492996014,
      "loss": 3.0293,
      "step": 32209
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.556201457977295,
      "learning_rate": 0.0005715289099912211,
      "loss": 3.2382,
      "step": 32210
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3156516551971436,
      "learning_rate": 0.0005715271706323585,
      "loss": 3.1642,
      "step": 32211
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5064828395843506,
      "learning_rate": 0.0005715254312230138,
      "loss": 3.0156,
      "step": 32212
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4119311571121216,
      "learning_rate": 0.0005715236917631874,
      "loss": 3.3251,
      "step": 32213
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.826545000076294,
      "learning_rate": 0.0005715219522528797,
      "loss": 3.1532,
      "step": 32214
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6231507062911987,
      "learning_rate": 0.0005715202126920909,
      "loss": 3.2776,
      "step": 32215
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.90329110622406,
      "learning_rate": 0.0005715184730808214,
      "loss": 3.0101,
      "step": 32216
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5956628322601318,
      "learning_rate": 0.0005715167334190715,
      "loss": 3.2158,
      "step": 32217
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5370028018951416,
      "learning_rate": 0.0005715149937068415,
      "loss": 2.9751,
      "step": 32218
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6167100667953491,
      "learning_rate": 0.0005715132539441316,
      "loss": 3.0763,
      "step": 32219
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3795859813690186,
      "learning_rate": 0.0005715115141309425,
      "loss": 3.0462,
      "step": 32220
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.610725998878479,
      "learning_rate": 0.0005715097742672741,
      "loss": 3.0719,
      "step": 32221
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0529534816741943,
      "learning_rate": 0.000571508034353127,
      "loss": 3.1903,
      "step": 32222
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5558737516403198,
      "learning_rate": 0.0005715062943885014,
      "loss": 3.205,
      "step": 32223
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1873786449432373,
      "learning_rate": 0.0005715045543733977,
      "loss": 3.0833,
      "step": 32224
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.457821011543274,
      "learning_rate": 0.000571502814307816,
      "loss": 3.1129,
      "step": 32225
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6082369089126587,
      "learning_rate": 0.000571501074191757,
      "loss": 3.0347,
      "step": 32226
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9002554416656494,
      "learning_rate": 0.0005714993340252207,
      "loss": 2.8067,
      "step": 32227
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3570127487182617,
      "learning_rate": 0.0005714975938082077,
      "loss": 3.2068,
      "step": 32228
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2132909297943115,
      "learning_rate": 0.000571495853540718,
      "loss": 2.7742,
      "step": 32229
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8480089902877808,
      "learning_rate": 0.0005714941132227522,
      "loss": 3.0581,
      "step": 32230
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.348332166671753,
      "learning_rate": 0.0005714923728543105,
      "loss": 2.8274,
      "step": 32231
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6107436418533325,
      "learning_rate": 0.0005714906324353932,
      "loss": 3.3942,
      "step": 32232
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0542445182800293,
      "learning_rate": 0.0005714888919660007,
      "loss": 3.0971,
      "step": 32233
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9355825185775757,
      "learning_rate": 0.0005714871514461333,
      "loss": 3.2103,
      "step": 32234
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4695340394973755,
      "learning_rate": 0.0005714854108757913,
      "loss": 2.9753,
      "step": 32235
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5805349349975586,
      "learning_rate": 0.0005714836702549751,
      "loss": 3.1986,
      "step": 32236
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3196498155593872,
      "learning_rate": 0.0005714819295836849,
      "loss": 3.1093,
      "step": 32237
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6246281862258911,
      "learning_rate": 0.0005714801888619211,
      "loss": 2.9342,
      "step": 32238
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9877662658691406,
      "learning_rate": 0.000571478448089684,
      "loss": 2.9676,
      "step": 32239
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.612704873085022,
      "learning_rate": 0.000571476707266974,
      "loss": 2.9848,
      "step": 32240
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.162454128265381,
      "learning_rate": 0.0005714749663937913,
      "loss": 3.0117,
      "step": 32241
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.306078553199768,
      "learning_rate": 0.0005714732254701361,
      "loss": 3.1639,
      "step": 32242
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.516068458557129,
      "learning_rate": 0.0005714714844960091,
      "loss": 2.9569,
      "step": 32243
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6981747150421143,
      "learning_rate": 0.0005714697434714104,
      "loss": 2.9468,
      "step": 32244
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6312191486358643,
      "learning_rate": 0.0005714680023963404,
      "loss": 2.9546,
      "step": 32245
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2561582326889038,
      "learning_rate": 0.0005714662612707993,
      "loss": 2.9641,
      "step": 32246
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.095759630203247,
      "learning_rate": 0.0005714645200947876,
      "loss": 3.0189,
      "step": 32247
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3305915594100952,
      "learning_rate": 0.0005714627788683054,
      "loss": 3.0299,
      "step": 32248
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4683427810668945,
      "learning_rate": 0.0005714610375913531,
      "loss": 2.9601,
      "step": 32249
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2087371349334717,
      "learning_rate": 0.0005714592962639312,
      "loss": 3.1807,
      "step": 32250
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.636775016784668,
      "learning_rate": 0.0005714575548860398,
      "loss": 3.2236,
      "step": 32251
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2844678163528442,
      "learning_rate": 0.0005714558134576794,
      "loss": 3.1086,
      "step": 32252
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9621018171310425,
      "learning_rate": 0.0005714540719788502,
      "loss": 3.1602,
      "step": 32253
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3259689807891846,
      "learning_rate": 0.0005714523304495527,
      "loss": 3.0165,
      "step": 32254
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2891929149627686,
      "learning_rate": 0.0005714505888697868,
      "loss": 3.1802,
      "step": 32255
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.760504961013794,
      "learning_rate": 0.0005714488472395533,
      "loss": 3.4177,
      "step": 32256
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8465005159378052,
      "learning_rate": 0.0005714471055588522,
      "loss": 3.1348,
      "step": 32257
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4160939455032349,
      "learning_rate": 0.0005714453638276842,
      "loss": 3.2664,
      "step": 32258
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0040547847747803,
      "learning_rate": 0.0005714436220460493,
      "loss": 3.0064,
      "step": 32259
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3352546691894531,
      "learning_rate": 0.0005714418802139477,
      "loss": 3.2518,
      "step": 32260
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3009461164474487,
      "learning_rate": 0.0005714401383313801,
      "loss": 3.1352,
      "step": 32261
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4040544033050537,
      "learning_rate": 0.0005714383963983467,
      "loss": 3.0179,
      "step": 32262
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.511576533317566,
      "learning_rate": 0.0005714366544148477,
      "loss": 2.8988,
      "step": 32263
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.373644232749939,
      "learning_rate": 0.0005714349123808835,
      "loss": 3.104,
      "step": 32264
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.434640884399414,
      "learning_rate": 0.0005714331702964544,
      "loss": 3.1908,
      "step": 32265
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6973638534545898,
      "learning_rate": 0.0005714314281615609,
      "loss": 3.1217,
      "step": 32266
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5648796558380127,
      "learning_rate": 0.000571429685976203,
      "loss": 3.2464,
      "step": 32267
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.58062744140625,
      "learning_rate": 0.0005714279437403812,
      "loss": 2.9447,
      "step": 32268
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.2027394771575928,
      "learning_rate": 0.000571426201454096,
      "loss": 3.1711,
      "step": 32269
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1114730834960938,
      "learning_rate": 0.0005714244591173474,
      "loss": 2.8711,
      "step": 32270
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.742285966873169,
      "learning_rate": 0.000571422716730136,
      "loss": 2.9352,
      "step": 32271
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.545403242111206,
      "learning_rate": 0.0005714209742924618,
      "loss": 2.9886,
      "step": 32272
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4814889430999756,
      "learning_rate": 0.0005714192318043254,
      "loss": 3.2721,
      "step": 32273
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6346544027328491,
      "learning_rate": 0.0005714174892657271,
      "loss": 3.1362,
      "step": 32274
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7653025388717651,
      "learning_rate": 0.000571415746676667,
      "loss": 3.1795,
      "step": 32275
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5183072090148926,
      "learning_rate": 0.0005714140040371458,
      "loss": 2.8515,
      "step": 32276
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.754773736000061,
      "learning_rate": 0.0005714122613471635,
      "loss": 2.8943,
      "step": 32277
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.162059783935547,
      "learning_rate": 0.0005714105186067206,
      "loss": 2.8185,
      "step": 32278
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.002361297607422,
      "learning_rate": 0.0005714087758158173,
      "loss": 3.0689,
      "step": 32279
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6765940189361572,
      "learning_rate": 0.000571407032974454,
      "loss": 3.0248,
      "step": 32280
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.996360421180725,
      "learning_rate": 0.000571405290082631,
      "loss": 3.1158,
      "step": 32281
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.451021671295166,
      "learning_rate": 0.0005714035471403487,
      "loss": 2.9692,
      "step": 32282
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3384443521499634,
      "learning_rate": 0.0005714018041476073,
      "loss": 3.1567,
      "step": 32283
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.353859782218933,
      "learning_rate": 0.0005714000611044071,
      "loss": 2.9006,
      "step": 32284
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.391113042831421,
      "learning_rate": 0.0005713983180107486,
      "loss": 3.0952,
      "step": 32285
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3846604824066162,
      "learning_rate": 0.000571396574866632,
      "loss": 3.0614,
      "step": 32286
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3540067672729492,
      "learning_rate": 0.0005713948316720577,
      "loss": 2.9819,
      "step": 32287
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4815298318862915,
      "learning_rate": 0.0005713930884270259,
      "loss": 3.0684,
      "step": 32288
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.358392357826233,
      "learning_rate": 0.0005713913451315372,
      "loss": 3.0748,
      "step": 32289
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2508978843688965,
      "learning_rate": 0.0005713896017855916,
      "loss": 3.0662,
      "step": 32290
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4590378999710083,
      "learning_rate": 0.0005713878583891894,
      "loss": 3.0137,
      "step": 32291
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7174689769744873,
      "learning_rate": 0.0005713861149423313,
      "loss": 3.0762,
      "step": 32292
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9585905075073242,
      "learning_rate": 0.0005713843714450173,
      "loss": 3.293,
      "step": 32293
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.6724913120269775,
      "learning_rate": 0.0005713826278972478,
      "loss": 3.0599,
      "step": 32294
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3049895763397217,
      "learning_rate": 0.0005713808842990231,
      "loss": 3.0098,
      "step": 32295
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5747967958450317,
      "learning_rate": 0.0005713791406503437,
      "loss": 2.8604,
      "step": 32296
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0511529445648193,
      "learning_rate": 0.0005713773969512098,
      "loss": 3.1182,
      "step": 32297
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6880136728286743,
      "learning_rate": 0.0005713756532016216,
      "loss": 3.018,
      "step": 32298
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5831209421157837,
      "learning_rate": 0.0005713739094015797,
      "loss": 2.9626,
      "step": 32299
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8167084455490112,
      "learning_rate": 0.0005713721655510841,
      "loss": 3.2817,
      "step": 32300
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9432226419448853,
      "learning_rate": 0.0005713704216501354,
      "loss": 2.9359,
      "step": 32301
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4995970726013184,
      "learning_rate": 0.0005713686776987338,
      "loss": 3.0538,
      "step": 32302
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.543257713317871,
      "learning_rate": 0.0005713669336968796,
      "loss": 3.0971,
      "step": 32303
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.8075742721557617,
      "learning_rate": 0.0005713651896445732,
      "loss": 3.1597,
      "step": 32304
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0629167556762695,
      "learning_rate": 0.0005713634455418148,
      "loss": 3.207,
      "step": 32305
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6191011667251587,
      "learning_rate": 0.0005713617013886048,
      "loss": 2.9834,
      "step": 32306
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0620639324188232,
      "learning_rate": 0.0005713599571849437,
      "loss": 2.9854,
      "step": 32307
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.7172670364379883,
      "learning_rate": 0.0005713582129308316,
      "loss": 2.9902,
      "step": 32308
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7150346040725708,
      "learning_rate": 0.0005713564686262688,
      "loss": 3.1659,
      "step": 32309
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6447588205337524,
      "learning_rate": 0.0005713547242712559,
      "loss": 3.1284,
      "step": 32310
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2628657817840576,
      "learning_rate": 0.0005713529798657929,
      "loss": 3.1733,
      "step": 32311
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.59207022190094,
      "learning_rate": 0.0005713512354098803,
      "loss": 2.9368,
      "step": 32312
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5361632108688354,
      "learning_rate": 0.0005713494909035183,
      "loss": 3.0163,
      "step": 32313
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.282167673110962,
      "learning_rate": 0.0005713477463467074,
      "loss": 2.8587,
      "step": 32314
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0494484901428223,
      "learning_rate": 0.0005713460017394477,
      "loss": 2.9355,
      "step": 32315
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4838249683380127,
      "learning_rate": 0.0005713442570817398,
      "loss": 2.9294,
      "step": 32316
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8993829488754272,
      "learning_rate": 0.0005713425123735838,
      "loss": 2.8327,
      "step": 32317
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5027129650115967,
      "learning_rate": 0.0005713407676149802,
      "loss": 2.9005,
      "step": 32318
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5128127336502075,
      "learning_rate": 0.0005713390228059292,
      "loss": 3.1027,
      "step": 32319
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.944671392440796,
      "learning_rate": 0.0005713372779464311,
      "loss": 2.8897,
      "step": 32320
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5010144710540771,
      "learning_rate": 0.0005713355330364863,
      "loss": 3.1875,
      "step": 32321
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7397328615188599,
      "learning_rate": 0.0005713337880760951,
      "loss": 3.283,
      "step": 32322
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.007657527923584,
      "learning_rate": 0.0005713320430652578,
      "loss": 3.1604,
      "step": 32323
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.0369350910186768,
      "learning_rate": 0.0005713302980039748,
      "loss": 2.998,
      "step": 32324
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.8888325691223145,
      "learning_rate": 0.0005713285528922464,
      "loss": 3.2064,
      "step": 32325
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.897145390510559,
      "learning_rate": 0.0005713268077300728,
      "loss": 3.103,
      "step": 32326
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7575873136520386,
      "learning_rate": 0.0005713250625174545,
      "loss": 3.061,
      "step": 32327
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.222879648208618,
      "learning_rate": 0.0005713233172543917,
      "loss": 3.1326,
      "step": 32328
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3804025650024414,
      "learning_rate": 0.0005713215719408848,
      "loss": 3.1447,
      "step": 32329
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7594441175460815,
      "learning_rate": 0.0005713198265769341,
      "loss": 2.9939,
      "step": 32330
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.335094451904297,
      "learning_rate": 0.0005713180811625399,
      "loss": 2.9863,
      "step": 32331
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.870667815208435,
      "learning_rate": 0.0005713163356977026,
      "loss": 3.0444,
      "step": 32332
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8984570503234863,
      "learning_rate": 0.0005713145901824224,
      "loss": 3.1829,
      "step": 32333
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2912932634353638,
      "learning_rate": 0.0005713128446166997,
      "loss": 3.2429,
      "step": 32334
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8827836513519287,
      "learning_rate": 0.000571311099000535,
      "loss": 2.9468,
      "step": 32335
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.442054271697998,
      "learning_rate": 0.0005713093533339282,
      "loss": 3.1652,
      "step": 32336
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3456755876541138,
      "learning_rate": 0.0005713076076168799,
      "loss": 3.0742,
      "step": 32337
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4563459157943726,
      "learning_rate": 0.0005713058618493906,
      "loss": 3.3236,
      "step": 32338
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.497889757156372,
      "learning_rate": 0.0005713041160314603,
      "loss": 2.8907,
      "step": 32339
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.997319221496582,
      "learning_rate": 0.0005713023701630894,
      "loss": 2.995,
      "step": 32340
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6924467086791992,
      "learning_rate": 0.0005713006242442783,
      "loss": 2.7731,
      "step": 32341
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.881149411201477,
      "learning_rate": 0.0005712988782750273,
      "loss": 2.9862,
      "step": 32342
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5018205642700195,
      "learning_rate": 0.0005712971322553367,
      "loss": 3.0315,
      "step": 32343
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8531919717788696,
      "learning_rate": 0.0005712953861852068,
      "loss": 3.2094,
      "step": 32344
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.344862461090088,
      "learning_rate": 0.0005712936400646381,
      "loss": 2.982,
      "step": 32345
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6049563884735107,
      "learning_rate": 0.0005712918938936307,
      "loss": 3.2642,
      "step": 32346
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.1121366024017334,
      "learning_rate": 0.000571290147672185,
      "loss": 3.0547,
      "step": 32347
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2641937732696533,
      "learning_rate": 0.0005712884014003013,
      "loss": 3.0797,
      "step": 32348
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8530492782592773,
      "learning_rate": 0.0005712866550779801,
      "loss": 2.7825,
      "step": 32349
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7326029539108276,
      "learning_rate": 0.0005712849087052216,
      "loss": 2.7764,
      "step": 32350
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6790977716445923,
      "learning_rate": 0.000571283162282026,
      "loss": 3.0477,
      "step": 32351
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3406720161437988,
      "learning_rate": 0.0005712814158083938,
      "loss": 3.2444,
      "step": 32352
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6002800464630127,
      "learning_rate": 0.0005712796692843252,
      "loss": 2.9889,
      "step": 32353
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9008344411849976,
      "learning_rate": 0.0005712779227098206,
      "loss": 3.0349,
      "step": 32354
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3090747594833374,
      "learning_rate": 0.0005712761760848804,
      "loss": 3.1236,
      "step": 32355
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.765724539756775,
      "learning_rate": 0.0005712744294095048,
      "loss": 2.8239,
      "step": 32356
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4567265510559082,
      "learning_rate": 0.0005712726826836941,
      "loss": 3.012,
      "step": 32357
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2226483821868896,
      "learning_rate": 0.0005712709359074487,
      "loss": 3.1357,
      "step": 32358
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4695032835006714,
      "learning_rate": 0.000571269189080769,
      "loss": 3.3001,
      "step": 32359
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.528582811355591,
      "learning_rate": 0.0005712674422036551,
      "loss": 2.9224,
      "step": 32360
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2666425704956055,
      "learning_rate": 0.0005712656952761076,
      "loss": 2.9682,
      "step": 32361
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4662832021713257,
      "learning_rate": 0.0005712639482981267,
      "loss": 3.0531,
      "step": 32362
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.849533796310425,
      "learning_rate": 0.0005712622012697126,
      "loss": 2.9956,
      "step": 32363
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.1905081272125244,
      "learning_rate": 0.0005712604541908658,
      "loss": 3.1586,
      "step": 32364
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3355841636657715,
      "learning_rate": 0.0005712587070615864,
      "loss": 2.9469,
      "step": 32365
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8892415761947632,
      "learning_rate": 0.0005712569598818751,
      "loss": 2.9251,
      "step": 32366
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.8593685626983643,
      "learning_rate": 0.000571255212651732,
      "loss": 3.2616,
      "step": 32367
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8800060749053955,
      "learning_rate": 0.0005712534653711573,
      "loss": 3.0815,
      "step": 32368
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8483192920684814,
      "learning_rate": 0.0005712517180401515,
      "loss": 2.8887,
      "step": 32369
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9454782009124756,
      "learning_rate": 0.000571249970658715,
      "loss": 3.2104,
      "step": 32370
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5354127883911133,
      "learning_rate": 0.0005712482232268479,
      "loss": 3.1045,
      "step": 32371
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4911056756973267,
      "learning_rate": 0.0005712464757445506,
      "loss": 3.221,
      "step": 32372
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3426729440689087,
      "learning_rate": 0.0005712447282118236,
      "loss": 3.2335,
      "step": 32373
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4638595581054688,
      "learning_rate": 0.000571242980628667,
      "loss": 2.8407,
      "step": 32374
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3811880350112915,
      "learning_rate": 0.0005712412329950811,
      "loss": 3.3251,
      "step": 32375
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5968364477157593,
      "learning_rate": 0.0005712394853110665,
      "loss": 2.9953,
      "step": 32376
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3837037086486816,
      "learning_rate": 0.0005712377375766233,
      "loss": 2.9484,
      "step": 32377
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3571937084197998,
      "learning_rate": 0.0005712359897917519,
      "loss": 3.3088,
      "step": 32378
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.353222608566284,
      "learning_rate": 0.0005712342419564526,
      "loss": 2.9424,
      "step": 32379
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4121458530426025,
      "learning_rate": 0.0005712324940707258,
      "loss": 2.854,
      "step": 32380
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3796521425247192,
      "learning_rate": 0.0005712307461345717,
      "loss": 2.8009,
      "step": 32381
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.6768910884857178,
      "learning_rate": 0.0005712289981479906,
      "loss": 3.0963,
      "step": 32382
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6289801597595215,
      "learning_rate": 0.0005712272501109831,
      "loss": 3.1049,
      "step": 32383
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4673584699630737,
      "learning_rate": 0.0005712255020235492,
      "loss": 2.793,
      "step": 32384
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9785679578781128,
      "learning_rate": 0.0005712237538856894,
      "loss": 3.0204,
      "step": 32385
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8867679834365845,
      "learning_rate": 0.000571222005697404,
      "loss": 3.1145,
      "step": 32386
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5436421632766724,
      "learning_rate": 0.0005712202574586932,
      "loss": 3.2459,
      "step": 32387
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.1918846368789673,
      "learning_rate": 0.0005712185091695575,
      "loss": 3.0738,
      "step": 32388
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4455312490463257,
      "learning_rate": 0.0005712167608299971,
      "loss": 3.0631,
      "step": 32389
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2151761054992676,
      "learning_rate": 0.0005712150124400125,
      "loss": 3.0025,
      "step": 32390
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4522409439086914,
      "learning_rate": 0.000571213263999604,
      "loss": 3.326,
      "step": 32391
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.036877393722534,
      "learning_rate": 0.0005712115155087717,
      "loss": 3.1488,
      "step": 32392
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.6440372467041016,
      "learning_rate": 0.000571209766967516,
      "loss": 2.9099,
      "step": 32393
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4334176778793335,
      "learning_rate": 0.0005712080183758373,
      "loss": 3.037,
      "step": 32394
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5969871282577515,
      "learning_rate": 0.000571206269733736,
      "loss": 3.0145,
      "step": 32395
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6540274620056152,
      "learning_rate": 0.0005712045210412123,
      "loss": 3.0908,
      "step": 32396
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7124178409576416,
      "learning_rate": 0.0005712027722982665,
      "loss": 3.4762,
      "step": 32397
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3576385974884033,
      "learning_rate": 0.0005712010235048991,
      "loss": 3.0294,
      "step": 32398
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6899999380111694,
      "learning_rate": 0.0005711992746611103,
      "loss": 3.2327,
      "step": 32399
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6439541578292847,
      "learning_rate": 0.0005711975257669004,
      "loss": 3.0651,
      "step": 32400
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.384237289428711,
      "learning_rate": 0.0005711957768222696,
      "loss": 3.0496,
      "step": 32401
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5166473388671875,
      "learning_rate": 0.0005711940278272185,
      "loss": 3.034,
      "step": 32402
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.354825735092163,
      "learning_rate": 0.0005711922787817475,
      "loss": 3.0154,
      "step": 32403
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5990610122680664,
      "learning_rate": 0.0005711905296858565,
      "loss": 3.2179,
      "step": 32404
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4466900825500488,
      "learning_rate": 0.0005711887805395462,
      "loss": 3.2234,
      "step": 32405
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8806078433990479,
      "learning_rate": 0.0005711870313428167,
      "loss": 3.3379,
      "step": 32406
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.935551643371582,
      "learning_rate": 0.0005711852820956684,
      "loss": 3.0213,
      "step": 32407
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6630277633666992,
      "learning_rate": 0.0005711835327981017,
      "loss": 3.1519,
      "step": 32408
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5027121305465698,
      "learning_rate": 0.0005711817834501169,
      "loss": 3.0,
      "step": 32409
    },
    {
      "epoch": 0.42,
      "grad_norm": 4.028369426727295,
      "learning_rate": 0.0005711800340517142,
      "loss": 2.7924,
      "step": 32410
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5818778276443481,
      "learning_rate": 0.000571178284602894,
      "loss": 3.1161,
      "step": 32411
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.290162205696106,
      "learning_rate": 0.0005711765351036567,
      "loss": 3.0489,
      "step": 32412
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.020598888397217,
      "learning_rate": 0.0005711747855540025,
      "loss": 3.1493,
      "step": 32413
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5258636474609375,
      "learning_rate": 0.0005711730359539318,
      "loss": 3.1548,
      "step": 32414
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8082618713378906,
      "learning_rate": 0.0005711712863034449,
      "loss": 2.8727,
      "step": 32415
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.945945382118225,
      "learning_rate": 0.0005711695366025423,
      "loss": 3.0319,
      "step": 32416
    },
    {
      "epoch": 0.42,
      "grad_norm": 4.016175270080566,
      "learning_rate": 0.0005711677868512241,
      "loss": 2.8962,
      "step": 32417
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.095038890838623,
      "learning_rate": 0.0005711660370494906,
      "loss": 2.8258,
      "step": 32418
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.574448585510254,
      "learning_rate": 0.0005711642871973422,
      "loss": 3.135,
      "step": 32419
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1071763038635254,
      "learning_rate": 0.0005711625372947793,
      "loss": 3.0323,
      "step": 32420
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3810460567474365,
      "learning_rate": 0.0005711607873418021,
      "loss": 3.0216,
      "step": 32421
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7457911968231201,
      "learning_rate": 0.0005711590373384111,
      "loss": 3.2715,
      "step": 32422
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.810408353805542,
      "learning_rate": 0.0005711572872846065,
      "loss": 2.8981,
      "step": 32423
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8824948072433472,
      "learning_rate": 0.0005711555371803886,
      "loss": 3.1052,
      "step": 32424
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3624906539916992,
      "learning_rate": 0.0005711537870257576,
      "loss": 2.8452,
      "step": 32425
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3171616792678833,
      "learning_rate": 0.0005711520368207142,
      "loss": 3.0167,
      "step": 32426
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.591657280921936,
      "learning_rate": 0.0005711502865652586,
      "loss": 3.264,
      "step": 32427
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3355495929718018,
      "learning_rate": 0.0005711485362593909,
      "loss": 2.7399,
      "step": 32428
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.680809497833252,
      "learning_rate": 0.0005711467859031116,
      "loss": 2.854,
      "step": 32429
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3914620876312256,
      "learning_rate": 0.0005711450354964209,
      "loss": 2.9941,
      "step": 32430
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0621559619903564,
      "learning_rate": 0.0005711432850393194,
      "loss": 2.9891,
      "step": 32431
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.5693111419677734,
      "learning_rate": 0.0005711415345318071,
      "loss": 3.1815,
      "step": 32432
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9216046333312988,
      "learning_rate": 0.0005711397839738845,
      "loss": 3.1868,
      "step": 32433
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6166952848434448,
      "learning_rate": 0.000571138033365552,
      "loss": 2.9807,
      "step": 32434
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8816709518432617,
      "learning_rate": 0.0005711362827068098,
      "loss": 3.0742,
      "step": 32435
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5375633239746094,
      "learning_rate": 0.0005711345319976581,
      "loss": 2.8393,
      "step": 32436
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.487401008605957,
      "learning_rate": 0.0005711327812380975,
      "loss": 2.9412,
      "step": 32437
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.850183129310608,
      "learning_rate": 0.0005711310304281281,
      "loss": 2.7882,
      "step": 32438
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.215001344680786,
      "learning_rate": 0.0005711292795677504,
      "loss": 3.0833,
      "step": 32439
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6903468370437622,
      "learning_rate": 0.0005711275286569646,
      "loss": 2.984,
      "step": 32440
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7816263437271118,
      "learning_rate": 0.0005711257776957711,
      "loss": 3.11,
      "step": 32441
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.48809015750885,
      "learning_rate": 0.0005711240266841703,
      "loss": 3.029,
      "step": 32442
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.505913496017456,
      "learning_rate": 0.0005711222756221622,
      "loss": 3.1299,
      "step": 32443
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.918722152709961,
      "learning_rate": 0.0005711205245097475,
      "loss": 3.2543,
      "step": 32444
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3434500694274902,
      "learning_rate": 0.0005711187733469264,
      "loss": 3.1798,
      "step": 32445
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3577567338943481,
      "learning_rate": 0.0005711170221336992,
      "loss": 3.0456,
      "step": 32446
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5853168964385986,
      "learning_rate": 0.0005711152708700661,
      "loss": 3.0279,
      "step": 32447
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.602998971939087,
      "learning_rate": 0.0005711135195560277,
      "loss": 2.879,
      "step": 32448
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.369016170501709,
      "learning_rate": 0.000571111768191584,
      "loss": 2.9652,
      "step": 32449
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6039751768112183,
      "learning_rate": 0.0005711100167767357,
      "loss": 3.136,
      "step": 32450
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4534833431243896,
      "learning_rate": 0.0005711082653114829,
      "loss": 3.2381,
      "step": 32451
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.592751383781433,
      "learning_rate": 0.0005711065137958258,
      "loss": 2.869,
      "step": 32452
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5325491428375244,
      "learning_rate": 0.0005711047622297651,
      "loss": 2.8009,
      "step": 32453
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.715430498123169,
      "learning_rate": 0.0005711030106133008,
      "loss": 3.1481,
      "step": 32454
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.273776054382324,
      "learning_rate": 0.0005711012589464333,
      "loss": 2.994,
      "step": 32455
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4496492147445679,
      "learning_rate": 0.000571099507229163,
      "loss": 3.1129,
      "step": 32456
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3246263265609741,
      "learning_rate": 0.0005710977554614903,
      "loss": 3.2501,
      "step": 32457
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.913973808288574,
      "learning_rate": 0.0005710960036434152,
      "loss": 2.9141,
      "step": 32458
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7451752424240112,
      "learning_rate": 0.0005710942517749382,
      "loss": 3.0021,
      "step": 32459
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4780317544937134,
      "learning_rate": 0.0005710924998560599,
      "loss": 2.9663,
      "step": 32460
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4641233682632446,
      "learning_rate": 0.0005710907478867803,
      "loss": 3.0678,
      "step": 32461
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2996885776519775,
      "learning_rate": 0.0005710889958670997,
      "loss": 3.1639,
      "step": 32462
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3432466983795166,
      "learning_rate": 0.0005710872437970187,
      "loss": 3.1004,
      "step": 32463
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.292158842086792,
      "learning_rate": 0.0005710854916765373,
      "loss": 3.0606,
      "step": 32464
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4905166625976562,
      "learning_rate": 0.0005710837395056562,
      "loss": 3.2354,
      "step": 32465
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6822890043258667,
      "learning_rate": 0.0005710819872843753,
      "loss": 3.1341,
      "step": 32466
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.040003538131714,
      "learning_rate": 0.0005710802350126952,
      "loss": 3.1863,
      "step": 32467
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7157878875732422,
      "learning_rate": 0.0005710784826906162,
      "loss": 3.0538,
      "step": 32468
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4990266561508179,
      "learning_rate": 0.0005710767303181387,
      "loss": 3.2201,
      "step": 32469
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4578524827957153,
      "learning_rate": 0.0005710749778952628,
      "loss": 2.9697,
      "step": 32470
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9374735355377197,
      "learning_rate": 0.000571073225421989,
      "loss": 3.018,
      "step": 32471
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.831284999847412,
      "learning_rate": 0.0005710714728983174,
      "loss": 3.059,
      "step": 32472
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.374738097190857,
      "learning_rate": 0.0005710697203242486,
      "loss": 2.9568,
      "step": 32473
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4407029151916504,
      "learning_rate": 0.0005710679676997829,
      "loss": 2.868,
      "step": 32474
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4366878271102905,
      "learning_rate": 0.0005710662150249206,
      "loss": 2.7767,
      "step": 32475
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6343328952789307,
      "learning_rate": 0.0005710644622996618,
      "loss": 3.0898,
      "step": 32476
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5378748178482056,
      "learning_rate": 0.000571062709524007,
      "loss": 3.1893,
      "step": 32477
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3189716339111328,
      "learning_rate": 0.0005710609566979566,
      "loss": 3.3283,
      "step": 32478
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7784823179244995,
      "learning_rate": 0.0005710592038215108,
      "loss": 2.829,
      "step": 32479
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2407946586608887,
      "learning_rate": 0.0005710574508946702,
      "loss": 3.0805,
      "step": 32480
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9644261598587036,
      "learning_rate": 0.0005710556979174346,
      "loss": 3.162,
      "step": 32481
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8842918872833252,
      "learning_rate": 0.0005710539448898048,
      "loss": 3.2956,
      "step": 32482
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.329537272453308,
      "learning_rate": 0.0005710521918117809,
      "loss": 3.1811,
      "step": 32483
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5673658847808838,
      "learning_rate": 0.0005710504386833632,
      "loss": 3.0574,
      "step": 32484
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7416430711746216,
      "learning_rate": 0.0005710486855045523,
      "loss": 2.9063,
      "step": 32485
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.350884199142456,
      "learning_rate": 0.0005710469322753482,
      "loss": 2.9842,
      "step": 32486
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8324552774429321,
      "learning_rate": 0.0005710451789957514,
      "loss": 2.9218,
      "step": 32487
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6069706678390503,
      "learning_rate": 0.0005710434256657622,
      "loss": 3.1128,
      "step": 32488
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3288785219192505,
      "learning_rate": 0.0005710416722853809,
      "loss": 3.0285,
      "step": 32489
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.064419984817505,
      "learning_rate": 0.0005710399188546078,
      "loss": 3.0159,
      "step": 32490
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6983994245529175,
      "learning_rate": 0.0005710381653734433,
      "loss": 3.1648,
      "step": 32491
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4666513204574585,
      "learning_rate": 0.0005710364118418877,
      "loss": 3.1918,
      "step": 32492
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4101238250732422,
      "learning_rate": 0.0005710346582599412,
      "loss": 3.0441,
      "step": 32493
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.919232726097107,
      "learning_rate": 0.0005710329046276043,
      "loss": 2.8897,
      "step": 32494
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2049933671951294,
      "learning_rate": 0.0005710311509448773,
      "loss": 2.9249,
      "step": 32495
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6042277812957764,
      "learning_rate": 0.0005710293972117606,
      "loss": 3.0958,
      "step": 32496
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.686147928237915,
      "learning_rate": 0.0005710276434282542,
      "loss": 2.8666,
      "step": 32497
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3265926837921143,
      "learning_rate": 0.0005710258895943589,
      "loss": 3.004,
      "step": 32498
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6183475255966187,
      "learning_rate": 0.0005710241357100745,
      "loss": 2.9806,
      "step": 32499
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9483485221862793,
      "learning_rate": 0.0005710223817754018,
      "loss": 2.877,
      "step": 32500
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.390982747077942,
      "learning_rate": 0.0005710206277903408,
      "loss": 3.035,
      "step": 32501
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4626485109329224,
      "learning_rate": 0.000571018873754892,
      "loss": 3.1641,
      "step": 32502
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7876697778701782,
      "learning_rate": 0.0005710171196690556,
      "loss": 3.1293,
      "step": 32503
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.754425287246704,
      "learning_rate": 0.0005710153655328322,
      "loss": 3.0283,
      "step": 32504
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3993290662765503,
      "learning_rate": 0.0005710136113462217,
      "loss": 3.466,
      "step": 32505
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5321344137191772,
      "learning_rate": 0.0005710118571092248,
      "loss": 3.0265,
      "step": 32506
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9070242643356323,
      "learning_rate": 0.0005710101028218416,
      "loss": 2.7953,
      "step": 32507
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.725398302078247,
      "learning_rate": 0.0005710083484840726,
      "loss": 3.0452,
      "step": 32508
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8201961517333984,
      "learning_rate": 0.000571006594095918,
      "loss": 2.9772,
      "step": 32509
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.11067795753479,
      "learning_rate": 0.0005710048396573781,
      "loss": 3.2222,
      "step": 32510
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8294767141342163,
      "learning_rate": 0.0005710030851684533,
      "loss": 2.9343,
      "step": 32511
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8479963541030884,
      "learning_rate": 0.0005710013306291439,
      "loss": 3.1324,
      "step": 32512
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9154855012893677,
      "learning_rate": 0.0005709995760394502,
      "loss": 2.6494,
      "step": 32513
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2068896293640137,
      "learning_rate": 0.0005709978213993727,
      "loss": 3.0832,
      "step": 32514
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6830333471298218,
      "learning_rate": 0.0005709960667089116,
      "loss": 3.1712,
      "step": 32515
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0240330696105957,
      "learning_rate": 0.0005709943119680671,
      "loss": 3.1458,
      "step": 32516
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.571630597114563,
      "learning_rate": 0.0005709925571768396,
      "loss": 2.9542,
      "step": 32517
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.9918644428253174,
      "learning_rate": 0.0005709908023352296,
      "loss": 2.8393,
      "step": 32518
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.0090818405151367,
      "learning_rate": 0.0005709890474432372,
      "loss": 2.9809,
      "step": 32519
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.648653268814087,
      "learning_rate": 0.0005709872925008628,
      "loss": 2.9132,
      "step": 32520
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3345996141433716,
      "learning_rate": 0.0005709855375081068,
      "loss": 2.8996,
      "step": 32521
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.6044623851776123,
      "learning_rate": 0.0005709837824649695,
      "loss": 2.984,
      "step": 32522
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5025956630706787,
      "learning_rate": 0.0005709820273714512,
      "loss": 3.1986,
      "step": 32523
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3652740716934204,
      "learning_rate": 0.0005709802722275521,
      "loss": 2.9233,
      "step": 32524
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6204530000686646,
      "learning_rate": 0.0005709785170332728,
      "loss": 2.8861,
      "step": 32525
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5913891792297363,
      "learning_rate": 0.0005709767617886135,
      "loss": 3.0164,
      "step": 32526
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6139436960220337,
      "learning_rate": 0.0005709750064935744,
      "loss": 3.2298,
      "step": 32527
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.643507957458496,
      "learning_rate": 0.000570973251148156,
      "loss": 3.2502,
      "step": 32528
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7513824701309204,
      "learning_rate": 0.0005709714957523585,
      "loss": 2.8897,
      "step": 32529
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.776452660560608,
      "learning_rate": 0.0005709697403061824,
      "loss": 3.1787,
      "step": 32530
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9635555744171143,
      "learning_rate": 0.0005709679848096278,
      "loss": 3.0099,
      "step": 32531
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5056607723236084,
      "learning_rate": 0.0005709662292626951,
      "loss": 3.1705,
      "step": 32532
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3589556217193604,
      "learning_rate": 0.0005709644736653848,
      "loss": 2.8284,
      "step": 32533
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4324336051940918,
      "learning_rate": 0.0005709627180176969,
      "loss": 3.2415,
      "step": 32534
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2857078313827515,
      "learning_rate": 0.0005709609623196321,
      "loss": 2.7313,
      "step": 32535
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7410062551498413,
      "learning_rate": 0.0005709592065711904,
      "loss": 2.936,
      "step": 32536
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.948391318321228,
      "learning_rate": 0.0005709574507723723,
      "loss": 3.2483,
      "step": 32537
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6440571546554565,
      "learning_rate": 0.0005709556949231783,
      "loss": 3.2071,
      "step": 32538
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6153388023376465,
      "learning_rate": 0.0005709539390236082,
      "loss": 2.9638,
      "step": 32539
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5746828317642212,
      "learning_rate": 0.0005709521830736629,
      "loss": 3.0308,
      "step": 32540
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5831633806228638,
      "learning_rate": 0.0005709504270733423,
      "loss": 3.1107,
      "step": 32541
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.189781904220581,
      "learning_rate": 0.000570948671022647,
      "loss": 3.2175,
      "step": 32542
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9328293800354004,
      "learning_rate": 0.0005709469149215772,
      "loss": 3.0942,
      "step": 32543
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4780021905899048,
      "learning_rate": 0.0005709451587701333,
      "loss": 3.1307,
      "step": 32544
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5324138402938843,
      "learning_rate": 0.0005709434025683156,
      "loss": 2.9106,
      "step": 32545
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0459442138671875,
      "learning_rate": 0.0005709416463161243,
      "loss": 3.004,
      "step": 32546
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5266107320785522,
      "learning_rate": 0.0005709398900135598,
      "loss": 3.031,
      "step": 32547
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3434096574783325,
      "learning_rate": 0.0005709381336606226,
      "loss": 3.2797,
      "step": 32548
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7436293363571167,
      "learning_rate": 0.0005709363772573128,
      "loss": 3.2981,
      "step": 32549
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5623414516448975,
      "learning_rate": 0.0005709346208036308,
      "loss": 2.9968,
      "step": 32550
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9307500123977661,
      "learning_rate": 0.000570932864299577,
      "loss": 2.8985,
      "step": 32551
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3249990940093994,
      "learning_rate": 0.0005709311077451517,
      "loss": 3.1324,
      "step": 32552
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7671080827713013,
      "learning_rate": 0.0005709293511403552,
      "loss": 3.0717,
      "step": 32553
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.67423677444458,
      "learning_rate": 0.0005709275944851877,
      "loss": 3.0262,
      "step": 32554
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0779600143432617,
      "learning_rate": 0.0005709258377796496,
      "loss": 3.1133,
      "step": 32555
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8627697229385376,
      "learning_rate": 0.0005709240810237414,
      "loss": 2.7931,
      "step": 32556
    },
    {
      "epoch": 0.42,
      "grad_norm": 4.77514123916626,
      "learning_rate": 0.0005709223242174634,
      "loss": 3.0671,
      "step": 32557
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5002915859222412,
      "learning_rate": 0.0005709205673608156,
      "loss": 3.1669,
      "step": 32558
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.042429208755493,
      "learning_rate": 0.0005709188104537987,
      "loss": 2.9981,
      "step": 32559
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7293264865875244,
      "learning_rate": 0.0005709170534964128,
      "loss": 3.1216,
      "step": 32560
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4086934328079224,
      "learning_rate": 0.0005709152964886583,
      "loss": 3.1989,
      "step": 32561
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.849157691001892,
      "learning_rate": 0.0005709135394305357,
      "loss": 3.1091,
      "step": 32562
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.546729564666748,
      "learning_rate": 0.0005709117823220449,
      "loss": 2.9988,
      "step": 32563
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6984883546829224,
      "learning_rate": 0.0005709100251631867,
      "loss": 3.057,
      "step": 32564
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5869556665420532,
      "learning_rate": 0.0005709082679539612,
      "loss": 3.1515,
      "step": 32565
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7573586702346802,
      "learning_rate": 0.0005709065106943686,
      "loss": 3.0373,
      "step": 32566
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.709044098854065,
      "learning_rate": 0.0005709047533844093,
      "loss": 3.0076,
      "step": 32567
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.871657133102417,
      "learning_rate": 0.0005709029960240839,
      "loss": 3.2818,
      "step": 32568
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5066585540771484,
      "learning_rate": 0.0005709012386133925,
      "loss": 3.1032,
      "step": 32569
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.592067837715149,
      "learning_rate": 0.0005708994811523354,
      "loss": 2.9669,
      "step": 32570
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1467413902282715,
      "learning_rate": 0.000570897723640913,
      "loss": 3.0819,
      "step": 32571
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.4653897285461426,
      "learning_rate": 0.0005708959660791255,
      "loss": 2.9677,
      "step": 32572
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5605638027191162,
      "learning_rate": 0.0005708942084669734,
      "loss": 3.1925,
      "step": 32573
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7072633504867554,
      "learning_rate": 0.000570892450804457,
      "loss": 3.3052,
      "step": 32574
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2669639587402344,
      "learning_rate": 0.0005708906930915765,
      "loss": 3.2818,
      "step": 32575
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3222604990005493,
      "learning_rate": 0.0005708889353283323,
      "loss": 2.8973,
      "step": 32576
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9584468603134155,
      "learning_rate": 0.0005708871775147248,
      "loss": 3.0407,
      "step": 32577
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.284686803817749,
      "learning_rate": 0.0005708854196507542,
      "loss": 3.0436,
      "step": 32578
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3016959428787231,
      "learning_rate": 0.0005708836617364209,
      "loss": 2.9827,
      "step": 32579
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7755144834518433,
      "learning_rate": 0.0005708819037717252,
      "loss": 2.9611,
      "step": 32580
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6018955707550049,
      "learning_rate": 0.0005708801457566675,
      "loss": 3.0356,
      "step": 32581
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.807173252105713,
      "learning_rate": 0.0005708783876912481,
      "loss": 3.0378,
      "step": 32582
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.3896000385284424,
      "learning_rate": 0.0005708766295754672,
      "loss": 3.063,
      "step": 32583
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8384990692138672,
      "learning_rate": 0.0005708748714093252,
      "loss": 3.1938,
      "step": 32584
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4019439220428467,
      "learning_rate": 0.0005708731131928225,
      "loss": 3.0996,
      "step": 32585
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0735995769500732,
      "learning_rate": 0.0005708713549259594,
      "loss": 3.3073,
      "step": 32586
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5856012105941772,
      "learning_rate": 0.0005708695966087362,
      "loss": 2.8818,
      "step": 32587
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9842805862426758,
      "learning_rate": 0.0005708678382411532,
      "loss": 2.9623,
      "step": 32588
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.025285243988037,
      "learning_rate": 0.0005708660798232107,
      "loss": 3.0947,
      "step": 32589
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.416186809539795,
      "learning_rate": 0.0005708643213549092,
      "loss": 2.9404,
      "step": 32590
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.36322820186615,
      "learning_rate": 0.0005708625628362489,
      "loss": 2.9525,
      "step": 32591
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.8242080211639404,
      "learning_rate": 0.0005708608042672299,
      "loss": 2.9487,
      "step": 32592
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.390834331512451,
      "learning_rate": 0.0005708590456478531,
      "loss": 2.8449,
      "step": 32593
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.1735494136810303,
      "learning_rate": 0.0005708572869781184,
      "loss": 3.2101,
      "step": 32594
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7767831087112427,
      "learning_rate": 0.0005708555282580261,
      "loss": 3.2154,
      "step": 32595
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.8984594345092773,
      "learning_rate": 0.0005708537694875767,
      "loss": 3.0659,
      "step": 32596
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0433969497680664,
      "learning_rate": 0.0005708520106667705,
      "loss": 2.8798,
      "step": 32597
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.289646029472351,
      "learning_rate": 0.0005708502517956077,
      "loss": 3.0326,
      "step": 32598
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.2055509090423584,
      "learning_rate": 0.0005708484928740889,
      "loss": 3.1736,
      "step": 32599
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.7232929468154907,
      "learning_rate": 0.000570846733902214,
      "loss": 3.2762,
      "step": 32600
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2444895505905151,
      "learning_rate": 0.0005708449748799837,
      "loss": 3.0865,
      "step": 32601
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.6708329916000366,
      "learning_rate": 0.0005708432158073983,
      "loss": 2.912,
      "step": 32602
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4562625885009766,
      "learning_rate": 0.000570841456684458,
      "loss": 3.3187,
      "step": 32603
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.409056544303894,
      "learning_rate": 0.0005708396975111632,
      "loss": 3.3231,
      "step": 32604
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.844797372817993,
      "learning_rate": 0.0005708379382875141,
      "loss": 3.0994,
      "step": 32605
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2630316019058228,
      "learning_rate": 0.0005708361790135112,
      "loss": 3.0384,
      "step": 32606
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8752609491348267,
      "learning_rate": 0.0005708344196891546,
      "loss": 3.077,
      "step": 32607
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8215265274047852,
      "learning_rate": 0.0005708326603144449,
      "loss": 2.9694,
      "step": 32608
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2318974733352661,
      "learning_rate": 0.0005708309008893822,
      "loss": 3.207,
      "step": 32609
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4904406070709229,
      "learning_rate": 0.0005708291414139669,
      "loss": 2.9678,
      "step": 32610
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5120970010757446,
      "learning_rate": 0.0005708273818881994,
      "loss": 3.2842,
      "step": 32611
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.529159426689148,
      "learning_rate": 0.0005708256223120801,
      "loss": 3.0879,
      "step": 32612
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3112633228302002,
      "learning_rate": 0.0005708238626856091,
      "loss": 2.8334,
      "step": 32613
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9685720205307007,
      "learning_rate": 0.0005708221030087868,
      "loss": 3.1433,
      "step": 32614
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.8726677894592285,
      "learning_rate": 0.0005708203432816136,
      "loss": 3.1949,
      "step": 32615
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5892786979675293,
      "learning_rate": 0.0005708185835040898,
      "loss": 3.0545,
      "step": 32616
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.851641297340393,
      "learning_rate": 0.0005708168236762157,
      "loss": 2.9741,
      "step": 32617
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.4396088123321533,
      "learning_rate": 0.0005708150637979916,
      "loss": 3.1656,
      "step": 32618
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5466879606246948,
      "learning_rate": 0.000570813303869418,
      "loss": 3.1906,
      "step": 32619
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5587750673294067,
      "learning_rate": 0.000570811543890495,
      "loss": 3.0252,
      "step": 32620
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.958092212677002,
      "learning_rate": 0.0005708097838612229,
      "loss": 3.1283,
      "step": 32621
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.571790099143982,
      "learning_rate": 0.0005708080237816024,
      "loss": 3.0261,
      "step": 32622
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3455116748809814,
      "learning_rate": 0.0005708062636516334,
      "loss": 2.9121,
      "step": 32623
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.350740671157837,
      "learning_rate": 0.0005708045034713163,
      "loss": 3.0097,
      "step": 32624
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4384691715240479,
      "learning_rate": 0.0005708027432406518,
      "loss": 3.1341,
      "step": 32625
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.46892249584198,
      "learning_rate": 0.0005708009829596398,
      "loss": 3.2145,
      "step": 32626
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3483502864837646,
      "learning_rate": 0.0005707992226282808,
      "loss": 3.1816,
      "step": 32627
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3005921840667725,
      "learning_rate": 0.0005707974622465751,
      "loss": 3.2304,
      "step": 32628
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.475885272026062,
      "learning_rate": 0.0005707957018145231,
      "loss": 2.9981,
      "step": 32629
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4326812028884888,
      "learning_rate": 0.0005707939413321251,
      "loss": 2.9263,
      "step": 32630
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.284692406654358,
      "learning_rate": 0.0005707921807993812,
      "loss": 2.8772,
      "step": 32631
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4764525890350342,
      "learning_rate": 0.0005707904202162922,
      "loss": 3.1795,
      "step": 32632
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.2740569114685059,
      "learning_rate": 0.000570788659582858,
      "loss": 3.1161,
      "step": 32633
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.3240783214569092,
      "learning_rate": 0.0005707868988990791,
      "loss": 3.1195,
      "step": 32634
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.0223300457000732,
      "learning_rate": 0.0005707851381649557,
      "loss": 3.095,
      "step": 32635
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4903395175933838,
      "learning_rate": 0.0005707833773804883,
      "loss": 3.1455,
      "step": 32636
    },
    {
      "epoch": 0.42,
      "grad_norm": 2.5750269889831543,
      "learning_rate": 0.0005707816165456772,
      "loss": 2.9703,
      "step": 32637
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.4405603408813477,
      "learning_rate": 0.0005707798556605227,
      "loss": 2.8305,
      "step": 32638
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.5837184190750122,
      "learning_rate": 0.000570778094725025,
      "loss": 3.2888,
      "step": 32639
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.9749977588653564,
      "learning_rate": 0.0005707763337391847,
      "loss": 3.1888,
      "step": 32640
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3162976503372192,
      "learning_rate": 0.0005707745727030018,
      "loss": 3.1347,
      "step": 32641
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7058056592941284,
      "learning_rate": 0.0005707728116164769,
      "loss": 3.1637,
      "step": 32642
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.71546471118927,
      "learning_rate": 0.0005707710504796102,
      "loss": 3.0491,
      "step": 32643
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6416881084442139,
      "learning_rate": 0.000570769289292402,
      "loss": 3.0982,
      "step": 32644
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.580303430557251,
      "learning_rate": 0.0005707675280548528,
      "loss": 2.8357,
      "step": 32645
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.2528605461120605,
      "learning_rate": 0.0005707657667669627,
      "loss": 3.2766,
      "step": 32646
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1344358921051025,
      "learning_rate": 0.0005707640054287322,
      "loss": 3.075,
      "step": 32647
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3279480934143066,
      "learning_rate": 0.0005707622440401615,
      "loss": 2.9573,
      "step": 32648
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1293952465057373,
      "learning_rate": 0.0005707604826012511,
      "loss": 3.0861,
      "step": 32649
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.141589879989624,
      "learning_rate": 0.000570758721112001,
      "loss": 3.1151,
      "step": 32650
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.574686288833618,
      "learning_rate": 0.0005707569595724119,
      "loss": 2.8087,
      "step": 32651
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5731056928634644,
      "learning_rate": 0.000570755197982484,
      "loss": 3.0473,
      "step": 32652
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.671425223350525,
      "learning_rate": 0.0005707534363422176,
      "loss": 3.2794,
      "step": 32653
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7233010530471802,
      "learning_rate": 0.0005707516746516129,
      "loss": 2.9624,
      "step": 32654
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.814935326576233,
      "learning_rate": 0.0005707499129106704,
      "loss": 3.1338,
      "step": 32655
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9422277212142944,
      "learning_rate": 0.0005707481511193904,
      "loss": 3.1182,
      "step": 32656
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.564021348953247,
      "learning_rate": 0.0005707463892777732,
      "loss": 3.1054,
      "step": 32657
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5537376403808594,
      "learning_rate": 0.0005707446273858192,
      "loss": 3.0574,
      "step": 32658
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4749113321304321,
      "learning_rate": 0.0005707428654435285,
      "loss": 3.1368,
      "step": 32659
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8324624300003052,
      "learning_rate": 0.0005707411034509019,
      "loss": 3.1113,
      "step": 32660
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.946129560470581,
      "learning_rate": 0.000570739341407939,
      "loss": 3.1491,
      "step": 32661
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4812020063400269,
      "learning_rate": 0.0005707375793146408,
      "loss": 3.3209,
      "step": 32662
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.39915132522583,
      "learning_rate": 0.0005707358171710074,
      "loss": 2.9844,
      "step": 32663
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.11549973487854,
      "learning_rate": 0.000570734054977039,
      "loss": 3.017,
      "step": 32664
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.818772315979004,
      "learning_rate": 0.0005707322927327361,
      "loss": 3.0891,
      "step": 32665
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5295122861862183,
      "learning_rate": 0.0005707305304380989,
      "loss": 2.8861,
      "step": 32666
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5143083333969116,
      "learning_rate": 0.0005707287680931279,
      "loss": 2.8895,
      "step": 32667
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.6842234134674072,
      "learning_rate": 0.0005707270056978232,
      "loss": 2.9334,
      "step": 32668
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.7784550189971924,
      "learning_rate": 0.0005707252432521852,
      "loss": 2.9599,
      "step": 32669
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9529695510864258,
      "learning_rate": 0.0005707234807562143,
      "loss": 3.0838,
      "step": 32670
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6294537782669067,
      "learning_rate": 0.0005707217182099109,
      "loss": 3.1236,
      "step": 32671
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0514533519744873,
      "learning_rate": 0.0005707199556132752,
      "loss": 3.0912,
      "step": 32672
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.045309066772461,
      "learning_rate": 0.0005707181929663074,
      "loss": 3.2095,
      "step": 32673
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.839579463005066,
      "learning_rate": 0.0005707164302690081,
      "loss": 2.9844,
      "step": 32674
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9432371854782104,
      "learning_rate": 0.0005707146675213774,
      "loss": 2.7219,
      "step": 32675
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.7361807823181152,
      "learning_rate": 0.0005707129047234159,
      "loss": 3.1033,
      "step": 32676
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.454240083694458,
      "learning_rate": 0.0005707111418751236,
      "loss": 3.2147,
      "step": 32677
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6435328722000122,
      "learning_rate": 0.0005707093789765011,
      "loss": 2.8892,
      "step": 32678
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4109312295913696,
      "learning_rate": 0.0005707076160275486,
      "loss": 3.2439,
      "step": 32679
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.245348572731018,
      "learning_rate": 0.0005707058530282664,
      "loss": 3.2278,
      "step": 32680
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8073002099990845,
      "learning_rate": 0.0005707040899786548,
      "loss": 2.798,
      "step": 32681
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5089271068572998,
      "learning_rate": 0.0005707023268787144,
      "loss": 3.1558,
      "step": 32682
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.840613603591919,
      "learning_rate": 0.0005707005637284452,
      "loss": 3.0108,
      "step": 32683
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7554328441619873,
      "learning_rate": 0.0005706988005278477,
      "loss": 3.0802,
      "step": 32684
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5694656372070312,
      "learning_rate": 0.0005706970372769221,
      "loss": 3.1453,
      "step": 32685
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.871020793914795,
      "learning_rate": 0.0005706952739756689,
      "loss": 2.9204,
      "step": 32686
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5580101013183594,
      "learning_rate": 0.0005706935106240883,
      "loss": 3.1987,
      "step": 32687
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4266209602355957,
      "learning_rate": 0.0005706917472221806,
      "loss": 2.9963,
      "step": 32688
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6797733306884766,
      "learning_rate": 0.0005706899837699463,
      "loss": 3.0301,
      "step": 32689
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3854228258132935,
      "learning_rate": 0.0005706882202673856,
      "loss": 2.813,
      "step": 32690
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6974256038665771,
      "learning_rate": 0.0005706864567144988,
      "loss": 3.2429,
      "step": 32691
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5680795907974243,
      "learning_rate": 0.0005706846931112864,
      "loss": 3.1012,
      "step": 32692
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.249602794647217,
      "learning_rate": 0.0005706829294577485,
      "loss": 3.1036,
      "step": 32693
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7718840837478638,
      "learning_rate": 0.0005706811657538855,
      "loss": 3.2884,
      "step": 32694
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7537716627120972,
      "learning_rate": 0.0005706794019996979,
      "loss": 3.3518,
      "step": 32695
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4736263751983643,
      "learning_rate": 0.0005706776381951857,
      "loss": 3.0949,
      "step": 32696
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6654759645462036,
      "learning_rate": 0.0005706758743403496,
      "loss": 3.2861,
      "step": 32697
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1927034854888916,
      "learning_rate": 0.0005706741104351897,
      "loss": 2.8165,
      "step": 32698
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4736735820770264,
      "learning_rate": 0.0005706723464797063,
      "loss": 2.9726,
      "step": 32699
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7815358638763428,
      "learning_rate": 0.0005706705824738999,
      "loss": 3.0717,
      "step": 32700
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5509834289550781,
      "learning_rate": 0.0005706688184177707,
      "loss": 3.237,
      "step": 32701
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4047136306762695,
      "learning_rate": 0.000570667054311319,
      "loss": 2.9961,
      "step": 32702
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5178362131118774,
      "learning_rate": 0.0005706652901545452,
      "loss": 3.0817,
      "step": 32703
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3080886602401733,
      "learning_rate": 0.0005706635259474496,
      "loss": 3.1475,
      "step": 32704
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.461866855621338,
      "learning_rate": 0.0005706617616900326,
      "loss": 2.9754,
      "step": 32705
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4302781820297241,
      "learning_rate": 0.0005706599973822944,
      "loss": 3.2605,
      "step": 32706
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.687639594078064,
      "learning_rate": 0.0005706582330242355,
      "loss": 3.0781,
      "step": 32707
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4579784870147705,
      "learning_rate": 0.0005706564686158561,
      "loss": 2.771,
      "step": 32708
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.573695182800293,
      "learning_rate": 0.0005706547041571565,
      "loss": 2.8134,
      "step": 32709
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5767842531204224,
      "learning_rate": 0.0005706529396481371,
      "loss": 3.0219,
      "step": 32710
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2855767011642456,
      "learning_rate": 0.0005706511750887982,
      "loss": 2.9309,
      "step": 32711
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6238539218902588,
      "learning_rate": 0.0005706494104791402,
      "loss": 2.9611,
      "step": 32712
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5047268867492676,
      "learning_rate": 0.0005706476458191632,
      "loss": 2.9357,
      "step": 32713
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7137473821640015,
      "learning_rate": 0.0005706458811088679,
      "loss": 3.0954,
      "step": 32714
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.353346586227417,
      "learning_rate": 0.0005706441163482542,
      "loss": 3.2663,
      "step": 32715
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6035635471343994,
      "learning_rate": 0.0005706423515373228,
      "loss": 2.9546,
      "step": 32716
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4969336986541748,
      "learning_rate": 0.0005706405866760739,
      "loss": 2.9967,
      "step": 32717
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.62489652633667,
      "learning_rate": 0.0005706388217645077,
      "loss": 3.1007,
      "step": 32718
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5677071809768677,
      "learning_rate": 0.0005706370568026248,
      "loss": 3.0973,
      "step": 32719
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4366029500961304,
      "learning_rate": 0.0005706352917904253,
      "loss": 2.8413,
      "step": 32720
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6268571615219116,
      "learning_rate": 0.0005706335267279095,
      "loss": 2.9942,
      "step": 32721
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4700194597244263,
      "learning_rate": 0.0005706317616150779,
      "loss": 2.9873,
      "step": 32722
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.458534598350525,
      "learning_rate": 0.0005706299964519306,
      "loss": 3.1109,
      "step": 32723
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5523784160614014,
      "learning_rate": 0.0005706282312384683,
      "loss": 3.0109,
      "step": 32724
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7229175567626953,
      "learning_rate": 0.000570626465974691,
      "loss": 3.0122,
      "step": 32725
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6326848268508911,
      "learning_rate": 0.0005706247006605991,
      "loss": 2.6392,
      "step": 32726
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9945675134658813,
      "learning_rate": 0.0005706229352961929,
      "loss": 3.2151,
      "step": 32727
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.724149703979492,
      "learning_rate": 0.0005706211698814729,
      "loss": 2.6725,
      "step": 32728
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.670697808265686,
      "learning_rate": 0.0005706194044164394,
      "loss": 2.9323,
      "step": 32729
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1393775939941406,
      "learning_rate": 0.0005706176389010924,
      "loss": 3.1279,
      "step": 32730
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.291489601135254,
      "learning_rate": 0.0005706158733354327,
      "loss": 3.1044,
      "step": 32731
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.540754795074463,
      "learning_rate": 0.0005706141077194603,
      "loss": 3.099,
      "step": 32732
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5181808471679688,
      "learning_rate": 0.0005706123420531756,
      "loss": 3.0535,
      "step": 32733
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.42010760307312,
      "learning_rate": 0.0005706105763365791,
      "loss": 3.0213,
      "step": 32734
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.56404185295105,
      "learning_rate": 0.0005706088105696707,
      "loss": 3.337,
      "step": 32735
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.147372245788574,
      "learning_rate": 0.0005706070447524513,
      "loss": 2.9559,
      "step": 32736
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8403503894805908,
      "learning_rate": 0.0005706052788849209,
      "loss": 3.1645,
      "step": 32737
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9570058584213257,
      "learning_rate": 0.0005706035129670798,
      "loss": 2.9781,
      "step": 32738
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.999885320663452,
      "learning_rate": 0.0005706017469989283,
      "loss": 3.248,
      "step": 32739
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8135870695114136,
      "learning_rate": 0.000570599980980467,
      "loss": 2.9501,
      "step": 32740
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1164417266845703,
      "learning_rate": 0.000570598214911696,
      "loss": 2.9808,
      "step": 32741
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.561993956565857,
      "learning_rate": 0.0005705964487926157,
      "loss": 3.2114,
      "step": 32742
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.0657057762145996,
      "learning_rate": 0.0005705946826232263,
      "loss": 3.0595,
      "step": 32743
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4717185497283936,
      "learning_rate": 0.0005705929164035283,
      "loss": 3.216,
      "step": 32744
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.099421501159668,
      "learning_rate": 0.0005705911501335221,
      "loss": 3.0527,
      "step": 32745
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8759552240371704,
      "learning_rate": 0.0005705893838132077,
      "loss": 3.099,
      "step": 32746
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.777823567390442,
      "learning_rate": 0.0005705876174425858,
      "loss": 2.9269,
      "step": 32747
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.252169370651245,
      "learning_rate": 0.0005705858510216564,
      "loss": 3.2177,
      "step": 32748
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.523092269897461,
      "learning_rate": 0.0005705840845504201,
      "loss": 3.2266,
      "step": 32749
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9925894737243652,
      "learning_rate": 0.0005705823180288771,
      "loss": 2.9498,
      "step": 32750
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5426076650619507,
      "learning_rate": 0.0005705805514570276,
      "loss": 3.0744,
      "step": 32751
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3497512340545654,
      "learning_rate": 0.0005705787848348723,
      "loss": 3.0061,
      "step": 32752
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4446533918380737,
      "learning_rate": 0.0005705770181624112,
      "loss": 3.0232,
      "step": 32753
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4109653234481812,
      "learning_rate": 0.0005705752514396446,
      "loss": 3.0639,
      "step": 32754
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.268548846244812,
      "learning_rate": 0.000570573484666573,
      "loss": 3.0237,
      "step": 32755
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.111811399459839,
      "learning_rate": 0.0005705717178431969,
      "loss": 2.7334,
      "step": 32756
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6948833465576172,
      "learning_rate": 0.0005705699509695161,
      "loss": 3.0254,
      "step": 32757
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4250106811523438,
      "learning_rate": 0.0005705681840455314,
      "loss": 2.9907,
      "step": 32758
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.804407835006714,
      "learning_rate": 0.0005705664170712429,
      "loss": 2.9933,
      "step": 32759
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7539045810699463,
      "learning_rate": 0.000570564650046651,
      "loss": 3.0319,
      "step": 32760
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3016221523284912,
      "learning_rate": 0.0005705628829717561,
      "loss": 3.1798,
      "step": 32761
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9106285572052002,
      "learning_rate": 0.0005705611158465585,
      "loss": 3.1042,
      "step": 32762
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7586703300476074,
      "learning_rate": 0.0005705593486710584,
      "loss": 3.211,
      "step": 32763
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.63345205783844,
      "learning_rate": 0.0005705575814452562,
      "loss": 3.2215,
      "step": 32764
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0005414485931396,
      "learning_rate": 0.0005705558141691522,
      "loss": 3.01,
      "step": 32765
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.994895100593567,
      "learning_rate": 0.0005705540468427468,
      "loss": 3.1075,
      "step": 32766
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5731868743896484,
      "learning_rate": 0.0005705522794660404,
      "loss": 3.2663,
      "step": 32767
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4051518440246582,
      "learning_rate": 0.0005705505120390331,
      "loss": 3.2731,
      "step": 32768
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4761738777160645,
      "learning_rate": 0.0005705487445617254,
      "loss": 3.0387,
      "step": 32769
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6346992254257202,
      "learning_rate": 0.0005705469770341175,
      "loss": 3.0348,
      "step": 32770
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.961108684539795,
      "learning_rate": 0.00057054520945621,
      "loss": 2.8371,
      "step": 32771
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8640843629837036,
      "learning_rate": 0.0005705434418280029,
      "loss": 3.0505,
      "step": 32772
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4092705249786377,
      "learning_rate": 0.0005705416741494967,
      "loss": 3.1486,
      "step": 32773
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.238678455352783,
      "learning_rate": 0.0005705399064206917,
      "loss": 3.0131,
      "step": 32774
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5418606996536255,
      "learning_rate": 0.0005705381386415881,
      "loss": 3.365,
      "step": 32775
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7415683269500732,
      "learning_rate": 0.0005705363708121865,
      "loss": 2.8336,
      "step": 32776
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6249902248382568,
      "learning_rate": 0.0005705346029324871,
      "loss": 3.0226,
      "step": 32777
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9697651863098145,
      "learning_rate": 0.00057053283500249,
      "loss": 2.9584,
      "step": 32778
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6247392892837524,
      "learning_rate": 0.000570531067022196,
      "loss": 2.825,
      "step": 32779
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1981310844421387,
      "learning_rate": 0.000570529298991605,
      "loss": 3.172,
      "step": 32780
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.8777964115142822,
      "learning_rate": 0.0005705275309107176,
      "loss": 3.206,
      "step": 32781
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.319084644317627,
      "learning_rate": 0.000570525762779534,
      "loss": 3.3106,
      "step": 32782
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9040676355361938,
      "learning_rate": 0.0005705239945980545,
      "loss": 3.3494,
      "step": 32783
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.691156268119812,
      "learning_rate": 0.0005705222263662795,
      "loss": 2.9821,
      "step": 32784
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.201601982116699,
      "learning_rate": 0.0005705204580842094,
      "loss": 2.9076,
      "step": 32785
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6968008279800415,
      "learning_rate": 0.0005705186897518443,
      "loss": 2.7987,
      "step": 32786
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.340045690536499,
      "learning_rate": 0.0005705169213691848,
      "loss": 3.0924,
      "step": 32787
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3163089752197266,
      "learning_rate": 0.000570515152936231,
      "loss": 3.1122,
      "step": 32788
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7947635650634766,
      "learning_rate": 0.0005705133844529834,
      "loss": 2.9703,
      "step": 32789
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3197153806686401,
      "learning_rate": 0.0005705116159194422,
      "loss": 3.2905,
      "step": 32790
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.131054162979126,
      "learning_rate": 0.0005705098473356078,
      "loss": 2.9583,
      "step": 32791
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8623266220092773,
      "learning_rate": 0.0005705080787014805,
      "loss": 3.0433,
      "step": 32792
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3258213996887207,
      "learning_rate": 0.0005705063100170606,
      "loss": 3.0821,
      "step": 32793
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8536368608474731,
      "learning_rate": 0.0005705045412823486,
      "loss": 2.8189,
      "step": 32794
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.90871262550354,
      "learning_rate": 0.0005705027724973446,
      "loss": 3.1212,
      "step": 32795
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4214504957199097,
      "learning_rate": 0.000570501003662049,
      "loss": 2.8983,
      "step": 32796
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2985622882843018,
      "learning_rate": 0.0005704992347764623,
      "loss": 3.018,
      "step": 32797
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3501720428466797,
      "learning_rate": 0.0005704974658405845,
      "loss": 2.8907,
      "step": 32798
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6426445245742798,
      "learning_rate": 0.0005704956968544163,
      "loss": 3.0355,
      "step": 32799
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0580623149871826,
      "learning_rate": 0.0005704939278179577,
      "loss": 2.9416,
      "step": 32800
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5112369060516357,
      "learning_rate": 0.0005704921587312093,
      "loss": 3.0084,
      "step": 32801
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4792238473892212,
      "learning_rate": 0.0005704903895941712,
      "loss": 2.8263,
      "step": 32802
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5672374963760376,
      "learning_rate": 0.0005704886204068439,
      "loss": 3.1526,
      "step": 32803
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3250113725662231,
      "learning_rate": 0.0005704868511692276,
      "loss": 3.222,
      "step": 32804
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.647012710571289,
      "learning_rate": 0.0005704850818813227,
      "loss": 2.9309,
      "step": 32805
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5238629579544067,
      "learning_rate": 0.0005704833125431295,
      "loss": 2.9034,
      "step": 32806
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.646735668182373,
      "learning_rate": 0.0005704815431546485,
      "loss": 3.123,
      "step": 32807
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1952145099639893,
      "learning_rate": 0.0005704797737158796,
      "loss": 3.0009,
      "step": 32808
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4396367073059082,
      "learning_rate": 0.0005704780042268237,
      "loss": 2.9457,
      "step": 32809
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6128935813903809,
      "learning_rate": 0.0005704762346874806,
      "loss": 2.8745,
      "step": 32810
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.821978211402893,
      "learning_rate": 0.000570474465097851,
      "loss": 3.0889,
      "step": 32811
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7133396863937378,
      "learning_rate": 0.0005704726954579351,
      "loss": 3.2362,
      "step": 32812
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5355242490768433,
      "learning_rate": 0.0005704709257677331,
      "loss": 3.1726,
      "step": 32813
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2723050117492676,
      "learning_rate": 0.0005704691560272455,
      "loss": 2.8093,
      "step": 32814
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4358537197113037,
      "learning_rate": 0.0005704673862364726,
      "loss": 3.0485,
      "step": 32815
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7007591724395752,
      "learning_rate": 0.0005704656163954147,
      "loss": 3.0647,
      "step": 32816
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.638315200805664,
      "learning_rate": 0.000570463846504072,
      "loss": 2.9619,
      "step": 32817
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0184686183929443,
      "learning_rate": 0.0005704620765624452,
      "loss": 3.2936,
      "step": 32818
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4492762088775635,
      "learning_rate": 0.0005704603065705343,
      "loss": 3.2611,
      "step": 32819
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.329798698425293,
      "learning_rate": 0.0005704585365283398,
      "loss": 3.0537,
      "step": 32820
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.601874589920044,
      "learning_rate": 0.0005704567664358618,
      "loss": 3.3684,
      "step": 32821
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.483500599861145,
      "learning_rate": 0.0005704549962931008,
      "loss": 3.149,
      "step": 32822
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3095037937164307,
      "learning_rate": 0.0005704532261000573,
      "loss": 2.9919,
      "step": 32823
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4327888488769531,
      "learning_rate": 0.0005704514558567312,
      "loss": 3.1823,
      "step": 32824
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.5793442726135254,
      "learning_rate": 0.0005704496855631232,
      "loss": 2.9119,
      "step": 32825
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5163605213165283,
      "learning_rate": 0.0005704479152192335,
      "loss": 3.0443,
      "step": 32826
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1972968578338623,
      "learning_rate": 0.0005704461448250624,
      "loss": 3.0389,
      "step": 32827
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7839547395706177,
      "learning_rate": 0.0005704443743806102,
      "loss": 2.9412,
      "step": 32828
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.423829197883606,
      "learning_rate": 0.0005704426038858773,
      "loss": 2.8791,
      "step": 32829
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7565714120864868,
      "learning_rate": 0.0005704408333408642,
      "loss": 3.1294,
      "step": 32830
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3549240827560425,
      "learning_rate": 0.0005704390627455708,
      "loss": 3.274,
      "step": 32831
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4213773012161255,
      "learning_rate": 0.0005704372920999978,
      "loss": 2.9431,
      "step": 32832
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5750842094421387,
      "learning_rate": 0.0005704355214041453,
      "loss": 3.0559,
      "step": 32833
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1832141876220703,
      "learning_rate": 0.0005704337506580138,
      "loss": 3.2181,
      "step": 32834
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5114998817443848,
      "learning_rate": 0.0005704319798616037,
      "loss": 3.028,
      "step": 32835
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5814505815505981,
      "learning_rate": 0.0005704302090149151,
      "loss": 3.1156,
      "step": 32836
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.2483267784118652,
      "learning_rate": 0.0005704284381179483,
      "loss": 3.2724,
      "step": 32837
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.336838960647583,
      "learning_rate": 0.0005704266671707038,
      "loss": 2.8878,
      "step": 32838
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5978667736053467,
      "learning_rate": 0.0005704248961731819,
      "loss": 3.0869,
      "step": 32839
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0438079833984375,
      "learning_rate": 0.000570423125125383,
      "loss": 3.0516,
      "step": 32840
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8208584785461426,
      "learning_rate": 0.0005704213540273072,
      "loss": 3.0546,
      "step": 32841
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.367116928100586,
      "learning_rate": 0.000570419582878955,
      "loss": 3.1984,
      "step": 32842
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.691280722618103,
      "learning_rate": 0.0005704178116803267,
      "loss": 3.0919,
      "step": 32843
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3676220178604126,
      "learning_rate": 0.0005704160404314227,
      "loss": 3.2477,
      "step": 32844
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.576074481010437,
      "learning_rate": 0.0005704142691322432,
      "loss": 3.2248,
      "step": 32845
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6460069417953491,
      "learning_rate": 0.0005704124977827886,
      "loss": 2.8338,
      "step": 32846
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6654881238937378,
      "learning_rate": 0.0005704107263830591,
      "loss": 3.3882,
      "step": 32847
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3841471672058105,
      "learning_rate": 0.0005704089549330553,
      "loss": 3.2165,
      "step": 32848
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6967015266418457,
      "learning_rate": 0.0005704071834327772,
      "loss": 2.9846,
      "step": 32849
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7093130350112915,
      "learning_rate": 0.0005704054118822254,
      "loss": 3.0047,
      "step": 32850
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8951159715652466,
      "learning_rate": 0.0005704036402814001,
      "loss": 2.9777,
      "step": 32851
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7251745462417603,
      "learning_rate": 0.0005704018686303017,
      "loss": 3.173,
      "step": 32852
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.361559510231018,
      "learning_rate": 0.0005704000969289305,
      "loss": 3.1506,
      "step": 32853
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1155385971069336,
      "learning_rate": 0.0005703983251772869,
      "loss": 3.2835,
      "step": 32854
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.450956106185913,
      "learning_rate": 0.000570396553375371,
      "loss": 2.9589,
      "step": 32855
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5480676889419556,
      "learning_rate": 0.0005703947815231832,
      "loss": 2.9453,
      "step": 32856
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.569029450416565,
      "learning_rate": 0.000570393009620724,
      "loss": 3.0234,
      "step": 32857
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5919333696365356,
      "learning_rate": 0.0005703912376679937,
      "loss": 3.0403,
      "step": 32858
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4893041849136353,
      "learning_rate": 0.0005703894656649925,
      "loss": 3.1478,
      "step": 32859
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4406038522720337,
      "learning_rate": 0.0005703876936117209,
      "loss": 2.93,
      "step": 32860
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7249228954315186,
      "learning_rate": 0.000570385921508179,
      "loss": 3.0571,
      "step": 32861
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2558950185775757,
      "learning_rate": 0.0005703841493543674,
      "loss": 3.1105,
      "step": 32862
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5371897220611572,
      "learning_rate": 0.0005703823771502861,
      "loss": 3.0864,
      "step": 32863
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4354205131530762,
      "learning_rate": 0.0005703806048959356,
      "loss": 3.0081,
      "step": 32864
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5161643028259277,
      "learning_rate": 0.0005703788325913164,
      "loss": 3.084,
      "step": 32865
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6561182737350464,
      "learning_rate": 0.0005703770602364285,
      "loss": 2.8889,
      "step": 32866
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.694572925567627,
      "learning_rate": 0.0005703752878312725,
      "loss": 2.8959,
      "step": 32867
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2784981727600098,
      "learning_rate": 0.0005703735153758487,
      "loss": 3.148,
      "step": 32868
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.655279278755188,
      "learning_rate": 0.0005703717428701573,
      "loss": 2.9048,
      "step": 32869
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5178133249282837,
      "learning_rate": 0.0005703699703141987,
      "loss": 3.1418,
      "step": 32870
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5555431842803955,
      "learning_rate": 0.0005703681977079732,
      "loss": 3.1251,
      "step": 32871
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7385571002960205,
      "learning_rate": 0.0005703664250514811,
      "loss": 2.7932,
      "step": 32872
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3718597888946533,
      "learning_rate": 0.0005703646523447228,
      "loss": 3.0152,
      "step": 32873
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6237837076187134,
      "learning_rate": 0.0005703628795876987,
      "loss": 2.8727,
      "step": 32874
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6331403255462646,
      "learning_rate": 0.000570361106780409,
      "loss": 3.1277,
      "step": 32875
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8585649728775024,
      "learning_rate": 0.000570359333922854,
      "loss": 3.0736,
      "step": 32876
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.6258974075317383,
      "learning_rate": 0.0005703575610150342,
      "loss": 3.1755,
      "step": 32877
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5556282997131348,
      "learning_rate": 0.0005703557880569497,
      "loss": 2.9663,
      "step": 32878
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4488714933395386,
      "learning_rate": 0.0005703540150486011,
      "loss": 3.1864,
      "step": 32879
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5367926359176636,
      "learning_rate": 0.0005703522419899884,
      "loss": 2.7026,
      "step": 32880
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7128840684890747,
      "learning_rate": 0.0005703504688811122,
      "loss": 3.1265,
      "step": 32881
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3153233528137207,
      "learning_rate": 0.0005703486957219728,
      "loss": 3.1158,
      "step": 32882
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.707226037979126,
      "learning_rate": 0.0005703469225125704,
      "loss": 2.9912,
      "step": 32883
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4855566024780273,
      "learning_rate": 0.0005703451492529054,
      "loss": 3.1897,
      "step": 32884
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7687702178955078,
      "learning_rate": 0.0005703433759429782,
      "loss": 2.9705,
      "step": 32885
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4873073101043701,
      "learning_rate": 0.000570341602582789,
      "loss": 3.1864,
      "step": 32886
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0877485275268555,
      "learning_rate": 0.0005703398291723383,
      "loss": 2.9283,
      "step": 32887
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2634408473968506,
      "learning_rate": 0.0005703380557116263,
      "loss": 3.036,
      "step": 32888
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5131216049194336,
      "learning_rate": 0.0005703362822006532,
      "loss": 3.1188,
      "step": 32889
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5938526391983032,
      "learning_rate": 0.0005703345086394197,
      "loss": 2.9437,
      "step": 32890
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8247222900390625,
      "learning_rate": 0.0005703327350279257,
      "loss": 2.9812,
      "step": 32891
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3700225353240967,
      "learning_rate": 0.0005703309613661719,
      "loss": 3.2683,
      "step": 32892
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8396881818771362,
      "learning_rate": 0.0005703291876541583,
      "loss": 3.149,
      "step": 32893
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8928929567337036,
      "learning_rate": 0.0005703274138918856,
      "loss": 3.0759,
      "step": 32894
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9004837274551392,
      "learning_rate": 0.0005703256400793539,
      "loss": 3.213,
      "step": 32895
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4485684633255005,
      "learning_rate": 0.0005703238662165635,
      "loss": 3.0628,
      "step": 32896
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5277425050735474,
      "learning_rate": 0.0005703220923035147,
      "loss": 2.8712,
      "step": 32897
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7755544185638428,
      "learning_rate": 0.0005703203183402081,
      "loss": 3.1523,
      "step": 32898
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3917843103408813,
      "learning_rate": 0.0005703185443266437,
      "loss": 2.8794,
      "step": 32899
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6784971952438354,
      "learning_rate": 0.000570316770262822,
      "loss": 3.0143,
      "step": 32900
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4132695198059082,
      "learning_rate": 0.0005703149961487433,
      "loss": 3.0574,
      "step": 32901
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2383049726486206,
      "learning_rate": 0.0005703132219844081,
      "loss": 3.1685,
      "step": 32902
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.085317373275757,
      "learning_rate": 0.0005703114477698164,
      "loss": 3.1069,
      "step": 32903
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.702057123184204,
      "learning_rate": 0.0005703096735049688,
      "loss": 3.0501,
      "step": 32904
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2854630947113037,
      "learning_rate": 0.0005703078991898655,
      "loss": 2.9673,
      "step": 32905
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4346858263015747,
      "learning_rate": 0.0005703061248245069,
      "loss": 2.9121,
      "step": 32906
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3944745063781738,
      "learning_rate": 0.0005703043504088932,
      "loss": 3.1023,
      "step": 32907
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4936695098876953,
      "learning_rate": 0.0005703025759430249,
      "loss": 2.9986,
      "step": 32908
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9039798974990845,
      "learning_rate": 0.0005703008014269022,
      "loss": 3.1313,
      "step": 32909
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5706725120544434,
      "learning_rate": 0.0005702990268605254,
      "loss": 3.0082,
      "step": 32910
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4021527767181396,
      "learning_rate": 0.000570297252243895,
      "loss": 3.1801,
      "step": 32911
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5514633655548096,
      "learning_rate": 0.0005702954775770112,
      "loss": 3.0715,
      "step": 32912
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7169402837753296,
      "learning_rate": 0.0005702937028598745,
      "loss": 3.1538,
      "step": 32913
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1128885746002197,
      "learning_rate": 0.0005702919280924849,
      "loss": 3.0455,
      "step": 32914
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5277208089828491,
      "learning_rate": 0.000570290153274843,
      "loss": 3.4765,
      "step": 32915
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.865009069442749,
      "learning_rate": 0.0005702883784069492,
      "loss": 2.9643,
      "step": 32916
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6949262619018555,
      "learning_rate": 0.0005702866034888036,
      "loss": 2.7072,
      "step": 32917
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7808488607406616,
      "learning_rate": 0.0005702848285204066,
      "loss": 2.9949,
      "step": 32918
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.038100481033325,
      "learning_rate": 0.0005702830535017586,
      "loss": 3.0411,
      "step": 32919
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0180444717407227,
      "learning_rate": 0.0005702812784328597,
      "loss": 3.2021,
      "step": 32920
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.507229208946228,
      "learning_rate": 0.0005702795033137105,
      "loss": 3.0165,
      "step": 32921
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.129270076751709,
      "learning_rate": 0.0005702777281443113,
      "loss": 3.1567,
      "step": 32922
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2102357149124146,
      "learning_rate": 0.0005702759529246623,
      "loss": 3.192,
      "step": 32923
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5513241291046143,
      "learning_rate": 0.0005702741776547639,
      "loss": 3.0845,
      "step": 32924
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0385046005249023,
      "learning_rate": 0.0005702724023346165,
      "loss": 3.1469,
      "step": 32925
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4744101762771606,
      "learning_rate": 0.0005702706269642203,
      "loss": 3.0264,
      "step": 32926
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3312371969223022,
      "learning_rate": 0.0005702688515435757,
      "loss": 3.205,
      "step": 32927
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9261003732681274,
      "learning_rate": 0.000570267076072683,
      "loss": 3.1056,
      "step": 32928
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1200993061065674,
      "learning_rate": 0.0005702653005515427,
      "loss": 3.0138,
      "step": 32929
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5254491567611694,
      "learning_rate": 0.0005702635249801548,
      "loss": 3.1495,
      "step": 32930
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.445751905441284,
      "learning_rate": 0.00057026174935852,
      "loss": 3.2173,
      "step": 32931
    },
    {
      "epoch": 0.43,
      "grad_norm": 4.103452205657959,
      "learning_rate": 0.0005702599736866382,
      "loss": 3.2217,
      "step": 32932
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5323008298873901,
      "learning_rate": 0.00057025819796451,
      "loss": 3.1357,
      "step": 32933
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.696376085281372,
      "learning_rate": 0.0005702564221921358,
      "loss": 3.1026,
      "step": 32934
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6769018173217773,
      "learning_rate": 0.0005702546463695159,
      "loss": 3.0658,
      "step": 32935
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7882691621780396,
      "learning_rate": 0.0005702528704966504,
      "loss": 3.2095,
      "step": 32936
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.265034794807434,
      "learning_rate": 0.0005702510945735399,
      "loss": 2.9769,
      "step": 32937
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.318152666091919,
      "learning_rate": 0.0005702493186001846,
      "loss": 3.0131,
      "step": 32938
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3387041091918945,
      "learning_rate": 0.0005702475425765848,
      "loss": 3.0494,
      "step": 32939
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6668661832809448,
      "learning_rate": 0.0005702457665027409,
      "loss": 3.1392,
      "step": 32940
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9542253017425537,
      "learning_rate": 0.0005702439903786531,
      "loss": 2.9828,
      "step": 32941
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.064718723297119,
      "learning_rate": 0.000570242214204322,
      "loss": 3.1818,
      "step": 32942
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5897716283798218,
      "learning_rate": 0.0005702404379797478,
      "loss": 2.9345,
      "step": 32943
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2347657680511475,
      "learning_rate": 0.0005702386617049307,
      "loss": 2.7804,
      "step": 32944
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.697951078414917,
      "learning_rate": 0.0005702368853798712,
      "loss": 2.8762,
      "step": 32945
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4804422855377197,
      "learning_rate": 0.0005702351090045695,
      "loss": 3.0915,
      "step": 32946
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4527145624160767,
      "learning_rate": 0.000570233332579026,
      "loss": 2.9128,
      "step": 32947
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2724475860595703,
      "learning_rate": 0.0005702315561032411,
      "loss": 3.1933,
      "step": 32948
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3147590160369873,
      "learning_rate": 0.0005702297795772149,
      "loss": 3.198,
      "step": 32949
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.230279803276062,
      "learning_rate": 0.000570228003000948,
      "loss": 3.1534,
      "step": 32950
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7466416358947754,
      "learning_rate": 0.0005702262263744406,
      "loss": 2.7901,
      "step": 32951
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.273104190826416,
      "learning_rate": 0.000570224449697693,
      "loss": 3.0007,
      "step": 32952
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6268970966339111,
      "learning_rate": 0.0005702226729707056,
      "loss": 3.0663,
      "step": 32953
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4995841979980469,
      "learning_rate": 0.0005702208961934787,
      "loss": 2.9036,
      "step": 32954
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5108094215393066,
      "learning_rate": 0.0005702191193660127,
      "loss": 3.0543,
      "step": 32955
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7165130376815796,
      "learning_rate": 0.0005702173424883077,
      "loss": 2.6446,
      "step": 32956
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7224586009979248,
      "learning_rate": 0.0005702155655603644,
      "loss": 3.1846,
      "step": 32957
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6195614337921143,
      "learning_rate": 0.0005702137885821828,
      "loss": 2.8129,
      "step": 32958
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5480999946594238,
      "learning_rate": 0.0005702120115537634,
      "loss": 3.215,
      "step": 32959
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5297114849090576,
      "learning_rate": 0.0005702102344751065,
      "loss": 2.9969,
      "step": 32960
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6854045391082764,
      "learning_rate": 0.0005702084573462123,
      "loss": 2.9754,
      "step": 32961
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4684975147247314,
      "learning_rate": 0.0005702066801670814,
      "loss": 2.8037,
      "step": 32962
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5825388431549072,
      "learning_rate": 0.0005702049029377139,
      "loss": 3.0283,
      "step": 32963
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5309728384017944,
      "learning_rate": 0.0005702031256581102,
      "loss": 2.9537,
      "step": 32964
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6111626625061035,
      "learning_rate": 0.0005702013483282706,
      "loss": 3.1915,
      "step": 32965
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3923132419586182,
      "learning_rate": 0.0005701995709481956,
      "loss": 3.0178,
      "step": 32966
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7668561935424805,
      "learning_rate": 0.0005701977935178852,
      "loss": 2.8233,
      "step": 32967
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.657509446144104,
      "learning_rate": 0.0005701960160373401,
      "loss": 2.9221,
      "step": 32968
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5987075567245483,
      "learning_rate": 0.0005701942385065605,
      "loss": 3.0887,
      "step": 32969
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.741562843322754,
      "learning_rate": 0.0005701924609255466,
      "loss": 2.9194,
      "step": 32970
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6592285633087158,
      "learning_rate": 0.0005701906832942989,
      "loss": 3.1572,
      "step": 32971
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.726086974143982,
      "learning_rate": 0.0005701889056128175,
      "loss": 2.9629,
      "step": 32972
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4651150703430176,
      "learning_rate": 0.0005701871278811029,
      "loss": 3.1523,
      "step": 32973
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7578681707382202,
      "learning_rate": 0.0005701853500991556,
      "loss": 2.8383,
      "step": 32974
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4766322374343872,
      "learning_rate": 0.0005701835722669755,
      "loss": 3.4318,
      "step": 32975
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6596451997756958,
      "learning_rate": 0.0005701817943845633,
      "loss": 3.2568,
      "step": 32976
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.023346185684204,
      "learning_rate": 0.0005701800164519192,
      "loss": 3.055,
      "step": 32977
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9913684129714966,
      "learning_rate": 0.0005701782384690436,
      "loss": 2.8206,
      "step": 32978
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6834402084350586,
      "learning_rate": 0.0005701764604359367,
      "loss": 3.0148,
      "step": 32979
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4597147703170776,
      "learning_rate": 0.0005701746823525988,
      "loss": 3.1601,
      "step": 32980
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7214717864990234,
      "learning_rate": 0.0005701729042190305,
      "loss": 3.4358,
      "step": 32981
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6381841897964478,
      "learning_rate": 0.0005701711260352318,
      "loss": 3.0644,
      "step": 32982
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.441314935684204,
      "learning_rate": 0.0005701693478012032,
      "loss": 3.0968,
      "step": 32983
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.668230652809143,
      "learning_rate": 0.0005701675695169451,
      "loss": 3.1728,
      "step": 32984
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5902221202850342,
      "learning_rate": 0.0005701657911824578,
      "loss": 3.0774,
      "step": 32985
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5338318347930908,
      "learning_rate": 0.0005701640127977416,
      "loss": 2.9758,
      "step": 32986
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7697882652282715,
      "learning_rate": 0.0005701622343627967,
      "loss": 3.0894,
      "step": 32987
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.229706048965454,
      "learning_rate": 0.0005701604558776235,
      "loss": 3.3212,
      "step": 32988
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9903291463851929,
      "learning_rate": 0.0005701586773422224,
      "loss": 3.2525,
      "step": 32989
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.819631338119507,
      "learning_rate": 0.0005701568987565938,
      "loss": 2.8923,
      "step": 32990
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.439683198928833,
      "learning_rate": 0.0005701551201207379,
      "loss": 3.2816,
      "step": 32991
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4236799478530884,
      "learning_rate": 0.000570153341434655,
      "loss": 3.2059,
      "step": 32992
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9944636821746826,
      "learning_rate": 0.0005701515626983455,
      "loss": 3.0684,
      "step": 32993
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9761250019073486,
      "learning_rate": 0.0005701497839118098,
      "loss": 3.2524,
      "step": 32994
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.298409342765808,
      "learning_rate": 0.000570148005075048,
      "loss": 3.1029,
      "step": 32995
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.965212106704712,
      "learning_rate": 0.0005701462261880608,
      "loss": 2.9279,
      "step": 32996
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.224011778831482,
      "learning_rate": 0.0005701444472508482,
      "loss": 3.2981,
      "step": 32997
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2988355159759521,
      "learning_rate": 0.0005701426682634107,
      "loss": 3.2489,
      "step": 32998
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3314948081970215,
      "learning_rate": 0.0005701408892257485,
      "loss": 2.9916,
      "step": 32999
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4944546222686768,
      "learning_rate": 0.000570139110137862,
      "loss": 3.0684,
      "step": 33000
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.630566120147705,
      "learning_rate": 0.0005701373309997517,
      "loss": 3.1135,
      "step": 33001
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.510203242301941,
      "learning_rate": 0.0005701355518114177,
      "loss": 3.1401,
      "step": 33002
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4830193519592285,
      "learning_rate": 0.0005701337725728602,
      "loss": 3.1903,
      "step": 33003
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.130483865737915,
      "learning_rate": 0.0005701319932840799,
      "loss": 3.0913,
      "step": 33004
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.620396137237549,
      "learning_rate": 0.000570130213945077,
      "loss": 3.1142,
      "step": 33005
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.904288411140442,
      "learning_rate": 0.0005701284345558517,
      "loss": 3.1848,
      "step": 33006
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.41618013381958,
      "learning_rate": 0.0005701266551164045,
      "loss": 3.213,
      "step": 33007
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4279111623764038,
      "learning_rate": 0.0005701248756267357,
      "loss": 3.0544,
      "step": 33008
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5434813499450684,
      "learning_rate": 0.0005701230960868455,
      "loss": 3.1754,
      "step": 33009
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.065263032913208,
      "learning_rate": 0.0005701213164967344,
      "loss": 2.9458,
      "step": 33010
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.702746868133545,
      "learning_rate": 0.0005701195368564024,
      "loss": 3.0048,
      "step": 33011
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.451520562171936,
      "learning_rate": 0.0005701177571658503,
      "loss": 3.1034,
      "step": 33012
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.955422043800354,
      "learning_rate": 0.0005701159774250782,
      "loss": 2.8221,
      "step": 33013
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3900405168533325,
      "learning_rate": 0.0005701141976340864,
      "loss": 2.8989,
      "step": 33014
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4593441486358643,
      "learning_rate": 0.0005701124177928754,
      "loss": 3.1686,
      "step": 33015
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.542168140411377,
      "learning_rate": 0.0005701106379014453,
      "loss": 2.9871,
      "step": 33016
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.767799735069275,
      "learning_rate": 0.0005701088579597964,
      "loss": 3.0944,
      "step": 33017
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8507411479949951,
      "learning_rate": 0.0005701070779679292,
      "loss": 2.8025,
      "step": 33018
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6082230806350708,
      "learning_rate": 0.0005701052979258442,
      "loss": 3.0083,
      "step": 33019
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9623820781707764,
      "learning_rate": 0.0005701035178335414,
      "loss": 3.3232,
      "step": 33020
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.328905701637268,
      "learning_rate": 0.0005701017376910213,
      "loss": 2.7226,
      "step": 33021
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.627488374710083,
      "learning_rate": 0.0005700999574982841,
      "loss": 3.2044,
      "step": 33022
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4657282829284668,
      "learning_rate": 0.0005700981772553303,
      "loss": 3.0974,
      "step": 33023
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3376870155334473,
      "learning_rate": 0.00057009639696216,
      "loss": 3.127,
      "step": 33024
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3325822353363037,
      "learning_rate": 0.0005700946166187739,
      "loss": 3.0916,
      "step": 33025
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4960150718688965,
      "learning_rate": 0.000570092836225172,
      "loss": 3.0598,
      "step": 33026
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8247768878936768,
      "learning_rate": 0.0005700910557813548,
      "loss": 2.9596,
      "step": 33027
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7356411218643188,
      "learning_rate": 0.0005700892752873225,
      "loss": 2.9627,
      "step": 33028
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3135349750518799,
      "learning_rate": 0.0005700874947430755,
      "loss": 2.9823,
      "step": 33029
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8346384763717651,
      "learning_rate": 0.0005700857141486143,
      "loss": 3.0994,
      "step": 33030
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6049368381500244,
      "learning_rate": 0.0005700839335039388,
      "loss": 3.0401,
      "step": 33031
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4649699926376343,
      "learning_rate": 0.0005700821528090499,
      "loss": 3.302,
      "step": 33032
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3962910175323486,
      "learning_rate": 0.0005700803720639474,
      "loss": 3.1629,
      "step": 33033
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7758642435073853,
      "learning_rate": 0.0005700785912686319,
      "loss": 2.9771,
      "step": 33034
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.529800534248352,
      "learning_rate": 0.0005700768104231038,
      "loss": 3.1226,
      "step": 33035
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.757165551185608,
      "learning_rate": 0.0005700750295273633,
      "loss": 3.034,
      "step": 33036
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6287322044372559,
      "learning_rate": 0.0005700732485814107,
      "loss": 3.1318,
      "step": 33037
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7922470569610596,
      "learning_rate": 0.0005700714675852464,
      "loss": 2.7962,
      "step": 33038
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4683842658996582,
      "learning_rate": 0.0005700696865388707,
      "loss": 3.2698,
      "step": 33039
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8004437685012817,
      "learning_rate": 0.0005700679054422839,
      "loss": 3.0892,
      "step": 33040
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.460767388343811,
      "learning_rate": 0.0005700661242954864,
      "loss": 3.1419,
      "step": 33041
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4551786184310913,
      "learning_rate": 0.0005700643430984785,
      "loss": 3.0405,
      "step": 33042
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4151477813720703,
      "learning_rate": 0.0005700625618512607,
      "loss": 3.1601,
      "step": 33043
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.342423677444458,
      "learning_rate": 0.000570060780553833,
      "loss": 3.1714,
      "step": 33044
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5483717918395996,
      "learning_rate": 0.000570058999206196,
      "loss": 3.1418,
      "step": 33045
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4455244541168213,
      "learning_rate": 0.0005700572178083498,
      "loss": 2.9947,
      "step": 33046
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7974334955215454,
      "learning_rate": 0.000570055436360295,
      "loss": 2.9645,
      "step": 33047
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4076523780822754,
      "learning_rate": 0.0005700536548620316,
      "loss": 3.1257,
      "step": 33048
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.575799822807312,
      "learning_rate": 0.0005700518733135604,
      "loss": 3.1229,
      "step": 33049
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6648545265197754,
      "learning_rate": 0.0005700500917148814,
      "loss": 3.0647,
      "step": 33050
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7247192859649658,
      "learning_rate": 0.0005700483100659949,
      "loss": 3.1011,
      "step": 33051
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7960878610610962,
      "learning_rate": 0.0005700465283669013,
      "loss": 2.9852,
      "step": 33052
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3659781217575073,
      "learning_rate": 0.000570044746617601,
      "loss": 3.1197,
      "step": 33053
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8302267789840698,
      "learning_rate": 0.0005700429648180943,
      "loss": 3.1645,
      "step": 33054
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2308168411254883,
      "learning_rate": 0.0005700411829683815,
      "loss": 3.2911,
      "step": 33055
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4478942155838013,
      "learning_rate": 0.000570039401068463,
      "loss": 3.0876,
      "step": 33056
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6812160015106201,
      "learning_rate": 0.000570037619118339,
      "loss": 3.0759,
      "step": 33057
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.745182752609253,
      "learning_rate": 0.0005700358371180099,
      "loss": 3.129,
      "step": 33058
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4820886850357056,
      "learning_rate": 0.000570034055067476,
      "loss": 3.0639,
      "step": 33059
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5759165287017822,
      "learning_rate": 0.0005700322729667379,
      "loss": 3.0835,
      "step": 33060
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3065098524093628,
      "learning_rate": 0.0005700304908157955,
      "loss": 3.3717,
      "step": 33061
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.068127155303955,
      "learning_rate": 0.0005700287086146494,
      "loss": 2.8914,
      "step": 33062
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.707106590270996,
      "learning_rate": 0.0005700269263632998,
      "loss": 2.8781,
      "step": 33063
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3790391683578491,
      "learning_rate": 0.0005700251440617472,
      "loss": 3.0829,
      "step": 33064
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.067294120788574,
      "learning_rate": 0.0005700233617099917,
      "loss": 3.0655,
      "step": 33065
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.400351047515869,
      "learning_rate": 0.0005700215793080338,
      "loss": 3.0169,
      "step": 33066
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4025474786758423,
      "learning_rate": 0.0005700197968558739,
      "loss": 3.0382,
      "step": 33067
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.8465535640716553,
      "learning_rate": 0.0005700180143535121,
      "loss": 3.0301,
      "step": 33068
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.522998332977295,
      "learning_rate": 0.000570016231800949,
      "loss": 3.2326,
      "step": 33069
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5374635457992554,
      "learning_rate": 0.0005700144491981847,
      "loss": 2.8509,
      "step": 33070
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8679591417312622,
      "learning_rate": 0.0005700126665452196,
      "loss": 3.1336,
      "step": 33071
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.060753345489502,
      "learning_rate": 0.000570010883842054,
      "loss": 2.9918,
      "step": 33072
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.438349485397339,
      "learning_rate": 0.0005700091010886883,
      "loss": 3.2169,
      "step": 33073
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6846837997436523,
      "learning_rate": 0.0005700073182851228,
      "loss": 3.2602,
      "step": 33074
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.166867971420288,
      "learning_rate": 0.000570005535431358,
      "loss": 3.1463,
      "step": 33075
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.794991135597229,
      "learning_rate": 0.0005700037525273939,
      "loss": 3.0981,
      "step": 33076
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4835556745529175,
      "learning_rate": 0.0005700019695732312,
      "loss": 2.9582,
      "step": 33077
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9967684745788574,
      "learning_rate": 0.0005700001865688699,
      "loss": 3.2342,
      "step": 33078
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.388397693634033,
      "learning_rate": 0.0005699984035143105,
      "loss": 2.9994,
      "step": 33079
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.626859188079834,
      "learning_rate": 0.0005699966204095532,
      "loss": 3.066,
      "step": 33080
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.729393720626831,
      "learning_rate": 0.0005699948372545984,
      "loss": 2.9932,
      "step": 33081
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.961182117462158,
      "learning_rate": 0.0005699930540494466,
      "loss": 3.0913,
      "step": 33082
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.185861349105835,
      "learning_rate": 0.000569991270794098,
      "loss": 3.1439,
      "step": 33083
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4841046333312988,
      "learning_rate": 0.000569989487488553,
      "loss": 2.8901,
      "step": 33084
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0088298320770264,
      "learning_rate": 0.0005699877041328117,
      "loss": 2.8715,
      "step": 33085
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3294408321380615,
      "learning_rate": 0.0005699859207268746,
      "loss": 2.97,
      "step": 33086
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6975589990615845,
      "learning_rate": 0.000569984137270742,
      "loss": 2.9931,
      "step": 33087
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8613200187683105,
      "learning_rate": 0.0005699823537644144,
      "loss": 3.1736,
      "step": 33088
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3142716884613037,
      "learning_rate": 0.0005699805702078919,
      "loss": 2.9057,
      "step": 33089
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5580980777740479,
      "learning_rate": 0.0005699787866011749,
      "loss": 3.1604,
      "step": 33090
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2603099346160889,
      "learning_rate": 0.0005699770029442638,
      "loss": 3.2596,
      "step": 33091
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4904812574386597,
      "learning_rate": 0.0005699752192371588,
      "loss": 3.3018,
      "step": 33092
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5309414863586426,
      "learning_rate": 0.0005699734354798604,
      "loss": 3.2167,
      "step": 33093
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.191519021987915,
      "learning_rate": 0.0005699716516723687,
      "loss": 3.1917,
      "step": 33094
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6367729902267456,
      "learning_rate": 0.0005699698678146844,
      "loss": 2.8178,
      "step": 33095
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8202471733093262,
      "learning_rate": 0.0005699680839068074,
      "loss": 3.1518,
      "step": 33096
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1572911739349365,
      "learning_rate": 0.0005699662999487384,
      "loss": 3.0082,
      "step": 33097
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7702347040176392,
      "learning_rate": 0.0005699645159404774,
      "loss": 3.0806,
      "step": 33098
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.493064284324646,
      "learning_rate": 0.000569962731882025,
      "loss": 3.1263,
      "step": 33099
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.452190399169922,
      "learning_rate": 0.0005699609477733814,
      "loss": 2.9418,
      "step": 33100
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3442394733428955,
      "learning_rate": 0.0005699591636145469,
      "loss": 2.9164,
      "step": 33101
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3265490531921387,
      "learning_rate": 0.000569957379405522,
      "loss": 2.8963,
      "step": 33102
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5120316743850708,
      "learning_rate": 0.0005699555951463069,
      "loss": 3.0924,
      "step": 33103
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.186272382736206,
      "learning_rate": 0.0005699538108369021,
      "loss": 3.116,
      "step": 33104
    },
    {
      "epoch": 0.43,
      "grad_norm": 6.5179619789123535,
      "learning_rate": 0.0005699520264773075,
      "loss": 2.9855,
      "step": 33105
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.4647021293640137,
      "learning_rate": 0.0005699502420675239,
      "loss": 3.2723,
      "step": 33106
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0253679752349854,
      "learning_rate": 0.0005699484576075515,
      "loss": 2.9174,
      "step": 33107
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.672361373901367,
      "learning_rate": 0.0005699466730973904,
      "loss": 3.0206,
      "step": 33108
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9407492876052856,
      "learning_rate": 0.0005699448885370412,
      "loss": 3.1428,
      "step": 33109
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.369321346282959,
      "learning_rate": 0.0005699431039265043,
      "loss": 2.8169,
      "step": 33110
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5483853816986084,
      "learning_rate": 0.0005699413192657796,
      "loss": 2.7519,
      "step": 33111
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2483837604522705,
      "learning_rate": 0.0005699395345548679,
      "loss": 2.9831,
      "step": 33112
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3368432521820068,
      "learning_rate": 0.0005699377497937694,
      "loss": 3.2084,
      "step": 33113
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4526766538619995,
      "learning_rate": 0.0005699359649824843,
      "loss": 2.9649,
      "step": 33114
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.684505581855774,
      "learning_rate": 0.000569934180121013,
      "loss": 3.0278,
      "step": 33115
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9645216464996338,
      "learning_rate": 0.0005699323952093558,
      "loss": 2.7812,
      "step": 33116
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5255855321884155,
      "learning_rate": 0.0005699306102475132,
      "loss": 3.1001,
      "step": 33117
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.513132095336914,
      "learning_rate": 0.0005699288252354852,
      "loss": 3.1602,
      "step": 33118
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5033974647521973,
      "learning_rate": 0.0005699270401732726,
      "loss": 2.9309,
      "step": 33119
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4546256065368652,
      "learning_rate": 0.0005699252550608753,
      "loss": 3.1302,
      "step": 33120
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4011461734771729,
      "learning_rate": 0.0005699234698982937,
      "loss": 3.2404,
      "step": 33121
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6044411659240723,
      "learning_rate": 0.0005699216846855284,
      "loss": 3.0885,
      "step": 33122
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.676758050918579,
      "learning_rate": 0.0005699198994225795,
      "loss": 3.029,
      "step": 33123
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4482535123825073,
      "learning_rate": 0.0005699181141094475,
      "loss": 3.182,
      "step": 33124
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.568540096282959,
      "learning_rate": 0.0005699163287461325,
      "loss": 2.887,
      "step": 33125
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.46610426902771,
      "learning_rate": 0.000569914543332635,
      "loss": 3.0678,
      "step": 33126
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5987517833709717,
      "learning_rate": 0.0005699127578689553,
      "loss": 3.1741,
      "step": 33127
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4473901987075806,
      "learning_rate": 0.0005699109723550937,
      "loss": 2.8901,
      "step": 33128
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.595540165901184,
      "learning_rate": 0.0005699091867910506,
      "loss": 3.1133,
      "step": 33129
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8878246545791626,
      "learning_rate": 0.0005699074011768263,
      "loss": 3.1052,
      "step": 33130
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4933303594589233,
      "learning_rate": 0.000569905615512421,
      "loss": 3.1316,
      "step": 33131
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.071767807006836,
      "learning_rate": 0.0005699038297978352,
      "loss": 2.9557,
      "step": 33132
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.671270489692688,
      "learning_rate": 0.0005699020440330692,
      "loss": 2.8108,
      "step": 33133
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3583985567092896,
      "learning_rate": 0.0005699002582181233,
      "loss": 3.1076,
      "step": 33134
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6596429347991943,
      "learning_rate": 0.000569898472352998,
      "loss": 2.7734,
      "step": 33135
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.459356665611267,
      "learning_rate": 0.0005698966864376933,
      "loss": 3.3276,
      "step": 33136
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.787837028503418,
      "learning_rate": 0.0005698949004722098,
      "loss": 3.0948,
      "step": 33137
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5672028064727783,
      "learning_rate": 0.0005698931144565477,
      "loss": 3.0367,
      "step": 33138
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8644156455993652,
      "learning_rate": 0.0005698913283907074,
      "loss": 3.1191,
      "step": 33139
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4735844135284424,
      "learning_rate": 0.0005698895422746892,
      "loss": 3.0258,
      "step": 33140
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9120993614196777,
      "learning_rate": 0.0005698877561084933,
      "loss": 3.1386,
      "step": 33141
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.445203423500061,
      "learning_rate": 0.0005698859698921204,
      "loss": 2.8513,
      "step": 33142
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8353266716003418,
      "learning_rate": 0.0005698841836255705,
      "loss": 3.0208,
      "step": 33143
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6203323602676392,
      "learning_rate": 0.000569882397308844,
      "loss": 3.1798,
      "step": 33144
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1020143032073975,
      "learning_rate": 0.0005698806109419414,
      "loss": 3.1683,
      "step": 33145
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4406920671463013,
      "learning_rate": 0.0005698788245248628,
      "loss": 3.1725,
      "step": 33146
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0525028705596924,
      "learning_rate": 0.0005698770380576086,
      "loss": 3.0835,
      "step": 33147
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.9618356227874756,
      "learning_rate": 0.0005698752515401793,
      "loss": 2.9952,
      "step": 33148
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.9748101234436035,
      "learning_rate": 0.0005698734649725749,
      "loss": 3.0506,
      "step": 33149
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5064257383346558,
      "learning_rate": 0.0005698716783547961,
      "loss": 3.0576,
      "step": 33150
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.312084436416626,
      "learning_rate": 0.0005698698916868429,
      "loss": 3.1522,
      "step": 33151
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6024997234344482,
      "learning_rate": 0.000569868104968716,
      "loss": 3.2433,
      "step": 33152
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.709780216217041,
      "learning_rate": 0.0005698663182004154,
      "loss": 2.957,
      "step": 33153
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.688748836517334,
      "learning_rate": 0.0005698645313819415,
      "loss": 3.0752,
      "step": 33154
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.1069178581237793,
      "learning_rate": 0.0005698627445132947,
      "loss": 3.2884,
      "step": 33155
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9538480043411255,
      "learning_rate": 0.0005698609575944754,
      "loss": 3.0617,
      "step": 33156
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.6443533897399902,
      "learning_rate": 0.0005698591706254838,
      "loss": 3.0028,
      "step": 33157
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5297497510910034,
      "learning_rate": 0.0005698573836063203,
      "loss": 3.1527,
      "step": 33158
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.514420747756958,
      "learning_rate": 0.0005698555965369854,
      "loss": 3.0454,
      "step": 33159
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.8332126140594482,
      "learning_rate": 0.000569853809417479,
      "loss": 3.0155,
      "step": 33160
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.481039524078369,
      "learning_rate": 0.0005698520222478018,
      "loss": 3.1777,
      "step": 33161
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5728849172592163,
      "learning_rate": 0.000569850235027954,
      "loss": 3.1439,
      "step": 33162
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.465791702270508,
      "learning_rate": 0.000569848447757936,
      "loss": 3.3242,
      "step": 33163
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.943676233291626,
      "learning_rate": 0.0005698466604377479,
      "loss": 3.0112,
      "step": 33164
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.023183822631836,
      "learning_rate": 0.0005698448730673903,
      "loss": 3.265,
      "step": 33165
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.596717357635498,
      "learning_rate": 0.0005698430856468635,
      "loss": 3.0571,
      "step": 33166
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8780184984207153,
      "learning_rate": 0.0005698412981761678,
      "loss": 2.9868,
      "step": 33167
    },
    {
      "epoch": 0.43,
      "grad_norm": 5.214908599853516,
      "learning_rate": 0.0005698395106553035,
      "loss": 2.8133,
      "step": 33168
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.809227705001831,
      "learning_rate": 0.0005698377230842707,
      "loss": 3.2059,
      "step": 33169
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.2059576511383057,
      "learning_rate": 0.0005698359354630703,
      "loss": 3.279,
      "step": 33170
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5241117477416992,
      "learning_rate": 0.0005698341477917021,
      "loss": 3.1954,
      "step": 33171
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0488743782043457,
      "learning_rate": 0.0005698323600701668,
      "loss": 3.1333,
      "step": 33172
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.0259809494018555,
      "learning_rate": 0.0005698305722984645,
      "loss": 2.8994,
      "step": 33173
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9998921155929565,
      "learning_rate": 0.0005698287844765955,
      "loss": 3.172,
      "step": 33174
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5101996660232544,
      "learning_rate": 0.0005698269966045604,
      "loss": 3.0922,
      "step": 33175
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.847420334815979,
      "learning_rate": 0.0005698252086823593,
      "loss": 2.9118,
      "step": 33176
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.734698534011841,
      "learning_rate": 0.0005698234207099926,
      "loss": 3.1487,
      "step": 33177
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.560305118560791,
      "learning_rate": 0.0005698216326874607,
      "loss": 3.0045,
      "step": 33178
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4412288665771484,
      "learning_rate": 0.0005698198446147637,
      "loss": 2.9769,
      "step": 33179
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.124220132827759,
      "learning_rate": 0.0005698180564919021,
      "loss": 3.2019,
      "step": 33180
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.631981134414673,
      "learning_rate": 0.0005698162683188763,
      "loss": 3.0766,
      "step": 33181
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8060524463653564,
      "learning_rate": 0.0005698144800956867,
      "loss": 2.8973,
      "step": 33182
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.2318201065063477,
      "learning_rate": 0.0005698126918223334,
      "loss": 3.0343,
      "step": 33183
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7469724416732788,
      "learning_rate": 0.0005698109034988168,
      "loss": 3.0188,
      "step": 33184
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.712013602256775,
      "learning_rate": 0.0005698091151251372,
      "loss": 3.3826,
      "step": 33185
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.084728240966797,
      "learning_rate": 0.0005698073267012951,
      "loss": 2.9925,
      "step": 33186
    },
    {
      "epoch": 0.43,
      "grad_norm": 4.466181755065918,
      "learning_rate": 0.0005698055382272907,
      "loss": 3.1474,
      "step": 33187
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3188934326171875,
      "learning_rate": 0.0005698037497031244,
      "loss": 2.9768,
      "step": 33188
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7262232303619385,
      "learning_rate": 0.0005698019611287963,
      "loss": 2.8772,
      "step": 33189
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5701165199279785,
      "learning_rate": 0.000569800172504307,
      "loss": 3.0822,
      "step": 33190
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4190821647644043,
      "learning_rate": 0.0005697983838296569,
      "loss": 3.0052,
      "step": 33191
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.396044373512268,
      "learning_rate": 0.0005697965951048462,
      "loss": 3.1149,
      "step": 33192
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5573315620422363,
      "learning_rate": 0.000569794806329875,
      "loss": 3.1237,
      "step": 33193
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5030221939086914,
      "learning_rate": 0.0005697930175047439,
      "loss": 3.2421,
      "step": 33194
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5811681747436523,
      "learning_rate": 0.0005697912286294533,
      "loss": 2.9106,
      "step": 33195
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3191992044448853,
      "learning_rate": 0.0005697894397040034,
      "loss": 3.0436,
      "step": 33196
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4959322214126587,
      "learning_rate": 0.0005697876507283945,
      "loss": 2.9382,
      "step": 33197
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.70101797580719,
      "learning_rate": 0.0005697858617026271,
      "loss": 3.1111,
      "step": 33198
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7141025066375732,
      "learning_rate": 0.0005697840726267012,
      "loss": 3.2002,
      "step": 33199
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5204370021820068,
      "learning_rate": 0.0005697822835006175,
      "loss": 3.1292,
      "step": 33200
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8845659494400024,
      "learning_rate": 0.0005697804943243762,
      "loss": 3.0911,
      "step": 33201
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4689083099365234,
      "learning_rate": 0.0005697787050979776,
      "loss": 3.0398,
      "step": 33202
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7086739540100098,
      "learning_rate": 0.000569776915821422,
      "loss": 2.9591,
      "step": 33203
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6393548250198364,
      "learning_rate": 0.0005697751264947097,
      "loss": 3.1364,
      "step": 33204
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.918424367904663,
      "learning_rate": 0.0005697733371178412,
      "loss": 3.3076,
      "step": 33205
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1985292434692383,
      "learning_rate": 0.0005697715476908167,
      "loss": 2.9868,
      "step": 33206
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6929651498794556,
      "learning_rate": 0.0005697697582136367,
      "loss": 3.2698,
      "step": 33207
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.3529036045074463,
      "learning_rate": 0.0005697679686863013,
      "loss": 2.98,
      "step": 33208
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.671112298965454,
      "learning_rate": 0.000569766179108811,
      "loss": 3.028,
      "step": 33209
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.393959879875183,
      "learning_rate": 0.000569764389481166,
      "loss": 3.4501,
      "step": 33210
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4593281745910645,
      "learning_rate": 0.0005697625998033668,
      "loss": 3.147,
      "step": 33211
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4000009298324585,
      "learning_rate": 0.0005697608100754136,
      "loss": 3.0627,
      "step": 33212
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8457810878753662,
      "learning_rate": 0.0005697590202973067,
      "loss": 3.1459,
      "step": 33213
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3498120307922363,
      "learning_rate": 0.0005697572304690467,
      "loss": 2.8611,
      "step": 33214
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7819640636444092,
      "learning_rate": 0.0005697554405906334,
      "loss": 3.3647,
      "step": 33215
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.805619478225708,
      "learning_rate": 0.0005697536506620678,
      "loss": 3.2268,
      "step": 33216
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3244872093200684,
      "learning_rate": 0.0005697518606833499,
      "loss": 3.0234,
      "step": 33217
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.358283281326294,
      "learning_rate": 0.0005697500706544799,
      "loss": 3.1466,
      "step": 33218
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.586390733718872,
      "learning_rate": 0.0005697482805754582,
      "loss": 3.089,
      "step": 33219
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.688595175743103,
      "learning_rate": 0.0005697464904462854,
      "loss": 3.1033,
      "step": 33220
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5981613397598267,
      "learning_rate": 0.0005697447002669615,
      "loss": 2.8151,
      "step": 33221
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.224141836166382,
      "learning_rate": 0.000569742910037487,
      "loss": 3.2381,
      "step": 33222
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3878535032272339,
      "learning_rate": 0.0005697411197578622,
      "loss": 2.9565,
      "step": 33223
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5749541521072388,
      "learning_rate": 0.0005697393294280874,
      "loss": 3.1411,
      "step": 33224
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.57429039478302,
      "learning_rate": 0.000569737539048163,
      "loss": 3.1697,
      "step": 33225
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4647727012634277,
      "learning_rate": 0.0005697357486180893,
      "loss": 3.2135,
      "step": 33226
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3964900970458984,
      "learning_rate": 0.0005697339581378667,
      "loss": 3.2156,
      "step": 33227
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4949339628219604,
      "learning_rate": 0.0005697321676074954,
      "loss": 3.2956,
      "step": 33228
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2382603883743286,
      "learning_rate": 0.0005697303770269758,
      "loss": 2.9639,
      "step": 33229
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.323881983757019,
      "learning_rate": 0.0005697285863963082,
      "loss": 2.8481,
      "step": 33230
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4877986907958984,
      "learning_rate": 0.0005697267957154931,
      "loss": 3.1472,
      "step": 33231
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4752304553985596,
      "learning_rate": 0.0005697250049845305,
      "loss": 3.0476,
      "step": 33232
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5582548379898071,
      "learning_rate": 0.000569723214203421,
      "loss": 2.8629,
      "step": 33233
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.618082880973816,
      "learning_rate": 0.0005697214233721649,
      "loss": 2.9562,
      "step": 33234
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0230045318603516,
      "learning_rate": 0.0005697196324907626,
      "loss": 3.0676,
      "step": 33235
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.678911805152893,
      "learning_rate": 0.0005697178415592141,
      "loss": 3.406,
      "step": 33236
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.144164562225342,
      "learning_rate": 0.0005697160505775201,
      "loss": 3.1465,
      "step": 33237
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7576969861984253,
      "learning_rate": 0.0005697142595456808,
      "loss": 3.1106,
      "step": 33238
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0468411445617676,
      "learning_rate": 0.0005697124684636964,
      "loss": 3.1003,
      "step": 33239
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.335602283477783,
      "learning_rate": 0.0005697106773315675,
      "loss": 2.8792,
      "step": 33240
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6594873666763306,
      "learning_rate": 0.0005697088861492943,
      "loss": 3.1159,
      "step": 33241
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.585502028465271,
      "learning_rate": 0.000569707094916877,
      "loss": 3.2994,
      "step": 33242
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.4628758430480957,
      "learning_rate": 0.0005697053036343163,
      "loss": 2.8497,
      "step": 33243
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.820055603981018,
      "learning_rate": 0.000569703512301612,
      "loss": 3.1403,
      "step": 33244
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4092786312103271,
      "learning_rate": 0.0005697017209187649,
      "loss": 3.2089,
      "step": 33245
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8860312700271606,
      "learning_rate": 0.0005696999294857751,
      "loss": 2.984,
      "step": 33246
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.842298984527588,
      "learning_rate": 0.0005696981380026429,
      "loss": 2.9609,
      "step": 33247
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1544368267059326,
      "learning_rate": 0.000569696346469369,
      "loss": 3.0577,
      "step": 33248
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3888181447982788,
      "learning_rate": 0.0005696945548859532,
      "loss": 3.4435,
      "step": 33249
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.462042212486267,
      "learning_rate": 0.0005696927632523963,
      "loss": 3.3628,
      "step": 33250
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.15128493309021,
      "learning_rate": 0.0005696909715686983,
      "loss": 3.1984,
      "step": 33251
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.1814677715301514,
      "learning_rate": 0.0005696891798348596,
      "loss": 2.9386,
      "step": 33252
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7889883518218994,
      "learning_rate": 0.0005696873880508806,
      "loss": 3.0663,
      "step": 33253
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.294104814529419,
      "learning_rate": 0.0005696855962167617,
      "loss": 2.8046,
      "step": 33254
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.7275636196136475,
      "learning_rate": 0.0005696838043325031,
      "loss": 3.0651,
      "step": 33255
    },
    {
      "epoch": 0.43,
      "grad_norm": 4.222596168518066,
      "learning_rate": 0.0005696820123981053,
      "loss": 3.057,
      "step": 33256
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.263732433319092,
      "learning_rate": 0.0005696802204135684,
      "loss": 3.1957,
      "step": 33257
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4837902784347534,
      "learning_rate": 0.000569678428378893,
      "loss": 2.9939,
      "step": 33258
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0988211631774902,
      "learning_rate": 0.0005696766362940791,
      "loss": 3.103,
      "step": 33259
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.4562785625457764,
      "learning_rate": 0.0005696748441591274,
      "loss": 3.0577,
      "step": 33260
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3693383932113647,
      "learning_rate": 0.000569673051974038,
      "loss": 2.9986,
      "step": 33261
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5853044986724854,
      "learning_rate": 0.0005696712597388112,
      "loss": 3.1583,
      "step": 33262
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.141282796859741,
      "learning_rate": 0.0005696694674534476,
      "loss": 2.8607,
      "step": 33263
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5167124271392822,
      "learning_rate": 0.0005696676751179472,
      "loss": 3.1405,
      "step": 33264
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.61605703830719,
      "learning_rate": 0.0005696658827323105,
      "loss": 3.2539,
      "step": 33265
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5976380109786987,
      "learning_rate": 0.0005696640902965378,
      "loss": 3.1506,
      "step": 33266
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4739397764205933,
      "learning_rate": 0.0005696622978106296,
      "loss": 3.3768,
      "step": 33267
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8993489742279053,
      "learning_rate": 0.000569660505274586,
      "loss": 2.96,
      "step": 33268
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.508855938911438,
      "learning_rate": 0.0005696587126884074,
      "loss": 3.0118,
      "step": 33269
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6328682899475098,
      "learning_rate": 0.0005696569200520942,
      "loss": 3.0257,
      "step": 33270
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3297487497329712,
      "learning_rate": 0.0005696551273656466,
      "loss": 2.9966,
      "step": 33271
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.478925108909607,
      "learning_rate": 0.0005696533346290651,
      "loss": 3.0349,
      "step": 33272
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.324037790298462,
      "learning_rate": 0.00056965154184235,
      "loss": 3.0338,
      "step": 33273
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3450900316238403,
      "learning_rate": 0.0005696497490055015,
      "loss": 3.1084,
      "step": 33274
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3540064096450806,
      "learning_rate": 0.00056964795611852,
      "loss": 3.0904,
      "step": 33275
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.985772967338562,
      "learning_rate": 0.0005696461631814059,
      "loss": 3.0536,
      "step": 33276
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5278329849243164,
      "learning_rate": 0.0005696443701941595,
      "loss": 2.9796,
      "step": 33277
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7909315824508667,
      "learning_rate": 0.0005696425771567811,
      "loss": 2.8441,
      "step": 33278
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2321298122406006,
      "learning_rate": 0.000569640784069271,
      "loss": 3.2668,
      "step": 33279
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.285398483276367,
      "learning_rate": 0.0005696389909316297,
      "loss": 3.0143,
      "step": 33280
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.15537166595459,
      "learning_rate": 0.0005696371977438575,
      "loss": 2.9798,
      "step": 33281
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3165074586868286,
      "learning_rate": 0.0005696354045059545,
      "loss": 3.1746,
      "step": 33282
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.016049385070801,
      "learning_rate": 0.0005696336112179212,
      "loss": 3.2784,
      "step": 33283
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4878226518630981,
      "learning_rate": 0.000569631817879758,
      "loss": 3.0941,
      "step": 33284
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.2639051675796509,
      "learning_rate": 0.0005696300244914651,
      "loss": 3.0139,
      "step": 33285
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1541520357131958,
      "learning_rate": 0.0005696282310530429,
      "loss": 3.0509,
      "step": 33286
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3170793056488037,
      "learning_rate": 0.0005696264375644917,
      "loss": 3.1513,
      "step": 33287
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.556016445159912,
      "learning_rate": 0.0005696246440258119,
      "loss": 2.9957,
      "step": 33288
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.036832094192505,
      "learning_rate": 0.0005696228504370037,
      "loss": 2.8724,
      "step": 33289
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6050223112106323,
      "learning_rate": 0.0005696210567980675,
      "loss": 3.1246,
      "step": 33290
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2216317653656006,
      "learning_rate": 0.0005696192631090038,
      "loss": 3.0067,
      "step": 33291
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.926318645477295,
      "learning_rate": 0.0005696174693698127,
      "loss": 2.9148,
      "step": 33292
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4519681930541992,
      "learning_rate": 0.0005696156755804946,
      "loss": 2.9438,
      "step": 33293
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4630911350250244,
      "learning_rate": 0.0005696138817410499,
      "loss": 3.1186,
      "step": 33294
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.232565402984619,
      "learning_rate": 0.000569612087851479,
      "loss": 3.0158,
      "step": 33295
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.570863723754883,
      "learning_rate": 0.000569610293911782,
      "loss": 2.971,
      "step": 33296
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.412824034690857,
      "learning_rate": 0.0005696084999219594,
      "loss": 3.0369,
      "step": 33297
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8936384916305542,
      "learning_rate": 0.0005696067058820114,
      "loss": 2.9992,
      "step": 33298
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5709856748580933,
      "learning_rate": 0.0005696049117919385,
      "loss": 2.9468,
      "step": 33299
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.088109254837036,
      "learning_rate": 0.000569603117651741,
      "loss": 3.1061,
      "step": 33300
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5607028007507324,
      "learning_rate": 0.0005696013234614191,
      "loss": 2.7484,
      "step": 33301
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.181889057159424,
      "learning_rate": 0.0005695995292209733,
      "loss": 3.3362,
      "step": 33302
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.8298776149749756,
      "learning_rate": 0.0005695977349304038,
      "loss": 3.3801,
      "step": 33303
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.669870376586914,
      "learning_rate": 0.0005695959405897109,
      "loss": 3.0398,
      "step": 33304
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5440781116485596,
      "learning_rate": 0.0005695941461988952,
      "loss": 3.1332,
      "step": 33305
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5770840644836426,
      "learning_rate": 0.0005695923517579568,
      "loss": 3.0415,
      "step": 33306
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7203582525253296,
      "learning_rate": 0.0005695905572668962,
      "loss": 2.9978,
      "step": 33307
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7854937314987183,
      "learning_rate": 0.0005695887627257135,
      "loss": 3.0584,
      "step": 33308
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3437615633010864,
      "learning_rate": 0.0005695869681344092,
      "loss": 2.8942,
      "step": 33309
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.614507794380188,
      "learning_rate": 0.0005695851734929835,
      "loss": 3.0476,
      "step": 33310
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3829962015151978,
      "learning_rate": 0.000569583378801437,
      "loss": 3.2345,
      "step": 33311
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3899033069610596,
      "learning_rate": 0.0005695815840597697,
      "loss": 3.083,
      "step": 33312
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7219455242156982,
      "learning_rate": 0.0005695797892679821,
      "loss": 3.2977,
      "step": 33313
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.687895655632019,
      "learning_rate": 0.0005695779944260746,
      "loss": 3.0766,
      "step": 33314
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5151766538619995,
      "learning_rate": 0.0005695761995340475,
      "loss": 3.0829,
      "step": 33315
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5397186279296875,
      "learning_rate": 0.000569574404591901,
      "loss": 2.9124,
      "step": 33316
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0528550148010254,
      "learning_rate": 0.0005695726095996356,
      "loss": 2.9076,
      "step": 33317
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.831054925918579,
      "learning_rate": 0.0005695708145572515,
      "loss": 2.7375,
      "step": 33318
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5318515300750732,
      "learning_rate": 0.0005695690194647492,
      "loss": 3.0638,
      "step": 33319
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.033693790435791,
      "learning_rate": 0.0005695672243221288,
      "loss": 3.0509,
      "step": 33320
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.581405758857727,
      "learning_rate": 0.0005695654291293908,
      "loss": 2.9188,
      "step": 33321
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4778079986572266,
      "learning_rate": 0.0005695636338865355,
      "loss": 2.9283,
      "step": 33322
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.6939826011657715,
      "learning_rate": 0.0005695618385935633,
      "loss": 3.0444,
      "step": 33323
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.733038306236267,
      "learning_rate": 0.0005695600432504745,
      "loss": 3.122,
      "step": 33324
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6414400339126587,
      "learning_rate": 0.0005695582478572693,
      "loss": 3.1035,
      "step": 33325
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.607103109359741,
      "learning_rate": 0.0005695564524139481,
      "loss": 3.1378,
      "step": 33326
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8585402965545654,
      "learning_rate": 0.0005695546569205114,
      "loss": 2.9128,
      "step": 33327
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.321372628211975,
      "learning_rate": 0.0005695528613769593,
      "loss": 3.2228,
      "step": 33328
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8356785774230957,
      "learning_rate": 0.0005695510657832922,
      "loss": 3.1336,
      "step": 33329
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7480980157852173,
      "learning_rate": 0.0005695492701395105,
      "loss": 2.8555,
      "step": 33330
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4988508224487305,
      "learning_rate": 0.0005695474744456146,
      "loss": 3.1063,
      "step": 33331
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4615328311920166,
      "learning_rate": 0.0005695456787016047,
      "loss": 3.3136,
      "step": 33332
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.563169002532959,
      "learning_rate": 0.0005695438829074811,
      "loss": 2.9883,
      "step": 33333
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.944830060005188,
      "learning_rate": 0.0005695420870632442,
      "loss": 3.0683,
      "step": 33334
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7755850553512573,
      "learning_rate": 0.0005695402911688944,
      "loss": 3.0824,
      "step": 33335
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2051737308502197,
      "learning_rate": 0.000569538495224432,
      "loss": 3.0737,
      "step": 33336
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.3810007572174072,
      "learning_rate": 0.0005695366992298572,
      "loss": 2.8885,
      "step": 33337
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.316683530807495,
      "learning_rate": 0.0005695349031851705,
      "loss": 2.9152,
      "step": 33338
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6069972515106201,
      "learning_rate": 0.0005695331070903723,
      "loss": 3.0912,
      "step": 33339
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.037856101989746,
      "learning_rate": 0.0005695313109454626,
      "loss": 3.0886,
      "step": 33340
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.6170036792755127,
      "learning_rate": 0.0005695295147504421,
      "loss": 3.1426,
      "step": 33341
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.6238458156585693,
      "learning_rate": 0.0005695277185053108,
      "loss": 3.1043,
      "step": 33342
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.396321415901184,
      "learning_rate": 0.0005695259222100694,
      "loss": 2.9718,
      "step": 33343
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3293752670288086,
      "learning_rate": 0.0005695241258647178,
      "loss": 3.0461,
      "step": 33344
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8043134212493896,
      "learning_rate": 0.0005695223294692568,
      "loss": 3.0274,
      "step": 33345
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8756061792373657,
      "learning_rate": 0.0005695205330236864,
      "loss": 3.0945,
      "step": 33346
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4544042348861694,
      "learning_rate": 0.0005695187365280071,
      "loss": 3.1196,
      "step": 33347
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4388302564620972,
      "learning_rate": 0.0005695169399822192,
      "loss": 3.0294,
      "step": 33348
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7037827968597412,
      "learning_rate": 0.0005695151433863229,
      "loss": 2.989,
      "step": 33349
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.928784966468811,
      "learning_rate": 0.0005695133467403188,
      "loss": 3.175,
      "step": 33350
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3928134441375732,
      "learning_rate": 0.000569511550044207,
      "loss": 3.0292,
      "step": 33351
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.5215585231781006,
      "learning_rate": 0.0005695097532979878,
      "loss": 3.1312,
      "step": 33352
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.316760540008545,
      "learning_rate": 0.0005695079565016619,
      "loss": 3.0179,
      "step": 33353
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.720260500907898,
      "learning_rate": 0.0005695061596552291,
      "loss": 2.9249,
      "step": 33354
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.569777011871338,
      "learning_rate": 0.0005695043627586902,
      "loss": 3.1532,
      "step": 33355
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3036226034164429,
      "learning_rate": 0.0005695025658120454,
      "loss": 3.014,
      "step": 33356
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9678866863250732,
      "learning_rate": 0.0005695007688152949,
      "loss": 3.1776,
      "step": 33357
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3152037858963013,
      "learning_rate": 0.0005694989717684391,
      "loss": 2.92,
      "step": 33358
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6505169868469238,
      "learning_rate": 0.0005694971746714784,
      "loss": 3.3693,
      "step": 33359
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9394663572311401,
      "learning_rate": 0.000569495377524413,
      "loss": 3.1545,
      "step": 33360
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5140351057052612,
      "learning_rate": 0.0005694935803272434,
      "loss": 3.1439,
      "step": 33361
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5433119535446167,
      "learning_rate": 0.0005694917830799699,
      "loss": 3.0074,
      "step": 33362
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9184935092926025,
      "learning_rate": 0.0005694899857825928,
      "loss": 3.1785,
      "step": 33363
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9247465133666992,
      "learning_rate": 0.0005694881884351124,
      "loss": 3.1551,
      "step": 33364
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5691736936569214,
      "learning_rate": 0.0005694863910375289,
      "loss": 3.1253,
      "step": 33365
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3783475160598755,
      "learning_rate": 0.000569484593589843,
      "loss": 2.8871,
      "step": 33366
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8242242336273193,
      "learning_rate": 0.0005694827960920546,
      "loss": 3.3822,
      "step": 33367
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.258325219154358,
      "learning_rate": 0.0005694809985441645,
      "loss": 3.0687,
      "step": 33368
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3334001302719116,
      "learning_rate": 0.0005694792009461727,
      "loss": 3.1103,
      "step": 33369
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3390668630599976,
      "learning_rate": 0.0005694774032980796,
      "loss": 2.9598,
      "step": 33370
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.7132668495178223,
      "learning_rate": 0.0005694756055998856,
      "loss": 2.9359,
      "step": 33371
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5359083414077759,
      "learning_rate": 0.000569473807851591,
      "loss": 3.008,
      "step": 33372
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.429738163948059,
      "learning_rate": 0.0005694720100531961,
      "loss": 2.973,
      "step": 33373
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.743239164352417,
      "learning_rate": 0.0005694702122047013,
      "loss": 3.2528,
      "step": 33374
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.6211435794830322,
      "learning_rate": 0.000569468414306107,
      "loss": 2.9798,
      "step": 33375
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1774685382843018,
      "learning_rate": 0.0005694666163574133,
      "loss": 2.92,
      "step": 33376
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5804986953735352,
      "learning_rate": 0.0005694648183586207,
      "loss": 3.0689,
      "step": 33377
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2826547622680664,
      "learning_rate": 0.0005694630203097295,
      "loss": 2.9454,
      "step": 33378
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.857947826385498,
      "learning_rate": 0.0005694612222107399,
      "loss": 3.1281,
      "step": 33379
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5074782371520996,
      "learning_rate": 0.0005694594240616526,
      "loss": 3.4657,
      "step": 33380
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4984310865402222,
      "learning_rate": 0.0005694576258624675,
      "loss": 2.9937,
      "step": 33381
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.1603927612304688,
      "learning_rate": 0.0005694558276131853,
      "loss": 3.0613,
      "step": 33382
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.3034664392471313,
      "learning_rate": 0.0005694540293138062,
      "loss": 3.1218,
      "step": 33383
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4721436500549316,
      "learning_rate": 0.0005694522309643304,
      "loss": 2.9436,
      "step": 33384
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5794693231582642,
      "learning_rate": 0.0005694504325647584,
      "loss": 3.2936,
      "step": 33385
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.859254002571106,
      "learning_rate": 0.0005694486341150905,
      "loss": 3.0929,
      "step": 33386
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.142639398574829,
      "learning_rate": 0.000569446835615327,
      "loss": 3.1158,
      "step": 33387
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.7772741317749023,
      "learning_rate": 0.0005694450370654682,
      "loss": 3.0781,
      "step": 33388
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.8727803230285645,
      "learning_rate": 0.0005694432384655145,
      "loss": 2.9033,
      "step": 33389
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.316065549850464,
      "learning_rate": 0.0005694414398154661,
      "loss": 3.0028,
      "step": 33390
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.5988259315490723,
      "learning_rate": 0.0005694396411153236,
      "loss": 2.9903,
      "step": 33391
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.295992851257324,
      "learning_rate": 0.0005694378423650873,
      "loss": 2.8897,
      "step": 33392
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0944197177886963,
      "learning_rate": 0.0005694360435647573,
      "loss": 3.0549,
      "step": 33393
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.0007522106170654,
      "learning_rate": 0.0005694342447143339,
      "loss": 2.7758,
      "step": 33394
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9030719995498657,
      "learning_rate": 0.0005694324458138178,
      "loss": 3.3605,
      "step": 33395
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.1300342082977295,
      "learning_rate": 0.0005694306468632089,
      "loss": 3.1468,
      "step": 33396
    },
    {
      "epoch": 0.43,
      "grad_norm": 4.015686511993408,
      "learning_rate": 0.0005694288478625079,
      "loss": 3.0229,
      "step": 33397
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.9115734100341797,
      "learning_rate": 0.0005694270488117151,
      "loss": 2.9179,
      "step": 33398
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.8103492259979248,
      "learning_rate": 0.0005694252497108306,
      "loss": 3.0404,
      "step": 33399
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.914609670639038,
      "learning_rate": 0.0005694234505598548,
      "loss": 3.0037,
      "step": 33400
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.489873170852661,
      "learning_rate": 0.0005694216513587883,
      "loss": 3.4874,
      "step": 33401
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.181693196296692,
      "learning_rate": 0.0005694198521076309,
      "loss": 3.2176,
      "step": 33402
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4889445304870605,
      "learning_rate": 0.0005694180528063835,
      "loss": 2.924,
      "step": 33403
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.11006236076355,
      "learning_rate": 0.0005694162534550462,
      "loss": 3.1166,
      "step": 33404
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.4067658185958862,
      "learning_rate": 0.0005694144540536193,
      "loss": 3.273,
      "step": 33405
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.1165952682495117,
      "learning_rate": 0.0005694126546021031,
      "loss": 3.3585,
      "step": 33406
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.165647268295288,
      "learning_rate": 0.0005694108551004981,
      "loss": 3.3791,
      "step": 33407
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.224353790283203,
      "learning_rate": 0.0005694090555488045,
      "loss": 3.0754,
      "step": 33408
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9218027591705322,
      "learning_rate": 0.0005694072559470227,
      "loss": 3.0674,
      "step": 33409
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6861522197723389,
      "learning_rate": 0.0005694054562951531,
      "loss": 2.8974,
      "step": 33410
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.8793444633483887,
      "learning_rate": 0.0005694036565931957,
      "loss": 3.0782,
      "step": 33411
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.827507734298706,
      "learning_rate": 0.0005694018568411512,
      "loss": 3.2484,
      "step": 33412
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8523269891738892,
      "learning_rate": 0.0005694000570390197,
      "loss": 3.1532,
      "step": 33413
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.967773675918579,
      "learning_rate": 0.0005693982571868018,
      "loss": 3.0209,
      "step": 33414
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8849315643310547,
      "learning_rate": 0.0005693964572844977,
      "loss": 2.8188,
      "step": 33415
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5741091966629028,
      "learning_rate": 0.0005693946573321077,
      "loss": 3.2123,
      "step": 33416
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3223252296447754,
      "learning_rate": 0.0005693928573296319,
      "loss": 3.1091,
      "step": 33417
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4185376167297363,
      "learning_rate": 0.0005693910572770711,
      "loss": 2.9776,
      "step": 33418
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5301116704940796,
      "learning_rate": 0.0005693892571744254,
      "loss": 3.035,
      "step": 33419
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3883259296417236,
      "learning_rate": 0.0005693874570216952,
      "loss": 3.0298,
      "step": 33420
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.260873794555664,
      "learning_rate": 0.0005693856568188807,
      "loss": 3.0207,
      "step": 33421
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8347880840301514,
      "learning_rate": 0.0005693838565659823,
      "loss": 3.0543,
      "step": 33422
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5768122673034668,
      "learning_rate": 0.0005693820562630004,
      "loss": 3.2039,
      "step": 33423
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4103302955627441,
      "learning_rate": 0.0005693802559099353,
      "loss": 3.1513,
      "step": 33424
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7048015594482422,
      "learning_rate": 0.0005693784555067873,
      "loss": 2.8518,
      "step": 33425
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9104973077774048,
      "learning_rate": 0.0005693766550535569,
      "loss": 3.1314,
      "step": 33426
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3414289951324463,
      "learning_rate": 0.0005693748545502441,
      "loss": 3.3527,
      "step": 33427
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8748605251312256,
      "learning_rate": 0.0005693730539968495,
      "loss": 2.875,
      "step": 33428
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9217443466186523,
      "learning_rate": 0.0005693712533933733,
      "loss": 2.8448,
      "step": 33429
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4182409048080444,
      "learning_rate": 0.0005693694527398159,
      "loss": 2.8778,
      "step": 33430
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7029422521591187,
      "learning_rate": 0.0005693676520361777,
      "loss": 3.0878,
      "step": 33431
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.928356647491455,
      "learning_rate": 0.0005693658512824591,
      "loss": 3.0109,
      "step": 33432
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3312615156173706,
      "learning_rate": 0.0005693640504786601,
      "loss": 3.0343,
      "step": 33433
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7761187553405762,
      "learning_rate": 0.0005693622496247814,
      "loss": 3.1564,
      "step": 33434
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.825668454170227,
      "learning_rate": 0.0005693604487208231,
      "loss": 3.0215,
      "step": 33435
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.512574315071106,
      "learning_rate": 0.0005693586477667855,
      "loss": 3.1094,
      "step": 33436
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0219337940216064,
      "learning_rate": 0.0005693568467626691,
      "loss": 3.0603,
      "step": 33437
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4705688953399658,
      "learning_rate": 0.0005693550457084743,
      "loss": 3.0444,
      "step": 33438
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4478307962417603,
      "learning_rate": 0.0005693532446042011,
      "loss": 2.8772,
      "step": 33439
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3897171020507812,
      "learning_rate": 0.0005693514434498502,
      "loss": 2.9457,
      "step": 33440
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.449148178100586,
      "learning_rate": 0.0005693496422454218,
      "loss": 3.0737,
      "step": 33441
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5404802560806274,
      "learning_rate": 0.0005693478409909161,
      "loss": 2.7627,
      "step": 33442
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.466880440711975,
      "learning_rate": 0.0005693460396863337,
      "loss": 3.2518,
      "step": 33443
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5285186767578125,
      "learning_rate": 0.0005693442383316747,
      "loss": 3.2846,
      "step": 33444
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6798545122146606,
      "learning_rate": 0.0005693424369269395,
      "loss": 3.0505,
      "step": 33445
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7686619758605957,
      "learning_rate": 0.0005693406354721285,
      "loss": 3.0208,
      "step": 33446
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4957249164581299,
      "learning_rate": 0.000569338833967242,
      "loss": 2.9972,
      "step": 33447
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5018013715744019,
      "learning_rate": 0.0005693370324122803,
      "loss": 3.0114,
      "step": 33448
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2883243560791016,
      "learning_rate": 0.0005693352308072438,
      "loss": 3.1934,
      "step": 33449
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8153517246246338,
      "learning_rate": 0.0005693334291521328,
      "loss": 3.0801,
      "step": 33450
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1705338954925537,
      "learning_rate": 0.0005693316274469475,
      "loss": 3.2366,
      "step": 33451
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.5558226108551025,
      "learning_rate": 0.0005693298256916885,
      "loss": 3.1343,
      "step": 33452
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6209447383880615,
      "learning_rate": 0.0005693280238863559,
      "loss": 3.0245,
      "step": 33453
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.161060094833374,
      "learning_rate": 0.0005693262220309503,
      "loss": 3.0698,
      "step": 33454
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7081305980682373,
      "learning_rate": 0.0005693244201254718,
      "loss": 3.0325,
      "step": 33455
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5128306150436401,
      "learning_rate": 0.0005693226181699208,
      "loss": 3.0426,
      "step": 33456
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9117469787597656,
      "learning_rate": 0.0005693208161642975,
      "loss": 3.0814,
      "step": 33457
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.6458170413970947,
      "learning_rate": 0.0005693190141086025,
      "loss": 2.9532,
      "step": 33458
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4330365657806396,
      "learning_rate": 0.000569317212002836,
      "loss": 3.0719,
      "step": 33459
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.470729112625122,
      "learning_rate": 0.0005693154098469984,
      "loss": 3.054,
      "step": 33460
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6328239440917969,
      "learning_rate": 0.0005693136076410899,
      "loss": 2.9113,
      "step": 33461
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3584195375442505,
      "learning_rate": 0.0005693118053851109,
      "loss": 2.9174,
      "step": 33462
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7058097124099731,
      "learning_rate": 0.0005693100030790619,
      "loss": 3.0222,
      "step": 33463
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7516206502914429,
      "learning_rate": 0.000569308200722943,
      "loss": 2.85,
      "step": 33464
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4855924844741821,
      "learning_rate": 0.0005693063983167546,
      "loss": 3.1064,
      "step": 33465
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.668762445449829,
      "learning_rate": 0.0005693045958604971,
      "loss": 3.1294,
      "step": 33466
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1215739250183105,
      "learning_rate": 0.0005693027933541707,
      "loss": 3.1841,
      "step": 33467
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5589946508407593,
      "learning_rate": 0.0005693009907977759,
      "loss": 3.1157,
      "step": 33468
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4531669616699219,
      "learning_rate": 0.0005692991881913129,
      "loss": 3.0846,
      "step": 33469
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.37558114528656,
      "learning_rate": 0.0005692973855347822,
      "loss": 3.1791,
      "step": 33470
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.755448818206787,
      "learning_rate": 0.000569295582828184,
      "loss": 2.9724,
      "step": 33471
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3325964212417603,
      "learning_rate": 0.0005692937800715186,
      "loss": 3.021,
      "step": 33472
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3534051179885864,
      "learning_rate": 0.0005692919772647865,
      "loss": 3.0544,
      "step": 33473
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9927984476089478,
      "learning_rate": 0.0005692901744079878,
      "loss": 2.9977,
      "step": 33474
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5896075963974,
      "learning_rate": 0.0005692883715011231,
      "loss": 2.886,
      "step": 33475
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4187427759170532,
      "learning_rate": 0.0005692865685441925,
      "loss": 2.8239,
      "step": 33476
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4567821025848389,
      "learning_rate": 0.0005692847655371965,
      "loss": 2.9827,
      "step": 33477
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4038854837417603,
      "learning_rate": 0.0005692829624801355,
      "loss": 2.9892,
      "step": 33478
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5943665504455566,
      "learning_rate": 0.0005692811593730096,
      "loss": 3.1223,
      "step": 33479
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5586459636688232,
      "learning_rate": 0.0005692793562158192,
      "loss": 3.0218,
      "step": 33480
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6731349229812622,
      "learning_rate": 0.0005692775530085648,
      "loss": 3.2195,
      "step": 33481
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.890954613685608,
      "learning_rate": 0.0005692757497512465,
      "loss": 3.3837,
      "step": 33482
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6055512428283691,
      "learning_rate": 0.0005692739464438648,
      "loss": 3.0972,
      "step": 33483
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7251969575881958,
      "learning_rate": 0.0005692721430864201,
      "loss": 3.1614,
      "step": 33484
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5375148057937622,
      "learning_rate": 0.0005692703396789126,
      "loss": 3.0168,
      "step": 33485
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8786824941635132,
      "learning_rate": 0.0005692685362213426,
      "loss": 2.9905,
      "step": 33486
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3995736837387085,
      "learning_rate": 0.0005692667327137104,
      "loss": 3.1978,
      "step": 33487
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8046897649765015,
      "learning_rate": 0.0005692649291560165,
      "loss": 3.0594,
      "step": 33488
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4841645956039429,
      "learning_rate": 0.0005692631255482612,
      "loss": 3.0496,
      "step": 33489
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2744990587234497,
      "learning_rate": 0.0005692613218904448,
      "loss": 3.2171,
      "step": 33490
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4694575071334839,
      "learning_rate": 0.0005692595181825676,
      "loss": 3.0263,
      "step": 33491
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.699285626411438,
      "learning_rate": 0.00056925771442463,
      "loss": 2.8873,
      "step": 33492
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6106250286102295,
      "learning_rate": 0.0005692559106166323,
      "loss": 3.0993,
      "step": 33493
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5824053287506104,
      "learning_rate": 0.0005692541067585749,
      "loss": 2.99,
      "step": 33494
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7866450548171997,
      "learning_rate": 0.0005692523028504579,
      "loss": 3.1383,
      "step": 33495
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.66133451461792,
      "learning_rate": 0.0005692504988922821,
      "loss": 3.0607,
      "step": 33496
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9950426816940308,
      "learning_rate": 0.0005692486948840474,
      "loss": 3.0276,
      "step": 33497
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5245438814163208,
      "learning_rate": 0.0005692468908257542,
      "loss": 2.9188,
      "step": 33498
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2978079319000244,
      "learning_rate": 0.000569245086717403,
      "loss": 3.3325,
      "step": 33499
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.41478431224823,
      "learning_rate": 0.0005692432825589941,
      "loss": 3.0122,
      "step": 33500
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4594115018844604,
      "learning_rate": 0.0005692414783505278,
      "loss": 3.0248,
      "step": 33501
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3677620887756348,
      "learning_rate": 0.0005692396740920044,
      "loss": 2.8576,
      "step": 33502
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5140992403030396,
      "learning_rate": 0.0005692378697834242,
      "loss": 3.1058,
      "step": 33503
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6093562841415405,
      "learning_rate": 0.0005692360654247875,
      "loss": 2.9925,
      "step": 33504
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.475865364074707,
      "learning_rate": 0.000569234261016095,
      "loss": 3.056,
      "step": 33505
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4269769191741943,
      "learning_rate": 0.0005692324565573466,
      "loss": 2.9929,
      "step": 33506
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.29706609249115,
      "learning_rate": 0.0005692306520485429,
      "loss": 2.9787,
      "step": 33507
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5980737209320068,
      "learning_rate": 0.0005692288474896841,
      "loss": 3.0943,
      "step": 33508
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.691354751586914,
      "learning_rate": 0.0005692270428807705,
      "loss": 3.1935,
      "step": 33509
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9580813646316528,
      "learning_rate": 0.0005692252382218026,
      "loss": 3.041,
      "step": 33510
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.620700478553772,
      "learning_rate": 0.0005692234335127807,
      "loss": 3.008,
      "step": 33511
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.831956386566162,
      "learning_rate": 0.0005692216287537049,
      "loss": 2.8495,
      "step": 33512
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3926942348480225,
      "learning_rate": 0.0005692198239445758,
      "loss": 3.229,
      "step": 33513
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3590039014816284,
      "learning_rate": 0.0005692180190853937,
      "loss": 3.2447,
      "step": 33514
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3854916095733643,
      "learning_rate": 0.0005692162141761588,
      "loss": 2.8145,
      "step": 33515
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5277048349380493,
      "learning_rate": 0.0005692144092168716,
      "loss": 3.1815,
      "step": 33516
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5850837230682373,
      "learning_rate": 0.0005692126042075324,
      "loss": 3.0486,
      "step": 33517
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5271313190460205,
      "learning_rate": 0.0005692107991481413,
      "loss": 3.2032,
      "step": 33518
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7475221157073975,
      "learning_rate": 0.000569208994038699,
      "loss": 2.9201,
      "step": 33519
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4398541450500488,
      "learning_rate": 0.0005692071888792056,
      "loss": 3.0201,
      "step": 33520
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.090334177017212,
      "learning_rate": 0.0005692053836696615,
      "loss": 2.8942,
      "step": 33521
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4909226894378662,
      "learning_rate": 0.0005692035784100672,
      "loss": 3.1773,
      "step": 33522
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.556023359298706,
      "learning_rate": 0.0005692017731004227,
      "loss": 3.0215,
      "step": 33523
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.750624418258667,
      "learning_rate": 0.0005691999677407285,
      "loss": 2.8724,
      "step": 33524
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6063498258590698,
      "learning_rate": 0.0005691981623309851,
      "loss": 3.012,
      "step": 33525
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.732288122177124,
      "learning_rate": 0.0005691963568711925,
      "loss": 3.2232,
      "step": 33526
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.578334093093872,
      "learning_rate": 0.0005691945513613513,
      "loss": 2.9496,
      "step": 33527
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2587974071502686,
      "learning_rate": 0.0005691927458014618,
      "loss": 3.0751,
      "step": 33528
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5905853509902954,
      "learning_rate": 0.0005691909401915242,
      "loss": 3.1011,
      "step": 33529
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.021265983581543,
      "learning_rate": 0.0005691891345315389,
      "loss": 2.8843,
      "step": 33530
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7774860858917236,
      "learning_rate": 0.0005691873288215063,
      "loss": 3.1375,
      "step": 33531
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8736900091171265,
      "learning_rate": 0.0005691855230614268,
      "loss": 2.9322,
      "step": 33532
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8158038854599,
      "learning_rate": 0.0005691837172513005,
      "loss": 2.9146,
      "step": 33533
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.201669216156006,
      "learning_rate": 0.0005691819113911279,
      "loss": 3.2485,
      "step": 33534
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3392431735992432,
      "learning_rate": 0.0005691801054809093,
      "loss": 3.0986,
      "step": 33535
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5432261228561401,
      "learning_rate": 0.0005691782995206451,
      "loss": 3.0412,
      "step": 33536
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5901780128479004,
      "learning_rate": 0.0005691764935103355,
      "loss": 3.1978,
      "step": 33537
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.47352135181427,
      "learning_rate": 0.0005691746874499809,
      "loss": 3.2,
      "step": 33538
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3430777788162231,
      "learning_rate": 0.0005691728813395817,
      "loss": 2.6994,
      "step": 33539
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4983915090560913,
      "learning_rate": 0.000569171075179138,
      "loss": 3.2978,
      "step": 33540
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6427809000015259,
      "learning_rate": 0.0005691692689686505,
      "loss": 3.1747,
      "step": 33541
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6845601797103882,
      "learning_rate": 0.0005691674627081193,
      "loss": 3.0362,
      "step": 33542
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8616020679473877,
      "learning_rate": 0.0005691656563975449,
      "loss": 3.1799,
      "step": 33543
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0129504203796387,
      "learning_rate": 0.0005691638500369274,
      "loss": 3.1461,
      "step": 33544
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3339166641235352,
      "learning_rate": 0.0005691620436262673,
      "loss": 3.0753,
      "step": 33545
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.617893099784851,
      "learning_rate": 0.0005691602371655648,
      "loss": 3.1499,
      "step": 33546
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9712709188461304,
      "learning_rate": 0.0005691584306548205,
      "loss": 3.089,
      "step": 33547
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.173039197921753,
      "learning_rate": 0.0005691566240940344,
      "loss": 3.0034,
      "step": 33548
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.31205415725708,
      "learning_rate": 0.0005691548174832072,
      "loss": 3.1281,
      "step": 33549
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3966796398162842,
      "learning_rate": 0.0005691530108223389,
      "loss": 3.099,
      "step": 33550
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3066750764846802,
      "learning_rate": 0.00056915120411143,
      "loss": 3.1692,
      "step": 33551
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4596474170684814,
      "learning_rate": 0.0005691493973504808,
      "loss": 2.9762,
      "step": 33552
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5460087060928345,
      "learning_rate": 0.0005691475905394916,
      "loss": 2.9555,
      "step": 33553
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2115583419799805,
      "learning_rate": 0.0005691457836784628,
      "loss": 2.8253,
      "step": 33554
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5685302019119263,
      "learning_rate": 0.0005691439767673949,
      "loss": 3.0903,
      "step": 33555
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3315091133117676,
      "learning_rate": 0.0005691421698062879,
      "loss": 2.9664,
      "step": 33556
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.337598204612732,
      "learning_rate": 0.0005691403627951422,
      "loss": 3.166,
      "step": 33557
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5220974683761597,
      "learning_rate": 0.0005691385557339584,
      "loss": 3.0734,
      "step": 33558
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6077749729156494,
      "learning_rate": 0.0005691367486227365,
      "loss": 2.9506,
      "step": 33559
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3760466575622559,
      "learning_rate": 0.0005691349414614771,
      "loss": 3.035,
      "step": 33560
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.130131721496582,
      "learning_rate": 0.0005691331342501804,
      "loss": 2.9803,
      "step": 33561
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3887851238250732,
      "learning_rate": 0.0005691313269888469,
      "loss": 3.1731,
      "step": 33562
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5591886043548584,
      "learning_rate": 0.0005691295196774766,
      "loss": 3.1451,
      "step": 33563
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5736459493637085,
      "learning_rate": 0.0005691277123160702,
      "loss": 3.2179,
      "step": 33564
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7716245651245117,
      "learning_rate": 0.0005691259049046278,
      "loss": 3.0396,
      "step": 33565
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6729304790496826,
      "learning_rate": 0.0005691240974431497,
      "loss": 3.2162,
      "step": 33566
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2491023540496826,
      "learning_rate": 0.0005691222899316366,
      "loss": 3.0649,
      "step": 33567
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5850982666015625,
      "learning_rate": 0.0005691204823700884,
      "loss": 3.0096,
      "step": 33568
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.532995343208313,
      "learning_rate": 0.0005691186747585058,
      "loss": 2.8772,
      "step": 33569
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.644286036491394,
      "learning_rate": 0.0005691168670968888,
      "loss": 3.1278,
      "step": 33570
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8439466953277588,
      "learning_rate": 0.000569115059385238,
      "loss": 3.0434,
      "step": 33571
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3994131088256836,
      "learning_rate": 0.0005691132516235535,
      "loss": 3.3319,
      "step": 33572
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4739749431610107,
      "learning_rate": 0.0005691114438118359,
      "loss": 2.9066,
      "step": 33573
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6638234853744507,
      "learning_rate": 0.0005691096359500853,
      "loss": 3.3543,
      "step": 33574
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5449650287628174,
      "learning_rate": 0.0005691078280383021,
      "loss": 2.9569,
      "step": 33575
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6059938669204712,
      "learning_rate": 0.0005691060200764868,
      "loss": 3.0846,
      "step": 33576
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3921170234680176,
      "learning_rate": 0.0005691042120646396,
      "loss": 3.1599,
      "step": 33577
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.254824638366699,
      "learning_rate": 0.0005691024040027608,
      "loss": 2.8698,
      "step": 33578
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4997098445892334,
      "learning_rate": 0.0005691005958908508,
      "loss": 3.0084,
      "step": 33579
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3658591508865356,
      "learning_rate": 0.0005690987877289099,
      "loss": 3.0023,
      "step": 33580
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7051137685775757,
      "learning_rate": 0.0005690969795169385,
      "loss": 3.1728,
      "step": 33581
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4282567501068115,
      "learning_rate": 0.0005690951712549368,
      "loss": 3.1991,
      "step": 33582
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.675775170326233,
      "learning_rate": 0.0005690933629429053,
      "loss": 3.1268,
      "step": 33583
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.1990694999694824,
      "learning_rate": 0.0005690915545808443,
      "loss": 2.9653,
      "step": 33584
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.7577619552612305,
      "learning_rate": 0.000569089746168754,
      "loss": 2.9964,
      "step": 33585
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9502592086791992,
      "learning_rate": 0.0005690879377066349,
      "loss": 2.89,
      "step": 33586
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9831500053405762,
      "learning_rate": 0.0005690861291944873,
      "loss": 3.2247,
      "step": 33587
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5146647691726685,
      "learning_rate": 0.0005690843206323114,
      "loss": 3.1198,
      "step": 33588
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4420400857925415,
      "learning_rate": 0.0005690825120201078,
      "loss": 3.0868,
      "step": 33589
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.376997470855713,
      "learning_rate": 0.0005690807033578765,
      "loss": 3.1249,
      "step": 33590
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.808181881904602,
      "learning_rate": 0.0005690788946456181,
      "loss": 3.014,
      "step": 33591
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7783583402633667,
      "learning_rate": 0.000569077085883333,
      "loss": 3.2434,
      "step": 33592
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.58661687374115,
      "learning_rate": 0.0005690752770710212,
      "loss": 2.9164,
      "step": 33593
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5497897863388062,
      "learning_rate": 0.0005690734682086833,
      "loss": 3.1798,
      "step": 33594
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.765727996826172,
      "learning_rate": 0.0005690716592963196,
      "loss": 3.1035,
      "step": 33595
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5084978342056274,
      "learning_rate": 0.0005690698503339303,
      "loss": 3.1342,
      "step": 33596
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8108010292053223,
      "learning_rate": 0.0005690680413215159,
      "loss": 3.0872,
      "step": 33597
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1269383430480957,
      "learning_rate": 0.0005690662322590767,
      "loss": 3.2492,
      "step": 33598
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4055111408233643,
      "learning_rate": 0.0005690644231466129,
      "loss": 3.2937,
      "step": 33599
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3595168590545654,
      "learning_rate": 0.000569062613984125,
      "loss": 3.0829,
      "step": 33600
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1195719242095947,
      "learning_rate": 0.0005690608047716134,
      "loss": 2.985,
      "step": 33601
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.708232045173645,
      "learning_rate": 0.0005690589955090782,
      "loss": 3.0456,
      "step": 33602
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.354644775390625,
      "learning_rate": 0.0005690571861965198,
      "loss": 2.9029,
      "step": 33603
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8469898700714111,
      "learning_rate": 0.0005690553768339387,
      "loss": 3.1464,
      "step": 33604
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.435131311416626,
      "learning_rate": 0.0005690535674213351,
      "loss": 3.2269,
      "step": 33605
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.721902847290039,
      "learning_rate": 0.0005690517579587094,
      "loss": 3.1545,
      "step": 33606
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6141892671585083,
      "learning_rate": 0.0005690499484460618,
      "loss": 2.9803,
      "step": 33607
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7294907569885254,
      "learning_rate": 0.0005690481388833927,
      "loss": 3.0157,
      "step": 33608
    },
    {
      "epoch": 0.44,
      "grad_norm": 4.193996906280518,
      "learning_rate": 0.0005690463292707026,
      "loss": 3.1411,
      "step": 33609
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.342879295349121,
      "learning_rate": 0.0005690445196079916,
      "loss": 3.02,
      "step": 33610
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.010932207107544,
      "learning_rate": 0.0005690427098952603,
      "loss": 3.1368,
      "step": 33611
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7078756093978882,
      "learning_rate": 0.0005690409001325087,
      "loss": 3.0065,
      "step": 33612
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3670694828033447,
      "learning_rate": 0.0005690390903197374,
      "loss": 3.0333,
      "step": 33613
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1913774013519287,
      "learning_rate": 0.0005690372804569467,
      "loss": 3.3936,
      "step": 33614
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4066301584243774,
      "learning_rate": 0.0005690354705441368,
      "loss": 3.1491,
      "step": 33615
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0280776023864746,
      "learning_rate": 0.0005690336605813083,
      "loss": 3.0062,
      "step": 33616
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.481595754623413,
      "learning_rate": 0.0005690318505684613,
      "loss": 3.1643,
      "step": 33617
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6777840852737427,
      "learning_rate": 0.000569030040505596,
      "loss": 3.1478,
      "step": 33618
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6294195652008057,
      "learning_rate": 0.0005690282303927131,
      "loss": 3.0181,
      "step": 33619
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.949973702430725,
      "learning_rate": 0.0005690264202298128,
      "loss": 3.0824,
      "step": 33620
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.8756446838378906,
      "learning_rate": 0.0005690246100168954,
      "loss": 2.7799,
      "step": 33621
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.818785548210144,
      "learning_rate": 0.0005690227997539613,
      "loss": 3.1252,
      "step": 33622
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.5635600090026855,
      "learning_rate": 0.0005690209894410106,
      "loss": 3.3528,
      "step": 33623
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.5371077060699463,
      "learning_rate": 0.0005690191790780439,
      "loss": 3.0688,
      "step": 33624
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.085965156555176,
      "learning_rate": 0.0005690173686650615,
      "loss": 3.0213,
      "step": 33625
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2243314981460571,
      "learning_rate": 0.0005690155582020636,
      "loss": 3.0706,
      "step": 33626
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.733535885810852,
      "learning_rate": 0.0005690137476890507,
      "loss": 3.0588,
      "step": 33627
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8303674459457397,
      "learning_rate": 0.0005690119371260231,
      "loss": 3.4016,
      "step": 33628
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7279459238052368,
      "learning_rate": 0.0005690101265129811,
      "loss": 3.0446,
      "step": 33629
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6435045003890991,
      "learning_rate": 0.000569008315849925,
      "loss": 3.1745,
      "step": 33630
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4377542734146118,
      "learning_rate": 0.0005690065051368551,
      "loss": 2.9077,
      "step": 33631
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6549501419067383,
      "learning_rate": 0.0005690046943737719,
      "loss": 3.1501,
      "step": 33632
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6210225820541382,
      "learning_rate": 0.0005690028835606758,
      "loss": 3.0135,
      "step": 33633
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.351754069328308,
      "learning_rate": 0.0005690010726975668,
      "loss": 3.2043,
      "step": 33634
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0279769897460938,
      "learning_rate": 0.0005689992617844453,
      "loss": 3.1343,
      "step": 33635
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.175325393676758,
      "learning_rate": 0.000568997450821312,
      "loss": 3.1207,
      "step": 33636
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4760464429855347,
      "learning_rate": 0.0005689956398081669,
      "loss": 3.0882,
      "step": 33637
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.066441774368286,
      "learning_rate": 0.0005689938287450105,
      "loss": 2.8647,
      "step": 33638
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2320945262908936,
      "learning_rate": 0.0005689920176318429,
      "loss": 3.0845,
      "step": 33639
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.621437907218933,
      "learning_rate": 0.0005689902064686648,
      "loss": 3.17,
      "step": 33640
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6676234006881714,
      "learning_rate": 0.0005689883952554762,
      "loss": 2.795,
      "step": 33641
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7108708620071411,
      "learning_rate": 0.0005689865839922777,
      "loss": 3.1663,
      "step": 33642
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5008341073989868,
      "learning_rate": 0.0005689847726790694,
      "loss": 3.1788,
      "step": 33643
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4160794019699097,
      "learning_rate": 0.0005689829613158518,
      "loss": 3.1294,
      "step": 33644
    },
    {
      "epoch": 0.44,
      "grad_norm": 4.072478294372559,
      "learning_rate": 0.0005689811499026252,
      "loss": 3.0582,
      "step": 33645
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3196072578430176,
      "learning_rate": 0.00056897933843939,
      "loss": 3.0093,
      "step": 33646
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3545167446136475,
      "learning_rate": 0.0005689775269261463,
      "loss": 3.2197,
      "step": 33647
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.620058059692383,
      "learning_rate": 0.0005689757153628947,
      "loss": 3.3785,
      "step": 33648
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.545301675796509,
      "learning_rate": 0.0005689739037496354,
      "loss": 3.0183,
      "step": 33649
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2204504013061523,
      "learning_rate": 0.0005689720920863689,
      "loss": 3.1691,
      "step": 33650
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.206200361251831,
      "learning_rate": 0.0005689702803730952,
      "loss": 2.9784,
      "step": 33651
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.329120635986328,
      "learning_rate": 0.000568968468609815,
      "loss": 3.014,
      "step": 33652
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2518723011016846,
      "learning_rate": 0.0005689666567965285,
      "loss": 3.2538,
      "step": 33653
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.556312918663025,
      "learning_rate": 0.0005689648449332359,
      "loss": 3.1669,
      "step": 33654
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2345069646835327,
      "learning_rate": 0.0005689630330199378,
      "loss": 3.116,
      "step": 33655
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5878181457519531,
      "learning_rate": 0.0005689612210566342,
      "loss": 2.8545,
      "step": 33656
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4307448863983154,
      "learning_rate": 0.0005689594090433259,
      "loss": 3.0088,
      "step": 33657
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8012064695358276,
      "learning_rate": 0.0005689575969800128,
      "loss": 3.0601,
      "step": 33658
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.671886920928955,
      "learning_rate": 0.0005689557848666954,
      "loss": 3.2302,
      "step": 33659
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9002617597579956,
      "learning_rate": 0.0005689539727033741,
      "loss": 2.9239,
      "step": 33660
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.7021446228027344,
      "learning_rate": 0.000568952160490049,
      "loss": 2.8554,
      "step": 33661
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4199855327606201,
      "learning_rate": 0.0005689503482267209,
      "loss": 3.0263,
      "step": 33662
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8873510360717773,
      "learning_rate": 0.0005689485359133897,
      "loss": 3.074,
      "step": 33663
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.63535475730896,
      "learning_rate": 0.0005689467235500559,
      "loss": 2.8916,
      "step": 33664
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6932227611541748,
      "learning_rate": 0.0005689449111367198,
      "loss": 2.6786,
      "step": 33665
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7644031047821045,
      "learning_rate": 0.0005689430986733818,
      "loss": 3.1727,
      "step": 33666
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.479294776916504,
      "learning_rate": 0.0005689412861600423,
      "loss": 3.0267,
      "step": 33667
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3547443151474,
      "learning_rate": 0.0005689394735967013,
      "loss": 3.1708,
      "step": 33668
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.18658447265625,
      "learning_rate": 0.0005689376609833596,
      "loss": 2.9354,
      "step": 33669
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5025701522827148,
      "learning_rate": 0.0005689358483200172,
      "loss": 3.1573,
      "step": 33670
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7129557132720947,
      "learning_rate": 0.0005689340356066745,
      "loss": 2.8206,
      "step": 33671
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.266878843307495,
      "learning_rate": 0.0005689322228433321,
      "loss": 2.822,
      "step": 33672
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.82039737701416,
      "learning_rate": 0.0005689304100299899,
      "loss": 2.9573,
      "step": 33673
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5804827213287354,
      "learning_rate": 0.0005689285971666486,
      "loss": 3.0919,
      "step": 33674
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5102448463439941,
      "learning_rate": 0.0005689267842533083,
      "loss": 2.9529,
      "step": 33675
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.169166088104248,
      "learning_rate": 0.0005689249712899694,
      "loss": 3.1089,
      "step": 33676
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6815266609191895,
      "learning_rate": 0.0005689231582766325,
      "loss": 2.982,
      "step": 33677
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1894495487213135,
      "learning_rate": 0.0005689213452132975,
      "loss": 3.0836,
      "step": 33678
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4662978649139404,
      "learning_rate": 0.0005689195320999651,
      "loss": 2.9715,
      "step": 33679
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4663373231887817,
      "learning_rate": 0.0005689177189366353,
      "loss": 2.9684,
      "step": 33680
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7314294576644897,
      "learning_rate": 0.0005689159057233087,
      "loss": 3.1438,
      "step": 33681
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.562748908996582,
      "learning_rate": 0.0005689140924599855,
      "loss": 3.0723,
      "step": 33682
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8286330699920654,
      "learning_rate": 0.0005689122791466663,
      "loss": 3.0854,
      "step": 33683
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3629786968231201,
      "learning_rate": 0.000568910465783351,
      "loss": 2.8583,
      "step": 33684
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3299262523651123,
      "learning_rate": 0.0005689086523700403,
      "loss": 2.8552,
      "step": 33685
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5746828317642212,
      "learning_rate": 0.0005689068389067343,
      "loss": 2.8159,
      "step": 33686
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3973897695541382,
      "learning_rate": 0.0005689050253934335,
      "loss": 2.9913,
      "step": 33687
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.451797366142273,
      "learning_rate": 0.0005689032118301382,
      "loss": 3.0337,
      "step": 33688
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4487794637680054,
      "learning_rate": 0.0005689013982168487,
      "loss": 3.2211,
      "step": 33689
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8121364116668701,
      "learning_rate": 0.0005688995845535654,
      "loss": 2.8987,
      "step": 33690
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9395662546157837,
      "learning_rate": 0.0005688977708402886,
      "loss": 2.8704,
      "step": 33691
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7718263864517212,
      "learning_rate": 0.0005688959570770185,
      "loss": 3.1487,
      "step": 33692
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.548534870147705,
      "learning_rate": 0.0005688941432637557,
      "loss": 2.9718,
      "step": 33693
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.727198839187622,
      "learning_rate": 0.0005688923294005004,
      "loss": 3.1587,
      "step": 33694
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6082531213760376,
      "learning_rate": 0.0005688905154872528,
      "loss": 3.0591,
      "step": 33695
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.400486946105957,
      "learning_rate": 0.0005688887015240135,
      "loss": 2.9658,
      "step": 33696
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5599992275238037,
      "learning_rate": 0.0005688868875107826,
      "loss": 3.2564,
      "step": 33697
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9269622564315796,
      "learning_rate": 0.0005688850734475606,
      "loss": 3.0552,
      "step": 33698
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4452310800552368,
      "learning_rate": 0.0005688832593343479,
      "loss": 3.2075,
      "step": 33699
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2377004623413086,
      "learning_rate": 0.0005688814451711446,
      "loss": 2.9184,
      "step": 33700
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.360121965408325,
      "learning_rate": 0.0005688796309579513,
      "loss": 2.5958,
      "step": 33701
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7763407230377197,
      "learning_rate": 0.000568877816694768,
      "loss": 3.0681,
      "step": 33702
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.657400369644165,
      "learning_rate": 0.0005688760023815953,
      "loss": 2.8403,
      "step": 33703
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.943317174911499,
      "learning_rate": 0.0005688741880184336,
      "loss": 3.1795,
      "step": 33704
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.505386233329773,
      "learning_rate": 0.000568872373605283,
      "loss": 3.0007,
      "step": 33705
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4439560174942017,
      "learning_rate": 0.000568870559142144,
      "loss": 2.9752,
      "step": 33706
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8061139583587646,
      "learning_rate": 0.0005688687446290169,
      "loss": 3.0068,
      "step": 33707
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4329583644866943,
      "learning_rate": 0.000568866930065902,
      "loss": 3.2926,
      "step": 33708
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4247688055038452,
      "learning_rate": 0.0005688651154527997,
      "loss": 3.2365,
      "step": 33709
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.1513564586639404,
      "learning_rate": 0.0005688633007897103,
      "loss": 3.0781,
      "step": 33710
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3789817094802856,
      "learning_rate": 0.0005688614860766341,
      "loss": 3.0477,
      "step": 33711
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.328109622001648,
      "learning_rate": 0.0005688596713135715,
      "loss": 2.9204,
      "step": 33712
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.537552833557129,
      "learning_rate": 0.0005688578565005227,
      "loss": 3.112,
      "step": 33713
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.43165922164917,
      "learning_rate": 0.0005688560416374882,
      "loss": 3.118,
      "step": 33714
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4569971561431885,
      "learning_rate": 0.0005688542267244683,
      "loss": 3.1669,
      "step": 33715
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4710893630981445,
      "learning_rate": 0.0005688524117614634,
      "loss": 2.9103,
      "step": 33716
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7935622930526733,
      "learning_rate": 0.0005688505967484736,
      "loss": 3.1781,
      "step": 33717
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5086818933486938,
      "learning_rate": 0.0005688487816854996,
      "loss": 3.0596,
      "step": 33718
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8930635452270508,
      "learning_rate": 0.0005688469665725414,
      "loss": 3.0864,
      "step": 33719
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6821062564849854,
      "learning_rate": 0.0005688451514095995,
      "loss": 2.9876,
      "step": 33720
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6129533052444458,
      "learning_rate": 0.0005688433361966743,
      "loss": 3.1258,
      "step": 33721
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3421061038970947,
      "learning_rate": 0.0005688415209337659,
      "loss": 2.8452,
      "step": 33722
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6818698644638062,
      "learning_rate": 0.000568839705620875,
      "loss": 3.0856,
      "step": 33723
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.089308738708496,
      "learning_rate": 0.0005688378902580015,
      "loss": 2.9086,
      "step": 33724
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3946040868759155,
      "learning_rate": 0.0005688360748451461,
      "loss": 2.9873,
      "step": 33725
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.064485549926758,
      "learning_rate": 0.0005688342593823089,
      "loss": 3.083,
      "step": 33726
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4897820949554443,
      "learning_rate": 0.0005688324438694903,
      "loss": 3.1507,
      "step": 33727
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6419223546981812,
      "learning_rate": 0.0005688306283066908,
      "loss": 3.2066,
      "step": 33728
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4970626831054688,
      "learning_rate": 0.0005688288126939107,
      "loss": 3.0177,
      "step": 33729
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4405661821365356,
      "learning_rate": 0.0005688269970311501,
      "loss": 2.9707,
      "step": 33730
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4508886337280273,
      "learning_rate": 0.0005688251813184094,
      "loss": 3.1593,
      "step": 33731
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.339322805404663,
      "learning_rate": 0.0005688233655556892,
      "loss": 2.8329,
      "step": 33732
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.664060354232788,
      "learning_rate": 0.0005688215497429896,
      "loss": 3.187,
      "step": 33733
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6612787246704102,
      "learning_rate": 0.0005688197338803109,
      "loss": 2.8428,
      "step": 33734
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6427927017211914,
      "learning_rate": 0.0005688179179676537,
      "loss": 3.0829,
      "step": 33735
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6059516668319702,
      "learning_rate": 0.0005688161020050181,
      "loss": 3.1104,
      "step": 33736
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4527074098587036,
      "learning_rate": 0.0005688142859924046,
      "loss": 3.088,
      "step": 33737
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.441320776939392,
      "learning_rate": 0.0005688124699298133,
      "loss": 2.9109,
      "step": 33738
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.568455696105957,
      "learning_rate": 0.0005688106538172447,
      "loss": 3.1656,
      "step": 33739
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5302647352218628,
      "learning_rate": 0.0005688088376546993,
      "loss": 3.1034,
      "step": 33740
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6315431594848633,
      "learning_rate": 0.0005688070214421771,
      "loss": 3.1532,
      "step": 33741
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.540079951286316,
      "learning_rate": 0.0005688052051796787,
      "loss": 3.1055,
      "step": 33742
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2963054180145264,
      "learning_rate": 0.0005688033888672043,
      "loss": 3.0382,
      "step": 33743
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.14353346824646,
      "learning_rate": 0.0005688015725047542,
      "loss": 2.9191,
      "step": 33744
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9398930072784424,
      "learning_rate": 0.0005687997560923288,
      "loss": 2.7986,
      "step": 33745
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5471466779708862,
      "learning_rate": 0.0005687979396299286,
      "loss": 3.1384,
      "step": 33746
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9333467483520508,
      "learning_rate": 0.0005687961231175537,
      "loss": 3.3321,
      "step": 33747
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.358029842376709,
      "learning_rate": 0.0005687943065552045,
      "loss": 3.0206,
      "step": 33748
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6669102907180786,
      "learning_rate": 0.0005687924899428814,
      "loss": 3.0935,
      "step": 33749
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.415121555328369,
      "learning_rate": 0.0005687906732805847,
      "loss": 2.9114,
      "step": 33750
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6636385917663574,
      "learning_rate": 0.0005687888565683147,
      "loss": 3.0446,
      "step": 33751
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3780133724212646,
      "learning_rate": 0.0005687870398060719,
      "loss": 2.9361,
      "step": 33752
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9036322832107544,
      "learning_rate": 0.0005687852229938564,
      "loss": 3.0648,
      "step": 33753
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2362849712371826,
      "learning_rate": 0.0005687834061316687,
      "loss": 3.1463,
      "step": 33754
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1371378898620605,
      "learning_rate": 0.0005687815892195091,
      "loss": 3.0812,
      "step": 33755
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2612324953079224,
      "learning_rate": 0.000568779772257378,
      "loss": 3.1442,
      "step": 33756
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4580389261245728,
      "learning_rate": 0.0005687779552452755,
      "loss": 3.3671,
      "step": 33757
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8047305345535278,
      "learning_rate": 0.0005687761381832022,
      "loss": 3.0971,
      "step": 33758
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4475826025009155,
      "learning_rate": 0.0005687743210711583,
      "loss": 2.9214,
      "step": 33759
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.50697922706604,
      "learning_rate": 0.0005687725039091442,
      "loss": 3.1481,
      "step": 33760
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2983797788619995,
      "learning_rate": 0.0005687706866971602,
      "loss": 3.2272,
      "step": 33761
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.563077688217163,
      "learning_rate": 0.0005687688694352068,
      "loss": 3.106,
      "step": 33762
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.474984049797058,
      "learning_rate": 0.000568767052123284,
      "loss": 3.1616,
      "step": 33763
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0445053577423096,
      "learning_rate": 0.0005687652347613924,
      "loss": 2.6285,
      "step": 33764
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5674700736999512,
      "learning_rate": 0.0005687634173495322,
      "loss": 2.8802,
      "step": 33765
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6316208839416504,
      "learning_rate": 0.000568761599887704,
      "loss": 3.0354,
      "step": 33766
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.638939380645752,
      "learning_rate": 0.0005687597823759078,
      "loss": 3.2443,
      "step": 33767
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6386103630065918,
      "learning_rate": 0.0005687579648141441,
      "loss": 3.2974,
      "step": 33768
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6427193880081177,
      "learning_rate": 0.0005687561472024133,
      "loss": 2.907,
      "step": 33769
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7592400312423706,
      "learning_rate": 0.0005687543295407156,
      "loss": 3.0104,
      "step": 33770
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0255753993988037,
      "learning_rate": 0.0005687525118290513,
      "loss": 3.092,
      "step": 33771
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7545721530914307,
      "learning_rate": 0.000568750694067421,
      "loss": 3.0658,
      "step": 33772
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7308695316314697,
      "learning_rate": 0.0005687488762558247,
      "loss": 2.965,
      "step": 33773
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5083422660827637,
      "learning_rate": 0.000568747058394263,
      "loss": 2.7131,
      "step": 33774
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3253695964813232,
      "learning_rate": 0.0005687452404827362,
      "loss": 2.9819,
      "step": 33775
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.695897102355957,
      "learning_rate": 0.0005687434225212446,
      "loss": 2.9451,
      "step": 33776
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5777171850204468,
      "learning_rate": 0.0005687416045097885,
      "loss": 2.8508,
      "step": 33777
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.777588129043579,
      "learning_rate": 0.0005687397864483681,
      "loss": 2.8583,
      "step": 33778
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6337928771972656,
      "learning_rate": 0.0005687379683369841,
      "loss": 2.9493,
      "step": 33779
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.040309429168701,
      "learning_rate": 0.0005687361501756365,
      "loss": 3.1398,
      "step": 33780
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.111494302749634,
      "learning_rate": 0.0005687343319643259,
      "loss": 3.0778,
      "step": 33781
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.808066487312317,
      "learning_rate": 0.0005687325137030524,
      "loss": 3.0716,
      "step": 33782
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.126542806625366,
      "learning_rate": 0.0005687306953918165,
      "loss": 2.974,
      "step": 33783
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8181943893432617,
      "learning_rate": 0.0005687288770306185,
      "loss": 3.1958,
      "step": 33784
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4248019456863403,
      "learning_rate": 0.0005687270586194588,
      "loss": 3.0405,
      "step": 33785
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.173541784286499,
      "learning_rate": 0.0005687252401583376,
      "loss": 3.048,
      "step": 33786
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5710551738739014,
      "learning_rate": 0.0005687234216472553,
      "loss": 3.0448,
      "step": 33787
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4486130475997925,
      "learning_rate": 0.0005687216030862123,
      "loss": 3.0415,
      "step": 33788
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4272644519805908,
      "learning_rate": 0.0005687197844752088,
      "loss": 3.1209,
      "step": 33789
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6971739530563354,
      "learning_rate": 0.0005687179658142453,
      "loss": 3.1379,
      "step": 33790
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8760799169540405,
      "learning_rate": 0.000568716147103322,
      "loss": 3.2479,
      "step": 33791
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8429181575775146,
      "learning_rate": 0.0005687143283424393,
      "loss": 2.9617,
      "step": 33792
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6226125955581665,
      "learning_rate": 0.0005687125095315974,
      "loss": 3.298,
      "step": 33793
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4736480712890625,
      "learning_rate": 0.000568710690670797,
      "loss": 2.896,
      "step": 33794
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.608237862586975,
      "learning_rate": 0.0005687088717600381,
      "loss": 2.9812,
      "step": 33795
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.467241883277893,
      "learning_rate": 0.0005687070527993211,
      "loss": 3.1476,
      "step": 33796
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8484904766082764,
      "learning_rate": 0.0005687052337886465,
      "loss": 3.3165,
      "step": 33797
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0341904163360596,
      "learning_rate": 0.0005687034147280146,
      "loss": 3.1876,
      "step": 33798
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.776771903038025,
      "learning_rate": 0.0005687015956174254,
      "loss": 3.1917,
      "step": 33799
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.083688974380493,
      "learning_rate": 0.0005686997764568797,
      "loss": 3.1547,
      "step": 33800
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5736912488937378,
      "learning_rate": 0.0005686979572463776,
      "loss": 2.9996,
      "step": 33801
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6212009191513062,
      "learning_rate": 0.0005686961379859195,
      "loss": 3.2057,
      "step": 33802
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7719279527664185,
      "learning_rate": 0.0005686943186755056,
      "loss": 3.0664,
      "step": 33803
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8289604187011719,
      "learning_rate": 0.0005686924993151364,
      "loss": 3.1583,
      "step": 33804
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5425066947937012,
      "learning_rate": 0.0005686906799048123,
      "loss": 3.0324,
      "step": 33805
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3739339113235474,
      "learning_rate": 0.0005686888604445335,
      "loss": 3.0231,
      "step": 33806
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.849520206451416,
      "learning_rate": 0.0005686870409343003,
      "loss": 2.9674,
      "step": 33807
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.553871989250183,
      "learning_rate": 0.0005686852213741131,
      "loss": 3.2317,
      "step": 33808
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3800508975982666,
      "learning_rate": 0.0005686834017639723,
      "loss": 3.228,
      "step": 33809
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9913146495819092,
      "learning_rate": 0.0005686815821038781,
      "loss": 2.8749,
      "step": 33810
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7888286113739014,
      "learning_rate": 0.0005686797623938311,
      "loss": 2.8121,
      "step": 33811
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9156804084777832,
      "learning_rate": 0.0005686779426338313,
      "loss": 3.2367,
      "step": 33812
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4471826553344727,
      "learning_rate": 0.0005686761228238793,
      "loss": 3.171,
      "step": 33813
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6786653995513916,
      "learning_rate": 0.0005686743029639752,
      "loss": 2.9817,
      "step": 33814
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3361835479736328,
      "learning_rate": 0.0005686724830541195,
      "loss": 3.1611,
      "step": 33815
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1827337741851807,
      "learning_rate": 0.0005686706630943127,
      "loss": 2.9869,
      "step": 33816
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9769033193588257,
      "learning_rate": 0.0005686688430845548,
      "loss": 3.2476,
      "step": 33817
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.903167963027954,
      "learning_rate": 0.0005686670230248463,
      "loss": 2.741,
      "step": 33818
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7298047542572021,
      "learning_rate": 0.0005686652029151876,
      "loss": 2.9964,
      "step": 33819
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.562234878540039,
      "learning_rate": 0.0005686633827555788,
      "loss": 2.9062,
      "step": 33820
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5708558559417725,
      "learning_rate": 0.0005686615625460205,
      "loss": 3.3437,
      "step": 33821
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5619919300079346,
      "learning_rate": 0.000568659742286513,
      "loss": 3.1898,
      "step": 33822
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4121053218841553,
      "learning_rate": 0.0005686579219770565,
      "loss": 3.1672,
      "step": 33823
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6231862306594849,
      "learning_rate": 0.0005686561016176515,
      "loss": 2.9526,
      "step": 33824
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8383375406265259,
      "learning_rate": 0.0005686542812082982,
      "loss": 3.0624,
      "step": 33825
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.317980170249939,
      "learning_rate": 0.000568652460748997,
      "loss": 3.1038,
      "step": 33826
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4361705780029297,
      "learning_rate": 0.0005686506402397483,
      "loss": 2.9944,
      "step": 33827
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.433552622795105,
      "learning_rate": 0.0005686488196805523,
      "loss": 3.1989,
      "step": 33828
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8962428569793701,
      "learning_rate": 0.0005686469990714095,
      "loss": 2.9608,
      "step": 33829
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.032010555267334,
      "learning_rate": 0.0005686451784123201,
      "loss": 3.0578,
      "step": 33830
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9493629932403564,
      "learning_rate": 0.0005686433577032844,
      "loss": 2.9024,
      "step": 33831
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4922385215759277,
      "learning_rate": 0.000568641536944303,
      "loss": 2.9352,
      "step": 33832
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6888329982757568,
      "learning_rate": 0.000568639716135376,
      "loss": 2.9107,
      "step": 33833
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.5642645359039307,
      "learning_rate": 0.0005686378952765037,
      "loss": 3.1751,
      "step": 33834
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4871976375579834,
      "learning_rate": 0.0005686360743676867,
      "loss": 3.1799,
      "step": 33835
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8420636653900146,
      "learning_rate": 0.0005686342534089251,
      "loss": 3.3123,
      "step": 33836
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.52558171749115,
      "learning_rate": 0.0005686324324002193,
      "loss": 3.1244,
      "step": 33837
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7010297775268555,
      "learning_rate": 0.0005686306113415697,
      "loss": 2.9945,
      "step": 33838
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.395928382873535,
      "learning_rate": 0.0005686287902329767,
      "loss": 3.2479,
      "step": 33839
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5434324741363525,
      "learning_rate": 0.0005686269690744403,
      "loss": 3.0174,
      "step": 33840
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.296612501144409,
      "learning_rate": 0.0005686251478659612,
      "loss": 2.8824,
      "step": 33841
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6667872667312622,
      "learning_rate": 0.0005686233266075396,
      "loss": 2.9363,
      "step": 33842
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.418652057647705,
      "learning_rate": 0.0005686215052991759,
      "loss": 2.9158,
      "step": 33843
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.602203607559204,
      "learning_rate": 0.0005686196839408704,
      "loss": 2.8748,
      "step": 33844
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.341495990753174,
      "learning_rate": 0.0005686178625326233,
      "loss": 2.9298,
      "step": 33845
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6023331880569458,
      "learning_rate": 0.0005686160410744353,
      "loss": 2.8693,
      "step": 33846
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.407594919204712,
      "learning_rate": 0.0005686142195663064,
      "loss": 3.2333,
      "step": 33847
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7329466342926025,
      "learning_rate": 0.000568612398008237,
      "loss": 3.0298,
      "step": 33848
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9930449724197388,
      "learning_rate": 0.0005686105764002275,
      "loss": 3.0744,
      "step": 33849
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7628662586212158,
      "learning_rate": 0.0005686087547422782,
      "loss": 3.2656,
      "step": 33850
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.119011640548706,
      "learning_rate": 0.0005686069330343895,
      "loss": 3.2149,
      "step": 33851
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9509172439575195,
      "learning_rate": 0.0005686051112765617,
      "loss": 3.0876,
      "step": 33852
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5216881036758423,
      "learning_rate": 0.0005686032894687951,
      "loss": 3.3406,
      "step": 33853
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6061269044876099,
      "learning_rate": 0.0005686014676110901,
      "loss": 3.0781,
      "step": 33854
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0029959678649902,
      "learning_rate": 0.000568599645703447,
      "loss": 3.014,
      "step": 33855
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.997464179992676,
      "learning_rate": 0.0005685978237458662,
      "loss": 3.0646,
      "step": 33856
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9301698207855225,
      "learning_rate": 0.000568596001738348,
      "loss": 2.88,
      "step": 33857
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.561160683631897,
      "learning_rate": 0.0005685941796808927,
      "loss": 3.3116,
      "step": 33858
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7351645231246948,
      "learning_rate": 0.0005685923575735007,
      "loss": 3.1624,
      "step": 33859
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9276585578918457,
      "learning_rate": 0.0005685905354161722,
      "loss": 3.2893,
      "step": 33860
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6105592250823975,
      "learning_rate": 0.0005685887132089078,
      "loss": 2.8963,
      "step": 33861
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1666030883789062,
      "learning_rate": 0.0005685868909517076,
      "loss": 3.0793,
      "step": 33862
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9166065454483032,
      "learning_rate": 0.0005685850686445722,
      "loss": 3.1863,
      "step": 33863
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4195961952209473,
      "learning_rate": 0.0005685832462875016,
      "loss": 3.1666,
      "step": 33864
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4904052019119263,
      "learning_rate": 0.0005685814238804962,
      "loss": 3.1731,
      "step": 33865
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8786946535110474,
      "learning_rate": 0.0005685796014235566,
      "loss": 2.8926,
      "step": 33866
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6184033155441284,
      "learning_rate": 0.0005685777789166829,
      "loss": 2.7772,
      "step": 33867
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4355548620224,
      "learning_rate": 0.0005685759563598756,
      "loss": 2.996,
      "step": 33868
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.459167242050171,
      "learning_rate": 0.000568574133753135,
      "loss": 2.9779,
      "step": 33869
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6943919658660889,
      "learning_rate": 0.0005685723110964612,
      "loss": 3.1331,
      "step": 33870
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6666821241378784,
      "learning_rate": 0.0005685704883898549,
      "loss": 2.9698,
      "step": 33871
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4645648002624512,
      "learning_rate": 0.0005685686656333164,
      "loss": 3.3229,
      "step": 33872
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8676711320877075,
      "learning_rate": 0.0005685668428268456,
      "loss": 3.1309,
      "step": 33873
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.023541212081909,
      "learning_rate": 0.0005685650199704434,
      "loss": 2.9076,
      "step": 33874
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6906472444534302,
      "learning_rate": 0.0005685631970641098,
      "loss": 2.9101,
      "step": 33875
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.900884985923767,
      "learning_rate": 0.0005685613741078453,
      "loss": 3.1423,
      "step": 33876
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7873525619506836,
      "learning_rate": 0.00056855955110165,
      "loss": 3.0466,
      "step": 33877
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9201509952545166,
      "learning_rate": 0.0005685577280455246,
      "loss": 2.9118,
      "step": 33878
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6319990158081055,
      "learning_rate": 0.0005685559049394692,
      "loss": 3.05,
      "step": 33879
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5737091302871704,
      "learning_rate": 0.000568554081783484,
      "loss": 3.0875,
      "step": 33880
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8054217100143433,
      "learning_rate": 0.0005685522585775698,
      "loss": 3.1476,
      "step": 33881
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5438470840454102,
      "learning_rate": 0.0005685504353217265,
      "loss": 3.22,
      "step": 33882
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6138049364089966,
      "learning_rate": 0.0005685486120159547,
      "loss": 2.958,
      "step": 33883
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9246774911880493,
      "learning_rate": 0.0005685467886602546,
      "loss": 2.825,
      "step": 33884
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6488280296325684,
      "learning_rate": 0.0005685449652546266,
      "loss": 2.8677,
      "step": 33885
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4889332056045532,
      "learning_rate": 0.000568543141799071,
      "loss": 3.1807,
      "step": 33886
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4951635599136353,
      "learning_rate": 0.0005685413182935881,
      "loss": 2.9827,
      "step": 33887
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6014333963394165,
      "learning_rate": 0.0005685394947381783,
      "loss": 3.0221,
      "step": 33888
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4944632053375244,
      "learning_rate": 0.0005685376711328422,
      "loss": 3.2599,
      "step": 33889
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9102542400360107,
      "learning_rate": 0.0005685358474775797,
      "loss": 2.9952,
      "step": 33890
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5319762229919434,
      "learning_rate": 0.0005685340237723912,
      "loss": 2.9588,
      "step": 33891
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4668198823928833,
      "learning_rate": 0.0005685322000172772,
      "loss": 3.185,
      "step": 33892
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.517820119857788,
      "learning_rate": 0.0005685303762122382,
      "loss": 3.0358,
      "step": 33893
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.819216012954712,
      "learning_rate": 0.0005685285523572741,
      "loss": 3.197,
      "step": 33894
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6741830110549927,
      "learning_rate": 0.0005685267284523855,
      "loss": 3.1219,
      "step": 33895
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.575639009475708,
      "learning_rate": 0.0005685249044975728,
      "loss": 3.0935,
      "step": 33896
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5799307823181152,
      "learning_rate": 0.0005685230804928363,
      "loss": 2.9982,
      "step": 33897
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6308578252792358,
      "learning_rate": 0.0005685212564381761,
      "loss": 3.0151,
      "step": 33898
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4746791124343872,
      "learning_rate": 0.0005685194323335928,
      "loss": 3.2274,
      "step": 33899
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.530467987060547,
      "learning_rate": 0.0005685176081790868,
      "loss": 3.0182,
      "step": 33900
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4433687925338745,
      "learning_rate": 0.0005685157839746582,
      "loss": 3.0899,
      "step": 33901
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4918564558029175,
      "learning_rate": 0.0005685139597203074,
      "loss": 3.1311,
      "step": 33902
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.48368239402771,
      "learning_rate": 0.0005685121354160349,
      "loss": 3.2634,
      "step": 33903
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.337268590927124,
      "learning_rate": 0.0005685103110618409,
      "loss": 3.2879,
      "step": 33904
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4932615756988525,
      "learning_rate": 0.0005685084866577257,
      "loss": 2.9183,
      "step": 33905
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7284349203109741,
      "learning_rate": 0.0005685066622036898,
      "loss": 3.0429,
      "step": 33906
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4316223859786987,
      "learning_rate": 0.0005685048376997333,
      "loss": 3.0346,
      "step": 33907
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5479871034622192,
      "learning_rate": 0.0005685030131458568,
      "loss": 3.1384,
      "step": 33908
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1567256450653076,
      "learning_rate": 0.0005685011885420606,
      "loss": 3.1538,
      "step": 33909
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8252723217010498,
      "learning_rate": 0.0005684993638883448,
      "loss": 3.0746,
      "step": 33910
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8188897371292114,
      "learning_rate": 0.00056849753918471,
      "loss": 3.0633,
      "step": 33911
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6721328496932983,
      "learning_rate": 0.0005684957144311563,
      "loss": 3.2358,
      "step": 33912
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5771631002426147,
      "learning_rate": 0.0005684938896276843,
      "loss": 3.1081,
      "step": 33913
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5332040786743164,
      "learning_rate": 0.0005684920647742942,
      "loss": 3.1102,
      "step": 33914
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8748764991760254,
      "learning_rate": 0.0005684902398709863,
      "loss": 3.0569,
      "step": 33915
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8154257535934448,
      "learning_rate": 0.0005684884149177611,
      "loss": 2.9804,
      "step": 33916
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.512916922569275,
      "learning_rate": 0.0005684865899146188,
      "loss": 3.3181,
      "step": 33917
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5213475227355957,
      "learning_rate": 0.0005684847648615597,
      "loss": 3.2705,
      "step": 33918
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.637060523033142,
      "learning_rate": 0.0005684829397585842,
      "loss": 2.8473,
      "step": 33919
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3752597570419312,
      "learning_rate": 0.0005684811146056928,
      "loss": 3.2554,
      "step": 33920
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4132635593414307,
      "learning_rate": 0.0005684792894028857,
      "loss": 3.1461,
      "step": 33921
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8542085886001587,
      "learning_rate": 0.000568477464150163,
      "loss": 3.1123,
      "step": 33922
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.566285252571106,
      "learning_rate": 0.0005684756388475255,
      "loss": 3.1488,
      "step": 33923
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5089349746704102,
      "learning_rate": 0.0005684738134949732,
      "loss": 3.3247,
      "step": 33924
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5292532444000244,
      "learning_rate": 0.0005684719880925066,
      "loss": 3.2662,
      "step": 33925
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.443973422050476,
      "learning_rate": 0.0005684701626401259,
      "loss": 2.7173,
      "step": 33926
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.614534854888916,
      "learning_rate": 0.0005684683371378317,
      "loss": 3.2259,
      "step": 33927
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.702666997909546,
      "learning_rate": 0.000568466511585624,
      "loss": 2.8956,
      "step": 33928
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4875422716140747,
      "learning_rate": 0.0005684646859835034,
      "loss": 3.0756,
      "step": 33929
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.699984073638916,
      "learning_rate": 0.0005684628603314701,
      "loss": 3.1683,
      "step": 33930
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8578084707260132,
      "learning_rate": 0.0005684610346295244,
      "loss": 3.2023,
      "step": 33931
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4412295818328857,
      "learning_rate": 0.0005684592088776669,
      "loss": 3.0892,
      "step": 33932
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.40083646774292,
      "learning_rate": 0.0005684573830758978,
      "loss": 3.3442,
      "step": 33933
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7353776693344116,
      "learning_rate": 0.0005684555572242172,
      "loss": 3.1028,
      "step": 33934
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.783884048461914,
      "learning_rate": 0.0005684537313226257,
      "loss": 2.9264,
      "step": 33935
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.790604591369629,
      "learning_rate": 0.0005684519053711238,
      "loss": 3.1257,
      "step": 33936
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4412034749984741,
      "learning_rate": 0.0005684500793697114,
      "loss": 3.191,
      "step": 33937
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4053006172180176,
      "learning_rate": 0.0005684482533183891,
      "loss": 3.0055,
      "step": 33938
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.1868832111358643,
      "learning_rate": 0.0005684464272171572,
      "loss": 2.8649,
      "step": 33939
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5040241479873657,
      "learning_rate": 0.0005684446010660161,
      "loss": 3.1014,
      "step": 33940
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.328357458114624,
      "learning_rate": 0.0005684427748649661,
      "loss": 2.9354,
      "step": 33941
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7111841440200806,
      "learning_rate": 0.0005684409486140074,
      "loss": 3.0656,
      "step": 33942
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5691865682601929,
      "learning_rate": 0.0005684391223131405,
      "loss": 3.0106,
      "step": 33943
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.829751968383789,
      "learning_rate": 0.0005684372959623657,
      "loss": 2.9719,
      "step": 33944
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.339888572692871,
      "learning_rate": 0.0005684354695616833,
      "loss": 3.0672,
      "step": 33945
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4771400690078735,
      "learning_rate": 0.0005684336431110938,
      "loss": 3.0273,
      "step": 33946
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.413872241973877,
      "learning_rate": 0.0005684318166105974,
      "loss": 2.9305,
      "step": 33947
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9123433828353882,
      "learning_rate": 0.0005684299900601944,
      "loss": 3.2154,
      "step": 33948
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.459317922592163,
      "learning_rate": 0.0005684281634598852,
      "loss": 2.9848,
      "step": 33949
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8726987838745117,
      "learning_rate": 0.0005684263368096701,
      "loss": 3.0724,
      "step": 33950
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.524415135383606,
      "learning_rate": 0.0005684245101095496,
      "loss": 2.8604,
      "step": 33951
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3085025548934937,
      "learning_rate": 0.0005684226833595239,
      "loss": 2.9905,
      "step": 33952
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.869879126548767,
      "learning_rate": 0.0005684208565595933,
      "loss": 3.3834,
      "step": 33953
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5490578413009644,
      "learning_rate": 0.0005684190297097581,
      "loss": 2.8017,
      "step": 33954
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5772284269332886,
      "learning_rate": 0.0005684172028100189,
      "loss": 3.1078,
      "step": 33955
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5599769353866577,
      "learning_rate": 0.0005684153758603759,
      "loss": 3.2509,
      "step": 33956
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4015589952468872,
      "learning_rate": 0.0005684135488608292,
      "loss": 3.2254,
      "step": 33957
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6445029973983765,
      "learning_rate": 0.0005684117218113796,
      "loss": 3.0657,
      "step": 33958
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6864752769470215,
      "learning_rate": 0.000568409894712027,
      "loss": 3.188,
      "step": 33959
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5445237159729004,
      "learning_rate": 0.000568408067562772,
      "loss": 3.1302,
      "step": 33960
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4670215845108032,
      "learning_rate": 0.0005684062403636149,
      "loss": 3.0277,
      "step": 33961
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3322396278381348,
      "learning_rate": 0.0005684044131145559,
      "loss": 2.9929,
      "step": 33962
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5603514909744263,
      "learning_rate": 0.0005684025858155957,
      "loss": 3.0691,
      "step": 33963
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4021879434585571,
      "learning_rate": 0.0005684007584667342,
      "loss": 3.3239,
      "step": 33964
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8293676376342773,
      "learning_rate": 0.0005683989310679719,
      "loss": 2.9205,
      "step": 33965
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8433130979537964,
      "learning_rate": 0.0005683971036193093,
      "loss": 3.022,
      "step": 33966
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.536717414855957,
      "learning_rate": 0.0005683952761207466,
      "loss": 2.8196,
      "step": 33967
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6804814338684082,
      "learning_rate": 0.0005683934485722841,
      "loss": 3.006,
      "step": 33968
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6455252170562744,
      "learning_rate": 0.0005683916209739221,
      "loss": 3.03,
      "step": 33969
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1557090282440186,
      "learning_rate": 0.0005683897933256612,
      "loss": 2.8911,
      "step": 33970
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3606399297714233,
      "learning_rate": 0.0005683879656275014,
      "loss": 3.1903,
      "step": 33971
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5748194456100464,
      "learning_rate": 0.0005683861378794434,
      "loss": 3.092,
      "step": 33972
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2411582469940186,
      "learning_rate": 0.0005683843100814873,
      "loss": 2.7178,
      "step": 33973
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3989901542663574,
      "learning_rate": 0.0005683824822336334,
      "loss": 3.0253,
      "step": 33974
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.606227159500122,
      "learning_rate": 0.0005683806543358822,
      "loss": 3.126,
      "step": 33975
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5267523527145386,
      "learning_rate": 0.0005683788263882339,
      "loss": 3.2966,
      "step": 33976
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7239501476287842,
      "learning_rate": 0.000568376998390689,
      "loss": 3.1032,
      "step": 33977
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5933406352996826,
      "learning_rate": 0.0005683751703432477,
      "loss": 3.2057,
      "step": 33978
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.568909764289856,
      "learning_rate": 0.0005683733422459105,
      "loss": 3.1631,
      "step": 33979
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6480026245117188,
      "learning_rate": 0.0005683715140986775,
      "loss": 2.9294,
      "step": 33980
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3866791725158691,
      "learning_rate": 0.0005683696859015492,
      "loss": 3.0851,
      "step": 33981
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6911745071411133,
      "learning_rate": 0.0005683678576545259,
      "loss": 3.1974,
      "step": 33982
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3255929946899414,
      "learning_rate": 0.000568366029357608,
      "loss": 3.0195,
      "step": 33983
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.025071382522583,
      "learning_rate": 0.0005683642010107957,
      "loss": 3.0031,
      "step": 33984
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.772635817527771,
      "learning_rate": 0.0005683623726140895,
      "loss": 2.9251,
      "step": 33985
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.596534252166748,
      "learning_rate": 0.0005683605441674897,
      "loss": 3.1772,
      "step": 33986
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8000887632369995,
      "learning_rate": 0.0005683587156709966,
      "loss": 2.9469,
      "step": 33987
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6112892627716064,
      "learning_rate": 0.0005683568871246104,
      "loss": 3.1142,
      "step": 33988
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4096808433532715,
      "learning_rate": 0.0005683550585283317,
      "loss": 2.9775,
      "step": 33989
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5234947204589844,
      "learning_rate": 0.0005683532298821608,
      "loss": 3.1452,
      "step": 33990
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5114961862564087,
      "learning_rate": 0.0005683514011860978,
      "loss": 2.935,
      "step": 33991
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2864583730697632,
      "learning_rate": 0.0005683495724401434,
      "loss": 3.0007,
      "step": 33992
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9353586435317993,
      "learning_rate": 0.0005683477436442976,
      "loss": 3.2346,
      "step": 33993
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5866073369979858,
      "learning_rate": 0.000568345914798561,
      "loss": 3.2995,
      "step": 33994
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5428578853607178,
      "learning_rate": 0.0005683440859029338,
      "loss": 3.0427,
      "step": 33995
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6985093355178833,
      "learning_rate": 0.0005683422569574163,
      "loss": 3.106,
      "step": 33996
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8564283847808838,
      "learning_rate": 0.000568340427962009,
      "loss": 2.9813,
      "step": 33997
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8566358089447021,
      "learning_rate": 0.0005683385989167121,
      "loss": 3.2712,
      "step": 33998
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4624440670013428,
      "learning_rate": 0.000568336769821526,
      "loss": 3.0661,
      "step": 33999
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.045280694961548,
      "learning_rate": 0.0005683349406764509,
      "loss": 3.1166,
      "step": 34000
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6431223154067993,
      "learning_rate": 0.0005683331114814874,
      "loss": 2.9678,
      "step": 34001
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.107093334197998,
      "learning_rate": 0.0005683312822366358,
      "loss": 3.0273,
      "step": 34002
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.089738607406616,
      "learning_rate": 0.0005683294529418962,
      "loss": 3.022,
      "step": 34003
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0179357528686523,
      "learning_rate": 0.0005683276235972692,
      "loss": 3.1835,
      "step": 34004
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5553498268127441,
      "learning_rate": 0.0005683257942027549,
      "loss": 3.0337,
      "step": 34005
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1952855587005615,
      "learning_rate": 0.0005683239647583539,
      "loss": 3.221,
      "step": 34006
    },
    {
      "epoch": 0.44,
      "grad_norm": 6.461793422698975,
      "learning_rate": 0.0005683221352640663,
      "loss": 3.1673,
      "step": 34007
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9699666500091553,
      "learning_rate": 0.0005683203057198925,
      "loss": 3.0536,
      "step": 34008
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5121673345565796,
      "learning_rate": 0.0005683184761258332,
      "loss": 3.116,
      "step": 34009
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4378787279129028,
      "learning_rate": 0.0005683166464818881,
      "loss": 3.0599,
      "step": 34010
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.0756232738494873,
      "learning_rate": 0.000568314816788058,
      "loss": 3.1448,
      "step": 34011
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7122374773025513,
      "learning_rate": 0.0005683129870443431,
      "loss": 3.0605,
      "step": 34012
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7802181243896484,
      "learning_rate": 0.0005683111572507437,
      "loss": 2.9909,
      "step": 34013
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3774664402008057,
      "learning_rate": 0.0005683093274072603,
      "loss": 3.1511,
      "step": 34014
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7860767841339111,
      "learning_rate": 0.000568307497513893,
      "loss": 3.0453,
      "step": 34015
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.515192985534668,
      "learning_rate": 0.0005683056675706424,
      "loss": 3.4054,
      "step": 34016
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5291109085083008,
      "learning_rate": 0.0005683038375775087,
      "loss": 2.8389,
      "step": 34017
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.468030333518982,
      "learning_rate": 0.0005683020075344923,
      "loss": 3.142,
      "step": 34018
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.458534836769104,
      "learning_rate": 0.0005683001774415933,
      "loss": 3.2734,
      "step": 34019
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4360069036483765,
      "learning_rate": 0.0005682983472988124,
      "loss": 3.1406,
      "step": 34020
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3978297710418701,
      "learning_rate": 0.0005682965171061496,
      "loss": 3.1554,
      "step": 34021
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.653122067451477,
      "learning_rate": 0.0005682946868636055,
      "loss": 3.1229,
      "step": 34022
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6125690937042236,
      "learning_rate": 0.0005682928565711805,
      "loss": 3.1742,
      "step": 34023
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9603065252304077,
      "learning_rate": 0.0005682910262288746,
      "loss": 2.9199,
      "step": 34024
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5491160154342651,
      "learning_rate": 0.0005682891958366884,
      "loss": 3.1396,
      "step": 34025
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.020317792892456,
      "learning_rate": 0.0005682873653946222,
      "loss": 2.9854,
      "step": 34026
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5276062488555908,
      "learning_rate": 0.0005682855349026763,
      "loss": 2.9827,
      "step": 34027
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.706986665725708,
      "learning_rate": 0.0005682837043608509,
      "loss": 2.9546,
      "step": 34028
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.7696709632873535,
      "learning_rate": 0.0005682818737691468,
      "loss": 2.9887,
      "step": 34029
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6212422847747803,
      "learning_rate": 0.0005682800431275638,
      "loss": 3.2312,
      "step": 34030
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3899335861206055,
      "learning_rate": 0.0005682782124361025,
      "loss": 2.9829,
      "step": 34031
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.282469391822815,
      "learning_rate": 0.0005682763816947633,
      "loss": 3.1013,
      "step": 34032
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5561189651489258,
      "learning_rate": 0.0005682745509035464,
      "loss": 3.0894,
      "step": 34033
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5471522808074951,
      "learning_rate": 0.0005682727200624522,
      "loss": 3.3583,
      "step": 34034
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5497705936431885,
      "learning_rate": 0.0005682708891714811,
      "loss": 3.1988,
      "step": 34035
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4548343420028687,
      "learning_rate": 0.0005682690582306332,
      "loss": 3.1047,
      "step": 34036
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5304104089736938,
      "learning_rate": 0.0005682672272399093,
      "loss": 3.0505,
      "step": 34037
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9186286926269531,
      "learning_rate": 0.0005682653961993093,
      "loss": 3.0093,
      "step": 34038
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.369515299797058,
      "learning_rate": 0.0005682635651088337,
      "loss": 2.8751,
      "step": 34039
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9264955520629883,
      "learning_rate": 0.0005682617339684828,
      "loss": 2.8216,
      "step": 34040
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4076017141342163,
      "learning_rate": 0.0005682599027782571,
      "loss": 3.202,
      "step": 34041
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.357237458229065,
      "learning_rate": 0.0005682580715381567,
      "loss": 3.092,
      "step": 34042
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5648311376571655,
      "learning_rate": 0.0005682562402481821,
      "loss": 2.7619,
      "step": 34043
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.333658218383789,
      "learning_rate": 0.0005682544089083337,
      "loss": 2.9059,
      "step": 34044
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.022017240524292,
      "learning_rate": 0.0005682525775186116,
      "loss": 2.7522,
      "step": 34045
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8213791847229004,
      "learning_rate": 0.0005682507460790163,
      "loss": 3.015,
      "step": 34046
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5644471645355225,
      "learning_rate": 0.0005682489145895482,
      "loss": 3.2046,
      "step": 34047
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4410451650619507,
      "learning_rate": 0.0005682470830502076,
      "loss": 2.9897,
      "step": 34048
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7574738264083862,
      "learning_rate": 0.0005682452514609946,
      "loss": 3.1201,
      "step": 34049
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6531038284301758,
      "learning_rate": 0.0005682434198219099,
      "loss": 2.8732,
      "step": 34050
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5655533075332642,
      "learning_rate": 0.0005682415881329536,
      "loss": 3.2289,
      "step": 34051
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3936784267425537,
      "learning_rate": 0.0005682397563941263,
      "loss": 3.2115,
      "step": 34052
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4604144096374512,
      "learning_rate": 0.000568237924605428,
      "loss": 2.8605,
      "step": 34053
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6780357360839844,
      "learning_rate": 0.0005682360927668593,
      "loss": 3.048,
      "step": 34054
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.284959316253662,
      "learning_rate": 0.0005682342608784204,
      "loss": 2.9719,
      "step": 34055
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4249944686889648,
      "learning_rate": 0.0005682324289401116,
      "loss": 3.34,
      "step": 34056
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.342288613319397,
      "learning_rate": 0.0005682305969519334,
      "loss": 3.2315,
      "step": 34057
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0799200534820557,
      "learning_rate": 0.0005682287649138862,
      "loss": 3.197,
      "step": 34058
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7730419635772705,
      "learning_rate": 0.00056822693282597,
      "loss": 3.2097,
      "step": 34059
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.31427800655365,
      "learning_rate": 0.0005682251006881854,
      "loss": 2.9556,
      "step": 34060
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0634539127349854,
      "learning_rate": 0.0005682232685005328,
      "loss": 3.0172,
      "step": 34061
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.351808547973633,
      "learning_rate": 0.0005682214362630124,
      "loss": 3.0383,
      "step": 34062
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.609100580215454,
      "learning_rate": 0.0005682196039756244,
      "loss": 2.9863,
      "step": 34063
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0512278079986572,
      "learning_rate": 0.0005682177716383696,
      "loss": 3.0523,
      "step": 34064
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.705947756767273,
      "learning_rate": 0.0005682159392512478,
      "loss": 3.1614,
      "step": 34065
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.00358247756958,
      "learning_rate": 0.0005682141068142597,
      "loss": 3.1359,
      "step": 34066
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6113262176513672,
      "learning_rate": 0.0005682122743274055,
      "loss": 3.0277,
      "step": 34067
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5600521564483643,
      "learning_rate": 0.0005682104417906857,
      "loss": 3.0051,
      "step": 34068
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4931460618972778,
      "learning_rate": 0.0005682086092041004,
      "loss": 3.2207,
      "step": 34069
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.572279691696167,
      "learning_rate": 0.0005682067765676501,
      "loss": 3.2062,
      "step": 34070
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4099608659744263,
      "learning_rate": 0.0005682049438813351,
      "loss": 3.2269,
      "step": 34071
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2285622358322144,
      "learning_rate": 0.0005682031111451557,
      "loss": 3.0057,
      "step": 34072
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5928843021392822,
      "learning_rate": 0.0005682012783591123,
      "loss": 3.2724,
      "step": 34073
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5718849897384644,
      "learning_rate": 0.0005681994455232052,
      "loss": 3.3365,
      "step": 34074
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5300968885421753,
      "learning_rate": 0.0005681976126374347,
      "loss": 3.1556,
      "step": 34075
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2460007667541504,
      "learning_rate": 0.0005681957797018014,
      "loss": 2.8629,
      "step": 34076
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6686058044433594,
      "learning_rate": 0.0005681939467163052,
      "loss": 3.0822,
      "step": 34077
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.993748188018799,
      "learning_rate": 0.0005681921136809468,
      "loss": 3.0741,
      "step": 34078
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.0999791622161865,
      "learning_rate": 0.0005681902805957264,
      "loss": 3.2274,
      "step": 34079
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.074188232421875,
      "learning_rate": 0.0005681884474606445,
      "loss": 3.3506,
      "step": 34080
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.064077138900757,
      "learning_rate": 0.0005681866142757011,
      "loss": 2.9851,
      "step": 34081
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.169917345046997,
      "learning_rate": 0.0005681847810408969,
      "loss": 3.2071,
      "step": 34082
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.3271079063415527,
      "learning_rate": 0.000568182947756232,
      "loss": 3.126,
      "step": 34083
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8341758251190186,
      "learning_rate": 0.0005681811144217069,
      "loss": 3.1314,
      "step": 34084
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.8995633125305176,
      "learning_rate": 0.0005681792810373218,
      "loss": 3.0201,
      "step": 34085
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.051872730255127,
      "learning_rate": 0.0005681774476030771,
      "loss": 3.1852,
      "step": 34086
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7432491779327393,
      "learning_rate": 0.0005681756141189732,
      "loss": 3.1523,
      "step": 34087
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3109891414642334,
      "learning_rate": 0.0005681737805850103,
      "loss": 2.9546,
      "step": 34088
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.3347575664520264,
      "learning_rate": 0.000568171947001189,
      "loss": 3.341,
      "step": 34089
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.9158201217651367,
      "learning_rate": 0.0005681701133675093,
      "loss": 3.047,
      "step": 34090
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.286673069000244,
      "learning_rate": 0.0005681682796839718,
      "loss": 2.9121,
      "step": 34091
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8372372388839722,
      "learning_rate": 0.0005681664459505768,
      "loss": 2.9677,
      "step": 34092
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.4262523651123047,
      "learning_rate": 0.0005681646121673244,
      "loss": 3.0136,
      "step": 34093
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9067692756652832,
      "learning_rate": 0.0005681627783342153,
      "loss": 2.9874,
      "step": 34094
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7662204504013062,
      "learning_rate": 0.0005681609444512497,
      "loss": 3.0473,
      "step": 34095
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.6026787757873535,
      "learning_rate": 0.0005681591105184279,
      "loss": 2.8656,
      "step": 34096
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4308736324310303,
      "learning_rate": 0.0005681572765357503,
      "loss": 2.9696,
      "step": 34097
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.640315294265747,
      "learning_rate": 0.0005681554425032171,
      "loss": 3.1513,
      "step": 34098
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.2691195011138916,
      "learning_rate": 0.0005681536084208288,
      "loss": 3.0793,
      "step": 34099
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8931300640106201,
      "learning_rate": 0.0005681517742885855,
      "loss": 3.3326,
      "step": 34100
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1326963901519775,
      "learning_rate": 0.000568149940106488,
      "loss": 2.9559,
      "step": 34101
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7112611532211304,
      "learning_rate": 0.0005681481058745362,
      "loss": 3.2755,
      "step": 34102
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.72782564163208,
      "learning_rate": 0.0005681462715927307,
      "loss": 3.0333,
      "step": 34103
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5121681690216064,
      "learning_rate": 0.0005681444372610717,
      "loss": 3.2709,
      "step": 34104
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.4549949169158936,
      "learning_rate": 0.0005681426028795595,
      "loss": 2.9429,
      "step": 34105
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5109072923660278,
      "learning_rate": 0.0005681407684481947,
      "loss": 3.1838,
      "step": 34106
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4909541606903076,
      "learning_rate": 0.0005681389339669774,
      "loss": 3.1849,
      "step": 34107
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.141951322555542,
      "learning_rate": 0.000568137099435908,
      "loss": 3.0458,
      "step": 34108
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8949272632598877,
      "learning_rate": 0.0005681352648549868,
      "loss": 3.1491,
      "step": 34109
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5650423765182495,
      "learning_rate": 0.0005681334302242142,
      "loss": 2.9515,
      "step": 34110
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8115687370300293,
      "learning_rate": 0.0005681315955435906,
      "loss": 3.1347,
      "step": 34111
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1583621501922607,
      "learning_rate": 0.0005681297608131162,
      "loss": 3.1278,
      "step": 34112
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.466103434562683,
      "learning_rate": 0.0005681279260327914,
      "loss": 2.7893,
      "step": 34113
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5401005744934082,
      "learning_rate": 0.0005681260912026167,
      "loss": 3.016,
      "step": 34114
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8793284893035889,
      "learning_rate": 0.0005681242563225922,
      "loss": 3.1821,
      "step": 34115
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.449021339416504,
      "learning_rate": 0.0005681224213927184,
      "loss": 3.13,
      "step": 34116
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7528499364852905,
      "learning_rate": 0.0005681205864129955,
      "loss": 2.8732,
      "step": 34117
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6728515625,
      "learning_rate": 0.000568118751383424,
      "loss": 3.0309,
      "step": 34118
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.862180233001709,
      "learning_rate": 0.0005681169163040042,
      "loss": 3.0339,
      "step": 34119
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6336218118667603,
      "learning_rate": 0.0005681150811747362,
      "loss": 3.1498,
      "step": 34120
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6673682928085327,
      "learning_rate": 0.0005681132459956207,
      "loss": 3.3522,
      "step": 34121
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7469326257705688,
      "learning_rate": 0.0005681114107666578,
      "loss": 2.8587,
      "step": 34122
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5896961688995361,
      "learning_rate": 0.0005681095754878481,
      "loss": 2.9514,
      "step": 34123
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4120668172836304,
      "learning_rate": 0.0005681077401591916,
      "loss": 2.9368,
      "step": 34124
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.517544150352478,
      "learning_rate": 0.0005681059047806889,
      "loss": 2.8611,
      "step": 34125
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.430713176727295,
      "learning_rate": 0.0005681040693523402,
      "loss": 3.1419,
      "step": 34126
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4723889827728271,
      "learning_rate": 0.000568102233874146,
      "loss": 2.9157,
      "step": 34127
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5728111267089844,
      "learning_rate": 0.0005681003983461064,
      "loss": 3.0485,
      "step": 34128
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.1123530864715576,
      "learning_rate": 0.0005680985627682218,
      "loss": 3.2938,
      "step": 34129
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7397571802139282,
      "learning_rate": 0.0005680967271404928,
      "loss": 2.9203,
      "step": 34130
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0135576725006104,
      "learning_rate": 0.0005680948914629195,
      "loss": 2.6756,
      "step": 34131
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.473924994468689,
      "learning_rate": 0.0005680930557355024,
      "loss": 3.2035,
      "step": 34132
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7135297060012817,
      "learning_rate": 0.0005680912199582416,
      "loss": 3.0807,
      "step": 34133
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.0259907245635986,
      "learning_rate": 0.0005680893841311375,
      "loss": 2.9291,
      "step": 34134
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3006423711776733,
      "learning_rate": 0.0005680875482541907,
      "loss": 2.9538,
      "step": 34135
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.496374249458313,
      "learning_rate": 0.0005680857123274012,
      "loss": 2.8826,
      "step": 34136
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9956855773925781,
      "learning_rate": 0.0005680838763507697,
      "loss": 2.9636,
      "step": 34137
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.268944501876831,
      "learning_rate": 0.0005680820403242962,
      "loss": 2.9791,
      "step": 34138
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5582544803619385,
      "learning_rate": 0.0005680802042479814,
      "loss": 3.0711,
      "step": 34139
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.261091709136963,
      "learning_rate": 0.0005680783681218252,
      "loss": 3.452,
      "step": 34140
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.3968684673309326,
      "learning_rate": 0.0005680765319458281,
      "loss": 3.1,
      "step": 34141
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.422167181968689,
      "learning_rate": 0.0005680746957199907,
      "loss": 2.9566,
      "step": 34142
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.097264289855957,
      "learning_rate": 0.0005680728594443131,
      "loss": 2.9613,
      "step": 34143
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7817487716674805,
      "learning_rate": 0.0005680710231187958,
      "loss": 2.938,
      "step": 34144
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.562704086303711,
      "learning_rate": 0.0005680691867434389,
      "loss": 3.0913,
      "step": 34145
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4158048629760742,
      "learning_rate": 0.000568067350318243,
      "loss": 3.2916,
      "step": 34146
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.00231671333313,
      "learning_rate": 0.0005680655138432082,
      "loss": 2.8886,
      "step": 34147
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.845301628112793,
      "learning_rate": 0.0005680636773183349,
      "loss": 3.0782,
      "step": 34148
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.117828607559204,
      "learning_rate": 0.0005680618407436236,
      "loss": 3.1408,
      "step": 34149
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.781760334968567,
      "learning_rate": 0.0005680600041190747,
      "loss": 3.1624,
      "step": 34150
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.554250717163086,
      "learning_rate": 0.0005680581674446881,
      "loss": 2.8973,
      "step": 34151
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.255401611328125,
      "learning_rate": 0.0005680563307204646,
      "loss": 2.9591,
      "step": 34152
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.381994366645813,
      "learning_rate": 0.0005680544939464043,
      "loss": 3.039,
      "step": 34153
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4093416929244995,
      "learning_rate": 0.0005680526571225077,
      "loss": 3.0907,
      "step": 34154
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7119108438491821,
      "learning_rate": 0.000568050820248775,
      "loss": 3.3118,
      "step": 34155
    },
    {
      "epoch": 0.44,
      "grad_norm": 6.0435919761657715,
      "learning_rate": 0.0005680489833252066,
      "loss": 2.9721,
      "step": 34156
    },
    {
      "epoch": 0.44,
      "grad_norm": 6.001306056976318,
      "learning_rate": 0.0005680471463518028,
      "loss": 2.9645,
      "step": 34157
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.7661761045455933,
      "learning_rate": 0.0005680453093285639,
      "loss": 3.1037,
      "step": 34158
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.472755789756775,
      "learning_rate": 0.0005680434722554904,
      "loss": 2.7488,
      "step": 34159
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.8608076572418213,
      "learning_rate": 0.0005680416351325825,
      "loss": 2.9903,
      "step": 34160
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.4483683109283447,
      "learning_rate": 0.0005680397979598408,
      "loss": 2.973,
      "step": 34161
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.470296621322632,
      "learning_rate": 0.0005680379607372652,
      "loss": 2.8671,
      "step": 34162
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.50542151927948,
      "learning_rate": 0.0005680361234648565,
      "loss": 2.9118,
      "step": 34163
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3094630241394043,
      "learning_rate": 0.0005680342861426146,
      "loss": 2.9389,
      "step": 34164
    },
    {
      "epoch": 0.44,
      "grad_norm": 2.402662515640259,
      "learning_rate": 0.0005680324487705403,
      "loss": 3.2177,
      "step": 34165
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3921263217926025,
      "learning_rate": 0.0005680306113486336,
      "loss": 2.8962,
      "step": 34166
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5690546035766602,
      "learning_rate": 0.0005680287738768949,
      "loss": 2.8988,
      "step": 34167
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.9475456476211548,
      "learning_rate": 0.0005680269363553246,
      "loss": 2.9731,
      "step": 34168
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.6544443368911743,
      "learning_rate": 0.0005680250987839231,
      "loss": 3.1196,
      "step": 34169
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.468026041984558,
      "learning_rate": 0.0005680232611626906,
      "loss": 3.1559,
      "step": 34170
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2502598762512207,
      "learning_rate": 0.0005680214234916276,
      "loss": 2.8878,
      "step": 34171
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3734204769134521,
      "learning_rate": 0.0005680195857707343,
      "loss": 3.2141,
      "step": 34172
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.2860233783721924,
      "learning_rate": 0.0005680177480000111,
      "loss": 3.1064,
      "step": 34173
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.5278658866882324,
      "learning_rate": 0.0005680159101794583,
      "loss": 3.146,
      "step": 34174
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.503246784210205,
      "learning_rate": 0.0005680140723090764,
      "loss": 3.2957,
      "step": 34175
    },
    {
      "epoch": 0.44,
      "grad_norm": 1.3815863132476807,
      "learning_rate": 0.0005680122343888656,
      "loss": 3.0431,
      "step": 34176
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3124253749847412,
      "learning_rate": 0.0005680103964188261,
      "loss": 2.9018,
      "step": 34177
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7781126499176025,
      "learning_rate": 0.0005680085583989586,
      "loss": 3.2395,
      "step": 34178
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9603146314620972,
      "learning_rate": 0.0005680067203292633,
      "loss": 2.9397,
      "step": 34179
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5739896297454834,
      "learning_rate": 0.0005680048822097403,
      "loss": 2.9155,
      "step": 34180
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8858766555786133,
      "learning_rate": 0.0005680030440403901,
      "loss": 2.8074,
      "step": 34181
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8860933780670166,
      "learning_rate": 0.0005680012058212133,
      "loss": 3.3814,
      "step": 34182
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3444488048553467,
      "learning_rate": 0.0005679993675522098,
      "loss": 3.2754,
      "step": 34183
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4521543979644775,
      "learning_rate": 0.0005679975292333803,
      "loss": 2.6638,
      "step": 34184
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3900879621505737,
      "learning_rate": 0.000567995690864725,
      "loss": 3.2441,
      "step": 34185
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3878819942474365,
      "learning_rate": 0.0005679938524462443,
      "loss": 3.201,
      "step": 34186
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.451766014099121,
      "learning_rate": 0.0005679920139779384,
      "loss": 3.0694,
      "step": 34187
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.662130355834961,
      "learning_rate": 0.0005679901754598076,
      "loss": 3.2157,
      "step": 34188
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4787852764129639,
      "learning_rate": 0.0005679883368918525,
      "loss": 3.1902,
      "step": 34189
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9406585693359375,
      "learning_rate": 0.0005679864982740733,
      "loss": 3.0865,
      "step": 34190
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7283656597137451,
      "learning_rate": 0.0005679846596064704,
      "loss": 3.0926,
      "step": 34191
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4878764152526855,
      "learning_rate": 0.000567982820889044,
      "loss": 3.1821,
      "step": 34192
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2794604301452637,
      "learning_rate": 0.0005679809821217945,
      "loss": 3.044,
      "step": 34193
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6057909727096558,
      "learning_rate": 0.0005679791433047224,
      "loss": 3.0096,
      "step": 34194
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3745746612548828,
      "learning_rate": 0.0005679773044378279,
      "loss": 2.8692,
      "step": 34195
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6917037963867188,
      "learning_rate": 0.0005679754655211113,
      "loss": 2.9107,
      "step": 34196
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.354137659072876,
      "learning_rate": 0.000567973626554573,
      "loss": 3.3314,
      "step": 34197
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6002519130706787,
      "learning_rate": 0.0005679717875382134,
      "loss": 3.2626,
      "step": 34198
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.01591420173645,
      "learning_rate": 0.0005679699484720328,
      "loss": 2.8618,
      "step": 34199
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4308043718338013,
      "learning_rate": 0.0005679681093560315,
      "loss": 3.1208,
      "step": 34200
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.955894112586975,
      "learning_rate": 0.0005679662701902099,
      "loss": 3.176,
      "step": 34201
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7955329418182373,
      "learning_rate": 0.0005679644309745682,
      "loss": 2.937,
      "step": 34202
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.676712989807129,
      "learning_rate": 0.0005679625917091069,
      "loss": 3.0311,
      "step": 34203
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7341374158859253,
      "learning_rate": 0.0005679607523938263,
      "loss": 3.2335,
      "step": 34204
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.250808596611023,
      "learning_rate": 0.0005679589130287269,
      "loss": 3.0772,
      "step": 34205
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.354430079460144,
      "learning_rate": 0.0005679570736138086,
      "loss": 2.9789,
      "step": 34206
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3820748329162598,
      "learning_rate": 0.0005679552341490723,
      "loss": 3.0527,
      "step": 34207
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7242621183395386,
      "learning_rate": 0.0005679533946345178,
      "loss": 3.2468,
      "step": 34208
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4538722038269043,
      "learning_rate": 0.0005679515550701459,
      "loss": 3.0759,
      "step": 34209
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5212258100509644,
      "learning_rate": 0.0005679497154559567,
      "loss": 3.0337,
      "step": 34210
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.153887987136841,
      "learning_rate": 0.0005679478757919505,
      "loss": 3.1923,
      "step": 34211
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.89825439453125,
      "learning_rate": 0.0005679460360781278,
      "loss": 2.9512,
      "step": 34212
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4711861610412598,
      "learning_rate": 0.0005679441963144889,
      "loss": 3.0424,
      "step": 34213
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4622832536697388,
      "learning_rate": 0.0005679423565010341,
      "loss": 3.2962,
      "step": 34214
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3068873882293701,
      "learning_rate": 0.0005679405166377638,
      "loss": 3.2787,
      "step": 34215
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.561394691467285,
      "learning_rate": 0.0005679386767246782,
      "loss": 3.2511,
      "step": 34216
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4644289016723633,
      "learning_rate": 0.0005679368367617778,
      "loss": 2.9161,
      "step": 34217
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3049293756484985,
      "learning_rate": 0.0005679349967490628,
      "loss": 2.9537,
      "step": 34218
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.953918695449829,
      "learning_rate": 0.0005679331566865338,
      "loss": 3.0079,
      "step": 34219
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9292782545089722,
      "learning_rate": 0.0005679313165741908,
      "loss": 3.3256,
      "step": 34220
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1603779792785645,
      "learning_rate": 0.0005679294764120344,
      "loss": 2.8958,
      "step": 34221
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9909114837646484,
      "learning_rate": 0.0005679276362000649,
      "loss": 3.2365,
      "step": 34222
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.302764654159546,
      "learning_rate": 0.0005679257959382826,
      "loss": 3.3316,
      "step": 34223
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.99893319606781,
      "learning_rate": 0.0005679239556266877,
      "loss": 2.9418,
      "step": 34224
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3619030714035034,
      "learning_rate": 0.0005679221152652807,
      "loss": 2.9765,
      "step": 34225
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.067563772201538,
      "learning_rate": 0.000567920274854062,
      "loss": 3.3464,
      "step": 34226
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5340603590011597,
      "learning_rate": 0.0005679184343930318,
      "loss": 3.3102,
      "step": 34227
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5824137926101685,
      "learning_rate": 0.0005679165938821905,
      "loss": 3.3141,
      "step": 34228
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.692442774772644,
      "learning_rate": 0.0005679147533215386,
      "loss": 2.8634,
      "step": 34229
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9440782070159912,
      "learning_rate": 0.0005679129127110762,
      "loss": 3.1775,
      "step": 34230
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2221051454544067,
      "learning_rate": 0.0005679110720508037,
      "loss": 3.1302,
      "step": 34231
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6265722513198853,
      "learning_rate": 0.0005679092313407214,
      "loss": 3.0228,
      "step": 34232
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5149645805358887,
      "learning_rate": 0.0005679073905808298,
      "loss": 3.1796,
      "step": 34233
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4555413722991943,
      "learning_rate": 0.0005679055497711292,
      "loss": 2.8745,
      "step": 34234
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8097673654556274,
      "learning_rate": 0.0005679037089116199,
      "loss": 3.2233,
      "step": 34235
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2819123268127441,
      "learning_rate": 0.0005679018680023021,
      "loss": 3.0616,
      "step": 34236
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4727030992507935,
      "learning_rate": 0.0005679000270431764,
      "loss": 2.8733,
      "step": 34237
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7206426858901978,
      "learning_rate": 0.0005678981860342431,
      "loss": 3.0833,
      "step": 34238
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5116865634918213,
      "learning_rate": 0.0005678963449755023,
      "loss": 2.9354,
      "step": 34239
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6334209442138672,
      "learning_rate": 0.0005678945038669547,
      "loss": 3.0973,
      "step": 34240
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.811580777168274,
      "learning_rate": 0.0005678926627086002,
      "loss": 2.8998,
      "step": 34241
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0384867191314697,
      "learning_rate": 0.0005678908215004397,
      "loss": 2.9227,
      "step": 34242
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.180692672729492,
      "learning_rate": 0.000567888980242473,
      "loss": 2.9086,
      "step": 34243
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5404161214828491,
      "learning_rate": 0.0005678871389347008,
      "loss": 3.021,
      "step": 34244
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4738068580627441,
      "learning_rate": 0.0005678852975771233,
      "loss": 3.2363,
      "step": 34245
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.017106056213379,
      "learning_rate": 0.0005678834561697408,
      "loss": 2.8221,
      "step": 34246
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9200208187103271,
      "learning_rate": 0.0005678816147125539,
      "loss": 3.2173,
      "step": 34247
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5827800035476685,
      "learning_rate": 0.0005678797732055625,
      "loss": 3.0267,
      "step": 34248
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.92616605758667,
      "learning_rate": 0.0005678779316487673,
      "loss": 2.8677,
      "step": 34249
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.778092622756958,
      "learning_rate": 0.0005678760900421686,
      "loss": 3.1767,
      "step": 34250
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6362477540969849,
      "learning_rate": 0.0005678742483857666,
      "loss": 3.0062,
      "step": 34251
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.613391637802124,
      "learning_rate": 0.0005678724066795617,
      "loss": 3.0652,
      "step": 34252
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.430570363998413,
      "learning_rate": 0.0005678705649235544,
      "loss": 3.1189,
      "step": 34253
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.883416771888733,
      "learning_rate": 0.0005678687231177448,
      "loss": 3.0265,
      "step": 34254
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0679492950439453,
      "learning_rate": 0.0005678668812621334,
      "loss": 3.159,
      "step": 34255
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9946630001068115,
      "learning_rate": 0.0005678650393567204,
      "loss": 3.1025,
      "step": 34256
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5894911289215088,
      "learning_rate": 0.0005678631974015064,
      "loss": 3.1384,
      "step": 34257
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.189013719558716,
      "learning_rate": 0.0005678613553964914,
      "loss": 2.9807,
      "step": 34258
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.8084471225738525,
      "learning_rate": 0.000567859513341676,
      "loss": 3.1284,
      "step": 34259
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4770092964172363,
      "learning_rate": 0.0005678576712370605,
      "loss": 2.8857,
      "step": 34260
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.385483980178833,
      "learning_rate": 0.000567855829082645,
      "loss": 2.9729,
      "step": 34261
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7921671867370605,
      "learning_rate": 0.0005678539868784303,
      "loss": 2.8217,
      "step": 34262
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6902867555618286,
      "learning_rate": 0.0005678521446244164,
      "loss": 3.1497,
      "step": 34263
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2761728763580322,
      "learning_rate": 0.0005678503023206037,
      "loss": 3.2357,
      "step": 34264
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.2075321674346924,
      "learning_rate": 0.0005678484599669926,
      "loss": 2.9101,
      "step": 34265
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.551919937133789,
      "learning_rate": 0.0005678466175635834,
      "loss": 2.9191,
      "step": 34266
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1853578090667725,
      "learning_rate": 0.0005678447751103765,
      "loss": 3.6563,
      "step": 34267
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.370924711227417,
      "learning_rate": 0.000567842932607372,
      "loss": 3.0491,
      "step": 34268
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4744118452072144,
      "learning_rate": 0.0005678410900545706,
      "loss": 2.9161,
      "step": 34269
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3669129610061646,
      "learning_rate": 0.0005678392474519725,
      "loss": 2.9251,
      "step": 34270
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2715755701065063,
      "learning_rate": 0.000567837404799578,
      "loss": 3.1068,
      "step": 34271
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.722902774810791,
      "learning_rate": 0.0005678355620973876,
      "loss": 3.0669,
      "step": 34272
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.713647484779358,
      "learning_rate": 0.0005678337193454014,
      "loss": 3.0033,
      "step": 34273
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4324110746383667,
      "learning_rate": 0.0005678318765436198,
      "loss": 3.2873,
      "step": 34274
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5344904661178589,
      "learning_rate": 0.0005678300336920433,
      "loss": 2.9366,
      "step": 34275
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5197114944458008,
      "learning_rate": 0.000567828190790672,
      "loss": 3.0411,
      "step": 34276
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7034027576446533,
      "learning_rate": 0.0005678263478395066,
      "loss": 3.0708,
      "step": 34277
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6018407344818115,
      "learning_rate": 0.000567824504838547,
      "loss": 3.1506,
      "step": 34278
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.871285080909729,
      "learning_rate": 0.0005678226617877939,
      "loss": 3.0934,
      "step": 34279
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8878687620162964,
      "learning_rate": 0.0005678208186872475,
      "loss": 2.8795,
      "step": 34280
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5090018510818481,
      "learning_rate": 0.0005678189755369081,
      "loss": 3.1141,
      "step": 34281
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.74030339717865,
      "learning_rate": 0.0005678171323367762,
      "loss": 2.9898,
      "step": 34282
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6752233505249023,
      "learning_rate": 0.000567815289086852,
      "loss": 3.3259,
      "step": 34283
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.422690987586975,
      "learning_rate": 0.0005678134457871357,
      "loss": 3.039,
      "step": 34284
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3881672620773315,
      "learning_rate": 0.000567811602437628,
      "loss": 3.1228,
      "step": 34285
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5650885105133057,
      "learning_rate": 0.0005678097590383291,
      "loss": 3.2809,
      "step": 34286
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3447126150131226,
      "learning_rate": 0.0005678079155892393,
      "loss": 3.0191,
      "step": 34287
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7795408964157104,
      "learning_rate": 0.0005678060720903589,
      "loss": 3.2204,
      "step": 34288
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.119215488433838,
      "learning_rate": 0.0005678042285416883,
      "loss": 3.0115,
      "step": 34289
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8987820148468018,
      "learning_rate": 0.0005678023849432279,
      "loss": 3.0624,
      "step": 34290
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.801468014717102,
      "learning_rate": 0.0005678005412949778,
      "loss": 3.0777,
      "step": 34291
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.968628406524658,
      "learning_rate": 0.0005677986975969388,
      "loss": 3.2531,
      "step": 34292
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.118744134902954,
      "learning_rate": 0.0005677968538491107,
      "loss": 2.8593,
      "step": 34293
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7293951511383057,
      "learning_rate": 0.0005677950100514943,
      "loss": 2.652,
      "step": 34294
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.696670413017273,
      "learning_rate": 0.0005677931662040897,
      "loss": 3.1554,
      "step": 34295
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5976943969726562,
      "learning_rate": 0.0005677913223068972,
      "loss": 3.2128,
      "step": 34296
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7311253547668457,
      "learning_rate": 0.0005677894783599174,
      "loss": 3.0443,
      "step": 34297
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3038063049316406,
      "learning_rate": 0.0005677876343631504,
      "loss": 2.9058,
      "step": 34298
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5739197731018066,
      "learning_rate": 0.0005677857903165966,
      "loss": 3.3462,
      "step": 34299
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0364773273468018,
      "learning_rate": 0.0005677839462202563,
      "loss": 3.098,
      "step": 34300
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5561723709106445,
      "learning_rate": 0.00056778210207413,
      "loss": 2.891,
      "step": 34301
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2253434658050537,
      "learning_rate": 0.0005677802578782181,
      "loss": 3.1773,
      "step": 34302
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2758071422576904,
      "learning_rate": 0.0005677784136325205,
      "loss": 3.0129,
      "step": 34303
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6578395366668701,
      "learning_rate": 0.0005677765693370381,
      "loss": 3.0286,
      "step": 34304
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4695755243301392,
      "learning_rate": 0.0005677747249917708,
      "loss": 3.13,
      "step": 34305
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6422877311706543,
      "learning_rate": 0.0005677728805967192,
      "loss": 3.1601,
      "step": 34306
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.417962908744812,
      "learning_rate": 0.0005677710361518836,
      "loss": 2.9008,
      "step": 34307
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.532830834388733,
      "learning_rate": 0.0005677691916572641,
      "loss": 3.0505,
      "step": 34308
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.728482961654663,
      "learning_rate": 0.0005677673471128615,
      "loss": 3.154,
      "step": 34309
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2930092811584473,
      "learning_rate": 0.0005677655025186758,
      "loss": 2.9305,
      "step": 34310
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4829577207565308,
      "learning_rate": 0.0005677636578747075,
      "loss": 3.0259,
      "step": 34311
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7009668350219727,
      "learning_rate": 0.0005677618131809568,
      "loss": 3.0112,
      "step": 34312
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.504299283027649,
      "learning_rate": 0.0005677599684374241,
      "loss": 3.0114,
      "step": 34313
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6360658407211304,
      "learning_rate": 0.00056775812364411,
      "loss": 2.9256,
      "step": 34314
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.528401255607605,
      "learning_rate": 0.0005677562788010144,
      "loss": 3.0814,
      "step": 34315
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8507124185562134,
      "learning_rate": 0.0005677544339081378,
      "loss": 3.1094,
      "step": 34316
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0045909881591797,
      "learning_rate": 0.0005677525889654807,
      "loss": 3.1052,
      "step": 34317
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6909151077270508,
      "learning_rate": 0.0005677507439730433,
      "loss": 3.0919,
      "step": 34318
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.786760687828064,
      "learning_rate": 0.000567748898930826,
      "loss": 2.9421,
      "step": 34319
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5921143293380737,
      "learning_rate": 0.0005677470538388292,
      "loss": 2.8824,
      "step": 34320
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0276811122894287,
      "learning_rate": 0.000567745208697053,
      "loss": 2.9468,
      "step": 34321
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5293781757354736,
      "learning_rate": 0.0005677433635054981,
      "loss": 2.9556,
      "step": 34322
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9280939102172852,
      "learning_rate": 0.0005677415182641645,
      "loss": 2.9623,
      "step": 34323
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9355344772338867,
      "learning_rate": 0.0005677396729730529,
      "loss": 3.1452,
      "step": 34324
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5320427417755127,
      "learning_rate": 0.0005677378276321632,
      "loss": 3.1867,
      "step": 34325
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.926985502243042,
      "learning_rate": 0.0005677359822414961,
      "loss": 3.2548,
      "step": 34326
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.281667947769165,
      "learning_rate": 0.0005677341368010518,
      "loss": 2.9255,
      "step": 34327
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5504839420318604,
      "learning_rate": 0.0005677322913108308,
      "loss": 3.0067,
      "step": 34328
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9221264123916626,
      "learning_rate": 0.0005677304457708331,
      "loss": 3.1012,
      "step": 34329
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3787882328033447,
      "learning_rate": 0.0005677286001810595,
      "loss": 3.144,
      "step": 34330
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.628778338432312,
      "learning_rate": 0.0005677267545415098,
      "loss": 2.8487,
      "step": 34331
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5103720426559448,
      "learning_rate": 0.0005677249088521848,
      "loss": 2.9448,
      "step": 34332
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.04211163520813,
      "learning_rate": 0.0005677230631130848,
      "loss": 3.1973,
      "step": 34333
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7750658988952637,
      "learning_rate": 0.0005677212173242098,
      "loss": 3.08,
      "step": 34334
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.606335997581482,
      "learning_rate": 0.0005677193714855606,
      "loss": 3.0098,
      "step": 34335
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.469675302505493,
      "learning_rate": 0.0005677175255971371,
      "loss": 2.9822,
      "step": 34336
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5958175659179688,
      "learning_rate": 0.00056771567965894,
      "loss": 3.0832,
      "step": 34337
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2437458038330078,
      "learning_rate": 0.0005677138336709695,
      "loss": 3.3645,
      "step": 34338
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1864428520202637,
      "learning_rate": 0.0005677119876332259,
      "loss": 3.2547,
      "step": 34339
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3913733959197998,
      "learning_rate": 0.0005677101415457095,
      "loss": 3.3359,
      "step": 34340
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4765093326568604,
      "learning_rate": 0.0005677082954084209,
      "loss": 3.1914,
      "step": 34341
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4727874994277954,
      "learning_rate": 0.0005677064492213602,
      "loss": 3.0762,
      "step": 34342
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6597557067871094,
      "learning_rate": 0.0005677046029845278,
      "loss": 2.957,
      "step": 34343
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3164427280426025,
      "learning_rate": 0.0005677027566979242,
      "loss": 3.0186,
      "step": 34344
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7374240159988403,
      "learning_rate": 0.0005677009103615494,
      "loss": 3.0115,
      "step": 34345
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5964815616607666,
      "learning_rate": 0.000567699063975404,
      "loss": 2.9212,
      "step": 34346
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0201642513275146,
      "learning_rate": 0.0005676972175394883,
      "loss": 3.1668,
      "step": 34347
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0240836143493652,
      "learning_rate": 0.0005676953710538027,
      "loss": 2.8194,
      "step": 34348
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4479097127914429,
      "learning_rate": 0.0005676935245183475,
      "loss": 3.1469,
      "step": 34349
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.874436855316162,
      "learning_rate": 0.0005676916779331229,
      "loss": 3.0612,
      "step": 34350
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.285142421722412,
      "learning_rate": 0.0005676898312981294,
      "loss": 3.2055,
      "step": 34351
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5775583982467651,
      "learning_rate": 0.0005676879846133673,
      "loss": 3.089,
      "step": 34352
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2374939918518066,
      "learning_rate": 0.000567686137878837,
      "loss": 3.1635,
      "step": 34353
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.6559550762176514,
      "learning_rate": 0.0005676842910945387,
      "loss": 3.318,
      "step": 34354
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3404475450515747,
      "learning_rate": 0.000567682444260473,
      "loss": 3.1929,
      "step": 34355
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.586162567138672,
      "learning_rate": 0.0005676805973766399,
      "loss": 2.7889,
      "step": 34356
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7533020973205566,
      "learning_rate": 0.0005676787504430401,
      "loss": 3.0902,
      "step": 34357
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3223410844802856,
      "learning_rate": 0.0005676769034596736,
      "loss": 3.1023,
      "step": 34358
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5385206937789917,
      "learning_rate": 0.0005676750564265411,
      "loss": 2.9245,
      "step": 34359
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.521769642829895,
      "learning_rate": 0.0005676732093436426,
      "loss": 2.9243,
      "step": 34360
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.257529616355896,
      "learning_rate": 0.0005676713622109786,
      "loss": 3.0749,
      "step": 34361
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2687342166900635,
      "learning_rate": 0.0005676695150285496,
      "loss": 3.0513,
      "step": 34362
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3447625637054443,
      "learning_rate": 0.0005676676677963555,
      "loss": 2.9609,
      "step": 34363
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7966694831848145,
      "learning_rate": 0.0005676658205143972,
      "loss": 3.1172,
      "step": 34364
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5551797151565552,
      "learning_rate": 0.0005676639731826747,
      "loss": 3.0751,
      "step": 34365
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8880871534347534,
      "learning_rate": 0.0005676621258011883,
      "loss": 2.827,
      "step": 34366
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6106187105178833,
      "learning_rate": 0.0005676602783699385,
      "loss": 3.1578,
      "step": 34367
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.374022364616394,
      "learning_rate": 0.0005676584308889257,
      "loss": 3.2895,
      "step": 34368
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4872329235076904,
      "learning_rate": 0.00056765658335815,
      "loss": 2.8322,
      "step": 34369
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0189311504364014,
      "learning_rate": 0.000567654735777612,
      "loss": 2.9362,
      "step": 34370
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6702476739883423,
      "learning_rate": 0.000567652888147312,
      "loss": 3.0589,
      "step": 34371
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3573291301727295,
      "learning_rate": 0.0005676510404672502,
      "loss": 3.0751,
      "step": 34372
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5272407531738281,
      "learning_rate": 0.000567649192737427,
      "loss": 3.1458,
      "step": 34373
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7226407527923584,
      "learning_rate": 0.0005676473449578428,
      "loss": 3.0148,
      "step": 34374
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6885240077972412,
      "learning_rate": 0.0005676454971284977,
      "loss": 3.2373,
      "step": 34375
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6387426853179932,
      "learning_rate": 0.0005676436492493925,
      "loss": 3.257,
      "step": 34376
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7480084896087646,
      "learning_rate": 0.0005676418013205272,
      "loss": 3.1386,
      "step": 34377
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5778615474700928,
      "learning_rate": 0.0005676399533419023,
      "loss": 3.072,
      "step": 34378
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6294745206832886,
      "learning_rate": 0.0005676381053135181,
      "loss": 3.3366,
      "step": 34379
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8112635612487793,
      "learning_rate": 0.0005676362572353748,
      "loss": 2.935,
      "step": 34380
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7115212678909302,
      "learning_rate": 0.000567634409107473,
      "loss": 2.9152,
      "step": 34381
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4472767114639282,
      "learning_rate": 0.0005676325609298127,
      "loss": 2.9606,
      "step": 34382
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.586747169494629,
      "learning_rate": 0.0005676307127023947,
      "loss": 3.0456,
      "step": 34383
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5721430778503418,
      "learning_rate": 0.0005676288644252189,
      "loss": 3.2506,
      "step": 34384
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5630178451538086,
      "learning_rate": 0.000567627016098286,
      "loss": 3.1599,
      "step": 34385
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6688400506973267,
      "learning_rate": 0.000567625167721596,
      "loss": 2.88,
      "step": 34386
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3794881105422974,
      "learning_rate": 0.0005676233192951496,
      "loss": 3.2634,
      "step": 34387
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.406826972961426,
      "learning_rate": 0.0005676214708189469,
      "loss": 3.1493,
      "step": 34388
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7743072509765625,
      "learning_rate": 0.0005676196222929884,
      "loss": 2.9974,
      "step": 34389
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2814440727233887,
      "learning_rate": 0.0005676177737172742,
      "loss": 3.2004,
      "step": 34390
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.995206832885742,
      "learning_rate": 0.000567615925091805,
      "loss": 2.9649,
      "step": 34391
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4559460878372192,
      "learning_rate": 0.0005676140764165808,
      "loss": 3.2578,
      "step": 34392
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0178539752960205,
      "learning_rate": 0.0005676122276916022,
      "loss": 2.7941,
      "step": 34393
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1129372119903564,
      "learning_rate": 0.0005676103789168693,
      "loss": 2.9614,
      "step": 34394
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4784162044525146,
      "learning_rate": 0.0005676085300923827,
      "loss": 3.1618,
      "step": 34395
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.2608892917633057,
      "learning_rate": 0.0005676066812181426,
      "loss": 2.9414,
      "step": 34396
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.381998896598816,
      "learning_rate": 0.0005676048322941494,
      "loss": 3.1403,
      "step": 34397
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1891227960586548,
      "learning_rate": 0.0005676029833204034,
      "loss": 2.9743,
      "step": 34398
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8574814796447754,
      "learning_rate": 0.0005676011342969048,
      "loss": 2.968,
      "step": 34399
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8174688816070557,
      "learning_rate": 0.0005675992852236542,
      "loss": 2.9436,
      "step": 34400
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3372241258621216,
      "learning_rate": 0.0005675974361006519,
      "loss": 3.1807,
      "step": 34401
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7217693328857422,
      "learning_rate": 0.0005675955869278982,
      "loss": 3.0397,
      "step": 34402
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4527095556259155,
      "learning_rate": 0.0005675937377053932,
      "loss": 3.4725,
      "step": 34403
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5526604652404785,
      "learning_rate": 0.0005675918884331377,
      "loss": 3.1823,
      "step": 34404
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5892013311386108,
      "learning_rate": 0.0005675900391111317,
      "loss": 3.4576,
      "step": 34405
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8549882173538208,
      "learning_rate": 0.0005675881897393757,
      "loss": 2.8301,
      "step": 34406
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7419689893722534,
      "learning_rate": 0.0005675863403178702,
      "loss": 2.9891,
      "step": 34407
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8532980680465698,
      "learning_rate": 0.0005675844908466151,
      "loss": 2.8678,
      "step": 34408
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6659048795700073,
      "learning_rate": 0.0005675826413256111,
      "loss": 2.9922,
      "step": 34409
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.605324387550354,
      "learning_rate": 0.0005675807917548582,
      "loss": 3.1144,
      "step": 34410
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5181691646575928,
      "learning_rate": 0.0005675789421343572,
      "loss": 3.2303,
      "step": 34411
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9678304195404053,
      "learning_rate": 0.0005675770924641083,
      "loss": 3.1448,
      "step": 34412
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5568199157714844,
      "learning_rate": 0.0005675752427441117,
      "loss": 3.1964,
      "step": 34413
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.837605357170105,
      "learning_rate": 0.0005675733929743677,
      "loss": 3.0343,
      "step": 34414
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.46280837059021,
      "learning_rate": 0.0005675715431548768,
      "loss": 2.8933,
      "step": 34415
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1900441646575928,
      "learning_rate": 0.0005675696932856394,
      "loss": 3.08,
      "step": 34416
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5068154335021973,
      "learning_rate": 0.0005675678433666556,
      "loss": 3.1049,
      "step": 34417
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5548102855682373,
      "learning_rate": 0.000567565993397926,
      "loss": 3.1713,
      "step": 34418
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.44379723072052,
      "learning_rate": 0.0005675641433794508,
      "loss": 3.2168,
      "step": 34419
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9265282154083252,
      "learning_rate": 0.0005675622933112304,
      "loss": 3.0502,
      "step": 34420
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3300269842147827,
      "learning_rate": 0.0005675604431932651,
      "loss": 3.1634,
      "step": 34421
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1816635131835938,
      "learning_rate": 0.0005675585930255552,
      "loss": 3.1883,
      "step": 34422
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5072599649429321,
      "learning_rate": 0.0005675567428081011,
      "loss": 3.1289,
      "step": 34423
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5855172872543335,
      "learning_rate": 0.0005675548925409032,
      "loss": 3.2384,
      "step": 34424
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.957778811454773,
      "learning_rate": 0.0005675530422239619,
      "loss": 2.9911,
      "step": 34425
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.853991746902466,
      "learning_rate": 0.0005675511918572774,
      "loss": 2.9987,
      "step": 34426
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8329187631607056,
      "learning_rate": 0.00056754934144085,
      "loss": 3.1917,
      "step": 34427
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.777188777923584,
      "learning_rate": 0.0005675474909746801,
      "loss": 3.0787,
      "step": 34428
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7841877937316895,
      "learning_rate": 0.0005675456404587681,
      "loss": 3.1096,
      "step": 34429
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7752697467803955,
      "learning_rate": 0.0005675437898931143,
      "loss": 3.3117,
      "step": 34430
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2670724391937256,
      "learning_rate": 0.0005675419392777192,
      "loss": 2.9539,
      "step": 34431
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8550193309783936,
      "learning_rate": 0.0005675400886125828,
      "loss": 2.9939,
      "step": 34432
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6128703355789185,
      "learning_rate": 0.0005675382378977059,
      "loss": 3.0152,
      "step": 34433
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.323440432548523,
      "learning_rate": 0.0005675363871330884,
      "loss": 2.9572,
      "step": 34434
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5703985691070557,
      "learning_rate": 0.0005675345363187308,
      "loss": 2.9961,
      "step": 34435
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4200105667114258,
      "learning_rate": 0.0005675326854546336,
      "loss": 2.9738,
      "step": 34436
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5461064577102661,
      "learning_rate": 0.000567530834540797,
      "loss": 3.0631,
      "step": 34437
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5347024202346802,
      "learning_rate": 0.0005675289835772213,
      "loss": 3.2543,
      "step": 34438
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5623234510421753,
      "learning_rate": 0.000567527132563907,
      "loss": 3.2316,
      "step": 34439
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2037017345428467,
      "learning_rate": 0.0005675252815008543,
      "loss": 3.0529,
      "step": 34440
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9264259338378906,
      "learning_rate": 0.0005675234303880636,
      "loss": 3.2555,
      "step": 34441
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4687594175338745,
      "learning_rate": 0.0005675215792255352,
      "loss": 3.0616,
      "step": 34442
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6974982023239136,
      "learning_rate": 0.0005675197280132696,
      "loss": 3.1007,
      "step": 34443
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5874273777008057,
      "learning_rate": 0.000567517876751267,
      "loss": 3.0266,
      "step": 34444
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8620641231536865,
      "learning_rate": 0.0005675160254395278,
      "loss": 2.8975,
      "step": 34445
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2856756448745728,
      "learning_rate": 0.0005675141740780523,
      "loss": 3.0828,
      "step": 34446
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.337863564491272,
      "learning_rate": 0.0005675123226668408,
      "loss": 3.0299,
      "step": 34447
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3128076791763306,
      "learning_rate": 0.0005675104712058938,
      "loss": 3.076,
      "step": 34448
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.629709243774414,
      "learning_rate": 0.0005675086196952114,
      "loss": 2.9179,
      "step": 34449
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9297488927841187,
      "learning_rate": 0.0005675067681347942,
      "loss": 3.1165,
      "step": 34450
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8442579507827759,
      "learning_rate": 0.0005675049165246425,
      "loss": 3.025,
      "step": 34451
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.900343894958496,
      "learning_rate": 0.0005675030648647565,
      "loss": 3.2243,
      "step": 34452
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6765409708023071,
      "learning_rate": 0.0005675012131551366,
      "loss": 2.9991,
      "step": 34453
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.069418430328369,
      "learning_rate": 0.0005674993613957834,
      "loss": 3.2005,
      "step": 34454
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.616408348083496,
      "learning_rate": 0.0005674975095866968,
      "loss": 3.1785,
      "step": 34455
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7161320447921753,
      "learning_rate": 0.0005674956577278774,
      "loss": 3.1479,
      "step": 34456
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3378543853759766,
      "learning_rate": 0.0005674938058193255,
      "loss": 3.2643,
      "step": 34457
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.662554144859314,
      "learning_rate": 0.0005674919538610415,
      "loss": 3.0404,
      "step": 34458
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3628495931625366,
      "learning_rate": 0.0005674901018530257,
      "loss": 3.1341,
      "step": 34459
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3527377843856812,
      "learning_rate": 0.0005674882497952784,
      "loss": 3.3492,
      "step": 34460
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4227231740951538,
      "learning_rate": 0.0005674863976878,
      "loss": 3.255,
      "step": 34461
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7143890857696533,
      "learning_rate": 0.0005674845455305909,
      "loss": 2.908,
      "step": 34462
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2956594228744507,
      "learning_rate": 0.0005674826933236512,
      "loss": 3.0991,
      "step": 34463
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.71110999584198,
      "learning_rate": 0.0005674808410669816,
      "loss": 2.8901,
      "step": 34464
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2551426887512207,
      "learning_rate": 0.0005674789887605821,
      "loss": 3.1486,
      "step": 34465
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4355812072753906,
      "learning_rate": 0.0005674771364044534,
      "loss": 3.0599,
      "step": 34466
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2527071237564087,
      "learning_rate": 0.0005674752839985955,
      "loss": 3.1113,
      "step": 34467
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8043384552001953,
      "learning_rate": 0.000567473431543009,
      "loss": 3.2732,
      "step": 34468
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4251805543899536,
      "learning_rate": 0.000567471579037694,
      "loss": 3.2134,
      "step": 34469
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2625046968460083,
      "learning_rate": 0.0005674697264826511,
      "loss": 3.2326,
      "step": 34470
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.406485676765442,
      "learning_rate": 0.0005674678738778805,
      "loss": 3.0649,
      "step": 34471
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.740604281425476,
      "learning_rate": 0.0005674660212233826,
      "loss": 2.8169,
      "step": 34472
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4416546821594238,
      "learning_rate": 0.0005674641685191577,
      "loss": 2.9756,
      "step": 34473
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.271527886390686,
      "learning_rate": 0.0005674623157652061,
      "loss": 2.9219,
      "step": 34474
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.600278615951538,
      "learning_rate": 0.0005674604629615283,
      "loss": 3.0018,
      "step": 34475
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8439934253692627,
      "learning_rate": 0.0005674586101081245,
      "loss": 3.1619,
      "step": 34476
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7700039148330688,
      "learning_rate": 0.000567456757204995,
      "loss": 3.3546,
      "step": 34477
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6542341709136963,
      "learning_rate": 0.0005674549042521404,
      "loss": 3.2172,
      "step": 34478
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6376017332077026,
      "learning_rate": 0.0005674530512495609,
      "loss": 3.1004,
      "step": 34479
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5160326957702637,
      "learning_rate": 0.0005674511981972567,
      "loss": 3.0125,
      "step": 34480
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.588504433631897,
      "learning_rate": 0.0005674493450952284,
      "loss": 3.2171,
      "step": 34481
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4348267316818237,
      "learning_rate": 0.0005674474919434761,
      "loss": 3.0503,
      "step": 34482
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2997877597808838,
      "learning_rate": 0.0005674456387420003,
      "loss": 2.9318,
      "step": 34483
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8709502220153809,
      "learning_rate": 0.0005674437854908012,
      "loss": 3.2314,
      "step": 34484
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4417471885681152,
      "learning_rate": 0.0005674419321898795,
      "loss": 3.2725,
      "step": 34485
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.461033821105957,
      "learning_rate": 0.0005674400788392352,
      "loss": 3.0,
      "step": 34486
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7707399129867554,
      "learning_rate": 0.0005674382254388687,
      "loss": 3.2363,
      "step": 34487
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.570559024810791,
      "learning_rate": 0.0005674363719887803,
      "loss": 2.9659,
      "step": 34488
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3652520179748535,
      "learning_rate": 0.0005674345184889706,
      "loss": 3.1384,
      "step": 34489
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.652886152267456,
      "learning_rate": 0.0005674326649394396,
      "loss": 3.2405,
      "step": 34490
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0300562381744385,
      "learning_rate": 0.000567430811340188,
      "loss": 2.9814,
      "step": 34491
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7730375528335571,
      "learning_rate": 0.000567428957691216,
      "loss": 2.9245,
      "step": 34492
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.535240888595581,
      "learning_rate": 0.0005674271039925237,
      "loss": 3.2028,
      "step": 34493
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.056509017944336,
      "learning_rate": 0.0005674252502441118,
      "loss": 3.0164,
      "step": 34494
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5933003425598145,
      "learning_rate": 0.0005674233964459804,
      "loss": 2.8097,
      "step": 34495
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3893203735351562,
      "learning_rate": 0.00056742154259813,
      "loss": 3.1702,
      "step": 34496
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2091469764709473,
      "learning_rate": 0.0005674196887005609,
      "loss": 2.9258,
      "step": 34497
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7031745910644531,
      "learning_rate": 0.0005674178347532733,
      "loss": 3.1817,
      "step": 34498
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3694809675216675,
      "learning_rate": 0.0005674159807562678,
      "loss": 3.0052,
      "step": 34499
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6042406558990479,
      "learning_rate": 0.0005674141267095446,
      "loss": 3.0186,
      "step": 34500
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3419413566589355,
      "learning_rate": 0.0005674122726131041,
      "loss": 2.8958,
      "step": 34501
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5973554849624634,
      "learning_rate": 0.0005674104184669466,
      "loss": 2.8112,
      "step": 34502
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.897130012512207,
      "learning_rate": 0.0005674085642710724,
      "loss": 2.6218,
      "step": 34503
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4781299829483032,
      "learning_rate": 0.000567406710025482,
      "loss": 3.0421,
      "step": 34504
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9505939483642578,
      "learning_rate": 0.0005674048557301756,
      "loss": 3.084,
      "step": 34505
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.520830512046814,
      "learning_rate": 0.0005674030013851535,
      "loss": 2.9361,
      "step": 34506
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4904770851135254,
      "learning_rate": 0.0005674011469904162,
      "loss": 3.1005,
      "step": 34507
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2165873050689697,
      "learning_rate": 0.000567399292545964,
      "loss": 3.1443,
      "step": 34508
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.539682149887085,
      "learning_rate": 0.0005673974380517972,
      "loss": 3.0394,
      "step": 34509
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.489667534828186,
      "learning_rate": 0.0005673955835079162,
      "loss": 3.0622,
      "step": 34510
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8291021585464478,
      "learning_rate": 0.0005673937289143213,
      "loss": 3.0129,
      "step": 34511
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6102465391159058,
      "learning_rate": 0.0005673918742710128,
      "loss": 2.9262,
      "step": 34512
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6141563653945923,
      "learning_rate": 0.0005673900195779912,
      "loss": 3.1075,
      "step": 34513
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5996729135513306,
      "learning_rate": 0.0005673881648352566,
      "loss": 3.1719,
      "step": 34514
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7392640113830566,
      "learning_rate": 0.0005673863100428097,
      "loss": 3.178,
      "step": 34515
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9601402282714844,
      "learning_rate": 0.0005673844552006505,
      "loss": 3.1431,
      "step": 34516
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.445312261581421,
      "learning_rate": 0.0005673826003087795,
      "loss": 2.9825,
      "step": 34517
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.620274305343628,
      "learning_rate": 0.000567380745367197,
      "loss": 3.1595,
      "step": 34518
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5894949436187744,
      "learning_rate": 0.0005673788903759034,
      "loss": 3.2528,
      "step": 34519
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9808951616287231,
      "learning_rate": 0.0005673770353348991,
      "loss": 3.0379,
      "step": 34520
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.796607494354248,
      "learning_rate": 0.0005673751802441843,
      "loss": 3.0166,
      "step": 34521
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9513435363769531,
      "learning_rate": 0.0005673733251037594,
      "loss": 3.1244,
      "step": 34522
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2650779485702515,
      "learning_rate": 0.0005673714699136247,
      "loss": 3.2362,
      "step": 34523
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4360352754592896,
      "learning_rate": 0.0005673696146737807,
      "loss": 3.0366,
      "step": 34524
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6027494668960571,
      "learning_rate": 0.0005673677593842275,
      "loss": 3.2037,
      "step": 34525
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6101551055908203,
      "learning_rate": 0.0005673659040449657,
      "loss": 3.0715,
      "step": 34526
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2550193071365356,
      "learning_rate": 0.0005673640486559956,
      "loss": 3.088,
      "step": 34527
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3332874774932861,
      "learning_rate": 0.0005673621932173173,
      "loss": 2.9114,
      "step": 34528
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4746403694152832,
      "learning_rate": 0.0005673603377289315,
      "loss": 2.9558,
      "step": 34529
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.774919033050537,
      "learning_rate": 0.0005673584821908382,
      "loss": 3.0837,
      "step": 34530
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4586496353149414,
      "learning_rate": 0.000567356626603038,
      "loss": 3.0924,
      "step": 34531
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5928925275802612,
      "learning_rate": 0.0005673547709655312,
      "loss": 3.0194,
      "step": 34532
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5590602159500122,
      "learning_rate": 0.000567352915278318,
      "loss": 3.3329,
      "step": 34533
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1439452171325684,
      "learning_rate": 0.0005673510595413988,
      "loss": 3.064,
      "step": 34534
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9270195960998535,
      "learning_rate": 0.0005673492037547741,
      "loss": 3.1836,
      "step": 34535
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.169579029083252,
      "learning_rate": 0.0005673473479184441,
      "loss": 2.9582,
      "step": 34536
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.543617010116577,
      "learning_rate": 0.0005673454920324092,
      "loss": 2.8634,
      "step": 34537
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7807843685150146,
      "learning_rate": 0.0005673436360966697,
      "loss": 3.0944,
      "step": 34538
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5706983804702759,
      "learning_rate": 0.000567341780111226,
      "loss": 2.9342,
      "step": 34539
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3836705684661865,
      "learning_rate": 0.0005673399240760785,
      "loss": 3.1885,
      "step": 34540
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.043386697769165,
      "learning_rate": 0.0005673380679912274,
      "loss": 3.0281,
      "step": 34541
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8196029663085938,
      "learning_rate": 0.000567336211856673,
      "loss": 2.9117,
      "step": 34542
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7308651208877563,
      "learning_rate": 0.0005673343556724158,
      "loss": 3.1215,
      "step": 34543
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.51376211643219,
      "learning_rate": 0.0005673324994384561,
      "loss": 3.1656,
      "step": 34544
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.409779667854309,
      "learning_rate": 0.0005673306431547944,
      "loss": 3.3223,
      "step": 34545
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3575632572174072,
      "learning_rate": 0.0005673287868214307,
      "loss": 2.9444,
      "step": 34546
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.753771424293518,
      "learning_rate": 0.0005673269304383655,
      "loss": 3.1042,
      "step": 34547
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3841590881347656,
      "learning_rate": 0.0005673250740055993,
      "loss": 2.797,
      "step": 34548
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5185937881469727,
      "learning_rate": 0.0005673232175231323,
      "loss": 3.2155,
      "step": 34549
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.975691795349121,
      "learning_rate": 0.0005673213609909648,
      "loss": 2.9529,
      "step": 34550
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2113924026489258,
      "learning_rate": 0.0005673195044090973,
      "loss": 3.0396,
      "step": 34551
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.891568899154663,
      "learning_rate": 0.0005673176477775301,
      "loss": 3.2548,
      "step": 34552
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1249871253967285,
      "learning_rate": 0.0005673157910962635,
      "loss": 3.0759,
      "step": 34553
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.774312138557434,
      "learning_rate": 0.0005673139343652978,
      "loss": 3.467,
      "step": 34554
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.932007312774658,
      "learning_rate": 0.0005673120775846333,
      "loss": 3.2282,
      "step": 34555
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0134546756744385,
      "learning_rate": 0.0005673102207542705,
      "loss": 2.9282,
      "step": 34556
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9156882762908936,
      "learning_rate": 0.0005673083638742097,
      "loss": 3.1757,
      "step": 34557
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.3040831089019775,
      "learning_rate": 0.0005673065069444512,
      "loss": 3.11,
      "step": 34558
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.044970989227295,
      "learning_rate": 0.0005673046499649954,
      "loss": 3.4631,
      "step": 34559
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7955156564712524,
      "learning_rate": 0.0005673027929358426,
      "loss": 3.0222,
      "step": 34560
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.498800277709961,
      "learning_rate": 0.0005673009358569932,
      "loss": 3.0779,
      "step": 34561
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5854499340057373,
      "learning_rate": 0.0005672990787284474,
      "loss": 3.32,
      "step": 34562
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7350854873657227,
      "learning_rate": 0.0005672972215502058,
      "loss": 2.9034,
      "step": 34563
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.394641160964966,
      "learning_rate": 0.0005672953643222685,
      "loss": 3.1202,
      "step": 34564
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.373013973236084,
      "learning_rate": 0.0005672935070446361,
      "loss": 3.0984,
      "step": 34565
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.554241895675659,
      "learning_rate": 0.0005672916497173086,
      "loss": 3.0295,
      "step": 34566
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.551203727722168,
      "learning_rate": 0.0005672897923402867,
      "loss": 3.0281,
      "step": 34567
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4961202144622803,
      "learning_rate": 0.0005672879349135705,
      "loss": 3.2871,
      "step": 34568
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3480396270751953,
      "learning_rate": 0.0005672860774371604,
      "loss": 3.2686,
      "step": 34569
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5498321056365967,
      "learning_rate": 0.0005672842199110569,
      "loss": 2.9972,
      "step": 34570
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0910091400146484,
      "learning_rate": 0.0005672823623352601,
      "loss": 3.2086,
      "step": 34571
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6738168001174927,
      "learning_rate": 0.0005672805047097705,
      "loss": 2.9445,
      "step": 34572
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.8174707889556885,
      "learning_rate": 0.0005672786470345884,
      "loss": 2.9704,
      "step": 34573
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4205166101455688,
      "learning_rate": 0.0005672767893097141,
      "loss": 3.218,
      "step": 34574
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.902971625328064,
      "learning_rate": 0.000567274931535148,
      "loss": 2.8401,
      "step": 34575
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6207791566848755,
      "learning_rate": 0.0005672730737108906,
      "loss": 3.0879,
      "step": 34576
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5018318891525269,
      "learning_rate": 0.0005672712158369419,
      "loss": 3.0438,
      "step": 34577
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7943371534347534,
      "learning_rate": 0.0005672693579133026,
      "loss": 3.3772,
      "step": 34578
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.57914137840271,
      "learning_rate": 0.0005672674999399728,
      "loss": 3.0709,
      "step": 34579
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.697039842605591,
      "learning_rate": 0.0005672656419169529,
      "loss": 3.0101,
      "step": 34580
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7427043914794922,
      "learning_rate": 0.0005672637838442433,
      "loss": 2.898,
      "step": 34581
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0173113346099854,
      "learning_rate": 0.0005672619257218443,
      "loss": 3.2163,
      "step": 34582
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.671440362930298,
      "learning_rate": 0.0005672600675497564,
      "loss": 3.028,
      "step": 34583
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.554439663887024,
      "learning_rate": 0.0005672582093279796,
      "loss": 2.894,
      "step": 34584
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6918946504592896,
      "learning_rate": 0.0005672563510565146,
      "loss": 3.0774,
      "step": 34585
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.835541009902954,
      "learning_rate": 0.0005672544927353615,
      "loss": 3.1587,
      "step": 34586
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1303446292877197,
      "learning_rate": 0.0005672526343645208,
      "loss": 3.0797,
      "step": 34587
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1981003284454346,
      "learning_rate": 0.0005672507759439928,
      "loss": 3.1546,
      "step": 34588
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.563690185546875,
      "learning_rate": 0.0005672489174737777,
      "loss": 3.0515,
      "step": 34589
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0783793926239014,
      "learning_rate": 0.0005672470589538762,
      "loss": 3.0312,
      "step": 34590
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.647409200668335,
      "learning_rate": 0.0005672452003842883,
      "loss": 3.258,
      "step": 34591
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4012256860733032,
      "learning_rate": 0.0005672433417650146,
      "loss": 3.1217,
      "step": 34592
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.361295223236084,
      "learning_rate": 0.0005672414830960551,
      "loss": 2.8742,
      "step": 34593
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9549872875213623,
      "learning_rate": 0.0005672396243774105,
      "loss": 3.0201,
      "step": 34594
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7729142904281616,
      "learning_rate": 0.000567237765609081,
      "loss": 3.0434,
      "step": 34595
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4209275245666504,
      "learning_rate": 0.000567235906791067,
      "loss": 3.3064,
      "step": 34596
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1950132846832275,
      "learning_rate": 0.0005672340479233687,
      "loss": 2.972,
      "step": 34597
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6258301734924316,
      "learning_rate": 0.0005672321890059866,
      "loss": 2.8211,
      "step": 34598
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3914998769760132,
      "learning_rate": 0.000567230330038921,
      "loss": 2.9905,
      "step": 34599
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.616346836090088,
      "learning_rate": 0.0005672284710221722,
      "loss": 2.9937,
      "step": 34600
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8981959819793701,
      "learning_rate": 0.0005672266119557406,
      "loss": 2.8403,
      "step": 34601
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6414225101470947,
      "learning_rate": 0.0005672247528396265,
      "loss": 3.1661,
      "step": 34602
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5599067211151123,
      "learning_rate": 0.0005672228936738304,
      "loss": 3.0569,
      "step": 34603
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9231337308883667,
      "learning_rate": 0.0005672210344583524,
      "loss": 3.0081,
      "step": 34604
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5069687366485596,
      "learning_rate": 0.000567219175193193,
      "loss": 3.165,
      "step": 34605
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.538466453552246,
      "learning_rate": 0.0005672173158783525,
      "loss": 2.8795,
      "step": 34606
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.812617301940918,
      "learning_rate": 0.0005672154565138312,
      "loss": 3.0872,
      "step": 34607
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7209882736206055,
      "learning_rate": 0.0005672135970996296,
      "loss": 3.0521,
      "step": 34608
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.9686837196350098,
      "learning_rate": 0.0005672117376357479,
      "loss": 2.8669,
      "step": 34609
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9672973155975342,
      "learning_rate": 0.0005672098781221866,
      "loss": 3.1398,
      "step": 34610
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4641542434692383,
      "learning_rate": 0.0005672080185589458,
      "loss": 3.1448,
      "step": 34611
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4446594715118408,
      "learning_rate": 0.0005672061589460259,
      "loss": 3.127,
      "step": 34612
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5205613374710083,
      "learning_rate": 0.0005672042992834275,
      "loss": 3.3579,
      "step": 34613
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7713030576705933,
      "learning_rate": 0.0005672024395711508,
      "loss": 2.9887,
      "step": 34614
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.822946548461914,
      "learning_rate": 0.0005672005798091961,
      "loss": 2.8402,
      "step": 34615
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6282957792282104,
      "learning_rate": 0.0005671987199975636,
      "loss": 2.8978,
      "step": 34616
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6338425874710083,
      "learning_rate": 0.0005671968601362541,
      "loss": 3.1131,
      "step": 34617
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5932419300079346,
      "learning_rate": 0.0005671950002252674,
      "loss": 2.9809,
      "step": 34618
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.481272578239441,
      "learning_rate": 0.0005671931402646042,
      "loss": 2.9754,
      "step": 34619
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.718762993812561,
      "learning_rate": 0.0005671912802542648,
      "loss": 3.0178,
      "step": 34620
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7670011520385742,
      "learning_rate": 0.0005671894201942495,
      "loss": 3.1257,
      "step": 34621
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5370038747787476,
      "learning_rate": 0.0005671875600845585,
      "loss": 2.882,
      "step": 34622
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5713456869125366,
      "learning_rate": 0.0005671856999251925,
      "loss": 3.1752,
      "step": 34623
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7242786884307861,
      "learning_rate": 0.0005671838397161515,
      "loss": 3.1044,
      "step": 34624
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5257432460784912,
      "learning_rate": 0.0005671819794574359,
      "loss": 3.0505,
      "step": 34625
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6272222995758057,
      "learning_rate": 0.0005671801191490463,
      "loss": 3.0649,
      "step": 34626
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3714572191238403,
      "learning_rate": 0.0005671782587909828,
      "loss": 3.2101,
      "step": 34627
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8685410022735596,
      "learning_rate": 0.0005671763983832458,
      "loss": 3.1496,
      "step": 34628
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5217925310134888,
      "learning_rate": 0.0005671745379258356,
      "loss": 3.3648,
      "step": 34629
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9880291223526,
      "learning_rate": 0.0005671726774187528,
      "loss": 3.0395,
      "step": 34630
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.681843876838684,
      "learning_rate": 0.0005671708168619974,
      "loss": 3.3255,
      "step": 34631
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.627623438835144,
      "learning_rate": 0.00056716895625557,
      "loss": 3.1407,
      "step": 34632
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4729411602020264,
      "learning_rate": 0.0005671670955994709,
      "loss": 3.3071,
      "step": 34633
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.962241530418396,
      "learning_rate": 0.0005671652348937002,
      "loss": 3.1136,
      "step": 34634
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6141548156738281,
      "learning_rate": 0.0005671633741382585,
      "loss": 3.219,
      "step": 34635
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.350694179534912,
      "learning_rate": 0.0005671615133331461,
      "loss": 2.8738,
      "step": 34636
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7093806266784668,
      "learning_rate": 0.0005671596524783635,
      "loss": 2.8922,
      "step": 34637
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3579386472702026,
      "learning_rate": 0.0005671577915739107,
      "loss": 3.113,
      "step": 34638
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0483880043029785,
      "learning_rate": 0.0005671559306197882,
      "loss": 3.1714,
      "step": 34639
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4432538747787476,
      "learning_rate": 0.0005671540696159964,
      "loss": 3.2778,
      "step": 34640
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7429349422454834,
      "learning_rate": 0.0005671522085625356,
      "loss": 3.084,
      "step": 34641
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0734078884124756,
      "learning_rate": 0.0005671503474594063,
      "loss": 3.0762,
      "step": 34642
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6410058736801147,
      "learning_rate": 0.0005671484863066087,
      "loss": 3.0457,
      "step": 34643
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.589883804321289,
      "learning_rate": 0.000567146625104143,
      "loss": 3.2999,
      "step": 34644
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5180854797363281,
      "learning_rate": 0.0005671447638520098,
      "loss": 2.998,
      "step": 34645
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4251418113708496,
      "learning_rate": 0.0005671429025502092,
      "loss": 2.9023,
      "step": 34646
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5654449462890625,
      "learning_rate": 0.0005671410411987419,
      "loss": 2.833,
      "step": 34647
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.350895643234253,
      "learning_rate": 0.0005671391797976078,
      "loss": 3.0253,
      "step": 34648
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8152540922164917,
      "learning_rate": 0.0005671373183468077,
      "loss": 3.3275,
      "step": 34649
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4330177307128906,
      "learning_rate": 0.0005671354568463417,
      "loss": 3.0785,
      "step": 34650
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0379767417907715,
      "learning_rate": 0.00056713359529621,
      "loss": 2.9973,
      "step": 34651
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8592535257339478,
      "learning_rate": 0.0005671317336964133,
      "loss": 2.9232,
      "step": 34652
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.594211459159851,
      "learning_rate": 0.0005671298720469518,
      "loss": 2.7933,
      "step": 34653
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.251936435699463,
      "learning_rate": 0.0005671280103478256,
      "loss": 3.2088,
      "step": 34654
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.607418417930603,
      "learning_rate": 0.0005671261485990355,
      "loss": 3.0129,
      "step": 34655
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3584154844284058,
      "learning_rate": 0.0005671242868005815,
      "loss": 3.2721,
      "step": 34656
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5302926301956177,
      "learning_rate": 0.000567122424952464,
      "loss": 3.1246,
      "step": 34657
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5264675617218018,
      "learning_rate": 0.0005671205630546834,
      "loss": 2.8253,
      "step": 34658
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4536067247390747,
      "learning_rate": 0.0005671187011072401,
      "loss": 2.8536,
      "step": 34659
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9638605117797852,
      "learning_rate": 0.0005671168391101343,
      "loss": 3.1252,
      "step": 34660
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6000759601593018,
      "learning_rate": 0.0005671149770633667,
      "loss": 3.0843,
      "step": 34661
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7086553573608398,
      "learning_rate": 0.0005671131149669372,
      "loss": 3.1815,
      "step": 34662
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.982031226158142,
      "learning_rate": 0.0005671112528208463,
      "loss": 3.1517,
      "step": 34663
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5166858434677124,
      "learning_rate": 0.0005671093906250944,
      "loss": 3.0549,
      "step": 34664
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5220566987991333,
      "learning_rate": 0.0005671075283796819,
      "loss": 2.9822,
      "step": 34665
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3921936750411987,
      "learning_rate": 0.000567105666084609,
      "loss": 3.1776,
      "step": 34666
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.612762689590454,
      "learning_rate": 0.0005671038037398761,
      "loss": 2.9898,
      "step": 34667
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.726021409034729,
      "learning_rate": 0.0005671019413454835,
      "loss": 2.9366,
      "step": 34668
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5277708768844604,
      "learning_rate": 0.0005671000789014318,
      "loss": 3.0212,
      "step": 34669
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.247515320777893,
      "learning_rate": 0.000567098216407721,
      "loss": 2.9532,
      "step": 34670
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4245561361312866,
      "learning_rate": 0.0005670963538643517,
      "loss": 3.0434,
      "step": 34671
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7316383123397827,
      "learning_rate": 0.0005670944912713239,
      "loss": 2.9099,
      "step": 34672
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9642595052719116,
      "learning_rate": 0.0005670926286286385,
      "loss": 2.8789,
      "step": 34673
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.01088285446167,
      "learning_rate": 0.0005670907659362954,
      "loss": 2.8035,
      "step": 34674
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4590858221054077,
      "learning_rate": 0.0005670889031942952,
      "loss": 2.9709,
      "step": 34675
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2450257539749146,
      "learning_rate": 0.0005670870404026379,
      "loss": 2.9671,
      "step": 34676
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9107980728149414,
      "learning_rate": 0.0005670851775613242,
      "loss": 2.9156,
      "step": 34677
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6711885929107666,
      "learning_rate": 0.0005670833146703544,
      "loss": 2.8731,
      "step": 34678
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6889991760253906,
      "learning_rate": 0.0005670814517297287,
      "loss": 3.1969,
      "step": 34679
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6287407875061035,
      "learning_rate": 0.0005670795887394474,
      "loss": 3.1674,
      "step": 34680
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6013914346694946,
      "learning_rate": 0.0005670777256995111,
      "loss": 3.1514,
      "step": 34681
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5908937454223633,
      "learning_rate": 0.0005670758626099201,
      "loss": 3.1495,
      "step": 34682
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6694848537445068,
      "learning_rate": 0.0005670739994706745,
      "loss": 3.1107,
      "step": 34683
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1740710735321045,
      "learning_rate": 0.0005670721362817749,
      "loss": 2.9503,
      "step": 34684
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5129464864730835,
      "learning_rate": 0.0005670702730432215,
      "loss": 3.0662,
      "step": 34685
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6406629085540771,
      "learning_rate": 0.0005670684097550146,
      "loss": 3.0282,
      "step": 34686
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.1720995903015137,
      "learning_rate": 0.0005670665464171548,
      "loss": 3.0388,
      "step": 34687
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.676269292831421,
      "learning_rate": 0.0005670646830296422,
      "loss": 2.9189,
      "step": 34688
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.524793028831482,
      "learning_rate": 0.0005670628195924772,
      "loss": 3.2446,
      "step": 34689
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4025890827178955,
      "learning_rate": 0.0005670609561056603,
      "loss": 3.3594,
      "step": 34690
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6709274053573608,
      "learning_rate": 0.0005670590925691916,
      "loss": 2.9153,
      "step": 34691
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5763969421386719,
      "learning_rate": 0.0005670572289830717,
      "loss": 3.328,
      "step": 34692
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5444514751434326,
      "learning_rate": 0.0005670553653473007,
      "loss": 3.237,
      "step": 34693
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3897409439086914,
      "learning_rate": 0.0005670535016618792,
      "loss": 3.0693,
      "step": 34694
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5650988817214966,
      "learning_rate": 0.0005670516379268072,
      "loss": 3.1013,
      "step": 34695
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8527480363845825,
      "learning_rate": 0.0005670497741420855,
      "loss": 3.0368,
      "step": 34696
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6487228870391846,
      "learning_rate": 0.0005670479103077142,
      "loss": 3.0663,
      "step": 34697
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3341580629348755,
      "learning_rate": 0.0005670460464236934,
      "loss": 3.0174,
      "step": 34698
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6215256452560425,
      "learning_rate": 0.0005670441824900239,
      "loss": 2.678,
      "step": 34699
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.655993938446045,
      "learning_rate": 0.0005670423185067058,
      "loss": 2.7238,
      "step": 34700
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4966963529586792,
      "learning_rate": 0.0005670404544737396,
      "loss": 3.0488,
      "step": 34701
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0397706031799316,
      "learning_rate": 0.0005670385903911254,
      "loss": 3.1992,
      "step": 34702
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.770330548286438,
      "learning_rate": 0.0005670367262588636,
      "loss": 3.2975,
      "step": 34703
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.248591661453247,
      "learning_rate": 0.0005670348620769549,
      "loss": 3.0905,
      "step": 34704
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.28763484954834,
      "learning_rate": 0.0005670329978453992,
      "loss": 2.9461,
      "step": 34705
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.992126703262329,
      "learning_rate": 0.000567031133564197,
      "loss": 3.3351,
      "step": 34706
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2586472034454346,
      "learning_rate": 0.0005670292692333488,
      "loss": 3.1299,
      "step": 34707
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3567142486572266,
      "learning_rate": 0.0005670274048528547,
      "loss": 2.8426,
      "step": 34708
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6043505668640137,
      "learning_rate": 0.0005670255404227152,
      "loss": 3.2147,
      "step": 34709
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6989943981170654,
      "learning_rate": 0.0005670236759429306,
      "loss": 3.0568,
      "step": 34710
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2392909526824951,
      "learning_rate": 0.0005670218114135013,
      "loss": 3.1835,
      "step": 34711
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3819847106933594,
      "learning_rate": 0.0005670199468344276,
      "loss": 3.0288,
      "step": 34712
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4843968152999878,
      "learning_rate": 0.0005670180822057098,
      "loss": 2.8799,
      "step": 34713
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.137080430984497,
      "learning_rate": 0.0005670162175273484,
      "loss": 3.2557,
      "step": 34714
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6973316669464111,
      "learning_rate": 0.0005670143527993436,
      "loss": 2.8925,
      "step": 34715
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3136454820632935,
      "learning_rate": 0.0005670124880216956,
      "loss": 3.1091,
      "step": 34716
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.810104489326477,
      "learning_rate": 0.0005670106231944052,
      "loss": 3.1306,
      "step": 34717
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.762390375137329,
      "learning_rate": 0.0005670087583174723,
      "loss": 3.0862,
      "step": 34718
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.376513957977295,
      "learning_rate": 0.0005670068933908975,
      "loss": 3.2323,
      "step": 34719
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.476311206817627,
      "learning_rate": 0.0005670050284146812,
      "loss": 3.0405,
      "step": 34720
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.573052406311035,
      "learning_rate": 0.0005670031633888236,
      "loss": 3.2799,
      "step": 34721
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3257362842559814,
      "learning_rate": 0.0005670012983133249,
      "loss": 3.1925,
      "step": 34722
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2380900382995605,
      "learning_rate": 0.0005669994331881857,
      "loss": 2.9143,
      "step": 34723
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.102820873260498,
      "learning_rate": 0.0005669975680134062,
      "loss": 3.177,
      "step": 34724
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.516311526298523,
      "learning_rate": 0.000566995702788987,
      "loss": 3.2348,
      "step": 34725
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1897847652435303,
      "learning_rate": 0.0005669938375149282,
      "loss": 3.2273,
      "step": 34726
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.014497995376587,
      "learning_rate": 0.0005669919721912302,
      "loss": 3.0608,
      "step": 34727
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0291061401367188,
      "learning_rate": 0.0005669901068178933,
      "loss": 3.0464,
      "step": 34728
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8282197713851929,
      "learning_rate": 0.0005669882413949179,
      "loss": 2.9824,
      "step": 34729
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4328442811965942,
      "learning_rate": 0.0005669863759223044,
      "loss": 3.0529,
      "step": 34730
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7282934188842773,
      "learning_rate": 0.0005669845104000529,
      "loss": 3.3905,
      "step": 34731
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6000763177871704,
      "learning_rate": 0.0005669826448281642,
      "loss": 3.1334,
      "step": 34732
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4949980974197388,
      "learning_rate": 0.0005669807792066383,
      "loss": 3.2119,
      "step": 34733
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4574460983276367,
      "learning_rate": 0.0005669789135354756,
      "loss": 3.1632,
      "step": 34734
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6020824909210205,
      "learning_rate": 0.0005669770478146765,
      "loss": 3.2051,
      "step": 34735
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4709736108779907,
      "learning_rate": 0.0005669751820442413,
      "loss": 2.96,
      "step": 34736
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3706655502319336,
      "learning_rate": 0.0005669733162241704,
      "loss": 3.0749,
      "step": 34737
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4453343152999878,
      "learning_rate": 0.0005669714503544641,
      "loss": 3.0418,
      "step": 34738
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.579925775527954,
      "learning_rate": 0.0005669695844351228,
      "loss": 2.9274,
      "step": 34739
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6713299751281738,
      "learning_rate": 0.0005669677184661467,
      "loss": 2.9969,
      "step": 34740
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4542901515960693,
      "learning_rate": 0.0005669658524475364,
      "loss": 2.9952,
      "step": 34741
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1381993293762207,
      "learning_rate": 0.000566963986379292,
      "loss": 3.1295,
      "step": 34742
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.348202705383301,
      "learning_rate": 0.0005669621202614141,
      "loss": 3.1112,
      "step": 34743
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2555413246154785,
      "learning_rate": 0.0005669602540939029,
      "loss": 3.0403,
      "step": 34744
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.1929619312286377,
      "learning_rate": 0.0005669583878767586,
      "loss": 3.0175,
      "step": 34745
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.808528423309326,
      "learning_rate": 0.0005669565216099817,
      "loss": 3.1007,
      "step": 34746
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2846200466156006,
      "learning_rate": 0.0005669546552935727,
      "loss": 3.202,
      "step": 34747
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.623668909072876,
      "learning_rate": 0.0005669527889275316,
      "loss": 3.0192,
      "step": 34748
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4803216457366943,
      "learning_rate": 0.0005669509225118591,
      "loss": 2.9336,
      "step": 34749
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.9719998836517334,
      "learning_rate": 0.0005669490560465553,
      "loss": 3.1738,
      "step": 34750
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8393491506576538,
      "learning_rate": 0.0005669471895316207,
      "loss": 3.1149,
      "step": 34751
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5055612325668335,
      "learning_rate": 0.0005669453229670556,
      "loss": 3.2932,
      "step": 34752
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.414422035217285,
      "learning_rate": 0.0005669434563528602,
      "loss": 3.1142,
      "step": 34753
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.186429738998413,
      "learning_rate": 0.000566941589689035,
      "loss": 3.0307,
      "step": 34754
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4508092403411865,
      "learning_rate": 0.0005669397229755804,
      "loss": 3.0347,
      "step": 34755
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.589635968208313,
      "learning_rate": 0.0005669378562124966,
      "loss": 3.1383,
      "step": 34756
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3438878059387207,
      "learning_rate": 0.000566935989399784,
      "loss": 3.3613,
      "step": 34757
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5110647678375244,
      "learning_rate": 0.000566934122537443,
      "loss": 3.2074,
      "step": 34758
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5161930322647095,
      "learning_rate": 0.0005669322556254739,
      "loss": 3.2043,
      "step": 34759
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4841097593307495,
      "learning_rate": 0.000566930388663877,
      "loss": 3.1519,
      "step": 34760
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8962376117706299,
      "learning_rate": 0.0005669285216526529,
      "loss": 3.4409,
      "step": 34761
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3064011335372925,
      "learning_rate": 0.0005669266545918015,
      "loss": 2.939,
      "step": 34762
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6767715215682983,
      "learning_rate": 0.0005669247874813235,
      "loss": 3.2202,
      "step": 34763
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.660752534866333,
      "learning_rate": 0.0005669229203212192,
      "loss": 3.0915,
      "step": 34764
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7095043659210205,
      "learning_rate": 0.0005669210531114888,
      "loss": 3.014,
      "step": 34765
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4750282764434814,
      "learning_rate": 0.0005669191858521328,
      "loss": 2.9679,
      "step": 34766
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.772179126739502,
      "learning_rate": 0.0005669173185431514,
      "loss": 3.1561,
      "step": 34767
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4805163145065308,
      "learning_rate": 0.0005669154511845451,
      "loss": 3.0839,
      "step": 34768
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4883111715316772,
      "learning_rate": 0.0005669135837763141,
      "loss": 3.102,
      "step": 34769
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.621663212776184,
      "learning_rate": 0.000566911716318459,
      "loss": 3.0422,
      "step": 34770
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4026676416397095,
      "learning_rate": 0.0005669098488109799,
      "loss": 3.1358,
      "step": 34771
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4322220087051392,
      "learning_rate": 0.0005669079812538772,
      "loss": 2.85,
      "step": 34772
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7063847780227661,
      "learning_rate": 0.0005669061136471513,
      "loss": 3.0789,
      "step": 34773
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4795818328857422,
      "learning_rate": 0.0005669042459908024,
      "loss": 3.1118,
      "step": 34774
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3026411533355713,
      "learning_rate": 0.0005669023782848311,
      "loss": 3.2215,
      "step": 34775
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.390615701675415,
      "learning_rate": 0.0005669005105292375,
      "loss": 3.1136,
      "step": 34776
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9707040786743164,
      "learning_rate": 0.0005668986427240221,
      "loss": 2.9953,
      "step": 34777
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7679823637008667,
      "learning_rate": 0.0005668967748691852,
      "loss": 3.2535,
      "step": 34778
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2227489948272705,
      "learning_rate": 0.0005668949069647271,
      "loss": 3.0181,
      "step": 34779
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5728564262390137,
      "learning_rate": 0.0005668930390106483,
      "loss": 3.1293,
      "step": 34780
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.299973487854004,
      "learning_rate": 0.0005668911710069491,
      "loss": 3.0354,
      "step": 34781
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7219808101654053,
      "learning_rate": 0.0005668893029536296,
      "loss": 3.2625,
      "step": 34782
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2580416202545166,
      "learning_rate": 0.0005668874348506905,
      "loss": 2.9556,
      "step": 34783
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7199267148971558,
      "learning_rate": 0.0005668855666981319,
      "loss": 2.9943,
      "step": 34784
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.6798126697540283,
      "learning_rate": 0.0005668836984959543,
      "loss": 3.0276,
      "step": 34785
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3312139511108398,
      "learning_rate": 0.0005668818302441579,
      "loss": 2.8813,
      "step": 34786
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8007783889770508,
      "learning_rate": 0.0005668799619427432,
      "loss": 3.0152,
      "step": 34787
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3180160522460938,
      "learning_rate": 0.0005668780935917105,
      "loss": 3.2045,
      "step": 34788
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.4878194332122803,
      "learning_rate": 0.00056687622519106,
      "loss": 2.8489,
      "step": 34789
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8488026857376099,
      "learning_rate": 0.0005668743567407923,
      "loss": 3.1988,
      "step": 34790
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3321963548660278,
      "learning_rate": 0.0005668724882409075,
      "loss": 2.9543,
      "step": 34791
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6048940420150757,
      "learning_rate": 0.0005668706196914061,
      "loss": 2.9314,
      "step": 34792
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3090847730636597,
      "learning_rate": 0.0005668687510922885,
      "loss": 2.9241,
      "step": 34793
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6290653944015503,
      "learning_rate": 0.0005668668824435549,
      "loss": 3.0684,
      "step": 34794
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5524660348892212,
      "learning_rate": 0.0005668650137452058,
      "loss": 2.97,
      "step": 34795
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4196854829788208,
      "learning_rate": 0.0005668631449972413,
      "loss": 3.1753,
      "step": 34796
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.709750771522522,
      "learning_rate": 0.000566861276199662,
      "loss": 3.1743,
      "step": 34797
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.341168999671936,
      "learning_rate": 0.0005668594073524681,
      "loss": 3.1158,
      "step": 34798
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5125781297683716,
      "learning_rate": 0.0005668575384556601,
      "loss": 2.9421,
      "step": 34799
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6760209798812866,
      "learning_rate": 0.0005668556695092382,
      "loss": 3.0269,
      "step": 34800
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5960118770599365,
      "learning_rate": 0.0005668538005132028,
      "loss": 2.8869,
      "step": 34801
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8216702938079834,
      "learning_rate": 0.0005668519314675541,
      "loss": 3.0878,
      "step": 34802
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7321745157241821,
      "learning_rate": 0.0005668500623722927,
      "loss": 3.1143,
      "step": 34803
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4081246852874756,
      "learning_rate": 0.0005668481932274189,
      "loss": 3.1257,
      "step": 34804
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.006075620651245,
      "learning_rate": 0.000566846324032933,
      "loss": 2.9088,
      "step": 34805
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.52292799949646,
      "learning_rate": 0.0005668444547888351,
      "loss": 2.9902,
      "step": 34806
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6616790294647217,
      "learning_rate": 0.000566842585495126,
      "loss": 3.1983,
      "step": 34807
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7037615776062012,
      "learning_rate": 0.0005668407161518057,
      "loss": 2.9108,
      "step": 34808
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8031556606292725,
      "learning_rate": 0.0005668388467588748,
      "loss": 2.9948,
      "step": 34809
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5459985733032227,
      "learning_rate": 0.0005668369773163335,
      "loss": 2.9641,
      "step": 34810
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8057944774627686,
      "learning_rate": 0.0005668351078241821,
      "loss": 3.2878,
      "step": 34811
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.579023838043213,
      "learning_rate": 0.0005668332382824209,
      "loss": 2.9701,
      "step": 34812
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4326694011688232,
      "learning_rate": 0.0005668313686910505,
      "loss": 3.2928,
      "step": 34813
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6372694969177246,
      "learning_rate": 0.0005668294990500712,
      "loss": 3.2468,
      "step": 34814
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6902647018432617,
      "learning_rate": 0.0005668276293594831,
      "loss": 3.0376,
      "step": 34815
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5275464057922363,
      "learning_rate": 0.0005668257596192868,
      "loss": 3.1544,
      "step": 34816
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.774603009223938,
      "learning_rate": 0.0005668238898294826,
      "loss": 2.9133,
      "step": 34817
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1844472885131836,
      "learning_rate": 0.0005668220199900707,
      "loss": 3.125,
      "step": 34818
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4491835832595825,
      "learning_rate": 0.0005668201501010516,
      "loss": 2.8825,
      "step": 34819
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4384865760803223,
      "learning_rate": 0.0005668182801624256,
      "loss": 2.9305,
      "step": 34820
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9431178569793701,
      "learning_rate": 0.0005668164101741929,
      "loss": 3.1792,
      "step": 34821
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5861115455627441,
      "learning_rate": 0.0005668145401363543,
      "loss": 3.0364,
      "step": 34822
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.848253607749939,
      "learning_rate": 0.0005668126700489097,
      "loss": 3.11,
      "step": 34823
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7787957191467285,
      "learning_rate": 0.0005668107999118595,
      "loss": 3.3302,
      "step": 34824
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7443523406982422,
      "learning_rate": 0.0005668089297252042,
      "loss": 3.2475,
      "step": 34825
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5595812797546387,
      "learning_rate": 0.000566807059488944,
      "loss": 2.9962,
      "step": 34826
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0363528728485107,
      "learning_rate": 0.0005668051892030794,
      "loss": 3.1061,
      "step": 34827
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.348057508468628,
      "learning_rate": 0.0005668033188676108,
      "loss": 2.8081,
      "step": 34828
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6059956550598145,
      "learning_rate": 0.0005668014484825382,
      "loss": 3.016,
      "step": 34829
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7066718339920044,
      "learning_rate": 0.0005667995780478623,
      "loss": 3.0654,
      "step": 34830
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7191311120986938,
      "learning_rate": 0.0005667977075635833,
      "loss": 3.2086,
      "step": 34831
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4598767757415771,
      "learning_rate": 0.0005667958370297016,
      "loss": 2.9372,
      "step": 34832
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3106374740600586,
      "learning_rate": 0.0005667939664462174,
      "loss": 3.041,
      "step": 34833
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7815759181976318,
      "learning_rate": 0.0005667920958131313,
      "loss": 3.1244,
      "step": 34834
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.4071106910705566,
      "learning_rate": 0.0005667902251304435,
      "loss": 3.2015,
      "step": 34835
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5312036275863647,
      "learning_rate": 0.0005667883543981543,
      "loss": 3.032,
      "step": 34836
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.096644401550293,
      "learning_rate": 0.0005667864836162641,
      "loss": 3.066,
      "step": 34837
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5520448684692383,
      "learning_rate": 0.0005667846127847733,
      "loss": 2.9439,
      "step": 34838
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.285585641860962,
      "learning_rate": 0.0005667827419036822,
      "loss": 3.2581,
      "step": 34839
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9786796569824219,
      "learning_rate": 0.0005667808709729912,
      "loss": 3.044,
      "step": 34840
    },
    {
      "epoch": 0.45,
      "grad_norm": 4.729371070861816,
      "learning_rate": 0.0005667789999927006,
      "loss": 2.7536,
      "step": 34841
    },
    {
      "epoch": 0.45,
      "grad_norm": 6.566430568695068,
      "learning_rate": 0.0005667771289628107,
      "loss": 2.9485,
      "step": 34842
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.534334659576416,
      "learning_rate": 0.0005667752578833218,
      "loss": 3.1778,
      "step": 34843
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7857732772827148,
      "learning_rate": 0.0005667733867542344,
      "loss": 2.9202,
      "step": 34844
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.669918417930603,
      "learning_rate": 0.0005667715155755489,
      "loss": 3.1095,
      "step": 34845
    },
    {
      "epoch": 0.45,
      "grad_norm": 4.081542491912842,
      "learning_rate": 0.0005667696443472655,
      "loss": 3.1131,
      "step": 34846
    },
    {
      "epoch": 0.45,
      "grad_norm": 4.262220859527588,
      "learning_rate": 0.0005667677730693846,
      "loss": 2.8948,
      "step": 34847
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0658133029937744,
      "learning_rate": 0.0005667659017419065,
      "loss": 3.1907,
      "step": 34848
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.404288411140442,
      "learning_rate": 0.0005667640303648315,
      "loss": 2.7359,
      "step": 34849
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7149378061294556,
      "learning_rate": 0.0005667621589381601,
      "loss": 3.1123,
      "step": 34850
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.002936601638794,
      "learning_rate": 0.0005667602874618928,
      "loss": 3.2762,
      "step": 34851
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9475226402282715,
      "learning_rate": 0.0005667584159360293,
      "loss": 2.9663,
      "step": 34852
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2492618560791016,
      "learning_rate": 0.0005667565443605707,
      "loss": 2.8188,
      "step": 34853
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.6361172199249268,
      "learning_rate": 0.0005667546727355169,
      "loss": 2.9913,
      "step": 34854
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7952667474746704,
      "learning_rate": 0.0005667528010608684,
      "loss": 3.0698,
      "step": 34855
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.2916377782821655,
      "learning_rate": 0.0005667509293366254,
      "loss": 3.0481,
      "step": 34856
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4707404375076294,
      "learning_rate": 0.0005667490575627886,
      "loss": 3.0661,
      "step": 34857
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5934431552886963,
      "learning_rate": 0.000566747185739358,
      "loss": 2.9774,
      "step": 34858
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.283693790435791,
      "learning_rate": 0.000566745313866334,
      "loss": 3.1905,
      "step": 34859
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.31354820728302,
      "learning_rate": 0.0005667434419437171,
      "loss": 3.1214,
      "step": 34860
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8124969005584717,
      "learning_rate": 0.0005667415699715074,
      "loss": 2.9282,
      "step": 34861
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7373369932174683,
      "learning_rate": 0.0005667396979497056,
      "loss": 3.1399,
      "step": 34862
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0987019538879395,
      "learning_rate": 0.0005667378258783117,
      "loss": 2.8776,
      "step": 34863
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6482007503509521,
      "learning_rate": 0.0005667359537573263,
      "loss": 2.8611,
      "step": 34864
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5118916034698486,
      "learning_rate": 0.0005667340815867498,
      "loss": 2.9768,
      "step": 34865
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.6069624423980713,
      "learning_rate": 0.0005667322093665821,
      "loss": 2.8054,
      "step": 34866
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1522867679595947,
      "learning_rate": 0.000566730337096824,
      "loss": 3.1583,
      "step": 34867
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.959742546081543,
      "learning_rate": 0.0005667284647774757,
      "loss": 2.8507,
      "step": 34868
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9561222791671753,
      "learning_rate": 0.0005667265924085375,
      "loss": 3.0635,
      "step": 34869
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6031016111373901,
      "learning_rate": 0.0005667247199900098,
      "loss": 3.1404,
      "step": 34870
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4818923473358154,
      "learning_rate": 0.0005667228475218929,
      "loss": 3.2215,
      "step": 34871
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9207892417907715,
      "learning_rate": 0.0005667209750041872,
      "loss": 3.2506,
      "step": 34872
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6602908372879028,
      "learning_rate": 0.0005667191024368932,
      "loss": 2.9721,
      "step": 34873
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7886950969696045,
      "learning_rate": 0.000566717229820011,
      "loss": 2.8391,
      "step": 34874
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.752121686935425,
      "learning_rate": 0.000566715357153541,
      "loss": 3.0015,
      "step": 34875
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.095423460006714,
      "learning_rate": 0.0005667134844374834,
      "loss": 3.1207,
      "step": 34876
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.510140299797058,
      "learning_rate": 0.000566711611671839,
      "loss": 3.1725,
      "step": 34877
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.7377660274505615,
      "learning_rate": 0.0005667097388566078,
      "loss": 2.9167,
      "step": 34878
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9845073223114014,
      "learning_rate": 0.0005667078659917902,
      "loss": 3.0361,
      "step": 34879
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.9662344455718994,
      "learning_rate": 0.0005667059930773866,
      "loss": 3.2101,
      "step": 34880
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.558004379272461,
      "learning_rate": 0.0005667041201133973,
      "loss": 2.7607,
      "step": 34881
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.3540022373199463,
      "learning_rate": 0.0005667022470998227,
      "loss": 3.0978,
      "step": 34882
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.5268306732177734,
      "learning_rate": 0.000566700374036663,
      "loss": 2.8468,
      "step": 34883
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.321275472640991,
      "learning_rate": 0.0005666985009239188,
      "loss": 2.9984,
      "step": 34884
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.031947135925293,
      "learning_rate": 0.0005666966277615902,
      "loss": 3.2538,
      "step": 34885
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.76039457321167,
      "learning_rate": 0.0005666947545496778,
      "loss": 3.1108,
      "step": 34886
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.4741368293762207,
      "learning_rate": 0.0005666928812881817,
      "loss": 2.8827,
      "step": 34887
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4109981060028076,
      "learning_rate": 0.0005666910079771024,
      "loss": 3.2153,
      "step": 34888
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.826271653175354,
      "learning_rate": 0.0005666891346164402,
      "loss": 2.9701,
      "step": 34889
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.405509114265442,
      "learning_rate": 0.0005666872612061955,
      "loss": 3.1672,
      "step": 34890
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7880452871322632,
      "learning_rate": 0.0005666853877463686,
      "loss": 3.1909,
      "step": 34891
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5494824647903442,
      "learning_rate": 0.0005666835142369598,
      "loss": 3.2573,
      "step": 34892
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.512101650238037,
      "learning_rate": 0.0005666816406779695,
      "loss": 3.1587,
      "step": 34893
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.004725694656372,
      "learning_rate": 0.000566679767069398,
      "loss": 3.0735,
      "step": 34894
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.714046835899353,
      "learning_rate": 0.0005666778934112458,
      "loss": 3.0949,
      "step": 34895
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5446510314941406,
      "learning_rate": 0.0005666760197035131,
      "loss": 3.0533,
      "step": 34896
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7954941987991333,
      "learning_rate": 0.0005666741459462003,
      "loss": 3.0169,
      "step": 34897
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4051659107208252,
      "learning_rate": 0.0005666722721393078,
      "loss": 3.0016,
      "step": 34898
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.579950213432312,
      "learning_rate": 0.0005666703982828359,
      "loss": 2.8522,
      "step": 34899
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.0433030128479004,
      "learning_rate": 0.0005666685243767848,
      "loss": 3.0177,
      "step": 34900
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7468031644821167,
      "learning_rate": 0.000566666650421155,
      "loss": 3.1241,
      "step": 34901
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5882911682128906,
      "learning_rate": 0.0005666647764159469,
      "loss": 2.8579,
      "step": 34902
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9164278507232666,
      "learning_rate": 0.000566662902361161,
      "loss": 3.2838,
      "step": 34903
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6712758541107178,
      "learning_rate": 0.0005666610282567971,
      "loss": 3.1595,
      "step": 34904
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6840136051177979,
      "learning_rate": 0.000566659154102856,
      "loss": 2.989,
      "step": 34905
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.9763550758361816,
      "learning_rate": 0.0005666572798993379,
      "loss": 3.0335,
      "step": 34906
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.563011884689331,
      "learning_rate": 0.0005666554056462433,
      "loss": 3.1713,
      "step": 34907
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3823692798614502,
      "learning_rate": 0.0005666535313435723,
      "loss": 3.0513,
      "step": 34908
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4199525117874146,
      "learning_rate": 0.0005666516569913253,
      "loss": 3.1304,
      "step": 34909
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6013734340667725,
      "learning_rate": 0.0005666497825895029,
      "loss": 2.9663,
      "step": 34910
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.653723120689392,
      "learning_rate": 0.0005666479081381052,
      "loss": 3.0364,
      "step": 34911
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.580960750579834,
      "learning_rate": 0.0005666460336371327,
      "loss": 3.1342,
      "step": 34912
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6558806896209717,
      "learning_rate": 0.0005666441590865855,
      "loss": 3.008,
      "step": 34913
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5091147422790527,
      "learning_rate": 0.0005666422844864642,
      "loss": 2.9404,
      "step": 34914
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5563498735427856,
      "learning_rate": 0.0005666404098367691,
      "loss": 3.0169,
      "step": 34915
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4023793935775757,
      "learning_rate": 0.0005666385351375005,
      "loss": 3.1542,
      "step": 34916
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8643085956573486,
      "learning_rate": 0.0005666366603886587,
      "loss": 2.9174,
      "step": 34917
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.8258368968963623,
      "learning_rate": 0.0005666347855902442,
      "loss": 2.9625,
      "step": 34918
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.3156682252883911,
      "learning_rate": 0.0005666329107422573,
      "loss": 3.3429,
      "step": 34919
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7341065406799316,
      "learning_rate": 0.0005666310358446981,
      "loss": 3.1953,
      "step": 34920
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.625281572341919,
      "learning_rate": 0.0005666291608975673,
      "loss": 3.0904,
      "step": 34921
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6029638051986694,
      "learning_rate": 0.0005666272859008652,
      "loss": 2.9989,
      "step": 34922
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.364720344543457,
      "learning_rate": 0.0005666254108545918,
      "loss": 3.127,
      "step": 34923
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.192523956298828,
      "learning_rate": 0.000566623535758748,
      "loss": 3.0684,
      "step": 34924
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.480271577835083,
      "learning_rate": 0.0005666216606133337,
      "loss": 2.8605,
      "step": 34925
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.640437364578247,
      "learning_rate": 0.0005666197854183495,
      "loss": 3.329,
      "step": 34926
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.09318208694458,
      "learning_rate": 0.0005666179101737954,
      "loss": 3.0502,
      "step": 34927
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.694099187850952,
      "learning_rate": 0.0005666160348796722,
      "loss": 2.8154,
      "step": 34928
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.056385040283203,
      "learning_rate": 0.00056661415953598,
      "loss": 2.9979,
      "step": 34929
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.1869089603424072,
      "learning_rate": 0.0005666122841427192,
      "loss": 3.0928,
      "step": 34930
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.572154998779297,
      "learning_rate": 0.0005666104086998902,
      "loss": 3.193,
      "step": 34931
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6501765251159668,
      "learning_rate": 0.0005666085332074932,
      "loss": 2.9561,
      "step": 34932
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4974175691604614,
      "learning_rate": 0.0005666066576655287,
      "loss": 3.0503,
      "step": 34933
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5211570262908936,
      "learning_rate": 0.000566604782073997,
      "loss": 3.1851,
      "step": 34934
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7340466976165771,
      "learning_rate": 0.0005666029064328983,
      "loss": 3.0889,
      "step": 34935
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.5598726272583008,
      "learning_rate": 0.0005666010307422332,
      "loss": 3.0423,
      "step": 34936
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.7166179418563843,
      "learning_rate": 0.0005665991550020019,
      "loss": 3.0257,
      "step": 34937
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.2603390216827393,
      "learning_rate": 0.0005665972792122049,
      "loss": 2.9785,
      "step": 34938
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.4633651971817017,
      "learning_rate": 0.0005665954033728423,
      "loss": 2.9872,
      "step": 34939
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.495171308517456,
      "learning_rate": 0.0005665935274839146,
      "loss": 2.9316,
      "step": 34940
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6258214712142944,
      "learning_rate": 0.0005665916515454222,
      "loss": 2.8791,
      "step": 34941
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.6312931776046753,
      "learning_rate": 0.0005665897755573653,
      "loss": 3.0579,
      "step": 34942
    },
    {
      "epoch": 0.45,
      "grad_norm": 1.557132601737976,
      "learning_rate": 0.0005665878995197444,
      "loss": 3.0744,
      "step": 34943
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.0760245323181152,
      "learning_rate": 0.0005665860234325597,
      "loss": 3.3715,
      "step": 34944
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7703436613082886,
      "learning_rate": 0.0005665841472958117,
      "loss": 2.9229,
      "step": 34945
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.44886314868927,
      "learning_rate": 0.0005665822711095007,
      "loss": 3.0754,
      "step": 34946
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4768303632736206,
      "learning_rate": 0.0005665803948736269,
      "loss": 3.0056,
      "step": 34947
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6051479578018188,
      "learning_rate": 0.0005665785185881908,
      "loss": 2.9197,
      "step": 34948
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5083035230636597,
      "learning_rate": 0.0005665766422531927,
      "loss": 2.9703,
      "step": 34949
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6911252737045288,
      "learning_rate": 0.0005665747658686332,
      "loss": 3.3012,
      "step": 34950
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5058526992797852,
      "learning_rate": 0.0005665728894345121,
      "loss": 2.7861,
      "step": 34951
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4173827171325684,
      "learning_rate": 0.0005665710129508303,
      "loss": 3.0746,
      "step": 34952
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.685736894607544,
      "learning_rate": 0.0005665691364175879,
      "loss": 3.2602,
      "step": 34953
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.404591679573059,
      "learning_rate": 0.0005665672598347851,
      "loss": 3.0994,
      "step": 34954
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.39579701423645,
      "learning_rate": 0.0005665653832024225,
      "loss": 3.2293,
      "step": 34955
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.648886203765869,
      "learning_rate": 0.0005665635065205004,
      "loss": 3.2753,
      "step": 34956
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5012664794921875,
      "learning_rate": 0.0005665616297890192,
      "loss": 3.2044,
      "step": 34957
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9245357513427734,
      "learning_rate": 0.0005665597530079789,
      "loss": 3.2148,
      "step": 34958
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.759149193763733,
      "learning_rate": 0.0005665578761773803,
      "loss": 3.0032,
      "step": 34959
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8456909656524658,
      "learning_rate": 0.0005665559992972236,
      "loss": 3.1095,
      "step": 34960
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8258391618728638,
      "learning_rate": 0.000566554122367509,
      "loss": 2.912,
      "step": 34961
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3536096811294556,
      "learning_rate": 0.000566552245388237,
      "loss": 3.0347,
      "step": 34962
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0511815547943115,
      "learning_rate": 0.0005665503683594078,
      "loss": 2.7338,
      "step": 34963
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5978260040283203,
      "learning_rate": 0.000566548491281022,
      "loss": 3.3819,
      "step": 34964
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2899043560028076,
      "learning_rate": 0.0005665466141530798,
      "loss": 3.0561,
      "step": 34965
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4754632711410522,
      "learning_rate": 0.0005665447369755815,
      "loss": 3.0806,
      "step": 34966
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5189088582992554,
      "learning_rate": 0.0005665428597485275,
      "loss": 2.9387,
      "step": 34967
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3159631490707397,
      "learning_rate": 0.0005665409824719181,
      "loss": 3.1296,
      "step": 34968
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.419641137123108,
      "learning_rate": 0.0005665391051457538,
      "loss": 3.3467,
      "step": 34969
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3033262491226196,
      "learning_rate": 0.0005665372277700348,
      "loss": 3.1254,
      "step": 34970
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7129114866256714,
      "learning_rate": 0.0005665353503447615,
      "loss": 3.0879,
      "step": 34971
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6985256671905518,
      "learning_rate": 0.0005665334728699343,
      "loss": 3.0782,
      "step": 34972
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.543506622314453,
      "learning_rate": 0.0005665315953455533,
      "loss": 2.8081,
      "step": 34973
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.712112307548523,
      "learning_rate": 0.0005665297177716193,
      "loss": 3.1041,
      "step": 34974
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.486720085144043,
      "learning_rate": 0.0005665278401481323,
      "loss": 3.1546,
      "step": 34975
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9414585828781128,
      "learning_rate": 0.0005665259624750928,
      "loss": 3.1275,
      "step": 34976
    },
    {
      "epoch": 0.46,
      "grad_norm": 4.05318546295166,
      "learning_rate": 0.0005665240847525009,
      "loss": 3.1006,
      "step": 34977
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.1313886642456055,
      "learning_rate": 0.0005665222069803573,
      "loss": 3.1693,
      "step": 34978
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4261387586593628,
      "learning_rate": 0.0005665203291586622,
      "loss": 3.3176,
      "step": 34979
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9187545776367188,
      "learning_rate": 0.0005665184512874159,
      "loss": 3.0899,
      "step": 34980
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5333056449890137,
      "learning_rate": 0.0005665165733666187,
      "loss": 2.9766,
      "step": 34981
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.8892107009887695,
      "learning_rate": 0.0005665146953962711,
      "loss": 2.9778,
      "step": 34982
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.574141263961792,
      "learning_rate": 0.0005665128173763735,
      "loss": 3.3032,
      "step": 34983
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.299119710922241,
      "learning_rate": 0.000566510939306926,
      "loss": 3.0271,
      "step": 34984
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.870676279067993,
      "learning_rate": 0.0005665090611879292,
      "loss": 3.3964,
      "step": 34985
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.9378719329833984,
      "learning_rate": 0.0005665071830193832,
      "loss": 3.1057,
      "step": 34986
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2977083921432495,
      "learning_rate": 0.0005665053048012885,
      "loss": 3.2293,
      "step": 34987
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7491695880889893,
      "learning_rate": 0.0005665034265336455,
      "loss": 2.9349,
      "step": 34988
    },
    {
      "epoch": 0.46,
      "grad_norm": 5.134705543518066,
      "learning_rate": 0.0005665015482164545,
      "loss": 3.1113,
      "step": 34989
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.1348283290863037,
      "learning_rate": 0.0005664996698497159,
      "loss": 3.2595,
      "step": 34990
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6125853061676025,
      "learning_rate": 0.0005664977914334299,
      "loss": 3.2143,
      "step": 34991
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.5359702110290527,
      "learning_rate": 0.0005664959129675968,
      "loss": 3.0626,
      "step": 34992
    },
    {
      "epoch": 0.46,
      "grad_norm": 5.272133827209473,
      "learning_rate": 0.0005664940344522172,
      "loss": 2.9087,
      "step": 34993
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.219654083251953,
      "learning_rate": 0.0005664921558872913,
      "loss": 3.0659,
      "step": 34994
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5932742357254028,
      "learning_rate": 0.0005664902772728195,
      "loss": 3.1494,
      "step": 34995
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.566471815109253,
      "learning_rate": 0.0005664883986088021,
      "loss": 3.0548,
      "step": 34996
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.7922964096069336,
      "learning_rate": 0.0005664865198952395,
      "loss": 3.1664,
      "step": 34997
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.6045358180999756,
      "learning_rate": 0.0005664846411321321,
      "loss": 3.0394,
      "step": 34998
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3936333656311035,
      "learning_rate": 0.00056648276231948,
      "loss": 3.2176,
      "step": 34999
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.130051851272583,
      "learning_rate": 0.000566480883457284,
      "loss": 2.8165,
      "step": 35000
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1466917991638184,
      "learning_rate": 0.0005664790045455439,
      "loss": 3.1128,
      "step": 35001
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0243988037109375,
      "learning_rate": 0.0005664771255842603,
      "loss": 2.9832,
      "step": 35002
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.934558391571045,
      "learning_rate": 0.0005664752465734339,
      "loss": 2.9317,
      "step": 35003
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3165395259857178,
      "learning_rate": 0.0005664733675130644,
      "loss": 2.7692,
      "step": 35004
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4255390167236328,
      "learning_rate": 0.0005664714884031526,
      "loss": 2.9354,
      "step": 35005
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.8993194103240967,
      "learning_rate": 0.0005664696092436987,
      "loss": 3.0787,
      "step": 35006
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4909262657165527,
      "learning_rate": 0.000566467730034703,
      "loss": 3.2191,
      "step": 35007
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0325701236724854,
      "learning_rate": 0.000566465850776166,
      "loss": 2.9032,
      "step": 35008
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6591770648956299,
      "learning_rate": 0.0005664639714680881,
      "loss": 3.0848,
      "step": 35009
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8706711530685425,
      "learning_rate": 0.0005664620921104693,
      "loss": 2.9498,
      "step": 35010
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.005863666534424,
      "learning_rate": 0.0005664602127033102,
      "loss": 3.1774,
      "step": 35011
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2219207286834717,
      "learning_rate": 0.0005664583332466112,
      "loss": 3.0515,
      "step": 35012
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6603233814239502,
      "learning_rate": 0.0005664564537403725,
      "loss": 2.8807,
      "step": 35013
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6728878021240234,
      "learning_rate": 0.0005664545741845945,
      "loss": 3.0383,
      "step": 35014
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4641869068145752,
      "learning_rate": 0.0005664526945792777,
      "loss": 2.9837,
      "step": 35015
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4766284227371216,
      "learning_rate": 0.0005664508149244222,
      "loss": 3.0413,
      "step": 35016
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5573852062225342,
      "learning_rate": 0.0005664489352200285,
      "loss": 3.2615,
      "step": 35017
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4827452898025513,
      "learning_rate": 0.0005664470554660969,
      "loss": 3.1686,
      "step": 35018
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4878780841827393,
      "learning_rate": 0.0005664451756626277,
      "loss": 3.1804,
      "step": 35019
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6793514490127563,
      "learning_rate": 0.0005664432958096214,
      "loss": 2.9798,
      "step": 35020
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4779645204544067,
      "learning_rate": 0.0005664414159070783,
      "loss": 3.347,
      "step": 35021
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0965821743011475,
      "learning_rate": 0.0005664395359549986,
      "loss": 3.1569,
      "step": 35022
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.27392578125,
      "learning_rate": 0.0005664376559533828,
      "loss": 3.218,
      "step": 35023
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.668609380722046,
      "learning_rate": 0.0005664357759022313,
      "loss": 3.1946,
      "step": 35024
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3485321998596191,
      "learning_rate": 0.0005664338958015442,
      "loss": 3.2014,
      "step": 35025
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5308963060379028,
      "learning_rate": 0.0005664320156513221,
      "loss": 3.0687,
      "step": 35026
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8268929719924927,
      "learning_rate": 0.0005664301354515653,
      "loss": 3.0137,
      "step": 35027
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.064631462097168,
      "learning_rate": 0.000566428255202274,
      "loss": 2.8476,
      "step": 35028
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7708122730255127,
      "learning_rate": 0.0005664263749034488,
      "loss": 3.1647,
      "step": 35029
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4239550828933716,
      "learning_rate": 0.0005664244945550897,
      "loss": 3.1031,
      "step": 35030
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5135046243667603,
      "learning_rate": 0.0005664226141571974,
      "loss": 2.7716,
      "step": 35031
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.387772560119629,
      "learning_rate": 0.0005664207337097721,
      "loss": 2.9246,
      "step": 35032
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7373148202896118,
      "learning_rate": 0.0005664188532128142,
      "loss": 2.8342,
      "step": 35033
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5316327810287476,
      "learning_rate": 0.0005664169726663239,
      "loss": 2.8335,
      "step": 35034
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6412910223007202,
      "learning_rate": 0.0005664150920703017,
      "loss": 2.9008,
      "step": 35035
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.342707633972168,
      "learning_rate": 0.0005664132114247479,
      "loss": 3.0265,
      "step": 35036
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7296286821365356,
      "learning_rate": 0.0005664113307296628,
      "loss": 3.2534,
      "step": 35037
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5020568370819092,
      "learning_rate": 0.000566409449985047,
      "loss": 2.8497,
      "step": 35038
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.378637671470642,
      "learning_rate": 0.0005664075691909005,
      "loss": 2.9449,
      "step": 35039
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5289748907089233,
      "learning_rate": 0.0005664056883472238,
      "loss": 3.1524,
      "step": 35040
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.639578104019165,
      "learning_rate": 0.0005664038074540173,
      "loss": 2.9902,
      "step": 35041
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.608978271484375,
      "learning_rate": 0.0005664019265112813,
      "loss": 3.0559,
      "step": 35042
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7677487134933472,
      "learning_rate": 0.0005664000455190162,
      "loss": 2.796,
      "step": 35043
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3877698183059692,
      "learning_rate": 0.0005663981644772222,
      "loss": 2.8918,
      "step": 35044
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5737251043319702,
      "learning_rate": 0.0005663962833858998,
      "loss": 3.1019,
      "step": 35045
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7107462882995605,
      "learning_rate": 0.0005663944022450492,
      "loss": 3.1711,
      "step": 35046
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4114633798599243,
      "learning_rate": 0.000566392521054671,
      "loss": 3.2239,
      "step": 35047
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.874718189239502,
      "learning_rate": 0.0005663906398147654,
      "loss": 3.337,
      "step": 35048
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6121622323989868,
      "learning_rate": 0.0005663887585253326,
      "loss": 3.0892,
      "step": 35049
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6799776554107666,
      "learning_rate": 0.0005663868771863732,
      "loss": 3.179,
      "step": 35050
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5455939769744873,
      "learning_rate": 0.0005663849957978875,
      "loss": 3.0516,
      "step": 35051
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6445387601852417,
      "learning_rate": 0.0005663831143598758,
      "loss": 3.0747,
      "step": 35052
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4689340591430664,
      "learning_rate": 0.0005663812328723384,
      "loss": 2.981,
      "step": 35053
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6891279220581055,
      "learning_rate": 0.0005663793513352758,
      "loss": 2.999,
      "step": 35054
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.609366536140442,
      "learning_rate": 0.000566377469748688,
      "loss": 3.2418,
      "step": 35055
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9531501531600952,
      "learning_rate": 0.0005663755881125758,
      "loss": 3.2753,
      "step": 35056
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4466444253921509,
      "learning_rate": 0.0005663737064269393,
      "loss": 2.9976,
      "step": 35057
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8044146299362183,
      "learning_rate": 0.0005663718246917789,
      "loss": 3.1861,
      "step": 35058
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7283470630645752,
      "learning_rate": 0.0005663699429070949,
      "loss": 3.0209,
      "step": 35059
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6232258081436157,
      "learning_rate": 0.0005663680610728878,
      "loss": 3.159,
      "step": 35060
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5825873613357544,
      "learning_rate": 0.0005663661791891579,
      "loss": 3.2639,
      "step": 35061
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.088127613067627,
      "learning_rate": 0.0005663642972559053,
      "loss": 2.9288,
      "step": 35062
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5173511505126953,
      "learning_rate": 0.0005663624152731307,
      "loss": 2.9169,
      "step": 35063
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.860085368156433,
      "learning_rate": 0.0005663605332408342,
      "loss": 2.9981,
      "step": 35064
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4086494445800781,
      "learning_rate": 0.0005663586511590163,
      "loss": 3.0174,
      "step": 35065
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5174930095672607,
      "learning_rate": 0.0005663567690276773,
      "loss": 2.9106,
      "step": 35066
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4540053606033325,
      "learning_rate": 0.0005663548868468176,
      "loss": 2.763,
      "step": 35067
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4199800491333008,
      "learning_rate": 0.0005663530046164375,
      "loss": 3.038,
      "step": 35068
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5231597423553467,
      "learning_rate": 0.0005663511223365373,
      "loss": 3.3685,
      "step": 35069
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5379737615585327,
      "learning_rate": 0.0005663492400071173,
      "loss": 3.0653,
      "step": 35070
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6029703617095947,
      "learning_rate": 0.0005663473576281781,
      "loss": 3.0405,
      "step": 35071
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7455343008041382,
      "learning_rate": 0.0005663454751997199,
      "loss": 3.0982,
      "step": 35072
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4954789876937866,
      "learning_rate": 0.000566343592721743,
      "loss": 2.9243,
      "step": 35073
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.491143822669983,
      "learning_rate": 0.0005663417101942477,
      "loss": 3.1456,
      "step": 35074
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5717278718948364,
      "learning_rate": 0.0005663398276172345,
      "loss": 3.106,
      "step": 35075
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5060091018676758,
      "learning_rate": 0.0005663379449907038,
      "loss": 3.2578,
      "step": 35076
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3815029859542847,
      "learning_rate": 0.0005663360623146558,
      "loss": 2.752,
      "step": 35077
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3501088619232178,
      "learning_rate": 0.0005663341795890908,
      "loss": 3.1688,
      "step": 35078
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5153872966766357,
      "learning_rate": 0.0005663322968140094,
      "loss": 2.8993,
      "step": 35079
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6550949811935425,
      "learning_rate": 0.0005663304139894116,
      "loss": 2.8389,
      "step": 35080
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5025333166122437,
      "learning_rate": 0.0005663285311152982,
      "loss": 3.2573,
      "step": 35081
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.72422456741333,
      "learning_rate": 0.0005663266481916692,
      "loss": 3.0864,
      "step": 35082
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4615809917449951,
      "learning_rate": 0.000566324765218525,
      "loss": 3.0297,
      "step": 35083
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9806708097457886,
      "learning_rate": 0.000566322882195866,
      "loss": 3.0587,
      "step": 35084
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5289102792739868,
      "learning_rate": 0.0005663209991236926,
      "loss": 3.1986,
      "step": 35085
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6032915115356445,
      "learning_rate": 0.000566319116002005,
      "loss": 2.9809,
      "step": 35086
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.411339521408081,
      "learning_rate": 0.0005663172328308038,
      "loss": 2.9185,
      "step": 35087
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4447120428085327,
      "learning_rate": 0.0005663153496100891,
      "loss": 3.1891,
      "step": 35088
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5702069997787476,
      "learning_rate": 0.0005663134663398614,
      "loss": 2.7832,
      "step": 35089
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3241331577301025,
      "learning_rate": 0.000566311583020121,
      "loss": 3.1735,
      "step": 35090
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2709910869598389,
      "learning_rate": 0.0005663096996508681,
      "loss": 3.0175,
      "step": 35091
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.658787250518799,
      "learning_rate": 0.0005663078162321033,
      "loss": 3.3807,
      "step": 35092
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.739579439163208,
      "learning_rate": 0.0005663059327638269,
      "loss": 3.0936,
      "step": 35093
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.158898115158081,
      "learning_rate": 0.0005663040492460392,
      "loss": 3.0122,
      "step": 35094
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5506014823913574,
      "learning_rate": 0.0005663021656787406,
      "loss": 3.0896,
      "step": 35095
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.497836947441101,
      "learning_rate": 0.0005663002820619313,
      "loss": 3.2421,
      "step": 35096
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2741968631744385,
      "learning_rate": 0.0005662983983956118,
      "loss": 2.9318,
      "step": 35097
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.145634651184082,
      "learning_rate": 0.0005662965146797823,
      "loss": 3.0873,
      "step": 35098
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7432377338409424,
      "learning_rate": 0.0005662946309144434,
      "loss": 3.1307,
      "step": 35099
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2282402515411377,
      "learning_rate": 0.0005662927470995951,
      "loss": 3.0393,
      "step": 35100
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2482333183288574,
      "learning_rate": 0.0005662908632352382,
      "loss": 2.8449,
      "step": 35101
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9643417596817017,
      "learning_rate": 0.0005662889793213727,
      "loss": 2.8969,
      "step": 35102
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5850815773010254,
      "learning_rate": 0.0005662870953579989,
      "loss": 3.0536,
      "step": 35103
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7220830917358398,
      "learning_rate": 0.0005662852113451175,
      "loss": 3.0827,
      "step": 35104
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4400864839553833,
      "learning_rate": 0.0005662833272827286,
      "loss": 3.0568,
      "step": 35105
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.532455325126648,
      "learning_rate": 0.0005662814431708326,
      "loss": 3.2125,
      "step": 35106
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.6075544357299805,
      "learning_rate": 0.0005662795590094298,
      "loss": 2.9397,
      "step": 35107
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9762088060379028,
      "learning_rate": 0.0005662776747985207,
      "loss": 3.1822,
      "step": 35108
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5342108011245728,
      "learning_rate": 0.0005662757905381054,
      "loss": 3.2846,
      "step": 35109
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8546154499053955,
      "learning_rate": 0.0005662739062281846,
      "loss": 3.2757,
      "step": 35110
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.695817232131958,
      "learning_rate": 0.0005662720218687583,
      "loss": 2.9232,
      "step": 35111
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3726073503494263,
      "learning_rate": 0.0005662701374598272,
      "loss": 3.1009,
      "step": 35112
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5073026418685913,
      "learning_rate": 0.0005662682530013912,
      "loss": 2.9326,
      "step": 35113
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.7107107639312744,
      "learning_rate": 0.0005662663684934511,
      "loss": 2.9947,
      "step": 35114
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1756222248077393,
      "learning_rate": 0.000566264483936007,
      "loss": 3.1359,
      "step": 35115
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5537325143814087,
      "learning_rate": 0.0005662625993290592,
      "loss": 2.9005,
      "step": 35116
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4208003282546997,
      "learning_rate": 0.0005662607146726083,
      "loss": 3.1498,
      "step": 35117
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.435142993927002,
      "learning_rate": 0.0005662588299666544,
      "loss": 3.0326,
      "step": 35118
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.345687747001648,
      "learning_rate": 0.0005662569452111981,
      "loss": 3.0929,
      "step": 35119
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4948762655258179,
      "learning_rate": 0.0005662550604062394,
      "loss": 2.9125,
      "step": 35120
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2759901285171509,
      "learning_rate": 0.000566253175551779,
      "loss": 3.0126,
      "step": 35121
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.720245599746704,
      "learning_rate": 0.000566251290647817,
      "loss": 2.9527,
      "step": 35122
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.70961594581604,
      "learning_rate": 0.000566249405694354,
      "loss": 3.1324,
      "step": 35123
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5639671087265015,
      "learning_rate": 0.0005662475206913902,
      "loss": 3.1353,
      "step": 35124
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.1557273864746094,
      "learning_rate": 0.0005662456356389259,
      "loss": 3.0885,
      "step": 35125
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.469205617904663,
      "learning_rate": 0.0005662437505369615,
      "loss": 3.2086,
      "step": 35126
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5341194868087769,
      "learning_rate": 0.0005662418653854974,
      "loss": 3.152,
      "step": 35127
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.717616319656372,
      "learning_rate": 0.0005662399801845339,
      "loss": 3.147,
      "step": 35128
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.742370843887329,
      "learning_rate": 0.0005662380949340713,
      "loss": 3.0386,
      "step": 35129
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1315131187438965,
      "learning_rate": 0.0005662362096341101,
      "loss": 3.0225,
      "step": 35130
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.957269310951233,
      "learning_rate": 0.0005662343242846506,
      "loss": 2.9992,
      "step": 35131
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7243478298187256,
      "learning_rate": 0.000566232438885693,
      "loss": 2.9665,
      "step": 35132
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7642037868499756,
      "learning_rate": 0.0005662305534372378,
      "loss": 2.9693,
      "step": 35133
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3405022621154785,
      "learning_rate": 0.0005662286679392854,
      "loss": 3.2917,
      "step": 35134
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3462002277374268,
      "learning_rate": 0.000566226782391836,
      "loss": 2.9103,
      "step": 35135
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.079077959060669,
      "learning_rate": 0.0005662248967948899,
      "loss": 3.004,
      "step": 35136
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2814836502075195,
      "learning_rate": 0.0005662230111484478,
      "loss": 2.9639,
      "step": 35137
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6030791997909546,
      "learning_rate": 0.0005662211254525096,
      "loss": 3.28,
      "step": 35138
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7026621103286743,
      "learning_rate": 0.000566219239707076,
      "loss": 2.9983,
      "step": 35139
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3003299236297607,
      "learning_rate": 0.0005662173539121471,
      "loss": 2.9241,
      "step": 35140
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.103912353515625,
      "learning_rate": 0.0005662154680677234,
      "loss": 3.0258,
      "step": 35141
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7549084424972534,
      "learning_rate": 0.0005662135821738053,
      "loss": 2.9776,
      "step": 35142
    },
    {
      "epoch": 0.46,
      "grad_norm": 5.366305351257324,
      "learning_rate": 0.0005662116962303931,
      "loss": 2.7565,
      "step": 35143
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.138648748397827,
      "learning_rate": 0.0005662098102374869,
      "loss": 3.4058,
      "step": 35144
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5271365642547607,
      "learning_rate": 0.0005662079241950875,
      "loss": 2.9018,
      "step": 35145
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4687227010726929,
      "learning_rate": 0.000566206038103195,
      "loss": 3.029,
      "step": 35146
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.329653739929199,
      "learning_rate": 0.0005662041519618097,
      "loss": 3.0494,
      "step": 35147
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8286187648773193,
      "learning_rate": 0.000566202265770932,
      "loss": 2.727,
      "step": 35148
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5731970071792603,
      "learning_rate": 0.0005662003795305624,
      "loss": 2.9409,
      "step": 35149
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5827001333236694,
      "learning_rate": 0.0005661984932407011,
      "loss": 3.0099,
      "step": 35150
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.469717264175415,
      "learning_rate": 0.0005661966069013483,
      "loss": 3.119,
      "step": 35151
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.434731364250183,
      "learning_rate": 0.0005661947205125046,
      "loss": 3.0787,
      "step": 35152
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4369651079177856,
      "learning_rate": 0.0005661928340741704,
      "loss": 2.94,
      "step": 35153
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5378837585449219,
      "learning_rate": 0.0005661909475863458,
      "loss": 3.0895,
      "step": 35154
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5094621181488037,
      "learning_rate": 0.0005661890610490313,
      "loss": 3.2552,
      "step": 35155
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.170319080352783,
      "learning_rate": 0.0005661871744622273,
      "loss": 3.1805,
      "step": 35156
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6383402347564697,
      "learning_rate": 0.000566185287825934,
      "loss": 3.165,
      "step": 35157
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.8154430389404297,
      "learning_rate": 0.0005661834011401519,
      "loss": 3.2817,
      "step": 35158
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.1716368198394775,
      "learning_rate": 0.0005661815144048812,
      "loss": 2.9764,
      "step": 35159
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7014267444610596,
      "learning_rate": 0.0005661796276201224,
      "loss": 2.9143,
      "step": 35160
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7497633695602417,
      "learning_rate": 0.0005661777407858757,
      "loss": 3.3372,
      "step": 35161
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.28082013130188,
      "learning_rate": 0.0005661758539021417,
      "loss": 3.0608,
      "step": 35162
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.177807569503784,
      "learning_rate": 0.0005661739669689204,
      "loss": 3.1725,
      "step": 35163
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7155205011367798,
      "learning_rate": 0.0005661720799862125,
      "loss": 2.9852,
      "step": 35164
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8739619255065918,
      "learning_rate": 0.0005661701929540181,
      "loss": 3.0507,
      "step": 35165
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0987377166748047,
      "learning_rate": 0.0005661683058723376,
      "loss": 2.9501,
      "step": 35166
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6652532815933228,
      "learning_rate": 0.0005661664187411714,
      "loss": 2.8136,
      "step": 35167
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3948181867599487,
      "learning_rate": 0.0005661645315605199,
      "loss": 2.9866,
      "step": 35168
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.575888752937317,
      "learning_rate": 0.0005661626443303833,
      "loss": 2.9099,
      "step": 35169
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2042148113250732,
      "learning_rate": 0.000566160757050762,
      "loss": 3.2186,
      "step": 35170
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3970160484313965,
      "learning_rate": 0.0005661588697216567,
      "loss": 3.1504,
      "step": 35171
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.037620782852173,
      "learning_rate": 0.0005661569823430671,
      "loss": 2.8736,
      "step": 35172
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.506429672241211,
      "learning_rate": 0.0005661550949149941,
      "loss": 3.0309,
      "step": 35173
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6132898330688477,
      "learning_rate": 0.0005661532074374377,
      "loss": 3.2409,
      "step": 35174
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4633052349090576,
      "learning_rate": 0.0005661513199103985,
      "loss": 3.1573,
      "step": 35175
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3955556154251099,
      "learning_rate": 0.0005661494323338767,
      "loss": 3.1191,
      "step": 35176
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.097381353378296,
      "learning_rate": 0.0005661475447078727,
      "loss": 2.9729,
      "step": 35177
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8545769453048706,
      "learning_rate": 0.0005661456570323867,
      "loss": 3.0668,
      "step": 35178
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9077367782592773,
      "learning_rate": 0.0005661437693074194,
      "loss": 3.2958,
      "step": 35179
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3162381649017334,
      "learning_rate": 0.0005661418815329709,
      "loss": 3.2272,
      "step": 35180
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7740771770477295,
      "learning_rate": 0.0005661399937090416,
      "loss": 3.0058,
      "step": 35181
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9653260707855225,
      "learning_rate": 0.0005661381058356318,
      "loss": 3.1829,
      "step": 35182
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3629374504089355,
      "learning_rate": 0.000566136217912742,
      "loss": 3.372,
      "step": 35183
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.494126319885254,
      "learning_rate": 0.0005661343299403724,
      "loss": 3.1103,
      "step": 35184
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.385427713394165,
      "learning_rate": 0.0005661324419185234,
      "loss": 3.0814,
      "step": 35185
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4833918809890747,
      "learning_rate": 0.0005661305538471953,
      "loss": 3.1574,
      "step": 35186
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.6804165840148926,
      "learning_rate": 0.0005661286657263887,
      "loss": 3.0605,
      "step": 35187
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2672486305236816,
      "learning_rate": 0.0005661267775561035,
      "loss": 3.0381,
      "step": 35188
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8330549001693726,
      "learning_rate": 0.0005661248893363405,
      "loss": 2.9502,
      "step": 35189
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7517032623291016,
      "learning_rate": 0.0005661230010670997,
      "loss": 2.7625,
      "step": 35190
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5670289993286133,
      "learning_rate": 0.0005661211127483818,
      "loss": 3.2304,
      "step": 35191
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.9277944564819336,
      "learning_rate": 0.0005661192243801869,
      "loss": 2.8761,
      "step": 35192
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.143723726272583,
      "learning_rate": 0.0005661173359625154,
      "loss": 3.1379,
      "step": 35193
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.087531328201294,
      "learning_rate": 0.0005661154474953675,
      "loss": 2.8706,
      "step": 35194
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.063316583633423,
      "learning_rate": 0.0005661135589787439,
      "loss": 3.0574,
      "step": 35195
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2817740440368652,
      "learning_rate": 0.0005661116704126448,
      "loss": 3.1848,
      "step": 35196
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5715136528015137,
      "learning_rate": 0.0005661097817970704,
      "loss": 2.8589,
      "step": 35197
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0764710903167725,
      "learning_rate": 0.0005661078931320212,
      "loss": 2.9726,
      "step": 35198
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.785054326057434,
      "learning_rate": 0.0005661060044174975,
      "loss": 3.1874,
      "step": 35199
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6004822254180908,
      "learning_rate": 0.0005661041156534997,
      "loss": 2.981,
      "step": 35200
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3781524896621704,
      "learning_rate": 0.0005661022268400281,
      "loss": 3.2774,
      "step": 35201
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5302966833114624,
      "learning_rate": 0.0005661003379770831,
      "loss": 2.9241,
      "step": 35202
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5502289533615112,
      "learning_rate": 0.000566098449064665,
      "loss": 3.0282,
      "step": 35203
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4505090713500977,
      "learning_rate": 0.0005660965601027743,
      "loss": 3.1633,
      "step": 35204
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.913387656211853,
      "learning_rate": 0.000566094671091411,
      "loss": 2.9824,
      "step": 35205
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.431901454925537,
      "learning_rate": 0.0005660927820305758,
      "loss": 3.0818,
      "step": 35206
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5333552360534668,
      "learning_rate": 0.000566090892920269,
      "loss": 3.1007,
      "step": 35207
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.262057900428772,
      "learning_rate": 0.0005660890037604907,
      "loss": 3.199,
      "step": 35208
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.41642165184021,
      "learning_rate": 0.0005660871145512416,
      "loss": 3.1471,
      "step": 35209
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.293480634689331,
      "learning_rate": 0.0005660852252925219,
      "loss": 3.1702,
      "step": 35210
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4840470552444458,
      "learning_rate": 0.0005660833359843317,
      "loss": 2.9168,
      "step": 35211
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3523213863372803,
      "learning_rate": 0.0005660814466266717,
      "loss": 3.2007,
      "step": 35212
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5440696477890015,
      "learning_rate": 0.0005660795572195422,
      "loss": 3.1457,
      "step": 35213
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5151909589767456,
      "learning_rate": 0.0005660776677629436,
      "loss": 3.1589,
      "step": 35214
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4421578645706177,
      "learning_rate": 0.0005660757782568759,
      "loss": 3.0825,
      "step": 35215
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7496843338012695,
      "learning_rate": 0.0005660738887013398,
      "loss": 2.9085,
      "step": 35216
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5242928266525269,
      "learning_rate": 0.0005660719990963355,
      "loss": 3.0945,
      "step": 35217
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6458832025527954,
      "learning_rate": 0.0005660701094418635,
      "loss": 2.8993,
      "step": 35218
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3523894548416138,
      "learning_rate": 0.0005660682197379239,
      "loss": 3.289,
      "step": 35219
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3450766801834106,
      "learning_rate": 0.0005660663299845173,
      "loss": 2.8817,
      "step": 35220
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6080501079559326,
      "learning_rate": 0.0005660644401816439,
      "loss": 3.1569,
      "step": 35221
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.613991141319275,
      "learning_rate": 0.0005660625503293041,
      "loss": 3.0574,
      "step": 35222
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5723187923431396,
      "learning_rate": 0.0005660606604274982,
      "loss": 2.9245,
      "step": 35223
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4586122035980225,
      "learning_rate": 0.0005660587704762268,
      "loss": 2.9908,
      "step": 35224
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2025907039642334,
      "learning_rate": 0.0005660568804754898,
      "loss": 2.9334,
      "step": 35225
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5602635145187378,
      "learning_rate": 0.000566054990425288,
      "loss": 3.1117,
      "step": 35226
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0835444927215576,
      "learning_rate": 0.0005660531003256215,
      "loss": 3.2126,
      "step": 35227
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8411130905151367,
      "learning_rate": 0.0005660512101764906,
      "loss": 2.6876,
      "step": 35228
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.225273847579956,
      "learning_rate": 0.0005660493199778959,
      "loss": 2.844,
      "step": 35229
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8988056182861328,
      "learning_rate": 0.0005660474297298374,
      "loss": 3.1022,
      "step": 35230
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.282862901687622,
      "learning_rate": 0.0005660455394323159,
      "loss": 2.9991,
      "step": 35231
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.406002163887024,
      "learning_rate": 0.0005660436490853315,
      "loss": 2.7591,
      "step": 35232
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5600693225860596,
      "learning_rate": 0.0005660417586888844,
      "loss": 3.1945,
      "step": 35233
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0338053703308105,
      "learning_rate": 0.0005660398682429752,
      "loss": 2.8776,
      "step": 35234
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5563486814498901,
      "learning_rate": 0.0005660379777476042,
      "loss": 3.137,
      "step": 35235
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.761232852935791,
      "learning_rate": 0.0005660360872027716,
      "loss": 2.8115,
      "step": 35236
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.537632942199707,
      "learning_rate": 0.000566034196608478,
      "loss": 2.7942,
      "step": 35237
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.98630952835083,
      "learning_rate": 0.0005660323059647236,
      "loss": 2.9959,
      "step": 35238
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8430432081222534,
      "learning_rate": 0.0005660304152715087,
      "loss": 2.8165,
      "step": 35239
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.294501304626465,
      "learning_rate": 0.0005660285245288338,
      "loss": 3.3072,
      "step": 35240
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.666750431060791,
      "learning_rate": 0.0005660266337366991,
      "loss": 2.9265,
      "step": 35241
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3278614282608032,
      "learning_rate": 0.0005660247428951051,
      "loss": 3.117,
      "step": 35242
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3929864168167114,
      "learning_rate": 0.0005660228520040521,
      "loss": 3.0404,
      "step": 35243
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4265252351760864,
      "learning_rate": 0.0005660209610635404,
      "loss": 3.128,
      "step": 35244
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8159432411193848,
      "learning_rate": 0.0005660190700735703,
      "loss": 2.8195,
      "step": 35245
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.229868769645691,
      "learning_rate": 0.0005660171790341423,
      "loss": 2.948,
      "step": 35246
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6203052997589111,
      "learning_rate": 0.0005660152879452566,
      "loss": 2.9713,
      "step": 35247
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.440049409866333,
      "learning_rate": 0.0005660133968069138,
      "loss": 3.0281,
      "step": 35248
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2400187253952026,
      "learning_rate": 0.0005660115056191139,
      "loss": 2.9853,
      "step": 35249
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4865602254867554,
      "learning_rate": 0.0005660096143818577,
      "loss": 3.1876,
      "step": 35250
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5045241117477417,
      "learning_rate": 0.0005660077230951451,
      "loss": 3.0087,
      "step": 35251
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6030768156051636,
      "learning_rate": 0.0005660058317589766,
      "loss": 2.881,
      "step": 35252
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.966117262840271,
      "learning_rate": 0.0005660039403733528,
      "loss": 2.8915,
      "step": 35253
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8913893699645996,
      "learning_rate": 0.0005660020489382737,
      "loss": 2.9013,
      "step": 35254
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3901804685592651,
      "learning_rate": 0.0005660001574537397,
      "loss": 2.8658,
      "step": 35255
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3861496448516846,
      "learning_rate": 0.0005659982659197515,
      "loss": 2.9741,
      "step": 35256
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4661738872528076,
      "learning_rate": 0.000565996374336309,
      "loss": 2.9573,
      "step": 35257
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5308517217636108,
      "learning_rate": 0.0005659944827034128,
      "loss": 3.1074,
      "step": 35258
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9275723695755005,
      "learning_rate": 0.0005659925910210632,
      "loss": 3.0773,
      "step": 35259
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8124306201934814,
      "learning_rate": 0.0005659906992892607,
      "loss": 3.0426,
      "step": 35260
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5634461641311646,
      "learning_rate": 0.0005659888075080052,
      "loss": 3.2713,
      "step": 35261
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.656980037689209,
      "learning_rate": 0.0005659869156772978,
      "loss": 3.1269,
      "step": 35262
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.252429246902466,
      "learning_rate": 0.000565985023797138,
      "loss": 3.1692,
      "step": 35263
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7474981546401978,
      "learning_rate": 0.0005659831318675267,
      "loss": 2.9534,
      "step": 35264
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7895097732543945,
      "learning_rate": 0.0005659812398884643,
      "loss": 3.1904,
      "step": 35265
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.3092832565307617,
      "learning_rate": 0.0005659793478599507,
      "loss": 3.1675,
      "step": 35266
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5798022747039795,
      "learning_rate": 0.0005659774557819866,
      "loss": 3.0695,
      "step": 35267
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7471020221710205,
      "learning_rate": 0.0005659755636545724,
      "loss": 3.0416,
      "step": 35268
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9193087816238403,
      "learning_rate": 0.0005659736714777082,
      "loss": 3.0265,
      "step": 35269
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.8590493202209473,
      "learning_rate": 0.0005659717792513945,
      "loss": 3.0055,
      "step": 35270
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.0520718097686768,
      "learning_rate": 0.0005659698869756315,
      "loss": 3.1559,
      "step": 35271
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.210975170135498,
      "learning_rate": 0.0005659679946504199,
      "loss": 3.1327,
      "step": 35272
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9388262033462524,
      "learning_rate": 0.0005659661022757597,
      "loss": 2.9684,
      "step": 35273
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.7358994483947754,
      "learning_rate": 0.0005659642098516514,
      "loss": 2.8591,
      "step": 35274
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5481088161468506,
      "learning_rate": 0.0005659623173780953,
      "loss": 3.1584,
      "step": 35275
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4039194583892822,
      "learning_rate": 0.0005659604248550919,
      "loss": 3.1671,
      "step": 35276
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8427678346633911,
      "learning_rate": 0.0005659585322826413,
      "loss": 2.8492,
      "step": 35277
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9176595211029053,
      "learning_rate": 0.000565956639660744,
      "loss": 3.3188,
      "step": 35278
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4326486587524414,
      "learning_rate": 0.0005659547469894005,
      "loss": 2.9857,
      "step": 35279
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2788655757904053,
      "learning_rate": 0.0005659528542686108,
      "loss": 3.1588,
      "step": 35280
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.7904961109161377,
      "learning_rate": 0.0005659509614983756,
      "loss": 3.0787,
      "step": 35281
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5470640659332275,
      "learning_rate": 0.0005659490686786949,
      "loss": 3.0388,
      "step": 35282
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5658220052719116,
      "learning_rate": 0.0005659471758095695,
      "loss": 2.8619,
      "step": 35283
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7892851829528809,
      "learning_rate": 0.0005659452828909992,
      "loss": 2.9085,
      "step": 35284
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3982635736465454,
      "learning_rate": 0.0005659433899229849,
      "loss": 3.0208,
      "step": 35285
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.494264841079712,
      "learning_rate": 0.0005659414969055266,
      "loss": 3.0263,
      "step": 35286
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.9825856685638428,
      "learning_rate": 0.0005659396038386248,
      "loss": 2.9661,
      "step": 35287
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.1804020404815674,
      "learning_rate": 0.0005659377107222798,
      "loss": 3.2212,
      "step": 35288
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9635945558547974,
      "learning_rate": 0.000565935817556492,
      "loss": 2.9644,
      "step": 35289
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.793958067893982,
      "learning_rate": 0.0005659339243412616,
      "loss": 3.3643,
      "step": 35290
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.803798198699951,
      "learning_rate": 0.0005659320310765891,
      "loss": 3.1679,
      "step": 35291
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.046804189682007,
      "learning_rate": 0.0005659301377624749,
      "loss": 2.8234,
      "step": 35292
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.682558298110962,
      "learning_rate": 0.0005659282443989192,
      "loss": 3.0358,
      "step": 35293
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.0279548168182373,
      "learning_rate": 0.0005659263509859225,
      "loss": 3.1073,
      "step": 35294
    },
    {
      "epoch": 0.46,
      "grad_norm": 4.039398670196533,
      "learning_rate": 0.0005659244575234849,
      "loss": 2.7827,
      "step": 35295
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5029655694961548,
      "learning_rate": 0.000565922564011607,
      "loss": 3.0753,
      "step": 35296
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8198734521865845,
      "learning_rate": 0.0005659206704502892,
      "loss": 3.2775,
      "step": 35297
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5842229127883911,
      "learning_rate": 0.0005659187768395316,
      "loss": 2.9956,
      "step": 35298
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.489861011505127,
      "learning_rate": 0.0005659168831793346,
      "loss": 3.3963,
      "step": 35299
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9576836824417114,
      "learning_rate": 0.0005659149894696988,
      "loss": 2.9145,
      "step": 35300
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.665062665939331,
      "learning_rate": 0.0005659130957106242,
      "loss": 2.9276,
      "step": 35301
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.03985595703125,
      "learning_rate": 0.0005659112019021116,
      "loss": 2.9373,
      "step": 35302
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9986180067062378,
      "learning_rate": 0.0005659093080441609,
      "loss": 3.0703,
      "step": 35303
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5513205528259277,
      "learning_rate": 0.0005659074141367727,
      "loss": 3.0808,
      "step": 35304
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.6887753009796143,
      "learning_rate": 0.0005659055201799473,
      "loss": 2.9169,
      "step": 35305
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5201613903045654,
      "learning_rate": 0.000565903626173685,
      "loss": 2.8192,
      "step": 35306
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7991690635681152,
      "learning_rate": 0.0005659017321179862,
      "loss": 3.0342,
      "step": 35307
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.331932544708252,
      "learning_rate": 0.0005658998380128513,
      "loss": 2.8974,
      "step": 35308
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.663668394088745,
      "learning_rate": 0.0005658979438582806,
      "loss": 2.9112,
      "step": 35309
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.299769401550293,
      "learning_rate": 0.0005658960496542744,
      "loss": 3.1678,
      "step": 35310
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6680302619934082,
      "learning_rate": 0.0005658941554008331,
      "loss": 2.9977,
      "step": 35311
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5598019361495972,
      "learning_rate": 0.0005658922610979571,
      "loss": 3.1001,
      "step": 35312
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6628661155700684,
      "learning_rate": 0.0005658903667456466,
      "loss": 3.3709,
      "step": 35313
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.647160291671753,
      "learning_rate": 0.0005658884723439022,
      "loss": 2.9634,
      "step": 35314
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2407593727111816,
      "learning_rate": 0.000565886577892724,
      "loss": 2.9314,
      "step": 35315
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7066653966903687,
      "learning_rate": 0.0005658846833921127,
      "loss": 2.9812,
      "step": 35316
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6689865589141846,
      "learning_rate": 0.0005658827888420681,
      "loss": 3.0974,
      "step": 35317
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6184182167053223,
      "learning_rate": 0.000565880894242591,
      "loss": 2.9696,
      "step": 35318
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.599435567855835,
      "learning_rate": 0.0005658789995936817,
      "loss": 2.9433,
      "step": 35319
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5984554290771484,
      "learning_rate": 0.0005658771048953405,
      "loss": 2.8457,
      "step": 35320
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3105075359344482,
      "learning_rate": 0.0005658752101475675,
      "loss": 2.8405,
      "step": 35321
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.1221011877059937,
      "learning_rate": 0.0005658733153503635,
      "loss": 3.2123,
      "step": 35322
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.524232029914856,
      "learning_rate": 0.0005658714205037285,
      "loss": 3.014,
      "step": 35323
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4557222127914429,
      "learning_rate": 0.0005658695256076632,
      "loss": 3.221,
      "step": 35324
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.522775173187256,
      "learning_rate": 0.0005658676306621675,
      "loss": 3.2212,
      "step": 35325
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.611083745956421,
      "learning_rate": 0.0005658657356672421,
      "loss": 3.1144,
      "step": 35326
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.757495403289795,
      "learning_rate": 0.0005658638406228872,
      "loss": 2.8935,
      "step": 35327
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3345694541931152,
      "learning_rate": 0.0005658619455291032,
      "loss": 3.1669,
      "step": 35328
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4602099657058716,
      "learning_rate": 0.0005658600503858905,
      "loss": 3.1332,
      "step": 35329
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.431206226348877,
      "learning_rate": 0.0005658581551932492,
      "loss": 3.0113,
      "step": 35330
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5073177814483643,
      "learning_rate": 0.0005658562599511802,
      "loss": 3.0973,
      "step": 35331
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6319599151611328,
      "learning_rate": 0.0005658543646596832,
      "loss": 3.1747,
      "step": 35332
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6338160037994385,
      "learning_rate": 0.000565852469318759,
      "loss": 3.0844,
      "step": 35333
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.64954674243927,
      "learning_rate": 0.0005658505739284077,
      "loss": 3.0182,
      "step": 35334
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.6452407836914062,
      "learning_rate": 0.0005658486784886298,
      "loss": 3.1224,
      "step": 35335
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3641581535339355,
      "learning_rate": 0.0005658467829994258,
      "loss": 2.9746,
      "step": 35336
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5769003629684448,
      "learning_rate": 0.0005658448874607956,
      "loss": 3.1227,
      "step": 35337
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.9812841415405273,
      "learning_rate": 0.00056584299187274,
      "loss": 2.8786,
      "step": 35338
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4782047271728516,
      "learning_rate": 0.000565841096235259,
      "loss": 3.1316,
      "step": 35339
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1402242183685303,
      "learning_rate": 0.0005658392005483533,
      "loss": 2.9185,
      "step": 35340
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0218725204467773,
      "learning_rate": 0.0005658373048120229,
      "loss": 2.9525,
      "step": 35341
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8042430877685547,
      "learning_rate": 0.0005658354090262684,
      "loss": 2.9774,
      "step": 35342
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.070781707763672,
      "learning_rate": 0.0005658335131910901,
      "loss": 3.0663,
      "step": 35343
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.213102102279663,
      "learning_rate": 0.0005658316173064883,
      "loss": 2.8998,
      "step": 35344
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.1548314094543457,
      "learning_rate": 0.0005658297213724634,
      "loss": 2.847,
      "step": 35345
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4142646789550781,
      "learning_rate": 0.0005658278253890158,
      "loss": 3.1508,
      "step": 35346
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7205911874771118,
      "learning_rate": 0.0005658259293561457,
      "loss": 3.1762,
      "step": 35347
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.942696452140808,
      "learning_rate": 0.0005658240332738535,
      "loss": 3.0526,
      "step": 35348
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.88715660572052,
      "learning_rate": 0.0005658221371421398,
      "loss": 3.0054,
      "step": 35349
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3093550205230713,
      "learning_rate": 0.0005658202409610045,
      "loss": 2.9595,
      "step": 35350
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.169492244720459,
      "learning_rate": 0.0005658183447304483,
      "loss": 2.9874,
      "step": 35351
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2591638565063477,
      "learning_rate": 0.0005658164484504715,
      "loss": 2.944,
      "step": 35352
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8494268655776978,
      "learning_rate": 0.0005658145521210744,
      "loss": 3.0099,
      "step": 35353
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2271311283111572,
      "learning_rate": 0.0005658126557422573,
      "loss": 2.9429,
      "step": 35354
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6870026588439941,
      "learning_rate": 0.0005658107593140207,
      "loss": 3.3547,
      "step": 35355
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0656347274780273,
      "learning_rate": 0.0005658088628363647,
      "loss": 2.9986,
      "step": 35356
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3427079916000366,
      "learning_rate": 0.0005658069663092899,
      "loss": 3.0984,
      "step": 35357
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0388455390930176,
      "learning_rate": 0.0005658050697327966,
      "loss": 3.236,
      "step": 35358
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.894975185394287,
      "learning_rate": 0.0005658031731068852,
      "loss": 3.0302,
      "step": 35359
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5855419635772705,
      "learning_rate": 0.0005658012764315559,
      "loss": 2.9214,
      "step": 35360
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4917843341827393,
      "learning_rate": 0.000565799379706809,
      "loss": 3.1317,
      "step": 35361
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.070700168609619,
      "learning_rate": 0.0005657974829326452,
      "loss": 3.1191,
      "step": 35362
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4480156898498535,
      "learning_rate": 0.0005657955861090644,
      "loss": 3.3673,
      "step": 35363
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7866017818450928,
      "learning_rate": 0.0005657936892360674,
      "loss": 3.189,
      "step": 35364
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.934757113456726,
      "learning_rate": 0.0005657917923136542,
      "loss": 3.0585,
      "step": 35365
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0516316890716553,
      "learning_rate": 0.0005657898953418252,
      "loss": 3.1407,
      "step": 35366
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.195277214050293,
      "learning_rate": 0.000565787998320581,
      "loss": 3.0709,
      "step": 35367
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.820790410041809,
      "learning_rate": 0.0005657861012499218,
      "loss": 3.1128,
      "step": 35368
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3075475692749023,
      "learning_rate": 0.000565784204129848,
      "loss": 3.4058,
      "step": 35369
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9834271669387817,
      "learning_rate": 0.0005657823069603597,
      "loss": 2.922,
      "step": 35370
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5742549896240234,
      "learning_rate": 0.0005657804097414576,
      "loss": 3.0748,
      "step": 35371
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5896989107131958,
      "learning_rate": 0.0005657785124731418,
      "loss": 3.3757,
      "step": 35372
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0983495712280273,
      "learning_rate": 0.0005657766151554129,
      "loss": 3.0347,
      "step": 35373
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.7589755058288574,
      "learning_rate": 0.000565774717788271,
      "loss": 2.9833,
      "step": 35374
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.384973168373108,
      "learning_rate": 0.0005657728203717166,
      "loss": 2.9306,
      "step": 35375
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.386833906173706,
      "learning_rate": 0.0005657709229057499,
      "loss": 3.2041,
      "step": 35376
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3224263191223145,
      "learning_rate": 0.0005657690253903715,
      "loss": 2.9045,
      "step": 35377
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.413703680038452,
      "learning_rate": 0.0005657671278255816,
      "loss": 3.1167,
      "step": 35378
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7410579919815063,
      "learning_rate": 0.0005657652302113805,
      "loss": 2.9903,
      "step": 35379
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4734774827957153,
      "learning_rate": 0.0005657633325477688,
      "loss": 3.0518,
      "step": 35380
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5143406391143799,
      "learning_rate": 0.0005657614348347466,
      "loss": 3.23,
      "step": 35381
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8898457288742065,
      "learning_rate": 0.0005657595370723142,
      "loss": 3.2274,
      "step": 35382
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2431412935256958,
      "learning_rate": 0.0005657576392604723,
      "loss": 3.018,
      "step": 35383
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0008981227874756,
      "learning_rate": 0.0005657557413992209,
      "loss": 3.4847,
      "step": 35384
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5065972805023193,
      "learning_rate": 0.0005657538434885606,
      "loss": 3.1245,
      "step": 35385
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2516274452209473,
      "learning_rate": 0.0005657519455284914,
      "loss": 3.2168,
      "step": 35386
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6715312004089355,
      "learning_rate": 0.0005657500475190141,
      "loss": 3.1586,
      "step": 35387
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.688141107559204,
      "learning_rate": 0.0005657481494601289,
      "loss": 2.8799,
      "step": 35388
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3959122896194458,
      "learning_rate": 0.0005657462513518359,
      "loss": 2.9642,
      "step": 35389
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5416308641433716,
      "learning_rate": 0.0005657443531941358,
      "loss": 2.9137,
      "step": 35390
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8599809408187866,
      "learning_rate": 0.0005657424549870288,
      "loss": 3.0318,
      "step": 35391
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7587543725967407,
      "learning_rate": 0.0005657405567305153,
      "loss": 2.9087,
      "step": 35392
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.433111548423767,
      "learning_rate": 0.0005657386584245955,
      "loss": 2.9607,
      "step": 35393
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.5769705772399902,
      "learning_rate": 0.0005657367600692698,
      "loss": 2.9396,
      "step": 35394
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.465210437774658,
      "learning_rate": 0.0005657348616645389,
      "loss": 3.1263,
      "step": 35395
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.388403296470642,
      "learning_rate": 0.0005657329632104027,
      "loss": 2.7993,
      "step": 35396
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.7044200897216797,
      "learning_rate": 0.0005657310647068617,
      "loss": 2.844,
      "step": 35397
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.6409108638763428,
      "learning_rate": 0.0005657291661539163,
      "loss": 2.9617,
      "step": 35398
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.0656797885894775,
      "learning_rate": 0.000565727267551567,
      "loss": 3.1388,
      "step": 35399
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4733912944793701,
      "learning_rate": 0.0005657253688998137,
      "loss": 3.1442,
      "step": 35400
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.303375482559204,
      "learning_rate": 0.0005657234701986572,
      "loss": 2.9774,
      "step": 35401
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.008357286453247,
      "learning_rate": 0.0005657215714480976,
      "loss": 2.923,
      "step": 35402
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.581973671913147,
      "learning_rate": 0.0005657196726481354,
      "loss": 3.2142,
      "step": 35403
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5229016542434692,
      "learning_rate": 0.000565717773798771,
      "loss": 3.2751,
      "step": 35404
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.318920850753784,
      "learning_rate": 0.0005657158749000045,
      "loss": 2.8678,
      "step": 35405
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.323460578918457,
      "learning_rate": 0.0005657139759518365,
      "loss": 3.1293,
      "step": 35406
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7560917139053345,
      "learning_rate": 0.0005657120769542672,
      "loss": 2.9906,
      "step": 35407
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.342026948928833,
      "learning_rate": 0.000565710177907297,
      "loss": 3.1017,
      "step": 35408
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4581151008605957,
      "learning_rate": 0.0005657082788109263,
      "loss": 2.9641,
      "step": 35409
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7480183839797974,
      "learning_rate": 0.0005657063796651555,
      "loss": 3.153,
      "step": 35410
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5857141017913818,
      "learning_rate": 0.0005657044804699848,
      "loss": 3.0684,
      "step": 35411
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.933843731880188,
      "learning_rate": 0.0005657025812254144,
      "loss": 3.0215,
      "step": 35412
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5627880096435547,
      "learning_rate": 0.0005657006819314452,
      "loss": 3.113,
      "step": 35413
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8669997453689575,
      "learning_rate": 0.0005656987825880771,
      "loss": 3.228,
      "step": 35414
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.165316104888916,
      "learning_rate": 0.0005656968831953106,
      "loss": 2.8799,
      "step": 35415
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.91413152217865,
      "learning_rate": 0.0005656949837531459,
      "loss": 3.1051,
      "step": 35416
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5739597082138062,
      "learning_rate": 0.0005656930842615836,
      "loss": 3.0601,
      "step": 35417
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6284258365631104,
      "learning_rate": 0.000565691184720624,
      "loss": 2.7554,
      "step": 35418
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5151252746582031,
      "learning_rate": 0.0005656892851302673,
      "loss": 3.0085,
      "step": 35419
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7899587154388428,
      "learning_rate": 0.000565687385490514,
      "loss": 3.1698,
      "step": 35420
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3171133995056152,
      "learning_rate": 0.0005656854858013643,
      "loss": 3.1005,
      "step": 35421
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2866170406341553,
      "learning_rate": 0.0005656835860628188,
      "loss": 3.2826,
      "step": 35422
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5898003578186035,
      "learning_rate": 0.0005656816862748777,
      "loss": 3.0429,
      "step": 35423
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4197962284088135,
      "learning_rate": 0.0005656797864375412,
      "loss": 2.7858,
      "step": 35424
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.43807852268219,
      "learning_rate": 0.00056567788655081,
      "loss": 3.2341,
      "step": 35425
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4840604066848755,
      "learning_rate": 0.0005656759866146841,
      "loss": 2.878,
      "step": 35426
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7402712106704712,
      "learning_rate": 0.0005656740866291642,
      "loss": 2.8773,
      "step": 35427
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4539117813110352,
      "learning_rate": 0.0005656721865942503,
      "loss": 3.1406,
      "step": 35428
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.442700982093811,
      "learning_rate": 0.0005656702865099429,
      "loss": 3.0592,
      "step": 35429
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7260379791259766,
      "learning_rate": 0.0005656683863762425,
      "loss": 2.8694,
      "step": 35430
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.771500825881958,
      "learning_rate": 0.0005656664861931494,
      "loss": 2.9408,
      "step": 35431
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5468448400497437,
      "learning_rate": 0.0005656645859606638,
      "loss": 2.9764,
      "step": 35432
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.659729480743408,
      "learning_rate": 0.0005656626856787861,
      "loss": 3.1096,
      "step": 35433
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7865341901779175,
      "learning_rate": 0.0005656607853475167,
      "loss": 3.27,
      "step": 35434
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7361112833023071,
      "learning_rate": 0.0005656588849668559,
      "loss": 2.8181,
      "step": 35435
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5244810581207275,
      "learning_rate": 0.0005656569845368042,
      "loss": 3.2596,
      "step": 35436
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4869482517242432,
      "learning_rate": 0.0005656550840573619,
      "loss": 2.845,
      "step": 35437
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.434898853302002,
      "learning_rate": 0.0005656531835285291,
      "loss": 3.2111,
      "step": 35438
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4473559856414795,
      "learning_rate": 0.0005656512829503065,
      "loss": 3.2279,
      "step": 35439
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.38832426071167,
      "learning_rate": 0.0005656493823226943,
      "loss": 3.1793,
      "step": 35440
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.22687828540802,
      "learning_rate": 0.0005656474816456928,
      "loss": 3.0371,
      "step": 35441
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4363203048706055,
      "learning_rate": 0.0005656455809193026,
      "loss": 3.2408,
      "step": 35442
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4212349653244019,
      "learning_rate": 0.0005656436801435236,
      "loss": 3.047,
      "step": 35443
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.463205337524414,
      "learning_rate": 0.0005656417793183567,
      "loss": 3.0655,
      "step": 35444
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4522004127502441,
      "learning_rate": 0.0005656398784438018,
      "loss": 3.2002,
      "step": 35445
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.411900520324707,
      "learning_rate": 0.0005656379775198595,
      "loss": 3.0157,
      "step": 35446
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.540771484375,
      "learning_rate": 0.0005656360765465301,
      "loss": 2.8408,
      "step": 35447
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3999723196029663,
      "learning_rate": 0.0005656341755238139,
      "loss": 2.9303,
      "step": 35448
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3929259777069092,
      "learning_rate": 0.0005656322744517113,
      "loss": 3.0529,
      "step": 35449
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4108853340148926,
      "learning_rate": 0.0005656303733302227,
      "loss": 3.2211,
      "step": 35450
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9753282070159912,
      "learning_rate": 0.0005656284721593483,
      "loss": 3.1414,
      "step": 35451
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.383939504623413,
      "learning_rate": 0.0005656265709390886,
      "loss": 3.3595,
      "step": 35452
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6194636821746826,
      "learning_rate": 0.0005656246696694438,
      "loss": 3.0716,
      "step": 35453
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.538615107536316,
      "learning_rate": 0.0005656227683504145,
      "loss": 3.0333,
      "step": 35454
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6913583278656006,
      "learning_rate": 0.0005656208669820009,
      "loss": 2.973,
      "step": 35455
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1100406646728516,
      "learning_rate": 0.0005656189655642034,
      "loss": 3.176,
      "step": 35456
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2670884132385254,
      "learning_rate": 0.0005656170640970222,
      "loss": 3.0235,
      "step": 35457
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7928096055984497,
      "learning_rate": 0.0005656151625804579,
      "loss": 3.1262,
      "step": 35458
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3809924125671387,
      "learning_rate": 0.0005656132610145106,
      "loss": 3.1199,
      "step": 35459
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.8061611652374268,
      "learning_rate": 0.0005656113593991809,
      "loss": 3.0742,
      "step": 35460
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5806031227111816,
      "learning_rate": 0.0005656094577344689,
      "loss": 3.2104,
      "step": 35461
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6643439531326294,
      "learning_rate": 0.0005656075560203752,
      "loss": 2.9139,
      "step": 35462
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7011767625808716,
      "learning_rate": 0.0005656056542569,
      "loss": 3.1046,
      "step": 35463
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.652342438697815,
      "learning_rate": 0.0005656037524440437,
      "loss": 3.1714,
      "step": 35464
    },
    {
      "epoch": 0.46,
      "grad_norm": 4.036515235900879,
      "learning_rate": 0.0005656018505818067,
      "loss": 3.1059,
      "step": 35465
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.104431629180908,
      "learning_rate": 0.0005655999486701892,
      "loss": 3.0943,
      "step": 35466
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6481056213378906,
      "learning_rate": 0.0005655980467091918,
      "loss": 3.3126,
      "step": 35467
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.814119577407837,
      "learning_rate": 0.0005655961446988147,
      "loss": 3.229,
      "step": 35468
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.1849708557128906,
      "learning_rate": 0.000565594242639058,
      "loss": 2.9264,
      "step": 35469
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1542906761169434,
      "learning_rate": 0.0005655923405299226,
      "loss": 3.0754,
      "step": 35470
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3809843063354492,
      "learning_rate": 0.0005655904383714084,
      "loss": 3.2035,
      "step": 35471
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.687928557395935,
      "learning_rate": 0.000565588536163516,
      "loss": 2.9765,
      "step": 35472
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.6005680561065674,
      "learning_rate": 0.0005655866339062457,
      "loss": 3.1369,
      "step": 35473
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.407920479774475,
      "learning_rate": 0.0005655847315995979,
      "loss": 3.0743,
      "step": 35474
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3569839000701904,
      "learning_rate": 0.0005655828292435728,
      "loss": 2.8735,
      "step": 35475
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.821743130683899,
      "learning_rate": 0.0005655809268381708,
      "loss": 3.0902,
      "step": 35476
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3206661939620972,
      "learning_rate": 0.0005655790243833923,
      "loss": 2.9768,
      "step": 35477
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.681868076324463,
      "learning_rate": 0.0005655771218792377,
      "loss": 2.9043,
      "step": 35478
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.554076075553894,
      "learning_rate": 0.0005655752193257072,
      "loss": 3.3624,
      "step": 35479
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6648930311203003,
      "learning_rate": 0.0005655733167228014,
      "loss": 2.8357,
      "step": 35480
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7516230344772339,
      "learning_rate": 0.0005655714140705203,
      "loss": 3.2448,
      "step": 35481
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6956675052642822,
      "learning_rate": 0.0005655695113688647,
      "loss": 2.9402,
      "step": 35482
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5360031127929688,
      "learning_rate": 0.0005655676086178346,
      "loss": 3.0198,
      "step": 35483
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2755653858184814,
      "learning_rate": 0.0005655657058174304,
      "loss": 3.162,
      "step": 35484
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.484882116317749,
      "learning_rate": 0.0005655638029676527,
      "loss": 3.1481,
      "step": 35485
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3920366764068604,
      "learning_rate": 0.0005655619000685015,
      "loss": 3.2779,
      "step": 35486
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5309981107711792,
      "learning_rate": 0.0005655599971199774,
      "loss": 3.1123,
      "step": 35487
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5869179964065552,
      "learning_rate": 0.0005655580941220808,
      "loss": 3.3343,
      "step": 35488
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8763649463653564,
      "learning_rate": 0.0005655561910748117,
      "loss": 3.0416,
      "step": 35489
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7246716022491455,
      "learning_rate": 0.0005655542879781708,
      "loss": 3.0737,
      "step": 35490
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6539654731750488,
      "learning_rate": 0.0005655523848321584,
      "loss": 2.7984,
      "step": 35491
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.743719220161438,
      "learning_rate": 0.0005655504816367748,
      "loss": 2.8916,
      "step": 35492
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.152501106262207,
      "learning_rate": 0.0005655485783920203,
      "loss": 3.1906,
      "step": 35493
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.659533143043518,
      "learning_rate": 0.0005655466750978953,
      "loss": 3.1599,
      "step": 35494
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.800719976425171,
      "learning_rate": 0.0005655447717544002,
      "loss": 3.2212,
      "step": 35495
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5774294137954712,
      "learning_rate": 0.0005655428683615353,
      "loss": 3.1483,
      "step": 35496
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.635008454322815,
      "learning_rate": 0.0005655409649193009,
      "loss": 2.9718,
      "step": 35497
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5332331657409668,
      "learning_rate": 0.0005655390614276976,
      "loss": 3.1214,
      "step": 35498
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8277201652526855,
      "learning_rate": 0.0005655371578867255,
      "loss": 3.046,
      "step": 35499
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5384957790374756,
      "learning_rate": 0.0005655352542963849,
      "loss": 3.3108,
      "step": 35500
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4434795379638672,
      "learning_rate": 0.0005655333506566763,
      "loss": 3.2341,
      "step": 35501
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.892275094985962,
      "learning_rate": 0.0005655314469676002,
      "loss": 2.7771,
      "step": 35502
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8527309894561768,
      "learning_rate": 0.0005655295432291567,
      "loss": 3.2662,
      "step": 35503
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5929149389266968,
      "learning_rate": 0.0005655276394413462,
      "loss": 3.126,
      "step": 35504
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4091811180114746,
      "learning_rate": 0.0005655257356041691,
      "loss": 2.9675,
      "step": 35505
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6806166172027588,
      "learning_rate": 0.0005655238317176258,
      "loss": 2.8558,
      "step": 35506
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6795049905776978,
      "learning_rate": 0.0005655219277817167,
      "loss": 2.945,
      "step": 35507
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9439444541931152,
      "learning_rate": 0.0005655200237964419,
      "loss": 3.0908,
      "step": 35508
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3539314270019531,
      "learning_rate": 0.000565518119761802,
      "loss": 3.0453,
      "step": 35509
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5539029836654663,
      "learning_rate": 0.0005655162156777973,
      "loss": 3.0703,
      "step": 35510
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.732285976409912,
      "learning_rate": 0.0005655143115444281,
      "loss": 3.0824,
      "step": 35511
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.8659069538116455,
      "learning_rate": 0.0005655124073616947,
      "loss": 2.9277,
      "step": 35512
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4292701482772827,
      "learning_rate": 0.0005655105031295976,
      "loss": 3.0049,
      "step": 35513
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.402162790298462,
      "learning_rate": 0.0005655085988481371,
      "loss": 3.2626,
      "step": 35514
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2624893188476562,
      "learning_rate": 0.0005655066945173135,
      "loss": 3.1707,
      "step": 35515
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4890047311782837,
      "learning_rate": 0.0005655047901371273,
      "loss": 3.3244,
      "step": 35516
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.492453932762146,
      "learning_rate": 0.0005655028857075786,
      "loss": 3.1328,
      "step": 35517
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3552980422973633,
      "learning_rate": 0.0005655009812286679,
      "loss": 3.1815,
      "step": 35518
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.330899715423584,
      "learning_rate": 0.0005654990767003956,
      "loss": 3.0176,
      "step": 35519
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8031177520751953,
      "learning_rate": 0.000565497172122762,
      "loss": 3.1328,
      "step": 35520
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6932612657546997,
      "learning_rate": 0.0005654952674957676,
      "loss": 2.9748,
      "step": 35521
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7784795761108398,
      "learning_rate": 0.0005654933628194125,
      "loss": 3.0281,
      "step": 35522
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3872088193893433,
      "learning_rate": 0.0005654914580936972,
      "loss": 2.987,
      "step": 35523
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5838491916656494,
      "learning_rate": 0.0005654895533186219,
      "loss": 3.083,
      "step": 35524
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3792340755462646,
      "learning_rate": 0.0005654876484941872,
      "loss": 2.8655,
      "step": 35525
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7063696384429932,
      "learning_rate": 0.0005654857436203934,
      "loss": 3.2602,
      "step": 35526
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3191035985946655,
      "learning_rate": 0.0005654838386972407,
      "loss": 3.12,
      "step": 35527
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7659707069396973,
      "learning_rate": 0.0005654819337247295,
      "loss": 3.034,
      "step": 35528
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.443992018699646,
      "learning_rate": 0.0005654800287028602,
      "loss": 3.0526,
      "step": 35529
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4548969268798828,
      "learning_rate": 0.0005654781236316333,
      "loss": 3.0826,
      "step": 35530
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4730098247528076,
      "learning_rate": 0.0005654762185110489,
      "loss": 3.1778,
      "step": 35531
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9644813537597656,
      "learning_rate": 0.0005654743133411075,
      "loss": 2.8815,
      "step": 35532
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4001237154006958,
      "learning_rate": 0.0005654724081218093,
      "loss": 3.0715,
      "step": 35533
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7089283466339111,
      "learning_rate": 0.0005654705028531548,
      "loss": 2.9898,
      "step": 35534
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4253010749816895,
      "learning_rate": 0.0005654685975351445,
      "loss": 3.1235,
      "step": 35535
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4082409143447876,
      "learning_rate": 0.0005654666921677783,
      "loss": 3.1229,
      "step": 35536
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.566620945930481,
      "learning_rate": 0.000565464786751057,
      "loss": 3.0549,
      "step": 35537
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.086022138595581,
      "learning_rate": 0.0005654628812849808,
      "loss": 3.152,
      "step": 35538
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7301636934280396,
      "learning_rate": 0.0005654609757695501,
      "loss": 3.0775,
      "step": 35539
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6850206851959229,
      "learning_rate": 0.000565459070204765,
      "loss": 3.2431,
      "step": 35540
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5136854648590088,
      "learning_rate": 0.0005654571645906262,
      "loss": 2.9127,
      "step": 35541
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3942855596542358,
      "learning_rate": 0.0005654552589271338,
      "loss": 3.1567,
      "step": 35542
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5833485126495361,
      "learning_rate": 0.0005654533532142884,
      "loss": 3.284,
      "step": 35543
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4620240926742554,
      "learning_rate": 0.00056545144745209,
      "loss": 3.0383,
      "step": 35544
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3308621644973755,
      "learning_rate": 0.0005654495416405393,
      "loss": 3.1775,
      "step": 35545
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.449473261833191,
      "learning_rate": 0.0005654476357796365,
      "loss": 3.3958,
      "step": 35546
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7150176763534546,
      "learning_rate": 0.000565445729869382,
      "loss": 3.0385,
      "step": 35547
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7359484434127808,
      "learning_rate": 0.000565443823909776,
      "loss": 3.064,
      "step": 35548
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4934406280517578,
      "learning_rate": 0.0005654419179008191,
      "loss": 3.1407,
      "step": 35549
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5600112676620483,
      "learning_rate": 0.0005654400118425116,
      "loss": 3.1784,
      "step": 35550
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.307744026184082,
      "learning_rate": 0.0005654381057348537,
      "loss": 3.1893,
      "step": 35551
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7251460552215576,
      "learning_rate": 0.0005654361995778459,
      "loss": 3.132,
      "step": 35552
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.488335132598877,
      "learning_rate": 0.0005654342933714884,
      "loss": 3.1213,
      "step": 35553
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3919309377670288,
      "learning_rate": 0.0005654323871157816,
      "loss": 3.0439,
      "step": 35554
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.619189977645874,
      "learning_rate": 0.0005654304808107261,
      "loss": 2.983,
      "step": 35555
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4516162872314453,
      "learning_rate": 0.000565428574456322,
      "loss": 2.9816,
      "step": 35556
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5150775909423828,
      "learning_rate": 0.0005654266680525697,
      "loss": 3.0117,
      "step": 35557
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7798904180526733,
      "learning_rate": 0.0005654247615994695,
      "loss": 2.952,
      "step": 35558
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.76980459690094,
      "learning_rate": 0.000565422855097022,
      "loss": 3.0026,
      "step": 35559
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5418871641159058,
      "learning_rate": 0.0005654209485452272,
      "loss": 2.9957,
      "step": 35560
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3297014236450195,
      "learning_rate": 0.0005654190419440857,
      "loss": 3.1199,
      "step": 35561
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9296789169311523,
      "learning_rate": 0.0005654171352935979,
      "loss": 3.1406,
      "step": 35562
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3503880500793457,
      "learning_rate": 0.0005654152285937639,
      "loss": 2.8231,
      "step": 35563
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8591539859771729,
      "learning_rate": 0.0005654133218445843,
      "loss": 3.4703,
      "step": 35564
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.904951333999634,
      "learning_rate": 0.0005654114150460593,
      "loss": 2.7858,
      "step": 35565
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8039872646331787,
      "learning_rate": 0.0005654095081981893,
      "loss": 3.0502,
      "step": 35566
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7341547012329102,
      "learning_rate": 0.0005654076013009747,
      "loss": 2.8934,
      "step": 35567
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0983197689056396,
      "learning_rate": 0.0005654056943544158,
      "loss": 2.9135,
      "step": 35568
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6672227382659912,
      "learning_rate": 0.0005654037873585129,
      "loss": 2.9661,
      "step": 35569
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.970528244972229,
      "learning_rate": 0.0005654018803132665,
      "loss": 3.1287,
      "step": 35570
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.990687608718872,
      "learning_rate": 0.0005653999732186768,
      "loss": 3.018,
      "step": 35571
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5038467645645142,
      "learning_rate": 0.0005653980660747444,
      "loss": 3.0188,
      "step": 35572
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4421823024749756,
      "learning_rate": 0.0005653961588814693,
      "loss": 3.3776,
      "step": 35573
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4102665185928345,
      "learning_rate": 0.0005653942516388521,
      "loss": 3.1244,
      "step": 35574
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8799149990081787,
      "learning_rate": 0.0005653923443468933,
      "loss": 3.193,
      "step": 35575
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6651160717010498,
      "learning_rate": 0.0005653904370055928,
      "loss": 2.9313,
      "step": 35576
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8231946229934692,
      "learning_rate": 0.0005653885296149513,
      "loss": 2.9907,
      "step": 35577
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0504324436187744,
      "learning_rate": 0.0005653866221749691,
      "loss": 3.0207,
      "step": 35578
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4887874126434326,
      "learning_rate": 0.0005653847146856464,
      "loss": 3.2794,
      "step": 35579
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.54463791847229,
      "learning_rate": 0.0005653828071469838,
      "loss": 3.1171,
      "step": 35580
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7301260232925415,
      "learning_rate": 0.0005653808995589814,
      "loss": 2.9809,
      "step": 35581
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7544242143630981,
      "learning_rate": 0.0005653789919216398,
      "loss": 3.2151,
      "step": 35582
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.628345012664795,
      "learning_rate": 0.0005653770842349592,
      "loss": 3.0904,
      "step": 35583
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4857662916183472,
      "learning_rate": 0.00056537517649894,
      "loss": 3.2116,
      "step": 35584
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0199527740478516,
      "learning_rate": 0.0005653732687135826,
      "loss": 3.0847,
      "step": 35585
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2110748291015625,
      "learning_rate": 0.0005653713608788872,
      "loss": 3.0069,
      "step": 35586
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.2636867761611938,
      "learning_rate": 0.0005653694529948542,
      "loss": 2.9289,
      "step": 35587
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6187920570373535,
      "learning_rate": 0.0005653675450614841,
      "loss": 3.0994,
      "step": 35588
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5602831840515137,
      "learning_rate": 0.0005653656370787772,
      "loss": 3.2775,
      "step": 35589
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5076892375946045,
      "learning_rate": 0.0005653637290467337,
      "loss": 3.212,
      "step": 35590
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5301331281661987,
      "learning_rate": 0.0005653618209653543,
      "loss": 2.9784,
      "step": 35591
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.3465781211853027,
      "learning_rate": 0.0005653599128346389,
      "loss": 3.1755,
      "step": 35592
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.974177598953247,
      "learning_rate": 0.0005653580046545882,
      "loss": 3.0193,
      "step": 35593
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3844192028045654,
      "learning_rate": 0.0005653560964252024,
      "loss": 2.8969,
      "step": 35594
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.096541404724121,
      "learning_rate": 0.0005653541881464819,
      "loss": 3.075,
      "step": 35595
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2021896839141846,
      "learning_rate": 0.000565352279818427,
      "loss": 2.9851,
      "step": 35596
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8492629528045654,
      "learning_rate": 0.0005653503714410381,
      "loss": 3.3878,
      "step": 35597
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.983410120010376,
      "learning_rate": 0.0005653484630143156,
      "loss": 3.0957,
      "step": 35598
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1450133323669434,
      "learning_rate": 0.0005653465545382598,
      "loss": 3.0133,
      "step": 35599
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1703898906707764,
      "learning_rate": 0.0005653446460128711,
      "loss": 3.2508,
      "step": 35600
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4521126747131348,
      "learning_rate": 0.0005653427374381498,
      "loss": 2.8599,
      "step": 35601
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8902031183242798,
      "learning_rate": 0.0005653408288140964,
      "loss": 3.0605,
      "step": 35602
    },
    {
      "epoch": 0.46,
      "grad_norm": 4.852969646453857,
      "learning_rate": 0.0005653389201407109,
      "loss": 2.9277,
      "step": 35603
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.685091018676758,
      "learning_rate": 0.000565337011417994,
      "loss": 3.0313,
      "step": 35604
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.534056544303894,
      "learning_rate": 0.0005653351026459459,
      "loss": 2.7347,
      "step": 35605
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.817305564880371,
      "learning_rate": 0.0005653331938245671,
      "loss": 3.0373,
      "step": 35606
    },
    {
      "epoch": 0.46,
      "grad_norm": 4.043213367462158,
      "learning_rate": 0.0005653312849538577,
      "loss": 3.1902,
      "step": 35607
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.568267345428467,
      "learning_rate": 0.0005653293760338183,
      "loss": 2.976,
      "step": 35608
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5469412803649902,
      "learning_rate": 0.0005653274670644492,
      "loss": 3.1931,
      "step": 35609
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.655505657196045,
      "learning_rate": 0.0005653255580457507,
      "loss": 2.7798,
      "step": 35610
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8404468297958374,
      "learning_rate": 0.0005653236489777231,
      "loss": 3.2131,
      "step": 35611
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3581161499023438,
      "learning_rate": 0.0005653217398603668,
      "loss": 3.0247,
      "step": 35612
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3208743333816528,
      "learning_rate": 0.0005653198306936822,
      "loss": 3.0213,
      "step": 35613
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8021087646484375,
      "learning_rate": 0.0005653179214776697,
      "loss": 3.2305,
      "step": 35614
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5010502338409424,
      "learning_rate": 0.0005653160122123295,
      "loss": 3.1747,
      "step": 35615
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3836182355880737,
      "learning_rate": 0.0005653141028976622,
      "loss": 3.0171,
      "step": 35616
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5532609224319458,
      "learning_rate": 0.0005653121935336679,
      "loss": 2.7258,
      "step": 35617
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4556607007980347,
      "learning_rate": 0.0005653102841203471,
      "loss": 3.134,
      "step": 35618
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3537142276763916,
      "learning_rate": 0.0005653083746577,
      "loss": 3.3162,
      "step": 35619
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3014932870864868,
      "learning_rate": 0.0005653064651457272,
      "loss": 2.9109,
      "step": 35620
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4364755153656006,
      "learning_rate": 0.0005653045555844288,
      "loss": 3.1474,
      "step": 35621
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8144806623458862,
      "learning_rate": 0.0005653026459738053,
      "loss": 3.1694,
      "step": 35622
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0810985565185547,
      "learning_rate": 0.0005653007363138571,
      "loss": 3.0784,
      "step": 35623
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5215884447097778,
      "learning_rate": 0.0005652988266045845,
      "loss": 3.1793,
      "step": 35624
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.808538794517517,
      "learning_rate": 0.0005652969168459878,
      "loss": 3.2253,
      "step": 35625
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7618587017059326,
      "learning_rate": 0.0005652950070380673,
      "loss": 3.225,
      "step": 35626
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.490006685256958,
      "learning_rate": 0.0005652930971808236,
      "loss": 3.1459,
      "step": 35627
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9023756980895996,
      "learning_rate": 0.0005652911872742569,
      "loss": 2.9848,
      "step": 35628
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8728052377700806,
      "learning_rate": 0.0005652892773183674,
      "loss": 2.9648,
      "step": 35629
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3650844097137451,
      "learning_rate": 0.0005652873673131557,
      "loss": 3.1263,
      "step": 35630
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.272223949432373,
      "learning_rate": 0.0005652854572586221,
      "loss": 3.1585,
      "step": 35631
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.650790214538574,
      "learning_rate": 0.000565283547154767,
      "loss": 3.0364,
      "step": 35632
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.4091970920562744,
      "learning_rate": 0.0005652816370015906,
      "loss": 3.0647,
      "step": 35633
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.907558560371399,
      "learning_rate": 0.0005652797267990933,
      "loss": 2.9402,
      "step": 35634
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7938402891159058,
      "learning_rate": 0.0005652778165472755,
      "loss": 3.0068,
      "step": 35635
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6965118646621704,
      "learning_rate": 0.0005652759062461377,
      "loss": 2.9387,
      "step": 35636
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4356061220169067,
      "learning_rate": 0.0005652739958956799,
      "loss": 2.9492,
      "step": 35637
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6560313701629639,
      "learning_rate": 0.0005652720854959027,
      "loss": 3.0851,
      "step": 35638
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.140521764755249,
      "learning_rate": 0.0005652701750468064,
      "loss": 2.8258,
      "step": 35639
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5684903860092163,
      "learning_rate": 0.0005652682645483914,
      "loss": 3.1812,
      "step": 35640
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4811046123504639,
      "learning_rate": 0.000565266354000658,
      "loss": 3.1366,
      "step": 35641
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9394322633743286,
      "learning_rate": 0.0005652644434036066,
      "loss": 3.2775,
      "step": 35642
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1337716579437256,
      "learning_rate": 0.0005652625327572376,
      "loss": 3.2136,
      "step": 35643
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6340415477752686,
      "learning_rate": 0.0005652606220615513,
      "loss": 3.2571,
      "step": 35644
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.098710060119629,
      "learning_rate": 0.0005652587113165478,
      "loss": 3.0644,
      "step": 35645
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5641467571258545,
      "learning_rate": 0.0005652568005222279,
      "loss": 3.3133,
      "step": 35646
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5605735778808594,
      "learning_rate": 0.0005652548896785917,
      "loss": 2.96,
      "step": 35647
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.293045997619629,
      "learning_rate": 0.0005652529787856397,
      "loss": 3.1091,
      "step": 35648
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3025691509246826,
      "learning_rate": 0.0005652510678433721,
      "loss": 3.1979,
      "step": 35649
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.903503179550171,
      "learning_rate": 0.0005652491568517892,
      "loss": 3.0922,
      "step": 35650
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8733878135681152,
      "learning_rate": 0.0005652472458108917,
      "loss": 3.2845,
      "step": 35651
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6757386922836304,
      "learning_rate": 0.0005652453347206796,
      "loss": 3.2594,
      "step": 35652
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6096546649932861,
      "learning_rate": 0.0005652434235811534,
      "loss": 3.0212,
      "step": 35653
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.725480318069458,
      "learning_rate": 0.0005652415123923134,
      "loss": 3.0237,
      "step": 35654
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.735411286354065,
      "learning_rate": 0.0005652396011541601,
      "loss": 3.193,
      "step": 35655
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.5855910778045654,
      "learning_rate": 0.0005652376898666936,
      "loss": 3.2214,
      "step": 35656
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.669979453086853,
      "learning_rate": 0.0005652357785299146,
      "loss": 3.1336,
      "step": 35657
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7221534252166748,
      "learning_rate": 0.0005652338671438232,
      "loss": 3.0143,
      "step": 35658
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.249159336090088,
      "learning_rate": 0.0005652319557084198,
      "loss": 2.9337,
      "step": 35659
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.6582987308502197,
      "learning_rate": 0.0005652300442237047,
      "loss": 2.9133,
      "step": 35660
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4544459581375122,
      "learning_rate": 0.0005652281326896783,
      "loss": 3.0745,
      "step": 35661
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.094991683959961,
      "learning_rate": 0.0005652262211063411,
      "loss": 3.3486,
      "step": 35662
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.915147304534912,
      "learning_rate": 0.0005652243094736933,
      "loss": 3.024,
      "step": 35663
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.2011303901672363,
      "learning_rate": 0.0005652223977917353,
      "loss": 3.2422,
      "step": 35664
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.383984088897705,
      "learning_rate": 0.0005652204860604674,
      "loss": 3.105,
      "step": 35665
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.142782688140869,
      "learning_rate": 0.0005652185742798901,
      "loss": 2.9072,
      "step": 35666
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6603175401687622,
      "learning_rate": 0.0005652166624500036,
      "loss": 3.201,
      "step": 35667
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5760761499404907,
      "learning_rate": 0.0005652147505708083,
      "loss": 3.2514,
      "step": 35668
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6051985025405884,
      "learning_rate": 0.0005652128386423046,
      "loss": 3.2072,
      "step": 35669
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6696746349334717,
      "learning_rate": 0.0005652109266644929,
      "loss": 3.0765,
      "step": 35670
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0250985622406006,
      "learning_rate": 0.0005652090146373733,
      "loss": 3.1539,
      "step": 35671
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3523046970367432,
      "learning_rate": 0.0005652071025609465,
      "loss": 3.2073,
      "step": 35672
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.494089961051941,
      "learning_rate": 0.0005652051904352126,
      "loss": 2.9586,
      "step": 35673
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.7448883056640625,
      "learning_rate": 0.0005652032782601722,
      "loss": 2.9121,
      "step": 35674
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3177587985992432,
      "learning_rate": 0.0005652013660358253,
      "loss": 3.1453,
      "step": 35675
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7152724266052246,
      "learning_rate": 0.0005651994537621725,
      "loss": 2.8319,
      "step": 35676
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8683916330337524,
      "learning_rate": 0.0005651975414392143,
      "loss": 3.1258,
      "step": 35677
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.404329776763916,
      "learning_rate": 0.0005651956290669507,
      "loss": 3.1044,
      "step": 35678
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3613449335098267,
      "learning_rate": 0.0005651937166453822,
      "loss": 3.242,
      "step": 35679
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5219968557357788,
      "learning_rate": 0.0005651918041745094,
      "loss": 3.0811,
      "step": 35680
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6102021932601929,
      "learning_rate": 0.0005651898916543322,
      "loss": 2.8064,
      "step": 35681
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9291051626205444,
      "learning_rate": 0.0005651879790848513,
      "loss": 3.1611,
      "step": 35682
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5042517185211182,
      "learning_rate": 0.0005651860664660669,
      "loss": 3.2295,
      "step": 35683
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6548538208007812,
      "learning_rate": 0.0005651841537979794,
      "loss": 2.9922,
      "step": 35684
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.64180326461792,
      "learning_rate": 0.0005651822410805892,
      "loss": 2.9358,
      "step": 35685
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.605434775352478,
      "learning_rate": 0.0005651803283138967,
      "loss": 3.2032,
      "step": 35686
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4078502655029297,
      "learning_rate": 0.000565178415497902,
      "loss": 3.1024,
      "step": 35687
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4762861728668213,
      "learning_rate": 0.0005651765026326057,
      "loss": 3.0762,
      "step": 35688
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8639427423477173,
      "learning_rate": 0.0005651745897180081,
      "loss": 3.1682,
      "step": 35689
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1875383853912354,
      "learning_rate": 0.0005651726767541095,
      "loss": 3.1544,
      "step": 35690
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.175168752670288,
      "learning_rate": 0.0005651707637409104,
      "loss": 2.9068,
      "step": 35691
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5160036087036133,
      "learning_rate": 0.0005651688506784109,
      "loss": 2.9801,
      "step": 35692
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5115442276000977,
      "learning_rate": 0.0005651669375666117,
      "loss": 2.9002,
      "step": 35693
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.3060017824172974,
      "learning_rate": 0.0005651650244055127,
      "loss": 2.9693,
      "step": 35694
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6271506547927856,
      "learning_rate": 0.0005651631111951147,
      "loss": 2.9694,
      "step": 35695
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7156413793563843,
      "learning_rate": 0.0005651611979354178,
      "loss": 2.9293,
      "step": 35696
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.5718321800231934,
      "learning_rate": 0.0005651592846264224,
      "loss": 2.8696,
      "step": 35697
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.092937707901001,
      "learning_rate": 0.000565157371268129,
      "loss": 3.1389,
      "step": 35698
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.0230069160461426,
      "learning_rate": 0.0005651554578605378,
      "loss": 3.0541,
      "step": 35699
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.518510341644287,
      "learning_rate": 0.0005651535444036492,
      "loss": 2.9336,
      "step": 35700
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.6241868734359741,
      "learning_rate": 0.0005651516308974635,
      "loss": 3.0503,
      "step": 35701
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.688875198364258,
      "learning_rate": 0.0005651497173419811,
      "loss": 2.7363,
      "step": 35702
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.55192494392395,
      "learning_rate": 0.0005651478037372023,
      "loss": 2.7902,
      "step": 35703
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8039926290512085,
      "learning_rate": 0.0005651458900831277,
      "loss": 3.1168,
      "step": 35704
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.1277499198913574,
      "learning_rate": 0.0005651439763797574,
      "loss": 2.8819,
      "step": 35705
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.9949699640274048,
      "learning_rate": 0.0005651420626270918,
      "loss": 2.9995,
      "step": 35706
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4692983627319336,
      "learning_rate": 0.0005651401488251314,
      "loss": 2.9201,
      "step": 35707
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.7998558282852173,
      "learning_rate": 0.0005651382349738763,
      "loss": 3.2181,
      "step": 35708
    },
    {
      "epoch": 0.46,
      "grad_norm": 2.276883125305176,
      "learning_rate": 0.000565136321073327,
      "loss": 3.0815,
      "step": 35709
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4686919450759888,
      "learning_rate": 0.0005651344071234839,
      "loss": 2.9992,
      "step": 35710
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.488593339920044,
      "learning_rate": 0.0005651324931243473,
      "loss": 3.0506,
      "step": 35711
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.4127200841903687,
      "learning_rate": 0.0005651305790759176,
      "loss": 2.8948,
      "step": 35712
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3382890224456787,
      "learning_rate": 0.000565128664978195,
      "loss": 3.1622,
      "step": 35713
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.765012264251709,
      "learning_rate": 0.0005651267508311801,
      "loss": 2.7626,
      "step": 35714
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5537376403808594,
      "learning_rate": 0.0005651248366348731,
      "loss": 3.2498,
      "step": 35715
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.945349931716919,
      "learning_rate": 0.0005651229223892744,
      "loss": 3.0448,
      "step": 35716
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.437628984451294,
      "learning_rate": 0.0005651210080943843,
      "loss": 3.0616,
      "step": 35717
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.678407073020935,
      "learning_rate": 0.0005651190937502032,
      "loss": 2.8957,
      "step": 35718
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.267374038696289,
      "learning_rate": 0.0005651171793567315,
      "loss": 2.789,
      "step": 35719
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2750859260559082,
      "learning_rate": 0.0005651152649139695,
      "loss": 2.8699,
      "step": 35720
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4448000192642212,
      "learning_rate": 0.0005651133504219176,
      "loss": 2.8059,
      "step": 35721
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4155409336090088,
      "learning_rate": 0.0005651114358805761,
      "loss": 3.1908,
      "step": 35722
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6337699890136719,
      "learning_rate": 0.0005651095212899453,
      "loss": 3.1935,
      "step": 35723
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.438242793083191,
      "learning_rate": 0.0005651076066500258,
      "loss": 3.0368,
      "step": 35724
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8397622108459473,
      "learning_rate": 0.0005651056919608176,
      "loss": 2.8224,
      "step": 35725
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5782825946807861,
      "learning_rate": 0.0005651037772223214,
      "loss": 2.7934,
      "step": 35726
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6044485569000244,
      "learning_rate": 0.0005651018624345373,
      "loss": 3.102,
      "step": 35727
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.398547649383545,
      "learning_rate": 0.0005650999475974659,
      "loss": 2.8645,
      "step": 35728
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4015109539031982,
      "learning_rate": 0.0005650980327111072,
      "loss": 2.757,
      "step": 35729
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3925050497055054,
      "learning_rate": 0.000565096117775462,
      "loss": 3.0596,
      "step": 35730
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.549140453338623,
      "learning_rate": 0.0005650942027905303,
      "loss": 3.1628,
      "step": 35731
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4834121465682983,
      "learning_rate": 0.0005650922877563125,
      "loss": 2.7774,
      "step": 35732
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1709463596343994,
      "learning_rate": 0.0005650903726728093,
      "loss": 2.9175,
      "step": 35733
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4198938608169556,
      "learning_rate": 0.0005650884575400206,
      "loss": 3.0216,
      "step": 35734
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4529554843902588,
      "learning_rate": 0.0005650865423579469,
      "loss": 2.8591,
      "step": 35735
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7775652408599854,
      "learning_rate": 0.0005650846271265887,
      "loss": 3.0364,
      "step": 35736
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9531601667404175,
      "learning_rate": 0.0005650827118459463,
      "loss": 3.2977,
      "step": 35737
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3594601154327393,
      "learning_rate": 0.0005650807965160201,
      "loss": 2.9474,
      "step": 35738
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.146664619445801,
      "learning_rate": 0.0005650788811368101,
      "loss": 3.2582,
      "step": 35739
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4810924530029297,
      "learning_rate": 0.0005650769657083171,
      "loss": 3.0849,
      "step": 35740
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.408827781677246,
      "learning_rate": 0.0005650750502305412,
      "loss": 3.036,
      "step": 35741
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0806567668914795,
      "learning_rate": 0.0005650731347034831,
      "loss": 3.1931,
      "step": 35742
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8571574687957764,
      "learning_rate": 0.0005650712191271428,
      "loss": 3.1843,
      "step": 35743
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5394147634506226,
      "learning_rate": 0.0005650693035015205,
      "loss": 3.1472,
      "step": 35744
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3805537223815918,
      "learning_rate": 0.0005650673878266171,
      "loss": 3.1903,
      "step": 35745
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.115623712539673,
      "learning_rate": 0.0005650654721024325,
      "loss": 2.9789,
      "step": 35746
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5283476114273071,
      "learning_rate": 0.0005650635563289672,
      "loss": 2.935,
      "step": 35747
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.040487051010132,
      "learning_rate": 0.0005650616405062217,
      "loss": 3.1657,
      "step": 35748
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.500095009803772,
      "learning_rate": 0.0005650597246341962,
      "loss": 2.8537,
      "step": 35749
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.264181137084961,
      "learning_rate": 0.0005650578087128911,
      "loss": 3.232,
      "step": 35750
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5642422437667847,
      "learning_rate": 0.0005650558927423068,
      "loss": 3.1639,
      "step": 35751
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0876572132110596,
      "learning_rate": 0.0005650539767224436,
      "loss": 2.8472,
      "step": 35752
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6069145202636719,
      "learning_rate": 0.0005650520606533018,
      "loss": 3.0669,
      "step": 35753
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7374662160873413,
      "learning_rate": 0.0005650501445348818,
      "loss": 2.9437,
      "step": 35754
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9648776054382324,
      "learning_rate": 0.0005650482283671841,
      "loss": 3.0759,
      "step": 35755
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.376566767692566,
      "learning_rate": 0.0005650463121502088,
      "loss": 3.2106,
      "step": 35756
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.591168999671936,
      "learning_rate": 0.0005650443958839564,
      "loss": 2.9079,
      "step": 35757
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0674359798431396,
      "learning_rate": 0.0005650424795684274,
      "loss": 3.1425,
      "step": 35758
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4072182178497314,
      "learning_rate": 0.0005650405632036218,
      "loss": 3.5275,
      "step": 35759
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5958718061447144,
      "learning_rate": 0.0005650386467895402,
      "loss": 3.0045,
      "step": 35760
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6994092464447021,
      "learning_rate": 0.0005650367303261829,
      "loss": 3.1274,
      "step": 35761
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2611638307571411,
      "learning_rate": 0.0005650348138135504,
      "loss": 3.429,
      "step": 35762
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1217238903045654,
      "learning_rate": 0.0005650328972516428,
      "loss": 3.2077,
      "step": 35763
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3532989025115967,
      "learning_rate": 0.0005650309806404605,
      "loss": 3.1853,
      "step": 35764
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3037242889404297,
      "learning_rate": 0.0005650290639800041,
      "loss": 3.1674,
      "step": 35765
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0428733825683594,
      "learning_rate": 0.0005650271472702738,
      "loss": 3.2697,
      "step": 35766
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.379103422164917,
      "learning_rate": 0.0005650252305112699,
      "loss": 3.1174,
      "step": 35767
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4494879245758057,
      "learning_rate": 0.0005650233137029928,
      "loss": 3.0945,
      "step": 35768
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.26806640625,
      "learning_rate": 0.0005650213968454429,
      "loss": 2.7738,
      "step": 35769
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5197868347167969,
      "learning_rate": 0.0005650194799386205,
      "loss": 3.0595,
      "step": 35770
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8412362337112427,
      "learning_rate": 0.000565017562982526,
      "loss": 3.0205,
      "step": 35771
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8883298635482788,
      "learning_rate": 0.0005650156459771597,
      "loss": 3.2164,
      "step": 35772
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4173288345336914,
      "learning_rate": 0.000565013728922522,
      "loss": 3.1575,
      "step": 35773
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3577346801757812,
      "learning_rate": 0.0005650118118186132,
      "loss": 2.8804,
      "step": 35774
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.809112310409546,
      "learning_rate": 0.0005650098946654337,
      "loss": 3.1847,
      "step": 35775
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8587673902511597,
      "learning_rate": 0.0005650079774629839,
      "loss": 3.1682,
      "step": 35776
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.265949010848999,
      "learning_rate": 0.0005650060602112641,
      "loss": 3.1857,
      "step": 35777
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8122214078903198,
      "learning_rate": 0.0005650041429102747,
      "loss": 3.1975,
      "step": 35778
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2717905044555664,
      "learning_rate": 0.000565002225560016,
      "loss": 3.1628,
      "step": 35779
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6752907037734985,
      "learning_rate": 0.0005650003081604884,
      "loss": 3.0679,
      "step": 35780
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1340606212615967,
      "learning_rate": 0.0005649983907116922,
      "loss": 2.9711,
      "step": 35781
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.02297043800354,
      "learning_rate": 0.0005649964732136279,
      "loss": 2.9599,
      "step": 35782
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6837927103042603,
      "learning_rate": 0.0005649945556662957,
      "loss": 2.9062,
      "step": 35783
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5935273170471191,
      "learning_rate": 0.0005649926380696959,
      "loss": 3.1827,
      "step": 35784
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5591092109680176,
      "learning_rate": 0.0005649907204238291,
      "loss": 2.8962,
      "step": 35785
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1769425868988037,
      "learning_rate": 0.0005649888027286955,
      "loss": 3.0064,
      "step": 35786
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.380182147026062,
      "learning_rate": 0.0005649868849842954,
      "loss": 3.2861,
      "step": 35787
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.697525978088379,
      "learning_rate": 0.0005649849671906294,
      "loss": 3.1791,
      "step": 35788
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.173039197921753,
      "learning_rate": 0.0005649830493476976,
      "loss": 3.2591,
      "step": 35789
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3251736164093018,
      "learning_rate": 0.0005649811314555003,
      "loss": 2.9811,
      "step": 35790
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.600509762763977,
      "learning_rate": 0.0005649792135140382,
      "loss": 3.1561,
      "step": 35791
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5798379182815552,
      "learning_rate": 0.0005649772955233115,
      "loss": 2.94,
      "step": 35792
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.110642910003662,
      "learning_rate": 0.0005649753774833204,
      "loss": 2.9387,
      "step": 35793
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8430153131484985,
      "learning_rate": 0.0005649734593940654,
      "loss": 3.071,
      "step": 35794
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6872446537017822,
      "learning_rate": 0.0005649715412555468,
      "loss": 3.2357,
      "step": 35795
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2912322282791138,
      "learning_rate": 0.0005649696230677649,
      "loss": 3.0941,
      "step": 35796
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3468457460403442,
      "learning_rate": 0.0005649677048307205,
      "loss": 3.1058,
      "step": 35797
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3779644966125488,
      "learning_rate": 0.0005649657865444133,
      "loss": 3.0698,
      "step": 35798
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.38118577003479,
      "learning_rate": 0.0005649638682088439,
      "loss": 2.976,
      "step": 35799
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.021939516067505,
      "learning_rate": 0.000564961949824013,
      "loss": 3.197,
      "step": 35800
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6573866605758667,
      "learning_rate": 0.0005649600313899205,
      "loss": 3.1514,
      "step": 35801
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4034724235534668,
      "learning_rate": 0.0005649581129065669,
      "loss": 2.9661,
      "step": 35802
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9659966230392456,
      "learning_rate": 0.0005649561943739526,
      "loss": 2.8723,
      "step": 35803
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8338416814804077,
      "learning_rate": 0.000564954275792078,
      "loss": 3.1304,
      "step": 35804
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5584943294525146,
      "learning_rate": 0.0005649523571609434,
      "loss": 2.9851,
      "step": 35805
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2894978523254395,
      "learning_rate": 0.0005649504384805491,
      "loss": 2.9697,
      "step": 35806
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7994916439056396,
      "learning_rate": 0.0005649485197508955,
      "loss": 3.0971,
      "step": 35807
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5387245416641235,
      "learning_rate": 0.000564946600971983,
      "loss": 2.8004,
      "step": 35808
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4659662246704102,
      "learning_rate": 0.000564944682143812,
      "loss": 3.0559,
      "step": 35809
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.598946213722229,
      "learning_rate": 0.0005649427632663827,
      "loss": 2.8904,
      "step": 35810
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2603540420532227,
      "learning_rate": 0.0005649408443396956,
      "loss": 3.089,
      "step": 35811
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.930037498474121,
      "learning_rate": 0.0005649389253637509,
      "loss": 3.2197,
      "step": 35812
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5181031227111816,
      "learning_rate": 0.0005649370063385491,
      "loss": 3.2945,
      "step": 35813
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7173324823379517,
      "learning_rate": 0.0005649350872640905,
      "loss": 3.0369,
      "step": 35814
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.438592553138733,
      "learning_rate": 0.0005649331681403755,
      "loss": 2.9135,
      "step": 35815
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6706656217575073,
      "learning_rate": 0.0005649312489674044,
      "loss": 3.1255,
      "step": 35816
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4647966623306274,
      "learning_rate": 0.0005649293297451776,
      "loss": 3.0479,
      "step": 35817
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5623010396957397,
      "learning_rate": 0.0005649274104736954,
      "loss": 3.0158,
      "step": 35818
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3708146810531616,
      "learning_rate": 0.0005649254911529582,
      "loss": 3.1185,
      "step": 35819
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4063889980316162,
      "learning_rate": 0.0005649235717829664,
      "loss": 3.2683,
      "step": 35820
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5383127927780151,
      "learning_rate": 0.0005649216523637202,
      "loss": 2.9225,
      "step": 35821
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7288655042648315,
      "learning_rate": 0.0005649197328952202,
      "loss": 3.1126,
      "step": 35822
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4586197137832642,
      "learning_rate": 0.0005649178133774665,
      "loss": 3.1495,
      "step": 35823
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5317436456680298,
      "learning_rate": 0.0005649158938104597,
      "loss": 3.2656,
      "step": 35824
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4301508665084839,
      "learning_rate": 0.0005649139741941999,
      "loss": 2.9664,
      "step": 35825
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5131042003631592,
      "learning_rate": 0.0005649120545286878,
      "loss": 3.2741,
      "step": 35826
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3841897249221802,
      "learning_rate": 0.0005649101348139233,
      "loss": 3.2308,
      "step": 35827
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7901960611343384,
      "learning_rate": 0.0005649082150499071,
      "loss": 2.8156,
      "step": 35828
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4780948162078857,
      "learning_rate": 0.0005649062952366395,
      "loss": 2.7856,
      "step": 35829
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6614145040512085,
      "learning_rate": 0.0005649043753741209,
      "loss": 3.1906,
      "step": 35830
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6254754066467285,
      "learning_rate": 0.0005649024554623514,
      "loss": 3.2953,
      "step": 35831
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7484345436096191,
      "learning_rate": 0.0005649005355013316,
      "loss": 3.2091,
      "step": 35832
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.865275263786316,
      "learning_rate": 0.0005648986154910617,
      "loss": 3.3681,
      "step": 35833
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.326570749282837,
      "learning_rate": 0.0005648966954315423,
      "loss": 3.1503,
      "step": 35834
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8524023294448853,
      "learning_rate": 0.0005648947753227735,
      "loss": 2.9054,
      "step": 35835
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5457707643508911,
      "learning_rate": 0.0005648928551647558,
      "loss": 3.208,
      "step": 35836
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.979818105697632,
      "learning_rate": 0.0005648909349574895,
      "loss": 3.1208,
      "step": 35837
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.373244047164917,
      "learning_rate": 0.0005648890147009751,
      "loss": 3.264,
      "step": 35838
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5419880151748657,
      "learning_rate": 0.0005648870943952127,
      "loss": 2.8899,
      "step": 35839
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6954593658447266,
      "learning_rate": 0.0005648851740402028,
      "loss": 2.9837,
      "step": 35840
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6630184650421143,
      "learning_rate": 0.0005648832536359459,
      "loss": 2.9831,
      "step": 35841
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.438755750656128,
      "learning_rate": 0.000564881333182442,
      "loss": 3.1302,
      "step": 35842
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2440180778503418,
      "learning_rate": 0.0005648794126796918,
      "loss": 2.9731,
      "step": 35843
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.468824028968811,
      "learning_rate": 0.0005648774921276953,
      "loss": 3.1331,
      "step": 35844
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0595290660858154,
      "learning_rate": 0.0005648755715264533,
      "loss": 2.8854,
      "step": 35845
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8690931797027588,
      "learning_rate": 0.0005648736508759658,
      "loss": 3.2162,
      "step": 35846
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3559612035751343,
      "learning_rate": 0.0005648717301762334,
      "loss": 3.0731,
      "step": 35847
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0236895084381104,
      "learning_rate": 0.0005648698094272562,
      "loss": 3.2415,
      "step": 35848
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.504584789276123,
      "learning_rate": 0.0005648678886290348,
      "loss": 3.0282,
      "step": 35849
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.423258662223816,
      "learning_rate": 0.0005648659677815694,
      "loss": 3.031,
      "step": 35850
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.796764850616455,
      "learning_rate": 0.0005648640468848605,
      "loss": 2.7928,
      "step": 35851
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.619445562362671,
      "learning_rate": 0.0005648621259389084,
      "loss": 2.9508,
      "step": 35852
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.887454867362976,
      "learning_rate": 0.0005648602049437133,
      "loss": 3.0155,
      "step": 35853
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7703602313995361,
      "learning_rate": 0.0005648582838992757,
      "loss": 2.9704,
      "step": 35854
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.990309476852417,
      "learning_rate": 0.000564856362805596,
      "loss": 3.0861,
      "step": 35855
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4906386137008667,
      "learning_rate": 0.0005648544416626746,
      "loss": 3.2905,
      "step": 35856
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6229528188705444,
      "learning_rate": 0.0005648525204705117,
      "loss": 3.1729,
      "step": 35857
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4663753509521484,
      "learning_rate": 0.0005648505992291076,
      "loss": 2.9614,
      "step": 35858
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3065346479415894,
      "learning_rate": 0.0005648486779384629,
      "loss": 2.8253,
      "step": 35859
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6985427141189575,
      "learning_rate": 0.0005648467565985778,
      "loss": 3.1514,
      "step": 35860
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6077407598495483,
      "learning_rate": 0.0005648448352094526,
      "loss": 2.6829,
      "step": 35861
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3367748260498047,
      "learning_rate": 0.0005648429137710878,
      "loss": 2.9926,
      "step": 35862
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6712995767593384,
      "learning_rate": 0.0005648409922834838,
      "loss": 3.3294,
      "step": 35863
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5977226495742798,
      "learning_rate": 0.0005648390707466408,
      "loss": 3.128,
      "step": 35864
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4718976020812988,
      "learning_rate": 0.0005648371491605592,
      "loss": 3.0804,
      "step": 35865
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.452181339263916,
      "learning_rate": 0.0005648352275252394,
      "loss": 3.0383,
      "step": 35866
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4165366888046265,
      "learning_rate": 0.0005648333058406817,
      "loss": 3.1412,
      "step": 35867
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.774870753288269,
      "learning_rate": 0.0005648313841068865,
      "loss": 3.1783,
      "step": 35868
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8362241983413696,
      "learning_rate": 0.0005648294623238542,
      "loss": 3.0042,
      "step": 35869
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0688042640686035,
      "learning_rate": 0.0005648275404915849,
      "loss": 3.0966,
      "step": 35870
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2750463485717773,
      "learning_rate": 0.0005648256186100794,
      "loss": 3.0204,
      "step": 35871
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9496653079986572,
      "learning_rate": 0.0005648236966793377,
      "loss": 3.2583,
      "step": 35872
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6083526611328125,
      "learning_rate": 0.0005648217746993604,
      "loss": 3.164,
      "step": 35873
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4350128173828125,
      "learning_rate": 0.0005648198526701475,
      "loss": 3.1566,
      "step": 35874
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5182379484176636,
      "learning_rate": 0.0005648179305916997,
      "loss": 3.112,
      "step": 35875
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5605822801589966,
      "learning_rate": 0.0005648160084640175,
      "loss": 3.0844,
      "step": 35876
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5306354761123657,
      "learning_rate": 0.0005648140862871007,
      "loss": 3.0398,
      "step": 35877
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3827310800552368,
      "learning_rate": 0.00056481216406095,
      "loss": 3.2841,
      "step": 35878
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.39846670627594,
      "learning_rate": 0.0005648102417855658,
      "loss": 3.0071,
      "step": 35879
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7786026000976562,
      "learning_rate": 0.0005648083194609483,
      "loss": 3.1862,
      "step": 35880
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4952774047851562,
      "learning_rate": 0.000564806397087098,
      "loss": 2.843,
      "step": 35881
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5785441398620605,
      "learning_rate": 0.0005648044746640151,
      "loss": 2.8128,
      "step": 35882
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2485440969467163,
      "learning_rate": 0.0005648025521917002,
      "loss": 3.198,
      "step": 35883
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.742228627204895,
      "learning_rate": 0.0005648006296701534,
      "loss": 3.2869,
      "step": 35884
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7012208700180054,
      "learning_rate": 0.0005647987070993752,
      "loss": 3.2621,
      "step": 35885
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.672631859779358,
      "learning_rate": 0.0005647967844793659,
      "loss": 3.255,
      "step": 35886
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7027432918548584,
      "learning_rate": 0.0005647948618101258,
      "loss": 3.3965,
      "step": 35887
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6773666143417358,
      "learning_rate": 0.0005647929390916555,
      "loss": 3.1906,
      "step": 35888
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.393267273902893,
      "learning_rate": 0.0005647910163239551,
      "loss": 2.8772,
      "step": 35889
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9966216087341309,
      "learning_rate": 0.000564789093507025,
      "loss": 3.2397,
      "step": 35890
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4036294221878052,
      "learning_rate": 0.0005647871706408659,
      "loss": 3.0433,
      "step": 35891
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3546178340911865,
      "learning_rate": 0.0005647852477254775,
      "loss": 3.1584,
      "step": 35892
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4049206972122192,
      "learning_rate": 0.0005647833247608607,
      "loss": 2.9197,
      "step": 35893
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9917263984680176,
      "learning_rate": 0.0005647814017470157,
      "loss": 2.9192,
      "step": 35894
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4567008018493652,
      "learning_rate": 0.0005647794786839428,
      "loss": 3.1118,
      "step": 35895
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7441517114639282,
      "learning_rate": 0.0005647775555716423,
      "loss": 2.9083,
      "step": 35896
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.138061046600342,
      "learning_rate": 0.0005647756324101148,
      "loss": 3.0497,
      "step": 35897
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4960277080535889,
      "learning_rate": 0.0005647737091993604,
      "loss": 3.4383,
      "step": 35898
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8503296375274658,
      "learning_rate": 0.0005647717859393797,
      "loss": 2.9636,
      "step": 35899
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6031912565231323,
      "learning_rate": 0.0005647698626301728,
      "loss": 2.9971,
      "step": 35900
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4667744636535645,
      "learning_rate": 0.0005647679392717402,
      "loss": 3.013,
      "step": 35901
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.427729606628418,
      "learning_rate": 0.0005647660158640824,
      "loss": 3.0168,
      "step": 35902
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4680476188659668,
      "learning_rate": 0.0005647640924071994,
      "loss": 2.9548,
      "step": 35903
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.079702854156494,
      "learning_rate": 0.0005647621689010919,
      "loss": 3.3126,
      "step": 35904
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.4250049591064453,
      "learning_rate": 0.0005647602453457601,
      "loss": 3.1099,
      "step": 35905
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0057225227355957,
      "learning_rate": 0.0005647583217412043,
      "loss": 2.8889,
      "step": 35906
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5617953538894653,
      "learning_rate": 0.0005647563980874249,
      "loss": 3.0548,
      "step": 35907
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.970292806625366,
      "learning_rate": 0.0005647544743844224,
      "loss": 2.9318,
      "step": 35908
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4028667211532593,
      "learning_rate": 0.0005647525506321969,
      "loss": 3.0639,
      "step": 35909
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7036166191101074,
      "learning_rate": 0.0005647506268307491,
      "loss": 2.9859,
      "step": 35910
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5059947967529297,
      "learning_rate": 0.000564748702980079,
      "loss": 2.856,
      "step": 35911
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3762059211730957,
      "learning_rate": 0.0005647467790801872,
      "loss": 2.7565,
      "step": 35912
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.574963331222534,
      "learning_rate": 0.000564744855131074,
      "loss": 3.2748,
      "step": 35913
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4472788572311401,
      "learning_rate": 0.0005647429311327396,
      "loss": 3.3103,
      "step": 35914
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7222074270248413,
      "learning_rate": 0.0005647410070851847,
      "loss": 2.7628,
      "step": 35915
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0348901748657227,
      "learning_rate": 0.0005647390829884092,
      "loss": 3.123,
      "step": 35916
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3284839391708374,
      "learning_rate": 0.0005647371588424139,
      "loss": 3.2141,
      "step": 35917
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5789366960525513,
      "learning_rate": 0.0005647352346471988,
      "loss": 3.0881,
      "step": 35918
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4144845008850098,
      "learning_rate": 0.0005647333104027646,
      "loss": 2.9207,
      "step": 35919
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4597439765930176,
      "learning_rate": 0.0005647313861091114,
      "loss": 3.0015,
      "step": 35920
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.491857886314392,
      "learning_rate": 0.0005647294617662396,
      "loss": 3.3988,
      "step": 35921
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.115558624267578,
      "learning_rate": 0.0005647275373741496,
      "loss": 3.1361,
      "step": 35922
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4826761484146118,
      "learning_rate": 0.0005647256129328418,
      "loss": 3.1739,
      "step": 35923
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8455052375793457,
      "learning_rate": 0.0005647236884423165,
      "loss": 2.9792,
      "step": 35924
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8819315433502197,
      "learning_rate": 0.0005647217639025741,
      "loss": 3.1579,
      "step": 35925
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2143605947494507,
      "learning_rate": 0.0005647198393136149,
      "loss": 3.0371,
      "step": 35926
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.424257516860962,
      "learning_rate": 0.0005647179146754393,
      "loss": 2.9951,
      "step": 35927
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2314140796661377,
      "learning_rate": 0.0005647159899880476,
      "loss": 3.1644,
      "step": 35928
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4574202299118042,
      "learning_rate": 0.0005647140652514402,
      "loss": 2.9254,
      "step": 35929
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6047505140304565,
      "learning_rate": 0.0005647121404656175,
      "loss": 3.0235,
      "step": 35930
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9740161895751953,
      "learning_rate": 0.0005647102156305799,
      "loss": 3.2707,
      "step": 35931
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6726568937301636,
      "learning_rate": 0.0005647082907463275,
      "loss": 3.0044,
      "step": 35932
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3825244903564453,
      "learning_rate": 0.0005647063658128609,
      "loss": 3.1291,
      "step": 35933
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3142549991607666,
      "learning_rate": 0.0005647044408301804,
      "loss": 3.4396,
      "step": 35934
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4309309720993042,
      "learning_rate": 0.0005647025157982863,
      "loss": 3.0358,
      "step": 35935
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4732391834259033,
      "learning_rate": 0.0005647005907171791,
      "loss": 3.2143,
      "step": 35936
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3728537559509277,
      "learning_rate": 0.0005646986655868589,
      "loss": 3.1313,
      "step": 35937
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.448090672492981,
      "learning_rate": 0.0005646967404073264,
      "loss": 2.9224,
      "step": 35938
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.57620108127594,
      "learning_rate": 0.0005646948151785818,
      "loss": 3.0321,
      "step": 35939
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5018367767333984,
      "learning_rate": 0.0005646928899006253,
      "loss": 3.1025,
      "step": 35940
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6040655374526978,
      "learning_rate": 0.0005646909645734574,
      "loss": 3.0612,
      "step": 35941
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.419437885284424,
      "learning_rate": 0.0005646890391970786,
      "loss": 2.8087,
      "step": 35942
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6221140623092651,
      "learning_rate": 0.0005646871137714889,
      "loss": 3.2347,
      "step": 35943
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.651343822479248,
      "learning_rate": 0.000564685188296689,
      "loss": 3.0141,
      "step": 35944
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6775463819503784,
      "learning_rate": 0.0005646832627726791,
      "loss": 3.3388,
      "step": 35945
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0396053791046143,
      "learning_rate": 0.0005646813371994596,
      "loss": 3.0454,
      "step": 35946
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5640636682510376,
      "learning_rate": 0.0005646794115770308,
      "loss": 2.9525,
      "step": 35947
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.741839051246643,
      "learning_rate": 0.0005646774859053933,
      "loss": 3.1116,
      "step": 35948
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3283531665802002,
      "learning_rate": 0.0005646755601845469,
      "loss": 3.0651,
      "step": 35949
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6314551830291748,
      "learning_rate": 0.0005646736344144926,
      "loss": 3.0049,
      "step": 35950
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5072420835494995,
      "learning_rate": 0.0005646717085952304,
      "loss": 2.9471,
      "step": 35951
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0334458351135254,
      "learning_rate": 0.0005646697827267607,
      "loss": 3.1942,
      "step": 35952
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0304930210113525,
      "learning_rate": 0.0005646678568090839,
      "loss": 3.062,
      "step": 35953
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.508681297302246,
      "learning_rate": 0.0005646659308422004,
      "loss": 3.0312,
      "step": 35954
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.431206226348877,
      "learning_rate": 0.0005646640048261104,
      "loss": 3.1362,
      "step": 35955
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.865752935409546,
      "learning_rate": 0.0005646620787608144,
      "loss": 2.9006,
      "step": 35956
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4935835599899292,
      "learning_rate": 0.0005646601526463128,
      "loss": 3.0933,
      "step": 35957
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5768859386444092,
      "learning_rate": 0.0005646582264826058,
      "loss": 3.261,
      "step": 35958
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7623094320297241,
      "learning_rate": 0.000564656300269694,
      "loss": 2.7556,
      "step": 35959
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5084842443466187,
      "learning_rate": 0.0005646543740075773,
      "loss": 3.1679,
      "step": 35960
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8463927507400513,
      "learning_rate": 0.0005646524476962567,
      "loss": 3.134,
      "step": 35961
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4347280263900757,
      "learning_rate": 0.0005646505213357319,
      "loss": 2.9673,
      "step": 35962
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7760342359542847,
      "learning_rate": 0.0005646485949260038,
      "loss": 2.8956,
      "step": 35963
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.463104486465454,
      "learning_rate": 0.0005646466684670725,
      "loss": 3.3199,
      "step": 35964
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5889402627944946,
      "learning_rate": 0.0005646447419589383,
      "loss": 3.0768,
      "step": 35965
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4695022106170654,
      "learning_rate": 0.0005646428154016017,
      "loss": 3.0023,
      "step": 35966
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.665902018547058,
      "learning_rate": 0.000564640888795063,
      "loss": 3.0096,
      "step": 35967
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6551405191421509,
      "learning_rate": 0.0005646389621393225,
      "loss": 3.043,
      "step": 35968
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4856411218643188,
      "learning_rate": 0.0005646370354343807,
      "loss": 3.074,
      "step": 35969
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5154192447662354,
      "learning_rate": 0.0005646351086802378,
      "loss": 3.0722,
      "step": 35970
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5905027389526367,
      "learning_rate": 0.0005646331818768944,
      "loss": 3.0312,
      "step": 35971
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7592854499816895,
      "learning_rate": 0.0005646312550243506,
      "loss": 3.0582,
      "step": 35972
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6548430919647217,
      "learning_rate": 0.0005646293281226069,
      "loss": 3.0751,
      "step": 35973
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4433327913284302,
      "learning_rate": 0.0005646274011716635,
      "loss": 3.0045,
      "step": 35974
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5340358018875122,
      "learning_rate": 0.0005646254741715209,
      "loss": 3.0557,
      "step": 35975
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3056522607803345,
      "learning_rate": 0.0005646235471221795,
      "loss": 3.1774,
      "step": 35976
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4721908569335938,
      "learning_rate": 0.0005646216200236395,
      "loss": 3.3277,
      "step": 35977
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.599940538406372,
      "learning_rate": 0.0005646196928759014,
      "loss": 3.1349,
      "step": 35978
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3535274267196655,
      "learning_rate": 0.0005646177656789655,
      "loss": 3.0813,
      "step": 35979
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4586505889892578,
      "learning_rate": 0.0005646158384328321,
      "loss": 2.9925,
      "step": 35980
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5019233226776123,
      "learning_rate": 0.0005646139111375018,
      "loss": 3.0821,
      "step": 35981
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4444928169250488,
      "learning_rate": 0.0005646119837929746,
      "loss": 3.1417,
      "step": 35982
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7369685173034668,
      "learning_rate": 0.000564610056399251,
      "loss": 2.9626,
      "step": 35983
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.670977234840393,
      "learning_rate": 0.0005646081289563316,
      "loss": 3.0556,
      "step": 35984
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8126249313354492,
      "learning_rate": 0.0005646062014642165,
      "loss": 2.9606,
      "step": 35985
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5907965898513794,
      "learning_rate": 0.000564604273922906,
      "loss": 3.0202,
      "step": 35986
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6767566204071045,
      "learning_rate": 0.0005646023463324006,
      "loss": 2.9606,
      "step": 35987
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7440892457962036,
      "learning_rate": 0.0005646004186927006,
      "loss": 2.9141,
      "step": 35988
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5478920936584473,
      "learning_rate": 0.0005645984910038065,
      "loss": 3.1982,
      "step": 35989
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.780471682548523,
      "learning_rate": 0.0005645965632657185,
      "loss": 3.1203,
      "step": 35990
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3871296644210815,
      "learning_rate": 0.000564594635478437,
      "loss": 2.8565,
      "step": 35991
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3709118366241455,
      "learning_rate": 0.0005645927076419623,
      "loss": 3.1329,
      "step": 35992
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4667080640792847,
      "learning_rate": 0.0005645907797562949,
      "loss": 2.731,
      "step": 35993
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5549566745758057,
      "learning_rate": 0.000564588851821435,
      "loss": 3.049,
      "step": 35994
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6032507419586182,
      "learning_rate": 0.0005645869238373832,
      "loss": 2.8449,
      "step": 35995
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4415861368179321,
      "learning_rate": 0.0005645849958041396,
      "loss": 2.9875,
      "step": 35996
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2650954723358154,
      "learning_rate": 0.0005645830677217046,
      "loss": 3.2496,
      "step": 35997
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.8856050968170166,
      "learning_rate": 0.0005645811395900787,
      "loss": 2.7164,
      "step": 35998
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.392486810684204,
      "learning_rate": 0.0005645792114092621,
      "loss": 3.1941,
      "step": 35999
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4383995532989502,
      "learning_rate": 0.0005645772831792552,
      "loss": 3.2808,
      "step": 36000
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.981813907623291,
      "learning_rate": 0.0005645753549000584,
      "loss": 2.9345,
      "step": 36001
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.598965883255005,
      "learning_rate": 0.0005645734265716722,
      "loss": 3.0323,
      "step": 36002
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.522393226623535,
      "learning_rate": 0.0005645714981940967,
      "loss": 3.265,
      "step": 36003
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3415091037750244,
      "learning_rate": 0.0005645695697673324,
      "loss": 2.8012,
      "step": 36004
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1898082494735718,
      "learning_rate": 0.0005645676412913795,
      "loss": 3.0732,
      "step": 36005
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4067201614379883,
      "learning_rate": 0.0005645657127662386,
      "loss": 3.1331,
      "step": 36006
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.579892635345459,
      "learning_rate": 0.00056456378419191,
      "loss": 3.1042,
      "step": 36007
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0263173580169678,
      "learning_rate": 0.0005645618555683939,
      "loss": 3.2106,
      "step": 36008
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.31862211227417,
      "learning_rate": 0.0005645599268956907,
      "loss": 3.0618,
      "step": 36009
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2192280292510986,
      "learning_rate": 0.0005645579981738009,
      "loss": 3.0091,
      "step": 36010
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.545275330543518,
      "learning_rate": 0.0005645560694027247,
      "loss": 3.3178,
      "step": 36011
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2935707569122314,
      "learning_rate": 0.0005645541405824627,
      "loss": 3.145,
      "step": 36012
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.9233224391937256,
      "learning_rate": 0.0005645522117130149,
      "loss": 3.0932,
      "step": 36013
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.388221025466919,
      "learning_rate": 0.000564550282794382,
      "loss": 3.041,
      "step": 36014
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4739770889282227,
      "learning_rate": 0.0005645483538265641,
      "loss": 3.1751,
      "step": 36015
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7262314558029175,
      "learning_rate": 0.0005645464248095617,
      "loss": 3.2907,
      "step": 36016
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4903059005737305,
      "learning_rate": 0.0005645444957433751,
      "loss": 2.7556,
      "step": 36017
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0486061573028564,
      "learning_rate": 0.0005645425666280048,
      "loss": 2.8354,
      "step": 36018
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.48593270778656,
      "learning_rate": 0.0005645406374634509,
      "loss": 3.0646,
      "step": 36019
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.179248094558716,
      "learning_rate": 0.0005645387082497139,
      "loss": 3.2863,
      "step": 36020
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6362227201461792,
      "learning_rate": 0.0005645367789867943,
      "loss": 3.0712,
      "step": 36021
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8330341577529907,
      "learning_rate": 0.0005645348496746923,
      "loss": 3.0684,
      "step": 36022
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4307384490966797,
      "learning_rate": 0.0005645329203134081,
      "loss": 3.1933,
      "step": 36023
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7138339281082153,
      "learning_rate": 0.0005645309909029424,
      "loss": 3.2445,
      "step": 36024
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5773800611495972,
      "learning_rate": 0.0005645290614432954,
      "loss": 3.2859,
      "step": 36025
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7720820903778076,
      "learning_rate": 0.0005645271319344674,
      "loss": 3.245,
      "step": 36026
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.007251262664795,
      "learning_rate": 0.0005645252023764588,
      "loss": 3.0924,
      "step": 36027
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3522965908050537,
      "learning_rate": 0.00056452327276927,
      "loss": 3.0746,
      "step": 36028
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5995748043060303,
      "learning_rate": 0.0005645213431129013,
      "loss": 3.2222,
      "step": 36029
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7302722930908203,
      "learning_rate": 0.0005645194134073531,
      "loss": 3.0237,
      "step": 36030
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.52779221534729,
      "learning_rate": 0.0005645174836526258,
      "loss": 3.072,
      "step": 36031
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4763658046722412,
      "learning_rate": 0.0005645155538487196,
      "loss": 2.9611,
      "step": 36032
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4081956148147583,
      "learning_rate": 0.000564513623995635,
      "loss": 3.1125,
      "step": 36033
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.587834358215332,
      "learning_rate": 0.0005645116940933725,
      "loss": 3.1674,
      "step": 36034
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7163004875183105,
      "learning_rate": 0.0005645097641419321,
      "loss": 2.9026,
      "step": 36035
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3533121347427368,
      "learning_rate": 0.0005645078341413143,
      "loss": 3.2028,
      "step": 36036
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3546651601791382,
      "learning_rate": 0.0005645059040915196,
      "loss": 3.1167,
      "step": 36037
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4007495641708374,
      "learning_rate": 0.0005645039739925482,
      "loss": 3.0976,
      "step": 36038
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.1973819732666016,
      "learning_rate": 0.0005645020438444007,
      "loss": 3.0422,
      "step": 36039
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5428575277328491,
      "learning_rate": 0.0005645001136470771,
      "loss": 2.904,
      "step": 36040
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5440173149108887,
      "learning_rate": 0.0005644981834005779,
      "loss": 3.1739,
      "step": 36041
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.52106511592865,
      "learning_rate": 0.0005644962531049035,
      "loss": 3.1275,
      "step": 36042
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5883671045303345,
      "learning_rate": 0.0005644943227600544,
      "loss": 3.0498,
      "step": 36043
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6865836381912231,
      "learning_rate": 0.0005644923923660307,
      "loss": 3.2399,
      "step": 36044
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4315805435180664,
      "learning_rate": 0.000564490461922833,
      "loss": 3.1949,
      "step": 36045
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5660796165466309,
      "learning_rate": 0.0005644885314304613,
      "loss": 3.0791,
      "step": 36046
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.472130537033081,
      "learning_rate": 0.0005644866008889164,
      "loss": 3.0114,
      "step": 36047
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.356001853942871,
      "learning_rate": 0.0005644846702981983,
      "loss": 3.0685,
      "step": 36048
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6131352186203003,
      "learning_rate": 0.0005644827396583076,
      "loss": 2.9199,
      "step": 36049
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3948355913162231,
      "learning_rate": 0.0005644808089692446,
      "loss": 3.0601,
      "step": 36050
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5026469230651855,
      "learning_rate": 0.0005644788782310096,
      "loss": 3.0932,
      "step": 36051
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4503201246261597,
      "learning_rate": 0.0005644769474436029,
      "loss": 3.1353,
      "step": 36052
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2312647104263306,
      "learning_rate": 0.000564475016607025,
      "loss": 3.213,
      "step": 36053
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9756468534469604,
      "learning_rate": 0.0005644730857212761,
      "loss": 2.8894,
      "step": 36054
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5341782569885254,
      "learning_rate": 0.0005644711547863569,
      "loss": 2.8514,
      "step": 36055
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1930339336395264,
      "learning_rate": 0.0005644692238022674,
      "loss": 3.1408,
      "step": 36056
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.533013105392456,
      "learning_rate": 0.0005644672927690081,
      "loss": 2.9738,
      "step": 36057
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.369032621383667,
      "learning_rate": 0.0005644653616865792,
      "loss": 3.0902,
      "step": 36058
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.30299973487854,
      "learning_rate": 0.0005644634305549813,
      "loss": 2.9682,
      "step": 36059
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.8830623626708984,
      "learning_rate": 0.0005644614993742147,
      "loss": 2.9904,
      "step": 36060
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.842799186706543,
      "learning_rate": 0.0005644595681442796,
      "loss": 3.1246,
      "step": 36061
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.444258213043213,
      "learning_rate": 0.0005644576368651766,
      "loss": 3.3815,
      "step": 36062
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.80844783782959,
      "learning_rate": 0.0005644557055369058,
      "loss": 2.8663,
      "step": 36063
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5496591329574585,
      "learning_rate": 0.0005644537741594678,
      "loss": 3.2148,
      "step": 36064
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3195708990097046,
      "learning_rate": 0.0005644518427328628,
      "loss": 2.9797,
      "step": 36065
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.158534288406372,
      "learning_rate": 0.0005644499112570912,
      "loss": 3.1659,
      "step": 36066
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.018399953842163,
      "learning_rate": 0.0005644479797321534,
      "loss": 2.9818,
      "step": 36067
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.7270562648773193,
      "learning_rate": 0.0005644460481580497,
      "loss": 3.0796,
      "step": 36068
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4773930311203003,
      "learning_rate": 0.0005644441165347806,
      "loss": 3.0317,
      "step": 36069
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.320600986480713,
      "learning_rate": 0.0005644421848623462,
      "loss": 2.9901,
      "step": 36070
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5775887966156006,
      "learning_rate": 0.0005644402531407471,
      "loss": 3.0123,
      "step": 36071
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.401416301727295,
      "learning_rate": 0.0005644383213699836,
      "loss": 2.7268,
      "step": 36072
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7081636190414429,
      "learning_rate": 0.0005644363895500559,
      "loss": 2.9629,
      "step": 36073
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7024614810943604,
      "learning_rate": 0.0005644344576809646,
      "loss": 2.8899,
      "step": 36074
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.957399845123291,
      "learning_rate": 0.0005644325257627099,
      "loss": 2.923,
      "step": 36075
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5938925743103027,
      "learning_rate": 0.0005644305937952921,
      "loss": 3.2762,
      "step": 36076
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.247366189956665,
      "learning_rate": 0.0005644286617787118,
      "loss": 3.1597,
      "step": 36077
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7689080238342285,
      "learning_rate": 0.0005644267297129691,
      "loss": 2.96,
      "step": 36078
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3519192934036255,
      "learning_rate": 0.0005644247975980645,
      "loss": 3.0623,
      "step": 36079
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4616081714630127,
      "learning_rate": 0.0005644228654339985,
      "loss": 3.25,
      "step": 36080
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4981836080551147,
      "learning_rate": 0.0005644209332207712,
      "loss": 3.134,
      "step": 36081
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5643913745880127,
      "learning_rate": 0.0005644190009583829,
      "loss": 2.9711,
      "step": 36082
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6955831050872803,
      "learning_rate": 0.0005644170686468343,
      "loss": 3.1939,
      "step": 36083
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9813621044158936,
      "learning_rate": 0.0005644151362861256,
      "loss": 2.9925,
      "step": 36084
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.5503132343292236,
      "learning_rate": 0.0005644132038762571,
      "loss": 2.9797,
      "step": 36085
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6513336896896362,
      "learning_rate": 0.0005644112714172291,
      "loss": 3.2916,
      "step": 36086
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4972014427185059,
      "learning_rate": 0.0005644093389090421,
      "loss": 2.8508,
      "step": 36087
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.019207715988159,
      "learning_rate": 0.0005644074063516965,
      "loss": 3.1722,
      "step": 36088
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.5233070850372314,
      "learning_rate": 0.0005644054737451924,
      "loss": 2.9886,
      "step": 36089
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8958799839019775,
      "learning_rate": 0.0005644035410895305,
      "loss": 3.0295,
      "step": 36090
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5752296447753906,
      "learning_rate": 0.0005644016083847108,
      "loss": 3.1391,
      "step": 36091
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3230962753295898,
      "learning_rate": 0.000564399675630734,
      "loss": 3.1998,
      "step": 36092
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4038586616516113,
      "learning_rate": 0.0005643977428276003,
      "loss": 3.0102,
      "step": 36093
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1520931720733643,
      "learning_rate": 0.0005643958099753101,
      "loss": 3.108,
      "step": 36094
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3474206924438477,
      "learning_rate": 0.0005643938770738637,
      "loss": 3.1133,
      "step": 36095
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.478463888168335,
      "learning_rate": 0.0005643919441232615,
      "loss": 3.0907,
      "step": 36096
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.7512366771698,
      "learning_rate": 0.0005643900111235038,
      "loss": 3.0554,
      "step": 36097
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.541649341583252,
      "learning_rate": 0.000564388078074591,
      "loss": 3.4165,
      "step": 36098
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7694823741912842,
      "learning_rate": 0.0005643861449765235,
      "loss": 3.1783,
      "step": 36099
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.58268404006958,
      "learning_rate": 0.0005643842118293016,
      "loss": 3.1229,
      "step": 36100
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8007150888442993,
      "learning_rate": 0.0005643822786329256,
      "loss": 3.1022,
      "step": 36101
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9212212562561035,
      "learning_rate": 0.000564380345387396,
      "loss": 2.8699,
      "step": 36102
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1263508796691895,
      "learning_rate": 0.0005643784120927133,
      "loss": 3.1614,
      "step": 36103
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5906486511230469,
      "learning_rate": 0.0005643764787488775,
      "loss": 3.0201,
      "step": 36104
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4652022123336792,
      "learning_rate": 0.0005643745453558891,
      "loss": 2.9813,
      "step": 36105
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4875181913375854,
      "learning_rate": 0.0005643726119137485,
      "loss": 2.969,
      "step": 36106
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5293807983398438,
      "learning_rate": 0.000564370678422456,
      "loss": 2.9939,
      "step": 36107
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2865712642669678,
      "learning_rate": 0.0005643687448820121,
      "loss": 3.1324,
      "step": 36108
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9889408349990845,
      "learning_rate": 0.000564366811292417,
      "loss": 3.0217,
      "step": 36109
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4771758317947388,
      "learning_rate": 0.0005643648776536711,
      "loss": 3.2166,
      "step": 36110
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2760815620422363,
      "learning_rate": 0.0005643629439657748,
      "loss": 3.2308,
      "step": 36111
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.098362684249878,
      "learning_rate": 0.0005643610102287284,
      "loss": 2.9603,
      "step": 36112
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4121204614639282,
      "learning_rate": 0.0005643590764425325,
      "loss": 3.1682,
      "step": 36113
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0263161659240723,
      "learning_rate": 0.0005643571426071869,
      "loss": 3.1948,
      "step": 36114
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8381561040878296,
      "learning_rate": 0.0005643552087226926,
      "loss": 3.143,
      "step": 36115
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3250545263290405,
      "learning_rate": 0.0005643532747890497,
      "loss": 3.3228,
      "step": 36116
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8557573556900024,
      "learning_rate": 0.0005643513408062584,
      "loss": 3.0687,
      "step": 36117
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2992424964904785,
      "learning_rate": 0.0005643494067743192,
      "loss": 3.1351,
      "step": 36118
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.564767599105835,
      "learning_rate": 0.0005643474726932325,
      "loss": 3.246,
      "step": 36119
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6838486194610596,
      "learning_rate": 0.0005643455385629985,
      "loss": 3.215,
      "step": 36120
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5076700448989868,
      "learning_rate": 0.0005643436043836178,
      "loss": 2.9161,
      "step": 36121
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6420537233352661,
      "learning_rate": 0.0005643416701550906,
      "loss": 2.8844,
      "step": 36122
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6882609128952026,
      "learning_rate": 0.0005643397358774172,
      "loss": 2.8671,
      "step": 36123
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4318138360977173,
      "learning_rate": 0.0005643378015505983,
      "loss": 3.0752,
      "step": 36124
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8102387189865112,
      "learning_rate": 0.0005643358671746339,
      "loss": 3.084,
      "step": 36125
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3178551197052002,
      "learning_rate": 0.0005643339327495244,
      "loss": 2.9232,
      "step": 36126
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.37400484085083,
      "learning_rate": 0.0005643319982752702,
      "loss": 2.9004,
      "step": 36127
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6603511571884155,
      "learning_rate": 0.0005643300637518718,
      "loss": 3.1295,
      "step": 36128
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.810402750968933,
      "learning_rate": 0.0005643281291793295,
      "loss": 2.8647,
      "step": 36129
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.501621961593628,
      "learning_rate": 0.0005643261945576434,
      "loss": 3.0111,
      "step": 36130
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5839709043502808,
      "learning_rate": 0.0005643242598868143,
      "loss": 3.4616,
      "step": 36131
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5349974632263184,
      "learning_rate": 0.0005643223251668422,
      "loss": 3.1088,
      "step": 36132
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7346465587615967,
      "learning_rate": 0.0005643203903977276,
      "loss": 3.0571,
      "step": 36133
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.1019489765167236,
      "learning_rate": 0.0005643184555794708,
      "loss": 2.7358,
      "step": 36134
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7046500444412231,
      "learning_rate": 0.0005643165207120723,
      "loss": 3.0079,
      "step": 36135
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.491606593132019,
      "learning_rate": 0.0005643145857955323,
      "loss": 3.1607,
      "step": 36136
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7491059303283691,
      "learning_rate": 0.0005643126508298514,
      "loss": 3.1361,
      "step": 36137
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0572965145111084,
      "learning_rate": 0.0005643107158150296,
      "loss": 3.167,
      "step": 36138
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.029473066329956,
      "learning_rate": 0.0005643087807510675,
      "loss": 2.9004,
      "step": 36139
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.231776714324951,
      "learning_rate": 0.0005643068456379655,
      "loss": 3.0072,
      "step": 36140
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.99998140335083,
      "learning_rate": 0.0005643049104757237,
      "loss": 3.142,
      "step": 36141
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.352792739868164,
      "learning_rate": 0.0005643029752643427,
      "loss": 2.8506,
      "step": 36142
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8526923656463623,
      "learning_rate": 0.0005643010400038229,
      "loss": 3.0848,
      "step": 36143
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4668035507202148,
      "learning_rate": 0.0005642991046941645,
      "loss": 2.9877,
      "step": 36144
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6604268550872803,
      "learning_rate": 0.0005642971693353677,
      "loss": 2.8125,
      "step": 36145
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.606980323791504,
      "learning_rate": 0.0005642952339274332,
      "loss": 3.0991,
      "step": 36146
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7805721759796143,
      "learning_rate": 0.0005642932984703614,
      "loss": 3.0046,
      "step": 36147
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.56215500831604,
      "learning_rate": 0.0005642913629641523,
      "loss": 2.9342,
      "step": 36148
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3516186475753784,
      "learning_rate": 0.0005642894274088064,
      "loss": 3.0573,
      "step": 36149
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6267727613449097,
      "learning_rate": 0.0005642874918043243,
      "loss": 3.1922,
      "step": 36150
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6411473751068115,
      "learning_rate": 0.000564285556150706,
      "loss": 2.9457,
      "step": 36151
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4205948114395142,
      "learning_rate": 0.0005642836204479521,
      "loss": 3.1631,
      "step": 36152
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4595106840133667,
      "learning_rate": 0.0005642816846960629,
      "loss": 2.9297,
      "step": 36153
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.713218092918396,
      "learning_rate": 0.0005642797488950386,
      "loss": 2.9513,
      "step": 36154
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.8452727794647217,
      "learning_rate": 0.00056427781304488,
      "loss": 2.9667,
      "step": 36155
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5871096849441528,
      "learning_rate": 0.000564275877145587,
      "loss": 3.0895,
      "step": 36156
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7152326107025146,
      "learning_rate": 0.00056427394119716,
      "loss": 3.2162,
      "step": 36157
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0127525329589844,
      "learning_rate": 0.0005642720051995996,
      "loss": 2.9455,
      "step": 36158
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5581278800964355,
      "learning_rate": 0.0005642700691529061,
      "loss": 3.2189,
      "step": 36159
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.433082938194275,
      "learning_rate": 0.0005642681330570797,
      "loss": 3.0662,
      "step": 36160
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7379473447799683,
      "learning_rate": 0.000564266196912121,
      "loss": 3.1979,
      "step": 36161
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5844182968139648,
      "learning_rate": 0.0005642642607180301,
      "loss": 3.3534,
      "step": 36162
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3618911504745483,
      "learning_rate": 0.0005642623244748075,
      "loss": 2.9855,
      "step": 36163
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3272387981414795,
      "learning_rate": 0.0005642603881824537,
      "loss": 3.0582,
      "step": 36164
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7165201902389526,
      "learning_rate": 0.0005642584518409687,
      "loss": 2.8818,
      "step": 36165
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5964089632034302,
      "learning_rate": 0.0005642565154503531,
      "loss": 3.1717,
      "step": 36166
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6810027360916138,
      "learning_rate": 0.0005642545790106073,
      "loss": 3.0544,
      "step": 36167
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.061885118484497,
      "learning_rate": 0.0005642526425217316,
      "loss": 2.8906,
      "step": 36168
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7974766492843628,
      "learning_rate": 0.0005642507059837262,
      "loss": 2.9782,
      "step": 36169
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5013335943222046,
      "learning_rate": 0.0005642487693965917,
      "loss": 2.9224,
      "step": 36170
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2852911949157715,
      "learning_rate": 0.0005642468327603285,
      "loss": 2.8772,
      "step": 36171
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4313441514968872,
      "learning_rate": 0.0005642448960749367,
      "loss": 3.1277,
      "step": 36172
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.572055697441101,
      "learning_rate": 0.0005642429593404167,
      "loss": 3.1882,
      "step": 36173
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.075495481491089,
      "learning_rate": 0.0005642410225567691,
      "loss": 3.084,
      "step": 36174
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3122334480285645,
      "learning_rate": 0.0005642390857239941,
      "loss": 3.0655,
      "step": 36175
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5993252992630005,
      "learning_rate": 0.000564237148842092,
      "loss": 3.0475,
      "step": 36176
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9728167057037354,
      "learning_rate": 0.0005642352119110632,
      "loss": 2.9538,
      "step": 36177
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8619967699050903,
      "learning_rate": 0.000564233274930908,
      "loss": 3.1852,
      "step": 36178
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5702152252197266,
      "learning_rate": 0.000564231337901627,
      "loss": 3.0609,
      "step": 36179
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6018526554107666,
      "learning_rate": 0.0005642294008232204,
      "loss": 3.047,
      "step": 36180
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.283384084701538,
      "learning_rate": 0.0005642274636956886,
      "loss": 3.2445,
      "step": 36181
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2650392055511475,
      "learning_rate": 0.0005642255265190317,
      "loss": 2.9663,
      "step": 36182
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.560565710067749,
      "learning_rate": 0.0005642235892932505,
      "loss": 3.0739,
      "step": 36183
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4079433679580688,
      "learning_rate": 0.0005642216520183452,
      "loss": 3.0436,
      "step": 36184
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.334916353225708,
      "learning_rate": 0.000564219714694316,
      "loss": 3.0641,
      "step": 36185
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4971721172332764,
      "learning_rate": 0.0005642177773211633,
      "loss": 3.051,
      "step": 36186
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5950208902359009,
      "learning_rate": 0.0005642158398988876,
      "loss": 3.1887,
      "step": 36187
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.466983437538147,
      "learning_rate": 0.0005642139024274892,
      "loss": 3.1322,
      "step": 36188
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5899646282196045,
      "learning_rate": 0.0005642119649069685,
      "loss": 3.1299,
      "step": 36189
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.398544430732727,
      "learning_rate": 0.0005642100273373256,
      "loss": 3.0655,
      "step": 36190
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6170560121536255,
      "learning_rate": 0.0005642080897185613,
      "loss": 2.9618,
      "step": 36191
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3924493789672852,
      "learning_rate": 0.0005642061520506757,
      "loss": 2.9364,
      "step": 36192
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5674805641174316,
      "learning_rate": 0.0005642042143336691,
      "loss": 2.8116,
      "step": 36193
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5940314531326294,
      "learning_rate": 0.000564202276567542,
      "loss": 2.7067,
      "step": 36194
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3856925964355469,
      "learning_rate": 0.0005642003387522946,
      "loss": 3.1548,
      "step": 36195
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.916334629058838,
      "learning_rate": 0.0005641984008879276,
      "loss": 3.1877,
      "step": 36196
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5912508964538574,
      "learning_rate": 0.0005641964629744409,
      "loss": 3.1843,
      "step": 36197
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9594106674194336,
      "learning_rate": 0.0005641945250118352,
      "loss": 3.1534,
      "step": 36198
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.575569987297058,
      "learning_rate": 0.0005641925870001108,
      "loss": 2.9469,
      "step": 36199
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6867133378982544,
      "learning_rate": 0.0005641906489392679,
      "loss": 2.9259,
      "step": 36200
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.449314832687378,
      "learning_rate": 0.000564188710829307,
      "loss": 2.9358,
      "step": 36201
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6592168807983398,
      "learning_rate": 0.0005641867726702284,
      "loss": 2.9262,
      "step": 36202
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3717561960220337,
      "learning_rate": 0.0005641848344620326,
      "loss": 3.2328,
      "step": 36203
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7106636762619019,
      "learning_rate": 0.0005641828962047198,
      "loss": 3.1257,
      "step": 36204
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4704375267028809,
      "learning_rate": 0.0005641809578982904,
      "loss": 3.0678,
      "step": 36205
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.209559917449951,
      "learning_rate": 0.0005641790195427447,
      "loss": 3.2719,
      "step": 36206
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6843774318695068,
      "learning_rate": 0.0005641770811380832,
      "loss": 3.0902,
      "step": 36207
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.421635627746582,
      "learning_rate": 0.0005641751426843062,
      "loss": 3.2649,
      "step": 36208
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4006426334381104,
      "learning_rate": 0.0005641732041814141,
      "loss": 3.2201,
      "step": 36209
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.7468721866607666,
      "learning_rate": 0.0005641712656294072,
      "loss": 3.1702,
      "step": 36210
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5477654933929443,
      "learning_rate": 0.0005641693270282859,
      "loss": 3.3077,
      "step": 36211
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.930199384689331,
      "learning_rate": 0.0005641673883780504,
      "loss": 3.1022,
      "step": 36212
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.7621352672576904,
      "learning_rate": 0.0005641654496787013,
      "loss": 2.7022,
      "step": 36213
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5270739793777466,
      "learning_rate": 0.0005641635109302389,
      "loss": 2.8511,
      "step": 36214
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3796416521072388,
      "learning_rate": 0.0005641615721326634,
      "loss": 3.3534,
      "step": 36215
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3544933795928955,
      "learning_rate": 0.0005641596332859753,
      "loss": 2.9701,
      "step": 36216
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.108975887298584,
      "learning_rate": 0.0005641576943901749,
      "loss": 3.2055,
      "step": 36217
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8356928825378418,
      "learning_rate": 0.0005641557554452628,
      "loss": 3.0756,
      "step": 36218
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.340392827987671,
      "learning_rate": 0.000564153816451239,
      "loss": 3.0801,
      "step": 36219
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.9157919883728027,
      "learning_rate": 0.000564151877408104,
      "loss": 3.0263,
      "step": 36220
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0774028301239014,
      "learning_rate": 0.0005641499383158582,
      "loss": 2.9827,
      "step": 36221
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4793287515640259,
      "learning_rate": 0.000564147999174502,
      "loss": 3.0697,
      "step": 36222
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.162930727005005,
      "learning_rate": 0.0005641460599840357,
      "loss": 2.909,
      "step": 36223
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8391320705413818,
      "learning_rate": 0.0005641441207444596,
      "loss": 3.0191,
      "step": 36224
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3320462703704834,
      "learning_rate": 0.0005641421814557741,
      "loss": 2.8743,
      "step": 36225
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5267354249954224,
      "learning_rate": 0.0005641402421179797,
      "loss": 2.9605,
      "step": 36226
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.869868040084839,
      "learning_rate": 0.0005641383027310765,
      "loss": 2.9801,
      "step": 36227
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.9642765522003174,
      "learning_rate": 0.0005641363632950651,
      "loss": 3.3026,
      "step": 36228
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.024129629135132,
      "learning_rate": 0.0005641344238099458,
      "loss": 2.9562,
      "step": 36229
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2620595693588257,
      "learning_rate": 0.0005641324842757189,
      "loss": 2.9671,
      "step": 36230
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.9492969512939453,
      "learning_rate": 0.0005641305446923847,
      "loss": 2.9185,
      "step": 36231
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3955464363098145,
      "learning_rate": 0.0005641286050599437,
      "loss": 2.7458,
      "step": 36232
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.466731309890747,
      "learning_rate": 0.0005641266653783962,
      "loss": 2.8815,
      "step": 36233
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3493006229400635,
      "learning_rate": 0.0005641247256477425,
      "loss": 3.1524,
      "step": 36234
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.439155101776123,
      "learning_rate": 0.0005641227858679831,
      "loss": 2.9095,
      "step": 36235
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8521898984909058,
      "learning_rate": 0.0005641208460391183,
      "loss": 2.9012,
      "step": 36236
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4937090873718262,
      "learning_rate": 0.0005641189061611485,
      "loss": 3.0834,
      "step": 36237
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.47898530960083,
      "learning_rate": 0.0005641169662340739,
      "loss": 3.028,
      "step": 36238
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5150774717330933,
      "learning_rate": 0.000564115026257895,
      "loss": 2.8178,
      "step": 36239
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7213854789733887,
      "learning_rate": 0.0005641130862326121,
      "loss": 3.1093,
      "step": 36240
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6934871673583984,
      "learning_rate": 0.0005641111461582257,
      "loss": 2.965,
      "step": 36241
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6930272579193115,
      "learning_rate": 0.000564109206034736,
      "loss": 2.8605,
      "step": 36242
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9786938428878784,
      "learning_rate": 0.0005641072658621434,
      "loss": 2.9326,
      "step": 36243
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.537379503250122,
      "learning_rate": 0.0005641053256404483,
      "loss": 2.9446,
      "step": 36244
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4547971487045288,
      "learning_rate": 0.000564103385369651,
      "loss": 3.3393,
      "step": 36245
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.226301431655884,
      "learning_rate": 0.000564101445049752,
      "loss": 2.9834,
      "step": 36246
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.57593035697937,
      "learning_rate": 0.0005640995046807514,
      "loss": 3.2249,
      "step": 36247
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5258554220199585,
      "learning_rate": 0.0005640975642626497,
      "loss": 3.1024,
      "step": 36248
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.603297472000122,
      "learning_rate": 0.0005640956237954474,
      "loss": 3.1248,
      "step": 36249
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.113337755203247,
      "learning_rate": 0.0005640936832791446,
      "loss": 3.2779,
      "step": 36250
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7119399309158325,
      "learning_rate": 0.0005640917427137419,
      "loss": 3.0235,
      "step": 36251
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2211215496063232,
      "learning_rate": 0.0005640898020992397,
      "loss": 2.8105,
      "step": 36252
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8006671667099,
      "learning_rate": 0.000564087861435638,
      "loss": 2.9044,
      "step": 36253
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6750636100769043,
      "learning_rate": 0.0005640859207229374,
      "loss": 3.2868,
      "step": 36254
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4381593465805054,
      "learning_rate": 0.0005640839799611383,
      "loss": 2.999,
      "step": 36255
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5844507217407227,
      "learning_rate": 0.0005640820391502411,
      "loss": 2.9831,
      "step": 36256
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4653921127319336,
      "learning_rate": 0.000564080098290246,
      "loss": 3.1751,
      "step": 36257
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.8031089305877686,
      "learning_rate": 0.0005640781573811533,
      "loss": 3.0094,
      "step": 36258
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3668467998504639,
      "learning_rate": 0.0005640762164229636,
      "loss": 3.1392,
      "step": 36259
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.560259222984314,
      "learning_rate": 0.0005640742754156772,
      "loss": 3.0999,
      "step": 36260
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4124031066894531,
      "learning_rate": 0.0005640723343592943,
      "loss": 2.7285,
      "step": 36261
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6607779264450073,
      "learning_rate": 0.0005640703932538153,
      "loss": 3.2293,
      "step": 36262
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8777544498443604,
      "learning_rate": 0.0005640684520992408,
      "loss": 3.1443,
      "step": 36263
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5601530075073242,
      "learning_rate": 0.0005640665108955709,
      "loss": 2.9452,
      "step": 36264
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.02626895904541,
      "learning_rate": 0.0005640645696428061,
      "loss": 3.1897,
      "step": 36265
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9422202110290527,
      "learning_rate": 0.0005640626283409467,
      "loss": 3.0926,
      "step": 36266
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7416025400161743,
      "learning_rate": 0.000564060686989993,
      "loss": 3.2503,
      "step": 36267
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.052379608154297,
      "learning_rate": 0.0005640587455899455,
      "loss": 3.3325,
      "step": 36268
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.356492042541504,
      "learning_rate": 0.0005640568041408045,
      "loss": 3.3504,
      "step": 36269
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4462648630142212,
      "learning_rate": 0.0005640548626425704,
      "loss": 3.1438,
      "step": 36270
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7238911390304565,
      "learning_rate": 0.0005640529210952434,
      "loss": 3.2141,
      "step": 36271
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9157896041870117,
      "learning_rate": 0.000564050979498824,
      "loss": 3.1716,
      "step": 36272
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1031057834625244,
      "learning_rate": 0.0005640490378533126,
      "loss": 2.9682,
      "step": 36273
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.186966896057129,
      "learning_rate": 0.0005640470961587095,
      "loss": 2.8055,
      "step": 36274
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.377973794937134,
      "learning_rate": 0.000564045154415015,
      "loss": 2.9664,
      "step": 36275
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6617121696472168,
      "learning_rate": 0.0005640432126222295,
      "loss": 2.937,
      "step": 36276
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7993179559707642,
      "learning_rate": 0.0005640412707803535,
      "loss": 3.1532,
      "step": 36277
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5299841165542603,
      "learning_rate": 0.0005640393288893871,
      "loss": 3.3401,
      "step": 36278
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.626926064491272,
      "learning_rate": 0.000564037386949331,
      "loss": 3.0152,
      "step": 36279
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6513924598693848,
      "learning_rate": 0.0005640354449601852,
      "loss": 3.1152,
      "step": 36280
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3643052577972412,
      "learning_rate": 0.0005640335029219502,
      "loss": 3.1705,
      "step": 36281
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.657122015953064,
      "learning_rate": 0.0005640315608346266,
      "loss": 2.9156,
      "step": 36282
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6698224544525146,
      "learning_rate": 0.0005640296186982144,
      "loss": 3.228,
      "step": 36283
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5604270696640015,
      "learning_rate": 0.000564027676512714,
      "loss": 3.1673,
      "step": 36284
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3942266702651978,
      "learning_rate": 0.000564025734278126,
      "loss": 3.0498,
      "step": 36285
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4579651355743408,
      "learning_rate": 0.0005640237919944507,
      "loss": 3.3472,
      "step": 36286
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9318132400512695,
      "learning_rate": 0.0005640218496616883,
      "loss": 2.8042,
      "step": 36287
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3288402557373047,
      "learning_rate": 0.0005640199072798393,
      "loss": 2.8874,
      "step": 36288
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.528354525566101,
      "learning_rate": 0.0005640179648489039,
      "loss": 2.9436,
      "step": 36289
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6896923780441284,
      "learning_rate": 0.0005640160223688828,
      "loss": 2.9213,
      "step": 36290
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1144726276397705,
      "learning_rate": 0.0005640140798397759,
      "loss": 2.9616,
      "step": 36291
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4823176860809326,
      "learning_rate": 0.000564012137261584,
      "loss": 3.002,
      "step": 36292
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5304725170135498,
      "learning_rate": 0.000564010194634307,
      "loss": 3.0094,
      "step": 36293
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4476344585418701,
      "learning_rate": 0.0005640082519579458,
      "loss": 3.0043,
      "step": 36294
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6523429155349731,
      "learning_rate": 0.0005640063092325003,
      "loss": 2.8765,
      "step": 36295
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7548226118087769,
      "learning_rate": 0.0005640043664579711,
      "loss": 2.9342,
      "step": 36296
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5400452613830566,
      "learning_rate": 0.0005640024236343586,
      "loss": 3.2123,
      "step": 36297
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7844598293304443,
      "learning_rate": 0.000564000480761663,
      "loss": 3.2705,
      "step": 36298
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.399063229560852,
      "learning_rate": 0.0005639985378398846,
      "loss": 2.9364,
      "step": 36299
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.2406301498413086,
      "learning_rate": 0.000563996594869024,
      "loss": 2.9775,
      "step": 36300
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6554640531539917,
      "learning_rate": 0.0005639946518490815,
      "loss": 3.2913,
      "step": 36301
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9918620586395264,
      "learning_rate": 0.0005639927087800574,
      "loss": 2.9655,
      "step": 36302
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.6152470111846924,
      "learning_rate": 0.0005639907656619519,
      "loss": 2.9128,
      "step": 36303
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8130028247833252,
      "learning_rate": 0.0005639888224947657,
      "loss": 2.8923,
      "step": 36304
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5041728019714355,
      "learning_rate": 0.000563986879278499,
      "loss": 3.1051,
      "step": 36305
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.0339648723602295,
      "learning_rate": 0.0005639849360131521,
      "loss": 3.3116,
      "step": 36306
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.4914968013763428,
      "learning_rate": 0.0005639829926987254,
      "loss": 3.0005,
      "step": 36307
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6432000398635864,
      "learning_rate": 0.0005639810493352193,
      "loss": 3.0591,
      "step": 36308
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6397089958190918,
      "learning_rate": 0.0005639791059226342,
      "loss": 2.8822,
      "step": 36309
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.683544397354126,
      "learning_rate": 0.0005639771624609704,
      "loss": 2.9202,
      "step": 36310
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.615414619445801,
      "learning_rate": 0.0005639752189502282,
      "loss": 3.1533,
      "step": 36311
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.247786045074463,
      "learning_rate": 0.0005639732753904079,
      "loss": 3.12,
      "step": 36312
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8766977787017822,
      "learning_rate": 0.0005639713317815103,
      "loss": 3.3251,
      "step": 36313
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1375973224639893,
      "learning_rate": 0.0005639693881235351,
      "loss": 3.032,
      "step": 36314
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.493596076965332,
      "learning_rate": 0.0005639674444164833,
      "loss": 3.15,
      "step": 36315
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3041235208511353,
      "learning_rate": 0.0005639655006603548,
      "loss": 3.0369,
      "step": 36316
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6774985790252686,
      "learning_rate": 0.0005639635568551502,
      "loss": 2.9368,
      "step": 36317
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.6846706867218018,
      "learning_rate": 0.0005639616130008698,
      "loss": 2.8596,
      "step": 36318
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9140093326568604,
      "learning_rate": 0.0005639596690975139,
      "loss": 3.2151,
      "step": 36319
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.623706579208374,
      "learning_rate": 0.000563957725145083,
      "loss": 2.9926,
      "step": 36320
    },
    {
      "epoch": 0.47,
      "grad_norm": 4.392274379730225,
      "learning_rate": 0.0005639557811435772,
      "loss": 3.0782,
      "step": 36321
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.645355224609375,
      "learning_rate": 0.0005639538370929972,
      "loss": 3.4105,
      "step": 36322
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8325283527374268,
      "learning_rate": 0.0005639518929933432,
      "loss": 3.2682,
      "step": 36323
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.315701484680176,
      "learning_rate": 0.0005639499488446154,
      "loss": 3.0027,
      "step": 36324
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.7649645805358887,
      "learning_rate": 0.0005639480046468145,
      "loss": 3.2342,
      "step": 36325
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.7514915466308594,
      "learning_rate": 0.0005639460603999406,
      "loss": 3.1068,
      "step": 36326
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1345527172088623,
      "learning_rate": 0.0005639441161039941,
      "loss": 3.2145,
      "step": 36327
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3473753929138184,
      "learning_rate": 0.0005639421717589755,
      "loss": 3.0799,
      "step": 36328
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0024139881134033,
      "learning_rate": 0.000563940227364885,
      "loss": 3.1172,
      "step": 36329
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6629494428634644,
      "learning_rate": 0.0005639382829217232,
      "loss": 3.1444,
      "step": 36330
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6084773540496826,
      "learning_rate": 0.0005639363384294901,
      "loss": 3.2274,
      "step": 36331
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3858906030654907,
      "learning_rate": 0.0005639343938881863,
      "loss": 3.1425,
      "step": 36332
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.381286144256592,
      "learning_rate": 0.0005639324492978121,
      "loss": 3.0918,
      "step": 36333
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4394277334213257,
      "learning_rate": 0.0005639305046583679,
      "loss": 3.005,
      "step": 36334
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2264478206634521,
      "learning_rate": 0.000563928559969854,
      "loss": 3.0198,
      "step": 36335
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6432418823242188,
      "learning_rate": 0.0005639266152322708,
      "loss": 3.0078,
      "step": 36336
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.587357997894287,
      "learning_rate": 0.0005639246704456188,
      "loss": 3.2161,
      "step": 36337
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.50214421749115,
      "learning_rate": 0.0005639227256098981,
      "loss": 3.1093,
      "step": 36338
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5054112672805786,
      "learning_rate": 0.0005639207807251091,
      "loss": 3.0724,
      "step": 36339
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1329119205474854,
      "learning_rate": 0.0005639188357912523,
      "loss": 3.0235,
      "step": 36340
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.501287579536438,
      "learning_rate": 0.0005639168908083282,
      "loss": 3.1694,
      "step": 36341
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5541843175888062,
      "learning_rate": 0.0005639149457763368,
      "loss": 3.3249,
      "step": 36342
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0274503231048584,
      "learning_rate": 0.0005639130006952787,
      "loss": 2.9396,
      "step": 36343
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5344531536102295,
      "learning_rate": 0.0005639110555651541,
      "loss": 3.0535,
      "step": 36344
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5211052894592285,
      "learning_rate": 0.0005639091103859635,
      "loss": 2.9791,
      "step": 36345
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6773031949996948,
      "learning_rate": 0.0005639071651577072,
      "loss": 2.8817,
      "step": 36346
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2895936965942383,
      "learning_rate": 0.0005639052198803856,
      "loss": 3.2394,
      "step": 36347
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.787122368812561,
      "learning_rate": 0.0005639032745539989,
      "loss": 3.2138,
      "step": 36348
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.445855975151062,
      "learning_rate": 0.0005639013291785478,
      "loss": 3.206,
      "step": 36349
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6304551362991333,
      "learning_rate": 0.0005638993837540324,
      "loss": 3.0594,
      "step": 36350
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4637984037399292,
      "learning_rate": 0.0005638974382804531,
      "loss": 3.1856,
      "step": 36351
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0604777336120605,
      "learning_rate": 0.0005638954927578103,
      "loss": 2.9815,
      "step": 36352
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3140063285827637,
      "learning_rate": 0.0005638935471861044,
      "loss": 3.0047,
      "step": 36353
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3675050735473633,
      "learning_rate": 0.0005638916015653356,
      "loss": 3.1313,
      "step": 36354
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8202464580535889,
      "learning_rate": 0.0005638896558955044,
      "loss": 3.1669,
      "step": 36355
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8169373273849487,
      "learning_rate": 0.0005638877101766111,
      "loss": 2.8787,
      "step": 36356
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6224099397659302,
      "learning_rate": 0.0005638857644086562,
      "loss": 3.0049,
      "step": 36357
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.923013210296631,
      "learning_rate": 0.0005638838185916399,
      "loss": 3.0644,
      "step": 36358
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.506137728691101,
      "learning_rate": 0.0005638818727255625,
      "loss": 3.0343,
      "step": 36359
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5594589710235596,
      "learning_rate": 0.0005638799268104247,
      "loss": 2.8938,
      "step": 36360
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.107107639312744,
      "learning_rate": 0.0005638779808462266,
      "loss": 3.1419,
      "step": 36361
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8936512470245361,
      "learning_rate": 0.0005638760348329684,
      "loss": 3.1198,
      "step": 36362
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0406594276428223,
      "learning_rate": 0.0005638740887706509,
      "loss": 3.0469,
      "step": 36363
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3440427780151367,
      "learning_rate": 0.000563872142659274,
      "loss": 3.0314,
      "step": 36364
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.401296854019165,
      "learning_rate": 0.0005638701964988385,
      "loss": 3.0467,
      "step": 36365
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0769708156585693,
      "learning_rate": 0.0005638682502893444,
      "loss": 3.0865,
      "step": 36366
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3651247024536133,
      "learning_rate": 0.0005638663040307923,
      "loss": 3.1241,
      "step": 36367
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.632138729095459,
      "learning_rate": 0.0005638643577231825,
      "loss": 3.0966,
      "step": 36368
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.8388209342956543,
      "learning_rate": 0.0005638624113665152,
      "loss": 2.9178,
      "step": 36369
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.405395746231079,
      "learning_rate": 0.000563860464960791,
      "loss": 3.0944,
      "step": 36370
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4361859560012817,
      "learning_rate": 0.0005638585185060101,
      "loss": 3.1493,
      "step": 36371
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.014124631881714,
      "learning_rate": 0.000563856572002173,
      "loss": 3.3175,
      "step": 36372
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.564396619796753,
      "learning_rate": 0.00056385462544928,
      "loss": 3.0539,
      "step": 36373
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4713540077209473,
      "learning_rate": 0.0005638526788473313,
      "loss": 3.0843,
      "step": 36374
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0266566276550293,
      "learning_rate": 0.0005638507321963276,
      "loss": 3.1327,
      "step": 36375
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5556588172912598,
      "learning_rate": 0.000563848785496269,
      "loss": 3.0519,
      "step": 36376
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7116496562957764,
      "learning_rate": 0.0005638468387471558,
      "loss": 2.8952,
      "step": 36377
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4522600173950195,
      "learning_rate": 0.0005638448919489886,
      "loss": 2.9788,
      "step": 36378
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9262347221374512,
      "learning_rate": 0.0005638429451017676,
      "loss": 3.1872,
      "step": 36379
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2813688516616821,
      "learning_rate": 0.0005638409982054932,
      "loss": 3.1058,
      "step": 36380
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.517702579498291,
      "learning_rate": 0.0005638390512601659,
      "loss": 3.0343,
      "step": 36381
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3945820331573486,
      "learning_rate": 0.0005638371042657859,
      "loss": 2.8672,
      "step": 36382
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6255106925964355,
      "learning_rate": 0.0005638351572223536,
      "loss": 3.3642,
      "step": 36383
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4472432136535645,
      "learning_rate": 0.0005638332101298693,
      "loss": 3.22,
      "step": 36384
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.751071572303772,
      "learning_rate": 0.0005638312629883335,
      "loss": 2.9854,
      "step": 36385
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.603796124458313,
      "learning_rate": 0.0005638293157977463,
      "loss": 3.0191,
      "step": 36386
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.585946798324585,
      "learning_rate": 0.0005638273685581084,
      "loss": 3.3127,
      "step": 36387
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5419540405273438,
      "learning_rate": 0.0005638254212694201,
      "loss": 3.0776,
      "step": 36388
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.567063808441162,
      "learning_rate": 0.0005638234739316816,
      "loss": 3.0586,
      "step": 36389
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5960166454315186,
      "learning_rate": 0.0005638215265448933,
      "loss": 3.1619,
      "step": 36390
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4449965953826904,
      "learning_rate": 0.0005638195791090556,
      "loss": 2.7817,
      "step": 36391
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3952434062957764,
      "learning_rate": 0.0005638176316241689,
      "loss": 2.9257,
      "step": 36392
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6331653594970703,
      "learning_rate": 0.0005638156840902335,
      "loss": 3.19,
      "step": 36393
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2839730978012085,
      "learning_rate": 0.0005638137365072499,
      "loss": 3.2203,
      "step": 36394
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.831934928894043,
      "learning_rate": 0.0005638117888752181,
      "loss": 3.0799,
      "step": 36395
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2441385984420776,
      "learning_rate": 0.0005638098411941388,
      "loss": 2.9203,
      "step": 36396
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5296415090560913,
      "learning_rate": 0.0005638078934640123,
      "loss": 2.6223,
      "step": 36397
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4508693218231201,
      "learning_rate": 0.000563805945684839,
      "loss": 3.0443,
      "step": 36398
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6340453624725342,
      "learning_rate": 0.0005638039978566191,
      "loss": 3.0001,
      "step": 36399
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3817795515060425,
      "learning_rate": 0.0005638020499793532,
      "loss": 3.0616,
      "step": 36400
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6422760486602783,
      "learning_rate": 0.0005638001020530414,
      "loss": 3.086,
      "step": 36401
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7427743673324585,
      "learning_rate": 0.0005637981540776842,
      "loss": 3.1881,
      "step": 36402
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.2295098304748535,
      "learning_rate": 0.000563796206053282,
      "loss": 3.1513,
      "step": 36403
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3169821500778198,
      "learning_rate": 0.000563794257979835,
      "loss": 3.1615,
      "step": 36404
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7747377157211304,
      "learning_rate": 0.0005637923098573437,
      "loss": 3.2169,
      "step": 36405
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.5618834495544434,
      "learning_rate": 0.0005637903616858085,
      "loss": 3.3112,
      "step": 36406
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3520817756652832,
      "learning_rate": 0.0005637884134652297,
      "loss": 3.012,
      "step": 36407
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.023135185241699,
      "learning_rate": 0.0005637864651956074,
      "loss": 3.1297,
      "step": 36408
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2892816066741943,
      "learning_rate": 0.0005637845168769425,
      "loss": 3.061,
      "step": 36409
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8899977207183838,
      "learning_rate": 0.000563782568509235,
      "loss": 2.9719,
      "step": 36410
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3380156755447388,
      "learning_rate": 0.0005637806200924853,
      "loss": 2.9527,
      "step": 36411
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.546940803527832,
      "learning_rate": 0.0005637786716266938,
      "loss": 3.1255,
      "step": 36412
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5531995296478271,
      "learning_rate": 0.000563776723111861,
      "loss": 2.7708,
      "step": 36413
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.972205638885498,
      "learning_rate": 0.000563774774547987,
      "loss": 3.1425,
      "step": 36414
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3540151119232178,
      "learning_rate": 0.0005637728259350723,
      "loss": 3.0385,
      "step": 36415
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.499153971672058,
      "learning_rate": 0.0005637708772731172,
      "loss": 3.0762,
      "step": 36416
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4725027084350586,
      "learning_rate": 0.0005637689285621222,
      "loss": 3.0641,
      "step": 36417
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4986343383789062,
      "learning_rate": 0.0005637669798020876,
      "loss": 3.0254,
      "step": 36418
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1452698707580566,
      "learning_rate": 0.0005637650309930137,
      "loss": 3.0001,
      "step": 36419
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7135754823684692,
      "learning_rate": 0.0005637630821349009,
      "loss": 3.1119,
      "step": 36420
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9595593214035034,
      "learning_rate": 0.0005637611332277496,
      "loss": 3.1003,
      "step": 36421
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.622445821762085,
      "learning_rate": 0.0005637591842715601,
      "loss": 3.0312,
      "step": 36422
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4130527973175049,
      "learning_rate": 0.0005637572352663327,
      "loss": 3.1933,
      "step": 36423
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5408612489700317,
      "learning_rate": 0.000563755286212068,
      "loss": 3.2498,
      "step": 36424
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8023539781570435,
      "learning_rate": 0.0005637533371087661,
      "loss": 2.8144,
      "step": 36425
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4958831071853638,
      "learning_rate": 0.0005637513879564275,
      "loss": 3.1831,
      "step": 36426
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5591181516647339,
      "learning_rate": 0.0005637494387550527,
      "loss": 3.3512,
      "step": 36427
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4569813013076782,
      "learning_rate": 0.0005637474895046418,
      "loss": 3.3082,
      "step": 36428
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.3005335330963135,
      "learning_rate": 0.0005637455402051951,
      "loss": 3.0617,
      "step": 36429
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.0135533809661865,
      "learning_rate": 0.0005637435908567133,
      "loss": 2.9921,
      "step": 36430
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.6711277961730957,
      "learning_rate": 0.0005637416414591965,
      "loss": 3.0693,
      "step": 36431
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9197813272476196,
      "learning_rate": 0.0005637396920126453,
      "loss": 3.1463,
      "step": 36432
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.3740756511688232,
      "learning_rate": 0.0005637377425170598,
      "loss": 3.0724,
      "step": 36433
    },
    {
      "epoch": 0.47,
      "grad_norm": 4.672321319580078,
      "learning_rate": 0.0005637357929724404,
      "loss": 3.1051,
      "step": 36434
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8305386304855347,
      "learning_rate": 0.0005637338433787877,
      "loss": 2.8803,
      "step": 36435
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8520525693893433,
      "learning_rate": 0.0005637318937361018,
      "loss": 2.7561,
      "step": 36436
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.3352768421173096,
      "learning_rate": 0.000563729944044383,
      "loss": 2.8489,
      "step": 36437
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.9190640449523926,
      "learning_rate": 0.0005637279943036321,
      "loss": 3.0352,
      "step": 36438
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6842387914657593,
      "learning_rate": 0.0005637260445138492,
      "loss": 3.1946,
      "step": 36439
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5470420122146606,
      "learning_rate": 0.0005637240946750345,
      "loss": 3.1311,
      "step": 36440
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2580769062042236,
      "learning_rate": 0.0005637221447871884,
      "loss": 2.8859,
      "step": 36441
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.6464381217956543,
      "learning_rate": 0.0005637201948503116,
      "loss": 3.1655,
      "step": 36442
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3741508722305298,
      "learning_rate": 0.0005637182448644042,
      "loss": 3.1271,
      "step": 36443
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5914115905761719,
      "learning_rate": 0.0005637162948294665,
      "loss": 3.1094,
      "step": 36444
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.33225154876709,
      "learning_rate": 0.0005637143447454992,
      "loss": 2.8636,
      "step": 36445
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.162785053253174,
      "learning_rate": 0.0005637123946125021,
      "loss": 2.9054,
      "step": 36446
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3762556314468384,
      "learning_rate": 0.0005637104444304761,
      "loss": 3.0476,
      "step": 36447
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8233397006988525,
      "learning_rate": 0.0005637084941994213,
      "loss": 3.2065,
      "step": 36448
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5521363019943237,
      "learning_rate": 0.0005637065439193381,
      "loss": 2.8329,
      "step": 36449
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8253936767578125,
      "learning_rate": 0.0005637045935902269,
      "loss": 2.9823,
      "step": 36450
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.411751627922058,
      "learning_rate": 0.000563702643212088,
      "loss": 2.9676,
      "step": 36451
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8167704343795776,
      "learning_rate": 0.0005637006927849218,
      "loss": 3.0372,
      "step": 36452
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2380074262619019,
      "learning_rate": 0.0005636987423087287,
      "loss": 2.9824,
      "step": 36453
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.327068567276001,
      "learning_rate": 0.000563696791783509,
      "loss": 3.0801,
      "step": 36454
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4923244714736938,
      "learning_rate": 0.0005636948412092631,
      "loss": 2.9666,
      "step": 36455
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.5811549425125122,
      "learning_rate": 0.0005636928905859914,
      "loss": 3.1613,
      "step": 36456
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.0091350078582764,
      "learning_rate": 0.0005636909399136941,
      "loss": 3.064,
      "step": 36457
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6849850416183472,
      "learning_rate": 0.0005636889891923718,
      "loss": 3.1758,
      "step": 36458
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.439050555229187,
      "learning_rate": 0.0005636870384220247,
      "loss": 3.0299,
      "step": 36459
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.9798880815505981,
      "learning_rate": 0.0005636850876026531,
      "loss": 3.0839,
      "step": 36460
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.8450132608413696,
      "learning_rate": 0.0005636831367342576,
      "loss": 3.197,
      "step": 36461
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4300971031188965,
      "learning_rate": 0.0005636811858168384,
      "loss": 3.0382,
      "step": 36462
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.5070574283599854,
      "learning_rate": 0.0005636792348503958,
      "loss": 3.2313,
      "step": 36463
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.1097781658172607,
      "learning_rate": 0.0005636772838349303,
      "loss": 3.0979,
      "step": 36464
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4210997819900513,
      "learning_rate": 0.0005636753327704423,
      "loss": 3.2088,
      "step": 36465
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.583679437637329,
      "learning_rate": 0.000563673381656932,
      "loss": 3.073,
      "step": 36466
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.869009017944336,
      "learning_rate": 0.0005636714304943999,
      "loss": 2.8992,
      "step": 36467
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.3380879163742065,
      "learning_rate": 0.0005636694792828462,
      "loss": 2.9028,
      "step": 36468
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.4719865322113037,
      "learning_rate": 0.0005636675280222714,
      "loss": 3.1804,
      "step": 36469
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.2505277395248413,
      "learning_rate": 0.0005636655767126759,
      "loss": 3.1381,
      "step": 36470
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.253302812576294,
      "learning_rate": 0.0005636636253540599,
      "loss": 2.7746,
      "step": 36471
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.460305094718933,
      "learning_rate": 0.000563661673946424,
      "loss": 3.146,
      "step": 36472
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.888593912124634,
      "learning_rate": 0.0005636597224897682,
      "loss": 3.0327,
      "step": 36473
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.6397289037704468,
      "learning_rate": 0.0005636577709840933,
      "loss": 3.0414,
      "step": 36474
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.650923490524292,
      "learning_rate": 0.0005636558194293994,
      "loss": 2.9759,
      "step": 36475
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.7316055297851562,
      "learning_rate": 0.0005636538678256866,
      "loss": 2.9297,
      "step": 36476
    },
    {
      "epoch": 0.47,
      "grad_norm": 1.383614420890808,
      "learning_rate": 0.0005636519161729559,
      "loss": 3.2464,
      "step": 36477
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.071368455886841,
      "learning_rate": 0.0005636499644712073,
      "loss": 3.1208,
      "step": 36478
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.9788520336151123,
      "learning_rate": 0.0005636480127204412,
      "loss": 3.0863,
      "step": 36479
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.349184989929199,
      "learning_rate": 0.0005636460609206578,
      "loss": 3.0514,
      "step": 36480
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6482728719711304,
      "learning_rate": 0.0005636441090718577,
      "loss": 3.0084,
      "step": 36481
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9158962965011597,
      "learning_rate": 0.0005636421571740413,
      "loss": 3.0211,
      "step": 36482
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7491061687469482,
      "learning_rate": 0.0005636402052272087,
      "loss": 3.2182,
      "step": 36483
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5019224882125854,
      "learning_rate": 0.0005636382532313605,
      "loss": 3.2379,
      "step": 36484
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5189307928085327,
      "learning_rate": 0.0005636363011864969,
      "loss": 3.0913,
      "step": 36485
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.585542678833008,
      "learning_rate": 0.0005636343490926185,
      "loss": 3.1171,
      "step": 36486
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.904525876045227,
      "learning_rate": 0.0005636323969497253,
      "loss": 2.957,
      "step": 36487
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8310407400131226,
      "learning_rate": 0.0005636304447578179,
      "loss": 3.0235,
      "step": 36488
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6094697713851929,
      "learning_rate": 0.0005636284925168966,
      "loss": 3.1548,
      "step": 36489
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.100935697555542,
      "learning_rate": 0.0005636265402269618,
      "loss": 3.08,
      "step": 36490
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6879854202270508,
      "learning_rate": 0.000563624587888014,
      "loss": 2.8053,
      "step": 36491
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8505789041519165,
      "learning_rate": 0.0005636226355000532,
      "loss": 3.2304,
      "step": 36492
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.738269567489624,
      "learning_rate": 0.0005636206830630802,
      "loss": 2.9179,
      "step": 36493
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6862653493881226,
      "learning_rate": 0.000563618730577095,
      "loss": 3.0055,
      "step": 36494
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3411691188812256,
      "learning_rate": 0.000563616778042098,
      "loss": 3.2301,
      "step": 36495
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.204773187637329,
      "learning_rate": 0.0005636148254580898,
      "loss": 2.8736,
      "step": 36496
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9393571615219116,
      "learning_rate": 0.0005636128728250707,
      "loss": 3.1231,
      "step": 36497
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6831151247024536,
      "learning_rate": 0.0005636109201430408,
      "loss": 3.1226,
      "step": 36498
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7567667961120605,
      "learning_rate": 0.0005636089674120008,
      "loss": 2.8709,
      "step": 36499
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8957359790802002,
      "learning_rate": 0.000563607014631951,
      "loss": 2.9539,
      "step": 36500
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2806391716003418,
      "learning_rate": 0.0005636050618028915,
      "loss": 2.8256,
      "step": 36501
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3795809745788574,
      "learning_rate": 0.0005636031089248229,
      "loss": 3.0966,
      "step": 36502
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7966969013214111,
      "learning_rate": 0.0005636011559977455,
      "loss": 3.181,
      "step": 36503
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7673894166946411,
      "learning_rate": 0.0005635992030216596,
      "loss": 2.9891,
      "step": 36504
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8737996816635132,
      "learning_rate": 0.0005635972499965658,
      "loss": 3.1508,
      "step": 36505
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7980483770370483,
      "learning_rate": 0.0005635952969224641,
      "loss": 2.8979,
      "step": 36506
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.527169942855835,
      "learning_rate": 0.0005635933437993551,
      "loss": 3.121,
      "step": 36507
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4347190856933594,
      "learning_rate": 0.0005635913906272392,
      "loss": 3.227,
      "step": 36508
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5524606704711914,
      "learning_rate": 0.0005635894374061167,
      "loss": 2.9985,
      "step": 36509
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4698669910430908,
      "learning_rate": 0.000563587484135988,
      "loss": 3.2764,
      "step": 36510
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4780170917510986,
      "learning_rate": 0.0005635855308168533,
      "loss": 3.0316,
      "step": 36511
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5333373546600342,
      "learning_rate": 0.0005635835774487131,
      "loss": 3.0071,
      "step": 36512
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.040515184402466,
      "learning_rate": 0.0005635816240315677,
      "loss": 3.1635,
      "step": 36513
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0687901973724365,
      "learning_rate": 0.0005635796705654176,
      "loss": 3.1284,
      "step": 36514
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.009169816970825,
      "learning_rate": 0.000563577717050263,
      "loss": 3.1333,
      "step": 36515
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5510694980621338,
      "learning_rate": 0.0005635757634861043,
      "loss": 3.2749,
      "step": 36516
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2096784114837646,
      "learning_rate": 0.0005635738098729419,
      "loss": 3.034,
      "step": 36517
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6377803087234497,
      "learning_rate": 0.0005635718562107762,
      "loss": 3.0691,
      "step": 36518
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6234614849090576,
      "learning_rate": 0.0005635699024996075,
      "loss": 3.0734,
      "step": 36519
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1363518238067627,
      "learning_rate": 0.0005635679487394361,
      "loss": 3.2627,
      "step": 36520
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.541507363319397,
      "learning_rate": 0.0005635659949302626,
      "loss": 2.944,
      "step": 36521
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9862723350524902,
      "learning_rate": 0.000563564041072087,
      "loss": 3.2949,
      "step": 36522
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.495877981185913,
      "learning_rate": 0.00056356208716491,
      "loss": 2.9251,
      "step": 36523
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.058579683303833,
      "learning_rate": 0.0005635601332087319,
      "loss": 3.064,
      "step": 36524
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3088624477386475,
      "learning_rate": 0.0005635581792035528,
      "loss": 3.0942,
      "step": 36525
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.918776512145996,
      "learning_rate": 0.0005635562251493734,
      "loss": 2.9349,
      "step": 36526
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3963623046875,
      "learning_rate": 0.0005635542710461939,
      "loss": 3.2073,
      "step": 36527
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.11635422706604,
      "learning_rate": 0.0005635523168940147,
      "loss": 3.3606,
      "step": 36528
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.407610297203064,
      "learning_rate": 0.0005635503626928362,
      "loss": 2.9024,
      "step": 36529
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.834053874015808,
      "learning_rate": 0.0005635484084426586,
      "loss": 3.0018,
      "step": 36530
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3493114709854126,
      "learning_rate": 0.0005635464541434824,
      "loss": 2.6977,
      "step": 36531
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5373715162277222,
      "learning_rate": 0.0005635444997953079,
      "loss": 3.061,
      "step": 36532
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.35002601146698,
      "learning_rate": 0.0005635425453981356,
      "loss": 3.1009,
      "step": 36533
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.171050786972046,
      "learning_rate": 0.0005635405909519657,
      "loss": 3.1858,
      "step": 36534
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3639589548110962,
      "learning_rate": 0.0005635386364567987,
      "loss": 2.9821,
      "step": 36535
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7587547302246094,
      "learning_rate": 0.0005635366819126349,
      "loss": 3.0447,
      "step": 36536
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2299513816833496,
      "learning_rate": 0.0005635347273194746,
      "loss": 2.986,
      "step": 36537
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5001075267791748,
      "learning_rate": 0.0005635327726773182,
      "loss": 3.0879,
      "step": 36538
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8000926971435547,
      "learning_rate": 0.0005635308179861661,
      "loss": 3.3414,
      "step": 36539
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.312955379486084,
      "learning_rate": 0.0005635288632460187,
      "loss": 3.0134,
      "step": 36540
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5588459968566895,
      "learning_rate": 0.0005635269084568763,
      "loss": 3.1102,
      "step": 36541
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6361191272735596,
      "learning_rate": 0.0005635249536187391,
      "loss": 2.7063,
      "step": 36542
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2255488634109497,
      "learning_rate": 0.0005635229987316078,
      "loss": 3.0296,
      "step": 36543
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3174657821655273,
      "learning_rate": 0.0005635210437954826,
      "loss": 2.7503,
      "step": 36544
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3088078498840332,
      "learning_rate": 0.0005635190888103639,
      "loss": 2.9389,
      "step": 36545
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4924099445343018,
      "learning_rate": 0.0005635171337762519,
      "loss": 3.1767,
      "step": 36546
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6478195190429688,
      "learning_rate": 0.0005635151786931472,
      "loss": 3.2175,
      "step": 36547
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4779664278030396,
      "learning_rate": 0.00056351322356105,
      "loss": 2.9915,
      "step": 36548
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4971872568130493,
      "learning_rate": 0.0005635112683799607,
      "loss": 3.2581,
      "step": 36549
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7844913005828857,
      "learning_rate": 0.0005635093131498797,
      "loss": 3.0113,
      "step": 36550
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.508033037185669,
      "learning_rate": 0.0005635073578708074,
      "loss": 2.9401,
      "step": 36551
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5625593662261963,
      "learning_rate": 0.0005635054025427441,
      "loss": 3.1728,
      "step": 36552
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.783137559890747,
      "learning_rate": 0.00056350344716569,
      "loss": 3.1853,
      "step": 36553
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4843590259552002,
      "learning_rate": 0.0005635014917396459,
      "loss": 3.0112,
      "step": 36554
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4788435697555542,
      "learning_rate": 0.0005634995362646118,
      "loss": 3.1108,
      "step": 36555
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.316032886505127,
      "learning_rate": 0.000563497580740588,
      "loss": 2.8504,
      "step": 36556
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6176801919937134,
      "learning_rate": 0.0005634956251675752,
      "loss": 2.9478,
      "step": 36557
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4308375120162964,
      "learning_rate": 0.0005634936695455736,
      "loss": 3.1796,
      "step": 36558
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.14823842048645,
      "learning_rate": 0.0005634917138745834,
      "loss": 2.9006,
      "step": 36559
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5895545482635498,
      "learning_rate": 0.0005634897581546053,
      "loss": 3.0421,
      "step": 36560
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8013702630996704,
      "learning_rate": 0.0005634878023856394,
      "loss": 3.0144,
      "step": 36561
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7172536849975586,
      "learning_rate": 0.0005634858465676861,
      "loss": 3.0612,
      "step": 36562
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.269937515258789,
      "learning_rate": 0.0005634838907007458,
      "loss": 3.104,
      "step": 36563
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7642608880996704,
      "learning_rate": 0.000563481934784819,
      "loss": 2.7856,
      "step": 36564
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.7315704822540283,
      "learning_rate": 0.0005634799788199057,
      "loss": 3.0738,
      "step": 36565
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.888822078704834,
      "learning_rate": 0.0005634780228060068,
      "loss": 3.0169,
      "step": 36566
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.500286340713501,
      "learning_rate": 0.0005634760667431221,
      "loss": 3.0534,
      "step": 36567
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.256791591644287,
      "learning_rate": 0.0005634741106312523,
      "loss": 3.1233,
      "step": 36568
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.561582326889038,
      "learning_rate": 0.0005634721544703978,
      "loss": 2.9711,
      "step": 36569
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1973681449890137,
      "learning_rate": 0.0005634701982605588,
      "loss": 3.1589,
      "step": 36570
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.663283348083496,
      "learning_rate": 0.0005634682420017357,
      "loss": 2.8485,
      "step": 36571
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9555200338363647,
      "learning_rate": 0.0005634662856939289,
      "loss": 2.9871,
      "step": 36572
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6983296871185303,
      "learning_rate": 0.0005634643293371388,
      "loss": 3.1432,
      "step": 36573
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.514439344406128,
      "learning_rate": 0.0005634623729313655,
      "loss": 3.1764,
      "step": 36574
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.1874668598175049,
      "learning_rate": 0.0005634604164766097,
      "loss": 3.3463,
      "step": 36575
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8050819635391235,
      "learning_rate": 0.0005634584599728717,
      "loss": 3.0146,
      "step": 36576
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.423465609550476,
      "learning_rate": 0.0005634565034201518,
      "loss": 3.0216,
      "step": 36577
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7799569368362427,
      "learning_rate": 0.0005634545468184502,
      "loss": 2.9625,
      "step": 36578
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7190653085708618,
      "learning_rate": 0.0005634525901677675,
      "loss": 3.0446,
      "step": 36579
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7642943859100342,
      "learning_rate": 0.000563450633468104,
      "loss": 3.1423,
      "step": 36580
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6730482578277588,
      "learning_rate": 0.0005634486767194601,
      "loss": 2.9223,
      "step": 36581
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6310962438583374,
      "learning_rate": 0.0005634467199218361,
      "loss": 2.8861,
      "step": 36582
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7851510047912598,
      "learning_rate": 0.0005634447630752323,
      "loss": 2.8796,
      "step": 36583
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.930629849433899,
      "learning_rate": 0.0005634428061796493,
      "loss": 2.9463,
      "step": 36584
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5849807262420654,
      "learning_rate": 0.0005634408492350872,
      "loss": 3.1838,
      "step": 36585
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6425609588623047,
      "learning_rate": 0.0005634388922415466,
      "loss": 3.2665,
      "step": 36586
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8797842264175415,
      "learning_rate": 0.0005634369351990275,
      "loss": 2.8903,
      "step": 36587
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.554368019104004,
      "learning_rate": 0.0005634349781075308,
      "loss": 3.1635,
      "step": 36588
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.640286922454834,
      "learning_rate": 0.0005634330209670563,
      "loss": 3.1605,
      "step": 36589
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8708205223083496,
      "learning_rate": 0.0005634310637776047,
      "loss": 3.1878,
      "step": 36590
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7135571241378784,
      "learning_rate": 0.0005634291065391763,
      "loss": 3.0808,
      "step": 36591
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.859358787536621,
      "learning_rate": 0.0005634271492517716,
      "loss": 3.2211,
      "step": 36592
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6492174863815308,
      "learning_rate": 0.0005634251919153907,
      "loss": 2.98,
      "step": 36593
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4004015922546387,
      "learning_rate": 0.000563423234530034,
      "loss": 3.0337,
      "step": 36594
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.853995680809021,
      "learning_rate": 0.0005634212770957021,
      "loss": 3.0732,
      "step": 36595
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.941123127937317,
      "learning_rate": 0.000563419319612395,
      "loss": 2.9966,
      "step": 36596
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.874634027481079,
      "learning_rate": 0.0005634173620801135,
      "loss": 3.1034,
      "step": 36597
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5275012254714966,
      "learning_rate": 0.0005634154044988576,
      "loss": 3.0308,
      "step": 36598
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6942448616027832,
      "learning_rate": 0.000563413446868628,
      "loss": 3.0928,
      "step": 36599
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4601562023162842,
      "learning_rate": 0.0005634114891894246,
      "loss": 3.1818,
      "step": 36600
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9266294240951538,
      "learning_rate": 0.0005634095314612482,
      "loss": 2.7649,
      "step": 36601
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.506588935852051,
      "learning_rate": 0.0005634075736840991,
      "loss": 2.8841,
      "step": 36602
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.456386923789978,
      "learning_rate": 0.0005634056158579772,
      "loss": 2.9377,
      "step": 36603
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5659313201904297,
      "learning_rate": 0.0005634036579828835,
      "loss": 2.9503,
      "step": 36604
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.107081174850464,
      "learning_rate": 0.0005634017000588181,
      "loss": 3.1326,
      "step": 36605
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.764515995979309,
      "learning_rate": 0.0005633997420857813,
      "loss": 2.7779,
      "step": 36606
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5675723552703857,
      "learning_rate": 0.0005633977840637734,
      "loss": 3.1455,
      "step": 36607
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2865958213806152,
      "learning_rate": 0.0005633958259927951,
      "loss": 3.2369,
      "step": 36608
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7870299816131592,
      "learning_rate": 0.0005633938678728464,
      "loss": 2.9365,
      "step": 36609
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6657577753067017,
      "learning_rate": 0.0005633919097039279,
      "loss": 2.8128,
      "step": 36610
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5772829055786133,
      "learning_rate": 0.0005633899514860398,
      "loss": 2.9451,
      "step": 36611
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6853703260421753,
      "learning_rate": 0.0005633879932191825,
      "loss": 3.0079,
      "step": 36612
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3460807800292969,
      "learning_rate": 0.0005633860349033564,
      "loss": 2.9345,
      "step": 36613
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.942605495452881,
      "learning_rate": 0.0005633840765385619,
      "loss": 3.1541,
      "step": 36614
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.346350908279419,
      "learning_rate": 0.0005633821181247993,
      "loss": 2.8781,
      "step": 36615
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6369972229003906,
      "learning_rate": 0.0005633801596620691,
      "loss": 3.1846,
      "step": 36616
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.493180751800537,
      "learning_rate": 0.0005633782011503714,
      "loss": 3.1419,
      "step": 36617
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.8003146648406982,
      "learning_rate": 0.0005633762425897068,
      "loss": 3.0132,
      "step": 36618
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6265751123428345,
      "learning_rate": 0.0005633742839800756,
      "loss": 2.8329,
      "step": 36619
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4958832263946533,
      "learning_rate": 0.0005633723253214781,
      "loss": 2.9499,
      "step": 36620
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.174535036087036,
      "learning_rate": 0.0005633703666139148,
      "loss": 3.3437,
      "step": 36621
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0269722938537598,
      "learning_rate": 0.000563368407857386,
      "loss": 2.9906,
      "step": 36622
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3332433700561523,
      "learning_rate": 0.0005633664490518918,
      "loss": 3.0702,
      "step": 36623
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.148742437362671,
      "learning_rate": 0.0005633644901974331,
      "loss": 3.0887,
      "step": 36624
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3868987560272217,
      "learning_rate": 0.0005633625312940098,
      "loss": 3.1946,
      "step": 36625
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3879594802856445,
      "learning_rate": 0.0005633605723416225,
      "loss": 3.0051,
      "step": 36626
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.228365659713745,
      "learning_rate": 0.0005633586133402715,
      "loss": 3.1832,
      "step": 36627
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1038506031036377,
      "learning_rate": 0.0005633566542899571,
      "loss": 2.9971,
      "step": 36628
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5950641632080078,
      "learning_rate": 0.0005633546951906797,
      "loss": 3.2067,
      "step": 36629
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8850303888320923,
      "learning_rate": 0.0005633527360424398,
      "loss": 3.0442,
      "step": 36630
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.935018539428711,
      "learning_rate": 0.0005633507768452377,
      "loss": 2.8709,
      "step": 36631
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4240403175354004,
      "learning_rate": 0.0005633488175990736,
      "loss": 2.8017,
      "step": 36632
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8638423681259155,
      "learning_rate": 0.000563346858303948,
      "loss": 2.9993,
      "step": 36633
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.074826955795288,
      "learning_rate": 0.0005633448989598612,
      "loss": 2.8708,
      "step": 36634
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.515461802482605,
      "learning_rate": 0.0005633429395668138,
      "loss": 2.8851,
      "step": 36635
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.189518451690674,
      "learning_rate": 0.0005633409801248058,
      "loss": 2.9412,
      "step": 36636
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2984859943389893,
      "learning_rate": 0.0005633390206338378,
      "loss": 2.9433,
      "step": 36637
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7342649698257446,
      "learning_rate": 0.0005633370610939101,
      "loss": 3.0424,
      "step": 36638
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.777360200881958,
      "learning_rate": 0.0005633351015050231,
      "loss": 2.9435,
      "step": 36639
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.92535400390625,
      "learning_rate": 0.000563333141867177,
      "loss": 3.0926,
      "step": 36640
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3502827882766724,
      "learning_rate": 0.0005633311821803725,
      "loss": 3.0172,
      "step": 36641
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.182703971862793,
      "learning_rate": 0.0005633292224446096,
      "loss": 3.2502,
      "step": 36642
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5493223667144775,
      "learning_rate": 0.0005633272626598889,
      "loss": 2.9963,
      "step": 36643
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.04292631149292,
      "learning_rate": 0.0005633253028262107,
      "loss": 2.8328,
      "step": 36644
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0568666458129883,
      "learning_rate": 0.0005633233429435752,
      "loss": 2.9486,
      "step": 36645
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5145950317382812,
      "learning_rate": 0.0005633213830119832,
      "loss": 3.0951,
      "step": 36646
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.062948703765869,
      "learning_rate": 0.0005633194230314346,
      "loss": 2.9677,
      "step": 36647
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8815935850143433,
      "learning_rate": 0.00056331746300193,
      "loss": 3.0659,
      "step": 36648
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6895807981491089,
      "learning_rate": 0.0005633155029234697,
      "loss": 2.9225,
      "step": 36649
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5339250564575195,
      "learning_rate": 0.000563313542796054,
      "loss": 2.9284,
      "step": 36650
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5766195058822632,
      "learning_rate": 0.0005633115826196836,
      "loss": 3.0753,
      "step": 36651
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5006417036056519,
      "learning_rate": 0.0005633096223943584,
      "loss": 3.153,
      "step": 36652
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.129749298095703,
      "learning_rate": 0.0005633076621200789,
      "loss": 3.1541,
      "step": 36653
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3875789642333984,
      "learning_rate": 0.0005633057017968456,
      "loss": 2.9872,
      "step": 36654
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2861557006835938,
      "learning_rate": 0.0005633037414246589,
      "loss": 3.0695,
      "step": 36655
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9104095697402954,
      "learning_rate": 0.000563301781003519,
      "loss": 2.9828,
      "step": 36656
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8248900175094604,
      "learning_rate": 0.0005632998205334263,
      "loss": 2.7983,
      "step": 36657
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1888513565063477,
      "learning_rate": 0.0005632978600143813,
      "loss": 2.9109,
      "step": 36658
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4612656831741333,
      "learning_rate": 0.0005632958994463841,
      "loss": 3.0714,
      "step": 36659
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.483382225036621,
      "learning_rate": 0.0005632939388294354,
      "loss": 3.2548,
      "step": 36660
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5714281797409058,
      "learning_rate": 0.0005632919781635351,
      "loss": 3.1537,
      "step": 36661
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5238046646118164,
      "learning_rate": 0.0005632900174486841,
      "loss": 2.9367,
      "step": 36662
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9685837030410767,
      "learning_rate": 0.0005632880566848825,
      "loss": 2.9503,
      "step": 36663
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5022730827331543,
      "learning_rate": 0.0005632860958721306,
      "loss": 3.149,
      "step": 36664
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9516171216964722,
      "learning_rate": 0.0005632841350104289,
      "loss": 3.0037,
      "step": 36665
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.281522274017334,
      "learning_rate": 0.0005632821740997777,
      "loss": 2.705,
      "step": 36666
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.507107138633728,
      "learning_rate": 0.0005632802131401774,
      "loss": 3.0421,
      "step": 36667
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7993415594100952,
      "learning_rate": 0.0005632782521316282,
      "loss": 3.1577,
      "step": 36668
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3440279960632324,
      "learning_rate": 0.0005632762910741308,
      "loss": 3.0493,
      "step": 36669
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.039964437484741,
      "learning_rate": 0.0005632743299676852,
      "loss": 3.0608,
      "step": 36670
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.734830379486084,
      "learning_rate": 0.0005632723688122921,
      "loss": 3.2152,
      "step": 36671
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4006813764572144,
      "learning_rate": 0.0005632704076079516,
      "loss": 2.9154,
      "step": 36672
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9855856895446777,
      "learning_rate": 0.0005632684463546641,
      "loss": 3.0177,
      "step": 36673
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0207667350769043,
      "learning_rate": 0.0005632664850524302,
      "loss": 3.0599,
      "step": 36674
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3529624938964844,
      "learning_rate": 0.0005632645237012499,
      "loss": 3.0101,
      "step": 36675
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5988123416900635,
      "learning_rate": 0.0005632625623011238,
      "loss": 3.0307,
      "step": 36676
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3591432571411133,
      "learning_rate": 0.0005632606008520522,
      "loss": 2.9755,
      "step": 36677
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4144928455352783,
      "learning_rate": 0.0005632586393540356,
      "loss": 2.9953,
      "step": 36678
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4992963075637817,
      "learning_rate": 0.0005632566778070742,
      "loss": 3.0061,
      "step": 36679
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3814464807510376,
      "learning_rate": 0.0005632547162111684,
      "loss": 3.1506,
      "step": 36680
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8351658582687378,
      "learning_rate": 0.0005632527545663186,
      "loss": 2.9582,
      "step": 36681
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8267979621887207,
      "learning_rate": 0.0005632507928725251,
      "loss": 2.7772,
      "step": 36682
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3078871965408325,
      "learning_rate": 0.0005632488311297884,
      "loss": 3.242,
      "step": 36683
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6496963500976562,
      "learning_rate": 0.0005632468693381087,
      "loss": 2.9293,
      "step": 36684
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5775038003921509,
      "learning_rate": 0.0005632449074974864,
      "loss": 3.306,
      "step": 36685
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4534125328063965,
      "learning_rate": 0.000563242945607922,
      "loss": 3.3013,
      "step": 36686
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5514806509017944,
      "learning_rate": 0.0005632409836694157,
      "loss": 2.8113,
      "step": 36687
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5502045154571533,
      "learning_rate": 0.000563239021681968,
      "loss": 3.0804,
      "step": 36688
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5677897930145264,
      "learning_rate": 0.000563237059645579,
      "loss": 3.0589,
      "step": 36689
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7012710571289062,
      "learning_rate": 0.0005632350975602494,
      "loss": 3.1494,
      "step": 36690
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.696099281311035,
      "learning_rate": 0.0005632331354259795,
      "loss": 2.9672,
      "step": 36691
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3038409948349,
      "learning_rate": 0.0005632311732427695,
      "loss": 3.1234,
      "step": 36692
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4916623830795288,
      "learning_rate": 0.0005632292110106198,
      "loss": 3.4878,
      "step": 36693
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4187383651733398,
      "learning_rate": 0.000563227248729531,
      "loss": 3.1394,
      "step": 36694
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3713006973266602,
      "learning_rate": 0.0005632252863995032,
      "loss": 3.1625,
      "step": 36695
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.720088005065918,
      "learning_rate": 0.0005632233240205368,
      "loss": 3.1017,
      "step": 36696
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5120961666107178,
      "learning_rate": 0.0005632213615926323,
      "loss": 2.7824,
      "step": 36697
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.379888892173767,
      "learning_rate": 0.0005632193991157899,
      "loss": 3.1956,
      "step": 36698
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8244224786758423,
      "learning_rate": 0.0005632174365900101,
      "loss": 2.891,
      "step": 36699
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6300462484359741,
      "learning_rate": 0.0005632154740152931,
      "loss": 3.1972,
      "step": 36700
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5630220174789429,
      "learning_rate": 0.0005632135113916396,
      "loss": 3.2229,
      "step": 36701
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.985026240348816,
      "learning_rate": 0.0005632115487190495,
      "loss": 2.8073,
      "step": 36702
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8643392324447632,
      "learning_rate": 0.0005632095859975235,
      "loss": 3.2061,
      "step": 36703
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4289535284042358,
      "learning_rate": 0.0005632076232270619,
      "loss": 2.9401,
      "step": 36704
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6897921562194824,
      "learning_rate": 0.000563205660407665,
      "loss": 3.1925,
      "step": 36705
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9884960651397705,
      "learning_rate": 0.0005632036975393332,
      "loss": 3.0951,
      "step": 36706
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2634267807006836,
      "learning_rate": 0.0005632017346220669,
      "loss": 3.1344,
      "step": 36707
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1253092288970947,
      "learning_rate": 0.0005631997716558664,
      "loss": 3.3095,
      "step": 36708
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2553300857543945,
      "learning_rate": 0.000563197808640732,
      "loss": 2.9076,
      "step": 36709
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.365804433822632,
      "learning_rate": 0.0005631958455766644,
      "loss": 3.061,
      "step": 36710
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5676238536834717,
      "learning_rate": 0.0005631938824636636,
      "loss": 2.7781,
      "step": 36711
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.777907371520996,
      "learning_rate": 0.00056319191930173,
      "loss": 3.109,
      "step": 36712
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5630807876586914,
      "learning_rate": 0.0005631899560908641,
      "loss": 3.2185,
      "step": 36713
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6299303770065308,
      "learning_rate": 0.0005631879928310662,
      "loss": 3.1061,
      "step": 36714
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3721436262130737,
      "learning_rate": 0.0005631860295223367,
      "loss": 3.1311,
      "step": 36715
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.45575213432312,
      "learning_rate": 0.000563184066164676,
      "loss": 3.0344,
      "step": 36716
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3641936779022217,
      "learning_rate": 0.0005631821027580844,
      "loss": 3.2235,
      "step": 36717
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0652966499328613,
      "learning_rate": 0.0005631801393025623,
      "loss": 2.9301,
      "step": 36718
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9463025331497192,
      "learning_rate": 0.00056317817579811,
      "loss": 3.1966,
      "step": 36719
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6178913116455078,
      "learning_rate": 0.000563176212244728,
      "loss": 3.0894,
      "step": 36720
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0936431884765625,
      "learning_rate": 0.0005631742486424165,
      "loss": 3.1167,
      "step": 36721
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4845141172409058,
      "learning_rate": 0.0005631722849911759,
      "loss": 3.0327,
      "step": 36722
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.290017604827881,
      "learning_rate": 0.0005631703212910067,
      "loss": 3.0061,
      "step": 36723
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.6619865894317627,
      "learning_rate": 0.0005631683575419091,
      "loss": 3.0496,
      "step": 36724
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6492236852645874,
      "learning_rate": 0.0005631663937438835,
      "loss": 2.9585,
      "step": 36725
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4679232835769653,
      "learning_rate": 0.0005631644298969303,
      "loss": 3.0161,
      "step": 36726
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.798158884048462,
      "learning_rate": 0.0005631624660010499,
      "loss": 3.0792,
      "step": 36727
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7054252624511719,
      "learning_rate": 0.0005631605020562428,
      "loss": 3.1757,
      "step": 36728
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4202808141708374,
      "learning_rate": 0.000563158538062509,
      "loss": 3.0533,
      "step": 36729
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.451629877090454,
      "learning_rate": 0.000563156574019849,
      "loss": 3.0985,
      "step": 36730
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2690205574035645,
      "learning_rate": 0.0005631546099282633,
      "loss": 3.2135,
      "step": 36731
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7207858562469482,
      "learning_rate": 0.0005631526457877522,
      "loss": 3.0889,
      "step": 36732
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7199556827545166,
      "learning_rate": 0.0005631506815983161,
      "loss": 2.9396,
      "step": 36733
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.954150676727295,
      "learning_rate": 0.0005631487173599553,
      "loss": 3.0425,
      "step": 36734
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7037032842636108,
      "learning_rate": 0.0005631467530726701,
      "loss": 3.0588,
      "step": 36735
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5415905714035034,
      "learning_rate": 0.0005631447887364611,
      "loss": 3.2808,
      "step": 36736
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2853977680206299,
      "learning_rate": 0.0005631428243513284,
      "loss": 3.1356,
      "step": 36737
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9070959091186523,
      "learning_rate": 0.0005631408599172726,
      "loss": 2.9745,
      "step": 36738
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3832859992980957,
      "learning_rate": 0.0005631388954342938,
      "loss": 3.0653,
      "step": 36739
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4890449047088623,
      "learning_rate": 0.0005631369309023926,
      "loss": 2.8963,
      "step": 36740
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.056335210800171,
      "learning_rate": 0.0005631349663215693,
      "loss": 3.0779,
      "step": 36741
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5200364589691162,
      "learning_rate": 0.0005631330016918241,
      "loss": 3.079,
      "step": 36742
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8403106927871704,
      "learning_rate": 0.0005631310370131576,
      "loss": 2.8588,
      "step": 36743
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4937688112258911,
      "learning_rate": 0.0005631290722855701,
      "loss": 3.066,
      "step": 36744
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5004760026931763,
      "learning_rate": 0.000563127107509062,
      "loss": 3.0486,
      "step": 36745
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7130684852600098,
      "learning_rate": 0.0005631251426836335,
      "loss": 3.1126,
      "step": 36746
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4512665271759033,
      "learning_rate": 0.0005631231778092851,
      "loss": 3.1061,
      "step": 36747
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.312654733657837,
      "learning_rate": 0.0005631212128860171,
      "loss": 2.9786,
      "step": 36748
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8613100051879883,
      "learning_rate": 0.00056311924791383,
      "loss": 3.2022,
      "step": 36749
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.52744722366333,
      "learning_rate": 0.000563117282892724,
      "loss": 2.977,
      "step": 36750
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2618135213851929,
      "learning_rate": 0.0005631153178226995,
      "loss": 3.2383,
      "step": 36751
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9660038948059082,
      "learning_rate": 0.0005631133527037569,
      "loss": 3.027,
      "step": 36752
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.0965583324432373,
      "learning_rate": 0.0005631113875358966,
      "loss": 3.2465,
      "step": 36753
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.623870611190796,
      "learning_rate": 0.0005631094223191189,
      "loss": 2.8213,
      "step": 36754
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5465586185455322,
      "learning_rate": 0.0005631074570534243,
      "loss": 3.1142,
      "step": 36755
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3944473266601562,
      "learning_rate": 0.0005631054917388128,
      "loss": 3.209,
      "step": 36756
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.127321481704712,
      "learning_rate": 0.0005631035263752853,
      "loss": 2.9301,
      "step": 36757
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7252336740493774,
      "learning_rate": 0.0005631015609628417,
      "loss": 2.7786,
      "step": 36758
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0793559551239014,
      "learning_rate": 0.0005630995955014827,
      "loss": 3.1667,
      "step": 36759
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.7522268295288086,
      "learning_rate": 0.0005630976299912084,
      "loss": 3.0626,
      "step": 36760
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9560288190841675,
      "learning_rate": 0.0005630956644320193,
      "loss": 2.9559,
      "step": 36761
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5630934238433838,
      "learning_rate": 0.0005630936988239158,
      "loss": 3.1913,
      "step": 36762
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4135653972625732,
      "learning_rate": 0.0005630917331668981,
      "loss": 3.1897,
      "step": 36763
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.037343978881836,
      "learning_rate": 0.0005630897674609669,
      "loss": 2.8358,
      "step": 36764
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8821163177490234,
      "learning_rate": 0.0005630878017061222,
      "loss": 2.9322,
      "step": 36765
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.439897298812866,
      "learning_rate": 0.0005630858359023644,
      "loss": 3.0709,
      "step": 36766
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.768359661102295,
      "learning_rate": 0.0005630838700496942,
      "loss": 2.9926,
      "step": 36767
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7387521266937256,
      "learning_rate": 0.0005630819041481116,
      "loss": 3.3355,
      "step": 36768
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4269046783447266,
      "learning_rate": 0.0005630799381976172,
      "loss": 2.9307,
      "step": 36769
    },
    {
      "epoch": 0.48,
      "grad_norm": 4.171468734741211,
      "learning_rate": 0.0005630779721982112,
      "loss": 2.9799,
      "step": 36770
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.326582193374634,
      "learning_rate": 0.0005630760061498941,
      "loss": 3.2198,
      "step": 36771
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7615917921066284,
      "learning_rate": 0.0005630740400526661,
      "loss": 2.9268,
      "step": 36772
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4519885778427124,
      "learning_rate": 0.0005630720739065278,
      "loss": 3.0898,
      "step": 36773
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.1867809295654297,
      "learning_rate": 0.0005630701077114793,
      "loss": 3.1195,
      "step": 36774
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.757116436958313,
      "learning_rate": 0.0005630681414675212,
      "loss": 2.9183,
      "step": 36775
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4858819246292114,
      "learning_rate": 0.0005630661751746537,
      "loss": 3.2365,
      "step": 36776
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5171749591827393,
      "learning_rate": 0.0005630642088328774,
      "loss": 2.9324,
      "step": 36777
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2354475259780884,
      "learning_rate": 0.0005630622424421923,
      "loss": 3.1774,
      "step": 36778
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4680519104003906,
      "learning_rate": 0.000563060276002599,
      "loss": 3.1761,
      "step": 36779
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9943735599517822,
      "learning_rate": 0.0005630583095140979,
      "loss": 3.0198,
      "step": 36780
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4838440418243408,
      "learning_rate": 0.0005630563429766893,
      "loss": 3.2927,
      "step": 36781
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5713046789169312,
      "learning_rate": 0.0005630543763903735,
      "loss": 3.177,
      "step": 36782
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.343993902206421,
      "learning_rate": 0.000563052409755151,
      "loss": 3.2218,
      "step": 36783
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4142370223999023,
      "learning_rate": 0.000563050443071022,
      "loss": 2.8938,
      "step": 36784
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5566976070404053,
      "learning_rate": 0.000563048476337987,
      "loss": 3.2809,
      "step": 36785
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.35927152633667,
      "learning_rate": 0.0005630465095560463,
      "loss": 3.2311,
      "step": 36786
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1368143558502197,
      "learning_rate": 0.0005630445427252004,
      "loss": 3.2726,
      "step": 36787
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.03248929977417,
      "learning_rate": 0.0005630425758454494,
      "loss": 3.0518,
      "step": 36788
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4880367517471313,
      "learning_rate": 0.0005630406089167938,
      "loss": 3.0303,
      "step": 36789
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9786133766174316,
      "learning_rate": 0.0005630386419392342,
      "loss": 3.2352,
      "step": 36790
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.953126311302185,
      "learning_rate": 0.0005630366749127706,
      "loss": 2.9453,
      "step": 36791
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.232544183731079,
      "learning_rate": 0.0005630347078374035,
      "loss": 3.2956,
      "step": 36792
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5336682796478271,
      "learning_rate": 0.0005630327407131333,
      "loss": 2.9693,
      "step": 36793
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.270469903945923,
      "learning_rate": 0.0005630307735399603,
      "loss": 3.0991,
      "step": 36794
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.453674077987671,
      "learning_rate": 0.0005630288063178851,
      "loss": 3.2287,
      "step": 36795
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4292699098587036,
      "learning_rate": 0.0005630268390469078,
      "loss": 2.7895,
      "step": 36796
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5611577033996582,
      "learning_rate": 0.0005630248717270287,
      "loss": 3.1933,
      "step": 36797
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.446883201599121,
      "learning_rate": 0.0005630229043582485,
      "loss": 2.9908,
      "step": 36798
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4916768074035645,
      "learning_rate": 0.0005630209369405672,
      "loss": 3.1254,
      "step": 36799
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.1756699085235596,
      "learning_rate": 0.0005630189694739856,
      "loss": 2.9078,
      "step": 36800
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6825202703475952,
      "learning_rate": 0.0005630170019585035,
      "loss": 3.2014,
      "step": 36801
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9364668130874634,
      "learning_rate": 0.0005630150343941217,
      "loss": 3.089,
      "step": 36802
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7355618476867676,
      "learning_rate": 0.0005630130667808405,
      "loss": 3.0745,
      "step": 36803
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6547470092773438,
      "learning_rate": 0.0005630110991186602,
      "loss": 3.2076,
      "step": 36804
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5545207262039185,
      "learning_rate": 0.0005630091314075811,
      "loss": 3.1806,
      "step": 36805
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7339457273483276,
      "learning_rate": 0.0005630071636476036,
      "loss": 3.0281,
      "step": 36806
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8338263034820557,
      "learning_rate": 0.0005630051958387283,
      "loss": 3.0726,
      "step": 36807
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7806098461151123,
      "learning_rate": 0.0005630032279809553,
      "loss": 3.1038,
      "step": 36808
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.438551425933838,
      "learning_rate": 0.000563001260074285,
      "loss": 3.1875,
      "step": 36809
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3818978071212769,
      "learning_rate": 0.0005629992921187177,
      "loss": 3.0475,
      "step": 36810
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.654071569442749,
      "learning_rate": 0.000562997324114254,
      "loss": 3.1941,
      "step": 36811
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4376766681671143,
      "learning_rate": 0.0005629953560608941,
      "loss": 3.007,
      "step": 36812
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9545172452926636,
      "learning_rate": 0.0005629933879586383,
      "loss": 3.1369,
      "step": 36813
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7331665754318237,
      "learning_rate": 0.0005629914198074872,
      "loss": 3.1591,
      "step": 36814
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.245361089706421,
      "learning_rate": 0.000562989451607441,
      "loss": 3.2161,
      "step": 36815
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.469506859779358,
      "learning_rate": 0.0005629874833585001,
      "loss": 3.0398,
      "step": 36816
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8660383224487305,
      "learning_rate": 0.0005629855150606648,
      "loss": 2.9382,
      "step": 36817
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5187652111053467,
      "learning_rate": 0.0005629835467139355,
      "loss": 3.2424,
      "step": 36818
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8465728759765625,
      "learning_rate": 0.0005629815783183128,
      "loss": 3.1207,
      "step": 36819
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6467021703720093,
      "learning_rate": 0.0005629796098737967,
      "loss": 2.8942,
      "step": 36820
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.508974313735962,
      "learning_rate": 0.0005629776413803877,
      "loss": 3.2157,
      "step": 36821
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.693215847015381,
      "learning_rate": 0.0005629756728380862,
      "loss": 2.9301,
      "step": 36822
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.4888179302215576,
      "learning_rate": 0.0005629737042468926,
      "loss": 3.0074,
      "step": 36823
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5494173765182495,
      "learning_rate": 0.0005629717356068072,
      "loss": 2.8094,
      "step": 36824
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3858089447021484,
      "learning_rate": 0.0005629697669178304,
      "loss": 2.8684,
      "step": 36825
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.6228647232055664,
      "learning_rate": 0.0005629677981799626,
      "loss": 3.2582,
      "step": 36826
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5116487741470337,
      "learning_rate": 0.0005629658293932042,
      "loss": 3.063,
      "step": 36827
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6377395391464233,
      "learning_rate": 0.0005629638605575553,
      "loss": 2.9927,
      "step": 36828
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3319329023361206,
      "learning_rate": 0.0005629618916730165,
      "loss": 3.0912,
      "step": 36829
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.183795690536499,
      "learning_rate": 0.0005629599227395882,
      "loss": 3.2277,
      "step": 36830
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.386612892150879,
      "learning_rate": 0.0005629579537572706,
      "loss": 3.2737,
      "step": 36831
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9863682985305786,
      "learning_rate": 0.0005629559847260643,
      "loss": 3.0152,
      "step": 36832
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4172749519348145,
      "learning_rate": 0.0005629540156459694,
      "loss": 3.333,
      "step": 36833
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.428666591644287,
      "learning_rate": 0.0005629520465169864,
      "loss": 3.2228,
      "step": 36834
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.801955223083496,
      "learning_rate": 0.0005629500773391156,
      "loss": 3.0302,
      "step": 36835
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5222960710525513,
      "learning_rate": 0.0005629481081123575,
      "loss": 2.9236,
      "step": 36836
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.157231330871582,
      "learning_rate": 0.0005629461388367124,
      "loss": 3.1324,
      "step": 36837
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.1389167308807373,
      "learning_rate": 0.0005629441695121806,
      "loss": 2.9766,
      "step": 36838
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.148944616317749,
      "learning_rate": 0.0005629422001387625,
      "loss": 3.2023,
      "step": 36839
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7704399824142456,
      "learning_rate": 0.0005629402307164585,
      "loss": 3.1787,
      "step": 36840
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.430379629135132,
      "learning_rate": 0.0005629382612452689,
      "loss": 3.0824,
      "step": 36841
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8710283041000366,
      "learning_rate": 0.0005629362917251941,
      "loss": 2.9643,
      "step": 36842
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3432800769805908,
      "learning_rate": 0.0005629343221562346,
      "loss": 3.0391,
      "step": 36843
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3540050983428955,
      "learning_rate": 0.0005629323525383906,
      "loss": 3.0117,
      "step": 36844
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.724804639816284,
      "learning_rate": 0.0005629303828716624,
      "loss": 3.0008,
      "step": 36845
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6727676391601562,
      "learning_rate": 0.0005629284131560507,
      "loss": 3.0692,
      "step": 36846
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7501683235168457,
      "learning_rate": 0.0005629264433915555,
      "loss": 3.1204,
      "step": 36847
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.82163143157959,
      "learning_rate": 0.0005629244735781773,
      "loss": 2.9892,
      "step": 36848
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8977043628692627,
      "learning_rate": 0.0005629225037159165,
      "loss": 3.0103,
      "step": 36849
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.488060474395752,
      "learning_rate": 0.0005629205338047735,
      "loss": 3.2877,
      "step": 36850
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2550106048583984,
      "learning_rate": 0.0005629185638447485,
      "loss": 3.3141,
      "step": 36851
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5611276626586914,
      "learning_rate": 0.0005629165938358422,
      "loss": 3.1877,
      "step": 36852
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.413749098777771,
      "learning_rate": 0.0005629146237780545,
      "loss": 2.9392,
      "step": 36853
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.320920467376709,
      "learning_rate": 0.0005629126536713861,
      "loss": 3.096,
      "step": 36854
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.33111834526062,
      "learning_rate": 0.0005629106835158373,
      "loss": 3.3305,
      "step": 36855
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1027989387512207,
      "learning_rate": 0.0005629087133114083,
      "loss": 3.1517,
      "step": 36856
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0537219047546387,
      "learning_rate": 0.0005629067430580998,
      "loss": 3.0733,
      "step": 36857
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.618643879890442,
      "learning_rate": 0.0005629047727559118,
      "loss": 3.1374,
      "step": 36858
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.643810749053955,
      "learning_rate": 0.000562902802404845,
      "loss": 3.0749,
      "step": 36859
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.580998182296753,
      "learning_rate": 0.0005629008320048995,
      "loss": 3.1104,
      "step": 36860
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9257454872131348,
      "learning_rate": 0.0005628988615560759,
      "loss": 3.0077,
      "step": 36861
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3362929821014404,
      "learning_rate": 0.0005628968910583743,
      "loss": 3.0711,
      "step": 36862
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.722504138946533,
      "learning_rate": 0.0005628949205117952,
      "loss": 3.0857,
      "step": 36863
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8234847784042358,
      "learning_rate": 0.000562892949916339,
      "loss": 3.1065,
      "step": 36864
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2784640789031982,
      "learning_rate": 0.0005628909792720062,
      "loss": 3.307,
      "step": 36865
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.173013925552368,
      "learning_rate": 0.0005628890085787968,
      "loss": 2.9342,
      "step": 36866
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0209779739379883,
      "learning_rate": 0.0005628870378367115,
      "loss": 3.0499,
      "step": 36867
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.761521339416504,
      "learning_rate": 0.0005628850670457504,
      "loss": 3.1191,
      "step": 36868
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.260329008102417,
      "learning_rate": 0.0005628830962059141,
      "loss": 2.964,
      "step": 36869
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.628742218017578,
      "learning_rate": 0.0005628811253172028,
      "loss": 3.2382,
      "step": 36870
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9547001123428345,
      "learning_rate": 0.000562879154379617,
      "loss": 2.9713,
      "step": 36871
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4160574674606323,
      "learning_rate": 0.0005628771833931569,
      "loss": 3.3183,
      "step": 36872
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4277973175048828,
      "learning_rate": 0.0005628752123578232,
      "loss": 3.0663,
      "step": 36873
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.5290420055389404,
      "learning_rate": 0.0005628732412736159,
      "loss": 3.2184,
      "step": 36874
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.301367998123169,
      "learning_rate": 0.0005628712701405354,
      "loss": 3.2018,
      "step": 36875
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5895322561264038,
      "learning_rate": 0.0005628692989585823,
      "loss": 3.2292,
      "step": 36876
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.274198532104492,
      "learning_rate": 0.0005628673277277567,
      "loss": 2.8462,
      "step": 36877
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.56152081489563,
      "learning_rate": 0.0005628653564480593,
      "loss": 3.045,
      "step": 36878
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.589996337890625,
      "learning_rate": 0.0005628633851194902,
      "loss": 2.7972,
      "step": 36879
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5657143592834473,
      "learning_rate": 0.0005628614137420497,
      "loss": 3.1994,
      "step": 36880
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.166050434112549,
      "learning_rate": 0.0005628594423157385,
      "loss": 3.0168,
      "step": 36881
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.397411823272705,
      "learning_rate": 0.0005628574708405566,
      "loss": 3.0929,
      "step": 36882
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.34183406829834,
      "learning_rate": 0.0005628554993165047,
      "loss": 3.1805,
      "step": 36883
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5142377614974976,
      "learning_rate": 0.0005628535277435829,
      "loss": 3.0647,
      "step": 36884
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4119361639022827,
      "learning_rate": 0.0005628515561217916,
      "loss": 3.2002,
      "step": 36885
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.1306397914886475,
      "learning_rate": 0.0005628495844511313,
      "loss": 2.9489,
      "step": 36886
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.006981134414673,
      "learning_rate": 0.0005628476127316023,
      "loss": 3.105,
      "step": 36887
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6116036176681519,
      "learning_rate": 0.000562845640963205,
      "loss": 3.1492,
      "step": 36888
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5130492448806763,
      "learning_rate": 0.0005628436691459398,
      "loss": 2.8274,
      "step": 36889
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8666094541549683,
      "learning_rate": 0.0005628416972798069,
      "loss": 3.2754,
      "step": 36890
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.668826103210449,
      "learning_rate": 0.0005628397253648067,
      "loss": 2.9034,
      "step": 36891
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4803696870803833,
      "learning_rate": 0.0005628377534009397,
      "loss": 2.8005,
      "step": 36892
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7670173645019531,
      "learning_rate": 0.0005628357813882063,
      "loss": 2.9771,
      "step": 36893
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3907089233398438,
      "learning_rate": 0.0005628338093266067,
      "loss": 2.7156,
      "step": 36894
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4785528182983398,
      "learning_rate": 0.0005628318372161412,
      "loss": 3.0,
      "step": 36895
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.659631609916687,
      "learning_rate": 0.0005628298650568104,
      "loss": 3.0635,
      "step": 36896
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.704925537109375,
      "learning_rate": 0.0005628278928486146,
      "loss": 3.0213,
      "step": 36897
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5625518560409546,
      "learning_rate": 0.0005628259205915541,
      "loss": 3.1558,
      "step": 36898
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4583683013916016,
      "learning_rate": 0.0005628239482856293,
      "loss": 3.133,
      "step": 36899
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1531996726989746,
      "learning_rate": 0.0005628219759308406,
      "loss": 3.183,
      "step": 36900
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7292965650558472,
      "learning_rate": 0.0005628200035271883,
      "loss": 3.0861,
      "step": 36901
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4088044166564941,
      "learning_rate": 0.0005628180310746729,
      "loss": 2.9837,
      "step": 36902
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8314143419265747,
      "learning_rate": 0.0005628160585732945,
      "loss": 3.2911,
      "step": 36903
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5859054327011108,
      "learning_rate": 0.0005628140860230537,
      "loss": 2.9784,
      "step": 36904
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4148740768432617,
      "learning_rate": 0.0005628121134239509,
      "loss": 2.8298,
      "step": 36905
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4725627899169922,
      "learning_rate": 0.0005628101407759862,
      "loss": 3.1793,
      "step": 36906
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4684278964996338,
      "learning_rate": 0.0005628081680791603,
      "loss": 3.1191,
      "step": 36907
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4137825965881348,
      "learning_rate": 0.0005628061953334734,
      "loss": 2.8937,
      "step": 36908
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5768120288848877,
      "learning_rate": 0.0005628042225389256,
      "loss": 3.1118,
      "step": 36909
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6536341905593872,
      "learning_rate": 0.0005628022496955178,
      "loss": 3.083,
      "step": 36910
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7860372066497803,
      "learning_rate": 0.00056280027680325,
      "loss": 3.1639,
      "step": 36911
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4547748565673828,
      "learning_rate": 0.0005627983038621227,
      "loss": 2.9437,
      "step": 36912
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7620775699615479,
      "learning_rate": 0.0005627963308721363,
      "loss": 3.3292,
      "step": 36913
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3964710235595703,
      "learning_rate": 0.0005627943578332909,
      "loss": 3.1045,
      "step": 36914
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5384577512741089,
      "learning_rate": 0.0005627923847455873,
      "loss": 2.7757,
      "step": 36915
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3051350116729736,
      "learning_rate": 0.0005627904116090255,
      "loss": 3.1655,
      "step": 36916
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6945419311523438,
      "learning_rate": 0.000562788438423606,
      "loss": 2.9203,
      "step": 36917
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.641729712486267,
      "learning_rate": 0.0005627864651893291,
      "loss": 3.1123,
      "step": 36918
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1267714500427246,
      "learning_rate": 0.0005627844919061954,
      "loss": 3.0603,
      "step": 36919
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.671231746673584,
      "learning_rate": 0.000562782518574205,
      "loss": 3.129,
      "step": 36920
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4902522563934326,
      "learning_rate": 0.0005627805451933585,
      "loss": 3.1194,
      "step": 36921
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2269039154052734,
      "learning_rate": 0.000562778571763656,
      "loss": 3.0282,
      "step": 36922
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8515571355819702,
      "learning_rate": 0.000562776598285098,
      "loss": 3.303,
      "step": 36923
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6172770261764526,
      "learning_rate": 0.000562774624757685,
      "loss": 3.0102,
      "step": 36924
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.785291314125061,
      "learning_rate": 0.0005627726511814171,
      "loss": 2.846,
      "step": 36925
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.3135242462158203,
      "learning_rate": 0.0005627706775562949,
      "loss": 3.0397,
      "step": 36926
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7463622093200684,
      "learning_rate": 0.0005627687038823186,
      "loss": 3.0931,
      "step": 36927
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7454524040222168,
      "learning_rate": 0.0005627667301594887,
      "loss": 3.225,
      "step": 36928
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.328738212585449,
      "learning_rate": 0.0005627647563878054,
      "loss": 2.8276,
      "step": 36929
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.955842137336731,
      "learning_rate": 0.0005627627825672693,
      "loss": 3.1772,
      "step": 36930
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7815803289413452,
      "learning_rate": 0.0005627608086978806,
      "loss": 3.0079,
      "step": 36931
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3958858251571655,
      "learning_rate": 0.0005627588347796396,
      "loss": 2.9709,
      "step": 36932
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4355182647705078,
      "learning_rate": 0.0005627568608125469,
      "loss": 3.2229,
      "step": 36933
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6790701150894165,
      "learning_rate": 0.0005627548867966027,
      "loss": 3.0337,
      "step": 36934
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5589648485183716,
      "learning_rate": 0.0005627529127318074,
      "loss": 2.9067,
      "step": 36935
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3043570518493652,
      "learning_rate": 0.0005627509386181613,
      "loss": 3.1175,
      "step": 36936
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3133553266525269,
      "learning_rate": 0.0005627489644556649,
      "loss": 3.1631,
      "step": 36937
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4894907474517822,
      "learning_rate": 0.0005627469902443187,
      "loss": 3.2658,
      "step": 36938
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1152827739715576,
      "learning_rate": 0.0005627450159841227,
      "loss": 2.9529,
      "step": 36939
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4420667886734009,
      "learning_rate": 0.0005627430416750773,
      "loss": 3.1005,
      "step": 36940
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7188116312026978,
      "learning_rate": 0.0005627410673171833,
      "loss": 3.1595,
      "step": 36941
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.864762306213379,
      "learning_rate": 0.0005627390929104406,
      "loss": 3.1788,
      "step": 36942
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7765408754348755,
      "learning_rate": 0.0005627371184548497,
      "loss": 3.2311,
      "step": 36943
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0163233280181885,
      "learning_rate": 0.0005627351439504111,
      "loss": 3.0767,
      "step": 36944
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3707246780395508,
      "learning_rate": 0.000562733169397125,
      "loss": 3.2631,
      "step": 36945
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5425422191619873,
      "learning_rate": 0.0005627311947949919,
      "loss": 3.0403,
      "step": 36946
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0249381065368652,
      "learning_rate": 0.0005627292201440122,
      "loss": 3.0543,
      "step": 36947
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7514268159866333,
      "learning_rate": 0.000562727245444186,
      "loss": 2.9703,
      "step": 36948
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.655133605003357,
      "learning_rate": 0.000562725270695514,
      "loss": 3.0676,
      "step": 36949
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6343867778778076,
      "learning_rate": 0.0005627232958979963,
      "loss": 3.0968,
      "step": 36950
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.435580849647522,
      "learning_rate": 0.0005627213210516335,
      "loss": 3.0548,
      "step": 36951
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7712208032608032,
      "learning_rate": 0.0005627193461564257,
      "loss": 2.936,
      "step": 36952
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1941537857055664,
      "learning_rate": 0.0005627173712123736,
      "loss": 3.0599,
      "step": 36953
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7053687572479248,
      "learning_rate": 0.0005627153962194772,
      "loss": 3.0645,
      "step": 36954
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3097922801971436,
      "learning_rate": 0.0005627134211777371,
      "loss": 3.1133,
      "step": 36955
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5874775648117065,
      "learning_rate": 0.0005627114460871537,
      "loss": 2.9859,
      "step": 36956
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6330218315124512,
      "learning_rate": 0.0005627094709477272,
      "loss": 3.1183,
      "step": 36957
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4282888174057007,
      "learning_rate": 0.0005627074957594581,
      "loss": 3.0459,
      "step": 36958
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8924790620803833,
      "learning_rate": 0.0005627055205223466,
      "loss": 3.1753,
      "step": 36959
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4778910875320435,
      "learning_rate": 0.0005627035452363933,
      "loss": 3.1051,
      "step": 36960
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6096470355987549,
      "learning_rate": 0.0005627015699015984,
      "loss": 3.0488,
      "step": 36961
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7338600158691406,
      "learning_rate": 0.0005626995945179624,
      "loss": 2.9108,
      "step": 36962
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6106129884719849,
      "learning_rate": 0.0005626976190854855,
      "loss": 2.9844,
      "step": 36963
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5026459693908691,
      "learning_rate": 0.0005626956436041682,
      "loss": 3.0624,
      "step": 36964
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.371664047241211,
      "learning_rate": 0.0005626936680740108,
      "loss": 3.1204,
      "step": 36965
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0663237571716309,
      "learning_rate": 0.0005626916924950137,
      "loss": 3.1369,
      "step": 36966
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5760337114334106,
      "learning_rate": 0.0005626897168671772,
      "loss": 3.0087,
      "step": 36967
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8545671701431274,
      "learning_rate": 0.0005626877411905019,
      "loss": 3.3153,
      "step": 36968
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3716682195663452,
      "learning_rate": 0.0005626857654649878,
      "loss": 2.9217,
      "step": 36969
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.2058815956115723,
      "learning_rate": 0.0005626837896906355,
      "loss": 2.8271,
      "step": 36970
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.527373790740967,
      "learning_rate": 0.0005626818138674453,
      "loss": 2.8429,
      "step": 36971
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2035374641418457,
      "learning_rate": 0.0005626798379954177,
      "loss": 2.9414,
      "step": 36972
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5290513038635254,
      "learning_rate": 0.0005626778620745528,
      "loss": 3.1359,
      "step": 36973
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5063954591751099,
      "learning_rate": 0.0005626758861048513,
      "loss": 3.2336,
      "step": 36974
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6699401140213013,
      "learning_rate": 0.0005626739100863134,
      "loss": 3.0901,
      "step": 36975
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5722049474716187,
      "learning_rate": 0.0005626719340189393,
      "loss": 3.139,
      "step": 36976
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9445796012878418,
      "learning_rate": 0.0005626699579027297,
      "loss": 3.1781,
      "step": 36977
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6609742641448975,
      "learning_rate": 0.0005626679817376846,
      "loss": 3.1703,
      "step": 36978
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5706545114517212,
      "learning_rate": 0.0005626660055238047,
      "loss": 3.0016,
      "step": 36979
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5789337158203125,
      "learning_rate": 0.0005626640292610901,
      "loss": 3.0515,
      "step": 36980
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3732116222381592,
      "learning_rate": 0.0005626620529495415,
      "loss": 3.1885,
      "step": 36981
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.449188470840454,
      "learning_rate": 0.0005626600765891589,
      "loss": 3.0895,
      "step": 36982
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.521409273147583,
      "learning_rate": 0.000562658100179943,
      "loss": 3.006,
      "step": 36983
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.425171971321106,
      "learning_rate": 0.0005626561237218939,
      "loss": 3.0023,
      "step": 36984
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4324226379394531,
      "learning_rate": 0.0005626541472150121,
      "loss": 2.9635,
      "step": 36985
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3303966522216797,
      "learning_rate": 0.0005626521706592979,
      "loss": 3.0845,
      "step": 36986
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.394193172454834,
      "learning_rate": 0.0005626501940547517,
      "loss": 3.127,
      "step": 36987
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3420608043670654,
      "learning_rate": 0.0005626482174013738,
      "loss": 3.0559,
      "step": 36988
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3975845575332642,
      "learning_rate": 0.0005626462406991648,
      "loss": 3.0064,
      "step": 36989
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.98783016204834,
      "learning_rate": 0.0005626442639481248,
      "loss": 3.1703,
      "step": 36990
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.6886513233184814,
      "learning_rate": 0.0005626422871482543,
      "loss": 2.9852,
      "step": 36991
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9165340662002563,
      "learning_rate": 0.0005626403102995536,
      "loss": 2.9421,
      "step": 36992
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.0661747455596924,
      "learning_rate": 0.0005626383334020233,
      "loss": 3.1473,
      "step": 36993
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9932281970977783,
      "learning_rate": 0.0005626363564556633,
      "loss": 3.0616,
      "step": 36994
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.236823558807373,
      "learning_rate": 0.0005626343794604744,
      "loss": 3.0111,
      "step": 36995
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6759077310562134,
      "learning_rate": 0.0005626324024164568,
      "loss": 3.288,
      "step": 36996
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.0658950805664062,
      "learning_rate": 0.0005626304253236109,
      "loss": 3.1711,
      "step": 36997
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2249903678894043,
      "learning_rate": 0.000562628448181937,
      "loss": 3.2023,
      "step": 36998
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9135324954986572,
      "learning_rate": 0.0005626264709914354,
      "loss": 3.0667,
      "step": 36999
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7428957223892212,
      "learning_rate": 0.0005626244937521068,
      "loss": 3.2055,
      "step": 37000
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.056135892868042,
      "learning_rate": 0.0005626225164639512,
      "loss": 3.0711,
      "step": 37001
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.745614528656006,
      "learning_rate": 0.0005626205391269691,
      "loss": 3.2292,
      "step": 37002
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5024607181549072,
      "learning_rate": 0.000562618561741161,
      "loss": 3.051,
      "step": 37003
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.892788052558899,
      "learning_rate": 0.0005626165843065271,
      "loss": 3.4097,
      "step": 37004
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4999955892562866,
      "learning_rate": 0.0005626146068230678,
      "loss": 2.9887,
      "step": 37005
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.21744441986084,
      "learning_rate": 0.0005626126292907835,
      "loss": 3.044,
      "step": 37006
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.4491424560546875,
      "learning_rate": 0.0005626106517096747,
      "loss": 2.9286,
      "step": 37007
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0653228759765625,
      "learning_rate": 0.0005626086740797414,
      "loss": 3.0412,
      "step": 37008
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4750852584838867,
      "learning_rate": 0.0005626066964009843,
      "loss": 3.1094,
      "step": 37009
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.0344629287719727,
      "learning_rate": 0.0005626047186734036,
      "loss": 3.1865,
      "step": 37010
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.927267074584961,
      "learning_rate": 0.0005626027408969998,
      "loss": 2.8879,
      "step": 37011
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4610252380371094,
      "learning_rate": 0.0005626007630717732,
      "loss": 2.7977,
      "step": 37012
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.846813440322876,
      "learning_rate": 0.0005625987851977241,
      "loss": 2.9742,
      "step": 37013
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.2592666149139404,
      "learning_rate": 0.0005625968072748529,
      "loss": 2.9598,
      "step": 37014
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.40973961353302,
      "learning_rate": 0.00056259482930316,
      "loss": 2.9663,
      "step": 37015
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.643871784210205,
      "learning_rate": 0.0005625928512826458,
      "loss": 3.1105,
      "step": 37016
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.24383282661438,
      "learning_rate": 0.0005625908732133108,
      "loss": 3.1779,
      "step": 37017
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4880621433258057,
      "learning_rate": 0.0005625888950951549,
      "loss": 2.9771,
      "step": 37018
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7172539234161377,
      "learning_rate": 0.000562586916928179,
      "loss": 2.9183,
      "step": 37019
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8441218137741089,
      "learning_rate": 0.0005625849387123831,
      "loss": 2.8108,
      "step": 37020
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5372717380523682,
      "learning_rate": 0.0005625829604477677,
      "loss": 3.142,
      "step": 37021
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.941941499710083,
      "learning_rate": 0.0005625809821343333,
      "loss": 3.2151,
      "step": 37022
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3498601913452148,
      "learning_rate": 0.0005625790037720801,
      "loss": 3.0911,
      "step": 37023
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4715832471847534,
      "learning_rate": 0.0005625770253610084,
      "loss": 3.1324,
      "step": 37024
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5497280359268188,
      "learning_rate": 0.0005625750469011187,
      "loss": 3.2958,
      "step": 37025
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4342156648635864,
      "learning_rate": 0.0005625730683924113,
      "loss": 3.1001,
      "step": 37026
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3441468477249146,
      "learning_rate": 0.0005625710898348867,
      "loss": 3.075,
      "step": 37027
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5227775573730469,
      "learning_rate": 0.0005625691112285452,
      "loss": 3.0305,
      "step": 37028
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0388965606689453,
      "learning_rate": 0.0005625671325733872,
      "loss": 3.0544,
      "step": 37029
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5193078517913818,
      "learning_rate": 0.0005625651538694129,
      "loss": 3.0173,
      "step": 37030
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5287232398986816,
      "learning_rate": 0.0005625631751166228,
      "loss": 3.1598,
      "step": 37031
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0164217948913574,
      "learning_rate": 0.0005625611963150172,
      "loss": 2.9964,
      "step": 37032
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3135737180709839,
      "learning_rate": 0.0005625592174645967,
      "loss": 2.9781,
      "step": 37033
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2836265563964844,
      "learning_rate": 0.0005625572385653612,
      "loss": 2.8265,
      "step": 37034
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1638784408569336,
      "learning_rate": 0.0005625552596173115,
      "loss": 3.0892,
      "step": 37035
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6849428415298462,
      "learning_rate": 0.000562553280620448,
      "loss": 3.1419,
      "step": 37036
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0531527996063232,
      "learning_rate": 0.0005625513015747707,
      "loss": 2.9153,
      "step": 37037
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4298851490020752,
      "learning_rate": 0.0005625493224802802,
      "loss": 3.0804,
      "step": 37038
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5305012464523315,
      "learning_rate": 0.0005625473433369767,
      "loss": 3.0354,
      "step": 37039
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5749146938323975,
      "learning_rate": 0.0005625453641448609,
      "loss": 2.9419,
      "step": 37040
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3181424140930176,
      "learning_rate": 0.0005625433849039328,
      "loss": 3.2262,
      "step": 37041
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3401625156402588,
      "learning_rate": 0.000562541405614193,
      "loss": 2.8317,
      "step": 37042
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2678050994873047,
      "learning_rate": 0.0005625394262756418,
      "loss": 3.301,
      "step": 37043
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3894346952438354,
      "learning_rate": 0.0005625374468882795,
      "loss": 3.1279,
      "step": 37044
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9220364093780518,
      "learning_rate": 0.0005625354674521067,
      "loss": 3.1583,
      "step": 37045
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5646613836288452,
      "learning_rate": 0.0005625334879671235,
      "loss": 3.0428,
      "step": 37046
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5892841815948486,
      "learning_rate": 0.0005625315084333303,
      "loss": 3.0736,
      "step": 37047
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5551549196243286,
      "learning_rate": 0.0005625295288507275,
      "loss": 3.1923,
      "step": 37048
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.374598741531372,
      "learning_rate": 0.0005625275492193156,
      "loss": 3.1395,
      "step": 37049
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1405961513519287,
      "learning_rate": 0.000562525569539095,
      "loss": 2.9514,
      "step": 37050
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3926563262939453,
      "learning_rate": 0.0005625235898100657,
      "loss": 2.8633,
      "step": 37051
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7352598905563354,
      "learning_rate": 0.0005625216100322285,
      "loss": 2.999,
      "step": 37052
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4084025621414185,
      "learning_rate": 0.0005625196302055834,
      "loss": 2.8475,
      "step": 37053
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.694938898086548,
      "learning_rate": 0.000562517650330131,
      "loss": 3.0874,
      "step": 37054
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4250272512435913,
      "learning_rate": 0.0005625156704058716,
      "loss": 2.9724,
      "step": 37055
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5400396585464478,
      "learning_rate": 0.0005625136904328058,
      "loss": 3.0637,
      "step": 37056
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3095334768295288,
      "learning_rate": 0.0005625117104109336,
      "loss": 3.1716,
      "step": 37057
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.682265520095825,
      "learning_rate": 0.0005625097303402554,
      "loss": 3.0439,
      "step": 37058
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.012861490249634,
      "learning_rate": 0.0005625077502207717,
      "loss": 2.9687,
      "step": 37059
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9371349811553955,
      "learning_rate": 0.0005625057700524831,
      "loss": 3.1976,
      "step": 37060
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4388482570648193,
      "learning_rate": 0.0005625037898353895,
      "loss": 3.221,
      "step": 37061
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.6363685131073,
      "learning_rate": 0.0005625018095694915,
      "loss": 2.979,
      "step": 37062
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4710510969161987,
      "learning_rate": 0.0005624998292547896,
      "loss": 3.0872,
      "step": 37063
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7739495038986206,
      "learning_rate": 0.0005624978488912839,
      "loss": 2.9689,
      "step": 37064
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.552797317504883,
      "learning_rate": 0.000562495868478975,
      "loss": 3.0537,
      "step": 37065
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3117822408676147,
      "learning_rate": 0.000562493888017863,
      "loss": 3.1713,
      "step": 37066
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3040406703948975,
      "learning_rate": 0.0005624919075079486,
      "loss": 3.3029,
      "step": 37067
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6596442461013794,
      "learning_rate": 0.0005624899269492319,
      "loss": 3.0784,
      "step": 37068
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.3482067584991455,
      "learning_rate": 0.0005624879463417134,
      "loss": 3.1165,
      "step": 37069
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6604094505310059,
      "learning_rate": 0.0005624859656853935,
      "loss": 2.9888,
      "step": 37070
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6163386106491089,
      "learning_rate": 0.0005624839849802724,
      "loss": 3.1145,
      "step": 37071
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.541088104248047,
      "learning_rate": 0.0005624820042263506,
      "loss": 3.1139,
      "step": 37072
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5433008670806885,
      "learning_rate": 0.0005624800234236285,
      "loss": 3.1349,
      "step": 37073
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5838046073913574,
      "learning_rate": 0.0005624780425721064,
      "loss": 2.8847,
      "step": 37074
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4204349517822266,
      "learning_rate": 0.0005624760616717846,
      "loss": 3.2225,
      "step": 37075
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.219942331314087,
      "learning_rate": 0.0005624740807226636,
      "loss": 3.0761,
      "step": 37076
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5719071626663208,
      "learning_rate": 0.0005624720997247436,
      "loss": 3.0183,
      "step": 37077
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5058623552322388,
      "learning_rate": 0.0005624701186780251,
      "loss": 3.176,
      "step": 37078
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1494662761688232,
      "learning_rate": 0.0005624681375825086,
      "loss": 2.808,
      "step": 37079
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.575779676437378,
      "learning_rate": 0.0005624661564381943,
      "loss": 2.8865,
      "step": 37080
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6394445896148682,
      "learning_rate": 0.0005624641752450825,
      "loss": 2.9272,
      "step": 37081
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1356475353240967,
      "learning_rate": 0.0005624621940031737,
      "loss": 3.1008,
      "step": 37082
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.235581398010254,
      "learning_rate": 0.0005624602127124682,
      "loss": 3.0028,
      "step": 37083
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4477285146713257,
      "learning_rate": 0.0005624582313729664,
      "loss": 3.3319,
      "step": 37084
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6202614307403564,
      "learning_rate": 0.0005624562499846687,
      "loss": 3.1402,
      "step": 37085
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.836477518081665,
      "learning_rate": 0.0005624542685475753,
      "loss": 3.0275,
      "step": 37086
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5273627042770386,
      "learning_rate": 0.0005624522870616869,
      "loss": 2.8658,
      "step": 37087
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5050287246704102,
      "learning_rate": 0.0005624503055270035,
      "loss": 3.1986,
      "step": 37088
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3874644041061401,
      "learning_rate": 0.0005624483239435258,
      "loss": 3.0492,
      "step": 37089
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5844558477401733,
      "learning_rate": 0.0005624463423112538,
      "loss": 3.291,
      "step": 37090
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1974055767059326,
      "learning_rate": 0.0005624443606301882,
      "loss": 2.9209,
      "step": 37091
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4970159530639648,
      "learning_rate": 0.0005624423789003292,
      "loss": 2.8677,
      "step": 37092
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6900073289871216,
      "learning_rate": 0.0005624403971216772,
      "loss": 3.1138,
      "step": 37093
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9178309440612793,
      "learning_rate": 0.0005624384152942326,
      "loss": 3.1258,
      "step": 37094
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2782166004180908,
      "learning_rate": 0.0005624364334179956,
      "loss": 3.0587,
      "step": 37095
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5949878692626953,
      "learning_rate": 0.0005624344514929669,
      "loss": 3.0737,
      "step": 37096
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7142810821533203,
      "learning_rate": 0.0005624324695191466,
      "loss": 3.0327,
      "step": 37097
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.934951663017273,
      "learning_rate": 0.0005624304874965353,
      "loss": 3.0131,
      "step": 37098
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7374324798583984,
      "learning_rate": 0.0005624285054251331,
      "loss": 2.7617,
      "step": 37099
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.33174467086792,
      "learning_rate": 0.0005624265233049405,
      "loss": 3.1067,
      "step": 37100
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6231855154037476,
      "learning_rate": 0.0005624245411359577,
      "loss": 3.0708,
      "step": 37101
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4029502868652344,
      "learning_rate": 0.0005624225589181854,
      "loss": 3.0645,
      "step": 37102
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.390453815460205,
      "learning_rate": 0.0005624205766516237,
      "loss": 2.8385,
      "step": 37103
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8313380479812622,
      "learning_rate": 0.0005624185943362732,
      "loss": 2.9926,
      "step": 37104
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.252427577972412,
      "learning_rate": 0.000562416611972134,
      "loss": 3.1542,
      "step": 37105
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5359219312667847,
      "learning_rate": 0.0005624146295592066,
      "loss": 3.0727,
      "step": 37106
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.863602876663208,
      "learning_rate": 0.0005624126470974915,
      "loss": 3.1472,
      "step": 37107
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.524690866470337,
      "learning_rate": 0.0005624106645869888,
      "loss": 3.1472,
      "step": 37108
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.536522626876831,
      "learning_rate": 0.0005624086820276991,
      "loss": 3.1844,
      "step": 37109
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6905814409255981,
      "learning_rate": 0.0005624066994196226,
      "loss": 2.6758,
      "step": 37110
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3999345302581787,
      "learning_rate": 0.0005624047167627598,
      "loss": 3.106,
      "step": 37111
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6606441736221313,
      "learning_rate": 0.0005624027340571109,
      "loss": 3.1684,
      "step": 37112
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.250429630279541,
      "learning_rate": 0.0005624007513026765,
      "loss": 3.0308,
      "step": 37113
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4466893672943115,
      "learning_rate": 0.0005623987684994569,
      "loss": 3.1879,
      "step": 37114
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9030373096466064,
      "learning_rate": 0.0005623967856474522,
      "loss": 2.9855,
      "step": 37115
    },
    {
      "epoch": 0.48,
      "grad_norm": 4.139054298400879,
      "learning_rate": 0.0005623948027466631,
      "loss": 3.0376,
      "step": 37116
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.972552537918091,
      "learning_rate": 0.0005623928197970899,
      "loss": 3.028,
      "step": 37117
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9916366338729858,
      "learning_rate": 0.0005623908367987329,
      "loss": 2.9332,
      "step": 37118
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8674042224884033,
      "learning_rate": 0.0005623888537515925,
      "loss": 3.2823,
      "step": 37119
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3002851009368896,
      "learning_rate": 0.000562386870655669,
      "loss": 2.918,
      "step": 37120
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6431219577789307,
      "learning_rate": 0.0005623848875109628,
      "loss": 3.2422,
      "step": 37121
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5660457611083984,
      "learning_rate": 0.0005623829043174744,
      "loss": 2.8149,
      "step": 37122
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.994211196899414,
      "learning_rate": 0.000562380921075204,
      "loss": 2.979,
      "step": 37123
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9611351490020752,
      "learning_rate": 0.000562378937784152,
      "loss": 3.0175,
      "step": 37124
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.551722764968872,
      "learning_rate": 0.0005623769544443189,
      "loss": 3.0263,
      "step": 37125
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9271767139434814,
      "learning_rate": 0.000562374971055705,
      "loss": 2.8976,
      "step": 37126
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1617817878723145,
      "learning_rate": 0.0005623729876183105,
      "loss": 3.1235,
      "step": 37127
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4881521463394165,
      "learning_rate": 0.0005623710041321359,
      "loss": 3.0604,
      "step": 37128
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9360734224319458,
      "learning_rate": 0.0005623690205971817,
      "loss": 2.9177,
      "step": 37129
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8377758264541626,
      "learning_rate": 0.000562367037013448,
      "loss": 2.9195,
      "step": 37130
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6073877811431885,
      "learning_rate": 0.0005623650533809355,
      "loss": 3.3657,
      "step": 37131
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6413872241973877,
      "learning_rate": 0.0005623630696996444,
      "loss": 3.0678,
      "step": 37132
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.011549949645996,
      "learning_rate": 0.0005623610859695748,
      "loss": 3.0037,
      "step": 37133
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5509722232818604,
      "learning_rate": 0.0005623591021907275,
      "loss": 2.959,
      "step": 37134
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.249297618865967,
      "learning_rate": 0.0005623571183631026,
      "loss": 3.052,
      "step": 37135
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6958192586898804,
      "learning_rate": 0.0005623551344867007,
      "loss": 3.0475,
      "step": 37136
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4675756692886353,
      "learning_rate": 0.0005623531505615219,
      "loss": 3.0591,
      "step": 37137
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.15887451171875,
      "learning_rate": 0.0005623511665875668,
      "loss": 2.9812,
      "step": 37138
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4989824295043945,
      "learning_rate": 0.0005623491825648356,
      "loss": 2.9354,
      "step": 37139
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8617439270019531,
      "learning_rate": 0.0005623471984933287,
      "loss": 3.132,
      "step": 37140
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.730109691619873,
      "learning_rate": 0.0005623452143730466,
      "loss": 3.0878,
      "step": 37141
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0324807167053223,
      "learning_rate": 0.0005623432302039894,
      "loss": 2.9369,
      "step": 37142
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0930750370025635,
      "learning_rate": 0.0005623412459861579,
      "loss": 2.8172,
      "step": 37143
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6836543083190918,
      "learning_rate": 0.0005623392617195521,
      "loss": 3.1354,
      "step": 37144
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.610390305519104,
      "learning_rate": 0.0005623372774041724,
      "loss": 2.9761,
      "step": 37145
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9405940771102905,
      "learning_rate": 0.0005623352930400193,
      "loss": 2.914,
      "step": 37146
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6080842018127441,
      "learning_rate": 0.0005623333086270931,
      "loss": 2.9417,
      "step": 37147
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7416236400604248,
      "learning_rate": 0.0005623313241653942,
      "loss": 2.8692,
      "step": 37148
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.032569646835327,
      "learning_rate": 0.000562329339654923,
      "loss": 2.9592,
      "step": 37149
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5880299806594849,
      "learning_rate": 0.0005623273550956797,
      "loss": 2.8362,
      "step": 37150
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2722995281219482,
      "learning_rate": 0.000562325370487665,
      "loss": 3.1859,
      "step": 37151
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5064793825149536,
      "learning_rate": 0.0005623233858308788,
      "loss": 3.0704,
      "step": 37152
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5982424020767212,
      "learning_rate": 0.0005623214011253219,
      "loss": 3.0775,
      "step": 37153
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4828448295593262,
      "learning_rate": 0.0005623194163709945,
      "loss": 3.2342,
      "step": 37154
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.730604648590088,
      "learning_rate": 0.000562317431567897,
      "loss": 3.0799,
      "step": 37155
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.492884635925293,
      "learning_rate": 0.0005623154467160296,
      "loss": 3.1668,
      "step": 37156
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9902050495147705,
      "learning_rate": 0.0005623134618153929,
      "loss": 3.3014,
      "step": 37157
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9477012157440186,
      "learning_rate": 0.0005623114768659871,
      "loss": 3.1533,
      "step": 37158
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8053309917449951,
      "learning_rate": 0.0005623094918678127,
      "loss": 3.1929,
      "step": 37159
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.161249876022339,
      "learning_rate": 0.0005623075068208701,
      "loss": 3.242,
      "step": 37160
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.6167213916778564,
      "learning_rate": 0.0005623055217251595,
      "loss": 3.1377,
      "step": 37161
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.008404016494751,
      "learning_rate": 0.0005623035365806814,
      "loss": 3.0741,
      "step": 37162
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.455458402633667,
      "learning_rate": 0.000562301551387436,
      "loss": 2.8589,
      "step": 37163
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5437465906143188,
      "learning_rate": 0.0005622995661454239,
      "loss": 2.9452,
      "step": 37164
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0824928283691406,
      "learning_rate": 0.0005622975808546453,
      "loss": 2.8651,
      "step": 37165
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8902945518493652,
      "learning_rate": 0.0005622955955151006,
      "loss": 3.0217,
      "step": 37166
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6595475673675537,
      "learning_rate": 0.0005622936101267904,
      "loss": 3.1311,
      "step": 37167
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7855658531188965,
      "learning_rate": 0.0005622916246897146,
      "loss": 3.1841,
      "step": 37168
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.317368268966675,
      "learning_rate": 0.000562289639203874,
      "loss": 3.0014,
      "step": 37169
    },
    {
      "epoch": 0.48,
      "grad_norm": 4.0389204025268555,
      "learning_rate": 0.0005622876536692687,
      "loss": 2.9684,
      "step": 37170
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.188314914703369,
      "learning_rate": 0.0005622856680858993,
      "loss": 3.1151,
      "step": 37171
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4099169969558716,
      "learning_rate": 0.0005622836824537659,
      "loss": 3.0021,
      "step": 37172
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.0708956718444824,
      "learning_rate": 0.0005622816967728691,
      "loss": 3.1043,
      "step": 37173
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.048440456390381,
      "learning_rate": 0.0005622797110432092,
      "loss": 3.0759,
      "step": 37174
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4604475498199463,
      "learning_rate": 0.0005622777252647865,
      "loss": 3.1311,
      "step": 37175
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4855482578277588,
      "learning_rate": 0.0005622757394376015,
      "loss": 3.0022,
      "step": 37176
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.3596261739730835,
      "learning_rate": 0.0005622737535616543,
      "loss": 2.9333,
      "step": 37177
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2049062252044678,
      "learning_rate": 0.0005622717676369457,
      "loss": 2.999,
      "step": 37178
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6574772596359253,
      "learning_rate": 0.0005622697816634756,
      "loss": 3.4272,
      "step": 37179
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1097419261932373,
      "learning_rate": 0.0005622677956412448,
      "loss": 2.9885,
      "step": 37180
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.708643913269043,
      "learning_rate": 0.0005622658095702533,
      "loss": 2.9725,
      "step": 37181
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4082741737365723,
      "learning_rate": 0.0005622638234505018,
      "loss": 2.9758,
      "step": 37182
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.423814058303833,
      "learning_rate": 0.0005622618372819904,
      "loss": 3.0695,
      "step": 37183
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.8152647018432617,
      "learning_rate": 0.0005622598510647196,
      "loss": 2.8705,
      "step": 37184
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.434176206588745,
      "learning_rate": 0.0005622578647986897,
      "loss": 3.0318,
      "step": 37185
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.0443968772888184,
      "learning_rate": 0.0005622558784839012,
      "loss": 3.1341,
      "step": 37186
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.3727822303771973,
      "learning_rate": 0.0005622538921203543,
      "loss": 2.9679,
      "step": 37187
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.746035575866699,
      "learning_rate": 0.0005622519057080494,
      "loss": 3.1583,
      "step": 37188
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5288922786712646,
      "learning_rate": 0.000562249919246987,
      "loss": 3.0504,
      "step": 37189
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.534924864768982,
      "learning_rate": 0.0005622479327371674,
      "loss": 2.886,
      "step": 37190
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.2611442804336548,
      "learning_rate": 0.0005622459461785909,
      "loss": 3.0307,
      "step": 37191
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5970408916473389,
      "learning_rate": 0.0005622439595712581,
      "loss": 2.9842,
      "step": 37192
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6936757564544678,
      "learning_rate": 0.0005622419729151691,
      "loss": 3.2865,
      "step": 37193
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.33477783203125,
      "learning_rate": 0.0005622399862103243,
      "loss": 2.938,
      "step": 37194
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4791980981826782,
      "learning_rate": 0.0005622379994567241,
      "loss": 3.1168,
      "step": 37195
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.772319793701172,
      "learning_rate": 0.000562236012654369,
      "loss": 3.0087,
      "step": 37196
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.265615940093994,
      "learning_rate": 0.0005622340258032594,
      "loss": 2.8834,
      "step": 37197
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.678558349609375,
      "learning_rate": 0.0005622320389033953,
      "loss": 3.0894,
      "step": 37198
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.329324960708618,
      "learning_rate": 0.0005622300519547774,
      "loss": 2.9804,
      "step": 37199
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.444176435470581,
      "learning_rate": 0.000562228064957406,
      "loss": 3.1845,
      "step": 37200
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4901797771453857,
      "learning_rate": 0.0005622260779112815,
      "loss": 3.1561,
      "step": 37201
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4102178812026978,
      "learning_rate": 0.0005622240908164042,
      "loss": 3.1484,
      "step": 37202
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5278701782226562,
      "learning_rate": 0.0005622221036727744,
      "loss": 3.0316,
      "step": 37203
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.879364013671875,
      "learning_rate": 0.0005622201164803926,
      "loss": 2.9766,
      "step": 37204
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7161269187927246,
      "learning_rate": 0.0005622181292392592,
      "loss": 2.8804,
      "step": 37205
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.95053231716156,
      "learning_rate": 0.0005622161419493745,
      "loss": 2.9711,
      "step": 37206
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.422205924987793,
      "learning_rate": 0.0005622141546107388,
      "loss": 3.105,
      "step": 37207
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5005993843078613,
      "learning_rate": 0.0005622121672233525,
      "loss": 3.0956,
      "step": 37208
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.125581979751587,
      "learning_rate": 0.0005622101797872161,
      "loss": 2.9925,
      "step": 37209
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.2543320655822754,
      "learning_rate": 0.0005622081923023299,
      "loss": 2.809,
      "step": 37210
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.46608567237854,
      "learning_rate": 0.0005622062047686942,
      "loss": 3.2567,
      "step": 37211
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.6706362962722778,
      "learning_rate": 0.0005622042171863094,
      "loss": 3.1052,
      "step": 37212
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.331128478050232,
      "learning_rate": 0.0005622022295551759,
      "loss": 2.9416,
      "step": 37213
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9286398887634277,
      "learning_rate": 0.000562200241875294,
      "loss": 3.2806,
      "step": 37214
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7845708131790161,
      "learning_rate": 0.0005621982541466642,
      "loss": 2.9401,
      "step": 37215
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4326926469802856,
      "learning_rate": 0.0005621962663692868,
      "loss": 3.0023,
      "step": 37216
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5879220962524414,
      "learning_rate": 0.0005621942785431622,
      "loss": 2.9585,
      "step": 37217
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.84597647190094,
      "learning_rate": 0.0005621922906682907,
      "loss": 3.0911,
      "step": 37218
    },
    {
      "epoch": 0.48,
      "grad_norm": 4.519275188446045,
      "learning_rate": 0.0005621903027446726,
      "loss": 2.8665,
      "step": 37219
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9718749523162842,
      "learning_rate": 0.0005621883147723085,
      "loss": 3.1359,
      "step": 37220
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.1085121631622314,
      "learning_rate": 0.0005621863267511986,
      "loss": 3.0616,
      "step": 37221
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.65432071685791,
      "learning_rate": 0.0005621843386813434,
      "loss": 2.9512,
      "step": 37222
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4821221828460693,
      "learning_rate": 0.0005621823505627429,
      "loss": 3.1047,
      "step": 37223
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.5470757484436035,
      "learning_rate": 0.0005621803623953981,
      "loss": 3.0382,
      "step": 37224
    },
    {
      "epoch": 0.48,
      "grad_norm": 4.237978458404541,
      "learning_rate": 0.0005621783741793088,
      "loss": 3.1556,
      "step": 37225
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.1511707305908203,
      "learning_rate": 0.0005621763859144757,
      "loss": 2.851,
      "step": 37226
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.420584201812744,
      "learning_rate": 0.0005621743976008991,
      "loss": 3.1376,
      "step": 37227
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5075465440750122,
      "learning_rate": 0.0005621724092385791,
      "loss": 3.1695,
      "step": 37228
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.765427589416504,
      "learning_rate": 0.0005621704208275165,
      "loss": 2.9664,
      "step": 37229
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9962480068206787,
      "learning_rate": 0.0005621684323677114,
      "loss": 3.0251,
      "step": 37230
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5697357654571533,
      "learning_rate": 0.0005621664438591643,
      "loss": 3.2155,
      "step": 37231
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.7335097789764404,
      "learning_rate": 0.0005621644553018754,
      "loss": 3.0072,
      "step": 37232
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.228524684906006,
      "learning_rate": 0.0005621624666958453,
      "loss": 3.2997,
      "step": 37233
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.9744412899017334,
      "learning_rate": 0.0005621604780410742,
      "loss": 3.0975,
      "step": 37234
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.8799326419830322,
      "learning_rate": 0.0005621584893375625,
      "loss": 3.0669,
      "step": 37235
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4652647972106934,
      "learning_rate": 0.0005621565005853106,
      "loss": 2.9078,
      "step": 37236
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.8399338722229004,
      "learning_rate": 0.0005621545117843188,
      "loss": 2.9171,
      "step": 37237
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.584747791290283,
      "learning_rate": 0.0005621525229345877,
      "loss": 3.0959,
      "step": 37238
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4293198585510254,
      "learning_rate": 0.0005621505340361174,
      "loss": 3.0905,
      "step": 37239
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.4833269119262695,
      "learning_rate": 0.0005621485450889083,
      "loss": 2.9109,
      "step": 37240
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.330937623977661,
      "learning_rate": 0.0005621465560929609,
      "loss": 2.8875,
      "step": 37241
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.241319417953491,
      "learning_rate": 0.0005621445670482755,
      "loss": 3.0155,
      "step": 37242
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.327725410461426,
      "learning_rate": 0.0005621425779548526,
      "loss": 2.9649,
      "step": 37243
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.5275635719299316,
      "learning_rate": 0.0005621405888126922,
      "loss": 2.9439,
      "step": 37244
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.9958969354629517,
      "learning_rate": 0.000562138599621795,
      "loss": 3.1919,
      "step": 37245
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.617767572402954,
      "learning_rate": 0.0005621366103821615,
      "loss": 3.1655,
      "step": 37246
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.20513653755188,
      "learning_rate": 0.0005621346210937916,
      "loss": 3.1656,
      "step": 37247
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.4238675832748413,
      "learning_rate": 0.0005621326317566859,
      "loss": 3.1824,
      "step": 37248
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.610242247581482,
      "learning_rate": 0.000562130642370845,
      "loss": 3.3697,
      "step": 37249
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4285718202590942,
      "learning_rate": 0.0005621286529362689,
      "loss": 3.055,
      "step": 37250
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4031918048858643,
      "learning_rate": 0.0005621266634529583,
      "loss": 3.1496,
      "step": 37251
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5087568759918213,
      "learning_rate": 0.0005621246739209132,
      "loss": 3.1673,
      "step": 37252
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1520888805389404,
      "learning_rate": 0.0005621226843401343,
      "loss": 2.9495,
      "step": 37253
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.563952922821045,
      "learning_rate": 0.0005621206947106219,
      "loss": 3.0809,
      "step": 37254
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4911646842956543,
      "learning_rate": 0.0005621187050323762,
      "loss": 2.8856,
      "step": 37255
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9436869621276855,
      "learning_rate": 0.0005621167153053978,
      "loss": 3.0685,
      "step": 37256
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3600221872329712,
      "learning_rate": 0.0005621147255296868,
      "loss": 3.0464,
      "step": 37257
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3727521896362305,
      "learning_rate": 0.0005621127357052438,
      "loss": 2.9863,
      "step": 37258
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.510417103767395,
      "learning_rate": 0.0005621107458320691,
      "loss": 3.2759,
      "step": 37259
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6870129108428955,
      "learning_rate": 0.0005621087559101631,
      "loss": 3.0681,
      "step": 37260
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.573011875152588,
      "learning_rate": 0.0005621067659395261,
      "loss": 3.0579,
      "step": 37261
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6079912185668945,
      "learning_rate": 0.0005621047759201585,
      "loss": 2.9865,
      "step": 37262
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5802912712097168,
      "learning_rate": 0.0005621027858520607,
      "loss": 3.0004,
      "step": 37263
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4488756656646729,
      "learning_rate": 0.0005621007957352331,
      "loss": 3.0455,
      "step": 37264
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6182751655578613,
      "learning_rate": 0.0005620988055696759,
      "loss": 2.9578,
      "step": 37265
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.329352855682373,
      "learning_rate": 0.0005620968153553895,
      "loss": 3.1068,
      "step": 37266
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7597405910491943,
      "learning_rate": 0.0005620948250923746,
      "loss": 2.9433,
      "step": 37267
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0380783081054688,
      "learning_rate": 0.0005620928347806313,
      "loss": 3.0634,
      "step": 37268
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.41899573802948,
      "learning_rate": 0.0005620908444201597,
      "loss": 3.263,
      "step": 37269
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.364104151725769,
      "learning_rate": 0.0005620888540109608,
      "loss": 3.1007,
      "step": 37270
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.463590145111084,
      "learning_rate": 0.0005620868635530345,
      "loss": 3.0644,
      "step": 37271
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9650644063949585,
      "learning_rate": 0.0005620848730463812,
      "loss": 3.0817,
      "step": 37272
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.797800064086914,
      "learning_rate": 0.0005620828824910015,
      "loss": 2.9903,
      "step": 37273
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3081058263778687,
      "learning_rate": 0.0005620808918868957,
      "loss": 2.9074,
      "step": 37274
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.791345238685608,
      "learning_rate": 0.000562078901234064,
      "loss": 2.7023,
      "step": 37275
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3352211713790894,
      "learning_rate": 0.0005620769105325069,
      "loss": 3.1209,
      "step": 37276
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4371886253356934,
      "learning_rate": 0.0005620749197822247,
      "loss": 3.0898,
      "step": 37277
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5834670066833496,
      "learning_rate": 0.000562072928983218,
      "loss": 3.0183,
      "step": 37278
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7257485389709473,
      "learning_rate": 0.0005620709381354868,
      "loss": 2.999,
      "step": 37279
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.390979290008545,
      "learning_rate": 0.0005620689472390318,
      "loss": 3.1257,
      "step": 37280
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0366246700286865,
      "learning_rate": 0.0005620669562938532,
      "loss": 3.1937,
      "step": 37281
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1982077360153198,
      "learning_rate": 0.0005620649652999514,
      "loss": 3.0929,
      "step": 37282
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.59148371219635,
      "learning_rate": 0.0005620629742573268,
      "loss": 2.9535,
      "step": 37283
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6857157945632935,
      "learning_rate": 0.0005620609831659798,
      "loss": 2.9749,
      "step": 37284
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3611384630203247,
      "learning_rate": 0.0005620589920259106,
      "loss": 2.9898,
      "step": 37285
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.69108247756958,
      "learning_rate": 0.0005620570008371198,
      "loss": 2.9172,
      "step": 37286
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1794357299804688,
      "learning_rate": 0.0005620550095996075,
      "loss": 3.0655,
      "step": 37287
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9087648391723633,
      "learning_rate": 0.0005620530183133743,
      "loss": 3.2373,
      "step": 37288
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7149299383163452,
      "learning_rate": 0.0005620510269784205,
      "loss": 3.2435,
      "step": 37289
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.21695876121521,
      "learning_rate": 0.0005620490355947466,
      "loss": 2.8432,
      "step": 37290
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4159278869628906,
      "learning_rate": 0.0005620470441623526,
      "loss": 3.1392,
      "step": 37291
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.56505286693573,
      "learning_rate": 0.0005620450526812393,
      "loss": 3.0629,
      "step": 37292
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.6032347679138184,
      "learning_rate": 0.0005620430611514068,
      "loss": 2.6533,
      "step": 37293
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.7865140438079834,
      "learning_rate": 0.0005620410695728555,
      "loss": 3.1872,
      "step": 37294
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4650651216506958,
      "learning_rate": 0.0005620390779455859,
      "loss": 3.3148,
      "step": 37295
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.9320130348205566,
      "learning_rate": 0.0005620370862695983,
      "loss": 3.0107,
      "step": 37296
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.560598850250244,
      "learning_rate": 0.000562035094544893,
      "loss": 2.8343,
      "step": 37297
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4535950422286987,
      "learning_rate": 0.0005620331027714705,
      "loss": 3.065,
      "step": 37298
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8123931884765625,
      "learning_rate": 0.000562031110949331,
      "loss": 2.9252,
      "step": 37299
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.785849094390869,
      "learning_rate": 0.000562029119078475,
      "loss": 3.2551,
      "step": 37300
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.482483148574829,
      "learning_rate": 0.0005620271271589029,
      "loss": 3.1226,
      "step": 37301
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5483143329620361,
      "learning_rate": 0.000562025135190615,
      "loss": 3.1282,
      "step": 37302
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7392457723617554,
      "learning_rate": 0.0005620231431736116,
      "loss": 3.2379,
      "step": 37303
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6430772542953491,
      "learning_rate": 0.0005620211511078932,
      "loss": 3.1071,
      "step": 37304
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3216960430145264,
      "learning_rate": 0.0005620191589934601,
      "loss": 2.9002,
      "step": 37305
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3315366506576538,
      "learning_rate": 0.0005620171668303127,
      "loss": 2.9162,
      "step": 37306
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1185929775238037,
      "learning_rate": 0.0005620151746184514,
      "loss": 2.9922,
      "step": 37307
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6405445337295532,
      "learning_rate": 0.0005620131823578765,
      "loss": 3.1194,
      "step": 37308
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6105530261993408,
      "learning_rate": 0.0005620111900485884,
      "loss": 3.0549,
      "step": 37309
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3580355644226074,
      "learning_rate": 0.0005620091976905876,
      "loss": 3.0457,
      "step": 37310
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9894789457321167,
      "learning_rate": 0.0005620072052838742,
      "loss": 2.7745,
      "step": 37311
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4869122505187988,
      "learning_rate": 0.0005620052128284487,
      "loss": 2.9055,
      "step": 37312
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.60763680934906,
      "learning_rate": 0.0005620032203243116,
      "loss": 2.9874,
      "step": 37313
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5859884023666382,
      "learning_rate": 0.0005620012277714631,
      "loss": 2.9533,
      "step": 37314
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.755872130393982,
      "learning_rate": 0.0005619992351699035,
      "loss": 2.7255,
      "step": 37315
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.64919912815094,
      "learning_rate": 0.0005619972425196335,
      "loss": 3.0927,
      "step": 37316
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2392942905426025,
      "learning_rate": 0.0005619952498206532,
      "loss": 3.0133,
      "step": 37317
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.6912219524383545,
      "learning_rate": 0.000561993257072963,
      "loss": 3.2084,
      "step": 37318
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.678043007850647,
      "learning_rate": 0.0005619912642765633,
      "loss": 3.072,
      "step": 37319
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5574350357055664,
      "learning_rate": 0.0005619892714314545,
      "loss": 3.2399,
      "step": 37320
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.041942834854126,
      "learning_rate": 0.000561987278537637,
      "loss": 3.0769,
      "step": 37321
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.201374053955078,
      "learning_rate": 0.000561985285595111,
      "loss": 3.1837,
      "step": 37322
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.657849907875061,
      "learning_rate": 0.0005619832926038771,
      "loss": 3.3508,
      "step": 37323
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4066903591156006,
      "learning_rate": 0.0005619812995639355,
      "loss": 3.2428,
      "step": 37324
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.4320480823516846,
      "learning_rate": 0.0005619793064752867,
      "loss": 3.2449,
      "step": 37325
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5358574390411377,
      "learning_rate": 0.000561977313337931,
      "loss": 3.0138,
      "step": 37326
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7061710357666016,
      "learning_rate": 0.0005619753201518687,
      "loss": 2.9637,
      "step": 37327
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.62631094455719,
      "learning_rate": 0.0005619733269171003,
      "loss": 2.8519,
      "step": 37328
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.8384740352630615,
      "learning_rate": 0.000561971333633626,
      "loss": 2.9193,
      "step": 37329
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.537074327468872,
      "learning_rate": 0.0005619693403014465,
      "loss": 3.356,
      "step": 37330
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5467767715454102,
      "learning_rate": 0.0005619673469205617,
      "loss": 3.0523,
      "step": 37331
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2662508487701416,
      "learning_rate": 0.0005619653534909724,
      "loss": 2.916,
      "step": 37332
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.550647497177124,
      "learning_rate": 0.0005619633600126788,
      "loss": 3.2817,
      "step": 37333
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5080084800720215,
      "learning_rate": 0.0005619613664856811,
      "loss": 2.7836,
      "step": 37334
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6717673540115356,
      "learning_rate": 0.0005619593729099799,
      "loss": 2.9875,
      "step": 37335
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9591376781463623,
      "learning_rate": 0.0005619573792855756,
      "loss": 3.07,
      "step": 37336
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.058417797088623,
      "learning_rate": 0.0005619553856124684,
      "loss": 3.0804,
      "step": 37337
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5229973793029785,
      "learning_rate": 0.0005619533918906588,
      "loss": 3.0378,
      "step": 37338
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2883996963500977,
      "learning_rate": 0.0005619513981201472,
      "loss": 3.2513,
      "step": 37339
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.717254400253296,
      "learning_rate": 0.0005619494043009337,
      "loss": 3.1089,
      "step": 37340
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5143680572509766,
      "learning_rate": 0.000561947410433019,
      "loss": 2.9321,
      "step": 37341
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4090603590011597,
      "learning_rate": 0.0005619454165164033,
      "loss": 3.1944,
      "step": 37342
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5108211040496826,
      "learning_rate": 0.000561943422551087,
      "loss": 3.0744,
      "step": 37343
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.684098958969116,
      "learning_rate": 0.0005619414285370705,
      "loss": 3.0928,
      "step": 37344
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.592336893081665,
      "learning_rate": 0.0005619394344743541,
      "loss": 3.2207,
      "step": 37345
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6659950017929077,
      "learning_rate": 0.0005619374403629382,
      "loss": 2.9982,
      "step": 37346
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.208954334259033,
      "learning_rate": 0.0005619354462028232,
      "loss": 3.0788,
      "step": 37347
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.889106035232544,
      "learning_rate": 0.0005619334519940094,
      "loss": 3.0637,
      "step": 37348
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7367444038391113,
      "learning_rate": 0.0005619314577364974,
      "loss": 3.1091,
      "step": 37349
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2248106002807617,
      "learning_rate": 0.0005619294634302872,
      "loss": 2.8118,
      "step": 37350
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2233853340148926,
      "learning_rate": 0.0005619274690753795,
      "loss": 3.1721,
      "step": 37351
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8669378757476807,
      "learning_rate": 0.0005619254746717744,
      "loss": 3.0037,
      "step": 37352
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4626966714859009,
      "learning_rate": 0.0005619234802194727,
      "loss": 3.0689,
      "step": 37353
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.328712224960327,
      "learning_rate": 0.0005619214857184742,
      "loss": 3.1858,
      "step": 37354
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5614944696426392,
      "learning_rate": 0.0005619194911687796,
      "loss": 3.2944,
      "step": 37355
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5795605182647705,
      "learning_rate": 0.0005619174965703893,
      "loss": 3.0093,
      "step": 37356
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2948331832885742,
      "learning_rate": 0.0005619155019233036,
      "loss": 3.2782,
      "step": 37357
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7073733806610107,
      "learning_rate": 0.0005619135072275227,
      "loss": 3.1071,
      "step": 37358
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.723367214202881,
      "learning_rate": 0.0005619115124830473,
      "loss": 3.0245,
      "step": 37359
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6474632024765015,
      "learning_rate": 0.0005619095176898776,
      "loss": 2.9586,
      "step": 37360
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4310380220413208,
      "learning_rate": 0.0005619075228480139,
      "loss": 3.2349,
      "step": 37361
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.827493190765381,
      "learning_rate": 0.0005619055279574566,
      "loss": 3.0175,
      "step": 37362
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.469193458557129,
      "learning_rate": 0.0005619035330182063,
      "loss": 3.2743,
      "step": 37363
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6176704168319702,
      "learning_rate": 0.000561901538030263,
      "loss": 2.9463,
      "step": 37364
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0004703998565674,
      "learning_rate": 0.0005618995429936274,
      "loss": 2.9651,
      "step": 37365
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.162708044052124,
      "learning_rate": 0.0005618975479082997,
      "loss": 2.9031,
      "step": 37366
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3943772315979004,
      "learning_rate": 0.0005618955527742803,
      "loss": 2.892,
      "step": 37367
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5182427167892456,
      "learning_rate": 0.0005618935575915695,
      "loss": 2.6788,
      "step": 37368
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5276916027069092,
      "learning_rate": 0.0005618915623601679,
      "loss": 3.1309,
      "step": 37369
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.655349016189575,
      "learning_rate": 0.0005618895670800755,
      "loss": 3.1922,
      "step": 37370
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9066892862319946,
      "learning_rate": 0.0005618875717512931,
      "loss": 3.1049,
      "step": 37371
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7096365690231323,
      "learning_rate": 0.0005618855763738208,
      "loss": 3.1991,
      "step": 37372
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7316645383834839,
      "learning_rate": 0.0005618835809476589,
      "loss": 3.0403,
      "step": 37373
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7481225728988647,
      "learning_rate": 0.000561881585472808,
      "loss": 3.2518,
      "step": 37374
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3185218572616577,
      "learning_rate": 0.0005618795899492683,
      "loss": 3.4317,
      "step": 37375
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4441481828689575,
      "learning_rate": 0.0005618775943770403,
      "loss": 3.048,
      "step": 37376
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5269016027450562,
      "learning_rate": 0.0005618755987561243,
      "loss": 3.1987,
      "step": 37377
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8943687677383423,
      "learning_rate": 0.0005618736030865207,
      "loss": 3.0715,
      "step": 37378
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5397847890853882,
      "learning_rate": 0.0005618716073682298,
      "loss": 3.1022,
      "step": 37379
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5637634992599487,
      "learning_rate": 0.0005618696116012522,
      "loss": 3.4482,
      "step": 37380
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5054646730422974,
      "learning_rate": 0.000561867615785588,
      "loss": 3.1863,
      "step": 37381
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5142863988876343,
      "learning_rate": 0.0005618656199212375,
      "loss": 3.1865,
      "step": 37382
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.043124198913574,
      "learning_rate": 0.0005618636240082015,
      "loss": 3.1905,
      "step": 37383
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9071040153503418,
      "learning_rate": 0.0005618616280464799,
      "loss": 3.1038,
      "step": 37384
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6325429677963257,
      "learning_rate": 0.0005618596320360734,
      "loss": 2.998,
      "step": 37385
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7924294471740723,
      "learning_rate": 0.0005618576359769821,
      "loss": 2.9705,
      "step": 37386
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5387580394744873,
      "learning_rate": 0.0005618556398692067,
      "loss": 3.0983,
      "step": 37387
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2962350845336914,
      "learning_rate": 0.0005618536437127473,
      "loss": 2.9782,
      "step": 37388
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.070411443710327,
      "learning_rate": 0.0005618516475076044,
      "loss": 3.1465,
      "step": 37389
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6166123151779175,
      "learning_rate": 0.0005618496512537783,
      "loss": 3.081,
      "step": 37390
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5579310655593872,
      "learning_rate": 0.0005618476549512695,
      "loss": 2.9871,
      "step": 37391
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7672975063323975,
      "learning_rate": 0.0005618456586000782,
      "loss": 3.2024,
      "step": 37392
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4345823526382446,
      "learning_rate": 0.0005618436622002048,
      "loss": 3.0766,
      "step": 37393
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8511295318603516,
      "learning_rate": 0.0005618416657516499,
      "loss": 3.1111,
      "step": 37394
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5018965005874634,
      "learning_rate": 0.0005618396692544135,
      "loss": 3.0258,
      "step": 37395
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5145370960235596,
      "learning_rate": 0.0005618376727084963,
      "loss": 3.0165,
      "step": 37396
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8674852848052979,
      "learning_rate": 0.0005618356761138985,
      "loss": 2.8683,
      "step": 37397
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8927032947540283,
      "learning_rate": 0.0005618336794706204,
      "loss": 3.3865,
      "step": 37398
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7152565717697144,
      "learning_rate": 0.0005618316827786625,
      "loss": 2.9833,
      "step": 37399
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8117543458938599,
      "learning_rate": 0.0005618296860380252,
      "loss": 3.0784,
      "step": 37400
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.47134530544281,
      "learning_rate": 0.0005618276892487089,
      "loss": 2.9048,
      "step": 37401
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5484743118286133,
      "learning_rate": 0.0005618256924107138,
      "loss": 3.0394,
      "step": 37402
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5228403806686401,
      "learning_rate": 0.0005618236955240404,
      "loss": 3.0101,
      "step": 37403
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7776533365249634,
      "learning_rate": 0.000561821698588689,
      "loss": 2.8316,
      "step": 37404
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9920334815979004,
      "learning_rate": 0.00056181970160466,
      "loss": 3.306,
      "step": 37405
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3178051710128784,
      "learning_rate": 0.0005618177045719538,
      "loss": 3.0599,
      "step": 37406
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4367212057113647,
      "learning_rate": 0.0005618157074905707,
      "loss": 2.9045,
      "step": 37407
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5480231046676636,
      "learning_rate": 0.0005618137103605112,
      "loss": 2.9256,
      "step": 37408
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5798965692520142,
      "learning_rate": 0.0005618117131817755,
      "loss": 3.1505,
      "step": 37409
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.423696756362915,
      "learning_rate": 0.0005618097159543641,
      "loss": 3.0465,
      "step": 37410
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1517982482910156,
      "learning_rate": 0.0005618077186782774,
      "loss": 3.3967,
      "step": 37411
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2697126865386963,
      "learning_rate": 0.0005618057213535155,
      "loss": 3.1342,
      "step": 37412
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.430474877357483,
      "learning_rate": 0.0005618037239800792,
      "loss": 3.3717,
      "step": 37413
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2543110847473145,
      "learning_rate": 0.0005618017265579685,
      "loss": 2.8728,
      "step": 37414
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5530153512954712,
      "learning_rate": 0.0005617997290871839,
      "loss": 3.2266,
      "step": 37415
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.574163794517517,
      "learning_rate": 0.0005617977315677259,
      "loss": 2.9263,
      "step": 37416
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.403674840927124,
      "learning_rate": 0.0005617957339995947,
      "loss": 2.9488,
      "step": 37417
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7700120210647583,
      "learning_rate": 0.0005617937363827909,
      "loss": 3.112,
      "step": 37418
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.850577712059021,
      "learning_rate": 0.0005617917387173145,
      "loss": 2.9955,
      "step": 37419
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4476419687271118,
      "learning_rate": 0.0005617897410031661,
      "loss": 2.8845,
      "step": 37420
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.401066780090332,
      "learning_rate": 0.0005617877432403461,
      "loss": 3.1292,
      "step": 37421
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5677601099014282,
      "learning_rate": 0.0005617857454288548,
      "loss": 2.9743,
      "step": 37422
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8912544250488281,
      "learning_rate": 0.0005617837475686925,
      "loss": 2.9645,
      "step": 37423
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7603894472122192,
      "learning_rate": 0.0005617817496598597,
      "loss": 3.1408,
      "step": 37424
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4091086387634277,
      "learning_rate": 0.0005617797517023568,
      "loss": 2.9652,
      "step": 37425
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9555141925811768,
      "learning_rate": 0.0005617777536961841,
      "loss": 2.9765,
      "step": 37426
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9957677125930786,
      "learning_rate": 0.000561775755641342,
      "loss": 3.065,
      "step": 37427
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4198578596115112,
      "learning_rate": 0.0005617737575378308,
      "loss": 2.927,
      "step": 37428
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4433928728103638,
      "learning_rate": 0.0005617717593856509,
      "loss": 2.9463,
      "step": 37429
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4175106287002563,
      "learning_rate": 0.0005617697611848026,
      "loss": 3.1792,
      "step": 37430
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4285715818405151,
      "learning_rate": 0.0005617677629352865,
      "loss": 3.3995,
      "step": 37431
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.446046233177185,
      "learning_rate": 0.0005617657646371029,
      "loss": 3.0421,
      "step": 37432
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.540838599205017,
      "learning_rate": 0.0005617637662902519,
      "loss": 3.1843,
      "step": 37433
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7477329969406128,
      "learning_rate": 0.0005617617678947342,
      "loss": 3.0876,
      "step": 37434
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.612995982170105,
      "learning_rate": 0.0005617597694505499,
      "loss": 2.8027,
      "step": 37435
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2159398794174194,
      "learning_rate": 0.0005617577709576997,
      "loss": 3.0348,
      "step": 37436
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.121732711791992,
      "learning_rate": 0.0005617557724161838,
      "loss": 3.0873,
      "step": 37437
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5520037412643433,
      "learning_rate": 0.0005617537738260024,
      "loss": 3.0007,
      "step": 37438
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8569368124008179,
      "learning_rate": 0.0005617517751871562,
      "loss": 3.0983,
      "step": 37439
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9302111864089966,
      "learning_rate": 0.0005617497764996452,
      "loss": 2.9904,
      "step": 37440
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8714710474014282,
      "learning_rate": 0.0005617477777634701,
      "loss": 2.9609,
      "step": 37441
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6629304885864258,
      "learning_rate": 0.0005617457789786312,
      "loss": 2.8386,
      "step": 37442
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5484657287597656,
      "learning_rate": 0.0005617437801451288,
      "loss": 3.0735,
      "step": 37443
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8402035236358643,
      "learning_rate": 0.0005617417812629631,
      "loss": 3.1842,
      "step": 37444
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4707038402557373,
      "learning_rate": 0.0005617397823321348,
      "loss": 3.1464,
      "step": 37445
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5345172882080078,
      "learning_rate": 0.0005617377833526442,
      "loss": 2.9702,
      "step": 37446
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5809249877929688,
      "learning_rate": 0.0005617357843244916,
      "loss": 3.0687,
      "step": 37447
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0034072399139404,
      "learning_rate": 0.0005617337852476773,
      "loss": 2.6024,
      "step": 37448
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6102678775787354,
      "learning_rate": 0.0005617317861222018,
      "loss": 3.1849,
      "step": 37449
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.534166932106018,
      "learning_rate": 0.0005617297869480654,
      "loss": 3.0953,
      "step": 37450
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6557549238204956,
      "learning_rate": 0.0005617277877252685,
      "loss": 3.0057,
      "step": 37451
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4846936464309692,
      "learning_rate": 0.0005617257884538115,
      "loss": 2.784,
      "step": 37452
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.709843635559082,
      "learning_rate": 0.0005617237891336946,
      "loss": 2.9598,
      "step": 37453
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9566783905029297,
      "learning_rate": 0.0005617217897649184,
      "loss": 2.9435,
      "step": 37454
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4364033937454224,
      "learning_rate": 0.0005617197903474832,
      "loss": 3.086,
      "step": 37455
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4172166585922241,
      "learning_rate": 0.0005617177908813893,
      "loss": 3.0656,
      "step": 37456
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5164445638656616,
      "learning_rate": 0.0005617157913666371,
      "loss": 2.7953,
      "step": 37457
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5998433828353882,
      "learning_rate": 0.0005617137918032271,
      "loss": 2.9311,
      "step": 37458
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6648075580596924,
      "learning_rate": 0.0005617117921911595,
      "loss": 2.8113,
      "step": 37459
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.915414810180664,
      "learning_rate": 0.0005617097925304348,
      "loss": 3.0385,
      "step": 37460
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5713366270065308,
      "learning_rate": 0.0005617077928210532,
      "loss": 2.961,
      "step": 37461
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3348779678344727,
      "learning_rate": 0.0005617057930630152,
      "loss": 3.0168,
      "step": 37462
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.235867500305176,
      "learning_rate": 0.0005617037932563212,
      "loss": 2.8682,
      "step": 37463
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.124727725982666,
      "learning_rate": 0.0005617017934009716,
      "loss": 3.013,
      "step": 37464
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5876879692077637,
      "learning_rate": 0.0005616997934969665,
      "loss": 3.0701,
      "step": 37465
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.205721139907837,
      "learning_rate": 0.0005616977935443067,
      "loss": 2.9903,
      "step": 37466
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.264361619949341,
      "learning_rate": 0.0005616957935429922,
      "loss": 3.221,
      "step": 37467
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7226530313491821,
      "learning_rate": 0.0005616937934930235,
      "loss": 2.9539,
      "step": 37468
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6384528875350952,
      "learning_rate": 0.000561691793394401,
      "loss": 3.1087,
      "step": 37469
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8776483535766602,
      "learning_rate": 0.0005616897932471251,
      "loss": 3.0494,
      "step": 37470
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6215531826019287,
      "learning_rate": 0.0005616877930511961,
      "loss": 3.1355,
      "step": 37471
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2956913709640503,
      "learning_rate": 0.0005616857928066144,
      "loss": 2.9784,
      "step": 37472
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.171860694885254,
      "learning_rate": 0.0005616837925133803,
      "loss": 3.2442,
      "step": 37473
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5972877740859985,
      "learning_rate": 0.0005616817921714943,
      "loss": 2.9547,
      "step": 37474
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.475420594215393,
      "learning_rate": 0.0005616797917809568,
      "loss": 3.2306,
      "step": 37475
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.498188853263855,
      "learning_rate": 0.000561677791341768,
      "loss": 3.1209,
      "step": 37476
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4060134887695312,
      "learning_rate": 0.0005616757908539283,
      "loss": 3.0558,
      "step": 37477
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3734238147735596,
      "learning_rate": 0.0005616737903174383,
      "loss": 3.0728,
      "step": 37478
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2955715656280518,
      "learning_rate": 0.000561671789732298,
      "loss": 3.1659,
      "step": 37479
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.390493869781494,
      "learning_rate": 0.0005616697890985082,
      "loss": 2.9924,
      "step": 37480
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2834129333496094,
      "learning_rate": 0.0005616677884160689,
      "loss": 3.0932,
      "step": 37481
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.441763162612915,
      "learning_rate": 0.0005616657876849806,
      "loss": 2.9071,
      "step": 37482
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.876277208328247,
      "learning_rate": 0.0005616637869052439,
      "loss": 3.0811,
      "step": 37483
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0497851371765137,
      "learning_rate": 0.0005616617860768587,
      "loss": 3.1673,
      "step": 37484
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6799910068511963,
      "learning_rate": 0.0005616597851998257,
      "loss": 3.1732,
      "step": 37485
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.133275032043457,
      "learning_rate": 0.0005616577842741452,
      "loss": 3.1387,
      "step": 37486
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1951699256896973,
      "learning_rate": 0.0005616557832998177,
      "loss": 3.1389,
      "step": 37487
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.6124956607818604,
      "learning_rate": 0.0005616537822768434,
      "loss": 3.2495,
      "step": 37488
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3828810453414917,
      "learning_rate": 0.0005616517812052226,
      "loss": 3.053,
      "step": 37489
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.482979655265808,
      "learning_rate": 0.0005616497800849559,
      "loss": 2.9326,
      "step": 37490
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5576963424682617,
      "learning_rate": 0.0005616477789160436,
      "loss": 3.0573,
      "step": 37491
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4257888793945312,
      "learning_rate": 0.0005616457776984861,
      "loss": 3.0317,
      "step": 37492
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4823700189590454,
      "learning_rate": 0.0005616437764322835,
      "loss": 2.9096,
      "step": 37493
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4031192064285278,
      "learning_rate": 0.0005616417751174366,
      "loss": 2.9628,
      "step": 37494
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4063973426818848,
      "learning_rate": 0.0005616397737539453,
      "loss": 3.0206,
      "step": 37495
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.444798707962036,
      "learning_rate": 0.0005616377723418104,
      "loss": 3.282,
      "step": 37496
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5279582738876343,
      "learning_rate": 0.0005616357708810321,
      "loss": 3.2658,
      "step": 37497
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.172715902328491,
      "learning_rate": 0.0005616337693716107,
      "loss": 3.1995,
      "step": 37498
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.229426383972168,
      "learning_rate": 0.0005616317678135467,
      "loss": 3.299,
      "step": 37499
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7702555656433105,
      "learning_rate": 0.0005616297662068404,
      "loss": 3.0079,
      "step": 37500
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4023234844207764,
      "learning_rate": 0.0005616277645514922,
      "loss": 3.2236,
      "step": 37501
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7355656623840332,
      "learning_rate": 0.0005616257628475025,
      "loss": 3.0384,
      "step": 37502
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.951516032218933,
      "learning_rate": 0.0005616237610948716,
      "loss": 3.0281,
      "step": 37503
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8974575996398926,
      "learning_rate": 0.0005616217592935999,
      "loss": 2.9994,
      "step": 37504
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1412391662597656,
      "learning_rate": 0.0005616197574436877,
      "loss": 3.0711,
      "step": 37505
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4770714044570923,
      "learning_rate": 0.0005616177555451355,
      "loss": 3.2989,
      "step": 37506
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7765495777130127,
      "learning_rate": 0.0005616157535979437,
      "loss": 2.8653,
      "step": 37507
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8821351528167725,
      "learning_rate": 0.0005616137516021124,
      "loss": 3.0961,
      "step": 37508
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.463151693344116,
      "learning_rate": 0.0005616117495576424,
      "loss": 2.9623,
      "step": 37509
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4425804615020752,
      "learning_rate": 0.0005616097474645338,
      "loss": 2.8915,
      "step": 37510
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5179470777511597,
      "learning_rate": 0.0005616077453227869,
      "loss": 2.9162,
      "step": 37511
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3848886489868164,
      "learning_rate": 0.0005616057431324021,
      "loss": 3.0304,
      "step": 37512
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0251333713531494,
      "learning_rate": 0.00056160374089338,
      "loss": 3.1789,
      "step": 37513
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7360483407974243,
      "learning_rate": 0.0005616017386057209,
      "loss": 3.1716,
      "step": 37514
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3753328323364258,
      "learning_rate": 0.000561599736269425,
      "loss": 3.139,
      "step": 37515
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.5304884910583496,
      "learning_rate": 0.0005615977338844927,
      "loss": 3.1567,
      "step": 37516
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.3037099838256836,
      "learning_rate": 0.0005615957314509245,
      "loss": 3.2947,
      "step": 37517
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5286744832992554,
      "learning_rate": 0.0005615937289687208,
      "loss": 3.0289,
      "step": 37518
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5019434690475464,
      "learning_rate": 0.0005615917264378818,
      "loss": 2.8427,
      "step": 37519
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.385429859161377,
      "learning_rate": 0.0005615897238584079,
      "loss": 2.9685,
      "step": 37520
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4539343118667603,
      "learning_rate": 0.0005615877212302997,
      "loss": 3.0382,
      "step": 37521
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3318443298339844,
      "learning_rate": 0.0005615857185535572,
      "loss": 3.3265,
      "step": 37522
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2986631393432617,
      "learning_rate": 0.0005615837158281812,
      "loss": 2.8321,
      "step": 37523
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6921545267105103,
      "learning_rate": 0.0005615817130541717,
      "loss": 3.1339,
      "step": 37524
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9832069873809814,
      "learning_rate": 0.0005615797102315293,
      "loss": 3.0728,
      "step": 37525
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.562840461730957,
      "learning_rate": 0.0005615777073602542,
      "loss": 3.0844,
      "step": 37526
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.32244074344635,
      "learning_rate": 0.0005615757044403469,
      "loss": 3.0301,
      "step": 37527
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6243596076965332,
      "learning_rate": 0.0005615737014718078,
      "loss": 3.2105,
      "step": 37528
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.816550850868225,
      "learning_rate": 0.0005615716984546372,
      "loss": 3.1311,
      "step": 37529
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5875457525253296,
      "learning_rate": 0.0005615696953888354,
      "loss": 3.0143,
      "step": 37530
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7946548461914062,
      "learning_rate": 0.000561567692274403,
      "loss": 3.1276,
      "step": 37531
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8006075620651245,
      "learning_rate": 0.0005615656891113401,
      "loss": 3.1896,
      "step": 37532
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7007966041564941,
      "learning_rate": 0.0005615636858996472,
      "loss": 2.9589,
      "step": 37533
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4338021278381348,
      "learning_rate": 0.0005615616826393248,
      "loss": 3.1339,
      "step": 37534
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.200430393218994,
      "learning_rate": 0.000561559679330373,
      "loss": 2.8537,
      "step": 37535
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8564348220825195,
      "learning_rate": 0.0005615576759727924,
      "loss": 3.1786,
      "step": 37536
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5550410747528076,
      "learning_rate": 0.0005615556725665832,
      "loss": 3.122,
      "step": 37537
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7697277069091797,
      "learning_rate": 0.000561553669111746,
      "loss": 2.7374,
      "step": 37538
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5148907899856567,
      "learning_rate": 0.0005615516656082809,
      "loss": 2.9858,
      "step": 37539
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5151581764221191,
      "learning_rate": 0.0005615496620561886,
      "loss": 3.3744,
      "step": 37540
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9392882585525513,
      "learning_rate": 0.0005615476584554691,
      "loss": 3.3126,
      "step": 37541
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1509265899658203,
      "learning_rate": 0.000561545654806123,
      "loss": 3.1142,
      "step": 37542
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2578108310699463,
      "learning_rate": 0.0005615436511081506,
      "loss": 3.1723,
      "step": 37543
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6050364971160889,
      "learning_rate": 0.0005615416473615523,
      "loss": 2.8889,
      "step": 37544
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5557080507278442,
      "learning_rate": 0.0005615396435663286,
      "loss": 3.2302,
      "step": 37545
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4479609727859497,
      "learning_rate": 0.0005615376397224796,
      "loss": 3.0232,
      "step": 37546
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5231131315231323,
      "learning_rate": 0.0005615356358300059,
      "loss": 2.8181,
      "step": 37547
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6836965084075928,
      "learning_rate": 0.0005615336318889078,
      "loss": 3.0092,
      "step": 37548
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2517181634902954,
      "learning_rate": 0.0005615316278991855,
      "loss": 2.903,
      "step": 37549
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4680898189544678,
      "learning_rate": 0.0005615296238608397,
      "loss": 2.9136,
      "step": 37550
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.520196557044983,
      "learning_rate": 0.0005615276197738706,
      "loss": 3.2221,
      "step": 37551
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.841871976852417,
      "learning_rate": 0.0005615256156382784,
      "loss": 2.8604,
      "step": 37552
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5290359258651733,
      "learning_rate": 0.0005615236114540639,
      "loss": 3.097,
      "step": 37553
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8822240829467773,
      "learning_rate": 0.0005615216072212271,
      "loss": 2.9,
      "step": 37554
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.483737587928772,
      "learning_rate": 0.0005615196029397685,
      "loss": 3.0553,
      "step": 37555
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.754172682762146,
      "learning_rate": 0.0005615175986096885,
      "loss": 3.1793,
      "step": 37556
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.190965414047241,
      "learning_rate": 0.0005615155942309874,
      "loss": 3.1676,
      "step": 37557
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4790761470794678,
      "learning_rate": 0.0005615135898036657,
      "loss": 3.0724,
      "step": 37558
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5466060638427734,
      "learning_rate": 0.0005615115853277236,
      "loss": 3.2076,
      "step": 37559
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.613650918006897,
      "learning_rate": 0.0005615095808031615,
      "loss": 2.9368,
      "step": 37560
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6505422592163086,
      "learning_rate": 0.00056150757622998,
      "loss": 3.0348,
      "step": 37561
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5787866115570068,
      "learning_rate": 0.0005615055716081791,
      "loss": 3.0963,
      "step": 37562
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5082776546478271,
      "learning_rate": 0.0005615035669377595,
      "loss": 3.0643,
      "step": 37563
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5319740772247314,
      "learning_rate": 0.0005615015622187215,
      "loss": 3.1735,
      "step": 37564
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3874433040618896,
      "learning_rate": 0.0005614995574510654,
      "loss": 2.9106,
      "step": 37565
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5516777038574219,
      "learning_rate": 0.0005614975526347915,
      "loss": 3.0806,
      "step": 37566
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.008343458175659,
      "learning_rate": 0.0005614955477699004,
      "loss": 3.2519,
      "step": 37567
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1927738189697266,
      "learning_rate": 0.0005614935428563923,
      "loss": 2.728,
      "step": 37568
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6669830083847046,
      "learning_rate": 0.0005614915378942675,
      "loss": 3.3247,
      "step": 37569
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.036943197250366,
      "learning_rate": 0.0005614895328835267,
      "loss": 3.0433,
      "step": 37570
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.2101335525512695,
      "learning_rate": 0.0005614875278241698,
      "loss": 2.8627,
      "step": 37571
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4216545820236206,
      "learning_rate": 0.0005614855227161976,
      "loss": 3.0975,
      "step": 37572
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6562527418136597,
      "learning_rate": 0.0005614835175596103,
      "loss": 2.932,
      "step": 37573
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.796372413635254,
      "learning_rate": 0.0005614815123544082,
      "loss": 3.1167,
      "step": 37574
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4014151096343994,
      "learning_rate": 0.0005614795071005918,
      "loss": 3.0898,
      "step": 37575
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.470311403274536,
      "learning_rate": 0.0005614775017981615,
      "loss": 3.1106,
      "step": 37576
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2513333559036255,
      "learning_rate": 0.0005614754964471174,
      "loss": 2.928,
      "step": 37577
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2935701608657837,
      "learning_rate": 0.0005614734910474603,
      "loss": 2.8441,
      "step": 37578
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.396669864654541,
      "learning_rate": 0.0005614714855991902,
      "loss": 3.0004,
      "step": 37579
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3249045610427856,
      "learning_rate": 0.0005614694801023076,
      "loss": 3.0677,
      "step": 37580
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4233160018920898,
      "learning_rate": 0.0005614674745568131,
      "loss": 3.0469,
      "step": 37581
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.7429397106170654,
      "learning_rate": 0.0005614654689627066,
      "loss": 2.8193,
      "step": 37582
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5057685375213623,
      "learning_rate": 0.0005614634633199888,
      "loss": 3.1721,
      "step": 37583
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0535387992858887,
      "learning_rate": 0.0005614614576286601,
      "loss": 3.1318,
      "step": 37584
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.124556541442871,
      "learning_rate": 0.0005614594518887206,
      "loss": 2.8505,
      "step": 37585
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.100321054458618,
      "learning_rate": 0.000561457446100171,
      "loss": 3.2112,
      "step": 37586
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.504227638244629,
      "learning_rate": 0.0005614554402630116,
      "loss": 3.2498,
      "step": 37587
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6846243143081665,
      "learning_rate": 0.0005614534343772426,
      "loss": 3.2975,
      "step": 37588
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.201219081878662,
      "learning_rate": 0.0005614514284428644,
      "loss": 3.0044,
      "step": 37589
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7707980871200562,
      "learning_rate": 0.0005614494224598775,
      "loss": 3.2754,
      "step": 37590
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4775285720825195,
      "learning_rate": 0.0005614474164282822,
      "loss": 3.0074,
      "step": 37591
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3102071285247803,
      "learning_rate": 0.0005614454103480789,
      "loss": 3.125,
      "step": 37592
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.55158269405365,
      "learning_rate": 0.000561443404219268,
      "loss": 2.9774,
      "step": 37593
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1794261932373047,
      "learning_rate": 0.0005614413980418499,
      "loss": 3.0551,
      "step": 37594
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6023064851760864,
      "learning_rate": 0.0005614393918158247,
      "loss": 2.8641,
      "step": 37595
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4106141328811646,
      "learning_rate": 0.0005614373855411932,
      "loss": 3.2187,
      "step": 37596
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4356639385223389,
      "learning_rate": 0.0005614353792179554,
      "loss": 2.8799,
      "step": 37597
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6538726091384888,
      "learning_rate": 0.000561433372846112,
      "loss": 2.9159,
      "step": 37598
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9506639242172241,
      "learning_rate": 0.0005614313664256631,
      "loss": 3.0471,
      "step": 37599
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9705945253372192,
      "learning_rate": 0.0005614293599566091,
      "loss": 3.2117,
      "step": 37600
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4517444372177124,
      "learning_rate": 0.0005614273534389506,
      "loss": 3.0355,
      "step": 37601
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.927353858947754,
      "learning_rate": 0.0005614253468726876,
      "loss": 3.0563,
      "step": 37602
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3968838453292847,
      "learning_rate": 0.000561423340257821,
      "loss": 3.1466,
      "step": 37603
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6387587785720825,
      "learning_rate": 0.0005614213335943506,
      "loss": 2.7564,
      "step": 37604
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6163246631622314,
      "learning_rate": 0.0005614193268822772,
      "loss": 2.8498,
      "step": 37605
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.130621910095215,
      "learning_rate": 0.0005614173201216009,
      "loss": 3.036,
      "step": 37606
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.349295735359192,
      "learning_rate": 0.0005614153133123223,
      "loss": 3.0982,
      "step": 37607
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5858993530273438,
      "learning_rate": 0.0005614133064544414,
      "loss": 3.0173,
      "step": 37608
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.74786376953125,
      "learning_rate": 0.0005614112995479591,
      "loss": 2.8716,
      "step": 37609
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3308913707733154,
      "learning_rate": 0.0005614092925928755,
      "loss": 3.0364,
      "step": 37610
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5294981002807617,
      "learning_rate": 0.0005614072855891908,
      "loss": 2.8154,
      "step": 37611
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2846183776855469,
      "learning_rate": 0.0005614052785369057,
      "loss": 3.0828,
      "step": 37612
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.331429123878479,
      "learning_rate": 0.0005614032714360204,
      "loss": 3.3049,
      "step": 37613
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.388610601425171,
      "learning_rate": 0.0005614012642865353,
      "loss": 3.2663,
      "step": 37614
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6486293077468872,
      "learning_rate": 0.0005613992570884508,
      "loss": 2.9269,
      "step": 37615
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5291167497634888,
      "learning_rate": 0.0005613972498417671,
      "loss": 3.2865,
      "step": 37616
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8381434679031372,
      "learning_rate": 0.0005613952425464848,
      "loss": 3.3426,
      "step": 37617
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.13055157661438,
      "learning_rate": 0.0005613932352026042,
      "loss": 2.857,
      "step": 37618
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6010433435440063,
      "learning_rate": 0.0005613912278101257,
      "loss": 3.2578,
      "step": 37619
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6345213651657104,
      "learning_rate": 0.0005613892203690495,
      "loss": 3.0591,
      "step": 37620
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.405667543411255,
      "learning_rate": 0.0005613872128793762,
      "loss": 3.1313,
      "step": 37621
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2823525667190552,
      "learning_rate": 0.0005613852053411062,
      "loss": 2.844,
      "step": 37622
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5886205434799194,
      "learning_rate": 0.0005613831977542396,
      "loss": 3.1874,
      "step": 37623
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4227066040039062,
      "learning_rate": 0.000561381190118777,
      "loss": 2.9395,
      "step": 37624
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.518507957458496,
      "learning_rate": 0.0005613791824347187,
      "loss": 3.1554,
      "step": 37625
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.904033899307251,
      "learning_rate": 0.000561377174702065,
      "loss": 3.0669,
      "step": 37626
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7057204246520996,
      "learning_rate": 0.0005613751669208164,
      "loss": 3.2519,
      "step": 37627
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5559364557266235,
      "learning_rate": 0.0005613731590909732,
      "loss": 3.1732,
      "step": 37628
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6910415887832642,
      "learning_rate": 0.0005613711512125358,
      "loss": 3.2813,
      "step": 37629
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9670699834823608,
      "learning_rate": 0.0005613691432855047,
      "loss": 2.9665,
      "step": 37630
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4829827547073364,
      "learning_rate": 0.0005613671353098801,
      "loss": 3.3056,
      "step": 37631
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5430693626403809,
      "learning_rate": 0.0005613651272856622,
      "loss": 2.9957,
      "step": 37632
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6474305391311646,
      "learning_rate": 0.0005613631192128518,
      "loss": 3.0324,
      "step": 37633
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.686079740524292,
      "learning_rate": 0.0005613611110914491,
      "loss": 3.136,
      "step": 37634
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.526833415031433,
      "learning_rate": 0.0005613591029214542,
      "loss": 3.287,
      "step": 37635
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6245660781860352,
      "learning_rate": 0.0005613570947028679,
      "loss": 2.8942,
      "step": 37636
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4946931600570679,
      "learning_rate": 0.0005613550864356903,
      "loss": 3.054,
      "step": 37637
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4825431108474731,
      "learning_rate": 0.0005613530781199219,
      "loss": 3.0821,
      "step": 37638
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7344073057174683,
      "learning_rate": 0.0005613510697555631,
      "loss": 3.1365,
      "step": 37639
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.812613606452942,
      "learning_rate": 0.0005613490613426141,
      "loss": 3.2952,
      "step": 37640
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5969698429107666,
      "learning_rate": 0.0005613470528810754,
      "loss": 3.1732,
      "step": 37641
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5849058628082275,
      "learning_rate": 0.0005613450443709473,
      "loss": 3.1128,
      "step": 37642
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.333246111869812,
      "learning_rate": 0.0005613430358122303,
      "loss": 3.1223,
      "step": 37643
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6079808473587036,
      "learning_rate": 0.0005613410272049247,
      "loss": 3.0938,
      "step": 37644
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1919119358062744,
      "learning_rate": 0.0005613390185490308,
      "loss": 2.6886,
      "step": 37645
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7142499685287476,
      "learning_rate": 0.0005613370098445491,
      "loss": 3.2241,
      "step": 37646
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5768426656723022,
      "learning_rate": 0.0005613350010914799,
      "loss": 2.9074,
      "step": 37647
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6846635341644287,
      "learning_rate": 0.0005613329922898235,
      "loss": 2.9864,
      "step": 37648
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.403093695640564,
      "learning_rate": 0.0005613309834395805,
      "loss": 3.0318,
      "step": 37649
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5139240026474,
      "learning_rate": 0.000561328974540751,
      "loss": 2.9546,
      "step": 37650
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.302567958831787,
      "learning_rate": 0.0005613269655933357,
      "loss": 3.2776,
      "step": 37651
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4652180671691895,
      "learning_rate": 0.0005613249565973346,
      "loss": 2.9714,
      "step": 37652
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.687415599822998,
      "learning_rate": 0.0005613229475527483,
      "loss": 3.1443,
      "step": 37653
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.911617398262024,
      "learning_rate": 0.0005613209384595772,
      "loss": 3.0794,
      "step": 37654
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.912742257118225,
      "learning_rate": 0.0005613189293178216,
      "loss": 2.9918,
      "step": 37655
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3747994899749756,
      "learning_rate": 0.0005613169201274819,
      "loss": 2.9551,
      "step": 37656
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.59660005569458,
      "learning_rate": 0.0005613149108885583,
      "loss": 3.1958,
      "step": 37657
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.068392038345337,
      "learning_rate": 0.0005613129016010515,
      "loss": 2.9811,
      "step": 37658
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.173072576522827,
      "learning_rate": 0.0005613108922649617,
      "loss": 2.6746,
      "step": 37659
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5393646955490112,
      "learning_rate": 0.0005613088828802891,
      "loss": 3.0324,
      "step": 37660
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.478000521659851,
      "learning_rate": 0.0005613068734470344,
      "loss": 3.1828,
      "step": 37661
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5642443895339966,
      "learning_rate": 0.0005613048639651978,
      "loss": 3.0076,
      "step": 37662
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.551358938217163,
      "learning_rate": 0.0005613028544347795,
      "loss": 2.6043,
      "step": 37663
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.60528564453125,
      "learning_rate": 0.0005613008448557803,
      "loss": 2.9371,
      "step": 37664
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6861534118652344,
      "learning_rate": 0.0005612988352282003,
      "loss": 2.9754,
      "step": 37665
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.456066608428955,
      "learning_rate": 0.0005612968255520399,
      "loss": 3.0455,
      "step": 37666
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6872272491455078,
      "learning_rate": 0.0005612948158272995,
      "loss": 3.0934,
      "step": 37667
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7596415281295776,
      "learning_rate": 0.0005612928060539794,
      "loss": 3.1391,
      "step": 37668
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.59456205368042,
      "learning_rate": 0.00056129079623208,
      "loss": 3.239,
      "step": 37669
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.482684850692749,
      "learning_rate": 0.0005612887863616019,
      "loss": 3.0403,
      "step": 37670
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7337967157363892,
      "learning_rate": 0.0005612867764425451,
      "loss": 2.9666,
      "step": 37671
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4376639127731323,
      "learning_rate": 0.0005612847664749102,
      "loss": 3.331,
      "step": 37672
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8010954856872559,
      "learning_rate": 0.0005612827564586975,
      "loss": 2.8934,
      "step": 37673
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.557403564453125,
      "learning_rate": 0.0005612807463939074,
      "loss": 2.9486,
      "step": 37674
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.371564269065857,
      "learning_rate": 0.0005612787362805405,
      "loss": 3.1399,
      "step": 37675
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4338970184326172,
      "learning_rate": 0.0005612767261185967,
      "loss": 3.1584,
      "step": 37676
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5192971229553223,
      "learning_rate": 0.0005612747159080767,
      "loss": 3.1036,
      "step": 37677
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9156450033187866,
      "learning_rate": 0.0005612727056489808,
      "loss": 3.1788,
      "step": 37678
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9098471403121948,
      "learning_rate": 0.0005612706953413094,
      "loss": 3.2068,
      "step": 37679
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6614089012145996,
      "learning_rate": 0.0005612686849850628,
      "loss": 2.9261,
      "step": 37680
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4325859546661377,
      "learning_rate": 0.0005612666745802414,
      "loss": 2.9302,
      "step": 37681
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.462886095046997,
      "learning_rate": 0.0005612646641268457,
      "loss": 3.0914,
      "step": 37682
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5638015270233154,
      "learning_rate": 0.0005612626536248758,
      "loss": 3.1063,
      "step": 37683
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.61961030960083,
      "learning_rate": 0.0005612606430743323,
      "loss": 3.1343,
      "step": 37684
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5184123516082764,
      "learning_rate": 0.0005612586324752156,
      "loss": 3.0255,
      "step": 37685
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4857292175292969,
      "learning_rate": 0.0005612566218275259,
      "loss": 3.0781,
      "step": 37686
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4576001167297363,
      "learning_rate": 0.0005612546111312638,
      "loss": 3.0342,
      "step": 37687
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7757750749588013,
      "learning_rate": 0.0005612526003864294,
      "loss": 2.8965,
      "step": 37688
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4086250066757202,
      "learning_rate": 0.0005612505895930232,
      "loss": 2.7937,
      "step": 37689
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9364312887191772,
      "learning_rate": 0.0005612485787510457,
      "loss": 2.9323,
      "step": 37690
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5055466890335083,
      "learning_rate": 0.0005612465678604971,
      "loss": 3.2072,
      "step": 37691
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5888487100601196,
      "learning_rate": 0.0005612445569213777,
      "loss": 3.2967,
      "step": 37692
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4398648738861084,
      "learning_rate": 0.0005612425459336882,
      "loss": 3.1271,
      "step": 37693
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.437851071357727,
      "learning_rate": 0.0005612405348974287,
      "loss": 3.1357,
      "step": 37694
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6914641857147217,
      "learning_rate": 0.0005612385238125997,
      "loss": 2.8765,
      "step": 37695
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6337380409240723,
      "learning_rate": 0.0005612365126792014,
      "loss": 2.8401,
      "step": 37696
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5084019899368286,
      "learning_rate": 0.0005612345014972344,
      "loss": 3.2349,
      "step": 37697
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.025117874145508,
      "learning_rate": 0.000561232490266699,
      "loss": 2.8703,
      "step": 37698
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7398979663848877,
      "learning_rate": 0.0005612304789875955,
      "loss": 2.8894,
      "step": 37699
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6179487705230713,
      "learning_rate": 0.0005612284676599244,
      "loss": 3.1291,
      "step": 37700
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0787103176116943,
      "learning_rate": 0.0005612264562836859,
      "loss": 2.9146,
      "step": 37701
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6563035249710083,
      "learning_rate": 0.0005612244448588806,
      "loss": 2.9523,
      "step": 37702
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4699159860610962,
      "learning_rate": 0.0005612224333855085,
      "loss": 3.1013,
      "step": 37703
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6848167181015015,
      "learning_rate": 0.0005612204218635704,
      "loss": 3.0621,
      "step": 37704
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3661789894104004,
      "learning_rate": 0.0005612184102930665,
      "loss": 3.2544,
      "step": 37705
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2848972082138062,
      "learning_rate": 0.000561216398673997,
      "loss": 3.0415,
      "step": 37706
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.779829740524292,
      "learning_rate": 0.0005612143870063626,
      "loss": 3.1423,
      "step": 37707
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.574962854385376,
      "learning_rate": 0.0005612123752901635,
      "loss": 3.0328,
      "step": 37708
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5881930589675903,
      "learning_rate": 0.0005612103635254,
      "loss": 3.1847,
      "step": 37709
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4318636655807495,
      "learning_rate": 0.0005612083517120728,
      "loss": 2.9584,
      "step": 37710
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5175261497497559,
      "learning_rate": 0.0005612063398501818,
      "loss": 2.9397,
      "step": 37711
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.370758295059204,
      "learning_rate": 0.0005612043279397277,
      "loss": 3.1959,
      "step": 37712
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2997115850448608,
      "learning_rate": 0.0005612023159807108,
      "loss": 2.9336,
      "step": 37713
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7698088884353638,
      "learning_rate": 0.0005612003039731314,
      "loss": 3.0701,
      "step": 37714
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5464555025100708,
      "learning_rate": 0.0005611982919169899,
      "loss": 3.0635,
      "step": 37715
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.824424147605896,
      "learning_rate": 0.0005611962798122868,
      "loss": 3.1033,
      "step": 37716
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5274120569229126,
      "learning_rate": 0.0005611942676590223,
      "loss": 3.1155,
      "step": 37717
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5837221145629883,
      "learning_rate": 0.0005611922554571969,
      "loss": 3.1756,
      "step": 37718
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0870392322540283,
      "learning_rate": 0.000561190243206811,
      "loss": 3.1253,
      "step": 37719
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.5228164196014404,
      "learning_rate": 0.0005611882309078648,
      "loss": 3.0965,
      "step": 37720
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4595832824707031,
      "learning_rate": 0.0005611862185603587,
      "loss": 3.1102,
      "step": 37721
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.723586916923523,
      "learning_rate": 0.0005611842061642934,
      "loss": 3.0279,
      "step": 37722
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7237226963043213,
      "learning_rate": 0.0005611821937196688,
      "loss": 3.228,
      "step": 37723
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7479671239852905,
      "learning_rate": 0.0005611801812264856,
      "loss": 2.9309,
      "step": 37724
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.319883942604065,
      "learning_rate": 0.0005611781686847441,
      "loss": 2.9973,
      "step": 37725
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2643978595733643,
      "learning_rate": 0.0005611761560944446,
      "loss": 2.8068,
      "step": 37726
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.70272696018219,
      "learning_rate": 0.0005611741434555876,
      "loss": 2.9711,
      "step": 37727
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5669664144515991,
      "learning_rate": 0.0005611721307681733,
      "loss": 3.0013,
      "step": 37728
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.890736699104309,
      "learning_rate": 0.0005611701180322022,
      "loss": 2.9196,
      "step": 37729
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8224796056747437,
      "learning_rate": 0.0005611681052476746,
      "loss": 2.9188,
      "step": 37730
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7347568273544312,
      "learning_rate": 0.0005611660924145909,
      "loss": 2.9166,
      "step": 37731
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0677897930145264,
      "learning_rate": 0.0005611640795329517,
      "loss": 3.3086,
      "step": 37732
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.8267598152160645,
      "learning_rate": 0.000561162066602757,
      "loss": 3.1012,
      "step": 37733
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5378751754760742,
      "learning_rate": 0.0005611600536240074,
      "loss": 3.0955,
      "step": 37734
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8871276378631592,
      "learning_rate": 0.0005611580405967032,
      "loss": 2.8493,
      "step": 37735
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.740040898323059,
      "learning_rate": 0.0005611560275208447,
      "loss": 3.1968,
      "step": 37736
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4364266395568848,
      "learning_rate": 0.0005611540143964324,
      "loss": 3.2028,
      "step": 37737
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.7219152450561523,
      "learning_rate": 0.0005611520012234667,
      "loss": 2.9445,
      "step": 37738
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7011414766311646,
      "learning_rate": 0.0005611499880019479,
      "loss": 2.9822,
      "step": 37739
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7677565813064575,
      "learning_rate": 0.0005611479747318764,
      "loss": 3.2326,
      "step": 37740
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.765623927116394,
      "learning_rate": 0.0005611459614132526,
      "loss": 3.1946,
      "step": 37741
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9336662292480469,
      "learning_rate": 0.0005611439480460767,
      "loss": 3.1486,
      "step": 37742
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.579390048980713,
      "learning_rate": 0.0005611419346303493,
      "loss": 3.1645,
      "step": 37743
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3705768585205078,
      "learning_rate": 0.0005611399211660707,
      "loss": 2.9767,
      "step": 37744
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6862058639526367,
      "learning_rate": 0.0005611379076532412,
      "loss": 3.0549,
      "step": 37745
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5855293273925781,
      "learning_rate": 0.0005611358940918612,
      "loss": 2.7733,
      "step": 37746
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5812010765075684,
      "learning_rate": 0.0005611338804819313,
      "loss": 3.1321,
      "step": 37747
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7342207431793213,
      "learning_rate": 0.0005611318668234514,
      "loss": 3.0368,
      "step": 37748
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4687447547912598,
      "learning_rate": 0.0005611298531164224,
      "loss": 3.0064,
      "step": 37749
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9393020868301392,
      "learning_rate": 0.0005611278393608443,
      "loss": 3.3,
      "step": 37750
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4278346300125122,
      "learning_rate": 0.0005611258255567177,
      "loss": 3.0625,
      "step": 37751
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9567114114761353,
      "learning_rate": 0.0005611238117040428,
      "loss": 3.1693,
      "step": 37752
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.479852557182312,
      "learning_rate": 0.0005611217978028202,
      "loss": 2.9877,
      "step": 37753
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2333343029022217,
      "learning_rate": 0.0005611197838530499,
      "loss": 2.9157,
      "step": 37754
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3598335981369019,
      "learning_rate": 0.0005611177698547327,
      "loss": 3.0725,
      "step": 37755
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2758733034133911,
      "learning_rate": 0.0005611157558078686,
      "loss": 2.9819,
      "step": 37756
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.649937391281128,
      "learning_rate": 0.0005611137417124583,
      "loss": 3.0496,
      "step": 37757
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6955090761184692,
      "learning_rate": 0.0005611117275685019,
      "loss": 3.0373,
      "step": 37758
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7767596244812012,
      "learning_rate": 0.000561109713376,
      "loss": 2.9717,
      "step": 37759
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.623875856399536,
      "learning_rate": 0.0005611076991349529,
      "loss": 3.1705,
      "step": 37760
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5936700105667114,
      "learning_rate": 0.0005611056848453609,
      "loss": 3.0879,
      "step": 37761
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.52541184425354,
      "learning_rate": 0.0005611036705072244,
      "loss": 3.1451,
      "step": 37762
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.109311819076538,
      "learning_rate": 0.0005611016561205438,
      "loss": 3.0762,
      "step": 37763
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7563222646713257,
      "learning_rate": 0.0005610996416853195,
      "loss": 3.3298,
      "step": 37764
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6305803060531616,
      "learning_rate": 0.0005610976272015519,
      "loss": 3.2679,
      "step": 37765
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.177760601043701,
      "learning_rate": 0.0005610956126692412,
      "loss": 3.1463,
      "step": 37766
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.6363584995269775,
      "learning_rate": 0.000561093598088388,
      "loss": 2.9995,
      "step": 37767
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7086855173110962,
      "learning_rate": 0.0005610915834589925,
      "loss": 3.1278,
      "step": 37768
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.2673027515411377,
      "learning_rate": 0.0005610895687810552,
      "loss": 2.9926,
      "step": 37769
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3866392374038696,
      "learning_rate": 0.0005610875540545764,
      "loss": 3.0193,
      "step": 37770
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.437264323234558,
      "learning_rate": 0.0005610855392795565,
      "loss": 3.0217,
      "step": 37771
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6104309558868408,
      "learning_rate": 0.0005610835244559959,
      "loss": 3.2244,
      "step": 37772
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7675507068634033,
      "learning_rate": 0.0005610815095838949,
      "loss": 3.2515,
      "step": 37773
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7727470397949219,
      "learning_rate": 0.000561079494663254,
      "loss": 2.9994,
      "step": 37774
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6305609941482544,
      "learning_rate": 0.0005610774796940733,
      "loss": 3.1437,
      "step": 37775
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.1386054754257202,
      "learning_rate": 0.0005610754646763535,
      "loss": 2.9431,
      "step": 37776
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6126891374588013,
      "learning_rate": 0.0005610734496100949,
      "loss": 2.888,
      "step": 37777
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.584702491760254,
      "learning_rate": 0.0005610714344952976,
      "loss": 3.1345,
      "step": 37778
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9671601057052612,
      "learning_rate": 0.0005610694193319624,
      "loss": 3.1112,
      "step": 37779
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0549237728118896,
      "learning_rate": 0.0005610674041200894,
      "loss": 3.0358,
      "step": 37780
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3082367181777954,
      "learning_rate": 0.0005610653888596792,
      "loss": 3.3093,
      "step": 37781
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2833505868911743,
      "learning_rate": 0.0005610633735507318,
      "loss": 3.1523,
      "step": 37782
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5330383777618408,
      "learning_rate": 0.0005610613581932478,
      "loss": 3.2223,
      "step": 37783
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9055001735687256,
      "learning_rate": 0.0005610593427872277,
      "loss": 3.0681,
      "step": 37784
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6016021966934204,
      "learning_rate": 0.0005610573273326716,
      "loss": 3.0908,
      "step": 37785
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.536041498184204,
      "learning_rate": 0.0005610553118295801,
      "loss": 2.8496,
      "step": 37786
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7946414947509766,
      "learning_rate": 0.0005610532962779534,
      "loss": 3.0896,
      "step": 37787
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4596869945526123,
      "learning_rate": 0.0005610512806777921,
      "loss": 3.0463,
      "step": 37788
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3554437160491943,
      "learning_rate": 0.0005610492650290964,
      "loss": 3.2348,
      "step": 37789
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5881338119506836,
      "learning_rate": 0.0005610472493318666,
      "loss": 3.0006,
      "step": 37790
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0313472747802734,
      "learning_rate": 0.0005610452335861033,
      "loss": 3.1179,
      "step": 37791
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4134843349456787,
      "learning_rate": 0.0005610432177918068,
      "loss": 3.0106,
      "step": 37792
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4071931838989258,
      "learning_rate": 0.0005610412019489774,
      "loss": 3.1797,
      "step": 37793
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5613172054290771,
      "learning_rate": 0.0005610391860576154,
      "loss": 3.1168,
      "step": 37794
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8494031429290771,
      "learning_rate": 0.0005610371701177215,
      "loss": 3.2088,
      "step": 37795
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5883550643920898,
      "learning_rate": 0.0005610351541292957,
      "loss": 2.7734,
      "step": 37796
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5686688423156738,
      "learning_rate": 0.0005610331380923385,
      "loss": 3.034,
      "step": 37797
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.081482172012329,
      "learning_rate": 0.0005610311220068504,
      "loss": 2.8587,
      "step": 37798
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6859411001205444,
      "learning_rate": 0.0005610291058728318,
      "loss": 3.0363,
      "step": 37799
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.389320731163025,
      "learning_rate": 0.0005610270896902827,
      "loss": 3.2541,
      "step": 37800
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.632288932800293,
      "learning_rate": 0.0005610250734592039,
      "loss": 3.0059,
      "step": 37801
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4658591747283936,
      "learning_rate": 0.0005610230571795956,
      "loss": 2.8342,
      "step": 37802
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.791996717453003,
      "learning_rate": 0.0005610210408514582,
      "loss": 3.0495,
      "step": 37803
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.6977336406707764,
      "learning_rate": 0.000561019024474792,
      "loss": 2.8093,
      "step": 37804
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.807616949081421,
      "learning_rate": 0.0005610170080495975,
      "loss": 2.9359,
      "step": 37805
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.390798330307007,
      "learning_rate": 0.000561014991575875,
      "loss": 2.8816,
      "step": 37806
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.557404637336731,
      "learning_rate": 0.0005610129750536248,
      "loss": 3.3015,
      "step": 37807
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5411056280136108,
      "learning_rate": 0.0005610109584828476,
      "loss": 3.1414,
      "step": 37808
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.819885730743408,
      "learning_rate": 0.0005610089418635433,
      "loss": 3.0966,
      "step": 37809
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.320955753326416,
      "learning_rate": 0.0005610069251957125,
      "loss": 3.0455,
      "step": 37810
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.02813982963562,
      "learning_rate": 0.0005610049084793558,
      "loss": 3.0795,
      "step": 37811
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7211387157440186,
      "learning_rate": 0.0005610028917144731,
      "loss": 3.2642,
      "step": 37812
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7460682392120361,
      "learning_rate": 0.0005610008749010652,
      "loss": 3.0004,
      "step": 37813
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4926390647888184,
      "learning_rate": 0.0005609988580391322,
      "loss": 3.2765,
      "step": 37814
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6613140106201172,
      "learning_rate": 0.0005609968411286747,
      "loss": 3.0021,
      "step": 37815
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.629738688468933,
      "learning_rate": 0.000560994824169693,
      "loss": 2.8048,
      "step": 37816
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6366995573043823,
      "learning_rate": 0.0005609928071621873,
      "loss": 3.1462,
      "step": 37817
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6195908784866333,
      "learning_rate": 0.0005609907901061582,
      "loss": 2.898,
      "step": 37818
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.012601375579834,
      "learning_rate": 0.0005609887730016059,
      "loss": 2.9947,
      "step": 37819
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.393186330795288,
      "learning_rate": 0.0005609867558485309,
      "loss": 3.1146,
      "step": 37820
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0716986656188965,
      "learning_rate": 0.0005609847386469335,
      "loss": 2.918,
      "step": 37821
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.000439405441284,
      "learning_rate": 0.0005609827213968143,
      "loss": 3.1297,
      "step": 37822
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3839459419250488,
      "learning_rate": 0.0005609807040981732,
      "loss": 3.0299,
      "step": 37823
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6490026712417603,
      "learning_rate": 0.000560978686751011,
      "loss": 2.8568,
      "step": 37824
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6116809844970703,
      "learning_rate": 0.000560976669355328,
      "loss": 3.3055,
      "step": 37825
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.984410285949707,
      "learning_rate": 0.0005609746519111245,
      "loss": 2.9568,
      "step": 37826
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7167679071426392,
      "learning_rate": 0.0005609726344184007,
      "loss": 3.2581,
      "step": 37827
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5502514839172363,
      "learning_rate": 0.0005609706168771574,
      "loss": 3.152,
      "step": 37828
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5378376245498657,
      "learning_rate": 0.0005609685992873946,
      "loss": 3.0235,
      "step": 37829
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.742231011390686,
      "learning_rate": 0.0005609665816491129,
      "loss": 2.9403,
      "step": 37830
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5907201766967773,
      "learning_rate": 0.0005609645639623125,
      "loss": 2.976,
      "step": 37831
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6910148859024048,
      "learning_rate": 0.0005609625462269938,
      "loss": 3.1396,
      "step": 37832
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7182180881500244,
      "learning_rate": 0.0005609605284431574,
      "loss": 2.9085,
      "step": 37833
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.15742564201355,
      "learning_rate": 0.0005609585106108034,
      "loss": 2.9326,
      "step": 37834
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7456670999526978,
      "learning_rate": 0.0005609564927299323,
      "loss": 2.9076,
      "step": 37835
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3508938550949097,
      "learning_rate": 0.0005609544748005446,
      "loss": 3.0711,
      "step": 37836
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3761796951293945,
      "learning_rate": 0.0005609524568226403,
      "loss": 3.1027,
      "step": 37837
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6586867570877075,
      "learning_rate": 0.0005609504387962202,
      "loss": 3.0119,
      "step": 37838
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3035938739776611,
      "learning_rate": 0.0005609484207212843,
      "loss": 3.1414,
      "step": 37839
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5654009580612183,
      "learning_rate": 0.0005609464025978333,
      "loss": 3.1747,
      "step": 37840
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.765623688697815,
      "learning_rate": 0.0005609443844258675,
      "loss": 2.8773,
      "step": 37841
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.687087893486023,
      "learning_rate": 0.0005609423662053871,
      "loss": 2.7388,
      "step": 37842
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.930044412612915,
      "learning_rate": 0.0005609403479363927,
      "loss": 3.1321,
      "step": 37843
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9614078998565674,
      "learning_rate": 0.0005609383296188845,
      "loss": 2.9488,
      "step": 37844
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8566235303878784,
      "learning_rate": 0.0005609363112528629,
      "loss": 3.0186,
      "step": 37845
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6473584175109863,
      "learning_rate": 0.0005609342928383284,
      "loss": 2.9537,
      "step": 37846
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9123880863189697,
      "learning_rate": 0.0005609322743752813,
      "loss": 3.1348,
      "step": 37847
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.371341347694397,
      "learning_rate": 0.0005609302558637218,
      "loss": 2.935,
      "step": 37848
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4673640727996826,
      "learning_rate": 0.0005609282373036505,
      "loss": 2.9455,
      "step": 37849
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1254658699035645,
      "learning_rate": 0.0005609262186950677,
      "loss": 3.1253,
      "step": 37850
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1946284770965576,
      "learning_rate": 0.0005609242000379739,
      "loss": 2.9817,
      "step": 37851
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5613658428192139,
      "learning_rate": 0.0005609221813323693,
      "loss": 3.1912,
      "step": 37852
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8359196186065674,
      "learning_rate": 0.0005609201625782544,
      "loss": 3.0232,
      "step": 37853
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7369410991668701,
      "learning_rate": 0.0005609181437756294,
      "loss": 3.2951,
      "step": 37854
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.047898769378662,
      "learning_rate": 0.0005609161249244949,
      "loss": 2.9609,
      "step": 37855
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6995282173156738,
      "learning_rate": 0.0005609141060248511,
      "loss": 3.0942,
      "step": 37856
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.124831199645996,
      "learning_rate": 0.0005609120870766985,
      "loss": 3.1621,
      "step": 37857
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3192832469940186,
      "learning_rate": 0.0005609100680800375,
      "loss": 2.7716,
      "step": 37858
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.435187816619873,
      "learning_rate": 0.0005609080490348681,
      "loss": 2.7779,
      "step": 37859
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8690321445465088,
      "learning_rate": 0.0005609060299411912,
      "loss": 3.106,
      "step": 37860
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7462708950042725,
      "learning_rate": 0.000560904010799007,
      "loss": 2.9152,
      "step": 37861
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2935818433761597,
      "learning_rate": 0.0005609019916083156,
      "loss": 2.865,
      "step": 37862
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.953704595565796,
      "learning_rate": 0.0005608999723691177,
      "loss": 3.1538,
      "step": 37863
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9761571884155273,
      "learning_rate": 0.0005608979530814137,
      "loss": 2.9413,
      "step": 37864
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5214715003967285,
      "learning_rate": 0.0005608959337452036,
      "loss": 2.9027,
      "step": 37865
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5292681455612183,
      "learning_rate": 0.0005608939143604882,
      "loss": 3.0661,
      "step": 37866
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.330157995223999,
      "learning_rate": 0.0005608918949272676,
      "loss": 2.8508,
      "step": 37867
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3259230852127075,
      "learning_rate": 0.0005608898754455423,
      "loss": 3.0714,
      "step": 37868
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0158984661102295,
      "learning_rate": 0.0005608878559153126,
      "loss": 2.9998,
      "step": 37869
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8799610137939453,
      "learning_rate": 0.0005608858363365791,
      "loss": 2.8548,
      "step": 37870
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5861691236495972,
      "learning_rate": 0.0005608838167093418,
      "loss": 3.0115,
      "step": 37871
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4711965322494507,
      "learning_rate": 0.0005608817970336014,
      "loss": 3.0082,
      "step": 37872
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6244210004806519,
      "learning_rate": 0.0005608797773093581,
      "loss": 3.0578,
      "step": 37873
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6951526403427124,
      "learning_rate": 0.0005608777575366123,
      "loss": 2.999,
      "step": 37874
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4487993717193604,
      "learning_rate": 0.0005608757377153644,
      "loss": 3.1997,
      "step": 37875
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4420831203460693,
      "learning_rate": 0.0005608737178456149,
      "loss": 3.1778,
      "step": 37876
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2744476795196533,
      "learning_rate": 0.000560871697927364,
      "loss": 3.1304,
      "step": 37877
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.463555097579956,
      "learning_rate": 0.000560869677960612,
      "loss": 3.0388,
      "step": 37878
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6273093223571777,
      "learning_rate": 0.0005608676579453595,
      "loss": 3.1458,
      "step": 37879
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9238836765289307,
      "learning_rate": 0.0005608656378816068,
      "loss": 3.2606,
      "step": 37880
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4851394891738892,
      "learning_rate": 0.0005608636177693542,
      "loss": 2.9479,
      "step": 37881
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5796223878860474,
      "learning_rate": 0.0005608615976086021,
      "loss": 3.1939,
      "step": 37882
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7105947732925415,
      "learning_rate": 0.000560859577399351,
      "loss": 2.9284,
      "step": 37883
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.997847557067871,
      "learning_rate": 0.0005608575571416011,
      "loss": 3.0561,
      "step": 37884
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.49271821975708,
      "learning_rate": 0.0005608555368353529,
      "loss": 3.0772,
      "step": 37885
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9613487720489502,
      "learning_rate": 0.0005608535164806068,
      "loss": 3.1098,
      "step": 37886
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.028038263320923,
      "learning_rate": 0.000560851496077363,
      "loss": 2.9277,
      "step": 37887
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.8787338733673096,
      "learning_rate": 0.000560849475625622,
      "loss": 3.1806,
      "step": 37888
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.959415316581726,
      "learning_rate": 0.0005608474551253842,
      "loss": 2.7979,
      "step": 37889
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6369893550872803,
      "learning_rate": 0.0005608454345766499,
      "loss": 2.9324,
      "step": 37890
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9743266105651855,
      "learning_rate": 0.0005608434139794194,
      "loss": 3.0276,
      "step": 37891
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.211355209350586,
      "learning_rate": 0.0005608413933336934,
      "loss": 3.1442,
      "step": 37892
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5335322618484497,
      "learning_rate": 0.000560839372639472,
      "loss": 3.2012,
      "step": 37893
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8089054822921753,
      "learning_rate": 0.0005608373518967556,
      "loss": 2.9754,
      "step": 37894
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.938353419303894,
      "learning_rate": 0.0005608353311055445,
      "loss": 2.9182,
      "step": 37895
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5616706609725952,
      "learning_rate": 0.0005608333102658393,
      "loss": 3.1025,
      "step": 37896
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4287375211715698,
      "learning_rate": 0.0005608312893776404,
      "loss": 3.2395,
      "step": 37897
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5175310373306274,
      "learning_rate": 0.0005608292684409478,
      "loss": 3.2289,
      "step": 37898
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7212812900543213,
      "learning_rate": 0.0005608272474557622,
      "loss": 3.1013,
      "step": 37899
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.491245985031128,
      "learning_rate": 0.0005608252264220839,
      "loss": 3.2078,
      "step": 37900
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4050830602645874,
      "learning_rate": 0.0005608232053399133,
      "loss": 3.0281,
      "step": 37901
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8808799982070923,
      "learning_rate": 0.0005608211842092507,
      "loss": 3.3008,
      "step": 37902
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6766109466552734,
      "learning_rate": 0.0005608191630300965,
      "loss": 3.0352,
      "step": 37903
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7343658208847046,
      "learning_rate": 0.0005608171418024512,
      "loss": 2.9812,
      "step": 37904
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2526028156280518,
      "learning_rate": 0.000560815120526315,
      "loss": 3.1049,
      "step": 37905
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3312132358551025,
      "learning_rate": 0.0005608130992016884,
      "loss": 3.1622,
      "step": 37906
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.523507595062256,
      "learning_rate": 0.0005608110778285716,
      "loss": 2.8525,
      "step": 37907
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6878749132156372,
      "learning_rate": 0.0005608090564069651,
      "loss": 3.0138,
      "step": 37908
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.571656346321106,
      "learning_rate": 0.0005608070349368695,
      "loss": 2.899,
      "step": 37909
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9360833168029785,
      "learning_rate": 0.0005608050134182848,
      "loss": 3.2486,
      "step": 37910
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.47402024269104,
      "learning_rate": 0.0005608029918512114,
      "loss": 3.2715,
      "step": 37911
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.363580584526062,
      "learning_rate": 0.0005608009702356499,
      "loss": 3.2038,
      "step": 37912
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.180817127227783,
      "learning_rate": 0.0005607989485716005,
      "loss": 3.0453,
      "step": 37913
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0313308238983154,
      "learning_rate": 0.0005607969268590639,
      "loss": 3.0409,
      "step": 37914
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.6683876514434814,
      "learning_rate": 0.0005607949050980401,
      "loss": 3.0353,
      "step": 37915
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5360980033874512,
      "learning_rate": 0.0005607928832885295,
      "loss": 3.175,
      "step": 37916
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.3477063179016113,
      "learning_rate": 0.0005607908614305327,
      "loss": 2.9543,
      "step": 37917
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.1626601219177246,
      "learning_rate": 0.0005607888395240499,
      "loss": 3.3534,
      "step": 37918
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.582498073577881,
      "learning_rate": 0.0005607868175690815,
      "loss": 3.0089,
      "step": 37919
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7504411935806274,
      "learning_rate": 0.000560784795565628,
      "loss": 3.1837,
      "step": 37920
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.3210861682891846,
      "learning_rate": 0.0005607827735136896,
      "loss": 2.9993,
      "step": 37921
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.759591817855835,
      "learning_rate": 0.0005607807514132668,
      "loss": 3.2067,
      "step": 37922
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.569359540939331,
      "learning_rate": 0.00056077872926436,
      "loss": 2.8982,
      "step": 37923
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6239805221557617,
      "learning_rate": 0.0005607767070669695,
      "loss": 2.9361,
      "step": 37924
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4846915006637573,
      "learning_rate": 0.0005607746848210956,
      "loss": 3.0437,
      "step": 37925
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6037260293960571,
      "learning_rate": 0.0005607726625267387,
      "loss": 2.9364,
      "step": 37926
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4978716373443604,
      "learning_rate": 0.0005607706401838993,
      "loss": 2.9345,
      "step": 37927
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3335765600204468,
      "learning_rate": 0.0005607686177925778,
      "loss": 3.1252,
      "step": 37928
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.347912073135376,
      "learning_rate": 0.0005607665953527744,
      "loss": 2.9899,
      "step": 37929
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.28816819190979,
      "learning_rate": 0.0005607645728644897,
      "loss": 2.9493,
      "step": 37930
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2887319326400757,
      "learning_rate": 0.0005607625503277238,
      "loss": 3.2583,
      "step": 37931
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5385209321975708,
      "learning_rate": 0.0005607605277424773,
      "loss": 2.9132,
      "step": 37932
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8003218173980713,
      "learning_rate": 0.0005607585051087505,
      "loss": 2.8937,
      "step": 37933
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7611756324768066,
      "learning_rate": 0.0005607564824265437,
      "loss": 2.812,
      "step": 37934
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.736555576324463,
      "learning_rate": 0.0005607544596958573,
      "loss": 3.0812,
      "step": 37935
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7954133749008179,
      "learning_rate": 0.0005607524369166918,
      "loss": 3.0321,
      "step": 37936
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5708110332489014,
      "learning_rate": 0.0005607504140890476,
      "loss": 3.027,
      "step": 37937
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2487454414367676,
      "learning_rate": 0.0005607483912129249,
      "loss": 2.969,
      "step": 37938
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6455076932907104,
      "learning_rate": 0.0005607463682883242,
      "loss": 3.1484,
      "step": 37939
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5302858352661133,
      "learning_rate": 0.0005607443453152456,
      "loss": 3.1006,
      "step": 37940
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4324572086334229,
      "learning_rate": 0.0005607423222936899,
      "loss": 3.2952,
      "step": 37941
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.379421353340149,
      "learning_rate": 0.0005607402992236574,
      "loss": 2.9995,
      "step": 37942
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5218454599380493,
      "learning_rate": 0.000560738276105148,
      "loss": 2.9642,
      "step": 37943
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.606365442276001,
      "learning_rate": 0.0005607362529381628,
      "loss": 3.2033,
      "step": 37944
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.4243931770324707,
      "learning_rate": 0.0005607342297227016,
      "loss": 2.7758,
      "step": 37945
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3332746028900146,
      "learning_rate": 0.000560732206458765,
      "loss": 2.874,
      "step": 37946
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5930297374725342,
      "learning_rate": 0.0005607301831463535,
      "loss": 3.1062,
      "step": 37947
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8561331033706665,
      "learning_rate": 0.0005607281597854672,
      "loss": 3.1018,
      "step": 37948
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.3649489879608154,
      "learning_rate": 0.0005607261363761066,
      "loss": 2.8337,
      "step": 37949
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9315487146377563,
      "learning_rate": 0.0005607241129182721,
      "loss": 3.0597,
      "step": 37950
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6781431436538696,
      "learning_rate": 0.0005607220894119641,
      "loss": 3.1691,
      "step": 37951
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4750616550445557,
      "learning_rate": 0.000560720065857183,
      "loss": 3.0178,
      "step": 37952
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4363799095153809,
      "learning_rate": 0.0005607180422539291,
      "loss": 2.9932,
      "step": 37953
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.29903244972229,
      "learning_rate": 0.0005607160186022027,
      "loss": 2.8497,
      "step": 37954
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7847974300384521,
      "learning_rate": 0.0005607139949020043,
      "loss": 3.1025,
      "step": 37955
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4114997386932373,
      "learning_rate": 0.0005607119711533344,
      "loss": 3.2043,
      "step": 37956
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.2511773109436035,
      "learning_rate": 0.000560709947356193,
      "loss": 2.9376,
      "step": 37957
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.445634126663208,
      "learning_rate": 0.0005607079235105809,
      "loss": 2.8922,
      "step": 37958
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.344985842704773,
      "learning_rate": 0.0005607058996164982,
      "loss": 2.7918,
      "step": 37959
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6100976467132568,
      "learning_rate": 0.0005607038756739452,
      "loss": 3.0416,
      "step": 37960
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.0464346408843994,
      "learning_rate": 0.0005607018516829226,
      "loss": 3.0204,
      "step": 37961
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5099074840545654,
      "learning_rate": 0.0005606998276434306,
      "loss": 3.2454,
      "step": 37962
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.82356858253479,
      "learning_rate": 0.0005606978035554695,
      "loss": 3.0542,
      "step": 37963
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.130347967147827,
      "learning_rate": 0.0005606957794190398,
      "loss": 2.9332,
      "step": 37964
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8030434846878052,
      "learning_rate": 0.0005606937552341418,
      "loss": 3.0301,
      "step": 37965
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6461572647094727,
      "learning_rate": 0.000560691731000776,
      "loss": 3.0219,
      "step": 37966
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.783039927482605,
      "learning_rate": 0.0005606897067189425,
      "loss": 2.9173,
      "step": 37967
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.040975570678711,
      "learning_rate": 0.000560687682388642,
      "loss": 3.0942,
      "step": 37968
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6669068336486816,
      "learning_rate": 0.0005606856580098748,
      "loss": 3.0076,
      "step": 37969
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5852514505386353,
      "learning_rate": 0.0005606836335826412,
      "loss": 3.0761,
      "step": 37970
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3852784633636475,
      "learning_rate": 0.0005606816091069415,
      "loss": 3.063,
      "step": 37971
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.746039628982544,
      "learning_rate": 0.0005606795845827762,
      "loss": 2.8351,
      "step": 37972
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3768774271011353,
      "learning_rate": 0.0005606775600101456,
      "loss": 3.0928,
      "step": 37973
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.809252381324768,
      "learning_rate": 0.0005606755353890502,
      "loss": 3.1065,
      "step": 37974
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6432485580444336,
      "learning_rate": 0.0005606735107194903,
      "loss": 3.1524,
      "step": 37975
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9622470140457153,
      "learning_rate": 0.0005606714860014663,
      "loss": 3.3193,
      "step": 37976
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.546978235244751,
      "learning_rate": 0.0005606694612349785,
      "loss": 3.1188,
      "step": 37977
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9460920095443726,
      "learning_rate": 0.0005606674364200272,
      "loss": 3.2169,
      "step": 37978
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5710906982421875,
      "learning_rate": 0.000560665411556613,
      "loss": 3.0646,
      "step": 37979
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6383402347564697,
      "learning_rate": 0.0005606633866447363,
      "loss": 3.0761,
      "step": 37980
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6284550428390503,
      "learning_rate": 0.0005606613616843972,
      "loss": 2.9701,
      "step": 37981
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.383509635925293,
      "learning_rate": 0.0005606593366755962,
      "loss": 2.8695,
      "step": 37982
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6124300956726074,
      "learning_rate": 0.0005606573116183339,
      "loss": 3.1834,
      "step": 37983
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.113758087158203,
      "learning_rate": 0.0005606552865126103,
      "loss": 3.013,
      "step": 37984
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.469049334526062,
      "learning_rate": 0.000560653261358426,
      "loss": 2.9832,
      "step": 37985
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6450896263122559,
      "learning_rate": 0.0005606512361557815,
      "loss": 2.9832,
      "step": 37986
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8635226488113403,
      "learning_rate": 0.0005606492109046768,
      "loss": 3.1164,
      "step": 37987
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.521510124206543,
      "learning_rate": 0.0005606471856051127,
      "loss": 3.279,
      "step": 37988
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.9645401239395142,
      "learning_rate": 0.0005606451602570892,
      "loss": 3.1213,
      "step": 37989
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5436631441116333,
      "learning_rate": 0.0005606431348606069,
      "loss": 3.1724,
      "step": 37990
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5141781568527222,
      "learning_rate": 0.0005606411094156662,
      "loss": 3.1004,
      "step": 37991
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.362228512763977,
      "learning_rate": 0.0005606390839222673,
      "loss": 3.1988,
      "step": 37992
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5399128198623657,
      "learning_rate": 0.0005606370583804106,
      "loss": 2.9532,
      "step": 37993
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6118429899215698,
      "learning_rate": 0.0005606350327900967,
      "loss": 3.2961,
      "step": 37994
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.3415114879608154,
      "learning_rate": 0.0005606330071513258,
      "loss": 3.368,
      "step": 37995
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.505658507347107,
      "learning_rate": 0.0005606309814640983,
      "loss": 2.8899,
      "step": 37996
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4893338680267334,
      "learning_rate": 0.0005606289557284145,
      "loss": 2.8997,
      "step": 37997
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8529095649719238,
      "learning_rate": 0.000560626929944275,
      "loss": 3.136,
      "step": 37998
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.30534827709198,
      "learning_rate": 0.0005606249041116799,
      "loss": 2.7899,
      "step": 37999
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8701423406600952,
      "learning_rate": 0.0005606228782306297,
      "loss": 3.0312,
      "step": 38000
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7035996913909912,
      "learning_rate": 0.0005606208523011249,
      "loss": 3.2027,
      "step": 38001
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7122344970703125,
      "learning_rate": 0.0005606188263231657,
      "loss": 3.2023,
      "step": 38002
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5158194303512573,
      "learning_rate": 0.0005606168002967526,
      "loss": 3.1235,
      "step": 38003
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7723714113235474,
      "learning_rate": 0.0005606147742218858,
      "loss": 3.2003,
      "step": 38004
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6714473962783813,
      "learning_rate": 0.0005606127480985659,
      "loss": 3.04,
      "step": 38005
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5322688817977905,
      "learning_rate": 0.0005606107219267931,
      "loss": 3.2081,
      "step": 38006
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8078693151474,
      "learning_rate": 0.0005606086957065679,
      "loss": 3.0058,
      "step": 38007
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8621375560760498,
      "learning_rate": 0.0005606066694378907,
      "loss": 3.2058,
      "step": 38008
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5417383909225464,
      "learning_rate": 0.0005606046431207616,
      "loss": 2.9883,
      "step": 38009
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.7660119533538818,
      "learning_rate": 0.0005606026167551813,
      "loss": 3.2061,
      "step": 38010
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.5134512186050415,
      "learning_rate": 0.0005606005903411501,
      "loss": 3.0888,
      "step": 38011
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.4120557308197021,
      "learning_rate": 0.0005605985638786682,
      "loss": 3.1319,
      "step": 38012
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.8245820999145508,
      "learning_rate": 0.0005605965373677361,
      "loss": 3.2313,
      "step": 38013
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.439677119255066,
      "learning_rate": 0.0005605945108083544,
      "loss": 3.0994,
      "step": 38014
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6194573640823364,
      "learning_rate": 0.0005605924842005232,
      "loss": 2.9948,
      "step": 38015
    },
    {
      "epoch": 0.49,
      "grad_norm": 1.6797490119934082,
      "learning_rate": 0.0005605904575442428,
      "loss": 3.1273,
      "step": 38016
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0492403507232666,
      "learning_rate": 0.0005605884308395138,
      "loss": 3.2032,
      "step": 38017
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4076120853424072,
      "learning_rate": 0.0005605864040863364,
      "loss": 3.0097,
      "step": 38018
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6684157848358154,
      "learning_rate": 0.0005605843772847111,
      "loss": 3.3687,
      "step": 38019
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.122122049331665,
      "learning_rate": 0.0005605823504346382,
      "loss": 3.0349,
      "step": 38020
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8272128105163574,
      "learning_rate": 0.0005605803235361183,
      "loss": 3.0373,
      "step": 38021
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4474012851715088,
      "learning_rate": 0.0005605782965891515,
      "loss": 3.0636,
      "step": 38022
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3928147554397583,
      "learning_rate": 0.0005605762695937383,
      "loss": 2.8867,
      "step": 38023
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4062087535858154,
      "learning_rate": 0.000560574242549879,
      "loss": 3.1849,
      "step": 38024
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4802792072296143,
      "learning_rate": 0.000560572215457574,
      "loss": 3.1918,
      "step": 38025
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.573181390762329,
      "learning_rate": 0.0005605701883168239,
      "loss": 2.8909,
      "step": 38026
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0892741680145264,
      "learning_rate": 0.0005605681611276286,
      "loss": 3.0618,
      "step": 38027
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.483277440071106,
      "learning_rate": 0.000560566133889989,
      "loss": 2.9471,
      "step": 38028
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4260122776031494,
      "learning_rate": 0.0005605641066039051,
      "loss": 2.9819,
      "step": 38029
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1969401836395264,
      "learning_rate": 0.0005605620792693775,
      "loss": 3.0021,
      "step": 38030
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0593888759613037,
      "learning_rate": 0.0005605600518864065,
      "loss": 3.2567,
      "step": 38031
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.389521837234497,
      "learning_rate": 0.0005605580244549925,
      "loss": 2.9352,
      "step": 38032
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3409504890441895,
      "learning_rate": 0.0005605559969751356,
      "loss": 3.318,
      "step": 38033
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0093841552734375,
      "learning_rate": 0.0005605539694468367,
      "loss": 2.745,
      "step": 38034
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.011070966720581,
      "learning_rate": 0.0005605519418700958,
      "loss": 3.0967,
      "step": 38035
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9336466789245605,
      "learning_rate": 0.0005605499142449133,
      "loss": 3.2801,
      "step": 38036
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.387343406677246,
      "learning_rate": 0.0005605478865712897,
      "loss": 2.99,
      "step": 38037
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5876200199127197,
      "learning_rate": 0.0005605458588492254,
      "loss": 3.0821,
      "step": 38038
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3117637634277344,
      "learning_rate": 0.0005605438310787206,
      "loss": 3.0384,
      "step": 38039
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.189417600631714,
      "learning_rate": 0.0005605418032597759,
      "loss": 3.3186,
      "step": 38040
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.0131916999816895,
      "learning_rate": 0.0005605397753923914,
      "loss": 2.8771,
      "step": 38041
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2234463691711426,
      "learning_rate": 0.0005605377474765678,
      "loss": 3.0859,
      "step": 38042
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.384828805923462,
      "learning_rate": 0.0005605357195123052,
      "loss": 3.0197,
      "step": 38043
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3834125995635986,
      "learning_rate": 0.0005605336914996042,
      "loss": 3.2368,
      "step": 38044
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4514728784561157,
      "learning_rate": 0.0005605316634384649,
      "loss": 3.2192,
      "step": 38045
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.181800365447998,
      "learning_rate": 0.000560529635328888,
      "loss": 2.8965,
      "step": 38046
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7868603467941284,
      "learning_rate": 0.0005605276071708737,
      "loss": 3.0671,
      "step": 38047
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7124487161636353,
      "learning_rate": 0.0005605255789644224,
      "loss": 2.9922,
      "step": 38048
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3789674043655396,
      "learning_rate": 0.0005605235507095345,
      "loss": 3.1162,
      "step": 38049
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.581872820854187,
      "learning_rate": 0.0005605215224062102,
      "loss": 3.1036,
      "step": 38050
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7743710279464722,
      "learning_rate": 0.0005605194940544502,
      "loss": 3.2235,
      "step": 38051
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7903215885162354,
      "learning_rate": 0.0005605174656542546,
      "loss": 2.7566,
      "step": 38052
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.065579414367676,
      "learning_rate": 0.000560515437205624,
      "loss": 3.1015,
      "step": 38053
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.180591583251953,
      "learning_rate": 0.0005605134087085585,
      "loss": 3.1354,
      "step": 38054
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6019045114517212,
      "learning_rate": 0.0005605113801630588,
      "loss": 2.9666,
      "step": 38055
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.848741292953491,
      "learning_rate": 0.0005605093515691251,
      "loss": 2.7699,
      "step": 38056
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5341547727584839,
      "learning_rate": 0.0005605073229267577,
      "loss": 2.8023,
      "step": 38057
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7827367782592773,
      "learning_rate": 0.0005605052942359571,
      "loss": 3.0996,
      "step": 38058
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7225537300109863,
      "learning_rate": 0.0005605032654967237,
      "loss": 2.892,
      "step": 38059
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6543793678283691,
      "learning_rate": 0.0005605012367090578,
      "loss": 3.0145,
      "step": 38060
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.295098066329956,
      "learning_rate": 0.0005604992078729598,
      "loss": 2.9333,
      "step": 38061
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9199434518814087,
      "learning_rate": 0.0005604971789884301,
      "loss": 3.1649,
      "step": 38062
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4391621351242065,
      "learning_rate": 0.000560495150055469,
      "loss": 3.0586,
      "step": 38063
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6303329467773438,
      "learning_rate": 0.000560493121074077,
      "loss": 3.2046,
      "step": 38064
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5465633869171143,
      "learning_rate": 0.0005604910920442544,
      "loss": 3.396,
      "step": 38065
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.571247935295105,
      "learning_rate": 0.0005604890629660015,
      "loss": 2.9851,
      "step": 38066
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3503901958465576,
      "learning_rate": 0.0005604870338393189,
      "loss": 3.1574,
      "step": 38067
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3847798109054565,
      "learning_rate": 0.0005604850046642067,
      "loss": 3.2151,
      "step": 38068
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5335934162139893,
      "learning_rate": 0.0005604829754406655,
      "loss": 3.146,
      "step": 38069
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.285135269165039,
      "learning_rate": 0.0005604809461686956,
      "loss": 3.1482,
      "step": 38070
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.927109956741333,
      "learning_rate": 0.0005604789168482974,
      "loss": 2.9737,
      "step": 38071
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3888332843780518,
      "learning_rate": 0.0005604768874794712,
      "loss": 3.0626,
      "step": 38072
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2113935947418213,
      "learning_rate": 0.0005604748580622175,
      "loss": 3.0753,
      "step": 38073
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6932282447814941,
      "learning_rate": 0.0005604728285965366,
      "loss": 3.169,
      "step": 38074
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7501482963562012,
      "learning_rate": 0.0005604707990824288,
      "loss": 2.9234,
      "step": 38075
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.159576892852783,
      "learning_rate": 0.0005604687695198946,
      "loss": 2.8616,
      "step": 38076
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8614224195480347,
      "learning_rate": 0.0005604667399089344,
      "loss": 3.0588,
      "step": 38077
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6534876823425293,
      "learning_rate": 0.0005604647102495484,
      "loss": 3.0351,
      "step": 38078
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5731576681137085,
      "learning_rate": 0.0005604626805417372,
      "loss": 3.0213,
      "step": 38079
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9446138143539429,
      "learning_rate": 0.0005604606507855009,
      "loss": 2.8504,
      "step": 38080
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.266054391860962,
      "learning_rate": 0.0005604586209808402,
      "loss": 3.1157,
      "step": 38081
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.45939302444458,
      "learning_rate": 0.0005604565911277553,
      "loss": 3.0674,
      "step": 38082
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.00740909576416,
      "learning_rate": 0.0005604545612262466,
      "loss": 3.2381,
      "step": 38083
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5357260704040527,
      "learning_rate": 0.0005604525312763145,
      "loss": 3.1083,
      "step": 38084
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4608098268508911,
      "learning_rate": 0.0005604505012779593,
      "loss": 3.1667,
      "step": 38085
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.032118558883667,
      "learning_rate": 0.0005604484712311815,
      "loss": 2.9805,
      "step": 38086
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5125558376312256,
      "learning_rate": 0.0005604464411359814,
      "loss": 2.9447,
      "step": 38087
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6393369436264038,
      "learning_rate": 0.0005604444109923594,
      "loss": 3.0893,
      "step": 38088
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.9991953372955322,
      "learning_rate": 0.0005604423808003158,
      "loss": 2.9732,
      "step": 38089
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9291951656341553,
      "learning_rate": 0.0005604403505598512,
      "loss": 3.0873,
      "step": 38090
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5869096517562866,
      "learning_rate": 0.0005604383202709657,
      "loss": 3.1251,
      "step": 38091
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.070202350616455,
      "learning_rate": 0.0005604362899336597,
      "loss": 3.0521,
      "step": 38092
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.224126100540161,
      "learning_rate": 0.0005604342595479339,
      "loss": 3.0338,
      "step": 38093
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3396083116531372,
      "learning_rate": 0.0005604322291137883,
      "loss": 3.0165,
      "step": 38094
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.574506402015686,
      "learning_rate": 0.0005604301986312236,
      "loss": 3.1302,
      "step": 38095
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5969693660736084,
      "learning_rate": 0.0005604281681002399,
      "loss": 3.0998,
      "step": 38096
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3185176849365234,
      "learning_rate": 0.0005604261375208376,
      "loss": 2.9498,
      "step": 38097
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2263307571411133,
      "learning_rate": 0.0005604241068930171,
      "loss": 3.0208,
      "step": 38098
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.675158739089966,
      "learning_rate": 0.0005604220762167791,
      "loss": 2.8633,
      "step": 38099
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4979865550994873,
      "learning_rate": 0.0005604200454921236,
      "loss": 2.9684,
      "step": 38100
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1482789516448975,
      "learning_rate": 0.0005604180147190511,
      "loss": 3.2129,
      "step": 38101
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5315892696380615,
      "learning_rate": 0.0005604159838975619,
      "loss": 3.0674,
      "step": 38102
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6779110431671143,
      "learning_rate": 0.0005604139530276566,
      "loss": 2.8417,
      "step": 38103
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2283762693405151,
      "learning_rate": 0.0005604119221093354,
      "loss": 3.1383,
      "step": 38104
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1665823459625244,
      "learning_rate": 0.0005604098911425987,
      "loss": 2.7484,
      "step": 38105
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5811851024627686,
      "learning_rate": 0.0005604078601274469,
      "loss": 2.8149,
      "step": 38106
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5276668071746826,
      "learning_rate": 0.0005604058290638803,
      "loss": 3.2604,
      "step": 38107
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3290338516235352,
      "learning_rate": 0.0005604037979518993,
      "loss": 3.1691,
      "step": 38108
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3788578510284424,
      "learning_rate": 0.0005604017667915044,
      "loss": 3.0601,
      "step": 38109
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9699844121932983,
      "learning_rate": 0.0005603997355826958,
      "loss": 3.0727,
      "step": 38110
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6489734649658203,
      "learning_rate": 0.0005603977043254742,
      "loss": 3.1798,
      "step": 38111
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.271772027015686,
      "learning_rate": 0.0005603956730198395,
      "loss": 3.2094,
      "step": 38112
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4362387657165527,
      "learning_rate": 0.0005603936416657924,
      "loss": 2.9317,
      "step": 38113
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.052062749862671,
      "learning_rate": 0.0005603916102633333,
      "loss": 2.9662,
      "step": 38114
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9156574010849,
      "learning_rate": 0.0005603895788124624,
      "loss": 3.1651,
      "step": 38115
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3769450187683105,
      "learning_rate": 0.0005603875473131802,
      "loss": 3.2119,
      "step": 38116
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6589412689208984,
      "learning_rate": 0.000560385515765487,
      "loss": 3.0587,
      "step": 38117
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7545127868652344,
      "learning_rate": 0.0005603834841693832,
      "loss": 2.7541,
      "step": 38118
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3291805982589722,
      "learning_rate": 0.0005603814525248692,
      "loss": 3.2472,
      "step": 38119
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.604664921760559,
      "learning_rate": 0.0005603794208319454,
      "loss": 3.1727,
      "step": 38120
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5923731327056885,
      "learning_rate": 0.0005603773890906122,
      "loss": 3.1724,
      "step": 38121
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6014857292175293,
      "learning_rate": 0.00056037535730087,
      "loss": 3.0173,
      "step": 38122
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7725749015808105,
      "learning_rate": 0.0005603733254627189,
      "loss": 3.1534,
      "step": 38123
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5292043685913086,
      "learning_rate": 0.0005603712935761596,
      "loss": 3.1767,
      "step": 38124
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.136518955230713,
      "learning_rate": 0.0005603692616411923,
      "loss": 3.1346,
      "step": 38125
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.955317497253418,
      "learning_rate": 0.0005603672296578175,
      "loss": 2.9519,
      "step": 38126
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0774757862091064,
      "learning_rate": 0.0005603651976260355,
      "loss": 3.002,
      "step": 38127
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0197901725769043,
      "learning_rate": 0.0005603631655458467,
      "loss": 2.941,
      "step": 38128
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3387236595153809,
      "learning_rate": 0.0005603611334172514,
      "loss": 2.9735,
      "step": 38129
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6671782732009888,
      "learning_rate": 0.0005603591012402502,
      "loss": 3.0878,
      "step": 38130
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7039668560028076,
      "learning_rate": 0.0005603570690148433,
      "loss": 3.0042,
      "step": 38131
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7187477350234985,
      "learning_rate": 0.000560355036741031,
      "loss": 3.0725,
      "step": 38132
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5125247240066528,
      "learning_rate": 0.0005603530044188138,
      "loss": 2.7852,
      "step": 38133
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.522315263748169,
      "learning_rate": 0.0005603509720481921,
      "loss": 2.7651,
      "step": 38134
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.45964515209198,
      "learning_rate": 0.0005603489396291662,
      "loss": 2.833,
      "step": 38135
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.642861843109131,
      "learning_rate": 0.0005603469071617367,
      "loss": 3.0796,
      "step": 38136
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.241868495941162,
      "learning_rate": 0.0005603448746459035,
      "loss": 3.0744,
      "step": 38137
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4221374988555908,
      "learning_rate": 0.0005603428420816674,
      "loss": 2.8947,
      "step": 38138
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3403862714767456,
      "learning_rate": 0.0005603408094690287,
      "loss": 3.103,
      "step": 38139
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4265869855880737,
      "learning_rate": 0.0005603387768079877,
      "loss": 2.9729,
      "step": 38140
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7826392650604248,
      "learning_rate": 0.0005603367440985449,
      "loss": 3.0586,
      "step": 38141
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9331285953521729,
      "learning_rate": 0.0005603347113407004,
      "loss": 3.1474,
      "step": 38142
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8244892358779907,
      "learning_rate": 0.0005603326785344549,
      "loss": 3.302,
      "step": 38143
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6201030015945435,
      "learning_rate": 0.0005603306456798085,
      "loss": 2.948,
      "step": 38144
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6489673852920532,
      "learning_rate": 0.0005603286127767619,
      "loss": 2.7958,
      "step": 38145
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.391791582107544,
      "learning_rate": 0.0005603265798253152,
      "loss": 3.0984,
      "step": 38146
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.257380247116089,
      "learning_rate": 0.0005603245468254689,
      "loss": 3.0216,
      "step": 38147
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9106435775756836,
      "learning_rate": 0.0005603225137772234,
      "loss": 2.9419,
      "step": 38148
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.357835054397583,
      "learning_rate": 0.0005603204806805789,
      "loss": 3.108,
      "step": 38149
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6726222038269043,
      "learning_rate": 0.000560318447535536,
      "loss": 2.9184,
      "step": 38150
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7067034244537354,
      "learning_rate": 0.000560316414342095,
      "loss": 2.9152,
      "step": 38151
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5406934022903442,
      "learning_rate": 0.0005603143811002562,
      "loss": 3.0378,
      "step": 38152
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.526831865310669,
      "learning_rate": 0.00056031234781002,
      "loss": 3.2319,
      "step": 38153
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7241957187652588,
      "learning_rate": 0.0005603103144713869,
      "loss": 3.0212,
      "step": 38154
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.126288652420044,
      "learning_rate": 0.0005603082810843572,
      "loss": 3.003,
      "step": 38155
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5066272020339966,
      "learning_rate": 0.0005603062476489313,
      "loss": 2.8143,
      "step": 38156
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.493087887763977,
      "learning_rate": 0.0005603042141651096,
      "loss": 3.1186,
      "step": 38157
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0282492637634277,
      "learning_rate": 0.0005603021806328924,
      "loss": 2.9069,
      "step": 38158
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.069361686706543,
      "learning_rate": 0.00056030014705228,
      "loss": 2.9516,
      "step": 38159
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7768324613571167,
      "learning_rate": 0.0005602981134232729,
      "loss": 3.0356,
      "step": 38160
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5903000831604004,
      "learning_rate": 0.0005602960797458715,
      "loss": 3.1089,
      "step": 38161
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1413605213165283,
      "learning_rate": 0.0005602940460200762,
      "loss": 3.1532,
      "step": 38162
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5713367462158203,
      "learning_rate": 0.0005602920122458874,
      "loss": 3.2382,
      "step": 38163
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5403785705566406,
      "learning_rate": 0.0005602899784233052,
      "loss": 2.8299,
      "step": 38164
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.0978283882141113,
      "learning_rate": 0.0005602879445523304,
      "loss": 2.9263,
      "step": 38165
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.223595142364502,
      "learning_rate": 0.0005602859106329629,
      "loss": 3.0538,
      "step": 38166
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0091161727905273,
      "learning_rate": 0.0005602838766652035,
      "loss": 3.0153,
      "step": 38167
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6300114393234253,
      "learning_rate": 0.0005602818426490524,
      "loss": 3.0929,
      "step": 38168
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6823484897613525,
      "learning_rate": 0.0005602798085845099,
      "loss": 2.991,
      "step": 38169
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3979713916778564,
      "learning_rate": 0.0005602777744715767,
      "loss": 3.1592,
      "step": 38170
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7151458263397217,
      "learning_rate": 0.0005602757403102527,
      "loss": 2.8609,
      "step": 38171
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5932183265686035,
      "learning_rate": 0.0005602737061005386,
      "loss": 3.1743,
      "step": 38172
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.566412925720215,
      "learning_rate": 0.0005602716718424348,
      "loss": 3.0368,
      "step": 38173
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8679566383361816,
      "learning_rate": 0.0005602696375359415,
      "loss": 3.1071,
      "step": 38174
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6032251119613647,
      "learning_rate": 0.0005602676031810592,
      "loss": 3.2323,
      "step": 38175
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.609442114830017,
      "learning_rate": 0.0005602655687777882,
      "loss": 3.0304,
      "step": 38176
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8224319219589233,
      "learning_rate": 0.0005602635343261289,
      "loss": 2.7676,
      "step": 38177
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0294528007507324,
      "learning_rate": 0.0005602614998260816,
      "loss": 3.1786,
      "step": 38178
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5196555852890015,
      "learning_rate": 0.000560259465277647,
      "loss": 3.1037,
      "step": 38179
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.469355821609497,
      "learning_rate": 0.0005602574306808252,
      "loss": 3.1325,
      "step": 38180
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7449434995651245,
      "learning_rate": 0.0005602553960356166,
      "loss": 2.9664,
      "step": 38181
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5058767795562744,
      "learning_rate": 0.0005602533613420215,
      "loss": 3.173,
      "step": 38182
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4000327587127686,
      "learning_rate": 0.0005602513266000405,
      "loss": 2.8089,
      "step": 38183
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6370642185211182,
      "learning_rate": 0.0005602492918096738,
      "loss": 3.1121,
      "step": 38184
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5188955068588257,
      "learning_rate": 0.0005602472569709219,
      "loss": 2.9419,
      "step": 38185
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3540465831756592,
      "learning_rate": 0.0005602452220837852,
      "loss": 3.0246,
      "step": 38186
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3268706798553467,
      "learning_rate": 0.0005602431871482639,
      "loss": 3.1011,
      "step": 38187
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.449796438217163,
      "learning_rate": 0.0005602411521643584,
      "loss": 2.8946,
      "step": 38188
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8089979887008667,
      "learning_rate": 0.0005602391171320693,
      "loss": 2.8555,
      "step": 38189
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6000053882598877,
      "learning_rate": 0.0005602370820513968,
      "loss": 3.1881,
      "step": 38190
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.9140121936798096,
      "learning_rate": 0.0005602350469223412,
      "loss": 2.9623,
      "step": 38191
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6580151319503784,
      "learning_rate": 0.0005602330117449032,
      "loss": 2.9578,
      "step": 38192
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6988961696624756,
      "learning_rate": 0.0005602309765190828,
      "loss": 2.9589,
      "step": 38193
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8315861225128174,
      "learning_rate": 0.0005602289412448807,
      "loss": 3.2599,
      "step": 38194
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1833672523498535,
      "learning_rate": 0.0005602269059222971,
      "loss": 3.1133,
      "step": 38195
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2546652555465698,
      "learning_rate": 0.0005602248705513322,
      "loss": 3.2055,
      "step": 38196
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.311788558959961,
      "learning_rate": 0.0005602228351319868,
      "loss": 2.8328,
      "step": 38197
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7200846672058105,
      "learning_rate": 0.000560220799664261,
      "loss": 3.1663,
      "step": 38198
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0400853157043457,
      "learning_rate": 0.0005602187641481553,
      "loss": 3.1536,
      "step": 38199
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0219428539276123,
      "learning_rate": 0.0005602167285836698,
      "loss": 2.8284,
      "step": 38200
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4679481983184814,
      "learning_rate": 0.0005602146929708053,
      "loss": 2.9891,
      "step": 38201
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3177696466445923,
      "learning_rate": 0.000560212657309562,
      "loss": 3.0999,
      "step": 38202
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1927415132522583,
      "learning_rate": 0.00056021062159994,
      "loss": 3.1542,
      "step": 38203
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7115811109542847,
      "learning_rate": 0.0005602085858419403,
      "loss": 3.247,
      "step": 38204
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.598180055618286,
      "learning_rate": 0.0005602065500355627,
      "loss": 3.4118,
      "step": 38205
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6273632049560547,
      "learning_rate": 0.0005602045141808078,
      "loss": 3.0776,
      "step": 38206
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9794753789901733,
      "learning_rate": 0.0005602024782776759,
      "loss": 2.8413,
      "step": 38207
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1191556453704834,
      "learning_rate": 0.0005602004423261675,
      "loss": 3.0747,
      "step": 38208
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4068245887756348,
      "learning_rate": 0.0005601984063262831,
      "loss": 3.1154,
      "step": 38209
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4223158359527588,
      "learning_rate": 0.0005601963702780227,
      "loss": 2.8308,
      "step": 38210
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7424206733703613,
      "learning_rate": 0.000560194334181387,
      "loss": 2.879,
      "step": 38211
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4926122426986694,
      "learning_rate": 0.0005601922980363761,
      "loss": 3.4079,
      "step": 38212
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8318243026733398,
      "learning_rate": 0.0005601902618429908,
      "loss": 2.8933,
      "step": 38213
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.0487167835235596,
      "learning_rate": 0.000560188225601231,
      "loss": 2.9998,
      "step": 38214
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.471687912940979,
      "learning_rate": 0.0005601861893110973,
      "loss": 3.0588,
      "step": 38215
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.630685567855835,
      "learning_rate": 0.0005601841529725902,
      "loss": 3.1001,
      "step": 38216
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0555946826934814,
      "learning_rate": 0.0005601821165857099,
      "loss": 3.0835,
      "step": 38217
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.042001247406006,
      "learning_rate": 0.0005601800801504569,
      "loss": 3.065,
      "step": 38218
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8781991004943848,
      "learning_rate": 0.0005601780436668314,
      "loss": 3.2558,
      "step": 38219
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2186994552612305,
      "learning_rate": 0.0005601760071348339,
      "loss": 2.9229,
      "step": 38220
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6348471641540527,
      "learning_rate": 0.0005601739705544648,
      "loss": 3.044,
      "step": 38221
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6389354467391968,
      "learning_rate": 0.0005601719339257246,
      "loss": 3.1196,
      "step": 38222
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6698148250579834,
      "learning_rate": 0.0005601698972486133,
      "loss": 3.0347,
      "step": 38223
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.584942579269409,
      "learning_rate": 0.0005601678605231316,
      "loss": 2.9937,
      "step": 38224
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7004289627075195,
      "learning_rate": 0.0005601658237492797,
      "loss": 2.8707,
      "step": 38225
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8280341625213623,
      "learning_rate": 0.0005601637869270582,
      "loss": 2.963,
      "step": 38226
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1464955806732178,
      "learning_rate": 0.0005601617500564672,
      "loss": 2.9809,
      "step": 38227
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6078382730484009,
      "learning_rate": 0.0005601597131375072,
      "loss": 3.171,
      "step": 38228
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6367119550704956,
      "learning_rate": 0.0005601576761701788,
      "loss": 2.9672,
      "step": 38229
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7132463455200195,
      "learning_rate": 0.0005601556391544821,
      "loss": 2.9815,
      "step": 38230
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7766900062561035,
      "learning_rate": 0.0005601536020904174,
      "loss": 2.9766,
      "step": 38231
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3231165409088135,
      "learning_rate": 0.0005601515649779855,
      "loss": 2.8463,
      "step": 38232
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0621771812438965,
      "learning_rate": 0.0005601495278171862,
      "loss": 2.9866,
      "step": 38233
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4888707399368286,
      "learning_rate": 0.0005601474906080205,
      "loss": 3.0706,
      "step": 38234
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.51227867603302,
      "learning_rate": 0.0005601454533504883,
      "loss": 3.1805,
      "step": 38235
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.374593734741211,
      "learning_rate": 0.0005601434160445902,
      "loss": 2.9295,
      "step": 38236
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.708450436592102,
      "learning_rate": 0.0005601413786903265,
      "loss": 3.121,
      "step": 38237
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3484750986099243,
      "learning_rate": 0.0005601393412876976,
      "loss": 3.0388,
      "step": 38238
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4036442041397095,
      "learning_rate": 0.0005601373038367039,
      "loss": 2.9959,
      "step": 38239
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3172637224197388,
      "learning_rate": 0.0005601352663373457,
      "loss": 3.1196,
      "step": 38240
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.737175703048706,
      "learning_rate": 0.0005601332287896235,
      "loss": 3.2229,
      "step": 38241
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.542720913887024,
      "learning_rate": 0.0005601311911935377,
      "loss": 2.8807,
      "step": 38242
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4455045461654663,
      "learning_rate": 0.0005601291535490885,
      "loss": 2.9079,
      "step": 38243
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1983697414398193,
      "learning_rate": 0.0005601271158562764,
      "loss": 3.2927,
      "step": 38244
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.065556049346924,
      "learning_rate": 0.0005601250781151018,
      "loss": 3.0917,
      "step": 38245
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4321913719177246,
      "learning_rate": 0.000560123040325565,
      "loss": 2.7453,
      "step": 38246
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9188767671585083,
      "learning_rate": 0.0005601210024876664,
      "loss": 2.9253,
      "step": 38247
    },
    {
      "epoch": 0.5,
      "grad_norm": 5.660031795501709,
      "learning_rate": 0.0005601189646014065,
      "loss": 3.2662,
      "step": 38248
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6040124893188477,
      "learning_rate": 0.0005601169266667854,
      "loss": 2.8274,
      "step": 38249
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7464114427566528,
      "learning_rate": 0.0005601148886838037,
      "loss": 2.8875,
      "step": 38250
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3574620485305786,
      "learning_rate": 0.0005601128506524619,
      "loss": 3.3046,
      "step": 38251
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0849599838256836,
      "learning_rate": 0.0005601108125727601,
      "loss": 2.8518,
      "step": 38252
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2163166999816895,
      "learning_rate": 0.0005601087744446988,
      "loss": 2.941,
      "step": 38253
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6000053882598877,
      "learning_rate": 0.0005601067362682783,
      "loss": 3.0599,
      "step": 38254
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.866995930671692,
      "learning_rate": 0.0005601046980434992,
      "loss": 2.9965,
      "step": 38255
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.417703151702881,
      "learning_rate": 0.0005601026597703615,
      "loss": 3.0405,
      "step": 38256
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5338809490203857,
      "learning_rate": 0.000560100621448866,
      "loss": 3.1364,
      "step": 38257
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8969615697860718,
      "learning_rate": 0.0005600985830790128,
      "loss": 2.8371,
      "step": 38258
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7859737873077393,
      "learning_rate": 0.0005600965446608024,
      "loss": 3.1131,
      "step": 38259
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4040495157241821,
      "learning_rate": 0.0005600945061942351,
      "loss": 3.2718,
      "step": 38260
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4322443008422852,
      "learning_rate": 0.0005600924676793114,
      "loss": 2.8588,
      "step": 38261
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.595603108406067,
      "learning_rate": 0.0005600904291160315,
      "loss": 3.1785,
      "step": 38262
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.487705111503601,
      "learning_rate": 0.0005600883905043959,
      "loss": 2.7682,
      "step": 38263
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5945342779159546,
      "learning_rate": 0.0005600863518444051,
      "loss": 3.0561,
      "step": 38264
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5788841247558594,
      "learning_rate": 0.0005600843131360592,
      "loss": 3.0099,
      "step": 38265
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5687215328216553,
      "learning_rate": 0.0005600822743793587,
      "loss": 3.0565,
      "step": 38266
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.652660608291626,
      "learning_rate": 0.000560080235574304,
      "loss": 2.8378,
      "step": 38267
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5726395845413208,
      "learning_rate": 0.0005600781967208955,
      "loss": 3.2202,
      "step": 38268
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7210252285003662,
      "learning_rate": 0.0005600761578191336,
      "loss": 2.917,
      "step": 38269
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3421344757080078,
      "learning_rate": 0.0005600741188690186,
      "loss": 3.0234,
      "step": 38270
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5451772212982178,
      "learning_rate": 0.0005600720798705508,
      "loss": 3.1178,
      "step": 38271
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6949183940887451,
      "learning_rate": 0.0005600700408237309,
      "loss": 3.1432,
      "step": 38272
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.513914942741394,
      "learning_rate": 0.000560068001728559,
      "loss": 2.9971,
      "step": 38273
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8146069049835205,
      "learning_rate": 0.0005600659625850356,
      "loss": 2.9517,
      "step": 38274
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.242281198501587,
      "learning_rate": 0.0005600639233931608,
      "loss": 2.8678,
      "step": 38275
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5139610767364502,
      "learning_rate": 0.0005600618841529354,
      "loss": 3.162,
      "step": 38276
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.133742570877075,
      "learning_rate": 0.0005600598448643596,
      "loss": 3.278,
      "step": 38277
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7837927341461182,
      "learning_rate": 0.0005600578055274336,
      "loss": 2.9687,
      "step": 38278
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6619620323181152,
      "learning_rate": 0.0005600557661421581,
      "loss": 3.3135,
      "step": 38279
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3344931602478027,
      "learning_rate": 0.0005600537267085332,
      "loss": 3.2168,
      "step": 38280
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7015374898910522,
      "learning_rate": 0.0005600516872265595,
      "loss": 2.8538,
      "step": 38281
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4625929594039917,
      "learning_rate": 0.0005600496476962373,
      "loss": 2.8462,
      "step": 38282
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.618884801864624,
      "learning_rate": 0.000560047608117567,
      "loss": 3.1832,
      "step": 38283
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4703314304351807,
      "learning_rate": 0.0005600455684905488,
      "loss": 3.1432,
      "step": 38284
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5292037725448608,
      "learning_rate": 0.0005600435288151832,
      "loss": 3.0088,
      "step": 38285
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6688010692596436,
      "learning_rate": 0.0005600414890914707,
      "loss": 3.0131,
      "step": 38286
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.485129952430725,
      "learning_rate": 0.0005600394493194117,
      "loss": 3.1297,
      "step": 38287
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7488300800323486,
      "learning_rate": 0.0005600374094990062,
      "loss": 3.0091,
      "step": 38288
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3655049800872803,
      "learning_rate": 0.000560035369630255,
      "loss": 3.085,
      "step": 38289
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7397397756576538,
      "learning_rate": 0.0005600333297131583,
      "loss": 2.9304,
      "step": 38290
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5995116233825684,
      "learning_rate": 0.0005600312897477165,
      "loss": 3.2167,
      "step": 38291
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8381041288375854,
      "learning_rate": 0.0005600292497339299,
      "loss": 3.2071,
      "step": 38292
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6530364751815796,
      "learning_rate": 0.000560027209671799,
      "loss": 2.9163,
      "step": 38293
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3386789560317993,
      "learning_rate": 0.0005600251695613242,
      "loss": 3.1596,
      "step": 38294
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4552135467529297,
      "learning_rate": 0.0005600231294025056,
      "loss": 3.1496,
      "step": 38295
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4470875263214111,
      "learning_rate": 0.000560021089195344,
      "loss": 3.3569,
      "step": 38296
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.549247145652771,
      "learning_rate": 0.0005600190489398395,
      "loss": 3.0328,
      "step": 38297
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.495173692703247,
      "learning_rate": 0.0005600170086359925,
      "loss": 3.1713,
      "step": 38298
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7084826231002808,
      "learning_rate": 0.0005600149682838036,
      "loss": 2.9968,
      "step": 38299
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.377937912940979,
      "learning_rate": 0.0005600129278832729,
      "loss": 2.8151,
      "step": 38300
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2584757804870605,
      "learning_rate": 0.0005600108874344008,
      "loss": 2.9102,
      "step": 38301
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.448592185974121,
      "learning_rate": 0.0005600088469371879,
      "loss": 2.9626,
      "step": 38302
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9889832735061646,
      "learning_rate": 0.0005600068063916344,
      "loss": 2.9386,
      "step": 38303
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6818478107452393,
      "learning_rate": 0.0005600047657977407,
      "loss": 3.2308,
      "step": 38304
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6177302598953247,
      "learning_rate": 0.0005600027251555072,
      "loss": 3.2091,
      "step": 38305
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7937968969345093,
      "learning_rate": 0.0005600006844649344,
      "loss": 2.8143,
      "step": 38306
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0043628215789795,
      "learning_rate": 0.0005599986437260224,
      "loss": 3.0029,
      "step": 38307
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6087332963943481,
      "learning_rate": 0.0005599966029387719,
      "loss": 3.165,
      "step": 38308
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7174097299575806,
      "learning_rate": 0.0005599945621031831,
      "loss": 2.877,
      "step": 38309
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4239447116851807,
      "learning_rate": 0.0005599925212192564,
      "loss": 2.8449,
      "step": 38310
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5994256734848022,
      "learning_rate": 0.000559990480286992,
      "loss": 2.9059,
      "step": 38311
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.181236505508423,
      "learning_rate": 0.0005599884393063906,
      "loss": 3.1779,
      "step": 38312
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6156306266784668,
      "learning_rate": 0.0005599863982774525,
      "loss": 3.1466,
      "step": 38313
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4835940599441528,
      "learning_rate": 0.000559984357200178,
      "loss": 3.1914,
      "step": 38314
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5853043794631958,
      "learning_rate": 0.0005599823160745674,
      "loss": 3.3462,
      "step": 38315
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9121710062026978,
      "learning_rate": 0.0005599802749006212,
      "loss": 3.1493,
      "step": 38316
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.253700017929077,
      "learning_rate": 0.0005599782336783398,
      "loss": 3.1306,
      "step": 38317
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6284748315811157,
      "learning_rate": 0.0005599761924077235,
      "loss": 2.976,
      "step": 38318
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7731435298919678,
      "learning_rate": 0.0005599741510887729,
      "loss": 3.1189,
      "step": 38319
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4791579246520996,
      "learning_rate": 0.000559972109721488,
      "loss": 2.9505,
      "step": 38320
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4515066146850586,
      "learning_rate": 0.0005599700683058693,
      "loss": 2.993,
      "step": 38321
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7416858673095703,
      "learning_rate": 0.0005599680268419175,
      "loss": 3.1334,
      "step": 38322
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6815134286880493,
      "learning_rate": 0.0005599659853296327,
      "loss": 3.0268,
      "step": 38323
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7376537322998047,
      "learning_rate": 0.0005599639437690152,
      "loss": 3.1929,
      "step": 38324
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.924044132232666,
      "learning_rate": 0.0005599619021600656,
      "loss": 3.0453,
      "step": 38325
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4146502017974854,
      "learning_rate": 0.000559959860502784,
      "loss": 3.1742,
      "step": 38326
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.731212615966797,
      "learning_rate": 0.0005599578187971712,
      "loss": 3.1255,
      "step": 38327
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.1705307960510254,
      "learning_rate": 0.0005599557770432272,
      "loss": 2.8217,
      "step": 38328
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6017770767211914,
      "learning_rate": 0.0005599537352409524,
      "loss": 3.2202,
      "step": 38329
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4106539487838745,
      "learning_rate": 0.0005599516933903475,
      "loss": 2.9382,
      "step": 38330
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.690396785736084,
      "learning_rate": 0.0005599496514914126,
      "loss": 3.1812,
      "step": 38331
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.847536325454712,
      "learning_rate": 0.0005599476095441481,
      "loss": 3.1658,
      "step": 38332
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5208466053009033,
      "learning_rate": 0.0005599455675485544,
      "loss": 2.8505,
      "step": 38333
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6446598768234253,
      "learning_rate": 0.000559943525504632,
      "loss": 3.0098,
      "step": 38334
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.557989239692688,
      "learning_rate": 0.0005599414834123812,
      "loss": 3.3247,
      "step": 38335
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8641953468322754,
      "learning_rate": 0.0005599394412718024,
      "loss": 2.9916,
      "step": 38336
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.472562313079834,
      "learning_rate": 0.0005599373990828959,
      "loss": 2.9784,
      "step": 38337
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4434759616851807,
      "learning_rate": 0.0005599353568456621,
      "loss": 3.2013,
      "step": 38338
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1809979677200317,
      "learning_rate": 0.0005599333145601015,
      "loss": 3.0543,
      "step": 38339
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3373539447784424,
      "learning_rate": 0.0005599312722262143,
      "loss": 2.8953,
      "step": 38340
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4962687492370605,
      "learning_rate": 0.000559929229844001,
      "loss": 2.9971,
      "step": 38341
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4634597301483154,
      "learning_rate": 0.0005599271874134619,
      "loss": 3.1459,
      "step": 38342
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.650770664215088,
      "learning_rate": 0.0005599251449345975,
      "loss": 3.044,
      "step": 38343
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4530659914016724,
      "learning_rate": 0.0005599231024074081,
      "loss": 3.2135,
      "step": 38344
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3681669235229492,
      "learning_rate": 0.000559921059831894,
      "loss": 3.0751,
      "step": 38345
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5631431341171265,
      "learning_rate": 0.0005599190172080557,
      "loss": 3.0424,
      "step": 38346
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6165461540222168,
      "learning_rate": 0.0005599169745358936,
      "loss": 3.2463,
      "step": 38347
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6637264490127563,
      "learning_rate": 0.000559914931815408,
      "loss": 2.9248,
      "step": 38348
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5077818632125854,
      "learning_rate": 0.0005599128890465994,
      "loss": 2.9839,
      "step": 38349
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8740215301513672,
      "learning_rate": 0.000559910846229468,
      "loss": 3.0652,
      "step": 38350
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6088738441467285,
      "learning_rate": 0.0005599088033640142,
      "loss": 3.1957,
      "step": 38351
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.00382661819458,
      "learning_rate": 0.0005599067604502387,
      "loss": 3.2488,
      "step": 38352
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0180625915527344,
      "learning_rate": 0.0005599047174881414,
      "loss": 3.0769,
      "step": 38353
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4294477701187134,
      "learning_rate": 0.000559902674477723,
      "loss": 3.3157,
      "step": 38354
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.513780117034912,
      "learning_rate": 0.0005599006314189837,
      "loss": 3.3442,
      "step": 38355
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.465440034866333,
      "learning_rate": 0.0005598985883119241,
      "loss": 3.2136,
      "step": 38356
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5707122087478638,
      "learning_rate": 0.0005598965451565443,
      "loss": 3.1416,
      "step": 38357
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6108078956604004,
      "learning_rate": 0.0005598945019528449,
      "loss": 3.0325,
      "step": 38358
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6531966924667358,
      "learning_rate": 0.0005598924587008262,
      "loss": 3.0711,
      "step": 38359
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4009791612625122,
      "learning_rate": 0.0005598904154004886,
      "loss": 2.5758,
      "step": 38360
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7802693843841553,
      "learning_rate": 0.0005598883720518324,
      "loss": 3.1593,
      "step": 38361
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.521296501159668,
      "learning_rate": 0.0005598863286548581,
      "loss": 3.2272,
      "step": 38362
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.573498249053955,
      "learning_rate": 0.000559884285209566,
      "loss": 2.9778,
      "step": 38363
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.142461061477661,
      "learning_rate": 0.0005598822417159565,
      "loss": 3.2369,
      "step": 38364
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.545533299446106,
      "learning_rate": 0.00055988019817403,
      "loss": 3.2312,
      "step": 38365
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.749906301498413,
      "learning_rate": 0.0005598781545837869,
      "loss": 3.1231,
      "step": 38366
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3984415531158447,
      "learning_rate": 0.0005598761109452276,
      "loss": 3.0875,
      "step": 38367
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5667593479156494,
      "learning_rate": 0.0005598740672583522,
      "loss": 3.2274,
      "step": 38368
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.309260606765747,
      "learning_rate": 0.0005598720235231616,
      "loss": 3.1902,
      "step": 38369
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.604130744934082,
      "learning_rate": 0.0005598699797396557,
      "loss": 2.9425,
      "step": 38370
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1878209114074707,
      "learning_rate": 0.0005598679359078351,
      "loss": 2.9429,
      "step": 38371
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.351918339729309,
      "learning_rate": 0.0005598658920277002,
      "loss": 3.1188,
      "step": 38372
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2713457345962524,
      "learning_rate": 0.0005598638480992513,
      "loss": 3.0311,
      "step": 38373
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8155628442764282,
      "learning_rate": 0.0005598618041224887,
      "loss": 3.0822,
      "step": 38374
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8416420221328735,
      "learning_rate": 0.000559859760097413,
      "loss": 3.2871,
      "step": 38375
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.399314522743225,
      "learning_rate": 0.0005598577160240244,
      "loss": 3.0675,
      "step": 38376
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7394435405731201,
      "learning_rate": 0.0005598556719023234,
      "loss": 3.2683,
      "step": 38377
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.740328788757324,
      "learning_rate": 0.0005598536277323104,
      "loss": 2.9138,
      "step": 38378
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8659720420837402,
      "learning_rate": 0.0005598515835139855,
      "loss": 2.8849,
      "step": 38379
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.310901403427124,
      "learning_rate": 0.0005598495392473494,
      "loss": 3.0679,
      "step": 38380
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6774660348892212,
      "learning_rate": 0.0005598474949324024,
      "loss": 2.975,
      "step": 38381
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.844156265258789,
      "learning_rate": 0.0005598454505691448,
      "loss": 3.0964,
      "step": 38382
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3874027729034424,
      "learning_rate": 0.000559843406157577,
      "loss": 2.938,
      "step": 38383
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0622880458831787,
      "learning_rate": 0.0005598413616976994,
      "loss": 3.3044,
      "step": 38384
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8899743556976318,
      "learning_rate": 0.0005598393171895125,
      "loss": 3.0102,
      "step": 38385
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5149539709091187,
      "learning_rate": 0.0005598372726330164,
      "loss": 3.0139,
      "step": 38386
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3224265575408936,
      "learning_rate": 0.0005598352280282118,
      "loss": 3.1608,
      "step": 38387
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.2782788276672363,
      "learning_rate": 0.0005598331833750989,
      "loss": 3.3511,
      "step": 38388
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8313536643981934,
      "learning_rate": 0.000559831138673678,
      "loss": 3.2223,
      "step": 38389
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.745051622390747,
      "learning_rate": 0.0005598290939239496,
      "loss": 3.0676,
      "step": 38390
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6340572834014893,
      "learning_rate": 0.0005598270491259142,
      "loss": 2.9116,
      "step": 38391
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.9066481590270996,
      "learning_rate": 0.0005598250042795719,
      "loss": 2.8865,
      "step": 38392
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7416789531707764,
      "learning_rate": 0.0005598229593849234,
      "loss": 3.0374,
      "step": 38393
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6786725521087646,
      "learning_rate": 0.0005598209144419687,
      "loss": 2.9544,
      "step": 38394
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4919700622558594,
      "learning_rate": 0.0005598188694507085,
      "loss": 3.1735,
      "step": 38395
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9264241456985474,
      "learning_rate": 0.0005598168244111431,
      "loss": 2.9832,
      "step": 38396
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8254330158233643,
      "learning_rate": 0.0005598147793232727,
      "loss": 3.0812,
      "step": 38397
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4714292287826538,
      "learning_rate": 0.0005598127341870981,
      "loss": 2.972,
      "step": 38398
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.3372344970703125,
      "learning_rate": 0.0005598106890026192,
      "loss": 2.9101,
      "step": 38399
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5504958629608154,
      "learning_rate": 0.0005598086437698366,
      "loss": 2.9212,
      "step": 38400
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2114291191101074,
      "learning_rate": 0.0005598065984887507,
      "loss": 3.1601,
      "step": 38401
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3162684440612793,
      "learning_rate": 0.0005598045531593619,
      "loss": 2.9862,
      "step": 38402
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9891277551651,
      "learning_rate": 0.0005598025077816704,
      "loss": 3.3414,
      "step": 38403
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9805629253387451,
      "learning_rate": 0.0005598004623556768,
      "loss": 3.2386,
      "step": 38404
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4520573616027832,
      "learning_rate": 0.0005597984168813814,
      "loss": 2.9841,
      "step": 38405
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0443363189697266,
      "learning_rate": 0.0005597963713587845,
      "loss": 3.1625,
      "step": 38406
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6069411039352417,
      "learning_rate": 0.0005597943257878866,
      "loss": 3.3006,
      "step": 38407
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.308630108833313,
      "learning_rate": 0.0005597922801686881,
      "loss": 3.0498,
      "step": 38408
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6922088861465454,
      "learning_rate": 0.0005597902345011892,
      "loss": 3.014,
      "step": 38409
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.749327540397644,
      "learning_rate": 0.0005597881887853904,
      "loss": 3.3258,
      "step": 38410
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4829529523849487,
      "learning_rate": 0.0005597861430212921,
      "loss": 3.1706,
      "step": 38411
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.695165753364563,
      "learning_rate": 0.0005597840972088947,
      "loss": 3.0377,
      "step": 38412
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4903730154037476,
      "learning_rate": 0.0005597820513481985,
      "loss": 3.0916,
      "step": 38413
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2834975719451904,
      "learning_rate": 0.0005597800054392038,
      "loss": 3.0777,
      "step": 38414
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4495600461959839,
      "learning_rate": 0.0005597779594819113,
      "loss": 3.3246,
      "step": 38415
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8736820220947266,
      "learning_rate": 0.000559775913476321,
      "loss": 3.0285,
      "step": 38416
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6322890520095825,
      "learning_rate": 0.0005597738674224335,
      "loss": 3.0302,
      "step": 38417
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6485525369644165,
      "learning_rate": 0.0005597718213202493,
      "loss": 3.0742,
      "step": 38418
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4017078876495361,
      "learning_rate": 0.0005597697751697684,
      "loss": 3.127,
      "step": 38419
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5710127353668213,
      "learning_rate": 0.0005597677289709915,
      "loss": 2.9908,
      "step": 38420
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5617750883102417,
      "learning_rate": 0.0005597656827239188,
      "loss": 3.1533,
      "step": 38421
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5050413608551025,
      "learning_rate": 0.0005597636364285508,
      "loss": 3.2705,
      "step": 38422
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.800231695175171,
      "learning_rate": 0.0005597615900848878,
      "loss": 3.0995,
      "step": 38423
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.592451572418213,
      "learning_rate": 0.0005597595436929303,
      "loss": 3.26,
      "step": 38424
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4656810760498047,
      "learning_rate": 0.0005597574972526785,
      "loss": 2.8721,
      "step": 38425
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5687371492385864,
      "learning_rate": 0.0005597554507641329,
      "loss": 2.9271,
      "step": 38426
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5745247602462769,
      "learning_rate": 0.000559753404227294,
      "loss": 2.9647,
      "step": 38427
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4292694330215454,
      "learning_rate": 0.0005597513576421619,
      "loss": 2.9717,
      "step": 38428
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6059563159942627,
      "learning_rate": 0.0005597493110087371,
      "loss": 2.9804,
      "step": 38429
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.44246244430542,
      "learning_rate": 0.00055974726432702,
      "loss": 3.0507,
      "step": 38430
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3344752788543701,
      "learning_rate": 0.0005597452175970111,
      "loss": 2.9816,
      "step": 38431
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.249075174331665,
      "learning_rate": 0.0005597431708187105,
      "loss": 3.0862,
      "step": 38432
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.177284002304077,
      "learning_rate": 0.0005597411239921189,
      "loss": 3.0103,
      "step": 38433
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4080729484558105,
      "learning_rate": 0.0005597390771172365,
      "loss": 3.1298,
      "step": 38434
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1115617752075195,
      "learning_rate": 0.0005597370301940637,
      "loss": 3.0678,
      "step": 38435
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4730900526046753,
      "learning_rate": 0.0005597349832226008,
      "loss": 2.992,
      "step": 38436
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5073260068893433,
      "learning_rate": 0.0005597329362028483,
      "loss": 3.0641,
      "step": 38437
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4991384744644165,
      "learning_rate": 0.0005597308891348065,
      "loss": 3.1573,
      "step": 38438
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.722797155380249,
      "learning_rate": 0.0005597288420184759,
      "loss": 3.2072,
      "step": 38439
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5296574831008911,
      "learning_rate": 0.0005597267948538568,
      "loss": 2.7688,
      "step": 38440
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5027669668197632,
      "learning_rate": 0.0005597247476409494,
      "loss": 2.7713,
      "step": 38441
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4467157125473022,
      "learning_rate": 0.0005597227003797544,
      "loss": 3.199,
      "step": 38442
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.572595238685608,
      "learning_rate": 0.0005597206530702722,
      "loss": 2.9456,
      "step": 38443
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4431416988372803,
      "learning_rate": 0.0005597186057125029,
      "loss": 3.1072,
      "step": 38444
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3044159412384033,
      "learning_rate": 0.000559716558306447,
      "loss": 3.2615,
      "step": 38445
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4787094593048096,
      "learning_rate": 0.0005597145108521049,
      "loss": 3.1552,
      "step": 38446
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6887134313583374,
      "learning_rate": 0.000559712463349477,
      "loss": 3.2577,
      "step": 38447
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5573554039001465,
      "learning_rate": 0.0005597104157985636,
      "loss": 3.2601,
      "step": 38448
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4743741750717163,
      "learning_rate": 0.0005597083681993652,
      "loss": 3.1871,
      "step": 38449
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4179940223693848,
      "learning_rate": 0.000559706320551882,
      "loss": 2.8915,
      "step": 38450
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8178882598876953,
      "learning_rate": 0.0005597042728561145,
      "loss": 3.1222,
      "step": 38451
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4279590845108032,
      "learning_rate": 0.0005597022251120631,
      "loss": 3.0282,
      "step": 38452
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6243857145309448,
      "learning_rate": 0.0005597001773197282,
      "loss": 3.2366,
      "step": 38453
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5209357738494873,
      "learning_rate": 0.0005596981294791102,
      "loss": 2.9868,
      "step": 38454
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.321143627166748,
      "learning_rate": 0.0005596960815902093,
      "loss": 3.1441,
      "step": 38455
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.925295352935791,
      "learning_rate": 0.000559694033653026,
      "loss": 2.9315,
      "step": 38456
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.450181007385254,
      "learning_rate": 0.0005596919856675608,
      "loss": 2.8687,
      "step": 38457
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0476114749908447,
      "learning_rate": 0.0005596899376338138,
      "loss": 3.0346,
      "step": 38458
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4545472860336304,
      "learning_rate": 0.0005596878895517856,
      "loss": 3.1621,
      "step": 38459
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8052802085876465,
      "learning_rate": 0.0005596858414214765,
      "loss": 3.1139,
      "step": 38460
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2445484399795532,
      "learning_rate": 0.000559683793242887,
      "loss": 2.9906,
      "step": 38461
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.697459101676941,
      "learning_rate": 0.0005596817450160173,
      "loss": 3.1869,
      "step": 38462
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.947739839553833,
      "learning_rate": 0.0005596796967408679,
      "loss": 2.8413,
      "step": 38463
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3481945991516113,
      "learning_rate": 0.0005596776484174391,
      "loss": 2.9921,
      "step": 38464
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4724647998809814,
      "learning_rate": 0.0005596756000457312,
      "loss": 2.9915,
      "step": 38465
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3944863080978394,
      "learning_rate": 0.0005596735516257449,
      "loss": 3.0205,
      "step": 38466
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.941691279411316,
      "learning_rate": 0.0005596715031574803,
      "loss": 2.9763,
      "step": 38467
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.241982936859131,
      "learning_rate": 0.0005596694546409379,
      "loss": 2.8523,
      "step": 38468
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.604317307472229,
      "learning_rate": 0.0005596674060761181,
      "loss": 2.7775,
      "step": 38469
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6785144805908203,
      "learning_rate": 0.0005596653574630211,
      "loss": 2.9704,
      "step": 38470
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7206556797027588,
      "learning_rate": 0.0005596633088016475,
      "loss": 2.8596,
      "step": 38471
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6047773361206055,
      "learning_rate": 0.0005596612600919976,
      "loss": 3.2443,
      "step": 38472
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5307719707489014,
      "learning_rate": 0.0005596592113340717,
      "loss": 2.9434,
      "step": 38473
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.012955665588379,
      "learning_rate": 0.0005596571625278703,
      "loss": 3.1212,
      "step": 38474
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2730010747909546,
      "learning_rate": 0.0005596551136733937,
      "loss": 2.8656,
      "step": 38475
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5462726354599,
      "learning_rate": 0.0005596530647706423,
      "loss": 3.0472,
      "step": 38476
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5228676795959473,
      "learning_rate": 0.0005596510158196164,
      "loss": 3.2106,
      "step": 38477
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.09255313873291,
      "learning_rate": 0.0005596489668203167,
      "loss": 3.2154,
      "step": 38478
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5139703750610352,
      "learning_rate": 0.0005596469177727432,
      "loss": 3.1643,
      "step": 38479
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.8541409969329834,
      "learning_rate": 0.0005596448686768966,
      "loss": 3.0565,
      "step": 38480
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.540616273880005,
      "learning_rate": 0.000559642819532777,
      "loss": 3.0583,
      "step": 38481
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5735903978347778,
      "learning_rate": 0.0005596407703403848,
      "loss": 2.7869,
      "step": 38482
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1113991737365723,
      "learning_rate": 0.0005596387210997206,
      "loss": 3.1186,
      "step": 38483
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9038889408111572,
      "learning_rate": 0.0005596366718107846,
      "loss": 2.9777,
      "step": 38484
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.082531452178955,
      "learning_rate": 0.0005596346224735773,
      "loss": 3.0396,
      "step": 38485
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9236502647399902,
      "learning_rate": 0.000559632573088099,
      "loss": 3.04,
      "step": 38486
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.926880836486816,
      "learning_rate": 0.0005596305236543501,
      "loss": 2.9258,
      "step": 38487
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9221537113189697,
      "learning_rate": 0.000559628474172331,
      "loss": 3.1098,
      "step": 38488
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1037421226501465,
      "learning_rate": 0.0005596264246420421,
      "loss": 2.6369,
      "step": 38489
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.566056251525879,
      "learning_rate": 0.0005596243750634837,
      "loss": 2.8797,
      "step": 38490
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.056201934814453,
      "learning_rate": 0.0005596223254366563,
      "loss": 3.0039,
      "step": 38491
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4561148881912231,
      "learning_rate": 0.0005596202757615601,
      "loss": 3.0783,
      "step": 38492
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7689725160598755,
      "learning_rate": 0.0005596182260381957,
      "loss": 3.0256,
      "step": 38493
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.380530595779419,
      "learning_rate": 0.0005596161762665633,
      "loss": 3.1128,
      "step": 38494
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.0871152877807617,
      "learning_rate": 0.0005596141264466634,
      "loss": 2.8968,
      "step": 38495
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7624285221099854,
      "learning_rate": 0.0005596120765784962,
      "loss": 3.1609,
      "step": 38496
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9929172992706299,
      "learning_rate": 0.0005596100266620624,
      "loss": 3.2294,
      "step": 38497
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5842971801757812,
      "learning_rate": 0.000559607976697362,
      "loss": 3.0264,
      "step": 38498
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.2637956142425537,
      "learning_rate": 0.0005596059266843957,
      "loss": 2.9001,
      "step": 38499
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6702395677566528,
      "learning_rate": 0.0005596038766231637,
      "loss": 2.9627,
      "step": 38500
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3562005758285522,
      "learning_rate": 0.0005596018265136665,
      "loss": 2.9422,
      "step": 38501
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.9345593452453613,
      "learning_rate": 0.0005595997763559044,
      "loss": 3.3395,
      "step": 38502
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.31035041809082,
      "learning_rate": 0.0005595977261498779,
      "loss": 3.1252,
      "step": 38503
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8694757223129272,
      "learning_rate": 0.0005595956758955871,
      "loss": 3.2241,
      "step": 38504
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3564265966415405,
      "learning_rate": 0.0005595936255930327,
      "loss": 3.0468,
      "step": 38505
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.3789353370666504,
      "learning_rate": 0.0005595915752422149,
      "loss": 2.8418,
      "step": 38506
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4988298416137695,
      "learning_rate": 0.0005595895248431341,
      "loss": 3.2025,
      "step": 38507
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6721670627593994,
      "learning_rate": 0.0005595874743957907,
      "loss": 3.2416,
      "step": 38508
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.873767614364624,
      "learning_rate": 0.0005595854239001851,
      "loss": 3.0273,
      "step": 38509
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.512080192565918,
      "learning_rate": 0.0005595833733563176,
      "loss": 3.1677,
      "step": 38510
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1779210567474365,
      "learning_rate": 0.0005595813227641888,
      "loss": 3.2797,
      "step": 38511
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.447941541671753,
      "learning_rate": 0.0005595792721237988,
      "loss": 3.0312,
      "step": 38512
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4719305038452148,
      "learning_rate": 0.0005595772214351482,
      "loss": 3.0285,
      "step": 38513
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.592334032058716,
      "learning_rate": 0.0005595751706982372,
      "loss": 2.7221,
      "step": 38514
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7191708087921143,
      "learning_rate": 0.0005595731199130664,
      "loss": 3.288,
      "step": 38515
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9709439277648926,
      "learning_rate": 0.000559571069079636,
      "loss": 3.1204,
      "step": 38516
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.191147804260254,
      "learning_rate": 0.0005595690181979464,
      "loss": 3.1869,
      "step": 38517
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.746206283569336,
      "learning_rate": 0.0005595669672679982,
      "loss": 3.0105,
      "step": 38518
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7229843139648438,
      "learning_rate": 0.0005595649162897915,
      "loss": 2.9711,
      "step": 38519
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5322633981704712,
      "learning_rate": 0.0005595628652633266,
      "loss": 2.9931,
      "step": 38520
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.2796499729156494,
      "learning_rate": 0.0005595608141886043,
      "loss": 2.768,
      "step": 38521
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.711400270462036,
      "learning_rate": 0.0005595587630656245,
      "loss": 3.1103,
      "step": 38522
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.390092134475708,
      "learning_rate": 0.0005595567118943881,
      "loss": 3.1723,
      "step": 38523
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.394167184829712,
      "learning_rate": 0.000559554660674895,
      "loss": 3.2258,
      "step": 38524
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8713003396987915,
      "learning_rate": 0.0005595526094071459,
      "loss": 3.1735,
      "step": 38525
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6864635944366455,
      "learning_rate": 0.0005595505580911411,
      "loss": 2.9281,
      "step": 38526
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6161878108978271,
      "learning_rate": 0.0005595485067268809,
      "loss": 3.1656,
      "step": 38527
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.066664934158325,
      "learning_rate": 0.0005595464553143657,
      "loss": 3.2534,
      "step": 38528
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.954460620880127,
      "learning_rate": 0.0005595444038535959,
      "loss": 3.0872,
      "step": 38529
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4553097486495972,
      "learning_rate": 0.000559542352344572,
      "loss": 2.9021,
      "step": 38530
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3620680570602417,
      "learning_rate": 0.0005595403007872942,
      "loss": 3.0301,
      "step": 38531
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.840427279472351,
      "learning_rate": 0.0005595382491817631,
      "loss": 2.9978,
      "step": 38532
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8112760782241821,
      "learning_rate": 0.0005595361975279788,
      "loss": 3.0523,
      "step": 38533
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5111699104309082,
      "learning_rate": 0.0005595341458259418,
      "loss": 3.1226,
      "step": 38534
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.391079306602478,
      "learning_rate": 0.0005595320940756526,
      "loss": 3.1769,
      "step": 38535
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6355254650115967,
      "learning_rate": 0.0005595300422771113,
      "loss": 2.9593,
      "step": 38536
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4290186166763306,
      "learning_rate": 0.0005595279904303186,
      "loss": 3.073,
      "step": 38537
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4048641920089722,
      "learning_rate": 0.0005595259385352747,
      "loss": 3.0115,
      "step": 38538
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2975231409072876,
      "learning_rate": 0.0005595238865919801,
      "loss": 3.2021,
      "step": 38539
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4180047512054443,
      "learning_rate": 0.0005595218346004352,
      "loss": 2.9195,
      "step": 38540
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4417818784713745,
      "learning_rate": 0.0005595197825606402,
      "loss": 3.2694,
      "step": 38541
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5194451808929443,
      "learning_rate": 0.0005595177304725954,
      "loss": 3.1731,
      "step": 38542
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5417969226837158,
      "learning_rate": 0.0005595156783363015,
      "loss": 3.0784,
      "step": 38543
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3864574432373047,
      "learning_rate": 0.0005595136261517588,
      "loss": 2.9642,
      "step": 38544
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7707024812698364,
      "learning_rate": 0.0005595115739189675,
      "loss": 3.1369,
      "step": 38545
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8923624753952026,
      "learning_rate": 0.0005595095216379283,
      "loss": 3.0854,
      "step": 38546
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.607229471206665,
      "learning_rate": 0.0005595074693086412,
      "loss": 2.7868,
      "step": 38547
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8209667205810547,
      "learning_rate": 0.0005595054169311068,
      "loss": 2.8966,
      "step": 38548
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5970916748046875,
      "learning_rate": 0.0005595033645053254,
      "loss": 3.1156,
      "step": 38549
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.478259563446045,
      "learning_rate": 0.0005595013120312975,
      "loss": 2.7837,
      "step": 38550
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4611008167266846,
      "learning_rate": 0.0005594992595090233,
      "loss": 3.1972,
      "step": 38551
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1600542068481445,
      "learning_rate": 0.0005594972069385034,
      "loss": 2.9377,
      "step": 38552
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6513643264770508,
      "learning_rate": 0.0005594951543197381,
      "loss": 3.1619,
      "step": 38553
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.667940139770508,
      "learning_rate": 0.0005594931016527277,
      "loss": 2.9086,
      "step": 38554
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.323324203491211,
      "learning_rate": 0.0005594910489374725,
      "loss": 2.919,
      "step": 38555
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7657418251037598,
      "learning_rate": 0.0005594889961739732,
      "loss": 3.2089,
      "step": 38556
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.691365122795105,
      "learning_rate": 0.00055948694336223,
      "loss": 3.04,
      "step": 38557
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4112449884414673,
      "learning_rate": 0.0005594848905022432,
      "loss": 3.4595,
      "step": 38558
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.131856679916382,
      "learning_rate": 0.0005594828375940133,
      "loss": 3.0101,
      "step": 38559
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.41108238697052,
      "learning_rate": 0.0005594807846375406,
      "loss": 2.7076,
      "step": 38560
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5994622707366943,
      "learning_rate": 0.0005594787316328255,
      "loss": 2.9466,
      "step": 38561
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.53888738155365,
      "learning_rate": 0.0005594766785798685,
      "loss": 3.2836,
      "step": 38562
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2040040493011475,
      "learning_rate": 0.0005594746254786699,
      "loss": 3.2206,
      "step": 38563
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7905592918395996,
      "learning_rate": 0.00055947257232923,
      "loss": 2.9301,
      "step": 38564
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.797324776649475,
      "learning_rate": 0.0005594705191315491,
      "loss": 3.1258,
      "step": 38565
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.6045541763305664,
      "learning_rate": 0.0005594684658856281,
      "loss": 3.1038,
      "step": 38566
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.098234176635742,
      "learning_rate": 0.0005594664125914667,
      "loss": 3.1975,
      "step": 38567
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5059415102005005,
      "learning_rate": 0.0005594643592490658,
      "loss": 2.8268,
      "step": 38568
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.830102801322937,
      "learning_rate": 0.0005594623058584254,
      "loss": 3.1328,
      "step": 38569
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8840034008026123,
      "learning_rate": 0.0005594602524195461,
      "loss": 2.98,
      "step": 38570
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8976943492889404,
      "learning_rate": 0.0005594581989324283,
      "loss": 3.0819,
      "step": 38571
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4715163707733154,
      "learning_rate": 0.0005594561453970723,
      "loss": 2.8765,
      "step": 38572
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.623194694519043,
      "learning_rate": 0.0005594540918134785,
      "loss": 2.8304,
      "step": 38573
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9016815423965454,
      "learning_rate": 0.0005594520381816473,
      "loss": 3.2409,
      "step": 38574
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3636771440505981,
      "learning_rate": 0.0005594499845015791,
      "loss": 2.9654,
      "step": 38575
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.75614595413208,
      "learning_rate": 0.0005594479307732742,
      "loss": 3.1096,
      "step": 38576
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.899025797843933,
      "learning_rate": 0.000559445876996733,
      "loss": 3.1062,
      "step": 38577
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.392482876777649,
      "learning_rate": 0.000559443823171956,
      "loss": 2.8844,
      "step": 38578
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4610435962677002,
      "learning_rate": 0.0005594417692989435,
      "loss": 3.1074,
      "step": 38579
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6467554569244385,
      "learning_rate": 0.0005594397153776958,
      "loss": 2.9744,
      "step": 38580
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7855725288391113,
      "learning_rate": 0.0005594376614082134,
      "loss": 3.3214,
      "step": 38581
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6061456203460693,
      "learning_rate": 0.0005594356073904967,
      "loss": 3.1643,
      "step": 38582
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9738123416900635,
      "learning_rate": 0.0005594335533245459,
      "loss": 3.0936,
      "step": 38583
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.097482681274414,
      "learning_rate": 0.0005594314992103615,
      "loss": 3.0713,
      "step": 38584
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.747044324874878,
      "learning_rate": 0.0005594294450479441,
      "loss": 2.9922,
      "step": 38585
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.018475294113159,
      "learning_rate": 0.0005594273908372937,
      "loss": 3.3376,
      "step": 38586
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.368340015411377,
      "learning_rate": 0.0005594253365784108,
      "loss": 3.0251,
      "step": 38587
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3610901832580566,
      "learning_rate": 0.0005594232822712959,
      "loss": 3.222,
      "step": 38588
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.041951894760132,
      "learning_rate": 0.0005594212279159493,
      "loss": 2.844,
      "step": 38589
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.704892873764038,
      "learning_rate": 0.0005594191735123714,
      "loss": 2.977,
      "step": 38590
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.548292875289917,
      "learning_rate": 0.0005594171190605626,
      "loss": 2.9538,
      "step": 38591
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2079944610595703,
      "learning_rate": 0.0005594150645605233,
      "loss": 3.0918,
      "step": 38592
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2629162073135376,
      "learning_rate": 0.0005594130100122538,
      "loss": 3.1649,
      "step": 38593
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6761081218719482,
      "learning_rate": 0.0005594109554157545,
      "loss": 2.7538,
      "step": 38594
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.424633264541626,
      "learning_rate": 0.0005594089007710259,
      "loss": 3.0495,
      "step": 38595
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5326480865478516,
      "learning_rate": 0.0005594068460780682,
      "loss": 3.1106,
      "step": 38596
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7314941883087158,
      "learning_rate": 0.0005594047913368819,
      "loss": 3.1016,
      "step": 38597
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7026982307434082,
      "learning_rate": 0.0005594027365474673,
      "loss": 2.9217,
      "step": 38598
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6622366905212402,
      "learning_rate": 0.0005594006817098249,
      "loss": 2.8197,
      "step": 38599
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4628759622573853,
      "learning_rate": 0.0005593986268239551,
      "loss": 2.8715,
      "step": 38600
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.89210844039917,
      "learning_rate": 0.000559396571889858,
      "loss": 3.1057,
      "step": 38601
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.015812873840332,
      "learning_rate": 0.0005593945169075342,
      "loss": 2.877,
      "step": 38602
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5079169273376465,
      "learning_rate": 0.0005593924618769842,
      "loss": 2.7539,
      "step": 38603
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.400619626045227,
      "learning_rate": 0.0005593904067982082,
      "loss": 3.1122,
      "step": 38604
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.026627540588379,
      "learning_rate": 0.0005593883516712066,
      "loss": 3.3048,
      "step": 38605
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.311532735824585,
      "learning_rate": 0.0005593862964959797,
      "loss": 2.9457,
      "step": 38606
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3118699789047241,
      "learning_rate": 0.0005593842412725281,
      "loss": 3.1303,
      "step": 38607
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.788430094718933,
      "learning_rate": 0.0005593821860008522,
      "loss": 2.7067,
      "step": 38608
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.444096326828003,
      "learning_rate": 0.0005593801306809521,
      "loss": 3.0566,
      "step": 38609
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2528407573699951,
      "learning_rate": 0.0005593780753128284,
      "loss": 3.2195,
      "step": 38610
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.534248948097229,
      "learning_rate": 0.0005593760198964814,
      "loss": 3.3414,
      "step": 38611
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3610901832580566,
      "learning_rate": 0.0005593739644319116,
      "loss": 2.855,
      "step": 38612
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5172892808914185,
      "learning_rate": 0.000559371908919119,
      "loss": 3.139,
      "step": 38613
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.116353750228882,
      "learning_rate": 0.0005593698533581045,
      "loss": 3.0771,
      "step": 38614
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8577303886413574,
      "learning_rate": 0.0005593677977488682,
      "loss": 3.0798,
      "step": 38615
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5478506088256836,
      "learning_rate": 0.0005593657420914105,
      "loss": 2.9077,
      "step": 38616
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5928748846054077,
      "learning_rate": 0.0005593636863857319,
      "loss": 3.2519,
      "step": 38617
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.085869550704956,
      "learning_rate": 0.0005593616306318326,
      "loss": 3.2326,
      "step": 38618
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.9524195194244385,
      "learning_rate": 0.0005593595748297131,
      "loss": 3.0315,
      "step": 38619
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6380186080932617,
      "learning_rate": 0.0005593575189793737,
      "loss": 2.9734,
      "step": 38620
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1062471866607666,
      "learning_rate": 0.000559355463080815,
      "loss": 3.2539,
      "step": 38621
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.151426315307617,
      "learning_rate": 0.0005593534071340372,
      "loss": 3.0997,
      "step": 38622
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6508034467697144,
      "learning_rate": 0.0005593513511390405,
      "loss": 2.9141,
      "step": 38623
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.063814878463745,
      "learning_rate": 0.0005593492950958257,
      "loss": 3.0682,
      "step": 38624
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1566004753112793,
      "learning_rate": 0.0005593472390043929,
      "loss": 3.3543,
      "step": 38625
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6346311569213867,
      "learning_rate": 0.0005593451828647425,
      "loss": 2.8155,
      "step": 38626
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.544950246810913,
      "learning_rate": 0.0005593431266768751,
      "loss": 3.1406,
      "step": 38627
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.3596270084381104,
      "learning_rate": 0.0005593410704407907,
      "loss": 3.1357,
      "step": 38628
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4793987274169922,
      "learning_rate": 0.0005593390141564901,
      "loss": 2.9541,
      "step": 38629
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3492960929870605,
      "learning_rate": 0.0005593369578239734,
      "loss": 2.8188,
      "step": 38630
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9469448328018188,
      "learning_rate": 0.000559334901443241,
      "loss": 3.0424,
      "step": 38631
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5930516719818115,
      "learning_rate": 0.0005593328450142933,
      "loss": 3.1571,
      "step": 38632
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0329058170318604,
      "learning_rate": 0.0005593307885371309,
      "loss": 3.027,
      "step": 38633
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4346719980239868,
      "learning_rate": 0.0005593287320117539,
      "loss": 3.1439,
      "step": 38634
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4624803066253662,
      "learning_rate": 0.0005593266754381628,
      "loss": 3.0799,
      "step": 38635
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7642173767089844,
      "learning_rate": 0.000559324618816358,
      "loss": 3.1514,
      "step": 38636
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3321980237960815,
      "learning_rate": 0.0005593225621463399,
      "loss": 3.0321,
      "step": 38637
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0090596675872803,
      "learning_rate": 0.0005593205054281088,
      "loss": 3.2328,
      "step": 38638
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5110715627670288,
      "learning_rate": 0.0005593184486616652,
      "loss": 2.9419,
      "step": 38639
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.614030361175537,
      "learning_rate": 0.0005593163918470093,
      "loss": 2.9744,
      "step": 38640
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.8743913173675537,
      "learning_rate": 0.0005593143349841417,
      "loss": 3.0165,
      "step": 38641
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.185570478439331,
      "learning_rate": 0.0005593122780730625,
      "loss": 3.0403,
      "step": 38642
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4165568351745605,
      "learning_rate": 0.0005593102211137723,
      "loss": 3.0151,
      "step": 38643
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7166328430175781,
      "learning_rate": 0.0005593081641062714,
      "loss": 3.3605,
      "step": 38644
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.044271945953369,
      "learning_rate": 0.0005593061070505603,
      "loss": 2.9245,
      "step": 38645
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4746896028518677,
      "learning_rate": 0.0005593040499466393,
      "loss": 2.9046,
      "step": 38646
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.2377405166625977,
      "learning_rate": 0.0005593019927945088,
      "loss": 3.1045,
      "step": 38647
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2649611234664917,
      "learning_rate": 0.0005592999355941692,
      "loss": 3.3413,
      "step": 38648
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.002596139907837,
      "learning_rate": 0.0005592978783456208,
      "loss": 2.6996,
      "step": 38649
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.991869330406189,
      "learning_rate": 0.000559295821048864,
      "loss": 2.9213,
      "step": 38650
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5281380414962769,
      "learning_rate": 0.0005592937637038992,
      "loss": 3.139,
      "step": 38651
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.470276951789856,
      "learning_rate": 0.0005592917063107268,
      "loss": 3.1552,
      "step": 38652
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4758585691452026,
      "learning_rate": 0.0005592896488693473,
      "loss": 2.879,
      "step": 38653
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5957989692687988,
      "learning_rate": 0.0005592875913797608,
      "loss": 3.0486,
      "step": 38654
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.814843773841858,
      "learning_rate": 0.0005592855338419679,
      "loss": 2.8987,
      "step": 38655
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0236361026763916,
      "learning_rate": 0.000559283476255969,
      "loss": 2.9362,
      "step": 38656
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6754229068756104,
      "learning_rate": 0.0005592814186217643,
      "loss": 3.1315,
      "step": 38657
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.901867151260376,
      "learning_rate": 0.0005592793609393543,
      "loss": 2.9008,
      "step": 38658
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7875680923461914,
      "learning_rate": 0.0005592773032087394,
      "loss": 2.9406,
      "step": 38659
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.483379602432251,
      "learning_rate": 0.00055927524542992,
      "loss": 2.8266,
      "step": 38660
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8359853029251099,
      "learning_rate": 0.0005592731876028964,
      "loss": 3.0951,
      "step": 38661
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.843871831893921,
      "learning_rate": 0.000559271129727669,
      "loss": 3.1415,
      "step": 38662
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7168052196502686,
      "learning_rate": 0.0005592690718042382,
      "loss": 3.1049,
      "step": 38663
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3520257472991943,
      "learning_rate": 0.0005592670138326045,
      "loss": 2.9592,
      "step": 38664
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5206694602966309,
      "learning_rate": 0.000559264955812768,
      "loss": 3.4334,
      "step": 38665
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.047518253326416,
      "learning_rate": 0.0005592628977447293,
      "loss": 2.958,
      "step": 38666
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.832075595855713,
      "learning_rate": 0.0005592608396284888,
      "loss": 3.2567,
      "step": 38667
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9860130548477173,
      "learning_rate": 0.0005592587814640467,
      "loss": 3.0052,
      "step": 38668
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9650299549102783,
      "learning_rate": 0.0005592567232514036,
      "loss": 3.1081,
      "step": 38669
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.74435293674469,
      "learning_rate": 0.0005592546649905597,
      "loss": 3.3308,
      "step": 38670
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7785420417785645,
      "learning_rate": 0.0005592526066815156,
      "loss": 2.8204,
      "step": 38671
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4521030187606812,
      "learning_rate": 0.0005592505483242715,
      "loss": 2.9645,
      "step": 38672
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.316656470298767,
      "learning_rate": 0.0005592484899188278,
      "loss": 3.1019,
      "step": 38673
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4181365966796875,
      "learning_rate": 0.0005592464314651848,
      "loss": 3.1123,
      "step": 38674
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6104025840759277,
      "learning_rate": 0.0005592443729633431,
      "loss": 3.092,
      "step": 38675
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3929260969161987,
      "learning_rate": 0.000559242314413303,
      "loss": 3.1915,
      "step": 38676
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.505611538887024,
      "learning_rate": 0.0005592402558150648,
      "loss": 3.1607,
      "step": 38677
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8385772705078125,
      "learning_rate": 0.000559238197168629,
      "loss": 3.1427,
      "step": 38678
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9008218050003052,
      "learning_rate": 0.0005592361384739959,
      "loss": 3.2185,
      "step": 38679
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4966119527816772,
      "learning_rate": 0.0005592340797311659,
      "loss": 2.9173,
      "step": 38680
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9556201696395874,
      "learning_rate": 0.0005592320209401394,
      "loss": 2.8841,
      "step": 38681
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5923686027526855,
      "learning_rate": 0.0005592299621009168,
      "loss": 3.119,
      "step": 38682
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4836559295654297,
      "learning_rate": 0.0005592279032134984,
      "loss": 3.1405,
      "step": 38683
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6607258319854736,
      "learning_rate": 0.0005592258442778847,
      "loss": 2.7139,
      "step": 38684
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.4970221519470215,
      "learning_rate": 0.0005592237852940759,
      "loss": 3.1887,
      "step": 38685
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0775206089019775,
      "learning_rate": 0.0005592217262620727,
      "loss": 3.0983,
      "step": 38686
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6914030313491821,
      "learning_rate": 0.0005592196671818751,
      "loss": 3.1408,
      "step": 38687
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5877346992492676,
      "learning_rate": 0.0005592176080534838,
      "loss": 3.1223,
      "step": 38688
    },
    {
      "epoch": 0.5,
      "grad_norm": 5.898531436920166,
      "learning_rate": 0.000559215548876899,
      "loss": 3.0478,
      "step": 38689
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.77090322971344,
      "learning_rate": 0.0005592134896521212,
      "loss": 3.0062,
      "step": 38690
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5795106887817383,
      "learning_rate": 0.0005592114303791506,
      "loss": 3.0593,
      "step": 38691
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.430063486099243,
      "learning_rate": 0.0005592093710579878,
      "loss": 2.9637,
      "step": 38692
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.302546739578247,
      "learning_rate": 0.0005592073116886331,
      "loss": 3.15,
      "step": 38693
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8403397798538208,
      "learning_rate": 0.0005592052522710867,
      "loss": 3.1398,
      "step": 38694
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5369820594787598,
      "learning_rate": 0.0005592031928053494,
      "loss": 3.1723,
      "step": 38695
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.948566436767578,
      "learning_rate": 0.0005592011332914211,
      "loss": 2.9357,
      "step": 38696
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5738962888717651,
      "learning_rate": 0.0005591990737293025,
      "loss": 2.8805,
      "step": 38697
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4898408651351929,
      "learning_rate": 0.000559197014118994,
      "loss": 2.9275,
      "step": 38698
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4589687585830688,
      "learning_rate": 0.0005591949544604958,
      "loss": 2.9588,
      "step": 38699
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6142030954360962,
      "learning_rate": 0.0005591928947538084,
      "loss": 3.0209,
      "step": 38700
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2590872049331665,
      "learning_rate": 0.0005591908349989321,
      "loss": 2.9766,
      "step": 38701
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.396528720855713,
      "learning_rate": 0.0005591887751958673,
      "loss": 3.0115,
      "step": 38702
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7403990030288696,
      "learning_rate": 0.0005591867153446145,
      "loss": 2.94,
      "step": 38703
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5225188732147217,
      "learning_rate": 0.0005591846554451739,
      "loss": 3.1317,
      "step": 38704
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0190112590789795,
      "learning_rate": 0.0005591825954975461,
      "loss": 3.2377,
      "step": 38705
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4228376150131226,
      "learning_rate": 0.0005591805355017312,
      "loss": 2.9806,
      "step": 38706
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.51944100856781,
      "learning_rate": 0.00055917847545773,
      "loss": 3.0403,
      "step": 38707
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9783474206924438,
      "learning_rate": 0.0005591764153655424,
      "loss": 2.9404,
      "step": 38708
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9857598543167114,
      "learning_rate": 0.0005591743552251691,
      "loss": 3.1496,
      "step": 38709
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7399095296859741,
      "learning_rate": 0.0005591722950366105,
      "loss": 3.1822,
      "step": 38710
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9414154291152954,
      "learning_rate": 0.0005591702347998667,
      "loss": 2.8222,
      "step": 38711
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.394258499145508,
      "learning_rate": 0.0005591681745149384,
      "loss": 3.1835,
      "step": 38712
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6523962020874023,
      "learning_rate": 0.0005591661141818257,
      "loss": 3.0503,
      "step": 38713
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4686726331710815,
      "learning_rate": 0.0005591640538005292,
      "loss": 3.0663,
      "step": 38714
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.1718950271606445,
      "learning_rate": 0.0005591619933710492,
      "loss": 3.0648,
      "step": 38715
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.4604451656341553,
      "learning_rate": 0.0005591599328933861,
      "loss": 2.965,
      "step": 38716
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7620753049850464,
      "learning_rate": 0.0005591578723675402,
      "loss": 2.7174,
      "step": 38717
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.478487253189087,
      "learning_rate": 0.0005591558117935121,
      "loss": 3.2094,
      "step": 38718
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.8027641773223877,
      "learning_rate": 0.000559153751171302,
      "loss": 3.3123,
      "step": 38719
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.272526979446411,
      "learning_rate": 0.0005591516905009103,
      "loss": 3.2303,
      "step": 38720
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.982496738433838,
      "learning_rate": 0.0005591496297823374,
      "loss": 2.9543,
      "step": 38721
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5749915838241577,
      "learning_rate": 0.0005591475690155836,
      "loss": 3.0097,
      "step": 38722
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.321989059448242,
      "learning_rate": 0.0005591455082006496,
      "loss": 3.0246,
      "step": 38723
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.412580728530884,
      "learning_rate": 0.0005591434473375354,
      "loss": 3.1189,
      "step": 38724
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5609606504440308,
      "learning_rate": 0.0005591413864262416,
      "loss": 3.3844,
      "step": 38725
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6670973300933838,
      "learning_rate": 0.0005591393254667685,
      "loss": 3.0488,
      "step": 38726
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.891583204269409,
      "learning_rate": 0.0005591372644591165,
      "loss": 3.2593,
      "step": 38727
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7034015655517578,
      "learning_rate": 0.0005591352034032861,
      "loss": 2.956,
      "step": 38728
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4980934858322144,
      "learning_rate": 0.0005591331422992775,
      "loss": 2.9867,
      "step": 38729
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.7120397090911865,
      "learning_rate": 0.000559131081147091,
      "loss": 3.0763,
      "step": 38730
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5327273607254028,
      "learning_rate": 0.0005591290199467273,
      "loss": 3.267,
      "step": 38731
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3808304071426392,
      "learning_rate": 0.0005591269586981867,
      "loss": 3.0073,
      "step": 38732
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3463610410690308,
      "learning_rate": 0.0005591248974014694,
      "loss": 3.0654,
      "step": 38733
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4697853326797485,
      "learning_rate": 0.0005591228360565758,
      "loss": 3.0937,
      "step": 38734
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2919495105743408,
      "learning_rate": 0.0005591207746635065,
      "loss": 3.0581,
      "step": 38735
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6521950960159302,
      "learning_rate": 0.0005591187132222617,
      "loss": 3.155,
      "step": 38736
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6666260957717896,
      "learning_rate": 0.0005591166517328419,
      "loss": 3.291,
      "step": 38737
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5535930395126343,
      "learning_rate": 0.0005591145901952474,
      "loss": 2.8974,
      "step": 38738
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.502751350402832,
      "learning_rate": 0.0005591125286094786,
      "loss": 3.0062,
      "step": 38739
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5867515802383423,
      "learning_rate": 0.0005591104669755359,
      "loss": 3.1065,
      "step": 38740
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6341627836227417,
      "learning_rate": 0.0005591084052934196,
      "loss": 2.9932,
      "step": 38741
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7269182205200195,
      "learning_rate": 0.0005591063435631303,
      "loss": 2.8521,
      "step": 38742
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4022796154022217,
      "learning_rate": 0.0005591042817846681,
      "loss": 3.0953,
      "step": 38743
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7247427701950073,
      "learning_rate": 0.0005591022199580336,
      "loss": 3.055,
      "step": 38744
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0159380435943604,
      "learning_rate": 0.000559100158083227,
      "loss": 2.8899,
      "step": 38745
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7909770011901855,
      "learning_rate": 0.000559098096160249,
      "loss": 3.0488,
      "step": 38746
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5208559036254883,
      "learning_rate": 0.0005590960341890996,
      "loss": 2.911,
      "step": 38747
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5041275024414062,
      "learning_rate": 0.0005590939721697793,
      "loss": 3.0316,
      "step": 38748
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.0376226902008057,
      "learning_rate": 0.0005590919101022887,
      "loss": 3.0074,
      "step": 38749
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.9121168851852417,
      "learning_rate": 0.000559089847986628,
      "loss": 3.1978,
      "step": 38750
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.942102313041687,
      "learning_rate": 0.0005590877858227976,
      "loss": 2.918,
      "step": 38751
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5897103548049927,
      "learning_rate": 0.0005590857236107978,
      "loss": 3.2703,
      "step": 38752
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6829222440719604,
      "learning_rate": 0.0005590836613506291,
      "loss": 2.9145,
      "step": 38753
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6213337182998657,
      "learning_rate": 0.000559081599042292,
      "loss": 3.0913,
      "step": 38754
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4486918449401855,
      "learning_rate": 0.0005590795366857866,
      "loss": 2.9241,
      "step": 38755
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.297930121421814,
      "learning_rate": 0.0005590774742811135,
      "loss": 2.9478,
      "step": 38756
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.76270592212677,
      "learning_rate": 0.0005590754118282729,
      "loss": 3.0197,
      "step": 38757
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4686462879180908,
      "learning_rate": 0.0005590733493272653,
      "loss": 2.7173,
      "step": 38758
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6434177160263062,
      "learning_rate": 0.0005590712867780913,
      "loss": 2.8936,
      "step": 38759
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5931185483932495,
      "learning_rate": 0.0005590692241807507,
      "loss": 2.9309,
      "step": 38760
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.543192982673645,
      "learning_rate": 0.0005590671615352445,
      "loss": 2.8827,
      "step": 38761
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.2467796802520752,
      "learning_rate": 0.0005590650988415727,
      "loss": 2.9704,
      "step": 38762
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.353344440460205,
      "learning_rate": 0.000559063036099736,
      "loss": 2.8493,
      "step": 38763
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5863769054412842,
      "learning_rate": 0.0005590609733097344,
      "loss": 2.9297,
      "step": 38764
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.90729820728302,
      "learning_rate": 0.0005590589104715686,
      "loss": 3.1288,
      "step": 38765
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.6721209287643433,
      "learning_rate": 0.0005590568475852387,
      "loss": 3.0166,
      "step": 38766
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5192421674728394,
      "learning_rate": 0.0005590547846507453,
      "loss": 3.1491,
      "step": 38767
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.420247197151184,
      "learning_rate": 0.0005590527216680888,
      "loss": 3.2725,
      "step": 38768
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7786030769348145,
      "learning_rate": 0.0005590506586372695,
      "loss": 2.7163,
      "step": 38769
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.680167555809021,
      "learning_rate": 0.0005590485955582877,
      "loss": 3.1464,
      "step": 38770
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5326783657073975,
      "learning_rate": 0.000559046532431144,
      "loss": 2.9508,
      "step": 38771
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5362530946731567,
      "learning_rate": 0.0005590444692558386,
      "loss": 2.8199,
      "step": 38772
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5314176082611084,
      "learning_rate": 0.0005590424060323718,
      "loss": 3.3176,
      "step": 38773
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3365651369094849,
      "learning_rate": 0.0005590403427607443,
      "loss": 3.2439,
      "step": 38774
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.735142469406128,
      "learning_rate": 0.0005590382794409563,
      "loss": 3.0463,
      "step": 38775
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.604727864265442,
      "learning_rate": 0.0005590362160730082,
      "loss": 2.8854,
      "step": 38776
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.7405738830566406,
      "learning_rate": 0.0005590341526569003,
      "loss": 2.9321,
      "step": 38777
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4027844667434692,
      "learning_rate": 0.000559032089192633,
      "loss": 3.0313,
      "step": 38778
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.359079122543335,
      "learning_rate": 0.0005590300256802068,
      "loss": 3.16,
      "step": 38779
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.3628720045089722,
      "learning_rate": 0.0005590279621196222,
      "loss": 3.1241,
      "step": 38780
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.4320029020309448,
      "learning_rate": 0.0005590258985108793,
      "loss": 3.1962,
      "step": 38781
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5217069387435913,
      "learning_rate": 0.0005590238348539786,
      "loss": 3.1455,
      "step": 38782
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5936981439590454,
      "learning_rate": 0.0005590217711489204,
      "loss": 3.0396,
      "step": 38783
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.5548529624938965,
      "learning_rate": 0.0005590197073957053,
      "loss": 3.2185,
      "step": 38784
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.702880620956421,
      "learning_rate": 0.0005590176435943335,
      "loss": 3.0668,
      "step": 38785
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.594144582748413,
      "learning_rate": 0.0005590155797448054,
      "loss": 3.2429,
      "step": 38786
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8272372484207153,
      "learning_rate": 0.0005590135158471213,
      "loss": 3.0452,
      "step": 38787
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5807939767837524,
      "learning_rate": 0.0005590114519012819,
      "loss": 3.2154,
      "step": 38788
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0549192428588867,
      "learning_rate": 0.0005590093879072873,
      "loss": 3.204,
      "step": 38789
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8989520072937012,
      "learning_rate": 0.000559007323865138,
      "loss": 2.9935,
      "step": 38790
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5153486728668213,
      "learning_rate": 0.0005590052597748342,
      "loss": 3.0335,
      "step": 38791
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4688133001327515,
      "learning_rate": 0.0005590031956363766,
      "loss": 2.951,
      "step": 38792
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5000901222229004,
      "learning_rate": 0.0005590011314497654,
      "loss": 3.0831,
      "step": 38793
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1093740463256836,
      "learning_rate": 0.0005589990672150009,
      "loss": 3.0765,
      "step": 38794
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8137613534927368,
      "learning_rate": 0.0005589970029320837,
      "loss": 3.1292,
      "step": 38795
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.9032037258148193,
      "learning_rate": 0.000558994938601014,
      "loss": 3.0757,
      "step": 38796
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4917138814926147,
      "learning_rate": 0.0005589928742217923,
      "loss": 3.1905,
      "step": 38797
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7052583694458008,
      "learning_rate": 0.0005589908097944189,
      "loss": 2.8701,
      "step": 38798
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8608988523483276,
      "learning_rate": 0.0005589887453188942,
      "loss": 3.2384,
      "step": 38799
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.69342041015625,
      "learning_rate": 0.0005589866807952186,
      "loss": 3.3037,
      "step": 38800
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.54961097240448,
      "learning_rate": 0.0005589846162233925,
      "loss": 2.9837,
      "step": 38801
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7284157276153564,
      "learning_rate": 0.0005589825516034163,
      "loss": 3.1783,
      "step": 38802
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7221176624298096,
      "learning_rate": 0.0005589804869352904,
      "loss": 3.0179,
      "step": 38803
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9249804019927979,
      "learning_rate": 0.0005589784222190152,
      "loss": 3.0849,
      "step": 38804
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8445425033569336,
      "learning_rate": 0.0005589763574545908,
      "loss": 2.8158,
      "step": 38805
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.455697774887085,
      "learning_rate": 0.0005589742926420179,
      "loss": 2.9418,
      "step": 38806
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6678521633148193,
      "learning_rate": 0.0005589722277812968,
      "loss": 3.0583,
      "step": 38807
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6610740423202515,
      "learning_rate": 0.0005589701628724279,
      "loss": 3.1295,
      "step": 38808
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4397029876708984,
      "learning_rate": 0.0005589680979154115,
      "loss": 2.923,
      "step": 38809
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.8813798427581787,
      "learning_rate": 0.0005589660329102481,
      "loss": 3.2507,
      "step": 38810
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6197055578231812,
      "learning_rate": 0.000558963967856938,
      "loss": 3.0907,
      "step": 38811
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4781566858291626,
      "learning_rate": 0.0005589619027554817,
      "loss": 3.0812,
      "step": 38812
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6168311834335327,
      "learning_rate": 0.0005589598376058793,
      "loss": 3.1033,
      "step": 38813
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5471017360687256,
      "learning_rate": 0.0005589577724081315,
      "loss": 3.021,
      "step": 38814
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3004331588745117,
      "learning_rate": 0.0005589557071622386,
      "loss": 3.1314,
      "step": 38815
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.393013596534729,
      "learning_rate": 0.0005589536418682009,
      "loss": 3.0409,
      "step": 38816
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5324982404708862,
      "learning_rate": 0.0005589515765260188,
      "loss": 2.9947,
      "step": 38817
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9898858070373535,
      "learning_rate": 0.0005589495111356927,
      "loss": 3.1184,
      "step": 38818
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5271209478378296,
      "learning_rate": 0.000558947445697223,
      "loss": 3.062,
      "step": 38819
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3307878971099854,
      "learning_rate": 0.0005589453802106102,
      "loss": 3.3531,
      "step": 38820
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4999425411224365,
      "learning_rate": 0.0005589433146758544,
      "loss": 2.9556,
      "step": 38821
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3753141164779663,
      "learning_rate": 0.0005589412490929563,
      "loss": 3.1755,
      "step": 38822
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.227709174156189,
      "learning_rate": 0.000558939183461916,
      "loss": 3.2042,
      "step": 38823
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.40984046459198,
      "learning_rate": 0.0005589371177827341,
      "loss": 3.1511,
      "step": 38824
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.044368267059326,
      "learning_rate": 0.0005589350520554109,
      "loss": 2.9046,
      "step": 38825
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7269501686096191,
      "learning_rate": 0.0005589329862799468,
      "loss": 3.1958,
      "step": 38826
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8764679431915283,
      "learning_rate": 0.0005589309204563421,
      "loss": 3.1353,
      "step": 38827
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8057713508605957,
      "learning_rate": 0.0005589288545845973,
      "loss": 3.4374,
      "step": 38828
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8521367311477661,
      "learning_rate": 0.0005589267886647127,
      "loss": 3.087,
      "step": 38829
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.528868556022644,
      "learning_rate": 0.0005589247226966888,
      "loss": 3.3641,
      "step": 38830
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.8564164638519287,
      "learning_rate": 0.0005589226566805258,
      "loss": 3.1312,
      "step": 38831
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.753710389137268,
      "learning_rate": 0.0005589205906162243,
      "loss": 3.185,
      "step": 38832
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2448616027832031,
      "learning_rate": 0.0005589185245037845,
      "loss": 3.124,
      "step": 38833
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.943238615989685,
      "learning_rate": 0.0005589164583432069,
      "loss": 2.8318,
      "step": 38834
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6050119400024414,
      "learning_rate": 0.0005589143921344919,
      "loss": 2.7476,
      "step": 38835
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9087615013122559,
      "learning_rate": 0.0005589123258776397,
      "loss": 3.0475,
      "step": 38836
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6585440635681152,
      "learning_rate": 0.0005589102595726508,
      "loss": 3.1112,
      "step": 38837
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1928906440734863,
      "learning_rate": 0.0005589081932195257,
      "loss": 3.1552,
      "step": 38838
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5409806966781616,
      "learning_rate": 0.0005589061268182647,
      "loss": 2.9143,
      "step": 38839
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.561050534248352,
      "learning_rate": 0.000558904060368868,
      "loss": 2.7985,
      "step": 38840
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.144643783569336,
      "learning_rate": 0.0005589019938713364,
      "loss": 2.911,
      "step": 38841
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5383068323135376,
      "learning_rate": 0.0005588999273256699,
      "loss": 3.0398,
      "step": 38842
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.8191332817077637,
      "learning_rate": 0.000558897860731869,
      "loss": 3.0438,
      "step": 38843
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.321824550628662,
      "learning_rate": 0.000558895794089934,
      "loss": 2.932,
      "step": 38844
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3328261375427246,
      "learning_rate": 0.0005588937273998656,
      "loss": 2.8904,
      "step": 38845
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6113642454147339,
      "learning_rate": 0.0005588916606616638,
      "loss": 2.9762,
      "step": 38846
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6382192373275757,
      "learning_rate": 0.0005588895938753291,
      "loss": 3.2086,
      "step": 38847
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4285430908203125,
      "learning_rate": 0.0005588875270408621,
      "loss": 2.9448,
      "step": 38848
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6887896060943604,
      "learning_rate": 0.0005588854601582631,
      "loss": 2.7815,
      "step": 38849
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7423654794692993,
      "learning_rate": 0.0005588833932275323,
      "loss": 2.8066,
      "step": 38850
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7524045705795288,
      "learning_rate": 0.00055888132624867,
      "loss": 3.0118,
      "step": 38851
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.531360149383545,
      "learning_rate": 0.000558879259221677,
      "loss": 2.8011,
      "step": 38852
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.193112850189209,
      "learning_rate": 0.0005588771921465534,
      "loss": 3.0333,
      "step": 38853
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7523813247680664,
      "learning_rate": 0.0005588751250232997,
      "loss": 3.089,
      "step": 38854
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0991580486297607,
      "learning_rate": 0.0005588730578519161,
      "loss": 3.2428,
      "step": 38855
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2436137199401855,
      "learning_rate": 0.0005588709906324032,
      "loss": 3.1003,
      "step": 38856
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5927114486694336,
      "learning_rate": 0.0005588689233647613,
      "loss": 2.6959,
      "step": 38857
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9062249660491943,
      "learning_rate": 0.0005588668560489908,
      "loss": 3.1704,
      "step": 38858
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6926815509796143,
      "learning_rate": 0.000558864788685092,
      "loss": 3.0617,
      "step": 38859
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.64039146900177,
      "learning_rate": 0.0005588627212730654,
      "loss": 3.0879,
      "step": 38860
    },
    {
      "epoch": 0.51,
      "grad_norm": 6.109871864318848,
      "learning_rate": 0.0005588606538129113,
      "loss": 2.889,
      "step": 38861
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5342445373535156,
      "learning_rate": 0.0005588585863046301,
      "loss": 3.1701,
      "step": 38862
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6125036478042603,
      "learning_rate": 0.0005588565187482223,
      "loss": 3.1214,
      "step": 38863
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.1899683475494385,
      "learning_rate": 0.0005588544511436881,
      "loss": 3.0743,
      "step": 38864
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0315308570861816,
      "learning_rate": 0.0005588523834910279,
      "loss": 3.103,
      "step": 38865
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4440593719482422,
      "learning_rate": 0.0005588503157902422,
      "loss": 3.145,
      "step": 38866
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0155773162841797,
      "learning_rate": 0.0005588482480413315,
      "loss": 2.8337,
      "step": 38867
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.272231101989746,
      "learning_rate": 0.0005588461802442958,
      "loss": 2.9405,
      "step": 38868
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5440223217010498,
      "learning_rate": 0.0005588441123991359,
      "loss": 2.8295,
      "step": 38869
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8119124174118042,
      "learning_rate": 0.000558842044505852,
      "loss": 3.1673,
      "step": 38870
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.901230812072754,
      "learning_rate": 0.0005588399765644443,
      "loss": 2.8708,
      "step": 38871
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8612698316574097,
      "learning_rate": 0.0005588379085749134,
      "loss": 3.1046,
      "step": 38872
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.184844732284546,
      "learning_rate": 0.0005588358405372597,
      "loss": 2.9351,
      "step": 38873
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3528510332107544,
      "learning_rate": 0.0005588337724514835,
      "loss": 2.8368,
      "step": 38874
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.427200436592102,
      "learning_rate": 0.0005588317043175852,
      "loss": 3.0711,
      "step": 38875
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3895208835601807,
      "learning_rate": 0.0005588296361355653,
      "loss": 3.1777,
      "step": 38876
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8231242895126343,
      "learning_rate": 0.0005588275679054241,
      "loss": 3.0728,
      "step": 38877
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2343562841415405,
      "learning_rate": 0.0005588254996271619,
      "loss": 3.1976,
      "step": 38878
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5864819288253784,
      "learning_rate": 0.0005588234313007791,
      "loss": 3.1561,
      "step": 38879
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5436650514602661,
      "learning_rate": 0.0005588213629262762,
      "loss": 3.2864,
      "step": 38880
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8478957414627075,
      "learning_rate": 0.0005588192945036536,
      "loss": 3.1014,
      "step": 38881
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3320214748382568,
      "learning_rate": 0.0005588172260329114,
      "loss": 3.0337,
      "step": 38882
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5755209922790527,
      "learning_rate": 0.0005588151575140504,
      "loss": 2.8192,
      "step": 38883
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6583837270736694,
      "learning_rate": 0.0005588130889470706,
      "loss": 3.0399,
      "step": 38884
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4753711223602295,
      "learning_rate": 0.0005588110203319727,
      "loss": 3.1761,
      "step": 38885
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4370535612106323,
      "learning_rate": 0.0005588089516687569,
      "loss": 2.9993,
      "step": 38886
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.2253823280334473,
      "learning_rate": 0.0005588068829574237,
      "loss": 3.0124,
      "step": 38887
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3821372985839844,
      "learning_rate": 0.0005588048141979732,
      "loss": 3.1207,
      "step": 38888
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5069589614868164,
      "learning_rate": 0.0005588027453904063,
      "loss": 3.3746,
      "step": 38889
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.392824649810791,
      "learning_rate": 0.0005588006765347229,
      "loss": 3.184,
      "step": 38890
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.7543623447418213,
      "learning_rate": 0.0005587986076309235,
      "loss": 3.0207,
      "step": 38891
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4372807741165161,
      "learning_rate": 0.0005587965386790087,
      "loss": 2.8217,
      "step": 38892
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.355156421661377,
      "learning_rate": 0.0005587944696789787,
      "loss": 3.0742,
      "step": 38893
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0572996139526367,
      "learning_rate": 0.0005587924006308339,
      "loss": 2.9309,
      "step": 38894
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.936683177947998,
      "learning_rate": 0.0005587903315345747,
      "loss": 3.0539,
      "step": 38895
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7207107543945312,
      "learning_rate": 0.0005587882623902015,
      "loss": 2.8371,
      "step": 38896
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7506710290908813,
      "learning_rate": 0.0005587861931977148,
      "loss": 2.9223,
      "step": 38897
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3937655687332153,
      "learning_rate": 0.0005587841239571146,
      "loss": 2.9466,
      "step": 38898
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.4184072017669678,
      "learning_rate": 0.0005587820546684018,
      "loss": 2.8245,
      "step": 38899
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7009263038635254,
      "learning_rate": 0.0005587799853315764,
      "loss": 2.9363,
      "step": 38900
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.411623239517212,
      "learning_rate": 0.000558777915946639,
      "loss": 3.063,
      "step": 38901
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.56752347946167,
      "learning_rate": 0.0005587758465135899,
      "loss": 2.8981,
      "step": 38902
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4502358436584473,
      "learning_rate": 0.0005587737770324294,
      "loss": 3.032,
      "step": 38903
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.591600179672241,
      "learning_rate": 0.000558771707503158,
      "loss": 3.0491,
      "step": 38904
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.105478048324585,
      "learning_rate": 0.000558769637925776,
      "loss": 3.1811,
      "step": 38905
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.504852056503296,
      "learning_rate": 0.000558767568300284,
      "loss": 2.7389,
      "step": 38906
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.172844886779785,
      "learning_rate": 0.000558765498626682,
      "loss": 3.0133,
      "step": 38907
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7101593017578125,
      "learning_rate": 0.0005587634289049708,
      "loss": 3.1338,
      "step": 38908
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4767651557922363,
      "learning_rate": 0.0005587613591351505,
      "loss": 2.9944,
      "step": 38909
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9089056253433228,
      "learning_rate": 0.0005587592893172216,
      "loss": 3.0333,
      "step": 38910
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9516706466674805,
      "learning_rate": 0.0005587572194511843,
      "loss": 2.9179,
      "step": 38911
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3802140951156616,
      "learning_rate": 0.0005587551495370394,
      "loss": 2.9774,
      "step": 38912
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6029493808746338,
      "learning_rate": 0.0005587530795747869,
      "loss": 2.9628,
      "step": 38913
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4897693395614624,
      "learning_rate": 0.0005587510095644274,
      "loss": 3.0902,
      "step": 38914
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.548512578010559,
      "learning_rate": 0.0005587489395059612,
      "loss": 2.9896,
      "step": 38915
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3586649894714355,
      "learning_rate": 0.0005587468693993886,
      "loss": 3.0273,
      "step": 38916
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4054018259048462,
      "learning_rate": 0.0005587447992447101,
      "loss": 2.6691,
      "step": 38917
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3912935256958008,
      "learning_rate": 0.0005587427290419261,
      "loss": 3.1352,
      "step": 38918
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.502808690071106,
      "learning_rate": 0.000558740658791037,
      "loss": 3.0455,
      "step": 38919
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5232688188552856,
      "learning_rate": 0.000558738588492043,
      "loss": 3.1001,
      "step": 38920
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2894389629364014,
      "learning_rate": 0.0005587365181449448,
      "loss": 3.1781,
      "step": 38921
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.596144676208496,
      "learning_rate": 0.0005587344477497424,
      "loss": 2.631,
      "step": 38922
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3054081201553345,
      "learning_rate": 0.0005587323773064364,
      "loss": 2.9581,
      "step": 38923
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5749503374099731,
      "learning_rate": 0.0005587303068150273,
      "loss": 3.1232,
      "step": 38924
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7431905269622803,
      "learning_rate": 0.0005587282362755152,
      "loss": 3.0188,
      "step": 38925
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.483376383781433,
      "learning_rate": 0.0005587261656879007,
      "loss": 3.0543,
      "step": 38926
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6516011953353882,
      "learning_rate": 0.0005587240950521841,
      "loss": 3.017,
      "step": 38927
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9984703063964844,
      "learning_rate": 0.0005587220243683658,
      "loss": 3.1739,
      "step": 38928
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0284531116485596,
      "learning_rate": 0.0005587199536364463,
      "loss": 2.9913,
      "step": 38929
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5896213054656982,
      "learning_rate": 0.0005587178828564258,
      "loss": 3.0286,
      "step": 38930
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.345017671585083,
      "learning_rate": 0.0005587158120283048,
      "loss": 2.9698,
      "step": 38931
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7826378345489502,
      "learning_rate": 0.0005587137411520837,
      "loss": 3.1068,
      "step": 38932
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5227829217910767,
      "learning_rate": 0.0005587116702277627,
      "loss": 3.0037,
      "step": 38933
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5536439418792725,
      "learning_rate": 0.0005587095992553423,
      "loss": 2.9723,
      "step": 38934
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1694746017456055,
      "learning_rate": 0.0005587075282348229,
      "loss": 2.9576,
      "step": 38935
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7221896648406982,
      "learning_rate": 0.000558705457166205,
      "loss": 2.9583,
      "step": 38936
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8484437465667725,
      "learning_rate": 0.0005587033860494888,
      "loss": 2.6849,
      "step": 38937
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9205174446105957,
      "learning_rate": 0.0005587013148846749,
      "loss": 3.145,
      "step": 38938
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7390530109405518,
      "learning_rate": 0.0005586992436717633,
      "loss": 2.9984,
      "step": 38939
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.777984380722046,
      "learning_rate": 0.0005586971724107548,
      "loss": 3.0702,
      "step": 38940
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6796373128890991,
      "learning_rate": 0.0005586951011016495,
      "loss": 2.9562,
      "step": 38941
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3572289943695068,
      "learning_rate": 0.000558693029744448,
      "loss": 3.025,
      "step": 38942
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5180937051773071,
      "learning_rate": 0.0005586909583391506,
      "loss": 2.8444,
      "step": 38943
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5799028873443604,
      "learning_rate": 0.0005586888868857575,
      "loss": 3.082,
      "step": 38944
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.733278512954712,
      "learning_rate": 0.0005586868153842694,
      "loss": 3.0884,
      "step": 38945
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5133730173110962,
      "learning_rate": 0.0005586847438346865,
      "loss": 3.036,
      "step": 38946
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.7213971614837646,
      "learning_rate": 0.0005586826722370092,
      "loss": 2.8695,
      "step": 38947
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.343541145324707,
      "learning_rate": 0.000558680600591238,
      "loss": 3.1527,
      "step": 38948
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9311085939407349,
      "learning_rate": 0.000558678528897373,
      "loss": 3.0217,
      "step": 38949
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.222450017929077,
      "learning_rate": 0.0005586764571554149,
      "loss": 2.9726,
      "step": 38950
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5652446746826172,
      "learning_rate": 0.000558674385365364,
      "loss": 3.1969,
      "step": 38951
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0559701919555664,
      "learning_rate": 0.0005586723135272206,
      "loss": 2.9432,
      "step": 38952
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.410334825515747,
      "learning_rate": 0.0005586702416409852,
      "loss": 3.3063,
      "step": 38953
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4249181747436523,
      "learning_rate": 0.0005586681697066581,
      "loss": 3.0283,
      "step": 38954
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.365526795387268,
      "learning_rate": 0.0005586660977242396,
      "loss": 3.0583,
      "step": 38955
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5766079425811768,
      "learning_rate": 0.0005586640256937303,
      "loss": 2.9063,
      "step": 38956
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.24982488155365,
      "learning_rate": 0.0005586619536151304,
      "loss": 2.9012,
      "step": 38957
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9956015348434448,
      "learning_rate": 0.0005586598814884404,
      "loss": 3.0316,
      "step": 38958
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6770167350769043,
      "learning_rate": 0.0005586578093136606,
      "loss": 2.9323,
      "step": 38959
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4982786178588867,
      "learning_rate": 0.0005586557370907915,
      "loss": 3.2996,
      "step": 38960
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9860713481903076,
      "learning_rate": 0.0005586536648198334,
      "loss": 2.937,
      "step": 38961
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4301631450653076,
      "learning_rate": 0.0005586515925007867,
      "loss": 3.022,
      "step": 38962
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4435638189315796,
      "learning_rate": 0.0005586495201336518,
      "loss": 2.931,
      "step": 38963
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5871700048446655,
      "learning_rate": 0.0005586474477184291,
      "loss": 2.9944,
      "step": 38964
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5260134935379028,
      "learning_rate": 0.0005586453752551188,
      "loss": 3.0968,
      "step": 38965
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.406695008277893,
      "learning_rate": 0.0005586433027437215,
      "loss": 3.0725,
      "step": 38966
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7945377826690674,
      "learning_rate": 0.0005586412301842377,
      "loss": 3.239,
      "step": 38967
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8931658267974854,
      "learning_rate": 0.0005586391575766674,
      "loss": 2.7185,
      "step": 38968
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.583566665649414,
      "learning_rate": 0.0005586370849210114,
      "loss": 3.095,
      "step": 38969
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0211374759674072,
      "learning_rate": 0.0005586350122172697,
      "loss": 3.1561,
      "step": 38970
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.9567484855651855,
      "learning_rate": 0.0005586329394654429,
      "loss": 3.0359,
      "step": 38971
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.446671485900879,
      "learning_rate": 0.0005586308666655315,
      "loss": 3.0479,
      "step": 38972
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4942731857299805,
      "learning_rate": 0.0005586287938175356,
      "loss": 3.38,
      "step": 38973
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.4566266536712646,
      "learning_rate": 0.0005586267209214558,
      "loss": 3.1725,
      "step": 38974
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.7230024337768555,
      "learning_rate": 0.0005586246479772923,
      "loss": 2.9351,
      "step": 38975
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3222277164459229,
      "learning_rate": 0.0005586225749850457,
      "loss": 3.4945,
      "step": 38976
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5223305225372314,
      "learning_rate": 0.0005586205019447162,
      "loss": 3.1124,
      "step": 38977
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.727348566055298,
      "learning_rate": 0.0005586184288563043,
      "loss": 2.8864,
      "step": 38978
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4739580154418945,
      "learning_rate": 0.0005586163557198104,
      "loss": 2.8714,
      "step": 38979
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.593848705291748,
      "learning_rate": 0.0005586142825352348,
      "loss": 2.8985,
      "step": 38980
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.536102294921875,
      "learning_rate": 0.000558612209302578,
      "loss": 3.0828,
      "step": 38981
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.214461088180542,
      "learning_rate": 0.0005586101360218401,
      "loss": 3.1987,
      "step": 38982
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6069830656051636,
      "learning_rate": 0.0005586080626930218,
      "loss": 3.191,
      "step": 38983
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1828620433807373,
      "learning_rate": 0.0005586059893161235,
      "loss": 3.3398,
      "step": 38984
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.667565107345581,
      "learning_rate": 0.0005586039158911454,
      "loss": 3.0448,
      "step": 38985
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4510105848312378,
      "learning_rate": 0.0005586018424180879,
      "loss": 3.0354,
      "step": 38986
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.522247076034546,
      "learning_rate": 0.0005585997688969514,
      "loss": 2.9298,
      "step": 38987
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0259203910827637,
      "learning_rate": 0.0005585976953277364,
      "loss": 3.157,
      "step": 38988
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.336466670036316,
      "learning_rate": 0.0005585956217104432,
      "loss": 3.0535,
      "step": 38989
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2997652292251587,
      "learning_rate": 0.0005585935480450722,
      "loss": 3.2214,
      "step": 38990
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.749105453491211,
      "learning_rate": 0.0005585914743316238,
      "loss": 2.9457,
      "step": 38991
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4974017143249512,
      "learning_rate": 0.0005585894005700983,
      "loss": 3.04,
      "step": 38992
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8948006629943848,
      "learning_rate": 0.0005585873267604962,
      "loss": 3.179,
      "step": 38993
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4310781955718994,
      "learning_rate": 0.0005585852529028177,
      "loss": 3.0777,
      "step": 38994
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7165213823318481,
      "learning_rate": 0.0005585831789970636,
      "loss": 3.0911,
      "step": 38995
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.545399785041809,
      "learning_rate": 0.0005585811050432337,
      "loss": 3.1773,
      "step": 38996
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5000834465026855,
      "learning_rate": 0.0005585790310413289,
      "loss": 2.9686,
      "step": 38997
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7701622247695923,
      "learning_rate": 0.0005585769569913493,
      "loss": 2.8906,
      "step": 38998
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6073055267333984,
      "learning_rate": 0.0005585748828932953,
      "loss": 3.1974,
      "step": 38999
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4163832664489746,
      "learning_rate": 0.0005585728087471675,
      "loss": 3.1885,
      "step": 39000
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7801762819290161,
      "learning_rate": 0.000558570734552966,
      "loss": 3.1149,
      "step": 39001
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6893118619918823,
      "learning_rate": 0.0005585686603106914,
      "loss": 3.0455,
      "step": 39002
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6653145551681519,
      "learning_rate": 0.0005585665860203439,
      "loss": 3.114,
      "step": 39003
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4563286304473877,
      "learning_rate": 0.0005585645116819242,
      "loss": 2.9406,
      "step": 39004
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.597109317779541,
      "learning_rate": 0.0005585624372954323,
      "loss": 3.2856,
      "step": 39005
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.686227560043335,
      "learning_rate": 0.0005585603628608688,
      "loss": 3.2573,
      "step": 39006
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3427364826202393,
      "learning_rate": 0.000558558288378234,
      "loss": 3.3072,
      "step": 39007
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1670775413513184,
      "learning_rate": 0.0005585562138475284,
      "loss": 3.0429,
      "step": 39008
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.715234398841858,
      "learning_rate": 0.0005585541392687523,
      "loss": 3.2994,
      "step": 39009
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8504626750946045,
      "learning_rate": 0.0005585520646419061,
      "loss": 3.0286,
      "step": 39010
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4797260761260986,
      "learning_rate": 0.0005585499899669902,
      "loss": 3.252,
      "step": 39011
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.646515130996704,
      "learning_rate": 0.000558547915244005,
      "loss": 3.1386,
      "step": 39012
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4898924827575684,
      "learning_rate": 0.0005585458404729508,
      "loss": 3.0135,
      "step": 39013
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.864311933517456,
      "learning_rate": 0.0005585437656538281,
      "loss": 3.2451,
      "step": 39014
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.03391695022583,
      "learning_rate": 0.0005585416907866372,
      "loss": 2.8335,
      "step": 39015
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2455902099609375,
      "learning_rate": 0.0005585396158713786,
      "loss": 2.9062,
      "step": 39016
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1093943119049072,
      "learning_rate": 0.0005585375409080524,
      "loss": 3.0296,
      "step": 39017
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7188469171524048,
      "learning_rate": 0.0005585354658966594,
      "loss": 3.0275,
      "step": 39018
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1321442127227783,
      "learning_rate": 0.0005585333908371998,
      "loss": 3.0337,
      "step": 39019
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.027186393737793,
      "learning_rate": 0.0005585313157296738,
      "loss": 3.3515,
      "step": 39020
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.697559118270874,
      "learning_rate": 0.000558529240574082,
      "loss": 3.0358,
      "step": 39021
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1596555709838867,
      "learning_rate": 0.0005585271653704248,
      "loss": 2.9166,
      "step": 39022
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.61237370967865,
      "learning_rate": 0.0005585250901187025,
      "loss": 2.9299,
      "step": 39023
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5483156442642212,
      "learning_rate": 0.0005585230148189154,
      "loss": 3.1513,
      "step": 39024
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6079045534133911,
      "learning_rate": 0.0005585209394710642,
      "loss": 3.0797,
      "step": 39025
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5335139036178589,
      "learning_rate": 0.0005585188640751489,
      "loss": 3.272,
      "step": 39026
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5053893327713013,
      "learning_rate": 0.00055851678863117,
      "loss": 3.1271,
      "step": 39027
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.327504277229309,
      "learning_rate": 0.0005585147131391281,
      "loss": 2.8474,
      "step": 39028
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.713769793510437,
      "learning_rate": 0.0005585126375990234,
      "loss": 3.0486,
      "step": 39029
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8156508207321167,
      "learning_rate": 0.0005585105620108563,
      "loss": 2.8371,
      "step": 39030
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5504179000854492,
      "learning_rate": 0.0005585084863746273,
      "loss": 2.8554,
      "step": 39031
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8598273992538452,
      "learning_rate": 0.0005585064106903366,
      "loss": 2.7622,
      "step": 39032
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5393445491790771,
      "learning_rate": 0.0005585043349579847,
      "loss": 3.2663,
      "step": 39033
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5091058015823364,
      "learning_rate": 0.000558502259177572,
      "loss": 2.8597,
      "step": 39034
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3221476078033447,
      "learning_rate": 0.0005585001833490987,
      "loss": 3.0548,
      "step": 39035
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3517948389053345,
      "learning_rate": 0.0005584981074725655,
      "loss": 2.8129,
      "step": 39036
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.811327576637268,
      "learning_rate": 0.0005584960315479726,
      "loss": 3.1718,
      "step": 39037
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.448920726776123,
      "learning_rate": 0.0005584939555753203,
      "loss": 2.9103,
      "step": 39038
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4809424877166748,
      "learning_rate": 0.0005584918795546092,
      "loss": 2.895,
      "step": 39039
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.248615264892578,
      "learning_rate": 0.0005584898034858395,
      "loss": 2.9971,
      "step": 39040
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4465004205703735,
      "learning_rate": 0.0005584877273690118,
      "loss": 2.9203,
      "step": 39041
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5765395164489746,
      "learning_rate": 0.0005584856512041263,
      "loss": 2.913,
      "step": 39042
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4479211568832397,
      "learning_rate": 0.0005584835749911834,
      "loss": 2.9167,
      "step": 39043
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8596409559249878,
      "learning_rate": 0.0005584814987301834,
      "loss": 3.0801,
      "step": 39044
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.806705117225647,
      "learning_rate": 0.000558479422421127,
      "loss": 3.1748,
      "step": 39045
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8454638719558716,
      "learning_rate": 0.0005584773460640144,
      "loss": 3.1572,
      "step": 39046
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5259987115859985,
      "learning_rate": 0.0005584752696588459,
      "loss": 3.1453,
      "step": 39047
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6398335695266724,
      "learning_rate": 0.0005584731932056219,
      "loss": 3.0226,
      "step": 39048
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.029937744140625,
      "learning_rate": 0.0005584711167043431,
      "loss": 3.0843,
      "step": 39049
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5830405950546265,
      "learning_rate": 0.0005584690401550093,
      "loss": 3.1235,
      "step": 39050
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.375634789466858,
      "learning_rate": 0.0005584669635576216,
      "loss": 2.9448,
      "step": 39051
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6250466108322144,
      "learning_rate": 0.0005584648869121797,
      "loss": 2.9435,
      "step": 39052
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.536094069480896,
      "learning_rate": 0.0005584628102186844,
      "loss": 2.709,
      "step": 39053
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.285380482673645,
      "learning_rate": 0.0005584607334771361,
      "loss": 2.996,
      "step": 39054
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1387603282928467,
      "learning_rate": 0.0005584586566875349,
      "loss": 2.8972,
      "step": 39055
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3079272508621216,
      "learning_rate": 0.0005584565798498815,
      "loss": 3.0491,
      "step": 39056
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7815158367156982,
      "learning_rate": 0.0005584545029641761,
      "loss": 3.091,
      "step": 39057
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6461315155029297,
      "learning_rate": 0.000558452426030419,
      "loss": 3.065,
      "step": 39058
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2065773010253906,
      "learning_rate": 0.0005584503490486109,
      "loss": 3.178,
      "step": 39059
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.434293270111084,
      "learning_rate": 0.0005584482720187519,
      "loss": 3.2229,
      "step": 39060
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.432157278060913,
      "learning_rate": 0.0005584461949408425,
      "loss": 3.0193,
      "step": 39061
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2537044286727905,
      "learning_rate": 0.000558444117814883,
      "loss": 3.0877,
      "step": 39062
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6432242393493652,
      "learning_rate": 0.000558442040640874,
      "loss": 3.0258,
      "step": 39063
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5247752666473389,
      "learning_rate": 0.0005584399634188157,
      "loss": 3.2046,
      "step": 39064
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5585569143295288,
      "learning_rate": 0.0005584378861487085,
      "loss": 3.1739,
      "step": 39065
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.026796817779541,
      "learning_rate": 0.0005584358088305529,
      "loss": 2.7063,
      "step": 39066
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6718064546585083,
      "learning_rate": 0.0005584337314643492,
      "loss": 3.2418,
      "step": 39067
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5101052522659302,
      "learning_rate": 0.0005584316540500977,
      "loss": 3.3371,
      "step": 39068
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8195692300796509,
      "learning_rate": 0.0005584295765877989,
      "loss": 2.7697,
      "step": 39069
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9030505418777466,
      "learning_rate": 0.0005584274990774531,
      "loss": 2.8805,
      "step": 39070
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4581516981124878,
      "learning_rate": 0.0005584254215190608,
      "loss": 3.17,
      "step": 39071
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.263572335243225,
      "learning_rate": 0.0005584233439126225,
      "loss": 3.0708,
      "step": 39072
    },
    {
      "epoch": 0.51,
      "grad_norm": 4.792826175689697,
      "learning_rate": 0.0005584212662581382,
      "loss": 2.9861,
      "step": 39073
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.506028652191162,
      "learning_rate": 0.0005584191885556086,
      "loss": 2.7708,
      "step": 39074
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.804988980293274,
      "learning_rate": 0.0005584171108050341,
      "loss": 3.0528,
      "step": 39075
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6183116436004639,
      "learning_rate": 0.0005584150330064148,
      "loss": 3.13,
      "step": 39076
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.065603733062744,
      "learning_rate": 0.0005584129551597514,
      "loss": 3.1442,
      "step": 39077
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.7323615550994873,
      "learning_rate": 0.000558410877265044,
      "loss": 3.0625,
      "step": 39078
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.7590692043304443,
      "learning_rate": 0.0005584087993222933,
      "loss": 2.8246,
      "step": 39079
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.121274471282959,
      "learning_rate": 0.0005584067213314994,
      "loss": 3.0242,
      "step": 39080
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.633382797241211,
      "learning_rate": 0.0005584046432926629,
      "loss": 2.8727,
      "step": 39081
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.8085641860961914,
      "learning_rate": 0.000558402565205784,
      "loss": 3.2082,
      "step": 39082
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5708129405975342,
      "learning_rate": 0.0005584004870708633,
      "loss": 2.9584,
      "step": 39083
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.19620418548584,
      "learning_rate": 0.000558398408887901,
      "loss": 3.2355,
      "step": 39084
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.3065128326416016,
      "learning_rate": 0.0005583963306568977,
      "loss": 2.9628,
      "step": 39085
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.6963164806365967,
      "learning_rate": 0.0005583942523778535,
      "loss": 3.1072,
      "step": 39086
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8605892658233643,
      "learning_rate": 0.0005583921740507689,
      "loss": 2.8976,
      "step": 39087
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6875056028366089,
      "learning_rate": 0.0005583900956756445,
      "loss": 3.0613,
      "step": 39088
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2610740661621094,
      "learning_rate": 0.0005583880172524803,
      "loss": 3.0948,
      "step": 39089
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1232681274414062,
      "learning_rate": 0.000558385938781277,
      "loss": 3.0902,
      "step": 39090
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.753585934638977,
      "learning_rate": 0.0005583838602620349,
      "loss": 3.1068,
      "step": 39091
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7453805208206177,
      "learning_rate": 0.0005583817816947542,
      "loss": 3.1493,
      "step": 39092
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.227775812149048,
      "learning_rate": 0.0005583797030794355,
      "loss": 2.9815,
      "step": 39093
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4286199808120728,
      "learning_rate": 0.0005583776244160793,
      "loss": 3.2039,
      "step": 39094
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4791488647460938,
      "learning_rate": 0.0005583755457046857,
      "loss": 3.1656,
      "step": 39095
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2991669178009033,
      "learning_rate": 0.0005583734669452552,
      "loss": 2.8302,
      "step": 39096
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5388816595077515,
      "learning_rate": 0.0005583713881377883,
      "loss": 2.9964,
      "step": 39097
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3577278852462769,
      "learning_rate": 0.0005583693092822852,
      "loss": 3.0986,
      "step": 39098
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3987159729003906,
      "learning_rate": 0.0005583672303787463,
      "loss": 3.0918,
      "step": 39099
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.302494764328003,
      "learning_rate": 0.0005583651514271721,
      "loss": 3.2681,
      "step": 39100
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.616858720779419,
      "learning_rate": 0.0005583630724275631,
      "loss": 3.1293,
      "step": 39101
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5230236053466797,
      "learning_rate": 0.0005583609933799193,
      "loss": 2.9583,
      "step": 39102
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2249720096588135,
      "learning_rate": 0.0005583589142842414,
      "loss": 3.1357,
      "step": 39103
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8981555700302124,
      "learning_rate": 0.0005583568351405297,
      "loss": 3.056,
      "step": 39104
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8379366397857666,
      "learning_rate": 0.0005583547559487846,
      "loss": 3.0624,
      "step": 39105
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.567007303237915,
      "learning_rate": 0.0005583526767090066,
      "loss": 3.2191,
      "step": 39106
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0117814540863037,
      "learning_rate": 0.0005583505974211958,
      "loss": 3.2734,
      "step": 39107
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.8488128185272217,
      "learning_rate": 0.0005583485180853527,
      "loss": 2.8831,
      "step": 39108
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4227198362350464,
      "learning_rate": 0.0005583464387014779,
      "loss": 3.2644,
      "step": 39109
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4430559873580933,
      "learning_rate": 0.0005583443592695715,
      "loss": 3.0957,
      "step": 39110
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6847450733184814,
      "learning_rate": 0.000558342279789634,
      "loss": 3.0039,
      "step": 39111
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.6198654174804688,
      "learning_rate": 0.0005583402002616658,
      "loss": 2.9064,
      "step": 39112
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5226093530654907,
      "learning_rate": 0.0005583381206856673,
      "loss": 3.0517,
      "step": 39113
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.868441104888916,
      "learning_rate": 0.0005583360410616389,
      "loss": 3.1808,
      "step": 39114
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.587547540664673,
      "learning_rate": 0.000558333961389581,
      "loss": 3.1314,
      "step": 39115
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.499229907989502,
      "learning_rate": 0.0005583318816694939,
      "loss": 3.0442,
      "step": 39116
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2743290662765503,
      "learning_rate": 0.000558329801901378,
      "loss": 2.997,
      "step": 39117
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9591543674468994,
      "learning_rate": 0.0005583277220852337,
      "loss": 3.079,
      "step": 39118
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2056031227111816,
      "learning_rate": 0.0005583256422210614,
      "loss": 2.8516,
      "step": 39119
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.657740831375122,
      "learning_rate": 0.0005583235623088615,
      "loss": 3.1322,
      "step": 39120
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2858422994613647,
      "learning_rate": 0.0005583214823486344,
      "loss": 2.7725,
      "step": 39121
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7218073606491089,
      "learning_rate": 0.0005583194023403805,
      "loss": 3.0305,
      "step": 39122
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5370752811431885,
      "learning_rate": 0.0005583173222841,
      "loss": 3.1019,
      "step": 39123
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1783151626586914,
      "learning_rate": 0.0005583152421797936,
      "loss": 3.0802,
      "step": 39124
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.504905104637146,
      "learning_rate": 0.0005583131620274614,
      "loss": 3.0694,
      "step": 39125
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5930061340332031,
      "learning_rate": 0.0005583110818271039,
      "loss": 2.8892,
      "step": 39126
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6342628002166748,
      "learning_rate": 0.0005583090015787215,
      "loss": 3.1288,
      "step": 39127
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5520436763763428,
      "learning_rate": 0.0005583069212823146,
      "loss": 3.0217,
      "step": 39128
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4312456846237183,
      "learning_rate": 0.0005583048409378836,
      "loss": 3.1233,
      "step": 39129
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0674023628234863,
      "learning_rate": 0.0005583027605454289,
      "loss": 2.8796,
      "step": 39130
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6339040994644165,
      "learning_rate": 0.0005583006801049507,
      "loss": 2.9322,
      "step": 39131
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5076744556427002,
      "learning_rate": 0.0005582985996164497,
      "loss": 3.1276,
      "step": 39132
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6962685585021973,
      "learning_rate": 0.0005582965190799261,
      "loss": 3.078,
      "step": 39133
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7478564977645874,
      "learning_rate": 0.0005582944384953801,
      "loss": 3.0587,
      "step": 39134
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7269164323806763,
      "learning_rate": 0.0005582923578628125,
      "loss": 2.7705,
      "step": 39135
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.524556040763855,
      "learning_rate": 0.0005582902771822234,
      "loss": 2.9739,
      "step": 39136
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3781265020370483,
      "learning_rate": 0.0005582881964536131,
      "loss": 3.0956,
      "step": 39137
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4313559532165527,
      "learning_rate": 0.0005582861156769825,
      "loss": 3.0342,
      "step": 39138
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4544888734817505,
      "learning_rate": 0.0005582840348523313,
      "loss": 3.0299,
      "step": 39139
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0831379890441895,
      "learning_rate": 0.0005582819539796603,
      "loss": 3.3491,
      "step": 39140
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4071593284606934,
      "learning_rate": 0.0005582798730589699,
      "loss": 3.1348,
      "step": 39141
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8331340551376343,
      "learning_rate": 0.0005582777920902605,
      "loss": 3.0311,
      "step": 39142
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5014549493789673,
      "learning_rate": 0.0005582757110735322,
      "loss": 3.1021,
      "step": 39143
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3115822076797485,
      "learning_rate": 0.0005582736300087857,
      "loss": 3.2827,
      "step": 39144
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0405335426330566,
      "learning_rate": 0.0005582715488960211,
      "loss": 2.9403,
      "step": 39145
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3896528482437134,
      "learning_rate": 0.000558269467735239,
      "loss": 3.1619,
      "step": 39146
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5782809257507324,
      "learning_rate": 0.0005582673865264397,
      "loss": 3.0042,
      "step": 39147
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.477403998374939,
      "learning_rate": 0.0005582653052696237,
      "loss": 3.0365,
      "step": 39148
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3825509548187256,
      "learning_rate": 0.0005582632239647913,
      "loss": 2.8356,
      "step": 39149
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7138479948043823,
      "learning_rate": 0.0005582611426119428,
      "loss": 3.1619,
      "step": 39150
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.546565055847168,
      "learning_rate": 0.0005582590612110787,
      "loss": 3.0853,
      "step": 39151
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.517113447189331,
      "learning_rate": 0.0005582569797621995,
      "loss": 3.1761,
      "step": 39152
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5287340879440308,
      "learning_rate": 0.0005582548982653053,
      "loss": 3.0363,
      "step": 39153
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3373326063156128,
      "learning_rate": 0.0005582528167203968,
      "loss": 2.8667,
      "step": 39154
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.605637788772583,
      "learning_rate": 0.000558250735127474,
      "loss": 2.8652,
      "step": 39155
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6826601028442383,
      "learning_rate": 0.0005582486534865378,
      "loss": 2.9976,
      "step": 39156
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6836515665054321,
      "learning_rate": 0.000558246571797588,
      "loss": 2.987,
      "step": 39157
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9733060598373413,
      "learning_rate": 0.0005582444900606255,
      "loss": 2.9635,
      "step": 39158
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5208946466445923,
      "learning_rate": 0.0005582424082756504,
      "loss": 3.1579,
      "step": 39159
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9127739667892456,
      "learning_rate": 0.0005582403264426632,
      "loss": 3.1027,
      "step": 39160
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6522616147994995,
      "learning_rate": 0.0005582382445616642,
      "loss": 3.1546,
      "step": 39161
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6198666095733643,
      "learning_rate": 0.0005582361626326538,
      "loss": 3.1428,
      "step": 39162
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.449597120285034,
      "learning_rate": 0.0005582340806556325,
      "loss": 3.045,
      "step": 39163
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.373258113861084,
      "learning_rate": 0.0005582319986306006,
      "loss": 3.1782,
      "step": 39164
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8555151224136353,
      "learning_rate": 0.0005582299165575585,
      "loss": 2.9255,
      "step": 39165
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4517054557800293,
      "learning_rate": 0.0005582278344365064,
      "loss": 3.3422,
      "step": 39166
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.48978853225708,
      "learning_rate": 0.0005582257522674451,
      "loss": 2.9478,
      "step": 39167
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.952582597732544,
      "learning_rate": 0.0005582236700503747,
      "loss": 3.1467,
      "step": 39168
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1170802116394043,
      "learning_rate": 0.0005582215877852957,
      "loss": 2.833,
      "step": 39169
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2798171043395996,
      "learning_rate": 0.0005582195054722084,
      "loss": 2.9229,
      "step": 39170
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.8577828407287598,
      "learning_rate": 0.000558217423111113,
      "loss": 3.1041,
      "step": 39171
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.8835384845733643,
      "learning_rate": 0.0005582153407020104,
      "loss": 2.925,
      "step": 39172
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5010286569595337,
      "learning_rate": 0.0005582132582449006,
      "loss": 2.9569,
      "step": 39173
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.788908839225769,
      "learning_rate": 0.000558211175739784,
      "loss": 3.3187,
      "step": 39174
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9964067935943604,
      "learning_rate": 0.0005582090931866612,
      "loss": 3.1799,
      "step": 39175
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.7051589488983154,
      "learning_rate": 0.0005582070105855324,
      "loss": 3.0542,
      "step": 39176
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6332099437713623,
      "learning_rate": 0.000558204927936398,
      "loss": 3.1611,
      "step": 39177
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0453128814697266,
      "learning_rate": 0.0005582028452392585,
      "loss": 3.1273,
      "step": 39178
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6104471683502197,
      "learning_rate": 0.0005582007624941141,
      "loss": 3.1133,
      "step": 39179
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5270483493804932,
      "learning_rate": 0.0005581986797009654,
      "loss": 3.2887,
      "step": 39180
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.318617820739746,
      "learning_rate": 0.0005581965968598127,
      "loss": 3.011,
      "step": 39181
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.9542810916900635,
      "learning_rate": 0.0005581945139706564,
      "loss": 3.0319,
      "step": 39182
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.979487419128418,
      "learning_rate": 0.0005581924310334969,
      "loss": 2.6559,
      "step": 39183
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.161996841430664,
      "learning_rate": 0.0005581903480483344,
      "loss": 2.7714,
      "step": 39184
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.1945087909698486,
      "learning_rate": 0.0005581882650151695,
      "loss": 3.1092,
      "step": 39185
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4347261190414429,
      "learning_rate": 0.0005581861819340026,
      "loss": 3.0688,
      "step": 39186
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3132463693618774,
      "learning_rate": 0.000558184098804834,
      "loss": 3.1944,
      "step": 39187
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5952121019363403,
      "learning_rate": 0.000558182015627664,
      "loss": 2.7377,
      "step": 39188
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4159257411956787,
      "learning_rate": 0.0005581799324024931,
      "loss": 3.2282,
      "step": 39189
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.210653305053711,
      "learning_rate": 0.0005581778491293218,
      "loss": 3.1629,
      "step": 39190
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.441158652305603,
      "learning_rate": 0.0005581757658081502,
      "loss": 3.1837,
      "step": 39191
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7752000093460083,
      "learning_rate": 0.000558173682438979,
      "loss": 3.0666,
      "step": 39192
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4255720376968384,
      "learning_rate": 0.0005581715990218084,
      "loss": 3.0664,
      "step": 39193
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6488487720489502,
      "learning_rate": 0.0005581695155566388,
      "loss": 3.1425,
      "step": 39194
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.575662612915039,
      "learning_rate": 0.0005581674320434707,
      "loss": 2.8846,
      "step": 39195
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7572786808013916,
      "learning_rate": 0.0005581653484823043,
      "loss": 2.9979,
      "step": 39196
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.1417630910873413,
      "learning_rate": 0.0005581632648731401,
      "loss": 2.9865,
      "step": 39197
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.414832592010498,
      "learning_rate": 0.0005581611812159785,
      "loss": 2.9874,
      "step": 39198
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.530073881149292,
      "learning_rate": 0.0005581590975108199,
      "loss": 3.1104,
      "step": 39199
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.797408103942871,
      "learning_rate": 0.0005581570137576645,
      "loss": 3.1119,
      "step": 39200
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4746135473251343,
      "learning_rate": 0.000558154929956513,
      "loss": 3.1857,
      "step": 39201
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5170860290527344,
      "learning_rate": 0.0005581528461073655,
      "loss": 3.0598,
      "step": 39202
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5238715410232544,
      "learning_rate": 0.0005581507622102227,
      "loss": 3.1498,
      "step": 39203
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4934589862823486,
      "learning_rate": 0.0005581486782650846,
      "loss": 2.9247,
      "step": 39204
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6109064817428589,
      "learning_rate": 0.0005581465942719519,
      "loss": 3.0787,
      "step": 39205
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.343224048614502,
      "learning_rate": 0.0005581445102308248,
      "loss": 3.3886,
      "step": 39206
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4577686786651611,
      "learning_rate": 0.0005581424261417038,
      "loss": 3.0549,
      "step": 39207
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.424518346786499,
      "learning_rate": 0.0005581403420045893,
      "loss": 2.8733,
      "step": 39208
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5180299282073975,
      "learning_rate": 0.0005581382578194816,
      "loss": 3.3478,
      "step": 39209
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.677377700805664,
      "learning_rate": 0.0005581361735863811,
      "loss": 2.9125,
      "step": 39210
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.652677059173584,
      "learning_rate": 0.0005581340893052883,
      "loss": 2.8935,
      "step": 39211
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3574360609054565,
      "learning_rate": 0.0005581320049762034,
      "loss": 3.1638,
      "step": 39212
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2707772254943848,
      "learning_rate": 0.0005581299205991269,
      "loss": 2.9731,
      "step": 39213
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4467790126800537,
      "learning_rate": 0.0005581278361740591,
      "loss": 2.954,
      "step": 39214
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6334154605865479,
      "learning_rate": 0.0005581257517010006,
      "loss": 2.9547,
      "step": 39215
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5087809562683105,
      "learning_rate": 0.0005581236671799516,
      "loss": 2.9291,
      "step": 39216
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.675034523010254,
      "learning_rate": 0.0005581215826109126,
      "loss": 3.1354,
      "step": 39217
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.556288242340088,
      "learning_rate": 0.0005581194979938839,
      "loss": 3.1505,
      "step": 39218
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.551634430885315,
      "learning_rate": 0.0005581174133288658,
      "loss": 3.1196,
      "step": 39219
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4601365327835083,
      "learning_rate": 0.0005581153286158589,
      "loss": 3.127,
      "step": 39220
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9535430669784546,
      "learning_rate": 0.0005581132438548634,
      "loss": 3.0287,
      "step": 39221
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.90521240234375,
      "learning_rate": 0.0005581111590458799,
      "loss": 2.7797,
      "step": 39222
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5299108028411865,
      "learning_rate": 0.0005581090741889087,
      "loss": 2.9769,
      "step": 39223
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0905303955078125,
      "learning_rate": 0.0005581069892839501,
      "loss": 2.7189,
      "step": 39224
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.002690553665161,
      "learning_rate": 0.0005581049043310044,
      "loss": 2.938,
      "step": 39225
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0956172943115234,
      "learning_rate": 0.0005581028193300722,
      "loss": 2.9586,
      "step": 39226
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8489017486572266,
      "learning_rate": 0.000558100734281154,
      "loss": 3.2747,
      "step": 39227
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5521498918533325,
      "learning_rate": 0.0005580986491842497,
      "loss": 3.2129,
      "step": 39228
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.2728986740112305,
      "learning_rate": 0.0005580965640393601,
      "loss": 3.087,
      "step": 39229
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3061435222625732,
      "learning_rate": 0.0005580944788464855,
      "loss": 3.1193,
      "step": 39230
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7070964574813843,
      "learning_rate": 0.0005580923936056263,
      "loss": 3.1279,
      "step": 39231
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5670498609542847,
      "learning_rate": 0.0005580903083167828,
      "loss": 3.0504,
      "step": 39232
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6166399717330933,
      "learning_rate": 0.0005580882229799556,
      "loss": 3.3192,
      "step": 39233
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6848745346069336,
      "learning_rate": 0.0005580861375951448,
      "loss": 2.8515,
      "step": 39234
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.802255392074585,
      "learning_rate": 0.0005580840521623509,
      "loss": 2.9425,
      "step": 39235
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4981071949005127,
      "learning_rate": 0.0005580819666815743,
      "loss": 3.3876,
      "step": 39236
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.39083993434906,
      "learning_rate": 0.0005580798811528154,
      "loss": 3.1534,
      "step": 39237
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3151720762252808,
      "learning_rate": 0.0005580777955760745,
      "loss": 2.9755,
      "step": 39238
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.050039052963257,
      "learning_rate": 0.0005580757099513522,
      "loss": 2.8914,
      "step": 39239
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4389500617980957,
      "learning_rate": 0.0005580736242786487,
      "loss": 3.0985,
      "step": 39240
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5088015794754028,
      "learning_rate": 0.0005580715385579644,
      "loss": 2.9366,
      "step": 39241
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6251460313796997,
      "learning_rate": 0.0005580694527892998,
      "loss": 3.0518,
      "step": 39242
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.620335578918457,
      "learning_rate": 0.0005580673669726553,
      "loss": 3.1178,
      "step": 39243
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.438493013381958,
      "learning_rate": 0.000558065281108031,
      "loss": 2.998,
      "step": 39244
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7118687629699707,
      "learning_rate": 0.0005580631951954277,
      "loss": 2.9511,
      "step": 39245
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.421392560005188,
      "learning_rate": 0.0005580611092348453,
      "loss": 2.9936,
      "step": 39246
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.018571615219116,
      "learning_rate": 0.0005580590232262847,
      "loss": 3.0527,
      "step": 39247
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.687605381011963,
      "learning_rate": 0.0005580569371697459,
      "loss": 3.3345,
      "step": 39248
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.3220183849334717,
      "learning_rate": 0.0005580548510652297,
      "loss": 3.1135,
      "step": 39249
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8046112060546875,
      "learning_rate": 0.000558052764912736,
      "loss": 3.0422,
      "step": 39250
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6442803144454956,
      "learning_rate": 0.0005580506787122655,
      "loss": 3.094,
      "step": 39251
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1462488174438477,
      "learning_rate": 0.0005580485924638185,
      "loss": 3.0867,
      "step": 39252
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.450877070426941,
      "learning_rate": 0.0005580465061673953,
      "loss": 3.3975,
      "step": 39253
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6649261713027954,
      "learning_rate": 0.0005580444198229965,
      "loss": 3.0364,
      "step": 39254
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.756858468055725,
      "learning_rate": 0.0005580423334306223,
      "loss": 3.1043,
      "step": 39255
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.965780735015869,
      "learning_rate": 0.0005580402469902733,
      "loss": 3.0205,
      "step": 39256
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.151088237762451,
      "learning_rate": 0.0005580381605019496,
      "loss": 2.9529,
      "step": 39257
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9114774465560913,
      "learning_rate": 0.0005580360739656519,
      "loss": 2.8127,
      "step": 39258
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.560167074203491,
      "learning_rate": 0.0005580339873813802,
      "loss": 3.0952,
      "step": 39259
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4083813428878784,
      "learning_rate": 0.0005580319007491353,
      "loss": 2.9962,
      "step": 39260
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5372051000595093,
      "learning_rate": 0.0005580298140689173,
      "loss": 2.9182,
      "step": 39261
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4020694494247437,
      "learning_rate": 0.0005580277273407267,
      "loss": 2.9787,
      "step": 39262
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3676316738128662,
      "learning_rate": 0.0005580256405645638,
      "loss": 2.9687,
      "step": 39263
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3821791410446167,
      "learning_rate": 0.0005580235537404292,
      "loss": 2.9626,
      "step": 39264
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.453561544418335,
      "learning_rate": 0.0005580214668683231,
      "loss": 3.3686,
      "step": 39265
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3974864482879639,
      "learning_rate": 0.0005580193799482459,
      "loss": 2.8883,
      "step": 39266
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.012937307357788,
      "learning_rate": 0.0005580172929801981,
      "loss": 2.7357,
      "step": 39267
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1586625576019287,
      "learning_rate": 0.00055801520596418,
      "loss": 2.8651,
      "step": 39268
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.762463927268982,
      "learning_rate": 0.000558013118900192,
      "loss": 3.199,
      "step": 39269
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0884385108947754,
      "learning_rate": 0.0005580110317882344,
      "loss": 3.218,
      "step": 39270
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.442317485809326,
      "learning_rate": 0.0005580089446283078,
      "loss": 2.8841,
      "step": 39271
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.594847321510315,
      "learning_rate": 0.0005580068574204124,
      "loss": 3.0594,
      "step": 39272
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7148033380508423,
      "learning_rate": 0.0005580047701645487,
      "loss": 2.9884,
      "step": 39273
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.25931715965271,
      "learning_rate": 0.000558002682860717,
      "loss": 3.2457,
      "step": 39274
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3296031951904297,
      "learning_rate": 0.0005580005955089178,
      "loss": 3.3928,
      "step": 39275
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6411782503128052,
      "learning_rate": 0.0005579985081091513,
      "loss": 3.1325,
      "step": 39276
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.090458869934082,
      "learning_rate": 0.0005579964206614181,
      "loss": 3.0421,
      "step": 39277
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6920171976089478,
      "learning_rate": 0.0005579943331657185,
      "loss": 3.2328,
      "step": 39278
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2863094806671143,
      "learning_rate": 0.0005579922456220529,
      "loss": 2.853,
      "step": 39279
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.0695149898529053,
      "learning_rate": 0.0005579901580304215,
      "loss": 2.9739,
      "step": 39280
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8564308881759644,
      "learning_rate": 0.000557988070390825,
      "loss": 2.9385,
      "step": 39281
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5927742719650269,
      "learning_rate": 0.0005579859827032637,
      "loss": 3.3277,
      "step": 39282
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4337328672409058,
      "learning_rate": 0.0005579838949677379,
      "loss": 3.0127,
      "step": 39283
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.377284288406372,
      "learning_rate": 0.0005579818071842479,
      "loss": 3.1575,
      "step": 39284
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7567777633666992,
      "learning_rate": 0.0005579797193527944,
      "loss": 2.9692,
      "step": 39285
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7210503816604614,
      "learning_rate": 0.0005579776314733775,
      "loss": 3.0166,
      "step": 39286
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.215670585632324,
      "learning_rate": 0.0005579755435459975,
      "loss": 2.9623,
      "step": 39287
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8711127042770386,
      "learning_rate": 0.0005579734555706552,
      "loss": 3.1523,
      "step": 39288
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9056909084320068,
      "learning_rate": 0.0005579713675473508,
      "loss": 3.1853,
      "step": 39289
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5791552066802979,
      "learning_rate": 0.0005579692794760846,
      "loss": 3.0848,
      "step": 39290
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5535316467285156,
      "learning_rate": 0.000557967191356857,
      "loss": 2.8377,
      "step": 39291
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.602325439453125,
      "learning_rate": 0.0005579651031896685,
      "loss": 3.1668,
      "step": 39292
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8748705387115479,
      "learning_rate": 0.0005579630149745193,
      "loss": 2.8832,
      "step": 39293
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7243907451629639,
      "learning_rate": 0.0005579609267114101,
      "loss": 2.8102,
      "step": 39294
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1567912101745605,
      "learning_rate": 0.0005579588384003409,
      "loss": 2.9366,
      "step": 39295
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.441943645477295,
      "learning_rate": 0.0005579567500413124,
      "loss": 2.9513,
      "step": 39296
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.404233455657959,
      "learning_rate": 0.0005579546616343248,
      "loss": 3.2378,
      "step": 39297
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9959796667099,
      "learning_rate": 0.0005579525731793786,
      "loss": 3.2087,
      "step": 39298
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6498992443084717,
      "learning_rate": 0.0005579504846764742,
      "loss": 3.0745,
      "step": 39299
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3017696142196655,
      "learning_rate": 0.0005579483961256118,
      "loss": 2.8231,
      "step": 39300
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7402371168136597,
      "learning_rate": 0.000557946307526792,
      "loss": 3.0889,
      "step": 39301
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4105932712554932,
      "learning_rate": 0.0005579442188800151,
      "loss": 3.0842,
      "step": 39302
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0683155059814453,
      "learning_rate": 0.0005579421301852815,
      "loss": 2.9998,
      "step": 39303
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5925371646881104,
      "learning_rate": 0.0005579400414425916,
      "loss": 3.3141,
      "step": 39304
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.488476037979126,
      "learning_rate": 0.0005579379526519457,
      "loss": 2.9815,
      "step": 39305
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3731166124343872,
      "learning_rate": 0.0005579358638133444,
      "loss": 3.1152,
      "step": 39306
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5570063591003418,
      "learning_rate": 0.0005579337749267879,
      "loss": 3.0297,
      "step": 39307
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9125488996505737,
      "learning_rate": 0.0005579316859922767,
      "loss": 2.7971,
      "step": 39308
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5580602884292603,
      "learning_rate": 0.000557929597009811,
      "loss": 3.0667,
      "step": 39309
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6862620115280151,
      "learning_rate": 0.0005579275079793914,
      "loss": 3.2283,
      "step": 39310
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9303780794143677,
      "learning_rate": 0.0005579254189010183,
      "loss": 2.9721,
      "step": 39311
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6972174644470215,
      "learning_rate": 0.0005579233297746918,
      "loss": 2.9746,
      "step": 39312
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4396578073501587,
      "learning_rate": 0.0005579212406004126,
      "loss": 3.3268,
      "step": 39313
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9203928709030151,
      "learning_rate": 0.0005579191513781809,
      "loss": 2.9338,
      "step": 39314
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.874747633934021,
      "learning_rate": 0.0005579170621079971,
      "loss": 2.8846,
      "step": 39315
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6619082689285278,
      "learning_rate": 0.0005579149727898619,
      "loss": 3.237,
      "step": 39316
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1968817710876465,
      "learning_rate": 0.0005579128834237752,
      "loss": 3.1172,
      "step": 39317
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6537420749664307,
      "learning_rate": 0.0005579107940097378,
      "loss": 2.9584,
      "step": 39318
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6730120182037354,
      "learning_rate": 0.0005579087045477499,
      "loss": 3.132,
      "step": 39319
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.084564685821533,
      "learning_rate": 0.0005579066150378117,
      "loss": 3.1051,
      "step": 39320
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4844179153442383,
      "learning_rate": 0.000557904525479924,
      "loss": 3.3542,
      "step": 39321
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8734569549560547,
      "learning_rate": 0.0005579024358740869,
      "loss": 3.2114,
      "step": 39322
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9844818115234375,
      "learning_rate": 0.0005579003462203009,
      "loss": 3.2202,
      "step": 39323
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4978057146072388,
      "learning_rate": 0.0005578982565185663,
      "loss": 2.8893,
      "step": 39324
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5162276029586792,
      "learning_rate": 0.0005578961667688836,
      "loss": 3.1782,
      "step": 39325
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.211824655532837,
      "learning_rate": 0.0005578940769712531,
      "loss": 2.8644,
      "step": 39326
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4218668937683105,
      "learning_rate": 0.0005578919871256753,
      "loss": 2.977,
      "step": 39327
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.751399278640747,
      "learning_rate": 0.0005578898972321504,
      "loss": 2.7493,
      "step": 39328
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4998756647109985,
      "learning_rate": 0.0005578878072906789,
      "loss": 3.1112,
      "step": 39329
    },
    {
      "epoch": 0.51,
      "grad_norm": 4.039776802062988,
      "learning_rate": 0.0005578857173012614,
      "loss": 3.0671,
      "step": 39330
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9348636865615845,
      "learning_rate": 0.0005578836272638978,
      "loss": 3.1892,
      "step": 39331
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4850324392318726,
      "learning_rate": 0.0005578815371785889,
      "loss": 3.0415,
      "step": 39332
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2055470943450928,
      "learning_rate": 0.0005578794470453349,
      "loss": 3.1934,
      "step": 39333
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.075317621231079,
      "learning_rate": 0.0005578773568641365,
      "loss": 2.9819,
      "step": 39334
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3588091135025024,
      "learning_rate": 0.0005578752666349935,
      "loss": 3.2012,
      "step": 39335
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5422073602676392,
      "learning_rate": 0.0005578731763579066,
      "loss": 2.9414,
      "step": 39336
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4609100818634033,
      "learning_rate": 0.0005578710860328764,
      "loss": 3.1442,
      "step": 39337
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.242020845413208,
      "learning_rate": 0.0005578689956599031,
      "loss": 2.9312,
      "step": 39338
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.405174970626831,
      "learning_rate": 0.000557866905238987,
      "loss": 2.9871,
      "step": 39339
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4164477586746216,
      "learning_rate": 0.0005578648147701287,
      "loss": 3.3868,
      "step": 39340
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5536707639694214,
      "learning_rate": 0.0005578627242533283,
      "loss": 3.2113,
      "step": 39341
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6199860572814941,
      "learning_rate": 0.0005578606336885864,
      "loss": 3.059,
      "step": 39342
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.54487144947052,
      "learning_rate": 0.0005578585430759033,
      "loss": 3.2626,
      "step": 39343
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.770443320274353,
      "learning_rate": 0.0005578564524152795,
      "loss": 2.8547,
      "step": 39344
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4975593090057373,
      "learning_rate": 0.0005578543617067153,
      "loss": 2.9915,
      "step": 39345
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3654004335403442,
      "learning_rate": 0.0005578522709502112,
      "loss": 2.9879,
      "step": 39346
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6639502048492432,
      "learning_rate": 0.0005578501801457674,
      "loss": 2.948,
      "step": 39347
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5630625486373901,
      "learning_rate": 0.0005578480892933844,
      "loss": 3.1113,
      "step": 39348
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.427897572517395,
      "learning_rate": 0.0005578459983930625,
      "loss": 3.0541,
      "step": 39349
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3503116369247437,
      "learning_rate": 0.0005578439074448023,
      "loss": 3.1419,
      "step": 39350
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4858571290969849,
      "learning_rate": 0.0005578418164486039,
      "loss": 3.2299,
      "step": 39351
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2058545351028442,
      "learning_rate": 0.0005578397254044679,
      "loss": 3.23,
      "step": 39352
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4623024463653564,
      "learning_rate": 0.0005578376343123946,
      "loss": 3.1016,
      "step": 39353
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2218053340911865,
      "learning_rate": 0.0005578355431723846,
      "loss": 3.2497,
      "step": 39354
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2302186489105225,
      "learning_rate": 0.0005578334519844379,
      "loss": 3.0695,
      "step": 39355
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4371901750564575,
      "learning_rate": 0.0005578313607485552,
      "loss": 2.9752,
      "step": 39356
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.952345848083496,
      "learning_rate": 0.0005578292694647367,
      "loss": 2.969,
      "step": 39357
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3916810750961304,
      "learning_rate": 0.0005578271781329829,
      "loss": 3.1062,
      "step": 39358
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4607723951339722,
      "learning_rate": 0.0005578250867532943,
      "loss": 3.0722,
      "step": 39359
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3679213523864746,
      "learning_rate": 0.0005578229953256709,
      "loss": 3.1328,
      "step": 39360
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5146424770355225,
      "learning_rate": 0.0005578209038501135,
      "loss": 3.0691,
      "step": 39361
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.430511236190796,
      "learning_rate": 0.0005578188123266223,
      "loss": 3.0725,
      "step": 39362
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5254381895065308,
      "learning_rate": 0.0005578167207551976,
      "loss": 2.9223,
      "step": 39363
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5138651132583618,
      "learning_rate": 0.0005578146291358401,
      "loss": 3.0539,
      "step": 39364
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7635045051574707,
      "learning_rate": 0.0005578125374685499,
      "loss": 3.2705,
      "step": 39365
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6193865537643433,
      "learning_rate": 0.0005578104457533275,
      "loss": 3.0625,
      "step": 39366
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6345319747924805,
      "learning_rate": 0.0005578083539901732,
      "loss": 2.9615,
      "step": 39367
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7081959247589111,
      "learning_rate": 0.0005578062621790876,
      "loss": 2.9372,
      "step": 39368
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6954289674758911,
      "learning_rate": 0.0005578041703200708,
      "loss": 3.2656,
      "step": 39369
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9207618236541748,
      "learning_rate": 0.0005578020784131235,
      "loss": 3.0936,
      "step": 39370
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.525460124015808,
      "learning_rate": 0.0005577999864582458,
      "loss": 3.1232,
      "step": 39371
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6645029783248901,
      "learning_rate": 0.0005577978944554383,
      "loss": 3.3505,
      "step": 39372
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7474421262741089,
      "learning_rate": 0.0005577958024047012,
      "loss": 3.2535,
      "step": 39373
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.233419418334961,
      "learning_rate": 0.0005577937103060351,
      "loss": 2.7504,
      "step": 39374
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.411786675453186,
      "learning_rate": 0.0005577916181594402,
      "loss": 3.1816,
      "step": 39375
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0128729343414307,
      "learning_rate": 0.0005577895259649171,
      "loss": 2.8679,
      "step": 39376
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4698259830474854,
      "learning_rate": 0.000557787433722466,
      "loss": 2.9839,
      "step": 39377
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.63364839553833,
      "learning_rate": 0.0005577853414320872,
      "loss": 3.1913,
      "step": 39378
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5206665992736816,
      "learning_rate": 0.0005577832490937814,
      "loss": 3.4387,
      "step": 39379
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6287150382995605,
      "learning_rate": 0.0005577811567075488,
      "loss": 3.1876,
      "step": 39380
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5073773860931396,
      "learning_rate": 0.0005577790642733898,
      "loss": 3.2025,
      "step": 39381
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6779001951217651,
      "learning_rate": 0.0005577769717913047,
      "loss": 3.0803,
      "step": 39382
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.882649302482605,
      "learning_rate": 0.0005577748792612941,
      "loss": 3.4819,
      "step": 39383
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8476130962371826,
      "learning_rate": 0.0005577727866833583,
      "loss": 3.1684,
      "step": 39384
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7124475240707397,
      "learning_rate": 0.0005577706940574976,
      "loss": 3.1336,
      "step": 39385
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.792891502380371,
      "learning_rate": 0.0005577686013837126,
      "loss": 2.737,
      "step": 39386
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.002744197845459,
      "learning_rate": 0.0005577665086620035,
      "loss": 2.98,
      "step": 39387
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8524506092071533,
      "learning_rate": 0.0005577644158923708,
      "loss": 3.0836,
      "step": 39388
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6466237306594849,
      "learning_rate": 0.0005577623230748147,
      "loss": 3.0292,
      "step": 39389
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2261271476745605,
      "learning_rate": 0.0005577602302093357,
      "loss": 2.9747,
      "step": 39390
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4965949058532715,
      "learning_rate": 0.0005577581372959343,
      "loss": 2.8666,
      "step": 39391
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6762170791625977,
      "learning_rate": 0.0005577560443346107,
      "loss": 2.9822,
      "step": 39392
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7485781908035278,
      "learning_rate": 0.0005577539513253654,
      "loss": 3.0739,
      "step": 39393
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4482839107513428,
      "learning_rate": 0.0005577518582681989,
      "loss": 3.2611,
      "step": 39394
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6965306997299194,
      "learning_rate": 0.0005577497651631114,
      "loss": 2.8234,
      "step": 39395
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4964137077331543,
      "learning_rate": 0.0005577476720101033,
      "loss": 3.0094,
      "step": 39396
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5869466066360474,
      "learning_rate": 0.0005577455788091751,
      "loss": 3.0705,
      "step": 39397
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.353135347366333,
      "learning_rate": 0.0005577434855603271,
      "loss": 3.1473,
      "step": 39398
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5048902034759521,
      "learning_rate": 0.0005577413922635598,
      "loss": 3.087,
      "step": 39399
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2790368795394897,
      "learning_rate": 0.0005577392989188735,
      "loss": 3.1366,
      "step": 39400
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.581557035446167,
      "learning_rate": 0.0005577372055262684,
      "loss": 2.9211,
      "step": 39401
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.705122470855713,
      "learning_rate": 0.0005577351120857453,
      "loss": 3.1419,
      "step": 39402
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6290403604507446,
      "learning_rate": 0.0005577330185973043,
      "loss": 2.9703,
      "step": 39403
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4941517114639282,
      "learning_rate": 0.0005577309250609458,
      "loss": 3.0027,
      "step": 39404
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3369851112365723,
      "learning_rate": 0.0005577288314766705,
      "loss": 2.8048,
      "step": 39405
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3935884237289429,
      "learning_rate": 0.0005577267378444783,
      "loss": 3.2502,
      "step": 39406
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9794493913650513,
      "learning_rate": 0.00055772464416437,
      "loss": 3.1559,
      "step": 39407
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.727105736732483,
      "learning_rate": 0.0005577225504363457,
      "loss": 2.8869,
      "step": 39408
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5116926431655884,
      "learning_rate": 0.0005577204566604059,
      "loss": 3.0446,
      "step": 39409
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5516753196716309,
      "learning_rate": 0.0005577183628365511,
      "loss": 3.1413,
      "step": 39410
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2287023067474365,
      "learning_rate": 0.0005577162689647815,
      "loss": 2.9861,
      "step": 39411
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.864686369895935,
      "learning_rate": 0.0005577141750450976,
      "loss": 3.0187,
      "step": 39412
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.046773910522461,
      "learning_rate": 0.0005577120810774998,
      "loss": 2.8283,
      "step": 39413
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5765312910079956,
      "learning_rate": 0.0005577099870619886,
      "loss": 2.9619,
      "step": 39414
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.472516417503357,
      "learning_rate": 0.000557707892998564,
      "loss": 3.023,
      "step": 39415
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5789791345596313,
      "learning_rate": 0.0005577057988872268,
      "loss": 2.9151,
      "step": 39416
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9950675964355469,
      "learning_rate": 0.0005577037047279771,
      "loss": 3.2152,
      "step": 39417
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4729365110397339,
      "learning_rate": 0.0005577016105208155,
      "loss": 3.312,
      "step": 39418
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0984113216400146,
      "learning_rate": 0.0005576995162657423,
      "loss": 2.7746,
      "step": 39419
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7681591510772705,
      "learning_rate": 0.0005576974219627578,
      "loss": 2.8428,
      "step": 39420
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.312875509262085,
      "learning_rate": 0.0005576953276118626,
      "loss": 2.9389,
      "step": 39421
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.122802495956421,
      "learning_rate": 0.000557693233213057,
      "loss": 3.1116,
      "step": 39422
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4700636863708496,
      "learning_rate": 0.0005576911387663413,
      "loss": 3.2059,
      "step": 39423
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.710848093032837,
      "learning_rate": 0.0005576890442717161,
      "loss": 3.1134,
      "step": 39424
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.561524510383606,
      "learning_rate": 0.0005576869497291814,
      "loss": 3.0518,
      "step": 39425
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.697183609008789,
      "learning_rate": 0.000557684855138738,
      "loss": 3.0929,
      "step": 39426
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8371721506118774,
      "learning_rate": 0.000557682760500386,
      "loss": 2.9658,
      "step": 39427
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2024617195129395,
      "learning_rate": 0.0005576806658141259,
      "loss": 2.8654,
      "step": 39428
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.75870680809021,
      "learning_rate": 0.0005576785710799582,
      "loss": 2.857,
      "step": 39429
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.284387469291687,
      "learning_rate": 0.0005576764762978832,
      "loss": 2.986,
      "step": 39430
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.700385332107544,
      "learning_rate": 0.0005576743814679013,
      "loss": 3.2519,
      "step": 39431
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.49854576587677,
      "learning_rate": 0.0005576722865900127,
      "loss": 2.9842,
      "step": 39432
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7467278242111206,
      "learning_rate": 0.0005576701916642182,
      "loss": 2.9898,
      "step": 39433
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4945969581604004,
      "learning_rate": 0.0005576680966905178,
      "loss": 2.8866,
      "step": 39434
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3185385465621948,
      "learning_rate": 0.000557666001668912,
      "loss": 3.2872,
      "step": 39435
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4608533382415771,
      "learning_rate": 0.0005576639065994013,
      "loss": 3.2307,
      "step": 39436
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8683353662490845,
      "learning_rate": 0.000557661811481986,
      "loss": 2.9514,
      "step": 39437
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.414175033569336,
      "learning_rate": 0.0005576597163166664,
      "loss": 3.196,
      "step": 39438
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3965750932693481,
      "learning_rate": 0.0005576576211034431,
      "loss": 3.1179,
      "step": 39439
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.851199746131897,
      "learning_rate": 0.0005576555258423164,
      "loss": 3.1239,
      "step": 39440
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4922723770141602,
      "learning_rate": 0.0005576534305332866,
      "loss": 2.9412,
      "step": 39441
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.7569525241851807,
      "learning_rate": 0.0005576513351763543,
      "loss": 2.9818,
      "step": 39442
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1791083812713623,
      "learning_rate": 0.0005576492397715196,
      "loss": 2.9743,
      "step": 39443
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6575839519500732,
      "learning_rate": 0.0005576471443187831,
      "loss": 3.2574,
      "step": 39444
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4854416847229004,
      "learning_rate": 0.0005576450488181452,
      "loss": 2.8631,
      "step": 39445
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5428582429885864,
      "learning_rate": 0.0005576429532696062,
      "loss": 2.9324,
      "step": 39446
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5153653621673584,
      "learning_rate": 0.0005576408576731664,
      "loss": 3.0971,
      "step": 39447
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7415728569030762,
      "learning_rate": 0.0005576387620288265,
      "loss": 3.1706,
      "step": 39448
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.971748948097229,
      "learning_rate": 0.0005576366663365865,
      "loss": 3.0004,
      "step": 39449
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4114770889282227,
      "learning_rate": 0.000557634570596447,
      "loss": 3.2412,
      "step": 39450
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.343875527381897,
      "learning_rate": 0.0005576324748084085,
      "loss": 3.1134,
      "step": 39451
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.2944034337997437,
      "learning_rate": 0.0005576303789724712,
      "loss": 3.1855,
      "step": 39452
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.5399980545043945,
      "learning_rate": 0.0005576282830886354,
      "loss": 3.1328,
      "step": 39453
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7656832933425903,
      "learning_rate": 0.0005576261871569018,
      "loss": 2.9347,
      "step": 39454
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8534092903137207,
      "learning_rate": 0.0005576240911772706,
      "loss": 2.8229,
      "step": 39455
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.653151035308838,
      "learning_rate": 0.0005576219951497423,
      "loss": 2.9558,
      "step": 39456
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4096877574920654,
      "learning_rate": 0.000557619899074317,
      "loss": 3.2218,
      "step": 39457
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5001531839370728,
      "learning_rate": 0.0005576178029509955,
      "loss": 3.1106,
      "step": 39458
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7929435968399048,
      "learning_rate": 0.000557615706779778,
      "loss": 3.1023,
      "step": 39459
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.768800973892212,
      "learning_rate": 0.0005576136105606647,
      "loss": 2.8429,
      "step": 39460
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4427939653396606,
      "learning_rate": 0.0005576115142936562,
      "loss": 3.0526,
      "step": 39461
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7479065656661987,
      "learning_rate": 0.000557609417978753,
      "loss": 3.0901,
      "step": 39462
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4104989767074585,
      "learning_rate": 0.0005576073216159552,
      "loss": 3.1167,
      "step": 39463
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4890778064727783,
      "learning_rate": 0.0005576052252052635,
      "loss": 3.0887,
      "step": 39464
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.642758846282959,
      "learning_rate": 0.000557603128746678,
      "loss": 3.2309,
      "step": 39465
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.849287509918213,
      "learning_rate": 0.0005576010322401991,
      "loss": 3.0313,
      "step": 39466
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3593144416809082,
      "learning_rate": 0.0005575989356858275,
      "loss": 3.0596,
      "step": 39467
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8842419385910034,
      "learning_rate": 0.0005575968390835633,
      "loss": 3.1996,
      "step": 39468
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4542882442474365,
      "learning_rate": 0.0005575947424334071,
      "loss": 2.9755,
      "step": 39469
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3430001735687256,
      "learning_rate": 0.000557592645735359,
      "loss": 2.9774,
      "step": 39470
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.724156379699707,
      "learning_rate": 0.0005575905489894196,
      "loss": 3.1579,
      "step": 39471
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6721762418746948,
      "learning_rate": 0.0005575884521955894,
      "loss": 3.2186,
      "step": 39472
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.655144214630127,
      "learning_rate": 0.0005575863553538686,
      "loss": 3.0917,
      "step": 39473
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4996163845062256,
      "learning_rate": 0.0005575842584642576,
      "loss": 2.8276,
      "step": 39474
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.866917133331299,
      "learning_rate": 0.0005575821615267569,
      "loss": 3.0847,
      "step": 39475
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7733343839645386,
      "learning_rate": 0.0005575800645413666,
      "loss": 3.0737,
      "step": 39476
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.974226713180542,
      "learning_rate": 0.0005575779675080875,
      "loss": 3.3036,
      "step": 39477
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.574623465538025,
      "learning_rate": 0.0005575758704269197,
      "loss": 3.0289,
      "step": 39478
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6849082708358765,
      "learning_rate": 0.0005575737732978637,
      "loss": 2.9454,
      "step": 39479
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.394721508026123,
      "learning_rate": 0.00055757167612092,
      "loss": 3.2598,
      "step": 39480
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5925747156143188,
      "learning_rate": 0.0005575695788960887,
      "loss": 3.2613,
      "step": 39481
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7703239917755127,
      "learning_rate": 0.0005575674816233704,
      "loss": 3.2338,
      "step": 39482
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.58173406124115,
      "learning_rate": 0.0005575653843027654,
      "loss": 2.8714,
      "step": 39483
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5415362119674683,
      "learning_rate": 0.0005575632869342742,
      "loss": 2.8839,
      "step": 39484
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.311676025390625,
      "learning_rate": 0.0005575611895178971,
      "loss": 2.996,
      "step": 39485
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9709638357162476,
      "learning_rate": 0.0005575590920536346,
      "loss": 3.0909,
      "step": 39486
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.080416202545166,
      "learning_rate": 0.0005575569945414869,
      "loss": 3.1075,
      "step": 39487
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9462039470672607,
      "learning_rate": 0.0005575548969814543,
      "loss": 3.2079,
      "step": 39488
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0686371326446533,
      "learning_rate": 0.0005575527993735376,
      "loss": 2.7925,
      "step": 39489
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.354325532913208,
      "learning_rate": 0.0005575507017177371,
      "loss": 3.2795,
      "step": 39490
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8763000965118408,
      "learning_rate": 0.0005575486040140529,
      "loss": 2.8783,
      "step": 39491
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7628064155578613,
      "learning_rate": 0.0005575465062624855,
      "loss": 2.9261,
      "step": 39492
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.478196144104004,
      "learning_rate": 0.0005575444084630355,
      "loss": 3.1455,
      "step": 39493
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6203536987304688,
      "learning_rate": 0.000557542310615703,
      "loss": 3.0782,
      "step": 39494
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4605642557144165,
      "learning_rate": 0.0005575402127204886,
      "loss": 3.1999,
      "step": 39495
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4223167896270752,
      "learning_rate": 0.0005575381147773926,
      "loss": 3.0967,
      "step": 39496
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9727650880813599,
      "learning_rate": 0.0005575360167864154,
      "loss": 2.9502,
      "step": 39497
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4617164134979248,
      "learning_rate": 0.0005575339187475574,
      "loss": 2.7847,
      "step": 39498
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8813343048095703,
      "learning_rate": 0.000557531820660819,
      "loss": 3.1027,
      "step": 39499
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.6379780769348145,
      "learning_rate": 0.0005575297225262005,
      "loss": 2.986,
      "step": 39500
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2752840518951416,
      "learning_rate": 0.0005575276243437024,
      "loss": 3.0447,
      "step": 39501
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6186299324035645,
      "learning_rate": 0.0005575255261133251,
      "loss": 3.0103,
      "step": 39502
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8658055067062378,
      "learning_rate": 0.000557523427835069,
      "loss": 3.0764,
      "step": 39503
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5627548694610596,
      "learning_rate": 0.0005575213295089342,
      "loss": 3.1687,
      "step": 39504
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0651183128356934,
      "learning_rate": 0.0005575192311349216,
      "loss": 3.0039,
      "step": 39505
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6164222955703735,
      "learning_rate": 0.0005575171327130311,
      "loss": 3.2952,
      "step": 39506
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4348911046981812,
      "learning_rate": 0.0005575150342432634,
      "loss": 3.1463,
      "step": 39507
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7118725776672363,
      "learning_rate": 0.0005575129357256188,
      "loss": 3.1418,
      "step": 39508
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.9028289318084717,
      "learning_rate": 0.0005575108371600976,
      "loss": 3.1793,
      "step": 39509
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.358001708984375,
      "learning_rate": 0.0005575087385467003,
      "loss": 3.1485,
      "step": 39510
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4165281057357788,
      "learning_rate": 0.0005575066398854273,
      "loss": 3.0861,
      "step": 39511
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4438915252685547,
      "learning_rate": 0.000557504541176279,
      "loss": 3.0765,
      "step": 39512
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.806243896484375,
      "learning_rate": 0.0005575024424192557,
      "loss": 3.0856,
      "step": 39513
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7948918342590332,
      "learning_rate": 0.0005575003436143578,
      "loss": 2.9411,
      "step": 39514
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.9231271743774414,
      "learning_rate": 0.0005574982447615858,
      "loss": 2.8674,
      "step": 39515
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.801818609237671,
      "learning_rate": 0.00055749614586094,
      "loss": 2.9601,
      "step": 39516
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.2177224159240723,
      "learning_rate": 0.0005574940469124208,
      "loss": 2.9477,
      "step": 39517
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7764904499053955,
      "learning_rate": 0.0005574919479160286,
      "loss": 2.9921,
      "step": 39518
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.446537971496582,
      "learning_rate": 0.0005574898488717637,
      "loss": 3.1171,
      "step": 39519
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.001513719558716,
      "learning_rate": 0.0005574877497796267,
      "loss": 2.9847,
      "step": 39520
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6803529262542725,
      "learning_rate": 0.0005574856506396177,
      "loss": 3.0124,
      "step": 39521
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7059028148651123,
      "learning_rate": 0.0005574835514517375,
      "loss": 3.0181,
      "step": 39522
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5162603855133057,
      "learning_rate": 0.0005574814522159861,
      "loss": 3.0236,
      "step": 39523
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.6079070568084717,
      "learning_rate": 0.0005574793529323639,
      "loss": 3.0203,
      "step": 39524
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.546675205230713,
      "learning_rate": 0.0005574772536008718,
      "loss": 2.9481,
      "step": 39525
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5215259790420532,
      "learning_rate": 0.0005574751542215095,
      "loss": 3.1996,
      "step": 39526
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7378281354904175,
      "learning_rate": 0.0005574730547942778,
      "loss": 3.2483,
      "step": 39527
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.301572561264038,
      "learning_rate": 0.000557470955319177,
      "loss": 3.0498,
      "step": 39528
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8008519411087036,
      "learning_rate": 0.0005574688557962074,
      "loss": 2.8764,
      "step": 39529
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8791017532348633,
      "learning_rate": 0.0005574667562253697,
      "loss": 2.9864,
      "step": 39530
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4093470573425293,
      "learning_rate": 0.0005574646566066638,
      "loss": 2.9173,
      "step": 39531
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.822549819946289,
      "learning_rate": 0.0005574625569400906,
      "loss": 2.97,
      "step": 39532
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.4538288116455078,
      "learning_rate": 0.0005574604572256501,
      "loss": 3.1387,
      "step": 39533
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.430475115776062,
      "learning_rate": 0.0005574583574633429,
      "loss": 3.2321,
      "step": 39534
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.8925434350967407,
      "learning_rate": 0.0005574562576531694,
      "loss": 3.2681,
      "step": 39535
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.201653242111206,
      "learning_rate": 0.0005574541577951298,
      "loss": 2.9968,
      "step": 39536
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.4432427883148193,
      "learning_rate": 0.0005574520578892247,
      "loss": 3.2787,
      "step": 39537
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.0846333503723145,
      "learning_rate": 0.0005574499579354543,
      "loss": 3.1988,
      "step": 39538
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.829291343688965,
      "learning_rate": 0.0005574478579338192,
      "loss": 2.9086,
      "step": 39539
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.319023847579956,
      "learning_rate": 0.0005574457578843196,
      "loss": 3.0485,
      "step": 39540
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3615319728851318,
      "learning_rate": 0.0005574436577869561,
      "loss": 3.3447,
      "step": 39541
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.1074535846710205,
      "learning_rate": 0.0005574415576417288,
      "loss": 3.0382,
      "step": 39542
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.844048023223877,
      "learning_rate": 0.0005574394574486384,
      "loss": 3.0728,
      "step": 39543
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5214769840240479,
      "learning_rate": 0.000557437357207685,
      "loss": 2.8752,
      "step": 39544
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.855939507484436,
      "learning_rate": 0.0005574352569188692,
      "loss": 2.9654,
      "step": 39545
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.275670051574707,
      "learning_rate": 0.0005574331565821914,
      "loss": 3.1604,
      "step": 39546
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.7009689807891846,
      "learning_rate": 0.0005574310561976518,
      "loss": 2.7547,
      "step": 39547
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.5378319025039673,
      "learning_rate": 0.000557428955765251,
      "loss": 3.1356,
      "step": 39548
    },
    {
      "epoch": 0.51,
      "grad_norm": 4.073700904846191,
      "learning_rate": 0.0005574268552849893,
      "loss": 2.9694,
      "step": 39549
    },
    {
      "epoch": 0.51,
      "grad_norm": 4.5908684730529785,
      "learning_rate": 0.000557424754756867,
      "loss": 2.9399,
      "step": 39550
    },
    {
      "epoch": 0.51,
      "grad_norm": 1.3071717023849487,
      "learning_rate": 0.0005574226541808846,
      "loss": 2.954,
      "step": 39551
    },
    {
      "epoch": 0.51,
      "grad_norm": 2.798678159713745,
      "learning_rate": 0.0005574205535570425,
      "loss": 2.7918,
      "step": 39552
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2446300983428955,
      "learning_rate": 0.000557418452885341,
      "loss": 2.9868,
      "step": 39553
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3776676654815674,
      "learning_rate": 0.0005574163521657807,
      "loss": 3.065,
      "step": 39554
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2659804821014404,
      "learning_rate": 0.0005574142513983618,
      "loss": 3.1708,
      "step": 39555
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.094214916229248,
      "learning_rate": 0.0005574121505830846,
      "loss": 3.0249,
      "step": 39556
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.532367706298828,
      "learning_rate": 0.0005574100497199498,
      "loss": 2.9674,
      "step": 39557
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5763248205184937,
      "learning_rate": 0.0005574079488089574,
      "loss": 3.1366,
      "step": 39558
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.125924587249756,
      "learning_rate": 0.0005574058478501082,
      "loss": 2.8886,
      "step": 39559
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8521788120269775,
      "learning_rate": 0.0005574037468434023,
      "loss": 2.965,
      "step": 39560
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8715410232543945,
      "learning_rate": 0.0005574016457888403,
      "loss": 3.1724,
      "step": 39561
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3488800525665283,
      "learning_rate": 0.0005573995446864222,
      "loss": 3.1335,
      "step": 39562
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9973076581954956,
      "learning_rate": 0.000557397443536149,
      "loss": 2.9858,
      "step": 39563
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.489124298095703,
      "learning_rate": 0.0005573953423380206,
      "loss": 3.0019,
      "step": 39564
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.0208094120025635,
      "learning_rate": 0.0005573932410920375,
      "loss": 2.8818,
      "step": 39565
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7384310960769653,
      "learning_rate": 0.0005573911397982001,
      "loss": 3.0115,
      "step": 39566
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9269075393676758,
      "learning_rate": 0.000557389038456509,
      "loss": 3.0399,
      "step": 39567
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.775641918182373,
      "learning_rate": 0.0005573869370669643,
      "loss": 3.3071,
      "step": 39568
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.696188449859619,
      "learning_rate": 0.0005573848356295665,
      "loss": 3.1711,
      "step": 39569
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.678993582725525,
      "learning_rate": 0.000557382734144316,
      "loss": 3.4288,
      "step": 39570
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.575192928314209,
      "learning_rate": 0.0005573806326112133,
      "loss": 3.109,
      "step": 39571
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.0683891773223877,
      "learning_rate": 0.0005573785310302586,
      "loss": 3.0534,
      "step": 39572
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.0033891201019287,
      "learning_rate": 0.0005573764294014524,
      "loss": 3.1519,
      "step": 39573
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6648461818695068,
      "learning_rate": 0.000557374327724795,
      "loss": 3.2169,
      "step": 39574
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6140650510787964,
      "learning_rate": 0.0005573722260002869,
      "loss": 3.1046,
      "step": 39575
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.9425647258758545,
      "learning_rate": 0.0005573701242279285,
      "loss": 2.9741,
      "step": 39576
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.22475266456604,
      "learning_rate": 0.00055736802240772,
      "loss": 3.2967,
      "step": 39577
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7143213748931885,
      "learning_rate": 0.0005573659205396621,
      "loss": 3.0711,
      "step": 39578
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3803151845932007,
      "learning_rate": 0.0005573638186237549,
      "loss": 3.1642,
      "step": 39579
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8133397102355957,
      "learning_rate": 0.000557361716659999,
      "loss": 3.0047,
      "step": 39580
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.164604663848877,
      "learning_rate": 0.0005573596146483946,
      "loss": 3.089,
      "step": 39581
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.245195150375366,
      "learning_rate": 0.0005573575125889422,
      "loss": 2.9816,
      "step": 39582
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7375342845916748,
      "learning_rate": 0.0005573554104816422,
      "loss": 3.1026,
      "step": 39583
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3049354553222656,
      "learning_rate": 0.000557353308326495,
      "loss": 3.0185,
      "step": 39584
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9754084348678589,
      "learning_rate": 0.0005573512061235008,
      "loss": 2.9821,
      "step": 39585
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.733040690422058,
      "learning_rate": 0.0005573491038726604,
      "loss": 3.0597,
      "step": 39586
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3706316947937012,
      "learning_rate": 0.0005573470015739738,
      "loss": 3.2081,
      "step": 39587
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2646098136901855,
      "learning_rate": 0.0005573448992274416,
      "loss": 2.9559,
      "step": 39588
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7730398178100586,
      "learning_rate": 0.0005573427968330641,
      "loss": 2.9605,
      "step": 39589
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4264811277389526,
      "learning_rate": 0.0005573406943908417,
      "loss": 3.2193,
      "step": 39590
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.402724027633667,
      "learning_rate": 0.0005573385919007748,
      "loss": 2.9323,
      "step": 39591
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.196339726448059,
      "learning_rate": 0.0005573364893628638,
      "loss": 3.2437,
      "step": 39592
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9477417469024658,
      "learning_rate": 0.0005573343867771091,
      "loss": 3.0309,
      "step": 39593
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3873943090438843,
      "learning_rate": 0.000557332284143511,
      "loss": 3.0944,
      "step": 39594
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4822914600372314,
      "learning_rate": 0.0005573301814620702,
      "loss": 2.9532,
      "step": 39595
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4390592575073242,
      "learning_rate": 0.0005573280787327867,
      "loss": 3.1419,
      "step": 39596
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4061676263809204,
      "learning_rate": 0.000557325975955661,
      "loss": 2.8372,
      "step": 39597
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7836962938308716,
      "learning_rate": 0.0005573238731306936,
      "loss": 2.7853,
      "step": 39598
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6111797094345093,
      "learning_rate": 0.000557321770257885,
      "loss": 3.3025,
      "step": 39599
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.418563961982727,
      "learning_rate": 0.0005573196673372352,
      "loss": 3.0354,
      "step": 39600
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8073108196258545,
      "learning_rate": 0.0005573175643687449,
      "loss": 3.0683,
      "step": 39601
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6640267372131348,
      "learning_rate": 0.0005573154613524143,
      "loss": 3.1212,
      "step": 39602
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7300204038619995,
      "learning_rate": 0.000557313358288244,
      "loss": 2.8707,
      "step": 39603
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5275429487228394,
      "learning_rate": 0.0005573112551762343,
      "loss": 3.097,
      "step": 39604
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3343075513839722,
      "learning_rate": 0.0005573091520163857,
      "loss": 3.0491,
      "step": 39605
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6978060007095337,
      "learning_rate": 0.0005573070488086982,
      "loss": 3.0255,
      "step": 39606
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4521758556365967,
      "learning_rate": 0.0005573049455531727,
      "loss": 3.0062,
      "step": 39607
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.671081304550171,
      "learning_rate": 0.0005573028422498093,
      "loss": 3.0844,
      "step": 39608
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6429134607315063,
      "learning_rate": 0.0005573007388986083,
      "loss": 3.0176,
      "step": 39609
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.83150315284729,
      "learning_rate": 0.0005572986354995703,
      "loss": 3.1689,
      "step": 39610
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4448132514953613,
      "learning_rate": 0.0005572965320526957,
      "loss": 3.192,
      "step": 39611
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.021299123764038,
      "learning_rate": 0.0005572944285579847,
      "loss": 2.908,
      "step": 39612
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.988997220993042,
      "learning_rate": 0.000557292325015438,
      "loss": 3.0551,
      "step": 39613
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3424386978149414,
      "learning_rate": 0.0005572902214250557,
      "loss": 3.3744,
      "step": 39614
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8811289072036743,
      "learning_rate": 0.0005572881177868382,
      "loss": 3.2269,
      "step": 39615
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6853810548782349,
      "learning_rate": 0.0005572860141007861,
      "loss": 2.7651,
      "step": 39616
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.49256432056427,
      "learning_rate": 0.0005572839103668996,
      "loss": 3.1428,
      "step": 39617
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3261704444885254,
      "learning_rate": 0.0005572818065851793,
      "loss": 2.9009,
      "step": 39618
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3227931261062622,
      "learning_rate": 0.0005572797027556253,
      "loss": 2.9474,
      "step": 39619
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2225353717803955,
      "learning_rate": 0.0005572775988782381,
      "loss": 3.232,
      "step": 39620
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2734993696212769,
      "learning_rate": 0.0005572754949530182,
      "loss": 3.1039,
      "step": 39621
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4792550802230835,
      "learning_rate": 0.0005572733909799661,
      "loss": 3.1899,
      "step": 39622
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2781354188919067,
      "learning_rate": 0.0005572712869590818,
      "loss": 3.1686,
      "step": 39623
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.46113121509552,
      "learning_rate": 0.0005572691828903661,
      "loss": 3.1085,
      "step": 39624
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4790791273117065,
      "learning_rate": 0.0005572670787738191,
      "loss": 3.111,
      "step": 39625
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4721310138702393,
      "learning_rate": 0.0005572649746094414,
      "loss": 2.8747,
      "step": 39626
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5723129510879517,
      "learning_rate": 0.0005572628703972331,
      "loss": 3.2626,
      "step": 39627
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4770478010177612,
      "learning_rate": 0.0005572607661371948,
      "loss": 3.2124,
      "step": 39628
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6423968076705933,
      "learning_rate": 0.000557258661829327,
      "loss": 3.0822,
      "step": 39629
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6306278705596924,
      "learning_rate": 0.0005572565574736299,
      "loss": 3.1875,
      "step": 39630
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9212391376495361,
      "learning_rate": 0.0005572544530701039,
      "loss": 2.8635,
      "step": 39631
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6673616170883179,
      "learning_rate": 0.0005572523486187494,
      "loss": 3.0339,
      "step": 39632
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6452610492706299,
      "learning_rate": 0.0005572502441195669,
      "loss": 3.2763,
      "step": 39633
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4248542785644531,
      "learning_rate": 0.0005572481395725567,
      "loss": 2.996,
      "step": 39634
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4834039211273193,
      "learning_rate": 0.0005572460349777192,
      "loss": 3.24,
      "step": 39635
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4310812950134277,
      "learning_rate": 0.000557243930335055,
      "loss": 3.0973,
      "step": 39636
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7712738513946533,
      "learning_rate": 0.0005572418256445641,
      "loss": 3.1463,
      "step": 39637
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3862488269805908,
      "learning_rate": 0.0005572397209062472,
      "loss": 3.2578,
      "step": 39638
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6016035079956055,
      "learning_rate": 0.0005572376161201045,
      "loss": 3.1006,
      "step": 39639
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4957350492477417,
      "learning_rate": 0.0005572355112861365,
      "loss": 3.1233,
      "step": 39640
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5322891473770142,
      "learning_rate": 0.0005572334064043434,
      "loss": 3.1706,
      "step": 39641
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4635348320007324,
      "learning_rate": 0.0005572313014747261,
      "loss": 3.0232,
      "step": 39642
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4235872030258179,
      "learning_rate": 0.0005572291964972844,
      "loss": 2.9673,
      "step": 39643
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8236628770828247,
      "learning_rate": 0.000557227091472019,
      "loss": 3.0136,
      "step": 39644
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8636250495910645,
      "learning_rate": 0.0005572249863989301,
      "loss": 2.9072,
      "step": 39645
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0297470092773438,
      "learning_rate": 0.0005572228812780185,
      "loss": 3.0995,
      "step": 39646
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8155304193496704,
      "learning_rate": 0.000557220776109284,
      "loss": 3.0603,
      "step": 39647
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4159191846847534,
      "learning_rate": 0.0005572186708927275,
      "loss": 3.2407,
      "step": 39648
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2713580131530762,
      "learning_rate": 0.0005572165656283492,
      "loss": 3.1708,
      "step": 39649
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4689762592315674,
      "learning_rate": 0.0005572144603161493,
      "loss": 3.2165,
      "step": 39650
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3845000267028809,
      "learning_rate": 0.0005572123549561286,
      "loss": 2.8721,
      "step": 39651
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4068052768707275,
      "learning_rate": 0.0005572102495482871,
      "loss": 3.1615,
      "step": 39652
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.253949761390686,
      "learning_rate": 0.0005572081440926255,
      "loss": 3.1853,
      "step": 39653
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.369211196899414,
      "learning_rate": 0.0005572060385891439,
      "loss": 2.6993,
      "step": 39654
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7900068759918213,
      "learning_rate": 0.000557203933037843,
      "loss": 3.2073,
      "step": 39655
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3171584606170654,
      "learning_rate": 0.0005572018274387229,
      "loss": 3.2672,
      "step": 39656
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9405351877212524,
      "learning_rate": 0.0005571997217917842,
      "loss": 3.1761,
      "step": 39657
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.738647937774658,
      "learning_rate": 0.0005571976160970273,
      "loss": 2.913,
      "step": 39658
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3538312911987305,
      "learning_rate": 0.0005571955103544523,
      "loss": 2.8009,
      "step": 39659
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0177395343780518,
      "learning_rate": 0.00055719340456406,
      "loss": 3.1604,
      "step": 39660
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.778223991394043,
      "learning_rate": 0.0005571912987258504,
      "loss": 3.1162,
      "step": 39661
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.277154803276062,
      "learning_rate": 0.0005571891928398242,
      "loss": 3.3174,
      "step": 39662
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5387089252471924,
      "learning_rate": 0.0005571870869059817,
      "loss": 3.0811,
      "step": 39663
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4585514068603516,
      "learning_rate": 0.0005571849809243232,
      "loss": 3.3281,
      "step": 39664
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.372442603111267,
      "learning_rate": 0.0005571828748948492,
      "loss": 3.0348,
      "step": 39665
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4805940389633179,
      "learning_rate": 0.0005571807688175601,
      "loss": 3.0068,
      "step": 39666
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.589179515838623,
      "learning_rate": 0.0005571786626924561,
      "loss": 2.9256,
      "step": 39667
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9882971048355103,
      "learning_rate": 0.0005571765565195377,
      "loss": 3.2923,
      "step": 39668
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5860038995742798,
      "learning_rate": 0.0005571744502988055,
      "loss": 3.0008,
      "step": 39669
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8402713537216187,
      "learning_rate": 0.0005571723440302597,
      "loss": 3.1663,
      "step": 39670
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4767124652862549,
      "learning_rate": 0.0005571702377139006,
      "loss": 3.3705,
      "step": 39671
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4150837659835815,
      "learning_rate": 0.0005571681313497288,
      "loss": 2.9579,
      "step": 39672
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4770636558532715,
      "learning_rate": 0.0005571660249377445,
      "loss": 3.1869,
      "step": 39673
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.527761459350586,
      "learning_rate": 0.0005571639184779482,
      "loss": 2.8925,
      "step": 39674
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6180484294891357,
      "learning_rate": 0.0005571618119703403,
      "loss": 2.9619,
      "step": 39675
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.9863877296447754,
      "learning_rate": 0.0005571597054149211,
      "loss": 3.0238,
      "step": 39676
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7839336395263672,
      "learning_rate": 0.0005571575988116912,
      "loss": 3.0965,
      "step": 39677
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6345458030700684,
      "learning_rate": 0.0005571554921606507,
      "loss": 2.8011,
      "step": 39678
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9638762474060059,
      "learning_rate": 0.0005571533854618002,
      "loss": 3.0182,
      "step": 39679
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5022739171981812,
      "learning_rate": 0.00055715127871514,
      "loss": 2.7983,
      "step": 39680
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.18218994140625,
      "learning_rate": 0.0005571491719206706,
      "loss": 2.8782,
      "step": 39681
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7660995721817017,
      "learning_rate": 0.0005571470650783922,
      "loss": 2.9817,
      "step": 39682
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.025874614715576,
      "learning_rate": 0.0005571449581883054,
      "loss": 2.8795,
      "step": 39683
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8580467700958252,
      "learning_rate": 0.0005571428512504105,
      "loss": 3.0498,
      "step": 39684
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4895336627960205,
      "learning_rate": 0.0005571407442647078,
      "loss": 3.0566,
      "step": 39685
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6220730543136597,
      "learning_rate": 0.0005571386372311979,
      "loss": 3.1918,
      "step": 39686
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3135032653808594,
      "learning_rate": 0.0005571365301498811,
      "loss": 3.1151,
      "step": 39687
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4472647905349731,
      "learning_rate": 0.0005571344230207576,
      "loss": 3.1023,
      "step": 39688
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4807544946670532,
      "learning_rate": 0.0005571323158438281,
      "loss": 2.9571,
      "step": 39689
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.019395112991333,
      "learning_rate": 0.0005571302086190928,
      "loss": 2.7377,
      "step": 39690
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.692692518234253,
      "learning_rate": 0.0005571281013465521,
      "loss": 3.1344,
      "step": 39691
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5868918895721436,
      "learning_rate": 0.0005571259940262065,
      "loss": 2.8295,
      "step": 39692
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.7163047790527344,
      "learning_rate": 0.0005571238866580562,
      "loss": 2.9555,
      "step": 39693
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6040525436401367,
      "learning_rate": 0.0005571217792421018,
      "loss": 3.0691,
      "step": 39694
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7858425378799438,
      "learning_rate": 0.0005571196717783436,
      "loss": 2.9732,
      "step": 39695
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6341242790222168,
      "learning_rate": 0.000557117564266782,
      "loss": 3.3475,
      "step": 39696
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.602513074874878,
      "learning_rate": 0.0005571154567074175,
      "loss": 3.0657,
      "step": 39697
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7616541385650635,
      "learning_rate": 0.0005571133491002503,
      "loss": 3.1015,
      "step": 39698
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5550537109375,
      "learning_rate": 0.0005571112414452808,
      "loss": 3.0889,
      "step": 39699
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.040480136871338,
      "learning_rate": 0.0005571091337425095,
      "loss": 2.8454,
      "step": 39700
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1282765865325928,
      "learning_rate": 0.0005571070259919368,
      "loss": 3.0497,
      "step": 39701
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.093679189682007,
      "learning_rate": 0.0005571049181935631,
      "loss": 2.9715,
      "step": 39702
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.172210454940796,
      "learning_rate": 0.0005571028103473887,
      "loss": 2.8495,
      "step": 39703
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8748174905776978,
      "learning_rate": 0.000557100702453414,
      "loss": 3.0575,
      "step": 39704
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0823206901550293,
      "learning_rate": 0.0005570985945116394,
      "loss": 3.148,
      "step": 39705
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.586277723312378,
      "learning_rate": 0.0005570964865220652,
      "loss": 3.2069,
      "step": 39706
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.238701820373535,
      "learning_rate": 0.0005570943784846922,
      "loss": 2.8902,
      "step": 39707
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5861027240753174,
      "learning_rate": 0.0005570922703995203,
      "loss": 3.0693,
      "step": 39708
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4015299081802368,
      "learning_rate": 0.0005570901622665502,
      "loss": 2.9576,
      "step": 39709
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6161521673202515,
      "learning_rate": 0.0005570880540857821,
      "loss": 3.0664,
      "step": 39710
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.252302646636963,
      "learning_rate": 0.0005570859458572166,
      "loss": 3.1912,
      "step": 39711
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.470011830329895,
      "learning_rate": 0.000557083837580854,
      "loss": 3.2249,
      "step": 39712
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5508191585540771,
      "learning_rate": 0.0005570817292566944,
      "loss": 3.3046,
      "step": 39713
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9153752326965332,
      "learning_rate": 0.0005570796208847386,
      "loss": 2.74,
      "step": 39714
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5095075368881226,
      "learning_rate": 0.0005570775124649869,
      "loss": 3.0837,
      "step": 39715
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4239088296890259,
      "learning_rate": 0.0005570754039974396,
      "loss": 2.9791,
      "step": 39716
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5869840383529663,
      "learning_rate": 0.0005570732954820971,
      "loss": 3.1742,
      "step": 39717
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4073172807693481,
      "learning_rate": 0.0005570711869189598,
      "loss": 3.1974,
      "step": 39718
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3738607168197632,
      "learning_rate": 0.0005570690783080282,
      "loss": 3.157,
      "step": 39719
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5104058980941772,
      "learning_rate": 0.0005570669696493025,
      "loss": 3.0694,
      "step": 39720
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.416048288345337,
      "learning_rate": 0.0005570648609427833,
      "loss": 3.1772,
      "step": 39721
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5084800720214844,
      "learning_rate": 0.0005570627521884709,
      "loss": 3.2374,
      "step": 39722
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9243253469467163,
      "learning_rate": 0.0005570606433863656,
      "loss": 3.1709,
      "step": 39723
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5084766149520874,
      "learning_rate": 0.000557058534536468,
      "loss": 3.0601,
      "step": 39724
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.485767126083374,
      "learning_rate": 0.0005570564256387782,
      "loss": 3.0179,
      "step": 39725
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5742534399032593,
      "learning_rate": 0.0005570543166932969,
      "loss": 3.1186,
      "step": 39726
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5988130569458008,
      "learning_rate": 0.0005570522077000242,
      "loss": 3.0747,
      "step": 39727
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6684366464614868,
      "learning_rate": 0.0005570500986589608,
      "loss": 2.9669,
      "step": 39728
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4565871953964233,
      "learning_rate": 0.0005570479895701068,
      "loss": 3.2117,
      "step": 39729
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.631617784500122,
      "learning_rate": 0.0005570458804334628,
      "loss": 3.258,
      "step": 39730
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5427560806274414,
      "learning_rate": 0.0005570437712490291,
      "loss": 2.9129,
      "step": 39731
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4559276103973389,
      "learning_rate": 0.0005570416620168062,
      "loss": 2.9871,
      "step": 39732
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.667454481124878,
      "learning_rate": 0.0005570395527367942,
      "loss": 3.2652,
      "step": 39733
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4869612455368042,
      "learning_rate": 0.0005570374434089939,
      "loss": 2.9858,
      "step": 39734
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4419984817504883,
      "learning_rate": 0.0005570353340334054,
      "loss": 3.1391,
      "step": 39735
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.350163221359253,
      "learning_rate": 0.0005570332246100291,
      "loss": 3.2065,
      "step": 39736
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3031076192855835,
      "learning_rate": 0.0005570311151388656,
      "loss": 2.8767,
      "step": 39737
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7382322549819946,
      "learning_rate": 0.0005570290056199151,
      "loss": 3.0651,
      "step": 39738
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.966091275215149,
      "learning_rate": 0.0005570268960531781,
      "loss": 3.0897,
      "step": 39739
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7855061292648315,
      "learning_rate": 0.0005570247864386549,
      "loss": 2.9935,
      "step": 39740
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0330748558044434,
      "learning_rate": 0.000557022676776346,
      "loss": 2.9318,
      "step": 39741
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3960254192352295,
      "learning_rate": 0.0005570205670662517,
      "loss": 2.8166,
      "step": 39742
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.610342264175415,
      "learning_rate": 0.0005570184573083724,
      "loss": 3.066,
      "step": 39743
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.848469614982605,
      "learning_rate": 0.0005570163475027085,
      "loss": 3.0847,
      "step": 39744
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5298649072647095,
      "learning_rate": 0.0005570142376492605,
      "loss": 2.9375,
      "step": 39745
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6378740072250366,
      "learning_rate": 0.0005570121277480287,
      "loss": 2.8237,
      "step": 39746
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.021850824356079,
      "learning_rate": 0.0005570100177990134,
      "loss": 3.1971,
      "step": 39747
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6602782011032104,
      "learning_rate": 0.0005570079078022151,
      "loss": 2.9083,
      "step": 39748
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5127276182174683,
      "learning_rate": 0.0005570057977576342,
      "loss": 3.0777,
      "step": 39749
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9745643138885498,
      "learning_rate": 0.000557003687665271,
      "loss": 3.2749,
      "step": 39750
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3840341567993164,
      "learning_rate": 0.0005570015775251261,
      "loss": 3.0391,
      "step": 39751
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5962263345718384,
      "learning_rate": 0.0005569994673371997,
      "loss": 3.2071,
      "step": 39752
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7615691423416138,
      "learning_rate": 0.0005569973571014922,
      "loss": 3.2797,
      "step": 39753
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8911899328231812,
      "learning_rate": 0.0005569952468180042,
      "loss": 3.1322,
      "step": 39754
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.909666895866394,
      "learning_rate": 0.0005569931364867357,
      "loss": 3.2204,
      "step": 39755
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.317265510559082,
      "learning_rate": 0.0005569910261076875,
      "loss": 3.2127,
      "step": 39756
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6116411685943604,
      "learning_rate": 0.0005569889156808597,
      "loss": 2.8934,
      "step": 39757
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4185161590576172,
      "learning_rate": 0.0005569868052062529,
      "loss": 3.0977,
      "step": 39758
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.7054049968719482,
      "learning_rate": 0.0005569846946838675,
      "loss": 3.0112,
      "step": 39759
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7108948230743408,
      "learning_rate": 0.0005569825841137036,
      "loss": 2.9757,
      "step": 39760
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6501842737197876,
      "learning_rate": 0.0005569804734957618,
      "loss": 3.072,
      "step": 39761
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.612902045249939,
      "learning_rate": 0.0005569783628300426,
      "loss": 3.1215,
      "step": 39762
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9971543550491333,
      "learning_rate": 0.0005569762521165462,
      "loss": 3.0202,
      "step": 39763
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4491300582885742,
      "learning_rate": 0.000556974141355273,
      "loss": 2.8967,
      "step": 39764
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3160994052886963,
      "learning_rate": 0.0005569720305462235,
      "loss": 3.0041,
      "step": 39765
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5183396339416504,
      "learning_rate": 0.000556969919689398,
      "loss": 3.1615,
      "step": 39766
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3123382329940796,
      "learning_rate": 0.0005569678087847971,
      "loss": 3.3662,
      "step": 39767
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5161614418029785,
      "learning_rate": 0.000556965697832421,
      "loss": 2.9901,
      "step": 39768
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5282602310180664,
      "learning_rate": 0.00055696358683227,
      "loss": 3.1076,
      "step": 39769
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2616256475448608,
      "learning_rate": 0.0005569614757843448,
      "loss": 2.9194,
      "step": 39770
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3804621696472168,
      "learning_rate": 0.0005569593646886455,
      "loss": 2.9461,
      "step": 39771
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4691832065582275,
      "learning_rate": 0.0005569572535451726,
      "loss": 3.0217,
      "step": 39772
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5672082901000977,
      "learning_rate": 0.0005569551423539264,
      "loss": 3.2176,
      "step": 39773
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5044344663619995,
      "learning_rate": 0.0005569530311149075,
      "loss": 2.8377,
      "step": 39774
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.774005651473999,
      "learning_rate": 0.0005569509198281162,
      "loss": 2.9235,
      "step": 39775
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.554414987564087,
      "learning_rate": 0.0005569488084935528,
      "loss": 3.0558,
      "step": 39776
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3598480224609375,
      "learning_rate": 0.0005569466971112179,
      "loss": 3.0474,
      "step": 39777
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.7564682960510254,
      "learning_rate": 0.0005569445856811117,
      "loss": 2.9967,
      "step": 39778
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4303977489471436,
      "learning_rate": 0.0005569424742032346,
      "loss": 3.1315,
      "step": 39779
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.260606050491333,
      "learning_rate": 0.0005569403626775871,
      "loss": 3.1705,
      "step": 39780
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9701038599014282,
      "learning_rate": 0.0005569382511041695,
      "loss": 3.1889,
      "step": 39781
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.789982795715332,
      "learning_rate": 0.0005569361394829821,
      "loss": 3.2032,
      "step": 39782
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2610723972320557,
      "learning_rate": 0.0005569340278140256,
      "loss": 3.0327,
      "step": 39783
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5924558639526367,
      "learning_rate": 0.0005569319160973001,
      "loss": 3.1358,
      "step": 39784
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.586538314819336,
      "learning_rate": 0.0005569298043328063,
      "loss": 3.1088,
      "step": 39785
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.804195761680603,
      "learning_rate": 0.0005569276925205442,
      "loss": 2.9214,
      "step": 39786
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2824811935424805,
      "learning_rate": 0.0005569255806605145,
      "loss": 2.9251,
      "step": 39787
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9540096521377563,
      "learning_rate": 0.0005569234687527174,
      "loss": 3.1836,
      "step": 39788
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.701406955718994,
      "learning_rate": 0.0005569213567971535,
      "loss": 2.885,
      "step": 39789
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.277134418487549,
      "learning_rate": 0.0005569192447938229,
      "loss": 2.9231,
      "step": 39790
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.44371497631073,
      "learning_rate": 0.0005569171327427263,
      "loss": 3.3101,
      "step": 39791
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7199915647506714,
      "learning_rate": 0.000556915020643864,
      "loss": 3.1576,
      "step": 39792
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.958937644958496,
      "learning_rate": 0.0005569129084972362,
      "loss": 3.0538,
      "step": 39793
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4055898189544678,
      "learning_rate": 0.0005569107963028435,
      "loss": 3.0549,
      "step": 39794
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6572962999343872,
      "learning_rate": 0.0005569086840606862,
      "loss": 3.0669,
      "step": 39795
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.685924530029297,
      "learning_rate": 0.0005569065717707648,
      "loss": 3.2099,
      "step": 39796
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9523082971572876,
      "learning_rate": 0.0005569044594330795,
      "loss": 3.172,
      "step": 39797
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.680160641670227,
      "learning_rate": 0.0005569023470476309,
      "loss": 3.1459,
      "step": 39798
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.771782398223877,
      "learning_rate": 0.0005569002346144193,
      "loss": 3.0261,
      "step": 39799
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8570513725280762,
      "learning_rate": 0.0005568981221334451,
      "loss": 3.0627,
      "step": 39800
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7939952611923218,
      "learning_rate": 0.0005568960096047087,
      "loss": 3.1615,
      "step": 39801
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6417670249938965,
      "learning_rate": 0.0005568938970282105,
      "loss": 3.0963,
      "step": 39802
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8279751539230347,
      "learning_rate": 0.0005568917844039509,
      "loss": 2.9842,
      "step": 39803
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0337648391723633,
      "learning_rate": 0.0005568896717319301,
      "loss": 3.2695,
      "step": 39804
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.472312569618225,
      "learning_rate": 0.0005568875590121488,
      "loss": 2.8977,
      "step": 39805
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4601753950119019,
      "learning_rate": 0.0005568854462446071,
      "loss": 3.1053,
      "step": 39806
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.044358491897583,
      "learning_rate": 0.0005568833334293059,
      "loss": 3.0836,
      "step": 39807
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6951243877410889,
      "learning_rate": 0.0005568812205662449,
      "loss": 3.0981,
      "step": 39808
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4696441888809204,
      "learning_rate": 0.000556879107655425,
      "loss": 3.0667,
      "step": 39809
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4380409717559814,
      "learning_rate": 0.0005568769946968464,
      "loss": 3.2633,
      "step": 39810
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1876485347747803,
      "learning_rate": 0.0005568748816905094,
      "loss": 3.1542,
      "step": 39811
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3218894004821777,
      "learning_rate": 0.0005568727686364146,
      "loss": 3.1886,
      "step": 39812
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6473150253295898,
      "learning_rate": 0.0005568706555345623,
      "loss": 3.0344,
      "step": 39813
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.431113600730896,
      "learning_rate": 0.0005568685423849529,
      "loss": 2.915,
      "step": 39814
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.355738639831543,
      "learning_rate": 0.0005568664291875868,
      "loss": 3.0308,
      "step": 39815
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9400975704193115,
      "learning_rate": 0.0005568643159424644,
      "loss": 2.9875,
      "step": 39816
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3879404067993164,
      "learning_rate": 0.000556862202649586,
      "loss": 3.1973,
      "step": 39817
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6291660070419312,
      "learning_rate": 0.0005568600893089521,
      "loss": 2.9807,
      "step": 39818
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.227771043777466,
      "learning_rate": 0.000556857975920563,
      "loss": 3.4045,
      "step": 39819
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.742591381072998,
      "learning_rate": 0.0005568558624844194,
      "loss": 2.8185,
      "step": 39820
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7940154075622559,
      "learning_rate": 0.0005568537490005212,
      "loss": 3.302,
      "step": 39821
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4993653297424316,
      "learning_rate": 0.0005568516354688691,
      "loss": 3.1539,
      "step": 39822
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.299094319343567,
      "learning_rate": 0.0005568495218894635,
      "loss": 3.2547,
      "step": 39823
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.597772240638733,
      "learning_rate": 0.0005568474082623046,
      "loss": 3.0025,
      "step": 39824
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9751530885696411,
      "learning_rate": 0.0005568452945873929,
      "loss": 3.1704,
      "step": 39825
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9754188060760498,
      "learning_rate": 0.000556843180864729,
      "loss": 3.0193,
      "step": 39826
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7676137685775757,
      "learning_rate": 0.0005568410670943129,
      "loss": 3.0914,
      "step": 39827
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8681185245513916,
      "learning_rate": 0.0005568389532761453,
      "loss": 3.1803,
      "step": 39828
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5736948251724243,
      "learning_rate": 0.0005568368394102266,
      "loss": 3.1046,
      "step": 39829
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4638155698776245,
      "learning_rate": 0.000556834725496557,
      "loss": 2.8126,
      "step": 39830
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6538617610931396,
      "learning_rate": 0.0005568326115351369,
      "loss": 3.0518,
      "step": 39831
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.571128487586975,
      "learning_rate": 0.0005568304975259668,
      "loss": 3.3316,
      "step": 39832
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4690779447555542,
      "learning_rate": 0.0005568283834690472,
      "loss": 3.3021,
      "step": 39833
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6690826416015625,
      "learning_rate": 0.0005568262693643782,
      "loss": 3.2921,
      "step": 39834
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4977846145629883,
      "learning_rate": 0.0005568241552119604,
      "loss": 3.0835,
      "step": 39835
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.0717434883117676,
      "learning_rate": 0.000556822041011794,
      "loss": 3.0291,
      "step": 39836
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6406670808792114,
      "learning_rate": 0.0005568199267638798,
      "loss": 2.9798,
      "step": 39837
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9032231569290161,
      "learning_rate": 0.0005568178124682176,
      "loss": 3.1356,
      "step": 39838
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8502088785171509,
      "learning_rate": 0.0005568156981248084,
      "loss": 2.8431,
      "step": 39839
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6074587106704712,
      "learning_rate": 0.0005568135837336522,
      "loss": 3.1157,
      "step": 39840
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6257069110870361,
      "learning_rate": 0.0005568114692947496,
      "loss": 3.3646,
      "step": 39841
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.647776484489441,
      "learning_rate": 0.0005568093548081008,
      "loss": 2.9941,
      "step": 39842
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5114262104034424,
      "learning_rate": 0.0005568072402737064,
      "loss": 3.0528,
      "step": 39843
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2703208923339844,
      "learning_rate": 0.0005568051256915666,
      "loss": 3.0287,
      "step": 39844
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.858738660812378,
      "learning_rate": 0.0005568030110616819,
      "loss": 2.906,
      "step": 39845
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5185790061950684,
      "learning_rate": 0.0005568008963840526,
      "loss": 3.227,
      "step": 39846
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8517833948135376,
      "learning_rate": 0.0005567987816586792,
      "loss": 3.0934,
      "step": 39847
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3953144550323486,
      "learning_rate": 0.0005567966668855621,
      "loss": 3.1333,
      "step": 39848
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4617338180541992,
      "learning_rate": 0.0005567945520647017,
      "loss": 3.0271,
      "step": 39849
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6975681781768799,
      "learning_rate": 0.0005567924371960983,
      "loss": 3.1579,
      "step": 39850
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5337004661560059,
      "learning_rate": 0.0005567903222797523,
      "loss": 2.9135,
      "step": 39851
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9261571168899536,
      "learning_rate": 0.0005567882073156641,
      "loss": 2.8206,
      "step": 39852
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.317915916442871,
      "learning_rate": 0.0005567860923038342,
      "loss": 3.1496,
      "step": 39853
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.135443925857544,
      "learning_rate": 0.0005567839772442629,
      "loss": 3.2301,
      "step": 39854
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3387736082077026,
      "learning_rate": 0.0005567818621369506,
      "loss": 2.9184,
      "step": 39855
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7799783945083618,
      "learning_rate": 0.0005567797469818978,
      "loss": 3.2857,
      "step": 39856
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.785112738609314,
      "learning_rate": 0.0005567776317791047,
      "loss": 3.2217,
      "step": 39857
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5247539281845093,
      "learning_rate": 0.0005567755165285718,
      "loss": 2.8736,
      "step": 39858
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6468822956085205,
      "learning_rate": 0.0005567734012302994,
      "loss": 3.1436,
      "step": 39859
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8002854585647583,
      "learning_rate": 0.0005567712858842881,
      "loss": 3.2163,
      "step": 39860
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5059947967529297,
      "learning_rate": 0.0005567691704905382,
      "loss": 3.0772,
      "step": 39861
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8200709819793701,
      "learning_rate": 0.00055676705504905,
      "loss": 3.1798,
      "step": 39862
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5747904777526855,
      "learning_rate": 0.0005567649395598239,
      "loss": 3.1355,
      "step": 39863
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5583724975585938,
      "learning_rate": 0.0005567628240228604,
      "loss": 2.9262,
      "step": 39864
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.38573157787323,
      "learning_rate": 0.0005567607084381599,
      "loss": 3.256,
      "step": 39865
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3036285638809204,
      "learning_rate": 0.0005567585928057227,
      "loss": 3.2438,
      "step": 39866
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7166194915771484,
      "learning_rate": 0.0005567564771255492,
      "loss": 3.0112,
      "step": 39867
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6453782320022583,
      "learning_rate": 0.0005567543613976398,
      "loss": 2.9857,
      "step": 39868
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4347037076950073,
      "learning_rate": 0.0005567522456219951,
      "loss": 3.2607,
      "step": 39869
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6378499269485474,
      "learning_rate": 0.0005567501297986151,
      "loss": 3.2374,
      "step": 39870
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6665493249893188,
      "learning_rate": 0.0005567480139275005,
      "loss": 2.9843,
      "step": 39871
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.010746717453003,
      "learning_rate": 0.0005567458980086515,
      "loss": 3.1077,
      "step": 39872
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.279439926147461,
      "learning_rate": 0.0005567437820420687,
      "loss": 3.0536,
      "step": 39873
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9895215034484863,
      "learning_rate": 0.0005567416660277523,
      "loss": 3.1327,
      "step": 39874
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2042932510375977,
      "learning_rate": 0.0005567395499657029,
      "loss": 3.0024,
      "step": 39875
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9624828100204468,
      "learning_rate": 0.0005567374338559207,
      "loss": 2.9348,
      "step": 39876
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4916096925735474,
      "learning_rate": 0.0005567353176984063,
      "loss": 3.0539,
      "step": 39877
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.01456618309021,
      "learning_rate": 0.0005567332014931597,
      "loss": 3.087,
      "step": 39878
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.013991594314575,
      "learning_rate": 0.0005567310852401818,
      "loss": 3.0115,
      "step": 39879
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.51893150806427,
      "learning_rate": 0.0005567289689394725,
      "loss": 2.8875,
      "step": 39880
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3236087560653687,
      "learning_rate": 0.0005567268525910326,
      "loss": 3.1247,
      "step": 39881
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7368849515914917,
      "learning_rate": 0.0005567247361948623,
      "loss": 3.0032,
      "step": 39882
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1347110271453857,
      "learning_rate": 0.000556722619750962,
      "loss": 3.2414,
      "step": 39883
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7439197301864624,
      "learning_rate": 0.0005567205032593321,
      "loss": 3.1516,
      "step": 39884
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4764450788497925,
      "learning_rate": 0.0005567183867199731,
      "loss": 2.8406,
      "step": 39885
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.174043893814087,
      "learning_rate": 0.0005567162701328852,
      "loss": 2.7394,
      "step": 39886
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3478721380233765,
      "learning_rate": 0.000556714153498069,
      "loss": 2.9258,
      "step": 39887
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.553128957748413,
      "learning_rate": 0.0005567120368155248,
      "loss": 3.0102,
      "step": 39888
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3842381238937378,
      "learning_rate": 0.000556709920085253,
      "loss": 2.9324,
      "step": 39889
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3999269008636475,
      "learning_rate": 0.0005567078033072539,
      "loss": 3.1395,
      "step": 39890
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.40919828414917,
      "learning_rate": 0.0005567056864815279,
      "loss": 3.1926,
      "step": 39891
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4606376886367798,
      "learning_rate": 0.0005567035696080756,
      "loss": 3.1314,
      "step": 39892
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.359130620956421,
      "learning_rate": 0.0005567014526868971,
      "loss": 2.928,
      "step": 39893
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3468836545944214,
      "learning_rate": 0.0005566993357179931,
      "loss": 3.0531,
      "step": 39894
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3688586950302124,
      "learning_rate": 0.0005566972187013639,
      "loss": 2.9829,
      "step": 39895
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.539139747619629,
      "learning_rate": 0.0005566951016370097,
      "loss": 3.0633,
      "step": 39896
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8091726303100586,
      "learning_rate": 0.0005566929845249311,
      "loss": 3.1479,
      "step": 39897
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.130964994430542,
      "learning_rate": 0.0005566908673651285,
      "loss": 3.0761,
      "step": 39898
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7923119068145752,
      "learning_rate": 0.0005566887501576021,
      "loss": 3.0419,
      "step": 39899
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7818641662597656,
      "learning_rate": 0.0005566866329023525,
      "loss": 3.1933,
      "step": 39900
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.579458475112915,
      "learning_rate": 0.0005566845155993799,
      "loss": 3.1232,
      "step": 39901
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.6573386192321777,
      "learning_rate": 0.0005566823982486849,
      "loss": 3.3057,
      "step": 39902
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.6492743492126465,
      "learning_rate": 0.0005566802808502678,
      "loss": 3.2295,
      "step": 39903
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7674695253372192,
      "learning_rate": 0.0005566781634041289,
      "loss": 2.9866,
      "step": 39904
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4350924491882324,
      "learning_rate": 0.0005566760459102688,
      "loss": 3.194,
      "step": 39905
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.135568618774414,
      "learning_rate": 0.0005566739283686876,
      "loss": 3.2411,
      "step": 39906
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5003042221069336,
      "learning_rate": 0.0005566718107793861,
      "loss": 3.1834,
      "step": 39907
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7587147951126099,
      "learning_rate": 0.0005566696931423644,
      "loss": 3.063,
      "step": 39908
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7156426906585693,
      "learning_rate": 0.0005566675754576229,
      "loss": 3.0967,
      "step": 39909
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5965684652328491,
      "learning_rate": 0.0005566654577251622,
      "loss": 3.0021,
      "step": 39910
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4987725019454956,
      "learning_rate": 0.0005566633399449823,
      "loss": 2.8701,
      "step": 39911
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.523571252822876,
      "learning_rate": 0.000556661222117084,
      "loss": 3.0874,
      "step": 39912
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6215708255767822,
      "learning_rate": 0.0005566591042414675,
      "loss": 2.8366,
      "step": 39913
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4175877571105957,
      "learning_rate": 0.0005566569863181332,
      "loss": 3.1791,
      "step": 39914
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8140872716903687,
      "learning_rate": 0.0005566548683470815,
      "loss": 2.9601,
      "step": 39915
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5069422721862793,
      "learning_rate": 0.0005566527503283129,
      "loss": 2.9836,
      "step": 39916
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0130996704101562,
      "learning_rate": 0.0005566506322618276,
      "loss": 3.1281,
      "step": 39917
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5127532482147217,
      "learning_rate": 0.0005566485141476261,
      "loss": 3.1802,
      "step": 39918
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4377135038375854,
      "learning_rate": 0.000556646395985709,
      "loss": 2.8379,
      "step": 39919
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.494423747062683,
      "learning_rate": 0.0005566442777760764,
      "loss": 3.0886,
      "step": 39920
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6532599925994873,
      "learning_rate": 0.0005566421595187287,
      "loss": 3.013,
      "step": 39921
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4143165349960327,
      "learning_rate": 0.0005566400412136664,
      "loss": 3.1712,
      "step": 39922
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5145100355148315,
      "learning_rate": 0.0005566379228608899,
      "loss": 3.0863,
      "step": 39923
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6467370986938477,
      "learning_rate": 0.0005566358044603995,
      "loss": 3.1414,
      "step": 39924
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4258571863174438,
      "learning_rate": 0.0005566336860121958,
      "loss": 3.0787,
      "step": 39925
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.818444848060608,
      "learning_rate": 0.000556631567516279,
      "loss": 3.1495,
      "step": 39926
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.33095383644104,
      "learning_rate": 0.0005566294489726494,
      "loss": 3.0736,
      "step": 39927
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6925745010375977,
      "learning_rate": 0.0005566273303813078,
      "loss": 2.8743,
      "step": 39928
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4967716932296753,
      "learning_rate": 0.0005566252117422541,
      "loss": 3.0212,
      "step": 39929
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8041791915893555,
      "learning_rate": 0.0005566230930554891,
      "loss": 2.8585,
      "step": 39930
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.786360263824463,
      "learning_rate": 0.0005566209743210128,
      "loss": 3.0514,
      "step": 39931
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.688968539237976,
      "learning_rate": 0.0005566188555388261,
      "loss": 2.9811,
      "step": 39932
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6353387832641602,
      "learning_rate": 0.000556616736708929,
      "loss": 3.2554,
      "step": 39933
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8002650737762451,
      "learning_rate": 0.0005566146178313219,
      "loss": 3.0985,
      "step": 39934
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4421755075454712,
      "learning_rate": 0.0005566124989060054,
      "loss": 2.8967,
      "step": 39935
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5293140411376953,
      "learning_rate": 0.0005566103799329797,
      "loss": 3.1958,
      "step": 39936
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3732130527496338,
      "learning_rate": 0.0005566082609122454,
      "loss": 3.0325,
      "step": 39937
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.619280457496643,
      "learning_rate": 0.0005566061418438028,
      "loss": 3.0573,
      "step": 39938
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4017517566680908,
      "learning_rate": 0.0005566040227276522,
      "loss": 3.0216,
      "step": 39939
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.606400966644287,
      "learning_rate": 0.0005566019035637941,
      "loss": 3.103,
      "step": 39940
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.913356900215149,
      "learning_rate": 0.0005565997843522289,
      "loss": 3.1136,
      "step": 39941
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5495506525039673,
      "learning_rate": 0.0005565976650929568,
      "loss": 3.1562,
      "step": 39942
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2666263580322266,
      "learning_rate": 0.0005565955457859785,
      "loss": 3.1167,
      "step": 39943
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4685113430023193,
      "learning_rate": 0.0005565934264312941,
      "loss": 3.1022,
      "step": 39944
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9533940553665161,
      "learning_rate": 0.0005565913070289043,
      "loss": 2.9384,
      "step": 39945
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9205982685089111,
      "learning_rate": 0.0005565891875788093,
      "loss": 3.1192,
      "step": 39946
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.0574166774749756,
      "learning_rate": 0.0005565870680810095,
      "loss": 3.019,
      "step": 39947
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.737263798713684,
      "learning_rate": 0.0005565849485355053,
      "loss": 3.1945,
      "step": 39948
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.653344988822937,
      "learning_rate": 0.0005565828289422971,
      "loss": 3.0435,
      "step": 39949
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.490247130393982,
      "learning_rate": 0.0005565807093013853,
      "loss": 3.3733,
      "step": 39950
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8775274753570557,
      "learning_rate": 0.0005565785896127703,
      "loss": 3.0542,
      "step": 39951
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.6392695903778076,
      "learning_rate": 0.0005565764698764526,
      "loss": 3.1262,
      "step": 39952
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4558417797088623,
      "learning_rate": 0.0005565743500924324,
      "loss": 3.223,
      "step": 39953
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8495564460754395,
      "learning_rate": 0.0005565722302607102,
      "loss": 2.766,
      "step": 39954
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2826712131500244,
      "learning_rate": 0.0005565701103812864,
      "loss": 3.1707,
      "step": 39955
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.09639048576355,
      "learning_rate": 0.0005565679904541613,
      "loss": 3.209,
      "step": 39956
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.83688223361969,
      "learning_rate": 0.0005565658704793355,
      "loss": 3.1719,
      "step": 39957
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2363851070404053,
      "learning_rate": 0.0005565637504568092,
      "loss": 3.0968,
      "step": 39958
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.9979920387268066,
      "learning_rate": 0.0005565616303865829,
      "loss": 3.0848,
      "step": 39959
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0456066131591797,
      "learning_rate": 0.0005565595102686568,
      "loss": 2.998,
      "step": 39960
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8393309116363525,
      "learning_rate": 0.0005565573901030316,
      "loss": 3.2108,
      "step": 39961
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.815186858177185,
      "learning_rate": 0.0005565552698897075,
      "loss": 2.952,
      "step": 39962
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1954479217529297,
      "learning_rate": 0.000556553149628685,
      "loss": 3.0875,
      "step": 39963
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2724735736846924,
      "learning_rate": 0.0005565510293199642,
      "loss": 3.1514,
      "step": 39964
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9349843263626099,
      "learning_rate": 0.0005565489089635458,
      "loss": 3.0701,
      "step": 39965
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.405060052871704,
      "learning_rate": 0.0005565467885594303,
      "loss": 2.9171,
      "step": 39966
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8943777084350586,
      "learning_rate": 0.0005565446681076177,
      "loss": 3.0916,
      "step": 39967
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8564292192459106,
      "learning_rate": 0.0005565425476081087,
      "loss": 3.1887,
      "step": 39968
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6901763677597046,
      "learning_rate": 0.0005565404270609037,
      "loss": 3.1648,
      "step": 39969
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.020634412765503,
      "learning_rate": 0.0005565383064660027,
      "loss": 3.0428,
      "step": 39970
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7560431957244873,
      "learning_rate": 0.0005565361858234067,
      "loss": 3.3542,
      "step": 39971
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5918338298797607,
      "learning_rate": 0.0005565340651331156,
      "loss": 3.1041,
      "step": 39972
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6649442911148071,
      "learning_rate": 0.00055653194439513,
      "loss": 3.1241,
      "step": 39973
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.639695882797241,
      "learning_rate": 0.0005565298236094503,
      "loss": 3.0899,
      "step": 39974
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.7067017555236816,
      "learning_rate": 0.0005565277027760768,
      "loss": 2.9614,
      "step": 39975
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5325803756713867,
      "learning_rate": 0.0005565255818950101,
      "loss": 2.9637,
      "step": 39976
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2034194469451904,
      "learning_rate": 0.0005565234609662504,
      "loss": 3.0398,
      "step": 39977
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.182384967803955,
      "learning_rate": 0.0005565213399897981,
      "loss": 3.2862,
      "step": 39978
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5547716617584229,
      "learning_rate": 0.0005565192189656536,
      "loss": 3.1074,
      "step": 39979
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2327841520309448,
      "learning_rate": 0.0005565170978938174,
      "loss": 2.8931,
      "step": 39980
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.389792561531067,
      "learning_rate": 0.0005565149767742897,
      "loss": 3.0356,
      "step": 39981
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3029208183288574,
      "learning_rate": 0.0005565128556070712,
      "loss": 3.0802,
      "step": 39982
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0005440711975098,
      "learning_rate": 0.0005565107343921621,
      "loss": 2.9958,
      "step": 39983
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.295323371887207,
      "learning_rate": 0.0005565086131295627,
      "loss": 3.3314,
      "step": 39984
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.6826064586639404,
      "learning_rate": 0.0005565064918192736,
      "loss": 3.0313,
      "step": 39985
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.2791860103607178,
      "learning_rate": 0.0005565043704612951,
      "loss": 3.2738,
      "step": 39986
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6202151775360107,
      "learning_rate": 0.0005565022490556275,
      "loss": 2.8503,
      "step": 39987
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.452043056488037,
      "learning_rate": 0.0005565001276022714,
      "loss": 3.1174,
      "step": 39988
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.281296491622925,
      "learning_rate": 0.000556498006101227,
      "loss": 3.0298,
      "step": 39989
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5895683765411377,
      "learning_rate": 0.0005564958845524949,
      "loss": 2.9918,
      "step": 39990
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3335902690887451,
      "learning_rate": 0.0005564937629560753,
      "loss": 3.1222,
      "step": 39991
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.320539116859436,
      "learning_rate": 0.0005564916413119687,
      "loss": 3.1083,
      "step": 39992
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7737785577774048,
      "learning_rate": 0.0005564895196201754,
      "loss": 3.0036,
      "step": 39993
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.01837158203125,
      "learning_rate": 0.0005564873978806959,
      "loss": 3.0084,
      "step": 39994
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.354803442955017,
      "learning_rate": 0.0005564852760935305,
      "loss": 2.8913,
      "step": 39995
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.025120258331299,
      "learning_rate": 0.0005564831542586797,
      "loss": 2.9714,
      "step": 39996
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1541407108306885,
      "learning_rate": 0.0005564810323761438,
      "loss": 3.0686,
      "step": 39997
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2916933298110962,
      "learning_rate": 0.0005564789104459233,
      "loss": 3.084,
      "step": 39998
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.199401378631592,
      "learning_rate": 0.0005564767884680185,
      "loss": 2.7996,
      "step": 39999
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.668556809425354,
      "learning_rate": 0.0005564746664424298,
      "loss": 3.211,
      "step": 40000
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7410110235214233,
      "learning_rate": 0.0005564725443691576,
      "loss": 2.7883,
      "step": 40001
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6151323318481445,
      "learning_rate": 0.0005564704222482023,
      "loss": 3.0921,
      "step": 40002
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9617704153060913,
      "learning_rate": 0.0005564683000795645,
      "loss": 3.0337,
      "step": 40003
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.177509069442749,
      "learning_rate": 0.0005564661778632441,
      "loss": 2.9954,
      "step": 40004
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4784448146820068,
      "learning_rate": 0.000556464055599242,
      "loss": 3.0,
      "step": 40005
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9595471620559692,
      "learning_rate": 0.0005564619332875584,
      "loss": 3.1329,
      "step": 40006
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.7025868892669678,
      "learning_rate": 0.0005564598109281936,
      "loss": 3.0112,
      "step": 40007
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.640366792678833,
      "learning_rate": 0.000556457688521148,
      "loss": 3.1446,
      "step": 40008
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.263923168182373,
      "learning_rate": 0.0005564555660664222,
      "loss": 3.0156,
      "step": 40009
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4792020320892334,
      "learning_rate": 0.0005564534435640165,
      "loss": 3.0794,
      "step": 40010
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.989806890487671,
      "learning_rate": 0.0005564513210139312,
      "loss": 3.3118,
      "step": 40011
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5269488096237183,
      "learning_rate": 0.0005564491984161667,
      "loss": 2.7179,
      "step": 40012
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6971807479858398,
      "learning_rate": 0.0005564470757707235,
      "loss": 3.0472,
      "step": 40013
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5894856452941895,
      "learning_rate": 0.000556444953077602,
      "loss": 2.8849,
      "step": 40014
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5754872560501099,
      "learning_rate": 0.0005564428303368027,
      "loss": 3.0323,
      "step": 40015
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4062786102294922,
      "learning_rate": 0.0005564407075483254,
      "loss": 2.864,
      "step": 40016
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.479791283607483,
      "learning_rate": 0.0005564385847121713,
      "loss": 3.1243,
      "step": 40017
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.470335602760315,
      "learning_rate": 0.0005564364618283403,
      "loss": 3.1551,
      "step": 40018
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3408119678497314,
      "learning_rate": 0.0005564343388968329,
      "loss": 2.9333,
      "step": 40019
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.599427580833435,
      "learning_rate": 0.0005564322159176496,
      "loss": 2.9219,
      "step": 40020
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7841461896896362,
      "learning_rate": 0.0005564300928907907,
      "loss": 3.1281,
      "step": 40021
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6789878606796265,
      "learning_rate": 0.0005564279698162565,
      "loss": 3.4024,
      "step": 40022
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.165367841720581,
      "learning_rate": 0.0005564258466940475,
      "loss": 2.9456,
      "step": 40023
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6264513731002808,
      "learning_rate": 0.0005564237235241642,
      "loss": 3.0784,
      "step": 40024
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5103602409362793,
      "learning_rate": 0.0005564216003066068,
      "loss": 3.0808,
      "step": 40025
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.53311288356781,
      "learning_rate": 0.0005564194770413759,
      "loss": 2.9808,
      "step": 40026
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.415103554725647,
      "learning_rate": 0.0005564173537284717,
      "loss": 2.789,
      "step": 40027
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5018742084503174,
      "learning_rate": 0.0005564152303678947,
      "loss": 3.1883,
      "step": 40028
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1319236755371094,
      "learning_rate": 0.0005564131069596452,
      "loss": 3.0009,
      "step": 40029
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0223987102508545,
      "learning_rate": 0.0005564109835037238,
      "loss": 2.9611,
      "step": 40030
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.89995539188385,
      "learning_rate": 0.0005564088600001306,
      "loss": 2.8756,
      "step": 40031
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9421895742416382,
      "learning_rate": 0.0005564067364488663,
      "loss": 3.2009,
      "step": 40032
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.759914755821228,
      "learning_rate": 0.0005564046128499311,
      "loss": 2.9729,
      "step": 40033
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2586512565612793,
      "learning_rate": 0.0005564024892033253,
      "loss": 3.1143,
      "step": 40034
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5227714776992798,
      "learning_rate": 0.0005564003655090495,
      "loss": 3.081,
      "step": 40035
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.280102491378784,
      "learning_rate": 0.0005563982417671041,
      "loss": 2.9085,
      "step": 40036
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.405826210975647,
      "learning_rate": 0.0005563961179774894,
      "loss": 3.1089,
      "step": 40037
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8967756032943726,
      "learning_rate": 0.0005563939941402059,
      "loss": 2.9653,
      "step": 40038
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.448528528213501,
      "learning_rate": 0.0005563918702552539,
      "loss": 3.1343,
      "step": 40039
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6261520385742188,
      "learning_rate": 0.0005563897463226337,
      "loss": 3.1061,
      "step": 40040
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6103863716125488,
      "learning_rate": 0.0005563876223423459,
      "loss": 3.0606,
      "step": 40041
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4741456508636475,
      "learning_rate": 0.0005563854983143908,
      "loss": 2.8777,
      "step": 40042
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.657817006111145,
      "learning_rate": 0.0005563833742387688,
      "loss": 3.0592,
      "step": 40043
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5994871854782104,
      "learning_rate": 0.0005563812501154802,
      "loss": 3.0655,
      "step": 40044
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.870211601257324,
      "learning_rate": 0.0005563791259445256,
      "loss": 3.0849,
      "step": 40045
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5396374464035034,
      "learning_rate": 0.000556377001725905,
      "loss": 3.2045,
      "step": 40046
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4879469871520996,
      "learning_rate": 0.0005563748774596194,
      "loss": 3.1903,
      "step": 40047
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5564680099487305,
      "learning_rate": 0.0005563727531456688,
      "loss": 3.0003,
      "step": 40048
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8520569801330566,
      "learning_rate": 0.0005563706287840536,
      "loss": 3.0957,
      "step": 40049
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.249639630317688,
      "learning_rate": 0.0005563685043747744,
      "loss": 3.0645,
      "step": 40050
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.153148889541626,
      "learning_rate": 0.0005563663799178313,
      "loss": 2.9736,
      "step": 40051
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.450360059738159,
      "learning_rate": 0.0005563642554132249,
      "loss": 2.8336,
      "step": 40052
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1050766706466675,
      "learning_rate": 0.0005563621308609556,
      "loss": 3.0025,
      "step": 40053
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0239551067352295,
      "learning_rate": 0.0005563600062610237,
      "loss": 3.1425,
      "step": 40054
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.829355239868164,
      "learning_rate": 0.0005563578816134296,
      "loss": 2.9829,
      "step": 40055
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6575579643249512,
      "learning_rate": 0.0005563557569181737,
      "loss": 2.9414,
      "step": 40056
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1649210453033447,
      "learning_rate": 0.0005563536321752565,
      "loss": 3.2533,
      "step": 40057
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7902511358261108,
      "learning_rate": 0.0005563515073846783,
      "loss": 2.6908,
      "step": 40058
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4886765480041504,
      "learning_rate": 0.0005563493825464394,
      "loss": 3.0469,
      "step": 40059
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.209996223449707,
      "learning_rate": 0.0005563472576605405,
      "loss": 3.1848,
      "step": 40060
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6922472715377808,
      "learning_rate": 0.0005563451327269818,
      "loss": 2.9045,
      "step": 40061
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6714588403701782,
      "learning_rate": 0.0005563430077457636,
      "loss": 2.9711,
      "step": 40062
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6444456577301025,
      "learning_rate": 0.0005563408827168865,
      "loss": 2.8984,
      "step": 40063
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7959916591644287,
      "learning_rate": 0.0005563387576403507,
      "loss": 3.1018,
      "step": 40064
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2256139516830444,
      "learning_rate": 0.0005563366325161567,
      "loss": 3.2421,
      "step": 40065
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2886146306991577,
      "learning_rate": 0.0005563345073443048,
      "loss": 2.9466,
      "step": 40066
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9923944473266602,
      "learning_rate": 0.0005563323821247956,
      "loss": 2.9038,
      "step": 40067
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.5775039196014404,
      "learning_rate": 0.0005563302568576293,
      "loss": 2.8356,
      "step": 40068
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3916279077529907,
      "learning_rate": 0.0005563281315428064,
      "loss": 3.2299,
      "step": 40069
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.365654230117798,
      "learning_rate": 0.0005563260061803273,
      "loss": 3.0776,
      "step": 40070
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.944399118423462,
      "learning_rate": 0.0005563238807701924,
      "loss": 2.8775,
      "step": 40071
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5039961338043213,
      "learning_rate": 0.000556321755312402,
      "loss": 2.9482,
      "step": 40072
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0558254718780518,
      "learning_rate": 0.0005563196298069564,
      "loss": 2.821,
      "step": 40073
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.7439346313476562,
      "learning_rate": 0.0005563175042538563,
      "loss": 2.9859,
      "step": 40074
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3687189817428589,
      "learning_rate": 0.0005563153786531019,
      "loss": 3.0038,
      "step": 40075
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4652631282806396,
      "learning_rate": 0.0005563132530046936,
      "loss": 2.9196,
      "step": 40076
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.210339307785034,
      "learning_rate": 0.0005563111273086319,
      "loss": 3.2125,
      "step": 40077
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6124413013458252,
      "learning_rate": 0.0005563090015649171,
      "loss": 2.9487,
      "step": 40078
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.636454463005066,
      "learning_rate": 0.0005563068757735495,
      "loss": 3.1669,
      "step": 40079
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2379369735717773,
      "learning_rate": 0.0005563047499345298,
      "loss": 3.1143,
      "step": 40080
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2756147384643555,
      "learning_rate": 0.0005563026240478581,
      "loss": 3.0182,
      "step": 40081
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6340751647949219,
      "learning_rate": 0.0005563004981135349,
      "loss": 3.2018,
      "step": 40082
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8729268312454224,
      "learning_rate": 0.0005562983721315607,
      "loss": 3.3653,
      "step": 40083
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.135936975479126,
      "learning_rate": 0.0005562962461019357,
      "loss": 3.1733,
      "step": 40084
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.441597580909729,
      "learning_rate": 0.0005562941200246604,
      "loss": 3.0779,
      "step": 40085
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4982750415802002,
      "learning_rate": 0.0005562919938997352,
      "loss": 2.9154,
      "step": 40086
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1830406188964844,
      "learning_rate": 0.0005562898677271606,
      "loss": 2.9997,
      "step": 40087
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0430243015289307,
      "learning_rate": 0.0005562877415069366,
      "loss": 3.0797,
      "step": 40088
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.395294427871704,
      "learning_rate": 0.0005562856152390641,
      "loss": 3.0862,
      "step": 40089
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5835860967636108,
      "learning_rate": 0.0005562834889235432,
      "loss": 3.0203,
      "step": 40090
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6037436723709106,
      "learning_rate": 0.0005562813625603744,
      "loss": 2.6832,
      "step": 40091
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.394715666770935,
      "learning_rate": 0.0005562792361495579,
      "loss": 3.1143,
      "step": 40092
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4269657135009766,
      "learning_rate": 0.0005562771096910944,
      "loss": 3.0418,
      "step": 40093
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2558268308639526,
      "learning_rate": 0.0005562749831849841,
      "loss": 2.9377,
      "step": 40094
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4590340852737427,
      "learning_rate": 0.0005562728566312274,
      "loss": 3.2695,
      "step": 40095
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.101534128189087,
      "learning_rate": 0.0005562707300298249,
      "loss": 3.2023,
      "step": 40096
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6186952590942383,
      "learning_rate": 0.0005562686033807767,
      "loss": 3.1859,
      "step": 40097
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3583272695541382,
      "learning_rate": 0.0005562664766840834,
      "loss": 2.9708,
      "step": 40098
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.506904125213623,
      "learning_rate": 0.0005562643499397453,
      "loss": 2.7423,
      "step": 40099
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0311217308044434,
      "learning_rate": 0.0005562622231477628,
      "loss": 2.9597,
      "step": 40100
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5319017171859741,
      "learning_rate": 0.0005562600963081362,
      "loss": 3.0954,
      "step": 40101
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3563350439071655,
      "learning_rate": 0.0005562579694208662,
      "loss": 2.7917,
      "step": 40102
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4435076713562012,
      "learning_rate": 0.0005562558424859529,
      "loss": 3.1223,
      "step": 40103
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4453712701797485,
      "learning_rate": 0.0005562537155033968,
      "loss": 3.2465,
      "step": 40104
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3064931631088257,
      "learning_rate": 0.0005562515884731984,
      "loss": 2.9472,
      "step": 40105
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2755837440490723,
      "learning_rate": 0.000556249461395358,
      "loss": 2.9338,
      "step": 40106
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6706677675247192,
      "learning_rate": 0.0005562473342698758,
      "loss": 2.7987,
      "step": 40107
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5343111753463745,
      "learning_rate": 0.0005562452070967525,
      "loss": 2.913,
      "step": 40108
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.630178451538086,
      "learning_rate": 0.0005562430798759883,
      "loss": 2.9958,
      "step": 40109
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.445732831954956,
      "learning_rate": 0.0005562409526075839,
      "loss": 3.1167,
      "step": 40110
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7309229373931885,
      "learning_rate": 0.0005562388252915392,
      "loss": 2.8091,
      "step": 40111
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.635663390159607,
      "learning_rate": 0.000556236697927855,
      "loss": 3.0948,
      "step": 40112
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.424745798110962,
      "learning_rate": 0.0005562345705165315,
      "loss": 3.112,
      "step": 40113
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.576806902885437,
      "learning_rate": 0.0005562324430575693,
      "loss": 3.0414,
      "step": 40114
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.56715726852417,
      "learning_rate": 0.0005562303155509686,
      "loss": 3.1999,
      "step": 40115
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8749150037765503,
      "learning_rate": 0.0005562281879967297,
      "loss": 3.2106,
      "step": 40116
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.489241361618042,
      "learning_rate": 0.0005562260603948532,
      "loss": 2.937,
      "step": 40117
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4133949279785156,
      "learning_rate": 0.0005562239327453396,
      "loss": 2.9947,
      "step": 40118
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.901347041130066,
      "learning_rate": 0.000556221805048189,
      "loss": 3.0562,
      "step": 40119
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6149648427963257,
      "learning_rate": 0.000556219677303402,
      "loss": 3.1883,
      "step": 40120
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.091773271560669,
      "learning_rate": 0.0005562175495109788,
      "loss": 3.2831,
      "step": 40121
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0100860595703125,
      "learning_rate": 0.00055621542167092,
      "loss": 3.0148,
      "step": 40122
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6896820068359375,
      "learning_rate": 0.0005562132937832259,
      "loss": 3.5212,
      "step": 40123
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4197884798049927,
      "learning_rate": 0.000556211165847897,
      "loss": 2.892,
      "step": 40124
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.500342845916748,
      "learning_rate": 0.0005562090378649335,
      "loss": 3.2459,
      "step": 40125
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8607004880905151,
      "learning_rate": 0.0005562069098343359,
      "loss": 3.0561,
      "step": 40126
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.70205557346344,
      "learning_rate": 0.0005562047817561047,
      "loss": 3.1638,
      "step": 40127
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8434399366378784,
      "learning_rate": 0.0005562026536302401,
      "loss": 3.1506,
      "step": 40128
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6624860763549805,
      "learning_rate": 0.0005562005254567426,
      "loss": 3.0874,
      "step": 40129
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4887155294418335,
      "learning_rate": 0.0005561983972356126,
      "loss": 3.0824,
      "step": 40130
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2991026639938354,
      "learning_rate": 0.0005561962689668504,
      "loss": 3.1154,
      "step": 40131
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5198615789413452,
      "learning_rate": 0.0005561941406504566,
      "loss": 3.2258,
      "step": 40132
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6333247423171997,
      "learning_rate": 0.0005561920122864315,
      "loss": 3.0877,
      "step": 40133
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9348506927490234,
      "learning_rate": 0.0005561898838747755,
      "loss": 3.0949,
      "step": 40134
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.049490213394165,
      "learning_rate": 0.0005561877554154888,
      "loss": 2.9595,
      "step": 40135
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3554141521453857,
      "learning_rate": 0.000556185626908572,
      "loss": 3.1553,
      "step": 40136
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7330094575881958,
      "learning_rate": 0.0005561834983540255,
      "loss": 2.991,
      "step": 40137
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2198071479797363,
      "learning_rate": 0.0005561813697518497,
      "loss": 3.2324,
      "step": 40138
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.660378336906433,
      "learning_rate": 0.0005561792411020449,
      "loss": 3.3026,
      "step": 40139
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8783341646194458,
      "learning_rate": 0.0005561771124046115,
      "loss": 3.1683,
      "step": 40140
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2152950763702393,
      "learning_rate": 0.00055617498365955,
      "loss": 3.069,
      "step": 40141
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3265379667282104,
      "learning_rate": 0.0005561728548668607,
      "loss": 3.1281,
      "step": 40142
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4072864055633545,
      "learning_rate": 0.000556170726026544,
      "loss": 3.0086,
      "step": 40143
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1931827068328857,
      "learning_rate": 0.0005561685971386003,
      "loss": 3.2156,
      "step": 40144
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5816981792449951,
      "learning_rate": 0.0005561664682030301,
      "loss": 2.8838,
      "step": 40145
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0687918663024902,
      "learning_rate": 0.0005561643392198338,
      "loss": 3.3534,
      "step": 40146
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3326756954193115,
      "learning_rate": 0.0005561622101890115,
      "loss": 3.2781,
      "step": 40147
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4589487314224243,
      "learning_rate": 0.000556160081110564,
      "loss": 3.1662,
      "step": 40148
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4948289394378662,
      "learning_rate": 0.0005561579519844914,
      "loss": 2.7219,
      "step": 40149
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9307153224945068,
      "learning_rate": 0.0005561558228107942,
      "loss": 3.0394,
      "step": 40150
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1535900831222534,
      "learning_rate": 0.0005561536935894728,
      "loss": 3.2961,
      "step": 40151
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3033888339996338,
      "learning_rate": 0.0005561515643205277,
      "loss": 3.0037,
      "step": 40152
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9949208498001099,
      "learning_rate": 0.0005561494350039592,
      "loss": 3.106,
      "step": 40153
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3334146738052368,
      "learning_rate": 0.0005561473056397675,
      "loss": 2.9911,
      "step": 40154
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6056833267211914,
      "learning_rate": 0.0005561451762279533,
      "loss": 2.8499,
      "step": 40155
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.254760265350342,
      "learning_rate": 0.0005561430467685168,
      "loss": 2.9949,
      "step": 40156
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4159590005874634,
      "learning_rate": 0.0005561409172614586,
      "loss": 3.1116,
      "step": 40157
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5623143911361694,
      "learning_rate": 0.0005561387877067789,
      "loss": 3.0121,
      "step": 40158
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8877683877944946,
      "learning_rate": 0.0005561366581044782,
      "loss": 3.0871,
      "step": 40159
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.550884246826172,
      "learning_rate": 0.0005561345284545567,
      "loss": 3.1233,
      "step": 40160
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3422263860702515,
      "learning_rate": 0.0005561323987570151,
      "loss": 3.1699,
      "step": 40161
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.6235907077789307,
      "learning_rate": 0.0005561302690118536,
      "loss": 3.1899,
      "step": 40162
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.8144679069519043,
      "learning_rate": 0.0005561281392190727,
      "loss": 3.0352,
      "step": 40163
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8557968139648438,
      "learning_rate": 0.0005561260093786727,
      "loss": 3.115,
      "step": 40164
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2708921432495117,
      "learning_rate": 0.0005561238794906541,
      "loss": 2.9965,
      "step": 40165
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.539339542388916,
      "learning_rate": 0.0005561217495550173,
      "loss": 2.87,
      "step": 40166
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4621583223342896,
      "learning_rate": 0.0005561196195717624,
      "loss": 3.0274,
      "step": 40167
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.203302025794983,
      "learning_rate": 0.0005561174895408902,
      "loss": 3.0466,
      "step": 40168
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.174544095993042,
      "learning_rate": 0.0005561153594624009,
      "loss": 3.0553,
      "step": 40169
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6927571296691895,
      "learning_rate": 0.0005561132293362949,
      "loss": 3.2026,
      "step": 40170
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4758707284927368,
      "learning_rate": 0.0005561110991625727,
      "loss": 2.9707,
      "step": 40171
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7809066772460938,
      "learning_rate": 0.0005561089689412345,
      "loss": 3.0406,
      "step": 40172
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.358158588409424,
      "learning_rate": 0.0005561068386722808,
      "loss": 3.0724,
      "step": 40173
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.088219404220581,
      "learning_rate": 0.000556104708355712,
      "loss": 2.9629,
      "step": 40174
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6997612714767456,
      "learning_rate": 0.0005561025779915286,
      "loss": 2.987,
      "step": 40175
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.497646450996399,
      "learning_rate": 0.0005561004475797308,
      "loss": 2.9753,
      "step": 40176
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3944205045700073,
      "learning_rate": 0.0005560983171203191,
      "loss": 3.0516,
      "step": 40177
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5587953329086304,
      "learning_rate": 0.000556096186613294,
      "loss": 3.3069,
      "step": 40178
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5881969928741455,
      "learning_rate": 0.0005560940560586557,
      "loss": 2.9692,
      "step": 40179
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6379238367080688,
      "learning_rate": 0.0005560919254564046,
      "loss": 3.0818,
      "step": 40180
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5274159908294678,
      "learning_rate": 0.0005560897948065413,
      "loss": 2.8632,
      "step": 40181
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5645232200622559,
      "learning_rate": 0.0005560876641090661,
      "loss": 3.1445,
      "step": 40182
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4507585763931274,
      "learning_rate": 0.0005560855333639793,
      "loss": 3.1076,
      "step": 40183
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6246072053909302,
      "learning_rate": 0.0005560834025712813,
      "loss": 3.0981,
      "step": 40184
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.727587342262268,
      "learning_rate": 0.0005560812717309726,
      "loss": 3.1694,
      "step": 40185
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.725598931312561,
      "learning_rate": 0.0005560791408430536,
      "loss": 2.9203,
      "step": 40186
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6520136594772339,
      "learning_rate": 0.0005560770099075247,
      "loss": 2.9864,
      "step": 40187
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6892327070236206,
      "learning_rate": 0.0005560748789243861,
      "loss": 2.9139,
      "step": 40188
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.117584466934204,
      "learning_rate": 0.0005560727478936385,
      "loss": 2.985,
      "step": 40189
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4569411277770996,
      "learning_rate": 0.0005560706168152821,
      "loss": 2.9283,
      "step": 40190
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4846864938735962,
      "learning_rate": 0.0005560684856893174,
      "loss": 3.268,
      "step": 40191
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.560482144355774,
      "learning_rate": 0.0005560663545157446,
      "loss": 3.1904,
      "step": 40192
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3265818357467651,
      "learning_rate": 0.0005560642232945643,
      "loss": 3.0677,
      "step": 40193
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.221733570098877,
      "learning_rate": 0.0005560620920257767,
      "loss": 3.1831,
      "step": 40194
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1954338550567627,
      "learning_rate": 0.0005560599607093825,
      "loss": 2.8828,
      "step": 40195
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2772446870803833,
      "learning_rate": 0.0005560578293453819,
      "loss": 3.2178,
      "step": 40196
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.568227767944336,
      "learning_rate": 0.0005560556979337753,
      "loss": 3.0736,
      "step": 40197
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.4373867511749268,
      "learning_rate": 0.000556053566474563,
      "loss": 3.1076,
      "step": 40198
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.818217396736145,
      "learning_rate": 0.0005560514349677456,
      "loss": 2.9147,
      "step": 40199
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.437058448791504,
      "learning_rate": 0.0005560493034133234,
      "loss": 3.1405,
      "step": 40200
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.6282999515533447,
      "learning_rate": 0.0005560471718112968,
      "loss": 3.0014,
      "step": 40201
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.5570435523986816,
      "learning_rate": 0.0005560450401616662,
      "loss": 3.1024,
      "step": 40202
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2412796020507812,
      "learning_rate": 0.000556042908464432,
      "loss": 3.1859,
      "step": 40203
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5449438095092773,
      "learning_rate": 0.0005560407767195946,
      "loss": 3.0451,
      "step": 40204
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.4348888397216797,
      "learning_rate": 0.0005560386449271543,
      "loss": 3.0953,
      "step": 40205
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.233832836151123,
      "learning_rate": 0.0005560365130871117,
      "loss": 3.1032,
      "step": 40206
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.413809895515442,
      "learning_rate": 0.000556034381199467,
      "loss": 3.2784,
      "step": 40207
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4971003532409668,
      "learning_rate": 0.0005560322492642208,
      "loss": 2.9507,
      "step": 40208
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.851862668991089,
      "learning_rate": 0.0005560301172813732,
      "loss": 3.1549,
      "step": 40209
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3131422996520996,
      "learning_rate": 0.0005560279852509249,
      "loss": 2.8967,
      "step": 40210
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3991087675094604,
      "learning_rate": 0.0005560258531728761,
      "loss": 2.9563,
      "step": 40211
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.6234118938446045,
      "learning_rate": 0.0005560237210472272,
      "loss": 3.0451,
      "step": 40212
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.1354293823242188,
      "learning_rate": 0.0005560215888739787,
      "loss": 2.8878,
      "step": 40213
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5572900772094727,
      "learning_rate": 0.0005560194566531312,
      "loss": 2.9123,
      "step": 40214
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2973073720932007,
      "learning_rate": 0.0005560173243846846,
      "loss": 3.128,
      "step": 40215
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.7533304691314697,
      "learning_rate": 0.0005560151920686395,
      "loss": 2.7892,
      "step": 40216
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.5590648651123047,
      "learning_rate": 0.0005560130597049964,
      "loss": 3.2052,
      "step": 40217
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8308753967285156,
      "learning_rate": 0.0005560109272937557,
      "loss": 3.1904,
      "step": 40218
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4471708536148071,
      "learning_rate": 0.0005560087948349177,
      "loss": 3.1267,
      "step": 40219
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.8521957397460938,
      "learning_rate": 0.0005560066623284828,
      "loss": 3.3138,
      "step": 40220
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.979870557785034,
      "learning_rate": 0.0005560045297744514,
      "loss": 2.8971,
      "step": 40221
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.7750253677368164,
      "learning_rate": 0.0005560023971728241,
      "loss": 2.9682,
      "step": 40222
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.34743070602417,
      "learning_rate": 0.000556000264523601,
      "loss": 3.2941,
      "step": 40223
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.456519842147827,
      "learning_rate": 0.0005559981318267826,
      "loss": 3.0176,
      "step": 40224
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.223792791366577,
      "learning_rate": 0.0005559959990823693,
      "loss": 3.023,
      "step": 40225
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8325916528701782,
      "learning_rate": 0.0005559938662903616,
      "loss": 2.8731,
      "step": 40226
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0815749168395996,
      "learning_rate": 0.0005559917334507598,
      "loss": 2.7984,
      "step": 40227
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1992220878601074,
      "learning_rate": 0.0005559896005635643,
      "loss": 3.0167,
      "step": 40228
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5443440675735474,
      "learning_rate": 0.0005559874676287753,
      "loss": 3.0699,
      "step": 40229
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3903391361236572,
      "learning_rate": 0.0005559853346463937,
      "loss": 3.2602,
      "step": 40230
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4029085636138916,
      "learning_rate": 0.0005559832016164194,
      "loss": 3.0923,
      "step": 40231
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7006860971450806,
      "learning_rate": 0.0005559810685388531,
      "loss": 3.0408,
      "step": 40232
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.176845073699951,
      "learning_rate": 0.000555978935413695,
      "loss": 2.9736,
      "step": 40233
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.58209228515625,
      "learning_rate": 0.0005559768022409457,
      "loss": 2.999,
      "step": 40234
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2207491397857666,
      "learning_rate": 0.0005559746690206053,
      "loss": 3.0494,
      "step": 40235
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5957469940185547,
      "learning_rate": 0.0005559725357526745,
      "loss": 2.7627,
      "step": 40236
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.562557578086853,
      "learning_rate": 0.0005559704024371535,
      "loss": 3.1205,
      "step": 40237
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4953930377960205,
      "learning_rate": 0.0005559682690740429,
      "loss": 3.1815,
      "step": 40238
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9718384742736816,
      "learning_rate": 0.0005559661356633428,
      "loss": 2.9139,
      "step": 40239
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2920600175857544,
      "learning_rate": 0.0005559640022050539,
      "loss": 3.125,
      "step": 40240
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3854361772537231,
      "learning_rate": 0.0005559618686991765,
      "loss": 2.9492,
      "step": 40241
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6826868057250977,
      "learning_rate": 0.0005559597351457108,
      "loss": 3.0977,
      "step": 40242
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3836780786514282,
      "learning_rate": 0.0005559576015446574,
      "loss": 3.0967,
      "step": 40243
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4759535789489746,
      "learning_rate": 0.0005559554678960168,
      "loss": 3.004,
      "step": 40244
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7204997539520264,
      "learning_rate": 0.000555953334199789,
      "loss": 3.1546,
      "step": 40245
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8837283849716187,
      "learning_rate": 0.0005559512004559748,
      "loss": 2.8635,
      "step": 40246
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7937657833099365,
      "learning_rate": 0.0005559490666645743,
      "loss": 3.1658,
      "step": 40247
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5515003204345703,
      "learning_rate": 0.0005559469328255882,
      "loss": 2.8666,
      "step": 40248
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.7150468826293945,
      "learning_rate": 0.0005559447989390167,
      "loss": 3.109,
      "step": 40249
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7501128911972046,
      "learning_rate": 0.0005559426650048602,
      "loss": 3.1482,
      "step": 40250
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.846573829650879,
      "learning_rate": 0.0005559405310231191,
      "loss": 3.0097,
      "step": 40251
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3335862159729004,
      "learning_rate": 0.0005559383969937939,
      "loss": 3.0357,
      "step": 40252
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.2087578773498535,
      "learning_rate": 0.0005559362629168849,
      "loss": 3.0604,
      "step": 40253
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.333990454673767,
      "learning_rate": 0.0005559341287923924,
      "loss": 3.0638,
      "step": 40254
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.7422802448272705,
      "learning_rate": 0.0005559319946203171,
      "loss": 2.8351,
      "step": 40255
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.425919771194458,
      "learning_rate": 0.0005559298604006591,
      "loss": 3.1985,
      "step": 40256
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8564190864562988,
      "learning_rate": 0.0005559277261334189,
      "loss": 2.9347,
      "step": 40257
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4814610481262207,
      "learning_rate": 0.0005559255918185969,
      "loss": 2.9582,
      "step": 40258
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.311077833175659,
      "learning_rate": 0.0005559234574561936,
      "loss": 3.0453,
      "step": 40259
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.984586000442505,
      "learning_rate": 0.0005559213230462093,
      "loss": 3.0428,
      "step": 40260
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3920204639434814,
      "learning_rate": 0.0005559191885886443,
      "loss": 3.0967,
      "step": 40261
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.003217935562134,
      "learning_rate": 0.000555917054083499,
      "loss": 3.0324,
      "step": 40262
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1187021732330322,
      "learning_rate": 0.0005559149195307741,
      "loss": 2.9916,
      "step": 40263
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.4508283138275146,
      "learning_rate": 0.0005559127849304697,
      "loss": 3.0668,
      "step": 40264
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4593560695648193,
      "learning_rate": 0.0005559106502825863,
      "loss": 2.9177,
      "step": 40265
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9185729026794434,
      "learning_rate": 0.0005559085155871242,
      "loss": 3.0306,
      "step": 40266
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.910142660140991,
      "learning_rate": 0.0005559063808440839,
      "loss": 3.0284,
      "step": 40267
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.157438039779663,
      "learning_rate": 0.0005559042460534659,
      "loss": 3.2401,
      "step": 40268
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5811225175857544,
      "learning_rate": 0.0005559021112152703,
      "loss": 3.2176,
      "step": 40269
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8544731140136719,
      "learning_rate": 0.0005558999763294978,
      "loss": 2.9809,
      "step": 40270
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.6058313846588135,
      "learning_rate": 0.0005558978413961486,
      "loss": 2.9031,
      "step": 40271
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3950679302215576,
      "learning_rate": 0.0005558957064152231,
      "loss": 3.1807,
      "step": 40272
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4977571964263916,
      "learning_rate": 0.0005558935713867219,
      "loss": 3.0179,
      "step": 40273
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.5311949253082275,
      "learning_rate": 0.0005558914363106451,
      "loss": 2.9027,
      "step": 40274
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2596431970596313,
      "learning_rate": 0.0005558893011869934,
      "loss": 2.9459,
      "step": 40275
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0654032230377197,
      "learning_rate": 0.000555887166015767,
      "loss": 2.8478,
      "step": 40276
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7585604190826416,
      "learning_rate": 0.0005558850307969663,
      "loss": 3.1264,
      "step": 40277
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3229776620864868,
      "learning_rate": 0.0005558828955305918,
      "loss": 3.2006,
      "step": 40278
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1635735034942627,
      "learning_rate": 0.0005558807602166438,
      "loss": 2.9271,
      "step": 40279
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5729972124099731,
      "learning_rate": 0.0005558786248551227,
      "loss": 3.3338,
      "step": 40280
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3240270614624023,
      "learning_rate": 0.000555876489446029,
      "loss": 2.8999,
      "step": 40281
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8763184547424316,
      "learning_rate": 0.000555874353989363,
      "loss": 3.0384,
      "step": 40282
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.488034725189209,
      "learning_rate": 0.0005558722184851252,
      "loss": 3.1188,
      "step": 40283
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5605548620224,
      "learning_rate": 0.0005558700829333158,
      "loss": 2.9509,
      "step": 40284
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3197280168533325,
      "learning_rate": 0.0005558679473339354,
      "loss": 2.8895,
      "step": 40285
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9066745042800903,
      "learning_rate": 0.0005558658116869843,
      "loss": 2.9653,
      "step": 40286
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6882734298706055,
      "learning_rate": 0.0005558636759924629,
      "loss": 3.2525,
      "step": 40287
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.963346004486084,
      "learning_rate": 0.0005558615402503716,
      "loss": 2.7854,
      "step": 40288
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0336155891418457,
      "learning_rate": 0.0005558594044607109,
      "loss": 3.2477,
      "step": 40289
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.6232303380966187,
      "learning_rate": 0.0005558572686234811,
      "loss": 3.0865,
      "step": 40290
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.1378347873687744,
      "learning_rate": 0.0005558551327386825,
      "loss": 3.1088,
      "step": 40291
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4777979850769043,
      "learning_rate": 0.0005558529968063157,
      "loss": 3.0337,
      "step": 40292
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.439207911491394,
      "learning_rate": 0.000555850860826381,
      "loss": 3.1695,
      "step": 40293
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2582110166549683,
      "learning_rate": 0.0005558487247988786,
      "loss": 2.9481,
      "step": 40294
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7744816541671753,
      "learning_rate": 0.0005558465887238093,
      "loss": 2.9287,
      "step": 40295
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5377782583236694,
      "learning_rate": 0.0005558444526011732,
      "loss": 3.0852,
      "step": 40296
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2077388763427734,
      "learning_rate": 0.0005558423164309708,
      "loss": 3.1421,
      "step": 40297
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9334758520126343,
      "learning_rate": 0.0005558401802132025,
      "loss": 3.0402,
      "step": 40298
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9602665901184082,
      "learning_rate": 0.0005558380439478687,
      "loss": 3.0228,
      "step": 40299
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.386648416519165,
      "learning_rate": 0.0005558359076349698,
      "loss": 3.1157,
      "step": 40300
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.17608904838562,
      "learning_rate": 0.0005558337712745061,
      "loss": 2.8679,
      "step": 40301
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.111778497695923,
      "learning_rate": 0.0005558316348664782,
      "loss": 3.0895,
      "step": 40302
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.3122780323028564,
      "learning_rate": 0.0005558294984108862,
      "loss": 3.3273,
      "step": 40303
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.235252618789673,
      "learning_rate": 0.0005558273619077308,
      "loss": 3.0026,
      "step": 40304
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4466633796691895,
      "learning_rate": 0.0005558252253570122,
      "loss": 3.1606,
      "step": 40305
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7820953130722046,
      "learning_rate": 0.0005558230887587308,
      "loss": 3.1744,
      "step": 40306
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8811187744140625,
      "learning_rate": 0.0005558209521128872,
      "loss": 3.0229,
      "step": 40307
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.4503196477890015,
      "learning_rate": 0.0005558188154194815,
      "loss": 2.9283,
      "step": 40308
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.177504062652588,
      "learning_rate": 0.0005558166786785144,
      "loss": 3.3109,
      "step": 40309
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.7701653242111206,
      "learning_rate": 0.0005558145418899861,
      "loss": 2.9455,
      "step": 40310
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.2546478509902954,
      "learning_rate": 0.000555812405053897,
      "loss": 2.9334,
      "step": 40311
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.8173251152038574,
      "learning_rate": 0.0005558102681702475,
      "loss": 3.3171,
      "step": 40312
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.5352146625518799,
      "learning_rate": 0.0005558081312390383,
      "loss": 3.1489,
      "step": 40313
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.261404275894165,
      "learning_rate": 0.0005558059942602692,
      "loss": 3.2679,
      "step": 40314
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.882360816001892,
      "learning_rate": 0.0005558038572339412,
      "loss": 2.8136,
      "step": 40315
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.581516742706299,
      "learning_rate": 0.0005558017201600543,
      "loss": 3.1878,
      "step": 40316
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3069735765457153,
      "learning_rate": 0.0005557995830386091,
      "loss": 2.964,
      "step": 40317
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.9755812883377075,
      "learning_rate": 0.0005557974458696059,
      "loss": 2.9939,
      "step": 40318
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.3892666101455688,
      "learning_rate": 0.0005557953086530452,
      "loss": 3.0571,
      "step": 40319
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.638702630996704,
      "learning_rate": 0.0005557931713889272,
      "loss": 3.0345,
      "step": 40320
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4016690254211426,
      "learning_rate": 0.0005557910340772524,
      "loss": 2.9178,
      "step": 40321
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.407015800476074,
      "learning_rate": 0.0005557888967180212,
      "loss": 2.8986,
      "step": 40322
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5291731357574463,
      "learning_rate": 0.0005557867593112342,
      "loss": 2.8963,
      "step": 40323
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7247051000595093,
      "learning_rate": 0.0005557846218568915,
      "loss": 3.1237,
      "step": 40324
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3682899475097656,
      "learning_rate": 0.0005557824843549936,
      "loss": 3.1131,
      "step": 40325
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4724643230438232,
      "learning_rate": 0.000555780346805541,
      "loss": 3.071,
      "step": 40326
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5066089630126953,
      "learning_rate": 0.0005557782092085339,
      "loss": 2.9522,
      "step": 40327
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1929388046264648,
      "learning_rate": 0.0005557760715639729,
      "loss": 2.8515,
      "step": 40328
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.561169981956482,
      "learning_rate": 0.0005557739338718582,
      "loss": 3.0049,
      "step": 40329
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.254938840866089,
      "learning_rate": 0.0005557717961321903,
      "loss": 2.9974,
      "step": 40330
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5151678323745728,
      "learning_rate": 0.0005557696583449697,
      "loss": 2.6061,
      "step": 40331
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5250740051269531,
      "learning_rate": 0.0005557675205101967,
      "loss": 2.8788,
      "step": 40332
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.997376561164856,
      "learning_rate": 0.0005557653826278715,
      "loss": 3.0272,
      "step": 40333
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6903502941131592,
      "learning_rate": 0.0005557632446979947,
      "loss": 2.7281,
      "step": 40334
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3136489391326904,
      "learning_rate": 0.0005557611067205668,
      "loss": 3.1039,
      "step": 40335
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8230868577957153,
      "learning_rate": 0.0005557589686955882,
      "loss": 3.4061,
      "step": 40336
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9867725372314453,
      "learning_rate": 0.000555756830623059,
      "loss": 2.9639,
      "step": 40337
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.599370002746582,
      "learning_rate": 0.0005557546925029799,
      "loss": 3.1503,
      "step": 40338
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7659709453582764,
      "learning_rate": 0.000555752554335351,
      "loss": 3.0918,
      "step": 40339
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.025846004486084,
      "learning_rate": 0.000555750416120173,
      "loss": 3.1137,
      "step": 40340
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.453098177909851,
      "learning_rate": 0.0005557482778574462,
      "loss": 2.8804,
      "step": 40341
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7635773420333862,
      "learning_rate": 0.0005557461395471709,
      "loss": 2.9849,
      "step": 40342
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3938496112823486,
      "learning_rate": 0.0005557440011893475,
      "loss": 3.1283,
      "step": 40343
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.565116047859192,
      "learning_rate": 0.0005557418627839765,
      "loss": 2.8723,
      "step": 40344
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.965009093284607,
      "learning_rate": 0.0005557397243310583,
      "loss": 3.1768,
      "step": 40345
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.038989782333374,
      "learning_rate": 0.0005557375858305934,
      "loss": 3.0611,
      "step": 40346
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5918021202087402,
      "learning_rate": 0.0005557354472825819,
      "loss": 2.8567,
      "step": 40347
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.341775894165039,
      "learning_rate": 0.0005557333086870243,
      "loss": 2.9035,
      "step": 40348
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6800692081451416,
      "learning_rate": 0.0005557311700439212,
      "loss": 3.3941,
      "step": 40349
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.631980299949646,
      "learning_rate": 0.0005557290313532728,
      "loss": 3.183,
      "step": 40350
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3016955852508545,
      "learning_rate": 0.0005557268926150794,
      "loss": 2.8726,
      "step": 40351
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.517724871635437,
      "learning_rate": 0.0005557247538293417,
      "loss": 3.1919,
      "step": 40352
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.418300986289978,
      "learning_rate": 0.0005557226149960599,
      "loss": 3.1064,
      "step": 40353
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5311897993087769,
      "learning_rate": 0.0005557204761152344,
      "loss": 3.0238,
      "step": 40354
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.554378628730774,
      "learning_rate": 0.0005557183371868656,
      "loss": 3.1189,
      "step": 40355
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2349086999893188,
      "learning_rate": 0.0005557161982109541,
      "loss": 3.0601,
      "step": 40356
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4157449007034302,
      "learning_rate": 0.0005557140591875,
      "loss": 2.9198,
      "step": 40357
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8644542694091797,
      "learning_rate": 0.0005557119201165039,
      "loss": 3.1717,
      "step": 40358
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9712709188461304,
      "learning_rate": 0.0005557097809979661,
      "loss": 2.9508,
      "step": 40359
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.742901086807251,
      "learning_rate": 0.000555707641831887,
      "loss": 2.9932,
      "step": 40360
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8834688663482666,
      "learning_rate": 0.0005557055026182671,
      "loss": 3.1802,
      "step": 40361
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.9632272720336914,
      "learning_rate": 0.0005557033633571066,
      "loss": 2.7691,
      "step": 40362
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8273510932922363,
      "learning_rate": 0.000555701224048406,
      "loss": 2.9687,
      "step": 40363
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9866561889648438,
      "learning_rate": 0.0005556990846921658,
      "loss": 3.0805,
      "step": 40364
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7225509881973267,
      "learning_rate": 0.0005556969452883864,
      "loss": 3.0282,
      "step": 40365
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.327325701713562,
      "learning_rate": 0.000555694805837068,
      "loss": 3.0597,
      "step": 40366
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6986198425292969,
      "learning_rate": 0.0005556926663382112,
      "loss": 3.0165,
      "step": 40367
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.7107012271881104,
      "learning_rate": 0.0005556905267918162,
      "loss": 2.7846,
      "step": 40368
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.654960036277771,
      "learning_rate": 0.0005556883871978835,
      "loss": 3.2976,
      "step": 40369
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3563308715820312,
      "learning_rate": 0.0005556862475564135,
      "loss": 3.0571,
      "step": 40370
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3567122220993042,
      "learning_rate": 0.0005556841078674067,
      "loss": 3.0956,
      "step": 40371
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.946675419807434,
      "learning_rate": 0.0005556819681308633,
      "loss": 2.9895,
      "step": 40372
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.136016368865967,
      "learning_rate": 0.0005556798283467839,
      "loss": 3.0397,
      "step": 40373
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5499699115753174,
      "learning_rate": 0.0005556776885151687,
      "loss": 2.9866,
      "step": 40374
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.272135019302368,
      "learning_rate": 0.0005556755486360182,
      "loss": 3.2198,
      "step": 40375
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.9636874198913574,
      "learning_rate": 0.0005556734087093328,
      "loss": 2.9396,
      "step": 40376
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4788470268249512,
      "learning_rate": 0.0005556712687351129,
      "loss": 3.0678,
      "step": 40377
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2796962261199951,
      "learning_rate": 0.0005556691287133588,
      "loss": 3.0152,
      "step": 40378
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5793945789337158,
      "learning_rate": 0.0005556669886440711,
      "loss": 3.1986,
      "step": 40379
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5952924489974976,
      "learning_rate": 0.0005556648485272501,
      "loss": 3.0153,
      "step": 40380
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.02677583694458,
      "learning_rate": 0.0005556627083628962,
      "loss": 3.0173,
      "step": 40381
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.489211082458496,
      "learning_rate": 0.0005556605681510096,
      "loss": 3.2658,
      "step": 40382
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4293874502182007,
      "learning_rate": 0.000555658427891591,
      "loss": 3.2785,
      "step": 40383
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4559459686279297,
      "learning_rate": 0.0005556562875846406,
      "loss": 2.9868,
      "step": 40384
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.471678376197815,
      "learning_rate": 0.000555654147230159,
      "loss": 2.7135,
      "step": 40385
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4424225091934204,
      "learning_rate": 0.0005556520068281464,
      "loss": 2.7682,
      "step": 40386
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4604687690734863,
      "learning_rate": 0.0005556498663786033,
      "loss": 3.1161,
      "step": 40387
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7060308456420898,
      "learning_rate": 0.00055564772588153,
      "loss": 2.9137,
      "step": 40388
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.474474310874939,
      "learning_rate": 0.0005556455853369268,
      "loss": 3.1088,
      "step": 40389
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7066562175750732,
      "learning_rate": 0.0005556434447447945,
      "loss": 3.2726,
      "step": 40390
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.651273488998413,
      "learning_rate": 0.0005556413041051333,
      "loss": 2.8972,
      "step": 40391
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5545010566711426,
      "learning_rate": 0.0005556391634179434,
      "loss": 2.9911,
      "step": 40392
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5355420112609863,
      "learning_rate": 0.0005556370226832254,
      "loss": 2.8656,
      "step": 40393
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8269109725952148,
      "learning_rate": 0.0005556348819009796,
      "loss": 2.8692,
      "step": 40394
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7512401342391968,
      "learning_rate": 0.0005556327410712065,
      "loss": 3.0647,
      "step": 40395
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.465151309967041,
      "learning_rate": 0.0005556306001939065,
      "loss": 2.8667,
      "step": 40396
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5477228164672852,
      "learning_rate": 0.00055562845926908,
      "loss": 3.0372,
      "step": 40397
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5640937089920044,
      "learning_rate": 0.0005556263182967272,
      "loss": 3.1306,
      "step": 40398
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6781569719314575,
      "learning_rate": 0.0005556241772768486,
      "loss": 2.9492,
      "step": 40399
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1762378215789795,
      "learning_rate": 0.0005556220362094448,
      "loss": 3.0529,
      "step": 40400
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3328630924224854,
      "learning_rate": 0.0005556198950945159,
      "loss": 3.2882,
      "step": 40401
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5657457113265991,
      "learning_rate": 0.0005556177539320625,
      "loss": 3.3656,
      "step": 40402
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.387268304824829,
      "learning_rate": 0.000555615612722085,
      "loss": 3.1237,
      "step": 40403
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.414393901824951,
      "learning_rate": 0.0005556134714645837,
      "loss": 3.1487,
      "step": 40404
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5221776962280273,
      "learning_rate": 0.0005556113301595589,
      "loss": 3.1752,
      "step": 40405
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.954897165298462,
      "learning_rate": 0.0005556091888070112,
      "loss": 3.0746,
      "step": 40406
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3983885049819946,
      "learning_rate": 0.000555607047406941,
      "loss": 2.9218,
      "step": 40407
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3152613639831543,
      "learning_rate": 0.0005556049059593486,
      "loss": 3.3122,
      "step": 40408
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.356108546257019,
      "learning_rate": 0.0005556027644642344,
      "loss": 2.9058,
      "step": 40409
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.9666664600372314,
      "learning_rate": 0.0005556006229215989,
      "loss": 2.7775,
      "step": 40410
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6500505208969116,
      "learning_rate": 0.0005555984813314423,
      "loss": 3.2739,
      "step": 40411
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5565543174743652,
      "learning_rate": 0.0005555963396937651,
      "loss": 2.9827,
      "step": 40412
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3502905368804932,
      "learning_rate": 0.0005555941980085678,
      "loss": 3.0557,
      "step": 40413
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5866444110870361,
      "learning_rate": 0.0005555920562758507,
      "loss": 2.9932,
      "step": 40414
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4417765140533447,
      "learning_rate": 0.0005555899144956142,
      "loss": 2.984,
      "step": 40415
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3003798723220825,
      "learning_rate": 0.0005555877726678587,
      "loss": 2.9754,
      "step": 40416
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3176400661468506,
      "learning_rate": 0.0005555856307925847,
      "loss": 3.1003,
      "step": 40417
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1959655284881592,
      "learning_rate": 0.0005555834888697924,
      "loss": 3.261,
      "step": 40418
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6129281520843506,
      "learning_rate": 0.0005555813468994822,
      "loss": 3.061,
      "step": 40419
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5855594873428345,
      "learning_rate": 0.0005555792048816549,
      "loss": 2.917,
      "step": 40420
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2044873237609863,
      "learning_rate": 0.0005555770628163103,
      "loss": 3.2734,
      "step": 40421
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3665382862091064,
      "learning_rate": 0.0005555749207034493,
      "loss": 2.8572,
      "step": 40422
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4145557880401611,
      "learning_rate": 0.000555572778543072,
      "loss": 2.919,
      "step": 40423
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7872337102890015,
      "learning_rate": 0.0005555706363351789,
      "loss": 2.9576,
      "step": 40424
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.143087863922119,
      "learning_rate": 0.0005555684940797706,
      "loss": 2.9483,
      "step": 40425
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.473144769668579,
      "learning_rate": 0.0005555663517768471,
      "loss": 2.9915,
      "step": 40426
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8529831171035767,
      "learning_rate": 0.000555564209426409,
      "loss": 3.0773,
      "step": 40427
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6008774042129517,
      "learning_rate": 0.0005555620670284567,
      "loss": 3.0792,
      "step": 40428
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4728831052780151,
      "learning_rate": 0.0005555599245829905,
      "loss": 3.1306,
      "step": 40429
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.45838463306427,
      "learning_rate": 0.000555557782090011,
      "loss": 3.0373,
      "step": 40430
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8421579599380493,
      "learning_rate": 0.0005555556395495184,
      "loss": 3.0563,
      "step": 40431
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6026173830032349,
      "learning_rate": 0.0005555534969615133,
      "loss": 2.8335,
      "step": 40432
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4757449626922607,
      "learning_rate": 0.0005555513543259959,
      "loss": 3.2595,
      "step": 40433
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.778995394706726,
      "learning_rate": 0.0005555492116429667,
      "loss": 2.8721,
      "step": 40434
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6744935512542725,
      "learning_rate": 0.0005555470689124261,
      "loss": 3.4676,
      "step": 40435
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8902316093444824,
      "learning_rate": 0.0005555449261343744,
      "loss": 3.058,
      "step": 40436
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5770834684371948,
      "learning_rate": 0.0005555427833088121,
      "loss": 2.8819,
      "step": 40437
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5972118377685547,
      "learning_rate": 0.0005555406404357397,
      "loss": 2.9787,
      "step": 40438
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.458632469177246,
      "learning_rate": 0.0005555384975151574,
      "loss": 3.2032,
      "step": 40439
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8110169172286987,
      "learning_rate": 0.0005555363545470656,
      "loss": 3.0584,
      "step": 40440
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4224005937576294,
      "learning_rate": 0.0005555342115314648,
      "loss": 3.0554,
      "step": 40441
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7849992513656616,
      "learning_rate": 0.0005555320684683554,
      "loss": 3.0485,
      "step": 40442
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4165894985198975,
      "learning_rate": 0.0005555299253577377,
      "loss": 3.0688,
      "step": 40443
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5844471454620361,
      "learning_rate": 0.0005555277821996123,
      "loss": 3.2981,
      "step": 40444
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6189631223678589,
      "learning_rate": 0.0005555256389939793,
      "loss": 3.0484,
      "step": 40445
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.980289101600647,
      "learning_rate": 0.0005555234957408394,
      "loss": 2.8997,
      "step": 40446
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.9319329261779785,
      "learning_rate": 0.0005555213524401927,
      "loss": 3.0471,
      "step": 40447
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8973069190979004,
      "learning_rate": 0.0005555192090920399,
      "loss": 3.2825,
      "step": 40448
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7702058553695679,
      "learning_rate": 0.0005555170656963811,
      "loss": 2.9978,
      "step": 40449
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.212783098220825,
      "learning_rate": 0.000555514922253217,
      "loss": 3.2094,
      "step": 40450
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6730053424835205,
      "learning_rate": 0.0005555127787625478,
      "loss": 3.1673,
      "step": 40451
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8572447299957275,
      "learning_rate": 0.0005555106352243739,
      "loss": 2.9761,
      "step": 40452
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2761037349700928,
      "learning_rate": 0.0005555084916386958,
      "loss": 3.0833,
      "step": 40453
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.9296061992645264,
      "learning_rate": 0.0005555063480055139,
      "loss": 3.0295,
      "step": 40454
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4876632690429688,
      "learning_rate": 0.0005555042043248285,
      "loss": 3.0451,
      "step": 40455
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6910407543182373,
      "learning_rate": 0.00055550206059664,
      "loss": 2.9409,
      "step": 40456
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.6713690757751465,
      "learning_rate": 0.0005554999168209489,
      "loss": 2.845,
      "step": 40457
    },
    {
      "epoch": 0.53,
      "grad_norm": 4.211771011352539,
      "learning_rate": 0.0005554977729977556,
      "loss": 3.0427,
      "step": 40458
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.186018466949463,
      "learning_rate": 0.0005554956291270604,
      "loss": 2.985,
      "step": 40459
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6790772676467896,
      "learning_rate": 0.0005554934852088636,
      "loss": 3.1148,
      "step": 40460
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.7543413639068604,
      "learning_rate": 0.000555491341243166,
      "loss": 3.1106,
      "step": 40461
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7652547359466553,
      "learning_rate": 0.0005554891972299675,
      "loss": 2.9936,
      "step": 40462
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2238075733184814,
      "learning_rate": 0.0005554870531692688,
      "loss": 3.1647,
      "step": 40463
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0384433269500732,
      "learning_rate": 0.0005554849090610703,
      "loss": 3.0491,
      "step": 40464
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5697462558746338,
      "learning_rate": 0.0005554827649053723,
      "loss": 3.2302,
      "step": 40465
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.293710470199585,
      "learning_rate": 0.0005554806207021752,
      "loss": 3.2147,
      "step": 40466
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2872469425201416,
      "learning_rate": 0.0005554784764514795,
      "loss": 2.8972,
      "step": 40467
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3397881984710693,
      "learning_rate": 0.0005554763321532854,
      "loss": 3.0039,
      "step": 40468
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9398294687271118,
      "learning_rate": 0.0005554741878075936,
      "loss": 2.9107,
      "step": 40469
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.501866102218628,
      "learning_rate": 0.0005554720434144042,
      "loss": 3.0189,
      "step": 40470
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.214806079864502,
      "learning_rate": 0.0005554698989737177,
      "loss": 3.3234,
      "step": 40471
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.24980890750885,
      "learning_rate": 0.0005554677544855347,
      "loss": 2.8116,
      "step": 40472
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4173150062561035,
      "learning_rate": 0.0005554656099498553,
      "loss": 3.1016,
      "step": 40473
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5570485591888428,
      "learning_rate": 0.0005554634653666799,
      "loss": 3.1515,
      "step": 40474
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1937973499298096,
      "learning_rate": 0.0005554613207360092,
      "loss": 2.7681,
      "step": 40475
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6447155475616455,
      "learning_rate": 0.0005554591760578434,
      "loss": 3.1796,
      "step": 40476
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.806056499481201,
      "learning_rate": 0.0005554570313321829,
      "loss": 3.0545,
      "step": 40477
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4385031461715698,
      "learning_rate": 0.000555454886559028,
      "loss": 2.981,
      "step": 40478
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.75332510471344,
      "learning_rate": 0.0005554527417383794,
      "loss": 3.0719,
      "step": 40479
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5966609716415405,
      "learning_rate": 0.0005554505968702372,
      "loss": 3.0202,
      "step": 40480
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6323940753936768,
      "learning_rate": 0.0005554484519546019,
      "loss": 3.1303,
      "step": 40481
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.410407543182373,
      "learning_rate": 0.000555446306991474,
      "loss": 3.0677,
      "step": 40482
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5025198459625244,
      "learning_rate": 0.0005554441619808537,
      "loss": 3.0438,
      "step": 40483
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7766729593276978,
      "learning_rate": 0.0005554420169227416,
      "loss": 2.7561,
      "step": 40484
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8805440664291382,
      "learning_rate": 0.000555439871817138,
      "loss": 3.05,
      "step": 40485
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7074227333068848,
      "learning_rate": 0.0005554377266640433,
      "loss": 3.3314,
      "step": 40486
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5920227766036987,
      "learning_rate": 0.0005554355814634579,
      "loss": 3.2016,
      "step": 40487
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2862359285354614,
      "learning_rate": 0.0005554334362153821,
      "loss": 3.0303,
      "step": 40488
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.232853651046753,
      "learning_rate": 0.0005554312909198166,
      "loss": 2.9559,
      "step": 40489
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3750625848770142,
      "learning_rate": 0.0005554291455767614,
      "loss": 3.0713,
      "step": 40490
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.338679313659668,
      "learning_rate": 0.0005554270001862172,
      "loss": 2.9836,
      "step": 40491
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7075682878494263,
      "learning_rate": 0.0005554248547481843,
      "loss": 3.1708,
      "step": 40492
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0063529014587402,
      "learning_rate": 0.0005554227092626631,
      "loss": 2.847,
      "step": 40493
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4253270626068115,
      "learning_rate": 0.0005554205637296539,
      "loss": 3.0373,
      "step": 40494
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5236438512802124,
      "learning_rate": 0.0005554184181491573,
      "loss": 3.0653,
      "step": 40495
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8287724256515503,
      "learning_rate": 0.0005554162725211737,
      "loss": 3.1865,
      "step": 40496
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7543545961380005,
      "learning_rate": 0.0005554141268457032,
      "loss": 2.8579,
      "step": 40497
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.170163631439209,
      "learning_rate": 0.0005554119811227465,
      "loss": 3.2242,
      "step": 40498
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7325416803359985,
      "learning_rate": 0.0005554098353523038,
      "loss": 3.1022,
      "step": 40499
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5996878147125244,
      "learning_rate": 0.0005554076895343757,
      "loss": 2.8598,
      "step": 40500
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.793459177017212,
      "learning_rate": 0.0005554055436689624,
      "loss": 3.1606,
      "step": 40501
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4407882690429688,
      "learning_rate": 0.0005554033977560644,
      "loss": 3.1107,
      "step": 40502
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5834577083587646,
      "learning_rate": 0.0005554012517956821,
      "loss": 2.9708,
      "step": 40503
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5211422443389893,
      "learning_rate": 0.0005553991057878159,
      "loss": 2.9706,
      "step": 40504
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5647153854370117,
      "learning_rate": 0.0005553969597324662,
      "loss": 3.0719,
      "step": 40505
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1109225749969482,
      "learning_rate": 0.0005553948136296334,
      "loss": 2.8929,
      "step": 40506
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.930821418762207,
      "learning_rate": 0.0005553926674793178,
      "loss": 2.9332,
      "step": 40507
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.460080146789551,
      "learning_rate": 0.0005553905212815199,
      "loss": 3.0045,
      "step": 40508
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.707524061203003,
      "learning_rate": 0.0005553883750362402,
      "loss": 2.9448,
      "step": 40509
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.111349105834961,
      "learning_rate": 0.0005553862287434787,
      "loss": 2.908,
      "step": 40510
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8700141906738281,
      "learning_rate": 0.0005553840824032365,
      "loss": 2.9268,
      "step": 40511
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4621635675430298,
      "learning_rate": 0.0005553819360155132,
      "loss": 3.0632,
      "step": 40512
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3827162981033325,
      "learning_rate": 0.0005553797895803098,
      "loss": 3.0485,
      "step": 40513
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4727318286895752,
      "learning_rate": 0.0005553776430976264,
      "loss": 3.0972,
      "step": 40514
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2342506647109985,
      "learning_rate": 0.0005553754965674635,
      "loss": 3.0247,
      "step": 40515
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4078794717788696,
      "learning_rate": 0.0005553733499898214,
      "loss": 3.1768,
      "step": 40516
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.016289234161377,
      "learning_rate": 0.0005553712033647007,
      "loss": 3.1969,
      "step": 40517
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5503243207931519,
      "learning_rate": 0.0005553690566921015,
      "loss": 3.1144,
      "step": 40518
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6452888250350952,
      "learning_rate": 0.0005553669099720246,
      "loss": 2.8809,
      "step": 40519
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4006558656692505,
      "learning_rate": 0.00055536476320447,
      "loss": 3.2508,
      "step": 40520
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7407810688018799,
      "learning_rate": 0.0005553626163894382,
      "loss": 3.0029,
      "step": 40521
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5927879810333252,
      "learning_rate": 0.0005553604695269299,
      "loss": 3.1482,
      "step": 40522
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6459989547729492,
      "learning_rate": 0.000555358322616945,
      "loss": 3.0623,
      "step": 40523
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.853079319000244,
      "learning_rate": 0.0005553561756594845,
      "loss": 3.1709,
      "step": 40524
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.0253398418426514,
      "learning_rate": 0.0005553540286545482,
      "loss": 2.8288,
      "step": 40525
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.576623797416687,
      "learning_rate": 0.0005553518816021369,
      "loss": 3.3347,
      "step": 40526
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4096035957336426,
      "learning_rate": 0.0005553497345022508,
      "loss": 2.7764,
      "step": 40527
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.55791437625885,
      "learning_rate": 0.0005553475873548904,
      "loss": 3.0108,
      "step": 40528
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2989262342453003,
      "learning_rate": 0.000555345440160056,
      "loss": 2.8876,
      "step": 40529
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.446384072303772,
      "learning_rate": 0.0005553432929177481,
      "loss": 2.9493,
      "step": 40530
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4945964813232422,
      "learning_rate": 0.0005553411456279671,
      "loss": 2.9758,
      "step": 40531
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5245048999786377,
      "learning_rate": 0.0005553389982907133,
      "loss": 2.8409,
      "step": 40532
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.505698323249817,
      "learning_rate": 0.0005553368509059872,
      "loss": 3.2605,
      "step": 40533
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.544232726097107,
      "learning_rate": 0.0005553347034737891,
      "loss": 3.5076,
      "step": 40534
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6702139377593994,
      "learning_rate": 0.0005553325559941195,
      "loss": 3.3599,
      "step": 40535
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7624300718307495,
      "learning_rate": 0.0005553304084669788,
      "loss": 3.06,
      "step": 40536
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6420058012008667,
      "learning_rate": 0.0005553282608923673,
      "loss": 2.9983,
      "step": 40537
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.519774317741394,
      "learning_rate": 0.0005553261132702855,
      "loss": 3.0232,
      "step": 40538
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6658884286880493,
      "learning_rate": 0.0005553239656007337,
      "loss": 3.2172,
      "step": 40539
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8102096319198608,
      "learning_rate": 0.0005553218178837125,
      "loss": 3.0908,
      "step": 40540
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5998293161392212,
      "learning_rate": 0.000555319670119222,
      "loss": 3.0848,
      "step": 40541
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.64876127243042,
      "learning_rate": 0.0005553175223072628,
      "loss": 3.0835,
      "step": 40542
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1350347995758057,
      "learning_rate": 0.0005553153744478353,
      "loss": 2.796,
      "step": 40543
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5695911645889282,
      "learning_rate": 0.0005553132265409397,
      "loss": 3.0448,
      "step": 40544
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.622420072555542,
      "learning_rate": 0.0005553110785865768,
      "loss": 3.2449,
      "step": 40545
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.578897476196289,
      "learning_rate": 0.0005553089305847464,
      "loss": 2.9375,
      "step": 40546
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5195457935333252,
      "learning_rate": 0.0005553067825354495,
      "loss": 3.0676,
      "step": 40547
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.099825620651245,
      "learning_rate": 0.0005553046344386862,
      "loss": 3.0491,
      "step": 40548
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8485169410705566,
      "learning_rate": 0.0005553024862944571,
      "loss": 3.063,
      "step": 40549
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5072863101959229,
      "learning_rate": 0.0005553003381027623,
      "loss": 3.0529,
      "step": 40550
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3482141494750977,
      "learning_rate": 0.0005552981898636022,
      "loss": 2.9418,
      "step": 40551
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9600214958190918,
      "learning_rate": 0.0005552960415769776,
      "loss": 3.0041,
      "step": 40552
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.951767921447754,
      "learning_rate": 0.0005552938932428885,
      "loss": 3.0266,
      "step": 40553
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.425418496131897,
      "learning_rate": 0.0005552917448613356,
      "loss": 3.0855,
      "step": 40554
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8990192413330078,
      "learning_rate": 0.0005552895964323189,
      "loss": 3.027,
      "step": 40555
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4138362407684326,
      "learning_rate": 0.0005552874479558392,
      "loss": 2.9802,
      "step": 40556
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1965945959091187,
      "learning_rate": 0.0005552852994318968,
      "loss": 3.2098,
      "step": 40557
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.429863452911377,
      "learning_rate": 0.0005552831508604919,
      "loss": 3.0577,
      "step": 40558
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6761689186096191,
      "learning_rate": 0.0005552810022416252,
      "loss": 3.0506,
      "step": 40559
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6120531558990479,
      "learning_rate": 0.0005552788535752968,
      "loss": 3.0917,
      "step": 40560
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.393437147140503,
      "learning_rate": 0.0005552767048615073,
      "loss": 3.1291,
      "step": 40561
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5789493322372437,
      "learning_rate": 0.0005552745561002571,
      "loss": 3.1695,
      "step": 40562
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8347581624984741,
      "learning_rate": 0.0005552724072915464,
      "loss": 3.0719,
      "step": 40563
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4058421850204468,
      "learning_rate": 0.0005552702584353759,
      "loss": 3.2393,
      "step": 40564
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4096249341964722,
      "learning_rate": 0.0005552681095317457,
      "loss": 3.0036,
      "step": 40565
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.515486001968384,
      "learning_rate": 0.0005552659605806565,
      "loss": 2.9944,
      "step": 40566
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.693099021911621,
      "learning_rate": 0.0005552638115821084,
      "loss": 3.1378,
      "step": 40567
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6262450218200684,
      "learning_rate": 0.000555261662536102,
      "loss": 3.0817,
      "step": 40568
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.3791134357452393,
      "learning_rate": 0.0005552595134426376,
      "loss": 2.9948,
      "step": 40569
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7658368349075317,
      "learning_rate": 0.0005552573643017156,
      "loss": 3.0446,
      "step": 40570
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2651381492614746,
      "learning_rate": 0.0005552552151133365,
      "loss": 3.1117,
      "step": 40571
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8999046087265015,
      "learning_rate": 0.0005552530658775007,
      "loss": 3.0861,
      "step": 40572
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3837755918502808,
      "learning_rate": 0.0005552509165942085,
      "loss": 3.0312,
      "step": 40573
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2906115055084229,
      "learning_rate": 0.0005552487672634603,
      "loss": 2.9907,
      "step": 40574
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4845706224441528,
      "learning_rate": 0.0005552466178852565,
      "loss": 2.9809,
      "step": 40575
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5751765966415405,
      "learning_rate": 0.0005552444684595976,
      "loss": 2.9943,
      "step": 40576
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4736604690551758,
      "learning_rate": 0.0005552423189864839,
      "loss": 3.0117,
      "step": 40577
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.286086320877075,
      "learning_rate": 0.0005552401694659159,
      "loss": 2.8071,
      "step": 40578
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.335622787475586,
      "learning_rate": 0.0005552380198978939,
      "loss": 3.0805,
      "step": 40579
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5725992918014526,
      "learning_rate": 0.0005552358702824183,
      "loss": 3.1762,
      "step": 40580
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.023507595062256,
      "learning_rate": 0.0005552337206194896,
      "loss": 3.0018,
      "step": 40581
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4102537631988525,
      "learning_rate": 0.000555231570909108,
      "loss": 3.1374,
      "step": 40582
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4559510946273804,
      "learning_rate": 0.0005552294211512741,
      "loss": 2.9689,
      "step": 40583
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.658512830734253,
      "learning_rate": 0.0005552272713459884,
      "loss": 3.2626,
      "step": 40584
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5492321252822876,
      "learning_rate": 0.0005552251214932509,
      "loss": 2.9005,
      "step": 40585
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5327062606811523,
      "learning_rate": 0.0005552229715930623,
      "loss": 3.1779,
      "step": 40586
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6229627132415771,
      "learning_rate": 0.000555220821645423,
      "loss": 3.0208,
      "step": 40587
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5795414447784424,
      "learning_rate": 0.0005552186716503332,
      "loss": 2.8572,
      "step": 40588
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4570506811141968,
      "learning_rate": 0.0005552165216077935,
      "loss": 3.0422,
      "step": 40589
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4813810586929321,
      "learning_rate": 0.0005552143715178043,
      "loss": 2.961,
      "step": 40590
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9254682064056396,
      "learning_rate": 0.0005552122213803658,
      "loss": 2.8618,
      "step": 40591
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.860780119895935,
      "learning_rate": 0.0005552100711954786,
      "loss": 3.1801,
      "step": 40592
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6070330142974854,
      "learning_rate": 0.000555207920963143,
      "loss": 2.7789,
      "step": 40593
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6293699741363525,
      "learning_rate": 0.0005552057706833595,
      "loss": 3.0221,
      "step": 40594
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4687424898147583,
      "learning_rate": 0.0005552036203561284,
      "loss": 3.1728,
      "step": 40595
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8324774503707886,
      "learning_rate": 0.0005552014699814501,
      "loss": 3.1754,
      "step": 40596
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.534791350364685,
      "learning_rate": 0.0005551993195593251,
      "loss": 3.2437,
      "step": 40597
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7533479928970337,
      "learning_rate": 0.0005551971690897537,
      "loss": 3.0154,
      "step": 40598
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6788818836212158,
      "learning_rate": 0.0005551950185727363,
      "loss": 2.8631,
      "step": 40599
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8020788431167603,
      "learning_rate": 0.0005551928680082733,
      "loss": 2.9995,
      "step": 40600
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8197109699249268,
      "learning_rate": 0.0005551907173963653,
      "loss": 3.0269,
      "step": 40601
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.69471275806427,
      "learning_rate": 0.0005551885667370122,
      "loss": 3.2225,
      "step": 40602
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9391192197799683,
      "learning_rate": 0.0005551864160302151,
      "loss": 2.8925,
      "step": 40603
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3599787950515747,
      "learning_rate": 0.0005551842652759738,
      "loss": 2.9764,
      "step": 40604
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9520243406295776,
      "learning_rate": 0.000555182114474289,
      "loss": 3.0619,
      "step": 40605
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9877079725265503,
      "learning_rate": 0.0005551799636251609,
      "loss": 3.0785,
      "step": 40606
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6701767444610596,
      "learning_rate": 0.0005551778127285903,
      "loss": 3.2077,
      "step": 40607
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3901821374893188,
      "learning_rate": 0.0005551756617845771,
      "loss": 2.9799,
      "step": 40608
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8242813348770142,
      "learning_rate": 0.0005551735107931219,
      "loss": 3.1574,
      "step": 40609
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8309427499771118,
      "learning_rate": 0.0005551713597542254,
      "loss": 3.403,
      "step": 40610
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5064218044281006,
      "learning_rate": 0.0005551692086678874,
      "loss": 3.0418,
      "step": 40611
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.210636854171753,
      "learning_rate": 0.0005551670575341088,
      "loss": 2.8115,
      "step": 40612
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.622207760810852,
      "learning_rate": 0.0005551649063528898,
      "loss": 3.1125,
      "step": 40613
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9523473978042603,
      "learning_rate": 0.0005551627551242308,
      "loss": 3.1485,
      "step": 40614
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.403622627258301,
      "learning_rate": 0.0005551606038481324,
      "loss": 3.1783,
      "step": 40615
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8091366291046143,
      "learning_rate": 0.0005551584525245947,
      "loss": 2.92,
      "step": 40616
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4628419876098633,
      "learning_rate": 0.0005551563011536181,
      "loss": 3.0429,
      "step": 40617
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6238659620285034,
      "learning_rate": 0.0005551541497352033,
      "loss": 2.7983,
      "step": 40618
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3773905038833618,
      "learning_rate": 0.0005551519982693505,
      "loss": 3.0556,
      "step": 40619
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6755460500717163,
      "learning_rate": 0.00055514984675606,
      "loss": 2.8739,
      "step": 40620
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3373998403549194,
      "learning_rate": 0.0005551476951953325,
      "loss": 2.9848,
      "step": 40621
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5669878721237183,
      "learning_rate": 0.0005551455435871681,
      "loss": 2.9633,
      "step": 40622
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3984029293060303,
      "learning_rate": 0.0005551433919315673,
      "loss": 3.0557,
      "step": 40623
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8877394199371338,
      "learning_rate": 0.0005551412402285306,
      "loss": 3.0813,
      "step": 40624
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8748499155044556,
      "learning_rate": 0.0005551390884780584,
      "loss": 3.1346,
      "step": 40625
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.728508234024048,
      "learning_rate": 0.000555136936680151,
      "loss": 2.969,
      "step": 40626
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4622905254364014,
      "learning_rate": 0.0005551347848348088,
      "loss": 3.171,
      "step": 40627
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5350643396377563,
      "learning_rate": 0.0005551326329420322,
      "loss": 3.3111,
      "step": 40628
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6433428525924683,
      "learning_rate": 0.0005551304810018216,
      "loss": 3.0804,
      "step": 40629
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1142163276672363,
      "learning_rate": 0.0005551283290141774,
      "loss": 2.949,
      "step": 40630
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3850054740905762,
      "learning_rate": 0.0005551261769791002,
      "loss": 3.131,
      "step": 40631
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.358101487159729,
      "learning_rate": 0.0005551240248965902,
      "loss": 3.0018,
      "step": 40632
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7859472036361694,
      "learning_rate": 0.0005551218727666476,
      "loss": 2.9714,
      "step": 40633
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4612337350845337,
      "learning_rate": 0.0005551197205892732,
      "loss": 3.0101,
      "step": 40634
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4301544427871704,
      "learning_rate": 0.0005551175683644672,
      "loss": 3.0952,
      "step": 40635
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.090421438217163,
      "learning_rate": 0.00055511541609223,
      "loss": 2.9985,
      "step": 40636
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8214948177337646,
      "learning_rate": 0.0005551132637725622,
      "loss": 2.9793,
      "step": 40637
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0297927856445312,
      "learning_rate": 0.0005551111114054638,
      "loss": 2.9944,
      "step": 40638
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.71522057056427,
      "learning_rate": 0.0005551089589909355,
      "loss": 2.8784,
      "step": 40639
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4495772123336792,
      "learning_rate": 0.0005551068065289776,
      "loss": 3.0206,
      "step": 40640
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6600571870803833,
      "learning_rate": 0.0005551046540195906,
      "loss": 3.1886,
      "step": 40641
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1711318492889404,
      "learning_rate": 0.0005551025014627749,
      "loss": 2.8991,
      "step": 40642
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3733822107315063,
      "learning_rate": 0.0005551003488585307,
      "loss": 3.122,
      "step": 40643
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4830553531646729,
      "learning_rate": 0.0005550981962068586,
      "loss": 3.1201,
      "step": 40644
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6531084775924683,
      "learning_rate": 0.0005550960435077589,
      "loss": 2.9254,
      "step": 40645
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3603843450546265,
      "learning_rate": 0.000555093890761232,
      "loss": 3.0958,
      "step": 40646
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6141294240951538,
      "learning_rate": 0.0005550917379672784,
      "loss": 2.9461,
      "step": 40647
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3855921030044556,
      "learning_rate": 0.0005550895851258983,
      "loss": 3.0828,
      "step": 40648
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3957122564315796,
      "learning_rate": 0.0005550874322370924,
      "loss": 3.1173,
      "step": 40649
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9397636651992798,
      "learning_rate": 0.0005550852793008608,
      "loss": 3.2651,
      "step": 40650
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0363621711730957,
      "learning_rate": 0.000555083126317204,
      "loss": 2.9797,
      "step": 40651
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7106194496154785,
      "learning_rate": 0.0005550809732861226,
      "loss": 2.942,
      "step": 40652
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.35028076171875,
      "learning_rate": 0.0005550788202076168,
      "loss": 3.2179,
      "step": 40653
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.659446120262146,
      "learning_rate": 0.0005550766670816869,
      "loss": 3.1926,
      "step": 40654
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.829651951789856,
      "learning_rate": 0.0005550745139083336,
      "loss": 3.1434,
      "step": 40655
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5758997201919556,
      "learning_rate": 0.000555072360687557,
      "loss": 3.0958,
      "step": 40656
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.712984561920166,
      "learning_rate": 0.0005550702074193577,
      "loss": 3.0379,
      "step": 40657
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5566797256469727,
      "learning_rate": 0.000555068054103736,
      "loss": 2.878,
      "step": 40658
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4691705703735352,
      "learning_rate": 0.0005550659007406923,
      "loss": 3.1719,
      "step": 40659
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4219743013381958,
      "learning_rate": 0.0005550637473302271,
      "loss": 3.065,
      "step": 40660
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.481701135635376,
      "learning_rate": 0.0005550615938723408,
      "loss": 3.3606,
      "step": 40661
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.42575204372406,
      "learning_rate": 0.0005550594403670336,
      "loss": 2.7885,
      "step": 40662
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4932163953781128,
      "learning_rate": 0.0005550572868143061,
      "loss": 3.2829,
      "step": 40663
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3084872961044312,
      "learning_rate": 0.0005550551332141586,
      "loss": 3.0028,
      "step": 40664
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5123087167739868,
      "learning_rate": 0.0005550529795665915,
      "loss": 3.1283,
      "step": 40665
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.024869203567505,
      "learning_rate": 0.0005550508258716052,
      "loss": 3.1316,
      "step": 40666
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5574802160263062,
      "learning_rate": 0.0005550486721292003,
      "loss": 3.1573,
      "step": 40667
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1264424324035645,
      "learning_rate": 0.0005550465183393771,
      "loss": 3.0782,
      "step": 40668
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7639975547790527,
      "learning_rate": 0.0005550443645021357,
      "loss": 2.688,
      "step": 40669
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5889596939086914,
      "learning_rate": 0.000555042210617477,
      "loss": 3.1528,
      "step": 40670
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.896062970161438,
      "learning_rate": 0.0005550400566854009,
      "loss": 2.9919,
      "step": 40671
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3850802183151245,
      "learning_rate": 0.0005550379027059082,
      "loss": 2.9917,
      "step": 40672
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8610697984695435,
      "learning_rate": 0.000555035748678999,
      "loss": 3.0667,
      "step": 40673
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6991456747055054,
      "learning_rate": 0.0005550335946046738,
      "loss": 2.9773,
      "step": 40674
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7166842222213745,
      "learning_rate": 0.0005550314404829332,
      "loss": 2.8673,
      "step": 40675
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.582350492477417,
      "learning_rate": 0.0005550292863137773,
      "loss": 3.0379,
      "step": 40676
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.724187970161438,
      "learning_rate": 0.0005550271320972068,
      "loss": 3.0008,
      "step": 40677
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.540614128112793,
      "learning_rate": 0.0005550249778332218,
      "loss": 2.9155,
      "step": 40678
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2293968200683594,
      "learning_rate": 0.000555022823521823,
      "loss": 2.86,
      "step": 40679
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1118979454040527,
      "learning_rate": 0.0005550206691630105,
      "loss": 3.0212,
      "step": 40680
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.772125244140625,
      "learning_rate": 0.0005550185147567849,
      "loss": 3.0789,
      "step": 40681
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9865294694900513,
      "learning_rate": 0.0005550163603031466,
      "loss": 3.202,
      "step": 40682
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.237966775894165,
      "learning_rate": 0.0005550142058020958,
      "loss": 3.0632,
      "step": 40683
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7573869228363037,
      "learning_rate": 0.0005550120512536331,
      "loss": 3.1218,
      "step": 40684
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8797796964645386,
      "learning_rate": 0.000555009896657759,
      "loss": 2.9331,
      "step": 40685
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.519844651222229,
      "learning_rate": 0.0005550077420144737,
      "loss": 3.0005,
      "step": 40686
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3958888053894043,
      "learning_rate": 0.0005550055873237774,
      "loss": 3.0908,
      "step": 40687
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4553320407867432,
      "learning_rate": 0.0005550034325856709,
      "loss": 3.2805,
      "step": 40688
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5633281469345093,
      "learning_rate": 0.0005550012778001545,
      "loss": 2.9222,
      "step": 40689
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7998583316802979,
      "learning_rate": 0.0005549991229672286,
      "loss": 3.3929,
      "step": 40690
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.654282569885254,
      "learning_rate": 0.0005549969680868934,
      "loss": 2.8874,
      "step": 40691
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.458783507347107,
      "learning_rate": 0.0005549948131591496,
      "loss": 2.9595,
      "step": 40692
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4193774461746216,
      "learning_rate": 0.0005549926581839974,
      "loss": 3.0993,
      "step": 40693
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6655402183532715,
      "learning_rate": 0.0005549905031614371,
      "loss": 3.511,
      "step": 40694
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.070849895477295,
      "learning_rate": 0.0005549883480914695,
      "loss": 3.0048,
      "step": 40695
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6436100006103516,
      "learning_rate": 0.0005549861929740944,
      "loss": 3.0644,
      "step": 40696
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7927815914154053,
      "learning_rate": 0.0005549840378093129,
      "loss": 2.9354,
      "step": 40697
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.049616813659668,
      "learning_rate": 0.0005549818825971249,
      "loss": 3.2765,
      "step": 40698
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9891597032546997,
      "learning_rate": 0.000554979727337531,
      "loss": 3.2106,
      "step": 40699
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3059297800064087,
      "learning_rate": 0.0005549775720305316,
      "loss": 2.748,
      "step": 40700
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.28682279586792,
      "learning_rate": 0.0005549754166761268,
      "loss": 3.356,
      "step": 40701
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3769009113311768,
      "learning_rate": 0.0005549732612743176,
      "loss": 3.1865,
      "step": 40702
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5612969398498535,
      "learning_rate": 0.0005549711058251039,
      "loss": 2.889,
      "step": 40703
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5602117776870728,
      "learning_rate": 0.0005549689503284862,
      "loss": 3.2146,
      "step": 40704
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4609355926513672,
      "learning_rate": 0.0005549667947844651,
      "loss": 2.7776,
      "step": 40705
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5570082664489746,
      "learning_rate": 0.0005549646391930407,
      "loss": 3.0044,
      "step": 40706
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7051411867141724,
      "learning_rate": 0.0005549624835542137,
      "loss": 3.0569,
      "step": 40707
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5830820798873901,
      "learning_rate": 0.0005549603278679843,
      "loss": 3.0926,
      "step": 40708
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3968110084533691,
      "learning_rate": 0.000554958172134353,
      "loss": 3.1851,
      "step": 40709
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1600022315979004,
      "learning_rate": 0.0005549560163533201,
      "loss": 3.0117,
      "step": 40710
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6605459451675415,
      "learning_rate": 0.0005549538605248861,
      "loss": 3.1147,
      "step": 40711
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5284414291381836,
      "learning_rate": 0.0005549517046490514,
      "loss": 2.9791,
      "step": 40712
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.277710437774658,
      "learning_rate": 0.0005549495487258164,
      "loss": 3.0095,
      "step": 40713
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.7430496215820312,
      "learning_rate": 0.0005549473927551813,
      "loss": 3.0788,
      "step": 40714
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.037180185317993,
      "learning_rate": 0.0005549452367371469,
      "loss": 3.1471,
      "step": 40715
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8322370052337646,
      "learning_rate": 0.0005549430806717131,
      "loss": 3.2099,
      "step": 40716
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.836894989013672,
      "learning_rate": 0.0005549409245588808,
      "loss": 3.1082,
      "step": 40717
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5656938552856445,
      "learning_rate": 0.00055493876839865,
      "loss": 3.1125,
      "step": 40718
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.780505895614624,
      "learning_rate": 0.0005549366121910214,
      "loss": 2.9394,
      "step": 40719
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4423059225082397,
      "learning_rate": 0.0005549344559359952,
      "loss": 2.9863,
      "step": 40720
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6630945205688477,
      "learning_rate": 0.0005549322996335719,
      "loss": 3.0071,
      "step": 40721
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6303870677948,
      "learning_rate": 0.0005549301432837519,
      "loss": 3.0929,
      "step": 40722
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.489498257637024,
      "learning_rate": 0.0005549279868865355,
      "loss": 2.9815,
      "step": 40723
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2166168689727783,
      "learning_rate": 0.0005549258304419232,
      "loss": 2.9246,
      "step": 40724
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.639058828353882,
      "learning_rate": 0.0005549236739499153,
      "loss": 2.9069,
      "step": 40725
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.507582664489746,
      "learning_rate": 0.0005549215174105124,
      "loss": 2.7926,
      "step": 40726
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.759580373764038,
      "learning_rate": 0.0005549193608237147,
      "loss": 3.0872,
      "step": 40727
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9540749788284302,
      "learning_rate": 0.0005549172041895228,
      "loss": 3.0301,
      "step": 40728
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3487917184829712,
      "learning_rate": 0.0005549150475079368,
      "loss": 2.9019,
      "step": 40729
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8609273433685303,
      "learning_rate": 0.0005549128907789575,
      "loss": 2.7788,
      "step": 40730
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3324224948883057,
      "learning_rate": 0.0005549107340025849,
      "loss": 2.9796,
      "step": 40731
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.461869478225708,
      "learning_rate": 0.0005549085771788196,
      "loss": 3.0137,
      "step": 40732
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5204898118972778,
      "learning_rate": 0.000554906420307662,
      "loss": 3.086,
      "step": 40733
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4576616287231445,
      "learning_rate": 0.0005549042633891127,
      "loss": 3.0874,
      "step": 40734
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5475749969482422,
      "learning_rate": 0.0005549021064231716,
      "loss": 3.0598,
      "step": 40735
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0624194145202637,
      "learning_rate": 0.0005548999494098395,
      "loss": 2.9549,
      "step": 40736
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3460124731063843,
      "learning_rate": 0.0005548977923491167,
      "loss": 2.9981,
      "step": 40737
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3538618087768555,
      "learning_rate": 0.0005548956352410035,
      "loss": 2.9321,
      "step": 40738
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4556188583374023,
      "learning_rate": 0.0005548934780855004,
      "loss": 3.105,
      "step": 40739
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2012856006622314,
      "learning_rate": 0.0005548913208826079,
      "loss": 2.842,
      "step": 40740
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5608527660369873,
      "learning_rate": 0.0005548891636323263,
      "loss": 3.024,
      "step": 40741
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.085240125656128,
      "learning_rate": 0.0005548870063346559,
      "loss": 2.8757,
      "step": 40742
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.8998894691467285,
      "learning_rate": 0.0005548848489895972,
      "loss": 3.0865,
      "step": 40743
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9509950876235962,
      "learning_rate": 0.0005548826915971507,
      "loss": 2.8059,
      "step": 40744
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7752597332000732,
      "learning_rate": 0.0005548805341573165,
      "loss": 2.9578,
      "step": 40745
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.703761100769043,
      "learning_rate": 0.0005548783766700954,
      "loss": 3.1204,
      "step": 40746
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.206869602203369,
      "learning_rate": 0.0005548762191354875,
      "loss": 3.1507,
      "step": 40747
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.6141834259033203,
      "learning_rate": 0.0005548740615534932,
      "loss": 2.9313,
      "step": 40748
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4364615678787231,
      "learning_rate": 0.0005548719039241132,
      "loss": 3.134,
      "step": 40749
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.7899012565612793,
      "learning_rate": 0.0005548697462473476,
      "loss": 3.19,
      "step": 40750
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.491828680038452,
      "learning_rate": 0.0005548675885231969,
      "loss": 2.9227,
      "step": 40751
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6724307537078857,
      "learning_rate": 0.0005548654307516615,
      "loss": 3.2095,
      "step": 40752
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3199591636657715,
      "learning_rate": 0.0005548632729327418,
      "loss": 3.3058,
      "step": 40753
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4472601413726807,
      "learning_rate": 0.0005548611150664383,
      "loss": 2.8263,
      "step": 40754
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.498685598373413,
      "learning_rate": 0.0005548589571527511,
      "loss": 2.8551,
      "step": 40755
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7798876762390137,
      "learning_rate": 0.0005548567991916811,
      "loss": 3.3676,
      "step": 40756
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8096164464950562,
      "learning_rate": 0.0005548546411832282,
      "loss": 3.1518,
      "step": 40757
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.434712529182434,
      "learning_rate": 0.0005548524831273931,
      "loss": 3.2388,
      "step": 40758
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.351942300796509,
      "learning_rate": 0.000554850325024176,
      "loss": 3.0775,
      "step": 40759
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.446447491645813,
      "learning_rate": 0.0005548481668735775,
      "loss": 3.0453,
      "step": 40760
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.59780216217041,
      "learning_rate": 0.000554846008675598,
      "loss": 2.8637,
      "step": 40761
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7103161811828613,
      "learning_rate": 0.0005548438504302377,
      "loss": 3.0816,
      "step": 40762
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.027601480484009,
      "learning_rate": 0.0005548416921374971,
      "loss": 3.1532,
      "step": 40763
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.865520715713501,
      "learning_rate": 0.0005548395337973766,
      "loss": 3.113,
      "step": 40764
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6211681365966797,
      "learning_rate": 0.0005548373754098768,
      "loss": 3.0761,
      "step": 40765
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2279341220855713,
      "learning_rate": 0.0005548352169749978,
      "loss": 3.0468,
      "step": 40766
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6446435451507568,
      "learning_rate": 0.0005548330584927402,
      "loss": 3.1062,
      "step": 40767
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.856632947921753,
      "learning_rate": 0.0005548308999631041,
      "loss": 2.9788,
      "step": 40768
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8419455289840698,
      "learning_rate": 0.0005548287413860903,
      "loss": 3.268,
      "step": 40769
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8639832735061646,
      "learning_rate": 0.000554826582761699,
      "loss": 3.1709,
      "step": 40770
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.824157953262329,
      "learning_rate": 0.0005548244240899307,
      "loss": 3.1895,
      "step": 40771
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.882704496383667,
      "learning_rate": 0.0005548222653707856,
      "loss": 2.9325,
      "step": 40772
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6254463195800781,
      "learning_rate": 0.0005548201066042644,
      "loss": 2.9641,
      "step": 40773
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.39704167842865,
      "learning_rate": 0.0005548179477903671,
      "loss": 3.1113,
      "step": 40774
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.854123830795288,
      "learning_rate": 0.0005548157889290945,
      "loss": 2.8397,
      "step": 40775
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.530909538269043,
      "learning_rate": 0.0005548136300204469,
      "loss": 3.105,
      "step": 40776
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2197113037109375,
      "learning_rate": 0.0005548114710644244,
      "loss": 3.1094,
      "step": 40777
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3387539386749268,
      "learning_rate": 0.0005548093120610278,
      "loss": 3.0577,
      "step": 40778
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.805410623550415,
      "learning_rate": 0.0005548071530102573,
      "loss": 3.3657,
      "step": 40779
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0769736766815186,
      "learning_rate": 0.0005548049939121133,
      "loss": 2.9389,
      "step": 40780
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4473557472229004,
      "learning_rate": 0.0005548028347665963,
      "loss": 3.1553,
      "step": 40781
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.1777801513671875,
      "learning_rate": 0.0005548006755737067,
      "loss": 2.8674,
      "step": 40782
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.805861711502075,
      "learning_rate": 0.0005547985163334448,
      "loss": 3.0885,
      "step": 40783
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3555197715759277,
      "learning_rate": 0.000554796357045811,
      "loss": 3.0986,
      "step": 40784
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.891972303390503,
      "learning_rate": 0.0005547941977108058,
      "loss": 2.9431,
      "step": 40785
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5709459781646729,
      "learning_rate": 0.0005547920383284294,
      "loss": 3.0787,
      "step": 40786
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7119354009628296,
      "learning_rate": 0.0005547898788986825,
      "loss": 2.9972,
      "step": 40787
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6479809284210205,
      "learning_rate": 0.0005547877194215653,
      "loss": 3.131,
      "step": 40788
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5033079385757446,
      "learning_rate": 0.0005547855598970784,
      "loss": 2.9129,
      "step": 40789
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3905396461486816,
      "learning_rate": 0.0005547834003252219,
      "loss": 2.9293,
      "step": 40790
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5577207803726196,
      "learning_rate": 0.0005547812407059963,
      "loss": 3.1343,
      "step": 40791
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.823976993560791,
      "learning_rate": 0.0005547790810394022,
      "loss": 3.1201,
      "step": 40792
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4926308393478394,
      "learning_rate": 0.0005547769213254398,
      "loss": 3.0996,
      "step": 40793
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6151114702224731,
      "learning_rate": 0.0005547747615641096,
      "loss": 3.0115,
      "step": 40794
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7541133165359497,
      "learning_rate": 0.0005547726017554118,
      "loss": 3.0652,
      "step": 40795
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8354973793029785,
      "learning_rate": 0.0005547704418993472,
      "loss": 3.1259,
      "step": 40796
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.709049105644226,
      "learning_rate": 0.0005547682819959158,
      "loss": 3.0893,
      "step": 40797
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.427126407623291,
      "learning_rate": 0.0005547661220451183,
      "loss": 2.9465,
      "step": 40798
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.572869062423706,
      "learning_rate": 0.0005547639620469549,
      "loss": 3.1725,
      "step": 40799
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.546500563621521,
      "learning_rate": 0.0005547618020014261,
      "loss": 2.9937,
      "step": 40800
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.483318567276001,
      "learning_rate": 0.0005547596419085322,
      "loss": 3.1692,
      "step": 40801
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.70094633102417,
      "learning_rate": 0.0005547574817682736,
      "loss": 3.0295,
      "step": 40802
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.167008399963379,
      "learning_rate": 0.000554755321580651,
      "loss": 2.9514,
      "step": 40803
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9744582176208496,
      "learning_rate": 0.0005547531613456644,
      "loss": 2.9129,
      "step": 40804
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.431004762649536,
      "learning_rate": 0.0005547510010633144,
      "loss": 3.1055,
      "step": 40805
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5017297267913818,
      "learning_rate": 0.0005547488407336014,
      "loss": 3.185,
      "step": 40806
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.565504550933838,
      "learning_rate": 0.0005547466803565258,
      "loss": 3.2138,
      "step": 40807
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9193757772445679,
      "learning_rate": 0.0005547445199320881,
      "loss": 2.9221,
      "step": 40808
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0755393505096436,
      "learning_rate": 0.0005547423594602885,
      "loss": 3.0806,
      "step": 40809
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4028739929199219,
      "learning_rate": 0.0005547401989411274,
      "loss": 3.2486,
      "step": 40810
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4526995420455933,
      "learning_rate": 0.0005547380383746054,
      "loss": 3.0043,
      "step": 40811
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9572516679763794,
      "learning_rate": 0.0005547358777607226,
      "loss": 3.1166,
      "step": 40812
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4598431587219238,
      "learning_rate": 0.0005547337170994798,
      "loss": 2.9656,
      "step": 40813
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.922055959701538,
      "learning_rate": 0.0005547315563908771,
      "loss": 2.9442,
      "step": 40814
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9050122499465942,
      "learning_rate": 0.000554729395634915,
      "loss": 3.1297,
      "step": 40815
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.562305450439453,
      "learning_rate": 0.0005547272348315939,
      "loss": 2.9219,
      "step": 40816
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.425614595413208,
      "learning_rate": 0.0005547250739809142,
      "loss": 3.1374,
      "step": 40817
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6577231884002686,
      "learning_rate": 0.0005547229130828763,
      "loss": 3.0686,
      "step": 40818
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4497425556182861,
      "learning_rate": 0.0005547207521374807,
      "loss": 3.3916,
      "step": 40819
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.250208854675293,
      "learning_rate": 0.0005547185911447275,
      "loss": 3.0656,
      "step": 40820
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.536203145980835,
      "learning_rate": 0.0005547164301046175,
      "loss": 3.0802,
      "step": 40821
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5617905855178833,
      "learning_rate": 0.0005547142690171508,
      "loss": 3.0242,
      "step": 40822
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5075863599777222,
      "learning_rate": 0.0005547121078823279,
      "loss": 3.1376,
      "step": 40823
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5610885620117188,
      "learning_rate": 0.0005547099467001494,
      "loss": 3.1011,
      "step": 40824
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4685540199279785,
      "learning_rate": 0.0005547077854706153,
      "loss": 3.0281,
      "step": 40825
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8492591381072998,
      "learning_rate": 0.0005547056241937263,
      "loss": 3.0494,
      "step": 40826
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1732988357543945,
      "learning_rate": 0.0005547034628694827,
      "loss": 2.9903,
      "step": 40827
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5880999565124512,
      "learning_rate": 0.0005547013014978849,
      "loss": 2.923,
      "step": 40828
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.853978157043457,
      "learning_rate": 0.0005546991400789333,
      "loss": 3.0865,
      "step": 40829
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5659898519515991,
      "learning_rate": 0.0005546969786126284,
      "loss": 3.2857,
      "step": 40830
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.434158444404602,
      "learning_rate": 0.0005546948170989704,
      "loss": 3.1836,
      "step": 40831
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8061379194259644,
      "learning_rate": 0.00055469265553796,
      "loss": 3.1334,
      "step": 40832
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6174312829971313,
      "learning_rate": 0.0005546904939295974,
      "loss": 3.0603,
      "step": 40833
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8233990669250488,
      "learning_rate": 0.0005546883322738829,
      "loss": 3.0973,
      "step": 40834
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.457899808883667,
      "learning_rate": 0.0005546861705708171,
      "loss": 2.9133,
      "step": 40835
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7891355752944946,
      "learning_rate": 0.0005546840088204003,
      "loss": 3.1041,
      "step": 40836
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.378493070602417,
      "learning_rate": 0.000554681847022633,
      "loss": 2.97,
      "step": 40837
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6868444681167603,
      "learning_rate": 0.0005546796851775154,
      "loss": 2.9908,
      "step": 40838
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6086184978485107,
      "learning_rate": 0.0005546775232850483,
      "loss": 2.9205,
      "step": 40839
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5413017272949219,
      "learning_rate": 0.0005546753613452315,
      "loss": 2.7373,
      "step": 40840
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7813738584518433,
      "learning_rate": 0.0005546731993580661,
      "loss": 2.9057,
      "step": 40841
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4376033544540405,
      "learning_rate": 0.0005546710373235519,
      "loss": 3.0161,
      "step": 40842
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.809686541557312,
      "learning_rate": 0.0005546688752416896,
      "loss": 3.0902,
      "step": 40843
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4148532152175903,
      "learning_rate": 0.0005546667131124795,
      "loss": 3.0805,
      "step": 40844
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.456205129623413,
      "learning_rate": 0.0005546645509359222,
      "loss": 3.2377,
      "step": 40845
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.161478281021118,
      "learning_rate": 0.0005546623887120178,
      "loss": 3.0647,
      "step": 40846
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4367451667785645,
      "learning_rate": 0.000554660226440767,
      "loss": 3.2308,
      "step": 40847
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8623226881027222,
      "learning_rate": 0.00055465806412217,
      "loss": 2.9192,
      "step": 40848
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4781800508499146,
      "learning_rate": 0.0005546559017562271,
      "loss": 3.1798,
      "step": 40849
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6252518892288208,
      "learning_rate": 0.0005546537393429391,
      "loss": 3.1428,
      "step": 40850
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.55403470993042,
      "learning_rate": 0.000554651576882306,
      "loss": 3.0822,
      "step": 40851
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.393759250640869,
      "learning_rate": 0.0005546494143743284,
      "loss": 2.9513,
      "step": 40852
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.99199640750885,
      "learning_rate": 0.0005546472518190067,
      "loss": 2.9133,
      "step": 40853
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5767749547958374,
      "learning_rate": 0.0005546450892163413,
      "loss": 3.2525,
      "step": 40854
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.911014437675476,
      "learning_rate": 0.0005546429265663326,
      "loss": 3.2827,
      "step": 40855
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6446056365966797,
      "learning_rate": 0.0005546407638689809,
      "loss": 2.873,
      "step": 40856
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6194005012512207,
      "learning_rate": 0.0005546386011242866,
      "loss": 3.2612,
      "step": 40857
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5060527324676514,
      "learning_rate": 0.0005546364383322503,
      "loss": 3.0865,
      "step": 40858
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.554527997970581,
      "learning_rate": 0.0005546342754928723,
      "loss": 2.9397,
      "step": 40859
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5221070051193237,
      "learning_rate": 0.0005546321126061528,
      "loss": 2.8907,
      "step": 40860
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1586270332336426,
      "learning_rate": 0.0005546299496720925,
      "loss": 2.907,
      "step": 40861
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1262080669403076,
      "learning_rate": 0.0005546277866906917,
      "loss": 2.999,
      "step": 40862
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.842749834060669,
      "learning_rate": 0.0005546256236619509,
      "loss": 3.1643,
      "step": 40863
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3390514850616455,
      "learning_rate": 0.0005546234605858702,
      "loss": 3.0543,
      "step": 40864
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5517547130584717,
      "learning_rate": 0.0005546212974624503,
      "loss": 3.0787,
      "step": 40865
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4340113401412964,
      "learning_rate": 0.0005546191342916914,
      "loss": 3.0093,
      "step": 40866
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4769337177276611,
      "learning_rate": 0.0005546169710735941,
      "loss": 3.0895,
      "step": 40867
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.1717774868011475,
      "learning_rate": 0.0005546148078081586,
      "loss": 3.3029,
      "step": 40868
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8554329872131348,
      "learning_rate": 0.0005546126444953855,
      "loss": 2.9333,
      "step": 40869
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.592723846435547,
      "learning_rate": 0.000554610481135275,
      "loss": 3.0037,
      "step": 40870
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3220711946487427,
      "learning_rate": 0.0005546083177278277,
      "loss": 3.0283,
      "step": 40871
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4136722087860107,
      "learning_rate": 0.0005546061542730439,
      "loss": 2.9756,
      "step": 40872
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9029269218444824,
      "learning_rate": 0.0005546039907709238,
      "loss": 3.2528,
      "step": 40873
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7390046119689941,
      "learning_rate": 0.0005546018272214682,
      "loss": 3.0904,
      "step": 40874
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.388029932975769,
      "learning_rate": 0.0005545996636246775,
      "loss": 2.8236,
      "step": 40875
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6854277849197388,
      "learning_rate": 0.0005545974999805516,
      "loss": 3.0644,
      "step": 40876
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8790156841278076,
      "learning_rate": 0.0005545953362890914,
      "loss": 2.9424,
      "step": 40877
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7166105508804321,
      "learning_rate": 0.0005545931725502971,
      "loss": 3.3075,
      "step": 40878
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8270883560180664,
      "learning_rate": 0.000554591008764169,
      "loss": 3.0149,
      "step": 40879
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3642959594726562,
      "learning_rate": 0.0005545888449307077,
      "loss": 3.3112,
      "step": 40880
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2042717933654785,
      "learning_rate": 0.0005545866810499135,
      "loss": 3.0234,
      "step": 40881
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4456686973571777,
      "learning_rate": 0.0005545845171217869,
      "loss": 2.9033,
      "step": 40882
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4891489744186401,
      "learning_rate": 0.0005545823531463282,
      "loss": 2.8922,
      "step": 40883
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.346511125564575,
      "learning_rate": 0.000554580189123538,
      "loss": 3.0004,
      "step": 40884
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0904383659362793,
      "learning_rate": 0.0005545780250534163,
      "loss": 2.8026,
      "step": 40885
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3522223234176636,
      "learning_rate": 0.0005545758609359638,
      "loss": 2.8783,
      "step": 40886
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.499833345413208,
      "learning_rate": 0.0005545736967711808,
      "loss": 2.9632,
      "step": 40887
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6151684522628784,
      "learning_rate": 0.0005545715325590678,
      "loss": 2.9278,
      "step": 40888
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.760357141494751,
      "learning_rate": 0.0005545693682996252,
      "loss": 3.0562,
      "step": 40889
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.945677399635315,
      "learning_rate": 0.0005545672039928533,
      "loss": 3.2167,
      "step": 40890
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4814015626907349,
      "learning_rate": 0.0005545650396387525,
      "loss": 3.0954,
      "step": 40891
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0665080547332764,
      "learning_rate": 0.0005545628752373234,
      "loss": 3.1322,
      "step": 40892
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7509939670562744,
      "learning_rate": 0.0005545607107885661,
      "loss": 3.26,
      "step": 40893
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.045128107070923,
      "learning_rate": 0.0005545585462924811,
      "loss": 3.3302,
      "step": 40894
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.2479419708251953,
      "learning_rate": 0.000554556381749069,
      "loss": 3.0377,
      "step": 40895
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.709488868713379,
      "learning_rate": 0.0005545542171583301,
      "loss": 3.1656,
      "step": 40896
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1235013008117676,
      "learning_rate": 0.0005545520525202648,
      "loss": 3.2745,
      "step": 40897
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5732218027114868,
      "learning_rate": 0.0005545498878348733,
      "loss": 2.9907,
      "step": 40898
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.598793864250183,
      "learning_rate": 0.0005545477231021562,
      "loss": 3.2388,
      "step": 40899
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.900926947593689,
      "learning_rate": 0.000554545558322114,
      "loss": 3.0048,
      "step": 40900
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5112305879592896,
      "learning_rate": 0.0005545433934947469,
      "loss": 2.8128,
      "step": 40901
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7214105129241943,
      "learning_rate": 0.0005545412286200554,
      "loss": 3.0483,
      "step": 40902
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2727402448654175,
      "learning_rate": 0.0005545390636980397,
      "loss": 3.01,
      "step": 40903
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.1405162811279297,
      "learning_rate": 0.0005545368987287006,
      "loss": 3.1914,
      "step": 40904
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7357633113861084,
      "learning_rate": 0.0005545347337120381,
      "loss": 3.041,
      "step": 40905
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6530994176864624,
      "learning_rate": 0.0005545325686480531,
      "loss": 3.0886,
      "step": 40906
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6767988204956055,
      "learning_rate": 0.0005545304035367455,
      "loss": 2.9916,
      "step": 40907
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2733378410339355,
      "learning_rate": 0.0005545282383781158,
      "loss": 3.2853,
      "step": 40908
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5568019151687622,
      "learning_rate": 0.0005545260731721646,
      "loss": 2.8417,
      "step": 40909
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7353042364120483,
      "learning_rate": 0.0005545239079188922,
      "loss": 2.8493,
      "step": 40910
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5659961700439453,
      "learning_rate": 0.000554521742618299,
      "loss": 3.1465,
      "step": 40911
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.490617036819458,
      "learning_rate": 0.0005545195772703854,
      "loss": 2.8945,
      "step": 40912
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4921464920043945,
      "learning_rate": 0.0005545174118751517,
      "loss": 2.7764,
      "step": 40913
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7383179664611816,
      "learning_rate": 0.0005545152464325986,
      "loss": 3.1109,
      "step": 40914
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6350550651550293,
      "learning_rate": 0.0005545130809427262,
      "loss": 3.3389,
      "step": 40915
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6475120782852173,
      "learning_rate": 0.000554510915405535,
      "loss": 3.0395,
      "step": 40916
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.312568187713623,
      "learning_rate": 0.0005545087498210255,
      "loss": 2.9357,
      "step": 40917
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.505329966545105,
      "learning_rate": 0.000554506584189198,
      "loss": 2.9417,
      "step": 40918
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7063448429107666,
      "learning_rate": 0.0005545044185100528,
      "loss": 2.8334,
      "step": 40919
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3541334867477417,
      "learning_rate": 0.0005545022527835906,
      "loss": 3.0623,
      "step": 40920
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.030911684036255,
      "learning_rate": 0.0005545000870098116,
      "loss": 3.0769,
      "step": 40921
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5623137950897217,
      "learning_rate": 0.0005544979211887162,
      "loss": 2.9178,
      "step": 40922
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4144201278686523,
      "learning_rate": 0.0005544957553203047,
      "loss": 2.867,
      "step": 40923
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6028993129730225,
      "learning_rate": 0.0005544935894045777,
      "loss": 2.9403,
      "step": 40924
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6569931507110596,
      "learning_rate": 0.0005544914234415356,
      "loss": 2.9713,
      "step": 40925
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2326269149780273,
      "learning_rate": 0.0005544892574311787,
      "loss": 3.1632,
      "step": 40926
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.614511251449585,
      "learning_rate": 0.0005544870913735075,
      "loss": 3.1306,
      "step": 40927
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.064284563064575,
      "learning_rate": 0.0005544849252685222,
      "loss": 3.0683,
      "step": 40928
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8531103134155273,
      "learning_rate": 0.0005544827591162236,
      "loss": 3.0731,
      "step": 40929
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4517327547073364,
      "learning_rate": 0.0005544805929166117,
      "loss": 3.3363,
      "step": 40930
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1036787033081055,
      "learning_rate": 0.000554478426669687,
      "loss": 2.9127,
      "step": 40931
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.029402494430542,
      "learning_rate": 0.00055447626037545,
      "loss": 3.2268,
      "step": 40932
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0032119750976562,
      "learning_rate": 0.0005544740940339011,
      "loss": 3.0765,
      "step": 40933
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.3191723823547363,
      "learning_rate": 0.0005544719276450406,
      "loss": 2.8697,
      "step": 40934
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.8001394271850586,
      "learning_rate": 0.0005544697612088691,
      "loss": 2.8111,
      "step": 40935
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.427971363067627,
      "learning_rate": 0.0005544675947253868,
      "loss": 2.7312,
      "step": 40936
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.292160153388977,
      "learning_rate": 0.0005544654281945941,
      "loss": 2.7184,
      "step": 40937
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.8007090091705322,
      "learning_rate": 0.0005544632616164914,
      "loss": 3.0844,
      "step": 40938
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.411324977874756,
      "learning_rate": 0.0005544610949910794,
      "loss": 3.3442,
      "step": 40939
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7067880630493164,
      "learning_rate": 0.0005544589283183581,
      "loss": 3.1024,
      "step": 40940
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.749892234802246,
      "learning_rate": 0.0005544567615983282,
      "loss": 2.8988,
      "step": 40941
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.9635701179504395,
      "learning_rate": 0.0005544545948309899,
      "loss": 3.1861,
      "step": 40942
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7078015804290771,
      "learning_rate": 0.0005544524280163438,
      "loss": 3.0545,
      "step": 40943
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9193165302276611,
      "learning_rate": 0.0005544502611543901,
      "loss": 2.9435,
      "step": 40944
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.616196393966675,
      "learning_rate": 0.0005544480942451293,
      "loss": 2.8167,
      "step": 40945
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9837939739227295,
      "learning_rate": 0.0005544459272885618,
      "loss": 3.1949,
      "step": 40946
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8255529403686523,
      "learning_rate": 0.000554443760284688,
      "loss": 3.2099,
      "step": 40947
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8059463500976562,
      "learning_rate": 0.0005544415932335084,
      "loss": 2.7641,
      "step": 40948
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.492985486984253,
      "learning_rate": 0.0005544394261350232,
      "loss": 2.9246,
      "step": 40949
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3639565706253052,
      "learning_rate": 0.0005544372589892329,
      "loss": 3.0094,
      "step": 40950
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4526287317276,
      "learning_rate": 0.0005544350917961379,
      "loss": 3.2398,
      "step": 40951
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.796251058578491,
      "learning_rate": 0.0005544329245557386,
      "loss": 2.992,
      "step": 40952
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4047406911849976,
      "learning_rate": 0.0005544307572680355,
      "loss": 2.7134,
      "step": 40953
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.006948709487915,
      "learning_rate": 0.0005544285899330289,
      "loss": 2.9721,
      "step": 40954
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5125240087509155,
      "learning_rate": 0.0005544264225507191,
      "loss": 3.2428,
      "step": 40955
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7196203470230103,
      "learning_rate": 0.0005544242551211068,
      "loss": 3.1799,
      "step": 40956
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6234288215637207,
      "learning_rate": 0.0005544220876441922,
      "loss": 2.9882,
      "step": 40957
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4747012853622437,
      "learning_rate": 0.0005544199201199757,
      "loss": 3.0049,
      "step": 40958
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.419988751411438,
      "learning_rate": 0.0005544177525484577,
      "loss": 2.8911,
      "step": 40959
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3399436473846436,
      "learning_rate": 0.0005544155849296386,
      "loss": 3.2254,
      "step": 40960
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3806763887405396,
      "learning_rate": 0.0005544134172635189,
      "loss": 2.9072,
      "step": 40961
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5402566194534302,
      "learning_rate": 0.000554411249550099,
      "loss": 2.9729,
      "step": 40962
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1677920818328857,
      "learning_rate": 0.0005544090817893792,
      "loss": 3.2681,
      "step": 40963
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5976377725601196,
      "learning_rate": 0.0005544069139813599,
      "loss": 3.0766,
      "step": 40964
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3941078186035156,
      "learning_rate": 0.0005544047461260416,
      "loss": 3.0096,
      "step": 40965
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6523014307022095,
      "learning_rate": 0.0005544025782234247,
      "loss": 3.0822,
      "step": 40966
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5822981595993042,
      "learning_rate": 0.0005544004102735094,
      "loss": 3.0318,
      "step": 40967
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7711814641952515,
      "learning_rate": 0.0005543982422762964,
      "loss": 3.1242,
      "step": 40968
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5669324398040771,
      "learning_rate": 0.0005543960742317859,
      "loss": 3.1113,
      "step": 40969
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6998200416564941,
      "learning_rate": 0.0005543939061399784,
      "loss": 3.0607,
      "step": 40970
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.586524248123169,
      "learning_rate": 0.0005543917380008743,
      "loss": 2.9504,
      "step": 40971
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7919105291366577,
      "learning_rate": 0.000554389569814474,
      "loss": 2.8841,
      "step": 40972
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2902532815933228,
      "learning_rate": 0.0005543874015807778,
      "loss": 3.3986,
      "step": 40973
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8672969341278076,
      "learning_rate": 0.0005543852332997862,
      "loss": 2.8188,
      "step": 40974
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.396041750907898,
      "learning_rate": 0.0005543830649714996,
      "loss": 2.9463,
      "step": 40975
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.864349603652954,
      "learning_rate": 0.0005543808965959183,
      "loss": 2.8323,
      "step": 40976
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9838999509811401,
      "learning_rate": 0.0005543787281730429,
      "loss": 3.0752,
      "step": 40977
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9505202770233154,
      "learning_rate": 0.0005543765597028737,
      "loss": 3.0845,
      "step": 40978
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2587604522705078,
      "learning_rate": 0.000554374391185411,
      "loss": 3.1025,
      "step": 40979
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5170913934707642,
      "learning_rate": 0.0005543722226206553,
      "loss": 3.2888,
      "step": 40980
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3216537237167358,
      "learning_rate": 0.0005543700540086071,
      "loss": 3.1047,
      "step": 40981
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1239922046661377,
      "learning_rate": 0.0005543678853492666,
      "loss": 3.1077,
      "step": 40982
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3423089981079102,
      "learning_rate": 0.0005543657166426345,
      "loss": 2.9007,
      "step": 40983
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5200870037078857,
      "learning_rate": 0.0005543635478887108,
      "loss": 3.0054,
      "step": 40984
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3773876428604126,
      "learning_rate": 0.0005543613790874963,
      "loss": 2.8804,
      "step": 40985
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4956895112991333,
      "learning_rate": 0.0005543592102389911,
      "loss": 3.0309,
      "step": 40986
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3176250457763672,
      "learning_rate": 0.0005543570413431956,
      "loss": 2.787,
      "step": 40987
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3057307004928589,
      "learning_rate": 0.0005543548724001104,
      "loss": 3.2169,
      "step": 40988
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7412148714065552,
      "learning_rate": 0.000554352703409736,
      "loss": 2.9318,
      "step": 40989
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3217014074325562,
      "learning_rate": 0.0005543505343720725,
      "loss": 3.2169,
      "step": 40990
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5146450996398926,
      "learning_rate": 0.0005543483652871204,
      "loss": 3.1869,
      "step": 40991
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5125733613967896,
      "learning_rate": 0.0005543461961548803,
      "loss": 3.1099,
      "step": 40992
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8104320764541626,
      "learning_rate": 0.0005543440269753522,
      "loss": 2.7684,
      "step": 40993
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2881567478179932,
      "learning_rate": 0.0005543418577485369,
      "loss": 3.0803,
      "step": 40994
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.957679271697998,
      "learning_rate": 0.0005543396884744346,
      "loss": 2.9113,
      "step": 40995
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6907775402069092,
      "learning_rate": 0.0005543375191530458,
      "loss": 3.08,
      "step": 40996
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7377771139144897,
      "learning_rate": 0.0005543353497843708,
      "loss": 3.1476,
      "step": 40997
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7252856492996216,
      "learning_rate": 0.00055433318036841,
      "loss": 3.018,
      "step": 40998
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.388210654258728,
      "learning_rate": 0.0005543310109051641,
      "loss": 3.2243,
      "step": 40999
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4034422636032104,
      "learning_rate": 0.0005543288413946331,
      "loss": 3.0786,
      "step": 41000
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.015294075012207,
      "learning_rate": 0.0005543266718368176,
      "loss": 3.0631,
      "step": 41001
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4793462753295898,
      "learning_rate": 0.0005543245022317179,
      "loss": 3.2247,
      "step": 41002
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5176422595977783,
      "learning_rate": 0.0005543223325793345,
      "loss": 2.7413,
      "step": 41003
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.509838819503784,
      "learning_rate": 0.0005543201628796678,
      "loss": 3.2102,
      "step": 41004
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.485493540763855,
      "learning_rate": 0.0005543179931327181,
      "loss": 3.0475,
      "step": 41005
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5557777881622314,
      "learning_rate": 0.000554315823338486,
      "loss": 3.1045,
      "step": 41006
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7788535356521606,
      "learning_rate": 0.0005543136534969718,
      "loss": 3.2511,
      "step": 41007
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5147786140441895,
      "learning_rate": 0.0005543114836081758,
      "loss": 3.1622,
      "step": 41008
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7400367259979248,
      "learning_rate": 0.0005543093136720985,
      "loss": 2.928,
      "step": 41009
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5509899854660034,
      "learning_rate": 0.0005543071436887404,
      "loss": 2.8623,
      "step": 41010
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7863041162490845,
      "learning_rate": 0.0005543049736581017,
      "loss": 3.0037,
      "step": 41011
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7321761846542358,
      "learning_rate": 0.000554302803580183,
      "loss": 3.119,
      "step": 41012
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5314677953720093,
      "learning_rate": 0.0005543006334549845,
      "loss": 2.9024,
      "step": 41013
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.429017424583435,
      "learning_rate": 0.0005542984632825068,
      "loss": 3.0051,
      "step": 41014
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7037981748580933,
      "learning_rate": 0.0005542962930627502,
      "loss": 3.006,
      "step": 41015
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5362590551376343,
      "learning_rate": 0.0005542941227957151,
      "loss": 3.2997,
      "step": 41016
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2824324369430542,
      "learning_rate": 0.0005542919524814019,
      "loss": 3.1748,
      "step": 41017
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4274646043777466,
      "learning_rate": 0.000554289782119811,
      "loss": 2.8789,
      "step": 41018
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4306589365005493,
      "learning_rate": 0.000554287611710943,
      "loss": 3.0679,
      "step": 41019
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.2015082836151123,
      "learning_rate": 0.000554285441254798,
      "loss": 2.9856,
      "step": 41020
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4419770240783691,
      "learning_rate": 0.0005542832707513764,
      "loss": 3.1132,
      "step": 41021
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4889087677001953,
      "learning_rate": 0.0005542811002006789,
      "loss": 2.975,
      "step": 41022
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.1344568729400635,
      "learning_rate": 0.0005542789296027057,
      "loss": 3.1184,
      "step": 41023
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.669466018676758,
      "learning_rate": 0.0005542767589574573,
      "loss": 3.1336,
      "step": 41024
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4984705448150635,
      "learning_rate": 0.000554274588264934,
      "loss": 3.4303,
      "step": 41025
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0122618675231934,
      "learning_rate": 0.0005542724175251363,
      "loss": 3.1556,
      "step": 41026
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0015549659729004,
      "learning_rate": 0.0005542702467380646,
      "loss": 3.0152,
      "step": 41027
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7926734685897827,
      "learning_rate": 0.0005542680759037192,
      "loss": 2.9759,
      "step": 41028
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4784449338912964,
      "learning_rate": 0.0005542659050221005,
      "loss": 3.1487,
      "step": 41029
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8794689178466797,
      "learning_rate": 0.0005542637340932091,
      "loss": 3.2343,
      "step": 41030
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.484512209892273,
      "learning_rate": 0.0005542615631170452,
      "loss": 3.2197,
      "step": 41031
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7330511808395386,
      "learning_rate": 0.0005542593920936092,
      "loss": 2.8952,
      "step": 41032
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0915398597717285,
      "learning_rate": 0.0005542572210229018,
      "loss": 3.1807,
      "step": 41033
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.728817343711853,
      "learning_rate": 0.000554255049904923,
      "loss": 2.9526,
      "step": 41034
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4746509790420532,
      "learning_rate": 0.0005542528787396733,
      "loss": 3.0539,
      "step": 41035
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.9026637077331543,
      "learning_rate": 0.0005542507075271533,
      "loss": 2.9313,
      "step": 41036
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0820424556732178,
      "learning_rate": 0.0005542485362673634,
      "loss": 2.7766,
      "step": 41037
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4829763174057007,
      "learning_rate": 0.0005542463649603038,
      "loss": 3.2794,
      "step": 41038
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.660138726234436,
      "learning_rate": 0.000554244193605975,
      "loss": 2.9473,
      "step": 41039
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.37611985206604,
      "learning_rate": 0.0005542420222043775,
      "loss": 3.3209,
      "step": 41040
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.2673606872558594,
      "learning_rate": 0.0005542398507555115,
      "loss": 3.1355,
      "step": 41041
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.4596312046051025,
      "learning_rate": 0.0005542376792593775,
      "loss": 2.9653,
      "step": 41042
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5930248498916626,
      "learning_rate": 0.000554235507715976,
      "loss": 3.025,
      "step": 41043
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.783790111541748,
      "learning_rate": 0.0005542333361253072,
      "loss": 3.0322,
      "step": 41044
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0608811378479004,
      "learning_rate": 0.0005542311644873717,
      "loss": 3.2147,
      "step": 41045
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6158860921859741,
      "learning_rate": 0.0005542289928021698,
      "loss": 3.0704,
      "step": 41046
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7768261432647705,
      "learning_rate": 0.000554226821069702,
      "loss": 3.0182,
      "step": 41047
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7269480228424072,
      "learning_rate": 0.0005542246492899687,
      "loss": 3.1585,
      "step": 41048
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4890291690826416,
      "learning_rate": 0.00055422247746297,
      "loss": 3.3278,
      "step": 41049
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6040600538253784,
      "learning_rate": 0.0005542203055887067,
      "loss": 3.0422,
      "step": 41050
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6479085683822632,
      "learning_rate": 0.0005542181336671791,
      "loss": 2.8803,
      "step": 41051
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.822167992591858,
      "learning_rate": 0.0005542159616983874,
      "loss": 3.0145,
      "step": 41052
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.392258644104004,
      "learning_rate": 0.0005542137896823322,
      "loss": 2.8209,
      "step": 41053
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.3963770866394043,
      "learning_rate": 0.000554211617619014,
      "loss": 2.8029,
      "step": 41054
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.5418643951416016,
      "learning_rate": 0.0005542094455084328,
      "loss": 3.1711,
      "step": 41055
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4128575325012207,
      "learning_rate": 0.0005542072733505895,
      "loss": 3.2093,
      "step": 41056
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8741921186447144,
      "learning_rate": 0.0005542051011454841,
      "loss": 3.099,
      "step": 41057
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.40586256980896,
      "learning_rate": 0.0005542029288931173,
      "loss": 3.0764,
      "step": 41058
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.8513013124465942,
      "learning_rate": 0.0005542007565934894,
      "loss": 3.1287,
      "step": 41059
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.235876202583313,
      "learning_rate": 0.0005541985842466006,
      "loss": 3.0137,
      "step": 41060
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.583801507949829,
      "learning_rate": 0.0005541964118524516,
      "loss": 3.0006,
      "step": 41061
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5489259958267212,
      "learning_rate": 0.0005541942394110428,
      "loss": 3.009,
      "step": 41062
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3048025369644165,
      "learning_rate": 0.0005541920669223743,
      "loss": 2.9386,
      "step": 41063
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4561457633972168,
      "learning_rate": 0.0005541898943864468,
      "loss": 3.023,
      "step": 41064
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.596002221107483,
      "learning_rate": 0.0005541877218032606,
      "loss": 2.8626,
      "step": 41065
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5030802488327026,
      "learning_rate": 0.0005541855491728161,
      "loss": 2.9583,
      "step": 41066
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3266545534133911,
      "learning_rate": 0.0005541833764951136,
      "loss": 2.8114,
      "step": 41067
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4541937112808228,
      "learning_rate": 0.0005541812037701538,
      "loss": 3.3025,
      "step": 41068
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6023722887039185,
      "learning_rate": 0.0005541790309979368,
      "loss": 3.0541,
      "step": 41069
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.6856062412261963,
      "learning_rate": 0.0005541768581784632,
      "loss": 3.0626,
      "step": 41070
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3896269798278809,
      "learning_rate": 0.0005541746853117331,
      "loss": 3.1953,
      "step": 41071
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4296895265579224,
      "learning_rate": 0.0005541725123977473,
      "loss": 2.8727,
      "step": 41072
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0361785888671875,
      "learning_rate": 0.0005541703394365061,
      "loss": 3.142,
      "step": 41073
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3081244230270386,
      "learning_rate": 0.0005541681664280097,
      "loss": 3.1217,
      "step": 41074
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4648728370666504,
      "learning_rate": 0.0005541659933722587,
      "loss": 3.3674,
      "step": 41075
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.2883769273757935,
      "learning_rate": 0.0005541638202692534,
      "loss": 2.9503,
      "step": 41076
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7753467559814453,
      "learning_rate": 0.0005541616471189943,
      "loss": 2.9238,
      "step": 41077
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4968984127044678,
      "learning_rate": 0.0005541594739214818,
      "loss": 3.1186,
      "step": 41078
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.5790992975234985,
      "learning_rate": 0.0005541573006767162,
      "loss": 3.0124,
      "step": 41079
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.3883413076400757,
      "learning_rate": 0.0005541551273846979,
      "loss": 2.8929,
      "step": 41080
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.654960036277771,
      "learning_rate": 0.0005541529540454274,
      "loss": 3.329,
      "step": 41081
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6721521615982056,
      "learning_rate": 0.0005541507806589051,
      "loss": 2.978,
      "step": 41082
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7310419082641602,
      "learning_rate": 0.0005541486072251314,
      "loss": 2.9987,
      "step": 41083
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.0563578605651855,
      "learning_rate": 0.0005541464337441066,
      "loss": 3.1056,
      "step": 41084
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4783607721328735,
      "learning_rate": 0.0005541442602158312,
      "loss": 2.9719,
      "step": 41085
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6422053575515747,
      "learning_rate": 0.0005541420866403057,
      "loss": 2.9786,
      "step": 41086
    },
    {
      "epoch": 0.53,
      "grad_norm": 2.4102725982666016,
      "learning_rate": 0.0005541399130175302,
      "loss": 3.0781,
      "step": 41087
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.6392439603805542,
      "learning_rate": 0.0005541377393475054,
      "loss": 3.0539,
      "step": 41088
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.620272159576416,
      "learning_rate": 0.0005541355656302317,
      "loss": 3.0435,
      "step": 41089
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6744940280914307,
      "learning_rate": 0.0005541333918657092,
      "loss": 2.9541,
      "step": 41090
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.505303978919983,
      "learning_rate": 0.0005541312180539385,
      "loss": 2.972,
      "step": 41091
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.658457636833191,
      "learning_rate": 0.0005541290441949202,
      "loss": 3.0281,
      "step": 41092
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6151398420333862,
      "learning_rate": 0.0005541268702886543,
      "loss": 3.0175,
      "step": 41093
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6976337432861328,
      "learning_rate": 0.0005541246963351417,
      "loss": 3.1143,
      "step": 41094
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3992329835891724,
      "learning_rate": 0.0005541225223343822,
      "loss": 2.8825,
      "step": 41095
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3503499031066895,
      "learning_rate": 0.0005541203482863767,
      "loss": 2.9203,
      "step": 41096
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.599545955657959,
      "learning_rate": 0.0005541181741911254,
      "loss": 3.0324,
      "step": 41097
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.739937424659729,
      "learning_rate": 0.0005541160000486289,
      "loss": 2.9436,
      "step": 41098
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3734794855117798,
      "learning_rate": 0.0005541138258588872,
      "loss": 2.9214,
      "step": 41099
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7806739807128906,
      "learning_rate": 0.000554111651621901,
      "loss": 3.0694,
      "step": 41100
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7060410976409912,
      "learning_rate": 0.0005541094773376707,
      "loss": 2.8978,
      "step": 41101
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.777848958969116,
      "learning_rate": 0.0005541073030061966,
      "loss": 3.0551,
      "step": 41102
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1959433555603027,
      "learning_rate": 0.0005541051286274792,
      "loss": 3.082,
      "step": 41103
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4858510494232178,
      "learning_rate": 0.0005541029542015188,
      "loss": 3.284,
      "step": 41104
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3959832191467285,
      "learning_rate": 0.000554100779728316,
      "loss": 3.1367,
      "step": 41105
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9479957818984985,
      "learning_rate": 0.0005540986052078708,
      "loss": 2.8815,
      "step": 41106
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7042092084884644,
      "learning_rate": 0.0005540964306401841,
      "loss": 3.1926,
      "step": 41107
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8598171472549438,
      "learning_rate": 0.0005540942560252561,
      "loss": 3.0947,
      "step": 41108
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0458405017852783,
      "learning_rate": 0.0005540920813630871,
      "loss": 3.1611,
      "step": 41109
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8791078329086304,
      "learning_rate": 0.0005540899066536775,
      "loss": 2.7508,
      "step": 41110
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4325731992721558,
      "learning_rate": 0.0005540877318970279,
      "loss": 2.8461,
      "step": 41111
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.17457914352417,
      "learning_rate": 0.0005540855570931386,
      "loss": 3.195,
      "step": 41112
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4096875190734863,
      "learning_rate": 0.00055408338224201,
      "loss": 3.1626,
      "step": 41113
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3762754201889038,
      "learning_rate": 0.0005540812073436425,
      "loss": 3.2613,
      "step": 41114
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.616096019744873,
      "learning_rate": 0.0005540790323980365,
      "loss": 2.8217,
      "step": 41115
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4314334392547607,
      "learning_rate": 0.0005540768574051923,
      "loss": 2.9787,
      "step": 41116
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.627575159072876,
      "learning_rate": 0.0005540746823651106,
      "loss": 3.0868,
      "step": 41117
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.471562385559082,
      "learning_rate": 0.0005540725072777915,
      "loss": 3.1706,
      "step": 41118
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6111037731170654,
      "learning_rate": 0.0005540703321432355,
      "loss": 2.9548,
      "step": 41119
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5534249544143677,
      "learning_rate": 0.0005540681569614431,
      "loss": 3.3234,
      "step": 41120
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6848138570785522,
      "learning_rate": 0.0005540659817324147,
      "loss": 3.2589,
      "step": 41121
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6929017305374146,
      "learning_rate": 0.0005540638064561505,
      "loss": 2.8777,
      "step": 41122
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5846619606018066,
      "learning_rate": 0.000554061631132651,
      "loss": 3.0237,
      "step": 41123
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7341930866241455,
      "learning_rate": 0.0005540594557619168,
      "loss": 3.1956,
      "step": 41124
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.615452766418457,
      "learning_rate": 0.0005540572803439481,
      "loss": 3.1383,
      "step": 41125
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1910412311553955,
      "learning_rate": 0.0005540551048787454,
      "loss": 2.9568,
      "step": 41126
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5076109170913696,
      "learning_rate": 0.000554052929366309,
      "loss": 3.0193,
      "step": 41127
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.490203857421875,
      "learning_rate": 0.0005540507538066393,
      "loss": 3.185,
      "step": 41128
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.304930567741394,
      "learning_rate": 0.0005540485781997369,
      "loss": 3.0425,
      "step": 41129
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4781677722930908,
      "learning_rate": 0.0005540464025456021,
      "loss": 2.8863,
      "step": 41130
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6998792886734009,
      "learning_rate": 0.0005540442268442351,
      "loss": 3.1358,
      "step": 41131
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4958871603012085,
      "learning_rate": 0.0005540420510956367,
      "loss": 3.0828,
      "step": 41132
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.76140558719635,
      "learning_rate": 0.0005540398752998068,
      "loss": 3.1652,
      "step": 41133
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8063054084777832,
      "learning_rate": 0.0005540376994567463,
      "loss": 2.8958,
      "step": 41134
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.338550567626953,
      "learning_rate": 0.0005540355235664554,
      "loss": 2.8805,
      "step": 41135
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.060375690460205,
      "learning_rate": 0.0005540333476289344,
      "loss": 2.8237,
      "step": 41136
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1775906085968018,
      "learning_rate": 0.000554031171644184,
      "loss": 3.0956,
      "step": 41137
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.622409462928772,
      "learning_rate": 0.0005540289956122041,
      "loss": 3.0232,
      "step": 41138
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2758758068084717,
      "learning_rate": 0.0005540268195329957,
      "loss": 2.9068,
      "step": 41139
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.393371820449829,
      "learning_rate": 0.0005540246434065588,
      "loss": 2.9654,
      "step": 41140
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5365811586380005,
      "learning_rate": 0.0005540224672328939,
      "loss": 3.1876,
      "step": 41141
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3412617444992065,
      "learning_rate": 0.0005540202910120015,
      "loss": 3.207,
      "step": 41142
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4601047039031982,
      "learning_rate": 0.0005540181147438819,
      "loss": 3.2784,
      "step": 41143
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8203623294830322,
      "learning_rate": 0.0005540159384285354,
      "loss": 3.029,
      "step": 41144
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9639900922775269,
      "learning_rate": 0.0005540137620659628,
      "loss": 3.0939,
      "step": 41145
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7321878671646118,
      "learning_rate": 0.0005540115856561642,
      "loss": 2.988,
      "step": 41146
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.5629522800445557,
      "learning_rate": 0.0005540094091991399,
      "loss": 3.3017,
      "step": 41147
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.0255095958709717,
      "learning_rate": 0.0005540072326948905,
      "loss": 2.961,
      "step": 41148
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2212791442871094,
      "learning_rate": 0.0005540050561434164,
      "loss": 3.1382,
      "step": 41149
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7974178791046143,
      "learning_rate": 0.0005540028795447179,
      "loss": 3.1671,
      "step": 41150
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.038266181945801,
      "learning_rate": 0.0005540007028987955,
      "loss": 2.7887,
      "step": 41151
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6697299480438232,
      "learning_rate": 0.0005539985262056497,
      "loss": 3.2301,
      "step": 41152
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.633614182472229,
      "learning_rate": 0.0005539963494652807,
      "loss": 3.0754,
      "step": 41153
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8104454278945923,
      "learning_rate": 0.0005539941726776889,
      "loss": 3.1143,
      "step": 41154
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3249483108520508,
      "learning_rate": 0.0005539919958428748,
      "loss": 3.414,
      "step": 41155
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.374586820602417,
      "learning_rate": 0.0005539898189608389,
      "loss": 3.0574,
      "step": 41156
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.414440631866455,
      "learning_rate": 0.0005539876420315815,
      "loss": 2.9325,
      "step": 41157
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.235561728477478,
      "learning_rate": 0.0005539854650551029,
      "loss": 2.9521,
      "step": 41158
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1477513313293457,
      "learning_rate": 0.0005539832880314036,
      "loss": 3.3255,
      "step": 41159
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.677779197692871,
      "learning_rate": 0.0005539811109604841,
      "loss": 3.2764,
      "step": 41160
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4380755424499512,
      "learning_rate": 0.0005539789338423447,
      "loss": 2.99,
      "step": 41161
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6925314664840698,
      "learning_rate": 0.0005539767566769858,
      "loss": 2.9027,
      "step": 41162
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3082003593444824,
      "learning_rate": 0.0005539745794644078,
      "loss": 3.011,
      "step": 41163
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.265613555908203,
      "learning_rate": 0.0005539724022046112,
      "loss": 2.8161,
      "step": 41164
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8810644149780273,
      "learning_rate": 0.0005539702248975963,
      "loss": 2.8993,
      "step": 41165
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8524320125579834,
      "learning_rate": 0.0005539680475433635,
      "loss": 2.9692,
      "step": 41166
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5342435836791992,
      "learning_rate": 0.0005539658701419133,
      "loss": 2.8785,
      "step": 41167
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.510411262512207,
      "learning_rate": 0.0005539636926932461,
      "loss": 3.0523,
      "step": 41168
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5497850179672241,
      "learning_rate": 0.0005539615151973622,
      "loss": 3.0989,
      "step": 41169
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8043028116226196,
      "learning_rate": 0.000553959337654262,
      "loss": 3.1041,
      "step": 41170
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4971896409988403,
      "learning_rate": 0.0005539571600639459,
      "loss": 3.0992,
      "step": 41171
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5640887022018433,
      "learning_rate": 0.0005539549824264146,
      "loss": 3.1118,
      "step": 41172
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9354846477508545,
      "learning_rate": 0.0005539528047416682,
      "loss": 3.0381,
      "step": 41173
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6807183027267456,
      "learning_rate": 0.0005539506270097072,
      "loss": 3.165,
      "step": 41174
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.011415958404541,
      "learning_rate": 0.0005539484492305319,
      "loss": 3.0675,
      "step": 41175
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.5432097911834717,
      "learning_rate": 0.0005539462714041428,
      "loss": 3.1243,
      "step": 41176
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8638540506362915,
      "learning_rate": 0.0005539440935305402,
      "loss": 3.0234,
      "step": 41177
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.530143141746521,
      "learning_rate": 0.0005539419156097247,
      "loss": 3.1266,
      "step": 41178
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9901776313781738,
      "learning_rate": 0.0005539397376416966,
      "loss": 3.1877,
      "step": 41179
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9302462339401245,
      "learning_rate": 0.0005539375596264564,
      "loss": 3.0604,
      "step": 41180
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6251311302185059,
      "learning_rate": 0.0005539353815640043,
      "loss": 3.0325,
      "step": 41181
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.455849051475525,
      "learning_rate": 0.0005539332034543409,
      "loss": 3.149,
      "step": 41182
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.284064769744873,
      "learning_rate": 0.0005539310252974664,
      "loss": 3.0226,
      "step": 41183
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6203283071517944,
      "learning_rate": 0.0005539288470933814,
      "loss": 2.7932,
      "step": 41184
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3962478637695312,
      "learning_rate": 0.0005539266688420864,
      "loss": 3.1488,
      "step": 41185
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.594657301902771,
      "learning_rate": 0.0005539244905435814,
      "loss": 2.9474,
      "step": 41186
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.6723408699035645,
      "learning_rate": 0.0005539223121978672,
      "loss": 3.1383,
      "step": 41187
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4397143125534058,
      "learning_rate": 0.0005539201338049439,
      "loss": 3.0053,
      "step": 41188
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.354158878326416,
      "learning_rate": 0.0005539179553648121,
      "loss": 3.1051,
      "step": 41189
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.038076639175415,
      "learning_rate": 0.0005539157768774722,
      "loss": 3.1202,
      "step": 41190
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2377634048461914,
      "learning_rate": 0.0005539135983429246,
      "loss": 3.2005,
      "step": 41191
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3164474964141846,
      "learning_rate": 0.0005539114197611696,
      "loss": 3.0391,
      "step": 41192
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.307800054550171,
      "learning_rate": 0.0005539092411322077,
      "loss": 2.8654,
      "step": 41193
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.975147008895874,
      "learning_rate": 0.0005539070624560393,
      "loss": 2.9503,
      "step": 41194
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4352866411209106,
      "learning_rate": 0.0005539048837326648,
      "loss": 3.1717,
      "step": 41195
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7748386859893799,
      "learning_rate": 0.0005539027049620845,
      "loss": 3.0211,
      "step": 41196
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4014668464660645,
      "learning_rate": 0.000553900526144299,
      "loss": 3.0879,
      "step": 41197
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5809285640716553,
      "learning_rate": 0.0005538983472793084,
      "loss": 3.1294,
      "step": 41198
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8079564571380615,
      "learning_rate": 0.0005538961683671136,
      "loss": 2.9369,
      "step": 41199
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7382112741470337,
      "learning_rate": 0.0005538939894077145,
      "loss": 3.0054,
      "step": 41200
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8836719989776611,
      "learning_rate": 0.0005538918104011118,
      "loss": 3.0987,
      "step": 41201
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3710688352584839,
      "learning_rate": 0.0005538896313473058,
      "loss": 3.1457,
      "step": 41202
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5267243385314941,
      "learning_rate": 0.0005538874522462969,
      "loss": 3.1601,
      "step": 41203
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1669845581054688,
      "learning_rate": 0.0005538852730980855,
      "loss": 3.2808,
      "step": 41204
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.062415599822998,
      "learning_rate": 0.0005538830939026721,
      "loss": 2.9619,
      "step": 41205
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.696770191192627,
      "learning_rate": 0.000553880914660057,
      "loss": 2.9336,
      "step": 41206
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5195746421813965,
      "learning_rate": 0.0005538787353702408,
      "loss": 3.1112,
      "step": 41207
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4631357192993164,
      "learning_rate": 0.0005538765560332235,
      "loss": 3.197,
      "step": 41208
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3752731084823608,
      "learning_rate": 0.0005538743766490059,
      "loss": 3.0218,
      "step": 41209
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5398858785629272,
      "learning_rate": 0.0005538721972175882,
      "loss": 3.1328,
      "step": 41210
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9466276168823242,
      "learning_rate": 0.0005538700177389709,
      "loss": 3.4369,
      "step": 41211
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4150340557098389,
      "learning_rate": 0.0005538678382131545,
      "loss": 3.0701,
      "step": 41212
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3355209827423096,
      "learning_rate": 0.0005538656586401391,
      "loss": 2.8651,
      "step": 41213
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6379530429840088,
      "learning_rate": 0.0005538634790199253,
      "loss": 3.1116,
      "step": 41214
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6031912565231323,
      "learning_rate": 0.0005538612993525136,
      "loss": 2.9318,
      "step": 41215
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5804109573364258,
      "learning_rate": 0.0005538591196379042,
      "loss": 3.2196,
      "step": 41216
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.392399787902832,
      "learning_rate": 0.0005538569398760976,
      "loss": 3.2474,
      "step": 41217
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.2937591075897217,
      "learning_rate": 0.0005538547600670941,
      "loss": 3.0885,
      "step": 41218
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5489330291748047,
      "learning_rate": 0.0005538525802108944,
      "loss": 3.2566,
      "step": 41219
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.965474247932434,
      "learning_rate": 0.0005538504003074987,
      "loss": 3.1089,
      "step": 41220
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2357592582702637,
      "learning_rate": 0.0005538482203569073,
      "loss": 2.8524,
      "step": 41221
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6460970640182495,
      "learning_rate": 0.0005538460403591208,
      "loss": 3.1127,
      "step": 41222
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.342130184173584,
      "learning_rate": 0.0005538438603141394,
      "loss": 2.8596,
      "step": 41223
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5909368991851807,
      "learning_rate": 0.0005538416802219638,
      "loss": 3.3758,
      "step": 41224
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.8271028995513916,
      "learning_rate": 0.0005538395000825941,
      "loss": 3.1708,
      "step": 41225
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.586170196533203,
      "learning_rate": 0.0005538373198960311,
      "loss": 3.1469,
      "step": 41226
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.421705484390259,
      "learning_rate": 0.0005538351396622747,
      "loss": 2.9456,
      "step": 41227
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3519662618637085,
      "learning_rate": 0.0005538329593813257,
      "loss": 2.9123,
      "step": 41228
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.1118760108947754,
      "learning_rate": 0.0005538307790531842,
      "loss": 3.2298,
      "step": 41229
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.799160957336426,
      "learning_rate": 0.0005538285986778509,
      "loss": 3.1937,
      "step": 41230
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7781943082809448,
      "learning_rate": 0.000553826418255326,
      "loss": 2.8407,
      "step": 41231
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.35784912109375,
      "learning_rate": 0.00055382423778561,
      "loss": 3.069,
      "step": 41232
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.5298357009887695,
      "learning_rate": 0.0005538220572687034,
      "loss": 2.9701,
      "step": 41233
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.6291756629943848,
      "learning_rate": 0.0005538198767046064,
      "loss": 3.1169,
      "step": 41234
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8896071910858154,
      "learning_rate": 0.0005538176960933193,
      "loss": 2.9192,
      "step": 41235
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8461272716522217,
      "learning_rate": 0.000553815515434843,
      "loss": 3.2421,
      "step": 41236
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.954332113265991,
      "learning_rate": 0.0005538133347291774,
      "loss": 3.1342,
      "step": 41237
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2921216487884521,
      "learning_rate": 0.0005538111539763232,
      "loss": 3.0014,
      "step": 41238
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3171894550323486,
      "learning_rate": 0.0005538089731762808,
      "loss": 3.129,
      "step": 41239
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.592693567276001,
      "learning_rate": 0.0005538067923290504,
      "loss": 2.9669,
      "step": 41240
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.353471279144287,
      "learning_rate": 0.0005538046114346325,
      "loss": 2.7633,
      "step": 41241
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5240310430526733,
      "learning_rate": 0.0005538024304930277,
      "loss": 3.0485,
      "step": 41242
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.528044581413269,
      "learning_rate": 0.0005538002495042361,
      "loss": 3.1547,
      "step": 41243
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3774209022521973,
      "learning_rate": 0.0005537980684682582,
      "loss": 3.225,
      "step": 41244
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5167973041534424,
      "learning_rate": 0.0005537958873850946,
      "loss": 2.9856,
      "step": 41245
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0691654682159424,
      "learning_rate": 0.0005537937062547455,
      "loss": 3.0725,
      "step": 41246
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5397847890853882,
      "learning_rate": 0.0005537915250772112,
      "loss": 3.0785,
      "step": 41247
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4449591636657715,
      "learning_rate": 0.0005537893438524923,
      "loss": 3.0738,
      "step": 41248
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7190327644348145,
      "learning_rate": 0.0005537871625805893,
      "loss": 2.7068,
      "step": 41249
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4092596769332886,
      "learning_rate": 0.0005537849812615025,
      "loss": 2.7933,
      "step": 41250
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5533422231674194,
      "learning_rate": 0.0005537827998952321,
      "loss": 3.0485,
      "step": 41251
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7108650207519531,
      "learning_rate": 0.000553780618481779,
      "loss": 2.8556,
      "step": 41252
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3803415298461914,
      "learning_rate": 0.000553778437021143,
      "loss": 3.0239,
      "step": 41253
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5399750471115112,
      "learning_rate": 0.0005537762555133249,
      "loss": 3.2009,
      "step": 41254
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1164424419403076,
      "learning_rate": 0.000553774073958325,
      "loss": 3.3079,
      "step": 41255
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6557856798171997,
      "learning_rate": 0.0005537718923561437,
      "loss": 3.2191,
      "step": 41256
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3062783479690552,
      "learning_rate": 0.0005537697107067815,
      "loss": 2.9616,
      "step": 41257
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0388712882995605,
      "learning_rate": 0.0005537675290102385,
      "loss": 3.0542,
      "step": 41258
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.1874027252197266,
      "learning_rate": 0.0005537653472665156,
      "loss": 3.0029,
      "step": 41259
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.576227068901062,
      "learning_rate": 0.0005537631654756128,
      "loss": 3.1622,
      "step": 41260
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9509834051132202,
      "learning_rate": 0.0005537609836375306,
      "loss": 3.2228,
      "step": 41261
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.1631155014038086,
      "learning_rate": 0.0005537588017522695,
      "loss": 3.1342,
      "step": 41262
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.175233840942383,
      "learning_rate": 0.0005537566198198298,
      "loss": 3.0941,
      "step": 41263
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5997560024261475,
      "learning_rate": 0.0005537544378402121,
      "loss": 3.0422,
      "step": 41264
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2600913047790527,
      "learning_rate": 0.0005537522558134165,
      "loss": 2.9913,
      "step": 41265
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.759605884552002,
      "learning_rate": 0.0005537500737394436,
      "loss": 3.0668,
      "step": 41266
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.8276755809783936,
      "learning_rate": 0.0005537478916182938,
      "loss": 3.2338,
      "step": 41267
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5855443477630615,
      "learning_rate": 0.0005537457094499674,
      "loss": 3.1435,
      "step": 41268
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.147207260131836,
      "learning_rate": 0.000553743527234465,
      "loss": 3.0459,
      "step": 41269
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.05674409866333,
      "learning_rate": 0.0005537413449717868,
      "loss": 3.2511,
      "step": 41270
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.502819776535034,
      "learning_rate": 0.0005537391626619333,
      "loss": 2.8255,
      "step": 41271
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8657279014587402,
      "learning_rate": 0.0005537369803049049,
      "loss": 2.8595,
      "step": 41272
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.084916830062866,
      "learning_rate": 0.0005537347979007021,
      "loss": 3.1547,
      "step": 41273
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0326523780822754,
      "learning_rate": 0.0005537326154493251,
      "loss": 3.3141,
      "step": 41274
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0288915634155273,
      "learning_rate": 0.0005537304329507745,
      "loss": 3.0845,
      "step": 41275
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.731999158859253,
      "learning_rate": 0.0005537282504050506,
      "loss": 3.1729,
      "step": 41276
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4639495611190796,
      "learning_rate": 0.0005537260678121538,
      "loss": 2.9543,
      "step": 41277
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.804766058921814,
      "learning_rate": 0.0005537238851720845,
      "loss": 3.2555,
      "step": 41278
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.567972183227539,
      "learning_rate": 0.0005537217024848433,
      "loss": 3.0965,
      "step": 41279
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3563463687896729,
      "learning_rate": 0.0005537195197504302,
      "loss": 2.8463,
      "step": 41280
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3470492362976074,
      "learning_rate": 0.000553717336968846,
      "loss": 3.0342,
      "step": 41281
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.294914722442627,
      "learning_rate": 0.000553715154140091,
      "loss": 3.1899,
      "step": 41282
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5295120477676392,
      "learning_rate": 0.0005537129712641654,
      "loss": 2.9562,
      "step": 41283
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.537204623222351,
      "learning_rate": 0.00055371078834107,
      "loss": 3.0789,
      "step": 41284
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5297809839248657,
      "learning_rate": 0.0005537086053708048,
      "loss": 2.8664,
      "step": 41285
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3747713565826416,
      "learning_rate": 0.0005537064223533704,
      "loss": 2.9422,
      "step": 41286
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6069295406341553,
      "learning_rate": 0.0005537042392887672,
      "loss": 3.1576,
      "step": 41287
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.565911889076233,
      "learning_rate": 0.0005537020561769957,
      "loss": 2.9842,
      "step": 41288
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.675372362136841,
      "learning_rate": 0.0005536998730180561,
      "loss": 3.0046,
      "step": 41289
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.562796950340271,
      "learning_rate": 0.0005536976898119489,
      "loss": 3.2112,
      "step": 41290
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5356091260910034,
      "learning_rate": 0.0005536955065586746,
      "loss": 2.974,
      "step": 41291
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9323266744613647,
      "learning_rate": 0.0005536933232582333,
      "loss": 3.1508,
      "step": 41292
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6398952007293701,
      "learning_rate": 0.0005536911399106258,
      "loss": 3.1486,
      "step": 41293
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.101360321044922,
      "learning_rate": 0.0005536889565158523,
      "loss": 2.9463,
      "step": 41294
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8446675539016724,
      "learning_rate": 0.0005536867730739132,
      "loss": 2.9961,
      "step": 41295
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9770582914352417,
      "learning_rate": 0.000553684589584809,
      "loss": 2.8476,
      "step": 41296
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8018685579299927,
      "learning_rate": 0.00055368240604854,
      "loss": 3.204,
      "step": 41297
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9285396337509155,
      "learning_rate": 0.0005536802224651067,
      "loss": 3.0267,
      "step": 41298
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4713423252105713,
      "learning_rate": 0.0005536780388345094,
      "loss": 2.9762,
      "step": 41299
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0245916843414307,
      "learning_rate": 0.0005536758551567486,
      "loss": 2.8758,
      "step": 41300
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.508690118789673,
      "learning_rate": 0.0005536736714318246,
      "loss": 3.0788,
      "step": 41301
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6873670816421509,
      "learning_rate": 0.0005536714876597379,
      "loss": 3.1658,
      "step": 41302
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6265606880187988,
      "learning_rate": 0.000553669303840489,
      "loss": 2.9698,
      "step": 41303
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8967305421829224,
      "learning_rate": 0.0005536671199740781,
      "loss": 3.2181,
      "step": 41304
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8051477670669556,
      "learning_rate": 0.0005536649360605056,
      "loss": 2.7104,
      "step": 41305
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5227035284042358,
      "learning_rate": 0.0005536627520997722,
      "loss": 3.1164,
      "step": 41306
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8522781133651733,
      "learning_rate": 0.000553660568091878,
      "loss": 3.2004,
      "step": 41307
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7011258602142334,
      "learning_rate": 0.0005536583840368236,
      "loss": 3.1078,
      "step": 41308
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4661818742752075,
      "learning_rate": 0.0005536561999346092,
      "loss": 3.0374,
      "step": 41309
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7361420392990112,
      "learning_rate": 0.0005536540157852353,
      "loss": 2.9355,
      "step": 41310
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.70082426071167,
      "learning_rate": 0.0005536518315887024,
      "loss": 3.0203,
      "step": 41311
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4037941694259644,
      "learning_rate": 0.0005536496473450108,
      "loss": 2.8866,
      "step": 41312
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.638512372970581,
      "learning_rate": 0.000553647463054161,
      "loss": 2.9775,
      "step": 41313
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5022201538085938,
      "learning_rate": 0.0005536452787161533,
      "loss": 3.0154,
      "step": 41314
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2940171957015991,
      "learning_rate": 0.0005536430943309883,
      "loss": 3.0865,
      "step": 41315
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.455030083656311,
      "learning_rate": 0.0005536409098986661,
      "loss": 3.0623,
      "step": 41316
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7113248109817505,
      "learning_rate": 0.0005536387254191872,
      "loss": 3.0912,
      "step": 41317
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2073783874511719,
      "learning_rate": 0.0005536365408925521,
      "loss": 3.1059,
      "step": 41318
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7693288326263428,
      "learning_rate": 0.0005536343563187614,
      "loss": 3.1743,
      "step": 41319
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.549377679824829,
      "learning_rate": 0.0005536321716978151,
      "loss": 2.7863,
      "step": 41320
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5280147790908813,
      "learning_rate": 0.0005536299870297139,
      "loss": 2.9431,
      "step": 41321
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.858485460281372,
      "learning_rate": 0.0005536278023144579,
      "loss": 2.9674,
      "step": 41322
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8552169799804688,
      "learning_rate": 0.0005536256175520479,
      "loss": 3.0848,
      "step": 41323
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7213579416275024,
      "learning_rate": 0.000553623432742484,
      "loss": 3.1628,
      "step": 41324
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9155820608139038,
      "learning_rate": 0.0005536212478857668,
      "loss": 3.0403,
      "step": 41325
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0649569034576416,
      "learning_rate": 0.0005536190629818966,
      "loss": 3.0662,
      "step": 41326
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7265828847885132,
      "learning_rate": 0.0005536168780308737,
      "loss": 2.9211,
      "step": 41327
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.168012857437134,
      "learning_rate": 0.0005536146930326987,
      "loss": 3.1607,
      "step": 41328
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.571516990661621,
      "learning_rate": 0.0005536125079873721,
      "loss": 3.3269,
      "step": 41329
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.051159143447876,
      "learning_rate": 0.000553610322894894,
      "loss": 3.1206,
      "step": 41330
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4257060289382935,
      "learning_rate": 0.000553608137755265,
      "loss": 2.9246,
      "step": 41331
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8239914178848267,
      "learning_rate": 0.0005536059525684854,
      "loss": 2.8271,
      "step": 41332
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7629863023757935,
      "learning_rate": 0.0005536037673345558,
      "loss": 3.1437,
      "step": 41333
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4781681299209595,
      "learning_rate": 0.0005536015820534764,
      "loss": 3.1602,
      "step": 41334
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7879996299743652,
      "learning_rate": 0.0005535993967252476,
      "loss": 3.0272,
      "step": 41335
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8061407804489136,
      "learning_rate": 0.00055359721134987,
      "loss": 2.872,
      "step": 41336
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.417946219444275,
      "learning_rate": 0.0005535950259273439,
      "loss": 2.9675,
      "step": 41337
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6195743083953857,
      "learning_rate": 0.0005535928404576695,
      "loss": 3.1701,
      "step": 41338
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7829010486602783,
      "learning_rate": 0.0005535906549408476,
      "loss": 3.0459,
      "step": 41339
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5363712310791016,
      "learning_rate": 0.0005535884693768783,
      "loss": 3.1521,
      "step": 41340
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.406487464904785,
      "learning_rate": 0.0005535862837657623,
      "loss": 3.2454,
      "step": 41341
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4262858629226685,
      "learning_rate": 0.0005535840981074996,
      "loss": 3.2941,
      "step": 41342
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7055230140686035,
      "learning_rate": 0.000553581912402091,
      "loss": 3.0464,
      "step": 41343
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4587483406066895,
      "learning_rate": 0.0005535797266495366,
      "loss": 3.1641,
      "step": 41344
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.355244517326355,
      "learning_rate": 0.000553577540849837,
      "loss": 3.182,
      "step": 41345
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5986213684082031,
      "learning_rate": 0.0005535753550029925,
      "loss": 3.2495,
      "step": 41346
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5620404481887817,
      "learning_rate": 0.0005535731691090037,
      "loss": 3.05,
      "step": 41347
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2952989339828491,
      "learning_rate": 0.0005535709831678707,
      "loss": 2.9289,
      "step": 41348
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7801681756973267,
      "learning_rate": 0.0005535687971795941,
      "loss": 3.2522,
      "step": 41349
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3877822160720825,
      "learning_rate": 0.0005535666111441743,
      "loss": 3.1387,
      "step": 41350
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7309763431549072,
      "learning_rate": 0.0005535644250616117,
      "loss": 3.1497,
      "step": 41351
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5083118677139282,
      "learning_rate": 0.0005535622389319067,
      "loss": 2.9415,
      "step": 41352
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5722930431365967,
      "learning_rate": 0.0005535600527550597,
      "loss": 3.1149,
      "step": 41353
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9408771991729736,
      "learning_rate": 0.0005535578665310711,
      "loss": 2.96,
      "step": 41354
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2833043336868286,
      "learning_rate": 0.0005535556802599412,
      "loss": 3.1253,
      "step": 41355
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6682926416397095,
      "learning_rate": 0.0005535534939416706,
      "loss": 3.1577,
      "step": 41356
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5589121580123901,
      "learning_rate": 0.0005535513075762596,
      "loss": 3.0848,
      "step": 41357
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4681285619735718,
      "learning_rate": 0.0005535491211637086,
      "loss": 2.9506,
      "step": 41358
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6909668445587158,
      "learning_rate": 0.0005535469347040181,
      "loss": 3.016,
      "step": 41359
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8208472728729248,
      "learning_rate": 0.0005535447481971883,
      "loss": 2.8202,
      "step": 41360
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5045593976974487,
      "learning_rate": 0.00055354256164322,
      "loss": 3.1731,
      "step": 41361
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4815541505813599,
      "learning_rate": 0.0005535403750421131,
      "loss": 2.8919,
      "step": 41362
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0505785942077637,
      "learning_rate": 0.0005535381883938683,
      "loss": 2.8927,
      "step": 41363
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.98188054561615,
      "learning_rate": 0.0005535360016984862,
      "loss": 2.9407,
      "step": 41364
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3576834201812744,
      "learning_rate": 0.0005535338149559667,
      "loss": 3.1049,
      "step": 41365
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.667836904525757,
      "learning_rate": 0.0005535316281663105,
      "loss": 3.1966,
      "step": 41366
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6023763418197632,
      "learning_rate": 0.0005535294413295182,
      "loss": 3.1662,
      "step": 41367
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3205598592758179,
      "learning_rate": 0.0005535272544455898,
      "loss": 2.886,
      "step": 41368
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.855044364929199,
      "learning_rate": 0.0005535250675145259,
      "loss": 3.0262,
      "step": 41369
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.658212423324585,
      "learning_rate": 0.000553522880536327,
      "loss": 2.9694,
      "step": 41370
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6062520742416382,
      "learning_rate": 0.0005535206935109933,
      "loss": 3.0285,
      "step": 41371
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.367434024810791,
      "learning_rate": 0.0005535185064385254,
      "loss": 3.0598,
      "step": 41372
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.141834020614624,
      "learning_rate": 0.0005535163193189236,
      "loss": 3.3267,
      "step": 41373
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4796661138534546,
      "learning_rate": 0.0005535141321521883,
      "loss": 3.0487,
      "step": 41374
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5211349725723267,
      "learning_rate": 0.0005535119449383201,
      "loss": 2.953,
      "step": 41375
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.279771089553833,
      "learning_rate": 0.0005535097576773191,
      "loss": 3.2325,
      "step": 41376
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9234119653701782,
      "learning_rate": 0.0005535075703691858,
      "loss": 3.1013,
      "step": 41377
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9096513986587524,
      "learning_rate": 0.0005535053830139207,
      "loss": 3.043,
      "step": 41378
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.308558702468872,
      "learning_rate": 0.0005535031956115242,
      "loss": 2.7657,
      "step": 41379
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.424336552619934,
      "learning_rate": 0.0005535010081619967,
      "loss": 3.3518,
      "step": 41380
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7019400596618652,
      "learning_rate": 0.0005534988206653385,
      "loss": 3.0412,
      "step": 41381
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.303476095199585,
      "learning_rate": 0.0005534966331215501,
      "loss": 3.0443,
      "step": 41382
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8623754978179932,
      "learning_rate": 0.000553494445530632,
      "loss": 3.1064,
      "step": 41383
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.83876371383667,
      "learning_rate": 0.0005534922578925843,
      "loss": 3.2668,
      "step": 41384
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.562177300453186,
      "learning_rate": 0.0005534900702074077,
      "loss": 2.8847,
      "step": 41385
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.83882737159729,
      "learning_rate": 0.0005534878824751026,
      "loss": 3.1149,
      "step": 41386
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.54398775100708,
      "learning_rate": 0.0005534856946956692,
      "loss": 2.9033,
      "step": 41387
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.91788649559021,
      "learning_rate": 0.000553483506869108,
      "loss": 3.004,
      "step": 41388
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8132847547531128,
      "learning_rate": 0.0005534813189954196,
      "loss": 3.3325,
      "step": 41389
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9069308042526245,
      "learning_rate": 0.0005534791310746041,
      "loss": 3.0635,
      "step": 41390
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8418694734573364,
      "learning_rate": 0.0005534769431066621,
      "loss": 3.1728,
      "step": 41391
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9417203664779663,
      "learning_rate": 0.0005534747550915939,
      "loss": 3.2753,
      "step": 41392
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0587615966796875,
      "learning_rate": 0.0005534725670293999,
      "loss": 3.013,
      "step": 41393
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2869315147399902,
      "learning_rate": 0.0005534703789200808,
      "loss": 3.021,
      "step": 41394
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7003942728042603,
      "learning_rate": 0.0005534681907636365,
      "loss": 3.3946,
      "step": 41395
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7873775959014893,
      "learning_rate": 0.0005534660025600679,
      "loss": 3.0016,
      "step": 41396
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.470986008644104,
      "learning_rate": 0.000553463814309375,
      "loss": 3.0418,
      "step": 41397
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6209969520568848,
      "learning_rate": 0.0005534616260115586,
      "loss": 2.9755,
      "step": 41398
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9797383546829224,
      "learning_rate": 0.0005534594376666187,
      "loss": 2.9292,
      "step": 41399
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6427053213119507,
      "learning_rate": 0.000553457249274556,
      "loss": 3.0459,
      "step": 41400
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9276301860809326,
      "learning_rate": 0.0005534550608353708,
      "loss": 2.9908,
      "step": 41401
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4659839868545532,
      "learning_rate": 0.0005534528723490635,
      "loss": 3.2142,
      "step": 41402
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.783198356628418,
      "learning_rate": 0.0005534506838156346,
      "loss": 3.155,
      "step": 41403
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.217273712158203,
      "learning_rate": 0.0005534484952350844,
      "loss": 3.2287,
      "step": 41404
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4179917573928833,
      "learning_rate": 0.0005534463066074133,
      "loss": 3.0412,
      "step": 41405
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1246497631073,
      "learning_rate": 0.0005534441179326217,
      "loss": 2.9938,
      "step": 41406
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9678466320037842,
      "learning_rate": 0.0005534419292107101,
      "loss": 2.9797,
      "step": 41407
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8329066038131714,
      "learning_rate": 0.0005534397404416789,
      "loss": 3.1522,
      "step": 41408
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7318943738937378,
      "learning_rate": 0.0005534375516255285,
      "loss": 3.1037,
      "step": 41409
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7084975242614746,
      "learning_rate": 0.0005534353627622593,
      "loss": 3.1819,
      "step": 41410
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.05145001411438,
      "learning_rate": 0.0005534331738518716,
      "loss": 3.0522,
      "step": 41411
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6695163249969482,
      "learning_rate": 0.0005534309848943659,
      "loss": 3.31,
      "step": 41412
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4800328016281128,
      "learning_rate": 0.0005534287958897426,
      "loss": 3.1475,
      "step": 41413
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6776129007339478,
      "learning_rate": 0.000553426606838002,
      "loss": 3.3017,
      "step": 41414
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6211349964141846,
      "learning_rate": 0.0005534244177391448,
      "loss": 3.0732,
      "step": 41415
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.184175729751587,
      "learning_rate": 0.0005534222285931711,
      "loss": 2.7441,
      "step": 41416
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.554560422897339,
      "learning_rate": 0.0005534200394000814,
      "loss": 3.2733,
      "step": 41417
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.577217936515808,
      "learning_rate": 0.0005534178501598763,
      "loss": 3.0688,
      "step": 41418
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4630993604660034,
      "learning_rate": 0.0005534156608725558,
      "loss": 3.184,
      "step": 41419
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.655639886856079,
      "learning_rate": 0.0005534134715381206,
      "loss": 3.2298,
      "step": 41420
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8326268196105957,
      "learning_rate": 0.0005534112821565712,
      "loss": 2.9765,
      "step": 41421
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4626946449279785,
      "learning_rate": 0.0005534090927279077,
      "loss": 3.3134,
      "step": 41422
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8924338817596436,
      "learning_rate": 0.0005534069032521307,
      "loss": 2.9642,
      "step": 41423
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3328245878219604,
      "learning_rate": 0.0005534047137292407,
      "loss": 3.0338,
      "step": 41424
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.818795084953308,
      "learning_rate": 0.0005534025241592379,
      "loss": 3.0794,
      "step": 41425
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0646209716796875,
      "learning_rate": 0.0005534003345421229,
      "loss": 3.2456,
      "step": 41426
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.692305564880371,
      "learning_rate": 0.0005533981448778958,
      "loss": 3.0963,
      "step": 41427
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.85074782371521,
      "learning_rate": 0.0005533959551665572,
      "loss": 3.0911,
      "step": 41428
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.239281177520752,
      "learning_rate": 0.0005533937654081077,
      "loss": 2.9196,
      "step": 41429
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.031214952468872,
      "learning_rate": 0.0005533915756025474,
      "loss": 2.9954,
      "step": 41430
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4212453365325928,
      "learning_rate": 0.0005533893857498768,
      "loss": 2.8718,
      "step": 41431
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.237382173538208,
      "learning_rate": 0.0005533871958500964,
      "loss": 2.8665,
      "step": 41432
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6559717655181885,
      "learning_rate": 0.0005533850059032064,
      "loss": 3.2638,
      "step": 41433
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3346449136734009,
      "learning_rate": 0.0005533828159092074,
      "loss": 3.2517,
      "step": 41434
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4095851182937622,
      "learning_rate": 0.0005533806258680998,
      "loss": 3.1363,
      "step": 41435
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3683441877365112,
      "learning_rate": 0.000553378435779884,
      "loss": 3.0695,
      "step": 41436
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.313172698020935,
      "learning_rate": 0.0005533762456445603,
      "loss": 3.1079,
      "step": 41437
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4121043682098389,
      "learning_rate": 0.0005533740554621292,
      "loss": 3.1193,
      "step": 41438
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.068650007247925,
      "learning_rate": 0.000553371865232591,
      "loss": 3.2568,
      "step": 41439
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.551331639289856,
      "learning_rate": 0.0005533696749559463,
      "loss": 3.0097,
      "step": 41440
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7264256477355957,
      "learning_rate": 0.0005533674846321953,
      "loss": 2.8088,
      "step": 41441
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5984437465667725,
      "learning_rate": 0.0005533652942613386,
      "loss": 3.129,
      "step": 41442
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.319888949394226,
      "learning_rate": 0.0005533631038433765,
      "loss": 3.1369,
      "step": 41443
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3942383527755737,
      "learning_rate": 0.0005533609133783093,
      "loss": 3.0015,
      "step": 41444
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6132948398590088,
      "learning_rate": 0.0005533587228661376,
      "loss": 3.0699,
      "step": 41445
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2574597597122192,
      "learning_rate": 0.0005533565323068617,
      "loss": 2.9736,
      "step": 41446
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.689125657081604,
      "learning_rate": 0.0005533543417004821,
      "loss": 3.2019,
      "step": 41447
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.1616090536117554,
      "learning_rate": 0.0005533521510469991,
      "loss": 2.7037,
      "step": 41448
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.8657619953155518,
      "learning_rate": 0.0005533499603464131,
      "loss": 3.1437,
      "step": 41449
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4850001335144043,
      "learning_rate": 0.0005533477695987247,
      "loss": 3.0008,
      "step": 41450
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5902345180511475,
      "learning_rate": 0.0005533455788039342,
      "loss": 3.4276,
      "step": 41451
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7283129692077637,
      "learning_rate": 0.0005533433879620417,
      "loss": 2.9977,
      "step": 41452
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8575495481491089,
      "learning_rate": 0.0005533411970730481,
      "loss": 2.9061,
      "step": 41453
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3901898860931396,
      "learning_rate": 0.0005533390061369536,
      "loss": 3.1564,
      "step": 41454
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.126432180404663,
      "learning_rate": 0.0005533368151537585,
      "loss": 2.7298,
      "step": 41455
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4065110683441162,
      "learning_rate": 0.0005533346241234632,
      "loss": 2.8396,
      "step": 41456
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6359057426452637,
      "learning_rate": 0.0005533324330460684,
      "loss": 3.2719,
      "step": 41457
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5947884321212769,
      "learning_rate": 0.0005533302419215743,
      "loss": 3.2732,
      "step": 41458
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7254661321640015,
      "learning_rate": 0.0005533280507499813,
      "loss": 3.1498,
      "step": 41459
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.6342341899871826,
      "learning_rate": 0.0005533258595312899,
      "loss": 2.9658,
      "step": 41460
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3838279247283936,
      "learning_rate": 0.0005533236682655003,
      "loss": 2.9623,
      "step": 41461
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4156172275543213,
      "learning_rate": 0.0005533214769526132,
      "loss": 2.9556,
      "step": 41462
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.414762020111084,
      "learning_rate": 0.0005533192855926288,
      "loss": 2.8911,
      "step": 41463
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.692598819732666,
      "learning_rate": 0.0005533170941855475,
      "loss": 3.0441,
      "step": 41464
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.616375207901001,
      "learning_rate": 0.0005533149027313698,
      "loss": 2.9358,
      "step": 41465
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5275309085845947,
      "learning_rate": 0.0005533127112300961,
      "loss": 3.2041,
      "step": 41466
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.7481210231781006,
      "learning_rate": 0.0005533105196817268,
      "loss": 3.2785,
      "step": 41467
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.325809359550476,
      "learning_rate": 0.0005533083280862622,
      "loss": 3.2159,
      "step": 41468
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7891533374786377,
      "learning_rate": 0.0005533061364437028,
      "loss": 2.9445,
      "step": 41469
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8289284706115723,
      "learning_rate": 0.0005533039447540492,
      "loss": 3.0562,
      "step": 41470
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8811008930206299,
      "learning_rate": 0.0005533017530173015,
      "loss": 2.966,
      "step": 41471
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3047302961349487,
      "learning_rate": 0.0005532995612334601,
      "loss": 3.2354,
      "step": 41472
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.5868797302246094,
      "learning_rate": 0.0005532973694025256,
      "loss": 2.9246,
      "step": 41473
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4843862056732178,
      "learning_rate": 0.0005532951775244984,
      "loss": 3.1337,
      "step": 41474
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.586495041847229,
      "learning_rate": 0.0005532929855993788,
      "loss": 3.1422,
      "step": 41475
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5390077829360962,
      "learning_rate": 0.0005532907936271674,
      "loss": 3.1028,
      "step": 41476
    },
    {
      "epoch": 0.54,
      "grad_norm": 4.445034980773926,
      "learning_rate": 0.0005532886016078643,
      "loss": 3.022,
      "step": 41477
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.442094087600708,
      "learning_rate": 0.0005532864095414701,
      "loss": 3.1105,
      "step": 41478
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4678739309310913,
      "learning_rate": 0.0005532842174279851,
      "loss": 2.6799,
      "step": 41479
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2075183391571045,
      "learning_rate": 0.0005532820252674099,
      "loss": 2.9464,
      "step": 41480
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.255120038986206,
      "learning_rate": 0.0005532798330597448,
      "loss": 2.8506,
      "step": 41481
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.711855173110962,
      "learning_rate": 0.0005532776408049901,
      "loss": 2.8944,
      "step": 41482
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.759398102760315,
      "learning_rate": 0.0005532754485031464,
      "loss": 3.1699,
      "step": 41483
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8429023027420044,
      "learning_rate": 0.000553273256154214,
      "loss": 2.9374,
      "step": 41484
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7551487684249878,
      "learning_rate": 0.0005532710637581932,
      "loss": 3.2501,
      "step": 41485
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4839948415756226,
      "learning_rate": 0.0005532688713150847,
      "loss": 3.1874,
      "step": 41486
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2791138887405396,
      "learning_rate": 0.0005532666788248886,
      "loss": 3.1288,
      "step": 41487
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.715696930885315,
      "learning_rate": 0.0005532644862876055,
      "loss": 3.2178,
      "step": 41488
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.436091661453247,
      "learning_rate": 0.0005532622937032358,
      "loss": 3.0556,
      "step": 41489
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0102078914642334,
      "learning_rate": 0.0005532601010717798,
      "loss": 3.1207,
      "step": 41490
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.122716188430786,
      "learning_rate": 0.0005532579083932379,
      "loss": 3.0412,
      "step": 41491
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9101489782333374,
      "learning_rate": 0.0005532557156676107,
      "loss": 3.3483,
      "step": 41492
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.066723346710205,
      "learning_rate": 0.0005532535228948985,
      "loss": 3.0067,
      "step": 41493
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.8798344135284424,
      "learning_rate": 0.0005532513300751015,
      "loss": 3.1523,
      "step": 41494
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.770521879196167,
      "learning_rate": 0.0005532491372082205,
      "loss": 2.792,
      "step": 41495
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7087925672531128,
      "learning_rate": 0.0005532469442942555,
      "loss": 3.0838,
      "step": 41496
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6589332818984985,
      "learning_rate": 0.0005532447513332073,
      "loss": 3.1852,
      "step": 41497
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7152791023254395,
      "learning_rate": 0.0005532425583250761,
      "loss": 2.9532,
      "step": 41498
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5998516082763672,
      "learning_rate": 0.0005532403652698623,
      "loss": 3.14,
      "step": 41499
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5790990591049194,
      "learning_rate": 0.0005532381721675662,
      "loss": 2.8866,
      "step": 41500
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3992912769317627,
      "learning_rate": 0.0005532359790181886,
      "loss": 3.1122,
      "step": 41501
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6767123937606812,
      "learning_rate": 0.0005532337858217294,
      "loss": 3.2592,
      "step": 41502
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.646095871925354,
      "learning_rate": 0.0005532315925781895,
      "loss": 2.8472,
      "step": 41503
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4777307510375977,
      "learning_rate": 0.0005532293992875689,
      "loss": 3.2202,
      "step": 41504
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0429842472076416,
      "learning_rate": 0.0005532272059498682,
      "loss": 3.0366,
      "step": 41505
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.65960693359375,
      "learning_rate": 0.0005532250125650878,
      "loss": 2.9324,
      "step": 41506
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.4676132202148438,
      "learning_rate": 0.0005532228191332281,
      "loss": 3.1135,
      "step": 41507
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7539536952972412,
      "learning_rate": 0.0005532206256542894,
      "loss": 3.0736,
      "step": 41508
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5977665185928345,
      "learning_rate": 0.0005532184321282724,
      "loss": 3.3902,
      "step": 41509
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9301245212554932,
      "learning_rate": 0.0005532162385551772,
      "loss": 3.2522,
      "step": 41510
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.053239345550537,
      "learning_rate": 0.0005532140449350043,
      "loss": 3.0846,
      "step": 41511
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4621260166168213,
      "learning_rate": 0.0005532118512677542,
      "loss": 3.0931,
      "step": 41512
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5773651599884033,
      "learning_rate": 0.0005532096575534271,
      "loss": 3.2815,
      "step": 41513
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.1440224647521973,
      "learning_rate": 0.0005532074637920237,
      "loss": 3.2117,
      "step": 41514
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0036838054656982,
      "learning_rate": 0.0005532052699835442,
      "loss": 2.9816,
      "step": 41515
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.673575520515442,
      "learning_rate": 0.000553203076127989,
      "loss": 3.1222,
      "step": 41516
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5784238576889038,
      "learning_rate": 0.0005532008822253586,
      "loss": 3.1428,
      "step": 41517
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7702773809432983,
      "learning_rate": 0.0005531986882756535,
      "loss": 3.0108,
      "step": 41518
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4782562255859375,
      "learning_rate": 0.0005531964942788737,
      "loss": 2.8773,
      "step": 41519
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.717221975326538,
      "learning_rate": 0.0005531943002350201,
      "loss": 3.1608,
      "step": 41520
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.522306203842163,
      "learning_rate": 0.000553192106144093,
      "loss": 3.0007,
      "step": 41521
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5330673456192017,
      "learning_rate": 0.0005531899120060924,
      "loss": 3.3111,
      "step": 41522
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.940962791442871,
      "learning_rate": 0.0005531877178210193,
      "loss": 2.849,
      "step": 41523
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8463767766952515,
      "learning_rate": 0.0005531855235888736,
      "loss": 3.0848,
      "step": 41524
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4734607934951782,
      "learning_rate": 0.0005531833293096561,
      "loss": 2.812,
      "step": 41525
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1345903873443604,
      "learning_rate": 0.0005531811349833671,
      "loss": 2.9895,
      "step": 41526
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.364412784576416,
      "learning_rate": 0.0005531789406100069,
      "loss": 3.2612,
      "step": 41527
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4660634994506836,
      "learning_rate": 0.0005531767461895758,
      "loss": 3.1101,
      "step": 41528
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.502340316772461,
      "learning_rate": 0.0005531745517220744,
      "loss": 3.1009,
      "step": 41529
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6208479404449463,
      "learning_rate": 0.0005531723572075032,
      "loss": 3.0037,
      "step": 41530
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5980629920959473,
      "learning_rate": 0.0005531701626458623,
      "loss": 3.0841,
      "step": 41531
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2298855781555176,
      "learning_rate": 0.0005531679680371525,
      "loss": 3.1341,
      "step": 41532
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.600450873374939,
      "learning_rate": 0.0005531657733813739,
      "loss": 2.7916,
      "step": 41533
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4277899265289307,
      "learning_rate": 0.000553163578678527,
      "loss": 2.9601,
      "step": 41534
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4531543254852295,
      "learning_rate": 0.0005531613839286121,
      "loss": 3.0631,
      "step": 41535
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8371376991271973,
      "learning_rate": 0.00055315918913163,
      "loss": 3.1243,
      "step": 41536
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.431088924407959,
      "learning_rate": 0.0005531569942875805,
      "loss": 2.7898,
      "step": 41537
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.686382532119751,
      "learning_rate": 0.0005531547993964646,
      "loss": 3.011,
      "step": 41538
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.5096912384033203,
      "learning_rate": 0.0005531526044582824,
      "loss": 3.1996,
      "step": 41539
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9715152978897095,
      "learning_rate": 0.0005531504094730342,
      "loss": 2.9419,
      "step": 41540
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5541223287582397,
      "learning_rate": 0.0005531482144407207,
      "loss": 3.0693,
      "step": 41541
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.9647092819213867,
      "learning_rate": 0.0005531460193613421,
      "loss": 3.0354,
      "step": 41542
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7636626958847046,
      "learning_rate": 0.0005531438242348988,
      "loss": 3.342,
      "step": 41543
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.568272352218628,
      "learning_rate": 0.0005531416290613915,
      "loss": 3.1183,
      "step": 41544
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.446886420249939,
      "learning_rate": 0.0005531394338408203,
      "loss": 3.0296,
      "step": 41545
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3247064352035522,
      "learning_rate": 0.0005531372385731856,
      "loss": 3.1207,
      "step": 41546
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3453538417816162,
      "learning_rate": 0.000553135043258488,
      "loss": 2.9891,
      "step": 41547
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5690759420394897,
      "learning_rate": 0.0005531328478967278,
      "loss": 3.2038,
      "step": 41548
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6389364004135132,
      "learning_rate": 0.0005531306524879054,
      "loss": 2.8847,
      "step": 41549
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6386467218399048,
      "learning_rate": 0.0005531284570320212,
      "loss": 3.002,
      "step": 41550
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4066293239593506,
      "learning_rate": 0.0005531262615290758,
      "loss": 2.967,
      "step": 41551
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3722872734069824,
      "learning_rate": 0.0005531240659790693,
      "loss": 3.0463,
      "step": 41552
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5261459350585938,
      "learning_rate": 0.0005531218703820023,
      "loss": 3.2173,
      "step": 41553
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.840145230293274,
      "learning_rate": 0.0005531196747378752,
      "loss": 2.8661,
      "step": 41554
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6201393604278564,
      "learning_rate": 0.0005531174790466884,
      "loss": 2.7078,
      "step": 41555
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9861509799957275,
      "learning_rate": 0.0005531152833084422,
      "loss": 3.2555,
      "step": 41556
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4099856615066528,
      "learning_rate": 0.0005531130875231371,
      "loss": 3.1805,
      "step": 41557
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0407936573028564,
      "learning_rate": 0.0005531108916907735,
      "loss": 3.3305,
      "step": 41558
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4944608211517334,
      "learning_rate": 0.0005531086958113519,
      "loss": 2.8156,
      "step": 41559
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5277765989303589,
      "learning_rate": 0.0005531064998848726,
      "loss": 2.8921,
      "step": 41560
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.502932071685791,
      "learning_rate": 0.0005531043039113359,
      "loss": 3.2428,
      "step": 41561
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7066208124160767,
      "learning_rate": 0.0005531021078907424,
      "loss": 3.1536,
      "step": 41562
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3437155485153198,
      "learning_rate": 0.0005530999118230924,
      "loss": 3.1117,
      "step": 41563
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.252892255783081,
      "learning_rate": 0.0005530977157083864,
      "loss": 3.1746,
      "step": 41564
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4894015789031982,
      "learning_rate": 0.0005530955195466248,
      "loss": 2.9776,
      "step": 41565
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3838584423065186,
      "learning_rate": 0.0005530933233378079,
      "loss": 3.1844,
      "step": 41566
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9555903673171997,
      "learning_rate": 0.0005530911270819361,
      "loss": 3.1746,
      "step": 41567
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5559098720550537,
      "learning_rate": 0.00055308893077901,
      "loss": 3.0501,
      "step": 41568
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0560452938079834,
      "learning_rate": 0.0005530867344290299,
      "loss": 3.2382,
      "step": 41569
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3585491180419922,
      "learning_rate": 0.0005530845380319961,
      "loss": 3.1714,
      "step": 41570
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.468657374382019,
      "learning_rate": 0.0005530823415879092,
      "loss": 2.7638,
      "step": 41571
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2083699703216553,
      "learning_rate": 0.0005530801450967694,
      "loss": 3.0412,
      "step": 41572
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.683160662651062,
      "learning_rate": 0.0005530779485585774,
      "loss": 2.7237,
      "step": 41573
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.528249740600586,
      "learning_rate": 0.0005530757519733333,
      "loss": 2.8482,
      "step": 41574
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9964646100997925,
      "learning_rate": 0.0005530735553410377,
      "loss": 3.0147,
      "step": 41575
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3703887462615967,
      "learning_rate": 0.000553071358661691,
      "loss": 3.2222,
      "step": 41576
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3080723285675049,
      "learning_rate": 0.0005530691619352934,
      "loss": 3.1083,
      "step": 41577
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4536494016647339,
      "learning_rate": 0.0005530669651618456,
      "loss": 3.1353,
      "step": 41578
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.539987325668335,
      "learning_rate": 0.0005530647683413478,
      "loss": 2.9158,
      "step": 41579
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6408681869506836,
      "learning_rate": 0.0005530625714738005,
      "loss": 3.1815,
      "step": 41580
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2320475578308105,
      "learning_rate": 0.0005530603745592041,
      "loss": 3.1162,
      "step": 41581
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6475486755371094,
      "learning_rate": 0.000553058177597559,
      "loss": 3.3171,
      "step": 41582
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4789353609085083,
      "learning_rate": 0.0005530559805888656,
      "loss": 3.2057,
      "step": 41583
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4474728107452393,
      "learning_rate": 0.0005530537835331243,
      "loss": 3.0942,
      "step": 41584
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7922002077102661,
      "learning_rate": 0.0005530515864303355,
      "loss": 3.1006,
      "step": 41585
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8193014860153198,
      "learning_rate": 0.0005530493892804998,
      "loss": 3.0443,
      "step": 41586
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3303226232528687,
      "learning_rate": 0.0005530471920836172,
      "loss": 3.0719,
      "step": 41587
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.76276433467865,
      "learning_rate": 0.0005530449948396885,
      "loss": 2.9387,
      "step": 41588
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.101731300354004,
      "learning_rate": 0.0005530427975487139,
      "loss": 3.074,
      "step": 41589
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3667701482772827,
      "learning_rate": 0.000553040600210694,
      "loss": 3.0758,
      "step": 41590
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1884546279907227,
      "learning_rate": 0.0005530384028256289,
      "loss": 3.0605,
      "step": 41591
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.173029661178589,
      "learning_rate": 0.0005530362053935193,
      "loss": 3.0822,
      "step": 41592
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4212063550949097,
      "learning_rate": 0.0005530340079143654,
      "loss": 3.2557,
      "step": 41593
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8588844537734985,
      "learning_rate": 0.0005530318103881678,
      "loss": 3.0581,
      "step": 41594
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6084890365600586,
      "learning_rate": 0.0005530296128149268,
      "loss": 3.1227,
      "step": 41595
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3034236431121826,
      "learning_rate": 0.0005530274151946428,
      "loss": 3.1025,
      "step": 41596
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.425445795059204,
      "learning_rate": 0.0005530252175273161,
      "loss": 2.8374,
      "step": 41597
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3712221384048462,
      "learning_rate": 0.0005530230198129473,
      "loss": 3.0347,
      "step": 41598
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.76399827003479,
      "learning_rate": 0.0005530208220515369,
      "loss": 2.9771,
      "step": 41599
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6063309907913208,
      "learning_rate": 0.000553018624243085,
      "loss": 2.9823,
      "step": 41600
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6364375352859497,
      "learning_rate": 0.0005530164263875922,
      "loss": 3.0541,
      "step": 41601
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4237759113311768,
      "learning_rate": 0.0005530142284850589,
      "loss": 3.044,
      "step": 41602
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.483640432357788,
      "learning_rate": 0.0005530120305354855,
      "loss": 3.1358,
      "step": 41603
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4414210319519043,
      "learning_rate": 0.0005530098325388722,
      "loss": 3.0693,
      "step": 41604
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4848214387893677,
      "learning_rate": 0.0005530076344952198,
      "loss": 3.0054,
      "step": 41605
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9483853578567505,
      "learning_rate": 0.0005530054364045285,
      "loss": 3.227,
      "step": 41606
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6112480163574219,
      "learning_rate": 0.0005530032382667986,
      "loss": 3.1331,
      "step": 41607
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6477304697036743,
      "learning_rate": 0.0005530010400820307,
      "loss": 3.0438,
      "step": 41608
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4718639850616455,
      "learning_rate": 0.000552998841850225,
      "loss": 3.0759,
      "step": 41609
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0597782135009766,
      "learning_rate": 0.0005529966435713822,
      "loss": 3.0386,
      "step": 41610
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5983365774154663,
      "learning_rate": 0.0005529944452455025,
      "loss": 3.0915,
      "step": 41611
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2143685817718506,
      "learning_rate": 0.0005529922468725864,
      "loss": 3.0201,
      "step": 41612
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1670725345611572,
      "learning_rate": 0.0005529900484526342,
      "loss": 2.7941,
      "step": 41613
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7319014072418213,
      "learning_rate": 0.0005529878499856463,
      "loss": 3.0912,
      "step": 41614
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5500366687774658,
      "learning_rate": 0.0005529856514716234,
      "loss": 3.3526,
      "step": 41615
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.6586103439331055,
      "learning_rate": 0.0005529834529105656,
      "loss": 2.9107,
      "step": 41616
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5417025089263916,
      "learning_rate": 0.0005529812543024733,
      "loss": 3.1825,
      "step": 41617
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.514482855796814,
      "learning_rate": 0.0005529790556473471,
      "loss": 3.1583,
      "step": 41618
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.8055777549743652,
      "learning_rate": 0.0005529768569451873,
      "loss": 3.3053,
      "step": 41619
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8169220685958862,
      "learning_rate": 0.0005529746581959943,
      "loss": 3.0505,
      "step": 41620
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8103841543197632,
      "learning_rate": 0.0005529724593997685,
      "loss": 2.8748,
      "step": 41621
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.525935649871826,
      "learning_rate": 0.0005529702605565105,
      "loss": 3.1974,
      "step": 41622
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6199544668197632,
      "learning_rate": 0.0005529680616662203,
      "loss": 2.9814,
      "step": 41623
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.841609239578247,
      "learning_rate": 0.0005529658627288988,
      "loss": 3.1234,
      "step": 41624
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.442206859588623,
      "learning_rate": 0.000552963663744546,
      "loss": 3.0885,
      "step": 41625
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.682963490486145,
      "learning_rate": 0.0005529614647131627,
      "loss": 3.0849,
      "step": 41626
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.843665361404419,
      "learning_rate": 0.0005529592656347489,
      "loss": 2.9866,
      "step": 41627
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4754927158355713,
      "learning_rate": 0.0005529570665093053,
      "loss": 3.0891,
      "step": 41628
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.777280330657959,
      "learning_rate": 0.0005529548673368322,
      "loss": 2.9143,
      "step": 41629
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.6930906772613525,
      "learning_rate": 0.0005529526681173299,
      "loss": 2.94,
      "step": 41630
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.174811363220215,
      "learning_rate": 0.0005529504688507991,
      "loss": 3.0439,
      "step": 41631
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6077392101287842,
      "learning_rate": 0.0005529482695372399,
      "loss": 2.7463,
      "step": 41632
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8332438468933105,
      "learning_rate": 0.0005529460701766529,
      "loss": 3.1009,
      "step": 41633
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.035796880722046,
      "learning_rate": 0.0005529438707690384,
      "loss": 3.1709,
      "step": 41634
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3827571868896484,
      "learning_rate": 0.0005529416713143969,
      "loss": 2.9348,
      "step": 41635
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4613966941833496,
      "learning_rate": 0.0005529394718127287,
      "loss": 3.2314,
      "step": 41636
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.676207184791565,
      "learning_rate": 0.0005529372722640343,
      "loss": 2.8211,
      "step": 41637
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6941155195236206,
      "learning_rate": 0.0005529350726683142,
      "loss": 3.0125,
      "step": 41638
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6875948905944824,
      "learning_rate": 0.0005529328730255686,
      "loss": 3.0884,
      "step": 41639
    },
    {
      "epoch": 0.54,
      "grad_norm": 4.314582824707031,
      "learning_rate": 0.000552930673335798,
      "loss": 3.0488,
      "step": 41640
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.447007417678833,
      "learning_rate": 0.0005529284735990028,
      "loss": 2.9933,
      "step": 41641
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.3606669902801514,
      "learning_rate": 0.0005529262738151836,
      "loss": 2.9372,
      "step": 41642
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4781361818313599,
      "learning_rate": 0.0005529240739843403,
      "loss": 3.1283,
      "step": 41643
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4073750972747803,
      "learning_rate": 0.000552921874106474,
      "loss": 2.9984,
      "step": 41644
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.777150273323059,
      "learning_rate": 0.0005529196741815845,
      "loss": 2.977,
      "step": 41645
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8126819133758545,
      "learning_rate": 0.0005529174742096725,
      "loss": 2.9135,
      "step": 41646
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.918534278869629,
      "learning_rate": 0.0005529152741907384,
      "loss": 3.089,
      "step": 41647
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5709136724472046,
      "learning_rate": 0.0005529130741247826,
      "loss": 3.0967,
      "step": 41648
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9616632461547852,
      "learning_rate": 0.0005529108740118055,
      "loss": 3.0121,
      "step": 41649
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.8653488159179688,
      "learning_rate": 0.0005529086738518074,
      "loss": 3.1196,
      "step": 41650
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.354069471359253,
      "learning_rate": 0.0005529064736447889,
      "loss": 3.0918,
      "step": 41651
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4731125831604004,
      "learning_rate": 0.0005529042733907503,
      "loss": 2.9746,
      "step": 41652
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.32205867767334,
      "learning_rate": 0.000552902073089692,
      "loss": 3.1575,
      "step": 41653
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9268828630447388,
      "learning_rate": 0.0005528998727416144,
      "loss": 2.7935,
      "step": 41654
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1268866062164307,
      "learning_rate": 0.000552897672346518,
      "loss": 3.0101,
      "step": 41655
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7847890853881836,
      "learning_rate": 0.0005528954719044031,
      "loss": 2.9168,
      "step": 41656
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.820888876914978,
      "learning_rate": 0.0005528932714152702,
      "loss": 3.1271,
      "step": 41657
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6314737796783447,
      "learning_rate": 0.0005528910708791195,
      "loss": 2.8413,
      "step": 41658
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.545168399810791,
      "learning_rate": 0.0005528888702959518,
      "loss": 3.0112,
      "step": 41659
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4463584423065186,
      "learning_rate": 0.0005528866696657672,
      "loss": 3.0406,
      "step": 41660
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.621553897857666,
      "learning_rate": 0.0005528844689885662,
      "loss": 2.9719,
      "step": 41661
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7722612619400024,
      "learning_rate": 0.0005528822682643492,
      "loss": 3.1673,
      "step": 41662
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2755255699157715,
      "learning_rate": 0.0005528800674931166,
      "loss": 3.0809,
      "step": 41663
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4652125835418701,
      "learning_rate": 0.0005528778666748688,
      "loss": 3.037,
      "step": 41664
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3614418506622314,
      "learning_rate": 0.0005528756658096063,
      "loss": 3.1109,
      "step": 41665
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5467547178268433,
      "learning_rate": 0.0005528734648973295,
      "loss": 2.9172,
      "step": 41666
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5587072372436523,
      "learning_rate": 0.0005528712639380386,
      "loss": 2.9803,
      "step": 41667
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5420938730239868,
      "learning_rate": 0.0005528690629317343,
      "loss": 3.1307,
      "step": 41668
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4253895282745361,
      "learning_rate": 0.0005528668618784167,
      "loss": 3.1679,
      "step": 41669
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6099255084991455,
      "learning_rate": 0.0005528646607780865,
      "loss": 3.2612,
      "step": 41670
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2033894062042236,
      "learning_rate": 0.0005528624596307439,
      "loss": 2.9177,
      "step": 41671
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5436310768127441,
      "learning_rate": 0.0005528602584363894,
      "loss": 3.065,
      "step": 41672
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8074803352355957,
      "learning_rate": 0.0005528580571950235,
      "loss": 3.0761,
      "step": 41673
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8677546977996826,
      "learning_rate": 0.0005528558559066464,
      "loss": 2.9624,
      "step": 41674
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4556905031204224,
      "learning_rate": 0.0005528536545712587,
      "loss": 2.9636,
      "step": 41675
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.702073574066162,
      "learning_rate": 0.0005528514531888607,
      "loss": 2.9606,
      "step": 41676
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7716068029403687,
      "learning_rate": 0.0005528492517594529,
      "loss": 3.2059,
      "step": 41677
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5408949851989746,
      "learning_rate": 0.0005528470502830356,
      "loss": 3.028,
      "step": 41678
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2410072088241577,
      "learning_rate": 0.0005528448487596093,
      "loss": 3.1079,
      "step": 41679
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.0858116149902344,
      "learning_rate": 0.0005528426471891743,
      "loss": 2.992,
      "step": 41680
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.469589948654175,
      "learning_rate": 0.0005528404455717312,
      "loss": 3.2421,
      "step": 41681
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8852039575576782,
      "learning_rate": 0.0005528382439072802,
      "loss": 3.1586,
      "step": 41682
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.623089551925659,
      "learning_rate": 0.0005528360421958217,
      "loss": 3.2998,
      "step": 41683
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9945164918899536,
      "learning_rate": 0.0005528338404373564,
      "loss": 3.2776,
      "step": 41684
    },
    {
      "epoch": 0.54,
      "grad_norm": 4.0309624671936035,
      "learning_rate": 0.0005528316386318844,
      "loss": 2.927,
      "step": 41685
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6031585931777954,
      "learning_rate": 0.0005528294367794063,
      "loss": 3.0582,
      "step": 41686
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4950716495513916,
      "learning_rate": 0.0005528272348799224,
      "loss": 2.9438,
      "step": 41687
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6312278509140015,
      "learning_rate": 0.0005528250329334331,
      "loss": 3.0554,
      "step": 41688
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3396151065826416,
      "learning_rate": 0.0005528228309399388,
      "loss": 2.9632,
      "step": 41689
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4130502939224243,
      "learning_rate": 0.0005528206288994401,
      "loss": 3.0451,
      "step": 41690
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6126185655593872,
      "learning_rate": 0.0005528184268119372,
      "loss": 3.0376,
      "step": 41691
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9180703163146973,
      "learning_rate": 0.0005528162246774307,
      "loss": 3.1108,
      "step": 41692
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1123428344726562,
      "learning_rate": 0.0005528140224959208,
      "loss": 3.1149,
      "step": 41693
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.4427378177642822,
      "learning_rate": 0.000552811820267408,
      "loss": 3.2996,
      "step": 41694
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4685717821121216,
      "learning_rate": 0.0005528096179918927,
      "loss": 3.1149,
      "step": 41695
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3217419385910034,
      "learning_rate": 0.0005528074156693753,
      "loss": 3.0314,
      "step": 41696
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3063369989395142,
      "learning_rate": 0.0005528052132998563,
      "loss": 3.2421,
      "step": 41697
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4989687204360962,
      "learning_rate": 0.0005528030108833359,
      "loss": 3.0585,
      "step": 41698
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4403228759765625,
      "learning_rate": 0.0005528008084198148,
      "loss": 2.9702,
      "step": 41699
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8206197023391724,
      "learning_rate": 0.0005527986059092932,
      "loss": 3.1751,
      "step": 41700
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.424074411392212,
      "learning_rate": 0.0005527964033517715,
      "loss": 3.1806,
      "step": 41701
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9835841655731201,
      "learning_rate": 0.0005527942007472503,
      "loss": 2.9069,
      "step": 41702
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3344732522964478,
      "learning_rate": 0.0005527919980957298,
      "loss": 3.1254,
      "step": 41703
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2468576431274414,
      "learning_rate": 0.0005527897953972106,
      "loss": 2.8012,
      "step": 41704
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9195493459701538,
      "learning_rate": 0.0005527875926516929,
      "loss": 3.062,
      "step": 41705
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6100701093673706,
      "learning_rate": 0.0005527853898591772,
      "loss": 2.8782,
      "step": 41706
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2715413570404053,
      "learning_rate": 0.000552783187019664,
      "loss": 2.9581,
      "step": 41707
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4222863912582397,
      "learning_rate": 0.0005527809841331536,
      "loss": 3.0354,
      "step": 41708
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.609025478363037,
      "learning_rate": 0.0005527787811996465,
      "loss": 2.9476,
      "step": 41709
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3853566646575928,
      "learning_rate": 0.0005527765782191431,
      "loss": 3.1118,
      "step": 41710
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7051734924316406,
      "learning_rate": 0.0005527743751916435,
      "loss": 2.9576,
      "step": 41711
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4249145984649658,
      "learning_rate": 0.0005527721721171485,
      "loss": 3.0787,
      "step": 41712
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5899275541305542,
      "learning_rate": 0.0005527699689956585,
      "loss": 3.033,
      "step": 41713
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.750837802886963,
      "learning_rate": 0.0005527677658271737,
      "loss": 3.0195,
      "step": 41714
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4227347373962402,
      "learning_rate": 0.0005527655626116946,
      "loss": 2.9345,
      "step": 41715
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4821752309799194,
      "learning_rate": 0.0005527633593492217,
      "loss": 3.2369,
      "step": 41716
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5425937175750732,
      "learning_rate": 0.0005527611560397553,
      "loss": 3.1718,
      "step": 41717
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4437142610549927,
      "learning_rate": 0.0005527589526832956,
      "loss": 3.392,
      "step": 41718
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7987130880355835,
      "learning_rate": 0.0005527567492798435,
      "loss": 3.0416,
      "step": 41719
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.759331464767456,
      "learning_rate": 0.0005527545458293991,
      "loss": 3.352,
      "step": 41720
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9714235067367554,
      "learning_rate": 0.0005527523423319628,
      "loss": 3.1164,
      "step": 41721
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5082908868789673,
      "learning_rate": 0.0005527501387875351,
      "loss": 3.0763,
      "step": 41722
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.617479681968689,
      "learning_rate": 0.0005527479351961163,
      "loss": 3.0625,
      "step": 41723
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7230491638183594,
      "learning_rate": 0.000552745731557707,
      "loss": 3.0,
      "step": 41724
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5782934427261353,
      "learning_rate": 0.0005527435278723075,
      "loss": 3.2148,
      "step": 41725
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6036041975021362,
      "learning_rate": 0.0005527413241399181,
      "loss": 3.0098,
      "step": 41726
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.698667287826538,
      "learning_rate": 0.0005527391203605394,
      "loss": 3.017,
      "step": 41727
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.531179428100586,
      "learning_rate": 0.0005527369165341717,
      "loss": 3.2043,
      "step": 41728
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.343372106552124,
      "learning_rate": 0.0005527347126608154,
      "loss": 2.988,
      "step": 41729
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3556643724441528,
      "learning_rate": 0.000552732508740471,
      "loss": 3.0012,
      "step": 41730
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0948166847229004,
      "learning_rate": 0.0005527303047731389,
      "loss": 2.9323,
      "step": 41731
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.703938603401184,
      "learning_rate": 0.0005527281007588194,
      "loss": 3.0769,
      "step": 41732
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.748226523399353,
      "learning_rate": 0.000552725896697513,
      "loss": 3.1843,
      "step": 41733
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5229164361953735,
      "learning_rate": 0.00055272369258922,
      "loss": 3.1937,
      "step": 41734
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.845669150352478,
      "learning_rate": 0.000552721488433941,
      "loss": 3.5081,
      "step": 41735
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7551090717315674,
      "learning_rate": 0.0005527192842316763,
      "loss": 3.0844,
      "step": 41736
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5976678133010864,
      "learning_rate": 0.0005527170799824263,
      "loss": 3.2277,
      "step": 41737
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6884472370147705,
      "learning_rate": 0.0005527148756861914,
      "loss": 3.4186,
      "step": 41738
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5436832904815674,
      "learning_rate": 0.000552712671342972,
      "loss": 3.0923,
      "step": 41739
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3910959959030151,
      "learning_rate": 0.0005527104669527686,
      "loss": 3.0212,
      "step": 41740
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4269702434539795,
      "learning_rate": 0.0005527082625155816,
      "loss": 3.0619,
      "step": 41741
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.540576696395874,
      "learning_rate": 0.0005527060580314113,
      "loss": 3.2291,
      "step": 41742
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0669779777526855,
      "learning_rate": 0.0005527038535002581,
      "loss": 3.1109,
      "step": 41743
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.655110478401184,
      "learning_rate": 0.0005527016489221226,
      "loss": 3.1017,
      "step": 41744
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.54961359500885,
      "learning_rate": 0.000552699444297005,
      "loss": 3.2343,
      "step": 41745
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.12164306640625,
      "learning_rate": 0.0005526972396249059,
      "loss": 2.8378,
      "step": 41746
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4617464542388916,
      "learning_rate": 0.0005526950349058255,
      "loss": 3.0689,
      "step": 41747
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.47748601436615,
      "learning_rate": 0.0005526928301397645,
      "loss": 3.3095,
      "step": 41748
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.547078013420105,
      "learning_rate": 0.000552690625326723,
      "loss": 2.9131,
      "step": 41749
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4966446161270142,
      "learning_rate": 0.0005526884204667016,
      "loss": 2.9177,
      "step": 41750
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5756813287734985,
      "learning_rate": 0.0005526862155597006,
      "loss": 2.942,
      "step": 41751
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5771565437316895,
      "learning_rate": 0.0005526840106057204,
      "loss": 2.694,
      "step": 41752
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3872761726379395,
      "learning_rate": 0.0005526818056047616,
      "loss": 3.0893,
      "step": 41753
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7375229597091675,
      "learning_rate": 0.0005526796005568245,
      "loss": 3.0747,
      "step": 41754
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0648322105407715,
      "learning_rate": 0.0005526773954619093,
      "loss": 3.0755,
      "step": 41755
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8601527214050293,
      "learning_rate": 0.0005526751903200168,
      "loss": 3.2664,
      "step": 41756
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.525892734527588,
      "learning_rate": 0.0005526729851311472,
      "loss": 2.9996,
      "step": 41757
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.868073582649231,
      "learning_rate": 0.0005526707798953008,
      "loss": 2.7383,
      "step": 41758
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6613739728927612,
      "learning_rate": 0.0005526685746124782,
      "loss": 3.1339,
      "step": 41759
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4146842956542969,
      "learning_rate": 0.0005526663692826798,
      "loss": 2.9414,
      "step": 41760
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.782198190689087,
      "learning_rate": 0.0005526641639059059,
      "loss": 3.123,
      "step": 41761
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9087024927139282,
      "learning_rate": 0.000552661958482157,
      "loss": 3.2116,
      "step": 41762
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.2461191415786743,
      "learning_rate": 0.0005526597530114334,
      "loss": 3.0514,
      "step": 41763
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.437792420387268,
      "learning_rate": 0.0005526575474937356,
      "loss": 3.0501,
      "step": 41764
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6364163160324097,
      "learning_rate": 0.000552655341929064,
      "loss": 3.0859,
      "step": 41765
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4431431293487549,
      "learning_rate": 0.000552653136317419,
      "loss": 3.0483,
      "step": 41766
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5124691724777222,
      "learning_rate": 0.000552650930658801,
      "loss": 2.9196,
      "step": 41767
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.960258960723877,
      "learning_rate": 0.0005526487249532105,
      "loss": 3.039,
      "step": 41768
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5529433488845825,
      "learning_rate": 0.0005526465192006477,
      "loss": 2.867,
      "step": 41769
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8744921684265137,
      "learning_rate": 0.0005526443134011134,
      "loss": 2.9437,
      "step": 41770
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6202785968780518,
      "learning_rate": 0.0005526421075546075,
      "loss": 3.1594,
      "step": 41771
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7236192226409912,
      "learning_rate": 0.0005526399016611307,
      "loss": 3.0719,
      "step": 41772
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.627069354057312,
      "learning_rate": 0.0005526376957206835,
      "loss": 3.095,
      "step": 41773
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.820850372314453,
      "learning_rate": 0.000552635489733266,
      "loss": 3.0082,
      "step": 41774
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6748850345611572,
      "learning_rate": 0.0005526332836988789,
      "loss": 2.8517,
      "step": 41775
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5776276588439941,
      "learning_rate": 0.0005526310776175225,
      "loss": 3.1571,
      "step": 41776
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9684011936187744,
      "learning_rate": 0.0005526288714891972,
      "loss": 3.1091,
      "step": 41777
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6146403551101685,
      "learning_rate": 0.0005526266653139034,
      "loss": 3.2977,
      "step": 41778
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3245126008987427,
      "learning_rate": 0.0005526244590916416,
      "loss": 3.1312,
      "step": 41779
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2990639209747314,
      "learning_rate": 0.0005526222528224121,
      "loss": 3.1919,
      "step": 41780
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.58870267868042,
      "learning_rate": 0.0005526200465062154,
      "loss": 2.9055,
      "step": 41781
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.388161063194275,
      "learning_rate": 0.0005526178401430517,
      "loss": 3.1811,
      "step": 41782
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5417062044143677,
      "learning_rate": 0.0005526156337329218,
      "loss": 3.2285,
      "step": 41783
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6910964250564575,
      "learning_rate": 0.0005526134272758258,
      "loss": 3.2223,
      "step": 41784
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.576136589050293,
      "learning_rate": 0.0005526112207717642,
      "loss": 3.0283,
      "step": 41785
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7392185926437378,
      "learning_rate": 0.0005526090142207373,
      "loss": 2.8556,
      "step": 41786
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7126691341400146,
      "learning_rate": 0.0005526068076227458,
      "loss": 3.0179,
      "step": 41787
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6650047302246094,
      "learning_rate": 0.0005526046009777898,
      "loss": 2.8728,
      "step": 41788
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7918542623519897,
      "learning_rate": 0.0005526023942858699,
      "loss": 2.9732,
      "step": 41789
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3882440328598022,
      "learning_rate": 0.0005526001875469864,
      "loss": 3.0901,
      "step": 41790
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.593648076057434,
      "learning_rate": 0.0005525979807611398,
      "loss": 3.0179,
      "step": 41791
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9041500091552734,
      "learning_rate": 0.0005525957739283304,
      "loss": 3.1328,
      "step": 41792
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.709209680557251,
      "learning_rate": 0.0005525935670485587,
      "loss": 3.0604,
      "step": 41793
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3422620296478271,
      "learning_rate": 0.0005525913601218251,
      "loss": 3.181,
      "step": 41794
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4783134460449219,
      "learning_rate": 0.0005525891531481301,
      "loss": 3.0746,
      "step": 41795
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5825003385543823,
      "learning_rate": 0.0005525869461274739,
      "loss": 2.9934,
      "step": 41796
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.1806912422180176,
      "learning_rate": 0.0005525847390598569,
      "loss": 2.8548,
      "step": 41797
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6027437448501587,
      "learning_rate": 0.0005525825319452798,
      "loss": 3.1943,
      "step": 41798
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.952759027481079,
      "learning_rate": 0.0005525803247837428,
      "loss": 2.9638,
      "step": 41799
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.129312515258789,
      "learning_rate": 0.0005525781175752464,
      "loss": 2.8907,
      "step": 41800
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6178187131881714,
      "learning_rate": 0.0005525759103197909,
      "loss": 2.8706,
      "step": 41801
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8889949321746826,
      "learning_rate": 0.0005525737030173767,
      "loss": 3.0344,
      "step": 41802
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.721153974533081,
      "learning_rate": 0.0005525714956680044,
      "loss": 3.0974,
      "step": 41803
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.527428388595581,
      "learning_rate": 0.0005525692882716743,
      "loss": 2.9906,
      "step": 41804
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8478972911834717,
      "learning_rate": 0.0005525670808283866,
      "loss": 2.9263,
      "step": 41805
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6039851903915405,
      "learning_rate": 0.0005525648733381422,
      "loss": 2.8655,
      "step": 41806
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4464982748031616,
      "learning_rate": 0.000552562665800941,
      "loss": 3.5514,
      "step": 41807
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.2904181480407715,
      "learning_rate": 0.0005525604582167839,
      "loss": 3.0256,
      "step": 41808
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4395089149475098,
      "learning_rate": 0.0005525582505856707,
      "loss": 3.0644,
      "step": 41809
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4799643754959106,
      "learning_rate": 0.0005525560429076023,
      "loss": 3.4916,
      "step": 41810
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4131739139556885,
      "learning_rate": 0.000552553835182579,
      "loss": 2.8616,
      "step": 41811
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.0012032985687256,
      "learning_rate": 0.0005525516274106013,
      "loss": 2.8997,
      "step": 41812
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.725680947303772,
      "learning_rate": 0.0005525494195916693,
      "loss": 2.9647,
      "step": 41813
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8645964860916138,
      "learning_rate": 0.0005525472117257836,
      "loss": 3.1048,
      "step": 41814
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9206466674804688,
      "learning_rate": 0.0005525450038129446,
      "loss": 3.4047,
      "step": 41815
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.745780110359192,
      "learning_rate": 0.0005525427958531529,
      "loss": 3.1004,
      "step": 41816
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4203037023544312,
      "learning_rate": 0.0005525405878464085,
      "loss": 3.4408,
      "step": 41817
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4350171089172363,
      "learning_rate": 0.0005525383797927121,
      "loss": 3.1262,
      "step": 41818
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5726101398468018,
      "learning_rate": 0.0005525361716920642,
      "loss": 3.1422,
      "step": 41819
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4001842737197876,
      "learning_rate": 0.0005525339635444648,
      "loss": 3.3489,
      "step": 41820
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5036273002624512,
      "learning_rate": 0.0005525317553499148,
      "loss": 2.9728,
      "step": 41821
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4722108840942383,
      "learning_rate": 0.0005525295471084142,
      "loss": 3.0856,
      "step": 41822
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4133976697921753,
      "learning_rate": 0.0005525273388199638,
      "loss": 2.9407,
      "step": 41823
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4092469215393066,
      "learning_rate": 0.0005525251304845636,
      "loss": 2.9392,
      "step": 41824
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.296077013015747,
      "learning_rate": 0.0005525229221022142,
      "loss": 3.0603,
      "step": 41825
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.744642734527588,
      "learning_rate": 0.0005525207136729161,
      "loss": 2.9854,
      "step": 41826
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.054163932800293,
      "learning_rate": 0.0005525185051966697,
      "loss": 3.0135,
      "step": 41827
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4299125671386719,
      "learning_rate": 0.0005525162966734752,
      "loss": 3.0859,
      "step": 41828
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.234743356704712,
      "learning_rate": 0.0005525140881033333,
      "loss": 3.0809,
      "step": 41829
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4357962608337402,
      "learning_rate": 0.0005525118794862441,
      "loss": 3.1337,
      "step": 41830
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6195145845413208,
      "learning_rate": 0.0005525096708222084,
      "loss": 3.2432,
      "step": 41831
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.312617301940918,
      "learning_rate": 0.0005525074621112263,
      "loss": 2.8334,
      "step": 41832
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5356719493865967,
      "learning_rate": 0.0005525052533532982,
      "loss": 3.0696,
      "step": 41833
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3532603979110718,
      "learning_rate": 0.0005525030445484247,
      "loss": 3.1338,
      "step": 41834
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8257420063018799,
      "learning_rate": 0.000552500835696606,
      "loss": 3.0873,
      "step": 41835
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4367002248764038,
      "learning_rate": 0.0005524986267978427,
      "loss": 2.9809,
      "step": 41836
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3566049337387085,
      "learning_rate": 0.0005524964178521351,
      "loss": 3.058,
      "step": 41837
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3518095016479492,
      "learning_rate": 0.0005524942088594838,
      "loss": 3.3071,
      "step": 41838
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.5480151176452637,
      "learning_rate": 0.0005524919998198889,
      "loss": 3.1197,
      "step": 41839
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4108768701553345,
      "learning_rate": 0.0005524897907333511,
      "loss": 3.2634,
      "step": 41840
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9006929397583008,
      "learning_rate": 0.0005524875815998705,
      "loss": 3.0088,
      "step": 41841
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.924209713935852,
      "learning_rate": 0.000552485372419448,
      "loss": 2.8164,
      "step": 41842
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3161537647247314,
      "learning_rate": 0.0005524831631920834,
      "loss": 3.0398,
      "step": 41843
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.6346372365951538,
      "learning_rate": 0.0005524809539177776,
      "loss": 2.9665,
      "step": 41844
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.3998761177062988,
      "learning_rate": 0.0005524787445965307,
      "loss": 3.253,
      "step": 41845
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.4202467203140259,
      "learning_rate": 0.0005524765352283432,
      "loss": 3.263,
      "step": 41846
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.499788761138916,
      "learning_rate": 0.0005524743258132157,
      "loss": 3.1754,
      "step": 41847
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.7167422771453857,
      "learning_rate": 0.0005524721163511484,
      "loss": 3.5221,
      "step": 41848
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.5990538597106934,
      "learning_rate": 0.0005524699068421417,
      "loss": 3.1244,
      "step": 41849
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8241581916809082,
      "learning_rate": 0.0005524676972861962,
      "loss": 3.3092,
      "step": 41850
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.401185393333435,
      "learning_rate": 0.0005524654876833121,
      "loss": 3.0339,
      "step": 41851
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5504167079925537,
      "learning_rate": 0.0005524632780334899,
      "loss": 3.0021,
      "step": 41852
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.5171147584915161,
      "learning_rate": 0.00055246106833673,
      "loss": 2.948,
      "step": 41853
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.9776675701141357,
      "learning_rate": 0.000552458858593033,
      "loss": 3.0102,
      "step": 41854
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.079476833343506,
      "learning_rate": 0.0005524566488023988,
      "loss": 2.9893,
      "step": 41855
    },
    {
      "epoch": 0.54,
      "grad_norm": 1.8189386129379272,
      "learning_rate": 0.0005524544389648284,
      "loss": 3.0265,
      "step": 41856
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.783381700515747,
      "learning_rate": 0.0005524522290803218,
      "loss": 3.1763,
      "step": 41857
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5290188789367676,
      "learning_rate": 0.0005524500191488798,
      "loss": 2.9315,
      "step": 41858
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.942411184310913,
      "learning_rate": 0.0005524478091705024,
      "loss": 2.8941,
      "step": 41859
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4378206729888916,
      "learning_rate": 0.0005524455991451902,
      "loss": 2.9813,
      "step": 41860
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7598357200622559,
      "learning_rate": 0.0005524433890729436,
      "loss": 3.2471,
      "step": 41861
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.7412073612213135,
      "learning_rate": 0.000552441178953763,
      "loss": 2.9566,
      "step": 41862
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5668580532073975,
      "learning_rate": 0.0005524389687876488,
      "loss": 2.9631,
      "step": 41863
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3597586154937744,
      "learning_rate": 0.0005524367585746015,
      "loss": 3.1036,
      "step": 41864
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.813175916671753,
      "learning_rate": 0.0005524345483146214,
      "loss": 3.0494,
      "step": 41865
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.638503909111023,
      "learning_rate": 0.000552432338007709,
      "loss": 3.1749,
      "step": 41866
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0273563861846924,
      "learning_rate": 0.0005524301276538646,
      "loss": 3.0412,
      "step": 41867
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.0607430934906006,
      "learning_rate": 0.0005524279172530888,
      "loss": 3.0902,
      "step": 41868
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.919420838356018,
      "learning_rate": 0.0005524257068053817,
      "loss": 2.9883,
      "step": 41869
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6426185369491577,
      "learning_rate": 0.0005524234963107439,
      "loss": 2.9035,
      "step": 41870
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7878735065460205,
      "learning_rate": 0.000552421285769176,
      "loss": 3.0136,
      "step": 41871
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.957680106163025,
      "learning_rate": 0.000552419075180678,
      "loss": 2.7226,
      "step": 41872
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1041617393493652,
      "learning_rate": 0.0005524168645452507,
      "loss": 3.1103,
      "step": 41873
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.922074794769287,
      "learning_rate": 0.0005524146538628944,
      "loss": 2.9707,
      "step": 41874
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7037969827651978,
      "learning_rate": 0.0005524124431336092,
      "loss": 2.8375,
      "step": 41875
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7175880670547485,
      "learning_rate": 0.0005524102323573959,
      "loss": 3.015,
      "step": 41876
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7464722394943237,
      "learning_rate": 0.0005524080215342547,
      "loss": 2.9671,
      "step": 41877
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.348335027694702,
      "learning_rate": 0.0005524058106641862,
      "loss": 3.1073,
      "step": 41878
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6918545961380005,
      "learning_rate": 0.0005524035997471906,
      "loss": 3.3162,
      "step": 41879
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.056722402572632,
      "learning_rate": 0.0005524013887832685,
      "loss": 3.1557,
      "step": 41880
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6169124841690063,
      "learning_rate": 0.0005523991777724201,
      "loss": 3.0718,
      "step": 41881
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.204730272293091,
      "learning_rate": 0.0005523969667146461,
      "loss": 2.9822,
      "step": 41882
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7343533039093018,
      "learning_rate": 0.0005523947556099466,
      "loss": 3.1789,
      "step": 41883
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2992383241653442,
      "learning_rate": 0.0005523925444583221,
      "loss": 2.9502,
      "step": 41884
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4274051189422607,
      "learning_rate": 0.0005523903332597732,
      "loss": 2.9716,
      "step": 41885
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9836673736572266,
      "learning_rate": 0.0005523881220143002,
      "loss": 2.8294,
      "step": 41886
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.562083125114441,
      "learning_rate": 0.0005523859107219033,
      "loss": 3.0816,
      "step": 41887
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.471018671989441,
      "learning_rate": 0.0005523836993825832,
      "loss": 2.9176,
      "step": 41888
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.71627938747406,
      "learning_rate": 0.0005523814879963403,
      "loss": 2.9964,
      "step": 41889
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9585341215133667,
      "learning_rate": 0.0005523792765631748,
      "loss": 2.8062,
      "step": 41890
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5442962646484375,
      "learning_rate": 0.0005523770650830872,
      "loss": 2.7888,
      "step": 41891
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.8193211555480957,
      "learning_rate": 0.000552374853556078,
      "loss": 3.0745,
      "step": 41892
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6886647939682007,
      "learning_rate": 0.0005523726419821475,
      "loss": 3.0837,
      "step": 41893
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.02175235748291,
      "learning_rate": 0.0005523704303612962,
      "loss": 3.0059,
      "step": 41894
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1440587043762207,
      "learning_rate": 0.0005523682186935246,
      "loss": 2.9768,
      "step": 41895
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.642962694168091,
      "learning_rate": 0.0005523660069788329,
      "loss": 3.0017,
      "step": 41896
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0086822509765625,
      "learning_rate": 0.0005523637952172214,
      "loss": 3.2726,
      "step": 41897
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.152519941329956,
      "learning_rate": 0.000552361583408691,
      "loss": 2.9951,
      "step": 41898
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.252082109451294,
      "learning_rate": 0.0005523593715532416,
      "loss": 3.1272,
      "step": 41899
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8027937412261963,
      "learning_rate": 0.000552357159650874,
      "loss": 3.1387,
      "step": 41900
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.528407335281372,
      "learning_rate": 0.0005523549477015882,
      "loss": 3.289,
      "step": 41901
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8566070795059204,
      "learning_rate": 0.0005523527357053851,
      "loss": 3.1051,
      "step": 41902
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4440711736679077,
      "learning_rate": 0.0005523505236622648,
      "loss": 3.0647,
      "step": 41903
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4915469884872437,
      "learning_rate": 0.0005523483115722277,
      "loss": 3.1022,
      "step": 41904
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4444197416305542,
      "learning_rate": 0.0005523460994352744,
      "loss": 2.886,
      "step": 41905
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8419198989868164,
      "learning_rate": 0.000552343887251405,
      "loss": 2.9524,
      "step": 41906
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3508042097091675,
      "learning_rate": 0.0005523416750206202,
      "loss": 3.0428,
      "step": 41907
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.154003858566284,
      "learning_rate": 0.0005523394627429204,
      "loss": 3.1217,
      "step": 41908
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.798429250717163,
      "learning_rate": 0.0005523372504183059,
      "loss": 2.9545,
      "step": 41909
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5978541374206543,
      "learning_rate": 0.0005523350380467771,
      "loss": 3.1788,
      "step": 41910
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7099450826644897,
      "learning_rate": 0.0005523328256283345,
      "loss": 3.1575,
      "step": 41911
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.599277138710022,
      "learning_rate": 0.0005523306131629785,
      "loss": 2.8822,
      "step": 41912
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.846122145652771,
      "learning_rate": 0.0005523284006507094,
      "loss": 3.1561,
      "step": 41913
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.745717167854309,
      "learning_rate": 0.0005523261880915276,
      "loss": 3.0982,
      "step": 41914
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.806046485900879,
      "learning_rate": 0.0005523239754854338,
      "loss": 3.082,
      "step": 41915
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.023564577102661,
      "learning_rate": 0.000552321762832428,
      "loss": 2.9585,
      "step": 41916
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.918088436126709,
      "learning_rate": 0.0005523195501325111,
      "loss": 3.0941,
      "step": 41917
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3591108322143555,
      "learning_rate": 0.000552317337385683,
      "loss": 2.9344,
      "step": 41918
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5492022037506104,
      "learning_rate": 0.0005523151245919443,
      "loss": 3.1643,
      "step": 41919
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8200945854187012,
      "learning_rate": 0.0005523129117512956,
      "loss": 3.2289,
      "step": 41920
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.757599115371704,
      "learning_rate": 0.000552310698863737,
      "loss": 2.952,
      "step": 41921
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5624979734420776,
      "learning_rate": 0.0005523084859292693,
      "loss": 3.0074,
      "step": 41922
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5495319366455078,
      "learning_rate": 0.0005523062729478926,
      "loss": 3.0747,
      "step": 41923
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7411836385726929,
      "learning_rate": 0.0005523040599196075,
      "loss": 2.7925,
      "step": 41924
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.087893009185791,
      "learning_rate": 0.0005523018468444141,
      "loss": 3.3224,
      "step": 41925
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.305570363998413,
      "learning_rate": 0.0005522996337223131,
      "loss": 3.2165,
      "step": 41926
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5903319120407104,
      "learning_rate": 0.000552297420553305,
      "loss": 3.1928,
      "step": 41927
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.3863041400909424,
      "learning_rate": 0.0005522952073373899,
      "loss": 3.0792,
      "step": 41928
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.9693617820739746,
      "learning_rate": 0.0005522929940745683,
      "loss": 2.9551,
      "step": 41929
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2883285284042358,
      "learning_rate": 0.0005522907807648408,
      "loss": 3.0065,
      "step": 41930
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1809916496276855,
      "learning_rate": 0.0005522885674082077,
      "loss": 3.1402,
      "step": 41931
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.7830052375793457,
      "learning_rate": 0.0005522863540046693,
      "loss": 2.9666,
      "step": 41932
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1910476684570312,
      "learning_rate": 0.0005522841405542261,
      "loss": 3.2112,
      "step": 41933
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3458243608474731,
      "learning_rate": 0.0005522819270568786,
      "loss": 3.1043,
      "step": 41934
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5569158792495728,
      "learning_rate": 0.0005522797135126269,
      "loss": 2.9533,
      "step": 41935
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5898240804672241,
      "learning_rate": 0.000552277499921472,
      "loss": 3.0418,
      "step": 41936
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6743385791778564,
      "learning_rate": 0.0005522752862834137,
      "loss": 3.103,
      "step": 41937
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3308285474777222,
      "learning_rate": 0.0005522730725984527,
      "loss": 3.0025,
      "step": 41938
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8595976829528809,
      "learning_rate": 0.0005522708588665894,
      "loss": 2.9259,
      "step": 41939
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.338315486907959,
      "learning_rate": 0.0005522686450878242,
      "loss": 3.1325,
      "step": 41940
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4170968532562256,
      "learning_rate": 0.0005522664312621575,
      "loss": 2.7823,
      "step": 41941
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3272265195846558,
      "learning_rate": 0.0005522642173895896,
      "loss": 3.1141,
      "step": 41942
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3972314596176147,
      "learning_rate": 0.0005522620034701212,
      "loss": 3.1316,
      "step": 41943
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4828616380691528,
      "learning_rate": 0.0005522597895037523,
      "loss": 2.9744,
      "step": 41944
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.331430196762085,
      "learning_rate": 0.0005522575754904838,
      "loss": 3.1403,
      "step": 41945
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4709540605545044,
      "learning_rate": 0.0005522553614303158,
      "loss": 3.0597,
      "step": 41946
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.432663917541504,
      "learning_rate": 0.0005522531473232486,
      "loss": 3.1654,
      "step": 41947
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2761354446411133,
      "learning_rate": 0.0005522509331692828,
      "loss": 3.1937,
      "step": 41948
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.624461054801941,
      "learning_rate": 0.000552248718968419,
      "loss": 2.9972,
      "step": 41949
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8236366510391235,
      "learning_rate": 0.0005522465047206572,
      "loss": 3.1351,
      "step": 41950
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.40390145778656,
      "learning_rate": 0.000552244290425998,
      "loss": 3.0293,
      "step": 41951
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4694031476974487,
      "learning_rate": 0.0005522420760844421,
      "loss": 3.0463,
      "step": 41952
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2040306329727173,
      "learning_rate": 0.0005522398616959894,
      "loss": 3.0942,
      "step": 41953
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.298490285873413,
      "learning_rate": 0.0005522376472606406,
      "loss": 3.2464,
      "step": 41954
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3998538255691528,
      "learning_rate": 0.000552235432778396,
      "loss": 3.0817,
      "step": 41955
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4060440063476562,
      "learning_rate": 0.0005522332182492562,
      "loss": 2.8977,
      "step": 41956
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.418690800666809,
      "learning_rate": 0.0005522310036732214,
      "loss": 3.0104,
      "step": 41957
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9987610578536987,
      "learning_rate": 0.0005522287890502921,
      "loss": 3.1062,
      "step": 41958
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.556299090385437,
      "learning_rate": 0.0005522265743804687,
      "loss": 3.0963,
      "step": 41959
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.015662670135498,
      "learning_rate": 0.0005522243596637516,
      "loss": 3.074,
      "step": 41960
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5697318315505981,
      "learning_rate": 0.0005522221449001414,
      "loss": 2.8756,
      "step": 41961
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4675037860870361,
      "learning_rate": 0.0005522199300896381,
      "loss": 2.8719,
      "step": 41962
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5158085823059082,
      "learning_rate": 0.0005522177152322425,
      "loss": 3.144,
      "step": 41963
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3006993532180786,
      "learning_rate": 0.0005522155003279547,
      "loss": 3.2348,
      "step": 41964
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2559689283370972,
      "learning_rate": 0.0005522132853767756,
      "loss": 3.0442,
      "step": 41965
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8973380327224731,
      "learning_rate": 0.0005522110703787051,
      "loss": 2.9202,
      "step": 41966
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8374303579330444,
      "learning_rate": 0.0005522088553337437,
      "loss": 2.9327,
      "step": 41967
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5543079376220703,
      "learning_rate": 0.000552206640241892,
      "loss": 3.2508,
      "step": 41968
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5274677276611328,
      "learning_rate": 0.0005522044251031503,
      "loss": 3.0295,
      "step": 41969
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4071052074432373,
      "learning_rate": 0.0005522022099175191,
      "loss": 2.8747,
      "step": 41970
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5712705850601196,
      "learning_rate": 0.0005521999946849987,
      "loss": 3.04,
      "step": 41971
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.353690505027771,
      "learning_rate": 0.0005521977794055896,
      "loss": 3.3618,
      "step": 41972
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4184377193450928,
      "learning_rate": 0.0005521955640792922,
      "loss": 3.227,
      "step": 41973
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.242357015609741,
      "learning_rate": 0.0005521933487061067,
      "loss": 3.0365,
      "step": 41974
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7241283655166626,
      "learning_rate": 0.000552191133286034,
      "loss": 3.1303,
      "step": 41975
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5349664688110352,
      "learning_rate": 0.000552188917819074,
      "loss": 3.2492,
      "step": 41976
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3955879211425781,
      "learning_rate": 0.0005521867023052273,
      "loss": 3.2554,
      "step": 41977
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.443878173828125,
      "learning_rate": 0.0005521844867444944,
      "loss": 3.2628,
      "step": 41978
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9356474876403809,
      "learning_rate": 0.0005521822711368757,
      "loss": 2.9064,
      "step": 41979
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4795106649398804,
      "learning_rate": 0.0005521800554823714,
      "loss": 3.1312,
      "step": 41980
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5067198276519775,
      "learning_rate": 0.0005521778397809821,
      "loss": 3.0591,
      "step": 41981
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3555915355682373,
      "learning_rate": 0.0005521756240327083,
      "loss": 3.1437,
      "step": 41982
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3657782077789307,
      "learning_rate": 0.0005521734082375503,
      "loss": 3.1747,
      "step": 41983
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4091547727584839,
      "learning_rate": 0.0005521711923955083,
      "loss": 3.1873,
      "step": 41984
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.045405387878418,
      "learning_rate": 0.0005521689765065832,
      "loss": 3.0663,
      "step": 41985
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6394460201263428,
      "learning_rate": 0.0005521667605707749,
      "loss": 2.7505,
      "step": 41986
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7733805179595947,
      "learning_rate": 0.0005521645445880841,
      "loss": 2.9603,
      "step": 41987
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5184941291809082,
      "learning_rate": 0.000552162328558511,
      "loss": 3.0032,
      "step": 41988
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5160558223724365,
      "learning_rate": 0.0005521601124820565,
      "loss": 2.9475,
      "step": 41989
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.291114330291748,
      "learning_rate": 0.0005521578963587205,
      "loss": 2.9246,
      "step": 41990
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8458350896835327,
      "learning_rate": 0.0005521556801885035,
      "loss": 3.216,
      "step": 41991
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.419395923614502,
      "learning_rate": 0.0005521534639714061,
      "loss": 3.0129,
      "step": 41992
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7193115949630737,
      "learning_rate": 0.0005521512477074286,
      "loss": 3.0715,
      "step": 41993
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5940481424331665,
      "learning_rate": 0.0005521490313965715,
      "loss": 3.0697,
      "step": 41994
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.685571312904358,
      "learning_rate": 0.000552146815038835,
      "loss": 3.2701,
      "step": 41995
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3357512950897217,
      "learning_rate": 0.0005521445986342197,
      "loss": 2.9519,
      "step": 41996
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.45473051071167,
      "learning_rate": 0.0005521423821827259,
      "loss": 3.1524,
      "step": 41997
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9845702648162842,
      "learning_rate": 0.0005521401656843543,
      "loss": 2.9441,
      "step": 41998
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.007882595062256,
      "learning_rate": 0.0005521379491391048,
      "loss": 2.9844,
      "step": 41999
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.8643481731414795,
      "learning_rate": 0.0005521357325469783,
      "loss": 3.2656,
      "step": 42000
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9184931516647339,
      "learning_rate": 0.0005521335159079749,
      "loss": 3.2874,
      "step": 42001
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5482637882232666,
      "learning_rate": 0.0005521312992220951,
      "loss": 3.1433,
      "step": 42002
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.570880174636841,
      "learning_rate": 0.0005521290824893394,
      "loss": 3.0823,
      "step": 42003
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.057910680770874,
      "learning_rate": 0.0005521268657097081,
      "loss": 3.2392,
      "step": 42004
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8033229112625122,
      "learning_rate": 0.0005521246488832017,
      "loss": 2.8664,
      "step": 42005
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8812707662582397,
      "learning_rate": 0.0005521224320098205,
      "loss": 3.0401,
      "step": 42006
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5926201343536377,
      "learning_rate": 0.0005521202150895651,
      "loss": 3.1544,
      "step": 42007
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4297457933425903,
      "learning_rate": 0.0005521179981224357,
      "loss": 2.9503,
      "step": 42008
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4753947257995605,
      "learning_rate": 0.0005521157811084328,
      "loss": 3.1656,
      "step": 42009
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7271332740783691,
      "learning_rate": 0.0005521135640475568,
      "loss": 3.0785,
      "step": 42010
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.375466227531433,
      "learning_rate": 0.0005521113469398081,
      "loss": 3.0003,
      "step": 42011
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8681970834732056,
      "learning_rate": 0.0005521091297851873,
      "loss": 3.0619,
      "step": 42012
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4825608730316162,
      "learning_rate": 0.0005521069125836945,
      "loss": 2.8066,
      "step": 42013
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4586093425750732,
      "learning_rate": 0.0005521046953353304,
      "loss": 3.2578,
      "step": 42014
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0244882106781006,
      "learning_rate": 0.0005521024780400952,
      "loss": 2.8848,
      "step": 42015
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.544453740119934,
      "learning_rate": 0.0005521002606979894,
      "loss": 3.0632,
      "step": 42016
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7612680196762085,
      "learning_rate": 0.0005520980433090135,
      "loss": 3.1388,
      "step": 42017
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.726855993270874,
      "learning_rate": 0.0005520958258731677,
      "loss": 2.9798,
      "step": 42018
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1006267070770264,
      "learning_rate": 0.0005520936083904525,
      "loss": 2.9234,
      "step": 42019
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.60300874710083,
      "learning_rate": 0.0005520913908608684,
      "loss": 3.0715,
      "step": 42020
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4253357648849487,
      "learning_rate": 0.0005520891732844158,
      "loss": 3.0485,
      "step": 42021
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.691827416419983,
      "learning_rate": 0.0005520869556610949,
      "loss": 3.0744,
      "step": 42022
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.592088222503662,
      "learning_rate": 0.0005520847379909063,
      "loss": 2.9357,
      "step": 42023
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.287980079650879,
      "learning_rate": 0.0005520825202738505,
      "loss": 2.9215,
      "step": 42024
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9979537725448608,
      "learning_rate": 0.0005520803025099278,
      "loss": 2.7751,
      "step": 42025
    },
    {
      "epoch": 0.55,
      "grad_norm": 5.160745143890381,
      "learning_rate": 0.0005520780846991386,
      "loss": 3.2272,
      "step": 42026
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1409990787506104,
      "learning_rate": 0.0005520758668414832,
      "loss": 3.2105,
      "step": 42027
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.9012458324432373,
      "learning_rate": 0.0005520736489369624,
      "loss": 3.2499,
      "step": 42028
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.327786922454834,
      "learning_rate": 0.0005520714309855761,
      "loss": 3.1592,
      "step": 42029
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.228439450263977,
      "learning_rate": 0.000552069212987325,
      "loss": 3.0515,
      "step": 42030
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4566506147384644,
      "learning_rate": 0.0005520669949422096,
      "loss": 2.975,
      "step": 42031
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6226383447647095,
      "learning_rate": 0.0005520647768502301,
      "loss": 2.8971,
      "step": 42032
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.525817394256592,
      "learning_rate": 0.000552062558711387,
      "loss": 2.9896,
      "step": 42033
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4782826900482178,
      "learning_rate": 0.0005520603405256808,
      "loss": 3.0224,
      "step": 42034
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6735970973968506,
      "learning_rate": 0.0005520581222931117,
      "loss": 3.1488,
      "step": 42035
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8086345195770264,
      "learning_rate": 0.0005520559040136803,
      "loss": 3.1132,
      "step": 42036
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0950326919555664,
      "learning_rate": 0.000552053685687387,
      "loss": 3.1411,
      "step": 42037
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.36214542388916,
      "learning_rate": 0.0005520514673142321,
      "loss": 3.1184,
      "step": 42038
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7527763843536377,
      "learning_rate": 0.0005520492488942161,
      "loss": 2.8911,
      "step": 42039
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4946599006652832,
      "learning_rate": 0.0005520470304273394,
      "loss": 2.9128,
      "step": 42040
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.180542230606079,
      "learning_rate": 0.0005520448119136022,
      "loss": 3.115,
      "step": 42041
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3875335454940796,
      "learning_rate": 0.0005520425933530054,
      "loss": 3.1448,
      "step": 42042
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3178068399429321,
      "learning_rate": 0.000552040374745549,
      "loss": 2.8804,
      "step": 42043
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6772661209106445,
      "learning_rate": 0.0005520381560912336,
      "loss": 3.3771,
      "step": 42044
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2256262302398682,
      "learning_rate": 0.0005520359373900594,
      "loss": 3.1453,
      "step": 42045
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.346113681793213,
      "learning_rate": 0.0005520337186420271,
      "loss": 3.2407,
      "step": 42046
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.005600929260254,
      "learning_rate": 0.0005520314998471369,
      "loss": 2.9533,
      "step": 42047
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.338568091392517,
      "learning_rate": 0.0005520292810053893,
      "loss": 3.121,
      "step": 42048
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4105143547058105,
      "learning_rate": 0.0005520270621167848,
      "loss": 2.9297,
      "step": 42049
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.468425989151001,
      "learning_rate": 0.0005520248431813235,
      "loss": 3.405,
      "step": 42050
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.133962392807007,
      "learning_rate": 0.0005520226241990062,
      "loss": 3.0066,
      "step": 42051
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.794134497642517,
      "learning_rate": 0.0005520204051698331,
      "loss": 3.2475,
      "step": 42052
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8510358333587646,
      "learning_rate": 0.0005520181860938046,
      "loss": 3.1172,
      "step": 42053
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5026644468307495,
      "learning_rate": 0.0005520159669709212,
      "loss": 2.9401,
      "step": 42054
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3883211612701416,
      "learning_rate": 0.0005520137478011832,
      "loss": 2.9818,
      "step": 42055
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.06266713142395,
      "learning_rate": 0.0005520115285845912,
      "loss": 3.1377,
      "step": 42056
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.605323314666748,
      "learning_rate": 0.0005520093093211454,
      "loss": 3.0708,
      "step": 42057
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8484385013580322,
      "learning_rate": 0.0005520070900108465,
      "loss": 2.8153,
      "step": 42058
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7360718250274658,
      "learning_rate": 0.0005520048706536946,
      "loss": 3.0812,
      "step": 42059
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2080368995666504,
      "learning_rate": 0.0005520026512496901,
      "loss": 3.0834,
      "step": 42060
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.527816653251648,
      "learning_rate": 0.0005520004317988336,
      "loss": 3.1776,
      "step": 42061
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3873083591461182,
      "learning_rate": 0.0005519982123011256,
      "loss": 3.2211,
      "step": 42062
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5684329271316528,
      "learning_rate": 0.0005519959927565664,
      "loss": 2.9005,
      "step": 42063
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1102240085601807,
      "learning_rate": 0.0005519937731651563,
      "loss": 3.2202,
      "step": 42064
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5478174686431885,
      "learning_rate": 0.0005519915535268958,
      "loss": 3.1024,
      "step": 42065
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4953382015228271,
      "learning_rate": 0.0005519893338417853,
      "loss": 2.9273,
      "step": 42066
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9205700159072876,
      "learning_rate": 0.0005519871141098253,
      "loss": 3.4066,
      "step": 42067
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.6683781147003174,
      "learning_rate": 0.0005519848943310161,
      "loss": 2.9529,
      "step": 42068
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7578836679458618,
      "learning_rate": 0.000551982674505358,
      "loss": 2.8297,
      "step": 42069
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5637116432189941,
      "learning_rate": 0.0005519804546328519,
      "loss": 3.2467,
      "step": 42070
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.242034912109375,
      "learning_rate": 0.0005519782347134976,
      "loss": 3.1845,
      "step": 42071
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6408889293670654,
      "learning_rate": 0.0005519760147472958,
      "loss": 3.0661,
      "step": 42072
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6799808740615845,
      "learning_rate": 0.000551973794734247,
      "loss": 2.8381,
      "step": 42073
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2580559253692627,
      "learning_rate": 0.0005519715746743515,
      "loss": 3.3727,
      "step": 42074
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9052839279174805,
      "learning_rate": 0.0005519693545676097,
      "loss": 3.001,
      "step": 42075
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8769110441207886,
      "learning_rate": 0.0005519671344140221,
      "loss": 3.0595,
      "step": 42076
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3842353820800781,
      "learning_rate": 0.000551964914213589,
      "loss": 3.0585,
      "step": 42077
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5020997524261475,
      "learning_rate": 0.0005519626939663109,
      "loss": 2.9531,
      "step": 42078
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.097217321395874,
      "learning_rate": 0.0005519604736721881,
      "loss": 2.856,
      "step": 42079
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2972208261489868,
      "learning_rate": 0.0005519582533312212,
      "loss": 3.1023,
      "step": 42080
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5673832893371582,
      "learning_rate": 0.0005519560329434104,
      "loss": 3.2676,
      "step": 42081
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.851379156112671,
      "learning_rate": 0.0005519538125087562,
      "loss": 2.8761,
      "step": 42082
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9334403276443481,
      "learning_rate": 0.0005519515920272592,
      "loss": 2.9165,
      "step": 42083
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6361958980560303,
      "learning_rate": 0.0005519493714989196,
      "loss": 2.9531,
      "step": 42084
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.585524082183838,
      "learning_rate": 0.0005519471509237377,
      "loss": 3.0293,
      "step": 42085
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4862948656082153,
      "learning_rate": 0.0005519449303017141,
      "loss": 3.3078,
      "step": 42086
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.258106231689453,
      "learning_rate": 0.0005519427096328494,
      "loss": 3.1779,
      "step": 42087
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8505573272705078,
      "learning_rate": 0.0005519404889171435,
      "loss": 3.1387,
      "step": 42088
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1863036155700684,
      "learning_rate": 0.0005519382681545973,
      "loss": 3.0526,
      "step": 42089
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.209064245223999,
      "learning_rate": 0.000551936047345211,
      "loss": 3.1735,
      "step": 42090
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4655894041061401,
      "learning_rate": 0.000551933826488985,
      "loss": 3.2004,
      "step": 42091
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5245864391326904,
      "learning_rate": 0.0005519316055859197,
      "loss": 2.9487,
      "step": 42092
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4486043453216553,
      "learning_rate": 0.0005519293846360157,
      "loss": 2.9906,
      "step": 42093
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6673576831817627,
      "learning_rate": 0.0005519271636392732,
      "loss": 3.0628,
      "step": 42094
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1002917289733887,
      "learning_rate": 0.0005519249425956925,
      "loss": 2.8644,
      "step": 42095
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6342930793762207,
      "learning_rate": 0.0005519227215052744,
      "loss": 3.092,
      "step": 42096
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0303354263305664,
      "learning_rate": 0.0005519205003680191,
      "loss": 3.1482,
      "step": 42097
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0958099365234375,
      "learning_rate": 0.000551918279183927,
      "loss": 3.0456,
      "step": 42098
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7314239740371704,
      "learning_rate": 0.0005519160579529986,
      "loss": 3.2466,
      "step": 42099
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.572554588317871,
      "learning_rate": 0.0005519138366752341,
      "loss": 3.0966,
      "step": 42100
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2983767986297607,
      "learning_rate": 0.0005519116153506341,
      "loss": 3.2756,
      "step": 42101
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0355043411254883,
      "learning_rate": 0.0005519093939791991,
      "loss": 3.1002,
      "step": 42102
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.616965413093567,
      "learning_rate": 0.0005519071725609292,
      "loss": 3.0354,
      "step": 42103
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0762994289398193,
      "learning_rate": 0.0005519049510958252,
      "loss": 3.0558,
      "step": 42104
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8991351127624512,
      "learning_rate": 0.0005519027295838873,
      "loss": 3.1252,
      "step": 42105
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.9297189712524414,
      "learning_rate": 0.0005519005080251158,
      "loss": 3.0656,
      "step": 42106
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2973835468292236,
      "learning_rate": 0.0005518982864195113,
      "loss": 2.9201,
      "step": 42107
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5397846698760986,
      "learning_rate": 0.0005518960647670742,
      "loss": 2.9709,
      "step": 42108
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.747898578643799,
      "learning_rate": 0.0005518938430678048,
      "loss": 2.8986,
      "step": 42109
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.377955436706543,
      "learning_rate": 0.0005518916213217035,
      "loss": 3.2687,
      "step": 42110
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.9385194778442383,
      "learning_rate": 0.0005518893995287709,
      "loss": 3.3645,
      "step": 42111
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.352454423904419,
      "learning_rate": 0.0005518871776890073,
      "loss": 2.9712,
      "step": 42112
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1768076419830322,
      "learning_rate": 0.0005518849558024131,
      "loss": 2.8539,
      "step": 42113
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.381223440170288,
      "learning_rate": 0.0005518827338689888,
      "loss": 3.1896,
      "step": 42114
    },
    {
      "epoch": 0.55,
      "grad_norm": 4.052659511566162,
      "learning_rate": 0.0005518805118887347,
      "loss": 2.8562,
      "step": 42115
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5575168132781982,
      "learning_rate": 0.0005518782898616512,
      "loss": 2.958,
      "step": 42116
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.2098875045776367,
      "learning_rate": 0.0005518760677877388,
      "loss": 3.1545,
      "step": 42117
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.773350954055786,
      "learning_rate": 0.0005518738456669979,
      "loss": 3.2974,
      "step": 42118
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5822662115097046,
      "learning_rate": 0.0005518716234994288,
      "loss": 3.098,
      "step": 42119
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.479856252670288,
      "learning_rate": 0.0005518694012850323,
      "loss": 3.0097,
      "step": 42120
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5387120246887207,
      "learning_rate": 0.0005518671790238083,
      "loss": 3.087,
      "step": 42121
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5224549770355225,
      "learning_rate": 0.0005518649567157574,
      "loss": 3.2329,
      "step": 42122
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6123157739639282,
      "learning_rate": 0.0005518627343608801,
      "loss": 3.0461,
      "step": 42123
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0968871116638184,
      "learning_rate": 0.0005518605119591768,
      "loss": 2.8063,
      "step": 42124
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0441482067108154,
      "learning_rate": 0.0005518582895106479,
      "loss": 2.9194,
      "step": 42125
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7913756370544434,
      "learning_rate": 0.0005518560670152938,
      "loss": 3.1923,
      "step": 42126
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5318751335144043,
      "learning_rate": 0.0005518538444731147,
      "loss": 2.9491,
      "step": 42127
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1209232807159424,
      "learning_rate": 0.0005518516218841114,
      "loss": 3.0946,
      "step": 42128
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.1658172607421875,
      "learning_rate": 0.0005518493992482842,
      "loss": 3.221,
      "step": 42129
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.018383026123047,
      "learning_rate": 0.0005518471765656333,
      "loss": 3.0601,
      "step": 42130
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8765116930007935,
      "learning_rate": 0.0005518449538361594,
      "loss": 3.1364,
      "step": 42131
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2567033767700195,
      "learning_rate": 0.0005518427310598626,
      "loss": 2.78,
      "step": 42132
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.22585129737854,
      "learning_rate": 0.0005518405082367435,
      "loss": 3.1263,
      "step": 42133
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.24927020072937,
      "learning_rate": 0.0005518382853668026,
      "loss": 2.8924,
      "step": 42134
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7464423179626465,
      "learning_rate": 0.0005518360624500401,
      "loss": 3.0094,
      "step": 42135
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.6193363666534424,
      "learning_rate": 0.0005518338394864566,
      "loss": 3.2748,
      "step": 42136
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.2216153144836426,
      "learning_rate": 0.0005518316164760524,
      "loss": 2.9014,
      "step": 42137
    },
    {
      "epoch": 0.55,
      "grad_norm": 4.329081058502197,
      "learning_rate": 0.000551829393418828,
      "loss": 3.1703,
      "step": 42138
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4871032238006592,
      "learning_rate": 0.0005518271703147837,
      "loss": 3.021,
      "step": 42139
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8357880115509033,
      "learning_rate": 0.00055182494716392,
      "loss": 2.9295,
      "step": 42140
    },
    {
      "epoch": 0.55,
      "grad_norm": 4.721870422363281,
      "learning_rate": 0.0005518227239662373,
      "loss": 2.8575,
      "step": 42141
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1835339069366455,
      "learning_rate": 0.000551820500721736,
      "loss": 3.2333,
      "step": 42142
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.8955557346343994,
      "learning_rate": 0.0005518182774304165,
      "loss": 3.2432,
      "step": 42143
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4487308263778687,
      "learning_rate": 0.0005518160540922793,
      "loss": 3.2535,
      "step": 42144
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8105586767196655,
      "learning_rate": 0.0005518138307073245,
      "loss": 3.0395,
      "step": 42145
    },
    {
      "epoch": 0.55,
      "grad_norm": 4.5820794105529785,
      "learning_rate": 0.000551811607275553,
      "loss": 3.04,
      "step": 42146
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.439967632293701,
      "learning_rate": 0.0005518093837969649,
      "loss": 2.8804,
      "step": 42147
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8325084447860718,
      "learning_rate": 0.0005518071602715606,
      "loss": 3.1627,
      "step": 42148
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5887774229049683,
      "learning_rate": 0.0005518049366993407,
      "loss": 3.1261,
      "step": 42149
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.359023094177246,
      "learning_rate": 0.0005518027130803055,
      "loss": 3.0893,
      "step": 42150
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.0515952110290527,
      "learning_rate": 0.0005518004894144554,
      "loss": 3.21,
      "step": 42151
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1517512798309326,
      "learning_rate": 0.0005517982657017909,
      "loss": 3.1394,
      "step": 42152
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7087974548339844,
      "learning_rate": 0.0005517960419423124,
      "loss": 3.0575,
      "step": 42153
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5703859329223633,
      "learning_rate": 0.00055179381813602,
      "loss": 3.3583,
      "step": 42154
    },
    {
      "epoch": 0.55,
      "grad_norm": 4.381026744842529,
      "learning_rate": 0.0005517915942829146,
      "loss": 2.7294,
      "step": 42155
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7926852703094482,
      "learning_rate": 0.0005517893703829963,
      "loss": 3.3405,
      "step": 42156
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6678580045700073,
      "learning_rate": 0.0005517871464362657,
      "loss": 2.9393,
      "step": 42157
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.435788154602051,
      "learning_rate": 0.0005517849224427229,
      "loss": 2.9462,
      "step": 42158
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.209104299545288,
      "learning_rate": 0.0005517826984023687,
      "loss": 2.9594,
      "step": 42159
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2831003665924072,
      "learning_rate": 0.0005517804743152033,
      "loss": 3.2942,
      "step": 42160
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3566569089889526,
      "learning_rate": 0.0005517782501812272,
      "loss": 3.1991,
      "step": 42161
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4185817241668701,
      "learning_rate": 0.0005517760260004408,
      "loss": 2.9805,
      "step": 42162
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7801902294158936,
      "learning_rate": 0.0005517738017728444,
      "loss": 2.808,
      "step": 42163
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.964199423789978,
      "learning_rate": 0.0005517715774984386,
      "loss": 3.0125,
      "step": 42164
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.179962396621704,
      "learning_rate": 0.0005517693531772236,
      "loss": 3.216,
      "step": 42165
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3763346672058105,
      "learning_rate": 0.0005517671288092,
      "loss": 3.0965,
      "step": 42166
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.041471004486084,
      "learning_rate": 0.000551764904394368,
      "loss": 2.7354,
      "step": 42167
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4049060344696045,
      "learning_rate": 0.0005517626799327283,
      "loss": 3.1603,
      "step": 42168
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1467344760894775,
      "learning_rate": 0.0005517604554242812,
      "loss": 3.2811,
      "step": 42169
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9175307750701904,
      "learning_rate": 0.0005517582308690269,
      "loss": 3.0898,
      "step": 42170
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5350536108016968,
      "learning_rate": 0.0005517560062669661,
      "loss": 2.9919,
      "step": 42171
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4421725273132324,
      "learning_rate": 0.0005517537816180991,
      "loss": 2.9911,
      "step": 42172
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8614405393600464,
      "learning_rate": 0.0005517515569224263,
      "loss": 3.2286,
      "step": 42173
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3967167139053345,
      "learning_rate": 0.0005517493321799482,
      "loss": 3.0788,
      "step": 42174
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7509560585021973,
      "learning_rate": 0.000551747107390665,
      "loss": 3.089,
      "step": 42175
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.415732502937317,
      "learning_rate": 0.0005517448825545773,
      "loss": 3.0596,
      "step": 42176
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6256375312805176,
      "learning_rate": 0.0005517426576716857,
      "loss": 3.1223,
      "step": 42177
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3057618141174316,
      "learning_rate": 0.0005517404327419901,
      "loss": 2.8498,
      "step": 42178
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6185245513916016,
      "learning_rate": 0.0005517382077654914,
      "loss": 3.2527,
      "step": 42179
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6482009887695312,
      "learning_rate": 0.0005517359827421897,
      "loss": 3.1801,
      "step": 42180
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7256230115890503,
      "learning_rate": 0.0005517337576720856,
      "loss": 3.1085,
      "step": 42181
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.404434323310852,
      "learning_rate": 0.0005517315325551794,
      "loss": 3.2602,
      "step": 42182
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.415573000907898,
      "learning_rate": 0.0005517293073914716,
      "loss": 3.2515,
      "step": 42183
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4849573373794556,
      "learning_rate": 0.0005517270821809625,
      "loss": 2.8304,
      "step": 42184
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2751635313034058,
      "learning_rate": 0.0005517248569236526,
      "loss": 3.2575,
      "step": 42185
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3403191566467285,
      "learning_rate": 0.0005517226316195424,
      "loss": 2.79,
      "step": 42186
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7734770774841309,
      "learning_rate": 0.000551720406268632,
      "loss": 3.0743,
      "step": 42187
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9557710886001587,
      "learning_rate": 0.0005517181808709222,
      "loss": 3.3415,
      "step": 42188
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.393489122390747,
      "learning_rate": 0.0005517159554264133,
      "loss": 3.0527,
      "step": 42189
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7975609302520752,
      "learning_rate": 0.0005517137299351054,
      "loss": 3.0474,
      "step": 42190
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3888365030288696,
      "learning_rate": 0.0005517115043969993,
      "loss": 2.9116,
      "step": 42191
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7807072401046753,
      "learning_rate": 0.0005517092788120953,
      "loss": 3.1558,
      "step": 42192
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7906678915023804,
      "learning_rate": 0.0005517070531803938,
      "loss": 3.2624,
      "step": 42193
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3466614484786987,
      "learning_rate": 0.0005517048275018952,
      "loss": 3.1906,
      "step": 42194
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7328301668167114,
      "learning_rate": 0.0005517026017765998,
      "loss": 2.9519,
      "step": 42195
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4390238523483276,
      "learning_rate": 0.0005517003760045084,
      "loss": 3.0026,
      "step": 42196
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.621252179145813,
      "learning_rate": 0.000551698150185621,
      "loss": 3.1154,
      "step": 42197
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.483596920967102,
      "learning_rate": 0.0005516959243199381,
      "loss": 3.1543,
      "step": 42198
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4708362817764282,
      "learning_rate": 0.0005516936984074602,
      "loss": 2.7918,
      "step": 42199
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8915706872940063,
      "learning_rate": 0.0005516914724481878,
      "loss": 3.356,
      "step": 42200
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7791367769241333,
      "learning_rate": 0.000551689246442121,
      "loss": 3.2433,
      "step": 42201
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.473205327987671,
      "learning_rate": 0.0005516870203892606,
      "loss": 3.2894,
      "step": 42202
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6319704055786133,
      "learning_rate": 0.0005516847942896068,
      "loss": 3.2514,
      "step": 42203
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6593538522720337,
      "learning_rate": 0.00055168256814316,
      "loss": 3.0687,
      "step": 42204
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4841169118881226,
      "learning_rate": 0.0005516803419499208,
      "loss": 2.9777,
      "step": 42205
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4599733352661133,
      "learning_rate": 0.0005516781157098893,
      "loss": 2.8809,
      "step": 42206
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6915037631988525,
      "learning_rate": 0.0005516758894230661,
      "loss": 3.035,
      "step": 42207
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4914236068725586,
      "learning_rate": 0.0005516736630894516,
      "loss": 3.1177,
      "step": 42208
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1566333770751953,
      "learning_rate": 0.0005516714367090463,
      "loss": 3.0073,
      "step": 42209
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.802540898323059,
      "learning_rate": 0.0005516692102818505,
      "loss": 2.8521,
      "step": 42210
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5491591691970825,
      "learning_rate": 0.0005516669838078646,
      "loss": 3.0325,
      "step": 42211
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6933163404464722,
      "learning_rate": 0.0005516647572870892,
      "loss": 3.0576,
      "step": 42212
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.123791217803955,
      "learning_rate": 0.0005516625307195244,
      "loss": 2.9385,
      "step": 42213
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1978342533111572,
      "learning_rate": 0.0005516603041051709,
      "loss": 3.1244,
      "step": 42214
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7270519733428955,
      "learning_rate": 0.0005516580774440288,
      "loss": 3.3371,
      "step": 42215
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.73425030708313,
      "learning_rate": 0.0005516558507360989,
      "loss": 3.3819,
      "step": 42216
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.195584774017334,
      "learning_rate": 0.0005516536239813814,
      "loss": 2.7991,
      "step": 42217
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2539689540863037,
      "learning_rate": 0.0005516513971798768,
      "loss": 3.1003,
      "step": 42218
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5804312229156494,
      "learning_rate": 0.0005516491703315854,
      "loss": 3.288,
      "step": 42219
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.78389310836792,
      "learning_rate": 0.0005516469434365076,
      "loss": 2.9769,
      "step": 42220
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4007313251495361,
      "learning_rate": 0.0005516447164946439,
      "loss": 3.0974,
      "step": 42221
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4217764139175415,
      "learning_rate": 0.0005516424895059947,
      "loss": 3.2519,
      "step": 42222
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7640849351882935,
      "learning_rate": 0.0005516402624705605,
      "loss": 3.0445,
      "step": 42223
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.4038143157958984,
      "learning_rate": 0.0005516380353883415,
      "loss": 3.3887,
      "step": 42224
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5534294843673706,
      "learning_rate": 0.0005516358082593383,
      "loss": 2.7721,
      "step": 42225
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6795045137405396,
      "learning_rate": 0.0005516335810835513,
      "loss": 2.9899,
      "step": 42226
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5347704887390137,
      "learning_rate": 0.0005516313538609808,
      "loss": 3.2578,
      "step": 42227
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5734177827835083,
      "learning_rate": 0.0005516291265916274,
      "loss": 3.0152,
      "step": 42228
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.635382890701294,
      "learning_rate": 0.0005516268992754913,
      "loss": 2.9481,
      "step": 42229
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.362421751022339,
      "learning_rate": 0.0005516246719125729,
      "loss": 2.6851,
      "step": 42230
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6605875492095947,
      "learning_rate": 0.0005516224445028729,
      "loss": 3.3477,
      "step": 42231
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.255366563796997,
      "learning_rate": 0.0005516202170463914,
      "loss": 2.9462,
      "step": 42232
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5800549983978271,
      "learning_rate": 0.0005516179895431291,
      "loss": 2.8378,
      "step": 42233
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4063653945922852,
      "learning_rate": 0.0005516157619930861,
      "loss": 2.9974,
      "step": 42234
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6561853885650635,
      "learning_rate": 0.0005516135343962632,
      "loss": 3.1316,
      "step": 42235
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.520464301109314,
      "learning_rate": 0.0005516113067526604,
      "loss": 3.1411,
      "step": 42236
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3886545896530151,
      "learning_rate": 0.0005516090790622784,
      "loss": 3.0281,
      "step": 42237
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5947357416152954,
      "learning_rate": 0.0005516068513251176,
      "loss": 3.2909,
      "step": 42238
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0813205242156982,
      "learning_rate": 0.0005516046235411781,
      "loss": 3.1345,
      "step": 42239
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4946390390396118,
      "learning_rate": 0.0005516023957104608,
      "loss": 3.2526,
      "step": 42240
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6581498384475708,
      "learning_rate": 0.0005516001678329657,
      "loss": 2.8977,
      "step": 42241
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.659330129623413,
      "learning_rate": 0.0005515979399086934,
      "loss": 3.0473,
      "step": 42242
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8018733263015747,
      "learning_rate": 0.0005515957119376444,
      "loss": 3.0038,
      "step": 42243
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7862352132797241,
      "learning_rate": 0.0005515934839198189,
      "loss": 3.2576,
      "step": 42244
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.493971347808838,
      "learning_rate": 0.0005515912558552175,
      "loss": 2.9556,
      "step": 42245
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.2642955780029297,
      "learning_rate": 0.0005515890277438404,
      "loss": 3.018,
      "step": 42246
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.265056610107422,
      "learning_rate": 0.0005515867995856882,
      "loss": 2.7536,
      "step": 42247
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.592943787574768,
      "learning_rate": 0.0005515845713807613,
      "loss": 2.913,
      "step": 42248
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.164458751678467,
      "learning_rate": 0.0005515823431290602,
      "loss": 2.6215,
      "step": 42249
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.6839137077331543,
      "learning_rate": 0.000551580114830585,
      "loss": 3.2304,
      "step": 42250
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9144682884216309,
      "learning_rate": 0.0005515778864853365,
      "loss": 2.9565,
      "step": 42251
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.089280843734741,
      "learning_rate": 0.0005515756580933147,
      "loss": 3.0959,
      "step": 42252
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1426236629486084,
      "learning_rate": 0.0005515734296545204,
      "loss": 2.972,
      "step": 42253
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.050020694732666,
      "learning_rate": 0.0005515712011689538,
      "loss": 3.056,
      "step": 42254
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7865700721740723,
      "learning_rate": 0.0005515689726366153,
      "loss": 3.1112,
      "step": 42255
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.455398440361023,
      "learning_rate": 0.0005515667440575055,
      "loss": 2.8214,
      "step": 42256
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.282928705215454,
      "learning_rate": 0.0005515645154316245,
      "loss": 3.375,
      "step": 42257
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.802176594734192,
      "learning_rate": 0.0005515622867589732,
      "loss": 3.272,
      "step": 42258
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6782366037368774,
      "learning_rate": 0.0005515600580395516,
      "loss": 3.0928,
      "step": 42259
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.843454122543335,
      "learning_rate": 0.0005515578292733601,
      "loss": 2.9391,
      "step": 42260
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6632654666900635,
      "learning_rate": 0.0005515556004603993,
      "loss": 3.2911,
      "step": 42261
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3559222221374512,
      "learning_rate": 0.0005515533716006697,
      "loss": 3.3784,
      "step": 42262
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.446588158607483,
      "learning_rate": 0.0005515511426941714,
      "loss": 3.3816,
      "step": 42263
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6104657649993896,
      "learning_rate": 0.0005515489137409052,
      "loss": 3.1112,
      "step": 42264
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3762571811676025,
      "learning_rate": 0.0005515466847408712,
      "loss": 3.0799,
      "step": 42265
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1222712993621826,
      "learning_rate": 0.0005515444556940699,
      "loss": 3.0362,
      "step": 42266
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2575604915618896,
      "learning_rate": 0.0005515422266005018,
      "loss": 2.9922,
      "step": 42267
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.654695987701416,
      "learning_rate": 0.0005515399974601672,
      "loss": 2.8306,
      "step": 42268
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5207306146621704,
      "learning_rate": 0.0005515377682730665,
      "loss": 3.019,
      "step": 42269
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.721771478652954,
      "learning_rate": 0.0005515355390392002,
      "loss": 2.9128,
      "step": 42270
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.380874514579773,
      "learning_rate": 0.0005515333097585688,
      "loss": 3.0742,
      "step": 42271
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.006441116333008,
      "learning_rate": 0.0005515310804311726,
      "loss": 3.1526,
      "step": 42272
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.179563045501709,
      "learning_rate": 0.000551528851057012,
      "loss": 3.0419,
      "step": 42273
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9517772197723389,
      "learning_rate": 0.0005515266216360874,
      "loss": 3.1071,
      "step": 42274
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4976866245269775,
      "learning_rate": 0.0005515243921683994,
      "loss": 2.8001,
      "step": 42275
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.086385488510132,
      "learning_rate": 0.0005515221626539481,
      "loss": 3.0088,
      "step": 42276
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5438069105148315,
      "learning_rate": 0.0005515199330927341,
      "loss": 3.0635,
      "step": 42277
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7327927350997925,
      "learning_rate": 0.0005515177034847578,
      "loss": 2.9152,
      "step": 42278
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8128714561462402,
      "learning_rate": 0.0005515154738300196,
      "loss": 2.9195,
      "step": 42279
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5414528846740723,
      "learning_rate": 0.00055151324412852,
      "loss": 3.103,
      "step": 42280
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.015650749206543,
      "learning_rate": 0.0005515110143802593,
      "loss": 3.3076,
      "step": 42281
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6811683177947998,
      "learning_rate": 0.000551508784585238,
      "loss": 2.9637,
      "step": 42282
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4308373928070068,
      "learning_rate": 0.0005515065547434564,
      "loss": 3.0385,
      "step": 42283
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3759669065475464,
      "learning_rate": 0.0005515043248549151,
      "loss": 3.113,
      "step": 42284
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5252442359924316,
      "learning_rate": 0.0005515020949196143,
      "loss": 3.1812,
      "step": 42285
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8338162899017334,
      "learning_rate": 0.0005514998649375545,
      "loss": 2.8978,
      "step": 42286
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6736620664596558,
      "learning_rate": 0.000551497634908736,
      "loss": 2.9505,
      "step": 42287
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9938186407089233,
      "learning_rate": 0.0005514954048331596,
      "loss": 3.0386,
      "step": 42288
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.701294183731079,
      "learning_rate": 0.0005514931747108254,
      "loss": 3.3009,
      "step": 42289
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4217588901519775,
      "learning_rate": 0.0005514909445417337,
      "loss": 2.9965,
      "step": 42290
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7896957397460938,
      "learning_rate": 0.0005514887143258853,
      "loss": 3.2301,
      "step": 42291
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7339699268341064,
      "learning_rate": 0.0005514864840632802,
      "loss": 2.8154,
      "step": 42292
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3627876043319702,
      "learning_rate": 0.0005514842537539192,
      "loss": 3.1337,
      "step": 42293
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.678594708442688,
      "learning_rate": 0.0005514820233978025,
      "loss": 2.988,
      "step": 42294
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3033041954040527,
      "learning_rate": 0.0005514797929949305,
      "loss": 3.3354,
      "step": 42295
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8261840343475342,
      "learning_rate": 0.0005514775625453036,
      "loss": 3.2772,
      "step": 42296
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4529932737350464,
      "learning_rate": 0.0005514753320489223,
      "loss": 3.1283,
      "step": 42297
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9964931011199951,
      "learning_rate": 0.000551473101505787,
      "loss": 2.9524,
      "step": 42298
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2368577718734741,
      "learning_rate": 0.0005514708709158982,
      "loss": 3.1824,
      "step": 42299
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6087037324905396,
      "learning_rate": 0.0005514686402792562,
      "loss": 3.0459,
      "step": 42300
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4908199310302734,
      "learning_rate": 0.0005514664095958613,
      "loss": 3.0969,
      "step": 42301
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.438275933265686,
      "learning_rate": 0.0005514641788657141,
      "loss": 3.2172,
      "step": 42302
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.021277904510498,
      "learning_rate": 0.0005514619480888151,
      "loss": 3.2001,
      "step": 42303
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.430272102355957,
      "learning_rate": 0.0005514597172651644,
      "loss": 3.3223,
      "step": 42304
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8830254077911377,
      "learning_rate": 0.0005514574863947627,
      "loss": 3.0027,
      "step": 42305
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4354933500289917,
      "learning_rate": 0.0005514552554776102,
      "loss": 3.1692,
      "step": 42306
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3617360591888428,
      "learning_rate": 0.0005514530245137075,
      "loss": 3.1639,
      "step": 42307
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.71624755859375,
      "learning_rate": 0.0005514507935030549,
      "loss": 3.0006,
      "step": 42308
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4073032140731812,
      "learning_rate": 0.0005514485624456528,
      "loss": 3.0641,
      "step": 42309
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.242972731590271,
      "learning_rate": 0.0005514463313415018,
      "loss": 3.1502,
      "step": 42310
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2592895030975342,
      "learning_rate": 0.0005514441001906021,
      "loss": 3.1212,
      "step": 42311
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.372218132019043,
      "learning_rate": 0.0005514418689929543,
      "loss": 3.0839,
      "step": 42312
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.668578863143921,
      "learning_rate": 0.0005514396377485586,
      "loss": 3.0471,
      "step": 42313
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.413375735282898,
      "learning_rate": 0.0005514374064574156,
      "loss": 3.0347,
      "step": 42314
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.438673973083496,
      "learning_rate": 0.0005514351751195255,
      "loss": 3.0284,
      "step": 42315
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6708319187164307,
      "learning_rate": 0.000551432943734889,
      "loss": 3.2175,
      "step": 42316
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4356893301010132,
      "learning_rate": 0.0005514307123035062,
      "loss": 2.7576,
      "step": 42317
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7745082378387451,
      "learning_rate": 0.0005514284808253779,
      "loss": 3.159,
      "step": 42318
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3925280570983887,
      "learning_rate": 0.0005514262493005041,
      "loss": 2.859,
      "step": 42319
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.470582365989685,
      "learning_rate": 0.0005514240177288855,
      "loss": 2.9608,
      "step": 42320
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.621753215789795,
      "learning_rate": 0.0005514217861105226,
      "loss": 2.8179,
      "step": 42321
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7514983415603638,
      "learning_rate": 0.0005514195544454153,
      "loss": 3.0708,
      "step": 42322
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6415168046951294,
      "learning_rate": 0.0005514173227335646,
      "loss": 2.9533,
      "step": 42323
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.870935320854187,
      "learning_rate": 0.0005514150909749706,
      "loss": 2.9196,
      "step": 42324
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6910336017608643,
      "learning_rate": 0.0005514128591696338,
      "loss": 2.8559,
      "step": 42325
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6359672546386719,
      "learning_rate": 0.0005514106273175546,
      "loss": 3.1749,
      "step": 42326
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.057210683822632,
      "learning_rate": 0.0005514083954187334,
      "loss": 2.8662,
      "step": 42327
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0191211700439453,
      "learning_rate": 0.0005514061634731707,
      "loss": 3.0645,
      "step": 42328
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.147529363632202,
      "learning_rate": 0.0005514039314808668,
      "loss": 2.9286,
      "step": 42329
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5223300457000732,
      "learning_rate": 0.0005514016994418221,
      "loss": 2.9934,
      "step": 42330
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1769118309020996,
      "learning_rate": 0.0005513994673560373,
      "loss": 2.9339,
      "step": 42331
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.054152011871338,
      "learning_rate": 0.0005513972352235123,
      "loss": 2.8311,
      "step": 42332
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3613481521606445,
      "learning_rate": 0.000551395003044248,
      "loss": 3.0551,
      "step": 42333
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6861400604248047,
      "learning_rate": 0.0005513927708182446,
      "loss": 3.0222,
      "step": 42334
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5935980081558228,
      "learning_rate": 0.0005513905385455025,
      "loss": 3.3911,
      "step": 42335
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5523765087127686,
      "learning_rate": 0.0005513883062260222,
      "loss": 3.0668,
      "step": 42336
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7236348390579224,
      "learning_rate": 0.000551386073859804,
      "loss": 3.1399,
      "step": 42337
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5509815216064453,
      "learning_rate": 0.0005513838414468484,
      "loss": 3.1014,
      "step": 42338
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3607470989227295,
      "learning_rate": 0.0005513816089871559,
      "loss": 3.0591,
      "step": 42339
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8758176565170288,
      "learning_rate": 0.0005513793764807266,
      "loss": 3.1151,
      "step": 42340
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6840870380401611,
      "learning_rate": 0.0005513771439275614,
      "loss": 3.0033,
      "step": 42341
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.7599382400512695,
      "learning_rate": 0.0005513749113276602,
      "loss": 3.0169,
      "step": 42342
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9999094009399414,
      "learning_rate": 0.0005513726786810239,
      "loss": 2.9278,
      "step": 42343
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.56886887550354,
      "learning_rate": 0.0005513704459876524,
      "loss": 3.0997,
      "step": 42344
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6611944437026978,
      "learning_rate": 0.0005513682132475466,
      "loss": 3.1066,
      "step": 42345
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.513598918914795,
      "learning_rate": 0.0005513659804607066,
      "loss": 2.8181,
      "step": 42346
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5882041454315186,
      "learning_rate": 0.000551363747627133,
      "loss": 2.7944,
      "step": 42347
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0173845291137695,
      "learning_rate": 0.0005513615147468261,
      "loss": 3.0329,
      "step": 42348
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.519423007965088,
      "learning_rate": 0.0005513592818197863,
      "loss": 3.0472,
      "step": 42349
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9947688579559326,
      "learning_rate": 0.0005513570488460141,
      "loss": 3.1686,
      "step": 42350
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5951515436172485,
      "learning_rate": 0.0005513548158255099,
      "loss": 3.1335,
      "step": 42351
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.1345648765563965,
      "learning_rate": 0.0005513525827582741,
      "loss": 3.0819,
      "step": 42352
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5087106227874756,
      "learning_rate": 0.000551350349644307,
      "loss": 3.1424,
      "step": 42353
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4280086755752563,
      "learning_rate": 0.0005513481164836092,
      "loss": 3.0052,
      "step": 42354
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8890436887741089,
      "learning_rate": 0.0005513458832761811,
      "loss": 2.9658,
      "step": 42355
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.632027506828308,
      "learning_rate": 0.000551343650022023,
      "loss": 3.1094,
      "step": 42356
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4350628852844238,
      "learning_rate": 0.0005513414167211354,
      "loss": 3.0322,
      "step": 42357
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9287078380584717,
      "learning_rate": 0.0005513391833735186,
      "loss": 2.8661,
      "step": 42358
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3939934968948364,
      "learning_rate": 0.0005513369499791732,
      "loss": 2.9275,
      "step": 42359
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4526950120925903,
      "learning_rate": 0.0005513347165380995,
      "loss": 3.0373,
      "step": 42360
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5831975936889648,
      "learning_rate": 0.0005513324830502979,
      "loss": 3.0056,
      "step": 42361
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5809191465377808,
      "learning_rate": 0.0005513302495157689,
      "loss": 3.1338,
      "step": 42362
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3578050136566162,
      "learning_rate": 0.0005513280159345128,
      "loss": 3.1762,
      "step": 42363
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3389947414398193,
      "learning_rate": 0.0005513257823065301,
      "loss": 2.9699,
      "step": 42364
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3748193979263306,
      "learning_rate": 0.0005513235486318212,
      "loss": 3.1073,
      "step": 42365
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9565160274505615,
      "learning_rate": 0.0005513213149103864,
      "loss": 2.7968,
      "step": 42366
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6081091165542603,
      "learning_rate": 0.0005513190811422264,
      "loss": 3.2209,
      "step": 42367
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8149863481521606,
      "learning_rate": 0.0005513168473273414,
      "loss": 3.166,
      "step": 42368
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6576052904129028,
      "learning_rate": 0.0005513146134657318,
      "loss": 2.9011,
      "step": 42369
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.56829833984375,
      "learning_rate": 0.0005513123795573981,
      "loss": 2.9557,
      "step": 42370
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1877427101135254,
      "learning_rate": 0.0005513101456023407,
      "loss": 3.1152,
      "step": 42371
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5868935585021973,
      "learning_rate": 0.0005513079116005599,
      "loss": 2.9854,
      "step": 42372
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9548691511154175,
      "learning_rate": 0.0005513056775520563,
      "loss": 2.9383,
      "step": 42373
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.630479335784912,
      "learning_rate": 0.0005513034434568302,
      "loss": 3.1354,
      "step": 42374
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2834047079086304,
      "learning_rate": 0.0005513012093148821,
      "loss": 2.9998,
      "step": 42375
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3048961162567139,
      "learning_rate": 0.0005512989751262123,
      "loss": 2.905,
      "step": 42376
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4667786359786987,
      "learning_rate": 0.0005512967408908213,
      "loss": 2.987,
      "step": 42377
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.906290054321289,
      "learning_rate": 0.0005512945066087095,
      "loss": 2.8726,
      "step": 42378
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.074458122253418,
      "learning_rate": 0.0005512922722798773,
      "loss": 3.0972,
      "step": 42379
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9246693849563599,
      "learning_rate": 0.0005512900379043251,
      "loss": 2.6322,
      "step": 42380
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.094982385635376,
      "learning_rate": 0.0005512878034820535,
      "loss": 3.1558,
      "step": 42381
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8223586082458496,
      "learning_rate": 0.0005512855690130626,
      "loss": 2.8966,
      "step": 42382
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8635623455047607,
      "learning_rate": 0.0005512833344973528,
      "loss": 2.9787,
      "step": 42383
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4936751127243042,
      "learning_rate": 0.000551281099934925,
      "loss": 3.0752,
      "step": 42384
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.364602565765381,
      "learning_rate": 0.0005512788653257791,
      "loss": 3.1678,
      "step": 42385
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7331126928329468,
      "learning_rate": 0.0005512766306699159,
      "loss": 2.9902,
      "step": 42386
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4900202751159668,
      "learning_rate": 0.0005512743959673355,
      "loss": 3.1201,
      "step": 42387
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.326424479484558,
      "learning_rate": 0.0005512721612180385,
      "loss": 3.2823,
      "step": 42388
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4214346408843994,
      "learning_rate": 0.0005512699264220251,
      "loss": 3.1464,
      "step": 42389
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6937774419784546,
      "learning_rate": 0.000551267691579296,
      "loss": 2.8287,
      "step": 42390
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4974806308746338,
      "learning_rate": 0.0005512654566898515,
      "loss": 2.9077,
      "step": 42391
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6650627851486206,
      "learning_rate": 0.0005512632217536921,
      "loss": 2.9886,
      "step": 42392
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.843064546585083,
      "learning_rate": 0.000551260986770818,
      "loss": 3.2019,
      "step": 42393
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3769816160202026,
      "learning_rate": 0.0005512587517412298,
      "loss": 3.1474,
      "step": 42394
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6282886266708374,
      "learning_rate": 0.0005512565166649278,
      "loss": 3.0737,
      "step": 42395
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.776921033859253,
      "learning_rate": 0.0005512542815419126,
      "loss": 2.9649,
      "step": 42396
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8835902214050293,
      "learning_rate": 0.0005512520463721843,
      "loss": 2.9141,
      "step": 42397
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3913798332214355,
      "learning_rate": 0.0005512498111557436,
      "loss": 2.7441,
      "step": 42398
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9259122610092163,
      "learning_rate": 0.0005512475758925907,
      "loss": 3.2317,
      "step": 42399
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7589837312698364,
      "learning_rate": 0.0005512453405827263,
      "loss": 2.9721,
      "step": 42400
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7909011840820312,
      "learning_rate": 0.0005512431052261506,
      "loss": 3.2258,
      "step": 42401
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6141787767410278,
      "learning_rate": 0.000551240869822864,
      "loss": 2.956,
      "step": 42402
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3739330768585205,
      "learning_rate": 0.0005512386343728671,
      "loss": 3.1452,
      "step": 42403
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6132774353027344,
      "learning_rate": 0.00055123639887616,
      "loss": 2.8693,
      "step": 42404
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7938672304153442,
      "learning_rate": 0.0005512341633327435,
      "loss": 3.1106,
      "step": 42405
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.806247591972351,
      "learning_rate": 0.0005512319277426178,
      "loss": 3.0493,
      "step": 42406
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5572599172592163,
      "learning_rate": 0.0005512296921057833,
      "loss": 3.1183,
      "step": 42407
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.363975763320923,
      "learning_rate": 0.0005512274564222404,
      "loss": 2.9373,
      "step": 42408
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.7606089115142822,
      "learning_rate": 0.0005512252206919897,
      "loss": 3.0515,
      "step": 42409
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2873272895812988,
      "learning_rate": 0.0005512229849150315,
      "loss": 3.1759,
      "step": 42410
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6368988752365112,
      "learning_rate": 0.000551220749091366,
      "loss": 2.8826,
      "step": 42411
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8272628784179688,
      "learning_rate": 0.0005512185132209939,
      "loss": 2.8862,
      "step": 42412
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.972884178161621,
      "learning_rate": 0.0005512162773039157,
      "loss": 2.9784,
      "step": 42413
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8664343357086182,
      "learning_rate": 0.0005512140413401316,
      "loss": 2.8078,
      "step": 42414
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4872217178344727,
      "learning_rate": 0.0005512118053296419,
      "loss": 3.0991,
      "step": 42415
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1406052112579346,
      "learning_rate": 0.0005512095692724473,
      "loss": 2.9214,
      "step": 42416
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.526132345199585,
      "learning_rate": 0.0005512073331685482,
      "loss": 3.0339,
      "step": 42417
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5889699459075928,
      "learning_rate": 0.0005512050970179446,
      "loss": 3.14,
      "step": 42418
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5238746404647827,
      "learning_rate": 0.0005512028608206375,
      "loss": 2.9652,
      "step": 42419
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1488475799560547,
      "learning_rate": 0.000551200624576627,
      "loss": 2.9295,
      "step": 42420
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6622997522354126,
      "learning_rate": 0.0005511983882859135,
      "loss": 2.9881,
      "step": 42421
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.994148015975952,
      "learning_rate": 0.0005511961519484975,
      "loss": 2.9338,
      "step": 42422
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.805954933166504,
      "learning_rate": 0.0005511939155643795,
      "loss": 3.0978,
      "step": 42423
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.087827444076538,
      "learning_rate": 0.0005511916791335597,
      "loss": 2.9255,
      "step": 42424
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1877334117889404,
      "learning_rate": 0.0005511894426560387,
      "loss": 2.9011,
      "step": 42425
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6062654256820679,
      "learning_rate": 0.0005511872061318168,
      "loss": 3.0974,
      "step": 42426
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3571075201034546,
      "learning_rate": 0.0005511849695608944,
      "loss": 3.1112,
      "step": 42427
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2455646991729736,
      "learning_rate": 0.0005511827329432719,
      "loss": 2.9816,
      "step": 42428
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5212700366973877,
      "learning_rate": 0.00055118049627895,
      "loss": 3.0487,
      "step": 42429
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3611013889312744,
      "learning_rate": 0.0005511782595679288,
      "loss": 2.9904,
      "step": 42430
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6004657745361328,
      "learning_rate": 0.0005511760228102089,
      "loss": 3.1994,
      "step": 42431
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5356149673461914,
      "learning_rate": 0.0005511737860057906,
      "loss": 2.9329,
      "step": 42432
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4292199611663818,
      "learning_rate": 0.0005511715491546742,
      "loss": 3.0413,
      "step": 42433
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5206518173217773,
      "learning_rate": 0.0005511693122568605,
      "loss": 2.989,
      "step": 42434
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.801949381828308,
      "learning_rate": 0.0005511670753123494,
      "loss": 2.98,
      "step": 42435
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6382111310958862,
      "learning_rate": 0.0005511648383211418,
      "loss": 2.751,
      "step": 42436
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.695165753364563,
      "learning_rate": 0.0005511626012832379,
      "loss": 3.1266,
      "step": 42437
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4151047468185425,
      "learning_rate": 0.0005511603641986382,
      "loss": 3.0629,
      "step": 42438
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6055587530136108,
      "learning_rate": 0.0005511581270673429,
      "loss": 3.094,
      "step": 42439
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4813350439071655,
      "learning_rate": 0.0005511558898893527,
      "loss": 3.2593,
      "step": 42440
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2456090450286865,
      "learning_rate": 0.0005511536526646677,
      "loss": 3.02,
      "step": 42441
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.743371844291687,
      "learning_rate": 0.0005511514153932886,
      "loss": 3.1359,
      "step": 42442
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8343510627746582,
      "learning_rate": 0.0005511491780752158,
      "loss": 2.947,
      "step": 42443
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.371288537979126,
      "learning_rate": 0.0005511469407104496,
      "loss": 3.2338,
      "step": 42444
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3725813627243042,
      "learning_rate": 0.0005511447032989904,
      "loss": 3.0727,
      "step": 42445
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.143129587173462,
      "learning_rate": 0.0005511424658408386,
      "loss": 2.863,
      "step": 42446
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8917654752731323,
      "learning_rate": 0.0005511402283359946,
      "loss": 3.1145,
      "step": 42447
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.890493392944336,
      "learning_rate": 0.0005511379907844591,
      "loss": 3.0297,
      "step": 42448
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7634270191192627,
      "learning_rate": 0.0005511357531862321,
      "loss": 3.0827,
      "step": 42449
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8869351148605347,
      "learning_rate": 0.0005511335155413144,
      "loss": 2.8221,
      "step": 42450
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7596299648284912,
      "learning_rate": 0.0005511312778497063,
      "loss": 2.9866,
      "step": 42451
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3235204219818115,
      "learning_rate": 0.000551129040111408,
      "loss": 3.0047,
      "step": 42452
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8513083457946777,
      "learning_rate": 0.00055112680232642,
      "loss": 3.0937,
      "step": 42453
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8914291858673096,
      "learning_rate": 0.000551124564494743,
      "loss": 2.9682,
      "step": 42454
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.849130630493164,
      "learning_rate": 0.000551122326616377,
      "loss": 3.2885,
      "step": 42455
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.970418095588684,
      "learning_rate": 0.0005511200886913227,
      "loss": 3.1672,
      "step": 42456
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8337490558624268,
      "learning_rate": 0.0005511178507195804,
      "loss": 2.9576,
      "step": 42457
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3909175395965576,
      "learning_rate": 0.0005511156127011507,
      "loss": 3.0768,
      "step": 42458
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1534414291381836,
      "learning_rate": 0.0005511133746360338,
      "loss": 2.8913,
      "step": 42459
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3078198432922363,
      "learning_rate": 0.0005511111365242301,
      "loss": 3.0615,
      "step": 42460
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5834861993789673,
      "learning_rate": 0.0005511088983657401,
      "loss": 2.8081,
      "step": 42461
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3455207347869873,
      "learning_rate": 0.0005511066601605643,
      "loss": 2.8537,
      "step": 42462
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.749577522277832,
      "learning_rate": 0.000551104421908703,
      "loss": 3.0086,
      "step": 42463
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6953898668289185,
      "learning_rate": 0.0005511021836101567,
      "loss": 3.0368,
      "step": 42464
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4034675359725952,
      "learning_rate": 0.0005510999452649257,
      "loss": 2.8686,
      "step": 42465
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.123332977294922,
      "learning_rate": 0.0005510977068730105,
      "loss": 3.0562,
      "step": 42466
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3865914344787598,
      "learning_rate": 0.0005510954684344114,
      "loss": 3.1221,
      "step": 42467
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3438613414764404,
      "learning_rate": 0.0005510932299491291,
      "loss": 3.2495,
      "step": 42468
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5646047592163086,
      "learning_rate": 0.0005510909914171636,
      "loss": 2.8242,
      "step": 42469
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.3356573581695557,
      "learning_rate": 0.0005510887528385157,
      "loss": 3.1352,
      "step": 42470
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9563905000686646,
      "learning_rate": 0.0005510865142131857,
      "loss": 3.0708,
      "step": 42471
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9397026300430298,
      "learning_rate": 0.0005510842755411739,
      "loss": 3.0801,
      "step": 42472
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.045051097869873,
      "learning_rate": 0.0005510820368224808,
      "loss": 3.0222,
      "step": 42473
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4045530557632446,
      "learning_rate": 0.0005510797980571068,
      "loss": 3.1501,
      "step": 42474
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6436488628387451,
      "learning_rate": 0.0005510775592450524,
      "loss": 3.174,
      "step": 42475
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4205703735351562,
      "learning_rate": 0.0005510753203863179,
      "loss": 3.4291,
      "step": 42476
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6329840421676636,
      "learning_rate": 0.0005510730814809037,
      "loss": 2.9392,
      "step": 42477
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1730141639709473,
      "learning_rate": 0.0005510708425288102,
      "loss": 3.1949,
      "step": 42478
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.817218542098999,
      "learning_rate": 0.0005510686035300382,
      "loss": 3.1743,
      "step": 42479
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.171539545059204,
      "learning_rate": 0.0005510663644845875,
      "loss": 3.0529,
      "step": 42480
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.544185757637024,
      "learning_rate": 0.000551064125392459,
      "loss": 2.9528,
      "step": 42481
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0595340728759766,
      "learning_rate": 0.0005510618862536529,
      "loss": 3.0464,
      "step": 42482
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.730432152748108,
      "learning_rate": 0.0005510596470681696,
      "loss": 2.7522,
      "step": 42483
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9909731149673462,
      "learning_rate": 0.0005510574078360095,
      "loss": 2.8654,
      "step": 42484
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.68480384349823,
      "learning_rate": 0.0005510551685571733,
      "loss": 2.9956,
      "step": 42485
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8210608959197998,
      "learning_rate": 0.000551052929231661,
      "loss": 3.1767,
      "step": 42486
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4454963207244873,
      "learning_rate": 0.0005510506898594733,
      "loss": 3.1507,
      "step": 42487
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.492166519165039,
      "learning_rate": 0.0005510484504406106,
      "loss": 3.1748,
      "step": 42488
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7690253257751465,
      "learning_rate": 0.0005510462109750733,
      "loss": 3.1447,
      "step": 42489
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4054911136627197,
      "learning_rate": 0.0005510439714628616,
      "loss": 3.355,
      "step": 42490
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9372268915176392,
      "learning_rate": 0.0005510417319039763,
      "loss": 2.9204,
      "step": 42491
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8004286289215088,
      "learning_rate": 0.0005510394922984173,
      "loss": 2.8438,
      "step": 42492
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.715155601501465,
      "learning_rate": 0.0005510372526461855,
      "loss": 2.9391,
      "step": 42493
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7407726049423218,
      "learning_rate": 0.0005510350129472812,
      "loss": 2.9293,
      "step": 42494
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4246340990066528,
      "learning_rate": 0.0005510327732017047,
      "loss": 3.1802,
      "step": 42495
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.661410331726074,
      "learning_rate": 0.0005510305334094565,
      "loss": 3.1114,
      "step": 42496
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.237220525741577,
      "learning_rate": 0.0005510282935705369,
      "loss": 2.9337,
      "step": 42497
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.538433313369751,
      "learning_rate": 0.0005510260536849464,
      "loss": 2.948,
      "step": 42498
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.492777943611145,
      "learning_rate": 0.0005510238137526854,
      "loss": 2.8953,
      "step": 42499
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1802775859832764,
      "learning_rate": 0.0005510215737737545,
      "loss": 3.15,
      "step": 42500
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5858385562896729,
      "learning_rate": 0.0005510193337481538,
      "loss": 3.2501,
      "step": 42501
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4996378421783447,
      "learning_rate": 0.0005510170936758839,
      "loss": 3.1457,
      "step": 42502
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3845982551574707,
      "learning_rate": 0.0005510148535569452,
      "loss": 3.248,
      "step": 42503
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8436245918273926,
      "learning_rate": 0.0005510126133913381,
      "loss": 3.2599,
      "step": 42504
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6663975715637207,
      "learning_rate": 0.000551010373179063,
      "loss": 2.8111,
      "step": 42505
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.346246361732483,
      "learning_rate": 0.0005510081329201203,
      "loss": 3.0578,
      "step": 42506
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.34909188747406,
      "learning_rate": 0.0005510058926145105,
      "loss": 2.8844,
      "step": 42507
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6108982563018799,
      "learning_rate": 0.000551003652262234,
      "loss": 2.8931,
      "step": 42508
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.653124213218689,
      "learning_rate": 0.0005510014118632912,
      "loss": 3.0432,
      "step": 42509
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3912452459335327,
      "learning_rate": 0.0005509991714176824,
      "loss": 2.8764,
      "step": 42510
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7266428470611572,
      "learning_rate": 0.0005509969309254082,
      "loss": 3.2513,
      "step": 42511
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6856365203857422,
      "learning_rate": 0.0005509946903864689,
      "loss": 3.0842,
      "step": 42512
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.762510895729065,
      "learning_rate": 0.0005509924498008649,
      "loss": 3.0191,
      "step": 42513
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5207942724227905,
      "learning_rate": 0.0005509902091685966,
      "loss": 2.8558,
      "step": 42514
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3295156955718994,
      "learning_rate": 0.0005509879684896646,
      "loss": 3.1968,
      "step": 42515
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4439820051193237,
      "learning_rate": 0.0005509857277640693,
      "loss": 3.1672,
      "step": 42516
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.823103666305542,
      "learning_rate": 0.0005509834869918109,
      "loss": 3.2618,
      "step": 42517
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5240843296051025,
      "learning_rate": 0.00055098124617289,
      "loss": 3.0089,
      "step": 42518
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5567294359207153,
      "learning_rate": 0.0005509790053073068,
      "loss": 3.0275,
      "step": 42519
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6599445343017578,
      "learning_rate": 0.0005509767643950619,
      "loss": 3.0062,
      "step": 42520
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6015228033065796,
      "learning_rate": 0.0005509745234361557,
      "loss": 2.9465,
      "step": 42521
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8553175926208496,
      "learning_rate": 0.0005509722824305887,
      "loss": 3.1597,
      "step": 42522
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.5536961555480957,
      "learning_rate": 0.0005509700413783612,
      "loss": 3.0809,
      "step": 42523
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.528917670249939,
      "learning_rate": 0.0005509678002794735,
      "loss": 3.2659,
      "step": 42524
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6573587656021118,
      "learning_rate": 0.0005509655591339263,
      "loss": 3.0795,
      "step": 42525
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5182290077209473,
      "learning_rate": 0.0005509633179417198,
      "loss": 3.0399,
      "step": 42526
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4025706052780151,
      "learning_rate": 0.0005509610767028544,
      "loss": 2.8525,
      "step": 42527
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.507356882095337,
      "learning_rate": 0.0005509588354173306,
      "loss": 3.2086,
      "step": 42528
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8464269638061523,
      "learning_rate": 0.000550956594085149,
      "loss": 3.2402,
      "step": 42529
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8948934078216553,
      "learning_rate": 0.0005509543527063098,
      "loss": 3.2241,
      "step": 42530
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6921826601028442,
      "learning_rate": 0.0005509521112808133,
      "loss": 3.0282,
      "step": 42531
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2842005491256714,
      "learning_rate": 0.0005509498698086601,
      "loss": 2.6628,
      "step": 42532
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8980956077575684,
      "learning_rate": 0.0005509476282898506,
      "loss": 3.1914,
      "step": 42533
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.646803379058838,
      "learning_rate": 0.0005509453867243852,
      "loss": 3.0398,
      "step": 42534
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.50335693359375,
      "learning_rate": 0.0005509431451122644,
      "loss": 3.0803,
      "step": 42535
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3606469631195068,
      "learning_rate": 0.0005509409034534884,
      "loss": 3.3717,
      "step": 42536
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.688814401626587,
      "learning_rate": 0.0005509386617480579,
      "loss": 3.3522,
      "step": 42537
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4423092603683472,
      "learning_rate": 0.000550936419995973,
      "loss": 3.1552,
      "step": 42538
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9705694913864136,
      "learning_rate": 0.0005509341781972344,
      "loss": 3.0064,
      "step": 42539
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4347901344299316,
      "learning_rate": 0.0005509319363518423,
      "loss": 3.1222,
      "step": 42540
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4954673051834106,
      "learning_rate": 0.0005509296944597973,
      "loss": 3.2197,
      "step": 42541
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6429482698440552,
      "learning_rate": 0.0005509274525210997,
      "loss": 3.2776,
      "step": 42542
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.830519437789917,
      "learning_rate": 0.0005509252105357499,
      "loss": 2.9354,
      "step": 42543
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.712286114692688,
      "learning_rate": 0.0005509229685037485,
      "loss": 2.9101,
      "step": 42544
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8293094635009766,
      "learning_rate": 0.0005509207264250957,
      "loss": 3.2335,
      "step": 42545
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2305240631103516,
      "learning_rate": 0.0005509184842997921,
      "loss": 2.9101,
      "step": 42546
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.904194712638855,
      "learning_rate": 0.0005509162421278379,
      "loss": 3.2794,
      "step": 42547
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6297574043273926,
      "learning_rate": 0.0005509139999092336,
      "loss": 2.8259,
      "step": 42548
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.4466960430145264,
      "learning_rate": 0.0005509117576439797,
      "loss": 3.0887,
      "step": 42549
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.993817925453186,
      "learning_rate": 0.0005509095153320765,
      "loss": 2.9972,
      "step": 42550
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.270779609680176,
      "learning_rate": 0.0005509072729735246,
      "loss": 3.0715,
      "step": 42551
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8828222751617432,
      "learning_rate": 0.0005509050305683243,
      "loss": 3.0563,
      "step": 42552
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.0343899726867676,
      "learning_rate": 0.000550902788116476,
      "loss": 3.2288,
      "step": 42553
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.306544780731201,
      "learning_rate": 0.0005509005456179802,
      "loss": 3.0455,
      "step": 42554
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4390314817428589,
      "learning_rate": 0.0005508983030728371,
      "loss": 3.0175,
      "step": 42555
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5990616083145142,
      "learning_rate": 0.0005508960604810474,
      "loss": 3.0326,
      "step": 42556
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7635704278945923,
      "learning_rate": 0.0005508938178426112,
      "loss": 2.909,
      "step": 42557
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8080384731292725,
      "learning_rate": 0.0005508915751575293,
      "loss": 3.4851,
      "step": 42558
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.605277180671692,
      "learning_rate": 0.0005508893324258019,
      "loss": 3.1569,
      "step": 42559
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5850205421447754,
      "learning_rate": 0.0005508870896474294,
      "loss": 3.1225,
      "step": 42560
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2761316299438477,
      "learning_rate": 0.0005508848468224123,
      "loss": 3.3151,
      "step": 42561
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3673142194747925,
      "learning_rate": 0.0005508826039507508,
      "loss": 3.0961,
      "step": 42562
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3530142307281494,
      "learning_rate": 0.0005508803610324456,
      "loss": 2.9379,
      "step": 42563
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5392217636108398,
      "learning_rate": 0.000550878118067497,
      "loss": 3.1419,
      "step": 42564
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.066110849380493,
      "learning_rate": 0.0005508758750559055,
      "loss": 3.0666,
      "step": 42565
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5856153964996338,
      "learning_rate": 0.0005508736319976713,
      "loss": 3.0115,
      "step": 42566
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3791171312332153,
      "learning_rate": 0.0005508713888927951,
      "loss": 2.7937,
      "step": 42567
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5265038013458252,
      "learning_rate": 0.000550869145741277,
      "loss": 2.9127,
      "step": 42568
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6601178646087646,
      "learning_rate": 0.0005508669025431178,
      "loss": 3.0608,
      "step": 42569
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5506513118743896,
      "learning_rate": 0.0005508646592983175,
      "loss": 2.9817,
      "step": 42570
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7494914531707764,
      "learning_rate": 0.0005508624160068769,
      "loss": 3.0826,
      "step": 42571
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6253422498703003,
      "learning_rate": 0.0005508601726687961,
      "loss": 3.2147,
      "step": 42572
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8294634819030762,
      "learning_rate": 0.0005508579292840757,
      "loss": 3.0378,
      "step": 42573
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3712034225463867,
      "learning_rate": 0.0005508556858527161,
      "loss": 3.1766,
      "step": 42574
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4969197511672974,
      "learning_rate": 0.0005508534423747178,
      "loss": 3.0432,
      "step": 42575
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6494975090026855,
      "learning_rate": 0.0005508511988500809,
      "loss": 2.9803,
      "step": 42576
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8517290353775024,
      "learning_rate": 0.0005508489552788062,
      "loss": 3.1719,
      "step": 42577
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6721688508987427,
      "learning_rate": 0.0005508467116608939,
      "loss": 3.0771,
      "step": 42578
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9300614595413208,
      "learning_rate": 0.0005508444679963444,
      "loss": 3.0284,
      "step": 42579
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7143586874008179,
      "learning_rate": 0.0005508422242851581,
      "loss": 2.7455,
      "step": 42580
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.0890986919403076,
      "learning_rate": 0.0005508399805273357,
      "loss": 3.0492,
      "step": 42581
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6284428834915161,
      "learning_rate": 0.0005508377367228772,
      "loss": 3.1761,
      "step": 42582
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7347174882888794,
      "learning_rate": 0.0005508354928717835,
      "loss": 3.1916,
      "step": 42583
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3585602045059204,
      "learning_rate": 0.0005508332489740545,
      "loss": 2.9596,
      "step": 42584
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.2310900688171387,
      "learning_rate": 0.0005508310050296911,
      "loss": 3.1342,
      "step": 42585
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.8725731372833252,
      "learning_rate": 0.0005508287610386932,
      "loss": 3.1381,
      "step": 42586
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9906408786773682,
      "learning_rate": 0.0005508265170010617,
      "loss": 2.8643,
      "step": 42587
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7402056455612183,
      "learning_rate": 0.0005508242729167967,
      "loss": 3.2138,
      "step": 42588
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9903337955474854,
      "learning_rate": 0.0005508220287858988,
      "loss": 3.3334,
      "step": 42589
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6058263778686523,
      "learning_rate": 0.0005508197846083683,
      "loss": 3.2143,
      "step": 42590
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4758697748184204,
      "learning_rate": 0.0005508175403842057,
      "loss": 2.9897,
      "step": 42591
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.546186089515686,
      "learning_rate": 0.0005508152961134114,
      "loss": 3.0697,
      "step": 42592
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6342397928237915,
      "learning_rate": 0.0005508130517959858,
      "loss": 3.0948,
      "step": 42593
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6701215505599976,
      "learning_rate": 0.0005508108074319293,
      "loss": 3.1381,
      "step": 42594
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5561827421188354,
      "learning_rate": 0.0005508085630212424,
      "loss": 2.9663,
      "step": 42595
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9241266250610352,
      "learning_rate": 0.0005508063185639254,
      "loss": 3.0724,
      "step": 42596
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4789159297943115,
      "learning_rate": 0.0005508040740599788,
      "loss": 2.9923,
      "step": 42597
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4733980894088745,
      "learning_rate": 0.000550801829509403,
      "loss": 2.9612,
      "step": 42598
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7898675203323364,
      "learning_rate": 0.0005507995849121984,
      "loss": 3.012,
      "step": 42599
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.669155240058899,
      "learning_rate": 0.0005507973402683652,
      "loss": 3.2529,
      "step": 42600
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4102448225021362,
      "learning_rate": 0.0005507950955779043,
      "loss": 3.3053,
      "step": 42601
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.802903175354004,
      "learning_rate": 0.0005507928508408158,
      "loss": 2.6785,
      "step": 42602
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.034771680831909,
      "learning_rate": 0.0005507906060571001,
      "loss": 3.0189,
      "step": 42603
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.3860217332839966,
      "learning_rate": 0.0005507883612267578,
      "loss": 3.0767,
      "step": 42604
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7568691968917847,
      "learning_rate": 0.0005507861163497892,
      "loss": 3.0752,
      "step": 42605
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.788925290107727,
      "learning_rate": 0.0005507838714261947,
      "loss": 2.9836,
      "step": 42606
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.2291631698608398,
      "learning_rate": 0.0005507816264559747,
      "loss": 3.0424,
      "step": 42607
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5000275373458862,
      "learning_rate": 0.0005507793814391297,
      "loss": 2.9987,
      "step": 42608
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5448158979415894,
      "learning_rate": 0.0005507771363756601,
      "loss": 3.1502,
      "step": 42609
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5395750999450684,
      "learning_rate": 0.0005507748912655663,
      "loss": 3.1192,
      "step": 42610
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9231544733047485,
      "learning_rate": 0.0005507726461088487,
      "loss": 3.2102,
      "step": 42611
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4988322257995605,
      "learning_rate": 0.0005507704009055077,
      "loss": 2.8358,
      "step": 42612
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.9000617265701294,
      "learning_rate": 0.0005507681556555438,
      "loss": 2.8964,
      "step": 42613
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.625770926475525,
      "learning_rate": 0.0005507659103589573,
      "loss": 3.1089,
      "step": 42614
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4818079471588135,
      "learning_rate": 0.0005507636650157486,
      "loss": 2.9251,
      "step": 42615
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.4947192668914795,
      "learning_rate": 0.0005507614196259184,
      "loss": 3.0515,
      "step": 42616
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3960490226745605,
      "learning_rate": 0.0005507591741894668,
      "loss": 2.83,
      "step": 42617
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.758393406867981,
      "learning_rate": 0.0005507569287063945,
      "loss": 2.9592,
      "step": 42618
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.6837061643600464,
      "learning_rate": 0.0005507546831767016,
      "loss": 3.1562,
      "step": 42619
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.3922953605651855,
      "learning_rate": 0.0005507524376003886,
      "loss": 2.9509,
      "step": 42620
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.7895770072937012,
      "learning_rate": 0.0005507501919774562,
      "loss": 3.1773,
      "step": 42621
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.5317256450653076,
      "learning_rate": 0.0005507479463079044,
      "loss": 2.9477,
      "step": 42622
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4871013164520264,
      "learning_rate": 0.0005507457005917339,
      "loss": 3.1451,
      "step": 42623
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.894931674003601,
      "learning_rate": 0.0005507434548289451,
      "loss": 3.0501,
      "step": 42624
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9340806007385254,
      "learning_rate": 0.0005507412090195383,
      "loss": 2.8314,
      "step": 42625
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.225358009338379,
      "learning_rate": 0.0005507389631635141,
      "loss": 3.237,
      "step": 42626
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8311126232147217,
      "learning_rate": 0.0005507367172608726,
      "loss": 3.2896,
      "step": 42627
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2393648624420166,
      "learning_rate": 0.0005507344713116145,
      "loss": 2.9821,
      "step": 42628
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.061937093734741,
      "learning_rate": 0.0005507322253157401,
      "loss": 3.1708,
      "step": 42629
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4975804090499878,
      "learning_rate": 0.00055072997927325,
      "loss": 2.8501,
      "step": 42630
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2933287620544434,
      "learning_rate": 0.0005507277331841442,
      "loss": 2.763,
      "step": 42631
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3222055435180664,
      "learning_rate": 0.0005507254870484236,
      "loss": 2.8867,
      "step": 42632
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8753491640090942,
      "learning_rate": 0.0005507232408660882,
      "loss": 3.0892,
      "step": 42633
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.402096748352051,
      "learning_rate": 0.0005507209946371388,
      "loss": 3.0654,
      "step": 42634
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2336952686309814,
      "learning_rate": 0.0005507187483615756,
      "loss": 3.0282,
      "step": 42635
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.359825849533081,
      "learning_rate": 0.000550716502039399,
      "loss": 2.9908,
      "step": 42636
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7450313568115234,
      "learning_rate": 0.0005507142556706095,
      "loss": 3.0487,
      "step": 42637
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2610886096954346,
      "learning_rate": 0.0005507120092552074,
      "loss": 3.1659,
      "step": 42638
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4588068723678589,
      "learning_rate": 0.0005507097627931933,
      "loss": 2.7699,
      "step": 42639
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7928873300552368,
      "learning_rate": 0.0005507075162845675,
      "loss": 3.0013,
      "step": 42640
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.740525245666504,
      "learning_rate": 0.0005507052697293304,
      "loss": 2.9462,
      "step": 42641
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6397451162338257,
      "learning_rate": 0.0005507030231274825,
      "loss": 3.2029,
      "step": 42642
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7192223072052002,
      "learning_rate": 0.0005507007764790242,
      "loss": 3.0657,
      "step": 42643
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8094613552093506,
      "learning_rate": 0.0005506985297839559,
      "loss": 3.2119,
      "step": 42644
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7192436456680298,
      "learning_rate": 0.000550696283042278,
      "loss": 2.7641,
      "step": 42645
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4844812154769897,
      "learning_rate": 0.0005506940362539909,
      "loss": 3.0667,
      "step": 42646
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.058415174484253,
      "learning_rate": 0.000550691789419095,
      "loss": 3.1008,
      "step": 42647
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2628161907196045,
      "learning_rate": 0.0005506895425375908,
      "loss": 2.9244,
      "step": 42648
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2605997323989868,
      "learning_rate": 0.0005506872956094787,
      "loss": 2.8792,
      "step": 42649
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8977783918380737,
      "learning_rate": 0.0005506850486347592,
      "loss": 3.0593,
      "step": 42650
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6174771785736084,
      "learning_rate": 0.0005506828016134325,
      "loss": 2.7871,
      "step": 42651
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6958273649215698,
      "learning_rate": 0.0005506805545454992,
      "loss": 3.1488,
      "step": 42652
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5605415105819702,
      "learning_rate": 0.0005506783074309596,
      "loss": 3.2136,
      "step": 42653
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3989843130111694,
      "learning_rate": 0.0005506760602698141,
      "loss": 3.0879,
      "step": 42654
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.514988660812378,
      "learning_rate": 0.0005506738130620633,
      "loss": 2.9265,
      "step": 42655
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5150322914123535,
      "learning_rate": 0.0005506715658077075,
      "loss": 3.0675,
      "step": 42656
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5244014263153076,
      "learning_rate": 0.0005506693185067471,
      "loss": 2.9748,
      "step": 42657
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4438211917877197,
      "learning_rate": 0.0005506670711591825,
      "loss": 3.153,
      "step": 42658
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3554081916809082,
      "learning_rate": 0.0005506648237650143,
      "loss": 3.0168,
      "step": 42659
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6532384157180786,
      "learning_rate": 0.0005506625763242427,
      "loss": 2.7918,
      "step": 42660
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5478755235671997,
      "learning_rate": 0.000550660328836868,
      "loss": 3.0722,
      "step": 42661
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.225975751876831,
      "learning_rate": 0.000550658081302891,
      "loss": 2.8882,
      "step": 42662
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5164927244186401,
      "learning_rate": 0.000550655833722312,
      "loss": 3.0672,
      "step": 42663
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6475263833999634,
      "learning_rate": 0.0005506535860951312,
      "loss": 3.0508,
      "step": 42664
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5389132499694824,
      "learning_rate": 0.0005506513384213493,
      "loss": 3.0432,
      "step": 42665
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1919283866882324,
      "learning_rate": 0.0005506490907009665,
      "loss": 3.1791,
      "step": 42666
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6414788961410522,
      "learning_rate": 0.0005506468429339833,
      "loss": 3.0067,
      "step": 42667
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.942681074142456,
      "learning_rate": 0.0005506445951204002,
      "loss": 3.3207,
      "step": 42668
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.289720296859741,
      "learning_rate": 0.0005506423472602175,
      "loss": 3.2507,
      "step": 42669
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5835518836975098,
      "learning_rate": 0.0005506400993534356,
      "loss": 3.1093,
      "step": 42670
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.296766757965088,
      "learning_rate": 0.000550637851400055,
      "loss": 2.9175,
      "step": 42671
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.8922393321990967,
      "learning_rate": 0.0005506356034000761,
      "loss": 3.3616,
      "step": 42672
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.728499412536621,
      "learning_rate": 0.0005506333553534993,
      "loss": 3.0208,
      "step": 42673
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.585052728652954,
      "learning_rate": 0.0005506311072603251,
      "loss": 3.0046,
      "step": 42674
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8428212404251099,
      "learning_rate": 0.0005506288591205537,
      "loss": 3.0424,
      "step": 42675
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9526071548461914,
      "learning_rate": 0.0005506266109341859,
      "loss": 3.1356,
      "step": 42676
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7657902240753174,
      "learning_rate": 0.0005506243627012216,
      "loss": 3.0951,
      "step": 42677
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7345646619796753,
      "learning_rate": 0.0005506221144216617,
      "loss": 2.8815,
      "step": 42678
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.654359817504883,
      "learning_rate": 0.0005506198660955063,
      "loss": 3.0223,
      "step": 42679
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.30131459236145,
      "learning_rate": 0.000550617617722756,
      "loss": 3.1379,
      "step": 42680
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4268163442611694,
      "learning_rate": 0.0005506153693034112,
      "loss": 3.0216,
      "step": 42681
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0505619049072266,
      "learning_rate": 0.0005506131208374722,
      "loss": 2.9777,
      "step": 42682
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.147024631500244,
      "learning_rate": 0.0005506108723249395,
      "loss": 2.9106,
      "step": 42683
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5580723285675049,
      "learning_rate": 0.0005506086237658135,
      "loss": 3.0468,
      "step": 42684
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.278577208518982,
      "learning_rate": 0.0005506063751600945,
      "loss": 2.8007,
      "step": 42685
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9937697649002075,
      "learning_rate": 0.0005506041265077833,
      "loss": 3.0849,
      "step": 42686
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8495980501174927,
      "learning_rate": 0.0005506018778088799,
      "loss": 3.1863,
      "step": 42687
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6017662286758423,
      "learning_rate": 0.0005505996290633849,
      "loss": 3.0192,
      "step": 42688
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7465016841888428,
      "learning_rate": 0.0005505973802712987,
      "loss": 3.2027,
      "step": 42689
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.728353500366211,
      "learning_rate": 0.0005505951314326218,
      "loss": 3.2121,
      "step": 42690
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.568982481956482,
      "learning_rate": 0.0005505928825473546,
      "loss": 2.8206,
      "step": 42691
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4118794202804565,
      "learning_rate": 0.0005505906336154973,
      "loss": 2.9156,
      "step": 42692
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2753604650497437,
      "learning_rate": 0.0005505883846370505,
      "loss": 3.1091,
      "step": 42693
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6284438371658325,
      "learning_rate": 0.0005505861356120145,
      "loss": 2.9967,
      "step": 42694
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6280066967010498,
      "learning_rate": 0.0005505838865403899,
      "loss": 3.3965,
      "step": 42695
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4929330348968506,
      "learning_rate": 0.000550581637422177,
      "loss": 3.0967,
      "step": 42696
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4543766975402832,
      "learning_rate": 0.0005505793882573763,
      "loss": 3.0688,
      "step": 42697
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9027550220489502,
      "learning_rate": 0.0005505771390459881,
      "loss": 3.1238,
      "step": 42698
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6410224437713623,
      "learning_rate": 0.0005505748897880129,
      "loss": 3.0966,
      "step": 42699
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8844413757324219,
      "learning_rate": 0.000550572640483451,
      "loss": 3.0948,
      "step": 42700
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.3032383918762207,
      "learning_rate": 0.0005505703911323032,
      "loss": 2.9828,
      "step": 42701
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3583674430847168,
      "learning_rate": 0.0005505681417345694,
      "loss": 3.1323,
      "step": 42702
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1376514434814453,
      "learning_rate": 0.0005505658922902503,
      "loss": 2.8788,
      "step": 42703
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5109772682189941,
      "learning_rate": 0.0005505636427993463,
      "loss": 2.838,
      "step": 42704
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.342995047569275,
      "learning_rate": 0.0005505613932618577,
      "loss": 3.0738,
      "step": 42705
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7629481554031372,
      "learning_rate": 0.0005505591436777851,
      "loss": 3.2046,
      "step": 42706
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2684931755065918,
      "learning_rate": 0.0005505568940471289,
      "loss": 3.1529,
      "step": 42707
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.373423457145691,
      "learning_rate": 0.0005505546443698892,
      "loss": 3.1779,
      "step": 42708
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1355810165405273,
      "learning_rate": 0.000550552394646067,
      "loss": 2.7351,
      "step": 42709
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.657311201095581,
      "learning_rate": 0.000550550144875662,
      "loss": 3.1099,
      "step": 42710
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2839112281799316,
      "learning_rate": 0.0005505478950586753,
      "loss": 2.9894,
      "step": 42711
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6161843538284302,
      "learning_rate": 0.0005505456451951069,
      "loss": 3.1467,
      "step": 42712
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5759788751602173,
      "learning_rate": 0.0005505433952849573,
      "loss": 3.2268,
      "step": 42713
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.647764205932617,
      "learning_rate": 0.000550541145328227,
      "loss": 3.2097,
      "step": 42714
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4635956287384033,
      "learning_rate": 0.0005505388953249164,
      "loss": 3.1834,
      "step": 42715
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5573606491088867,
      "learning_rate": 0.0005505366452750258,
      "loss": 3.0845,
      "step": 42716
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5695948600769043,
      "learning_rate": 0.0005505343951785558,
      "loss": 3.0447,
      "step": 42717
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.76875638961792,
      "learning_rate": 0.0005505321450355067,
      "loss": 3.149,
      "step": 42718
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.960866093635559,
      "learning_rate": 0.0005505298948458788,
      "loss": 3.1427,
      "step": 42719
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7352159023284912,
      "learning_rate": 0.0005505276446096729,
      "loss": 3.0435,
      "step": 42720
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0590696334838867,
      "learning_rate": 0.000550525394326889,
      "loss": 3.1675,
      "step": 42721
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.8615314960479736,
      "learning_rate": 0.0005505231439975279,
      "loss": 2.8183,
      "step": 42722
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.498866558074951,
      "learning_rate": 0.0005505208936215896,
      "loss": 2.9619,
      "step": 42723
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6083852052688599,
      "learning_rate": 0.0005505186431990749,
      "loss": 2.9325,
      "step": 42724
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2232465744018555,
      "learning_rate": 0.0005505163927299839,
      "loss": 3.186,
      "step": 42725
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.9741389751434326,
      "learning_rate": 0.0005505141422143172,
      "loss": 3.1049,
      "step": 42726
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.037315845489502,
      "learning_rate": 0.0005505118916520754,
      "loss": 2.9702,
      "step": 42727
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9399107694625854,
      "learning_rate": 0.0005505096410432584,
      "loss": 3.2837,
      "step": 42728
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7840616703033447,
      "learning_rate": 0.0005505073903878672,
      "loss": 2.942,
      "step": 42729
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7731622457504272,
      "learning_rate": 0.0005505051396859019,
      "loss": 2.8871,
      "step": 42730
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4407774209976196,
      "learning_rate": 0.0005505028889373629,
      "loss": 3.0861,
      "step": 42731
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5200037956237793,
      "learning_rate": 0.0005505006381422506,
      "loss": 3.1044,
      "step": 42732
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.933424472808838,
      "learning_rate": 0.0005504983873005657,
      "loss": 3.012,
      "step": 42733
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6027618646621704,
      "learning_rate": 0.0005504961364123082,
      "loss": 3.161,
      "step": 42734
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.653111219406128,
      "learning_rate": 0.0005504938854774789,
      "loss": 3.2349,
      "step": 42735
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4576456546783447,
      "learning_rate": 0.000550491634496078,
      "loss": 2.9683,
      "step": 42736
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6383572816848755,
      "learning_rate": 0.000550489383468106,
      "loss": 2.8142,
      "step": 42737
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.831409215927124,
      "learning_rate": 0.0005504871323935633,
      "loss": 3.2955,
      "step": 42738
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5768882036209106,
      "learning_rate": 0.0005504848812724504,
      "loss": 3.2132,
      "step": 42739
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5145052671432495,
      "learning_rate": 0.0005504826301047673,
      "loss": 3.0116,
      "step": 42740
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4189318418502808,
      "learning_rate": 0.0005504803788905152,
      "loss": 3.0098,
      "step": 42741
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2479993104934692,
      "learning_rate": 0.0005504781276296938,
      "loss": 2.9607,
      "step": 42742
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4344346523284912,
      "learning_rate": 0.0005504758763223038,
      "loss": 2.8713,
      "step": 42743
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.323960304260254,
      "learning_rate": 0.0005504736249683456,
      "loss": 2.9764,
      "step": 42744
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7350172996520996,
      "learning_rate": 0.0005504713735678198,
      "loss": 3.2293,
      "step": 42745
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6474591493606567,
      "learning_rate": 0.0005504691221207264,
      "loss": 2.8613,
      "step": 42746
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5712743997573853,
      "learning_rate": 0.0005504668706270662,
      "loss": 3.0351,
      "step": 42747
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.340708613395691,
      "learning_rate": 0.0005504646190868395,
      "loss": 3.12,
      "step": 42748
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9451476335525513,
      "learning_rate": 0.0005504623675000466,
      "loss": 3.1709,
      "step": 42749
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5443834066390991,
      "learning_rate": 0.0005504601158666881,
      "loss": 3.1549,
      "step": 42750
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5708250999450684,
      "learning_rate": 0.0005504578641867643,
      "loss": 3.0851,
      "step": 42751
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.367553472518921,
      "learning_rate": 0.0005504556124602758,
      "loss": 3.0427,
      "step": 42752
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.478721857070923,
      "learning_rate": 0.0005504533606872226,
      "loss": 3.0306,
      "step": 42753
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9723336696624756,
      "learning_rate": 0.0005504511088676057,
      "loss": 2.9624,
      "step": 42754
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9473034143447876,
      "learning_rate": 0.0005504488570014249,
      "loss": 3.1751,
      "step": 42755
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7846180200576782,
      "learning_rate": 0.0005504466050886811,
      "loss": 3.0419,
      "step": 42756
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.310908317565918,
      "learning_rate": 0.0005504443531293746,
      "loss": 3.0709,
      "step": 42757
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5963919162750244,
      "learning_rate": 0.0005504421011235056,
      "loss": 3.1101,
      "step": 42758
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3706165552139282,
      "learning_rate": 0.0005504398490710747,
      "loss": 3.021,
      "step": 42759
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4564015865325928,
      "learning_rate": 0.0005504375969720825,
      "loss": 3.1632,
      "step": 42760
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.9533913135528564,
      "learning_rate": 0.0005504353448265291,
      "loss": 3.0964,
      "step": 42761
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9278814792633057,
      "learning_rate": 0.000550433092634415,
      "loss": 3.1821,
      "step": 42762
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.710904598236084,
      "learning_rate": 0.0005504308403957407,
      "loss": 2.9066,
      "step": 42763
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3370234966278076,
      "learning_rate": 0.0005504285881105065,
      "loss": 2.7153,
      "step": 42764
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7684236764907837,
      "learning_rate": 0.0005504263357787131,
      "loss": 3.2196,
      "step": 42765
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5798245668411255,
      "learning_rate": 0.0005504240834003606,
      "loss": 2.8607,
      "step": 42766
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.469660758972168,
      "learning_rate": 0.0005504218309754495,
      "loss": 3.0967,
      "step": 42767
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4180395603179932,
      "learning_rate": 0.0005504195785039803,
      "loss": 3.1754,
      "step": 42768
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4770689010620117,
      "learning_rate": 0.0005504173259859534,
      "loss": 3.1595,
      "step": 42769
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.563791036605835,
      "learning_rate": 0.0005504150734213692,
      "loss": 3.0468,
      "step": 42770
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.392225980758667,
      "learning_rate": 0.0005504128208102281,
      "loss": 2.8875,
      "step": 42771
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.47142493724823,
      "learning_rate": 0.0005504105681525303,
      "loss": 3.1882,
      "step": 42772
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9366594552993774,
      "learning_rate": 0.0005504083154482767,
      "loss": 3.0522,
      "step": 42773
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.04607892036438,
      "learning_rate": 0.0005504060626974675,
      "loss": 3.183,
      "step": 42774
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4836552143096924,
      "learning_rate": 0.0005504038099001029,
      "loss": 2.8103,
      "step": 42775
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5000698566436768,
      "learning_rate": 0.0005504015570561837,
      "loss": 3.0753,
      "step": 42776
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4069087505340576,
      "learning_rate": 0.00055039930416571,
      "loss": 2.8498,
      "step": 42777
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5469274520874023,
      "learning_rate": 0.0005503970512286823,
      "loss": 3.1887,
      "step": 42778
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7779959440231323,
      "learning_rate": 0.0005503947982451013,
      "loss": 3.0296,
      "step": 42779
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5694507360458374,
      "learning_rate": 0.000550392545214967,
      "loss": 3.0501,
      "step": 42780
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7208534479141235,
      "learning_rate": 0.00055039029213828,
      "loss": 3.032,
      "step": 42781
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8964167833328247,
      "learning_rate": 0.0005503880390150406,
      "loss": 2.9877,
      "step": 42782
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3492605686187744,
      "learning_rate": 0.0005503857858452496,
      "loss": 3.1221,
      "step": 42783
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7151916027069092,
      "learning_rate": 0.0005503835326289069,
      "loss": 3.0838,
      "step": 42784
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7450439929962158,
      "learning_rate": 0.0005503812793660134,
      "loss": 3.2966,
      "step": 42785
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.473027229309082,
      "learning_rate": 0.0005503790260565692,
      "loss": 3.0892,
      "step": 42786
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2919062376022339,
      "learning_rate": 0.0005503767727005747,
      "loss": 3.0707,
      "step": 42787
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.99329674243927,
      "learning_rate": 0.0005503745192980306,
      "loss": 2.9613,
      "step": 42788
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8574647903442383,
      "learning_rate": 0.0005503722658489371,
      "loss": 2.9304,
      "step": 42789
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7001724243164062,
      "learning_rate": 0.0005503700123532947,
      "loss": 3.282,
      "step": 42790
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.390863299369812,
      "learning_rate": 0.0005503677588111036,
      "loss": 3.1386,
      "step": 42791
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8462268114089966,
      "learning_rate": 0.0005503655052223646,
      "loss": 3.1166,
      "step": 42792
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4924595355987549,
      "learning_rate": 0.000550363251587078,
      "loss": 3.2997,
      "step": 42793
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5524206161499023,
      "learning_rate": 0.000550360997905244,
      "loss": 3.1536,
      "step": 42794
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5366904735565186,
      "learning_rate": 0.0005503587441768632,
      "loss": 2.9883,
      "step": 42795
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5462487936019897,
      "learning_rate": 0.000550356490401936,
      "loss": 3.1614,
      "step": 42796
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6802741289138794,
      "learning_rate": 0.0005503542365804628,
      "loss": 3.1318,
      "step": 42797
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8501653671264648,
      "learning_rate": 0.0005503519827124439,
      "loss": 2.8288,
      "step": 42798
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6905944347381592,
      "learning_rate": 0.0005503497287978799,
      "loss": 2.983,
      "step": 42799
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8505241870880127,
      "learning_rate": 0.0005503474748367712,
      "loss": 3.1198,
      "step": 42800
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.86067795753479,
      "learning_rate": 0.0005503452208291182,
      "loss": 3.1309,
      "step": 42801
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3749053478240967,
      "learning_rate": 0.0005503429667749213,
      "loss": 3.0726,
      "step": 42802
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.499969720840454,
      "learning_rate": 0.0005503407126741809,
      "loss": 3.2902,
      "step": 42803
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8589799404144287,
      "learning_rate": 0.0005503384585268975,
      "loss": 3.066,
      "step": 42804
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4157439470291138,
      "learning_rate": 0.0005503362043330713,
      "loss": 3.3332,
      "step": 42805
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6864436864852905,
      "learning_rate": 0.0005503339500927029,
      "loss": 2.953,
      "step": 42806
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9874147176742554,
      "learning_rate": 0.0005503316958057928,
      "loss": 2.9929,
      "step": 42807
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6030467748641968,
      "learning_rate": 0.0005503294414723412,
      "loss": 2.9767,
      "step": 42808
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8151949644088745,
      "learning_rate": 0.0005503271870923489,
      "loss": 2.8656,
      "step": 42809
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4994456768035889,
      "learning_rate": 0.0005503249326658158,
      "loss": 3.1259,
      "step": 42810
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3822989463806152,
      "learning_rate": 0.0005503226781927425,
      "loss": 3.0208,
      "step": 42811
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4159592390060425,
      "learning_rate": 0.0005503204236731296,
      "loss": 3.0411,
      "step": 42812
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0783188343048096,
      "learning_rate": 0.0005503181691069774,
      "loss": 2.7232,
      "step": 42813
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7160916328430176,
      "learning_rate": 0.0005503159144942864,
      "loss": 3.0437,
      "step": 42814
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4826409816741943,
      "learning_rate": 0.0005503136598350569,
      "loss": 2.9136,
      "step": 42815
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.523374557495117,
      "learning_rate": 0.0005503114051292893,
      "loss": 2.9958,
      "step": 42816
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.091777801513672,
      "learning_rate": 0.0005503091503769841,
      "loss": 2.9772,
      "step": 42817
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7201251983642578,
      "learning_rate": 0.0005503068955781418,
      "loss": 3.0004,
      "step": 42818
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0605580806732178,
      "learning_rate": 0.0005503046407327627,
      "loss": 3.1763,
      "step": 42819
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.365710973739624,
      "learning_rate": 0.0005503023858408471,
      "loss": 2.9439,
      "step": 42820
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.316922664642334,
      "learning_rate": 0.0005503001309023958,
      "loss": 3.0384,
      "step": 42821
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3644765615463257,
      "learning_rate": 0.0005502978759174089,
      "loss": 2.9955,
      "step": 42822
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9649070501327515,
      "learning_rate": 0.0005502956208858868,
      "loss": 3.2474,
      "step": 42823
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4932291507720947,
      "learning_rate": 0.00055029336580783,
      "loss": 2.9135,
      "step": 42824
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3449651002883911,
      "learning_rate": 0.000550291110683239,
      "loss": 3.2528,
      "step": 42825
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.053473949432373,
      "learning_rate": 0.0005502888555121142,
      "loss": 3.035,
      "step": 42826
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7611439228057861,
      "learning_rate": 0.0005502866002944558,
      "loss": 3.0153,
      "step": 42827
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.540281891822815,
      "learning_rate": 0.0005502843450302646,
      "loss": 3.1151,
      "step": 42828
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6793965101242065,
      "learning_rate": 0.0005502820897195409,
      "loss": 3.2994,
      "step": 42829
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.584132432937622,
      "learning_rate": 0.0005502798343622849,
      "loss": 2.9976,
      "step": 42830
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.371269941329956,
      "learning_rate": 0.0005502775789584971,
      "loss": 3.2554,
      "step": 42831
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.520559549331665,
      "learning_rate": 0.000550275323508178,
      "loss": 2.6889,
      "step": 42832
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.57470703125,
      "learning_rate": 0.000550273068011328,
      "loss": 3.3093,
      "step": 42833
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4289544820785522,
      "learning_rate": 0.0005502708124679475,
      "loss": 2.9528,
      "step": 42834
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4042762517929077,
      "learning_rate": 0.000550268556878037,
      "loss": 3.1657,
      "step": 42835
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.369300365447998,
      "learning_rate": 0.0005502663012415968,
      "loss": 3.181,
      "step": 42836
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3560245037078857,
      "learning_rate": 0.0005502640455586274,
      "loss": 2.9459,
      "step": 42837
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5335873365402222,
      "learning_rate": 0.0005502617898291291,
      "loss": 3.3359,
      "step": 42838
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4835121631622314,
      "learning_rate": 0.0005502595340531025,
      "loss": 3.1928,
      "step": 42839
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5964045524597168,
      "learning_rate": 0.0005502572782305479,
      "loss": 3.1952,
      "step": 42840
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.482301115989685,
      "learning_rate": 0.0005502550223614658,
      "loss": 3.1278,
      "step": 42841
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6332496404647827,
      "learning_rate": 0.0005502527664458564,
      "loss": 2.936,
      "step": 42842
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5349282026290894,
      "learning_rate": 0.0005502505104837205,
      "loss": 3.1075,
      "step": 42843
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6707724332809448,
      "learning_rate": 0.0005502482544750581,
      "loss": 2.8777,
      "step": 42844
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4537436962127686,
      "learning_rate": 0.00055024599841987,
      "loss": 2.8642,
      "step": 42845
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.514632225036621,
      "learning_rate": 0.0005502437423181564,
      "loss": 3.0275,
      "step": 42846
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8991527557373047,
      "learning_rate": 0.0005502414861699177,
      "loss": 2.8654,
      "step": 42847
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0830442905426025,
      "learning_rate": 0.0005502392299751545,
      "loss": 2.8399,
      "step": 42848
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.7009291648864746,
      "learning_rate": 0.000550236973733867,
      "loss": 3.1404,
      "step": 42849
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.549946665763855,
      "learning_rate": 0.0005502347174460557,
      "loss": 2.9933,
      "step": 42850
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5360567569732666,
      "learning_rate": 0.0005502324611117212,
      "loss": 3.1197,
      "step": 42851
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.0977959632873535,
      "learning_rate": 0.0005502302047308636,
      "loss": 3.0351,
      "step": 42852
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5662673711776733,
      "learning_rate": 0.0005502279483034836,
      "loss": 3.2001,
      "step": 42853
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3085973262786865,
      "learning_rate": 0.0005502256918295815,
      "loss": 3.0205,
      "step": 42854
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3933042287826538,
      "learning_rate": 0.0005502234353091576,
      "loss": 2.9846,
      "step": 42855
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.525592565536499,
      "learning_rate": 0.0005502211787422125,
      "loss": 3.253,
      "step": 42856
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.984484314918518,
      "learning_rate": 0.0005502189221287465,
      "loss": 3.0518,
      "step": 42857
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4260834455490112,
      "learning_rate": 0.0005502166654687601,
      "loss": 2.9158,
      "step": 42858
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.384667158126831,
      "learning_rate": 0.0005502144087622538,
      "loss": 2.8507,
      "step": 42859
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5698046684265137,
      "learning_rate": 0.0005502121520092279,
      "loss": 3.0888,
      "step": 42860
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9462475776672363,
      "learning_rate": 0.0005502098952096828,
      "loss": 2.999,
      "step": 42861
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.335709810256958,
      "learning_rate": 0.000550207638363619,
      "loss": 2.6556,
      "step": 42862
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0850982666015625,
      "learning_rate": 0.0005502053814710367,
      "loss": 2.9822,
      "step": 42863
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.184896230697632,
      "learning_rate": 0.0005502031245319367,
      "loss": 2.7868,
      "step": 42864
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7189550399780273,
      "learning_rate": 0.0005502008675463191,
      "loss": 3.1678,
      "step": 42865
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9467222690582275,
      "learning_rate": 0.0005501986105141846,
      "loss": 3.2142,
      "step": 42866
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5992956161499023,
      "learning_rate": 0.0005501963534355333,
      "loss": 3.3836,
      "step": 42867
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.976605772972107,
      "learning_rate": 0.0005501940963103658,
      "loss": 2.996,
      "step": 42868
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1012043952941895,
      "learning_rate": 0.0005501918391386825,
      "loss": 3.1332,
      "step": 42869
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8554296493530273,
      "learning_rate": 0.0005501895819204839,
      "loss": 2.9775,
      "step": 42870
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.104625701904297,
      "learning_rate": 0.0005501873246557702,
      "loss": 2.7867,
      "step": 42871
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.591367244720459,
      "learning_rate": 0.0005501850673445421,
      "loss": 3.2838,
      "step": 42872
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4495723247528076,
      "learning_rate": 0.0005501828099867997,
      "loss": 3.0779,
      "step": 42873
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.750730514526367,
      "learning_rate": 0.0005501805525825437,
      "loss": 3.0668,
      "step": 42874
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.86972975730896,
      "learning_rate": 0.0005501782951317743,
      "loss": 3.037,
      "step": 42875
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6950392723083496,
      "learning_rate": 0.0005501760376344922,
      "loss": 3.0662,
      "step": 42876
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4368140697479248,
      "learning_rate": 0.0005501737800906976,
      "loss": 2.8952,
      "step": 42877
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2661314010620117,
      "learning_rate": 0.0005501715225003908,
      "loss": 3.0055,
      "step": 42878
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4978147745132446,
      "learning_rate": 0.0005501692648635727,
      "loss": 3.2787,
      "step": 42879
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2439501285552979,
      "learning_rate": 0.0005501670071802432,
      "loss": 3.2839,
      "step": 42880
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8392480611801147,
      "learning_rate": 0.0005501647494504029,
      "loss": 2.749,
      "step": 42881
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.370463252067566,
      "learning_rate": 0.0005501624916740524,
      "loss": 2.9302,
      "step": 42882
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5922569036483765,
      "learning_rate": 0.0005501602338511918,
      "loss": 3.1496,
      "step": 42883
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7042701244354248,
      "learning_rate": 0.0005501579759818218,
      "loss": 3.3015,
      "step": 42884
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.639901041984558,
      "learning_rate": 0.0005501557180659427,
      "loss": 3.033,
      "step": 42885
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4078447818756104,
      "learning_rate": 0.000550153460103555,
      "loss": 3.0853,
      "step": 42886
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5312509536743164,
      "learning_rate": 0.000550151202094659,
      "loss": 3.2681,
      "step": 42887
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2757315635681152,
      "learning_rate": 0.0005501489440392552,
      "loss": 3.0025,
      "step": 42888
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.769243001937866,
      "learning_rate": 0.0005501466859373439,
      "loss": 2.7985,
      "step": 42889
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0527594089508057,
      "learning_rate": 0.0005501444277889256,
      "loss": 3.1133,
      "step": 42890
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.3648509979248047,
      "learning_rate": 0.0005501421695940008,
      "loss": 2.9112,
      "step": 42891
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.0465221405029297,
      "learning_rate": 0.0005501399113525699,
      "loss": 3.1242,
      "step": 42892
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5998194217681885,
      "learning_rate": 0.0005501376530646331,
      "loss": 2.9991,
      "step": 42893
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1726977825164795,
      "learning_rate": 0.0005501353947301911,
      "loss": 3.0011,
      "step": 42894
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.717092752456665,
      "learning_rate": 0.0005501331363492442,
      "loss": 3.0072,
      "step": 42895
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.864866256713867,
      "learning_rate": 0.0005501308779217928,
      "loss": 3.1653,
      "step": 42896
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5647084712982178,
      "learning_rate": 0.0005501286194478374,
      "loss": 3.1802,
      "step": 42897
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1145122051239014,
      "learning_rate": 0.0005501263609273783,
      "loss": 2.962,
      "step": 42898
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.060396194458008,
      "learning_rate": 0.000550124102360416,
      "loss": 3.1276,
      "step": 42899
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4291352033615112,
      "learning_rate": 0.000550121843746951,
      "loss": 3.0889,
      "step": 42900
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4321037530899048,
      "learning_rate": 0.0005501195850869836,
      "loss": 3.1256,
      "step": 42901
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3128389120101929,
      "learning_rate": 0.0005501173263805142,
      "loss": 2.9037,
      "step": 42902
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4080685377120972,
      "learning_rate": 0.0005501150676275432,
      "loss": 3.0303,
      "step": 42903
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0587868690490723,
      "learning_rate": 0.0005501128088280712,
      "loss": 2.961,
      "step": 42904
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.878201961517334,
      "learning_rate": 0.0005501105499820984,
      "loss": 2.9379,
      "step": 42905
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4496172666549683,
      "learning_rate": 0.0005501082910896255,
      "loss": 3.0897,
      "step": 42906
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5344418287277222,
      "learning_rate": 0.0005501060321506527,
      "loss": 3.1454,
      "step": 42907
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.3662185668945312,
      "learning_rate": 0.0005501037731651803,
      "loss": 2.9307,
      "step": 42908
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0767998695373535,
      "learning_rate": 0.0005501015141332091,
      "loss": 3.2028,
      "step": 42909
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.155097723007202,
      "learning_rate": 0.0005500992550547391,
      "loss": 3.2757,
      "step": 42910
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0520808696746826,
      "learning_rate": 0.0005500969959297712,
      "loss": 3.1455,
      "step": 42911
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.6304609775543213,
      "learning_rate": 0.0005500947367583053,
      "loss": 2.8716,
      "step": 42912
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6543633937835693,
      "learning_rate": 0.0005500924775403421,
      "loss": 2.8263,
      "step": 42913
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4802765846252441,
      "learning_rate": 0.0005500902182758821,
      "loss": 2.9347,
      "step": 42914
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6828315258026123,
      "learning_rate": 0.0005500879589649255,
      "loss": 3.0537,
      "step": 42915
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.462517499923706,
      "learning_rate": 0.0005500856996074729,
      "loss": 2.963,
      "step": 42916
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7920335531234741,
      "learning_rate": 0.0005500834402035246,
      "loss": 3.298,
      "step": 42917
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7276332378387451,
      "learning_rate": 0.0005500811807530811,
      "loss": 2.944,
      "step": 42918
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7018386125564575,
      "learning_rate": 0.0005500789212561428,
      "loss": 2.8627,
      "step": 42919
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0992977619171143,
      "learning_rate": 0.00055007666171271,
      "loss": 3.2902,
      "step": 42920
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8674392700195312,
      "learning_rate": 0.0005500744021227834,
      "loss": 3.3051,
      "step": 42921
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.333383321762085,
      "learning_rate": 0.0005500721424863632,
      "loss": 2.9827,
      "step": 42922
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6071770191192627,
      "learning_rate": 0.0005500698828034498,
      "loss": 2.9006,
      "step": 42923
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.855846881866455,
      "learning_rate": 0.0005500676230740438,
      "loss": 2.9021,
      "step": 42924
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3238168954849243,
      "learning_rate": 0.0005500653632981453,
      "loss": 3.1526,
      "step": 42925
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0793707370758057,
      "learning_rate": 0.000550063103475755,
      "loss": 3.2354,
      "step": 42926
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7038547992706299,
      "learning_rate": 0.0005500608436068734,
      "loss": 2.8854,
      "step": 42927
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8515713214874268,
      "learning_rate": 0.0005500585836915007,
      "loss": 3.2035,
      "step": 42928
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6572726964950562,
      "learning_rate": 0.0005500563237296373,
      "loss": 2.9803,
      "step": 42929
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0644404888153076,
      "learning_rate": 0.0005500540637212838,
      "loss": 2.9865,
      "step": 42930
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3101656436920166,
      "learning_rate": 0.0005500518036664405,
      "loss": 3.025,
      "step": 42931
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.682838797569275,
      "learning_rate": 0.0005500495435651078,
      "loss": 3.1057,
      "step": 42932
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.510986089706421,
      "learning_rate": 0.0005500472834172863,
      "loss": 2.8351,
      "step": 42933
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2459275722503662,
      "learning_rate": 0.0005500450232229763,
      "loss": 3.1509,
      "step": 42934
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9244669675827026,
      "learning_rate": 0.000550042762982178,
      "loss": 3.1269,
      "step": 42935
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2069618701934814,
      "learning_rate": 0.0005500405026948921,
      "loss": 3.1912,
      "step": 42936
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6861066818237305,
      "learning_rate": 0.0005500382423611191,
      "loss": 3.1699,
      "step": 42937
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4757696390151978,
      "learning_rate": 0.0005500359819808591,
      "loss": 3.256,
      "step": 42938
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5939809083938599,
      "learning_rate": 0.0005500337215541128,
      "loss": 3.2553,
      "step": 42939
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9660227298736572,
      "learning_rate": 0.0005500314610808805,
      "loss": 2.9582,
      "step": 42940
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9573447704315186,
      "learning_rate": 0.0005500292005611626,
      "loss": 3.0562,
      "step": 42941
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3770707845687866,
      "learning_rate": 0.0005500269399949595,
      "loss": 3.1784,
      "step": 42942
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4099316596984863,
      "learning_rate": 0.0005500246793822718,
      "loss": 3.2826,
      "step": 42943
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9243539571762085,
      "learning_rate": 0.0005500224187230997,
      "loss": 3.1228,
      "step": 42944
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5533044338226318,
      "learning_rate": 0.0005500201580174438,
      "loss": 3.054,
      "step": 42945
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7942698001861572,
      "learning_rate": 0.0005500178972653043,
      "loss": 2.8009,
      "step": 42946
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7739449739456177,
      "learning_rate": 0.0005500156364666818,
      "loss": 3.0766,
      "step": 42947
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6668168306350708,
      "learning_rate": 0.0005500133756215767,
      "loss": 2.98,
      "step": 42948
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6273880004882812,
      "learning_rate": 0.0005500111147299895,
      "loss": 2.9423,
      "step": 42949
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.717949390411377,
      "learning_rate": 0.0005500088537919202,
      "loss": 2.998,
      "step": 42950
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3903865814208984,
      "learning_rate": 0.0005500065928073699,
      "loss": 3.0341,
      "step": 42951
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4401243925094604,
      "learning_rate": 0.0005500043317763384,
      "loss": 3.0772,
      "step": 42952
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7541043758392334,
      "learning_rate": 0.0005500020706988265,
      "loss": 3.0683,
      "step": 42953
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4669891595840454,
      "learning_rate": 0.0005499998095748345,
      "loss": 3.1611,
      "step": 42954
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7098008394241333,
      "learning_rate": 0.0005499975484043628,
      "loss": 3.0471,
      "step": 42955
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.504266381263733,
      "learning_rate": 0.0005499952871874118,
      "loss": 3.0958,
      "step": 42956
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6126601696014404,
      "learning_rate": 0.000549993025923982,
      "loss": 3.315,
      "step": 42957
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.483694314956665,
      "learning_rate": 0.0005499907646140738,
      "loss": 3.1573,
      "step": 42958
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6647144556045532,
      "learning_rate": 0.0005499885032576874,
      "loss": 3.115,
      "step": 42959
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6377108097076416,
      "learning_rate": 0.0005499862418548236,
      "loss": 2.9402,
      "step": 42960
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.652214765548706,
      "learning_rate": 0.0005499839804054827,
      "loss": 3.1221,
      "step": 42961
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.486682891845703,
      "learning_rate": 0.0005499817189096649,
      "loss": 3.1332,
      "step": 42962
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.093217134475708,
      "learning_rate": 0.0005499794573673709,
      "loss": 2.9098,
      "step": 42963
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1997063159942627,
      "learning_rate": 0.0005499771957786009,
      "loss": 3.0076,
      "step": 42964
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7967826128005981,
      "learning_rate": 0.0005499749341433555,
      "loss": 3.224,
      "step": 42965
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8393748998641968,
      "learning_rate": 0.000549972672461635,
      "loss": 2.8789,
      "step": 42966
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.762894630432129,
      "learning_rate": 0.0005499704107334399,
      "loss": 3.0119,
      "step": 42967
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4011715650558472,
      "learning_rate": 0.0005499681489587705,
      "loss": 3.0512,
      "step": 42968
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0071399211883545,
      "learning_rate": 0.0005499658871376274,
      "loss": 3.1801,
      "step": 42969
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.3809585571289062,
      "learning_rate": 0.0005499636252700109,
      "loss": 2.9543,
      "step": 42970
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4553802013397217,
      "learning_rate": 0.0005499613633559214,
      "loss": 3.1172,
      "step": 42971
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5919450521469116,
      "learning_rate": 0.0005499591013953595,
      "loss": 3.107,
      "step": 42972
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9686294794082642,
      "learning_rate": 0.0005499568393883252,
      "loss": 3.0079,
      "step": 42973
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.411332845687866,
      "learning_rate": 0.0005499545773348195,
      "loss": 3.0201,
      "step": 42974
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8161778450012207,
      "learning_rate": 0.0005499523152348423,
      "loss": 3.21,
      "step": 42975
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6679025888442993,
      "learning_rate": 0.0005499500530883944,
      "loss": 3.3742,
      "step": 42976
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.0070176124572754,
      "learning_rate": 0.000549947790895476,
      "loss": 2.8842,
      "step": 42977
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8914093971252441,
      "learning_rate": 0.0005499455286560876,
      "loss": 2.9823,
      "step": 42978
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9624236822128296,
      "learning_rate": 0.0005499432663702296,
      "loss": 3.2181,
      "step": 42979
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4238077402114868,
      "learning_rate": 0.0005499410040379025,
      "loss": 3.1369,
      "step": 42980
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3612759113311768,
      "learning_rate": 0.0005499387416591066,
      "loss": 3.0912,
      "step": 42981
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3240488767623901,
      "learning_rate": 0.0005499364792338423,
      "loss": 2.9331,
      "step": 42982
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6659457683563232,
      "learning_rate": 0.0005499342167621102,
      "loss": 2.879,
      "step": 42983
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3446811437606812,
      "learning_rate": 0.0005499319542439105,
      "loss": 3.0946,
      "step": 42984
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.621684193611145,
      "learning_rate": 0.0005499296916792438,
      "loss": 2.9636,
      "step": 42985
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8081668615341187,
      "learning_rate": 0.0005499274290681104,
      "loss": 3.1304,
      "step": 42986
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8569655418395996,
      "learning_rate": 0.0005499251664105109,
      "loss": 3.1775,
      "step": 42987
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.285693883895874,
      "learning_rate": 0.0005499229037064455,
      "loss": 3.2251,
      "step": 42988
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4215143918991089,
      "learning_rate": 0.0005499206409559147,
      "loss": 3.4772,
      "step": 42989
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4271557331085205,
      "learning_rate": 0.0005499183781589189,
      "loss": 3.1259,
      "step": 42990
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2695491313934326,
      "learning_rate": 0.0005499161153154587,
      "loss": 3.1615,
      "step": 42991
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5000041723251343,
      "learning_rate": 0.0005499138524255343,
      "loss": 3.0814,
      "step": 42992
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4064801931381226,
      "learning_rate": 0.0005499115894891461,
      "loss": 2.919,
      "step": 42993
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9479808807373047,
      "learning_rate": 0.0005499093265062948,
      "loss": 2.9187,
      "step": 42994
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4113190174102783,
      "learning_rate": 0.0005499070634769805,
      "loss": 2.9886,
      "step": 42995
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.470841407775879,
      "learning_rate": 0.0005499048004012039,
      "loss": 2.9306,
      "step": 42996
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.546090006828308,
      "learning_rate": 0.0005499025372789652,
      "loss": 3.0891,
      "step": 42997
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.304535150527954,
      "learning_rate": 0.0005499002741102648,
      "loss": 3.2404,
      "step": 42998
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.56080961227417,
      "learning_rate": 0.0005498980108951033,
      "loss": 3.1181,
      "step": 42999
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8679656982421875,
      "learning_rate": 0.0005498957476334811,
      "loss": 3.0635,
      "step": 43000
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4634095430374146,
      "learning_rate": 0.0005498934843253986,
      "loss": 3.1274,
      "step": 43001
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6210187673568726,
      "learning_rate": 0.000549891220970856,
      "loss": 3.1191,
      "step": 43002
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2915658950805664,
      "learning_rate": 0.000549888957569854,
      "loss": 2.9359,
      "step": 43003
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6766722202301025,
      "learning_rate": 0.0005498866941223929,
      "loss": 2.9902,
      "step": 43004
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6613587141036987,
      "learning_rate": 0.0005498844306284733,
      "loss": 3.003,
      "step": 43005
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5497366189956665,
      "learning_rate": 0.0005498821670880953,
      "loss": 2.8665,
      "step": 43006
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6724928617477417,
      "learning_rate": 0.0005498799035012596,
      "loss": 3.0359,
      "step": 43007
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5258382558822632,
      "learning_rate": 0.0005498776398679664,
      "loss": 2.9366,
      "step": 43008
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8665270805358887,
      "learning_rate": 0.0005498753761882163,
      "loss": 2.9926,
      "step": 43009
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3657389879226685,
      "learning_rate": 0.0005498731124620096,
      "loss": 2.7688,
      "step": 43010
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4388704299926758,
      "learning_rate": 0.0005498708486893468,
      "loss": 3.1198,
      "step": 43011
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4815053939819336,
      "learning_rate": 0.0005498685848702283,
      "loss": 3.0488,
      "step": 43012
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6528249979019165,
      "learning_rate": 0.0005498663210046545,
      "loss": 3.155,
      "step": 43013
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9350707530975342,
      "learning_rate": 0.0005498640570926258,
      "loss": 2.8528,
      "step": 43014
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.464538335800171,
      "learning_rate": 0.0005498617931341427,
      "loss": 2.9313,
      "step": 43015
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.729887843132019,
      "learning_rate": 0.0005498595291292056,
      "loss": 3.1216,
      "step": 43016
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6152583360671997,
      "learning_rate": 0.0005498572650778149,
      "loss": 2.6652,
      "step": 43017
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3823585510253906,
      "learning_rate": 0.0005498550009799709,
      "loss": 3.09,
      "step": 43018
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3973623514175415,
      "learning_rate": 0.0005498527368356743,
      "loss": 3.2033,
      "step": 43019
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4167685508728027,
      "learning_rate": 0.0005498504726449253,
      "loss": 3.0459,
      "step": 43020
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4276491403579712,
      "learning_rate": 0.0005498482084077244,
      "loss": 3.1558,
      "step": 43021
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4429309368133545,
      "learning_rate": 0.0005498459441240719,
      "loss": 3.0531,
      "step": 43022
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3060479164123535,
      "learning_rate": 0.0005498436797939684,
      "loss": 3.0661,
      "step": 43023
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5012258291244507,
      "learning_rate": 0.0005498414154174142,
      "loss": 3.227,
      "step": 43024
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3666232824325562,
      "learning_rate": 0.0005498391509944098,
      "loss": 3.0345,
      "step": 43025
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4247812032699585,
      "learning_rate": 0.0005498368865249556,
      "loss": 3.3412,
      "step": 43026
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.065511465072632,
      "learning_rate": 0.0005498346220090519,
      "loss": 3.0617,
      "step": 43027
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5066441297531128,
      "learning_rate": 0.0005498323574466994,
      "loss": 3.22,
      "step": 43028
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6300396919250488,
      "learning_rate": 0.0005498300928378982,
      "loss": 3.0683,
      "step": 43029
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3695061206817627,
      "learning_rate": 0.000549827828182649,
      "loss": 3.1108,
      "step": 43030
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7234035730361938,
      "learning_rate": 0.000549825563480952,
      "loss": 2.782,
      "step": 43031
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.188373565673828,
      "learning_rate": 0.0005498232987328078,
      "loss": 2.8778,
      "step": 43032
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6558196544647217,
      "learning_rate": 0.0005498210339382166,
      "loss": 3.0287,
      "step": 43033
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7519643306732178,
      "learning_rate": 0.000549818769097179,
      "loss": 3.0983,
      "step": 43034
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6607955694198608,
      "learning_rate": 0.0005498165042096954,
      "loss": 2.8825,
      "step": 43035
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6668728590011597,
      "learning_rate": 0.0005498142392757662,
      "loss": 3.0834,
      "step": 43036
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.84605872631073,
      "learning_rate": 0.0005498119742953919,
      "loss": 3.0662,
      "step": 43037
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.828667402267456,
      "learning_rate": 0.0005498097092685728,
      "loss": 3.0632,
      "step": 43038
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.317732572555542,
      "learning_rate": 0.0005498074441953093,
      "loss": 3.0133,
      "step": 43039
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.646380066871643,
      "learning_rate": 0.0005498051790756019,
      "loss": 2.9969,
      "step": 43040
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5052456855773926,
      "learning_rate": 0.000549802913909451,
      "loss": 3.1692,
      "step": 43041
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.456618309020996,
      "learning_rate": 0.0005498006486968569,
      "loss": 3.0785,
      "step": 43042
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5243953466415405,
      "learning_rate": 0.0005497983834378203,
      "loss": 3.1432,
      "step": 43043
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8112235069274902,
      "learning_rate": 0.0005497961181323414,
      "loss": 3.1424,
      "step": 43044
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3734591007232666,
      "learning_rate": 0.0005497938527804207,
      "loss": 3.2571,
      "step": 43045
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.783267617225647,
      "learning_rate": 0.0005497915873820587,
      "loss": 2.9202,
      "step": 43046
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2847411632537842,
      "learning_rate": 0.0005497893219372556,
      "loss": 3.1132,
      "step": 43047
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8324823379516602,
      "learning_rate": 0.0005497870564460121,
      "loss": 2.984,
      "step": 43048
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3517677783966064,
      "learning_rate": 0.0005497847909083283,
      "loss": 3.1177,
      "step": 43049
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6494122743606567,
      "learning_rate": 0.0005497825253242049,
      "loss": 3.0683,
      "step": 43050
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6670377254486084,
      "learning_rate": 0.0005497802596936422,
      "loss": 2.9433,
      "step": 43051
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2771060466766357,
      "learning_rate": 0.0005497779940166406,
      "loss": 3.2753,
      "step": 43052
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.408836007118225,
      "learning_rate": 0.0005497757282932006,
      "loss": 2.8583,
      "step": 43053
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.002026081085205,
      "learning_rate": 0.0005497734625233224,
      "loss": 2.8737,
      "step": 43054
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.651261806488037,
      "learning_rate": 0.0005497711967070068,
      "loss": 3.0391,
      "step": 43055
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7746267318725586,
      "learning_rate": 0.000549768930844254,
      "loss": 2.8952,
      "step": 43056
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.710111379623413,
      "learning_rate": 0.0005497666649350643,
      "loss": 3.0592,
      "step": 43057
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7584940195083618,
      "learning_rate": 0.0005497643989794385,
      "loss": 3.1046,
      "step": 43058
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.104212522506714,
      "learning_rate": 0.0005497621329773766,
      "loss": 3.0722,
      "step": 43059
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6967459917068481,
      "learning_rate": 0.0005497598669288792,
      "loss": 2.638,
      "step": 43060
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9065114259719849,
      "learning_rate": 0.0005497576008339468,
      "loss": 3.1739,
      "step": 43061
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4402153491973877,
      "learning_rate": 0.0005497553346925798,
      "loss": 3.2253,
      "step": 43062
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.069240093231201,
      "learning_rate": 0.0005497530685047784,
      "loss": 3.0922,
      "step": 43063
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4830377101898193,
      "learning_rate": 0.0005497508022705433,
      "loss": 3.2425,
      "step": 43064
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7358230352401733,
      "learning_rate": 0.0005497485359898748,
      "loss": 3.1572,
      "step": 43065
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.657540202140808,
      "learning_rate": 0.0005497462696627734,
      "loss": 3.1381,
      "step": 43066
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2207823991775513,
      "learning_rate": 0.0005497440032892393,
      "loss": 3.0663,
      "step": 43067
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0189049243927,
      "learning_rate": 0.0005497417368692732,
      "loss": 2.9928,
      "step": 43068
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5484403371810913,
      "learning_rate": 0.0005497394704028753,
      "loss": 3.1517,
      "step": 43069
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5251027345657349,
      "learning_rate": 0.0005497372038900462,
      "loss": 3.1613,
      "step": 43070
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9312223196029663,
      "learning_rate": 0.0005497349373307862,
      "loss": 2.8044,
      "step": 43071
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.393196702003479,
      "learning_rate": 0.0005497326707250958,
      "loss": 3.0995,
      "step": 43072
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5428779125213623,
      "learning_rate": 0.0005497304040729754,
      "loss": 2.9268,
      "step": 43073
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6656858921051025,
      "learning_rate": 0.0005497281373744254,
      "loss": 3.1155,
      "step": 43074
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6041977405548096,
      "learning_rate": 0.0005497258706294462,
      "loss": 3.0685,
      "step": 43075
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6759811639785767,
      "learning_rate": 0.0005497236038380382,
      "loss": 3.0488,
      "step": 43076
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3799101114273071,
      "learning_rate": 0.0005497213370002019,
      "loss": 3.1646,
      "step": 43077
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.114675521850586,
      "learning_rate": 0.0005497190701159376,
      "loss": 2.8941,
      "step": 43078
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6233654022216797,
      "learning_rate": 0.000549716803185246,
      "loss": 3.2481,
      "step": 43079
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.012514352798462,
      "learning_rate": 0.0005497145362081272,
      "loss": 2.9762,
      "step": 43080
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8687331676483154,
      "learning_rate": 0.0005497122691845818,
      "loss": 3.0647,
      "step": 43081
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9016990661621094,
      "learning_rate": 0.0005497100021146102,
      "loss": 3.0368,
      "step": 43082
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6796470880508423,
      "learning_rate": 0.0005497077349982128,
      "loss": 2.9559,
      "step": 43083
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.737042188644409,
      "learning_rate": 0.0005497054678353898,
      "loss": 2.9865,
      "step": 43084
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.620097041130066,
      "learning_rate": 0.0005497032006261421,
      "loss": 3.1582,
      "step": 43085
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3398069143295288,
      "learning_rate": 0.0005497009333704699,
      "loss": 3.3412,
      "step": 43086
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5073798894882202,
      "learning_rate": 0.0005496986660683734,
      "loss": 2.9988,
      "step": 43087
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3248190879821777,
      "learning_rate": 0.0005496963987198533,
      "loss": 3.2591,
      "step": 43088
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.897721529006958,
      "learning_rate": 0.0005496941313249099,
      "loss": 2.8975,
      "step": 43089
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.568683385848999,
      "learning_rate": 0.0005496918638835436,
      "loss": 3.2319,
      "step": 43090
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.54283607006073,
      "learning_rate": 0.0005496895963957548,
      "loss": 3.2031,
      "step": 43091
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5532747507095337,
      "learning_rate": 0.0005496873288615442,
      "loss": 3.1706,
      "step": 43092
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7490123510360718,
      "learning_rate": 0.0005496850612809118,
      "loss": 3.0338,
      "step": 43093
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4903573989868164,
      "learning_rate": 0.0005496827936538585,
      "loss": 2.7162,
      "step": 43094
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4931230545043945,
      "learning_rate": 0.0005496805259803842,
      "loss": 3.0263,
      "step": 43095
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.132202625274658,
      "learning_rate": 0.0005496782582604898,
      "loss": 3.1142,
      "step": 43096
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2750638723373413,
      "learning_rate": 0.0005496759904941753,
      "loss": 3.0226,
      "step": 43097
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.892197608947754,
      "learning_rate": 0.0005496737226814415,
      "loss": 2.9602,
      "step": 43098
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1527297496795654,
      "learning_rate": 0.0005496714548222885,
      "loss": 2.8935,
      "step": 43099
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2899293899536133,
      "learning_rate": 0.0005496691869167168,
      "loss": 2.9626,
      "step": 43100
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8889572620391846,
      "learning_rate": 0.0005496669189647271,
      "loss": 3.0842,
      "step": 43101
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.558229684829712,
      "learning_rate": 0.0005496646509663195,
      "loss": 3.1572,
      "step": 43102
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3927229642868042,
      "learning_rate": 0.0005496623829214946,
      "loss": 3.1257,
      "step": 43103
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5205734968185425,
      "learning_rate": 0.0005496601148302526,
      "loss": 3.0933,
      "step": 43104
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0713119506835938,
      "learning_rate": 0.0005496578466925942,
      "loss": 3.0654,
      "step": 43105
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.536207675933838,
      "learning_rate": 0.0005496555785085196,
      "loss": 2.9559,
      "step": 43106
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8282973766326904,
      "learning_rate": 0.0005496533102780294,
      "loss": 2.9606,
      "step": 43107
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6103416681289673,
      "learning_rate": 0.0005496510420011239,
      "loss": 3.1195,
      "step": 43108
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2637182474136353,
      "learning_rate": 0.0005496487736778035,
      "loss": 3.3068,
      "step": 43109
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3991310596466064,
      "learning_rate": 0.0005496465053080687,
      "loss": 2.9428,
      "step": 43110
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.630661964416504,
      "learning_rate": 0.00054964423689192,
      "loss": 3.1018,
      "step": 43111
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6914712190628052,
      "learning_rate": 0.0005496419684293577,
      "loss": 3.1179,
      "step": 43112
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6969599723815918,
      "learning_rate": 0.0005496396999203822,
      "loss": 2.9387,
      "step": 43113
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7572245597839355,
      "learning_rate": 0.0005496374313649939,
      "loss": 3.077,
      "step": 43114
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.719252109527588,
      "learning_rate": 0.0005496351627631934,
      "loss": 3.1288,
      "step": 43115
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5456355810165405,
      "learning_rate": 0.0005496328941149809,
      "loss": 2.9006,
      "step": 43116
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5076115131378174,
      "learning_rate": 0.0005496306254203571,
      "loss": 2.8417,
      "step": 43117
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0222015380859375,
      "learning_rate": 0.0005496283566793222,
      "loss": 3.1304,
      "step": 43118
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.652639627456665,
      "learning_rate": 0.0005496260878918766,
      "loss": 3.271,
      "step": 43119
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5779948234558105,
      "learning_rate": 0.0005496238190580208,
      "loss": 3.1477,
      "step": 43120
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2325143814086914,
      "learning_rate": 0.0005496215501777553,
      "loss": 2.8672,
      "step": 43121
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6697272062301636,
      "learning_rate": 0.0005496192812510804,
      "loss": 3.1798,
      "step": 43122
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5506317615509033,
      "learning_rate": 0.0005496170122779965,
      "loss": 3.0362,
      "step": 43123
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8403472900390625,
      "learning_rate": 0.0005496147432585042,
      "loss": 3.0129,
      "step": 43124
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5226647853851318,
      "learning_rate": 0.0005496124741926037,
      "loss": 3.0218,
      "step": 43125
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0933938026428223,
      "learning_rate": 0.0005496102050802957,
      "loss": 2.9984,
      "step": 43126
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5890851020812988,
      "learning_rate": 0.0005496079359215803,
      "loss": 2.9934,
      "step": 43127
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5688738822937012,
      "learning_rate": 0.0005496056667164581,
      "loss": 3.147,
      "step": 43128
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1222643852233887,
      "learning_rate": 0.0005496033974649295,
      "loss": 3.0059,
      "step": 43129
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.816743850708008,
      "learning_rate": 0.000549601128166995,
      "loss": 3.233,
      "step": 43130
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8055137395858765,
      "learning_rate": 0.0005495988588226548,
      "loss": 2.8986,
      "step": 43131
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4151936769485474,
      "learning_rate": 0.0005495965894319095,
      "loss": 2.8055,
      "step": 43132
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3614423274993896,
      "learning_rate": 0.0005495943199947595,
      "loss": 2.9562,
      "step": 43133
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.041249990463257,
      "learning_rate": 0.0005495920505112053,
      "loss": 3.1623,
      "step": 43134
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3982501029968262,
      "learning_rate": 0.0005495897809812471,
      "loss": 3.2962,
      "step": 43135
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6916160583496094,
      "learning_rate": 0.0005495875114048854,
      "loss": 3.0515,
      "step": 43136
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.675299644470215,
      "learning_rate": 0.0005495852417821208,
      "loss": 2.8092,
      "step": 43137
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8656560182571411,
      "learning_rate": 0.0005495829721129536,
      "loss": 3.1708,
      "step": 43138
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4415128231048584,
      "learning_rate": 0.000549580702397384,
      "loss": 3.1113,
      "step": 43139
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.777584433555603,
      "learning_rate": 0.0005495784326354128,
      "loss": 3.0206,
      "step": 43140
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7730765342712402,
      "learning_rate": 0.0005495761628270403,
      "loss": 3.3249,
      "step": 43141
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4035310745239258,
      "learning_rate": 0.0005495738929722668,
      "loss": 3.1048,
      "step": 43142
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2860500812530518,
      "learning_rate": 0.0005495716230710929,
      "loss": 3.1234,
      "step": 43143
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.755078911781311,
      "learning_rate": 0.0005495693531235189,
      "loss": 3.4452,
      "step": 43144
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2481303215026855,
      "learning_rate": 0.0005495670831295452,
      "loss": 2.7376,
      "step": 43145
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3614436388015747,
      "learning_rate": 0.0005495648130891722,
      "loss": 3.0209,
      "step": 43146
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7667157649993896,
      "learning_rate": 0.0005495625430024005,
      "loss": 3.0416,
      "step": 43147
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5470441579818726,
      "learning_rate": 0.0005495602728692303,
      "loss": 2.912,
      "step": 43148
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5917608737945557,
      "learning_rate": 0.0005495580026896623,
      "loss": 2.9457,
      "step": 43149
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7789833545684814,
      "learning_rate": 0.0005495557324636966,
      "loss": 2.9131,
      "step": 43150
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.66196346282959,
      "learning_rate": 0.0005495534621913339,
      "loss": 3.1608,
      "step": 43151
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3737884759902954,
      "learning_rate": 0.0005495511918725745,
      "loss": 3.107,
      "step": 43152
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5484462976455688,
      "learning_rate": 0.0005495489215074187,
      "loss": 2.9206,
      "step": 43153
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.109936237335205,
      "learning_rate": 0.0005495466510958671,
      "loss": 3.1104,
      "step": 43154
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7783215045928955,
      "learning_rate": 0.0005495443806379202,
      "loss": 3.0296,
      "step": 43155
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0008816719055176,
      "learning_rate": 0.000549542110133578,
      "loss": 2.9551,
      "step": 43156
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6178252696990967,
      "learning_rate": 0.0005495398395828414,
      "loss": 3.3798,
      "step": 43157
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4940741062164307,
      "learning_rate": 0.0005495375689857107,
      "loss": 2.8224,
      "step": 43158
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9402469396591187,
      "learning_rate": 0.0005495352983421862,
      "loss": 3.0163,
      "step": 43159
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4822638034820557,
      "learning_rate": 0.0005495330276522683,
      "loss": 3.0309,
      "step": 43160
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5643256902694702,
      "learning_rate": 0.0005495307569159575,
      "loss": 3.1361,
      "step": 43161
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5634124279022217,
      "learning_rate": 0.0005495284861332543,
      "loss": 3.0296,
      "step": 43162
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6554696559906006,
      "learning_rate": 0.000549526215304159,
      "loss": 3.0168,
      "step": 43163
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6841797828674316,
      "learning_rate": 0.0005495239444286721,
      "loss": 3.1047,
      "step": 43164
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6263394355773926,
      "learning_rate": 0.000549521673506794,
      "loss": 3.2113,
      "step": 43165
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.2172131538391113,
      "learning_rate": 0.0005495194025385251,
      "loss": 3.0161,
      "step": 43166
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.307029962539673,
      "learning_rate": 0.0005495171315238658,
      "loss": 3.0194,
      "step": 43167
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4415653944015503,
      "learning_rate": 0.0005495148604628165,
      "loss": 3.1267,
      "step": 43168
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8408669233322144,
      "learning_rate": 0.0005495125893553778,
      "loss": 3.0168,
      "step": 43169
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5637694597244263,
      "learning_rate": 0.00054951031820155,
      "loss": 3.0392,
      "step": 43170
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5701018571853638,
      "learning_rate": 0.0005495080470013334,
      "loss": 3.1755,
      "step": 43171
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7071813344955444,
      "learning_rate": 0.0005495057757547286,
      "loss": 2.9541,
      "step": 43172
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8752050399780273,
      "learning_rate": 0.0005495035044617361,
      "loss": 2.8213,
      "step": 43173
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7554571628570557,
      "learning_rate": 0.000549501233122356,
      "loss": 2.7703,
      "step": 43174
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9023936986923218,
      "learning_rate": 0.000549498961736589,
      "loss": 3.1451,
      "step": 43175
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.388258934020996,
      "learning_rate": 0.0005494966903044354,
      "loss": 2.8815,
      "step": 43176
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.926194667816162,
      "learning_rate": 0.0005494944188258956,
      "loss": 3.0236,
      "step": 43177
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4921009540557861,
      "learning_rate": 0.0005494921473009702,
      "loss": 3.0614,
      "step": 43178
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.599705696105957,
      "learning_rate": 0.0005494898757296595,
      "loss": 3.0731,
      "step": 43179
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.955120086669922,
      "learning_rate": 0.0005494876041119638,
      "loss": 2.9826,
      "step": 43180
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8461297750473022,
      "learning_rate": 0.0005494853324478837,
      "loss": 3.0736,
      "step": 43181
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5054094791412354,
      "learning_rate": 0.0005494830607374196,
      "loss": 2.8001,
      "step": 43182
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5409176349639893,
      "learning_rate": 0.0005494807889805718,
      "loss": 3.0353,
      "step": 43183
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.4407496452331543,
      "learning_rate": 0.000549478517177341,
      "loss": 3.0044,
      "step": 43184
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.808028221130371,
      "learning_rate": 0.0005494762453277273,
      "loss": 2.9919,
      "step": 43185
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5589686632156372,
      "learning_rate": 0.0005494739734317312,
      "loss": 2.8806,
      "step": 43186
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.720786690711975,
      "learning_rate": 0.0005494717014893533,
      "loss": 2.8113,
      "step": 43187
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.072067975997925,
      "learning_rate": 0.0005494694295005939,
      "loss": 3.0583,
      "step": 43188
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3964457511901855,
      "learning_rate": 0.0005494671574654533,
      "loss": 3.1478,
      "step": 43189
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3123812675476074,
      "learning_rate": 0.0005494648853839321,
      "loss": 3.0868,
      "step": 43190
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4638367891311646,
      "learning_rate": 0.0005494626132560307,
      "loss": 3.2369,
      "step": 43191
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.529109001159668,
      "learning_rate": 0.0005494603410817495,
      "loss": 2.7307,
      "step": 43192
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9709396362304688,
      "learning_rate": 0.000549458068861089,
      "loss": 2.8932,
      "step": 43193
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.586581230163574,
      "learning_rate": 0.0005494557965940494,
      "loss": 2.8215,
      "step": 43194
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0923078060150146,
      "learning_rate": 0.0005494535242806314,
      "loss": 3.0451,
      "step": 43195
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3456146717071533,
      "learning_rate": 0.0005494512519208351,
      "loss": 3.0248,
      "step": 43196
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7873350381851196,
      "learning_rate": 0.0005494489795146611,
      "loss": 3.0522,
      "step": 43197
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5066434144973755,
      "learning_rate": 0.0005494467070621099,
      "loss": 3.0553,
      "step": 43198
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2568262815475464,
      "learning_rate": 0.0005494444345631819,
      "loss": 3.1386,
      "step": 43199
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5052345991134644,
      "learning_rate": 0.0005494421620178774,
      "loss": 2.9203,
      "step": 43200
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3099825382232666,
      "learning_rate": 0.0005494398894261968,
      "loss": 3.0461,
      "step": 43201
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6831951141357422,
      "learning_rate": 0.0005494376167881409,
      "loss": 3.1793,
      "step": 43202
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6112666130065918,
      "learning_rate": 0.0005494353441037096,
      "loss": 3.0263,
      "step": 43203
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5816890001296997,
      "learning_rate": 0.0005494330713729037,
      "loss": 3.0719,
      "step": 43204
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5984927415847778,
      "learning_rate": 0.0005494307985957235,
      "loss": 3.1315,
      "step": 43205
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.6734941005706787,
      "learning_rate": 0.0005494285257721692,
      "loss": 2.9665,
      "step": 43206
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4053834676742554,
      "learning_rate": 0.0005494262529022416,
      "loss": 2.8341,
      "step": 43207
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.509980320930481,
      "learning_rate": 0.0005494239799859409,
      "loss": 3.0607,
      "step": 43208
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.8680760860443115,
      "learning_rate": 0.0005494217070232674,
      "loss": 3.0186,
      "step": 43209
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7059524059295654,
      "learning_rate": 0.0005494194340142219,
      "loss": 2.9839,
      "step": 43210
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9928499460220337,
      "learning_rate": 0.0005494171609588047,
      "loss": 3.0514,
      "step": 43211
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5256972312927246,
      "learning_rate": 0.000549414887857016,
      "loss": 3.2057,
      "step": 43212
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3513025045394897,
      "learning_rate": 0.0005494126147088564,
      "loss": 3.083,
      "step": 43213
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2810159921646118,
      "learning_rate": 0.0005494103415143263,
      "loss": 3.1614,
      "step": 43214
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6685093641281128,
      "learning_rate": 0.0005494080682734261,
      "loss": 3.054,
      "step": 43215
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8899056911468506,
      "learning_rate": 0.0005494057949861562,
      "loss": 2.8902,
      "step": 43216
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7757787704467773,
      "learning_rate": 0.0005494035216525171,
      "loss": 3.0487,
      "step": 43217
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3618955612182617,
      "learning_rate": 0.0005494012482725091,
      "loss": 2.8915,
      "step": 43218
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4241081476211548,
      "learning_rate": 0.0005493989748461328,
      "loss": 3.0041,
      "step": 43219
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3796130418777466,
      "learning_rate": 0.0005493967013733885,
      "loss": 2.8491,
      "step": 43220
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5909992456436157,
      "learning_rate": 0.0005493944278542767,
      "loss": 3.0007,
      "step": 43221
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5155854225158691,
      "learning_rate": 0.0005493921542887977,
      "loss": 3.1473,
      "step": 43222
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.727056622505188,
      "learning_rate": 0.0005493898806769519,
      "loss": 3.1796,
      "step": 43223
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.797825813293457,
      "learning_rate": 0.0005493876070187399,
      "loss": 3.0757,
      "step": 43224
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.67493736743927,
      "learning_rate": 0.0005493853333141621,
      "loss": 3.2886,
      "step": 43225
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0561015605926514,
      "learning_rate": 0.0005493830595632187,
      "loss": 2.7984,
      "step": 43226
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.408219814300537,
      "learning_rate": 0.0005493807857659104,
      "loss": 3.007,
      "step": 43227
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5438354015350342,
      "learning_rate": 0.0005493785119222376,
      "loss": 2.9297,
      "step": 43228
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4065431356430054,
      "learning_rate": 0.0005493762380322004,
      "loss": 3.0856,
      "step": 43229
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7241729497909546,
      "learning_rate": 0.0005493739640957996,
      "loss": 3.0576,
      "step": 43230
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.486486792564392,
      "learning_rate": 0.0005493716901130354,
      "loss": 2.8726,
      "step": 43231
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7879170179367065,
      "learning_rate": 0.0005493694160839083,
      "loss": 3.0409,
      "step": 43232
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3692421913146973,
      "learning_rate": 0.0005493671420084187,
      "loss": 2.8061,
      "step": 43233
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7620655298233032,
      "learning_rate": 0.0005493648678865672,
      "loss": 2.9955,
      "step": 43234
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9832484722137451,
      "learning_rate": 0.0005493625937183538,
      "loss": 3.1106,
      "step": 43235
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5824812650680542,
      "learning_rate": 0.0005493603195037795,
      "loss": 3.1365,
      "step": 43236
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4965590238571167,
      "learning_rate": 0.0005493580452428442,
      "loss": 3.1939,
      "step": 43237
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5009256601333618,
      "learning_rate": 0.0005493557709355487,
      "loss": 3.2505,
      "step": 43238
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.113542318344116,
      "learning_rate": 0.000549353496581893,
      "loss": 2.8414,
      "step": 43239
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.620208978652954,
      "learning_rate": 0.0005493512221818779,
      "loss": 3.1438,
      "step": 43240
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.799452304840088,
      "learning_rate": 0.0005493489477355037,
      "loss": 3.3946,
      "step": 43241
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5027565956115723,
      "learning_rate": 0.0005493466732427709,
      "loss": 3.3102,
      "step": 43242
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0179336071014404,
      "learning_rate": 0.0005493443987036798,
      "loss": 2.9963,
      "step": 43243
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5972343683242798,
      "learning_rate": 0.0005493421241182309,
      "loss": 3.1201,
      "step": 43244
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5943598747253418,
      "learning_rate": 0.0005493398494864245,
      "loss": 3.1176,
      "step": 43245
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2296541929244995,
      "learning_rate": 0.0005493375748082612,
      "loss": 3.2413,
      "step": 43246
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6856328248977661,
      "learning_rate": 0.0005493353000837412,
      "loss": 3.2695,
      "step": 43247
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1833832263946533,
      "learning_rate": 0.0005493330253128651,
      "loss": 3.053,
      "step": 43248
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6515134572982788,
      "learning_rate": 0.0005493307504956334,
      "loss": 3.0524,
      "step": 43249
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4745745658874512,
      "learning_rate": 0.0005493284756320464,
      "loss": 3.1475,
      "step": 43250
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3618719577789307,
      "learning_rate": 0.0005493262007221045,
      "loss": 3.3241,
      "step": 43251
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9181445837020874,
      "learning_rate": 0.0005493239257658081,
      "loss": 2.9561,
      "step": 43252
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.227673292160034,
      "learning_rate": 0.0005493216507631577,
      "loss": 3.3369,
      "step": 43253
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.4447436332702637,
      "learning_rate": 0.0005493193757141537,
      "loss": 3.0094,
      "step": 43254
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7701408863067627,
      "learning_rate": 0.0005493171006187965,
      "loss": 3.2006,
      "step": 43255
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.366889476776123,
      "learning_rate": 0.0005493148254770866,
      "loss": 3.0932,
      "step": 43256
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2848703861236572,
      "learning_rate": 0.0005493125502890244,
      "loss": 3.2745,
      "step": 43257
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.496701717376709,
      "learning_rate": 0.0005493102750546102,
      "loss": 2.9508,
      "step": 43258
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3359618186950684,
      "learning_rate": 0.0005493079997738445,
      "loss": 3.0186,
      "step": 43259
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3588204383850098,
      "learning_rate": 0.0005493057244467278,
      "loss": 2.9204,
      "step": 43260
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.216722011566162,
      "learning_rate": 0.0005493034490732605,
      "loss": 3.1579,
      "step": 43261
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.567461609840393,
      "learning_rate": 0.000549301173653443,
      "loss": 3.1767,
      "step": 43262
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6502351760864258,
      "learning_rate": 0.0005492988981872756,
      "loss": 3.3714,
      "step": 43263
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6023502349853516,
      "learning_rate": 0.0005492966226747588,
      "loss": 3.0757,
      "step": 43264
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4038609266281128,
      "learning_rate": 0.0005492943471158931,
      "loss": 3.1752,
      "step": 43265
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2693220376968384,
      "learning_rate": 0.0005492920715106788,
      "loss": 2.9543,
      "step": 43266
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.535815715789795,
      "learning_rate": 0.0005492897958591166,
      "loss": 3.2992,
      "step": 43267
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.9162309169769287,
      "learning_rate": 0.0005492875201612066,
      "loss": 3.2224,
      "step": 43268
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.382564067840576,
      "learning_rate": 0.0005492852444169493,
      "loss": 2.8348,
      "step": 43269
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6890480518341064,
      "learning_rate": 0.0005492829686263453,
      "loss": 3.1647,
      "step": 43270
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.0682735443115234,
      "learning_rate": 0.0005492806927893948,
      "loss": 3.1796,
      "step": 43271
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5160188674926758,
      "learning_rate": 0.0005492784169060983,
      "loss": 3.2222,
      "step": 43272
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.949377179145813,
      "learning_rate": 0.0005492761409764563,
      "loss": 2.7946,
      "step": 43273
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6691781282424927,
      "learning_rate": 0.0005492738650004691,
      "loss": 3.1461,
      "step": 43274
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.646350383758545,
      "learning_rate": 0.0005492715889781373,
      "loss": 3.2165,
      "step": 43275
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.462570309638977,
      "learning_rate": 0.000549269312909461,
      "loss": 3.0367,
      "step": 43276
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8094966411590576,
      "learning_rate": 0.000549267036794441,
      "loss": 2.8413,
      "step": 43277
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.7373931407928467,
      "learning_rate": 0.0005492647606330774,
      "loss": 3.1401,
      "step": 43278
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8540055751800537,
      "learning_rate": 0.0005492624844253709,
      "loss": 2.9136,
      "step": 43279
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0644445419311523,
      "learning_rate": 0.0005492602081713217,
      "loss": 3.1061,
      "step": 43280
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5797693729400635,
      "learning_rate": 0.0005492579318709305,
      "loss": 3.0134,
      "step": 43281
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6424307823181152,
      "learning_rate": 0.0005492556555241973,
      "loss": 3.1318,
      "step": 43282
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.205743432044983,
      "learning_rate": 0.0005492533791311231,
      "loss": 3.0696,
      "step": 43283
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6755648851394653,
      "learning_rate": 0.0005492511026917077,
      "loss": 2.9664,
      "step": 43284
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3483772277832031,
      "learning_rate": 0.000549248826205952,
      "loss": 3.188,
      "step": 43285
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6848760843276978,
      "learning_rate": 0.0005492465496738561,
      "loss": 3.1517,
      "step": 43286
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3907034397125244,
      "learning_rate": 0.0005492442730954206,
      "loss": 2.9566,
      "step": 43287
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.3358075618743896,
      "learning_rate": 0.0005492419964706459,
      "loss": 3.2015,
      "step": 43288
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.218980312347412,
      "learning_rate": 0.0005492397197995323,
      "loss": 2.8153,
      "step": 43289
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5312951803207397,
      "learning_rate": 0.0005492374430820805,
      "loss": 3.152,
      "step": 43290
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.713025450706482,
      "learning_rate": 0.0005492351663182908,
      "loss": 3.2417,
      "step": 43291
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.155003070831299,
      "learning_rate": 0.0005492328895081634,
      "loss": 2.9077,
      "step": 43292
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8092918395996094,
      "learning_rate": 0.000549230612651699,
      "loss": 2.9919,
      "step": 43293
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3893721103668213,
      "learning_rate": 0.0005492283357488979,
      "loss": 2.9296,
      "step": 43294
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.2074267864227295,
      "learning_rate": 0.0005492260587997605,
      "loss": 3.1703,
      "step": 43295
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.471019983291626,
      "learning_rate": 0.0005492237818042873,
      "loss": 3.301,
      "step": 43296
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.983890175819397,
      "learning_rate": 0.0005492215047624787,
      "loss": 3.0877,
      "step": 43297
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3892006874084473,
      "learning_rate": 0.0005492192276743351,
      "loss": 3.321,
      "step": 43298
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1467740535736084,
      "learning_rate": 0.000549216950539857,
      "loss": 3.0064,
      "step": 43299
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.1114814281463623,
      "learning_rate": 0.0005492146733590446,
      "loss": 2.9496,
      "step": 43300
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9741445779800415,
      "learning_rate": 0.0005492123961318988,
      "loss": 2.9204,
      "step": 43301
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4241746664047241,
      "learning_rate": 0.0005492101188584195,
      "loss": 3.3245,
      "step": 43302
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5794000625610352,
      "learning_rate": 0.0005492078415386074,
      "loss": 2.826,
      "step": 43303
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.347607135772705,
      "learning_rate": 0.0005492055641724627,
      "loss": 3.0039,
      "step": 43304
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8923101425170898,
      "learning_rate": 0.0005492032867599862,
      "loss": 2.956,
      "step": 43305
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.030123710632324,
      "learning_rate": 0.0005492010093011779,
      "loss": 3.3271,
      "step": 43306
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.983701229095459,
      "learning_rate": 0.0005491987317960387,
      "loss": 3.0749,
      "step": 43307
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7462153434753418,
      "learning_rate": 0.0005491964542445685,
      "loss": 2.9122,
      "step": 43308
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7139371633529663,
      "learning_rate": 0.0005491941766467681,
      "loss": 3.1321,
      "step": 43309
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3311134576797485,
      "learning_rate": 0.0005491918990026377,
      "loss": 3.0372,
      "step": 43310
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1314098834991455,
      "learning_rate": 0.0005491896213121779,
      "loss": 2.9303,
      "step": 43311
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6622542142868042,
      "learning_rate": 0.000549187343575389,
      "loss": 3.053,
      "step": 43312
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.779821515083313,
      "learning_rate": 0.0005491850657922715,
      "loss": 2.9124,
      "step": 43313
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5502965450286865,
      "learning_rate": 0.0005491827879628258,
      "loss": 3.2565,
      "step": 43314
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4655201435089111,
      "learning_rate": 0.0005491805100870523,
      "loss": 3.031,
      "step": 43315
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5051460266113281,
      "learning_rate": 0.0005491782321649514,
      "loss": 2.9882,
      "step": 43316
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.364593267440796,
      "learning_rate": 0.0005491759541965237,
      "loss": 3.0898,
      "step": 43317
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.440086841583252,
      "learning_rate": 0.0005491736761817695,
      "loss": 3.0027,
      "step": 43318
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5611590147018433,
      "learning_rate": 0.000549171398120689,
      "loss": 3.0712,
      "step": 43319
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3932563066482544,
      "learning_rate": 0.0005491691200132829,
      "loss": 2.8471,
      "step": 43320
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.386152744293213,
      "learning_rate": 0.0005491668418595517,
      "loss": 3.1452,
      "step": 43321
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8815711736679077,
      "learning_rate": 0.0005491645636594955,
      "loss": 3.0572,
      "step": 43322
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5365955829620361,
      "learning_rate": 0.000549162285413115,
      "loss": 3.1883,
      "step": 43323
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.33086359500885,
      "learning_rate": 0.0005491600071204105,
      "loss": 3.4153,
      "step": 43324
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2690620422363281,
      "learning_rate": 0.0005491577287813825,
      "loss": 3.1491,
      "step": 43325
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.97622549533844,
      "learning_rate": 0.0005491554503960313,
      "loss": 3.1503,
      "step": 43326
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3887357711791992,
      "learning_rate": 0.0005491531719643575,
      "loss": 3.1262,
      "step": 43327
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5957075357437134,
      "learning_rate": 0.0005491508934863613,
      "loss": 2.8981,
      "step": 43328
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4820808172225952,
      "learning_rate": 0.0005491486149620433,
      "loss": 3.0316,
      "step": 43329
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4028356075286865,
      "learning_rate": 0.0005491463363914039,
      "loss": 3.2207,
      "step": 43330
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5882492065429688,
      "learning_rate": 0.0005491440577744434,
      "loss": 2.8641,
      "step": 43331
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8373050689697266,
      "learning_rate": 0.0005491417791111624,
      "loss": 2.9625,
      "step": 43332
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.738473892211914,
      "learning_rate": 0.0005491395004015613,
      "loss": 3.1142,
      "step": 43333
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.017685651779175,
      "learning_rate": 0.0005491372216456403,
      "loss": 2.8304,
      "step": 43334
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2674978971481323,
      "learning_rate": 0.0005491349428434001,
      "loss": 3.0135,
      "step": 43335
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.1196701526641846,
      "learning_rate": 0.000549132663994841,
      "loss": 3.0049,
      "step": 43336
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.219364643096924,
      "learning_rate": 0.0005491303850999634,
      "loss": 3.1779,
      "step": 43337
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0559613704681396,
      "learning_rate": 0.0005491281061587678,
      "loss": 3.1553,
      "step": 43338
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4632319211959839,
      "learning_rate": 0.0005491258271712546,
      "loss": 3.175,
      "step": 43339
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6081011295318604,
      "learning_rate": 0.0005491235481374241,
      "loss": 3.0387,
      "step": 43340
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5887792110443115,
      "learning_rate": 0.0005491212690572769,
      "loss": 2.9654,
      "step": 43341
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.24735426902771,
      "learning_rate": 0.0005491189899308135,
      "loss": 3.1363,
      "step": 43342
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2198022603988647,
      "learning_rate": 0.0005491167107580338,
      "loss": 3.1399,
      "step": 43343
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5276027917861938,
      "learning_rate": 0.0005491144315389389,
      "loss": 3.2574,
      "step": 43344
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.099113941192627,
      "learning_rate": 0.0005491121522735289,
      "loss": 2.9852,
      "step": 43345
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3570683002471924,
      "learning_rate": 0.0005491098729618042,
      "loss": 3.0114,
      "step": 43346
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.6507761478424072,
      "learning_rate": 0.0005491075936037652,
      "loss": 3.1089,
      "step": 43347
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.581833004951477,
      "learning_rate": 0.0005491053141994125,
      "loss": 3.1381,
      "step": 43348
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.784630060195923,
      "learning_rate": 0.0005491030347487464,
      "loss": 2.9096,
      "step": 43349
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6290651559829712,
      "learning_rate": 0.0005491007552517673,
      "loss": 3.1525,
      "step": 43350
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6893082857131958,
      "learning_rate": 0.0005490984757084758,
      "loss": 2.8832,
      "step": 43351
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6647560596466064,
      "learning_rate": 0.000549096196118872,
      "loss": 3.3354,
      "step": 43352
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4901913404464722,
      "learning_rate": 0.0005490939164829566,
      "loss": 3.281,
      "step": 43353
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.2033852338790894,
      "learning_rate": 0.00054909163680073,
      "loss": 2.9868,
      "step": 43354
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7080005407333374,
      "learning_rate": 0.0005490893570721925,
      "loss": 3.1945,
      "step": 43355
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0364468097686768,
      "learning_rate": 0.0005490870772973446,
      "loss": 3.1135,
      "step": 43356
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9461085796356201,
      "learning_rate": 0.0005490847974761867,
      "loss": 3.2033,
      "step": 43357
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5266469717025757,
      "learning_rate": 0.0005490825176087192,
      "loss": 3.0646,
      "step": 43358
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4846659898757935,
      "learning_rate": 0.0005490802376949426,
      "loss": 3.2242,
      "step": 43359
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6513713598251343,
      "learning_rate": 0.0005490779577348574,
      "loss": 2.959,
      "step": 43360
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5066756010055542,
      "learning_rate": 0.0005490756777284637,
      "loss": 2.8542,
      "step": 43361
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.412630319595337,
      "learning_rate": 0.0005490733976757622,
      "loss": 3.0281,
      "step": 43362
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4386838674545288,
      "learning_rate": 0.0005490711175767533,
      "loss": 3.2568,
      "step": 43363
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5233098268508911,
      "learning_rate": 0.0005490688374314374,
      "loss": 3.0785,
      "step": 43364
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6100685596466064,
      "learning_rate": 0.0005490665572398148,
      "loss": 2.8734,
      "step": 43365
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.3675155639648438,
      "learning_rate": 0.0005490642770018861,
      "loss": 2.9567,
      "step": 43366
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7959673404693604,
      "learning_rate": 0.0005490619967176515,
      "loss": 3.1063,
      "step": 43367
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5999678373336792,
      "learning_rate": 0.0005490597163871118,
      "loss": 3.0175,
      "step": 43368
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.0507493019104004,
      "learning_rate": 0.0005490574360102671,
      "loss": 3.0141,
      "step": 43369
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5529037714004517,
      "learning_rate": 0.000549055155587118,
      "loss": 2.9907,
      "step": 43370
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0514590740203857,
      "learning_rate": 0.0005490528751176647,
      "loss": 3.0071,
      "step": 43371
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8804103136062622,
      "learning_rate": 0.0005490505946019079,
      "loss": 3.0462,
      "step": 43372
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.5211193561553955,
      "learning_rate": 0.0005490483140398477,
      "loss": 2.9365,
      "step": 43373
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6930885314941406,
      "learning_rate": 0.0005490460334314849,
      "loss": 3.3645,
      "step": 43374
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.801074743270874,
      "learning_rate": 0.0005490437527768196,
      "loss": 2.923,
      "step": 43375
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.8126176595687866,
      "learning_rate": 0.0005490414720758525,
      "loss": 2.9833,
      "step": 43376
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4993358850479126,
      "learning_rate": 0.0005490391913285839,
      "loss": 2.9415,
      "step": 43377
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9304156303405762,
      "learning_rate": 0.0005490369105350142,
      "loss": 3.0653,
      "step": 43378
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5419093370437622,
      "learning_rate": 0.0005490346296951437,
      "loss": 3.1782,
      "step": 43379
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7706233263015747,
      "learning_rate": 0.0005490323488089731,
      "loss": 2.9404,
      "step": 43380
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5706077814102173,
      "learning_rate": 0.0005490300678765025,
      "loss": 3.13,
      "step": 43381
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.6102659702301025,
      "learning_rate": 0.0005490277868977327,
      "loss": 3.1566,
      "step": 43382
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.7504783868789673,
      "learning_rate": 0.0005490255058726639,
      "loss": 3.0167,
      "step": 43383
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.4842989444732666,
      "learning_rate": 0.0005490232248012965,
      "loss": 3.1645,
      "step": 43384
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.9427872896194458,
      "learning_rate": 0.0005490209436836311,
      "loss": 3.2086,
      "step": 43385
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.125840663909912,
      "learning_rate": 0.0005490186625196679,
      "loss": 2.913,
      "step": 43386
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.017025947570801,
      "learning_rate": 0.0005490163813094075,
      "loss": 3.1597,
      "step": 43387
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.778688430786133,
      "learning_rate": 0.0005490141000528502,
      "loss": 3.0105,
      "step": 43388
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5045493841171265,
      "learning_rate": 0.0005490118187499965,
      "loss": 2.9391,
      "step": 43389
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.2607452869415283,
      "learning_rate": 0.0005490095374008467,
      "loss": 3.2325,
      "step": 43390
    },
    {
      "epoch": 0.56,
      "grad_norm": 1.5108277797698975,
      "learning_rate": 0.0005490072560054015,
      "loss": 3.0368,
      "step": 43391
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.0163216590881348,
      "learning_rate": 0.0005490049745636611,
      "loss": 3.115,
      "step": 43392
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4837018251419067,
      "learning_rate": 0.0005490026930756259,
      "loss": 2.9959,
      "step": 43393
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3635473251342773,
      "learning_rate": 0.0005490004115412964,
      "loss": 3.0223,
      "step": 43394
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6511427164077759,
      "learning_rate": 0.0005489981299606732,
      "loss": 3.2146,
      "step": 43395
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.774459958076477,
      "learning_rate": 0.0005489958483337563,
      "loss": 3.1031,
      "step": 43396
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.7650668621063232,
      "learning_rate": 0.0005489935666605465,
      "loss": 3.0709,
      "step": 43397
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.452401041984558,
      "learning_rate": 0.000548991284941044,
      "loss": 2.8633,
      "step": 43398
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.5180158615112305,
      "learning_rate": 0.0005489890031752494,
      "loss": 3.1099,
      "step": 43399
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.627486228942871,
      "learning_rate": 0.0005489867213631631,
      "loss": 3.1583,
      "step": 43400
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.5676145553588867,
      "learning_rate": 0.0005489844395047854,
      "loss": 3.0849,
      "step": 43401
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5574567317962646,
      "learning_rate": 0.0005489821576001168,
      "loss": 3.0307,
      "step": 43402
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2958301305770874,
      "learning_rate": 0.0005489798756491578,
      "loss": 3.0648,
      "step": 43403
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.681181788444519,
      "learning_rate": 0.0005489775936519086,
      "loss": 3.2062,
      "step": 43404
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8143603801727295,
      "learning_rate": 0.0005489753116083698,
      "loss": 2.9808,
      "step": 43405
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5448466539382935,
      "learning_rate": 0.0005489730295185419,
      "loss": 2.9382,
      "step": 43406
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6459789276123047,
      "learning_rate": 0.000548970747382425,
      "loss": 3.1368,
      "step": 43407
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4870493412017822,
      "learning_rate": 0.00054896846520002,
      "loss": 3.1198,
      "step": 43408
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2306385040283203,
      "learning_rate": 0.0005489661829713269,
      "loss": 3.0424,
      "step": 43409
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5010398626327515,
      "learning_rate": 0.0005489639006963463,
      "loss": 2.9789,
      "step": 43410
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1812968254089355,
      "learning_rate": 0.0005489616183750786,
      "loss": 3.0811,
      "step": 43411
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6603069305419922,
      "learning_rate": 0.0005489593360075243,
      "loss": 3.2063,
      "step": 43412
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5375109910964966,
      "learning_rate": 0.0005489570535936837,
      "loss": 2.9861,
      "step": 43413
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.365389347076416,
      "learning_rate": 0.0005489547711335573,
      "loss": 3.1848,
      "step": 43414
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6288871765136719,
      "learning_rate": 0.0005489524886271456,
      "loss": 2.826,
      "step": 43415
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.091123104095459,
      "learning_rate": 0.0005489502060744489,
      "loss": 3.0465,
      "step": 43416
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.386978268623352,
      "learning_rate": 0.0005489479234754676,
      "loss": 3.3805,
      "step": 43417
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0777196884155273,
      "learning_rate": 0.0005489456408302021,
      "loss": 3.1814,
      "step": 43418
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2394843101501465,
      "learning_rate": 0.000548943358138653,
      "loss": 2.9097,
      "step": 43419
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9385335445404053,
      "learning_rate": 0.0005489410754008206,
      "loss": 3.1805,
      "step": 43420
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.59627103805542,
      "learning_rate": 0.0005489387926167055,
      "loss": 2.9646,
      "step": 43421
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7820491790771484,
      "learning_rate": 0.0005489365097863078,
      "loss": 2.995,
      "step": 43422
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.2196662425994873,
      "learning_rate": 0.0005489342269096282,
      "loss": 3.1671,
      "step": 43423
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6740206480026245,
      "learning_rate": 0.000548931943986667,
      "loss": 3.1181,
      "step": 43424
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.039729118347168,
      "learning_rate": 0.0005489296610174247,
      "loss": 3.1941,
      "step": 43425
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.7416610717773438,
      "learning_rate": 0.0005489273780019015,
      "loss": 3.1194,
      "step": 43426
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2714788913726807,
      "learning_rate": 0.0005489250949400982,
      "loss": 3.1561,
      "step": 43427
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2360799312591553,
      "learning_rate": 0.0005489228118320149,
      "loss": 3.062,
      "step": 43428
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3242006301879883,
      "learning_rate": 0.0005489205286776523,
      "loss": 2.9542,
      "step": 43429
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.9544341564178467,
      "learning_rate": 0.0005489182454770106,
      "loss": 3.1377,
      "step": 43430
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.29262113571167,
      "learning_rate": 0.0005489159622300902,
      "loss": 3.0936,
      "step": 43431
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1820569038391113,
      "learning_rate": 0.0005489136789368917,
      "loss": 3.1113,
      "step": 43432
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8832018375396729,
      "learning_rate": 0.0005489113955974154,
      "loss": 3.0378,
      "step": 43433
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5217920541763306,
      "learning_rate": 0.0005489091122116618,
      "loss": 3.2181,
      "step": 43434
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4492298364639282,
      "learning_rate": 0.0005489068287796314,
      "loss": 3.1085,
      "step": 43435
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8147794008255005,
      "learning_rate": 0.0005489045453013244,
      "loss": 3.0351,
      "step": 43436
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.454108476638794,
      "learning_rate": 0.0005489022617767414,
      "loss": 2.7618,
      "step": 43437
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5797139406204224,
      "learning_rate": 0.0005488999782058828,
      "loss": 3.2256,
      "step": 43438
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4280741214752197,
      "learning_rate": 0.0005488976945887489,
      "loss": 2.9789,
      "step": 43439
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3567700386047363,
      "learning_rate": 0.0005488954109253404,
      "loss": 3.1056,
      "step": 43440
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4598888158798218,
      "learning_rate": 0.0005488931272156573,
      "loss": 3.1051,
      "step": 43441
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.476759433746338,
      "learning_rate": 0.0005488908434597004,
      "loss": 3.2218,
      "step": 43442
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3693805932998657,
      "learning_rate": 0.00054888855965747,
      "loss": 3.1004,
      "step": 43443
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4422551393508911,
      "learning_rate": 0.0005488862758089665,
      "loss": 2.9305,
      "step": 43444
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.365051031112671,
      "learning_rate": 0.0005488839919141904,
      "loss": 3.3829,
      "step": 43445
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5231647491455078,
      "learning_rate": 0.000548881707973142,
      "loss": 3.027,
      "step": 43446
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4267786741256714,
      "learning_rate": 0.0005488794239858219,
      "loss": 2.6567,
      "step": 43447
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7456881999969482,
      "learning_rate": 0.0005488771399522303,
      "loss": 3.0568,
      "step": 43448
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8566420078277588,
      "learning_rate": 0.0005488748558723677,
      "loss": 3.1844,
      "step": 43449
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5936262607574463,
      "learning_rate": 0.0005488725717462347,
      "loss": 2.873,
      "step": 43450
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9092984199523926,
      "learning_rate": 0.0005488702875738314,
      "loss": 2.9465,
      "step": 43451
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6990474462509155,
      "learning_rate": 0.0005488680033551586,
      "loss": 2.9184,
      "step": 43452
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5813182592391968,
      "learning_rate": 0.0005488657190902165,
      "loss": 2.8238,
      "step": 43453
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9085122346878052,
      "learning_rate": 0.0005488634347790056,
      "loss": 3.1068,
      "step": 43454
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8982950448989868,
      "learning_rate": 0.0005488611504215261,
      "loss": 3.1087,
      "step": 43455
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9147440195083618,
      "learning_rate": 0.0005488588660177788,
      "loss": 2.8488,
      "step": 43456
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7664803266525269,
      "learning_rate": 0.0005488565815677639,
      "loss": 3.0127,
      "step": 43457
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8709226846694946,
      "learning_rate": 0.0005488542970714819,
      "loss": 2.8912,
      "step": 43458
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4092257022857666,
      "learning_rate": 0.0005488520125289331,
      "loss": 3.0117,
      "step": 43459
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0923619270324707,
      "learning_rate": 0.0005488497279401181,
      "loss": 2.9237,
      "step": 43460
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.036564350128174,
      "learning_rate": 0.0005488474433050371,
      "loss": 2.9081,
      "step": 43461
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4332314729690552,
      "learning_rate": 0.0005488451586236909,
      "loss": 2.7549,
      "step": 43462
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4808272123336792,
      "learning_rate": 0.0005488428738960794,
      "loss": 2.9177,
      "step": 43463
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9295374155044556,
      "learning_rate": 0.0005488405891222036,
      "loss": 3.0823,
      "step": 43464
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4487422704696655,
      "learning_rate": 0.0005488383043020634,
      "loss": 2.9272,
      "step": 43465
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.428073525428772,
      "learning_rate": 0.0005488360194356596,
      "loss": 2.9743,
      "step": 43466
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5524041652679443,
      "learning_rate": 0.0005488337345229923,
      "loss": 3.1033,
      "step": 43467
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.361375093460083,
      "learning_rate": 0.0005488314495640624,
      "loss": 2.8004,
      "step": 43468
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.300105094909668,
      "learning_rate": 0.0005488291645588698,
      "loss": 2.9017,
      "step": 43469
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3932074308395386,
      "learning_rate": 0.0005488268795074153,
      "loss": 3.1698,
      "step": 43470
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0578787326812744,
      "learning_rate": 0.000548824594409699,
      "loss": 2.8415,
      "step": 43471
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5981519222259521,
      "learning_rate": 0.0005488223092657218,
      "loss": 3.1117,
      "step": 43472
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7109527587890625,
      "learning_rate": 0.0005488200240754837,
      "loss": 3.0663,
      "step": 43473
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6761342287063599,
      "learning_rate": 0.0005488177388389852,
      "loss": 3.2211,
      "step": 43474
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.606142282485962,
      "learning_rate": 0.0005488154535562268,
      "loss": 2.9293,
      "step": 43475
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5619558095932007,
      "learning_rate": 0.000548813168227209,
      "loss": 3.1838,
      "step": 43476
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.642464280128479,
      "learning_rate": 0.000548810882851932,
      "loss": 3.2002,
      "step": 43477
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5298700332641602,
      "learning_rate": 0.0005488085974303965,
      "loss": 3.1026,
      "step": 43478
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5954242944717407,
      "learning_rate": 0.0005488063119626028,
      "loss": 3.0513,
      "step": 43479
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7917239665985107,
      "learning_rate": 0.0005488040264485511,
      "loss": 2.9172,
      "step": 43480
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3591002225875854,
      "learning_rate": 0.0005488017408882423,
      "loss": 3.0837,
      "step": 43481
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4560859203338623,
      "learning_rate": 0.0005487994552816764,
      "loss": 2.8365,
      "step": 43482
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5233105421066284,
      "learning_rate": 0.0005487971696288539,
      "loss": 2.9541,
      "step": 43483
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6388828754425049,
      "learning_rate": 0.0005487948839297754,
      "loss": 3.2382,
      "step": 43484
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2649803161621094,
      "learning_rate": 0.0005487925981844413,
      "loss": 2.8422,
      "step": 43485
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9502207040786743,
      "learning_rate": 0.0005487903123928519,
      "loss": 3.1112,
      "step": 43486
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6566144227981567,
      "learning_rate": 0.0005487880265550077,
      "loss": 2.9561,
      "step": 43487
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.5733132362365723,
      "learning_rate": 0.0005487857406709091,
      "loss": 2.8936,
      "step": 43488
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.551215410232544,
      "learning_rate": 0.0005487834547405566,
      "loss": 3.1536,
      "step": 43489
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0861122608184814,
      "learning_rate": 0.0005487811687639505,
      "loss": 2.918,
      "step": 43490
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.583583354949951,
      "learning_rate": 0.0005487788827410913,
      "loss": 2.9126,
      "step": 43491
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9471216201782227,
      "learning_rate": 0.0005487765966719794,
      "loss": 2.9723,
      "step": 43492
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.522802710533142,
      "learning_rate": 0.0005487743105566153,
      "loss": 2.9904,
      "step": 43493
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5533028841018677,
      "learning_rate": 0.0005487720243949991,
      "loss": 3.0634,
      "step": 43494
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6374226808547974,
      "learning_rate": 0.0005487697381871318,
      "loss": 3.1026,
      "step": 43495
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.290364980697632,
      "learning_rate": 0.0005487674519330134,
      "loss": 2.9205,
      "step": 43496
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3685377836227417,
      "learning_rate": 0.0005487651656326444,
      "loss": 3.2159,
      "step": 43497
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5173225402832031,
      "learning_rate": 0.0005487628792860253,
      "loss": 2.9676,
      "step": 43498
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.468899130821228,
      "learning_rate": 0.0005487605928931565,
      "loss": 3.1968,
      "step": 43499
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4868230819702148,
      "learning_rate": 0.0005487583064540384,
      "loss": 2.8261,
      "step": 43500
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4451308250427246,
      "learning_rate": 0.0005487560199686713,
      "loss": 3.3734,
      "step": 43501
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6838796138763428,
      "learning_rate": 0.000548753733437056,
      "loss": 3.0911,
      "step": 43502
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.353334903717041,
      "learning_rate": 0.0005487514468591925,
      "loss": 3.1568,
      "step": 43503
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3922724723815918,
      "learning_rate": 0.0005487491602350815,
      "loss": 3.0701,
      "step": 43504
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4321380853652954,
      "learning_rate": 0.0005487468735647233,
      "loss": 2.9608,
      "step": 43505
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.644121527671814,
      "learning_rate": 0.0005487445868481184,
      "loss": 3.1386,
      "step": 43506
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4038434028625488,
      "learning_rate": 0.000548742300085267,
      "loss": 2.8401,
      "step": 43507
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8717219829559326,
      "learning_rate": 0.0005487400132761699,
      "loss": 3.041,
      "step": 43508
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4832390546798706,
      "learning_rate": 0.0005487377264208273,
      "loss": 3.0815,
      "step": 43509
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5510313510894775,
      "learning_rate": 0.0005487354395192398,
      "loss": 3.2981,
      "step": 43510
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8602668046951294,
      "learning_rate": 0.0005487331525714075,
      "loss": 3.0037,
      "step": 43511
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.665163278579712,
      "learning_rate": 0.0005487308655773309,
      "loss": 3.1812,
      "step": 43512
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6238752603530884,
      "learning_rate": 0.0005487285785370108,
      "loss": 3.1418,
      "step": 43513
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2086546421051025,
      "learning_rate": 0.0005487262914504472,
      "loss": 3.0409,
      "step": 43514
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.089108467102051,
      "learning_rate": 0.0005487240043176408,
      "loss": 3.0572,
      "step": 43515
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.123406171798706,
      "learning_rate": 0.0005487217171385918,
      "loss": 3.0989,
      "step": 43516
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.527489185333252,
      "learning_rate": 0.0005487194299133008,
      "loss": 2.9338,
      "step": 43517
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6832643747329712,
      "learning_rate": 0.0005487171426417683,
      "loss": 2.9804,
      "step": 43518
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4645185470581055,
      "learning_rate": 0.0005487148553239943,
      "loss": 2.9653,
      "step": 43519
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6749299764633179,
      "learning_rate": 0.0005487125679599796,
      "loss": 3.0131,
      "step": 43520
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0495176315307617,
      "learning_rate": 0.0005487102805497247,
      "loss": 3.2493,
      "step": 43521
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.921643853187561,
      "learning_rate": 0.0005487079930932298,
      "loss": 3.0758,
      "step": 43522
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.396254539489746,
      "learning_rate": 0.0005487057055904952,
      "loss": 3.0152,
      "step": 43523
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7720117568969727,
      "learning_rate": 0.0005487034180415217,
      "loss": 3.2232,
      "step": 43524
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.8704237937927246,
      "learning_rate": 0.0005487011304463096,
      "loss": 3.0785,
      "step": 43525
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5879069566726685,
      "learning_rate": 0.0005486988428048591,
      "loss": 2.8977,
      "step": 43526
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4951062202453613,
      "learning_rate": 0.0005486965551171709,
      "loss": 2.9224,
      "step": 43527
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6023980379104614,
      "learning_rate": 0.0005486942673832453,
      "loss": 3.1523,
      "step": 43528
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.291590690612793,
      "learning_rate": 0.0005486919796030827,
      "loss": 3.1516,
      "step": 43529
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8272134065628052,
      "learning_rate": 0.0005486896917766836,
      "loss": 3.4928,
      "step": 43530
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5168094635009766,
      "learning_rate": 0.0005486874039040484,
      "loss": 2.96,
      "step": 43531
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.5337398052215576,
      "learning_rate": 0.0005486851159851776,
      "loss": 2.9703,
      "step": 43532
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6976912021636963,
      "learning_rate": 0.0005486828280200713,
      "loss": 2.9359,
      "step": 43533
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1351144313812256,
      "learning_rate": 0.0005486805400087303,
      "loss": 3.0641,
      "step": 43534
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.553914785385132,
      "learning_rate": 0.0005486782519511549,
      "loss": 3.0009,
      "step": 43535
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7403733730316162,
      "learning_rate": 0.0005486759638473456,
      "loss": 2.8773,
      "step": 43536
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5350474119186401,
      "learning_rate": 0.0005486736756973026,
      "loss": 3.0696,
      "step": 43537
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5944164991378784,
      "learning_rate": 0.0005486713875010265,
      "loss": 3.1661,
      "step": 43538
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.103196620941162,
      "learning_rate": 0.0005486690992585177,
      "loss": 2.9828,
      "step": 43539
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.503594398498535,
      "learning_rate": 0.0005486668109697767,
      "loss": 2.6443,
      "step": 43540
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.428915500640869,
      "learning_rate": 0.0005486645226348038,
      "loss": 3.084,
      "step": 43541
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4892925024032593,
      "learning_rate": 0.0005486622342535995,
      "loss": 3.0162,
      "step": 43542
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6909562349319458,
      "learning_rate": 0.0005486599458261641,
      "loss": 3.1434,
      "step": 43543
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1166975498199463,
      "learning_rate": 0.0005486576573524982,
      "loss": 3.2704,
      "step": 43544
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5409760475158691,
      "learning_rate": 0.0005486553688326022,
      "loss": 3.1819,
      "step": 43545
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3898745775222778,
      "learning_rate": 0.0005486530802664764,
      "loss": 3.1374,
      "step": 43546
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5654947757720947,
      "learning_rate": 0.0005486507916541213,
      "loss": 2.9139,
      "step": 43547
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1799156665802,
      "learning_rate": 0.0005486485029955373,
      "loss": 3.0518,
      "step": 43548
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4895225763320923,
      "learning_rate": 0.0005486462142907248,
      "loss": 3.0093,
      "step": 43549
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9483437538146973,
      "learning_rate": 0.0005486439255396844,
      "loss": 2.8747,
      "step": 43550
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3059959411621094,
      "learning_rate": 0.0005486416367424164,
      "loss": 2.9659,
      "step": 43551
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4540350437164307,
      "learning_rate": 0.0005486393478989211,
      "loss": 2.9926,
      "step": 43552
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4448245763778687,
      "learning_rate": 0.0005486370590091992,
      "loss": 3.3867,
      "step": 43553
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.346991777420044,
      "learning_rate": 0.0005486347700732509,
      "loss": 2.9389,
      "step": 43554
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.334371566772461,
      "learning_rate": 0.0005486324810910767,
      "loss": 3.0847,
      "step": 43555
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8697417974472046,
      "learning_rate": 0.000548630192062677,
      "loss": 2.7145,
      "step": 43556
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7689546346664429,
      "learning_rate": 0.0005486279029880524,
      "loss": 3.0171,
      "step": 43557
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6524602174758911,
      "learning_rate": 0.000548625613867203,
      "loss": 3.1802,
      "step": 43558
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2859430313110352,
      "learning_rate": 0.0005486233247001294,
      "loss": 3.3678,
      "step": 43559
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4344289302825928,
      "learning_rate": 0.0005486210354868321,
      "loss": 3.0507,
      "step": 43560
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.836328387260437,
      "learning_rate": 0.0005486187462273116,
      "loss": 3.1116,
      "step": 43561
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.150310754776001,
      "learning_rate": 0.000548616456921568,
      "loss": 2.7725,
      "step": 43562
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8715537786483765,
      "learning_rate": 0.0005486141675696019,
      "loss": 3.2421,
      "step": 43563
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3305654525756836,
      "learning_rate": 0.0005486118781714138,
      "loss": 3.1506,
      "step": 43564
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6520750522613525,
      "learning_rate": 0.000548609588727004,
      "loss": 3.038,
      "step": 43565
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.64388108253479,
      "learning_rate": 0.0005486072992363731,
      "loss": 3.1703,
      "step": 43566
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8864284753799438,
      "learning_rate": 0.0005486050096995213,
      "loss": 3.2575,
      "step": 43567
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.1673550605773926,
      "learning_rate": 0.0005486027201164492,
      "loss": 3.3589,
      "step": 43568
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.57867431640625,
      "learning_rate": 0.0005486004304871571,
      "loss": 3.0866,
      "step": 43569
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.647202491760254,
      "learning_rate": 0.0005485981408116455,
      "loss": 2.8655,
      "step": 43570
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5624107122421265,
      "learning_rate": 0.0005485958510899149,
      "loss": 3.1438,
      "step": 43571
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0864040851593018,
      "learning_rate": 0.0005485935613219656,
      "loss": 3.1563,
      "step": 43572
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5509415864944458,
      "learning_rate": 0.0005485912715077981,
      "loss": 2.9374,
      "step": 43573
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9929429292678833,
      "learning_rate": 0.0005485889816474127,
      "loss": 2.9556,
      "step": 43574
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3437577486038208,
      "learning_rate": 0.0005485866917408099,
      "loss": 3.0237,
      "step": 43575
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.89437735080719,
      "learning_rate": 0.0005485844017879902,
      "loss": 3.0435,
      "step": 43576
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9504802227020264,
      "learning_rate": 0.000548582111788954,
      "loss": 3.1009,
      "step": 43577
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.580156922340393,
      "learning_rate": 0.0005485798217437016,
      "loss": 3.114,
      "step": 43578
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5090515613555908,
      "learning_rate": 0.0005485775316522336,
      "loss": 2.9506,
      "step": 43579
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.693029761314392,
      "learning_rate": 0.0005485752415145504,
      "loss": 2.9541,
      "step": 43580
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.544173240661621,
      "learning_rate": 0.0005485729513306523,
      "loss": 3.0606,
      "step": 43581
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0391201972961426,
      "learning_rate": 0.0005485706611005397,
      "loss": 3.18,
      "step": 43582
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4487189054489136,
      "learning_rate": 0.0005485683708242132,
      "loss": 2.8878,
      "step": 43583
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2745397090911865,
      "learning_rate": 0.0005485660805016732,
      "loss": 3.2544,
      "step": 43584
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6604653596878052,
      "learning_rate": 0.0005485637901329199,
      "loss": 3.25,
      "step": 43585
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8480497598648071,
      "learning_rate": 0.0005485614997179541,
      "loss": 3.1961,
      "step": 43586
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.580629825592041,
      "learning_rate": 0.000548559209256776,
      "loss": 2.8487,
      "step": 43587
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0675735473632812,
      "learning_rate": 0.0005485569187493859,
      "loss": 2.8479,
      "step": 43588
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5466113090515137,
      "learning_rate": 0.0005485546281957846,
      "loss": 3.0046,
      "step": 43589
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.699542760848999,
      "learning_rate": 0.0005485523375959721,
      "loss": 2.8851,
      "step": 43590
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6898361444473267,
      "learning_rate": 0.0005485500469499491,
      "loss": 2.8793,
      "step": 43591
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3119899034500122,
      "learning_rate": 0.000548547756257716,
      "loss": 3.0707,
      "step": 43592
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4342098236083984,
      "learning_rate": 0.0005485454655192731,
      "loss": 3.0969,
      "step": 43593
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4174325466156006,
      "learning_rate": 0.000548543174734621,
      "loss": 2.926,
      "step": 43594
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.652665376663208,
      "learning_rate": 0.0005485408839037599,
      "loss": 3.3191,
      "step": 43595
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4222389459609985,
      "learning_rate": 0.0005485385930266905,
      "loss": 3.1264,
      "step": 43596
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.633395791053772,
      "learning_rate": 0.000548536302103413,
      "loss": 3.0475,
      "step": 43597
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2435860633850098,
      "learning_rate": 0.0005485340111339279,
      "loss": 3.0746,
      "step": 43598
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4946224689483643,
      "learning_rate": 0.0005485317201182357,
      "loss": 3.1129,
      "step": 43599
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.27459716796875,
      "learning_rate": 0.0005485294290563368,
      "loss": 3.0366,
      "step": 43600
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5926125049591064,
      "learning_rate": 0.0005485271379482314,
      "loss": 3.1661,
      "step": 43601
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4039292335510254,
      "learning_rate": 0.0005485248467939204,
      "loss": 3.0422,
      "step": 43602
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2591428756713867,
      "learning_rate": 0.0005485225555934037,
      "loss": 2.8223,
      "step": 43603
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.992573857307434,
      "learning_rate": 0.0005485202643466821,
      "loss": 2.9949,
      "step": 43604
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2479639053344727,
      "learning_rate": 0.0005485179730537557,
      "loss": 3.1752,
      "step": 43605
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9758620262145996,
      "learning_rate": 0.0005485156817146253,
      "loss": 3.0018,
      "step": 43606
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.099724769592285,
      "learning_rate": 0.0005485133903292911,
      "loss": 3.2359,
      "step": 43607
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8621079921722412,
      "learning_rate": 0.0005485110988977536,
      "loss": 3.0168,
      "step": 43608
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3911619186401367,
      "learning_rate": 0.0005485088074200131,
      "loss": 2.9931,
      "step": 43609
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5971543788909912,
      "learning_rate": 0.0005485065158960701,
      "loss": 3.2626,
      "step": 43610
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5628907680511475,
      "learning_rate": 0.0005485042243259252,
      "loss": 3.0504,
      "step": 43611
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2597131729125977,
      "learning_rate": 0.0005485019327095786,
      "loss": 2.8047,
      "step": 43612
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2231260538101196,
      "learning_rate": 0.0005484996410470308,
      "loss": 2.8856,
      "step": 43613
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.592653751373291,
      "learning_rate": 0.0005484973493382822,
      "loss": 3.0372,
      "step": 43614
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.599167823791504,
      "learning_rate": 0.0005484950575833333,
      "loss": 3.1241,
      "step": 43615
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.867911696434021,
      "learning_rate": 0.0005484927657821844,
      "loss": 2.8925,
      "step": 43616
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7433788776397705,
      "learning_rate": 0.0005484904739348361,
      "loss": 3.0173,
      "step": 43617
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5568383932113647,
      "learning_rate": 0.0005484881820412887,
      "loss": 2.8694,
      "step": 43618
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3279097080230713,
      "learning_rate": 0.0005484858901015426,
      "loss": 2.9326,
      "step": 43619
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9112131595611572,
      "learning_rate": 0.0005484835981155984,
      "loss": 3.1298,
      "step": 43620
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5647119283676147,
      "learning_rate": 0.0005484813060834564,
      "loss": 3.1366,
      "step": 43621
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2337615489959717,
      "learning_rate": 0.0005484790140051169,
      "loss": 2.9687,
      "step": 43622
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6345438957214355,
      "learning_rate": 0.0005484767218805805,
      "loss": 3.0702,
      "step": 43623
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4717363119125366,
      "learning_rate": 0.0005484744297098477,
      "loss": 2.9512,
      "step": 43624
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.344834089279175,
      "learning_rate": 0.0005484721374929187,
      "loss": 3.1205,
      "step": 43625
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2230334281921387,
      "learning_rate": 0.0005484698452297941,
      "loss": 3.0556,
      "step": 43626
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7418068647384644,
      "learning_rate": 0.0005484675529204742,
      "loss": 3.0866,
      "step": 43627
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7901932001113892,
      "learning_rate": 0.0005484652605649595,
      "loss": 2.9056,
      "step": 43628
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5310639142990112,
      "learning_rate": 0.0005484629681632505,
      "loss": 2.9092,
      "step": 43629
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6286016702651978,
      "learning_rate": 0.0005484606757153475,
      "loss": 3.1935,
      "step": 43630
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7685290575027466,
      "learning_rate": 0.000548458383221251,
      "loss": 3.0998,
      "step": 43631
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8097188472747803,
      "learning_rate": 0.0005484560906809612,
      "loss": 3.1529,
      "step": 43632
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4481656551361084,
      "learning_rate": 0.000548453798094479,
      "loss": 2.8192,
      "step": 43633
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9245703220367432,
      "learning_rate": 0.0005484515054618044,
      "loss": 3.0645,
      "step": 43634
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4741142988204956,
      "learning_rate": 0.000548449212782938,
      "loss": 3.1041,
      "step": 43635
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3432224988937378,
      "learning_rate": 0.0005484469200578802,
      "loss": 3.2217,
      "step": 43636
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.761099338531494,
      "learning_rate": 0.0005484446272866315,
      "loss": 2.8772,
      "step": 43637
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3266115188598633,
      "learning_rate": 0.0005484423344691921,
      "loss": 3.2228,
      "step": 43638
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.52803635597229,
      "learning_rate": 0.0005484400416055627,
      "loss": 3.1358,
      "step": 43639
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3602447509765625,
      "learning_rate": 0.0005484377486957436,
      "loss": 3.1403,
      "step": 43640
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.27915358543396,
      "learning_rate": 0.0005484354557397352,
      "loss": 3.1028,
      "step": 43641
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5978573560714722,
      "learning_rate": 0.0005484331627375379,
      "loss": 3.1434,
      "step": 43642
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.390336513519287,
      "learning_rate": 0.0005484308696891523,
      "loss": 2.8691,
      "step": 43643
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.785994529724121,
      "learning_rate": 0.0005484285765945787,
      "loss": 3.091,
      "step": 43644
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9198565483093262,
      "learning_rate": 0.0005484262834538176,
      "loss": 3.0124,
      "step": 43645
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4832358360290527,
      "learning_rate": 0.0005484239902668692,
      "loss": 3.1175,
      "step": 43646
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1662840843200684,
      "learning_rate": 0.0005484216970337341,
      "loss": 2.9599,
      "step": 43647
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.885802149772644,
      "learning_rate": 0.0005484194037544128,
      "loss": 3.1391,
      "step": 43648
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5729283094406128,
      "learning_rate": 0.0005484171104289056,
      "loss": 3.0564,
      "step": 43649
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5570141077041626,
      "learning_rate": 0.000548414817057213,
      "loss": 3.0862,
      "step": 43650
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4398889541625977,
      "learning_rate": 0.0005484125236393354,
      "loss": 3.0289,
      "step": 43651
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.289166808128357,
      "learning_rate": 0.0005484102301752732,
      "loss": 3.1413,
      "step": 43652
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5923926830291748,
      "learning_rate": 0.0005484079366650269,
      "loss": 3.1357,
      "step": 43653
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4536947011947632,
      "learning_rate": 0.0005484056431085968,
      "loss": 2.8745,
      "step": 43654
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4168065786361694,
      "learning_rate": 0.0005484033495059834,
      "loss": 3.189,
      "step": 43655
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4261345863342285,
      "learning_rate": 0.0005484010558571872,
      "loss": 2.7678,
      "step": 43656
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4149093627929688,
      "learning_rate": 0.0005483987621622085,
      "loss": 3.0281,
      "step": 43657
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8512517213821411,
      "learning_rate": 0.0005483964684210479,
      "loss": 2.9324,
      "step": 43658
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5861378908157349,
      "learning_rate": 0.0005483941746337055,
      "loss": 2.9821,
      "step": 43659
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.794701099395752,
      "learning_rate": 0.0005483918808001821,
      "loss": 3.1553,
      "step": 43660
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4513285160064697,
      "learning_rate": 0.000548389586920478,
      "loss": 3.2096,
      "step": 43661
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.427250623703003,
      "learning_rate": 0.0005483872929945933,
      "loss": 2.9026,
      "step": 43662
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5109919309616089,
      "learning_rate": 0.0005483849990225289,
      "loss": 2.8141,
      "step": 43663
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6296802759170532,
      "learning_rate": 0.0005483827050042851,
      "loss": 2.7561,
      "step": 43664
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.426145076751709,
      "learning_rate": 0.0005483804109398622,
      "loss": 3.0511,
      "step": 43665
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.636662483215332,
      "learning_rate": 0.0005483781168292606,
      "loss": 3.0987,
      "step": 43666
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.757156252861023,
      "learning_rate": 0.0005483758226724811,
      "loss": 2.7454,
      "step": 43667
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2800710201263428,
      "learning_rate": 0.0005483735284695236,
      "loss": 3.0021,
      "step": 43668
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4954315423965454,
      "learning_rate": 0.0005483712342203888,
      "loss": 3.0785,
      "step": 43669
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8012313842773438,
      "learning_rate": 0.0005483689399250771,
      "loss": 3.2468,
      "step": 43670
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4438194036483765,
      "learning_rate": 0.0005483666455835891,
      "loss": 3.1057,
      "step": 43671
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4596225023269653,
      "learning_rate": 0.000548364351195925,
      "loss": 2.8922,
      "step": 43672
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.738612413406372,
      "learning_rate": 0.0005483620567620851,
      "loss": 2.9119,
      "step": 43673
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5346120595932007,
      "learning_rate": 0.0005483597622820701,
      "loss": 2.6983,
      "step": 43674
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4274662733078003,
      "learning_rate": 0.0005483574677558803,
      "loss": 3.1559,
      "step": 43675
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.930647373199463,
      "learning_rate": 0.0005483551731835162,
      "loss": 2.9975,
      "step": 43676
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.373124122619629,
      "learning_rate": 0.0005483528785649783,
      "loss": 3.1186,
      "step": 43677
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5513572692871094,
      "learning_rate": 0.0005483505839002666,
      "loss": 3.0778,
      "step": 43678
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8525543212890625,
      "learning_rate": 0.0005483482891893821,
      "loss": 3.2071,
      "step": 43679
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.95842707157135,
      "learning_rate": 0.000548345994432325,
      "loss": 3.0276,
      "step": 43680
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.39572274684906,
      "learning_rate": 0.0005483436996290955,
      "loss": 3.2143,
      "step": 43681
    },
    {
      "epoch": 0.57,
      "grad_norm": 4.000117301940918,
      "learning_rate": 0.0005483414047796944,
      "loss": 2.9736,
      "step": 43682
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.443549633026123,
      "learning_rate": 0.0005483391098841218,
      "loss": 2.9354,
      "step": 43683
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.423418641090393,
      "learning_rate": 0.0005483368149423783,
      "loss": 3.0847,
      "step": 43684
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.100999116897583,
      "learning_rate": 0.0005483345199544643,
      "loss": 3.0202,
      "step": 43685
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8605892658233643,
      "learning_rate": 0.0005483322249203803,
      "loss": 2.9376,
      "step": 43686
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.646360158920288,
      "learning_rate": 0.0005483299298401265,
      "loss": 2.8951,
      "step": 43687
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5038810968399048,
      "learning_rate": 0.0005483276347137036,
      "loss": 3.098,
      "step": 43688
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8664337396621704,
      "learning_rate": 0.0005483253395411119,
      "loss": 3.0916,
      "step": 43689
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5215176343917847,
      "learning_rate": 0.0005483230443223519,
      "loss": 2.8284,
      "step": 43690
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.506368637084961,
      "learning_rate": 0.0005483207490574238,
      "loss": 2.9609,
      "step": 43691
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.686109781265259,
      "learning_rate": 0.0005483184537463283,
      "loss": 2.9542,
      "step": 43692
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7373769283294678,
      "learning_rate": 0.0005483161583890656,
      "loss": 3.0922,
      "step": 43693
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.460932970046997,
      "learning_rate": 0.0005483138629856363,
      "loss": 3.0922,
      "step": 43694
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4927704334259033,
      "learning_rate": 0.0005483115675360408,
      "loss": 2.9742,
      "step": 43695
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.373336672782898,
      "learning_rate": 0.0005483092720402795,
      "loss": 2.9084,
      "step": 43696
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9166803359985352,
      "learning_rate": 0.0005483069764983527,
      "loss": 2.928,
      "step": 43697
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.736725091934204,
      "learning_rate": 0.000548304680910261,
      "loss": 3.1323,
      "step": 43698
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8923405408859253,
      "learning_rate": 0.0005483023852760049,
      "loss": 2.8918,
      "step": 43699
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6670889854431152,
      "learning_rate": 0.0005483000895955846,
      "loss": 3.287,
      "step": 43700
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7920551300048828,
      "learning_rate": 0.0005482977938690006,
      "loss": 2.9003,
      "step": 43701
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6806143522262573,
      "learning_rate": 0.0005482954980962533,
      "loss": 3.1466,
      "step": 43702
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5857739448547363,
      "learning_rate": 0.0005482932022773434,
      "loss": 3.1341,
      "step": 43703
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3797547817230225,
      "learning_rate": 0.0005482909064122709,
      "loss": 3.0107,
      "step": 43704
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8987131118774414,
      "learning_rate": 0.0005482886105010365,
      "loss": 3.0524,
      "step": 43705
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4928655624389648,
      "learning_rate": 0.0005482863145436405,
      "loss": 3.1203,
      "step": 43706
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5131629705429077,
      "learning_rate": 0.0005482840185400835,
      "loss": 2.9706,
      "step": 43707
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7672920227050781,
      "learning_rate": 0.0005482817224903658,
      "loss": 3.1778,
      "step": 43708
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.933103084564209,
      "learning_rate": 0.0005482794263944878,
      "loss": 2.9976,
      "step": 43709
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6830105781555176,
      "learning_rate": 0.0005482771302524499,
      "loss": 2.8944,
      "step": 43710
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3522610664367676,
      "learning_rate": 0.0005482748340642527,
      "loss": 2.9463,
      "step": 43711
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2805829048156738,
      "learning_rate": 0.0005482725378298964,
      "loss": 2.9841,
      "step": 43712
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.294595718383789,
      "learning_rate": 0.0005482702415493817,
      "loss": 2.6695,
      "step": 43713
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5772098302841187,
      "learning_rate": 0.0005482679452227089,
      "loss": 3.0189,
      "step": 43714
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4776554107666016,
      "learning_rate": 0.0005482656488498783,
      "loss": 2.8558,
      "step": 43715
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3541109561920166,
      "learning_rate": 0.0005482633524308905,
      "loss": 3.1418,
      "step": 43716
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7207063436508179,
      "learning_rate": 0.0005482610559657457,
      "loss": 2.9742,
      "step": 43717
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7165501117706299,
      "learning_rate": 0.0005482587594544447,
      "loss": 3.0591,
      "step": 43718
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7006393671035767,
      "learning_rate": 0.0005482564628969876,
      "loss": 3.0321,
      "step": 43719
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6393765211105347,
      "learning_rate": 0.0005482541662933748,
      "loss": 2.9411,
      "step": 43720
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5794856548309326,
      "learning_rate": 0.000548251869643607,
      "loss": 2.9481,
      "step": 43721
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.646613359451294,
      "learning_rate": 0.0005482495729476845,
      "loss": 3.1325,
      "step": 43722
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.762416958808899,
      "learning_rate": 0.0005482472762056077,
      "loss": 3.1984,
      "step": 43723
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7093175649642944,
      "learning_rate": 0.0005482449794173771,
      "loss": 2.9734,
      "step": 43724
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5259734392166138,
      "learning_rate": 0.000548242682582993,
      "loss": 3.0684,
      "step": 43725
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6801762580871582,
      "learning_rate": 0.0005482403857024559,
      "loss": 3.0736,
      "step": 43726
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4782291650772095,
      "learning_rate": 0.0005482380887757663,
      "loss": 3.0904,
      "step": 43727
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3820531368255615,
      "learning_rate": 0.0005482357918029245,
      "loss": 3.0036,
      "step": 43728
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.072606325149536,
      "learning_rate": 0.000548233494783931,
      "loss": 3.004,
      "step": 43729
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4362164735794067,
      "learning_rate": 0.0005482311977187862,
      "loss": 3.1743,
      "step": 43730
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6250243186950684,
      "learning_rate": 0.0005482289006074906,
      "loss": 2.9075,
      "step": 43731
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.719733476638794,
      "learning_rate": 0.0005482266034500445,
      "loss": 2.8947,
      "step": 43732
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0630393028259277,
      "learning_rate": 0.0005482243062464483,
      "loss": 2.9952,
      "step": 43733
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8233723640441895,
      "learning_rate": 0.0005482220089967026,
      "loss": 3.1311,
      "step": 43734
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5291788578033447,
      "learning_rate": 0.0005482197117008076,
      "loss": 3.1478,
      "step": 43735
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7640284299850464,
      "learning_rate": 0.0005482174143587641,
      "loss": 2.861,
      "step": 43736
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5350415706634521,
      "learning_rate": 0.0005482151169705722,
      "loss": 2.8267,
      "step": 43737
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3061246871948242,
      "learning_rate": 0.0005482128195362325,
      "loss": 3.1267,
      "step": 43738
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6348116397857666,
      "learning_rate": 0.0005482105220557452,
      "loss": 2.8607,
      "step": 43739
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9817452430725098,
      "learning_rate": 0.000548208224529111,
      "loss": 3.0081,
      "step": 43740
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6590889692306519,
      "learning_rate": 0.0005482059269563302,
      "loss": 3.0968,
      "step": 43741
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7814356088638306,
      "learning_rate": 0.0005482036293374032,
      "loss": 3.1546,
      "step": 43742
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.519267201423645,
      "learning_rate": 0.0005482013316723304,
      "loss": 3.1312,
      "step": 43743
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.468396782875061,
      "learning_rate": 0.0005481990339611123,
      "loss": 2.8978,
      "step": 43744
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2364004850387573,
      "learning_rate": 0.0005481967362037494,
      "loss": 2.9906,
      "step": 43745
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3899972438812256,
      "learning_rate": 0.000548194438400242,
      "loss": 3.041,
      "step": 43746
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6022921800613403,
      "learning_rate": 0.0005481921405505906,
      "loss": 2.8625,
      "step": 43747
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4279205799102783,
      "learning_rate": 0.0005481898426547956,
      "loss": 3.263,
      "step": 43748
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.610734462738037,
      "learning_rate": 0.0005481875447128573,
      "loss": 3.0457,
      "step": 43749
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5908355712890625,
      "learning_rate": 0.0005481852467247763,
      "loss": 3.014,
      "step": 43750
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8945448398590088,
      "learning_rate": 0.0005481829486905531,
      "loss": 3.0826,
      "step": 43751
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6102241277694702,
      "learning_rate": 0.0005481806506101879,
      "loss": 2.9697,
      "step": 43752
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9558777809143066,
      "learning_rate": 0.0005481783524836812,
      "loss": 2.9776,
      "step": 43753
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4701480865478516,
      "learning_rate": 0.0005481760543110336,
      "loss": 3.0084,
      "step": 43754
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3811793327331543,
      "learning_rate": 0.0005481737560922453,
      "loss": 3.0505,
      "step": 43755
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.56547212600708,
      "learning_rate": 0.0005481714578273168,
      "loss": 3.1889,
      "step": 43756
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.697788119316101,
      "learning_rate": 0.0005481691595162486,
      "loss": 3.0807,
      "step": 43757
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.359870195388794,
      "learning_rate": 0.000548166861159041,
      "loss": 3.0594,
      "step": 43758
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6239237785339355,
      "learning_rate": 0.0005481645627556945,
      "loss": 3.1791,
      "step": 43759
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6426583528518677,
      "learning_rate": 0.0005481622643062096,
      "loss": 2.8836,
      "step": 43760
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.8089938163757324,
      "learning_rate": 0.0005481599658105864,
      "loss": 3.0402,
      "step": 43761
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.097994327545166,
      "learning_rate": 0.0005481576672688259,
      "loss": 3.0317,
      "step": 43762
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8156243562698364,
      "learning_rate": 0.0005481553686809281,
      "loss": 3.2611,
      "step": 43763
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4043362140655518,
      "learning_rate": 0.0005481530700468935,
      "loss": 3.1723,
      "step": 43764
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3908244371414185,
      "learning_rate": 0.0005481507713667225,
      "loss": 2.8295,
      "step": 43765
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.7554545402526855,
      "learning_rate": 0.0005481484726404158,
      "loss": 3.118,
      "step": 43766
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4635995626449585,
      "learning_rate": 0.0005481461738679734,
      "loss": 2.9835,
      "step": 43767
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8081239461898804,
      "learning_rate": 0.000548143875049396,
      "loss": 2.9061,
      "step": 43768
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.609284520149231,
      "learning_rate": 0.000548141576184684,
      "loss": 3.042,
      "step": 43769
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5422931909561157,
      "learning_rate": 0.0005481392772738378,
      "loss": 2.9984,
      "step": 43770
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2758299112319946,
      "learning_rate": 0.0005481369783168578,
      "loss": 3.1,
      "step": 43771
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1706278324127197,
      "learning_rate": 0.0005481346793137445,
      "loss": 3.1941,
      "step": 43772
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.086412191390991,
      "learning_rate": 0.0005481323802644982,
      "loss": 3.1604,
      "step": 43773
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4816871881484985,
      "learning_rate": 0.0005481300811691195,
      "loss": 3.1309,
      "step": 43774
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6649702787399292,
      "learning_rate": 0.0005481277820276086,
      "loss": 3.1253,
      "step": 43775
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6191649436950684,
      "learning_rate": 0.0005481254828399661,
      "loss": 3.3147,
      "step": 43776
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6066185235977173,
      "learning_rate": 0.0005481231836061925,
      "loss": 2.8578,
      "step": 43777
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3974438905715942,
      "learning_rate": 0.000548120884326288,
      "loss": 3.1301,
      "step": 43778
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.999211311340332,
      "learning_rate": 0.0005481185850002532,
      "loss": 3.2367,
      "step": 43779
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.662179946899414,
      "learning_rate": 0.0005481162856280884,
      "loss": 2.6809,
      "step": 43780
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6320456266403198,
      "learning_rate": 0.0005481139862097943,
      "loss": 2.9195,
      "step": 43781
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.571088433265686,
      "learning_rate": 0.000548111686745371,
      "loss": 2.9546,
      "step": 43782
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4927812814712524,
      "learning_rate": 0.0005481093872348189,
      "loss": 3.1205,
      "step": 43783
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.756826400756836,
      "learning_rate": 0.0005481070876781388,
      "loss": 3.0786,
      "step": 43784
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3146817684173584,
      "learning_rate": 0.0005481047880753308,
      "loss": 3.2466,
      "step": 43785
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8680310249328613,
      "learning_rate": 0.0005481024884263954,
      "loss": 3.2855,
      "step": 43786
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6261829137802124,
      "learning_rate": 0.0005481001887313331,
      "loss": 3.0042,
      "step": 43787
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6551364660263062,
      "learning_rate": 0.0005480978889901444,
      "loss": 3.2059,
      "step": 43788
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.670483112335205,
      "learning_rate": 0.0005480955892028296,
      "loss": 3.0757,
      "step": 43789
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7636597156524658,
      "learning_rate": 0.0005480932893693891,
      "loss": 2.9414,
      "step": 43790
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.804358959197998,
      "learning_rate": 0.0005480909894898233,
      "loss": 2.8748,
      "step": 43791
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.372551679611206,
      "learning_rate": 0.0005480886895641328,
      "loss": 3.0428,
      "step": 43792
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9552770853042603,
      "learning_rate": 0.0005480863895923179,
      "loss": 2.9119,
      "step": 43793
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.460778832435608,
      "learning_rate": 0.0005480840895743791,
      "loss": 3.2955,
      "step": 43794
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0685911178588867,
      "learning_rate": 0.0005480817895103167,
      "loss": 3.0127,
      "step": 43795
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4212918281555176,
      "learning_rate": 0.0005480794894001312,
      "loss": 2.9391,
      "step": 43796
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7450461387634277,
      "learning_rate": 0.0005480771892438231,
      "loss": 2.9244,
      "step": 43797
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.314760684967041,
      "learning_rate": 0.0005480748890413928,
      "loss": 3.2771,
      "step": 43798
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3976502418518066,
      "learning_rate": 0.0005480725887928407,
      "loss": 2.7568,
      "step": 43799
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4951093196868896,
      "learning_rate": 0.0005480702884981671,
      "loss": 3.0617,
      "step": 43800
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3973923921585083,
      "learning_rate": 0.0005480679881573726,
      "loss": 3.1777,
      "step": 43801
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.282278060913086,
      "learning_rate": 0.0005480656877704576,
      "loss": 3.1394,
      "step": 43802
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1547751426696777,
      "learning_rate": 0.0005480633873374226,
      "loss": 2.9174,
      "step": 43803
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2208988666534424,
      "learning_rate": 0.0005480610868582677,
      "loss": 3.1478,
      "step": 43804
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.518265962600708,
      "learning_rate": 0.0005480587863329938,
      "loss": 3.0777,
      "step": 43805
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6545928716659546,
      "learning_rate": 0.000548056485761601,
      "loss": 3.0057,
      "step": 43806
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7794177532196045,
      "learning_rate": 0.0005480541851440898,
      "loss": 2.9026,
      "step": 43807
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.789917230606079,
      "learning_rate": 0.0005480518844804606,
      "loss": 3.0333,
      "step": 43808
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7433949708938599,
      "learning_rate": 0.000548049583770714,
      "loss": 3.0034,
      "step": 43809
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.446950078010559,
      "learning_rate": 0.0005480472830148502,
      "loss": 3.2024,
      "step": 43810
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7851310968399048,
      "learning_rate": 0.0005480449822128698,
      "loss": 3.0059,
      "step": 43811
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3983575105667114,
      "learning_rate": 0.0005480426813647731,
      "loss": 2.9701,
      "step": 43812
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1124398708343506,
      "learning_rate": 0.0005480403804705606,
      "loss": 3.0521,
      "step": 43813
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.276519536972046,
      "learning_rate": 0.0005480380795302327,
      "loss": 3.1022,
      "step": 43814
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7194746732711792,
      "learning_rate": 0.0005480357785437899,
      "loss": 3.1796,
      "step": 43815
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0253493785858154,
      "learning_rate": 0.0005480334775112324,
      "loss": 3.0586,
      "step": 43816
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6857385635375977,
      "learning_rate": 0.000548031176432561,
      "loss": 3.2114,
      "step": 43817
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3422772884368896,
      "learning_rate": 0.0005480288753077757,
      "loss": 3.0406,
      "step": 43818
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.9442806243896484,
      "learning_rate": 0.0005480265741368773,
      "loss": 2.8859,
      "step": 43819
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7878568172454834,
      "learning_rate": 0.0005480242729198661,
      "loss": 3.1694,
      "step": 43820
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5917654037475586,
      "learning_rate": 0.0005480219716567425,
      "loss": 3.1585,
      "step": 43821
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9250990152359009,
      "learning_rate": 0.0005480196703475068,
      "loss": 3.1117,
      "step": 43822
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2498340606689453,
      "learning_rate": 0.0005480173689921597,
      "loss": 3.1392,
      "step": 43823
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8723843097686768,
      "learning_rate": 0.0005480150675907015,
      "loss": 2.7065,
      "step": 43824
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8845326900482178,
      "learning_rate": 0.0005480127661431325,
      "loss": 3.1403,
      "step": 43825
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.729873538017273,
      "learning_rate": 0.0005480104646494533,
      "loss": 3.0255,
      "step": 43826
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.518898606300354,
      "learning_rate": 0.0005480081631096643,
      "loss": 3.2429,
      "step": 43827
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5594874620437622,
      "learning_rate": 0.0005480058615237659,
      "loss": 3.0451,
      "step": 43828
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.5070159435272217,
      "learning_rate": 0.0005480035598917585,
      "loss": 3.1804,
      "step": 43829
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.400820016860962,
      "learning_rate": 0.0005480012582136425,
      "loss": 2.9939,
      "step": 43830
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5201897621154785,
      "learning_rate": 0.0005479989564894185,
      "loss": 3.0118,
      "step": 43831
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7192895412445068,
      "learning_rate": 0.0005479966547190867,
      "loss": 3.073,
      "step": 43832
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3219811916351318,
      "learning_rate": 0.0005479943529026479,
      "loss": 3.0558,
      "step": 43833
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4174383878707886,
      "learning_rate": 0.000547992051040102,
      "loss": 3.2427,
      "step": 43834
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.772834300994873,
      "learning_rate": 0.0005479897491314497,
      "loss": 2.8935,
      "step": 43835
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4348751306533813,
      "learning_rate": 0.0005479874471766915,
      "loss": 3.1198,
      "step": 43836
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.370202898979187,
      "learning_rate": 0.0005479851451758277,
      "loss": 2.952,
      "step": 43837
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7737261056900024,
      "learning_rate": 0.0005479828431288589,
      "loss": 3.092,
      "step": 43838
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3298377990722656,
      "learning_rate": 0.0005479805410357853,
      "loss": 2.9045,
      "step": 43839
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.948297381401062,
      "learning_rate": 0.0005479782388966075,
      "loss": 2.888,
      "step": 43840
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.967972755432129,
      "learning_rate": 0.0005479759367113259,
      "loss": 2.9504,
      "step": 43841
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.424837350845337,
      "learning_rate": 0.0005479736344799408,
      "loss": 3.2119,
      "step": 43842
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0076160430908203,
      "learning_rate": 0.0005479713322024528,
      "loss": 3.1848,
      "step": 43843
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9805715084075928,
      "learning_rate": 0.0005479690298788622,
      "loss": 3.2031,
      "step": 43844
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7217497825622559,
      "learning_rate": 0.0005479667275091694,
      "loss": 3.01,
      "step": 43845
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4005086421966553,
      "learning_rate": 0.0005479644250933751,
      "loss": 2.9831,
      "step": 43846
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1266329288482666,
      "learning_rate": 0.0005479621226314793,
      "loss": 3.2419,
      "step": 43847
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.68856680393219,
      "learning_rate": 0.0005479598201234828,
      "loss": 2.949,
      "step": 43848
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7542344331741333,
      "learning_rate": 0.0005479575175693859,
      "loss": 3.1951,
      "step": 43849
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.023581027984619,
      "learning_rate": 0.0005479552149691889,
      "loss": 3.101,
      "step": 43850
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4921164512634277,
      "learning_rate": 0.0005479529123228925,
      "loss": 3.0741,
      "step": 43851
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.606703281402588,
      "learning_rate": 0.0005479506096304969,
      "loss": 3.1751,
      "step": 43852
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3347018957138062,
      "learning_rate": 0.0005479483068920026,
      "loss": 3.0008,
      "step": 43853
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.307147741317749,
      "learning_rate": 0.00054794600410741,
      "loss": 2.96,
      "step": 43854
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.7938292026519775,
      "learning_rate": 0.0005479437012767196,
      "loss": 3.0427,
      "step": 43855
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.518372893333435,
      "learning_rate": 0.0005479413983999318,
      "loss": 3.0034,
      "step": 43856
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0777342319488525,
      "learning_rate": 0.000547939095477047,
      "loss": 2.9048,
      "step": 43857
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2465145587921143,
      "learning_rate": 0.0005479367925080655,
      "loss": 3.2844,
      "step": 43858
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.443969964981079,
      "learning_rate": 0.0005479344894929881,
      "loss": 3.1975,
      "step": 43859
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.656290888786316,
      "learning_rate": 0.0005479321864318149,
      "loss": 3.0565,
      "step": 43860
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6862716674804688,
      "learning_rate": 0.0005479298833245464,
      "loss": 2.9649,
      "step": 43861
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.396490454673767,
      "learning_rate": 0.0005479275801711832,
      "loss": 3.0524,
      "step": 43862
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.512128472328186,
      "learning_rate": 0.0005479252769717254,
      "loss": 3.099,
      "step": 43863
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.965710997581482,
      "learning_rate": 0.0005479229737261736,
      "loss": 3.0652,
      "step": 43864
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6505200862884521,
      "learning_rate": 0.0005479206704345283,
      "loss": 3.2603,
      "step": 43865
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4425184726715088,
      "learning_rate": 0.0005479183670967899,
      "loss": 2.8934,
      "step": 43866
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6095558404922485,
      "learning_rate": 0.0005479160637129588,
      "loss": 2.9786,
      "step": 43867
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3196769952774048,
      "learning_rate": 0.0005479137602830353,
      "loss": 3.075,
      "step": 43868
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4120898246765137,
      "learning_rate": 0.0005479114568070202,
      "loss": 3.3706,
      "step": 43869
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5678240060806274,
      "learning_rate": 0.0005479091532849135,
      "loss": 3.1279,
      "step": 43870
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3742010593414307,
      "learning_rate": 0.0005479068497167157,
      "loss": 3.1939,
      "step": 43871
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3267524242401123,
      "learning_rate": 0.0005479045461024275,
      "loss": 3.1349,
      "step": 43872
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9076801538467407,
      "learning_rate": 0.0005479022424420493,
      "loss": 2.9422,
      "step": 43873
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.715003252029419,
      "learning_rate": 0.0005478999387355812,
      "loss": 2.847,
      "step": 43874
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4777535200119019,
      "learning_rate": 0.0005478976349830238,
      "loss": 2.8727,
      "step": 43875
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.272222638130188,
      "learning_rate": 0.0005478953311843776,
      "loss": 3.0041,
      "step": 43876
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.40773606300354,
      "learning_rate": 0.000547893027339643,
      "loss": 2.7726,
      "step": 43877
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6454339027404785,
      "learning_rate": 0.0005478907234488204,
      "loss": 2.9552,
      "step": 43878
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7947194576263428,
      "learning_rate": 0.0005478884195119102,
      "loss": 2.8762,
      "step": 43879
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5793570280075073,
      "learning_rate": 0.0005478861155289129,
      "loss": 3.0514,
      "step": 43880
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.015810966491699,
      "learning_rate": 0.0005478838114998289,
      "loss": 2.8667,
      "step": 43881
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7306175231933594,
      "learning_rate": 0.0005478815074246586,
      "loss": 3.075,
      "step": 43882
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.10276198387146,
      "learning_rate": 0.0005478792033034025,
      "loss": 2.9957,
      "step": 43883
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.279196262359619,
      "learning_rate": 0.0005478768991360608,
      "loss": 3.0301,
      "step": 43884
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3574819564819336,
      "learning_rate": 0.0005478745949226343,
      "loss": 2.7749,
      "step": 43885
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6567529439926147,
      "learning_rate": 0.0005478722906631233,
      "loss": 2.9061,
      "step": 43886
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.106339454650879,
      "learning_rate": 0.000547869986357528,
      "loss": 3.2762,
      "step": 43887
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.822159767150879,
      "learning_rate": 0.000547867682005849,
      "loss": 2.9707,
      "step": 43888
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5219650268554688,
      "learning_rate": 0.0005478653776080866,
      "loss": 3.1449,
      "step": 43889
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7639164924621582,
      "learning_rate": 0.0005478630731642416,
      "loss": 3.0308,
      "step": 43890
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.2706491947174072,
      "learning_rate": 0.000547860768674314,
      "loss": 3.0665,
      "step": 43891
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3368139266967773,
      "learning_rate": 0.0005478584641383045,
      "loss": 3.005,
      "step": 43892
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6303337812423706,
      "learning_rate": 0.0005478561595562134,
      "loss": 2.8606,
      "step": 43893
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.9761922359466553,
      "learning_rate": 0.0005478538549280412,
      "loss": 3.1033,
      "step": 43894
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8000763654708862,
      "learning_rate": 0.0005478515502537882,
      "loss": 3.0233,
      "step": 43895
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6754913330078125,
      "learning_rate": 0.000547849245533455,
      "loss": 3.2655,
      "step": 43896
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3511276245117188,
      "learning_rate": 0.0005478469407670419,
      "loss": 2.966,
      "step": 43897
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5487722158432007,
      "learning_rate": 0.0005478446359545495,
      "loss": 3.1099,
      "step": 43898
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3221588134765625,
      "learning_rate": 0.000547842331095978,
      "loss": 2.9327,
      "step": 43899
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.262728691101074,
      "learning_rate": 0.0005478400261913279,
      "loss": 2.7888,
      "step": 43900
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0451130867004395,
      "learning_rate": 0.0005478377212405996,
      "loss": 3.1461,
      "step": 43901
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5616830587387085,
      "learning_rate": 0.0005478354162437937,
      "loss": 3.0577,
      "step": 43902
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0681393146514893,
      "learning_rate": 0.0005478331112009105,
      "loss": 2.8628,
      "step": 43903
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.1543383598327637,
      "learning_rate": 0.0005478308061119504,
      "loss": 3.1232,
      "step": 43904
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.271719217300415,
      "learning_rate": 0.000547828500976914,
      "loss": 3.0357,
      "step": 43905
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4014923572540283,
      "learning_rate": 0.0005478261957958014,
      "loss": 3.2132,
      "step": 43906
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1225054264068604,
      "learning_rate": 0.0005478238905686134,
      "loss": 3.1098,
      "step": 43907
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2917051315307617,
      "learning_rate": 0.0005478215852953503,
      "loss": 3.2996,
      "step": 43908
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6632870435714722,
      "learning_rate": 0.0005478192799760124,
      "loss": 3.2002,
      "step": 43909
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5809452533721924,
      "learning_rate": 0.0005478169746106001,
      "loss": 3.2724,
      "step": 43910
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6636749505996704,
      "learning_rate": 0.0005478146691991142,
      "loss": 2.9575,
      "step": 43911
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.656129002571106,
      "learning_rate": 0.0005478123637415546,
      "loss": 2.8784,
      "step": 43912
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2079193592071533,
      "learning_rate": 0.0005478100582379222,
      "loss": 3.0396,
      "step": 43913
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6001458168029785,
      "learning_rate": 0.0005478077526882173,
      "loss": 3.0627,
      "step": 43914
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6143580675125122,
      "learning_rate": 0.0005478054470924401,
      "loss": 3.1301,
      "step": 43915
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.796920657157898,
      "learning_rate": 0.0005478031414505913,
      "loss": 3.0531,
      "step": 43916
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6893445253372192,
      "learning_rate": 0.0005478008357626711,
      "loss": 3.0752,
      "step": 43917
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6846750974655151,
      "learning_rate": 0.0005477985300286801,
      "loss": 3.0481,
      "step": 43918
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5550252199172974,
      "learning_rate": 0.0005477962242486186,
      "loss": 3.1336,
      "step": 43919
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6309548616409302,
      "learning_rate": 0.0005477939184224872,
      "loss": 3.1337,
      "step": 43920
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5308431386947632,
      "learning_rate": 0.0005477916125502862,
      "loss": 3.1796,
      "step": 43921
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6019611358642578,
      "learning_rate": 0.000547789306632016,
      "loss": 3.0689,
      "step": 43922
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4200239181518555,
      "learning_rate": 0.0005477870006676772,
      "loss": 3.1031,
      "step": 43923
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.48368239402771,
      "learning_rate": 0.0005477846946572701,
      "loss": 3.0284,
      "step": 43924
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.754872441291809,
      "learning_rate": 0.0005477823886007951,
      "loss": 3.1194,
      "step": 43925
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6487982273101807,
      "learning_rate": 0.0005477800824982527,
      "loss": 3.0585,
      "step": 43926
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7597404718399048,
      "learning_rate": 0.0005477777763496434,
      "loss": 3.2787,
      "step": 43927
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2640888690948486,
      "learning_rate": 0.0005477754701549674,
      "loss": 2.742,
      "step": 43928
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6472010612487793,
      "learning_rate": 0.0005477731639142253,
      "loss": 3.0578,
      "step": 43929
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3786793947219849,
      "learning_rate": 0.0005477708576274175,
      "loss": 3.1592,
      "step": 43930
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7275453805923462,
      "learning_rate": 0.0005477685512945444,
      "loss": 3.0037,
      "step": 43931
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.38010573387146,
      "learning_rate": 0.0005477662449156064,
      "loss": 3.2366,
      "step": 43932
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4614312648773193,
      "learning_rate": 0.0005477639384906042,
      "loss": 2.911,
      "step": 43933
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.770959496498108,
      "learning_rate": 0.0005477616320195377,
      "loss": 3.0874,
      "step": 43934
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4593737125396729,
      "learning_rate": 0.0005477593255024079,
      "loss": 2.9232,
      "step": 43935
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6524198055267334,
      "learning_rate": 0.0005477570189392148,
      "loss": 2.9042,
      "step": 43936
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.134711980819702,
      "learning_rate": 0.0005477547123299591,
      "loss": 3.3653,
      "step": 43937
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8036185503005981,
      "learning_rate": 0.0005477524056746411,
      "loss": 3.3155,
      "step": 43938
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9054582118988037,
      "learning_rate": 0.0005477500989732612,
      "loss": 3.1749,
      "step": 43939
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9054346084594727,
      "learning_rate": 0.0005477477922258199,
      "loss": 2.979,
      "step": 43940
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.333350658416748,
      "learning_rate": 0.0005477454854323176,
      "loss": 3.4303,
      "step": 43941
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.516798973083496,
      "learning_rate": 0.0005477431785927547,
      "loss": 3.1959,
      "step": 43942
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.8084099292755127,
      "learning_rate": 0.0005477408717071318,
      "loss": 2.8256,
      "step": 43943
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7145534753799438,
      "learning_rate": 0.000547738564775449,
      "loss": 2.9636,
      "step": 43944
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3956639766693115,
      "learning_rate": 0.0005477362577977072,
      "loss": 3.2001,
      "step": 43945
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7323527336120605,
      "learning_rate": 0.0005477339507739062,
      "loss": 3.0963,
      "step": 43946
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8120524883270264,
      "learning_rate": 0.0005477316437040471,
      "loss": 3.0589,
      "step": 43947
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5171457529067993,
      "learning_rate": 0.0005477293365881298,
      "loss": 2.864,
      "step": 43948
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9478579759597778,
      "learning_rate": 0.0005477270294261551,
      "loss": 2.9776,
      "step": 43949
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3663609027862549,
      "learning_rate": 0.0005477247222181232,
      "loss": 2.9751,
      "step": 43950
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8860853910446167,
      "learning_rate": 0.0005477224149640345,
      "loss": 3.1676,
      "step": 43951
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.557194709777832,
      "learning_rate": 0.0005477201076638897,
      "loss": 2.9124,
      "step": 43952
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.5739028453826904,
      "learning_rate": 0.0005477178003176889,
      "loss": 3.0814,
      "step": 43953
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6289267539978027,
      "learning_rate": 0.0005477154929254328,
      "loss": 3.0705,
      "step": 43954
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6287918090820312,
      "learning_rate": 0.0005477131854871217,
      "loss": 3.1688,
      "step": 43955
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6876463890075684,
      "learning_rate": 0.000547710878002756,
      "loss": 2.9782,
      "step": 43956
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3447608947753906,
      "learning_rate": 0.0005477085704723361,
      "loss": 3.267,
      "step": 43957
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.765232801437378,
      "learning_rate": 0.0005477062628958626,
      "loss": 2.8853,
      "step": 43958
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.510123372077942,
      "learning_rate": 0.0005477039552733359,
      "loss": 2.746,
      "step": 43959
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9155317544937134,
      "learning_rate": 0.0005477016476047562,
      "loss": 3.0503,
      "step": 43960
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.87082576751709,
      "learning_rate": 0.0005476993398901242,
      "loss": 3.1772,
      "step": 43961
    },
    {
      "epoch": 0.57,
      "grad_norm": 4.612054824829102,
      "learning_rate": 0.0005476970321294402,
      "loss": 2.9652,
      "step": 43962
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4734301567077637,
      "learning_rate": 0.0005476947243227045,
      "loss": 3.0735,
      "step": 43963
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.5417490005493164,
      "learning_rate": 0.0005476924164699179,
      "loss": 2.9685,
      "step": 43964
    },
    {
      "epoch": 0.57,
      "grad_norm": 4.724314212799072,
      "learning_rate": 0.0005476901085710805,
      "loss": 3.2017,
      "step": 43965
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6970648765563965,
      "learning_rate": 0.0005476878006261929,
      "loss": 2.9904,
      "step": 43966
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5534648895263672,
      "learning_rate": 0.0005476854926352553,
      "loss": 2.9343,
      "step": 43967
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.5697319507598877,
      "learning_rate": 0.0005476831845982684,
      "loss": 2.9928,
      "step": 43968
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5420714616775513,
      "learning_rate": 0.0005476808765152325,
      "loss": 2.955,
      "step": 43969
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3265520334243774,
      "learning_rate": 0.0005476785683861482,
      "loss": 3.0807,
      "step": 43970
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4184170961380005,
      "learning_rate": 0.0005476762602110156,
      "loss": 3.0681,
      "step": 43971
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3210809230804443,
      "learning_rate": 0.0005476739519898353,
      "loss": 3.3699,
      "step": 43972
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6468465328216553,
      "learning_rate": 0.0005476716437226078,
      "loss": 3.1518,
      "step": 43973
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.39510977268219,
      "learning_rate": 0.0005476693354093334,
      "loss": 3.4392,
      "step": 43974
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5455899238586426,
      "learning_rate": 0.0005476670270500127,
      "loss": 3.1473,
      "step": 43975
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.853009581565857,
      "learning_rate": 0.000547664718644646,
      "loss": 3.0191,
      "step": 43976
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.885040044784546,
      "learning_rate": 0.0005476624101932336,
      "loss": 3.0356,
      "step": 43977
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6728780269622803,
      "learning_rate": 0.0005476601016957763,
      "loss": 2.9054,
      "step": 43978
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3312411308288574,
      "learning_rate": 0.0005476577931522742,
      "loss": 2.8235,
      "step": 43979
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.381969690322876,
      "learning_rate": 0.0005476554845627279,
      "loss": 2.9663,
      "step": 43980
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7557728290557861,
      "learning_rate": 0.0005476531759271377,
      "loss": 3.191,
      "step": 43981
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5345003604888916,
      "learning_rate": 0.0005476508672455041,
      "loss": 2.9283,
      "step": 43982
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.680687189102173,
      "learning_rate": 0.0005476485585178277,
      "loss": 2.9277,
      "step": 43983
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.2929205894470215,
      "learning_rate": 0.0005476462497441085,
      "loss": 2.9488,
      "step": 43984
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.8145551681518555,
      "learning_rate": 0.0005476439409243473,
      "loss": 2.9844,
      "step": 43985
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4358609914779663,
      "learning_rate": 0.0005476416320585445,
      "loss": 3.0127,
      "step": 43986
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2913870811462402,
      "learning_rate": 0.0005476393231467003,
      "loss": 2.9229,
      "step": 43987
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.1133081912994385,
      "learning_rate": 0.0005476370141888154,
      "loss": 3.1029,
      "step": 43988
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9114692211151123,
      "learning_rate": 0.00054763470518489,
      "loss": 3.2767,
      "step": 43989
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1640050411224365,
      "learning_rate": 0.0005476323961349247,
      "loss": 2.9323,
      "step": 43990
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.941309928894043,
      "learning_rate": 0.0005476300870389198,
      "loss": 3.1059,
      "step": 43991
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.242696523666382,
      "learning_rate": 0.0005476277778968759,
      "loss": 2.9831,
      "step": 43992
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3871114253997803,
      "learning_rate": 0.0005476254687087933,
      "loss": 2.7787,
      "step": 43993
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3174264430999756,
      "learning_rate": 0.0005476231594746724,
      "loss": 3.0614,
      "step": 43994
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.8048198223114014,
      "learning_rate": 0.0005476208501945136,
      "loss": 3.2704,
      "step": 43995
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1031837463378906,
      "learning_rate": 0.0005476185408683176,
      "loss": 2.8068,
      "step": 43996
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8354448080062866,
      "learning_rate": 0.0005476162314960846,
      "loss": 3.0269,
      "step": 43997
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7237576246261597,
      "learning_rate": 0.0005476139220778149,
      "loss": 3.1071,
      "step": 43998
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6697304248809814,
      "learning_rate": 0.0005476116126135092,
      "loss": 3.0249,
      "step": 43999
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.489405632019043,
      "learning_rate": 0.0005476093031031679,
      "loss": 2.8647,
      "step": 44000
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3115884065628052,
      "learning_rate": 0.0005476069935467912,
      "loss": 3.1258,
      "step": 44001
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4142810106277466,
      "learning_rate": 0.0005476046839443798,
      "loss": 3.0199,
      "step": 44002
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4299782514572144,
      "learning_rate": 0.000547602374295934,
      "loss": 3.0053,
      "step": 44003
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5367035865783691,
      "learning_rate": 0.0005476000646014543,
      "loss": 2.8078,
      "step": 44004
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5911753177642822,
      "learning_rate": 0.0005475977548609409,
      "loss": 3.0509,
      "step": 44005
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7061452865600586,
      "learning_rate": 0.0005475954450743947,
      "loss": 3.1703,
      "step": 44006
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.501212239265442,
      "learning_rate": 0.0005475931352418155,
      "loss": 3.1463,
      "step": 44007
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5358620882034302,
      "learning_rate": 0.0005475908253632042,
      "loss": 3.2157,
      "step": 44008
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1502904891967773,
      "learning_rate": 0.0005475885154385612,
      "loss": 2.9881,
      "step": 44009
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4949666261672974,
      "learning_rate": 0.0005475862054678867,
      "loss": 2.829,
      "step": 44010
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9327162504196167,
      "learning_rate": 0.0005475838954511812,
      "loss": 3.0165,
      "step": 44011
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8992935419082642,
      "learning_rate": 0.0005475815853884453,
      "loss": 3.0819,
      "step": 44012
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7612663507461548,
      "learning_rate": 0.0005475792752796793,
      "loss": 2.9526,
      "step": 44013
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4687896966934204,
      "learning_rate": 0.0005475769651248836,
      "loss": 2.7185,
      "step": 44014
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5401748418807983,
      "learning_rate": 0.0005475746549240587,
      "loss": 3.062,
      "step": 44015
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.537665843963623,
      "learning_rate": 0.0005475723446772049,
      "loss": 2.972,
      "step": 44016
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5922783613204956,
      "learning_rate": 0.0005475700343843228,
      "loss": 3.167,
      "step": 44017
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5686979293823242,
      "learning_rate": 0.0005475677240454128,
      "loss": 2.8629,
      "step": 44018
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6114026308059692,
      "learning_rate": 0.0005475654136604752,
      "loss": 3.1882,
      "step": 44019
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.786007046699524,
      "learning_rate": 0.0005475631032295106,
      "loss": 3.0694,
      "step": 44020
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.652878999710083,
      "learning_rate": 0.0005475607927525193,
      "loss": 3.3093,
      "step": 44021
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5864590406417847,
      "learning_rate": 0.0005475584822295017,
      "loss": 3.0649,
      "step": 44022
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.587827444076538,
      "learning_rate": 0.0005475561716604585,
      "loss": 3.045,
      "step": 44023
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8143038749694824,
      "learning_rate": 0.0005475538610453897,
      "loss": 3.1379,
      "step": 44024
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3583950996398926,
      "learning_rate": 0.0005475515503842963,
      "loss": 3.2714,
      "step": 44025
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7660706043243408,
      "learning_rate": 0.000547549239677178,
      "loss": 3.1787,
      "step": 44026
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4126524925231934,
      "learning_rate": 0.0005475469289240359,
      "loss": 3.0807,
      "step": 44027
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4724233150482178,
      "learning_rate": 0.0005475446181248699,
      "loss": 3.2249,
      "step": 44028
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9684003591537476,
      "learning_rate": 0.0005475423072796809,
      "loss": 3.1398,
      "step": 44029
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4968078136444092,
      "learning_rate": 0.000547539996388469,
      "loss": 3.2465,
      "step": 44030
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6853394508361816,
      "learning_rate": 0.0005475376854512349,
      "loss": 2.8658,
      "step": 44031
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.1927733421325684,
      "learning_rate": 0.0005475353744679787,
      "loss": 3.133,
      "step": 44032
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5521451234817505,
      "learning_rate": 0.0005475330634387011,
      "loss": 3.1964,
      "step": 44033
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4558939933776855,
      "learning_rate": 0.0005475307523634024,
      "loss": 3.1403,
      "step": 44034
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3702558279037476,
      "learning_rate": 0.000547528441242083,
      "loss": 3.1561,
      "step": 44035
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3595385551452637,
      "learning_rate": 0.0005475261300747435,
      "loss": 3.056,
      "step": 44036
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4594777822494507,
      "learning_rate": 0.0005475238188613841,
      "loss": 2.9478,
      "step": 44037
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7913672924041748,
      "learning_rate": 0.0005475215076020055,
      "loss": 2.7791,
      "step": 44038
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.089143753051758,
      "learning_rate": 0.0005475191962966079,
      "loss": 3.0611,
      "step": 44039
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5980733633041382,
      "learning_rate": 0.0005475168849451918,
      "loss": 3.1276,
      "step": 44040
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.816096544265747,
      "learning_rate": 0.0005475145735477576,
      "loss": 3.1497,
      "step": 44041
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.862853765487671,
      "learning_rate": 0.0005475122621043059,
      "loss": 2.6542,
      "step": 44042
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4944583177566528,
      "learning_rate": 0.0005475099506148369,
      "loss": 3.0751,
      "step": 44043
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9116156101226807,
      "learning_rate": 0.0005475076390793511,
      "loss": 3.1536,
      "step": 44044
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7450790405273438,
      "learning_rate": 0.000547505327497849,
      "loss": 2.9076,
      "step": 44045
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9191248416900635,
      "learning_rate": 0.0005475030158703311,
      "loss": 3.0426,
      "step": 44046
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.07419490814209,
      "learning_rate": 0.0005475007041967976,
      "loss": 2.9996,
      "step": 44047
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.4696688652038574,
      "learning_rate": 0.0005474983924772489,
      "loss": 3.4276,
      "step": 44048
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5262916088104248,
      "learning_rate": 0.0005474960807116859,
      "loss": 2.8535,
      "step": 44049
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.627520203590393,
      "learning_rate": 0.0005474937689001084,
      "loss": 3.0153,
      "step": 44050
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.62285578250885,
      "learning_rate": 0.0005474914570425173,
      "loss": 3.0591,
      "step": 44051
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4078251123428345,
      "learning_rate": 0.0005474891451389128,
      "loss": 3.0916,
      "step": 44052
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.445712685585022,
      "learning_rate": 0.0005474868331892955,
      "loss": 2.9582,
      "step": 44053
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.75519859790802,
      "learning_rate": 0.0005474845211936656,
      "loss": 3.1018,
      "step": 44054
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6885701417922974,
      "learning_rate": 0.0005474822091520237,
      "loss": 3.2204,
      "step": 44055
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4997339248657227,
      "learning_rate": 0.0005474798970643703,
      "loss": 2.9793,
      "step": 44056
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7662632465362549,
      "learning_rate": 0.0005474775849307056,
      "loss": 3.1937,
      "step": 44057
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5950909852981567,
      "learning_rate": 0.0005474752727510302,
      "loss": 3.2383,
      "step": 44058
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3315626382827759,
      "learning_rate": 0.0005474729605253444,
      "loss": 3.1391,
      "step": 44059
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.253943681716919,
      "learning_rate": 0.0005474706482536487,
      "loss": 2.7862,
      "step": 44060
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.697810173034668,
      "learning_rate": 0.0005474683359359436,
      "loss": 3.0101,
      "step": 44061
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5531045198440552,
      "learning_rate": 0.0005474660235722294,
      "loss": 3.1005,
      "step": 44062
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.857786178588867,
      "learning_rate": 0.0005474637111625067,
      "loss": 3.2096,
      "step": 44063
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.3407740592956543,
      "learning_rate": 0.0005474613987067758,
      "loss": 3.0457,
      "step": 44064
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5970197916030884,
      "learning_rate": 0.0005474590862050371,
      "loss": 3.1876,
      "step": 44065
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4579312801361084,
      "learning_rate": 0.000547456773657291,
      "loss": 3.0526,
      "step": 44066
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9301230907440186,
      "learning_rate": 0.0005474544610635382,
      "loss": 3.2964,
      "step": 44067
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5909247398376465,
      "learning_rate": 0.0005474521484237788,
      "loss": 2.79,
      "step": 44068
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.761584758758545,
      "learning_rate": 0.0005474498357380135,
      "loss": 3.0193,
      "step": 44069
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.805534839630127,
      "learning_rate": 0.0005474475230062425,
      "loss": 3.2148,
      "step": 44070
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5241663455963135,
      "learning_rate": 0.0005474452102284664,
      "loss": 3.1052,
      "step": 44071
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4205498695373535,
      "learning_rate": 0.0005474428974046855,
      "loss": 3.1321,
      "step": 44072
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5110963582992554,
      "learning_rate": 0.0005474405845349004,
      "loss": 2.898,
      "step": 44073
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9829307794570923,
      "learning_rate": 0.0005474382716191113,
      "loss": 3.1002,
      "step": 44074
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.422044038772583,
      "learning_rate": 0.0005474359586573189,
      "loss": 2.9742,
      "step": 44075
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7152903079986572,
      "learning_rate": 0.0005474336456495234,
      "loss": 3.3686,
      "step": 44076
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8629852533340454,
      "learning_rate": 0.0005474313325957253,
      "loss": 2.8485,
      "step": 44077
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5002646446228027,
      "learning_rate": 0.000547429019495925,
      "loss": 3.1917,
      "step": 44078
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8640730381011963,
      "learning_rate": 0.0005474267063501231,
      "loss": 3.0374,
      "step": 44079
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4263075590133667,
      "learning_rate": 0.0005474243931583198,
      "loss": 2.992,
      "step": 44080
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3297090530395508,
      "learning_rate": 0.0005474220799205156,
      "loss": 3.068,
      "step": 44081
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8561820983886719,
      "learning_rate": 0.0005474197666367111,
      "loss": 2.9938,
      "step": 44082
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5012485980987549,
      "learning_rate": 0.0005474174533069066,
      "loss": 3.1349,
      "step": 44083
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4814351797103882,
      "learning_rate": 0.0005474151399311024,
      "loss": 2.9484,
      "step": 44084
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4326341152191162,
      "learning_rate": 0.0005474128265092991,
      "loss": 3.238,
      "step": 44085
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.46024751663208,
      "learning_rate": 0.0005474105130414971,
      "loss": 3.0647,
      "step": 44086
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6916090250015259,
      "learning_rate": 0.0005474081995276969,
      "loss": 3.0893,
      "step": 44087
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.35721492767334,
      "learning_rate": 0.0005474058859678986,
      "loss": 2.9891,
      "step": 44088
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.517855405807495,
      "learning_rate": 0.000547403572362103,
      "loss": 3.0724,
      "step": 44089
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5783673524856567,
      "learning_rate": 0.0005474012587103105,
      "loss": 3.2554,
      "step": 44090
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2566187381744385,
      "learning_rate": 0.0005473989450125214,
      "loss": 3.1354,
      "step": 44091
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.1652510166168213,
      "learning_rate": 0.0005473966312687362,
      "loss": 3.0836,
      "step": 44092
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.364808201789856,
      "learning_rate": 0.0005473943174789552,
      "loss": 3.3483,
      "step": 44093
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.872854232788086,
      "learning_rate": 0.0005473920036431789,
      "loss": 2.942,
      "step": 44094
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8409016132354736,
      "learning_rate": 0.0005473896897614078,
      "loss": 3.0172,
      "step": 44095
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7970952987670898,
      "learning_rate": 0.0005473873758336424,
      "loss": 2.9663,
      "step": 44096
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6003077030181885,
      "learning_rate": 0.0005473850618598828,
      "loss": 3.0385,
      "step": 44097
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.9129557609558105,
      "learning_rate": 0.0005473827478401299,
      "loss": 3.4397,
      "step": 44098
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.090338945388794,
      "learning_rate": 0.0005473804337743837,
      "loss": 3.2382,
      "step": 44099
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.047130584716797,
      "learning_rate": 0.0005473781196626449,
      "loss": 3.0452,
      "step": 44100
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.7520065307617188,
      "learning_rate": 0.0005473758055049138,
      "loss": 3.0074,
      "step": 44101
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.170271396636963,
      "learning_rate": 0.0005473734913011908,
      "loss": 3.1502,
      "step": 44102
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.3061680793762207,
      "learning_rate": 0.0005473711770514765,
      "loss": 2.907,
      "step": 44103
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3582525253295898,
      "learning_rate": 0.0005473688627557711,
      "loss": 3.0325,
      "step": 44104
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0126733779907227,
      "learning_rate": 0.0005473665484140753,
      "loss": 3.203,
      "step": 44105
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.742847204208374,
      "learning_rate": 0.0005473642340263894,
      "loss": 3.0379,
      "step": 44106
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9733264446258545,
      "learning_rate": 0.0005473619195927137,
      "loss": 3.0505,
      "step": 44107
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2616294622421265,
      "learning_rate": 0.0005473596051130488,
      "loss": 3.0656,
      "step": 44108
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8110514879226685,
      "learning_rate": 0.0005473572905873951,
      "loss": 3.225,
      "step": 44109
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.6674492359161377,
      "learning_rate": 0.000547354976015753,
      "loss": 3.0793,
      "step": 44110
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.682061195373535,
      "learning_rate": 0.0005473526613981229,
      "loss": 3.2685,
      "step": 44111
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3792423009872437,
      "learning_rate": 0.0005473503467345053,
      "loss": 2.9639,
      "step": 44112
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.0442678928375244,
      "learning_rate": 0.0005473480320249007,
      "loss": 3.1724,
      "step": 44113
    },
    {
      "epoch": 0.57,
      "grad_norm": 4.635446071624756,
      "learning_rate": 0.0005473457172693093,
      "loss": 2.9369,
      "step": 44114
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6489617824554443,
      "learning_rate": 0.0005473434024677317,
      "loss": 3.1747,
      "step": 44115
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4081816673278809,
      "learning_rate": 0.0005473410876201683,
      "loss": 3.015,
      "step": 44116
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.544310450553894,
      "learning_rate": 0.0005473387727266195,
      "loss": 3.0352,
      "step": 44117
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.2197468280792236,
      "learning_rate": 0.0005473364577870858,
      "loss": 2.878,
      "step": 44118
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0046579837799072,
      "learning_rate": 0.0005473341428015676,
      "loss": 3.1036,
      "step": 44119
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8163299560546875,
      "learning_rate": 0.0005473318277700652,
      "loss": 3.2266,
      "step": 44120
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8348854780197144,
      "learning_rate": 0.0005473295126925793,
      "loss": 3.1695,
      "step": 44121
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6315914392471313,
      "learning_rate": 0.0005473271975691101,
      "loss": 3.0276,
      "step": 44122
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3891607522964478,
      "learning_rate": 0.0005473248823996581,
      "loss": 3.2938,
      "step": 44123
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6856353282928467,
      "learning_rate": 0.0005473225671842237,
      "loss": 3.1419,
      "step": 44124
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4279015064239502,
      "learning_rate": 0.0005473202519228074,
      "loss": 3.0096,
      "step": 44125
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6554985046386719,
      "learning_rate": 0.0005473179366154095,
      "loss": 2.9123,
      "step": 44126
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4620630741119385,
      "learning_rate": 0.0005473156212620307,
      "loss": 3.0805,
      "step": 44127
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6005631685256958,
      "learning_rate": 0.0005473133058626711,
      "loss": 3.1077,
      "step": 44128
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4964959621429443,
      "learning_rate": 0.0005473109904173314,
      "loss": 2.9444,
      "step": 44129
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.684241771697998,
      "learning_rate": 0.0005473086749260119,
      "loss": 3.2048,
      "step": 44130
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.897250771522522,
      "learning_rate": 0.000547306359388713,
      "loss": 3.0223,
      "step": 44131
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5177854299545288,
      "learning_rate": 0.0005473040438054353,
      "loss": 3.1643,
      "step": 44132
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7583613395690918,
      "learning_rate": 0.000547301728176179,
      "loss": 2.9305,
      "step": 44133
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.250791311264038,
      "learning_rate": 0.0005472994125009446,
      "loss": 2.8922,
      "step": 44134
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5371326208114624,
      "learning_rate": 0.0005472970967797327,
      "loss": 2.8505,
      "step": 44135
    },
    {
      "epoch": 0.57,
      "grad_norm": 5.529193878173828,
      "learning_rate": 0.0005472947810125435,
      "loss": 3.0909,
      "step": 44136
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5551666021347046,
      "learning_rate": 0.0005472924651993776,
      "loss": 3.1234,
      "step": 44137
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.30261492729187,
      "learning_rate": 0.0005472901493402354,
      "loss": 3.1228,
      "step": 44138
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5921735763549805,
      "learning_rate": 0.0005472878334351172,
      "loss": 2.841,
      "step": 44139
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9622719287872314,
      "learning_rate": 0.0005472855174840235,
      "loss": 3.0952,
      "step": 44140
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.9565517902374268,
      "learning_rate": 0.0005472832014869548,
      "loss": 2.9976,
      "step": 44141
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6661157608032227,
      "learning_rate": 0.0005472808854439116,
      "loss": 2.9688,
      "step": 44142
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.331221580505371,
      "learning_rate": 0.0005472785693548941,
      "loss": 3.2472,
      "step": 44143
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.919897198677063,
      "learning_rate": 0.0005472762532199028,
      "loss": 2.7667,
      "step": 44144
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.6967445611953735,
      "learning_rate": 0.0005472739370389381,
      "loss": 3.1164,
      "step": 44145
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3172749280929565,
      "learning_rate": 0.0005472716208120008,
      "loss": 2.9108,
      "step": 44146
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.738440990447998,
      "learning_rate": 0.0005472693045390909,
      "loss": 3.1209,
      "step": 44147
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.301780104637146,
      "learning_rate": 0.0005472669882202088,
      "loss": 3.1902,
      "step": 44148
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.0949716567993164,
      "learning_rate": 0.0005472646718553553,
      "loss": 2.9511,
      "step": 44149
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.3878380060195923,
      "learning_rate": 0.0005472623554445305,
      "loss": 2.9161,
      "step": 44150
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7136735916137695,
      "learning_rate": 0.0005472600389877351,
      "loss": 2.9692,
      "step": 44151
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7143701314926147,
      "learning_rate": 0.0005472577224849693,
      "loss": 3.1923,
      "step": 44152
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2329357862472534,
      "learning_rate": 0.0005472554059362337,
      "loss": 3.2197,
      "step": 44153
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.4548966884613037,
      "learning_rate": 0.0005472530893415286,
      "loss": 3.1043,
      "step": 44154
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.2987239360809326,
      "learning_rate": 0.0005472507727008545,
      "loss": 3.2717,
      "step": 44155
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5002034902572632,
      "learning_rate": 0.0005472484560142117,
      "loss": 3.0646,
      "step": 44156
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.5237783193588257,
      "learning_rate": 0.0005472461392816008,
      "loss": 2.9666,
      "step": 44157
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.8974108695983887,
      "learning_rate": 0.0005472438225030223,
      "loss": 2.9314,
      "step": 44158
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.7053173780441284,
      "learning_rate": 0.0005472415056784765,
      "loss": 2.9258,
      "step": 44159
    },
    {
      "epoch": 0.57,
      "grad_norm": 1.832523226737976,
      "learning_rate": 0.0005472391888079637,
      "loss": 2.8125,
      "step": 44160
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4780213832855225,
      "learning_rate": 0.0005472368718914845,
      "loss": 2.9604,
      "step": 44161
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6119813919067383,
      "learning_rate": 0.0005472345549290394,
      "loss": 3.0103,
      "step": 44162
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1912894248962402,
      "learning_rate": 0.0005472322379206287,
      "loss": 2.8341,
      "step": 44163
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4425691366195679,
      "learning_rate": 0.0005472299208662527,
      "loss": 3.1385,
      "step": 44164
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4891471862792969,
      "learning_rate": 0.0005472276037659121,
      "loss": 3.0414,
      "step": 44165
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.615406036376953,
      "learning_rate": 0.0005472252866196072,
      "loss": 2.7148,
      "step": 44166
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4755678176879883,
      "learning_rate": 0.0005472229694273385,
      "loss": 2.8075,
      "step": 44167
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4658657312393188,
      "learning_rate": 0.0005472206521891064,
      "loss": 3.0831,
      "step": 44168
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.514978051185608,
      "learning_rate": 0.0005472183349049113,
      "loss": 3.1288,
      "step": 44169
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3771377801895142,
      "learning_rate": 0.0005472160175747536,
      "loss": 3.139,
      "step": 44170
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.628197193145752,
      "learning_rate": 0.0005472137001986338,
      "loss": 3.1495,
      "step": 44171
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5462726354599,
      "learning_rate": 0.0005472113827765523,
      "loss": 3.2852,
      "step": 44172
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3226635456085205,
      "learning_rate": 0.0005472090653085095,
      "loss": 3.2083,
      "step": 44173
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5438755750656128,
      "learning_rate": 0.000547206747794506,
      "loss": 2.7958,
      "step": 44174
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.482253909111023,
      "learning_rate": 0.0005472044302345419,
      "loss": 3.171,
      "step": 44175
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.148179292678833,
      "learning_rate": 0.000547202112628618,
      "loss": 3.0114,
      "step": 44176
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2071006298065186,
      "learning_rate": 0.0005471997949767345,
      "loss": 3.2462,
      "step": 44177
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6781327724456787,
      "learning_rate": 0.0005471974772788919,
      "loss": 2.9271,
      "step": 44178
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.014390230178833,
      "learning_rate": 0.0005471951595350906,
      "loss": 3.0986,
      "step": 44179
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4314289093017578,
      "learning_rate": 0.000547192841745331,
      "loss": 3.1462,
      "step": 44180
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6686216592788696,
      "learning_rate": 0.0005471905239096137,
      "loss": 3.0634,
      "step": 44181
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8312398195266724,
      "learning_rate": 0.000547188206027939,
      "loss": 2.8435,
      "step": 44182
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.7366085052490234,
      "learning_rate": 0.0005471858881003073,
      "loss": 3.0348,
      "step": 44183
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5465480089187622,
      "learning_rate": 0.0005471835701267191,
      "loss": 3.0197,
      "step": 44184
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2855706214904785,
      "learning_rate": 0.0005471812521071748,
      "loss": 2.8174,
      "step": 44185
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6111230850219727,
      "learning_rate": 0.0005471789340416748,
      "loss": 3.0035,
      "step": 44186
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.314378023147583,
      "learning_rate": 0.0005471766159302196,
      "loss": 3.1075,
      "step": 44187
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7739629745483398,
      "learning_rate": 0.0005471742977728097,
      "loss": 3.1399,
      "step": 44188
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.387750506401062,
      "learning_rate": 0.0005471719795694454,
      "loss": 2.8519,
      "step": 44189
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.090803623199463,
      "learning_rate": 0.000547169661320127,
      "loss": 2.9245,
      "step": 44190
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5820245742797852,
      "learning_rate": 0.0005471673430248553,
      "loss": 3.1045,
      "step": 44191
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.556136131286621,
      "learning_rate": 0.0005471650246836304,
      "loss": 3.0214,
      "step": 44192
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3142496347427368,
      "learning_rate": 0.0005471627062964529,
      "loss": 3.3193,
      "step": 44193
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4345139265060425,
      "learning_rate": 0.0005471603878633232,
      "loss": 3.2221,
      "step": 44194
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4851566553115845,
      "learning_rate": 0.0005471580693842416,
      "loss": 2.8875,
      "step": 44195
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5743838548660278,
      "learning_rate": 0.0005471557508592087,
      "loss": 3.2738,
      "step": 44196
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.823805332183838,
      "learning_rate": 0.000547153432288225,
      "loss": 3.0663,
      "step": 44197
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8165943622589111,
      "learning_rate": 0.0005471511136712906,
      "loss": 2.9013,
      "step": 44198
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8102502822875977,
      "learning_rate": 0.0005471487950084065,
      "loss": 2.8399,
      "step": 44199
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6572972536087036,
      "learning_rate": 0.0005471464762995724,
      "loss": 3.2318,
      "step": 44200
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6769177913665771,
      "learning_rate": 0.0005471441575447893,
      "loss": 2.9375,
      "step": 44201
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4809907674789429,
      "learning_rate": 0.0005471418387440573,
      "loss": 3.1286,
      "step": 44202
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9607880115509033,
      "learning_rate": 0.0005471395198973771,
      "loss": 3.16,
      "step": 44203
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6138793230056763,
      "learning_rate": 0.000547137201004749,
      "loss": 3.0379,
      "step": 44204
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5246175527572632,
      "learning_rate": 0.0005471348820661733,
      "loss": 3.0954,
      "step": 44205
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8092079162597656,
      "learning_rate": 0.0005471325630816507,
      "loss": 3.1819,
      "step": 44206
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.010215997695923,
      "learning_rate": 0.0005471302440511814,
      "loss": 2.9714,
      "step": 44207
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3552218675613403,
      "learning_rate": 0.0005471279249747661,
      "loss": 3.1841,
      "step": 44208
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9126601219177246,
      "learning_rate": 0.0005471256058524049,
      "loss": 3.0899,
      "step": 44209
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.531485915184021,
      "learning_rate": 0.0005471232866840983,
      "loss": 2.8711,
      "step": 44210
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4409377574920654,
      "learning_rate": 0.000547120967469847,
      "loss": 3.0625,
      "step": 44211
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.563071846961975,
      "learning_rate": 0.0005471186482096511,
      "loss": 3.1358,
      "step": 44212
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9842866659164429,
      "learning_rate": 0.0005471163289035112,
      "loss": 3.0088,
      "step": 44213
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4071143865585327,
      "learning_rate": 0.0005471140095514278,
      "loss": 3.163,
      "step": 44214
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.783433437347412,
      "learning_rate": 0.0005471116901534012,
      "loss": 2.8582,
      "step": 44215
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6162035465240479,
      "learning_rate": 0.0005471093707094318,
      "loss": 3.2101,
      "step": 44216
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5799133777618408,
      "learning_rate": 0.0005471070512195202,
      "loss": 3.0277,
      "step": 44217
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.690186619758606,
      "learning_rate": 0.0005471047316836666,
      "loss": 2.8532,
      "step": 44218
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1982617378234863,
      "learning_rate": 0.0005471024121018718,
      "loss": 2.8412,
      "step": 44219
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7164217233657837,
      "learning_rate": 0.0005471000924741359,
      "loss": 3.3297,
      "step": 44220
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8407634496688843,
      "learning_rate": 0.0005470977728004593,
      "loss": 2.8788,
      "step": 44221
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.558699369430542,
      "learning_rate": 0.0005470954530808426,
      "loss": 3.1404,
      "step": 44222
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.772615671157837,
      "learning_rate": 0.0005470931333152862,
      "loss": 3.1594,
      "step": 44223
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7552028894424438,
      "learning_rate": 0.0005470908135037907,
      "loss": 3.0551,
      "step": 44224
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6886892318725586,
      "learning_rate": 0.0005470884936463562,
      "loss": 3.2821,
      "step": 44225
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.34554922580719,
      "learning_rate": 0.0005470861737429833,
      "loss": 3.1114,
      "step": 44226
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7917736768722534,
      "learning_rate": 0.0005470838537936723,
      "loss": 3.3891,
      "step": 44227
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0090255737304688,
      "learning_rate": 0.0005470815337984239,
      "loss": 3.0557,
      "step": 44228
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3832851648330688,
      "learning_rate": 0.0005470792137572384,
      "loss": 3.1536,
      "step": 44229
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7101727724075317,
      "learning_rate": 0.0005470768936701162,
      "loss": 2.949,
      "step": 44230
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9096304178237915,
      "learning_rate": 0.0005470745735370576,
      "loss": 2.969,
      "step": 44231
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.6150991916656494,
      "learning_rate": 0.0005470722533580634,
      "loss": 3.0506,
      "step": 44232
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3540799617767334,
      "learning_rate": 0.0005470699331331337,
      "loss": 2.9687,
      "step": 44233
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0880117416381836,
      "learning_rate": 0.0005470676128622689,
      "loss": 2.9875,
      "step": 44234
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.90860116481781,
      "learning_rate": 0.0005470652925454696,
      "loss": 3.2052,
      "step": 44235
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4786144495010376,
      "learning_rate": 0.0005470629721827364,
      "loss": 3.0463,
      "step": 44236
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.045356273651123,
      "learning_rate": 0.0005470606517740693,
      "loss": 2.9628,
      "step": 44237
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4126393795013428,
      "learning_rate": 0.0005470583313194692,
      "loss": 2.9845,
      "step": 44238
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5431714057922363,
      "learning_rate": 0.0005470560108189361,
      "loss": 3.025,
      "step": 44239
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3483964204788208,
      "learning_rate": 0.0005470536902724706,
      "loss": 3.0479,
      "step": 44240
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.07686448097229,
      "learning_rate": 0.0005470513696800733,
      "loss": 2.8072,
      "step": 44241
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.7501726150512695,
      "learning_rate": 0.0005470490490417445,
      "loss": 3.1869,
      "step": 44242
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6818844079971313,
      "learning_rate": 0.0005470467283574845,
      "loss": 3.2315,
      "step": 44243
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.277970552444458,
      "learning_rate": 0.0005470444076272939,
      "loss": 3.2052,
      "step": 44244
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.932831048965454,
      "learning_rate": 0.000547042086851173,
      "loss": 2.9461,
      "step": 44245
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9948039054870605,
      "learning_rate": 0.0005470397660291224,
      "loss": 2.8902,
      "step": 44246
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3510569334030151,
      "learning_rate": 0.0005470374451611425,
      "loss": 3.0523,
      "step": 44247
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6572473049163818,
      "learning_rate": 0.0005470351242472336,
      "loss": 2.9851,
      "step": 44248
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1195342540740967,
      "learning_rate": 0.0005470328032873962,
      "loss": 2.9547,
      "step": 44249
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8310015201568604,
      "learning_rate": 0.0005470304822816308,
      "loss": 2.9803,
      "step": 44250
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1119863986968994,
      "learning_rate": 0.0005470281612299377,
      "loss": 2.9066,
      "step": 44251
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.232771396636963,
      "learning_rate": 0.0005470258401323174,
      "loss": 2.6169,
      "step": 44252
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.5121243000030518,
      "learning_rate": 0.0005470235189887704,
      "loss": 2.9326,
      "step": 44253
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9172420501708984,
      "learning_rate": 0.000547021197799297,
      "loss": 2.9972,
      "step": 44254
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.60500967502594,
      "learning_rate": 0.0005470188765638977,
      "loss": 2.7869,
      "step": 44255
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.246924877166748,
      "learning_rate": 0.000547016555282573,
      "loss": 3.0975,
      "step": 44256
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0174999237060547,
      "learning_rate": 0.0005470142339553232,
      "loss": 3.1264,
      "step": 44257
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5802562236785889,
      "learning_rate": 0.0005470119125821488,
      "loss": 3.1158,
      "step": 44258
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2888472080230713,
      "learning_rate": 0.0005470095911630503,
      "loss": 2.9264,
      "step": 44259
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.16656231880188,
      "learning_rate": 0.0005470072696980279,
      "loss": 3.0029,
      "step": 44260
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8033456802368164,
      "learning_rate": 0.0005470049481870823,
      "loss": 3.2341,
      "step": 44261
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0103907585144043,
      "learning_rate": 0.0005470026266302137,
      "loss": 2.9751,
      "step": 44262
    },
    {
      "epoch": 0.58,
      "grad_norm": 4.187174320220947,
      "learning_rate": 0.0005470003050274228,
      "loss": 2.9552,
      "step": 44263
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4183290004730225,
      "learning_rate": 0.0005469979833787098,
      "loss": 3.0456,
      "step": 44264
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9106488227844238,
      "learning_rate": 0.0005469956616840751,
      "loss": 2.9728,
      "step": 44265
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.38089656829834,
      "learning_rate": 0.0005469933399435194,
      "loss": 2.8772,
      "step": 44266
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.033217191696167,
      "learning_rate": 0.000546991018157043,
      "loss": 2.9499,
      "step": 44267
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5370371341705322,
      "learning_rate": 0.0005469886963246462,
      "loss": 2.9781,
      "step": 44268
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7128901481628418,
      "learning_rate": 0.0005469863744463297,
      "loss": 3.2241,
      "step": 44269
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5073630809783936,
      "learning_rate": 0.0005469840525220936,
      "loss": 3.1021,
      "step": 44270
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.396859884262085,
      "learning_rate": 0.0005469817305519385,
      "loss": 3.1371,
      "step": 44271
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3903526067733765,
      "learning_rate": 0.0005469794085358649,
      "loss": 3.0799,
      "step": 44272
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8646520376205444,
      "learning_rate": 0.0005469770864738731,
      "loss": 2.8654,
      "step": 44273
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6445531845092773,
      "learning_rate": 0.0005469747643659637,
      "loss": 2.9037,
      "step": 44274
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4622939825057983,
      "learning_rate": 0.0005469724422121371,
      "loss": 3.1062,
      "step": 44275
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4935715198516846,
      "learning_rate": 0.0005469701200123935,
      "loss": 2.9909,
      "step": 44276
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5680580139160156,
      "learning_rate": 0.0005469677977667335,
      "loss": 3.2097,
      "step": 44277
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.385289192199707,
      "learning_rate": 0.0005469654754751576,
      "loss": 3.3378,
      "step": 44278
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.73444402217865,
      "learning_rate": 0.0005469631531376661,
      "loss": 3.2454,
      "step": 44279
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.42218017578125,
      "learning_rate": 0.0005469608307542595,
      "loss": 3.0143,
      "step": 44280
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.655158281326294,
      "learning_rate": 0.0005469585083249383,
      "loss": 3.1428,
      "step": 44281
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2721543312072754,
      "learning_rate": 0.0005469561858497027,
      "loss": 2.9701,
      "step": 44282
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3860936164855957,
      "learning_rate": 0.0005469538633285534,
      "loss": 2.9,
      "step": 44283
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.700976014137268,
      "learning_rate": 0.0005469515407614907,
      "loss": 2.9288,
      "step": 44284
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7957733869552612,
      "learning_rate": 0.000546949218148515,
      "loss": 3.0042,
      "step": 44285
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8866955041885376,
      "learning_rate": 0.0005469468954896269,
      "loss": 3.2847,
      "step": 44286
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5302016735076904,
      "learning_rate": 0.0005469445727848266,
      "loss": 2.8594,
      "step": 44287
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6532647609710693,
      "learning_rate": 0.0005469422500341145,
      "loss": 2.9985,
      "step": 44288
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.331005096435547,
      "learning_rate": 0.0005469399272374914,
      "loss": 3.0122,
      "step": 44289
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6843342781066895,
      "learning_rate": 0.0005469376043949574,
      "loss": 3.0553,
      "step": 44290
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7229610681533813,
      "learning_rate": 0.0005469352815065132,
      "loss": 3.0536,
      "step": 44291
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3382458686828613,
      "learning_rate": 0.0005469329585721589,
      "loss": 3.2647,
      "step": 44292
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3877027034759521,
      "learning_rate": 0.0005469306355918951,
      "loss": 3.0728,
      "step": 44293
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3109904527664185,
      "learning_rate": 0.0005469283125657223,
      "loss": 3.1185,
      "step": 44294
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.605381965637207,
      "learning_rate": 0.0005469259894936409,
      "loss": 3.0174,
      "step": 44295
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.013011932373047,
      "learning_rate": 0.0005469236663756513,
      "loss": 2.7327,
      "step": 44296
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7773617506027222,
      "learning_rate": 0.0005469213432117538,
      "loss": 2.9398,
      "step": 44297
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.109416961669922,
      "learning_rate": 0.000546919020001949,
      "loss": 3.1138,
      "step": 44298
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2649965286254883,
      "learning_rate": 0.0005469166967462375,
      "loss": 2.9261,
      "step": 44299
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4787136316299438,
      "learning_rate": 0.0005469143734446192,
      "loss": 3.3306,
      "step": 44300
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8672319650650024,
      "learning_rate": 0.0005469120500970951,
      "loss": 3.0023,
      "step": 44301
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0857784748077393,
      "learning_rate": 0.0005469097267036653,
      "loss": 2.9804,
      "step": 44302
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.534797191619873,
      "learning_rate": 0.0005469074032643304,
      "loss": 3.0336,
      "step": 44303
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.607832908630371,
      "learning_rate": 0.0005469050797790905,
      "loss": 3.0566,
      "step": 44304
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5558805465698242,
      "learning_rate": 0.0005469027562479465,
      "loss": 2.9397,
      "step": 44305
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.394630193710327,
      "learning_rate": 0.0005469004326708987,
      "loss": 2.9751,
      "step": 44306
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.47451913356781,
      "learning_rate": 0.0005468981090479473,
      "loss": 2.8351,
      "step": 44307
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7894763946533203,
      "learning_rate": 0.0005468957853790929,
      "loss": 3.0939,
      "step": 44308
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6596930027008057,
      "learning_rate": 0.0005468934616643359,
      "loss": 3.2025,
      "step": 44309
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.602437973022461,
      "learning_rate": 0.0005468911379036767,
      "loss": 3.0905,
      "step": 44310
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4626798629760742,
      "learning_rate": 0.0005468888140971158,
      "loss": 2.8287,
      "step": 44311
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4787095785140991,
      "learning_rate": 0.0005468864902446537,
      "loss": 3.3072,
      "step": 44312
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4912663698196411,
      "learning_rate": 0.0005468841663462907,
      "loss": 2.7651,
      "step": 44313
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1461474895477295,
      "learning_rate": 0.0005468818424020273,
      "loss": 3.0156,
      "step": 44314
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6859523057937622,
      "learning_rate": 0.0005468795184118638,
      "loss": 3.1677,
      "step": 44315
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4868309497833252,
      "learning_rate": 0.0005468771943758008,
      "loss": 3.021,
      "step": 44316
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.795316219329834,
      "learning_rate": 0.0005468748702938388,
      "loss": 3.0628,
      "step": 44317
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.452923059463501,
      "learning_rate": 0.0005468725461659779,
      "loss": 3.048,
      "step": 44318
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2340261936187744,
      "learning_rate": 0.0005468702219922189,
      "loss": 3.0199,
      "step": 44319
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4146919250488281,
      "learning_rate": 0.0005468678977725618,
      "loss": 3.0332,
      "step": 44320
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4413611888885498,
      "learning_rate": 0.0005468655735070076,
      "loss": 3.045,
      "step": 44321
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4404700994491577,
      "learning_rate": 0.0005468632491955563,
      "loss": 3.0616,
      "step": 44322
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1508753299713135,
      "learning_rate": 0.0005468609248382085,
      "loss": 3.1398,
      "step": 44323
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6259633302688599,
      "learning_rate": 0.0005468586004349644,
      "loss": 3.0885,
      "step": 44324
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2948172092437744,
      "learning_rate": 0.0005468562759858249,
      "loss": 3.0946,
      "step": 44325
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6793164014816284,
      "learning_rate": 0.00054685395149079,
      "loss": 3.063,
      "step": 44326
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6420210599899292,
      "learning_rate": 0.0005468516269498604,
      "loss": 3.2127,
      "step": 44327
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.632178544998169,
      "learning_rate": 0.0005468493023630364,
      "loss": 3.1568,
      "step": 44328
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.9400594234466553,
      "learning_rate": 0.0005468469777303184,
      "loss": 2.8884,
      "step": 44329
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5767167806625366,
      "learning_rate": 0.0005468446530517068,
      "loss": 3.1007,
      "step": 44330
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9870481491088867,
      "learning_rate": 0.0005468423283272023,
      "loss": 3.0453,
      "step": 44331
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3595620393753052,
      "learning_rate": 0.0005468400035568051,
      "loss": 3.0837,
      "step": 44332
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4160521030426025,
      "learning_rate": 0.0005468376787405156,
      "loss": 2.9474,
      "step": 44333
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4868645668029785,
      "learning_rate": 0.0005468353538783344,
      "loss": 3.1634,
      "step": 44334
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.387138843536377,
      "learning_rate": 0.0005468330289702618,
      "loss": 3.1396,
      "step": 44335
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8517714738845825,
      "learning_rate": 0.0005468307040162984,
      "loss": 3.2069,
      "step": 44336
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9952212572097778,
      "learning_rate": 0.0005468283790164444,
      "loss": 2.9792,
      "step": 44337
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5648213624954224,
      "learning_rate": 0.0005468260539707003,
      "loss": 2.9439,
      "step": 44338
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9985523223876953,
      "learning_rate": 0.0005468237288790667,
      "loss": 2.9549,
      "step": 44339
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.617355227470398,
      "learning_rate": 0.0005468214037415437,
      "loss": 2.9366,
      "step": 44340
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4417115449905396,
      "learning_rate": 0.0005468190785581323,
      "loss": 3.0669,
      "step": 44341
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.35286283493042,
      "learning_rate": 0.0005468167533288322,
      "loss": 3.0846,
      "step": 44342
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4375640153884888,
      "learning_rate": 0.0005468144280536445,
      "loss": 3.1036,
      "step": 44343
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3821686506271362,
      "learning_rate": 0.0005468121027325691,
      "loss": 2.9252,
      "step": 44344
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3633921146392822,
      "learning_rate": 0.0005468097773656069,
      "loss": 2.7736,
      "step": 44345
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5201822519302368,
      "learning_rate": 0.0005468074519527578,
      "loss": 3.0324,
      "step": 44346
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4068063497543335,
      "learning_rate": 0.0005468051264940229,
      "loss": 2.915,
      "step": 44347
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.6255245208740234,
      "learning_rate": 0.000546802800989402,
      "loss": 3.066,
      "step": 44348
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3272881507873535,
      "learning_rate": 0.0005468004754388959,
      "loss": 2.9892,
      "step": 44349
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.423259973526001,
      "learning_rate": 0.000546798149842505,
      "loss": 3.1509,
      "step": 44350
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7302122116088867,
      "learning_rate": 0.0005467958242002295,
      "loss": 2.9366,
      "step": 44351
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6993623971939087,
      "learning_rate": 0.00054679349851207,
      "loss": 3.1007,
      "step": 44352
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7801626920700073,
      "learning_rate": 0.0005467911727780272,
      "loss": 3.0517,
      "step": 44353
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.32143235206604,
      "learning_rate": 0.000546788846998101,
      "loss": 3.0673,
      "step": 44354
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.008655309677124,
      "learning_rate": 0.0005467865211722922,
      "loss": 2.9236,
      "step": 44355
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.428853988647461,
      "learning_rate": 0.000546784195300601,
      "loss": 2.9423,
      "step": 44356
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.560388445854187,
      "learning_rate": 0.0005467818693830281,
      "loss": 2.8937,
      "step": 44357
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5456074476242065,
      "learning_rate": 0.0005467795434195738,
      "loss": 3.3487,
      "step": 44358
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5667427778244019,
      "learning_rate": 0.0005467772174102384,
      "loss": 3.2653,
      "step": 44359
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7957817316055298,
      "learning_rate": 0.0005467748913550225,
      "loss": 2.9746,
      "step": 44360
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9419187307357788,
      "learning_rate": 0.0005467725652539266,
      "loss": 3.0248,
      "step": 44361
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.390486240386963,
      "learning_rate": 0.000546770239106951,
      "loss": 3.0926,
      "step": 44362
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.232337236404419,
      "learning_rate": 0.0005467679129140961,
      "loss": 3.4814,
      "step": 44363
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5272856950759888,
      "learning_rate": 0.0005467655866753623,
      "loss": 3.2771,
      "step": 44364
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4593547582626343,
      "learning_rate": 0.0005467632603907502,
      "loss": 3.2453,
      "step": 44365
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.051684856414795,
      "learning_rate": 0.0005467609340602602,
      "loss": 3.2276,
      "step": 44366
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4167104959487915,
      "learning_rate": 0.0005467586076838926,
      "loss": 2.9533,
      "step": 44367
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5231612920761108,
      "learning_rate": 0.000546756281261648,
      "loss": 2.9199,
      "step": 44368
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0361039638519287,
      "learning_rate": 0.0005467539547935267,
      "loss": 2.8771,
      "step": 44369
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4069337844848633,
      "learning_rate": 0.0005467516282795293,
      "loss": 2.9917,
      "step": 44370
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6239359378814697,
      "learning_rate": 0.0005467493017196559,
      "loss": 3.0402,
      "step": 44371
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0505785942077637,
      "learning_rate": 0.0005467469751139072,
      "loss": 2.8762,
      "step": 44372
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5892651081085205,
      "learning_rate": 0.0005467446484622837,
      "loss": 2.9774,
      "step": 44373
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6828469038009644,
      "learning_rate": 0.0005467423217647856,
      "loss": 3.0827,
      "step": 44374
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8449950218200684,
      "learning_rate": 0.0005467399950214134,
      "loss": 2.9443,
      "step": 44375
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4684587717056274,
      "learning_rate": 0.0005467376682321678,
      "loss": 2.9483,
      "step": 44376
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3531242609024048,
      "learning_rate": 0.0005467353413970488,
      "loss": 3.1556,
      "step": 44377
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4808892011642456,
      "learning_rate": 0.0005467330145160572,
      "loss": 3.2013,
      "step": 44378
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7989124059677124,
      "learning_rate": 0.0005467306875891932,
      "loss": 2.8857,
      "step": 44379
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4723026752471924,
      "learning_rate": 0.0005467283606164573,
      "loss": 3.2257,
      "step": 44380
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4951924085617065,
      "learning_rate": 0.0005467260335978499,
      "loss": 2.9368,
      "step": 44381
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4733331203460693,
      "learning_rate": 0.0005467237065333716,
      "loss": 2.9382,
      "step": 44382
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4003862142562866,
      "learning_rate": 0.0005467213794230225,
      "loss": 2.9396,
      "step": 44383
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3944059610366821,
      "learning_rate": 0.0005467190522668034,
      "loss": 3.0966,
      "step": 44384
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.507005214691162,
      "learning_rate": 0.0005467167250647146,
      "loss": 3.1605,
      "step": 44385
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.521551489830017,
      "learning_rate": 0.0005467143978167564,
      "loss": 3.2148,
      "step": 44386
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5585345029830933,
      "learning_rate": 0.0005467120705229294,
      "loss": 3.1093,
      "step": 44387
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4252134561538696,
      "learning_rate": 0.0005467097431832339,
      "loss": 3.3811,
      "step": 44388
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4477368593215942,
      "learning_rate": 0.0005467074157976704,
      "loss": 3.2429,
      "step": 44389
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6230944395065308,
      "learning_rate": 0.0005467050883662394,
      "loss": 2.9769,
      "step": 44390
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9491780996322632,
      "learning_rate": 0.0005467027608889412,
      "loss": 3.0355,
      "step": 44391
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3823927640914917,
      "learning_rate": 0.0005467004333657763,
      "loss": 2.9959,
      "step": 44392
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6862543821334839,
      "learning_rate": 0.0005466981057967453,
      "loss": 3.1117,
      "step": 44393
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.456803798675537,
      "learning_rate": 0.0005466957781818483,
      "loss": 3.1988,
      "step": 44394
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7458655834197998,
      "learning_rate": 0.0005466934505210859,
      "loss": 2.9024,
      "step": 44395
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9082967042922974,
      "learning_rate": 0.0005466911228144587,
      "loss": 2.7109,
      "step": 44396
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7978078126907349,
      "learning_rate": 0.0005466887950619667,
      "loss": 2.9316,
      "step": 44397
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2810845375061035,
      "learning_rate": 0.0005466864672636107,
      "loss": 3.0658,
      "step": 44398
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8548280000686646,
      "learning_rate": 0.000546684139419391,
      "loss": 2.9104,
      "step": 44399
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.987199306488037,
      "learning_rate": 0.0005466818115293083,
      "loss": 3.0145,
      "step": 44400
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5672575235366821,
      "learning_rate": 0.0005466794835933625,
      "loss": 3.3169,
      "step": 44401
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7453055381774902,
      "learning_rate": 0.0005466771556115545,
      "loss": 3.0506,
      "step": 44402
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.473091721534729,
      "learning_rate": 0.0005466748275838845,
      "loss": 3.032,
      "step": 44403
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.551180124282837,
      "learning_rate": 0.000546672499510353,
      "loss": 2.9728,
      "step": 44404
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3167812824249268,
      "learning_rate": 0.0005466701713909604,
      "loss": 3.3669,
      "step": 44405
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6440423727035522,
      "learning_rate": 0.0005466678432257072,
      "loss": 2.9856,
      "step": 44406
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5444190502166748,
      "learning_rate": 0.0005466655150145938,
      "loss": 2.9614,
      "step": 44407
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6811732053756714,
      "learning_rate": 0.0005466631867576205,
      "loss": 3.0096,
      "step": 44408
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3855396509170532,
      "learning_rate": 0.000546660858454788,
      "loss": 3.1133,
      "step": 44409
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6723754405975342,
      "learning_rate": 0.0005466585301060966,
      "loss": 3.1248,
      "step": 44410
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5596073865890503,
      "learning_rate": 0.0005466562017115466,
      "loss": 3.0526,
      "step": 44411
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5469223260879517,
      "learning_rate": 0.0005466538732711387,
      "loss": 2.9999,
      "step": 44412
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3824769258499146,
      "learning_rate": 0.000546651544784873,
      "loss": 3.0136,
      "step": 44413
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.3198838233947754,
      "learning_rate": 0.0005466492162527503,
      "loss": 3.114,
      "step": 44414
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4278457164764404,
      "learning_rate": 0.0005466468876747707,
      "loss": 2.8711,
      "step": 44415
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5338735580444336,
      "learning_rate": 0.000546644559050935,
      "loss": 3.1886,
      "step": 44416
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.1126418113708496,
      "learning_rate": 0.0005466422303812432,
      "loss": 3.0539,
      "step": 44417
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.172130823135376,
      "learning_rate": 0.000546639901665696,
      "loss": 3.0336,
      "step": 44418
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4951095581054688,
      "learning_rate": 0.000546637572904294,
      "loss": 3.0469,
      "step": 44419
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3705676794052124,
      "learning_rate": 0.0005466352440970371,
      "loss": 3.2295,
      "step": 44420
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7961885929107666,
      "learning_rate": 0.0005466329152439262,
      "loss": 3.0469,
      "step": 44421
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.496182680130005,
      "learning_rate": 0.0005466305863449617,
      "loss": 2.9616,
      "step": 44422
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.555906057357788,
      "learning_rate": 0.0005466282574001438,
      "loss": 2.8488,
      "step": 44423
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.312755584716797,
      "learning_rate": 0.0005466259284094731,
      "loss": 2.9055,
      "step": 44424
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4745538234710693,
      "learning_rate": 0.0005466235993729499,
      "loss": 3.1947,
      "step": 44425
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.303187370300293,
      "learning_rate": 0.0005466212702905748,
      "loss": 3.0196,
      "step": 44426
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5307714939117432,
      "learning_rate": 0.0005466189411623482,
      "loss": 3.0788,
      "step": 44427
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5482470989227295,
      "learning_rate": 0.0005466166119882704,
      "loss": 3.1564,
      "step": 44428
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.969372272491455,
      "learning_rate": 0.0005466142827683419,
      "loss": 2.9672,
      "step": 44429
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.504378080368042,
      "learning_rate": 0.000546611953502563,
      "loss": 2.9715,
      "step": 44430
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4082694053649902,
      "learning_rate": 0.0005466096241909346,
      "loss": 2.9787,
      "step": 44431
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3999534845352173,
      "learning_rate": 0.0005466072948334566,
      "loss": 3.1147,
      "step": 44432
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3429920673370361,
      "learning_rate": 0.0005466049654301297,
      "loss": 3.2163,
      "step": 44433
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6250970363616943,
      "learning_rate": 0.0005466026359809543,
      "loss": 2.7432,
      "step": 44434
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5501708984375,
      "learning_rate": 0.0005466003064859309,
      "loss": 3.0156,
      "step": 44435
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7781691551208496,
      "learning_rate": 0.0005465979769450597,
      "loss": 3.0844,
      "step": 44436
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6124825477600098,
      "learning_rate": 0.0005465956473583414,
      "loss": 3.0936,
      "step": 44437
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.430702805519104,
      "learning_rate": 0.0005465933177257762,
      "loss": 2.871,
      "step": 44438
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.471286654472351,
      "learning_rate": 0.0005465909880473647,
      "loss": 3.0221,
      "step": 44439
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.623555302619934,
      "learning_rate": 0.0005465886583231073,
      "loss": 3.0478,
      "step": 44440
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4069935083389282,
      "learning_rate": 0.0005465863285530044,
      "loss": 2.8877,
      "step": 44441
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5673829317092896,
      "learning_rate": 0.0005465839987370565,
      "loss": 2.8498,
      "step": 44442
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5581982135772705,
      "learning_rate": 0.0005465816688752639,
      "loss": 3.1817,
      "step": 44443
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4827666282653809,
      "learning_rate": 0.0005465793389676271,
      "loss": 3.3096,
      "step": 44444
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3090513944625854,
      "learning_rate": 0.0005465770090141465,
      "loss": 3.0288,
      "step": 44445
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.254256010055542,
      "learning_rate": 0.0005465746790148228,
      "loss": 3.0356,
      "step": 44446
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2695518732070923,
      "learning_rate": 0.0005465723489696561,
      "loss": 3.0692,
      "step": 44447
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5398128032684326,
      "learning_rate": 0.0005465700188786468,
      "loss": 3.0569,
      "step": 44448
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5863287448883057,
      "learning_rate": 0.0005465676887417957,
      "loss": 3.1965,
      "step": 44449
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3471100330352783,
      "learning_rate": 0.0005465653585591028,
      "loss": 3.054,
      "step": 44450
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5001157522201538,
      "learning_rate": 0.0005465630283305689,
      "loss": 2.8447,
      "step": 44451
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5410206317901611,
      "learning_rate": 0.0005465606980561943,
      "loss": 3.2895,
      "step": 44452
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6402329206466675,
      "learning_rate": 0.0005465583677359793,
      "loss": 3.2239,
      "step": 44453
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4958454370498657,
      "learning_rate": 0.0005465560373699246,
      "loss": 2.9349,
      "step": 44454
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4484095573425293,
      "learning_rate": 0.0005465537069580303,
      "loss": 2.9131,
      "step": 44455
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.01926851272583,
      "learning_rate": 0.000546551376500297,
      "loss": 3.1986,
      "step": 44456
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6257046461105347,
      "learning_rate": 0.0005465490459967252,
      "loss": 2.8872,
      "step": 44457
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6075447797775269,
      "learning_rate": 0.0005465467154473154,
      "loss": 3.0444,
      "step": 44458
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5056992769241333,
      "learning_rate": 0.0005465443848520678,
      "loss": 3.1247,
      "step": 44459
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8285882472991943,
      "learning_rate": 0.000546542054210983,
      "loss": 3.1075,
      "step": 44460
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6491131782531738,
      "learning_rate": 0.0005465397235240613,
      "loss": 3.0241,
      "step": 44461
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2020022869110107,
      "learning_rate": 0.0005465373927913033,
      "loss": 2.9532,
      "step": 44462
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.202986001968384,
      "learning_rate": 0.0005465350620127093,
      "loss": 3.0356,
      "step": 44463
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4357751607894897,
      "learning_rate": 0.0005465327311882797,
      "loss": 3.0546,
      "step": 44464
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3472357988357544,
      "learning_rate": 0.0005465304003180152,
      "loss": 3.3644,
      "step": 44465
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.714531421661377,
      "learning_rate": 0.0005465280694019159,
      "loss": 2.9888,
      "step": 44466
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.756367802619934,
      "learning_rate": 0.0005465257384399824,
      "loss": 2.6955,
      "step": 44467
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6592457294464111,
      "learning_rate": 0.0005465234074322152,
      "loss": 2.9488,
      "step": 44468
    },
    {
      "epoch": 0.58,
      "grad_norm": 4.905873775482178,
      "learning_rate": 0.0005465210763786145,
      "loss": 3.1398,
      "step": 44469
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4434750080108643,
      "learning_rate": 0.0005465187452791809,
      "loss": 3.2337,
      "step": 44470
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4948116540908813,
      "learning_rate": 0.0005465164141339149,
      "loss": 3.2437,
      "step": 44471
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4264192581176758,
      "learning_rate": 0.0005465140829428168,
      "loss": 2.9176,
      "step": 44472
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6923857927322388,
      "learning_rate": 0.0005465117517058871,
      "loss": 3.1558,
      "step": 44473
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8006184101104736,
      "learning_rate": 0.0005465094204231263,
      "loss": 3.226,
      "step": 44474
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0524795055389404,
      "learning_rate": 0.0005465070890945346,
      "loss": 3.2441,
      "step": 44475
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.27709698677063,
      "learning_rate": 0.0005465047577201125,
      "loss": 2.9842,
      "step": 44476
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5171438455581665,
      "learning_rate": 0.0005465024262998608,
      "loss": 2.8987,
      "step": 44477
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4040409326553345,
      "learning_rate": 0.0005465000948337794,
      "loss": 2.8817,
      "step": 44478
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5537711381912231,
      "learning_rate": 0.000546497763321869,
      "loss": 3.0894,
      "step": 44479
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7764365673065186,
      "learning_rate": 0.0005464954317641301,
      "loss": 3.0328,
      "step": 44480
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8922823667526245,
      "learning_rate": 0.000546493100160563,
      "loss": 2.9676,
      "step": 44481
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7859946489334106,
      "learning_rate": 0.0005464907685111682,
      "loss": 2.9602,
      "step": 44482
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7921924591064453,
      "learning_rate": 0.0005464884368159461,
      "loss": 2.8501,
      "step": 44483
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.706628441810608,
      "learning_rate": 0.0005464861050748972,
      "loss": 2.9176,
      "step": 44484
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5732840299606323,
      "learning_rate": 0.0005464837732880218,
      "loss": 2.982,
      "step": 44485
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7920900583267212,
      "learning_rate": 0.0005464814414553205,
      "loss": 2.9065,
      "step": 44486
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.0840554237365723,
      "learning_rate": 0.0005464791095767936,
      "loss": 3.146,
      "step": 44487
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1087965965270996,
      "learning_rate": 0.0005464767776524416,
      "loss": 3.4233,
      "step": 44488
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5200262069702148,
      "learning_rate": 0.0005464744456822649,
      "loss": 3.2797,
      "step": 44489
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4511516094207764,
      "learning_rate": 0.0005464721136662639,
      "loss": 3.0753,
      "step": 44490
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.446108818054199,
      "learning_rate": 0.0005464697816044391,
      "loss": 3.106,
      "step": 44491
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6835596561431885,
      "learning_rate": 0.0005464674494967909,
      "loss": 2.8942,
      "step": 44492
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7377455234527588,
      "learning_rate": 0.0005464651173433198,
      "loss": 3.0432,
      "step": 44493
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2211291790008545,
      "learning_rate": 0.0005464627851440262,
      "loss": 3.1689,
      "step": 44494
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3390274047851562,
      "learning_rate": 0.0005464604528989105,
      "loss": 3.0612,
      "step": 44495
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3321452140808105,
      "learning_rate": 0.0005464581206079731,
      "loss": 2.6937,
      "step": 44496
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.425696849822998,
      "learning_rate": 0.0005464557882712145,
      "loss": 2.7595,
      "step": 44497
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.257169246673584,
      "learning_rate": 0.0005464534558886351,
      "loss": 3.1576,
      "step": 44498
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0557949542999268,
      "learning_rate": 0.0005464511234602354,
      "loss": 3.017,
      "step": 44499
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7388718128204346,
      "learning_rate": 0.0005464487909860158,
      "loss": 2.9335,
      "step": 44500
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7293156385421753,
      "learning_rate": 0.0005464464584659768,
      "loss": 2.962,
      "step": 44501
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7178739309310913,
      "learning_rate": 0.0005464441259001186,
      "loss": 3.0194,
      "step": 44502
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6358259916305542,
      "learning_rate": 0.0005464417932884418,
      "loss": 3.28,
      "step": 44503
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6624877452850342,
      "learning_rate": 0.0005464394606309468,
      "loss": 3.1737,
      "step": 44504
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4376722574234009,
      "learning_rate": 0.0005464371279276341,
      "loss": 2.9267,
      "step": 44505
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6453522443771362,
      "learning_rate": 0.0005464347951785041,
      "loss": 3.1598,
      "step": 44506
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4090840816497803,
      "learning_rate": 0.0005464324623835572,
      "loss": 3.2016,
      "step": 44507
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5892810821533203,
      "learning_rate": 0.0005464301295427939,
      "loss": 3.2031,
      "step": 44508
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5567901134490967,
      "learning_rate": 0.0005464277966562145,
      "loss": 3.3206,
      "step": 44509
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.44124698638916,
      "learning_rate": 0.0005464254637238196,
      "loss": 3.0547,
      "step": 44510
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8371833562850952,
      "learning_rate": 0.0005464231307456095,
      "loss": 3.2385,
      "step": 44511
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.686285376548767,
      "learning_rate": 0.0005464207977215847,
      "loss": 3.1637,
      "step": 44512
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9500129222869873,
      "learning_rate": 0.0005464184646517456,
      "loss": 3.086,
      "step": 44513
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9037864208221436,
      "learning_rate": 0.0005464161315360927,
      "loss": 2.7899,
      "step": 44514
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5495682954788208,
      "learning_rate": 0.0005464137983746263,
      "loss": 2.8173,
      "step": 44515
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2097244262695312,
      "learning_rate": 0.0005464114651673471,
      "loss": 3.3264,
      "step": 44516
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8740156888961792,
      "learning_rate": 0.0005464091319142552,
      "loss": 3.2376,
      "step": 44517
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4129751920700073,
      "learning_rate": 0.0005464067986153513,
      "loss": 3.0048,
      "step": 44518
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.394094705581665,
      "learning_rate": 0.0005464044652706356,
      "loss": 3.0934,
      "step": 44519
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6969077587127686,
      "learning_rate": 0.0005464021318801088,
      "loss": 2.8691,
      "step": 44520
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.442570447921753,
      "learning_rate": 0.0005463997984437711,
      "loss": 3.1188,
      "step": 44521
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7083526849746704,
      "learning_rate": 0.000546397464961623,
      "loss": 2.8399,
      "step": 44522
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4147443771362305,
      "learning_rate": 0.000546395131433665,
      "loss": 3.2678,
      "step": 44523
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.39110267162323,
      "learning_rate": 0.0005463927978598975,
      "loss": 3.2299,
      "step": 44524
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.39443838596344,
      "learning_rate": 0.000546390464240321,
      "loss": 3.119,
      "step": 44525
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5619672536849976,
      "learning_rate": 0.0005463881305749358,
      "loss": 3.1901,
      "step": 44526
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6383476257324219,
      "learning_rate": 0.0005463857968637423,
      "loss": 2.8073,
      "step": 44527
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0249133110046387,
      "learning_rate": 0.0005463834631067412,
      "loss": 3.2697,
      "step": 44528
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.252673864364624,
      "learning_rate": 0.0005463811293039327,
      "loss": 2.8803,
      "step": 44529
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.7435755729675293,
      "learning_rate": 0.0005463787954553172,
      "loss": 3.1091,
      "step": 44530
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3038057088851929,
      "learning_rate": 0.0005463764615608953,
      "loss": 3.1596,
      "step": 44531
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9073537588119507,
      "learning_rate": 0.0005463741276206675,
      "loss": 3.0853,
      "step": 44532
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6220972537994385,
      "learning_rate": 0.0005463717936346338,
      "loss": 3.1496,
      "step": 44533
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4875885248184204,
      "learning_rate": 0.0005463694596027952,
      "loss": 2.8605,
      "step": 44534
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.6620748043060303,
      "learning_rate": 0.0005463671255251518,
      "loss": 2.9751,
      "step": 44535
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.4141454696655273,
      "learning_rate": 0.0005463647914017041,
      "loss": 3.2051,
      "step": 44536
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.138659715652466,
      "learning_rate": 0.0005463624572324524,
      "loss": 3.0882,
      "step": 44537
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0698447227478027,
      "learning_rate": 0.0005463601230173975,
      "loss": 3.1646,
      "step": 44538
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4734797477722168,
      "learning_rate": 0.0005463577887565394,
      "loss": 3.1775,
      "step": 44539
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3728511333465576,
      "learning_rate": 0.0005463554544498788,
      "loss": 3.1389,
      "step": 44540
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7978270053863525,
      "learning_rate": 0.0005463531200974161,
      "loss": 2.921,
      "step": 44541
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.626010775566101,
      "learning_rate": 0.0005463507856991517,
      "loss": 2.9615,
      "step": 44542
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8892831802368164,
      "learning_rate": 0.000546348451255086,
      "loss": 3.0158,
      "step": 44543
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6926801204681396,
      "learning_rate": 0.0005463461167652195,
      "loss": 2.8685,
      "step": 44544
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.451473355293274,
      "learning_rate": 0.0005463437822295526,
      "loss": 2.9673,
      "step": 44545
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.610545039176941,
      "learning_rate": 0.0005463414476480857,
      "loss": 3.3328,
      "step": 44546
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8142917156219482,
      "learning_rate": 0.0005463391130208193,
      "loss": 3.0263,
      "step": 44547
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5939229726791382,
      "learning_rate": 0.000546336778347754,
      "loss": 3.1239,
      "step": 44548
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3044662475585938,
      "learning_rate": 0.0005463344436288898,
      "loss": 3.0273,
      "step": 44549
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3963067531585693,
      "learning_rate": 0.0005463321088642275,
      "loss": 3.092,
      "step": 44550
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6532748937606812,
      "learning_rate": 0.0005463297740537673,
      "loss": 2.9637,
      "step": 44551
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3808858394622803,
      "learning_rate": 0.0005463274391975098,
      "loss": 3.0641,
      "step": 44552
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4503626823425293,
      "learning_rate": 0.0005463251042954554,
      "loss": 2.7893,
      "step": 44553
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5086373090744019,
      "learning_rate": 0.0005463227693476045,
      "loss": 3.1834,
      "step": 44554
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5257525444030762,
      "learning_rate": 0.0005463204343539576,
      "loss": 3.0414,
      "step": 44555
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3021084070205688,
      "learning_rate": 0.000546318099314515,
      "loss": 3.1174,
      "step": 44556
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8138107061386108,
      "learning_rate": 0.0005463157642292772,
      "loss": 3.2832,
      "step": 44557
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7189406156539917,
      "learning_rate": 0.0005463134290982448,
      "loss": 2.9766,
      "step": 44558
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9387502670288086,
      "learning_rate": 0.0005463110939214179,
      "loss": 2.9221,
      "step": 44559
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8852721452713013,
      "learning_rate": 0.0005463087586987972,
      "loss": 3.1789,
      "step": 44560
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3661909103393555,
      "learning_rate": 0.0005463064234303831,
      "loss": 2.9649,
      "step": 44561
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8359748125076294,
      "learning_rate": 0.0005463040881161759,
      "loss": 3.1502,
      "step": 44562
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8043102025985718,
      "learning_rate": 0.0005463017527561762,
      "loss": 3.0946,
      "step": 44563
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.528001308441162,
      "learning_rate": 0.0005462994173503843,
      "loss": 2.9347,
      "step": 44564
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7230960130691528,
      "learning_rate": 0.0005462970818988006,
      "loss": 3.2774,
      "step": 44565
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3693549633026123,
      "learning_rate": 0.0005462947464014258,
      "loss": 3.2034,
      "step": 44566
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4530837535858154,
      "learning_rate": 0.00054629241085826,
      "loss": 3.2609,
      "step": 44567
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9135105609893799,
      "learning_rate": 0.000546290075269304,
      "loss": 2.8393,
      "step": 44568
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0339999198913574,
      "learning_rate": 0.0005462877396345578,
      "loss": 3.0094,
      "step": 44569
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4674129486083984,
      "learning_rate": 0.0005462854039540222,
      "loss": 3.1327,
      "step": 44570
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1459102630615234,
      "learning_rate": 0.0005462830682276975,
      "loss": 2.9686,
      "step": 44571
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8854135274887085,
      "learning_rate": 0.000546280732455584,
      "loss": 3.1158,
      "step": 44572
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4119811058044434,
      "learning_rate": 0.0005462783966376824,
      "loss": 3.1324,
      "step": 44573
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.288177967071533,
      "learning_rate": 0.000546276060773993,
      "loss": 3.057,
      "step": 44574
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4112651348114014,
      "learning_rate": 0.0005462737248645161,
      "loss": 3.0723,
      "step": 44575
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4556994438171387,
      "learning_rate": 0.0005462713889092523,
      "loss": 3.1151,
      "step": 44576
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4402151107788086,
      "learning_rate": 0.0005462690529082022,
      "loss": 2.9059,
      "step": 44577
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7938957214355469,
      "learning_rate": 0.0005462667168613657,
      "loss": 3.1522,
      "step": 44578
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.644850254058838,
      "learning_rate": 0.0005462643807687439,
      "loss": 2.9645,
      "step": 44579
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3837521076202393,
      "learning_rate": 0.0005462620446303367,
      "loss": 3.0845,
      "step": 44580
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7339177131652832,
      "learning_rate": 0.0005462597084461448,
      "loss": 3.1356,
      "step": 44581
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.3490495681762695,
      "learning_rate": 0.0005462573722161686,
      "loss": 2.7198,
      "step": 44582
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1507039070129395,
      "learning_rate": 0.0005462550359404084,
      "loss": 2.9063,
      "step": 44583
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3167747259140015,
      "learning_rate": 0.0005462526996188648,
      "loss": 2.9672,
      "step": 44584
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6750807762145996,
      "learning_rate": 0.0005462503632515381,
      "loss": 3.2175,
      "step": 44585
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5850220918655396,
      "learning_rate": 0.000546248026838429,
      "loss": 2.8725,
      "step": 44586
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.8275930881500244,
      "learning_rate": 0.0005462456903795376,
      "loss": 3.1861,
      "step": 44587
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3706432580947876,
      "learning_rate": 0.0005462433538748645,
      "loss": 3.3028,
      "step": 44588
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.104581117630005,
      "learning_rate": 0.0005462410173244101,
      "loss": 2.9038,
      "step": 44589
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4125183820724487,
      "learning_rate": 0.0005462386807281749,
      "loss": 3.3196,
      "step": 44590
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5974717140197754,
      "learning_rate": 0.0005462363440861593,
      "loss": 2.8821,
      "step": 44591
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.6217427253723145,
      "learning_rate": 0.0005462340073983636,
      "loss": 2.9826,
      "step": 44592
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.430846929550171,
      "learning_rate": 0.0005462316706647883,
      "loss": 2.8789,
      "step": 44593
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.610779047012329,
      "learning_rate": 0.0005462293338854341,
      "loss": 3.0962,
      "step": 44594
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3146371841430664,
      "learning_rate": 0.0005462269970603011,
      "loss": 3.0073,
      "step": 44595
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7557182312011719,
      "learning_rate": 0.0005462246601893899,
      "loss": 2.8917,
      "step": 44596
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3081040382385254,
      "learning_rate": 0.0005462223232727009,
      "loss": 3.2484,
      "step": 44597
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.68172025680542,
      "learning_rate": 0.0005462199863102345,
      "loss": 2.9546,
      "step": 44598
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6137882471084595,
      "learning_rate": 0.0005462176493019911,
      "loss": 2.9822,
      "step": 44599
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4372706413269043,
      "learning_rate": 0.0005462153122479711,
      "loss": 3.2894,
      "step": 44600
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5730212926864624,
      "learning_rate": 0.0005462129751481752,
      "loss": 3.103,
      "step": 44601
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.882509708404541,
      "learning_rate": 0.0005462106380026036,
      "loss": 3.0123,
      "step": 44602
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7409863471984863,
      "learning_rate": 0.0005462083008112569,
      "loss": 2.8792,
      "step": 44603
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.795592188835144,
      "learning_rate": 0.0005462059635741353,
      "loss": 3.1475,
      "step": 44604
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7045613527297974,
      "learning_rate": 0.0005462036262912395,
      "loss": 3.0536,
      "step": 44605
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2620822191238403,
      "learning_rate": 0.0005462012889625696,
      "loss": 2.934,
      "step": 44606
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0939364433288574,
      "learning_rate": 0.0005461989515881263,
      "loss": 2.8576,
      "step": 44607
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7024388313293457,
      "learning_rate": 0.0005461966141679101,
      "loss": 2.9893,
      "step": 44608
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6365699768066406,
      "learning_rate": 0.0005461942767019212,
      "loss": 2.9281,
      "step": 44609
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6010544300079346,
      "learning_rate": 0.0005461919391901601,
      "loss": 3.3462,
      "step": 44610
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2554614543914795,
      "learning_rate": 0.0005461896016326273,
      "loss": 2.8709,
      "step": 44611
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.545636773109436,
      "learning_rate": 0.0005461872640293233,
      "loss": 3.148,
      "step": 44612
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.9444687366485596,
      "learning_rate": 0.0005461849263802482,
      "loss": 3.068,
      "step": 44613
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0579397678375244,
      "learning_rate": 0.0005461825886854029,
      "loss": 3.0448,
      "step": 44614
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.506575584411621,
      "learning_rate": 0.0005461802509447876,
      "loss": 3.035,
      "step": 44615
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9570116996765137,
      "learning_rate": 0.0005461779131584026,
      "loss": 3.16,
      "step": 44616
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.5564773082733154,
      "learning_rate": 0.0005461755753262486,
      "loss": 2.8977,
      "step": 44617
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6672669649124146,
      "learning_rate": 0.0005461732374483259,
      "loss": 2.9805,
      "step": 44618
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6165692806243896,
      "learning_rate": 0.0005461708995246349,
      "loss": 3.3117,
      "step": 44619
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.914757251739502,
      "learning_rate": 0.0005461685615551761,
      "loss": 2.7891,
      "step": 44620
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.994949460029602,
      "learning_rate": 0.00054616622353995,
      "loss": 2.9374,
      "step": 44621
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4941489696502686,
      "learning_rate": 0.0005461638854789569,
      "loss": 3.1334,
      "step": 44622
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.780240535736084,
      "learning_rate": 0.0005461615473721972,
      "loss": 3.158,
      "step": 44623
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3697848320007324,
      "learning_rate": 0.0005461592092196715,
      "loss": 3.192,
      "step": 44624
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.826959252357483,
      "learning_rate": 0.0005461568710213801,
      "loss": 2.9104,
      "step": 44625
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7326833009719849,
      "learning_rate": 0.0005461545327773235,
      "loss": 3.0192,
      "step": 44626
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.576715111732483,
      "learning_rate": 0.0005461521944875022,
      "loss": 3.0307,
      "step": 44627
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.516160488128662,
      "learning_rate": 0.0005461498561519166,
      "loss": 3.2073,
      "step": 44628
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.428619146347046,
      "learning_rate": 0.0005461475177705671,
      "loss": 3.1181,
      "step": 44629
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4982216358184814,
      "learning_rate": 0.000546145179343454,
      "loss": 3.1871,
      "step": 44630
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7049821615219116,
      "learning_rate": 0.0005461428408705779,
      "loss": 3.3064,
      "step": 44631
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.71483314037323,
      "learning_rate": 0.0005461405023519391,
      "loss": 3.079,
      "step": 44632
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3010677099227905,
      "learning_rate": 0.0005461381637875384,
      "loss": 3.2116,
      "step": 44633
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3692281246185303,
      "learning_rate": 0.0005461358251773758,
      "loss": 3.1329,
      "step": 44634
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.558233618736267,
      "learning_rate": 0.0005461334865214519,
      "loss": 3.0522,
      "step": 44635
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4772166013717651,
      "learning_rate": 0.0005461311478197671,
      "loss": 3.1838,
      "step": 44636
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.631284236907959,
      "learning_rate": 0.0005461288090723219,
      "loss": 3.0971,
      "step": 44637
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7045552730560303,
      "learning_rate": 0.0005461264702791168,
      "loss": 2.8395,
      "step": 44638
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5023847818374634,
      "learning_rate": 0.0005461241314401521,
      "loss": 3.05,
      "step": 44639
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8460966348648071,
      "learning_rate": 0.0005461217925554283,
      "loss": 2.7143,
      "step": 44640
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7652415037155151,
      "learning_rate": 0.0005461194536249457,
      "loss": 3.0834,
      "step": 44641
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1856470108032227,
      "learning_rate": 0.000546117114648705,
      "loss": 3.1731,
      "step": 44642
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6909953355789185,
      "learning_rate": 0.0005461147756267063,
      "loss": 2.9945,
      "step": 44643
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3457965850830078,
      "learning_rate": 0.0005461124365589504,
      "loss": 3.4185,
      "step": 44644
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.006776809692383,
      "learning_rate": 0.0005461100974454374,
      "loss": 3.0481,
      "step": 44645
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.8505184650421143,
      "learning_rate": 0.000546107758286168,
      "loss": 3.0812,
      "step": 44646
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.559619426727295,
      "learning_rate": 0.0005461054190811425,
      "loss": 3.1688,
      "step": 44647
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.632280707359314,
      "learning_rate": 0.0005461030798303613,
      "loss": 2.989,
      "step": 44648
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.141415596008301,
      "learning_rate": 0.0005461007405338249,
      "loss": 3.2215,
      "step": 44649
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.908006191253662,
      "learning_rate": 0.0005460984011915338,
      "loss": 2.9614,
      "step": 44650
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5488760471343994,
      "learning_rate": 0.0005460960618034884,
      "loss": 2.7512,
      "step": 44651
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9991552829742432,
      "learning_rate": 0.000546093722369689,
      "loss": 3.0856,
      "step": 44652
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8604085445404053,
      "learning_rate": 0.0005460913828901361,
      "loss": 3.1524,
      "step": 44653
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3277759552001953,
      "learning_rate": 0.0005460890433648302,
      "loss": 3.2315,
      "step": 44654
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7142131328582764,
      "learning_rate": 0.0005460867037937717,
      "loss": 3.1301,
      "step": 44655
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4980995655059814,
      "learning_rate": 0.0005460843641769611,
      "loss": 3.1234,
      "step": 44656
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3926854133605957,
      "learning_rate": 0.0005460820245143987,
      "loss": 3.1977,
      "step": 44657
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7502641677856445,
      "learning_rate": 0.0005460796848060851,
      "loss": 2.9195,
      "step": 44658
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7427992820739746,
      "learning_rate": 0.0005460773450520205,
      "loss": 3.141,
      "step": 44659
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4478328227996826,
      "learning_rate": 0.0005460750052522056,
      "loss": 2.9531,
      "step": 44660
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6601043939590454,
      "learning_rate": 0.0005460726654066407,
      "loss": 3.1871,
      "step": 44661
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7223762273788452,
      "learning_rate": 0.0005460703255153261,
      "loss": 3.2996,
      "step": 44662
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.855250120162964,
      "learning_rate": 0.0005460679855782626,
      "loss": 2.9936,
      "step": 44663
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.6283841133117676,
      "learning_rate": 0.0005460656455954502,
      "loss": 3.1518,
      "step": 44664
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6299618482589722,
      "learning_rate": 0.0005460633055668897,
      "loss": 3.0682,
      "step": 44665
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5595813989639282,
      "learning_rate": 0.0005460609654925814,
      "loss": 2.9492,
      "step": 44666
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.3280727863311768,
      "learning_rate": 0.0005460586253725257,
      "loss": 3.029,
      "step": 44667
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7958112955093384,
      "learning_rate": 0.000546056285206723,
      "loss": 3.1064,
      "step": 44668
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7475686073303223,
      "learning_rate": 0.0005460539449951738,
      "loss": 3.0276,
      "step": 44669
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.604893207550049,
      "learning_rate": 0.0005460516047378786,
      "loss": 3.0693,
      "step": 44670
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7415834665298462,
      "learning_rate": 0.0005460492644348376,
      "loss": 2.9242,
      "step": 44671
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.283890962600708,
      "learning_rate": 0.0005460469240860515,
      "loss": 3.2091,
      "step": 44672
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5556946992874146,
      "learning_rate": 0.0005460445836915207,
      "loss": 3.0184,
      "step": 44673
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.722407579421997,
      "learning_rate": 0.0005460422432512455,
      "loss": 2.9633,
      "step": 44674
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4265893697738647,
      "learning_rate": 0.0005460399027652263,
      "loss": 3.0727,
      "step": 44675
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4200093746185303,
      "learning_rate": 0.0005460375622334638,
      "loss": 3.2569,
      "step": 44676
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4405707120895386,
      "learning_rate": 0.0005460352216559583,
      "loss": 2.8133,
      "step": 44677
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7896819114685059,
      "learning_rate": 0.00054603288103271,
      "loss": 2.9947,
      "step": 44678
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.552229881286621,
      "learning_rate": 0.0005460305403637198,
      "loss": 2.971,
      "step": 44679
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.200613498687744,
      "learning_rate": 0.0005460281996489877,
      "loss": 2.898,
      "step": 44680
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7484372854232788,
      "learning_rate": 0.0005460258588885144,
      "loss": 3.0101,
      "step": 44681
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9466807842254639,
      "learning_rate": 0.0005460235180823001,
      "loss": 3.2194,
      "step": 44682
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8019883632659912,
      "learning_rate": 0.0005460211772303455,
      "loss": 2.8701,
      "step": 44683
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6545286178588867,
      "learning_rate": 0.0005460188363326509,
      "loss": 3.1153,
      "step": 44684
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6842718124389648,
      "learning_rate": 0.0005460164953892168,
      "loss": 3.3608,
      "step": 44685
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.630785584449768,
      "learning_rate": 0.0005460141544000436,
      "loss": 3.1176,
      "step": 44686
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9424705505371094,
      "learning_rate": 0.0005460118133651316,
      "loss": 3.0201,
      "step": 44687
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4761403799057007,
      "learning_rate": 0.0005460094722844815,
      "loss": 3.2893,
      "step": 44688
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5457878112792969,
      "learning_rate": 0.0005460071311580935,
      "loss": 3.3464,
      "step": 44689
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.775629997253418,
      "learning_rate": 0.0005460047899859681,
      "loss": 3.0991,
      "step": 44690
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.365939974784851,
      "learning_rate": 0.000546002448768106,
      "loss": 2.7845,
      "step": 44691
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6120232343673706,
      "learning_rate": 0.0005460001075045072,
      "loss": 2.9658,
      "step": 44692
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.697630763053894,
      "learning_rate": 0.0005459977661951723,
      "loss": 2.9867,
      "step": 44693
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.587621808052063,
      "learning_rate": 0.0005459954248401019,
      "loss": 3.1968,
      "step": 44694
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5360820293426514,
      "learning_rate": 0.0005459930834392962,
      "loss": 2.9116,
      "step": 44695
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5999728441238403,
      "learning_rate": 0.0005459907419927558,
      "loss": 3.0735,
      "step": 44696
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5915297269821167,
      "learning_rate": 0.000545988400500481,
      "loss": 2.932,
      "step": 44697
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5896368026733398,
      "learning_rate": 0.0005459860589624724,
      "loss": 2.9385,
      "step": 44698
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.750651478767395,
      "learning_rate": 0.0005459837173787303,
      "loss": 3.1144,
      "step": 44699
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.840445637702942,
      "learning_rate": 0.0005459813757492552,
      "loss": 3.2338,
      "step": 44700
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.57423734664917,
      "learning_rate": 0.0005459790340740477,
      "loss": 3.0001,
      "step": 44701
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9206451177597046,
      "learning_rate": 0.0005459766923531078,
      "loss": 2.7676,
      "step": 44702
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.089026689529419,
      "learning_rate": 0.0005459743505864363,
      "loss": 2.833,
      "step": 44703
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.400734543800354,
      "learning_rate": 0.0005459720087740335,
      "loss": 3.0505,
      "step": 44704
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9759052991867065,
      "learning_rate": 0.0005459696669158998,
      "loss": 3.281,
      "step": 44705
    },
    {
      "epoch": 0.58,
      "grad_norm": 4.045997142791748,
      "learning_rate": 0.0005459673250120358,
      "loss": 3.105,
      "step": 44706
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.6326422691345215,
      "learning_rate": 0.0005459649830624418,
      "loss": 2.7454,
      "step": 44707
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6608136892318726,
      "learning_rate": 0.0005459626410671182,
      "loss": 2.9953,
      "step": 44708
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.147897958755493,
      "learning_rate": 0.0005459602990260656,
      "loss": 3.1905,
      "step": 44709
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.6544604301452637,
      "learning_rate": 0.0005459579569392842,
      "loss": 3.2948,
      "step": 44710
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9630281925201416,
      "learning_rate": 0.0005459556148067747,
      "loss": 2.9785,
      "step": 44711
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4149788618087769,
      "learning_rate": 0.0005459532726285374,
      "loss": 2.7881,
      "step": 44712
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.09836483001709,
      "learning_rate": 0.0005459509304045727,
      "loss": 3.1042,
      "step": 44713
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.7598183155059814,
      "learning_rate": 0.0005459485881348812,
      "loss": 3.155,
      "step": 44714
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.7890079021453857,
      "learning_rate": 0.000545946245819463,
      "loss": 2.9068,
      "step": 44715
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5503734350204468,
      "learning_rate": 0.0005459439034583189,
      "loss": 3.0598,
      "step": 44716
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.9550647735595703,
      "learning_rate": 0.0005459415610514491,
      "loss": 3.1129,
      "step": 44717
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1571102142333984,
      "learning_rate": 0.0005459392185988542,
      "loss": 2.8565,
      "step": 44718
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.967942714691162,
      "learning_rate": 0.0005459368761005346,
      "loss": 3.0102,
      "step": 44719
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.331180453300476,
      "learning_rate": 0.0005459345335564906,
      "loss": 3.1324,
      "step": 44720
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.679443597793579,
      "learning_rate": 0.0005459321909667227,
      "loss": 2.7256,
      "step": 44721
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9069693088531494,
      "learning_rate": 0.0005459298483312314,
      "loss": 3.2393,
      "step": 44722
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.897117853164673,
      "learning_rate": 0.0005459275056500172,
      "loss": 3.1033,
      "step": 44723
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.444139003753662,
      "learning_rate": 0.0005459251629230804,
      "loss": 3.1574,
      "step": 44724
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.641965866088867,
      "learning_rate": 0.0005459228201504213,
      "loss": 3.061,
      "step": 44725
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.5777153968811035,
      "learning_rate": 0.0005459204773320406,
      "loss": 2.9496,
      "step": 44726
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.820723533630371,
      "learning_rate": 0.0005459181344679387,
      "loss": 3.1324,
      "step": 44727
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.579840064048767,
      "learning_rate": 0.0005459157915581158,
      "loss": 2.8208,
      "step": 44728
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5383024215698242,
      "learning_rate": 0.0005459134486025727,
      "loss": 2.9273,
      "step": 44729
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.850509524345398,
      "learning_rate": 0.0005459111056013097,
      "loss": 3.1275,
      "step": 44730
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.737088441848755,
      "learning_rate": 0.0005459087625543271,
      "loss": 3.2209,
      "step": 44731
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5488351583480835,
      "learning_rate": 0.0005459064194616253,
      "loss": 3.1383,
      "step": 44732
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4631348848342896,
      "learning_rate": 0.0005459040763232051,
      "loss": 3.1037,
      "step": 44733
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7998244762420654,
      "learning_rate": 0.0005459017331390665,
      "loss": 3.0377,
      "step": 44734
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0564498901367188,
      "learning_rate": 0.0005458993899092101,
      "loss": 2.8966,
      "step": 44735
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2402478456497192,
      "learning_rate": 0.0005458970466336365,
      "loss": 3.1164,
      "step": 44736
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9885590076446533,
      "learning_rate": 0.0005458947033123459,
      "loss": 3.0309,
      "step": 44737
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1156411170959473,
      "learning_rate": 0.0005458923599453389,
      "loss": 2.9938,
      "step": 44738
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.993849515914917,
      "learning_rate": 0.0005458900165326158,
      "loss": 2.9567,
      "step": 44739
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3723026514053345,
      "learning_rate": 0.0005458876730741771,
      "loss": 2.9309,
      "step": 44740
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3718445301055908,
      "learning_rate": 0.0005458853295700233,
      "loss": 3.2219,
      "step": 44741
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2933825254440308,
      "learning_rate": 0.0005458829860201548,
      "loss": 3.06,
      "step": 44742
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7656446695327759,
      "learning_rate": 0.0005458806424245719,
      "loss": 3.0979,
      "step": 44743
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.684018611907959,
      "learning_rate": 0.0005458782987832752,
      "loss": 3.0809,
      "step": 44744
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2858365774154663,
      "learning_rate": 0.0005458759550962651,
      "loss": 2.9909,
      "step": 44745
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.030674457550049,
      "learning_rate": 0.000545873611363542,
      "loss": 3.0421,
      "step": 44746
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2056846618652344,
      "learning_rate": 0.0005458712675851064,
      "loss": 3.0464,
      "step": 44747
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.398267388343811,
      "learning_rate": 0.0005458689237609586,
      "loss": 3.142,
      "step": 44748
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5715078115463257,
      "learning_rate": 0.0005458665798910992,
      "loss": 2.9674,
      "step": 44749
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5670654773712158,
      "learning_rate": 0.0005458642359755286,
      "loss": 3.0301,
      "step": 44750
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.614645004272461,
      "learning_rate": 0.0005458618920142472,
      "loss": 2.9986,
      "step": 44751
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9763396978378296,
      "learning_rate": 0.0005458595480072553,
      "loss": 3.0354,
      "step": 44752
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8472371101379395,
      "learning_rate": 0.0005458572039545535,
      "loss": 3.1684,
      "step": 44753
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6865055561065674,
      "learning_rate": 0.0005458548598561423,
      "loss": 3.3036,
      "step": 44754
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.922042965888977,
      "learning_rate": 0.000545852515712022,
      "loss": 3.0935,
      "step": 44755
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.142267942428589,
      "learning_rate": 0.0005458501715221931,
      "loss": 3.036,
      "step": 44756
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8553106784820557,
      "learning_rate": 0.000545847827286656,
      "loss": 2.9159,
      "step": 44757
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3637629747390747,
      "learning_rate": 0.000545845483005411,
      "loss": 2.9157,
      "step": 44758
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4823527336120605,
      "learning_rate": 0.000545843138678459,
      "loss": 2.9983,
      "step": 44759
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.890873908996582,
      "learning_rate": 0.0005458407943057999,
      "loss": 3.1514,
      "step": 44760
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6396697759628296,
      "learning_rate": 0.0005458384498874343,
      "loss": 2.9943,
      "step": 44761
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7817503213882446,
      "learning_rate": 0.0005458361054233628,
      "loss": 3.1898,
      "step": 44762
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.071532964706421,
      "learning_rate": 0.0005458337609135857,
      "loss": 3.2405,
      "step": 44763
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.487277626991272,
      "learning_rate": 0.0005458314163581035,
      "loss": 2.8558,
      "step": 44764
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7044074535369873,
      "learning_rate": 0.0005458290717569166,
      "loss": 3.1338,
      "step": 44765
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.2429425716400146,
      "learning_rate": 0.0005458267271100253,
      "loss": 3.069,
      "step": 44766
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4828544855117798,
      "learning_rate": 0.0005458243824174304,
      "loss": 3.0745,
      "step": 44767
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8306326866149902,
      "learning_rate": 0.0005458220376791319,
      "loss": 2.8389,
      "step": 44768
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7507853507995605,
      "learning_rate": 0.0005458196928951305,
      "loss": 3.0752,
      "step": 44769
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4270358085632324,
      "learning_rate": 0.0005458173480654266,
      "loss": 3.0388,
      "step": 44770
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8609551191329956,
      "learning_rate": 0.0005458150031900207,
      "loss": 2.7329,
      "step": 44771
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5360221862792969,
      "learning_rate": 0.000545812658268913,
      "loss": 3.0766,
      "step": 44772
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4440258741378784,
      "learning_rate": 0.0005458103133021042,
      "loss": 3.0941,
      "step": 44773
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6650670766830444,
      "learning_rate": 0.0005458079682895946,
      "loss": 3.1465,
      "step": 44774
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0760836601257324,
      "learning_rate": 0.0005458056232313846,
      "loss": 3.073,
      "step": 44775
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4705986976623535,
      "learning_rate": 0.0005458032781274747,
      "loss": 3.0391,
      "step": 44776
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8121638298034668,
      "learning_rate": 0.0005458009329778655,
      "loss": 2.9102,
      "step": 44777
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4992917776107788,
      "learning_rate": 0.0005457985877825571,
      "loss": 2.9403,
      "step": 44778
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8727785348892212,
      "learning_rate": 0.0005457962425415501,
      "loss": 3.1449,
      "step": 44779
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.438141107559204,
      "learning_rate": 0.000545793897254845,
      "loss": 3.0843,
      "step": 44780
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.466286063194275,
      "learning_rate": 0.0005457915519224421,
      "loss": 3.1299,
      "step": 44781
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8124644756317139,
      "learning_rate": 0.0005457892065443419,
      "loss": 3.0259,
      "step": 44782
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6492928266525269,
      "learning_rate": 0.000545786861120545,
      "loss": 2.8638,
      "step": 44783
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7695252895355225,
      "learning_rate": 0.0005457845156510516,
      "loss": 3.3287,
      "step": 44784
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5462169647216797,
      "learning_rate": 0.0005457821701358622,
      "loss": 3.2268,
      "step": 44785
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6088804006576538,
      "learning_rate": 0.0005457798245749772,
      "loss": 3.0682,
      "step": 44786
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9041281938552856,
      "learning_rate": 0.0005457774789683972,
      "loss": 2.9837,
      "step": 44787
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.198695421218872,
      "learning_rate": 0.0005457751333161224,
      "loss": 2.9928,
      "step": 44788
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4512362480163574,
      "learning_rate": 0.0005457727876181535,
      "loss": 3.0903,
      "step": 44789
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5941609144210815,
      "learning_rate": 0.0005457704418744908,
      "loss": 3.2977,
      "step": 44790
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9388659000396729,
      "learning_rate": 0.0005457680960851346,
      "loss": 2.898,
      "step": 44791
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.467106819152832,
      "learning_rate": 0.0005457657502500855,
      "loss": 3.139,
      "step": 44792
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6133073568344116,
      "learning_rate": 0.000545763404369344,
      "loss": 3.217,
      "step": 44793
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3777856826782227,
      "learning_rate": 0.0005457610584429104,
      "loss": 3.3885,
      "step": 44794
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7766358852386475,
      "learning_rate": 0.0005457587124707852,
      "loss": 3.2595,
      "step": 44795
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5487943887710571,
      "learning_rate": 0.0005457563664529688,
      "loss": 3.0562,
      "step": 44796
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3902753591537476,
      "learning_rate": 0.0005457540203894616,
      "loss": 2.7681,
      "step": 44797
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5196833610534668,
      "learning_rate": 0.0005457516742802641,
      "loss": 3.0035,
      "step": 44798
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4020750522613525,
      "learning_rate": 0.0005457493281253769,
      "loss": 2.9365,
      "step": 44799
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8359568119049072,
      "learning_rate": 0.0005457469819248001,
      "loss": 3.1611,
      "step": 44800
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0589232444763184,
      "learning_rate": 0.0005457446356785343,
      "loss": 3.0829,
      "step": 44801
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4227421283721924,
      "learning_rate": 0.00054574228938658,
      "loss": 2.923,
      "step": 44802
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.209085702896118,
      "learning_rate": 0.0005457399430489375,
      "loss": 3.0815,
      "step": 44803
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6626170873641968,
      "learning_rate": 0.0005457375966656074,
      "loss": 3.2021,
      "step": 44804
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5669305324554443,
      "learning_rate": 0.0005457352502365899,
      "loss": 3.1976,
      "step": 44805
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.831624150276184,
      "learning_rate": 0.0005457329037618856,
      "loss": 3.1755,
      "step": 44806
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.673727035522461,
      "learning_rate": 0.000545730557241495,
      "loss": 3.0594,
      "step": 44807
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3929637670516968,
      "learning_rate": 0.0005457282106754184,
      "loss": 3.2046,
      "step": 44808
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.883163332939148,
      "learning_rate": 0.0005457258640636563,
      "loss": 2.899,
      "step": 44809
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7904049158096313,
      "learning_rate": 0.0005457235174062092,
      "loss": 3.041,
      "step": 44810
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4585775136947632,
      "learning_rate": 0.0005457211707030773,
      "loss": 3.1702,
      "step": 44811
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.758713722229004,
      "learning_rate": 0.0005457188239542613,
      "loss": 3.0227,
      "step": 44812
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6939234733581543,
      "learning_rate": 0.0005457164771597616,
      "loss": 2.9749,
      "step": 44813
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.171171188354492,
      "learning_rate": 0.0005457141303195785,
      "loss": 3.0936,
      "step": 44814
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1582324504852295,
      "learning_rate": 0.0005457117834337125,
      "loss": 2.961,
      "step": 44815
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5054444074630737,
      "learning_rate": 0.000545709436502164,
      "loss": 3.2013,
      "step": 44816
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8057281970977783,
      "learning_rate": 0.0005457070895249335,
      "loss": 2.7979,
      "step": 44817
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.674194097518921,
      "learning_rate": 0.0005457047425020214,
      "loss": 3.143,
      "step": 44818
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0613958835601807,
      "learning_rate": 0.0005457023954334282,
      "loss": 2.7379,
      "step": 44819
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.692302942276001,
      "learning_rate": 0.0005457000483191542,
      "loss": 3.2949,
      "step": 44820
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4863959550857544,
      "learning_rate": 0.0005456977011592,
      "loss": 3.0243,
      "step": 44821
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3129043579101562,
      "learning_rate": 0.000545695353953566,
      "loss": 3.3377,
      "step": 44822
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.228624105453491,
      "learning_rate": 0.0005456930067022526,
      "loss": 2.7185,
      "step": 44823
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9824614524841309,
      "learning_rate": 0.00054569065940526,
      "loss": 2.9038,
      "step": 44824
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8267569541931152,
      "learning_rate": 0.0005456883120625891,
      "loss": 3.0037,
      "step": 44825
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8210747241973877,
      "learning_rate": 0.0005456859646742401,
      "loss": 3.2238,
      "step": 44826
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6154977083206177,
      "learning_rate": 0.0005456836172402133,
      "loss": 3.156,
      "step": 44827
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1097233295440674,
      "learning_rate": 0.0005456812697605093,
      "loss": 3.0915,
      "step": 44828
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2802382707595825,
      "learning_rate": 0.0005456789222351286,
      "loss": 3.0934,
      "step": 44829
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3988209962844849,
      "learning_rate": 0.0005456765746640716,
      "loss": 2.6589,
      "step": 44830
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9884395599365234,
      "learning_rate": 0.0005456742270473385,
      "loss": 2.8656,
      "step": 44831
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5099194049835205,
      "learning_rate": 0.0005456718793849302,
      "loss": 3.0966,
      "step": 44832
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5918238162994385,
      "learning_rate": 0.0005456695316768467,
      "loss": 3.0529,
      "step": 44833
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2675144672393799,
      "learning_rate": 0.0005456671839230886,
      "loss": 2.9997,
      "step": 44834
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5271729230880737,
      "learning_rate": 0.0005456648361236563,
      "loss": 2.8072,
      "step": 44835
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.403450846672058,
      "learning_rate": 0.0005456624882785503,
      "loss": 2.7994,
      "step": 44836
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.702841877937317,
      "learning_rate": 0.000545660140387771,
      "loss": 3.1936,
      "step": 44837
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8858124017715454,
      "learning_rate": 0.0005456577924513188,
      "loss": 3.0613,
      "step": 44838
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.246351480484009,
      "learning_rate": 0.0005456554444691943,
      "loss": 2.8218,
      "step": 44839
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.0090513229370117,
      "learning_rate": 0.0005456530964413977,
      "loss": 3.0788,
      "step": 44840
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5069290399551392,
      "learning_rate": 0.0005456507483679296,
      "loss": 3.2727,
      "step": 44841
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9428821802139282,
      "learning_rate": 0.0005456484002487904,
      "loss": 3.1317,
      "step": 44842
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4546823501586914,
      "learning_rate": 0.0005456460520839805,
      "loss": 3.1111,
      "step": 44843
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4126948118209839,
      "learning_rate": 0.0005456437038735003,
      "loss": 3.2773,
      "step": 44844
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7999656200408936,
      "learning_rate": 0.0005456413556173505,
      "loss": 3.1229,
      "step": 44845
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.2671902179718018,
      "learning_rate": 0.0005456390073155312,
      "loss": 3.0458,
      "step": 44846
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.54190993309021,
      "learning_rate": 0.000545636658968043,
      "loss": 3.2374,
      "step": 44847
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3999847173690796,
      "learning_rate": 0.0005456343105748863,
      "loss": 3.2472,
      "step": 44848
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.919180989265442,
      "learning_rate": 0.0005456319621360613,
      "loss": 3.0764,
      "step": 44849
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8780995607376099,
      "learning_rate": 0.0005456296136515691,
      "loss": 2.9364,
      "step": 44850
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6062805652618408,
      "learning_rate": 0.0005456272651214094,
      "loss": 3.0208,
      "step": 44851
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.430582880973816,
      "learning_rate": 0.0005456249165455831,
      "loss": 2.9498,
      "step": 44852
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3562794923782349,
      "learning_rate": 0.0005456225679240905,
      "loss": 3.0774,
      "step": 44853
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.473117709159851,
      "learning_rate": 0.000545620219256932,
      "loss": 2.8401,
      "step": 44854
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4492428302764893,
      "learning_rate": 0.000545617870544108,
      "loss": 3.0774,
      "step": 44855
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5636664628982544,
      "learning_rate": 0.0005456155217856191,
      "loss": 3.0453,
      "step": 44856
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3075798749923706,
      "learning_rate": 0.0005456131729814658,
      "loss": 3.3373,
      "step": 44857
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.318993330001831,
      "learning_rate": 0.0005456108241316481,
      "loss": 3.16,
      "step": 44858
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4988912343978882,
      "learning_rate": 0.0005456084752361668,
      "loss": 3.0125,
      "step": 44859
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4610888957977295,
      "learning_rate": 0.0005456061262950222,
      "loss": 3.2568,
      "step": 44860
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.724737286567688,
      "learning_rate": 0.0005456037773082148,
      "loss": 3.058,
      "step": 44861
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.438394546508789,
      "learning_rate": 0.0005456014282757451,
      "loss": 2.8841,
      "step": 44862
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5433191061019897,
      "learning_rate": 0.0005455990791976134,
      "loss": 3.2004,
      "step": 44863
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.012002468109131,
      "learning_rate": 0.0005455967300738203,
      "loss": 3.0604,
      "step": 44864
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8281726837158203,
      "learning_rate": 0.0005455943809043659,
      "loss": 3.1849,
      "step": 44865
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.42896568775177,
      "learning_rate": 0.0005455920316892511,
      "loss": 3.1741,
      "step": 44866
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.7761805057525635,
      "learning_rate": 0.000545589682428476,
      "loss": 3.0783,
      "step": 44867
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4521424770355225,
      "learning_rate": 0.0005455873331220412,
      "loss": 3.0804,
      "step": 44868
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9874366521835327,
      "learning_rate": 0.0005455849837699469,
      "loss": 2.9154,
      "step": 44869
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.6346280574798584,
      "learning_rate": 0.000545582634372194,
      "loss": 3.0578,
      "step": 44870
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.1412482261657715,
      "learning_rate": 0.0005455802849287824,
      "loss": 3.0484,
      "step": 44871
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.479981780052185,
      "learning_rate": 0.0005455779354397129,
      "loss": 2.9384,
      "step": 44872
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3608964681625366,
      "learning_rate": 0.0005455755859049857,
      "loss": 3.2361,
      "step": 44873
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5467885732650757,
      "learning_rate": 0.0005455732363246016,
      "loss": 2.9493,
      "step": 44874
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6720565557479858,
      "learning_rate": 0.0005455708866985607,
      "loss": 2.7461,
      "step": 44875
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4660893678665161,
      "learning_rate": 0.0005455685370268635,
      "loss": 3.023,
      "step": 44876
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5366476774215698,
      "learning_rate": 0.0005455661873095104,
      "loss": 3.3395,
      "step": 44877
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.561908483505249,
      "learning_rate": 0.0005455638375465021,
      "loss": 3.0473,
      "step": 44878
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3889256715774536,
      "learning_rate": 0.0005455614877378388,
      "loss": 3.0851,
      "step": 44879
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9133421182632446,
      "learning_rate": 0.0005455591378835208,
      "loss": 2.8996,
      "step": 44880
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.663159966468811,
      "learning_rate": 0.0005455567879835488,
      "loss": 3.237,
      "step": 44881
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6393650770187378,
      "learning_rate": 0.0005455544380379233,
      "loss": 3.0009,
      "step": 44882
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8436871767044067,
      "learning_rate": 0.0005455520880466446,
      "loss": 2.8715,
      "step": 44883
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7093710899353027,
      "learning_rate": 0.0005455497380097129,
      "loss": 2.9087,
      "step": 44884
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6662474870681763,
      "learning_rate": 0.0005455473879271291,
      "loss": 3.0806,
      "step": 44885
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4216485023498535,
      "learning_rate": 0.0005455450377988932,
      "loss": 3.0889,
      "step": 44886
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.607609510421753,
      "learning_rate": 0.000545542687625006,
      "loss": 3.0898,
      "step": 44887
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4493286609649658,
      "learning_rate": 0.0005455403374054677,
      "loss": 3.0877,
      "step": 44888
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8299133777618408,
      "learning_rate": 0.0005455379871402788,
      "loss": 3.2282,
      "step": 44889
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5463216304779053,
      "learning_rate": 0.0005455356368294399,
      "loss": 2.9855,
      "step": 44890
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.409003496170044,
      "learning_rate": 0.0005455332864729511,
      "loss": 2.9594,
      "step": 44891
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5304142236709595,
      "learning_rate": 0.0005455309360708131,
      "loss": 3.03,
      "step": 44892
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7195990085601807,
      "learning_rate": 0.0005455285856230263,
      "loss": 3.2861,
      "step": 44893
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.374506950378418,
      "learning_rate": 0.0005455262351295911,
      "loss": 2.853,
      "step": 44894
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.638197898864746,
      "learning_rate": 0.000545523884590508,
      "loss": 3.0384,
      "step": 44895
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4600632190704346,
      "learning_rate": 0.0005455215340057773,
      "loss": 3.0844,
      "step": 44896
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4858211278915405,
      "learning_rate": 0.0005455191833753995,
      "loss": 3.0544,
      "step": 44897
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3463560342788696,
      "learning_rate": 0.000545516832699375,
      "loss": 3.0779,
      "step": 44898
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9116451740264893,
      "learning_rate": 0.0005455144819777044,
      "loss": 2.9628,
      "step": 44899
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3069043159484863,
      "learning_rate": 0.0005455121312103879,
      "loss": 2.9982,
      "step": 44900
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5813345909118652,
      "learning_rate": 0.0005455097803974262,
      "loss": 3.0105,
      "step": 44901
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5128251314163208,
      "learning_rate": 0.0005455074295388194,
      "loss": 3.0453,
      "step": 44902
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5750995874404907,
      "learning_rate": 0.0005455050786345683,
      "loss": 3.0554,
      "step": 44903
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5881651639938354,
      "learning_rate": 0.0005455027276846732,
      "loss": 3.0395,
      "step": 44904
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4884053468704224,
      "learning_rate": 0.0005455003766891344,
      "loss": 3.1124,
      "step": 44905
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7028839588165283,
      "learning_rate": 0.0005454980256479525,
      "loss": 2.936,
      "step": 44906
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3997046947479248,
      "learning_rate": 0.0005454956745611279,
      "loss": 3.0911,
      "step": 44907
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9419910907745361,
      "learning_rate": 0.000545493323428661,
      "loss": 2.7958,
      "step": 44908
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6013209819793701,
      "learning_rate": 0.0005454909722505522,
      "loss": 3.0101,
      "step": 44909
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5315383672714233,
      "learning_rate": 0.000545488621026802,
      "loss": 3.3043,
      "step": 44910
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7849180698394775,
      "learning_rate": 0.0005454862697574109,
      "loss": 3.0436,
      "step": 44911
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.9431383609771729,
      "learning_rate": 0.0005454839184423794,
      "loss": 3.2166,
      "step": 44912
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7750352621078491,
      "learning_rate": 0.0005454815670817076,
      "loss": 2.9768,
      "step": 44913
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8731255531311035,
      "learning_rate": 0.0005454792156753961,
      "loss": 3.0804,
      "step": 44914
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4668956995010376,
      "learning_rate": 0.0005454768642234456,
      "loss": 2.9514,
      "step": 44915
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.7838743925094604,
      "learning_rate": 0.0005454745127258561,
      "loss": 3.0814,
      "step": 44916
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.5100435018539429,
      "learning_rate": 0.0005454721611826283,
      "loss": 3.0648,
      "step": 44917
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3743295669555664,
      "learning_rate": 0.0005454698095937626,
      "loss": 2.9435,
      "step": 44918
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.374621033668518,
      "learning_rate": 0.0005454674579592595,
      "loss": 3.2394,
      "step": 44919
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6446685791015625,
      "learning_rate": 0.0005454651062791194,
      "loss": 2.9862,
      "step": 44920
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.3424893617630005,
      "learning_rate": 0.0005454627545533426,
      "loss": 2.9565,
      "step": 44921
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.6796523332595825,
      "learning_rate": 0.0005454604027819297,
      "loss": 2.8799,
      "step": 44922
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.8212486505508423,
      "learning_rate": 0.000545458050964881,
      "loss": 3.0842,
      "step": 44923
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.309995174407959,
      "learning_rate": 0.000545455699102197,
      "loss": 3.0855,
      "step": 44924
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.44241201877594,
      "learning_rate": 0.0005454533471938782,
      "loss": 2.9845,
      "step": 44925
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4200613498687744,
      "learning_rate": 0.000545450995239925,
      "loss": 2.8957,
      "step": 44926
    },
    {
      "epoch": 0.58,
      "grad_norm": 1.4170091152191162,
      "learning_rate": 0.000545448643240338,
      "loss": 3.2288,
      "step": 44927
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.078221082687378,
      "learning_rate": 0.0005454462911951173,
      "loss": 2.9897,
      "step": 44928
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8337842226028442,
      "learning_rate": 0.0005454439391042634,
      "loss": 3.2095,
      "step": 44929
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5528388023376465,
      "learning_rate": 0.0005454415869677769,
      "loss": 3.0617,
      "step": 44930
    },
    {
      "epoch": 0.59,
      "grad_norm": 4.164382457733154,
      "learning_rate": 0.0005454392347856582,
      "loss": 3.1457,
      "step": 44931
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9679484367370605,
      "learning_rate": 0.0005454368825579078,
      "loss": 2.8804,
      "step": 44932
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.571516990661621,
      "learning_rate": 0.000545434530284526,
      "loss": 3.0482,
      "step": 44933
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.685690402984619,
      "learning_rate": 0.0005454321779655132,
      "loss": 3.1482,
      "step": 44934
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2945432662963867,
      "learning_rate": 0.00054542982560087,
      "loss": 3.1226,
      "step": 44935
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3726913928985596,
      "learning_rate": 0.0005454274731905968,
      "loss": 3.2821,
      "step": 44936
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4555822610855103,
      "learning_rate": 0.0005454251207346939,
      "loss": 3.2976,
      "step": 44937
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.184478521347046,
      "learning_rate": 0.0005454227682331618,
      "loss": 3.085,
      "step": 44938
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7455793619155884,
      "learning_rate": 0.0005454204156860012,
      "loss": 3.0055,
      "step": 44939
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5380076169967651,
      "learning_rate": 0.0005454180630932121,
      "loss": 2.8409,
      "step": 44940
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7511903047561646,
      "learning_rate": 0.0005454157104547953,
      "loss": 2.9997,
      "step": 44941
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.44783616065979,
      "learning_rate": 0.000545413357770751,
      "loss": 2.9976,
      "step": 44942
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.577978253364563,
      "learning_rate": 0.0005454110050410797,
      "loss": 3.1629,
      "step": 44943
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.826171875,
      "learning_rate": 0.0005454086522657819,
      "loss": 3.2941,
      "step": 44944
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.545988917350769,
      "learning_rate": 0.000545406299444858,
      "loss": 2.9193,
      "step": 44945
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6137677431106567,
      "learning_rate": 0.0005454039465783084,
      "loss": 2.9618,
      "step": 44946
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9104021787643433,
      "learning_rate": 0.0005454015936661337,
      "loss": 2.9189,
      "step": 44947
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1223337650299072,
      "learning_rate": 0.000545399240708334,
      "loss": 2.8154,
      "step": 44948
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.657947063446045,
      "learning_rate": 0.0005453968877049101,
      "loss": 3.2194,
      "step": 44949
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5134257078170776,
      "learning_rate": 0.0005453945346558623,
      "loss": 3.0925,
      "step": 44950
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.725463628768921,
      "learning_rate": 0.0005453921815611909,
      "loss": 3.0192,
      "step": 44951
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5367491245269775,
      "learning_rate": 0.0005453898284208966,
      "loss": 3.0872,
      "step": 44952
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.6661739349365234,
      "learning_rate": 0.0005453874752349797,
      "loss": 3.0589,
      "step": 44953
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5181641578674316,
      "learning_rate": 0.0005453851220034406,
      "loss": 3.1021,
      "step": 44954
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.553421974182129,
      "learning_rate": 0.0005453827687262796,
      "loss": 3.1514,
      "step": 44955
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.691758155822754,
      "learning_rate": 0.0005453804154034976,
      "loss": 3.0412,
      "step": 44956
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6243544816970825,
      "learning_rate": 0.0005453780620350945,
      "loss": 3.1517,
      "step": 44957
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5132273435592651,
      "learning_rate": 0.0005453757086210711,
      "loss": 3.015,
      "step": 44958
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0540711879730225,
      "learning_rate": 0.0005453733551614278,
      "loss": 3.1225,
      "step": 44959
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.968810796737671,
      "learning_rate": 0.0005453710016561649,
      "loss": 3.1273,
      "step": 44960
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3268628120422363,
      "learning_rate": 0.0005453686481052828,
      "loss": 2.9406,
      "step": 44961
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.9968388080596924,
      "learning_rate": 0.0005453662945087821,
      "loss": 2.8359,
      "step": 44962
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.9116828441619873,
      "learning_rate": 0.0005453639408666632,
      "loss": 2.9431,
      "step": 44963
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8097201585769653,
      "learning_rate": 0.0005453615871789265,
      "loss": 2.8271,
      "step": 44964
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7276972532272339,
      "learning_rate": 0.0005453592334455725,
      "loss": 3.1905,
      "step": 44965
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4275275468826294,
      "learning_rate": 0.0005453568796666015,
      "loss": 3.0112,
      "step": 44966
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1570188999176025,
      "learning_rate": 0.0005453545258420141,
      "loss": 3.0988,
      "step": 44967
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5542771816253662,
      "learning_rate": 0.0005453521719718106,
      "loss": 3.0978,
      "step": 44968
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7526530027389526,
      "learning_rate": 0.0005453498180559915,
      "loss": 3.1534,
      "step": 44969
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1741628646850586,
      "learning_rate": 0.0005453474640945572,
      "loss": 3.029,
      "step": 44970
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7036666870117188,
      "learning_rate": 0.0005453451100875084,
      "loss": 3.0308,
      "step": 44971
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.141037702560425,
      "learning_rate": 0.0005453427560348451,
      "loss": 3.0818,
      "step": 44972
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5357919931411743,
      "learning_rate": 0.000545340401936568,
      "loss": 3.1787,
      "step": 44973
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.380155324935913,
      "learning_rate": 0.0005453380477926774,
      "loss": 3.0978,
      "step": 44974
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6296546459197998,
      "learning_rate": 0.000545335693603174,
      "loss": 3.1695,
      "step": 44975
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.90410315990448,
      "learning_rate": 0.000545333339368058,
      "loss": 2.9158,
      "step": 44976
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7557474374771118,
      "learning_rate": 0.0005453309850873299,
      "loss": 3.2048,
      "step": 44977
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8835619688034058,
      "learning_rate": 0.00054532863076099,
      "loss": 3.1442,
      "step": 44978
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5909066200256348,
      "learning_rate": 0.000545326276389039,
      "loss": 2.9207,
      "step": 44979
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4491053819656372,
      "learning_rate": 0.0005453239219714773,
      "loss": 3.1487,
      "step": 44980
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5974810123443604,
      "learning_rate": 0.0005453215675083051,
      "loss": 3.2815,
      "step": 44981
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.665054440498352,
      "learning_rate": 0.000545319212999523,
      "loss": 3.0501,
      "step": 44982
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.490527868270874,
      "learning_rate": 0.0005453168584451315,
      "loss": 3.0066,
      "step": 44983
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6946543455123901,
      "learning_rate": 0.0005453145038451311,
      "loss": 3.2678,
      "step": 44984
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4604144096374512,
      "learning_rate": 0.0005453121491995218,
      "loss": 3.1312,
      "step": 44985
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5307971239089966,
      "learning_rate": 0.0005453097945083045,
      "loss": 2.736,
      "step": 44986
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6182461977005005,
      "learning_rate": 0.0005453074397714795,
      "loss": 3.0766,
      "step": 44987
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6342719793319702,
      "learning_rate": 0.0005453050849890471,
      "loss": 3.3158,
      "step": 44988
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4033641815185547,
      "learning_rate": 0.0005453027301610079,
      "loss": 2.7864,
      "step": 44989
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7544862031936646,
      "learning_rate": 0.0005453003752873625,
      "loss": 2.9558,
      "step": 44990
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2592850923538208,
      "learning_rate": 0.0005452980203681109,
      "loss": 3.007,
      "step": 44991
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.514786720275879,
      "learning_rate": 0.0005452956654032538,
      "loss": 3.0903,
      "step": 44992
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8185147047042847,
      "learning_rate": 0.0005452933103927916,
      "loss": 3.0642,
      "step": 44993
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.471224308013916,
      "learning_rate": 0.0005452909553367248,
      "loss": 2.9738,
      "step": 44994
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5320539474487305,
      "learning_rate": 0.0005452886002350537,
      "loss": 3.0019,
      "step": 44995
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4439181089401245,
      "learning_rate": 0.0005452862450877789,
      "loss": 2.6377,
      "step": 44996
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5589827299118042,
      "learning_rate": 0.0005452838898949007,
      "loss": 2.9946,
      "step": 44997
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.642984390258789,
      "learning_rate": 0.0005452815346564197,
      "loss": 2.94,
      "step": 44998
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9122884273529053,
      "learning_rate": 0.0005452791793723362,
      "loss": 3.0036,
      "step": 44999
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.450327157974243,
      "learning_rate": 0.0005452768240426507,
      "loss": 3.1802,
      "step": 45000
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1022093296051025,
      "learning_rate": 0.0005452744686673634,
      "loss": 3.0403,
      "step": 45001
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7332767248153687,
      "learning_rate": 0.0005452721132464751,
      "loss": 3.1942,
      "step": 45002
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6677958965301514,
      "learning_rate": 0.0005452697577799861,
      "loss": 2.8355,
      "step": 45003
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4849612712860107,
      "learning_rate": 0.0005452674022678967,
      "loss": 3.1608,
      "step": 45004
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3926597833633423,
      "learning_rate": 0.0005452650467102076,
      "loss": 3.2567,
      "step": 45005
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.636624813079834,
      "learning_rate": 0.0005452626911069191,
      "loss": 3.1332,
      "step": 45006
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.513188362121582,
      "learning_rate": 0.0005452603354580316,
      "loss": 2.9731,
      "step": 45007
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7999016046524048,
      "learning_rate": 0.0005452579797635456,
      "loss": 3.0703,
      "step": 45008
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7144955396652222,
      "learning_rate": 0.0005452556240234614,
      "loss": 3.1442,
      "step": 45009
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4104925394058228,
      "learning_rate": 0.0005452532682377796,
      "loss": 3.0081,
      "step": 45010
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9376225471496582,
      "learning_rate": 0.0005452509124065007,
      "loss": 3.0313,
      "step": 45011
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3859796524047852,
      "learning_rate": 0.0005452485565296249,
      "loss": 3.2072,
      "step": 45012
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.409832239151001,
      "learning_rate": 0.0005452462006071527,
      "loss": 2.9618,
      "step": 45013
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.1741292476654053,
      "learning_rate": 0.0005452438446390849,
      "loss": 2.733,
      "step": 45014
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4816429615020752,
      "learning_rate": 0.0005452414886254213,
      "loss": 2.9704,
      "step": 45015
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3628143072128296,
      "learning_rate": 0.0005452391325661628,
      "loss": 3.2246,
      "step": 45016
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6705923080444336,
      "learning_rate": 0.0005452367764613098,
      "loss": 3.0059,
      "step": 45017
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4807915687561035,
      "learning_rate": 0.0005452344203108626,
      "loss": 3.5397,
      "step": 45018
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3862189054489136,
      "learning_rate": 0.0005452320641148216,
      "loss": 3.1929,
      "step": 45019
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7576814889907837,
      "learning_rate": 0.0005452297078731874,
      "loss": 3.1335,
      "step": 45020
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.621335744857788,
      "learning_rate": 0.0005452273515859604,
      "loss": 3.2457,
      "step": 45021
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8242433071136475,
      "learning_rate": 0.0005452249952531409,
      "loss": 3.0121,
      "step": 45022
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.252855062484741,
      "learning_rate": 0.0005452226388747296,
      "loss": 2.7616,
      "step": 45023
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4351061582565308,
      "learning_rate": 0.0005452202824507268,
      "loss": 3.1149,
      "step": 45024
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5299640893936157,
      "learning_rate": 0.0005452179259811328,
      "loss": 3.199,
      "step": 45025
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8323967456817627,
      "learning_rate": 0.0005452155694659482,
      "loss": 2.9447,
      "step": 45026
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6790307760238647,
      "learning_rate": 0.0005452132129051733,
      "loss": 3.1946,
      "step": 45027
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7132917642593384,
      "learning_rate": 0.0005452108562988088,
      "loss": 3.2491,
      "step": 45028
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9319905042648315,
      "learning_rate": 0.0005452084996468549,
      "loss": 3.0491,
      "step": 45029
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2020504474639893,
      "learning_rate": 0.0005452061429493121,
      "loss": 2.8631,
      "step": 45030
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5330793857574463,
      "learning_rate": 0.0005452037862061809,
      "loss": 3.0216,
      "step": 45031
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6753184795379639,
      "learning_rate": 0.0005452014294174616,
      "loss": 3.1291,
      "step": 45032
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3279236555099487,
      "learning_rate": 0.0005451990725831548,
      "loss": 2.9378,
      "step": 45033
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4602394104003906,
      "learning_rate": 0.0005451967157032608,
      "loss": 2.9876,
      "step": 45034
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7997792959213257,
      "learning_rate": 0.0005451943587777801,
      "loss": 3.1717,
      "step": 45035
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5192474126815796,
      "learning_rate": 0.0005451920018067132,
      "loss": 2.7942,
      "step": 45036
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1123666763305664,
      "learning_rate": 0.0005451896447900605,
      "loss": 3.0397,
      "step": 45037
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1607489585876465,
      "learning_rate": 0.0005451872877278224,
      "loss": 2.8224,
      "step": 45038
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4566463232040405,
      "learning_rate": 0.0005451849306199992,
      "loss": 3.1446,
      "step": 45039
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4836581945419312,
      "learning_rate": 0.0005451825734665917,
      "loss": 3.0939,
      "step": 45040
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.299281120300293,
      "learning_rate": 0.0005451802162676001,
      "loss": 3.16,
      "step": 45041
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4143990278244019,
      "learning_rate": 0.0005451778590230248,
      "loss": 3.0886,
      "step": 45042
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6494501829147339,
      "learning_rate": 0.0005451755017328663,
      "loss": 3.3057,
      "step": 45043
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4449187517166138,
      "learning_rate": 0.0005451731443971251,
      "loss": 3.0749,
      "step": 45044
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.018953323364258,
      "learning_rate": 0.0005451707870158015,
      "loss": 2.7894,
      "step": 45045
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.313091516494751,
      "learning_rate": 0.0005451684295888961,
      "loss": 2.9346,
      "step": 45046
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.318448781967163,
      "learning_rate": 0.0005451660721164092,
      "loss": 3.0123,
      "step": 45047
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.744046926498413,
      "learning_rate": 0.0005451637145983414,
      "loss": 3.2521,
      "step": 45048
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4671977758407593,
      "learning_rate": 0.000545161357034693,
      "loss": 3.3447,
      "step": 45049
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.776233196258545,
      "learning_rate": 0.0005451589994254645,
      "loss": 3.0618,
      "step": 45050
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.096914052963257,
      "learning_rate": 0.0005451566417706562,
      "loss": 3.1181,
      "step": 45051
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2102861404418945,
      "learning_rate": 0.0005451542840702688,
      "loss": 3.1321,
      "step": 45052
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4279025793075562,
      "learning_rate": 0.0005451519263243024,
      "loss": 3.0242,
      "step": 45053
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.652945637702942,
      "learning_rate": 0.0005451495685327579,
      "loss": 3.1555,
      "step": 45054
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3603365421295166,
      "learning_rate": 0.0005451472106956353,
      "loss": 3.2899,
      "step": 45055
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.744932770729065,
      "learning_rate": 0.0005451448528129352,
      "loss": 3.2794,
      "step": 45056
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9523332118988037,
      "learning_rate": 0.0005451424948846581,
      "loss": 3.1255,
      "step": 45057
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3852440118789673,
      "learning_rate": 0.0005451401369108043,
      "loss": 3.0584,
      "step": 45058
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.7328290939331055,
      "learning_rate": 0.0005451377788913743,
      "loss": 3.1555,
      "step": 45059
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.882271409034729,
      "learning_rate": 0.0005451354208263686,
      "loss": 3.035,
      "step": 45060
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3206946849823,
      "learning_rate": 0.0005451330627157877,
      "loss": 3.0114,
      "step": 45061
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0577056407928467,
      "learning_rate": 0.0005451307045596319,
      "loss": 2.9896,
      "step": 45062
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9586249589920044,
      "learning_rate": 0.0005451283463579016,
      "loss": 3.2078,
      "step": 45063
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6620995998382568,
      "learning_rate": 0.0005451259881105974,
      "loss": 2.6741,
      "step": 45064
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7509583234786987,
      "learning_rate": 0.0005451236298177196,
      "loss": 3.1575,
      "step": 45065
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9959532022476196,
      "learning_rate": 0.0005451212714792686,
      "loss": 3.0251,
      "step": 45066
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5154575109481812,
      "learning_rate": 0.000545118913095245,
      "loss": 3.1935,
      "step": 45067
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.391922116279602,
      "learning_rate": 0.0005451165546656492,
      "loss": 3.0048,
      "step": 45068
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9340019226074219,
      "learning_rate": 0.0005451141961904816,
      "loss": 2.8476,
      "step": 45069
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8625595569610596,
      "learning_rate": 0.0005451118376697426,
      "loss": 3.3235,
      "step": 45070
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6568692922592163,
      "learning_rate": 0.0005451094791034327,
      "loss": 3.24,
      "step": 45071
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.536257743835449,
      "learning_rate": 0.0005451071204915524,
      "loss": 3.176,
      "step": 45072
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5598390102386475,
      "learning_rate": 0.000545104761834102,
      "loss": 2.8959,
      "step": 45073
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8744105100631714,
      "learning_rate": 0.000545102403131082,
      "loss": 2.8626,
      "step": 45074
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5686140060424805,
      "learning_rate": 0.0005451000443824929,
      "loss": 2.7896,
      "step": 45075
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4331611394882202,
      "learning_rate": 0.0005450976855883349,
      "loss": 2.9952,
      "step": 45076
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6060631275177002,
      "learning_rate": 0.0005450953267486089,
      "loss": 3.1742,
      "step": 45077
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6168935298919678,
      "learning_rate": 0.0005450929678633147,
      "loss": 3.4459,
      "step": 45078
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6210747957229614,
      "learning_rate": 0.0005450906089324533,
      "loss": 3.1966,
      "step": 45079
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9078526496887207,
      "learning_rate": 0.0005450882499560248,
      "loss": 3.0611,
      "step": 45080
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3719695806503296,
      "learning_rate": 0.00054508589093403,
      "loss": 3.074,
      "step": 45081
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.417234420776367,
      "learning_rate": 0.0005450835318664689,
      "loss": 3.0088,
      "step": 45082
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7217211723327637,
      "learning_rate": 0.0005450811727533422,
      "loss": 3.0658,
      "step": 45083
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7555134296417236,
      "learning_rate": 0.0005450788135946503,
      "loss": 2.8888,
      "step": 45084
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0943117141723633,
      "learning_rate": 0.0005450764543903937,
      "loss": 3.1025,
      "step": 45085
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.589296579360962,
      "learning_rate": 0.0005450740951405726,
      "loss": 2.8815,
      "step": 45086
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3043396472930908,
      "learning_rate": 0.0005450717358451877,
      "loss": 3.1986,
      "step": 45087
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.90488600730896,
      "learning_rate": 0.0005450693765042393,
      "loss": 3.1556,
      "step": 45088
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2005605697631836,
      "learning_rate": 0.0005450670171177279,
      "loss": 2.8941,
      "step": 45089
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0013911724090576,
      "learning_rate": 0.0005450646576856539,
      "loss": 2.9226,
      "step": 45090
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0718369483947754,
      "learning_rate": 0.0005450622982080179,
      "loss": 3.2787,
      "step": 45091
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9744497537612915,
      "learning_rate": 0.0005450599386848201,
      "loss": 3.4585,
      "step": 45092
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4722647666931152,
      "learning_rate": 0.0005450575791160608,
      "loss": 2.876,
      "step": 45093
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8333220481872559,
      "learning_rate": 0.0005450552195017409,
      "loss": 3.2298,
      "step": 45094
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6793417930603027,
      "learning_rate": 0.0005450528598418606,
      "loss": 2.9132,
      "step": 45095
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.3266868591308594,
      "learning_rate": 0.0005450505001364204,
      "loss": 3.2359,
      "step": 45096
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.2671282291412354,
      "learning_rate": 0.0005450481403854205,
      "loss": 2.9544,
      "step": 45097
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3967233896255493,
      "learning_rate": 0.0005450457805888616,
      "loss": 3.0839,
      "step": 45098
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4896323680877686,
      "learning_rate": 0.0005450434207467441,
      "loss": 3.0516,
      "step": 45099
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.5800857543945312,
      "learning_rate": 0.0005450410608590685,
      "loss": 3.0905,
      "step": 45100
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.122544050216675,
      "learning_rate": 0.0005450387009258349,
      "loss": 3.2062,
      "step": 45101
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6365737915039062,
      "learning_rate": 0.0005450363409470442,
      "loss": 3.1704,
      "step": 45102
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.9387471675872803,
      "learning_rate": 0.0005450339809226965,
      "loss": 3.2406,
      "step": 45103
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6939406394958496,
      "learning_rate": 0.0005450316208527924,
      "loss": 3.162,
      "step": 45104
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7382103204727173,
      "learning_rate": 0.0005450292607373324,
      "loss": 3.0827,
      "step": 45105
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8748787641525269,
      "learning_rate": 0.0005450269005763166,
      "loss": 3.1355,
      "step": 45106
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.7469515800476074,
      "learning_rate": 0.0005450245403697459,
      "loss": 3.1997,
      "step": 45107
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8859422206878662,
      "learning_rate": 0.0005450221801176203,
      "loss": 3.0377,
      "step": 45108
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5620445013046265,
      "learning_rate": 0.0005450198198199407,
      "loss": 2.933,
      "step": 45109
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7575607299804688,
      "learning_rate": 0.0005450174594767071,
      "loss": 3.3352,
      "step": 45110
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.070905923843384,
      "learning_rate": 0.0005450150990879201,
      "loss": 3.0328,
      "step": 45111
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5059291124343872,
      "learning_rate": 0.0005450127386535802,
      "loss": 3.2187,
      "step": 45112
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5884169340133667,
      "learning_rate": 0.000545010378173688,
      "loss": 2.7608,
      "step": 45113
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4578564167022705,
      "learning_rate": 0.0005450080176482436,
      "loss": 2.9391,
      "step": 45114
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.496268630027771,
      "learning_rate": 0.0005450056570772475,
      "loss": 2.8305,
      "step": 45115
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5607635974884033,
      "learning_rate": 0.0005450032964607002,
      "loss": 3.1116,
      "step": 45116
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6705005168914795,
      "learning_rate": 0.0005450009357986024,
      "loss": 2.8537,
      "step": 45117
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3168058395385742,
      "learning_rate": 0.0005449985750909542,
      "loss": 2.8707,
      "step": 45118
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6057322025299072,
      "learning_rate": 0.000544996214337756,
      "loss": 2.9441,
      "step": 45119
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0424599647521973,
      "learning_rate": 0.0005449938535390086,
      "loss": 2.8181,
      "step": 45120
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.348854422569275,
      "learning_rate": 0.0005449914926947121,
      "loss": 3.2674,
      "step": 45121
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1609725952148438,
      "learning_rate": 0.0005449891318048671,
      "loss": 2.8591,
      "step": 45122
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4484198093414307,
      "learning_rate": 0.000544986770869474,
      "loss": 3.029,
      "step": 45123
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5233378410339355,
      "learning_rate": 0.0005449844098885331,
      "loss": 3.1265,
      "step": 45124
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.589998722076416,
      "learning_rate": 0.0005449820488620451,
      "loss": 3.0689,
      "step": 45125
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.253827452659607,
      "learning_rate": 0.0005449796877900103,
      "loss": 2.9224,
      "step": 45126
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5272865295410156,
      "learning_rate": 0.0005449773266724291,
      "loss": 3.0789,
      "step": 45127
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6308413743972778,
      "learning_rate": 0.0005449749655093021,
      "loss": 3.0768,
      "step": 45128
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5247377157211304,
      "learning_rate": 0.0005449726043006295,
      "loss": 3.0982,
      "step": 45129
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6160863637924194,
      "learning_rate": 0.000544970243046412,
      "loss": 3.0672,
      "step": 45130
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4236326217651367,
      "learning_rate": 0.0005449678817466498,
      "loss": 2.8978,
      "step": 45131
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3931283950805664,
      "learning_rate": 0.0005449655204013434,
      "loss": 3.5203,
      "step": 45132
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9254150390625,
      "learning_rate": 0.0005449631590104934,
      "loss": 2.963,
      "step": 45133
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6915544271469116,
      "learning_rate": 0.0005449607975741001,
      "loss": 3.1035,
      "step": 45134
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.60177481174469,
      "learning_rate": 0.000544958436092164,
      "loss": 3.1142,
      "step": 45135
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1512200832366943,
      "learning_rate": 0.0005449560745646855,
      "loss": 3.2192,
      "step": 45136
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.591109037399292,
      "learning_rate": 0.0005449537129916649,
      "loss": 3.2424,
      "step": 45137
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.623500108718872,
      "learning_rate": 0.0005449513513731029,
      "loss": 2.9903,
      "step": 45138
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4889541864395142,
      "learning_rate": 0.0005449489897089998,
      "loss": 2.9523,
      "step": 45139
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9609369039535522,
      "learning_rate": 0.000544946627999356,
      "loss": 2.911,
      "step": 45140
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8246536254882812,
      "learning_rate": 0.0005449442662441721,
      "loss": 3.344,
      "step": 45141
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2901558876037598,
      "learning_rate": 0.0005449419044434485,
      "loss": 2.5521,
      "step": 45142
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.7671091556549072,
      "learning_rate": 0.0005449395425971854,
      "loss": 3.1716,
      "step": 45143
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.919955253601074,
      "learning_rate": 0.0005449371807053835,
      "loss": 3.0509,
      "step": 45144
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6560931205749512,
      "learning_rate": 0.0005449348187680431,
      "loss": 3.3417,
      "step": 45145
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.0622122287750244,
      "learning_rate": 0.0005449324567851647,
      "loss": 2.9068,
      "step": 45146
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2951321601867676,
      "learning_rate": 0.0005449300947567488,
      "loss": 3.064,
      "step": 45147
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.571023941040039,
      "learning_rate": 0.0005449277326827958,
      "loss": 3.276,
      "step": 45148
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7437946796417236,
      "learning_rate": 0.000544925370563306,
      "loss": 3.2512,
      "step": 45149
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8835035562515259,
      "learning_rate": 0.00054492300839828,
      "loss": 3.1407,
      "step": 45150
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5601574182510376,
      "learning_rate": 0.0005449206461877182,
      "loss": 3.2307,
      "step": 45151
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7982747554779053,
      "learning_rate": 0.000544918283931621,
      "loss": 2.8991,
      "step": 45152
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8597098588943481,
      "learning_rate": 0.0005449159216299889,
      "loss": 3.1773,
      "step": 45153
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4170047044754028,
      "learning_rate": 0.0005449135592828223,
      "loss": 3.0889,
      "step": 45154
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2917219400405884,
      "learning_rate": 0.0005449111968901216,
      "loss": 3.0883,
      "step": 45155
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.776247501373291,
      "learning_rate": 0.0005449088344518875,
      "loss": 3.1944,
      "step": 45156
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6809723377227783,
      "learning_rate": 0.0005449064719681199,
      "loss": 3.3728,
      "step": 45157
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5763646364212036,
      "learning_rate": 0.0005449041094388197,
      "loss": 3.2843,
      "step": 45158
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.295975923538208,
      "learning_rate": 0.0005449017468639872,
      "loss": 2.9126,
      "step": 45159
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.540696620941162,
      "learning_rate": 0.0005448993842436229,
      "loss": 3.1392,
      "step": 45160
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5243067741394043,
      "learning_rate": 0.0005448970215777272,
      "loss": 3.1256,
      "step": 45161
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5089843273162842,
      "learning_rate": 0.0005448946588663005,
      "loss": 2.9683,
      "step": 45162
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0398902893066406,
      "learning_rate": 0.0005448922961093432,
      "loss": 2.9784,
      "step": 45163
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5215446949005127,
      "learning_rate": 0.0005448899333068557,
      "loss": 2.9896,
      "step": 45164
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8049445152282715,
      "learning_rate": 0.0005448875704588387,
      "loss": 3.2001,
      "step": 45165
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0130248069763184,
      "learning_rate": 0.0005448852075652924,
      "loss": 2.8192,
      "step": 45166
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8225417137145996,
      "learning_rate": 0.0005448828446262174,
      "loss": 3.1909,
      "step": 45167
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.025972843170166,
      "learning_rate": 0.000544880481641614,
      "loss": 3.0292,
      "step": 45168
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.693812370300293,
      "learning_rate": 0.0005448781186114827,
      "loss": 3.1082,
      "step": 45169
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.0607259273529053,
      "learning_rate": 0.000544875755535824,
      "loss": 2.7849,
      "step": 45170
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.0029075145721436,
      "learning_rate": 0.0005448733924146382,
      "loss": 3.0363,
      "step": 45171
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6449787616729736,
      "learning_rate": 0.0005448710292479257,
      "loss": 3.1102,
      "step": 45172
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.058661937713623,
      "learning_rate": 0.0005448686660356873,
      "loss": 3.079,
      "step": 45173
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.290114641189575,
      "learning_rate": 0.000544866302777923,
      "loss": 3.0741,
      "step": 45174
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.439329981803894,
      "learning_rate": 0.0005448639394746336,
      "loss": 3.0461,
      "step": 45175
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.428465485572815,
      "learning_rate": 0.0005448615761258193,
      "loss": 3.3508,
      "step": 45176
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7775492668151855,
      "learning_rate": 0.0005448592127314806,
      "loss": 3.3587,
      "step": 45177
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4828598499298096,
      "learning_rate": 0.0005448568492916179,
      "loss": 3.1805,
      "step": 45178
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5652351379394531,
      "learning_rate": 0.0005448544858062317,
      "loss": 3.0426,
      "step": 45179
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5605028867721558,
      "learning_rate": 0.0005448521222753225,
      "loss": 2.8775,
      "step": 45180
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7028485536575317,
      "learning_rate": 0.0005448497586988906,
      "loss": 3.173,
      "step": 45181
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8471344709396362,
      "learning_rate": 0.0005448473950769365,
      "loss": 2.9773,
      "step": 45182
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9591726064682007,
      "learning_rate": 0.0005448450314094607,
      "loss": 3.2012,
      "step": 45183
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8263466358184814,
      "learning_rate": 0.0005448426676964637,
      "loss": 3.2188,
      "step": 45184
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1529524326324463,
      "learning_rate": 0.0005448403039379457,
      "loss": 2.8536,
      "step": 45185
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5159646272659302,
      "learning_rate": 0.0005448379401339072,
      "loss": 3.2799,
      "step": 45186
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.844620704650879,
      "learning_rate": 0.0005448355762843488,
      "loss": 3.2238,
      "step": 45187
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.652940034866333,
      "learning_rate": 0.0005448332123892709,
      "loss": 3.0294,
      "step": 45188
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8397120237350464,
      "learning_rate": 0.0005448308484486737,
      "loss": 3.1039,
      "step": 45189
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.042064666748047,
      "learning_rate": 0.0005448284844625579,
      "loss": 2.7766,
      "step": 45190
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4452253580093384,
      "learning_rate": 0.0005448261204309239,
      "loss": 2.9545,
      "step": 45191
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0689165592193604,
      "learning_rate": 0.0005448237563537721,
      "loss": 3.0697,
      "step": 45192
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.777801036834717,
      "learning_rate": 0.0005448213922311029,
      "loss": 3.1912,
      "step": 45193
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7237855195999146,
      "learning_rate": 0.0005448190280629168,
      "loss": 2.9117,
      "step": 45194
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3097355365753174,
      "learning_rate": 0.0005448166638492143,
      "loss": 3.2051,
      "step": 45195
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.23779559135437,
      "learning_rate": 0.0005448142995899956,
      "loss": 2.8807,
      "step": 45196
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0989437103271484,
      "learning_rate": 0.0005448119352852614,
      "loss": 2.9688,
      "step": 45197
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4861990213394165,
      "learning_rate": 0.000544809570935012,
      "loss": 3.1369,
      "step": 45198
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.806715726852417,
      "learning_rate": 0.000544807206539248,
      "loss": 3.2101,
      "step": 45199
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6699788570404053,
      "learning_rate": 0.0005448048420979697,
      "loss": 3.1117,
      "step": 45200
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5893255472183228,
      "learning_rate": 0.0005448024776111774,
      "loss": 3.0389,
      "step": 45201
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8023061752319336,
      "learning_rate": 0.0005448001130788718,
      "loss": 3.0741,
      "step": 45202
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.063608407974243,
      "learning_rate": 0.0005447977485010533,
      "loss": 2.788,
      "step": 45203
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3555610179901123,
      "learning_rate": 0.0005447953838777222,
      "loss": 3.2162,
      "step": 45204
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5020370483398438,
      "learning_rate": 0.000544793019208879,
      "loss": 2.9993,
      "step": 45205
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.216797351837158,
      "learning_rate": 0.0005447906544945242,
      "loss": 3.0295,
      "step": 45206
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.6327853202819824,
      "learning_rate": 0.0005447882897346581,
      "loss": 3.0041,
      "step": 45207
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.024639368057251,
      "learning_rate": 0.0005447859249292814,
      "loss": 3.0285,
      "step": 45208
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4797539710998535,
      "learning_rate": 0.0005447835600783942,
      "loss": 3.1913,
      "step": 45209
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6212093830108643,
      "learning_rate": 0.0005447811951819972,
      "loss": 3.0781,
      "step": 45210
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.8030407428741455,
      "learning_rate": 0.0005447788302400909,
      "loss": 3.1829,
      "step": 45211
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4781200885772705,
      "learning_rate": 0.0005447764652526753,
      "loss": 3.1882,
      "step": 45212
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.679400682449341,
      "learning_rate": 0.0005447741002197513,
      "loss": 2.8267,
      "step": 45213
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6357436180114746,
      "learning_rate": 0.0005447717351413192,
      "loss": 3.1462,
      "step": 45214
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6347543001174927,
      "learning_rate": 0.0005447693700173793,
      "loss": 3.1234,
      "step": 45215
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2588024139404297,
      "learning_rate": 0.0005447670048479323,
      "loss": 3.0779,
      "step": 45216
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.165200710296631,
      "learning_rate": 0.0005447646396329784,
      "loss": 2.8092,
      "step": 45217
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7403820753097534,
      "learning_rate": 0.0005447622743725183,
      "loss": 3.2026,
      "step": 45218
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.581396222114563,
      "learning_rate": 0.0005447599090665521,
      "loss": 2.9951,
      "step": 45219
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.62180495262146,
      "learning_rate": 0.0005447575437150803,
      "loss": 3.0794,
      "step": 45220
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1569113731384277,
      "learning_rate": 0.0005447551783181037,
      "loss": 3.1792,
      "step": 45221
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.617113471031189,
      "learning_rate": 0.0005447528128756224,
      "loss": 2.9715,
      "step": 45222
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4165186882019043,
      "learning_rate": 0.0005447504473876369,
      "loss": 2.758,
      "step": 45223
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5175559520721436,
      "learning_rate": 0.0005447480818541477,
      "loss": 3.476,
      "step": 45224
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9082190990447998,
      "learning_rate": 0.0005447457162751552,
      "loss": 2.9861,
      "step": 45225
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.161369800567627,
      "learning_rate": 0.0005447433506506599,
      "loss": 2.8105,
      "step": 45226
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5381195545196533,
      "learning_rate": 0.0005447409849806622,
      "loss": 3.1286,
      "step": 45227
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.256124496459961,
      "learning_rate": 0.0005447386192651624,
      "loss": 2.9417,
      "step": 45228
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4296774864196777,
      "learning_rate": 0.0005447362535041611,
      "loss": 3.2263,
      "step": 45229
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.476460337638855,
      "learning_rate": 0.0005447338876976588,
      "loss": 3.0569,
      "step": 45230
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6165926456451416,
      "learning_rate": 0.0005447315218456558,
      "loss": 3.194,
      "step": 45231
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7781107425689697,
      "learning_rate": 0.0005447291559481526,
      "loss": 3.0936,
      "step": 45232
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.565398931503296,
      "learning_rate": 0.0005447267900051497,
      "loss": 2.8699,
      "step": 45233
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.512378215789795,
      "learning_rate": 0.0005447244240166473,
      "loss": 3.0535,
      "step": 45234
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4005470275878906,
      "learning_rate": 0.0005447220579826461,
      "loss": 3.031,
      "step": 45235
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9465417861938477,
      "learning_rate": 0.0005447196919031466,
      "loss": 3.1442,
      "step": 45236
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5298988819122314,
      "learning_rate": 0.0005447173257781489,
      "loss": 2.9045,
      "step": 45237
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.4353723526000977,
      "learning_rate": 0.0005447149596076536,
      "loss": 3.1001,
      "step": 45238
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.9822018146514893,
      "learning_rate": 0.0005447125933916613,
      "loss": 3.0073,
      "step": 45239
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5661729574203491,
      "learning_rate": 0.0005447102271301722,
      "loss": 3.0871,
      "step": 45240
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4439036846160889,
      "learning_rate": 0.000544707860823187,
      "loss": 3.0462,
      "step": 45241
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9400193691253662,
      "learning_rate": 0.0005447054944707058,
      "loss": 3.2327,
      "step": 45242
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.296513795852661,
      "learning_rate": 0.0005447031280727294,
      "loss": 2.9336,
      "step": 45243
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5068093538284302,
      "learning_rate": 0.000544700761629258,
      "loss": 3.0225,
      "step": 45244
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.512399673461914,
      "learning_rate": 0.0005446983951402922,
      "loss": 3.1007,
      "step": 45245
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.7340962886810303,
      "learning_rate": 0.0005446960286058321,
      "loss": 3.0444,
      "step": 45246
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.01118540763855,
      "learning_rate": 0.0005446936620258786,
      "loss": 2.9427,
      "step": 45247
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7461309432983398,
      "learning_rate": 0.0005446912954004318,
      "loss": 3.0914,
      "step": 45248
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5091477632522583,
      "learning_rate": 0.0005446889287294924,
      "loss": 2.7246,
      "step": 45249
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.572761297225952,
      "learning_rate": 0.0005446865620130607,
      "loss": 2.9083,
      "step": 45250
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.9945483207702637,
      "learning_rate": 0.0005446841952511371,
      "loss": 3.0351,
      "step": 45251
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1472063064575195,
      "learning_rate": 0.000544681828443722,
      "loss": 3.2131,
      "step": 45252
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5942686796188354,
      "learning_rate": 0.0005446794615908161,
      "loss": 3.1082,
      "step": 45253
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5958163738250732,
      "learning_rate": 0.0005446770946924195,
      "loss": 3.1789,
      "step": 45254
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.049691677093506,
      "learning_rate": 0.000544674727748533,
      "loss": 3.3913,
      "step": 45255
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5589537620544434,
      "learning_rate": 0.0005446723607591568,
      "loss": 3.1082,
      "step": 45256
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5221115350723267,
      "learning_rate": 0.0005446699937242912,
      "loss": 3.1921,
      "step": 45257
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8889321088790894,
      "learning_rate": 0.000544667626643937,
      "loss": 2.9354,
      "step": 45258
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8830807209014893,
      "learning_rate": 0.0005446652595180945,
      "loss": 3.0979,
      "step": 45259
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.758198618888855,
      "learning_rate": 0.000544662892346764,
      "loss": 3.0957,
      "step": 45260
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5931782722473145,
      "learning_rate": 0.0005446605251299461,
      "loss": 3.1526,
      "step": 45261
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1625025272369385,
      "learning_rate": 0.0005446581578676412,
      "loss": 2.8928,
      "step": 45262
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7534266710281372,
      "learning_rate": 0.0005446557905598497,
      "loss": 2.898,
      "step": 45263
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3930037021636963,
      "learning_rate": 0.0005446534232065721,
      "loss": 3.0045,
      "step": 45264
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7238620519638062,
      "learning_rate": 0.0005446510558078088,
      "loss": 3.1203,
      "step": 45265
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.186992883682251,
      "learning_rate": 0.0005446486883635603,
      "loss": 3.0241,
      "step": 45266
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.018728494644165,
      "learning_rate": 0.0005446463208738269,
      "loss": 3.1462,
      "step": 45267
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6424049139022827,
      "learning_rate": 0.0005446439533386092,
      "loss": 3.0331,
      "step": 45268
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.537620186805725,
      "learning_rate": 0.0005446415857579076,
      "loss": 3.2506,
      "step": 45269
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.15618896484375,
      "learning_rate": 0.0005446392181317224,
      "loss": 3.0015,
      "step": 45270
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.792810082435608,
      "learning_rate": 0.0005446368504600542,
      "loss": 2.94,
      "step": 45271
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3848745822906494,
      "learning_rate": 0.0005446344827429034,
      "loss": 3.2719,
      "step": 45272
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3681823015213013,
      "learning_rate": 0.0005446321149802704,
      "loss": 3.048,
      "step": 45273
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3820234537124634,
      "learning_rate": 0.0005446297471721558,
      "loss": 2.7569,
      "step": 45274
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8850080966949463,
      "learning_rate": 0.0005446273793185597,
      "loss": 3.234,
      "step": 45275
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3333890438079834,
      "learning_rate": 0.000544625011419483,
      "loss": 2.7826,
      "step": 45276
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4144939184188843,
      "learning_rate": 0.0005446226434749257,
      "loss": 3.0878,
      "step": 45277
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4881060123443604,
      "learning_rate": 0.0005446202754848884,
      "loss": 3.1055,
      "step": 45278
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2182931900024414,
      "learning_rate": 0.0005446179074493718,
      "loss": 3.0492,
      "step": 45279
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3729655742645264,
      "learning_rate": 0.0005446155393683758,
      "loss": 2.9295,
      "step": 45280
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8789474964141846,
      "learning_rate": 0.0005446131712419015,
      "loss": 2.9411,
      "step": 45281
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6795060634613037,
      "learning_rate": 0.0005446108030699488,
      "loss": 3.0645,
      "step": 45282
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3863991498947144,
      "learning_rate": 0.0005446084348525183,
      "loss": 3.1741,
      "step": 45283
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7508060932159424,
      "learning_rate": 0.0005446060665896106,
      "loss": 3.3919,
      "step": 45284
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.611582040786743,
      "learning_rate": 0.0005446036982812259,
      "loss": 3.0598,
      "step": 45285
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5995336771011353,
      "learning_rate": 0.0005446013299273649,
      "loss": 3.0796,
      "step": 45286
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1645572185516357,
      "learning_rate": 0.0005445989615280278,
      "loss": 3.1919,
      "step": 45287
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6471164226531982,
      "learning_rate": 0.0005445965930832151,
      "loss": 3.0436,
      "step": 45288
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.592881917953491,
      "learning_rate": 0.0005445942245929273,
      "loss": 2.8318,
      "step": 45289
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3702534437179565,
      "learning_rate": 0.0005445918560571648,
      "loss": 2.9698,
      "step": 45290
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.551702857017517,
      "learning_rate": 0.0005445894874759281,
      "loss": 3.2196,
      "step": 45291
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9603160619735718,
      "learning_rate": 0.0005445871188492175,
      "loss": 2.9598,
      "step": 45292
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5405606031417847,
      "learning_rate": 0.0005445847501770336,
      "loss": 3.37,
      "step": 45293
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7903616428375244,
      "learning_rate": 0.0005445823814593768,
      "loss": 3.2841,
      "step": 45294
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5632877349853516,
      "learning_rate": 0.0005445800126962476,
      "loss": 3.1269,
      "step": 45295
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0307772159576416,
      "learning_rate": 0.0005445776438876463,
      "loss": 2.927,
      "step": 45296
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5335779190063477,
      "learning_rate": 0.0005445752750335733,
      "loss": 3.1612,
      "step": 45297
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1234922409057617,
      "learning_rate": 0.0005445729061340293,
      "loss": 3.0523,
      "step": 45298
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.278958559036255,
      "learning_rate": 0.0005445705371890145,
      "loss": 2.8818,
      "step": 45299
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7809207439422607,
      "learning_rate": 0.0005445681681985294,
      "loss": 3.0478,
      "step": 45300
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8919087648391724,
      "learning_rate": 0.0005445657991625744,
      "loss": 3.1713,
      "step": 45301
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.906396508216858,
      "learning_rate": 0.0005445634300811501,
      "loss": 3.1657,
      "step": 45302
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.386753797531128,
      "learning_rate": 0.0005445610609542569,
      "loss": 3.2002,
      "step": 45303
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.594175100326538,
      "learning_rate": 0.000544558691781895,
      "loss": 3.0938,
      "step": 45304
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7521823644638062,
      "learning_rate": 0.0005445563225640653,
      "loss": 3.0929,
      "step": 45305
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.193894624710083,
      "learning_rate": 0.0005445539533007677,
      "loss": 2.834,
      "step": 45306
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2208478450775146,
      "learning_rate": 0.000544551583992003,
      "loss": 2.9455,
      "step": 45307
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5839873552322388,
      "learning_rate": 0.0005445492146377715,
      "loss": 2.8877,
      "step": 45308
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.529923915863037,
      "learning_rate": 0.0005445468452380738,
      "loss": 3.1971,
      "step": 45309
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.7437357902526855,
      "learning_rate": 0.0005445444757929101,
      "loss": 2.9199,
      "step": 45310
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0133559703826904,
      "learning_rate": 0.000544542106302281,
      "loss": 2.9121,
      "step": 45311
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5080785751342773,
      "learning_rate": 0.0005445397367661869,
      "loss": 3.0922,
      "step": 45312
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4035001993179321,
      "learning_rate": 0.0005445373671846283,
      "loss": 2.9385,
      "step": 45313
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4335185289382935,
      "learning_rate": 0.0005445349975576056,
      "loss": 2.8923,
      "step": 45314
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6760870218276978,
      "learning_rate": 0.0005445326278851191,
      "loss": 3.1209,
      "step": 45315
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7878243923187256,
      "learning_rate": 0.0005445302581671695,
      "loss": 2.8789,
      "step": 45316
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.51518976688385,
      "learning_rate": 0.0005445278884037571,
      "loss": 2.8232,
      "step": 45317
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5018819570541382,
      "learning_rate": 0.0005445255185948822,
      "loss": 2.9766,
      "step": 45318
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5221294164657593,
      "learning_rate": 0.0005445231487405456,
      "loss": 3.1437,
      "step": 45319
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.39384925365448,
      "learning_rate": 0.0005445207788407474,
      "loss": 3.181,
      "step": 45320
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.395937442779541,
      "learning_rate": 0.0005445184088954882,
      "loss": 3.183,
      "step": 45321
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5280581712722778,
      "learning_rate": 0.0005445160389047684,
      "loss": 2.9705,
      "step": 45322
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6298357248306274,
      "learning_rate": 0.0005445136688685885,
      "loss": 2.9763,
      "step": 45323
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.134624481201172,
      "learning_rate": 0.0005445112987869488,
      "loss": 3.0505,
      "step": 45324
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5945483446121216,
      "learning_rate": 0.0005445089286598499,
      "loss": 3.1058,
      "step": 45325
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7297534942626953,
      "learning_rate": 0.0005445065584872922,
      "loss": 3.1716,
      "step": 45326
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.291490316390991,
      "learning_rate": 0.000544504188269276,
      "loss": 3.1967,
      "step": 45327
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4979408979415894,
      "learning_rate": 0.000544501818005802,
      "loss": 3.1532,
      "step": 45328
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5453693866729736,
      "learning_rate": 0.0005444994476968704,
      "loss": 3.146,
      "step": 45329
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2677791118621826,
      "learning_rate": 0.0005444970773424818,
      "loss": 3.0834,
      "step": 45330
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.639693260192871,
      "learning_rate": 0.0005444947069426366,
      "loss": 3.1069,
      "step": 45331
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6335270404815674,
      "learning_rate": 0.000544492336497335,
      "loss": 3.2687,
      "step": 45332
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.262194037437439,
      "learning_rate": 0.0005444899660065779,
      "loss": 3.1857,
      "step": 45333
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.422136664390564,
      "learning_rate": 0.0005444875954703655,
      "loss": 2.9557,
      "step": 45334
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.341591238975525,
      "learning_rate": 0.0005444852248886982,
      "loss": 3.1629,
      "step": 45335
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.968169093132019,
      "learning_rate": 0.0005444828542615764,
      "loss": 2.9251,
      "step": 45336
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2994320392608643,
      "learning_rate": 0.0005444804835890006,
      "loss": 3.0825,
      "step": 45337
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.748840093612671,
      "learning_rate": 0.0005444781128709714,
      "loss": 3.065,
      "step": 45338
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6453163623809814,
      "learning_rate": 0.0005444757421074891,
      "loss": 3.0972,
      "step": 45339
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.601251244544983,
      "learning_rate": 0.000544473371298554,
      "loss": 2.9879,
      "step": 45340
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1631455421447754,
      "learning_rate": 0.0005444710004441669,
      "loss": 2.9974,
      "step": 45341
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4696710109710693,
      "learning_rate": 0.0005444686295443279,
      "loss": 3.272,
      "step": 45342
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7203614711761475,
      "learning_rate": 0.0005444662585990375,
      "loss": 3.167,
      "step": 45343
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8342552185058594,
      "learning_rate": 0.0005444638876082962,
      "loss": 3.0518,
      "step": 45344
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8930414915084839,
      "learning_rate": 0.0005444615165721046,
      "loss": 3.1032,
      "step": 45345
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7411104440689087,
      "learning_rate": 0.000544459145490463,
      "loss": 2.9289,
      "step": 45346
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6432132720947266,
      "learning_rate": 0.0005444567743633716,
      "loss": 2.9521,
      "step": 45347
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.9778947830200195,
      "learning_rate": 0.0005444544031908313,
      "loss": 3.183,
      "step": 45348
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5742387771606445,
      "learning_rate": 0.0005444520319728422,
      "loss": 3.2455,
      "step": 45349
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.331898808479309,
      "learning_rate": 0.0005444496607094048,
      "loss": 2.9035,
      "step": 45350
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6230344772338867,
      "learning_rate": 0.0005444472894005197,
      "loss": 3.3886,
      "step": 45351
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4742251634597778,
      "learning_rate": 0.0005444449180461873,
      "loss": 3.2481,
      "step": 45352
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2591006755828857,
      "learning_rate": 0.0005444425466464078,
      "loss": 3.0272,
      "step": 45353
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.49127995967865,
      "learning_rate": 0.0005444401752011819,
      "loss": 3.2702,
      "step": 45354
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.038496732711792,
      "learning_rate": 0.00054443780371051,
      "loss": 3.1539,
      "step": 45355
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9292837381362915,
      "learning_rate": 0.0005444354321743924,
      "loss": 2.9899,
      "step": 45356
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6501003503799438,
      "learning_rate": 0.0005444330605928297,
      "loss": 3.194,
      "step": 45357
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1395411491394043,
      "learning_rate": 0.0005444306889658223,
      "loss": 3.3351,
      "step": 45358
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5512069463729858,
      "learning_rate": 0.0005444283172933706,
      "loss": 2.9557,
      "step": 45359
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9047527313232422,
      "learning_rate": 0.0005444259455754751,
      "loss": 2.7886,
      "step": 45360
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8550529479980469,
      "learning_rate": 0.0005444235738121361,
      "loss": 3.0056,
      "step": 45361
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5378532409667969,
      "learning_rate": 0.0005444212020033543,
      "loss": 3.1329,
      "step": 45362
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6995408535003662,
      "learning_rate": 0.0005444188301491298,
      "loss": 2.8605,
      "step": 45363
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3552180528640747,
      "learning_rate": 0.0005444164582494634,
      "loss": 3.0337,
      "step": 45364
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2897799015045166,
      "learning_rate": 0.0005444140863043552,
      "loss": 2.9463,
      "step": 45365
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6357650756835938,
      "learning_rate": 0.0005444117143138059,
      "loss": 3.1687,
      "step": 45366
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8846385478973389,
      "learning_rate": 0.0005444093422778158,
      "loss": 3.4079,
      "step": 45367
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.259731650352478,
      "learning_rate": 0.0005444069701963855,
      "loss": 3.024,
      "step": 45368
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6686064004898071,
      "learning_rate": 0.0005444045980695152,
      "loss": 2.9575,
      "step": 45369
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.850040316581726,
      "learning_rate": 0.0005444022258972056,
      "loss": 3.0442,
      "step": 45370
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.031862497329712,
      "learning_rate": 0.000544399853679457,
      "loss": 3.2357,
      "step": 45371
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8445782661437988,
      "learning_rate": 0.0005443974814162698,
      "loss": 3.0888,
      "step": 45372
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.627072811126709,
      "learning_rate": 0.0005443951091076444,
      "loss": 3.0552,
      "step": 45373
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6715419292449951,
      "learning_rate": 0.0005443927367535814,
      "loss": 3.1534,
      "step": 45374
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.252272844314575,
      "learning_rate": 0.0005443903643540812,
      "loss": 3.2736,
      "step": 45375
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8241924047470093,
      "learning_rate": 0.0005443879919091442,
      "loss": 3.1219,
      "step": 45376
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.589808464050293,
      "learning_rate": 0.0005443856194187709,
      "loss": 2.9561,
      "step": 45377
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6596059799194336,
      "learning_rate": 0.0005443832468829616,
      "loss": 2.9846,
      "step": 45378
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.4918508529663086,
      "learning_rate": 0.000544380874301717,
      "loss": 3.0434,
      "step": 45379
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5716025829315186,
      "learning_rate": 0.0005443785016750373,
      "loss": 3.1485,
      "step": 45380
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6691339015960693,
      "learning_rate": 0.0005443761290029231,
      "loss": 2.8002,
      "step": 45381
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0131561756134033,
      "learning_rate": 0.0005443737562853746,
      "loss": 2.9913,
      "step": 45382
    },
    {
      "epoch": 0.59,
      "grad_norm": 4.455365180969238,
      "learning_rate": 0.0005443713835223925,
      "loss": 2.8907,
      "step": 45383
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6608521938323975,
      "learning_rate": 0.0005443690107139772,
      "loss": 2.9626,
      "step": 45384
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8258836269378662,
      "learning_rate": 0.000544366637860129,
      "loss": 3.261,
      "step": 45385
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5662883520126343,
      "learning_rate": 0.0005443642649608485,
      "loss": 3.0844,
      "step": 45386
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.663301944732666,
      "learning_rate": 0.000544361892016136,
      "loss": 3.1821,
      "step": 45387
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.043599843978882,
      "learning_rate": 0.000544359519025992,
      "loss": 2.8746,
      "step": 45388
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2504987716674805,
      "learning_rate": 0.000544357145990417,
      "loss": 2.9589,
      "step": 45389
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4719183444976807,
      "learning_rate": 0.0005443547729094114,
      "loss": 2.9516,
      "step": 45390
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6990193128585815,
      "learning_rate": 0.0005443523997829757,
      "loss": 2.8028,
      "step": 45391
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5062888860702515,
      "learning_rate": 0.0005443500266111101,
      "loss": 2.9704,
      "step": 45392
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5871038436889648,
      "learning_rate": 0.0005443476533938154,
      "loss": 3.1875,
      "step": 45393
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6703122854232788,
      "learning_rate": 0.0005443452801310917,
      "loss": 2.9129,
      "step": 45394
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7234489917755127,
      "learning_rate": 0.0005443429068229397,
      "loss": 2.9274,
      "step": 45395
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2455461025238037,
      "learning_rate": 0.0005443405334693598,
      "loss": 3.2335,
      "step": 45396
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6882307529449463,
      "learning_rate": 0.0005443381600703523,
      "loss": 2.9148,
      "step": 45397
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3624937534332275,
      "learning_rate": 0.0005443357866259177,
      "loss": 3.0424,
      "step": 45398
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.387389063835144,
      "learning_rate": 0.0005443334131360564,
      "loss": 3.0002,
      "step": 45399
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4347422122955322,
      "learning_rate": 0.0005443310396007691,
      "loss": 3.1085,
      "step": 45400
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6338658332824707,
      "learning_rate": 0.0005443286660200559,
      "loss": 3.2444,
      "step": 45401
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4819921255111694,
      "learning_rate": 0.0005443262923939175,
      "loss": 2.9398,
      "step": 45402
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4365068674087524,
      "learning_rate": 0.0005443239187223541,
      "loss": 2.9816,
      "step": 45403
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6725529432296753,
      "learning_rate": 0.0005443215450053664,
      "loss": 2.9393,
      "step": 45404
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5305497646331787,
      "learning_rate": 0.0005443191712429545,
      "loss": 3.232,
      "step": 45405
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5458754301071167,
      "learning_rate": 0.0005443167974351193,
      "loss": 3.0399,
      "step": 45406
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5962393283843994,
      "learning_rate": 0.0005443144235818609,
      "loss": 3.0249,
      "step": 45407
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9215638637542725,
      "learning_rate": 0.0005443120496831799,
      "loss": 3.2351,
      "step": 45408
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4694987535476685,
      "learning_rate": 0.0005443096757390765,
      "loss": 2.9762,
      "step": 45409
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.798403263092041,
      "learning_rate": 0.0005443073017495515,
      "loss": 2.9221,
      "step": 45410
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5289970636367798,
      "learning_rate": 0.0005443049277146051,
      "loss": 3.1952,
      "step": 45411
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8832162618637085,
      "learning_rate": 0.0005443025536342377,
      "loss": 3.2926,
      "step": 45412
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6497098207473755,
      "learning_rate": 0.00054430017950845,
      "loss": 3.2277,
      "step": 45413
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6178189516067505,
      "learning_rate": 0.0005442978053372423,
      "loss": 2.8327,
      "step": 45414
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5817753076553345,
      "learning_rate": 0.0005442954311206149,
      "loss": 3.1852,
      "step": 45415
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7429063320159912,
      "learning_rate": 0.0005442930568585685,
      "loss": 2.8635,
      "step": 45416
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.537461519241333,
      "learning_rate": 0.0005442906825511032,
      "loss": 2.9175,
      "step": 45417
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3895643949508667,
      "learning_rate": 0.0005442883081982199,
      "loss": 3.0325,
      "step": 45418
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3583793640136719,
      "learning_rate": 0.0005442859337999186,
      "loss": 2.9814,
      "step": 45419
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7863144874572754,
      "learning_rate": 0.0005442835593562,
      "loss": 2.9255,
      "step": 45420
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4141736030578613,
      "learning_rate": 0.0005442811848670644,
      "loss": 3.0683,
      "step": 45421
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4657700061798096,
      "learning_rate": 0.0005442788103325125,
      "loss": 3.0604,
      "step": 45422
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5189234018325806,
      "learning_rate": 0.0005442764357525444,
      "loss": 3.2435,
      "step": 45423
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4144607782363892,
      "learning_rate": 0.0005442740611271608,
      "loss": 3.0037,
      "step": 45424
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4989140033721924,
      "learning_rate": 0.0005442716864563621,
      "loss": 3.0006,
      "step": 45425
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5024172067642212,
      "learning_rate": 0.0005442693117401486,
      "loss": 3.1265,
      "step": 45426
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1926958560943604,
      "learning_rate": 0.0005442669369785207,
      "loss": 3.0488,
      "step": 45427
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.752723217010498,
      "learning_rate": 0.0005442645621714791,
      "loss": 3.1709,
      "step": 45428
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3344587087631226,
      "learning_rate": 0.000544262187319024,
      "loss": 3.2421,
      "step": 45429
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.919251799583435,
      "learning_rate": 0.000544259812421156,
      "loss": 3.0722,
      "step": 45430
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4802405834197998,
      "learning_rate": 0.0005442574374778756,
      "loss": 3.1034,
      "step": 45431
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5380704402923584,
      "learning_rate": 0.000544255062489183,
      "loss": 3.0717,
      "step": 45432
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.479948878288269,
      "learning_rate": 0.0005442526874550788,
      "loss": 3.1253,
      "step": 45433
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8551758527755737,
      "learning_rate": 0.0005442503123755635,
      "loss": 3.1692,
      "step": 45434
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7727079391479492,
      "learning_rate": 0.0005442479372506373,
      "loss": 3.1377,
      "step": 45435
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3082576990127563,
      "learning_rate": 0.000544245562080301,
      "loss": 3.1312,
      "step": 45436
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.432449460029602,
      "learning_rate": 0.0005442431868645547,
      "loss": 2.79,
      "step": 45437
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5287715196609497,
      "learning_rate": 0.0005442408116033989,
      "loss": 3.013,
      "step": 45438
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7931100130081177,
      "learning_rate": 0.0005442384362968343,
      "loss": 2.9848,
      "step": 45439
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.937697410583496,
      "learning_rate": 0.000544236060944861,
      "loss": 3.461,
      "step": 45440
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7434910535812378,
      "learning_rate": 0.0005442336855474798,
      "loss": 3.3496,
      "step": 45441
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5590119361877441,
      "learning_rate": 0.0005442313101046909,
      "loss": 3.0537,
      "step": 45442
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7460187673568726,
      "learning_rate": 0.0005442289346164948,
      "loss": 2.982,
      "step": 45443
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.651427149772644,
      "learning_rate": 0.0005442265590828918,
      "loss": 2.9408,
      "step": 45444
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4642014503479004,
      "learning_rate": 0.0005442241835038825,
      "loss": 2.9563,
      "step": 45445
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.76099693775177,
      "learning_rate": 0.0005442218078794673,
      "loss": 3.0166,
      "step": 45446
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4238884449005127,
      "learning_rate": 0.0005442194322096468,
      "loss": 2.9914,
      "step": 45447
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5677653551101685,
      "learning_rate": 0.0005442170564944212,
      "loss": 3.2302,
      "step": 45448
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7913110256195068,
      "learning_rate": 0.000544214680733791,
      "loss": 3.097,
      "step": 45449
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4600582122802734,
      "learning_rate": 0.0005442123049277568,
      "loss": 3.0589,
      "step": 45450
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3937947750091553,
      "learning_rate": 0.0005442099290763188,
      "loss": 3.0571,
      "step": 45451
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6528915166854858,
      "learning_rate": 0.0005442075531794776,
      "loss": 3.091,
      "step": 45452
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.336534023284912,
      "learning_rate": 0.0005442051772372336,
      "loss": 3.0852,
      "step": 45453
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4455296993255615,
      "learning_rate": 0.0005442028012495873,
      "loss": 3.1001,
      "step": 45454
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.640290379524231,
      "learning_rate": 0.0005442004252165392,
      "loss": 3.0253,
      "step": 45455
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4097591638565063,
      "learning_rate": 0.0005441980491380894,
      "loss": 2.9103,
      "step": 45456
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.833583950996399,
      "learning_rate": 0.0005441956730142387,
      "loss": 3.1959,
      "step": 45457
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4007112979888916,
      "learning_rate": 0.0005441932968449873,
      "loss": 3.1008,
      "step": 45458
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.430114984512329,
      "learning_rate": 0.0005441909206303359,
      "loss": 3.0411,
      "step": 45459
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4821767807006836,
      "learning_rate": 0.0005441885443702848,
      "loss": 2.8073,
      "step": 45460
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0622780323028564,
      "learning_rate": 0.0005441861680648343,
      "loss": 2.8477,
      "step": 45461
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4932881593704224,
      "learning_rate": 0.000544183791713985,
      "loss": 2.9087,
      "step": 45462
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7909913063049316,
      "learning_rate": 0.0005441814153177374,
      "loss": 3.142,
      "step": 45463
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0234122276306152,
      "learning_rate": 0.0005441790388760919,
      "loss": 3.0213,
      "step": 45464
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6137006282806396,
      "learning_rate": 0.0005441766623890488,
      "loss": 3.2368,
      "step": 45465
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.714358925819397,
      "learning_rate": 0.0005441742858566087,
      "loss": 3.0949,
      "step": 45466
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.628631830215454,
      "learning_rate": 0.000544171909278772,
      "loss": 3.0822,
      "step": 45467
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7547434568405151,
      "learning_rate": 0.0005441695326555392,
      "loss": 3.0432,
      "step": 45468
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.726981520652771,
      "learning_rate": 0.0005441671559869106,
      "loss": 3.1636,
      "step": 45469
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4998074769973755,
      "learning_rate": 0.0005441647792728866,
      "loss": 3.2781,
      "step": 45470
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8763575553894043,
      "learning_rate": 0.0005441624025134679,
      "loss": 2.8698,
      "step": 45471
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6843675374984741,
      "learning_rate": 0.0005441600257086547,
      "loss": 3.0085,
      "step": 45472
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4287502765655518,
      "learning_rate": 0.0005441576488584477,
      "loss": 2.9716,
      "step": 45473
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.348263144493103,
      "learning_rate": 0.0005441552719628472,
      "loss": 2.9818,
      "step": 45474
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9642102718353271,
      "learning_rate": 0.0005441528950218535,
      "loss": 3.313,
      "step": 45475
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5969927310943604,
      "learning_rate": 0.0005441505180354671,
      "loss": 2.9372,
      "step": 45476
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3842742443084717,
      "learning_rate": 0.0005441481410036887,
      "loss": 3.0553,
      "step": 45477
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2301470041275024,
      "learning_rate": 0.0005441457639265184,
      "loss": 3.1491,
      "step": 45478
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9698734283447266,
      "learning_rate": 0.0005441433868039569,
      "loss": 3.0733,
      "step": 45479
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.65463125705719,
      "learning_rate": 0.0005441410096360045,
      "loss": 3.1249,
      "step": 45480
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4312021732330322,
      "learning_rate": 0.0005441386324226617,
      "loss": 2.7873,
      "step": 45481
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6481748819351196,
      "learning_rate": 0.0005441362551639289,
      "loss": 3.1847,
      "step": 45482
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4693249464035034,
      "learning_rate": 0.0005441338778598065,
      "loss": 3.0804,
      "step": 45483
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.482404351234436,
      "learning_rate": 0.000544131500510295,
      "loss": 2.8489,
      "step": 45484
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6026866436004639,
      "learning_rate": 0.000544129123115395,
      "loss": 3.18,
      "step": 45485
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4738517999649048,
      "learning_rate": 0.0005441267456751067,
      "loss": 3.1165,
      "step": 45486
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.561155080795288,
      "learning_rate": 0.0005441243681894305,
      "loss": 3.1434,
      "step": 45487
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5867453813552856,
      "learning_rate": 0.0005441219906583671,
      "loss": 3.2189,
      "step": 45488
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0871827602386475,
      "learning_rate": 0.0005441196130819169,
      "loss": 3.2113,
      "step": 45489
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7555947303771973,
      "learning_rate": 0.00054411723546008,
      "loss": 3.0257,
      "step": 45490
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4884955883026123,
      "learning_rate": 0.0005441148577928573,
      "loss": 2.8924,
      "step": 45491
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3249059915542603,
      "learning_rate": 0.0005441124800802491,
      "loss": 3.4379,
      "step": 45492
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7503817081451416,
      "learning_rate": 0.0005441101023222557,
      "loss": 3.1688,
      "step": 45493
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.301132917404175,
      "learning_rate": 0.0005441077245188776,
      "loss": 2.8763,
      "step": 45494
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.334557294845581,
      "learning_rate": 0.0005441053466701152,
      "loss": 3.2409,
      "step": 45495
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7150851488113403,
      "learning_rate": 0.000544102968775969,
      "loss": 3.0611,
      "step": 45496
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0782878398895264,
      "learning_rate": 0.0005441005908364396,
      "loss": 2.995,
      "step": 45497
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6923184394836426,
      "learning_rate": 0.0005440982128515272,
      "loss": 3.3066,
      "step": 45498
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5505645275115967,
      "learning_rate": 0.0005440958348212324,
      "loss": 2.8065,
      "step": 45499
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.655460000038147,
      "learning_rate": 0.0005440934567455555,
      "loss": 3.2699,
      "step": 45500
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.847415566444397,
      "learning_rate": 0.000544091078624497,
      "loss": 3.098,
      "step": 45501
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.0764358043670654,
      "learning_rate": 0.0005440887004580574,
      "loss": 3.0847,
      "step": 45502
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9915330410003662,
      "learning_rate": 0.0005440863222462372,
      "loss": 3.0563,
      "step": 45503
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.478750467300415,
      "learning_rate": 0.0005440839439890366,
      "loss": 3.2067,
      "step": 45504
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5719164609909058,
      "learning_rate": 0.0005440815656864563,
      "loss": 3.0957,
      "step": 45505
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5483301877975464,
      "learning_rate": 0.0005440791873384965,
      "loss": 3.038,
      "step": 45506
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8366261720657349,
      "learning_rate": 0.0005440768089451579,
      "loss": 3.1087,
      "step": 45507
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3701822757720947,
      "learning_rate": 0.0005440744305064407,
      "loss": 3.0408,
      "step": 45508
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.6102874279022217,
      "learning_rate": 0.0005440720520223456,
      "loss": 3.0532,
      "step": 45509
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2220005989074707,
      "learning_rate": 0.0005440696734928728,
      "loss": 3.3661,
      "step": 45510
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6118539571762085,
      "learning_rate": 0.0005440672949180228,
      "loss": 3.1223,
      "step": 45511
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.018592357635498,
      "learning_rate": 0.0005440649162977961,
      "loss": 3.127,
      "step": 45512
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7735086679458618,
      "learning_rate": 0.0005440625376321932,
      "loss": 3.2312,
      "step": 45513
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5096471309661865,
      "learning_rate": 0.0005440601589212145,
      "loss": 2.9211,
      "step": 45514
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4749408960342407,
      "learning_rate": 0.0005440577801648603,
      "loss": 3.1225,
      "step": 45515
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2731294631958008,
      "learning_rate": 0.0005440554013631312,
      "loss": 2.8293,
      "step": 45516
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4220380783081055,
      "learning_rate": 0.0005440530225160276,
      "loss": 2.9273,
      "step": 45517
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.540517807006836,
      "learning_rate": 0.0005440506436235499,
      "loss": 3.1744,
      "step": 45518
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4122438430786133,
      "learning_rate": 0.0005440482646856986,
      "loss": 2.8883,
      "step": 45519
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9368901252746582,
      "learning_rate": 0.0005440458857024741,
      "loss": 3.295,
      "step": 45520
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7106467485427856,
      "learning_rate": 0.0005440435066738768,
      "loss": 2.7671,
      "step": 45521
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1453301906585693,
      "learning_rate": 0.0005440411275999074,
      "loss": 2.9388,
      "step": 45522
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.892376184463501,
      "learning_rate": 0.000544038748480566,
      "loss": 2.7188,
      "step": 45523
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6005042791366577,
      "learning_rate": 0.0005440363693158532,
      "loss": 2.86,
      "step": 45524
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.601533055305481,
      "learning_rate": 0.0005440339901057695,
      "loss": 3.3811,
      "step": 45525
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.1754465103149414,
      "learning_rate": 0.0005440316108503153,
      "loss": 3.1117,
      "step": 45526
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5669751167297363,
      "learning_rate": 0.0005440292315494909,
      "loss": 2.9721,
      "step": 45527
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.721072793006897,
      "learning_rate": 0.0005440268522032969,
      "loss": 3.0808,
      "step": 45528
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5977239608764648,
      "learning_rate": 0.0005440244728117337,
      "loss": 2.988,
      "step": 45529
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7089214324951172,
      "learning_rate": 0.0005440220933748017,
      "loss": 3.0001,
      "step": 45530
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.4138779640197754,
      "learning_rate": 0.0005440197138925015,
      "loss": 2.9455,
      "step": 45531
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.8052818775177,
      "learning_rate": 0.0005440173343648334,
      "loss": 3.2501,
      "step": 45532
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4720655679702759,
      "learning_rate": 0.0005440149547917978,
      "loss": 3.0918,
      "step": 45533
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1453702449798584,
      "learning_rate": 0.0005440125751733952,
      "loss": 3.012,
      "step": 45534
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6753191947937012,
      "learning_rate": 0.0005440101955096262,
      "loss": 3.0153,
      "step": 45535
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.547640323638916,
      "learning_rate": 0.000544007815800491,
      "loss": 3.1718,
      "step": 45536
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9794552326202393,
      "learning_rate": 0.0005440054360459902,
      "loss": 3.0019,
      "step": 45537
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2786667346954346,
      "learning_rate": 0.0005440030562461241,
      "loss": 2.8669,
      "step": 45538
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.010347366333008,
      "learning_rate": 0.0005440006764008933,
      "loss": 2.9319,
      "step": 45539
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8977841138839722,
      "learning_rate": 0.000543998296510298,
      "loss": 2.9355,
      "step": 45540
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3482472896575928,
      "learning_rate": 0.000543995916574339,
      "loss": 3.1982,
      "step": 45541
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4547123908996582,
      "learning_rate": 0.0005439935365930165,
      "loss": 2.8613,
      "step": 45542
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.8119168281555176,
      "learning_rate": 0.000543991156566331,
      "loss": 2.8511,
      "step": 45543
    },
    {
      "epoch": 0.59,
      "grad_norm": 4.520327568054199,
      "learning_rate": 0.000543988776494283,
      "loss": 2.743,
      "step": 45544
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.395651340484619,
      "learning_rate": 0.0005439863963768727,
      "loss": 2.9569,
      "step": 45545
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.524932861328125,
      "learning_rate": 0.0005439840162141009,
      "loss": 3.3408,
      "step": 45546
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5064198970794678,
      "learning_rate": 0.0005439816360059679,
      "loss": 3.3546,
      "step": 45547
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.2647156715393066,
      "learning_rate": 0.0005439792557524739,
      "loss": 3.1003,
      "step": 45548
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4943722486495972,
      "learning_rate": 0.0005439768754536197,
      "loss": 3.3842,
      "step": 45549
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.884265422821045,
      "learning_rate": 0.0005439744951094055,
      "loss": 3.0331,
      "step": 45550
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6344510316848755,
      "learning_rate": 0.0005439721147198319,
      "loss": 3.1264,
      "step": 45551
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7557059526443481,
      "learning_rate": 0.0005439697342848993,
      "loss": 2.9536,
      "step": 45552
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7011128664016724,
      "learning_rate": 0.000543967353804608,
      "loss": 2.9855,
      "step": 45553
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.929016351699829,
      "learning_rate": 0.0005439649732789587,
      "loss": 3.1869,
      "step": 45554
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9356369972229004,
      "learning_rate": 0.0005439625927079517,
      "loss": 3.2288,
      "step": 45555
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.428703784942627,
      "learning_rate": 0.0005439602120915874,
      "loss": 3.0705,
      "step": 45556
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3306087255477905,
      "learning_rate": 0.0005439578314298662,
      "loss": 3.2635,
      "step": 45557
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0675361156463623,
      "learning_rate": 0.0005439554507227888,
      "loss": 3.2116,
      "step": 45558
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4255661964416504,
      "learning_rate": 0.0005439530699703554,
      "loss": 3.2323,
      "step": 45559
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4567744731903076,
      "learning_rate": 0.0005439506891725665,
      "loss": 3.0682,
      "step": 45560
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0455634593963623,
      "learning_rate": 0.0005439483083294227,
      "loss": 3.0139,
      "step": 45561
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5741698741912842,
      "learning_rate": 0.0005439459274409241,
      "loss": 2.9653,
      "step": 45562
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8563076257705688,
      "learning_rate": 0.0005439435465070714,
      "loss": 2.8976,
      "step": 45563
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.088188648223877,
      "learning_rate": 0.0005439411655278651,
      "loss": 2.9006,
      "step": 45564
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4527472257614136,
      "learning_rate": 0.0005439387845033055,
      "loss": 3.0484,
      "step": 45565
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5673604011535645,
      "learning_rate": 0.000543936403433393,
      "loss": 3.0032,
      "step": 45566
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5674337148666382,
      "learning_rate": 0.0005439340223181282,
      "loss": 3.189,
      "step": 45567
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6973803043365479,
      "learning_rate": 0.0005439316411575114,
      "loss": 3.1734,
      "step": 45568
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4186443090438843,
      "learning_rate": 0.0005439292599515431,
      "loss": 2.8863,
      "step": 45569
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4677979946136475,
      "learning_rate": 0.0005439268787002238,
      "loss": 2.9797,
      "step": 45570
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7288050651550293,
      "learning_rate": 0.0005439244974035539,
      "loss": 2.9816,
      "step": 45571
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.9656765460968018,
      "learning_rate": 0.0005439221160615337,
      "loss": 2.9973,
      "step": 45572
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9608036279678345,
      "learning_rate": 0.0005439197346741639,
      "loss": 3.1118,
      "step": 45573
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4903736114501953,
      "learning_rate": 0.0005439173532414448,
      "loss": 3.1357,
      "step": 45574
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6261258125305176,
      "learning_rate": 0.0005439149717633767,
      "loss": 3.1573,
      "step": 45575
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6241531372070312,
      "learning_rate": 0.0005439125902399604,
      "loss": 3.3327,
      "step": 45576
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8314388990402222,
      "learning_rate": 0.0005439102086711961,
      "loss": 3.1968,
      "step": 45577
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5629146099090576,
      "learning_rate": 0.0005439078270570843,
      "loss": 2.9521,
      "step": 45578
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9836828708648682,
      "learning_rate": 0.0005439054453976254,
      "loss": 2.8447,
      "step": 45579
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6398578882217407,
      "learning_rate": 0.0005439030636928199,
      "loss": 3.0409,
      "step": 45580
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8124271631240845,
      "learning_rate": 0.0005439006819426682,
      "loss": 2.9088,
      "step": 45581
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4820330142974854,
      "learning_rate": 0.0005438983001471707,
      "loss": 2.9895,
      "step": 45582
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4207289218902588,
      "learning_rate": 0.000543895918306328,
      "loss": 3.0549,
      "step": 45583
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5823286771774292,
      "learning_rate": 0.0005438935364201404,
      "loss": 3.0413,
      "step": 45584
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6063705682754517,
      "learning_rate": 0.0005438911544886083,
      "loss": 3.1484,
      "step": 45585
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6608392000198364,
      "learning_rate": 0.0005438887725117324,
      "loss": 2.7272,
      "step": 45586
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4467426538467407,
      "learning_rate": 0.000543886390489513,
      "loss": 3.0086,
      "step": 45587
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5021560192108154,
      "learning_rate": 0.0005438840084219503,
      "loss": 2.9077,
      "step": 45588
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4985476732254028,
      "learning_rate": 0.0005438816263090451,
      "loss": 2.8297,
      "step": 45589
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6075292825698853,
      "learning_rate": 0.0005438792441507976,
      "loss": 3.1536,
      "step": 45590
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6837571859359741,
      "learning_rate": 0.0005438768619472085,
      "loss": 3.2153,
      "step": 45591
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7851181030273438,
      "learning_rate": 0.000543874479698278,
      "loss": 2.9348,
      "step": 45592
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3986575603485107,
      "learning_rate": 0.0005438720974040066,
      "loss": 2.8631,
      "step": 45593
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6514981985092163,
      "learning_rate": 0.0005438697150643948,
      "loss": 3.1001,
      "step": 45594
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5806952714920044,
      "learning_rate": 0.0005438673326794431,
      "loss": 2.8866,
      "step": 45595
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.842833161354065,
      "learning_rate": 0.0005438649502491516,
      "loss": 2.9117,
      "step": 45596
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5952579975128174,
      "learning_rate": 0.0005438625677735213,
      "loss": 3.0898,
      "step": 45597
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.42289137840271,
      "learning_rate": 0.0005438601852525522,
      "loss": 3.1994,
      "step": 45598
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.537174940109253,
      "learning_rate": 0.000543857802686245,
      "loss": 3.2342,
      "step": 45599
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5028619766235352,
      "learning_rate": 0.0005438554200745999,
      "loss": 3.0608,
      "step": 45600
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6532926559448242,
      "learning_rate": 0.0005438530374176175,
      "loss": 3.0369,
      "step": 45601
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.42035710811615,
      "learning_rate": 0.0005438506547152983,
      "loss": 3.1282,
      "step": 45602
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9255701303482056,
      "learning_rate": 0.0005438482719676426,
      "loss": 2.7804,
      "step": 45603
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8622212409973145,
      "learning_rate": 0.0005438458891746508,
      "loss": 3.1041,
      "step": 45604
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.2357542514801025,
      "learning_rate": 0.0005438435063363236,
      "loss": 2.7912,
      "step": 45605
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4513243436813354,
      "learning_rate": 0.0005438411234526613,
      "loss": 2.9686,
      "step": 45606
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.3509700298309326,
      "learning_rate": 0.0005438387405236642,
      "loss": 2.7928,
      "step": 45607
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8215644359588623,
      "learning_rate": 0.000543836357549333,
      "loss": 3.1057,
      "step": 45608
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.567603826522827,
      "learning_rate": 0.000543833974529668,
      "loss": 3.1657,
      "step": 45609
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3864750862121582,
      "learning_rate": 0.0005438315914646696,
      "loss": 3.0756,
      "step": 45610
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3990727663040161,
      "learning_rate": 0.0005438292083543384,
      "loss": 3.0767,
      "step": 45611
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5063554048538208,
      "learning_rate": 0.0005438268251986747,
      "loss": 3.1929,
      "step": 45612
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.749063491821289,
      "learning_rate": 0.0005438244419976788,
      "loss": 2.823,
      "step": 45613
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.2766824960708618,
      "learning_rate": 0.0005438220587513516,
      "loss": 3.0809,
      "step": 45614
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7655245065689087,
      "learning_rate": 0.0005438196754596931,
      "loss": 3.2571,
      "step": 45615
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9559437036514282,
      "learning_rate": 0.0005438172921227041,
      "loss": 3.1196,
      "step": 45616
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6749054193496704,
      "learning_rate": 0.0005438149087403848,
      "loss": 2.9317,
      "step": 45617
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.0631678104400635,
      "learning_rate": 0.0005438125253127357,
      "loss": 2.8462,
      "step": 45618
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3608853816986084,
      "learning_rate": 0.0005438101418397571,
      "loss": 2.7339,
      "step": 45619
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3801435232162476,
      "learning_rate": 0.0005438077583214498,
      "loss": 3.0566,
      "step": 45620
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.179009437561035,
      "learning_rate": 0.0005438053747578139,
      "loss": 3.009,
      "step": 45621
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6501091718673706,
      "learning_rate": 0.00054380299114885,
      "loss": 3.2912,
      "step": 45622
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3380358219146729,
      "learning_rate": 0.0005438006074945586,
      "loss": 3.3317,
      "step": 45623
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3178468942642212,
      "learning_rate": 0.00054379822379494,
      "loss": 3.106,
      "step": 45624
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.130952835083008,
      "learning_rate": 0.0005437958400499948,
      "loss": 3.1098,
      "step": 45625
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.3823986053466797,
      "learning_rate": 0.0005437934562597231,
      "loss": 3.1117,
      "step": 45626
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.327557921409607,
      "learning_rate": 0.0005437910724241259,
      "loss": 2.9655,
      "step": 45627
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.1142032146453857,
      "learning_rate": 0.0005437886885432032,
      "loss": 3.317,
      "step": 45628
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.222710132598877,
      "learning_rate": 0.0005437863046169556,
      "loss": 2.9675,
      "step": 45629
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4147694110870361,
      "learning_rate": 0.0005437839206453834,
      "loss": 2.8803,
      "step": 45630
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3468457460403442,
      "learning_rate": 0.0005437815366284873,
      "loss": 3.0451,
      "step": 45631
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.376493215560913,
      "learning_rate": 0.0005437791525662677,
      "loss": 3.234,
      "step": 45632
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6449246406555176,
      "learning_rate": 0.0005437767684587248,
      "loss": 3.1118,
      "step": 45633
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3840293884277344,
      "learning_rate": 0.0005437743843058593,
      "loss": 3.2826,
      "step": 45634
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4331917762756348,
      "learning_rate": 0.0005437720001076715,
      "loss": 2.9583,
      "step": 45635
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4926505088806152,
      "learning_rate": 0.0005437696158641618,
      "loss": 3.299,
      "step": 45636
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.9003196954727173,
      "learning_rate": 0.0005437672315753308,
      "loss": 2.9338,
      "step": 45637
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5327458381652832,
      "learning_rate": 0.0005437648472411788,
      "loss": 2.9702,
      "step": 45638
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8173420429229736,
      "learning_rate": 0.0005437624628617064,
      "loss": 2.9197,
      "step": 45639
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.459574580192566,
      "learning_rate": 0.0005437600784369139,
      "loss": 3.0583,
      "step": 45640
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.9894702434539795,
      "learning_rate": 0.0005437576939668018,
      "loss": 2.8966,
      "step": 45641
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7242385149002075,
      "learning_rate": 0.0005437553094513706,
      "loss": 2.9642,
      "step": 45642
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.991258978843689,
      "learning_rate": 0.0005437529248906207,
      "loss": 2.9652,
      "step": 45643
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8800036907196045,
      "learning_rate": 0.0005437505402845525,
      "loss": 2.9982,
      "step": 45644
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4427002668380737,
      "learning_rate": 0.0005437481556331664,
      "loss": 3.0679,
      "step": 45645
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4299224615097046,
      "learning_rate": 0.000543745770936463,
      "loss": 3.0189,
      "step": 45646
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.19451904296875,
      "learning_rate": 0.0005437433861944426,
      "loss": 3.1283,
      "step": 45647
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.5738954544067383,
      "learning_rate": 0.0005437410014071058,
      "loss": 2.7872,
      "step": 45648
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6234354972839355,
      "learning_rate": 0.0005437386165744529,
      "loss": 2.9563,
      "step": 45649
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.498253345489502,
      "learning_rate": 0.0005437362316964844,
      "loss": 2.9019,
      "step": 45650
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5028910636901855,
      "learning_rate": 0.0005437338467732007,
      "loss": 3.2077,
      "step": 45651
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.426522970199585,
      "learning_rate": 0.0005437314618046023,
      "loss": 3.0061,
      "step": 45652
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4782843589782715,
      "learning_rate": 0.0005437290767906896,
      "loss": 2.8734,
      "step": 45653
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.550622820854187,
      "learning_rate": 0.0005437266917314631,
      "loss": 3.2307,
      "step": 45654
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8790942430496216,
      "learning_rate": 0.0005437243066269232,
      "loss": 2.9542,
      "step": 45655
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8254917860031128,
      "learning_rate": 0.0005437219214770703,
      "loss": 3.1231,
      "step": 45656
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5376029014587402,
      "learning_rate": 0.0005437195362819049,
      "loss": 2.8421,
      "step": 45657
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.45677649974823,
      "learning_rate": 0.0005437171510414275,
      "loss": 3.0941,
      "step": 45658
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3692971467971802,
      "learning_rate": 0.0005437147657556385,
      "loss": 3.2495,
      "step": 45659
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.467214822769165,
      "learning_rate": 0.0005437123804245382,
      "loss": 3.3471,
      "step": 45660
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3195468187332153,
      "learning_rate": 0.0005437099950481272,
      "loss": 2.8639,
      "step": 45661
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.779176950454712,
      "learning_rate": 0.000543707609626406,
      "loss": 3.0676,
      "step": 45662
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.529676914215088,
      "learning_rate": 0.0005437052241593749,
      "loss": 2.8375,
      "step": 45663
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6487218141555786,
      "learning_rate": 0.0005437028386470344,
      "loss": 2.8586,
      "step": 45664
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5735410451889038,
      "learning_rate": 0.000543700453089385,
      "loss": 3.0946,
      "step": 45665
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4389375448226929,
      "learning_rate": 0.0005436980674864269,
      "loss": 3.2516,
      "step": 45666
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6802213191986084,
      "learning_rate": 0.000543695681838161,
      "loss": 2.8532,
      "step": 45667
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7416446208953857,
      "learning_rate": 0.0005436932961445873,
      "loss": 2.9478,
      "step": 45668
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8337184190750122,
      "learning_rate": 0.0005436909104057065,
      "loss": 3.0523,
      "step": 45669
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.647818684577942,
      "learning_rate": 0.0005436885246215188,
      "loss": 3.1082,
      "step": 45670
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5454076528549194,
      "learning_rate": 0.000543686138792025,
      "loss": 3.1126,
      "step": 45671
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7698832750320435,
      "learning_rate": 0.0005436837529172253,
      "loss": 3.0628,
      "step": 45672
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7871078252792358,
      "learning_rate": 0.00054368136699712,
      "loss": 2.9293,
      "step": 45673
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7724201679229736,
      "learning_rate": 0.00054367898103171,
      "loss": 3.1407,
      "step": 45674
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.581680417060852,
      "learning_rate": 0.0005436765950209954,
      "loss": 3.0339,
      "step": 45675
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.6925578117370605,
      "learning_rate": 0.0005436742089649767,
      "loss": 2.9872,
      "step": 45676
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.433071494102478,
      "learning_rate": 0.0005436718228636543,
      "loss": 2.7423,
      "step": 45677
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.462261915206909,
      "learning_rate": 0.0005436694367170287,
      "loss": 2.9249,
      "step": 45678
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5526820421218872,
      "learning_rate": 0.0005436670505251004,
      "loss": 3.1192,
      "step": 45679
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.504449725151062,
      "learning_rate": 0.0005436646642878698,
      "loss": 2.9404,
      "step": 45680
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.419251561164856,
      "learning_rate": 0.0005436622780053375,
      "loss": 2.7351,
      "step": 45681
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4618346691131592,
      "learning_rate": 0.0005436598916775035,
      "loss": 3.1947,
      "step": 45682
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7496496438980103,
      "learning_rate": 0.0005436575053043687,
      "loss": 2.7804,
      "step": 45683
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.474937915802002,
      "learning_rate": 0.0005436551188859333,
      "loss": 2.8889,
      "step": 45684
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3891165256500244,
      "learning_rate": 0.0005436527324221979,
      "loss": 3.2785,
      "step": 45685
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7414504289627075,
      "learning_rate": 0.0005436503459131627,
      "loss": 2.8532,
      "step": 45686
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.4338115453720093,
      "learning_rate": 0.0005436479593588285,
      "loss": 3.1093,
      "step": 45687
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8128246068954468,
      "learning_rate": 0.0005436455727591954,
      "loss": 3.0946,
      "step": 45688
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.708356499671936,
      "learning_rate": 0.0005436431861142641,
      "loss": 2.8776,
      "step": 45689
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.7009408473968506,
      "learning_rate": 0.0005436407994240347,
      "loss": 3.0046,
      "step": 45690
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.3967809677124023,
      "learning_rate": 0.0005436384126885081,
      "loss": 3.0696,
      "step": 45691
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.393357276916504,
      "learning_rate": 0.0005436360259076845,
      "loss": 3.2364,
      "step": 45692
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.5026696920394897,
      "learning_rate": 0.0005436336390815643,
      "loss": 3.079,
      "step": 45693
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.8517403602600098,
      "learning_rate": 0.0005436312522101481,
      "loss": 2.8207,
      "step": 45694
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.6643965244293213,
      "learning_rate": 0.0005436288652934362,
      "loss": 3.0707,
      "step": 45695
    },
    {
      "epoch": 0.59,
      "grad_norm": 1.801938772201538,
      "learning_rate": 0.000543626478331429,
      "loss": 2.9745,
      "step": 45696
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3897552490234375,
      "learning_rate": 0.0005436240913241271,
      "loss": 3.1479,
      "step": 45697
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.7925679683685303,
      "learning_rate": 0.000543621704271531,
      "loss": 3.0889,
      "step": 45698
    },
    {
      "epoch": 0.6,
      "grad_norm": 4.068729877471924,
      "learning_rate": 0.0005436193171736408,
      "loss": 3.2007,
      "step": 45699
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9044047594070435,
      "learning_rate": 0.0005436169300304573,
      "loss": 3.1674,
      "step": 45700
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5936909914016724,
      "learning_rate": 0.0005436145428419809,
      "loss": 3.0316,
      "step": 45701
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.215989589691162,
      "learning_rate": 0.0005436121556082118,
      "loss": 2.9992,
      "step": 45702
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.4470996856689453,
      "learning_rate": 0.0005436097683291508,
      "loss": 3.1791,
      "step": 45703
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0276236534118652,
      "learning_rate": 0.0005436073810047979,
      "loss": 2.9758,
      "step": 45704
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5785977840423584,
      "learning_rate": 0.0005436049936351539,
      "loss": 3.3621,
      "step": 45705
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.539205312728882,
      "learning_rate": 0.0005436026062202191,
      "loss": 3.0969,
      "step": 45706
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.320498466491699,
      "learning_rate": 0.0005436002187599942,
      "loss": 2.9318,
      "step": 45707
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.309704065322876,
      "learning_rate": 0.0005435978312544791,
      "loss": 2.9984,
      "step": 45708
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8616467714309692,
      "learning_rate": 0.0005435954437036747,
      "loss": 2.89,
      "step": 45709
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2514703273773193,
      "learning_rate": 0.0005435930561075813,
      "loss": 3.0981,
      "step": 45710
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7735942602157593,
      "learning_rate": 0.0005435906684661994,
      "loss": 3.2076,
      "step": 45711
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.564187526702881,
      "learning_rate": 0.0005435882807795294,
      "loss": 3.1599,
      "step": 45712
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.325925350189209,
      "learning_rate": 0.0005435858930475717,
      "loss": 3.2027,
      "step": 45713
    },
    {
      "epoch": 0.6,
      "grad_norm": 4.1873779296875,
      "learning_rate": 0.0005435835052703268,
      "loss": 3.0241,
      "step": 45714
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2010257244110107,
      "learning_rate": 0.0005435811174477951,
      "loss": 3.0529,
      "step": 45715
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9800913333892822,
      "learning_rate": 0.0005435787295799771,
      "loss": 3.0401,
      "step": 45716
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8467059135437012,
      "learning_rate": 0.0005435763416668732,
      "loss": 2.988,
      "step": 45717
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4374276399612427,
      "learning_rate": 0.0005435739537084838,
      "loss": 3.0501,
      "step": 45718
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7529597282409668,
      "learning_rate": 0.0005435715657048095,
      "loss": 2.8833,
      "step": 45719
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6838161945343018,
      "learning_rate": 0.0005435691776558507,
      "loss": 3.0446,
      "step": 45720
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4130710363388062,
      "learning_rate": 0.0005435667895616076,
      "loss": 2.8476,
      "step": 45721
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2287020683288574,
      "learning_rate": 0.000543564401422081,
      "loss": 2.923,
      "step": 45722
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.695937395095825,
      "learning_rate": 0.000543562013237271,
      "loss": 3.0132,
      "step": 45723
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9890170097351074,
      "learning_rate": 0.0005435596250071785,
      "loss": 2.9189,
      "step": 45724
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2660036087036133,
      "learning_rate": 0.0005435572367318035,
      "loss": 3.1698,
      "step": 45725
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.894998550415039,
      "learning_rate": 0.0005435548484111466,
      "loss": 3.1371,
      "step": 45726
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3810791969299316,
      "learning_rate": 0.0005435524600452083,
      "loss": 3.1333,
      "step": 45727
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6598297357559204,
      "learning_rate": 0.000543550071633989,
      "loss": 3.0546,
      "step": 45728
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.093754529953003,
      "learning_rate": 0.0005435476831774891,
      "loss": 3.0297,
      "step": 45729
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.8267455101013184,
      "learning_rate": 0.0005435452946757091,
      "loss": 3.1323,
      "step": 45730
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.289283514022827,
      "learning_rate": 0.0005435429061286495,
      "loss": 3.0504,
      "step": 45731
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7666816711425781,
      "learning_rate": 0.0005435405175363106,
      "loss": 2.8946,
      "step": 45732
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3819336891174316,
      "learning_rate": 0.0005435381288986929,
      "loss": 3.0589,
      "step": 45733
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3222328424453735,
      "learning_rate": 0.0005435357402157969,
      "loss": 3.3502,
      "step": 45734
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5417075157165527,
      "learning_rate": 0.000543533351487623,
      "loss": 2.8855,
      "step": 45735
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4322638511657715,
      "learning_rate": 0.0005435309627141717,
      "loss": 3.1872,
      "step": 45736
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.354293942451477,
      "learning_rate": 0.0005435285738954434,
      "loss": 2.9974,
      "step": 45737
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2699998617172241,
      "learning_rate": 0.0005435261850314385,
      "loss": 2.9365,
      "step": 45738
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.712793231010437,
      "learning_rate": 0.0005435237961221575,
      "loss": 3.1162,
      "step": 45739
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4423209428787231,
      "learning_rate": 0.0005435214071676008,
      "loss": 2.9888,
      "step": 45740
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4326385259628296,
      "learning_rate": 0.000543519018167769,
      "loss": 2.9475,
      "step": 45741
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2904908657073975,
      "learning_rate": 0.0005435166291226622,
      "loss": 2.8709,
      "step": 45742
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.2660021781921387,
      "learning_rate": 0.0005435142400322813,
      "loss": 2.9049,
      "step": 45743
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5914784669876099,
      "learning_rate": 0.0005435118508966264,
      "loss": 3.1667,
      "step": 45744
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8177038431167603,
      "learning_rate": 0.000543509461715698,
      "loss": 2.9048,
      "step": 45745
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.408458709716797,
      "learning_rate": 0.0005435070724894966,
      "loss": 3.0333,
      "step": 45746
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3864355087280273,
      "learning_rate": 0.0005435046832180226,
      "loss": 3.3238,
      "step": 45747
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8687559366226196,
      "learning_rate": 0.0005435022939012765,
      "loss": 2.9244,
      "step": 45748
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9274907112121582,
      "learning_rate": 0.0005434999045392588,
      "loss": 2.9755,
      "step": 45749
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3249832391738892,
      "learning_rate": 0.00054349751513197,
      "loss": 2.9245,
      "step": 45750
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.521691083908081,
      "learning_rate": 0.0005434951256794102,
      "loss": 3.2057,
      "step": 45751
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5530627965927124,
      "learning_rate": 0.0005434927361815801,
      "loss": 2.9523,
      "step": 45752
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4503570795059204,
      "learning_rate": 0.0005434903466384802,
      "loss": 3.081,
      "step": 45753
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5308175086975098,
      "learning_rate": 0.0005434879570501108,
      "loss": 3.1019,
      "step": 45754
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7255210876464844,
      "learning_rate": 0.0005434855674164723,
      "loss": 3.0712,
      "step": 45755
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4988387823104858,
      "learning_rate": 0.0005434831777375653,
      "loss": 3.4567,
      "step": 45756
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3452705144882202,
      "learning_rate": 0.0005434807880133902,
      "loss": 2.8257,
      "step": 45757
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3878167867660522,
      "learning_rate": 0.0005434783982439475,
      "loss": 3.1606,
      "step": 45758
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5429552793502808,
      "learning_rate": 0.0005434760084292374,
      "loss": 3.1996,
      "step": 45759
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.33324134349823,
      "learning_rate": 0.0005434736185692606,
      "loss": 2.9342,
      "step": 45760
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6546505689620972,
      "learning_rate": 0.0005434712286640174,
      "loss": 2.9236,
      "step": 45761
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.520407795906067,
      "learning_rate": 0.0005434688387135084,
      "loss": 2.8997,
      "step": 45762
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5879017114639282,
      "learning_rate": 0.0005434664487177339,
      "loss": 2.8192,
      "step": 45763
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4953902959823608,
      "learning_rate": 0.0005434640586766945,
      "loss": 2.9554,
      "step": 45764
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6271640062332153,
      "learning_rate": 0.0005434616685903904,
      "loss": 3.3581,
      "step": 45765
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.523982524871826,
      "learning_rate": 0.0005434592784588222,
      "loss": 2.996,
      "step": 45766
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9837747812271118,
      "learning_rate": 0.0005434568882819903,
      "loss": 3.0754,
      "step": 45767
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8051098585128784,
      "learning_rate": 0.0005434544980598952,
      "loss": 2.9722,
      "step": 45768
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.673247218132019,
      "learning_rate": 0.0005434521077925374,
      "loss": 3.1467,
      "step": 45769
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5981935262680054,
      "learning_rate": 0.0005434497174799172,
      "loss": 2.9833,
      "step": 45770
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3867462873458862,
      "learning_rate": 0.000543447327122035,
      "loss": 3.0532,
      "step": 45771
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9000552892684937,
      "learning_rate": 0.0005434449367188915,
      "loss": 3.0965,
      "step": 45772
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5893524885177612,
      "learning_rate": 0.0005434425462704868,
      "loss": 3.1203,
      "step": 45773
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6780056953430176,
      "learning_rate": 0.0005434401557768217,
      "loss": 2.88,
      "step": 45774
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4352819919586182,
      "learning_rate": 0.0005434377652378964,
      "loss": 2.9446,
      "step": 45775
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6985578536987305,
      "learning_rate": 0.0005434353746537115,
      "loss": 3.1556,
      "step": 45776
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.045657157897949,
      "learning_rate": 0.0005434329840242673,
      "loss": 3.1632,
      "step": 45777
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5297272205352783,
      "learning_rate": 0.0005434305933495644,
      "loss": 2.9739,
      "step": 45778
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6826225519180298,
      "learning_rate": 0.0005434282026296031,
      "loss": 2.7479,
      "step": 45779
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6083357334136963,
      "learning_rate": 0.0005434258118643838,
      "loss": 3.0521,
      "step": 45780
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.014618396759033,
      "learning_rate": 0.0005434234210539071,
      "loss": 3.0087,
      "step": 45781
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1416187286376953,
      "learning_rate": 0.0005434210301981735,
      "loss": 2.7448,
      "step": 45782
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8286563158035278,
      "learning_rate": 0.0005434186392971833,
      "loss": 3.0889,
      "step": 45783
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5439140796661377,
      "learning_rate": 0.0005434162483509369,
      "loss": 3.1215,
      "step": 45784
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5728894472122192,
      "learning_rate": 0.0005434138573594349,
      "loss": 3.2025,
      "step": 45785
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.648544430732727,
      "learning_rate": 0.0005434114663226777,
      "loss": 3.107,
      "step": 45786
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7077583074569702,
      "learning_rate": 0.0005434090752406656,
      "loss": 3.219,
      "step": 45787
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7525266408920288,
      "learning_rate": 0.0005434066841133993,
      "loss": 3.173,
      "step": 45788
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2811590433120728,
      "learning_rate": 0.000543404292940879,
      "loss": 3.0633,
      "step": 45789
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7686810493469238,
      "learning_rate": 0.0005434019017231051,
      "loss": 3.0852,
      "step": 45790
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3163115978240967,
      "learning_rate": 0.0005433995104600786,
      "loss": 3.4331,
      "step": 45791
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1598877906799316,
      "learning_rate": 0.0005433971191517991,
      "loss": 3.289,
      "step": 45792
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8109798431396484,
      "learning_rate": 0.0005433947277982678,
      "loss": 3.02,
      "step": 45793
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6658246517181396,
      "learning_rate": 0.0005433923363994846,
      "loss": 3.0869,
      "step": 45794
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.267177104949951,
      "learning_rate": 0.0005433899449554504,
      "loss": 3.4117,
      "step": 45795
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5663634538650513,
      "learning_rate": 0.0005433875534661653,
      "loss": 2.9296,
      "step": 45796
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3588963747024536,
      "learning_rate": 0.0005433851619316299,
      "loss": 3.019,
      "step": 45797
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2825583219528198,
      "learning_rate": 0.0005433827703518444,
      "loss": 2.9614,
      "step": 45798
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7978194952011108,
      "learning_rate": 0.0005433803787268097,
      "loss": 3.2816,
      "step": 45799
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7302179336547852,
      "learning_rate": 0.0005433779870565259,
      "loss": 3.0153,
      "step": 45800
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6870644092559814,
      "learning_rate": 0.0005433755953409934,
      "loss": 3.1157,
      "step": 45801
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5958480834960938,
      "learning_rate": 0.000543373203580213,
      "loss": 3.0567,
      "step": 45802
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9629998207092285,
      "learning_rate": 0.0005433708117741848,
      "loss": 3.098,
      "step": 45803
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8934444189071655,
      "learning_rate": 0.0005433684199229094,
      "loss": 3.0269,
      "step": 45804
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9113545417785645,
      "learning_rate": 0.0005433660280263873,
      "loss": 3.0718,
      "step": 45805
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8232680559158325,
      "learning_rate": 0.0005433636360846188,
      "loss": 3.0508,
      "step": 45806
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.342020034790039,
      "learning_rate": 0.0005433612440976043,
      "loss": 3.044,
      "step": 45807
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7723740339279175,
      "learning_rate": 0.0005433588520653445,
      "loss": 3.0344,
      "step": 45808
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5977303981781006,
      "learning_rate": 0.0005433564599878396,
      "loss": 3.0085,
      "step": 45809
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.63321053981781,
      "learning_rate": 0.0005433540678650902,
      "loss": 3.1561,
      "step": 45810
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3620301485061646,
      "learning_rate": 0.0005433516756970967,
      "loss": 3.0321,
      "step": 45811
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9099375009536743,
      "learning_rate": 0.0005433492834838595,
      "loss": 3.1393,
      "step": 45812
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5959972143173218,
      "learning_rate": 0.0005433468912253791,
      "loss": 3.1616,
      "step": 45813
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.01987886428833,
      "learning_rate": 0.0005433444989216559,
      "loss": 2.9127,
      "step": 45814
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4365770816802979,
      "learning_rate": 0.0005433421065726904,
      "loss": 2.8968,
      "step": 45815
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.500158667564392,
      "learning_rate": 0.0005433397141784828,
      "loss": 2.869,
      "step": 45816
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.292076349258423,
      "learning_rate": 0.000543337321739034,
      "loss": 2.8628,
      "step": 45817
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.40260648727417,
      "learning_rate": 0.0005433349292543442,
      "loss": 2.852,
      "step": 45818
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.693968653678894,
      "learning_rate": 0.0005433325367244137,
      "loss": 3.1818,
      "step": 45819
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4309924840927124,
      "learning_rate": 0.0005433301441492432,
      "loss": 3.104,
      "step": 45820
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5587767362594604,
      "learning_rate": 0.0005433277515288329,
      "loss": 2.9771,
      "step": 45821
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4160871505737305,
      "learning_rate": 0.0005433253588631835,
      "loss": 3.0064,
      "step": 45822
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.516891598701477,
      "learning_rate": 0.0005433229661522953,
      "loss": 3.1593,
      "step": 45823
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8875006437301636,
      "learning_rate": 0.0005433205733961688,
      "loss": 2.9757,
      "step": 45824
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.47146475315094,
      "learning_rate": 0.0005433181805948042,
      "loss": 3.0639,
      "step": 45825
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6986653804779053,
      "learning_rate": 0.0005433157877482024,
      "loss": 2.9333,
      "step": 45826
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5356141328811646,
      "learning_rate": 0.0005433133948563634,
      "loss": 3.0505,
      "step": 45827
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7204939126968384,
      "learning_rate": 0.0005433110019192881,
      "loss": 2.9676,
      "step": 45828
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6118979454040527,
      "learning_rate": 0.0005433086089369765,
      "loss": 2.8262,
      "step": 45829
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9186497926712036,
      "learning_rate": 0.0005433062159094294,
      "loss": 2.9578,
      "step": 45830
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.582037091255188,
      "learning_rate": 0.0005433038228366469,
      "loss": 2.8154,
      "step": 45831
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.157830238342285,
      "learning_rate": 0.0005433014297186298,
      "loss": 3.1427,
      "step": 45832
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0918140411376953,
      "learning_rate": 0.0005432990365553782,
      "loss": 3.0222,
      "step": 45833
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3619723320007324,
      "learning_rate": 0.0005432966433468929,
      "loss": 3.0388,
      "step": 45834
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.215001344680786,
      "learning_rate": 0.000543294250093174,
      "loss": 2.8434,
      "step": 45835
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7443791627883911,
      "learning_rate": 0.0005432918567942222,
      "loss": 3.1345,
      "step": 45836
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.352142095565796,
      "learning_rate": 0.0005432894634500379,
      "loss": 2.9461,
      "step": 45837
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4446296691894531,
      "learning_rate": 0.0005432870700606213,
      "loss": 3.1417,
      "step": 45838
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3224906921386719,
      "learning_rate": 0.0005432846766259731,
      "loss": 2.9842,
      "step": 45839
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.672410488128662,
      "learning_rate": 0.0005432822831460937,
      "loss": 2.9602,
      "step": 45840
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.837679147720337,
      "learning_rate": 0.0005432798896209836,
      "loss": 2.8347,
      "step": 45841
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.527527093887329,
      "learning_rate": 0.0005432774960506432,
      "loss": 2.8617,
      "step": 45842
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.529934883117676,
      "learning_rate": 0.0005432751024350729,
      "loss": 2.7806,
      "step": 45843
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3905105590820312,
      "learning_rate": 0.0005432727087742731,
      "loss": 2.7493,
      "step": 45844
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.927936553955078,
      "learning_rate": 0.0005432703150682443,
      "loss": 2.7445,
      "step": 45845
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7637735605239868,
      "learning_rate": 0.0005432679213169871,
      "loss": 2.9172,
      "step": 45846
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5905653238296509,
      "learning_rate": 0.0005432655275205017,
      "loss": 2.9812,
      "step": 45847
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3954434394836426,
      "learning_rate": 0.0005432631336787885,
      "loss": 3.0186,
      "step": 45848
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6816359758377075,
      "learning_rate": 0.0005432607397918483,
      "loss": 2.9334,
      "step": 45849
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.073716640472412,
      "learning_rate": 0.0005432583458596813,
      "loss": 3.0856,
      "step": 45850
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.898826241493225,
      "learning_rate": 0.0005432559518822879,
      "loss": 3.0677,
      "step": 45851
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.837965488433838,
      "learning_rate": 0.0005432535578596688,
      "loss": 3.0382,
      "step": 45852
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.782313346862793,
      "learning_rate": 0.0005432511637918241,
      "loss": 2.8444,
      "step": 45853
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1426215171813965,
      "learning_rate": 0.0005432487696787544,
      "loss": 3.1977,
      "step": 45854
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4683133363723755,
      "learning_rate": 0.0005432463755204603,
      "loss": 2.8786,
      "step": 45855
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2803587913513184,
      "learning_rate": 0.0005432439813169421,
      "loss": 3.0706,
      "step": 45856
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6262544393539429,
      "learning_rate": 0.0005432415870682001,
      "loss": 3.1932,
      "step": 45857
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.387340784072876,
      "learning_rate": 0.000543239192774235,
      "loss": 3.0434,
      "step": 45858
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0092554092407227,
      "learning_rate": 0.0005432367984350472,
      "loss": 3.1084,
      "step": 45859
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4756559133529663,
      "learning_rate": 0.000543234404050637,
      "loss": 3.1109,
      "step": 45860
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5394315719604492,
      "learning_rate": 0.000543232009621005,
      "loss": 3.1797,
      "step": 45861
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6714982986450195,
      "learning_rate": 0.0005432296151461515,
      "loss": 2.9459,
      "step": 45862
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4436140060424805,
      "learning_rate": 0.000543227220626077,
      "loss": 2.8837,
      "step": 45863
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8614453077316284,
      "learning_rate": 0.0005432248260607821,
      "loss": 3.2071,
      "step": 45864
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4791483879089355,
      "learning_rate": 0.0005432224314502671,
      "loss": 3.2007,
      "step": 45865
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6206272840499878,
      "learning_rate": 0.0005432200367945323,
      "loss": 3.0891,
      "step": 45866
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4675334692001343,
      "learning_rate": 0.0005432176420935784,
      "loss": 3.2405,
      "step": 45867
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3438304662704468,
      "learning_rate": 0.0005432152473474058,
      "loss": 3.2134,
      "step": 45868
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8998472690582275,
      "learning_rate": 0.0005432128525560148,
      "loss": 2.8755,
      "step": 45869
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0018231868743896,
      "learning_rate": 0.000543210457719406,
      "loss": 3.1412,
      "step": 45870
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4011613130569458,
      "learning_rate": 0.0005432080628375798,
      "loss": 2.8858,
      "step": 45871
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4877197742462158,
      "learning_rate": 0.0005432056679105366,
      "loss": 2.9443,
      "step": 45872
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5186506509780884,
      "learning_rate": 0.0005432032729382769,
      "loss": 3.344,
      "step": 45873
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5203869342803955,
      "learning_rate": 0.000543200877920801,
      "loss": 2.9422,
      "step": 45874
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0793020725250244,
      "learning_rate": 0.0005431984828581097,
      "loss": 3.0705,
      "step": 45875
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3867545127868652,
      "learning_rate": 0.0005431960877502031,
      "loss": 2.9685,
      "step": 45876
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.4303126335144043,
      "learning_rate": 0.0005431936925970815,
      "loss": 3.1058,
      "step": 45877
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6263900995254517,
      "learning_rate": 0.0005431912973987459,
      "loss": 3.4613,
      "step": 45878
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.791994094848633,
      "learning_rate": 0.0005431889021551964,
      "loss": 2.98,
      "step": 45879
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.978830099105835,
      "learning_rate": 0.0005431865068664335,
      "loss": 3.1638,
      "step": 45880
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.561826229095459,
      "learning_rate": 0.0005431841115324577,
      "loss": 3.1297,
      "step": 45881
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1694695949554443,
      "learning_rate": 0.0005431817161532693,
      "loss": 2.9552,
      "step": 45882
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.8749401569366455,
      "learning_rate": 0.0005431793207288688,
      "loss": 3.301,
      "step": 45883
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.6779119968414307,
      "learning_rate": 0.0005431769252592566,
      "loss": 3.0656,
      "step": 45884
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.155120372772217,
      "learning_rate": 0.0005431745297444333,
      "loss": 2.8922,
      "step": 45885
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2980544567108154,
      "learning_rate": 0.0005431721341843993,
      "loss": 3.1706,
      "step": 45886
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.437203884124756,
      "learning_rate": 0.000543169738579155,
      "loss": 3.1495,
      "step": 45887
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3703025579452515,
      "learning_rate": 0.000543167342928701,
      "loss": 2.9134,
      "step": 45888
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8084750175476074,
      "learning_rate": 0.0005431649472330373,
      "loss": 3.1608,
      "step": 45889
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6085947751998901,
      "learning_rate": 0.0005431625514921648,
      "loss": 3.2069,
      "step": 45890
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6163573265075684,
      "learning_rate": 0.0005431601557060838,
      "loss": 3.0803,
      "step": 45891
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4207921028137207,
      "learning_rate": 0.0005431577598747948,
      "loss": 3.1539,
      "step": 45892
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3994272947311401,
      "learning_rate": 0.000543155363998298,
      "loss": 3.1194,
      "step": 45893
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6337549686431885,
      "learning_rate": 0.0005431529680765941,
      "loss": 2.7516,
      "step": 45894
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7768917083740234,
      "learning_rate": 0.0005431505721096836,
      "loss": 3.1679,
      "step": 45895
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.106790065765381,
      "learning_rate": 0.0005431481760975667,
      "loss": 2.9735,
      "step": 45896
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4748677015304565,
      "learning_rate": 0.000543145780040244,
      "loss": 3.1387,
      "step": 45897
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8100523948669434,
      "learning_rate": 0.0005431433839377159,
      "loss": 2.9189,
      "step": 45898
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7053196430206299,
      "learning_rate": 0.0005431409877899828,
      "loss": 3.0145,
      "step": 45899
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.0541694164276123,
      "learning_rate": 0.0005431385915970453,
      "loss": 2.9328,
      "step": 45900
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5450716018676758,
      "learning_rate": 0.0005431361953589037,
      "loss": 3.2935,
      "step": 45901
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9880123138427734,
      "learning_rate": 0.0005431337990755585,
      "loss": 3.1502,
      "step": 45902
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.5190036296844482,
      "learning_rate": 0.0005431314027470101,
      "loss": 3.0529,
      "step": 45903
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7445539236068726,
      "learning_rate": 0.000543129006373259,
      "loss": 3.1422,
      "step": 45904
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6609504222869873,
      "learning_rate": 0.0005431266099543057,
      "loss": 3.0623,
      "step": 45905
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7915087938308716,
      "learning_rate": 0.0005431242134901504,
      "loss": 3.1301,
      "step": 45906
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8706425428390503,
      "learning_rate": 0.0005431218169807939,
      "loss": 3.0068,
      "step": 45907
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.350138783454895,
      "learning_rate": 0.0005431194204262365,
      "loss": 3.1338,
      "step": 45908
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.445846438407898,
      "learning_rate": 0.0005431170238264784,
      "loss": 3.0713,
      "step": 45909
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0899364948272705,
      "learning_rate": 0.0005431146271815204,
      "loss": 3.2126,
      "step": 45910
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.477383852005005,
      "learning_rate": 0.0005431122304913626,
      "loss": 3.0951,
      "step": 45911
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7206052541732788,
      "learning_rate": 0.000543109833756006,
      "loss": 3.1612,
      "step": 45912
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7622432708740234,
      "learning_rate": 0.0005431074369754505,
      "loss": 3.2296,
      "step": 45913
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.1955771446228027,
      "learning_rate": 0.0005431050401496967,
      "loss": 3.0282,
      "step": 45914
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.469773292541504,
      "learning_rate": 0.0005431026432787452,
      "loss": 3.1659,
      "step": 45915
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7296208143234253,
      "learning_rate": 0.0005431002463625963,
      "loss": 3.1301,
      "step": 45916
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8727068901062012,
      "learning_rate": 0.0005430978494012504,
      "loss": 3.0225,
      "step": 45917
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.3488352298736572,
      "learning_rate": 0.000543095452394708,
      "loss": 3.1362,
      "step": 45918
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.4201669692993164,
      "learning_rate": 0.0005430930553429696,
      "loss": 3.1688,
      "step": 45919
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5312330722808838,
      "learning_rate": 0.0005430906582460357,
      "loss": 3.1267,
      "step": 45920
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8473923206329346,
      "learning_rate": 0.0005430882611039065,
      "loss": 3.1302,
      "step": 45921
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7125327587127686,
      "learning_rate": 0.0005430858639165827,
      "loss": 3.1947,
      "step": 45922
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.317409038543701,
      "learning_rate": 0.0005430834666840647,
      "loss": 3.2901,
      "step": 45923
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4492088556289673,
      "learning_rate": 0.0005430810694063528,
      "loss": 3.194,
      "step": 45924
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7040221691131592,
      "learning_rate": 0.0005430786720834476,
      "loss": 3.145,
      "step": 45925
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4730620384216309,
      "learning_rate": 0.0005430762747153495,
      "loss": 2.9979,
      "step": 45926
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2627551555633545,
      "learning_rate": 0.0005430738773020589,
      "loss": 2.7915,
      "step": 45927
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.477078914642334,
      "learning_rate": 0.0005430714798435762,
      "loss": 2.8727,
      "step": 45928
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.433577299118042,
      "learning_rate": 0.000543069082339902,
      "loss": 3.0868,
      "step": 45929
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.556270718574524,
      "learning_rate": 0.0005430666847910366,
      "loss": 3.0539,
      "step": 45930
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4863836765289307,
      "learning_rate": 0.0005430642871969807,
      "loss": 3.0746,
      "step": 45931
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6977572441101074,
      "learning_rate": 0.0005430618895577344,
      "loss": 2.9987,
      "step": 45932
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0776472091674805,
      "learning_rate": 0.0005430594918732984,
      "loss": 3.1055,
      "step": 45933
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0555922985076904,
      "learning_rate": 0.0005430570941436731,
      "loss": 3.1712,
      "step": 45934
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.340626835823059,
      "learning_rate": 0.0005430546963688587,
      "loss": 3.1576,
      "step": 45935
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5850906372070312,
      "learning_rate": 0.0005430522985488559,
      "loss": 2.9762,
      "step": 45936
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9570579528808594,
      "learning_rate": 0.0005430499006836652,
      "loss": 3.0633,
      "step": 45937
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6091625690460205,
      "learning_rate": 0.0005430475027732869,
      "loss": 3.0073,
      "step": 45938
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.588160514831543,
      "learning_rate": 0.0005430451048177215,
      "loss": 3.0581,
      "step": 45939
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.349860668182373,
      "learning_rate": 0.0005430427068169693,
      "loss": 2.857,
      "step": 45940
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3900023698806763,
      "learning_rate": 0.0005430403087710309,
      "loss": 2.7971,
      "step": 45941
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.497567057609558,
      "learning_rate": 0.0005430379106799068,
      "loss": 2.9014,
      "step": 45942
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.813965916633606,
      "learning_rate": 0.0005430355125435974,
      "loss": 3.1598,
      "step": 45943
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4230549335479736,
      "learning_rate": 0.0005430331143621031,
      "loss": 3.0734,
      "step": 45944
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5312366485595703,
      "learning_rate": 0.0005430307161354242,
      "loss": 2.7484,
      "step": 45945
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6681040525436401,
      "learning_rate": 0.0005430283178635615,
      "loss": 2.9945,
      "step": 45946
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6727560758590698,
      "learning_rate": 0.000543025919546515,
      "loss": 2.9586,
      "step": 45947
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4958739280700684,
      "learning_rate": 0.0005430235211842856,
      "loss": 3.0857,
      "step": 45948
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7677661180496216,
      "learning_rate": 0.0005430211227768736,
      "loss": 2.8271,
      "step": 45949
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7237615585327148,
      "learning_rate": 0.0005430187243242792,
      "loss": 2.8944,
      "step": 45950
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4853755235671997,
      "learning_rate": 0.0005430163258265032,
      "loss": 2.8394,
      "step": 45951
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7328213453292847,
      "learning_rate": 0.0005430139272835457,
      "loss": 2.9703,
      "step": 45952
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8302664756774902,
      "learning_rate": 0.0005430115286954074,
      "loss": 3.0513,
      "step": 45953
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4914960861206055,
      "learning_rate": 0.0005430091300620887,
      "loss": 2.8769,
      "step": 45954
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4094924926757812,
      "learning_rate": 0.0005430067313835899,
      "loss": 2.9408,
      "step": 45955
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5421686172485352,
      "learning_rate": 0.0005430043326599117,
      "loss": 2.9859,
      "step": 45956
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4971400499343872,
      "learning_rate": 0.0005430019338910544,
      "loss": 3.1053,
      "step": 45957
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3925460577011108,
      "learning_rate": 0.0005429995350770183,
      "loss": 3.1983,
      "step": 45958
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.027489423751831,
      "learning_rate": 0.0005429971362178042,
      "loss": 3.2754,
      "step": 45959
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.4663736820220947,
      "learning_rate": 0.0005429947373134122,
      "loss": 3.1069,
      "step": 45960
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4779000282287598,
      "learning_rate": 0.000542992338363843,
      "loss": 3.2773,
      "step": 45961
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.85308039188385,
      "learning_rate": 0.000542989939369097,
      "loss": 3.3281,
      "step": 45962
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.578552007675171,
      "learning_rate": 0.0005429875403291744,
      "loss": 3.0868,
      "step": 45963
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6481064558029175,
      "learning_rate": 0.000542985141244076,
      "loss": 3.2675,
      "step": 45964
    },
    {
      "epoch": 0.6,
      "grad_norm": 4.004709243774414,
      "learning_rate": 0.0005429827421138018,
      "loss": 3.166,
      "step": 45965
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.4714975357055664,
      "learning_rate": 0.0005429803429383527,
      "loss": 3.1054,
      "step": 45966
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.315624952316284,
      "learning_rate": 0.000542977943717729,
      "loss": 3.0879,
      "step": 45967
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.51419198513031,
      "learning_rate": 0.0005429755444519311,
      "loss": 2.9649,
      "step": 45968
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.373591661453247,
      "learning_rate": 0.0005429731451409595,
      "loss": 3.1163,
      "step": 45969
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.143202304840088,
      "learning_rate": 0.0005429707457848145,
      "loss": 2.8833,
      "step": 45970
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.060673475265503,
      "learning_rate": 0.0005429683463834968,
      "loss": 2.9932,
      "step": 45971
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.684190273284912,
      "learning_rate": 0.0005429659469370065,
      "loss": 2.9983,
      "step": 45972
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0391173362731934,
      "learning_rate": 0.0005429635474453444,
      "loss": 2.8515,
      "step": 45973
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.861122965812683,
      "learning_rate": 0.0005429611479085106,
      "loss": 3.1031,
      "step": 45974
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.07999849319458,
      "learning_rate": 0.000542958748326506,
      "loss": 3.0244,
      "step": 45975
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7488869428634644,
      "learning_rate": 0.0005429563486993305,
      "loss": 3.1675,
      "step": 45976
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.297130823135376,
      "learning_rate": 0.000542953949026985,
      "loss": 3.2342,
      "step": 45977
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4597015380859375,
      "learning_rate": 0.0005429515493094698,
      "loss": 2.9929,
      "step": 45978
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6596195697784424,
      "learning_rate": 0.0005429491495467854,
      "loss": 2.9492,
      "step": 45979
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4225561618804932,
      "learning_rate": 0.000542946749738932,
      "loss": 3.0068,
      "step": 45980
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.432008981704712,
      "learning_rate": 0.0005429443498859102,
      "loss": 3.0865,
      "step": 45981
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5180294513702393,
      "learning_rate": 0.0005429419499877206,
      "loss": 3.1394,
      "step": 45982
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5091242790222168,
      "learning_rate": 0.0005429395500443633,
      "loss": 2.9303,
      "step": 45983
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3666191101074219,
      "learning_rate": 0.0005429371500558391,
      "loss": 2.9508,
      "step": 45984
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7385421991348267,
      "learning_rate": 0.0005429347500221484,
      "loss": 3.1782,
      "step": 45985
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4372049570083618,
      "learning_rate": 0.0005429323499432913,
      "loss": 3.1681,
      "step": 45986
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3728067874908447,
      "learning_rate": 0.0005429299498192686,
      "loss": 3.1092,
      "step": 45987
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0644073486328125,
      "learning_rate": 0.0005429275496500807,
      "loss": 3.1035,
      "step": 45988
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4778194427490234,
      "learning_rate": 0.0005429251494357279,
      "loss": 3.3621,
      "step": 45989
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5748273134231567,
      "learning_rate": 0.0005429227491762107,
      "loss": 3.0258,
      "step": 45990
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3108566999435425,
      "learning_rate": 0.0005429203488715297,
      "loss": 2.7908,
      "step": 45991
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5149275064468384,
      "learning_rate": 0.0005429179485216852,
      "loss": 2.8683,
      "step": 45992
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8792392015457153,
      "learning_rate": 0.0005429155481266776,
      "loss": 3.0955,
      "step": 45993
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5594197511672974,
      "learning_rate": 0.0005429131476865073,
      "loss": 3.235,
      "step": 45994
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4128656387329102,
      "learning_rate": 0.000542910747201175,
      "loss": 3.2805,
      "step": 45995
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3786075115203857,
      "learning_rate": 0.000542908346670681,
      "loss": 2.9614,
      "step": 45996
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4382940530776978,
      "learning_rate": 0.0005429059460950258,
      "loss": 3.0138,
      "step": 45997
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.450644850730896,
      "learning_rate": 0.0005429035454742097,
      "loss": 3.089,
      "step": 45998
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7461109161376953,
      "learning_rate": 0.0005429011448082334,
      "loss": 3.1551,
      "step": 45999
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4699609279632568,
      "learning_rate": 0.0005428987440970971,
      "loss": 2.97,
      "step": 46000
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.652114748954773,
      "learning_rate": 0.0005428963433408013,
      "loss": 3.268,
      "step": 46001
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.635211944580078,
      "learning_rate": 0.0005428939425393465,
      "loss": 3.15,
      "step": 46002
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4972044229507446,
      "learning_rate": 0.0005428915416927331,
      "loss": 3.2003,
      "step": 46003
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.312554121017456,
      "learning_rate": 0.0005428891408009616,
      "loss": 2.9782,
      "step": 46004
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7705271244049072,
      "learning_rate": 0.0005428867398640325,
      "loss": 3.016,
      "step": 46005
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5058960914611816,
      "learning_rate": 0.0005428843388819461,
      "loss": 3.0429,
      "step": 46006
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9062438011169434,
      "learning_rate": 0.0005428819378547029,
      "loss": 3.0979,
      "step": 46007
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.420942544937134,
      "learning_rate": 0.0005428795367823034,
      "loss": 3.0447,
      "step": 46008
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5472607612609863,
      "learning_rate": 0.0005428771356647481,
      "loss": 3.1229,
      "step": 46009
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6860185861587524,
      "learning_rate": 0.0005428747345020371,
      "loss": 2.9908,
      "step": 46010
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3266860246658325,
      "learning_rate": 0.0005428723332941714,
      "loss": 3.1615,
      "step": 46011
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8319696187973022,
      "learning_rate": 0.0005428699320411509,
      "loss": 3.006,
      "step": 46012
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.967726230621338,
      "learning_rate": 0.0005428675307429765,
      "loss": 2.9715,
      "step": 46013
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9490445852279663,
      "learning_rate": 0.0005428651293996483,
      "loss": 3.3436,
      "step": 46014
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7098503112792969,
      "learning_rate": 0.0005428627280111669,
      "loss": 3.1191,
      "step": 46015
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1110904216766357,
      "learning_rate": 0.0005428603265775329,
      "loss": 2.9367,
      "step": 46016
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0042426586151123,
      "learning_rate": 0.0005428579250987464,
      "loss": 3.0688,
      "step": 46017
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.61058509349823,
      "learning_rate": 0.000542855523574808,
      "loss": 2.9639,
      "step": 46018
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9179648160934448,
      "learning_rate": 0.0005428531220057183,
      "loss": 3.1486,
      "step": 46019
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6891083717346191,
      "learning_rate": 0.0005428507203914776,
      "loss": 3.0525,
      "step": 46020
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.425637125968933,
      "learning_rate": 0.0005428483187320864,
      "loss": 3.244,
      "step": 46021
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6985286474227905,
      "learning_rate": 0.000542845917027545,
      "loss": 3.1831,
      "step": 46022
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3507089614868164,
      "learning_rate": 0.000542843515277854,
      "loss": 3.1366,
      "step": 46023
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.583013653755188,
      "learning_rate": 0.0005428411134830137,
      "loss": 3.033,
      "step": 46024
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6103886365890503,
      "learning_rate": 0.0005428387116430249,
      "loss": 2.8665,
      "step": 46025
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6697824001312256,
      "learning_rate": 0.0005428363097578877,
      "loss": 3.1316,
      "step": 46026
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7199816703796387,
      "learning_rate": 0.0005428339078276026,
      "loss": 3.3581,
      "step": 46027
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3740408420562744,
      "learning_rate": 0.0005428315058521701,
      "loss": 2.7214,
      "step": 46028
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2487123012542725,
      "learning_rate": 0.0005428291038315907,
      "loss": 3.1748,
      "step": 46029
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.628551959991455,
      "learning_rate": 0.0005428267017658647,
      "loss": 2.998,
      "step": 46030
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.761039137840271,
      "learning_rate": 0.0005428242996549927,
      "loss": 2.9021,
      "step": 46031
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5005688667297363,
      "learning_rate": 0.000542821897498975,
      "loss": 3.2349,
      "step": 46032
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6649744510650635,
      "learning_rate": 0.0005428194952978123,
      "loss": 2.7648,
      "step": 46033
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.824315071105957,
      "learning_rate": 0.0005428170930515046,
      "loss": 3.0713,
      "step": 46034
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.551803469657898,
      "learning_rate": 0.0005428146907600528,
      "loss": 3.2446,
      "step": 46035
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4869747161865234,
      "learning_rate": 0.0005428122884234571,
      "loss": 3.3028,
      "step": 46036
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.325653076171875,
      "learning_rate": 0.0005428098860417181,
      "loss": 3.0285,
      "step": 46037
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.125308036804199,
      "learning_rate": 0.0005428074836148361,
      "loss": 3.1502,
      "step": 46038
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9166266918182373,
      "learning_rate": 0.0005428050811428116,
      "loss": 3.2373,
      "step": 46039
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7319711446762085,
      "learning_rate": 0.000542802678625645,
      "loss": 2.8685,
      "step": 46040
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7262247800827026,
      "learning_rate": 0.0005428002760633369,
      "loss": 3.2957,
      "step": 46041
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4801112413406372,
      "learning_rate": 0.0005427978734558876,
      "loss": 2.9713,
      "step": 46042
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.262263774871826,
      "learning_rate": 0.0005427954708032976,
      "loss": 3.0983,
      "step": 46043
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4330811500549316,
      "learning_rate": 0.0005427930681055673,
      "loss": 3.2332,
      "step": 46044
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.985527515411377,
      "learning_rate": 0.0005427906653626973,
      "loss": 3.1218,
      "step": 46045
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.822021484375,
      "learning_rate": 0.0005427882625746878,
      "loss": 2.9577,
      "step": 46046
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6822779178619385,
      "learning_rate": 0.0005427858597415394,
      "loss": 3.1529,
      "step": 46047
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.802399754524231,
      "learning_rate": 0.0005427834568632525,
      "loss": 2.9691,
      "step": 46048
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.475988507270813,
      "learning_rate": 0.0005427810539398276,
      "loss": 2.9792,
      "step": 46049
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3402448892593384,
      "learning_rate": 0.0005427786509712652,
      "loss": 3.0367,
      "step": 46050
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.623352289199829,
      "learning_rate": 0.0005427762479575656,
      "loss": 3.0311,
      "step": 46051
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3545491695404053,
      "learning_rate": 0.0005427738448987294,
      "loss": 3.0743,
      "step": 46052
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8398246765136719,
      "learning_rate": 0.0005427714417947569,
      "loss": 3.326,
      "step": 46053
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5839117765426636,
      "learning_rate": 0.0005427690386456486,
      "loss": 2.9836,
      "step": 46054
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4239946603775024,
      "learning_rate": 0.0005427666354514049,
      "loss": 3.0632,
      "step": 46055
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.995995283126831,
      "learning_rate": 0.0005427642322120264,
      "loss": 3.0816,
      "step": 46056
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.520867109298706,
      "learning_rate": 0.0005427618289275133,
      "loss": 2.9214,
      "step": 46057
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3856347799301147,
      "learning_rate": 0.0005427594255978664,
      "loss": 2.9774,
      "step": 46058
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3157498836517334,
      "learning_rate": 0.0005427570222230857,
      "loss": 3.1005,
      "step": 46059
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3827757835388184,
      "learning_rate": 0.000542754618803172,
      "loss": 3.2076,
      "step": 46060
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4332318305969238,
      "learning_rate": 0.0005427522153381258,
      "loss": 3.2439,
      "step": 46061
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.113067150115967,
      "learning_rate": 0.0005427498118279472,
      "loss": 2.979,
      "step": 46062
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4412903785705566,
      "learning_rate": 0.0005427474082726368,
      "loss": 3.0608,
      "step": 46063
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4551429748535156,
      "learning_rate": 0.0005427450046721951,
      "loss": 3.0369,
      "step": 46064
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.684880018234253,
      "learning_rate": 0.0005427426010266226,
      "loss": 2.9403,
      "step": 46065
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.670977234840393,
      "learning_rate": 0.0005427401973359196,
      "loss": 3.319,
      "step": 46066
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.428737759590149,
      "learning_rate": 0.0005427377936000866,
      "loss": 2.7488,
      "step": 46067
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7602452039718628,
      "learning_rate": 0.0005427353898191242,
      "loss": 3.0319,
      "step": 46068
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.848464846611023,
      "learning_rate": 0.0005427329859930325,
      "loss": 3.1306,
      "step": 46069
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.591048240661621,
      "learning_rate": 0.0005427305821218123,
      "loss": 3.096,
      "step": 46070
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.573888897895813,
      "learning_rate": 0.0005427281782054639,
      "loss": 2.8264,
      "step": 46071
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7872588634490967,
      "learning_rate": 0.0005427257742439877,
      "loss": 2.9846,
      "step": 46072
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.759866714477539,
      "learning_rate": 0.0005427233702373842,
      "loss": 3.1508,
      "step": 46073
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.065192461013794,
      "learning_rate": 0.0005427209661856538,
      "loss": 3.0203,
      "step": 46074
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.082780599594116,
      "learning_rate": 0.0005427185620887971,
      "loss": 2.8689,
      "step": 46075
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.512017846107483,
      "learning_rate": 0.0005427161579468144,
      "loss": 2.9951,
      "step": 46076
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5367058515548706,
      "learning_rate": 0.0005427137537597062,
      "loss": 3.1273,
      "step": 46077
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5339055061340332,
      "learning_rate": 0.0005427113495274729,
      "loss": 2.9463,
      "step": 46078
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.9903173446655273,
      "learning_rate": 0.000542708945250115,
      "loss": 3.2555,
      "step": 46079
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9507144689559937,
      "learning_rate": 0.0005427065409276329,
      "loss": 3.147,
      "step": 46080
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3474845886230469,
      "learning_rate": 0.000542704136560027,
      "loss": 3.0552,
      "step": 46081
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8547768592834473,
      "learning_rate": 0.0005427017321472979,
      "loss": 2.9974,
      "step": 46082
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.826390027999878,
      "learning_rate": 0.000542699327689446,
      "loss": 3.0923,
      "step": 46083
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3867359161376953,
      "learning_rate": 0.0005426969231864717,
      "loss": 2.7537,
      "step": 46084
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4067453145980835,
      "learning_rate": 0.0005426945186383754,
      "loss": 3.1751,
      "step": 46085
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5615801811218262,
      "learning_rate": 0.0005426921140451576,
      "loss": 3.2708,
      "step": 46086
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4498331546783447,
      "learning_rate": 0.0005426897094068188,
      "loss": 3.1248,
      "step": 46087
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.563218355178833,
      "learning_rate": 0.0005426873047233594,
      "loss": 2.944,
      "step": 46088
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3830755949020386,
      "learning_rate": 0.00054268489999478,
      "loss": 3.1193,
      "step": 46089
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1303558349609375,
      "learning_rate": 0.0005426824952210807,
      "loss": 3.2425,
      "step": 46090
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.8381588459014893,
      "learning_rate": 0.0005426800904022621,
      "loss": 2.8894,
      "step": 46091
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4673758745193481,
      "learning_rate": 0.0005426776855383249,
      "loss": 2.8958,
      "step": 46092
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8595480918884277,
      "learning_rate": 0.0005426752806292691,
      "loss": 3.3672,
      "step": 46093
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0050036907196045,
      "learning_rate": 0.0005426728756750956,
      "loss": 3.0871,
      "step": 46094
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3517016172409058,
      "learning_rate": 0.0005426704706758045,
      "loss": 2.9233,
      "step": 46095
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6065412759780884,
      "learning_rate": 0.0005426680656313964,
      "loss": 3.001,
      "step": 46096
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4670250415802002,
      "learning_rate": 0.0005426656605418718,
      "loss": 2.9413,
      "step": 46097
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6766273975372314,
      "learning_rate": 0.000542663255407231,
      "loss": 3.0455,
      "step": 46098
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8267316818237305,
      "learning_rate": 0.0005426608502274745,
      "loss": 2.9012,
      "step": 46099
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.8736495971679688,
      "learning_rate": 0.0005426584450026028,
      "loss": 3.0157,
      "step": 46100
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9225488901138306,
      "learning_rate": 0.0005426560397326163,
      "loss": 2.7603,
      "step": 46101
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2790664434432983,
      "learning_rate": 0.0005426536344175155,
      "loss": 2.9587,
      "step": 46102
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3446404933929443,
      "learning_rate": 0.0005426512290573008,
      "loss": 2.899,
      "step": 46103
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7955210208892822,
      "learning_rate": 0.0005426488236519725,
      "loss": 2.9926,
      "step": 46104
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.397808313369751,
      "learning_rate": 0.0005426464182015315,
      "loss": 3.1754,
      "step": 46105
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7626875638961792,
      "learning_rate": 0.0005426440127059778,
      "loss": 3.1485,
      "step": 46106
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.997002124786377,
      "learning_rate": 0.0005426416071653119,
      "loss": 3.0618,
      "step": 46107
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3795254230499268,
      "learning_rate": 0.0005426392015795345,
      "loss": 3.3298,
      "step": 46108
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4382656812667847,
      "learning_rate": 0.0005426367959486457,
      "loss": 3.1272,
      "step": 46109
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5908970832824707,
      "learning_rate": 0.0005426343902726464,
      "loss": 2.8146,
      "step": 46110
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.840165376663208,
      "learning_rate": 0.0005426319845515367,
      "loss": 3.2293,
      "step": 46111
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3972816467285156,
      "learning_rate": 0.0005426295787853171,
      "loss": 2.8602,
      "step": 46112
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5096237659454346,
      "learning_rate": 0.000542627172973988,
      "loss": 3.2585,
      "step": 46113
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.7237777709960938,
      "learning_rate": 0.0005426247671175499,
      "loss": 3.0036,
      "step": 46114
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.970349073410034,
      "learning_rate": 0.0005426223612160035,
      "loss": 2.9436,
      "step": 46115
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5506057739257812,
      "learning_rate": 0.0005426199552693488,
      "loss": 3.2301,
      "step": 46116
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7334575653076172,
      "learning_rate": 0.0005426175492775866,
      "loss": 3.0627,
      "step": 46117
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2673813104629517,
      "learning_rate": 0.0005426151432407172,
      "loss": 3.0414,
      "step": 46118
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.538766622543335,
      "learning_rate": 0.0005426127371587411,
      "loss": 2.9166,
      "step": 46119
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.372657299041748,
      "learning_rate": 0.0005426103310316587,
      "loss": 3.1016,
      "step": 46120
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7652339935302734,
      "learning_rate": 0.0005426079248594704,
      "loss": 2.9355,
      "step": 46121
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8657344579696655,
      "learning_rate": 0.0005426055186421767,
      "loss": 3.1452,
      "step": 46122
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7665153741836548,
      "learning_rate": 0.0005426031123797781,
      "loss": 3.1355,
      "step": 46123
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.027385950088501,
      "learning_rate": 0.000542600706072275,
      "loss": 3.0716,
      "step": 46124
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2704169750213623,
      "learning_rate": 0.0005425982997196679,
      "loss": 2.89,
      "step": 46125
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.619206190109253,
      "learning_rate": 0.0005425958933219571,
      "loss": 3.051,
      "step": 46126
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9187588691711426,
      "learning_rate": 0.0005425934868791431,
      "loss": 2.9714,
      "step": 46127
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5740970373153687,
      "learning_rate": 0.0005425910803912265,
      "loss": 3.1069,
      "step": 46128
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.635216474533081,
      "learning_rate": 0.0005425886738582076,
      "loss": 3.0476,
      "step": 46129
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.445917010307312,
      "learning_rate": 0.0005425862672800869,
      "loss": 3.077,
      "step": 46130
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4675662517547607,
      "learning_rate": 0.0005425838606568648,
      "loss": 3.119,
      "step": 46131
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.000103235244751,
      "learning_rate": 0.0005425814539885418,
      "loss": 3.1044,
      "step": 46132
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5188952684402466,
      "learning_rate": 0.0005425790472751183,
      "loss": 2.9521,
      "step": 46133
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5373371839523315,
      "learning_rate": 0.0005425766405165949,
      "loss": 2.8931,
      "step": 46134
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6039122343063354,
      "learning_rate": 0.0005425742337129719,
      "loss": 3.2703,
      "step": 46135
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6480461359024048,
      "learning_rate": 0.0005425718268642496,
      "loss": 3.1165,
      "step": 46136
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.53024423122406,
      "learning_rate": 0.0005425694199704288,
      "loss": 2.8568,
      "step": 46137
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4954274892807007,
      "learning_rate": 0.0005425670130315097,
      "loss": 3.0116,
      "step": 46138
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3497198820114136,
      "learning_rate": 0.0005425646060474927,
      "loss": 3.1038,
      "step": 46139
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4360885620117188,
      "learning_rate": 0.0005425621990183785,
      "loss": 3.3701,
      "step": 46140
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6245001554489136,
      "learning_rate": 0.0005425597919441674,
      "loss": 3.1205,
      "step": 46141
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.475952386856079,
      "learning_rate": 0.0005425573848248598,
      "loss": 2.821,
      "step": 46142
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4825245141983032,
      "learning_rate": 0.0005425549776604563,
      "loss": 2.8568,
      "step": 46143
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5785495042800903,
      "learning_rate": 0.0005425525704509573,
      "loss": 3.1643,
      "step": 46144
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.546312689781189,
      "learning_rate": 0.0005425501631963629,
      "loss": 2.8609,
      "step": 46145
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3601311445236206,
      "learning_rate": 0.0005425477558966741,
      "loss": 2.9999,
      "step": 46146
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.023800849914551,
      "learning_rate": 0.000542545348551891,
      "loss": 2.8636,
      "step": 46147
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9894741773605347,
      "learning_rate": 0.0005425429411620142,
      "loss": 3.0544,
      "step": 46148
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.468109369277954,
      "learning_rate": 0.000542540533727044,
      "loss": 2.9222,
      "step": 46149
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2113823890686035,
      "learning_rate": 0.000542538126246981,
      "loss": 3.1552,
      "step": 46150
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.131636381149292,
      "learning_rate": 0.0005425357187218257,
      "loss": 2.9671,
      "step": 46151
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.0782158374786377,
      "learning_rate": 0.0005425333111515782,
      "loss": 3.14,
      "step": 46152
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.626261830329895,
      "learning_rate": 0.0005425309035362394,
      "loss": 2.915,
      "step": 46153
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7793623208999634,
      "learning_rate": 0.0005425284958758093,
      "loss": 3.0576,
      "step": 46154
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4404442310333252,
      "learning_rate": 0.0005425260881702888,
      "loss": 3.1818,
      "step": 46155
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.648999571800232,
      "learning_rate": 0.000542523680419678,
      "loss": 2.9267,
      "step": 46156
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4871171712875366,
      "learning_rate": 0.0005425212726239774,
      "loss": 3.1159,
      "step": 46157
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4675939083099365,
      "learning_rate": 0.0005425188647831876,
      "loss": 3.1762,
      "step": 46158
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.708274245262146,
      "learning_rate": 0.000542516456897309,
      "loss": 3.1662,
      "step": 46159
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.714779257774353,
      "learning_rate": 0.000542514048966342,
      "loss": 2.9566,
      "step": 46160
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3239812850952148,
      "learning_rate": 0.000542511640990287,
      "loss": 2.8049,
      "step": 46161
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3430662155151367,
      "learning_rate": 0.0005425092329691446,
      "loss": 3.1384,
      "step": 46162
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7785803079605103,
      "learning_rate": 0.0005425068249029151,
      "loss": 2.8672,
      "step": 46163
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5876611471176147,
      "learning_rate": 0.0005425044167915989,
      "loss": 3.0393,
      "step": 46164
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4344791173934937,
      "learning_rate": 0.0005425020086351968,
      "loss": 3.1048,
      "step": 46165
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5247503519058228,
      "learning_rate": 0.0005424996004337087,
      "loss": 3.0254,
      "step": 46166
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8622517585754395,
      "learning_rate": 0.0005424971921871355,
      "loss": 2.8335,
      "step": 46167
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4375941753387451,
      "learning_rate": 0.0005424947838954775,
      "loss": 2.9689,
      "step": 46168
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5320261716842651,
      "learning_rate": 0.0005424923755587353,
      "loss": 3.3886,
      "step": 46169
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.376754641532898,
      "learning_rate": 0.0005424899671769089,
      "loss": 3.0463,
      "step": 46170
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3283356428146362,
      "learning_rate": 0.0005424875587499992,
      "loss": 3.0103,
      "step": 46171
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8453248739242554,
      "learning_rate": 0.0005424851502780065,
      "loss": 3.1821,
      "step": 46172
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5353481769561768,
      "learning_rate": 0.0005424827417609312,
      "loss": 3.0614,
      "step": 46173
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8352376222610474,
      "learning_rate": 0.0005424803331987737,
      "loss": 2.7405,
      "step": 46174
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5699154138565063,
      "learning_rate": 0.0005424779245915345,
      "loss": 2.9251,
      "step": 46175
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6084743738174438,
      "learning_rate": 0.0005424755159392143,
      "loss": 3.3769,
      "step": 46176
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3612868785858154,
      "learning_rate": 0.0005424731072418131,
      "loss": 2.9275,
      "step": 46177
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3033117055892944,
      "learning_rate": 0.0005424706984993317,
      "loss": 3.0326,
      "step": 46178
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8544541597366333,
      "learning_rate": 0.0005424682897117704,
      "loss": 2.7951,
      "step": 46179
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6238155364990234,
      "learning_rate": 0.0005424658808791297,
      "loss": 2.8636,
      "step": 46180
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.084275960922241,
      "learning_rate": 0.0005424634720014099,
      "loss": 3.1424,
      "step": 46181
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.75896418094635,
      "learning_rate": 0.0005424610630786117,
      "loss": 3.0743,
      "step": 46182
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6029577255249023,
      "learning_rate": 0.0005424586541107353,
      "loss": 3.1221,
      "step": 46183
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3209336996078491,
      "learning_rate": 0.0005424562450977813,
      "loss": 2.734,
      "step": 46184
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.488255262374878,
      "learning_rate": 0.0005424538360397503,
      "loss": 2.8991,
      "step": 46185
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.439554214477539,
      "learning_rate": 0.0005424514269366422,
      "loss": 3.1007,
      "step": 46186
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3422363996505737,
      "learning_rate": 0.000542449017788458,
      "loss": 3.1498,
      "step": 46187
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5232027769088745,
      "learning_rate": 0.0005424466085951979,
      "loss": 2.9092,
      "step": 46188
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.588433861732483,
      "learning_rate": 0.0005424441993568624,
      "loss": 3.0109,
      "step": 46189
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7796837091445923,
      "learning_rate": 0.000542441790073452,
      "loss": 2.9296,
      "step": 46190
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7483512163162231,
      "learning_rate": 0.000542439380744967,
      "loss": 3.0399,
      "step": 46191
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4329487085342407,
      "learning_rate": 0.0005424369713714081,
      "loss": 3.1103,
      "step": 46192
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0979926586151123,
      "learning_rate": 0.0005424345619527755,
      "loss": 3.017,
      "step": 46193
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1123502254486084,
      "learning_rate": 0.0005424321524890698,
      "loss": 3.2019,
      "step": 46194
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6878312826156616,
      "learning_rate": 0.0005424297429802912,
      "loss": 2.8094,
      "step": 46195
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9381502866744995,
      "learning_rate": 0.0005424273334264405,
      "loss": 3.1344,
      "step": 46196
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7852915525436401,
      "learning_rate": 0.000542424923827518,
      "loss": 3.0625,
      "step": 46197
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0447843074798584,
      "learning_rate": 0.0005424225141835241,
      "loss": 2.9962,
      "step": 46198
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5678001642227173,
      "learning_rate": 0.0005424201044944591,
      "loss": 3.0549,
      "step": 46199
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6020978689193726,
      "learning_rate": 0.0005424176947603238,
      "loss": 3.1477,
      "step": 46200
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.1789755821228027,
      "learning_rate": 0.0005424152849811185,
      "loss": 3.0013,
      "step": 46201
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.7044851779937744,
      "learning_rate": 0.0005424128751568436,
      "loss": 2.9965,
      "step": 46202
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.564481496810913,
      "learning_rate": 0.0005424104652874996,
      "loss": 3.263,
      "step": 46203
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.7070980072021484,
      "learning_rate": 0.0005424080553730867,
      "loss": 2.8861,
      "step": 46204
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.558284044265747,
      "learning_rate": 0.0005424056454136058,
      "loss": 2.7945,
      "step": 46205
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.092437982559204,
      "learning_rate": 0.000542403235409057,
      "loss": 2.944,
      "step": 46206
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2636966705322266,
      "learning_rate": 0.000542400825359441,
      "loss": 3.1136,
      "step": 46207
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.062485933303833,
      "learning_rate": 0.000542398415264758,
      "loss": 3.107,
      "step": 46208
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5640732049942017,
      "learning_rate": 0.0005423960051250086,
      "loss": 2.9934,
      "step": 46209
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2372372150421143,
      "learning_rate": 0.0005423935949401931,
      "loss": 3.235,
      "step": 46210
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6970796585083008,
      "learning_rate": 0.0005423911847103122,
      "loss": 2.942,
      "step": 46211
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4669246673583984,
      "learning_rate": 0.0005423887744353661,
      "loss": 2.8902,
      "step": 46212
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0864081382751465,
      "learning_rate": 0.0005423863641153554,
      "loss": 3.0129,
      "step": 46213
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6531423330307007,
      "learning_rate": 0.0005423839537502806,
      "loss": 2.7102,
      "step": 46214
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6269383430480957,
      "learning_rate": 0.0005423815433401419,
      "loss": 3.2863,
      "step": 46215
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5705077648162842,
      "learning_rate": 0.0005423791328849399,
      "loss": 3.081,
      "step": 46216
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6738688945770264,
      "learning_rate": 0.000542376722384675,
      "loss": 3.1084,
      "step": 46217
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0934009552001953,
      "learning_rate": 0.0005423743118393478,
      "loss": 3.0243,
      "step": 46218
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4302407503128052,
      "learning_rate": 0.0005423719012489586,
      "loss": 2.8994,
      "step": 46219
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4622629880905151,
      "learning_rate": 0.000542369490613508,
      "loss": 3.1037,
      "step": 46220
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.7596933841705322,
      "learning_rate": 0.0005423670799329962,
      "loss": 2.9789,
      "step": 46221
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6855987310409546,
      "learning_rate": 0.0005423646692074238,
      "loss": 2.8515,
      "step": 46222
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5240306854248047,
      "learning_rate": 0.0005423622584367913,
      "loss": 3.0578,
      "step": 46223
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.5501651763916016,
      "learning_rate": 0.000542359847621099,
      "loss": 3.0704,
      "step": 46224
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7203892469406128,
      "learning_rate": 0.0005423574367603475,
      "loss": 3.3384,
      "step": 46225
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9805718660354614,
      "learning_rate": 0.000542355025854537,
      "loss": 3.1096,
      "step": 46226
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5523440837860107,
      "learning_rate": 0.0005423526149036684,
      "loss": 3.235,
      "step": 46227
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8887839317321777,
      "learning_rate": 0.0005423502039077417,
      "loss": 2.9393,
      "step": 46228
    },
    {
      "epoch": 0.6,
      "grad_norm": 12.223226547241211,
      "learning_rate": 0.0005423477928667576,
      "loss": 3.3094,
      "step": 46229
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6400480270385742,
      "learning_rate": 0.0005423453817807165,
      "loss": 3.186,
      "step": 46230
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.163257598876953,
      "learning_rate": 0.0005423429706496188,
      "loss": 2.9207,
      "step": 46231
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3385815620422363,
      "learning_rate": 0.0005423405594734648,
      "loss": 2.9361,
      "step": 46232
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4613734483718872,
      "learning_rate": 0.0005423381482522553,
      "loss": 3.0632,
      "step": 46233
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7345974445343018,
      "learning_rate": 0.0005423357369859905,
      "loss": 3.2137,
      "step": 46234
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.112339735031128,
      "learning_rate": 0.0005423333256746708,
      "loss": 3.1855,
      "step": 46235
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4522027969360352,
      "learning_rate": 0.0005423309143182969,
      "loss": 3.1399,
      "step": 46236
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7611815929412842,
      "learning_rate": 0.0005423285029168692,
      "loss": 3.0395,
      "step": 46237
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4814187288284302,
      "learning_rate": 0.0005423260914703879,
      "loss": 3.1294,
      "step": 46238
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2731953859329224,
      "learning_rate": 0.0005423236799788537,
      "loss": 3.054,
      "step": 46239
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4878065586090088,
      "learning_rate": 0.0005423212684422669,
      "loss": 3.3287,
      "step": 46240
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6698613166809082,
      "learning_rate": 0.000542318856860628,
      "loss": 3.0657,
      "step": 46241
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.804008722305298,
      "learning_rate": 0.0005423164452339373,
      "loss": 2.9933,
      "step": 46242
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9793599843978882,
      "learning_rate": 0.0005423140335621957,
      "loss": 2.8075,
      "step": 46243
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.386918783187866,
      "learning_rate": 0.0005423116218454032,
      "loss": 3.3153,
      "step": 46244
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4758423566818237,
      "learning_rate": 0.0005423092100835603,
      "loss": 2.9941,
      "step": 46245
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7944425344467163,
      "learning_rate": 0.0005423067982766676,
      "loss": 2.8838,
      "step": 46246
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8844798803329468,
      "learning_rate": 0.0005423043864247255,
      "loss": 2.9341,
      "step": 46247
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9853994846343994,
      "learning_rate": 0.0005423019745277346,
      "loss": 3.0262,
      "step": 46248
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3985008001327515,
      "learning_rate": 0.000542299562585695,
      "loss": 3.0077,
      "step": 46249
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8398871421813965,
      "learning_rate": 0.0005422971505986074,
      "loss": 3.2419,
      "step": 46250
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0788917541503906,
      "learning_rate": 0.0005422947385664722,
      "loss": 3.0702,
      "step": 46251
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5095025300979614,
      "learning_rate": 0.0005422923264892899,
      "loss": 3.0073,
      "step": 46252
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6501835584640503,
      "learning_rate": 0.0005422899143670607,
      "loss": 2.9379,
      "step": 46253
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4662501811981201,
      "learning_rate": 0.0005422875021997854,
      "loss": 3.3006,
      "step": 46254
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2663553953170776,
      "learning_rate": 0.0005422850899874642,
      "loss": 3.4094,
      "step": 46255
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5248701572418213,
      "learning_rate": 0.0005422826777300976,
      "loss": 3.2229,
      "step": 46256
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.757206916809082,
      "learning_rate": 0.0005422802654276861,
      "loss": 3.032,
      "step": 46257
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.737989902496338,
      "learning_rate": 0.0005422778530802301,
      "loss": 2.9338,
      "step": 46258
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4992167949676514,
      "learning_rate": 0.0005422754406877302,
      "loss": 3.2788,
      "step": 46259
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.711287260055542,
      "learning_rate": 0.0005422730282501866,
      "loss": 3.2291,
      "step": 46260
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.622664213180542,
      "learning_rate": 0.0005422706157675999,
      "loss": 2.9551,
      "step": 46261
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5625414848327637,
      "learning_rate": 0.0005422682032399706,
      "loss": 3.0781,
      "step": 46262
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2748968601226807,
      "learning_rate": 0.0005422657906672989,
      "loss": 2.9397,
      "step": 46263
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8955525159835815,
      "learning_rate": 0.0005422633780495855,
      "loss": 3.0391,
      "step": 46264
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.601837635040283,
      "learning_rate": 0.0005422609653868308,
      "loss": 3.0895,
      "step": 46265
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.674942135810852,
      "learning_rate": 0.0005422585526790352,
      "loss": 3.1666,
      "step": 46266
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.823166847229004,
      "learning_rate": 0.0005422561399261992,
      "loss": 2.9669,
      "step": 46267
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9483102560043335,
      "learning_rate": 0.0005422537271283232,
      "loss": 3.0733,
      "step": 46268
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.606441617012024,
      "learning_rate": 0.0005422513142854076,
      "loss": 3.1335,
      "step": 46269
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5719377994537354,
      "learning_rate": 0.0005422489013974528,
      "loss": 2.9879,
      "step": 46270
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.169025182723999,
      "learning_rate": 0.0005422464884644596,
      "loss": 2.9985,
      "step": 46271
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4561351537704468,
      "learning_rate": 0.0005422440754864281,
      "loss": 3.0471,
      "step": 46272
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4907400608062744,
      "learning_rate": 0.0005422416624633587,
      "loss": 3.1045,
      "step": 46273
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7717996835708618,
      "learning_rate": 0.0005422392493952522,
      "loss": 3.0642,
      "step": 46274
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8157039880752563,
      "learning_rate": 0.0005422368362821089,
      "loss": 3.1668,
      "step": 46275
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8422174453735352,
      "learning_rate": 0.0005422344231239291,
      "loss": 3.0191,
      "step": 46276
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7410818338394165,
      "learning_rate": 0.0005422320099207134,
      "loss": 3.269,
      "step": 46277
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.0442698001861572,
      "learning_rate": 0.0005422295966724621,
      "loss": 2.9487,
      "step": 46278
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5911821126937866,
      "learning_rate": 0.0005422271833791757,
      "loss": 3.0918,
      "step": 46279
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6758086681365967,
      "learning_rate": 0.0005422247700408548,
      "loss": 3.1374,
      "step": 46280
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2591559886932373,
      "learning_rate": 0.0005422223566574997,
      "loss": 2.9618,
      "step": 46281
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5328383445739746,
      "learning_rate": 0.000542219943229111,
      "loss": 2.9918,
      "step": 46282
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.68063485622406,
      "learning_rate": 0.000542217529755689,
      "loss": 2.7768,
      "step": 46283
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.553389549255371,
      "learning_rate": 0.0005422151162372341,
      "loss": 2.8932,
      "step": 46284
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5699045658111572,
      "learning_rate": 0.0005422127026737468,
      "loss": 3.0797,
      "step": 46285
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.5671072006225586,
      "learning_rate": 0.0005422102890652277,
      "loss": 3.3913,
      "step": 46286
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6178237199783325,
      "learning_rate": 0.0005422078754116771,
      "loss": 3.035,
      "step": 46287
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.183922052383423,
      "learning_rate": 0.0005422054617130955,
      "loss": 3.1711,
      "step": 46288
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0404279232025146,
      "learning_rate": 0.0005422030479694833,
      "loss": 3.1307,
      "step": 46289
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7166666984558105,
      "learning_rate": 0.0005422006341808411,
      "loss": 3.2298,
      "step": 46290
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.369797945022583,
      "learning_rate": 0.000542198220347169,
      "loss": 3.0299,
      "step": 46291
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.423609733581543,
      "learning_rate": 0.0005421958064684679,
      "loss": 3.149,
      "step": 46292
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3377982378005981,
      "learning_rate": 0.0005421933925447379,
      "loss": 2.7228,
      "step": 46293
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6323739290237427,
      "learning_rate": 0.0005421909785759796,
      "loss": 2.8764,
      "step": 46294
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5140405893325806,
      "learning_rate": 0.0005421885645621935,
      "loss": 3.0611,
      "step": 46295
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3260917663574219,
      "learning_rate": 0.0005421861505033799,
      "loss": 3.0412,
      "step": 46296
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.532957673072815,
      "learning_rate": 0.0005421837363995393,
      "loss": 3.1162,
      "step": 46297
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6024012565612793,
      "learning_rate": 0.0005421813222506723,
      "loss": 3.22,
      "step": 46298
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3379648923873901,
      "learning_rate": 0.0005421789080567791,
      "loss": 3.1543,
      "step": 46299
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3781405687332153,
      "learning_rate": 0.0005421764938178603,
      "loss": 3.0178,
      "step": 46300
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6086050271987915,
      "learning_rate": 0.0005421740795339163,
      "loss": 2.9212,
      "step": 46301
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.624662160873413,
      "learning_rate": 0.0005421716652049476,
      "loss": 3.2356,
      "step": 46302
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.433992624282837,
      "learning_rate": 0.0005421692508309546,
      "loss": 3.2284,
      "step": 46303
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5196865797042847,
      "learning_rate": 0.0005421668364119378,
      "loss": 2.9577,
      "step": 46304
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.416334867477417,
      "learning_rate": 0.0005421644219478976,
      "loss": 2.8508,
      "step": 46305
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.398762822151184,
      "learning_rate": 0.0005421620074388344,
      "loss": 3.1763,
      "step": 46306
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3878799676895142,
      "learning_rate": 0.0005421595928847487,
      "loss": 3.1798,
      "step": 46307
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5277798175811768,
      "learning_rate": 0.0005421571782856411,
      "loss": 3.0628,
      "step": 46308
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5191713571548462,
      "learning_rate": 0.0005421547636415118,
      "loss": 3.1495,
      "step": 46309
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7254217863082886,
      "learning_rate": 0.0005421523489523614,
      "loss": 3.041,
      "step": 46310
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8119053840637207,
      "learning_rate": 0.0005421499342181902,
      "loss": 3.1643,
      "step": 46311
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3405998945236206,
      "learning_rate": 0.000542147519438999,
      "loss": 3.1624,
      "step": 46312
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6172876358032227,
      "learning_rate": 0.0005421451046147878,
      "loss": 3.0204,
      "step": 46313
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5112088918685913,
      "learning_rate": 0.0005421426897455572,
      "loss": 3.0716,
      "step": 46314
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6015818119049072,
      "learning_rate": 0.0005421402748313079,
      "loss": 2.9755,
      "step": 46315
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.563997507095337,
      "learning_rate": 0.00054213785987204,
      "loss": 3.1397,
      "step": 46316
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.838411569595337,
      "learning_rate": 0.0005421354448677543,
      "loss": 2.8994,
      "step": 46317
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9915635585784912,
      "learning_rate": 0.0005421330298184509,
      "loss": 3.2187,
      "step": 46318
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.332923412322998,
      "learning_rate": 0.0005421306147241304,
      "loss": 3.0584,
      "step": 46319
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9560514688491821,
      "learning_rate": 0.0005421281995847933,
      "loss": 3.153,
      "step": 46320
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5974814891815186,
      "learning_rate": 0.0005421257844004399,
      "loss": 3.1506,
      "step": 46321
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1962616443634033,
      "learning_rate": 0.0005421233691710708,
      "loss": 3.2278,
      "step": 46322
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.604235291481018,
      "learning_rate": 0.0005421209538966864,
      "loss": 3.3282,
      "step": 46323
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.709344744682312,
      "learning_rate": 0.0005421185385772871,
      "loss": 3.0237,
      "step": 46324
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.698117971420288,
      "learning_rate": 0.0005421161232128734,
      "loss": 2.9842,
      "step": 46325
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.4563791751861572,
      "learning_rate": 0.0005421137078034458,
      "loss": 3.0419,
      "step": 46326
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7671892642974854,
      "learning_rate": 0.0005421112923490048,
      "loss": 2.8686,
      "step": 46327
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9185043573379517,
      "learning_rate": 0.0005421088768495506,
      "loss": 3.0836,
      "step": 46328
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0205178260803223,
      "learning_rate": 0.0005421064613050837,
      "loss": 3.0484,
      "step": 46329
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8488104343414307,
      "learning_rate": 0.0005421040457156048,
      "loss": 3.0988,
      "step": 46330
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5518683195114136,
      "learning_rate": 0.0005421016300811141,
      "loss": 2.9555,
      "step": 46331
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.669935941696167,
      "learning_rate": 0.0005420992144016122,
      "loss": 3.2306,
      "step": 46332
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.075655937194824,
      "learning_rate": 0.0005420967986770994,
      "loss": 2.8796,
      "step": 46333
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.54698646068573,
      "learning_rate": 0.0005420943829075763,
      "loss": 2.9671,
      "step": 46334
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.483095645904541,
      "learning_rate": 0.0005420919670930433,
      "loss": 3.126,
      "step": 46335
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1063344478607178,
      "learning_rate": 0.0005420895512335008,
      "loss": 2.8169,
      "step": 46336
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.637768030166626,
      "learning_rate": 0.0005420871353289493,
      "loss": 3.1081,
      "step": 46337
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4102309942245483,
      "learning_rate": 0.0005420847193793893,
      "loss": 2.7259,
      "step": 46338
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3913029432296753,
      "learning_rate": 0.0005420823033848211,
      "loss": 3.0892,
      "step": 46339
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.727535367012024,
      "learning_rate": 0.0005420798873452451,
      "loss": 2.9921,
      "step": 46340
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4703336954116821,
      "learning_rate": 0.0005420774712606621,
      "loss": 3.0105,
      "step": 46341
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7710704803466797,
      "learning_rate": 0.0005420750551310722,
      "loss": 3.1496,
      "step": 46342
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3734990358352661,
      "learning_rate": 0.000542072638956476,
      "loss": 2.9207,
      "step": 46343
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4818220138549805,
      "learning_rate": 0.000542070222736874,
      "loss": 3.0927,
      "step": 46344
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3759549856185913,
      "learning_rate": 0.0005420678064722665,
      "loss": 3.0742,
      "step": 46345
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.479830265045166,
      "learning_rate": 0.000542065390162654,
      "loss": 3.1266,
      "step": 46346
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.614132285118103,
      "learning_rate": 0.0005420629738080371,
      "loss": 2.9854,
      "step": 46347
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.2820818424224854,
      "learning_rate": 0.000542060557408416,
      "loss": 3.011,
      "step": 46348
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0064685344696045,
      "learning_rate": 0.0005420581409637914,
      "loss": 3.3485,
      "step": 46349
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.858999252319336,
      "learning_rate": 0.0005420557244741634,
      "loss": 3.0853,
      "step": 46350
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6762937307357788,
      "learning_rate": 0.0005420533079395328,
      "loss": 3.1443,
      "step": 46351
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.8298416137695312,
      "learning_rate": 0.0005420508913599,
      "loss": 2.8668,
      "step": 46352
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4889971017837524,
      "learning_rate": 0.0005420484747352653,
      "loss": 3.0317,
      "step": 46353
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6666313409805298,
      "learning_rate": 0.0005420460580656293,
      "loss": 3.2339,
      "step": 46354
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.436447262763977,
      "learning_rate": 0.0005420436413509923,
      "loss": 3.1043,
      "step": 46355
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.167353630065918,
      "learning_rate": 0.0005420412245913547,
      "loss": 3.0691,
      "step": 46356
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1329562664031982,
      "learning_rate": 0.0005420388077867172,
      "loss": 2.8301,
      "step": 46357
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4733637571334839,
      "learning_rate": 0.0005420363909370801,
      "loss": 3.3814,
      "step": 46358
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4096118211746216,
      "learning_rate": 0.0005420339740424438,
      "loss": 3.0606,
      "step": 46359
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8653980493545532,
      "learning_rate": 0.0005420315571028089,
      "loss": 3.1396,
      "step": 46360
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.218562364578247,
      "learning_rate": 0.0005420291401181757,
      "loss": 2.982,
      "step": 46361
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5489616394042969,
      "learning_rate": 0.0005420267230885448,
      "loss": 3.2023,
      "step": 46362
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5867102146148682,
      "learning_rate": 0.0005420243060139165,
      "loss": 2.9478,
      "step": 46363
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.7688217163085938,
      "learning_rate": 0.0005420218888942914,
      "loss": 3.1624,
      "step": 46364
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9853923320770264,
      "learning_rate": 0.0005420194717296698,
      "loss": 3.1369,
      "step": 46365
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3935669660568237,
      "learning_rate": 0.0005420170545200521,
      "loss": 3.0325,
      "step": 46366
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3755370378494263,
      "learning_rate": 0.0005420146372654389,
      "loss": 2.9544,
      "step": 46367
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2752108573913574,
      "learning_rate": 0.0005420122199658307,
      "loss": 2.8795,
      "step": 46368
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.160841941833496,
      "learning_rate": 0.0005420098026212279,
      "loss": 3.1071,
      "step": 46369
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4547784328460693,
      "learning_rate": 0.0005420073852316308,
      "loss": 3.0774,
      "step": 46370
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4693686962127686,
      "learning_rate": 0.0005420049677970401,
      "loss": 3.1801,
      "step": 46371
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4396237134933472,
      "learning_rate": 0.000542002550317456,
      "loss": 3.0491,
      "step": 46372
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.8904969692230225,
      "learning_rate": 0.0005420001327928791,
      "loss": 2.8665,
      "step": 46373
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1994686126708984,
      "learning_rate": 0.0005419977152233097,
      "loss": 3.1579,
      "step": 46374
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4625593423843384,
      "learning_rate": 0.0005419952976087485,
      "loss": 3.1596,
      "step": 46375
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.194120407104492,
      "learning_rate": 0.0005419928799491957,
      "loss": 2.9529,
      "step": 46376
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3386714458465576,
      "learning_rate": 0.0005419904622446519,
      "loss": 3.0975,
      "step": 46377
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6230077743530273,
      "learning_rate": 0.0005419880444951175,
      "loss": 3.2216,
      "step": 46378
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5791687965393066,
      "learning_rate": 0.000541985626700593,
      "loss": 3.2396,
      "step": 46379
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.482516288757324,
      "learning_rate": 0.0005419832088610787,
      "loss": 2.9477,
      "step": 46380
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3778796195983887,
      "learning_rate": 0.0005419807909765752,
      "loss": 3.2037,
      "step": 46381
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3278779983520508,
      "learning_rate": 0.000541978373047083,
      "loss": 2.9657,
      "step": 46382
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9603255987167358,
      "learning_rate": 0.0005419759550726024,
      "loss": 2.9173,
      "step": 46383
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1722512245178223,
      "learning_rate": 0.0005419735370531338,
      "loss": 3.3047,
      "step": 46384
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4776463508605957,
      "learning_rate": 0.0005419711189886779,
      "loss": 2.9842,
      "step": 46385
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6186609268188477,
      "learning_rate": 0.000541968700879235,
      "loss": 3.0416,
      "step": 46386
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9739863872528076,
      "learning_rate": 0.0005419662827248053,
      "loss": 2.89,
      "step": 46387
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.124377727508545,
      "learning_rate": 0.0005419638645253897,
      "loss": 3.1282,
      "step": 46388
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3644973039627075,
      "learning_rate": 0.0005419614462809885,
      "loss": 2.9852,
      "step": 46389
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6763261556625366,
      "learning_rate": 0.0005419590279916019,
      "loss": 2.8758,
      "step": 46390
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.2800614833831787,
      "learning_rate": 0.0005419566096572307,
      "loss": 2.9861,
      "step": 46391
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4583383798599243,
      "learning_rate": 0.0005419541912778751,
      "loss": 2.8942,
      "step": 46392
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6968783140182495,
      "learning_rate": 0.0005419517728535359,
      "loss": 3.1449,
      "step": 46393
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3460147380828857,
      "learning_rate": 0.0005419493543842131,
      "loss": 2.9935,
      "step": 46394
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3433451652526855,
      "learning_rate": 0.0005419469358699073,
      "loss": 3.0709,
      "step": 46395
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4498887062072754,
      "learning_rate": 0.0005419445173106191,
      "loss": 2.8751,
      "step": 46396
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4169626235961914,
      "learning_rate": 0.0005419420987063488,
      "loss": 2.8959,
      "step": 46397
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4687252044677734,
      "learning_rate": 0.0005419396800570969,
      "loss": 2.9803,
      "step": 46398
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4913495779037476,
      "learning_rate": 0.0005419372613628639,
      "loss": 3.3796,
      "step": 46399
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7474594116210938,
      "learning_rate": 0.0005419348426236501,
      "loss": 3.2817,
      "step": 46400
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5696064233779907,
      "learning_rate": 0.0005419324238394561,
      "loss": 2.9811,
      "step": 46401
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.667938470840454,
      "learning_rate": 0.0005419300050102824,
      "loss": 2.9434,
      "step": 46402
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0094075202941895,
      "learning_rate": 0.0005419275861361292,
      "loss": 2.9347,
      "step": 46403
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5133000612258911,
      "learning_rate": 0.0005419251672169972,
      "loss": 3.2058,
      "step": 46404
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8119674921035767,
      "learning_rate": 0.0005419227482528865,
      "loss": 3.025,
      "step": 46405
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.661793828010559,
      "learning_rate": 0.000541920329243798,
      "loss": 3.0374,
      "step": 46406
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6272687911987305,
      "learning_rate": 0.0005419179101897319,
      "loss": 3.0372,
      "step": 46407
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3658496141433716,
      "learning_rate": 0.0005419154910906889,
      "loss": 2.8606,
      "step": 46408
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7324450016021729,
      "learning_rate": 0.0005419130719466689,
      "loss": 3.0999,
      "step": 46409
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3368518352508545,
      "learning_rate": 0.0005419106527576728,
      "loss": 2.9669,
      "step": 46410
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9404776096343994,
      "learning_rate": 0.000541908233523701,
      "loss": 3.0924,
      "step": 46411
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.3015074729919434,
      "learning_rate": 0.0005419058142447541,
      "loss": 3.0199,
      "step": 46412
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.218147039413452,
      "learning_rate": 0.000541903394920832,
      "loss": 3.1552,
      "step": 46413
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.394623041152954,
      "learning_rate": 0.0005419009755519356,
      "loss": 3.0474,
      "step": 46414
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.460325837135315,
      "learning_rate": 0.0005418985561380653,
      "loss": 3.1575,
      "step": 46415
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.4095754623413086,
      "learning_rate": 0.0005418961366792215,
      "loss": 3.1256,
      "step": 46416
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7223933935165405,
      "learning_rate": 0.0005418937171754046,
      "loss": 3.1032,
      "step": 46417
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8698698282241821,
      "learning_rate": 0.000541891297626615,
      "loss": 3.1362,
      "step": 46418
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5664340257644653,
      "learning_rate": 0.0005418888780328534,
      "loss": 3.1572,
      "step": 46419
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.724494695663452,
      "learning_rate": 0.00054188645839412,
      "loss": 2.9031,
      "step": 46420
    },
    {
      "epoch": 0.6,
      "grad_norm": 4.859864234924316,
      "learning_rate": 0.0005418840387104154,
      "loss": 2.845,
      "step": 46421
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.11051082611084,
      "learning_rate": 0.0005418816189817399,
      "loss": 2.9108,
      "step": 46422
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0251736640930176,
      "learning_rate": 0.0005418791992080941,
      "loss": 2.8642,
      "step": 46423
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.929734468460083,
      "learning_rate": 0.0005418767793894785,
      "loss": 3.0325,
      "step": 46424
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5722965002059937,
      "learning_rate": 0.0005418743595258932,
      "loss": 2.971,
      "step": 46425
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8915899991989136,
      "learning_rate": 0.000541871939617339,
      "loss": 2.7666,
      "step": 46426
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7308851480484009,
      "learning_rate": 0.0005418695196638163,
      "loss": 3.0296,
      "step": 46427
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1335647106170654,
      "learning_rate": 0.0005418670996653255,
      "loss": 3.2647,
      "step": 46428
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9291647672653198,
      "learning_rate": 0.000541864679621867,
      "loss": 3.1075,
      "step": 46429
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5478787422180176,
      "learning_rate": 0.0005418622595334412,
      "loss": 3.0556,
      "step": 46430
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.579346179962158,
      "learning_rate": 0.0005418598394000487,
      "loss": 3.1743,
      "step": 46431
    },
    {
      "epoch": 0.6,
      "grad_norm": 4.29732084274292,
      "learning_rate": 0.00054185741922169,
      "loss": 2.8315,
      "step": 46432
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5565637350082397,
      "learning_rate": 0.0005418549989983653,
      "loss": 2.9621,
      "step": 46433
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7436468601226807,
      "learning_rate": 0.0005418525787300753,
      "loss": 3.0745,
      "step": 46434
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9743468761444092,
      "learning_rate": 0.0005418501584168202,
      "loss": 2.8659,
      "step": 46435
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.465762734413147,
      "learning_rate": 0.0005418477380586008,
      "loss": 2.7896,
      "step": 46436
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.135890007019043,
      "learning_rate": 0.0005418453176554172,
      "loss": 2.799,
      "step": 46437
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5293362140655518,
      "learning_rate": 0.0005418428972072699,
      "loss": 3.2127,
      "step": 46438
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.7228896617889404,
      "learning_rate": 0.0005418404767141596,
      "loss": 3.1055,
      "step": 46439
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4888248443603516,
      "learning_rate": 0.0005418380561760866,
      "loss": 3.0431,
      "step": 46440
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4456367492675781,
      "learning_rate": 0.0005418356355930512,
      "loss": 3.2473,
      "step": 46441
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.415331244468689,
      "learning_rate": 0.000541833214965054,
      "loss": 3.0005,
      "step": 46442
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.475043773651123,
      "learning_rate": 0.0005418307942920956,
      "loss": 3.0412,
      "step": 46443
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.490273356437683,
      "learning_rate": 0.0005418283735741761,
      "loss": 3.1079,
      "step": 46444
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.3602800369262695,
      "learning_rate": 0.0005418259528112962,
      "loss": 3.139,
      "step": 46445
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.9348995685577393,
      "learning_rate": 0.0005418235320034565,
      "loss": 3.0293,
      "step": 46446
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4704052209854126,
      "learning_rate": 0.0005418211111506571,
      "loss": 3.1794,
      "step": 46447
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.8258816003799438,
      "learning_rate": 0.0005418186902528985,
      "loss": 3.1474,
      "step": 46448
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.6711599826812744,
      "learning_rate": 0.0005418162693101813,
      "loss": 2.9328,
      "step": 46449
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.0038177967071533,
      "learning_rate": 0.0005418138483225059,
      "loss": 3.0222,
      "step": 46450
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.008985996246338,
      "learning_rate": 0.0005418114272898728,
      "loss": 3.1316,
      "step": 46451
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1820521354675293,
      "learning_rate": 0.0005418090062122822,
      "loss": 2.8113,
      "step": 46452
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.416041851043701,
      "learning_rate": 0.0005418065850897349,
      "loss": 2.7408,
      "step": 46453
    },
    {
      "epoch": 0.6,
      "grad_norm": 4.827468395233154,
      "learning_rate": 0.0005418041639222312,
      "loss": 2.9976,
      "step": 46454
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.1822385787963867,
      "learning_rate": 0.0005418017427097715,
      "loss": 2.9053,
      "step": 46455
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.6000474691390991,
      "learning_rate": 0.0005417993214523563,
      "loss": 2.9799,
      "step": 46456
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.8676087856292725,
      "learning_rate": 0.000541796900149986,
      "loss": 3.0396,
      "step": 46457
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.870218276977539,
      "learning_rate": 0.000541794478802661,
      "loss": 3.137,
      "step": 46458
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.8642306327819824,
      "learning_rate": 0.0005417920574103821,
      "loss": 2.9851,
      "step": 46459
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.5910487174987793,
      "learning_rate": 0.0005417896359731493,
      "loss": 2.8844,
      "step": 46460
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.0910966396331787,
      "learning_rate": 0.0005417872144909632,
      "loss": 3.1137,
      "step": 46461
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.616092324256897,
      "learning_rate": 0.0005417847929638245,
      "loss": 3.0555,
      "step": 46462
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.940992593765259,
      "learning_rate": 0.0005417823713917333,
      "loss": 2.8656,
      "step": 46463
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.4705307483673096,
      "learning_rate": 0.0005417799497746904,
      "loss": 3.1402,
      "step": 46464
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7598785161972046,
      "learning_rate": 0.0005417775281126957,
      "loss": 3.1867,
      "step": 46465
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.271042823791504,
      "learning_rate": 0.0005417751064057502,
      "loss": 2.9194,
      "step": 46466
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.8369269371032715,
      "learning_rate": 0.0005417726846538542,
      "loss": 3.0072,
      "step": 46467
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3687576055526733,
      "learning_rate": 0.000541770262857008,
      "loss": 3.1092,
      "step": 46468
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5069496631622314,
      "learning_rate": 0.0005417678410152121,
      "loss": 3.099,
      "step": 46469
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.8837361335754395,
      "learning_rate": 0.000541765419128467,
      "loss": 3.0237,
      "step": 46470
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2959144115448,
      "learning_rate": 0.0005417629971967732,
      "loss": 3.1304,
      "step": 46471
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4825832843780518,
      "learning_rate": 0.0005417605752201311,
      "loss": 3.1432,
      "step": 46472
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.021592855453491,
      "learning_rate": 0.0005417581531985411,
      "loss": 2.752,
      "step": 46473
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9638285636901855,
      "learning_rate": 0.0005417557311320038,
      "loss": 3.0598,
      "step": 46474
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7450867891311646,
      "learning_rate": 0.0005417533090205194,
      "loss": 3.1489,
      "step": 46475
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3841257095336914,
      "learning_rate": 0.0005417508868640886,
      "loss": 3.0704,
      "step": 46476
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.928316354751587,
      "learning_rate": 0.0005417484646627116,
      "loss": 3.1016,
      "step": 46477
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4147783517837524,
      "learning_rate": 0.0005417460424163893,
      "loss": 3.3238,
      "step": 46478
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4533919095993042,
      "learning_rate": 0.0005417436201251216,
      "loss": 3.3483,
      "step": 46479
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8457955121994019,
      "learning_rate": 0.0005417411977889093,
      "loss": 3.1513,
      "step": 46480
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5022741556167603,
      "learning_rate": 0.0005417387754077526,
      "loss": 3.3084,
      "step": 46481
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2597110271453857,
      "learning_rate": 0.0005417363529816523,
      "loss": 3.3173,
      "step": 46482
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5095624923706055,
      "learning_rate": 0.0005417339305106085,
      "loss": 3.154,
      "step": 46483
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8689906597137451,
      "learning_rate": 0.0005417315079946218,
      "loss": 3.284,
      "step": 46484
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4651219844818115,
      "learning_rate": 0.0005417290854336927,
      "loss": 3.0804,
      "step": 46485
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5729254484176636,
      "learning_rate": 0.0005417266628278216,
      "loss": 3.2164,
      "step": 46486
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6512653827667236,
      "learning_rate": 0.000541724240177009,
      "loss": 3.0773,
      "step": 46487
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3620880842208862,
      "learning_rate": 0.0005417218174812552,
      "loss": 3.1379,
      "step": 46488
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4634472131729126,
      "learning_rate": 0.0005417193947405608,
      "loss": 2.9764,
      "step": 46489
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3951507806777954,
      "learning_rate": 0.0005417169719549262,
      "loss": 2.9987,
      "step": 46490
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3999522924423218,
      "learning_rate": 0.0005417145491243519,
      "loss": 3.0257,
      "step": 46491
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6954643726348877,
      "learning_rate": 0.0005417121262488383,
      "loss": 3.0711,
      "step": 46492
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6891708374023438,
      "learning_rate": 0.0005417097033283858,
      "loss": 3.0815,
      "step": 46493
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9453150033950806,
      "learning_rate": 0.0005417072803629949,
      "loss": 2.9715,
      "step": 46494
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.963196039199829,
      "learning_rate": 0.000541704857352666,
      "loss": 3.201,
      "step": 46495
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6863566637039185,
      "learning_rate": 0.0005417024342973997,
      "loss": 2.9466,
      "step": 46496
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6431487798690796,
      "learning_rate": 0.0005417000111971964,
      "loss": 2.9757,
      "step": 46497
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4697840213775635,
      "learning_rate": 0.0005416975880520563,
      "loss": 3.0026,
      "step": 46498
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.627367377281189,
      "learning_rate": 0.0005416951648619803,
      "loss": 3.1912,
      "step": 46499
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.136812925338745,
      "learning_rate": 0.0005416927416269685,
      "loss": 3.0762,
      "step": 46500
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6178556680679321,
      "learning_rate": 0.0005416903183470214,
      "loss": 3.143,
      "step": 46501
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6074669361114502,
      "learning_rate": 0.0005416878950221396,
      "loss": 3.3383,
      "step": 46502
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8941415548324585,
      "learning_rate": 0.0005416854716523234,
      "loss": 3.0516,
      "step": 46503
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8583616018295288,
      "learning_rate": 0.0005416830482375734,
      "loss": 3.0611,
      "step": 46504
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.469430923461914,
      "learning_rate": 0.0005416806247778898,
      "loss": 3.2079,
      "step": 46505
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9311017990112305,
      "learning_rate": 0.0005416782012732733,
      "loss": 2.8768,
      "step": 46506
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.725346565246582,
      "learning_rate": 0.0005416757777237243,
      "loss": 2.9082,
      "step": 46507
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4714969396591187,
      "learning_rate": 0.0005416733541292432,
      "loss": 2.9388,
      "step": 46508
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8285068273544312,
      "learning_rate": 0.0005416709304898304,
      "loss": 3.1461,
      "step": 46509
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5797419548034668,
      "learning_rate": 0.0005416685068054864,
      "loss": 3.2064,
      "step": 46510
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.794867992401123,
      "learning_rate": 0.0005416660830762118,
      "loss": 2.7147,
      "step": 46511
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4604994058609009,
      "learning_rate": 0.0005416636593020067,
      "loss": 2.9482,
      "step": 46512
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1162924766540527,
      "learning_rate": 0.000541661235482872,
      "loss": 3.1,
      "step": 46513
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3431235551834106,
      "learning_rate": 0.0005416588116188077,
      "loss": 3.0367,
      "step": 46514
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4748070240020752,
      "learning_rate": 0.0005416563877098146,
      "loss": 3.012,
      "step": 46515
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7548480033874512,
      "learning_rate": 0.000541653963755893,
      "loss": 2.749,
      "step": 46516
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8497459888458252,
      "learning_rate": 0.0005416515397570434,
      "loss": 2.8889,
      "step": 46517
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5985450744628906,
      "learning_rate": 0.0005416491157132661,
      "loss": 3.0375,
      "step": 46518
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7028467655181885,
      "learning_rate": 0.0005416466916245617,
      "loss": 2.8259,
      "step": 46519
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4662787914276123,
      "learning_rate": 0.0005416442674909306,
      "loss": 3.1466,
      "step": 46520
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.903771996498108,
      "learning_rate": 0.0005416418433123734,
      "loss": 3.1224,
      "step": 46521
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9073728322982788,
      "learning_rate": 0.0005416394190888903,
      "loss": 3.0122,
      "step": 46522
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5843479633331299,
      "learning_rate": 0.0005416369948204819,
      "loss": 3.0747,
      "step": 46523
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6204451322555542,
      "learning_rate": 0.0005416345705071486,
      "loss": 2.7366,
      "step": 46524
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.075610876083374,
      "learning_rate": 0.000541632146148891,
      "loss": 2.7479,
      "step": 46525
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.786562204360962,
      "learning_rate": 0.0005416297217457093,
      "loss": 2.9921,
      "step": 46526
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5638490915298462,
      "learning_rate": 0.0005416272972976041,
      "loss": 3.2791,
      "step": 46527
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.771443247795105,
      "learning_rate": 0.0005416248728045758,
      "loss": 3.0697,
      "step": 46528
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4894822835922241,
      "learning_rate": 0.0005416224482666248,
      "loss": 3.053,
      "step": 46529
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4064983129501343,
      "learning_rate": 0.0005416200236837518,
      "loss": 3.1196,
      "step": 46530
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5297350883483887,
      "learning_rate": 0.0005416175990559571,
      "loss": 3.0086,
      "step": 46531
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0418734550476074,
      "learning_rate": 0.000541615174383241,
      "loss": 3.0483,
      "step": 46532
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5473371744155884,
      "learning_rate": 0.000541612749665604,
      "loss": 2.8778,
      "step": 46533
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.006932258605957,
      "learning_rate": 0.0005416103249030468,
      "loss": 2.9227,
      "step": 46534
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.7384746074676514,
      "learning_rate": 0.0005416079000955696,
      "loss": 2.7516,
      "step": 46535
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9247386455535889,
      "learning_rate": 0.000541605475243173,
      "loss": 3.1819,
      "step": 46536
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.723662853240967,
      "learning_rate": 0.0005416030503458572,
      "loss": 3.2035,
      "step": 46537
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1697938442230225,
      "learning_rate": 0.000541600625403623,
      "loss": 3.0653,
      "step": 46538
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3705170154571533,
      "learning_rate": 0.0005415982004164708,
      "loss": 3.2008,
      "step": 46539
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2688157558441162,
      "learning_rate": 0.0005415957753844006,
      "loss": 3.3317,
      "step": 46540
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.972296118736267,
      "learning_rate": 0.0005415933503074134,
      "loss": 3.0733,
      "step": 46541
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5541555881500244,
      "learning_rate": 0.0005415909251855095,
      "loss": 3.2236,
      "step": 46542
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6290751695632935,
      "learning_rate": 0.0005415885000186892,
      "loss": 3.1362,
      "step": 46543
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8392112255096436,
      "learning_rate": 0.0005415860748069529,
      "loss": 3.0037,
      "step": 46544
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3864312171936035,
      "learning_rate": 0.0005415836495503013,
      "loss": 3.0449,
      "step": 46545
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4839012622833252,
      "learning_rate": 0.0005415812242487348,
      "loss": 3.3412,
      "step": 46546
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7141200304031372,
      "learning_rate": 0.0005415787989022537,
      "loss": 3.1142,
      "step": 46547
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9473378658294678,
      "learning_rate": 0.0005415763735108586,
      "loss": 3.0982,
      "step": 46548
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9057849645614624,
      "learning_rate": 0.0005415739480745499,
      "loss": 2.9375,
      "step": 46549
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9590084552764893,
      "learning_rate": 0.000541571522593328,
      "loss": 3.0784,
      "step": 46550
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6277220249176025,
      "learning_rate": 0.0005415690970671934,
      "loss": 3.2599,
      "step": 46551
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5001901388168335,
      "learning_rate": 0.0005415666714961466,
      "loss": 3.0205,
      "step": 46552
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6681771278381348,
      "learning_rate": 0.000541564245880188,
      "loss": 2.7575,
      "step": 46553
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9081145524978638,
      "learning_rate": 0.0005415618202193179,
      "loss": 2.8845,
      "step": 46554
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.541924238204956,
      "learning_rate": 0.000541559394513537,
      "loss": 2.8834,
      "step": 46555
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5757830142974854,
      "learning_rate": 0.0005415569687628457,
      "loss": 3.0807,
      "step": 46556
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4050246477127075,
      "learning_rate": 0.0005415545429672443,
      "loss": 2.999,
      "step": 46557
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3840930461883545,
      "learning_rate": 0.0005415521171267334,
      "loss": 3.1455,
      "step": 46558
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5162756443023682,
      "learning_rate": 0.0005415496912413134,
      "loss": 2.8548,
      "step": 46559
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4137508869171143,
      "learning_rate": 0.0005415472653109847,
      "loss": 2.8969,
      "step": 46560
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7175796031951904,
      "learning_rate": 0.0005415448393357479,
      "loss": 3.1331,
      "step": 46561
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4759286642074585,
      "learning_rate": 0.0005415424133156033,
      "loss": 3.0518,
      "step": 46562
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5944445133209229,
      "learning_rate": 0.0005415399872505514,
      "loss": 2.8211,
      "step": 46563
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2009143829345703,
      "learning_rate": 0.0005415375611405927,
      "loss": 2.9592,
      "step": 46564
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.091477155685425,
      "learning_rate": 0.0005415351349857277,
      "loss": 3.2758,
      "step": 46565
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3811811208724976,
      "learning_rate": 0.0005415327087859565,
      "loss": 2.7816,
      "step": 46566
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8620712757110596,
      "learning_rate": 0.00054153028254128,
      "loss": 2.966,
      "step": 46567
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.520875930786133,
      "learning_rate": 0.0005415278562516984,
      "loss": 3.1565,
      "step": 46568
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.036885976791382,
      "learning_rate": 0.0005415254299172122,
      "loss": 3.0364,
      "step": 46569
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2532964944839478,
      "learning_rate": 0.000541523003537822,
      "loss": 2.9615,
      "step": 46570
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.008528470993042,
      "learning_rate": 0.000541520577113528,
      "loss": 2.9744,
      "step": 46571
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6981687545776367,
      "learning_rate": 0.0005415181506443306,
      "loss": 2.8313,
      "step": 46572
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3847051858901978,
      "learning_rate": 0.0005415157241302307,
      "loss": 3.2316,
      "step": 46573
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.450526237487793,
      "learning_rate": 0.0005415132975712283,
      "loss": 2.9546,
      "step": 46574
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.159139394760132,
      "learning_rate": 0.000541510870967324,
      "loss": 3.0936,
      "step": 46575
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.45461905002594,
      "learning_rate": 0.0005415084443185184,
      "loss": 3.1906,
      "step": 46576
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3415415287017822,
      "learning_rate": 0.0005415060176248117,
      "loss": 3.1744,
      "step": 46577
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2843966484069824,
      "learning_rate": 0.0005415035908862046,
      "loss": 3.1698,
      "step": 46578
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.437081813812256,
      "learning_rate": 0.0005415011641026973,
      "loss": 3.34,
      "step": 46579
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.641684889793396,
      "learning_rate": 0.0005414987372742905,
      "loss": 3.0333,
      "step": 46580
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3932559490203857,
      "learning_rate": 0.0005414963104009845,
      "loss": 2.9775,
      "step": 46581
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7679126262664795,
      "learning_rate": 0.0005414938834827797,
      "loss": 3.2055,
      "step": 46582
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4427944421768188,
      "learning_rate": 0.0005414914565196766,
      "loss": 2.836,
      "step": 46583
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5861046314239502,
      "learning_rate": 0.0005414890295116758,
      "loss": 3.1209,
      "step": 46584
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7162185907363892,
      "learning_rate": 0.0005414866024587776,
      "loss": 3.2214,
      "step": 46585
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9311230182647705,
      "learning_rate": 0.0005414841753609824,
      "loss": 2.9961,
      "step": 46586
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.405562162399292,
      "learning_rate": 0.0005414817482182908,
      "loss": 3.0001,
      "step": 46587
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.446760416030884,
      "learning_rate": 0.0005414793210307032,
      "loss": 3.3118,
      "step": 46588
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7108227014541626,
      "learning_rate": 0.0005414768937982199,
      "loss": 3.097,
      "step": 46589
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.288907051086426,
      "learning_rate": 0.0005414744665208416,
      "loss": 3.0248,
      "step": 46590
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.100250244140625,
      "learning_rate": 0.0005414720391985687,
      "loss": 3.0661,
      "step": 46591
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7300357818603516,
      "learning_rate": 0.0005414696118314016,
      "loss": 3.0182,
      "step": 46592
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7063089609146118,
      "learning_rate": 0.0005414671844193406,
      "loss": 2.9741,
      "step": 46593
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.250274419784546,
      "learning_rate": 0.0005414647569623864,
      "loss": 2.9719,
      "step": 46594
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7283257246017456,
      "learning_rate": 0.0005414623294605393,
      "loss": 3.1556,
      "step": 46595
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7060431241989136,
      "learning_rate": 0.0005414599019137998,
      "loss": 3.0094,
      "step": 46596
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.757855176925659,
      "learning_rate": 0.0005414574743221683,
      "loss": 3.0368,
      "step": 46597
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5634067058563232,
      "learning_rate": 0.0005414550466856453,
      "loss": 2.975,
      "step": 46598
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.526444435119629,
      "learning_rate": 0.0005414526190042313,
      "loss": 3.0505,
      "step": 46599
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.8181958198547363,
      "learning_rate": 0.0005414501912779267,
      "loss": 3.1006,
      "step": 46600
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.1476197242736816,
      "learning_rate": 0.000541447763506732,
      "loss": 3.2375,
      "step": 46601
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8564603328704834,
      "learning_rate": 0.0005414453356906475,
      "loss": 2.9682,
      "step": 46602
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4575127363204956,
      "learning_rate": 0.0005414429078296739,
      "loss": 2.9573,
      "step": 46603
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.9631457328796387,
      "learning_rate": 0.0005414404799238113,
      "loss": 3.0705,
      "step": 46604
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.718626022338867,
      "learning_rate": 0.0005414380519730605,
      "loss": 2.5887,
      "step": 46605
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.339381456375122,
      "learning_rate": 0.0005414356239774218,
      "loss": 2.9614,
      "step": 46606
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8109766244888306,
      "learning_rate": 0.0005414331959368956,
      "loss": 2.8572,
      "step": 46607
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.022334337234497,
      "learning_rate": 0.0005414307678514825,
      "loss": 2.7824,
      "step": 46608
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.010784864425659,
      "learning_rate": 0.0005414283397211828,
      "loss": 2.9333,
      "step": 46609
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2846615314483643,
      "learning_rate": 0.000541425911545997,
      "loss": 3.0945,
      "step": 46610
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5747864246368408,
      "learning_rate": 0.0005414234833259256,
      "loss": 3.1094,
      "step": 46611
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0399603843688965,
      "learning_rate": 0.000541421055060969,
      "loss": 2.8712,
      "step": 46612
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.456443190574646,
      "learning_rate": 0.0005414186267511277,
      "loss": 3.1528,
      "step": 46613
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.541359782218933,
      "learning_rate": 0.000541416198396402,
      "loss": 3.0905,
      "step": 46614
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4400380849838257,
      "learning_rate": 0.0005414137699967926,
      "loss": 2.73,
      "step": 46615
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0928423404693604,
      "learning_rate": 0.0005414113415522997,
      "loss": 2.9203,
      "step": 46616
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6795066595077515,
      "learning_rate": 0.000541408913062924,
      "loss": 3.0333,
      "step": 46617
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.722042441368103,
      "learning_rate": 0.0005414064845286658,
      "loss": 2.8178,
      "step": 46618
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3920613527297974,
      "learning_rate": 0.0005414040559495256,
      "loss": 3.08,
      "step": 46619
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6418715715408325,
      "learning_rate": 0.0005414016273255038,
      "loss": 3.197,
      "step": 46620
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.285778522491455,
      "learning_rate": 0.000541399198656601,
      "loss": 3.0992,
      "step": 46621
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5722357034683228,
      "learning_rate": 0.0005413967699428173,
      "loss": 2.8618,
      "step": 46622
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.59783935546875,
      "learning_rate": 0.0005413943411841535,
      "loss": 3.1544,
      "step": 46623
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3859449625015259,
      "learning_rate": 0.0005413919123806099,
      "loss": 2.9454,
      "step": 46624
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6702018976211548,
      "learning_rate": 0.000541389483532187,
      "loss": 3.1404,
      "step": 46625
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.413955569267273,
      "learning_rate": 0.0005413870546388852,
      "loss": 3.0347,
      "step": 46626
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7050199508666992,
      "learning_rate": 0.0005413846257007051,
      "loss": 3.1327,
      "step": 46627
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4523504972457886,
      "learning_rate": 0.000541382196717647,
      "loss": 2.9873,
      "step": 46628
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6462781429290771,
      "learning_rate": 0.0005413797676897114,
      "loss": 3.2018,
      "step": 46629
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4256671667099,
      "learning_rate": 0.0005413773386168986,
      "loss": 3.2975,
      "step": 46630
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.684266448020935,
      "learning_rate": 0.0005413749094992094,
      "loss": 3.0935,
      "step": 46631
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8724863529205322,
      "learning_rate": 0.000541372480336644,
      "loss": 3.2794,
      "step": 46632
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.657358169555664,
      "learning_rate": 0.0005413700511292029,
      "loss": 2.8054,
      "step": 46633
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.682826280593872,
      "learning_rate": 0.0005413676218768864,
      "loss": 3.0833,
      "step": 46634
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0602152347564697,
      "learning_rate": 0.0005413651925796953,
      "loss": 3.1257,
      "step": 46635
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9533767700195312,
      "learning_rate": 0.0005413627632376298,
      "loss": 3.0712,
      "step": 46636
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6336570978164673,
      "learning_rate": 0.0005413603338506903,
      "loss": 2.9757,
      "step": 46637
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.556817650794983,
      "learning_rate": 0.0005413579044188775,
      "loss": 3.188,
      "step": 46638
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.647699236869812,
      "learning_rate": 0.0005413554749421917,
      "loss": 2.9964,
      "step": 46639
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8277642726898193,
      "learning_rate": 0.0005413530454206332,
      "loss": 3.21,
      "step": 46640
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6691222190856934,
      "learning_rate": 0.0005413506158542028,
      "loss": 3.0778,
      "step": 46641
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6973103284835815,
      "learning_rate": 0.0005413481862429006,
      "loss": 3.2361,
      "step": 46642
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.88645339012146,
      "learning_rate": 0.0005413457565867273,
      "loss": 2.7535,
      "step": 46643
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.7656633853912354,
      "learning_rate": 0.0005413433268856834,
      "loss": 2.9694,
      "step": 46644
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9574975967407227,
      "learning_rate": 0.000541340897139769,
      "loss": 3.1172,
      "step": 46645
    },
    {
      "epoch": 0.61,
      "grad_norm": 4.669624328613281,
      "learning_rate": 0.0005413384673489849,
      "loss": 2.9303,
      "step": 46646
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.593413829803467,
      "learning_rate": 0.0005413360375133313,
      "loss": 3.0075,
      "step": 46647
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3729971647262573,
      "learning_rate": 0.0005413336076328089,
      "loss": 3.0229,
      "step": 46648
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8454132080078125,
      "learning_rate": 0.000541331177707418,
      "loss": 2.7412,
      "step": 46649
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4163001775741577,
      "learning_rate": 0.0005413287477371591,
      "loss": 3.2609,
      "step": 46650
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7598265409469604,
      "learning_rate": 0.0005413263177220326,
      "loss": 2.9259,
      "step": 46651
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6550984382629395,
      "learning_rate": 0.000541323887662039,
      "loss": 3.0841,
      "step": 46652
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8850973844528198,
      "learning_rate": 0.0005413214575571787,
      "loss": 3.02,
      "step": 46653
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7999595403671265,
      "learning_rate": 0.0005413190274074522,
      "loss": 2.7278,
      "step": 46654
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7140024900436401,
      "learning_rate": 0.0005413165972128599,
      "loss": 3.2088,
      "step": 46655
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4465413093566895,
      "learning_rate": 0.0005413141669734023,
      "loss": 3.1218,
      "step": 46656
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.082580327987671,
      "learning_rate": 0.0005413117366890799,
      "loss": 2.9586,
      "step": 46657
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.9758734703063965,
      "learning_rate": 0.0005413093063598931,
      "loss": 3.0237,
      "step": 46658
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3642361164093018,
      "learning_rate": 0.0005413068759858423,
      "loss": 3.0685,
      "step": 46659
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.047947645187378,
      "learning_rate": 0.0005413044455669279,
      "loss": 3.0781,
      "step": 46660
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3090373277664185,
      "learning_rate": 0.0005413020151031506,
      "loss": 3.05,
      "step": 46661
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4483075141906738,
      "learning_rate": 0.0005412995845945107,
      "loss": 2.7491,
      "step": 46662
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6505142450332642,
      "learning_rate": 0.0005412971540410086,
      "loss": 2.6942,
      "step": 46663
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6294282674789429,
      "learning_rate": 0.0005412947234426448,
      "loss": 2.901,
      "step": 46664
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6293296813964844,
      "learning_rate": 0.0005412922927994198,
      "loss": 2.9097,
      "step": 46665
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7996004819869995,
      "learning_rate": 0.0005412898621113338,
      "loss": 3.0652,
      "step": 46666
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.983256459236145,
      "learning_rate": 0.0005412874313783878,
      "loss": 2.9454,
      "step": 46667
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7728147506713867,
      "learning_rate": 0.0005412850006005817,
      "loss": 3.0848,
      "step": 46668
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.324646234512329,
      "learning_rate": 0.0005412825697779162,
      "loss": 3.0579,
      "step": 46669
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5137840509414673,
      "learning_rate": 0.0005412801389103918,
      "loss": 3.0507,
      "step": 46670
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3498551845550537,
      "learning_rate": 0.0005412777079980088,
      "loss": 3.1023,
      "step": 46671
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8835690021514893,
      "learning_rate": 0.0005412752770407676,
      "loss": 3.0492,
      "step": 46672
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6636651754379272,
      "learning_rate": 0.0005412728460386689,
      "loss": 2.9486,
      "step": 46673
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5414071083068848,
      "learning_rate": 0.000541270414991713,
      "loss": 3.0537,
      "step": 46674
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5144916772842407,
      "learning_rate": 0.0005412679838999005,
      "loss": 2.9414,
      "step": 46675
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3043005466461182,
      "learning_rate": 0.0005412655527632315,
      "loss": 2.9994,
      "step": 46676
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7576789855957031,
      "learning_rate": 0.0005412631215817068,
      "loss": 3.0809,
      "step": 46677
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5088049173355103,
      "learning_rate": 0.0005412606903553266,
      "loss": 3.2449,
      "step": 46678
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7320622205734253,
      "learning_rate": 0.0005412582590840917,
      "loss": 3.2309,
      "step": 46679
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5226566791534424,
      "learning_rate": 0.0005412558277680023,
      "loss": 2.8184,
      "step": 46680
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5648390054702759,
      "learning_rate": 0.0005412533964070588,
      "loss": 2.9087,
      "step": 46681
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6511201858520508,
      "learning_rate": 0.0005412509650012618,
      "loss": 3.135,
      "step": 46682
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7185522317886353,
      "learning_rate": 0.0005412485335506116,
      "loss": 3.2423,
      "step": 46683
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4893919229507446,
      "learning_rate": 0.0005412461020551088,
      "loss": 3.1928,
      "step": 46684
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4378318786621094,
      "learning_rate": 0.0005412436705147538,
      "loss": 3.124,
      "step": 46685
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5127663612365723,
      "learning_rate": 0.0005412412389295471,
      "loss": 3.2261,
      "step": 46686
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4618531465530396,
      "learning_rate": 0.000541238807299489,
      "loss": 2.6987,
      "step": 46687
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1129119396209717,
      "learning_rate": 0.0005412363756245801,
      "loss": 2.818,
      "step": 46688
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.665016770362854,
      "learning_rate": 0.0005412339439048209,
      "loss": 3.273,
      "step": 46689
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.794740915298462,
      "learning_rate": 0.0005412315121402115,
      "loss": 2.9912,
      "step": 46690
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.900955080986023,
      "learning_rate": 0.0005412290803307529,
      "loss": 2.9856,
      "step": 46691
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2916333675384521,
      "learning_rate": 0.000541226648476445,
      "loss": 3.0512,
      "step": 46692
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5724538564682007,
      "learning_rate": 0.0005412242165772887,
      "loss": 3.0507,
      "step": 46693
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3817138671875,
      "learning_rate": 0.0005412217846332842,
      "loss": 2.9818,
      "step": 46694
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6890621185302734,
      "learning_rate": 0.000541219352644432,
      "loss": 2.9545,
      "step": 46695
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.013664484024048,
      "learning_rate": 0.0005412169206107326,
      "loss": 2.8463,
      "step": 46696
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.372013807296753,
      "learning_rate": 0.0005412144885321864,
      "loss": 3.1402,
      "step": 46697
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6950660943984985,
      "learning_rate": 0.0005412120564087939,
      "loss": 3.1291,
      "step": 46698
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8017606735229492,
      "learning_rate": 0.0005412096242405555,
      "loss": 2.9844,
      "step": 46699
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.662903904914856,
      "learning_rate": 0.0005412071920274718,
      "loss": 3.1942,
      "step": 46700
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.725450038909912,
      "learning_rate": 0.000541204759769543,
      "loss": 2.9369,
      "step": 46701
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.690915822982788,
      "learning_rate": 0.0005412023274667696,
      "loss": 2.9549,
      "step": 46702
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0349740982055664,
      "learning_rate": 0.0005411998951191522,
      "loss": 2.8565,
      "step": 46703
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3348708152770996,
      "learning_rate": 0.0005411974627266913,
      "loss": 2.9474,
      "step": 46704
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6252591609954834,
      "learning_rate": 0.0005411950302893872,
      "loss": 3.2345,
      "step": 46705
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4921677112579346,
      "learning_rate": 0.0005411925978072403,
      "loss": 2.8298,
      "step": 46706
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4826014041900635,
      "learning_rate": 0.0005411901652802512,
      "loss": 3.2839,
      "step": 46707
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5281521081924438,
      "learning_rate": 0.0005411877327084202,
      "loss": 2.9246,
      "step": 46708
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6637874841690063,
      "learning_rate": 0.000541185300091748,
      "loss": 3.122,
      "step": 46709
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7205778360366821,
      "learning_rate": 0.0005411828674302347,
      "loss": 3.0754,
      "step": 46710
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.762465238571167,
      "learning_rate": 0.0005411804347238812,
      "loss": 3.086,
      "step": 46711
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7791115045547485,
      "learning_rate": 0.0005411780019726877,
      "loss": 3.0017,
      "step": 46712
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8596594333648682,
      "learning_rate": 0.0005411755691766545,
      "loss": 3.0463,
      "step": 46713
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.482899785041809,
      "learning_rate": 0.0005411731363357822,
      "loss": 3.1057,
      "step": 46714
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4198250770568848,
      "learning_rate": 0.0005411707034500714,
      "loss": 2.8495,
      "step": 46715
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5830378532409668,
      "learning_rate": 0.0005411682705195222,
      "loss": 2.9164,
      "step": 46716
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4080005884170532,
      "learning_rate": 0.0005411658375441354,
      "loss": 3.0286,
      "step": 46717
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2930432558059692,
      "learning_rate": 0.0005411634045239113,
      "loss": 3.0284,
      "step": 46718
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3713217973709106,
      "learning_rate": 0.0005411609714588505,
      "loss": 3.2908,
      "step": 46719
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1102702617645264,
      "learning_rate": 0.0005411585383489532,
      "loss": 2.9405,
      "step": 46720
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7888137102127075,
      "learning_rate": 0.00054115610519422,
      "loss": 3.0284,
      "step": 46721
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.127819299697876,
      "learning_rate": 0.0005411536719946513,
      "loss": 3.0627,
      "step": 46722
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.594175100326538,
      "learning_rate": 0.0005411512387502476,
      "loss": 3.0395,
      "step": 46723
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7749866247177124,
      "learning_rate": 0.0005411488054610093,
      "loss": 3.0207,
      "step": 46724
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4836125373840332,
      "learning_rate": 0.000541146372126937,
      "loss": 2.9914,
      "step": 46725
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7967398166656494,
      "learning_rate": 0.0005411439387480308,
      "loss": 3.0137,
      "step": 46726
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3284306526184082,
      "learning_rate": 0.0005411415053242916,
      "loss": 3.1783,
      "step": 46727
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3805819749832153,
      "learning_rate": 0.0005411390718557196,
      "loss": 3.2092,
      "step": 46728
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6687711477279663,
      "learning_rate": 0.0005411366383423153,
      "loss": 3.1168,
      "step": 46729
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5198901891708374,
      "learning_rate": 0.000541134204784079,
      "loss": 2.9597,
      "step": 46730
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7120989561080933,
      "learning_rate": 0.0005411317711810114,
      "loss": 2.8263,
      "step": 46731
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6276881694793701,
      "learning_rate": 0.000541129337533113,
      "loss": 2.9305,
      "step": 46732
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.557812452316284,
      "learning_rate": 0.000541126903840384,
      "loss": 3.2274,
      "step": 46733
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6262084245681763,
      "learning_rate": 0.0005411244701028249,
      "loss": 3.1,
      "step": 46734
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4543484449386597,
      "learning_rate": 0.0005411220363204362,
      "loss": 3.1425,
      "step": 46735
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5167948007583618,
      "learning_rate": 0.0005411196024932185,
      "loss": 3.3155,
      "step": 46736
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5571262836456299,
      "learning_rate": 0.000541117168621172,
      "loss": 3.1429,
      "step": 46737
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4178060293197632,
      "learning_rate": 0.0005411147347042973,
      "loss": 2.9177,
      "step": 46738
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.945499300956726,
      "learning_rate": 0.0005411123007425948,
      "loss": 3.0718,
      "step": 46739
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4066976308822632,
      "learning_rate": 0.000541109866736065,
      "loss": 3.0601,
      "step": 46740
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5018260478973389,
      "learning_rate": 0.0005411074326847082,
      "loss": 3.0027,
      "step": 46741
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8129805326461792,
      "learning_rate": 0.0005411049985885252,
      "loss": 2.967,
      "step": 46742
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0114026069641113,
      "learning_rate": 0.0005411025644475161,
      "loss": 3.0035,
      "step": 46743
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.879470944404602,
      "learning_rate": 0.0005411001302616815,
      "loss": 3.1101,
      "step": 46744
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.338232159614563,
      "learning_rate": 0.0005410976960310218,
      "loss": 2.901,
      "step": 46745
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1957812309265137,
      "learning_rate": 0.0005410952617555375,
      "loss": 3.1135,
      "step": 46746
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6092426776885986,
      "learning_rate": 0.000541092827435229,
      "loss": 3.0361,
      "step": 46747
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5476222038269043,
      "learning_rate": 0.0005410903930700969,
      "loss": 3.1294,
      "step": 46748
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7081855535507202,
      "learning_rate": 0.0005410879586601415,
      "loss": 2.8908,
      "step": 46749
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4534701108932495,
      "learning_rate": 0.0005410855242053633,
      "loss": 3.0841,
      "step": 46750
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.40719735622406,
      "learning_rate": 0.0005410830897057627,
      "loss": 2.9047,
      "step": 46751
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.658181071281433,
      "learning_rate": 0.0005410806551613403,
      "loss": 2.7879,
      "step": 46752
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.304750919342041,
      "learning_rate": 0.0005410782205720964,
      "loss": 3.1035,
      "step": 46753
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5721219778060913,
      "learning_rate": 0.0005410757859380315,
      "loss": 3.2428,
      "step": 46754
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.442007541656494,
      "learning_rate": 0.000541073351259146,
      "loss": 3.2627,
      "step": 46755
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.9636449813842773,
      "learning_rate": 0.0005410709165354405,
      "loss": 3.0295,
      "step": 46756
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.521942377090454,
      "learning_rate": 0.0005410684817669154,
      "loss": 2.9192,
      "step": 46757
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8014278411865234,
      "learning_rate": 0.000541066046953571,
      "loss": 3.159,
      "step": 46758
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4978041648864746,
      "learning_rate": 0.0005410636120954079,
      "loss": 2.9146,
      "step": 46759
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.095526695251465,
      "learning_rate": 0.0005410611771924267,
      "loss": 3.0046,
      "step": 46760
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6512706279754639,
      "learning_rate": 0.0005410587422446275,
      "loss": 2.9796,
      "step": 46761
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5596237182617188,
      "learning_rate": 0.0005410563072520109,
      "loss": 3.018,
      "step": 46762
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6411595344543457,
      "learning_rate": 0.0005410538722145775,
      "loss": 3.0111,
      "step": 46763
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6744399070739746,
      "learning_rate": 0.0005410514371323276,
      "loss": 3.1509,
      "step": 46764
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5446923971176147,
      "learning_rate": 0.0005410490020052617,
      "loss": 2.9611,
      "step": 46765
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.608636736869812,
      "learning_rate": 0.0005410465668333801,
      "loss": 2.9089,
      "step": 46766
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4968112707138062,
      "learning_rate": 0.0005410441316166837,
      "loss": 3.0181,
      "step": 46767
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9649009704589844,
      "learning_rate": 0.0005410416963551723,
      "loss": 3.0105,
      "step": 46768
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8576887845993042,
      "learning_rate": 0.0005410392610488468,
      "loss": 2.9793,
      "step": 46769
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4841482639312744,
      "learning_rate": 0.0005410368256977077,
      "loss": 2.7796,
      "step": 46770
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.249885082244873,
      "learning_rate": 0.0005410343903017552,
      "loss": 2.712,
      "step": 46771
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7539643049240112,
      "learning_rate": 0.0005410319548609898,
      "loss": 3.1872,
      "step": 46772
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9383693933486938,
      "learning_rate": 0.0005410295193754121,
      "loss": 2.9593,
      "step": 46773
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.62870192527771,
      "learning_rate": 0.0005410270838450225,
      "loss": 3.2156,
      "step": 46774
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.441267728805542,
      "learning_rate": 0.0005410246482698214,
      "loss": 3.0012,
      "step": 46775
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.202592611312866,
      "learning_rate": 0.0005410222126498092,
      "loss": 3.0894,
      "step": 46776
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4839062690734863,
      "learning_rate": 0.0005410197769849864,
      "loss": 2.9827,
      "step": 46777
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2945690155029297,
      "learning_rate": 0.0005410173412753534,
      "loss": 3.1317,
      "step": 46778
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.494487762451172,
      "learning_rate": 0.0005410149055209109,
      "loss": 3.1331,
      "step": 46779
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5007445812225342,
      "learning_rate": 0.0005410124697216591,
      "loss": 3.2267,
      "step": 46780
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5110633373260498,
      "learning_rate": 0.0005410100338775986,
      "loss": 2.9872,
      "step": 46781
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.957669734954834,
      "learning_rate": 0.0005410075979887297,
      "loss": 2.6981,
      "step": 46782
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.1173837184906006,
      "learning_rate": 0.0005410051620550531,
      "loss": 2.7054,
      "step": 46783
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.599332332611084,
      "learning_rate": 0.0005410027260765689,
      "loss": 2.9926,
      "step": 46784
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0856146812438965,
      "learning_rate": 0.0005410002900532779,
      "loss": 3.0573,
      "step": 46785
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8732539415359497,
      "learning_rate": 0.0005409978539851802,
      "loss": 3.0562,
      "step": 46786
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4383132457733154,
      "learning_rate": 0.0005409954178722766,
      "loss": 3.3159,
      "step": 46787
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7139222621917725,
      "learning_rate": 0.0005409929817145674,
      "loss": 2.9264,
      "step": 46788
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7105786800384521,
      "learning_rate": 0.0005409905455120529,
      "loss": 2.7958,
      "step": 46789
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.461486577987671,
      "learning_rate": 0.0005409881092647339,
      "loss": 3.0645,
      "step": 46790
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1726744174957275,
      "learning_rate": 0.0005409856729726105,
      "loss": 2.8708,
      "step": 46791
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.445897340774536,
      "learning_rate": 0.0005409832366356835,
      "loss": 2.8758,
      "step": 46792
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8000271320343018,
      "learning_rate": 0.000540980800253953,
      "loss": 3.2126,
      "step": 46793
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.381894588470459,
      "learning_rate": 0.0005409783638274197,
      "loss": 2.8805,
      "step": 46794
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7738568782806396,
      "learning_rate": 0.0005409759273560839,
      "loss": 3.1592,
      "step": 46795
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7643665075302124,
      "learning_rate": 0.0005409734908399463,
      "loss": 3.1445,
      "step": 46796
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5655523538589478,
      "learning_rate": 0.000540971054279007,
      "loss": 3.1354,
      "step": 46797
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3706419467926025,
      "learning_rate": 0.0005409686176732668,
      "loss": 3.0102,
      "step": 46798
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1236231327056885,
      "learning_rate": 0.0005409661810227258,
      "loss": 2.9952,
      "step": 46799
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9676830768585205,
      "learning_rate": 0.0005409637443273848,
      "loss": 2.9545,
      "step": 46800
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4160866737365723,
      "learning_rate": 0.000540961307587244,
      "loss": 3.0502,
      "step": 46801
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8851559162139893,
      "learning_rate": 0.0005409588708023039,
      "loss": 2.9236,
      "step": 46802
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8992680311203003,
      "learning_rate": 0.0005409564339725651,
      "loss": 3.159,
      "step": 46803
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7401989698410034,
      "learning_rate": 0.0005409539970980279,
      "loss": 2.7768,
      "step": 46804
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4853386878967285,
      "learning_rate": 0.0005409515601786927,
      "loss": 3.2955,
      "step": 46805
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5276788473129272,
      "learning_rate": 0.0005409491232145603,
      "loss": 3.0409,
      "step": 46806
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5906219482421875,
      "learning_rate": 0.0005409466862056308,
      "loss": 3.0294,
      "step": 46807
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8684465885162354,
      "learning_rate": 0.0005409442491519046,
      "loss": 3.1087,
      "step": 46808
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.831000566482544,
      "learning_rate": 0.0005409418120533825,
      "loss": 2.8873,
      "step": 46809
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3419069051742554,
      "learning_rate": 0.0005409393749100647,
      "loss": 2.9766,
      "step": 46810
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5016456842422485,
      "learning_rate": 0.0005409369377219518,
      "loss": 3.0178,
      "step": 46811
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2663575410842896,
      "learning_rate": 0.0005409345004890441,
      "loss": 3.0731,
      "step": 46812
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6487655639648438,
      "learning_rate": 0.0005409320632113421,
      "loss": 3.0947,
      "step": 46813
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6410449743270874,
      "learning_rate": 0.0005409296258888465,
      "loss": 3.0808,
      "step": 46814
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5816216468811035,
      "learning_rate": 0.0005409271885215573,
      "loss": 3.0265,
      "step": 46815
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.6592154502868652,
      "learning_rate": 0.0005409247511094752,
      "loss": 3.3436,
      "step": 46816
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3754653930664062,
      "learning_rate": 0.0005409223136526007,
      "loss": 2.8408,
      "step": 46817
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8476440906524658,
      "learning_rate": 0.0005409198761509343,
      "loss": 3.1253,
      "step": 46818
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5392910242080688,
      "learning_rate": 0.0005409174386044762,
      "loss": 3.0239,
      "step": 46819
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1271491050720215,
      "learning_rate": 0.000540915001013227,
      "loss": 3.4175,
      "step": 46820
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4750465154647827,
      "learning_rate": 0.0005409125633771872,
      "loss": 3.0005,
      "step": 46821
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.441899061203003,
      "learning_rate": 0.0005409101256963572,
      "loss": 3.0531,
      "step": 46822
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7327739000320435,
      "learning_rate": 0.0005409076879707374,
      "loss": 3.004,
      "step": 46823
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.897853970527649,
      "learning_rate": 0.0005409052502003284,
      "loss": 2.9268,
      "step": 46824
    },
    {
      "epoch": 0.61,
      "grad_norm": 4.589244365692139,
      "learning_rate": 0.0005409028123851305,
      "loss": 3.0362,
      "step": 46825
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9285718202590942,
      "learning_rate": 0.0005409003745251443,
      "loss": 2.9111,
      "step": 46826
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0109639167785645,
      "learning_rate": 0.0005408979366203702,
      "loss": 2.9789,
      "step": 46827
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6704447269439697,
      "learning_rate": 0.0005408954986708085,
      "loss": 3.1367,
      "step": 46828
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8576663732528687,
      "learning_rate": 0.0005408930606764599,
      "loss": 3.1268,
      "step": 46829
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.0945181846618652,
      "learning_rate": 0.0005408906226373247,
      "loss": 2.9832,
      "step": 46830
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.864101529121399,
      "learning_rate": 0.0005408881845534033,
      "loss": 3.3378,
      "step": 46831
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7083492279052734,
      "learning_rate": 0.0005408857464246963,
      "loss": 3.1315,
      "step": 46832
    },
    {
      "epoch": 0.61,
      "grad_norm": 4.887156009674072,
      "learning_rate": 0.0005408833082512041,
      "loss": 3.0578,
      "step": 46833
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.7814524173736572,
      "learning_rate": 0.0005408808700329271,
      "loss": 2.824,
      "step": 46834
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7846782207489014,
      "learning_rate": 0.0005408784317698658,
      "loss": 2.9984,
      "step": 46835
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.76431405544281,
      "learning_rate": 0.0005408759934620207,
      "loss": 3.0674,
      "step": 46836
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9127297401428223,
      "learning_rate": 0.0005408735551093922,
      "loss": 2.9367,
      "step": 46837
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4064290523529053,
      "learning_rate": 0.0005408711167119807,
      "loss": 3.0619,
      "step": 46838
    },
    {
      "epoch": 0.61,
      "grad_norm": 9.212973594665527,
      "learning_rate": 0.0005408686782697868,
      "loss": 2.9506,
      "step": 46839
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5678696632385254,
      "learning_rate": 0.0005408662397828109,
      "loss": 3.3429,
      "step": 46840
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8591933250427246,
      "learning_rate": 0.0005408638012510533,
      "loss": 2.9892,
      "step": 46841
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4397225379943848,
      "learning_rate": 0.0005408613626745147,
      "loss": 3.063,
      "step": 46842
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2179837226867676,
      "learning_rate": 0.0005408589240531953,
      "loss": 3.3486,
      "step": 46843
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4910452365875244,
      "learning_rate": 0.0005408564853870957,
      "loss": 3.0053,
      "step": 46844
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9427158832550049,
      "learning_rate": 0.0005408540466762164,
      "loss": 3.2106,
      "step": 46845
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5121675729751587,
      "learning_rate": 0.0005408516079205577,
      "loss": 3.1541,
      "step": 46846
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3987669944763184,
      "learning_rate": 0.0005408491691201202,
      "loss": 2.9663,
      "step": 46847
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.407195806503296,
      "learning_rate": 0.0005408467302749042,
      "loss": 3.1052,
      "step": 46848
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.346716284751892,
      "learning_rate": 0.0005408442913849104,
      "loss": 2.6258,
      "step": 46849
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7493832111358643,
      "learning_rate": 0.000540841852450139,
      "loss": 3.079,
      "step": 46850
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5888863801956177,
      "learning_rate": 0.0005408394134705907,
      "loss": 3.1651,
      "step": 46851
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6325522661209106,
      "learning_rate": 0.0005408369744462656,
      "loss": 3.0583,
      "step": 46852
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3919099569320679,
      "learning_rate": 0.0005408345353771644,
      "loss": 2.904,
      "step": 46853
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8146746158599854,
      "learning_rate": 0.0005408320962632876,
      "loss": 2.9412,
      "step": 46854
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.6598265171051025,
      "learning_rate": 0.0005408296571046355,
      "loss": 2.919,
      "step": 46855
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7361984252929688,
      "learning_rate": 0.0005408272179012087,
      "loss": 2.998,
      "step": 46856
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4654353857040405,
      "learning_rate": 0.0005408247786530075,
      "loss": 3.0403,
      "step": 46857
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.722460389137268,
      "learning_rate": 0.0005408223393600324,
      "loss": 2.9884,
      "step": 46858
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4153170585632324,
      "learning_rate": 0.000540819900022284,
      "loss": 3.0195,
      "step": 46859
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5742136240005493,
      "learning_rate": 0.0005408174606397625,
      "loss": 3.1307,
      "step": 46860
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8926944732666016,
      "learning_rate": 0.0005408150212124687,
      "loss": 3.1372,
      "step": 46861
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9045218229293823,
      "learning_rate": 0.0005408125817404026,
      "loss": 2.7327,
      "step": 46862
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4881020784378052,
      "learning_rate": 0.0005408101422235649,
      "loss": 2.953,
      "step": 46863
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3332966566085815,
      "learning_rate": 0.0005408077026619562,
      "loss": 2.8609,
      "step": 46864
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5790873765945435,
      "learning_rate": 0.0005408052630555768,
      "loss": 3.0044,
      "step": 46865
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6454827785491943,
      "learning_rate": 0.0005408028234044271,
      "loss": 3.1286,
      "step": 46866
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4162214994430542,
      "learning_rate": 0.0005408003837085076,
      "loss": 3.1132,
      "step": 46867
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.949203610420227,
      "learning_rate": 0.0005407979439678188,
      "loss": 3.2611,
      "step": 46868
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1233980655670166,
      "learning_rate": 0.0005407955041823611,
      "loss": 3.0036,
      "step": 46869
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5559524297714233,
      "learning_rate": 0.0005407930643521349,
      "loss": 2.9523,
      "step": 46870
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5853911638259888,
      "learning_rate": 0.0005407906244771408,
      "loss": 2.962,
      "step": 46871
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.489862322807312,
      "learning_rate": 0.0005407881845573791,
      "loss": 3.0506,
      "step": 46872
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.079509973526001,
      "learning_rate": 0.0005407857445928504,
      "loss": 3.1338,
      "step": 46873
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3205910921096802,
      "learning_rate": 0.0005407833045835552,
      "loss": 3.096,
      "step": 46874
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2447315454483032,
      "learning_rate": 0.0005407808645294937,
      "loss": 3.0625,
      "step": 46875
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3579620122909546,
      "learning_rate": 0.0005407784244306664,
      "loss": 2.95,
      "step": 46876
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5734691619873047,
      "learning_rate": 0.000540775984287074,
      "loss": 2.9777,
      "step": 46877
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.36319100856781,
      "learning_rate": 0.0005407735440987167,
      "loss": 2.8839,
      "step": 46878
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3518847227096558,
      "learning_rate": 0.0005407711038655952,
      "loss": 2.8229,
      "step": 46879
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8031527996063232,
      "learning_rate": 0.0005407686635877097,
      "loss": 3.2119,
      "step": 46880
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5363463163375854,
      "learning_rate": 0.0005407662232650607,
      "loss": 3.1826,
      "step": 46881
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.747687578201294,
      "learning_rate": 0.0005407637828976488,
      "loss": 2.9675,
      "step": 46882
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9968912601470947,
      "learning_rate": 0.0005407613424854743,
      "loss": 3.0551,
      "step": 46883
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1088578701019287,
      "learning_rate": 0.0005407589020285378,
      "loss": 3.1893,
      "step": 46884
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.6196796894073486,
      "learning_rate": 0.0005407564615268396,
      "loss": 3.0112,
      "step": 46885
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3437297344207764,
      "learning_rate": 0.0005407540209803804,
      "loss": 2.9745,
      "step": 46886
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5974905490875244,
      "learning_rate": 0.0005407515803891604,
      "loss": 2.9346,
      "step": 46887
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5129562616348267,
      "learning_rate": 0.00054074913975318,
      "loss": 3.0481,
      "step": 46888
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8418598175048828,
      "learning_rate": 0.00054074669907244,
      "loss": 3.1105,
      "step": 46889
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.567908763885498,
      "learning_rate": 0.0005407442583469405,
      "loss": 3.118,
      "step": 46890
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.0307412147521973,
      "learning_rate": 0.000540741817576682,
      "loss": 2.7749,
      "step": 46891
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.616986870765686,
      "learning_rate": 0.0005407393767616653,
      "loss": 2.8802,
      "step": 46892
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5436010360717773,
      "learning_rate": 0.0005407369359018905,
      "loss": 2.9312,
      "step": 46893
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.0141024589538574,
      "learning_rate": 0.0005407344949973581,
      "loss": 3.252,
      "step": 46894
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.4128005504608154,
      "learning_rate": 0.0005407320540480687,
      "loss": 3.142,
      "step": 46895
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.6999127864837646,
      "learning_rate": 0.0005407296130540226,
      "loss": 3.0228,
      "step": 46896
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5552810430526733,
      "learning_rate": 0.0005407271720152203,
      "loss": 3.2781,
      "step": 46897
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6828533411026,
      "learning_rate": 0.0005407247309316623,
      "loss": 3.158,
      "step": 46898
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.330990791320801,
      "learning_rate": 0.0005407222898033492,
      "loss": 3.2221,
      "step": 46899
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2570559978485107,
      "learning_rate": 0.000540719848630281,
      "loss": 3.1023,
      "step": 46900
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7196921110153198,
      "learning_rate": 0.0005407174074124586,
      "loss": 3.3538,
      "step": 46901
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5788414478302,
      "learning_rate": 0.0005407149661498823,
      "loss": 3.1459,
      "step": 46902
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2580766677856445,
      "learning_rate": 0.0005407125248425525,
      "loss": 2.9513,
      "step": 46903
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.1215691566467285,
      "learning_rate": 0.0005407100834904698,
      "loss": 2.9435,
      "step": 46904
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.673860788345337,
      "learning_rate": 0.0005407076420936344,
      "loss": 2.9906,
      "step": 46905
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4653960466384888,
      "learning_rate": 0.000540705200652047,
      "loss": 3.2427,
      "step": 46906
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.5010478496551514,
      "learning_rate": 0.0005407027591657078,
      "loss": 3.1704,
      "step": 46907
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.1936097145080566,
      "learning_rate": 0.0005407003176346176,
      "loss": 2.9879,
      "step": 46908
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9384958744049072,
      "learning_rate": 0.0005406978760587767,
      "loss": 3.1814,
      "step": 46909
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.706804633140564,
      "learning_rate": 0.0005406954344381853,
      "loss": 3.0962,
      "step": 46910
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7431548833847046,
      "learning_rate": 0.0005406929927728442,
      "loss": 3.248,
      "step": 46911
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3491719961166382,
      "learning_rate": 0.0005406905510627538,
      "loss": 2.995,
      "step": 46912
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3399935960769653,
      "learning_rate": 0.0005406881093079144,
      "loss": 3.1975,
      "step": 46913
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3926608562469482,
      "learning_rate": 0.0005406856675083266,
      "loss": 2.9052,
      "step": 46914
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6035281419754028,
      "learning_rate": 0.0005406832256639907,
      "loss": 3.3002,
      "step": 46915
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.994099736213684,
      "learning_rate": 0.0005406807837749073,
      "loss": 2.9282,
      "step": 46916
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7760552167892456,
      "learning_rate": 0.0005406783418410768,
      "loss": 3.067,
      "step": 46917
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6165484189987183,
      "learning_rate": 0.0005406758998624996,
      "loss": 3.1882,
      "step": 46918
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7803391218185425,
      "learning_rate": 0.0005406734578391762,
      "loss": 3.1378,
      "step": 46919
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.438279390335083,
      "learning_rate": 0.0005406710157711071,
      "loss": 2.9289,
      "step": 46920
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6122870445251465,
      "learning_rate": 0.0005406685736582928,
      "loss": 3.0969,
      "step": 46921
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5564223527908325,
      "learning_rate": 0.0005406661315007336,
      "loss": 2.857,
      "step": 46922
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6095432043075562,
      "learning_rate": 0.00054066368929843,
      "loss": 3.2637,
      "step": 46923
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4400451183319092,
      "learning_rate": 0.0005406612470513825,
      "loss": 2.8774,
      "step": 46924
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8443591594696045,
      "learning_rate": 0.0005406588047595915,
      "loss": 3.0885,
      "step": 46925
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5242748260498047,
      "learning_rate": 0.0005406563624230575,
      "loss": 3.0801,
      "step": 46926
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6482720375061035,
      "learning_rate": 0.0005406539200417809,
      "loss": 3.005,
      "step": 46927
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5230647325515747,
      "learning_rate": 0.0005406514776157623,
      "loss": 2.9189,
      "step": 46928
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0542068481445312,
      "learning_rate": 0.000540649035145002,
      "loss": 3.0538,
      "step": 46929
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9187722206115723,
      "learning_rate": 0.0005406465926295004,
      "loss": 2.9387,
      "step": 46930
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2772698402404785,
      "learning_rate": 0.0005406441500692582,
      "loss": 2.9321,
      "step": 46931
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3703827857971191,
      "learning_rate": 0.0005406417074642757,
      "loss": 2.8972,
      "step": 46932
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.467231273651123,
      "learning_rate": 0.0005406392648145533,
      "loss": 3.1396,
      "step": 46933
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3852674961090088,
      "learning_rate": 0.0005406368221200916,
      "loss": 3.248,
      "step": 46934
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9728988409042358,
      "learning_rate": 0.0005406343793808909,
      "loss": 2.8477,
      "step": 46935
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6204020977020264,
      "learning_rate": 0.0005406319365969517,
      "loss": 3.1311,
      "step": 46936
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3742235898971558,
      "learning_rate": 0.0005406294937682745,
      "loss": 3.0578,
      "step": 46937
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4385478496551514,
      "learning_rate": 0.0005406270508948596,
      "loss": 3.0248,
      "step": 46938
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4697908163070679,
      "learning_rate": 0.0005406246079767079,
      "loss": 3.1899,
      "step": 46939
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.811608076095581,
      "learning_rate": 0.0005406221650138194,
      "loss": 3.2076,
      "step": 46940
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3487348556518555,
      "learning_rate": 0.0005406197220061946,
      "loss": 2.9001,
      "step": 46941
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8870717287063599,
      "learning_rate": 0.0005406172789538342,
      "loss": 2.8174,
      "step": 46942
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.096073627471924,
      "learning_rate": 0.0005406148358567384,
      "loss": 2.9218,
      "step": 46943
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.096975088119507,
      "learning_rate": 0.0005406123927149078,
      "loss": 2.944,
      "step": 46944
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.628444790840149,
      "learning_rate": 0.0005406099495283427,
      "loss": 3.0353,
      "step": 46945
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7845841646194458,
      "learning_rate": 0.0005406075062970439,
      "loss": 3.0368,
      "step": 46946
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.1729228496551514,
      "learning_rate": 0.0005406050630210115,
      "loss": 3.0267,
      "step": 46947
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7753190994262695,
      "learning_rate": 0.0005406026197002461,
      "loss": 3.0504,
      "step": 46948
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6175246238708496,
      "learning_rate": 0.0005406001763347481,
      "loss": 3.1651,
      "step": 46949
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3485302925109863,
      "learning_rate": 0.0005405977329245179,
      "loss": 3.0297,
      "step": 46950
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7719852924346924,
      "learning_rate": 0.0005405952894695562,
      "loss": 3.1133,
      "step": 46951
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3918073177337646,
      "learning_rate": 0.0005405928459698633,
      "loss": 3.0071,
      "step": 46952
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4970498085021973,
      "learning_rate": 0.0005405904024254395,
      "loss": 2.969,
      "step": 46953
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6850069761276245,
      "learning_rate": 0.0005405879588362856,
      "loss": 3.2672,
      "step": 46954
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1554818153381348,
      "learning_rate": 0.0005405855152024017,
      "loss": 3.0105,
      "step": 46955
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8383419513702393,
      "learning_rate": 0.0005405830715237885,
      "loss": 3.1141,
      "step": 46956
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.477929949760437,
      "learning_rate": 0.0005405806278004463,
      "loss": 3.0349,
      "step": 46957
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5401620864868164,
      "learning_rate": 0.0005405781840323756,
      "loss": 3.2759,
      "step": 46958
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.495187759399414,
      "learning_rate": 0.0005405757402195769,
      "loss": 3.207,
      "step": 46959
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5663145780563354,
      "learning_rate": 0.0005405732963620507,
      "loss": 3.1379,
      "step": 46960
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0797574520111084,
      "learning_rate": 0.0005405708524597973,
      "loss": 2.8732,
      "step": 46961
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4628219604492188,
      "learning_rate": 0.0005405684085128172,
      "loss": 2.9096,
      "step": 46962
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8385223150253296,
      "learning_rate": 0.000540565964521111,
      "loss": 3.1163,
      "step": 46963
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8769631385803223,
      "learning_rate": 0.000540563520484679,
      "loss": 3.2774,
      "step": 46964
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6601260900497437,
      "learning_rate": 0.0005405610764035217,
      "loss": 3.1705,
      "step": 46965
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4127076864242554,
      "learning_rate": 0.0005405586322776395,
      "loss": 2.7933,
      "step": 46966
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1374123096466064,
      "learning_rate": 0.000540556188107033,
      "loss": 2.8687,
      "step": 46967
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5632145404815674,
      "learning_rate": 0.0005405537438917025,
      "loss": 2.9071,
      "step": 46968
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8431710004806519,
      "learning_rate": 0.0005405512996316486,
      "loss": 3.1866,
      "step": 46969
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6851798295974731,
      "learning_rate": 0.0005405488553268716,
      "loss": 3.0643,
      "step": 46970
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4018983840942383,
      "learning_rate": 0.000540546410977372,
      "loss": 3.0324,
      "step": 46971
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5901156663894653,
      "learning_rate": 0.0005405439665831504,
      "loss": 3.1888,
      "step": 46972
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.587605595588684,
      "learning_rate": 0.0005405415221442071,
      "loss": 3.1104,
      "step": 46973
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7608351707458496,
      "learning_rate": 0.0005405390776605425,
      "loss": 3.1235,
      "step": 46974
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4489432573318481,
      "learning_rate": 0.0005405366331321572,
      "loss": 2.9756,
      "step": 46975
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6740549802780151,
      "learning_rate": 0.0005405341885590516,
      "loss": 3.0404,
      "step": 46976
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9886207580566406,
      "learning_rate": 0.0005405317439412261,
      "loss": 3.1588,
      "step": 46977
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.729803442955017,
      "learning_rate": 0.0005405292992786813,
      "loss": 3.0304,
      "step": 46978
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6758902072906494,
      "learning_rate": 0.0005405268545714175,
      "loss": 2.9715,
      "step": 46979
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8344975709915161,
      "learning_rate": 0.0005405244098194352,
      "loss": 3.167,
      "step": 46980
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2003748416900635,
      "learning_rate": 0.000540521965022735,
      "loss": 3.0503,
      "step": 46981
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6691778898239136,
      "learning_rate": 0.0005405195201813171,
      "loss": 3.2136,
      "step": 46982
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8718546628952026,
      "learning_rate": 0.0005405170752951822,
      "loss": 2.9153,
      "step": 46983
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5275318622589111,
      "learning_rate": 0.0005405146303643306,
      "loss": 3.0636,
      "step": 46984
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6542407274246216,
      "learning_rate": 0.0005405121853887627,
      "loss": 3.1802,
      "step": 46985
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2382965087890625,
      "learning_rate": 0.000540509740368479,
      "loss": 3.0725,
      "step": 46986
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6382087469100952,
      "learning_rate": 0.0005405072953034801,
      "loss": 3.1171,
      "step": 46987
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8842973709106445,
      "learning_rate": 0.0005405048501937663,
      "loss": 3.1577,
      "step": 46988
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8318159580230713,
      "learning_rate": 0.0005405024050393382,
      "loss": 3.0765,
      "step": 46989
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4619560241699219,
      "learning_rate": 0.0005404999598401961,
      "loss": 3.1872,
      "step": 46990
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5983058214187622,
      "learning_rate": 0.0005404975145963406,
      "loss": 3.2161,
      "step": 46991
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2762980461120605,
      "learning_rate": 0.000540495069307772,
      "loss": 2.7594,
      "step": 46992
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8116686344146729,
      "learning_rate": 0.0005404926239744908,
      "loss": 2.9779,
      "step": 46993
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.416135311126709,
      "learning_rate": 0.0005404901785964975,
      "loss": 3.1975,
      "step": 46994
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.747288465499878,
      "learning_rate": 0.0005404877331737926,
      "loss": 3.3248,
      "step": 46995
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7599660158157349,
      "learning_rate": 0.0005404852877063764,
      "loss": 3.0467,
      "step": 46996
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.605386734008789,
      "learning_rate": 0.0005404828421942496,
      "loss": 3.0695,
      "step": 46997
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5777387619018555,
      "learning_rate": 0.0005404803966374123,
      "loss": 3.0411,
      "step": 46998
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.290419340133667,
      "learning_rate": 0.0005404779510358652,
      "loss": 2.9924,
      "step": 46999
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.6432507038116455,
      "learning_rate": 0.0005404755053896087,
      "loss": 3.0734,
      "step": 47000
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.652419090270996,
      "learning_rate": 0.0005404730596986435,
      "loss": 2.8979,
      "step": 47001
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4481635093688965,
      "learning_rate": 0.0005404706139629696,
      "loss": 3.0502,
      "step": 47002
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.832263708114624,
      "learning_rate": 0.0005404681681825877,
      "loss": 3.052,
      "step": 47003
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.318429470062256,
      "learning_rate": 0.0005404657223574982,
      "loss": 3.0763,
      "step": 47004
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.682485818862915,
      "learning_rate": 0.0005404632764877016,
      "loss": 2.8822,
      "step": 47005
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9348881244659424,
      "learning_rate": 0.0005404608305731983,
      "loss": 3.0664,
      "step": 47006
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4931230545043945,
      "learning_rate": 0.0005404583846139889,
      "loss": 3.0627,
      "step": 47007
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.826943039894104,
      "learning_rate": 0.0005404559386100738,
      "loss": 3.212,
      "step": 47008
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5020885467529297,
      "learning_rate": 0.0005404534925614532,
      "loss": 2.8593,
      "step": 47009
    },
    {
      "epoch": 0.61,
      "grad_norm": 4.95867919921875,
      "learning_rate": 0.0005404510464681279,
      "loss": 2.774,
      "step": 47010
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1345314979553223,
      "learning_rate": 0.0005404486003300981,
      "loss": 3.0012,
      "step": 47011
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4214198589324951,
      "learning_rate": 0.0005404461541473645,
      "loss": 3.0729,
      "step": 47012
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8847326040267944,
      "learning_rate": 0.0005404437079199274,
      "loss": 3.0887,
      "step": 47013
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1051251888275146,
      "learning_rate": 0.0005404412616477871,
      "loss": 3.3239,
      "step": 47014
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.029052972793579,
      "learning_rate": 0.0005404388153309444,
      "loss": 2.9269,
      "step": 47015
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0179338455200195,
      "learning_rate": 0.0005404363689693996,
      "loss": 2.9306,
      "step": 47016
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.299769401550293,
      "learning_rate": 0.000540433922563153,
      "loss": 3.0117,
      "step": 47017
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9438170194625854,
      "learning_rate": 0.0005404314761122053,
      "loss": 2.8769,
      "step": 47018
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2884328365325928,
      "learning_rate": 0.0005404290296165569,
      "loss": 2.9974,
      "step": 47019
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7675251960754395,
      "learning_rate": 0.0005404265830762082,
      "loss": 2.9618,
      "step": 47020
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8761465549468994,
      "learning_rate": 0.0005404241364911595,
      "loss": 3.0305,
      "step": 47021
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5611841678619385,
      "learning_rate": 0.0005404216898614115,
      "loss": 2.7458,
      "step": 47022
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4597069025039673,
      "learning_rate": 0.0005404192431869646,
      "loss": 3.0815,
      "step": 47023
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.592487096786499,
      "learning_rate": 0.0005404167964678192,
      "loss": 3.1038,
      "step": 47024
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.7580373287200928,
      "learning_rate": 0.0005404143497039758,
      "loss": 2.894,
      "step": 47025
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.319011926651001,
      "learning_rate": 0.0005404119028954348,
      "loss": 3.3622,
      "step": 47026
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5072522163391113,
      "learning_rate": 0.0005404094560421967,
      "loss": 3.3104,
      "step": 47027
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6904696226119995,
      "learning_rate": 0.0005404070091442619,
      "loss": 3.1407,
      "step": 47028
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6692047119140625,
      "learning_rate": 0.000540404562201631,
      "loss": 3.0971,
      "step": 47029
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4231537580490112,
      "learning_rate": 0.0005404021152143043,
      "loss": 2.9851,
      "step": 47030
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.282799243927002,
      "learning_rate": 0.0005403996681822824,
      "loss": 2.8979,
      "step": 47031
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.802085280418396,
      "learning_rate": 0.0005403972211055656,
      "loss": 2.8678,
      "step": 47032
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2644922733306885,
      "learning_rate": 0.0005403947739841544,
      "loss": 3.0972,
      "step": 47033
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.054487705230713,
      "learning_rate": 0.0005403923268180492,
      "loss": 3.0729,
      "step": 47034
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3511713743209839,
      "learning_rate": 0.0005403898796072506,
      "loss": 2.876,
      "step": 47035
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.111222743988037,
      "learning_rate": 0.0005403874323517589,
      "loss": 2.9979,
      "step": 47036
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.182655096054077,
      "learning_rate": 0.0005403849850515748,
      "loss": 3.1003,
      "step": 47037
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5444492101669312,
      "learning_rate": 0.0005403825377066985,
      "loss": 2.9711,
      "step": 47038
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3099491596221924,
      "learning_rate": 0.0005403800903171305,
      "loss": 3.0529,
      "step": 47039
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4425923824310303,
      "learning_rate": 0.0005403776428828715,
      "loss": 3.0151,
      "step": 47040
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7173211574554443,
      "learning_rate": 0.0005403751954039215,
      "loss": 3.136,
      "step": 47041
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5383901596069336,
      "learning_rate": 0.0005403727478802814,
      "loss": 3.016,
      "step": 47042
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3525387048721313,
      "learning_rate": 0.0005403703003119514,
      "loss": 2.9141,
      "step": 47043
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4484329223632812,
      "learning_rate": 0.000540367852698932,
      "loss": 3.0548,
      "step": 47044
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.3980226516723633,
      "learning_rate": 0.0005403654050412238,
      "loss": 3.027,
      "step": 47045
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8246064186096191,
      "learning_rate": 0.000540362957338827,
      "loss": 3.0218,
      "step": 47046
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3601276874542236,
      "learning_rate": 0.0005403605095917422,
      "loss": 2.8838,
      "step": 47047
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9901398420333862,
      "learning_rate": 0.0005403580617999699,
      "loss": 3.1306,
      "step": 47048
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.413080096244812,
      "learning_rate": 0.0005403556139635105,
      "loss": 3.1613,
      "step": 47049
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.668228268623352,
      "learning_rate": 0.0005403531660823645,
      "loss": 3.1601,
      "step": 47050
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6763978004455566,
      "learning_rate": 0.0005403507181565322,
      "loss": 3.0235,
      "step": 47051
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8081656694412231,
      "learning_rate": 0.0005403482701860143,
      "loss": 3.1202,
      "step": 47052
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.7305660247802734,
      "learning_rate": 0.0005403458221708109,
      "loss": 3.129,
      "step": 47053
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5663200616836548,
      "learning_rate": 0.0005403433741109228,
      "loss": 2.9304,
      "step": 47054
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2794411182403564,
      "learning_rate": 0.0005403409260063504,
      "loss": 2.9642,
      "step": 47055
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.00616192817688,
      "learning_rate": 0.000540338477857094,
      "loss": 3.0253,
      "step": 47056
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4252583980560303,
      "learning_rate": 0.0005403360296631542,
      "loss": 3.0595,
      "step": 47057
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6905068159103394,
      "learning_rate": 0.0005403335814245313,
      "loss": 2.959,
      "step": 47058
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.714247226715088,
      "learning_rate": 0.0005403311331412258,
      "loss": 2.9881,
      "step": 47059
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1391732692718506,
      "learning_rate": 0.0005403286848132384,
      "loss": 3.1725,
      "step": 47060
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5375088453292847,
      "learning_rate": 0.0005403262364405693,
      "loss": 3.0265,
      "step": 47061
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8289811611175537,
      "learning_rate": 0.0005403237880232189,
      "loss": 3.0904,
      "step": 47062
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1544504165649414,
      "learning_rate": 0.0005403213395611878,
      "loss": 3.0913,
      "step": 47063
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2440557479858398,
      "learning_rate": 0.0005403188910544765,
      "loss": 3.2336,
      "step": 47064
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4427649974822998,
      "learning_rate": 0.0005403164425030855,
      "loss": 3.0094,
      "step": 47065
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.6641995906829834,
      "learning_rate": 0.0005403139939070149,
      "loss": 2.7364,
      "step": 47066
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3708685636520386,
      "learning_rate": 0.0005403115452662655,
      "loss": 2.8166,
      "step": 47067
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5522516965866089,
      "learning_rate": 0.0005403090965808376,
      "loss": 2.9478,
      "step": 47068
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4531059265136719,
      "learning_rate": 0.0005403066478507319,
      "loss": 3.0642,
      "step": 47069
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5395212173461914,
      "learning_rate": 0.0005403041990759485,
      "loss": 3.1692,
      "step": 47070
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8215399980545044,
      "learning_rate": 0.000540301750256488,
      "loss": 2.953,
      "step": 47071
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9415251016616821,
      "learning_rate": 0.0005402993013923508,
      "loss": 3.1014,
      "step": 47072
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5843504667282104,
      "learning_rate": 0.0005402968524835375,
      "loss": 3.157,
      "step": 47073
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5029568672180176,
      "learning_rate": 0.0005402944035300485,
      "loss": 2.917,
      "step": 47074
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4722638130187988,
      "learning_rate": 0.0005402919545318842,
      "loss": 2.7635,
      "step": 47075
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4569398164749146,
      "learning_rate": 0.0005402895054890453,
      "loss": 3.1175,
      "step": 47076
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5245814323425293,
      "learning_rate": 0.0005402870564015317,
      "loss": 3.0747,
      "step": 47077
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2738503217697144,
      "learning_rate": 0.0005402846072693443,
      "loss": 3.0497,
      "step": 47078
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5087943077087402,
      "learning_rate": 0.0005402821580924836,
      "loss": 3.1107,
      "step": 47079
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6790053844451904,
      "learning_rate": 0.0005402797088709498,
      "loss": 2.9579,
      "step": 47080
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8288321495056152,
      "learning_rate": 0.0005402772596047435,
      "loss": 3.0942,
      "step": 47081
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6179172992706299,
      "learning_rate": 0.0005402748102938652,
      "loss": 2.8508,
      "step": 47082
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6761770248413086,
      "learning_rate": 0.0005402723609383151,
      "loss": 3.0398,
      "step": 47083
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.605206847190857,
      "learning_rate": 0.0005402699115380939,
      "loss": 2.802,
      "step": 47084
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.08550763130188,
      "learning_rate": 0.000540267462093202,
      "loss": 3.1248,
      "step": 47085
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.855782389640808,
      "learning_rate": 0.0005402650126036399,
      "loss": 3.0623,
      "step": 47086
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2309892177581787,
      "learning_rate": 0.000540262563069408,
      "loss": 3.0368,
      "step": 47087
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6473448276519775,
      "learning_rate": 0.0005402601134905067,
      "loss": 3.0912,
      "step": 47088
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.838050127029419,
      "learning_rate": 0.0005402576638669365,
      "loss": 2.8601,
      "step": 47089
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0345771312713623,
      "learning_rate": 0.0005402552141986979,
      "loss": 3.0181,
      "step": 47090
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6529200077056885,
      "learning_rate": 0.0005402527644857913,
      "loss": 3.1362,
      "step": 47091
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.533747911453247,
      "learning_rate": 0.0005402503147282172,
      "loss": 3.0286,
      "step": 47092
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0032999515533447,
      "learning_rate": 0.000540247864925976,
      "loss": 2.951,
      "step": 47093
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7490085363388062,
      "learning_rate": 0.0005402454150790681,
      "loss": 3.2062,
      "step": 47094
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6237294673919678,
      "learning_rate": 0.0005402429651874942,
      "loss": 3.1581,
      "step": 47095
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8017055988311768,
      "learning_rate": 0.0005402405152512545,
      "loss": 3.1118,
      "step": 47096
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.221811056137085,
      "learning_rate": 0.0005402380652703496,
      "loss": 3.0241,
      "step": 47097
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8008928298950195,
      "learning_rate": 0.00054023561524478,
      "loss": 2.9343,
      "step": 47098
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.606724500656128,
      "learning_rate": 0.0005402331651745459,
      "loss": 3.0968,
      "step": 47099
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6905109882354736,
      "learning_rate": 0.0005402307150596479,
      "loss": 3.191,
      "step": 47100
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8615226745605469,
      "learning_rate": 0.0005402282649000866,
      "loss": 2.8378,
      "step": 47101
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7967135906219482,
      "learning_rate": 0.0005402258146958623,
      "loss": 3.0648,
      "step": 47102
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6121511459350586,
      "learning_rate": 0.0005402233644469755,
      "loss": 3.0459,
      "step": 47103
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9061681032180786,
      "learning_rate": 0.0005402209141534266,
      "loss": 3.1277,
      "step": 47104
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3949702978134155,
      "learning_rate": 0.0005402184638152161,
      "loss": 2.8833,
      "step": 47105
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9944840669631958,
      "learning_rate": 0.0005402160134323445,
      "loss": 3.089,
      "step": 47106
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.856624722480774,
      "learning_rate": 0.0005402135630048121,
      "loss": 3.0839,
      "step": 47107
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3673192262649536,
      "learning_rate": 0.0005402111125326196,
      "loss": 2.859,
      "step": 47108
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4753837585449219,
      "learning_rate": 0.0005402086620157672,
      "loss": 2.9405,
      "step": 47109
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6388806104660034,
      "learning_rate": 0.0005402062114542556,
      "loss": 3.0743,
      "step": 47110
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8301794528961182,
      "learning_rate": 0.0005402037608480851,
      "loss": 3.1167,
      "step": 47111
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7216306924819946,
      "learning_rate": 0.0005402013101972561,
      "loss": 3.1229,
      "step": 47112
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7387875318527222,
      "learning_rate": 0.0005401988595017692,
      "loss": 3.2961,
      "step": 47113
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8390666246414185,
      "learning_rate": 0.0005401964087616248,
      "loss": 3.0931,
      "step": 47114
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.824691653251648,
      "learning_rate": 0.0005401939579768233,
      "loss": 3.0378,
      "step": 47115
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.289064884185791,
      "learning_rate": 0.0005401915071473652,
      "loss": 3.1988,
      "step": 47116
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3829864263534546,
      "learning_rate": 0.0005401890562732511,
      "loss": 3.1653,
      "step": 47117
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8914775848388672,
      "learning_rate": 0.0005401866053544813,
      "loss": 2.8695,
      "step": 47118
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3708069324493408,
      "learning_rate": 0.0005401841543910562,
      "loss": 2.9265,
      "step": 47119
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7049583196640015,
      "learning_rate": 0.0005401817033829764,
      "loss": 3.1302,
      "step": 47120
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9846470355987549,
      "learning_rate": 0.0005401792523302422,
      "loss": 3.0024,
      "step": 47121
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8651299476623535,
      "learning_rate": 0.0005401768012328542,
      "loss": 2.8861,
      "step": 47122
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1122875213623047,
      "learning_rate": 0.0005401743500908127,
      "loss": 3.0686,
      "step": 47123
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.597699522972107,
      "learning_rate": 0.0005401718989041183,
      "loss": 3.1641,
      "step": 47124
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.2249553203582764,
      "learning_rate": 0.0005401694476727715,
      "loss": 2.8429,
      "step": 47125
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5937379598617554,
      "learning_rate": 0.0005401669963967726,
      "loss": 2.9712,
      "step": 47126
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3871777057647705,
      "learning_rate": 0.0005401645450761221,
      "loss": 2.9426,
      "step": 47127
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4765958786010742,
      "learning_rate": 0.0005401620937108206,
      "loss": 3.2884,
      "step": 47128
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7434478998184204,
      "learning_rate": 0.0005401596423008683,
      "loss": 3.0416,
      "step": 47129
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.836299180984497,
      "learning_rate": 0.0005401571908462659,
      "loss": 2.9514,
      "step": 47130
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.7471935749053955,
      "learning_rate": 0.0005401547393470137,
      "loss": 2.9318,
      "step": 47131
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5423922538757324,
      "learning_rate": 0.0005401522878031121,
      "loss": 3.1802,
      "step": 47132
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.04374361038208,
      "learning_rate": 0.0005401498362145618,
      "loss": 3.0865,
      "step": 47133
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.278287649154663,
      "learning_rate": 0.000540147384581363,
      "loss": 3.1616,
      "step": 47134
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5130383968353271,
      "learning_rate": 0.0005401449329035164,
      "loss": 2.9393,
      "step": 47135
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.299004316329956,
      "learning_rate": 0.0005401424811810221,
      "loss": 3.0115,
      "step": 47136
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6847386360168457,
      "learning_rate": 0.000540140029413881,
      "loss": 2.8835,
      "step": 47137
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7185193300247192,
      "learning_rate": 0.0005401375776020932,
      "loss": 3.0925,
      "step": 47138
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4785289764404297,
      "learning_rate": 0.0005401351257456593,
      "loss": 2.9088,
      "step": 47139
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.659702181816101,
      "learning_rate": 0.0005401326738445799,
      "loss": 3.0866,
      "step": 47140
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.662397027015686,
      "learning_rate": 0.0005401302218988552,
      "loss": 3.2433,
      "step": 47141
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5358529090881348,
      "learning_rate": 0.0005401277699084857,
      "loss": 3.0765,
      "step": 47142
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4070273637771606,
      "learning_rate": 0.000540125317873472,
      "loss": 3.0196,
      "step": 47143
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.002671957015991,
      "learning_rate": 0.0005401228657938144,
      "loss": 3.1033,
      "step": 47144
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4655883312225342,
      "learning_rate": 0.0005401204136695134,
      "loss": 3.1565,
      "step": 47145
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.9868738651275635,
      "learning_rate": 0.0005401179615005696,
      "loss": 3.0927,
      "step": 47146
    },
    {
      "epoch": 0.61,
      "grad_norm": 3.135056972503662,
      "learning_rate": 0.0005401155092869833,
      "loss": 3.0204,
      "step": 47147
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4520238637924194,
      "learning_rate": 0.000540113057028755,
      "loss": 2.9512,
      "step": 47148
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.1647284030914307,
      "learning_rate": 0.0005401106047258851,
      "loss": 2.906,
      "step": 47149
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.496267795562744,
      "learning_rate": 0.0005401081523783742,
      "loss": 3.1052,
      "step": 47150
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3882890939712524,
      "learning_rate": 0.0005401056999862226,
      "loss": 3.1735,
      "step": 47151
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.799520492553711,
      "learning_rate": 0.0005401032475494308,
      "loss": 3.1516,
      "step": 47152
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6256461143493652,
      "learning_rate": 0.0005401007950679993,
      "loss": 2.8517,
      "step": 47153
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8178635835647583,
      "learning_rate": 0.0005400983425419286,
      "loss": 3.3003,
      "step": 47154
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.394178032875061,
      "learning_rate": 0.000540095889971219,
      "loss": 3.0384,
      "step": 47155
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.7135751247406006,
      "learning_rate": 0.000540093437355871,
      "loss": 2.9323,
      "step": 47156
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5297720432281494,
      "learning_rate": 0.0005400909846958851,
      "loss": 3.1712,
      "step": 47157
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5335447788238525,
      "learning_rate": 0.0005400885319912619,
      "loss": 2.8552,
      "step": 47158
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.5975756645202637,
      "learning_rate": 0.0005400860792420016,
      "loss": 3.1006,
      "step": 47159
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.382908821105957,
      "learning_rate": 0.0005400836264481048,
      "loss": 2.8196,
      "step": 47160
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.371018409729004,
      "learning_rate": 0.0005400811736095718,
      "loss": 3.054,
      "step": 47161
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6008375883102417,
      "learning_rate": 0.0005400787207264033,
      "loss": 3.1718,
      "step": 47162
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6180676221847534,
      "learning_rate": 0.0005400762677985996,
      "loss": 2.9993,
      "step": 47163
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5212026834487915,
      "learning_rate": 0.0005400738148261612,
      "loss": 3.1523,
      "step": 47164
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3526777029037476,
      "learning_rate": 0.0005400713618090885,
      "loss": 2.9937,
      "step": 47165
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4484223127365112,
      "learning_rate": 0.0005400689087473821,
      "loss": 3.1054,
      "step": 47166
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.569799542427063,
      "learning_rate": 0.0005400664556410423,
      "loss": 2.8781,
      "step": 47167
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8598308563232422,
      "learning_rate": 0.0005400640024900697,
      "loss": 3.3243,
      "step": 47168
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5020923614501953,
      "learning_rate": 0.0005400615492944645,
      "loss": 2.8883,
      "step": 47169
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4379832744598389,
      "learning_rate": 0.0005400590960542275,
      "loss": 2.9582,
      "step": 47170
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6637848615646362,
      "learning_rate": 0.0005400566427693589,
      "loss": 2.9668,
      "step": 47171
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0500130653381348,
      "learning_rate": 0.0005400541894398593,
      "loss": 3.019,
      "step": 47172
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3948224782943726,
      "learning_rate": 0.0005400517360657289,
      "loss": 2.9491,
      "step": 47173
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8147975206375122,
      "learning_rate": 0.0005400492826469686,
      "loss": 3.085,
      "step": 47174
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4225034713745117,
      "learning_rate": 0.0005400468291835784,
      "loss": 3.0063,
      "step": 47175
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9416732788085938,
      "learning_rate": 0.0005400443756755591,
      "loss": 3.0816,
      "step": 47176
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4887994527816772,
      "learning_rate": 0.000540041922122911,
      "loss": 3.1851,
      "step": 47177
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.485758900642395,
      "learning_rate": 0.0005400394685256346,
      "loss": 3.0172,
      "step": 47178
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4546092748641968,
      "learning_rate": 0.0005400370148837301,
      "loss": 2.9137,
      "step": 47179
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4438507556915283,
      "learning_rate": 0.0005400345611971984,
      "loss": 3.0876,
      "step": 47180
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6413847208023071,
      "learning_rate": 0.0005400321074660397,
      "loss": 3.0282,
      "step": 47181
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6737101078033447,
      "learning_rate": 0.0005400296536902546,
      "loss": 3.0466,
      "step": 47182
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.590746521949768,
      "learning_rate": 0.0005400271998698434,
      "loss": 2.9553,
      "step": 47183
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8048069477081299,
      "learning_rate": 0.0005400247460048066,
      "loss": 2.883,
      "step": 47184
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4969661235809326,
      "learning_rate": 0.0005400222920951446,
      "loss": 3.0157,
      "step": 47185
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7609717845916748,
      "learning_rate": 0.000540019838140858,
      "loss": 2.8375,
      "step": 47186
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.627303957939148,
      "learning_rate": 0.0005400173841419472,
      "loss": 3.1369,
      "step": 47187
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6309925317764282,
      "learning_rate": 0.0005400149300984126,
      "loss": 3.1517,
      "step": 47188
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0274362564086914,
      "learning_rate": 0.0005400124760102546,
      "loss": 3.2804,
      "step": 47189
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.521486759185791,
      "learning_rate": 0.000540010021877474,
      "loss": 3.0697,
      "step": 47190
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.780447244644165,
      "learning_rate": 0.0005400075677000708,
      "loss": 3.0692,
      "step": 47191
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.536278247833252,
      "learning_rate": 0.0005400051134780457,
      "loss": 2.9418,
      "step": 47192
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4431052207946777,
      "learning_rate": 0.0005400026592113992,
      "loss": 3.2867,
      "step": 47193
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.2790205478668213,
      "learning_rate": 0.0005400002049001314,
      "loss": 2.9248,
      "step": 47194
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.703349232673645,
      "learning_rate": 0.0005399977505442433,
      "loss": 2.8936,
      "step": 47195
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7526795864105225,
      "learning_rate": 0.0005399952961437351,
      "loss": 2.8483,
      "step": 47196
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6377636194229126,
      "learning_rate": 0.0005399928416986072,
      "loss": 3.0208,
      "step": 47197
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.219468116760254,
      "learning_rate": 0.00053999038720886,
      "loss": 3.2152,
      "step": 47198
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.008370876312256,
      "learning_rate": 0.0005399879326744941,
      "loss": 3.2174,
      "step": 47199
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6384156942367554,
      "learning_rate": 0.0005399854780955099,
      "loss": 3.1743,
      "step": 47200
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9590288400650024,
      "learning_rate": 0.000539983023471908,
      "loss": 3.0818,
      "step": 47201
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8069607019424438,
      "learning_rate": 0.0005399805688036886,
      "loss": 3.1157,
      "step": 47202
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5606024265289307,
      "learning_rate": 0.0005399781140908523,
      "loss": 3.029,
      "step": 47203
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9579061269760132,
      "learning_rate": 0.0005399756593333997,
      "loss": 2.8416,
      "step": 47204
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9399404525756836,
      "learning_rate": 0.0005399732045313308,
      "loss": 2.7963,
      "step": 47205
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3977481126785278,
      "learning_rate": 0.0005399707496846466,
      "loss": 3.1456,
      "step": 47206
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5435205698013306,
      "learning_rate": 0.0005399682947933471,
      "loss": 3.4879,
      "step": 47207
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.754793643951416,
      "learning_rate": 0.0005399658398574332,
      "loss": 3.1657,
      "step": 47208
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5503898859024048,
      "learning_rate": 0.0005399633848769049,
      "loss": 3.1437,
      "step": 47209
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7451294660568237,
      "learning_rate": 0.000539960929851763,
      "loss": 3.0207,
      "step": 47210
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.6082216501235962,
      "learning_rate": 0.0005399584747820078,
      "loss": 3.0298,
      "step": 47211
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9228975772857666,
      "learning_rate": 0.0005399560196676398,
      "loss": 3.2996,
      "step": 47212
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.441972255706787,
      "learning_rate": 0.0005399535645086595,
      "loss": 3.3876,
      "step": 47213
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.7286064624786377,
      "learning_rate": 0.0005399511093050672,
      "loss": 3.0256,
      "step": 47214
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8873487710952759,
      "learning_rate": 0.0005399486540568634,
      "loss": 2.9247,
      "step": 47215
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9514936208724976,
      "learning_rate": 0.0005399461987640488,
      "loss": 2.8444,
      "step": 47216
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8729995489120483,
      "learning_rate": 0.0005399437434266236,
      "loss": 3.1843,
      "step": 47217
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.517685890197754,
      "learning_rate": 0.0005399412880445883,
      "loss": 3.1065,
      "step": 47218
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.4364540576934814,
      "learning_rate": 0.0005399388326179435,
      "loss": 3.1071,
      "step": 47219
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3923559188842773,
      "learning_rate": 0.0005399363771466894,
      "loss": 3.0481,
      "step": 47220
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5940382480621338,
      "learning_rate": 0.0005399339216308266,
      "loss": 3.0408,
      "step": 47221
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.3756285905838013,
      "learning_rate": 0.0005399314660703556,
      "loss": 3.0403,
      "step": 47222
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.5099923610687256,
      "learning_rate": 0.0005399290104652768,
      "loss": 2.8551,
      "step": 47223
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4780688285827637,
      "learning_rate": 0.0005399265548155907,
      "loss": 2.9551,
      "step": 47224
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.8242337703704834,
      "learning_rate": 0.0005399240991212977,
      "loss": 2.7645,
      "step": 47225
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.118643283843994,
      "learning_rate": 0.0005399216433823983,
      "loss": 2.8637,
      "step": 47226
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.0172462463378906,
      "learning_rate": 0.0005399191875988929,
      "loss": 2.9806,
      "step": 47227
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4637483358383179,
      "learning_rate": 0.0005399167317707821,
      "loss": 3.1408,
      "step": 47228
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.9479187726974487,
      "learning_rate": 0.0005399142758980662,
      "loss": 2.9259,
      "step": 47229
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.288287878036499,
      "learning_rate": 0.0005399118199807456,
      "loss": 3.0588,
      "step": 47230
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.66441810131073,
      "learning_rate": 0.0005399093640188209,
      "loss": 2.9495,
      "step": 47231
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.7639360427856445,
      "learning_rate": 0.0005399069080122927,
      "loss": 3.2708,
      "step": 47232
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0019307136535645,
      "learning_rate": 0.000539904451961161,
      "loss": 2.9773,
      "step": 47233
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6408700942993164,
      "learning_rate": 0.0005399019958654267,
      "loss": 3.1466,
      "step": 47234
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4983025789260864,
      "learning_rate": 0.0005398995397250901,
      "loss": 3.1536,
      "step": 47235
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.29261314868927,
      "learning_rate": 0.0005398970835401516,
      "loss": 3.068,
      "step": 47236
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0363404750823975,
      "learning_rate": 0.0005398946273106117,
      "loss": 3.0917,
      "step": 47237
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4046484231948853,
      "learning_rate": 0.0005398921710364708,
      "loss": 2.8491,
      "step": 47238
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.9737703800201416,
      "learning_rate": 0.0005398897147177295,
      "loss": 3.095,
      "step": 47239
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2406270503997803,
      "learning_rate": 0.0005398872583543882,
      "loss": 3.1602,
      "step": 47240
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.129218578338623,
      "learning_rate": 0.0005398848019464473,
      "loss": 3.2499,
      "step": 47241
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.280557632446289,
      "learning_rate": 0.0005398823454939072,
      "loss": 2.899,
      "step": 47242
    },
    {
      "epoch": 0.62,
      "grad_norm": 4.016861438751221,
      "learning_rate": 0.0005398798889967686,
      "loss": 3.0444,
      "step": 47243
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.060678482055664,
      "learning_rate": 0.0005398774324550318,
      "loss": 2.9342,
      "step": 47244
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5117989778518677,
      "learning_rate": 0.0005398749758686971,
      "loss": 3.1508,
      "step": 47245
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.21675181388855,
      "learning_rate": 0.0005398725192377651,
      "loss": 2.9712,
      "step": 47246
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7193559408187866,
      "learning_rate": 0.0005398700625622366,
      "loss": 2.902,
      "step": 47247
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6263872385025024,
      "learning_rate": 0.0005398676058421114,
      "loss": 3.1244,
      "step": 47248
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.469206690788269,
      "learning_rate": 0.0005398651490773903,
      "loss": 2.8943,
      "step": 47249
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.19028377532959,
      "learning_rate": 0.0005398626922680738,
      "loss": 3.1397,
      "step": 47250
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.479360818862915,
      "learning_rate": 0.0005398602354141625,
      "loss": 3.0617,
      "step": 47251
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4416475296020508,
      "learning_rate": 0.0005398577785156564,
      "loss": 2.9682,
      "step": 47252
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.07784366607666,
      "learning_rate": 0.0005398553215725563,
      "loss": 3.3825,
      "step": 47253
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3198702335357666,
      "learning_rate": 0.0005398528645848625,
      "loss": 2.9077,
      "step": 47254
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7141664028167725,
      "learning_rate": 0.0005398504075525756,
      "loss": 2.7524,
      "step": 47255
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9127038717269897,
      "learning_rate": 0.000539847950475696,
      "loss": 2.7674,
      "step": 47256
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.732567071914673,
      "learning_rate": 0.0005398454933542241,
      "loss": 3.0834,
      "step": 47257
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5154633522033691,
      "learning_rate": 0.0005398430361881604,
      "loss": 2.7622,
      "step": 47258
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6698318719863892,
      "learning_rate": 0.0005398405789775054,
      "loss": 2.9873,
      "step": 47259
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1614131927490234,
      "learning_rate": 0.0005398381217222594,
      "loss": 3.1195,
      "step": 47260
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8967465162277222,
      "learning_rate": 0.000539835664422423,
      "loss": 3.0581,
      "step": 47261
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1487245559692383,
      "learning_rate": 0.0005398332070779967,
      "loss": 3.1149,
      "step": 47262
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.664009690284729,
      "learning_rate": 0.0005398307496889807,
      "loss": 2.6887,
      "step": 47263
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.0187721252441406,
      "learning_rate": 0.0005398282922553758,
      "loss": 3.1607,
      "step": 47264
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.16855788230896,
      "learning_rate": 0.0005398258347771822,
      "loss": 3.0522,
      "step": 47265
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4191855192184448,
      "learning_rate": 0.0005398233772544006,
      "loss": 3.1689,
      "step": 47266
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9594452381134033,
      "learning_rate": 0.0005398209196870312,
      "loss": 3.2679,
      "step": 47267
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.026658773422241,
      "learning_rate": 0.0005398184620750746,
      "loss": 3.2373,
      "step": 47268
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.38395094871521,
      "learning_rate": 0.0005398160044185311,
      "loss": 3.1047,
      "step": 47269
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.758884072303772,
      "learning_rate": 0.0005398135467174014,
      "loss": 3.0341,
      "step": 47270
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.6435964107513428,
      "learning_rate": 0.0005398110889716859,
      "loss": 3.0983,
      "step": 47271
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9834314584732056,
      "learning_rate": 0.0005398086311813848,
      "loss": 3.0653,
      "step": 47272
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3862015008926392,
      "learning_rate": 0.0005398061733464988,
      "loss": 3.2059,
      "step": 47273
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.124221086502075,
      "learning_rate": 0.0005398037154670284,
      "loss": 3.0739,
      "step": 47274
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7319215536117554,
      "learning_rate": 0.0005398012575429739,
      "loss": 3.0624,
      "step": 47275
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.60640549659729,
      "learning_rate": 0.000539798799574336,
      "loss": 3.2508,
      "step": 47276
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.625333547592163,
      "learning_rate": 0.0005397963415611147,
      "loss": 3.1663,
      "step": 47277
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5759427547454834,
      "learning_rate": 0.0005397938835033109,
      "loss": 2.9986,
      "step": 47278
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5560194253921509,
      "learning_rate": 0.0005397914254009248,
      "loss": 3.1674,
      "step": 47279
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9741268157958984,
      "learning_rate": 0.000539788967253957,
      "loss": 3.0586,
      "step": 47280
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7924306392669678,
      "learning_rate": 0.0005397865090624079,
      "loss": 3.2159,
      "step": 47281
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.403529167175293,
      "learning_rate": 0.0005397840508262779,
      "loss": 3.0761,
      "step": 47282
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3030714988708496,
      "learning_rate": 0.0005397815925455677,
      "loss": 2.9164,
      "step": 47283
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0890698432922363,
      "learning_rate": 0.0005397791342202774,
      "loss": 3.0949,
      "step": 47284
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.120187282562256,
      "learning_rate": 0.0005397766758504076,
      "loss": 3.2495,
      "step": 47285
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8796404600143433,
      "learning_rate": 0.0005397742174359588,
      "loss": 3.0693,
      "step": 47286
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9438785314559937,
      "learning_rate": 0.0005397717589769315,
      "loss": 3.0802,
      "step": 47287
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8454707860946655,
      "learning_rate": 0.0005397693004733262,
      "loss": 2.8654,
      "step": 47288
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.143303394317627,
      "learning_rate": 0.0005397668419251432,
      "loss": 2.9893,
      "step": 47289
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.7766194343566895,
      "learning_rate": 0.000539764383332383,
      "loss": 2.9666,
      "step": 47290
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4728343486785889,
      "learning_rate": 0.000539761924695046,
      "loss": 2.9095,
      "step": 47291
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3553357124328613,
      "learning_rate": 0.0005397594660131328,
      "loss": 2.8836,
      "step": 47292
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4605140686035156,
      "learning_rate": 0.0005397570072866438,
      "loss": 2.9745,
      "step": 47293
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4206424951553345,
      "learning_rate": 0.0005397545485155795,
      "loss": 3.0779,
      "step": 47294
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.950519323348999,
      "learning_rate": 0.0005397520896999403,
      "loss": 2.8451,
      "step": 47295
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.382879614830017,
      "learning_rate": 0.0005397496308397266,
      "loss": 3.1407,
      "step": 47296
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2777860164642334,
      "learning_rate": 0.000539747171934939,
      "loss": 3.0706,
      "step": 47297
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6522018909454346,
      "learning_rate": 0.0005397447129855777,
      "loss": 2.7711,
      "step": 47298
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3983310461044312,
      "learning_rate": 0.0005397422539916434,
      "loss": 3.2928,
      "step": 47299
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.400114893913269,
      "learning_rate": 0.0005397397949531365,
      "loss": 3.1942,
      "step": 47300
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7197237014770508,
      "learning_rate": 0.0005397373358700576,
      "loss": 2.8378,
      "step": 47301
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.66121244430542,
      "learning_rate": 0.0005397348767424068,
      "loss": 2.8231,
      "step": 47302
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5838751792907715,
      "learning_rate": 0.0005397324175701848,
      "loss": 3.0516,
      "step": 47303
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.501793146133423,
      "learning_rate": 0.0005397299583533921,
      "loss": 2.7061,
      "step": 47304
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8428889513015747,
      "learning_rate": 0.000539727499092029,
      "loss": 2.976,
      "step": 47305
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.85852313041687,
      "learning_rate": 0.0005397250397860961,
      "loss": 2.9588,
      "step": 47306
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.3757636547088623,
      "learning_rate": 0.0005397225804355937,
      "loss": 3.0138,
      "step": 47307
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.707805871963501,
      "learning_rate": 0.0005397201210405224,
      "loss": 2.9831,
      "step": 47308
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5430939197540283,
      "learning_rate": 0.0005397176616008827,
      "loss": 3.3403,
      "step": 47309
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.707719087600708,
      "learning_rate": 0.0005397152021166747,
      "loss": 2.9615,
      "step": 47310
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1393415927886963,
      "learning_rate": 0.0005397127425878993,
      "loss": 3.2214,
      "step": 47311
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2344363927841187,
      "learning_rate": 0.0005397102830145569,
      "loss": 3.0424,
      "step": 47312
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8966469764709473,
      "learning_rate": 0.0005397078233966477,
      "loss": 3.0189,
      "step": 47313
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6637568473815918,
      "learning_rate": 0.0005397053637341723,
      "loss": 2.9872,
      "step": 47314
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.568274974822998,
      "learning_rate": 0.000539702904027131,
      "loss": 3.034,
      "step": 47315
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.763215184211731,
      "learning_rate": 0.0005397004442755245,
      "loss": 3.1368,
      "step": 47316
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3814672231674194,
      "learning_rate": 0.0005396979844793533,
      "loss": 2.9736,
      "step": 47317
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8320270776748657,
      "learning_rate": 0.0005396955246386176,
      "loss": 2.8731,
      "step": 47318
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.719746708869934,
      "learning_rate": 0.0005396930647533179,
      "loss": 3.1122,
      "step": 47319
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5469330549240112,
      "learning_rate": 0.0005396906048234549,
      "loss": 3.0257,
      "step": 47320
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6244176626205444,
      "learning_rate": 0.0005396881448490288,
      "loss": 2.9178,
      "step": 47321
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.976736307144165,
      "learning_rate": 0.0005396856848300402,
      "loss": 3.2384,
      "step": 47322
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.835376262664795,
      "learning_rate": 0.0005396832247664895,
      "loss": 3.0165,
      "step": 47323
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5914751291275024,
      "learning_rate": 0.000539680764658377,
      "loss": 3.105,
      "step": 47324
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3723549842834473,
      "learning_rate": 0.0005396783045057036,
      "loss": 2.8926,
      "step": 47325
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6952660083770752,
      "learning_rate": 0.0005396758443084693,
      "loss": 2.9298,
      "step": 47326
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.894606590270996,
      "learning_rate": 0.0005396733840666748,
      "loss": 3.1494,
      "step": 47327
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2953171730041504,
      "learning_rate": 0.0005396709237803205,
      "loss": 2.9016,
      "step": 47328
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4395649433135986,
      "learning_rate": 0.0005396684634494068,
      "loss": 2.9579,
      "step": 47329
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4104926586151123,
      "learning_rate": 0.0005396660030739342,
      "loss": 3.0999,
      "step": 47330
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.655091643333435,
      "learning_rate": 0.0005396635426539033,
      "loss": 3.2654,
      "step": 47331
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4766515493392944,
      "learning_rate": 0.0005396610821893143,
      "loss": 2.8981,
      "step": 47332
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.918736219406128,
      "learning_rate": 0.0005396586216801678,
      "loss": 2.9153,
      "step": 47333
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.85492742061615,
      "learning_rate": 0.0005396561611264642,
      "loss": 2.9673,
      "step": 47334
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9026787281036377,
      "learning_rate": 0.000539653700528204,
      "loss": 3.0743,
      "step": 47335
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8516839742660522,
      "learning_rate": 0.0005396512398853877,
      "loss": 3.1954,
      "step": 47336
    },
    {
      "epoch": 0.62,
      "grad_norm": 4.81642484664917,
      "learning_rate": 0.0005396487791980157,
      "loss": 3.049,
      "step": 47337
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.6483352184295654,
      "learning_rate": 0.0005396463184660885,
      "loss": 3.1062,
      "step": 47338
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.167483329772949,
      "learning_rate": 0.0005396438576896065,
      "loss": 2.7448,
      "step": 47339
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.328085422515869,
      "learning_rate": 0.0005396413968685701,
      "loss": 3.1075,
      "step": 47340
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.8138113021850586,
      "learning_rate": 0.0005396389360029799,
      "loss": 2.9393,
      "step": 47341
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.721754312515259,
      "learning_rate": 0.0005396364750928364,
      "loss": 3.0125,
      "step": 47342
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9940215349197388,
      "learning_rate": 0.0005396340141381399,
      "loss": 2.8723,
      "step": 47343
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.128082752227783,
      "learning_rate": 0.0005396315531388909,
      "loss": 3.1998,
      "step": 47344
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.8402702808380127,
      "learning_rate": 0.0005396290920950898,
      "loss": 3.0337,
      "step": 47345
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.833096742630005,
      "learning_rate": 0.0005396266310067373,
      "loss": 2.802,
      "step": 47346
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.50763738155365,
      "learning_rate": 0.0005396241698738335,
      "loss": 3.3914,
      "step": 47347
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8214316368103027,
      "learning_rate": 0.0005396217086963791,
      "loss": 3.0237,
      "step": 47348
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.0876264572143555,
      "learning_rate": 0.0005396192474743745,
      "loss": 2.9487,
      "step": 47349
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5165112018585205,
      "learning_rate": 0.0005396167862078202,
      "loss": 3.0295,
      "step": 47350
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.543290376663208,
      "learning_rate": 0.0005396143248967166,
      "loss": 3.0377,
      "step": 47351
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.581351399421692,
      "learning_rate": 0.0005396118635410641,
      "loss": 3.0016,
      "step": 47352
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.767702341079712,
      "learning_rate": 0.0005396094021408634,
      "loss": 3.0036,
      "step": 47353
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8534759283065796,
      "learning_rate": 0.0005396069406961146,
      "loss": 2.8679,
      "step": 47354
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9757782220840454,
      "learning_rate": 0.0005396044792068185,
      "loss": 2.7034,
      "step": 47355
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4121527671813965,
      "learning_rate": 0.0005396020176729753,
      "loss": 2.9879,
      "step": 47356
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.170137643814087,
      "learning_rate": 0.0005395995560945856,
      "loss": 2.9988,
      "step": 47357
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4643208980560303,
      "learning_rate": 0.0005395970944716499,
      "loss": 3.0126,
      "step": 47358
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4704512357711792,
      "learning_rate": 0.0005395946328041685,
      "loss": 2.7976,
      "step": 47359
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1419382095336914,
      "learning_rate": 0.0005395921710921418,
      "loss": 3.2499,
      "step": 47360
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.083096504211426,
      "learning_rate": 0.0005395897093355706,
      "loss": 3.1804,
      "step": 47361
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3598192930221558,
      "learning_rate": 0.0005395872475344551,
      "loss": 2.6277,
      "step": 47362
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.756528615951538,
      "learning_rate": 0.0005395847856887958,
      "loss": 3.0925,
      "step": 47363
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.3402206897735596,
      "learning_rate": 0.0005395823237985931,
      "loss": 2.9122,
      "step": 47364
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8840950727462769,
      "learning_rate": 0.0005395798618638475,
      "loss": 3.491,
      "step": 47365
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.916031837463379,
      "learning_rate": 0.0005395773998845597,
      "loss": 3.0274,
      "step": 47366
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.7503645420074463,
      "learning_rate": 0.0005395749378607297,
      "loss": 2.9007,
      "step": 47367
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5870810747146606,
      "learning_rate": 0.0005395724757923583,
      "loss": 3.1154,
      "step": 47368
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2844109535217285,
      "learning_rate": 0.0005395700136794459,
      "loss": 2.9946,
      "step": 47369
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7362796068191528,
      "learning_rate": 0.0005395675515219928,
      "loss": 2.9138,
      "step": 47370
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3771886825561523,
      "learning_rate": 0.0005395650893199996,
      "loss": 3.0943,
      "step": 47371
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.327709436416626,
      "learning_rate": 0.0005395626270734668,
      "loss": 2.8226,
      "step": 47372
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.301622986793518,
      "learning_rate": 0.0005395601647823948,
      "loss": 3.2468,
      "step": 47373
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.724411129951477,
      "learning_rate": 0.0005395577024467839,
      "loss": 3.1019,
      "step": 47374
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.635737657546997,
      "learning_rate": 0.0005395552400666348,
      "loss": 3.4147,
      "step": 47375
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.597913384437561,
      "learning_rate": 0.0005395527776419478,
      "loss": 3.212,
      "step": 47376
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7488726377487183,
      "learning_rate": 0.0005395503151727235,
      "loss": 3.0117,
      "step": 47377
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6412324905395508,
      "learning_rate": 0.0005395478526589622,
      "loss": 3.2062,
      "step": 47378
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7916748523712158,
      "learning_rate": 0.0005395453901006644,
      "loss": 2.9143,
      "step": 47379
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.76804518699646,
      "learning_rate": 0.0005395429274978308,
      "loss": 3.0809,
      "step": 47380
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5625426769256592,
      "learning_rate": 0.0005395404648504614,
      "loss": 3.0892,
      "step": 47381
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.364730715751648,
      "learning_rate": 0.0005395380021585571,
      "loss": 3.1065,
      "step": 47382
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5804921388626099,
      "learning_rate": 0.000539535539422118,
      "loss": 2.9538,
      "step": 47383
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4765419960021973,
      "learning_rate": 0.0005395330766411448,
      "loss": 3.0139,
      "step": 47384
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3121285438537598,
      "learning_rate": 0.0005395306138156379,
      "loss": 2.9705,
      "step": 47385
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8069508075714111,
      "learning_rate": 0.0005395281509455978,
      "loss": 3.171,
      "step": 47386
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8399852514266968,
      "learning_rate": 0.0005395256880310247,
      "loss": 3.1446,
      "step": 47387
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1715657711029053,
      "learning_rate": 0.0005395232250719194,
      "loss": 2.9915,
      "step": 47388
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.0251221656799316,
      "learning_rate": 0.0005395207620682822,
      "loss": 2.83,
      "step": 47389
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4556816816329956,
      "learning_rate": 0.0005395182990201136,
      "loss": 3.269,
      "step": 47390
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9455486536026,
      "learning_rate": 0.0005395158359274138,
      "loss": 3.1428,
      "step": 47391
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.79319167137146,
      "learning_rate": 0.0005395133727901837,
      "loss": 2.9401,
      "step": 47392
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3120248317718506,
      "learning_rate": 0.0005395109096084235,
      "loss": 3.2423,
      "step": 47393
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.236149311065674,
      "learning_rate": 0.0005395084463821337,
      "loss": 3.2512,
      "step": 47394
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.365628957748413,
      "learning_rate": 0.0005395059831113147,
      "loss": 3.1301,
      "step": 47395
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6063648462295532,
      "learning_rate": 0.0005395035197959672,
      "loss": 3.0476,
      "step": 47396
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4672383069992065,
      "learning_rate": 0.0005395010564360913,
      "loss": 3.0968,
      "step": 47397
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3788000345230103,
      "learning_rate": 0.0005394985930316877,
      "loss": 3.4133,
      "step": 47398
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5449296236038208,
      "learning_rate": 0.0005394961295827567,
      "loss": 3.1159,
      "step": 47399
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6646357774734497,
      "learning_rate": 0.000539493666089299,
      "loss": 2.8889,
      "step": 47400
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4318145513534546,
      "learning_rate": 0.0005394912025513148,
      "loss": 3.0046,
      "step": 47401
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1979650259017944,
      "learning_rate": 0.0005394887389688047,
      "loss": 3.3929,
      "step": 47402
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5505930185317993,
      "learning_rate": 0.0005394862753417691,
      "loss": 2.9318,
      "step": 47403
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6576565504074097,
      "learning_rate": 0.0005394838116702085,
      "loss": 3.1888,
      "step": 47404
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3940880298614502,
      "learning_rate": 0.0005394813479541234,
      "loss": 3.2537,
      "step": 47405
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.187143564224243,
      "learning_rate": 0.0005394788841935141,
      "loss": 3.1078,
      "step": 47406
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1919443607330322,
      "learning_rate": 0.0005394764203883812,
      "loss": 3.0522,
      "step": 47407
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.239487409591675,
      "learning_rate": 0.000539473956538725,
      "loss": 2.7774,
      "step": 47408
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.9283008575439453,
      "learning_rate": 0.0005394714926445463,
      "loss": 2.9855,
      "step": 47409
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.7088217735290527,
      "learning_rate": 0.0005394690287058451,
      "loss": 3.1885,
      "step": 47410
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.609194278717041,
      "learning_rate": 0.0005394665647226222,
      "loss": 2.834,
      "step": 47411
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.69473934173584,
      "learning_rate": 0.0005394641006948779,
      "loss": 3.0999,
      "step": 47412
    },
    {
      "epoch": 0.62,
      "grad_norm": 5.439567565917969,
      "learning_rate": 0.0005394616366226127,
      "loss": 3.0483,
      "step": 47413
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8562818765640259,
      "learning_rate": 0.0005394591725058272,
      "loss": 2.7296,
      "step": 47414
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7585561275482178,
      "learning_rate": 0.0005394567083445216,
      "loss": 3.0664,
      "step": 47415
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3547065258026123,
      "learning_rate": 0.0005394542441386964,
      "loss": 2.9149,
      "step": 47416
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5686752796173096,
      "learning_rate": 0.0005394517798883523,
      "loss": 3.3138,
      "step": 47417
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4993747472763062,
      "learning_rate": 0.0005394493155934895,
      "loss": 3.0132,
      "step": 47418
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6834590435028076,
      "learning_rate": 0.0005394468512541086,
      "loss": 2.8982,
      "step": 47419
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7788962125778198,
      "learning_rate": 0.00053944438687021,
      "loss": 3.1013,
      "step": 47420
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.897407054901123,
      "learning_rate": 0.0005394419224417942,
      "loss": 3.1421,
      "step": 47421
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9144704341888428,
      "learning_rate": 0.0005394394579688616,
      "loss": 3.1241,
      "step": 47422
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2927290201187134,
      "learning_rate": 0.0005394369934514126,
      "loss": 3.1238,
      "step": 47423
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6217944622039795,
      "learning_rate": 0.0005394345288894477,
      "loss": 3.0604,
      "step": 47424
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6427093744277954,
      "learning_rate": 0.0005394320642829676,
      "loss": 3.0479,
      "step": 47425
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5334280729293823,
      "learning_rate": 0.0005394295996319725,
      "loss": 2.9497,
      "step": 47426
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.693273901939392,
      "learning_rate": 0.0005394271349364628,
      "loss": 2.9504,
      "step": 47427
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3012408018112183,
      "learning_rate": 0.0005394246701964393,
      "loss": 3.1773,
      "step": 47428
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.05834698677063,
      "learning_rate": 0.000539422205411902,
      "loss": 3.061,
      "step": 47429
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5469168424606323,
      "learning_rate": 0.0005394197405828516,
      "loss": 3.0741,
      "step": 47430
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3077746629714966,
      "learning_rate": 0.0005394172757092888,
      "loss": 3.2331,
      "step": 47431
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.7222201824188232,
      "learning_rate": 0.0005394148107912136,
      "loss": 3.3014,
      "step": 47432
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8703596591949463,
      "learning_rate": 0.0005394123458286267,
      "loss": 2.9517,
      "step": 47433
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5501987934112549,
      "learning_rate": 0.0005394098808215286,
      "loss": 3.2094,
      "step": 47434
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0435335636138916,
      "learning_rate": 0.0005394074157699196,
      "loss": 2.9254,
      "step": 47435
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.756109595298767,
      "learning_rate": 0.0005394049506738003,
      "loss": 3.0764,
      "step": 47436
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.315496802330017,
      "learning_rate": 0.0005394024855331711,
      "loss": 3.0432,
      "step": 47437
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.290337324142456,
      "learning_rate": 0.0005394000203480324,
      "loss": 3.0853,
      "step": 47438
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7965837717056274,
      "learning_rate": 0.0005393975551183847,
      "loss": 3.2157,
      "step": 47439
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7568933963775635,
      "learning_rate": 0.0005393950898442285,
      "loss": 2.9615,
      "step": 47440
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3284051418304443,
      "learning_rate": 0.0005393926245255643,
      "loss": 2.9185,
      "step": 47441
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.563215732574463,
      "learning_rate": 0.0005393901591623925,
      "loss": 3.2362,
      "step": 47442
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.134411096572876,
      "learning_rate": 0.0005393876937547134,
      "loss": 3.1719,
      "step": 47443
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3705146312713623,
      "learning_rate": 0.0005393852283025278,
      "loss": 3.1425,
      "step": 47444
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.380411982536316,
      "learning_rate": 0.0005393827628058358,
      "loss": 2.7228,
      "step": 47445
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6825330257415771,
      "learning_rate": 0.0005393802972646381,
      "loss": 2.5936,
      "step": 47446
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7403717041015625,
      "learning_rate": 0.0005393778316789351,
      "loss": 3.1353,
      "step": 47447
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7986053228378296,
      "learning_rate": 0.0005393753660487273,
      "loss": 3.01,
      "step": 47448
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6777009963989258,
      "learning_rate": 0.000539372900374015,
      "loss": 2.9514,
      "step": 47449
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.562025547027588,
      "learning_rate": 0.0005393704346547988,
      "loss": 3.0239,
      "step": 47450
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.952242374420166,
      "learning_rate": 0.0005393679688910791,
      "loss": 2.7525,
      "step": 47451
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4140774011611938,
      "learning_rate": 0.0005393655030828564,
      "loss": 2.8615,
      "step": 47452
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.771590232849121,
      "learning_rate": 0.0005393630372301311,
      "loss": 3.1846,
      "step": 47453
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.819429636001587,
      "learning_rate": 0.0005393605713329037,
      "loss": 3.0093,
      "step": 47454
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6545436382293701,
      "learning_rate": 0.0005393581053911747,
      "loss": 3.2027,
      "step": 47455
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6769919395446777,
      "learning_rate": 0.0005393556394049444,
      "loss": 2.9938,
      "step": 47456
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.434994101524353,
      "learning_rate": 0.0005393531733742134,
      "loss": 3.0153,
      "step": 47457
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1751362085342407,
      "learning_rate": 0.0005393507072989823,
      "loss": 2.8167,
      "step": 47458
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6289876699447632,
      "learning_rate": 0.0005393482411792512,
      "loss": 3.0764,
      "step": 47459
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5087658166885376,
      "learning_rate": 0.0005393457750150208,
      "loss": 3.1973,
      "step": 47460
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.479880452156067,
      "learning_rate": 0.0005393433088062915,
      "loss": 3.2468,
      "step": 47461
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.611448049545288,
      "learning_rate": 0.0005393408425530637,
      "loss": 3.3849,
      "step": 47462
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0274112224578857,
      "learning_rate": 0.0005393383762553381,
      "loss": 2.9339,
      "step": 47463
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5251219272613525,
      "learning_rate": 0.0005393359099131147,
      "loss": 2.8924,
      "step": 47464
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5033456087112427,
      "learning_rate": 0.0005393334435263945,
      "loss": 2.8849,
      "step": 47465
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.624508857727051,
      "learning_rate": 0.0005393309770951775,
      "loss": 3.1862,
      "step": 47466
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2016892433166504,
      "learning_rate": 0.0005393285106194645,
      "loss": 2.7802,
      "step": 47467
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.573307991027832,
      "learning_rate": 0.0005393260440992557,
      "loss": 2.8415,
      "step": 47468
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2690601348876953,
      "learning_rate": 0.0005393235775345517,
      "loss": 3.0051,
      "step": 47469
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7656786441802979,
      "learning_rate": 0.000539321110925353,
      "loss": 3.0352,
      "step": 47470
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4572947025299072,
      "learning_rate": 0.00053931864427166,
      "loss": 3.2405,
      "step": 47471
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0054407119750977,
      "learning_rate": 0.000539316177573473,
      "loss": 3.0275,
      "step": 47472
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1405575275421143,
      "learning_rate": 0.0005393137108307927,
      "loss": 3.0982,
      "step": 47473
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6193318367004395,
      "learning_rate": 0.0005393112440436195,
      "loss": 3.3224,
      "step": 47474
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2123403549194336,
      "learning_rate": 0.0005393087772119537,
      "loss": 2.9165,
      "step": 47475
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8192700147628784,
      "learning_rate": 0.000539306310335796,
      "loss": 3.1416,
      "step": 47476
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.782922625541687,
      "learning_rate": 0.0005393038434151467,
      "loss": 3.0152,
      "step": 47477
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.57688307762146,
      "learning_rate": 0.0005393013764500063,
      "loss": 3.3628,
      "step": 47478
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.0663108825683594,
      "learning_rate": 0.0005392989094403753,
      "loss": 2.7984,
      "step": 47479
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.7727413177490234,
      "learning_rate": 0.0005392964423862541,
      "loss": 2.9923,
      "step": 47480
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.28592848777771,
      "learning_rate": 0.0005392939752876431,
      "loss": 2.9262,
      "step": 47481
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.663670063018799,
      "learning_rate": 0.000539291508144543,
      "loss": 2.9073,
      "step": 47482
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5754058361053467,
      "learning_rate": 0.0005392890409569538,
      "loss": 2.7997,
      "step": 47483
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0000901222229004,
      "learning_rate": 0.0005392865737248765,
      "loss": 2.9079,
      "step": 47484
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3768645524978638,
      "learning_rate": 0.0005392841064483112,
      "loss": 3.0522,
      "step": 47485
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.021226167678833,
      "learning_rate": 0.0005392816391272586,
      "loss": 2.7461,
      "step": 47486
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3638447523117065,
      "learning_rate": 0.0005392791717617188,
      "loss": 3.2165,
      "step": 47487
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.083228588104248,
      "learning_rate": 0.0005392767043516926,
      "loss": 2.8718,
      "step": 47488
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.564744234085083,
      "learning_rate": 0.0005392742368971804,
      "loss": 3.2631,
      "step": 47489
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3698651790618896,
      "learning_rate": 0.0005392717693981826,
      "loss": 2.9437,
      "step": 47490
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.0904877185821533,
      "learning_rate": 0.0005392693018546995,
      "loss": 2.9393,
      "step": 47491
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6857258081436157,
      "learning_rate": 0.0005392668342667319,
      "loss": 2.9872,
      "step": 47492
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.091999053955078,
      "learning_rate": 0.00053926436663428,
      "loss": 2.9187,
      "step": 47493
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5893292427062988,
      "learning_rate": 0.0005392618989573444,
      "loss": 2.7991,
      "step": 47494
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3766378164291382,
      "learning_rate": 0.0005392594312359255,
      "loss": 3.1376,
      "step": 47495
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1857550144195557,
      "learning_rate": 0.0005392569634700236,
      "loss": 2.8968,
      "step": 47496
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2727532386779785,
      "learning_rate": 0.0005392544956596395,
      "loss": 3.2273,
      "step": 47497
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.503513216972351,
      "learning_rate": 0.0005392520278047734,
      "loss": 3.1335,
      "step": 47498
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2470667362213135,
      "learning_rate": 0.0005392495599054258,
      "loss": 3.0919,
      "step": 47499
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.844412922859192,
      "learning_rate": 0.0005392470919615972,
      "loss": 2.9746,
      "step": 47500
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.9641196727752686,
      "learning_rate": 0.000539244623973288,
      "loss": 2.9264,
      "step": 47501
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4429421424865723,
      "learning_rate": 0.0005392421559404988,
      "loss": 3.0373,
      "step": 47502
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5277478694915771,
      "learning_rate": 0.00053923968786323,
      "loss": 3.1223,
      "step": 47503
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.388615846633911,
      "learning_rate": 0.0005392372197414819,
      "loss": 2.9543,
      "step": 47504
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0378758907318115,
      "learning_rate": 0.0005392347515752551,
      "loss": 3.0249,
      "step": 47505
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6139230728149414,
      "learning_rate": 0.0005392322833645502,
      "loss": 2.9906,
      "step": 47506
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5880850553512573,
      "learning_rate": 0.0005392298151093674,
      "loss": 3.2009,
      "step": 47507
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4030210971832275,
      "learning_rate": 0.0005392273468097072,
      "loss": 3.4679,
      "step": 47508
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6869503259658813,
      "learning_rate": 0.0005392248784655702,
      "loss": 3.0003,
      "step": 47509
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4482157230377197,
      "learning_rate": 0.0005392224100769568,
      "loss": 3.1,
      "step": 47510
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8389579057693481,
      "learning_rate": 0.0005392199416438674,
      "loss": 3.1315,
      "step": 47511
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9336693286895752,
      "learning_rate": 0.0005392174731663025,
      "loss": 2.7409,
      "step": 47512
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5689690113067627,
      "learning_rate": 0.0005392150046442625,
      "loss": 3.1841,
      "step": 47513
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.45279860496521,
      "learning_rate": 0.000539212536077748,
      "loss": 2.9032,
      "step": 47514
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.6472198963165283,
      "learning_rate": 0.0005392100674667593,
      "loss": 3.0094,
      "step": 47515
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3189823627471924,
      "learning_rate": 0.000539207598811297,
      "loss": 3.3408,
      "step": 47516
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5492678880691528,
      "learning_rate": 0.0005392051301113614,
      "loss": 3.1341,
      "step": 47517
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.274144411087036,
      "learning_rate": 0.0005392026613669532,
      "loss": 3.0796,
      "step": 47518
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2370240688323975,
      "learning_rate": 0.0005392001925780727,
      "loss": 2.993,
      "step": 47519
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4156537055969238,
      "learning_rate": 0.0005391977237447202,
      "loss": 3.0921,
      "step": 47520
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.076317310333252,
      "learning_rate": 0.0005391952548668965,
      "loss": 3.1583,
      "step": 47521
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8143678903579712,
      "learning_rate": 0.0005391927859446017,
      "loss": 3.0148,
      "step": 47522
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3786523342132568,
      "learning_rate": 0.0005391903169778366,
      "loss": 2.9752,
      "step": 47523
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5769104957580566,
      "learning_rate": 0.0005391878479666016,
      "loss": 2.9117,
      "step": 47524
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4452420473098755,
      "learning_rate": 0.0005391853789108968,
      "loss": 2.975,
      "step": 47525
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7032833099365234,
      "learning_rate": 0.000539182909810723,
      "loss": 2.8923,
      "step": 47526
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.563181161880493,
      "learning_rate": 0.0005391804406660807,
      "loss": 2.981,
      "step": 47527
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4694736003875732,
      "learning_rate": 0.0005391779714769702,
      "loss": 2.9918,
      "step": 47528
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8908730745315552,
      "learning_rate": 0.0005391755022433919,
      "loss": 3.2753,
      "step": 47529
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4121134281158447,
      "learning_rate": 0.0005391730329653465,
      "loss": 2.7848,
      "step": 47530
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.2730376720428467,
      "learning_rate": 0.0005391705636428342,
      "loss": 3.1362,
      "step": 47531
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8277835845947266,
      "learning_rate": 0.0005391680942758556,
      "loss": 3.176,
      "step": 47532
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.749173402786255,
      "learning_rate": 0.0005391656248644112,
      "loss": 2.8438,
      "step": 47533
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9985613822937012,
      "learning_rate": 0.0005391631554085013,
      "loss": 2.9754,
      "step": 47534
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9914270639419556,
      "learning_rate": 0.0005391606859081265,
      "loss": 2.9501,
      "step": 47535
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6199628114700317,
      "learning_rate": 0.0005391582163632873,
      "loss": 3.0617,
      "step": 47536
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2658393383026123,
      "learning_rate": 0.000539155746773984,
      "loss": 2.8601,
      "step": 47537
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.669642686843872,
      "learning_rate": 0.0005391532771402171,
      "loss": 3.3165,
      "step": 47538
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.670506477355957,
      "learning_rate": 0.0005391508074619872,
      "loss": 3.1702,
      "step": 47539
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5777814388275146,
      "learning_rate": 0.0005391483377392946,
      "loss": 3.0042,
      "step": 47540
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9099925756454468,
      "learning_rate": 0.0005391458679721397,
      "loss": 2.8891,
      "step": 47541
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3795888423919678,
      "learning_rate": 0.0005391433981605233,
      "loss": 2.9998,
      "step": 47542
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9315459728240967,
      "learning_rate": 0.0005391409283044454,
      "loss": 3.1051,
      "step": 47543
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4838544130325317,
      "learning_rate": 0.0005391384584039067,
      "loss": 3.2348,
      "step": 47544
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0623724460601807,
      "learning_rate": 0.0005391359884589078,
      "loss": 3.227,
      "step": 47545
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6958327293395996,
      "learning_rate": 0.0005391335184694489,
      "loss": 3.1783,
      "step": 47546
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6858466863632202,
      "learning_rate": 0.0005391310484355306,
      "loss": 2.8742,
      "step": 47547
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.071803092956543,
      "learning_rate": 0.0005391285783571532,
      "loss": 3.0366,
      "step": 47548
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5292506217956543,
      "learning_rate": 0.0005391261082343175,
      "loss": 3.1986,
      "step": 47549
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3886011838912964,
      "learning_rate": 0.0005391236380670237,
      "loss": 3.0515,
      "step": 47550
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7277603149414062,
      "learning_rate": 0.0005391211678552722,
      "loss": 2.8058,
      "step": 47551
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.28674054145813,
      "learning_rate": 0.0005391186975990636,
      "loss": 2.8731,
      "step": 47552
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.096435785293579,
      "learning_rate": 0.0005391162272983983,
      "loss": 3.1364,
      "step": 47553
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4728842973709106,
      "learning_rate": 0.0005391137569532768,
      "loss": 3.1267,
      "step": 47554
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0073704719543457,
      "learning_rate": 0.0005391112865636995,
      "loss": 3.1802,
      "step": 47555
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.434770941734314,
      "learning_rate": 0.0005391088161296669,
      "loss": 3.0503,
      "step": 47556
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8666869401931763,
      "learning_rate": 0.0005391063456511796,
      "loss": 2.7249,
      "step": 47557
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8302950859069824,
      "learning_rate": 0.0005391038751282378,
      "loss": 3.1098,
      "step": 47558
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6622285842895508,
      "learning_rate": 0.000539101404560842,
      "loss": 3.0615,
      "step": 47559
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2456440925598145,
      "learning_rate": 0.0005390989339489929,
      "loss": 2.9828,
      "step": 47560
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7256230115890503,
      "learning_rate": 0.0005390964632926908,
      "loss": 3.2603,
      "step": 47561
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6491779088974,
      "learning_rate": 0.000539093992591936,
      "loss": 3.1454,
      "step": 47562
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.260549783706665,
      "learning_rate": 0.0005390915218467291,
      "loss": 3.1972,
      "step": 47563
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.070887327194214,
      "learning_rate": 0.0005390890510570707,
      "loss": 3.1108,
      "step": 47564
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9089776277542114,
      "learning_rate": 0.0005390865802229611,
      "loss": 3.0011,
      "step": 47565
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4925591945648193,
      "learning_rate": 0.0005390841093444007,
      "loss": 3.2564,
      "step": 47566
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7876734733581543,
      "learning_rate": 0.0005390816384213901,
      "loss": 2.8769,
      "step": 47567
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1894001960754395,
      "learning_rate": 0.0005390791674539299,
      "loss": 3.2591,
      "step": 47568
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5274237394332886,
      "learning_rate": 0.0005390766964420201,
      "loss": 2.9859,
      "step": 47569
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.382198452949524,
      "learning_rate": 0.0005390742253856616,
      "loss": 3.1211,
      "step": 47570
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.8289520740509033,
      "learning_rate": 0.0005390717542848546,
      "loss": 2.9795,
      "step": 47571
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7670280933380127,
      "learning_rate": 0.0005390692831395996,
      "loss": 3.1376,
      "step": 47572
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9186028242111206,
      "learning_rate": 0.0005390668119498973,
      "loss": 3.0201,
      "step": 47573
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.739915609359741,
      "learning_rate": 0.000539064340715748,
      "loss": 3.1941,
      "step": 47574
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.113210916519165,
      "learning_rate": 0.0005390618694371519,
      "loss": 2.8594,
      "step": 47575
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2439889907836914,
      "learning_rate": 0.0005390593981141096,
      "loss": 3.0308,
      "step": 47576
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.510678768157959,
      "learning_rate": 0.0005390569267466219,
      "loss": 2.8701,
      "step": 47577
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4088654518127441,
      "learning_rate": 0.000539054455334689,
      "loss": 2.9389,
      "step": 47578
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.649523377418518,
      "learning_rate": 0.0005390519838783112,
      "loss": 2.9897,
      "step": 47579
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3561668395996094,
      "learning_rate": 0.0005390495123774893,
      "loss": 3.1241,
      "step": 47580
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6370171308517456,
      "learning_rate": 0.0005390470408322234,
      "loss": 2.9878,
      "step": 47581
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8278274536132812,
      "learning_rate": 0.0005390445692425143,
      "loss": 3.0914,
      "step": 47582
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5920240879058838,
      "learning_rate": 0.0005390420976083622,
      "loss": 2.961,
      "step": 47583
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7506951093673706,
      "learning_rate": 0.0005390396259297677,
      "loss": 3.3426,
      "step": 47584
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9017335176467896,
      "learning_rate": 0.0005390371542067313,
      "loss": 3.1003,
      "step": 47585
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9720169305801392,
      "learning_rate": 0.0005390346824392532,
      "loss": 2.7935,
      "step": 47586
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.454512596130371,
      "learning_rate": 0.0005390322106273341,
      "loss": 3.0211,
      "step": 47587
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9800528287887573,
      "learning_rate": 0.0005390297387709744,
      "loss": 3.0496,
      "step": 47588
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.568373680114746,
      "learning_rate": 0.0005390272668701747,
      "loss": 2.7786,
      "step": 47589
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4658974409103394,
      "learning_rate": 0.0005390247949249352,
      "loss": 3.1491,
      "step": 47590
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8918429613113403,
      "learning_rate": 0.0005390223229352564,
      "loss": 3.0472,
      "step": 47591
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6720068454742432,
      "learning_rate": 0.000539019850901139,
      "loss": 3.0398,
      "step": 47592
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5660358667373657,
      "learning_rate": 0.0005390173788225833,
      "loss": 2.8643,
      "step": 47593
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.12783145904541,
      "learning_rate": 0.0005390149066995896,
      "loss": 3.0655,
      "step": 47594
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.780163049697876,
      "learning_rate": 0.0005390124345321586,
      "loss": 3.0151,
      "step": 47595
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0075278282165527,
      "learning_rate": 0.0005390099623202907,
      "loss": 3.214,
      "step": 47596
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0696001052856445,
      "learning_rate": 0.0005390074900639863,
      "loss": 3.1124,
      "step": 47597
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7489514350891113,
      "learning_rate": 0.0005390050177632459,
      "loss": 2.9971,
      "step": 47598
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0680744647979736,
      "learning_rate": 0.00053900254541807,
      "loss": 3.0459,
      "step": 47599
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4200201034545898,
      "learning_rate": 0.000539000073028459,
      "loss": 3.2677,
      "step": 47600
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.034939765930176,
      "learning_rate": 0.0005389976005944134,
      "loss": 3.213,
      "step": 47601
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3163015842437744,
      "learning_rate": 0.0005389951281159336,
      "loss": 2.9128,
      "step": 47602
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4892570972442627,
      "learning_rate": 0.00053899265559302,
      "loss": 3.09,
      "step": 47603
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8470438718795776,
      "learning_rate": 0.0005389901830256732,
      "loss": 3.2883,
      "step": 47604
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.580366849899292,
      "learning_rate": 0.0005389877104138937,
      "loss": 2.8784,
      "step": 47605
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.153397798538208,
      "learning_rate": 0.0005389852377576819,
      "loss": 2.7872,
      "step": 47606
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8529880046844482,
      "learning_rate": 0.0005389827650570381,
      "loss": 2.8575,
      "step": 47607
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.069417715072632,
      "learning_rate": 0.0005389802923119629,
      "loss": 3.023,
      "step": 47608
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5123788118362427,
      "learning_rate": 0.000538977819522457,
      "loss": 3.1327,
      "step": 47609
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9424620866775513,
      "learning_rate": 0.0005389753466885203,
      "loss": 2.8904,
      "step": 47610
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6097067594528198,
      "learning_rate": 0.0005389728738101538,
      "loss": 3.1923,
      "step": 47611
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5626945495605469,
      "learning_rate": 0.0005389704008873575,
      "loss": 2.9619,
      "step": 47612
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9973047971725464,
      "learning_rate": 0.0005389679279201322,
      "loss": 2.9844,
      "step": 47613
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.785343050956726,
      "learning_rate": 0.0005389654549084784,
      "loss": 2.7586,
      "step": 47614
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9894849061965942,
      "learning_rate": 0.0005389629818523962,
      "loss": 3.0643,
      "step": 47615
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5084278583526611,
      "learning_rate": 0.0005389605087518863,
      "loss": 2.8985,
      "step": 47616
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6083248853683472,
      "learning_rate": 0.0005389580356069492,
      "loss": 2.9067,
      "step": 47617
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.620169162750244,
      "learning_rate": 0.0005389555624175853,
      "loss": 3.0571,
      "step": 47618
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5525448322296143,
      "learning_rate": 0.000538953089183795,
      "loss": 3.1251,
      "step": 47619
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0707108974456787,
      "learning_rate": 0.0005389506159055789,
      "loss": 3.1939,
      "step": 47620
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9831000566482544,
      "learning_rate": 0.0005389481425829372,
      "loss": 2.9644,
      "step": 47621
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.592592477798462,
      "learning_rate": 0.0005389456692158707,
      "loss": 3.0582,
      "step": 47622
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.463600754737854,
      "learning_rate": 0.0005389431958043796,
      "loss": 2.9264,
      "step": 47623
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.637294054031372,
      "learning_rate": 0.0005389407223484645,
      "loss": 2.9188,
      "step": 47624
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.3054773807525635,
      "learning_rate": 0.0005389382488481258,
      "loss": 3.0326,
      "step": 47625
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4758422374725342,
      "learning_rate": 0.0005389357753033639,
      "loss": 2.9778,
      "step": 47626
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.719892144203186,
      "learning_rate": 0.0005389333017141794,
      "loss": 3.0746,
      "step": 47627
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2337160110473633,
      "learning_rate": 0.0005389308280805726,
      "loss": 2.9722,
      "step": 47628
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.9952173233032227,
      "learning_rate": 0.0005389283544025441,
      "loss": 3.1014,
      "step": 47629
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3084590435028076,
      "learning_rate": 0.0005389258806800944,
      "loss": 2.7566,
      "step": 47630
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7099474668502808,
      "learning_rate": 0.0005389234069132238,
      "loss": 3.0562,
      "step": 47631
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.648024559020996,
      "learning_rate": 0.0005389209331019328,
      "loss": 2.7666,
      "step": 47632
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8680830001831055,
      "learning_rate": 0.0005389184592462219,
      "loss": 2.9977,
      "step": 47633
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9650182723999023,
      "learning_rate": 0.0005389159853460917,
      "loss": 2.7146,
      "step": 47634
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6454083919525146,
      "learning_rate": 0.0005389135114015423,
      "loss": 3.1594,
      "step": 47635
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.232283353805542,
      "learning_rate": 0.0005389110374125744,
      "loss": 3.0381,
      "step": 47636
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3808932304382324,
      "learning_rate": 0.0005389085633791884,
      "loss": 2.9063,
      "step": 47637
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8069473505020142,
      "learning_rate": 0.0005389060893013849,
      "loss": 3.0891,
      "step": 47638
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.246781826019287,
      "learning_rate": 0.0005389036151791642,
      "loss": 3.1933,
      "step": 47639
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5690858364105225,
      "learning_rate": 0.0005389011410125268,
      "loss": 3.1702,
      "step": 47640
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4367616176605225,
      "learning_rate": 0.0005388986668014732,
      "loss": 3.1744,
      "step": 47641
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4827793836593628,
      "learning_rate": 0.0005388961925460039,
      "loss": 3.0206,
      "step": 47642
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9492639303207397,
      "learning_rate": 0.0005388937182461191,
      "loss": 2.9306,
      "step": 47643
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7752535343170166,
      "learning_rate": 0.0005388912439018195,
      "loss": 2.871,
      "step": 47644
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3587627410888672,
      "learning_rate": 0.0005388887695131055,
      "loss": 2.981,
      "step": 47645
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7384936809539795,
      "learning_rate": 0.0005388862950799777,
      "loss": 3.0918,
      "step": 47646
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5533145666122437,
      "learning_rate": 0.0005388838206024364,
      "loss": 3.1406,
      "step": 47647
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.597041130065918,
      "learning_rate": 0.0005388813460804819,
      "loss": 3.1088,
      "step": 47648
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4156841039657593,
      "learning_rate": 0.0005388788715141151,
      "loss": 3.1633,
      "step": 47649
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4813822507858276,
      "learning_rate": 0.000538876396903336,
      "loss": 3.3628,
      "step": 47650
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5566223859786987,
      "learning_rate": 0.0005388739222481454,
      "loss": 2.9068,
      "step": 47651
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.48513662815094,
      "learning_rate": 0.0005388714475485437,
      "loss": 3.1297,
      "step": 47652
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6654127836227417,
      "learning_rate": 0.0005388689728045311,
      "loss": 3.0498,
      "step": 47653
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4846084117889404,
      "learning_rate": 0.0005388664980161083,
      "loss": 3.0941,
      "step": 47654
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.356400728225708,
      "learning_rate": 0.0005388640231832757,
      "loss": 2.9479,
      "step": 47655
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6912027597427368,
      "learning_rate": 0.0005388615483060339,
      "loss": 3.0083,
      "step": 47656
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.280045747756958,
      "learning_rate": 0.0005388590733843831,
      "loss": 2.9957,
      "step": 47657
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5651363134384155,
      "learning_rate": 0.000538856598418324,
      "loss": 3.1341,
      "step": 47658
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5622799396514893,
      "learning_rate": 0.0005388541234078568,
      "loss": 3.0989,
      "step": 47659
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.471567153930664,
      "learning_rate": 0.0005388516483529822,
      "loss": 3.0192,
      "step": 47660
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8560985326766968,
      "learning_rate": 0.0005388491732537005,
      "loss": 2.9103,
      "step": 47661
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5928312540054321,
      "learning_rate": 0.0005388466981100123,
      "loss": 2.9561,
      "step": 47662
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5125094652175903,
      "learning_rate": 0.000538844222921918,
      "loss": 2.771,
      "step": 47663
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5375844240188599,
      "learning_rate": 0.0005388417476894181,
      "loss": 3.0071,
      "step": 47664
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9225554466247559,
      "learning_rate": 0.0005388392724125128,
      "loss": 3.0777,
      "step": 47665
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.905333399772644,
      "learning_rate": 0.0005388367970912029,
      "loss": 3.0114,
      "step": 47666
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6056989431381226,
      "learning_rate": 0.0005388343217254888,
      "loss": 3.12,
      "step": 47667
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6186611652374268,
      "learning_rate": 0.0005388318463153708,
      "loss": 2.8958,
      "step": 47668
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.464911937713623,
      "learning_rate": 0.0005388293708608495,
      "loss": 3.0097,
      "step": 47669
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2853577136993408,
      "learning_rate": 0.0005388268953619253,
      "loss": 3.0722,
      "step": 47670
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.236422061920166,
      "learning_rate": 0.0005388244198185986,
      "loss": 3.1538,
      "step": 47671
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6803369522094727,
      "learning_rate": 0.00053882194423087,
      "loss": 2.9581,
      "step": 47672
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0811119079589844,
      "learning_rate": 0.00053881946859874,
      "loss": 3.1536,
      "step": 47673
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6970735788345337,
      "learning_rate": 0.0005388169929222088,
      "loss": 3.1506,
      "step": 47674
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5402075052261353,
      "learning_rate": 0.000538814517201277,
      "loss": 3.0969,
      "step": 47675
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.6834664344787598,
      "learning_rate": 0.0005388120414359452,
      "loss": 3.0035,
      "step": 47676
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8532429933547974,
      "learning_rate": 0.0005388095656262137,
      "loss": 2.9528,
      "step": 47677
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7579201459884644,
      "learning_rate": 0.0005388070897720829,
      "loss": 3.1794,
      "step": 47678
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6606236696243286,
      "learning_rate": 0.0005388046138735534,
      "loss": 3.1118,
      "step": 47679
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.2238645553588867,
      "learning_rate": 0.0005388021379306256,
      "loss": 3.0221,
      "step": 47680
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2360849380493164,
      "learning_rate": 0.0005387996619433,
      "loss": 3.1321,
      "step": 47681
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3429337739944458,
      "learning_rate": 0.0005387971859115771,
      "loss": 3.1185,
      "step": 47682
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6192740201950073,
      "learning_rate": 0.0005387947098354572,
      "loss": 3.1371,
      "step": 47683
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9241482019424438,
      "learning_rate": 0.0005387922337149408,
      "loss": 3.1742,
      "step": 47684
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0016307830810547,
      "learning_rate": 0.0005387897575500286,
      "loss": 3.23,
      "step": 47685
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.670458436012268,
      "learning_rate": 0.0005387872813407208,
      "loss": 3.0845,
      "step": 47686
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6054061651229858,
      "learning_rate": 0.0005387848050870179,
      "loss": 2.918,
      "step": 47687
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3952330350875854,
      "learning_rate": 0.0005387823287889204,
      "loss": 3.1353,
      "step": 47688
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6532564163208008,
      "learning_rate": 0.0005387798524464287,
      "loss": 3.0842,
      "step": 47689
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5805792808532715,
      "learning_rate": 0.0005387773760595435,
      "loss": 3.2122,
      "step": 47690
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.475618839263916,
      "learning_rate": 0.0005387748996282649,
      "loss": 3.0508,
      "step": 47691
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9006260633468628,
      "learning_rate": 0.0005387724231525937,
      "loss": 2.9268,
      "step": 47692
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0754151344299316,
      "learning_rate": 0.0005387699466325301,
      "loss": 3.0146,
      "step": 47693
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7694717645645142,
      "learning_rate": 0.0005387674700680745,
      "loss": 3.1657,
      "step": 47694
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7045807838439941,
      "learning_rate": 0.0005387649934592277,
      "loss": 2.9248,
      "step": 47695
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3624647855758667,
      "learning_rate": 0.0005387625168059901,
      "loss": 3.2349,
      "step": 47696
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5798529386520386,
      "learning_rate": 0.0005387600401083618,
      "loss": 3.0814,
      "step": 47697
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7778129577636719,
      "learning_rate": 0.0005387575633663437,
      "loss": 2.7439,
      "step": 47698
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0425169467926025,
      "learning_rate": 0.0005387550865799358,
      "loss": 2.9344,
      "step": 47699
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6279377937316895,
      "learning_rate": 0.0005387526097491391,
      "loss": 3.0791,
      "step": 47700
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8607122898101807,
      "learning_rate": 0.0005387501328739537,
      "loss": 2.679,
      "step": 47701
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.446256399154663,
      "learning_rate": 0.0005387476559543801,
      "loss": 3.0032,
      "step": 47702
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5117359161376953,
      "learning_rate": 0.0005387451789904188,
      "loss": 2.7304,
      "step": 47703
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5696207284927368,
      "learning_rate": 0.0005387427019820703,
      "loss": 3.0466,
      "step": 47704
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6945260763168335,
      "learning_rate": 0.000538740224929335,
      "loss": 2.9539,
      "step": 47705
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.43031644821167,
      "learning_rate": 0.0005387377478322134,
      "loss": 3.199,
      "step": 47706
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5987370014190674,
      "learning_rate": 0.000538735270690706,
      "loss": 2.9355,
      "step": 47707
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0122861862182617,
      "learning_rate": 0.0005387327935048132,
      "loss": 2.8572,
      "step": 47708
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3555371761322021,
      "learning_rate": 0.0005387303162745354,
      "loss": 2.8476,
      "step": 47709
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.825225591659546,
      "learning_rate": 0.0005387278389998732,
      "loss": 2.9151,
      "step": 47710
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3184722661972046,
      "learning_rate": 0.0005387253616808269,
      "loss": 3.0596,
      "step": 47711
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.461672306060791,
      "learning_rate": 0.000538722884317397,
      "loss": 3.0829,
      "step": 47712
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0754284858703613,
      "learning_rate": 0.0005387204069095842,
      "loss": 3.0458,
      "step": 47713
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4754842519760132,
      "learning_rate": 0.0005387179294573885,
      "loss": 2.9183,
      "step": 47714
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4985840320587158,
      "learning_rate": 0.000538715451960811,
      "loss": 2.887,
      "step": 47715
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6319820880889893,
      "learning_rate": 0.0005387129744198514,
      "loss": 3.1101,
      "step": 47716
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.593936562538147,
      "learning_rate": 0.0005387104968345109,
      "loss": 2.994,
      "step": 47717
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.497599720954895,
      "learning_rate": 0.0005387080192047893,
      "loss": 2.8914,
      "step": 47718
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8433465957641602,
      "learning_rate": 0.0005387055415306876,
      "loss": 3.0607,
      "step": 47719
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.831937313079834,
      "learning_rate": 0.0005387030638122059,
      "loss": 3.12,
      "step": 47720
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4785523414611816,
      "learning_rate": 0.0005387005860493449,
      "loss": 3.1509,
      "step": 47721
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.010599136352539,
      "learning_rate": 0.000538698108242105,
      "loss": 3.0314,
      "step": 47722
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5734813213348389,
      "learning_rate": 0.0005386956303904864,
      "loss": 3.0691,
      "step": 47723
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5412390232086182,
      "learning_rate": 0.0005386931524944899,
      "loss": 3.3358,
      "step": 47724
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7001152038574219,
      "learning_rate": 0.0005386906745541159,
      "loss": 2.9518,
      "step": 47725
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1260693073272705,
      "learning_rate": 0.0005386881965693647,
      "loss": 2.9629,
      "step": 47726
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3562026023864746,
      "learning_rate": 0.0005386857185402369,
      "loss": 2.9842,
      "step": 47727
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.151711940765381,
      "learning_rate": 0.0005386832404667329,
      "loss": 2.9974,
      "step": 47728
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8545663356781006,
      "learning_rate": 0.0005386807623488531,
      "loss": 3.2112,
      "step": 47729
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3802950382232666,
      "learning_rate": 0.0005386782841865982,
      "loss": 3.0144,
      "step": 47730
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5074162483215332,
      "learning_rate": 0.0005386758059799685,
      "loss": 2.9917,
      "step": 47731
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5328125953674316,
      "learning_rate": 0.0005386733277289643,
      "loss": 2.9052,
      "step": 47732
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6037307977676392,
      "learning_rate": 0.0005386708494335864,
      "loss": 2.873,
      "step": 47733
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3276801109313965,
      "learning_rate": 0.000538668371093835,
      "loss": 3.1961,
      "step": 47734
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5734803676605225,
      "learning_rate": 0.0005386658927097106,
      "loss": 3.0623,
      "step": 47735
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7273790836334229,
      "learning_rate": 0.0005386634142812138,
      "loss": 3.2504,
      "step": 47736
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6265661716461182,
      "learning_rate": 0.0005386609358083448,
      "loss": 2.9224,
      "step": 47737
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3076798915863037,
      "learning_rate": 0.0005386584572911045,
      "loss": 3.1517,
      "step": 47738
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6349809169769287,
      "learning_rate": 0.0005386559787294929,
      "loss": 2.9001,
      "step": 47739
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4482357501983643,
      "learning_rate": 0.0005386535001235107,
      "loss": 2.788,
      "step": 47740
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3568683862686157,
      "learning_rate": 0.0005386510214731583,
      "loss": 2.917,
      "step": 47741
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.570815086364746,
      "learning_rate": 0.0005386485427784362,
      "loss": 2.9444,
      "step": 47742
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6108770370483398,
      "learning_rate": 0.0005386460640393447,
      "loss": 3.2747,
      "step": 47743
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.535069227218628,
      "learning_rate": 0.0005386435852558844,
      "loss": 2.7226,
      "step": 47744
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.622804880142212,
      "learning_rate": 0.000538641106428056,
      "loss": 3.1473,
      "step": 47745
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9822319746017456,
      "learning_rate": 0.0005386386275558595,
      "loss": 3.0537,
      "step": 47746
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.467446208000183,
      "learning_rate": 0.0005386361486392956,
      "loss": 3.0065,
      "step": 47747
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6417068243026733,
      "learning_rate": 0.0005386336696783648,
      "loss": 2.7576,
      "step": 47748
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4283818006515503,
      "learning_rate": 0.0005386311906730674,
      "loss": 2.9834,
      "step": 47749
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4309288263320923,
      "learning_rate": 0.0005386287116234041,
      "loss": 2.9647,
      "step": 47750
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4250956773757935,
      "learning_rate": 0.0005386262325293752,
      "loss": 3.065,
      "step": 47751
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9264111518859863,
      "learning_rate": 0.000538623753390981,
      "loss": 3.1199,
      "step": 47752
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6854337453842163,
      "learning_rate": 0.0005386212742082223,
      "loss": 2.9948,
      "step": 47753
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3820539712905884,
      "learning_rate": 0.0005386187949810994,
      "loss": 2.9638,
      "step": 47754
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5229135751724243,
      "learning_rate": 0.0005386163157096126,
      "loss": 3.1701,
      "step": 47755
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3965116739273071,
      "learning_rate": 0.0005386138363937627,
      "loss": 2.8993,
      "step": 47756
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.276802659034729,
      "learning_rate": 0.0005386113570335499,
      "loss": 3.1563,
      "step": 47757
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5964499711990356,
      "learning_rate": 0.0005386088776289748,
      "loss": 2.9909,
      "step": 47758
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.757455348968506,
      "learning_rate": 0.0005386063981800377,
      "loss": 3.0616,
      "step": 47759
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0816166400909424,
      "learning_rate": 0.0005386039186867393,
      "loss": 3.1142,
      "step": 47760
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.330779790878296,
      "learning_rate": 0.0005386014391490798,
      "loss": 3.179,
      "step": 47761
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.06061053276062,
      "learning_rate": 0.0005385989595670599,
      "loss": 2.9135,
      "step": 47762
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6710084676742554,
      "learning_rate": 0.0005385964799406799,
      "loss": 3.2187,
      "step": 47763
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5402792692184448,
      "learning_rate": 0.0005385940002699403,
      "loss": 3.1254,
      "step": 47764
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3431236743927002,
      "learning_rate": 0.0005385915205548416,
      "loss": 3.0548,
      "step": 47765
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6212623119354248,
      "learning_rate": 0.0005385890407953842,
      "loss": 3.4142,
      "step": 47766
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.887967824935913,
      "learning_rate": 0.0005385865609915686,
      "loss": 2.9379,
      "step": 47767
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.522653341293335,
      "learning_rate": 0.0005385840811433953,
      "loss": 3.0639,
      "step": 47768
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5213450193405151,
      "learning_rate": 0.0005385816012508647,
      "loss": 3.1035,
      "step": 47769
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0650203227996826,
      "learning_rate": 0.0005385791213139773,
      "loss": 3.1516,
      "step": 47770
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.870530366897583,
      "learning_rate": 0.0005385766413327335,
      "loss": 2.9773,
      "step": 47771
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5576380491256714,
      "learning_rate": 0.0005385741613071338,
      "loss": 2.7722,
      "step": 47772
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4088486433029175,
      "learning_rate": 0.0005385716812371786,
      "loss": 3.2302,
      "step": 47773
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7133209705352783,
      "learning_rate": 0.0005385692011228685,
      "loss": 2.9241,
      "step": 47774
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3975555896759033,
      "learning_rate": 0.0005385667209642038,
      "loss": 3.0563,
      "step": 47775
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4105029106140137,
      "learning_rate": 0.0005385642407611851,
      "loss": 3.2574,
      "step": 47776
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8651293516159058,
      "learning_rate": 0.0005385617605138129,
      "loss": 2.9684,
      "step": 47777
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0604546070098877,
      "learning_rate": 0.0005385592802220875,
      "loss": 2.8724,
      "step": 47778
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.3939075469970703,
      "learning_rate": 0.0005385567998860094,
      "loss": 2.9156,
      "step": 47779
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.427704095840454,
      "learning_rate": 0.000538554319505579,
      "loss": 3.0684,
      "step": 47780
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9809143543243408,
      "learning_rate": 0.000538551839080797,
      "loss": 2.9684,
      "step": 47781
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.593294620513916,
      "learning_rate": 0.0005385493586116635,
      "loss": 3.2085,
      "step": 47782
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.550524353981018,
      "learning_rate": 0.0005385468780981794,
      "loss": 3.1491,
      "step": 47783
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6511285305023193,
      "learning_rate": 0.0005385443975403449,
      "loss": 3.2019,
      "step": 47784
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.355414867401123,
      "learning_rate": 0.0005385419169381604,
      "loss": 3.1358,
      "step": 47785
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5527373552322388,
      "learning_rate": 0.0005385394362916265,
      "loss": 3.3676,
      "step": 47786
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4080342054367065,
      "learning_rate": 0.0005385369556007437,
      "loss": 3.1402,
      "step": 47787
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.157733201980591,
      "learning_rate": 0.0005385344748655123,
      "loss": 3.2113,
      "step": 47788
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.365638494491577,
      "learning_rate": 0.0005385319940859328,
      "loss": 2.9776,
      "step": 47789
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3976051807403564,
      "learning_rate": 0.0005385295132620056,
      "loss": 3.1118,
      "step": 47790
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.922913670539856,
      "learning_rate": 0.0005385270323937315,
      "loss": 3.1252,
      "step": 47791
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.698158621788025,
      "learning_rate": 0.0005385245514811106,
      "loss": 3.1965,
      "step": 47792
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4859274625778198,
      "learning_rate": 0.0005385220705241434,
      "loss": 3.1662,
      "step": 47793
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4118919372558594,
      "learning_rate": 0.0005385195895228306,
      "loss": 2.8778,
      "step": 47794
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.923502206802368,
      "learning_rate": 0.0005385171084771725,
      "loss": 2.9846,
      "step": 47795
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7450861930847168,
      "learning_rate": 0.0005385146273871695,
      "loss": 2.9297,
      "step": 47796
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9008983373641968,
      "learning_rate": 0.0005385121462528221,
      "loss": 3.1873,
      "step": 47797
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8807721138000488,
      "learning_rate": 0.0005385096650741308,
      "loss": 3.1792,
      "step": 47798
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.548884153366089,
      "learning_rate": 0.0005385071838510962,
      "loss": 2.8455,
      "step": 47799
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4904356002807617,
      "learning_rate": 0.0005385047025837184,
      "loss": 3.2328,
      "step": 47800
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.6626906394958496,
      "learning_rate": 0.0005385022212719983,
      "loss": 2.8645,
      "step": 47801
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8066068887710571,
      "learning_rate": 0.0005384997399159359,
      "loss": 2.8312,
      "step": 47802
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.556462287902832,
      "learning_rate": 0.000538497258515532,
      "loss": 3.1176,
      "step": 47803
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0224058628082275,
      "learning_rate": 0.000538494777070787,
      "loss": 2.9944,
      "step": 47804
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.265934467315674,
      "learning_rate": 0.0005384922955817013,
      "loss": 2.9381,
      "step": 47805
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4548085927963257,
      "learning_rate": 0.0005384898140482754,
      "loss": 3.124,
      "step": 47806
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3551796674728394,
      "learning_rate": 0.0005384873324705096,
      "loss": 3.1499,
      "step": 47807
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5971121788024902,
      "learning_rate": 0.0005384848508484046,
      "loss": 2.9088,
      "step": 47808
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4452646970748901,
      "learning_rate": 0.0005384823691819608,
      "loss": 3.1224,
      "step": 47809
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0056302547454834,
      "learning_rate": 0.0005384798874711786,
      "loss": 3.3112,
      "step": 47810
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.437308430671692,
      "learning_rate": 0.0005384774057160585,
      "loss": 3.0493,
      "step": 47811
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.971837043762207,
      "learning_rate": 0.0005384749239166008,
      "loss": 3.0165,
      "step": 47812
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4257537126541138,
      "learning_rate": 0.0005384724420728063,
      "loss": 2.9737,
      "step": 47813
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.673897385597229,
      "learning_rate": 0.0005384699601846752,
      "loss": 2.8356,
      "step": 47814
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0018177032470703,
      "learning_rate": 0.0005384674782522081,
      "loss": 3.1094,
      "step": 47815
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3901365995407104,
      "learning_rate": 0.0005384649962754054,
      "loss": 3.0706,
      "step": 47816
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4490913152694702,
      "learning_rate": 0.0005384625142542674,
      "loss": 3.2234,
      "step": 47817
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6521998643875122,
      "learning_rate": 0.0005384600321887948,
      "loss": 3.055,
      "step": 47818
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4071940183639526,
      "learning_rate": 0.000538457550078988,
      "loss": 2.9048,
      "step": 47819
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6079800128936768,
      "learning_rate": 0.0005384550679248474,
      "loss": 2.9898,
      "step": 47820
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7092552185058594,
      "learning_rate": 0.0005384525857263736,
      "loss": 2.9085,
      "step": 47821
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5289936065673828,
      "learning_rate": 0.0005384501034835669,
      "loss": 3.3094,
      "step": 47822
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.227576732635498,
      "learning_rate": 0.0005384476211964278,
      "loss": 3.1946,
      "step": 47823
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4914196729660034,
      "learning_rate": 0.0005384451388649568,
      "loss": 3.1588,
      "step": 47824
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4039220809936523,
      "learning_rate": 0.0005384426564891544,
      "loss": 3.0831,
      "step": 47825
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3678736686706543,
      "learning_rate": 0.0005384401740690209,
      "loss": 3.3932,
      "step": 47826
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.427965760231018,
      "learning_rate": 0.0005384376916045569,
      "loss": 3.0114,
      "step": 47827
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6331971883773804,
      "learning_rate": 0.0005384352090957629,
      "loss": 3.0769,
      "step": 47828
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.54070246219635,
      "learning_rate": 0.0005384327265426392,
      "loss": 2.8031,
      "step": 47829
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.535251259803772,
      "learning_rate": 0.0005384302439451864,
      "loss": 2.8307,
      "step": 47830
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5088714361190796,
      "learning_rate": 0.0005384277613034049,
      "loss": 3.2436,
      "step": 47831
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5676007270812988,
      "learning_rate": 0.0005384252786172954,
      "loss": 3.0703,
      "step": 47832
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4596565961837769,
      "learning_rate": 0.0005384227958868578,
      "loss": 3.165,
      "step": 47833
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6327245235443115,
      "learning_rate": 0.0005384203131120932,
      "loss": 3.1533,
      "step": 47834
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.232532024383545,
      "learning_rate": 0.0005384178302930016,
      "loss": 3.1046,
      "step": 47835
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5136605501174927,
      "learning_rate": 0.0005384153474295836,
      "loss": 3.0108,
      "step": 47836
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.713731288909912,
      "learning_rate": 0.0005384128645218397,
      "loss": 3.0118,
      "step": 47837
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.97225821018219,
      "learning_rate": 0.0005384103815697704,
      "loss": 3.1147,
      "step": 47838
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4988510608673096,
      "learning_rate": 0.000538407898573376,
      "loss": 3.0086,
      "step": 47839
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.286872386932373,
      "learning_rate": 0.0005384054155326572,
      "loss": 2.7032,
      "step": 47840
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.054685115814209,
      "learning_rate": 0.0005384029324476142,
      "loss": 2.8733,
      "step": 47841
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.67084538936615,
      "learning_rate": 0.0005384004493182477,
      "loss": 3.1857,
      "step": 47842
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.766430377960205,
      "learning_rate": 0.0005383979661445582,
      "loss": 2.9558,
      "step": 47843
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0142083168029785,
      "learning_rate": 0.0005383954829265458,
      "loss": 3.2503,
      "step": 47844
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7649803161621094,
      "learning_rate": 0.0005383929996642113,
      "loss": 3.1574,
      "step": 47845
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5685209035873413,
      "learning_rate": 0.0005383905163575548,
      "loss": 3.1491,
      "step": 47846
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.878597378730774,
      "learning_rate": 0.0005383880330065773,
      "loss": 2.862,
      "step": 47847
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2125325202941895,
      "learning_rate": 0.0005383855496112789,
      "loss": 2.9769,
      "step": 47848
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.336884021759033,
      "learning_rate": 0.00053838306617166,
      "loss": 3.0944,
      "step": 47849
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0566344261169434,
      "learning_rate": 0.0005383805826877212,
      "loss": 2.8411,
      "step": 47850
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.18745756149292,
      "learning_rate": 0.0005383780991594631,
      "loss": 3.3576,
      "step": 47851
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.6599743366241455,
      "learning_rate": 0.0005383756155868859,
      "loss": 2.9754,
      "step": 47852
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9415849447250366,
      "learning_rate": 0.0005383731319699901,
      "loss": 3.1476,
      "step": 47853
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4386978149414062,
      "learning_rate": 0.0005383706483087764,
      "loss": 3.1426,
      "step": 47854
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.673964262008667,
      "learning_rate": 0.000538368164603245,
      "loss": 3.182,
      "step": 47855
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.003772020339966,
      "learning_rate": 0.0005383656808533966,
      "loss": 3.1965,
      "step": 47856
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.095364809036255,
      "learning_rate": 0.0005383631970592313,
      "loss": 2.985,
      "step": 47857
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8287572860717773,
      "learning_rate": 0.0005383607132207499,
      "loss": 3.3524,
      "step": 47858
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.06166672706604,
      "learning_rate": 0.0005383582293379528,
      "loss": 3.1089,
      "step": 47859
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2245850563049316,
      "learning_rate": 0.0005383557454108403,
      "loss": 3.1963,
      "step": 47860
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.904359221458435,
      "learning_rate": 0.000538353261439413,
      "loss": 3.0536,
      "step": 47861
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8298661708831787,
      "learning_rate": 0.0005383507774236713,
      "loss": 2.9944,
      "step": 47862
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.434450387954712,
      "learning_rate": 0.0005383482933636158,
      "loss": 3.2216,
      "step": 47863
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.728928804397583,
      "learning_rate": 0.0005383458092592467,
      "loss": 3.0047,
      "step": 47864
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.55964994430542,
      "learning_rate": 0.0005383433251105647,
      "loss": 2.8589,
      "step": 47865
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.53829824924469,
      "learning_rate": 0.0005383408409175702,
      "loss": 3.0115,
      "step": 47866
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.300870656967163,
      "learning_rate": 0.0005383383566802636,
      "loss": 3.074,
      "step": 47867
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4265718460083008,
      "learning_rate": 0.0005383358723986454,
      "loss": 2.9613,
      "step": 47868
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0218329429626465,
      "learning_rate": 0.0005383333880727161,
      "loss": 3.0905,
      "step": 47869
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2858755588531494,
      "learning_rate": 0.0005383309037024761,
      "loss": 2.9904,
      "step": 47870
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3227049112319946,
      "learning_rate": 0.0005383284192879258,
      "loss": 2.9174,
      "step": 47871
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4735360145568848,
      "learning_rate": 0.0005383259348290659,
      "loss": 3.1396,
      "step": 47872
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4511467218399048,
      "learning_rate": 0.0005383234503258966,
      "loss": 2.8556,
      "step": 47873
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.2323811054229736,
      "learning_rate": 0.0005383209657784186,
      "loss": 2.7786,
      "step": 47874
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2401676177978516,
      "learning_rate": 0.0005383184811866321,
      "loss": 3.0462,
      "step": 47875
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.578301191329956,
      "learning_rate": 0.0005383159965505378,
      "loss": 3.1349,
      "step": 47876
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5027058124542236,
      "learning_rate": 0.000538313511870136,
      "loss": 3.0111,
      "step": 47877
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.099665403366089,
      "learning_rate": 0.0005383110271454272,
      "loss": 3.227,
      "step": 47878
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4598608016967773,
      "learning_rate": 0.0005383085423764119,
      "loss": 3.0617,
      "step": 47879
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.698888063430786,
      "learning_rate": 0.0005383060575630906,
      "loss": 3.0795,
      "step": 47880
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.2946176528930664,
      "learning_rate": 0.0005383035727054635,
      "loss": 3.4681,
      "step": 47881
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5306904315948486,
      "learning_rate": 0.0005383010878035315,
      "loss": 2.9912,
      "step": 47882
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4445055723190308,
      "learning_rate": 0.0005382986028572948,
      "loss": 3.2863,
      "step": 47883
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.255629539489746,
      "learning_rate": 0.0005382961178667539,
      "loss": 3.2177,
      "step": 47884
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.8326754570007324,
      "learning_rate": 0.0005382936328319091,
      "loss": 3.1949,
      "step": 47885
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5315964221954346,
      "learning_rate": 0.0005382911477527611,
      "loss": 3.0344,
      "step": 47886
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3209105730056763,
      "learning_rate": 0.0005382886626293103,
      "loss": 3.1574,
      "step": 47887
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4076778888702393,
      "learning_rate": 0.0005382861774615571,
      "loss": 3.1688,
      "step": 47888
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.2872490882873535,
      "learning_rate": 0.000538283692249502,
      "loss": 2.9961,
      "step": 47889
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9267271757125854,
      "learning_rate": 0.0005382812069931455,
      "loss": 3.3033,
      "step": 47890
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.443109393119812,
      "learning_rate": 0.0005382787216924881,
      "loss": 2.995,
      "step": 47891
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.698387861251831,
      "learning_rate": 0.0005382762363475301,
      "loss": 3.2547,
      "step": 47892
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8054146766662598,
      "learning_rate": 0.000538273750958272,
      "loss": 2.8091,
      "step": 47893
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6741899251937866,
      "learning_rate": 0.0005382712655247144,
      "loss": 3.2681,
      "step": 47894
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5980851650238037,
      "learning_rate": 0.0005382687800468577,
      "loss": 2.9614,
      "step": 47895
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7264738082885742,
      "learning_rate": 0.0005382662945247022,
      "loss": 3.0645,
      "step": 47896
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7309156656265259,
      "learning_rate": 0.0005382638089582487,
      "loss": 3.1101,
      "step": 47897
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7250895500183105,
      "learning_rate": 0.0005382613233474972,
      "loss": 2.943,
      "step": 47898
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4463602304458618,
      "learning_rate": 0.0005382588376924485,
      "loss": 3.1766,
      "step": 47899
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9142731428146362,
      "learning_rate": 0.0005382563519931032,
      "loss": 3.1857,
      "step": 47900
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.1983400583267212,
      "learning_rate": 0.0005382538662494613,
      "loss": 2.9474,
      "step": 47901
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1580076217651367,
      "learning_rate": 0.0005382513804615236,
      "loss": 3.0763,
      "step": 47902
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8083868026733398,
      "learning_rate": 0.0005382488946292904,
      "loss": 3.2038,
      "step": 47903
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.248112678527832,
      "learning_rate": 0.0005382464087527624,
      "loss": 3.0511,
      "step": 47904
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.087369441986084,
      "learning_rate": 0.0005382439228319398,
      "loss": 3.1862,
      "step": 47905
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7866499423980713,
      "learning_rate": 0.0005382414368668231,
      "loss": 3.1701,
      "step": 47906
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3090533018112183,
      "learning_rate": 0.0005382389508574129,
      "loss": 3.1379,
      "step": 47907
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8284399509429932,
      "learning_rate": 0.0005382364648037096,
      "loss": 2.7218,
      "step": 47908
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5567303895950317,
      "learning_rate": 0.0005382339787057136,
      "loss": 3.247,
      "step": 47909
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5752218961715698,
      "learning_rate": 0.0005382314925634254,
      "loss": 3.2701,
      "step": 47910
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0063092708587646,
      "learning_rate": 0.0005382290063768454,
      "loss": 3.1143,
      "step": 47911
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4647343158721924,
      "learning_rate": 0.0005382265201459743,
      "loss": 3.2828,
      "step": 47912
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7391525506973267,
      "learning_rate": 0.0005382240338708123,
      "loss": 3.3141,
      "step": 47913
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6365643739700317,
      "learning_rate": 0.0005382215475513599,
      "loss": 3.0624,
      "step": 47914
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7021781206130981,
      "learning_rate": 0.0005382190611876176,
      "loss": 3.0649,
      "step": 47915
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2307721376419067,
      "learning_rate": 0.000538216574779586,
      "loss": 3.0685,
      "step": 47916
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.6014163494110107,
      "learning_rate": 0.0005382140883272654,
      "loss": 2.8412,
      "step": 47917
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5293327569961548,
      "learning_rate": 0.0005382116018306563,
      "loss": 3.2186,
      "step": 47918
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5056699514389038,
      "learning_rate": 0.0005382091152897592,
      "loss": 2.9491,
      "step": 47919
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5696748495101929,
      "learning_rate": 0.0005382066287045745,
      "loss": 3.0274,
      "step": 47920
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6044429540634155,
      "learning_rate": 0.0005382041420751027,
      "loss": 3.0675,
      "step": 47921
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7356663942337036,
      "learning_rate": 0.0005382016554013443,
      "loss": 3.0955,
      "step": 47922
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4061040878295898,
      "learning_rate": 0.0005381991686832996,
      "loss": 2.9872,
      "step": 47923
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4757686853408813,
      "learning_rate": 0.0005381966819209692,
      "loss": 2.9573,
      "step": 47924
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.746070146560669,
      "learning_rate": 0.0005381941951143536,
      "loss": 2.877,
      "step": 47925
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4370180368423462,
      "learning_rate": 0.0005381917082634531,
      "loss": 3.1007,
      "step": 47926
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7105339765548706,
      "learning_rate": 0.0005381892213682684,
      "loss": 3.2775,
      "step": 47927
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4020980596542358,
      "learning_rate": 0.0005381867344287997,
      "loss": 2.6832,
      "step": 47928
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4271175861358643,
      "learning_rate": 0.0005381842474450477,
      "loss": 3.0462,
      "step": 47929
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7672512531280518,
      "learning_rate": 0.0005381817604170127,
      "loss": 2.9483,
      "step": 47930
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.954811453819275,
      "learning_rate": 0.0005381792733446953,
      "loss": 3.2133,
      "step": 47931
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.318673610687256,
      "learning_rate": 0.0005381767862280958,
      "loss": 2.7903,
      "step": 47932
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4891563653945923,
      "learning_rate": 0.0005381742990672147,
      "loss": 2.9288,
      "step": 47933
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4358890056610107,
      "learning_rate": 0.0005381718118620526,
      "loss": 3.2764,
      "step": 47934
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6041715145111084,
      "learning_rate": 0.0005381693246126097,
      "loss": 3.0788,
      "step": 47935
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3726282119750977,
      "learning_rate": 0.0005381668373188867,
      "loss": 2.9785,
      "step": 47936
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6993885040283203,
      "learning_rate": 0.0005381643499808842,
      "loss": 2.9258,
      "step": 47937
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3590147495269775,
      "learning_rate": 0.0005381618625986021,
      "loss": 3.0432,
      "step": 47938
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8407727479934692,
      "learning_rate": 0.0005381593751720415,
      "loss": 2.926,
      "step": 47939
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5406110286712646,
      "learning_rate": 0.0005381568877012025,
      "loss": 2.9882,
      "step": 47940
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4913655519485474,
      "learning_rate": 0.0005381544001860855,
      "loss": 3.0992,
      "step": 47941
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6154581308364868,
      "learning_rate": 0.0005381519126266913,
      "loss": 3.0304,
      "step": 47942
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4140766859054565,
      "learning_rate": 0.0005381494250230201,
      "loss": 2.9837,
      "step": 47943
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6748509407043457,
      "learning_rate": 0.0005381469373750723,
      "loss": 3.2001,
      "step": 47944
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6704498529434204,
      "learning_rate": 0.0005381444496828486,
      "loss": 3.1176,
      "step": 47945
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6376947164535522,
      "learning_rate": 0.0005381419619463493,
      "loss": 3.2432,
      "step": 47946
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5139939785003662,
      "learning_rate": 0.0005381394741655751,
      "loss": 2.9475,
      "step": 47947
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9582948684692383,
      "learning_rate": 0.000538136986340526,
      "loss": 3.177,
      "step": 47948
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6684978008270264,
      "learning_rate": 0.0005381344984712029,
      "loss": 3.1477,
      "step": 47949
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.538071870803833,
      "learning_rate": 0.0005381320105576061,
      "loss": 3.022,
      "step": 47950
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.703810214996338,
      "learning_rate": 0.000538129522599736,
      "loss": 3.2019,
      "step": 47951
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0740628242492676,
      "learning_rate": 0.0005381270345975931,
      "loss": 3.1323,
      "step": 47952
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.1252036094665527,
      "learning_rate": 0.0005381245465511781,
      "loss": 3.0323,
      "step": 47953
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6779170036315918,
      "learning_rate": 0.0005381220584604909,
      "loss": 3.0311,
      "step": 47954
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6610493659973145,
      "learning_rate": 0.0005381195703255327,
      "loss": 2.9543,
      "step": 47955
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6764616966247559,
      "learning_rate": 0.0005381170821463033,
      "loss": 2.8141,
      "step": 47956
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8173660039901733,
      "learning_rate": 0.0005381145939228035,
      "loss": 3.0057,
      "step": 47957
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5044251680374146,
      "learning_rate": 0.0005381121056550338,
      "loss": 3.1305,
      "step": 47958
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.2353805303573608,
      "learning_rate": 0.0005381096173429945,
      "loss": 2.7994,
      "step": 47959
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3718724250793457,
      "learning_rate": 0.0005381071289866861,
      "loss": 3.0183,
      "step": 47960
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4565112590789795,
      "learning_rate": 0.0005381046405861091,
      "loss": 2.7206,
      "step": 47961
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7631713151931763,
      "learning_rate": 0.0005381021521412639,
      "loss": 2.9015,
      "step": 47962
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8474093675613403,
      "learning_rate": 0.0005380996636521512,
      "loss": 2.9622,
      "step": 47963
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.363257646560669,
      "learning_rate": 0.0005380971751187711,
      "loss": 3.0997,
      "step": 47964
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0645134449005127,
      "learning_rate": 0.0005380946865411243,
      "loss": 2.8662,
      "step": 47965
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.704358696937561,
      "learning_rate": 0.0005380921979192113,
      "loss": 3.0091,
      "step": 47966
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8762718439102173,
      "learning_rate": 0.0005380897092530323,
      "loss": 3.0931,
      "step": 47967
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.11484694480896,
      "learning_rate": 0.0005380872205425881,
      "loss": 2.9276,
      "step": 47968
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6628735065460205,
      "learning_rate": 0.0005380847317878789,
      "loss": 2.9656,
      "step": 47969
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.4121735095977783,
      "learning_rate": 0.0005380822429889052,
      "loss": 3.0697,
      "step": 47970
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.487983226776123,
      "learning_rate": 0.0005380797541456676,
      "loss": 3.1117,
      "step": 47971
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3791542053222656,
      "learning_rate": 0.0005380772652581664,
      "loss": 3.3744,
      "step": 47972
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3188304901123047,
      "learning_rate": 0.0005380747763264022,
      "loss": 3.2864,
      "step": 47973
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.885825753211975,
      "learning_rate": 0.0005380722873503754,
      "loss": 2.9223,
      "step": 47974
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6321697235107422,
      "learning_rate": 0.0005380697983300865,
      "loss": 3.0002,
      "step": 47975
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4748544692993164,
      "learning_rate": 0.0005380673092655359,
      "loss": 3.1354,
      "step": 47976
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6989328861236572,
      "learning_rate": 0.000538064820156724,
      "loss": 2.9901,
      "step": 47977
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.114901542663574,
      "learning_rate": 0.0005380623310036514,
      "loss": 2.8804,
      "step": 47978
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4346691370010376,
      "learning_rate": 0.0005380598418063186,
      "loss": 2.9944,
      "step": 47979
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.5414822101593018,
      "learning_rate": 0.0005380573525647259,
      "loss": 3.327,
      "step": 47980
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.621023178100586,
      "learning_rate": 0.0005380548632788739,
      "loss": 3.1443,
      "step": 47981
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.8451685905456543,
      "learning_rate": 0.000538052373948763,
      "loss": 3.2454,
      "step": 47982
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3468735218048096,
      "learning_rate": 0.0005380498845743936,
      "loss": 2.8075,
      "step": 47983
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4808937311172485,
      "learning_rate": 0.0005380473951557664,
      "loss": 3.0504,
      "step": 47984
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.6620017290115356,
      "learning_rate": 0.0005380449056928816,
      "loss": 3.3882,
      "step": 47985
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.517245888710022,
      "learning_rate": 0.0005380424161857397,
      "loss": 3.2923,
      "step": 47986
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.5437781810760498,
      "learning_rate": 0.0005380399266343413,
      "loss": 2.8676,
      "step": 47987
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7055737972259521,
      "learning_rate": 0.0005380374370386867,
      "loss": 3.0255,
      "step": 47988
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.4471007585525513,
      "learning_rate": 0.0005380349473987766,
      "loss": 2.8262,
      "step": 47989
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0968217849731445,
      "learning_rate": 0.0005380324577146112,
      "loss": 2.84,
      "step": 47990
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9679455757141113,
      "learning_rate": 0.0005380299679861911,
      "loss": 3.0932,
      "step": 47991
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.557520866394043,
      "learning_rate": 0.0005380274782135167,
      "loss": 3.36,
      "step": 47992
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.561795473098755,
      "learning_rate": 0.0005380249883965884,
      "loss": 3.3146,
      "step": 47993
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.317486524581909,
      "learning_rate": 0.000538022498535407,
      "loss": 3.1426,
      "step": 47994
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.3939306735992432,
      "learning_rate": 0.0005380200086299726,
      "loss": 3.0689,
      "step": 47995
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.877392292022705,
      "learning_rate": 0.0005380175186802858,
      "loss": 3.1705,
      "step": 47996
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.8986048698425293,
      "learning_rate": 0.000538015028686347,
      "loss": 2.794,
      "step": 47997
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9332427978515625,
      "learning_rate": 0.0005380125386481568,
      "loss": 2.87,
      "step": 47998
    },
    {
      "epoch": 0.62,
      "grad_norm": 2.0177950859069824,
      "learning_rate": 0.0005380100485657155,
      "loss": 3.0405,
      "step": 47999
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.9401289224624634,
      "learning_rate": 0.0005380075584390236,
      "loss": 3.0797,
      "step": 48000
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.215503454208374,
      "learning_rate": 0.0005380050682680818,
      "loss": 2.9767,
      "step": 48001
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2407573461532593,
      "learning_rate": 0.0005380025780528903,
      "loss": 3.0653,
      "step": 48002
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7384250164031982,
      "learning_rate": 0.0005380000877934494,
      "loss": 3.2718,
      "step": 48003
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.62955904006958,
      "learning_rate": 0.00053799759748976,
      "loss": 2.8028,
      "step": 48004
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.004347801208496,
      "learning_rate": 0.0005379951071418223,
      "loss": 3.3213,
      "step": 48005
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4253603219985962,
      "learning_rate": 0.0005379926167496369,
      "loss": 3.0115,
      "step": 48006
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0404231548309326,
      "learning_rate": 0.0005379901263132041,
      "loss": 3.2026,
      "step": 48007
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9706733226776123,
      "learning_rate": 0.0005379876358325244,
      "loss": 3.0498,
      "step": 48008
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9123727083206177,
      "learning_rate": 0.0005379851453075984,
      "loss": 3.3367,
      "step": 48009
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5013816356658936,
      "learning_rate": 0.0005379826547384265,
      "loss": 3.1642,
      "step": 48010
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.5709915161132812,
      "learning_rate": 0.000537980164125009,
      "loss": 3.0805,
      "step": 48011
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.155829906463623,
      "learning_rate": 0.0005379776734673465,
      "loss": 3.1447,
      "step": 48012
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.923755168914795,
      "learning_rate": 0.0005379751827654395,
      "loss": 3.1419,
      "step": 48013
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8671027421951294,
      "learning_rate": 0.0005379726920192885,
      "loss": 2.8471,
      "step": 48014
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9391692876815796,
      "learning_rate": 0.0005379702012288938,
      "loss": 3.0623,
      "step": 48015
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.5568418502807617,
      "learning_rate": 0.000537967710394256,
      "loss": 3.0121,
      "step": 48016
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5003526210784912,
      "learning_rate": 0.0005379652195153756,
      "loss": 2.9549,
      "step": 48017
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.670548439025879,
      "learning_rate": 0.0005379627285922526,
      "loss": 2.8157,
      "step": 48018
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.529801607131958,
      "learning_rate": 0.0005379602376248882,
      "loss": 2.8481,
      "step": 48019
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3911055326461792,
      "learning_rate": 0.0005379577466132824,
      "loss": 3.2139,
      "step": 48020
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8153854608535767,
      "learning_rate": 0.0005379552555574357,
      "loss": 2.8663,
      "step": 48021
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2522494792938232,
      "learning_rate": 0.0005379527644573487,
      "loss": 2.9793,
      "step": 48022
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.359957695007324,
      "learning_rate": 0.0005379502733130218,
      "loss": 3.4216,
      "step": 48023
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3851444721221924,
      "learning_rate": 0.0005379477821244554,
      "loss": 3.1306,
      "step": 48024
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4075665473937988,
      "learning_rate": 0.00053794529089165,
      "loss": 3.2955,
      "step": 48025
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6147257089614868,
      "learning_rate": 0.0005379427996146061,
      "loss": 3.027,
      "step": 48026
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7312545776367188,
      "learning_rate": 0.0005379403082933242,
      "loss": 3.0503,
      "step": 48027
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.509191632270813,
      "learning_rate": 0.0005379378169278048,
      "loss": 3.5149,
      "step": 48028
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.007246971130371,
      "learning_rate": 0.000537935325518048,
      "loss": 2.9302,
      "step": 48029
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7056236267089844,
      "learning_rate": 0.0005379328340640547,
      "loss": 3.1451,
      "step": 48030
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.900693655014038,
      "learning_rate": 0.0005379303425658251,
      "loss": 3.1788,
      "step": 48031
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7031207084655762,
      "learning_rate": 0.0005379278510233599,
      "loss": 2.9645,
      "step": 48032
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3677401542663574,
      "learning_rate": 0.0005379253594366592,
      "loss": 2.9653,
      "step": 48033
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7486836910247803,
      "learning_rate": 0.0005379228678057239,
      "loss": 2.89,
      "step": 48034
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6937533617019653,
      "learning_rate": 0.0005379203761305541,
      "loss": 3.2397,
      "step": 48035
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6072378158569336,
      "learning_rate": 0.0005379178844111506,
      "loss": 3.0938,
      "step": 48036
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2939085960388184,
      "learning_rate": 0.0005379153926475135,
      "loss": 2.9001,
      "step": 48037
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.8361854553222656,
      "learning_rate": 0.0005379129008396435,
      "loss": 2.8886,
      "step": 48038
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.592805027961731,
      "learning_rate": 0.0005379104089875409,
      "loss": 3.0742,
      "step": 48039
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.409233331680298,
      "learning_rate": 0.0005379079170912064,
      "loss": 3.1341,
      "step": 48040
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8244825601577759,
      "learning_rate": 0.0005379054251506404,
      "loss": 3.1389,
      "step": 48041
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.6669890880584717,
      "learning_rate": 0.000537902933165843,
      "loss": 2.9739,
      "step": 48042
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7137446403503418,
      "learning_rate": 0.0005379004411368152,
      "loss": 2.9167,
      "step": 48043
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2458717823028564,
      "learning_rate": 0.0005378979490635572,
      "loss": 2.8734,
      "step": 48044
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.056513547897339,
      "learning_rate": 0.0005378954569460693,
      "loss": 3.1463,
      "step": 48045
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.191352605819702,
      "learning_rate": 0.0005378929647843523,
      "loss": 3.1239,
      "step": 48046
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4273298978805542,
      "learning_rate": 0.0005378904725784065,
      "loss": 3.007,
      "step": 48047
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.767937183380127,
      "learning_rate": 0.0005378879803282324,
      "loss": 3.0436,
      "step": 48048
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1076295375823975,
      "learning_rate": 0.0005378854880338304,
      "loss": 2.8535,
      "step": 48049
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.35294771194458,
      "learning_rate": 0.0005378829956952009,
      "loss": 2.837,
      "step": 48050
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6889888048171997,
      "learning_rate": 0.0005378805033123446,
      "loss": 3.0154,
      "step": 48051
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5943694114685059,
      "learning_rate": 0.0005378780108852618,
      "loss": 3.0044,
      "step": 48052
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1504392623901367,
      "learning_rate": 0.000537875518413953,
      "loss": 2.8427,
      "step": 48053
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6298726797103882,
      "learning_rate": 0.0005378730258984185,
      "loss": 2.954,
      "step": 48054
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.542149543762207,
      "learning_rate": 0.000537870533338659,
      "loss": 2.9933,
      "step": 48055
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.496239423751831,
      "learning_rate": 0.0005378680407346749,
      "loss": 3.1909,
      "step": 48056
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.953200340270996,
      "learning_rate": 0.0005378655480864666,
      "loss": 3.1126,
      "step": 48057
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4493589401245117,
      "learning_rate": 0.0005378630553940347,
      "loss": 3.05,
      "step": 48058
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3244765996932983,
      "learning_rate": 0.0005378605626573795,
      "loss": 2.9133,
      "step": 48059
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5122698545455933,
      "learning_rate": 0.0005378580698765015,
      "loss": 2.7952,
      "step": 48060
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2923208475112915,
      "learning_rate": 0.0005378555770514013,
      "loss": 2.9642,
      "step": 48061
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8104782104492188,
      "learning_rate": 0.0005378530841820791,
      "loss": 2.9233,
      "step": 48062
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3989381790161133,
      "learning_rate": 0.0005378505912685356,
      "loss": 3.0193,
      "step": 48063
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.387345790863037,
      "learning_rate": 0.0005378480983107714,
      "loss": 2.9787,
      "step": 48064
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4072990417480469,
      "learning_rate": 0.0005378456053087865,
      "loss": 3.2627,
      "step": 48065
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9546527862548828,
      "learning_rate": 0.0005378431122625816,
      "loss": 2.7739,
      "step": 48066
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3261066675186157,
      "learning_rate": 0.0005378406191721572,
      "loss": 2.9781,
      "step": 48067
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3149250745773315,
      "learning_rate": 0.0005378381260375138,
      "loss": 2.9686,
      "step": 48068
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.377341866493225,
      "learning_rate": 0.0005378356328586518,
      "loss": 3.0813,
      "step": 48069
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5156923532485962,
      "learning_rate": 0.0005378331396355716,
      "loss": 3.1972,
      "step": 48070
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5529038906097412,
      "learning_rate": 0.0005378306463682738,
      "loss": 2.97,
      "step": 48071
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.5715348720550537,
      "learning_rate": 0.0005378281530567587,
      "loss": 2.9802,
      "step": 48072
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3905870914459229,
      "learning_rate": 0.000537825659701027,
      "loss": 3.2364,
      "step": 48073
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6543477773666382,
      "learning_rate": 0.0005378231663010789,
      "loss": 3.2113,
      "step": 48074
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.0000760555267334,
      "learning_rate": 0.0005378206728569151,
      "loss": 2.9648,
      "step": 48075
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4909489154815674,
      "learning_rate": 0.0005378181793685359,
      "loss": 3.0063,
      "step": 48076
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.8218610286712646,
      "learning_rate": 0.0005378156858359417,
      "loss": 2.9845,
      "step": 48077
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3426836729049683,
      "learning_rate": 0.0005378131922591331,
      "loss": 3.1436,
      "step": 48078
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6103068590164185,
      "learning_rate": 0.0005378106986381107,
      "loss": 3.2014,
      "step": 48079
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5575717687606812,
      "learning_rate": 0.0005378082049728747,
      "loss": 2.754,
      "step": 48080
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5097686052322388,
      "learning_rate": 0.0005378057112634257,
      "loss": 2.9122,
      "step": 48081
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8487756252288818,
      "learning_rate": 0.0005378032175097641,
      "loss": 2.8615,
      "step": 48082
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.518367886543274,
      "learning_rate": 0.0005378007237118904,
      "loss": 2.9609,
      "step": 48083
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4318194389343262,
      "learning_rate": 0.0005377982298698051,
      "loss": 3.2761,
      "step": 48084
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0753631591796875,
      "learning_rate": 0.0005377957359835085,
      "loss": 2.935,
      "step": 48085
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3418365716934204,
      "learning_rate": 0.0005377932420530012,
      "loss": 3.179,
      "step": 48086
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.517046332359314,
      "learning_rate": 0.0005377907480782837,
      "loss": 2.9545,
      "step": 48087
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4394102096557617,
      "learning_rate": 0.0005377882540593564,
      "loss": 3.0343,
      "step": 48088
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7423464059829712,
      "learning_rate": 0.0005377857599962199,
      "loss": 2.8426,
      "step": 48089
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3497196435928345,
      "learning_rate": 0.0005377832658888744,
      "loss": 2.862,
      "step": 48090
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7324947118759155,
      "learning_rate": 0.0005377807717373205,
      "loss": 3.0285,
      "step": 48091
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4207878112792969,
      "learning_rate": 0.0005377782775415587,
      "loss": 3.0425,
      "step": 48092
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6806193590164185,
      "learning_rate": 0.0005377757833015894,
      "loss": 2.8999,
      "step": 48093
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.598659634590149,
      "learning_rate": 0.0005377732890174132,
      "loss": 3.0045,
      "step": 48094
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6975902318954468,
      "learning_rate": 0.0005377707946890302,
      "loss": 2.9903,
      "step": 48095
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5060155391693115,
      "learning_rate": 0.0005377683003164414,
      "loss": 3.1201,
      "step": 48096
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3668149709701538,
      "learning_rate": 0.0005377658058996469,
      "loss": 3.1578,
      "step": 48097
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5569179058074951,
      "learning_rate": 0.0005377633114386471,
      "loss": 3.098,
      "step": 48098
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.697960615158081,
      "learning_rate": 0.0005377608169334429,
      "loss": 2.8232,
      "step": 48099
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5027475357055664,
      "learning_rate": 0.0005377583223840342,
      "loss": 3.0146,
      "step": 48100
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.564576506614685,
      "learning_rate": 0.0005377558277904219,
      "loss": 2.9209,
      "step": 48101
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.713932991027832,
      "learning_rate": 0.0005377533331526063,
      "loss": 3.1944,
      "step": 48102
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3592753410339355,
      "learning_rate": 0.0005377508384705878,
      "loss": 3.0664,
      "step": 48103
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5581951141357422,
      "learning_rate": 0.000537748343744367,
      "loss": 2.9635,
      "step": 48104
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8959580659866333,
      "learning_rate": 0.0005377458489739443,
      "loss": 2.8942,
      "step": 48105
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.194664239883423,
      "learning_rate": 0.0005377433541593201,
      "loss": 2.9428,
      "step": 48106
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4848744869232178,
      "learning_rate": 0.0005377408593004949,
      "loss": 2.8507,
      "step": 48107
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6791990995407104,
      "learning_rate": 0.0005377383643974692,
      "loss": 3.0531,
      "step": 48108
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.736337423324585,
      "learning_rate": 0.0005377358694502435,
      "loss": 3.1182,
      "step": 48109
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6974221467971802,
      "learning_rate": 0.0005377333744588182,
      "loss": 3.1335,
      "step": 48110
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4063628911972046,
      "learning_rate": 0.0005377308794231937,
      "loss": 3.0303,
      "step": 48111
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4289251565933228,
      "learning_rate": 0.0005377283843433706,
      "loss": 2.9165,
      "step": 48112
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.595193862915039,
      "learning_rate": 0.0005377258892193494,
      "loss": 3.0596,
      "step": 48113
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5362522602081299,
      "learning_rate": 0.0005377233940511303,
      "loss": 3.0167,
      "step": 48114
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4830095767974854,
      "learning_rate": 0.000537720898838714,
      "loss": 3.2413,
      "step": 48115
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6606084108352661,
      "learning_rate": 0.000537718403582101,
      "loss": 3.0322,
      "step": 48116
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6433417797088623,
      "learning_rate": 0.0005377159082812915,
      "loss": 3.2117,
      "step": 48117
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.6831374168395996,
      "learning_rate": 0.0005377134129362862,
      "loss": 3.0727,
      "step": 48118
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5627652406692505,
      "learning_rate": 0.0005377109175470856,
      "loss": 3.1209,
      "step": 48119
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7966725826263428,
      "learning_rate": 0.0005377084221136898,
      "loss": 3.0719,
      "step": 48120
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6144146919250488,
      "learning_rate": 0.0005377059266360997,
      "loss": 3.0363,
      "step": 48121
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.650632381439209,
      "learning_rate": 0.0005377034311143156,
      "loss": 3.1107,
      "step": 48122
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6587791442871094,
      "learning_rate": 0.0005377009355483379,
      "loss": 3.2261,
      "step": 48123
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3807028532028198,
      "learning_rate": 0.0005376984399381671,
      "loss": 2.9503,
      "step": 48124
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3512680530548096,
      "learning_rate": 0.0005376959442838036,
      "loss": 3.1927,
      "step": 48125
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7354475259780884,
      "learning_rate": 0.0005376934485852479,
      "loss": 3.1019,
      "step": 48126
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8470301628112793,
      "learning_rate": 0.0005376909528425007,
      "loss": 3.1569,
      "step": 48127
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.567154884338379,
      "learning_rate": 0.0005376884570555622,
      "loss": 2.9342,
      "step": 48128
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5937598943710327,
      "learning_rate": 0.0005376859612244329,
      "loss": 2.9606,
      "step": 48129
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.834646224975586,
      "learning_rate": 0.0005376834653491133,
      "loss": 2.9642,
      "step": 48130
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4947969913482666,
      "learning_rate": 0.0005376809694296038,
      "loss": 2.8905,
      "step": 48131
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6464375257492065,
      "learning_rate": 0.000537678473465905,
      "loss": 3.2058,
      "step": 48132
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3140006065368652,
      "learning_rate": 0.0005376759774580173,
      "loss": 3.3181,
      "step": 48133
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5992170572280884,
      "learning_rate": 0.0005376734814059411,
      "loss": 2.9821,
      "step": 48134
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.245119094848633,
      "learning_rate": 0.0005376709853096769,
      "loss": 2.9538,
      "step": 48135
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1646552085876465,
      "learning_rate": 0.0005376684891692252,
      "loss": 3.0289,
      "step": 48136
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6807806491851807,
      "learning_rate": 0.0005376659929845865,
      "loss": 2.9438,
      "step": 48137
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5112067461013794,
      "learning_rate": 0.0005376634967557611,
      "loss": 3.1617,
      "step": 48138
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.0592198371887207,
      "learning_rate": 0.0005376610004827496,
      "loss": 2.9914,
      "step": 48139
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.8475265502929688,
      "learning_rate": 0.0005376585041655525,
      "loss": 2.9339,
      "step": 48140
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7243201732635498,
      "learning_rate": 0.0005376560078041701,
      "loss": 2.9029,
      "step": 48141
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5240933895111084,
      "learning_rate": 0.0005376535113986031,
      "loss": 3.0688,
      "step": 48142
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.795392036437988,
      "learning_rate": 0.0005376510149488516,
      "loss": 3.1072,
      "step": 48143
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.104448080062866,
      "learning_rate": 0.0005376485184549165,
      "loss": 2.9806,
      "step": 48144
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5629621744155884,
      "learning_rate": 0.0005376460219167979,
      "loss": 2.9838,
      "step": 48145
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.729900598526001,
      "learning_rate": 0.0005376435253344966,
      "loss": 3.0972,
      "step": 48146
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3306870460510254,
      "learning_rate": 0.0005376410287080128,
      "loss": 3.0728,
      "step": 48147
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.0240743160247803,
      "learning_rate": 0.000537638532037347,
      "loss": 3.0255,
      "step": 48148
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.9742064476013184,
      "learning_rate": 0.0005376360353224997,
      "loss": 3.1674,
      "step": 48149
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3587934970855713,
      "learning_rate": 0.0005376335385634714,
      "loss": 2.9875,
      "step": 48150
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5316296815872192,
      "learning_rate": 0.0005376310417602626,
      "loss": 3.214,
      "step": 48151
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.8939449787139893,
      "learning_rate": 0.0005376285449128737,
      "loss": 3.27,
      "step": 48152
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3869142532348633,
      "learning_rate": 0.0005376260480213052,
      "loss": 3.0631,
      "step": 48153
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2911714315414429,
      "learning_rate": 0.0005376235510855574,
      "loss": 3.0347,
      "step": 48154
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8270833492279053,
      "learning_rate": 0.000537621054105631,
      "loss": 2.9352,
      "step": 48155
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5179588794708252,
      "learning_rate": 0.0005376185570815262,
      "loss": 2.8277,
      "step": 48156
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.604407787322998,
      "learning_rate": 0.0005376160600132438,
      "loss": 3.1911,
      "step": 48157
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7708477973937988,
      "learning_rate": 0.000537613562900784,
      "loss": 3.3541,
      "step": 48158
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9612550735473633,
      "learning_rate": 0.0005376110657441474,
      "loss": 3.1456,
      "step": 48159
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5205087661743164,
      "learning_rate": 0.0005376085685433343,
      "loss": 3.1172,
      "step": 48160
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8775088787078857,
      "learning_rate": 0.0005376060712983454,
      "loss": 3.0812,
      "step": 48161
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3391735553741455,
      "learning_rate": 0.0005376035740091811,
      "loss": 3.0541,
      "step": 48162
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.021693229675293,
      "learning_rate": 0.0005376010766758417,
      "loss": 2.8226,
      "step": 48163
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5923397541046143,
      "learning_rate": 0.0005375985792983279,
      "loss": 3.1437,
      "step": 48164
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9434735774993896,
      "learning_rate": 0.0005375960818766399,
      "loss": 2.9013,
      "step": 48165
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5120130777359009,
      "learning_rate": 0.0005375935844107784,
      "loss": 3.0644,
      "step": 48166
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.251868486404419,
      "learning_rate": 0.0005375910869007437,
      "loss": 2.862,
      "step": 48167
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.889761209487915,
      "learning_rate": 0.0005375885893465363,
      "loss": 2.9435,
      "step": 48168
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.1997969150543213,
      "learning_rate": 0.0005375860917481568,
      "loss": 3.161,
      "step": 48169
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4523171186447144,
      "learning_rate": 0.0005375835941056055,
      "loss": 3.3114,
      "step": 48170
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7909526824951172,
      "learning_rate": 0.0005375810964188829,
      "loss": 3.0601,
      "step": 48171
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.299699068069458,
      "learning_rate": 0.0005375785986879895,
      "loss": 3.0372,
      "step": 48172
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6609493494033813,
      "learning_rate": 0.0005375761009129258,
      "loss": 3.196,
      "step": 48173
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.518798828125,
      "learning_rate": 0.0005375736030936922,
      "loss": 3.2093,
      "step": 48174
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8452658653259277,
      "learning_rate": 0.0005375711052302893,
      "loss": 2.827,
      "step": 48175
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9711295366287231,
      "learning_rate": 0.0005375686073227173,
      "loss": 2.9571,
      "step": 48176
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7900118827819824,
      "learning_rate": 0.0005375661093709767,
      "loss": 3.0889,
      "step": 48177
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6869723796844482,
      "learning_rate": 0.0005375636113750683,
      "loss": 2.9072,
      "step": 48178
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7535502910614014,
      "learning_rate": 0.0005375611133349922,
      "loss": 3.1061,
      "step": 48179
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.663270115852356,
      "learning_rate": 0.0005375586152507492,
      "loss": 3.1544,
      "step": 48180
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5066194534301758,
      "learning_rate": 0.0005375561171223394,
      "loss": 3.2281,
      "step": 48181
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7136479616165161,
      "learning_rate": 0.0005375536189497635,
      "loss": 2.9881,
      "step": 48182
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.791374683380127,
      "learning_rate": 0.0005375511207330218,
      "loss": 3.0365,
      "step": 48183
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.396850347518921,
      "learning_rate": 0.0005375486224721149,
      "loss": 3.1334,
      "step": 48184
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5018649101257324,
      "learning_rate": 0.0005375461241670433,
      "loss": 3.19,
      "step": 48185
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5064197778701782,
      "learning_rate": 0.0005375436258178073,
      "loss": 3.1908,
      "step": 48186
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.606257677078247,
      "learning_rate": 0.0005375411274244075,
      "loss": 3.0632,
      "step": 48187
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.026634693145752,
      "learning_rate": 0.0005375386289868443,
      "loss": 2.8536,
      "step": 48188
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.276888132095337,
      "learning_rate": 0.0005375361305051182,
      "loss": 3.0872,
      "step": 48189
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8592380285263062,
      "learning_rate": 0.0005375336319792297,
      "loss": 3.0296,
      "step": 48190
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5708427429199219,
      "learning_rate": 0.0005375311334091792,
      "loss": 3.1167,
      "step": 48191
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4170002937316895,
      "learning_rate": 0.000537528634794967,
      "loss": 2.9669,
      "step": 48192
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.323044776916504,
      "learning_rate": 0.000537526136136594,
      "loss": 3.2208,
      "step": 48193
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.426987409591675,
      "learning_rate": 0.0005375236374340602,
      "loss": 3.0258,
      "step": 48194
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5321996212005615,
      "learning_rate": 0.0005375211386873664,
      "loss": 3.0084,
      "step": 48195
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7453579902648926,
      "learning_rate": 0.000537518639896513,
      "loss": 2.9648,
      "step": 48196
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6037161350250244,
      "learning_rate": 0.0005375161410615003,
      "loss": 3.0812,
      "step": 48197
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.613033652305603,
      "learning_rate": 0.0005375136421823289,
      "loss": 3.166,
      "step": 48198
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.922411561012268,
      "learning_rate": 0.0005375111432589991,
      "loss": 2.813,
      "step": 48199
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4084551334381104,
      "learning_rate": 0.0005375086442915117,
      "loss": 2.8998,
      "step": 48200
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.039395570755005,
      "learning_rate": 0.0005375061452798668,
      "loss": 2.9567,
      "step": 48201
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7362029552459717,
      "learning_rate": 0.0005375036462240651,
      "loss": 3.1032,
      "step": 48202
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8023287057876587,
      "learning_rate": 0.000537501147124107,
      "loss": 3.0746,
      "step": 48203
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3663842678070068,
      "learning_rate": 0.0005374986479799929,
      "loss": 3.071,
      "step": 48204
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4943654537200928,
      "learning_rate": 0.0005374961487917233,
      "loss": 3.2141,
      "step": 48205
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9569545984268188,
      "learning_rate": 0.0005374936495592987,
      "loss": 3.1286,
      "step": 48206
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.345542550086975,
      "learning_rate": 0.0005374911502827196,
      "loss": 3.2472,
      "step": 48207
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6845413446426392,
      "learning_rate": 0.0005374886509619864,
      "loss": 3.1778,
      "step": 48208
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.494086503982544,
      "learning_rate": 0.0005374861515970997,
      "loss": 3.0256,
      "step": 48209
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.34678053855896,
      "learning_rate": 0.0005374836521880596,
      "loss": 3.179,
      "step": 48210
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8958430290222168,
      "learning_rate": 0.000537481152734867,
      "loss": 3.1196,
      "step": 48211
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6422001123428345,
      "learning_rate": 0.000537478653237522,
      "loss": 3.3175,
      "step": 48212
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5888830423355103,
      "learning_rate": 0.0005374761536960254,
      "loss": 3.2059,
      "step": 48213
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6653436422348022,
      "learning_rate": 0.0005374736541103773,
      "loss": 3.3106,
      "step": 48214
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4341559410095215,
      "learning_rate": 0.0005374711544805785,
      "loss": 2.9365,
      "step": 48215
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4099808931350708,
      "learning_rate": 0.0005374686548066293,
      "loss": 2.8959,
      "step": 48216
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.693476676940918,
      "learning_rate": 0.0005374661550885302,
      "loss": 2.9694,
      "step": 48217
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.625922918319702,
      "learning_rate": 0.0005374636553262817,
      "loss": 3.0188,
      "step": 48218
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6788185834884644,
      "learning_rate": 0.0005374611555198842,
      "loss": 3.0972,
      "step": 48219
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0969579219818115,
      "learning_rate": 0.0005374586556693381,
      "loss": 3.0774,
      "step": 48220
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.9424989223480225,
      "learning_rate": 0.0005374561557746441,
      "loss": 2.9143,
      "step": 48221
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0133605003356934,
      "learning_rate": 0.0005374536558358025,
      "loss": 2.9138,
      "step": 48222
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7878674268722534,
      "learning_rate": 0.0005374511558528137,
      "loss": 3.161,
      "step": 48223
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1206893920898438,
      "learning_rate": 0.0005374486558256783,
      "loss": 3.0166,
      "step": 48224
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9004509449005127,
      "learning_rate": 0.0005374461557543967,
      "loss": 3.3549,
      "step": 48225
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9244474172592163,
      "learning_rate": 0.0005374436556389693,
      "loss": 2.9295,
      "step": 48226
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1500327587127686,
      "learning_rate": 0.0005374411554793968,
      "loss": 3.1861,
      "step": 48227
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6034965515136719,
      "learning_rate": 0.0005374386552756793,
      "loss": 3.0115,
      "step": 48228
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5139068365097046,
      "learning_rate": 0.0005374361550278177,
      "loss": 3.1988,
      "step": 48229
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4656755924224854,
      "learning_rate": 0.0005374336547358121,
      "loss": 3.3389,
      "step": 48230
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5711071491241455,
      "learning_rate": 0.0005374311543996632,
      "loss": 2.9327,
      "step": 48231
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6723284721374512,
      "learning_rate": 0.0005374286540193713,
      "loss": 3.019,
      "step": 48232
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6546212434768677,
      "learning_rate": 0.0005374261535949368,
      "loss": 3.1362,
      "step": 48233
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2586212158203125,
      "learning_rate": 0.0005374236531263605,
      "loss": 3.1025,
      "step": 48234
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3542091846466064,
      "learning_rate": 0.0005374211526136426,
      "loss": 3.1108,
      "step": 48235
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8286325931549072,
      "learning_rate": 0.0005374186520567836,
      "loss": 3.0389,
      "step": 48236
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6353214979171753,
      "learning_rate": 0.000537416151455784,
      "loss": 3.3738,
      "step": 48237
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9828829765319824,
      "learning_rate": 0.0005374136508106443,
      "loss": 2.9241,
      "step": 48238
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3841135501861572,
      "learning_rate": 0.0005374111501213649,
      "loss": 3.0979,
      "step": 48239
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6299285888671875,
      "learning_rate": 0.0005374086493879462,
      "loss": 3.2454,
      "step": 48240
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.402589201927185,
      "learning_rate": 0.0005374061486103889,
      "loss": 3.1556,
      "step": 48241
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.5380494594573975,
      "learning_rate": 0.0005374036477886931,
      "loss": 2.7799,
      "step": 48242
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.542047381401062,
      "learning_rate": 0.0005374011469228597,
      "loss": 3.0784,
      "step": 48243
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7746752500534058,
      "learning_rate": 0.0005373986460128888,
      "loss": 3.0814,
      "step": 48244
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5777373313903809,
      "learning_rate": 0.0005373961450587812,
      "loss": 3.0953,
      "step": 48245
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4380240440368652,
      "learning_rate": 0.0005373936440605369,
      "loss": 3.0746,
      "step": 48246
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3666889667510986,
      "learning_rate": 0.0005373911430181569,
      "loss": 2.8745,
      "step": 48247
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4997076988220215,
      "learning_rate": 0.0005373886419316413,
      "loss": 3.1901,
      "step": 48248
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.619006872177124,
      "learning_rate": 0.0005373861408009906,
      "loss": 3.2513,
      "step": 48249
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4526771306991577,
      "learning_rate": 0.0005373836396262054,
      "loss": 2.8263,
      "step": 48250
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6944547891616821,
      "learning_rate": 0.0005373811384072861,
      "loss": 2.8602,
      "step": 48251
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8923689126968384,
      "learning_rate": 0.0005373786371442331,
      "loss": 3.0185,
      "step": 48252
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.417170524597168,
      "learning_rate": 0.0005373761358370471,
      "loss": 3.0958,
      "step": 48253
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.846686601638794,
      "learning_rate": 0.0005373736344857282,
      "loss": 2.838,
      "step": 48254
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.792198419570923,
      "learning_rate": 0.0005373711330902772,
      "loss": 3.118,
      "step": 48255
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0024514198303223,
      "learning_rate": 0.0005373686316506944,
      "loss": 2.969,
      "step": 48256
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7575185298919678,
      "learning_rate": 0.0005373661301669802,
      "loss": 3.2694,
      "step": 48257
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.687621831893921,
      "learning_rate": 0.0005373636286391352,
      "loss": 2.9929,
      "step": 48258
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6286588907241821,
      "learning_rate": 0.0005373611270671599,
      "loss": 2.9774,
      "step": 48259
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7056950330734253,
      "learning_rate": 0.0005373586254510546,
      "loss": 3.1517,
      "step": 48260
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7513576745986938,
      "learning_rate": 0.0005373561237908199,
      "loss": 2.8607,
      "step": 48261
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6738495826721191,
      "learning_rate": 0.0005373536220864562,
      "loss": 2.8919,
      "step": 48262
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6018222570419312,
      "learning_rate": 0.000537351120337964,
      "loss": 2.9698,
      "step": 48263
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.645729422569275,
      "learning_rate": 0.0005373486185453437,
      "loss": 3.1917,
      "step": 48264
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5207419395446777,
      "learning_rate": 0.0005373461167085959,
      "loss": 3.2025,
      "step": 48265
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4444814920425415,
      "learning_rate": 0.0005373436148277208,
      "loss": 2.9977,
      "step": 48266
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3040152788162231,
      "learning_rate": 0.0005373411129027193,
      "loss": 3.1187,
      "step": 48267
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7674667835235596,
      "learning_rate": 0.0005373386109335913,
      "loss": 2.9601,
      "step": 48268
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7574725151062012,
      "learning_rate": 0.0005373361089203378,
      "loss": 3.1458,
      "step": 48269
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6907849311828613,
      "learning_rate": 0.000537333606862959,
      "loss": 3.3205,
      "step": 48270
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.486926794052124,
      "learning_rate": 0.0005373311047614554,
      "loss": 2.9315,
      "step": 48271
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6263118982315063,
      "learning_rate": 0.0005373286026158274,
      "loss": 3.0253,
      "step": 48272
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.464160680770874,
      "learning_rate": 0.0005373261004260755,
      "loss": 3.1421,
      "step": 48273
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6345866918563843,
      "learning_rate": 0.0005373235981922003,
      "loss": 3.0412,
      "step": 48274
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6831979751586914,
      "learning_rate": 0.0005373210959142021,
      "loss": 2.9323,
      "step": 48275
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5003666877746582,
      "learning_rate": 0.0005373185935920816,
      "loss": 2.8909,
      "step": 48276
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8744150400161743,
      "learning_rate": 0.0005373160912258389,
      "loss": 3.0392,
      "step": 48277
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5106792449951172,
      "learning_rate": 0.0005373135888154747,
      "loss": 3.0713,
      "step": 48278
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.568031668663025,
      "learning_rate": 0.0005373110863609894,
      "loss": 3.0646,
      "step": 48279
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8959535360336304,
      "learning_rate": 0.0005373085838623836,
      "loss": 3.0983,
      "step": 48280
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9942115545272827,
      "learning_rate": 0.0005373060813196576,
      "loss": 2.9776,
      "step": 48281
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2588649988174438,
      "learning_rate": 0.0005373035787328119,
      "loss": 2.9582,
      "step": 48282
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.35567045211792,
      "learning_rate": 0.000537301076101847,
      "loss": 3.2001,
      "step": 48283
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0864551067352295,
      "learning_rate": 0.0005372985734267633,
      "loss": 3.0683,
      "step": 48284
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8837507963180542,
      "learning_rate": 0.0005372960707075613,
      "loss": 3.241,
      "step": 48285
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7629591226577759,
      "learning_rate": 0.0005372935679442415,
      "loss": 3.0908,
      "step": 48286
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4937756061553955,
      "learning_rate": 0.0005372910651368044,
      "loss": 2.746,
      "step": 48287
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4838553667068481,
      "learning_rate": 0.0005372885622852505,
      "loss": 2.8726,
      "step": 48288
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3729734420776367,
      "learning_rate": 0.00053728605938958,
      "loss": 2.9764,
      "step": 48289
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.406062126159668,
      "learning_rate": 0.0005372835564497936,
      "loss": 2.8635,
      "step": 48290
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2375054359436035,
      "learning_rate": 0.0005372810534658918,
      "loss": 3.104,
      "step": 48291
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.371756672859192,
      "learning_rate": 0.0005372785504378748,
      "loss": 3.3589,
      "step": 48292
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4123626947402954,
      "learning_rate": 0.0005372760473657433,
      "loss": 3.0528,
      "step": 48293
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4515434503555298,
      "learning_rate": 0.0005372735442494978,
      "loss": 2.732,
      "step": 48294
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.561530590057373,
      "learning_rate": 0.0005372710410891387,
      "loss": 3.0637,
      "step": 48295
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4534859657287598,
      "learning_rate": 0.0005372685378846663,
      "loss": 3.0829,
      "step": 48296
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4308255910873413,
      "learning_rate": 0.0005372660346360812,
      "loss": 3.1525,
      "step": 48297
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7778618335723877,
      "learning_rate": 0.000537263531343384,
      "loss": 3.2563,
      "step": 48298
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9613277912139893,
      "learning_rate": 0.0005372610280065748,
      "loss": 3.1403,
      "step": 48299
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5604897737503052,
      "learning_rate": 0.0005372585246256545,
      "loss": 3.1157,
      "step": 48300
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4590389728546143,
      "learning_rate": 0.0005372560212006233,
      "loss": 3.1581,
      "step": 48301
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.498531699180603,
      "learning_rate": 0.0005372535177314817,
      "loss": 3.0357,
      "step": 48302
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.676262378692627,
      "learning_rate": 0.0005372510142182302,
      "loss": 3.0691,
      "step": 48303
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3880031108856201,
      "learning_rate": 0.0005372485106608693,
      "loss": 3.1953,
      "step": 48304
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6834059953689575,
      "learning_rate": 0.0005372460070593994,
      "loss": 2.8678,
      "step": 48305
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4123135805130005,
      "learning_rate": 0.000537243503413821,
      "loss": 3.3301,
      "step": 48306
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5961613655090332,
      "learning_rate": 0.0005372409997241344,
      "loss": 3.1411,
      "step": 48307
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4608184099197388,
      "learning_rate": 0.0005372384959903405,
      "loss": 3.0986,
      "step": 48308
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6796181201934814,
      "learning_rate": 0.0005372359922124393,
      "loss": 2.9699,
      "step": 48309
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8501489162445068,
      "learning_rate": 0.0005372334883904315,
      "loss": 3.2898,
      "step": 48310
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4569796323776245,
      "learning_rate": 0.0005372309845243175,
      "loss": 2.9066,
      "step": 48311
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6924798488616943,
      "learning_rate": 0.0005372284806140978,
      "loss": 3.1126,
      "step": 48312
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6725587844848633,
      "learning_rate": 0.0005372259766597729,
      "loss": 2.9944,
      "step": 48313
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9317570924758911,
      "learning_rate": 0.0005372234726613432,
      "loss": 3.1788,
      "step": 48314
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6744476556777954,
      "learning_rate": 0.0005372209686188091,
      "loss": 2.7613,
      "step": 48315
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5312296152114868,
      "learning_rate": 0.0005372184645321712,
      "loss": 3.1544,
      "step": 48316
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7382677793502808,
      "learning_rate": 0.0005372159604014299,
      "loss": 3.2023,
      "step": 48317
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5111247301101685,
      "learning_rate": 0.0005372134562265856,
      "loss": 3.0511,
      "step": 48318
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5287092924118042,
      "learning_rate": 0.0005372109520076388,
      "loss": 3.0352,
      "step": 48319
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7869967222213745,
      "learning_rate": 0.0005372084477445902,
      "loss": 3.0029,
      "step": 48320
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2364017963409424,
      "learning_rate": 0.0005372059434374401,
      "loss": 3.0061,
      "step": 48321
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7567826509475708,
      "learning_rate": 0.0005372034390861887,
      "loss": 3.037,
      "step": 48322
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4736292362213135,
      "learning_rate": 0.0005372009346908368,
      "loss": 2.9462,
      "step": 48323
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4180641174316406,
      "learning_rate": 0.0005371984302513849,
      "loss": 3.4016,
      "step": 48324
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.769022226333618,
      "learning_rate": 0.0005371959257678331,
      "loss": 3.1441,
      "step": 48325
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.637133002281189,
      "learning_rate": 0.0005371934212401822,
      "loss": 2.8691,
      "step": 48326
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8099592924118042,
      "learning_rate": 0.0005371909166684327,
      "loss": 2.9547,
      "step": 48327
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.086921215057373,
      "learning_rate": 0.0005371884120525848,
      "loss": 3.1104,
      "step": 48328
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.4235355854034424,
      "learning_rate": 0.0005371859073926391,
      "loss": 3.092,
      "step": 48329
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5652401447296143,
      "learning_rate": 0.0005371834026885961,
      "loss": 2.9876,
      "step": 48330
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3513588905334473,
      "learning_rate": 0.0005371808979404562,
      "loss": 2.958,
      "step": 48331
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9498282670974731,
      "learning_rate": 0.00053717839314822,
      "loss": 3.0639,
      "step": 48332
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.266221761703491,
      "learning_rate": 0.0005371758883118877,
      "loss": 2.838,
      "step": 48333
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4654712677001953,
      "learning_rate": 0.0005371733834314599,
      "loss": 2.971,
      "step": 48334
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.33233904838562,
      "learning_rate": 0.0005371708785069372,
      "loss": 2.9558,
      "step": 48335
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7431304454803467,
      "learning_rate": 0.00053716837353832,
      "loss": 2.8487,
      "step": 48336
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6854277849197388,
      "learning_rate": 0.0005371658685256086,
      "loss": 2.9624,
      "step": 48337
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6462266445159912,
      "learning_rate": 0.0005371633634688037,
      "loss": 3.1004,
      "step": 48338
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8900952339172363,
      "learning_rate": 0.0005371608583679056,
      "loss": 3.3746,
      "step": 48339
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.1101555824279785,
      "learning_rate": 0.0005371583532229148,
      "loss": 3.2202,
      "step": 48340
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.741450548171997,
      "learning_rate": 0.0005371558480338318,
      "loss": 3.0466,
      "step": 48341
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.747976541519165,
      "learning_rate": 0.0005371533428006571,
      "loss": 3.0129,
      "step": 48342
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.1557939052581787,
      "learning_rate": 0.000537150837523391,
      "loss": 2.939,
      "step": 48343
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4344323873519897,
      "learning_rate": 0.0005371483322020341,
      "loss": 2.8472,
      "step": 48344
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2838168144226074,
      "learning_rate": 0.000537145826836587,
      "loss": 3.0258,
      "step": 48345
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5219600200653076,
      "learning_rate": 0.0005371433214270499,
      "loss": 3.0909,
      "step": 48346
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3970227241516113,
      "learning_rate": 0.0005371408159734235,
      "loss": 2.9366,
      "step": 48347
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.8557941913604736,
      "learning_rate": 0.0005371383104757079,
      "loss": 2.9132,
      "step": 48348
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9911755323410034,
      "learning_rate": 0.0005371358049339039,
      "loss": 2.634,
      "step": 48349
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.627215027809143,
      "learning_rate": 0.0005371332993480121,
      "loss": 3.0332,
      "step": 48350
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.35312819480896,
      "learning_rate": 0.0005371307937180325,
      "loss": 3.0335,
      "step": 48351
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0638675689697266,
      "learning_rate": 0.0005371282880439658,
      "loss": 3.0379,
      "step": 48352
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.431597352027893,
      "learning_rate": 0.0005371257823258127,
      "loss": 3.1115,
      "step": 48353
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.915837287902832,
      "learning_rate": 0.0005371232765635733,
      "loss": 2.9945,
      "step": 48354
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.439638614654541,
      "learning_rate": 0.0005371207707572483,
      "loss": 3.0621,
      "step": 48355
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2841899394989014,
      "learning_rate": 0.0005371182649068378,
      "loss": 2.9219,
      "step": 48356
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.39632248878479,
      "learning_rate": 0.0005371157590123428,
      "loss": 3.1097,
      "step": 48357
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.960749626159668,
      "learning_rate": 0.0005371132530737634,
      "loss": 3.2043,
      "step": 48358
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6148205995559692,
      "learning_rate": 0.0005371107470911002,
      "loss": 3.2219,
      "step": 48359
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5847563743591309,
      "learning_rate": 0.0005371082410643537,
      "loss": 2.8728,
      "step": 48360
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9352837800979614,
      "learning_rate": 0.0005371057349935241,
      "loss": 2.7446,
      "step": 48361
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6082926988601685,
      "learning_rate": 0.0005371032288786123,
      "loss": 2.8561,
      "step": 48362
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3279606103897095,
      "learning_rate": 0.0005371007227196184,
      "loss": 3.1779,
      "step": 48363
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2548003196716309,
      "learning_rate": 0.000537098216516543,
      "loss": 3.0523,
      "step": 48364
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.728630542755127,
      "learning_rate": 0.0005370957102693867,
      "loss": 3.1046,
      "step": 48365
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5228450298309326,
      "learning_rate": 0.0005370932039781498,
      "loss": 3.1683,
      "step": 48366
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.563795566558838,
      "learning_rate": 0.0005370906976428326,
      "loss": 2.9287,
      "step": 48367
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4562889337539673,
      "learning_rate": 0.0005370881912634359,
      "loss": 3.1784,
      "step": 48368
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4603681564331055,
      "learning_rate": 0.0005370856848399601,
      "loss": 2.9234,
      "step": 48369
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5861890316009521,
      "learning_rate": 0.0005370831783724054,
      "loss": 3.1215,
      "step": 48370
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.520873785018921,
      "learning_rate": 0.0005370806718607727,
      "loss": 2.8937,
      "step": 48371
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5774327516555786,
      "learning_rate": 0.0005370781653050621,
      "loss": 2.9831,
      "step": 48372
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4063966274261475,
      "learning_rate": 0.0005370756587052742,
      "loss": 3.3899,
      "step": 48373
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5082366466522217,
      "learning_rate": 0.0005370731520614095,
      "loss": 3.3213,
      "step": 48374
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6872148513793945,
      "learning_rate": 0.0005370706453734684,
      "loss": 3.0918,
      "step": 48375
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.339007019996643,
      "learning_rate": 0.0005370681386414513,
      "loss": 3.1095,
      "step": 48376
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.567435622215271,
      "learning_rate": 0.0005370656318653588,
      "loss": 3.0159,
      "step": 48377
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.643811583518982,
      "learning_rate": 0.0005370631250451915,
      "loss": 3.1636,
      "step": 48378
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5580053329467773,
      "learning_rate": 0.0005370606181809495,
      "loss": 2.9077,
      "step": 48379
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.1987749338150024,
      "learning_rate": 0.0005370581112726336,
      "loss": 3.1016,
      "step": 48380
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6457511186599731,
      "learning_rate": 0.000537055604320244,
      "loss": 3.0961,
      "step": 48381
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4041430950164795,
      "learning_rate": 0.0005370530973237813,
      "loss": 2.8171,
      "step": 48382
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3680126667022705,
      "learning_rate": 0.0005370505902832459,
      "loss": 3.1383,
      "step": 48383
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6319999694824219,
      "learning_rate": 0.0005370480831986384,
      "loss": 2.8248,
      "step": 48384
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5456912517547607,
      "learning_rate": 0.0005370455760699592,
      "loss": 2.9504,
      "step": 48385
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5076276063919067,
      "learning_rate": 0.0005370430688972088,
      "loss": 2.9658,
      "step": 48386
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0602355003356934,
      "learning_rate": 0.0005370405616803874,
      "loss": 2.9456,
      "step": 48387
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4586676359176636,
      "learning_rate": 0.0005370380544194959,
      "loss": 3.1068,
      "step": 48388
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.554033637046814,
      "learning_rate": 0.0005370355471145345,
      "loss": 3.0511,
      "step": 48389
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.528923749923706,
      "learning_rate": 0.0005370330397655037,
      "loss": 3.1621,
      "step": 48390
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4149339199066162,
      "learning_rate": 0.0005370305323724041,
      "loss": 3.1558,
      "step": 48391
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3534607887268066,
      "learning_rate": 0.0005370280249352359,
      "loss": 2.9475,
      "step": 48392
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4775489568710327,
      "learning_rate": 0.0005370255174539998,
      "loss": 2.8394,
      "step": 48393
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6328842639923096,
      "learning_rate": 0.0005370230099286961,
      "loss": 3.2683,
      "step": 48394
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.199692726135254,
      "learning_rate": 0.0005370205023593254,
      "loss": 3.1106,
      "step": 48395
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8903318643569946,
      "learning_rate": 0.0005370179947458882,
      "loss": 3.2519,
      "step": 48396
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.530810832977295,
      "learning_rate": 0.0005370154870883847,
      "loss": 2.9533,
      "step": 48397
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.132267713546753,
      "learning_rate": 0.0005370129793868156,
      "loss": 3.5753,
      "step": 48398
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4551384449005127,
      "learning_rate": 0.0005370104716411814,
      "loss": 2.7932,
      "step": 48399
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5190855264663696,
      "learning_rate": 0.0005370079638514824,
      "loss": 2.9836,
      "step": 48400
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.841408610343933,
      "learning_rate": 0.0005370054560177191,
      "loss": 3.2925,
      "step": 48401
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.174457550048828,
      "learning_rate": 0.0005370029481398922,
      "loss": 3.0439,
      "step": 48402
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.091092109680176,
      "learning_rate": 0.0005370004402180019,
      "loss": 3.0957,
      "step": 48403
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.55012845993042,
      "learning_rate": 0.0005369979322520486,
      "loss": 2.9707,
      "step": 48404
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.396160840988159,
      "learning_rate": 0.0005369954242420331,
      "loss": 3.1475,
      "step": 48405
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.091855049133301,
      "learning_rate": 0.0005369929161879555,
      "loss": 3.0718,
      "step": 48406
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7037136554718018,
      "learning_rate": 0.0005369904080898166,
      "loss": 3.2481,
      "step": 48407
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7356072664260864,
      "learning_rate": 0.0005369878999476166,
      "loss": 2.8037,
      "step": 48408
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.244565486907959,
      "learning_rate": 0.0005369853917613561,
      "loss": 3.0275,
      "step": 48409
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8233739137649536,
      "learning_rate": 0.0005369828835310356,
      "loss": 3.1149,
      "step": 48410
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3866539001464844,
      "learning_rate": 0.0005369803752566554,
      "loss": 3.0001,
      "step": 48411
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3953921794891357,
      "learning_rate": 0.0005369778669382162,
      "loss": 3.0338,
      "step": 48412
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0544166564941406,
      "learning_rate": 0.0005369753585757184,
      "loss": 2.9926,
      "step": 48413
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.450320839881897,
      "learning_rate": 0.0005369728501691622,
      "loss": 2.9672,
      "step": 48414
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0258522033691406,
      "learning_rate": 0.0005369703417185483,
      "loss": 2.9811,
      "step": 48415
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.092061996459961,
      "learning_rate": 0.0005369678332238772,
      "loss": 2.8885,
      "step": 48416
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9140090942382812,
      "learning_rate": 0.0005369653246851495,
      "loss": 3.2717,
      "step": 48417
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9094831943511963,
      "learning_rate": 0.0005369628161023653,
      "loss": 3.2557,
      "step": 48418
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4971065521240234,
      "learning_rate": 0.0005369603074755251,
      "loss": 3.1584,
      "step": 48419
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.8745455741882324,
      "learning_rate": 0.0005369577988046297,
      "loss": 3.0131,
      "step": 48420
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5939074754714966,
      "learning_rate": 0.0005369552900896793,
      "loss": 3.07,
      "step": 48421
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.909698486328125,
      "learning_rate": 0.0005369527813306744,
      "loss": 3.0806,
      "step": 48422
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9786111116409302,
      "learning_rate": 0.0005369502725276156,
      "loss": 3.3553,
      "step": 48423
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.246629476547241,
      "learning_rate": 0.0005369477636805031,
      "loss": 2.8696,
      "step": 48424
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4250906705856323,
      "learning_rate": 0.0005369452547893378,
      "loss": 2.796,
      "step": 48425
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5572963953018188,
      "learning_rate": 0.0005369427458541197,
      "loss": 3.0091,
      "step": 48426
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2954952716827393,
      "learning_rate": 0.0005369402368748496,
      "loss": 3.0538,
      "step": 48427
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.880662441253662,
      "learning_rate": 0.0005369377278515277,
      "loss": 3.0918,
      "step": 48428
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0318212509155273,
      "learning_rate": 0.0005369352187841547,
      "loss": 3.0603,
      "step": 48429
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5110746622085571,
      "learning_rate": 0.000536932709672731,
      "loss": 3.1551,
      "step": 48430
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4876539707183838,
      "learning_rate": 0.000536930200517257,
      "loss": 3.2046,
      "step": 48431
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4706268310546875,
      "learning_rate": 0.0005369276913177331,
      "loss": 2.868,
      "step": 48432
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9035574197769165,
      "learning_rate": 0.00053692518207416,
      "loss": 2.717,
      "step": 48433
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.756351113319397,
      "learning_rate": 0.000536922672786538,
      "loss": 2.6918,
      "step": 48434
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.071916103363037,
      "learning_rate": 0.0005369201634548676,
      "loss": 2.9785,
      "step": 48435
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.957063913345337,
      "learning_rate": 0.0005369176540791492,
      "loss": 2.9103,
      "step": 48436
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4502112865448,
      "learning_rate": 0.0005369151446593834,
      "loss": 2.9705,
      "step": 48437
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.4458987712860107,
      "learning_rate": 0.0005369126351955706,
      "loss": 2.9491,
      "step": 48438
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.468145728111267,
      "learning_rate": 0.0005369101256877114,
      "loss": 3.0857,
      "step": 48439
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6867754459381104,
      "learning_rate": 0.0005369076161358059,
      "loss": 2.6106,
      "step": 48440
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7249324321746826,
      "learning_rate": 0.0005369051065398549,
      "loss": 3.2506,
      "step": 48441
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2120580673217773,
      "learning_rate": 0.0005369025968998589,
      "loss": 3.2924,
      "step": 48442
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7007131576538086,
      "learning_rate": 0.000536900087215818,
      "loss": 2.9938,
      "step": 48443
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8740133047103882,
      "learning_rate": 0.000536897577487733,
      "loss": 3.1516,
      "step": 48444
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8591477870941162,
      "learning_rate": 0.0005368950677156043,
      "loss": 3.016,
      "step": 48445
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.008587598800659,
      "learning_rate": 0.0005368925578994323,
      "loss": 2.9269,
      "step": 48446
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4898762702941895,
      "learning_rate": 0.0005368900480392176,
      "loss": 2.8562,
      "step": 48447
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.188318967819214,
      "learning_rate": 0.0005368875381349605,
      "loss": 3.0022,
      "step": 48448
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6945067644119263,
      "learning_rate": 0.0005368850281866615,
      "loss": 2.9031,
      "step": 48449
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.5054266452789307,
      "learning_rate": 0.0005368825181943211,
      "loss": 2.956,
      "step": 48450
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.566524863243103,
      "learning_rate": 0.0005368800081579398,
      "loss": 3.204,
      "step": 48451
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.376737356185913,
      "learning_rate": 0.0005368774980775181,
      "loss": 2.9552,
      "step": 48452
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5823073387145996,
      "learning_rate": 0.0005368749879530563,
      "loss": 3.1102,
      "step": 48453
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.522730827331543,
      "learning_rate": 0.0005368724777845551,
      "loss": 3.1803,
      "step": 48454
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4615743160247803,
      "learning_rate": 0.0005368699675720146,
      "loss": 3.056,
      "step": 48455
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3088992834091187,
      "learning_rate": 0.0005368674573154356,
      "loss": 2.9764,
      "step": 48456
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.414845585823059,
      "learning_rate": 0.0005368649470148185,
      "loss": 2.9181,
      "step": 48457
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.540262222290039,
      "learning_rate": 0.0005368624366701638,
      "loss": 2.8985,
      "step": 48458
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5868711471557617,
      "learning_rate": 0.0005368599262814719,
      "loss": 2.9299,
      "step": 48459
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3770304918289185,
      "learning_rate": 0.0005368574158487431,
      "loss": 2.9723,
      "step": 48460
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4627653360366821,
      "learning_rate": 0.0005368549053719783,
      "loss": 2.8533,
      "step": 48461
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4727123975753784,
      "learning_rate": 0.0005368523948511775,
      "loss": 3.1972,
      "step": 48462
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.573296070098877,
      "learning_rate": 0.0005368498842863414,
      "loss": 2.6675,
      "step": 48463
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4640179872512817,
      "learning_rate": 0.0005368473736774705,
      "loss": 3.1767,
      "step": 48464
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4463270902633667,
      "learning_rate": 0.0005368448630245652,
      "loss": 3.1405,
      "step": 48465
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.627042531967163,
      "learning_rate": 0.0005368423523276259,
      "loss": 3.0024,
      "step": 48466
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.419267177581787,
      "learning_rate": 0.0005368398415866531,
      "loss": 2.8799,
      "step": 48467
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5441831350326538,
      "learning_rate": 0.0005368373308016473,
      "loss": 3.1505,
      "step": 48468
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5263636112213135,
      "learning_rate": 0.000536834819972609,
      "loss": 2.9868,
      "step": 48469
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7700083255767822,
      "learning_rate": 0.0005368323090995387,
      "loss": 3.1375,
      "step": 48470
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9588037729263306,
      "learning_rate": 0.0005368297981824369,
      "loss": 3.0742,
      "step": 48471
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6940524578094482,
      "learning_rate": 0.0005368272872213038,
      "loss": 2.998,
      "step": 48472
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.747450828552246,
      "learning_rate": 0.0005368247762161401,
      "loss": 3.1156,
      "step": 48473
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7449069023132324,
      "learning_rate": 0.0005368222651669461,
      "loss": 3.1205,
      "step": 48474
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7189627885818481,
      "learning_rate": 0.0005368197540737225,
      "loss": 3.2115,
      "step": 48475
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4545552730560303,
      "learning_rate": 0.0005368172429364695,
      "loss": 2.7197,
      "step": 48476
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.710649847984314,
      "learning_rate": 0.0005368147317551878,
      "loss": 3.1185,
      "step": 48477
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.876042127609253,
      "learning_rate": 0.0005368122205298779,
      "loss": 3.1861,
      "step": 48478
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7917550802230835,
      "learning_rate": 0.0005368097092605399,
      "loss": 3.0683,
      "step": 48479
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.099421501159668,
      "learning_rate": 0.0005368071979471746,
      "loss": 3.1817,
      "step": 48480
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5229328870773315,
      "learning_rate": 0.0005368046865897824,
      "loss": 3.0245,
      "step": 48481
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.5100321769714355,
      "learning_rate": 0.0005368021751883638,
      "loss": 3.0575,
      "step": 48482
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.567387342453003,
      "learning_rate": 0.0005367996637429192,
      "loss": 2.8899,
      "step": 48483
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.634660005569458,
      "learning_rate": 0.0005367971522534489,
      "loss": 2.9063,
      "step": 48484
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6682199239730835,
      "learning_rate": 0.0005367946407199536,
      "loss": 2.9134,
      "step": 48485
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.8440980911254883,
      "learning_rate": 0.0005367921291424339,
      "loss": 2.9767,
      "step": 48486
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.777697205543518,
      "learning_rate": 0.0005367896175208898,
      "loss": 3.0578,
      "step": 48487
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.906359314918518,
      "learning_rate": 0.0005367871058553222,
      "loss": 2.9569,
      "step": 48488
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3251088857650757,
      "learning_rate": 0.0005367845941457313,
      "loss": 3.1069,
      "step": 48489
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4551315307617188,
      "learning_rate": 0.0005367820823921178,
      "loss": 2.9972,
      "step": 48490
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6195560693740845,
      "learning_rate": 0.000536779570594482,
      "loss": 2.963,
      "step": 48491
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9491933584213257,
      "learning_rate": 0.0005367770587528243,
      "loss": 3.3294,
      "step": 48492
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4454301595687866,
      "learning_rate": 0.0005367745468671454,
      "loss": 2.8185,
      "step": 48493
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9980931282043457,
      "learning_rate": 0.0005367720349374457,
      "loss": 2.9825,
      "step": 48494
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.457937717437744,
      "learning_rate": 0.0005367695229637255,
      "loss": 2.9285,
      "step": 48495
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8512420654296875,
      "learning_rate": 0.0005367670109459854,
      "loss": 3.0105,
      "step": 48496
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5565094947814941,
      "learning_rate": 0.0005367644988842258,
      "loss": 2.8453,
      "step": 48497
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.078822135925293,
      "learning_rate": 0.0005367619867784472,
      "loss": 3.228,
      "step": 48498
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9096747636795044,
      "learning_rate": 0.0005367594746286501,
      "loss": 3.16,
      "step": 48499
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8730576038360596,
      "learning_rate": 0.000536756962434835,
      "loss": 3.0765,
      "step": 48500
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7346817255020142,
      "learning_rate": 0.0005367544501970023,
      "loss": 3.2747,
      "step": 48501
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8405472040176392,
      "learning_rate": 0.0005367519379151525,
      "loss": 3.1965,
      "step": 48502
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.771759033203125,
      "learning_rate": 0.000536749425589286,
      "loss": 3.2747,
      "step": 48503
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7605969905853271,
      "learning_rate": 0.0005367469132194034,
      "loss": 3.0693,
      "step": 48504
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.55220627784729,
      "learning_rate": 0.0005367444008055049,
      "loss": 3.0315,
      "step": 48505
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7842813730239868,
      "learning_rate": 0.0005367418883475912,
      "loss": 3.0562,
      "step": 48506
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.729423999786377,
      "learning_rate": 0.0005367393758456628,
      "loss": 3.0492,
      "step": 48507
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4114528894424438,
      "learning_rate": 0.0005367368632997201,
      "loss": 2.9298,
      "step": 48508
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6535900831222534,
      "learning_rate": 0.0005367343507097634,
      "loss": 2.9757,
      "step": 48509
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.334380865097046,
      "learning_rate": 0.0005367318380757935,
      "loss": 2.9964,
      "step": 48510
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6777838468551636,
      "learning_rate": 0.0005367293253978105,
      "loss": 2.9363,
      "step": 48511
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7820004224777222,
      "learning_rate": 0.0005367268126758152,
      "loss": 3.1435,
      "step": 48512
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5912481546401978,
      "learning_rate": 0.0005367242999098079,
      "loss": 2.8364,
      "step": 48513
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4817379713058472,
      "learning_rate": 0.000536721787099789,
      "loss": 3.0858,
      "step": 48514
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4537534713745117,
      "learning_rate": 0.000536719274245759,
      "loss": 2.9903,
      "step": 48515
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2229504585266113,
      "learning_rate": 0.0005367167613477186,
      "loss": 3.1163,
      "step": 48516
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4539626836776733,
      "learning_rate": 0.000536714248405668,
      "loss": 2.6126,
      "step": 48517
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.130429983139038,
      "learning_rate": 0.0005367117354196076,
      "loss": 2.847,
      "step": 48518
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9859907627105713,
      "learning_rate": 0.0005367092223895383,
      "loss": 3.0388,
      "step": 48519
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6412147283554077,
      "learning_rate": 0.0005367067093154602,
      "loss": 3.0485,
      "step": 48520
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3699924945831299,
      "learning_rate": 0.0005367041961973737,
      "loss": 3.0894,
      "step": 48521
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0197646617889404,
      "learning_rate": 0.0005367016830352796,
      "loss": 3.2471,
      "step": 48522
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.532883644104004,
      "learning_rate": 0.000536699169829178,
      "loss": 3.0368,
      "step": 48523
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2494101524353027,
      "learning_rate": 0.0005366966565790698,
      "loss": 3.0416,
      "step": 48524
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4697526693344116,
      "learning_rate": 0.0005366941432849551,
      "loss": 3.0943,
      "step": 48525
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2983548641204834,
      "learning_rate": 0.0005366916299468344,
      "loss": 3.0262,
      "step": 48526
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.132030487060547,
      "learning_rate": 0.0005366891165647083,
      "loss": 2.9593,
      "step": 48527
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4317114353179932,
      "learning_rate": 0.0005366866031385774,
      "loss": 3.0252,
      "step": 48528
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6723068952560425,
      "learning_rate": 0.0005366840896684418,
      "loss": 2.8977,
      "step": 48529
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.269369602203369,
      "learning_rate": 0.0005366815761543022,
      "loss": 3.0775,
      "step": 48530
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7660815715789795,
      "learning_rate": 0.000536679062596159,
      "loss": 2.8643,
      "step": 48531
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8868308067321777,
      "learning_rate": 0.0005366765489940129,
      "loss": 3.1785,
      "step": 48532
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.219115972518921,
      "learning_rate": 0.000536674035347864,
      "loss": 3.2091,
      "step": 48533
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5112966299057007,
      "learning_rate": 0.0005366715216577129,
      "loss": 3.22,
      "step": 48534
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.810817837715149,
      "learning_rate": 0.0005366690079235602,
      "loss": 2.967,
      "step": 48535
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5496426820755005,
      "learning_rate": 0.0005366664941454061,
      "loss": 2.8571,
      "step": 48536
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5027118921279907,
      "learning_rate": 0.0005366639803232514,
      "loss": 3.1541,
      "step": 48537
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8865091800689697,
      "learning_rate": 0.0005366614664570963,
      "loss": 2.967,
      "step": 48538
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6970075368881226,
      "learning_rate": 0.0005366589525469414,
      "loss": 2.8672,
      "step": 48539
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5076990127563477,
      "learning_rate": 0.0005366564385927871,
      "loss": 3.3893,
      "step": 48540
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6767528057098389,
      "learning_rate": 0.0005366539245946339,
      "loss": 3.2009,
      "step": 48541
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0500247478485107,
      "learning_rate": 0.0005366514105524824,
      "loss": 2.9496,
      "step": 48542
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.563755989074707,
      "learning_rate": 0.0005366488964663328,
      "loss": 3.0007,
      "step": 48543
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.267895460128784,
      "learning_rate": 0.0005366463823361857,
      "loss": 2.7967,
      "step": 48544
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5392922163009644,
      "learning_rate": 0.0005366438681620417,
      "loss": 2.9234,
      "step": 48545
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.852715015411377,
      "learning_rate": 0.000536641353943901,
      "loss": 3.2865,
      "step": 48546
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6778424978256226,
      "learning_rate": 0.0005366388396817642,
      "loss": 3.086,
      "step": 48547
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.4834232330322266,
      "learning_rate": 0.0005366363253756319,
      "loss": 2.9219,
      "step": 48548
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.429760217666626,
      "learning_rate": 0.0005366338110255044,
      "loss": 2.8921,
      "step": 48549
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.069586753845215,
      "learning_rate": 0.0005366312966313821,
      "loss": 3.0832,
      "step": 48550
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.617777109146118,
      "learning_rate": 0.0005366287821932656,
      "loss": 2.8461,
      "step": 48551
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.605938673019409,
      "learning_rate": 0.0005366262677111554,
      "loss": 3.1312,
      "step": 48552
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0548739433288574,
      "learning_rate": 0.0005366237531850519,
      "loss": 3.0514,
      "step": 48553
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4528475999832153,
      "learning_rate": 0.0005366212386149555,
      "loss": 3.144,
      "step": 48554
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3559913635253906,
      "learning_rate": 0.000536618724000867,
      "loss": 3.1692,
      "step": 48555
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.756000518798828,
      "learning_rate": 0.0005366162093427863,
      "loss": 3.1488,
      "step": 48556
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.214702606201172,
      "learning_rate": 0.0005366136946407143,
      "loss": 3.0106,
      "step": 48557
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8925833702087402,
      "learning_rate": 0.0005366111798946514,
      "loss": 3.1574,
      "step": 48558
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.834191083908081,
      "learning_rate": 0.000536608665104598,
      "loss": 3.0571,
      "step": 48559
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4282827377319336,
      "learning_rate": 0.0005366061502705546,
      "loss": 3.0713,
      "step": 48560
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6140084266662598,
      "learning_rate": 0.0005366036353925216,
      "loss": 3.1102,
      "step": 48561
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1859052181243896,
      "learning_rate": 0.0005366011204704995,
      "loss": 3.116,
      "step": 48562
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.5886754989624023,
      "learning_rate": 0.0005365986055044888,
      "loss": 3.0092,
      "step": 48563
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7578036785125732,
      "learning_rate": 0.00053659609049449,
      "loss": 3.2459,
      "step": 48564
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7199147939682007,
      "learning_rate": 0.0005365935754405034,
      "loss": 3.1748,
      "step": 48565
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.893970012664795,
      "learning_rate": 0.0005365910603425297,
      "loss": 2.9615,
      "step": 48566
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0233428478240967,
      "learning_rate": 0.0005365885452005693,
      "loss": 3.1921,
      "step": 48567
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.288270950317383,
      "learning_rate": 0.0005365860300146225,
      "loss": 3.0804,
      "step": 48568
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8648593425750732,
      "learning_rate": 0.00053658351478469,
      "loss": 3.1231,
      "step": 48569
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.395843505859375,
      "learning_rate": 0.000536580999510772,
      "loss": 3.0871,
      "step": 48570
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.063288688659668,
      "learning_rate": 0.0005365784841928693,
      "loss": 3.0779,
      "step": 48571
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.162442445755005,
      "learning_rate": 0.0005365759688309822,
      "loss": 3.0173,
      "step": 48572
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.954283356666565,
      "learning_rate": 0.000536573453425111,
      "loss": 3.1426,
      "step": 48573
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5576207637786865,
      "learning_rate": 0.0005365709379752565,
      "loss": 3.0036,
      "step": 48574
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0288360118865967,
      "learning_rate": 0.0005365684224814188,
      "loss": 3.3375,
      "step": 48575
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1410939693450928,
      "learning_rate": 0.0005365659069435988,
      "loss": 3.014,
      "step": 48576
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7321617603302,
      "learning_rate": 0.0005365633913617966,
      "loss": 3.0489,
      "step": 48577
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4115419387817383,
      "learning_rate": 0.0005365608757360127,
      "loss": 3.1824,
      "step": 48578
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.115586996078491,
      "learning_rate": 0.0005365583600662479,
      "loss": 2.9516,
      "step": 48579
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.191394805908203,
      "learning_rate": 0.0005365558443525023,
      "loss": 2.7899,
      "step": 48580
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.9404890537261963,
      "learning_rate": 0.0005365533285947764,
      "loss": 2.9885,
      "step": 48581
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4141871929168701,
      "learning_rate": 0.000536550812793071,
      "loss": 3.1039,
      "step": 48582
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.394986152648926,
      "learning_rate": 0.0005365482969473862,
      "loss": 2.8037,
      "step": 48583
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3733692169189453,
      "learning_rate": 0.0005365457810577227,
      "loss": 3.119,
      "step": 48584
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.651267647743225,
      "learning_rate": 0.0005365432651240807,
      "loss": 3.1362,
      "step": 48585
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7114744186401367,
      "learning_rate": 0.000536540749146461,
      "loss": 3.0914,
      "step": 48586
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1208808422088623,
      "learning_rate": 0.0005365382331248638,
      "loss": 3.3652,
      "step": 48587
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.5246291160583496,
      "learning_rate": 0.0005365357170592898,
      "loss": 2.8784,
      "step": 48588
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9168728590011597,
      "learning_rate": 0.0005365332009497393,
      "loss": 2.9169,
      "step": 48589
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2010648250579834,
      "learning_rate": 0.0005365306847962127,
      "loss": 2.9439,
      "step": 48590
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9887443780899048,
      "learning_rate": 0.0005365281685987108,
      "loss": 3.3294,
      "step": 48591
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.034855604171753,
      "learning_rate": 0.0005365256523572337,
      "loss": 3.0552,
      "step": 48592
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4474503993988037,
      "learning_rate": 0.000536523136071782,
      "loss": 3.043,
      "step": 48593
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9263049364089966,
      "learning_rate": 0.0005365206197423562,
      "loss": 2.9783,
      "step": 48594
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5369694232940674,
      "learning_rate": 0.0005365181033689569,
      "loss": 3.1928,
      "step": 48595
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0680582523345947,
      "learning_rate": 0.0005365155869515842,
      "loss": 3.0423,
      "step": 48596
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0017285346984863,
      "learning_rate": 0.0005365130704902389,
      "loss": 3.2291,
      "step": 48597
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5170592069625854,
      "learning_rate": 0.0005365105539849213,
      "loss": 3.1355,
      "step": 48598
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7359750270843506,
      "learning_rate": 0.000536508037435632,
      "loss": 2.8991,
      "step": 48599
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0585317611694336,
      "learning_rate": 0.0005365055208423713,
      "loss": 3.0151,
      "step": 48600
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4472190141677856,
      "learning_rate": 0.0005365030042051398,
      "loss": 3.2022,
      "step": 48601
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.389626383781433,
      "learning_rate": 0.0005365004875239381,
      "loss": 3.0242,
      "step": 48602
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5778776407241821,
      "learning_rate": 0.0005364979707987663,
      "loss": 3.1098,
      "step": 48603
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4563279151916504,
      "learning_rate": 0.0005364954540296251,
      "loss": 3.1229,
      "step": 48604
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4108598232269287,
      "learning_rate": 0.0005364929372165149,
      "loss": 2.8972,
      "step": 48605
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7638412714004517,
      "learning_rate": 0.0005364904203594362,
      "loss": 3.0075,
      "step": 48606
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0844953060150146,
      "learning_rate": 0.0005364879034583895,
      "loss": 3.1075,
      "step": 48607
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.850401520729065,
      "learning_rate": 0.0005364853865133752,
      "loss": 2.9363,
      "step": 48608
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.021392583847046,
      "learning_rate": 0.0005364828695243938,
      "loss": 3.2531,
      "step": 48609
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.574035882949829,
      "learning_rate": 0.000536480352491446,
      "loss": 2.8315,
      "step": 48610
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7114858627319336,
      "learning_rate": 0.0005364778354145317,
      "loss": 2.9579,
      "step": 48611
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4248340129852295,
      "learning_rate": 0.0005364753182936519,
      "loss": 3.052,
      "step": 48612
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8703758716583252,
      "learning_rate": 0.0005364728011288068,
      "loss": 3.0849,
      "step": 48613
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5729025602340698,
      "learning_rate": 0.0005364702839199969,
      "loss": 2.9565,
      "step": 48614
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4836316108703613,
      "learning_rate": 0.0005364677666672229,
      "loss": 3.1597,
      "step": 48615
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6597857475280762,
      "learning_rate": 0.000536465249370485,
      "loss": 2.8765,
      "step": 48616
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5173178911209106,
      "learning_rate": 0.0005364627320297836,
      "loss": 3.0414,
      "step": 48617
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0498554706573486,
      "learning_rate": 0.0005364602146451194,
      "loss": 3.0237,
      "step": 48618
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5420410633087158,
      "learning_rate": 0.0005364576972164928,
      "loss": 3.2363,
      "step": 48619
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.091219902038574,
      "learning_rate": 0.0005364551797439042,
      "loss": 3.1558,
      "step": 48620
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.584482192993164,
      "learning_rate": 0.0005364526622273542,
      "loss": 2.799,
      "step": 48621
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.387607216835022,
      "learning_rate": 0.0005364501446668432,
      "loss": 3.0641,
      "step": 48622
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4150538444519043,
      "learning_rate": 0.0005364476270623717,
      "loss": 2.928,
      "step": 48623
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6438716650009155,
      "learning_rate": 0.00053644510941394,
      "loss": 3.1407,
      "step": 48624
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5954554080963135,
      "learning_rate": 0.0005364425917215486,
      "loss": 3.0786,
      "step": 48625
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.818196415901184,
      "learning_rate": 0.0005364400739851983,
      "loss": 2.9817,
      "step": 48626
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2841708660125732,
      "learning_rate": 0.0005364375562048892,
      "loss": 3.0717,
      "step": 48627
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4676494598388672,
      "learning_rate": 0.0005364350383806219,
      "loss": 2.9749,
      "step": 48628
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4438445568084717,
      "learning_rate": 0.0005364325205123968,
      "loss": 3.1185,
      "step": 48629
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4885706901550293,
      "learning_rate": 0.0005364300026002145,
      "loss": 3.1223,
      "step": 48630
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5570794343948364,
      "learning_rate": 0.0005364274846440754,
      "loss": 3.1132,
      "step": 48631
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8118584156036377,
      "learning_rate": 0.00053642496664398,
      "loss": 3.2102,
      "step": 48632
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5381324291229248,
      "learning_rate": 0.0005364224485999287,
      "loss": 3.1959,
      "step": 48633
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3373712301254272,
      "learning_rate": 0.0005364199305119221,
      "loss": 3.1293,
      "step": 48634
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3148521184921265,
      "learning_rate": 0.0005364174123799603,
      "loss": 3.0566,
      "step": 48635
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5055537223815918,
      "learning_rate": 0.0005364148942040443,
      "loss": 2.7944,
      "step": 48636
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4639931917190552,
      "learning_rate": 0.0005364123759841743,
      "loss": 2.9712,
      "step": 48637
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4386624097824097,
      "learning_rate": 0.0005364098577203508,
      "loss": 2.8883,
      "step": 48638
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7534410953521729,
      "learning_rate": 0.000536407339412574,
      "loss": 3.098,
      "step": 48639
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.59492027759552,
      "learning_rate": 0.0005364048210608447,
      "loss": 3.0082,
      "step": 48640
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6451990604400635,
      "learning_rate": 0.0005364023026651634,
      "loss": 3.2328,
      "step": 48641
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.580286979675293,
      "learning_rate": 0.0005363997842255304,
      "loss": 3.0856,
      "step": 48642
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5043045282363892,
      "learning_rate": 0.0005363972657419462,
      "loss": 2.9535,
      "step": 48643
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.697393536567688,
      "learning_rate": 0.0005363947472144113,
      "loss": 3.0386,
      "step": 48644
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.745208501815796,
      "learning_rate": 0.0005363922286429262,
      "loss": 3.2778,
      "step": 48645
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.828450322151184,
      "learning_rate": 0.0005363897100274913,
      "loss": 3.0319,
      "step": 48646
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0800719261169434,
      "learning_rate": 0.0005363871913681071,
      "loss": 3.0909,
      "step": 48647
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0542619228363037,
      "learning_rate": 0.0005363846726647739,
      "loss": 3.0911,
      "step": 48648
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.022915840148926,
      "learning_rate": 0.0005363821539174925,
      "loss": 2.7912,
      "step": 48649
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4253613948822021,
      "learning_rate": 0.0005363796351262631,
      "loss": 3.3001,
      "step": 48650
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.422248601913452,
      "learning_rate": 0.0005363771162910863,
      "loss": 2.7833,
      "step": 48651
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.7691683769226074,
      "learning_rate": 0.0005363745974119625,
      "loss": 2.9633,
      "step": 48652
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.436295747756958,
      "learning_rate": 0.0005363720784888923,
      "loss": 2.9901,
      "step": 48653
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2327871322631836,
      "learning_rate": 0.000536369559521876,
      "loss": 3.223,
      "step": 48654
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0803940296173096,
      "learning_rate": 0.0005363670405109141,
      "loss": 3.2131,
      "step": 48655
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7781625986099243,
      "learning_rate": 0.0005363645214560073,
      "loss": 2.9062,
      "step": 48656
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7917641401290894,
      "learning_rate": 0.0005363620023571557,
      "loss": 3.0988,
      "step": 48657
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.491161584854126,
      "learning_rate": 0.0005363594832143599,
      "loss": 3.0805,
      "step": 48658
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7166376113891602,
      "learning_rate": 0.0005363569640276205,
      "loss": 2.996,
      "step": 48659
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4258735179901123,
      "learning_rate": 0.0005363544447969378,
      "loss": 3.0628,
      "step": 48660
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.591206431388855,
      "learning_rate": 0.0005363519255223125,
      "loss": 3.0883,
      "step": 48661
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.657566785812378,
      "learning_rate": 0.0005363494062037448,
      "loss": 2.9012,
      "step": 48662
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.816810965538025,
      "learning_rate": 0.0005363468868412354,
      "loss": 3.0394,
      "step": 48663
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6016899347305298,
      "learning_rate": 0.0005363443674347844,
      "loss": 3.0151,
      "step": 48664
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4989324808120728,
      "learning_rate": 0.0005363418479843927,
      "loss": 2.8327,
      "step": 48665
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7085336446762085,
      "learning_rate": 0.0005363393284900606,
      "loss": 2.8526,
      "step": 48666
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7993097305297852,
      "learning_rate": 0.0005363368089517885,
      "loss": 2.8857,
      "step": 48667
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6382638216018677,
      "learning_rate": 0.000536334289369577,
      "loss": 3.1554,
      "step": 48668
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3917137384414673,
      "learning_rate": 0.0005363317697434265,
      "loss": 3.2268,
      "step": 48669
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5096906423568726,
      "learning_rate": 0.0005363292500733374,
      "loss": 2.9512,
      "step": 48670
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4194166660308838,
      "learning_rate": 0.0005363267303593102,
      "loss": 3.1151,
      "step": 48671
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.6825621128082275,
      "learning_rate": 0.0005363242106013454,
      "loss": 3.3603,
      "step": 48672
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.823228359222412,
      "learning_rate": 0.0005363216907994436,
      "loss": 3.1672,
      "step": 48673
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6119716167449951,
      "learning_rate": 0.000536319170953605,
      "loss": 2.7912,
      "step": 48674
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.34666109085083,
      "learning_rate": 0.0005363166510638302,
      "loss": 3.0159,
      "step": 48675
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2656025886535645,
      "learning_rate": 0.0005363141311301198,
      "loss": 2.9321,
      "step": 48676
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9075332880020142,
      "learning_rate": 0.000536311611152474,
      "loss": 2.9677,
      "step": 48677
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0946223735809326,
      "learning_rate": 0.0005363090911308934,
      "loss": 3.1421,
      "step": 48678
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.695544958114624,
      "learning_rate": 0.0005363065710653786,
      "loss": 2.9907,
      "step": 48679
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5440281629562378,
      "learning_rate": 0.0005363040509559299,
      "loss": 3.0942,
      "step": 48680
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6604294776916504,
      "learning_rate": 0.0005363015308025478,
      "loss": 3.1934,
      "step": 48681
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.313247561454773,
      "learning_rate": 0.0005362990106052329,
      "loss": 2.8918,
      "step": 48682
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.237635374069214,
      "learning_rate": 0.0005362964903639854,
      "loss": 3.193,
      "step": 48683
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.4393930435180664,
      "learning_rate": 0.0005362939700788061,
      "loss": 2.858,
      "step": 48684
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.669337272644043,
      "learning_rate": 0.0005362914497496951,
      "loss": 3.2779,
      "step": 48685
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4901543855667114,
      "learning_rate": 0.0005362889293766531,
      "loss": 3.1304,
      "step": 48686
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6985702514648438,
      "learning_rate": 0.0005362864089596806,
      "loss": 3.0643,
      "step": 48687
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8468351364135742,
      "learning_rate": 0.0005362838884987779,
      "loss": 2.9687,
      "step": 48688
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4091575145721436,
      "learning_rate": 0.0005362813679939456,
      "loss": 3.1534,
      "step": 48689
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.602655291557312,
      "learning_rate": 0.0005362788474451842,
      "loss": 3.2192,
      "step": 48690
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8989914655685425,
      "learning_rate": 0.0005362763268524941,
      "loss": 2.7869,
      "step": 48691
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7788467407226562,
      "learning_rate": 0.0005362738062158757,
      "loss": 2.9948,
      "step": 48692
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9765398502349854,
      "learning_rate": 0.0005362712855353296,
      "loss": 2.9079,
      "step": 48693
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9343384504318237,
      "learning_rate": 0.0005362687648108561,
      "loss": 3.0107,
      "step": 48694
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6476733684539795,
      "learning_rate": 0.0005362662440424559,
      "loss": 3.2844,
      "step": 48695
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.940093755722046,
      "learning_rate": 0.0005362637232301294,
      "loss": 2.9988,
      "step": 48696
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3913084268569946,
      "learning_rate": 0.0005362612023738768,
      "loss": 2.9195,
      "step": 48697
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.771336555480957,
      "learning_rate": 0.0005362586814736988,
      "loss": 2.8094,
      "step": 48698
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.2415518760681152,
      "learning_rate": 0.0005362561605295961,
      "loss": 3.1226,
      "step": 48699
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7932473421096802,
      "learning_rate": 0.0005362536395415687,
      "loss": 3.1826,
      "step": 48700
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.342745304107666,
      "learning_rate": 0.0005362511185096173,
      "loss": 2.9888,
      "step": 48701
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.678725242614746,
      "learning_rate": 0.0005362485974337425,
      "loss": 3.3488,
      "step": 48702
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.582995057106018,
      "learning_rate": 0.0005362460763139444,
      "loss": 2.9746,
      "step": 48703
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.531538963317871,
      "learning_rate": 0.0005362435551502239,
      "loss": 3.0001,
      "step": 48704
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3899507522583008,
      "learning_rate": 0.0005362410339425812,
      "loss": 2.9847,
      "step": 48705
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1910510063171387,
      "learning_rate": 0.0005362385126910169,
      "loss": 3.0851,
      "step": 48706
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.871325135231018,
      "learning_rate": 0.0005362359913955312,
      "loss": 2.9457,
      "step": 48707
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7644299268722534,
      "learning_rate": 0.000536233470056125,
      "loss": 2.9582,
      "step": 48708
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3320465087890625,
      "learning_rate": 0.0005362309486727985,
      "loss": 2.9749,
      "step": 48709
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.469092845916748,
      "learning_rate": 0.0005362284272455521,
      "loss": 3.1885,
      "step": 48710
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4368160963058472,
      "learning_rate": 0.0005362259057743864,
      "loss": 2.8021,
      "step": 48711
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.437840461730957,
      "learning_rate": 0.0005362233842593018,
      "loss": 3.0064,
      "step": 48712
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9226174354553223,
      "learning_rate": 0.0005362208627002989,
      "loss": 3.1918,
      "step": 48713
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.264216661453247,
      "learning_rate": 0.0005362183410973781,
      "loss": 2.9532,
      "step": 48714
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9585074186325073,
      "learning_rate": 0.0005362158194505398,
      "loss": 2.9688,
      "step": 48715
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5452920198440552,
      "learning_rate": 0.0005362132977597846,
      "loss": 2.6638,
      "step": 48716
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2500932216644287,
      "learning_rate": 0.0005362107760251127,
      "loss": 3.194,
      "step": 48717
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4283674955368042,
      "learning_rate": 0.0005362082542465249,
      "loss": 3.0381,
      "step": 48718
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0828166007995605,
      "learning_rate": 0.0005362057324240215,
      "loss": 3.0973,
      "step": 48719
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4153025150299072,
      "learning_rate": 0.000536203210557603,
      "loss": 3.0719,
      "step": 48720
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5511025190353394,
      "learning_rate": 0.0005362006886472699,
      "loss": 2.7577,
      "step": 48721
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7409930229187012,
      "learning_rate": 0.0005361981666930227,
      "loss": 3.0599,
      "step": 48722
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.1906471252441406,
      "learning_rate": 0.0005361956446948617,
      "loss": 3.1695,
      "step": 48723
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.470150351524353,
      "learning_rate": 0.0005361931226527874,
      "loss": 2.982,
      "step": 48724
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9991222620010376,
      "learning_rate": 0.0005361906005668004,
      "loss": 3.0311,
      "step": 48725
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4034245014190674,
      "learning_rate": 0.0005361880784369011,
      "loss": 3.1434,
      "step": 48726
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.436969518661499,
      "learning_rate": 0.0005361855562630901,
      "loss": 2.905,
      "step": 48727
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3689918518066406,
      "learning_rate": 0.0005361830340453675,
      "loss": 3.2408,
      "step": 48728
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5360926389694214,
      "learning_rate": 0.0005361805117837342,
      "loss": 2.9677,
      "step": 48729
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7630212306976318,
      "learning_rate": 0.0005361779894781904,
      "loss": 3.2817,
      "step": 48730
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.67282235622406,
      "learning_rate": 0.0005361754671287368,
      "loss": 2.9801,
      "step": 48731
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.174715280532837,
      "learning_rate": 0.0005361729447353737,
      "loss": 2.962,
      "step": 48732
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3294377326965332,
      "learning_rate": 0.0005361704222981014,
      "loss": 3.0926,
      "step": 48733
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.2822203636169434,
      "learning_rate": 0.0005361678998169207,
      "loss": 2.9611,
      "step": 48734
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.877187728881836,
      "learning_rate": 0.0005361653772918319,
      "loss": 3.1578,
      "step": 48735
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.676934838294983,
      "learning_rate": 0.0005361628547228356,
      "loss": 3.0697,
      "step": 48736
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.8992035388946533,
      "learning_rate": 0.000536160332109932,
      "loss": 2.7677,
      "step": 48737
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9105530977249146,
      "learning_rate": 0.0005361578094531217,
      "loss": 3.0235,
      "step": 48738
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3358054161071777,
      "learning_rate": 0.0005361552867524054,
      "loss": 3.0756,
      "step": 48739
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.6498013734817505,
      "learning_rate": 0.0005361527640077833,
      "loss": 2.9329,
      "step": 48740
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.214764356613159,
      "learning_rate": 0.0005361502412192559,
      "loss": 3.2357,
      "step": 48741
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7986595630645752,
      "learning_rate": 0.0005361477183868237,
      "loss": 2.9327,
      "step": 48742
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7717167139053345,
      "learning_rate": 0.0005361451955104873,
      "loss": 2.9695,
      "step": 48743
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.9462153911590576,
      "learning_rate": 0.0005361426725902469,
      "loss": 3.0209,
      "step": 48744
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7232975959777832,
      "learning_rate": 0.0005361401496261033,
      "loss": 2.7741,
      "step": 48745
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5909923315048218,
      "learning_rate": 0.0005361376266180567,
      "loss": 3.0457,
      "step": 48746
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.3323585987091064,
      "learning_rate": 0.0005361351035661076,
      "loss": 2.9346,
      "step": 48747
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.215531349182129,
      "learning_rate": 0.0005361325804702565,
      "loss": 2.878,
      "step": 48748
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5351176261901855,
      "learning_rate": 0.000536130057330504,
      "loss": 3.2462,
      "step": 48749
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5490527153015137,
      "learning_rate": 0.0005361275341468504,
      "loss": 3.2634,
      "step": 48750
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.147159576416016,
      "learning_rate": 0.0005361250109192963,
      "loss": 3.1071,
      "step": 48751
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0131874084472656,
      "learning_rate": 0.000536122487647842,
      "loss": 3.1306,
      "step": 48752
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.0516483783721924,
      "learning_rate": 0.0005361199643324882,
      "loss": 2.8724,
      "step": 48753
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4907820224761963,
      "learning_rate": 0.0005361174409732352,
      "loss": 3.0836,
      "step": 48754
    },
    {
      "epoch": 0.63,
      "grad_norm": 2.446709632873535,
      "learning_rate": 0.0005361149175700836,
      "loss": 3.0628,
      "step": 48755
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5385966300964355,
      "learning_rate": 0.0005361123941230335,
      "loss": 2.9414,
      "step": 48756
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3400269746780396,
      "learning_rate": 0.0005361098706320858,
      "loss": 3.1599,
      "step": 48757
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.7938786745071411,
      "learning_rate": 0.0005361073470972409,
      "loss": 2.8836,
      "step": 48758
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4172263145446777,
      "learning_rate": 0.0005361048235184992,
      "loss": 2.9215,
      "step": 48759
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.503691554069519,
      "learning_rate": 0.0005361022998958611,
      "loss": 3.1561,
      "step": 48760
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4445364475250244,
      "learning_rate": 0.000536099776229327,
      "loss": 2.7486,
      "step": 48761
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3436169624328613,
      "learning_rate": 0.0005360972525188977,
      "loss": 3.0376,
      "step": 48762
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.4962372779846191,
      "learning_rate": 0.0005360947287645733,
      "loss": 3.5033,
      "step": 48763
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.710638403892517,
      "learning_rate": 0.0005360922049663545,
      "loss": 2.9013,
      "step": 48764
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.5904643535614014,
      "learning_rate": 0.0005360896811242418,
      "loss": 3.0156,
      "step": 48765
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.294431209564209,
      "learning_rate": 0.0005360871572382353,
      "loss": 3.1445,
      "step": 48766
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.68657648563385,
      "learning_rate": 0.000536084633308336,
      "loss": 2.9178,
      "step": 48767
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.583479881286621,
      "learning_rate": 0.000536082109334544,
      "loss": 3.0287,
      "step": 48768
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8052208423614502,
      "learning_rate": 0.0005360795853168599,
      "loss": 3.3351,
      "step": 48769
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5839539766311646,
      "learning_rate": 0.0005360770612552842,
      "loss": 2.9932,
      "step": 48770
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6249725818634033,
      "learning_rate": 0.0005360745371498173,
      "loss": 2.9213,
      "step": 48771
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3515774011611938,
      "learning_rate": 0.0005360720130004596,
      "loss": 2.9458,
      "step": 48772
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5393636226654053,
      "learning_rate": 0.0005360694888072117,
      "loss": 2.9999,
      "step": 48773
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4594943523406982,
      "learning_rate": 0.0005360669645700741,
      "loss": 3.2843,
      "step": 48774
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5893604755401611,
      "learning_rate": 0.0005360644402890471,
      "loss": 3.0529,
      "step": 48775
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4788413047790527,
      "learning_rate": 0.0005360619159641313,
      "loss": 3.1674,
      "step": 48776
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2814489603042603,
      "learning_rate": 0.0005360593915953272,
      "loss": 3.2568,
      "step": 48777
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7540100812911987,
      "learning_rate": 0.0005360568671826351,
      "loss": 3.1047,
      "step": 48778
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7301193475723267,
      "learning_rate": 0.0005360543427260555,
      "loss": 2.9248,
      "step": 48779
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8659013509750366,
      "learning_rate": 0.0005360518182255891,
      "loss": 3.2198,
      "step": 48780
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8945751190185547,
      "learning_rate": 0.0005360492936812363,
      "loss": 2.8819,
      "step": 48781
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2116644382476807,
      "learning_rate": 0.0005360467690929973,
      "loss": 2.9661,
      "step": 48782
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.355870246887207,
      "learning_rate": 0.0005360442444608727,
      "loss": 3.1267,
      "step": 48783
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4675674438476562,
      "learning_rate": 0.0005360417197848632,
      "loss": 2.9773,
      "step": 48784
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.453654170036316,
      "learning_rate": 0.0005360391950649689,
      "loss": 2.8824,
      "step": 48785
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9105063676834106,
      "learning_rate": 0.0005360366703011906,
      "loss": 2.9389,
      "step": 48786
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.931541919708252,
      "learning_rate": 0.0005360341454935286,
      "loss": 3.0404,
      "step": 48787
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.334644317626953,
      "learning_rate": 0.0005360316206419834,
      "loss": 3.2201,
      "step": 48788
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.905191421508789,
      "learning_rate": 0.0005360290957465555,
      "loss": 2.9697,
      "step": 48789
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3879613876342773,
      "learning_rate": 0.0005360265708072452,
      "loss": 3.0478,
      "step": 48790
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0086028575897217,
      "learning_rate": 0.0005360240458240532,
      "loss": 3.0357,
      "step": 48791
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6878464221954346,
      "learning_rate": 0.0005360215207969799,
      "loss": 3.0258,
      "step": 48792
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7494168281555176,
      "learning_rate": 0.0005360189957260257,
      "loss": 3.0197,
      "step": 48793
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8529763221740723,
      "learning_rate": 0.0005360164706111911,
      "loss": 2.8512,
      "step": 48794
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7420192956924438,
      "learning_rate": 0.0005360139454524766,
      "loss": 2.9467,
      "step": 48795
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4738726615905762,
      "learning_rate": 0.0005360114202498826,
      "loss": 3.0513,
      "step": 48796
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.495469570159912,
      "learning_rate": 0.0005360088950034097,
      "loss": 2.6864,
      "step": 48797
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.587439775466919,
      "learning_rate": 0.0005360063697130583,
      "loss": 3.2035,
      "step": 48798
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.674565315246582,
      "learning_rate": 0.0005360038443788289,
      "loss": 3.0031,
      "step": 48799
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4637788534164429,
      "learning_rate": 0.0005360013190007218,
      "loss": 3.2643,
      "step": 48800
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8127617835998535,
      "learning_rate": 0.0005359987935787378,
      "loss": 2.9634,
      "step": 48801
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.895237684249878,
      "learning_rate": 0.0005359962681128769,
      "loss": 2.9063,
      "step": 48802
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5848803520202637,
      "learning_rate": 0.00053599374260314,
      "loss": 3.0248,
      "step": 48803
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3866914510726929,
      "learning_rate": 0.0005359912170495274,
      "loss": 3.2218,
      "step": 48804
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2728166580200195,
      "learning_rate": 0.0005359886914520396,
      "loss": 2.9197,
      "step": 48805
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4194635152816772,
      "learning_rate": 0.0005359861658106769,
      "loss": 3.1252,
      "step": 48806
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4954166412353516,
      "learning_rate": 0.00053598364012544,
      "loss": 2.8143,
      "step": 48807
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.121709108352661,
      "learning_rate": 0.0005359811143963295,
      "loss": 3.0023,
      "step": 48808
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7486159801483154,
      "learning_rate": 0.0005359785886233454,
      "loss": 3.172,
      "step": 48809
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3565514087677002,
      "learning_rate": 0.0005359760628064885,
      "loss": 2.9719,
      "step": 48810
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5974040031433105,
      "learning_rate": 0.0005359735369457591,
      "loss": 3.1431,
      "step": 48811
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9322762489318848,
      "learning_rate": 0.0005359710110411579,
      "loss": 3.1531,
      "step": 48812
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.325538158416748,
      "learning_rate": 0.0005359684850926853,
      "loss": 3.1436,
      "step": 48813
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7231119871139526,
      "learning_rate": 0.0005359659591003415,
      "loss": 3.0605,
      "step": 48814
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5386345386505127,
      "learning_rate": 0.0005359634330641273,
      "loss": 2.9357,
      "step": 48815
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7543103694915771,
      "learning_rate": 0.0005359609069840429,
      "loss": 3.268,
      "step": 48816
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.637017846107483,
      "learning_rate": 0.000535958380860089,
      "loss": 3.3656,
      "step": 48817
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4782522916793823,
      "learning_rate": 0.000535955854692266,
      "loss": 2.9031,
      "step": 48818
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.263911008834839,
      "learning_rate": 0.0005359533284805745,
      "loss": 3.0433,
      "step": 48819
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2598114013671875,
      "learning_rate": 0.0005359508022250146,
      "loss": 3.0613,
      "step": 48820
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7026112079620361,
      "learning_rate": 0.0005359482759255871,
      "loss": 2.9385,
      "step": 48821
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.312425136566162,
      "learning_rate": 0.0005359457495822923,
      "loss": 3.1677,
      "step": 48822
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0884387493133545,
      "learning_rate": 0.0005359432231951308,
      "loss": 3.0331,
      "step": 48823
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.907371997833252,
      "learning_rate": 0.000535940696764103,
      "loss": 3.3172,
      "step": 48824
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.3239262104034424,
      "learning_rate": 0.0005359381702892093,
      "loss": 3.0595,
      "step": 48825
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8994468450546265,
      "learning_rate": 0.0005359356437704503,
      "loss": 3.0739,
      "step": 48826
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6019389629364014,
      "learning_rate": 0.0005359331172078264,
      "loss": 2.8903,
      "step": 48827
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8045294284820557,
      "learning_rate": 0.000535930590601338,
      "loss": 2.951,
      "step": 48828
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9042013883590698,
      "learning_rate": 0.0005359280639509858,
      "loss": 3.1166,
      "step": 48829
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4632699489593506,
      "learning_rate": 0.00053592553725677,
      "loss": 3.1621,
      "step": 48830
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2539541721343994,
      "learning_rate": 0.0005359230105186912,
      "loss": 2.9272,
      "step": 48831
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.656101107597351,
      "learning_rate": 0.0005359204837367499,
      "loss": 2.8739,
      "step": 48832
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0892868041992188,
      "learning_rate": 0.0005359179569109465,
      "loss": 3.2353,
      "step": 48833
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6118202209472656,
      "learning_rate": 0.0005359154300412815,
      "loss": 3.249,
      "step": 48834
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6910427808761597,
      "learning_rate": 0.0005359129031277555,
      "loss": 3.0064,
      "step": 48835
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.361769437789917,
      "learning_rate": 0.0005359103761703686,
      "loss": 3.0202,
      "step": 48836
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.0124120712280273,
      "learning_rate": 0.0005359078491691217,
      "loss": 3.2761,
      "step": 48837
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2485599517822266,
      "learning_rate": 0.0005359053221240149,
      "loss": 3.2771,
      "step": 48838
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5602744817733765,
      "learning_rate": 0.000535902795035049,
      "loss": 3.2044,
      "step": 48839
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0201189517974854,
      "learning_rate": 0.0005359002679022242,
      "loss": 3.0979,
      "step": 48840
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.3079724311828613,
      "learning_rate": 0.0005358977407255413,
      "loss": 2.9854,
      "step": 48841
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5952229499816895,
      "learning_rate": 0.0005358952135050003,
      "loss": 3.243,
      "step": 48842
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6501202583312988,
      "learning_rate": 0.0005358926862406022,
      "loss": 3.1672,
      "step": 48843
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6789026260375977,
      "learning_rate": 0.0005358901589323469,
      "loss": 3.0145,
      "step": 48844
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5790812969207764,
      "learning_rate": 0.0005358876315802353,
      "loss": 3.0018,
      "step": 48845
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3877556324005127,
      "learning_rate": 0.0005358851041842677,
      "loss": 2.9888,
      "step": 48846
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6165348291397095,
      "learning_rate": 0.0005358825767444447,
      "loss": 3.3979,
      "step": 48847
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9101914167404175,
      "learning_rate": 0.0005358800492607667,
      "loss": 2.8734,
      "step": 48848
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.83720064163208,
      "learning_rate": 0.000535877521733234,
      "loss": 3.0831,
      "step": 48849
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5761592388153076,
      "learning_rate": 0.0005358749941618473,
      "loss": 2.9212,
      "step": 48850
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.7519733905792236,
      "learning_rate": 0.000535872466546607,
      "loss": 3.056,
      "step": 48851
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.905688762664795,
      "learning_rate": 0.0005358699388875134,
      "loss": 2.9016,
      "step": 48852
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6733860969543457,
      "learning_rate": 0.0005358674111845673,
      "loss": 2.9683,
      "step": 48853
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.385825753211975,
      "learning_rate": 0.000535864883437769,
      "loss": 3.0353,
      "step": 48854
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.569136381149292,
      "learning_rate": 0.000535862355647119,
      "loss": 2.9931,
      "step": 48855
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.62083101272583,
      "learning_rate": 0.0005358598278126176,
      "loss": 3.1288,
      "step": 48856
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.163203001022339,
      "learning_rate": 0.0005358572999342654,
      "loss": 3.0433,
      "step": 48857
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8646737337112427,
      "learning_rate": 0.000535854772012063,
      "loss": 2.9154,
      "step": 48858
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5797274112701416,
      "learning_rate": 0.0005358522440460107,
      "loss": 2.9696,
      "step": 48859
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3221795558929443,
      "learning_rate": 0.0005358497160361089,
      "loss": 3.1604,
      "step": 48860
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9724054336547852,
      "learning_rate": 0.0005358471879823583,
      "loss": 2.9033,
      "step": 48861
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8969990015029907,
      "learning_rate": 0.0005358446598847592,
      "loss": 3.3651,
      "step": 48862
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1919727325439453,
      "learning_rate": 0.0005358421317433122,
      "loss": 3.07,
      "step": 48863
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.592321515083313,
      "learning_rate": 0.0005358396035580177,
      "loss": 2.9912,
      "step": 48864
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3308868408203125,
      "learning_rate": 0.0005358370753288761,
      "loss": 3.0149,
      "step": 48865
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.69949209690094,
      "learning_rate": 0.000535834547055888,
      "loss": 3.064,
      "step": 48866
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.492668867111206,
      "learning_rate": 0.0005358320187390538,
      "loss": 3.1945,
      "step": 48867
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.263057231903076,
      "learning_rate": 0.0005358294903783739,
      "loss": 3.0993,
      "step": 48868
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8463597297668457,
      "learning_rate": 0.0005358269619738489,
      "loss": 3.007,
      "step": 48869
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1436824798583984,
      "learning_rate": 0.0005358244335254791,
      "loss": 2.818,
      "step": 48870
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.756742000579834,
      "learning_rate": 0.0005358219050332653,
      "loss": 2.7665,
      "step": 48871
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8457437753677368,
      "learning_rate": 0.0005358193764972076,
      "loss": 3.2085,
      "step": 48872
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0453176498413086,
      "learning_rate": 0.0005358168479173067,
      "loss": 2.7537,
      "step": 48873
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7931065559387207,
      "learning_rate": 0.000535814319293563,
      "loss": 3.0293,
      "step": 48874
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.127171516418457,
      "learning_rate": 0.0005358117906259769,
      "loss": 2.9554,
      "step": 48875
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.4867725372314453,
      "learning_rate": 0.000535809261914549,
      "loss": 3.267,
      "step": 48876
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.752716302871704,
      "learning_rate": 0.0005358067331592797,
      "loss": 3.2212,
      "step": 48877
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5071468353271484,
      "learning_rate": 0.0005358042043601694,
      "loss": 2.9918,
      "step": 48878
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.183523654937744,
      "learning_rate": 0.0005358016755172187,
      "loss": 3.0327,
      "step": 48879
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6765220165252686,
      "learning_rate": 0.0005357991466304281,
      "loss": 3.0912,
      "step": 48880
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8947988748550415,
      "learning_rate": 0.0005357966176997977,
      "loss": 3.1646,
      "step": 48881
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8527008295059204,
      "learning_rate": 0.0005357940887253287,
      "loss": 2.8816,
      "step": 48882
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6964713335037231,
      "learning_rate": 0.0005357915597070207,
      "loss": 3.1857,
      "step": 48883
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8920055627822876,
      "learning_rate": 0.0005357890306448749,
      "loss": 2.9402,
      "step": 48884
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.696327805519104,
      "learning_rate": 0.0005357865015388914,
      "loss": 2.9296,
      "step": 48885
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.587044596672058,
      "learning_rate": 0.0005357839723890707,
      "loss": 3.0796,
      "step": 48886
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.647373914718628,
      "learning_rate": 0.0005357814431954132,
      "loss": 3.1377,
      "step": 48887
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.151637077331543,
      "learning_rate": 0.0005357789139579196,
      "loss": 2.8803,
      "step": 48888
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9610404968261719,
      "learning_rate": 0.0005357763846765902,
      "loss": 3.052,
      "step": 48889
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5042346715927124,
      "learning_rate": 0.0005357738553514256,
      "loss": 3.0021,
      "step": 48890
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7832627296447754,
      "learning_rate": 0.0005357713259824262,
      "loss": 3.2033,
      "step": 48891
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0349490642547607,
      "learning_rate": 0.0005357687965695925,
      "loss": 2.9911,
      "step": 48892
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3469758033752441,
      "learning_rate": 0.0005357662671129247,
      "loss": 2.8635,
      "step": 48893
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6776281595230103,
      "learning_rate": 0.0005357637376124237,
      "loss": 3.1697,
      "step": 48894
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0285189151763916,
      "learning_rate": 0.0005357612080680897,
      "loss": 2.8986,
      "step": 48895
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6012628078460693,
      "learning_rate": 0.0005357586784799233,
      "loss": 2.8708,
      "step": 48896
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4972869157791138,
      "learning_rate": 0.0005357561488479248,
      "loss": 3.1222,
      "step": 48897
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.552433967590332,
      "learning_rate": 0.0005357536191720948,
      "loss": 3.0451,
      "step": 48898
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6073472499847412,
      "learning_rate": 0.0005357510894524339,
      "loss": 2.9591,
      "step": 48899
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6186946630477905,
      "learning_rate": 0.0005357485596889423,
      "loss": 3.2256,
      "step": 48900
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.478269100189209,
      "learning_rate": 0.0005357460298816205,
      "loss": 3.005,
      "step": 48901
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6357874870300293,
      "learning_rate": 0.0005357435000304693,
      "loss": 2.9969,
      "step": 48902
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.628147840499878,
      "learning_rate": 0.0005357409701354887,
      "loss": 3.0885,
      "step": 48903
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8477216958999634,
      "learning_rate": 0.0005357384401966796,
      "loss": 3.2941,
      "step": 48904
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.747772455215454,
      "learning_rate": 0.0005357359102140421,
      "loss": 2.9115,
      "step": 48905
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4431965351104736,
      "learning_rate": 0.000535733380187577,
      "loss": 3.0064,
      "step": 48906
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3870538473129272,
      "learning_rate": 0.0005357308501172845,
      "loss": 3.1658,
      "step": 48907
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.516594648361206,
      "learning_rate": 0.0005357283200031652,
      "loss": 3.0683,
      "step": 48908
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6680054664611816,
      "learning_rate": 0.0005357257898452196,
      "loss": 3.1945,
      "step": 48909
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9040076732635498,
      "learning_rate": 0.0005357232596434481,
      "loss": 3.0691,
      "step": 48910
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3746695518493652,
      "learning_rate": 0.0005357207293978513,
      "loss": 3.0402,
      "step": 48911
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4972801208496094,
      "learning_rate": 0.0005357181991084294,
      "loss": 3.305,
      "step": 48912
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5482251644134521,
      "learning_rate": 0.0005357156687751832,
      "loss": 3.1661,
      "step": 48913
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4569591283798218,
      "learning_rate": 0.0005357131383981129,
      "loss": 3.1414,
      "step": 48914
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1433956623077393,
      "learning_rate": 0.000535710607977219,
      "loss": 3.1097,
      "step": 48915
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.527995228767395,
      "learning_rate": 0.0005357080775125021,
      "loss": 3.1765,
      "step": 48916
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7027482986450195,
      "learning_rate": 0.0005357055470039628,
      "loss": 3.252,
      "step": 48917
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2583239078521729,
      "learning_rate": 0.0005357030164516013,
      "loss": 3.243,
      "step": 48918
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5849609375,
      "learning_rate": 0.000535700485855418,
      "loss": 3.1622,
      "step": 48919
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.044076919555664,
      "learning_rate": 0.0005356979552154136,
      "loss": 2.8325,
      "step": 48920
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5528621673583984,
      "learning_rate": 0.0005356954245315886,
      "loss": 3.1063,
      "step": 48921
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7620058059692383,
      "learning_rate": 0.0005356928938039433,
      "loss": 2.9409,
      "step": 48922
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5898778438568115,
      "learning_rate": 0.0005356903630324782,
      "loss": 2.892,
      "step": 48923
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.617885112762451,
      "learning_rate": 0.0005356878322171939,
      "loss": 3.2492,
      "step": 48924
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.0390727519989014,
      "learning_rate": 0.0005356853013580908,
      "loss": 2.9154,
      "step": 48925
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.209771156311035,
      "learning_rate": 0.0005356827704551693,
      "loss": 3.16,
      "step": 48926
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4049038887023926,
      "learning_rate": 0.00053568023950843,
      "loss": 3.1505,
      "step": 48927
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9612290859222412,
      "learning_rate": 0.0005356777085178731,
      "loss": 2.8803,
      "step": 48928
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2952643632888794,
      "learning_rate": 0.0005356751774834995,
      "loss": 3.2602,
      "step": 48929
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4878009557724,
      "learning_rate": 0.0005356726464053092,
      "loss": 2.8821,
      "step": 48930
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2878715991973877,
      "learning_rate": 0.0005356701152833031,
      "loss": 3.0736,
      "step": 48931
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5642075538635254,
      "learning_rate": 0.0005356675841174814,
      "loss": 2.8039,
      "step": 48932
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.052332878112793,
      "learning_rate": 0.0005356650529078446,
      "loss": 3.0449,
      "step": 48933
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7273327112197876,
      "learning_rate": 0.0005356625216543934,
      "loss": 3.2991,
      "step": 48934
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2291162014007568,
      "learning_rate": 0.0005356599903571279,
      "loss": 3.2226,
      "step": 48935
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7731943130493164,
      "learning_rate": 0.0005356574590160488,
      "loss": 2.9062,
      "step": 48936
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.512769341468811,
      "learning_rate": 0.0005356549276311567,
      "loss": 3.1588,
      "step": 48937
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2832722663879395,
      "learning_rate": 0.0005356523962024517,
      "loss": 3.1329,
      "step": 48938
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.515528917312622,
      "learning_rate": 0.0005356498647299345,
      "loss": 3.022,
      "step": 48939
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.534530520439148,
      "learning_rate": 0.0005356473332136056,
      "loss": 2.969,
      "step": 48940
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8471862077713013,
      "learning_rate": 0.0005356448016534653,
      "loss": 3.0248,
      "step": 48941
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7771600484848022,
      "learning_rate": 0.0005356422700495142,
      "loss": 3.0968,
      "step": 48942
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8802428245544434,
      "learning_rate": 0.0005356397384017529,
      "loss": 3.0668,
      "step": 48943
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4458372592926025,
      "learning_rate": 0.0005356372067101816,
      "loss": 3.1513,
      "step": 48944
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6421972513198853,
      "learning_rate": 0.000535634674974801,
      "loss": 2.9603,
      "step": 48945
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7994050979614258,
      "learning_rate": 0.0005356321431956115,
      "loss": 2.7832,
      "step": 48946
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7823312282562256,
      "learning_rate": 0.0005356296113726134,
      "loss": 2.9211,
      "step": 48947
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5586708784103394,
      "learning_rate": 0.0005356270795058073,
      "loss": 2.8767,
      "step": 48948
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9743921756744385,
      "learning_rate": 0.0005356245475951938,
      "loss": 3.04,
      "step": 48949
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.929869294166565,
      "learning_rate": 0.0005356220156407731,
      "loss": 2.8952,
      "step": 48950
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.550197720527649,
      "learning_rate": 0.0005356194836425458,
      "loss": 2.9813,
      "step": 48951
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6216555833816528,
      "learning_rate": 0.0005356169516005126,
      "loss": 3.0666,
      "step": 48952
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.879501223564148,
      "learning_rate": 0.0005356144195146737,
      "loss": 3.0305,
      "step": 48953
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5657907724380493,
      "learning_rate": 0.0005356118873850295,
      "loss": 2.9038,
      "step": 48954
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8099639415740967,
      "learning_rate": 0.0005356093552115807,
      "loss": 3.0352,
      "step": 48955
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6369993686676025,
      "learning_rate": 0.0005356068229943276,
      "loss": 3.0523,
      "step": 48956
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4703216552734375,
      "learning_rate": 0.0005356042907332708,
      "loss": 3.1554,
      "step": 48957
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6876837015151978,
      "learning_rate": 0.0005356017584284107,
      "loss": 3.1589,
      "step": 48958
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8217617273330688,
      "learning_rate": 0.0005355992260797478,
      "loss": 3.1345,
      "step": 48959
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.306394100189209,
      "learning_rate": 0.0005355966936872827,
      "loss": 2.8125,
      "step": 48960
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6637072563171387,
      "learning_rate": 0.0005355941612510154,
      "loss": 3.1156,
      "step": 48961
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9034539461135864,
      "learning_rate": 0.000535591628770947,
      "loss": 3.157,
      "step": 48962
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.86588454246521,
      "learning_rate": 0.0005355890962470775,
      "loss": 2.7488,
      "step": 48963
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7675492763519287,
      "learning_rate": 0.0005355865636794076,
      "loss": 2.7868,
      "step": 48964
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4759997129440308,
      "learning_rate": 0.0005355840310679377,
      "loss": 3.1935,
      "step": 48965
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.335722804069519,
      "learning_rate": 0.0005355814984126683,
      "loss": 3.0844,
      "step": 48966
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4543201923370361,
      "learning_rate": 0.0005355789657135998,
      "loss": 3.0487,
      "step": 48967
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3208492994308472,
      "learning_rate": 0.0005355764329707328,
      "loss": 3.0907,
      "step": 48968
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4737627506256104,
      "learning_rate": 0.0005355739001840676,
      "loss": 2.895,
      "step": 48969
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.682512640953064,
      "learning_rate": 0.0005355713673536048,
      "loss": 3.2486,
      "step": 48970
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.456648826599121,
      "learning_rate": 0.0005355688344793448,
      "loss": 2.9078,
      "step": 48971
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6116390228271484,
      "learning_rate": 0.0005355663015612882,
      "loss": 3.0243,
      "step": 48972
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5049175024032593,
      "learning_rate": 0.0005355637685994354,
      "loss": 3.0258,
      "step": 48973
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7607160806655884,
      "learning_rate": 0.0005355612355937867,
      "loss": 2.9212,
      "step": 48974
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4123551845550537,
      "learning_rate": 0.0005355587025443429,
      "loss": 3.0533,
      "step": 48975
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4642714262008667,
      "learning_rate": 0.0005355561694511042,
      "loss": 2.9874,
      "step": 48976
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.424015998840332,
      "learning_rate": 0.0005355536363140711,
      "loss": 3.1011,
      "step": 48977
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5214852094650269,
      "learning_rate": 0.0005355511031332442,
      "loss": 3.2896,
      "step": 48978
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6583439111709595,
      "learning_rate": 0.0005355485699086239,
      "loss": 3.0364,
      "step": 48979
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8749185800552368,
      "learning_rate": 0.0005355460366402107,
      "loss": 2.946,
      "step": 48980
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5297698974609375,
      "learning_rate": 0.000535543503328005,
      "loss": 3.1427,
      "step": 48981
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.896925687789917,
      "learning_rate": 0.0005355409699720074,
      "loss": 2.9484,
      "step": 48982
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.6604909896850586,
      "learning_rate": 0.0005355384365722182,
      "loss": 2.9177,
      "step": 48983
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.958512783050537,
      "learning_rate": 0.0005355359031286379,
      "loss": 3.0969,
      "step": 48984
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6629606485366821,
      "learning_rate": 0.0005355333696412672,
      "loss": 2.9606,
      "step": 48985
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6932919025421143,
      "learning_rate": 0.0005355308361101063,
      "loss": 3.1321,
      "step": 48986
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4085075855255127,
      "learning_rate": 0.0005355283025351557,
      "loss": 3.0748,
      "step": 48987
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8471355438232422,
      "learning_rate": 0.000535525768916416,
      "loss": 3.1796,
      "step": 48988
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8297314643859863,
      "learning_rate": 0.0005355232352538878,
      "loss": 2.9097,
      "step": 48989
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.057650566101074,
      "learning_rate": 0.0005355207015475711,
      "loss": 3.0838,
      "step": 48990
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.021744966506958,
      "learning_rate": 0.0005355181677974668,
      "loss": 2.8388,
      "step": 48991
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.922853946685791,
      "learning_rate": 0.0005355156340035752,
      "loss": 2.8926,
      "step": 48992
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.822401762008667,
      "learning_rate": 0.0005355131001658968,
      "loss": 3.0389,
      "step": 48993
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.905834436416626,
      "learning_rate": 0.0005355105662844321,
      "loss": 2.9069,
      "step": 48994
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.739211082458496,
      "learning_rate": 0.0005355080323591815,
      "loss": 3.1424,
      "step": 48995
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4526206254959106,
      "learning_rate": 0.0005355054983901455,
      "loss": 3.0012,
      "step": 48996
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.730903148651123,
      "learning_rate": 0.0005355029643773246,
      "loss": 3.048,
      "step": 48997
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.229600191116333,
      "learning_rate": 0.0005355004303207192,
      "loss": 2.8583,
      "step": 48998
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4785363674163818,
      "learning_rate": 0.00053549789622033,
      "loss": 3.2879,
      "step": 48999
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9704071283340454,
      "learning_rate": 0.0005354953620761571,
      "loss": 3.1009,
      "step": 49000
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4621317386627197,
      "learning_rate": 0.0005354928278882013,
      "loss": 2.9945,
      "step": 49001
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3534289598464966,
      "learning_rate": 0.0005354902936564628,
      "loss": 3.2513,
      "step": 49002
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5077272653579712,
      "learning_rate": 0.0005354877593809423,
      "loss": 2.8737,
      "step": 49003
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4673916101455688,
      "learning_rate": 0.0005354852250616403,
      "loss": 3.0334,
      "step": 49004
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4393714666366577,
      "learning_rate": 0.0005354826906985569,
      "loss": 3.2653,
      "step": 49005
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8047120571136475,
      "learning_rate": 0.000535480156291693,
      "loss": 2.9956,
      "step": 49006
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4458893537521362,
      "learning_rate": 0.0005354776218410487,
      "loss": 3.1324,
      "step": 49007
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.720685362815857,
      "learning_rate": 0.0005354750873466249,
      "loss": 2.9638,
      "step": 49008
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6050193309783936,
      "learning_rate": 0.0005354725528084218,
      "loss": 2.8344,
      "step": 49009
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6167420148849487,
      "learning_rate": 0.0005354700182264398,
      "loss": 3.1391,
      "step": 49010
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0013346672058105,
      "learning_rate": 0.0005354674836006795,
      "loss": 3.1518,
      "step": 49011
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9857182502746582,
      "learning_rate": 0.0005354649489311414,
      "loss": 3.2689,
      "step": 49012
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7703280448913574,
      "learning_rate": 0.000535462414217826,
      "loss": 2.882,
      "step": 49013
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9107671976089478,
      "learning_rate": 0.0005354598794607335,
      "loss": 3.15,
      "step": 49014
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.715930700302124,
      "learning_rate": 0.0005354573446598646,
      "loss": 3.0697,
      "step": 49015
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6202149391174316,
      "learning_rate": 0.0005354548098152199,
      "loss": 3.0565,
      "step": 49016
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6441388130187988,
      "learning_rate": 0.0005354522749267995,
      "loss": 3.0515,
      "step": 49017
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.622796654701233,
      "learning_rate": 0.0005354497399946042,
      "loss": 3.0909,
      "step": 49018
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6464720964431763,
      "learning_rate": 0.0005354472050186344,
      "loss": 3.0161,
      "step": 49019
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.502955436706543,
      "learning_rate": 0.0005354446699988904,
      "loss": 2.9484,
      "step": 49020
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5802921056747437,
      "learning_rate": 0.0005354421349353728,
      "loss": 3.0461,
      "step": 49021
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4859787225723267,
      "learning_rate": 0.0005354395998280821,
      "loss": 3.2037,
      "step": 49022
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.378739356994629,
      "learning_rate": 0.0005354370646770188,
      "loss": 3.2891,
      "step": 49023
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.53316593170166,
      "learning_rate": 0.0005354345294821832,
      "loss": 2.7441,
      "step": 49024
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.528024435043335,
      "learning_rate": 0.000535431994243576,
      "loss": 2.9066,
      "step": 49025
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6870564222335815,
      "learning_rate": 0.0005354294589611974,
      "loss": 2.8982,
      "step": 49026
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9152847528457642,
      "learning_rate": 0.0005354269236350481,
      "loss": 3.0593,
      "step": 49027
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0152204036712646,
      "learning_rate": 0.0005354243882651284,
      "loss": 3.0842,
      "step": 49028
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4536799192428589,
      "learning_rate": 0.000535421852851439,
      "loss": 3.0344,
      "step": 49029
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2937569618225098,
      "learning_rate": 0.0005354193173939802,
      "loss": 3.0844,
      "step": 49030
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8852639198303223,
      "learning_rate": 0.0005354167818927524,
      "loss": 2.8378,
      "step": 49031
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.716238260269165,
      "learning_rate": 0.0005354142463477563,
      "loss": 3.1705,
      "step": 49032
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6948442459106445,
      "learning_rate": 0.0005354117107589921,
      "loss": 3.1525,
      "step": 49033
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.4860846996307373,
      "learning_rate": 0.0005354091751264606,
      "loss": 3.142,
      "step": 49034
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3448641300201416,
      "learning_rate": 0.000535406639450162,
      "loss": 3.0635,
      "step": 49035
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6616907119750977,
      "learning_rate": 0.0005354041037300968,
      "loss": 3.2019,
      "step": 49036
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9218014478683472,
      "learning_rate": 0.0005354015679662656,
      "loss": 3.2342,
      "step": 49037
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.684286594390869,
      "learning_rate": 0.0005353990321586688,
      "loss": 3.2729,
      "step": 49038
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9819910526275635,
      "learning_rate": 0.0005353964963073069,
      "loss": 3.066,
      "step": 49039
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5736745595932007,
      "learning_rate": 0.0005353939604121804,
      "loss": 2.9363,
      "step": 49040
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0812511444091797,
      "learning_rate": 0.0005353914244732897,
      "loss": 2.9829,
      "step": 49041
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1096558570861816,
      "learning_rate": 0.0005353888884906352,
      "loss": 3.1439,
      "step": 49042
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7270824909210205,
      "learning_rate": 0.0005353863524642174,
      "loss": 3.0881,
      "step": 49043
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5355510711669922,
      "learning_rate": 0.000535383816394037,
      "loss": 3.1104,
      "step": 49044
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.584545135498047,
      "learning_rate": 0.0005353812802800943,
      "loss": 2.8275,
      "step": 49045
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8790861368179321,
      "learning_rate": 0.0005353787441223897,
      "loss": 2.9145,
      "step": 49046
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4144638776779175,
      "learning_rate": 0.0005353762079209238,
      "loss": 2.9994,
      "step": 49047
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.916841983795166,
      "learning_rate": 0.000535373671675697,
      "loss": 3.3146,
      "step": 49048
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.736185073852539,
      "learning_rate": 0.0005353711353867097,
      "loss": 3.1191,
      "step": 49049
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4365932941436768,
      "learning_rate": 0.0005353685990539626,
      "loss": 2.9862,
      "step": 49050
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6988089084625244,
      "learning_rate": 0.0005353660626774559,
      "loss": 3.1615,
      "step": 49051
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.0988893508911133,
      "learning_rate": 0.0005353635262571903,
      "loss": 2.8312,
      "step": 49052
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6172773838043213,
      "learning_rate": 0.0005353609897931662,
      "loss": 2.8071,
      "step": 49053
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4913561344146729,
      "learning_rate": 0.000535358453285384,
      "loss": 3.0378,
      "step": 49054
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.53244686126709,
      "learning_rate": 0.0005353559167338442,
      "loss": 3.06,
      "step": 49055
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2992420196533203,
      "learning_rate": 0.0005353533801385473,
      "loss": 3.311,
      "step": 49056
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5272639989852905,
      "learning_rate": 0.0005353508434994938,
      "loss": 2.8667,
      "step": 49057
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9379650354385376,
      "learning_rate": 0.0005353483068166841,
      "loss": 3.1737,
      "step": 49058
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8211700916290283,
      "learning_rate": 0.0005353457700901187,
      "loss": 2.9797,
      "step": 49059
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4418655633926392,
      "learning_rate": 0.0005353432333197981,
      "loss": 3.1133,
      "step": 49060
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4068207740783691,
      "learning_rate": 0.0005353406965057228,
      "loss": 3.1824,
      "step": 49061
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8879703283309937,
      "learning_rate": 0.0005353381596478932,
      "loss": 2.9145,
      "step": 49062
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6823699474334717,
      "learning_rate": 0.0005353356227463099,
      "loss": 3.0728,
      "step": 49063
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.027433156967163,
      "learning_rate": 0.0005353330858009731,
      "loss": 3.0728,
      "step": 49064
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.65803062915802,
      "learning_rate": 0.0005353305488118835,
      "loss": 2.7327,
      "step": 49065
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8911676406860352,
      "learning_rate": 0.0005353280117790415,
      "loss": 2.7449,
      "step": 49066
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.596840739250183,
      "learning_rate": 0.0005353254747024475,
      "loss": 2.9876,
      "step": 49067
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8803943395614624,
      "learning_rate": 0.0005353229375821021,
      "loss": 3.1164,
      "step": 49068
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.419761896133423,
      "learning_rate": 0.0005353204004180058,
      "loss": 3.3004,
      "step": 49069
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0204830169677734,
      "learning_rate": 0.000535317863210159,
      "loss": 3.046,
      "step": 49070
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5898098945617676,
      "learning_rate": 0.000535315325958562,
      "loss": 2.877,
      "step": 49071
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8855421543121338,
      "learning_rate": 0.0005353127886632155,
      "loss": 2.8775,
      "step": 49072
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.597076416015625,
      "learning_rate": 0.00053531025132412,
      "loss": 3.0256,
      "step": 49073
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4840471744537354,
      "learning_rate": 0.000535307713941276,
      "loss": 3.1858,
      "step": 49074
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7829328775405884,
      "learning_rate": 0.0005353051765146835,
      "loss": 2.957,
      "step": 49075
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.018955945968628,
      "learning_rate": 0.0005353026390443436,
      "loss": 2.9135,
      "step": 49076
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3268147706985474,
      "learning_rate": 0.0005353001015302564,
      "loss": 2.8408,
      "step": 49077
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5828076601028442,
      "learning_rate": 0.0005352975639724225,
      "loss": 2.944,
      "step": 49078
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.228015184402466,
      "learning_rate": 0.0005352950263708423,
      "loss": 3.0353,
      "step": 49079
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7191411256790161,
      "learning_rate": 0.0005352924887255164,
      "loss": 2.9334,
      "step": 49080
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5207884311676025,
      "learning_rate": 0.000535289951036445,
      "loss": 3.0721,
      "step": 49081
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8795530796051025,
      "learning_rate": 0.000535287413303629,
      "loss": 3.1075,
      "step": 49082
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.528762102127075,
      "learning_rate": 0.0005352848755270685,
      "loss": 3.0,
      "step": 49083
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5925164222717285,
      "learning_rate": 0.000535282337706764,
      "loss": 3.095,
      "step": 49084
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.8347134590148926,
      "learning_rate": 0.0005352797998427162,
      "loss": 2.9588,
      "step": 49085
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.302342176437378,
      "learning_rate": 0.0005352772619349255,
      "loss": 2.8792,
      "step": 49086
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.205780029296875,
      "learning_rate": 0.0005352747239833922,
      "loss": 2.886,
      "step": 49087
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.279289484024048,
      "learning_rate": 0.000535272185988117,
      "loss": 2.5984,
      "step": 49088
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9677560329437256,
      "learning_rate": 0.0005352696479491002,
      "loss": 3.1721,
      "step": 49089
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0689709186553955,
      "learning_rate": 0.0005352671098663422,
      "loss": 3.0836,
      "step": 49090
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6676685810089111,
      "learning_rate": 0.0005352645717398438,
      "loss": 3.0942,
      "step": 49091
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1024694442749023,
      "learning_rate": 0.0005352620335696052,
      "loss": 3.174,
      "step": 49092
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0786020755767822,
      "learning_rate": 0.0005352594953556269,
      "loss": 3.142,
      "step": 49093
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6553651094436646,
      "learning_rate": 0.0005352569570979095,
      "loss": 2.8613,
      "step": 49094
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4269064664840698,
      "learning_rate": 0.0005352544187964533,
      "loss": 3.3527,
      "step": 49095
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5947017669677734,
      "learning_rate": 0.0005352518804512589,
      "loss": 3.2336,
      "step": 49096
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5889204740524292,
      "learning_rate": 0.0005352493420623268,
      "loss": 2.9656,
      "step": 49097
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3358980417251587,
      "learning_rate": 0.0005352468036296575,
      "loss": 2.8981,
      "step": 49098
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2195180654525757,
      "learning_rate": 0.0005352442651532512,
      "loss": 2.9177,
      "step": 49099
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9800316095352173,
      "learning_rate": 0.0005352417266331085,
      "loss": 3.0787,
      "step": 49100
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6318607330322266,
      "learning_rate": 0.00053523918806923,
      "loss": 2.9836,
      "step": 49101
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.672896146774292,
      "learning_rate": 0.0005352366494616161,
      "loss": 3.1355,
      "step": 49102
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5022441148757935,
      "learning_rate": 0.0005352341108102673,
      "loss": 3.1385,
      "step": 49103
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8328642845153809,
      "learning_rate": 0.000535231572115184,
      "loss": 3.1942,
      "step": 49104
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.318519115447998,
      "learning_rate": 0.0005352290333763667,
      "loss": 3.0821,
      "step": 49105
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3811322450637817,
      "learning_rate": 0.000535226494593816,
      "loss": 3.1078,
      "step": 49106
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4288840293884277,
      "learning_rate": 0.0005352239557675321,
      "loss": 3.1371,
      "step": 49107
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5296095609664917,
      "learning_rate": 0.0005352214168975156,
      "loss": 2.9698,
      "step": 49108
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3397830724716187,
      "learning_rate": 0.0005352188779837671,
      "loss": 3.2523,
      "step": 49109
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4879190921783447,
      "learning_rate": 0.000535216339026287,
      "loss": 2.856,
      "step": 49110
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7992788553237915,
      "learning_rate": 0.0005352138000250758,
      "loss": 3.1219,
      "step": 49111
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2245657444000244,
      "learning_rate": 0.0005352112609801337,
      "loss": 3.1661,
      "step": 49112
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6027299165725708,
      "learning_rate": 0.0005352087218914614,
      "loss": 2.8693,
      "step": 49113
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.424727201461792,
      "learning_rate": 0.0005352061827590596,
      "loss": 2.8717,
      "step": 49114
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5216952562332153,
      "learning_rate": 0.0005352036435829284,
      "loss": 2.9529,
      "step": 49115
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7587430477142334,
      "learning_rate": 0.0005352011043630683,
      "loss": 2.8955,
      "step": 49116
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7647732496261597,
      "learning_rate": 0.00053519856509948,
      "loss": 3.0773,
      "step": 49117
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8722271919250488,
      "learning_rate": 0.0005351960257921639,
      "loss": 2.9696,
      "step": 49118
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5194919109344482,
      "learning_rate": 0.0005351934864411203,
      "loss": 3.0194,
      "step": 49119
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5594598054885864,
      "learning_rate": 0.0005351909470463498,
      "loss": 3.1459,
      "step": 49120
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.550147294998169,
      "learning_rate": 0.0005351884076078529,
      "loss": 2.9541,
      "step": 49121
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.590372920036316,
      "learning_rate": 0.00053518586812563,
      "loss": 3.0892,
      "step": 49122
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7219557762145996,
      "learning_rate": 0.0005351833285996817,
      "loss": 2.9983,
      "step": 49123
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.381785273551941,
      "learning_rate": 0.0005351807890300083,
      "loss": 3.0616,
      "step": 49124
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7417453527450562,
      "learning_rate": 0.0005351782494166103,
      "loss": 3.205,
      "step": 49125
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4699053764343262,
      "learning_rate": 0.0005351757097594883,
      "loss": 3.0624,
      "step": 49126
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8322540521621704,
      "learning_rate": 0.0005351731700586426,
      "loss": 2.8912,
      "step": 49127
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6225441694259644,
      "learning_rate": 0.000535170630314074,
      "loss": 3.0854,
      "step": 49128
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3820277452468872,
      "learning_rate": 0.0005351680905257825,
      "loss": 3.0247,
      "step": 49129
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4646708965301514,
      "learning_rate": 0.0005351655506937689,
      "loss": 3.0141,
      "step": 49130
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7137569189071655,
      "learning_rate": 0.0005351630108180336,
      "loss": 3.0356,
      "step": 49131
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.363119125366211,
      "learning_rate": 0.0005351604708985771,
      "loss": 3.0228,
      "step": 49132
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8183181285858154,
      "learning_rate": 0.0005351579309353997,
      "loss": 3.2948,
      "step": 49133
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6614192724227905,
      "learning_rate": 0.0005351553909285021,
      "loss": 3.0961,
      "step": 49134
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4818423986434937,
      "learning_rate": 0.0005351528508778846,
      "loss": 3.2086,
      "step": 49135
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.102128744125366,
      "learning_rate": 0.0005351503107835478,
      "loss": 2.9338,
      "step": 49136
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8918037414550781,
      "learning_rate": 0.0005351477706454922,
      "loss": 2.8518,
      "step": 49137
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.89602792263031,
      "learning_rate": 0.0005351452304637181,
      "loss": 2.9092,
      "step": 49138
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.493577480316162,
      "learning_rate": 0.000535142690238226,
      "loss": 3.0417,
      "step": 49139
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5961135625839233,
      "learning_rate": 0.0005351401499690166,
      "loss": 3.149,
      "step": 49140
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.330714225769043,
      "learning_rate": 0.00053513760965609,
      "loss": 3.0905,
      "step": 49141
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3475286960601807,
      "learning_rate": 0.0005351350692994471,
      "loss": 3.1116,
      "step": 49142
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.628381371498108,
      "learning_rate": 0.000535132528899088,
      "loss": 2.9469,
      "step": 49143
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.54524827003479,
      "learning_rate": 0.0005351299884550134,
      "loss": 3.016,
      "step": 49144
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6670196056365967,
      "learning_rate": 0.0005351274479672236,
      "loss": 3.1322,
      "step": 49145
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.043156623840332,
      "learning_rate": 0.0005351249074357192,
      "loss": 3.0529,
      "step": 49146
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9875069856643677,
      "learning_rate": 0.0005351223668605007,
      "loss": 2.8535,
      "step": 49147
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9652119874954224,
      "learning_rate": 0.0005351198262415686,
      "loss": 3.0441,
      "step": 49148
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2794228792190552,
      "learning_rate": 0.000535117285578923,
      "loss": 3.0608,
      "step": 49149
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7024433612823486,
      "learning_rate": 0.0005351147448725649,
      "loss": 3.1788,
      "step": 49150
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6209778785705566,
      "learning_rate": 0.0005351122041224945,
      "loss": 3.0043,
      "step": 49151
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8845272064208984,
      "learning_rate": 0.0005351096633287122,
      "loss": 3.1265,
      "step": 49152
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5970979928970337,
      "learning_rate": 0.0005351071224912187,
      "loss": 2.9391,
      "step": 49153
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6573970317840576,
      "learning_rate": 0.0005351045816100143,
      "loss": 3.209,
      "step": 49154
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9095745086669922,
      "learning_rate": 0.0005351020406850995,
      "loss": 2.9614,
      "step": 49155
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4122004508972168,
      "learning_rate": 0.0005350994997164747,
      "loss": 3.0328,
      "step": 49156
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5839358568191528,
      "learning_rate": 0.0005350969587041406,
      "loss": 3.1359,
      "step": 49157
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6427863836288452,
      "learning_rate": 0.0005350944176480976,
      "loss": 2.8502,
      "step": 49158
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1114253997802734,
      "learning_rate": 0.000535091876548346,
      "loss": 3.1795,
      "step": 49159
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.3423290252685547,
      "learning_rate": 0.0005350893354048864,
      "loss": 3.0297,
      "step": 49160
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.9457812309265137,
      "learning_rate": 0.0005350867942177193,
      "loss": 3.1438,
      "step": 49161
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.477135419845581,
      "learning_rate": 0.000535084252986845,
      "loss": 3.0622,
      "step": 49162
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3002047538757324,
      "learning_rate": 0.0005350817117122643,
      "loss": 3.187,
      "step": 49163
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.3004720211029053,
      "learning_rate": 0.0005350791703939772,
      "loss": 2.9572,
      "step": 49164
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.126852035522461,
      "learning_rate": 0.0005350766290319847,
      "loss": 3.3514,
      "step": 49165
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0470199584960938,
      "learning_rate": 0.0005350740876262869,
      "loss": 3.0306,
      "step": 49166
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5447232723236084,
      "learning_rate": 0.0005350715461768843,
      "loss": 3.3281,
      "step": 49167
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8632352352142334,
      "learning_rate": 0.0005350690046837776,
      "loss": 3.0217,
      "step": 49168
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.3045644760131836,
      "learning_rate": 0.0005350664631469671,
      "loss": 3.0179,
      "step": 49169
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5568546056747437,
      "learning_rate": 0.0005350639215664534,
      "loss": 3.2721,
      "step": 49170
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.563369631767273,
      "learning_rate": 0.0005350613799422368,
      "loss": 3.0962,
      "step": 49171
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5588769912719727,
      "learning_rate": 0.0005350588382743178,
      "loss": 3.1053,
      "step": 49172
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5035531520843506,
      "learning_rate": 0.0005350562965626969,
      "loss": 2.9768,
      "step": 49173
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.815781593322754,
      "learning_rate": 0.0005350537548073748,
      "loss": 2.7881,
      "step": 49174
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.647951364517212,
      "learning_rate": 0.0005350512130083516,
      "loss": 3.0367,
      "step": 49175
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.436690092086792,
      "learning_rate": 0.000535048671165628,
      "loss": 2.9759,
      "step": 49176
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.605236291885376,
      "learning_rate": 0.0005350461292792044,
      "loss": 3.0846,
      "step": 49177
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.690790057182312,
      "learning_rate": 0.0005350435873490812,
      "loss": 3.2775,
      "step": 49178
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4632210731506348,
      "learning_rate": 0.0005350410453752591,
      "loss": 3.0114,
      "step": 49179
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9532543420791626,
      "learning_rate": 0.0005350385033577384,
      "loss": 3.0955,
      "step": 49180
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.9461309909820557,
      "learning_rate": 0.0005350359612965195,
      "loss": 2.9593,
      "step": 49181
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2914867401123047,
      "learning_rate": 0.0005350334191916031,
      "loss": 3.0106,
      "step": 49182
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.855580449104309,
      "learning_rate": 0.0005350308770429894,
      "loss": 2.9263,
      "step": 49183
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.930042266845703,
      "learning_rate": 0.0005350283348506792,
      "loss": 3.0896,
      "step": 49184
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.879842758178711,
      "learning_rate": 0.0005350257926146727,
      "loss": 2.8494,
      "step": 49185
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9018791913986206,
      "learning_rate": 0.0005350232503349706,
      "loss": 3.0625,
      "step": 49186
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5392251014709473,
      "learning_rate": 0.000535020708011573,
      "loss": 3.065,
      "step": 49187
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.630110263824463,
      "learning_rate": 0.0005350181656444808,
      "loss": 3.0418,
      "step": 49188
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.125222682952881,
      "learning_rate": 0.0005350156232336942,
      "loss": 3.1842,
      "step": 49189
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9230948686599731,
      "learning_rate": 0.0005350130807792138,
      "loss": 3.2323,
      "step": 49190
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0549471378326416,
      "learning_rate": 0.00053501053828104,
      "loss": 2.9221,
      "step": 49191
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.565217971801758,
      "learning_rate": 0.0005350079957391733,
      "loss": 2.9238,
      "step": 49192
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8029392957687378,
      "learning_rate": 0.0005350054531536143,
      "loss": 2.7873,
      "step": 49193
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3062338829040527,
      "learning_rate": 0.0005350029105243633,
      "loss": 3.1718,
      "step": 49194
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5617756843566895,
      "learning_rate": 0.0005350003678514208,
      "loss": 2.8479,
      "step": 49195
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.7704358100891113,
      "learning_rate": 0.0005349978251347872,
      "loss": 2.9504,
      "step": 49196
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9833617210388184,
      "learning_rate": 0.0005349952823744631,
      "loss": 2.9688,
      "step": 49197
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6996158361434937,
      "learning_rate": 0.000534992739570449,
      "loss": 2.8504,
      "step": 49198
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8184690475463867,
      "learning_rate": 0.0005349901967227455,
      "loss": 2.9746,
      "step": 49199
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.150566816329956,
      "learning_rate": 0.0005349876538313525,
      "loss": 2.9558,
      "step": 49200
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9995709657669067,
      "learning_rate": 0.0005349851108962711,
      "loss": 2.7494,
      "step": 49201
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5644456148147583,
      "learning_rate": 0.0005349825679175015,
      "loss": 2.8952,
      "step": 49202
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6306884288787842,
      "learning_rate": 0.0005349800248950441,
      "loss": 2.979,
      "step": 49203
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4944225549697876,
      "learning_rate": 0.0005349774818288997,
      "loss": 3.1805,
      "step": 49204
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1589748859405518,
      "learning_rate": 0.0005349749387190683,
      "loss": 2.8773,
      "step": 49205
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.478961706161499,
      "learning_rate": 0.0005349723955655508,
      "loss": 3.127,
      "step": 49206
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8817760944366455,
      "learning_rate": 0.0005349698523683474,
      "loss": 3.0127,
      "step": 49207
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0196173191070557,
      "learning_rate": 0.0005349673091274588,
      "loss": 3.2188,
      "step": 49208
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5924253463745117,
      "learning_rate": 0.0005349647658428852,
      "loss": 3.0181,
      "step": 49209
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0988173484802246,
      "learning_rate": 0.0005349622225146272,
      "loss": 2.79,
      "step": 49210
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6775926351547241,
      "learning_rate": 0.0005349596791426853,
      "loss": 3.2065,
      "step": 49211
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8242847919464111,
      "learning_rate": 0.0005349571357270601,
      "loss": 3.1455,
      "step": 49212
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.676466464996338,
      "learning_rate": 0.0005349545922677518,
      "loss": 3.1053,
      "step": 49213
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6398924589157104,
      "learning_rate": 0.000534952048764761,
      "loss": 3.3458,
      "step": 49214
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.0303590297698975,
      "learning_rate": 0.0005349495052180882,
      "loss": 2.7858,
      "step": 49215
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.131182909011841,
      "learning_rate": 0.0005349469616277339,
      "loss": 3.0703,
      "step": 49216
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5621176958084106,
      "learning_rate": 0.0005349444179936986,
      "loss": 2.9657,
      "step": 49217
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6987687349319458,
      "learning_rate": 0.0005349418743159826,
      "loss": 2.8983,
      "step": 49218
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.05064058303833,
      "learning_rate": 0.0005349393305945864,
      "loss": 3.1597,
      "step": 49219
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.932857871055603,
      "learning_rate": 0.0005349367868295107,
      "loss": 3.0123,
      "step": 49220
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7450804710388184,
      "learning_rate": 0.0005349342430207557,
      "loss": 2.886,
      "step": 49221
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.610713243484497,
      "learning_rate": 0.0005349316991683221,
      "loss": 3.0351,
      "step": 49222
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1680519580841064,
      "learning_rate": 0.0005349291552722101,
      "loss": 3.0161,
      "step": 49223
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7410485744476318,
      "learning_rate": 0.0005349266113324204,
      "loss": 2.9873,
      "step": 49224
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2918264865875244,
      "learning_rate": 0.0005349240673489534,
      "loss": 2.9735,
      "step": 49225
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7604435682296753,
      "learning_rate": 0.0005349215233218097,
      "loss": 3.0729,
      "step": 49226
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4628976583480835,
      "learning_rate": 0.0005349189792509895,
      "loss": 3.0931,
      "step": 49227
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4449797868728638,
      "learning_rate": 0.0005349164351364935,
      "loss": 3.0375,
      "step": 49228
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.618821620941162,
      "learning_rate": 0.0005349138909783221,
      "loss": 3.1862,
      "step": 49229
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.29888653755188,
      "learning_rate": 0.0005349113467764758,
      "loss": 2.9019,
      "step": 49230
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6263601779937744,
      "learning_rate": 0.0005349088025309551,
      "loss": 3.1316,
      "step": 49231
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.479643702507019,
      "learning_rate": 0.0005349062582417603,
      "loss": 3.1603,
      "step": 49232
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9713789224624634,
      "learning_rate": 0.000534903713908892,
      "loss": 3.2269,
      "step": 49233
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6063435077667236,
      "learning_rate": 0.0005349011695323507,
      "loss": 3.0025,
      "step": 49234
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.12522029876709,
      "learning_rate": 0.0005348986251121368,
      "loss": 2.9785,
      "step": 49235
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.156158447265625,
      "learning_rate": 0.0005348960806482508,
      "loss": 2.9225,
      "step": 49236
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.0523006916046143,
      "learning_rate": 0.0005348935361406932,
      "loss": 3.295,
      "step": 49237
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6264373064041138,
      "learning_rate": 0.0005348909915894645,
      "loss": 3.2316,
      "step": 49238
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4478856325149536,
      "learning_rate": 0.000534888446994565,
      "loss": 2.7532,
      "step": 49239
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5983214378356934,
      "learning_rate": 0.0005348859023559955,
      "loss": 3.0035,
      "step": 49240
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6386630535125732,
      "learning_rate": 0.0005348833576737562,
      "loss": 3.3843,
      "step": 49241
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.322927713394165,
      "learning_rate": 0.0005348808129478477,
      "loss": 3.0556,
      "step": 49242
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.500086784362793,
      "learning_rate": 0.0005348782681782703,
      "loss": 3.1486,
      "step": 49243
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7993944883346558,
      "learning_rate": 0.0005348757233650246,
      "loss": 3.1028,
      "step": 49244
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4620046615600586,
      "learning_rate": 0.0005348731785081112,
      "loss": 3.0791,
      "step": 49245
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7139424085617065,
      "learning_rate": 0.0005348706336075304,
      "loss": 2.8546,
      "step": 49246
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4717217683792114,
      "learning_rate": 0.0005348680886632827,
      "loss": 3.3418,
      "step": 49247
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7316279411315918,
      "learning_rate": 0.0005348655436753685,
      "loss": 3.0424,
      "step": 49248
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.148991823196411,
      "learning_rate": 0.0005348629986437885,
      "loss": 3.2465,
      "step": 49249
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5002095699310303,
      "learning_rate": 0.0005348604535685429,
      "loss": 2.86,
      "step": 49250
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6876651048660278,
      "learning_rate": 0.0005348579084496324,
      "loss": 3.0927,
      "step": 49251
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.253830909729004,
      "learning_rate": 0.0005348553632870574,
      "loss": 2.9798,
      "step": 49252
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8174264430999756,
      "learning_rate": 0.0005348528180808183,
      "loss": 2.9745,
      "step": 49253
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8990962505340576,
      "learning_rate": 0.0005348502728309156,
      "loss": 2.6849,
      "step": 49254
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5490723848342896,
      "learning_rate": 0.00053484772753735,
      "loss": 3.2307,
      "step": 49255
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3501930236816406,
      "learning_rate": 0.0005348451822001215,
      "loss": 2.8384,
      "step": 49256
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.911297082901001,
      "learning_rate": 0.000534842636819231,
      "loss": 3.1419,
      "step": 49257
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.37977933883667,
      "learning_rate": 0.0005348400913946788,
      "loss": 2.9701,
      "step": 49258
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7603578567504883,
      "learning_rate": 0.0005348375459264654,
      "loss": 2.9029,
      "step": 49259
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4933056831359863,
      "learning_rate": 0.0005348350004145912,
      "loss": 2.8521,
      "step": 49260
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5709924697875977,
      "learning_rate": 0.0005348324548590568,
      "loss": 2.8344,
      "step": 49261
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7839956283569336,
      "learning_rate": 0.0005348299092598626,
      "loss": 3.1871,
      "step": 49262
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.567037582397461,
      "learning_rate": 0.0005348273636170092,
      "loss": 3.3718,
      "step": 49263
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.250267267227173,
      "learning_rate": 0.0005348248179304968,
      "loss": 3.2229,
      "step": 49264
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8403040170669556,
      "learning_rate": 0.0005348222722003261,
      "loss": 2.8617,
      "step": 49265
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3061161041259766,
      "learning_rate": 0.0005348197264264975,
      "loss": 3.0274,
      "step": 49266
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.575226068496704,
      "learning_rate": 0.0005348171806090114,
      "loss": 3.1557,
      "step": 49267
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8275716304779053,
      "learning_rate": 0.0005348146347478684,
      "loss": 2.9546,
      "step": 49268
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7507356405258179,
      "learning_rate": 0.000534812088843069,
      "loss": 3.1701,
      "step": 49269
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.499104619026184,
      "learning_rate": 0.0005348095428946136,
      "loss": 2.9534,
      "step": 49270
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9507019519805908,
      "learning_rate": 0.0005348069969025027,
      "loss": 2.9946,
      "step": 49271
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3200300931930542,
      "learning_rate": 0.0005348044508667366,
      "loss": 3.0915,
      "step": 49272
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.434277057647705,
      "learning_rate": 0.0005348019047873159,
      "loss": 3.0443,
      "step": 49273
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7908909320831299,
      "learning_rate": 0.0005347993586642413,
      "loss": 3.2023,
      "step": 49274
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.714770793914795,
      "learning_rate": 0.0005347968124975129,
      "loss": 2.9955,
      "step": 49275
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.7464420795440674,
      "learning_rate": 0.0005347942662871315,
      "loss": 3.1155,
      "step": 49276
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8195915222167969,
      "learning_rate": 0.0005347917200330972,
      "loss": 2.9917,
      "step": 49277
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.57960045337677,
      "learning_rate": 0.0005347891737354109,
      "loss": 2.8691,
      "step": 49278
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9277106523513794,
      "learning_rate": 0.0005347866273940727,
      "loss": 3.1054,
      "step": 49279
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1231226921081543,
      "learning_rate": 0.0005347840810090833,
      "loss": 3.2675,
      "step": 49280
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5969362258911133,
      "learning_rate": 0.0005347815345804431,
      "loss": 2.9745,
      "step": 49281
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.151954174041748,
      "learning_rate": 0.0005347789881081526,
      "loss": 3.0439,
      "step": 49282
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6640641689300537,
      "learning_rate": 0.0005347764415922122,
      "loss": 3.1429,
      "step": 49283
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6368099451065063,
      "learning_rate": 0.0005347738950326224,
      "loss": 3.0491,
      "step": 49284
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5223212242126465,
      "learning_rate": 0.0005347713484293838,
      "loss": 2.9776,
      "step": 49285
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.478154182434082,
      "learning_rate": 0.0005347688017824968,
      "loss": 3.2154,
      "step": 49286
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7513753175735474,
      "learning_rate": 0.0005347662550919618,
      "loss": 3.0944,
      "step": 49287
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7095797061920166,
      "learning_rate": 0.0005347637083577793,
      "loss": 2.8741,
      "step": 49288
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.618805170059204,
      "learning_rate": 0.0005347611615799497,
      "loss": 2.8273,
      "step": 49289
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1759114265441895,
      "learning_rate": 0.0005347586147584738,
      "loss": 3.1048,
      "step": 49290
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.085496664047241,
      "learning_rate": 0.0005347560678933517,
      "loss": 2.8423,
      "step": 49291
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.082390308380127,
      "learning_rate": 0.000534753520984584,
      "loss": 2.8737,
      "step": 49292
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2761192321777344,
      "learning_rate": 0.0005347509740321712,
      "loss": 3.0685,
      "step": 49293
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.580148458480835,
      "learning_rate": 0.0005347484270361138,
      "loss": 3.1692,
      "step": 49294
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8971973657608032,
      "learning_rate": 0.0005347458799964122,
      "loss": 3.3663,
      "step": 49295
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4647926092147827,
      "learning_rate": 0.000534743332913067,
      "loss": 3.2501,
      "step": 49296
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0380358695983887,
      "learning_rate": 0.0005347407857860785,
      "loss": 3.2002,
      "step": 49297
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.389101028442383,
      "learning_rate": 0.0005347382386154472,
      "loss": 3.1178,
      "step": 49298
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.00691294670105,
      "learning_rate": 0.0005347356914011737,
      "loss": 2.958,
      "step": 49299
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3711963891983032,
      "learning_rate": 0.0005347331441432584,
      "loss": 3.1765,
      "step": 49300
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.419083833694458,
      "learning_rate": 0.0005347305968417018,
      "loss": 3.2135,
      "step": 49301
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3371686935424805,
      "learning_rate": 0.0005347280494965044,
      "loss": 3.1917,
      "step": 49302
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8003056049346924,
      "learning_rate": 0.0005347255021076665,
      "loss": 3.321,
      "step": 49303
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7095270156860352,
      "learning_rate": 0.0005347229546751888,
      "loss": 3.2253,
      "step": 49304
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.0266175270080566,
      "learning_rate": 0.0005347204071990716,
      "loss": 2.9537,
      "step": 49305
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.96591317653656,
      "learning_rate": 0.0005347178596793155,
      "loss": 3.0265,
      "step": 49306
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.524355411529541,
      "learning_rate": 0.0005347153121159208,
      "loss": 2.9056,
      "step": 49307
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8411970138549805,
      "learning_rate": 0.0005347127645088882,
      "loss": 3.0283,
      "step": 49308
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.486081123352051,
      "learning_rate": 0.0005347102168582181,
      "loss": 3.167,
      "step": 49309
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.4813952445983887,
      "learning_rate": 0.000534707669163911,
      "loss": 3.0502,
      "step": 49310
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.399823784828186,
      "learning_rate": 0.0005347051214259672,
      "loss": 3.0987,
      "step": 49311
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2852258682250977,
      "learning_rate": 0.0005347025736443872,
      "loss": 2.896,
      "step": 49312
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6647623777389526,
      "learning_rate": 0.0005347000258191717,
      "loss": 2.8517,
      "step": 49313
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2817038297653198,
      "learning_rate": 0.000534697477950321,
      "loss": 3.0916,
      "step": 49314
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.524083137512207,
      "learning_rate": 0.0005346949300378355,
      "loss": 2.9733,
      "step": 49315
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.007505178451538,
      "learning_rate": 0.0005346923820817159,
      "loss": 2.9672,
      "step": 49316
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.426764726638794,
      "learning_rate": 0.0005346898340819625,
      "loss": 3.0843,
      "step": 49317
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5980640649795532,
      "learning_rate": 0.0005346872860385759,
      "loss": 2.8174,
      "step": 49318
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9758422374725342,
      "learning_rate": 0.0005346847379515565,
      "loss": 3.087,
      "step": 49319
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3296518325805664,
      "learning_rate": 0.0005346821898209047,
      "loss": 3.0195,
      "step": 49320
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7506927251815796,
      "learning_rate": 0.0005346796416466211,
      "loss": 3.1876,
      "step": 49321
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6187760829925537,
      "learning_rate": 0.0005346770934287062,
      "loss": 3.0611,
      "step": 49322
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5243449211120605,
      "learning_rate": 0.0005346745451671603,
      "loss": 3.2683,
      "step": 49323
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3805110454559326,
      "learning_rate": 0.0005346719968619839,
      "loss": 3.1098,
      "step": 49324
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.434597969055176,
      "learning_rate": 0.0005346694485131776,
      "loss": 3.2433,
      "step": 49325
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.999545931816101,
      "learning_rate": 0.000534666900120742,
      "loss": 3.3973,
      "step": 49326
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7783398628234863,
      "learning_rate": 0.0005346643516846773,
      "loss": 3.0952,
      "step": 49327
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.7406952381134033,
      "learning_rate": 0.0005346618032049839,
      "loss": 3.077,
      "step": 49328
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0790913105010986,
      "learning_rate": 0.0005346592546816625,
      "loss": 2.8068,
      "step": 49329
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5342727899551392,
      "learning_rate": 0.0005346567061147137,
      "loss": 2.9804,
      "step": 49330
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4322906732559204,
      "learning_rate": 0.0005346541575041376,
      "loss": 3.0911,
      "step": 49331
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0614686012268066,
      "learning_rate": 0.000534651608849935,
      "loss": 3.0636,
      "step": 49332
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.401902675628662,
      "learning_rate": 0.0005346490601521062,
      "loss": 2.829,
      "step": 49333
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.514841914176941,
      "learning_rate": 0.0005346465114106515,
      "loss": 3.0228,
      "step": 49334
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3512611389160156,
      "learning_rate": 0.0005346439626255719,
      "loss": 2.9522,
      "step": 49335
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7861652374267578,
      "learning_rate": 0.0005346414137968674,
      "loss": 3.3278,
      "step": 49336
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6225429773330688,
      "learning_rate": 0.0005346388649245386,
      "loss": 3.1809,
      "step": 49337
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4510047435760498,
      "learning_rate": 0.0005346363160085861,
      "loss": 2.8586,
      "step": 49338
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4493170976638794,
      "learning_rate": 0.0005346337670490102,
      "loss": 2.882,
      "step": 49339
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4244921207427979,
      "learning_rate": 0.0005346312180458115,
      "loss": 3.0742,
      "step": 49340
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9269111156463623,
      "learning_rate": 0.0005346286689989904,
      "loss": 3.2497,
      "step": 49341
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6614868640899658,
      "learning_rate": 0.0005346261199085475,
      "loss": 2.7881,
      "step": 49342
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9868654012680054,
      "learning_rate": 0.000534623570774483,
      "loss": 3.1736,
      "step": 49343
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.43048095703125,
      "learning_rate": 0.0005346210215967977,
      "loss": 3.2026,
      "step": 49344
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6316111087799072,
      "learning_rate": 0.0005346184723754919,
      "loss": 2.9929,
      "step": 49345
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6007190942764282,
      "learning_rate": 0.000534615923110566,
      "loss": 2.8769,
      "step": 49346
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.989866852760315,
      "learning_rate": 0.0005346133738020207,
      "loss": 2.9887,
      "step": 49347
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7996753454208374,
      "learning_rate": 0.0005346108244498562,
      "loss": 3.0429,
      "step": 49348
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6780755519866943,
      "learning_rate": 0.0005346082750540732,
      "loss": 3.1392,
      "step": 49349
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4459316730499268,
      "learning_rate": 0.000534605725614672,
      "loss": 2.9075,
      "step": 49350
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.250535726547241,
      "learning_rate": 0.0005346031761316533,
      "loss": 2.7926,
      "step": 49351
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9877492189407349,
      "learning_rate": 0.0005346006266050174,
      "loss": 2.8012,
      "step": 49352
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.552362322807312,
      "learning_rate": 0.0005345980770347649,
      "loss": 2.8488,
      "step": 49353
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3505361080169678,
      "learning_rate": 0.000534595527420896,
      "loss": 3.0911,
      "step": 49354
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8941855430603027,
      "learning_rate": 0.0005345929777634115,
      "loss": 2.8528,
      "step": 49355
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6659330129623413,
      "learning_rate": 0.0005345904280623118,
      "loss": 2.8956,
      "step": 49356
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.3299126625061035,
      "learning_rate": 0.0005345878783175971,
      "loss": 3.1066,
      "step": 49357
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.091603994369507,
      "learning_rate": 0.0005345853285292682,
      "loss": 2.9244,
      "step": 49358
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.955175518989563,
      "learning_rate": 0.0005345827786973255,
      "loss": 2.9929,
      "step": 49359
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9298149347305298,
      "learning_rate": 0.0005345802288217694,
      "loss": 3.0875,
      "step": 49360
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.912458896636963,
      "learning_rate": 0.0005345776789026004,
      "loss": 2.9603,
      "step": 49361
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5993170738220215,
      "learning_rate": 0.000534575128939819,
      "loss": 3.0252,
      "step": 49362
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7558636665344238,
      "learning_rate": 0.0005345725789334256,
      "loss": 2.9876,
      "step": 49363
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.381227493286133,
      "learning_rate": 0.0005345700288834208,
      "loss": 2.9704,
      "step": 49364
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4013128280639648,
      "learning_rate": 0.0005345674787898049,
      "loss": 3.1729,
      "step": 49365
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.431188941001892,
      "learning_rate": 0.0005345649286525786,
      "loss": 2.7409,
      "step": 49366
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.653187870979309,
      "learning_rate": 0.0005345623784717422,
      "loss": 3.2316,
      "step": 49367
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5903270244598389,
      "learning_rate": 0.0005345598282472963,
      "loss": 3.1194,
      "step": 49368
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.677375316619873,
      "learning_rate": 0.0005345572779792411,
      "loss": 3.1065,
      "step": 49369
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6059515476226807,
      "learning_rate": 0.0005345547276675775,
      "loss": 3.1628,
      "step": 49370
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.870451807975769,
      "learning_rate": 0.0005345521773123057,
      "loss": 3.1166,
      "step": 49371
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.469819188117981,
      "learning_rate": 0.0005345496269134261,
      "loss": 2.9121,
      "step": 49372
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3488941192626953,
      "learning_rate": 0.0005345470764709393,
      "loss": 2.9905,
      "step": 49373
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5066578388214111,
      "learning_rate": 0.0005345445259848459,
      "loss": 3.1608,
      "step": 49374
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3840845823287964,
      "learning_rate": 0.0005345419754551462,
      "loss": 3.1123,
      "step": 49375
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8215125799179077,
      "learning_rate": 0.0005345394248818406,
      "loss": 3.0276,
      "step": 49376
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7373720407485962,
      "learning_rate": 0.0005345368742649298,
      "loss": 2.98,
      "step": 49377
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.890594244003296,
      "learning_rate": 0.0005345343236044142,
      "loss": 2.9803,
      "step": 49378
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.9823696613311768,
      "learning_rate": 0.0005345317729002943,
      "loss": 2.7498,
      "step": 49379
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4437170028686523,
      "learning_rate": 0.0005345292221525704,
      "loss": 3.2438,
      "step": 49380
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5049306154251099,
      "learning_rate": 0.0005345266713612432,
      "loss": 3.0636,
      "step": 49381
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.594581365585327,
      "learning_rate": 0.000534524120526313,
      "loss": 3.0599,
      "step": 49382
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8859467506408691,
      "learning_rate": 0.0005345215696477803,
      "loss": 3.0957,
      "step": 49383
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3875542879104614,
      "learning_rate": 0.0005345190187256455,
      "loss": 2.8503,
      "step": 49384
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3125154972076416,
      "learning_rate": 0.0005345164677599093,
      "loss": 3.2254,
      "step": 49385
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.5153751373291016,
      "learning_rate": 0.0005345139167505722,
      "loss": 2.8712,
      "step": 49386
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5218322277069092,
      "learning_rate": 0.0005345113656976344,
      "loss": 3.1916,
      "step": 49387
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6214061975479126,
      "learning_rate": 0.0005345088146010965,
      "loss": 2.9677,
      "step": 49388
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7876274585723877,
      "learning_rate": 0.000534506263460959,
      "loss": 2.8982,
      "step": 49389
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5787783861160278,
      "learning_rate": 0.0005345037122772224,
      "loss": 2.9203,
      "step": 49390
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4113057851791382,
      "learning_rate": 0.000534501161049887,
      "loss": 3.0137,
      "step": 49391
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.755682349205017,
      "learning_rate": 0.0005344986097789536,
      "loss": 2.975,
      "step": 49392
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9828823804855347,
      "learning_rate": 0.0005344960584644224,
      "loss": 2.9308,
      "step": 49393
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6654994487762451,
      "learning_rate": 0.000534493507106294,
      "loss": 3.0307,
      "step": 49394
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4702422618865967,
      "learning_rate": 0.0005344909557045687,
      "loss": 3.1669,
      "step": 49395
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5502605438232422,
      "learning_rate": 0.0005344884042592471,
      "loss": 3.0041,
      "step": 49396
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5287067890167236,
      "learning_rate": 0.0005344858527703298,
      "loss": 3.0165,
      "step": 49397
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4539079666137695,
      "learning_rate": 0.0005344833012378172,
      "loss": 3.2029,
      "step": 49398
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6784098148345947,
      "learning_rate": 0.0005344807496617096,
      "loss": 3.0393,
      "step": 49399
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7531001567840576,
      "learning_rate": 0.0005344781980420077,
      "loss": 3.0552,
      "step": 49400
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3962812423706055,
      "learning_rate": 0.0005344756463787118,
      "loss": 3.2176,
      "step": 49401
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8683991432189941,
      "learning_rate": 0.0005344730946718224,
      "loss": 3.0528,
      "step": 49402
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5820029973983765,
      "learning_rate": 0.0005344705429213402,
      "loss": 2.9705,
      "step": 49403
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6568540334701538,
      "learning_rate": 0.0005344679911272654,
      "loss": 3.0747,
      "step": 49404
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8062971830368042,
      "learning_rate": 0.0005344654392895985,
      "loss": 2.6805,
      "step": 49405
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.607418179512024,
      "learning_rate": 0.0005344628874083402,
      "loss": 2.9926,
      "step": 49406
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7801024913787842,
      "learning_rate": 0.0005344603354834908,
      "loss": 3.0155,
      "step": 49407
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1196727752685547,
      "learning_rate": 0.0005344577835150507,
      "loss": 2.9058,
      "step": 49408
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9687072038650513,
      "learning_rate": 0.0005344552315030206,
      "loss": 2.8718,
      "step": 49409
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5562841892242432,
      "learning_rate": 0.0005344526794474006,
      "loss": 3.0616,
      "step": 49410
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3827272653579712,
      "learning_rate": 0.0005344501273481917,
      "loss": 3.0007,
      "step": 49411
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6540098190307617,
      "learning_rate": 0.000534447575205394,
      "loss": 3.1061,
      "step": 49412
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9295157194137573,
      "learning_rate": 0.0005344450230190081,
      "loss": 3.1322,
      "step": 49413
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7094645500183105,
      "learning_rate": 0.0005344424707890343,
      "loss": 2.8768,
      "step": 49414
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.560413122177124,
      "learning_rate": 0.0005344399185154734,
      "loss": 2.945,
      "step": 49415
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.712418556213379,
      "learning_rate": 0.0005344373661983255,
      "loss": 2.9234,
      "step": 49416
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.431195855140686,
      "learning_rate": 0.0005344348138375914,
      "loss": 3.0388,
      "step": 49417
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0774803161621094,
      "learning_rate": 0.0005344322614332715,
      "loss": 2.8167,
      "step": 49418
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.8480327129364014,
      "learning_rate": 0.0005344297089853661,
      "loss": 3.0147,
      "step": 49419
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.702488660812378,
      "learning_rate": 0.0005344271564938758,
      "loss": 3.0259,
      "step": 49420
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3006443977355957,
      "learning_rate": 0.0005344246039588011,
      "loss": 3.1693,
      "step": 49421
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.404921531677246,
      "learning_rate": 0.0005344220513801424,
      "loss": 2.8611,
      "step": 49422
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.383892297744751,
      "learning_rate": 0.0005344194987579003,
      "loss": 3.0762,
      "step": 49423
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9007880687713623,
      "learning_rate": 0.0005344169460920751,
      "loss": 2.9954,
      "step": 49424
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.021909713745117,
      "learning_rate": 0.0005344143933826675,
      "loss": 3.0988,
      "step": 49425
    },
    {
      "epoch": 0.64,
      "grad_norm": 3.963263988494873,
      "learning_rate": 0.0005344118406296777,
      "loss": 3.07,
      "step": 49426
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.163759708404541,
      "learning_rate": 0.0005344092878331064,
      "loss": 3.2298,
      "step": 49427
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5681872367858887,
      "learning_rate": 0.0005344067349929539,
      "loss": 3.291,
      "step": 49428
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2864725589752197,
      "learning_rate": 0.0005344041821092209,
      "loss": 3.0552,
      "step": 49429
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.648704767227173,
      "learning_rate": 0.0005344016291819076,
      "loss": 3.3057,
      "step": 49430
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5307425260543823,
      "learning_rate": 0.0005343990762110147,
      "loss": 3.102,
      "step": 49431
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.258399486541748,
      "learning_rate": 0.0005343965231965425,
      "loss": 3.0503,
      "step": 49432
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8828775882720947,
      "learning_rate": 0.0005343939701384917,
      "loss": 2.8858,
      "step": 49433
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.055809736251831,
      "learning_rate": 0.0005343914170368625,
      "loss": 3.2304,
      "step": 49434
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5463216304779053,
      "learning_rate": 0.0005343888638916556,
      "loss": 2.9908,
      "step": 49435
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3334344625473022,
      "learning_rate": 0.0005343863107028713,
      "loss": 3.0311,
      "step": 49436
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.613924741744995,
      "learning_rate": 0.0005343837574705102,
      "loss": 3.1568,
      "step": 49437
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.526026964187622,
      "learning_rate": 0.0005343812041945728,
      "loss": 3.211,
      "step": 49438
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4330689907073975,
      "learning_rate": 0.0005343786508750594,
      "loss": 2.7901,
      "step": 49439
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.765077829360962,
      "learning_rate": 0.0005343760975119706,
      "loss": 3.278,
      "step": 49440
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.385524034500122,
      "learning_rate": 0.000534373544105307,
      "loss": 3.3569,
      "step": 49441
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5634222030639648,
      "learning_rate": 0.0005343709906550689,
      "loss": 3.1958,
      "step": 49442
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.828577995300293,
      "learning_rate": 0.0005343684371612566,
      "loss": 3.121,
      "step": 49443
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.878749966621399,
      "learning_rate": 0.000534365883623871,
      "loss": 3.0012,
      "step": 49444
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.6250979900360107,
      "learning_rate": 0.0005343633300429122,
      "loss": 2.7387,
      "step": 49445
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1301918029785156,
      "learning_rate": 0.000534360776418381,
      "loss": 3.128,
      "step": 49446
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2593092918395996,
      "learning_rate": 0.0005343582227502776,
      "loss": 3.0074,
      "step": 49447
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9793784618377686,
      "learning_rate": 0.0005343556690386027,
      "loss": 3.1594,
      "step": 49448
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.3820126056671143,
      "learning_rate": 0.0005343531152833564,
      "loss": 3.0159,
      "step": 49449
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8770793676376343,
      "learning_rate": 0.0005343505614845397,
      "loss": 3.3145,
      "step": 49450
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5739834308624268,
      "learning_rate": 0.0005343480076421527,
      "loss": 3.0685,
      "step": 49451
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7770116329193115,
      "learning_rate": 0.0005343454537561959,
      "loss": 3.0934,
      "step": 49452
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.302091598510742,
      "learning_rate": 0.00053434289982667,
      "loss": 2.8432,
      "step": 49453
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6486523151397705,
      "learning_rate": 0.0005343403458535753,
      "loss": 3.2955,
      "step": 49454
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6562391519546509,
      "learning_rate": 0.0005343377918369122,
      "loss": 3.2627,
      "step": 49455
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6732319593429565,
      "learning_rate": 0.0005343352377766814,
      "loss": 2.9797,
      "step": 49456
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7059310674667358,
      "learning_rate": 0.0005343326836728832,
      "loss": 3.0216,
      "step": 49457
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5369621515274048,
      "learning_rate": 0.0005343301295255181,
      "loss": 3.0419,
      "step": 49458
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5320833921432495,
      "learning_rate": 0.0005343275753345866,
      "loss": 2.8918,
      "step": 49459
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5549824237823486,
      "learning_rate": 0.0005343250211000893,
      "loss": 3.1137,
      "step": 49460
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8657755851745605,
      "learning_rate": 0.0005343224668220265,
      "loss": 3.3251,
      "step": 49461
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7467995882034302,
      "learning_rate": 0.0005343199125003987,
      "loss": 2.985,
      "step": 49462
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3137644529342651,
      "learning_rate": 0.0005343173581352064,
      "loss": 3.3473,
      "step": 49463
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6751084327697754,
      "learning_rate": 0.0005343148037264501,
      "loss": 2.7527,
      "step": 49464
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.447087049484253,
      "learning_rate": 0.0005343122492741303,
      "loss": 3.1017,
      "step": 49465
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4889273643493652,
      "learning_rate": 0.0005343096947782474,
      "loss": 3.2252,
      "step": 49466
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0369367599487305,
      "learning_rate": 0.0005343071402388018,
      "loss": 2.9732,
      "step": 49467
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5605987310409546,
      "learning_rate": 0.0005343045856557942,
      "loss": 3.0388,
      "step": 49468
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4954721927642822,
      "learning_rate": 0.000534302031029225,
      "loss": 2.9309,
      "step": 49469
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.428377628326416,
      "learning_rate": 0.0005342994763590945,
      "loss": 3.1172,
      "step": 49470
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0953798294067383,
      "learning_rate": 0.0005342969216454034,
      "loss": 3.0428,
      "step": 49471
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6627709865570068,
      "learning_rate": 0.0005342943668881521,
      "loss": 2.9569,
      "step": 49472
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.043668746948242,
      "learning_rate": 0.000534291812087341,
      "loss": 3.1726,
      "step": 49473
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7150154113769531,
      "learning_rate": 0.0005342892572429706,
      "loss": 2.9182,
      "step": 49474
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5533396005630493,
      "learning_rate": 0.0005342867023550413,
      "loss": 3.21,
      "step": 49475
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8863564729690552,
      "learning_rate": 0.0005342841474235539,
      "loss": 3.0091,
      "step": 49476
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.722272276878357,
      "learning_rate": 0.0005342815924485085,
      "loss": 3.0693,
      "step": 49477
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3441262245178223,
      "learning_rate": 0.0005342790374299059,
      "loss": 3.3466,
      "step": 49478
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5315760374069214,
      "learning_rate": 0.0005342764823677462,
      "loss": 3.1505,
      "step": 49479
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.483699917793274,
      "learning_rate": 0.0005342739272620302,
      "loss": 3.3243,
      "step": 49480
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.1151556968688965,
      "learning_rate": 0.0005342713721127583,
      "loss": 3.164,
      "step": 49481
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4521299600601196,
      "learning_rate": 0.0005342688169199309,
      "loss": 2.9421,
      "step": 49482
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5602222681045532,
      "learning_rate": 0.0005342662616835483,
      "loss": 2.8142,
      "step": 49483
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.401347041130066,
      "learning_rate": 0.0005342637064036114,
      "loss": 3.0312,
      "step": 49484
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.0506081581115723,
      "learning_rate": 0.0005342611510801204,
      "loss": 3.2385,
      "step": 49485
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5102736949920654,
      "learning_rate": 0.0005342585957130759,
      "loss": 2.9185,
      "step": 49486
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5197330713272095,
      "learning_rate": 0.0005342560403024781,
      "loss": 2.8914,
      "step": 49487
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6030476093292236,
      "learning_rate": 0.000534253484848328,
      "loss": 2.725,
      "step": 49488
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5016615390777588,
      "learning_rate": 0.0005342509293506254,
      "loss": 3.308,
      "step": 49489
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4177427291870117,
      "learning_rate": 0.0005342483738093713,
      "loss": 2.8952,
      "step": 49490
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.949058175086975,
      "learning_rate": 0.0005342458182245662,
      "loss": 3.2259,
      "step": 49491
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6961013078689575,
      "learning_rate": 0.0005342432625962101,
      "loss": 3.3063,
      "step": 49492
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4760857820510864,
      "learning_rate": 0.0005342407069243039,
      "loss": 3.0543,
      "step": 49493
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.753920555114746,
      "learning_rate": 0.0005342381512088479,
      "loss": 3.3526,
      "step": 49494
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6723721027374268,
      "learning_rate": 0.0005342355954498425,
      "loss": 2.8797,
      "step": 49495
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4636261463165283,
      "learning_rate": 0.0005342330396472883,
      "loss": 3.1703,
      "step": 49496
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8109773397445679,
      "learning_rate": 0.0005342304838011859,
      "loss": 2.9757,
      "step": 49497
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6301558017730713,
      "learning_rate": 0.0005342279279115356,
      "loss": 3.0169,
      "step": 49498
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3342869281768799,
      "learning_rate": 0.0005342253719783377,
      "loss": 3.4297,
      "step": 49499
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4163724184036255,
      "learning_rate": 0.0005342228160015931,
      "loss": 3.1809,
      "step": 49500
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4602450132369995,
      "learning_rate": 0.000534220259981302,
      "loss": 3.1915,
      "step": 49501
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5031222105026245,
      "learning_rate": 0.0005342177039174649,
      "loss": 3.047,
      "step": 49502
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.812679648399353,
      "learning_rate": 0.0005342151478100823,
      "loss": 3.1118,
      "step": 49503
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.7807390689849854,
      "learning_rate": 0.0005342125916591548,
      "loss": 3.0885,
      "step": 49504
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5312260389328003,
      "learning_rate": 0.0005342100354646826,
      "loss": 3.1889,
      "step": 49505
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.556457757949829,
      "learning_rate": 0.0005342074792266665,
      "loss": 2.9684,
      "step": 49506
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2771143913269043,
      "learning_rate": 0.0005342049229451067,
      "loss": 2.8786,
      "step": 49507
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4356958866119385,
      "learning_rate": 0.0005342023666200039,
      "loss": 3.1278,
      "step": 49508
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9976292848587036,
      "learning_rate": 0.0005341998102513583,
      "loss": 3.0742,
      "step": 49509
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.598412275314331,
      "learning_rate": 0.0005341972538391705,
      "loss": 3.1787,
      "step": 49510
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3937147855758667,
      "learning_rate": 0.0005341946973834411,
      "loss": 2.8786,
      "step": 49511
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.7239127159118652,
      "learning_rate": 0.0005341921408841705,
      "loss": 2.9863,
      "step": 49512
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5449097156524658,
      "learning_rate": 0.0005341895843413593,
      "loss": 3.2587,
      "step": 49513
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.2743608951568604,
      "learning_rate": 0.0005341870277550076,
      "loss": 2.9462,
      "step": 49514
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4081294536590576,
      "learning_rate": 0.0005341844711251162,
      "loss": 3.2064,
      "step": 49515
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.219740867614746,
      "learning_rate": 0.0005341819144516855,
      "loss": 3.2757,
      "step": 49516
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.706705093383789,
      "learning_rate": 0.000534179357734716,
      "loss": 2.986,
      "step": 49517
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2795239686965942,
      "learning_rate": 0.000534176800974208,
      "loss": 3.155,
      "step": 49518
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.2897828817367554,
      "learning_rate": 0.0005341742441701622,
      "loss": 3.2001,
      "step": 49519
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.9095522165298462,
      "learning_rate": 0.0005341716873225789,
      "loss": 3.1847,
      "step": 49520
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5350782871246338,
      "learning_rate": 0.0005341691304314587,
      "loss": 2.9954,
      "step": 49521
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5472644567489624,
      "learning_rate": 0.0005341665734968022,
      "loss": 2.9053,
      "step": 49522
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4760708808898926,
      "learning_rate": 0.0005341640165186096,
      "loss": 3.1044,
      "step": 49523
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5572079420089722,
      "learning_rate": 0.0005341614594968815,
      "loss": 3.1178,
      "step": 49524
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.740862250328064,
      "learning_rate": 0.0005341589024316183,
      "loss": 3.1732,
      "step": 49525
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4926449060440063,
      "learning_rate": 0.0005341563453228206,
      "loss": 2.8002,
      "step": 49526
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4307717084884644,
      "learning_rate": 0.0005341537881704888,
      "loss": 3.1474,
      "step": 49527
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.335127830505371,
      "learning_rate": 0.0005341512309746234,
      "loss": 3.2344,
      "step": 49528
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.8046839237213135,
      "learning_rate": 0.0005341486737352249,
      "loss": 3.0872,
      "step": 49529
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5206173658370972,
      "learning_rate": 0.0005341461164522938,
      "loss": 3.0912,
      "step": 49530
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6532073020935059,
      "learning_rate": 0.0005341435591258303,
      "loss": 3.0259,
      "step": 49531
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.5647637844085693,
      "learning_rate": 0.0005341410017558352,
      "loss": 3.154,
      "step": 49532
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4882769584655762,
      "learning_rate": 0.0005341384443423089,
      "loss": 3.174,
      "step": 49533
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.612594723701477,
      "learning_rate": 0.0005341358868852519,
      "loss": 2.8645,
      "step": 49534
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.3605753183364868,
      "learning_rate": 0.0005341333293846644,
      "loss": 3.1604,
      "step": 49535
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.396964192390442,
      "learning_rate": 0.0005341307718405473,
      "loss": 3.1926,
      "step": 49536
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8933182954788208,
      "learning_rate": 0.0005341282142529008,
      "loss": 2.7261,
      "step": 49537
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3549492359161377,
      "learning_rate": 0.0005341256566217255,
      "loss": 3.107,
      "step": 49538
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.564447283744812,
      "learning_rate": 0.0005341230989470219,
      "loss": 3.047,
      "step": 49539
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7657365798950195,
      "learning_rate": 0.0005341205412287901,
      "loss": 2.9549,
      "step": 49540
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6433088779449463,
      "learning_rate": 0.0005341179834670311,
      "loss": 2.8709,
      "step": 49541
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.561431646347046,
      "learning_rate": 0.000534115425661745,
      "loss": 3.0478,
      "step": 49542
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.759291410446167,
      "learning_rate": 0.0005341128678129326,
      "loss": 3.1622,
      "step": 49543
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2207305431365967,
      "learning_rate": 0.000534110309920594,
      "loss": 2.9541,
      "step": 49544
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9926481246948242,
      "learning_rate": 0.00053410775198473,
      "loss": 3.2866,
      "step": 49545
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.275289535522461,
      "learning_rate": 0.0005341051940053411,
      "loss": 3.1799,
      "step": 49546
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8308672904968262,
      "learning_rate": 0.0005341026359824274,
      "loss": 3.1306,
      "step": 49547
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.086538314819336,
      "learning_rate": 0.0005341000779159897,
      "loss": 3.1231,
      "step": 49548
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3007644414901733,
      "learning_rate": 0.0005340975198060284,
      "loss": 3.1309,
      "step": 49549
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.52205753326416,
      "learning_rate": 0.0005340949616525438,
      "loss": 2.9818,
      "step": 49550
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.03464412689209,
      "learning_rate": 0.0005340924034555366,
      "loss": 3.0546,
      "step": 49551
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3259899616241455,
      "learning_rate": 0.0005340898452150072,
      "loss": 3.4079,
      "step": 49552
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6345096826553345,
      "learning_rate": 0.0005340872869309561,
      "loss": 3.2276,
      "step": 49553
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4875802993774414,
      "learning_rate": 0.0005340847286033838,
      "loss": 3.0744,
      "step": 49554
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6260401010513306,
      "learning_rate": 0.0005340821702322907,
      "loss": 3.0591,
      "step": 49555
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5479042530059814,
      "learning_rate": 0.0005340796118176773,
      "loss": 3.1419,
      "step": 49556
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.351138710975647,
      "learning_rate": 0.0005340770533595441,
      "loss": 3.0886,
      "step": 49557
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.849224328994751,
      "learning_rate": 0.0005340744948578915,
      "loss": 3.2148,
      "step": 49558
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5088413953781128,
      "learning_rate": 0.0005340719363127201,
      "loss": 3.0491,
      "step": 49559
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6055617332458496,
      "learning_rate": 0.0005340693777240303,
      "loss": 3.265,
      "step": 49560
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2913120985031128,
      "learning_rate": 0.0005340668190918225,
      "loss": 2.8193,
      "step": 49561
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5876435041427612,
      "learning_rate": 0.0005340642604160972,
      "loss": 2.9986,
      "step": 49562
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7565301656723022,
      "learning_rate": 0.0005340617016968552,
      "loss": 2.9649,
      "step": 49563
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4797388315200806,
      "learning_rate": 0.0005340591429340966,
      "loss": 2.9579,
      "step": 49564
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6703147888183594,
      "learning_rate": 0.0005340565841278218,
      "loss": 2.8111,
      "step": 49565
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.312692165374756,
      "learning_rate": 0.0005340540252780317,
      "loss": 3.1146,
      "step": 49566
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3022540807724,
      "learning_rate": 0.0005340514663847264,
      "loss": 2.8942,
      "step": 49567
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6925889253616333,
      "learning_rate": 0.0005340489074479066,
      "loss": 2.9693,
      "step": 49568
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.365983724594116,
      "learning_rate": 0.0005340463484675728,
      "loss": 2.6661,
      "step": 49569
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3703997135162354,
      "learning_rate": 0.0005340437894437251,
      "loss": 3.2299,
      "step": 49570
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2929883003234863,
      "learning_rate": 0.0005340412303763644,
      "loss": 3.1871,
      "step": 49571
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6329572200775146,
      "learning_rate": 0.0005340386712654909,
      "loss": 3.0233,
      "step": 49572
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5414873361587524,
      "learning_rate": 0.0005340361121111053,
      "loss": 3.0535,
      "step": 49573
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9921448230743408,
      "learning_rate": 0.000534033552913208,
      "loss": 2.9666,
      "step": 49574
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7144086360931396,
      "learning_rate": 0.0005340309936717994,
      "loss": 2.9359,
      "step": 49575
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7031394243240356,
      "learning_rate": 0.00053402843438688,
      "loss": 2.8876,
      "step": 49576
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.640465497970581,
      "learning_rate": 0.0005340258750584503,
      "loss": 2.9232,
      "step": 49577
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0741770267486572,
      "learning_rate": 0.0005340233156865108,
      "loss": 3.0787,
      "step": 49578
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5256565809249878,
      "learning_rate": 0.0005340207562710619,
      "loss": 3.1575,
      "step": 49579
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8460594415664673,
      "learning_rate": 0.0005340181968121042,
      "loss": 2.749,
      "step": 49580
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.122957944869995,
      "learning_rate": 0.000534015637309638,
      "loss": 3.0247,
      "step": 49581
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8577779531478882,
      "learning_rate": 0.0005340130777636638,
      "loss": 2.9811,
      "step": 49582
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4957847595214844,
      "learning_rate": 0.0005340105181741825,
      "loss": 2.8872,
      "step": 49583
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8147450685501099,
      "learning_rate": 0.0005340079585411939,
      "loss": 3.0519,
      "step": 49584
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3393625020980835,
      "learning_rate": 0.0005340053988646989,
      "loss": 3.1362,
      "step": 49585
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.054764986038208,
      "learning_rate": 0.000534002839144698,
      "loss": 2.8242,
      "step": 49586
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3298671245574951,
      "learning_rate": 0.0005340002793811913,
      "loss": 3.3814,
      "step": 49587
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.726891279220581,
      "learning_rate": 0.0005339977195741797,
      "loss": 3.2112,
      "step": 49588
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.1337592601776123,
      "learning_rate": 0.0005339951597236634,
      "loss": 3.0285,
      "step": 49589
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9132479429244995,
      "learning_rate": 0.0005339925998296432,
      "loss": 3.1155,
      "step": 49590
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9257864952087402,
      "learning_rate": 0.0005339900398921192,
      "loss": 2.9981,
      "step": 49591
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.0217902660369873,
      "learning_rate": 0.000533987479911092,
      "loss": 3.1613,
      "step": 49592
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9077341556549072,
      "learning_rate": 0.0005339849198865621,
      "loss": 2.9174,
      "step": 49593
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7525856494903564,
      "learning_rate": 0.0005339823598185301,
      "loss": 2.861,
      "step": 49594
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.018833875656128,
      "learning_rate": 0.0005339797997069963,
      "loss": 3.0087,
      "step": 49595
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6784077882766724,
      "learning_rate": 0.0005339772395519613,
      "loss": 3.0159,
      "step": 49596
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6906013488769531,
      "learning_rate": 0.0005339746793534255,
      "loss": 3.0279,
      "step": 49597
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1722068786621094,
      "learning_rate": 0.0005339721191113894,
      "loss": 3.3096,
      "step": 49598
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8313945531845093,
      "learning_rate": 0.0005339695588258535,
      "loss": 3.1422,
      "step": 49599
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6065175533294678,
      "learning_rate": 0.0005339669984968181,
      "loss": 3.2174,
      "step": 49600
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3631705045700073,
      "learning_rate": 0.0005339644381242839,
      "loss": 2.9699,
      "step": 49601
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8773566484451294,
      "learning_rate": 0.0005339618777082511,
      "loss": 3.0803,
      "step": 49602
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6280272006988525,
      "learning_rate": 0.0005339593172487206,
      "loss": 3.2026,
      "step": 49603
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.572695255279541,
      "learning_rate": 0.0005339567567456926,
      "loss": 2.858,
      "step": 49604
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9898251295089722,
      "learning_rate": 0.0005339541961991677,
      "loss": 3.0436,
      "step": 49605
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9440873861312866,
      "learning_rate": 0.0005339516356091462,
      "loss": 3.2699,
      "step": 49606
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.297473430633545,
      "learning_rate": 0.0005339490749756287,
      "loss": 3.0252,
      "step": 49607
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.604438066482544,
      "learning_rate": 0.0005339465142986156,
      "loss": 3.0429,
      "step": 49608
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.606750249862671,
      "learning_rate": 0.0005339439535781076,
      "loss": 3.0363,
      "step": 49609
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0144221782684326,
      "learning_rate": 0.0005339413928141049,
      "loss": 3.2387,
      "step": 49610
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.713887333869934,
      "learning_rate": 0.000533938832006608,
      "loss": 2.9685,
      "step": 49611
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6985691785812378,
      "learning_rate": 0.0005339362711556175,
      "loss": 2.9585,
      "step": 49612
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4651955366134644,
      "learning_rate": 0.0005339337102611339,
      "loss": 3.1906,
      "step": 49613
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2569849491119385,
      "learning_rate": 0.0005339311493231574,
      "loss": 2.8272,
      "step": 49614
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7476129531860352,
      "learning_rate": 0.0005339285883416889,
      "loss": 2.8636,
      "step": 49615
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5769462585449219,
      "learning_rate": 0.0005339260273167286,
      "loss": 3.048,
      "step": 49616
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0886342525482178,
      "learning_rate": 0.000533923466248277,
      "loss": 3.0531,
      "step": 49617
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6390974521636963,
      "learning_rate": 0.0005339209051363347,
      "loss": 3.1723,
      "step": 49618
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9013564586639404,
      "learning_rate": 0.000533918343980902,
      "loss": 2.9156,
      "step": 49619
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6576391458511353,
      "learning_rate": 0.0005339157827819795,
      "loss": 3.0927,
      "step": 49620
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.303529977798462,
      "learning_rate": 0.0005339132215395676,
      "loss": 2.937,
      "step": 49621
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.074162244796753,
      "learning_rate": 0.0005339106602536669,
      "loss": 3.0577,
      "step": 49622
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.551514983177185,
      "learning_rate": 0.0005339080989242778,
      "loss": 3.1489,
      "step": 49623
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.60996413230896,
      "learning_rate": 0.0005339055375514006,
      "loss": 3.1227,
      "step": 49624
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5684516429901123,
      "learning_rate": 0.000533902976135036,
      "loss": 2.969,
      "step": 49625
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2596864700317383,
      "learning_rate": 0.0005339004146751846,
      "loss": 2.968,
      "step": 49626
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4043803215026855,
      "learning_rate": 0.0005338978531718465,
      "loss": 3.0151,
      "step": 49627
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3840537071228027,
      "learning_rate": 0.0005338952916250226,
      "loss": 3.1263,
      "step": 49628
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5896613597869873,
      "learning_rate": 0.0005338927300347129,
      "loss": 3.0096,
      "step": 49629
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5219863653182983,
      "learning_rate": 0.0005338901684009183,
      "loss": 3.1424,
      "step": 49630
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.071141242980957,
      "learning_rate": 0.000533887606723639,
      "loss": 2.9741,
      "step": 49631
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6424134969711304,
      "learning_rate": 0.0005338850450028756,
      "loss": 2.9464,
      "step": 49632
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7144827842712402,
      "learning_rate": 0.0005338824832386285,
      "loss": 3.2287,
      "step": 49633
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.856967568397522,
      "learning_rate": 0.0005338799214308983,
      "loss": 3.0823,
      "step": 49634
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.363896131515503,
      "learning_rate": 0.0005338773595796853,
      "loss": 3.0951,
      "step": 49635
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9483150243759155,
      "learning_rate": 0.0005338747976849904,
      "loss": 3.1068,
      "step": 49636
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4663702249526978,
      "learning_rate": 0.0005338722357468135,
      "loss": 3.0873,
      "step": 49637
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6010721921920776,
      "learning_rate": 0.0005338696737651554,
      "loss": 3.1607,
      "step": 49638
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8797920942306519,
      "learning_rate": 0.0005338671117400165,
      "loss": 2.8287,
      "step": 49639
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.183901309967041,
      "learning_rate": 0.0005338645496713973,
      "loss": 3.0033,
      "step": 49640
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1854021549224854,
      "learning_rate": 0.0005338619875592982,
      "loss": 3.0553,
      "step": 49641
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.963095784187317,
      "learning_rate": 0.0005338594254037198,
      "loss": 3.008,
      "step": 49642
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9347078800201416,
      "learning_rate": 0.0005338568632046625,
      "loss": 3.0803,
      "step": 49643
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.962967038154602,
      "learning_rate": 0.0005338543009621268,
      "loss": 3.2087,
      "step": 49644
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.270876407623291,
      "learning_rate": 0.0005338517386761132,
      "loss": 3.1766,
      "step": 49645
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6827285289764404,
      "learning_rate": 0.0005338491763466221,
      "loss": 2.8437,
      "step": 49646
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7605234384536743,
      "learning_rate": 0.0005338466139736541,
      "loss": 2.9252,
      "step": 49647
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.47497296333313,
      "learning_rate": 0.0005338440515572095,
      "loss": 3.0321,
      "step": 49648
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.39786958694458,
      "learning_rate": 0.0005338414890972889,
      "loss": 3.1452,
      "step": 49649
    },
    {
      "epoch": 0.65,
      "grad_norm": 4.139895439147949,
      "learning_rate": 0.0005338389265938929,
      "loss": 3.2045,
      "step": 49650
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.353116035461426,
      "learning_rate": 0.0005338363640470216,
      "loss": 3.2339,
      "step": 49651
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9667497873306274,
      "learning_rate": 0.0005338338014566758,
      "loss": 2.9818,
      "step": 49652
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.801210641860962,
      "learning_rate": 0.0005338312388228559,
      "loss": 2.9381,
      "step": 49653
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.171417713165283,
      "learning_rate": 0.0005338286761455625,
      "loss": 3.0229,
      "step": 49654
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.669507384300232,
      "learning_rate": 0.0005338261134247956,
      "loss": 2.9712,
      "step": 49655
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.584850549697876,
      "learning_rate": 0.0005338235506605563,
      "loss": 2.9904,
      "step": 49656
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0214972496032715,
      "learning_rate": 0.0005338209878528447,
      "loss": 2.8032,
      "step": 49657
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3578133583068848,
      "learning_rate": 0.0005338184250016614,
      "loss": 3.1539,
      "step": 49658
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5344511270523071,
      "learning_rate": 0.0005338158621070068,
      "loss": 3.3674,
      "step": 49659
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.303290605545044,
      "learning_rate": 0.0005338132991688814,
      "loss": 3.1649,
      "step": 49660
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9873874187469482,
      "learning_rate": 0.0005338107361872856,
      "loss": 2.7176,
      "step": 49661
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.37494695186615,
      "learning_rate": 0.0005338081731622201,
      "loss": 3.1311,
      "step": 49662
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0457611083984375,
      "learning_rate": 0.0005338056100936853,
      "loss": 3.017,
      "step": 49663
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4369605779647827,
      "learning_rate": 0.0005338030469816816,
      "loss": 2.9634,
      "step": 49664
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5314751863479614,
      "learning_rate": 0.0005338004838262094,
      "loss": 3.1845,
      "step": 49665
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3936891555786133,
      "learning_rate": 0.0005337979206272693,
      "loss": 2.9025,
      "step": 49666
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.299266815185547,
      "learning_rate": 0.0005337953573848618,
      "loss": 2.9722,
      "step": 49667
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5878187417984009,
      "learning_rate": 0.0005337927940989873,
      "loss": 2.8887,
      "step": 49668
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4326682090759277,
      "learning_rate": 0.0005337902307696464,
      "loss": 2.8975,
      "step": 49669
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4455621242523193,
      "learning_rate": 0.0005337876673968394,
      "loss": 2.9798,
      "step": 49670
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5954009294509888,
      "learning_rate": 0.0005337851039805667,
      "loss": 3.0914,
      "step": 49671
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7428311109542847,
      "learning_rate": 0.0005337825405208292,
      "loss": 3.0534,
      "step": 49672
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0274062156677246,
      "learning_rate": 0.000533779977017627,
      "loss": 2.95,
      "step": 49673
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5642523765563965,
      "learning_rate": 0.0005337774134709607,
      "loss": 3.0329,
      "step": 49674
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5449777841567993,
      "learning_rate": 0.0005337748498808307,
      "loss": 2.6863,
      "step": 49675
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4484283924102783,
      "learning_rate": 0.0005337722862472377,
      "loss": 2.9119,
      "step": 49676
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3480467796325684,
      "learning_rate": 0.0005337697225701819,
      "loss": 3.2751,
      "step": 49677
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5141061544418335,
      "learning_rate": 0.0005337671588496639,
      "loss": 3.0104,
      "step": 49678
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3183685541152954,
      "learning_rate": 0.0005337645950856841,
      "loss": 3.1564,
      "step": 49679
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.079853057861328,
      "learning_rate": 0.0005337620312782433,
      "loss": 3.0457,
      "step": 49680
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7701411247253418,
      "learning_rate": 0.0005337594674273415,
      "loss": 2.9487,
      "step": 49681
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.731953501701355,
      "learning_rate": 0.0005337569035329795,
      "loss": 2.9182,
      "step": 49682
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6918410062789917,
      "learning_rate": 0.0005337543395951577,
      "loss": 3.204,
      "step": 49683
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.947949171066284,
      "learning_rate": 0.0005337517756138765,
      "loss": 3.0836,
      "step": 49684
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.786749243736267,
      "learning_rate": 0.0005337492115891363,
      "loss": 3.2303,
      "step": 49685
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7134922742843628,
      "learning_rate": 0.0005337466475209379,
      "loss": 3.1512,
      "step": 49686
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.554938793182373,
      "learning_rate": 0.0005337440834092816,
      "loss": 2.8886,
      "step": 49687
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.3187081813812256,
      "learning_rate": 0.0005337415192541678,
      "loss": 3.124,
      "step": 49688
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0988409519195557,
      "learning_rate": 0.000533738955055597,
      "loss": 3.0234,
      "step": 49689
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5569201707839966,
      "learning_rate": 0.0005337363908135697,
      "loss": 2.9468,
      "step": 49690
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8459820747375488,
      "learning_rate": 0.0005337338265280865,
      "loss": 3.1353,
      "step": 49691
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6782704591751099,
      "learning_rate": 0.0005337312621991478,
      "loss": 3.1237,
      "step": 49692
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8877599239349365,
      "learning_rate": 0.0005337286978267539,
      "loss": 3.0382,
      "step": 49693
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4220824241638184,
      "learning_rate": 0.0005337261334109055,
      "loss": 3.1343,
      "step": 49694
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1522772312164307,
      "learning_rate": 0.0005337235689516029,
      "loss": 2.8839,
      "step": 49695
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4625253677368164,
      "learning_rate": 0.0005337210044488468,
      "loss": 3.1196,
      "step": 49696
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3715636730194092,
      "learning_rate": 0.0005337184399026375,
      "loss": 3.1143,
      "step": 49697
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.814346432685852,
      "learning_rate": 0.0005337158753129756,
      "loss": 3.0974,
      "step": 49698
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8608359098434448,
      "learning_rate": 0.0005337133106798614,
      "loss": 3.0833,
      "step": 49699
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.369537830352783,
      "learning_rate": 0.0005337107460032956,
      "loss": 3.2053,
      "step": 49700
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6998956203460693,
      "learning_rate": 0.0005337081812832784,
      "loss": 3.2237,
      "step": 49701
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.404276132583618,
      "learning_rate": 0.0005337056165198106,
      "loss": 3.0277,
      "step": 49702
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.743588924407959,
      "learning_rate": 0.0005337030517128925,
      "loss": 3.1728,
      "step": 49703
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5562736988067627,
      "learning_rate": 0.0005337004868625244,
      "loss": 3.1998,
      "step": 49704
    },
    {
      "epoch": 0.65,
      "grad_norm": 8.05671215057373,
      "learning_rate": 0.0005336979219687072,
      "loss": 3.0846,
      "step": 49705
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.862894296646118,
      "learning_rate": 0.000533695357031441,
      "loss": 2.9435,
      "step": 49706
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.062884569168091,
      "learning_rate": 0.0005336927920507264,
      "loss": 2.9007,
      "step": 49707
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.590336799621582,
      "learning_rate": 0.0005336902270265641,
      "loss": 3.0916,
      "step": 49708
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.512773036956787,
      "learning_rate": 0.0005336876619589542,
      "loss": 3.091,
      "step": 49709
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.0178463459014893,
      "learning_rate": 0.0005336850968478974,
      "loss": 3.0662,
      "step": 49710
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.574349045753479,
      "learning_rate": 0.0005336825316933941,
      "loss": 3.1895,
      "step": 49711
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4151804447174072,
      "learning_rate": 0.0005336799664954449,
      "loss": 2.9555,
      "step": 49712
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6110379695892334,
      "learning_rate": 0.00053367740125405,
      "loss": 3.1114,
      "step": 49713
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4944069385528564,
      "learning_rate": 0.0005336748359692102,
      "loss": 3.2398,
      "step": 49714
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4743150472640991,
      "learning_rate": 0.0005336722706409259,
      "loss": 3.042,
      "step": 49715
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5732414722442627,
      "learning_rate": 0.0005336697052691974,
      "loss": 3.1118,
      "step": 49716
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5106337070465088,
      "learning_rate": 0.0005336671398540253,
      "loss": 3.0724,
      "step": 49717
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.069658041000366,
      "learning_rate": 0.0005336645743954101,
      "loss": 3.002,
      "step": 49718
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9919545650482178,
      "learning_rate": 0.0005336620088933523,
      "loss": 3.1671,
      "step": 49719
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3878071308135986,
      "learning_rate": 0.0005336594433478523,
      "loss": 3.0646,
      "step": 49720
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6522047519683838,
      "learning_rate": 0.0005336568777589105,
      "loss": 3.178,
      "step": 49721
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.19834566116333,
      "learning_rate": 0.0005336543121265275,
      "loss": 3.0558,
      "step": 49722
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9510319232940674,
      "learning_rate": 0.0005336517464507038,
      "loss": 3.2104,
      "step": 49723
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7005600929260254,
      "learning_rate": 0.0005336491807314398,
      "loss": 2.7288,
      "step": 49724
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.878368854522705,
      "learning_rate": 0.000533646614968736,
      "loss": 2.9893,
      "step": 49725
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6834526062011719,
      "learning_rate": 0.0005336440491625929,
      "loss": 2.9683,
      "step": 49726
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7798880338668823,
      "learning_rate": 0.000533641483313011,
      "loss": 3.1727,
      "step": 49727
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6194369792938232,
      "learning_rate": 0.0005336389174199907,
      "loss": 2.8136,
      "step": 49728
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4586942195892334,
      "learning_rate": 0.0005336363514835325,
      "loss": 2.9706,
      "step": 49729
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1300675868988037,
      "learning_rate": 0.0005336337855036369,
      "loss": 3.2202,
      "step": 49730
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6040290594100952,
      "learning_rate": 0.0005336312194803044,
      "loss": 3.0986,
      "step": 49731
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7433767318725586,
      "learning_rate": 0.0005336286534135354,
      "loss": 3.2848,
      "step": 49732
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9074630737304688,
      "learning_rate": 0.0005336260873033304,
      "loss": 3.1689,
      "step": 49733
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9257268905639648,
      "learning_rate": 0.00053362352114969,
      "loss": 2.9415,
      "step": 49734
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4232556819915771,
      "learning_rate": 0.0005336209549526145,
      "loss": 3.1269,
      "step": 49735
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3730709552764893,
      "learning_rate": 0.0005336183887121045,
      "loss": 2.9984,
      "step": 49736
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2233481407165527,
      "learning_rate": 0.0005336158224281604,
      "loss": 2.9309,
      "step": 49737
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6068904399871826,
      "learning_rate": 0.0005336132561007828,
      "loss": 3.1074,
      "step": 49738
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.40147864818573,
      "learning_rate": 0.0005336106897299719,
      "loss": 2.9943,
      "step": 49739
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3991650342941284,
      "learning_rate": 0.0005336081233157286,
      "loss": 3.1654,
      "step": 49740
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7097043991088867,
      "learning_rate": 0.000533605556858053,
      "loss": 3.2068,
      "step": 49741
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.548109769821167,
      "learning_rate": 0.0005336029903569456,
      "loss": 3.1588,
      "step": 49742
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4309877157211304,
      "learning_rate": 0.0005336004238124072,
      "loss": 3.0671,
      "step": 49743
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5334969758987427,
      "learning_rate": 0.000533597857224438,
      "loss": 2.7881,
      "step": 49744
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5353530645370483,
      "learning_rate": 0.0005335952905930384,
      "loss": 3.1872,
      "step": 49745
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6206798553466797,
      "learning_rate": 0.0005335927239182093,
      "loss": 2.7975,
      "step": 49746
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6376748085021973,
      "learning_rate": 0.0005335901571999508,
      "loss": 2.9731,
      "step": 49747
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8265668153762817,
      "learning_rate": 0.0005335875904382634,
      "loss": 2.9858,
      "step": 49748
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9212226867675781,
      "learning_rate": 0.0005335850236331477,
      "loss": 2.8481,
      "step": 49749
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.003662347793579,
      "learning_rate": 0.0005335824567846042,
      "loss": 3.1432,
      "step": 49750
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3314740657806396,
      "learning_rate": 0.0005335798898926333,
      "loss": 3.0832,
      "step": 49751
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.964478850364685,
      "learning_rate": 0.0005335773229572353,
      "loss": 3.0507,
      "step": 49752
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.701978087425232,
      "learning_rate": 0.0005335747559784111,
      "loss": 3.2351,
      "step": 49753
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5459579229354858,
      "learning_rate": 0.0005335721889561608,
      "loss": 2.8778,
      "step": 49754
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0065245628356934,
      "learning_rate": 0.0005335696218904851,
      "loss": 3.2494,
      "step": 49755
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3753259181976318,
      "learning_rate": 0.0005335670547813844,
      "loss": 2.8555,
      "step": 49756
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4034193754196167,
      "learning_rate": 0.0005335644876288591,
      "loss": 3.0261,
      "step": 49757
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.716729760169983,
      "learning_rate": 0.0005335619204329098,
      "loss": 2.8853,
      "step": 49758
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.625315546989441,
      "learning_rate": 0.000533559353193537,
      "loss": 2.9246,
      "step": 49759
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3401895761489868,
      "learning_rate": 0.000533556785910741,
      "loss": 3.0655,
      "step": 49760
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.171086549758911,
      "learning_rate": 0.0005335542185845224,
      "loss": 2.878,
      "step": 49761
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5347280502319336,
      "learning_rate": 0.0005335516512148817,
      "loss": 2.9789,
      "step": 49762
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.14578914642334,
      "learning_rate": 0.0005335490838018193,
      "loss": 3.0673,
      "step": 49763
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4731401205062866,
      "learning_rate": 0.0005335465163453358,
      "loss": 3.1001,
      "step": 49764
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.270751714706421,
      "learning_rate": 0.0005335439488454315,
      "loss": 2.8082,
      "step": 49765
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6137696504592896,
      "learning_rate": 0.0005335413813021071,
      "loss": 3.1483,
      "step": 49766
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8911710977554321,
      "learning_rate": 0.0005335388137153627,
      "loss": 3.1858,
      "step": 49767
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5389591455459595,
      "learning_rate": 0.0005335362460851992,
      "loss": 3.0297,
      "step": 49768
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.717990517616272,
      "learning_rate": 0.0005335336784116169,
      "loss": 3.2066,
      "step": 49769
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7519679069519043,
      "learning_rate": 0.0005335311106946163,
      "loss": 2.8576,
      "step": 49770
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4289382696151733,
      "learning_rate": 0.0005335285429341978,
      "loss": 2.9797,
      "step": 49771
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2450155019760132,
      "learning_rate": 0.0005335259751303621,
      "loss": 2.9505,
      "step": 49772
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7488101720809937,
      "learning_rate": 0.0005335234072831093,
      "loss": 2.941,
      "step": 49773
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.848151683807373,
      "learning_rate": 0.0005335208393924402,
      "loss": 2.8815,
      "step": 49774
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8624087572097778,
      "learning_rate": 0.0005335182714583551,
      "loss": 3.0576,
      "step": 49775
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5261456966400146,
      "learning_rate": 0.0005335157034808545,
      "loss": 2.9453,
      "step": 49776
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.692707061767578,
      "learning_rate": 0.0005335131354599391,
      "loss": 2.9823,
      "step": 49777
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9368644952774048,
      "learning_rate": 0.0005335105673956091,
      "loss": 3.2829,
      "step": 49778
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3921403884887695,
      "learning_rate": 0.0005335079992878651,
      "loss": 3.2952,
      "step": 49779
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5150563716888428,
      "learning_rate": 0.0005335054311367075,
      "loss": 2.8112,
      "step": 49780
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.113318681716919,
      "learning_rate": 0.000533502862942137,
      "loss": 3.1116,
      "step": 49781
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.518261194229126,
      "learning_rate": 0.0005335002947041537,
      "loss": 3.1211,
      "step": 49782
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9311169385910034,
      "learning_rate": 0.0005334977264227583,
      "loss": 2.8895,
      "step": 49783
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.951258659362793,
      "learning_rate": 0.0005334951580979513,
      "loss": 3.0749,
      "step": 49784
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.1122500896453857,
      "learning_rate": 0.0005334925897297332,
      "loss": 3.0211,
      "step": 49785
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6698737144470215,
      "learning_rate": 0.0005334900213181044,
      "loss": 3.3174,
      "step": 49786
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7647428512573242,
      "learning_rate": 0.0005334874528630654,
      "loss": 3.3456,
      "step": 49787
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.227712869644165,
      "learning_rate": 0.0005334848843646167,
      "loss": 2.7869,
      "step": 49788
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6435351371765137,
      "learning_rate": 0.0005334823158227587,
      "loss": 3.0407,
      "step": 49789
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0978143215179443,
      "learning_rate": 0.0005334797472374919,
      "loss": 2.9865,
      "step": 49790
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5898330211639404,
      "learning_rate": 0.0005334771786088168,
      "loss": 3.1342,
      "step": 49791
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4925549030303955,
      "learning_rate": 0.0005334746099367341,
      "loss": 3.0309,
      "step": 49792
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.231653928756714,
      "learning_rate": 0.0005334720412212439,
      "loss": 3.1818,
      "step": 49793
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4941550493240356,
      "learning_rate": 0.0005334694724623468,
      "loss": 3.1525,
      "step": 49794
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.234860897064209,
      "learning_rate": 0.0005334669036600433,
      "loss": 2.7872,
      "step": 49795
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6099867820739746,
      "learning_rate": 0.000533464334814334,
      "loss": 3.1803,
      "step": 49796
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5860801935195923,
      "learning_rate": 0.0005334617659252192,
      "loss": 3.1464,
      "step": 49797
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5793174505233765,
      "learning_rate": 0.0005334591969926995,
      "loss": 2.8863,
      "step": 49798
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3804128170013428,
      "learning_rate": 0.0005334566280167753,
      "loss": 2.9583,
      "step": 49799
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7426773309707642,
      "learning_rate": 0.0005334540589974472,
      "loss": 3.1468,
      "step": 49800
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8082900047302246,
      "learning_rate": 0.0005334514899347155,
      "loss": 2.9703,
      "step": 49801
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5602009296417236,
      "learning_rate": 0.0005334489208285808,
      "loss": 2.9615,
      "step": 49802
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7590731382369995,
      "learning_rate": 0.0005334463516790434,
      "loss": 3.1179,
      "step": 49803
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5050804615020752,
      "learning_rate": 0.000533443782486104,
      "loss": 3.045,
      "step": 49804
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6496820449829102,
      "learning_rate": 0.000533441213249763,
      "loss": 2.9143,
      "step": 49805
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4794032573699951,
      "learning_rate": 0.000533438643970021,
      "loss": 3.1213,
      "step": 49806
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8163350820541382,
      "learning_rate": 0.0005334360746468782,
      "loss": 3.1457,
      "step": 49807
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4320460557937622,
      "learning_rate": 0.0005334335052803352,
      "loss": 3.3413,
      "step": 49808
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.692055106163025,
      "learning_rate": 0.0005334309358703926,
      "loss": 3.1928,
      "step": 49809
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4302759170532227,
      "learning_rate": 0.0005334283664170507,
      "loss": 3.2087,
      "step": 49810
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7971205711364746,
      "learning_rate": 0.0005334257969203101,
      "loss": 3.0114,
      "step": 49811
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5481555461883545,
      "learning_rate": 0.0005334232273801713,
      "loss": 3.1515,
      "step": 49812
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.380655288696289,
      "learning_rate": 0.0005334206577966347,
      "loss": 3.0025,
      "step": 49813
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.910177707672119,
      "learning_rate": 0.0005334180881697007,
      "loss": 2.9607,
      "step": 49814
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5209312438964844,
      "learning_rate": 0.0005334155184993699,
      "loss": 2.8263,
      "step": 49815
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4707615375518799,
      "learning_rate": 0.0005334129487856429,
      "loss": 3.0083,
      "step": 49816
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.003089666366577,
      "learning_rate": 0.0005334103790285198,
      "loss": 3.1589,
      "step": 49817
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6696405410766602,
      "learning_rate": 0.0005334078092280014,
      "loss": 3.1289,
      "step": 49818
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.570766806602478,
      "learning_rate": 0.0005334052393840881,
      "loss": 3.1413,
      "step": 49819
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6955360174179077,
      "learning_rate": 0.0005334026694967804,
      "loss": 2.8762,
      "step": 49820
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.985151767730713,
      "learning_rate": 0.0005334000995660786,
      "loss": 2.9729,
      "step": 49821
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3217006921768188,
      "learning_rate": 0.0005333975295919835,
      "loss": 2.8865,
      "step": 49822
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9771628379821777,
      "learning_rate": 0.0005333949595744952,
      "loss": 3.1949,
      "step": 49823
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8866775035858154,
      "learning_rate": 0.0005333923895136146,
      "loss": 3.1576,
      "step": 49824
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9487677812576294,
      "learning_rate": 0.0005333898194093417,
      "loss": 3.0123,
      "step": 49825
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8778364658355713,
      "learning_rate": 0.0005333872492616775,
      "loss": 3.1169,
      "step": 49826
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4101781845092773,
      "learning_rate": 0.000533384679070622,
      "loss": 3.1717,
      "step": 49827
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5506342649459839,
      "learning_rate": 0.000533382108836176,
      "loss": 3.0309,
      "step": 49828
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6270595788955688,
      "learning_rate": 0.0005333795385583397,
      "loss": 2.8882,
      "step": 49829
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5617492198944092,
      "learning_rate": 0.0005333769682371139,
      "loss": 3.0669,
      "step": 49830
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1098194122314453,
      "learning_rate": 0.0005333743978724987,
      "loss": 2.8079,
      "step": 49831
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0415303707122803,
      "learning_rate": 0.000533371827464495,
      "loss": 3.0311,
      "step": 49832
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2471096515655518,
      "learning_rate": 0.000533369257013103,
      "loss": 3.0948,
      "step": 49833
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3526400327682495,
      "learning_rate": 0.0005333666865183233,
      "loss": 3.1772,
      "step": 49834
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.355362057685852,
      "learning_rate": 0.0005333641159801563,
      "loss": 3.3549,
      "step": 49835
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5837067365646362,
      "learning_rate": 0.0005333615453986025,
      "loss": 2.9056,
      "step": 49836
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4945155382156372,
      "learning_rate": 0.0005333589747736624,
      "loss": 3.1485,
      "step": 49837
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5737299919128418,
      "learning_rate": 0.0005333564041053363,
      "loss": 2.8782,
      "step": 49838
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2633042335510254,
      "learning_rate": 0.0005333538333936251,
      "loss": 3.2351,
      "step": 49839
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5441277027130127,
      "learning_rate": 0.0005333512626385289,
      "loss": 2.9534,
      "step": 49840
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4237258434295654,
      "learning_rate": 0.0005333486918400483,
      "loss": 3.0123,
      "step": 49841
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7895125150680542,
      "learning_rate": 0.0005333461209981838,
      "loss": 2.7638,
      "step": 49842
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.407235860824585,
      "learning_rate": 0.0005333435501129357,
      "loss": 2.9409,
      "step": 49843
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.56491219997406,
      "learning_rate": 0.0005333409791843048,
      "loss": 3.0859,
      "step": 49844
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.548364520072937,
      "learning_rate": 0.0005333384082122914,
      "loss": 2.9986,
      "step": 49845
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3878084421157837,
      "learning_rate": 0.0005333358371968959,
      "loss": 2.9993,
      "step": 49846
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.1193361282348633,
      "learning_rate": 0.0005333332661381189,
      "loss": 2.8545,
      "step": 49847
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5599514245986938,
      "learning_rate": 0.0005333306950359609,
      "loss": 3.1253,
      "step": 49848
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.385761022567749,
      "learning_rate": 0.0005333281238904222,
      "loss": 3.0841,
      "step": 49849
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6022868156433105,
      "learning_rate": 0.0005333255527015035,
      "loss": 3.0726,
      "step": 49850
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5433820486068726,
      "learning_rate": 0.0005333229814692051,
      "loss": 2.9781,
      "step": 49851
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7502137422561646,
      "learning_rate": 0.0005333204101935277,
      "loss": 3.0174,
      "step": 49852
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5800784826278687,
      "learning_rate": 0.0005333178388744715,
      "loss": 2.9727,
      "step": 49853
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.714148759841919,
      "learning_rate": 0.0005333152675120371,
      "loss": 3.0255,
      "step": 49854
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9476594924926758,
      "learning_rate": 0.0005333126961062249,
      "loss": 3.0596,
      "step": 49855
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4922159910202026,
      "learning_rate": 0.0005333101246570356,
      "loss": 2.9888,
      "step": 49856
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8156019449234009,
      "learning_rate": 0.0005333075531644695,
      "loss": 2.9832,
      "step": 49857
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.441636562347412,
      "learning_rate": 0.0005333049816285272,
      "loss": 2.8821,
      "step": 49858
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5742156505584717,
      "learning_rate": 0.0005333024100492091,
      "loss": 2.9569,
      "step": 49859
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7132482528686523,
      "learning_rate": 0.0005332998384265156,
      "loss": 2.951,
      "step": 49860
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8296949863433838,
      "learning_rate": 0.0005332972667604472,
      "loss": 3.1261,
      "step": 49861
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9348090887069702,
      "learning_rate": 0.0005332946950510047,
      "loss": 3.005,
      "step": 49862
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.280515670776367,
      "learning_rate": 0.0005332921232981882,
      "loss": 2.866,
      "step": 49863
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.248019218444824,
      "learning_rate": 0.0005332895515019981,
      "loss": 2.9637,
      "step": 49864
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.3338587284088135,
      "learning_rate": 0.0005332869796624352,
      "loss": 3.0138,
      "step": 49865
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1288399696350098,
      "learning_rate": 0.0005332844077794998,
      "loss": 2.8691,
      "step": 49866
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4192110300064087,
      "learning_rate": 0.0005332818358531925,
      "loss": 3.0577,
      "step": 49867
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4209121465682983,
      "learning_rate": 0.0005332792638835137,
      "loss": 2.9554,
      "step": 49868
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.385206937789917,
      "learning_rate": 0.0005332766918704638,
      "loss": 3.1587,
      "step": 49869
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6181070804595947,
      "learning_rate": 0.0005332741198140434,
      "loss": 2.8951,
      "step": 49870
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4459834098815918,
      "learning_rate": 0.000533271547714253,
      "loss": 3.0953,
      "step": 49871
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.497532367706299,
      "learning_rate": 0.0005332689755710929,
      "loss": 2.8877,
      "step": 49872
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.421160936355591,
      "learning_rate": 0.0005332664033845637,
      "loss": 3.0089,
      "step": 49873
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0542495250701904,
      "learning_rate": 0.000533263831154666,
      "loss": 3.3157,
      "step": 49874
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7557798624038696,
      "learning_rate": 0.0005332612588814001,
      "loss": 3.1653,
      "step": 49875
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.719416856765747,
      "learning_rate": 0.0005332586865647665,
      "loss": 2.9593,
      "step": 49876
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6339967250823975,
      "learning_rate": 0.0005332561142047657,
      "loss": 2.9871,
      "step": 49877
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4532719850540161,
      "learning_rate": 0.0005332535418013983,
      "loss": 3.1253,
      "step": 49878
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4256041049957275,
      "learning_rate": 0.0005332509693546644,
      "loss": 3.0271,
      "step": 49879
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3369930982589722,
      "learning_rate": 0.000533248396864565,
      "loss": 3.089,
      "step": 49880
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5443806648254395,
      "learning_rate": 0.0005332458243311002,
      "loss": 3.2665,
      "step": 49881
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3661085367202759,
      "learning_rate": 0.0005332432517542706,
      "loss": 3.3487,
      "step": 49882
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5217857360839844,
      "learning_rate": 0.0005332406791340767,
      "loss": 3.1363,
      "step": 49883
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6946412324905396,
      "learning_rate": 0.000533238106470519,
      "loss": 3.3028,
      "step": 49884
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6207612752914429,
      "learning_rate": 0.000533235533763598,
      "loss": 2.744,
      "step": 49885
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.485527515411377,
      "learning_rate": 0.0005332329610133139,
      "loss": 2.7962,
      "step": 49886
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3794201612472534,
      "learning_rate": 0.0005332303882196676,
      "loss": 3.0782,
      "step": 49887
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6823256015777588,
      "learning_rate": 0.0005332278153826591,
      "loss": 2.9563,
      "step": 49888
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4355378150939941,
      "learning_rate": 0.0005332252425022893,
      "loss": 2.9861,
      "step": 49889
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4570540189743042,
      "learning_rate": 0.0005332226695785586,
      "loss": 3.3991,
      "step": 49890
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.068112850189209,
      "learning_rate": 0.0005332200966114674,
      "loss": 3.0136,
      "step": 49891
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.93767511844635,
      "learning_rate": 0.0005332175236010161,
      "loss": 2.9424,
      "step": 49892
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5479869842529297,
      "learning_rate": 0.0005332149505472051,
      "loss": 3.001,
      "step": 49893
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.158329963684082,
      "learning_rate": 0.0005332123774500353,
      "loss": 3.1228,
      "step": 49894
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.8590328693389893,
      "learning_rate": 0.0005332098043095069,
      "loss": 3.0692,
      "step": 49895
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2983804941177368,
      "learning_rate": 0.0005332072311256202,
      "loss": 3.1985,
      "step": 49896
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.033839702606201,
      "learning_rate": 0.0005332046578983761,
      "loss": 2.9135,
      "step": 49897
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3473732471466064,
      "learning_rate": 0.0005332020846277747,
      "loss": 3.0474,
      "step": 49898
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8314309120178223,
      "learning_rate": 0.0005331995113138167,
      "loss": 3.0908,
      "step": 49899
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2863893508911133,
      "learning_rate": 0.0005331969379565025,
      "loss": 3.1467,
      "step": 49900
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9707969427108765,
      "learning_rate": 0.0005331943645558325,
      "loss": 2.9798,
      "step": 49901
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.008708953857422,
      "learning_rate": 0.0005331917911118075,
      "loss": 2.9026,
      "step": 49902
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.149043321609497,
      "learning_rate": 0.0005331892176244275,
      "loss": 2.9002,
      "step": 49903
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4471728801727295,
      "learning_rate": 0.0005331866440936934,
      "loss": 3.0336,
      "step": 49904
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3977489471435547,
      "learning_rate": 0.0005331840705196054,
      "loss": 3.1209,
      "step": 49905
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5432963371276855,
      "learning_rate": 0.000533181496902164,
      "loss": 2.9456,
      "step": 49906
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5448243618011475,
      "learning_rate": 0.0005331789232413699,
      "loss": 3.1407,
      "step": 49907
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4838933944702148,
      "learning_rate": 0.0005331763495372234,
      "loss": 2.9881,
      "step": 49908
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0674712657928467,
      "learning_rate": 0.000533173775789725,
      "loss": 3.1004,
      "step": 49909
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1227283477783203,
      "learning_rate": 0.0005331712019988751,
      "loss": 3.1954,
      "step": 49910
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.367605686187744,
      "learning_rate": 0.0005331686281646745,
      "loss": 2.9657,
      "step": 49911
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7791496515274048,
      "learning_rate": 0.0005331660542871233,
      "loss": 2.9862,
      "step": 49912
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8305500745773315,
      "learning_rate": 0.0005331634803662222,
      "loss": 2.8511,
      "step": 49913
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9925537109375,
      "learning_rate": 0.0005331609064019715,
      "loss": 3.2965,
      "step": 49914
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.523576498031616,
      "learning_rate": 0.0005331583323943717,
      "loss": 3.0549,
      "step": 49915
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6164582967758179,
      "learning_rate": 0.0005331557583434236,
      "loss": 2.8242,
      "step": 49916
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4062628746032715,
      "learning_rate": 0.0005331531842491273,
      "loss": 3.0489,
      "step": 49917
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.8108444213867188,
      "learning_rate": 0.0005331506101114835,
      "loss": 3.1821,
      "step": 49918
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.489778995513916,
      "learning_rate": 0.0005331480359304926,
      "loss": 2.9664,
      "step": 49919
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7460906505584717,
      "learning_rate": 0.000533145461706155,
      "loss": 3.139,
      "step": 49920
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6869335174560547,
      "learning_rate": 0.0005331428874384714,
      "loss": 3.324,
      "step": 49921
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7007638216018677,
      "learning_rate": 0.0005331403131274419,
      "loss": 3.0082,
      "step": 49922
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2941553592681885,
      "learning_rate": 0.0005331377387730674,
      "loss": 3.1087,
      "step": 49923
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9185199737548828,
      "learning_rate": 0.0005331351643753482,
      "loss": 3.052,
      "step": 49924
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5928010940551758,
      "learning_rate": 0.0005331325899342847,
      "loss": 2.736,
      "step": 49925
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9782917499542236,
      "learning_rate": 0.0005331300154498775,
      "loss": 2.9925,
      "step": 49926
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3213846683502197,
      "learning_rate": 0.0005331274409221269,
      "loss": 3.0115,
      "step": 49927
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.661595344543457,
      "learning_rate": 0.0005331248663510337,
      "loss": 2.8665,
      "step": 49928
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6060352325439453,
      "learning_rate": 0.0005331222917365981,
      "loss": 3.0019,
      "step": 49929
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.162095069885254,
      "learning_rate": 0.0005331197170788207,
      "loss": 3.0924,
      "step": 49930
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7292381525039673,
      "learning_rate": 0.0005331171423777019,
      "loss": 2.8205,
      "step": 49931
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3420326709747314,
      "learning_rate": 0.0005331145676332423,
      "loss": 2.8577,
      "step": 49932
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5999760627746582,
      "learning_rate": 0.0005331119928454422,
      "loss": 3.1481,
      "step": 49933
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7086050510406494,
      "learning_rate": 0.0005331094180143021,
      "loss": 2.5658,
      "step": 49934
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8851674795150757,
      "learning_rate": 0.0005331068431398228,
      "loss": 2.9877,
      "step": 49935
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.491891860961914,
      "learning_rate": 0.0005331042682220044,
      "loss": 2.9592,
      "step": 49936
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.79033362865448,
      "learning_rate": 0.0005331016932608475,
      "loss": 3.068,
      "step": 49937
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5199198722839355,
      "learning_rate": 0.0005330991182563526,
      "loss": 2.8453,
      "step": 49938
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7878904342651367,
      "learning_rate": 0.0005330965432085202,
      "loss": 3.0521,
      "step": 49939
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.566656470298767,
      "learning_rate": 0.0005330939681173507,
      "loss": 3.0503,
      "step": 49940
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.766684651374817,
      "learning_rate": 0.0005330913929828447,
      "loss": 3.2321,
      "step": 49941
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4056270122528076,
      "learning_rate": 0.0005330888178050027,
      "loss": 2.7937,
      "step": 49942
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4912476539611816,
      "learning_rate": 0.0005330862425838249,
      "loss": 3.0921,
      "step": 49943
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.368171453475952,
      "learning_rate": 0.0005330836673193121,
      "loss": 2.9312,
      "step": 49944
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.609855055809021,
      "learning_rate": 0.0005330810920114646,
      "loss": 2.8684,
      "step": 49945
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.865009069442749,
      "learning_rate": 0.0005330785166602828,
      "loss": 3.1599,
      "step": 49946
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9329216480255127,
      "learning_rate": 0.0005330759412657674,
      "loss": 2.9561,
      "step": 49947
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5986273288726807,
      "learning_rate": 0.0005330733658279189,
      "loss": 3.0659,
      "step": 49948
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5548361539840698,
      "learning_rate": 0.0005330707903467376,
      "loss": 3.1966,
      "step": 49949
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9563919305801392,
      "learning_rate": 0.000533068214822224,
      "loss": 3.1572,
      "step": 49950
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2687442302703857,
      "learning_rate": 0.0005330656392543786,
      "loss": 3.0262,
      "step": 49951
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.717727541923523,
      "learning_rate": 0.0005330630636432019,
      "loss": 3.0763,
      "step": 49952
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5829224586486816,
      "learning_rate": 0.0005330604879886944,
      "loss": 2.9576,
      "step": 49953
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7944151163101196,
      "learning_rate": 0.0005330579122908565,
      "loss": 3.273,
      "step": 49954
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8418302536010742,
      "learning_rate": 0.0005330553365496889,
      "loss": 2.9894,
      "step": 49955
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7796894311904907,
      "learning_rate": 0.0005330527607651917,
      "loss": 3.2373,
      "step": 49956
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8172427415847778,
      "learning_rate": 0.0005330501849373657,
      "loss": 3.183,
      "step": 49957
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4852330684661865,
      "learning_rate": 0.0005330476090662113,
      "loss": 3.0621,
      "step": 49958
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.1733741760253906,
      "learning_rate": 0.000533045033151729,
      "loss": 2.9548,
      "step": 49959
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4704898595809937,
      "learning_rate": 0.0005330424571939192,
      "loss": 2.9099,
      "step": 49960
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6135247945785522,
      "learning_rate": 0.0005330398811927823,
      "loss": 2.9171,
      "step": 49961
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8213059902191162,
      "learning_rate": 0.0005330373051483189,
      "loss": 3.077,
      "step": 49962
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7540825605392456,
      "learning_rate": 0.0005330347290605296,
      "loss": 3.1806,
      "step": 49963
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.425758957862854,
      "learning_rate": 0.0005330321529294146,
      "loss": 3.0363,
      "step": 49964
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4569183588027954,
      "learning_rate": 0.0005330295767549746,
      "loss": 2.7614,
      "step": 49965
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.905909538269043,
      "learning_rate": 0.00053302700053721,
      "loss": 3.0566,
      "step": 49966
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.186300039291382,
      "learning_rate": 0.0005330244242761212,
      "loss": 3.0474,
      "step": 49967
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.528052568435669,
      "learning_rate": 0.0005330218479717089,
      "loss": 2.8746,
      "step": 49968
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1862688064575195,
      "learning_rate": 0.0005330192716239733,
      "loss": 2.8863,
      "step": 49969
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.486402988433838,
      "learning_rate": 0.0005330166952329151,
      "loss": 3.1407,
      "step": 49970
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3742424249649048,
      "learning_rate": 0.0005330141187985348,
      "loss": 3.0263,
      "step": 49971
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2599717378616333,
      "learning_rate": 0.0005330115423208326,
      "loss": 3.0239,
      "step": 49972
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4582409858703613,
      "learning_rate": 0.0005330089657998091,
      "loss": 2.9901,
      "step": 49973
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2450063228607178,
      "learning_rate": 0.000533006389235465,
      "loss": 3.0456,
      "step": 49974
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.490228295326233,
      "learning_rate": 0.0005330038126278005,
      "loss": 3.0901,
      "step": 49975
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8149614334106445,
      "learning_rate": 0.0005330012359768164,
      "loss": 3.1809,
      "step": 49976
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.400580644607544,
      "learning_rate": 0.0005329986592825128,
      "loss": 2.9414,
      "step": 49977
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3729097843170166,
      "learning_rate": 0.0005329960825448904,
      "loss": 3.327,
      "step": 49978
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5374451875686646,
      "learning_rate": 0.0005329935057639495,
      "loss": 3.0144,
      "step": 49979
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4492311477661133,
      "learning_rate": 0.0005329909289396908,
      "loss": 3.0275,
      "step": 49980
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4631106853485107,
      "learning_rate": 0.0005329883520721147,
      "loss": 3.0914,
      "step": 49981
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4144985675811768,
      "learning_rate": 0.0005329857751612218,
      "loss": 2.965,
      "step": 49982
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0499179363250732,
      "learning_rate": 0.0005329831982070123,
      "loss": 3.3154,
      "step": 49983
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7932815551757812,
      "learning_rate": 0.0005329806212094868,
      "loss": 3.0701,
      "step": 49984
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4883246421813965,
      "learning_rate": 0.0005329780441686458,
      "loss": 2.9524,
      "step": 49985
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6515412330627441,
      "learning_rate": 0.0005329754670844899,
      "loss": 2.8455,
      "step": 49986
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4300761222839355,
      "learning_rate": 0.0005329728899570194,
      "loss": 2.9906,
      "step": 49987
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.913177728652954,
      "learning_rate": 0.0005329703127862349,
      "loss": 3.3286,
      "step": 49988
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3223830461502075,
      "learning_rate": 0.0005329677355721367,
      "loss": 2.9244,
      "step": 49989
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4861797094345093,
      "learning_rate": 0.0005329651583147254,
      "loss": 3.0716,
      "step": 49990
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4037790298461914,
      "learning_rate": 0.0005329625810140016,
      "loss": 3.1035,
      "step": 49991
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2054965496063232,
      "learning_rate": 0.0005329600036699656,
      "loss": 3.1562,
      "step": 49992
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6939879655838013,
      "learning_rate": 0.000532957426282618,
      "loss": 3.1283,
      "step": 49993
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8037301301956177,
      "learning_rate": 0.0005329548488519591,
      "loss": 3.1235,
      "step": 49994
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.844519019126892,
      "learning_rate": 0.0005329522713779895,
      "loss": 3.1854,
      "step": 49995
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.771363377571106,
      "learning_rate": 0.0005329496938607098,
      "loss": 3.12,
      "step": 49996
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4784862995147705,
      "learning_rate": 0.0005329471163001202,
      "loss": 3.0752,
      "step": 49997
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6356287002563477,
      "learning_rate": 0.0005329445386962215,
      "loss": 3.3123,
      "step": 49998
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.033507823944092,
      "learning_rate": 0.0005329419610490139,
      "loss": 3.1566,
      "step": 49999
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5154873132705688,
      "learning_rate": 0.000532939383358498,
      "loss": 2.962,
      "step": 50000
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6243572235107422,
      "learning_rate": 0.0005329368056246743,
      "loss": 2.9619,
      "step": 50001
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8015576601028442,
      "learning_rate": 0.0005329342278475432,
      "loss": 3.184,
      "step": 50002
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.700161933898926,
      "learning_rate": 0.0005329316500271053,
      "loss": 2.9335,
      "step": 50003
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6440579891204834,
      "learning_rate": 0.0005329290721633609,
      "loss": 3.3112,
      "step": 50004
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.633983850479126,
      "learning_rate": 0.0005329264942563108,
      "loss": 3.0979,
      "step": 50005
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8724206686019897,
      "learning_rate": 0.0005329239163059552,
      "loss": 3.0782,
      "step": 50006
    },
    {
      "epoch": 0.65,
      "grad_norm": 4.354211807250977,
      "learning_rate": 0.0005329213383122944,
      "loss": 3.0923,
      "step": 50007
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.3992016315460205,
      "learning_rate": 0.0005329187602753294,
      "loss": 2.9329,
      "step": 50008
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5063811540603638,
      "learning_rate": 0.0005329161821950604,
      "loss": 2.807,
      "step": 50009
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.824678659439087,
      "learning_rate": 0.0005329136040714877,
      "loss": 3.0668,
      "step": 50010
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9001611471176147,
      "learning_rate": 0.0005329110259046122,
      "loss": 2.9766,
      "step": 50011
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.119729995727539,
      "learning_rate": 0.000532908447694434,
      "loss": 3.0623,
      "step": 50012
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5754597187042236,
      "learning_rate": 0.0005329058694409537,
      "loss": 3.0629,
      "step": 50013
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3449060916900635,
      "learning_rate": 0.0005329032911441719,
      "loss": 3.0561,
      "step": 50014
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7483702898025513,
      "learning_rate": 0.0005329007128040889,
      "loss": 3.0855,
      "step": 50015
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.632880449295044,
      "learning_rate": 0.0005328981344207052,
      "loss": 3.0657,
      "step": 50016
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2465440034866333,
      "learning_rate": 0.0005328955559940214,
      "loss": 2.8213,
      "step": 50017
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5758792161941528,
      "learning_rate": 0.0005328929775240379,
      "loss": 2.9483,
      "step": 50018
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8535805940628052,
      "learning_rate": 0.0005328903990107554,
      "loss": 2.8052,
      "step": 50019
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8158971071243286,
      "learning_rate": 0.0005328878204541739,
      "loss": 3.2681,
      "step": 50020
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4733554124832153,
      "learning_rate": 0.0005328852418542943,
      "loss": 2.9392,
      "step": 50021
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.401970624923706,
      "learning_rate": 0.0005328826632111168,
      "loss": 2.7883,
      "step": 50022
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.708850383758545,
      "learning_rate": 0.0005328800845246423,
      "loss": 3.1942,
      "step": 50023
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6786199808120728,
      "learning_rate": 0.0005328775057948708,
      "loss": 3.1311,
      "step": 50024
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.485090732574463,
      "learning_rate": 0.000532874927021803,
      "loss": 3.1,
      "step": 50025
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5520248413085938,
      "learning_rate": 0.0005328723482054393,
      "loss": 3.1489,
      "step": 50026
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2737109661102295,
      "learning_rate": 0.0005328697693457803,
      "loss": 3.1599,
      "step": 50027
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.397244930267334,
      "learning_rate": 0.0005328671904428264,
      "loss": 3.1166,
      "step": 50028
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.783714771270752,
      "learning_rate": 0.0005328646114965781,
      "loss": 2.829,
      "step": 50029
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7427812814712524,
      "learning_rate": 0.0005328620325070358,
      "loss": 3.125,
      "step": 50030
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4221813678741455,
      "learning_rate": 0.0005328594534742002,
      "loss": 3.1427,
      "step": 50031
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7574102878570557,
      "learning_rate": 0.0005328568743980716,
      "loss": 2.8872,
      "step": 50032
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5872629880905151,
      "learning_rate": 0.0005328542952786504,
      "loss": 3.2517,
      "step": 50033
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8868670463562012,
      "learning_rate": 0.0005328517161159373,
      "loss": 3.0082,
      "step": 50034
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.120717763900757,
      "learning_rate": 0.0005328491369099327,
      "loss": 2.872,
      "step": 50035
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3632131814956665,
      "learning_rate": 0.000532846557660637,
      "loss": 3.0292,
      "step": 50036
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5130127668380737,
      "learning_rate": 0.0005328439783680506,
      "loss": 3.1536,
      "step": 50037
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5876682996749878,
      "learning_rate": 0.0005328413990321743,
      "loss": 3.0387,
      "step": 50038
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5085688829421997,
      "learning_rate": 0.0005328388196530082,
      "loss": 3.0338,
      "step": 50039
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7240296602249146,
      "learning_rate": 0.0005328362402305532,
      "loss": 2.6029,
      "step": 50040
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3192332983016968,
      "learning_rate": 0.0005328336607648094,
      "loss": 2.9592,
      "step": 50041
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8018015623092651,
      "learning_rate": 0.0005328310812557774,
      "loss": 2.9476,
      "step": 50042
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5282609462738037,
      "learning_rate": 0.0005328285017034578,
      "loss": 3.1661,
      "step": 50043
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4622876644134521,
      "learning_rate": 0.0005328259221078509,
      "loss": 2.9969,
      "step": 50044
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4709149599075317,
      "learning_rate": 0.0005328233424689574,
      "loss": 3.0583,
      "step": 50045
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7438771724700928,
      "learning_rate": 0.0005328207627867775,
      "loss": 2.9816,
      "step": 50046
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.0148422718048096,
      "learning_rate": 0.0005328181830613119,
      "loss": 3.1374,
      "step": 50047
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9767866134643555,
      "learning_rate": 0.0005328156032925609,
      "loss": 3.1248,
      "step": 50048
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8867026567459106,
      "learning_rate": 0.0005328130234805253,
      "loss": 3.0809,
      "step": 50049
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4472090005874634,
      "learning_rate": 0.0005328104436252052,
      "loss": 3.1057,
      "step": 50050
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.3846373558044434,
      "learning_rate": 0.0005328078637266013,
      "loss": 3.1833,
      "step": 50051
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6280826330184937,
      "learning_rate": 0.000532805283784714,
      "loss": 3.1949,
      "step": 50052
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5223515033721924,
      "learning_rate": 0.0005328027037995438,
      "loss": 3.319,
      "step": 50053
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.216587543487549,
      "learning_rate": 0.0005328001237710912,
      "loss": 3.3809,
      "step": 50054
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.9310953617095947,
      "learning_rate": 0.0005327975436993566,
      "loss": 3.0156,
      "step": 50055
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9824907779693604,
      "learning_rate": 0.0005327949635843405,
      "loss": 2.9348,
      "step": 50056
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5784202814102173,
      "learning_rate": 0.0005327923834260437,
      "loss": 2.9949,
      "step": 50057
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7799934148788452,
      "learning_rate": 0.0005327898032244662,
      "loss": 3.2638,
      "step": 50058
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.051394462585449,
      "learning_rate": 0.0005327872229796087,
      "loss": 3.0715,
      "step": 50059
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5792948007583618,
      "learning_rate": 0.0005327846426914716,
      "loss": 2.9676,
      "step": 50060
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6147509813308716,
      "learning_rate": 0.0005327820623600556,
      "loss": 3.062,
      "step": 50061
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7049179077148438,
      "learning_rate": 0.0005327794819853608,
      "loss": 2.7435,
      "step": 50062
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5063263177871704,
      "learning_rate": 0.0005327769015673881,
      "loss": 2.9128,
      "step": 50063
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.694589376449585,
      "learning_rate": 0.0005327743211061376,
      "loss": 2.7425,
      "step": 50064
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.242467164993286,
      "learning_rate": 0.0005327717406016101,
      "loss": 2.9608,
      "step": 50065
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4759867191314697,
      "learning_rate": 0.0005327691600538059,
      "loss": 3.1433,
      "step": 50066
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4100366830825806,
      "learning_rate": 0.0005327665794627256,
      "loss": 2.9663,
      "step": 50067
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7925602197647095,
      "learning_rate": 0.0005327639988283694,
      "loss": 3.0785,
      "step": 50068
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.666081190109253,
      "learning_rate": 0.0005327614181507382,
      "loss": 2.9718,
      "step": 50069
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5819677114486694,
      "learning_rate": 0.0005327588374298321,
      "loss": 2.8275,
      "step": 50070
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.57658851146698,
      "learning_rate": 0.0005327562566656518,
      "loss": 3.1348,
      "step": 50071
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.707884669303894,
      "learning_rate": 0.0005327536758581977,
      "loss": 3.0867,
      "step": 50072
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.966051459312439,
      "learning_rate": 0.0005327510950074704,
      "loss": 2.9867,
      "step": 50073
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.704005479812622,
      "learning_rate": 0.0005327485141134701,
      "loss": 3.1402,
      "step": 50074
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.641387701034546,
      "learning_rate": 0.0005327459331761976,
      "loss": 3.4587,
      "step": 50075
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.08103609085083,
      "learning_rate": 0.0005327433521956532,
      "loss": 2.8356,
      "step": 50076
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.46489679813385,
      "learning_rate": 0.0005327407711718374,
      "loss": 2.9809,
      "step": 50077
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7203489542007446,
      "learning_rate": 0.0005327381901047507,
      "loss": 3.1381,
      "step": 50078
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.425768494606018,
      "learning_rate": 0.0005327356089943935,
      "loss": 3.0013,
      "step": 50079
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6797103881835938,
      "learning_rate": 0.0005327330278407664,
      "loss": 2.9809,
      "step": 50080
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.779219388961792,
      "learning_rate": 0.00053273044664387,
      "loss": 2.8107,
      "step": 50081
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.146843194961548,
      "learning_rate": 0.0005327278654037044,
      "loss": 3.0053,
      "step": 50082
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4366979598999023,
      "learning_rate": 0.0005327252841202704,
      "loss": 3.2182,
      "step": 50083
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.686048984527588,
      "learning_rate": 0.0005327227027935684,
      "loss": 2.8029,
      "step": 50084
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3021527528762817,
      "learning_rate": 0.0005327201214235988,
      "loss": 3.0118,
      "step": 50085
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3649039268493652,
      "learning_rate": 0.0005327175400103622,
      "loss": 2.8565,
      "step": 50086
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4824984073638916,
      "learning_rate": 0.000532714958553859,
      "loss": 3.2281,
      "step": 50087
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.759759783744812,
      "learning_rate": 0.0005327123770540896,
      "loss": 3.0114,
      "step": 50088
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.048682928085327,
      "learning_rate": 0.0005327097955110546,
      "loss": 3.0218,
      "step": 50089
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4157787561416626,
      "learning_rate": 0.0005327072139247545,
      "loss": 2.8598,
      "step": 50090
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2318179607391357,
      "learning_rate": 0.0005327046322951898,
      "loss": 3.1604,
      "step": 50091
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.451911449432373,
      "learning_rate": 0.0005327020506223608,
      "loss": 3.27,
      "step": 50092
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5496708154678345,
      "learning_rate": 0.0005326994689062682,
      "loss": 3.1179,
      "step": 50093
    },
    {
      "epoch": 0.65,
      "grad_norm": 4.171739101409912,
      "learning_rate": 0.0005326968871469124,
      "loss": 3.0652,
      "step": 50094
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.7713544368743896,
      "learning_rate": 0.0005326943053442937,
      "loss": 3.0994,
      "step": 50095
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6565382480621338,
      "learning_rate": 0.0005326917234984128,
      "loss": 3.0812,
      "step": 50096
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6526063680648804,
      "learning_rate": 0.0005326891416092701,
      "loss": 3.1801,
      "step": 50097
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7244701385498047,
      "learning_rate": 0.0005326865596768661,
      "loss": 3.0042,
      "step": 50098
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6047372817993164,
      "learning_rate": 0.0005326839777012014,
      "loss": 3.1075,
      "step": 50099
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8810778856277466,
      "learning_rate": 0.0005326813956822762,
      "loss": 3.0202,
      "step": 50100
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4478659629821777,
      "learning_rate": 0.0005326788136200913,
      "loss": 2.9741,
      "step": 50101
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3887009620666504,
      "learning_rate": 0.0005326762315146469,
      "loss": 3.1429,
      "step": 50102
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5758370161056519,
      "learning_rate": 0.0005326736493659435,
      "loss": 3.1292,
      "step": 50103
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.27465558052063,
      "learning_rate": 0.0005326710671739818,
      "loss": 3.0731,
      "step": 50104
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6755261421203613,
      "learning_rate": 0.0005326684849387621,
      "loss": 3.0678,
      "step": 50105
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4037389755249023,
      "learning_rate": 0.0005326659026602852,
      "loss": 2.87,
      "step": 50106
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.211350202560425,
      "learning_rate": 0.000532663320338551,
      "loss": 2.9196,
      "step": 50107
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.51018226146698,
      "learning_rate": 0.0005326607379735604,
      "loss": 3.0528,
      "step": 50108
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1238653659820557,
      "learning_rate": 0.0005326581555653138,
      "loss": 3.2674,
      "step": 50109
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.359092950820923,
      "learning_rate": 0.0005326555731138117,
      "loss": 3.0175,
      "step": 50110
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7771445512771606,
      "learning_rate": 0.0005326529906190543,
      "loss": 2.9505,
      "step": 50111
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.630839228630066,
      "learning_rate": 0.0005326504080810426,
      "loss": 3.1405,
      "step": 50112
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4871010780334473,
      "learning_rate": 0.0005326478254997766,
      "loss": 2.9787,
      "step": 50113
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.217811107635498,
      "learning_rate": 0.000532645242875257,
      "loss": 2.8914,
      "step": 50114
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5837539434432983,
      "learning_rate": 0.0005326426602074844,
      "loss": 2.986,
      "step": 50115
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6962182521820068,
      "learning_rate": 0.000532640077496459,
      "loss": 3.1482,
      "step": 50116
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5018912553787231,
      "learning_rate": 0.0005326374947421816,
      "loss": 3.2154,
      "step": 50117
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5527632236480713,
      "learning_rate": 0.0005326349119446523,
      "loss": 3.076,
      "step": 50118
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4049701690673828,
      "learning_rate": 0.0005326323291038719,
      "loss": 2.8594,
      "step": 50119
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5854696035385132,
      "learning_rate": 0.0005326297462198407,
      "loss": 3.2524,
      "step": 50120
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.564225673675537,
      "learning_rate": 0.0005326271632925592,
      "loss": 2.9905,
      "step": 50121
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6623505353927612,
      "learning_rate": 0.0005326245803220279,
      "loss": 2.8506,
      "step": 50122
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5480961799621582,
      "learning_rate": 0.0005326219973082474,
      "loss": 3.0023,
      "step": 50123
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4154852628707886,
      "learning_rate": 0.000532619414251218,
      "loss": 2.6999,
      "step": 50124
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5123169422149658,
      "learning_rate": 0.0005326168311509404,
      "loss": 2.9842,
      "step": 50125
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7316627502441406,
      "learning_rate": 0.0005326142480074149,
      "loss": 2.8733,
      "step": 50126
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.8546276092529297,
      "learning_rate": 0.0005326116648206419,
      "loss": 3.1461,
      "step": 50127
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6227741241455078,
      "learning_rate": 0.0005326090815906221,
      "loss": 3.0375,
      "step": 50128
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.324999451637268,
      "learning_rate": 0.0005326064983173558,
      "loss": 3.0499,
      "step": 50129
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5240614414215088,
      "learning_rate": 0.0005326039150008436,
      "loss": 3.2437,
      "step": 50130
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9129137992858887,
      "learning_rate": 0.000532601331641086,
      "loss": 3.0662,
      "step": 50131
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.474825859069824,
      "learning_rate": 0.0005325987482380834,
      "loss": 3.0998,
      "step": 50132
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7374509572982788,
      "learning_rate": 0.0005325961647918362,
      "loss": 2.9249,
      "step": 50133
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2958805561065674,
      "learning_rate": 0.0005325935813023451,
      "loss": 3.0882,
      "step": 50134
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.760122060775757,
      "learning_rate": 0.0005325909977696104,
      "loss": 3.047,
      "step": 50135
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9971517324447632,
      "learning_rate": 0.0005325884141936327,
      "loss": 3.1091,
      "step": 50136
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4413297176361084,
      "learning_rate": 0.0005325858305744124,
      "loss": 2.9841,
      "step": 50137
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4667222499847412,
      "learning_rate": 0.0005325832469119499,
      "loss": 2.7482,
      "step": 50138
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1405301094055176,
      "learning_rate": 0.0005325806632062459,
      "loss": 3.436,
      "step": 50139
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.763741135597229,
      "learning_rate": 0.0005325780794573008,
      "loss": 3.0415,
      "step": 50140
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5661685466766357,
      "learning_rate": 0.0005325754956651149,
      "loss": 3.1275,
      "step": 50141
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.25432288646698,
      "learning_rate": 0.0005325729118296889,
      "loss": 3.0859,
      "step": 50142
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.357653260231018,
      "learning_rate": 0.0005325703279510232,
      "loss": 2.9609,
      "step": 50143
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4439178705215454,
      "learning_rate": 0.0005325677440291183,
      "loss": 3.1085,
      "step": 50144
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5577549934387207,
      "learning_rate": 0.0005325651600639747,
      "loss": 3.1604,
      "step": 50145
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5162643194198608,
      "learning_rate": 0.0005325625760555929,
      "loss": 3.0734,
      "step": 50146
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4570389986038208,
      "learning_rate": 0.0005325599920039732,
      "loss": 2.9607,
      "step": 50147
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8267018795013428,
      "learning_rate": 0.0005325574079091162,
      "loss": 3.082,
      "step": 50148
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5428969860076904,
      "learning_rate": 0.0005325548237710225,
      "loss": 3.1451,
      "step": 50149
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5541269779205322,
      "learning_rate": 0.0005325522395896923,
      "loss": 2.8888,
      "step": 50150
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5281472206115723,
      "learning_rate": 0.0005325496553651264,
      "loss": 3.1732,
      "step": 50151
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9765815734863281,
      "learning_rate": 0.0005325470710973252,
      "loss": 3.2471,
      "step": 50152
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5226856470108032,
      "learning_rate": 0.0005325444867862889,
      "loss": 2.9951,
      "step": 50153
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4159266948699951,
      "learning_rate": 0.0005325419024320182,
      "loss": 3.1817,
      "step": 50154
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9460328817367554,
      "learning_rate": 0.0005325393180345137,
      "loss": 2.8656,
      "step": 50155
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.686419129371643,
      "learning_rate": 0.0005325367335937758,
      "loss": 3.1172,
      "step": 50156
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4916387796401978,
      "learning_rate": 0.0005325341491098048,
      "loss": 3.0859,
      "step": 50157
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5608913898468018,
      "learning_rate": 0.0005325315645826013,
      "loss": 3.0089,
      "step": 50158
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.6213412284851074,
      "learning_rate": 0.0005325289800121658,
      "loss": 3.0365,
      "step": 50159
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4756146669387817,
      "learning_rate": 0.0005325263953984988,
      "loss": 3.2339,
      "step": 50160
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.672315835952759,
      "learning_rate": 0.0005325238107416008,
      "loss": 3.2878,
      "step": 50161
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5104862451553345,
      "learning_rate": 0.0005325212260414723,
      "loss": 3.1791,
      "step": 50162
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5794090032577515,
      "learning_rate": 0.0005325186412981136,
      "loss": 2.9411,
      "step": 50163
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6872161626815796,
      "learning_rate": 0.0005325160565115253,
      "loss": 3.1914,
      "step": 50164
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8428210020065308,
      "learning_rate": 0.0005325134716817079,
      "loss": 2.9575,
      "step": 50165
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.462538242340088,
      "learning_rate": 0.0005325108868086618,
      "loss": 2.99,
      "step": 50166
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.1282265186309814,
      "learning_rate": 0.0005325083018923876,
      "loss": 3.0676,
      "step": 50167
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7317994832992554,
      "learning_rate": 0.0005325057169328857,
      "loss": 2.8305,
      "step": 50168
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6543840169906616,
      "learning_rate": 0.0005325031319301566,
      "loss": 2.8652,
      "step": 50169
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.07548189163208,
      "learning_rate": 0.0005325005468842008,
      "loss": 3.2671,
      "step": 50170
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5591623783111572,
      "learning_rate": 0.0005324979617950188,
      "loss": 3.0312,
      "step": 50171
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.53727126121521,
      "learning_rate": 0.0005324953766626108,
      "loss": 2.9136,
      "step": 50172
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.458979606628418,
      "learning_rate": 0.0005324927914869778,
      "loss": 2.8695,
      "step": 50173
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.652635097503662,
      "learning_rate": 0.0005324902062681199,
      "loss": 3.1373,
      "step": 50174
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7811955213546753,
      "learning_rate": 0.0005324876210060376,
      "loss": 2.9222,
      "step": 50175
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5543951988220215,
      "learning_rate": 0.0005324850357007316,
      "loss": 3.1968,
      "step": 50176
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1337122917175293,
      "learning_rate": 0.0005324824503522022,
      "loss": 3.1549,
      "step": 50177
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8552676439285278,
      "learning_rate": 0.0005324798649604499,
      "loss": 3.1166,
      "step": 50178
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4988343715667725,
      "learning_rate": 0.0005324772795254752,
      "loss": 2.9875,
      "step": 50179
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6449525356292725,
      "learning_rate": 0.0005324746940472786,
      "loss": 3.1881,
      "step": 50180
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.506343126296997,
      "learning_rate": 0.0005324721085258606,
      "loss": 3.1049,
      "step": 50181
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5800007581710815,
      "learning_rate": 0.0005324695229612217,
      "loss": 3.0526,
      "step": 50182
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4878714084625244,
      "learning_rate": 0.0005324669373533621,
      "loss": 2.7486,
      "step": 50183
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.326486587524414,
      "learning_rate": 0.0005324643517022827,
      "loss": 2.9827,
      "step": 50184
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8763527870178223,
      "learning_rate": 0.0005324617660079837,
      "loss": 3.3621,
      "step": 50185
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6078733205795288,
      "learning_rate": 0.0005324591802704658,
      "loss": 3.2395,
      "step": 50186
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7044116258621216,
      "learning_rate": 0.0005324565944897292,
      "loss": 2.9632,
      "step": 50187
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.2341246604919434,
      "learning_rate": 0.0005324540086657746,
      "loss": 3.2151,
      "step": 50188
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.1446499824523926,
      "learning_rate": 0.0005324514227986024,
      "loss": 2.9591,
      "step": 50189
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4377750158309937,
      "learning_rate": 0.0005324488368882131,
      "loss": 3.0995,
      "step": 50190
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.126227617263794,
      "learning_rate": 0.0005324462509346072,
      "loss": 3.1472,
      "step": 50191
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.858818531036377,
      "learning_rate": 0.000532443664937785,
      "loss": 3.0186,
      "step": 50192
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3446052074432373,
      "learning_rate": 0.0005324410788977473,
      "loss": 3.084,
      "step": 50193
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8912931680679321,
      "learning_rate": 0.0005324384928144945,
      "loss": 3.1157,
      "step": 50194
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5034115314483643,
      "learning_rate": 0.0005324359066880268,
      "loss": 2.7859,
      "step": 50195
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4226548671722412,
      "learning_rate": 0.000532433320518345,
      "loss": 3.193,
      "step": 50196
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.889272689819336,
      "learning_rate": 0.0005324307343054494,
      "loss": 3.0128,
      "step": 50197
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6654202938079834,
      "learning_rate": 0.0005324281480493405,
      "loss": 2.9744,
      "step": 50198
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4789543151855469,
      "learning_rate": 0.0005324255617500189,
      "loss": 2.8623,
      "step": 50199
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.328866720199585,
      "learning_rate": 0.0005324229754074849,
      "loss": 2.8311,
      "step": 50200
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3866848945617676,
      "learning_rate": 0.0005324203890217391,
      "loss": 3.0977,
      "step": 50201
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.577268123626709,
      "learning_rate": 0.000532417802592782,
      "loss": 2.9265,
      "step": 50202
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9542226791381836,
      "learning_rate": 0.0005324152161206141,
      "loss": 3.2592,
      "step": 50203
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5903067588806152,
      "learning_rate": 0.0005324126296052358,
      "loss": 2.8972,
      "step": 50204
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4013590812683105,
      "learning_rate": 0.0005324100430466476,
      "loss": 3.1157,
      "step": 50205
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7877609729766846,
      "learning_rate": 0.00053240745644485,
      "loss": 3.145,
      "step": 50206
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8166420459747314,
      "learning_rate": 0.0005324048697998434,
      "loss": 2.8316,
      "step": 50207
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5084023475646973,
      "learning_rate": 0.0005324022831116284,
      "loss": 3.046,
      "step": 50208
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.690656065940857,
      "learning_rate": 0.0005323996963802054,
      "loss": 3.0563,
      "step": 50209
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4377096891403198,
      "learning_rate": 0.0005323971096055749,
      "loss": 3.2403,
      "step": 50210
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6814618110656738,
      "learning_rate": 0.0005323945227877374,
      "loss": 2.9427,
      "step": 50211
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5110540390014648,
      "learning_rate": 0.0005323919359266934,
      "loss": 2.9428,
      "step": 50212
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6584603786468506,
      "learning_rate": 0.0005323893490224433,
      "loss": 3.102,
      "step": 50213
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6247782707214355,
      "learning_rate": 0.0005323867620749877,
      "loss": 3.1184,
      "step": 50214
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5890944004058838,
      "learning_rate": 0.000532384175084327,
      "loss": 3.3551,
      "step": 50215
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.648147463798523,
      "learning_rate": 0.0005323815880504617,
      "loss": 3.1082,
      "step": 50216
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.608560562133789,
      "learning_rate": 0.0005323790009733922,
      "loss": 3.1809,
      "step": 50217
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5623358488082886,
      "learning_rate": 0.0005323764138531192,
      "loss": 3.2154,
      "step": 50218
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5960196256637573,
      "learning_rate": 0.0005323738266896429,
      "loss": 3.2512,
      "step": 50219
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4055691957473755,
      "learning_rate": 0.000532371239482964,
      "loss": 2.7488,
      "step": 50220
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9288139343261719,
      "learning_rate": 0.0005323686522330828,
      "loss": 3.0565,
      "step": 50221
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8384286165237427,
      "learning_rate": 0.0005323660649399999,
      "loss": 2.7614,
      "step": 50222
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5295137166976929,
      "learning_rate": 0.0005323634776037159,
      "loss": 3.0531,
      "step": 50223
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4383524656295776,
      "learning_rate": 0.000532360890224231,
      "loss": 3.219,
      "step": 50224
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.412692666053772,
      "learning_rate": 0.0005323583028015459,
      "loss": 2.9836,
      "step": 50225
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.425783395767212,
      "learning_rate": 0.000532355715335661,
      "loss": 3.289,
      "step": 50226
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.743406057357788,
      "learning_rate": 0.0005323531278265767,
      "loss": 3.1017,
      "step": 50227
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.4827635288238525,
      "learning_rate": 0.0005323505402742937,
      "loss": 3.0966,
      "step": 50228
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4506049156188965,
      "learning_rate": 0.0005323479526788123,
      "loss": 3.0545,
      "step": 50229
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7435343265533447,
      "learning_rate": 0.000532345365040133,
      "loss": 2.9841,
      "step": 50230
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.8847038745880127,
      "learning_rate": 0.0005323427773582563,
      "loss": 3.1433,
      "step": 50231
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6408082246780396,
      "learning_rate": 0.0005323401896331827,
      "loss": 3.2043,
      "step": 50232
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9635199308395386,
      "learning_rate": 0.0005323376018649127,
      "loss": 2.8985,
      "step": 50233
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4330322742462158,
      "learning_rate": 0.0005323350140534468,
      "loss": 2.9951,
      "step": 50234
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3610996007919312,
      "learning_rate": 0.0005323324261987854,
      "loss": 3.0573,
      "step": 50235
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5163756608963013,
      "learning_rate": 0.0005323298383009289,
      "loss": 2.9967,
      "step": 50236
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5180541276931763,
      "learning_rate": 0.000532327250359878,
      "loss": 3.1925,
      "step": 50237
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5810457468032837,
      "learning_rate": 0.0005323246623756331,
      "loss": 3.2185,
      "step": 50238
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7312126159667969,
      "learning_rate": 0.0005323220743481947,
      "loss": 2.8691,
      "step": 50239
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3432573080062866,
      "learning_rate": 0.0005323194862775632,
      "loss": 3.1957,
      "step": 50240
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5091670751571655,
      "learning_rate": 0.000532316898163739,
      "loss": 3.0035,
      "step": 50241
    },
    {
      "epoch": 0.65,
      "grad_norm": 13.722419738769531,
      "learning_rate": 0.0005323143100067229,
      "loss": 2.9508,
      "step": 50242
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8276880979537964,
      "learning_rate": 0.000532311721806515,
      "loss": 3.1947,
      "step": 50243
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.059499740600586,
      "learning_rate": 0.0005323091335631161,
      "loss": 3.0796,
      "step": 50244
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.552484154701233,
      "learning_rate": 0.0005323065452765265,
      "loss": 3.0385,
      "step": 50245
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7667510509490967,
      "learning_rate": 0.0005323039569467467,
      "loss": 3.2722,
      "step": 50246
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.26037335395813,
      "learning_rate": 0.0005323013685737773,
      "loss": 2.97,
      "step": 50247
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.007859468460083,
      "learning_rate": 0.0005322987801576186,
      "loss": 3.2157,
      "step": 50248
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.302408218383789,
      "learning_rate": 0.0005322961916982712,
      "loss": 2.9349,
      "step": 50249
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.8369598388671875,
      "learning_rate": 0.0005322936031957355,
      "loss": 2.9047,
      "step": 50250
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.816157341003418,
      "learning_rate": 0.0005322910146500121,
      "loss": 3.0673,
      "step": 50251
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8151267766952515,
      "learning_rate": 0.0005322884260611014,
      "loss": 3.0428,
      "step": 50252
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.701087236404419,
      "learning_rate": 0.0005322858374290038,
      "loss": 2.9643,
      "step": 50253
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.712355375289917,
      "learning_rate": 0.0005322832487537201,
      "loss": 3.1675,
      "step": 50254
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7981677055358887,
      "learning_rate": 0.0005322806600352504,
      "loss": 2.9054,
      "step": 50255
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.531734585762024,
      "learning_rate": 0.0005322780712735954,
      "loss": 2.8412,
      "step": 50256
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.101489305496216,
      "learning_rate": 0.0005322754824687555,
      "loss": 2.9705,
      "step": 50257
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5010509490966797,
      "learning_rate": 0.0005322728936207312,
      "loss": 3.1539,
      "step": 50258
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5099256038665771,
      "learning_rate": 0.0005322703047295229,
      "loss": 2.9561,
      "step": 50259
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4734537601470947,
      "learning_rate": 0.0005322677157951313,
      "loss": 2.8945,
      "step": 50260
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5840630531311035,
      "learning_rate": 0.0005322651268175568,
      "loss": 3.017,
      "step": 50261
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.740995168685913,
      "learning_rate": 0.0005322625377967996,
      "loss": 3.011,
      "step": 50262
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.578803300857544,
      "learning_rate": 0.0005322599487328606,
      "loss": 2.9556,
      "step": 50263
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5036325454711914,
      "learning_rate": 0.0005322573596257401,
      "loss": 3.148,
      "step": 50264
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.7958900928497314,
      "learning_rate": 0.0005322547704754385,
      "loss": 2.717,
      "step": 50265
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.073334217071533,
      "learning_rate": 0.0005322521812819562,
      "loss": 3.1548,
      "step": 50266
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9070281982421875,
      "learning_rate": 0.000532249592045294,
      "loss": 2.969,
      "step": 50267
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8732820749282837,
      "learning_rate": 0.0005322470027654523,
      "loss": 3.0047,
      "step": 50268
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5020012855529785,
      "learning_rate": 0.0005322444134424313,
      "loss": 3.2932,
      "step": 50269
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.060283899307251,
      "learning_rate": 0.0005322418240762319,
      "loss": 2.9767,
      "step": 50270
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.326617956161499,
      "learning_rate": 0.0005322392346668542,
      "loss": 3.0159,
      "step": 50271
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5110194683074951,
      "learning_rate": 0.0005322366452142988,
      "loss": 3.135,
      "step": 50272
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.769768476486206,
      "learning_rate": 0.0005322340557185664,
      "loss": 3.1287,
      "step": 50273
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.058802843093872,
      "learning_rate": 0.0005322314661796572,
      "loss": 3.1181,
      "step": 50274
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.566786766052246,
      "learning_rate": 0.0005322288765975719,
      "loss": 2.9453,
      "step": 50275
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5918413400650024,
      "learning_rate": 0.0005322262869723107,
      "loss": 3.1176,
      "step": 50276
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.8483251333236694,
      "learning_rate": 0.0005322236973038743,
      "loss": 3.3409,
      "step": 50277
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5756969451904297,
      "learning_rate": 0.0005322211075922632,
      "loss": 3.3048,
      "step": 50278
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4337188005447388,
      "learning_rate": 0.0005322185178374778,
      "loss": 2.8779,
      "step": 50279
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7577786445617676,
      "learning_rate": 0.0005322159280395184,
      "loss": 3.0327,
      "step": 50280
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6665712594985962,
      "learning_rate": 0.000532213338198386,
      "loss": 3.0924,
      "step": 50281
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6094921827316284,
      "learning_rate": 0.0005322107483140805,
      "loss": 3.3136,
      "step": 50282
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.559203028678894,
      "learning_rate": 0.0005322081583866028,
      "loss": 3.1434,
      "step": 50283
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5921016931533813,
      "learning_rate": 0.0005322055684159531,
      "loss": 3.3328,
      "step": 50284
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7540833950042725,
      "learning_rate": 0.0005322029784021321,
      "loss": 2.9809,
      "step": 50285
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3193466663360596,
      "learning_rate": 0.0005322003883451402,
      "loss": 3.1516,
      "step": 50286
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4437495470046997,
      "learning_rate": 0.0005321977982449777,
      "loss": 2.7155,
      "step": 50287
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7781306505203247,
      "learning_rate": 0.0005321952081016454,
      "loss": 2.9781,
      "step": 50288
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.9563100337982178,
      "learning_rate": 0.0005321926179151434,
      "loss": 2.9906,
      "step": 50289
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4152346849441528,
      "learning_rate": 0.0005321900276854727,
      "loss": 3.0922,
      "step": 50290
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7232415676116943,
      "learning_rate": 0.0005321874374126334,
      "loss": 2.9,
      "step": 50291
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.5059702396392822,
      "learning_rate": 0.000532184847096626,
      "loss": 3.2501,
      "step": 50292
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.632463812828064,
      "learning_rate": 0.0005321822567374511,
      "loss": 3.2052,
      "step": 50293
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4489468336105347,
      "learning_rate": 0.0005321796663351092,
      "loss": 3.026,
      "step": 50294
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.84587562084198,
      "learning_rate": 0.0005321770758896007,
      "loss": 2.8649,
      "step": 50295
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.607901692390442,
      "learning_rate": 0.000532174485400926,
      "loss": 2.8724,
      "step": 50296
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.5307484865188599,
      "learning_rate": 0.0005321718948690856,
      "loss": 3.1043,
      "step": 50297
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6227500438690186,
      "learning_rate": 0.0005321693042940803,
      "loss": 2.7976,
      "step": 50298
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.2663952112197876,
      "learning_rate": 0.0005321667136759102,
      "loss": 3.0952,
      "step": 50299
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.3832874298095703,
      "learning_rate": 0.000532164123014576,
      "loss": 3.1457,
      "step": 50300
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.588754653930664,
      "learning_rate": 0.0005321615323100781,
      "loss": 2.9629,
      "step": 50301
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6190924644470215,
      "learning_rate": 0.0005321589415624169,
      "loss": 2.8989,
      "step": 50302
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.6885021924972534,
      "learning_rate": 0.0005321563507715931,
      "loss": 2.9833,
      "step": 50303
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.3281774520874023,
      "learning_rate": 0.0005321537599376069,
      "loss": 3.1551,
      "step": 50304
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.602363109588623,
      "learning_rate": 0.0005321511690604591,
      "loss": 3.2121,
      "step": 50305
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.426668405532837,
      "learning_rate": 0.0005321485781401499,
      "loss": 3.1022,
      "step": 50306
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5586457252502441,
      "learning_rate": 0.00053214598717668,
      "loss": 3.0235,
      "step": 50307
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.128755569458008,
      "learning_rate": 0.0005321433961700497,
      "loss": 3.0195,
      "step": 50308
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4738081693649292,
      "learning_rate": 0.0005321408051202595,
      "loss": 3.1837,
      "step": 50309
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5508004426956177,
      "learning_rate": 0.00053213821402731,
      "loss": 3.1288,
      "step": 50310
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9531947374343872,
      "learning_rate": 0.0005321356228912017,
      "loss": 3.287,
      "step": 50311
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.811317801475525,
      "learning_rate": 0.0005321330317119349,
      "loss": 2.9844,
      "step": 50312
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5250824689865112,
      "learning_rate": 0.0005321304404895102,
      "loss": 3.0034,
      "step": 50313
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1483044624328613,
      "learning_rate": 0.0005321278492239281,
      "loss": 3.1431,
      "step": 50314
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8168011903762817,
      "learning_rate": 0.000532125257915189,
      "loss": 3.0171,
      "step": 50315
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.554880142211914,
      "learning_rate": 0.0005321226665632936,
      "loss": 3.0851,
      "step": 50316
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8117587566375732,
      "learning_rate": 0.000532120075168242,
      "loss": 3.0633,
      "step": 50317
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.503484845161438,
      "learning_rate": 0.000532117483730035,
      "loss": 2.9193,
      "step": 50318
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.429068684577942,
      "learning_rate": 0.000532114892248673,
      "loss": 3.087,
      "step": 50319
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5141417980194092,
      "learning_rate": 0.0005321123007241565,
      "loss": 3.0578,
      "step": 50320
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2529566287994385,
      "learning_rate": 0.0005321097091564858,
      "loss": 2.9061,
      "step": 50321
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.101231575012207,
      "learning_rate": 0.0005321071175456617,
      "loss": 3.2756,
      "step": 50322
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6795622110366821,
      "learning_rate": 0.0005321045258916843,
      "loss": 3.0837,
      "step": 50323
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.569158673286438,
      "learning_rate": 0.0005321019341945545,
      "loss": 3.0175,
      "step": 50324
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6144263744354248,
      "learning_rate": 0.0005320993424542724,
      "loss": 3.0697,
      "step": 50325
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.08579683303833,
      "learning_rate": 0.0005320967506708387,
      "loss": 3.2005,
      "step": 50326
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9582602977752686,
      "learning_rate": 0.0005320941588442539,
      "loss": 2.7884,
      "step": 50327
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.583319067955017,
      "learning_rate": 0.0005320915669745183,
      "loss": 2.9437,
      "step": 50328
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.139589786529541,
      "learning_rate": 0.0005320889750616326,
      "loss": 2.9596,
      "step": 50329
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.213742256164551,
      "learning_rate": 0.0005320863831055971,
      "loss": 3.2817,
      "step": 50330
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9033029079437256,
      "learning_rate": 0.0005320837911064124,
      "loss": 3.1314,
      "step": 50331
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.294757843017578,
      "learning_rate": 0.0005320811990640789,
      "loss": 2.855,
      "step": 50332
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.816668152809143,
      "learning_rate": 0.0005320786069785972,
      "loss": 2.9637,
      "step": 50333
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4631913900375366,
      "learning_rate": 0.0005320760148499676,
      "loss": 3.0154,
      "step": 50334
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1640124320983887,
      "learning_rate": 0.0005320734226781908,
      "loss": 3.251,
      "step": 50335
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9830647706985474,
      "learning_rate": 0.0005320708304632671,
      "loss": 3.1747,
      "step": 50336
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0835492610931396,
      "learning_rate": 0.0005320682382051971,
      "loss": 3.1839,
      "step": 50337
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5326611995697021,
      "learning_rate": 0.0005320656459039812,
      "loss": 3.256,
      "step": 50338
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.062851667404175,
      "learning_rate": 0.0005320630535596199,
      "loss": 3.2004,
      "step": 50339
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.187080144882202,
      "learning_rate": 0.0005320604611721138,
      "loss": 3.3112,
      "step": 50340
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.655398964881897,
      "learning_rate": 0.0005320578687414632,
      "loss": 3.2081,
      "step": 50341
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1186230182647705,
      "learning_rate": 0.0005320552762676686,
      "loss": 3.1544,
      "step": 50342
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.325981855392456,
      "learning_rate": 0.0005320526837507306,
      "loss": 2.8985,
      "step": 50343
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3949642181396484,
      "learning_rate": 0.0005320500911906497,
      "loss": 3.0745,
      "step": 50344
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5044478178024292,
      "learning_rate": 0.0005320474985874261,
      "loss": 3.1616,
      "step": 50345
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.307744026184082,
      "learning_rate": 0.0005320449059410607,
      "loss": 3.0767,
      "step": 50346
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.005544662475586,
      "learning_rate": 0.0005320423132515538,
      "loss": 2.8313,
      "step": 50347
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.548738956451416,
      "learning_rate": 0.0005320397205189057,
      "loss": 3.0676,
      "step": 50348
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.478535771369934,
      "learning_rate": 0.0005320371277431172,
      "loss": 2.8662,
      "step": 50349
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.431769847869873,
      "learning_rate": 0.0005320345349241885,
      "loss": 2.8454,
      "step": 50350
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5573840141296387,
      "learning_rate": 0.0005320319420621201,
      "loss": 2.9345,
      "step": 50351
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5279475450515747,
      "learning_rate": 0.0005320293491569127,
      "loss": 3.1793,
      "step": 50352
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.241868734359741,
      "learning_rate": 0.0005320267562085667,
      "loss": 2.8936,
      "step": 50353
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3575217723846436,
      "learning_rate": 0.0005320241632170825,
      "loss": 3.0262,
      "step": 50354
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.238055944442749,
      "learning_rate": 0.0005320215701824606,
      "loss": 3.1212,
      "step": 50355
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7152882814407349,
      "learning_rate": 0.0005320189771047016,
      "loss": 2.9572,
      "step": 50356
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3523201942443848,
      "learning_rate": 0.0005320163839838058,
      "loss": 3.1617,
      "step": 50357
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7647627592086792,
      "learning_rate": 0.0005320137908197738,
      "loss": 3.1954,
      "step": 50358
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6457897424697876,
      "learning_rate": 0.0005320111976126061,
      "loss": 3.0449,
      "step": 50359
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5807968378067017,
      "learning_rate": 0.0005320086043623031,
      "loss": 3.0565,
      "step": 50360
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4119644165039062,
      "learning_rate": 0.0005320060110688653,
      "loss": 3.198,
      "step": 50361
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.371462106704712,
      "learning_rate": 0.0005320034177322933,
      "loss": 2.941,
      "step": 50362
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.499831199645996,
      "learning_rate": 0.0005320008243525874,
      "loss": 3.0482,
      "step": 50363
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.370113730430603,
      "learning_rate": 0.0005319982309297482,
      "loss": 3.121,
      "step": 50364
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6004599332809448,
      "learning_rate": 0.0005319956374637762,
      "loss": 3.2497,
      "step": 50365
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.501461148262024,
      "learning_rate": 0.0005319930439546716,
      "loss": 3.0895,
      "step": 50366
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5414468050003052,
      "learning_rate": 0.0005319904504024354,
      "loss": 2.9453,
      "step": 50367
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4551271200180054,
      "learning_rate": 0.0005319878568070676,
      "loss": 2.8089,
      "step": 50368
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6568620204925537,
      "learning_rate": 0.000531985263168569,
      "loss": 3.1629,
      "step": 50369
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4411866664886475,
      "learning_rate": 0.0005319826694869399,
      "loss": 3.1096,
      "step": 50370
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9092533588409424,
      "learning_rate": 0.0005319800757621807,
      "loss": 3.2856,
      "step": 50371
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3607234954833984,
      "learning_rate": 0.0005319774819942923,
      "loss": 2.8343,
      "step": 50372
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4585366249084473,
      "learning_rate": 0.0005319748881832747,
      "loss": 3.2813,
      "step": 50373
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4934327602386475,
      "learning_rate": 0.0005319722943291287,
      "loss": 3.0481,
      "step": 50374
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8191009759902954,
      "learning_rate": 0.0005319697004318547,
      "loss": 2.9567,
      "step": 50375
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5460691452026367,
      "learning_rate": 0.0005319671064914532,
      "loss": 3.0096,
      "step": 50376
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4277693033218384,
      "learning_rate": 0.0005319645125079245,
      "loss": 3.1915,
      "step": 50377
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1975531578063965,
      "learning_rate": 0.0005319619184812692,
      "loss": 3.2328,
      "step": 50378
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.077324390411377,
      "learning_rate": 0.0005319593244114878,
      "loss": 3.0366,
      "step": 50379
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6064921617507935,
      "learning_rate": 0.0005319567302985809,
      "loss": 3.1258,
      "step": 50380
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7241116762161255,
      "learning_rate": 0.0005319541361425488,
      "loss": 3.3459,
      "step": 50381
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.1805601119995117,
      "learning_rate": 0.0005319515419433921,
      "loss": 3.3139,
      "step": 50382
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3803927898406982,
      "learning_rate": 0.0005319489477011111,
      "loss": 3.2368,
      "step": 50383
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4197980165481567,
      "learning_rate": 0.0005319463534157066,
      "loss": 3.037,
      "step": 50384
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6783713102340698,
      "learning_rate": 0.0005319437590871787,
      "loss": 3.0541,
      "step": 50385
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.6655354499816895,
      "learning_rate": 0.0005319411647155282,
      "loss": 3.1093,
      "step": 50386
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6733804941177368,
      "learning_rate": 0.0005319385703007554,
      "loss": 3.2704,
      "step": 50387
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7613786458969116,
      "learning_rate": 0.0005319359758428609,
      "loss": 2.724,
      "step": 50388
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6953009366989136,
      "learning_rate": 0.0005319333813418451,
      "loss": 3.0271,
      "step": 50389
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5703397989273071,
      "learning_rate": 0.0005319307867977085,
      "loss": 3.0647,
      "step": 50390
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6107336282730103,
      "learning_rate": 0.0005319281922104516,
      "loss": 3.0419,
      "step": 50391
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7969591617584229,
      "learning_rate": 0.0005319255975800748,
      "loss": 2.8526,
      "step": 50392
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2773584127426147,
      "learning_rate": 0.0005319230029065787,
      "loss": 3.1207,
      "step": 50393
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.470375657081604,
      "learning_rate": 0.0005319204081899638,
      "loss": 2.9744,
      "step": 50394
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.277239441871643,
      "learning_rate": 0.0005319178134302304,
      "loss": 3.1443,
      "step": 50395
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9703235626220703,
      "learning_rate": 0.0005319152186273792,
      "loss": 3.2723,
      "step": 50396
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7818230390548706,
      "learning_rate": 0.0005319126237814106,
      "loss": 2.9518,
      "step": 50397
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6077210903167725,
      "learning_rate": 0.000531910028892325,
      "loss": 3.0618,
      "step": 50398
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.305101990699768,
      "learning_rate": 0.0005319074339601229,
      "loss": 3.0773,
      "step": 50399
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4583375453948975,
      "learning_rate": 0.000531904838984805,
      "loss": 3.1071,
      "step": 50400
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6956183910369873,
      "learning_rate": 0.0005319022439663714,
      "loss": 3.083,
      "step": 50401
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3873943090438843,
      "learning_rate": 0.000531899648904823,
      "loss": 3.2555,
      "step": 50402
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4152358770370483,
      "learning_rate": 0.0005318970538001599,
      "loss": 3.0658,
      "step": 50403
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8692861795425415,
      "learning_rate": 0.0005318944586523829,
      "loss": 3.0832,
      "step": 50404
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5297220945358276,
      "learning_rate": 0.0005318918634614922,
      "loss": 3.0585,
      "step": 50405
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6203100681304932,
      "learning_rate": 0.0005318892682274885,
      "loss": 3.0926,
      "step": 50406
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3191347122192383,
      "learning_rate": 0.0005318866729503723,
      "loss": 2.9353,
      "step": 50407
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5065668821334839,
      "learning_rate": 0.0005318840776301439,
      "loss": 2.8858,
      "step": 50408
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0649802684783936,
      "learning_rate": 0.0005318814822668039,
      "loss": 2.8717,
      "step": 50409
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.579067349433899,
      "learning_rate": 0.0005318788868603527,
      "loss": 3.0687,
      "step": 50410
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3295788764953613,
      "learning_rate": 0.0005318762914107908,
      "loss": 3.0332,
      "step": 50411
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4577301740646362,
      "learning_rate": 0.0005318736959181188,
      "loss": 3.2443,
      "step": 50412
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0230345726013184,
      "learning_rate": 0.0005318711003823372,
      "loss": 3.1357,
      "step": 50413
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5693271160125732,
      "learning_rate": 0.0005318685048034462,
      "loss": 3.3289,
      "step": 50414
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5002814531326294,
      "learning_rate": 0.0005318659091814466,
      "loss": 3.0395,
      "step": 50415
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.944693684577942,
      "learning_rate": 0.0005318633135163387,
      "loss": 2.876,
      "step": 50416
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.342915415763855,
      "learning_rate": 0.000531860717808123,
      "loss": 3.1304,
      "step": 50417
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.318607211112976,
      "learning_rate": 0.0005318581220568001,
      "loss": 3.0332,
      "step": 50418
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.492292881011963,
      "learning_rate": 0.0005318555262623703,
      "loss": 2.8934,
      "step": 50419
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.330819845199585,
      "learning_rate": 0.0005318529304248343,
      "loss": 3.1275,
      "step": 50420
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.257464647293091,
      "learning_rate": 0.0005318503345441923,
      "loss": 3.2395,
      "step": 50421
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9056038856506348,
      "learning_rate": 0.0005318477386204451,
      "loss": 2.8493,
      "step": 50422
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5615241527557373,
      "learning_rate": 0.000531845142653593,
      "loss": 2.8899,
      "step": 50423
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7027215957641602,
      "learning_rate": 0.0005318425466436363,
      "loss": 2.9265,
      "step": 50424
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8541346788406372,
      "learning_rate": 0.000531839950590576,
      "loss": 3.3107,
      "step": 50425
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1165637969970703,
      "learning_rate": 0.0005318373544944122,
      "loss": 3.1232,
      "step": 50426
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0292749404907227,
      "learning_rate": 0.0005318347583551453,
      "loss": 3.0048,
      "step": 50427
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.802052617073059,
      "learning_rate": 0.0005318321621727761,
      "loss": 3.0813,
      "step": 50428
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6083449125289917,
      "learning_rate": 0.0005318295659473049,
      "loss": 2.8762,
      "step": 50429
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5389068126678467,
      "learning_rate": 0.0005318269696787322,
      "loss": 3.0777,
      "step": 50430
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8897002935409546,
      "learning_rate": 0.0005318243733670585,
      "loss": 3.0265,
      "step": 50431
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.114079236984253,
      "learning_rate": 0.0005318217770122841,
      "loss": 3.1607,
      "step": 50432
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4907045364379883,
      "learning_rate": 0.0005318191806144098,
      "loss": 2.9881,
      "step": 50433
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.035614013671875,
      "learning_rate": 0.000531816584173436,
      "loss": 3.242,
      "step": 50434
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4889791011810303,
      "learning_rate": 0.000531813987689363,
      "loss": 2.9498,
      "step": 50435
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8516654968261719,
      "learning_rate": 0.0005318113911621915,
      "loss": 2.723,
      "step": 50436
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.875861406326294,
      "learning_rate": 0.0005318087945919219,
      "loss": 3.0744,
      "step": 50437
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4923207759857178,
      "learning_rate": 0.0005318061979785547,
      "loss": 3.1119,
      "step": 50438
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8852009773254395,
      "learning_rate": 0.0005318036013220902,
      "loss": 3.391,
      "step": 50439
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3907217979431152,
      "learning_rate": 0.0005318010046225291,
      "loss": 3.3684,
      "step": 50440
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.773089051246643,
      "learning_rate": 0.0005317984078798719,
      "loss": 3.1914,
      "step": 50441
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.200596332550049,
      "learning_rate": 0.0005317958110941189,
      "loss": 3.0692,
      "step": 50442
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6394559144973755,
      "learning_rate": 0.0005317932142652707,
      "loss": 2.9661,
      "step": 50443
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.541178822517395,
      "learning_rate": 0.0005317906173933277,
      "loss": 3.1385,
      "step": 50444
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8916006088256836,
      "learning_rate": 0.0005317880204782906,
      "loss": 3.2068,
      "step": 50445
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.298144578933716,
      "learning_rate": 0.0005317854235201596,
      "loss": 2.9761,
      "step": 50446
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8568650484085083,
      "learning_rate": 0.0005317828265189355,
      "loss": 3.1381,
      "step": 50447
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.743154525756836,
      "learning_rate": 0.0005317802294746184,
      "loss": 3.2091,
      "step": 50448
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.6909983158111572,
      "learning_rate": 0.000531777632387209,
      "loss": 2.9806,
      "step": 50449
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.785659670829773,
      "learning_rate": 0.0005317750352567078,
      "loss": 3.1438,
      "step": 50450
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5631545782089233,
      "learning_rate": 0.0005317724380831154,
      "loss": 3.2256,
      "step": 50451
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4620071649551392,
      "learning_rate": 0.0005317698408664319,
      "loss": 3.2078,
      "step": 50452
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4915825128555298,
      "learning_rate": 0.0005317672436066581,
      "loss": 2.9391,
      "step": 50453
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4605658054351807,
      "learning_rate": 0.0005317646463037943,
      "loss": 2.8784,
      "step": 50454
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.645380973815918,
      "learning_rate": 0.0005317620489578412,
      "loss": 3.0142,
      "step": 50455
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7002716064453125,
      "learning_rate": 0.0005317594515687992,
      "loss": 2.9358,
      "step": 50456
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4352388381958008,
      "learning_rate": 0.0005317568541366686,
      "loss": 2.9799,
      "step": 50457
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4441977739334106,
      "learning_rate": 0.0005317542566614502,
      "loss": 2.8106,
      "step": 50458
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6050965785980225,
      "learning_rate": 0.0005317516591431442,
      "loss": 2.9658,
      "step": 50459
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5231122970581055,
      "learning_rate": 0.0005317490615817512,
      "loss": 3.0091,
      "step": 50460
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3049025535583496,
      "learning_rate": 0.0005317464639772717,
      "loss": 3.0962,
      "step": 50461
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4271204471588135,
      "learning_rate": 0.0005317438663297061,
      "loss": 2.9569,
      "step": 50462
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8390098810195923,
      "learning_rate": 0.000531741268639055,
      "loss": 3.3184,
      "step": 50463
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9450571537017822,
      "learning_rate": 0.0005317386709053187,
      "loss": 3.1722,
      "step": 50464
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.684151291847229,
      "learning_rate": 0.0005317360731284979,
      "loss": 3.1033,
      "step": 50465
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9675756692886353,
      "learning_rate": 0.0005317334753085931,
      "loss": 3.1,
      "step": 50466
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3171802759170532,
      "learning_rate": 0.0005317308774456046,
      "loss": 3.0869,
      "step": 50467
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6902258396148682,
      "learning_rate": 0.0005317282795395328,
      "loss": 3.0695,
      "step": 50468
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4844403266906738,
      "learning_rate": 0.0005317256815903785,
      "loss": 2.9818,
      "step": 50469
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4705530405044556,
      "learning_rate": 0.000531723083598142,
      "loss": 3.15,
      "step": 50470
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9032280445098877,
      "learning_rate": 0.0005317204855628237,
      "loss": 3.0801,
      "step": 50471
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7411251068115234,
      "learning_rate": 0.0005317178874844243,
      "loss": 3.0914,
      "step": 50472
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8226494789123535,
      "learning_rate": 0.0005317152893629442,
      "loss": 2.8053,
      "step": 50473
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8685697317123413,
      "learning_rate": 0.0005317126911983837,
      "loss": 3.212,
      "step": 50474
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.298188328742981,
      "learning_rate": 0.0005317100929907436,
      "loss": 2.9713,
      "step": 50475
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.901808738708496,
      "learning_rate": 0.0005317074947400241,
      "loss": 3.0706,
      "step": 50476
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.594496726989746,
      "learning_rate": 0.000531704896446226,
      "loss": 2.9269,
      "step": 50477
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5001635551452637,
      "learning_rate": 0.0005317022981093494,
      "loss": 3.0272,
      "step": 50478
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6675817966461182,
      "learning_rate": 0.0005316996997293949,
      "loss": 3.1071,
      "step": 50479
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.325465440750122,
      "learning_rate": 0.0005316971013063632,
      "loss": 3.2003,
      "step": 50480
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2520837783813477,
      "learning_rate": 0.0005316945028402547,
      "loss": 3.0351,
      "step": 50481
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7971469163894653,
      "learning_rate": 0.0005316919043310696,
      "loss": 2.9914,
      "step": 50482
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.432780146598816,
      "learning_rate": 0.0005316893057788088,
      "loss": 2.9939,
      "step": 50483
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4929301738739014,
      "learning_rate": 0.0005316867071834726,
      "loss": 2.9017,
      "step": 50484
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.415411114692688,
      "learning_rate": 0.0005316841085450612,
      "loss": 2.748,
      "step": 50485
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6922669410705566,
      "learning_rate": 0.0005316815098635756,
      "loss": 2.9638,
      "step": 50486
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4280487298965454,
      "learning_rate": 0.000531678911139016,
      "loss": 3.0477,
      "step": 50487
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9135645627975464,
      "learning_rate": 0.0005316763123713828,
      "loss": 3.0298,
      "step": 50488
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8391120433807373,
      "learning_rate": 0.0005316737135606767,
      "loss": 2.8603,
      "step": 50489
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4803777933120728,
      "learning_rate": 0.0005316711147068981,
      "loss": 2.9489,
      "step": 50490
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.670172095298767,
      "learning_rate": 0.0005316685158100475,
      "loss": 2.8879,
      "step": 50491
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.019961357116699,
      "learning_rate": 0.0005316659168701252,
      "loss": 3.1488,
      "step": 50492
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.013469934463501,
      "learning_rate": 0.0005316633178871319,
      "loss": 2.6993,
      "step": 50493
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4680612087249756,
      "learning_rate": 0.000531660718861068,
      "loss": 3.0786,
      "step": 50494
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.027475357055664,
      "learning_rate": 0.0005316581197919339,
      "loss": 2.9293,
      "step": 50495
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.490917205810547,
      "learning_rate": 0.0005316555206797304,
      "loss": 2.8335,
      "step": 50496
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6948720216751099,
      "learning_rate": 0.0005316529215244577,
      "loss": 3.0123,
      "step": 50497
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.0260775089263916,
      "learning_rate": 0.0005316503223261163,
      "loss": 3.0827,
      "step": 50498
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6176674365997314,
      "learning_rate": 0.0005316477230847068,
      "loss": 3.1824,
      "step": 50499
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2384347915649414,
      "learning_rate": 0.0005316451238002296,
      "loss": 2.9003,
      "step": 50500
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4697972536087036,
      "learning_rate": 0.0005316425244726851,
      "loss": 3.1066,
      "step": 50501
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.7992210388183594,
      "learning_rate": 0.0005316399251020739,
      "loss": 2.7337,
      "step": 50502
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.8277788162231445,
      "learning_rate": 0.0005316373256883966,
      "loss": 2.9598,
      "step": 50503
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8016142845153809,
      "learning_rate": 0.0005316347262316535,
      "loss": 3.1956,
      "step": 50504
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4465651512145996,
      "learning_rate": 0.000531632126731845,
      "loss": 2.8882,
      "step": 50505
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.6025874614715576,
      "learning_rate": 0.0005316295271889719,
      "loss": 2.911,
      "step": 50506
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.404820680618286,
      "learning_rate": 0.0005316269276030344,
      "loss": 2.9411,
      "step": 50507
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5027306079864502,
      "learning_rate": 0.0005316243279740331,
      "loss": 3.0464,
      "step": 50508
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6591458320617676,
      "learning_rate": 0.0005316217283019685,
      "loss": 3.1584,
      "step": 50509
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.562090516090393,
      "learning_rate": 0.0005316191285868409,
      "loss": 3.1182,
      "step": 50510
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.4231069087982178,
      "learning_rate": 0.0005316165288286511,
      "loss": 2.9829,
      "step": 50511
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8830264806747437,
      "learning_rate": 0.0005316139290273993,
      "loss": 2.9589,
      "step": 50512
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9488911628723145,
      "learning_rate": 0.0005316113291830861,
      "loss": 3.0561,
      "step": 50513
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2010130882263184,
      "learning_rate": 0.0005316087292957121,
      "loss": 3.0924,
      "step": 50514
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.666202425956726,
      "learning_rate": 0.0005316061293652776,
      "loss": 3.0479,
      "step": 50515
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.419553279876709,
      "learning_rate": 0.0005316035293917831,
      "loss": 3.0952,
      "step": 50516
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8458800315856934,
      "learning_rate": 0.0005316009293752292,
      "loss": 3.029,
      "step": 50517
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0934717655181885,
      "learning_rate": 0.0005315983293156163,
      "loss": 3.2525,
      "step": 50518
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9652866125106812,
      "learning_rate": 0.000531595729212945,
      "loss": 2.8945,
      "step": 50519
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.345438003540039,
      "learning_rate": 0.0005315931290672154,
      "loss": 2.899,
      "step": 50520
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7162175178527832,
      "learning_rate": 0.0005315905288784284,
      "loss": 2.9667,
      "step": 50521
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.44963800907135,
      "learning_rate": 0.0005315879286465845,
      "loss": 3.0365,
      "step": 50522
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6002215147018433,
      "learning_rate": 0.0005315853283716839,
      "loss": 3.0305,
      "step": 50523
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6368285417556763,
      "learning_rate": 0.0005315827280537273,
      "loss": 3.0861,
      "step": 50524
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6015743017196655,
      "learning_rate": 0.000531580127692715,
      "loss": 2.9749,
      "step": 50525
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4527888298034668,
      "learning_rate": 0.0005315775272886476,
      "loss": 3.1803,
      "step": 50526
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3189162015914917,
      "learning_rate": 0.0005315749268415257,
      "loss": 2.9419,
      "step": 50527
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4260151386260986,
      "learning_rate": 0.0005315723263513495,
      "loss": 2.9927,
      "step": 50528
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4354621171951294,
      "learning_rate": 0.0005315697258181197,
      "loss": 3.1881,
      "step": 50529
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4307361841201782,
      "learning_rate": 0.0005315671252418366,
      "loss": 3.3791,
      "step": 50530
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9616317749023438,
      "learning_rate": 0.0005315645246225009,
      "loss": 3.0736,
      "step": 50531
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7561486959457397,
      "learning_rate": 0.000531561923960113,
      "loss": 2.9595,
      "step": 50532
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5739060640335083,
      "learning_rate": 0.0005315593232546733,
      "loss": 3.2351,
      "step": 50533
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4897654056549072,
      "learning_rate": 0.0005315567225061825,
      "loss": 3.0675,
      "step": 50534
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7810165882110596,
      "learning_rate": 0.0005315541217146408,
      "loss": 3.0924,
      "step": 50535
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5217804908752441,
      "learning_rate": 0.0005315515208800488,
      "loss": 3.0373,
      "step": 50536
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2731850147247314,
      "learning_rate": 0.000531548920002407,
      "loss": 3.1767,
      "step": 50537
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.451963186264038,
      "learning_rate": 0.000531546319081716,
      "loss": 3.1499,
      "step": 50538
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.706977367401123,
      "learning_rate": 0.0005315437181179761,
      "loss": 3.3673,
      "step": 50539
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4411157369613647,
      "learning_rate": 0.0005315411171111878,
      "loss": 3.0715,
      "step": 50540
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.357790231704712,
      "learning_rate": 0.0005315385160613517,
      "loss": 3.0007,
      "step": 50541
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8888720273971558,
      "learning_rate": 0.0005315359149684682,
      "loss": 3.1024,
      "step": 50542
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7311160564422607,
      "learning_rate": 0.0005315333138325378,
      "loss": 2.8818,
      "step": 50543
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4835056066513062,
      "learning_rate": 0.000531530712653561,
      "loss": 2.9573,
      "step": 50544
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5812795162200928,
      "learning_rate": 0.0005315281114315382,
      "loss": 3.1303,
      "step": 50545
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5463725328445435,
      "learning_rate": 0.0005315255101664701,
      "loss": 3.201,
      "step": 50546
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.481154441833496,
      "learning_rate": 0.0005315229088583569,
      "loss": 3.1284,
      "step": 50547
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.566135287284851,
      "learning_rate": 0.0005315203075071993,
      "loss": 3.3313,
      "step": 50548
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.717146635055542,
      "learning_rate": 0.0005315177061129977,
      "loss": 2.9672,
      "step": 50549
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5148762464523315,
      "learning_rate": 0.0005315151046757525,
      "loss": 3.1648,
      "step": 50550
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4116849899291992,
      "learning_rate": 0.0005315125031954644,
      "loss": 3.1077,
      "step": 50551
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.594512701034546,
      "learning_rate": 0.0005315099016721338,
      "loss": 2.9362,
      "step": 50552
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5246771574020386,
      "learning_rate": 0.000531507300105761,
      "loss": 3.122,
      "step": 50553
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.815808653831482,
      "learning_rate": 0.0005315046984963465,
      "loss": 3.0875,
      "step": 50554
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6875535249710083,
      "learning_rate": 0.0005315020968438911,
      "loss": 3.262,
      "step": 50555
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.81190824508667,
      "learning_rate": 0.0005314994951483951,
      "loss": 3.4105,
      "step": 50556
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6898614168167114,
      "learning_rate": 0.0005314968934098589,
      "loss": 3.2782,
      "step": 50557
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4414938688278198,
      "learning_rate": 0.0005314942916282831,
      "loss": 3.272,
      "step": 50558
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8519346714019775,
      "learning_rate": 0.0005314916898036681,
      "loss": 2.8566,
      "step": 50559
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3796155452728271,
      "learning_rate": 0.0005314890879360145,
      "loss": 3.3262,
      "step": 50560
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5631327629089355,
      "learning_rate": 0.0005314864860253226,
      "loss": 2.9518,
      "step": 50561
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5674540996551514,
      "learning_rate": 0.0005314838840715931,
      "loss": 2.7719,
      "step": 50562
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.134763717651367,
      "learning_rate": 0.0005314812820748264,
      "loss": 3.0425,
      "step": 50563
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5569424629211426,
      "learning_rate": 0.0005314786800350228,
      "loss": 3.11,
      "step": 50564
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4898591041564941,
      "learning_rate": 0.000531476077952183,
      "loss": 2.8861,
      "step": 50565
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7662538290023804,
      "learning_rate": 0.0005314734758263075,
      "loss": 2.8991,
      "step": 50566
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.967759132385254,
      "learning_rate": 0.0005314708736573967,
      "loss": 3.1779,
      "step": 50567
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8182209730148315,
      "learning_rate": 0.000531468271445451,
      "loss": 2.7592,
      "step": 50568
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1616897583007812,
      "learning_rate": 0.0005314656691904711,
      "loss": 2.8765,
      "step": 50569
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.768008232116699,
      "learning_rate": 0.0005314630668924573,
      "loss": 3.0536,
      "step": 50570
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.070590019226074,
      "learning_rate": 0.0005314604645514101,
      "loss": 3.3456,
      "step": 50571
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5513468980789185,
      "learning_rate": 0.0005314578621673301,
      "loss": 3.0275,
      "step": 50572
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.666409969329834,
      "learning_rate": 0.0005314552597402177,
      "loss": 2.9389,
      "step": 50573
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4559571743011475,
      "learning_rate": 0.0005314526572700734,
      "loss": 3.0087,
      "step": 50574
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.383943796157837,
      "learning_rate": 0.0005314500547568977,
      "loss": 3.0805,
      "step": 50575
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.831284761428833,
      "learning_rate": 0.0005314474522006912,
      "loss": 3.2061,
      "step": 50576
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.081996202468872,
      "learning_rate": 0.0005314448496014539,
      "loss": 3.4071,
      "step": 50577
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4682987928390503,
      "learning_rate": 0.0005314422469591868,
      "loss": 3.2164,
      "step": 50578
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.085625648498535,
      "learning_rate": 0.0005314396442738903,
      "loss": 3.0867,
      "step": 50579
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3824114799499512,
      "learning_rate": 0.0005314370415455648,
      "loss": 3.0277,
      "step": 50580
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.368014931678772,
      "learning_rate": 0.0005314344387742107,
      "loss": 3.2198,
      "step": 50581
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.577523946762085,
      "learning_rate": 0.0005314318359598287,
      "loss": 3.1391,
      "step": 50582
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.407336711883545,
      "learning_rate": 0.000531429233102419,
      "loss": 2.9342,
      "step": 50583
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1474883556365967,
      "learning_rate": 0.0005314266302019823,
      "loss": 3.052,
      "step": 50584
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.521780014038086,
      "learning_rate": 0.000531424027258519,
      "loss": 3.1072,
      "step": 50585
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8109022378921509,
      "learning_rate": 0.0005314214242720296,
      "loss": 3.0149,
      "step": 50586
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6986043453216553,
      "learning_rate": 0.0005314188212425146,
      "loss": 3.1476,
      "step": 50587
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6553168296813965,
      "learning_rate": 0.0005314162181699745,
      "loss": 3.0823,
      "step": 50588
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4284656047821045,
      "learning_rate": 0.0005314136150544097,
      "loss": 2.9547,
      "step": 50589
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3274126052856445,
      "learning_rate": 0.0005314110118958209,
      "loss": 2.9628,
      "step": 50590
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.294968843460083,
      "learning_rate": 0.0005314084086942083,
      "loss": 3.0446,
      "step": 50591
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7519888877868652,
      "learning_rate": 0.0005314058054495725,
      "loss": 2.9618,
      "step": 50592
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4615167379379272,
      "learning_rate": 0.0005314032021619139,
      "loss": 2.9011,
      "step": 50593
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5342038869857788,
      "learning_rate": 0.0005314005988312332,
      "loss": 2.848,
      "step": 50594
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6182416677474976,
      "learning_rate": 0.0005313979954575307,
      "loss": 2.9593,
      "step": 50595
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4774560928344727,
      "learning_rate": 0.000531395392040807,
      "loss": 3.0687,
      "step": 50596
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5325053930282593,
      "learning_rate": 0.0005313927885810625,
      "loss": 3.0455,
      "step": 50597
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2643688917160034,
      "learning_rate": 0.0005313901850782978,
      "loss": 3.3154,
      "step": 50598
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2677433490753174,
      "learning_rate": 0.0005313875815325133,
      "loss": 2.9831,
      "step": 50599
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.20896053314209,
      "learning_rate": 0.0005313849779437094,
      "loss": 3.0736,
      "step": 50600
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.811566948890686,
      "learning_rate": 0.0005313823743118867,
      "loss": 3.1001,
      "step": 50601
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7951993942260742,
      "learning_rate": 0.0005313797706370455,
      "loss": 2.7227,
      "step": 50602
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8378788232803345,
      "learning_rate": 0.0005313771669191866,
      "loss": 2.9831,
      "step": 50603
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6552448272705078,
      "learning_rate": 0.0005313745631583103,
      "loss": 3.156,
      "step": 50604
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1042189598083496,
      "learning_rate": 0.000531371959354417,
      "loss": 2.9264,
      "step": 50605
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6724942922592163,
      "learning_rate": 0.0005313693555075074,
      "loss": 3.274,
      "step": 50606
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5398858785629272,
      "learning_rate": 0.0005313667516175818,
      "loss": 3.0756,
      "step": 50607
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.060133457183838,
      "learning_rate": 0.0005313641476846408,
      "loss": 3.0697,
      "step": 50608
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3733810186386108,
      "learning_rate": 0.0005313615437086848,
      "loss": 3.0189,
      "step": 50609
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4049112796783447,
      "learning_rate": 0.0005313589396897144,
      "loss": 3.2,
      "step": 50610
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6824582815170288,
      "learning_rate": 0.0005313563356277299,
      "loss": 3.1679,
      "step": 50611
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8813598155975342,
      "learning_rate": 0.0005313537315227319,
      "loss": 3.0001,
      "step": 50612
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4651381969451904,
      "learning_rate": 0.0005313511273747209,
      "loss": 2.8318,
      "step": 50613
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.7455077171325684,
      "learning_rate": 0.0005313485231836974,
      "loss": 3.0755,
      "step": 50614
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.702285885810852,
      "learning_rate": 0.0005313459189496618,
      "loss": 3.2306,
      "step": 50615
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7935692071914673,
      "learning_rate": 0.0005313433146726146,
      "loss": 3.0559,
      "step": 50616
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.512686014175415,
      "learning_rate": 0.0005313407103525563,
      "loss": 3.1773,
      "step": 50617
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.658947706222534,
      "learning_rate": 0.0005313381059894875,
      "loss": 2.8396,
      "step": 50618
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.745103359222412,
      "learning_rate": 0.0005313355015834086,
      "loss": 2.9564,
      "step": 50619
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8952581882476807,
      "learning_rate": 0.0005313328971343199,
      "loss": 2.9104,
      "step": 50620
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5516444444656372,
      "learning_rate": 0.0005313302926422222,
      "loss": 3.1573,
      "step": 50621
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4136263132095337,
      "learning_rate": 0.0005313276881071157,
      "loss": 3.192,
      "step": 50622
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0453333854675293,
      "learning_rate": 0.0005313250835290011,
      "loss": 3.1244,
      "step": 50623
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5281656980514526,
      "learning_rate": 0.0005313224789078787,
      "loss": 3.0212,
      "step": 50624
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.808089256286621,
      "learning_rate": 0.0005313198742437493,
      "loss": 2.7796,
      "step": 50625
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.649679183959961,
      "learning_rate": 0.000531317269536613,
      "loss": 2.882,
      "step": 50626
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6666184663772583,
      "learning_rate": 0.0005313146647864705,
      "loss": 3.1492,
      "step": 50627
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6260815858840942,
      "learning_rate": 0.0005313120599933222,
      "loss": 3.2272,
      "step": 50628
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9525842666625977,
      "learning_rate": 0.0005313094551571686,
      "loss": 3.2053,
      "step": 50629
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.477309226989746,
      "learning_rate": 0.0005313068502780103,
      "loss": 3.215,
      "step": 50630
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6224308013916016,
      "learning_rate": 0.0005313042453558476,
      "loss": 3.1892,
      "step": 50631
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3914096355438232,
      "learning_rate": 0.0005313016403906811,
      "loss": 3.1835,
      "step": 50632
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.854297161102295,
      "learning_rate": 0.0005312990353825113,
      "loss": 3.1779,
      "step": 50633
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0228867530822754,
      "learning_rate": 0.0005312964303313387,
      "loss": 2.8449,
      "step": 50634
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6103343963623047,
      "learning_rate": 0.0005312938252371636,
      "loss": 2.9922,
      "step": 50635
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8539620637893677,
      "learning_rate": 0.0005312912200999867,
      "loss": 3.1401,
      "step": 50636
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6129040718078613,
      "learning_rate": 0.0005312886149198083,
      "loss": 2.8723,
      "step": 50637
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.478657841682434,
      "learning_rate": 0.0005312860096966291,
      "loss": 2.8367,
      "step": 50638
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4910467863082886,
      "learning_rate": 0.0005312834044304493,
      "loss": 3.1026,
      "step": 50639
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4617892503738403,
      "learning_rate": 0.0005312807991212697,
      "loss": 3.095,
      "step": 50640
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.7288882732391357,
      "learning_rate": 0.0005312781937690906,
      "loss": 2.9604,
      "step": 50641
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9072118997573853,
      "learning_rate": 0.0005312755883739125,
      "loss": 3.1555,
      "step": 50642
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.324922800064087,
      "learning_rate": 0.0005312729829357359,
      "loss": 3.0754,
      "step": 50643
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8694428205490112,
      "learning_rate": 0.0005312703774545612,
      "loss": 2.9548,
      "step": 50644
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1608166694641113,
      "learning_rate": 0.0005312677719303892,
      "loss": 3.0632,
      "step": 50645
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6057382822036743,
      "learning_rate": 0.0005312651663632199,
      "loss": 3.1911,
      "step": 50646
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0502030849456787,
      "learning_rate": 0.0005312625607530542,
      "loss": 2.9934,
      "step": 50647
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5320779085159302,
      "learning_rate": 0.0005312599550998925,
      "loss": 3.2801,
      "step": 50648
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6217586994171143,
      "learning_rate": 0.0005312573494037349,
      "loss": 3.0425,
      "step": 50649
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4830248355865479,
      "learning_rate": 0.0005312547436645825,
      "loss": 2.9561,
      "step": 50650
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4694880247116089,
      "learning_rate": 0.0005312521378824354,
      "loss": 2.86,
      "step": 50651
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5879924297332764,
      "learning_rate": 0.0005312495320572941,
      "loss": 3.0298,
      "step": 50652
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6118168830871582,
      "learning_rate": 0.0005312469261891591,
      "loss": 2.9114,
      "step": 50653
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0497140884399414,
      "learning_rate": 0.000531244320278031,
      "loss": 3.2111,
      "step": 50654
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4983932971954346,
      "learning_rate": 0.0005312417143239102,
      "loss": 3.0891,
      "step": 50655
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4999895095825195,
      "learning_rate": 0.0005312391083267974,
      "loss": 2.9728,
      "step": 50656
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.082045793533325,
      "learning_rate": 0.0005312365022866925,
      "loss": 3.2308,
      "step": 50657
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5008488893508911,
      "learning_rate": 0.0005312338962035967,
      "loss": 2.9833,
      "step": 50658
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.497665286064148,
      "learning_rate": 0.00053123129007751,
      "loss": 3.227,
      "step": 50659
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4329748153686523,
      "learning_rate": 0.000531228683908433,
      "loss": 2.841,
      "step": 50660
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5440196990966797,
      "learning_rate": 0.0005312260776963664,
      "loss": 2.9512,
      "step": 50661
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.437807321548462,
      "learning_rate": 0.0005312234714413103,
      "loss": 2.9639,
      "step": 50662
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.692354440689087,
      "learning_rate": 0.0005312208651432655,
      "loss": 3.3376,
      "step": 50663
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.581261157989502,
      "learning_rate": 0.0005312182588022323,
      "loss": 3.2371,
      "step": 50664
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6340025663375854,
      "learning_rate": 0.0005312156524182114,
      "loss": 3.016,
      "step": 50665
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3574084043502808,
      "learning_rate": 0.000531213045991203,
      "loss": 2.8241,
      "step": 50666
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4876132011413574,
      "learning_rate": 0.0005312104395212078,
      "loss": 2.9405,
      "step": 50667
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.824968695640564,
      "learning_rate": 0.0005312078330082262,
      "loss": 3.08,
      "step": 50668
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4709712266921997,
      "learning_rate": 0.0005312052264522588,
      "loss": 3.0239,
      "step": 50669
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2685850858688354,
      "learning_rate": 0.0005312026198533059,
      "loss": 2.7994,
      "step": 50670
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8943486213684082,
      "learning_rate": 0.0005312000132113681,
      "loss": 3.0455,
      "step": 50671
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3191568851470947,
      "learning_rate": 0.0005311974065264457,
      "loss": 2.9786,
      "step": 50672
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7989827394485474,
      "learning_rate": 0.0005311947997985396,
      "loss": 2.955,
      "step": 50673
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6739507913589478,
      "learning_rate": 0.0005311921930276498,
      "loss": 2.8612,
      "step": 50674
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5280797481536865,
      "learning_rate": 0.0005311895862137771,
      "loss": 2.8982,
      "step": 50675
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6524229049682617,
      "learning_rate": 0.000531186979356922,
      "loss": 3.1811,
      "step": 50676
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.529652714729309,
      "learning_rate": 0.0005311843724570847,
      "loss": 3.3603,
      "step": 50677
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3086435794830322,
      "learning_rate": 0.0005311817655142659,
      "loss": 3.0988,
      "step": 50678
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4872887134552002,
      "learning_rate": 0.000531179158528466,
      "loss": 3.007,
      "step": 50679
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7140940427780151,
      "learning_rate": 0.0005311765514996856,
      "loss": 3.0675,
      "step": 50680
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5746455192565918,
      "learning_rate": 0.0005311739444279252,
      "loss": 3.056,
      "step": 50681
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4299728870391846,
      "learning_rate": 0.0005311713373131852,
      "loss": 2.9738,
      "step": 50682
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2841823101043701,
      "learning_rate": 0.0005311687301554659,
      "loss": 2.7702,
      "step": 50683
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.671297550201416,
      "learning_rate": 0.0005311661229547681,
      "loss": 3.1572,
      "step": 50684
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5314059257507324,
      "learning_rate": 0.0005311635157110921,
      "loss": 3.153,
      "step": 50685
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5761977434158325,
      "learning_rate": 0.0005311609084244385,
      "loss": 3.135,
      "step": 50686
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6723239421844482,
      "learning_rate": 0.0005311583010948076,
      "loss": 3.1708,
      "step": 50687
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3827065229415894,
      "learning_rate": 0.0005311556937222001,
      "loss": 3.0593,
      "step": 50688
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4033328294754028,
      "learning_rate": 0.0005311530863066164,
      "loss": 3.057,
      "step": 50689
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5447213649749756,
      "learning_rate": 0.000531150478848057,
      "loss": 2.9063,
      "step": 50690
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.389470100402832,
      "learning_rate": 0.0005311478713465222,
      "loss": 3.1218,
      "step": 50691
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5574986934661865,
      "learning_rate": 0.0005311452638020128,
      "loss": 2.9861,
      "step": 50692
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6216927766799927,
      "learning_rate": 0.0005311426562145291,
      "loss": 2.9041,
      "step": 50693
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8446941375732422,
      "learning_rate": 0.0005311400485840716,
      "loss": 2.9904,
      "step": 50694
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.929495096206665,
      "learning_rate": 0.0005311374409106409,
      "loss": 3.1093,
      "step": 50695
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4237253665924072,
      "learning_rate": 0.0005311348331942373,
      "loss": 3.1779,
      "step": 50696
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5335965156555176,
      "learning_rate": 0.0005311322254348613,
      "loss": 2.9725,
      "step": 50697
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.467877745628357,
      "learning_rate": 0.0005311296176325136,
      "loss": 3.1133,
      "step": 50698
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4804610013961792,
      "learning_rate": 0.0005311270097871945,
      "loss": 3.2765,
      "step": 50699
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.655348539352417,
      "learning_rate": 0.0005311244018989045,
      "loss": 3.0056,
      "step": 50700
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5065287351608276,
      "learning_rate": 0.0005311217939676441,
      "loss": 2.8301,
      "step": 50701
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3890584707260132,
      "learning_rate": 0.0005311191859934138,
      "loss": 2.8361,
      "step": 50702
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.485514760017395,
      "learning_rate": 0.0005311165779762141,
      "loss": 3.0331,
      "step": 50703
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3010244369506836,
      "learning_rate": 0.0005311139699160454,
      "loss": 3.0766,
      "step": 50704
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.736845850944519,
      "learning_rate": 0.0005311113618129083,
      "loss": 3.0557,
      "step": 50705
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6981250047683716,
      "learning_rate": 0.0005311087536668033,
      "loss": 3.0968,
      "step": 50706
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5071532726287842,
      "learning_rate": 0.0005311061454777308,
      "loss": 3.389,
      "step": 50707
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9913991689682007,
      "learning_rate": 0.0005311035372456912,
      "loss": 2.9853,
      "step": 50708
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8230777978897095,
      "learning_rate": 0.0005311009289706852,
      "loss": 2.828,
      "step": 50709
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.011732578277588,
      "learning_rate": 0.0005310983206527131,
      "loss": 3.0484,
      "step": 50710
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6366487741470337,
      "learning_rate": 0.0005310957122917755,
      "loss": 3.2448,
      "step": 50711
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6046520471572876,
      "learning_rate": 0.0005310931038878729,
      "loss": 3.2131,
      "step": 50712
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8550653457641602,
      "learning_rate": 0.0005310904954410056,
      "loss": 2.9924,
      "step": 50713
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3855438232421875,
      "learning_rate": 0.0005310878869511743,
      "loss": 2.9824,
      "step": 50714
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0307199954986572,
      "learning_rate": 0.0005310852784183794,
      "loss": 3.2123,
      "step": 50715
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4351603984832764,
      "learning_rate": 0.0005310826698426214,
      "loss": 2.8692,
      "step": 50716
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6743472814559937,
      "learning_rate": 0.0005310800612239007,
      "loss": 3.0853,
      "step": 50717
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.680355191230774,
      "learning_rate": 0.0005310774525622179,
      "loss": 3.1277,
      "step": 50718
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4350812435150146,
      "learning_rate": 0.0005310748438575734,
      "loss": 2.5836,
      "step": 50719
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5005149841308594,
      "learning_rate": 0.0005310722351099678,
      "loss": 2.8873,
      "step": 50720
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0021896362304688,
      "learning_rate": 0.0005310696263194014,
      "loss": 2.9162,
      "step": 50721
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8544379472732544,
      "learning_rate": 0.0005310670174858749,
      "loss": 2.8834,
      "step": 50722
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6691522598266602,
      "learning_rate": 0.0005310644086093887,
      "loss": 3.0569,
      "step": 50723
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4081231355667114,
      "learning_rate": 0.0005310617996899432,
      "loss": 3.2802,
      "step": 50724
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2339422702789307,
      "learning_rate": 0.0005310591907275389,
      "loss": 3.0188,
      "step": 50725
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7694430351257324,
      "learning_rate": 0.0005310565817221763,
      "loss": 3.0856,
      "step": 50726
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.243950605392456,
      "learning_rate": 0.000531053972673856,
      "loss": 2.7995,
      "step": 50727
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3400464057922363,
      "learning_rate": 0.0005310513635825785,
      "loss": 3.1488,
      "step": 50728
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4017144441604614,
      "learning_rate": 0.0005310487544483441,
      "loss": 2.899,
      "step": 50729
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3102669715881348,
      "learning_rate": 0.0005310461452711534,
      "loss": 3.0526,
      "step": 50730
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.852088451385498,
      "learning_rate": 0.0005310435360510068,
      "loss": 3.2759,
      "step": 50731
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8226211071014404,
      "learning_rate": 0.0005310409267879049,
      "loss": 3.1945,
      "step": 50732
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4496898651123047,
      "learning_rate": 0.0005310383174818481,
      "loss": 2.9552,
      "step": 50733
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.785974383354187,
      "learning_rate": 0.000531035708132837,
      "loss": 3.3619,
      "step": 50734
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5297205448150635,
      "learning_rate": 0.0005310330987408719,
      "loss": 3.1348,
      "step": 50735
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3449070453643799,
      "learning_rate": 0.0005310304893059533,
      "loss": 2.9662,
      "step": 50736
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.455820322036743,
      "learning_rate": 0.000531027879828082,
      "loss": 2.9488,
      "step": 50737
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9819645881652832,
      "learning_rate": 0.0005310252703072582,
      "loss": 3.1744,
      "step": 50738
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4199345111846924,
      "learning_rate": 0.0005310226607434823,
      "loss": 2.9519,
      "step": 50739
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.375477910041809,
      "learning_rate": 0.0005310200511367551,
      "loss": 3.0426,
      "step": 50740
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.666269540786743,
      "learning_rate": 0.0005310174414870767,
      "loss": 2.9155,
      "step": 50741
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9596768617630005,
      "learning_rate": 0.0005310148317944479,
      "loss": 3.2454,
      "step": 50742
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8680287599563599,
      "learning_rate": 0.0005310122220588692,
      "loss": 2.783,
      "step": 50743
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6556984186172485,
      "learning_rate": 0.0005310096122803408,
      "loss": 3.0789,
      "step": 50744
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.443282961845398,
      "learning_rate": 0.0005310070024588634,
      "loss": 3.0507,
      "step": 50745
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6545554399490356,
      "learning_rate": 0.0005310043925944375,
      "loss": 2.8912,
      "step": 50746
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2604451179504395,
      "learning_rate": 0.0005310017826870635,
      "loss": 3.1078,
      "step": 50747
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7164201736450195,
      "learning_rate": 0.0005309991727367419,
      "loss": 3.1107,
      "step": 50748
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5994672775268555,
      "learning_rate": 0.0005309965627434732,
      "loss": 2.9722,
      "step": 50749
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.041983127593994,
      "learning_rate": 0.0005309939527072578,
      "loss": 2.9595,
      "step": 50750
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5044246912002563,
      "learning_rate": 0.0005309913426280963,
      "loss": 3.0548,
      "step": 50751
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.951367735862732,
      "learning_rate": 0.0005309887325059892,
      "loss": 2.8542,
      "step": 50752
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2486095428466797,
      "learning_rate": 0.000530986122340937,
      "loss": 3.147,
      "step": 50753
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5921077728271484,
      "learning_rate": 0.00053098351213294,
      "loss": 3.1026,
      "step": 50754
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9391249418258667,
      "learning_rate": 0.0005309809018819988,
      "loss": 3.2046,
      "step": 50755
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.758920669555664,
      "learning_rate": 0.000530978291588114,
      "loss": 3.0454,
      "step": 50756
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5288301706314087,
      "learning_rate": 0.0005309756812512859,
      "loss": 2.997,
      "step": 50757
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1153430938720703,
      "learning_rate": 0.0005309730708715149,
      "loss": 2.9439,
      "step": 50758
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4492714405059814,
      "learning_rate": 0.0005309704604488019,
      "loss": 3.101,
      "step": 50759
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4017301797866821,
      "learning_rate": 0.000530967849983147,
      "loss": 2.9388,
      "step": 50760
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6944218873977661,
      "learning_rate": 0.000530965239474551,
      "loss": 3.2312,
      "step": 50761
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.029412269592285,
      "learning_rate": 0.0005309626289230139,
      "loss": 2.7179,
      "step": 50762
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9263458251953125,
      "learning_rate": 0.0005309600183285367,
      "loss": 2.9469,
      "step": 50763
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.420309066772461,
      "learning_rate": 0.0005309574076911196,
      "loss": 3.131,
      "step": 50764
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2331857681274414,
      "learning_rate": 0.0005309547970107632,
      "loss": 3.297,
      "step": 50765
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4492619037628174,
      "learning_rate": 0.0005309521862874679,
      "loss": 3.1586,
      "step": 50766
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.461852788925171,
      "learning_rate": 0.0005309495755212343,
      "loss": 3.058,
      "step": 50767
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6838010549545288,
      "learning_rate": 0.0005309469647120628,
      "loss": 3.0768,
      "step": 50768
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5485111474990845,
      "learning_rate": 0.0005309443538599538,
      "loss": 3.1768,
      "step": 50769
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9082359075546265,
      "learning_rate": 0.000530941742964908,
      "loss": 3.1024,
      "step": 50770
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5802849531173706,
      "learning_rate": 0.0005309391320269257,
      "loss": 2.7041,
      "step": 50771
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7697397470474243,
      "learning_rate": 0.0005309365210460075,
      "loss": 2.8601,
      "step": 50772
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5965218544006348,
      "learning_rate": 0.0005309339100221538,
      "loss": 3.3199,
      "step": 50773
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.256234884262085,
      "learning_rate": 0.0005309312989553651,
      "loss": 3.0397,
      "step": 50774
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8667607307434082,
      "learning_rate": 0.0005309286878456419,
      "loss": 2.8972,
      "step": 50775
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3419796228408813,
      "learning_rate": 0.0005309260766929848,
      "loss": 2.7861,
      "step": 50776
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5603007078170776,
      "learning_rate": 0.0005309234654973942,
      "loss": 3.1861,
      "step": 50777
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.512886643409729,
      "learning_rate": 0.0005309208542588704,
      "loss": 3.1443,
      "step": 50778
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4009805917739868,
      "learning_rate": 0.0005309182429774142,
      "loss": 2.877,
      "step": 50779
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1308605670928955,
      "learning_rate": 0.0005309156316530259,
      "loss": 2.8498,
      "step": 50780
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5369679927825928,
      "learning_rate": 0.000530913020285706,
      "loss": 3.0676,
      "step": 50781
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3874930143356323,
      "learning_rate": 0.0005309104088754551,
      "loss": 2.9859,
      "step": 50782
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2630244493484497,
      "learning_rate": 0.0005309077974222735,
      "loss": 2.962,
      "step": 50783
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4017471075057983,
      "learning_rate": 0.0005309051859261618,
      "loss": 3.0103,
      "step": 50784
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3303552865982056,
      "learning_rate": 0.0005309025743871205,
      "loss": 2.848,
      "step": 50785
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3509454727172852,
      "learning_rate": 0.00053089996280515,
      "loss": 2.9051,
      "step": 50786
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.458653450012207,
      "learning_rate": 0.0005308973511802509,
      "loss": 2.9337,
      "step": 50787
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.526102066040039,
      "learning_rate": 0.0005308947395124236,
      "loss": 2.9768,
      "step": 50788
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1743788719177246,
      "learning_rate": 0.0005308921278016686,
      "loss": 3.0222,
      "step": 50789
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5399502515792847,
      "learning_rate": 0.0005308895160479863,
      "loss": 3.0484,
      "step": 50790
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9294382333755493,
      "learning_rate": 0.0005308869042513775,
      "loss": 2.9406,
      "step": 50791
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7236428260803223,
      "learning_rate": 0.0005308842924118424,
      "loss": 2.9661,
      "step": 50792
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.6149709224700928,
      "learning_rate": 0.0005308816805293814,
      "loss": 2.9094,
      "step": 50793
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.351773738861084,
      "learning_rate": 0.0005308790686039953,
      "loss": 2.8525,
      "step": 50794
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5195480585098267,
      "learning_rate": 0.0005308764566356842,
      "loss": 2.9623,
      "step": 50795
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5125584602355957,
      "learning_rate": 0.0005308738446244491,
      "loss": 3.1106,
      "step": 50796
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6486680507659912,
      "learning_rate": 0.00053087123257029,
      "loss": 3.3752,
      "step": 50797
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4109820127487183,
      "learning_rate": 0.0005308686204732076,
      "loss": 2.8469,
      "step": 50798
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3916126489639282,
      "learning_rate": 0.0005308660083332025,
      "loss": 2.9086,
      "step": 50799
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.518372654914856,
      "learning_rate": 0.0005308633961502749,
      "loss": 2.834,
      "step": 50800
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9234212636947632,
      "learning_rate": 0.0005308607839244256,
      "loss": 3.0712,
      "step": 50801
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5596270561218262,
      "learning_rate": 0.0005308581716556547,
      "loss": 2.9167,
      "step": 50802
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6547515392303467,
      "learning_rate": 0.0005308555593439631,
      "loss": 2.8607,
      "step": 50803
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5135854482650757,
      "learning_rate": 0.000530852946989351,
      "loss": 3.1399,
      "step": 50804
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.498599648475647,
      "learning_rate": 0.0005308503345918189,
      "loss": 3.225,
      "step": 50805
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6751512289047241,
      "learning_rate": 0.0005308477221513675,
      "loss": 2.9794,
      "step": 50806
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7515065670013428,
      "learning_rate": 0.0005308451096679972,
      "loss": 3.2618,
      "step": 50807
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5108866691589355,
      "learning_rate": 0.0005308424971417083,
      "loss": 2.9412,
      "step": 50808
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6072789430618286,
      "learning_rate": 0.0005308398845725015,
      "loss": 3.0179,
      "step": 50809
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.480164885520935,
      "learning_rate": 0.0005308372719603772,
      "loss": 3.2748,
      "step": 50810
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8182260990142822,
      "learning_rate": 0.0005308346593053359,
      "loss": 2.9729,
      "step": 50811
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4777051210403442,
      "learning_rate": 0.000530832046607378,
      "loss": 3.2053,
      "step": 50812
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4076019525527954,
      "learning_rate": 0.0005308294338665042,
      "loss": 2.8965,
      "step": 50813
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4321441650390625,
      "learning_rate": 0.0005308268210827147,
      "loss": 2.9237,
      "step": 50814
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4872595071792603,
      "learning_rate": 0.0005308242082560103,
      "loss": 3.1311,
      "step": 50815
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8221406936645508,
      "learning_rate": 0.0005308215953863911,
      "loss": 3.0732,
      "step": 50816
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7256581783294678,
      "learning_rate": 0.000530818982473858,
      "loss": 3.3155,
      "step": 50817
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.365349769592285,
      "learning_rate": 0.0005308163695184112,
      "loss": 2.8361,
      "step": 50818
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.827965497970581,
      "learning_rate": 0.0005308137565200513,
      "loss": 3.1016,
      "step": 50819
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.85784912109375,
      "learning_rate": 0.0005308111434787787,
      "loss": 3.0947,
      "step": 50820
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6568851470947266,
      "learning_rate": 0.0005308085303945941,
      "loss": 3.0036,
      "step": 50821
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.09132719039917,
      "learning_rate": 0.0005308059172674978,
      "loss": 3.0041,
      "step": 50822
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6372212171554565,
      "learning_rate": 0.0005308033040974903,
      "loss": 2.8473,
      "step": 50823
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.688858151435852,
      "learning_rate": 0.0005308006908845721,
      "loss": 3.2157,
      "step": 50824
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6675176620483398,
      "learning_rate": 0.0005307980776287437,
      "loss": 2.9801,
      "step": 50825
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.316022515296936,
      "learning_rate": 0.0005307954643300056,
      "loss": 3.1358,
      "step": 50826
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6401116847991943,
      "learning_rate": 0.0005307928509883582,
      "loss": 3.0429,
      "step": 50827
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6090408563613892,
      "learning_rate": 0.0005307902376038021,
      "loss": 2.8135,
      "step": 50828
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3666073083877563,
      "learning_rate": 0.0005307876241763377,
      "loss": 3.1979,
      "step": 50829
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6363463401794434,
      "learning_rate": 0.0005307850107059656,
      "loss": 2.8744,
      "step": 50830
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7215837240219116,
      "learning_rate": 0.0005307823971926861,
      "loss": 2.7768,
      "step": 50831
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8992640972137451,
      "learning_rate": 0.0005307797836365,
      "loss": 3.0398,
      "step": 50832
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5244884490966797,
      "learning_rate": 0.0005307771700374074,
      "loss": 2.9933,
      "step": 50833
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4726265668869019,
      "learning_rate": 0.0005307745563954089,
      "loss": 3.0683,
      "step": 50834
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7147252559661865,
      "learning_rate": 0.0005307719427105052,
      "loss": 2.915,
      "step": 50835
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4772813320159912,
      "learning_rate": 0.0005307693289826967,
      "loss": 3.0511,
      "step": 50836
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6155624389648438,
      "learning_rate": 0.0005307667152119836,
      "loss": 2.9281,
      "step": 50837
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8192675113677979,
      "learning_rate": 0.0005307641013983668,
      "loss": 3.0137,
      "step": 50838
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.677773118019104,
      "learning_rate": 0.0005307614875418467,
      "loss": 3.1206,
      "step": 50839
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6997127532958984,
      "learning_rate": 0.0005307588736424234,
      "loss": 3.4648,
      "step": 50840
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.44599449634552,
      "learning_rate": 0.0005307562597000978,
      "loss": 3.1201,
      "step": 50841
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5681160688400269,
      "learning_rate": 0.0005307536457148703,
      "loss": 3.2222,
      "step": 50842
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.383162260055542,
      "learning_rate": 0.0005307510316867412,
      "loss": 2.9241,
      "step": 50843
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.451954960823059,
      "learning_rate": 0.0005307484176157113,
      "loss": 2.9392,
      "step": 50844
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5131382942199707,
      "learning_rate": 0.0005307458035017808,
      "loss": 2.8971,
      "step": 50845
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.0092613697052,
      "learning_rate": 0.0005307431893449503,
      "loss": 2.8047,
      "step": 50846
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8138461112976074,
      "learning_rate": 0.0005307405751452203,
      "loss": 3.0126,
      "step": 50847
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.665724754333496,
      "learning_rate": 0.0005307379609025913,
      "loss": 2.934,
      "step": 50848
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.405306577682495,
      "learning_rate": 0.0005307353466170638,
      "loss": 3.1444,
      "step": 50849
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0862889289855957,
      "learning_rate": 0.0005307327322886382,
      "loss": 3.1439,
      "step": 50850
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2972114086151123,
      "learning_rate": 0.000530730117917315,
      "loss": 2.9547,
      "step": 50851
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5420634746551514,
      "learning_rate": 0.0005307275035030947,
      "loss": 2.9256,
      "step": 50852
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6145528554916382,
      "learning_rate": 0.0005307248890459779,
      "loss": 2.9686,
      "step": 50853
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6295421123504639,
      "learning_rate": 0.0005307222745459649,
      "loss": 3.2468,
      "step": 50854
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.526542901992798,
      "learning_rate": 0.0005307196600030562,
      "loss": 3.1708,
      "step": 50855
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6155458688735962,
      "learning_rate": 0.0005307170454172525,
      "loss": 3.0984,
      "step": 50856
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5995129346847534,
      "learning_rate": 0.0005307144307885541,
      "loss": 2.8648,
      "step": 50857
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.424615502357483,
      "learning_rate": 0.0005307118161169616,
      "loss": 3.0236,
      "step": 50858
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6836917400360107,
      "learning_rate": 0.0005307092014024753,
      "loss": 3.0762,
      "step": 50859
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8407719135284424,
      "learning_rate": 0.0005307065866450958,
      "loss": 3.0051,
      "step": 50860
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.47799551486969,
      "learning_rate": 0.0005307039718448237,
      "loss": 3.1209,
      "step": 50861
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4395191669464111,
      "learning_rate": 0.0005307013570016593,
      "loss": 3.1223,
      "step": 50862
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3135695457458496,
      "learning_rate": 0.0005306987421156033,
      "loss": 3.1207,
      "step": 50863
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.9360690116882324,
      "learning_rate": 0.0005306961271866559,
      "loss": 2.7182,
      "step": 50864
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.783014178276062,
      "learning_rate": 0.0005306935122148178,
      "loss": 3.1746,
      "step": 50865
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.491235613822937,
      "learning_rate": 0.0005306908972000894,
      "loss": 3.0879,
      "step": 50866
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.6725430488586426,
      "learning_rate": 0.0005306882821424711,
      "loss": 2.943,
      "step": 50867
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.5493083000183105,
      "learning_rate": 0.0005306856670419637,
      "loss": 2.9889,
      "step": 50868
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6875373125076294,
      "learning_rate": 0.0005306830518985674,
      "loss": 2.9932,
      "step": 50869
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.9337785243988037,
      "learning_rate": 0.0005306804367122826,
      "loss": 3.1402,
      "step": 50870
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.923492908477783,
      "learning_rate": 0.0005306778214831102,
      "loss": 3.0444,
      "step": 50871
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8867729902267456,
      "learning_rate": 0.0005306752062110503,
      "loss": 3.2251,
      "step": 50872
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.35137939453125,
      "learning_rate": 0.0005306725908961036,
      "loss": 3.1613,
      "step": 50873
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6653550863265991,
      "learning_rate": 0.0005306699755382704,
      "loss": 3.1281,
      "step": 50874
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.4583096504211426,
      "learning_rate": 0.0005306673601375515,
      "loss": 2.9903,
      "step": 50875
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.760395884513855,
      "learning_rate": 0.0005306647446939469,
      "loss": 3.1906,
      "step": 50876
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.580002784729004,
      "learning_rate": 0.0005306621292074576,
      "loss": 3.0628,
      "step": 50877
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.785517692565918,
      "learning_rate": 0.0005306595136780837,
      "loss": 3.0622,
      "step": 50878
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4700305461883545,
      "learning_rate": 0.0005306568981058259,
      "loss": 2.919,
      "step": 50879
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.545491337776184,
      "learning_rate": 0.0005306542824906847,
      "loss": 2.865,
      "step": 50880
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6546626091003418,
      "learning_rate": 0.0005306516668326605,
      "loss": 3.1696,
      "step": 50881
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6309192180633545,
      "learning_rate": 0.0005306490511317538,
      "loss": 3.0486,
      "step": 50882
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4710109233856201,
      "learning_rate": 0.000530646435387965,
      "loss": 3.2477,
      "step": 50883
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7575732469558716,
      "learning_rate": 0.0005306438196012947,
      "loss": 3.0055,
      "step": 50884
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4339268207550049,
      "learning_rate": 0.0005306412037717434,
      "loss": 2.9529,
      "step": 50885
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.730293869972229,
      "learning_rate": 0.0005306385878993115,
      "loss": 2.7565,
      "step": 50886
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.771289348602295,
      "learning_rate": 0.0005306359719839996,
      "loss": 3.0118,
      "step": 50887
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7150858640670776,
      "learning_rate": 0.0005306333560258081,
      "loss": 3.2014,
      "step": 50888
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8121029138565063,
      "learning_rate": 0.0005306307400247375,
      "loss": 3.1966,
      "step": 50889
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.341902017593384,
      "learning_rate": 0.0005306281239807882,
      "loss": 3.2473,
      "step": 50890
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.517408847808838,
      "learning_rate": 0.0005306255078939609,
      "loss": 3.066,
      "step": 50891
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4644466638565063,
      "learning_rate": 0.0005306228917642559,
      "loss": 3.0781,
      "step": 50892
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.583801746368408,
      "learning_rate": 0.0005306202755916738,
      "loss": 3.146,
      "step": 50893
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6131858825683594,
      "learning_rate": 0.000530617659376215,
      "loss": 3.0769,
      "step": 50894
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2531778812408447,
      "learning_rate": 0.00053061504311788,
      "loss": 3.3764,
      "step": 50895
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5927021503448486,
      "learning_rate": 0.0005306124268166694,
      "loss": 2.7706,
      "step": 50896
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.289772629737854,
      "learning_rate": 0.0005306098104725836,
      "loss": 2.9052,
      "step": 50897
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5798455476760864,
      "learning_rate": 0.000530607194085623,
      "loss": 2.833,
      "step": 50898
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.599440097808838,
      "learning_rate": 0.0005306045776557882,
      "loss": 3.1717,
      "step": 50899
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5058599710464478,
      "learning_rate": 0.0005306019611830796,
      "loss": 2.976,
      "step": 50900
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4915685653686523,
      "learning_rate": 0.0005305993446674979,
      "loss": 2.8279,
      "step": 50901
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.067002058029175,
      "learning_rate": 0.0005305967281090433,
      "loss": 2.982,
      "step": 50902
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4216156005859375,
      "learning_rate": 0.0005305941115077164,
      "loss": 2.8356,
      "step": 50903
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8440725803375244,
      "learning_rate": 0.0005305914948635177,
      "loss": 2.818,
      "step": 50904
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9271900653839111,
      "learning_rate": 0.0005305888781764477,
      "loss": 2.8419,
      "step": 50905
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1423885822296143,
      "learning_rate": 0.000530586261446507,
      "loss": 3.2277,
      "step": 50906
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.090762138366699,
      "learning_rate": 0.0005305836446736959,
      "loss": 3.1401,
      "step": 50907
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.338891625404358,
      "learning_rate": 0.0005305810278580149,
      "loss": 2.9823,
      "step": 50908
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3161637783050537,
      "learning_rate": 0.0005305784109994645,
      "loss": 2.9898,
      "step": 50909
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.0578362941741943,
      "learning_rate": 0.0005305757940980453,
      "loss": 3.1082,
      "step": 50910
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5938199758529663,
      "learning_rate": 0.0005305731771537576,
      "loss": 3.0947,
      "step": 50911
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.484148621559143,
      "learning_rate": 0.0005305705601666021,
      "loss": 2.9232,
      "step": 50912
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9096429347991943,
      "learning_rate": 0.0005305679431365792,
      "loss": 3.0601,
      "step": 50913
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.325965404510498,
      "learning_rate": 0.0005305653260636893,
      "loss": 3.1583,
      "step": 50914
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.699316382408142,
      "learning_rate": 0.000530562708947933,
      "loss": 3.1022,
      "step": 50915
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0877063274383545,
      "learning_rate": 0.0005305600917893107,
      "loss": 3.0514,
      "step": 50916
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.255284309387207,
      "learning_rate": 0.0005305574745878229,
      "loss": 2.8997,
      "step": 50917
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.24619722366333,
      "learning_rate": 0.0005305548573434702,
      "loss": 3.1141,
      "step": 50918
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.185962677001953,
      "learning_rate": 0.0005305522400562529,
      "loss": 2.9349,
      "step": 50919
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0792102813720703,
      "learning_rate": 0.0005305496227261716,
      "loss": 2.9592,
      "step": 50920
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8163597583770752,
      "learning_rate": 0.0005305470053532269,
      "loss": 2.9169,
      "step": 50921
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.592376708984375,
      "learning_rate": 0.000530544387937419,
      "loss": 2.9398,
      "step": 50922
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8499133586883545,
      "learning_rate": 0.0005305417704787487,
      "loss": 2.9514,
      "step": 50923
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.090644359588623,
      "learning_rate": 0.0005305391529772162,
      "loss": 2.9555,
      "step": 50924
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5722235441207886,
      "learning_rate": 0.0005305365354328222,
      "loss": 2.9352,
      "step": 50925
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9475531578063965,
      "learning_rate": 0.000530533917845567,
      "loss": 3.159,
      "step": 50926
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0245087146759033,
      "learning_rate": 0.0005305313002154513,
      "loss": 2.9189,
      "step": 50927
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.348116636276245,
      "learning_rate": 0.0005305286825424755,
      "loss": 2.7907,
      "step": 50928
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.847511887550354,
      "learning_rate": 0.00053052606482664,
      "loss": 3.1328,
      "step": 50929
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.023144483566284,
      "learning_rate": 0.0005305234470679454,
      "loss": 3.1629,
      "step": 50930
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3713865280151367,
      "learning_rate": 0.0005305208292663921,
      "loss": 3.0701,
      "step": 50931
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3736796379089355,
      "learning_rate": 0.0005305182114219806,
      "loss": 3.1631,
      "step": 50932
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4914087057113647,
      "learning_rate": 0.0005305155935347115,
      "loss": 3.1454,
      "step": 50933
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.809002637863159,
      "learning_rate": 0.0005305129756045851,
      "loss": 2.8806,
      "step": 50934
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.654092788696289,
      "learning_rate": 0.0005305103576316021,
      "loss": 2.8832,
      "step": 50935
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5413883924484253,
      "learning_rate": 0.0005305077396157627,
      "loss": 3.1241,
      "step": 50936
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.457401752471924,
      "learning_rate": 0.0005305051215570677,
      "loss": 2.9382,
      "step": 50937
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.382655620574951,
      "learning_rate": 0.0005305025034555175,
      "loss": 3.0967,
      "step": 50938
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4472219944000244,
      "learning_rate": 0.0005304998853111123,
      "loss": 2.9282,
      "step": 50939
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3090386390686035,
      "learning_rate": 0.000530497267123853,
      "loss": 3.0362,
      "step": 50940
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4038524627685547,
      "learning_rate": 0.0005304946488937399,
      "loss": 3.3088,
      "step": 50941
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.142519474029541,
      "learning_rate": 0.0005304920306207735,
      "loss": 3.0156,
      "step": 50942
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.493266224861145,
      "learning_rate": 0.0005304894123049543,
      "loss": 2.9215,
      "step": 50943
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3198317289352417,
      "learning_rate": 0.0005304867939462827,
      "loss": 3.0379,
      "step": 50944
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4862937927246094,
      "learning_rate": 0.0005304841755447592,
      "loss": 2.9457,
      "step": 50945
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8779641389846802,
      "learning_rate": 0.0005304815571003845,
      "loss": 3.011,
      "step": 50946
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4330686330795288,
      "learning_rate": 0.0005304789386131588,
      "loss": 3.1776,
      "step": 50947
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.235806703567505,
      "learning_rate": 0.0005304763200830826,
      "loss": 3.2218,
      "step": 50948
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4522860050201416,
      "learning_rate": 0.0005304737015101568,
      "loss": 3.1571,
      "step": 50949
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4342293739318848,
      "learning_rate": 0.0005304710828943815,
      "loss": 3.2423,
      "step": 50950
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.400611400604248,
      "learning_rate": 0.0005304684642357571,
      "loss": 2.899,
      "step": 50951
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7914949655532837,
      "learning_rate": 0.0005304658455342844,
      "loss": 2.8169,
      "step": 50952
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3892114162445068,
      "learning_rate": 0.0005304632267899638,
      "loss": 2.982,
      "step": 50953
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7390717267990112,
      "learning_rate": 0.0005304606080027956,
      "loss": 3.0222,
      "step": 50954
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6810048818588257,
      "learning_rate": 0.0005304579891727806,
      "loss": 3.3878,
      "step": 50955
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3740556240081787,
      "learning_rate": 0.000530455370299919,
      "loss": 2.9839,
      "step": 50956
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5717343091964722,
      "learning_rate": 0.0005304527513842113,
      "loss": 3.1109,
      "step": 50957
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3799647092819214,
      "learning_rate": 0.0005304501324256582,
      "loss": 3.2867,
      "step": 50958
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4150501489639282,
      "learning_rate": 0.0005304475134242601,
      "loss": 3.0636,
      "step": 50959
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9642614126205444,
      "learning_rate": 0.0005304448943800173,
      "loss": 3.0707,
      "step": 50960
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5434924364089966,
      "learning_rate": 0.0005304422752929306,
      "loss": 2.9868,
      "step": 50961
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7302435636520386,
      "learning_rate": 0.0005304396561630003,
      "loss": 2.9657,
      "step": 50962
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4806727170944214,
      "learning_rate": 0.0005304370369902269,
      "loss": 2.7282,
      "step": 50963
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.533288836479187,
      "learning_rate": 0.0005304344177746109,
      "loss": 3.2949,
      "step": 50964
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5910381078720093,
      "learning_rate": 0.0005304317985161528,
      "loss": 2.9134,
      "step": 50965
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4465508460998535,
      "learning_rate": 0.000530429179214853,
      "loss": 3.1109,
      "step": 50966
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2069897651672363,
      "learning_rate": 0.0005304265598707123,
      "loss": 3.2267,
      "step": 50967
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8774852752685547,
      "learning_rate": 0.0005304239404837306,
      "loss": 3.1475,
      "step": 50968
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4936983585357666,
      "learning_rate": 0.000530421321053909,
      "loss": 3.1354,
      "step": 50969
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.013181209564209,
      "learning_rate": 0.0005304187015812477,
      "loss": 3.138,
      "step": 50970
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.026517391204834,
      "learning_rate": 0.0005304160820657472,
      "loss": 2.8856,
      "step": 50971
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4612935781478882,
      "learning_rate": 0.000530413462507408,
      "loss": 3.2552,
      "step": 50972
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7906979322433472,
      "learning_rate": 0.0005304108429062305,
      "loss": 3.2414,
      "step": 50973
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.959680438041687,
      "learning_rate": 0.0005304082232622154,
      "loss": 3.0441,
      "step": 50974
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.389499306678772,
      "learning_rate": 0.0005304056035753629,
      "loss": 2.89,
      "step": 50975
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6409951448440552,
      "learning_rate": 0.0005304029838456738,
      "loss": 3.3275,
      "step": 50976
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5742454528808594,
      "learning_rate": 0.0005304003640731484,
      "loss": 2.8305,
      "step": 50977
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4849865436553955,
      "learning_rate": 0.0005303977442577872,
      "loss": 3.1567,
      "step": 50978
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5292243957519531,
      "learning_rate": 0.0005303951243995908,
      "loss": 2.759,
      "step": 50979
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5587331056594849,
      "learning_rate": 0.0005303925044985594,
      "loss": 3.1875,
      "step": 50980
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8898186683654785,
      "learning_rate": 0.0005303898845546938,
      "loss": 3.1146,
      "step": 50981
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3862268924713135,
      "learning_rate": 0.0005303872645679945,
      "loss": 3.0671,
      "step": 50982
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4994761943817139,
      "learning_rate": 0.0005303846445384616,
      "loss": 3.2761,
      "step": 50983
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6947338581085205,
      "learning_rate": 0.0005303820244660961,
      "loss": 3.0601,
      "step": 50984
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0299158096313477,
      "learning_rate": 0.000530379404350898,
      "loss": 3.1001,
      "step": 50985
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.790055751800537,
      "learning_rate": 0.0005303767841928681,
      "loss": 3.0951,
      "step": 50986
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3858691453933716,
      "learning_rate": 0.000530374163992007,
      "loss": 3.2275,
      "step": 50987
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2758750915527344,
      "learning_rate": 0.0005303715437483148,
      "loss": 3.1584,
      "step": 50988
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4546972513198853,
      "learning_rate": 0.0005303689234617922,
      "loss": 2.9332,
      "step": 50989
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9156345129013062,
      "learning_rate": 0.0005303663031324396,
      "loss": 3.08,
      "step": 50990
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.604250431060791,
      "learning_rate": 0.0005303636827602577,
      "loss": 2.9569,
      "step": 50991
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.905008316040039,
      "learning_rate": 0.0005303610623452467,
      "loss": 3.1681,
      "step": 50992
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9043235778808594,
      "learning_rate": 0.0005303584418874074,
      "loss": 2.7544,
      "step": 50993
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7524434328079224,
      "learning_rate": 0.00053035582138674,
      "loss": 3.3771,
      "step": 50994
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.600541353225708,
      "learning_rate": 0.0005303532008432451,
      "loss": 3.2879,
      "step": 50995
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7409318685531616,
      "learning_rate": 0.0005303505802569233,
      "loss": 2.8458,
      "step": 50996
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.016606092453003,
      "learning_rate": 0.0005303479596277748,
      "loss": 2.8746,
      "step": 50997
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1447439193725586,
      "learning_rate": 0.0005303453389558004,
      "loss": 3.1426,
      "step": 50998
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.732322335243225,
      "learning_rate": 0.0005303427182410005,
      "loss": 3.2346,
      "step": 50999
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1211135387420654,
      "learning_rate": 0.0005303400974833755,
      "loss": 2.934,
      "step": 51000
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.394144892692566,
      "learning_rate": 0.0005303374766829259,
      "loss": 2.8566,
      "step": 51001
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.630388855934143,
      "learning_rate": 0.0005303348558396521,
      "loss": 3.0055,
      "step": 51002
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6705551147460938,
      "learning_rate": 0.0005303322349535549,
      "loss": 3.1568,
      "step": 51003
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.899222493171692,
      "learning_rate": 0.0005303296140246345,
      "loss": 3.2263,
      "step": 51004
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6082782745361328,
      "learning_rate": 0.0005303269930528913,
      "loss": 2.8626,
      "step": 51005
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8166168928146362,
      "learning_rate": 0.0005303243720383262,
      "loss": 3.11,
      "step": 51006
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1020195484161377,
      "learning_rate": 0.0005303217509809394,
      "loss": 3.0353,
      "step": 51007
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.773059368133545,
      "learning_rate": 0.0005303191298807315,
      "loss": 2.9522,
      "step": 51008
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4932202100753784,
      "learning_rate": 0.0005303165087377028,
      "loss": 2.9163,
      "step": 51009
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4617873430252075,
      "learning_rate": 0.000530313887551854,
      "loss": 3.3035,
      "step": 51010
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.371053695678711,
      "learning_rate": 0.0005303112663231855,
      "loss": 2.9006,
      "step": 51011
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.766683578491211,
      "learning_rate": 0.0005303086450516978,
      "loss": 2.9632,
      "step": 51012
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4264613389968872,
      "learning_rate": 0.0005303060237373913,
      "loss": 3.0934,
      "step": 51013
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9591643810272217,
      "learning_rate": 0.0005303034023802666,
      "loss": 3.1371,
      "step": 51014
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9367316961288452,
      "learning_rate": 0.0005303007809803241,
      "loss": 3.1327,
      "step": 51015
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4905210733413696,
      "learning_rate": 0.0005302981595375644,
      "loss": 3.1816,
      "step": 51016
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5785466432571411,
      "learning_rate": 0.0005302955380519879,
      "loss": 2.7923,
      "step": 51017
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6913042068481445,
      "learning_rate": 0.0005302929165235951,
      "loss": 3.1485,
      "step": 51018
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3673677444458008,
      "learning_rate": 0.0005302902949523865,
      "loss": 3.02,
      "step": 51019
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.0957720279693604,
      "learning_rate": 0.0005302876733383627,
      "loss": 3.1427,
      "step": 51020
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5408234596252441,
      "learning_rate": 0.0005302850516815239,
      "loss": 2.9815,
      "step": 51021
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3961189985275269,
      "learning_rate": 0.0005302824299818708,
      "loss": 3.2873,
      "step": 51022
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5755525827407837,
      "learning_rate": 0.0005302798082394039,
      "loss": 3.0724,
      "step": 51023
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.708548665046692,
      "learning_rate": 0.0005302771864541237,
      "loss": 3.1231,
      "step": 51024
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6588046550750732,
      "learning_rate": 0.0005302745646260306,
      "loss": 2.9338,
      "step": 51025
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.9024311304092407,
      "learning_rate": 0.000530271942755125,
      "loss": 2.8346,
      "step": 51026
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.382310390472412,
      "learning_rate": 0.0005302693208414076,
      "loss": 2.9267,
      "step": 51027
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.3492250442504883,
      "learning_rate": 0.0005302666988848787,
      "loss": 3.2146,
      "step": 51028
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.65645170211792,
      "learning_rate": 0.000530264076885539,
      "loss": 2.9716,
      "step": 51029
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4441378116607666,
      "learning_rate": 0.0005302614548433887,
      "loss": 3.0538,
      "step": 51030
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6509476900100708,
      "learning_rate": 0.0005302588327584287,
      "loss": 3.1964,
      "step": 51031
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6186513900756836,
      "learning_rate": 0.000530256210630659,
      "loss": 3.0722,
      "step": 51032
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.8351746797561646,
      "learning_rate": 0.0005302535884600804,
      "loss": 3.0468,
      "step": 51033
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1368181705474854,
      "learning_rate": 0.0005302509662466933,
      "loss": 3.0678,
      "step": 51034
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2939934730529785,
      "learning_rate": 0.0005302483439904982,
      "loss": 3.176,
      "step": 51035
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.6336495876312256,
      "learning_rate": 0.0005302457216914956,
      "loss": 2.9546,
      "step": 51036
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.70793879032135,
      "learning_rate": 0.0005302430993496859,
      "loss": 2.9801,
      "step": 51037
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7471423149108887,
      "learning_rate": 0.0005302404769650697,
      "loss": 3.0636,
      "step": 51038
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.8074874877929688,
      "learning_rate": 0.0005302378545376476,
      "loss": 3.0446,
      "step": 51039
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5946916341781616,
      "learning_rate": 0.0005302352320674199,
      "loss": 2.7658,
      "step": 51040
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.148847222328186,
      "learning_rate": 0.000530232609554387,
      "loss": 3.2503,
      "step": 51041
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6271754503250122,
      "learning_rate": 0.0005302299869985494,
      "loss": 3.1042,
      "step": 51042
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.4615941047668457,
      "learning_rate": 0.0005302273643999079,
      "loss": 3.0552,
      "step": 51043
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.891329050064087,
      "learning_rate": 0.0005302247417584627,
      "loss": 3.2258,
      "step": 51044
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.065563917160034,
      "learning_rate": 0.0005302221190742145,
      "loss": 3.0289,
      "step": 51045
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.789679765701294,
      "learning_rate": 0.0005302194963471635,
      "loss": 3.0112,
      "step": 51046
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4619017839431763,
      "learning_rate": 0.0005302168735773106,
      "loss": 3.0888,
      "step": 51047
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.452698826789856,
      "learning_rate": 0.0005302142507646557,
      "loss": 3.1732,
      "step": 51048
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.704245924949646,
      "learning_rate": 0.0005302116279091998,
      "loss": 3.1496,
      "step": 51049
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5048949718475342,
      "learning_rate": 0.0005302090050109433,
      "loss": 3.3412,
      "step": 51050
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.38303804397583,
      "learning_rate": 0.0005302063820698864,
      "loss": 2.9371,
      "step": 51051
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5523582696914673,
      "learning_rate": 0.00053020375908603,
      "loss": 3.0862,
      "step": 51052
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.2872297763824463,
      "learning_rate": 0.0005302011360593743,
      "loss": 3.181,
      "step": 51053
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3741804361343384,
      "learning_rate": 0.0005301985129899197,
      "loss": 3.1578,
      "step": 51054
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.7043704986572266,
      "learning_rate": 0.000530195889877667,
      "loss": 3.2185,
      "step": 51055
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.7655205726623535,
      "learning_rate": 0.0005301932667226165,
      "loss": 3.0905,
      "step": 51056
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.2746801376342773,
      "learning_rate": 0.0005301906435247688,
      "loss": 2.9899,
      "step": 51057
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.596955418586731,
      "learning_rate": 0.0005301880202841242,
      "loss": 3.1019,
      "step": 51058
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.1449804306030273,
      "learning_rate": 0.0005301853970006834,
      "loss": 3.0211,
      "step": 51059
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6920887231826782,
      "learning_rate": 0.0005301827736744467,
      "loss": 2.7611,
      "step": 51060
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5028866529464722,
      "learning_rate": 0.0005301801503054148,
      "loss": 3.1006,
      "step": 51061
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5188277959823608,
      "learning_rate": 0.000530177526893588,
      "loss": 3.0445,
      "step": 51062
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5938743352890015,
      "learning_rate": 0.0005301749034389669,
      "loss": 2.9978,
      "step": 51063
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.5083353519439697,
      "learning_rate": 0.0005301722799415518,
      "loss": 3.0703,
      "step": 51064
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.3385396003723145,
      "learning_rate": 0.0005301696564013434,
      "loss": 3.0993,
      "step": 51065
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.430211067199707,
      "learning_rate": 0.0005301670328183421,
      "loss": 3.1903,
      "step": 51066
    },
    {
      "epoch": 0.66,
      "grad_norm": 3.7865700721740723,
      "learning_rate": 0.0005301644091925484,
      "loss": 3.1771,
      "step": 51067
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.6044769287109375,
      "learning_rate": 0.0005301617855239628,
      "loss": 2.989,
      "step": 51068
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4384500980377197,
      "learning_rate": 0.0005301591618125859,
      "loss": 3.0864,
      "step": 51069
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.19816517829895,
      "learning_rate": 0.000530156538058418,
      "loss": 2.9896,
      "step": 51070
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.4988017082214355,
      "learning_rate": 0.0005301539142614596,
      "loss": 3.0879,
      "step": 51071
    },
    {
      "epoch": 0.66,
      "grad_norm": 2.6838436126708984,
      "learning_rate": 0.0005301512904217114,
      "loss": 3.2139,
      "step": 51072
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2341252565383911,
      "learning_rate": 0.0005301486665391735,
      "loss": 3.0407,
      "step": 51073
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3754489421844482,
      "learning_rate": 0.0005301460426138467,
      "loss": 3.2271,
      "step": 51074
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.866753339767456,
      "learning_rate": 0.0005301434186457315,
      "loss": 2.976,
      "step": 51075
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3354249000549316,
      "learning_rate": 0.0005301407946348281,
      "loss": 3.1422,
      "step": 51076
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4351694583892822,
      "learning_rate": 0.0005301381705811374,
      "loss": 2.8328,
      "step": 51077
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6327217817306519,
      "learning_rate": 0.0005301355464846595,
      "loss": 3.2641,
      "step": 51078
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6479825973510742,
      "learning_rate": 0.0005301329223453952,
      "loss": 3.2003,
      "step": 51079
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8152133226394653,
      "learning_rate": 0.0005301302981633448,
      "loss": 3.1256,
      "step": 51080
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3966857194900513,
      "learning_rate": 0.0005301276739385088,
      "loss": 3.0771,
      "step": 51081
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.39105486869812,
      "learning_rate": 0.0005301250496708877,
      "loss": 2.8644,
      "step": 51082
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3635004758834839,
      "learning_rate": 0.000530122425360482,
      "loss": 3.0398,
      "step": 51083
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2954533100128174,
      "learning_rate": 0.0005301198010072923,
      "loss": 3.086,
      "step": 51084
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.866808533668518,
      "learning_rate": 0.000530117176611319,
      "loss": 3.1438,
      "step": 51085
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5801104307174683,
      "learning_rate": 0.0005301145521725624,
      "loss": 2.8294,
      "step": 51086
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5674834251403809,
      "learning_rate": 0.0005301119276910232,
      "loss": 3.3508,
      "step": 51087
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4920566082000732,
      "learning_rate": 0.000530109303166702,
      "loss": 3.1345,
      "step": 51088
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5081787109375,
      "learning_rate": 0.0005301066785995991,
      "loss": 3.2127,
      "step": 51089
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4505614042282104,
      "learning_rate": 0.0005301040539897148,
      "loss": 2.8057,
      "step": 51090
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5134875774383545,
      "learning_rate": 0.00053010142933705,
      "loss": 3.2158,
      "step": 51091
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.367464303970337,
      "learning_rate": 0.000530098804641605,
      "loss": 2.8428,
      "step": 51092
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5676683187484741,
      "learning_rate": 0.0005300961799033803,
      "loss": 3.0812,
      "step": 51093
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5670161247253418,
      "learning_rate": 0.0005300935551223764,
      "loss": 3.2036,
      "step": 51094
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.727985143661499,
      "learning_rate": 0.0005300909302985936,
      "loss": 3.1524,
      "step": 51095
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5393460988998413,
      "learning_rate": 0.0005300883054320327,
      "loss": 2.9918,
      "step": 51096
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7314220666885376,
      "learning_rate": 0.000530085680522694,
      "loss": 2.9434,
      "step": 51097
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4574693441390991,
      "learning_rate": 0.0005300830555705781,
      "loss": 2.9173,
      "step": 51098
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4509292840957642,
      "learning_rate": 0.0005300804305756853,
      "loss": 3.3212,
      "step": 51099
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.533751368522644,
      "learning_rate": 0.0005300778055380163,
      "loss": 3.2082,
      "step": 51100
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.613736867904663,
      "learning_rate": 0.0005300751804575715,
      "loss": 3.2692,
      "step": 51101
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4970957040786743,
      "learning_rate": 0.0005300725553343512,
      "loss": 2.76,
      "step": 51102
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5250046253204346,
      "learning_rate": 0.0005300699301683562,
      "loss": 2.9912,
      "step": 51103
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8407610654830933,
      "learning_rate": 0.0005300673049595869,
      "loss": 3.0021,
      "step": 51104
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6402701139450073,
      "learning_rate": 0.0005300646797080437,
      "loss": 2.8322,
      "step": 51105
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.638745665550232,
      "learning_rate": 0.0005300620544137271,
      "loss": 3.0301,
      "step": 51106
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7239536046981812,
      "learning_rate": 0.0005300594290766378,
      "loss": 3.0824,
      "step": 51107
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6728999614715576,
      "learning_rate": 0.0005300568036967758,
      "loss": 3.2047,
      "step": 51108
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.632840633392334,
      "learning_rate": 0.0005300541782741422,
      "loss": 3.0353,
      "step": 51109
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.5602855682373047,
      "learning_rate": 0.000530051552808737,
      "loss": 3.0544,
      "step": 51110
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.5072977542877197,
      "learning_rate": 0.0005300489273005609,
      "loss": 2.9788,
      "step": 51111
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8053137063980103,
      "learning_rate": 0.0005300463017496143,
      "loss": 3.3285,
      "step": 51112
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3166472911834717,
      "learning_rate": 0.0005300436761558979,
      "loss": 3.03,
      "step": 51113
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.986711263656616,
      "learning_rate": 0.000530041050519412,
      "loss": 2.9219,
      "step": 51114
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.3592073917388916,
      "learning_rate": 0.0005300384248401571,
      "loss": 2.892,
      "step": 51115
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.445521354675293,
      "learning_rate": 0.0005300357991181337,
      "loss": 3.2744,
      "step": 51116
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.884398102760315,
      "learning_rate": 0.0005300331733533423,
      "loss": 3.0641,
      "step": 51117
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.765655755996704,
      "learning_rate": 0.0005300305475457834,
      "loss": 3.2253,
      "step": 51118
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3690059185028076,
      "learning_rate": 0.0005300279216954576,
      "loss": 3.0002,
      "step": 51119
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6815276145935059,
      "learning_rate": 0.0005300252958023651,
      "loss": 2.9784,
      "step": 51120
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6137586832046509,
      "learning_rate": 0.0005300226698665066,
      "loss": 2.7757,
      "step": 51121
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5880464315414429,
      "learning_rate": 0.0005300200438878826,
      "loss": 3.0072,
      "step": 51122
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7779570817947388,
      "learning_rate": 0.0005300174178664934,
      "loss": 3.2264,
      "step": 51123
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9646581411361694,
      "learning_rate": 0.0005300147918023397,
      "loss": 2.9538,
      "step": 51124
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4352563619613647,
      "learning_rate": 0.000530012165695422,
      "loss": 2.8615,
      "step": 51125
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6674368381500244,
      "learning_rate": 0.0005300095395457405,
      "loss": 3.0621,
      "step": 51126
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4505908489227295,
      "learning_rate": 0.0005300069133532961,
      "loss": 2.943,
      "step": 51127
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7821784019470215,
      "learning_rate": 0.0005300042871180889,
      "loss": 2.9491,
      "step": 51128
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5293498039245605,
      "learning_rate": 0.0005300016608401196,
      "loss": 2.9756,
      "step": 51129
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8317762613296509,
      "learning_rate": 0.0005299990345193887,
      "loss": 2.7964,
      "step": 51130
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6653738021850586,
      "learning_rate": 0.0005299964081558966,
      "loss": 2.707,
      "step": 51131
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0574400424957275,
      "learning_rate": 0.0005299937817496438,
      "loss": 3.1051,
      "step": 51132
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3984674215316772,
      "learning_rate": 0.0005299911553006309,
      "loss": 3.0465,
      "step": 51133
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3778244256973267,
      "learning_rate": 0.0005299885288088583,
      "loss": 3.0659,
      "step": 51134
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.362879753112793,
      "learning_rate": 0.0005299859022743264,
      "loss": 2.9882,
      "step": 51135
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3034160137176514,
      "learning_rate": 0.0005299832756970359,
      "loss": 3.001,
      "step": 51136
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4614545106887817,
      "learning_rate": 0.000529980649076987,
      "loss": 2.9691,
      "step": 51137
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4071319103240967,
      "learning_rate": 0.0005299780224141805,
      "loss": 2.9819,
      "step": 51138
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7304949760437012,
      "learning_rate": 0.0005299753957086167,
      "loss": 2.9911,
      "step": 51139
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6383585929870605,
      "learning_rate": 0.0005299727689602961,
      "loss": 2.832,
      "step": 51140
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4477698802947998,
      "learning_rate": 0.0005299701421692193,
      "loss": 3.1655,
      "step": 51141
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7621355056762695,
      "learning_rate": 0.0005299675153353866,
      "loss": 3.0352,
      "step": 51142
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9216952323913574,
      "learning_rate": 0.0005299648884587987,
      "loss": 3.0614,
      "step": 51143
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6378600597381592,
      "learning_rate": 0.0005299622615394559,
      "loss": 2.9318,
      "step": 51144
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5816401243209839,
      "learning_rate": 0.0005299596345773589,
      "loss": 3.0125,
      "step": 51145
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.399078607559204,
      "learning_rate": 0.0005299570075725079,
      "loss": 3.1906,
      "step": 51146
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7783482074737549,
      "learning_rate": 0.0005299543805249037,
      "loss": 2.8682,
      "step": 51147
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.775338888168335,
      "learning_rate": 0.0005299517534345466,
      "loss": 3.0394,
      "step": 51148
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0963499546051025,
      "learning_rate": 0.000529949126301437,
      "loss": 2.9399,
      "step": 51149
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6910022497177124,
      "learning_rate": 0.0005299464991255756,
      "loss": 2.9097,
      "step": 51150
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5847665071487427,
      "learning_rate": 0.0005299438719069629,
      "loss": 3.0917,
      "step": 51151
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6308701038360596,
      "learning_rate": 0.0005299412446455992,
      "loss": 2.9464,
      "step": 51152
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8979253768920898,
      "learning_rate": 0.0005299386173414851,
      "loss": 3.0807,
      "step": 51153
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.465131163597107,
      "learning_rate": 0.0005299359899946212,
      "loss": 2.8572,
      "step": 51154
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4930206537246704,
      "learning_rate": 0.0005299333626050077,
      "loss": 3.2104,
      "step": 51155
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3694425821304321,
      "learning_rate": 0.0005299307351726453,
      "loss": 3.135,
      "step": 51156
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6805936098098755,
      "learning_rate": 0.0005299281076975343,
      "loss": 3.0369,
      "step": 51157
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.557769775390625,
      "learning_rate": 0.0005299254801796754,
      "loss": 3.0631,
      "step": 51158
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5031260251998901,
      "learning_rate": 0.0005299228526190692,
      "loss": 2.9618,
      "step": 51159
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3218168020248413,
      "learning_rate": 0.0005299202250157158,
      "loss": 3.0114,
      "step": 51160
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6697185039520264,
      "learning_rate": 0.000529917597369616,
      "loss": 3.1497,
      "step": 51161
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5213630199432373,
      "learning_rate": 0.0005299149696807701,
      "loss": 2.9821,
      "step": 51162
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.584104299545288,
      "learning_rate": 0.0005299123419491786,
      "loss": 3.0939,
      "step": 51163
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3156309127807617,
      "learning_rate": 0.0005299097141748422,
      "loss": 3.0424,
      "step": 51164
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5086462497711182,
      "learning_rate": 0.0005299070863577613,
      "loss": 3.1806,
      "step": 51165
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4598654508590698,
      "learning_rate": 0.0005299044584979361,
      "loss": 3.4625,
      "step": 51166
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9774953126907349,
      "learning_rate": 0.0005299018305953675,
      "loss": 3.0252,
      "step": 51167
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4248331785202026,
      "learning_rate": 0.0005298992026500557,
      "loss": 3.2122,
      "step": 51168
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6084281206130981,
      "learning_rate": 0.0005298965746620013,
      "loss": 3.1021,
      "step": 51169
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.583404302597046,
      "learning_rate": 0.0005298939466312049,
      "loss": 3.2548,
      "step": 51170
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7496204376220703,
      "learning_rate": 0.0005298913185576668,
      "loss": 3.2608,
      "step": 51171
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5142004489898682,
      "learning_rate": 0.0005298886904413875,
      "loss": 3.0673,
      "step": 51172
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1070125102996826,
      "learning_rate": 0.0005298860622823678,
      "loss": 3.1416,
      "step": 51173
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4809205532073975,
      "learning_rate": 0.0005298834340806077,
      "loss": 2.8475,
      "step": 51174
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5786659717559814,
      "learning_rate": 0.000529880805836108,
      "loss": 2.9266,
      "step": 51175
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5480613708496094,
      "learning_rate": 0.0005298781775488692,
      "loss": 2.9538,
      "step": 51176
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.013251543045044,
      "learning_rate": 0.0005298755492188916,
      "loss": 3.224,
      "step": 51177
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6915963888168335,
      "learning_rate": 0.0005298729208461757,
      "loss": 3.0353,
      "step": 51178
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.894723892211914,
      "learning_rate": 0.0005298702924307223,
      "loss": 3.1451,
      "step": 51179
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8490149974822998,
      "learning_rate": 0.0005298676639725315,
      "loss": 2.9373,
      "step": 51180
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5300227403640747,
      "learning_rate": 0.0005298650354716042,
      "loss": 3.0995,
      "step": 51181
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4144684076309204,
      "learning_rate": 0.0005298624069279403,
      "loss": 3.2952,
      "step": 51182
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.524168848991394,
      "learning_rate": 0.000529859778341541,
      "loss": 2.9812,
      "step": 51183
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3269996643066406,
      "learning_rate": 0.0005298571497124062,
      "loss": 2.9193,
      "step": 51184
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9990168809890747,
      "learning_rate": 0.0005298545210405367,
      "loss": 3.0088,
      "step": 51185
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6277744770050049,
      "learning_rate": 0.0005298518923259329,
      "loss": 3.349,
      "step": 51186
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.365494966506958,
      "learning_rate": 0.0005298492635685952,
      "loss": 3.1036,
      "step": 51187
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4068913459777832,
      "learning_rate": 0.0005298466347685244,
      "loss": 3.2928,
      "step": 51188
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.686630368232727,
      "learning_rate": 0.0005298440059257205,
      "loss": 3.0954,
      "step": 51189
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4464949369430542,
      "learning_rate": 0.0005298413770401845,
      "loss": 3.2051,
      "step": 51190
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4116497039794922,
      "learning_rate": 0.0005298387481119165,
      "loss": 3.0591,
      "step": 51191
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.260364055633545,
      "learning_rate": 0.0005298361191409172,
      "loss": 3.1048,
      "step": 51192
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.024273633956909,
      "learning_rate": 0.000529833490127187,
      "loss": 3.1002,
      "step": 51193
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3433881998062134,
      "learning_rate": 0.0005298308610707265,
      "loss": 3.1761,
      "step": 51194
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3052566051483154,
      "learning_rate": 0.000529828231971536,
      "loss": 3.0484,
      "step": 51195
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5960313081741333,
      "learning_rate": 0.0005298256028296161,
      "loss": 2.8837,
      "step": 51196
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6929755210876465,
      "learning_rate": 0.0005298229736449673,
      "loss": 3.1189,
      "step": 51197
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6231672763824463,
      "learning_rate": 0.0005298203444175901,
      "loss": 3.3581,
      "step": 51198
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3871004581451416,
      "learning_rate": 0.0005298177151474849,
      "loss": 2.9901,
      "step": 51199
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8579257726669312,
      "learning_rate": 0.0005298150858346524,
      "loss": 3.2349,
      "step": 51200
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5098170042037964,
      "learning_rate": 0.0005298124564790928,
      "loss": 2.8042,
      "step": 51201
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.560627818107605,
      "learning_rate": 0.0005298098270808067,
      "loss": 3.0838,
      "step": 51202
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4086436033248901,
      "learning_rate": 0.0005298071976397947,
      "loss": 3.0456,
      "step": 51203
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4636456966400146,
      "learning_rate": 0.0005298045681560571,
      "loss": 3.0215,
      "step": 51204
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5100226402282715,
      "learning_rate": 0.0005298019386295946,
      "loss": 3.3093,
      "step": 51205
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8845411539077759,
      "learning_rate": 0.0005297993090604077,
      "loss": 3.0608,
      "step": 51206
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6013468503952026,
      "learning_rate": 0.0005297966794484965,
      "loss": 3.2577,
      "step": 51207
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8356258869171143,
      "learning_rate": 0.0005297940497938618,
      "loss": 2.9826,
      "step": 51208
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.134429693222046,
      "learning_rate": 0.0005297914200965042,
      "loss": 3.061,
      "step": 51209
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.40410578250885,
      "learning_rate": 0.0005297887903564239,
      "loss": 3.2555,
      "step": 51210
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7973096370697021,
      "learning_rate": 0.0005297861605736216,
      "loss": 3.0723,
      "step": 51211
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8280264139175415,
      "learning_rate": 0.0005297835307480977,
      "loss": 3.2761,
      "step": 51212
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6150233745574951,
      "learning_rate": 0.0005297809008798527,
      "loss": 2.7904,
      "step": 51213
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9350342750549316,
      "learning_rate": 0.0005297782709688871,
      "loss": 2.7934,
      "step": 51214
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6575065851211548,
      "learning_rate": 0.0005297756410152014,
      "loss": 3.0328,
      "step": 51215
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1429004669189453,
      "learning_rate": 0.0005297730110187961,
      "loss": 2.8622,
      "step": 51216
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.083815336227417,
      "learning_rate": 0.0005297703809796716,
      "loss": 3.0206,
      "step": 51217
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7545485496520996,
      "learning_rate": 0.0005297677508978285,
      "loss": 3.0545,
      "step": 51218
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.836897611618042,
      "learning_rate": 0.0005297651207732672,
      "loss": 3.1026,
      "step": 51219
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0556392669677734,
      "learning_rate": 0.0005297624906059883,
      "loss": 3.1308,
      "step": 51220
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0331668853759766,
      "learning_rate": 0.000529759860395992,
      "loss": 3.0132,
      "step": 51221
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6807292699813843,
      "learning_rate": 0.0005297572301432792,
      "loss": 3.039,
      "step": 51222
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5882930755615234,
      "learning_rate": 0.0005297545998478501,
      "loss": 3.1347,
      "step": 51223
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6992735862731934,
      "learning_rate": 0.0005297519695097053,
      "loss": 3.0869,
      "step": 51224
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.6070716381073,
      "learning_rate": 0.0005297493391288453,
      "loss": 2.9815,
      "step": 51225
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6839289665222168,
      "learning_rate": 0.0005297467087052706,
      "loss": 2.9418,
      "step": 51226
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5579723119735718,
      "learning_rate": 0.0005297440782389817,
      "loss": 3.0944,
      "step": 51227
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.851445436477661,
      "learning_rate": 0.0005297414477299789,
      "loss": 3.2237,
      "step": 51228
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1035263538360596,
      "learning_rate": 0.0005297388171782629,
      "loss": 3.0266,
      "step": 51229
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6085699796676636,
      "learning_rate": 0.000529736186583834,
      "loss": 3.1048,
      "step": 51230
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.749903917312622,
      "learning_rate": 0.000529733555946693,
      "loss": 3.0833,
      "step": 51231
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8351361751556396,
      "learning_rate": 0.00052973092526684,
      "loss": 3.027,
      "step": 51232
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6970685720443726,
      "learning_rate": 0.0005297282945442758,
      "loss": 2.8891,
      "step": 51233
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5627797842025757,
      "learning_rate": 0.0005297256637790007,
      "loss": 3.0191,
      "step": 51234
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6356258392333984,
      "learning_rate": 0.0005297230329710154,
      "loss": 3.2266,
      "step": 51235
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.468489408493042,
      "learning_rate": 0.0005297204021203201,
      "loss": 3.1712,
      "step": 51236
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.007227897644043,
      "learning_rate": 0.0005297177712269155,
      "loss": 3.0412,
      "step": 51237
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3933171033859253,
      "learning_rate": 0.0005297151402908021,
      "loss": 2.8892,
      "step": 51238
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.546279191970825,
      "learning_rate": 0.0005297125093119803,
      "loss": 2.9886,
      "step": 51239
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.86290442943573,
      "learning_rate": 0.0005297098782904505,
      "loss": 2.9427,
      "step": 51240
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4881649017333984,
      "learning_rate": 0.0005297072472262134,
      "loss": 2.9394,
      "step": 51241
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.9769670963287354,
      "learning_rate": 0.0005297046161192693,
      "loss": 3.0659,
      "step": 51242
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9500681161880493,
      "learning_rate": 0.0005297019849696188,
      "loss": 3.0387,
      "step": 51243
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.834932804107666,
      "learning_rate": 0.0005296993537772625,
      "loss": 2.9165,
      "step": 51244
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6710314750671387,
      "learning_rate": 0.0005296967225422005,
      "loss": 3.0977,
      "step": 51245
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.020131826400757,
      "learning_rate": 0.0005296940912644338,
      "loss": 3.1001,
      "step": 51246
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.382150888442993,
      "learning_rate": 0.0005296914599439625,
      "loss": 3.1301,
      "step": 51247
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5395253896713257,
      "learning_rate": 0.0005296888285807872,
      "loss": 3.0166,
      "step": 51248
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.512920618057251,
      "learning_rate": 0.0005296861971749086,
      "loss": 3.2428,
      "step": 51249
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8145091533660889,
      "learning_rate": 0.0005296835657263268,
      "loss": 3.0918,
      "step": 51250
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2759366035461426,
      "learning_rate": 0.0005296809342350424,
      "loss": 3.0228,
      "step": 51251
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.528672218322754,
      "learning_rate": 0.0005296783027010562,
      "loss": 3.0003,
      "step": 51252
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.551378607749939,
      "learning_rate": 0.0005296756711243684,
      "loss": 3.1618,
      "step": 51253
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5619854927062988,
      "learning_rate": 0.0005296730395049795,
      "loss": 3.1462,
      "step": 51254
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.419622540473938,
      "learning_rate": 0.0005296704078428902,
      "loss": 3.1159,
      "step": 51255
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3840917348861694,
      "learning_rate": 0.0005296677761381008,
      "loss": 3.3381,
      "step": 51256
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7666267156600952,
      "learning_rate": 0.0005296651443906116,
      "loss": 3.2408,
      "step": 51257
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6296769380569458,
      "learning_rate": 0.0005296625126004236,
      "loss": 2.9648,
      "step": 51258
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6140936613082886,
      "learning_rate": 0.0005296598807675368,
      "loss": 2.854,
      "step": 51259
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5658618211746216,
      "learning_rate": 0.000529657248891952,
      "loss": 3.1503,
      "step": 51260
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8602347373962402,
      "learning_rate": 0.0005296546169736695,
      "loss": 3.086,
      "step": 51261
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1343352794647217,
      "learning_rate": 0.0005296519850126898,
      "loss": 3.0947,
      "step": 51262
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5326945781707764,
      "learning_rate": 0.0005296493530090136,
      "loss": 3.0102,
      "step": 51263
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3844631910324097,
      "learning_rate": 0.0005296467209626413,
      "loss": 3.2007,
      "step": 51264
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6045315265655518,
      "learning_rate": 0.0005296440888735733,
      "loss": 3.2781,
      "step": 51265
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6396703720092773,
      "learning_rate": 0.0005296414567418101,
      "loss": 3.0866,
      "step": 51266
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7402994632720947,
      "learning_rate": 0.0005296388245673523,
      "loss": 2.9582,
      "step": 51267
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3691903352737427,
      "learning_rate": 0.0005296361923502001,
      "loss": 3.1388,
      "step": 51268
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7741936445236206,
      "learning_rate": 0.0005296335600903544,
      "loss": 3.3026,
      "step": 51269
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8164799213409424,
      "learning_rate": 0.0005296309277878154,
      "loss": 2.9588,
      "step": 51270
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4566078186035156,
      "learning_rate": 0.0005296282954425837,
      "loss": 2.8977,
      "step": 51271
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6039639711380005,
      "learning_rate": 0.0005296256630546597,
      "loss": 2.8936,
      "step": 51272
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.187394380569458,
      "learning_rate": 0.000529623030624044,
      "loss": 2.8627,
      "step": 51273
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9177664518356323,
      "learning_rate": 0.0005296203981507371,
      "loss": 3.07,
      "step": 51274
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5277913808822632,
      "learning_rate": 0.0005296177656347393,
      "loss": 2.9729,
      "step": 51275
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4975335597991943,
      "learning_rate": 0.0005296151330760514,
      "loss": 3.2957,
      "step": 51276
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.0982863903045654,
      "learning_rate": 0.0005296125004746736,
      "loss": 3.0294,
      "step": 51277
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8957507610321045,
      "learning_rate": 0.0005296098678306065,
      "loss": 3.1582,
      "step": 51278
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2013168334960938,
      "learning_rate": 0.0005296072351438507,
      "loss": 3.2373,
      "step": 51279
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4981824159622192,
      "learning_rate": 0.0005296046024144064,
      "loss": 3.1749,
      "step": 51280
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0776660442352295,
      "learning_rate": 0.0005296019696422743,
      "loss": 3.1346,
      "step": 51281
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7801847457885742,
      "learning_rate": 0.000529599336827455,
      "loss": 3.1189,
      "step": 51282
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8064806461334229,
      "learning_rate": 0.0005295967039699488,
      "loss": 3.3686,
      "step": 51283
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.173266649246216,
      "learning_rate": 0.0005295940710697562,
      "loss": 2.9228,
      "step": 51284
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.525611400604248,
      "learning_rate": 0.0005295914381268778,
      "loss": 3.0148,
      "step": 51285
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.903981328010559,
      "learning_rate": 0.0005295888051413139,
      "loss": 3.1161,
      "step": 51286
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3562276363372803,
      "learning_rate": 0.0005295861721130652,
      "loss": 3.0609,
      "step": 51287
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.47462272644043,
      "learning_rate": 0.000529583539042132,
      "loss": 2.8422,
      "step": 51288
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.044111728668213,
      "learning_rate": 0.000529580905928515,
      "loss": 2.8014,
      "step": 51289
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3846755027770996,
      "learning_rate": 0.0005295782727722145,
      "loss": 3.0797,
      "step": 51290
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.7224485874176025,
      "learning_rate": 0.0005295756395732311,
      "loss": 2.9666,
      "step": 51291
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.5548198223114014,
      "learning_rate": 0.0005295730063315652,
      "loss": 3.0914,
      "step": 51292
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.030019760131836,
      "learning_rate": 0.0005295703730472174,
      "loss": 3.2251,
      "step": 51293
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6624698638916016,
      "learning_rate": 0.0005295677397201881,
      "loss": 3.2545,
      "step": 51294
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0898971557617188,
      "learning_rate": 0.0005295651063504779,
      "loss": 3.034,
      "step": 51295
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.9238040447235107,
      "learning_rate": 0.000529562472938087,
      "loss": 2.8532,
      "step": 51296
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3625092506408691,
      "learning_rate": 0.0005295598394830164,
      "loss": 2.7597,
      "step": 51297
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.501474142074585,
      "learning_rate": 0.0005295572059852661,
      "loss": 2.952,
      "step": 51298
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2182326316833496,
      "learning_rate": 0.0005295545724448369,
      "loss": 2.9429,
      "step": 51299
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.629515528678894,
      "learning_rate": 0.0005295519388617291,
      "loss": 3.0827,
      "step": 51300
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3032139539718628,
      "learning_rate": 0.0005295493052359432,
      "loss": 3.1699,
      "step": 51301
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.9650115966796875,
      "learning_rate": 0.0005295466715674799,
      "loss": 3.1328,
      "step": 51302
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.039459466934204,
      "learning_rate": 0.0005295440378563394,
      "loss": 2.8772,
      "step": 51303
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.109048366546631,
      "learning_rate": 0.0005295414041025224,
      "loss": 2.858,
      "step": 51304
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7105624675750732,
      "learning_rate": 0.0005295387703060293,
      "loss": 3.1698,
      "step": 51305
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.176520586013794,
      "learning_rate": 0.0005295361364668608,
      "loss": 3.0073,
      "step": 51306
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.7839128971099854,
      "learning_rate": 0.0005295335025850169,
      "loss": 3.0187,
      "step": 51307
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.493172287940979,
      "learning_rate": 0.0005295308686604985,
      "loss": 2.8924,
      "step": 51308
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2081708908081055,
      "learning_rate": 0.000529528234693306,
      "loss": 3.0554,
      "step": 51309
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.754514694213867,
      "learning_rate": 0.0005295256006834399,
      "loss": 3.1888,
      "step": 51310
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0030932426452637,
      "learning_rate": 0.0005295229666309005,
      "loss": 3.096,
      "step": 51311
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9168901443481445,
      "learning_rate": 0.0005295203325356887,
      "loss": 3.1705,
      "step": 51312
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5323102474212646,
      "learning_rate": 0.0005295176983978045,
      "loss": 2.9956,
      "step": 51313
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5195070505142212,
      "learning_rate": 0.0005295150642172489,
      "loss": 3.0914,
      "step": 51314
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5718743801116943,
      "learning_rate": 0.0005295124299940218,
      "loss": 2.9769,
      "step": 51315
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7202743291854858,
      "learning_rate": 0.0005295097957281242,
      "loss": 3.1251,
      "step": 51316
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6730679273605347,
      "learning_rate": 0.0005295071614195562,
      "loss": 2.8664,
      "step": 51317
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4299896955490112,
      "learning_rate": 0.0005295045270683188,
      "loss": 3.0923,
      "step": 51318
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.02249813079834,
      "learning_rate": 0.000529501892674412,
      "loss": 3.1226,
      "step": 51319
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.595518946647644,
      "learning_rate": 0.0005294992582378364,
      "loss": 3.0411,
      "step": 51320
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2732397317886353,
      "learning_rate": 0.0005294966237585927,
      "loss": 3.1709,
      "step": 51321
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3976914882659912,
      "learning_rate": 0.0005294939892366812,
      "loss": 3.0549,
      "step": 51322
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2955331802368164,
      "learning_rate": 0.0005294913546721024,
      "loss": 3.2133,
      "step": 51323
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7198076248168945,
      "learning_rate": 0.0005294887200648569,
      "loss": 2.9211,
      "step": 51324
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3739947080612183,
      "learning_rate": 0.000529486085414945,
      "loss": 3.159,
      "step": 51325
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.135047197341919,
      "learning_rate": 0.0005294834507223673,
      "loss": 3.0284,
      "step": 51326
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.737685203552246,
      "learning_rate": 0.0005294808159871244,
      "loss": 3.1863,
      "step": 51327
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.511534333229065,
      "learning_rate": 0.0005294781812092167,
      "loss": 3.0453,
      "step": 51328
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4688154458999634,
      "learning_rate": 0.0005294755463886446,
      "loss": 3.2637,
      "step": 51329
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.9142558574676514,
      "learning_rate": 0.0005294729115254087,
      "loss": 3.1184,
      "step": 51330
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9440248012542725,
      "learning_rate": 0.0005294702766195094,
      "loss": 3.0167,
      "step": 51331
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.645848035812378,
      "learning_rate": 0.0005294676416709472,
      "loss": 3.1801,
      "step": 51332
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7073726654052734,
      "learning_rate": 0.0005294650066797228,
      "loss": 3.2063,
      "step": 51333
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.6824324131011963,
      "learning_rate": 0.0005294623716458364,
      "loss": 3.1517,
      "step": 51334
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8377131223678589,
      "learning_rate": 0.0005294597365692887,
      "loss": 3.0208,
      "step": 51335
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5588200092315674,
      "learning_rate": 0.00052945710145008,
      "loss": 2.7997,
      "step": 51336
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9510904550552368,
      "learning_rate": 0.0005294544662882109,
      "loss": 3.1181,
      "step": 51337
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5080510377883911,
      "learning_rate": 0.0005294518310836819,
      "loss": 3.0566,
      "step": 51338
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3999171257019043,
      "learning_rate": 0.0005294491958364936,
      "loss": 3.0212,
      "step": 51339
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7266480922698975,
      "learning_rate": 0.0005294465605466462,
      "loss": 3.0451,
      "step": 51340
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4801654815673828,
      "learning_rate": 0.0005294439252141404,
      "loss": 3.1531,
      "step": 51341
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.656535029411316,
      "learning_rate": 0.0005294412898389767,
      "loss": 3.1258,
      "step": 51342
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.483547568321228,
      "learning_rate": 0.0005294386544211555,
      "loss": 2.7771,
      "step": 51343
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4532655477523804,
      "learning_rate": 0.0005294360189606773,
      "loss": 3.1482,
      "step": 51344
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7544769048690796,
      "learning_rate": 0.0005294333834575427,
      "loss": 2.7529,
      "step": 51345
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.731667160987854,
      "learning_rate": 0.000529430747911752,
      "loss": 3.0362,
      "step": 51346
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6227458715438843,
      "learning_rate": 0.0005294281123233058,
      "loss": 2.8445,
      "step": 51347
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3278632164001465,
      "learning_rate": 0.0005294254766922046,
      "loss": 3.0875,
      "step": 51348
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8941411972045898,
      "learning_rate": 0.0005294228410184489,
      "loss": 3.3772,
      "step": 51349
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9390052556991577,
      "learning_rate": 0.0005294202053020392,
      "loss": 3.0995,
      "step": 51350
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.595165729522705,
      "learning_rate": 0.0005294175695429758,
      "loss": 3.1665,
      "step": 51351
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.013231039047241,
      "learning_rate": 0.0005294149337412595,
      "loss": 2.9723,
      "step": 51352
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7929743528366089,
      "learning_rate": 0.0005294122978968906,
      "loss": 2.8382,
      "step": 51353
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4916571378707886,
      "learning_rate": 0.0005294096620098696,
      "loss": 3.0303,
      "step": 51354
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.346832036972046,
      "learning_rate": 0.000529407026080197,
      "loss": 3.0674,
      "step": 51355
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6265549659729004,
      "learning_rate": 0.0005294043901078733,
      "loss": 3.0718,
      "step": 51356
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9268929958343506,
      "learning_rate": 0.000529401754092899,
      "loss": 3.0146,
      "step": 51357
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.787736415863037,
      "learning_rate": 0.0005293991180352746,
      "loss": 2.9292,
      "step": 51358
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5225086212158203,
      "learning_rate": 0.0005293964819350006,
      "loss": 3.199,
      "step": 51359
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9189313650131226,
      "learning_rate": 0.0005293938457920774,
      "loss": 3.0006,
      "step": 51360
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5513375997543335,
      "learning_rate": 0.0005293912096065056,
      "loss": 3.1138,
      "step": 51361
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8312368392944336,
      "learning_rate": 0.0005293885733782856,
      "loss": 2.9368,
      "step": 51362
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.357605218887329,
      "learning_rate": 0.000529385937107418,
      "loss": 2.9702,
      "step": 51363
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.011399269104004,
      "learning_rate": 0.0005293833007939032,
      "loss": 2.8386,
      "step": 51364
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5417287349700928,
      "learning_rate": 0.0005293806644377417,
      "loss": 2.9896,
      "step": 51365
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2045273780822754,
      "learning_rate": 0.000529378028038934,
      "loss": 3.1733,
      "step": 51366
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.43888258934021,
      "learning_rate": 0.0005293753915974806,
      "loss": 2.9552,
      "step": 51367
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.893802285194397,
      "learning_rate": 0.0005293727551133819,
      "loss": 2.9387,
      "step": 51368
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6280739307403564,
      "learning_rate": 0.0005293701185866387,
      "loss": 3.1733,
      "step": 51369
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.562238097190857,
      "learning_rate": 0.0005293674820172511,
      "loss": 3.0211,
      "step": 51370
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3274199962615967,
      "learning_rate": 0.0005293648454052197,
      "loss": 3.0896,
      "step": 51371
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.043370246887207,
      "learning_rate": 0.0005293622087505452,
      "loss": 3.3507,
      "step": 51372
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4952211380004883,
      "learning_rate": 0.0005293595720532279,
      "loss": 3.0898,
      "step": 51373
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.543952226638794,
      "learning_rate": 0.0005293569353132682,
      "loss": 3.0134,
      "step": 51374
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.258589744567871,
      "learning_rate": 0.0005293542985306668,
      "loss": 2.9086,
      "step": 51375
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7788244485855103,
      "learning_rate": 0.0005293516617054241,
      "loss": 3.0047,
      "step": 51376
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4848825931549072,
      "learning_rate": 0.0005293490248375406,
      "loss": 2.8748,
      "step": 51377
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.426120638847351,
      "learning_rate": 0.0005293463879270168,
      "loss": 3.2415,
      "step": 51378
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4270544052124023,
      "learning_rate": 0.0005293437509738532,
      "loss": 2.8579,
      "step": 51379
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4356348514556885,
      "learning_rate": 0.0005293411139780502,
      "loss": 3.0982,
      "step": 51380
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8694061040878296,
      "learning_rate": 0.0005293384769396084,
      "loss": 3.0896,
      "step": 51381
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.671788215637207,
      "learning_rate": 0.0005293358398585283,
      "loss": 2.973,
      "step": 51382
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3599207401275635,
      "learning_rate": 0.0005293332027348103,
      "loss": 3.158,
      "step": 51383
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9949175119400024,
      "learning_rate": 0.000529330565568455,
      "loss": 2.9647,
      "step": 51384
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3099991083145142,
      "learning_rate": 0.0005293279283594627,
      "loss": 3.2126,
      "step": 51385
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8396002054214478,
      "learning_rate": 0.000529325291107834,
      "loss": 2.9526,
      "step": 51386
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7921593189239502,
      "learning_rate": 0.0005293226538135694,
      "loss": 3.0477,
      "step": 51387
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.944248080253601,
      "learning_rate": 0.0005293200164766694,
      "loss": 2.9043,
      "step": 51388
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.7157013416290283,
      "learning_rate": 0.0005293173790971346,
      "loss": 2.9661,
      "step": 51389
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5629808902740479,
      "learning_rate": 0.0005293147416749653,
      "loss": 3.2527,
      "step": 51390
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5601906776428223,
      "learning_rate": 0.000529312104210162,
      "loss": 3.3593,
      "step": 51391
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1089136600494385,
      "learning_rate": 0.0005293094667027253,
      "loss": 3.2741,
      "step": 51392
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5298610925674438,
      "learning_rate": 0.0005293068291526556,
      "loss": 3.0614,
      "step": 51393
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3202126026153564,
      "learning_rate": 0.0005293041915599535,
      "loss": 3.0702,
      "step": 51394
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4646737575531006,
      "learning_rate": 0.0005293015539246194,
      "loss": 2.8056,
      "step": 51395
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3973965644836426,
      "learning_rate": 0.0005292989162466537,
      "loss": 2.9184,
      "step": 51396
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4735208749771118,
      "learning_rate": 0.0005292962785260572,
      "loss": 3.0009,
      "step": 51397
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4510557651519775,
      "learning_rate": 0.0005292936407628301,
      "loss": 3.0091,
      "step": 51398
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7339988946914673,
      "learning_rate": 0.000529291002956973,
      "loss": 3.0231,
      "step": 51399
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5837600231170654,
      "learning_rate": 0.0005292883651084864,
      "loss": 2.9751,
      "step": 51400
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1529295444488525,
      "learning_rate": 0.0005292857272173706,
      "loss": 3.204,
      "step": 51401
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6429463624954224,
      "learning_rate": 0.0005292830892836264,
      "loss": 3.0776,
      "step": 51402
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2864458560943604,
      "learning_rate": 0.0005292804513072541,
      "loss": 2.9927,
      "step": 51403
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.690651297569275,
      "learning_rate": 0.0005292778132882543,
      "loss": 2.8236,
      "step": 51404
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5281344652175903,
      "learning_rate": 0.0005292751752266274,
      "loss": 3.0643,
      "step": 51405
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5637640953063965,
      "learning_rate": 0.0005292725371223738,
      "loss": 3.0623,
      "step": 51406
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4823176860809326,
      "learning_rate": 0.0005292698989754943,
      "loss": 3.0928,
      "step": 51407
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5369545221328735,
      "learning_rate": 0.0005292672607859891,
      "loss": 3.2003,
      "step": 51408
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.05993390083313,
      "learning_rate": 0.0005292646225538588,
      "loss": 3.1778,
      "step": 51409
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9646168947219849,
      "learning_rate": 0.0005292619842791038,
      "loss": 2.9869,
      "step": 51410
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7075837850570679,
      "learning_rate": 0.0005292593459617248,
      "loss": 3.097,
      "step": 51411
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.362885594367981,
      "learning_rate": 0.0005292567076017221,
      "loss": 3.2563,
      "step": 51412
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.014455795288086,
      "learning_rate": 0.0005292540691990963,
      "loss": 2.8672,
      "step": 51413
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.317738652229309,
      "learning_rate": 0.0005292514307538477,
      "loss": 3.1311,
      "step": 51414
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.286348581314087,
      "learning_rate": 0.0005292487922659772,
      "loss": 3.0449,
      "step": 51415
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4685295820236206,
      "learning_rate": 0.0005292461537354848,
      "loss": 3.0298,
      "step": 51416
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6283718347549438,
      "learning_rate": 0.0005292435151623712,
      "loss": 2.9829,
      "step": 51417
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9297174215316772,
      "learning_rate": 0.0005292408765466371,
      "loss": 2.9293,
      "step": 51418
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.632059931755066,
      "learning_rate": 0.0005292382378882825,
      "loss": 3.0782,
      "step": 51419
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4837219715118408,
      "learning_rate": 0.0005292355991873085,
      "loss": 2.8722,
      "step": 51420
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8407889604568481,
      "learning_rate": 0.000529232960443715,
      "loss": 3.1323,
      "step": 51421
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5552562475204468,
      "learning_rate": 0.0005292303216575029,
      "loss": 2.8742,
      "step": 51422
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.421479344367981,
      "learning_rate": 0.0005292276828286726,
      "loss": 2.7966,
      "step": 51423
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.414414882659912,
      "learning_rate": 0.0005292250439572245,
      "loss": 3.2624,
      "step": 51424
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5004526376724243,
      "learning_rate": 0.000529222405043159,
      "loss": 3.1369,
      "step": 51425
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2670724391937256,
      "learning_rate": 0.000529219766086477,
      "loss": 3.0495,
      "step": 51426
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.888222098350525,
      "learning_rate": 0.0005292171270871785,
      "loss": 3.1994,
      "step": 51427
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.6395721435546875,
      "learning_rate": 0.0005292144880452643,
      "loss": 3.0251,
      "step": 51428
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2254459857940674,
      "learning_rate": 0.0005292118489607347,
      "loss": 2.9235,
      "step": 51429
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4555714130401611,
      "learning_rate": 0.0005292092098335905,
      "loss": 3.07,
      "step": 51430
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.775028944015503,
      "learning_rate": 0.0005292065706638318,
      "loss": 3.1239,
      "step": 51431
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.8219382762908936,
      "learning_rate": 0.0005292039314514594,
      "loss": 2.9226,
      "step": 51432
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.799939513206482,
      "learning_rate": 0.0005292012921964735,
      "loss": 3.1278,
      "step": 51433
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3200085163116455,
      "learning_rate": 0.0005291986528988749,
      "loss": 3.0409,
      "step": 51434
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.998993396759033,
      "learning_rate": 0.0005291960135586637,
      "loss": 3.1809,
      "step": 51435
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.584231376647949,
      "learning_rate": 0.000529193374175841,
      "loss": 3.227,
      "step": 51436
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7186168432235718,
      "learning_rate": 0.0005291907347504067,
      "loss": 3.0144,
      "step": 51437
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.000769853591919,
      "learning_rate": 0.0005291880952823615,
      "loss": 3.0481,
      "step": 51438
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8788589239120483,
      "learning_rate": 0.000529185455771706,
      "loss": 3.0293,
      "step": 51439
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9500176906585693,
      "learning_rate": 0.0005291828162184406,
      "loss": 2.8703,
      "step": 51440
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.115095615386963,
      "learning_rate": 0.0005291801766225657,
      "loss": 3.1665,
      "step": 51441
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.260037660598755,
      "learning_rate": 0.000529177536984082,
      "loss": 3.3678,
      "step": 51442
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2812485694885254,
      "learning_rate": 0.0005291748973029898,
      "loss": 2.9382,
      "step": 51443
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5760705471038818,
      "learning_rate": 0.0005291722575792896,
      "loss": 3.1181,
      "step": 51444
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1983985900878906,
      "learning_rate": 0.0005291696178129821,
      "loss": 3.0707,
      "step": 51445
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.748476028442383,
      "learning_rate": 0.0005291669780040676,
      "loss": 3.0925,
      "step": 51446
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.651724934577942,
      "learning_rate": 0.0005291643381525465,
      "loss": 3.2064,
      "step": 51447
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3609951734542847,
      "learning_rate": 0.0005291616982584195,
      "loss": 3.0425,
      "step": 51448
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.220529794692993,
      "learning_rate": 0.000529159058321687,
      "loss": 3.1031,
      "step": 51449
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.277745723724365,
      "learning_rate": 0.0005291564183423496,
      "loss": 2.8808,
      "step": 51450
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6871517896652222,
      "learning_rate": 0.0005291537783204076,
      "loss": 2.9316,
      "step": 51451
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.785117745399475,
      "learning_rate": 0.0005291511382558616,
      "loss": 3.1408,
      "step": 51452
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.184009313583374,
      "learning_rate": 0.0005291484981487121,
      "loss": 3.0451,
      "step": 51453
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6961150169372559,
      "learning_rate": 0.0005291458579989595,
      "loss": 3.1887,
      "step": 51454
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4979997873306274,
      "learning_rate": 0.0005291432178066045,
      "loss": 3.0825,
      "step": 51455
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9018796682357788,
      "learning_rate": 0.0005291405775716473,
      "loss": 3.0928,
      "step": 51456
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9694308042526245,
      "learning_rate": 0.0005291379372940886,
      "loss": 3.0036,
      "step": 51457
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5392014980316162,
      "learning_rate": 0.0005291352969739289,
      "loss": 2.795,
      "step": 51458
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7888462543487549,
      "learning_rate": 0.0005291326566111686,
      "loss": 3.05,
      "step": 51459
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.370896100997925,
      "learning_rate": 0.0005291300162058082,
      "loss": 3.1554,
      "step": 51460
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7450647354125977,
      "learning_rate": 0.0005291273757578481,
      "loss": 2.9828,
      "step": 51461
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.900761604309082,
      "learning_rate": 0.0005291247352672891,
      "loss": 2.8904,
      "step": 51462
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1125106811523438,
      "learning_rate": 0.0005291220947341313,
      "loss": 3.068,
      "step": 51463
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.552886486053467,
      "learning_rate": 0.0005291194541583755,
      "loss": 2.8965,
      "step": 51464
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4914705753326416,
      "learning_rate": 0.0005291168135400221,
      "loss": 2.7538,
      "step": 51465
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8852251768112183,
      "learning_rate": 0.0005291141728790713,
      "loss": 2.8954,
      "step": 51466
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.314826250076294,
      "learning_rate": 0.0005291115321755241,
      "loss": 3.1338,
      "step": 51467
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0664429664611816,
      "learning_rate": 0.0005291088914293806,
      "loss": 3.0218,
      "step": 51468
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3900401592254639,
      "learning_rate": 0.0005291062506406415,
      "loss": 3.1267,
      "step": 51469
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1431167125701904,
      "learning_rate": 0.0005291036098093073,
      "loss": 3.1552,
      "step": 51470
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2725672721862793,
      "learning_rate": 0.0005291009689353783,
      "loss": 3.2374,
      "step": 51471
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.5008747577667236,
      "learning_rate": 0.0005290983280188551,
      "loss": 3.06,
      "step": 51472
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.737958312034607,
      "learning_rate": 0.0005290956870597384,
      "loss": 2.8846,
      "step": 51473
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.104147911071777,
      "learning_rate": 0.0005290930460580282,
      "loss": 3.0252,
      "step": 51474
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.6129486560821533,
      "learning_rate": 0.0005290904050137254,
      "loss": 3.0892,
      "step": 51475
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4182847738265991,
      "learning_rate": 0.0005290877639268305,
      "loss": 2.9438,
      "step": 51476
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.3747756481170654,
      "learning_rate": 0.0005290851227973436,
      "loss": 3.0605,
      "step": 51477
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5150505304336548,
      "learning_rate": 0.0005290824816252655,
      "loss": 3.2202,
      "step": 51478
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6147878170013428,
      "learning_rate": 0.0005290798404105969,
      "loss": 3.0972,
      "step": 51479
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5548714399337769,
      "learning_rate": 0.0005290771991533378,
      "loss": 2.8937,
      "step": 51480
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8001683950424194,
      "learning_rate": 0.000529074557853489,
      "loss": 3.0693,
      "step": 51481
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.459886908531189,
      "learning_rate": 0.0005290719165110509,
      "loss": 3.1278,
      "step": 51482
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5652579069137573,
      "learning_rate": 0.000529069275126024,
      "loss": 2.7346,
      "step": 51483
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8156284093856812,
      "learning_rate": 0.0005290666336984088,
      "loss": 2.9809,
      "step": 51484
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3986347913742065,
      "learning_rate": 0.0005290639922282058,
      "loss": 2.7825,
      "step": 51485
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.520301103591919,
      "learning_rate": 0.0005290613507154154,
      "loss": 3.1235,
      "step": 51486
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.457330584526062,
      "learning_rate": 0.0005290587091600383,
      "loss": 2.9665,
      "step": 51487
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7411881685256958,
      "learning_rate": 0.0005290560675620748,
      "loss": 3.1315,
      "step": 51488
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9339549541473389,
      "learning_rate": 0.0005290534259215255,
      "loss": 2.844,
      "step": 51489
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.863553524017334,
      "learning_rate": 0.0005290507842383908,
      "loss": 3.1147,
      "step": 51490
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.295284390449524,
      "learning_rate": 0.0005290481425126712,
      "loss": 3.1693,
      "step": 51491
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1426076889038086,
      "learning_rate": 0.0005290455007443673,
      "loss": 2.9094,
      "step": 51492
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8514577150344849,
      "learning_rate": 0.0005290428589334794,
      "loss": 3.1541,
      "step": 51493
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5574071407318115,
      "learning_rate": 0.0005290402170800083,
      "loss": 3.147,
      "step": 51494
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4626612663269043,
      "learning_rate": 0.0005290375751839542,
      "loss": 2.9004,
      "step": 51495
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.376093864440918,
      "learning_rate": 0.0005290349332453177,
      "loss": 2.967,
      "step": 51496
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9552186727523804,
      "learning_rate": 0.0005290322912640992,
      "loss": 3.2546,
      "step": 51497
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.185667037963867,
      "learning_rate": 0.0005290296492402993,
      "loss": 2.975,
      "step": 51498
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.779950737953186,
      "learning_rate": 0.0005290270071739186,
      "loss": 2.9648,
      "step": 51499
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.1492557525634766,
      "learning_rate": 0.0005290243650649573,
      "loss": 3.2189,
      "step": 51500
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.2004220485687256,
      "learning_rate": 0.0005290217229134162,
      "loss": 3.166,
      "step": 51501
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.575258731842041,
      "learning_rate": 0.0005290190807192954,
      "loss": 3.1673,
      "step": 51502
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7656201124191284,
      "learning_rate": 0.0005290164384825958,
      "loss": 2.9924,
      "step": 51503
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4068756103515625,
      "learning_rate": 0.0005290137962033178,
      "loss": 3.2377,
      "step": 51504
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.444852590560913,
      "learning_rate": 0.0005290111538814617,
      "loss": 2.9034,
      "step": 51505
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.6891918182373047,
      "learning_rate": 0.0005290085115170281,
      "loss": 3.2253,
      "step": 51506
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.2905991077423096,
      "learning_rate": 0.0005290058691100175,
      "loss": 3.014,
      "step": 51507
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4355522394180298,
      "learning_rate": 0.0005290032266604304,
      "loss": 3.0134,
      "step": 51508
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.8722341060638428,
      "learning_rate": 0.0005290005841682673,
      "loss": 2.9845,
      "step": 51509
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.7618675231933594,
      "learning_rate": 0.0005289979416335287,
      "loss": 3.0775,
      "step": 51510
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.09167218208313,
      "learning_rate": 0.0005289952990562149,
      "loss": 3.0545,
      "step": 51511
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7903711795806885,
      "learning_rate": 0.0005289926564363267,
      "loss": 2.9135,
      "step": 51512
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4418681859970093,
      "learning_rate": 0.0005289900137738644,
      "loss": 2.8323,
      "step": 51513
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.952484130859375,
      "learning_rate": 0.0005289873710688285,
      "loss": 3.0754,
      "step": 51514
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.347207546234131,
      "learning_rate": 0.0005289847283212196,
      "loss": 2.9749,
      "step": 51515
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4470789432525635,
      "learning_rate": 0.0005289820855310381,
      "loss": 2.8846,
      "step": 51516
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9158376455307007,
      "learning_rate": 0.0005289794426982844,
      "loss": 3.0878,
      "step": 51517
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.696531891822815,
      "learning_rate": 0.0005289767998229592,
      "loss": 3.0943,
      "step": 51518
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.9719226360321045,
      "learning_rate": 0.0005289741569050629,
      "loss": 3.0536,
      "step": 51519
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.814718246459961,
      "learning_rate": 0.0005289715139445959,
      "loss": 3.0338,
      "step": 51520
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.9612646102905273,
      "learning_rate": 0.0005289688709415589,
      "loss": 3.1963,
      "step": 51521
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.795853853225708,
      "learning_rate": 0.0005289662278959522,
      "loss": 3.0387,
      "step": 51522
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8288265466690063,
      "learning_rate": 0.0005289635848077762,
      "loss": 3.0431,
      "step": 51523
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.5933876037597656,
      "learning_rate": 0.0005289609416770318,
      "loss": 3.2373,
      "step": 51524
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.239521026611328,
      "learning_rate": 0.0005289582985037191,
      "loss": 3.0485,
      "step": 51525
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.6539530754089355,
      "learning_rate": 0.0005289556552878388,
      "loss": 3.2111,
      "step": 51526
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.964692234992981,
      "learning_rate": 0.0005289530120293912,
      "loss": 3.0468,
      "step": 51527
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.919411540031433,
      "learning_rate": 0.0005289503687283771,
      "loss": 3.2847,
      "step": 51528
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.5502989292144775,
      "learning_rate": 0.0005289477253847967,
      "loss": 3.105,
      "step": 51529
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.6819679737091064,
      "learning_rate": 0.0005289450819986507,
      "loss": 3.1124,
      "step": 51530
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4351389408111572,
      "learning_rate": 0.0005289424385699393,
      "loss": 2.9564,
      "step": 51531
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9208871126174927,
      "learning_rate": 0.0005289397950986632,
      "loss": 2.9501,
      "step": 51532
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.825637936592102,
      "learning_rate": 0.000528937151584823,
      "loss": 2.9357,
      "step": 51533
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.7925422191619873,
      "learning_rate": 0.0005289345080284189,
      "loss": 3.1828,
      "step": 51534
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.9985926151275635,
      "learning_rate": 0.0005289318644294517,
      "loss": 2.6418,
      "step": 51535
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9282759428024292,
      "learning_rate": 0.0005289292207879217,
      "loss": 3.0525,
      "step": 51536
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5745600461959839,
      "learning_rate": 0.0005289265771038294,
      "loss": 3.2489,
      "step": 51537
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4076285362243652,
      "learning_rate": 0.0005289239333771754,
      "loss": 3.0672,
      "step": 51538
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.395965814590454,
      "learning_rate": 0.00052892128960796,
      "loss": 3.2333,
      "step": 51539
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5568510293960571,
      "learning_rate": 0.000528918645796184,
      "loss": 2.964,
      "step": 51540
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6080539226531982,
      "learning_rate": 0.0005289160019418476,
      "loss": 3.0307,
      "step": 51541
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6742056608200073,
      "learning_rate": 0.0005289133580449513,
      "loss": 2.975,
      "step": 51542
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6781312227249146,
      "learning_rate": 0.0005289107141054958,
      "loss": 3.0172,
      "step": 51543
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2895561456680298,
      "learning_rate": 0.0005289080701234815,
      "loss": 3.1864,
      "step": 51544
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3216884136199951,
      "learning_rate": 0.0005289054260989087,
      "loss": 3.047,
      "step": 51545
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5041698217391968,
      "learning_rate": 0.0005289027820317784,
      "loss": 2.9653,
      "step": 51546
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.147775888442993,
      "learning_rate": 0.0005289001379220904,
      "loss": 2.8209,
      "step": 51547
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5364452600479126,
      "learning_rate": 0.0005288974937698457,
      "loss": 3.2263,
      "step": 51548
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7870205640792847,
      "learning_rate": 0.0005288948495750446,
      "loss": 3.1204,
      "step": 51549
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5595322847366333,
      "learning_rate": 0.0005288922053376877,
      "loss": 3.2402,
      "step": 51550
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3751105070114136,
      "learning_rate": 0.0005288895610577753,
      "loss": 3.0185,
      "step": 51551
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5540189743041992,
      "learning_rate": 0.0005288869167353081,
      "loss": 2.9658,
      "step": 51552
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6084271669387817,
      "learning_rate": 0.0005288842723702865,
      "loss": 3.3776,
      "step": 51553
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4806902408599854,
      "learning_rate": 0.0005288816279627109,
      "loss": 3.0353,
      "step": 51554
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0156893730163574,
      "learning_rate": 0.0005288789835125819,
      "loss": 3.218,
      "step": 51555
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5393564701080322,
      "learning_rate": 0.0005288763390199,
      "loss": 3.3619,
      "step": 51556
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4709100723266602,
      "learning_rate": 0.0005288736944846658,
      "loss": 3.0133,
      "step": 51557
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4749712944030762,
      "learning_rate": 0.0005288710499068796,
      "loss": 2.7854,
      "step": 51558
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9868851900100708,
      "learning_rate": 0.0005288684052865419,
      "loss": 2.9645,
      "step": 51559
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7226028442382812,
      "learning_rate": 0.0005288657606236531,
      "loss": 3.2512,
      "step": 51560
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4015759229660034,
      "learning_rate": 0.000528863115918214,
      "loss": 2.8791,
      "step": 51561
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4649685621261597,
      "learning_rate": 0.0005288604711702249,
      "loss": 2.8712,
      "step": 51562
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9519983530044556,
      "learning_rate": 0.0005288578263796863,
      "loss": 3.233,
      "step": 51563
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2625300884246826,
      "learning_rate": 0.0005288551815465989,
      "loss": 2.9293,
      "step": 51564
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1029176712036133,
      "learning_rate": 0.0005288525366709628,
      "loss": 3.1728,
      "step": 51565
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.0224993228912354,
      "learning_rate": 0.0005288498917527787,
      "loss": 2.9974,
      "step": 51566
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3144829273223877,
      "learning_rate": 0.0005288472467920471,
      "loss": 2.917,
      "step": 51567
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8607041835784912,
      "learning_rate": 0.0005288446017887686,
      "loss": 3.0335,
      "step": 51568
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4209933280944824,
      "learning_rate": 0.0005288419567429434,
      "loss": 3.188,
      "step": 51569
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8258401155471802,
      "learning_rate": 0.0005288393116545722,
      "loss": 3.0913,
      "step": 51570
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7336872816085815,
      "learning_rate": 0.0005288366665236555,
      "loss": 3.0604,
      "step": 51571
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.34304940700531,
      "learning_rate": 0.0005288340213501936,
      "loss": 2.9657,
      "step": 51572
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4732258319854736,
      "learning_rate": 0.0005288313761341874,
      "loss": 3.2278,
      "step": 51573
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9593825340270996,
      "learning_rate": 0.0005288287308756369,
      "loss": 3.0357,
      "step": 51574
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.359884738922119,
      "learning_rate": 0.0005288260855745429,
      "loss": 2.9639,
      "step": 51575
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5296870470046997,
      "learning_rate": 0.0005288234402309059,
      "loss": 3.0049,
      "step": 51576
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9557064771652222,
      "learning_rate": 0.0005288207948447261,
      "loss": 3.1619,
      "step": 51577
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8600773811340332,
      "learning_rate": 0.0005288181494160043,
      "loss": 3.1183,
      "step": 51578
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8152999877929688,
      "learning_rate": 0.0005288155039447409,
      "loss": 2.9091,
      "step": 51579
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5200774669647217,
      "learning_rate": 0.0005288128584309365,
      "loss": 2.6922,
      "step": 51580
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4343132972717285,
      "learning_rate": 0.0005288102128745913,
      "loss": 3.094,
      "step": 51581
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9213706254959106,
      "learning_rate": 0.000528807567275706,
      "loss": 3.0742,
      "step": 51582
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.87228524684906,
      "learning_rate": 0.0005288049216342811,
      "loss": 2.8438,
      "step": 51583
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8236253261566162,
      "learning_rate": 0.0005288022759503169,
      "loss": 2.8156,
      "step": 51584
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7609176635742188,
      "learning_rate": 0.0005287996302238142,
      "loss": 2.9941,
      "step": 51585
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.053863525390625,
      "learning_rate": 0.0005287969844547732,
      "loss": 3.2297,
      "step": 51586
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4840319156646729,
      "learning_rate": 0.0005287943386431947,
      "loss": 3.0072,
      "step": 51587
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8214828968048096,
      "learning_rate": 0.000528791692789079,
      "loss": 3.3299,
      "step": 51588
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7407366037368774,
      "learning_rate": 0.0005287890468924264,
      "loss": 3.0996,
      "step": 51589
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7543786764144897,
      "learning_rate": 0.0005287864009532377,
      "loss": 2.9821,
      "step": 51590
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9729470014572144,
      "learning_rate": 0.0005287837549715134,
      "loss": 3.042,
      "step": 51591
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.177546262741089,
      "learning_rate": 0.0005287811089472536,
      "loss": 3.0023,
      "step": 51592
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.588765025138855,
      "learning_rate": 0.0005287784628804594,
      "loss": 2.9121,
      "step": 51593
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8233369588851929,
      "learning_rate": 0.0005287758167711308,
      "loss": 3.1645,
      "step": 51594
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.219717025756836,
      "learning_rate": 0.0005287731706192685,
      "loss": 3.0829,
      "step": 51595
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6308244466781616,
      "learning_rate": 0.0005287705244248729,
      "loss": 3.0962,
      "step": 51596
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4122183322906494,
      "learning_rate": 0.0005287678781879446,
      "loss": 3.0035,
      "step": 51597
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.136042356491089,
      "learning_rate": 0.000528765231908484,
      "loss": 2.8694,
      "step": 51598
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3087046146392822,
      "learning_rate": 0.0005287625855864916,
      "loss": 2.982,
      "step": 51599
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7908849716186523,
      "learning_rate": 0.000528759939221968,
      "loss": 3.1145,
      "step": 51600
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6784403324127197,
      "learning_rate": 0.0005287572928149135,
      "loss": 3.2773,
      "step": 51601
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4261761903762817,
      "learning_rate": 0.0005287546463653288,
      "loss": 3.0451,
      "step": 51602
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7239959239959717,
      "learning_rate": 0.0005287519998732142,
      "loss": 3.1353,
      "step": 51603
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4172230958938599,
      "learning_rate": 0.0005287493533385704,
      "loss": 3.0745,
      "step": 51604
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3984928131103516,
      "learning_rate": 0.0005287467067613976,
      "loss": 2.9115,
      "step": 51605
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2821342945098877,
      "learning_rate": 0.0005287440601416967,
      "loss": 3.0061,
      "step": 51606
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8683221340179443,
      "learning_rate": 0.0005287414134794678,
      "loss": 3.1749,
      "step": 51607
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5961261987686157,
      "learning_rate": 0.0005287387667747116,
      "loss": 2.8872,
      "step": 51608
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5173699855804443,
      "learning_rate": 0.0005287361200274286,
      "loss": 2.8148,
      "step": 51609
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.580150842666626,
      "learning_rate": 0.0005287334732376191,
      "loss": 3.0426,
      "step": 51610
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6201317310333252,
      "learning_rate": 0.0005287308264052838,
      "loss": 3.0288,
      "step": 51611
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5842632055282593,
      "learning_rate": 0.0005287281795304232,
      "loss": 2.8538,
      "step": 51612
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6796408891677856,
      "learning_rate": 0.0005287255326130376,
      "loss": 3.3171,
      "step": 51613
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9145790338516235,
      "learning_rate": 0.0005287228856531277,
      "loss": 3.167,
      "step": 51614
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4769515991210938,
      "learning_rate": 0.0005287202386506938,
      "loss": 3.436,
      "step": 51615
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4141008853912354,
      "learning_rate": 0.0005287175916057366,
      "loss": 2.9676,
      "step": 51616
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4987845420837402,
      "learning_rate": 0.0005287149445182565,
      "loss": 3.2113,
      "step": 51617
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6612671613693237,
      "learning_rate": 0.0005287122973882538,
      "loss": 2.9444,
      "step": 51618
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.029049873352051,
      "learning_rate": 0.0005287096502157293,
      "loss": 2.8336,
      "step": 51619
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6446924209594727,
      "learning_rate": 0.0005287070030006833,
      "loss": 2.9075,
      "step": 51620
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.508997917175293,
      "learning_rate": 0.0005287043557431163,
      "loss": 2.843,
      "step": 51621
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7991682291030884,
      "learning_rate": 0.000528701708443029,
      "loss": 2.9213,
      "step": 51622
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4546701908111572,
      "learning_rate": 0.0005286990611004216,
      "loss": 2.8274,
      "step": 51623
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7425379753112793,
      "learning_rate": 0.0005286964137152947,
      "loss": 2.8912,
      "step": 51624
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4140262603759766,
      "learning_rate": 0.000528693766287649,
      "loss": 3.2077,
      "step": 51625
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9271622896194458,
      "learning_rate": 0.0005286911188174847,
      "loss": 2.9245,
      "step": 51626
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5736746788024902,
      "learning_rate": 0.0005286884713048024,
      "loss": 3.1217,
      "step": 51627
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6030455827713013,
      "learning_rate": 0.0005286858237496027,
      "loss": 3.2665,
      "step": 51628
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.356591820716858,
      "learning_rate": 0.0005286831761518859,
      "loss": 3.119,
      "step": 51629
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8487353324890137,
      "learning_rate": 0.0005286805285116525,
      "loss": 3.1697,
      "step": 51630
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7154432535171509,
      "learning_rate": 0.0005286778808289032,
      "loss": 2.9138,
      "step": 51631
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5988389253616333,
      "learning_rate": 0.0005286752331036383,
      "loss": 2.8786,
      "step": 51632
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.450667142868042,
      "learning_rate": 0.0005286725853358583,
      "loss": 3.2726,
      "step": 51633
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4662059545516968,
      "learning_rate": 0.0005286699375255639,
      "loss": 3.3926,
      "step": 51634
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5695056915283203,
      "learning_rate": 0.0005286672896727554,
      "loss": 2.8857,
      "step": 51635
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.153881549835205,
      "learning_rate": 0.0005286646417774333,
      "loss": 2.867,
      "step": 51636
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.36483895778656,
      "learning_rate": 0.000528661993839598,
      "loss": 2.9895,
      "step": 51637
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.737449049949646,
      "learning_rate": 0.0005286593458592503,
      "loss": 3.1401,
      "step": 51638
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9187859296798706,
      "learning_rate": 0.0005286566978363904,
      "loss": 3.2133,
      "step": 51639
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6887105703353882,
      "learning_rate": 0.000528654049771019,
      "loss": 2.8086,
      "step": 51640
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8698543310165405,
      "learning_rate": 0.0005286514016631366,
      "loss": 2.775,
      "step": 51641
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2099201679229736,
      "learning_rate": 0.0005286487535127434,
      "loss": 3.0544,
      "step": 51642
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.830870509147644,
      "learning_rate": 0.0005286461053198401,
      "loss": 3.0708,
      "step": 51643
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8998628854751587,
      "learning_rate": 0.0005286434570844272,
      "loss": 3.3495,
      "step": 51644
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9592175483703613,
      "learning_rate": 0.0005286408088065052,
      "loss": 3.1985,
      "step": 51645
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3647173643112183,
      "learning_rate": 0.0005286381604860745,
      "loss": 2.8793,
      "step": 51646
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7736831903457642,
      "learning_rate": 0.0005286355121231357,
      "loss": 2.8082,
      "step": 51647
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.091367244720459,
      "learning_rate": 0.0005286328637176892,
      "loss": 3.0481,
      "step": 51648
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0030710697174072,
      "learning_rate": 0.0005286302152697355,
      "loss": 2.8735,
      "step": 51649
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4227455854415894,
      "learning_rate": 0.0005286275667792753,
      "loss": 2.9142,
      "step": 51650
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4761642217636108,
      "learning_rate": 0.0005286249182463087,
      "loss": 3.0373,
      "step": 51651
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4587234258651733,
      "learning_rate": 0.0005286222696708365,
      "loss": 2.7385,
      "step": 51652
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3907783031463623,
      "learning_rate": 0.0005286196210528591,
      "loss": 3.0472,
      "step": 51653
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.313470482826233,
      "learning_rate": 0.0005286169723923771,
      "loss": 3.0925,
      "step": 51654
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5964090824127197,
      "learning_rate": 0.0005286143236893908,
      "loss": 3.0185,
      "step": 51655
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6634784936904907,
      "learning_rate": 0.0005286116749439007,
      "loss": 3.2396,
      "step": 51656
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4396480321884155,
      "learning_rate": 0.0005286090261559075,
      "loss": 3.1232,
      "step": 51657
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2490670680999756,
      "learning_rate": 0.0005286063773254116,
      "loss": 2.778,
      "step": 51658
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3969470262527466,
      "learning_rate": 0.0005286037284524133,
      "loss": 2.9675,
      "step": 51659
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8137482404708862,
      "learning_rate": 0.0005286010795369134,
      "loss": 3.3171,
      "step": 51660
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7510172128677368,
      "learning_rate": 0.0005285984305789122,
      "loss": 3.1782,
      "step": 51661
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.114285945892334,
      "learning_rate": 0.0005285957815784101,
      "loss": 2.9878,
      "step": 51662
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3789901733398438,
      "learning_rate": 0.0005285931325354079,
      "loss": 3.1145,
      "step": 51663
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5867903232574463,
      "learning_rate": 0.0005285904834499059,
      "loss": 3.0482,
      "step": 51664
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5156835317611694,
      "learning_rate": 0.0005285878343219045,
      "loss": 2.8835,
      "step": 51665
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7198107242584229,
      "learning_rate": 0.0005285851851514044,
      "loss": 3.0792,
      "step": 51666
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.943595290184021,
      "learning_rate": 0.0005285825359384061,
      "loss": 3.024,
      "step": 51667
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.992329478263855,
      "learning_rate": 0.0005285798866829098,
      "loss": 3.1463,
      "step": 51668
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5966187715530396,
      "learning_rate": 0.0005285772373849162,
      "loss": 2.9861,
      "step": 51669
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3510489463806152,
      "learning_rate": 0.0005285745880444258,
      "loss": 2.8184,
      "step": 51670
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3087193965911865,
      "learning_rate": 0.0005285719386614392,
      "loss": 2.9066,
      "step": 51671
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4430783987045288,
      "learning_rate": 0.0005285692892359567,
      "loss": 2.8793,
      "step": 51672
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5734057426452637,
      "learning_rate": 0.0005285666397679788,
      "loss": 3.3001,
      "step": 51673
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2942428588867188,
      "learning_rate": 0.000528563990257506,
      "loss": 2.9377,
      "step": 51674
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9332776069641113,
      "learning_rate": 0.000528561340704539,
      "loss": 3.1438,
      "step": 51675
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7506853342056274,
      "learning_rate": 0.000528558691109078,
      "loss": 3.1494,
      "step": 51676
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5960218906402588,
      "learning_rate": 0.0005285560414711236,
      "loss": 3.0484,
      "step": 51677
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5525774955749512,
      "learning_rate": 0.0005285533917906765,
      "loss": 3.1545,
      "step": 51678
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5376560688018799,
      "learning_rate": 0.0005285507420677369,
      "loss": 2.8244,
      "step": 51679
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5130934715270996,
      "learning_rate": 0.0005285480923023054,
      "loss": 3.052,
      "step": 51680
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.52150297164917,
      "learning_rate": 0.0005285454424943824,
      "loss": 3.2821,
      "step": 51681
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.383390188217163,
      "learning_rate": 0.0005285427926439686,
      "loss": 2.9262,
      "step": 51682
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6301867961883545,
      "learning_rate": 0.0005285401427510644,
      "loss": 3.1128,
      "step": 51683
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7309962511062622,
      "learning_rate": 0.0005285374928156703,
      "loss": 3.092,
      "step": 51684
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.69209623336792,
      "learning_rate": 0.0005285348428377866,
      "loss": 3.1001,
      "step": 51685
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4872560501098633,
      "learning_rate": 0.0005285321928174141,
      "loss": 2.8355,
      "step": 51686
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7285903692245483,
      "learning_rate": 0.0005285295427545532,
      "loss": 3.0465,
      "step": 51687
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.510851263999939,
      "learning_rate": 0.0005285268926492042,
      "loss": 3.0536,
      "step": 51688
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.773959755897522,
      "learning_rate": 0.0005285242425013678,
      "loss": 3.2,
      "step": 51689
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.670505166053772,
      "learning_rate": 0.0005285215923110444,
      "loss": 3.1575,
      "step": 51690
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7998671531677246,
      "learning_rate": 0.0005285189420782346,
      "loss": 2.8565,
      "step": 51691
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.181394338607788,
      "learning_rate": 0.0005285162918029387,
      "loss": 2.8405,
      "step": 51692
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.656947135925293,
      "learning_rate": 0.0005285136414851574,
      "loss": 3.1878,
      "step": 51693
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3994513750076294,
      "learning_rate": 0.0005285109911248911,
      "loss": 2.9911,
      "step": 51694
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.333688497543335,
      "learning_rate": 0.0005285083407221403,
      "loss": 2.951,
      "step": 51695
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.576823115348816,
      "learning_rate": 0.0005285056902769054,
      "loss": 3.2554,
      "step": 51696
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6278387308120728,
      "learning_rate": 0.000528503039789187,
      "loss": 3.1663,
      "step": 51697
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.140004873275757,
      "learning_rate": 0.0005285003892589856,
      "loss": 3.0035,
      "step": 51698
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6066426038742065,
      "learning_rate": 0.0005284977386863016,
      "loss": 2.9372,
      "step": 51699
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4245045185089111,
      "learning_rate": 0.0005284950880711357,
      "loss": 3.0975,
      "step": 51700
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.503781795501709,
      "learning_rate": 0.0005284924374134881,
      "loss": 3.162,
      "step": 51701
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3733482360839844,
      "learning_rate": 0.0005284897867133595,
      "loss": 3.0781,
      "step": 51702
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7801533937454224,
      "learning_rate": 0.0005284871359707504,
      "loss": 3.0548,
      "step": 51703
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5279855728149414,
      "learning_rate": 0.0005284844851856613,
      "loss": 3.1092,
      "step": 51704
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8584461212158203,
      "learning_rate": 0.0005284818343580924,
      "loss": 3.1985,
      "step": 51705
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.015378952026367,
      "learning_rate": 0.0005284791834880446,
      "loss": 3.0151,
      "step": 51706
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8138837814331055,
      "learning_rate": 0.000528476532575518,
      "loss": 3.3195,
      "step": 51707
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.466027021408081,
      "learning_rate": 0.0005284738816205135,
      "loss": 3.0878,
      "step": 51708
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2007217407226562,
      "learning_rate": 0.0005284712306230313,
      "loss": 2.9397,
      "step": 51709
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8396886587142944,
      "learning_rate": 0.000528468579583072,
      "loss": 3.1115,
      "step": 51710
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9523944854736328,
      "learning_rate": 0.0005284659285006361,
      "loss": 3.0381,
      "step": 51711
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.442160725593567,
      "learning_rate": 0.000528463277375724,
      "loss": 3.2855,
      "step": 51712
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.650498628616333,
      "learning_rate": 0.0005284606262083364,
      "loss": 2.9659,
      "step": 51713
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6045756340026855,
      "learning_rate": 0.0005284579749984735,
      "loss": 3.1758,
      "step": 51714
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.809662938117981,
      "learning_rate": 0.000528455323746136,
      "loss": 2.7644,
      "step": 51715
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4281960725784302,
      "learning_rate": 0.0005284526724513244,
      "loss": 2.994,
      "step": 51716
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.074760913848877,
      "learning_rate": 0.0005284500211140391,
      "loss": 2.8271,
      "step": 51717
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6053295135498047,
      "learning_rate": 0.0005284473697342807,
      "loss": 2.9843,
      "step": 51718
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3786543607711792,
      "learning_rate": 0.0005284447183120495,
      "loss": 2.8617,
      "step": 51719
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.592973232269287,
      "learning_rate": 0.0005284420668473461,
      "loss": 3.4093,
      "step": 51720
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4908862113952637,
      "learning_rate": 0.0005284394153401711,
      "loss": 2.9302,
      "step": 51721
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3497307300567627,
      "learning_rate": 0.0005284367637905249,
      "loss": 2.9486,
      "step": 51722
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4309173822402954,
      "learning_rate": 0.0005284341121984079,
      "loss": 3.285,
      "step": 51723
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4194689989089966,
      "learning_rate": 0.0005284314605638207,
      "loss": 2.9311,
      "step": 51724
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4218616485595703,
      "learning_rate": 0.0005284288088867638,
      "loss": 3.2225,
      "step": 51725
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4423789978027344,
      "learning_rate": 0.0005284261571672378,
      "loss": 3.0388,
      "step": 51726
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6698683500289917,
      "learning_rate": 0.000528423505405243,
      "loss": 3.1981,
      "step": 51727
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9409738779067993,
      "learning_rate": 0.0005284208536007799,
      "loss": 3.2557,
      "step": 51728
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.487092137336731,
      "learning_rate": 0.0005284182017538491,
      "loss": 2.8571,
      "step": 51729
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6924618482589722,
      "learning_rate": 0.000528415549864451,
      "loss": 3.2402,
      "step": 51730
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6205439567565918,
      "learning_rate": 0.0005284128979325862,
      "loss": 2.9932,
      "step": 51731
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8082319498062134,
      "learning_rate": 0.0005284102459582551,
      "loss": 3.1618,
      "step": 51732
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5085018873214722,
      "learning_rate": 0.0005284075939414581,
      "loss": 2.8893,
      "step": 51733
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.3726253509521484,
      "learning_rate": 0.0005284049418821959,
      "loss": 3.1031,
      "step": 51734
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.362370491027832,
      "learning_rate": 0.0005284022897804691,
      "loss": 2.8573,
      "step": 51735
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.424710750579834,
      "learning_rate": 0.0005283996376362778,
      "loss": 2.9392,
      "step": 51736
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.6750171184539795,
      "learning_rate": 0.0005283969854496227,
      "loss": 3.0432,
      "step": 51737
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.671659469604492,
      "learning_rate": 0.0005283943332205043,
      "loss": 3.0548,
      "step": 51738
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6889983415603638,
      "learning_rate": 0.0005283916809489231,
      "loss": 3.2156,
      "step": 51739
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5201526880264282,
      "learning_rate": 0.0005283890286348797,
      "loss": 2.9243,
      "step": 51740
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.191018581390381,
      "learning_rate": 0.0005283863762783744,
      "loss": 2.9824,
      "step": 51741
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6295955181121826,
      "learning_rate": 0.0005283837238794076,
      "loss": 2.956,
      "step": 51742
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5130013227462769,
      "learning_rate": 0.0005283810714379801,
      "loss": 3.4205,
      "step": 51743
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2919952869415283,
      "learning_rate": 0.0005283784189540923,
      "loss": 2.9895,
      "step": 51744
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1205906867980957,
      "learning_rate": 0.0005283757664277446,
      "loss": 2.8221,
      "step": 51745
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.60984206199646,
      "learning_rate": 0.0005283731138589374,
      "loss": 3.1147,
      "step": 51746
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6175774335861206,
      "learning_rate": 0.0005283704612476715,
      "loss": 3.1171,
      "step": 51747
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.424681305885315,
      "learning_rate": 0.0005283678085939471,
      "loss": 3.1448,
      "step": 51748
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.8335928916931152,
      "learning_rate": 0.0005283651558977649,
      "loss": 3.0287,
      "step": 51749
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1351356506347656,
      "learning_rate": 0.0005283625031591251,
      "loss": 2.9941,
      "step": 51750
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6802364587783813,
      "learning_rate": 0.0005283598503780287,
      "loss": 3.1776,
      "step": 51751
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.7607533931732178,
      "learning_rate": 0.0005283571975544756,
      "loss": 2.9754,
      "step": 51752
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.124264717102051,
      "learning_rate": 0.0005283545446884668,
      "loss": 2.8179,
      "step": 51753
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5308979749679565,
      "learning_rate": 0.0005283518917800025,
      "loss": 2.9867,
      "step": 51754
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9230183362960815,
      "learning_rate": 0.0005283492388290834,
      "loss": 3.039,
      "step": 51755
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3590790033340454,
      "learning_rate": 0.0005283465858357097,
      "loss": 2.9087,
      "step": 51756
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4149587154388428,
      "learning_rate": 0.0005283439327998822,
      "loss": 2.9101,
      "step": 51757
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.616399884223938,
      "learning_rate": 0.0005283412797216011,
      "loss": 3.316,
      "step": 51758
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4179983139038086,
      "learning_rate": 0.0005283386266008672,
      "loss": 3.2009,
      "step": 51759
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8648638725280762,
      "learning_rate": 0.0005283359734376807,
      "loss": 3.0965,
      "step": 51760
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.55537748336792,
      "learning_rate": 0.0005283333202320422,
      "loss": 3.1135,
      "step": 51761
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.095310926437378,
      "learning_rate": 0.0005283306669839524,
      "loss": 3.1014,
      "step": 51762
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3357822895050049,
      "learning_rate": 0.0005283280136934115,
      "loss": 2.9874,
      "step": 51763
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.919846773147583,
      "learning_rate": 0.0005283253603604202,
      "loss": 3.1613,
      "step": 51764
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6749815940856934,
      "learning_rate": 0.0005283227069849788,
      "loss": 3.0953,
      "step": 51765
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.4754979610443115,
      "learning_rate": 0.0005283200535670879,
      "loss": 3.0702,
      "step": 51766
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6281400918960571,
      "learning_rate": 0.0005283174001067481,
      "loss": 2.9751,
      "step": 51767
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7178336381912231,
      "learning_rate": 0.0005283147466039598,
      "loss": 3.0164,
      "step": 51768
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9717986583709717,
      "learning_rate": 0.0005283120930587233,
      "loss": 2.8668,
      "step": 51769
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.017324209213257,
      "learning_rate": 0.0005283094394710394,
      "loss": 3.3171,
      "step": 51770
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.76302170753479,
      "learning_rate": 0.0005283067858409085,
      "loss": 2.973,
      "step": 51771
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.5212924480438232,
      "learning_rate": 0.0005283041321683309,
      "loss": 3.018,
      "step": 51772
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.671312928199768,
      "learning_rate": 0.0005283014784533075,
      "loss": 3.2482,
      "step": 51773
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4395546913146973,
      "learning_rate": 0.0005282988246958384,
      "loss": 3.0765,
      "step": 51774
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.536717176437378,
      "learning_rate": 0.0005282961708959242,
      "loss": 3.0192,
      "step": 51775
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9688973426818848,
      "learning_rate": 0.0005282935170535655,
      "loss": 2.9865,
      "step": 51776
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8128594160079956,
      "learning_rate": 0.0005282908631687626,
      "loss": 3.0757,
      "step": 51777
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5169634819030762,
      "learning_rate": 0.0005282882092415163,
      "loss": 2.9629,
      "step": 51778
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5577359199523926,
      "learning_rate": 0.0005282855552718268,
      "loss": 3.1355,
      "step": 51779
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2804362773895264,
      "learning_rate": 0.0005282829012596947,
      "loss": 3.4253,
      "step": 51780
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4784215688705444,
      "learning_rate": 0.0005282802472051206,
      "loss": 3.0704,
      "step": 51781
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.586198329925537,
      "learning_rate": 0.0005282775931081049,
      "loss": 3.2393,
      "step": 51782
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5977150201797485,
      "learning_rate": 0.000528274938968648,
      "loss": 3.0336,
      "step": 51783
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5561835765838623,
      "learning_rate": 0.0005282722847867505,
      "loss": 3.1661,
      "step": 51784
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9130784273147583,
      "learning_rate": 0.0005282696305624128,
      "loss": 2.9543,
      "step": 51785
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6890629529953003,
      "learning_rate": 0.0005282669762956356,
      "loss": 3.2612,
      "step": 51786
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.463689923286438,
      "learning_rate": 0.0005282643219864192,
      "loss": 3.0196,
      "step": 51787
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.62557852268219,
      "learning_rate": 0.0005282616676347641,
      "loss": 3.0591,
      "step": 51788
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2442374229431152,
      "learning_rate": 0.0005282590132406709,
      "loss": 3.0175,
      "step": 51789
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5652248859405518,
      "learning_rate": 0.00052825635880414,
      "loss": 3.2013,
      "step": 51790
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.2573747634887695,
      "learning_rate": 0.000528253704325172,
      "loss": 3.171,
      "step": 51791
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7933305501937866,
      "learning_rate": 0.0005282510498037673,
      "loss": 3.0538,
      "step": 51792
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1330151557922363,
      "learning_rate": 0.0005282483952399264,
      "loss": 2.8994,
      "step": 51793
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4717055559158325,
      "learning_rate": 0.0005282457406336499,
      "loss": 3.3223,
      "step": 51794
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7566783428192139,
      "learning_rate": 0.0005282430859849381,
      "loss": 2.9193,
      "step": 51795
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.905815839767456,
      "learning_rate": 0.0005282404312937917,
      "loss": 3.0907,
      "step": 51796
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.631150722503662,
      "learning_rate": 0.000528237776560211,
      "loss": 2.9971,
      "step": 51797
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8348066806793213,
      "learning_rate": 0.0005282351217841966,
      "loss": 3.0612,
      "step": 51798
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0598814487457275,
      "learning_rate": 0.0005282324669657491,
      "loss": 3.0981,
      "step": 51799
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.404555320739746,
      "learning_rate": 0.0005282298121048688,
      "loss": 2.8944,
      "step": 51800
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.6813918352127075,
      "learning_rate": 0.0005282271572015562,
      "loss": 2.8955,
      "step": 51801
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0418202877044678,
      "learning_rate": 0.0005282245022558119,
      "loss": 2.9767,
      "step": 51802
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.5841522216796875,
      "learning_rate": 0.0005282218472676364,
      "loss": 3.2167,
      "step": 51803
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.674503803253174,
      "learning_rate": 0.0005282191922370301,
      "loss": 3.06,
      "step": 51804
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.3207783699035645,
      "learning_rate": 0.0005282165371639936,
      "loss": 2.974,
      "step": 51805
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4452908039093018,
      "learning_rate": 0.0005282138820485273,
      "loss": 3.2388,
      "step": 51806
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.9720818996429443,
      "learning_rate": 0.0005282112268906317,
      "loss": 2.916,
      "step": 51807
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.694300413131714,
      "learning_rate": 0.0005282085716903075,
      "loss": 3.0944,
      "step": 51808
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.8436179161071777,
      "learning_rate": 0.0005282059164475548,
      "loss": 2.9472,
      "step": 51809
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4786288738250732,
      "learning_rate": 0.0005282032611623744,
      "loss": 3.1922,
      "step": 51810
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.078512668609619,
      "learning_rate": 0.0005282006058347667,
      "loss": 2.6164,
      "step": 51811
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.309192657470703,
      "learning_rate": 0.0005281979504647322,
      "loss": 2.9522,
      "step": 51812
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0453829765319824,
      "learning_rate": 0.0005281952950522714,
      "loss": 3.1579,
      "step": 51813
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.8476699590682983,
      "learning_rate": 0.0005281926395973848,
      "loss": 3.1478,
      "step": 51814
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.473559021949768,
      "learning_rate": 0.0005281899841000729,
      "loss": 3.1388,
      "step": 51815
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.735905647277832,
      "learning_rate": 0.0005281873285603361,
      "loss": 2.8689,
      "step": 51816
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9592591524124146,
      "learning_rate": 0.000528184672978175,
      "loss": 3.0284,
      "step": 51817
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.607373595237732,
      "learning_rate": 0.0005281820173535901,
      "loss": 3.1063,
      "step": 51818
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5617353916168213,
      "learning_rate": 0.0005281793616865818,
      "loss": 2.91,
      "step": 51819
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.7622954845428467,
      "learning_rate": 0.0005281767059771508,
      "loss": 2.9302,
      "step": 51820
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.1223437786102295,
      "learning_rate": 0.0005281740502252972,
      "loss": 3.2288,
      "step": 51821
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.3333736658096313,
      "learning_rate": 0.0005281713944310219,
      "loss": 3.3926,
      "step": 51822
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5861812829971313,
      "learning_rate": 0.0005281687385943252,
      "loss": 2.9669,
      "step": 51823
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.0894603729248047,
      "learning_rate": 0.0005281660827152076,
      "loss": 3.291,
      "step": 51824
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.599812626838684,
      "learning_rate": 0.0005281634267936696,
      "loss": 3.0774,
      "step": 51825
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5785424709320068,
      "learning_rate": 0.0005281607708297118,
      "loss": 3.0637,
      "step": 51826
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4906526803970337,
      "learning_rate": 0.0005281581148233346,
      "loss": 3.0894,
      "step": 51827
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.633063554763794,
      "learning_rate": 0.0005281554587745383,
      "loss": 2.9223,
      "step": 51828
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.7720693349838257,
      "learning_rate": 0.0005281528026833238,
      "loss": 2.9967,
      "step": 51829
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.2757281064987183,
      "learning_rate": 0.0005281501465496913,
      "loss": 3.1316,
      "step": 51830
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5435658693313599,
      "learning_rate": 0.0005281474903736415,
      "loss": 2.9676,
      "step": 51831
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9406845569610596,
      "learning_rate": 0.0005281448341551747,
      "loss": 2.8683,
      "step": 51832
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.370414137840271,
      "learning_rate": 0.0005281421778942914,
      "loss": 2.8317,
      "step": 51833
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4727040529251099,
      "learning_rate": 0.0005281395215909922,
      "loss": 3.0461,
      "step": 51834
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.628658652305603,
      "learning_rate": 0.0005281368652452776,
      "loss": 3.1855,
      "step": 51835
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.4651908874511719,
      "learning_rate": 0.0005281342088571479,
      "loss": 2.9546,
      "step": 51836
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.758521556854248,
      "learning_rate": 0.000528131552426604,
      "loss": 3.0167,
      "step": 51837
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5471960306167603,
      "learning_rate": 0.000528128895953646,
      "loss": 3.073,
      "step": 51838
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.5719225406646729,
      "learning_rate": 0.0005281262394382745,
      "loss": 3.1172,
      "step": 51839
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.7097694873809814,
      "learning_rate": 0.00052812358288049,
      "loss": 2.7763,
      "step": 51840
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.081932306289673,
      "learning_rate": 0.0005281209262802931,
      "loss": 3.1502,
      "step": 51841
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1846730709075928,
      "learning_rate": 0.0005281182696376842,
      "loss": 3.0883,
      "step": 51842
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5309762954711914,
      "learning_rate": 0.0005281156129526639,
      "loss": 2.9829,
      "step": 51843
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7343981266021729,
      "learning_rate": 0.0005281129562252324,
      "loss": 2.9892,
      "step": 51844
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2228896617889404,
      "learning_rate": 0.0005281102994553905,
      "loss": 3.2529,
      "step": 51845
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5734021663665771,
      "learning_rate": 0.0005281076426431387,
      "loss": 2.9642,
      "step": 51846
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6688605546951294,
      "learning_rate": 0.0005281049857884773,
      "loss": 3.2597,
      "step": 51847
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4880346059799194,
      "learning_rate": 0.0005281023288914068,
      "loss": 3.0891,
      "step": 51848
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4919753074645996,
      "learning_rate": 0.0005280996719519279,
      "loss": 3.1688,
      "step": 51849
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.671257734298706,
      "learning_rate": 0.0005280970149700408,
      "loss": 2.8935,
      "step": 51850
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2361302375793457,
      "learning_rate": 0.0005280943579457462,
      "loss": 2.9625,
      "step": 51851
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7827396392822266,
      "learning_rate": 0.0005280917008790446,
      "loss": 3.1387,
      "step": 51852
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4681347608566284,
      "learning_rate": 0.0005280890437699365,
      "loss": 3.0693,
      "step": 51853
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9964476823806763,
      "learning_rate": 0.0005280863866184223,
      "loss": 2.9406,
      "step": 51854
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4620388746261597,
      "learning_rate": 0.0005280837294245025,
      "loss": 2.8907,
      "step": 51855
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6452794075012207,
      "learning_rate": 0.0005280810721881777,
      "loss": 3.0275,
      "step": 51856
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6321196556091309,
      "learning_rate": 0.0005280784149094482,
      "loss": 3.1656,
      "step": 51857
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.625385046005249,
      "learning_rate": 0.0005280757575883147,
      "loss": 3.1224,
      "step": 51858
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.764846920967102,
      "learning_rate": 0.0005280731002247777,
      "loss": 3.1178,
      "step": 51859
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5128380060195923,
      "learning_rate": 0.0005280704428188374,
      "loss": 2.8935,
      "step": 51860
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8140735626220703,
      "learning_rate": 0.0005280677853704947,
      "loss": 3.0556,
      "step": 51861
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3033558130264282,
      "learning_rate": 0.0005280651278797497,
      "loss": 3.1117,
      "step": 51862
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7412654161453247,
      "learning_rate": 0.0005280624703466033,
      "loss": 3.0757,
      "step": 51863
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.348868727684021,
      "learning_rate": 0.0005280598127710556,
      "loss": 3.0921,
      "step": 51864
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3623018264770508,
      "learning_rate": 0.0005280571551531073,
      "loss": 3.0412,
      "step": 51865
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2687115669250488,
      "learning_rate": 0.0005280544974927589,
      "loss": 2.9618,
      "step": 51866
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.975422978401184,
      "learning_rate": 0.0005280518397900109,
      "loss": 3.085,
      "step": 51867
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.480054259300232,
      "learning_rate": 0.0005280491820448638,
      "loss": 2.8419,
      "step": 51868
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6910769939422607,
      "learning_rate": 0.000528046524257318,
      "loss": 3.0314,
      "step": 51869
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5744355916976929,
      "learning_rate": 0.000528043866427374,
      "loss": 2.8462,
      "step": 51870
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.690699577331543,
      "learning_rate": 0.0005280412085550324,
      "loss": 2.8305,
      "step": 51871
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.529848337173462,
      "learning_rate": 0.0005280385506402937,
      "loss": 3.0879,
      "step": 51872
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6690928936004639,
      "learning_rate": 0.0005280358926831581,
      "loss": 2.9493,
      "step": 51873
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4889580011367798,
      "learning_rate": 0.0005280332346836266,
      "loss": 3.0893,
      "step": 51874
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.370290756225586,
      "learning_rate": 0.0005280305766416993,
      "loss": 2.9621,
      "step": 51875
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.359833002090454,
      "learning_rate": 0.0005280279185573768,
      "loss": 3.0744,
      "step": 51876
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.660341739654541,
      "learning_rate": 0.0005280252604306596,
      "loss": 2.9401,
      "step": 51877
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5448615550994873,
      "learning_rate": 0.0005280226022615481,
      "loss": 3.1022,
      "step": 51878
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.014024496078491,
      "learning_rate": 0.0005280199440500431,
      "loss": 3.0282,
      "step": 51879
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9241085052490234,
      "learning_rate": 0.0005280172857961448,
      "loss": 3.0103,
      "step": 51880
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7173413038253784,
      "learning_rate": 0.0005280146274998537,
      "loss": 3.1376,
      "step": 51881
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.347639799118042,
      "learning_rate": 0.0005280119691611705,
      "loss": 3.0069,
      "step": 51882
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.621009111404419,
      "learning_rate": 0.0005280093107800955,
      "loss": 3.0755,
      "step": 51883
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6277275085449219,
      "learning_rate": 0.0005280066523566292,
      "loss": 3.0283,
      "step": 51884
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.272416591644287,
      "learning_rate": 0.0005280039938907723,
      "loss": 2.9772,
      "step": 51885
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.89767587184906,
      "learning_rate": 0.0005280013353825251,
      "loss": 3.172,
      "step": 51886
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4902784824371338,
      "learning_rate": 0.0005279986768318881,
      "loss": 2.8166,
      "step": 51887
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.872257947921753,
      "learning_rate": 0.0005279960182388619,
      "loss": 2.8138,
      "step": 51888
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3403431177139282,
      "learning_rate": 0.0005279933596034469,
      "loss": 2.9913,
      "step": 51889
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5509276390075684,
      "learning_rate": 0.0005279907009256436,
      "loss": 3.2211,
      "step": 51890
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6128698587417603,
      "learning_rate": 0.0005279880422054526,
      "loss": 2.9641,
      "step": 51891
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7071352005004883,
      "learning_rate": 0.0005279853834428743,
      "loss": 2.9175,
      "step": 51892
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2518430948257446,
      "learning_rate": 0.0005279827246379092,
      "loss": 3.2238,
      "step": 51893
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3990535736083984,
      "learning_rate": 0.0005279800657905578,
      "loss": 3.2265,
      "step": 51894
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4733844995498657,
      "learning_rate": 0.0005279774069008207,
      "loss": 3.2567,
      "step": 51895
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.6146795749664307,
      "learning_rate": 0.0005279747479686981,
      "loss": 3.0266,
      "step": 51896
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.137592315673828,
      "learning_rate": 0.0005279720889941908,
      "loss": 3.0872,
      "step": 51897
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4287772178649902,
      "learning_rate": 0.0005279694299772993,
      "loss": 2.8512,
      "step": 51898
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8244987726211548,
      "learning_rate": 0.0005279667709180239,
      "loss": 2.9793,
      "step": 51899
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4685680866241455,
      "learning_rate": 0.0005279641118163651,
      "loss": 3.0375,
      "step": 51900
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3437561988830566,
      "learning_rate": 0.0005279614526723235,
      "loss": 2.9844,
      "step": 51901
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.754874587059021,
      "learning_rate": 0.0005279587934858995,
      "loss": 3.113,
      "step": 51902
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7604889869689941,
      "learning_rate": 0.0005279561342570938,
      "loss": 3.0104,
      "step": 51903
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6830650568008423,
      "learning_rate": 0.0005279534749859067,
      "loss": 3.2103,
      "step": 51904
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5614300966262817,
      "learning_rate": 0.0005279508156723387,
      "loss": 2.9438,
      "step": 51905
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4398771524429321,
      "learning_rate": 0.0005279481563163904,
      "loss": 3.1714,
      "step": 51906
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5801873207092285,
      "learning_rate": 0.0005279454969180622,
      "loss": 3.109,
      "step": 51907
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2937140464782715,
      "learning_rate": 0.0005279428374773546,
      "loss": 3.1705,
      "step": 51908
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8718373775482178,
      "learning_rate": 0.0005279401779942682,
      "loss": 3.0624,
      "step": 51909
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.551643967628479,
      "learning_rate": 0.0005279375184688034,
      "loss": 3.1543,
      "step": 51910
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.758710265159607,
      "learning_rate": 0.0005279348589009606,
      "loss": 2.9781,
      "step": 51911
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6699111461639404,
      "learning_rate": 0.0005279321992907405,
      "loss": 3.3542,
      "step": 51912
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.476879119873047,
      "learning_rate": 0.0005279295396381436,
      "loss": 2.7849,
      "step": 51913
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7932261228561401,
      "learning_rate": 0.0005279268799431702,
      "loss": 2.9647,
      "step": 51914
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7451071739196777,
      "learning_rate": 0.0005279242202058208,
      "loss": 3.2668,
      "step": 51915
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6718392372131348,
      "learning_rate": 0.0005279215604260961,
      "loss": 3.298,
      "step": 51916
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5631523132324219,
      "learning_rate": 0.0005279189006039965,
      "loss": 3.024,
      "step": 51917
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3748629093170166,
      "learning_rate": 0.0005279162407395224,
      "loss": 2.9454,
      "step": 51918
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.9364850521087646,
      "learning_rate": 0.0005279135808326744,
      "loss": 2.9264,
      "step": 51919
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4712971448898315,
      "learning_rate": 0.000527910920883453,
      "loss": 3.1149,
      "step": 51920
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6224355697631836,
      "learning_rate": 0.0005279082608918586,
      "loss": 2.7901,
      "step": 51921
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9525004625320435,
      "learning_rate": 0.0005279056008578917,
      "loss": 2.9578,
      "step": 51922
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7846976518630981,
      "learning_rate": 0.0005279029407815529,
      "loss": 3.0001,
      "step": 51923
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.998910903930664,
      "learning_rate": 0.0005279002806628427,
      "loss": 2.9789,
      "step": 51924
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.768301248550415,
      "learning_rate": 0.0005278976205017615,
      "loss": 2.5645,
      "step": 51925
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3424110412597656,
      "learning_rate": 0.0005278949602983098,
      "loss": 3.1104,
      "step": 51926
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4785094261169434,
      "learning_rate": 0.0005278923000524881,
      "loss": 3.1441,
      "step": 51927
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7339493036270142,
      "learning_rate": 0.000527889639764297,
      "loss": 3.1976,
      "step": 51928
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6504467725753784,
      "learning_rate": 0.0005278869794337369,
      "loss": 3.2102,
      "step": 51929
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9850717782974243,
      "learning_rate": 0.0005278843190608082,
      "loss": 2.9552,
      "step": 51930
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9357399940490723,
      "learning_rate": 0.0005278816586455116,
      "loss": 2.8395,
      "step": 51931
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7145845890045166,
      "learning_rate": 0.0005278789981878476,
      "loss": 3.0914,
      "step": 51932
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9366250038146973,
      "learning_rate": 0.0005278763376878163,
      "loss": 3.0311,
      "step": 51933
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.788597822189331,
      "learning_rate": 0.0005278736771454188,
      "loss": 2.7718,
      "step": 51934
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.683382511138916,
      "learning_rate": 0.0005278710165606552,
      "loss": 3.1703,
      "step": 51935
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5731728076934814,
      "learning_rate": 0.000527868355933526,
      "loss": 3.0561,
      "step": 51936
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.5980308055877686,
      "learning_rate": 0.0005278656952640318,
      "loss": 3.084,
      "step": 51937
    },
    {
      "epoch": 0.68,
      "grad_norm": 5.272883892059326,
      "learning_rate": 0.000527863034552173,
      "loss": 3.0179,
      "step": 51938
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9253655672073364,
      "learning_rate": 0.0005278603737979503,
      "loss": 3.102,
      "step": 51939
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.9829180240631104,
      "learning_rate": 0.000527857713001364,
      "loss": 3.1237,
      "step": 51940
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0978102684020996,
      "learning_rate": 0.0005278550521624145,
      "loss": 3.1146,
      "step": 51941
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8866392374038696,
      "learning_rate": 0.0005278523912811027,
      "loss": 3.1503,
      "step": 51942
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.528073787689209,
      "learning_rate": 0.0005278497303574287,
      "loss": 2.8926,
      "step": 51943
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5485225915908813,
      "learning_rate": 0.0005278470693913931,
      "loss": 3.2163,
      "step": 51944
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.424964427947998,
      "learning_rate": 0.0005278444083829964,
      "loss": 3.1013,
      "step": 51945
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5653750896453857,
      "learning_rate": 0.0005278417473322393,
      "loss": 3.082,
      "step": 51946
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.856550931930542,
      "learning_rate": 0.000527839086239122,
      "loss": 2.9628,
      "step": 51947
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1050734519958496,
      "learning_rate": 0.000527836425103645,
      "loss": 3.4772,
      "step": 51948
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4834811687469482,
      "learning_rate": 0.0005278337639258092,
      "loss": 3.1062,
      "step": 51949
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5129618644714355,
      "learning_rate": 0.0005278311027056146,
      "loss": 3.179,
      "step": 51950
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5560567378997803,
      "learning_rate": 0.0005278284414430619,
      "loss": 2.8773,
      "step": 51951
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5957964658737183,
      "learning_rate": 0.0005278257801381518,
      "loss": 3.0828,
      "step": 51952
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8604753017425537,
      "learning_rate": 0.0005278231187908843,
      "loss": 3.1428,
      "step": 51953
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8944133520126343,
      "learning_rate": 0.0005278204574012603,
      "loss": 2.8505,
      "step": 51954
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9577451944351196,
      "learning_rate": 0.0005278177959692803,
      "loss": 2.8559,
      "step": 51955
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8734612464904785,
      "learning_rate": 0.0005278151344949444,
      "loss": 3.1873,
      "step": 51956
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.007467031478882,
      "learning_rate": 0.0005278124729782536,
      "loss": 3.1934,
      "step": 51957
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7491557598114014,
      "learning_rate": 0.000527809811419208,
      "loss": 3.0593,
      "step": 51958
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.817962884902954,
      "learning_rate": 0.0005278071498178084,
      "loss": 3.1836,
      "step": 51959
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4789533615112305,
      "learning_rate": 0.0005278044881740551,
      "loss": 2.923,
      "step": 51960
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.615516185760498,
      "learning_rate": 0.0005278018264879486,
      "loss": 3.2122,
      "step": 51961
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9264328479766846,
      "learning_rate": 0.0005277991647594894,
      "loss": 2.9562,
      "step": 51962
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6621874570846558,
      "learning_rate": 0.0005277965029886782,
      "loss": 3.0322,
      "step": 51963
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9626106023788452,
      "learning_rate": 0.0005277938411755152,
      "loss": 3.0254,
      "step": 51964
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8815888166427612,
      "learning_rate": 0.0005277911793200011,
      "loss": 3.011,
      "step": 51965
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7199201583862305,
      "learning_rate": 0.0005277885174221362,
      "loss": 3.1213,
      "step": 51966
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0692949295043945,
      "learning_rate": 0.0005277858554819212,
      "loss": 3.2001,
      "step": 51967
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.283358573913574,
      "learning_rate": 0.0005277831934993565,
      "loss": 2.875,
      "step": 51968
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.463350772857666,
      "learning_rate": 0.0005277805314744426,
      "loss": 2.7522,
      "step": 51969
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9206057786941528,
      "learning_rate": 0.00052777786940718,
      "loss": 3.1227,
      "step": 51970
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.629733920097351,
      "learning_rate": 0.0005277752072975693,
      "loss": 3.084,
      "step": 51971
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7400217056274414,
      "learning_rate": 0.0005277725451456108,
      "loss": 3.1135,
      "step": 51972
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4900007247924805,
      "learning_rate": 0.0005277698829513049,
      "loss": 3.0495,
      "step": 51973
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9246877431869507,
      "learning_rate": 0.0005277672207146526,
      "loss": 3.2351,
      "step": 51974
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0285375118255615,
      "learning_rate": 0.000527764558435654,
      "loss": 2.95,
      "step": 51975
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4734207391738892,
      "learning_rate": 0.0005277618961143095,
      "loss": 3.2244,
      "step": 51976
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4644062519073486,
      "learning_rate": 0.0005277592337506198,
      "loss": 3.1343,
      "step": 51977
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9444719552993774,
      "learning_rate": 0.0005277565713445856,
      "loss": 3.067,
      "step": 51978
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1994614601135254,
      "learning_rate": 0.0005277539088962068,
      "loss": 3.0147,
      "step": 51979
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7628185749053955,
      "learning_rate": 0.0005277512464054845,
      "loss": 3.2498,
      "step": 51980
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9383878707885742,
      "learning_rate": 0.0005277485838724188,
      "loss": 3.4513,
      "step": 51981
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0914649963378906,
      "learning_rate": 0.0005277459212970105,
      "loss": 3.0425,
      "step": 51982
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.749110460281372,
      "learning_rate": 0.0005277432586792598,
      "loss": 2.9193,
      "step": 51983
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3518375158309937,
      "learning_rate": 0.0005277405960191674,
      "loss": 3.244,
      "step": 51984
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.257619857788086,
      "learning_rate": 0.0005277379333167337,
      "loss": 2.8278,
      "step": 51985
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3378560543060303,
      "learning_rate": 0.0005277352705719591,
      "loss": 3.0565,
      "step": 51986
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.614072322845459,
      "learning_rate": 0.0005277326077848444,
      "loss": 2.9694,
      "step": 51987
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.38430917263031,
      "learning_rate": 0.0005277299449553899,
      "loss": 3.0679,
      "step": 51988
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.917764902114868,
      "learning_rate": 0.000527727282083596,
      "loss": 3.1688,
      "step": 51989
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.521586298942566,
      "learning_rate": 0.0005277246191694633,
      "loss": 2.8835,
      "step": 51990
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3761646747589111,
      "learning_rate": 0.0005277219562129923,
      "loss": 2.9664,
      "step": 51991
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3451387882232666,
      "learning_rate": 0.0005277192932141837,
      "loss": 2.9503,
      "step": 51992
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.81944739818573,
      "learning_rate": 0.0005277166301730375,
      "loss": 2.9376,
      "step": 51993
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.132286548614502,
      "learning_rate": 0.0005277139670895546,
      "loss": 3.0301,
      "step": 51994
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3798565864562988,
      "learning_rate": 0.0005277113039637354,
      "loss": 2.8776,
      "step": 51995
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.130204200744629,
      "learning_rate": 0.0005277086407955803,
      "loss": 3.1297,
      "step": 51996
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.6250977516174316,
      "learning_rate": 0.0005277059775850899,
      "loss": 2.999,
      "step": 51997
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4043118953704834,
      "learning_rate": 0.0005277033143322647,
      "loss": 3.1187,
      "step": 51998
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6013861894607544,
      "learning_rate": 0.0005277006510371051,
      "loss": 3.0536,
      "step": 51999
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.573318600654602,
      "learning_rate": 0.0005276979876996117,
      "loss": 3.005,
      "step": 52000
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.928356885910034,
      "learning_rate": 0.0005276953243197849,
      "loss": 2.7311,
      "step": 52001
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9786312580108643,
      "learning_rate": 0.0005276926608976252,
      "loss": 2.8865,
      "step": 52002
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5996606349945068,
      "learning_rate": 0.0005276899974331333,
      "loss": 3.2303,
      "step": 52003
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7038826942443848,
      "learning_rate": 0.0005276873339263093,
      "loss": 2.9535,
      "step": 52004
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5873035192489624,
      "learning_rate": 0.0005276846703771541,
      "loss": 2.8875,
      "step": 52005
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.673609972000122,
      "learning_rate": 0.000527682006785668,
      "loss": 2.9486,
      "step": 52006
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7176599502563477,
      "learning_rate": 0.0005276793431518515,
      "loss": 3.3439,
      "step": 52007
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.771905541419983,
      "learning_rate": 0.0005276766794757051,
      "loss": 2.8664,
      "step": 52008
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5592553615570068,
      "learning_rate": 0.0005276740157572293,
      "loss": 3.1145,
      "step": 52009
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0240280628204346,
      "learning_rate": 0.0005276713519964247,
      "loss": 3.1221,
      "step": 52010
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.012234926223755,
      "learning_rate": 0.0005276686881932915,
      "loss": 2.9732,
      "step": 52011
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6287599802017212,
      "learning_rate": 0.0005276660243478305,
      "loss": 2.9435,
      "step": 52012
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7164556980133057,
      "learning_rate": 0.0005276633604600422,
      "loss": 2.8088,
      "step": 52013
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.8433680534362793,
      "learning_rate": 0.0005276606965299268,
      "loss": 3.0646,
      "step": 52014
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.887392997741699,
      "learning_rate": 0.000527658032557485,
      "loss": 2.9457,
      "step": 52015
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0835611820220947,
      "learning_rate": 0.0005276553685427174,
      "loss": 3.0303,
      "step": 52016
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.459511637687683,
      "learning_rate": 0.0005276527044856242,
      "loss": 3.0525,
      "step": 52017
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.0173983573913574,
      "learning_rate": 0.0005276500403862063,
      "loss": 3.1828,
      "step": 52018
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.751492738723755,
      "learning_rate": 0.0005276473762444638,
      "loss": 2.8743,
      "step": 52019
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.675121784210205,
      "learning_rate": 0.0005276447120603974,
      "loss": 3.0767,
      "step": 52020
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8956588506698608,
      "learning_rate": 0.0005276420478340075,
      "loss": 2.9045,
      "step": 52021
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.6636359691619873,
      "learning_rate": 0.0005276393835652946,
      "loss": 2.9058,
      "step": 52022
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.2031795978546143,
      "learning_rate": 0.0005276367192542594,
      "loss": 3.1444,
      "step": 52023
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.907163143157959,
      "learning_rate": 0.0005276340549009021,
      "loss": 3.2803,
      "step": 52024
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0763063430786133,
      "learning_rate": 0.0005276313905052234,
      "loss": 2.9188,
      "step": 52025
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6842061281204224,
      "learning_rate": 0.0005276287260672237,
      "loss": 3.0968,
      "step": 52026
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1444482803344727,
      "learning_rate": 0.0005276260615869036,
      "loss": 3.3899,
      "step": 52027
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.703235149383545,
      "learning_rate": 0.0005276233970642634,
      "loss": 2.8999,
      "step": 52028
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4154061079025269,
      "learning_rate": 0.0005276207324993039,
      "loss": 2.9917,
      "step": 52029
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4184668064117432,
      "learning_rate": 0.0005276180678920252,
      "loss": 3.3017,
      "step": 52030
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9149333238601685,
      "learning_rate": 0.0005276154032424281,
      "loss": 2.9882,
      "step": 52031
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.0404152870178223,
      "learning_rate": 0.000527612738550513,
      "loss": 2.9002,
      "step": 52032
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9478307962417603,
      "learning_rate": 0.0005276100738162803,
      "loss": 2.8932,
      "step": 52033
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.911635398864746,
      "learning_rate": 0.0005276074090397307,
      "loss": 2.792,
      "step": 52034
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.712376356124878,
      "learning_rate": 0.0005276047442208646,
      "loss": 2.7903,
      "step": 52035
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7119652032852173,
      "learning_rate": 0.0005276020793596824,
      "loss": 3.1303,
      "step": 52036
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7304319143295288,
      "learning_rate": 0.0005275994144561848,
      "loss": 2.9722,
      "step": 52037
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6940813064575195,
      "learning_rate": 0.000527596749510372,
      "loss": 3.1917,
      "step": 52038
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5545912981033325,
      "learning_rate": 0.0005275940845222448,
      "loss": 3.0056,
      "step": 52039
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.458950161933899,
      "learning_rate": 0.0005275914194918035,
      "loss": 3.1074,
      "step": 52040
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3671488761901855,
      "learning_rate": 0.0005275887544190487,
      "loss": 2.9829,
      "step": 52041
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9416760206222534,
      "learning_rate": 0.0005275860893039809,
      "loss": 3.0581,
      "step": 52042
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6673696041107178,
      "learning_rate": 0.0005275834241466004,
      "loss": 3.1167,
      "step": 52043
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.937949299812317,
      "learning_rate": 0.000527580758946908,
      "loss": 3.1415,
      "step": 52044
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.346860647201538,
      "learning_rate": 0.0005275780937049039,
      "loss": 2.7935,
      "step": 52045
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7010084390640259,
      "learning_rate": 0.0005275754284205888,
      "loss": 3.1219,
      "step": 52046
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.566124677658081,
      "learning_rate": 0.0005275727630939632,
      "loss": 3.27,
      "step": 52047
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.758000373840332,
      "learning_rate": 0.0005275700977250274,
      "loss": 2.9419,
      "step": 52048
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2507479190826416,
      "learning_rate": 0.000527567432313782,
      "loss": 3.0668,
      "step": 52049
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.407386064529419,
      "learning_rate": 0.0005275647668602277,
      "loss": 3.213,
      "step": 52050
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.505082130432129,
      "learning_rate": 0.0005275621013643647,
      "loss": 3.2568,
      "step": 52051
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.278826951980591,
      "learning_rate": 0.0005275594358261936,
      "loss": 3.0137,
      "step": 52052
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5745749473571777,
      "learning_rate": 0.000527556770245715,
      "loss": 3.047,
      "step": 52053
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8737257719039917,
      "learning_rate": 0.0005275541046229293,
      "loss": 2.9882,
      "step": 52054
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.3659727573394775,
      "learning_rate": 0.0005275514389578369,
      "loss": 3.2278,
      "step": 52055
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2400906085968018,
      "learning_rate": 0.0005275487732504384,
      "loss": 2.9988,
      "step": 52056
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7574671506881714,
      "learning_rate": 0.0005275461075007343,
      "loss": 2.9463,
      "step": 52057
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.7884397506713867,
      "learning_rate": 0.0005275434417087252,
      "loss": 2.9271,
      "step": 52058
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.236427068710327,
      "learning_rate": 0.0005275407758744113,
      "loss": 2.9607,
      "step": 52059
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2816476821899414,
      "learning_rate": 0.0005275381099977933,
      "loss": 3.0285,
      "step": 52060
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9221391677856445,
      "learning_rate": 0.0005275354440788717,
      "loss": 3.0584,
      "step": 52061
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8770172595977783,
      "learning_rate": 0.0005275327781176471,
      "loss": 2.8972,
      "step": 52062
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4020081758499146,
      "learning_rate": 0.0005275301121141198,
      "loss": 3.0593,
      "step": 52063
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4244269132614136,
      "learning_rate": 0.0005275274460682902,
      "loss": 3.0685,
      "step": 52064
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6481634378433228,
      "learning_rate": 0.000527524779980159,
      "loss": 3.1672,
      "step": 52065
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9876469373703003,
      "learning_rate": 0.0005275221138497268,
      "loss": 2.8309,
      "step": 52066
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7145655155181885,
      "learning_rate": 0.0005275194476769938,
      "loss": 2.8595,
      "step": 52067
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5681408643722534,
      "learning_rate": 0.0005275167814619607,
      "loss": 3.0224,
      "step": 52068
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2249908447265625,
      "learning_rate": 0.0005275141152046279,
      "loss": 2.9789,
      "step": 52069
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3803361654281616,
      "learning_rate": 0.000527511448904996,
      "loss": 3.0652,
      "step": 52070
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.938631534576416,
      "learning_rate": 0.0005275087825630654,
      "loss": 3.134,
      "step": 52071
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.788994312286377,
      "learning_rate": 0.0005275061161788366,
      "loss": 2.7744,
      "step": 52072
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.701089859008789,
      "learning_rate": 0.0005275034497523102,
      "loss": 2.9651,
      "step": 52073
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.884839653968811,
      "learning_rate": 0.0005275007832834864,
      "loss": 2.9271,
      "step": 52074
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5535863637924194,
      "learning_rate": 0.0005274981167723661,
      "loss": 3.3268,
      "step": 52075
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6657493114471436,
      "learning_rate": 0.0005274954502189496,
      "loss": 3.1234,
      "step": 52076
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6540447473526,
      "learning_rate": 0.0005274927836232374,
      "loss": 2.9939,
      "step": 52077
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.391226053237915,
      "learning_rate": 0.0005274901169852299,
      "loss": 2.8933,
      "step": 52078
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3828530311584473,
      "learning_rate": 0.0005274874503049278,
      "loss": 3.0308,
      "step": 52079
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4149110317230225,
      "learning_rate": 0.0005274847835823314,
      "loss": 3.0842,
      "step": 52080
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5258634090423584,
      "learning_rate": 0.0005274821168174413,
      "loss": 2.8278,
      "step": 52081
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7004694938659668,
      "learning_rate": 0.0005274794500102581,
      "loss": 2.9905,
      "step": 52082
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7388049364089966,
      "learning_rate": 0.000527476783160782,
      "loss": 2.9316,
      "step": 52083
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3548692464828491,
      "learning_rate": 0.0005274741162690138,
      "loss": 3.0054,
      "step": 52084
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4733705520629883,
      "learning_rate": 0.0005274714493349539,
      "loss": 2.9229,
      "step": 52085
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8098546266555786,
      "learning_rate": 0.0005274687823586026,
      "loss": 3.2921,
      "step": 52086
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5581083297729492,
      "learning_rate": 0.0005274661153399607,
      "loss": 3.1002,
      "step": 52087
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.794047474861145,
      "learning_rate": 0.0005274634482790284,
      "loss": 3.1525,
      "step": 52088
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8561279773712158,
      "learning_rate": 0.0005274607811758064,
      "loss": 3.2914,
      "step": 52089
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5045866966247559,
      "learning_rate": 0.0005274581140302953,
      "loss": 3.1724,
      "step": 52090
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.47557532787323,
      "learning_rate": 0.0005274554468424953,
      "loss": 3.1624,
      "step": 52091
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9171619415283203,
      "learning_rate": 0.000527452779612407,
      "loss": 3.0947,
      "step": 52092
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5812190771102905,
      "learning_rate": 0.000527450112340031,
      "loss": 3.0435,
      "step": 52093
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.424882411956787,
      "learning_rate": 0.0005274474450253678,
      "loss": 3.0324,
      "step": 52094
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5574182271957397,
      "learning_rate": 0.0005274447776684178,
      "loss": 3.0737,
      "step": 52095
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.0686957836151123,
      "learning_rate": 0.0005274421102691815,
      "loss": 3.0196,
      "step": 52096
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6197872161865234,
      "learning_rate": 0.0005274394428276594,
      "loss": 3.1321,
      "step": 52097
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.692420244216919,
      "learning_rate": 0.000527436775343852,
      "loss": 2.9735,
      "step": 52098
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.384770393371582,
      "learning_rate": 0.0005274341078177598,
      "loss": 2.9473,
      "step": 52099
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7091056108474731,
      "learning_rate": 0.0005274314402493834,
      "loss": 2.8659,
      "step": 52100
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.064351797103882,
      "learning_rate": 0.0005274287726387231,
      "loss": 3.2635,
      "step": 52101
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4012317657470703,
      "learning_rate": 0.0005274261049857796,
      "loss": 2.976,
      "step": 52102
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9177155494689941,
      "learning_rate": 0.0005274234372905532,
      "loss": 3.2917,
      "step": 52103
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.59540855884552,
      "learning_rate": 0.0005274207695530446,
      "loss": 2.8674,
      "step": 52104
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4346718788146973,
      "learning_rate": 0.000527418101773254,
      "loss": 3.0177,
      "step": 52105
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.800822138786316,
      "learning_rate": 0.0005274154339511822,
      "loss": 2.9302,
      "step": 52106
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6198920011520386,
      "learning_rate": 0.0005274127660868296,
      "loss": 2.9474,
      "step": 52107
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5244371891021729,
      "learning_rate": 0.0005274100981801967,
      "loss": 3.1574,
      "step": 52108
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2849104404449463,
      "learning_rate": 0.0005274074302312839,
      "loss": 3.2192,
      "step": 52109
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3478080034255981,
      "learning_rate": 0.0005274047622400919,
      "loss": 2.9303,
      "step": 52110
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7606351375579834,
      "learning_rate": 0.0005274020942066209,
      "loss": 3.0349,
      "step": 52111
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7200959920883179,
      "learning_rate": 0.0005273994261308715,
      "loss": 3.0612,
      "step": 52112
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4777857065200806,
      "learning_rate": 0.0005273967580128444,
      "loss": 2.9488,
      "step": 52113
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3827695846557617,
      "learning_rate": 0.0005273940898525398,
      "loss": 2.8882,
      "step": 52114
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7642613649368286,
      "learning_rate": 0.0005273914216499585,
      "loss": 3.135,
      "step": 52115
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4977575540542603,
      "learning_rate": 0.0005273887534051008,
      "loss": 3.2077,
      "step": 52116
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5055058002471924,
      "learning_rate": 0.0005273860851179673,
      "loss": 3.0526,
      "step": 52117
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7544621229171753,
      "learning_rate": 0.0005273834167885583,
      "loss": 3.0585,
      "step": 52118
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6561405658721924,
      "learning_rate": 0.0005273807484168745,
      "loss": 3.031,
      "step": 52119
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2250654697418213,
      "learning_rate": 0.0005273780800029164,
      "loss": 3.1739,
      "step": 52120
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7107127904891968,
      "learning_rate": 0.0005273754115466843,
      "loss": 3.1423,
      "step": 52121
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5180294513702393,
      "learning_rate": 0.0005273727430481789,
      "loss": 2.9812,
      "step": 52122
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4911577701568604,
      "learning_rate": 0.0005273700745074006,
      "loss": 3.0062,
      "step": 52123
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4159374237060547,
      "learning_rate": 0.0005273674059243498,
      "loss": 3.1427,
      "step": 52124
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.253262758255005,
      "learning_rate": 0.0005273647372990273,
      "loss": 2.8354,
      "step": 52125
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3006764650344849,
      "learning_rate": 0.0005273620686314333,
      "loss": 3.2472,
      "step": 52126
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6778714656829834,
      "learning_rate": 0.0005273593999215683,
      "loss": 3.3197,
      "step": 52127
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4309823513031006,
      "learning_rate": 0.000527356731169433,
      "loss": 2.8771,
      "step": 52128
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8088510036468506,
      "learning_rate": 0.0005273540623750278,
      "loss": 3.0737,
      "step": 52129
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.311936616897583,
      "learning_rate": 0.0005273513935383533,
      "loss": 3.029,
      "step": 52130
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2799978256225586,
      "learning_rate": 0.0005273487246594097,
      "loss": 2.9775,
      "step": 52131
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.230205774307251,
      "learning_rate": 0.0005273460557381978,
      "loss": 3.2199,
      "step": 52132
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8102364540100098,
      "learning_rate": 0.0005273433867747178,
      "loss": 3.1651,
      "step": 52133
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.414764404296875,
      "learning_rate": 0.0005273407177689706,
      "loss": 2.8406,
      "step": 52134
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0308337211608887,
      "learning_rate": 0.0005273380487209563,
      "loss": 2.9208,
      "step": 52135
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5121707916259766,
      "learning_rate": 0.0005273353796306756,
      "loss": 3.1876,
      "step": 52136
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.168445348739624,
      "learning_rate": 0.000527332710498129,
      "loss": 3.1669,
      "step": 52137
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7786226272583008,
      "learning_rate": 0.0005273300413233171,
      "loss": 3.24,
      "step": 52138
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3554322719573975,
      "learning_rate": 0.00052732737210624,
      "loss": 2.7412,
      "step": 52139
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0465235710144043,
      "learning_rate": 0.0005273247028468985,
      "loss": 2.9991,
      "step": 52140
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7064651250839233,
      "learning_rate": 0.0005273220335452932,
      "loss": 2.9549,
      "step": 52141
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6762378215789795,
      "learning_rate": 0.0005273193642014243,
      "loss": 3.0221,
      "step": 52142
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.084707498550415,
      "learning_rate": 0.0005273166948152925,
      "loss": 2.9969,
      "step": 52143
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.1840434074401855,
      "learning_rate": 0.0005273140253868983,
      "loss": 3.0127,
      "step": 52144
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6356912851333618,
      "learning_rate": 0.0005273113559162419,
      "loss": 2.973,
      "step": 52145
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3144830465316772,
      "learning_rate": 0.0005273086864033242,
      "loss": 3.1469,
      "step": 52146
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3282736539840698,
      "learning_rate": 0.0005273060168481456,
      "loss": 3.1508,
      "step": 52147
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5826544761657715,
      "learning_rate": 0.0005273033472507064,
      "loss": 3.2525,
      "step": 52148
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.415679693222046,
      "learning_rate": 0.0005273006776110073,
      "loss": 3.0079,
      "step": 52149
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0748345851898193,
      "learning_rate": 0.0005272980079290485,
      "loss": 2.8756,
      "step": 52150
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6396583318710327,
      "learning_rate": 0.0005272953382048309,
      "loss": 3.1851,
      "step": 52151
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2995153665542603,
      "learning_rate": 0.0005272926684383549,
      "loss": 3.2417,
      "step": 52152
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5681406259536743,
      "learning_rate": 0.0005272899986296207,
      "loss": 3.0192,
      "step": 52153
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6067017316818237,
      "learning_rate": 0.0005272873287786291,
      "loss": 3.2336,
      "step": 52154
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6117541790008545,
      "learning_rate": 0.0005272846588853804,
      "loss": 2.8661,
      "step": 52155
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9520069360733032,
      "learning_rate": 0.0005272819889498753,
      "loss": 3.0642,
      "step": 52156
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9776582717895508,
      "learning_rate": 0.0005272793189721142,
      "loss": 2.8842,
      "step": 52157
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.027191638946533,
      "learning_rate": 0.0005272766489520975,
      "loss": 3.0467,
      "step": 52158
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6418567895889282,
      "learning_rate": 0.0005272739788898258,
      "loss": 3.0706,
      "step": 52159
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.859696865081787,
      "learning_rate": 0.0005272713087852995,
      "loss": 3.1159,
      "step": 52160
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.812595009803772,
      "learning_rate": 0.0005272686386385192,
      "loss": 3.1569,
      "step": 52161
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9511091709136963,
      "learning_rate": 0.0005272659684494855,
      "loss": 3.0213,
      "step": 52162
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4558427333831787,
      "learning_rate": 0.0005272632982181988,
      "loss": 3.2274,
      "step": 52163
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6294662952423096,
      "learning_rate": 0.0005272606279446595,
      "loss": 3.2699,
      "step": 52164
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2147841453552246,
      "learning_rate": 0.0005272579576288681,
      "loss": 2.9754,
      "step": 52165
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6143423318862915,
      "learning_rate": 0.0005272552872708251,
      "loss": 2.7893,
      "step": 52166
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.671108603477478,
      "learning_rate": 0.0005272526168705311,
      "loss": 2.847,
      "step": 52167
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.953794002532959,
      "learning_rate": 0.0005272499464279866,
      "loss": 3.2157,
      "step": 52168
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4140619039535522,
      "learning_rate": 0.000527247275943192,
      "loss": 2.9085,
      "step": 52169
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6826049089431763,
      "learning_rate": 0.0005272446054161478,
      "loss": 2.9523,
      "step": 52170
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.622369647026062,
      "learning_rate": 0.0005272419348468547,
      "loss": 3.2055,
      "step": 52171
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.595733642578125,
      "learning_rate": 0.000527239264235313,
      "loss": 3.0019,
      "step": 52172
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5622156858444214,
      "learning_rate": 0.0005272365935815231,
      "loss": 3.1427,
      "step": 52173
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.28371524810791,
      "learning_rate": 0.0005272339228854857,
      "loss": 3.0299,
      "step": 52174
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.066401720046997,
      "learning_rate": 0.0005272312521472012,
      "loss": 2.9436,
      "step": 52175
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4699063301086426,
      "learning_rate": 0.0005272285813666702,
      "loss": 3.1416,
      "step": 52176
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.340325355529785,
      "learning_rate": 0.000527225910543893,
      "loss": 3.2461,
      "step": 52177
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.323854684829712,
      "learning_rate": 0.0005272232396788703,
      "loss": 3.2542,
      "step": 52178
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.651629090309143,
      "learning_rate": 0.0005272205687716024,
      "loss": 3.1218,
      "step": 52179
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8229361772537231,
      "learning_rate": 0.0005272178978220901,
      "loss": 2.997,
      "step": 52180
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1153318881988525,
      "learning_rate": 0.0005272152268303336,
      "loss": 3.0218,
      "step": 52181
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5162553787231445,
      "learning_rate": 0.0005272125557963334,
      "loss": 3.1174,
      "step": 52182
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.131500720977783,
      "learning_rate": 0.0005272098847200903,
      "loss": 2.9235,
      "step": 52183
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5123190879821777,
      "learning_rate": 0.0005272072136016044,
      "loss": 2.9401,
      "step": 52184
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8953882455825806,
      "learning_rate": 0.0005272045424408765,
      "loss": 3.1561,
      "step": 52185
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5320768356323242,
      "learning_rate": 0.000527201871237907,
      "loss": 2.9412,
      "step": 52186
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6165432929992676,
      "learning_rate": 0.0005271991999926963,
      "loss": 2.7575,
      "step": 52187
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6169559955596924,
      "learning_rate": 0.000527196528705245,
      "loss": 3.0548,
      "step": 52188
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5484343767166138,
      "learning_rate": 0.0005271938573755537,
      "loss": 3.0425,
      "step": 52189
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5357906818389893,
      "learning_rate": 0.0005271911860036226,
      "loss": 3.183,
      "step": 52190
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.098620653152466,
      "learning_rate": 0.0005271885145894526,
      "loss": 3.0179,
      "step": 52191
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4666838645935059,
      "learning_rate": 0.0005271858431330436,
      "loss": 2.8219,
      "step": 52192
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5041943788528442,
      "learning_rate": 0.0005271831716343969,
      "loss": 3.0002,
      "step": 52193
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0117366313934326,
      "learning_rate": 0.0005271805000935122,
      "loss": 2.9625,
      "step": 52194
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6828974485397339,
      "learning_rate": 0.0005271778285103905,
      "loss": 3.0083,
      "step": 52195
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.479063868522644,
      "learning_rate": 0.0005271751568850322,
      "loss": 3.3443,
      "step": 52196
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4923555850982666,
      "learning_rate": 0.0005271724852174376,
      "loss": 3.1653,
      "step": 52197
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.76575767993927,
      "learning_rate": 0.0005271698135076075,
      "loss": 3.0284,
      "step": 52198
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1908977031707764,
      "learning_rate": 0.0005271671417555421,
      "loss": 3.1253,
      "step": 52199
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7414034605026245,
      "learning_rate": 0.0005271644699612422,
      "loss": 2.9849,
      "step": 52200
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.419129729270935,
      "learning_rate": 0.0005271617981247081,
      "loss": 2.9555,
      "step": 52201
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8352173566818237,
      "learning_rate": 0.0005271591262459402,
      "loss": 3.01,
      "step": 52202
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6934260129928589,
      "learning_rate": 0.0005271564543249392,
      "loss": 3.0554,
      "step": 52203
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7182931900024414,
      "learning_rate": 0.0005271537823617056,
      "loss": 3.1595,
      "step": 52204
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3901015520095825,
      "learning_rate": 0.0005271511103562398,
      "loss": 2.9947,
      "step": 52205
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.009835958480835,
      "learning_rate": 0.0005271484383085423,
      "loss": 3.0309,
      "step": 52206
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5175479650497437,
      "learning_rate": 0.0005271457662186136,
      "loss": 3.0711,
      "step": 52207
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5210659503936768,
      "learning_rate": 0.0005271430940864542,
      "loss": 3.0338,
      "step": 52208
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3624541759490967,
      "learning_rate": 0.0005271404219120647,
      "loss": 3.091,
      "step": 52209
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.008521556854248,
      "learning_rate": 0.0005271377496954454,
      "loss": 3.1135,
      "step": 52210
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3235385417938232,
      "learning_rate": 0.000527135077436597,
      "loss": 3.0274,
      "step": 52211
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9943417310714722,
      "learning_rate": 0.0005271324051355199,
      "loss": 2.914,
      "step": 52212
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4892657995224,
      "learning_rate": 0.0005271297327922146,
      "loss": 3.0226,
      "step": 52213
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3739582300186157,
      "learning_rate": 0.0005271270604066815,
      "loss": 3.0893,
      "step": 52214
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.332592248916626,
      "learning_rate": 0.0005271243879789213,
      "loss": 2.944,
      "step": 52215
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.484798789024353,
      "learning_rate": 0.0005271217155089342,
      "loss": 2.9657,
      "step": 52216
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7503314018249512,
      "learning_rate": 0.0005271190429967211,
      "loss": 3.2318,
      "step": 52217
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7856091260910034,
      "learning_rate": 0.0005271163704422821,
      "loss": 3.1589,
      "step": 52218
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.093036413192749,
      "learning_rate": 0.000527113697845618,
      "loss": 3.0477,
      "step": 52219
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.137037992477417,
      "learning_rate": 0.0005271110252067292,
      "loss": 3.1587,
      "step": 52220
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.2953782081604004,
      "learning_rate": 0.0005271083525256161,
      "loss": 3.0508,
      "step": 52221
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1930596828460693,
      "learning_rate": 0.0005271056798022794,
      "loss": 3.1614,
      "step": 52222
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2491061687469482,
      "learning_rate": 0.0005271030070367193,
      "loss": 2.945,
      "step": 52223
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.877687931060791,
      "learning_rate": 0.0005271003342289365,
      "loss": 2.7015,
      "step": 52224
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.890466570854187,
      "learning_rate": 0.0005270976613789316,
      "loss": 2.8702,
      "step": 52225
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.6414690017700195,
      "learning_rate": 0.0005270949884867048,
      "loss": 2.967,
      "step": 52226
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.517632484436035,
      "learning_rate": 0.0005270923155522568,
      "loss": 3.1525,
      "step": 52227
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8123623132705688,
      "learning_rate": 0.000527089642575588,
      "loss": 3.018,
      "step": 52228
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9776707887649536,
      "learning_rate": 0.0005270869695566991,
      "loss": 2.9999,
      "step": 52229
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.35084867477417,
      "learning_rate": 0.0005270842964955903,
      "loss": 2.8571,
      "step": 52230
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1531152725219727,
      "learning_rate": 0.0005270816233922624,
      "loss": 3.0712,
      "step": 52231
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4621632099151611,
      "learning_rate": 0.0005270789502467156,
      "loss": 3.1955,
      "step": 52232
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4804013967514038,
      "learning_rate": 0.0005270762770589506,
      "loss": 3.2042,
      "step": 52233
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.922569990158081,
      "learning_rate": 0.0005270736038289677,
      "loss": 2.7049,
      "step": 52234
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.698874831199646,
      "learning_rate": 0.0005270709305567676,
      "loss": 3.248,
      "step": 52235
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.373552918434143,
      "learning_rate": 0.0005270682572423508,
      "loss": 3.2816,
      "step": 52236
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9588278532028198,
      "learning_rate": 0.0005270655838857176,
      "loss": 3.0541,
      "step": 52237
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4122296571731567,
      "learning_rate": 0.0005270629104868689,
      "loss": 3.1451,
      "step": 52238
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6792352199554443,
      "learning_rate": 0.0005270602370458046,
      "loss": 3.0125,
      "step": 52239
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4950001239776611,
      "learning_rate": 0.0005270575635625257,
      "loss": 2.885,
      "step": 52240
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.45479154586792,
      "learning_rate": 0.0005270548900370325,
      "loss": 3.1145,
      "step": 52241
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2516586780548096,
      "learning_rate": 0.0005270522164693254,
      "loss": 2.9005,
      "step": 52242
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1733036041259766,
      "learning_rate": 0.0005270495428594051,
      "loss": 3.0341,
      "step": 52243
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.881641149520874,
      "learning_rate": 0.0005270468692072719,
      "loss": 3.0766,
      "step": 52244
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.844197392463684,
      "learning_rate": 0.0005270441955129265,
      "loss": 3.1948,
      "step": 52245
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3135881423950195,
      "learning_rate": 0.0005270415217763693,
      "loss": 3.1403,
      "step": 52246
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0796399116516113,
      "learning_rate": 0.0005270388479976008,
      "loss": 3.1514,
      "step": 52247
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.411379098892212,
      "learning_rate": 0.0005270361741766214,
      "loss": 2.9239,
      "step": 52248
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1678497791290283,
      "learning_rate": 0.0005270335003134318,
      "loss": 3.0183,
      "step": 52249
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.387157917022705,
      "learning_rate": 0.0005270308264080323,
      "loss": 3.0839,
      "step": 52250
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.541046142578125,
      "learning_rate": 0.0005270281524604235,
      "loss": 3.0408,
      "step": 52251
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.299835205078125,
      "learning_rate": 0.000527025478470606,
      "loss": 3.0029,
      "step": 52252
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7101643085479736,
      "learning_rate": 0.00052702280443858,
      "loss": 2.9408,
      "step": 52253
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.165468692779541,
      "learning_rate": 0.0005270201303643463,
      "loss": 2.9218,
      "step": 52254
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.810093402862549,
      "learning_rate": 0.0005270174562479053,
      "loss": 2.9816,
      "step": 52255
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1526782512664795,
      "learning_rate": 0.0005270147820892573,
      "loss": 3.1052,
      "step": 52256
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.300868272781372,
      "learning_rate": 0.0005270121078884031,
      "loss": 2.934,
      "step": 52257
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6725363731384277,
      "learning_rate": 0.000527009433645343,
      "loss": 3.0413,
      "step": 52258
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.509143114089966,
      "learning_rate": 0.0005270067593600776,
      "loss": 2.9893,
      "step": 52259
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4894393682479858,
      "learning_rate": 0.0005270040850326073,
      "loss": 3.1349,
      "step": 52260
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.71327805519104,
      "learning_rate": 0.0005270014106629328,
      "loss": 3.0647,
      "step": 52261
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5383281707763672,
      "learning_rate": 0.0005269987362510544,
      "loss": 2.9162,
      "step": 52262
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5877763032913208,
      "learning_rate": 0.0005269960617969725,
      "loss": 3.0157,
      "step": 52263
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3782747983932495,
      "learning_rate": 0.000526993387300688,
      "loss": 3.0114,
      "step": 52264
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0109341144561768,
      "learning_rate": 0.0005269907127622009,
      "loss": 3.0916,
      "step": 52265
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.536795973777771,
      "learning_rate": 0.000526988038181512,
      "loss": 3.0486,
      "step": 52266
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5540255308151245,
      "learning_rate": 0.0005269853635586218,
      "loss": 3.1612,
      "step": 52267
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6855742931365967,
      "learning_rate": 0.0005269826888935308,
      "loss": 2.9071,
      "step": 52268
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4801342487335205,
      "learning_rate": 0.0005269800141862393,
      "loss": 3.0276,
      "step": 52269
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4734914302825928,
      "learning_rate": 0.000526977339436748,
      "loss": 3.0969,
      "step": 52270
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3432873487472534,
      "learning_rate": 0.0005269746646450573,
      "loss": 3.0203,
      "step": 52271
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6684726476669312,
      "learning_rate": 0.0005269719898111678,
      "loss": 3.2198,
      "step": 52272
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7095540761947632,
      "learning_rate": 0.0005269693149350798,
      "loss": 3.1104,
      "step": 52273
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4088709354400635,
      "learning_rate": 0.000526966640016794,
      "loss": 3.2246,
      "step": 52274
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.696306586265564,
      "learning_rate": 0.0005269639650563108,
      "loss": 2.9507,
      "step": 52275
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9491281509399414,
      "learning_rate": 0.0005269612900536307,
      "loss": 3.0335,
      "step": 52276
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5294841527938843,
      "learning_rate": 0.0005269586150087542,
      "loss": 3.2149,
      "step": 52277
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.601723313331604,
      "learning_rate": 0.0005269559399216819,
      "loss": 3.0645,
      "step": 52278
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.074486494064331,
      "learning_rate": 0.000526953264792414,
      "loss": 2.8911,
      "step": 52279
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5650172233581543,
      "learning_rate": 0.0005269505896209514,
      "loss": 3.044,
      "step": 52280
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1750760078430176,
      "learning_rate": 0.0005269479144072944,
      "loss": 3.0817,
      "step": 52281
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1857569217681885,
      "learning_rate": 0.0005269452391514435,
      "loss": 2.7257,
      "step": 52282
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.443292260169983,
      "learning_rate": 0.0005269425638533991,
      "loss": 2.9165,
      "step": 52283
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.2640531063079834,
      "learning_rate": 0.0005269398885131618,
      "loss": 3.0922,
      "step": 52284
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.584524631500244,
      "learning_rate": 0.0005269372131307321,
      "loss": 3.1427,
      "step": 52285
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.6234657764434814,
      "learning_rate": 0.0005269345377061105,
      "loss": 3.2474,
      "step": 52286
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6721992492675781,
      "learning_rate": 0.0005269318622392976,
      "loss": 2.7717,
      "step": 52287
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8094433546066284,
      "learning_rate": 0.0005269291867302936,
      "loss": 2.9611,
      "step": 52288
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.520472764968872,
      "learning_rate": 0.0005269265111790993,
      "loss": 3.2191,
      "step": 52289
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.301337957382202,
      "learning_rate": 0.0005269238355857151,
      "loss": 3.1157,
      "step": 52290
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3555760383605957,
      "learning_rate": 0.0005269211599501413,
      "loss": 3.1613,
      "step": 52291
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.40512216091156,
      "learning_rate": 0.0005269184842723787,
      "loss": 3.105,
      "step": 52292
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.474656581878662,
      "learning_rate": 0.0005269158085524277,
      "loss": 3.042,
      "step": 52293
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.020829916000366,
      "learning_rate": 0.0005269131327902887,
      "loss": 3.293,
      "step": 52294
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3910737037658691,
      "learning_rate": 0.0005269104569859623,
      "loss": 3.3208,
      "step": 52295
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.0739710330963135,
      "learning_rate": 0.000526907781139449,
      "loss": 2.9924,
      "step": 52296
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.6654579639434814,
      "learning_rate": 0.0005269051052507491,
      "loss": 3.057,
      "step": 52297
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.01192045211792,
      "learning_rate": 0.0005269024293198634,
      "loss": 3.0153,
      "step": 52298
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4757009744644165,
      "learning_rate": 0.0005268997533467923,
      "loss": 2.9826,
      "step": 52299
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.8673853874206543,
      "learning_rate": 0.0005268970773315362,
      "loss": 3.1719,
      "step": 52300
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.7051260471343994,
      "learning_rate": 0.0005268944012740956,
      "loss": 3.0249,
      "step": 52301
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4367831945419312,
      "learning_rate": 0.0005268917251744711,
      "loss": 3.2737,
      "step": 52302
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7223528623580933,
      "learning_rate": 0.000526889049032663,
      "loss": 3.0322,
      "step": 52303
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7818201780319214,
      "learning_rate": 0.0005268863728486721,
      "loss": 3.1609,
      "step": 52304
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7999945878982544,
      "learning_rate": 0.0005268836966224987,
      "loss": 3.0344,
      "step": 52305
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5918128490447998,
      "learning_rate": 0.0005268810203541434,
      "loss": 3.1054,
      "step": 52306
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9355592727661133,
      "learning_rate": 0.0005268783440436066,
      "loss": 2.9483,
      "step": 52307
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6144558191299438,
      "learning_rate": 0.0005268756676908888,
      "loss": 3.1181,
      "step": 52308
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3103454113006592,
      "learning_rate": 0.0005268729912959906,
      "loss": 3.2735,
      "step": 52309
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5343326330184937,
      "learning_rate": 0.0005268703148589124,
      "loss": 3.1453,
      "step": 52310
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3541420698165894,
      "learning_rate": 0.0005268676383796546,
      "loss": 3.0728,
      "step": 52311
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.277647852897644,
      "learning_rate": 0.0005268649618582179,
      "loss": 3.0656,
      "step": 52312
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3355597257614136,
      "learning_rate": 0.0005268622852946027,
      "loss": 3.0768,
      "step": 52313
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.150960922241211,
      "learning_rate": 0.0005268596086888096,
      "loss": 2.9241,
      "step": 52314
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.775719165802002,
      "learning_rate": 0.0005268569320408391,
      "loss": 2.8665,
      "step": 52315
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5121140480041504,
      "learning_rate": 0.0005268542553506914,
      "loss": 2.8119,
      "step": 52316
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5711778402328491,
      "learning_rate": 0.0005268515786183674,
      "loss": 3.0191,
      "step": 52317
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8068599700927734,
      "learning_rate": 0.0005268489018438673,
      "loss": 3.2107,
      "step": 52318
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4717634916305542,
      "learning_rate": 0.0005268462250271916,
      "loss": 3.2515,
      "step": 52319
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5290875434875488,
      "learning_rate": 0.000526843548168341,
      "loss": 3.2038,
      "step": 52320
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4171544313430786,
      "learning_rate": 0.0005268408712673161,
      "loss": 2.851,
      "step": 52321
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.052786111831665,
      "learning_rate": 0.000526838194324117,
      "loss": 3.0229,
      "step": 52322
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8179054260253906,
      "learning_rate": 0.0005268355173387443,
      "loss": 3.074,
      "step": 52323
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0672452449798584,
      "learning_rate": 0.0005268328403111987,
      "loss": 2.6892,
      "step": 52324
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7753825187683105,
      "learning_rate": 0.0005268301632414807,
      "loss": 3.081,
      "step": 52325
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.507499098777771,
      "learning_rate": 0.0005268274861295907,
      "loss": 3.0781,
      "step": 52326
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.9304845333099365,
      "learning_rate": 0.000526824808975529,
      "loss": 2.7367,
      "step": 52327
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.637118935585022,
      "learning_rate": 0.0005268221317792964,
      "loss": 2.9594,
      "step": 52328
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6030908823013306,
      "learning_rate": 0.0005268194545408933,
      "loss": 3.0967,
      "step": 52329
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.202705144882202,
      "learning_rate": 0.0005268167772603201,
      "loss": 3.1349,
      "step": 52330
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4490571022033691,
      "learning_rate": 0.0005268140999375774,
      "loss": 2.8693,
      "step": 52331
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.321840763092041,
      "learning_rate": 0.0005268114225726657,
      "loss": 3.0554,
      "step": 52332
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.518884539604187,
      "learning_rate": 0.0005268087451655855,
      "loss": 3.1597,
      "step": 52333
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7430145740509033,
      "learning_rate": 0.0005268060677163373,
      "loss": 3.0008,
      "step": 52334
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4810243844985962,
      "learning_rate": 0.0005268033902249214,
      "loss": 3.1186,
      "step": 52335
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5368963479995728,
      "learning_rate": 0.0005268007126913387,
      "loss": 2.9342,
      "step": 52336
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6323587894439697,
      "learning_rate": 0.0005267980351155893,
      "loss": 2.9956,
      "step": 52337
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6789534091949463,
      "learning_rate": 0.000526795357497674,
      "loss": 3.1371,
      "step": 52338
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.738821029663086,
      "learning_rate": 0.000526792679837593,
      "loss": 2.8531,
      "step": 52339
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7075039148330688,
      "learning_rate": 0.0005267900021353471,
      "loss": 2.9943,
      "step": 52340
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4505207538604736,
      "learning_rate": 0.0005267873243909366,
      "loss": 3.0529,
      "step": 52341
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4038337469100952,
      "learning_rate": 0.000526784646604362,
      "loss": 3.0534,
      "step": 52342
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.671278476715088,
      "learning_rate": 0.000526781968775624,
      "loss": 2.8567,
      "step": 52343
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4718064069747925,
      "learning_rate": 0.0005267792909047228,
      "loss": 3.0107,
      "step": 52344
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4761595726013184,
      "learning_rate": 0.0005267766129916591,
      "loss": 3.2055,
      "step": 52345
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.024843454360962,
      "learning_rate": 0.0005267739350364333,
      "loss": 2.9094,
      "step": 52346
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4201122522354126,
      "learning_rate": 0.0005267712570390461,
      "loss": 3.0829,
      "step": 52347
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5934339761734009,
      "learning_rate": 0.0005267685789994978,
      "loss": 3.1678,
      "step": 52348
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3161449432373047,
      "learning_rate": 0.0005267659009177888,
      "loss": 2.9967,
      "step": 52349
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8440805673599243,
      "learning_rate": 0.0005267632227939198,
      "loss": 2.9162,
      "step": 52350
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7951672077178955,
      "learning_rate": 0.0005267605446278913,
      "loss": 3.1741,
      "step": 52351
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.732646107673645,
      "learning_rate": 0.0005267578664197038,
      "loss": 3.3269,
      "step": 52352
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5364575386047363,
      "learning_rate": 0.0005267551881693575,
      "loss": 3.2418,
      "step": 52353
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9448260068893433,
      "learning_rate": 0.0005267525098768532,
      "loss": 3.0742,
      "step": 52354
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2424778938293457,
      "learning_rate": 0.0005267498315421914,
      "loss": 3.1322,
      "step": 52355
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5174520015716553,
      "learning_rate": 0.0005267471531653726,
      "loss": 2.8643,
      "step": 52356
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.004826068878174,
      "learning_rate": 0.0005267444747463971,
      "loss": 2.929,
      "step": 52357
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3333656787872314,
      "learning_rate": 0.0005267417962852655,
      "loss": 3.3365,
      "step": 52358
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4753369092941284,
      "learning_rate": 0.0005267391177819784,
      "loss": 3.1307,
      "step": 52359
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4921214580535889,
      "learning_rate": 0.0005267364392365362,
      "loss": 3.0733,
      "step": 52360
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3774056434631348,
      "learning_rate": 0.0005267337606489394,
      "loss": 3.0775,
      "step": 52361
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.761237382888794,
      "learning_rate": 0.0005267310820191887,
      "loss": 3.4116,
      "step": 52362
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.70066237449646,
      "learning_rate": 0.000526728403347284,
      "loss": 2.9665,
      "step": 52363
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.9724950790405273,
      "learning_rate": 0.0005267257246332266,
      "loss": 3.2262,
      "step": 52364
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.205598831176758,
      "learning_rate": 0.0005267230458770164,
      "loss": 3.176,
      "step": 52365
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.544860601425171,
      "learning_rate": 0.0005267203670786542,
      "loss": 3.1453,
      "step": 52366
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.127061605453491,
      "learning_rate": 0.0005267176882381403,
      "loss": 3.2061,
      "step": 52367
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8518950939178467,
      "learning_rate": 0.0005267150093554754,
      "loss": 3.0809,
      "step": 52368
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3095223903656006,
      "learning_rate": 0.0005267123304306599,
      "loss": 2.8982,
      "step": 52369
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.662705421447754,
      "learning_rate": 0.0005267096514636942,
      "loss": 2.894,
      "step": 52370
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4814764261245728,
      "learning_rate": 0.000526706972454579,
      "loss": 2.9392,
      "step": 52371
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.831109046936035,
      "learning_rate": 0.0005267042934033147,
      "loss": 2.8887,
      "step": 52372
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.733391761779785,
      "learning_rate": 0.0005267016143099016,
      "loss": 3.0434,
      "step": 52373
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.608757495880127,
      "learning_rate": 0.0005266989351743407,
      "loss": 3.2142,
      "step": 52374
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7701870203018188,
      "learning_rate": 0.000526696255996632,
      "loss": 3.0181,
      "step": 52375
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.0039525032043457,
      "learning_rate": 0.0005266935767767761,
      "loss": 3.1253,
      "step": 52376
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.57619309425354,
      "learning_rate": 0.0005266908975147738,
      "loss": 3.0636,
      "step": 52377
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.688725233078003,
      "learning_rate": 0.0005266882182106252,
      "loss": 3.2025,
      "step": 52378
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4030578136444092,
      "learning_rate": 0.0005266855388643311,
      "loss": 3.2889,
      "step": 52379
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.802524447441101,
      "learning_rate": 0.0005266828594758918,
      "loss": 3.1808,
      "step": 52380
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4928197860717773,
      "learning_rate": 0.000526680180045308,
      "loss": 3.0581,
      "step": 52381
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8203235864639282,
      "learning_rate": 0.0005266775005725799,
      "loss": 2.9346,
      "step": 52382
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.924375057220459,
      "learning_rate": 0.0005266748210577083,
      "loss": 2.8965,
      "step": 52383
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9304434061050415,
      "learning_rate": 0.0005266721415006936,
      "loss": 2.9686,
      "step": 52384
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.195314884185791,
      "learning_rate": 0.0005266694619015362,
      "loss": 3.3086,
      "step": 52385
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0764000415802,
      "learning_rate": 0.0005266667822602367,
      "loss": 3.0049,
      "step": 52386
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4317305088043213,
      "learning_rate": 0.0005266641025767955,
      "loss": 3.0233,
      "step": 52387
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.5492682456970215,
      "learning_rate": 0.0005266614228512132,
      "loss": 3.0651,
      "step": 52388
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.105109214782715,
      "learning_rate": 0.0005266587430834903,
      "loss": 3.112,
      "step": 52389
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.575401782989502,
      "learning_rate": 0.0005266560632736272,
      "loss": 2.9775,
      "step": 52390
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6845077276229858,
      "learning_rate": 0.0005266533834216246,
      "loss": 3.0799,
      "step": 52391
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.1680309772491455,
      "learning_rate": 0.0005266507035274827,
      "loss": 3.1701,
      "step": 52392
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9399288892745972,
      "learning_rate": 0.0005266480235912021,
      "loss": 3.0998,
      "step": 52393
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.19179630279541,
      "learning_rate": 0.0005266453436127836,
      "loss": 3.0016,
      "step": 52394
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9880940914154053,
      "learning_rate": 0.0005266426635922273,
      "loss": 3.062,
      "step": 52395
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0716497898101807,
      "learning_rate": 0.0005266399835295338,
      "loss": 2.7513,
      "step": 52396
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.5455262660980225,
      "learning_rate": 0.0005266373034247038,
      "loss": 3.2278,
      "step": 52397
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4606174230575562,
      "learning_rate": 0.0005266346232777375,
      "loss": 3.0334,
      "step": 52398
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3376963138580322,
      "learning_rate": 0.0005266319430886357,
      "loss": 2.8645,
      "step": 52399
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.463458776473999,
      "learning_rate": 0.0005266292628573986,
      "loss": 2.9975,
      "step": 52400
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5365806818008423,
      "learning_rate": 0.0005266265825840268,
      "loss": 2.9277,
      "step": 52401
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7256269454956055,
      "learning_rate": 0.000526623902268521,
      "loss": 2.8387,
      "step": 52402
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.506184697151184,
      "learning_rate": 0.0005266212219108815,
      "loss": 2.871,
      "step": 52403
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5661728382110596,
      "learning_rate": 0.0005266185415111088,
      "loss": 3.1546,
      "step": 52404
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0452892780303955,
      "learning_rate": 0.0005266158610692034,
      "loss": 3.0244,
      "step": 52405
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5869377851486206,
      "learning_rate": 0.0005266131805851658,
      "loss": 3.1463,
      "step": 52406
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0591838359832764,
      "learning_rate": 0.0005266105000589968,
      "loss": 2.8651,
      "step": 52407
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.954506516456604,
      "learning_rate": 0.0005266078194906963,
      "loss": 2.8913,
      "step": 52408
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5819040536880493,
      "learning_rate": 0.0005266051388802653,
      "loss": 3.3341,
      "step": 52409
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8933178186416626,
      "learning_rate": 0.0005266024582277041,
      "loss": 2.6561,
      "step": 52410
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4386098384857178,
      "learning_rate": 0.0005265997775330132,
      "loss": 2.9726,
      "step": 52411
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7285643815994263,
      "learning_rate": 0.0005265970967961932,
      "loss": 3.0374,
      "step": 52412
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.694692850112915,
      "learning_rate": 0.0005265944160172444,
      "loss": 3.335,
      "step": 52413
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5768346786499023,
      "learning_rate": 0.0005265917351961675,
      "loss": 2.9939,
      "step": 52414
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5763764381408691,
      "learning_rate": 0.000526589054332963,
      "loss": 3.26,
      "step": 52415
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.547174334526062,
      "learning_rate": 0.0005265863734276312,
      "loss": 2.9736,
      "step": 52416
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6137423515319824,
      "learning_rate": 0.0005265836924801728,
      "loss": 3.0903,
      "step": 52417
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5421805381774902,
      "learning_rate": 0.000526581011490588,
      "loss": 2.7919,
      "step": 52418
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.594342827796936,
      "learning_rate": 0.0005265783304588778,
      "loss": 2.8043,
      "step": 52419
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6886696815490723,
      "learning_rate": 0.0005265756493850422,
      "loss": 3.0494,
      "step": 52420
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.7439582347869873,
      "learning_rate": 0.000526572968269082,
      "loss": 2.9821,
      "step": 52421
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7436939477920532,
      "learning_rate": 0.0005265702871109976,
      "loss": 3.2159,
      "step": 52422
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6709544658660889,
      "learning_rate": 0.0005265676059107895,
      "loss": 3.2969,
      "step": 52423
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7568105459213257,
      "learning_rate": 0.0005265649246684583,
      "loss": 3.1667,
      "step": 52424
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2480955123901367,
      "learning_rate": 0.0005265622433840042,
      "loss": 2.8672,
      "step": 52425
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.39492666721344,
      "learning_rate": 0.0005265595620574281,
      "loss": 2.9647,
      "step": 52426
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.327849268913269,
      "learning_rate": 0.0005265568806887302,
      "loss": 3.1128,
      "step": 52427
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.8851284980773926,
      "learning_rate": 0.0005265541992779112,
      "loss": 3.0527,
      "step": 52428
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5099023580551147,
      "learning_rate": 0.0005265515178249713,
      "loss": 3.0324,
      "step": 52429
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3813865184783936,
      "learning_rate": 0.0005265488363299113,
      "loss": 2.8909,
      "step": 52430
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8097467422485352,
      "learning_rate": 0.0005265461547927316,
      "loss": 3.0732,
      "step": 52431
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.586646318435669,
      "learning_rate": 0.0005265434732134327,
      "loss": 3.0759,
      "step": 52432
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6983041763305664,
      "learning_rate": 0.0005265407915920151,
      "loss": 2.9881,
      "step": 52433
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.952288031578064,
      "learning_rate": 0.0005265381099284793,
      "loss": 3.1601,
      "step": 52434
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7024880647659302,
      "learning_rate": 0.0005265354282228258,
      "loss": 3.0519,
      "step": 52435
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3852206468582153,
      "learning_rate": 0.000526532746475055,
      "loss": 3.0988,
      "step": 52436
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7534971237182617,
      "learning_rate": 0.0005265300646851676,
      "loss": 2.8298,
      "step": 52437
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6344584226608276,
      "learning_rate": 0.0005265273828531639,
      "loss": 3.1675,
      "step": 52438
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.581925630569458,
      "learning_rate": 0.0005265247009790445,
      "loss": 3.2017,
      "step": 52439
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4307448863983154,
      "learning_rate": 0.0005265220190628099,
      "loss": 3.1277,
      "step": 52440
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6432889699935913,
      "learning_rate": 0.0005265193371044606,
      "loss": 3.127,
      "step": 52441
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.234092950820923,
      "learning_rate": 0.0005265166551039971,
      "loss": 2.9926,
      "step": 52442
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7569645643234253,
      "learning_rate": 0.0005265139730614198,
      "loss": 3.0855,
      "step": 52443
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.728093385696411,
      "learning_rate": 0.0005265112909767293,
      "loss": 2.9802,
      "step": 52444
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.810725450515747,
      "learning_rate": 0.0005265086088499261,
      "loss": 2.9258,
      "step": 52445
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7727758884429932,
      "learning_rate": 0.0005265059266810107,
      "loss": 3.0621,
      "step": 52446
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5914546251296997,
      "learning_rate": 0.0005265032444699834,
      "loss": 3.1597,
      "step": 52447
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7933183908462524,
      "learning_rate": 0.0005265005622168451,
      "loss": 3.2186,
      "step": 52448
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5225234031677246,
      "learning_rate": 0.000526497879921596,
      "loss": 2.7564,
      "step": 52449
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5784001350402832,
      "learning_rate": 0.0005264951975842366,
      "loss": 3.181,
      "step": 52450
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4341676235198975,
      "learning_rate": 0.0005264925152047676,
      "loss": 2.7641,
      "step": 52451
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.543045163154602,
      "learning_rate": 0.0005264898327831892,
      "loss": 2.8934,
      "step": 52452
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.029529094696045,
      "learning_rate": 0.0005264871503195021,
      "loss": 3.1988,
      "step": 52453
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7469745874404907,
      "learning_rate": 0.0005264844678137067,
      "loss": 3.0947,
      "step": 52454
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7019299268722534,
      "learning_rate": 0.0005264817852658037,
      "loss": 3.2417,
      "step": 52455
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.532895803451538,
      "learning_rate": 0.0005264791026757935,
      "loss": 2.7716,
      "step": 52456
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.482862114906311,
      "learning_rate": 0.0005264764200436765,
      "loss": 3.1842,
      "step": 52457
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1092851161956787,
      "learning_rate": 0.0005264737373694532,
      "loss": 3.1653,
      "step": 52458
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6511695384979248,
      "learning_rate": 0.0005264710546531242,
      "loss": 2.568,
      "step": 52459
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1412250995635986,
      "learning_rate": 0.0005264683718946899,
      "loss": 3.063,
      "step": 52460
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6172960996627808,
      "learning_rate": 0.000526465689094151,
      "loss": 3.069,
      "step": 52461
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3114200830459595,
      "learning_rate": 0.0005264630062515078,
      "loss": 3.0573,
      "step": 52462
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8637539148330688,
      "learning_rate": 0.0005264603233667607,
      "loss": 2.9326,
      "step": 52463
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5738030672073364,
      "learning_rate": 0.0005264576404399106,
      "loss": 3.0429,
      "step": 52464
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3123853206634521,
      "learning_rate": 0.0005264549574709576,
      "loss": 2.9397,
      "step": 52465
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4260153770446777,
      "learning_rate": 0.0005264522744599023,
      "loss": 3.2075,
      "step": 52466
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1825952529907227,
      "learning_rate": 0.0005264495914067453,
      "loss": 3.0659,
      "step": 52467
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5188146829605103,
      "learning_rate": 0.0005264469083114871,
      "loss": 3.288,
      "step": 52468
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5201287269592285,
      "learning_rate": 0.0005264442251741281,
      "loss": 3.0232,
      "step": 52469
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3832807540893555,
      "learning_rate": 0.0005264415419946689,
      "loss": 3.2236,
      "step": 52470
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4050617218017578,
      "learning_rate": 0.0005264388587731099,
      "loss": 3.1607,
      "step": 52471
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9353150129318237,
      "learning_rate": 0.0005264361755094516,
      "loss": 3.028,
      "step": 52472
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.354233980178833,
      "learning_rate": 0.0005264334922036947,
      "loss": 2.9955,
      "step": 52473
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3688057661056519,
      "learning_rate": 0.0005264308088558393,
      "loss": 2.9714,
      "step": 52474
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4259036779403687,
      "learning_rate": 0.0005264281254658863,
      "loss": 2.9343,
      "step": 52475
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3658181428909302,
      "learning_rate": 0.0005264254420338359,
      "loss": 3.3724,
      "step": 52476
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4742281436920166,
      "learning_rate": 0.0005264227585596888,
      "loss": 3.1206,
      "step": 52477
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6633466482162476,
      "learning_rate": 0.0005264200750434455,
      "loss": 2.7071,
      "step": 52478
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.420677900314331,
      "learning_rate": 0.0005264173914851064,
      "loss": 3.2949,
      "step": 52479
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.53042733669281,
      "learning_rate": 0.0005264147078846721,
      "loss": 3.0915,
      "step": 52480
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5437805652618408,
      "learning_rate": 0.000526412024242143,
      "loss": 3.1516,
      "step": 52481
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5455024242401123,
      "learning_rate": 0.0005264093405575195,
      "loss": 3.0899,
      "step": 52482
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1139628887176514,
      "learning_rate": 0.0005264066568308024,
      "loss": 3.0429,
      "step": 52483
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4814612865447998,
      "learning_rate": 0.0005264039730619918,
      "loss": 3.1845,
      "step": 52484
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4790889024734497,
      "learning_rate": 0.0005264012892510887,
      "loss": 3.0624,
      "step": 52485
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0534493923187256,
      "learning_rate": 0.0005263986053980932,
      "loss": 3.0665,
      "step": 52486
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.195005178451538,
      "learning_rate": 0.000526395921503006,
      "loss": 2.9304,
      "step": 52487
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7153733968734741,
      "learning_rate": 0.0005263932375658275,
      "loss": 3.0533,
      "step": 52488
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.407808780670166,
      "learning_rate": 0.0005263905535865581,
      "loss": 3.0238,
      "step": 52489
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.9673407077789307,
      "learning_rate": 0.0005263878695651985,
      "loss": 2.9985,
      "step": 52490
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.125227451324463,
      "learning_rate": 0.0005263851855017491,
      "loss": 2.9843,
      "step": 52491
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.4669716358184814,
      "learning_rate": 0.0005263825013962104,
      "loss": 3.0591,
      "step": 52492
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8814054727554321,
      "learning_rate": 0.000526379817248583,
      "loss": 2.708,
      "step": 52493
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0002639293670654,
      "learning_rate": 0.0005263771330588673,
      "loss": 2.8534,
      "step": 52494
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3558748960494995,
      "learning_rate": 0.0005263744488270638,
      "loss": 3.0461,
      "step": 52495
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8166625499725342,
      "learning_rate": 0.000526371764553173,
      "loss": 3.1876,
      "step": 52496
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2095108032226562,
      "learning_rate": 0.0005263690802371954,
      "loss": 3.2819,
      "step": 52497
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3307735919952393,
      "learning_rate": 0.0005263663958791316,
      "loss": 3.1895,
      "step": 52498
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.85435152053833,
      "learning_rate": 0.0005263637114789819,
      "loss": 3.0151,
      "step": 52499
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5543231964111328,
      "learning_rate": 0.000526361027036747,
      "loss": 3.0936,
      "step": 52500
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6898303031921387,
      "learning_rate": 0.0005263583425524273,
      "loss": 2.8955,
      "step": 52501
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5500167608261108,
      "learning_rate": 0.0005263556580260233,
      "loss": 3.4132,
      "step": 52502
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5652786493301392,
      "learning_rate": 0.0005263529734575355,
      "loss": 3.1703,
      "step": 52503
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5385797023773193,
      "learning_rate": 0.0005263502888469644,
      "loss": 2.7129,
      "step": 52504
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4022434949874878,
      "learning_rate": 0.0005263476041943106,
      "loss": 2.9337,
      "step": 52505
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4089241027832031,
      "learning_rate": 0.0005263449194995745,
      "loss": 2.9419,
      "step": 52506
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3174179792404175,
      "learning_rate": 0.0005263422347627565,
      "loss": 2.958,
      "step": 52507
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7262020111083984,
      "learning_rate": 0.0005263395499838572,
      "loss": 2.8001,
      "step": 52508
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5477893352508545,
      "learning_rate": 0.0005263368651628771,
      "loss": 2.9307,
      "step": 52509
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3995158672332764,
      "learning_rate": 0.0005263341802998168,
      "loss": 2.7535,
      "step": 52510
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.634680151939392,
      "learning_rate": 0.0005263314953946767,
      "loss": 2.9908,
      "step": 52511
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.070446014404297,
      "learning_rate": 0.0005263288104474573,
      "loss": 3.118,
      "step": 52512
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.412146806716919,
      "learning_rate": 0.0005263261254581591,
      "loss": 2.9695,
      "step": 52513
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.323024272918701,
      "learning_rate": 0.0005263234404267825,
      "loss": 3.0971,
      "step": 52514
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.636207938194275,
      "learning_rate": 0.0005263207553533281,
      "loss": 3.1196,
      "step": 52515
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.01776123046875,
      "learning_rate": 0.0005263180702377965,
      "loss": 3.0594,
      "step": 52516
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.004584789276123,
      "learning_rate": 0.0005263153850801881,
      "loss": 2.9687,
      "step": 52517
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5358837842941284,
      "learning_rate": 0.0005263126998805033,
      "loss": 3.043,
      "step": 52518
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3454258441925049,
      "learning_rate": 0.0005263100146387428,
      "loss": 2.7937,
      "step": 52519
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.643211841583252,
      "learning_rate": 0.0005263073293549069,
      "loss": 3.1037,
      "step": 52520
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5051395893096924,
      "learning_rate": 0.0005263046440289963,
      "loss": 3.1395,
      "step": 52521
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.540616750717163,
      "learning_rate": 0.0005263019586610113,
      "loss": 2.9448,
      "step": 52522
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.607502818107605,
      "learning_rate": 0.0005262992732509526,
      "loss": 3.0849,
      "step": 52523
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5504555702209473,
      "learning_rate": 0.0005262965877988205,
      "loss": 2.8816,
      "step": 52524
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.983212947845459,
      "learning_rate": 0.0005262939023046156,
      "loss": 2.94,
      "step": 52525
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7644104957580566,
      "learning_rate": 0.0005262912167683385,
      "loss": 2.8597,
      "step": 52526
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7643499374389648,
      "learning_rate": 0.0005262885311899894,
      "loss": 3.2388,
      "step": 52527
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.222970724105835,
      "learning_rate": 0.0005262858455695692,
      "loss": 3.1054,
      "step": 52528
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3928426504135132,
      "learning_rate": 0.000526283159907078,
      "loss": 3.0989,
      "step": 52529
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6520591974258423,
      "learning_rate": 0.0005262804742025166,
      "loss": 2.983,
      "step": 52530
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5615168809890747,
      "learning_rate": 0.0005262777884558854,
      "loss": 3.1659,
      "step": 52531
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.756957769393921,
      "learning_rate": 0.0005262751026671849,
      "loss": 2.8242,
      "step": 52532
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7807832956314087,
      "learning_rate": 0.0005262724168364156,
      "loss": 3.1975,
      "step": 52533
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8366492986679077,
      "learning_rate": 0.0005262697309635779,
      "loss": 2.7219,
      "step": 52534
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1299338340759277,
      "learning_rate": 0.0005262670450486723,
      "loss": 2.9705,
      "step": 52535
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.682927131652832,
      "learning_rate": 0.0005262643590916995,
      "loss": 2.928,
      "step": 52536
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4812229871749878,
      "learning_rate": 0.0005262616730926599,
      "loss": 3.0793,
      "step": 52537
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.182619333267212,
      "learning_rate": 0.000526258987051554,
      "loss": 2.9492,
      "step": 52538
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.298886775970459,
      "learning_rate": 0.0005262563009683823,
      "loss": 2.872,
      "step": 52539
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.628005862236023,
      "learning_rate": 0.0005262536148431452,
      "loss": 3.1749,
      "step": 52540
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4187864065170288,
      "learning_rate": 0.0005262509286758433,
      "loss": 3.1506,
      "step": 52541
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3641420602798462,
      "learning_rate": 0.0005262482424664771,
      "loss": 3.2531,
      "step": 52542
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3305295705795288,
      "learning_rate": 0.0005262455562150472,
      "loss": 2.9683,
      "step": 52543
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4738835096359253,
      "learning_rate": 0.0005262428699215537,
      "loss": 2.8699,
      "step": 52544
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4699221849441528,
      "learning_rate": 0.0005262401835859976,
      "loss": 3.0071,
      "step": 52545
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1869373321533203,
      "learning_rate": 0.0005262374972083791,
      "loss": 2.8825,
      "step": 52546
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8960851430892944,
      "learning_rate": 0.0005262348107886989,
      "loss": 3.0871,
      "step": 52547
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.476830244064331,
      "learning_rate": 0.0005262321243269572,
      "loss": 3.114,
      "step": 52548
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.9396512508392334,
      "learning_rate": 0.0005262294378231547,
      "loss": 2.9902,
      "step": 52549
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.728473663330078,
      "learning_rate": 0.000526226751277292,
      "loss": 2.9862,
      "step": 52550
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7847949266433716,
      "learning_rate": 0.0005262240646893695,
      "loss": 3.0189,
      "step": 52551
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3924827575683594,
      "learning_rate": 0.0005262213780593875,
      "loss": 2.9705,
      "step": 52552
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.1941492557525635,
      "learning_rate": 0.0005262186913873467,
      "loss": 2.9903,
      "step": 52553
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6765772104263306,
      "learning_rate": 0.0005262160046732477,
      "loss": 2.9308,
      "step": 52554
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0190422534942627,
      "learning_rate": 0.0005262133179170909,
      "loss": 3.1546,
      "step": 52555
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.683553457260132,
      "learning_rate": 0.0005262106311188765,
      "loss": 3.0655,
      "step": 52556
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.483953833580017,
      "learning_rate": 0.0005262079442786055,
      "loss": 3.0367,
      "step": 52557
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4093663692474365,
      "learning_rate": 0.0005262052573962782,
      "loss": 2.9787,
      "step": 52558
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.746570348739624,
      "learning_rate": 0.000526202570471895,
      "loss": 2.9716,
      "step": 52559
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.4472832679748535,
      "learning_rate": 0.0005261998835054565,
      "loss": 3.1123,
      "step": 52560
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.081331968307495,
      "learning_rate": 0.0005261971964969631,
      "loss": 3.0648,
      "step": 52561
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4884055852890015,
      "learning_rate": 0.0005261945094464154,
      "loss": 3.1206,
      "step": 52562
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8815799951553345,
      "learning_rate": 0.0005261918223538139,
      "loss": 3.1849,
      "step": 52563
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9090968370437622,
      "learning_rate": 0.000526189135219159,
      "loss": 3.0194,
      "step": 52564
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8412801027297974,
      "learning_rate": 0.0005261864480424514,
      "loss": 3.2798,
      "step": 52565
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5946967601776123,
      "learning_rate": 0.0005261837608236913,
      "loss": 3.2377,
      "step": 52566
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.518477201461792,
      "learning_rate": 0.0005261810735628794,
      "loss": 3.1445,
      "step": 52567
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8129072189331055,
      "learning_rate": 0.0005261783862600164,
      "loss": 3.2239,
      "step": 52568
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.532993197441101,
      "learning_rate": 0.0005261756989151024,
      "loss": 3.0983,
      "step": 52569
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9894847869873047,
      "learning_rate": 0.000526173011528138,
      "loss": 3.1295,
      "step": 52570
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.0099828243255615,
      "learning_rate": 0.0005261703240991239,
      "loss": 2.7645,
      "step": 52571
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.619884967803955,
      "learning_rate": 0.0005261676366280604,
      "loss": 3.0336,
      "step": 52572
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.897627592086792,
      "learning_rate": 0.0005261649491149481,
      "loss": 3.0332,
      "step": 52573
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.131991386413574,
      "learning_rate": 0.0005261622615597874,
      "loss": 3.0701,
      "step": 52574
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3807899951934814,
      "learning_rate": 0.000526159573962579,
      "loss": 3.1019,
      "step": 52575
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9612524509429932,
      "learning_rate": 0.0005261568863233231,
      "loss": 3.0741,
      "step": 52576
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6061944961547852,
      "learning_rate": 0.0005261541986420205,
      "loss": 3.2549,
      "step": 52577
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3918699026107788,
      "learning_rate": 0.0005261515109186715,
      "loss": 3.1621,
      "step": 52578
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6055200099945068,
      "learning_rate": 0.0005261488231532766,
      "loss": 3.0406,
      "step": 52579
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.65422785282135,
      "learning_rate": 0.0005261461353458364,
      "loss": 2.9675,
      "step": 52580
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7965295314788818,
      "learning_rate": 0.0005261434474963515,
      "loss": 2.9753,
      "step": 52581
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7667001485824585,
      "learning_rate": 0.0005261407596048221,
      "loss": 2.981,
      "step": 52582
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5865602493286133,
      "learning_rate": 0.000526138071671249,
      "loss": 3.1233,
      "step": 52583
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.867023229598999,
      "learning_rate": 0.0005261353836956325,
      "loss": 2.9319,
      "step": 52584
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4344439506530762,
      "learning_rate": 0.0005261326956779731,
      "loss": 3.0863,
      "step": 52585
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.6801235675811768,
      "learning_rate": 0.0005261300076182714,
      "loss": 3.1097,
      "step": 52586
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.3741018772125244,
      "learning_rate": 0.0005261273195165279,
      "loss": 3.1796,
      "step": 52587
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.606689453125,
      "learning_rate": 0.000526124631372743,
      "loss": 3.1336,
      "step": 52588
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.8985865116119385,
      "learning_rate": 0.0005261219431869174,
      "loss": 3.1101,
      "step": 52589
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7491676807403564,
      "learning_rate": 0.0005261192549590514,
      "loss": 3.2215,
      "step": 52590
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7743393182754517,
      "learning_rate": 0.0005261165666891456,
      "loss": 3.2625,
      "step": 52591
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.5241889953613281,
      "learning_rate": 0.0005261138783772003,
      "loss": 3.0487,
      "step": 52592
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9288835525512695,
      "learning_rate": 0.0005261111900232162,
      "loss": 3.2324,
      "step": 52593
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.7511818408966064,
      "learning_rate": 0.0005261085016271938,
      "loss": 3.2559,
      "step": 52594
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7842998504638672,
      "learning_rate": 0.0005261058131891337,
      "loss": 3.1038,
      "step": 52595
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.688154697418213,
      "learning_rate": 0.000526103124709036,
      "loss": 3.0166,
      "step": 52596
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.303802967071533,
      "learning_rate": 0.0005261004361869018,
      "loss": 3.2306,
      "step": 52597
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2811803817749023,
      "learning_rate": 0.000526097747622731,
      "loss": 3.14,
      "step": 52598
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3311713933944702,
      "learning_rate": 0.0005260950590165244,
      "loss": 3.1075,
      "step": 52599
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9464138746261597,
      "learning_rate": 0.0005260923703682825,
      "loss": 3.1048,
      "step": 52600
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.9737132787704468,
      "learning_rate": 0.0005260896816780057,
      "loss": 3.024,
      "step": 52601
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.569305181503296,
      "learning_rate": 0.0005260869929456946,
      "loss": 3.2074,
      "step": 52602
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.4497840404510498,
      "learning_rate": 0.0005260843041713497,
      "loss": 3.2758,
      "step": 52603
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.0351319313049316,
      "learning_rate": 0.0005260816153549714,
      "loss": 3.0786,
      "step": 52604
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7838793992996216,
      "learning_rate": 0.0005260789264965603,
      "loss": 3.0326,
      "step": 52605
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7793259620666504,
      "learning_rate": 0.0005260762375961168,
      "loss": 2.9868,
      "step": 52606
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.3632835149765015,
      "learning_rate": 0.0005260735486536415,
      "loss": 3.1897,
      "step": 52607
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.064824104309082,
      "learning_rate": 0.0005260708596691349,
      "loss": 2.9625,
      "step": 52608
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9418613910675049,
      "learning_rate": 0.0005260681706425974,
      "loss": 3.1086,
      "step": 52609
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6458505392074585,
      "learning_rate": 0.0005260654815740296,
      "loss": 3.1373,
      "step": 52610
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7259266376495361,
      "learning_rate": 0.0005260627924634319,
      "loss": 2.7492,
      "step": 52611
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6690534353256226,
      "learning_rate": 0.000526060103310805,
      "loss": 3.1072,
      "step": 52612
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6111507415771484,
      "learning_rate": 0.0005260574141161492,
      "loss": 2.8825,
      "step": 52613
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9943305253982544,
      "learning_rate": 0.0005260547248794649,
      "loss": 3.0884,
      "step": 52614
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4120802879333496,
      "learning_rate": 0.000526052035600753,
      "loss": 3.2274,
      "step": 52615
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.559495210647583,
      "learning_rate": 0.0005260493462800137,
      "loss": 3.0882,
      "step": 52616
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.476323127746582,
      "learning_rate": 0.0005260466569172474,
      "loss": 3.1222,
      "step": 52617
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5423669815063477,
      "learning_rate": 0.0005260439675124548,
      "loss": 2.9738,
      "step": 52618
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5364296436309814,
      "learning_rate": 0.0005260412780656365,
      "loss": 3.1644,
      "step": 52619
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5391368865966797,
      "learning_rate": 0.0005260385885767927,
      "loss": 2.9937,
      "step": 52620
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.879937767982483,
      "learning_rate": 0.0005260358990459242,
      "loss": 3.1275,
      "step": 52621
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.603885531425476,
      "learning_rate": 0.0005260332094730313,
      "loss": 3.2123,
      "step": 52622
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6485155820846558,
      "learning_rate": 0.0005260305198581146,
      "loss": 3.2379,
      "step": 52623
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4460508823394775,
      "learning_rate": 0.0005260278302011745,
      "loss": 3.2969,
      "step": 52624
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.643324613571167,
      "learning_rate": 0.0005260251405022116,
      "loss": 3.2256,
      "step": 52625
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.132550001144409,
      "learning_rate": 0.0005260224507612263,
      "loss": 3.0644,
      "step": 52626
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6247446537017822,
      "learning_rate": 0.0005260197609782192,
      "loss": 3.0471,
      "step": 52627
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3973779678344727,
      "learning_rate": 0.0005260170711531908,
      "loss": 2.9691,
      "step": 52628
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9246915578842163,
      "learning_rate": 0.0005260143812861415,
      "loss": 3.0106,
      "step": 52629
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2555664777755737,
      "learning_rate": 0.0005260116913770719,
      "loss": 3.1997,
      "step": 52630
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6599828004837036,
      "learning_rate": 0.0005260090014259824,
      "loss": 3.1268,
      "step": 52631
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6065863370895386,
      "learning_rate": 0.0005260063114328736,
      "loss": 3.0267,
      "step": 52632
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6242954730987549,
      "learning_rate": 0.000526003621397746,
      "loss": 3.2811,
      "step": 52633
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9740571975708008,
      "learning_rate": 0.0005260009313206,
      "loss": 3.2779,
      "step": 52634
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5753204822540283,
      "learning_rate": 0.0005259982412014362,
      "loss": 3.0725,
      "step": 52635
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7273025512695312,
      "learning_rate": 0.000525995551040255,
      "loss": 2.8922,
      "step": 52636
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.51384437084198,
      "learning_rate": 0.000525992860837057,
      "loss": 3.1623,
      "step": 52637
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.737974762916565,
      "learning_rate": 0.0005259901705918427,
      "loss": 3.1775,
      "step": 52638
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3152812719345093,
      "learning_rate": 0.0005259874803046125,
      "loss": 2.9505,
      "step": 52639
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8476321697235107,
      "learning_rate": 0.000525984789975367,
      "loss": 3.196,
      "step": 52640
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9966380596160889,
      "learning_rate": 0.0005259820996041067,
      "loss": 3.0439,
      "step": 52641
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5566833019256592,
      "learning_rate": 0.000525979409190832,
      "loss": 3.023,
      "step": 52642
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4052867889404297,
      "learning_rate": 0.0005259767187355435,
      "loss": 2.9919,
      "step": 52643
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3760082721710205,
      "learning_rate": 0.0005259740282382415,
      "loss": 3.0246,
      "step": 52644
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4990181922912598,
      "learning_rate": 0.0005259713376989268,
      "loss": 3.1851,
      "step": 52645
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.730318784713745,
      "learning_rate": 0.0005259686471175998,
      "loss": 2.919,
      "step": 52646
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9626613855361938,
      "learning_rate": 0.0005259659564942611,
      "loss": 2.8565,
      "step": 52647
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.551193356513977,
      "learning_rate": 0.0005259632658289107,
      "loss": 2.9594,
      "step": 52648
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8068900108337402,
      "learning_rate": 0.0005259605751215497,
      "loss": 2.9787,
      "step": 52649
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.76717209815979,
      "learning_rate": 0.0005259578843721783,
      "loss": 3.0558,
      "step": 52650
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3659722805023193,
      "learning_rate": 0.000525955193580797,
      "loss": 2.9693,
      "step": 52651
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6474581956863403,
      "learning_rate": 0.0005259525027474065,
      "loss": 2.9445,
      "step": 52652
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.342381000518799,
      "learning_rate": 0.0005259498118720072,
      "loss": 2.9328,
      "step": 52653
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.245718240737915,
      "learning_rate": 0.0005259471209545994,
      "loss": 3.1253,
      "step": 52654
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4728046655654907,
      "learning_rate": 0.0005259444299951839,
      "loss": 3.0674,
      "step": 52655
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3402528762817383,
      "learning_rate": 0.000525941738993761,
      "loss": 3.0199,
      "step": 52656
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1520466804504395,
      "learning_rate": 0.0005259390479503313,
      "loss": 3.2011,
      "step": 52657
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7044029235839844,
      "learning_rate": 0.0005259363568648952,
      "loss": 2.9633,
      "step": 52658
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5683071613311768,
      "learning_rate": 0.0005259336657374533,
      "loss": 3.1267,
      "step": 52659
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.71097993850708,
      "learning_rate": 0.0005259309745680061,
      "loss": 3.0297,
      "step": 52660
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.319532632827759,
      "learning_rate": 0.000525928283356554,
      "loss": 3.0746,
      "step": 52661
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3699283599853516,
      "learning_rate": 0.0005259255921030977,
      "loss": 3.2797,
      "step": 52662
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5107049942016602,
      "learning_rate": 0.0005259229008076374,
      "loss": 3.0576,
      "step": 52663
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.21724271774292,
      "learning_rate": 0.0005259202094701738,
      "loss": 3.2604,
      "step": 52664
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.561988115310669,
      "learning_rate": 0.0005259175180907074,
      "loss": 2.6884,
      "step": 52665
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.526673674583435,
      "learning_rate": 0.0005259148266692386,
      "loss": 3.0571,
      "step": 52666
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4877750873565674,
      "learning_rate": 0.0005259121352057681,
      "loss": 3.0445,
      "step": 52667
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.301928758621216,
      "learning_rate": 0.0005259094437002963,
      "loss": 3.0846,
      "step": 52668
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.161012887954712,
      "learning_rate": 0.0005259067521528236,
      "loss": 2.8201,
      "step": 52669
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.407305359840393,
      "learning_rate": 0.0005259040605633504,
      "loss": 3.0095,
      "step": 52670
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.402893543243408,
      "learning_rate": 0.0005259013689318776,
      "loss": 3.0934,
      "step": 52671
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7361018657684326,
      "learning_rate": 0.0005258986772584054,
      "loss": 3.2639,
      "step": 52672
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6897931098937988,
      "learning_rate": 0.0005258959855429344,
      "loss": 3.1971,
      "step": 52673
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8529400825500488,
      "learning_rate": 0.0005258932937854651,
      "loss": 3.2353,
      "step": 52674
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6901146173477173,
      "learning_rate": 0.0005258906019859978,
      "loss": 2.8127,
      "step": 52675
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6133610010147095,
      "learning_rate": 0.0005258879101445334,
      "loss": 3.1237,
      "step": 52676
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.69598388671875,
      "learning_rate": 0.000525885218261072,
      "loss": 3.0214,
      "step": 52677
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.9721357822418213,
      "learning_rate": 0.0005258825263356143,
      "loss": 2.9458,
      "step": 52678
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5850043296813965,
      "learning_rate": 0.0005258798343681609,
      "loss": 3.0425,
      "step": 52679
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3663476705551147,
      "learning_rate": 0.000525877142358712,
      "loss": 3.081,
      "step": 52680
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4739506244659424,
      "learning_rate": 0.0005258744503072683,
      "loss": 2.9907,
      "step": 52681
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.575313925743103,
      "learning_rate": 0.0005258717582138305,
      "loss": 2.9865,
      "step": 52682
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.529833197593689,
      "learning_rate": 0.0005258690660783987,
      "loss": 3.1498,
      "step": 52683
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4376754760742188,
      "learning_rate": 0.0005258663739009736,
      "loss": 3.042,
      "step": 52684
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6048141717910767,
      "learning_rate": 0.0005258636816815556,
      "loss": 2.7072,
      "step": 52685
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.700541377067566,
      "learning_rate": 0.0005258609894201454,
      "loss": 2.9705,
      "step": 52686
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8162513971328735,
      "learning_rate": 0.0005258582971167432,
      "loss": 3.2506,
      "step": 52687
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4659737348556519,
      "learning_rate": 0.0005258556047713498,
      "loss": 3.0196,
      "step": 52688
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6952149868011475,
      "learning_rate": 0.0005258529123839656,
      "loss": 3.1081,
      "step": 52689
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2394977807998657,
      "learning_rate": 0.0005258502199545911,
      "loss": 2.9945,
      "step": 52690
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6612821817398071,
      "learning_rate": 0.0005258475274832268,
      "loss": 3.009,
      "step": 52691
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5836622714996338,
      "learning_rate": 0.0005258448349698731,
      "loss": 3.1586,
      "step": 52692
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5424325466156006,
      "learning_rate": 0.0005258421424145306,
      "loss": 3.2592,
      "step": 52693
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6022934913635254,
      "learning_rate": 0.0005258394498171997,
      "loss": 3.0924,
      "step": 52694
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.36734676361084,
      "learning_rate": 0.0005258367571778812,
      "loss": 2.9112,
      "step": 52695
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.548316478729248,
      "learning_rate": 0.0005258340644965753,
      "loss": 2.8652,
      "step": 52696
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4716097116470337,
      "learning_rate": 0.0005258313717732824,
      "loss": 2.8543,
      "step": 52697
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.733396291732788,
      "learning_rate": 0.0005258286790080034,
      "loss": 3.1284,
      "step": 52698
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7358381748199463,
      "learning_rate": 0.0005258259862007384,
      "loss": 3.3726,
      "step": 52699
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7077611684799194,
      "learning_rate": 0.0005258232933514883,
      "loss": 3.0994,
      "step": 52700
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7549550533294678,
      "learning_rate": 0.0005258206004602532,
      "loss": 3.1616,
      "step": 52701
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4270524978637695,
      "learning_rate": 0.0005258179075270339,
      "loss": 3.1242,
      "step": 52702
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.8629069328308105,
      "learning_rate": 0.0005258152145518308,
      "loss": 2.9564,
      "step": 52703
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.486361026763916,
      "learning_rate": 0.0005258125215346443,
      "loss": 2.9917,
      "step": 52704
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.029202699661255,
      "learning_rate": 0.0005258098284754751,
      "loss": 2.8585,
      "step": 52705
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.42036509513855,
      "learning_rate": 0.0005258071353743235,
      "loss": 2.9636,
      "step": 52706
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.568658471107483,
      "learning_rate": 0.0005258044422311901,
      "loss": 3.1635,
      "step": 52707
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6143651008605957,
      "learning_rate": 0.0005258017490460754,
      "loss": 2.8174,
      "step": 52708
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8174161911010742,
      "learning_rate": 0.0005257990558189799,
      "loss": 3.1592,
      "step": 52709
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5874649286270142,
      "learning_rate": 0.0005257963625499042,
      "loss": 2.9575,
      "step": 52710
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3568421602249146,
      "learning_rate": 0.0005257936692388486,
      "loss": 3.1933,
      "step": 52711
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.76981782913208,
      "learning_rate": 0.0005257909758858137,
      "loss": 3.0603,
      "step": 52712
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0217483043670654,
      "learning_rate": 0.0005257882824908,
      "loss": 3.1083,
      "step": 52713
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3374642133712769,
      "learning_rate": 0.0005257855890538079,
      "loss": 2.9198,
      "step": 52714
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1642463207244873,
      "learning_rate": 0.0005257828955748381,
      "loss": 2.7082,
      "step": 52715
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.429736852645874,
      "learning_rate": 0.000525780202053891,
      "loss": 3.0299,
      "step": 52716
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8619325160980225,
      "learning_rate": 0.0005257775084909671,
      "loss": 3.0699,
      "step": 52717
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3241530656814575,
      "learning_rate": 0.0005257748148860668,
      "loss": 3.032,
      "step": 52718
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9742668867111206,
      "learning_rate": 0.0005257721212391908,
      "loss": 3.1093,
      "step": 52719
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7824807167053223,
      "learning_rate": 0.0005257694275503395,
      "loss": 3.0653,
      "step": 52720
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4912934303283691,
      "learning_rate": 0.0005257667338195133,
      "loss": 3.1027,
      "step": 52721
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.5828123092651367,
      "learning_rate": 0.0005257640400467129,
      "loss": 2.9466,
      "step": 52722
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.087735652923584,
      "learning_rate": 0.0005257613462319388,
      "loss": 2.994,
      "step": 52723
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.498955249786377,
      "learning_rate": 0.0005257586523751912,
      "loss": 3.1323,
      "step": 52724
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6322981119155884,
      "learning_rate": 0.0005257559584764709,
      "loss": 2.8196,
      "step": 52725
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8976495265960693,
      "learning_rate": 0.0005257532645357783,
      "loss": 2.9291,
      "step": 52726
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.850856304168701,
      "learning_rate": 0.0005257505705531139,
      "loss": 2.9639,
      "step": 52727
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.097947120666504,
      "learning_rate": 0.0005257478765284782,
      "loss": 2.8783,
      "step": 52728
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7358096837997437,
      "learning_rate": 0.0005257451824618717,
      "loss": 2.9656,
      "step": 52729
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.72821307182312,
      "learning_rate": 0.0005257424883532949,
      "loss": 2.8023,
      "step": 52730
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.286079168319702,
      "learning_rate": 0.0005257397942027484,
      "loss": 3.0008,
      "step": 52731
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.8747429847717285,
      "learning_rate": 0.0005257371000102325,
      "loss": 2.95,
      "step": 52732
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6504820585250854,
      "learning_rate": 0.0005257344057757479,
      "loss": 3.1244,
      "step": 52733
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.593318462371826,
      "learning_rate": 0.0005257317114992949,
      "loss": 3.0048,
      "step": 52734
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.034613847732544,
      "learning_rate": 0.0005257290171808743,
      "loss": 3.0091,
      "step": 52735
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8510342836380005,
      "learning_rate": 0.0005257263228204863,
      "loss": 3.0287,
      "step": 52736
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.880782961845398,
      "learning_rate": 0.0005257236284181314,
      "loss": 3.025,
      "step": 52737
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.656407117843628,
      "learning_rate": 0.0005257209339738103,
      "loss": 3.2153,
      "step": 52738
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.274064540863037,
      "learning_rate": 0.0005257182394875236,
      "loss": 3.1997,
      "step": 52739
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.890199899673462,
      "learning_rate": 0.0005257155449592715,
      "loss": 3.0417,
      "step": 52740
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9135762453079224,
      "learning_rate": 0.0005257128503890544,
      "loss": 2.8963,
      "step": 52741
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8803671598434448,
      "learning_rate": 0.0005257101557768733,
      "loss": 3.0189,
      "step": 52742
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2561445236206055,
      "learning_rate": 0.0005257074611227284,
      "loss": 2.8681,
      "step": 52743
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3165230751037598,
      "learning_rate": 0.0005257047664266201,
      "loss": 2.7938,
      "step": 52744
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3584169149398804,
      "learning_rate": 0.000525702071688549,
      "loss": 2.853,
      "step": 52745
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6065081357955933,
      "learning_rate": 0.0005256993769085157,
      "loss": 3.0899,
      "step": 52746
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0599164962768555,
      "learning_rate": 0.0005256966820865207,
      "loss": 2.8657,
      "step": 52747
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9899545907974243,
      "learning_rate": 0.0005256939872225644,
      "loss": 3.2254,
      "step": 52748
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.645795226097107,
      "learning_rate": 0.0005256912923166472,
      "loss": 3.1298,
      "step": 52749
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4038106203079224,
      "learning_rate": 0.0005256885973687699,
      "loss": 2.8639,
      "step": 52750
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5115671157836914,
      "learning_rate": 0.0005256859023789327,
      "loss": 2.9861,
      "step": 52751
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.404444932937622,
      "learning_rate": 0.0005256832073471363,
      "loss": 3.0033,
      "step": 52752
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6262476444244385,
      "learning_rate": 0.0005256805122733811,
      "loss": 3.1819,
      "step": 52753
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3573968410491943,
      "learning_rate": 0.0005256778171576676,
      "loss": 3.2788,
      "step": 52754
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8642202615737915,
      "learning_rate": 0.0005256751219999963,
      "loss": 2.8832,
      "step": 52755
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.463785409927368,
      "learning_rate": 0.0005256724268003679,
      "loss": 3.1776,
      "step": 52756
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5136864185333252,
      "learning_rate": 0.0005256697315587826,
      "loss": 3.351,
      "step": 52757
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6026030778884888,
      "learning_rate": 0.0005256670362752411,
      "loss": 3.1561,
      "step": 52758
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5747220516204834,
      "learning_rate": 0.0005256643409497438,
      "loss": 3.0333,
      "step": 52759
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9967526197433472,
      "learning_rate": 0.0005256616455822913,
      "loss": 2.9719,
      "step": 52760
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8803999423980713,
      "learning_rate": 0.000525658950172884,
      "loss": 3.3956,
      "step": 52761
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6734665632247925,
      "learning_rate": 0.0005256562547215224,
      "loss": 3.2534,
      "step": 52762
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5877690315246582,
      "learning_rate": 0.0005256535592282071,
      "loss": 3.022,
      "step": 52763
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.9348080158233643,
      "learning_rate": 0.0005256508636929385,
      "loss": 3.194,
      "step": 52764
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7688117027282715,
      "learning_rate": 0.0005256481681157172,
      "loss": 2.8892,
      "step": 52765
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5139919519424438,
      "learning_rate": 0.0005256454724965436,
      "loss": 3.1135,
      "step": 52766
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8180642127990723,
      "learning_rate": 0.0005256427768354183,
      "loss": 2.9521,
      "step": 52767
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5923945903778076,
      "learning_rate": 0.0005256400811323417,
      "loss": 3.0411,
      "step": 52768
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3523337841033936,
      "learning_rate": 0.0005256373853873143,
      "loss": 3.0155,
      "step": 52769
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.508544921875,
      "learning_rate": 0.0005256346896003367,
      "loss": 3.0633,
      "step": 52770
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2171757221221924,
      "learning_rate": 0.0005256319937714094,
      "loss": 3.0756,
      "step": 52771
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4788846969604492,
      "learning_rate": 0.0005256292979005328,
      "loss": 2.9933,
      "step": 52772
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5267852544784546,
      "learning_rate": 0.0005256266019877075,
      "loss": 3.2857,
      "step": 52773
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3393665552139282,
      "learning_rate": 0.0005256239060329339,
      "loss": 3.0332,
      "step": 52774
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.653088331222534,
      "learning_rate": 0.0005256212100362126,
      "loss": 3.0111,
      "step": 52775
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.508255124092102,
      "learning_rate": 0.000525618513997544,
      "loss": 2.6953,
      "step": 52776
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6400717496871948,
      "learning_rate": 0.0005256158179169287,
      "loss": 3.1479,
      "step": 52777
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4274510145187378,
      "learning_rate": 0.0005256131217943671,
      "loss": 3.0236,
      "step": 52778
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3695032596588135,
      "learning_rate": 0.0005256104256298598,
      "loss": 3.1443,
      "step": 52779
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7946722507476807,
      "learning_rate": 0.0005256077294234073,
      "loss": 3.0732,
      "step": 52780
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9528758525848389,
      "learning_rate": 0.0005256050331750101,
      "loss": 2.7711,
      "step": 52781
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.065567970275879,
      "learning_rate": 0.0005256023368846686,
      "loss": 3.0189,
      "step": 52782
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8009510040283203,
      "learning_rate": 0.0005255996405523835,
      "loss": 3.2091,
      "step": 52783
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.627490997314453,
      "learning_rate": 0.000525596944178155,
      "loss": 3.0898,
      "step": 52784
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8737690448760986,
      "learning_rate": 0.0005255942477619838,
      "loss": 2.9593,
      "step": 52785
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4968807697296143,
      "learning_rate": 0.0005255915513038703,
      "loss": 3.003,
      "step": 52786
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6616390943527222,
      "learning_rate": 0.0005255888548038153,
      "loss": 3.1307,
      "step": 52787
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9595279693603516,
      "learning_rate": 0.0005255861582618189,
      "loss": 3.1604,
      "step": 52788
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6307756900787354,
      "learning_rate": 0.0005255834616778817,
      "loss": 3.0369,
      "step": 52789
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4391026496887207,
      "learning_rate": 0.0005255807650520045,
      "loss": 3.1709,
      "step": 52790
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5222022533416748,
      "learning_rate": 0.0005255780683841875,
      "loss": 3.4005,
      "step": 52791
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.491180181503296,
      "learning_rate": 0.0005255753716744313,
      "loss": 3.1272,
      "step": 52792
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.510307788848877,
      "learning_rate": 0.0005255726749227362,
      "loss": 2.8894,
      "step": 52793
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9354429244995117,
      "learning_rate": 0.000525569978129103,
      "loss": 2.9625,
      "step": 52794
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3279882669448853,
      "learning_rate": 0.0005255672812935321,
      "loss": 3.3634,
      "step": 52795
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4091012477874756,
      "learning_rate": 0.000525564584416024,
      "loss": 3.1875,
      "step": 52796
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7045388221740723,
      "learning_rate": 0.0005255618874965792,
      "loss": 2.9592,
      "step": 52797
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0864791870117188,
      "learning_rate": 0.0005255591905351981,
      "loss": 3.2016,
      "step": 52798
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.158292770385742,
      "learning_rate": 0.0005255564935318813,
      "loss": 3.1343,
      "step": 52799
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9704355001449585,
      "learning_rate": 0.0005255537964866293,
      "loss": 2.9719,
      "step": 52800
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.451037883758545,
      "learning_rate": 0.0005255510993994426,
      "loss": 3.1082,
      "step": 52801
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4509724378585815,
      "learning_rate": 0.0005255484022703218,
      "loss": 3.3543,
      "step": 52802
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.1116135120391846,
      "learning_rate": 0.0005255457050992672,
      "loss": 2.8375,
      "step": 52803
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6352680921554565,
      "learning_rate": 0.0005255430078862794,
      "loss": 3.3092,
      "step": 52804
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7854900360107422,
      "learning_rate": 0.000525540310631359,
      "loss": 2.988,
      "step": 52805
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.522979497909546,
      "learning_rate": 0.0005255376133345062,
      "loss": 2.7534,
      "step": 52806
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2708194255828857,
      "learning_rate": 0.0005255349159957218,
      "loss": 3.0844,
      "step": 52807
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6275293827056885,
      "learning_rate": 0.0005255322186150062,
      "loss": 3.2355,
      "step": 52808
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.552504301071167,
      "learning_rate": 0.0005255295211923599,
      "loss": 3.0225,
      "step": 52809
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1739895343780518,
      "learning_rate": 0.0005255268237277834,
      "loss": 2.9926,
      "step": 52810
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2769265174865723,
      "learning_rate": 0.0005255241262212772,
      "loss": 3.2089,
      "step": 52811
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.391836166381836,
      "learning_rate": 0.0005255214286728418,
      "loss": 3.0799,
      "step": 52812
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4794074296951294,
      "learning_rate": 0.0005255187310824778,
      "loss": 2.9855,
      "step": 52813
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8864927291870117,
      "learning_rate": 0.0005255160334501855,
      "loss": 3.1,
      "step": 52814
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.219489097595215,
      "learning_rate": 0.0005255133357759655,
      "loss": 2.8573,
      "step": 52815
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.736944317817688,
      "learning_rate": 0.0005255106380598184,
      "loss": 3.1457,
      "step": 52816
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.52077317237854,
      "learning_rate": 0.0005255079403017446,
      "loss": 3.1544,
      "step": 52817
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.6016597747802734,
      "learning_rate": 0.0005255052425017446,
      "loss": 2.9147,
      "step": 52818
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.4055633544921875,
      "learning_rate": 0.0005255025446598189,
      "loss": 3.2469,
      "step": 52819
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6763901710510254,
      "learning_rate": 0.000525499846775968,
      "loss": 2.9271,
      "step": 52820
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5518182516098022,
      "learning_rate": 0.0005254971488501925,
      "loss": 2.9335,
      "step": 52821
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9899307489395142,
      "learning_rate": 0.0005254944508824928,
      "loss": 3.0406,
      "step": 52822
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1682820320129395,
      "learning_rate": 0.0005254917528728694,
      "loss": 3.1255,
      "step": 52823
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9362826347351074,
      "learning_rate": 0.0005254890548213228,
      "loss": 3.0641,
      "step": 52824
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5332465171813965,
      "learning_rate": 0.0005254863567278536,
      "loss": 3.3467,
      "step": 52825
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.002199411392212,
      "learning_rate": 0.0005254836585924622,
      "loss": 3.2233,
      "step": 52826
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.6769049167633057,
      "learning_rate": 0.0005254809604151491,
      "loss": 2.9245,
      "step": 52827
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2832534313201904,
      "learning_rate": 0.0005254782621959148,
      "loss": 3.0606,
      "step": 52828
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.554214596748352,
      "learning_rate": 0.0005254755639347601,
      "loss": 3.0932,
      "step": 52829
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.104945659637451,
      "learning_rate": 0.000525472865631685,
      "loss": 3.1105,
      "step": 52830
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9194732904434204,
      "learning_rate": 0.0005254701672866902,
      "loss": 3.1031,
      "step": 52831
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8910224437713623,
      "learning_rate": 0.0005254674688997763,
      "loss": 3.052,
      "step": 52832
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7068578004837036,
      "learning_rate": 0.0005254647704709438,
      "loss": 3.0017,
      "step": 52833
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.150355339050293,
      "learning_rate": 0.000525462072000193,
      "loss": 3.0717,
      "step": 52834
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2150230407714844,
      "learning_rate": 0.0005254593734875247,
      "loss": 3.1397,
      "step": 52835
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.017078161239624,
      "learning_rate": 0.0005254566749329392,
      "loss": 2.9934,
      "step": 52836
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4812636375427246,
      "learning_rate": 0.000525453976336437,
      "loss": 2.7526,
      "step": 52837
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.652989149093628,
      "learning_rate": 0.0005254512776980187,
      "loss": 2.997,
      "step": 52838
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0347900390625,
      "learning_rate": 0.0005254485790176847,
      "loss": 2.9633,
      "step": 52839
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9296257495880127,
      "learning_rate": 0.0005254458802954356,
      "loss": 3.1214,
      "step": 52840
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.232720136642456,
      "learning_rate": 0.0005254431815312719,
      "loss": 2.9789,
      "step": 52841
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9890525341033936,
      "learning_rate": 0.000525440482725194,
      "loss": 3.309,
      "step": 52842
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.527453064918518,
      "learning_rate": 0.0005254377838772024,
      "loss": 2.839,
      "step": 52843
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5886281728744507,
      "learning_rate": 0.0005254350849872978,
      "loss": 3.1838,
      "step": 52844
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3632129430770874,
      "learning_rate": 0.0005254323860554804,
      "loss": 3.2419,
      "step": 52845
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4983384609222412,
      "learning_rate": 0.000525429687081751,
      "loss": 3.2039,
      "step": 52846
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7119826078414917,
      "learning_rate": 0.0005254269880661099,
      "loss": 2.968,
      "step": 52847
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6357537508010864,
      "learning_rate": 0.0005254242890085577,
      "loss": 2.9762,
      "step": 52848
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5177812576293945,
      "learning_rate": 0.0005254215899090949,
      "loss": 3.0101,
      "step": 52849
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.480135202407837,
      "learning_rate": 0.0005254188907677219,
      "loss": 2.8425,
      "step": 52850
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.52619469165802,
      "learning_rate": 0.0005254161915844393,
      "loss": 3.1225,
      "step": 52851
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.387771487236023,
      "learning_rate": 0.0005254134923592475,
      "loss": 2.91,
      "step": 52852
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.434654712677002,
      "learning_rate": 0.0005254107930921471,
      "loss": 3.1015,
      "step": 52853
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6018742322921753,
      "learning_rate": 0.0005254080937831387,
      "loss": 2.8273,
      "step": 52854
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.860108494758606,
      "learning_rate": 0.0005254053944322225,
      "loss": 3.2774,
      "step": 52855
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8116108179092407,
      "learning_rate": 0.0005254026950393994,
      "loss": 3.1519,
      "step": 52856
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.021660566329956,
      "learning_rate": 0.0005253999956046695,
      "loss": 2.9406,
      "step": 52857
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6873034238815308,
      "learning_rate": 0.0005253972961280336,
      "loss": 2.9035,
      "step": 52858
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6117974519729614,
      "learning_rate": 0.000525394596609492,
      "loss": 3.0163,
      "step": 52859
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0580894947052,
      "learning_rate": 0.0005253918970490454,
      "loss": 3.2817,
      "step": 52860
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5401045083999634,
      "learning_rate": 0.0005253891974466941,
      "loss": 3.1041,
      "step": 52861
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.665424346923828,
      "learning_rate": 0.0005253864978024387,
      "loss": 3.0735,
      "step": 52862
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0468051433563232,
      "learning_rate": 0.0005253837981162797,
      "loss": 3.1878,
      "step": 52863
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8222841024398804,
      "learning_rate": 0.0005253810983882176,
      "loss": 2.9712,
      "step": 52864
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9293017387390137,
      "learning_rate": 0.0005253783986182529,
      "loss": 3.0698,
      "step": 52865
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.8514959812164307,
      "learning_rate": 0.0005253756988063861,
      "loss": 3.3539,
      "step": 52866
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.954007863998413,
      "learning_rate": 0.0005253729989526179,
      "loss": 3.0609,
      "step": 52867
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9615819454193115,
      "learning_rate": 0.0005253702990569483,
      "loss": 3.2318,
      "step": 52868
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9151335954666138,
      "learning_rate": 0.0005253675991193782,
      "loss": 3.2106,
      "step": 52869
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.9019439220428467,
      "learning_rate": 0.0005253648991399081,
      "loss": 2.987,
      "step": 52870
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.6591389179229736,
      "learning_rate": 0.0005253621991185384,
      "loss": 3.2369,
      "step": 52871
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.1866815090179443,
      "learning_rate": 0.0005253594990552695,
      "loss": 3.289,
      "step": 52872
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7777208089828491,
      "learning_rate": 0.0005253567989501021,
      "loss": 2.9328,
      "step": 52873
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5263428688049316,
      "learning_rate": 0.0005253540988030365,
      "loss": 3.0615,
      "step": 52874
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.6432342529296875,
      "learning_rate": 0.0005253513986140735,
      "loss": 3.0681,
      "step": 52875
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4937485456466675,
      "learning_rate": 0.0005253486983832134,
      "loss": 3.127,
      "step": 52876
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4814079999923706,
      "learning_rate": 0.0005253459981104567,
      "loss": 3.0392,
      "step": 52877
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4025533199310303,
      "learning_rate": 0.0005253432977958038,
      "loss": 2.9036,
      "step": 52878
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6819626092910767,
      "learning_rate": 0.0005253405974392554,
      "loss": 3.1665,
      "step": 52879
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5471841096878052,
      "learning_rate": 0.0005253378970408119,
      "loss": 3.1746,
      "step": 52880
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4225480556488037,
      "learning_rate": 0.0005253351966004739,
      "loss": 3.0296,
      "step": 52881
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0909993648529053,
      "learning_rate": 0.0005253324961182419,
      "loss": 3.0351,
      "step": 52882
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5811717510223389,
      "learning_rate": 0.0005253297955941161,
      "loss": 2.9931,
      "step": 52883
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8428633213043213,
      "learning_rate": 0.0005253270950280975,
      "loss": 2.8541,
      "step": 52884
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3954095840454102,
      "learning_rate": 0.0005253243944201862,
      "loss": 2.9349,
      "step": 52885
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8155807256698608,
      "learning_rate": 0.0005253216937703828,
      "loss": 3.3031,
      "step": 52886
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6557793617248535,
      "learning_rate": 0.000525318993078688,
      "loss": 3.1185,
      "step": 52887
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7898154258728027,
      "learning_rate": 0.000525316292345102,
      "loss": 3.2074,
      "step": 52888
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5780998468399048,
      "learning_rate": 0.0005253135915696254,
      "loss": 3.1855,
      "step": 52889
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3519580364227295,
      "learning_rate": 0.0005253108907522589,
      "loss": 2.7604,
      "step": 52890
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2296292781829834,
      "learning_rate": 0.0005253081898930028,
      "loss": 3.176,
      "step": 52891
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3970026969909668,
      "learning_rate": 0.0005253054889918576,
      "loss": 3.1823,
      "step": 52892
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3855583667755127,
      "learning_rate": 0.000525302788048824,
      "loss": 3.4535,
      "step": 52893
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.03812837600708,
      "learning_rate": 0.0005253000870639022,
      "loss": 3.0998,
      "step": 52894
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.661522626876831,
      "learning_rate": 0.000525297386037093,
      "loss": 3.0579,
      "step": 52895
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4662593603134155,
      "learning_rate": 0.0005252946849683967,
      "loss": 3.0652,
      "step": 52896
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5374300479888916,
      "learning_rate": 0.0005252919838578139,
      "loss": 2.9075,
      "step": 52897
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.3790316581726074,
      "learning_rate": 0.000525289282705345,
      "loss": 2.9709,
      "step": 52898
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4352474212646484,
      "learning_rate": 0.0005252865815109905,
      "loss": 3.1717,
      "step": 52899
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7255605459213257,
      "learning_rate": 0.0005252838802747512,
      "loss": 2.9118,
      "step": 52900
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4285411834716797,
      "learning_rate": 0.0005252811789966271,
      "loss": 3.2927,
      "step": 52901
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9922995567321777,
      "learning_rate": 0.0005252784776766192,
      "loss": 2.9589,
      "step": 52902
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5008633136749268,
      "learning_rate": 0.0005252757763147277,
      "loss": 3.0094,
      "step": 52903
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0683977603912354,
      "learning_rate": 0.0005252730749109532,
      "loss": 2.7452,
      "step": 52904
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2298097610473633,
      "learning_rate": 0.0005252703734652961,
      "loss": 2.9628,
      "step": 52905
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.224400520324707,
      "learning_rate": 0.0005252676719777571,
      "loss": 3.2169,
      "step": 52906
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7820830345153809,
      "learning_rate": 0.0005252649704483365,
      "loss": 3.0573,
      "step": 52907
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7419593334197998,
      "learning_rate": 0.000525262268877035,
      "loss": 3.1442,
      "step": 52908
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.470226764678955,
      "learning_rate": 0.0005252595672638529,
      "loss": 2.7998,
      "step": 52909
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.2284774780273438,
      "learning_rate": 0.0005252568656087908,
      "loss": 2.9111,
      "step": 52910
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.944744348526001,
      "learning_rate": 0.0005252541639118492,
      "loss": 3.1675,
      "step": 52911
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6805760860443115,
      "learning_rate": 0.0005252514621730286,
      "loss": 3.1448,
      "step": 52912
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.127955675125122,
      "learning_rate": 0.0005252487603923295,
      "loss": 2.8905,
      "step": 52913
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6707762479782104,
      "learning_rate": 0.0005252460585697524,
      "loss": 2.9935,
      "step": 52914
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6547926664352417,
      "learning_rate": 0.0005252433567052978,
      "loss": 3.1133,
      "step": 52915
    },
    {
      "epoch": 0.69,
      "grad_norm": 4.037103176116943,
      "learning_rate": 0.0005252406547989662,
      "loss": 3.1042,
      "step": 52916
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.69183349609375,
      "learning_rate": 0.0005252379528507582,
      "loss": 3.0588,
      "step": 52917
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7882575988769531,
      "learning_rate": 0.0005252352508606741,
      "loss": 2.8625,
      "step": 52918
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4655628204345703,
      "learning_rate": 0.0005252325488287146,
      "loss": 3.1503,
      "step": 52919
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3331167697906494,
      "learning_rate": 0.0005252298467548801,
      "loss": 2.587,
      "step": 52920
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5317091941833496,
      "learning_rate": 0.000525227144639171,
      "loss": 2.9522,
      "step": 52921
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.530570149421692,
      "learning_rate": 0.0005252244424815882,
      "loss": 3.0053,
      "step": 52922
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2086639404296875,
      "learning_rate": 0.0005252217402821317,
      "loss": 2.8868,
      "step": 52923
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.314509868621826,
      "learning_rate": 0.0005252190380408022,
      "loss": 2.9952,
      "step": 52924
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2577790021896362,
      "learning_rate": 0.0005252163357576003,
      "loss": 3.1464,
      "step": 52925
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5818374156951904,
      "learning_rate": 0.0005252136334325264,
      "loss": 3.0507,
      "step": 52926
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4207996129989624,
      "learning_rate": 0.0005252109310655812,
      "loss": 3.3257,
      "step": 52927
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8234801292419434,
      "learning_rate": 0.0005252082286567648,
      "loss": 2.8245,
      "step": 52928
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5743684768676758,
      "learning_rate": 0.0005252055262060781,
      "loss": 3.179,
      "step": 52929
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.868652105331421,
      "learning_rate": 0.0005252028237135214,
      "loss": 3.045,
      "step": 52930
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6932569742202759,
      "learning_rate": 0.0005252001211790953,
      "loss": 3.0372,
      "step": 52931
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5278950929641724,
      "learning_rate": 0.0005251974186028,
      "loss": 3.3983,
      "step": 52932
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.651617169380188,
      "learning_rate": 0.0005251947159846366,
      "loss": 3.097,
      "step": 52933
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4182664155960083,
      "learning_rate": 0.000525192013324605,
      "loss": 3.2712,
      "step": 52934
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3681349754333496,
      "learning_rate": 0.0005251893106227061,
      "loss": 3.2819,
      "step": 52935
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9302990436553955,
      "learning_rate": 0.0005251866078789402,
      "loss": 2.9879,
      "step": 52936
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.735285758972168,
      "learning_rate": 0.0005251839050933078,
      "loss": 3.0152,
      "step": 52937
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.808802604675293,
      "learning_rate": 0.0005251812022658095,
      "loss": 3.0783,
      "step": 52938
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6392678022384644,
      "learning_rate": 0.0005251784993964458,
      "loss": 2.8313,
      "step": 52939
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7964955568313599,
      "learning_rate": 0.0005251757964852172,
      "loss": 2.8773,
      "step": 52940
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6176375150680542,
      "learning_rate": 0.0005251730935321241,
      "loss": 2.95,
      "step": 52941
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9935414791107178,
      "learning_rate": 0.000525170390537167,
      "loss": 3.3463,
      "step": 52942
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.0277180671691895,
      "learning_rate": 0.0005251676875003466,
      "loss": 3.1422,
      "step": 52943
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6688034534454346,
      "learning_rate": 0.0005251649844216632,
      "loss": 3.1087,
      "step": 52944
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9500430822372437,
      "learning_rate": 0.0005251622813011174,
      "loss": 2.8902,
      "step": 52945
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4500197172164917,
      "learning_rate": 0.0005251595781387097,
      "loss": 3.0154,
      "step": 52946
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6965034008026123,
      "learning_rate": 0.0005251568749344406,
      "loss": 2.9253,
      "step": 52947
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2432610988616943,
      "learning_rate": 0.0005251541716883105,
      "loss": 2.7626,
      "step": 52948
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5015658140182495,
      "learning_rate": 0.0005251514684003201,
      "loss": 3.3448,
      "step": 52949
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4064854383468628,
      "learning_rate": 0.0005251487650704697,
      "loss": 3.0763,
      "step": 52950
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3706731796264648,
      "learning_rate": 0.0005251460616987599,
      "loss": 2.8753,
      "step": 52951
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6954540014266968,
      "learning_rate": 0.0005251433582851914,
      "loss": 2.8832,
      "step": 52952
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6910465955734253,
      "learning_rate": 0.0005251406548297642,
      "loss": 3.0299,
      "step": 52953
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.65743887424469,
      "learning_rate": 0.0005251379513324792,
      "loss": 3.0104,
      "step": 52954
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4902592897415161,
      "learning_rate": 0.0005251352477933369,
      "loss": 3.0361,
      "step": 52955
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3151512145996094,
      "learning_rate": 0.0005251325442123377,
      "loss": 2.9445,
      "step": 52956
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5359063148498535,
      "learning_rate": 0.0005251298405894821,
      "loss": 2.8706,
      "step": 52957
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.637061357498169,
      "learning_rate": 0.0005251271369247705,
      "loss": 3.166,
      "step": 52958
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4389715194702148,
      "learning_rate": 0.0005251244332182036,
      "loss": 2.9274,
      "step": 52959
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5492160320281982,
      "learning_rate": 0.0005251217294697818,
      "loss": 3.1268,
      "step": 52960
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8577762842178345,
      "learning_rate": 0.0005251190256795056,
      "loss": 3.2372,
      "step": 52961
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7552934885025024,
      "learning_rate": 0.0005251163218473756,
      "loss": 2.9759,
      "step": 52962
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4250407218933105,
      "learning_rate": 0.0005251136179733922,
      "loss": 3.2238,
      "step": 52963
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.805113673210144,
      "learning_rate": 0.0005251109140575558,
      "loss": 3.07,
      "step": 52964
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5666937828063965,
      "learning_rate": 0.0005251082100998672,
      "loss": 3.1603,
      "step": 52965
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7893428802490234,
      "learning_rate": 0.0005251055061003267,
      "loss": 3.1042,
      "step": 52966
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.512437105178833,
      "learning_rate": 0.0005251028020589348,
      "loss": 3.095,
      "step": 52967
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.466120958328247,
      "learning_rate": 0.000525100097975692,
      "loss": 3.124,
      "step": 52968
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.521559715270996,
      "learning_rate": 0.0005250973938505989,
      "loss": 3.2688,
      "step": 52969
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.428474187850952,
      "learning_rate": 0.0005250946896836559,
      "loss": 2.9701,
      "step": 52970
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.437321424484253,
      "learning_rate": 0.0005250919854748637,
      "loss": 3.229,
      "step": 52971
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7243907451629639,
      "learning_rate": 0.0005250892812242225,
      "loss": 3.1657,
      "step": 52972
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4758633375167847,
      "learning_rate": 0.000525086576931733,
      "loss": 2.8423,
      "step": 52973
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.921471357345581,
      "learning_rate": 0.0005250838725973957,
      "loss": 3.2562,
      "step": 52974
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7556097507476807,
      "learning_rate": 0.0005250811682212109,
      "loss": 2.8606,
      "step": 52975
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4938381910324097,
      "learning_rate": 0.0005250784638031794,
      "loss": 2.7917,
      "step": 52976
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.431082010269165,
      "learning_rate": 0.0005250757593433016,
      "loss": 2.8235,
      "step": 52977
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3960858583450317,
      "learning_rate": 0.0005250730548415778,
      "loss": 2.8203,
      "step": 52978
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4034020900726318,
      "learning_rate": 0.0005250703502980089,
      "loss": 2.8411,
      "step": 52979
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9790467023849487,
      "learning_rate": 0.000525067645712595,
      "loss": 3.015,
      "step": 52980
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.663902759552002,
      "learning_rate": 0.0005250649410853368,
      "loss": 3.0235,
      "step": 52981
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8617775440216064,
      "learning_rate": 0.0005250622364162349,
      "loss": 2.9304,
      "step": 52982
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8963159322738647,
      "learning_rate": 0.0005250595317052895,
      "loss": 3.0031,
      "step": 52983
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5470203161239624,
      "learning_rate": 0.0005250568269525014,
      "loss": 2.8855,
      "step": 52984
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5670406818389893,
      "learning_rate": 0.0005250541221578709,
      "loss": 2.8508,
      "step": 52985
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4473017454147339,
      "learning_rate": 0.0005250514173213987,
      "loss": 2.9133,
      "step": 52986
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7040581703186035,
      "learning_rate": 0.0005250487124430852,
      "loss": 3.2421,
      "step": 52987
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.320704698562622,
      "learning_rate": 0.0005250460075229308,
      "loss": 2.7613,
      "step": 52988
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.776002287864685,
      "learning_rate": 0.0005250433025609362,
      "loss": 3.0384,
      "step": 52989
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.6760923862457275,
      "learning_rate": 0.0005250405975571016,
      "loss": 2.8397,
      "step": 52990
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9500118494033813,
      "learning_rate": 0.0005250378925114279,
      "loss": 3.1421,
      "step": 52991
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.493781566619873,
      "learning_rate": 0.0005250351874239153,
      "loss": 3.0761,
      "step": 52992
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9613481760025024,
      "learning_rate": 0.0005250324822945645,
      "loss": 3.1958,
      "step": 52993
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7238346338272095,
      "learning_rate": 0.0005250297771233758,
      "loss": 2.8222,
      "step": 52994
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2370641231536865,
      "learning_rate": 0.0005250270719103499,
      "loss": 3.1543,
      "step": 52995
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6456493139266968,
      "learning_rate": 0.0005250243666554872,
      "loss": 3.3039,
      "step": 52996
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5114121437072754,
      "learning_rate": 0.0005250216613587883,
      "loss": 3.0146,
      "step": 52997
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2880258560180664,
      "learning_rate": 0.0005250189560202536,
      "loss": 2.7823,
      "step": 52998
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7036659717559814,
      "learning_rate": 0.0005250162506398835,
      "loss": 3.0542,
      "step": 52999
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4726475477218628,
      "learning_rate": 0.0005250135452176788,
      "loss": 2.8666,
      "step": 53000
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.611211895942688,
      "learning_rate": 0.0005250108397536397,
      "loss": 2.9613,
      "step": 53001
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5844130516052246,
      "learning_rate": 0.0005250081342477669,
      "loss": 3.0352,
      "step": 53002
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.5603153705596924,
      "learning_rate": 0.0005250054287000608,
      "loss": 2.9152,
      "step": 53003
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4199081659317017,
      "learning_rate": 0.000525002723110522,
      "loss": 3.0142,
      "step": 53004
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4616622924804688,
      "learning_rate": 0.0005250000174791509,
      "loss": 3.2191,
      "step": 53005
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4776906967163086,
      "learning_rate": 0.0005249973118059482,
      "loss": 3.0445,
      "step": 53006
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3599119186401367,
      "learning_rate": 0.0005249946060909141,
      "loss": 3.3379,
      "step": 53007
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7705210447311401,
      "learning_rate": 0.0005249919003340492,
      "loss": 3.2026,
      "step": 53008
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2830075025558472,
      "learning_rate": 0.0005249891945353542,
      "loss": 3.1235,
      "step": 53009
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6737775802612305,
      "learning_rate": 0.0005249864886948294,
      "loss": 3.0275,
      "step": 53010
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7369979619979858,
      "learning_rate": 0.0005249837828124753,
      "loss": 3.1444,
      "step": 53011
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4264148473739624,
      "learning_rate": 0.0005249810768882926,
      "loss": 2.8749,
      "step": 53012
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4066956043243408,
      "learning_rate": 0.0005249783709222816,
      "loss": 3.2299,
      "step": 53013
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4179589748382568,
      "learning_rate": 0.0005249756649144428,
      "loss": 3.0067,
      "step": 53014
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5389931201934814,
      "learning_rate": 0.0005249729588647769,
      "loss": 3.3426,
      "step": 53015
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3902008533477783,
      "learning_rate": 0.0005249702527732842,
      "loss": 3.083,
      "step": 53016
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7320719957351685,
      "learning_rate": 0.0005249675466399654,
      "loss": 2.9466,
      "step": 53017
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.748549222946167,
      "learning_rate": 0.0005249648404648208,
      "loss": 2.555,
      "step": 53018
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4214797019958496,
      "learning_rate": 0.0005249621342478509,
      "loss": 2.8942,
      "step": 53019
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4180763959884644,
      "learning_rate": 0.0005249594279890564,
      "loss": 2.9412,
      "step": 53020
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7092970609664917,
      "learning_rate": 0.0005249567216884378,
      "loss": 2.9516,
      "step": 53021
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.8614277839660645,
      "learning_rate": 0.0005249540153459953,
      "loss": 2.7155,
      "step": 53022
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5643447637557983,
      "learning_rate": 0.0005249513089617297,
      "loss": 2.9437,
      "step": 53023
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.124504327774048,
      "learning_rate": 0.0005249486025356414,
      "loss": 3.1177,
      "step": 53024
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4079782962799072,
      "learning_rate": 0.0005249458960677308,
      "loss": 3.0341,
      "step": 53025
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5340443849563599,
      "learning_rate": 0.0005249431895579987,
      "loss": 3.1155,
      "step": 53026
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5728063583374023,
      "learning_rate": 0.0005249404830064454,
      "loss": 2.7957,
      "step": 53027
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3667882680892944,
      "learning_rate": 0.0005249377764130713,
      "loss": 2.9842,
      "step": 53028
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4304662942886353,
      "learning_rate": 0.000524935069777877,
      "loss": 3.1164,
      "step": 53029
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0860848426818848,
      "learning_rate": 0.0005249323631008632,
      "loss": 2.978,
      "step": 53030
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9048222303390503,
      "learning_rate": 0.00052492965638203,
      "loss": 3.1389,
      "step": 53031
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2058472633361816,
      "learning_rate": 0.0005249269496213784,
      "loss": 3.0315,
      "step": 53032
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0765342712402344,
      "learning_rate": 0.0005249242428189084,
      "loss": 2.9084,
      "step": 53033
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6009151935577393,
      "learning_rate": 0.0005249215359746208,
      "loss": 3.2306,
      "step": 53034
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3259122371673584,
      "learning_rate": 0.000524918829088516,
      "loss": 3.0721,
      "step": 53035
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.05515193939209,
      "learning_rate": 0.0005249161221605946,
      "loss": 3.1068,
      "step": 53036
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9789327383041382,
      "learning_rate": 0.0005249134151908571,
      "loss": 2.8687,
      "step": 53037
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6628391742706299,
      "learning_rate": 0.0005249107081793039,
      "loss": 3.0565,
      "step": 53038
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3942368030548096,
      "learning_rate": 0.0005249080011259355,
      "loss": 2.981,
      "step": 53039
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8998881578445435,
      "learning_rate": 0.0005249052940307525,
      "loss": 3.1852,
      "step": 53040
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3267391920089722,
      "learning_rate": 0.0005249025868937554,
      "loss": 3.1223,
      "step": 53041
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.478744387626648,
      "learning_rate": 0.0005248998797149447,
      "loss": 3.2554,
      "step": 53042
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.111635684967041,
      "learning_rate": 0.0005248971724943208,
      "loss": 3.2437,
      "step": 53043
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.36665678024292,
      "learning_rate": 0.0005248944652318843,
      "loss": 2.9511,
      "step": 53044
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.260653018951416,
      "learning_rate": 0.0005248917579276357,
      "loss": 3.1421,
      "step": 53045
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.722331166267395,
      "learning_rate": 0.0005248890505815753,
      "loss": 2.9116,
      "step": 53046
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.535563349723816,
      "learning_rate": 0.0005248863431937039,
      "loss": 3.0946,
      "step": 53047
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8298850059509277,
      "learning_rate": 0.0005248836357640219,
      "loss": 3.1489,
      "step": 53048
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3377516269683838,
      "learning_rate": 0.0005248809282925298,
      "loss": 2.9322,
      "step": 53049
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.194530963897705,
      "learning_rate": 0.000524878220779228,
      "loss": 2.88,
      "step": 53050
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3526039123535156,
      "learning_rate": 0.0005248755132241172,
      "loss": 2.8434,
      "step": 53051
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.566920280456543,
      "learning_rate": 0.0005248728056271978,
      "loss": 2.8215,
      "step": 53052
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.05037260055542,
      "learning_rate": 0.0005248700979884703,
      "loss": 2.8735,
      "step": 53053
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6330511569976807,
      "learning_rate": 0.0005248673903079351,
      "loss": 2.9544,
      "step": 53054
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1865146160125732,
      "learning_rate": 0.0005248646825855929,
      "loss": 2.9932,
      "step": 53055
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9782551527023315,
      "learning_rate": 0.0005248619748214441,
      "loss": 2.9181,
      "step": 53056
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7546944618225098,
      "learning_rate": 0.0005248592670154892,
      "loss": 2.9271,
      "step": 53057
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5826483964920044,
      "learning_rate": 0.0005248565591677287,
      "loss": 3.0828,
      "step": 53058
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8113939762115479,
      "learning_rate": 0.0005248538512781631,
      "loss": 3.3554,
      "step": 53059
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7935633659362793,
      "learning_rate": 0.000524851143346793,
      "loss": 3.2386,
      "step": 53060
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4138833284378052,
      "learning_rate": 0.0005248484353736187,
      "loss": 3.2526,
      "step": 53061
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4149701595306396,
      "learning_rate": 0.000524845727358641,
      "loss": 3.0962,
      "step": 53062
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3834716081619263,
      "learning_rate": 0.0005248430193018601,
      "loss": 3.0977,
      "step": 53063
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9466456174850464,
      "learning_rate": 0.0005248403112032767,
      "loss": 3.0147,
      "step": 53064
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.792512059211731,
      "learning_rate": 0.0005248376030628913,
      "loss": 2.8932,
      "step": 53065
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4466774463653564,
      "learning_rate": 0.0005248348948807042,
      "loss": 3.0606,
      "step": 53066
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.103694438934326,
      "learning_rate": 0.0005248321866567162,
      "loss": 2.9982,
      "step": 53067
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.412468671798706,
      "learning_rate": 0.0005248294783909276,
      "loss": 2.7241,
      "step": 53068
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.826155424118042,
      "learning_rate": 0.0005248267700833388,
      "loss": 2.9483,
      "step": 53069
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9062384366989136,
      "learning_rate": 0.0005248240617339506,
      "loss": 3.0311,
      "step": 53070
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.0099594593048096,
      "learning_rate": 0.0005248213533427634,
      "loss": 3.052,
      "step": 53071
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.2767221927642822,
      "learning_rate": 0.0005248186449097776,
      "loss": 3.1245,
      "step": 53072
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8498024940490723,
      "learning_rate": 0.0005248159364349939,
      "loss": 3.076,
      "step": 53073
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.982971429824829,
      "learning_rate": 0.0005248132279184125,
      "loss": 3.1588,
      "step": 53074
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.955385684967041,
      "learning_rate": 0.0005248105193600343,
      "loss": 3.0495,
      "step": 53075
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.763899326324463,
      "learning_rate": 0.0005248078107598593,
      "loss": 3.1352,
      "step": 53076
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8797856569290161,
      "learning_rate": 0.0005248051021178884,
      "loss": 2.9631,
      "step": 53077
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9328632354736328,
      "learning_rate": 0.0005248023934341221,
      "loss": 2.8858,
      "step": 53078
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0532681941986084,
      "learning_rate": 0.0005247996847085606,
      "loss": 3.1659,
      "step": 53079
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3612191677093506,
      "learning_rate": 0.0005247969759412047,
      "loss": 3.014,
      "step": 53080
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4033576250076294,
      "learning_rate": 0.0005247942671320548,
      "loss": 3.3753,
      "step": 53081
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5973966121673584,
      "learning_rate": 0.0005247915582811114,
      "loss": 2.9435,
      "step": 53082
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3176333904266357,
      "learning_rate": 0.000524788849388375,
      "loss": 3.0533,
      "step": 53083
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6463960409164429,
      "learning_rate": 0.000524786140453846,
      "loss": 3.0912,
      "step": 53084
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9213966131210327,
      "learning_rate": 0.0005247834314775252,
      "loss": 3.1251,
      "step": 53085
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8029536008834839,
      "learning_rate": 0.0005247807224594128,
      "loss": 2.956,
      "step": 53086
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5118192434310913,
      "learning_rate": 0.0005247780133995094,
      "loss": 3.2268,
      "step": 53087
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8347610235214233,
      "learning_rate": 0.0005247753042978157,
      "loss": 3.0297,
      "step": 53088
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9003428220748901,
      "learning_rate": 0.0005247725951543319,
      "loss": 3.201,
      "step": 53089
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.393172025680542,
      "learning_rate": 0.0005247698859690586,
      "loss": 3.2145,
      "step": 53090
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3514633178710938,
      "learning_rate": 0.0005247671767419963,
      "loss": 2.8056,
      "step": 53091
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4739413261413574,
      "learning_rate": 0.0005247644674731456,
      "loss": 3.0585,
      "step": 53092
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6775333881378174,
      "learning_rate": 0.0005247617581625071,
      "loss": 3.0747,
      "step": 53093
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5558050870895386,
      "learning_rate": 0.0005247590488100809,
      "loss": 2.908,
      "step": 53094
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3063477277755737,
      "learning_rate": 0.0005247563394158679,
      "loss": 2.7949,
      "step": 53095
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.401668667793274,
      "learning_rate": 0.0005247536299798684,
      "loss": 3.0156,
      "step": 53096
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1768722534179688,
      "learning_rate": 0.000524750920502083,
      "loss": 3.0862,
      "step": 53097
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4980555772781372,
      "learning_rate": 0.0005247482109825121,
      "loss": 3.2105,
      "step": 53098
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.50346040725708,
      "learning_rate": 0.0005247455014211563,
      "loss": 2.9733,
      "step": 53099
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7423728704452515,
      "learning_rate": 0.0005247427918180161,
      "loss": 3.1417,
      "step": 53100
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7621973752975464,
      "learning_rate": 0.0005247400821730919,
      "loss": 2.9449,
      "step": 53101
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.451896905899048,
      "learning_rate": 0.0005247373724863843,
      "loss": 2.8238,
      "step": 53102
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5105267763137817,
      "learning_rate": 0.0005247346627578938,
      "loss": 3.0847,
      "step": 53103
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7022361755371094,
      "learning_rate": 0.0005247319529876209,
      "loss": 3.1112,
      "step": 53104
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8614273071289062,
      "learning_rate": 0.000524729243175566,
      "loss": 3.0629,
      "step": 53105
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6755731105804443,
      "learning_rate": 0.0005247265333217298,
      "loss": 3.0061,
      "step": 53106
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2638604640960693,
      "learning_rate": 0.0005247238234261126,
      "loss": 3.0707,
      "step": 53107
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.1188573837280273,
      "learning_rate": 0.0005247211134887151,
      "loss": 3.2622,
      "step": 53108
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.562532901763916,
      "learning_rate": 0.0005247184035095376,
      "loss": 3.0914,
      "step": 53109
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.904137134552002,
      "learning_rate": 0.0005247156934885808,
      "loss": 2.9451,
      "step": 53110
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6686846017837524,
      "learning_rate": 0.0005247129834258451,
      "loss": 2.957,
      "step": 53111
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.613273024559021,
      "learning_rate": 0.0005247102733213309,
      "loss": 3.045,
      "step": 53112
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9607768058776855,
      "learning_rate": 0.000524707563175039,
      "loss": 3.1273,
      "step": 53113
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5123083591461182,
      "learning_rate": 0.0005247048529869697,
      "loss": 3.1372,
      "step": 53114
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9088457822799683,
      "learning_rate": 0.0005247021427571233,
      "loss": 3.0148,
      "step": 53115
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5342662334442139,
      "learning_rate": 0.0005246994324855008,
      "loss": 3.0292,
      "step": 53116
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7365235090255737,
      "learning_rate": 0.0005246967221721022,
      "loss": 3.0621,
      "step": 53117
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7329009771347046,
      "learning_rate": 0.0005246940118169283,
      "loss": 3.3101,
      "step": 53118
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8950841426849365,
      "learning_rate": 0.0005246913014199797,
      "loss": 2.9058,
      "step": 53119
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8250212669372559,
      "learning_rate": 0.0005246885909812566,
      "loss": 3.0799,
      "step": 53120
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7395563125610352,
      "learning_rate": 0.0005246858805007596,
      "loss": 3.0034,
      "step": 53121
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.490476131439209,
      "learning_rate": 0.0005246831699784895,
      "loss": 3.1444,
      "step": 53122
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4780206680297852,
      "learning_rate": 0.0005246804594144463,
      "loss": 3.0187,
      "step": 53123
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4806067943572998,
      "learning_rate": 0.0005246777488086309,
      "loss": 3.1237,
      "step": 53124
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.748484492301941,
      "learning_rate": 0.0005246750381610436,
      "loss": 3.2339,
      "step": 53125
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8053563833236694,
      "learning_rate": 0.000524672327471685,
      "loss": 2.9171,
      "step": 53126
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5447766780853271,
      "learning_rate": 0.0005246696167405555,
      "loss": 2.9931,
      "step": 53127
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7670820951461792,
      "learning_rate": 0.0005246669059676558,
      "loss": 3.0297,
      "step": 53128
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6885775327682495,
      "learning_rate": 0.0005246641951529862,
      "loss": 3.2134,
      "step": 53129
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7445265054702759,
      "learning_rate": 0.0005246614842965474,
      "loss": 2.9973,
      "step": 53130
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.662247657775879,
      "learning_rate": 0.0005246587733983396,
      "loss": 2.8865,
      "step": 53131
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.649592399597168,
      "learning_rate": 0.0005246560624583636,
      "loss": 3.1888,
      "step": 53132
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.643018364906311,
      "learning_rate": 0.0005246533514766199,
      "loss": 2.9566,
      "step": 53133
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5400521755218506,
      "learning_rate": 0.0005246506404531087,
      "loss": 3.3388,
      "step": 53134
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.700152039527893,
      "learning_rate": 0.0005246479293878308,
      "loss": 3.1521,
      "step": 53135
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3850531578063965,
      "learning_rate": 0.0005246452182807866,
      "loss": 2.9127,
      "step": 53136
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7539030313491821,
      "learning_rate": 0.0005246425071319767,
      "loss": 3.152,
      "step": 53137
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5628362894058228,
      "learning_rate": 0.0005246397959414014,
      "loss": 2.9142,
      "step": 53138
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5017151832580566,
      "learning_rate": 0.0005246370847090614,
      "loss": 2.9318,
      "step": 53139
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.9378719329833984,
      "learning_rate": 0.0005246343734349571,
      "loss": 2.9972,
      "step": 53140
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5944998264312744,
      "learning_rate": 0.000524631662119089,
      "loss": 3.2771,
      "step": 53141
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9995036125183105,
      "learning_rate": 0.0005246289507614576,
      "loss": 3.0996,
      "step": 53142
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3327722549438477,
      "learning_rate": 0.0005246262393620635,
      "loss": 3.0539,
      "step": 53143
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.173158884048462,
      "learning_rate": 0.0005246235279209072,
      "loss": 3.0929,
      "step": 53144
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4958516359329224,
      "learning_rate": 0.000524620816437989,
      "loss": 3.154,
      "step": 53145
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7156599760055542,
      "learning_rate": 0.0005246181049133096,
      "loss": 3.3057,
      "step": 53146
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3813085556030273,
      "learning_rate": 0.0005246153933468695,
      "loss": 3.003,
      "step": 53147
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8445169925689697,
      "learning_rate": 0.0005246126817386691,
      "loss": 3.0382,
      "step": 53148
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4939789772033691,
      "learning_rate": 0.000524609970088709,
      "loss": 3.1671,
      "step": 53149
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8240962028503418,
      "learning_rate": 0.0005246072583969897,
      "loss": 3.0246,
      "step": 53150
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.382401943206787,
      "learning_rate": 0.0005246045466635117,
      "loss": 2.979,
      "step": 53151
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.570139765739441,
      "learning_rate": 0.0005246018348882753,
      "loss": 3.064,
      "step": 53152
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.714970350265503,
      "learning_rate": 0.0005245991230712814,
      "loss": 3.1998,
      "step": 53153
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6884301900863647,
      "learning_rate": 0.0005245964112125302,
      "loss": 3.2358,
      "step": 53154
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6702972650527954,
      "learning_rate": 0.0005245936993120222,
      "loss": 3.1212,
      "step": 53155
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6443182229995728,
      "learning_rate": 0.000524590987369758,
      "loss": 3.0328,
      "step": 53156
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4409197568893433,
      "learning_rate": 0.0005245882753857382,
      "loss": 3.0927,
      "step": 53157
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2907795906066895,
      "learning_rate": 0.0005245855633599632,
      "loss": 2.9827,
      "step": 53158
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.723789095878601,
      "learning_rate": 0.0005245828512924335,
      "loss": 3.2455,
      "step": 53159
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6653411388397217,
      "learning_rate": 0.0005245801391831495,
      "loss": 2.8156,
      "step": 53160
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9735209941864014,
      "learning_rate": 0.000524577427032112,
      "loss": 3.0181,
      "step": 53161
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.087193489074707,
      "learning_rate": 0.0005245747148393211,
      "loss": 3.1385,
      "step": 53162
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6900641918182373,
      "learning_rate": 0.0005245720026047776,
      "loss": 3.0431,
      "step": 53163
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8122607469558716,
      "learning_rate": 0.000524569290328482,
      "loss": 3.1839,
      "step": 53164
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6599822044372559,
      "learning_rate": 0.0005245665780104346,
      "loss": 3.1642,
      "step": 53165
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.9614810943603516,
      "learning_rate": 0.0005245638656506362,
      "loss": 2.9134,
      "step": 53166
    },
    {
      "epoch": 0.69,
      "grad_norm": 4.1809401512146,
      "learning_rate": 0.0005245611532490869,
      "loss": 3.0482,
      "step": 53167
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5427838563919067,
      "learning_rate": 0.0005245584408057875,
      "loss": 2.957,
      "step": 53168
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6174365282058716,
      "learning_rate": 0.0005245557283207386,
      "loss": 3.1285,
      "step": 53169
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0943901538848877,
      "learning_rate": 0.0005245530157939405,
      "loss": 2.9363,
      "step": 53170
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3493432998657227,
      "learning_rate": 0.0005245503032253938,
      "loss": 3.1151,
      "step": 53171
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4824714660644531,
      "learning_rate": 0.0005245475906150989,
      "loss": 2.7917,
      "step": 53172
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3567702770233154,
      "learning_rate": 0.0005245448779630564,
      "loss": 3.1071,
      "step": 53173
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.356431245803833,
      "learning_rate": 0.0005245421652692667,
      "loss": 2.968,
      "step": 53174
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4307594299316406,
      "learning_rate": 0.0005245394525337304,
      "loss": 2.8374,
      "step": 53175
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8559926748275757,
      "learning_rate": 0.000524536739756448,
      "loss": 3.3681,
      "step": 53176
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5661500692367554,
      "learning_rate": 0.00052453402693742,
      "loss": 2.8491,
      "step": 53177
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4983245134353638,
      "learning_rate": 0.0005245313140766468,
      "loss": 2.9557,
      "step": 53178
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5898371934890747,
      "learning_rate": 0.000524528601174129,
      "loss": 3.1178,
      "step": 53179
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7026423215866089,
      "learning_rate": 0.0005245258882298671,
      "loss": 2.9953,
      "step": 53180
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7433537244796753,
      "learning_rate": 0.0005245231752438617,
      "loss": 3.2425,
      "step": 53181
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.767227292060852,
      "learning_rate": 0.0005245204622161131,
      "loss": 3.0651,
      "step": 53182
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.818907618522644,
      "learning_rate": 0.000524517749146622,
      "loss": 3.0993,
      "step": 53183
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.721325159072876,
      "learning_rate": 0.0005245150360353888,
      "loss": 3.036,
      "step": 53184
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4400036334991455,
      "learning_rate": 0.0005245123228824139,
      "loss": 3.1582,
      "step": 53185
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4962878227233887,
      "learning_rate": 0.000524509609687698,
      "loss": 2.9387,
      "step": 53186
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8341093063354492,
      "learning_rate": 0.0005245068964512416,
      "loss": 3.079,
      "step": 53187
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.469599485397339,
      "learning_rate": 0.000524504183173045,
      "loss": 2.9849,
      "step": 53188
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8027909994125366,
      "learning_rate": 0.0005245014698531088,
      "loss": 3.0352,
      "step": 53189
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6870392560958862,
      "learning_rate": 0.0005244987564914337,
      "loss": 2.8389,
      "step": 53190
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.883237600326538,
      "learning_rate": 0.00052449604308802,
      "loss": 3.0413,
      "step": 53191
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.090886116027832,
      "learning_rate": 0.0005244933296428681,
      "loss": 2.9941,
      "step": 53192
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6223746538162231,
      "learning_rate": 0.0005244906161559788,
      "loss": 3.2343,
      "step": 53193
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8443573713302612,
      "learning_rate": 0.0005244879026273525,
      "loss": 2.931,
      "step": 53194
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4671962261199951,
      "learning_rate": 0.0005244851890569896,
      "loss": 2.9993,
      "step": 53195
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.475762963294983,
      "learning_rate": 0.0005244824754448906,
      "loss": 3.0728,
      "step": 53196
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5951259136199951,
      "learning_rate": 0.0005244797617910561,
      "loss": 3.053,
      "step": 53197
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4715092182159424,
      "learning_rate": 0.0005244770480954867,
      "loss": 3.2676,
      "step": 53198
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.512904405593872,
      "learning_rate": 0.0005244743343581826,
      "loss": 3.287,
      "step": 53199
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.9616122245788574,
      "learning_rate": 0.0005244716205791446,
      "loss": 3.2544,
      "step": 53200
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6237940788269043,
      "learning_rate": 0.0005244689067583731,
      "loss": 2.762,
      "step": 53201
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8514559268951416,
      "learning_rate": 0.0005244661928958686,
      "loss": 2.9108,
      "step": 53202
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4160212278366089,
      "learning_rate": 0.0005244634789916315,
      "loss": 3.0275,
      "step": 53203
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8698524236679077,
      "learning_rate": 0.0005244607650456625,
      "loss": 3.3201,
      "step": 53204
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.743759036064148,
      "learning_rate": 0.0005244580510579619,
      "loss": 2.8468,
      "step": 53205
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6185178756713867,
      "learning_rate": 0.0005244553370285303,
      "loss": 3.213,
      "step": 53206
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.509812593460083,
      "learning_rate": 0.0005244526229573683,
      "loss": 3.0874,
      "step": 53207
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3080683946609497,
      "learning_rate": 0.0005244499088444763,
      "loss": 2.9083,
      "step": 53208
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3681244850158691,
      "learning_rate": 0.0005244471946898549,
      "loss": 2.973,
      "step": 53209
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9753665924072266,
      "learning_rate": 0.0005244444804935044,
      "loss": 3.0584,
      "step": 53210
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7335700988769531,
      "learning_rate": 0.0005244417662554254,
      "loss": 3.1872,
      "step": 53211
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5029840469360352,
      "learning_rate": 0.0005244390519756186,
      "loss": 2.9983,
      "step": 53212
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7426873445510864,
      "learning_rate": 0.0005244363376540842,
      "loss": 3.1261,
      "step": 53213
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.139613389968872,
      "learning_rate": 0.000524433623290823,
      "loss": 3.0783,
      "step": 53214
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.492653250694275,
      "learning_rate": 0.0005244309088858352,
      "loss": 3.1174,
      "step": 53215
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.919974684715271,
      "learning_rate": 0.0005244281944391216,
      "loss": 2.9113,
      "step": 53216
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5803489685058594,
      "learning_rate": 0.0005244254799506824,
      "loss": 3.0359,
      "step": 53217
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6477940082550049,
      "learning_rate": 0.0005244227654205184,
      "loss": 3.0315,
      "step": 53218
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2398922443389893,
      "learning_rate": 0.0005244200508486299,
      "loss": 3.0101,
      "step": 53219
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0187582969665527,
      "learning_rate": 0.0005244173362350176,
      "loss": 2.9007,
      "step": 53220
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6532858610153198,
      "learning_rate": 0.0005244146215796817,
      "loss": 3.1316,
      "step": 53221
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6211944818496704,
      "learning_rate": 0.0005244119068826229,
      "loss": 3.2392,
      "step": 53222
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.403989553451538,
      "learning_rate": 0.0005244091921438418,
      "loss": 2.945,
      "step": 53223
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5087890625,
      "learning_rate": 0.0005244064773633388,
      "loss": 2.8861,
      "step": 53224
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3552237749099731,
      "learning_rate": 0.0005244037625411145,
      "loss": 3.2101,
      "step": 53225
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8242428302764893,
      "learning_rate": 0.000524401047677169,
      "loss": 3.45,
      "step": 53226
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3121914863586426,
      "learning_rate": 0.0005243983327715033,
      "loss": 3.0578,
      "step": 53227
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8511924743652344,
      "learning_rate": 0.0005243956178241177,
      "loss": 3.1518,
      "step": 53228
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.682428240776062,
      "learning_rate": 0.0005243929028350127,
      "loss": 3.041,
      "step": 53229
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8571316003799438,
      "learning_rate": 0.0005243901878041888,
      "loss": 3.2086,
      "step": 53230
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6483888626098633,
      "learning_rate": 0.0005243874727316465,
      "loss": 2.8695,
      "step": 53231
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5510791540145874,
      "learning_rate": 0.0005243847576173865,
      "loss": 3.163,
      "step": 53232
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3719937801361084,
      "learning_rate": 0.0005243820424614091,
      "loss": 3.1793,
      "step": 53233
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.192668914794922,
      "learning_rate": 0.0005243793272637147,
      "loss": 3.1934,
      "step": 53234
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3503377437591553,
      "learning_rate": 0.0005243766120243041,
      "loss": 3.3551,
      "step": 53235
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5046225786209106,
      "learning_rate": 0.0005243738967431774,
      "loss": 3.1082,
      "step": 53236
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1917150020599365,
      "learning_rate": 0.0005243711814203356,
      "loss": 3.0239,
      "step": 53237
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2755316495895386,
      "learning_rate": 0.0005243684660557788,
      "loss": 3.0013,
      "step": 53238
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6132603883743286,
      "learning_rate": 0.0005243657506495078,
      "loss": 3.2599,
      "step": 53239
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0352461338043213,
      "learning_rate": 0.0005243630352015229,
      "loss": 3.1708,
      "step": 53240
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.267533302307129,
      "learning_rate": 0.0005243603197118248,
      "loss": 3.1738,
      "step": 53241
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6416027545928955,
      "learning_rate": 0.0005243576041804137,
      "loss": 2.8863,
      "step": 53242
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.1227405071258545,
      "learning_rate": 0.0005243548886072904,
      "loss": 3.0732,
      "step": 53243
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8591543436050415,
      "learning_rate": 0.0005243521729924553,
      "loss": 2.9149,
      "step": 53244
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.151256799697876,
      "learning_rate": 0.0005243494573359089,
      "loss": 3.2398,
      "step": 53245
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.1152291297912598,
      "learning_rate": 0.0005243467416376517,
      "loss": 3.0455,
      "step": 53246
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.1973856687545776,
      "learning_rate": 0.000524344025897684,
      "loss": 3.1728,
      "step": 53247
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8709540367126465,
      "learning_rate": 0.0005243413101160068,
      "loss": 3.1994,
      "step": 53248
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8022770881652832,
      "learning_rate": 0.0005243385942926202,
      "loss": 3.1573,
      "step": 53249
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5609416961669922,
      "learning_rate": 0.0005243358784275247,
      "loss": 3.0262,
      "step": 53250
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.873440146446228,
      "learning_rate": 0.0005243331625207211,
      "loss": 3.0328,
      "step": 53251
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4664252996444702,
      "learning_rate": 0.0005243304465722097,
      "loss": 3.3201,
      "step": 53252
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6536401510238647,
      "learning_rate": 0.000524327730581991,
      "loss": 2.9861,
      "step": 53253
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5987625122070312,
      "learning_rate": 0.0005243250145500655,
      "loss": 2.9284,
      "step": 53254
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9089198112487793,
      "learning_rate": 0.0005243222984764339,
      "loss": 3.0529,
      "step": 53255
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4388595819473267,
      "learning_rate": 0.0005243195823610964,
      "loss": 3.148,
      "step": 53256
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5686396360397339,
      "learning_rate": 0.0005243168662040537,
      "loss": 3.2518,
      "step": 53257
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.48259699344635,
      "learning_rate": 0.0005243141500053063,
      "loss": 2.9723,
      "step": 53258
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7828675508499146,
      "learning_rate": 0.0005243114337648545,
      "loss": 2.9973,
      "step": 53259
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5477957725524902,
      "learning_rate": 0.0005243087174826991,
      "loss": 3.1826,
      "step": 53260
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7172762155532837,
      "learning_rate": 0.0005243060011588406,
      "loss": 3.2599,
      "step": 53261
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.893492341041565,
      "learning_rate": 0.0005243032847932792,
      "loss": 3.0092,
      "step": 53262
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.954831600189209,
      "learning_rate": 0.0005243005683860157,
      "loss": 3.1982,
      "step": 53263
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6958990097045898,
      "learning_rate": 0.0005242978519370503,
      "loss": 2.9633,
      "step": 53264
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3074997663497925,
      "learning_rate": 0.0005242951354463839,
      "loss": 3.2267,
      "step": 53265
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2633708715438843,
      "learning_rate": 0.0005242924189140166,
      "loss": 2.8019,
      "step": 53266
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.531245231628418,
      "learning_rate": 0.0005242897023399493,
      "loss": 3.0035,
      "step": 53267
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4512664079666138,
      "learning_rate": 0.0005242869857241822,
      "loss": 3.1582,
      "step": 53268
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5557862520217896,
      "learning_rate": 0.0005242842690667159,
      "loss": 2.93,
      "step": 53269
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8946974277496338,
      "learning_rate": 0.000524281552367551,
      "loss": 3.1371,
      "step": 53270
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.5364081859588623,
      "learning_rate": 0.000524278835626688,
      "loss": 2.9156,
      "step": 53271
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7653648853302002,
      "learning_rate": 0.0005242761188441272,
      "loss": 3.0583,
      "step": 53272
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0039432048797607,
      "learning_rate": 0.0005242734020198692,
      "loss": 3.2092,
      "step": 53273
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.065127372741699,
      "learning_rate": 0.0005242706851539147,
      "loss": 3.0164,
      "step": 53274
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6980652809143066,
      "learning_rate": 0.0005242679682462639,
      "loss": 3.027,
      "step": 53275
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.435592770576477,
      "learning_rate": 0.0005242652512969175,
      "loss": 3.1237,
      "step": 53276
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.064399003982544,
      "learning_rate": 0.000524262534305876,
      "loss": 2.7366,
      "step": 53277
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.65782630443573,
      "learning_rate": 0.0005242598172731397,
      "loss": 2.8522,
      "step": 53278
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5089433193206787,
      "learning_rate": 0.0005242571001987094,
      "loss": 2.9486,
      "step": 53279
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.2286067008972168,
      "learning_rate": 0.0005242543830825855,
      "loss": 2.9629,
      "step": 53280
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8663560152053833,
      "learning_rate": 0.0005242516659247686,
      "loss": 2.7913,
      "step": 53281
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5661990642547607,
      "learning_rate": 0.0005242489487252588,
      "loss": 3.2392,
      "step": 53282
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5029473304748535,
      "learning_rate": 0.000524246231484057,
      "loss": 3.1374,
      "step": 53283
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.650805950164795,
      "learning_rate": 0.0005242435142011636,
      "loss": 3.1017,
      "step": 53284
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2698631286621094,
      "learning_rate": 0.0005242407968765791,
      "loss": 3.1757,
      "step": 53285
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4779980182647705,
      "learning_rate": 0.0005242380795103039,
      "loss": 3.0616,
      "step": 53286
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4602597951889038,
      "learning_rate": 0.0005242353621023387,
      "loss": 3.0496,
      "step": 53287
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.922928810119629,
      "learning_rate": 0.000524232644652684,
      "loss": 2.9884,
      "step": 53288
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.887395977973938,
      "learning_rate": 0.0005242299271613402,
      "loss": 3.1095,
      "step": 53289
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.685390830039978,
      "learning_rate": 0.0005242272096283077,
      "loss": 3.2246,
      "step": 53290
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6223286390304565,
      "learning_rate": 0.0005242244920535872,
      "loss": 3.2102,
      "step": 53291
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4462822675704956,
      "learning_rate": 0.0005242217744371792,
      "loss": 3.0163,
      "step": 53292
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6862558126449585,
      "learning_rate": 0.000524219056779084,
      "loss": 2.9035,
      "step": 53293
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5194289684295654,
      "learning_rate": 0.0005242163390793024,
      "loss": 3.0478,
      "step": 53294
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.173804521560669,
      "learning_rate": 0.0005242136213378346,
      "loss": 2.8805,
      "step": 53295
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.713881015777588,
      "learning_rate": 0.0005242109035546813,
      "loss": 3.1072,
      "step": 53296
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7331931591033936,
      "learning_rate": 0.000524208185729843,
      "loss": 3.0351,
      "step": 53297
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.661062002182007,
      "learning_rate": 0.0005242054678633202,
      "loss": 3.1891,
      "step": 53298
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.064753770828247,
      "learning_rate": 0.0005242027499551133,
      "loss": 3.2442,
      "step": 53299
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7511533498764038,
      "learning_rate": 0.0005242000320052229,
      "loss": 3.1497,
      "step": 53300
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8441414833068848,
      "learning_rate": 0.0005241973140136494,
      "loss": 3.0197,
      "step": 53301
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4550840854644775,
      "learning_rate": 0.0005241945959803935,
      "loss": 3.167,
      "step": 53302
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5459117889404297,
      "learning_rate": 0.0005241918779054555,
      "loss": 3.0782,
      "step": 53303
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6331815719604492,
      "learning_rate": 0.000524189159788836,
      "loss": 3.0906,
      "step": 53304
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.7245099544525146,
      "learning_rate": 0.0005241864416305357,
      "loss": 3.1073,
      "step": 53305
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5621057748794556,
      "learning_rate": 0.0005241837234305547,
      "loss": 2.6806,
      "step": 53306
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5538967847824097,
      "learning_rate": 0.0005241810051888938,
      "loss": 3.1867,
      "step": 53307
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8153330087661743,
      "learning_rate": 0.0005241782869055534,
      "loss": 3.1186,
      "step": 53308
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3893005847930908,
      "learning_rate": 0.000524175568580534,
      "loss": 2.9808,
      "step": 53309
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.940286636352539,
      "learning_rate": 0.0005241728502138361,
      "loss": 2.834,
      "step": 53310
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6916999816894531,
      "learning_rate": 0.0005241701318054603,
      "loss": 3.3669,
      "step": 53311
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4679781198501587,
      "learning_rate": 0.0005241674133554071,
      "loss": 2.8917,
      "step": 53312
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3681641817092896,
      "learning_rate": 0.0005241646948636768,
      "loss": 2.8867,
      "step": 53313
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.024803400039673,
      "learning_rate": 0.0005241619763302702,
      "loss": 2.6598,
      "step": 53314
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8984664678573608,
      "learning_rate": 0.0005241592577551876,
      "loss": 3.0464,
      "step": 53315
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.407200813293457,
      "learning_rate": 0.0005241565391384296,
      "loss": 2.9018,
      "step": 53316
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3642091751098633,
      "learning_rate": 0.0005241538204799965,
      "loss": 3.355,
      "step": 53317
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4119012355804443,
      "learning_rate": 0.0005241511017798891,
      "loss": 3.1482,
      "step": 53318
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.791308045387268,
      "learning_rate": 0.0005241483830381077,
      "loss": 3.1806,
      "step": 53319
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.849484443664551,
      "learning_rate": 0.0005241456642546531,
      "loss": 3.0336,
      "step": 53320
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.6092140674591064,
      "learning_rate": 0.0005241429454295254,
      "loss": 2.9948,
      "step": 53321
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.674148678779602,
      "learning_rate": 0.0005241402265627253,
      "loss": 2.8999,
      "step": 53322
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7787916660308838,
      "learning_rate": 0.0005241375076542533,
      "loss": 3.1378,
      "step": 53323
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4846513271331787,
      "learning_rate": 0.00052413478870411,
      "loss": 3.2373,
      "step": 53324
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3252475261688232,
      "learning_rate": 0.0005241320697122957,
      "loss": 2.8434,
      "step": 53325
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8466010093688965,
      "learning_rate": 0.0005241293506788112,
      "loss": 3.0904,
      "step": 53326
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6022955179214478,
      "learning_rate": 0.0005241266316036566,
      "loss": 3.0957,
      "step": 53327
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.970164179801941,
      "learning_rate": 0.0005241239124868327,
      "loss": 2.8487,
      "step": 53328
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.6139981746673584,
      "learning_rate": 0.00052412119332834,
      "loss": 3.0225,
      "step": 53329
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5657777786254883,
      "learning_rate": 0.0005241184741281788,
      "loss": 3.0626,
      "step": 53330
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.522321343421936,
      "learning_rate": 0.00052411575488635,
      "loss": 3.4717,
      "step": 53331
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5077764987945557,
      "learning_rate": 0.0005241130356028536,
      "loss": 3.1637,
      "step": 53332
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.525312662124634,
      "learning_rate": 0.0005241103162776905,
      "loss": 2.8592,
      "step": 53333
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4790970087051392,
      "learning_rate": 0.000524107596910861,
      "loss": 3.1988,
      "step": 53334
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6392343044281006,
      "learning_rate": 0.0005241048775023656,
      "loss": 2.9029,
      "step": 53335
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.937767505645752,
      "learning_rate": 0.000524102158052205,
      "loss": 2.9905,
      "step": 53336
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.2923035621643066,
      "learning_rate": 0.0005240994385603796,
      "loss": 2.9094,
      "step": 53337
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7177042961120605,
      "learning_rate": 0.0005240967190268898,
      "loss": 2.7968,
      "step": 53338
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4979445934295654,
      "learning_rate": 0.0005240939994517362,
      "loss": 3.2357,
      "step": 53339
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.3727521896362305,
      "learning_rate": 0.0005240912798349193,
      "loss": 3.2832,
      "step": 53340
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5195939540863037,
      "learning_rate": 0.0005240885601764397,
      "loss": 3.0076,
      "step": 53341
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4463531970977783,
      "learning_rate": 0.0005240858404762976,
      "loss": 3.2019,
      "step": 53342
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7337360382080078,
      "learning_rate": 0.0005240831207344939,
      "loss": 3.0479,
      "step": 53343
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.403213381767273,
      "learning_rate": 0.0005240804009510288,
      "loss": 3.0959,
      "step": 53344
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.27969491481781,
      "learning_rate": 0.0005240776811259029,
      "loss": 2.7225,
      "step": 53345
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.4832370281219482,
      "learning_rate": 0.0005240749612591168,
      "loss": 3.0307,
      "step": 53346
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8098676204681396,
      "learning_rate": 0.0005240722413506709,
      "loss": 3.0471,
      "step": 53347
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5076977014541626,
      "learning_rate": 0.0005240695214005658,
      "loss": 3.0206,
      "step": 53348
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7020392417907715,
      "learning_rate": 0.000524066801408802,
      "loss": 3.0846,
      "step": 53349
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0953164100646973,
      "learning_rate": 0.0005240640813753798,
      "loss": 2.8124,
      "step": 53350
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3155913352966309,
      "learning_rate": 0.0005240613613002999,
      "loss": 2.9298,
      "step": 53351
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.901980996131897,
      "learning_rate": 0.0005240586411835628,
      "loss": 2.9135,
      "step": 53352
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.0601327419281006,
      "learning_rate": 0.0005240559210251689,
      "loss": 2.93,
      "step": 53353
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4233144521713257,
      "learning_rate": 0.0005240532008251189,
      "loss": 3.0078,
      "step": 53354
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.786328911781311,
      "learning_rate": 0.0005240504805834131,
      "loss": 3.1682,
      "step": 53355
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9249944686889648,
      "learning_rate": 0.0005240477603000521,
      "loss": 3.0951,
      "step": 53356
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.677053451538086,
      "learning_rate": 0.0005240450399750364,
      "loss": 2.7616,
      "step": 53357
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.3449395895004272,
      "learning_rate": 0.0005240423196083665,
      "loss": 2.9948,
      "step": 53358
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6269339323043823,
      "learning_rate": 0.0005240395992000428,
      "loss": 2.9106,
      "step": 53359
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9086860418319702,
      "learning_rate": 0.0005240368787500661,
      "loss": 2.9191,
      "step": 53360
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7609922885894775,
      "learning_rate": 0.0005240341582584367,
      "loss": 3.0096,
      "step": 53361
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.4213652610778809,
      "learning_rate": 0.000524031437725155,
      "loss": 2.8721,
      "step": 53362
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.858319640159607,
      "learning_rate": 0.0005240287171502215,
      "loss": 3.0809,
      "step": 53363
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.5815974473953247,
      "learning_rate": 0.0005240259965336371,
      "loss": 3.0977,
      "step": 53364
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7781800031661987,
      "learning_rate": 0.0005240232758754019,
      "loss": 2.8949,
      "step": 53365
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.8113099336624146,
      "learning_rate": 0.0005240205551755166,
      "loss": 2.9272,
      "step": 53366
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.6618897914886475,
      "learning_rate": 0.0005240178344339816,
      "loss": 3.2175,
      "step": 53367
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7052994966506958,
      "learning_rate": 0.0005240151136507975,
      "loss": 3.0073,
      "step": 53368
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7595345973968506,
      "learning_rate": 0.0005240123928259648,
      "loss": 3.195,
      "step": 53369
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.462748646736145,
      "learning_rate": 0.0005240096719594839,
      "loss": 3.0382,
      "step": 53370
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.653120756149292,
      "learning_rate": 0.0005240069510513553,
      "loss": 3.0651,
      "step": 53371
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.035659074783325,
      "learning_rate": 0.0005240042301015797,
      "loss": 2.9398,
      "step": 53372
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.7084722518920898,
      "learning_rate": 0.0005240015091101575,
      "loss": 3.1588,
      "step": 53373
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.372918725013733,
      "learning_rate": 0.0005239987880770891,
      "loss": 3.0383,
      "step": 53374
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.431951880455017,
      "learning_rate": 0.0005239960670023751,
      "loss": 3.1084,
      "step": 53375
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.6659914255142212,
      "learning_rate": 0.0005239933458860162,
      "loss": 2.9849,
      "step": 53376
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.386627197265625,
      "learning_rate": 0.0005239906247280125,
      "loss": 3.0328,
      "step": 53377
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6790447235107422,
      "learning_rate": 0.0005239879035283648,
      "loss": 2.9786,
      "step": 53378
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0153443813323975,
      "learning_rate": 0.0005239851822870734,
      "loss": 2.845,
      "step": 53379
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5907469987869263,
      "learning_rate": 0.000523982461004139,
      "loss": 3.2736,
      "step": 53380
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4321322441101074,
      "learning_rate": 0.0005239797396795621,
      "loss": 3.0551,
      "step": 53381
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.166644334793091,
      "learning_rate": 0.000523977018313343,
      "loss": 2.9506,
      "step": 53382
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3612918853759766,
      "learning_rate": 0.0005239742969054825,
      "loss": 2.8538,
      "step": 53383
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5527560710906982,
      "learning_rate": 0.0005239715754559809,
      "loss": 2.8543,
      "step": 53384
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.419983148574829,
      "learning_rate": 0.0005239688539648387,
      "loss": 2.8032,
      "step": 53385
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4173827171325684,
      "learning_rate": 0.0005239661324320565,
      "loss": 3.2725,
      "step": 53386
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7174376249313354,
      "learning_rate": 0.0005239634108576348,
      "loss": 2.8877,
      "step": 53387
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7016676664352417,
      "learning_rate": 0.000523960689241574,
      "loss": 3.1789,
      "step": 53388
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3369594812393188,
      "learning_rate": 0.0005239579675838748,
      "loss": 3.012,
      "step": 53389
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.447417140007019,
      "learning_rate": 0.0005239552458845376,
      "loss": 3.1768,
      "step": 53390
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9069985151290894,
      "learning_rate": 0.0005239525241435627,
      "loss": 3.2154,
      "step": 53391
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3903330564498901,
      "learning_rate": 0.0005239498023609509,
      "loss": 2.7927,
      "step": 53392
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9899364709854126,
      "learning_rate": 0.0005239470805367026,
      "loss": 2.9468,
      "step": 53393
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5612545013427734,
      "learning_rate": 0.0005239443586708182,
      "loss": 3.2922,
      "step": 53394
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5966440439224243,
      "learning_rate": 0.0005239416367632985,
      "loss": 2.9748,
      "step": 53395
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9966925382614136,
      "learning_rate": 0.0005239389148141438,
      "loss": 3.0607,
      "step": 53396
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9840017557144165,
      "learning_rate": 0.0005239361928233545,
      "loss": 2.9374,
      "step": 53397
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.056891441345215,
      "learning_rate": 0.0005239334707909313,
      "loss": 2.8773,
      "step": 53398
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.814467191696167,
      "learning_rate": 0.0005239307487168748,
      "loss": 2.9029,
      "step": 53399
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5568480491638184,
      "learning_rate": 0.0005239280266011851,
      "loss": 3.1917,
      "step": 53400
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.765583872795105,
      "learning_rate": 0.0005239253044438631,
      "loss": 3.1311,
      "step": 53401
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6881294250488281,
      "learning_rate": 0.0005239225822449091,
      "loss": 2.9492,
      "step": 53402
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7466890811920166,
      "learning_rate": 0.0005239198600043237,
      "loss": 3.0147,
      "step": 53403
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8911938667297363,
      "learning_rate": 0.0005239171377221074,
      "loss": 2.8982,
      "step": 53404
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4938290119171143,
      "learning_rate": 0.0005239144153982606,
      "loss": 2.8034,
      "step": 53405
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6517465114593506,
      "learning_rate": 0.0005239116930327839,
      "loss": 3.4268,
      "step": 53406
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7571039199829102,
      "learning_rate": 0.0005239089706256777,
      "loss": 3.1519,
      "step": 53407
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.958672523498535,
      "learning_rate": 0.0005239062481769427,
      "loss": 2.9899,
      "step": 53408
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1934597492218018,
      "learning_rate": 0.0005239035256865794,
      "loss": 2.9863,
      "step": 53409
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3221423625946045,
      "learning_rate": 0.0005239008031545881,
      "loss": 3.1105,
      "step": 53410
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.570044994354248,
      "learning_rate": 0.0005238980805809694,
      "loss": 3.1071,
      "step": 53411
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.157560348510742,
      "learning_rate": 0.0005238953579657238,
      "loss": 3.052,
      "step": 53412
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6334344148635864,
      "learning_rate": 0.0005238926353088519,
      "loss": 2.9708,
      "step": 53413
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8876616954803467,
      "learning_rate": 0.0005238899126103541,
      "loss": 3.2398,
      "step": 53414
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.998950719833374,
      "learning_rate": 0.000523887189870231,
      "loss": 2.9841,
      "step": 53415
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5277751684188843,
      "learning_rate": 0.000523884467088483,
      "loss": 3.0939,
      "step": 53416
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.116114616394043,
      "learning_rate": 0.0005238817442651106,
      "loss": 2.9803,
      "step": 53417
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8090170621871948,
      "learning_rate": 0.0005238790214001144,
      "loss": 3.036,
      "step": 53418
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3848143815994263,
      "learning_rate": 0.0005238762984934948,
      "loss": 3.2414,
      "step": 53419
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.498860478401184,
      "learning_rate": 0.0005238735755452525,
      "loss": 2.9725,
      "step": 53420
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.555109977722168,
      "learning_rate": 0.0005238708525553878,
      "loss": 3.172,
      "step": 53421
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6783273220062256,
      "learning_rate": 0.0005238681295239013,
      "loss": 3.0392,
      "step": 53422
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9674090147018433,
      "learning_rate": 0.0005238654064507934,
      "loss": 3.0702,
      "step": 53423
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4452606439590454,
      "learning_rate": 0.0005238626833360648,
      "loss": 2.8396,
      "step": 53424
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.10412335395813,
      "learning_rate": 0.0005238599601797159,
      "loss": 3.0012,
      "step": 53425
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9822311401367188,
      "learning_rate": 0.0005238572369817472,
      "loss": 2.9303,
      "step": 53426
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6565215587615967,
      "learning_rate": 0.0005238545137421591,
      "loss": 2.9913,
      "step": 53427
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3333386182785034,
      "learning_rate": 0.0005238517904609524,
      "loss": 3.1605,
      "step": 53428
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5119112730026245,
      "learning_rate": 0.0005238490671381274,
      "loss": 2.9895,
      "step": 53429
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1112704277038574,
      "learning_rate": 0.0005238463437736846,
      "loss": 3.157,
      "step": 53430
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9671237468719482,
      "learning_rate": 0.0005238436203676244,
      "loss": 3.2085,
      "step": 53431
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.280179023742676,
      "learning_rate": 0.0005238408969199476,
      "loss": 2.7605,
      "step": 53432
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7437536716461182,
      "learning_rate": 0.0005238381734306545,
      "loss": 3.0492,
      "step": 53433
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1978232860565186,
      "learning_rate": 0.0005238354498997457,
      "loss": 3.0703,
      "step": 53434
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7728219032287598,
      "learning_rate": 0.0005238327263272216,
      "loss": 2.9153,
      "step": 53435
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.24930477142334,
      "learning_rate": 0.0005238300027130829,
      "loss": 3.2132,
      "step": 53436
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.899841547012329,
      "learning_rate": 0.0005238272790573298,
      "loss": 2.8928,
      "step": 53437
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6887946128845215,
      "learning_rate": 0.0005238245553599631,
      "loss": 3.0332,
      "step": 53438
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0256800651550293,
      "learning_rate": 0.0005238218316209831,
      "loss": 2.9683,
      "step": 53439
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7027244567871094,
      "learning_rate": 0.0005238191078403904,
      "loss": 3.0466,
      "step": 53440
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9686014652252197,
      "learning_rate": 0.0005238163840181856,
      "loss": 2.9714,
      "step": 53441
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.837264895439148,
      "learning_rate": 0.000523813660154369,
      "loss": 3.1623,
      "step": 53442
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.817813754081726,
      "learning_rate": 0.0005238109362489413,
      "loss": 3.135,
      "step": 53443
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9368911981582642,
      "learning_rate": 0.0005238082123019029,
      "loss": 3.1922,
      "step": 53444
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7265236377716064,
      "learning_rate": 0.0005238054883132543,
      "loss": 2.996,
      "step": 53445
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0499508380889893,
      "learning_rate": 0.0005238027642829962,
      "loss": 3.0238,
      "step": 53446
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8376001119613647,
      "learning_rate": 0.0005238000402111287,
      "loss": 2.9089,
      "step": 53447
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9010801315307617,
      "learning_rate": 0.0005237973160976527,
      "loss": 2.8283,
      "step": 53448
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5134185552597046,
      "learning_rate": 0.0005237945919425686,
      "loss": 3.2414,
      "step": 53449
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.520801067352295,
      "learning_rate": 0.0005237918677458767,
      "loss": 2.9188,
      "step": 53450
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5262339115142822,
      "learning_rate": 0.0005237891435075777,
      "loss": 3.0955,
      "step": 53451
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8028391599655151,
      "learning_rate": 0.0005237864192276722,
      "loss": 2.7318,
      "step": 53452
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5467909574508667,
      "learning_rate": 0.0005237836949061605,
      "loss": 3.2968,
      "step": 53453
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5050113201141357,
      "learning_rate": 0.0005237809705430431,
      "loss": 3.0564,
      "step": 53454
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8420530557632446,
      "learning_rate": 0.0005237782461383207,
      "loss": 3.0165,
      "step": 53455
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.65249502658844,
      "learning_rate": 0.0005237755216919936,
      "loss": 3.0232,
      "step": 53456
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.446570873260498,
      "learning_rate": 0.0005237727972040626,
      "loss": 3.0981,
      "step": 53457
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5536991357803345,
      "learning_rate": 0.0005237700726745279,
      "loss": 3.1368,
      "step": 53458
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2945899963378906,
      "learning_rate": 0.00052376734810339,
      "loss": 3.2598,
      "step": 53459
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6858198642730713,
      "learning_rate": 0.0005237646234906498,
      "loss": 3.1151,
      "step": 53460
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5737390518188477,
      "learning_rate": 0.0005237618988363073,
      "loss": 3.2218,
      "step": 53461
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8235610723495483,
      "learning_rate": 0.0005237591741403633,
      "loss": 3.1235,
      "step": 53462
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.252830743789673,
      "learning_rate": 0.0005237564494028182,
      "loss": 3.0614,
      "step": 53463
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8654651641845703,
      "learning_rate": 0.0005237537246236727,
      "loss": 3.0955,
      "step": 53464
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0645761489868164,
      "learning_rate": 0.000523750999802927,
      "loss": 3.028,
      "step": 53465
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.494291305541992,
      "learning_rate": 0.0005237482749405819,
      "loss": 3.2095,
      "step": 53466
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.373240351676941,
      "learning_rate": 0.0005237455500366377,
      "loss": 3.1127,
      "step": 53467
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.623187780380249,
      "learning_rate": 0.000523742825091095,
      "loss": 3.0872,
      "step": 53468
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.485560178756714,
      "learning_rate": 0.0005237401001039542,
      "loss": 3.1152,
      "step": 53469
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2613186836242676,
      "learning_rate": 0.000523737375075216,
      "loss": 3.1987,
      "step": 53470
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7194801568984985,
      "learning_rate": 0.0005237346500048807,
      "loss": 2.9897,
      "step": 53471
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.122260332107544,
      "learning_rate": 0.0005237319248929491,
      "loss": 3.0507,
      "step": 53472
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5035966634750366,
      "learning_rate": 0.0005237291997394213,
      "loss": 3.0647,
      "step": 53473
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4408783912658691,
      "learning_rate": 0.0005237264745442981,
      "loss": 3.0079,
      "step": 53474
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.134105920791626,
      "learning_rate": 0.00052372374930758,
      "loss": 2.9576,
      "step": 53475
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.289865732192993,
      "learning_rate": 0.0005237210240292672,
      "loss": 2.8463,
      "step": 53476
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.711200475692749,
      "learning_rate": 0.0005237182987093607,
      "loss": 2.9977,
      "step": 53477
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0571160316467285,
      "learning_rate": 0.0005237155733478606,
      "loss": 3.0035,
      "step": 53478
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.1996171474456787,
      "learning_rate": 0.0005237128479447675,
      "loss": 3.091,
      "step": 53479
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5257251262664795,
      "learning_rate": 0.0005237101225000821,
      "loss": 2.9712,
      "step": 53480
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4553264379501343,
      "learning_rate": 0.0005237073970138047,
      "loss": 2.6272,
      "step": 53481
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9462376832962036,
      "learning_rate": 0.0005237046714859359,
      "loss": 3.1242,
      "step": 53482
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3334426879882812,
      "learning_rate": 0.0005237019459164762,
      "loss": 3.1476,
      "step": 53483
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.8180136680603027,
      "learning_rate": 0.000523699220305426,
      "loss": 2.8968,
      "step": 53484
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.062285900115967,
      "learning_rate": 0.000523696494652786,
      "loss": 3.1369,
      "step": 53485
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8227617740631104,
      "learning_rate": 0.0005236937689585564,
      "loss": 3.1822,
      "step": 53486
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4246395826339722,
      "learning_rate": 0.0005236910432227382,
      "loss": 3.2758,
      "step": 53487
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7252423763275146,
      "learning_rate": 0.0005236883174453313,
      "loss": 2.7479,
      "step": 53488
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.472904920578003,
      "learning_rate": 0.0005236855916263368,
      "loss": 3.0263,
      "step": 53489
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.597419023513794,
      "learning_rate": 0.0005236828657657548,
      "loss": 2.8885,
      "step": 53490
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7749913930892944,
      "learning_rate": 0.0005236801398635859,
      "loss": 3.0134,
      "step": 53491
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3972272872924805,
      "learning_rate": 0.0005236774139198308,
      "loss": 3.0391,
      "step": 53492
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9647985696792603,
      "learning_rate": 0.0005236746879344897,
      "loss": 3.1083,
      "step": 53493
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.010298013687134,
      "learning_rate": 0.0005236719619075632,
      "loss": 3.1952,
      "step": 53494
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.519152283668518,
      "learning_rate": 0.0005236692358390519,
      "loss": 3.1492,
      "step": 53495
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.342693328857422,
      "learning_rate": 0.0005236665097289564,
      "loss": 2.9839,
      "step": 53496
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.006608486175537,
      "learning_rate": 0.0005236637835772769,
      "loss": 2.9095,
      "step": 53497
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.714739203453064,
      "learning_rate": 0.0005236610573840142,
      "loss": 3.1525,
      "step": 53498
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.7775418758392334,
      "learning_rate": 0.0005236583311491686,
      "loss": 2.8249,
      "step": 53499
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3460123538970947,
      "learning_rate": 0.0005236556048727408,
      "loss": 3.0144,
      "step": 53500
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0511648654937744,
      "learning_rate": 0.0005236528785547311,
      "loss": 3.2241,
      "step": 53501
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8904019594192505,
      "learning_rate": 0.0005236501521951401,
      "loss": 2.9083,
      "step": 53502
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.233180046081543,
      "learning_rate": 0.0005236474257939685,
      "loss": 3.0243,
      "step": 53503
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4171998500823975,
      "learning_rate": 0.0005236446993512165,
      "loss": 2.9437,
      "step": 53504
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7000094652175903,
      "learning_rate": 0.0005236419728668846,
      "loss": 2.86,
      "step": 53505
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.083864688873291,
      "learning_rate": 0.0005236392463409736,
      "loss": 2.9746,
      "step": 53506
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8541756868362427,
      "learning_rate": 0.0005236365197734838,
      "loss": 2.9911,
      "step": 53507
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6764520406723022,
      "learning_rate": 0.0005236337931644157,
      "loss": 3.2046,
      "step": 53508
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0567119121551514,
      "learning_rate": 0.00052363106651377,
      "loss": 3.1165,
      "step": 53509
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8355512619018555,
      "learning_rate": 0.0005236283398215469,
      "loss": 3.1751,
      "step": 53510
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5994211435317993,
      "learning_rate": 0.0005236256130877471,
      "loss": 2.8949,
      "step": 53511
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7012505531311035,
      "learning_rate": 0.0005236228863123711,
      "loss": 3.0714,
      "step": 53512
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0247483253479004,
      "learning_rate": 0.0005236201594954194,
      "loss": 2.9016,
      "step": 53513
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6254632472991943,
      "learning_rate": 0.0005236174326368925,
      "loss": 2.9308,
      "step": 53514
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2385168075561523,
      "learning_rate": 0.000523614705736791,
      "loss": 3.1037,
      "step": 53515
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6213299036026,
      "learning_rate": 0.0005236119787951152,
      "loss": 2.8767,
      "step": 53516
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6867552995681763,
      "learning_rate": 0.0005236092518118656,
      "loss": 3.1081,
      "step": 53517
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8798421621322632,
      "learning_rate": 0.000523606524787043,
      "loss": 3.2751,
      "step": 53518
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6594021320343018,
      "learning_rate": 0.0005236037977206475,
      "loss": 3.0454,
      "step": 53519
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6867988109588623,
      "learning_rate": 0.00052360107061268,
      "loss": 3.1177,
      "step": 53520
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4968712329864502,
      "learning_rate": 0.0005235983434631409,
      "loss": 2.9059,
      "step": 53521
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6915268898010254,
      "learning_rate": 0.0005235956162720305,
      "loss": 3.3542,
      "step": 53522
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.52937650680542,
      "learning_rate": 0.0005235928890393495,
      "loss": 3.242,
      "step": 53523
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0266029834747314,
      "learning_rate": 0.0005235901617650984,
      "loss": 3.2588,
      "step": 53524
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.213552951812744,
      "learning_rate": 0.0005235874344492776,
      "loss": 3.1951,
      "step": 53525
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7827651500701904,
      "learning_rate": 0.0005235847070918879,
      "loss": 2.9354,
      "step": 53526
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.061793804168701,
      "learning_rate": 0.0005235819796929293,
      "loss": 2.8823,
      "step": 53527
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7910007238388062,
      "learning_rate": 0.0005235792522524027,
      "loss": 3.2574,
      "step": 53528
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6870297193527222,
      "learning_rate": 0.0005235765247703085,
      "loss": 3.0851,
      "step": 53529
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9996137619018555,
      "learning_rate": 0.0005235737972466471,
      "loss": 2.9612,
      "step": 53530
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4562846422195435,
      "learning_rate": 0.0005235710696814193,
      "loss": 3.0382,
      "step": 53531
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4539448022842407,
      "learning_rate": 0.0005235683420746253,
      "loss": 2.7866,
      "step": 53532
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8177791833877563,
      "learning_rate": 0.0005235656144262658,
      "loss": 2.8642,
      "step": 53533
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5888962745666504,
      "learning_rate": 0.0005235628867363411,
      "loss": 3.1502,
      "step": 53534
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8940367698669434,
      "learning_rate": 0.000523560159004852,
      "loss": 3.0456,
      "step": 53535
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8586466312408447,
      "learning_rate": 0.0005235574312317988,
      "loss": 2.9777,
      "step": 53536
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.51119065284729,
      "learning_rate": 0.000523554703417182,
      "loss": 2.8295,
      "step": 53537
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7126504182815552,
      "learning_rate": 0.0005235519755610021,
      "loss": 2.7278,
      "step": 53538
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.464992046356201,
      "learning_rate": 0.0005235492476632598,
      "loss": 2.9696,
      "step": 53539
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4999239444732666,
      "learning_rate": 0.0005235465197239554,
      "loss": 2.9866,
      "step": 53540
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3651225566864014,
      "learning_rate": 0.0005235437917430895,
      "loss": 2.914,
      "step": 53541
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2597362995147705,
      "learning_rate": 0.0005235410637206626,
      "loss": 3.0978,
      "step": 53542
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.292710304260254,
      "learning_rate": 0.0005235383356566752,
      "loss": 3.2101,
      "step": 53543
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5800399780273438,
      "learning_rate": 0.0005235356075511279,
      "loss": 2.9963,
      "step": 53544
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8590636253356934,
      "learning_rate": 0.000523532879404021,
      "loss": 3.1221,
      "step": 53545
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.507895827293396,
      "learning_rate": 0.000523530151215355,
      "loss": 3.0947,
      "step": 53546
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3575067520141602,
      "learning_rate": 0.0005235274229851307,
      "loss": 2.871,
      "step": 53547
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.061495065689087,
      "learning_rate": 0.0005235246947133484,
      "loss": 3.1606,
      "step": 53548
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7083600759506226,
      "learning_rate": 0.0005235219664000087,
      "loss": 3.0877,
      "step": 53549
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4970403909683228,
      "learning_rate": 0.000523519238045112,
      "loss": 2.6048,
      "step": 53550
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3762729167938232,
      "learning_rate": 0.0005235165096486588,
      "loss": 2.9596,
      "step": 53551
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5104024410247803,
      "learning_rate": 0.0005235137812106497,
      "loss": 2.9211,
      "step": 53552
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7178927659988403,
      "learning_rate": 0.0005235110527310852,
      "loss": 3.2509,
      "step": 53553
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5053296089172363,
      "learning_rate": 0.0005235083242099657,
      "loss": 3.0594,
      "step": 53554
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4699198007583618,
      "learning_rate": 0.0005235055956472918,
      "loss": 3.0647,
      "step": 53555
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3601866960525513,
      "learning_rate": 0.000523502867043064,
      "loss": 3.0573,
      "step": 53556
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7295759916305542,
      "learning_rate": 0.0005235001383972828,
      "loss": 3.0861,
      "step": 53557
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6058812141418457,
      "learning_rate": 0.0005234974097099487,
      "loss": 2.9736,
      "step": 53558
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.535584807395935,
      "learning_rate": 0.0005234946809810623,
      "loss": 3.0971,
      "step": 53559
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9202289581298828,
      "learning_rate": 0.0005234919522106239,
      "loss": 3.2695,
      "step": 53560
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5896753072738647,
      "learning_rate": 0.0005234892233986341,
      "loss": 3.077,
      "step": 53561
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.169410467147827,
      "learning_rate": 0.0005234864945450936,
      "loss": 3.3408,
      "step": 53562
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.750309944152832,
      "learning_rate": 0.0005234837656500026,
      "loss": 2.9713,
      "step": 53563
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8550002574920654,
      "learning_rate": 0.0005234810367133619,
      "loss": 3.1107,
      "step": 53564
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7123422622680664,
      "learning_rate": 0.0005234783077351717,
      "loss": 3.2061,
      "step": 53565
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.84702467918396,
      "learning_rate": 0.0005234755787154327,
      "loss": 2.9647,
      "step": 53566
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.6047563552856445,
      "learning_rate": 0.0005234728496541454,
      "loss": 2.9971,
      "step": 53567
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6433043479919434,
      "learning_rate": 0.0005234701205513103,
      "loss": 2.9621,
      "step": 53568
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.628671884536743,
      "learning_rate": 0.0005234673914069278,
      "loss": 2.8571,
      "step": 53569
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5742645263671875,
      "learning_rate": 0.0005234646622209986,
      "loss": 3.2963,
      "step": 53570
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4544204473495483,
      "learning_rate": 0.000523461932993523,
      "loss": 2.8188,
      "step": 53571
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8104536533355713,
      "learning_rate": 0.0005234592037245017,
      "loss": 3.0846,
      "step": 53572
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4285082817077637,
      "learning_rate": 0.0005234564744139351,
      "loss": 3.0224,
      "step": 53573
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.86286199092865,
      "learning_rate": 0.0005234537450618237,
      "loss": 3.0988,
      "step": 53574
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3791894912719727,
      "learning_rate": 0.0005234510156681679,
      "loss": 3.145,
      "step": 53575
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6099711656570435,
      "learning_rate": 0.0005234482862329686,
      "loss": 3.1686,
      "step": 53576
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3110804557800293,
      "learning_rate": 0.0005234455567562259,
      "loss": 2.7536,
      "step": 53577
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.511765480041504,
      "learning_rate": 0.0005234428272379404,
      "loss": 2.8864,
      "step": 53578
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3992542028427124,
      "learning_rate": 0.0005234400976781127,
      "loss": 2.8474,
      "step": 53579
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5191001892089844,
      "learning_rate": 0.0005234373680767433,
      "loss": 2.9607,
      "step": 53580
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9768327474594116,
      "learning_rate": 0.0005234346384338327,
      "loss": 3.104,
      "step": 53581
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5206409692764282,
      "learning_rate": 0.0005234319087493814,
      "loss": 2.8479,
      "step": 53582
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1939070224761963,
      "learning_rate": 0.0005234291790233898,
      "loss": 3.2396,
      "step": 53583
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.559721827507019,
      "learning_rate": 0.0005234264492558585,
      "loss": 3.2255,
      "step": 53584
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3683961629867554,
      "learning_rate": 0.0005234237194467881,
      "loss": 2.8954,
      "step": 53585
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4890397787094116,
      "learning_rate": 0.000523420989596179,
      "loss": 2.9271,
      "step": 53586
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5863250494003296,
      "learning_rate": 0.0005234182597040316,
      "loss": 3.0958,
      "step": 53587
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3870362043380737,
      "learning_rate": 0.0005234155297703466,
      "loss": 3.0531,
      "step": 53588
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5908297300338745,
      "learning_rate": 0.0005234127997951245,
      "loss": 3.0193,
      "step": 53589
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3111778497695923,
      "learning_rate": 0.0005234100697783657,
      "loss": 2.9706,
      "step": 53590
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4717046022415161,
      "learning_rate": 0.0005234073397200707,
      "loss": 3.0357,
      "step": 53591
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4374881982803345,
      "learning_rate": 0.0005234046096202401,
      "loss": 3.1419,
      "step": 53592
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6210393905639648,
      "learning_rate": 0.0005234018794788744,
      "loss": 3.0847,
      "step": 53593
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4822443723678589,
      "learning_rate": 0.000523399149295974,
      "loss": 3.1405,
      "step": 53594
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4224014282226562,
      "learning_rate": 0.0005233964190715396,
      "loss": 3.1089,
      "step": 53595
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4500296115875244,
      "learning_rate": 0.0005233936888055714,
      "loss": 3.1615,
      "step": 53596
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2531886100769043,
      "learning_rate": 0.0005233909584980703,
      "loss": 2.9433,
      "step": 53597
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.626477837562561,
      "learning_rate": 0.0005233882281490364,
      "loss": 2.964,
      "step": 53598
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5937143564224243,
      "learning_rate": 0.0005233854977584706,
      "loss": 2.7682,
      "step": 53599
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.10754656791687,
      "learning_rate": 0.0005233827673263731,
      "loss": 3.1572,
      "step": 53600
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5225938558578491,
      "learning_rate": 0.0005233800368527445,
      "loss": 2.9986,
      "step": 53601
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8364790678024292,
      "learning_rate": 0.0005233773063375855,
      "loss": 3.0318,
      "step": 53602
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9982775449752808,
      "learning_rate": 0.0005233745757808964,
      "loss": 2.9074,
      "step": 53603
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6852216720581055,
      "learning_rate": 0.0005233718451826777,
      "loss": 2.9403,
      "step": 53604
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7830148935317993,
      "learning_rate": 0.0005233691145429299,
      "loss": 3.3288,
      "step": 53605
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.0392861366271973,
      "learning_rate": 0.0005233663838616536,
      "loss": 2.9611,
      "step": 53606
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.142051935195923,
      "learning_rate": 0.0005233636531388493,
      "loss": 3.1243,
      "step": 53607
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4829493761062622,
      "learning_rate": 0.0005233609223745174,
      "loss": 2.8334,
      "step": 53608
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7644554376602173,
      "learning_rate": 0.0005233581915686586,
      "loss": 2.9824,
      "step": 53609
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.228886365890503,
      "learning_rate": 0.0005233554607212733,
      "loss": 2.9944,
      "step": 53610
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9849201440811157,
      "learning_rate": 0.000523352729832362,
      "loss": 3.1962,
      "step": 53611
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5869591236114502,
      "learning_rate": 0.0005233499989019251,
      "loss": 3.0743,
      "step": 53612
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8617609739303589,
      "learning_rate": 0.0005233472679299632,
      "loss": 3.0364,
      "step": 53613
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.221564531326294,
      "learning_rate": 0.000523344536916477,
      "loss": 3.0299,
      "step": 53614
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5517845153808594,
      "learning_rate": 0.0005233418058614667,
      "loss": 2.9329,
      "step": 53615
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7162588834762573,
      "learning_rate": 0.000523339074764933,
      "loss": 2.9594,
      "step": 53616
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5895068645477295,
      "learning_rate": 0.0005233363436268764,
      "loss": 3.1263,
      "step": 53617
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.866116523742676,
      "learning_rate": 0.0005233336124472973,
      "loss": 2.8051,
      "step": 53618
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3354192972183228,
      "learning_rate": 0.0005233308812261962,
      "loss": 3.1884,
      "step": 53619
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.419992446899414,
      "learning_rate": 0.0005233281499635738,
      "loss": 3.4549,
      "step": 53620
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7772220373153687,
      "learning_rate": 0.0005233254186594304,
      "loss": 2.9192,
      "step": 53621
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.345388650894165,
      "learning_rate": 0.0005233226873137666,
      "loss": 2.9828,
      "step": 53622
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7527775764465332,
      "learning_rate": 0.0005233199559265829,
      "loss": 3.0129,
      "step": 53623
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3705055713653564,
      "learning_rate": 0.0005233172244978798,
      "loss": 2.9111,
      "step": 53624
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5738892555236816,
      "learning_rate": 0.0005233144930276579,
      "loss": 3.0233,
      "step": 53625
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.308813452720642,
      "learning_rate": 0.0005233117615159174,
      "loss": 3.0854,
      "step": 53626
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.140596628189087,
      "learning_rate": 0.0005233090299626592,
      "loss": 2.8767,
      "step": 53627
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0323550701141357,
      "learning_rate": 0.0005233062983678836,
      "loss": 3.0203,
      "step": 53628
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5106192827224731,
      "learning_rate": 0.0005233035667315913,
      "loss": 2.8255,
      "step": 53629
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7372000217437744,
      "learning_rate": 0.0005233008350537824,
      "loss": 2.8158,
      "step": 53630
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.116629123687744,
      "learning_rate": 0.0005232981033344578,
      "loss": 2.9028,
      "step": 53631
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5158933401107788,
      "learning_rate": 0.0005232953715736178,
      "loss": 2.9169,
      "step": 53632
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3783042430877686,
      "learning_rate": 0.000523292639771263,
      "loss": 3.3153,
      "step": 53633
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0602962970733643,
      "learning_rate": 0.0005232899079273939,
      "loss": 2.8982,
      "step": 53634
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4889771938323975,
      "learning_rate": 0.000523287176042011,
      "loss": 2.7487,
      "step": 53635
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.371908187866211,
      "learning_rate": 0.0005232844441151147,
      "loss": 2.7677,
      "step": 53636
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.744361162185669,
      "learning_rate": 0.0005232817121467058,
      "loss": 3.1606,
      "step": 53637
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5577417612075806,
      "learning_rate": 0.0005232789801367845,
      "loss": 2.9485,
      "step": 53638
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6037472486495972,
      "learning_rate": 0.0005232762480853513,
      "loss": 2.9672,
      "step": 53639
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4086698293685913,
      "learning_rate": 0.000523273515992407,
      "loss": 3.097,
      "step": 53640
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8098527193069458,
      "learning_rate": 0.0005232707838579519,
      "loss": 3.1587,
      "step": 53641
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3207223415374756,
      "learning_rate": 0.0005232680516819865,
      "loss": 3.0217,
      "step": 53642
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6590029001235962,
      "learning_rate": 0.0005232653194645115,
      "loss": 3.0102,
      "step": 53643
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.172323703765869,
      "learning_rate": 0.0005232625872055272,
      "loss": 3.0042,
      "step": 53644
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4574108123779297,
      "learning_rate": 0.000523259854905034,
      "loss": 3.2593,
      "step": 53645
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7714060544967651,
      "learning_rate": 0.0005232571225630328,
      "loss": 2.8637,
      "step": 53646
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5264438390731812,
      "learning_rate": 0.0005232543901795237,
      "loss": 3.0265,
      "step": 53647
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.13869309425354,
      "learning_rate": 0.0005232516577545075,
      "loss": 3.3492,
      "step": 53648
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.488732099533081,
      "learning_rate": 0.0005232489252879846,
      "loss": 2.9227,
      "step": 53649
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9262455701828003,
      "learning_rate": 0.0005232461927799555,
      "loss": 3.0447,
      "step": 53650
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5441007614135742,
      "learning_rate": 0.0005232434602304208,
      "loss": 3.0754,
      "step": 53651
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7535837888717651,
      "learning_rate": 0.0005232407276393807,
      "loss": 2.7412,
      "step": 53652
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1708667278289795,
      "learning_rate": 0.0005232379950068362,
      "loss": 2.9785,
      "step": 53653
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6892356872558594,
      "learning_rate": 0.0005232352623327874,
      "loss": 3.0321,
      "step": 53654
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.591982126235962,
      "learning_rate": 0.0005232325296172349,
      "loss": 3.1755,
      "step": 53655
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.1743128299713135,
      "learning_rate": 0.0005232297968601794,
      "loss": 3.1412,
      "step": 53656
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5453453063964844,
      "learning_rate": 0.0005232270640616212,
      "loss": 2.7711,
      "step": 53657
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6105432510375977,
      "learning_rate": 0.0005232243312215609,
      "loss": 2.995,
      "step": 53658
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5322118997573853,
      "learning_rate": 0.0005232215983399989,
      "loss": 3.0939,
      "step": 53659
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.014188766479492,
      "learning_rate": 0.0005232188654169359,
      "loss": 3.354,
      "step": 53660
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1691737174987793,
      "learning_rate": 0.0005232161324523723,
      "loss": 3.0427,
      "step": 53661
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7247828245162964,
      "learning_rate": 0.0005232133994463085,
      "loss": 2.9223,
      "step": 53662
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8908412456512451,
      "learning_rate": 0.0005232106663987452,
      "loss": 3.0489,
      "step": 53663
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8586843013763428,
      "learning_rate": 0.0005232079333096828,
      "loss": 2.9882,
      "step": 53664
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9547373056411743,
      "learning_rate": 0.0005232052001791218,
      "loss": 3.0277,
      "step": 53665
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3779888153076172,
      "learning_rate": 0.0005232024670070628,
      "loss": 2.9696,
      "step": 53666
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8827399015426636,
      "learning_rate": 0.0005231997337935061,
      "loss": 3.0929,
      "step": 53667
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.0944161415100098,
      "learning_rate": 0.0005231970005384525,
      "loss": 3.0109,
      "step": 53668
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.551538109779358,
      "learning_rate": 0.0005231942672419024,
      "loss": 3.1995,
      "step": 53669
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8211851119995117,
      "learning_rate": 0.0005231915339038562,
      "loss": 3.2362,
      "step": 53670
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.087097406387329,
      "learning_rate": 0.0005231888005243145,
      "loss": 2.8893,
      "step": 53671
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6903190612792969,
      "learning_rate": 0.0005231860671032778,
      "loss": 2.8756,
      "step": 53672
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.723219633102417,
      "learning_rate": 0.0005231833336407465,
      "loss": 3.0268,
      "step": 53673
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0467636585235596,
      "learning_rate": 0.0005231806001367213,
      "loss": 2.8493,
      "step": 53674
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8353056907653809,
      "learning_rate": 0.0005231778665912027,
      "loss": 3.2147,
      "step": 53675
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.456357479095459,
      "learning_rate": 0.000523175133004191,
      "loss": 3.1209,
      "step": 53676
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3243411779403687,
      "learning_rate": 0.0005231723993756867,
      "loss": 3.063,
      "step": 53677
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6211565732955933,
      "learning_rate": 0.0005231696657056906,
      "loss": 3.0865,
      "step": 53678
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5790859460830688,
      "learning_rate": 0.0005231669319942031,
      "loss": 3.1432,
      "step": 53679
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2299120426177979,
      "learning_rate": 0.0005231641982412246,
      "loss": 2.9052,
      "step": 53680
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3087356090545654,
      "learning_rate": 0.0005231614644467556,
      "loss": 3.0704,
      "step": 53681
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3342922925949097,
      "learning_rate": 0.0005231587306107968,
      "loss": 3.0787,
      "step": 53682
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6203874349594116,
      "learning_rate": 0.0005231559967333485,
      "loss": 3.1298,
      "step": 53683
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6641184091567993,
      "learning_rate": 0.0005231532628144112,
      "loss": 3.1041,
      "step": 53684
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7867913246154785,
      "learning_rate": 0.0005231505288539857,
      "loss": 2.9076,
      "step": 53685
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.830265998840332,
      "learning_rate": 0.0005231477948520721,
      "loss": 3.0401,
      "step": 53686
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.557677984237671,
      "learning_rate": 0.0005231450608086712,
      "loss": 3.0044,
      "step": 53687
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7081005573272705,
      "learning_rate": 0.0005231423267237834,
      "loss": 2.9014,
      "step": 53688
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.590167284011841,
      "learning_rate": 0.0005231395925974093,
      "loss": 3.1388,
      "step": 53689
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.829413652420044,
      "learning_rate": 0.0005231368584295493,
      "loss": 3.0751,
      "step": 53690
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.969564199447632,
      "learning_rate": 0.000523134124220204,
      "loss": 3.0708,
      "step": 53691
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2513856887817383,
      "learning_rate": 0.0005231313899693737,
      "loss": 2.9184,
      "step": 53692
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9287407398223877,
      "learning_rate": 0.0005231286556770592,
      "loss": 2.8396,
      "step": 53693
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.583755373954773,
      "learning_rate": 0.0005231259213432608,
      "loss": 2.9691,
      "step": 53694
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.2850570678710938,
      "learning_rate": 0.0005231231869679792,
      "loss": 2.9914,
      "step": 53695
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.5048794746398926,
      "learning_rate": 0.0005231204525512146,
      "loss": 3.0245,
      "step": 53696
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.767676830291748,
      "learning_rate": 0.0005231177180929677,
      "loss": 3.0698,
      "step": 53697
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6330182552337646,
      "learning_rate": 0.0005231149835932392,
      "loss": 2.8952,
      "step": 53698
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.9759724140167236,
      "learning_rate": 0.0005231122490520293,
      "loss": 2.9256,
      "step": 53699
    },
    {
      "epoch": 0.7,
      "grad_norm": 5.491284370422363,
      "learning_rate": 0.0005231095144693386,
      "loss": 2.9899,
      "step": 53700
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.4203732013702393,
      "learning_rate": 0.0005231067798451677,
      "loss": 3.1624,
      "step": 53701
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8139787912368774,
      "learning_rate": 0.000523104045179517,
      "loss": 2.9268,
      "step": 53702
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7526516914367676,
      "learning_rate": 0.000523101310472387,
      "loss": 3.1582,
      "step": 53703
    },
    {
      "epoch": 0.7,
      "grad_norm": 4.24368143081665,
      "learning_rate": 0.0005230985757237783,
      "loss": 2.9344,
      "step": 53704
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.9366836547851562,
      "learning_rate": 0.0005230958409336914,
      "loss": 3.0352,
      "step": 53705
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8729580640792847,
      "learning_rate": 0.0005230931061021266,
      "loss": 2.8837,
      "step": 53706
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8128552436828613,
      "learning_rate": 0.0005230903712290847,
      "loss": 3.1477,
      "step": 53707
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9170829057693481,
      "learning_rate": 0.0005230876363145661,
      "loss": 3.0616,
      "step": 53708
    },
    {
      "epoch": 0.7,
      "grad_norm": 4.341248035430908,
      "learning_rate": 0.0005230849013585714,
      "loss": 2.8281,
      "step": 53709
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4582107067108154,
      "learning_rate": 0.0005230821663611009,
      "loss": 3.0388,
      "step": 53710
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6078457832336426,
      "learning_rate": 0.0005230794313221553,
      "loss": 3.2356,
      "step": 53711
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9432474374771118,
      "learning_rate": 0.0005230766962417348,
      "loss": 3.0074,
      "step": 53712
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.5129873752593994,
      "learning_rate": 0.0005230739611198403,
      "loss": 3.3336,
      "step": 53713
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.651453971862793,
      "learning_rate": 0.0005230712259564721,
      "loss": 3.1102,
      "step": 53714
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.174670934677124,
      "learning_rate": 0.0005230684907516308,
      "loss": 2.8199,
      "step": 53715
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.001453161239624,
      "learning_rate": 0.0005230657555053168,
      "loss": 2.9745,
      "step": 53716
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3130125999450684,
      "learning_rate": 0.0005230630202175306,
      "loss": 2.9001,
      "step": 53717
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4649837017059326,
      "learning_rate": 0.0005230602848882729,
      "loss": 3.1066,
      "step": 53718
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6152039766311646,
      "learning_rate": 0.000523057549517544,
      "loss": 2.8129,
      "step": 53719
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.2895190715789795,
      "learning_rate": 0.0005230548141053446,
      "loss": 2.9146,
      "step": 53720
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.2516369819641113,
      "learning_rate": 0.000523052078651675,
      "loss": 3.2028,
      "step": 53721
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.238663673400879,
      "learning_rate": 0.0005230493431565359,
      "loss": 2.8906,
      "step": 53722
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7572546005249023,
      "learning_rate": 0.0005230466076199277,
      "loss": 2.9594,
      "step": 53723
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7636436223983765,
      "learning_rate": 0.0005230438720418508,
      "loss": 3.1587,
      "step": 53724
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3130404949188232,
      "learning_rate": 0.0005230411364223059,
      "loss": 3.0885,
      "step": 53725
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4053653478622437,
      "learning_rate": 0.0005230384007612935,
      "loss": 2.9993,
      "step": 53726
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.913948655128479,
      "learning_rate": 0.0005230356650588139,
      "loss": 2.8231,
      "step": 53727
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.995621919631958,
      "learning_rate": 0.000523032929314868,
      "loss": 3.006,
      "step": 53728
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.6192917823791504,
      "learning_rate": 0.0005230301935294559,
      "loss": 3.2349,
      "step": 53729
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7154488563537598,
      "learning_rate": 0.0005230274577025783,
      "loss": 3.106,
      "step": 53730
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1274049282073975,
      "learning_rate": 0.0005230247218342357,
      "loss": 3.0643,
      "step": 53731
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8997116088867188,
      "learning_rate": 0.0005230219859244285,
      "loss": 2.9791,
      "step": 53732
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.396509051322937,
      "learning_rate": 0.0005230192499731575,
      "loss": 3.082,
      "step": 53733
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4214844703674316,
      "learning_rate": 0.000523016513980423,
      "loss": 3.1116,
      "step": 53734
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.114767074584961,
      "learning_rate": 0.0005230137779462252,
      "loss": 2.9425,
      "step": 53735
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5740900039672852,
      "learning_rate": 0.0005230110418705653,
      "loss": 3.0372,
      "step": 53736
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2863351106643677,
      "learning_rate": 0.0005230083057534432,
      "loss": 2.8162,
      "step": 53737
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7455174922943115,
      "learning_rate": 0.0005230055695948598,
      "loss": 3.0196,
      "step": 53738
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4816148281097412,
      "learning_rate": 0.0005230028333948153,
      "loss": 3.032,
      "step": 53739
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.754285454750061,
      "learning_rate": 0.0005230000971533105,
      "loss": 3.2022,
      "step": 53740
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3483331203460693,
      "learning_rate": 0.0005229973608703457,
      "loss": 2.7531,
      "step": 53741
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7359777688980103,
      "learning_rate": 0.0005229946245459215,
      "loss": 2.9989,
      "step": 53742
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3171685934066772,
      "learning_rate": 0.0005229918881800385,
      "loss": 2.9508,
      "step": 53743
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4126776456832886,
      "learning_rate": 0.000522989151772697,
      "loss": 3.0382,
      "step": 53744
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6417585611343384,
      "learning_rate": 0.0005229864153238976,
      "loss": 2.9627,
      "step": 53745
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5832782983779907,
      "learning_rate": 0.0005229836788336409,
      "loss": 2.9134,
      "step": 53746
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4803025722503662,
      "learning_rate": 0.0005229809423019272,
      "loss": 3.0729,
      "step": 53747
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8814663887023926,
      "learning_rate": 0.0005229782057287572,
      "loss": 2.9511,
      "step": 53748
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5063148736953735,
      "learning_rate": 0.0005229754691141313,
      "loss": 2.9355,
      "step": 53749
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5451842546463013,
      "learning_rate": 0.0005229727324580501,
      "loss": 3.1108,
      "step": 53750
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5869444608688354,
      "learning_rate": 0.0005229699957605141,
      "loss": 3.0235,
      "step": 53751
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4494030475616455,
      "learning_rate": 0.0005229672590215236,
      "loss": 2.8976,
      "step": 53752
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4527719020843506,
      "learning_rate": 0.0005229645222410794,
      "loss": 2.8553,
      "step": 53753
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7067129611968994,
      "learning_rate": 0.0005229617854191819,
      "loss": 2.8972,
      "step": 53754
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7676239013671875,
      "learning_rate": 0.0005229590485558315,
      "loss": 3.2166,
      "step": 53755
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8242634534835815,
      "learning_rate": 0.0005229563116510289,
      "loss": 2.9253,
      "step": 53756
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.514869213104248,
      "learning_rate": 0.0005229535747047745,
      "loss": 2.6692,
      "step": 53757
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9588068723678589,
      "learning_rate": 0.0005229508377170688,
      "loss": 2.8217,
      "step": 53758
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.478034257888794,
      "learning_rate": 0.0005229481006879123,
      "loss": 3.0395,
      "step": 53759
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.496987223625183,
      "learning_rate": 0.0005229453636173056,
      "loss": 2.9514,
      "step": 53760
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3081248998641968,
      "learning_rate": 0.000522942626505249,
      "loss": 2.9234,
      "step": 53761
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5117994546890259,
      "learning_rate": 0.0005229398893517433,
      "loss": 3.116,
      "step": 53762
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.483677625656128,
      "learning_rate": 0.0005229371521567889,
      "loss": 2.9563,
      "step": 53763
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5214186906814575,
      "learning_rate": 0.0005229344149203862,
      "loss": 2.9262,
      "step": 53764
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8965994119644165,
      "learning_rate": 0.0005229316776425359,
      "loss": 3.1614,
      "step": 53765
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.683481216430664,
      "learning_rate": 0.0005229289403232382,
      "loss": 3.0227,
      "step": 53766
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5075384378433228,
      "learning_rate": 0.000522926202962494,
      "loss": 3.0612,
      "step": 53767
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.511124849319458,
      "learning_rate": 0.0005229234655603034,
      "loss": 2.9258,
      "step": 53768
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4961674213409424,
      "learning_rate": 0.0005229207281166673,
      "loss": 3.1589,
      "step": 53769
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6861603260040283,
      "learning_rate": 0.0005229179906315859,
      "loss": 3.1801,
      "step": 53770
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5779352188110352,
      "learning_rate": 0.00052291525310506,
      "loss": 2.8648,
      "step": 53771
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0434634685516357,
      "learning_rate": 0.0005229125155370898,
      "loss": 2.952,
      "step": 53772
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4139604568481445,
      "learning_rate": 0.000522909777927676,
      "loss": 3.0665,
      "step": 53773
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4914129972457886,
      "learning_rate": 0.0005229070402768191,
      "loss": 3.0808,
      "step": 53774
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.40062415599823,
      "learning_rate": 0.0005229043025845195,
      "loss": 3.0399,
      "step": 53775
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1746803522109985,
      "learning_rate": 0.0005229015648507778,
      "loss": 3.098,
      "step": 53776
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.415299654006958,
      "learning_rate": 0.0005228988270755945,
      "loss": 3.3166,
      "step": 53777
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.786425232887268,
      "learning_rate": 0.0005228960892589701,
      "loss": 2.8806,
      "step": 53778
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3583009243011475,
      "learning_rate": 0.0005228933514009052,
      "loss": 3.0563,
      "step": 53779
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5595184564590454,
      "learning_rate": 0.0005228906135014001,
      "loss": 2.9166,
      "step": 53780
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7200051546096802,
      "learning_rate": 0.0005228878755604554,
      "loss": 3.1448,
      "step": 53781
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8520811796188354,
      "learning_rate": 0.0005228851375780717,
      "loss": 2.924,
      "step": 53782
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4573991298675537,
      "learning_rate": 0.0005228823995542495,
      "loss": 3.0781,
      "step": 53783
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.974466323852539,
      "learning_rate": 0.0005228796614889892,
      "loss": 3.0362,
      "step": 53784
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.028062105178833,
      "learning_rate": 0.0005228769233822913,
      "loss": 2.914,
      "step": 53785
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7742034196853638,
      "learning_rate": 0.0005228741852341566,
      "loss": 3.0015,
      "step": 53786
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5234911441802979,
      "learning_rate": 0.0005228714470445852,
      "loss": 3.039,
      "step": 53787
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7041386365890503,
      "learning_rate": 0.0005228687088135778,
      "loss": 3.1712,
      "step": 53788
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5408358573913574,
      "learning_rate": 0.0005228659705411348,
      "loss": 2.9327,
      "step": 53789
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.64142644405365,
      "learning_rate": 0.000522863232227257,
      "loss": 2.997,
      "step": 53790
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8011211156845093,
      "learning_rate": 0.0005228604938719446,
      "loss": 2.8604,
      "step": 53791
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5508009195327759,
      "learning_rate": 0.0005228577554751983,
      "loss": 2.9717,
      "step": 53792
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5031898021697998,
      "learning_rate": 0.0005228550170370185,
      "loss": 3.1939,
      "step": 53793
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.480620265007019,
      "learning_rate": 0.0005228522785574058,
      "loss": 3.2588,
      "step": 53794
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3284507989883423,
      "learning_rate": 0.0005228495400363605,
      "loss": 3.2398,
      "step": 53795
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7976075410842896,
      "learning_rate": 0.0005228468014738835,
      "loss": 3.2047,
      "step": 53796
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3961408138275146,
      "learning_rate": 0.0005228440628699749,
      "loss": 2.9629,
      "step": 53797
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9552713632583618,
      "learning_rate": 0.0005228413242246354,
      "loss": 2.9774,
      "step": 53798
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.380824327468872,
      "learning_rate": 0.0005228385855378656,
      "loss": 2.8431,
      "step": 53799
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6491124629974365,
      "learning_rate": 0.0005228358468096657,
      "loss": 2.831,
      "step": 53800
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.324211835861206,
      "learning_rate": 0.0005228331080400366,
      "loss": 2.9423,
      "step": 53801
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4056090116500854,
      "learning_rate": 0.0005228303692289785,
      "loss": 2.8993,
      "step": 53802
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5872719287872314,
      "learning_rate": 0.000522827630376492,
      "loss": 3.3604,
      "step": 53803
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7173627614974976,
      "learning_rate": 0.0005228248914825778,
      "loss": 3.1606,
      "step": 53804
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6835901737213135,
      "learning_rate": 0.0005228221525472362,
      "loss": 2.9437,
      "step": 53805
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7148900032043457,
      "learning_rate": 0.0005228194135704677,
      "loss": 2.9855,
      "step": 53806
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6960543394088745,
      "learning_rate": 0.0005228166745522728,
      "loss": 3.2582,
      "step": 53807
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4756033420562744,
      "learning_rate": 0.0005228139354926522,
      "loss": 2.9499,
      "step": 53808
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6958794593811035,
      "learning_rate": 0.0005228111963916063,
      "loss": 3.0517,
      "step": 53809
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9642460346221924,
      "learning_rate": 0.0005228084572491355,
      "loss": 2.9992,
      "step": 53810
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4085454940795898,
      "learning_rate": 0.0005228057180652404,
      "loss": 2.9635,
      "step": 53811
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.809167742729187,
      "learning_rate": 0.0005228029788399216,
      "loss": 3.0689,
      "step": 53812
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.398315191268921,
      "learning_rate": 0.0005228002395731795,
      "loss": 3.0501,
      "step": 53813
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5577361583709717,
      "learning_rate": 0.0005227975002650145,
      "loss": 3.2801,
      "step": 53814
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.1736929416656494,
      "learning_rate": 0.0005227947609154274,
      "loss": 2.9203,
      "step": 53815
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7006311416625977,
      "learning_rate": 0.0005227920215244184,
      "loss": 3.4103,
      "step": 53816
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.858851671218872,
      "learning_rate": 0.0005227892820919883,
      "loss": 2.9717,
      "step": 53817
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1951000690460205,
      "learning_rate": 0.0005227865426181373,
      "loss": 3.0839,
      "step": 53818
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6030629873275757,
      "learning_rate": 0.0005227838031028663,
      "loss": 3.0752,
      "step": 53819
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7821334600448608,
      "learning_rate": 0.0005227810635461754,
      "loss": 3.1471,
      "step": 53820
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.426817774772644,
      "learning_rate": 0.0005227783239480653,
      "loss": 2.9705,
      "step": 53821
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4698163270950317,
      "learning_rate": 0.0005227755843085366,
      "loss": 3.2271,
      "step": 53822
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8567460775375366,
      "learning_rate": 0.0005227728446275898,
      "loss": 3.1861,
      "step": 53823
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5666322708129883,
      "learning_rate": 0.0005227701049052251,
      "loss": 3.2536,
      "step": 53824
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.378883957862854,
      "learning_rate": 0.0005227673651414433,
      "loss": 3.2283,
      "step": 53825
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0284101963043213,
      "learning_rate": 0.0005227646253362449,
      "loss": 3.1017,
      "step": 53826
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2873425483703613,
      "learning_rate": 0.0005227618854896304,
      "loss": 2.8231,
      "step": 53827
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7680070400238037,
      "learning_rate": 0.0005227591456016001,
      "loss": 3.0432,
      "step": 53828
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4243377447128296,
      "learning_rate": 0.0005227564056721548,
      "loss": 2.9056,
      "step": 53829
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7122693061828613,
      "learning_rate": 0.0005227536657012948,
      "loss": 3.2756,
      "step": 53830
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5684841871261597,
      "learning_rate": 0.0005227509256890208,
      "loss": 3.2484,
      "step": 53831
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4668978452682495,
      "learning_rate": 0.0005227481856353331,
      "loss": 3.0953,
      "step": 53832
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6297707557678223,
      "learning_rate": 0.0005227454455402323,
      "loss": 2.9782,
      "step": 53833
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4183392524719238,
      "learning_rate": 0.000522742705403719,
      "loss": 2.8203,
      "step": 53834
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4464446306228638,
      "learning_rate": 0.0005227399652257935,
      "loss": 3.0722,
      "step": 53835
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.610213279724121,
      "learning_rate": 0.0005227372250064566,
      "loss": 3.0345,
      "step": 53836
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3467235565185547,
      "learning_rate": 0.0005227344847457085,
      "loss": 3.07,
      "step": 53837
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.561699390411377,
      "learning_rate": 0.0005227317444435499,
      "loss": 2.9732,
      "step": 53838
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.660500407218933,
      "learning_rate": 0.0005227290040999813,
      "loss": 3.1219,
      "step": 53839
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.371975898742676,
      "learning_rate": 0.0005227262637150031,
      "loss": 2.9282,
      "step": 53840
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6188232898712158,
      "learning_rate": 0.000522723523288616,
      "loss": 3.1837,
      "step": 53841
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5119636058807373,
      "learning_rate": 0.0005227207828208203,
      "loss": 3.0522,
      "step": 53842
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5151668787002563,
      "learning_rate": 0.0005227180423116166,
      "loss": 3.0107,
      "step": 53843
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.598519206047058,
      "learning_rate": 0.0005227153017610054,
      "loss": 2.9795,
      "step": 53844
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8148822784423828,
      "learning_rate": 0.0005227125611689873,
      "loss": 2.893,
      "step": 53845
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4188982248306274,
      "learning_rate": 0.0005227098205355627,
      "loss": 2.9382,
      "step": 53846
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6690163612365723,
      "learning_rate": 0.0005227070798607321,
      "loss": 2.968,
      "step": 53847
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4618202447891235,
      "learning_rate": 0.0005227043391444962,
      "loss": 3.3172,
      "step": 53848
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4970589876174927,
      "learning_rate": 0.0005227015983868552,
      "loss": 3.3296,
      "step": 53849
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4655334949493408,
      "learning_rate": 0.0005226988575878097,
      "loss": 3.2359,
      "step": 53850
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8402280807495117,
      "learning_rate": 0.0005226961167473605,
      "loss": 2.821,
      "step": 53851
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.402687907218933,
      "learning_rate": 0.0005226933758655077,
      "loss": 3.0853,
      "step": 53852
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6298967599868774,
      "learning_rate": 0.0005226906349422521,
      "loss": 3.2527,
      "step": 53853
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7072757482528687,
      "learning_rate": 0.0005226878939775941,
      "loss": 3.2509,
      "step": 53854
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5114827156066895,
      "learning_rate": 0.0005226851529715343,
      "loss": 3.0764,
      "step": 53855
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3242872953414917,
      "learning_rate": 0.000522682411924073,
      "loss": 3.0889,
      "step": 53856
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7617638111114502,
      "learning_rate": 0.0005226796708352108,
      "loss": 3.0497,
      "step": 53857
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.551311731338501,
      "learning_rate": 0.0005226769297049482,
      "loss": 3.127,
      "step": 53858
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.808637022972107,
      "learning_rate": 0.000522674188533286,
      "loss": 3.2299,
      "step": 53859
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9113808870315552,
      "learning_rate": 0.0005226714473202243,
      "loss": 2.8366,
      "step": 53860
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3572306632995605,
      "learning_rate": 0.0005226687060657638,
      "loss": 3.1022,
      "step": 53861
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5238444805145264,
      "learning_rate": 0.000522665964769905,
      "loss": 3.2357,
      "step": 53862
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8808937072753906,
      "learning_rate": 0.0005226632234326484,
      "loss": 3.0061,
      "step": 53863
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7556560039520264,
      "learning_rate": 0.0005226604820539945,
      "loss": 3.0258,
      "step": 53864
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0110936164855957,
      "learning_rate": 0.0005226577406339438,
      "loss": 3.3421,
      "step": 53865
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2068519592285156,
      "learning_rate": 0.0005226549991724968,
      "loss": 3.003,
      "step": 53866
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8224231004714966,
      "learning_rate": 0.000522652257669654,
      "loss": 3.0382,
      "step": 53867
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4213298559188843,
      "learning_rate": 0.000522649516125416,
      "loss": 2.9349,
      "step": 53868
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6098133325576782,
      "learning_rate": 0.0005226467745397832,
      "loss": 3.2013,
      "step": 53869
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6247880458831787,
      "learning_rate": 0.0005226440329127563,
      "loss": 3.2437,
      "step": 53870
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7261035442352295,
      "learning_rate": 0.0005226412912443355,
      "loss": 2.9321,
      "step": 53871
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4315929412841797,
      "learning_rate": 0.0005226385495345216,
      "loss": 3.0412,
      "step": 53872
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3439171314239502,
      "learning_rate": 0.000522635807783315,
      "loss": 3.0486,
      "step": 53873
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7480542659759521,
      "learning_rate": 0.0005226330659907161,
      "loss": 3.3257,
      "step": 53874
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.638377070426941,
      "learning_rate": 0.0005226303241567256,
      "loss": 3.07,
      "step": 53875
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6406217813491821,
      "learning_rate": 0.0005226275822813439,
      "loss": 3.1644,
      "step": 53876
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7106698751449585,
      "learning_rate": 0.0005226248403645715,
      "loss": 3.0883,
      "step": 53877
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5982868671417236,
      "learning_rate": 0.000522622098406409,
      "loss": 3.2429,
      "step": 53878
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5007119178771973,
      "learning_rate": 0.0005226193564068568,
      "loss": 3.055,
      "step": 53879
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5500006675720215,
      "learning_rate": 0.0005226166143659154,
      "loss": 2.9906,
      "step": 53880
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7263959646224976,
      "learning_rate": 0.0005226138722835854,
      "loss": 3.0435,
      "step": 53881
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.956085205078125,
      "learning_rate": 0.0005226111301598673,
      "loss": 3.0003,
      "step": 53882
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4255472421646118,
      "learning_rate": 0.0005226083879947617,
      "loss": 2.9926,
      "step": 53883
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.51296067237854,
      "learning_rate": 0.0005226056457882689,
      "loss": 2.9375,
      "step": 53884
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6568081378936768,
      "learning_rate": 0.0005226029035403896,
      "loss": 3.0644,
      "step": 53885
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.734701156616211,
      "learning_rate": 0.000522600161251124,
      "loss": 3.223,
      "step": 53886
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8021783828735352,
      "learning_rate": 0.0005225974189204729,
      "loss": 2.8893,
      "step": 53887
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.840515375137329,
      "learning_rate": 0.0005225946765484369,
      "loss": 3.0812,
      "step": 53888
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6360417604446411,
      "learning_rate": 0.0005225919341350163,
      "loss": 3.04,
      "step": 53889
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9519227743148804,
      "learning_rate": 0.0005225891916802115,
      "loss": 3.0095,
      "step": 53890
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6190084218978882,
      "learning_rate": 0.0005225864491840233,
      "loss": 2.8572,
      "step": 53891
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6699169874191284,
      "learning_rate": 0.000522583706646452,
      "loss": 3.22,
      "step": 53892
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.581969141960144,
      "learning_rate": 0.0005225809640674983,
      "loss": 2.8681,
      "step": 53893
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6014444828033447,
      "learning_rate": 0.0005225782214471625,
      "loss": 3.093,
      "step": 53894
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.852086067199707,
      "learning_rate": 0.0005225754787854452,
      "loss": 3.0759,
      "step": 53895
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6100564002990723,
      "learning_rate": 0.000522572736082347,
      "loss": 2.9484,
      "step": 53896
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.630821466445923,
      "learning_rate": 0.0005225699933378682,
      "loss": 2.9335,
      "step": 53897
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9150493144989014,
      "learning_rate": 0.0005225672505520095,
      "loss": 2.9214,
      "step": 53898
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7774032354354858,
      "learning_rate": 0.0005225645077247714,
      "loss": 3.3281,
      "step": 53899
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.311655044555664,
      "learning_rate": 0.0005225617648561544,
      "loss": 3.0615,
      "step": 53900
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.97244131565094,
      "learning_rate": 0.0005225590219461589,
      "loss": 3.0546,
      "step": 53901
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9775841236114502,
      "learning_rate": 0.0005225562789947854,
      "loss": 2.8238,
      "step": 53902
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8521037101745605,
      "learning_rate": 0.0005225535360020346,
      "loss": 3.2515,
      "step": 53903
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.464620590209961,
      "learning_rate": 0.0005225507929679068,
      "loss": 3.1257,
      "step": 53904
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.6912999153137207,
      "learning_rate": 0.0005225480498924027,
      "loss": 3.0696,
      "step": 53905
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5094361305236816,
      "learning_rate": 0.0005225453067755226,
      "loss": 3.1298,
      "step": 53906
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.783066511154175,
      "learning_rate": 0.0005225425636172672,
      "loss": 3.1558,
      "step": 53907
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.393256187438965,
      "learning_rate": 0.000522539820417637,
      "loss": 3.0519,
      "step": 53908
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7511050701141357,
      "learning_rate": 0.0005225370771766324,
      "loss": 2.9311,
      "step": 53909
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3125100135803223,
      "learning_rate": 0.0005225343338942539,
      "loss": 2.9399,
      "step": 53910
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4796870946884155,
      "learning_rate": 0.000522531590570502,
      "loss": 3.0059,
      "step": 53911
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6015992164611816,
      "learning_rate": 0.0005225288472053774,
      "loss": 3.0421,
      "step": 53912
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.669797897338867,
      "learning_rate": 0.0005225261037988806,
      "loss": 2.6678,
      "step": 53913
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7956751585006714,
      "learning_rate": 0.0005225233603510118,
      "loss": 3.0032,
      "step": 53914
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4490203857421875,
      "learning_rate": 0.0005225206168617718,
      "loss": 3.1565,
      "step": 53915
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5634739398956299,
      "learning_rate": 0.000522517873331161,
      "loss": 3.3806,
      "step": 53916
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7888872623443604,
      "learning_rate": 0.0005225151297591798,
      "loss": 2.9098,
      "step": 53917
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4584630727767944,
      "learning_rate": 0.000522512386145829,
      "loss": 2.9204,
      "step": 53918
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3832608461380005,
      "learning_rate": 0.0005225096424911089,
      "loss": 3.3206,
      "step": 53919
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.364898920059204,
      "learning_rate": 0.0005225068987950201,
      "loss": 2.9888,
      "step": 53920
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.810880422592163,
      "learning_rate": 0.000522504155057563,
      "loss": 2.7604,
      "step": 53921
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6551927328109741,
      "learning_rate": 0.0005225014112787383,
      "loss": 3.1953,
      "step": 53922
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8827108144760132,
      "learning_rate": 0.0005224986674585462,
      "loss": 3.2457,
      "step": 53923
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7510401010513306,
      "learning_rate": 0.0005224959235969875,
      "loss": 2.8599,
      "step": 53924
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6332061290740967,
      "learning_rate": 0.0005224931796940625,
      "loss": 2.8793,
      "step": 53925
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.641864538192749,
      "learning_rate": 0.000522490435749772,
      "loss": 3.1185,
      "step": 53926
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9395313262939453,
      "learning_rate": 0.0005224876917641162,
      "loss": 3.0244,
      "step": 53927
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.262507677078247,
      "learning_rate": 0.0005224849477370958,
      "loss": 2.7137,
      "step": 53928
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9588881731033325,
      "learning_rate": 0.0005224822036687112,
      "loss": 3.1658,
      "step": 53929
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.0310795307159424,
      "learning_rate": 0.000522479459558963,
      "loss": 2.9049,
      "step": 53930
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.180147886276245,
      "learning_rate": 0.0005224767154078517,
      "loss": 3.2516,
      "step": 53931
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2590389251708984,
      "learning_rate": 0.0005224739712153776,
      "loss": 2.9642,
      "step": 53932
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8184806108474731,
      "learning_rate": 0.0005224712269815416,
      "loss": 2.9016,
      "step": 53933
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.585665225982666,
      "learning_rate": 0.0005224684827063439,
      "loss": 2.956,
      "step": 53934
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7675570249557495,
      "learning_rate": 0.0005224657383897851,
      "loss": 3.1615,
      "step": 53935
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7473571300506592,
      "learning_rate": 0.0005224629940318658,
      "loss": 3.001,
      "step": 53936
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.57428777217865,
      "learning_rate": 0.0005224602496325864,
      "loss": 2.8721,
      "step": 53937
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3395819664001465,
      "learning_rate": 0.0005224575051919475,
      "loss": 3.2708,
      "step": 53938
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7641956806182861,
      "learning_rate": 0.0005224547607099493,
      "loss": 2.9644,
      "step": 53939
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8713372945785522,
      "learning_rate": 0.0005224520161865928,
      "loss": 2.8859,
      "step": 53940
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4628329277038574,
      "learning_rate": 0.0005224492716218782,
      "loss": 3.2135,
      "step": 53941
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2784593105316162,
      "learning_rate": 0.0005224465270158061,
      "loss": 3.1022,
      "step": 53942
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0841472148895264,
      "learning_rate": 0.000522443782368377,
      "loss": 2.9816,
      "step": 53943
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3264172077178955,
      "learning_rate": 0.0005224410376795913,
      "loss": 3.0751,
      "step": 53944
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4927400350570679,
      "learning_rate": 0.0005224382929494497,
      "loss": 3.0099,
      "step": 53945
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6312748193740845,
      "learning_rate": 0.0005224355481779527,
      "loss": 2.9896,
      "step": 53946
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.499200701713562,
      "learning_rate": 0.0005224328033651007,
      "loss": 3.1715,
      "step": 53947
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4451059103012085,
      "learning_rate": 0.0005224300585108941,
      "loss": 3.1234,
      "step": 53948
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3193796873092651,
      "learning_rate": 0.0005224273136153337,
      "loss": 2.9524,
      "step": 53949
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6380176544189453,
      "learning_rate": 0.0005224245686784197,
      "loss": 2.957,
      "step": 53950
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4781546592712402,
      "learning_rate": 0.0005224218237001529,
      "loss": 3.1666,
      "step": 53951
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6188958883285522,
      "learning_rate": 0.0005224190786805338,
      "loss": 2.7899,
      "step": 53952
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5947459936141968,
      "learning_rate": 0.0005224163336195626,
      "loss": 3.0803,
      "step": 53953
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8446887731552124,
      "learning_rate": 0.00052241358851724,
      "loss": 3.2107,
      "step": 53954
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6628470420837402,
      "learning_rate": 0.0005224108433735666,
      "loss": 3.125,
      "step": 53955
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6159850358963013,
      "learning_rate": 0.0005224080981885428,
      "loss": 3.0004,
      "step": 53956
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8633090257644653,
      "learning_rate": 0.0005224053529621691,
      "loss": 3.1096,
      "step": 53957
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6374537944793701,
      "learning_rate": 0.0005224026076944461,
      "loss": 2.9391,
      "step": 53958
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3923249244689941,
      "learning_rate": 0.0005223998623853743,
      "loss": 3.1546,
      "step": 53959
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4381059408187866,
      "learning_rate": 0.0005223971170349541,
      "loss": 3.0374,
      "step": 53960
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4939831495285034,
      "learning_rate": 0.0005223943716431862,
      "loss": 3.0861,
      "step": 53961
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5437791347503662,
      "learning_rate": 0.0005223916262100708,
      "loss": 3.156,
      "step": 53962
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4904206991195679,
      "learning_rate": 0.0005223888807356087,
      "loss": 3.0616,
      "step": 53963
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6970294713974,
      "learning_rate": 0.0005223861352198003,
      "loss": 2.9933,
      "step": 53964
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5597463846206665,
      "learning_rate": 0.000522383389662646,
      "loss": 3.0789,
      "step": 53965
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9312645196914673,
      "learning_rate": 0.0005223806440641465,
      "loss": 3.2347,
      "step": 53966
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0157992839813232,
      "learning_rate": 0.0005223778984243024,
      "loss": 2.8979,
      "step": 53967
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7410616874694824,
      "learning_rate": 0.0005223751527431139,
      "loss": 3.169,
      "step": 53968
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.689788341522217,
      "learning_rate": 0.0005223724070205817,
      "loss": 3.0366,
      "step": 53969
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1513586044311523,
      "learning_rate": 0.0005223696612567063,
      "loss": 2.7836,
      "step": 53970
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5889660120010376,
      "learning_rate": 0.0005223669154514881,
      "loss": 3.0374,
      "step": 53971
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8104901313781738,
      "learning_rate": 0.0005223641696049277,
      "loss": 2.9442,
      "step": 53972
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6184827089309692,
      "learning_rate": 0.0005223614237170257,
      "loss": 3.0495,
      "step": 53973
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6515898704528809,
      "learning_rate": 0.0005223586777877824,
      "loss": 2.9359,
      "step": 53974
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.611607551574707,
      "learning_rate": 0.0005223559318171986,
      "loss": 3.1985,
      "step": 53975
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6738487482070923,
      "learning_rate": 0.0005223531858052744,
      "loss": 2.9753,
      "step": 53976
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5810835361480713,
      "learning_rate": 0.0005223504397520107,
      "loss": 3.3104,
      "step": 53977
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.379944086074829,
      "learning_rate": 0.0005223476936574078,
      "loss": 3.0167,
      "step": 53978
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.298793077468872,
      "learning_rate": 0.0005223449475214663,
      "loss": 3.1675,
      "step": 53979
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.016270399093628,
      "learning_rate": 0.0005223422013441866,
      "loss": 2.9014,
      "step": 53980
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.071110248565674,
      "learning_rate": 0.0005223394551255694,
      "loss": 2.842,
      "step": 53981
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2664369344711304,
      "learning_rate": 0.0005223367088656151,
      "loss": 3.3726,
      "step": 53982
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4571607112884521,
      "learning_rate": 0.0005223339625643241,
      "loss": 3.0858,
      "step": 53983
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0771446228027344,
      "learning_rate": 0.000522331216221697,
      "loss": 3.1878,
      "step": 53984
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6277735233306885,
      "learning_rate": 0.0005223284698377344,
      "loss": 2.8867,
      "step": 53985
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8316502571105957,
      "learning_rate": 0.0005223257234124369,
      "loss": 3.0218,
      "step": 53986
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.22457218170166,
      "learning_rate": 0.0005223229769458046,
      "loss": 3.2031,
      "step": 53987
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5289924144744873,
      "learning_rate": 0.0005223202304378384,
      "loss": 3.1361,
      "step": 53988
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3615761995315552,
      "learning_rate": 0.0005223174838885386,
      "loss": 3.0304,
      "step": 53989
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5060687065124512,
      "learning_rate": 0.0005223147372979058,
      "loss": 2.9704,
      "step": 53990
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5948057174682617,
      "learning_rate": 0.0005223119906659406,
      "loss": 3.1999,
      "step": 53991
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3504246473312378,
      "learning_rate": 0.0005223092439926432,
      "loss": 3.0409,
      "step": 53992
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5400116443634033,
      "learning_rate": 0.0005223064972780145,
      "loss": 3.0622,
      "step": 53993
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.784454584121704,
      "learning_rate": 0.0005223037505220548,
      "loss": 2.9585,
      "step": 53994
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5072449445724487,
      "learning_rate": 0.0005223010037247645,
      "loss": 2.8406,
      "step": 53995
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5393526554107666,
      "learning_rate": 0.0005222982568861443,
      "loss": 3.1306,
      "step": 53996
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.1427760124206543,
      "learning_rate": 0.0005222955100061948,
      "loss": 3.0324,
      "step": 53997
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5991661548614502,
      "learning_rate": 0.0005222927630849163,
      "loss": 3.2921,
      "step": 53998
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6053358316421509,
      "learning_rate": 0.0005222900161223092,
      "loss": 3.0249,
      "step": 53999
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.593102216720581,
      "learning_rate": 0.0005222872691183745,
      "loss": 3.1164,
      "step": 54000
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6471577882766724,
      "learning_rate": 0.0005222845220731122,
      "loss": 3.2144,
      "step": 54001
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0053298473358154,
      "learning_rate": 0.000522281774986523,
      "loss": 3.1131,
      "step": 54002
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9618666172027588,
      "learning_rate": 0.0005222790278586074,
      "loss": 2.9837,
      "step": 54003
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3715710639953613,
      "learning_rate": 0.0005222762806893661,
      "loss": 3.0892,
      "step": 54004
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1160757541656494,
      "learning_rate": 0.0005222735334787993,
      "loss": 2.8283,
      "step": 54005
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.013058662414551,
      "learning_rate": 0.0005222707862269077,
      "loss": 3.053,
      "step": 54006
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4632614850997925,
      "learning_rate": 0.0005222680389336918,
      "loss": 2.9341,
      "step": 54007
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8952635526657104,
      "learning_rate": 0.000522265291599152,
      "loss": 3.2608,
      "step": 54008
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7635772228240967,
      "learning_rate": 0.000522262544223289,
      "loss": 2.9867,
      "step": 54009
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9975777864456177,
      "learning_rate": 0.000522259796806103,
      "loss": 2.8988,
      "step": 54010
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.501317024230957,
      "learning_rate": 0.000522257049347595,
      "loss": 2.9232,
      "step": 54011
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2523796558380127,
      "learning_rate": 0.000522254301847765,
      "loss": 3.0953,
      "step": 54012
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9343763589859009,
      "learning_rate": 0.0005222515543066138,
      "loss": 2.8781,
      "step": 54013
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5906261205673218,
      "learning_rate": 0.0005222488067241417,
      "loss": 3.286,
      "step": 54014
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7546331882476807,
      "learning_rate": 0.0005222460591003495,
      "loss": 2.923,
      "step": 54015
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1502230167388916,
      "learning_rate": 0.0005222433114352374,
      "loss": 3.1319,
      "step": 54016
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3099342584609985,
      "learning_rate": 0.0005222405637288062,
      "loss": 3.1732,
      "step": 54017
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4793070554733276,
      "learning_rate": 0.0005222378159810564,
      "loss": 2.9254,
      "step": 54018
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1033835411071777,
      "learning_rate": 0.0005222350681919883,
      "loss": 3.0868,
      "step": 54019
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8536840677261353,
      "learning_rate": 0.0005222323203616026,
      "loss": 3.1714,
      "step": 54020
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.376415729522705,
      "learning_rate": 0.0005222295724898996,
      "loss": 3.1965,
      "step": 54021
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8590463399887085,
      "learning_rate": 0.0005222268245768799,
      "loss": 3.0205,
      "step": 54022
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6051111221313477,
      "learning_rate": 0.0005222240766225441,
      "loss": 3.0758,
      "step": 54023
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8365943431854248,
      "learning_rate": 0.0005222213286268925,
      "loss": 3.021,
      "step": 54024
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.270814895629883,
      "learning_rate": 0.000522218580589926,
      "loss": 2.9081,
      "step": 54025
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6348294019699097,
      "learning_rate": 0.0005222158325116447,
      "loss": 2.897,
      "step": 54026
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9570015668869019,
      "learning_rate": 0.0005222130843920493,
      "loss": 2.9707,
      "step": 54027
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8474432229995728,
      "learning_rate": 0.0005222103362311404,
      "loss": 3.0851,
      "step": 54028
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3775211572647095,
      "learning_rate": 0.0005222075880289183,
      "loss": 3.2352,
      "step": 54029
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5559515953063965,
      "learning_rate": 0.0005222048397853837,
      "loss": 2.8982,
      "step": 54030
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9965989589691162,
      "learning_rate": 0.000522202091500537,
      "loss": 2.9895,
      "step": 54031
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.1381189823150635,
      "learning_rate": 0.0005221993431743787,
      "loss": 2.838,
      "step": 54032
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4331440925598145,
      "learning_rate": 0.0005221965948069093,
      "loss": 2.9232,
      "step": 54033
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7832857370376587,
      "learning_rate": 0.0005221938463981294,
      "loss": 3.0953,
      "step": 54034
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.55775785446167,
      "learning_rate": 0.0005221910979480395,
      "loss": 3.21,
      "step": 54035
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.804976224899292,
      "learning_rate": 0.0005221883494566401,
      "loss": 2.8735,
      "step": 54036
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6098766326904297,
      "learning_rate": 0.0005221856009239317,
      "loss": 3.3112,
      "step": 54037
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.4235877990722656,
      "learning_rate": 0.0005221828523499148,
      "loss": 3.028,
      "step": 54038
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6511414051055908,
      "learning_rate": 0.0005221801037345898,
      "loss": 3.2953,
      "step": 54039
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9249989986419678,
      "learning_rate": 0.0005221773550779575,
      "loss": 3.099,
      "step": 54040
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.686182975769043,
      "learning_rate": 0.0005221746063800181,
      "loss": 2.9234,
      "step": 54041
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.9306046962738037,
      "learning_rate": 0.0005221718576407722,
      "loss": 3.3937,
      "step": 54042
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8496441841125488,
      "learning_rate": 0.0005221691088602205,
      "loss": 2.8387,
      "step": 54043
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9728927612304688,
      "learning_rate": 0.0005221663600383633,
      "loss": 2.9661,
      "step": 54044
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6640490293502808,
      "learning_rate": 0.0005221636111752012,
      "loss": 3.1227,
      "step": 54045
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.1432535648345947,
      "learning_rate": 0.0005221608622707347,
      "loss": 2.9818,
      "step": 54046
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6588654518127441,
      "learning_rate": 0.0005221581133249643,
      "loss": 3.1676,
      "step": 54047
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.6912152767181396,
      "learning_rate": 0.0005221553643378905,
      "loss": 2.8136,
      "step": 54048
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5974923372268677,
      "learning_rate": 0.0005221526153095139,
      "loss": 3.1653,
      "step": 54049
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.3779594898223877,
      "learning_rate": 0.0005221498662398347,
      "loss": 3.3481,
      "step": 54050
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.986758828163147,
      "learning_rate": 0.0005221471171288539,
      "loss": 3.04,
      "step": 54051
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8188039064407349,
      "learning_rate": 0.0005221443679765716,
      "loss": 2.9756,
      "step": 54052
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.564121127128601,
      "learning_rate": 0.0005221416187829886,
      "loss": 3.0836,
      "step": 54053
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9323848485946655,
      "learning_rate": 0.0005221388695481051,
      "loss": 2.683,
      "step": 54054
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.766624689102173,
      "learning_rate": 0.000522136120271922,
      "loss": 2.9303,
      "step": 54055
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6209323406219482,
      "learning_rate": 0.0005221333709544394,
      "loss": 2.6711,
      "step": 54056
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4710696935653687,
      "learning_rate": 0.0005221306215956582,
      "loss": 3.002,
      "step": 54057
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.735275149345398,
      "learning_rate": 0.0005221278721955786,
      "loss": 3.168,
      "step": 54058
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.6926848888397217,
      "learning_rate": 0.0005221251227542014,
      "loss": 3.1452,
      "step": 54059
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6912157535552979,
      "learning_rate": 0.0005221223732715267,
      "loss": 2.8946,
      "step": 54060
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9464294910430908,
      "learning_rate": 0.0005221196237475554,
      "loss": 3.0216,
      "step": 54061
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.35471773147583,
      "learning_rate": 0.0005221168741822879,
      "loss": 2.9862,
      "step": 54062
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4706602096557617,
      "learning_rate": 0.0005221141245757246,
      "loss": 3.0193,
      "step": 54063
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.684866189956665,
      "learning_rate": 0.0005221113749278661,
      "loss": 3.0917,
      "step": 54064
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.8707475662231445,
      "learning_rate": 0.0005221086252387129,
      "loss": 3.0793,
      "step": 54065
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.479743719100952,
      "learning_rate": 0.0005221058755082655,
      "loss": 2.9451,
      "step": 54066
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.825317859649658,
      "learning_rate": 0.0005221031257365246,
      "loss": 3.1146,
      "step": 54067
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9014358520507812,
      "learning_rate": 0.0005221003759234903,
      "loss": 2.9146,
      "step": 54068
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.871110200881958,
      "learning_rate": 0.0005220976260691634,
      "loss": 3.0548,
      "step": 54069
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5555557012557983,
      "learning_rate": 0.0005220948761735445,
      "loss": 3.2706,
      "step": 54070
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8059412240982056,
      "learning_rate": 0.0005220921262366339,
      "loss": 2.8964,
      "step": 54071
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.180255889892578,
      "learning_rate": 0.0005220893762584321,
      "loss": 3.2408,
      "step": 54072
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5256860256195068,
      "learning_rate": 0.0005220866262389398,
      "loss": 3.0557,
      "step": 54073
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.781219482421875,
      "learning_rate": 0.0005220838761781573,
      "loss": 3.1448,
      "step": 54074
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.505040168762207,
      "learning_rate": 0.0005220811260760853,
      "loss": 2.7388,
      "step": 54075
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7921772003173828,
      "learning_rate": 0.0005220783759327241,
      "loss": 3.0584,
      "step": 54076
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3223154544830322,
      "learning_rate": 0.0005220756257480745,
      "loss": 2.9977,
      "step": 54077
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8807592391967773,
      "learning_rate": 0.0005220728755221368,
      "loss": 3.013,
      "step": 54078
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8505504131317139,
      "learning_rate": 0.0005220701252549116,
      "loss": 3.0111,
      "step": 54079
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.037675619125366,
      "learning_rate": 0.0005220673749463993,
      "loss": 3.2074,
      "step": 54080
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4753228425979614,
      "learning_rate": 0.0005220646245966005,
      "loss": 3.1379,
      "step": 54081
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6384382247924805,
      "learning_rate": 0.0005220618742055157,
      "loss": 3.0012,
      "step": 54082
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5427626371383667,
      "learning_rate": 0.0005220591237731453,
      "loss": 3.028,
      "step": 54083
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6889642477035522,
      "learning_rate": 0.0005220563732994901,
      "loss": 3.2612,
      "step": 54084
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0144405364990234,
      "learning_rate": 0.0005220536227845502,
      "loss": 3.1655,
      "step": 54085
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5933878421783447,
      "learning_rate": 0.0005220508722283266,
      "loss": 2.9405,
      "step": 54086
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6072131395339966,
      "learning_rate": 0.0005220481216308195,
      "loss": 2.9721,
      "step": 54087
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.618719220161438,
      "learning_rate": 0.0005220453709920294,
      "loss": 3.127,
      "step": 54088
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4431447982788086,
      "learning_rate": 0.0005220426203119569,
      "loss": 2.8985,
      "step": 54089
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8071985244750977,
      "learning_rate": 0.0005220398695906025,
      "loss": 3.0864,
      "step": 54090
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4295012950897217,
      "learning_rate": 0.0005220371188279665,
      "loss": 3.1225,
      "step": 54091
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5894757509231567,
      "learning_rate": 0.0005220343680240499,
      "loss": 2.982,
      "step": 54092
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8310625553131104,
      "learning_rate": 0.0005220316171788527,
      "loss": 3.1274,
      "step": 54093
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.731163740158081,
      "learning_rate": 0.0005220288662923757,
      "loss": 3.1955,
      "step": 54094
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4530441761016846,
      "learning_rate": 0.0005220261153646194,
      "loss": 3.1299,
      "step": 54095
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.9151532649993896,
      "learning_rate": 0.0005220233643955842,
      "loss": 2.9606,
      "step": 54096
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4883497953414917,
      "learning_rate": 0.0005220206133852707,
      "loss": 3.1408,
      "step": 54097
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.442772388458252,
      "learning_rate": 0.0005220178623336793,
      "loss": 2.9236,
      "step": 54098
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2121269702911377,
      "learning_rate": 0.0005220151112408106,
      "loss": 2.9999,
      "step": 54099
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3436667919158936,
      "learning_rate": 0.0005220123601066653,
      "loss": 2.9757,
      "step": 54100
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4188812971115112,
      "learning_rate": 0.0005220096089312435,
      "loss": 2.9355,
      "step": 54101
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3126055002212524,
      "learning_rate": 0.000522006857714546,
      "loss": 3.1106,
      "step": 54102
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6563372611999512,
      "learning_rate": 0.0005220041064565732,
      "loss": 3.0032,
      "step": 54103
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2736172676086426,
      "learning_rate": 0.0005220013551573256,
      "loss": 3.2163,
      "step": 54104
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5857902765274048,
      "learning_rate": 0.0005219986038168038,
      "loss": 3.0148,
      "step": 54105
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.7585610151290894,
      "learning_rate": 0.0005219958524350082,
      "loss": 2.8565,
      "step": 54106
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5069942474365234,
      "learning_rate": 0.0005219931010119396,
      "loss": 3.1192,
      "step": 54107
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3773889541625977,
      "learning_rate": 0.0005219903495475982,
      "loss": 3.0666,
      "step": 54108
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6905940771102905,
      "learning_rate": 0.0005219875980419845,
      "loss": 3.0133,
      "step": 54109
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3465206623077393,
      "learning_rate": 0.0005219848464950992,
      "loss": 2.9142,
      "step": 54110
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6917829513549805,
      "learning_rate": 0.0005219820949069427,
      "loss": 2.7499,
      "step": 54111
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8651386499404907,
      "learning_rate": 0.0005219793432775155,
      "loss": 2.9714,
      "step": 54112
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3012083768844604,
      "learning_rate": 0.0005219765916068182,
      "loss": 3.0172,
      "step": 54113
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.814887523651123,
      "learning_rate": 0.0005219738398948513,
      "loss": 2.9412,
      "step": 54114
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.110804796218872,
      "learning_rate": 0.0005219710881416152,
      "loss": 2.9867,
      "step": 54115
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.582046627998352,
      "learning_rate": 0.0005219683363471105,
      "loss": 2.9269,
      "step": 54116
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.887012004852295,
      "learning_rate": 0.0005219655845113378,
      "loss": 2.9141,
      "step": 54117
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.190554141998291,
      "learning_rate": 0.0005219628326342974,
      "loss": 3.0721,
      "step": 54118
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4094226360321045,
      "learning_rate": 0.0005219600807159899,
      "loss": 3.0041,
      "step": 54119
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4333510398864746,
      "learning_rate": 0.0005219573287564158,
      "loss": 2.7391,
      "step": 54120
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.234675645828247,
      "learning_rate": 0.0005219545767555757,
      "loss": 2.9396,
      "step": 54121
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4858583211898804,
      "learning_rate": 0.0005219518247134701,
      "loss": 3.3774,
      "step": 54122
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5078295469284058,
      "learning_rate": 0.0005219490726300994,
      "loss": 2.9705,
      "step": 54123
    },
    {
      "epoch": 0.7,
      "grad_norm": 3.0935208797454834,
      "learning_rate": 0.0005219463205054642,
      "loss": 2.9584,
      "step": 54124
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.7299773693084717,
      "learning_rate": 0.000521943568339565,
      "loss": 2.899,
      "step": 54125
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.4575657844543457,
      "learning_rate": 0.0005219408161324024,
      "loss": 2.9167,
      "step": 54126
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8944754600524902,
      "learning_rate": 0.0005219380638839767,
      "loss": 2.8896,
      "step": 54127
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0801823139190674,
      "learning_rate": 0.0005219353115942885,
      "loss": 2.9831,
      "step": 54128
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3336093425750732,
      "learning_rate": 0.0005219325592633383,
      "loss": 3.0599,
      "step": 54129
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5209354162216187,
      "learning_rate": 0.0005219298068911268,
      "loss": 3.0006,
      "step": 54130
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.0147721767425537,
      "learning_rate": 0.0005219270544776543,
      "loss": 3.0245,
      "step": 54131
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.21067214012146,
      "learning_rate": 0.0005219243020229212,
      "loss": 3.1183,
      "step": 54132
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.144181251525879,
      "learning_rate": 0.0005219215495269284,
      "loss": 2.9337,
      "step": 54133
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3651230335235596,
      "learning_rate": 0.0005219187969896761,
      "loss": 3.2511,
      "step": 54134
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.6432900428771973,
      "learning_rate": 0.000521916044411165,
      "loss": 2.901,
      "step": 54135
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5167526006698608,
      "learning_rate": 0.0005219132917913955,
      "loss": 3.0104,
      "step": 54136
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.3600926399230957,
      "learning_rate": 0.0005219105391303681,
      "loss": 2.9456,
      "step": 54137
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.4918760061264038,
      "learning_rate": 0.0005219077864280833,
      "loss": 2.9409,
      "step": 54138
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8424721956253052,
      "learning_rate": 0.0005219050336845416,
      "loss": 2.9312,
      "step": 54139
    },
    {
      "epoch": 0.7,
      "grad_norm": 2.2284817695617676,
      "learning_rate": 0.0005219022808997437,
      "loss": 3.2211,
      "step": 54140
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.5449316501617432,
      "learning_rate": 0.0005218995280736898,
      "loss": 3.2324,
      "step": 54141
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.8169941902160645,
      "learning_rate": 0.0005218967752063809,
      "loss": 2.922,
      "step": 54142
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.454906940460205,
      "learning_rate": 0.0005218940222978169,
      "loss": 2.983,
      "step": 54143
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.3958724737167358,
      "learning_rate": 0.0005218912693479988,
      "loss": 2.909,
      "step": 54144
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5368342399597168,
      "learning_rate": 0.0005218885163569268,
      "loss": 2.9139,
      "step": 54145
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7301911115646362,
      "learning_rate": 0.0005218857633246016,
      "loss": 3.1019,
      "step": 54146
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7156466245651245,
      "learning_rate": 0.0005218830102510237,
      "loss": 2.8113,
      "step": 54147
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4208037853240967,
      "learning_rate": 0.0005218802571361935,
      "loss": 3.0516,
      "step": 54148
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4723321199417114,
      "learning_rate": 0.0005218775039801116,
      "loss": 2.9409,
      "step": 54149
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6563102006912231,
      "learning_rate": 0.0005218747507827784,
      "loss": 3.3051,
      "step": 54150
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5881977081298828,
      "learning_rate": 0.0005218719975441944,
      "loss": 3.1708,
      "step": 54151
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4505655765533447,
      "learning_rate": 0.0005218692442643605,
      "loss": 3.1672,
      "step": 54152
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6675667762756348,
      "learning_rate": 0.0005218664909432767,
      "loss": 3.1609,
      "step": 54153
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4971150159835815,
      "learning_rate": 0.0005218637375809438,
      "loss": 3.1053,
      "step": 54154
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.450657844543457,
      "learning_rate": 0.0005218609841773622,
      "loss": 3.252,
      "step": 54155
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4581177234649658,
      "learning_rate": 0.0005218582307325325,
      "loss": 3.081,
      "step": 54156
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2997844219207764,
      "learning_rate": 0.0005218554772464552,
      "loss": 3.0612,
      "step": 54157
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6754356622695923,
      "learning_rate": 0.0005218527237191307,
      "loss": 2.8418,
      "step": 54158
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.355534553527832,
      "learning_rate": 0.0005218499701505597,
      "loss": 2.6472,
      "step": 54159
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0052390098571777,
      "learning_rate": 0.0005218472165407426,
      "loss": 3.2822,
      "step": 54160
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.461663246154785,
      "learning_rate": 0.0005218444628896798,
      "loss": 2.9108,
      "step": 54161
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7813210487365723,
      "learning_rate": 0.0005218417091973719,
      "loss": 3.0909,
      "step": 54162
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.045760154724121,
      "learning_rate": 0.0005218389554638196,
      "loss": 3.4668,
      "step": 54163
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.584164619445801,
      "learning_rate": 0.0005218362016890231,
      "loss": 2.8534,
      "step": 54164
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7832863330841064,
      "learning_rate": 0.0005218334478729833,
      "loss": 3.0322,
      "step": 54165
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3245283365249634,
      "learning_rate": 0.0005218306940157002,
      "loss": 3.0949,
      "step": 54166
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2898986339569092,
      "learning_rate": 0.0005218279401171747,
      "loss": 3.0818,
      "step": 54167
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6746845245361328,
      "learning_rate": 0.0005218251861774071,
      "loss": 2.9546,
      "step": 54168
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2656779289245605,
      "learning_rate": 0.0005218224321963982,
      "loss": 3.074,
      "step": 54169
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5150165557861328,
      "learning_rate": 0.0005218196781741482,
      "loss": 3.0863,
      "step": 54170
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5173956155776978,
      "learning_rate": 0.0005218169241106577,
      "loss": 2.8401,
      "step": 54171
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9909818172454834,
      "learning_rate": 0.0005218141700059273,
      "loss": 3.2746,
      "step": 54172
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5450514554977417,
      "learning_rate": 0.0005218114158599575,
      "loss": 2.921,
      "step": 54173
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3995212316513062,
      "learning_rate": 0.0005218086616727488,
      "loss": 3.1611,
      "step": 54174
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.869678258895874,
      "learning_rate": 0.0005218059074443016,
      "loss": 2.8545,
      "step": 54175
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.378198266029358,
      "learning_rate": 0.0005218031531746165,
      "loss": 3.0262,
      "step": 54176
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5541282892227173,
      "learning_rate": 0.000521800398863694,
      "loss": 3.0027,
      "step": 54177
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3824613094329834,
      "learning_rate": 0.0005217976445115347,
      "loss": 3.0719,
      "step": 54178
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6387090682983398,
      "learning_rate": 0.0005217948901181389,
      "loss": 3.1462,
      "step": 54179
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6230690479278564,
      "learning_rate": 0.0005217921356835074,
      "loss": 2.996,
      "step": 54180
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6271377801895142,
      "learning_rate": 0.0005217893812076405,
      "loss": 2.8526,
      "step": 54181
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.709231972694397,
      "learning_rate": 0.0005217866266905388,
      "loss": 3.1524,
      "step": 54182
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6006885766983032,
      "learning_rate": 0.0005217838721322027,
      "loss": 3.1524,
      "step": 54183
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.579178810119629,
      "learning_rate": 0.0005217811175326327,
      "loss": 3.4481,
      "step": 54184
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8630880117416382,
      "learning_rate": 0.0005217783628918295,
      "loss": 3.0186,
      "step": 54185
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5893903970718384,
      "learning_rate": 0.0005217756082097937,
      "loss": 3.3245,
      "step": 54186
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.6572296619415283,
      "learning_rate": 0.0005217728534865253,
      "loss": 3.1178,
      "step": 54187
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8984543085098267,
      "learning_rate": 0.0005217700987220253,
      "loss": 2.9929,
      "step": 54188
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6584515571594238,
      "learning_rate": 0.0005217673439162941,
      "loss": 3.1082,
      "step": 54189
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5413014888763428,
      "learning_rate": 0.000521764589069332,
      "loss": 2.9395,
      "step": 54190
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7531318664550781,
      "learning_rate": 0.0005217618341811399,
      "loss": 3.3123,
      "step": 54191
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3988088369369507,
      "learning_rate": 0.0005217590792517179,
      "loss": 2.9832,
      "step": 54192
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6155481338500977,
      "learning_rate": 0.0005217563242810668,
      "loss": 3.2509,
      "step": 54193
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8729028701782227,
      "learning_rate": 0.000521753569269187,
      "loss": 3.1049,
      "step": 54194
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6216351985931396,
      "learning_rate": 0.0005217508142160791,
      "loss": 3.0542,
      "step": 54195
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9117400646209717,
      "learning_rate": 0.0005217480591217434,
      "loss": 3.1835,
      "step": 54196
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8317633867263794,
      "learning_rate": 0.0005217453039861806,
      "loss": 2.9961,
      "step": 54197
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.82072913646698,
      "learning_rate": 0.0005217425488093912,
      "loss": 3.1516,
      "step": 54198
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4348260164260864,
      "learning_rate": 0.0005217397935913756,
      "loss": 2.9466,
      "step": 54199
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6495473384857178,
      "learning_rate": 0.0005217370383321344,
      "loss": 3.3334,
      "step": 54200
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.132655382156372,
      "learning_rate": 0.0005217342830316681,
      "loss": 2.784,
      "step": 54201
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4387545585632324,
      "learning_rate": 0.0005217315276899773,
      "loss": 3.1644,
      "step": 54202
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7481237649917603,
      "learning_rate": 0.0005217287723070624,
      "loss": 2.8178,
      "step": 54203
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4778950214385986,
      "learning_rate": 0.0005217260168829239,
      "loss": 2.9871,
      "step": 54204
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.423087477684021,
      "learning_rate": 0.0005217232614175624,
      "loss": 3.049,
      "step": 54205
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5635859966278076,
      "learning_rate": 0.0005217205059109782,
      "loss": 3.0052,
      "step": 54206
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5667881965637207,
      "learning_rate": 0.0005217177503631722,
      "loss": 3.0824,
      "step": 54207
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4382754564285278,
      "learning_rate": 0.0005217149947741445,
      "loss": 2.8715,
      "step": 54208
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4664500951766968,
      "learning_rate": 0.0005217122391438959,
      "loss": 3.0334,
      "step": 54209
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3754603862762451,
      "learning_rate": 0.0005217094834724268,
      "loss": 3.1566,
      "step": 54210
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3780345916748047,
      "learning_rate": 0.0005217067277597376,
      "loss": 3.1596,
      "step": 54211
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6517022848129272,
      "learning_rate": 0.0005217039720058291,
      "loss": 3.0367,
      "step": 54212
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6316394805908203,
      "learning_rate": 0.0005217012162107015,
      "loss": 3.1294,
      "step": 54213
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5314013957977295,
      "learning_rate": 0.0005216984603743556,
      "loss": 2.9173,
      "step": 54214
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5274567604064941,
      "learning_rate": 0.0005216957044967917,
      "loss": 3.0844,
      "step": 54215
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5681910514831543,
      "learning_rate": 0.0005216929485780104,
      "loss": 3.0756,
      "step": 54216
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3846700191497803,
      "learning_rate": 0.0005216901926180121,
      "loss": 2.7544,
      "step": 54217
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3276419639587402,
      "learning_rate": 0.0005216874366167976,
      "loss": 3.0192,
      "step": 54218
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5840858221054077,
      "learning_rate": 0.0005216846805743671,
      "loss": 2.9946,
      "step": 54219
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5538955926895142,
      "learning_rate": 0.0005216819244907212,
      "loss": 3.1104,
      "step": 54220
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.991353988647461,
      "learning_rate": 0.0005216791683658605,
      "loss": 3.2826,
      "step": 54221
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.598233699798584,
      "learning_rate": 0.0005216764121997855,
      "loss": 3.0763,
      "step": 54222
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.12825345993042,
      "learning_rate": 0.0005216736559924966,
      "loss": 2.9056,
      "step": 54223
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.927172064781189,
      "learning_rate": 0.0005216708997439944,
      "loss": 3.0759,
      "step": 54224
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3481149673461914,
      "learning_rate": 0.0005216681434542795,
      "loss": 3.3888,
      "step": 54225
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.617727279663086,
      "learning_rate": 0.0005216653871233521,
      "loss": 2.8599,
      "step": 54226
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6124143600463867,
      "learning_rate": 0.0005216626307512131,
      "loss": 2.7184,
      "step": 54227
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3913036584854126,
      "learning_rate": 0.0005216598743378627,
      "loss": 2.7425,
      "step": 54228
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.464114785194397,
      "learning_rate": 0.0005216571178833017,
      "loss": 3.0302,
      "step": 54229
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.812842607498169,
      "learning_rate": 0.0005216543613875303,
      "loss": 2.8838,
      "step": 54230
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.787692666053772,
      "learning_rate": 0.0005216516048505492,
      "loss": 3.0524,
      "step": 54231
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7228516340255737,
      "learning_rate": 0.000521648848272359,
      "loss": 3.1829,
      "step": 54232
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6575853824615479,
      "learning_rate": 0.00052164609165296,
      "loss": 3.0299,
      "step": 54233
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5082632303237915,
      "learning_rate": 0.0005216433349923528,
      "loss": 3.224,
      "step": 54234
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4501838684082031,
      "learning_rate": 0.0005216405782905379,
      "loss": 3.1297,
      "step": 54235
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0613276958465576,
      "learning_rate": 0.0005216378215475159,
      "loss": 2.9057,
      "step": 54236
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6961123943328857,
      "learning_rate": 0.0005216350647632872,
      "loss": 3.1141,
      "step": 54237
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.432356595993042,
      "learning_rate": 0.0005216323079378525,
      "loss": 3.02,
      "step": 54238
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6845512390136719,
      "learning_rate": 0.0005216295510712119,
      "loss": 3.067,
      "step": 54239
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7331538200378418,
      "learning_rate": 0.0005216267941633663,
      "loss": 3.272,
      "step": 54240
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4375578165054321,
      "learning_rate": 0.0005216240372143162,
      "loss": 2.9476,
      "step": 54241
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3384193181991577,
      "learning_rate": 0.0005216212802240618,
      "loss": 2.9534,
      "step": 54242
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5432735681533813,
      "learning_rate": 0.0005216185231926041,
      "loss": 3.1067,
      "step": 54243
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5505497455596924,
      "learning_rate": 0.0005216157661199432,
      "loss": 3.115,
      "step": 54244
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7394397258758545,
      "learning_rate": 0.0005216130090060797,
      "loss": 3.0495,
      "step": 54245
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5062404870986938,
      "learning_rate": 0.0005216102518510142,
      "loss": 3.018,
      "step": 54246
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.77036190032959,
      "learning_rate": 0.0005216074946547472,
      "loss": 3.0235,
      "step": 54247
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5476270914077759,
      "learning_rate": 0.0005216047374172791,
      "loss": 3.0474,
      "step": 54248
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3145499229431152,
      "learning_rate": 0.0005216019801386106,
      "loss": 3.0599,
      "step": 54249
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4301106929779053,
      "learning_rate": 0.0005215992228187421,
      "loss": 2.8226,
      "step": 54250
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.6748244762420654,
      "learning_rate": 0.000521596465457674,
      "loss": 2.7349,
      "step": 54251
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5501669645309448,
      "learning_rate": 0.0005215937080554069,
      "loss": 3.1111,
      "step": 54252
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7005387544631958,
      "learning_rate": 0.0005215909506119415,
      "loss": 2.9478,
      "step": 54253
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4445685148239136,
      "learning_rate": 0.0005215881931272782,
      "loss": 2.9882,
      "step": 54254
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8402748107910156,
      "learning_rate": 0.0005215854356014173,
      "loss": 3.0322,
      "step": 54255
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4208065271377563,
      "learning_rate": 0.0005215826780343597,
      "loss": 2.7171,
      "step": 54256
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8423172235488892,
      "learning_rate": 0.0005215799204261055,
      "loss": 2.9628,
      "step": 54257
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6292263269424438,
      "learning_rate": 0.0005215771627766555,
      "loss": 3.111,
      "step": 54258
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.8890810012817383,
      "learning_rate": 0.0005215744050860101,
      "loss": 3.0041,
      "step": 54259
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.083278179168701,
      "learning_rate": 0.0005215716473541698,
      "loss": 3.0478,
      "step": 54260
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8402312994003296,
      "learning_rate": 0.0005215688895811351,
      "loss": 3.0907,
      "step": 54261
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.056170701980591,
      "learning_rate": 0.0005215661317669067,
      "loss": 2.9273,
      "step": 54262
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5881147384643555,
      "learning_rate": 0.000521563373911485,
      "loss": 2.9867,
      "step": 54263
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3769290447235107,
      "learning_rate": 0.0005215606160148704,
      "loss": 2.762,
      "step": 54264
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.756050944328308,
      "learning_rate": 0.0005215578580770635,
      "loss": 3.0968,
      "step": 54265
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5576215982437134,
      "learning_rate": 0.0005215551000980648,
      "loss": 3.0142,
      "step": 54266
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5994534492492676,
      "learning_rate": 0.0005215523420778748,
      "loss": 2.9274,
      "step": 54267
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5367920398712158,
      "learning_rate": 0.0005215495840164941,
      "loss": 3.0734,
      "step": 54268
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.570374846458435,
      "learning_rate": 0.0005215468259139231,
      "loss": 2.7967,
      "step": 54269
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4022855758666992,
      "learning_rate": 0.0005215440677701624,
      "loss": 2.8872,
      "step": 54270
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4727442264556885,
      "learning_rate": 0.0005215413095852124,
      "loss": 3.1996,
      "step": 54271
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.20892596244812,
      "learning_rate": 0.0005215385513590738,
      "loss": 2.9821,
      "step": 54272
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9784587621688843,
      "learning_rate": 0.000521535793091747,
      "loss": 3.0351,
      "step": 54273
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8597694635391235,
      "learning_rate": 0.0005215330347832324,
      "loss": 3.0978,
      "step": 54274
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3834377527236938,
      "learning_rate": 0.0005215302764335308,
      "loss": 3.0666,
      "step": 54275
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8365440368652344,
      "learning_rate": 0.0005215275180426425,
      "loss": 2.9769,
      "step": 54276
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.688765525817871,
      "learning_rate": 0.0005215247596105679,
      "loss": 3.1318,
      "step": 54277
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7916221618652344,
      "learning_rate": 0.0005215220011373078,
      "loss": 2.9485,
      "step": 54278
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.526318907737732,
      "learning_rate": 0.0005215192426228626,
      "loss": 3.1102,
      "step": 54279
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4850401878356934,
      "learning_rate": 0.0005215164840672326,
      "loss": 3.1872,
      "step": 54280
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7100187540054321,
      "learning_rate": 0.0005215137254704186,
      "loss": 3.028,
      "step": 54281
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.005378484725952,
      "learning_rate": 0.0005215109668324211,
      "loss": 3.0615,
      "step": 54282
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7795926332473755,
      "learning_rate": 0.0005215082081532405,
      "loss": 2.9081,
      "step": 54283
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8915250301361084,
      "learning_rate": 0.0005215054494328773,
      "loss": 3.1535,
      "step": 54284
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.405322551727295,
      "learning_rate": 0.000521502690671332,
      "loss": 3.0209,
      "step": 54285
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.765133261680603,
      "learning_rate": 0.0005214999318686053,
      "loss": 3.0652,
      "step": 54286
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7984142303466797,
      "learning_rate": 0.0005214971730246975,
      "loss": 3.0997,
      "step": 54287
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.499932885169983,
      "learning_rate": 0.0005214944141396093,
      "loss": 3.2092,
      "step": 54288
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7421667575836182,
      "learning_rate": 0.000521491655213341,
      "loss": 2.9181,
      "step": 54289
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.863918423652649,
      "learning_rate": 0.0005214888962458932,
      "loss": 3.0,
      "step": 54290
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6875405311584473,
      "learning_rate": 0.0005214861372372665,
      "loss": 3.1897,
      "step": 54291
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7715675830841064,
      "learning_rate": 0.0005214833781874613,
      "loss": 2.9677,
      "step": 54292
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2351161241531372,
      "learning_rate": 0.0005214806190964782,
      "loss": 3.0688,
      "step": 54293
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4856173992156982,
      "learning_rate": 0.0005214778599643176,
      "loss": 2.9631,
      "step": 54294
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.474395990371704,
      "learning_rate": 0.0005214751007909802,
      "loss": 3.2055,
      "step": 54295
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5623379945755005,
      "learning_rate": 0.0005214723415764664,
      "loss": 2.9853,
      "step": 54296
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6598032712936401,
      "learning_rate": 0.0005214695823207767,
      "loss": 3.128,
      "step": 54297
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7241836786270142,
      "learning_rate": 0.0005214668230239116,
      "loss": 3.0461,
      "step": 54298
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.537651538848877,
      "learning_rate": 0.0005214640636858716,
      "loss": 3.3161,
      "step": 54299
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5110206604003906,
      "learning_rate": 0.0005214613043066572,
      "loss": 3.1159,
      "step": 54300
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.641843318939209,
      "learning_rate": 0.0005214585448862691,
      "loss": 3.0198,
      "step": 54301
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4346482753753662,
      "learning_rate": 0.0005214557854247076,
      "loss": 3.1438,
      "step": 54302
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3115973472595215,
      "learning_rate": 0.0005214530259219733,
      "loss": 2.9494,
      "step": 54303
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5344568490982056,
      "learning_rate": 0.0005214502663780666,
      "loss": 2.9317,
      "step": 54304
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9485136270523071,
      "learning_rate": 0.0005214475067929882,
      "loss": 3.2679,
      "step": 54305
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.041898012161255,
      "learning_rate": 0.0005214447471667386,
      "loss": 3.1015,
      "step": 54306
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7870280742645264,
      "learning_rate": 0.0005214419874993181,
      "loss": 3.0255,
      "step": 54307
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0597751140594482,
      "learning_rate": 0.0005214392277907275,
      "loss": 3.0627,
      "step": 54308
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.824873685836792,
      "learning_rate": 0.0005214364680409671,
      "loss": 2.9814,
      "step": 54309
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8495980501174927,
      "learning_rate": 0.0005214337082500375,
      "loss": 2.9532,
      "step": 54310
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.55195152759552,
      "learning_rate": 0.0005214309484179392,
      "loss": 3.1522,
      "step": 54311
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.051161289215088,
      "learning_rate": 0.0005214281885446727,
      "loss": 2.9637,
      "step": 54312
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1870522499084473,
      "learning_rate": 0.0005214254286302385,
      "loss": 2.9909,
      "step": 54313
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.297694206237793,
      "learning_rate": 0.000521422668674637,
      "loss": 3.1746,
      "step": 54314
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5243257284164429,
      "learning_rate": 0.0005214199086778691,
      "loss": 2.9902,
      "step": 54315
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.8528831005096436,
      "learning_rate": 0.0005214171486399349,
      "loss": 3.2622,
      "step": 54316
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.826673746109009,
      "learning_rate": 0.0005214143885608352,
      "loss": 3.1196,
      "step": 54317
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.485368013381958,
      "learning_rate": 0.0005214116284405703,
      "loss": 3.0548,
      "step": 54318
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0510952472686768,
      "learning_rate": 0.0005214088682791408,
      "loss": 2.8402,
      "step": 54319
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.147897958755493,
      "learning_rate": 0.0005214061080765471,
      "loss": 3.2234,
      "step": 54320
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0423271656036377,
      "learning_rate": 0.0005214033478327901,
      "loss": 2.8748,
      "step": 54321
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5262213945388794,
      "learning_rate": 0.0005214005875478698,
      "loss": 3.0621,
      "step": 54322
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6505343914031982,
      "learning_rate": 0.000521397827221787,
      "loss": 2.8864,
      "step": 54323
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3138785362243652,
      "learning_rate": 0.0005213950668545422,
      "loss": 2.9645,
      "step": 54324
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.326696515083313,
      "learning_rate": 0.0005213923064461358,
      "loss": 2.9511,
      "step": 54325
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3859306573867798,
      "learning_rate": 0.0005213895459965685,
      "loss": 3.0708,
      "step": 54326
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.53853440284729,
      "learning_rate": 0.0005213867855058406,
      "loss": 2.8778,
      "step": 54327
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.080991506576538,
      "learning_rate": 0.0005213840249739529,
      "loss": 3.2293,
      "step": 54328
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7783921957015991,
      "learning_rate": 0.0005213812644009056,
      "loss": 3.0562,
      "step": 54329
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5446972846984863,
      "learning_rate": 0.0005213785037866994,
      "loss": 3.0882,
      "step": 54330
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6029373407363892,
      "learning_rate": 0.0005213757431313347,
      "loss": 2.9796,
      "step": 54331
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5639450550079346,
      "learning_rate": 0.0005213729824348121,
      "loss": 3.0448,
      "step": 54332
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8486677408218384,
      "learning_rate": 0.000521370221697132,
      "loss": 3.0684,
      "step": 54333
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9629509449005127,
      "learning_rate": 0.0005213674609182951,
      "loss": 2.9374,
      "step": 54334
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8234236240386963,
      "learning_rate": 0.0005213647000983017,
      "loss": 3.2532,
      "step": 54335
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6307240724563599,
      "learning_rate": 0.0005213619392371526,
      "loss": 2.9504,
      "step": 54336
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1239399909973145,
      "learning_rate": 0.0005213591783348481,
      "loss": 3.0541,
      "step": 54337
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.987994909286499,
      "learning_rate": 0.0005213564173913885,
      "loss": 3.0691,
      "step": 54338
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7859522104263306,
      "learning_rate": 0.0005213536564067749,
      "loss": 3.0326,
      "step": 54339
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9224557876586914,
      "learning_rate": 0.0005213508953810073,
      "loss": 3.2161,
      "step": 54340
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7980616092681885,
      "learning_rate": 0.0005213481343140864,
      "loss": 3.0544,
      "step": 54341
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6739946603775024,
      "learning_rate": 0.0005213453732060127,
      "loss": 3.0951,
      "step": 54342
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5483447313308716,
      "learning_rate": 0.0005213426120567867,
      "loss": 2.9284,
      "step": 54343
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.604515552520752,
      "learning_rate": 0.000521339850866409,
      "loss": 3.0586,
      "step": 54344
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5347651243209839,
      "learning_rate": 0.00052133708963488,
      "loss": 3.1006,
      "step": 54345
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6409623622894287,
      "learning_rate": 0.0005213343283622003,
      "loss": 3.1382,
      "step": 54346
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4204424619674683,
      "learning_rate": 0.0005213315670483703,
      "loss": 3.2228,
      "step": 54347
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7230877876281738,
      "learning_rate": 0.0005213288056933906,
      "loss": 3.2314,
      "step": 54348
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.438948631286621,
      "learning_rate": 0.0005213260442972618,
      "loss": 2.897,
      "step": 54349
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7952957153320312,
      "learning_rate": 0.0005213232828599842,
      "loss": 2.907,
      "step": 54350
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7006133794784546,
      "learning_rate": 0.0005213205213815585,
      "loss": 3.3126,
      "step": 54351
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5797995328903198,
      "learning_rate": 0.0005213177598619851,
      "loss": 2.9294,
      "step": 54352
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.690102458000183,
      "learning_rate": 0.0005213149983012645,
      "loss": 3.1,
      "step": 54353
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.451067328453064,
      "learning_rate": 0.0005213122366993973,
      "loss": 2.9845,
      "step": 54354
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.887331247329712,
      "learning_rate": 0.000521309475056384,
      "loss": 2.9366,
      "step": 54355
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6450917720794678,
      "learning_rate": 0.0005213067133722251,
      "loss": 2.8254,
      "step": 54356
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.110959053039551,
      "learning_rate": 0.0005213039516469211,
      "loss": 2.8218,
      "step": 54357
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.491560697555542,
      "learning_rate": 0.0005213011898804725,
      "loss": 3.0472,
      "step": 54358
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.619589924812317,
      "learning_rate": 0.0005212984280728798,
      "loss": 2.8688,
      "step": 54359
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8763506412506104,
      "learning_rate": 0.0005212956662241436,
      "loss": 3.0701,
      "step": 54360
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5153599977493286,
      "learning_rate": 0.0005212929043342643,
      "loss": 2.8029,
      "step": 54361
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.302893877029419,
      "learning_rate": 0.0005212901424032425,
      "loss": 3.1819,
      "step": 54362
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6430987119674683,
      "learning_rate": 0.0005212873804310787,
      "loss": 3.1985,
      "step": 54363
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.656670331954956,
      "learning_rate": 0.0005212846184177734,
      "loss": 2.9462,
      "step": 54364
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.532690405845642,
      "learning_rate": 0.0005212818563633271,
      "loss": 2.9111,
      "step": 54365
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4638642072677612,
      "learning_rate": 0.0005212790942677403,
      "loss": 3.0827,
      "step": 54366
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6503076553344727,
      "learning_rate": 0.0005212763321310135,
      "loss": 3.0046,
      "step": 54367
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5803886651992798,
      "learning_rate": 0.0005212735699531474,
      "loss": 3.0386,
      "step": 54368
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7394436597824097,
      "learning_rate": 0.0005212708077341423,
      "loss": 2.8845,
      "step": 54369
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6304441690444946,
      "learning_rate": 0.0005212680454739987,
      "loss": 3.0009,
      "step": 54370
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1563472747802734,
      "learning_rate": 0.0005212652831727173,
      "loss": 2.9498,
      "step": 54371
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.568976879119873,
      "learning_rate": 0.0005212625208302985,
      "loss": 3.0314,
      "step": 54372
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8720804452896118,
      "learning_rate": 0.0005212597584467427,
      "loss": 3.0402,
      "step": 54373
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6875368356704712,
      "learning_rate": 0.0005212569960220506,
      "loss": 3.1658,
      "step": 54374
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9623794555664062,
      "learning_rate": 0.0005212542335562227,
      "loss": 3.2229,
      "step": 54375
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.836201548576355,
      "learning_rate": 0.0005212514710492595,
      "loss": 3.1205,
      "step": 54376
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0638229846954346,
      "learning_rate": 0.0005212487085011614,
      "loss": 3.0569,
      "step": 54377
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.538586139678955,
      "learning_rate": 0.000521245945911929,
      "loss": 2.9948,
      "step": 54378
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.656355619430542,
      "learning_rate": 0.0005212431832815628,
      "loss": 3.0619,
      "step": 54379
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.4132463932037354,
      "learning_rate": 0.0005212404206100632,
      "loss": 2.9383,
      "step": 54380
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7107235193252563,
      "learning_rate": 0.000521237657897431,
      "loss": 3.1889,
      "step": 54381
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4007493257522583,
      "learning_rate": 0.0005212348951436665,
      "loss": 3.2086,
      "step": 54382
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8911879062652588,
      "learning_rate": 0.0005212321323487702,
      "loss": 3.0883,
      "step": 54383
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0986900329589844,
      "learning_rate": 0.0005212293695127428,
      "loss": 3.031,
      "step": 54384
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8007371425628662,
      "learning_rate": 0.0005212266066355846,
      "loss": 2.9955,
      "step": 54385
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7234665155410767,
      "learning_rate": 0.0005212238437172961,
      "loss": 3.1212,
      "step": 54386
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.795939564704895,
      "learning_rate": 0.0005212210807578781,
      "loss": 3.3236,
      "step": 54387
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.664965271949768,
      "learning_rate": 0.0005212183177573308,
      "loss": 3.2261,
      "step": 54388
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9196522235870361,
      "learning_rate": 0.0005212155547156549,
      "loss": 3.0311,
      "step": 54389
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.677885890007019,
      "learning_rate": 0.0005212127916328508,
      "loss": 3.0461,
      "step": 54390
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6726566553115845,
      "learning_rate": 0.0005212100285089191,
      "loss": 2.9619,
      "step": 54391
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4741370677947998,
      "learning_rate": 0.0005212072653438603,
      "loss": 2.9373,
      "step": 54392
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7303792238235474,
      "learning_rate": 0.0005212045021376749,
      "loss": 2.8239,
      "step": 54393
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6424773931503296,
      "learning_rate": 0.0005212017388903634,
      "loss": 2.8346,
      "step": 54394
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5797542333602905,
      "learning_rate": 0.0005211989756019262,
      "loss": 2.916,
      "step": 54395
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5418384075164795,
      "learning_rate": 0.000521196212272364,
      "loss": 2.9342,
      "step": 54396
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5342047214508057,
      "learning_rate": 0.0005211934489016773,
      "loss": 2.8054,
      "step": 54397
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4813783168792725,
      "learning_rate": 0.0005211906854898666,
      "loss": 3.115,
      "step": 54398
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5998749732971191,
      "learning_rate": 0.0005211879220369323,
      "loss": 2.9528,
      "step": 54399
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4805560111999512,
      "learning_rate": 0.000521185158542875,
      "loss": 3.1357,
      "step": 54400
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7079185247421265,
      "learning_rate": 0.0005211823950076952,
      "loss": 3.079,
      "step": 54401
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6757067441940308,
      "learning_rate": 0.0005211796314313934,
      "loss": 3.0817,
      "step": 54402
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6751744747161865,
      "learning_rate": 0.0005211768678139702,
      "loss": 2.9712,
      "step": 54403
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3667488098144531,
      "learning_rate": 0.0005211741041554259,
      "loss": 3.0904,
      "step": 54404
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7205339670181274,
      "learning_rate": 0.0005211713404557614,
      "loss": 2.9913,
      "step": 54405
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.437821626663208,
      "learning_rate": 0.0005211685767149768,
      "loss": 3.2369,
      "step": 54406
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9421896934509277,
      "learning_rate": 0.0005211658129330728,
      "loss": 2.9483,
      "step": 54407
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6903550624847412,
      "learning_rate": 0.0005211630491100498,
      "loss": 2.9966,
      "step": 54408
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3946123123168945,
      "learning_rate": 0.0005211602852459087,
      "loss": 3.0142,
      "step": 54409
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8843919038772583,
      "learning_rate": 0.0005211575213406495,
      "loss": 3.0719,
      "step": 54410
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6003382205963135,
      "learning_rate": 0.0005211547573942731,
      "loss": 3.1391,
      "step": 54411
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3895318508148193,
      "learning_rate": 0.0005211519934067797,
      "loss": 3.1256,
      "step": 54412
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4220387935638428,
      "learning_rate": 0.00052114922937817,
      "loss": 2.8652,
      "step": 54413
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4925243854522705,
      "learning_rate": 0.0005211464653084446,
      "loss": 3.0112,
      "step": 54414
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4352827072143555,
      "learning_rate": 0.0005211437011976037,
      "loss": 3.0463,
      "step": 54415
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6655104160308838,
      "learning_rate": 0.0005211409370456482,
      "loss": 3.1036,
      "step": 54416
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.857537865638733,
      "learning_rate": 0.0005211381728525784,
      "loss": 3.105,
      "step": 54417
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6003806591033936,
      "learning_rate": 0.0005211354086183947,
      "loss": 3.147,
      "step": 54418
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7201131582260132,
      "learning_rate": 0.0005211326443430979,
      "loss": 3.0169,
      "step": 54419
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8595744371414185,
      "learning_rate": 0.0005211298800266883,
      "loss": 2.7936,
      "step": 54420
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.464523196220398,
      "learning_rate": 0.0005211271156691665,
      "loss": 3.1125,
      "step": 54421
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4727511405944824,
      "learning_rate": 0.000521124351270533,
      "loss": 2.9998,
      "step": 54422
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5210360288619995,
      "learning_rate": 0.0005211215868307883,
      "loss": 2.856,
      "step": 54423
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2319178581237793,
      "learning_rate": 0.0005211188223499329,
      "loss": 2.8173,
      "step": 54424
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6061632633209229,
      "learning_rate": 0.0005211160578279674,
      "loss": 2.8337,
      "step": 54425
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4046720266342163,
      "learning_rate": 0.0005211132932648922,
      "loss": 3.0215,
      "step": 54426
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4914822578430176,
      "learning_rate": 0.0005211105286607077,
      "loss": 2.9749,
      "step": 54427
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5028587579727173,
      "learning_rate": 0.0005211077640154149,
      "loss": 3.241,
      "step": 54428
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.707623839378357,
      "learning_rate": 0.0005211049993290138,
      "loss": 2.866,
      "step": 54429
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8289289474487305,
      "learning_rate": 0.0005211022346015052,
      "loss": 3.1551,
      "step": 54430
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3604662418365479,
      "learning_rate": 0.0005210994698328895,
      "loss": 2.8659,
      "step": 54431
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.605568289756775,
      "learning_rate": 0.0005210967050231671,
      "loss": 2.9256,
      "step": 54432
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.509028434753418,
      "learning_rate": 0.0005210939401723389,
      "loss": 2.9933,
      "step": 54433
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3062723875045776,
      "learning_rate": 0.0005210911752804049,
      "loss": 2.9963,
      "step": 54434
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4398174285888672,
      "learning_rate": 0.0005210884103473661,
      "loss": 3.2179,
      "step": 54435
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.5077967643737793,
      "learning_rate": 0.0005210856453732227,
      "loss": 2.9382,
      "step": 54436
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.879665493965149,
      "learning_rate": 0.0005210828803579752,
      "loss": 2.879,
      "step": 54437
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0666205883026123,
      "learning_rate": 0.0005210801153016244,
      "loss": 3.1017,
      "step": 54438
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7747102975845337,
      "learning_rate": 0.0005210773502041705,
      "loss": 3.2153,
      "step": 54439
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.508968472480774,
      "learning_rate": 0.0005210745850656142,
      "loss": 3.0043,
      "step": 54440
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8091274499893188,
      "learning_rate": 0.000521071819885956,
      "loss": 3.1786,
      "step": 54441
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6370649337768555,
      "learning_rate": 0.0005210690546651963,
      "loss": 3.1045,
      "step": 54442
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5174211263656616,
      "learning_rate": 0.0005210662894033358,
      "loss": 3.2031,
      "step": 54443
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.8667685985565186,
      "learning_rate": 0.0005210635241003747,
      "loss": 3.2194,
      "step": 54444
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0698416233062744,
      "learning_rate": 0.0005210607587563139,
      "loss": 3.1787,
      "step": 54445
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.207714557647705,
      "learning_rate": 0.0005210579933711537,
      "loss": 2.9721,
      "step": 54446
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.8499867916107178,
      "learning_rate": 0.0005210552279448945,
      "loss": 2.9323,
      "step": 54447
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5237704515457153,
      "learning_rate": 0.0005210524624775371,
      "loss": 3.0546,
      "step": 54448
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8500481843948364,
      "learning_rate": 0.0005210496969690819,
      "loss": 2.9776,
      "step": 54449
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3606719970703125,
      "learning_rate": 0.0005210469314195293,
      "loss": 3.0994,
      "step": 54450
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.574022650718689,
      "learning_rate": 0.0005210441658288799,
      "loss": 3.2459,
      "step": 54451
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.478933334350586,
      "learning_rate": 0.0005210414001971344,
      "loss": 2.6944,
      "step": 54452
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.000223159790039,
      "learning_rate": 0.000521038634524293,
      "loss": 2.8534,
      "step": 54453
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3793225288391113,
      "learning_rate": 0.0005210358688103563,
      "loss": 3.2363,
      "step": 54454
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.9578287601470947,
      "learning_rate": 0.0005210331030553249,
      "loss": 2.9256,
      "step": 54455
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5797321796417236,
      "learning_rate": 0.0005210303372591993,
      "loss": 3.0653,
      "step": 54456
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7641704082489014,
      "learning_rate": 0.0005210275714219799,
      "loss": 2.9545,
      "step": 54457
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.610541820526123,
      "learning_rate": 0.0005210248055436674,
      "loss": 2.9237,
      "step": 54458
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5287435054779053,
      "learning_rate": 0.0005210220396242621,
      "loss": 2.8911,
      "step": 54459
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9850237369537354,
      "learning_rate": 0.0005210192736637648,
      "loss": 3.006,
      "step": 54460
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.92523992061615,
      "learning_rate": 0.0005210165076621758,
      "loss": 2.9551,
      "step": 54461
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8535714149475098,
      "learning_rate": 0.0005210137416194955,
      "loss": 3.1357,
      "step": 54462
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.568848967552185,
      "learning_rate": 0.0005210109755357247,
      "loss": 3.0392,
      "step": 54463
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.917527914047241,
      "learning_rate": 0.0005210082094108638,
      "loss": 3.0707,
      "step": 54464
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5969451665878296,
      "learning_rate": 0.0005210054432449134,
      "loss": 3.2004,
      "step": 54465
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5872068405151367,
      "learning_rate": 0.0005210026770378737,
      "loss": 3.0916,
      "step": 54466
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4341967105865479,
      "learning_rate": 0.0005209999107897455,
      "loss": 3.1277,
      "step": 54467
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5460304021835327,
      "learning_rate": 0.0005209971445005292,
      "loss": 3.0162,
      "step": 54468
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.028125047683716,
      "learning_rate": 0.0005209943781702255,
      "loss": 3.2333,
      "step": 54469
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.8384106159210205,
      "learning_rate": 0.0005209916117988347,
      "loss": 3.1171,
      "step": 54470
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.636677861213684,
      "learning_rate": 0.0005209888453863574,
      "loss": 2.98,
      "step": 54471
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.8311355113983154,
      "learning_rate": 0.0005209860789327941,
      "loss": 3.0305,
      "step": 54472
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1430838108062744,
      "learning_rate": 0.0005209833124381453,
      "loss": 3.2079,
      "step": 54473
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7685359716415405,
      "learning_rate": 0.0005209805459024116,
      "loss": 3.0766,
      "step": 54474
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7359257936477661,
      "learning_rate": 0.0005209777793255934,
      "loss": 3.0397,
      "step": 54475
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7763710021972656,
      "learning_rate": 0.0005209750127076911,
      "loss": 2.8823,
      "step": 54476
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4484810829162598,
      "learning_rate": 0.0005209722460487056,
      "loss": 3.0904,
      "step": 54477
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4078749418258667,
      "learning_rate": 0.0005209694793486371,
      "loss": 3.2434,
      "step": 54478
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7976362705230713,
      "learning_rate": 0.0005209667126074862,
      "loss": 3.06,
      "step": 54479
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3585370779037476,
      "learning_rate": 0.0005209639458252535,
      "loss": 2.9948,
      "step": 54480
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4958367347717285,
      "learning_rate": 0.0005209611790019393,
      "loss": 2.9039,
      "step": 54481
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7731244564056396,
      "learning_rate": 0.0005209584121375443,
      "loss": 3.0911,
      "step": 54482
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1657848358154297,
      "learning_rate": 0.000520955645232069,
      "loss": 2.9705,
      "step": 54483
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5990240573883057,
      "learning_rate": 0.0005209528782855139,
      "loss": 3.395,
      "step": 54484
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5430166721343994,
      "learning_rate": 0.0005209501112978794,
      "loss": 3.1269,
      "step": 54485
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4910879135131836,
      "learning_rate": 0.0005209473442691661,
      "loss": 3.1073,
      "step": 54486
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.044187307357788,
      "learning_rate": 0.0005209445771993745,
      "loss": 2.9673,
      "step": 54487
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5807427167892456,
      "learning_rate": 0.0005209418100885052,
      "loss": 3.1684,
      "step": 54488
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6449315547943115,
      "learning_rate": 0.0005209390429365587,
      "loss": 2.9993,
      "step": 54489
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.657753348350525,
      "learning_rate": 0.0005209362757435353,
      "loss": 3.0339,
      "step": 54490
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.653276205062866,
      "learning_rate": 0.0005209335085094358,
      "loss": 3.0811,
      "step": 54491
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.356454372406006,
      "learning_rate": 0.0005209307412342606,
      "loss": 3.0517,
      "step": 54492
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9136686325073242,
      "learning_rate": 0.0005209279739180102,
      "loss": 2.7763,
      "step": 54493
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0823652744293213,
      "learning_rate": 0.000520925206560685,
      "loss": 3.0773,
      "step": 54494
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.688245415687561,
      "learning_rate": 0.0005209224391622858,
      "loss": 2.9217,
      "step": 54495
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5121043920516968,
      "learning_rate": 0.0005209196717228128,
      "loss": 2.873,
      "step": 54496
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.656869649887085,
      "learning_rate": 0.0005209169042422667,
      "loss": 2.9494,
      "step": 54497
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.701441526412964,
      "learning_rate": 0.000520914136720648,
      "loss": 3.0948,
      "step": 54498
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7243943214416504,
      "learning_rate": 0.0005209113691579572,
      "loss": 3.1444,
      "step": 54499
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.568047285079956,
      "learning_rate": 0.0005209086015541947,
      "loss": 3.0648,
      "step": 54500
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.659650206565857,
      "learning_rate": 0.0005209058339093614,
      "loss": 3.051,
      "step": 54501
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.844689965248108,
      "learning_rate": 0.0005209030662234572,
      "loss": 3.0377,
      "step": 54502
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4943416118621826,
      "learning_rate": 0.0005209002984964832,
      "loss": 3.3379,
      "step": 54503
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6415530443191528,
      "learning_rate": 0.0005208975307284396,
      "loss": 3.0136,
      "step": 54504
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.661800503730774,
      "learning_rate": 0.0005208947629193269,
      "loss": 3.0116,
      "step": 54505
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5161919593811035,
      "learning_rate": 0.0005208919950691458,
      "loss": 2.966,
      "step": 54506
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5226860046386719,
      "learning_rate": 0.0005208892271778966,
      "loss": 3.1064,
      "step": 54507
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5735173225402832,
      "learning_rate": 0.00052088645924558,
      "loss": 3.2955,
      "step": 54508
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.778352975845337,
      "learning_rate": 0.0005208836912721965,
      "loss": 3.1068,
      "step": 54509
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.858582615852356,
      "learning_rate": 0.0005208809232577464,
      "loss": 2.8895,
      "step": 54510
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5982213020324707,
      "learning_rate": 0.0005208781552022304,
      "loss": 3.1665,
      "step": 54511
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7517173290252686,
      "learning_rate": 0.000520875387105649,
      "loss": 3.1378,
      "step": 54512
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.595629096031189,
      "learning_rate": 0.0005208726189680027,
      "loss": 3.0716,
      "step": 54513
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.613390564918518,
      "learning_rate": 0.0005208698507892921,
      "loss": 3.2155,
      "step": 54514
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.492781639099121,
      "learning_rate": 0.0005208670825695176,
      "loss": 3.0338,
      "step": 54515
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4604482650756836,
      "learning_rate": 0.0005208643143086797,
      "loss": 3.0242,
      "step": 54516
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6886438131332397,
      "learning_rate": 0.0005208615460067789,
      "loss": 3.0574,
      "step": 54517
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9438157081604004,
      "learning_rate": 0.0005208587776638157,
      "loss": 2.8131,
      "step": 54518
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.057965040206909,
      "learning_rate": 0.0005208560092797909,
      "loss": 2.7056,
      "step": 54519
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4404743909835815,
      "learning_rate": 0.0005208532408547047,
      "loss": 3.3057,
      "step": 54520
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.63465416431427,
      "learning_rate": 0.0005208504723885576,
      "loss": 3.3077,
      "step": 54521
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.561908721923828,
      "learning_rate": 0.0005208477038813505,
      "loss": 3.0513,
      "step": 54522
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2184932231903076,
      "learning_rate": 0.0005208449353330835,
      "loss": 2.8615,
      "step": 54523
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.755509376525879,
      "learning_rate": 0.0005208421667437571,
      "loss": 2.9123,
      "step": 54524
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.150010108947754,
      "learning_rate": 0.0005208393981133722,
      "loss": 2.9175,
      "step": 54525
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2709286212921143,
      "learning_rate": 0.0005208366294419289,
      "loss": 2.9808,
      "step": 54526
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.630509376525879,
      "learning_rate": 0.0005208338607294281,
      "loss": 2.9622,
      "step": 54527
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.681896448135376,
      "learning_rate": 0.00052083109197587,
      "loss": 3.1173,
      "step": 54528
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.624830961227417,
      "learning_rate": 0.0005208283231812552,
      "loss": 3.1991,
      "step": 54529
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0026488304138184,
      "learning_rate": 0.0005208255543455843,
      "loss": 3.1881,
      "step": 54530
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6778885126113892,
      "learning_rate": 0.0005208227854688578,
      "loss": 3.1925,
      "step": 54531
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3857996463775635,
      "learning_rate": 0.0005208200165510761,
      "loss": 3.0497,
      "step": 54532
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.778351902961731,
      "learning_rate": 0.0005208172475922398,
      "loss": 3.3283,
      "step": 54533
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9126935005187988,
      "learning_rate": 0.0005208144785923494,
      "loss": 3.1016,
      "step": 54534
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0626015663146973,
      "learning_rate": 0.0005208117095514055,
      "loss": 3.1848,
      "step": 54535
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8310586214065552,
      "learning_rate": 0.0005208089404694085,
      "loss": 3.0469,
      "step": 54536
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0314719676971436,
      "learning_rate": 0.0005208061713463589,
      "loss": 3.1967,
      "step": 54537
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5193452835083008,
      "learning_rate": 0.0005208034021822574,
      "loss": 3.0753,
      "step": 54538
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.724441647529602,
      "learning_rate": 0.0005208006329771042,
      "loss": 3.0095,
      "step": 54539
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7723171710968018,
      "learning_rate": 0.0005207978637309001,
      "loss": 3.0105,
      "step": 54540
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3936083316802979,
      "learning_rate": 0.0005207950944436455,
      "loss": 2.9581,
      "step": 54541
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8126674890518188,
      "learning_rate": 0.0005207923251153409,
      "loss": 3.181,
      "step": 54542
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6396375894546509,
      "learning_rate": 0.0005207895557459868,
      "loss": 2.822,
      "step": 54543
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.291903257369995,
      "learning_rate": 0.0005207867863355837,
      "loss": 2.8668,
      "step": 54544
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7064424753189087,
      "learning_rate": 0.0005207840168841324,
      "loss": 3.0305,
      "step": 54545
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3568426370620728,
      "learning_rate": 0.000520781247391633,
      "loss": 2.946,
      "step": 54546
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8166089057922363,
      "learning_rate": 0.0005207784778580863,
      "loss": 2.8774,
      "step": 54547
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.516115427017212,
      "learning_rate": 0.0005207757082834926,
      "loss": 2.9318,
      "step": 54548
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.276381731033325,
      "learning_rate": 0.0005207729386678526,
      "loss": 2.9026,
      "step": 54549
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8419028520584106,
      "learning_rate": 0.0005207701690111668,
      "loss": 3.0968,
      "step": 54550
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.664459228515625,
      "learning_rate": 0.0005207673993134356,
      "loss": 3.0773,
      "step": 54551
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.841537356376648,
      "learning_rate": 0.0005207646295746595,
      "loss": 2.8673,
      "step": 54552
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.919356107711792,
      "learning_rate": 0.0005207618597948392,
      "loss": 3.26,
      "step": 54553
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3312419652938843,
      "learning_rate": 0.000520759089973975,
      "loss": 3.3012,
      "step": 54554
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3585389852523804,
      "learning_rate": 0.0005207563201120676,
      "loss": 3.3107,
      "step": 54555
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8115968704223633,
      "learning_rate": 0.0005207535502091174,
      "loss": 3.0833,
      "step": 54556
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7760270833969116,
      "learning_rate": 0.000520750780265125,
      "loss": 3.1509,
      "step": 54557
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.905492901802063,
      "learning_rate": 0.0005207480102800908,
      "loss": 3.2229,
      "step": 54558
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.086618661880493,
      "learning_rate": 0.0005207452402540154,
      "loss": 3.1188,
      "step": 54559
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3311665058135986,
      "learning_rate": 0.0005207424701868993,
      "loss": 3.1796,
      "step": 54560
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2956137657165527,
      "learning_rate": 0.000520739700078743,
      "loss": 3.022,
      "step": 54561
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7010726928710938,
      "learning_rate": 0.0005207369299295469,
      "loss": 3.1512,
      "step": 54562
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.9464056491851807,
      "learning_rate": 0.0005207341597393119,
      "loss": 2.9233,
      "step": 54563
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7119945287704468,
      "learning_rate": 0.0005207313895080381,
      "loss": 3.2554,
      "step": 54564
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5387200117111206,
      "learning_rate": 0.0005207286192357261,
      "loss": 3.1547,
      "step": 54565
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3924853801727295,
      "learning_rate": 0.0005207258489223766,
      "loss": 3.0044,
      "step": 54566
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3449058532714844,
      "learning_rate": 0.0005207230785679901,
      "loss": 2.7476,
      "step": 54567
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.862514019012451,
      "learning_rate": 0.0005207203081725668,
      "loss": 3.0285,
      "step": 54568
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8376432657241821,
      "learning_rate": 0.0005207175377361075,
      "loss": 3.3984,
      "step": 54569
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.753528118133545,
      "learning_rate": 0.0005207147672586127,
      "loss": 2.9596,
      "step": 54570
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.0490827560424805,
      "learning_rate": 0.0005207119967400827,
      "loss": 3.1304,
      "step": 54571
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.519320011138916,
      "learning_rate": 0.0005207092261805183,
      "loss": 3.2196,
      "step": 54572
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8563177585601807,
      "learning_rate": 0.0005207064555799198,
      "loss": 2.9387,
      "step": 54573
    },
    {
      "epoch": 0.71,
      "grad_norm": 4.20258903503418,
      "learning_rate": 0.0005207036849382879,
      "loss": 3.156,
      "step": 54574
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.520815134048462,
      "learning_rate": 0.000520700914255623,
      "loss": 2.9051,
      "step": 54575
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5118173360824585,
      "learning_rate": 0.0005206981435319257,
      "loss": 3.062,
      "step": 54576
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.078861951828003,
      "learning_rate": 0.0005206953727671963,
      "loss": 3.1615,
      "step": 54577
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.946686029434204,
      "learning_rate": 0.0005206926019614356,
      "loss": 3.1604,
      "step": 54578
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0167906284332275,
      "learning_rate": 0.0005206898311146439,
      "loss": 2.9619,
      "step": 54579
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.420230746269226,
      "learning_rate": 0.0005206870602268217,
      "loss": 2.8298,
      "step": 54580
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.275658130645752,
      "learning_rate": 0.0005206842892979698,
      "loss": 3.0138,
      "step": 54581
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5420117378234863,
      "learning_rate": 0.0005206815183280885,
      "loss": 3.0344,
      "step": 54582
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4964933395385742,
      "learning_rate": 0.0005206787473171782,
      "loss": 2.9862,
      "step": 54583
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9127049446105957,
      "learning_rate": 0.0005206759762652397,
      "loss": 3.0834,
      "step": 54584
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.056236505508423,
      "learning_rate": 0.0005206732051722733,
      "loss": 3.0364,
      "step": 54585
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4248926639556885,
      "learning_rate": 0.0005206704340382796,
      "loss": 3.0889,
      "step": 54586
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5860739946365356,
      "learning_rate": 0.0005206676628632591,
      "loss": 3.1411,
      "step": 54587
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.130690813064575,
      "learning_rate": 0.0005206648916472122,
      "loss": 2.8379,
      "step": 54588
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7219550609588623,
      "learning_rate": 0.0005206621203901398,
      "loss": 3.1372,
      "step": 54589
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7197442054748535,
      "learning_rate": 0.0005206593490920419,
      "loss": 2.9906,
      "step": 54590
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7435884475708008,
      "learning_rate": 0.0005206565777529194,
      "loss": 3.2175,
      "step": 54591
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7360122203826904,
      "learning_rate": 0.0005206538063727725,
      "loss": 3.344,
      "step": 54592
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9432371854782104,
      "learning_rate": 0.0005206510349516022,
      "loss": 3.1875,
      "step": 54593
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4275912046432495,
      "learning_rate": 0.0005206482634894084,
      "loss": 3.0756,
      "step": 54594
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4667978286743164,
      "learning_rate": 0.0005206454919861922,
      "loss": 3.2167,
      "step": 54595
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7251068353652954,
      "learning_rate": 0.0005206427204419536,
      "loss": 3.214,
      "step": 54596
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.759238839149475,
      "learning_rate": 0.0005206399488566935,
      "loss": 3.0411,
      "step": 54597
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7236758470535278,
      "learning_rate": 0.0005206371772304122,
      "loss": 2.8833,
      "step": 54598
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5001174211502075,
      "learning_rate": 0.0005206344055631103,
      "loss": 3.0838,
      "step": 54599
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4557987451553345,
      "learning_rate": 0.0005206316338547882,
      "loss": 2.8915,
      "step": 54600
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5669453144073486,
      "learning_rate": 0.0005206288621054467,
      "loss": 2.9605,
      "step": 54601
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5406972169876099,
      "learning_rate": 0.000520626090315086,
      "loss": 2.9968,
      "step": 54602
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5735975503921509,
      "learning_rate": 0.0005206233184837068,
      "loss": 3.1009,
      "step": 54603
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3513537645339966,
      "learning_rate": 0.0005206205466113096,
      "loss": 3.0682,
      "step": 54604
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1405184268951416,
      "learning_rate": 0.0005206177746978948,
      "loss": 2.9873,
      "step": 54605
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6234972476959229,
      "learning_rate": 0.0005206150027434629,
      "loss": 3.012,
      "step": 54606
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5908821821212769,
      "learning_rate": 0.0005206122307480147,
      "loss": 2.5715,
      "step": 54607
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9523862600326538,
      "learning_rate": 0.0005206094587115504,
      "loss": 3.0388,
      "step": 54608
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9825143814086914,
      "learning_rate": 0.0005206066866340706,
      "loss": 2.94,
      "step": 54609
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.628874659538269,
      "learning_rate": 0.000520603914515576,
      "loss": 2.9675,
      "step": 54610
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.25632643699646,
      "learning_rate": 0.000520601142356067,
      "loss": 2.9283,
      "step": 54611
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.537874698638916,
      "learning_rate": 0.000520598370155544,
      "loss": 2.9871,
      "step": 54612
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.421788454055786,
      "learning_rate": 0.0005205955979140075,
      "loss": 2.9706,
      "step": 54613
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1245012283325195,
      "learning_rate": 0.0005205928256314582,
      "loss": 3.0297,
      "step": 54614
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8632465600967407,
      "learning_rate": 0.0005205900533078964,
      "loss": 3.0718,
      "step": 54615
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.896014928817749,
      "learning_rate": 0.000520587280943323,
      "loss": 3.0092,
      "step": 54616
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5170748233795166,
      "learning_rate": 0.0005205845085377381,
      "loss": 2.9765,
      "step": 54617
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3465147018432617,
      "learning_rate": 0.0005205817360911425,
      "loss": 3.2102,
      "step": 54618
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.067257881164551,
      "learning_rate": 0.0005205789636035364,
      "loss": 2.8607,
      "step": 54619
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6732141971588135,
      "learning_rate": 0.0005205761910749207,
      "loss": 2.8804,
      "step": 54620
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0355594158172607,
      "learning_rate": 0.0005205734185052955,
      "loss": 3.0076,
      "step": 54621
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5372860431671143,
      "learning_rate": 0.0005205706458946617,
      "loss": 3.2431,
      "step": 54622
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5863991975784302,
      "learning_rate": 0.0005205678732430197,
      "loss": 3.0311,
      "step": 54623
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4124374389648438,
      "learning_rate": 0.0005205651005503699,
      "loss": 3.1759,
      "step": 54624
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.1208431720733643,
      "learning_rate": 0.0005205623278167128,
      "loss": 3.3364,
      "step": 54625
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.848816156387329,
      "learning_rate": 0.0005205595550420491,
      "loss": 3.1505,
      "step": 54626
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0558106899261475,
      "learning_rate": 0.0005205567822263792,
      "loss": 2.9742,
      "step": 54627
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.657560110092163,
      "learning_rate": 0.0005205540093697036,
      "loss": 3.3295,
      "step": 54628
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.042651653289795,
      "learning_rate": 0.0005205512364720229,
      "loss": 3.1127,
      "step": 54629
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.513179302215576,
      "learning_rate": 0.0005205484635333375,
      "loss": 3.0414,
      "step": 54630
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.995774507522583,
      "learning_rate": 0.000520545690553648,
      "loss": 2.8275,
      "step": 54631
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.809129476547241,
      "learning_rate": 0.0005205429175329549,
      "loss": 3.2904,
      "step": 54632
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.57350492477417,
      "learning_rate": 0.0005205401444712587,
      "loss": 3.0977,
      "step": 54633
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9709242582321167,
      "learning_rate": 0.0005205373713685599,
      "loss": 3.0887,
      "step": 54634
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4486887454986572,
      "learning_rate": 0.0005205345982248591,
      "loss": 2.8645,
      "step": 54635
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.765008807182312,
      "learning_rate": 0.0005205318250401567,
      "loss": 3.1978,
      "step": 54636
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.195474624633789,
      "learning_rate": 0.0005205290518144533,
      "loss": 3.1021,
      "step": 54637
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.410224437713623,
      "learning_rate": 0.0005205262785477493,
      "loss": 3.2166,
      "step": 54638
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4811650514602661,
      "learning_rate": 0.0005205235052400455,
      "loss": 2.7264,
      "step": 54639
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9967002868652344,
      "learning_rate": 0.0005205207318913421,
      "loss": 3.2022,
      "step": 54640
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.535583257675171,
      "learning_rate": 0.0005205179585016397,
      "loss": 3.0313,
      "step": 54641
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4678887128829956,
      "learning_rate": 0.0005205151850709389,
      "loss": 3.2143,
      "step": 54642
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.591095447540283,
      "learning_rate": 0.0005205124115992401,
      "loss": 2.9421,
      "step": 54643
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.70911705493927,
      "learning_rate": 0.0005205096380865439,
      "loss": 3.0596,
      "step": 54644
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4801979064941406,
      "learning_rate": 0.0005205068645328509,
      "loss": 3.1002,
      "step": 54645
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5046396255493164,
      "learning_rate": 0.0005205040909381615,
      "loss": 2.9594,
      "step": 54646
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6648107767105103,
      "learning_rate": 0.0005205013173024761,
      "loss": 3.126,
      "step": 54647
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3991150856018066,
      "learning_rate": 0.0005204985436257953,
      "loss": 3.3259,
      "step": 54648
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4239118099212646,
      "learning_rate": 0.0005204957699081198,
      "loss": 3.0975,
      "step": 54649
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3099380731582642,
      "learning_rate": 0.00052049299614945,
      "loss": 3.0262,
      "step": 54650
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6315499544143677,
      "learning_rate": 0.0005204902223497863,
      "loss": 3.1885,
      "step": 54651
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5731121301651,
      "learning_rate": 0.0005204874485091294,
      "loss": 2.9427,
      "step": 54652
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5592154264450073,
      "learning_rate": 0.0005204846746274796,
      "loss": 3.1458,
      "step": 54653
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6156264543533325,
      "learning_rate": 0.0005204819007048376,
      "loss": 3.1023,
      "step": 54654
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9312783479690552,
      "learning_rate": 0.0005204791267412038,
      "loss": 2.9991,
      "step": 54655
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.738474726676941,
      "learning_rate": 0.000520476352736579,
      "loss": 3.0358,
      "step": 54656
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2631618976593018,
      "learning_rate": 0.0005204735786909631,
      "loss": 2.9458,
      "step": 54657
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8350757360458374,
      "learning_rate": 0.0005204708046043573,
      "loss": 3.0615,
      "step": 54658
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.904315710067749,
      "learning_rate": 0.0005204680304767617,
      "loss": 3.157,
      "step": 54659
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8418008089065552,
      "learning_rate": 0.000520465256308177,
      "loss": 3.076,
      "step": 54660
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8010826110839844,
      "learning_rate": 0.0005204624820986035,
      "loss": 3.147,
      "step": 54661
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.317080020904541,
      "learning_rate": 0.0005204597078480419,
      "loss": 3.0088,
      "step": 54662
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.515990972518921,
      "learning_rate": 0.0005204569335564928,
      "loss": 2.8223,
      "step": 54663
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5897878408432007,
      "learning_rate": 0.0005204541592239566,
      "loss": 2.852,
      "step": 54664
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3926945924758911,
      "learning_rate": 0.0005204513848504336,
      "loss": 3.0655,
      "step": 54665
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.7533955574035645,
      "learning_rate": 0.0005204486104359248,
      "loss": 2.9874,
      "step": 54666
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3436453342437744,
      "learning_rate": 0.0005204458359804302,
      "loss": 2.9492,
      "step": 54667
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7482692003250122,
      "learning_rate": 0.0005204430614839507,
      "loss": 2.9862,
      "step": 54668
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7406177520751953,
      "learning_rate": 0.0005204402869464866,
      "loss": 3.0086,
      "step": 54669
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.8124308586120605,
      "learning_rate": 0.0005204375123680385,
      "loss": 2.7996,
      "step": 54670
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.408674478530884,
      "learning_rate": 0.0005204347377486071,
      "loss": 3.053,
      "step": 54671
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.039318084716797,
      "learning_rate": 0.0005204319630881925,
      "loss": 3.0294,
      "step": 54672
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5539945363998413,
      "learning_rate": 0.0005204291883867955,
      "loss": 2.9982,
      "step": 54673
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0385870933532715,
      "learning_rate": 0.0005204264136444165,
      "loss": 2.8498,
      "step": 54674
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2803256511688232,
      "learning_rate": 0.0005204236388610562,
      "loss": 2.9412,
      "step": 54675
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7998281717300415,
      "learning_rate": 0.0005204208640367149,
      "loss": 3.0102,
      "step": 54676
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.444055438041687,
      "learning_rate": 0.0005204180891713931,
      "loss": 3.1304,
      "step": 54677
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.3230321407318115,
      "learning_rate": 0.0005204153142650916,
      "loss": 3.081,
      "step": 54678
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4543780088424683,
      "learning_rate": 0.0005204125393178105,
      "loss": 2.9646,
      "step": 54679
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.4113032817840576,
      "learning_rate": 0.0005204097643295508,
      "loss": 3.2512,
      "step": 54680
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3993544578552246,
      "learning_rate": 0.0005204069893003126,
      "loss": 3.2986,
      "step": 54681
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4119071960449219,
      "learning_rate": 0.0005204042142300967,
      "loss": 2.8785,
      "step": 54682
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.533618688583374,
      "learning_rate": 0.0005204014391189034,
      "loss": 2.8568,
      "step": 54683
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3927245140075684,
      "learning_rate": 0.0005203986639667334,
      "loss": 2.8652,
      "step": 54684
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.482369899749756,
      "learning_rate": 0.0005203958887735871,
      "loss": 3.0487,
      "step": 54685
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9726049900054932,
      "learning_rate": 0.000520393113539465,
      "loss": 3.0452,
      "step": 54686
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2635955810546875,
      "learning_rate": 0.0005203903382643677,
      "loss": 3.1887,
      "step": 54687
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8186700344085693,
      "learning_rate": 0.0005203875629482956,
      "loss": 2.9406,
      "step": 54688
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4512121677398682,
      "learning_rate": 0.0005203847875912494,
      "loss": 3.0664,
      "step": 54689
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.081665277481079,
      "learning_rate": 0.0005203820121932294,
      "loss": 3.319,
      "step": 54690
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.602707028388977,
      "learning_rate": 0.0005203792367542363,
      "loss": 2.9309,
      "step": 54691
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.2653214931488037,
      "learning_rate": 0.0005203764612742705,
      "loss": 2.921,
      "step": 54692
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7136471271514893,
      "learning_rate": 0.0005203736857533326,
      "loss": 2.9615,
      "step": 54693
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.961694359779358,
      "learning_rate": 0.000520370910191423,
      "loss": 3.2798,
      "step": 54694
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7840672731399536,
      "learning_rate": 0.0005203681345885425,
      "loss": 3.0199,
      "step": 54695
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.266305923461914,
      "learning_rate": 0.0005203653589446911,
      "loss": 2.9473,
      "step": 54696
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6427651643753052,
      "learning_rate": 0.0005203625832598698,
      "loss": 2.8272,
      "step": 54697
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3832805156707764,
      "learning_rate": 0.0005203598075340789,
      "loss": 3.2527,
      "step": 54698
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4439446926116943,
      "learning_rate": 0.000520357031767319,
      "loss": 3.0704,
      "step": 54699
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4671281576156616,
      "learning_rate": 0.0005203542559595904,
      "loss": 3.0182,
      "step": 54700
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8403196334838867,
      "learning_rate": 0.000520351480110894,
      "loss": 3.194,
      "step": 54701
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5953500270843506,
      "learning_rate": 0.00052034870422123,
      "loss": 3.2723,
      "step": 54702
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7465559244155884,
      "learning_rate": 0.000520345928290599,
      "loss": 3.1789,
      "step": 54703
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.8333921432495117,
      "learning_rate": 0.0005203431523190015,
      "loss": 3.326,
      "step": 54704
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0537571907043457,
      "learning_rate": 0.000520340376306438,
      "loss": 3.0,
      "step": 54705
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4239528179168701,
      "learning_rate": 0.0005203376002529093,
      "loss": 2.766,
      "step": 54706
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.166309356689453,
      "learning_rate": 0.0005203348241584156,
      "loss": 2.9187,
      "step": 54707
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.326094150543213,
      "learning_rate": 0.0005203320480229573,
      "loss": 3.2205,
      "step": 54708
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5956077575683594,
      "learning_rate": 0.0005203292718465353,
      "loss": 3.0081,
      "step": 54709
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2201390266418457,
      "learning_rate": 0.00052032649562915,
      "loss": 2.9258,
      "step": 54710
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6238672733306885,
      "learning_rate": 0.0005203237193708017,
      "loss": 3.1093,
      "step": 54711
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.9048333168029785,
      "learning_rate": 0.000520320943071491,
      "loss": 2.9047,
      "step": 54712
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3368561267852783,
      "learning_rate": 0.0005203181667312186,
      "loss": 3.0218,
      "step": 54713
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4724578857421875,
      "learning_rate": 0.0005203153903499849,
      "loss": 3.2622,
      "step": 54714
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7462774515151978,
      "learning_rate": 0.0005203126139277903,
      "loss": 3.1752,
      "step": 54715
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5685678720474243,
      "learning_rate": 0.0005203098374646356,
      "loss": 3.3058,
      "step": 54716
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6585882902145386,
      "learning_rate": 0.000520307060960521,
      "loss": 3.0268,
      "step": 54717
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5924956798553467,
      "learning_rate": 0.0005203042844154471,
      "loss": 3.2154,
      "step": 54718
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6202638149261475,
      "learning_rate": 0.0005203015078294146,
      "loss": 2.8448,
      "step": 54719
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4445650577545166,
      "learning_rate": 0.0005202987312024239,
      "loss": 3.2382,
      "step": 54720
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6220446825027466,
      "learning_rate": 0.0005202959545344755,
      "loss": 2.8104,
      "step": 54721
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0933704376220703,
      "learning_rate": 0.0005202931778255699,
      "loss": 3.0887,
      "step": 54722
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3269449472427368,
      "learning_rate": 0.0005202904010757077,
      "loss": 2.9069,
      "step": 54723
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8643141984939575,
      "learning_rate": 0.0005202876242848892,
      "loss": 3.1184,
      "step": 54724
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.9633772373199463,
      "learning_rate": 0.0005202848474531153,
      "loss": 3.1225,
      "step": 54725
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7455509901046753,
      "learning_rate": 0.000520282070580386,
      "loss": 3.2977,
      "step": 54726
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.953590750694275,
      "learning_rate": 0.0005202792936667024,
      "loss": 3.0432,
      "step": 54727
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4870002269744873,
      "learning_rate": 0.0005202765167120645,
      "loss": 2.9037,
      "step": 54728
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6170774698257446,
      "learning_rate": 0.0005202737397164732,
      "loss": 3.109,
      "step": 54729
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9083553552627563,
      "learning_rate": 0.0005202709626799287,
      "loss": 3.0396,
      "step": 54730
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6746762990951538,
      "learning_rate": 0.0005202681856024318,
      "loss": 3.0032,
      "step": 54731
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6748406887054443,
      "learning_rate": 0.0005202654084839828,
      "loss": 2.8889,
      "step": 54732
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5781513452529907,
      "learning_rate": 0.0005202626313245823,
      "loss": 2.9085,
      "step": 54733
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6690024137496948,
      "learning_rate": 0.0005202598541242309,
      "loss": 3.2622,
      "step": 54734
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.430040717124939,
      "learning_rate": 0.0005202570768829289,
      "loss": 2.9385,
      "step": 54735
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6234155893325806,
      "learning_rate": 0.0005202542996006771,
      "loss": 3.012,
      "step": 54736
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6587575674057007,
      "learning_rate": 0.0005202515222774758,
      "loss": 3.1107,
      "step": 54737
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.382042407989502,
      "learning_rate": 0.0005202487449133255,
      "loss": 3.0595,
      "step": 54738
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8884414434432983,
      "learning_rate": 0.000520245967508227,
      "loss": 2.9254,
      "step": 54739
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6070014238357544,
      "learning_rate": 0.0005202431900621805,
      "loss": 3.107,
      "step": 54740
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3107587099075317,
      "learning_rate": 0.0005202404125751866,
      "loss": 3.0646,
      "step": 54741
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.4892866611480713,
      "learning_rate": 0.0005202376350472459,
      "loss": 3.0905,
      "step": 54742
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.8348593711853027,
      "learning_rate": 0.0005202348574783589,
      "loss": 3.0198,
      "step": 54743
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.4240903854370117,
      "learning_rate": 0.000520232079868526,
      "loss": 3.0177,
      "step": 54744
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7241005897521973,
      "learning_rate": 0.0005202293022177479,
      "loss": 2.9392,
      "step": 54745
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.7890944480895996,
      "learning_rate": 0.000520226524526025,
      "loss": 3.1962,
      "step": 54746
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.537114381790161,
      "learning_rate": 0.0005202237467933577,
      "loss": 3.0293,
      "step": 54747
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.5685436725616455,
      "learning_rate": 0.0005202209690197468,
      "loss": 3.0041,
      "step": 54748
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3490049839019775,
      "learning_rate": 0.0005202181912051925,
      "loss": 2.7776,
      "step": 54749
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.611119031906128,
      "learning_rate": 0.0005202154133496957,
      "loss": 3.2332,
      "step": 54750
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.4490132331848145,
      "learning_rate": 0.0005202126354532566,
      "loss": 3.0271,
      "step": 54751
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.9537441730499268,
      "learning_rate": 0.0005202098575158757,
      "loss": 2.8347,
      "step": 54752
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3843848705291748,
      "learning_rate": 0.0005202070795375537,
      "loss": 2.9289,
      "step": 54753
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8611423969268799,
      "learning_rate": 0.0005202043015182912,
      "loss": 3.2471,
      "step": 54754
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9690766334533691,
      "learning_rate": 0.0005202015234580884,
      "loss": 3.0104,
      "step": 54755
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.458470344543457,
      "learning_rate": 0.000520198745356946,
      "loss": 2.8943,
      "step": 54756
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.486566185951233,
      "learning_rate": 0.0005201959672148645,
      "loss": 3.0422,
      "step": 54757
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7148641347885132,
      "learning_rate": 0.0005201931890318444,
      "loss": 2.953,
      "step": 54758
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.213305711746216,
      "learning_rate": 0.0005201904108078862,
      "loss": 2.8602,
      "step": 54759
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0140323638916016,
      "learning_rate": 0.0005201876325429906,
      "loss": 2.8839,
      "step": 54760
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.374880313873291,
      "learning_rate": 0.0005201848542371578,
      "loss": 3.0837,
      "step": 54761
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.5430498123168945,
      "learning_rate": 0.0005201820758903886,
      "loss": 3.0301,
      "step": 54762
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.477376699447632,
      "learning_rate": 0.0005201792975026832,
      "loss": 3.1845,
      "step": 54763
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.830369234085083,
      "learning_rate": 0.0005201765190740427,
      "loss": 3.021,
      "step": 54764
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6878082752227783,
      "learning_rate": 0.0005201737406044668,
      "loss": 2.6866,
      "step": 54765
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4477046728134155,
      "learning_rate": 0.0005201709620939566,
      "loss": 3.1224,
      "step": 54766
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.6173360347747803,
      "learning_rate": 0.0005201681835425126,
      "loss": 3.0705,
      "step": 54767
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.309926986694336,
      "learning_rate": 0.000520165404950135,
      "loss": 3.0728,
      "step": 54768
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9157150983810425,
      "learning_rate": 0.0005201626263168246,
      "loss": 3.1293,
      "step": 54769
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1651105880737305,
      "learning_rate": 0.0005201598476425818,
      "loss": 2.9196,
      "step": 54770
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4302173852920532,
      "learning_rate": 0.000520157068927407,
      "loss": 3.014,
      "step": 54771
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.467101812362671,
      "learning_rate": 0.000520154290171301,
      "loss": 3.0581,
      "step": 54772
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4300763607025146,
      "learning_rate": 0.0005201515113742642,
      "loss": 3.0314,
      "step": 54773
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5510098934173584,
      "learning_rate": 0.0005201487325362969,
      "loss": 3.1353,
      "step": 54774
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6441094875335693,
      "learning_rate": 0.0005201459536573999,
      "loss": 2.8325,
      "step": 54775
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.383986473083496,
      "learning_rate": 0.0005201431747375736,
      "loss": 2.8448,
      "step": 54776
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4889352321624756,
      "learning_rate": 0.0005201403957768185,
      "loss": 3.1536,
      "step": 54777
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6543512344360352,
      "learning_rate": 0.0005201376167751351,
      "loss": 2.867,
      "step": 54778
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7960703372955322,
      "learning_rate": 0.0005201348377325241,
      "loss": 2.8182,
      "step": 54779
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6487165689468384,
      "learning_rate": 0.0005201320586489858,
      "loss": 3.18,
      "step": 54780
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6029391288757324,
      "learning_rate": 0.0005201292795245209,
      "loss": 3.0532,
      "step": 54781
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.440788984298706,
      "learning_rate": 0.0005201265003591297,
      "loss": 2.9691,
      "step": 54782
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7502520084381104,
      "learning_rate": 0.0005201237211528128,
      "loss": 2.7107,
      "step": 54783
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4383928775787354,
      "learning_rate": 0.0005201209419055708,
      "loss": 2.9104,
      "step": 54784
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.479280948638916,
      "learning_rate": 0.0005201181626174041,
      "loss": 3.1805,
      "step": 54785
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3780161142349243,
      "learning_rate": 0.0005201153832883134,
      "loss": 2.9632,
      "step": 54786
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.277152180671692,
      "learning_rate": 0.000520112603918299,
      "loss": 3.1685,
      "step": 54787
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5224212408065796,
      "learning_rate": 0.0005201098245073615,
      "loss": 3.0599,
      "step": 54788
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7090140581130981,
      "learning_rate": 0.0005201070450555014,
      "loss": 3.0635,
      "step": 54789
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.1716465950012207,
      "learning_rate": 0.0005201042655627193,
      "loss": 3.1645,
      "step": 54790
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0955450534820557,
      "learning_rate": 0.0005201014860290158,
      "loss": 2.9854,
      "step": 54791
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.67678701877594,
      "learning_rate": 0.0005200987064543912,
      "loss": 3.1033,
      "step": 54792
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8193391561508179,
      "learning_rate": 0.000520095926838846,
      "loss": 2.8131,
      "step": 54793
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8959968090057373,
      "learning_rate": 0.0005200931471823809,
      "loss": 3.0496,
      "step": 54794
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3742972612380981,
      "learning_rate": 0.0005200903674849963,
      "loss": 3.2385,
      "step": 54795
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9555326700210571,
      "learning_rate": 0.0005200875877466927,
      "loss": 2.9452,
      "step": 54796
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5375784635543823,
      "learning_rate": 0.0005200848079674707,
      "loss": 3.1385,
      "step": 54797
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7368226051330566,
      "learning_rate": 0.0005200820281473308,
      "loss": 3.1909,
      "step": 54798
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.517045259475708,
      "learning_rate": 0.0005200792482862734,
      "loss": 3.117,
      "step": 54799
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.7031683921813965,
      "learning_rate": 0.0005200764683842992,
      "loss": 3.0641,
      "step": 54800
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.819732666015625,
      "learning_rate": 0.0005200736884414086,
      "loss": 3.1751,
      "step": 54801
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4729530811309814,
      "learning_rate": 0.0005200709084576022,
      "loss": 3.2128,
      "step": 54802
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5482327938079834,
      "learning_rate": 0.0005200681284328805,
      "loss": 3.2257,
      "step": 54803
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2674832344055176,
      "learning_rate": 0.0005200653483672438,
      "loss": 2.8429,
      "step": 54804
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.841291069984436,
      "learning_rate": 0.0005200625682606929,
      "loss": 2.9637,
      "step": 54805
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.42377769947052,
      "learning_rate": 0.0005200597881132282,
      "loss": 3.2203,
      "step": 54806
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.280473470687866,
      "learning_rate": 0.0005200570079248503,
      "loss": 2.9876,
      "step": 54807
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9054877758026123,
      "learning_rate": 0.0005200542276955597,
      "loss": 3.0628,
      "step": 54808
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5505969524383545,
      "learning_rate": 0.0005200514474253566,
      "loss": 2.9006,
      "step": 54809
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4967886209487915,
      "learning_rate": 0.0005200486671142421,
      "loss": 3.036,
      "step": 54810
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.219236373901367,
      "learning_rate": 0.0005200458867622162,
      "loss": 3.0513,
      "step": 54811
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.519904375076294,
      "learning_rate": 0.0005200431063692797,
      "loss": 2.9511,
      "step": 54812
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4606956243515015,
      "learning_rate": 0.000520040325935433,
      "loss": 3.0748,
      "step": 54813
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.1735351085662842,
      "learning_rate": 0.0005200375454606766,
      "loss": 2.9987,
      "step": 54814
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8835500478744507,
      "learning_rate": 0.0005200347649450111,
      "loss": 2.8176,
      "step": 54815
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9955812692642212,
      "learning_rate": 0.000520031984388437,
      "loss": 3.1102,
      "step": 54816
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5835505723953247,
      "learning_rate": 0.0005200292037909548,
      "loss": 3.0617,
      "step": 54817
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5700503587722778,
      "learning_rate": 0.000520026423152565,
      "loss": 2.8612,
      "step": 54818
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6774811744689941,
      "learning_rate": 0.0005200236424732682,
      "loss": 2.8286,
      "step": 54819
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5599886178970337,
      "learning_rate": 0.0005200208617530648,
      "loss": 3.2003,
      "step": 54820
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3206382989883423,
      "learning_rate": 0.0005200180809919554,
      "loss": 3.2188,
      "step": 54821
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7192754745483398,
      "learning_rate": 0.0005200153001899405,
      "loss": 2.972,
      "step": 54822
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8748831748962402,
      "learning_rate": 0.0005200125193470205,
      "loss": 3.0409,
      "step": 54823
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5129908323287964,
      "learning_rate": 0.0005200097384631961,
      "loss": 3.0351,
      "step": 54824
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6536369323730469,
      "learning_rate": 0.0005200069575384676,
      "loss": 3.1976,
      "step": 54825
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7315714359283447,
      "learning_rate": 0.0005200041765728358,
      "loss": 2.8344,
      "step": 54826
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6350252628326416,
      "learning_rate": 0.0005200013955663011,
      "loss": 3.0801,
      "step": 54827
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7061939239501953,
      "learning_rate": 0.0005199986145188639,
      "loss": 2.8099,
      "step": 54828
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.683512568473816,
      "learning_rate": 0.0005199958334305248,
      "loss": 3.2507,
      "step": 54829
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4976574182510376,
      "learning_rate": 0.0005199930523012845,
      "loss": 3.0532,
      "step": 54830
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3025215864181519,
      "learning_rate": 0.000519990271131143,
      "loss": 3.0603,
      "step": 54831
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7089611291885376,
      "learning_rate": 0.0005199874899201014,
      "loss": 3.0338,
      "step": 54832
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.3593958616256714,
      "learning_rate": 0.00051998470866816,
      "loss": 2.8464,
      "step": 54833
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.637906551361084,
      "learning_rate": 0.0005199819273753192,
      "loss": 3.1083,
      "step": 54834
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.136859893798828,
      "learning_rate": 0.0005199791460415796,
      "loss": 3.21,
      "step": 54835
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4782131910324097,
      "learning_rate": 0.0005199763646669417,
      "loss": 2.8819,
      "step": 54836
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4484201669692993,
      "learning_rate": 0.0005199735832514061,
      "loss": 3.0515,
      "step": 54837
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.888054132461548,
      "learning_rate": 0.0005199708017949732,
      "loss": 2.9283,
      "step": 54838
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.249140977859497,
      "learning_rate": 0.0005199680202976438,
      "loss": 3.1527,
      "step": 54839
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5501277446746826,
      "learning_rate": 0.0005199652387594179,
      "loss": 3.0014,
      "step": 54840
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.25948429107666,
      "learning_rate": 0.0005199624571802964,
      "loss": 2.9099,
      "step": 54841
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8465161323547363,
      "learning_rate": 0.0005199596755602798,
      "loss": 3.0514,
      "step": 54842
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.927194356918335,
      "learning_rate": 0.0005199568938993685,
      "loss": 3.1617,
      "step": 54843
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5828399658203125,
      "learning_rate": 0.0005199541121975631,
      "loss": 2.914,
      "step": 54844
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2094340324401855,
      "learning_rate": 0.0005199513304548639,
      "loss": 3.201,
      "step": 54845
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.16340970993042,
      "learning_rate": 0.0005199485486712718,
      "loss": 3.0507,
      "step": 54846
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.532679796218872,
      "learning_rate": 0.000519945766846787,
      "loss": 3.3156,
      "step": 54847
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9582799673080444,
      "learning_rate": 0.0005199429849814101,
      "loss": 2.8355,
      "step": 54848
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.801660180091858,
      "learning_rate": 0.0005199402030751419,
      "loss": 2.8359,
      "step": 54849
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6362766027450562,
      "learning_rate": 0.0005199374211279825,
      "loss": 2.9426,
      "step": 54850
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6363242864608765,
      "learning_rate": 0.0005199346391399325,
      "loss": 2.8652,
      "step": 54851
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7943400144577026,
      "learning_rate": 0.0005199318571109925,
      "loss": 2.9961,
      "step": 54852
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4020226001739502,
      "learning_rate": 0.0005199290750411631,
      "loss": 2.9772,
      "step": 54853
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.100609064102173,
      "learning_rate": 0.0005199262929304447,
      "loss": 2.9567,
      "step": 54854
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4236398935317993,
      "learning_rate": 0.0005199235107788379,
      "loss": 2.9843,
      "step": 54855
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6582238674163818,
      "learning_rate": 0.0005199207285863431,
      "loss": 3.1218,
      "step": 54856
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.5748748779296875,
      "learning_rate": 0.000519917946352961,
      "loss": 3.0607,
      "step": 54857
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.754589319229126,
      "learning_rate": 0.000519915164078692,
      "loss": 3.0897,
      "step": 54858
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0996384620666504,
      "learning_rate": 0.0005199123817635365,
      "loss": 3.0939,
      "step": 54859
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0219924449920654,
      "learning_rate": 0.000519909599407495,
      "loss": 3.4188,
      "step": 54860
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.8175400495529175,
      "learning_rate": 0.0005199068170105685,
      "loss": 3.1908,
      "step": 54861
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5746068954467773,
      "learning_rate": 0.0005199040345727569,
      "loss": 3.1238,
      "step": 54862
    },
    {
      "epoch": 0.71,
      "grad_norm": 4.5392746925354,
      "learning_rate": 0.000519901252094061,
      "loss": 3.0211,
      "step": 54863
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.7250726222991943,
      "learning_rate": 0.0005198984695744815,
      "loss": 3.0637,
      "step": 54864
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7236928939819336,
      "learning_rate": 0.0005198956870140187,
      "loss": 2.7873,
      "step": 54865
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6032270193099976,
      "learning_rate": 0.000519892904412673,
      "loss": 3.1552,
      "step": 54866
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.031571388244629,
      "learning_rate": 0.0005198901217704451,
      "loss": 2.8198,
      "step": 54867
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6862201690673828,
      "learning_rate": 0.0005198873390873355,
      "loss": 3.0956,
      "step": 54868
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.58083975315094,
      "learning_rate": 0.0005198845563633447,
      "loss": 3.2476,
      "step": 54869
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.32200288772583,
      "learning_rate": 0.0005198817735984733,
      "loss": 3.1862,
      "step": 54870
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7146137952804565,
      "learning_rate": 0.0005198789907927216,
      "loss": 3.1428,
      "step": 54871
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0997121334075928,
      "learning_rate": 0.0005198762079460902,
      "loss": 3.1546,
      "step": 54872
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.540431022644043,
      "learning_rate": 0.0005198734250585797,
      "loss": 3.014,
      "step": 54873
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4297276735305786,
      "learning_rate": 0.0005198706421301907,
      "loss": 3.0602,
      "step": 54874
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.470655083656311,
      "learning_rate": 0.0005198678591609235,
      "loss": 3.1146,
      "step": 54875
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2906296253204346,
      "learning_rate": 0.0005198650761507788,
      "loss": 2.9288,
      "step": 54876
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0581772327423096,
      "learning_rate": 0.0005198622930997569,
      "loss": 2.9731,
      "step": 54877
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.6052188873291016,
      "learning_rate": 0.0005198595100078584,
      "loss": 3.0031,
      "step": 54878
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6660518646240234,
      "learning_rate": 0.0005198567268750841,
      "loss": 2.9885,
      "step": 54879
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.501194953918457,
      "learning_rate": 0.0005198539437014342,
      "loss": 3.027,
      "step": 54880
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2394590377807617,
      "learning_rate": 0.0005198511604869091,
      "loss": 2.9616,
      "step": 54881
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9042067527770996,
      "learning_rate": 0.0005198483772315098,
      "loss": 3.001,
      "step": 54882
    },
    {
      "epoch": 0.71,
      "grad_norm": 4.82993745803833,
      "learning_rate": 0.0005198455939352364,
      "loss": 3.1098,
      "step": 54883
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.423543930053711,
      "learning_rate": 0.0005198428105980896,
      "loss": 2.8758,
      "step": 54884
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7260233163833618,
      "learning_rate": 0.0005198400272200698,
      "loss": 3.2238,
      "step": 54885
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7195788621902466,
      "learning_rate": 0.0005198372438011776,
      "loss": 2.8921,
      "step": 54886
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.2725603580474854,
      "learning_rate": 0.0005198344603414136,
      "loss": 2.9669,
      "step": 54887
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9602251052856445,
      "learning_rate": 0.0005198316768407782,
      "loss": 3.0532,
      "step": 54888
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7789642810821533,
      "learning_rate": 0.0005198288932992719,
      "loss": 2.956,
      "step": 54889
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6903772354125977,
      "learning_rate": 0.0005198261097168952,
      "loss": 3.2028,
      "step": 54890
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.6646125316619873,
      "learning_rate": 0.0005198233260936488,
      "loss": 2.9105,
      "step": 54891
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.0590827465057373,
      "learning_rate": 0.000519820542429533,
      "loss": 3.0143,
      "step": 54892
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.9581040143966675,
      "learning_rate": 0.0005198177587245485,
      "loss": 2.9001,
      "step": 54893
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.2898805141448975,
      "learning_rate": 0.0005198149749786956,
      "loss": 2.9136,
      "step": 54894
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.0954785346984863,
      "learning_rate": 0.000519812191191975,
      "loss": 2.8199,
      "step": 54895
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.932268500328064,
      "learning_rate": 0.0005198094073643872,
      "loss": 3.1012,
      "step": 54896
    },
    {
      "epoch": 0.71,
      "grad_norm": 2.3475570678710938,
      "learning_rate": 0.0005198066234959328,
      "loss": 2.9738,
      "step": 54897
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.7408767938613892,
      "learning_rate": 0.0005198038395866119,
      "loss": 3.0905,
      "step": 54898
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.859622597694397,
      "learning_rate": 0.0005198010556364256,
      "loss": 2.9621,
      "step": 54899
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4958136081695557,
      "learning_rate": 0.000519798271645374,
      "loss": 3.0443,
      "step": 54900
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4877139329910278,
      "learning_rate": 0.0005197954876134577,
      "loss": 3.0289,
      "step": 54901
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.686207890510559,
      "learning_rate": 0.0005197927035406774,
      "loss": 2.7559,
      "step": 54902
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.561142921447754,
      "learning_rate": 0.0005197899194270334,
      "loss": 2.7662,
      "step": 54903
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6438897848129272,
      "learning_rate": 0.0005197871352725263,
      "loss": 3.2515,
      "step": 54904
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.4379620552062988,
      "learning_rate": 0.0005197843510771567,
      "loss": 3.1997,
      "step": 54905
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6091959476470947,
      "learning_rate": 0.000519781566840925,
      "loss": 3.0273,
      "step": 54906
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.46324622631073,
      "learning_rate": 0.0005197787825638318,
      "loss": 3.1654,
      "step": 54907
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.455302119255066,
      "learning_rate": 0.0005197759982458776,
      "loss": 3.358,
      "step": 54908
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.729372501373291,
      "learning_rate": 0.0005197732138870628,
      "loss": 3.2284,
      "step": 54909
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.6333000659942627,
      "learning_rate": 0.000519770429487388,
      "loss": 2.899,
      "step": 54910
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.5017123222351074,
      "learning_rate": 0.0005197676450468537,
      "loss": 2.818,
      "step": 54911
    },
    {
      "epoch": 0.71,
      "grad_norm": 1.334820032119751,
      "learning_rate": 0.0005197648605654605,
      "loss": 3.1066,
      "step": 54912
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3597371578216553,
      "learning_rate": 0.000519762076043209,
      "loss": 2.9814,
      "step": 54913
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6123853921890259,
      "learning_rate": 0.0005197592914800993,
      "loss": 2.9809,
      "step": 54914
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.896951913833618,
      "learning_rate": 0.0005197565068761324,
      "loss": 3.0412,
      "step": 54915
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6221927404403687,
      "learning_rate": 0.0005197537222313087,
      "loss": 3.2033,
      "step": 54916
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8812514543533325,
      "learning_rate": 0.0005197509375456285,
      "loss": 3.0574,
      "step": 54917
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.098292112350464,
      "learning_rate": 0.0005197481528190923,
      "loss": 3.3727,
      "step": 54918
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8139625787734985,
      "learning_rate": 0.000519745368051701,
      "loss": 3.0458,
      "step": 54919
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3288369178771973,
      "learning_rate": 0.0005197425832434548,
      "loss": 3.3339,
      "step": 54920
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4603841304779053,
      "learning_rate": 0.0005197397983943544,
      "loss": 3.1994,
      "step": 54921
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5375162363052368,
      "learning_rate": 0.0005197370135044001,
      "loss": 2.8536,
      "step": 54922
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7929021120071411,
      "learning_rate": 0.0005197342285735927,
      "loss": 3.1198,
      "step": 54923
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4897876977920532,
      "learning_rate": 0.0005197314436019324,
      "loss": 3.1714,
      "step": 54924
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.511230230331421,
      "learning_rate": 0.0005197286585894199,
      "loss": 2.8949,
      "step": 54925
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6655385494232178,
      "learning_rate": 0.0005197258735360557,
      "loss": 3.1014,
      "step": 54926
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5314289331436157,
      "learning_rate": 0.0005197230884418404,
      "loss": 3.0841,
      "step": 54927
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7322980165481567,
      "learning_rate": 0.0005197203033067743,
      "loss": 3.1024,
      "step": 54928
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9193205833435059,
      "learning_rate": 0.0005197175181308581,
      "loss": 3.0659,
      "step": 54929
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.640297532081604,
      "learning_rate": 0.0005197147329140923,
      "loss": 3.0512,
      "step": 54930
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6837211847305298,
      "learning_rate": 0.0005197119476564773,
      "loss": 3.0219,
      "step": 54931
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7655376195907593,
      "learning_rate": 0.0005197091623580138,
      "loss": 3.1047,
      "step": 54932
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.513983964920044,
      "learning_rate": 0.0005197063770187023,
      "loss": 2.8937,
      "step": 54933
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7945514917373657,
      "learning_rate": 0.000519703591638543,
      "loss": 3.2817,
      "step": 54934
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9620567560195923,
      "learning_rate": 0.0005197008062175369,
      "loss": 2.9436,
      "step": 54935
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5089590549468994,
      "learning_rate": 0.0005196980207556841,
      "loss": 2.9397,
      "step": 54936
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.9691548347473145,
      "learning_rate": 0.0005196952352529853,
      "loss": 2.8952,
      "step": 54937
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.05703067779541,
      "learning_rate": 0.0005196924497094411,
      "loss": 3.2541,
      "step": 54938
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8219600915908813,
      "learning_rate": 0.0005196896641250519,
      "loss": 2.9769,
      "step": 54939
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.261522889137268,
      "learning_rate": 0.0005196868784998181,
      "loss": 2.8517,
      "step": 54940
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.651014804840088,
      "learning_rate": 0.0005196840928337406,
      "loss": 2.9575,
      "step": 54941
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.772679090499878,
      "learning_rate": 0.0005196813071268196,
      "loss": 3.0825,
      "step": 54942
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6295418739318848,
      "learning_rate": 0.0005196785213790556,
      "loss": 3.0001,
      "step": 54943
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4702337980270386,
      "learning_rate": 0.0005196757355904493,
      "loss": 3.067,
      "step": 54944
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.586669683456421,
      "learning_rate": 0.000519672949761001,
      "loss": 3.2175,
      "step": 54945
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9747450351715088,
      "learning_rate": 0.0005196701638907115,
      "loss": 3.0342,
      "step": 54946
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5388803482055664,
      "learning_rate": 0.0005196673779795811,
      "loss": 2.8376,
      "step": 54947
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3701910972595215,
      "learning_rate": 0.0005196645920276104,
      "loss": 2.951,
      "step": 54948
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.457127332687378,
      "learning_rate": 0.0005196618060348,
      "loss": 3.0312,
      "step": 54949
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.226318120956421,
      "learning_rate": 0.0005196590200011502,
      "loss": 2.9045,
      "step": 54950
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2051565647125244,
      "learning_rate": 0.0005196562339266617,
      "loss": 3.2837,
      "step": 54951
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3439152240753174,
      "learning_rate": 0.000519653447811335,
      "loss": 2.9434,
      "step": 54952
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6251392364501953,
      "learning_rate": 0.0005196506616551705,
      "loss": 3.0826,
      "step": 54953
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7276015281677246,
      "learning_rate": 0.0005196478754581688,
      "loss": 3.3469,
      "step": 54954
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3118551969528198,
      "learning_rate": 0.0005196450892203305,
      "loss": 3.2793,
      "step": 54955
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.522484302520752,
      "learning_rate": 0.0005196423029416559,
      "loss": 2.8807,
      "step": 54956
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.6998040676116943,
      "learning_rate": 0.0005196395166221457,
      "loss": 3.1368,
      "step": 54957
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.2236979007720947,
      "learning_rate": 0.0005196367302618004,
      "loss": 2.6845,
      "step": 54958
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6799567937850952,
      "learning_rate": 0.0005196339438606206,
      "loss": 3.0024,
      "step": 54959
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4812517166137695,
      "learning_rate": 0.0005196311574186065,
      "loss": 3.1101,
      "step": 54960
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.146343946456909,
      "learning_rate": 0.000519628370935759,
      "loss": 3.2223,
      "step": 54961
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6914103031158447,
      "learning_rate": 0.0005196255844120782,
      "loss": 3.1148,
      "step": 54962
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2921924591064453,
      "learning_rate": 0.000519622797847565,
      "loss": 3.1343,
      "step": 54963
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5278733968734741,
      "learning_rate": 0.0005196200112422199,
      "loss": 3.1235,
      "step": 54964
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8030214309692383,
      "learning_rate": 0.000519617224596043,
      "loss": 3.0075,
      "step": 54965
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9677964448928833,
      "learning_rate": 0.0005196144379090354,
      "loss": 3.1767,
      "step": 54966
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3825289011001587,
      "learning_rate": 0.0005196116511811972,
      "loss": 3.046,
      "step": 54967
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4051927328109741,
      "learning_rate": 0.000519608864412529,
      "loss": 3.0441,
      "step": 54968
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5159552097320557,
      "learning_rate": 0.0005196060776030316,
      "loss": 3.0848,
      "step": 54969
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.122645854949951,
      "learning_rate": 0.000519603290752705,
      "loss": 3.0691,
      "step": 54970
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7510969638824463,
      "learning_rate": 0.0005196005038615501,
      "loss": 2.8989,
      "step": 54971
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6149238348007202,
      "learning_rate": 0.0005195977169295673,
      "loss": 2.9481,
      "step": 54972
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5862199068069458,
      "learning_rate": 0.0005195949299567572,
      "loss": 3.0003,
      "step": 54973
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8267133235931396,
      "learning_rate": 0.0005195921429431203,
      "loss": 3.0374,
      "step": 54974
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4026737213134766,
      "learning_rate": 0.000519589355888657,
      "loss": 3.121,
      "step": 54975
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4433090686798096,
      "learning_rate": 0.0005195865687933678,
      "loss": 3.1206,
      "step": 54976
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.935192346572876,
      "learning_rate": 0.0005195837816572535,
      "loss": 3.0676,
      "step": 54977
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5567353963851929,
      "learning_rate": 0.0005195809944803144,
      "loss": 2.9349,
      "step": 54978
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8909775018692017,
      "learning_rate": 0.0005195782072625509,
      "loss": 3.0112,
      "step": 54979
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.705702543258667,
      "learning_rate": 0.0005195754200039638,
      "loss": 3.0742,
      "step": 54980
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5959274768829346,
      "learning_rate": 0.0005195726327045534,
      "loss": 3.1509,
      "step": 54981
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6681132316589355,
      "learning_rate": 0.0005195698453643203,
      "loss": 2.9391,
      "step": 54982
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5156755447387695,
      "learning_rate": 0.0005195670579832651,
      "loss": 3.1367,
      "step": 54983
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7072581052780151,
      "learning_rate": 0.0005195642705613883,
      "loss": 2.9721,
      "step": 54984
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3564552068710327,
      "learning_rate": 0.0005195614830986902,
      "loss": 2.9204,
      "step": 54985
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6177527904510498,
      "learning_rate": 0.0005195586955951716,
      "loss": 2.8971,
      "step": 54986
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4833091497421265,
      "learning_rate": 0.0005195559080508328,
      "loss": 2.7995,
      "step": 54987
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4868546724319458,
      "learning_rate": 0.0005195531204656744,
      "loss": 3.2222,
      "step": 54988
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8039758205413818,
      "learning_rate": 0.0005195503328396969,
      "loss": 3.0381,
      "step": 54989
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7488704919815063,
      "learning_rate": 0.000519547545172901,
      "loss": 3.0711,
      "step": 54990
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.450128197669983,
      "learning_rate": 0.0005195447574652869,
      "loss": 2.9524,
      "step": 54991
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4679272174835205,
      "learning_rate": 0.0005195419697168554,
      "loss": 3.1373,
      "step": 54992
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.340864896774292,
      "learning_rate": 0.0005195391819276069,
      "loss": 2.912,
      "step": 54993
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4443707466125488,
      "learning_rate": 0.000519536394097542,
      "loss": 3.0114,
      "step": 54994
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.869470477104187,
      "learning_rate": 0.000519533606226661,
      "loss": 3.0455,
      "step": 54995
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5978167057037354,
      "learning_rate": 0.0005195308183149645,
      "loss": 3.0721,
      "step": 54996
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5354740619659424,
      "learning_rate": 0.0005195280303624533,
      "loss": 3.0665,
      "step": 54997
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1162185668945312,
      "learning_rate": 0.0005195252423691275,
      "loss": 2.8224,
      "step": 54998
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.05357027053833,
      "learning_rate": 0.0005195224543349879,
      "loss": 2.8683,
      "step": 54999
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5733587741851807,
      "learning_rate": 0.0005195196662600349,
      "loss": 2.9778,
      "step": 55000
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6252601146698,
      "learning_rate": 0.0005195168781442691,
      "loss": 2.9693,
      "step": 55001
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8201755285263062,
      "learning_rate": 0.0005195140899876909,
      "loss": 3.2146,
      "step": 55002
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8548717498779297,
      "learning_rate": 0.000519511301790301,
      "loss": 3.0158,
      "step": 55003
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.725900650024414,
      "learning_rate": 0.0005195085135520997,
      "loss": 3.0641,
      "step": 55004
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3822813034057617,
      "learning_rate": 0.0005195057252730876,
      "loss": 3.076,
      "step": 55005
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.068006992340088,
      "learning_rate": 0.0005195029369532653,
      "loss": 2.9564,
      "step": 55006
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.427596092224121,
      "learning_rate": 0.0005195001485926333,
      "loss": 2.9195,
      "step": 55007
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7675228118896484,
      "learning_rate": 0.000519497360191192,
      "loss": 3.0993,
      "step": 55008
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.3117072582244873,
      "learning_rate": 0.000519494571748942,
      "loss": 2.9574,
      "step": 55009
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5661940574645996,
      "learning_rate": 0.000519491783265884,
      "loss": 3.0274,
      "step": 55010
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4616779088974,
      "learning_rate": 0.0005194889947420181,
      "loss": 3.2733,
      "step": 55011
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8600921630859375,
      "learning_rate": 0.0005194862061773452,
      "loss": 3.2269,
      "step": 55012
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.7573482990264893,
      "learning_rate": 0.0005194834175718655,
      "loss": 2.9106,
      "step": 55013
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5064600706100464,
      "learning_rate": 0.0005194806289255798,
      "loss": 2.9454,
      "step": 55014
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6096539497375488,
      "learning_rate": 0.0005194778402384886,
      "loss": 3.1034,
      "step": 55015
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.63014554977417,
      "learning_rate": 0.0005194750515105922,
      "loss": 3.2143,
      "step": 55016
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.721744179725647,
      "learning_rate": 0.0005194722627418913,
      "loss": 3.028,
      "step": 55017
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6101839542388916,
      "learning_rate": 0.0005194694739323863,
      "loss": 3.1756,
      "step": 55018
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0648627281188965,
      "learning_rate": 0.0005194666850820778,
      "loss": 3.0218,
      "step": 55019
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.272430419921875,
      "learning_rate": 0.0005194638961909663,
      "loss": 2.7763,
      "step": 55020
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2680665254592896,
      "learning_rate": 0.0005194611072590524,
      "loss": 3.2746,
      "step": 55021
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.623348355293274,
      "learning_rate": 0.0005194583182863365,
      "loss": 3.1508,
      "step": 55022
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7946321964263916,
      "learning_rate": 0.0005194555292728191,
      "loss": 2.8769,
      "step": 55023
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.306140184402466,
      "learning_rate": 0.0005194527402185009,
      "loss": 2.8888,
      "step": 55024
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5813100337982178,
      "learning_rate": 0.0005194499511233822,
      "loss": 3.2834,
      "step": 55025
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.0327136516571045,
      "learning_rate": 0.0005194471619874635,
      "loss": 2.8952,
      "step": 55026
    },
    {
      "epoch": 0.72,
      "grad_norm": 5.137756824493408,
      "learning_rate": 0.0005194443728107456,
      "loss": 3.0938,
      "step": 55027
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6164872646331787,
      "learning_rate": 0.0005194415835932288,
      "loss": 3.1837,
      "step": 55028
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5576999187469482,
      "learning_rate": 0.0005194387943349136,
      "loss": 3.1016,
      "step": 55029
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.0122592449188232,
      "learning_rate": 0.0005194360050358007,
      "loss": 3.1182,
      "step": 55030
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1630606651306152,
      "learning_rate": 0.0005194332156958904,
      "loss": 2.9436,
      "step": 55031
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5360807180404663,
      "learning_rate": 0.0005194304263151834,
      "loss": 3.0506,
      "step": 55032
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0432374477386475,
      "learning_rate": 0.0005194276368936802,
      "loss": 2.8285,
      "step": 55033
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.651903748512268,
      "learning_rate": 0.0005194248474313811,
      "loss": 3.0365,
      "step": 55034
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.740553855895996,
      "learning_rate": 0.0005194220579282869,
      "loss": 3.0229,
      "step": 55035
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4866992235183716,
      "learning_rate": 0.000519419268384398,
      "loss": 3.194,
      "step": 55036
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.454221487045288,
      "learning_rate": 0.0005194164787997148,
      "loss": 3.0308,
      "step": 55037
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6698486804962158,
      "learning_rate": 0.0005194136891742379,
      "loss": 2.8648,
      "step": 55038
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4480023384094238,
      "learning_rate": 0.000519410899507968,
      "loss": 2.7838,
      "step": 55039
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6613118648529053,
      "learning_rate": 0.0005194081098009054,
      "loss": 2.9599,
      "step": 55040
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.404962420463562,
      "learning_rate": 0.0005194053200530508,
      "loss": 3.0828,
      "step": 55041
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5160096883773804,
      "learning_rate": 0.0005194025302644045,
      "loss": 3.0388,
      "step": 55042
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.789905071258545,
      "learning_rate": 0.0005193997404349671,
      "loss": 2.8482,
      "step": 55043
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.230487823486328,
      "learning_rate": 0.0005193969505647393,
      "loss": 3.0461,
      "step": 55044
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.522922158241272,
      "learning_rate": 0.0005193941606537213,
      "loss": 3.0529,
      "step": 55045
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8481426239013672,
      "learning_rate": 0.0005193913707019138,
      "loss": 3.0814,
      "step": 55046
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4813637733459473,
      "learning_rate": 0.0005193885807093174,
      "loss": 3.0796,
      "step": 55047
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8558605909347534,
      "learning_rate": 0.0005193857906759325,
      "loss": 3.1257,
      "step": 55048
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4583879709243774,
      "learning_rate": 0.0005193830006017596,
      "loss": 3.0175,
      "step": 55049
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6366993188858032,
      "learning_rate": 0.0005193802104867994,
      "loss": 3.0274,
      "step": 55050
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.510752558708191,
      "learning_rate": 0.0005193774203310521,
      "loss": 2.7866,
      "step": 55051
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7438266277313232,
      "learning_rate": 0.0005193746301345184,
      "loss": 3.0439,
      "step": 55052
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6322135925292969,
      "learning_rate": 0.0005193718398971989,
      "loss": 2.9432,
      "step": 55053
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1544735431671143,
      "learning_rate": 0.000519369049619094,
      "loss": 2.8898,
      "step": 55054
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3852150440216064,
      "learning_rate": 0.0005193662593002042,
      "loss": 2.9717,
      "step": 55055
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2842977046966553,
      "learning_rate": 0.0005193634689405302,
      "loss": 3.0577,
      "step": 55056
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.438041925430298,
      "learning_rate": 0.0005193606785400723,
      "loss": 2.8214,
      "step": 55057
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4248932600021362,
      "learning_rate": 0.0005193578880988312,
      "loss": 2.8987,
      "step": 55058
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5967063903808594,
      "learning_rate": 0.0005193550976168072,
      "loss": 2.9465,
      "step": 55059
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3755899667739868,
      "learning_rate": 0.000519352307094001,
      "loss": 2.9901,
      "step": 55060
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.787377119064331,
      "learning_rate": 0.0005193495165304131,
      "loss": 3.0806,
      "step": 55061
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.562591552734375,
      "learning_rate": 0.0005193467259260438,
      "loss": 2.9764,
      "step": 55062
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1500084400177,
      "learning_rate": 0.000519343935280894,
      "loss": 2.8719,
      "step": 55063
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6655046939849854,
      "learning_rate": 0.000519341144594964,
      "loss": 3.0124,
      "step": 55064
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.164532423019409,
      "learning_rate": 0.0005193383538682543,
      "loss": 2.8791,
      "step": 55065
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.26012921333313,
      "learning_rate": 0.0005193355631007655,
      "loss": 3.2438,
      "step": 55066
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9221062660217285,
      "learning_rate": 0.000519332772292498,
      "loss": 3.0124,
      "step": 55067
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5678831338882446,
      "learning_rate": 0.0005193299814434525,
      "loss": 2.9186,
      "step": 55068
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8276963233947754,
      "learning_rate": 0.0005193271905536294,
      "loss": 3.0993,
      "step": 55069
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8355064392089844,
      "learning_rate": 0.0005193243996230292,
      "loss": 3.0936,
      "step": 55070
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.516216516494751,
      "learning_rate": 0.0005193216086516525,
      "loss": 3.1465,
      "step": 55071
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.722031354904175,
      "learning_rate": 0.0005193188176394996,
      "loss": 2.9548,
      "step": 55072
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.9559433460235596,
      "learning_rate": 0.0005193160265865714,
      "loss": 2.9382,
      "step": 55073
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1864426136016846,
      "learning_rate": 0.000519313235492868,
      "loss": 3.2124,
      "step": 55074
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4493885040283203,
      "learning_rate": 0.0005193104443583903,
      "loss": 3.2485,
      "step": 55075
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5947256088256836,
      "learning_rate": 0.0005193076531831386,
      "loss": 2.9953,
      "step": 55076
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.335879325866699,
      "learning_rate": 0.0005193048619671135,
      "loss": 2.9568,
      "step": 55077
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8083317279815674,
      "learning_rate": 0.0005193020707103156,
      "loss": 2.9995,
      "step": 55078
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.209636688232422,
      "learning_rate": 0.0005192992794127451,
      "loss": 3.2249,
      "step": 55079
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5210466384887695,
      "learning_rate": 0.0005192964880744028,
      "loss": 3.0479,
      "step": 55080
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0383217334747314,
      "learning_rate": 0.000519293696695289,
      "loss": 3.1458,
      "step": 55081
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.614298701286316,
      "learning_rate": 0.0005192909052754046,
      "loss": 3.024,
      "step": 55082
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2428390979766846,
      "learning_rate": 0.0005192881138147497,
      "loss": 2.986,
      "step": 55083
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.9599926471710205,
      "learning_rate": 0.0005192853223133252,
      "loss": 3.0172,
      "step": 55084
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4568417072296143,
      "learning_rate": 0.0005192825307711313,
      "loss": 3.3391,
      "step": 55085
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.105862855911255,
      "learning_rate": 0.0005192797391881686,
      "loss": 2.9123,
      "step": 55086
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5362343788146973,
      "learning_rate": 0.0005192769475644377,
      "loss": 3.1214,
      "step": 55087
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.792729139328003,
      "learning_rate": 0.0005192741558999391,
      "loss": 3.0924,
      "step": 55088
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8236489295959473,
      "learning_rate": 0.0005192713641946732,
      "loss": 2.7789,
      "step": 55089
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8751276731491089,
      "learning_rate": 0.0005192685724486407,
      "loss": 3.1316,
      "step": 55090
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7116988897323608,
      "learning_rate": 0.000519265780661842,
      "loss": 2.9562,
      "step": 55091
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5680588483810425,
      "learning_rate": 0.0005192629888342777,
      "loss": 3.2098,
      "step": 55092
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4963229894638062,
      "learning_rate": 0.0005192601969659482,
      "loss": 2.9102,
      "step": 55093
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0418574810028076,
      "learning_rate": 0.0005192574050568541,
      "loss": 3.0611,
      "step": 55094
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4814447164535522,
      "learning_rate": 0.000519254613106996,
      "loss": 2.9644,
      "step": 55095
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5972564220428467,
      "learning_rate": 0.0005192518211163742,
      "loss": 2.8065,
      "step": 55096
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4424524307250977,
      "learning_rate": 0.0005192490290849895,
      "loss": 3.0702,
      "step": 55097
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6228435039520264,
      "learning_rate": 0.0005192462370128422,
      "loss": 2.8451,
      "step": 55098
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6021413803100586,
      "learning_rate": 0.0005192434448999328,
      "loss": 2.9432,
      "step": 55099
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5613059997558594,
      "learning_rate": 0.000519240652746262,
      "loss": 3.0989,
      "step": 55100
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.899007797241211,
      "learning_rate": 0.0005192378605518302,
      "loss": 3.2493,
      "step": 55101
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8433321714401245,
      "learning_rate": 0.000519235068316638,
      "loss": 3.0574,
      "step": 55102
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6711740493774414,
      "learning_rate": 0.0005192322760406858,
      "loss": 2.6851,
      "step": 55103
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.653741717338562,
      "learning_rate": 0.0005192294837239741,
      "loss": 3.1348,
      "step": 55104
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5894328355789185,
      "learning_rate": 0.0005192266913665036,
      "loss": 3.154,
      "step": 55105
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9690886735916138,
      "learning_rate": 0.0005192238989682747,
      "loss": 3.2709,
      "step": 55106
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.509760856628418,
      "learning_rate": 0.0005192211065292879,
      "loss": 3.1726,
      "step": 55107
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.557080864906311,
      "learning_rate": 0.0005192183140495439,
      "loss": 2.7458,
      "step": 55108
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6912972927093506,
      "learning_rate": 0.000519215521529043,
      "loss": 2.9449,
      "step": 55109
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5027984380722046,
      "learning_rate": 0.0005192127289677858,
      "loss": 3.0252,
      "step": 55110
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8114333152770996,
      "learning_rate": 0.0005192099363657728,
      "loss": 3.028,
      "step": 55111
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5221385955810547,
      "learning_rate": 0.0005192071437230045,
      "loss": 2.9543,
      "step": 55112
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4080675840377808,
      "learning_rate": 0.0005192043510394814,
      "loss": 3.1976,
      "step": 55113
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.363363265991211,
      "learning_rate": 0.0005192015583152042,
      "loss": 3.1427,
      "step": 55114
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5442140102386475,
      "learning_rate": 0.0005191987655501733,
      "loss": 2.9711,
      "step": 55115
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5291223526000977,
      "learning_rate": 0.000519195972744389,
      "loss": 3.1506,
      "step": 55116
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7858487367630005,
      "learning_rate": 0.0005191931798978523,
      "loss": 3.0488,
      "step": 55117
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5257395505905151,
      "learning_rate": 0.0005191903870105633,
      "loss": 2.892,
      "step": 55118
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.547959804534912,
      "learning_rate": 0.0005191875940825227,
      "loss": 3.0166,
      "step": 55119
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.36497962474823,
      "learning_rate": 0.000519184801113731,
      "loss": 3.2497,
      "step": 55120
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4355075359344482,
      "learning_rate": 0.0005191820081041887,
      "loss": 3.077,
      "step": 55121
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.451630711555481,
      "learning_rate": 0.0005191792150538964,
      "loss": 3.0915,
      "step": 55122
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.13891339302063,
      "learning_rate": 0.0005191764219628544,
      "loss": 3.1199,
      "step": 55123
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6062099933624268,
      "learning_rate": 0.0005191736288310636,
      "loss": 2.8493,
      "step": 55124
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6795477867126465,
      "learning_rate": 0.0005191708356585239,
      "loss": 2.9072,
      "step": 55125
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.758020043373108,
      "learning_rate": 0.0005191680424452365,
      "loss": 3.0947,
      "step": 55126
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0068864822387695,
      "learning_rate": 0.0005191652491912017,
      "loss": 2.8223,
      "step": 55127
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6268645524978638,
      "learning_rate": 0.0005191624558964197,
      "loss": 3.161,
      "step": 55128
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5370734930038452,
      "learning_rate": 0.0005191596625608914,
      "loss": 3.3415,
      "step": 55129
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5028014183044434,
      "learning_rate": 0.0005191568691846171,
      "loss": 3.0926,
      "step": 55130
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5624089241027832,
      "learning_rate": 0.0005191540757675975,
      "loss": 3.0461,
      "step": 55131
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.871758222579956,
      "learning_rate": 0.000519151282309833,
      "loss": 2.9366,
      "step": 55132
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8032643795013428,
      "learning_rate": 0.0005191484888113241,
      "loss": 2.6674,
      "step": 55133
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0056726932525635,
      "learning_rate": 0.0005191456952720713,
      "loss": 3.1841,
      "step": 55134
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5776374340057373,
      "learning_rate": 0.0005191429016920752,
      "loss": 2.9807,
      "step": 55135
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.92180597782135,
      "learning_rate": 0.0005191401080713364,
      "loss": 3.0235,
      "step": 55136
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5166926383972168,
      "learning_rate": 0.0005191373144098553,
      "loss": 3.0395,
      "step": 55137
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5337637662887573,
      "learning_rate": 0.0005191345207076324,
      "loss": 3.0421,
      "step": 55138
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3941571712493896,
      "learning_rate": 0.0005191317269646682,
      "loss": 3.0896,
      "step": 55139
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3033698797225952,
      "learning_rate": 0.0005191289331809634,
      "loss": 3.1031,
      "step": 55140
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.504469871520996,
      "learning_rate": 0.0005191261393565183,
      "loss": 3.1083,
      "step": 55141
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.730986475944519,
      "learning_rate": 0.0005191233454913335,
      "loss": 3.0214,
      "step": 55142
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6914572715759277,
      "learning_rate": 0.0005191205515854096,
      "loss": 3.2191,
      "step": 55143
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.698004961013794,
      "learning_rate": 0.000519117757638747,
      "loss": 2.8513,
      "step": 55144
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6842511892318726,
      "learning_rate": 0.0005191149636513463,
      "loss": 2.9135,
      "step": 55145
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6201062202453613,
      "learning_rate": 0.000519112169623208,
      "loss": 3.0476,
      "step": 55146
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3816184997558594,
      "learning_rate": 0.0005191093755543326,
      "loss": 2.9968,
      "step": 55147
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7054007053375244,
      "learning_rate": 0.0005191065814447206,
      "loss": 2.917,
      "step": 55148
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1321377754211426,
      "learning_rate": 0.0005191037872943726,
      "loss": 2.9135,
      "step": 55149
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5343431234359741,
      "learning_rate": 0.000519100993103289,
      "loss": 3.2134,
      "step": 55150
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6400995254516602,
      "learning_rate": 0.0005190981988714706,
      "loss": 3.162,
      "step": 55151
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7675395011901855,
      "learning_rate": 0.0005190954045989174,
      "loss": 3.0161,
      "step": 55152
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4698761701583862,
      "learning_rate": 0.0005190926102856305,
      "loss": 2.9832,
      "step": 55153
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4911348819732666,
      "learning_rate": 0.00051908981593161,
      "loss": 3.1416,
      "step": 55154
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4849317073822021,
      "learning_rate": 0.0005190870215368565,
      "loss": 3.0766,
      "step": 55155
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.548797607421875,
      "learning_rate": 0.0005190842271013708,
      "loss": 3.0498,
      "step": 55156
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6476508378982544,
      "learning_rate": 0.000519081432625153,
      "loss": 2.8553,
      "step": 55157
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4459174871444702,
      "learning_rate": 0.0005190786381082039,
      "loss": 3.1022,
      "step": 55158
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5310169458389282,
      "learning_rate": 0.0005190758435505241,
      "loss": 3.1377,
      "step": 55159
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6629080772399902,
      "learning_rate": 0.0005190730489521137,
      "loss": 3.1516,
      "step": 55160
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0826363563537598,
      "learning_rate": 0.0005190702543129737,
      "loss": 3.1942,
      "step": 55161
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6653133630752563,
      "learning_rate": 0.0005190674596331043,
      "loss": 3.0178,
      "step": 55162
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8325690031051636,
      "learning_rate": 0.0005190646649125061,
      "loss": 3.1308,
      "step": 55163
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.134431838989258,
      "learning_rate": 0.0005190618701511798,
      "loss": 3.2145,
      "step": 55164
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5267046689987183,
      "learning_rate": 0.0005190590753491256,
      "loss": 3.0642,
      "step": 55165
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5090715885162354,
      "learning_rate": 0.0005190562805063442,
      "loss": 3.1926,
      "step": 55166
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.958385944366455,
      "learning_rate": 0.0005190534856228361,
      "loss": 3.1132,
      "step": 55167
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5886878967285156,
      "learning_rate": 0.0005190506906986019,
      "loss": 3.0768,
      "step": 55168
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5870927572250366,
      "learning_rate": 0.0005190478957336419,
      "loss": 2.9684,
      "step": 55169
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5324230194091797,
      "learning_rate": 0.0005190451007279569,
      "loss": 3.0095,
      "step": 55170
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8159199953079224,
      "learning_rate": 0.0005190423056815472,
      "loss": 3.1884,
      "step": 55171
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.96284019947052,
      "learning_rate": 0.0005190395105944135,
      "loss": 3.1114,
      "step": 55172
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3409106731414795,
      "learning_rate": 0.0005190367154665562,
      "loss": 3.1443,
      "step": 55173
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5292444229125977,
      "learning_rate": 0.0005190339202979758,
      "loss": 2.9586,
      "step": 55174
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2863545417785645,
      "learning_rate": 0.0005190311250886728,
      "loss": 3.0363,
      "step": 55175
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9692639112472534,
      "learning_rate": 0.0005190283298386479,
      "loss": 3.0387,
      "step": 55176
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0360796451568604,
      "learning_rate": 0.0005190255345479014,
      "loss": 3.2173,
      "step": 55177
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5077409744262695,
      "learning_rate": 0.000519022739216434,
      "loss": 2.9242,
      "step": 55178
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.184468984603882,
      "learning_rate": 0.000519019943844246,
      "loss": 3.0176,
      "step": 55179
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6422384977340698,
      "learning_rate": 0.0005190171484313382,
      "loss": 2.8831,
      "step": 55180
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.084958553314209,
      "learning_rate": 0.000519014352977711,
      "loss": 3.1468,
      "step": 55181
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7331712245941162,
      "learning_rate": 0.0005190115574833648,
      "loss": 3.1531,
      "step": 55182
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.752993583679199,
      "learning_rate": 0.0005190087619483002,
      "loss": 2.8513,
      "step": 55183
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9240916967391968,
      "learning_rate": 0.0005190059663725178,
      "loss": 2.9835,
      "step": 55184
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7694731950759888,
      "learning_rate": 0.000519003170756018,
      "loss": 3.3502,
      "step": 55185
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.176995277404785,
      "learning_rate": 0.0005190003750988015,
      "loss": 2.8982,
      "step": 55186
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.722062349319458,
      "learning_rate": 0.0005189975794008685,
      "loss": 3.0956,
      "step": 55187
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8071668148040771,
      "learning_rate": 0.0005189947836622199,
      "loss": 3.0351,
      "step": 55188
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7814706563949585,
      "learning_rate": 0.0005189919878828559,
      "loss": 3.2097,
      "step": 55189
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.099506139755249,
      "learning_rate": 0.0005189891920627773,
      "loss": 3.1184,
      "step": 55190
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4660993814468384,
      "learning_rate": 0.0005189863962019845,
      "loss": 2.9409,
      "step": 55191
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0162127017974854,
      "learning_rate": 0.0005189836003004777,
      "loss": 3.0198,
      "step": 55192
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4134833812713623,
      "learning_rate": 0.000518980804358258,
      "loss": 3.0371,
      "step": 55193
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7961913347244263,
      "learning_rate": 0.0005189780083753255,
      "loss": 3.2006,
      "step": 55194
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5744208097457886,
      "learning_rate": 0.0005189752123516809,
      "loss": 2.8099,
      "step": 55195
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.920194149017334,
      "learning_rate": 0.0005189724162873246,
      "loss": 3.0909,
      "step": 55196
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.512005567550659,
      "learning_rate": 0.0005189696201822573,
      "loss": 3.0385,
      "step": 55197
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5483776330947876,
      "learning_rate": 0.0005189668240364794,
      "loss": 3.0225,
      "step": 55198
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9532248973846436,
      "learning_rate": 0.0005189640278499914,
      "loss": 3.0303,
      "step": 55199
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.60151207447052,
      "learning_rate": 0.0005189612316227939,
      "loss": 3.0264,
      "step": 55200
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4898924827575684,
      "learning_rate": 0.0005189584353548874,
      "loss": 2.8946,
      "step": 55201
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.69431471824646,
      "learning_rate": 0.0005189556390462723,
      "loss": 3.1115,
      "step": 55202
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1201529502868652,
      "learning_rate": 0.0005189528426969492,
      "loss": 3.3337,
      "step": 55203
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.344557046890259,
      "learning_rate": 0.0005189500463069187,
      "loss": 3.1984,
      "step": 55204
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7116659879684448,
      "learning_rate": 0.0005189472498761812,
      "loss": 3.2466,
      "step": 55205
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.90804922580719,
      "learning_rate": 0.0005189444534047374,
      "loss": 2.8942,
      "step": 55206
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3331485986709595,
      "learning_rate": 0.0005189416568925875,
      "loss": 3.0606,
      "step": 55207
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4442355632781982,
      "learning_rate": 0.0005189388603397323,
      "loss": 2.9912,
      "step": 55208
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3426871299743652,
      "learning_rate": 0.0005189360637461723,
      "loss": 3.0167,
      "step": 55209
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0173490047454834,
      "learning_rate": 0.0005189332671119079,
      "loss": 2.7764,
      "step": 55210
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5628482103347778,
      "learning_rate": 0.0005189304704369396,
      "loss": 3.0526,
      "step": 55211
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5273523330688477,
      "learning_rate": 0.0005189276737212681,
      "loss": 3.0131,
      "step": 55212
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3979039192199707,
      "learning_rate": 0.0005189248769648939,
      "loss": 3.0449,
      "step": 55213
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5131409168243408,
      "learning_rate": 0.0005189220801678173,
      "loss": 2.9995,
      "step": 55214
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.988565683364868,
      "learning_rate": 0.0005189192833300389,
      "loss": 3.0445,
      "step": 55215
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.584256887435913,
      "learning_rate": 0.0005189164864515594,
      "loss": 2.9534,
      "step": 55216
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5849782228469849,
      "learning_rate": 0.0005189136895323791,
      "loss": 3.0274,
      "step": 55217
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.538443684577942,
      "learning_rate": 0.0005189108925724986,
      "loss": 2.8879,
      "step": 55218
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5786062479019165,
      "learning_rate": 0.0005189080955719185,
      "loss": 2.8727,
      "step": 55219
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8713794946670532,
      "learning_rate": 0.0005189052985306393,
      "loss": 2.9918,
      "step": 55220
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8415377140045166,
      "learning_rate": 0.0005189025014486613,
      "loss": 3.0016,
      "step": 55221
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6935956478118896,
      "learning_rate": 0.0005188997043259853,
      "loss": 3.1836,
      "step": 55222
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.09833025932312,
      "learning_rate": 0.0005188969071626118,
      "loss": 2.99,
      "step": 55223
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7686294317245483,
      "learning_rate": 0.0005188941099585411,
      "loss": 3.1935,
      "step": 55224
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6922706365585327,
      "learning_rate": 0.0005188913127137739,
      "loss": 3.087,
      "step": 55225
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.7788660526275635,
      "learning_rate": 0.0005188885154283106,
      "loss": 2.8561,
      "step": 55226
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6044657230377197,
      "learning_rate": 0.0005188857181021519,
      "loss": 2.8909,
      "step": 55227
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0021793842315674,
      "learning_rate": 0.0005188829207352981,
      "loss": 3.0856,
      "step": 55228
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7029825448989868,
      "learning_rate": 0.00051888012332775,
      "loss": 3.1107,
      "step": 55229
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.420978546142578,
      "learning_rate": 0.0005188773258795079,
      "loss": 3.0555,
      "step": 55230
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.8852524757385254,
      "learning_rate": 0.0005188745283905723,
      "loss": 3.1946,
      "step": 55231
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6968944072723389,
      "learning_rate": 0.0005188717308609438,
      "loss": 2.8101,
      "step": 55232
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4473958015441895,
      "learning_rate": 0.000518868933290623,
      "loss": 3.0601,
      "step": 55233
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5123467445373535,
      "learning_rate": 0.0005188661356796102,
      "loss": 3.018,
      "step": 55234
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9133449792861938,
      "learning_rate": 0.0005188633380279061,
      "loss": 2.9275,
      "step": 55235
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.351533055305481,
      "learning_rate": 0.0005188605403355113,
      "loss": 3.1903,
      "step": 55236
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6328133344650269,
      "learning_rate": 0.000518857742602426,
      "loss": 3.1543,
      "step": 55237
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.0414516925811768,
      "learning_rate": 0.000518854944828651,
      "loss": 2.9335,
      "step": 55238
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.686713933944702,
      "learning_rate": 0.0005188521470141867,
      "loss": 2.8582,
      "step": 55239
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.74296236038208,
      "learning_rate": 0.0005188493491590338,
      "loss": 3.0644,
      "step": 55240
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5224270820617676,
      "learning_rate": 0.0005188465512631925,
      "loss": 2.7931,
      "step": 55241
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.121206521987915,
      "learning_rate": 0.0005188437533266636,
      "loss": 2.7353,
      "step": 55242
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.619823694229126,
      "learning_rate": 0.0005188409553494476,
      "loss": 3.0598,
      "step": 55243
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0213675498962402,
      "learning_rate": 0.0005188381573315447,
      "loss": 3.1905,
      "step": 55244
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.26304030418396,
      "learning_rate": 0.0005188353592729558,
      "loss": 3.0339,
      "step": 55245
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.0491368770599365,
      "learning_rate": 0.0005188325611736813,
      "loss": 3.2048,
      "step": 55246
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7247241735458374,
      "learning_rate": 0.0005188297630337216,
      "loss": 3.0093,
      "step": 55247
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3802289962768555,
      "learning_rate": 0.0005188269648530773,
      "loss": 3.0355,
      "step": 55248
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.2141010761260986,
      "learning_rate": 0.0005188241666317491,
      "loss": 3.0417,
      "step": 55249
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.362947940826416,
      "learning_rate": 0.0005188213683697372,
      "loss": 3.1854,
      "step": 55250
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6840577125549316,
      "learning_rate": 0.0005188185700670424,
      "loss": 3.1403,
      "step": 55251
    },
    {
      "epoch": 0.72,
      "grad_norm": 4.206020832061768,
      "learning_rate": 0.0005188157717236651,
      "loss": 3.0451,
      "step": 55252
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.442192792892456,
      "learning_rate": 0.0005188129733396056,
      "loss": 2.9937,
      "step": 55253
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6341153383255005,
      "learning_rate": 0.0005188101749148648,
      "loss": 2.8294,
      "step": 55254
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5773744583129883,
      "learning_rate": 0.000518807376449443,
      "loss": 3.2759,
      "step": 55255
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.0530471801757812,
      "learning_rate": 0.000518804577943341,
      "loss": 2.9274,
      "step": 55256
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.126044511795044,
      "learning_rate": 0.0005188017793965588,
      "loss": 3.3455,
      "step": 55257
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5047205686569214,
      "learning_rate": 0.0005187989808090974,
      "loss": 3.1217,
      "step": 55258
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6341608762741089,
      "learning_rate": 0.000518796182180957,
      "loss": 2.72,
      "step": 55259
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6800596714019775,
      "learning_rate": 0.0005187933835121384,
      "loss": 3.204,
      "step": 55260
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.023369789123535,
      "learning_rate": 0.000518790584802642,
      "loss": 3.0809,
      "step": 55261
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.354449987411499,
      "learning_rate": 0.0005187877860524682,
      "loss": 2.949,
      "step": 55262
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.979407548904419,
      "learning_rate": 0.0005187849872616176,
      "loss": 2.6629,
      "step": 55263
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.7721047401428223,
      "learning_rate": 0.0005187821884300908,
      "loss": 2.9796,
      "step": 55264
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9714034795761108,
      "learning_rate": 0.0005187793895578881,
      "loss": 3.2099,
      "step": 55265
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.732958197593689,
      "learning_rate": 0.0005187765906450104,
      "loss": 3.0081,
      "step": 55266
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.326885223388672,
      "learning_rate": 0.0005187737916914579,
      "loss": 2.9971,
      "step": 55267
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6508948802948,
      "learning_rate": 0.0005187709926972313,
      "loss": 3.3498,
      "step": 55268
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3382660150527954,
      "learning_rate": 0.000518768193662331,
      "loss": 3.3099,
      "step": 55269
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3869893550872803,
      "learning_rate": 0.0005187653945867575,
      "loss": 2.8673,
      "step": 55270
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.130978584289551,
      "learning_rate": 0.0005187625954705115,
      "loss": 3.1863,
      "step": 55271
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4888160228729248,
      "learning_rate": 0.0005187597963135933,
      "loss": 2.8331,
      "step": 55272
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0400452613830566,
      "learning_rate": 0.0005187569971160036,
      "loss": 2.9545,
      "step": 55273
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9839560985565186,
      "learning_rate": 0.0005187541978777428,
      "loss": 3.154,
      "step": 55274
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.546104907989502,
      "learning_rate": 0.0005187513985988114,
      "loss": 2.8141,
      "step": 55275
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1545259952545166,
      "learning_rate": 0.0005187485992792101,
      "loss": 3.0353,
      "step": 55276
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.510301351547241,
      "learning_rate": 0.0005187457999189394,
      "loss": 3.11,
      "step": 55277
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3326417207717896,
      "learning_rate": 0.0005187430005179995,
      "loss": 2.9856,
      "step": 55278
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6244319677352905,
      "learning_rate": 0.0005187402010763913,
      "loss": 2.8644,
      "step": 55279
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5735228061676025,
      "learning_rate": 0.0005187374015941151,
      "loss": 2.744,
      "step": 55280
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5850207805633545,
      "learning_rate": 0.0005187346020711715,
      "loss": 2.7914,
      "step": 55281
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4804927110671997,
      "learning_rate": 0.000518731802507561,
      "loss": 3.2598,
      "step": 55282
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6578242778778076,
      "learning_rate": 0.0005187290029032842,
      "loss": 2.8848,
      "step": 55283
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.542385220527649,
      "learning_rate": 0.0005187262032583414,
      "loss": 2.8398,
      "step": 55284
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.572526216506958,
      "learning_rate": 0.0005187234035727334,
      "loss": 3.013,
      "step": 55285
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9576128721237183,
      "learning_rate": 0.0005187206038464605,
      "loss": 3.0652,
      "step": 55286
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7671760320663452,
      "learning_rate": 0.0005187178040795235,
      "loss": 3.2427,
      "step": 55287
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.344882607460022,
      "learning_rate": 0.0005187150042719225,
      "loss": 3.2307,
      "step": 55288
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.636781096458435,
      "learning_rate": 0.0005187122044236583,
      "loss": 3.0966,
      "step": 55289
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6490135192871094,
      "learning_rate": 0.0005187094045347315,
      "loss": 3.1993,
      "step": 55290
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3740692138671875,
      "learning_rate": 0.0005187066046051424,
      "loss": 3.0028,
      "step": 55291
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4390335083007812,
      "learning_rate": 0.0005187038046348916,
      "loss": 2.8417,
      "step": 55292
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7580496072769165,
      "learning_rate": 0.0005187010046239797,
      "loss": 3.1067,
      "step": 55293
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8116458654403687,
      "learning_rate": 0.000518698204572407,
      "loss": 2.9442,
      "step": 55294
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2254366874694824,
      "learning_rate": 0.0005186954044801744,
      "loss": 3.1913,
      "step": 55295
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.632633090019226,
      "learning_rate": 0.000518692604347282,
      "loss": 2.8503,
      "step": 55296
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5717463493347168,
      "learning_rate": 0.0005186898041737306,
      "loss": 3.0079,
      "step": 55297
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.952357292175293,
      "learning_rate": 0.0005186870039595206,
      "loss": 3.0296,
      "step": 55298
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.669114112854004,
      "learning_rate": 0.0005186842037046526,
      "loss": 3.1528,
      "step": 55299
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7603012323379517,
      "learning_rate": 0.0005186814034091272,
      "loss": 2.9814,
      "step": 55300
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3605194091796875,
      "learning_rate": 0.0005186786030729446,
      "loss": 2.9344,
      "step": 55301
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6824854612350464,
      "learning_rate": 0.0005186758026961056,
      "loss": 3.0655,
      "step": 55302
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.482869267463684,
      "learning_rate": 0.0005186730022786106,
      "loss": 3.1442,
      "step": 55303
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2740395069122314,
      "learning_rate": 0.0005186702018204602,
      "loss": 2.99,
      "step": 55304
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.22481632232666,
      "learning_rate": 0.0005186674013216548,
      "loss": 3.0993,
      "step": 55305
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9292536973953247,
      "learning_rate": 0.0005186646007821952,
      "loss": 2.8111,
      "step": 55306
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9419662952423096,
      "learning_rate": 0.0005186618002020816,
      "loss": 2.9846,
      "step": 55307
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.822713613510132,
      "learning_rate": 0.0005186589995813146,
      "loss": 3.2639,
      "step": 55308
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.016432285308838,
      "learning_rate": 0.0005186561989198948,
      "loss": 2.9549,
      "step": 55309
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.392328143119812,
      "learning_rate": 0.0005186533982178228,
      "loss": 2.8269,
      "step": 55310
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4091787338256836,
      "learning_rate": 0.0005186505974750987,
      "loss": 3.0147,
      "step": 55311
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4000240564346313,
      "learning_rate": 0.0005186477966917237,
      "loss": 3.0633,
      "step": 55312
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3250374794006348,
      "learning_rate": 0.0005186449958676977,
      "loss": 3.1729,
      "step": 55313
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9305155277252197,
      "learning_rate": 0.0005186421950030216,
      "loss": 2.9427,
      "step": 55314
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.863832950592041,
      "learning_rate": 0.0005186393940976957,
      "loss": 3.2307,
      "step": 55315
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4823246002197266,
      "learning_rate": 0.0005186365931517206,
      "loss": 2.9307,
      "step": 55316
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.615952730178833,
      "learning_rate": 0.000518633792165097,
      "loss": 2.8983,
      "step": 55317
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.010281801223755,
      "learning_rate": 0.0005186309911378251,
      "loss": 3.0737,
      "step": 55318
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7219409942626953,
      "learning_rate": 0.0005186281900699056,
      "loss": 2.5883,
      "step": 55319
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.510648250579834,
      "learning_rate": 0.0005186253889613389,
      "loss": 3.0311,
      "step": 55320
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6062792539596558,
      "learning_rate": 0.0005186225878121257,
      "loss": 3.2703,
      "step": 55321
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5154088735580444,
      "learning_rate": 0.0005186197866222665,
      "loss": 2.9171,
      "step": 55322
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4044954776763916,
      "learning_rate": 0.0005186169853917616,
      "loss": 3.1154,
      "step": 55323
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.354741096496582,
      "learning_rate": 0.0005186141841206118,
      "loss": 3.0629,
      "step": 55324
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9383753538131714,
      "learning_rate": 0.0005186113828088175,
      "loss": 2.9957,
      "step": 55325
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.359567642211914,
      "learning_rate": 0.0005186085814563792,
      "loss": 2.8057,
      "step": 55326
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.44050931930542,
      "learning_rate": 0.0005186057800632974,
      "loss": 3.2116,
      "step": 55327
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.469162106513977,
      "learning_rate": 0.0005186029786295726,
      "loss": 2.7707,
      "step": 55328
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8804161548614502,
      "learning_rate": 0.0005186001771552055,
      "loss": 3.1554,
      "step": 55329
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0054330825805664,
      "learning_rate": 0.0005185973756401964,
      "loss": 2.9613,
      "step": 55330
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5119473934173584,
      "learning_rate": 0.000518594574084546,
      "loss": 3.1083,
      "step": 55331
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.70470130443573,
      "learning_rate": 0.0005185917724882548,
      "loss": 2.9675,
      "step": 55332
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9527308940887451,
      "learning_rate": 0.0005185889708513231,
      "loss": 3.2883,
      "step": 55333
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4168193340301514,
      "learning_rate": 0.0005185861691737517,
      "loss": 3.2462,
      "step": 55334
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8224314451217651,
      "learning_rate": 0.0005185833674555409,
      "loss": 3.2467,
      "step": 55335
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4320887327194214,
      "learning_rate": 0.0005185805656966915,
      "loss": 3.2262,
      "step": 55336
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.373167037963867,
      "learning_rate": 0.0005185777638972037,
      "loss": 3.1184,
      "step": 55337
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4287267923355103,
      "learning_rate": 0.0005185749620570783,
      "loss": 2.5551,
      "step": 55338
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8053861856460571,
      "learning_rate": 0.0005185721601763156,
      "loss": 2.8837,
      "step": 55339
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7281829118728638,
      "learning_rate": 0.0005185693582549162,
      "loss": 3.0015,
      "step": 55340
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.258208990097046,
      "learning_rate": 0.0005185665562928806,
      "loss": 3.1147,
      "step": 55341
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7804203033447266,
      "learning_rate": 0.0005185637542902094,
      "loss": 2.9904,
      "step": 55342
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3347244262695312,
      "learning_rate": 0.000518560952246903,
      "loss": 2.8602,
      "step": 55343
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.953786849975586,
      "learning_rate": 0.000518558150162962,
      "loss": 2.8741,
      "step": 55344
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4705803394317627,
      "learning_rate": 0.000518555348038387,
      "loss": 3.1091,
      "step": 55345
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.476535439491272,
      "learning_rate": 0.0005185525458731785,
      "loss": 3.142,
      "step": 55346
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7246118783950806,
      "learning_rate": 0.0005185497436673368,
      "loss": 3.0707,
      "step": 55347
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8906373977661133,
      "learning_rate": 0.0005185469414208628,
      "loss": 3.2,
      "step": 55348
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7271957397460938,
      "learning_rate": 0.0005185441391337565,
      "loss": 3.146,
      "step": 55349
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5578879117965698,
      "learning_rate": 0.0005185413368060189,
      "loss": 2.9995,
      "step": 55350
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7310781478881836,
      "learning_rate": 0.0005185385344376503,
      "loss": 3.1161,
      "step": 55351
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.812427043914795,
      "learning_rate": 0.0005185357320286512,
      "loss": 3.1695,
      "step": 55352
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6326345205307007,
      "learning_rate": 0.0005185329295790223,
      "loss": 2.9754,
      "step": 55353
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4873781204223633,
      "learning_rate": 0.000518530127088764,
      "loss": 3.0533,
      "step": 55354
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.342777967453003,
      "learning_rate": 0.0005185273245578767,
      "loss": 2.881,
      "step": 55355
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1197433471679688,
      "learning_rate": 0.0005185245219863611,
      "loss": 3.1139,
      "step": 55356
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5316357612609863,
      "learning_rate": 0.0005185217193742176,
      "loss": 2.8256,
      "step": 55357
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.051849842071533,
      "learning_rate": 0.0005185189167214471,
      "loss": 3.1011,
      "step": 55358
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.068542957305908,
      "learning_rate": 0.0005185161140280495,
      "loss": 3.0967,
      "step": 55359
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4479103088378906,
      "learning_rate": 0.0005185133112940257,
      "loss": 3.0964,
      "step": 55360
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6086537837982178,
      "learning_rate": 0.0005185105085193762,
      "loss": 3.0366,
      "step": 55361
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5549758672714233,
      "learning_rate": 0.0005185077057041014,
      "loss": 3.0573,
      "step": 55362
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.635328769683838,
      "learning_rate": 0.0005185049028482019,
      "loss": 2.9926,
      "step": 55363
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.595710277557373,
      "learning_rate": 0.0005185020999516782,
      "loss": 2.8458,
      "step": 55364
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4821521043777466,
      "learning_rate": 0.0005184992970145309,
      "loss": 2.8254,
      "step": 55365
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5292549133300781,
      "learning_rate": 0.0005184964940367604,
      "loss": 3.0535,
      "step": 55366
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3338886499404907,
      "learning_rate": 0.0005184936910183673,
      "loss": 2.9679,
      "step": 55367
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5654997825622559,
      "learning_rate": 0.0005184908879593521,
      "loss": 2.9894,
      "step": 55368
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4323620796203613,
      "learning_rate": 0.0005184880848597153,
      "loss": 3.0868,
      "step": 55369
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7543009519577026,
      "learning_rate": 0.0005184852817194575,
      "loss": 3.0846,
      "step": 55370
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3463209867477417,
      "learning_rate": 0.0005184824785385792,
      "loss": 3.0761,
      "step": 55371
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4243533611297607,
      "learning_rate": 0.0005184796753170808,
      "loss": 3.0045,
      "step": 55372
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8330241441726685,
      "learning_rate": 0.0005184768720549629,
      "loss": 3.0595,
      "step": 55373
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6073493957519531,
      "learning_rate": 0.000518474068752226,
      "loss": 2.8746,
      "step": 55374
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2949857711791992,
      "learning_rate": 0.0005184712654088707,
      "loss": 2.8126,
      "step": 55375
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9141817092895508,
      "learning_rate": 0.0005184684620248975,
      "loss": 3.0219,
      "step": 55376
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6862974166870117,
      "learning_rate": 0.0005184656586003068,
      "loss": 3.239,
      "step": 55377
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3705811500549316,
      "learning_rate": 0.0005184628551350993,
      "loss": 3.1753,
      "step": 55378
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6912097930908203,
      "learning_rate": 0.0005184600516292753,
      "loss": 3.2242,
      "step": 55379
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7154135704040527,
      "learning_rate": 0.0005184572480828356,
      "loss": 3.2893,
      "step": 55380
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3723466396331787,
      "learning_rate": 0.0005184544444957805,
      "loss": 3.2376,
      "step": 55381
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9047313928604126,
      "learning_rate": 0.0005184516408681106,
      "loss": 3.111,
      "step": 55382
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0430984497070312,
      "learning_rate": 0.0005184488371998264,
      "loss": 2.956,
      "step": 55383
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.7357757091522217,
      "learning_rate": 0.0005184460334909284,
      "loss": 3.0903,
      "step": 55384
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4357936382293701,
      "learning_rate": 0.0005184432297414172,
      "loss": 3.0745,
      "step": 55385
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7214434146881104,
      "learning_rate": 0.0005184404259512933,
      "loss": 3.057,
      "step": 55386
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.77874755859375,
      "learning_rate": 0.0005184376221205573,
      "loss": 2.9169,
      "step": 55387
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9037539958953857,
      "learning_rate": 0.0005184348182492095,
      "loss": 2.855,
      "step": 55388
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5363929271697998,
      "learning_rate": 0.0005184320143372506,
      "loss": 3.2868,
      "step": 55389
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.8818421363830566,
      "learning_rate": 0.000518429210384681,
      "loss": 2.9155,
      "step": 55390
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0771677494049072,
      "learning_rate": 0.0005184264063915013,
      "loss": 2.803,
      "step": 55391
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7713348865509033,
      "learning_rate": 0.0005184236023577121,
      "loss": 2.9606,
      "step": 55392
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.69683837890625,
      "learning_rate": 0.0005184207982833137,
      "loss": 3.2618,
      "step": 55393
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.950710415840149,
      "learning_rate": 0.0005184179941683068,
      "loss": 2.9062,
      "step": 55394
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.723533034324646,
      "learning_rate": 0.0005184151900126919,
      "loss": 3.2575,
      "step": 55395
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6510930061340332,
      "learning_rate": 0.0005184123858164694,
      "loss": 3.2288,
      "step": 55396
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4355065822601318,
      "learning_rate": 0.00051840958157964,
      "loss": 2.9623,
      "step": 55397
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4256590604782104,
      "learning_rate": 0.0005184067773022042,
      "loss": 3.008,
      "step": 55398
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1782732009887695,
      "learning_rate": 0.0005184039729841624,
      "loss": 3.0639,
      "step": 55399
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1355624198913574,
      "learning_rate": 0.0005184011686255151,
      "loss": 2.9366,
      "step": 55400
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4895819425582886,
      "learning_rate": 0.000518398364226263,
      "loss": 3.0808,
      "step": 55401
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2531960010528564,
      "learning_rate": 0.0005183955597864064,
      "loss": 3.2131,
      "step": 55402
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4860663414001465,
      "learning_rate": 0.000518392755305946,
      "loss": 3.1565,
      "step": 55403
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.568512201309204,
      "learning_rate": 0.0005183899507848823,
      "loss": 3.0911,
      "step": 55404
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9003772735595703,
      "learning_rate": 0.0005183871462232159,
      "loss": 2.7677,
      "step": 55405
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2243754863739014,
      "learning_rate": 0.000518384341620947,
      "loss": 3.3312,
      "step": 55406
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5222541093826294,
      "learning_rate": 0.0005183815369780764,
      "loss": 2.894,
      "step": 55407
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4838156700134277,
      "learning_rate": 0.0005183787322946045,
      "loss": 3.1402,
      "step": 55408
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4381849765777588,
      "learning_rate": 0.000518375927570532,
      "loss": 3.3898,
      "step": 55409
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.488966464996338,
      "learning_rate": 0.0005183731228058592,
      "loss": 2.9788,
      "step": 55410
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5306143760681152,
      "learning_rate": 0.0005183703180005868,
      "loss": 3.1197,
      "step": 55411
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6065887212753296,
      "learning_rate": 0.0005183675131547152,
      "loss": 3.1994,
      "step": 55412
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4539591073989868,
      "learning_rate": 0.0005183647082682449,
      "loss": 3.1493,
      "step": 55413
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5429407358169556,
      "learning_rate": 0.0005183619033411764,
      "loss": 2.9343,
      "step": 55414
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0510313510894775,
      "learning_rate": 0.0005183590983735104,
      "loss": 2.8084,
      "step": 55415
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6202260255813599,
      "learning_rate": 0.0005183562933652474,
      "loss": 3.1641,
      "step": 55416
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5275877714157104,
      "learning_rate": 0.0005183534883163878,
      "loss": 3.2771,
      "step": 55417
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8277826309204102,
      "learning_rate": 0.0005183506832269322,
      "loss": 3.1966,
      "step": 55418
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5021460056304932,
      "learning_rate": 0.000518347878096881,
      "loss": 3.108,
      "step": 55419
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.515697717666626,
      "learning_rate": 0.0005183450729262348,
      "loss": 2.9386,
      "step": 55420
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.628775715827942,
      "learning_rate": 0.0005183422677149942,
      "loss": 2.9536,
      "step": 55421
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1245338916778564,
      "learning_rate": 0.0005183394624631595,
      "loss": 3.077,
      "step": 55422
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5463179349899292,
      "learning_rate": 0.0005183366571707316,
      "loss": 2.9456,
      "step": 55423
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0201830863952637,
      "learning_rate": 0.0005183338518377107,
      "loss": 2.8364,
      "step": 55424
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.639879822731018,
      "learning_rate": 0.0005183310464640973,
      "loss": 3.1031,
      "step": 55425
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5509223937988281,
      "learning_rate": 0.0005183282410498922,
      "loss": 3.0845,
      "step": 55426
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.465735673904419,
      "learning_rate": 0.0005183254355950956,
      "loss": 2.9182,
      "step": 55427
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8938961029052734,
      "learning_rate": 0.0005183226300997083,
      "loss": 3.1354,
      "step": 55428
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.828848958015442,
      "learning_rate": 0.0005183198245637307,
      "loss": 3.1558,
      "step": 55429
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5275192260742188,
      "learning_rate": 0.0005183170189871633,
      "loss": 3.2313,
      "step": 55430
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1975417137145996,
      "learning_rate": 0.0005183142133700066,
      "loss": 3.0036,
      "step": 55431
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1738154888153076,
      "learning_rate": 0.0005183114077122612,
      "loss": 2.9448,
      "step": 55432
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4109737873077393,
      "learning_rate": 0.0005183086020139276,
      "loss": 2.8655,
      "step": 55433
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.886969566345215,
      "learning_rate": 0.0005183057962750064,
      "loss": 2.7033,
      "step": 55434
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9586375951766968,
      "learning_rate": 0.0005183029904954979,
      "loss": 3.142,
      "step": 55435
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5272808074951172,
      "learning_rate": 0.0005183001846754028,
      "loss": 2.9474,
      "step": 55436
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5871188640594482,
      "learning_rate": 0.0005182973788147215,
      "loss": 2.9535,
      "step": 55437
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.154651165008545,
      "learning_rate": 0.0005182945729134546,
      "loss": 3.2666,
      "step": 55438
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3345720767974854,
      "learning_rate": 0.0005182917669716028,
      "loss": 3.0791,
      "step": 55439
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.970581293106079,
      "learning_rate": 0.0005182889609891663,
      "loss": 3.0751,
      "step": 55440
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5515872240066528,
      "learning_rate": 0.0005182861549661459,
      "loss": 3.2746,
      "step": 55441
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.639981269836426,
      "learning_rate": 0.0005182833489025418,
      "loss": 3.1123,
      "step": 55442
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.9773547649383545,
      "learning_rate": 0.0005182805427983547,
      "loss": 2.8985,
      "step": 55443
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4922106266021729,
      "learning_rate": 0.0005182777366535852,
      "loss": 2.928,
      "step": 55444
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6052180528640747,
      "learning_rate": 0.0005182749304682338,
      "loss": 2.9343,
      "step": 55445
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2260890007019043,
      "learning_rate": 0.0005182721242423009,
      "loss": 3.1411,
      "step": 55446
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.477221727371216,
      "learning_rate": 0.0005182693179757871,
      "loss": 2.9396,
      "step": 55447
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5044910907745361,
      "learning_rate": 0.000518266511668693,
      "loss": 3.0648,
      "step": 55448
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4623546600341797,
      "learning_rate": 0.0005182637053210189,
      "loss": 2.9077,
      "step": 55449
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5590057373046875,
      "learning_rate": 0.0005182608989327656,
      "loss": 3.0006,
      "step": 55450
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4772632122039795,
      "learning_rate": 0.0005182580925039333,
      "loss": 2.943,
      "step": 55451
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9685384035110474,
      "learning_rate": 0.0005182552860345228,
      "loss": 3.0168,
      "step": 55452
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4401112794876099,
      "learning_rate": 0.0005182524795245345,
      "loss": 2.8198,
      "step": 55453
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6321747303009033,
      "learning_rate": 0.000518249672973969,
      "loss": 3.2252,
      "step": 55454
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8684983253479004,
      "learning_rate": 0.0005182468663828268,
      "loss": 3.0924,
      "step": 55455
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9721989631652832,
      "learning_rate": 0.0005182440597511082,
      "loss": 3.0957,
      "step": 55456
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.523832321166992,
      "learning_rate": 0.0005182412530788141,
      "loss": 2.9554,
      "step": 55457
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.506065011024475,
      "learning_rate": 0.0005182384463659447,
      "loss": 3.1517,
      "step": 55458
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7202823162078857,
      "learning_rate": 0.0005182356396125007,
      "loss": 3.0504,
      "step": 55459
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.6154427528381348,
      "learning_rate": 0.0005182328328184826,
      "loss": 3.0221,
      "step": 55460
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.538522720336914,
      "learning_rate": 0.0005182300259838909,
      "loss": 3.0625,
      "step": 55461
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5933473110198975,
      "learning_rate": 0.0005182272191087261,
      "loss": 3.3712,
      "step": 55462
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.045179843902588,
      "learning_rate": 0.0005182244121929887,
      "loss": 3.0671,
      "step": 55463
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.973508596420288,
      "learning_rate": 0.0005182216052366794,
      "loss": 3.0109,
      "step": 55464
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.551069498062134,
      "learning_rate": 0.0005182187982397985,
      "loss": 2.924,
      "step": 55465
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8367656469345093,
      "learning_rate": 0.0005182159912023465,
      "loss": 3.0497,
      "step": 55466
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.023858070373535,
      "learning_rate": 0.0005182131841243242,
      "loss": 3.0211,
      "step": 55467
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.880941152572632,
      "learning_rate": 0.0005182103770057318,
      "loss": 2.7551,
      "step": 55468
    },
    {
      "epoch": 0.72,
      "grad_norm": 4.044973373413086,
      "learning_rate": 0.00051820756984657,
      "loss": 2.9223,
      "step": 55469
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0320687294006348,
      "learning_rate": 0.0005182047626468394,
      "loss": 3.0916,
      "step": 55470
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2830936908721924,
      "learning_rate": 0.0005182019554065404,
      "loss": 2.9449,
      "step": 55471
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.683687925338745,
      "learning_rate": 0.0005181991481256735,
      "loss": 3.112,
      "step": 55472
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8990744352340698,
      "learning_rate": 0.0005181963408042392,
      "loss": 3.0366,
      "step": 55473
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9968907833099365,
      "learning_rate": 0.0005181935334422381,
      "loss": 3.2426,
      "step": 55474
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4425102472305298,
      "learning_rate": 0.0005181907260396707,
      "loss": 3.1103,
      "step": 55475
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7741155624389648,
      "learning_rate": 0.0005181879185965376,
      "loss": 3.1829,
      "step": 55476
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.770705223083496,
      "learning_rate": 0.0005181851111128393,
      "loss": 2.895,
      "step": 55477
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9983412027359009,
      "learning_rate": 0.0005181823035885761,
      "loss": 2.8814,
      "step": 55478
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6831649541854858,
      "learning_rate": 0.0005181794960237488,
      "loss": 3.2892,
      "step": 55479
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.642719030380249,
      "learning_rate": 0.0005181766884183578,
      "loss": 2.9135,
      "step": 55480
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.49782395362854,
      "learning_rate": 0.0005181738807724035,
      "loss": 3.0186,
      "step": 55481
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6398265361785889,
      "learning_rate": 0.0005181710730858868,
      "loss": 2.9261,
      "step": 55482
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8057781457901,
      "learning_rate": 0.0005181682653588078,
      "loss": 2.9416,
      "step": 55483
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2803864479064941,
      "learning_rate": 0.0005181654575911672,
      "loss": 2.8274,
      "step": 55484
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4676389694213867,
      "learning_rate": 0.0005181626497829656,
      "loss": 3.0875,
      "step": 55485
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6184359788894653,
      "learning_rate": 0.0005181598419342035,
      "loss": 3.1595,
      "step": 55486
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4561090469360352,
      "learning_rate": 0.0005181570340448812,
      "loss": 3.1305,
      "step": 55487
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5035923719406128,
      "learning_rate": 0.0005181542261149995,
      "loss": 3.4137,
      "step": 55488
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4627504348754883,
      "learning_rate": 0.0005181514181445588,
      "loss": 3.1824,
      "step": 55489
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.973680019378662,
      "learning_rate": 0.0005181486101335596,
      "loss": 2.9935,
      "step": 55490
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4082622528076172,
      "learning_rate": 0.0005181458020820025,
      "loss": 3.0705,
      "step": 55491
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5908950567245483,
      "learning_rate": 0.000518142993989888,
      "loss": 3.136,
      "step": 55492
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3725061416625977,
      "learning_rate": 0.0005181401858572165,
      "loss": 2.9462,
      "step": 55493
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.56795072555542,
      "learning_rate": 0.0005181373776839888,
      "loss": 2.8509,
      "step": 55494
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4535757303237915,
      "learning_rate": 0.000518134569470205,
      "loss": 2.8052,
      "step": 55495
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7874302864074707,
      "learning_rate": 0.0005181317612158661,
      "loss": 3.1161,
      "step": 55496
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.559116244316101,
      "learning_rate": 0.0005181289529209722,
      "loss": 3.1787,
      "step": 55497
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.697006106376648,
      "learning_rate": 0.0005181261445855242,
      "loss": 3.047,
      "step": 55498
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4554321765899658,
      "learning_rate": 0.0005181233362095222,
      "loss": 3.049,
      "step": 55499
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6092960834503174,
      "learning_rate": 0.0005181205277929672,
      "loss": 2.8403,
      "step": 55500
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8274720907211304,
      "learning_rate": 0.0005181177193358592,
      "loss": 3.2757,
      "step": 55501
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4502613544464111,
      "learning_rate": 0.0005181149108381992,
      "loss": 3.1534,
      "step": 55502
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7308650016784668,
      "learning_rate": 0.0005181121022999874,
      "loss": 2.8732,
      "step": 55503
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7908685207366943,
      "learning_rate": 0.0005181092937212245,
      "loss": 3.1681,
      "step": 55504
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4828540086746216,
      "learning_rate": 0.0005181064851019111,
      "loss": 2.8218,
      "step": 55505
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5239338874816895,
      "learning_rate": 0.0005181036764420474,
      "loss": 3.2887,
      "step": 55506
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6735656261444092,
      "learning_rate": 0.0005181008677416342,
      "loss": 3.0916,
      "step": 55507
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3795591592788696,
      "learning_rate": 0.0005180980590006719,
      "loss": 3.1544,
      "step": 55508
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5987145900726318,
      "learning_rate": 0.000518095250219161,
      "loss": 3.1355,
      "step": 55509
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.823790192604065,
      "learning_rate": 0.0005180924413971022,
      "loss": 2.9993,
      "step": 55510
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0786361694335938,
      "learning_rate": 0.0005180896325344958,
      "loss": 2.8837,
      "step": 55511
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4692026376724243,
      "learning_rate": 0.0005180868236313424,
      "loss": 3.1942,
      "step": 55512
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5324082374572754,
      "learning_rate": 0.0005180840146876427,
      "loss": 2.9878,
      "step": 55513
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1779673099517822,
      "learning_rate": 0.000518081205703397,
      "loss": 2.8567,
      "step": 55514
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6623451709747314,
      "learning_rate": 0.0005180783966786058,
      "loss": 2.9452,
      "step": 55515
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4688820838928223,
      "learning_rate": 0.0005180755876132698,
      "loss": 3.0508,
      "step": 55516
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.359955310821533,
      "learning_rate": 0.0005180727785073894,
      "loss": 3.0178,
      "step": 55517
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6132705211639404,
      "learning_rate": 0.0005180699693609651,
      "loss": 2.808,
      "step": 55518
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8600656986236572,
      "learning_rate": 0.0005180671601739975,
      "loss": 2.7733,
      "step": 55519
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4917266368865967,
      "learning_rate": 0.0005180643509464872,
      "loss": 2.9096,
      "step": 55520
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.30828595161438,
      "learning_rate": 0.0005180615416784345,
      "loss": 2.7076,
      "step": 55521
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2398290634155273,
      "learning_rate": 0.0005180587323698401,
      "loss": 2.891,
      "step": 55522
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5299996137619019,
      "learning_rate": 0.0005180559230207044,
      "loss": 2.9853,
      "step": 55523
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.2705347537994385,
      "learning_rate": 0.0005180531136310281,
      "loss": 3.0643,
      "step": 55524
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8524365425109863,
      "learning_rate": 0.0005180503042008115,
      "loss": 2.9269,
      "step": 55525
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6996283531188965,
      "learning_rate": 0.0005180474947300553,
      "loss": 3.265,
      "step": 55526
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.942886471748352,
      "learning_rate": 0.0005180446852187599,
      "loss": 3.1822,
      "step": 55527
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1494457721710205,
      "learning_rate": 0.0005180418756669259,
      "loss": 3.1137,
      "step": 55528
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.677340030670166,
      "learning_rate": 0.0005180390660745539,
      "loss": 3.0412,
      "step": 55529
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6273325681686401,
      "learning_rate": 0.0005180362564416442,
      "loss": 3.1002,
      "step": 55530
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1150872707366943,
      "learning_rate": 0.0005180334467681975,
      "loss": 3.1243,
      "step": 55531
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5489087104797363,
      "learning_rate": 0.0005180306370542141,
      "loss": 2.9886,
      "step": 55532
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4023147821426392,
      "learning_rate": 0.000518027827299695,
      "loss": 3.0073,
      "step": 55533
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4700851440429688,
      "learning_rate": 0.0005180250175046402,
      "loss": 3.2454,
      "step": 55534
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4695402383804321,
      "learning_rate": 0.0005180222076690504,
      "loss": 2.8303,
      "step": 55535
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.703031063079834,
      "learning_rate": 0.0005180193977929263,
      "loss": 3.1476,
      "step": 55536
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4880379438400269,
      "learning_rate": 0.0005180165878762682,
      "loss": 3.1604,
      "step": 55537
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5908291339874268,
      "learning_rate": 0.0005180137779190768,
      "loss": 3.0504,
      "step": 55538
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6417275667190552,
      "learning_rate": 0.0005180109679213524,
      "loss": 3.1486,
      "step": 55539
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6247129440307617,
      "learning_rate": 0.0005180081578830956,
      "loss": 3.0987,
      "step": 55540
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2457778453826904,
      "learning_rate": 0.000518005347804307,
      "loss": 3.002,
      "step": 55541
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4404065608978271,
      "learning_rate": 0.0005180025376849873,
      "loss": 3.0279,
      "step": 55542
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7822340726852417,
      "learning_rate": 0.0005179997275251366,
      "loss": 3.3062,
      "step": 55543
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3242177963256836,
      "learning_rate": 0.0005179969173247556,
      "loss": 3.0985,
      "step": 55544
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5941190719604492,
      "learning_rate": 0.000517994107083845,
      "loss": 2.98,
      "step": 55545
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3728269338607788,
      "learning_rate": 0.0005179912968024052,
      "loss": 2.9027,
      "step": 55546
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2554110288619995,
      "learning_rate": 0.0005179884864804366,
      "loss": 3.1025,
      "step": 55547
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4075591564178467,
      "learning_rate": 0.0005179856761179398,
      "loss": 2.9982,
      "step": 55548
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0273642539978027,
      "learning_rate": 0.0005179828657149155,
      "loss": 3.1806,
      "step": 55549
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6766375303268433,
      "learning_rate": 0.0005179800552713639,
      "loss": 3.1854,
      "step": 55550
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.406425952911377,
      "learning_rate": 0.0005179772447872858,
      "loss": 3.1994,
      "step": 55551
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7154674530029297,
      "learning_rate": 0.0005179744342626815,
      "loss": 2.9962,
      "step": 55552
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4590901136398315,
      "learning_rate": 0.0005179716236975518,
      "loss": 3.0164,
      "step": 55553
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3031935691833496,
      "learning_rate": 0.000517968813091897,
      "loss": 3.0437,
      "step": 55554
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.811398506164551,
      "learning_rate": 0.0005179660024457177,
      "loss": 2.7834,
      "step": 55555
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.703521490097046,
      "learning_rate": 0.0005179631917590143,
      "loss": 3.1789,
      "step": 55556
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1513893604278564,
      "learning_rate": 0.0005179603810317875,
      "loss": 2.9231,
      "step": 55557
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.7539896965026855,
      "learning_rate": 0.0005179575702640378,
      "loss": 3.0449,
      "step": 55558
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.3940212726593018,
      "learning_rate": 0.0005179547594557657,
      "loss": 3.1419,
      "step": 55559
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5839197635650635,
      "learning_rate": 0.0005179519486069716,
      "loss": 3.0055,
      "step": 55560
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8871839046478271,
      "learning_rate": 0.000517949137717656,
      "loss": 3.0056,
      "step": 55561
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6595065593719482,
      "learning_rate": 0.0005179463267878198,
      "loss": 2.6538,
      "step": 55562
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9381425380706787,
      "learning_rate": 0.000517943515817463,
      "loss": 3.1371,
      "step": 55563
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.854432463645935,
      "learning_rate": 0.0005179407048065866,
      "loss": 3.101,
      "step": 55564
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8697290420532227,
      "learning_rate": 0.0005179378937551908,
      "loss": 3.001,
      "step": 55565
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.757704257965088,
      "learning_rate": 0.0005179350826632764,
      "loss": 3.2319,
      "step": 55566
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3865201473236084,
      "learning_rate": 0.0005179322715308435,
      "loss": 2.8526,
      "step": 55567
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8171378374099731,
      "learning_rate": 0.0005179294603578931,
      "loss": 3.0047,
      "step": 55568
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0708513259887695,
      "learning_rate": 0.0005179266491444254,
      "loss": 2.9209,
      "step": 55569
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4302786588668823,
      "learning_rate": 0.0005179238378904409,
      "loss": 3.0378,
      "step": 55570
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8739511966705322,
      "learning_rate": 0.0005179210265959405,
      "loss": 3.0613,
      "step": 55571
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4148589372634888,
      "learning_rate": 0.0005179182152609243,
      "loss": 2.8538,
      "step": 55572
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8817741870880127,
      "learning_rate": 0.000517915403885393,
      "loss": 3.0062,
      "step": 55573
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2673771381378174,
      "learning_rate": 0.0005179125924693472,
      "loss": 3.2577,
      "step": 55574
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5293651819229126,
      "learning_rate": 0.0005179097810127872,
      "loss": 2.898,
      "step": 55575
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5093549489974976,
      "learning_rate": 0.0005179069695157139,
      "loss": 3.1845,
      "step": 55576
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5826597213745117,
      "learning_rate": 0.0005179041579781273,
      "loss": 2.9951,
      "step": 55577
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6437221765518188,
      "learning_rate": 0.0005179013464000283,
      "loss": 3.2113,
      "step": 55578
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4156090021133423,
      "learning_rate": 0.0005178985347814173,
      "loss": 3.117,
      "step": 55579
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4761215448379517,
      "learning_rate": 0.000517895723122295,
      "loss": 3.3185,
      "step": 55580
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0620172023773193,
      "learning_rate": 0.0005178929114226617,
      "loss": 2.9958,
      "step": 55581
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4269130229949951,
      "learning_rate": 0.0005178900996825179,
      "loss": 3.1207,
      "step": 55582
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4500597715377808,
      "learning_rate": 0.0005178872879018643,
      "loss": 3.2935,
      "step": 55583
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9434940814971924,
      "learning_rate": 0.0005178844760807014,
      "loss": 3.0205,
      "step": 55584
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.14180326461792,
      "learning_rate": 0.0005178816642190295,
      "loss": 3.0059,
      "step": 55585
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.543429970741272,
      "learning_rate": 0.0005178788523168493,
      "loss": 3.0495,
      "step": 55586
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8817535638809204,
      "learning_rate": 0.0005178760403741615,
      "loss": 3.0817,
      "step": 55587
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.0068559646606445,
      "learning_rate": 0.0005178732283909663,
      "loss": 2.8512,
      "step": 55588
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.015143394470215,
      "learning_rate": 0.0005178704163672643,
      "loss": 3.015,
      "step": 55589
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3623650074005127,
      "learning_rate": 0.0005178676043030561,
      "loss": 3.1406,
      "step": 55590
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.028158187866211,
      "learning_rate": 0.0005178647921983423,
      "loss": 3.0976,
      "step": 55591
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.1660168170928955,
      "learning_rate": 0.0005178619800531233,
      "loss": 2.7668,
      "step": 55592
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5522661209106445,
      "learning_rate": 0.0005178591678673996,
      "loss": 3.0902,
      "step": 55593
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6071062088012695,
      "learning_rate": 0.0005178563556411718,
      "loss": 3.3847,
      "step": 55594
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3127338886260986,
      "learning_rate": 0.0005178535433744403,
      "loss": 3.2417,
      "step": 55595
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.784872055053711,
      "learning_rate": 0.0005178507310672058,
      "loss": 3.0076,
      "step": 55596
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0007729530334473,
      "learning_rate": 0.0005178479187194687,
      "loss": 2.8696,
      "step": 55597
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6438932418823242,
      "learning_rate": 0.0005178451063312296,
      "loss": 3.0581,
      "step": 55598
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9763458967208862,
      "learning_rate": 0.000517842293902489,
      "loss": 2.8949,
      "step": 55599
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5210822820663452,
      "learning_rate": 0.0005178394814332473,
      "loss": 3.008,
      "step": 55600
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.440379023551941,
      "learning_rate": 0.0005178366689235052,
      "loss": 3.2205,
      "step": 55601
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0284316539764404,
      "learning_rate": 0.0005178338563732631,
      "loss": 2.8717,
      "step": 55602
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3354586362838745,
      "learning_rate": 0.0005178310437825216,
      "loss": 3.4119,
      "step": 55603
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.647856593132019,
      "learning_rate": 0.0005178282311512813,
      "loss": 3.0548,
      "step": 55604
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7993321418762207,
      "learning_rate": 0.0005178254184795425,
      "loss": 3.0472,
      "step": 55605
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.0679757595062256,
      "learning_rate": 0.0005178226057673059,
      "loss": 3.0237,
      "step": 55606
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.600989580154419,
      "learning_rate": 0.000517819793014572,
      "loss": 3.0848,
      "step": 55607
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6344761848449707,
      "learning_rate": 0.0005178169802213412,
      "loss": 3.2776,
      "step": 55608
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8086508512496948,
      "learning_rate": 0.0005178141673876141,
      "loss": 3.1455,
      "step": 55609
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7878106832504272,
      "learning_rate": 0.0005178113545133914,
      "loss": 2.8475,
      "step": 55610
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3605268001556396,
      "learning_rate": 0.0005178085415986733,
      "loss": 2.87,
      "step": 55611
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8885446786880493,
      "learning_rate": 0.0005178057286434604,
      "loss": 2.9029,
      "step": 55612
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8451759815216064,
      "learning_rate": 0.0005178029156477534,
      "loss": 2.8209,
      "step": 55613
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5339248180389404,
      "learning_rate": 0.0005178001026115528,
      "loss": 3.069,
      "step": 55614
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.562870979309082,
      "learning_rate": 0.000517797289534859,
      "loss": 3.0242,
      "step": 55615
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.540427327156067,
      "learning_rate": 0.0005177944764176726,
      "loss": 3.0689,
      "step": 55616
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.3252931833267212,
      "learning_rate": 0.000517791663259994,
      "loss": 3.0018,
      "step": 55617
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4342420101165771,
      "learning_rate": 0.000517788850061824,
      "loss": 2.7861,
      "step": 55618
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5235257148742676,
      "learning_rate": 0.0005177860368231628,
      "loss": 3.2403,
      "step": 55619
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.865790843963623,
      "learning_rate": 0.0005177832235440111,
      "loss": 2.9108,
      "step": 55620
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4384140968322754,
      "learning_rate": 0.0005177804102243694,
      "loss": 3.0229,
      "step": 55621
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4676119089126587,
      "learning_rate": 0.0005177775968642381,
      "loss": 3.0107,
      "step": 55622
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9747793674468994,
      "learning_rate": 0.0005177747834636179,
      "loss": 2.8951,
      "step": 55623
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3220760822296143,
      "learning_rate": 0.0005177719700225094,
      "loss": 3.0057,
      "step": 55624
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.4662086963653564,
      "learning_rate": 0.0005177691565409128,
      "loss": 3.0078,
      "step": 55625
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6464511156082153,
      "learning_rate": 0.0005177663430188289,
      "loss": 2.8412,
      "step": 55626
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.965201735496521,
      "learning_rate": 0.0005177635294562581,
      "loss": 2.5562,
      "step": 55627
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6999280452728271,
      "learning_rate": 0.000517760715853201,
      "loss": 3.1079,
      "step": 55628
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.96569561958313,
      "learning_rate": 0.0005177579022096579,
      "loss": 3.0166,
      "step": 55629
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6917097568511963,
      "learning_rate": 0.0005177550885256297,
      "loss": 2.8593,
      "step": 55630
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6388589143753052,
      "learning_rate": 0.0005177522748011166,
      "loss": 3.1163,
      "step": 55631
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.484989881515503,
      "learning_rate": 0.0005177494610361192,
      "loss": 3.1812,
      "step": 55632
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.966497540473938,
      "learning_rate": 0.0005177466472306381,
      "loss": 3.2629,
      "step": 55633
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6373505592346191,
      "learning_rate": 0.0005177438333846739,
      "loss": 3.1553,
      "step": 55634
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.9341859817504883,
      "learning_rate": 0.0005177410194982269,
      "loss": 3.0186,
      "step": 55635
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.141206979751587,
      "learning_rate": 0.0005177382055712978,
      "loss": 3.0817,
      "step": 55636
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.7996201515197754,
      "learning_rate": 0.000517735391603887,
      "loss": 3.0698,
      "step": 55637
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7803938388824463,
      "learning_rate": 0.000517732577595995,
      "loss": 2.9672,
      "step": 55638
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.6068313121795654,
      "learning_rate": 0.0005177297635476226,
      "loss": 3.0377,
      "step": 55639
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1423258781433105,
      "learning_rate": 0.00051772694945877,
      "loss": 2.9633,
      "step": 55640
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4071255922317505,
      "learning_rate": 0.0005177241353294379,
      "loss": 2.939,
      "step": 55641
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.71534264087677,
      "learning_rate": 0.0005177213211596268,
      "loss": 3.1695,
      "step": 55642
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1807193756103516,
      "learning_rate": 0.0005177185069493371,
      "loss": 2.8583,
      "step": 55643
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.702409267425537,
      "learning_rate": 0.0005177156926985695,
      "loss": 2.935,
      "step": 55644
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.531353235244751,
      "learning_rate": 0.0005177128784073245,
      "loss": 3.1891,
      "step": 55645
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5601840019226074,
      "learning_rate": 0.0005177100640756025,
      "loss": 2.8315,
      "step": 55646
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.395568370819092,
      "learning_rate": 0.0005177072497034041,
      "loss": 3.0255,
      "step": 55647
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.680259346961975,
      "learning_rate": 0.0005177044352907297,
      "loss": 2.8756,
      "step": 55648
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.4288759231567383,
      "learning_rate": 0.00051770162083758,
      "loss": 3.1494,
      "step": 55649
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3968162536621094,
      "learning_rate": 0.0005176988063439555,
      "loss": 2.8632,
      "step": 55650
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1407299041748047,
      "learning_rate": 0.0005176959918098567,
      "loss": 3.2109,
      "step": 55651
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.363814115524292,
      "learning_rate": 0.000517693177235284,
      "loss": 3.1577,
      "step": 55652
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.585843563079834,
      "learning_rate": 0.0005176903626202381,
      "loss": 2.7161,
      "step": 55653
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2179067134857178,
      "learning_rate": 0.0005176875479647195,
      "loss": 3.0571,
      "step": 55654
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.2908029556274414,
      "learning_rate": 0.0005176847332687286,
      "loss": 3.1407,
      "step": 55655
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.652356505393982,
      "learning_rate": 0.0005176819185322661,
      "loss": 3.233,
      "step": 55656
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5046175718307495,
      "learning_rate": 0.0005176791037553323,
      "loss": 3.2686,
      "step": 55657
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.912367343902588,
      "learning_rate": 0.0005176762889379278,
      "loss": 2.8467,
      "step": 55658
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.4603469371795654,
      "learning_rate": 0.0005176734740800533,
      "loss": 2.7151,
      "step": 55659
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6053423881530762,
      "learning_rate": 0.0005176706591817092,
      "loss": 3.0199,
      "step": 55660
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.85035240650177,
      "learning_rate": 0.000517667844242896,
      "loss": 3.0039,
      "step": 55661
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6842689514160156,
      "learning_rate": 0.0005176650292636142,
      "loss": 3.1984,
      "step": 55662
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7965950965881348,
      "learning_rate": 0.0005176622142438644,
      "loss": 2.8309,
      "step": 55663
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.13616681098938,
      "learning_rate": 0.000517659399183647,
      "loss": 3.0371,
      "step": 55664
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.5666871070861816,
      "learning_rate": 0.0005176565840829627,
      "loss": 2.8241,
      "step": 55665
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4738328456878662,
      "learning_rate": 0.0005176537689418119,
      "loss": 3.2105,
      "step": 55666
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.1610028743743896,
      "learning_rate": 0.0005176509537601951,
      "loss": 2.8901,
      "step": 55667
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.3442282676696777,
      "learning_rate": 0.000517648138538113,
      "loss": 3.0386,
      "step": 55668
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5416816473007202,
      "learning_rate": 0.0005176453232755659,
      "loss": 2.9796,
      "step": 55669
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.4140009880065918,
      "learning_rate": 0.0005176425079725544,
      "loss": 2.9678,
      "step": 55670
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8813790082931519,
      "learning_rate": 0.0005176396926290792,
      "loss": 3.0575,
      "step": 55671
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.5213614702224731,
      "learning_rate": 0.0005176368772451406,
      "loss": 3.1068,
      "step": 55672
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7567510604858398,
      "learning_rate": 0.0005176340618207392,
      "loss": 3.1731,
      "step": 55673
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6448763608932495,
      "learning_rate": 0.0005176312463558754,
      "loss": 3.1401,
      "step": 55674
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.6405882835388184,
      "learning_rate": 0.00051762843085055,
      "loss": 3.1606,
      "step": 55675
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.333526611328125,
      "learning_rate": 0.0005176256153047633,
      "loss": 3.014,
      "step": 55676
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.8425049781799316,
      "learning_rate": 0.0005176227997185159,
      "loss": 3.0989,
      "step": 55677
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.7493531703948975,
      "learning_rate": 0.0005176199840918082,
      "loss": 2.8673,
      "step": 55678
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.01750111579895,
      "learning_rate": 0.0005176171684246412,
      "loss": 3.1516,
      "step": 55679
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.487920880317688,
      "learning_rate": 0.0005176143527170147,
      "loss": 2.9165,
      "step": 55680
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5530346632003784,
      "learning_rate": 0.0005176115369689297,
      "loss": 3.1092,
      "step": 55681
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8423949480056763,
      "learning_rate": 0.0005176087211803866,
      "loss": 3.0711,
      "step": 55682
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9554555416107178,
      "learning_rate": 0.000517605905351386,
      "loss": 2.992,
      "step": 55683
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.838005542755127,
      "learning_rate": 0.0005176030894819282,
      "loss": 3.0177,
      "step": 55684
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8345348834991455,
      "learning_rate": 0.0005176002735720139,
      "loss": 2.9339,
      "step": 55685
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.389663577079773,
      "learning_rate": 0.0005175974576216438,
      "loss": 3.0535,
      "step": 55686
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.375983715057373,
      "learning_rate": 0.000517594641630818,
      "loss": 2.8261,
      "step": 55687
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4095557928085327,
      "learning_rate": 0.0005175918255995375,
      "loss": 2.8286,
      "step": 55688
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.549621343612671,
      "learning_rate": 0.0005175890095278023,
      "loss": 3.152,
      "step": 55689
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7232433557510376,
      "learning_rate": 0.0005175861934156134,
      "loss": 2.6137,
      "step": 55690
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5095336437225342,
      "learning_rate": 0.000517583377262971,
      "loss": 2.9696,
      "step": 55691
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.229818105697632,
      "learning_rate": 0.0005175805610698757,
      "loss": 2.9507,
      "step": 55692
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.610192894935608,
      "learning_rate": 0.0005175777448363282,
      "loss": 2.9823,
      "step": 55693
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.672301173210144,
      "learning_rate": 0.0005175749285623288,
      "loss": 2.8736,
      "step": 55694
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6410983800888062,
      "learning_rate": 0.0005175721122478781,
      "loss": 3.1154,
      "step": 55695
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.897671103477478,
      "learning_rate": 0.0005175692958929767,
      "loss": 3.1366,
      "step": 55696
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4075565338134766,
      "learning_rate": 0.0005175664794976251,
      "loss": 2.9427,
      "step": 55697
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5495270490646362,
      "learning_rate": 0.0005175636630618237,
      "loss": 2.943,
      "step": 55698
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6546823978424072,
      "learning_rate": 0.000517560846585573,
      "loss": 3.1188,
      "step": 55699
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4381093978881836,
      "learning_rate": 0.0005175580300688739,
      "loss": 3.2306,
      "step": 55700
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4931756258010864,
      "learning_rate": 0.0005175552135117265,
      "loss": 3.1085,
      "step": 55701
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.517088770866394,
      "learning_rate": 0.0005175523969141315,
      "loss": 2.7914,
      "step": 55702
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6479421854019165,
      "learning_rate": 0.0005175495802760894,
      "loss": 2.9543,
      "step": 55703
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8918675184249878,
      "learning_rate": 0.0005175467635976008,
      "loss": 3.1461,
      "step": 55704
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.706827163696289,
      "learning_rate": 0.000517543946878666,
      "loss": 3.0282,
      "step": 55705
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7941665649414062,
      "learning_rate": 0.0005175411301192858,
      "loss": 3.0374,
      "step": 55706
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5201138257980347,
      "learning_rate": 0.0005175383133194605,
      "loss": 2.8506,
      "step": 55707
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5548089742660522,
      "learning_rate": 0.0005175354964791907,
      "loss": 3.0008,
      "step": 55708
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4880398511886597,
      "learning_rate": 0.0005175326795984771,
      "loss": 2.8699,
      "step": 55709
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7371102571487427,
      "learning_rate": 0.0005175298626773201,
      "loss": 2.8444,
      "step": 55710
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.458693027496338,
      "learning_rate": 0.00051752704571572,
      "loss": 3.1675,
      "step": 55711
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2862987518310547,
      "learning_rate": 0.0005175242287136776,
      "loss": 3.21,
      "step": 55712
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4914052486419678,
      "learning_rate": 0.0005175214116711933,
      "loss": 3.1544,
      "step": 55713
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.546703577041626,
      "learning_rate": 0.0005175185945882677,
      "loss": 2.8797,
      "step": 55714
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5748372077941895,
      "learning_rate": 0.0005175157774649013,
      "loss": 3.2649,
      "step": 55715
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6773854494094849,
      "learning_rate": 0.0005175129603010945,
      "loss": 3.1367,
      "step": 55716
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5218368768692017,
      "learning_rate": 0.0005175101430968482,
      "loss": 2.9766,
      "step": 55717
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4479446411132812,
      "learning_rate": 0.0005175073258521625,
      "loss": 3.0048,
      "step": 55718
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.037588357925415,
      "learning_rate": 0.000517504508567038,
      "loss": 3.1274,
      "step": 55719
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.431936264038086,
      "learning_rate": 0.0005175016912414754,
      "loss": 3.1621,
      "step": 55720
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7963322401046753,
      "learning_rate": 0.0005174988738754752,
      "loss": 3.0383,
      "step": 55721
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6653425693511963,
      "learning_rate": 0.0005174960564690377,
      "loss": 2.87,
      "step": 55722
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5538899898529053,
      "learning_rate": 0.0005174932390221636,
      "loss": 3.0861,
      "step": 55723
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6316078901290894,
      "learning_rate": 0.0005174904215348535,
      "loss": 3.1433,
      "step": 55724
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.089975357055664,
      "learning_rate": 0.0005174876040071077,
      "loss": 3.0412,
      "step": 55725
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5155861377716064,
      "learning_rate": 0.0005174847864389269,
      "loss": 3.2491,
      "step": 55726
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4693580865859985,
      "learning_rate": 0.0005174819688303116,
      "loss": 3.0063,
      "step": 55727
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0331382751464844,
      "learning_rate": 0.0005174791511812623,
      "loss": 2.8435,
      "step": 55728
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.862939476966858,
      "learning_rate": 0.0005174763334917794,
      "loss": 3.1335,
      "step": 55729
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1299359798431396,
      "learning_rate": 0.0005174735157618638,
      "loss": 2.9139,
      "step": 55730
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5183199644088745,
      "learning_rate": 0.0005174706979915156,
      "loss": 3.0272,
      "step": 55731
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8109391927719116,
      "learning_rate": 0.0005174678801807356,
      "loss": 2.9603,
      "step": 55732
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.597562551498413,
      "learning_rate": 0.0005174650623295241,
      "loss": 2.6799,
      "step": 55733
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9877413511276245,
      "learning_rate": 0.0005174622444378817,
      "loss": 2.981,
      "step": 55734
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.581263542175293,
      "learning_rate": 0.000517459426505809,
      "loss": 2.9276,
      "step": 55735
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3676199913024902,
      "learning_rate": 0.0005174566085333066,
      "loss": 3.0794,
      "step": 55736
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2954425811767578,
      "learning_rate": 0.0005174537905203747,
      "loss": 2.746,
      "step": 55737
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3693410158157349,
      "learning_rate": 0.0005174509724670143,
      "loss": 2.8714,
      "step": 55738
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.372291088104248,
      "learning_rate": 0.0005174481543732255,
      "loss": 3.1202,
      "step": 55739
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.88670015335083,
      "learning_rate": 0.000517445336239009,
      "loss": 3.1693,
      "step": 55740
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.519134759902954,
      "learning_rate": 0.0005174425180643653,
      "loss": 2.8106,
      "step": 55741
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5687732696533203,
      "learning_rate": 0.0005174396998492949,
      "loss": 3.4045,
      "step": 55742
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6233426332473755,
      "learning_rate": 0.0005174368815937984,
      "loss": 3.1447,
      "step": 55743
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7899127006530762,
      "learning_rate": 0.0005174340632978764,
      "loss": 3.0898,
      "step": 55744
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6295686960220337,
      "learning_rate": 0.0005174312449615291,
      "loss": 3.1396,
      "step": 55745
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4281514883041382,
      "learning_rate": 0.0005174284265847573,
      "loss": 3.0846,
      "step": 55746
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.9650979042053223,
      "learning_rate": 0.0005174256081675615,
      "loss": 2.9959,
      "step": 55747
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7901453971862793,
      "learning_rate": 0.0005174227897099421,
      "loss": 3.0881,
      "step": 55748
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.241490364074707,
      "learning_rate": 0.0005174199712118997,
      "loss": 3.0944,
      "step": 55749
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.872380256652832,
      "learning_rate": 0.0005174171526734349,
      "loss": 2.8771,
      "step": 55750
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.769311785697937,
      "learning_rate": 0.0005174143340945481,
      "loss": 3.0308,
      "step": 55751
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3751418590545654,
      "learning_rate": 0.0005174115154752399,
      "loss": 3.189,
      "step": 55752
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3747997283935547,
      "learning_rate": 0.0005174086968155108,
      "loss": 3.0011,
      "step": 55753
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7763899564743042,
      "learning_rate": 0.0005174058781153612,
      "loss": 3.1744,
      "step": 55754
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7093756198883057,
      "learning_rate": 0.0005174030593747919,
      "loss": 3.1344,
      "step": 55755
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.0162200927734375,
      "learning_rate": 0.0005174002405938032,
      "loss": 2.9789,
      "step": 55756
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.352437973022461,
      "learning_rate": 0.0005173974217723957,
      "loss": 3.1041,
      "step": 55757
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5277966260910034,
      "learning_rate": 0.00051739460291057,
      "loss": 2.8475,
      "step": 55758
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1473679542541504,
      "learning_rate": 0.0005173917840083264,
      "loss": 3.1944,
      "step": 55759
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.7474277019500732,
      "learning_rate": 0.0005173889650656655,
      "loss": 3.0052,
      "step": 55760
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3946361541748047,
      "learning_rate": 0.0005173861460825881,
      "loss": 3.0019,
      "step": 55761
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.282623052597046,
      "learning_rate": 0.0005173833270590945,
      "loss": 2.8664,
      "step": 55762
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.482722282409668,
      "learning_rate": 0.0005173805079951851,
      "loss": 3.0113,
      "step": 55763
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2903181314468384,
      "learning_rate": 0.0005173776888908606,
      "loss": 3.0799,
      "step": 55764
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.479696750640869,
      "learning_rate": 0.0005173748697461215,
      "loss": 2.9553,
      "step": 55765
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.6395263671875,
      "learning_rate": 0.0005173720505609683,
      "loss": 3.1137,
      "step": 55766
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5484497547149658,
      "learning_rate": 0.0005173692313354015,
      "loss": 2.9544,
      "step": 55767
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.695922613143921,
      "learning_rate": 0.0005173664120694217,
      "loss": 3.0923,
      "step": 55768
    },
    {
      "epoch": 0.73,
      "grad_norm": 4.292168140411377,
      "learning_rate": 0.0005173635927630294,
      "loss": 3.2259,
      "step": 55769
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.8383426666259766,
      "learning_rate": 0.0005173607734162251,
      "loss": 3.0514,
      "step": 55770
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7959927320480347,
      "learning_rate": 0.0005173579540290093,
      "loss": 2.9875,
      "step": 55771
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.568424940109253,
      "learning_rate": 0.0005173551346013825,
      "loss": 3.1286,
      "step": 55772
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.163464069366455,
      "learning_rate": 0.0005173523151333453,
      "loss": 3.0619,
      "step": 55773
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2818598747253418,
      "learning_rate": 0.0005173494956248984,
      "loss": 3.0523,
      "step": 55774
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.633035659790039,
      "learning_rate": 0.000517346676076042,
      "loss": 2.7097,
      "step": 55775
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.627084970474243,
      "learning_rate": 0.0005173438564867767,
      "loss": 2.8776,
      "step": 55776
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9851895570755005,
      "learning_rate": 0.000517341036857103,
      "loss": 3.0032,
      "step": 55777
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.470567226409912,
      "learning_rate": 0.0005173382171870216,
      "loss": 2.9484,
      "step": 55778
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7124896049499512,
      "learning_rate": 0.000517335397476533,
      "loss": 3.2956,
      "step": 55779
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.9682960510253906,
      "learning_rate": 0.0005173325777256375,
      "loss": 3.1899,
      "step": 55780
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7149808406829834,
      "learning_rate": 0.0005173297579343358,
      "loss": 3.2151,
      "step": 55781
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7035707235336304,
      "learning_rate": 0.0005173269381026284,
      "loss": 3.0524,
      "step": 55782
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.381630778312683,
      "learning_rate": 0.000517324118230516,
      "loss": 3.0709,
      "step": 55783
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9557828903198242,
      "learning_rate": 0.0005173212983179987,
      "loss": 2.9701,
      "step": 55784
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4789981842041016,
      "learning_rate": 0.0005173184783650774,
      "loss": 2.8096,
      "step": 55785
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4389543533325195,
      "learning_rate": 0.0005173156583717524,
      "loss": 3.1206,
      "step": 55786
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.376774549484253,
      "learning_rate": 0.0005173128383380243,
      "loss": 3.0556,
      "step": 55787
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4686625003814697,
      "learning_rate": 0.0005173100182638938,
      "loss": 2.891,
      "step": 55788
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.374842643737793,
      "learning_rate": 0.0005173071981493611,
      "loss": 3.0394,
      "step": 55789
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.372513771057129,
      "learning_rate": 0.000517304377994427,
      "loss": 3.0125,
      "step": 55790
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5382801294326782,
      "learning_rate": 0.0005173015577990919,
      "loss": 3.0416,
      "step": 55791
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9900178909301758,
      "learning_rate": 0.0005172987375633563,
      "loss": 2.7635,
      "step": 55792
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.709342122077942,
      "learning_rate": 0.0005172959172872209,
      "loss": 2.8944,
      "step": 55793
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7972965240478516,
      "learning_rate": 0.0005172930969706859,
      "loss": 3.3114,
      "step": 55794
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9077500104904175,
      "learning_rate": 0.0005172902766137521,
      "loss": 3.0367,
      "step": 55795
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6960053443908691,
      "learning_rate": 0.0005172874562164198,
      "loss": 3.0837,
      "step": 55796
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5526834726333618,
      "learning_rate": 0.0005172846357786899,
      "loss": 3.2418,
      "step": 55797
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7433003187179565,
      "learning_rate": 0.0005172818153005625,
      "loss": 3.0725,
      "step": 55798
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.967014193534851,
      "learning_rate": 0.0005172789947820383,
      "loss": 3.0671,
      "step": 55799
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6828044652938843,
      "learning_rate": 0.000517276174223118,
      "loss": 3.1132,
      "step": 55800
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8503488302230835,
      "learning_rate": 0.0005172733536238019,
      "loss": 3.0897,
      "step": 55801
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6725856065750122,
      "learning_rate": 0.0005172705329840906,
      "loss": 3.2199,
      "step": 55802
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1961686611175537,
      "learning_rate": 0.0005172677123039844,
      "loss": 2.9022,
      "step": 55803
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.942824363708496,
      "learning_rate": 0.0005172648915834843,
      "loss": 3.0802,
      "step": 55804
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.073197841644287,
      "learning_rate": 0.0005172620708225903,
      "loss": 3.2441,
      "step": 55805
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4141279458999634,
      "learning_rate": 0.0005172592500213034,
      "loss": 3.0298,
      "step": 55806
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.001007080078125,
      "learning_rate": 0.0005172564291796238,
      "loss": 3.1451,
      "step": 55807
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0821290016174316,
      "learning_rate": 0.0005172536082975522,
      "loss": 2.9664,
      "step": 55808
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.598118543624878,
      "learning_rate": 0.000517250787375089,
      "loss": 3.1382,
      "step": 55809
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8399131298065186,
      "learning_rate": 0.0005172479664122347,
      "loss": 3.0894,
      "step": 55810
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4724974632263184,
      "learning_rate": 0.0005172451454089901,
      "loss": 3.3795,
      "step": 55811
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5548657178878784,
      "learning_rate": 0.0005172423243653553,
      "loss": 3.1772,
      "step": 55812
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2012743949890137,
      "learning_rate": 0.0005172395032813312,
      "loss": 2.9237,
      "step": 55813
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4914679527282715,
      "learning_rate": 0.0005172366821569181,
      "loss": 2.9761,
      "step": 55814
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3629666566848755,
      "learning_rate": 0.0005172338609921167,
      "loss": 3.0704,
      "step": 55815
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4656556844711304,
      "learning_rate": 0.0005172310397869273,
      "loss": 3.1075,
      "step": 55816
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7275149822235107,
      "learning_rate": 0.0005172282185413507,
      "loss": 3.1901,
      "step": 55817
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3731279373168945,
      "learning_rate": 0.0005172253972553872,
      "loss": 3.0418,
      "step": 55818
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.767391562461853,
      "learning_rate": 0.0005172225759290373,
      "loss": 2.9073,
      "step": 55819
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2379114627838135,
      "learning_rate": 0.0005172197545623017,
      "loss": 2.921,
      "step": 55820
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3948291540145874,
      "learning_rate": 0.0005172169331551809,
      "loss": 3.0597,
      "step": 55821
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.037205696105957,
      "learning_rate": 0.0005172141117076753,
      "loss": 3.0706,
      "step": 55822
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.246995449066162,
      "learning_rate": 0.0005172112902197855,
      "loss": 2.885,
      "step": 55823
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.2572216987609863,
      "learning_rate": 0.000517208468691512,
      "loss": 2.8732,
      "step": 55824
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8028895854949951,
      "learning_rate": 0.0005172056471228555,
      "loss": 3.1188,
      "step": 55825
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.956268310546875,
      "learning_rate": 0.0005172028255138163,
      "loss": 2.808,
      "step": 55826
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9301763772964478,
      "learning_rate": 0.0005172000038643949,
      "loss": 2.9552,
      "step": 55827
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3683843612670898,
      "learning_rate": 0.000517197182174592,
      "loss": 2.9923,
      "step": 55828
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7371509075164795,
      "learning_rate": 0.0005171943604444079,
      "loss": 3.1485,
      "step": 55829
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8912625312805176,
      "learning_rate": 0.0005171915386738434,
      "loss": 3.2153,
      "step": 55830
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.816575288772583,
      "learning_rate": 0.0005171887168628989,
      "loss": 3.1449,
      "step": 55831
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5970772504806519,
      "learning_rate": 0.0005171858950115749,
      "loss": 2.9152,
      "step": 55832
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.638183832168579,
      "learning_rate": 0.000517183073119872,
      "loss": 3.0652,
      "step": 55833
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3618282079696655,
      "learning_rate": 0.0005171802511877906,
      "loss": 3.1338,
      "step": 55834
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.473076581954956,
      "learning_rate": 0.0005171774292153313,
      "loss": 2.9647,
      "step": 55835
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6783510446548462,
      "learning_rate": 0.0005171746072024946,
      "loss": 3.0053,
      "step": 55836
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.259689450263977,
      "learning_rate": 0.0005171717851492811,
      "loss": 2.9644,
      "step": 55837
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0023136138916016,
      "learning_rate": 0.0005171689630556911,
      "loss": 2.9082,
      "step": 55838
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.679722785949707,
      "learning_rate": 0.0005171661409217255,
      "loss": 2.7447,
      "step": 55839
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.520092487335205,
      "learning_rate": 0.0005171633187473846,
      "loss": 3.1462,
      "step": 55840
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0985922813415527,
      "learning_rate": 0.0005171604965326687,
      "loss": 2.9616,
      "step": 55841
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.0567920207977295,
      "learning_rate": 0.0005171576742775787,
      "loss": 3.0576,
      "step": 55842
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7662113904953003,
      "learning_rate": 0.0005171548519821151,
      "loss": 3.2829,
      "step": 55843
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.712863564491272,
      "learning_rate": 0.0005171520296462782,
      "loss": 2.9441,
      "step": 55844
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.549715280532837,
      "learning_rate": 0.0005171492072700687,
      "loss": 3.1358,
      "step": 55845
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.555835008621216,
      "learning_rate": 0.000517146384853487,
      "loss": 2.9729,
      "step": 55846
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.62369704246521,
      "learning_rate": 0.0005171435623965338,
      "loss": 3.0997,
      "step": 55847
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.786630392074585,
      "learning_rate": 0.0005171407398992094,
      "loss": 3.1777,
      "step": 55848
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9045367240905762,
      "learning_rate": 0.0005171379173615145,
      "loss": 3.027,
      "step": 55849
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7121772766113281,
      "learning_rate": 0.0005171350947834495,
      "loss": 2.871,
      "step": 55850
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.24605131149292,
      "learning_rate": 0.000517132272165015,
      "loss": 3.0214,
      "step": 55851
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.7624850273132324,
      "learning_rate": 0.0005171294495062115,
      "loss": 3.1006,
      "step": 55852
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4194824695587158,
      "learning_rate": 0.0005171266268070397,
      "loss": 2.8134,
      "step": 55853
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.316273808479309,
      "learning_rate": 0.0005171238040674997,
      "loss": 3.0917,
      "step": 55854
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2024600505828857,
      "learning_rate": 0.0005171209812875925,
      "loss": 3.0184,
      "step": 55855
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.194033145904541,
      "learning_rate": 0.0005171181584673184,
      "loss": 3.2052,
      "step": 55856
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.934486985206604,
      "learning_rate": 0.0005171153356066778,
      "loss": 2.931,
      "step": 55857
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4844865798950195,
      "learning_rate": 0.0005171125127056714,
      "loss": 3.1271,
      "step": 55858
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9465858936309814,
      "learning_rate": 0.0005171096897642997,
      "loss": 2.8277,
      "step": 55859
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8090928792953491,
      "learning_rate": 0.0005171068667825633,
      "loss": 3.1976,
      "step": 55860
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.543233036994934,
      "learning_rate": 0.0005171040437604625,
      "loss": 3.2839,
      "step": 55861
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7385971546173096,
      "learning_rate": 0.000517101220697998,
      "loss": 3.1928,
      "step": 55862
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5083081722259521,
      "learning_rate": 0.0005170983975951703,
      "loss": 2.9387,
      "step": 55863
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.483991265296936,
      "learning_rate": 0.00051709557445198,
      "loss": 2.8722,
      "step": 55864
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.458970308303833,
      "learning_rate": 0.0005170927512684274,
      "loss": 3.1725,
      "step": 55865
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4380812644958496,
      "learning_rate": 0.0005170899280445132,
      "loss": 3.0009,
      "step": 55866
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.57472825050354,
      "learning_rate": 0.0005170871047802379,
      "loss": 3.2713,
      "step": 55867
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5541492700576782,
      "learning_rate": 0.000517084281475602,
      "loss": 3.2012,
      "step": 55868
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5590602159500122,
      "learning_rate": 0.000517081458130606,
      "loss": 2.8717,
      "step": 55869
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4533565044403076,
      "learning_rate": 0.0005170786347452505,
      "loss": 2.8936,
      "step": 55870
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.557230830192566,
      "learning_rate": 0.000517075811319536,
      "loss": 3.1087,
      "step": 55871
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2437247037887573,
      "learning_rate": 0.000517072987853463,
      "loss": 3.184,
      "step": 55872
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6182866096496582,
      "learning_rate": 0.000517070164347032,
      "loss": 3.0187,
      "step": 55873
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5389357805252075,
      "learning_rate": 0.0005170673408002436,
      "loss": 3.0591,
      "step": 55874
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4888941049575806,
      "learning_rate": 0.0005170645172130983,
      "loss": 3.0617,
      "step": 55875
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4886798858642578,
      "learning_rate": 0.0005170616935855965,
      "loss": 2.9099,
      "step": 55876
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5467866659164429,
      "learning_rate": 0.0005170588699177389,
      "loss": 3.2372,
      "step": 55877
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7752846479415894,
      "learning_rate": 0.0005170560462095259,
      "loss": 3.1273,
      "step": 55878
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5534929037094116,
      "learning_rate": 0.0005170532224609581,
      "loss": 2.8437,
      "step": 55879
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8044071197509766,
      "learning_rate": 0.0005170503986720362,
      "loss": 2.7922,
      "step": 55880
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6560677289962769,
      "learning_rate": 0.0005170475748427604,
      "loss": 3.0097,
      "step": 55881
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5329594612121582,
      "learning_rate": 0.0005170447509731314,
      "loss": 3.0574,
      "step": 55882
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5017149448394775,
      "learning_rate": 0.0005170419270631495,
      "loss": 3.2579,
      "step": 55883
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.126134157180786,
      "learning_rate": 0.0005170391031128157,
      "loss": 2.9917,
      "step": 55884
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.44814932346344,
      "learning_rate": 0.0005170362791221299,
      "loss": 2.9421,
      "step": 55885
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1409881114959717,
      "learning_rate": 0.0005170334550910932,
      "loss": 3.0776,
      "step": 55886
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.841388463973999,
      "learning_rate": 0.0005170306310197058,
      "loss": 2.9147,
      "step": 55887
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6629323959350586,
      "learning_rate": 0.0005170278069079684,
      "loss": 2.824,
      "step": 55888
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0967280864715576,
      "learning_rate": 0.0005170249827558814,
      "loss": 3.2803,
      "step": 55889
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9904651641845703,
      "learning_rate": 0.0005170221585634453,
      "loss": 2.9334,
      "step": 55890
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.732457160949707,
      "learning_rate": 0.0005170193343306607,
      "loss": 2.8278,
      "step": 55891
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3910603523254395,
      "learning_rate": 0.0005170165100575281,
      "loss": 3.0251,
      "step": 55892
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5543131828308105,
      "learning_rate": 0.0005170136857440481,
      "loss": 2.9747,
      "step": 55893
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8301208019256592,
      "learning_rate": 0.000517010861390221,
      "loss": 3.1984,
      "step": 55894
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5745502710342407,
      "learning_rate": 0.0005170080369960476,
      "loss": 3.1997,
      "step": 55895
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4510422945022583,
      "learning_rate": 0.0005170052125615283,
      "loss": 3.2104,
      "step": 55896
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7448716163635254,
      "learning_rate": 0.0005170023880866636,
      "loss": 3.076,
      "step": 55897
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.395348072052002,
      "learning_rate": 0.0005169995635714542,
      "loss": 3.0263,
      "step": 55898
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.504586935043335,
      "learning_rate": 0.0005169967390159004,
      "loss": 3.0534,
      "step": 55899
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0015854835510254,
      "learning_rate": 0.0005169939144200028,
      "loss": 2.9706,
      "step": 55900
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1394619941711426,
      "learning_rate": 0.0005169910897837619,
      "loss": 3.2656,
      "step": 55901
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4487978219985962,
      "learning_rate": 0.0005169882651071783,
      "loss": 2.834,
      "step": 55902
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.59540855884552,
      "learning_rate": 0.0005169854403902524,
      "loss": 3.0815,
      "step": 55903
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1218345165252686,
      "learning_rate": 0.0005169826156329849,
      "loss": 3.2513,
      "step": 55904
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3844609260559082,
      "learning_rate": 0.0005169797908353763,
      "loss": 3.2862,
      "step": 55905
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3391239643096924,
      "learning_rate": 0.000516976965997427,
      "loss": 2.946,
      "step": 55906
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8822630643844604,
      "learning_rate": 0.0005169741411191375,
      "loss": 3.1401,
      "step": 55907
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.326951265335083,
      "learning_rate": 0.0005169713162005085,
      "loss": 3.2399,
      "step": 55908
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.288345217704773,
      "learning_rate": 0.0005169684912415404,
      "loss": 3.0813,
      "step": 55909
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1786458492279053,
      "learning_rate": 0.0005169656662422338,
      "loss": 2.9995,
      "step": 55910
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.930378794670105,
      "learning_rate": 0.0005169628412025891,
      "loss": 2.9648,
      "step": 55911
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5436269044876099,
      "learning_rate": 0.0005169600161226071,
      "loss": 3.0392,
      "step": 55912
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7198092937469482,
      "learning_rate": 0.000516957191002288,
      "loss": 3.0987,
      "step": 55913
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.742698907852173,
      "learning_rate": 0.0005169543658416324,
      "loss": 2.9993,
      "step": 55914
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5259968042373657,
      "learning_rate": 0.000516951540640641,
      "loss": 3.0143,
      "step": 55915
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.068145751953125,
      "learning_rate": 0.0005169487153993142,
      "loss": 3.1279,
      "step": 55916
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.2297871112823486,
      "learning_rate": 0.0005169458901176524,
      "loss": 2.8606,
      "step": 55917
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0019302368164062,
      "learning_rate": 0.0005169430647956564,
      "loss": 3.0358,
      "step": 55918
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0016767978668213,
      "learning_rate": 0.0005169402394333266,
      "loss": 2.9238,
      "step": 55919
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7219610214233398,
      "learning_rate": 0.0005169374140306635,
      "loss": 3.0715,
      "step": 55920
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.5766613483428955,
      "learning_rate": 0.0005169345885876676,
      "loss": 2.936,
      "step": 55921
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7470742464065552,
      "learning_rate": 0.0005169317631043395,
      "loss": 3.0002,
      "step": 55922
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6973752975463867,
      "learning_rate": 0.0005169289375806797,
      "loss": 2.7477,
      "step": 55923
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6814738512039185,
      "learning_rate": 0.0005169261120166887,
      "loss": 3.0449,
      "step": 55924
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8821395635604858,
      "learning_rate": 0.000516923286412367,
      "loss": 3.0876,
      "step": 55925
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4694571495056152,
      "learning_rate": 0.0005169204607677152,
      "loss": 2.9232,
      "step": 55926
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.79154372215271,
      "learning_rate": 0.0005169176350827338,
      "loss": 3.138,
      "step": 55927
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.507643461227417,
      "learning_rate": 0.0005169148093574234,
      "loss": 3.1199,
      "step": 55928
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5681850910186768,
      "learning_rate": 0.0005169119835917843,
      "loss": 3.0265,
      "step": 55929
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0480732917785645,
      "learning_rate": 0.0005169091577858171,
      "loss": 3.161,
      "step": 55930
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3176708221435547,
      "learning_rate": 0.0005169063319395227,
      "loss": 3.1499,
      "step": 55931
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1890459060668945,
      "learning_rate": 0.0005169035060529011,
      "loss": 3.0984,
      "step": 55932
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6424739360809326,
      "learning_rate": 0.0005169006801259531,
      "loss": 3.4246,
      "step": 55933
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5533461570739746,
      "learning_rate": 0.000516897854158679,
      "loss": 2.7617,
      "step": 55934
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4053955078125,
      "learning_rate": 0.0005168950281510797,
      "loss": 2.8359,
      "step": 55935
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5742592811584473,
      "learning_rate": 0.0005168922021031555,
      "loss": 3.1831,
      "step": 55936
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6035314798355103,
      "learning_rate": 0.0005168893760149069,
      "loss": 2.9507,
      "step": 55937
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5168945789337158,
      "learning_rate": 0.0005168865498863344,
      "loss": 3.0044,
      "step": 55938
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.994385004043579,
      "learning_rate": 0.0005168837237174387,
      "loss": 2.9695,
      "step": 55939
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9943416118621826,
      "learning_rate": 0.0005168808975082201,
      "loss": 3.1825,
      "step": 55940
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0299739837646484,
      "learning_rate": 0.0005168780712586794,
      "loss": 3.0668,
      "step": 55941
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.903388738632202,
      "learning_rate": 0.000516875244968817,
      "loss": 2.9681,
      "step": 55942
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4069643020629883,
      "learning_rate": 0.0005168724186386333,
      "loss": 3.092,
      "step": 55943
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6962110996246338,
      "learning_rate": 0.000516869592268129,
      "loss": 2.9785,
      "step": 55944
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7684309482574463,
      "learning_rate": 0.0005168667658573045,
      "loss": 2.7662,
      "step": 55945
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.739141821861267,
      "learning_rate": 0.0005168639394061603,
      "loss": 2.7321,
      "step": 55946
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.068079710006714,
      "learning_rate": 0.0005168611129146971,
      "loss": 3.0936,
      "step": 55947
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5676730871200562,
      "learning_rate": 0.0005168582863829152,
      "loss": 3.0024,
      "step": 55948
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4933029413223267,
      "learning_rate": 0.0005168554598108154,
      "loss": 3.1359,
      "step": 55949
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4319331645965576,
      "learning_rate": 0.000516852633198398,
      "loss": 2.8675,
      "step": 55950
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9038015604019165,
      "learning_rate": 0.0005168498065456636,
      "loss": 2.881,
      "step": 55951
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0635170936584473,
      "learning_rate": 0.0005168469798526127,
      "loss": 3.1116,
      "step": 55952
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3842350244522095,
      "learning_rate": 0.0005168441531192459,
      "loss": 3.3758,
      "step": 55953
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4443351030349731,
      "learning_rate": 0.0005168413263455637,
      "loss": 3.1553,
      "step": 55954
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7576810121536255,
      "learning_rate": 0.0005168384995315665,
      "loss": 3.1088,
      "step": 55955
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4614269733428955,
      "learning_rate": 0.0005168356726772551,
      "loss": 3.2237,
      "step": 55956
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.885449767112732,
      "learning_rate": 0.0005168328457826298,
      "loss": 3.072,
      "step": 55957
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5603923797607422,
      "learning_rate": 0.0005168300188476911,
      "loss": 3.111,
      "step": 55958
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.4214556217193604,
      "learning_rate": 0.0005168271918724397,
      "loss": 2.9087,
      "step": 55959
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7201803922653198,
      "learning_rate": 0.000516824364856876,
      "loss": 3.3177,
      "step": 55960
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4811749458312988,
      "learning_rate": 0.0005168215378010004,
      "loss": 2.9645,
      "step": 55961
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6865438222885132,
      "learning_rate": 0.0005168187107048138,
      "loss": 2.9346,
      "step": 55962
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3450216054916382,
      "learning_rate": 0.0005168158835683165,
      "loss": 2.8518,
      "step": 55963
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2850215435028076,
      "learning_rate": 0.000516813056391509,
      "loss": 2.8524,
      "step": 55964
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5781570672988892,
      "learning_rate": 0.0005168102291743917,
      "loss": 3.0496,
      "step": 55965
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4595720767974854,
      "learning_rate": 0.0005168074019169655,
      "loss": 3.1433,
      "step": 55966
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3585271835327148,
      "learning_rate": 0.0005168045746192305,
      "loss": 3.0716,
      "step": 55967
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9377970695495605,
      "learning_rate": 0.0005168017472811877,
      "loss": 2.8632,
      "step": 55968
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8058135509490967,
      "learning_rate": 0.0005167989199028371,
      "loss": 3.0164,
      "step": 55969
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8389036655426025,
      "learning_rate": 0.0005167960924841796,
      "loss": 3.1287,
      "step": 55970
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8346670866012573,
      "learning_rate": 0.0005167932650252157,
      "loss": 3.1446,
      "step": 55971
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.059119462966919,
      "learning_rate": 0.0005167904375259457,
      "loss": 3.1311,
      "step": 55972
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0462920665740967,
      "learning_rate": 0.0005167876099863704,
      "loss": 3.1246,
      "step": 55973
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.525577187538147,
      "learning_rate": 0.0005167847824064901,
      "loss": 3.3437,
      "step": 55974
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.729567289352417,
      "learning_rate": 0.0005167819547863055,
      "loss": 2.7746,
      "step": 55975
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2172493934631348,
      "learning_rate": 0.000516779127125817,
      "loss": 3.1126,
      "step": 55976
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8528324365615845,
      "learning_rate": 0.0005167762994250252,
      "loss": 2.7629,
      "step": 55977
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1465706825256348,
      "learning_rate": 0.0005167734716839305,
      "loss": 2.8901,
      "step": 55978
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.456706166267395,
      "learning_rate": 0.0005167706439025336,
      "loss": 3.1909,
      "step": 55979
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5655988454818726,
      "learning_rate": 0.0005167678160808349,
      "loss": 3.0009,
      "step": 55980
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.479540228843689,
      "learning_rate": 0.000516764988218835,
      "loss": 3.2025,
      "step": 55981
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6795923709869385,
      "learning_rate": 0.0005167621603165343,
      "loss": 2.9,
      "step": 55982
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6281319856643677,
      "learning_rate": 0.0005167593323739335,
      "loss": 2.8107,
      "step": 55983
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0268242359161377,
      "learning_rate": 0.000516756504391033,
      "loss": 3.1019,
      "step": 55984
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8149420022964478,
      "learning_rate": 0.0005167536763678335,
      "loss": 3.1523,
      "step": 55985
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8299450874328613,
      "learning_rate": 0.0005167508483043353,
      "loss": 3.1149,
      "step": 55986
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8957545757293701,
      "learning_rate": 0.0005167480202005391,
      "loss": 2.9822,
      "step": 55987
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3844702243804932,
      "learning_rate": 0.0005167451920564453,
      "loss": 2.9906,
      "step": 55988
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.612842082977295,
      "learning_rate": 0.0005167423638720544,
      "loss": 2.7958,
      "step": 55989
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.376968264579773,
      "learning_rate": 0.0005167395356473672,
      "loss": 2.9107,
      "step": 55990
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.0240018367767334,
      "learning_rate": 0.0005167367073823839,
      "loss": 2.9328,
      "step": 55991
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8908157348632812,
      "learning_rate": 0.0005167338790771052,
      "loss": 3.156,
      "step": 55992
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.268505096435547,
      "learning_rate": 0.0005167310507315315,
      "loss": 2.9204,
      "step": 55993
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.756709098815918,
      "learning_rate": 0.0005167282223456635,
      "loss": 3.1147,
      "step": 55994
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6797891855239868,
      "learning_rate": 0.0005167253939195015,
      "loss": 2.9781,
      "step": 55995
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.543447732925415,
      "learning_rate": 0.0005167225654530462,
      "loss": 3.0421,
      "step": 55996
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.5511720180511475,
      "learning_rate": 0.0005167197369462982,
      "loss": 3.0825,
      "step": 55997
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4764378070831299,
      "learning_rate": 0.0005167169083992579,
      "loss": 2.9355,
      "step": 55998
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.8648176193237305,
      "learning_rate": 0.0005167140798119257,
      "loss": 2.9566,
      "step": 55999
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.114907741546631,
      "learning_rate": 0.0005167112511843023,
      "loss": 2.9046,
      "step": 56000
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4881150722503662,
      "learning_rate": 0.0005167084225163883,
      "loss": 2.8404,
      "step": 56001
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5889594554901123,
      "learning_rate": 0.000516705593808184,
      "loss": 2.9889,
      "step": 56002
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5081697702407837,
      "learning_rate": 0.00051670276505969,
      "loss": 3.0668,
      "step": 56003
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5764944553375244,
      "learning_rate": 0.000516699936270907,
      "loss": 2.8865,
      "step": 56004
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9549416303634644,
      "learning_rate": 0.0005166971074418354,
      "loss": 3.155,
      "step": 56005
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.099342107772827,
      "learning_rate": 0.0005166942785724756,
      "loss": 3.1342,
      "step": 56006
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7093263864517212,
      "learning_rate": 0.0005166914496628284,
      "loss": 2.9013,
      "step": 56007
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.066086769104004,
      "learning_rate": 0.0005166886207128939,
      "loss": 2.733,
      "step": 56008
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5683174133300781,
      "learning_rate": 0.0005166857917226731,
      "loss": 2.9902,
      "step": 56009
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9900476932525635,
      "learning_rate": 0.0005166829626921663,
      "loss": 3.1637,
      "step": 56010
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.084707498550415,
      "learning_rate": 0.000516680133621374,
      "loss": 3.1491,
      "step": 56011
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.491292119026184,
      "learning_rate": 0.0005166773045102968,
      "loss": 2.9603,
      "step": 56012
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3582661151885986,
      "learning_rate": 0.0005166744753589352,
      "loss": 2.854,
      "step": 56013
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6048349142074585,
      "learning_rate": 0.0005166716461672897,
      "loss": 3.158,
      "step": 56014
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6981204748153687,
      "learning_rate": 0.0005166688169353609,
      "loss": 3.2295,
      "step": 56015
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5607755184173584,
      "learning_rate": 0.0005166659876631492,
      "loss": 3.0688,
      "step": 56016
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3779033422470093,
      "learning_rate": 0.0005166631583506554,
      "loss": 2.917,
      "step": 56017
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8287482261657715,
      "learning_rate": 0.0005166603289978796,
      "loss": 3.0803,
      "step": 56018
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7967153787612915,
      "learning_rate": 0.0005166574996048226,
      "loss": 2.9123,
      "step": 56019
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8109585046768188,
      "learning_rate": 0.0005166546701714849,
      "loss": 2.799,
      "step": 56020
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7288862466812134,
      "learning_rate": 0.000516651840697867,
      "loss": 3.0778,
      "step": 56021
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5396891832351685,
      "learning_rate": 0.0005166490111839694,
      "loss": 3.1608,
      "step": 56022
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0664174556732178,
      "learning_rate": 0.0005166461816297929,
      "loss": 3.3029,
      "step": 56023
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4870508909225464,
      "learning_rate": 0.0005166433520353376,
      "loss": 3.1706,
      "step": 56024
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.632811427116394,
      "learning_rate": 0.0005166405224006042,
      "loss": 3.1875,
      "step": 56025
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.338452935218811,
      "learning_rate": 0.0005166376927255932,
      "loss": 2.9414,
      "step": 56026
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4993003606796265,
      "learning_rate": 0.0005166348630103052,
      "loss": 3.0498,
      "step": 56027
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4857877492904663,
      "learning_rate": 0.0005166320332547407,
      "loss": 3.3047,
      "step": 56028
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2602390050888062,
      "learning_rate": 0.0005166292034589002,
      "loss": 3.2018,
      "step": 56029
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.904542326927185,
      "learning_rate": 0.0005166263736227844,
      "loss": 3.0878,
      "step": 56030
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.654198408126831,
      "learning_rate": 0.0005166235437463935,
      "loss": 3.0917,
      "step": 56031
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7063539028167725,
      "learning_rate": 0.0005166207138297282,
      "loss": 3.0759,
      "step": 56032
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8497861623764038,
      "learning_rate": 0.000516617883872789,
      "loss": 3.1963,
      "step": 56033
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4249037504196167,
      "learning_rate": 0.0005166150538755765,
      "loss": 2.8461,
      "step": 56034
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8029145002365112,
      "learning_rate": 0.0005166122238380912,
      "loss": 3.05,
      "step": 56035
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.563757300376892,
      "learning_rate": 0.0005166093937603336,
      "loss": 2.9871,
      "step": 56036
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4105037450790405,
      "learning_rate": 0.0005166065636423041,
      "loss": 3.25,
      "step": 56037
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6284037828445435,
      "learning_rate": 0.0005166037334840035,
      "loss": 3.1106,
      "step": 56038
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3943246603012085,
      "learning_rate": 0.0005166009032854321,
      "loss": 2.9629,
      "step": 56039
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4633795022964478,
      "learning_rate": 0.0005165980730465905,
      "loss": 3.0574,
      "step": 56040
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5125608444213867,
      "learning_rate": 0.0005165952427674793,
      "loss": 3.0626,
      "step": 56041
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3605533838272095,
      "learning_rate": 0.0005165924124480989,
      "loss": 3.1683,
      "step": 56042
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5646536350250244,
      "learning_rate": 0.00051658958208845,
      "loss": 3.2912,
      "step": 56043
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.697324514389038,
      "learning_rate": 0.0005165867516885328,
      "loss": 3.0597,
      "step": 56044
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6472060680389404,
      "learning_rate": 0.0005165839212483482,
      "loss": 3.0458,
      "step": 56045
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7171378135681152,
      "learning_rate": 0.0005165810907678965,
      "loss": 2.9051,
      "step": 56046
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7888679504394531,
      "learning_rate": 0.0005165782602471783,
      "loss": 2.9905,
      "step": 56047
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.626491904258728,
      "learning_rate": 0.000516575429686194,
      "loss": 2.9316,
      "step": 56048
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5218164920806885,
      "learning_rate": 0.0005165725990849444,
      "loss": 3.0365,
      "step": 56049
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.361522912979126,
      "learning_rate": 0.0005165697684434297,
      "loss": 3.2038,
      "step": 56050
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8280705213546753,
      "learning_rate": 0.0005165669377616508,
      "loss": 3.1394,
      "step": 56051
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.634835958480835,
      "learning_rate": 0.0005165641070396078,
      "loss": 3.1245,
      "step": 56052
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.506888508796692,
      "learning_rate": 0.0005165612762773015,
      "loss": 3.0803,
      "step": 56053
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5175209045410156,
      "learning_rate": 0.0005165584454747325,
      "loss": 3.1479,
      "step": 56054
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.562718152999878,
      "learning_rate": 0.0005165556146319011,
      "loss": 3.1738,
      "step": 56055
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.824004888534546,
      "learning_rate": 0.0005165527837488079,
      "loss": 3.2392,
      "step": 56056
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5504047870635986,
      "learning_rate": 0.0005165499528254535,
      "loss": 3.1707,
      "step": 56057
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.5135409832000732,
      "learning_rate": 0.0005165471218618385,
      "loss": 3.1065,
      "step": 56058
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.221102714538574,
      "learning_rate": 0.0005165442908579631,
      "loss": 3.0987,
      "step": 56059
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.978662371635437,
      "learning_rate": 0.000516541459813828,
      "loss": 3.0215,
      "step": 56060
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0259628295898438,
      "learning_rate": 0.000516538628729434,
      "loss": 2.9609,
      "step": 56061
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.947774648666382,
      "learning_rate": 0.0005165357976047812,
      "loss": 3.0393,
      "step": 56062
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4771696329116821,
      "learning_rate": 0.0005165329664398703,
      "loss": 3.2835,
      "step": 56063
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.398634672164917,
      "learning_rate": 0.0005165301352347019,
      "loss": 3.0165,
      "step": 56064
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4533220529556274,
      "learning_rate": 0.0005165273039892764,
      "loss": 3.1041,
      "step": 56065
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.432854413986206,
      "learning_rate": 0.0005165244727035945,
      "loss": 2.9133,
      "step": 56066
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4989944696426392,
      "learning_rate": 0.0005165216413776565,
      "loss": 3.0718,
      "step": 56067
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0405235290527344,
      "learning_rate": 0.0005165188100114631,
      "loss": 2.9846,
      "step": 56068
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5928943157196045,
      "learning_rate": 0.0005165159786050147,
      "loss": 3.3683,
      "step": 56069
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.439112663269043,
      "learning_rate": 0.000516513147158312,
      "loss": 3.0943,
      "step": 56070
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4194897413253784,
      "learning_rate": 0.0005165103156713553,
      "loss": 3.1442,
      "step": 56071
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.47611665725708,
      "learning_rate": 0.0005165074841441453,
      "loss": 2.9996,
      "step": 56072
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7280833721160889,
      "learning_rate": 0.0005165046525766826,
      "loss": 3.0986,
      "step": 56073
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7618001699447632,
      "learning_rate": 0.0005165018209689673,
      "loss": 2.8953,
      "step": 56074
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6608494520187378,
      "learning_rate": 0.0005164989893210004,
      "loss": 2.8862,
      "step": 56075
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6875712871551514,
      "learning_rate": 0.0005164961576327823,
      "loss": 3.1367,
      "step": 56076
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.638890266418457,
      "learning_rate": 0.0005164933259043134,
      "loss": 3.1931,
      "step": 56077
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.858043909072876,
      "learning_rate": 0.0005164904941355943,
      "loss": 3.1831,
      "step": 56078
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2504945993423462,
      "learning_rate": 0.0005164876623266257,
      "loss": 2.924,
      "step": 56079
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.66800057888031,
      "learning_rate": 0.0005164848304774078,
      "loss": 3.0753,
      "step": 56080
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3426402807235718,
      "learning_rate": 0.0005164819985879412,
      "loss": 2.9396,
      "step": 56081
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6057801246643066,
      "learning_rate": 0.0005164791666582266,
      "loss": 3.1431,
      "step": 56082
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6334950923919678,
      "learning_rate": 0.0005164763346882645,
      "loss": 3.1801,
      "step": 56083
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7699263095855713,
      "learning_rate": 0.0005164735026780554,
      "loss": 2.8798,
      "step": 56084
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5981113910675049,
      "learning_rate": 0.0005164706706275996,
      "loss": 3.0176,
      "step": 56085
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.37888503074646,
      "learning_rate": 0.000516467838536898,
      "loss": 3.0014,
      "step": 56086
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4516156911849976,
      "learning_rate": 0.0005164650064059509,
      "loss": 3.0222,
      "step": 56087
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9020495414733887,
      "learning_rate": 0.0005164621742347588,
      "loss": 3.0185,
      "step": 56088
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7322609424591064,
      "learning_rate": 0.0005164593420233222,
      "loss": 3.0927,
      "step": 56089
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.146780014038086,
      "learning_rate": 0.0005164565097716419,
      "loss": 2.8212,
      "step": 56090
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0184597969055176,
      "learning_rate": 0.0005164536774797182,
      "loss": 3.0533,
      "step": 56091
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9835740327835083,
      "learning_rate": 0.0005164508451475517,
      "loss": 2.9369,
      "step": 56092
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.228419303894043,
      "learning_rate": 0.0005164480127751428,
      "loss": 3.1301,
      "step": 56093
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2730588912963867,
      "learning_rate": 0.0005164451803624922,
      "loss": 3.1726,
      "step": 56094
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9918100833892822,
      "learning_rate": 0.0005164423479096004,
      "loss": 3.1378,
      "step": 56095
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.239205837249756,
      "learning_rate": 0.0005164395154164679,
      "loss": 3.024,
      "step": 56096
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.825550079345703,
      "learning_rate": 0.0005164366828830951,
      "loss": 3.2721,
      "step": 56097
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.605225682258606,
      "learning_rate": 0.0005164338503094826,
      "loss": 2.9928,
      "step": 56098
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7453408241271973,
      "learning_rate": 0.0005164310176956311,
      "loss": 3.137,
      "step": 56099
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.116271734237671,
      "learning_rate": 0.0005164281850415409,
      "loss": 3.0937,
      "step": 56100
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7955563068389893,
      "learning_rate": 0.0005164253523472126,
      "loss": 3.0684,
      "step": 56101
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3826639652252197,
      "learning_rate": 0.0005164225196126469,
      "loss": 3.0199,
      "step": 56102
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8269749879837036,
      "learning_rate": 0.000516419686837844,
      "loss": 3.1107,
      "step": 56103
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.274629592895508,
      "learning_rate": 0.0005164168540228046,
      "loss": 3.0558,
      "step": 56104
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.278327465057373,
      "learning_rate": 0.0005164140211675294,
      "loss": 3.1687,
      "step": 56105
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7306418418884277,
      "learning_rate": 0.0005164111882720186,
      "loss": 3.0321,
      "step": 56106
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0326054096221924,
      "learning_rate": 0.0005164083553362729,
      "loss": 3.1984,
      "step": 56107
    },
    {
      "epoch": 0.73,
      "grad_norm": 4.60893440246582,
      "learning_rate": 0.0005164055223602928,
      "loss": 3.0875,
      "step": 56108
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0766828060150146,
      "learning_rate": 0.0005164026893440788,
      "loss": 3.0025,
      "step": 56109
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.951698899269104,
      "learning_rate": 0.0005163998562876315,
      "loss": 3.0014,
      "step": 56110
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0518016815185547,
      "learning_rate": 0.0005163970231909514,
      "loss": 3.2188,
      "step": 56111
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.5405778884887695,
      "learning_rate": 0.000516394190054039,
      "loss": 3.3874,
      "step": 56112
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9852509498596191,
      "learning_rate": 0.0005163913568768947,
      "loss": 3.214,
      "step": 56113
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5251946449279785,
      "learning_rate": 0.0005163885236595193,
      "loss": 2.9223,
      "step": 56114
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4127141237258911,
      "learning_rate": 0.0005163856904019132,
      "loss": 3.1285,
      "step": 56115
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.634531855583191,
      "learning_rate": 0.0005163828571040769,
      "loss": 3.3081,
      "step": 56116
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.2929139137268066,
      "learning_rate": 0.0005163800237660108,
      "loss": 2.8292,
      "step": 56117
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8488538265228271,
      "learning_rate": 0.0005163771903877157,
      "loss": 2.9391,
      "step": 56118
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2137601375579834,
      "learning_rate": 0.000516374356969192,
      "loss": 3.2402,
      "step": 56119
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.6680166721343994,
      "learning_rate": 0.0005163715235104402,
      "loss": 3.0151,
      "step": 56120
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9623841047286987,
      "learning_rate": 0.000516368690011461,
      "loss": 3.0895,
      "step": 56121
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7723040580749512,
      "learning_rate": 0.0005163658564722545,
      "loss": 2.9056,
      "step": 56122
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0701401233673096,
      "learning_rate": 0.0005163630228928216,
      "loss": 3.1248,
      "step": 56123
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.292346239089966,
      "learning_rate": 0.0005163601892731629,
      "loss": 2.8891,
      "step": 56124
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8003520965576172,
      "learning_rate": 0.0005163573556132785,
      "loss": 3.1818,
      "step": 56125
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2948384284973145,
      "learning_rate": 0.0005163545219131694,
      "loss": 2.8024,
      "step": 56126
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4394482374191284,
      "learning_rate": 0.0005163516881728357,
      "loss": 3.1679,
      "step": 56127
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.7428112030029297,
      "learning_rate": 0.0005163488543922783,
      "loss": 2.9666,
      "step": 56128
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3312405347824097,
      "learning_rate": 0.0005163460205714976,
      "loss": 2.9599,
      "step": 56129
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2373257875442505,
      "learning_rate": 0.0005163431867104939,
      "loss": 3.2429,
      "step": 56130
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7750167846679688,
      "learning_rate": 0.000516340352809268,
      "loss": 2.936,
      "step": 56131
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9693341255187988,
      "learning_rate": 0.0005163375188678204,
      "loss": 3.054,
      "step": 56132
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6465917825698853,
      "learning_rate": 0.0005163346848861516,
      "loss": 2.9745,
      "step": 56133
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.540263295173645,
      "learning_rate": 0.000516331850864262,
      "loss": 3.0685,
      "step": 56134
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7964508533477783,
      "learning_rate": 0.0005163290168021523,
      "loss": 3.1204,
      "step": 56135
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3305538892745972,
      "learning_rate": 0.0005163261826998229,
      "loss": 3.0582,
      "step": 56136
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6107743978500366,
      "learning_rate": 0.0005163233485572744,
      "loss": 3.012,
      "step": 56137
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8237708806991577,
      "learning_rate": 0.0005163205143745072,
      "loss": 3.2457,
      "step": 56138
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5619961023330688,
      "learning_rate": 0.0005163176801515221,
      "loss": 2.8558,
      "step": 56139
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.704227328300476,
      "learning_rate": 0.0005163148458883194,
      "loss": 3.0485,
      "step": 56140
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7300047874450684,
      "learning_rate": 0.0005163120115848997,
      "loss": 2.8749,
      "step": 56141
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8117269277572632,
      "learning_rate": 0.0005163091772412634,
      "loss": 3.3587,
      "step": 56142
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5161042213439941,
      "learning_rate": 0.0005163063428574113,
      "loss": 3.0558,
      "step": 56143
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6298718452453613,
      "learning_rate": 0.0005163035084333437,
      "loss": 3.1428,
      "step": 56144
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.487984299659729,
      "learning_rate": 0.0005163006739690613,
      "loss": 3.0731,
      "step": 56145
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0871737003326416,
      "learning_rate": 0.0005162978394645644,
      "loss": 2.9816,
      "step": 56146
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9335386753082275,
      "learning_rate": 0.0005162950049198536,
      "loss": 2.8527,
      "step": 56147
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7783962488174438,
      "learning_rate": 0.0005162921703349295,
      "loss": 2.9095,
      "step": 56148
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5908015966415405,
      "learning_rate": 0.0005162893357097927,
      "loss": 3.1719,
      "step": 56149
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6617119312286377,
      "learning_rate": 0.0005162865010444436,
      "loss": 3.1328,
      "step": 56150
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.117966413497925,
      "learning_rate": 0.0005162836663388827,
      "loss": 2.8419,
      "step": 56151
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.287287950515747,
      "learning_rate": 0.0005162808315931106,
      "loss": 3.1551,
      "step": 56152
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7427279949188232,
      "learning_rate": 0.0005162779968071278,
      "loss": 3.0285,
      "step": 56153
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.629004716873169,
      "learning_rate": 0.0005162751619809349,
      "loss": 2.9614,
      "step": 56154
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.543703079223633,
      "learning_rate": 0.0005162723271145323,
      "loss": 3.0765,
      "step": 56155
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4657098054885864,
      "learning_rate": 0.0005162694922079207,
      "loss": 3.0244,
      "step": 56156
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6798714399337769,
      "learning_rate": 0.0005162666572611004,
      "loss": 2.8547,
      "step": 56157
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.532517910003662,
      "learning_rate": 0.0005162638222740721,
      "loss": 2.935,
      "step": 56158
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4899100065231323,
      "learning_rate": 0.0005162609872468363,
      "loss": 2.8874,
      "step": 56159
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9603114128112793,
      "learning_rate": 0.0005162581521793935,
      "loss": 3.1417,
      "step": 56160
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8907628059387207,
      "learning_rate": 0.0005162553170717442,
      "loss": 2.7377,
      "step": 56161
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6852093935012817,
      "learning_rate": 0.0005162524819238888,
      "loss": 3.055,
      "step": 56162
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6728713512420654,
      "learning_rate": 0.0005162496467358282,
      "loss": 3.0535,
      "step": 56163
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9354342222213745,
      "learning_rate": 0.0005162468115075627,
      "loss": 2.9205,
      "step": 56164
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.579289197921753,
      "learning_rate": 0.0005162439762390927,
      "loss": 3.036,
      "step": 56165
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3796278238296509,
      "learning_rate": 0.0005162411409304189,
      "loss": 3.0658,
      "step": 56166
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8224507570266724,
      "learning_rate": 0.0005162383055815419,
      "loss": 2.9901,
      "step": 56167
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5472904443740845,
      "learning_rate": 0.000516235470192462,
      "loss": 3.2246,
      "step": 56168
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.756327748298645,
      "learning_rate": 0.0005162326347631799,
      "loss": 3.0258,
      "step": 56169
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4028838872909546,
      "learning_rate": 0.000516229799293696,
      "loss": 3.0061,
      "step": 56170
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8337525129318237,
      "learning_rate": 0.000516226963784011,
      "loss": 2.9768,
      "step": 56171
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5899019241333008,
      "learning_rate": 0.0005162241282341253,
      "loss": 3.2152,
      "step": 56172
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8975857496261597,
      "learning_rate": 0.0005162212926440394,
      "loss": 3.3908,
      "step": 56173
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9542995691299438,
      "learning_rate": 0.0005162184570137539,
      "loss": 3.0473,
      "step": 56174
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0293490886688232,
      "learning_rate": 0.0005162156213432694,
      "loss": 2.8227,
      "step": 56175
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7867159843444824,
      "learning_rate": 0.0005162127856325862,
      "loss": 3.2853,
      "step": 56176
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6178456544876099,
      "learning_rate": 0.000516209949881705,
      "loss": 2.9175,
      "step": 56177
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6458996534347534,
      "learning_rate": 0.0005162071140906264,
      "loss": 2.9104,
      "step": 56178
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.035860300064087,
      "learning_rate": 0.0005162042782593506,
      "loss": 2.9791,
      "step": 56179
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.520020842552185,
      "learning_rate": 0.0005162014423878785,
      "loss": 3.0643,
      "step": 56180
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6046966314315796,
      "learning_rate": 0.0005161986064762104,
      "loss": 2.9463,
      "step": 56181
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.761501431465149,
      "learning_rate": 0.0005161957705243469,
      "loss": 3.1438,
      "step": 56182
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5044184923171997,
      "learning_rate": 0.0005161929345322886,
      "loss": 3.1951,
      "step": 56183
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2226784229278564,
      "learning_rate": 0.000516190098500036,
      "loss": 3.0577,
      "step": 56184
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.720771074295044,
      "learning_rate": 0.0005161872624275894,
      "loss": 2.9109,
      "step": 56185
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7666066884994507,
      "learning_rate": 0.0005161844263149496,
      "loss": 3.1265,
      "step": 56186
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5075771808624268,
      "learning_rate": 0.0005161815901621171,
      "loss": 2.9562,
      "step": 56187
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7086386680603027,
      "learning_rate": 0.0005161787539690922,
      "loss": 2.9825,
      "step": 56188
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5076630115509033,
      "learning_rate": 0.0005161759177358757,
      "loss": 2.9546,
      "step": 56189
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0081934928894043,
      "learning_rate": 0.000516173081462468,
      "loss": 2.8693,
      "step": 56190
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4274210929870605,
      "learning_rate": 0.0005161702451488697,
      "loss": 3.1698,
      "step": 56191
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8018406629562378,
      "learning_rate": 0.0005161674087950811,
      "loss": 3.115,
      "step": 56192
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5723060369491577,
      "learning_rate": 0.000516164572401103,
      "loss": 3.0294,
      "step": 56193
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4306987524032593,
      "learning_rate": 0.000516161735966936,
      "loss": 3.0705,
      "step": 56194
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.414165735244751,
      "learning_rate": 0.0005161588994925803,
      "loss": 3.0263,
      "step": 56195
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5793496370315552,
      "learning_rate": 0.0005161560629780365,
      "loss": 3.332,
      "step": 56196
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6295973062515259,
      "learning_rate": 0.0005161532264233052,
      "loss": 2.9566,
      "step": 56197
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4609004259109497,
      "learning_rate": 0.0005161503898283871,
      "loss": 2.7819,
      "step": 56198
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4538686275482178,
      "learning_rate": 0.0005161475531932825,
      "loss": 2.956,
      "step": 56199
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8775699138641357,
      "learning_rate": 0.0005161447165179919,
      "loss": 3.0059,
      "step": 56200
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5203654766082764,
      "learning_rate": 0.000516141879802516,
      "loss": 3.1372,
      "step": 56201
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2549288272857666,
      "learning_rate": 0.0005161390430468552,
      "loss": 2.959,
      "step": 56202
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5691179037094116,
      "learning_rate": 0.0005161362062510101,
      "loss": 2.9503,
      "step": 56203
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6641044616699219,
      "learning_rate": 0.0005161333694149812,
      "loss": 2.8084,
      "step": 56204
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5717132091522217,
      "learning_rate": 0.000516130532538769,
      "loss": 3.2072,
      "step": 56205
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7123761177062988,
      "learning_rate": 0.0005161276956223742,
      "loss": 3.1075,
      "step": 56206
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6658538579940796,
      "learning_rate": 0.0005161248586657971,
      "loss": 3.0818,
      "step": 56207
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0667786598205566,
      "learning_rate": 0.0005161220216690381,
      "loss": 3.0302,
      "step": 56208
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7415614128112793,
      "learning_rate": 0.0005161191846320982,
      "loss": 2.8181,
      "step": 56209
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9169118404388428,
      "learning_rate": 0.0005161163475549776,
      "loss": 3.0144,
      "step": 56210
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2058236598968506,
      "learning_rate": 0.0005161135104376768,
      "loss": 2.9987,
      "step": 56211
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.376917839050293,
      "learning_rate": 0.0005161106732801964,
      "loss": 2.9445,
      "step": 56212
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0035176277160645,
      "learning_rate": 0.0005161078360825371,
      "loss": 2.8496,
      "step": 56213
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.1957378387451172,
      "learning_rate": 0.0005161049988446992,
      "loss": 2.9745,
      "step": 56214
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.082002639770508,
      "learning_rate": 0.0005161021615666834,
      "loss": 2.951,
      "step": 56215
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0603487491607666,
      "learning_rate": 0.0005160993242484899,
      "loss": 2.9952,
      "step": 56216
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4466922283172607,
      "learning_rate": 0.0005160964868901197,
      "loss": 3.1306,
      "step": 56217
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.275495767593384,
      "learning_rate": 0.000516093649491573,
      "loss": 3.1264,
      "step": 56218
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.7072460651397705,
      "learning_rate": 0.0005160908120528504,
      "loss": 2.9282,
      "step": 56219
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6185801029205322,
      "learning_rate": 0.0005160879745739524,
      "loss": 3.0472,
      "step": 56220
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9105675220489502,
      "learning_rate": 0.0005160851370548796,
      "loss": 3.2098,
      "step": 56221
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.1744449138641357,
      "learning_rate": 0.0005160822994956325,
      "loss": 3.1918,
      "step": 56222
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.7007007598876953,
      "learning_rate": 0.0005160794618962115,
      "loss": 3.0446,
      "step": 56223
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.74619722366333,
      "learning_rate": 0.0005160766242566174,
      "loss": 2.9468,
      "step": 56224
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.078322172164917,
      "learning_rate": 0.0005160737865768506,
      "loss": 2.7171,
      "step": 56225
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6720964908599854,
      "learning_rate": 0.0005160709488569114,
      "loss": 3.1681,
      "step": 56226
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4341813325881958,
      "learning_rate": 0.0005160681110968008,
      "loss": 2.9624,
      "step": 56227
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.030095100402832,
      "learning_rate": 0.0005160652732965189,
      "loss": 3.0413,
      "step": 56228
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4907280206680298,
      "learning_rate": 0.0005160624354560664,
      "loss": 3.1098,
      "step": 56229
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7298519611358643,
      "learning_rate": 0.0005160595975754438,
      "loss": 3.0537,
      "step": 56230
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3961400985717773,
      "learning_rate": 0.0005160567596546517,
      "loss": 2.9411,
      "step": 56231
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4833422899246216,
      "learning_rate": 0.0005160539216936905,
      "loss": 3.1513,
      "step": 56232
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6637647151947021,
      "learning_rate": 0.0005160510836925608,
      "loss": 2.8714,
      "step": 56233
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.850938320159912,
      "learning_rate": 0.0005160482456512632,
      "loss": 2.7644,
      "step": 56234
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0632565021514893,
      "learning_rate": 0.0005160454075697981,
      "loss": 3.1716,
      "step": 56235
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.283889055252075,
      "learning_rate": 0.0005160425694481661,
      "loss": 3.1495,
      "step": 56236
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1043710708618164,
      "learning_rate": 0.0005160397312863678,
      "loss": 2.8623,
      "step": 56237
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6943985223770142,
      "learning_rate": 0.0005160368930844035,
      "loss": 3.0023,
      "step": 56238
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3582944869995117,
      "learning_rate": 0.0005160340548422739,
      "loss": 2.8887,
      "step": 56239
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.1957945823669434,
      "learning_rate": 0.0005160312165599795,
      "loss": 3.0119,
      "step": 56240
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4240748882293701,
      "learning_rate": 0.0005160283782375208,
      "loss": 3.0512,
      "step": 56241
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.553580641746521,
      "learning_rate": 0.0005160255398748984,
      "loss": 3.0456,
      "step": 56242
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.408588171005249,
      "learning_rate": 0.0005160227014721127,
      "loss": 3.0957,
      "step": 56243
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3052568435668945,
      "learning_rate": 0.0005160198630291643,
      "loss": 3.0066,
      "step": 56244
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.288972020149231,
      "learning_rate": 0.0005160170245460539,
      "loss": 3.0902,
      "step": 56245
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.527449369430542,
      "learning_rate": 0.0005160141860227816,
      "loss": 3.2531,
      "step": 56246
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3953152894973755,
      "learning_rate": 0.0005160113474593483,
      "loss": 2.8966,
      "step": 56247
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4045233726501465,
      "learning_rate": 0.0005160085088557545,
      "loss": 3.0404,
      "step": 56248
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8533474206924438,
      "learning_rate": 0.0005160056702120005,
      "loss": 3.1431,
      "step": 56249
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.535962700843811,
      "learning_rate": 0.000516002831528087,
      "loss": 3.0446,
      "step": 56250
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7696282863616943,
      "learning_rate": 0.0005159999928040146,
      "loss": 3.1101,
      "step": 56251
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7577788829803467,
      "learning_rate": 0.0005159971540397837,
      "loss": 3.0678,
      "step": 56252
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9315688610076904,
      "learning_rate": 0.0005159943152353947,
      "loss": 2.7174,
      "step": 56253
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4607082605361938,
      "learning_rate": 0.0005159914763908485,
      "loss": 3.0758,
      "step": 56254
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.392063856124878,
      "learning_rate": 0.0005159886375061453,
      "loss": 3.2094,
      "step": 56255
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3722505569458008,
      "learning_rate": 0.0005159857985812857,
      "loss": 3.3067,
      "step": 56256
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8119279146194458,
      "learning_rate": 0.0005159829596162702,
      "loss": 2.981,
      "step": 56257
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2197723388671875,
      "learning_rate": 0.0005159801206110995,
      "loss": 3.0527,
      "step": 56258
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6519312858581543,
      "learning_rate": 0.000515977281565774,
      "loss": 3.0135,
      "step": 56259
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.462923288345337,
      "learning_rate": 0.0005159744424802943,
      "loss": 3.0592,
      "step": 56260
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5894825458526611,
      "learning_rate": 0.0005159716033546608,
      "loss": 2.9648,
      "step": 56261
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4417922496795654,
      "learning_rate": 0.0005159687641888741,
      "loss": 3.2654,
      "step": 56262
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5318406820297241,
      "learning_rate": 0.0005159659249829348,
      "loss": 3.0082,
      "step": 56263
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.583374261856079,
      "learning_rate": 0.0005159630857368432,
      "loss": 3.3268,
      "step": 56264
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5858478546142578,
      "learning_rate": 0.0005159602464506003,
      "loss": 3.3832,
      "step": 56265
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8250941038131714,
      "learning_rate": 0.000515957407124206,
      "loss": 2.9467,
      "step": 56266
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7628947496414185,
      "learning_rate": 0.0005159545677576613,
      "loss": 3.2146,
      "step": 56267
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.581984043121338,
      "learning_rate": 0.0005159517283509665,
      "loss": 3.0968,
      "step": 56268
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8863024711608887,
      "learning_rate": 0.0005159488889041222,
      "loss": 3.0327,
      "step": 56269
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.397872805595398,
      "learning_rate": 0.000515946049417129,
      "loss": 2.9787,
      "step": 56270
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5965641736984253,
      "learning_rate": 0.0005159432098899873,
      "loss": 3.1666,
      "step": 56271
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6410741806030273,
      "learning_rate": 0.0005159403703226976,
      "loss": 3.0485,
      "step": 56272
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6790333986282349,
      "learning_rate": 0.0005159375307152605,
      "loss": 2.9378,
      "step": 56273
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.4883618354797363,
      "learning_rate": 0.0005159346910676767,
      "loss": 3.0865,
      "step": 56274
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9335590600967407,
      "learning_rate": 0.0005159318513799465,
      "loss": 3.033,
      "step": 56275
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.588417887687683,
      "learning_rate": 0.0005159290116520704,
      "loss": 3.2177,
      "step": 56276
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.577829122543335,
      "learning_rate": 0.0005159261718840492,
      "loss": 2.8629,
      "step": 56277
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.605240821838379,
      "learning_rate": 0.0005159233320758831,
      "loss": 3.0722,
      "step": 56278
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.420194149017334,
      "learning_rate": 0.0005159204922275728,
      "loss": 3.0305,
      "step": 56279
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3614308834075928,
      "learning_rate": 0.0005159176523391189,
      "loss": 3.1348,
      "step": 56280
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.56202232837677,
      "learning_rate": 0.0005159148124105218,
      "loss": 2.9939,
      "step": 56281
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.233861207962036,
      "learning_rate": 0.000515911972441782,
      "loss": 2.9268,
      "step": 56282
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6137981414794922,
      "learning_rate": 0.0005159091324329001,
      "loss": 2.7785,
      "step": 56283
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2986811399459839,
      "learning_rate": 0.0005159062923838767,
      "loss": 2.9364,
      "step": 56284
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.4084174633026123,
      "learning_rate": 0.0005159034522947122,
      "loss": 2.5624,
      "step": 56285
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.720940351486206,
      "learning_rate": 0.000515900612165407,
      "loss": 3.3124,
      "step": 56286
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8890666961669922,
      "learning_rate": 0.0005158977719959619,
      "loss": 3.2098,
      "step": 56287
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5947272777557373,
      "learning_rate": 0.0005158949317863775,
      "loss": 3.3951,
      "step": 56288
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6835440397262573,
      "learning_rate": 0.000515892091536654,
      "loss": 3.0927,
      "step": 56289
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.867012858390808,
      "learning_rate": 0.0005158892512467921,
      "loss": 3.2533,
      "step": 56290
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.74807608127594,
      "learning_rate": 0.0005158864109167924,
      "loss": 3.0322,
      "step": 56291
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.792746067047119,
      "learning_rate": 0.0005158835705466554,
      "loss": 2.9311,
      "step": 56292
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.1126365661621094,
      "learning_rate": 0.0005158807301363813,
      "loss": 2.9202,
      "step": 56293
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6585508584976196,
      "learning_rate": 0.000515877889685971,
      "loss": 3.0256,
      "step": 56294
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.554967164993286,
      "learning_rate": 0.000515875049195425,
      "loss": 2.9121,
      "step": 56295
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1162681579589844,
      "learning_rate": 0.0005158722086647437,
      "loss": 3.3159,
      "step": 56296
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7725155353546143,
      "learning_rate": 0.0005158693680939277,
      "loss": 3.1941,
      "step": 56297
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7709983587265015,
      "learning_rate": 0.0005158665274829776,
      "loss": 2.9537,
      "step": 56298
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6008245944976807,
      "learning_rate": 0.0005158636868318936,
      "loss": 3.1018,
      "step": 56299
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2790473699569702,
      "learning_rate": 0.0005158608461406768,
      "loss": 3.2728,
      "step": 56300
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4971743822097778,
      "learning_rate": 0.0005158580054093272,
      "loss": 2.895,
      "step": 56301
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4114896059036255,
      "learning_rate": 0.0005158551646378455,
      "loss": 3.0192,
      "step": 56302
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8143185377120972,
      "learning_rate": 0.0005158523238262322,
      "loss": 2.8653,
      "step": 56303
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8245657682418823,
      "learning_rate": 0.000515849482974488,
      "loss": 3.2255,
      "step": 56304
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3687466382980347,
      "learning_rate": 0.0005158466420826132,
      "loss": 2.7045,
      "step": 56305
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5203887224197388,
      "learning_rate": 0.0005158438011506086,
      "loss": 2.8122,
      "step": 56306
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.438674807548523,
      "learning_rate": 0.0005158409601784743,
      "loss": 3.0173,
      "step": 56307
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5072166919708252,
      "learning_rate": 0.0005158381191662114,
      "loss": 2.9666,
      "step": 56308
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6759568452835083,
      "learning_rate": 0.0005158352781138199,
      "loss": 3.1335,
      "step": 56309
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.615348219871521,
      "learning_rate": 0.0005158324370213007,
      "loss": 2.6838,
      "step": 56310
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3719490766525269,
      "learning_rate": 0.0005158295958886542,
      "loss": 3.169,
      "step": 56311
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7128946781158447,
      "learning_rate": 0.0005158267547158807,
      "loss": 2.9406,
      "step": 56312
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7026087045669556,
      "learning_rate": 0.0005158239135029809,
      "loss": 3.1549,
      "step": 56313
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3630434274673462,
      "learning_rate": 0.0005158210722499556,
      "loss": 3.0807,
      "step": 56314
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3431851863861084,
      "learning_rate": 0.0005158182309568049,
      "loss": 3.0163,
      "step": 56315
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6367383003234863,
      "learning_rate": 0.0005158153896235297,
      "loss": 3.0483,
      "step": 56316
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3540911674499512,
      "learning_rate": 0.0005158125482501302,
      "loss": 3.2158,
      "step": 56317
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.516825556755066,
      "learning_rate": 0.0005158097068366071,
      "loss": 3.1761,
      "step": 56318
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5398738384246826,
      "learning_rate": 0.000515806865382961,
      "loss": 3.2473,
      "step": 56319
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4398800134658813,
      "learning_rate": 0.0005158040238891922,
      "loss": 2.9639,
      "step": 56320
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4782627820968628,
      "learning_rate": 0.0005158011823553014,
      "loss": 3.16,
      "step": 56321
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7707645893096924,
      "learning_rate": 0.0005157983407812891,
      "loss": 2.9261,
      "step": 56322
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6220098733901978,
      "learning_rate": 0.0005157954991671558,
      "loss": 2.9401,
      "step": 56323
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.562631607055664,
      "learning_rate": 0.0005157926575129021,
      "loss": 2.9555,
      "step": 56324
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5723131895065308,
      "learning_rate": 0.0005157898158185284,
      "loss": 3.0709,
      "step": 56325
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8892072439193726,
      "learning_rate": 0.0005157869740840353,
      "loss": 2.9625,
      "step": 56326
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.004924774169922,
      "learning_rate": 0.0005157841323094233,
      "loss": 3.0985,
      "step": 56327
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3873370885849,
      "learning_rate": 0.0005157812904946931,
      "loss": 2.9901,
      "step": 56328
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.868086576461792,
      "learning_rate": 0.0005157784486398449,
      "loss": 2.8546,
      "step": 56329
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6214213371276855,
      "learning_rate": 0.0005157756067448795,
      "loss": 3.147,
      "step": 56330
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6987736225128174,
      "learning_rate": 0.0005157727648097974,
      "loss": 3.0471,
      "step": 56331
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.084401845932007,
      "learning_rate": 0.0005157699228345991,
      "loss": 3.0866,
      "step": 56332
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8778468370437622,
      "learning_rate": 0.000515767080819285,
      "loss": 2.9737,
      "step": 56333
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3903815746307373,
      "learning_rate": 0.0005157642387638557,
      "loss": 3.0036,
      "step": 56334
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8526607751846313,
      "learning_rate": 0.0005157613966683118,
      "loss": 3.1735,
      "step": 56335
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4064369201660156,
      "learning_rate": 0.0005157585545326538,
      "loss": 3.2009,
      "step": 56336
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4374592304229736,
      "learning_rate": 0.0005157557123568822,
      "loss": 3.1337,
      "step": 56337
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3108854293823242,
      "learning_rate": 0.0005157528701409975,
      "loss": 2.901,
      "step": 56338
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.036942481994629,
      "learning_rate": 0.0005157500278850002,
      "loss": 3.0181,
      "step": 56339
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3563485145568848,
      "learning_rate": 0.0005157471855888911,
      "loss": 2.8745,
      "step": 56340
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.362900733947754,
      "learning_rate": 0.0005157443432526705,
      "loss": 2.9512,
      "step": 56341
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.485351085662842,
      "learning_rate": 0.0005157415008763389,
      "loss": 2.99,
      "step": 56342
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5438555479049683,
      "learning_rate": 0.0005157386584598968,
      "loss": 3.065,
      "step": 56343
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5126582384109497,
      "learning_rate": 0.0005157358160033449,
      "loss": 3.0989,
      "step": 56344
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7981775999069214,
      "learning_rate": 0.0005157329735066836,
      "loss": 3.1374,
      "step": 56345
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5355130434036255,
      "learning_rate": 0.0005157301309699135,
      "loss": 3.1086,
      "step": 56346
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5324684381484985,
      "learning_rate": 0.000515727288393035,
      "loss": 2.9427,
      "step": 56347
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9055991172790527,
      "learning_rate": 0.0005157244457760488,
      "loss": 3.1387,
      "step": 56348
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4801695346832275,
      "learning_rate": 0.0005157216031189554,
      "loss": 3.1912,
      "step": 56349
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7913706302642822,
      "learning_rate": 0.0005157187604217553,
      "loss": 3.2186,
      "step": 56350
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.066776990890503,
      "learning_rate": 0.0005157159176844489,
      "loss": 2.6168,
      "step": 56351
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.90535569190979,
      "learning_rate": 0.000515713074907037,
      "loss": 3.1596,
      "step": 56352
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5623948574066162,
      "learning_rate": 0.0005157102320895198,
      "loss": 3.2414,
      "step": 56353
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0242319107055664,
      "learning_rate": 0.000515707389231898,
      "loss": 3.1896,
      "step": 56354
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.344915747642517,
      "learning_rate": 0.0005157045463341722,
      "loss": 2.9854,
      "step": 56355
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5748348236083984,
      "learning_rate": 0.0005157017033963428,
      "loss": 3.0089,
      "step": 56356
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8542535305023193,
      "learning_rate": 0.0005156988604184104,
      "loss": 2.9338,
      "step": 56357
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4004948139190674,
      "learning_rate": 0.0005156960174003756,
      "loss": 2.779,
      "step": 56358
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7352895736694336,
      "learning_rate": 0.0005156931743422387,
      "loss": 2.9335,
      "step": 56359
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8244000673294067,
      "learning_rate": 0.0005156903312440005,
      "loss": 3.0445,
      "step": 56360
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.351492404937744,
      "learning_rate": 0.0005156874881056613,
      "loss": 2.9745,
      "step": 56361
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5225682258605957,
      "learning_rate": 0.0005156846449272217,
      "loss": 3.0474,
      "step": 56362
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3324356079101562,
      "learning_rate": 0.0005156818017086823,
      "loss": 3.161,
      "step": 56363
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4218112230300903,
      "learning_rate": 0.0005156789584500436,
      "loss": 2.9879,
      "step": 56364
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3545050621032715,
      "learning_rate": 0.0005156761151513061,
      "loss": 2.9688,
      "step": 56365
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3366674184799194,
      "learning_rate": 0.0005156732718124702,
      "loss": 3.074,
      "step": 56366
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3794220685958862,
      "learning_rate": 0.0005156704284335368,
      "loss": 3.0199,
      "step": 56367
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.497135877609253,
      "learning_rate": 0.0005156675850145062,
      "loss": 2.7571,
      "step": 56368
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.240882635116577,
      "learning_rate": 0.0005156647415553787,
      "loss": 3.1344,
      "step": 56369
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2595887184143066,
      "learning_rate": 0.0005156618980561552,
      "loss": 3.2484,
      "step": 56370
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3924905061721802,
      "learning_rate": 0.000515659054516836,
      "loss": 2.9924,
      "step": 56371
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.441107988357544,
      "learning_rate": 0.0005156562109374218,
      "loss": 2.9528,
      "step": 56372
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.046699047088623,
      "learning_rate": 0.000515653367317913,
      "loss": 3.0343,
      "step": 56373
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8040128946304321,
      "learning_rate": 0.0005156505236583102,
      "loss": 3.0127,
      "step": 56374
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7562998533248901,
      "learning_rate": 0.0005156476799586139,
      "loss": 2.9613,
      "step": 56375
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.358388900756836,
      "learning_rate": 0.0005156448362188247,
      "loss": 3.333,
      "step": 56376
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4677358865737915,
      "learning_rate": 0.0005156419924389428,
      "loss": 2.8921,
      "step": 56377
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4739289283752441,
      "learning_rate": 0.0005156391486189692,
      "loss": 2.8834,
      "step": 56378
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5599815845489502,
      "learning_rate": 0.0005156363047589042,
      "loss": 2.956,
      "step": 56379
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4813685417175293,
      "learning_rate": 0.0005156334608587482,
      "loss": 3.1898,
      "step": 56380
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.892443299293518,
      "learning_rate": 0.000515630616918502,
      "loss": 3.2438,
      "step": 56381
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2943480014801025,
      "learning_rate": 0.000515627772938166,
      "loss": 3.0483,
      "step": 56382
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4140042066574097,
      "learning_rate": 0.0005156249289177406,
      "loss": 3.2827,
      "step": 56383
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6686805486679077,
      "learning_rate": 0.0005156220848572265,
      "loss": 2.8946,
      "step": 56384
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0366852283477783,
      "learning_rate": 0.0005156192407566243,
      "loss": 3.1197,
      "step": 56385
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0200812816619873,
      "learning_rate": 0.0005156163966159342,
      "loss": 3.066,
      "step": 56386
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5999932289123535,
      "learning_rate": 0.0005156135524351572,
      "loss": 3.1259,
      "step": 56387
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.408463716506958,
      "learning_rate": 0.0005156107082142934,
      "loss": 3.1066,
      "step": 56388
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0471746921539307,
      "learning_rate": 0.0005156078639533436,
      "loss": 2.8988,
      "step": 56389
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6772366762161255,
      "learning_rate": 0.0005156050196523081,
      "loss": 3.0417,
      "step": 56390
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0081286430358887,
      "learning_rate": 0.0005156021753111878,
      "loss": 2.9819,
      "step": 56391
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.2310166358947754,
      "learning_rate": 0.0005155993309299827,
      "loss": 3.1422,
      "step": 56392
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5335118770599365,
      "learning_rate": 0.0005155964865086938,
      "loss": 3.0175,
      "step": 56393
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.3408620357513428,
      "learning_rate": 0.0005155936420473213,
      "loss": 3.1177,
      "step": 56394
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3487039804458618,
      "learning_rate": 0.000515590797545866,
      "loss": 2.9557,
      "step": 56395
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4357514381408691,
      "learning_rate": 0.0005155879530043282,
      "loss": 2.9684,
      "step": 56396
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.526777982711792,
      "learning_rate": 0.0005155851084227087,
      "loss": 2.9824,
      "step": 56397
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.589861273765564,
      "learning_rate": 0.0005155822638010076,
      "loss": 2.838,
      "step": 56398
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7031162977218628,
      "learning_rate": 0.0005155794191392259,
      "loss": 3.0061,
      "step": 56399
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.2050318717956543,
      "learning_rate": 0.0005155765744373638,
      "loss": 2.9381,
      "step": 56400
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.656606674194336,
      "learning_rate": 0.000515573729695422,
      "loss": 3.2279,
      "step": 56401
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.595680832862854,
      "learning_rate": 0.0005155708849134009,
      "loss": 3.0659,
      "step": 56402
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.6974263191223145,
      "learning_rate": 0.0005155680400913011,
      "loss": 2.8881,
      "step": 56403
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9684077501296997,
      "learning_rate": 0.0005155651952291233,
      "loss": 2.9274,
      "step": 56404
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0778591632843018,
      "learning_rate": 0.0005155623503268677,
      "loss": 2.9167,
      "step": 56405
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.260362386703491,
      "learning_rate": 0.0005155595053845351,
      "loss": 3.0113,
      "step": 56406
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.6917965412139893,
      "learning_rate": 0.0005155566604021258,
      "loss": 3.0223,
      "step": 56407
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4343687295913696,
      "learning_rate": 0.0005155538153796406,
      "loss": 3.2461,
      "step": 56408
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8277541399002075,
      "learning_rate": 0.0005155509703170798,
      "loss": 3.0468,
      "step": 56409
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.3124876022338867,
      "learning_rate": 0.000515548125214444,
      "loss": 2.9556,
      "step": 56410
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9108572006225586,
      "learning_rate": 0.0005155452800717337,
      "loss": 2.8573,
      "step": 56411
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4345356225967407,
      "learning_rate": 0.0005155424348889494,
      "loss": 3.031,
      "step": 56412
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.511549949645996,
      "learning_rate": 0.0005155395896660919,
      "loss": 3.0437,
      "step": 56413
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6908870935440063,
      "learning_rate": 0.0005155367444031614,
      "loss": 2.8936,
      "step": 56414
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.826018214225769,
      "learning_rate": 0.0005155338991001586,
      "loss": 3.0764,
      "step": 56415
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.228888988494873,
      "learning_rate": 0.0005155310537570838,
      "loss": 3.0467,
      "step": 56416
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0776140689849854,
      "learning_rate": 0.0005155282083739379,
      "loss": 2.9135,
      "step": 56417
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4114657640457153,
      "learning_rate": 0.0005155253629507212,
      "loss": 3.0864,
      "step": 56418
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5552115440368652,
      "learning_rate": 0.0005155225174874343,
      "loss": 2.9481,
      "step": 56419
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0927696228027344,
      "learning_rate": 0.0005155196719840776,
      "loss": 2.8882,
      "step": 56420
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6231658458709717,
      "learning_rate": 0.0005155168264406519,
      "loss": 3.0546,
      "step": 56421
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.536633014678955,
      "learning_rate": 0.0005155139808571574,
      "loss": 2.9226,
      "step": 56422
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9375014305114746,
      "learning_rate": 0.0005155111352335947,
      "loss": 3.0371,
      "step": 56423
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.495461106300354,
      "learning_rate": 0.0005155082895699645,
      "loss": 3.1189,
      "step": 56424
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.0398013591766357,
      "learning_rate": 0.0005155054438662673,
      "loss": 2.8583,
      "step": 56425
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3174850940704346,
      "learning_rate": 0.0005155025981225036,
      "loss": 2.9245,
      "step": 56426
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.9064254760742188,
      "learning_rate": 0.0005154997523386738,
      "loss": 2.9509,
      "step": 56427
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.4490556716918945,
      "learning_rate": 0.0005154969065147787,
      "loss": 2.972,
      "step": 56428
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5551490783691406,
      "learning_rate": 0.0005154940606508184,
      "loss": 3.0167,
      "step": 56429
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1978249549865723,
      "learning_rate": 0.0005154912147467938,
      "loss": 3.1732,
      "step": 56430
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.7863801717758179,
      "learning_rate": 0.0005154883688027053,
      "loss": 3.1332,
      "step": 56431
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.5725399255752563,
      "learning_rate": 0.0005154855228185535,
      "loss": 2.766,
      "step": 56432
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.3262383937835693,
      "learning_rate": 0.0005154826767943388,
      "loss": 2.9742,
      "step": 56433
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.8970744609832764,
      "learning_rate": 0.0005154798307300619,
      "loss": 2.882,
      "step": 56434
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8518556356430054,
      "learning_rate": 0.0005154769846257231,
      "loss": 2.99,
      "step": 56435
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.8980650901794434,
      "learning_rate": 0.0005154741384813231,
      "loss": 3.1865,
      "step": 56436
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.1625614166259766,
      "learning_rate": 0.0005154712922968625,
      "loss": 2.9902,
      "step": 56437
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.44442081451416,
      "learning_rate": 0.0005154684460723415,
      "loss": 3.1225,
      "step": 56438
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.900614857673645,
      "learning_rate": 0.0005154655998077612,
      "loss": 3.1765,
      "step": 56439
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.452769160270691,
      "learning_rate": 0.0005154627535031215,
      "loss": 2.8242,
      "step": 56440
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.294145107269287,
      "learning_rate": 0.0005154599071584233,
      "loss": 3.2833,
      "step": 56441
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.214962959289551,
      "learning_rate": 0.000515457060773667,
      "loss": 3.2297,
      "step": 56442
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.930345058441162,
      "learning_rate": 0.0005154542143488532,
      "loss": 2.9793,
      "step": 56443
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.45115065574646,
      "learning_rate": 0.0005154513678839824,
      "loss": 3.2542,
      "step": 56444
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9529001712799072,
      "learning_rate": 0.000515448521379055,
      "loss": 3.2258,
      "step": 56445
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.5900118350982666,
      "learning_rate": 0.0005154456748340718,
      "loss": 3.0084,
      "step": 56446
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.9403800964355469,
      "learning_rate": 0.0005154428282490331,
      "loss": 2.9813,
      "step": 56447
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6681519746780396,
      "learning_rate": 0.0005154399816239395,
      "loss": 2.8865,
      "step": 56448
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.388444423675537,
      "learning_rate": 0.0005154371349587916,
      "loss": 3.1414,
      "step": 56449
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1414108276367188,
      "learning_rate": 0.0005154342882535899,
      "loss": 3.1166,
      "step": 56450
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6604136228561401,
      "learning_rate": 0.0005154314415083349,
      "loss": 3.0195,
      "step": 56451
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9282721281051636,
      "learning_rate": 0.000515428594723027,
      "loss": 2.8402,
      "step": 56452
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.113507032394409,
      "learning_rate": 0.000515425747897667,
      "loss": 3.2091,
      "step": 56453
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.074608564376831,
      "learning_rate": 0.0005154229010322552,
      "loss": 3.1051,
      "step": 56454
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8457319736480713,
      "learning_rate": 0.0005154200541267923,
      "loss": 3.1528,
      "step": 56455
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5470260381698608,
      "learning_rate": 0.0005154172071812785,
      "loss": 3.0511,
      "step": 56456
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.116694450378418,
      "learning_rate": 0.0005154143601957149,
      "loss": 3.0116,
      "step": 56457
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1869876384735107,
      "learning_rate": 0.0005154115131701016,
      "loss": 3.1867,
      "step": 56458
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8241732120513916,
      "learning_rate": 0.0005154086661044392,
      "loss": 2.9768,
      "step": 56459
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8522878885269165,
      "learning_rate": 0.0005154058189987282,
      "loss": 3.052,
      "step": 56460
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.5556440353393555,
      "learning_rate": 0.0005154029718529693,
      "loss": 3.0623,
      "step": 56461
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5705221891403198,
      "learning_rate": 0.0005154001246671628,
      "loss": 2.9445,
      "step": 56462
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6094340085983276,
      "learning_rate": 0.0005153972774413094,
      "loss": 3.334,
      "step": 56463
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.437838554382324,
      "learning_rate": 0.0005153944301754097,
      "loss": 2.9824,
      "step": 56464
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7155247926712036,
      "learning_rate": 0.0005153915828694641,
      "loss": 3.1246,
      "step": 56465
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6378090381622314,
      "learning_rate": 0.0005153887355234729,
      "loss": 3.1039,
      "step": 56466
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5810818672180176,
      "learning_rate": 0.000515385888137437,
      "loss": 2.8962,
      "step": 56467
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2395999431610107,
      "learning_rate": 0.0005153830407113569,
      "loss": 3.0097,
      "step": 56468
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3458534479141235,
      "learning_rate": 0.0005153801932452329,
      "loss": 3.3029,
      "step": 56469
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7520744800567627,
      "learning_rate": 0.0005153773457390658,
      "loss": 3.2031,
      "step": 56470
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2024688720703125,
      "learning_rate": 0.0005153744981928559,
      "loss": 2.8909,
      "step": 56471
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6422874927520752,
      "learning_rate": 0.0005153716506066038,
      "loss": 3.0586,
      "step": 56472
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4058375358581543,
      "learning_rate": 0.00051536880298031,
      "loss": 3.1556,
      "step": 56473
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1491098403930664,
      "learning_rate": 0.0005153659553139751,
      "loss": 2.9829,
      "step": 56474
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8248611688613892,
      "learning_rate": 0.0005153631076075997,
      "loss": 3.0228,
      "step": 56475
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6744133234024048,
      "learning_rate": 0.0005153602598611841,
      "loss": 3.0009,
      "step": 56476
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4846584796905518,
      "learning_rate": 0.0005153574120747291,
      "loss": 3.0816,
      "step": 56477
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.430433511734009,
      "learning_rate": 0.0005153545642482349,
      "loss": 2.8436,
      "step": 56478
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.593698501586914,
      "learning_rate": 0.0005153517163817024,
      "loss": 2.9875,
      "step": 56479
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7964290380477905,
      "learning_rate": 0.000515348868475132,
      "loss": 2.8922,
      "step": 56480
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3117589950561523,
      "learning_rate": 0.000515346020528524,
      "loss": 2.8356,
      "step": 56481
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8771865367889404,
      "learning_rate": 0.0005153431725418791,
      "loss": 3.1255,
      "step": 56482
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3722869157791138,
      "learning_rate": 0.000515340324515198,
      "loss": 2.8741,
      "step": 56483
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8290801048278809,
      "learning_rate": 0.000515337476448481,
      "loss": 3.0377,
      "step": 56484
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6595816612243652,
      "learning_rate": 0.0005153346283417287,
      "loss": 3.1822,
      "step": 56485
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.845556616783142,
      "learning_rate": 0.0005153317801949416,
      "loss": 2.8895,
      "step": 56486
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.454017162322998,
      "learning_rate": 0.0005153289320081203,
      "loss": 2.973,
      "step": 56487
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.790778398513794,
      "learning_rate": 0.0005153260837812652,
      "loss": 3.1977,
      "step": 56488
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.137843608856201,
      "learning_rate": 0.000515323235514377,
      "loss": 3.0902,
      "step": 56489
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.459054946899414,
      "learning_rate": 0.0005153203872074561,
      "loss": 3.0245,
      "step": 56490
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6725921630859375,
      "learning_rate": 0.000515317538860503,
      "loss": 3.0174,
      "step": 56491
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3410959243774414,
      "learning_rate": 0.0005153146904735185,
      "loss": 2.9944,
      "step": 56492
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5155367851257324,
      "learning_rate": 0.0005153118420465028,
      "loss": 3.3817,
      "step": 56493
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.481048345565796,
      "learning_rate": 0.0005153089935794567,
      "loss": 2.9874,
      "step": 56494
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4750641584396362,
      "learning_rate": 0.0005153061450723804,
      "loss": 2.9529,
      "step": 56495
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4293577671051025,
      "learning_rate": 0.0005153032965252748,
      "loss": 3.0527,
      "step": 56496
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8889429569244385,
      "learning_rate": 0.0005153004479381401,
      "loss": 3.2282,
      "step": 56497
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6327093839645386,
      "learning_rate": 0.0005152975993109771,
      "loss": 2.9281,
      "step": 56498
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9781149625778198,
      "learning_rate": 0.0005152947506437861,
      "loss": 2.9588,
      "step": 56499
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7003992795944214,
      "learning_rate": 0.0005152919019365678,
      "loss": 3.0055,
      "step": 56500
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.898554801940918,
      "learning_rate": 0.0005152890531893227,
      "loss": 2.8613,
      "step": 56501
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7103509902954102,
      "learning_rate": 0.0005152862044020513,
      "loss": 2.8112,
      "step": 56502
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2066433429718018,
      "learning_rate": 0.0005152833555747541,
      "loss": 2.9313,
      "step": 56503
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.622246503829956,
      "learning_rate": 0.0005152805067074317,
      "loss": 3.2285,
      "step": 56504
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4667816162109375,
      "learning_rate": 0.0005152776578000846,
      "loss": 2.9613,
      "step": 56505
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6744929552078247,
      "learning_rate": 0.0005152748088527132,
      "loss": 3.2759,
      "step": 56506
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2046000957489014,
      "learning_rate": 0.0005152719598653183,
      "loss": 2.9962,
      "step": 56507
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4116986989974976,
      "learning_rate": 0.0005152691108379001,
      "loss": 3.0378,
      "step": 56508
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9197585582733154,
      "learning_rate": 0.0005152662617704595,
      "loss": 3.2133,
      "step": 56509
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.299759864807129,
      "learning_rate": 0.0005152634126629967,
      "loss": 2.7167,
      "step": 56510
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5471690893173218,
      "learning_rate": 0.0005152605635155125,
      "loss": 3.1239,
      "step": 56511
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0937507152557373,
      "learning_rate": 0.0005152577143280072,
      "loss": 3.063,
      "step": 56512
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3659709692001343,
      "learning_rate": 0.0005152548651004814,
      "loss": 3.2706,
      "step": 56513
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6688200235366821,
      "learning_rate": 0.0005152520158329358,
      "loss": 3.0123,
      "step": 56514
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9345203638076782,
      "learning_rate": 0.0005152491665253706,
      "loss": 3.2307,
      "step": 56515
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1231069564819336,
      "learning_rate": 0.0005152463171777867,
      "loss": 3.1742,
      "step": 56516
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.469162106513977,
      "learning_rate": 0.0005152434677901843,
      "loss": 2.9593,
      "step": 56517
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.984929084777832,
      "learning_rate": 0.0005152406183625642,
      "loss": 3.1125,
      "step": 56518
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.5084517002105713,
      "learning_rate": 0.0005152377688949268,
      "loss": 2.8017,
      "step": 56519
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.440205454826355,
      "learning_rate": 0.0005152349193872724,
      "loss": 3.052,
      "step": 56520
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3608430624008179,
      "learning_rate": 0.000515232069839602,
      "loss": 2.9642,
      "step": 56521
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6616441011428833,
      "learning_rate": 0.0005152292202519159,
      "loss": 3.0309,
      "step": 56522
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8002270460128784,
      "learning_rate": 0.0005152263706242145,
      "loss": 3.2235,
      "step": 56523
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.485775113105774,
      "learning_rate": 0.0005152235209564985,
      "loss": 3.1705,
      "step": 56524
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5555397272109985,
      "learning_rate": 0.0005152206712487684,
      "loss": 3.031,
      "step": 56525
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.473644733428955,
      "learning_rate": 0.0005152178215010248,
      "loss": 3.0444,
      "step": 56526
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.387353539466858,
      "learning_rate": 0.0005152149717132681,
      "loss": 3.0379,
      "step": 56527
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.444395661354065,
      "learning_rate": 0.0005152121218854988,
      "loss": 2.9722,
      "step": 56528
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7604776620864868,
      "learning_rate": 0.0005152092720177175,
      "loss": 3.1322,
      "step": 56529
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.467076063156128,
      "learning_rate": 0.0005152064221099247,
      "loss": 2.827,
      "step": 56530
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7045780420303345,
      "learning_rate": 0.0005152035721621212,
      "loss": 2.9441,
      "step": 56531
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3714475631713867,
      "learning_rate": 0.000515200722174307,
      "loss": 3.0071,
      "step": 56532
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5512199401855469,
      "learning_rate": 0.0005151978721464831,
      "loss": 3.1186,
      "step": 56533
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7635666131973267,
      "learning_rate": 0.0005151950220786498,
      "loss": 2.9144,
      "step": 56534
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7621339559555054,
      "learning_rate": 0.0005151921719708076,
      "loss": 3.0299,
      "step": 56535
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7935218811035156,
      "learning_rate": 0.0005151893218229573,
      "loss": 3.103,
      "step": 56536
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5689308643341064,
      "learning_rate": 0.0005151864716350991,
      "loss": 2.9978,
      "step": 56537
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7510924339294434,
      "learning_rate": 0.0005151836214072338,
      "loss": 2.8944,
      "step": 56538
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.261550188064575,
      "learning_rate": 0.0005151807711393616,
      "loss": 3.2823,
      "step": 56539
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7448457479476929,
      "learning_rate": 0.0005151779208314835,
      "loss": 3.185,
      "step": 56540
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.150087356567383,
      "learning_rate": 0.0005151750704835995,
      "loss": 2.9189,
      "step": 56541
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3125386238098145,
      "learning_rate": 0.0005151722200957105,
      "loss": 2.8454,
      "step": 56542
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7544946670532227,
      "learning_rate": 0.000515169369667817,
      "loss": 2.8372,
      "step": 56543
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5627236366271973,
      "learning_rate": 0.0005151665191999193,
      "loss": 2.925,
      "step": 56544
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.062197685241699,
      "learning_rate": 0.0005151636686920182,
      "loss": 3.1791,
      "step": 56545
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.997440814971924,
      "learning_rate": 0.0005151608181441141,
      "loss": 2.9976,
      "step": 56546
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8817336559295654,
      "learning_rate": 0.0005151579675562075,
      "loss": 3.227,
      "step": 56547
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8922260999679565,
      "learning_rate": 0.0005151551169282991,
      "loss": 3.1607,
      "step": 56548
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.279118776321411,
      "learning_rate": 0.0005151522662603891,
      "loss": 2.8203,
      "step": 56549
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7013154029846191,
      "learning_rate": 0.0005151494155524783,
      "loss": 3.1576,
      "step": 56550
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4964131116867065,
      "learning_rate": 0.0005151465648045672,
      "loss": 3.0621,
      "step": 56551
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.300666093826294,
      "learning_rate": 0.0005151437140166564,
      "loss": 2.9877,
      "step": 56552
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3196324110031128,
      "learning_rate": 0.0005151408631887462,
      "loss": 2.8367,
      "step": 56553
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6085301637649536,
      "learning_rate": 0.0005151380123208372,
      "loss": 3.0328,
      "step": 56554
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2455039024353027,
      "learning_rate": 0.00051513516141293,
      "loss": 3.2382,
      "step": 56555
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3945049047470093,
      "learning_rate": 0.0005151323104650252,
      "loss": 3.1936,
      "step": 56556
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6827627420425415,
      "learning_rate": 0.0005151294594771231,
      "loss": 3.1044,
      "step": 56557
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.348745584487915,
      "learning_rate": 0.0005151266084492246,
      "loss": 2.939,
      "step": 56558
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3982481956481934,
      "learning_rate": 0.0005151237573813299,
      "loss": 2.9542,
      "step": 56559
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.46763277053833,
      "learning_rate": 0.0005151209062734396,
      "loss": 3.0379,
      "step": 56560
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6270581483840942,
      "learning_rate": 0.0005151180551255543,
      "loss": 2.9421,
      "step": 56561
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5433698892593384,
      "learning_rate": 0.0005151152039376744,
      "loss": 3.0667,
      "step": 56562
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.844067096710205,
      "learning_rate": 0.0005151123527098006,
      "loss": 2.9011,
      "step": 56563
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9445209503173828,
      "learning_rate": 0.0005151095014419334,
      "loss": 3.1962,
      "step": 56564
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7305176258087158,
      "learning_rate": 0.0005151066501340731,
      "loss": 3.0381,
      "step": 56565
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.632465124130249,
      "learning_rate": 0.0005151037987862206,
      "loss": 3.2044,
      "step": 56566
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.5431275367736816,
      "learning_rate": 0.0005151009473983761,
      "loss": 3.0248,
      "step": 56567
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6770273447036743,
      "learning_rate": 0.0005150980959705404,
      "loss": 3.0419,
      "step": 56568
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.457491159439087,
      "learning_rate": 0.0005150952445027139,
      "loss": 2.97,
      "step": 56569
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.803762197494507,
      "learning_rate": 0.0005150923929948971,
      "loss": 3.3004,
      "step": 56570
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.403737783432007,
      "learning_rate": 0.0005150895414470905,
      "loss": 2.974,
      "step": 56571
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.471777319908142,
      "learning_rate": 0.0005150866898592948,
      "loss": 3.1085,
      "step": 56572
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7164281606674194,
      "learning_rate": 0.0005150838382315103,
      "loss": 2.8961,
      "step": 56573
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9672495126724243,
      "learning_rate": 0.0005150809865637377,
      "loss": 2.7629,
      "step": 56574
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.68267822265625,
      "learning_rate": 0.0005150781348559776,
      "loss": 2.9586,
      "step": 56575
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7090967893600464,
      "learning_rate": 0.0005150752831082303,
      "loss": 2.922,
      "step": 56576
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6471693515777588,
      "learning_rate": 0.0005150724313204965,
      "loss": 3.1529,
      "step": 56577
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4809554815292358,
      "learning_rate": 0.0005150695794927766,
      "loss": 3.2543,
      "step": 56578
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.571326732635498,
      "learning_rate": 0.0005150667276250713,
      "loss": 3.0985,
      "step": 56579
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5300047397613525,
      "learning_rate": 0.0005150638757173808,
      "loss": 3.3239,
      "step": 56580
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4519559144973755,
      "learning_rate": 0.0005150610237697062,
      "loss": 3.1889,
      "step": 56581
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4651020765304565,
      "learning_rate": 0.0005150581717820474,
      "loss": 2.7822,
      "step": 56582
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8841943740844727,
      "learning_rate": 0.0005150553197544055,
      "loss": 2.9018,
      "step": 56583
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.5317065715789795,
      "learning_rate": 0.0005150524676867806,
      "loss": 3.173,
      "step": 56584
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8899143934249878,
      "learning_rate": 0.0005150496155791734,
      "loss": 2.9878,
      "step": 56585
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.682534098625183,
      "learning_rate": 0.0005150467634315845,
      "loss": 3.0514,
      "step": 56586
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6265851259231567,
      "learning_rate": 0.0005150439112440142,
      "loss": 3.139,
      "step": 56587
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.752622365951538,
      "learning_rate": 0.0005150410590164633,
      "loss": 3.1754,
      "step": 56588
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4131643772125244,
      "learning_rate": 0.0005150382067489322,
      "loss": 3.2817,
      "step": 56589
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7596912384033203,
      "learning_rate": 0.0005150353544414214,
      "loss": 2.8684,
      "step": 56590
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.45961332321167,
      "learning_rate": 0.0005150325020939314,
      "loss": 3.0468,
      "step": 56591
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4765896797180176,
      "learning_rate": 0.000515029649706463,
      "loss": 3.0619,
      "step": 56592
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4816734790802,
      "learning_rate": 0.0005150267972790163,
      "loss": 3.1629,
      "step": 56593
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8743621110916138,
      "learning_rate": 0.0005150239448115922,
      "loss": 2.8783,
      "step": 56594
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6554906368255615,
      "learning_rate": 0.000515021092304191,
      "loss": 3.136,
      "step": 56595
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.588932752609253,
      "learning_rate": 0.0005150182397568135,
      "loss": 3.1199,
      "step": 56596
    },
    {
      "epoch": 0.74,
      "grad_norm": 4.279433727264404,
      "learning_rate": 0.0005150153871694599,
      "loss": 2.9421,
      "step": 56597
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.140129566192627,
      "learning_rate": 0.0005150125345421308,
      "loss": 3.0535,
      "step": 56598
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7524361610412598,
      "learning_rate": 0.0005150096818748269,
      "loss": 2.9834,
      "step": 56599
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9194635152816772,
      "learning_rate": 0.0005150068291675487,
      "loss": 2.8684,
      "step": 56600
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.3183460235595703,
      "learning_rate": 0.0005150039764202966,
      "loss": 3.2004,
      "step": 56601
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.379265785217285,
      "learning_rate": 0.0005150011236330713,
      "loss": 3.1434,
      "step": 56602
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4757561683654785,
      "learning_rate": 0.0005149982708058732,
      "loss": 3.0197,
      "step": 56603
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.8166258335113525,
      "learning_rate": 0.0005149954179387028,
      "loss": 2.9807,
      "step": 56604
    },
    {
      "epoch": 0.74,
      "grad_norm": 4.1631245613098145,
      "learning_rate": 0.0005149925650315606,
      "loss": 2.941,
      "step": 56605
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.630784273147583,
      "learning_rate": 0.0005149897120844475,
      "loss": 3.0337,
      "step": 56606
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6021350622177124,
      "learning_rate": 0.0005149868590973635,
      "loss": 3.0297,
      "step": 56607
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.9502885341644287,
      "learning_rate": 0.0005149840060703095,
      "loss": 3.0192,
      "step": 56608
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.3293468952178955,
      "learning_rate": 0.0005149811530032859,
      "loss": 3.1932,
      "step": 56609
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.953275442123413,
      "learning_rate": 0.0005149782998962933,
      "loss": 2.9431,
      "step": 56610
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.8073854446411133,
      "learning_rate": 0.0005149754467493321,
      "loss": 3.1517,
      "step": 56611
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4435036182403564,
      "learning_rate": 0.0005149725935624029,
      "loss": 2.9163,
      "step": 56612
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.5489590167999268,
      "learning_rate": 0.0005149697403355063,
      "loss": 3.2046,
      "step": 56613
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.6130852699279785,
      "learning_rate": 0.0005149668870686427,
      "loss": 3.1699,
      "step": 56614
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6946254968643188,
      "learning_rate": 0.0005149640337618127,
      "loss": 3.0574,
      "step": 56615
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5986422300338745,
      "learning_rate": 0.0005149611804150169,
      "loss": 2.9074,
      "step": 56616
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0159173011779785,
      "learning_rate": 0.0005149583270282556,
      "loss": 3.0494,
      "step": 56617
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.76615834236145,
      "learning_rate": 0.0005149554736015297,
      "loss": 3.3173,
      "step": 56618
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.80277681350708,
      "learning_rate": 0.0005149526201348395,
      "loss": 3.2578,
      "step": 56619
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7402451038360596,
      "learning_rate": 0.0005149497666281854,
      "loss": 3.1217,
      "step": 56620
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8074641227722168,
      "learning_rate": 0.0005149469130815681,
      "loss": 3.2021,
      "step": 56621
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8080803155899048,
      "learning_rate": 0.0005149440594949882,
      "loss": 3.2216,
      "step": 56622
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5967341661453247,
      "learning_rate": 0.0005149412058684462,
      "loss": 3.1603,
      "step": 56623
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6990268230438232,
      "learning_rate": 0.0005149383522019424,
      "loss": 3.1239,
      "step": 56624
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0140769481658936,
      "learning_rate": 0.0005149354984954776,
      "loss": 3.0764,
      "step": 56625
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8082557916641235,
      "learning_rate": 0.0005149326447490523,
      "loss": 2.9309,
      "step": 56626
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.905107021331787,
      "learning_rate": 0.0005149297909626668,
      "loss": 2.8489,
      "step": 56627
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8336701393127441,
      "learning_rate": 0.000514926937136322,
      "loss": 2.9829,
      "step": 56628
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0247833728790283,
      "learning_rate": 0.000514924083270018,
      "loss": 3.0824,
      "step": 56629
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9924200773239136,
      "learning_rate": 0.0005149212293637556,
      "loss": 2.8708,
      "step": 56630
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3560155630111694,
      "learning_rate": 0.0005149183754175353,
      "loss": 2.8717,
      "step": 56631
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5697941780090332,
      "learning_rate": 0.0005149155214313577,
      "loss": 2.9549,
      "step": 56632
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.748137354850769,
      "learning_rate": 0.0005149126674052233,
      "loss": 2.7858,
      "step": 56633
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4715325832366943,
      "learning_rate": 0.0005149098133391324,
      "loss": 3.1242,
      "step": 56634
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8515410423278809,
      "learning_rate": 0.0005149069592330858,
      "loss": 3.0199,
      "step": 56635
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7213484048843384,
      "learning_rate": 0.000514904105087084,
      "loss": 3.1933,
      "step": 56636
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3843708038330078,
      "learning_rate": 0.0005149012509011274,
      "loss": 2.8994,
      "step": 56637
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9662178754806519,
      "learning_rate": 0.0005148983966752165,
      "loss": 2.759,
      "step": 56638
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.9770119190216064,
      "learning_rate": 0.000514895542409352,
      "loss": 3.3167,
      "step": 56639
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.594611644744873,
      "learning_rate": 0.0005148926881035345,
      "loss": 3.044,
      "step": 56640
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.593562126159668,
      "learning_rate": 0.0005148898337577643,
      "loss": 3.1374,
      "step": 56641
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6913715600967407,
      "learning_rate": 0.000514886979372042,
      "loss": 2.966,
      "step": 56642
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5721319913864136,
      "learning_rate": 0.0005148841249463682,
      "loss": 3.2599,
      "step": 56643
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.446455478668213,
      "learning_rate": 0.0005148812704807434,
      "loss": 3.0119,
      "step": 56644
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3179101943969727,
      "learning_rate": 0.000514878415975168,
      "loss": 3.0812,
      "step": 56645
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0196125507354736,
      "learning_rate": 0.0005148755614296427,
      "loss": 3.3818,
      "step": 56646
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5518627166748047,
      "learning_rate": 0.000514872706844168,
      "loss": 2.9585,
      "step": 56647
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.576677083969116,
      "learning_rate": 0.0005148698522187444,
      "loss": 2.8166,
      "step": 56648
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.421156644821167,
      "learning_rate": 0.0005148669975533725,
      "loss": 3.0203,
      "step": 56649
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.447553277015686,
      "learning_rate": 0.0005148641428480527,
      "loss": 3.1812,
      "step": 56650
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.627431869506836,
      "learning_rate": 0.0005148612881027856,
      "loss": 3.0085,
      "step": 56651
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.562136173248291,
      "learning_rate": 0.0005148584333175718,
      "loss": 3.0583,
      "step": 56652
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3492354154586792,
      "learning_rate": 0.0005148555784924117,
      "loss": 3.0296,
      "step": 56653
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.289069890975952,
      "learning_rate": 0.0005148527236273059,
      "loss": 2.9734,
      "step": 56654
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2708929777145386,
      "learning_rate": 0.0005148498687222549,
      "loss": 2.8732,
      "step": 56655
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.664233684539795,
      "learning_rate": 0.0005148470137772593,
      "loss": 3.0934,
      "step": 56656
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.055530548095703,
      "learning_rate": 0.0005148441587923194,
      "loss": 2.7388,
      "step": 56657
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9622704982757568,
      "learning_rate": 0.0005148413037674362,
      "loss": 3.2953,
      "step": 56658
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1188273429870605,
      "learning_rate": 0.0005148384487026098,
      "loss": 3.1379,
      "step": 56659
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8822656869888306,
      "learning_rate": 0.0005148355935978409,
      "loss": 2.8117,
      "step": 56660
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6004953384399414,
      "learning_rate": 0.0005148327384531299,
      "loss": 3.2527,
      "step": 56661
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.523797869682312,
      "learning_rate": 0.0005148298832684776,
      "loss": 3.0641,
      "step": 56662
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5310075283050537,
      "learning_rate": 0.0005148270280438843,
      "loss": 3.1198,
      "step": 56663
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0988056659698486,
      "learning_rate": 0.0005148241727793507,
      "loss": 2.9641,
      "step": 56664
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8383058309555054,
      "learning_rate": 0.0005148213174748771,
      "loss": 2.8684,
      "step": 56665
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7144845724105835,
      "learning_rate": 0.0005148184621304641,
      "loss": 3.0032,
      "step": 56666
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8529056310653687,
      "learning_rate": 0.0005148156067461125,
      "loss": 2.8271,
      "step": 56667
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4348945617675781,
      "learning_rate": 0.0005148127513218225,
      "loss": 3.3895,
      "step": 56668
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4048802852630615,
      "learning_rate": 0.0005148098958575948,
      "loss": 2.9533,
      "step": 56669
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1300058364868164,
      "learning_rate": 0.0005148070403534298,
      "loss": 3.3317,
      "step": 56670
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4278913736343384,
      "learning_rate": 0.0005148041848093282,
      "loss": 3.1257,
      "step": 56671
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3010365962982178,
      "learning_rate": 0.0005148013292252904,
      "loss": 2.8415,
      "step": 56672
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.303276538848877,
      "learning_rate": 0.000514798473601317,
      "loss": 3.1458,
      "step": 56673
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5125921964645386,
      "learning_rate": 0.0005147956179374086,
      "loss": 3.1174,
      "step": 56674
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7583420276641846,
      "learning_rate": 0.0005147927622335655,
      "loss": 3.2178,
      "step": 56675
    },
    {
      "epoch": 0.74,
      "grad_norm": 4.359385967254639,
      "learning_rate": 0.0005147899064897885,
      "loss": 2.9037,
      "step": 56676
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8986084461212158,
      "learning_rate": 0.0005147870507060778,
      "loss": 3.0061,
      "step": 56677
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5300194025039673,
      "learning_rate": 0.0005147841948824343,
      "loss": 3.1775,
      "step": 56678
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3343703746795654,
      "learning_rate": 0.0005147813390188583,
      "loss": 3.2099,
      "step": 56679
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0730767250061035,
      "learning_rate": 0.0005147784831153504,
      "loss": 3.2037,
      "step": 56680
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4719021320343018,
      "learning_rate": 0.0005147756271719112,
      "loss": 3.0488,
      "step": 56681
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.675327181816101,
      "learning_rate": 0.0005147727711885411,
      "loss": 2.9693,
      "step": 56682
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.809694766998291,
      "learning_rate": 0.0005147699151652407,
      "loss": 2.923,
      "step": 56683
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.1467788219451904,
      "learning_rate": 0.0005147670591020105,
      "loss": 3.0016,
      "step": 56684
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.6902527809143066,
      "learning_rate": 0.000514764202998851,
      "loss": 2.978,
      "step": 56685
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.902531623840332,
      "learning_rate": 0.0005147613468557628,
      "loss": 2.9607,
      "step": 56686
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.105931043624878,
      "learning_rate": 0.0005147584906727466,
      "loss": 2.9922,
      "step": 56687
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.216031789779663,
      "learning_rate": 0.0005147556344498026,
      "loss": 3.1709,
      "step": 56688
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6527416706085205,
      "learning_rate": 0.0005147527781869314,
      "loss": 2.7258,
      "step": 56689
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6900750398635864,
      "learning_rate": 0.0005147499218841336,
      "loss": 3.0739,
      "step": 56690
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.066129207611084,
      "learning_rate": 0.0005147470655414098,
      "loss": 3.0392,
      "step": 56691
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0589394569396973,
      "learning_rate": 0.0005147442091587604,
      "loss": 2.9285,
      "step": 56692
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7319554090499878,
      "learning_rate": 0.0005147413527361862,
      "loss": 2.9962,
      "step": 56693
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4974123239517212,
      "learning_rate": 0.0005147384962736874,
      "loss": 3.0016,
      "step": 56694
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0432288646698,
      "learning_rate": 0.0005147356397712646,
      "loss": 3.0793,
      "step": 56695
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.933787226676941,
      "learning_rate": 0.0005147327832289185,
      "loss": 2.955,
      "step": 56696
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2953819036483765,
      "learning_rate": 0.0005147299266466495,
      "loss": 3.0684,
      "step": 56697
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7364368438720703,
      "learning_rate": 0.0005147270700244581,
      "loss": 3.1353,
      "step": 56698
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8203403949737549,
      "learning_rate": 0.0005147242133623449,
      "loss": 3.0682,
      "step": 56699
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3490875959396362,
      "learning_rate": 0.0005147213566603103,
      "loss": 3.4074,
      "step": 56700
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.56783926486969,
      "learning_rate": 0.0005147184999183552,
      "loss": 3.0446,
      "step": 56701
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4562667608261108,
      "learning_rate": 0.0005147156431364798,
      "loss": 2.9378,
      "step": 56702
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2072741985321045,
      "learning_rate": 0.0005147127863146845,
      "loss": 2.9845,
      "step": 56703
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5849025249481201,
      "learning_rate": 0.0005147099294529702,
      "loss": 3.0922,
      "step": 56704
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.876312255859375,
      "learning_rate": 0.0005147070725513374,
      "loss": 3.1188,
      "step": 56705
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5572329759597778,
      "learning_rate": 0.0005147042156097864,
      "loss": 3.0452,
      "step": 56706
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4155172109603882,
      "learning_rate": 0.0005147013586283177,
      "loss": 2.8527,
      "step": 56707
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7326022386550903,
      "learning_rate": 0.0005146985016069322,
      "loss": 3.0729,
      "step": 56708
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6697587966918945,
      "learning_rate": 0.00051469564454563,
      "loss": 3.1903,
      "step": 56709
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.629481315612793,
      "learning_rate": 0.000514692787444412,
      "loss": 3.1364,
      "step": 56710
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.620965838432312,
      "learning_rate": 0.0005146899303032783,
      "loss": 3.0287,
      "step": 56711
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3830299377441406,
      "learning_rate": 0.0005146870731222298,
      "loss": 3.0708,
      "step": 56712
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.269214391708374,
      "learning_rate": 0.0005146842159012671,
      "loss": 2.8146,
      "step": 56713
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9392008781433105,
      "learning_rate": 0.0005146813586403904,
      "loss": 3.1355,
      "step": 56714
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.079418182373047,
      "learning_rate": 0.0005146785013396003,
      "loss": 3.0852,
      "step": 56715
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6903553009033203,
      "learning_rate": 0.0005146756439988975,
      "loss": 3.2875,
      "step": 56716
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.432645320892334,
      "learning_rate": 0.0005146727866182825,
      "loss": 2.9329,
      "step": 56717
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9890239238739014,
      "learning_rate": 0.0005146699291977558,
      "loss": 3.2323,
      "step": 56718
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5066884756088257,
      "learning_rate": 0.0005146670717373178,
      "loss": 3.215,
      "step": 56719
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6185399293899536,
      "learning_rate": 0.0005146642142369692,
      "loss": 3.1488,
      "step": 56720
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5719969272613525,
      "learning_rate": 0.0005146613566967104,
      "loss": 3.2973,
      "step": 56721
    },
    {
      "epoch": 0.74,
      "grad_norm": 4.057178020477295,
      "learning_rate": 0.000514658499116542,
      "loss": 2.9242,
      "step": 56722
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1259114742279053,
      "learning_rate": 0.0005146556414964646,
      "loss": 3.072,
      "step": 56723
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6533288955688477,
      "learning_rate": 0.0005146527838364787,
      "loss": 3.185,
      "step": 56724
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9834952354431152,
      "learning_rate": 0.0005146499261365847,
      "loss": 3.0697,
      "step": 56725
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0496678352355957,
      "learning_rate": 0.0005146470683967833,
      "loss": 2.7723,
      "step": 56726
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.1314868927001953,
      "learning_rate": 0.0005146442106170749,
      "loss": 3.0147,
      "step": 56727
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6911816596984863,
      "learning_rate": 0.0005146413527974601,
      "loss": 3.1164,
      "step": 56728
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.932527542114258,
      "learning_rate": 0.0005146384949379395,
      "loss": 2.9935,
      "step": 56729
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.685354232788086,
      "learning_rate": 0.0005146356370385135,
      "loss": 2.9993,
      "step": 56730
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9663584232330322,
      "learning_rate": 0.0005146327790991826,
      "loss": 3.1987,
      "step": 56731
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5700337886810303,
      "learning_rate": 0.0005146299211199476,
      "loss": 3.1754,
      "step": 56732
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8516653776168823,
      "learning_rate": 0.0005146270631008087,
      "loss": 3.1583,
      "step": 56733
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5340299606323242,
      "learning_rate": 0.0005146242050417665,
      "loss": 3.1994,
      "step": 56734
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6031420230865479,
      "learning_rate": 0.0005146213469428218,
      "loss": 2.919,
      "step": 56735
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4913344383239746,
      "learning_rate": 0.0005146184888039749,
      "loss": 3.1488,
      "step": 56736
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6896055936813354,
      "learning_rate": 0.0005146156306252263,
      "loss": 3.0015,
      "step": 56737
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7086238861083984,
      "learning_rate": 0.0005146127724065766,
      "loss": 3.0399,
      "step": 56738
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8330600261688232,
      "learning_rate": 0.0005146099141480264,
      "loss": 3.029,
      "step": 56739
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6010706424713135,
      "learning_rate": 0.0005146070558495762,
      "loss": 2.8811,
      "step": 56740
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4101121425628662,
      "learning_rate": 0.0005146041975112263,
      "loss": 2.8915,
      "step": 56741
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7306803464889526,
      "learning_rate": 0.0005146013391329777,
      "loss": 3.2863,
      "step": 56742
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7666090726852417,
      "learning_rate": 0.0005145984807148305,
      "loss": 2.9544,
      "step": 56743
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7345902919769287,
      "learning_rate": 0.0005145956222567854,
      "loss": 2.9158,
      "step": 56744
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9984010457992554,
      "learning_rate": 0.000514592763758843,
      "loss": 3.2011,
      "step": 56745
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8416683673858643,
      "learning_rate": 0.0005145899052210036,
      "loss": 3.1169,
      "step": 56746
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8337804079055786,
      "learning_rate": 0.000514587046643268,
      "loss": 3.1179,
      "step": 56747
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7953602075576782,
      "learning_rate": 0.0005145841880256366,
      "loss": 2.9465,
      "step": 56748
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9628300666809082,
      "learning_rate": 0.00051458132936811,
      "loss": 2.9745,
      "step": 56749
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6436452865600586,
      "learning_rate": 0.0005145784706706885,
      "loss": 2.7694,
      "step": 56750
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5246626138687134,
      "learning_rate": 0.0005145756119333731,
      "loss": 3.2107,
      "step": 56751
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.243417978286743,
      "learning_rate": 0.0005145727531561638,
      "loss": 3.105,
      "step": 56752
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1938047409057617,
      "learning_rate": 0.0005145698943390615,
      "loss": 3.1517,
      "step": 56753
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.264127731323242,
      "learning_rate": 0.0005145670354820666,
      "loss": 3.1972,
      "step": 56754
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.617362141609192,
      "learning_rate": 0.0005145641765851795,
      "loss": 3.2061,
      "step": 56755
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8702794313430786,
      "learning_rate": 0.0005145613176484009,
      "loss": 2.9846,
      "step": 56756
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6868149042129517,
      "learning_rate": 0.0005145584586717315,
      "loss": 3.0368,
      "step": 56757
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8564189672470093,
      "learning_rate": 0.0005145555996551714,
      "loss": 3.1545,
      "step": 56758
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5637526512145996,
      "learning_rate": 0.0005145527405987215,
      "loss": 3.0007,
      "step": 56759
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6218392848968506,
      "learning_rate": 0.0005145498815023822,
      "loss": 2.9338,
      "step": 56760
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6176762580871582,
      "learning_rate": 0.0005145470223661539,
      "loss": 3.0436,
      "step": 56761
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4592475891113281,
      "learning_rate": 0.0005145441631900375,
      "loss": 3.308,
      "step": 56762
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4955635070800781,
      "learning_rate": 0.0005145413039740331,
      "loss": 3.4405,
      "step": 56763
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4007976055145264,
      "learning_rate": 0.0005145384447181414,
      "loss": 3.0439,
      "step": 56764
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.308782935142517,
      "learning_rate": 0.000514535585422363,
      "loss": 3.0107,
      "step": 56765
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5505021810531616,
      "learning_rate": 0.0005145327260866985,
      "loss": 3.0535,
      "step": 56766
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4955942630767822,
      "learning_rate": 0.0005145298667111482,
      "loss": 3.045,
      "step": 56767
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.255568504333496,
      "learning_rate": 0.0005145270072957127,
      "loss": 2.8359,
      "step": 56768
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4922634363174438,
      "learning_rate": 0.0005145241478403926,
      "loss": 3.1341,
      "step": 56769
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8310556411743164,
      "learning_rate": 0.0005145212883451885,
      "loss": 3.2251,
      "step": 56770
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.39629328250885,
      "learning_rate": 0.0005145184288101008,
      "loss": 2.9134,
      "step": 56771
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4087053537368774,
      "learning_rate": 0.0005145155692351301,
      "loss": 3.0047,
      "step": 56772
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.516446590423584,
      "learning_rate": 0.0005145127096202769,
      "loss": 2.9613,
      "step": 56773
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.526846170425415,
      "learning_rate": 0.0005145098499655417,
      "loss": 2.9717,
      "step": 56774
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.652683138847351,
      "learning_rate": 0.0005145069902709251,
      "loss": 3.1145,
      "step": 56775
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5127702951431274,
      "learning_rate": 0.0005145041305364276,
      "loss": 3.0657,
      "step": 56776
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3290283679962158,
      "learning_rate": 0.0005145012707620497,
      "loss": 3.0458,
      "step": 56777
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7437173128128052,
      "learning_rate": 0.0005144984109477919,
      "loss": 3.1555,
      "step": 56778
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4853473901748657,
      "learning_rate": 0.000514495551093655,
      "loss": 3.1388,
      "step": 56779
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.080962896347046,
      "learning_rate": 0.0005144926911996392,
      "loss": 2.8708,
      "step": 56780
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5964038372039795,
      "learning_rate": 0.0005144898312657452,
      "loss": 3.1726,
      "step": 56781
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.770387649536133,
      "learning_rate": 0.0005144869712919734,
      "loss": 3.2401,
      "step": 56782
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6291524171829224,
      "learning_rate": 0.0005144841112783245,
      "loss": 2.8887,
      "step": 56783
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.999857783317566,
      "learning_rate": 0.0005144812512247989,
      "loss": 3.1194,
      "step": 56784
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6743533611297607,
      "learning_rate": 0.0005144783911313972,
      "loss": 3.3012,
      "step": 56785
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.529087781906128,
      "learning_rate": 0.0005144755309981201,
      "loss": 2.9871,
      "step": 56786
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4930546283721924,
      "learning_rate": 0.0005144726708249677,
      "loss": 3.2781,
      "step": 56787
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4363056421279907,
      "learning_rate": 0.0005144698106119409,
      "loss": 2.9704,
      "step": 56788
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4833399057388306,
      "learning_rate": 0.00051446695035904,
      "loss": 2.919,
      "step": 56789
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3651636838912964,
      "learning_rate": 0.0005144640900662658,
      "loss": 2.9496,
      "step": 56790
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.488352656364441,
      "learning_rate": 0.0005144612297336185,
      "loss": 2.9366,
      "step": 56791
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.676515817642212,
      "learning_rate": 0.0005144583693610991,
      "loss": 3.0989,
      "step": 56792
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8797390460968018,
      "learning_rate": 0.0005144555089487076,
      "loss": 3.3318,
      "step": 56793
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3338602781295776,
      "learning_rate": 0.0005144526484964448,
      "loss": 3.2222,
      "step": 56794
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.743189573287964,
      "learning_rate": 0.0005144497880043113,
      "loss": 2.7049,
      "step": 56795
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6919490098953247,
      "learning_rate": 0.0005144469274723074,
      "loss": 3.2335,
      "step": 56796
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.466809868812561,
      "learning_rate": 0.000514444066900434,
      "loss": 3.3072,
      "step": 56797
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.134098529815674,
      "learning_rate": 0.0005144412062886911,
      "loss": 3.1074,
      "step": 56798
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7994825839996338,
      "learning_rate": 0.0005144383456370797,
      "loss": 3.2982,
      "step": 56799
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4030120372772217,
      "learning_rate": 0.0005144354849456002,
      "loss": 2.9709,
      "step": 56800
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1618404388427734,
      "learning_rate": 0.000514432624214253,
      "loss": 3.0751,
      "step": 56801
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8098646402359009,
      "learning_rate": 0.0005144297634430388,
      "loss": 3.2056,
      "step": 56802
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7564345598220825,
      "learning_rate": 0.000514426902631958,
      "loss": 2.96,
      "step": 56803
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8914964199066162,
      "learning_rate": 0.0005144240417810112,
      "loss": 3.2963,
      "step": 56804
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.451865792274475,
      "learning_rate": 0.0005144211808901989,
      "loss": 3.122,
      "step": 56805
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3335360288619995,
      "learning_rate": 0.0005144183199595218,
      "loss": 2.8643,
      "step": 56806
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.498608946800232,
      "learning_rate": 0.0005144154589889801,
      "loss": 3.1808,
      "step": 56807
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.652091145515442,
      "learning_rate": 0.0005144125979785745,
      "loss": 2.9769,
      "step": 56808
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5811049938201904,
      "learning_rate": 0.0005144097369283058,
      "loss": 2.8931,
      "step": 56809
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4838260412216187,
      "learning_rate": 0.000514406875838174,
      "loss": 2.9408,
      "step": 56810
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4937279224395752,
      "learning_rate": 0.0005144040147081801,
      "loss": 2.9358,
      "step": 56811
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7045643329620361,
      "learning_rate": 0.0005144011535383244,
      "loss": 2.8978,
      "step": 56812
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.303649663925171,
      "learning_rate": 0.0005143982923286075,
      "loss": 2.979,
      "step": 56813
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7740670442581177,
      "learning_rate": 0.0005143954310790299,
      "loss": 3.0259,
      "step": 56814
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.808476448059082,
      "learning_rate": 0.0005143925697895922,
      "loss": 2.8553,
      "step": 56815
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.442426323890686,
      "learning_rate": 0.0005143897084602946,
      "loss": 3.0961,
      "step": 56816
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5504825115203857,
      "learning_rate": 0.0005143868470911382,
      "loss": 3.2112,
      "step": 56817
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2237391471862793,
      "learning_rate": 0.0005143839856821233,
      "loss": 2.9204,
      "step": 56818
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.648203730583191,
      "learning_rate": 0.00051438112423325,
      "loss": 3.2593,
      "step": 56819
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5140619277954102,
      "learning_rate": 0.0005143782627445195,
      "loss": 2.8246,
      "step": 56820
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.856913685798645,
      "learning_rate": 0.000514375401215932,
      "loss": 3.0032,
      "step": 56821
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.563474178314209,
      "learning_rate": 0.000514372539647488,
      "loss": 3.0363,
      "step": 56822
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6973705291748047,
      "learning_rate": 0.000514369678039188,
      "loss": 3.1297,
      "step": 56823
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7480156421661377,
      "learning_rate": 0.0005143668163910328,
      "loss": 3.0377,
      "step": 56824
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5984309911727905,
      "learning_rate": 0.0005143639547030227,
      "loss": 3.2608,
      "step": 56825
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5368720293045044,
      "learning_rate": 0.0005143610929751584,
      "loss": 2.7909,
      "step": 56826
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6711795330047607,
      "learning_rate": 0.0005143582312074402,
      "loss": 3.1488,
      "step": 56827
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.860235333442688,
      "learning_rate": 0.0005143553693998687,
      "loss": 2.9344,
      "step": 56828
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.508105993270874,
      "learning_rate": 0.0005143525075524447,
      "loss": 3.1048,
      "step": 56829
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3643109798431396,
      "learning_rate": 0.0005143496456651683,
      "loss": 2.9243,
      "step": 56830
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7919950485229492,
      "learning_rate": 0.0005143467837380403,
      "loss": 2.8388,
      "step": 56831
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.729414701461792,
      "learning_rate": 0.0005143439217710613,
      "loss": 2.7298,
      "step": 56832
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5745311975479126,
      "learning_rate": 0.0005143410597642316,
      "loss": 3.1868,
      "step": 56833
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.379148244857788,
      "learning_rate": 0.000514338197717552,
      "loss": 2.9637,
      "step": 56834
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5811216831207275,
      "learning_rate": 0.0005143353356310227,
      "loss": 3.0455,
      "step": 56835
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.547648310661316,
      "learning_rate": 0.0005143324735046446,
      "loss": 3.0557,
      "step": 56836
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3485966920852661,
      "learning_rate": 0.0005143296113384179,
      "loss": 3.036,
      "step": 56837
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.71722149848938,
      "learning_rate": 0.0005143267491323434,
      "loss": 3.1772,
      "step": 56838
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.114123582839966,
      "learning_rate": 0.0005143238868864214,
      "loss": 3.1933,
      "step": 56839
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8861417770385742,
      "learning_rate": 0.0005143210246006527,
      "loss": 3.0332,
      "step": 56840
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.58638596534729,
      "learning_rate": 0.0005143181622750374,
      "loss": 3.0833,
      "step": 56841
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8395394086837769,
      "learning_rate": 0.0005143152999095766,
      "loss": 3.0161,
      "step": 56842
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4848828315734863,
      "learning_rate": 0.0005143124375042704,
      "loss": 3.274,
      "step": 56843
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6103041172027588,
      "learning_rate": 0.0005143095750591195,
      "loss": 3.0393,
      "step": 56844
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3798154592514038,
      "learning_rate": 0.0005143067125741243,
      "loss": 3.0376,
      "step": 56845
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5166133642196655,
      "learning_rate": 0.0005143038500492855,
      "loss": 3.0395,
      "step": 56846
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3983590602874756,
      "learning_rate": 0.0005143009874846037,
      "loss": 3.0667,
      "step": 56847
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8247305154800415,
      "learning_rate": 0.0005142981248800791,
      "loss": 3.1218,
      "step": 56848
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0971839427948,
      "learning_rate": 0.0005142952622357126,
      "loss": 3.3174,
      "step": 56849
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.312309980392456,
      "learning_rate": 0.0005142923995515045,
      "loss": 3.0855,
      "step": 56850
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.631176233291626,
      "learning_rate": 0.0005142895368274554,
      "loss": 3.2185,
      "step": 56851
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.414238929748535,
      "learning_rate": 0.0005142866740635658,
      "loss": 3.0865,
      "step": 56852
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.712507963180542,
      "learning_rate": 0.0005142838112598364,
      "loss": 3.0829,
      "step": 56853
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9703682661056519,
      "learning_rate": 0.0005142809484162674,
      "loss": 3.1878,
      "step": 56854
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4903804063796997,
      "learning_rate": 0.0005142780855328596,
      "loss": 3.2051,
      "step": 56855
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8446770906448364,
      "learning_rate": 0.0005142752226096135,
      "loss": 2.9916,
      "step": 56856
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6413476467132568,
      "learning_rate": 0.0005142723596465295,
      "loss": 3.0019,
      "step": 56857
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.480238914489746,
      "learning_rate": 0.0005142694966436083,
      "loss": 3.2429,
      "step": 56858
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6155427694320679,
      "learning_rate": 0.0005142666336008504,
      "loss": 3.161,
      "step": 56859
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.8639869689941406,
      "learning_rate": 0.0005142637705182562,
      "loss": 2.9383,
      "step": 56860
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.529404640197754,
      "learning_rate": 0.0005142609073958263,
      "loss": 2.8551,
      "step": 56861
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8020610809326172,
      "learning_rate": 0.0005142580442335613,
      "loss": 3.0345,
      "step": 56862
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.067002534866333,
      "learning_rate": 0.0005142551810314618,
      "loss": 2.9042,
      "step": 56863
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.474257230758667,
      "learning_rate": 0.0005142523177895281,
      "loss": 3.2284,
      "step": 56864
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6393743753433228,
      "learning_rate": 0.0005142494545077608,
      "loss": 3.0621,
      "step": 56865
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1371641159057617,
      "learning_rate": 0.0005142465911861606,
      "loss": 2.9959,
      "step": 56866
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6905370950698853,
      "learning_rate": 0.0005142437278247277,
      "loss": 2.9022,
      "step": 56867
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6379185914993286,
      "learning_rate": 0.000514240864423463,
      "loss": 2.9314,
      "step": 56868
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5838584899902344,
      "learning_rate": 0.0005142380009823669,
      "loss": 2.9566,
      "step": 56869
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8291161060333252,
      "learning_rate": 0.00051423513750144,
      "loss": 3.2285,
      "step": 56870
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3988200426101685,
      "learning_rate": 0.0005142322739806825,
      "loss": 3.1599,
      "step": 56871
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6601018905639648,
      "learning_rate": 0.0005142294104200954,
      "loss": 3.0886,
      "step": 56872
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.488181233406067,
      "learning_rate": 0.0005142265468196788,
      "loss": 3.0941,
      "step": 56873
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8551760911941528,
      "learning_rate": 0.0005142236831794337,
      "loss": 3.0431,
      "step": 56874
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.2999908924102783,
      "learning_rate": 0.0005142208194993602,
      "loss": 2.9787,
      "step": 56875
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.570481538772583,
      "learning_rate": 0.000514217955779459,
      "loss": 3.1659,
      "step": 56876
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0717620849609375,
      "learning_rate": 0.0005142150920197306,
      "loss": 3.0095,
      "step": 56877
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5316272974014282,
      "learning_rate": 0.0005142122282201757,
      "loss": 2.9056,
      "step": 56878
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.433937430381775,
      "learning_rate": 0.0005142093643807947,
      "loss": 2.8613,
      "step": 56879
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3933217525482178,
      "learning_rate": 0.0005142065005015881,
      "loss": 3.1421,
      "step": 56880
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.713799238204956,
      "learning_rate": 0.0005142036365825564,
      "loss": 2.8574,
      "step": 56881
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3819392919540405,
      "learning_rate": 0.0005142007726237003,
      "loss": 3.0436,
      "step": 56882
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7362736463546753,
      "learning_rate": 0.0005141979086250203,
      "loss": 2.8622,
      "step": 56883
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.740763783454895,
      "learning_rate": 0.0005141950445865166,
      "loss": 3.1052,
      "step": 56884
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6835966110229492,
      "learning_rate": 0.0005141921805081901,
      "loss": 2.9886,
      "step": 56885
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3344967365264893,
      "learning_rate": 0.0005141893163900414,
      "loss": 2.9788,
      "step": 56886
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4896754026412964,
      "learning_rate": 0.0005141864522320707,
      "loss": 3.1248,
      "step": 56887
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6346144676208496,
      "learning_rate": 0.0005141835880342788,
      "loss": 3.1377,
      "step": 56888
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0485219955444336,
      "learning_rate": 0.000514180723796666,
      "loss": 2.9363,
      "step": 56889
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5444570779800415,
      "learning_rate": 0.000514177859519233,
      "loss": 2.9629,
      "step": 56890
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3729146718978882,
      "learning_rate": 0.0005141749952019804,
      "loss": 3.0929,
      "step": 56891
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7211973667144775,
      "learning_rate": 0.0005141721308449085,
      "loss": 2.9226,
      "step": 56892
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.077545642852783,
      "learning_rate": 0.000514169266448018,
      "loss": 2.942,
      "step": 56893
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.448269009590149,
      "learning_rate": 0.0005141664020113095,
      "loss": 3.0411,
      "step": 56894
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9099175930023193,
      "learning_rate": 0.0005141635375347833,
      "loss": 2.9935,
      "step": 56895
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.2687182426452637,
      "learning_rate": 0.00051416067301844,
      "loss": 2.8071,
      "step": 56896
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.229923963546753,
      "learning_rate": 0.0005141578084622802,
      "loss": 2.8338,
      "step": 56897
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2153446674346924,
      "learning_rate": 0.0005141549438663045,
      "loss": 3.0399,
      "step": 56898
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.199584722518921,
      "learning_rate": 0.0005141520792305134,
      "loss": 3.2903,
      "step": 56899
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5587588548660278,
      "learning_rate": 0.0005141492145549074,
      "loss": 3.0188,
      "step": 56900
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4935243129730225,
      "learning_rate": 0.0005141463498394869,
      "loss": 2.8511,
      "step": 56901
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.8951916694641113,
      "learning_rate": 0.0005141434850842525,
      "loss": 3.104,
      "step": 56902
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.228458881378174,
      "learning_rate": 0.0005141406202892049,
      "loss": 3.1196,
      "step": 56903
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5924724340438843,
      "learning_rate": 0.0005141377554543445,
      "loss": 3.0986,
      "step": 56904
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7304534912109375,
      "learning_rate": 0.0005141348905796718,
      "loss": 3.0763,
      "step": 56905
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6542928218841553,
      "learning_rate": 0.0005141320256651875,
      "loss": 3.0112,
      "step": 56906
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8684097528457642,
      "learning_rate": 0.000514129160710892,
      "loss": 3.0458,
      "step": 56907
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5851894617080688,
      "learning_rate": 0.0005141262957167858,
      "loss": 3.0947,
      "step": 56908
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8194682598114014,
      "learning_rate": 0.0005141234306828694,
      "loss": 3.1141,
      "step": 56909
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.1100759506225586,
      "learning_rate": 0.0005141205656091435,
      "loss": 3.0232,
      "step": 56910
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5969290733337402,
      "learning_rate": 0.0005141177004956085,
      "loss": 3.1984,
      "step": 56911
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8032798767089844,
      "learning_rate": 0.000514114835342265,
      "loss": 3.0562,
      "step": 56912
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.44057035446167,
      "learning_rate": 0.0005141119701491134,
      "loss": 3.1066,
      "step": 56913
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4076155424118042,
      "learning_rate": 0.0005141091049161545,
      "loss": 3.4431,
      "step": 56914
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5296427011489868,
      "learning_rate": 0.0005141062396433886,
      "loss": 3.2116,
      "step": 56915
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6910686492919922,
      "learning_rate": 0.0005141033743308164,
      "loss": 3.0253,
      "step": 56916
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7937304973602295,
      "learning_rate": 0.0005141005089784383,
      "loss": 3.2585,
      "step": 56917
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8623641729354858,
      "learning_rate": 0.0005140976435862549,
      "loss": 2.9797,
      "step": 56918
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7573322057724,
      "learning_rate": 0.0005140947781542666,
      "loss": 2.9753,
      "step": 56919
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7895487546920776,
      "learning_rate": 0.0005140919126824741,
      "loss": 2.8711,
      "step": 56920
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7411384582519531,
      "learning_rate": 0.0005140890471708779,
      "loss": 3.23,
      "step": 56921
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0284788608551025,
      "learning_rate": 0.0005140861816194784,
      "loss": 3.0327,
      "step": 56922
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4975920915603638,
      "learning_rate": 0.0005140833160282763,
      "loss": 3.1462,
      "step": 56923
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.740295171737671,
      "learning_rate": 0.0005140804503972722,
      "loss": 2.8784,
      "step": 56924
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.057126760482788,
      "learning_rate": 0.0005140775847264663,
      "loss": 3.0131,
      "step": 56925
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7311650514602661,
      "learning_rate": 0.0005140747190158594,
      "loss": 3.3985,
      "step": 56926
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4617148637771606,
      "learning_rate": 0.0005140718532654521,
      "loss": 3.0998,
      "step": 56927
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7297013998031616,
      "learning_rate": 0.0005140689874752447,
      "loss": 2.9223,
      "step": 56928
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2004024982452393,
      "learning_rate": 0.0005140661216452377,
      "loss": 3.1515,
      "step": 56929
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3898509740829468,
      "learning_rate": 0.000514063255775432,
      "loss": 2.8088,
      "step": 56930
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0233190059661865,
      "learning_rate": 0.0005140603898658278,
      "loss": 2.9151,
      "step": 56931
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6233453750610352,
      "learning_rate": 0.0005140575239164256,
      "loss": 3.0493,
      "step": 56932
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.608126401901245,
      "learning_rate": 0.0005140546579272263,
      "loss": 3.143,
      "step": 56933
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8571504354476929,
      "learning_rate": 0.0005140517918982301,
      "loss": 2.8707,
      "step": 56934
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5121797323226929,
      "learning_rate": 0.0005140489258294376,
      "loss": 3.1092,
      "step": 56935
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.4661476612091064,
      "learning_rate": 0.0005140460597208494,
      "loss": 3.1594,
      "step": 56936
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.880196452140808,
      "learning_rate": 0.0005140431935724659,
      "loss": 2.8509,
      "step": 56937
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8160840272903442,
      "learning_rate": 0.0005140403273842879,
      "loss": 2.7461,
      "step": 56938
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7460951805114746,
      "learning_rate": 0.0005140374611563156,
      "loss": 2.6617,
      "step": 56939
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.4196813106536865,
      "learning_rate": 0.0005140345948885498,
      "loss": 2.9535,
      "step": 56940
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.59036922454834,
      "learning_rate": 0.0005140317285809909,
      "loss": 3.028,
      "step": 56941
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5176434516906738,
      "learning_rate": 0.0005140288622336394,
      "loss": 3.0042,
      "step": 56942
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.641892194747925,
      "learning_rate": 0.0005140259958464961,
      "loss": 3.1468,
      "step": 56943
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2296085357666016,
      "learning_rate": 0.0005140231294195612,
      "loss": 3.157,
      "step": 56944
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1794095039367676,
      "learning_rate": 0.0005140202629528353,
      "loss": 3.2491,
      "step": 56945
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7856436967849731,
      "learning_rate": 0.0005140173964463191,
      "loss": 3.1536,
      "step": 56946
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.640218734741211,
      "learning_rate": 0.0005140145299000129,
      "loss": 3.0534,
      "step": 56947
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.457632541656494,
      "learning_rate": 0.0005140116633139174,
      "loss": 2.9749,
      "step": 56948
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.17761492729187,
      "learning_rate": 0.0005140087966880332,
      "loss": 2.9785,
      "step": 56949
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6213915348052979,
      "learning_rate": 0.0005140059300223606,
      "loss": 3.2458,
      "step": 56950
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6764031648635864,
      "learning_rate": 0.0005140030633169002,
      "loss": 3.0114,
      "step": 56951
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8412339687347412,
      "learning_rate": 0.0005140001965716528,
      "loss": 3.0969,
      "step": 56952
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.546634554862976,
      "learning_rate": 0.0005139973297866186,
      "loss": 3.1432,
      "step": 56953
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5178687572479248,
      "learning_rate": 0.0005139944629617983,
      "loss": 3.0748,
      "step": 56954
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4143708944320679,
      "learning_rate": 0.0005139915960971923,
      "loss": 2.9486,
      "step": 56955
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.421966552734375,
      "learning_rate": 0.0005139887291928012,
      "loss": 3.1049,
      "step": 56956
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3756839036941528,
      "learning_rate": 0.0005139858622486257,
      "loss": 3.0398,
      "step": 56957
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4163705110549927,
      "learning_rate": 0.0005139829952646661,
      "loss": 2.7829,
      "step": 56958
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9444626569747925,
      "learning_rate": 0.0005139801282409231,
      "loss": 2.868,
      "step": 56959
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9368752241134644,
      "learning_rate": 0.0005139772611773971,
      "loss": 3.165,
      "step": 56960
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.591828465461731,
      "learning_rate": 0.0005139743940740887,
      "loss": 3.0272,
      "step": 56961
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6172025203704834,
      "learning_rate": 0.0005139715269309984,
      "loss": 3.2246,
      "step": 56962
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0689761638641357,
      "learning_rate": 0.0005139686597481267,
      "loss": 3.262,
      "step": 56963
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5295134782791138,
      "learning_rate": 0.0005139657925254743,
      "loss": 3.102,
      "step": 56964
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8255860805511475,
      "learning_rate": 0.0005139629252630416,
      "loss": 3.1128,
      "step": 56965
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5810073614120483,
      "learning_rate": 0.0005139600579608292,
      "loss": 2.759,
      "step": 56966
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5762608051300049,
      "learning_rate": 0.0005139571906188374,
      "loss": 3.2139,
      "step": 56967
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6299477815628052,
      "learning_rate": 0.0005139543232370671,
      "loss": 3.1569,
      "step": 56968
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4990110397338867,
      "learning_rate": 0.0005139514558155186,
      "loss": 3.1372,
      "step": 56969
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.502739429473877,
      "learning_rate": 0.0005139485883541925,
      "loss": 3.1308,
      "step": 56970
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7776094675064087,
      "learning_rate": 0.0005139457208530893,
      "loss": 2.7946,
      "step": 56971
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9563381671905518,
      "learning_rate": 0.0005139428533122095,
      "loss": 2.9297,
      "step": 56972
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5560683012008667,
      "learning_rate": 0.0005139399857315537,
      "loss": 3.0436,
      "step": 56973
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6544952392578125,
      "learning_rate": 0.0005139371181111225,
      "loss": 2.9098,
      "step": 56974
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5357638597488403,
      "learning_rate": 0.0005139342504509163,
      "loss": 3.2039,
      "step": 56975
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4894137382507324,
      "learning_rate": 0.0005139313827509357,
      "loss": 2.6943,
      "step": 56976
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.438759684562683,
      "learning_rate": 0.0005139285150111811,
      "loss": 3.0987,
      "step": 56977
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.588767409324646,
      "learning_rate": 0.0005139256472316533,
      "loss": 3.0344,
      "step": 56978
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7747467756271362,
      "learning_rate": 0.0005139227794123527,
      "loss": 2.833,
      "step": 56979
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.173980474472046,
      "learning_rate": 0.0005139199115532796,
      "loss": 2.9559,
      "step": 56980
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6884682178497314,
      "learning_rate": 0.0005139170436544349,
      "loss": 2.8675,
      "step": 56981
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3249024152755737,
      "learning_rate": 0.000513914175715819,
      "loss": 3.2158,
      "step": 56982
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.616621255874634,
      "learning_rate": 0.0005139113077374325,
      "loss": 2.981,
      "step": 56983
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.131549119949341,
      "learning_rate": 0.0005139084397192757,
      "loss": 2.8503,
      "step": 56984
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8470982313156128,
      "learning_rate": 0.0005139055716613494,
      "loss": 3.1209,
      "step": 56985
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.468569040298462,
      "learning_rate": 0.0005139027035636538,
      "loss": 3.2677,
      "step": 56986
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.715370535850525,
      "learning_rate": 0.00051389983542619,
      "loss": 3.2244,
      "step": 56987
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.051258087158203,
      "learning_rate": 0.0005138969672489579,
      "loss": 3.1948,
      "step": 56988
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9856948852539062,
      "learning_rate": 0.0005138940990319584,
      "loss": 3.2207,
      "step": 56989
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5733071565628052,
      "learning_rate": 0.000513891230775192,
      "loss": 2.8551,
      "step": 56990
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5289843082427979,
      "learning_rate": 0.0005138883624786592,
      "loss": 3.119,
      "step": 56991
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.636643409729004,
      "learning_rate": 0.0005138854941423603,
      "loss": 3.1206,
      "step": 56992
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5009874105453491,
      "learning_rate": 0.0005138826257662962,
      "loss": 2.9902,
      "step": 56993
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4831066131591797,
      "learning_rate": 0.0005138797573504674,
      "loss": 3.163,
      "step": 56994
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7649863958358765,
      "learning_rate": 0.0005138768888948741,
      "loss": 3.0813,
      "step": 56995
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.799637794494629,
      "learning_rate": 0.0005138740203995171,
      "loss": 3.0367,
      "step": 56996
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.683969497680664,
      "learning_rate": 0.000513871151864397,
      "loss": 2.9955,
      "step": 56997
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5302406549453735,
      "learning_rate": 0.0005138682832895141,
      "loss": 3.2123,
      "step": 56998
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.250152111053467,
      "learning_rate": 0.0005138654146748692,
      "loss": 3.0853,
      "step": 56999
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.656968355178833,
      "learning_rate": 0.0005138625460204625,
      "loss": 2.8654,
      "step": 57000
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4884097576141357,
      "learning_rate": 0.0005138596773262947,
      "loss": 3.0288,
      "step": 57001
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5963468551635742,
      "learning_rate": 0.0005138568085923665,
      "loss": 3.008,
      "step": 57002
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5467710494995117,
      "learning_rate": 0.0005138539398186783,
      "loss": 3.0997,
      "step": 57003
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6947191953659058,
      "learning_rate": 0.0005138510710052305,
      "loss": 3.0701,
      "step": 57004
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.700216293334961,
      "learning_rate": 0.0005138482021520239,
      "loss": 3.11,
      "step": 57005
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4129976034164429,
      "learning_rate": 0.0005138453332590587,
      "loss": 3.0593,
      "step": 57006
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6870282888412476,
      "learning_rate": 0.0005138424643263357,
      "loss": 2.9873,
      "step": 57007
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3795537948608398,
      "learning_rate": 0.0005138395953538553,
      "loss": 3.2857,
      "step": 57008
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.350196123123169,
      "learning_rate": 0.0005138367263416181,
      "loss": 3.0079,
      "step": 57009
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4076229333877563,
      "learning_rate": 0.0005138338572896246,
      "loss": 3.0498,
      "step": 57010
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.156057357788086,
      "learning_rate": 0.0005138309881978755,
      "loss": 2.8719,
      "step": 57011
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6740736961364746,
      "learning_rate": 0.0005138281190663709,
      "loss": 2.9483,
      "step": 57012
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6280710697174072,
      "learning_rate": 0.0005138252498951119,
      "loss": 2.8097,
      "step": 57013
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3998216390609741,
      "learning_rate": 0.0005138223806840987,
      "loss": 3.0518,
      "step": 57014
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4270381927490234,
      "learning_rate": 0.0005138195114333317,
      "loss": 3.1592,
      "step": 57015
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5724186897277832,
      "learning_rate": 0.0005138166421428117,
      "loss": 3.0853,
      "step": 57016
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4238600730895996,
      "learning_rate": 0.0005138137728125392,
      "loss": 3.1607,
      "step": 57017
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.262038469314575,
      "learning_rate": 0.0005138109034425148,
      "loss": 3.096,
      "step": 57018
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3552058935165405,
      "learning_rate": 0.0005138080340327387,
      "loss": 3.1827,
      "step": 57019
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5768057107925415,
      "learning_rate": 0.0005138051645832117,
      "loss": 2.8961,
      "step": 57020
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5926741361618042,
      "learning_rate": 0.0005138022950939344,
      "loss": 3.1015,
      "step": 57021
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5418803691864014,
      "learning_rate": 0.0005137994255649071,
      "loss": 3.2359,
      "step": 57022
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.448630452156067,
      "learning_rate": 0.0005137965559961304,
      "loss": 3.1555,
      "step": 57023
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.611840844154358,
      "learning_rate": 0.0005137936863876051,
      "loss": 3.1473,
      "step": 57024
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9003428220748901,
      "learning_rate": 0.0005137908167393313,
      "loss": 3.0125,
      "step": 57025
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.519446849822998,
      "learning_rate": 0.0005137879470513098,
      "loss": 3.295,
      "step": 57026
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.084174871444702,
      "learning_rate": 0.0005137850773235412,
      "loss": 3.0012,
      "step": 57027
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3518562316894531,
      "learning_rate": 0.0005137822075560258,
      "loss": 3.1915,
      "step": 57028
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9491032361984253,
      "learning_rate": 0.0005137793377487643,
      "loss": 2.9012,
      "step": 57029
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5878517627716064,
      "learning_rate": 0.0005137764679017571,
      "loss": 3.2804,
      "step": 57030
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3548171520233154,
      "learning_rate": 0.000513773598015005,
      "loss": 2.997,
      "step": 57031
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7250807285308838,
      "learning_rate": 0.0005137707280885083,
      "loss": 3.0128,
      "step": 57032
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4346892833709717,
      "learning_rate": 0.0005137678581222675,
      "loss": 3.0314,
      "step": 57033
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9159044027328491,
      "learning_rate": 0.0005137649881162832,
      "loss": 2.9632,
      "step": 57034
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.9818406105041504,
      "learning_rate": 0.0005137621180705561,
      "loss": 3.0087,
      "step": 57035
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4800630807876587,
      "learning_rate": 0.0005137592479850865,
      "loss": 3.071,
      "step": 57036
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6275643110275269,
      "learning_rate": 0.000513756377859875,
      "loss": 3.2276,
      "step": 57037
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.612730026245117,
      "learning_rate": 0.0005137535076949222,
      "loss": 3.2457,
      "step": 57038
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.476960301399231,
      "learning_rate": 0.0005137506374902286,
      "loss": 2.8835,
      "step": 57039
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6538969278335571,
      "learning_rate": 0.0005137477672457946,
      "loss": 2.9813,
      "step": 57040
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5656059980392456,
      "learning_rate": 0.000513744896961621,
      "loss": 2.9955,
      "step": 57041
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4313898086547852,
      "learning_rate": 0.0005137420266377081,
      "loss": 2.8118,
      "step": 57042
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.600630760192871,
      "learning_rate": 0.0005137391562740567,
      "loss": 3.0569,
      "step": 57043
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6060792207717896,
      "learning_rate": 0.0005137362858706669,
      "loss": 3.0968,
      "step": 57044
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7156732082366943,
      "learning_rate": 0.0005137334154275395,
      "loss": 2.9244,
      "step": 57045
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4563297033309937,
      "learning_rate": 0.0005137305449446752,
      "loss": 3.041,
      "step": 57046
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6273075342178345,
      "learning_rate": 0.0005137276744220742,
      "loss": 3.2006,
      "step": 57047
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7745875120162964,
      "learning_rate": 0.0005137248038597374,
      "loss": 3.3061,
      "step": 57048
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7828272581100464,
      "learning_rate": 0.0005137219332576649,
      "loss": 3.129,
      "step": 57049
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6291776895523071,
      "learning_rate": 0.0005137190626158575,
      "loss": 2.9769,
      "step": 57050
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.406925082206726,
      "learning_rate": 0.0005137161919343159,
      "loss": 2.9726,
      "step": 57051
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8087151050567627,
      "learning_rate": 0.0005137133212130403,
      "loss": 3.216,
      "step": 57052
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0016393661499023,
      "learning_rate": 0.0005137104504520312,
      "loss": 3.1724,
      "step": 57053
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8837826251983643,
      "learning_rate": 0.0005137075796512895,
      "loss": 3.1223,
      "step": 57054
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8923510313034058,
      "learning_rate": 0.0005137047088108154,
      "loss": 2.9562,
      "step": 57055
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0944488048553467,
      "learning_rate": 0.0005137018379306096,
      "loss": 2.8079,
      "step": 57056
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9079381227493286,
      "learning_rate": 0.0005136989670106726,
      "loss": 3.1756,
      "step": 57057
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7963351011276245,
      "learning_rate": 0.0005136960960510049,
      "loss": 3.2715,
      "step": 57058
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.381199836730957,
      "learning_rate": 0.0005136932250516072,
      "loss": 2.8513,
      "step": 57059
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.085827350616455,
      "learning_rate": 0.0005136903540124798,
      "loss": 3.0629,
      "step": 57060
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4234495162963867,
      "learning_rate": 0.0005136874829336233,
      "loss": 2.9162,
      "step": 57061
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.0955681800842285,
      "learning_rate": 0.0005136846118150382,
      "loss": 2.8898,
      "step": 57062
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8441535234451294,
      "learning_rate": 0.0005136817406567253,
      "loss": 3.1711,
      "step": 57063
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7877777814865112,
      "learning_rate": 0.0005136788694586848,
      "loss": 3.1475,
      "step": 57064
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7666075229644775,
      "learning_rate": 0.0005136759982209174,
      "loss": 3.2189,
      "step": 57065
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4149866104125977,
      "learning_rate": 0.0005136731269434235,
      "loss": 2.9553,
      "step": 57066
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5636110305786133,
      "learning_rate": 0.0005136702556262038,
      "loss": 3.1446,
      "step": 57067
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3976616859436035,
      "learning_rate": 0.0005136673842692588,
      "loss": 3.0513,
      "step": 57068
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.6757843494415283,
      "learning_rate": 0.000513664512872589,
      "loss": 3.1661,
      "step": 57069
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9760332107543945,
      "learning_rate": 0.000513661641436195,
      "loss": 3.2383,
      "step": 57070
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.595349669456482,
      "learning_rate": 0.000513658769960077,
      "loss": 2.8412,
      "step": 57071
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6783677339553833,
      "learning_rate": 0.000513655898444236,
      "loss": 3.1991,
      "step": 57072
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9657613039016724,
      "learning_rate": 0.0005136530268886723,
      "loss": 3.1291,
      "step": 57073
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.504555106163025,
      "learning_rate": 0.0005136501552933865,
      "loss": 3.1729,
      "step": 57074
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4671026468276978,
      "learning_rate": 0.0005136472836583791,
      "loss": 2.8723,
      "step": 57075
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.413748025894165,
      "learning_rate": 0.0005136444119836506,
      "loss": 3.069,
      "step": 57076
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6803836822509766,
      "learning_rate": 0.0005136415402692017,
      "loss": 2.8128,
      "step": 57077
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5237030982971191,
      "learning_rate": 0.0005136386685150327,
      "loss": 2.9676,
      "step": 57078
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8844026327133179,
      "learning_rate": 0.0005136357967211442,
      "loss": 2.9673,
      "step": 57079
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4538434743881226,
      "learning_rate": 0.0005136329248875368,
      "loss": 3.1129,
      "step": 57080
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6468125581741333,
      "learning_rate": 0.0005136300530142109,
      "loss": 3.1746,
      "step": 57081
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4216831922531128,
      "learning_rate": 0.0005136271811011673,
      "loss": 3.0332,
      "step": 57082
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4450068473815918,
      "learning_rate": 0.0005136243091484064,
      "loss": 3.1056,
      "step": 57083
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7293258905410767,
      "learning_rate": 0.0005136214371559287,
      "loss": 3.4323,
      "step": 57084
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3857357501983643,
      "learning_rate": 0.0005136185651237346,
      "loss": 3.0554,
      "step": 57085
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.078460693359375,
      "learning_rate": 0.0005136156930518248,
      "loss": 3.1499,
      "step": 57086
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4470665454864502,
      "learning_rate": 0.0005136128209401999,
      "loss": 3.1277,
      "step": 57087
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5770293474197388,
      "learning_rate": 0.0005136099487888604,
      "loss": 3.0733,
      "step": 57088
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.904514193534851,
      "learning_rate": 0.0005136070765978067,
      "loss": 2.9865,
      "step": 57089
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4886112213134766,
      "learning_rate": 0.0005136042043670393,
      "loss": 2.9313,
      "step": 57090
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7855864763259888,
      "learning_rate": 0.000513601332096559,
      "loss": 3.2352,
      "step": 57091
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5250976085662842,
      "learning_rate": 0.0005135984597863662,
      "loss": 2.8591,
      "step": 57092
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4076236486434937,
      "learning_rate": 0.0005135955874364613,
      "loss": 3.1156,
      "step": 57093
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4685134887695312,
      "learning_rate": 0.000513592715046845,
      "loss": 3.0644,
      "step": 57094
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.534305453300476,
      "learning_rate": 0.0005135898426175177,
      "loss": 3.2006,
      "step": 57095
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4747707843780518,
      "learning_rate": 0.0005135869701484801,
      "loss": 3.1634,
      "step": 57096
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5999445915222168,
      "learning_rate": 0.0005135840976397327,
      "loss": 2.9013,
      "step": 57097
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3187649250030518,
      "learning_rate": 0.000513581225091276,
      "loss": 2.9685,
      "step": 57098
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6060292720794678,
      "learning_rate": 0.0005135783525031104,
      "loss": 3.069,
      "step": 57099
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3588664531707764,
      "learning_rate": 0.0005135754798752366,
      "loss": 2.7298,
      "step": 57100
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.897584080696106,
      "learning_rate": 0.000513572607207655,
      "loss": 2.9433,
      "step": 57101
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6629109382629395,
      "learning_rate": 0.0005135697345003663,
      "loss": 2.8165,
      "step": 57102
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3956294059753418,
      "learning_rate": 0.0005135668617533709,
      "loss": 3.2828,
      "step": 57103
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6963201761245728,
      "learning_rate": 0.0005135639889666695,
      "loss": 2.9841,
      "step": 57104
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.493901014328003,
      "learning_rate": 0.0005135611161402625,
      "loss": 3.2456,
      "step": 57105
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6551780700683594,
      "learning_rate": 0.0005135582432741504,
      "loss": 2.9546,
      "step": 57106
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6171140670776367,
      "learning_rate": 0.0005135553703683338,
      "loss": 2.8341,
      "step": 57107
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8083817958831787,
      "learning_rate": 0.0005135524974228134,
      "loss": 3.1628,
      "step": 57108
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5031594038009644,
      "learning_rate": 0.0005135496244375893,
      "loss": 2.9199,
      "step": 57109
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4303518533706665,
      "learning_rate": 0.0005135467514126624,
      "loss": 3.406,
      "step": 57110
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.480618476867676,
      "learning_rate": 0.0005135438783480332,
      "loss": 3.2068,
      "step": 57111
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.580033540725708,
      "learning_rate": 0.000513541005243702,
      "loss": 3.0931,
      "step": 57112
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.781090259552002,
      "learning_rate": 0.0005135381320996697,
      "loss": 3.1742,
      "step": 57113
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.510428547859192,
      "learning_rate": 0.0005135352589159364,
      "loss": 3.2663,
      "step": 57114
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5625098943710327,
      "learning_rate": 0.000513532385692503,
      "loss": 3.007,
      "step": 57115
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8409608602523804,
      "learning_rate": 0.00051352951242937,
      "loss": 2.8755,
      "step": 57116
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7737303972244263,
      "learning_rate": 0.0005135266391265377,
      "loss": 2.9422,
      "step": 57117
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6984589099884033,
      "learning_rate": 0.0005135237657840067,
      "loss": 3.1562,
      "step": 57118
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5116139650344849,
      "learning_rate": 0.0005135208924017777,
      "loss": 3.382,
      "step": 57119
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1001574993133545,
      "learning_rate": 0.0005135180189798511,
      "loss": 2.9575,
      "step": 57120
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8294917345046997,
      "learning_rate": 0.0005135151455182274,
      "loss": 3.1225,
      "step": 57121
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2809886932373047,
      "learning_rate": 0.0005135122720169074,
      "loss": 2.9367,
      "step": 57122
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7669689655303955,
      "learning_rate": 0.0005135093984758913,
      "loss": 3.1663,
      "step": 57123
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3796137571334839,
      "learning_rate": 0.0005135065248951798,
      "loss": 2.8672,
      "step": 57124
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.353879690170288,
      "learning_rate": 0.0005135036512747734,
      "loss": 2.8636,
      "step": 57125
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2100117206573486,
      "learning_rate": 0.0005135007776146727,
      "loss": 2.9733,
      "step": 57126
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5666401386260986,
      "learning_rate": 0.0005134979039148782,
      "loss": 3.1495,
      "step": 57127
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5637938976287842,
      "learning_rate": 0.0005134950301753903,
      "loss": 3.2216,
      "step": 57128
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.979642391204834,
      "learning_rate": 0.0005134921563962096,
      "loss": 2.9775,
      "step": 57129
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.135108232498169,
      "learning_rate": 0.0005134892825773369,
      "loss": 3.1767,
      "step": 57130
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5613478422164917,
      "learning_rate": 0.0005134864087187722,
      "loss": 3.078,
      "step": 57131
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.6154861450195312,
      "learning_rate": 0.0005134835348205166,
      "loss": 3.1456,
      "step": 57132
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4611523151397705,
      "learning_rate": 0.0005134806608825703,
      "loss": 2.9591,
      "step": 57133
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6568379402160645,
      "learning_rate": 0.0005134777869049339,
      "loss": 3.0309,
      "step": 57134
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.96658456325531,
      "learning_rate": 0.0005134749128876079,
      "loss": 2.8397,
      "step": 57135
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.4251344203948975,
      "learning_rate": 0.000513472038830593,
      "loss": 3.2289,
      "step": 57136
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9749925136566162,
      "learning_rate": 0.0005134691647338895,
      "loss": 3.0309,
      "step": 57137
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.460715889930725,
      "learning_rate": 0.0005134662905974982,
      "loss": 3.0933,
      "step": 57138
    },
    {
      "epoch": 0.74,
      "grad_norm": 6.058473587036133,
      "learning_rate": 0.0005134634164214194,
      "loss": 2.7001,
      "step": 57139
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.0715794563293457,
      "learning_rate": 0.0005134605422056536,
      "loss": 2.9114,
      "step": 57140
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4523205757141113,
      "learning_rate": 0.0005134576679502017,
      "loss": 3.0428,
      "step": 57141
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0582780838012695,
      "learning_rate": 0.0005134547936550639,
      "loss": 3.1947,
      "step": 57142
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0250518321990967,
      "learning_rate": 0.0005134519193202406,
      "loss": 2.7495,
      "step": 57143
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.1465559005737305,
      "learning_rate": 0.0005134490449457329,
      "loss": 3.1365,
      "step": 57144
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.795306921005249,
      "learning_rate": 0.0005134461705315408,
      "loss": 2.9586,
      "step": 57145
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.9545416831970215,
      "learning_rate": 0.0005134432960776651,
      "loss": 2.8607,
      "step": 57146
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.047611951828003,
      "learning_rate": 0.0005134404215841063,
      "loss": 3.3075,
      "step": 57147
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2791144847869873,
      "learning_rate": 0.0005134375470508648,
      "loss": 3.0346,
      "step": 57148
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6225991249084473,
      "learning_rate": 0.0005134346724779413,
      "loss": 2.964,
      "step": 57149
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3432222604751587,
      "learning_rate": 0.0005134317978653361,
      "loss": 3.1547,
      "step": 57150
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.2649002075195312,
      "learning_rate": 0.0005134289232130502,
      "loss": 3.1811,
      "step": 57151
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.1045234203338623,
      "learning_rate": 0.0005134260485210836,
      "loss": 2.9339,
      "step": 57152
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.253101110458374,
      "learning_rate": 0.0005134231737894372,
      "loss": 2.8723,
      "step": 57153
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6616661548614502,
      "learning_rate": 0.0005134202990181113,
      "loss": 3.039,
      "step": 57154
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.972280502319336,
      "learning_rate": 0.0005134174242071066,
      "loss": 3.1184,
      "step": 57155
    },
    {
      "epoch": 0.74,
      "grad_norm": 4.963894844055176,
      "learning_rate": 0.0005134145493564235,
      "loss": 2.8922,
      "step": 57156
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.133185625076294,
      "learning_rate": 0.0005134116744660626,
      "loss": 2.665,
      "step": 57157
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8513096570968628,
      "learning_rate": 0.0005134087995360245,
      "loss": 3.1619,
      "step": 57158
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.758335590362549,
      "learning_rate": 0.0005134059245663097,
      "loss": 3.1929,
      "step": 57159
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.3645100593566895,
      "learning_rate": 0.0005134030495569187,
      "loss": 3.0617,
      "step": 57160
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.7668955326080322,
      "learning_rate": 0.000513400174507852,
      "loss": 2.9712,
      "step": 57161
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0387282371520996,
      "learning_rate": 0.0005133972994191102,
      "loss": 2.8727,
      "step": 57162
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7036683559417725,
      "learning_rate": 0.0005133944242906939,
      "loss": 3.1117,
      "step": 57163
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0433239936828613,
      "learning_rate": 0.0005133915491226034,
      "loss": 3.0433,
      "step": 57164
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3430168628692627,
      "learning_rate": 0.0005133886739148394,
      "loss": 3.1049,
      "step": 57165
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.697606086730957,
      "learning_rate": 0.0005133857986674025,
      "loss": 2.9013,
      "step": 57166
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.383565068244934,
      "learning_rate": 0.0005133829233802932,
      "loss": 3.1906,
      "step": 57167
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.815725326538086,
      "learning_rate": 0.0005133800480535118,
      "loss": 3.0142,
      "step": 57168
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.3794867992401123,
      "learning_rate": 0.0005133771726870592,
      "loss": 3.0,
      "step": 57169
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.582770824432373,
      "learning_rate": 0.0005133742972809357,
      "loss": 3.1111,
      "step": 57170
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5871586799621582,
      "learning_rate": 0.0005133714218351419,
      "loss": 2.8515,
      "step": 57171
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.111964702606201,
      "learning_rate": 0.0005133685463496782,
      "loss": 3.0577,
      "step": 57172
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8660330772399902,
      "learning_rate": 0.0005133656708245454,
      "loss": 2.9653,
      "step": 57173
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4636483192443848,
      "learning_rate": 0.0005133627952597439,
      "loss": 2.9899,
      "step": 57174
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5191985368728638,
      "learning_rate": 0.0005133599196552742,
      "loss": 3.0817,
      "step": 57175
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7659432888031006,
      "learning_rate": 0.0005133570440111368,
      "loss": 3.3057,
      "step": 57176
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5899302959442139,
      "learning_rate": 0.0005133541683273324,
      "loss": 3.0005,
      "step": 57177
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3387372493743896,
      "learning_rate": 0.0005133512926038613,
      "loss": 2.9863,
      "step": 57178
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.640234351158142,
      "learning_rate": 0.0005133484168407243,
      "loss": 3.0372,
      "step": 57179
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.823304295539856,
      "learning_rate": 0.0005133455410379217,
      "loss": 3.2201,
      "step": 57180
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.555396795272827,
      "learning_rate": 0.0005133426651954542,
      "loss": 3.4572,
      "step": 57181
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.868788480758667,
      "learning_rate": 0.0005133397893133223,
      "loss": 3.1216,
      "step": 57182
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5860365629196167,
      "learning_rate": 0.0005133369133915265,
      "loss": 3.1592,
      "step": 57183
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0185599327087402,
      "learning_rate": 0.0005133340374300671,
      "loss": 3.0522,
      "step": 57184
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.8340518474578857,
      "learning_rate": 0.0005133311614289452,
      "loss": 3.0873,
      "step": 57185
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6491799354553223,
      "learning_rate": 0.000513328285388161,
      "loss": 3.2321,
      "step": 57186
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.0411581993103027,
      "learning_rate": 0.0005133254093077149,
      "loss": 3.1481,
      "step": 57187
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5125865936279297,
      "learning_rate": 0.0005133225331876075,
      "loss": 2.8817,
      "step": 57188
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5019065141677856,
      "learning_rate": 0.0005133196570278396,
      "loss": 2.9429,
      "step": 57189
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7269090414047241,
      "learning_rate": 0.0005133167808284114,
      "loss": 3.0822,
      "step": 57190
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8083417415618896,
      "learning_rate": 0.0005133139045893237,
      "loss": 3.1134,
      "step": 57191
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6988285779953003,
      "learning_rate": 0.0005133110283105769,
      "loss": 2.9561,
      "step": 57192
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4912399053573608,
      "learning_rate": 0.0005133081519921715,
      "loss": 3.1623,
      "step": 57193
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6888536214828491,
      "learning_rate": 0.0005133052756341081,
      "loss": 3.0425,
      "step": 57194
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.746293544769287,
      "learning_rate": 0.0005133023992363873,
      "loss": 3.3552,
      "step": 57195
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6803654432296753,
      "learning_rate": 0.0005132995227990094,
      "loss": 3.0291,
      "step": 57196
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.4262607097625732,
      "learning_rate": 0.0005132966463219753,
      "loss": 2.9754,
      "step": 57197
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.7869046926498413,
      "learning_rate": 0.0005132937698052852,
      "loss": 3.0254,
      "step": 57198
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2246406078338623,
      "learning_rate": 0.0005132908932489398,
      "loss": 3.3235,
      "step": 57199
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.584150552749634,
      "learning_rate": 0.0005132880166529395,
      "loss": 2.9861,
      "step": 57200
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3521230220794678,
      "learning_rate": 0.000513285140017285,
      "loss": 2.939,
      "step": 57201
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3305240869522095,
      "learning_rate": 0.0005132822633419768,
      "loss": 2.9338,
      "step": 57202
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8314729928970337,
      "learning_rate": 0.0005132793866270154,
      "loss": 3.0484,
      "step": 57203
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.963368535041809,
      "learning_rate": 0.0005132765098724014,
      "loss": 3.1322,
      "step": 57204
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.77755606174469,
      "learning_rate": 0.0005132736330781351,
      "loss": 3.0556,
      "step": 57205
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5327249765396118,
      "learning_rate": 0.0005132707562442172,
      "loss": 3.0872,
      "step": 57206
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6851834058761597,
      "learning_rate": 0.0005132678793706484,
      "loss": 2.9469,
      "step": 57207
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.3656991720199585,
      "learning_rate": 0.0005132650024574291,
      "loss": 2.8469,
      "step": 57208
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.2795753479003906,
      "learning_rate": 0.0005132621255045597,
      "loss": 2.9956,
      "step": 57209
    },
    {
      "epoch": 0.74,
      "grad_norm": 2.013026475906372,
      "learning_rate": 0.0005132592485120408,
      "loss": 2.9108,
      "step": 57210
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5201265811920166,
      "learning_rate": 0.000513256371479873,
      "loss": 2.9201,
      "step": 57211
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.6601450443267822,
      "learning_rate": 0.0005132534944080569,
      "loss": 2.8124,
      "step": 57212
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.918848991394043,
      "learning_rate": 0.0005132506172965929,
      "loss": 3.0573,
      "step": 57213
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.5092945098876953,
      "learning_rate": 0.0005132477401454816,
      "loss": 3.0484,
      "step": 57214
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8420307636260986,
      "learning_rate": 0.0005132448629547235,
      "loss": 3.1111,
      "step": 57215
    },
    {
      "epoch": 0.74,
      "grad_norm": 1.8578070402145386,
      "learning_rate": 0.0005132419857243191,
      "loss": 2.8247,
      "step": 57216
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.779151678085327,
      "learning_rate": 0.0005132391084542691,
      "loss": 2.9918,
      "step": 57217
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4824777841567993,
      "learning_rate": 0.0005132362311445739,
      "loss": 3.2193,
      "step": 57218
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.341146945953369,
      "learning_rate": 0.0005132333537952341,
      "loss": 2.8948,
      "step": 57219
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4363523721694946,
      "learning_rate": 0.00051323047640625,
      "loss": 3.1888,
      "step": 57220
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3418623208999634,
      "learning_rate": 0.0005132275989776225,
      "loss": 3.0579,
      "step": 57221
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3643089532852173,
      "learning_rate": 0.000513224721509352,
      "loss": 3.0082,
      "step": 57222
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.657957673072815,
      "learning_rate": 0.000513221844001439,
      "loss": 2.9426,
      "step": 57223
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6674087047576904,
      "learning_rate": 0.0005132189664538839,
      "loss": 3.1404,
      "step": 57224
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6295684576034546,
      "learning_rate": 0.0005132160888666874,
      "loss": 3.0079,
      "step": 57225
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8706480264663696,
      "learning_rate": 0.00051321321123985,
      "loss": 3.1544,
      "step": 57226
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.381761074066162,
      "learning_rate": 0.0005132103335733723,
      "loss": 3.1055,
      "step": 57227
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2872040271759033,
      "learning_rate": 0.0005132074558672548,
      "loss": 3.2246,
      "step": 57228
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3989901542663574,
      "learning_rate": 0.0005132045781214979,
      "loss": 3.2419,
      "step": 57229
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7965688705444336,
      "learning_rate": 0.0005132017003361023,
      "loss": 2.9017,
      "step": 57230
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4386273622512817,
      "learning_rate": 0.0005131988225110685,
      "loss": 2.9491,
      "step": 57231
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.716364860534668,
      "learning_rate": 0.000513195944646397,
      "loss": 3.1152,
      "step": 57232
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.9834935665130615,
      "learning_rate": 0.0005131930667420883,
      "loss": 2.9356,
      "step": 57233
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.535497784614563,
      "learning_rate": 0.0005131901887981431,
      "loss": 3.0063,
      "step": 57234
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.787713646888733,
      "learning_rate": 0.0005131873108145617,
      "loss": 2.9904,
      "step": 57235
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9564744234085083,
      "learning_rate": 0.0005131844327913448,
      "loss": 3.0143,
      "step": 57236
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5365989208221436,
      "learning_rate": 0.000513181554728493,
      "loss": 2.9045,
      "step": 57237
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2983372211456299,
      "learning_rate": 0.0005131786766260065,
      "loss": 2.8705,
      "step": 57238
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9055289030075073,
      "learning_rate": 0.0005131757984838863,
      "loss": 3.0103,
      "step": 57239
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.284160614013672,
      "learning_rate": 0.0005131729203021325,
      "loss": 3.1245,
      "step": 57240
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3657456636428833,
      "learning_rate": 0.0005131700420807459,
      "loss": 3.1615,
      "step": 57241
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8525017499923706,
      "learning_rate": 0.0005131671638197269,
      "loss": 2.882,
      "step": 57242
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.578456163406372,
      "learning_rate": 0.0005131642855190762,
      "loss": 3.0509,
      "step": 57243
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7549279928207397,
      "learning_rate": 0.0005131614071787942,
      "loss": 2.9744,
      "step": 57244
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7910106182098389,
      "learning_rate": 0.0005131585287988815,
      "loss": 2.9578,
      "step": 57245
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6527574062347412,
      "learning_rate": 0.0005131556503793385,
      "loss": 2.9643,
      "step": 57246
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3484152555465698,
      "learning_rate": 0.000513152771920166,
      "loss": 3.1355,
      "step": 57247
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0070362091064453,
      "learning_rate": 0.0005131498934213643,
      "loss": 2.9951,
      "step": 57248
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6256048679351807,
      "learning_rate": 0.000513147014882934,
      "loss": 2.8433,
      "step": 57249
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3639320135116577,
      "learning_rate": 0.0005131441363048757,
      "loss": 3.0247,
      "step": 57250
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7713192701339722,
      "learning_rate": 0.0005131412576871898,
      "loss": 3.142,
      "step": 57251
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9303786754608154,
      "learning_rate": 0.000513138379029877,
      "loss": 3.163,
      "step": 57252
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5602706670761108,
      "learning_rate": 0.0005131355003329377,
      "loss": 3.0951,
      "step": 57253
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5811457633972168,
      "learning_rate": 0.0005131326215963725,
      "loss": 2.9794,
      "step": 57254
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8016705513000488,
      "learning_rate": 0.0005131297428201819,
      "loss": 2.801,
      "step": 57255
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8631359338760376,
      "learning_rate": 0.0005131268640043665,
      "loss": 3.087,
      "step": 57256
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4897218942642212,
      "learning_rate": 0.0005131239851489267,
      "loss": 3.1373,
      "step": 57257
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4032801389694214,
      "learning_rate": 0.0005131211062538632,
      "loss": 2.7569,
      "step": 57258
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5475789308547974,
      "learning_rate": 0.0005131182273191765,
      "loss": 3.3122,
      "step": 57259
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.539435625076294,
      "learning_rate": 0.000513115348344867,
      "loss": 3.1401,
      "step": 57260
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3687552213668823,
      "learning_rate": 0.0005131124693309355,
      "loss": 3.0676,
      "step": 57261
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5612022876739502,
      "learning_rate": 0.0005131095902773822,
      "loss": 3.211,
      "step": 57262
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.592423915863037,
      "learning_rate": 0.0005131067111842078,
      "loss": 2.6618,
      "step": 57263
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6547011137008667,
      "learning_rate": 0.0005131038320514129,
      "loss": 3.4635,
      "step": 57264
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.090348958969116,
      "learning_rate": 0.000513100952878998,
      "loss": 3.1283,
      "step": 57265
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.5690512657165527,
      "learning_rate": 0.0005130980736669636,
      "loss": 3.0301,
      "step": 57266
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5227566957473755,
      "learning_rate": 0.0005130951944153103,
      "loss": 3.0933,
      "step": 57267
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.456043243408203,
      "learning_rate": 0.0005130923151240384,
      "loss": 2.9345,
      "step": 57268
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.483320951461792,
      "learning_rate": 0.0005130894357931487,
      "loss": 2.9079,
      "step": 57269
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5728073120117188,
      "learning_rate": 0.0005130865564226418,
      "loss": 3.3738,
      "step": 57270
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5731618404388428,
      "learning_rate": 0.000513083677012518,
      "loss": 3.1803,
      "step": 57271
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4241771697998047,
      "learning_rate": 0.0005130807975627779,
      "loss": 3.1048,
      "step": 57272
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.490328311920166,
      "learning_rate": 0.000513077918073422,
      "loss": 3.1524,
      "step": 57273
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6761006116867065,
      "learning_rate": 0.0005130750385444509,
      "loss": 3.337,
      "step": 57274
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5436164140701294,
      "learning_rate": 0.0005130721589758652,
      "loss": 3.2168,
      "step": 57275
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8343818187713623,
      "learning_rate": 0.0005130692793676654,
      "loss": 2.9241,
      "step": 57276
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.552351474761963,
      "learning_rate": 0.0005130663997198519,
      "loss": 3.0138,
      "step": 57277
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6649975776672363,
      "learning_rate": 0.0005130635200324253,
      "loss": 2.9031,
      "step": 57278
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5660804510116577,
      "learning_rate": 0.0005130606403053863,
      "loss": 2.9796,
      "step": 57279
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.907007932662964,
      "learning_rate": 0.0005130577605387352,
      "loss": 3.0245,
      "step": 57280
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4843955039978027,
      "learning_rate": 0.0005130548807324727,
      "loss": 3.1997,
      "step": 57281
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3800867795944214,
      "learning_rate": 0.0005130520008865993,
      "loss": 3.034,
      "step": 57282
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7467131614685059,
      "learning_rate": 0.0005130491210011155,
      "loss": 3.1911,
      "step": 57283
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6732605695724487,
      "learning_rate": 0.0005130462410760217,
      "loss": 2.843,
      "step": 57284
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.817407488822937,
      "learning_rate": 0.0005130433611113188,
      "loss": 3.1473,
      "step": 57285
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5034152269363403,
      "learning_rate": 0.000513040481107007,
      "loss": 3.1183,
      "step": 57286
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8968850374221802,
      "learning_rate": 0.0005130376010630869,
      "loss": 3.066,
      "step": 57287
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8832513093948364,
      "learning_rate": 0.0005130347209795593,
      "loss": 2.9902,
      "step": 57288
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4849061965942383,
      "learning_rate": 0.0005130318408564243,
      "loss": 3.0303,
      "step": 57289
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3513246774673462,
      "learning_rate": 0.0005130289606936827,
      "loss": 3.2665,
      "step": 57290
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.75252103805542,
      "learning_rate": 0.000513026080491335,
      "loss": 3.1921,
      "step": 57291
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4825990200042725,
      "learning_rate": 0.0005130232002493818,
      "loss": 2.8592,
      "step": 57292
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1402947902679443,
      "learning_rate": 0.0005130203199678236,
      "loss": 2.9832,
      "step": 57293
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.613816261291504,
      "learning_rate": 0.0005130174396466608,
      "loss": 3.0318,
      "step": 57294
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.069436550140381,
      "learning_rate": 0.0005130145592858941,
      "loss": 3.1793,
      "step": 57295
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.486698865890503,
      "learning_rate": 0.0005130116788855239,
      "loss": 2.9402,
      "step": 57296
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.571243166923523,
      "learning_rate": 0.0005130087984455509,
      "loss": 2.9513,
      "step": 57297
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5634344816207886,
      "learning_rate": 0.0005130059179659754,
      "loss": 3.0997,
      "step": 57298
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4062680006027222,
      "learning_rate": 0.0005130030374467983,
      "loss": 3.008,
      "step": 57299
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6507172584533691,
      "learning_rate": 0.0005130001568880198,
      "loss": 2.8661,
      "step": 57300
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3476343154907227,
      "learning_rate": 0.0005129972762896405,
      "loss": 3.2075,
      "step": 57301
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5927534103393555,
      "learning_rate": 0.0005129943956516611,
      "loss": 3.0847,
      "step": 57302
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8432719707489014,
      "learning_rate": 0.0005129915149740819,
      "loss": 3.177,
      "step": 57303
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.483414888381958,
      "learning_rate": 0.0005129886342569035,
      "loss": 3.3514,
      "step": 57304
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4228792190551758,
      "learning_rate": 0.0005129857535001266,
      "loss": 3.1735,
      "step": 57305
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6290487051010132,
      "learning_rate": 0.0005129828727037516,
      "loss": 3.1506,
      "step": 57306
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6914680004119873,
      "learning_rate": 0.0005129799918677792,
      "loss": 3.3067,
      "step": 57307
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8714523315429688,
      "learning_rate": 0.0005129771109922096,
      "loss": 3.1798,
      "step": 57308
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6931875944137573,
      "learning_rate": 0.0005129742300770437,
      "loss": 2.8764,
      "step": 57309
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5713688135147095,
      "learning_rate": 0.0005129713491222817,
      "loss": 2.8829,
      "step": 57310
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3829107284545898,
      "learning_rate": 0.0005129684681279245,
      "loss": 3.0859,
      "step": 57311
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5633145570755005,
      "learning_rate": 0.0005129655870939724,
      "loss": 3.0564,
      "step": 57312
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5536770820617676,
      "learning_rate": 0.0005129627060204258,
      "loss": 3.0321,
      "step": 57313
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6079579591751099,
      "learning_rate": 0.0005129598249072856,
      "loss": 3.1347,
      "step": 57314
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7484525442123413,
      "learning_rate": 0.000512956943754552,
      "loss": 3.0419,
      "step": 57315
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.373095989227295,
      "learning_rate": 0.0005129540625622258,
      "loss": 3.0478,
      "step": 57316
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.556704044342041,
      "learning_rate": 0.0005129511813303074,
      "loss": 2.7278,
      "step": 57317
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.297353982925415,
      "learning_rate": 0.0005129483000587973,
      "loss": 2.8734,
      "step": 57318
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.595784306526184,
      "learning_rate": 0.0005129454187476962,
      "loss": 2.9839,
      "step": 57319
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4122824668884277,
      "learning_rate": 0.0005129425373970045,
      "loss": 3.0905,
      "step": 57320
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1412065029144287,
      "learning_rate": 0.0005129396560067226,
      "loss": 3.1507,
      "step": 57321
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5949865579605103,
      "learning_rate": 0.0005129367745768514,
      "loss": 3.1974,
      "step": 57322
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4387835264205933,
      "learning_rate": 0.0005129338931073911,
      "loss": 2.9299,
      "step": 57323
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.13249135017395,
      "learning_rate": 0.0005129310115983424,
      "loss": 3.1551,
      "step": 57324
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6885794401168823,
      "learning_rate": 0.000512928130049706,
      "loss": 2.8097,
      "step": 57325
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.343266487121582,
      "learning_rate": 0.000512925248461482,
      "loss": 2.7622,
      "step": 57326
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4679218530654907,
      "learning_rate": 0.0005129223668336712,
      "loss": 2.9696,
      "step": 57327
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.804238796234131,
      "learning_rate": 0.0005129194851662742,
      "loss": 2.8712,
      "step": 57328
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4186683893203735,
      "learning_rate": 0.0005129166034592914,
      "loss": 3.0895,
      "step": 57329
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5612047910690308,
      "learning_rate": 0.0005129137217127234,
      "loss": 2.8268,
      "step": 57330
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.070709705352783,
      "learning_rate": 0.0005129108399265707,
      "loss": 3.0719,
      "step": 57331
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.560629963874817,
      "learning_rate": 0.0005129079581008339,
      "loss": 3.1422,
      "step": 57332
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6327197551727295,
      "learning_rate": 0.0005129050762355135,
      "loss": 3.1254,
      "step": 57333
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5568370819091797,
      "learning_rate": 0.00051290219433061,
      "loss": 3.0687,
      "step": 57334
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5094925165176392,
      "learning_rate": 0.0005128993123861238,
      "loss": 3.237,
      "step": 57335
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8522077798843384,
      "learning_rate": 0.0005128964304020558,
      "loss": 2.9851,
      "step": 57336
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.136680841445923,
      "learning_rate": 0.0005128935483784063,
      "loss": 2.772,
      "step": 57337
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4105390310287476,
      "learning_rate": 0.0005128906663151759,
      "loss": 3.1421,
      "step": 57338
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3223564624786377,
      "learning_rate": 0.000512887784212365,
      "loss": 2.7648,
      "step": 57339
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.447566032409668,
      "learning_rate": 0.0005128849020699743,
      "loss": 2.9139,
      "step": 57340
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5343761444091797,
      "learning_rate": 0.0005128820198880043,
      "loss": 3.2054,
      "step": 57341
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6221654415130615,
      "learning_rate": 0.0005128791376664555,
      "loss": 3.0214,
      "step": 57342
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4026145935058594,
      "learning_rate": 0.0005128762554053284,
      "loss": 2.9632,
      "step": 57343
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.492732286453247,
      "learning_rate": 0.0005128733731046236,
      "loss": 2.6832,
      "step": 57344
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6807256937026978,
      "learning_rate": 0.0005128704907643417,
      "loss": 2.9817,
      "step": 57345
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3183202743530273,
      "learning_rate": 0.0005128676083844832,
      "loss": 2.9519,
      "step": 57346
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.341170310974121,
      "learning_rate": 0.0005128647259650484,
      "loss": 3.1585,
      "step": 57347
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4815605878829956,
      "learning_rate": 0.0005128618435060382,
      "loss": 3.1363,
      "step": 57348
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3481611013412476,
      "learning_rate": 0.0005128589610074529,
      "loss": 3.0052,
      "step": 57349
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4514296054840088,
      "learning_rate": 0.000512856078469293,
      "loss": 3.1708,
      "step": 57350
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.235447883605957,
      "learning_rate": 0.0005128531958915593,
      "loss": 3.2718,
      "step": 57351
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9570080041885376,
      "learning_rate": 0.0005128503132742522,
      "loss": 3.0526,
      "step": 57352
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0713391304016113,
      "learning_rate": 0.000512847430617372,
      "loss": 2.893,
      "step": 57353
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.6986618041992188,
      "learning_rate": 0.0005128445479209195,
      "loss": 2.942,
      "step": 57354
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9218751192092896,
      "learning_rate": 0.0005128416651848954,
      "loss": 3.3328,
      "step": 57355
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5309542417526245,
      "learning_rate": 0.0005128387824092998,
      "loss": 3.0982,
      "step": 57356
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.800286889076233,
      "learning_rate": 0.0005128358995941336,
      "loss": 2.8704,
      "step": 57357
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6133198738098145,
      "learning_rate": 0.000512833016739397,
      "loss": 2.9514,
      "step": 57358
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.417769193649292,
      "learning_rate": 0.0005128301338450909,
      "loss": 3.105,
      "step": 57359
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.002676010131836,
      "learning_rate": 0.0005128272509112155,
      "loss": 2.9981,
      "step": 57360
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5257033109664917,
      "learning_rate": 0.0005128243679377717,
      "loss": 2.8179,
      "step": 57361
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5991556644439697,
      "learning_rate": 0.0005128214849247596,
      "loss": 3.1492,
      "step": 57362
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4429101943969727,
      "learning_rate": 0.0005128186018721802,
      "loss": 3.1791,
      "step": 57363
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.912833333015442,
      "learning_rate": 0.0005128157187800337,
      "loss": 2.9395,
      "step": 57364
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5045067071914673,
      "learning_rate": 0.0005128128356483207,
      "loss": 3.0913,
      "step": 57365
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9675770998001099,
      "learning_rate": 0.0005128099524770418,
      "loss": 2.8249,
      "step": 57366
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0474681854248047,
      "learning_rate": 0.0005128070692661975,
      "loss": 3.0022,
      "step": 57367
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6788339614868164,
      "learning_rate": 0.0005128041860157883,
      "loss": 2.95,
      "step": 57368
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.726643443107605,
      "learning_rate": 0.0005128013027258149,
      "loss": 3.3165,
      "step": 57369
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7824119329452515,
      "learning_rate": 0.0005127984193962777,
      "loss": 2.8403,
      "step": 57370
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.72296941280365,
      "learning_rate": 0.0005127955360271772,
      "loss": 2.9939,
      "step": 57371
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9219942092895508,
      "learning_rate": 0.0005127926526185141,
      "loss": 2.9341,
      "step": 57372
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.766421914100647,
      "learning_rate": 0.0005127897691702887,
      "loss": 3.0519,
      "step": 57373
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3106043338775635,
      "learning_rate": 0.0005127868856825017,
      "loss": 3.0533,
      "step": 57374
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.61143159866333,
      "learning_rate": 0.0005127840021551536,
      "loss": 3.0406,
      "step": 57375
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.948287844657898,
      "learning_rate": 0.000512781118588245,
      "loss": 3.2771,
      "step": 57376
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9223060607910156,
      "learning_rate": 0.0005127782349817763,
      "loss": 3.0328,
      "step": 57377
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0514726638793945,
      "learning_rate": 0.0005127753513357482,
      "loss": 3.0207,
      "step": 57378
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4599573612213135,
      "learning_rate": 0.0005127724676501611,
      "loss": 2.8252,
      "step": 57379
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4523611068725586,
      "learning_rate": 0.0005127695839250155,
      "loss": 3.2787,
      "step": 57380
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.416582703590393,
      "learning_rate": 0.0005127667001603121,
      "loss": 3.0721,
      "step": 57381
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.528308391571045,
      "learning_rate": 0.0005127638163560514,
      "loss": 3.2,
      "step": 57382
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6533440351486206,
      "learning_rate": 0.0005127609325122338,
      "loss": 3.154,
      "step": 57383
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6854435205459595,
      "learning_rate": 0.0005127580486288599,
      "loss": 2.8725,
      "step": 57384
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4811205863952637,
      "learning_rate": 0.0005127551647059303,
      "loss": 3.1111,
      "step": 57385
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.833815336227417,
      "learning_rate": 0.0005127522807434456,
      "loss": 3.1655,
      "step": 57386
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9057549238204956,
      "learning_rate": 0.0005127493967414062,
      "loss": 3.1024,
      "step": 57387
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9559763669967651,
      "learning_rate": 0.0005127465126998125,
      "loss": 2.8995,
      "step": 57388
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.49103581905365,
      "learning_rate": 0.0005127436286186653,
      "loss": 3.0739,
      "step": 57389
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.6884682178497314,
      "learning_rate": 0.0005127407444979651,
      "loss": 3.0866,
      "step": 57390
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.436615228652954,
      "learning_rate": 0.0005127378603377124,
      "loss": 3.3305,
      "step": 57391
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6462947130203247,
      "learning_rate": 0.0005127349761379077,
      "loss": 2.9737,
      "step": 57392
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5718410015106201,
      "learning_rate": 0.0005127320918985515,
      "loss": 3.2515,
      "step": 57393
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.591989040374756,
      "learning_rate": 0.0005127292076196444,
      "loss": 3.1351,
      "step": 57394
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.397205114364624,
      "learning_rate": 0.000512726323301187,
      "loss": 2.9218,
      "step": 57395
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7025432586669922,
      "learning_rate": 0.0005127234389431796,
      "loss": 2.9198,
      "step": 57396
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6980907917022705,
      "learning_rate": 0.0005127205545456231,
      "loss": 2.945,
      "step": 57397
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.084568738937378,
      "learning_rate": 0.0005127176701085177,
      "loss": 2.8871,
      "step": 57398
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6230984926223755,
      "learning_rate": 0.0005127147856318641,
      "loss": 3.2042,
      "step": 57399
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.342229962348938,
      "learning_rate": 0.0005127119011156628,
      "loss": 3.0616,
      "step": 57400
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9469250440597534,
      "learning_rate": 0.0005127090165599143,
      "loss": 3.106,
      "step": 57401
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.530658483505249,
      "learning_rate": 0.0005127061319646193,
      "loss": 3.2273,
      "step": 57402
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7757920026779175,
      "learning_rate": 0.0005127032473297782,
      "loss": 2.5978,
      "step": 57403
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.614229679107666,
      "learning_rate": 0.0005127003626553914,
      "loss": 2.9646,
      "step": 57404
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.119330883026123,
      "learning_rate": 0.0005126974779414597,
      "loss": 2.9166,
      "step": 57405
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.947910785675049,
      "learning_rate": 0.0005126945931879835,
      "loss": 3.0925,
      "step": 57406
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.734114170074463,
      "learning_rate": 0.0005126917083949634,
      "loss": 2.9285,
      "step": 57407
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7623180150985718,
      "learning_rate": 0.0005126888235623998,
      "loss": 3.12,
      "step": 57408
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.580310344696045,
      "learning_rate": 0.0005126859386902934,
      "loss": 2.9932,
      "step": 57409
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.2547295093536377,
      "learning_rate": 0.0005126830537786447,
      "loss": 3.1049,
      "step": 57410
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.355172872543335,
      "learning_rate": 0.0005126801688274541,
      "loss": 2.994,
      "step": 57411
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9440542459487915,
      "learning_rate": 0.0005126772838367224,
      "loss": 3.1555,
      "step": 57412
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.41329288482666,
      "learning_rate": 0.0005126743988064499,
      "loss": 2.8368,
      "step": 57413
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.9918465614318848,
      "learning_rate": 0.0005126715137366371,
      "loss": 2.9112,
      "step": 57414
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8012884855270386,
      "learning_rate": 0.0005126686286272848,
      "loss": 3.0479,
      "step": 57415
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8454557657241821,
      "learning_rate": 0.0005126657434783934,
      "loss": 3.0531,
      "step": 57416
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.116237163543701,
      "learning_rate": 0.0005126628582899634,
      "loss": 3.1517,
      "step": 57417
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8796117305755615,
      "learning_rate": 0.0005126599730619952,
      "loss": 3.215,
      "step": 57418
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1957178115844727,
      "learning_rate": 0.0005126570877944897,
      "loss": 3.098,
      "step": 57419
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5374431610107422,
      "learning_rate": 0.0005126542024874471,
      "loss": 3.0676,
      "step": 57420
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.311243772506714,
      "learning_rate": 0.0005126513171408682,
      "loss": 2.786,
      "step": 57421
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9440701007843018,
      "learning_rate": 0.0005126484317547534,
      "loss": 2.9016,
      "step": 57422
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4379446506500244,
      "learning_rate": 0.000512645546329103,
      "loss": 3.1039,
      "step": 57423
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.602637529373169,
      "learning_rate": 0.000512642660863918,
      "loss": 3.1137,
      "step": 57424
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1753368377685547,
      "learning_rate": 0.0005126397753591987,
      "loss": 3.0771,
      "step": 57425
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8677732944488525,
      "learning_rate": 0.0005126368898149456,
      "loss": 3.043,
      "step": 57426
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4092553853988647,
      "learning_rate": 0.0005126340042311594,
      "loss": 2.9538,
      "step": 57427
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3970462083816528,
      "learning_rate": 0.0005126311186078404,
      "loss": 2.79,
      "step": 57428
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4336968660354614,
      "learning_rate": 0.0005126282329449893,
      "loss": 2.8434,
      "step": 57429
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7078055143356323,
      "learning_rate": 0.0005126253472426066,
      "loss": 3.0194,
      "step": 57430
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8182501792907715,
      "learning_rate": 0.0005126224615006927,
      "loss": 2.7987,
      "step": 57431
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3429867029190063,
      "learning_rate": 0.0005126195757192485,
      "loss": 3.0826,
      "step": 57432
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4928343296051025,
      "learning_rate": 0.0005126166898982742,
      "loss": 2.7786,
      "step": 57433
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3456826210021973,
      "learning_rate": 0.0005126138040377705,
      "loss": 3.0324,
      "step": 57434
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1743903160095215,
      "learning_rate": 0.0005126109181377378,
      "loss": 3.0143,
      "step": 57435
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.6048684120178223,
      "learning_rate": 0.0005126080321981768,
      "loss": 2.8151,
      "step": 57436
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4507559537887573,
      "learning_rate": 0.0005126051462190878,
      "loss": 2.9178,
      "step": 57437
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9028956890106201,
      "learning_rate": 0.0005126022602004715,
      "loss": 2.9374,
      "step": 57438
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3946540355682373,
      "learning_rate": 0.0005125993741423285,
      "loss": 2.7486,
      "step": 57439
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6692485809326172,
      "learning_rate": 0.0005125964880446593,
      "loss": 2.9658,
      "step": 57440
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5438854694366455,
      "learning_rate": 0.0005125936019074645,
      "loss": 3.1295,
      "step": 57441
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.642369031906128,
      "learning_rate": 0.0005125907157307444,
      "loss": 3.0196,
      "step": 57442
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6647661924362183,
      "learning_rate": 0.0005125878295144996,
      "loss": 3.0171,
      "step": 57443
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4484963417053223,
      "learning_rate": 0.0005125849432587308,
      "loss": 3.1034,
      "step": 57444
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.773589849472046,
      "learning_rate": 0.0005125820569634386,
      "loss": 3.0529,
      "step": 57445
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.738485336303711,
      "learning_rate": 0.0005125791706286231,
      "loss": 2.9947,
      "step": 57446
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4510258436203003,
      "learning_rate": 0.0005125762842542853,
      "loss": 2.8402,
      "step": 57447
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.609989881515503,
      "learning_rate": 0.0005125733978404255,
      "loss": 2.9168,
      "step": 57448
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6005825996398926,
      "learning_rate": 0.0005125705113870442,
      "loss": 3.007,
      "step": 57449
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9228013753890991,
      "learning_rate": 0.0005125676248941422,
      "loss": 3.0618,
      "step": 57450
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5967144966125488,
      "learning_rate": 0.0005125647383617198,
      "loss": 3.1175,
      "step": 57451
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5656087398529053,
      "learning_rate": 0.0005125618517897776,
      "loss": 3.0011,
      "step": 57452
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6369459629058838,
      "learning_rate": 0.0005125589651783162,
      "loss": 3.0892,
      "step": 57453
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7207772731781006,
      "learning_rate": 0.000512556078527336,
      "loss": 3.0974,
      "step": 57454
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6044026613235474,
      "learning_rate": 0.0005125531918368375,
      "loss": 3.0309,
      "step": 57455
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3418734073638916,
      "learning_rate": 0.0005125503051068215,
      "loss": 3.0598,
      "step": 57456
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1257243156433105,
      "learning_rate": 0.0005125474183372884,
      "loss": 3.1607,
      "step": 57457
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.477091908454895,
      "learning_rate": 0.0005125445315282386,
      "loss": 2.9998,
      "step": 57458
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5214519500732422,
      "learning_rate": 0.000512541644679673,
      "loss": 3.1769,
      "step": 57459
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.593311071395874,
      "learning_rate": 0.0005125387577915916,
      "loss": 3.0212,
      "step": 57460
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7393348217010498,
      "learning_rate": 0.0005125358708639954,
      "loss": 2.9181,
      "step": 57461
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6085411310195923,
      "learning_rate": 0.0005125329838968848,
      "loss": 2.8324,
      "step": 57462
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.421474575996399,
      "learning_rate": 0.0005125300968902602,
      "loss": 2.9308,
      "step": 57463
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.532740592956543,
      "learning_rate": 0.0005125272098441223,
      "loss": 3.1414,
      "step": 57464
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6524574756622314,
      "learning_rate": 0.0005125243227584715,
      "loss": 3.1902,
      "step": 57465
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9198743104934692,
      "learning_rate": 0.0005125214356333084,
      "loss": 3.0369,
      "step": 57466
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8417279720306396,
      "learning_rate": 0.0005125185484686338,
      "loss": 2.8392,
      "step": 57467
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4664227962493896,
      "learning_rate": 0.0005125156612644479,
      "loss": 3.2241,
      "step": 57468
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7666338682174683,
      "learning_rate": 0.0005125127740207512,
      "loss": 3.0044,
      "step": 57469
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.591672420501709,
      "learning_rate": 0.0005125098867375444,
      "loss": 3.0973,
      "step": 57470
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.252573251724243,
      "learning_rate": 0.000512506999414828,
      "loss": 2.932,
      "step": 57471
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3232446908950806,
      "learning_rate": 0.0005125041120526025,
      "loss": 3.0734,
      "step": 57472
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4671270847320557,
      "learning_rate": 0.0005125012246508686,
      "loss": 3.0379,
      "step": 57473
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0993683338165283,
      "learning_rate": 0.0005124983372096267,
      "loss": 2.966,
      "step": 57474
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4267041683197021,
      "learning_rate": 0.0005124954497288772,
      "loss": 2.978,
      "step": 57475
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5651023387908936,
      "learning_rate": 0.0005124925622086209,
      "loss": 2.9945,
      "step": 57476
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3754853010177612,
      "learning_rate": 0.0005124896746488581,
      "loss": 3.0347,
      "step": 57477
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.442156434059143,
      "learning_rate": 0.0005124867870495897,
      "loss": 3.2216,
      "step": 57478
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7967970371246338,
      "learning_rate": 0.0005124838994108159,
      "loss": 2.9949,
      "step": 57479
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6280066967010498,
      "learning_rate": 0.0005124810117325372,
      "loss": 3.0922,
      "step": 57480
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8401415348052979,
      "learning_rate": 0.0005124781240147543,
      "loss": 2.9982,
      "step": 57481
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3645994663238525,
      "learning_rate": 0.0005124752362574678,
      "loss": 2.9926,
      "step": 57482
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8054143190383911,
      "learning_rate": 0.0005124723484606781,
      "loss": 3.1381,
      "step": 57483
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6217687129974365,
      "learning_rate": 0.0005124694606243857,
      "loss": 3.0071,
      "step": 57484
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6169252395629883,
      "learning_rate": 0.0005124665727485914,
      "loss": 3.1385,
      "step": 57485
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8284976482391357,
      "learning_rate": 0.0005124636848332954,
      "loss": 3.0537,
      "step": 57486
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1191368103027344,
      "learning_rate": 0.0005124607968784985,
      "loss": 3.1054,
      "step": 57487
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0265674591064453,
      "learning_rate": 0.0005124579088842011,
      "loss": 2.8817,
      "step": 57488
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8318395614624023,
      "learning_rate": 0.0005124550208504036,
      "loss": 2.9244,
      "step": 57489
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5169585943222046,
      "learning_rate": 0.0005124521327771068,
      "loss": 2.8043,
      "step": 57490
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3262991905212402,
      "learning_rate": 0.0005124492446643113,
      "loss": 3.1072,
      "step": 57491
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2159383296966553,
      "learning_rate": 0.0005124463565120173,
      "loss": 2.9565,
      "step": 57492
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4778107404708862,
      "learning_rate": 0.0005124434683202256,
      "loss": 3.0731,
      "step": 57493
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3011006116867065,
      "learning_rate": 0.0005124405800889366,
      "loss": 3.2262,
      "step": 57494
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3740931749343872,
      "learning_rate": 0.0005124376918181509,
      "loss": 2.875,
      "step": 57495
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6607705354690552,
      "learning_rate": 0.000512434803507869,
      "loss": 2.7727,
      "step": 57496
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.968438982963562,
      "learning_rate": 0.0005124319151580915,
      "loss": 3.1712,
      "step": 57497
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5181535482406616,
      "learning_rate": 0.0005124290267688189,
      "loss": 3.1917,
      "step": 57498
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.703808307647705,
      "learning_rate": 0.0005124261383400516,
      "loss": 2.8516,
      "step": 57499
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6501964330673218,
      "learning_rate": 0.0005124232498717905,
      "loss": 2.9754,
      "step": 57500
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4749224185943604,
      "learning_rate": 0.0005124203613640358,
      "loss": 3.0309,
      "step": 57501
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2973185777664185,
      "learning_rate": 0.0005124174728167881,
      "loss": 2.9089,
      "step": 57502
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4753150939941406,
      "learning_rate": 0.000512414584230048,
      "loss": 3.0058,
      "step": 57503
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0476181507110596,
      "learning_rate": 0.000512411695603816,
      "loss": 3.1048,
      "step": 57504
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.599293828010559,
      "learning_rate": 0.0005124088069380926,
      "loss": 2.8311,
      "step": 57505
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7876195907592773,
      "learning_rate": 0.0005124059182328786,
      "loss": 3.2633,
      "step": 57506
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4232282638549805,
      "learning_rate": 0.0005124030294881742,
      "loss": 3.0831,
      "step": 57507
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.5206665992736816,
      "learning_rate": 0.00051240014070398,
      "loss": 3.0353,
      "step": 57508
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6888154745101929,
      "learning_rate": 0.0005123972518802968,
      "loss": 3.0965,
      "step": 57509
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.023799419403076,
      "learning_rate": 0.0005123943630171248,
      "loss": 3.0249,
      "step": 57510
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.578130006790161,
      "learning_rate": 0.0005123914741144647,
      "loss": 2.8818,
      "step": 57511
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6586397886276245,
      "learning_rate": 0.0005123885851723171,
      "loss": 3.1044,
      "step": 57512
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7328827381134033,
      "learning_rate": 0.0005123856961906823,
      "loss": 3.0302,
      "step": 57513
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.9606776237487793,
      "learning_rate": 0.000512382807169561,
      "loss": 2.8949,
      "step": 57514
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.7376370429992676,
      "learning_rate": 0.0005123799181089539,
      "loss": 2.7407,
      "step": 57515
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1096715927124023,
      "learning_rate": 0.0005123770290088611,
      "loss": 3.081,
      "step": 57516
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.7126331329345703,
      "learning_rate": 0.0005123741398692837,
      "loss": 3.108,
      "step": 57517
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2745800018310547,
      "learning_rate": 0.0005123712506902217,
      "loss": 2.7999,
      "step": 57518
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6679915189743042,
      "learning_rate": 0.000512368361471676,
      "loss": 2.9587,
      "step": 57519
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9895042181015015,
      "learning_rate": 0.0005123654722136471,
      "loss": 3.0246,
      "step": 57520
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6264605522155762,
      "learning_rate": 0.0005123625829161352,
      "loss": 3.2106,
      "step": 57521
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8067837953567505,
      "learning_rate": 0.0005123596935791413,
      "loss": 3.1537,
      "step": 57522
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4707574844360352,
      "learning_rate": 0.0005123568042026657,
      "loss": 2.8818,
      "step": 57523
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2564404010772705,
      "learning_rate": 0.0005123539147867088,
      "loss": 3.1832,
      "step": 57524
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.024531841278076,
      "learning_rate": 0.0005123510253312715,
      "loss": 3.1101,
      "step": 57525
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7779024839401245,
      "learning_rate": 0.0005123481358363539,
      "loss": 2.9446,
      "step": 57526
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.942865014076233,
      "learning_rate": 0.000512345246301957,
      "loss": 3.082,
      "step": 57527
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.022524356842041,
      "learning_rate": 0.000512342356728081,
      "loss": 2.981,
      "step": 57528
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6209050416946411,
      "learning_rate": 0.0005123394671147267,
      "loss": 3.1327,
      "step": 57529
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6291993856430054,
      "learning_rate": 0.0005123365774618943,
      "loss": 2.8632,
      "step": 57530
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4635372161865234,
      "learning_rate": 0.0005123336877695846,
      "loss": 3.2365,
      "step": 57531
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7328394651412964,
      "learning_rate": 0.000512330798037798,
      "loss": 3.2347,
      "step": 57532
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0254955291748047,
      "learning_rate": 0.0005123279082665351,
      "loss": 3.1281,
      "step": 57533
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6025737524032593,
      "learning_rate": 0.0005123250184557965,
      "loss": 3.0841,
      "step": 57534
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5430580377578735,
      "learning_rate": 0.0005123221286055826,
      "loss": 3.1394,
      "step": 57535
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8363265991210938,
      "learning_rate": 0.0005123192387158941,
      "loss": 2.9675,
      "step": 57536
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8546689748764038,
      "learning_rate": 0.0005123163487867313,
      "loss": 2.8034,
      "step": 57537
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7335584163665771,
      "learning_rate": 0.0005123134588180951,
      "loss": 3.1784,
      "step": 57538
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3816020488739014,
      "learning_rate": 0.0005123105688099856,
      "loss": 3.2642,
      "step": 57539
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.065234899520874,
      "learning_rate": 0.0005123076787624036,
      "loss": 3.1524,
      "step": 57540
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.554842710494995,
      "learning_rate": 0.0005123047886753497,
      "loss": 3.0997,
      "step": 57541
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.243004322052002,
      "learning_rate": 0.0005123018985488243,
      "loss": 2.9693,
      "step": 57542
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7107456922531128,
      "learning_rate": 0.0005122990083828281,
      "loss": 3.0214,
      "step": 57543
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.685453414916992,
      "learning_rate": 0.0005122961181773612,
      "loss": 3.1306,
      "step": 57544
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2896318435668945,
      "learning_rate": 0.0005122932279324246,
      "loss": 3.2681,
      "step": 57545
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9430679082870483,
      "learning_rate": 0.0005122903376480188,
      "loss": 2.9718,
      "step": 57546
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.447290062904358,
      "learning_rate": 0.000512287447324144,
      "loss": 3.1722,
      "step": 57547
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0693888664245605,
      "learning_rate": 0.0005122845569608012,
      "loss": 3.0414,
      "step": 57548
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3935298919677734,
      "learning_rate": 0.0005122816665579904,
      "loss": 3.1527,
      "step": 57549
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.68788480758667,
      "learning_rate": 0.0005122787761157126,
      "loss": 2.7673,
      "step": 57550
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.018110513687134,
      "learning_rate": 0.0005122758856339683,
      "loss": 3.0271,
      "step": 57551
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1689136028289795,
      "learning_rate": 0.0005122729951127576,
      "loss": 3.0353,
      "step": 57552
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.158567428588867,
      "learning_rate": 0.0005122701045520816,
      "loss": 3.1077,
      "step": 57553
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.519793152809143,
      "learning_rate": 0.0005122672139519405,
      "loss": 3.2354,
      "step": 57554
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.654444694519043,
      "learning_rate": 0.0005122643233123348,
      "loss": 3.083,
      "step": 57555
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9732537269592285,
      "learning_rate": 0.0005122614326332653,
      "loss": 3.0756,
      "step": 57556
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4567372798919678,
      "learning_rate": 0.0005122585419147324,
      "loss": 3.046,
      "step": 57557
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.868952751159668,
      "learning_rate": 0.0005122556511567365,
      "loss": 2.9897,
      "step": 57558
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.7614660263061523,
      "learning_rate": 0.0005122527603592784,
      "loss": 3.1349,
      "step": 57559
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8463926315307617,
      "learning_rate": 0.0005122498695223585,
      "loss": 3.2284,
      "step": 57560
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5887815952301025,
      "learning_rate": 0.0005122469786459772,
      "loss": 2.9292,
      "step": 57561
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.704234004020691,
      "learning_rate": 0.0005122440877301354,
      "loss": 3.0705,
      "step": 57562
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.886805772781372,
      "learning_rate": 0.0005122411967748332,
      "loss": 2.9559,
      "step": 57563
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1103763580322266,
      "learning_rate": 0.0005122383057800714,
      "loss": 2.8415,
      "step": 57564
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.595840573310852,
      "learning_rate": 0.0005122354147458506,
      "loss": 2.7653,
      "step": 57565
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.282414674758911,
      "learning_rate": 0.0005122325236721712,
      "loss": 3.192,
      "step": 57566
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.04484224319458,
      "learning_rate": 0.0005122296325590337,
      "loss": 2.8342,
      "step": 57567
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.129443883895874,
      "learning_rate": 0.0005122267414064387,
      "loss": 2.9735,
      "step": 57568
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4171292781829834,
      "learning_rate": 0.0005122238502143868,
      "loss": 2.9841,
      "step": 57569
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7200915813446045,
      "learning_rate": 0.0005122209589828784,
      "loss": 3.0514,
      "step": 57570
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4666810035705566,
      "learning_rate": 0.0005122180677119142,
      "loss": 2.8783,
      "step": 57571
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7002081871032715,
      "learning_rate": 0.0005122151764014948,
      "loss": 3.195,
      "step": 57572
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5065091848373413,
      "learning_rate": 0.0005122122850516204,
      "loss": 3.0449,
      "step": 57573
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5661479234695435,
      "learning_rate": 0.0005122093936622917,
      "loss": 2.9245,
      "step": 57574
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7841339111328125,
      "learning_rate": 0.0005122065022335094,
      "loss": 2.96,
      "step": 57575
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4467883110046387,
      "learning_rate": 0.0005122036107652738,
      "loss": 2.9735,
      "step": 57576
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.523768424987793,
      "learning_rate": 0.0005122007192575855,
      "loss": 2.9493,
      "step": 57577
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3748635053634644,
      "learning_rate": 0.0005121978277104453,
      "loss": 3.2853,
      "step": 57578
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.967260718345642,
      "learning_rate": 0.0005121949361238533,
      "loss": 3.2403,
      "step": 57579
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3677712678909302,
      "learning_rate": 0.0005121920444978103,
      "loss": 3.1056,
      "step": 57580
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5325112342834473,
      "learning_rate": 0.0005121891528323169,
      "loss": 3.1328,
      "step": 57581
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5723729133605957,
      "learning_rate": 0.0005121862611273735,
      "loss": 3.0918,
      "step": 57582
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7267988920211792,
      "learning_rate": 0.0005121833693829806,
      "loss": 3.029,
      "step": 57583
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7446414232254028,
      "learning_rate": 0.0005121804775991388,
      "loss": 2.9455,
      "step": 57584
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.165086507797241,
      "learning_rate": 0.0005121775857758487,
      "loss": 3.0469,
      "step": 57585
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7295845746994019,
      "learning_rate": 0.0005121746939131108,
      "loss": 3.069,
      "step": 57586
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0090420246124268,
      "learning_rate": 0.0005121718020109255,
      "loss": 3.065,
      "step": 57587
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8746308088302612,
      "learning_rate": 0.0005121689100692936,
      "loss": 3.0288,
      "step": 57588
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3271440267562866,
      "learning_rate": 0.0005121660180882154,
      "loss": 2.888,
      "step": 57589
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3919131755828857,
      "learning_rate": 0.0005121631260676915,
      "loss": 3.0049,
      "step": 57590
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7501237392425537,
      "learning_rate": 0.0005121602340077226,
      "loss": 3.276,
      "step": 57591
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2236979007720947,
      "learning_rate": 0.000512157341908309,
      "loss": 2.9585,
      "step": 57592
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.610806941986084,
      "learning_rate": 0.0005121544497694514,
      "loss": 3.1919,
      "step": 57593
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.559343695640564,
      "learning_rate": 0.0005121515575911503,
      "loss": 3.1562,
      "step": 57594
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.871010422706604,
      "learning_rate": 0.0005121486653734063,
      "loss": 3.1025,
      "step": 57595
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3981307744979858,
      "learning_rate": 0.0005121457731162197,
      "loss": 3.116,
      "step": 57596
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7672213315963745,
      "learning_rate": 0.0005121428808195911,
      "loss": 3.1268,
      "step": 57597
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4898340702056885,
      "learning_rate": 0.0005121399884835214,
      "loss": 3.0086,
      "step": 57598
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.436030387878418,
      "learning_rate": 0.0005121370961080107,
      "loss": 2.7122,
      "step": 57599
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6371663808822632,
      "learning_rate": 0.0005121342036930597,
      "loss": 3.0604,
      "step": 57600
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5361871719360352,
      "learning_rate": 0.000512131311238669,
      "loss": 3.0658,
      "step": 57601
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6010911464691162,
      "learning_rate": 0.0005121284187448392,
      "loss": 2.9993,
      "step": 57602
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6462092399597168,
      "learning_rate": 0.0005121255262115706,
      "loss": 3.2018,
      "step": 57603
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.530224084854126,
      "learning_rate": 0.0005121226336388639,
      "loss": 3.1427,
      "step": 57604
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7703890800476074,
      "learning_rate": 0.0005121197410267196,
      "loss": 2.897,
      "step": 57605
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8038924932479858,
      "learning_rate": 0.0005121168483751381,
      "loss": 2.9147,
      "step": 57606
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3916699886322021,
      "learning_rate": 0.0005121139556841202,
      "loss": 3.1222,
      "step": 57607
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.660981297492981,
      "learning_rate": 0.0005121110629536664,
      "loss": 2.9078,
      "step": 57608
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6352956295013428,
      "learning_rate": 0.000512108170183777,
      "loss": 2.8382,
      "step": 57609
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7118085622787476,
      "learning_rate": 0.0005121052773744527,
      "loss": 3.1624,
      "step": 57610
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.716415286064148,
      "learning_rate": 0.0005121023845256941,
      "loss": 3.0451,
      "step": 57611
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6591774225234985,
      "learning_rate": 0.0005120994916375016,
      "loss": 3.1826,
      "step": 57612
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.665892243385315,
      "learning_rate": 0.0005120965987098758,
      "loss": 2.8886,
      "step": 57613
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5333235263824463,
      "learning_rate": 0.0005120937057428173,
      "loss": 3.0672,
      "step": 57614
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5768609046936035,
      "learning_rate": 0.0005120908127363265,
      "loss": 2.9395,
      "step": 57615
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1033639907836914,
      "learning_rate": 0.0005120879196904041,
      "loss": 2.9493,
      "step": 57616
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5743427276611328,
      "learning_rate": 0.0005120850266050505,
      "loss": 3.307,
      "step": 57617
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7126613855361938,
      "learning_rate": 0.0005120821334802662,
      "loss": 2.9649,
      "step": 57618
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5967774391174316,
      "learning_rate": 0.0005120792403160519,
      "loss": 2.8202,
      "step": 57619
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1096582412719727,
      "learning_rate": 0.0005120763471124081,
      "loss": 3.195,
      "step": 57620
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1391942501068115,
      "learning_rate": 0.0005120734538693354,
      "loss": 2.8873,
      "step": 57621
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5201528072357178,
      "learning_rate": 0.0005120705605868341,
      "loss": 2.9897,
      "step": 57622
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.5762083530426025,
      "learning_rate": 0.0005120676672649048,
      "loss": 3.1365,
      "step": 57623
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1188881397247314,
      "learning_rate": 0.0005120647739035484,
      "loss": 2.9679,
      "step": 57624
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5837724208831787,
      "learning_rate": 0.0005120618805027648,
      "loss": 3.1465,
      "step": 57625
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.319777488708496,
      "learning_rate": 0.0005120589870625552,
      "loss": 3.1738,
      "step": 57626
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.131770133972168,
      "learning_rate": 0.0005120560935829197,
      "loss": 3.0178,
      "step": 57627
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4337372779846191,
      "learning_rate": 0.000512053200063859,
      "loss": 2.9209,
      "step": 57628
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.829693078994751,
      "learning_rate": 0.0005120503065053736,
      "loss": 3.085,
      "step": 57629
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8402884006500244,
      "learning_rate": 0.000512047412907464,
      "loss": 3.0406,
      "step": 57630
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7967135906219482,
      "learning_rate": 0.0005120445192701308,
      "loss": 3.0844,
      "step": 57631
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5332040786743164,
      "learning_rate": 0.0005120416255933746,
      "loss": 2.9218,
      "step": 57632
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3364036083221436,
      "learning_rate": 0.0005120387318771958,
      "loss": 2.9991,
      "step": 57633
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5143007040023804,
      "learning_rate": 0.0005120358381215949,
      "loss": 3.0676,
      "step": 57634
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9155653715133667,
      "learning_rate": 0.0005120329443265727,
      "loss": 3.1785,
      "step": 57635
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.361096739768982,
      "learning_rate": 0.0005120300504921295,
      "loss": 2.927,
      "step": 57636
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7474548816680908,
      "learning_rate": 0.000512027156618266,
      "loss": 2.8351,
      "step": 57637
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9491859674453735,
      "learning_rate": 0.0005120242627049825,
      "loss": 2.9162,
      "step": 57638
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.264451026916504,
      "learning_rate": 0.0005120213687522797,
      "loss": 2.9036,
      "step": 57639
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4751750230789185,
      "learning_rate": 0.0005120184747601582,
      "loss": 3.2151,
      "step": 57640
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5781558752059937,
      "learning_rate": 0.0005120155807286184,
      "loss": 3.1552,
      "step": 57641
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7774345874786377,
      "learning_rate": 0.000512012686657661,
      "loss": 2.9268,
      "step": 57642
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.4606847763061523,
      "learning_rate": 0.0005120097925472863,
      "loss": 2.7903,
      "step": 57643
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8760305643081665,
      "learning_rate": 0.0005120068983974951,
      "loss": 3.219,
      "step": 57644
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1730551719665527,
      "learning_rate": 0.0005120040042082878,
      "loss": 3.0675,
      "step": 57645
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.776401996612549,
      "learning_rate": 0.0005120011099796649,
      "loss": 3.0397,
      "step": 57646
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.558134078979492,
      "learning_rate": 0.000511998215711627,
      "loss": 2.8544,
      "step": 57647
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8559443950653076,
      "learning_rate": 0.0005119953214041747,
      "loss": 3.193,
      "step": 57648
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1537423133850098,
      "learning_rate": 0.0005119924270573084,
      "loss": 2.8073,
      "step": 57649
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8915867805480957,
      "learning_rate": 0.0005119895326710287,
      "loss": 3.066,
      "step": 57650
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.960666537284851,
      "learning_rate": 0.0005119866382453361,
      "loss": 2.8356,
      "step": 57651
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7264890670776367,
      "learning_rate": 0.0005119837437802313,
      "loss": 3.1268,
      "step": 57652
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5379174947738647,
      "learning_rate": 0.0005119808492757147,
      "loss": 2.9881,
      "step": 57653
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.5157928466796875,
      "learning_rate": 0.0005119779547317868,
      "loss": 3.1954,
      "step": 57654
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.5205113887786865,
      "learning_rate": 0.0005119750601484482,
      "loss": 3.143,
      "step": 57655
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5205881595611572,
      "learning_rate": 0.0005119721655256996,
      "loss": 2.9611,
      "step": 57656
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7077455520629883,
      "learning_rate": 0.0005119692708635412,
      "loss": 3.024,
      "step": 57657
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6605792045593262,
      "learning_rate": 0.0005119663761619736,
      "loss": 3.0381,
      "step": 57658
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5844433307647705,
      "learning_rate": 0.0005119634814209977,
      "loss": 3.2707,
      "step": 57659
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5502781867980957,
      "learning_rate": 0.0005119605866406137,
      "loss": 3.0115,
      "step": 57660
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.437583565711975,
      "learning_rate": 0.0005119576918208222,
      "loss": 3.0736,
      "step": 57661
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5575875043869019,
      "learning_rate": 0.0005119547969616238,
      "loss": 3.0725,
      "step": 57662
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3257951736450195,
      "learning_rate": 0.000511951902063019,
      "loss": 2.9762,
      "step": 57663
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3339980840682983,
      "learning_rate": 0.0005119490071250082,
      "loss": 3.0488,
      "step": 57664
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6115145683288574,
      "learning_rate": 0.0005119461121475922,
      "loss": 3.2064,
      "step": 57665
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.945619821548462,
      "learning_rate": 0.0005119432171307714,
      "loss": 3.332,
      "step": 57666
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7174879312515259,
      "learning_rate": 0.0005119403220745464,
      "loss": 2.9728,
      "step": 57667
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.899973750114441,
      "learning_rate": 0.0005119374269789177,
      "loss": 2.9122,
      "step": 57668
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7505691051483154,
      "learning_rate": 0.0005119345318438858,
      "loss": 2.9033,
      "step": 57669
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6979894638061523,
      "learning_rate": 0.0005119316366694512,
      "loss": 2.9574,
      "step": 57670
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7065041065216064,
      "learning_rate": 0.0005119287414556146,
      "loss": 2.9524,
      "step": 57671
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4451676607131958,
      "learning_rate": 0.0005119258462023764,
      "loss": 3.0542,
      "step": 57672
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5327969789505005,
      "learning_rate": 0.0005119229509097372,
      "loss": 2.9577,
      "step": 57673
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.510979175567627,
      "learning_rate": 0.0005119200555776976,
      "loss": 3.0239,
      "step": 57674
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.944405198097229,
      "learning_rate": 0.000511917160206258,
      "loss": 3.0842,
      "step": 57675
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4701011180877686,
      "learning_rate": 0.0005119142647954189,
      "loss": 3.1547,
      "step": 57676
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6075265407562256,
      "learning_rate": 0.000511911369345181,
      "loss": 3.0296,
      "step": 57677
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8343597650527954,
      "learning_rate": 0.0005119084738555448,
      "loss": 2.6949,
      "step": 57678
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4640384912490845,
      "learning_rate": 0.0005119055783265108,
      "loss": 2.9791,
      "step": 57679
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.440697193145752,
      "learning_rate": 0.0005119026827580796,
      "loss": 2.7717,
      "step": 57680
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8732564449310303,
      "learning_rate": 0.0005118997871502517,
      "loss": 2.9863,
      "step": 57681
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0923099517822266,
      "learning_rate": 0.0005118968915030276,
      "loss": 3.1295,
      "step": 57682
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6690926551818848,
      "learning_rate": 0.0005118939958164078,
      "loss": 3.2968,
      "step": 57683
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.007150888442993,
      "learning_rate": 0.000511891100090393,
      "loss": 2.9387,
      "step": 57684
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4936363697052002,
      "learning_rate": 0.0005118882043249836,
      "loss": 3.0801,
      "step": 57685
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.741051435470581,
      "learning_rate": 0.0005118853085201802,
      "loss": 2.9973,
      "step": 57686
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5764470100402832,
      "learning_rate": 0.0005118824126759833,
      "loss": 2.9797,
      "step": 57687
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.390772819519043,
      "learning_rate": 0.0005118795167923935,
      "loss": 2.8907,
      "step": 57688
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.851182222366333,
      "learning_rate": 0.0005118766208694112,
      "loss": 3.0466,
      "step": 57689
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.589570164680481,
      "learning_rate": 0.0005118737249070371,
      "loss": 3.1001,
      "step": 57690
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4307820796966553,
      "learning_rate": 0.0005118708289052717,
      "loss": 2.8148,
      "step": 57691
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.899152159690857,
      "learning_rate": 0.0005118679328641154,
      "loss": 2.9124,
      "step": 57692
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.270735740661621,
      "learning_rate": 0.000511865036783569,
      "loss": 2.7941,
      "step": 57693
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9843322038650513,
      "learning_rate": 0.0005118621406636328,
      "loss": 3.0938,
      "step": 57694
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5450907945632935,
      "learning_rate": 0.0005118592445043076,
      "loss": 3.2286,
      "step": 57695
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7352468967437744,
      "learning_rate": 0.0005118563483055936,
      "loss": 3.1473,
      "step": 57696
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.147792339324951,
      "learning_rate": 0.0005118534520674915,
      "loss": 3.1199,
      "step": 57697
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4893016815185547,
      "learning_rate": 0.0005118505557900019,
      "loss": 3.2465,
      "step": 57698
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5623806715011597,
      "learning_rate": 0.0005118476594731253,
      "loss": 3.0771,
      "step": 57699
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2469825744628906,
      "learning_rate": 0.0005118447631168621,
      "loss": 3.0388,
      "step": 57700
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.659203052520752,
      "learning_rate": 0.000511841866721213,
      "loss": 3.1343,
      "step": 57701
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5241725444793701,
      "learning_rate": 0.0005118389702861787,
      "loss": 2.8654,
      "step": 57702
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7040059566497803,
      "learning_rate": 0.0005118360738117593,
      "loss": 2.9571,
      "step": 57703
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.160552501678467,
      "learning_rate": 0.0005118331772979557,
      "loss": 3.0093,
      "step": 57704
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.30491304397583,
      "learning_rate": 0.0005118302807447683,
      "loss": 2.8273,
      "step": 57705
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6705620288848877,
      "learning_rate": 0.0005118273841521975,
      "loss": 2.9677,
      "step": 57706
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9063575267791748,
      "learning_rate": 0.0005118244875202441,
      "loss": 3.1361,
      "step": 57707
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.541935920715332,
      "learning_rate": 0.0005118215908489086,
      "loss": 2.9685,
      "step": 57708
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6188015937805176,
      "learning_rate": 0.0005118186941381914,
      "loss": 3.0471,
      "step": 57709
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7925959825515747,
      "learning_rate": 0.000511815797388093,
      "loss": 3.1079,
      "step": 57710
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.377639055252075,
      "learning_rate": 0.0005118129005986143,
      "loss": 3.2057,
      "step": 57711
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.654625654220581,
      "learning_rate": 0.0005118100037697555,
      "loss": 2.8694,
      "step": 57712
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7974522113800049,
      "learning_rate": 0.0005118071069015171,
      "loss": 2.8886,
      "step": 57713
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2413477897644043,
      "learning_rate": 0.0005118042099938998,
      "loss": 3.0941,
      "step": 57714
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.2019689083099365,
      "learning_rate": 0.0005118013130469041,
      "loss": 2.9132,
      "step": 57715
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.487717866897583,
      "learning_rate": 0.0005117984160605306,
      "loss": 3.0423,
      "step": 57716
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5680460929870605,
      "learning_rate": 0.0005117955190347798,
      "loss": 3.1887,
      "step": 57717
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.828629732131958,
      "learning_rate": 0.0005117926219696521,
      "loss": 3.2106,
      "step": 57718
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0723559856414795,
      "learning_rate": 0.0005117897248651482,
      "loss": 2.8998,
      "step": 57719
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.68957257270813,
      "learning_rate": 0.0005117868277212687,
      "loss": 2.8792,
      "step": 57720
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5622633695602417,
      "learning_rate": 0.0005117839305380139,
      "loss": 3.1426,
      "step": 57721
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1752161979675293,
      "learning_rate": 0.0005117810333153845,
      "loss": 3.0408,
      "step": 57722
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.806497812271118,
      "learning_rate": 0.0005117781360533811,
      "loss": 3.1475,
      "step": 57723
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1702191829681396,
      "learning_rate": 0.000511775238752004,
      "loss": 2.9095,
      "step": 57724
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9844578504562378,
      "learning_rate": 0.0005117723414112538,
      "loss": 2.8959,
      "step": 57725
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.682404637336731,
      "learning_rate": 0.0005117694440311313,
      "loss": 3.1385,
      "step": 57726
    },
    {
      "epoch": 0.75,
      "grad_norm": 4.185423374176025,
      "learning_rate": 0.0005117665466116369,
      "loss": 3.0008,
      "step": 57727
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.3645660877227783,
      "learning_rate": 0.000511763649152771,
      "loss": 3.0666,
      "step": 57728
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.707603931427002,
      "learning_rate": 0.0005117607516545343,
      "loss": 2.9845,
      "step": 57729
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.786355972290039,
      "learning_rate": 0.0005117578541169272,
      "loss": 3.1316,
      "step": 57730
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.135444164276123,
      "learning_rate": 0.0005117549565399503,
      "loss": 3.0302,
      "step": 57731
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.178243398666382,
      "learning_rate": 0.0005117520589236043,
      "loss": 2.7114,
      "step": 57732
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3915103673934937,
      "learning_rate": 0.0005117491612678895,
      "loss": 3.0453,
      "step": 57733
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4240552186965942,
      "learning_rate": 0.0005117462635728067,
      "loss": 2.913,
      "step": 57734
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.167238473892212,
      "learning_rate": 0.0005117433658383561,
      "loss": 2.9139,
      "step": 57735
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.735907793045044,
      "learning_rate": 0.0005117404680645383,
      "loss": 2.9774,
      "step": 57736
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4141663312911987,
      "learning_rate": 0.0005117375702513541,
      "loss": 3.3072,
      "step": 57737
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8882118463516235,
      "learning_rate": 0.000511734672398804,
      "loss": 3.0407,
      "step": 57738
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3542495965957642,
      "learning_rate": 0.0005117317745068883,
      "loss": 2.8234,
      "step": 57739
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.897101879119873,
      "learning_rate": 0.0005117288765756076,
      "loss": 3.0305,
      "step": 57740
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.153069257736206,
      "learning_rate": 0.0005117259786049626,
      "loss": 2.9845,
      "step": 57741
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.1327545642852783,
      "learning_rate": 0.0005117230805949537,
      "loss": 2.9749,
      "step": 57742
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5895377397537231,
      "learning_rate": 0.0005117201825455814,
      "loss": 2.8682,
      "step": 57743
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4305647611618042,
      "learning_rate": 0.0005117172844568465,
      "loss": 3.3365,
      "step": 57744
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5396606922149658,
      "learning_rate": 0.0005117143863287493,
      "loss": 3.2101,
      "step": 57745
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0374228954315186,
      "learning_rate": 0.0005117114881612903,
      "loss": 2.6762,
      "step": 57746
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.458848237991333,
      "learning_rate": 0.0005117085899544703,
      "loss": 3.1661,
      "step": 57747
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.65207040309906,
      "learning_rate": 0.0005117056917082896,
      "loss": 3.0223,
      "step": 57748
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9016692638397217,
      "learning_rate": 0.0005117027934227488,
      "loss": 3.0227,
      "step": 57749
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.72065806388855,
      "learning_rate": 0.0005116998950978485,
      "loss": 2.9505,
      "step": 57750
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9824178218841553,
      "learning_rate": 0.0005116969967335892,
      "loss": 2.8402,
      "step": 57751
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.358816146850586,
      "learning_rate": 0.0005116940983299714,
      "loss": 2.8943,
      "step": 57752
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8647096157073975,
      "learning_rate": 0.0005116911998869956,
      "loss": 3.0171,
      "step": 57753
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0270557403564453,
      "learning_rate": 0.0005116883014046625,
      "loss": 3.3082,
      "step": 57754
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3688186407089233,
      "learning_rate": 0.0005116854028829725,
      "loss": 3.2483,
      "step": 57755
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9300144910812378,
      "learning_rate": 0.0005116825043219262,
      "loss": 2.7885,
      "step": 57756
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.2497146129608154,
      "learning_rate": 0.0005116796057215242,
      "loss": 3.0705,
      "step": 57757
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3844209909439087,
      "learning_rate": 0.0005116767070817669,
      "loss": 3.0729,
      "step": 57758
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8882142305374146,
      "learning_rate": 0.0005116738084026549,
      "loss": 2.926,
      "step": 57759
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.540558338165283,
      "learning_rate": 0.0005116709096841887,
      "loss": 2.6435,
      "step": 57760
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9344584941864014,
      "learning_rate": 0.0005116680109263689,
      "loss": 2.7927,
      "step": 57761
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8266459703445435,
      "learning_rate": 0.000511665112129196,
      "loss": 3.034,
      "step": 57762
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7193018198013306,
      "learning_rate": 0.0005116622132926707,
      "loss": 2.9924,
      "step": 57763
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6582127809524536,
      "learning_rate": 0.0005116593144167933,
      "loss": 2.8983,
      "step": 57764
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6197503805160522,
      "learning_rate": 0.0005116564155015645,
      "loss": 3.1074,
      "step": 57765
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6487525701522827,
      "learning_rate": 0.0005116535165469848,
      "loss": 2.8172,
      "step": 57766
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4232532978057861,
      "learning_rate": 0.0005116506175530546,
      "loss": 2.8979,
      "step": 57767
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5633249282836914,
      "learning_rate": 0.0005116477185197746,
      "loss": 2.9724,
      "step": 57768
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6261186599731445,
      "learning_rate": 0.0005116448194471453,
      "loss": 3.0401,
      "step": 57769
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5934007167816162,
      "learning_rate": 0.0005116419203351672,
      "loss": 3.1709,
      "step": 57770
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9997777938842773,
      "learning_rate": 0.0005116390211838409,
      "loss": 2.9079,
      "step": 57771
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7928556203842163,
      "learning_rate": 0.000511636121993167,
      "loss": 2.9817,
      "step": 57772
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.925977110862732,
      "learning_rate": 0.0005116332227631458,
      "loss": 3.2214,
      "step": 57773
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5936970710754395,
      "learning_rate": 0.0005116303234937781,
      "loss": 3.1639,
      "step": 57774
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.9942853450775146,
      "learning_rate": 0.0005116274241850642,
      "loss": 2.883,
      "step": 57775
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.79745614528656,
      "learning_rate": 0.0005116245248370048,
      "loss": 2.8348,
      "step": 57776
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7428643703460693,
      "learning_rate": 0.0005116216254496005,
      "loss": 3.0331,
      "step": 57777
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4380244016647339,
      "learning_rate": 0.0005116187260228517,
      "loss": 3.0226,
      "step": 57778
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.353182554244995,
      "learning_rate": 0.000511615826556759,
      "loss": 3.2027,
      "step": 57779
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9656838178634644,
      "learning_rate": 0.0005116129270513229,
      "loss": 2.911,
      "step": 57780
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.933040976524353,
      "learning_rate": 0.0005116100275065439,
      "loss": 3.0324,
      "step": 57781
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.103207588195801,
      "learning_rate": 0.0005116071279224226,
      "loss": 3.069,
      "step": 57782
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8679370880126953,
      "learning_rate": 0.0005116042282989597,
      "loss": 3.0991,
      "step": 57783
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.9265425205230713,
      "learning_rate": 0.0005116013286361555,
      "loss": 2.6826,
      "step": 57784
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6370106935501099,
      "learning_rate": 0.0005115984289340106,
      "loss": 3.293,
      "step": 57785
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.275383949279785,
      "learning_rate": 0.0005115955291925255,
      "loss": 2.9925,
      "step": 57786
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.1105360984802246,
      "learning_rate": 0.000511592629411701,
      "loss": 3.0905,
      "step": 57787
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7233104705810547,
      "learning_rate": 0.0005115897295915373,
      "loss": 2.8504,
      "step": 57788
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5897918939590454,
      "learning_rate": 0.000511586829732035,
      "loss": 2.9529,
      "step": 57789
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.5575294494628906,
      "learning_rate": 0.0005115839298331948,
      "loss": 3.0105,
      "step": 57790
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1438825130462646,
      "learning_rate": 0.0005115810298950171,
      "loss": 3.0878,
      "step": 57791
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6681289672851562,
      "learning_rate": 0.0005115781299175026,
      "loss": 2.8406,
      "step": 57792
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4547836780548096,
      "learning_rate": 0.0005115752299006517,
      "loss": 2.8762,
      "step": 57793
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5938602685928345,
      "learning_rate": 0.0005115723298444649,
      "loss": 2.7458,
      "step": 57794
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.596345067024231,
      "learning_rate": 0.0005115694297489428,
      "loss": 3.0634,
      "step": 57795
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.875752329826355,
      "learning_rate": 0.0005115665296140861,
      "loss": 3.1056,
      "step": 57796
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8889975547790527,
      "learning_rate": 0.000511563629439895,
      "loss": 3.0559,
      "step": 57797
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.434957504272461,
      "learning_rate": 0.0005115607292263704,
      "loss": 3.0837,
      "step": 57798
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7213349342346191,
      "learning_rate": 0.0005115578289735125,
      "loss": 3.0007,
      "step": 57799
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6428985595703125,
      "learning_rate": 0.0005115549286813221,
      "loss": 3.1153,
      "step": 57800
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5859373807907104,
      "learning_rate": 0.0005115520283497996,
      "loss": 2.8392,
      "step": 57801
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3645578622817993,
      "learning_rate": 0.0005115491279789456,
      "loss": 3.0924,
      "step": 57802
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.109031915664673,
      "learning_rate": 0.0005115462275687607,
      "loss": 2.9913,
      "step": 57803
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.495057225227356,
      "learning_rate": 0.0005115433271192453,
      "loss": 3.1518,
      "step": 57804
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3321222066879272,
      "learning_rate": 0.0005115404266304,
      "loss": 2.9998,
      "step": 57805
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4702246189117432,
      "learning_rate": 0.0005115375261022253,
      "loss": 3.2134,
      "step": 57806
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.708521842956543,
      "learning_rate": 0.0005115346255347218,
      "loss": 3.2669,
      "step": 57807
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3156707286834717,
      "learning_rate": 0.0005115317249278901,
      "loss": 2.9016,
      "step": 57808
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.912926435470581,
      "learning_rate": 0.0005115288242817306,
      "loss": 2.8405,
      "step": 57809
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8850643634796143,
      "learning_rate": 0.0005115259235962438,
      "loss": 2.9806,
      "step": 57810
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5019237995147705,
      "learning_rate": 0.0005115230228714305,
      "loss": 3.3016,
      "step": 57811
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7127251625061035,
      "learning_rate": 0.0005115201221072911,
      "loss": 2.9339,
      "step": 57812
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0053822994232178,
      "learning_rate": 0.0005115172213038259,
      "loss": 3.0535,
      "step": 57813
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6555967330932617,
      "learning_rate": 0.0005115143204610359,
      "loss": 2.8793,
      "step": 57814
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8261919021606445,
      "learning_rate": 0.0005115114195789213,
      "loss": 3.3293,
      "step": 57815
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.49612557888031,
      "learning_rate": 0.0005115085186574827,
      "loss": 3.2078,
      "step": 57816
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3021022081375122,
      "learning_rate": 0.0005115056176967207,
      "loss": 2.9417,
      "step": 57817
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4802217483520508,
      "learning_rate": 0.0005115027166966358,
      "loss": 3.0687,
      "step": 57818
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6875922679901123,
      "learning_rate": 0.0005114998156572285,
      "loss": 2.8936,
      "step": 57819
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.827551007270813,
      "learning_rate": 0.0005114969145784995,
      "loss": 3.0945,
      "step": 57820
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5171947479248047,
      "learning_rate": 0.0005114940134604492,
      "loss": 2.8517,
      "step": 57821
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.730544090270996,
      "learning_rate": 0.0005114911123030781,
      "loss": 2.8679,
      "step": 57822
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9494339227676392,
      "learning_rate": 0.0005114882111063869,
      "loss": 3.1569,
      "step": 57823
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3798938989639282,
      "learning_rate": 0.000511485309870376,
      "loss": 3.1767,
      "step": 57824
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6578913927078247,
      "learning_rate": 0.000511482408595046,
      "loss": 3.0199,
      "step": 57825
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8814094066619873,
      "learning_rate": 0.0005114795072803974,
      "loss": 3.235,
      "step": 57826
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8923845291137695,
      "learning_rate": 0.0005114766059264308,
      "loss": 3.1825,
      "step": 57827
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0268754959106445,
      "learning_rate": 0.0005114737045331467,
      "loss": 2.8036,
      "step": 57828
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.37309730052948,
      "learning_rate": 0.0005114708031005457,
      "loss": 2.9962,
      "step": 57829
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6487140655517578,
      "learning_rate": 0.0005114679016286281,
      "loss": 3.0626,
      "step": 57830
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8531885147094727,
      "learning_rate": 0.0005114650001173949,
      "loss": 3.2123,
      "step": 57831
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6086121797561646,
      "learning_rate": 0.0005114620985668462,
      "loss": 3.0074,
      "step": 57832
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9662500619888306,
      "learning_rate": 0.0005114591969769828,
      "loss": 3.0428,
      "step": 57833
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.0065414905548096,
      "learning_rate": 0.000511456295347805,
      "loss": 2.9998,
      "step": 57834
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0588173866271973,
      "learning_rate": 0.0005114533936793136,
      "loss": 3.2407,
      "step": 57835
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6736875772476196,
      "learning_rate": 0.000511450491971509,
      "loss": 3.0481,
      "step": 57836
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.5640008449554443,
      "learning_rate": 0.0005114475902243917,
      "loss": 3.2245,
      "step": 57837
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.0798099040985107,
      "learning_rate": 0.0005114446884379623,
      "loss": 2.9525,
      "step": 57838
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.422852039337158,
      "learning_rate": 0.0005114417866122215,
      "loss": 3.2931,
      "step": 57839
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5110304355621338,
      "learning_rate": 0.0005114388847471695,
      "loss": 3.0171,
      "step": 57840
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8742942810058594,
      "learning_rate": 0.0005114359828428071,
      "loss": 3.0681,
      "step": 57841
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.76155686378479,
      "learning_rate": 0.0005114330808991348,
      "loss": 2.9884,
      "step": 57842
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1394753456115723,
      "learning_rate": 0.000511430178916153,
      "loss": 2.9838,
      "step": 57843
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5412657260894775,
      "learning_rate": 0.0005114272768938624,
      "loss": 3.2727,
      "step": 57844
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.632656455039978,
      "learning_rate": 0.0005114243748322634,
      "loss": 2.934,
      "step": 57845
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5256011486053467,
      "learning_rate": 0.0005114214727313567,
      "loss": 2.8474,
      "step": 57846
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.68851900100708,
      "learning_rate": 0.0005114185705911427,
      "loss": 3.1706,
      "step": 57847
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.645219087600708,
      "learning_rate": 0.0005114156684116221,
      "loss": 3.2876,
      "step": 57848
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.1029226779937744,
      "learning_rate": 0.0005114127661927952,
      "loss": 2.9532,
      "step": 57849
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.669586181640625,
      "learning_rate": 0.0005114098639346627,
      "loss": 3.379,
      "step": 57850
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4113965034484863,
      "learning_rate": 0.0005114069616372253,
      "loss": 2.8528,
      "step": 57851
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.126605749130249,
      "learning_rate": 0.0005114040593004832,
      "loss": 2.8419,
      "step": 57852
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.69651460647583,
      "learning_rate": 0.0005114011569244371,
      "loss": 3.0799,
      "step": 57853
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5150113105773926,
      "learning_rate": 0.0005113982545090876,
      "loss": 2.911,
      "step": 57854
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4095261096954346,
      "learning_rate": 0.0005113953520544351,
      "loss": 3.1349,
      "step": 57855
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8709352016448975,
      "learning_rate": 0.0005113924495604803,
      "loss": 2.977,
      "step": 57856
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.090397357940674,
      "learning_rate": 0.0005113895470272235,
      "loss": 3.123,
      "step": 57857
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5667837858200073,
      "learning_rate": 0.0005113866444546655,
      "loss": 2.9827,
      "step": 57858
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.423937201499939,
      "learning_rate": 0.0005113837418428067,
      "loss": 3.1184,
      "step": 57859
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5755679607391357,
      "learning_rate": 0.0005113808391916477,
      "loss": 2.8452,
      "step": 57860
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.829543113708496,
      "learning_rate": 0.0005113779365011889,
      "loss": 2.9612,
      "step": 57861
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6259557008743286,
      "learning_rate": 0.0005113750337714311,
      "loss": 3.1665,
      "step": 57862
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6174988746643066,
      "learning_rate": 0.0005113721310023746,
      "loss": 2.9583,
      "step": 57863
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.866377830505371,
      "learning_rate": 0.0005113692281940201,
      "loss": 2.9391,
      "step": 57864
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.1979243755340576,
      "learning_rate": 0.0005113663253463681,
      "loss": 3.0637,
      "step": 57865
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6109217405319214,
      "learning_rate": 0.000511363422459419,
      "loss": 3.0198,
      "step": 57866
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.518357515335083,
      "learning_rate": 0.0005113605195331735,
      "loss": 3.1146,
      "step": 57867
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2704215049743652,
      "learning_rate": 0.0005113576165676322,
      "loss": 3.0536,
      "step": 57868
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9048243761062622,
      "learning_rate": 0.0005113547135627953,
      "loss": 2.8926,
      "step": 57869
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9612668752670288,
      "learning_rate": 0.0005113518105186637,
      "loss": 3.1202,
      "step": 57870
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3169667720794678,
      "learning_rate": 0.0005113489074352377,
      "loss": 3.1089,
      "step": 57871
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.6243438720703125,
      "learning_rate": 0.0005113460043125182,
      "loss": 2.9772,
      "step": 57872
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4540766477584839,
      "learning_rate": 0.0005113431011505053,
      "loss": 3.2708,
      "step": 57873
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6096837520599365,
      "learning_rate": 0.0005113401979491998,
      "loss": 3.0199,
      "step": 57874
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2980194091796875,
      "learning_rate": 0.0005113372947086022,
      "loss": 3.0136,
      "step": 57875
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9263792037963867,
      "learning_rate": 0.0005113343914287129,
      "loss": 3.037,
      "step": 57876
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8438328504562378,
      "learning_rate": 0.0005113314881095326,
      "loss": 3.1545,
      "step": 57877
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.489635944366455,
      "learning_rate": 0.0005113285847510617,
      "loss": 3.0434,
      "step": 57878
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.8133912086486816,
      "learning_rate": 0.0005113256813533009,
      "loss": 3.1666,
      "step": 57879
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2933762073516846,
      "learning_rate": 0.0005113227779162507,
      "loss": 2.952,
      "step": 57880
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5818135738372803,
      "learning_rate": 0.0005113198744399115,
      "loss": 3.2857,
      "step": 57881
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2400143146514893,
      "learning_rate": 0.0005113169709242841,
      "loss": 3.0723,
      "step": 57882
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.05902361869812,
      "learning_rate": 0.0005113140673693688,
      "loss": 3.1065,
      "step": 57883
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.3116109371185303,
      "learning_rate": 0.0005113111637751663,
      "loss": 3.0322,
      "step": 57884
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8886399269104004,
      "learning_rate": 0.0005113082601416769,
      "loss": 3.0129,
      "step": 57885
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8224165439605713,
      "learning_rate": 0.0005113053564689015,
      "loss": 3.0934,
      "step": 57886
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.8803348541259766,
      "learning_rate": 0.0005113024527568403,
      "loss": 3.2404,
      "step": 57887
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.418223261833191,
      "learning_rate": 0.000511299549005494,
      "loss": 2.974,
      "step": 57888
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4963618516921997,
      "learning_rate": 0.0005112966452148632,
      "loss": 3.0224,
      "step": 57889
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.0435402393341064,
      "learning_rate": 0.0005112937413849483,
      "loss": 3.1589,
      "step": 57890
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8733141422271729,
      "learning_rate": 0.00051129083751575,
      "loss": 3.2764,
      "step": 57891
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7281359434127808,
      "learning_rate": 0.0005112879336072686,
      "loss": 2.9876,
      "step": 57892
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9735193252563477,
      "learning_rate": 0.0005112850296595049,
      "loss": 3.1788,
      "step": 57893
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.2401092052459717,
      "learning_rate": 0.0005112821256724593,
      "loss": 2.9229,
      "step": 57894
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9818540811538696,
      "learning_rate": 0.0005112792216461323,
      "loss": 3.4535,
      "step": 57895
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6110538244247437,
      "learning_rate": 0.0005112763175805246,
      "loss": 3.211,
      "step": 57896
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7043342590332031,
      "learning_rate": 0.0005112734134756366,
      "loss": 2.8253,
      "step": 57897
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6832611560821533,
      "learning_rate": 0.0005112705093314688,
      "loss": 3.1085,
      "step": 57898
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6091668605804443,
      "learning_rate": 0.0005112676051480219,
      "loss": 3.0521,
      "step": 57899
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4738627672195435,
      "learning_rate": 0.0005112647009252964,
      "loss": 2.9627,
      "step": 57900
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.930990219116211,
      "learning_rate": 0.0005112617966632928,
      "loss": 3.2191,
      "step": 57901
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.878425121307373,
      "learning_rate": 0.0005112588923620117,
      "loss": 3.0413,
      "step": 57902
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.785085678100586,
      "learning_rate": 0.0005112559880214535,
      "loss": 2.9218,
      "step": 57903
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5600687265396118,
      "learning_rate": 0.0005112530836416188,
      "loss": 2.9059,
      "step": 57904
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8646109104156494,
      "learning_rate": 0.0005112501792225081,
      "loss": 2.9858,
      "step": 57905
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.259106159210205,
      "learning_rate": 0.0005112472747641222,
      "loss": 3.0551,
      "step": 57906
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5234311819076538,
      "learning_rate": 0.0005112443702664613,
      "loss": 2.9404,
      "step": 57907
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.7037668228149414,
      "learning_rate": 0.0005112414657295261,
      "loss": 2.9663,
      "step": 57908
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8986567258834839,
      "learning_rate": 0.0005112385611533172,
      "loss": 2.9525,
      "step": 57909
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.830946922302246,
      "learning_rate": 0.000511235656537835,
      "loss": 3.2186,
      "step": 57910
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4009169340133667,
      "learning_rate": 0.0005112327518830801,
      "loss": 3.0581,
      "step": 57911
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9396908283233643,
      "learning_rate": 0.000511229847189053,
      "loss": 2.8428,
      "step": 57912
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.645724892616272,
      "learning_rate": 0.0005112269424557544,
      "loss": 3.0921,
      "step": 57913
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6447031497955322,
      "learning_rate": 0.0005112240376831846,
      "loss": 3.0112,
      "step": 57914
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4909404516220093,
      "learning_rate": 0.0005112211328713444,
      "loss": 3.0398,
      "step": 57915
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3635514974594116,
      "learning_rate": 0.0005112182280202341,
      "loss": 2.795,
      "step": 57916
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.359097957611084,
      "learning_rate": 0.0005112153231298544,
      "loss": 3.0827,
      "step": 57917
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.865370273590088,
      "learning_rate": 0.0005112124182002057,
      "loss": 2.9583,
      "step": 57918
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6456794738769531,
      "learning_rate": 0.0005112095132312887,
      "loss": 2.9525,
      "step": 57919
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.9818196296691895,
      "learning_rate": 0.0005112066082231039,
      "loss": 2.8604,
      "step": 57920
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.7902979850769043,
      "learning_rate": 0.0005112037031756518,
      "loss": 3.1038,
      "step": 57921
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.565995216369629,
      "learning_rate": 0.0005112007980889329,
      "loss": 2.9826,
      "step": 57922
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5773264169692993,
      "learning_rate": 0.0005111978929629477,
      "loss": 3.0247,
      "step": 57923
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5533156394958496,
      "learning_rate": 0.000511194987797697,
      "loss": 3.1837,
      "step": 57924
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.838251829147339,
      "learning_rate": 0.000511192082593181,
      "loss": 2.6609,
      "step": 57925
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5493756532669067,
      "learning_rate": 0.0005111891773494006,
      "loss": 3.0014,
      "step": 57926
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.457961082458496,
      "learning_rate": 0.000511186272066356,
      "loss": 3.0068,
      "step": 57927
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.286358118057251,
      "learning_rate": 0.0005111833667440479,
      "loss": 3.2005,
      "step": 57928
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3246363401412964,
      "learning_rate": 0.0005111804613824768,
      "loss": 2.8892,
      "step": 57929
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4392443895339966,
      "learning_rate": 0.0005111775559816433,
      "loss": 3.2315,
      "step": 57930
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.927775502204895,
      "learning_rate": 0.0005111746505415479,
      "loss": 2.9797,
      "step": 57931
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4931076765060425,
      "learning_rate": 0.0005111717450621912,
      "loss": 2.9467,
      "step": 57932
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7731941938400269,
      "learning_rate": 0.0005111688395435737,
      "loss": 3.0642,
      "step": 57933
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3697125911712646,
      "learning_rate": 0.0005111659339856959,
      "loss": 3.1194,
      "step": 57934
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.585440993309021,
      "learning_rate": 0.0005111630283885582,
      "loss": 3.1411,
      "step": 57935
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7074544429779053,
      "learning_rate": 0.0005111601227521615,
      "loss": 3.1549,
      "step": 57936
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4690252542495728,
      "learning_rate": 0.0005111572170765061,
      "loss": 3.0839,
      "step": 57937
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3078879117965698,
      "learning_rate": 0.0005111543113615926,
      "loss": 3.2624,
      "step": 57938
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8615278005599976,
      "learning_rate": 0.0005111514056074215,
      "loss": 2.9689,
      "step": 57939
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4471862316131592,
      "learning_rate": 0.0005111484998139934,
      "loss": 2.8001,
      "step": 57940
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.485560417175293,
      "learning_rate": 0.0005111455939813088,
      "loss": 2.7757,
      "step": 57941
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6919587850570679,
      "learning_rate": 0.0005111426881093682,
      "loss": 2.9624,
      "step": 57942
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.662202000617981,
      "learning_rate": 0.0005111397821981722,
      "loss": 2.8392,
      "step": 57943
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.29436993598938,
      "learning_rate": 0.0005111368762477213,
      "loss": 3.0967,
      "step": 57944
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6128617525100708,
      "learning_rate": 0.0005111339702580163,
      "loss": 2.8763,
      "step": 57945
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7239924669265747,
      "learning_rate": 0.0005111310642290572,
      "loss": 2.8616,
      "step": 57946
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.693825364112854,
      "learning_rate": 0.000511128158160845,
      "loss": 2.9105,
      "step": 57947
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.546890139579773,
      "learning_rate": 0.0005111252520533801,
      "loss": 2.9548,
      "step": 57948
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.610376000404358,
      "learning_rate": 0.000511122345906663,
      "loss": 3.0623,
      "step": 57949
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.492926597595215,
      "learning_rate": 0.0005111194397206943,
      "loss": 3.0823,
      "step": 57950
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3688338994979858,
      "learning_rate": 0.0005111165334954744,
      "loss": 3.1857,
      "step": 57951
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.58821702003479,
      "learning_rate": 0.000511113627231004,
      "loss": 2.7591,
      "step": 57952
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5547099113464355,
      "learning_rate": 0.0005111107209272836,
      "loss": 3.0112,
      "step": 57953
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.3542267084121704,
      "learning_rate": 0.0005111078145843137,
      "loss": 3.1234,
      "step": 57954
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4622818231582642,
      "learning_rate": 0.0005111049082020951,
      "loss": 3.1782,
      "step": 57955
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4771571159362793,
      "learning_rate": 0.000511102001780628,
      "loss": 3.1342,
      "step": 57956
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.805753469467163,
      "learning_rate": 0.0005110990953199129,
      "loss": 3.1162,
      "step": 57957
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4579743146896362,
      "learning_rate": 0.0005110961888199506,
      "loss": 3.1582,
      "step": 57958
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.964229702949524,
      "learning_rate": 0.0005110932822807415,
      "loss": 3.135,
      "step": 57959
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6557605266571045,
      "learning_rate": 0.0005110903757022862,
      "loss": 3.3231,
      "step": 57960
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8408033847808838,
      "learning_rate": 0.0005110874690845852,
      "loss": 2.9029,
      "step": 57961
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8459219932556152,
      "learning_rate": 0.000511084562427639,
      "loss": 3.0225,
      "step": 57962
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5778058767318726,
      "learning_rate": 0.0005110816557314483,
      "loss": 3.0779,
      "step": 57963
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7043323516845703,
      "learning_rate": 0.0005110787489960135,
      "loss": 2.9907,
      "step": 57964
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.357412099838257,
      "learning_rate": 0.0005110758422213351,
      "loss": 2.9952,
      "step": 57965
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5992026329040527,
      "learning_rate": 0.0005110729354074139,
      "loss": 3.1854,
      "step": 57966
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4625333547592163,
      "learning_rate": 0.0005110700285542501,
      "loss": 3.1763,
      "step": 57967
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.8928312063217163,
      "learning_rate": 0.0005110671216618446,
      "loss": 3.0296,
      "step": 57968
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.756584644317627,
      "learning_rate": 0.0005110642147301975,
      "loss": 2.9462,
      "step": 57969
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.776425361633301,
      "learning_rate": 0.0005110613077593096,
      "loss": 2.8962,
      "step": 57970
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.0531907081604004,
      "learning_rate": 0.0005110584007491816,
      "loss": 3.0392,
      "step": 57971
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.5291569232940674,
      "learning_rate": 0.0005110554936998137,
      "loss": 2.8146,
      "step": 57972
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7931724786758423,
      "learning_rate": 0.0005110525866112067,
      "loss": 3.004,
      "step": 57973
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.13059401512146,
      "learning_rate": 0.0005110496794833608,
      "loss": 2.696,
      "step": 57974
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.783004879951477,
      "learning_rate": 0.000511046772316277,
      "loss": 3.1028,
      "step": 57975
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6958457231521606,
      "learning_rate": 0.0005110438651099555,
      "loss": 3.0094,
      "step": 57976
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4514837265014648,
      "learning_rate": 0.0005110409578643971,
      "loss": 3.0673,
      "step": 57977
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.654784083366394,
      "learning_rate": 0.0005110380505796021,
      "loss": 3.1262,
      "step": 57978
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4605482816696167,
      "learning_rate": 0.0005110351432555712,
      "loss": 3.2056,
      "step": 57979
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.4394108057022095,
      "learning_rate": 0.000511032235892305,
      "loss": 2.8417,
      "step": 57980
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.004584789276123,
      "learning_rate": 0.0005110293284898037,
      "loss": 3.0238,
      "step": 57981
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.6484043598175049,
      "learning_rate": 0.0005110264210480682,
      "loss": 3.0401,
      "step": 57982
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.7672611474990845,
      "learning_rate": 0.0005110235135670989,
      "loss": 3.1252,
      "step": 57983
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.126542568206787,
      "learning_rate": 0.0005110206060468963,
      "loss": 2.937,
      "step": 57984
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7209957838058472,
      "learning_rate": 0.0005110176984874609,
      "loss": 2.9553,
      "step": 57985
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9302541017532349,
      "learning_rate": 0.0005110147908887935,
      "loss": 3.0762,
      "step": 57986
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7190641164779663,
      "learning_rate": 0.0005110118832508944,
      "loss": 3.0754,
      "step": 57987
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4087754487991333,
      "learning_rate": 0.0005110089755737642,
      "loss": 3.0093,
      "step": 57988
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5393506288528442,
      "learning_rate": 0.0005110060678574034,
      "loss": 3.1512,
      "step": 57989
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7596778869628906,
      "learning_rate": 0.0005110031601018127,
      "loss": 3.138,
      "step": 57990
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8157607316970825,
      "learning_rate": 0.0005110002523069924,
      "loss": 3.1265,
      "step": 57991
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5674822330474854,
      "learning_rate": 0.0005109973444729432,
      "loss": 3.1614,
      "step": 57992
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7744957208633423,
      "learning_rate": 0.0005109944365996657,
      "loss": 3.0478,
      "step": 57993
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.779900074005127,
      "learning_rate": 0.0005109915286871603,
      "loss": 3.1282,
      "step": 57994
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5756181478500366,
      "learning_rate": 0.0005109886207354276,
      "loss": 3.2013,
      "step": 57995
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9493390321731567,
      "learning_rate": 0.0005109857127444681,
      "loss": 2.8994,
      "step": 57996
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4886683225631714,
      "learning_rate": 0.0005109828047142824,
      "loss": 2.9838,
      "step": 57997
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2861250638961792,
      "learning_rate": 0.000510979896644871,
      "loss": 3.2635,
      "step": 57998
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8497235774993896,
      "learning_rate": 0.0005109769885362344,
      "loss": 3.0821,
      "step": 57999
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4551270008087158,
      "learning_rate": 0.0005109740803883732,
      "loss": 3.1974,
      "step": 58000
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5279617309570312,
      "learning_rate": 0.0005109711722012881,
      "loss": 2.9297,
      "step": 58001
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6258713006973267,
      "learning_rate": 0.0005109682639749793,
      "loss": 3.1081,
      "step": 58002
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4891738891601562,
      "learning_rate": 0.0005109653557094477,
      "loss": 3.176,
      "step": 58003
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4218714237213135,
      "learning_rate": 0.0005109624474046935,
      "loss": 3.1279,
      "step": 58004
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5309414863586426,
      "learning_rate": 0.0005109595390607175,
      "loss": 3.0449,
      "step": 58005
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.289585590362549,
      "learning_rate": 0.0005109566306775201,
      "loss": 2.813,
      "step": 58006
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.61904776096344,
      "learning_rate": 0.0005109537222551018,
      "loss": 2.8851,
      "step": 58007
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6984949111938477,
      "learning_rate": 0.0005109508137934632,
      "loss": 2.863,
      "step": 58008
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2547919750213623,
      "learning_rate": 0.0005109479052926051,
      "loss": 3.0323,
      "step": 58009
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5820497274398804,
      "learning_rate": 0.0005109449967525276,
      "loss": 3.122,
      "step": 58010
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.493147611618042,
      "learning_rate": 0.0005109420881732314,
      "loss": 3.0991,
      "step": 58011
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7405493259429932,
      "learning_rate": 0.0005109391795547173,
      "loss": 2.7225,
      "step": 58012
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6483474969863892,
      "learning_rate": 0.0005109362708969854,
      "loss": 2.8494,
      "step": 58013
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.523179531097412,
      "learning_rate": 0.0005109333622000367,
      "loss": 3.2007,
      "step": 58014
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.592246651649475,
      "learning_rate": 0.0005109304534638713,
      "loss": 2.8906,
      "step": 58015
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.539579153060913,
      "learning_rate": 0.0005109275446884899,
      "loss": 3.021,
      "step": 58016
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.736736536026001,
      "learning_rate": 0.0005109246358738933,
      "loss": 3.0433,
      "step": 58017
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7458100318908691,
      "learning_rate": 0.0005109217270200817,
      "loss": 3.1448,
      "step": 58018
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7516189813613892,
      "learning_rate": 0.0005109188181270559,
      "loss": 2.9761,
      "step": 58019
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5141187906265259,
      "learning_rate": 0.0005109159091948161,
      "loss": 2.9748,
      "step": 58020
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5309983491897583,
      "learning_rate": 0.0005109130002233632,
      "loss": 3.0929,
      "step": 58021
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5635439157485962,
      "learning_rate": 0.0005109100912126976,
      "loss": 2.8144,
      "step": 58022
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.711023211479187,
      "learning_rate": 0.0005109071821628198,
      "loss": 3.019,
      "step": 58023
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6561269760131836,
      "learning_rate": 0.0005109042730737304,
      "loss": 2.9712,
      "step": 58024
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8183071613311768,
      "learning_rate": 0.0005109013639454298,
      "loss": 3.0007,
      "step": 58025
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6538670063018799,
      "learning_rate": 0.0005108984547779187,
      "loss": 3.0033,
      "step": 58026
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7682455778121948,
      "learning_rate": 0.0005108955455711977,
      "loss": 2.855,
      "step": 58027
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.444675087928772,
      "learning_rate": 0.0005108926363252671,
      "loss": 2.9365,
      "step": 58028
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5951186418533325,
      "learning_rate": 0.0005108897270401277,
      "loss": 2.9439,
      "step": 58029
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2812906503677368,
      "learning_rate": 0.0005108868177157799,
      "loss": 2.9721,
      "step": 58030
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7034448385238647,
      "learning_rate": 0.0005108839083522242,
      "loss": 3.0398,
      "step": 58031
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.864150047302246,
      "learning_rate": 0.0005108809989494611,
      "loss": 3.2128,
      "step": 58032
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9331672191619873,
      "learning_rate": 0.0005108780895074915,
      "loss": 3.0891,
      "step": 58033
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4708247184753418,
      "learning_rate": 0.0005108751800263156,
      "loss": 2.888,
      "step": 58034
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.018187999725342,
      "learning_rate": 0.000510872270505934,
      "loss": 3.1843,
      "step": 58035
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.664635181427002,
      "learning_rate": 0.0005108693609463472,
      "loss": 2.7585,
      "step": 58036
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.828174352645874,
      "learning_rate": 0.0005108664513475558,
      "loss": 3.1008,
      "step": 58037
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4818694591522217,
      "learning_rate": 0.0005108635417095604,
      "loss": 2.8533,
      "step": 58038
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4815778732299805,
      "learning_rate": 0.0005108606320323615,
      "loss": 2.7568,
      "step": 58039
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6738289594650269,
      "learning_rate": 0.0005108577223159596,
      "loss": 3.0415,
      "step": 58040
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6639827489852905,
      "learning_rate": 0.0005108548125603553,
      "loss": 2.794,
      "step": 58041
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5574557781219482,
      "learning_rate": 0.000510851902765549,
      "loss": 3.1034,
      "step": 58042
    },
    {
      "epoch": 0.76,
      "grad_norm": 4.376289367675781,
      "learning_rate": 0.0005108489929315415,
      "loss": 3.0885,
      "step": 58043
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.571189522743225,
      "learning_rate": 0.0005108460830583331,
      "loss": 2.8266,
      "step": 58044
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1037774085998535,
      "learning_rate": 0.0005108431731459244,
      "loss": 2.9934,
      "step": 58045
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9934186935424805,
      "learning_rate": 0.0005108402631943161,
      "loss": 2.7032,
      "step": 58046
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4108705520629883,
      "learning_rate": 0.0005108373532035085,
      "loss": 3.1155,
      "step": 58047
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7626478672027588,
      "learning_rate": 0.0005108344431735023,
      "loss": 2.9855,
      "step": 58048
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6090635061264038,
      "learning_rate": 0.000510831533104298,
      "loss": 2.9241,
      "step": 58049
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4996753931045532,
      "learning_rate": 0.0005108286229958961,
      "loss": 3.4121,
      "step": 58050
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6694557666778564,
      "learning_rate": 0.0005108257128482972,
      "loss": 3.3693,
      "step": 58051
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6231971979141235,
      "learning_rate": 0.000510822802661502,
      "loss": 2.8701,
      "step": 58052
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3917646408081055,
      "learning_rate": 0.0005108198924355107,
      "loss": 3.1544,
      "step": 58053
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.465641736984253,
      "learning_rate": 0.000510816982170324,
      "loss": 3.125,
      "step": 58054
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5335230827331543,
      "learning_rate": 0.0005108140718659424,
      "loss": 2.6901,
      "step": 58055
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3498584032058716,
      "learning_rate": 0.0005108111615223666,
      "loss": 3.0064,
      "step": 58056
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.069556713104248,
      "learning_rate": 0.0005108082511395969,
      "loss": 3.0392,
      "step": 58057
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.105659008026123,
      "learning_rate": 0.000510805340717634,
      "loss": 3.251,
      "step": 58058
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.029174327850342,
      "learning_rate": 0.0005108024302564785,
      "loss": 3.0341,
      "step": 58059
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3932485580444336,
      "learning_rate": 0.0005107995197561308,
      "loss": 3.2238,
      "step": 58060
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5980703830718994,
      "learning_rate": 0.0005107966092165914,
      "loss": 3.1348,
      "step": 58061
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4310272932052612,
      "learning_rate": 0.000510793698637861,
      "loss": 2.7462,
      "step": 58062
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.555365562438965,
      "learning_rate": 0.0005107907880199401,
      "loss": 2.9628,
      "step": 58063
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.826207160949707,
      "learning_rate": 0.0005107878773628293,
      "loss": 2.8657,
      "step": 58064
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4518204927444458,
      "learning_rate": 0.0005107849666665289,
      "loss": 2.9209,
      "step": 58065
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5267086029052734,
      "learning_rate": 0.0005107820559310396,
      "loss": 2.9573,
      "step": 58066
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.879380464553833,
      "learning_rate": 0.000510779145156362,
      "loss": 3.1225,
      "step": 58067
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.0953683853149414,
      "learning_rate": 0.0005107762343424965,
      "loss": 3.0111,
      "step": 58068
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4698249101638794,
      "learning_rate": 0.0005107733234894437,
      "loss": 3.0953,
      "step": 58069
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1719985008239746,
      "learning_rate": 0.0005107704125972044,
      "loss": 3.0062,
      "step": 58070
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.476364850997925,
      "learning_rate": 0.0005107675016657787,
      "loss": 3.1219,
      "step": 58071
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3993579149246216,
      "learning_rate": 0.0005107645906951673,
      "loss": 3.0484,
      "step": 58072
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.278677225112915,
      "learning_rate": 0.0005107616796853709,
      "loss": 3.0548,
      "step": 58073
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2555158138275146,
      "learning_rate": 0.0005107587686363899,
      "loss": 3.1863,
      "step": 58074
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5903396606445312,
      "learning_rate": 0.0005107558575482248,
      "loss": 2.8767,
      "step": 58075
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1801037788391113,
      "learning_rate": 0.0005107529464208763,
      "loss": 3.1637,
      "step": 58076
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8779802322387695,
      "learning_rate": 0.0005107500352543448,
      "loss": 3.1101,
      "step": 58077
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2785040140151978,
      "learning_rate": 0.0005107471240486309,
      "loss": 3.154,
      "step": 58078
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.081651210784912,
      "learning_rate": 0.0005107442128037352,
      "loss": 2.7763,
      "step": 58079
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7019946575164795,
      "learning_rate": 0.0005107413015196581,
      "loss": 2.8862,
      "step": 58080
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6084961891174316,
      "learning_rate": 0.0005107383901964002,
      "loss": 2.7432,
      "step": 58081
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5500532388687134,
      "learning_rate": 0.0005107354788339621,
      "loss": 3.1686,
      "step": 58082
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.9968042373657227,
      "learning_rate": 0.0005107325674323442,
      "loss": 3.17,
      "step": 58083
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5369105339050293,
      "learning_rate": 0.0005107296559915472,
      "loss": 3.077,
      "step": 58084
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2447000741958618,
      "learning_rate": 0.0005107267445115716,
      "loss": 3.2736,
      "step": 58085
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6417527198791504,
      "learning_rate": 0.0005107238329924179,
      "loss": 3.0713,
      "step": 58086
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5043957233428955,
      "learning_rate": 0.0005107209214340867,
      "loss": 3.3303,
      "step": 58087
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2681777477264404,
      "learning_rate": 0.0005107180098365786,
      "loss": 3.1875,
      "step": 58088
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6985424757003784,
      "learning_rate": 0.0005107150981998939,
      "loss": 2.9217,
      "step": 58089
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6853916645050049,
      "learning_rate": 0.0005107121865240333,
      "loss": 3.1425,
      "step": 58090
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2300515174865723,
      "learning_rate": 0.0005107092748089973,
      "loss": 3.1525,
      "step": 58091
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3208627700805664,
      "learning_rate": 0.0005107063630547865,
      "loss": 2.9945,
      "step": 58092
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3815956115722656,
      "learning_rate": 0.0005107034512614014,
      "loss": 3.3475,
      "step": 58093
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.966521978378296,
      "learning_rate": 0.0005107005394288425,
      "loss": 3.0348,
      "step": 58094
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1259593963623047,
      "learning_rate": 0.0005106976275571105,
      "loss": 3.0501,
      "step": 58095
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5114063024520874,
      "learning_rate": 0.0005106947156462058,
      "loss": 3.0989,
      "step": 58096
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3709166049957275,
      "learning_rate": 0.0005106918036961289,
      "loss": 2.9621,
      "step": 58097
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.576724648475647,
      "learning_rate": 0.0005106888917068806,
      "loss": 3.0745,
      "step": 58098
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.937533140182495,
      "learning_rate": 0.0005106859796784611,
      "loss": 2.8608,
      "step": 58099
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5507581233978271,
      "learning_rate": 0.0005106830676108711,
      "loss": 2.9895,
      "step": 58100
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5024847984313965,
      "learning_rate": 0.0005106801555041112,
      "loss": 3.2631,
      "step": 58101
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3034815788269043,
      "learning_rate": 0.0005106772433581818,
      "loss": 3.1982,
      "step": 58102
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5893564224243164,
      "learning_rate": 0.0005106743311730836,
      "loss": 3.2739,
      "step": 58103
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.788414478302002,
      "learning_rate": 0.0005106714189488171,
      "loss": 2.9044,
      "step": 58104
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.208648204803467,
      "learning_rate": 0.0005106685066853827,
      "loss": 3.2129,
      "step": 58105
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5261775255203247,
      "learning_rate": 0.000510665594382781,
      "loss": 3.0068,
      "step": 58106
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6199877262115479,
      "learning_rate": 0.0005106626820410128,
      "loss": 2.7971,
      "step": 58107
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7096002101898193,
      "learning_rate": 0.0005106597696600783,
      "loss": 2.9129,
      "step": 58108
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7506366968154907,
      "learning_rate": 0.0005106568572399783,
      "loss": 3.21,
      "step": 58109
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6264772415161133,
      "learning_rate": 0.000510653944780713,
      "loss": 3.1688,
      "step": 58110
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4206424951553345,
      "learning_rate": 0.0005106510322822833,
      "loss": 3.0236,
      "step": 58111
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4274076223373413,
      "learning_rate": 0.0005106481197446896,
      "loss": 2.9741,
      "step": 58112
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8977766036987305,
      "learning_rate": 0.0005106452071679323,
      "loss": 3.1218,
      "step": 58113
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.081266164779663,
      "learning_rate": 0.0005106422945520122,
      "loss": 3.0194,
      "step": 58114
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8328624963760376,
      "learning_rate": 0.0005106393818969298,
      "loss": 2.8036,
      "step": 58115
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6151790618896484,
      "learning_rate": 0.0005106364692026855,
      "loss": 3.0965,
      "step": 58116
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3842781782150269,
      "learning_rate": 0.0005106335564692798,
      "loss": 2.9788,
      "step": 58117
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0031590461730957,
      "learning_rate": 0.0005106306436967134,
      "loss": 2.9111,
      "step": 58118
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.532791018486023,
      "learning_rate": 0.0005106277308849868,
      "loss": 3.2135,
      "step": 58119
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9833474159240723,
      "learning_rate": 0.0005106248180341005,
      "loss": 3.077,
      "step": 58120
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6987390518188477,
      "learning_rate": 0.0005106219051440551,
      "loss": 2.9081,
      "step": 58121
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4161261320114136,
      "learning_rate": 0.0005106189922148511,
      "loss": 3.1964,
      "step": 58122
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7135016918182373,
      "learning_rate": 0.0005106160792464891,
      "loss": 2.8039,
      "step": 58123
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4503800868988037,
      "learning_rate": 0.0005106131662389696,
      "loss": 2.9523,
      "step": 58124
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5570787191390991,
      "learning_rate": 0.0005106102531922931,
      "loss": 2.8463,
      "step": 58125
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4307477474212646,
      "learning_rate": 0.0005106073401064602,
      "loss": 3.0215,
      "step": 58126
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7421634197235107,
      "learning_rate": 0.0005106044269814713,
      "loss": 3.0704,
      "step": 58127
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0872085094451904,
      "learning_rate": 0.0005106015138173272,
      "loss": 2.9518,
      "step": 58128
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3611648082733154,
      "learning_rate": 0.0005105986006140283,
      "loss": 3.1689,
      "step": 58129
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.782560110092163,
      "learning_rate": 0.000510595687371575,
      "loss": 3.0056,
      "step": 58130
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6430208683013916,
      "learning_rate": 0.0005105927740899682,
      "loss": 2.8724,
      "step": 58131
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4891589879989624,
      "learning_rate": 0.0005105898607692081,
      "loss": 2.9949,
      "step": 58132
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4870631694793701,
      "learning_rate": 0.0005105869474092954,
      "loss": 3.1187,
      "step": 58133
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.587608814239502,
      "learning_rate": 0.0005105840340102306,
      "loss": 3.0632,
      "step": 58134
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7665778398513794,
      "learning_rate": 0.0005105811205720141,
      "loss": 2.9983,
      "step": 58135
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5349704027175903,
      "learning_rate": 0.0005105782070946468,
      "loss": 3.1755,
      "step": 58136
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5728346109390259,
      "learning_rate": 0.000510575293578129,
      "loss": 3.0494,
      "step": 58137
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5173752307891846,
      "learning_rate": 0.0005105723800224612,
      "loss": 3.2108,
      "step": 58138
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.603581190109253,
      "learning_rate": 0.0005105694664276442,
      "loss": 2.838,
      "step": 58139
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.505160927772522,
      "learning_rate": 0.0005105665527936783,
      "loss": 2.9196,
      "step": 58140
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5814998149871826,
      "learning_rate": 0.000510563639120564,
      "loss": 2.9301,
      "step": 58141
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.601309061050415,
      "learning_rate": 0.000510560725408302,
      "loss": 3.0251,
      "step": 58142
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6690298318862915,
      "learning_rate": 0.0005105578116568928,
      "loss": 2.8354,
      "step": 58143
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4597477912902832,
      "learning_rate": 0.0005105548978663368,
      "loss": 3.1413,
      "step": 58144
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8495608568191528,
      "learning_rate": 0.0005105519840366349,
      "loss": 2.9127,
      "step": 58145
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9700795412063599,
      "learning_rate": 0.0005105490701677873,
      "loss": 2.9629,
      "step": 58146
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5591896772384644,
      "learning_rate": 0.0005105461562597946,
      "loss": 3.2454,
      "step": 58147
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5410972833633423,
      "learning_rate": 0.0005105432423126574,
      "loss": 2.9329,
      "step": 58148
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5574294328689575,
      "learning_rate": 0.0005105403283263763,
      "loss": 2.9982,
      "step": 58149
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5541412830352783,
      "learning_rate": 0.0005105374143009518,
      "loss": 2.9016,
      "step": 58150
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6649277210235596,
      "learning_rate": 0.0005105345002363843,
      "loss": 2.9413,
      "step": 58151
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3693870306015015,
      "learning_rate": 0.0005105315861326746,
      "loss": 2.9495,
      "step": 58152
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3831799030303955,
      "learning_rate": 0.000510528671989823,
      "loss": 3.0166,
      "step": 58153
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2514102458953857,
      "learning_rate": 0.0005105257578078302,
      "loss": 2.7608,
      "step": 58154
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6685336828231812,
      "learning_rate": 0.0005105228435866966,
      "loss": 3.1792,
      "step": 58155
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7256942987442017,
      "learning_rate": 0.000510519929326423,
      "loss": 2.8352,
      "step": 58156
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.245983839035034,
      "learning_rate": 0.0005105170150270095,
      "loss": 2.8827,
      "step": 58157
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3101418018341064,
      "learning_rate": 0.0005105141006884572,
      "loss": 3.4006,
      "step": 58158
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8717526197433472,
      "learning_rate": 0.0005105111863107662,
      "loss": 3.0701,
      "step": 58159
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.8244779109954834,
      "learning_rate": 0.0005105082718939373,
      "loss": 3.0343,
      "step": 58160
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6166319847106934,
      "learning_rate": 0.0005105053574379708,
      "loss": 3.1546,
      "step": 58161
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.845241665840149,
      "learning_rate": 0.0005105024429428675,
      "loss": 3.2196,
      "step": 58162
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5550247430801392,
      "learning_rate": 0.0005104995284086278,
      "loss": 3.202,
      "step": 58163
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4655488729476929,
      "learning_rate": 0.0005104966138352522,
      "loss": 3.1182,
      "step": 58164
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.629913568496704,
      "learning_rate": 0.0005104936992227414,
      "loss": 3.0207,
      "step": 58165
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.219423770904541,
      "learning_rate": 0.0005104907845710958,
      "loss": 2.911,
      "step": 58166
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5382845401763916,
      "learning_rate": 0.0005104878698803159,
      "loss": 3.0101,
      "step": 58167
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5311802625656128,
      "learning_rate": 0.0005104849551504025,
      "loss": 3.1103,
      "step": 58168
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.473400354385376,
      "learning_rate": 0.0005104820403813558,
      "loss": 3.1949,
      "step": 58169
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3647271394729614,
      "learning_rate": 0.0005104791255731766,
      "loss": 3.2319,
      "step": 58170
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.484459400177002,
      "learning_rate": 0.0005104762107258655,
      "loss": 2.9796,
      "step": 58171
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3969218730926514,
      "learning_rate": 0.0005104732958394227,
      "loss": 3.1519,
      "step": 58172
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.377801775932312,
      "learning_rate": 0.000510470380913849,
      "loss": 3.2626,
      "step": 58173
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8905937671661377,
      "learning_rate": 0.000510467465949145,
      "loss": 3.2028,
      "step": 58174
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4429351091384888,
      "learning_rate": 0.0005104645509453109,
      "loss": 2.9801,
      "step": 58175
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.572791337966919,
      "learning_rate": 0.0005104616359023476,
      "loss": 3.0043,
      "step": 58176
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7192742824554443,
      "learning_rate": 0.0005104587208202556,
      "loss": 3.0871,
      "step": 58177
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5764328241348267,
      "learning_rate": 0.0005104558056990351,
      "loss": 3.1423,
      "step": 58178
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.603440284729004,
      "learning_rate": 0.0005104528905386871,
      "loss": 3.2663,
      "step": 58179
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4831552505493164,
      "learning_rate": 0.0005104499753392119,
      "loss": 3.1219,
      "step": 58180
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5593966245651245,
      "learning_rate": 0.00051044706010061,
      "loss": 3.1332,
      "step": 58181
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9175697565078735,
      "learning_rate": 0.0005104441448228821,
      "loss": 2.9464,
      "step": 58182
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.764800786972046,
      "learning_rate": 0.0005104412295060286,
      "loss": 2.8193,
      "step": 58183
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5053049325942993,
      "learning_rate": 0.0005104383141500502,
      "loss": 2.9101,
      "step": 58184
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4712039232254028,
      "learning_rate": 0.0005104353987549472,
      "loss": 3.1586,
      "step": 58185
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7665808200836182,
      "learning_rate": 0.0005104324833207204,
      "loss": 3.1239,
      "step": 58186
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6213663816452026,
      "learning_rate": 0.0005104295678473702,
      "loss": 3.0725,
      "step": 58187
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4322689771652222,
      "learning_rate": 0.0005104266523348971,
      "loss": 2.9405,
      "step": 58188
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4035568237304688,
      "learning_rate": 0.0005104237367833018,
      "loss": 3.265,
      "step": 58189
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4664204120635986,
      "learning_rate": 0.0005104208211925847,
      "loss": 3.1008,
      "step": 58190
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.358756184577942,
      "learning_rate": 0.0005104179055627464,
      "loss": 3.1046,
      "step": 58191
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.8886046409606934,
      "learning_rate": 0.0005104149898937875,
      "loss": 3.1398,
      "step": 58192
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4980409145355225,
      "learning_rate": 0.0005104120741857085,
      "loss": 3.02,
      "step": 58193
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.95779550075531,
      "learning_rate": 0.0005104091584385098,
      "loss": 2.9522,
      "step": 58194
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6964101791381836,
      "learning_rate": 0.0005104062426521922,
      "loss": 3.1046,
      "step": 58195
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5161476135253906,
      "learning_rate": 0.0005104033268267561,
      "loss": 2.9891,
      "step": 58196
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6602282524108887,
      "learning_rate": 0.0005104004109622019,
      "loss": 3.1602,
      "step": 58197
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.989856481552124,
      "learning_rate": 0.0005103974950585304,
      "loss": 3.0406,
      "step": 58198
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6473349332809448,
      "learning_rate": 0.0005103945791157421,
      "loss": 3.0871,
      "step": 58199
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6869456768035889,
      "learning_rate": 0.0005103916631338374,
      "loss": 2.9916,
      "step": 58200
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6634812355041504,
      "learning_rate": 0.0005103887471128169,
      "loss": 3.1988,
      "step": 58201
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3801504373550415,
      "learning_rate": 0.0005103858310526812,
      "loss": 2.8986,
      "step": 58202
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.867492437362671,
      "learning_rate": 0.0005103829149534307,
      "loss": 3.2363,
      "step": 58203
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.535363793373108,
      "learning_rate": 0.0005103799988150662,
      "loss": 2.8237,
      "step": 58204
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6140142679214478,
      "learning_rate": 0.000510377082637588,
      "loss": 3.143,
      "step": 58205
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.394915223121643,
      "learning_rate": 0.0005103741664209967,
      "loss": 3.1293,
      "step": 58206
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.484516143798828,
      "learning_rate": 0.0005103712501652929,
      "loss": 2.9424,
      "step": 58207
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3458456993103027,
      "learning_rate": 0.0005103683338704771,
      "loss": 3.0502,
      "step": 58208
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3863565921783447,
      "learning_rate": 0.00051036541753655,
      "loss": 3.1209,
      "step": 58209
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.572826623916626,
      "learning_rate": 0.0005103625011635118,
      "loss": 3.0821,
      "step": 58210
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7292495965957642,
      "learning_rate": 0.0005103595847513634,
      "loss": 3.026,
      "step": 58211
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3371108770370483,
      "learning_rate": 0.000510356668300105,
      "loss": 3.1027,
      "step": 58212
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.683615803718567,
      "learning_rate": 0.0005103537518097375,
      "loss": 2.9275,
      "step": 58213
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.212610960006714,
      "learning_rate": 0.0005103508352802612,
      "loss": 3.1803,
      "step": 58214
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.890288233757019,
      "learning_rate": 0.0005103479187116767,
      "loss": 3.0997,
      "step": 58215
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.652182102203369,
      "learning_rate": 0.0005103450021039846,
      "loss": 3.0895,
      "step": 58216
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3111541271209717,
      "learning_rate": 0.0005103420854571853,
      "loss": 2.9866,
      "step": 58217
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.664797306060791,
      "learning_rate": 0.0005103391687712795,
      "loss": 3.1052,
      "step": 58218
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6714115142822266,
      "learning_rate": 0.0005103362520462676,
      "loss": 3.047,
      "step": 58219
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4323142766952515,
      "learning_rate": 0.0005103333352821503,
      "loss": 3.1234,
      "step": 58220
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2019994258880615,
      "learning_rate": 0.0005103304184789281,
      "loss": 2.5638,
      "step": 58221
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.257337808609009,
      "learning_rate": 0.0005103275016366014,
      "loss": 3.3018,
      "step": 58222
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.74580717086792,
      "learning_rate": 0.0005103245847551709,
      "loss": 3.0453,
      "step": 58223
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8807294368743896,
      "learning_rate": 0.0005103216678346371,
      "loss": 2.9674,
      "step": 58224
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4329413175582886,
      "learning_rate": 0.0005103187508750006,
      "loss": 3.2021,
      "step": 58225
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.624475121498108,
      "learning_rate": 0.0005103158338762617,
      "loss": 2.9665,
      "step": 58226
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5804362297058105,
      "learning_rate": 0.0005103129168384211,
      "loss": 3.1357,
      "step": 58227
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2157766819000244,
      "learning_rate": 0.0005103099997614796,
      "loss": 2.9575,
      "step": 58228
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4375115633010864,
      "learning_rate": 0.0005103070826454374,
      "loss": 2.8907,
      "step": 58229
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.710132598876953,
      "learning_rate": 0.000510304165490295,
      "loss": 2.9012,
      "step": 58230
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.959648609161377,
      "learning_rate": 0.0005103012482960532,
      "loss": 3.1281,
      "step": 58231
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5914552211761475,
      "learning_rate": 0.0005102983310627125,
      "loss": 2.9672,
      "step": 58232
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1005160808563232,
      "learning_rate": 0.0005102954137902732,
      "loss": 3.0783,
      "step": 58233
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.8319203853607178,
      "learning_rate": 0.0005102924964787361,
      "loss": 2.9739,
      "step": 58234
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6127593517303467,
      "learning_rate": 0.0005102895791281017,
      "loss": 3.0717,
      "step": 58235
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4583410024642944,
      "learning_rate": 0.0005102866617383705,
      "loss": 2.9561,
      "step": 58236
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3844304084777832,
      "learning_rate": 0.000510283744309543,
      "loss": 3.1273,
      "step": 58237
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.5665998458862305,
      "learning_rate": 0.0005102808268416198,
      "loss": 2.8653,
      "step": 58238
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9772745370864868,
      "learning_rate": 0.0005102779093346015,
      "loss": 3.1058,
      "step": 58239
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6385136842727661,
      "learning_rate": 0.0005102749917884884,
      "loss": 3.1416,
      "step": 58240
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.219867706298828,
      "learning_rate": 0.0005102720742032813,
      "loss": 2.7447,
      "step": 58241
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.577948808670044,
      "learning_rate": 0.0005102691565789806,
      "loss": 2.9032,
      "step": 58242
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.189544916152954,
      "learning_rate": 0.000510266238915587,
      "loss": 2.9232,
      "step": 58243
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6494863033294678,
      "learning_rate": 0.0005102633212131009,
      "loss": 2.7544,
      "step": 58244
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4920995235443115,
      "learning_rate": 0.0005102604034715228,
      "loss": 3.2222,
      "step": 58245
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.037471055984497,
      "learning_rate": 0.0005102574856908534,
      "loss": 2.9812,
      "step": 58246
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2050435543060303,
      "learning_rate": 0.0005102545678710931,
      "loss": 3.0934,
      "step": 58247
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8909403085708618,
      "learning_rate": 0.0005102516500122425,
      "loss": 3.215,
      "step": 58248
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7250468730926514,
      "learning_rate": 0.0005102487321143022,
      "loss": 2.8826,
      "step": 58249
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1160216331481934,
      "learning_rate": 0.0005102458141772727,
      "loss": 2.7571,
      "step": 58250
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3913072347640991,
      "learning_rate": 0.0005102428962011546,
      "loss": 2.8488,
      "step": 58251
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.055150032043457,
      "learning_rate": 0.0005102399781859483,
      "loss": 3.3347,
      "step": 58252
    },
    {
      "epoch": 0.76,
      "grad_norm": 4.371979236602783,
      "learning_rate": 0.0005102370601316544,
      "loss": 3.066,
      "step": 58253
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1247446537017822,
      "learning_rate": 0.0005102341420382734,
      "loss": 2.7497,
      "step": 58254
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.421448826789856,
      "learning_rate": 0.000510231223905806,
      "loss": 3.1092,
      "step": 58255
    },
    {
      "epoch": 0.76,
      "grad_norm": 4.183188438415527,
      "learning_rate": 0.0005102283057342526,
      "loss": 2.8919,
      "step": 58256
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.29952335357666,
      "learning_rate": 0.0005102253875236139,
      "loss": 2.8229,
      "step": 58257
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9196968078613281,
      "learning_rate": 0.0005102224692738902,
      "loss": 2.856,
      "step": 58258
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6370954513549805,
      "learning_rate": 0.0005102195509850822,
      "loss": 3.1096,
      "step": 58259
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.804642677307129,
      "learning_rate": 0.0005102166326571905,
      "loss": 2.9831,
      "step": 58260
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1419687271118164,
      "learning_rate": 0.0005102137142902154,
      "loss": 3.1025,
      "step": 58261
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4949615001678467,
      "learning_rate": 0.0005102107958841578,
      "loss": 3.0312,
      "step": 58262
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6455104351043701,
      "learning_rate": 0.000510207877439018,
      "loss": 2.896,
      "step": 58263
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0524497032165527,
      "learning_rate": 0.0005102049589547965,
      "loss": 3.3585,
      "step": 58264
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.089694023132324,
      "learning_rate": 0.000510202040431494,
      "loss": 3.0956,
      "step": 58265
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7456812858581543,
      "learning_rate": 0.000510199121869111,
      "loss": 2.8444,
      "step": 58266
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0185649394989014,
      "learning_rate": 0.0005101962032676479,
      "loss": 3.3126,
      "step": 58267
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4653050899505615,
      "learning_rate": 0.0005101932846271055,
      "loss": 2.8903,
      "step": 58268
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5082510709762573,
      "learning_rate": 0.000510190365947484,
      "loss": 2.965,
      "step": 58269
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.148895740509033,
      "learning_rate": 0.0005101874472287842,
      "loss": 2.9969,
      "step": 58270
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9954122304916382,
      "learning_rate": 0.0005101845284710067,
      "loss": 3.0351,
      "step": 58271
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3771164417266846,
      "learning_rate": 0.0005101816096741518,
      "loss": 3.4256,
      "step": 58272
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6038663387298584,
      "learning_rate": 0.0005101786908382203,
      "loss": 3.1897,
      "step": 58273
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.2333109378814697,
      "learning_rate": 0.0005101757719632126,
      "loss": 3.0535,
      "step": 58274
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2477803230285645,
      "learning_rate": 0.0005101728530491291,
      "loss": 2.7649,
      "step": 58275
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5026473999023438,
      "learning_rate": 0.0005101699340959707,
      "loss": 2.7326,
      "step": 58276
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.806164264678955,
      "learning_rate": 0.0005101670151037377,
      "loss": 3.0424,
      "step": 58277
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6310182809829712,
      "learning_rate": 0.0005101640960724305,
      "loss": 3.009,
      "step": 58278
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.000779867172241,
      "learning_rate": 0.00051016117700205,
      "loss": 2.6022,
      "step": 58279
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3330872058868408,
      "learning_rate": 0.0005101582578925966,
      "loss": 3.0331,
      "step": 58280
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.950577735900879,
      "learning_rate": 0.0005101553387440708,
      "loss": 3.0964,
      "step": 58281
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.040985584259033,
      "learning_rate": 0.000510152419556473,
      "loss": 3.0592,
      "step": 58282
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.694027304649353,
      "learning_rate": 0.0005101495003298041,
      "loss": 3.0316,
      "step": 58283
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4403914213180542,
      "learning_rate": 0.0005101465810640643,
      "loss": 2.769,
      "step": 58284
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6258622407913208,
      "learning_rate": 0.0005101436617592544,
      "loss": 2.855,
      "step": 58285
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.408616304397583,
      "learning_rate": 0.0005101407424153747,
      "loss": 3.1782,
      "step": 58286
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5696516036987305,
      "learning_rate": 0.0005101378230324259,
      "loss": 3.015,
      "step": 58287
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7985903024673462,
      "learning_rate": 0.0005101349036104086,
      "loss": 3.1279,
      "step": 58288
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5379568338394165,
      "learning_rate": 0.0005101319841493232,
      "loss": 3.2505,
      "step": 58289
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5877952575683594,
      "learning_rate": 0.0005101290646491702,
      "loss": 3.2779,
      "step": 58290
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.73014235496521,
      "learning_rate": 0.0005101261451099504,
      "loss": 3.1891,
      "step": 58291
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7129485607147217,
      "learning_rate": 0.0005101232255316641,
      "loss": 3.0762,
      "step": 58292
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3902580738067627,
      "learning_rate": 0.000510120305914312,
      "loss": 3.1211,
      "step": 58293
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7429122924804688,
      "learning_rate": 0.0005101173862578944,
      "loss": 2.8637,
      "step": 58294
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.498447299003601,
      "learning_rate": 0.0005101144665624122,
      "loss": 3.0963,
      "step": 58295
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6831809282302856,
      "learning_rate": 0.0005101115468278657,
      "loss": 2.9389,
      "step": 58296
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7162641286849976,
      "learning_rate": 0.0005101086270542554,
      "loss": 2.8951,
      "step": 58297
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8618441820144653,
      "learning_rate": 0.000510105707241582,
      "loss": 2.9934,
      "step": 58298
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5602186918258667,
      "learning_rate": 0.000510102787389846,
      "loss": 3.1483,
      "step": 58299
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6318491697311401,
      "learning_rate": 0.0005100998674990479,
      "loss": 3.0463,
      "step": 58300
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9816120862960815,
      "learning_rate": 0.0005100969475691884,
      "loss": 2.9511,
      "step": 58301
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.916149616241455,
      "learning_rate": 0.0005100940276002678,
      "loss": 3.0364,
      "step": 58302
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.623260498046875,
      "learning_rate": 0.0005100911075922867,
      "loss": 3.0206,
      "step": 58303
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7859551906585693,
      "learning_rate": 0.0005100881875452458,
      "loss": 3.0778,
      "step": 58304
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5001760721206665,
      "learning_rate": 0.0005100852674591455,
      "loss": 2.955,
      "step": 58305
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9569919109344482,
      "learning_rate": 0.0005100823473339864,
      "loss": 3.2234,
      "step": 58306
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8810451030731201,
      "learning_rate": 0.0005100794271697689,
      "loss": 2.9589,
      "step": 58307
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7949694395065308,
      "learning_rate": 0.0005100765069664938,
      "loss": 2.9836,
      "step": 58308
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.751080870628357,
      "learning_rate": 0.0005100735867241614,
      "loss": 3.2,
      "step": 58309
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.964523196220398,
      "learning_rate": 0.0005100706664427726,
      "loss": 3.087,
      "step": 58310
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7613887786865234,
      "learning_rate": 0.0005100677461223274,
      "loss": 2.9357,
      "step": 58311
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6955044269561768,
      "learning_rate": 0.0005100648257628268,
      "loss": 2.8396,
      "step": 58312
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.522896409034729,
      "learning_rate": 0.0005100619053642712,
      "loss": 3.0924,
      "step": 58313
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6221275329589844,
      "learning_rate": 0.000510058984926661,
      "loss": 2.9359,
      "step": 58314
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5609874725341797,
      "learning_rate": 0.000510056064449997,
      "loss": 3.0497,
      "step": 58315
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6396806240081787,
      "learning_rate": 0.0005100531439342795,
      "loss": 2.8117,
      "step": 58316
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8042999505996704,
      "learning_rate": 0.0005100502233795094,
      "loss": 3.0234,
      "step": 58317
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5867961645126343,
      "learning_rate": 0.0005100473027856867,
      "loss": 2.7809,
      "step": 58318
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6350066661834717,
      "learning_rate": 0.0005100443821528124,
      "loss": 2.7874,
      "step": 58319
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.791724920272827,
      "learning_rate": 0.0005100414614808868,
      "loss": 2.9473,
      "step": 58320
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7006930112838745,
      "learning_rate": 0.0005100385407699106,
      "loss": 3.1388,
      "step": 58321
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.289490818977356,
      "learning_rate": 0.0005100356200198842,
      "loss": 3.3843,
      "step": 58322
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.765889048576355,
      "learning_rate": 0.0005100326992308085,
      "loss": 2.8345,
      "step": 58323
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6148279905319214,
      "learning_rate": 0.0005100297784026834,
      "loss": 3.1143,
      "step": 58324
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.742268681526184,
      "learning_rate": 0.0005100268575355098,
      "loss": 3.2095,
      "step": 58325
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5849459171295166,
      "learning_rate": 0.0005100239366292885,
      "loss": 3.0271,
      "step": 58326
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.472357988357544,
      "learning_rate": 0.0005100210156840196,
      "loss": 3.0391,
      "step": 58327
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7244993448257446,
      "learning_rate": 0.0005100180946997039,
      "loss": 2.8693,
      "step": 58328
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.467515468597412,
      "learning_rate": 0.0005100151736763418,
      "loss": 2.9842,
      "step": 58329
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6526343822479248,
      "learning_rate": 0.000510012252613934,
      "loss": 2.9929,
      "step": 58330
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7463932037353516,
      "learning_rate": 0.0005100093315124808,
      "loss": 3.103,
      "step": 58331
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.727919101715088,
      "learning_rate": 0.0005100064103719832,
      "loss": 3.0435,
      "step": 58332
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5722100734710693,
      "learning_rate": 0.0005100034891924412,
      "loss": 3.2287,
      "step": 58333
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5688096284866333,
      "learning_rate": 0.0005100005679738556,
      "loss": 3.193,
      "step": 58334
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5567625761032104,
      "learning_rate": 0.0005099976467162271,
      "loss": 2.9237,
      "step": 58335
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8610049486160278,
      "learning_rate": 0.0005099947254195559,
      "loss": 3.227,
      "step": 58336
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5451164245605469,
      "learning_rate": 0.0005099918040838427,
      "loss": 3.1731,
      "step": 58337
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5814753770828247,
      "learning_rate": 0.0005099888827090882,
      "loss": 3.2566,
      "step": 58338
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3549988269805908,
      "learning_rate": 0.0005099859612952927,
      "loss": 3.2142,
      "step": 58339
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.160736322402954,
      "learning_rate": 0.0005099830398424569,
      "loss": 3.1537,
      "step": 58340
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8652549982070923,
      "learning_rate": 0.0005099801183505812,
      "loss": 3.2501,
      "step": 58341
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6219052076339722,
      "learning_rate": 0.0005099771968196665,
      "loss": 3.1155,
      "step": 58342
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0969486236572266,
      "learning_rate": 0.0005099742752497127,
      "loss": 2.8486,
      "step": 58343
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.176687240600586,
      "learning_rate": 0.0005099713536407209,
      "loss": 2.9531,
      "step": 58344
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.626110315322876,
      "learning_rate": 0.0005099684319926916,
      "loss": 3.0705,
      "step": 58345
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6701537370681763,
      "learning_rate": 0.000509965510305625,
      "loss": 2.7899,
      "step": 58346
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.548952341079712,
      "learning_rate": 0.0005099625885795219,
      "loss": 2.8506,
      "step": 58347
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6369352340698242,
      "learning_rate": 0.0005099596668143828,
      "loss": 3.092,
      "step": 58348
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5027575492858887,
      "learning_rate": 0.0005099567450102081,
      "loss": 3.3228,
      "step": 58349
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6324288845062256,
      "learning_rate": 0.0005099538231669987,
      "loss": 2.9266,
      "step": 58350
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4669469594955444,
      "learning_rate": 0.0005099509012847548,
      "loss": 3.1373,
      "step": 58351
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4307336807250977,
      "learning_rate": 0.0005099479793634772,
      "loss": 3.0561,
      "step": 58352
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3824130296707153,
      "learning_rate": 0.0005099450574031661,
      "loss": 3.3832,
      "step": 58353
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5162298679351807,
      "learning_rate": 0.0005099421354038225,
      "loss": 3.2543,
      "step": 58354
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6877144575119019,
      "learning_rate": 0.0005099392133654466,
      "loss": 3.0623,
      "step": 58355
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7019468545913696,
      "learning_rate": 0.0005099362912880388,
      "loss": 2.8179,
      "step": 58356
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7300398349761963,
      "learning_rate": 0.0005099333691716001,
      "loss": 2.8443,
      "step": 58357
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.117534637451172,
      "learning_rate": 0.0005099304470161308,
      "loss": 3.1144,
      "step": 58358
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5953186750411987,
      "learning_rate": 0.0005099275248216314,
      "loss": 2.9831,
      "step": 58359
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.483424186706543,
      "learning_rate": 0.0005099246025881027,
      "loss": 2.7807,
      "step": 58360
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.292883276939392,
      "learning_rate": 0.0005099216803155449,
      "loss": 3.1442,
      "step": 58361
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3757416009902954,
      "learning_rate": 0.0005099187580039587,
      "loss": 3.1897,
      "step": 58362
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6172200441360474,
      "learning_rate": 0.0005099158356533446,
      "loss": 3.1415,
      "step": 58363
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.949149250984192,
      "learning_rate": 0.0005099129132637033,
      "loss": 2.9816,
      "step": 58364
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1190805435180664,
      "learning_rate": 0.0005099099908350351,
      "loss": 2.8826,
      "step": 58365
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4417729377746582,
      "learning_rate": 0.0005099070683673409,
      "loss": 3.1596,
      "step": 58366
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6143972873687744,
      "learning_rate": 0.0005099041458606208,
      "loss": 3.0933,
      "step": 58367
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.144113540649414,
      "learning_rate": 0.0005099012233148756,
      "loss": 3.2735,
      "step": 58368
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9199756383895874,
      "learning_rate": 0.0005098983007301058,
      "loss": 2.9435,
      "step": 58369
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.748550534248352,
      "learning_rate": 0.000509895378106312,
      "loss": 2.9989,
      "step": 58370
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5281035900115967,
      "learning_rate": 0.0005098924554434946,
      "loss": 3.0476,
      "step": 58371
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9127225875854492,
      "learning_rate": 0.0005098895327416543,
      "loss": 3.0043,
      "step": 58372
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3098986148834229,
      "learning_rate": 0.0005098866100007915,
      "loss": 3.0851,
      "step": 58373
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.41238534450531,
      "learning_rate": 0.0005098836872209068,
      "loss": 3.1966,
      "step": 58374
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5248329639434814,
      "learning_rate": 0.0005098807644020009,
      "loss": 3.1809,
      "step": 58375
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.657415509223938,
      "learning_rate": 0.0005098778415440742,
      "loss": 2.8918,
      "step": 58376
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2769124507904053,
      "learning_rate": 0.0005098749186471271,
      "loss": 3.2033,
      "step": 58377
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4013900756835938,
      "learning_rate": 0.0005098719957111604,
      "loss": 2.8248,
      "step": 58378
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5293524265289307,
      "learning_rate": 0.0005098690727361745,
      "loss": 3.1226,
      "step": 58379
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9016859531402588,
      "learning_rate": 0.00050986614972217,
      "loss": 3.0869,
      "step": 58380
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8990978002548218,
      "learning_rate": 0.0005098632266691475,
      "loss": 2.9326,
      "step": 58381
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4304873943328857,
      "learning_rate": 0.0005098603035771074,
      "loss": 3.1268,
      "step": 58382
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7734413146972656,
      "learning_rate": 0.0005098573804460502,
      "loss": 2.9566,
      "step": 58383
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5782629251480103,
      "learning_rate": 0.0005098544572759766,
      "loss": 3.1805,
      "step": 58384
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7176231145858765,
      "learning_rate": 0.0005098515340668871,
      "loss": 3.0924,
      "step": 58385
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.20165753364563,
      "learning_rate": 0.0005098486108187823,
      "loss": 2.8304,
      "step": 58386
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7402185201644897,
      "learning_rate": 0.0005098456875316627,
      "loss": 2.8817,
      "step": 58387
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0350167751312256,
      "learning_rate": 0.0005098427642055287,
      "loss": 2.9326,
      "step": 58388
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7442524433135986,
      "learning_rate": 0.000509839840840381,
      "loss": 3.1094,
      "step": 58389
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8123044967651367,
      "learning_rate": 0.0005098369174362201,
      "loss": 2.978,
      "step": 58390
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7550756931304932,
      "learning_rate": 0.0005098339939930467,
      "loss": 2.9297,
      "step": 58391
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6435751914978027,
      "learning_rate": 0.0005098310705108611,
      "loss": 2.9519,
      "step": 58392
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.08689546585083,
      "learning_rate": 0.000509828146989664,
      "loss": 2.9944,
      "step": 58393
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6023503541946411,
      "learning_rate": 0.0005098252234294558,
      "loss": 3.0978,
      "step": 58394
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9629467725753784,
      "learning_rate": 0.0005098222998302372,
      "loss": 3.0196,
      "step": 58395
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8770304918289185,
      "learning_rate": 0.0005098193761920086,
      "loss": 3.3959,
      "step": 58396
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.541488766670227,
      "learning_rate": 0.0005098164525147706,
      "loss": 3.0326,
      "step": 58397
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.0115489959716797,
      "learning_rate": 0.0005098135287985239,
      "loss": 2.8921,
      "step": 58398
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6803646087646484,
      "learning_rate": 0.0005098106050432688,
      "loss": 3.0108,
      "step": 58399
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7758731842041016,
      "learning_rate": 0.0005098076812490059,
      "loss": 3.1193,
      "step": 58400
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5715482234954834,
      "learning_rate": 0.0005098047574157359,
      "loss": 2.9573,
      "step": 58401
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4462647438049316,
      "learning_rate": 0.0005098018335434592,
      "loss": 2.8748,
      "step": 58402
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7759462594985962,
      "learning_rate": 0.0005097989096321764,
      "loss": 3.0434,
      "step": 58403
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5674402713775635,
      "learning_rate": 0.000509795985681888,
      "loss": 3.1092,
      "step": 58404
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.531531810760498,
      "learning_rate": 0.0005097930616925946,
      "loss": 3.0045,
      "step": 58405
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0101544857025146,
      "learning_rate": 0.0005097901376642966,
      "loss": 3.0122,
      "step": 58406
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6717183589935303,
      "learning_rate": 0.0005097872135969947,
      "loss": 3.0034,
      "step": 58407
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5712435245513916,
      "learning_rate": 0.0005097842894906894,
      "loss": 2.8402,
      "step": 58408
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5419731140136719,
      "learning_rate": 0.0005097813653453814,
      "loss": 3.3128,
      "step": 58409
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2476108074188232,
      "learning_rate": 0.0005097784411610709,
      "loss": 2.9002,
      "step": 58410
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.608135461807251,
      "learning_rate": 0.0005097755169377587,
      "loss": 3.0922,
      "step": 58411
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8189361095428467,
      "learning_rate": 0.0005097725926754452,
      "loss": 2.9057,
      "step": 58412
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.090017795562744,
      "learning_rate": 0.0005097696683741311,
      "loss": 3.1339,
      "step": 58413
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.644000768661499,
      "learning_rate": 0.0005097667440338169,
      "loss": 3.1294,
      "step": 58414
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.452449083328247,
      "learning_rate": 0.000509763819654503,
      "loss": 3.1713,
      "step": 58415
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9628217220306396,
      "learning_rate": 0.00050976089523619,
      "loss": 3.0187,
      "step": 58416
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5491474866867065,
      "learning_rate": 0.0005097579707788786,
      "loss": 3.0418,
      "step": 58417
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6206939220428467,
      "learning_rate": 0.0005097550462825692,
      "loss": 3.0775,
      "step": 58418
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5304346084594727,
      "learning_rate": 0.0005097521217472624,
      "loss": 3.0076,
      "step": 58419
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6974339485168457,
      "learning_rate": 0.0005097491971729586,
      "loss": 2.7538,
      "step": 58420
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.021303415298462,
      "learning_rate": 0.0005097462725596587,
      "loss": 3.0145,
      "step": 58421
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7342746257781982,
      "learning_rate": 0.0005097433479073629,
      "loss": 2.8684,
      "step": 58422
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6090346574783325,
      "learning_rate": 0.0005097404232160717,
      "loss": 3.1164,
      "step": 58423
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.157221794128418,
      "learning_rate": 0.000509737498485786,
      "loss": 3.2873,
      "step": 58424
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9627703428268433,
      "learning_rate": 0.000509734573716506,
      "loss": 3.085,
      "step": 58425
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6161295175552368,
      "learning_rate": 0.0005097316489082324,
      "loss": 3.0235,
      "step": 58426
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7123780250549316,
      "learning_rate": 0.0005097287240609658,
      "loss": 3.0753,
      "step": 58427
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5387651920318604,
      "learning_rate": 0.0005097257991747066,
      "loss": 3.1194,
      "step": 58428
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0548484325408936,
      "learning_rate": 0.0005097228742494555,
      "loss": 3.1043,
      "step": 58429
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.218848705291748,
      "learning_rate": 0.0005097199492852129,
      "loss": 3.1703,
      "step": 58430
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.533339500427246,
      "learning_rate": 0.0005097170242819794,
      "loss": 3.0862,
      "step": 58431
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5156807899475098,
      "learning_rate": 0.0005097140992397556,
      "loss": 2.8098,
      "step": 58432
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5480097532272339,
      "learning_rate": 0.000509711174158542,
      "loss": 2.7743,
      "step": 58433
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5286576747894287,
      "learning_rate": 0.0005097082490383391,
      "loss": 3.1309,
      "step": 58434
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6968547105789185,
      "learning_rate": 0.0005097053238791474,
      "loss": 3.088,
      "step": 58435
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6228123903274536,
      "learning_rate": 0.0005097023986809676,
      "loss": 2.8289,
      "step": 58436
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5510345697402954,
      "learning_rate": 0.0005096994734438002,
      "loss": 3.2188,
      "step": 58437
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.278165340423584,
      "learning_rate": 0.0005096965481676455,
      "loss": 3.0139,
      "step": 58438
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.621004581451416,
      "learning_rate": 0.0005096936228525045,
      "loss": 2.8585,
      "step": 58439
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.933658242225647,
      "learning_rate": 0.0005096906974983774,
      "loss": 2.8228,
      "step": 58440
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5582525730133057,
      "learning_rate": 0.0005096877721052649,
      "loss": 2.9285,
      "step": 58441
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3563674688339233,
      "learning_rate": 0.0005096848466731675,
      "loss": 3.0724,
      "step": 58442
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4876201152801514,
      "learning_rate": 0.0005096819212020856,
      "loss": 3.0699,
      "step": 58443
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4113454818725586,
      "learning_rate": 0.0005096789956920199,
      "loss": 3.0456,
      "step": 58444
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5568736791610718,
      "learning_rate": 0.000509676070142971,
      "loss": 3.0422,
      "step": 58445
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3791652917861938,
      "learning_rate": 0.0005096731445549394,
      "loss": 2.9628,
      "step": 58446
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.649196982383728,
      "learning_rate": 0.0005096702189279254,
      "loss": 3.0004,
      "step": 58447
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5818029642105103,
      "learning_rate": 0.0005096672932619299,
      "loss": 2.722,
      "step": 58448
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6666346788406372,
      "learning_rate": 0.0005096643675569532,
      "loss": 3.0533,
      "step": 58449
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4888825416564941,
      "learning_rate": 0.0005096614418129961,
      "loss": 2.9219,
      "step": 58450
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4322298765182495,
      "learning_rate": 0.0005096585160300588,
      "loss": 3.1718,
      "step": 58451
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6971709728240967,
      "learning_rate": 0.0005096555902081422,
      "loss": 2.8256,
      "step": 58452
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4642839431762695,
      "learning_rate": 0.0005096526643472466,
      "loss": 3.0013,
      "step": 58453
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7115176916122437,
      "learning_rate": 0.0005096497384473725,
      "loss": 3.3823,
      "step": 58454
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4424821138381958,
      "learning_rate": 0.0005096468125085206,
      "loss": 3.0156,
      "step": 58455
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7687220573425293,
      "learning_rate": 0.0005096438865306915,
      "loss": 2.9952,
      "step": 58456
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7771368026733398,
      "learning_rate": 0.0005096409605138856,
      "loss": 2.9001,
      "step": 58457
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2837868928909302,
      "learning_rate": 0.0005096380344581034,
      "loss": 3.092,
      "step": 58458
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.489235520362854,
      "learning_rate": 0.0005096351083633457,
      "loss": 2.8426,
      "step": 58459
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5172945261001587,
      "learning_rate": 0.0005096321822296127,
      "loss": 3.1252,
      "step": 58460
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.81956946849823,
      "learning_rate": 0.0005096292560569052,
      "loss": 3.0349,
      "step": 58461
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7963072061538696,
      "learning_rate": 0.0005096263298452237,
      "loss": 3.0885,
      "step": 58462
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.415039539337158,
      "learning_rate": 0.0005096234035945687,
      "loss": 3.0109,
      "step": 58463
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6184626817703247,
      "learning_rate": 0.0005096204773049407,
      "loss": 3.0178,
      "step": 58464
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.493347406387329,
      "learning_rate": 0.0005096175509763404,
      "loss": 2.9724,
      "step": 58465
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7773975133895874,
      "learning_rate": 0.0005096146246087682,
      "loss": 3.1717,
      "step": 58466
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5885679721832275,
      "learning_rate": 0.0005096116982022245,
      "loss": 3.0976,
      "step": 58467
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7402796745300293,
      "learning_rate": 0.0005096087717567102,
      "loss": 2.9494,
      "step": 58468
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.734519362449646,
      "learning_rate": 0.0005096058452722257,
      "loss": 2.8356,
      "step": 58469
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7746870517730713,
      "learning_rate": 0.0005096029187487715,
      "loss": 3.3295,
      "step": 58470
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5536794662475586,
      "learning_rate": 0.0005095999921863481,
      "loss": 3.0469,
      "step": 58471
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1683263778686523,
      "learning_rate": 0.0005095970655849562,
      "loss": 2.8948,
      "step": 58472
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7250359058380127,
      "learning_rate": 0.0005095941389445961,
      "loss": 3.2126,
      "step": 58473
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3507728576660156,
      "learning_rate": 0.0005095912122652685,
      "loss": 2.9644,
      "step": 58474
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8555411100387573,
      "learning_rate": 0.0005095882855469741,
      "loss": 3.0235,
      "step": 58475
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7117934226989746,
      "learning_rate": 0.0005095853587897132,
      "loss": 2.9996,
      "step": 58476
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0587143898010254,
      "learning_rate": 0.0005095824319934864,
      "loss": 3.1162,
      "step": 58477
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4995205402374268,
      "learning_rate": 0.0005095795051582943,
      "loss": 3.1611,
      "step": 58478
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.598325490951538,
      "learning_rate": 0.0005095765782841375,
      "loss": 3.0406,
      "step": 58479
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1823947429656982,
      "learning_rate": 0.0005095736513710162,
      "loss": 3.0438,
      "step": 58480
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6267293691635132,
      "learning_rate": 0.0005095707244189314,
      "loss": 3.1885,
      "step": 58481
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4359767436981201,
      "learning_rate": 0.0005095677974278834,
      "loss": 2.9542,
      "step": 58482
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.833522081375122,
      "learning_rate": 0.0005095648703978728,
      "loss": 3.0958,
      "step": 58483
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8069998025894165,
      "learning_rate": 0.0005095619433289001,
      "loss": 2.8863,
      "step": 58484
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6283445358276367,
      "learning_rate": 0.000509559016220966,
      "loss": 2.8836,
      "step": 58485
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4090673923492432,
      "learning_rate": 0.0005095560890740708,
      "loss": 3.1894,
      "step": 58486
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8082679510116577,
      "learning_rate": 0.0005095531618882152,
      "loss": 3.1632,
      "step": 58487
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7819311618804932,
      "learning_rate": 0.0005095502346633998,
      "loss": 3.0631,
      "step": 58488
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3201290369033813,
      "learning_rate": 0.0005095473073996249,
      "loss": 3.1817,
      "step": 58489
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.327970504760742,
      "learning_rate": 0.0005095443800968913,
      "loss": 2.9432,
      "step": 58490
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.998079538345337,
      "learning_rate": 0.0005095414527551994,
      "loss": 2.8339,
      "step": 58491
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0244433879852295,
      "learning_rate": 0.0005095385253745498,
      "loss": 3.1506,
      "step": 58492
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8604581356048584,
      "learning_rate": 0.000509535597954943,
      "loss": 3.0649,
      "step": 58493
    },
    {
      "epoch": 0.76,
      "grad_norm": 4.010746955871582,
      "learning_rate": 0.0005095326704963796,
      "loss": 3.0713,
      "step": 58494
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.9181432723999023,
      "learning_rate": 0.0005095297429988601,
      "loss": 2.8994,
      "step": 58495
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5169049501419067,
      "learning_rate": 0.0005095268154623852,
      "loss": 2.9747,
      "step": 58496
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7698698043823242,
      "learning_rate": 0.0005095238878869552,
      "loss": 3.1582,
      "step": 58497
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.929525375366211,
      "learning_rate": 0.0005095209602725707,
      "loss": 3.0671,
      "step": 58498
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.262063980102539,
      "learning_rate": 0.0005095180326192324,
      "loss": 3.0777,
      "step": 58499
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.404287576675415,
      "learning_rate": 0.0005095151049269407,
      "loss": 3.0034,
      "step": 58500
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.9328453540802,
      "learning_rate": 0.0005095121771956962,
      "loss": 2.7675,
      "step": 58501
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7320702075958252,
      "learning_rate": 0.0005095092494254995,
      "loss": 3.1649,
      "step": 58502
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.632323980331421,
      "learning_rate": 0.000509506321616351,
      "loss": 2.9044,
      "step": 58503
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5871082544326782,
      "learning_rate": 0.0005095033937682513,
      "loss": 2.8832,
      "step": 58504
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4128320217132568,
      "learning_rate": 0.0005095004658812009,
      "loss": 2.9495,
      "step": 58505
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5538743734359741,
      "learning_rate": 0.0005094975379552006,
      "loss": 3.0656,
      "step": 58506
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3316993713378906,
      "learning_rate": 0.0005094946099902506,
      "loss": 3.4085,
      "step": 58507
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4591130018234253,
      "learning_rate": 0.0005094916819863516,
      "loss": 3.1104,
      "step": 58508
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.496924638748169,
      "learning_rate": 0.0005094887539435042,
      "loss": 3.1583,
      "step": 58509
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4615625143051147,
      "learning_rate": 0.0005094858258617088,
      "loss": 3.0263,
      "step": 58510
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3805549144744873,
      "learning_rate": 0.0005094828977409662,
      "loss": 3.0886,
      "step": 58511
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6820794343948364,
      "learning_rate": 0.0005094799695812767,
      "loss": 3.249,
      "step": 58512
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1697182655334473,
      "learning_rate": 0.0005094770413826408,
      "loss": 3.2595,
      "step": 58513
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.944783329963684,
      "learning_rate": 0.0005094741131450594,
      "loss": 2.7711,
      "step": 58514
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.385047197341919,
      "learning_rate": 0.0005094711848685326,
      "loss": 3.3422,
      "step": 58515
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.783465027809143,
      "learning_rate": 0.0005094682565530611,
      "loss": 3.1835,
      "step": 58516
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6462112665176392,
      "learning_rate": 0.0005094653281986457,
      "loss": 2.8596,
      "step": 58517
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4734587669372559,
      "learning_rate": 0.0005094623998052867,
      "loss": 2.826,
      "step": 58518
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9165130853652954,
      "learning_rate": 0.0005094594713729847,
      "loss": 2.9378,
      "step": 58519
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4665502309799194,
      "learning_rate": 0.0005094565429017401,
      "loss": 3.1002,
      "step": 58520
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7242037057876587,
      "learning_rate": 0.0005094536143915537,
      "loss": 2.8063,
      "step": 58521
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5781751871109009,
      "learning_rate": 0.0005094506858424259,
      "loss": 2.9618,
      "step": 58522
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7135158777236938,
      "learning_rate": 0.0005094477572543572,
      "loss": 2.8749,
      "step": 58523
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6919695138931274,
      "learning_rate": 0.0005094448286273482,
      "loss": 2.9815,
      "step": 58524
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7935969829559326,
      "learning_rate": 0.0005094418999613995,
      "loss": 3.0415,
      "step": 58525
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7624115943908691,
      "learning_rate": 0.0005094389712565116,
      "loss": 3.0657,
      "step": 58526
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5493252277374268,
      "learning_rate": 0.000509436042512685,
      "loss": 3.0718,
      "step": 58527
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.500321626663208,
      "learning_rate": 0.0005094331137299203,
      "loss": 2.9567,
      "step": 58528
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.84445059299469,
      "learning_rate": 0.000509430184908218,
      "loss": 3.1614,
      "step": 58529
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.525285005569458,
      "learning_rate": 0.0005094272560475789,
      "loss": 3.2106,
      "step": 58530
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3893181085586548,
      "learning_rate": 0.0005094243271480031,
      "loss": 3.1973,
      "step": 58531
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.031118154525757,
      "learning_rate": 0.0005094213982094913,
      "loss": 2.9671,
      "step": 58532
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4490188360214233,
      "learning_rate": 0.0005094184692320444,
      "loss": 2.9409,
      "step": 58533
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7517322301864624,
      "learning_rate": 0.0005094155402156624,
      "loss": 3.1934,
      "step": 58534
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6339982748031616,
      "learning_rate": 0.0005094126111603462,
      "loss": 3.1252,
      "step": 58535
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6206709146499634,
      "learning_rate": 0.0005094096820660961,
      "loss": 3.1118,
      "step": 58536
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4636822938919067,
      "learning_rate": 0.000509406752932913,
      "loss": 2.8181,
      "step": 58537
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4355223178863525,
      "learning_rate": 0.0005094038237607972,
      "loss": 2.8111,
      "step": 58538
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.511156678199768,
      "learning_rate": 0.0005094008945497492,
      "loss": 2.9751,
      "step": 58539
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6474858522415161,
      "learning_rate": 0.0005093979652997696,
      "loss": 3.1557,
      "step": 58540
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.715957760810852,
      "learning_rate": 0.000509395036010859,
      "loss": 3.1157,
      "step": 58541
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5316332578659058,
      "learning_rate": 0.0005093921066830179,
      "loss": 2.7355,
      "step": 58542
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4582642316818237,
      "learning_rate": 0.0005093891773162468,
      "loss": 3.2189,
      "step": 58543
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5677980184555054,
      "learning_rate": 0.0005093862479105464,
      "loss": 3.1523,
      "step": 58544
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4989131689071655,
      "learning_rate": 0.000509383318465917,
      "loss": 2.934,
      "step": 58545
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9830487966537476,
      "learning_rate": 0.0005093803889823595,
      "loss": 2.9747,
      "step": 58546
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.2143288850784302,
      "learning_rate": 0.0005093774594598741,
      "loss": 3.0441,
      "step": 58547
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.67433762550354,
      "learning_rate": 0.0005093745298984615,
      "loss": 2.9357,
      "step": 58548
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4524884223937988,
      "learning_rate": 0.0005093716002981223,
      "loss": 3.0873,
      "step": 58549
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5219584703445435,
      "learning_rate": 0.0005093686706588569,
      "loss": 3.0598,
      "step": 58550
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6338623762130737,
      "learning_rate": 0.0005093657409806658,
      "loss": 2.9401,
      "step": 58551
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4202872514724731,
      "learning_rate": 0.0005093628112635498,
      "loss": 3.0698,
      "step": 58552
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3919627666473389,
      "learning_rate": 0.0005093598815075093,
      "loss": 2.8253,
      "step": 58553
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7109745740890503,
      "learning_rate": 0.0005093569517125448,
      "loss": 3.1789,
      "step": 58554
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5511040687561035,
      "learning_rate": 0.0005093540218786569,
      "loss": 3.1003,
      "step": 58555
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.901930570602417,
      "learning_rate": 0.0005093510920058462,
      "loss": 3.0934,
      "step": 58556
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8134087324142456,
      "learning_rate": 0.000509348162094113,
      "loss": 3.0858,
      "step": 58557
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5988445281982422,
      "learning_rate": 0.0005093452321434581,
      "loss": 3.2063,
      "step": 58558
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.4741556644439697,
      "learning_rate": 0.0005093423021538822,
      "loss": 3.0208,
      "step": 58559
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.395792007446289,
      "learning_rate": 0.0005093393721253853,
      "loss": 3.3072,
      "step": 58560
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6141613721847534,
      "learning_rate": 0.0005093364420579685,
      "loss": 3.0557,
      "step": 58561
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.288784980773926,
      "learning_rate": 0.000509333511951632,
      "loss": 2.8678,
      "step": 58562
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.2389674186706543,
      "learning_rate": 0.0005093305818063763,
      "loss": 2.9004,
      "step": 58563
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6602271795272827,
      "learning_rate": 0.0005093276516222023,
      "loss": 2.6596,
      "step": 58564
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.508559226989746,
      "learning_rate": 0.0005093247213991103,
      "loss": 3.0745,
      "step": 58565
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.018925666809082,
      "learning_rate": 0.0005093217911371008,
      "loss": 2.9275,
      "step": 58566
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.6857059001922607,
      "learning_rate": 0.0005093188608361745,
      "loss": 3.1812,
      "step": 58567
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4710923433303833,
      "learning_rate": 0.0005093159304963319,
      "loss": 3.1686,
      "step": 58568
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3542400598526,
      "learning_rate": 0.0005093130001175734,
      "loss": 3.3383,
      "step": 58569
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6527011394500732,
      "learning_rate": 0.0005093100696998997,
      "loss": 3.1215,
      "step": 58570
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5057153701782227,
      "learning_rate": 0.0005093071392433114,
      "loss": 2.9278,
      "step": 58571
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5141185522079468,
      "learning_rate": 0.0005093042087478089,
      "loss": 3.1152,
      "step": 58572
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.448556661605835,
      "learning_rate": 0.0005093012782133927,
      "loss": 2.9569,
      "step": 58573
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.320189118385315,
      "learning_rate": 0.0005092983476400636,
      "loss": 2.9821,
      "step": 58574
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.628540277481079,
      "learning_rate": 0.0005092954170278219,
      "loss": 3.2711,
      "step": 58575
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.181644916534424,
      "learning_rate": 0.0005092924863766683,
      "loss": 3.0701,
      "step": 58576
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8599175214767456,
      "learning_rate": 0.0005092895556866032,
      "loss": 3.1275,
      "step": 58577
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.049506187438965,
      "learning_rate": 0.0005092866249576273,
      "loss": 2.957,
      "step": 58578
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7544987201690674,
      "learning_rate": 0.000509283694189741,
      "loss": 3.0595,
      "step": 58579
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3650325536727905,
      "learning_rate": 0.0005092807633829448,
      "loss": 3.0164,
      "step": 58580
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5540146827697754,
      "learning_rate": 0.0005092778325372395,
      "loss": 3.0807,
      "step": 58581
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.620640993118286,
      "learning_rate": 0.0005092749016526255,
      "loss": 3.2062,
      "step": 58582
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.691364288330078,
      "learning_rate": 0.0005092719707291034,
      "loss": 2.7939,
      "step": 58583
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7187182903289795,
      "learning_rate": 0.0005092690397666735,
      "loss": 2.9719,
      "step": 58584
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.343588352203369,
      "learning_rate": 0.0005092661087653368,
      "loss": 3.1868,
      "step": 58585
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.642601728439331,
      "learning_rate": 0.0005092631777250932,
      "loss": 3.0163,
      "step": 58586
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6527520418167114,
      "learning_rate": 0.000509260246645944,
      "loss": 3.1667,
      "step": 58587
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.53538978099823,
      "learning_rate": 0.0005092573155278892,
      "loss": 2.883,
      "step": 58588
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6200850009918213,
      "learning_rate": 0.0005092543843709295,
      "loss": 3.0391,
      "step": 58589
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3386130332946777,
      "learning_rate": 0.0005092514531750655,
      "loss": 3.1252,
      "step": 58590
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5443453788757324,
      "learning_rate": 0.0005092485219402977,
      "loss": 3.0188,
      "step": 58591
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.745621919631958,
      "learning_rate": 0.0005092455906666267,
      "loss": 2.7262,
      "step": 58592
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6904572248458862,
      "learning_rate": 0.0005092426593540529,
      "loss": 3.1783,
      "step": 58593
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1433024406433105,
      "learning_rate": 0.000509239728002577,
      "loss": 3.1067,
      "step": 58594
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5224201679229736,
      "learning_rate": 0.0005092367966121994,
      "loss": 3.0266,
      "step": 58595
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9730993509292603,
      "learning_rate": 0.0005092338651829209,
      "loss": 2.9051,
      "step": 58596
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4890851974487305,
      "learning_rate": 0.0005092309337147419,
      "loss": 3.1207,
      "step": 58597
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7341820001602173,
      "learning_rate": 0.0005092280022076627,
      "loss": 3.1594,
      "step": 58598
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.819152593612671,
      "learning_rate": 0.0005092250706616841,
      "loss": 3.1249,
      "step": 58599
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.519792914390564,
      "learning_rate": 0.0005092221390768068,
      "loss": 2.9796,
      "step": 58600
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.3769729137420654,
      "learning_rate": 0.000509219207453031,
      "loss": 2.9695,
      "step": 58601
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.549686074256897,
      "learning_rate": 0.0005092162757903574,
      "loss": 2.8217,
      "step": 58602
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5400304794311523,
      "learning_rate": 0.0005092133440887865,
      "loss": 3.0852,
      "step": 58603
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.17976450920105,
      "learning_rate": 0.000509210412348319,
      "loss": 2.7413,
      "step": 58604
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9083296060562134,
      "learning_rate": 0.0005092074805689553,
      "loss": 3.1885,
      "step": 58605
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2120823860168457,
      "learning_rate": 0.000509204548750696,
      "loss": 3.0803,
      "step": 58606
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3496270179748535,
      "learning_rate": 0.0005092016168935416,
      "loss": 2.8749,
      "step": 58607
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8710274696350098,
      "learning_rate": 0.0005091986849974926,
      "loss": 3.0072,
      "step": 58608
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6885658502578735,
      "learning_rate": 0.0005091957530625498,
      "loss": 2.9461,
      "step": 58609
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.1414079666137695,
      "learning_rate": 0.0005091928210887134,
      "loss": 2.9263,
      "step": 58610
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.870518684387207,
      "learning_rate": 0.0005091898890759842,
      "loss": 3.2362,
      "step": 58611
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4877145290374756,
      "learning_rate": 0.0005091869570243625,
      "loss": 2.9759,
      "step": 58612
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6026371717453003,
      "learning_rate": 0.0005091840249338491,
      "loss": 3.0223,
      "step": 58613
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3772995471954346,
      "learning_rate": 0.0005091810928044445,
      "loss": 3.1477,
      "step": 58614
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9778882265090942,
      "learning_rate": 0.0005091781606361491,
      "loss": 3.0623,
      "step": 58615
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.700421929359436,
      "learning_rate": 0.0005091752284289636,
      "loss": 2.9138,
      "step": 58616
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.345083713531494,
      "learning_rate": 0.0005091722961828884,
      "loss": 3.0305,
      "step": 58617
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9838038682937622,
      "learning_rate": 0.0005091693638979242,
      "loss": 2.9476,
      "step": 58618
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5769919157028198,
      "learning_rate": 0.0005091664315740714,
      "loss": 3.1847,
      "step": 58619
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3303502798080444,
      "learning_rate": 0.0005091634992113307,
      "loss": 3.1112,
      "step": 58620
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5909067392349243,
      "learning_rate": 0.0005091605668097025,
      "loss": 2.862,
      "step": 58621
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7477951049804688,
      "learning_rate": 0.0005091576343691874,
      "loss": 3.109,
      "step": 58622
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9638036489486694,
      "learning_rate": 0.0005091547018897859,
      "loss": 2.7746,
      "step": 58623
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5104161500930786,
      "learning_rate": 0.0005091517693714986,
      "loss": 3.0787,
      "step": 58624
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6465668678283691,
      "learning_rate": 0.0005091488368143261,
      "loss": 2.9455,
      "step": 58625
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6823039054870605,
      "learning_rate": 0.0005091459042182689,
      "loss": 2.8118,
      "step": 58626
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.108213424682617,
      "learning_rate": 0.0005091429715833274,
      "loss": 3.1893,
      "step": 58627
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.298895001411438,
      "learning_rate": 0.0005091400389095024,
      "loss": 3.0409,
      "step": 58628
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7387304306030273,
      "learning_rate": 0.0005091371061967942,
      "loss": 3.1749,
      "step": 58629
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3433095216751099,
      "learning_rate": 0.0005091341734452036,
      "loss": 3.1224,
      "step": 58630
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4136667251586914,
      "learning_rate": 0.000509131240654731,
      "loss": 3.0844,
      "step": 58631
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.668950080871582,
      "learning_rate": 0.0005091283078253769,
      "loss": 2.9008,
      "step": 58632
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9642657041549683,
      "learning_rate": 0.0005091253749571419,
      "loss": 3.1019,
      "step": 58633
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.441611409187317,
      "learning_rate": 0.0005091224420500266,
      "loss": 2.9459,
      "step": 58634
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.446117639541626,
      "learning_rate": 0.0005091195091040314,
      "loss": 3.1844,
      "step": 58635
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6073577404022217,
      "learning_rate": 0.000509116576119157,
      "loss": 3.1203,
      "step": 58636
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.571190357208252,
      "learning_rate": 0.0005091136430954038,
      "loss": 3.029,
      "step": 58637
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3999892473220825,
      "learning_rate": 0.0005091107100327726,
      "loss": 3.0665,
      "step": 58638
    },
    {
      "epoch": 0.76,
      "grad_norm": 4.115451812744141,
      "learning_rate": 0.0005091077769312636,
      "loss": 3.1184,
      "step": 58639
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5435291528701782,
      "learning_rate": 0.0005091048437908776,
      "loss": 2.8096,
      "step": 58640
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7140614986419678,
      "learning_rate": 0.0005091019106116151,
      "loss": 2.9245,
      "step": 58641
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6201609373092651,
      "learning_rate": 0.0005090989773934764,
      "loss": 3.1209,
      "step": 58642
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.699939489364624,
      "learning_rate": 0.0005090960441364624,
      "loss": 3.1854,
      "step": 58643
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5700618028640747,
      "learning_rate": 0.0005090931108405735,
      "loss": 2.9435,
      "step": 58644
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6922763586044312,
      "learning_rate": 0.0005090901775058103,
      "loss": 3.1427,
      "step": 58645
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.397249698638916,
      "learning_rate": 0.0005090872441321733,
      "loss": 2.8724,
      "step": 58646
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8448134660720825,
      "learning_rate": 0.0005090843107196629,
      "loss": 3.0,
      "step": 58647
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6366169452667236,
      "learning_rate": 0.0005090813772682798,
      "loss": 2.8606,
      "step": 58648
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.630423903465271,
      "learning_rate": 0.0005090784437780247,
      "loss": 3.0359,
      "step": 58649
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.0193755626678467,
      "learning_rate": 0.0005090755102488977,
      "loss": 2.9238,
      "step": 58650
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.722410798072815,
      "learning_rate": 0.0005090725766808998,
      "loss": 3.0091,
      "step": 58651
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.385438323020935,
      "learning_rate": 0.0005090696430740314,
      "loss": 2.996,
      "step": 58652
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4701749086380005,
      "learning_rate": 0.0005090667094282929,
      "loss": 2.9717,
      "step": 58653
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.908034324645996,
      "learning_rate": 0.0005090637757436849,
      "loss": 3.0696,
      "step": 58654
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5230798721313477,
      "learning_rate": 0.0005090608420202081,
      "loss": 3.178,
      "step": 58655
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6158949136734009,
      "learning_rate": 0.000509057908257863,
      "loss": 3.1117,
      "step": 58656
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3518292903900146,
      "learning_rate": 0.00050905497445665,
      "loss": 3.2088,
      "step": 58657
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6750792264938354,
      "learning_rate": 0.0005090520406165698,
      "loss": 3.2552,
      "step": 58658
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5222339630126953,
      "learning_rate": 0.0005090491067376227,
      "loss": 2.941,
      "step": 58659
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.758195400238037,
      "learning_rate": 0.0005090461728198095,
      "loss": 2.88,
      "step": 58660
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.58096444606781,
      "learning_rate": 0.0005090432388631307,
      "loss": 3.0469,
      "step": 58661
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9416251182556152,
      "learning_rate": 0.0005090403048675869,
      "loss": 3.2033,
      "step": 58662
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.3528950214385986,
      "learning_rate": 0.0005090373708331786,
      "loss": 2.8782,
      "step": 58663
    },
    {
      "epoch": 0.76,
      "grad_norm": 5.575139045715332,
      "learning_rate": 0.0005090344367599061,
      "loss": 3.2429,
      "step": 58664
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.675543785095215,
      "learning_rate": 0.0005090315026477703,
      "loss": 3.2174,
      "step": 58665
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.1099705696105957,
      "learning_rate": 0.0005090285684967715,
      "loss": 2.7709,
      "step": 58666
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.424946665763855,
      "learning_rate": 0.0005090256343069105,
      "loss": 3.281,
      "step": 58667
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3331712484359741,
      "learning_rate": 0.0005090227000781875,
      "loss": 3.2778,
      "step": 58668
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.116405963897705,
      "learning_rate": 0.0005090197658106033,
      "loss": 3.0784,
      "step": 58669
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3697304725646973,
      "learning_rate": 0.0005090168315041584,
      "loss": 3.0182,
      "step": 58670
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.413766860961914,
      "learning_rate": 0.0005090138971588533,
      "loss": 3.2061,
      "step": 58671
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6581199169158936,
      "learning_rate": 0.0005090109627746886,
      "loss": 2.9753,
      "step": 58672
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.10835862159729,
      "learning_rate": 0.0005090080283516649,
      "loss": 2.802,
      "step": 58673
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5671950578689575,
      "learning_rate": 0.0005090050938897825,
      "loss": 2.8503,
      "step": 58674
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5192664861679077,
      "learning_rate": 0.0005090021593890422,
      "loss": 3.1067,
      "step": 58675
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.496242880821228,
      "learning_rate": 0.0005089992248494444,
      "loss": 2.8099,
      "step": 58676
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.9319612979888916,
      "learning_rate": 0.0005089962902709897,
      "loss": 2.9747,
      "step": 58677
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.290675401687622,
      "learning_rate": 0.0005089933556536787,
      "loss": 3.048,
      "step": 58678
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5823720693588257,
      "learning_rate": 0.0005089904209975119,
      "loss": 3.1094,
      "step": 58679
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.295166492462158,
      "learning_rate": 0.0005089874863024897,
      "loss": 3.0495,
      "step": 58680
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7311205863952637,
      "learning_rate": 0.0005089845515686129,
      "loss": 2.9965,
      "step": 58681
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.7633538246154785,
      "learning_rate": 0.000508981616795882,
      "loss": 3.211,
      "step": 58682
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.54551362991333,
      "learning_rate": 0.0005089786819842974,
      "loss": 3.1054,
      "step": 58683
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.98201584815979,
      "learning_rate": 0.0005089757471338597,
      "loss": 2.9604,
      "step": 58684
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.371199131011963,
      "learning_rate": 0.0005089728122445695,
      "loss": 2.8462,
      "step": 58685
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2998580932617188,
      "learning_rate": 0.0005089698773164273,
      "loss": 2.8221,
      "step": 58686
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.647386908531189,
      "learning_rate": 0.0005089669423494335,
      "loss": 2.9272,
      "step": 58687
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.312295913696289,
      "learning_rate": 0.0005089640073435889,
      "loss": 3.0573,
      "step": 58688
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.541839838027954,
      "learning_rate": 0.000508961072298894,
      "loss": 2.8341,
      "step": 58689
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4476099014282227,
      "learning_rate": 0.0005089581372153493,
      "loss": 3.0062,
      "step": 58690
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.363943099975586,
      "learning_rate": 0.0005089552020929553,
      "loss": 2.9752,
      "step": 58691
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.074960708618164,
      "learning_rate": 0.0005089522669317126,
      "loss": 2.8142,
      "step": 58692
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.49743914604187,
      "learning_rate": 0.0005089493317316217,
      "loss": 2.9944,
      "step": 58693
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.089362382888794,
      "learning_rate": 0.0005089463964926832,
      "loss": 2.9339,
      "step": 58694
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5678397417068481,
      "learning_rate": 0.0005089434612148976,
      "loss": 3.0464,
      "step": 58695
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.8364970684051514,
      "learning_rate": 0.0005089405258982656,
      "loss": 3.0468,
      "step": 58696
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.793039321899414,
      "learning_rate": 0.0005089375905427874,
      "loss": 2.9934,
      "step": 58697
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5126354694366455,
      "learning_rate": 0.000508934655148464,
      "loss": 2.9994,
      "step": 58698
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.147252321243286,
      "learning_rate": 0.0005089317197152955,
      "loss": 3.1746,
      "step": 58699
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4569218158721924,
      "learning_rate": 0.0005089287842432826,
      "loss": 3.0488,
      "step": 58700
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4744465351104736,
      "learning_rate": 0.000508925848732426,
      "loss": 3.1165,
      "step": 58701
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4837385416030884,
      "learning_rate": 0.0005089229131827262,
      "loss": 2.9859,
      "step": 58702
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.416088342666626,
      "learning_rate": 0.0005089199775941837,
      "loss": 3.1612,
      "step": 58703
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5356460809707642,
      "learning_rate": 0.000508917041966799,
      "loss": 2.879,
      "step": 58704
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5321933031082153,
      "learning_rate": 0.0005089141063005726,
      "loss": 3.1192,
      "step": 58705
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.744168758392334,
      "learning_rate": 0.0005089111705955051,
      "loss": 3.0256,
      "step": 58706
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5197105407714844,
      "learning_rate": 0.0005089082348515971,
      "loss": 3.0659,
      "step": 58707
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.28125,
      "learning_rate": 0.0005089052990688491,
      "loss": 2.9671,
      "step": 58708
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.537360191345215,
      "learning_rate": 0.0005089023632472618,
      "loss": 2.8789,
      "step": 58709
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7450790405273438,
      "learning_rate": 0.0005088994273868355,
      "loss": 2.994,
      "step": 58710
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3593180179595947,
      "learning_rate": 0.0005088964914875709,
      "loss": 2.8712,
      "step": 58711
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9293760061264038,
      "learning_rate": 0.0005088935555494685,
      "loss": 3.0451,
      "step": 58712
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.667368769645691,
      "learning_rate": 0.0005088906195725288,
      "loss": 2.9006,
      "step": 58713
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6210395097732544,
      "learning_rate": 0.0005088876835567524,
      "loss": 3.0561,
      "step": 58714
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7021889686584473,
      "learning_rate": 0.0005088847475021399,
      "loss": 3.1587,
      "step": 58715
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.722403883934021,
      "learning_rate": 0.0005088818114086917,
      "loss": 2.9441,
      "step": 58716
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2702670097351074,
      "learning_rate": 0.0005088788752764084,
      "loss": 3.0282,
      "step": 58717
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4298161268234253,
      "learning_rate": 0.0005088759391052906,
      "loss": 2.8565,
      "step": 58718
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3881531953811646,
      "learning_rate": 0.0005088730028953389,
      "loss": 3.0416,
      "step": 58719
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9516812562942505,
      "learning_rate": 0.0005088700666465536,
      "loss": 2.9865,
      "step": 58720
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.56743586063385,
      "learning_rate": 0.0005088671303589356,
      "loss": 3.2501,
      "step": 58721
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5503119230270386,
      "learning_rate": 0.0005088641940324852,
      "loss": 2.8211,
      "step": 58722
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.5711259841918945,
      "learning_rate": 0.000508861257667203,
      "loss": 3.1625,
      "step": 58723
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9258825778961182,
      "learning_rate": 0.0005088583212630895,
      "loss": 3.0348,
      "step": 58724
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6899923086166382,
      "learning_rate": 0.0005088553848201455,
      "loss": 3.0259,
      "step": 58725
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.4775044918060303,
      "learning_rate": 0.000508852448338371,
      "loss": 2.9413,
      "step": 58726
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6230489015579224,
      "learning_rate": 0.0005088495118177671,
      "loss": 3.0371,
      "step": 58727
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4504402875900269,
      "learning_rate": 0.0005088465752583341,
      "loss": 3.0424,
      "step": 58728
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9735543727874756,
      "learning_rate": 0.0005088436386600726,
      "loss": 2.9099,
      "step": 58729
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4788728952407837,
      "learning_rate": 0.0005088407020229831,
      "loss": 3.1868,
      "step": 58730
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7963536977767944,
      "learning_rate": 0.0005088377653470663,
      "loss": 2.8637,
      "step": 58731
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.3240714073181152,
      "learning_rate": 0.0005088348286323224,
      "loss": 3.1046,
      "step": 58732
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7492759227752686,
      "learning_rate": 0.0005088318918787522,
      "loss": 3.1775,
      "step": 58733
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.435770034790039,
      "learning_rate": 0.0005088289550863564,
      "loss": 2.9263,
      "step": 58734
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.8706213235855103,
      "learning_rate": 0.0005088260182551351,
      "loss": 3.1763,
      "step": 58735
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7508742809295654,
      "learning_rate": 0.0005088230813850893,
      "loss": 3.2184,
      "step": 58736
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5763217210769653,
      "learning_rate": 0.0005088201444762192,
      "loss": 2.9864,
      "step": 58737
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.191835880279541,
      "learning_rate": 0.0005088172075285256,
      "loss": 3.0384,
      "step": 58738
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.9541559219360352,
      "learning_rate": 0.0005088142705420088,
      "loss": 3.1615,
      "step": 58739
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.551872730255127,
      "learning_rate": 0.0005088113335166696,
      "loss": 3.2001,
      "step": 58740
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6167705059051514,
      "learning_rate": 0.0005088083964525084,
      "loss": 3.2178,
      "step": 58741
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5643212795257568,
      "learning_rate": 0.0005088054593495258,
      "loss": 3.0173,
      "step": 58742
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.7588776350021362,
      "learning_rate": 0.0005088025222077224,
      "loss": 3.0835,
      "step": 58743
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.372629404067993,
      "learning_rate": 0.0005087995850270985,
      "loss": 3.0344,
      "step": 58744
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.959122657775879,
      "learning_rate": 0.000508796647807655,
      "loss": 3.0083,
      "step": 58745
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.151705503463745,
      "learning_rate": 0.0005087937105493921,
      "loss": 3.146,
      "step": 58746
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.825519323348999,
      "learning_rate": 0.0005087907732523106,
      "loss": 3.0401,
      "step": 58747
    },
    {
      "epoch": 0.76,
      "grad_norm": 2.2674100399017334,
      "learning_rate": 0.000508787835916411,
      "loss": 2.9409,
      "step": 58748
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.887084722518921,
      "learning_rate": 0.0005087848985416936,
      "loss": 2.77,
      "step": 58749
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.5048465728759766,
      "learning_rate": 0.0005087819611281593,
      "loss": 2.9928,
      "step": 58750
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.621294379234314,
      "learning_rate": 0.0005087790236758085,
      "loss": 3.0932,
      "step": 58751
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.4063310623168945,
      "learning_rate": 0.0005087760861846416,
      "loss": 3.0892,
      "step": 58752
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.085394859313965,
      "learning_rate": 0.0005087731486546595,
      "loss": 2.9322,
      "step": 58753
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.488071918487549,
      "learning_rate": 0.0005087702110858623,
      "loss": 2.8265,
      "step": 58754
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8530603647232056,
      "learning_rate": 0.0005087672734782509,
      "loss": 3.2835,
      "step": 58755
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.13576602935791,
      "learning_rate": 0.0005087643358318258,
      "loss": 3.1267,
      "step": 58756
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7514700889587402,
      "learning_rate": 0.0005087613981465874,
      "loss": 3.3121,
      "step": 58757
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8299411535263062,
      "learning_rate": 0.0005087584604225362,
      "loss": 2.9861,
      "step": 58758
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.038072347640991,
      "learning_rate": 0.000508755522659673,
      "loss": 3.201,
      "step": 58759
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7745959758758545,
      "learning_rate": 0.0005087525848579982,
      "loss": 3.0303,
      "step": 58760
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6089359521865845,
      "learning_rate": 0.0005087496470175123,
      "loss": 3.0825,
      "step": 58761
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4844356775283813,
      "learning_rate": 0.000508746709138216,
      "loss": 3.3296,
      "step": 58762
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5114250183105469,
      "learning_rate": 0.0005087437712201096,
      "loss": 2.9797,
      "step": 58763
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6003392934799194,
      "learning_rate": 0.0005087408332631938,
      "loss": 3.0682,
      "step": 58764
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7785497903823853,
      "learning_rate": 0.0005087378952674693,
      "loss": 3.2753,
      "step": 58765
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7416408061981201,
      "learning_rate": 0.0005087349572329363,
      "loss": 3.1674,
      "step": 58766
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.737997055053711,
      "learning_rate": 0.0005087320191595957,
      "loss": 2.9642,
      "step": 58767
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7079362869262695,
      "learning_rate": 0.0005087290810474478,
      "loss": 3.0321,
      "step": 58768
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.866409182548523,
      "learning_rate": 0.0005087261428964932,
      "loss": 2.9837,
      "step": 58769
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.0707199573516846,
      "learning_rate": 0.0005087232047067326,
      "loss": 3.1126,
      "step": 58770
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.017310380935669,
      "learning_rate": 0.0005087202664781662,
      "loss": 3.0233,
      "step": 58771
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9045453071594238,
      "learning_rate": 0.0005087173282107949,
      "loss": 2.915,
      "step": 58772
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.897885799407959,
      "learning_rate": 0.0005087143899046191,
      "loss": 3.1368,
      "step": 58773
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5981266498565674,
      "learning_rate": 0.0005087114515596394,
      "loss": 3.2445,
      "step": 58774
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4531476497650146,
      "learning_rate": 0.0005087085131758562,
      "loss": 2.9919,
      "step": 58775
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8486227989196777,
      "learning_rate": 0.0005087055747532701,
      "loss": 2.9867,
      "step": 58776
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6522928476333618,
      "learning_rate": 0.0005087026362918818,
      "loss": 3.1682,
      "step": 58777
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.764070987701416,
      "learning_rate": 0.0005086996977916918,
      "loss": 2.9265,
      "step": 58778
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7774484157562256,
      "learning_rate": 0.0005086967592527004,
      "loss": 3.162,
      "step": 58779
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.746774673461914,
      "learning_rate": 0.0005086938206749085,
      "loss": 2.9241,
      "step": 58780
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8983205556869507,
      "learning_rate": 0.0005086908820583164,
      "loss": 2.9789,
      "step": 58781
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7827857732772827,
      "learning_rate": 0.0005086879434029248,
      "loss": 2.7287,
      "step": 58782
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8806428909301758,
      "learning_rate": 0.0005086850047087341,
      "loss": 3.2897,
      "step": 58783
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7812258005142212,
      "learning_rate": 0.0005086820659757449,
      "loss": 2.9097,
      "step": 58784
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7946248054504395,
      "learning_rate": 0.0005086791272039578,
      "loss": 2.7765,
      "step": 58785
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.6523168087005615,
      "learning_rate": 0.0005086761883933733,
      "loss": 2.7886,
      "step": 58786
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.446262240409851,
      "learning_rate": 0.000508673249543992,
      "loss": 3.224,
      "step": 58787
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.199939727783203,
      "learning_rate": 0.0005086703106558144,
      "loss": 3.0026,
      "step": 58788
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.244682550430298,
      "learning_rate": 0.0005086673717288409,
      "loss": 3.3141,
      "step": 58789
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8154456615447998,
      "learning_rate": 0.0005086644327630724,
      "loss": 3.0231,
      "step": 58790
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.801091194152832,
      "learning_rate": 0.0005086614937585091,
      "loss": 3.0111,
      "step": 58791
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7963218688964844,
      "learning_rate": 0.0005086585547151518,
      "loss": 3.0752,
      "step": 58792
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4487242698669434,
      "learning_rate": 0.0005086556156330009,
      "loss": 2.8013,
      "step": 58793
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9600862264633179,
      "learning_rate": 0.0005086526765120568,
      "loss": 2.7746,
      "step": 58794
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6330482959747314,
      "learning_rate": 0.0005086497373523205,
      "loss": 3.1394,
      "step": 58795
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.696535348892212,
      "learning_rate": 0.0005086467981537922,
      "loss": 3.0614,
      "step": 58796
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7949905395507812,
      "learning_rate": 0.0005086438589164725,
      "loss": 2.8993,
      "step": 58797
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.715083360671997,
      "learning_rate": 0.000508640919640362,
      "loss": 3.1157,
      "step": 58798
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8214530944824219,
      "learning_rate": 0.000508637980325461,
      "loss": 2.9916,
      "step": 58799
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5688170194625854,
      "learning_rate": 0.0005086350409717706,
      "loss": 2.8814,
      "step": 58800
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.67001211643219,
      "learning_rate": 0.0005086321015792908,
      "loss": 3.1841,
      "step": 58801
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8376551866531372,
      "learning_rate": 0.0005086291621480225,
      "loss": 2.9798,
      "step": 58802
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5245189666748047,
      "learning_rate": 0.0005086262226779659,
      "loss": 3.2212,
      "step": 58803
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9082505702972412,
      "learning_rate": 0.0005086232831691219,
      "loss": 3.0718,
      "step": 58804
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8254002332687378,
      "learning_rate": 0.0005086203436214907,
      "loss": 3.0429,
      "step": 58805
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3585444688796997,
      "learning_rate": 0.0005086174040350732,
      "loss": 3.1287,
      "step": 58806
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8463432788848877,
      "learning_rate": 0.0005086144644098698,
      "loss": 3.0859,
      "step": 58807
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7659063339233398,
      "learning_rate": 0.000508611524745881,
      "loss": 3.0604,
      "step": 58808
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1002919673919678,
      "learning_rate": 0.0005086085850431074,
      "loss": 2.8267,
      "step": 58809
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4330639839172363,
      "learning_rate": 0.0005086056453015496,
      "loss": 3.0715,
      "step": 58810
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.454841136932373,
      "learning_rate": 0.0005086027055212079,
      "loss": 2.9763,
      "step": 58811
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9629480838775635,
      "learning_rate": 0.0005085997657020831,
      "loss": 2.9595,
      "step": 58812
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6875852346420288,
      "learning_rate": 0.0005085968258441758,
      "loss": 2.9877,
      "step": 58813
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7279036045074463,
      "learning_rate": 0.0005085938859474862,
      "loss": 3.0698,
      "step": 58814
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6639000177383423,
      "learning_rate": 0.0005085909460120151,
      "loss": 2.899,
      "step": 58815
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4013028144836426,
      "learning_rate": 0.0005085880060377631,
      "loss": 2.8575,
      "step": 58816
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1585237979888916,
      "learning_rate": 0.0005085850660247306,
      "loss": 2.9016,
      "step": 58817
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6026287078857422,
      "learning_rate": 0.0005085821259729181,
      "loss": 3.4003,
      "step": 58818
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.839606761932373,
      "learning_rate": 0.0005085791858823263,
      "loss": 2.7758,
      "step": 58819
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.676913857460022,
      "learning_rate": 0.0005085762457529558,
      "loss": 3.0324,
      "step": 58820
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7792316675186157,
      "learning_rate": 0.0005085733055848069,
      "loss": 2.896,
      "step": 58821
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.052605628967285,
      "learning_rate": 0.0005085703653778804,
      "loss": 3.0221,
      "step": 58822
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3936734199523926,
      "learning_rate": 0.0005085674251321768,
      "loss": 3.2212,
      "step": 58823
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8773505687713623,
      "learning_rate": 0.0005085644848476964,
      "loss": 3.0283,
      "step": 58824
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8023775815963745,
      "learning_rate": 0.00050856154452444,
      "loss": 2.9698,
      "step": 58825
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.736214518547058,
      "learning_rate": 0.000508558604162408,
      "loss": 3.1607,
      "step": 58826
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9951919317245483,
      "learning_rate": 0.0005085556637616012,
      "loss": 3.101,
      "step": 58827
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1319425106048584,
      "learning_rate": 0.0005085527233220197,
      "loss": 3.257,
      "step": 58828
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8797903060913086,
      "learning_rate": 0.0005085497828436645,
      "loss": 2.9521,
      "step": 58829
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4729429483413696,
      "learning_rate": 0.0005085468423265359,
      "loss": 3.4106,
      "step": 58830
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.682086706161499,
      "learning_rate": 0.0005085439017706346,
      "loss": 3.0719,
      "step": 58831
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.510068655014038,
      "learning_rate": 0.000508540961175961,
      "loss": 2.8879,
      "step": 58832
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.067676544189453,
      "learning_rate": 0.0005085380205425156,
      "loss": 2.8207,
      "step": 58833
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.010131359100342,
      "learning_rate": 0.0005085350798702991,
      "loss": 3.2523,
      "step": 58834
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1229140758514404,
      "learning_rate": 0.000508532139159312,
      "loss": 3.1531,
      "step": 58835
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9377189874649048,
      "learning_rate": 0.0005085291984095549,
      "loss": 3.231,
      "step": 58836
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2988357543945312,
      "learning_rate": 0.0005085262576210282,
      "loss": 2.779,
      "step": 58837
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4713400602340698,
      "learning_rate": 0.0005085233167937326,
      "loss": 2.9114,
      "step": 58838
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8236379623413086,
      "learning_rate": 0.0005085203759276685,
      "loss": 3.3321,
      "step": 58839
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1582558155059814,
      "learning_rate": 0.0005085174350228365,
      "loss": 2.6082,
      "step": 58840
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7691744565963745,
      "learning_rate": 0.0005085144940792374,
      "loss": 2.9577,
      "step": 58841
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4560446739196777,
      "learning_rate": 0.0005085115530968714,
      "loss": 3.0915,
      "step": 58842
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7197155952453613,
      "learning_rate": 0.000508508612075739,
      "loss": 2.9985,
      "step": 58843
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3712821006774902,
      "learning_rate": 0.0005085056710158411,
      "loss": 2.9329,
      "step": 58844
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5131596326828003,
      "learning_rate": 0.000508502729917178,
      "loss": 3.2089,
      "step": 58845
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9618605375289917,
      "learning_rate": 0.0005084997887797503,
      "loss": 3.0153,
      "step": 58846
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8473557233810425,
      "learning_rate": 0.0005084968476035585,
      "loss": 2.9922,
      "step": 58847
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4842710494995117,
      "learning_rate": 0.0005084939063886033,
      "loss": 2.909,
      "step": 58848
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.270636796951294,
      "learning_rate": 0.0005084909651348852,
      "loss": 3.2094,
      "step": 58849
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5966072082519531,
      "learning_rate": 0.0005084880238424046,
      "loss": 2.8806,
      "step": 58850
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5265699625015259,
      "learning_rate": 0.0005084850825111621,
      "loss": 2.8855,
      "step": 58851
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6132491827011108,
      "learning_rate": 0.0005084821411411583,
      "loss": 2.9412,
      "step": 58852
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4103580713272095,
      "learning_rate": 0.0005084791997323939,
      "loss": 3.26,
      "step": 58853
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4480150938034058,
      "learning_rate": 0.0005084762582848691,
      "loss": 3.1968,
      "step": 58854
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9396660327911377,
      "learning_rate": 0.0005084733167985846,
      "loss": 2.8643,
      "step": 58855
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8893166780471802,
      "learning_rate": 0.0005084703752735411,
      "loss": 3.014,
      "step": 58856
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6703217029571533,
      "learning_rate": 0.000508467433709739,
      "loss": 2.7538,
      "step": 58857
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5865576267242432,
      "learning_rate": 0.0005084644921071788,
      "loss": 2.8608,
      "step": 58858
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3146077394485474,
      "learning_rate": 0.0005084615504658612,
      "loss": 3.0809,
      "step": 58859
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6715959310531616,
      "learning_rate": 0.0005084586087857866,
      "loss": 3.0349,
      "step": 58860
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1237857341766357,
      "learning_rate": 0.0005084556670669557,
      "loss": 2.9827,
      "step": 58861
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0528392791748047,
      "learning_rate": 0.0005084527253093688,
      "loss": 3.2516,
      "step": 58862
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.50877845287323,
      "learning_rate": 0.0005084497835130267,
      "loss": 3.0483,
      "step": 58863
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5189827680587769,
      "learning_rate": 0.0005084468416779298,
      "loss": 3.1322,
      "step": 58864
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.019462823867798,
      "learning_rate": 0.0005084438998040788,
      "loss": 3.1852,
      "step": 58865
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4409587383270264,
      "learning_rate": 0.0005084409578914741,
      "loss": 2.9966,
      "step": 58866
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8449643850326538,
      "learning_rate": 0.0005084380159401164,
      "loss": 3.2184,
      "step": 58867
    },
    {
      "epoch": 0.77,
      "grad_norm": 4.148604393005371,
      "learning_rate": 0.000508435073950006,
      "loss": 3.1089,
      "step": 58868
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5574982166290283,
      "learning_rate": 0.0005084321319211436,
      "loss": 3.0441,
      "step": 58869
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.6102488040924072,
      "learning_rate": 0.0005084291898535296,
      "loss": 2.9348,
      "step": 58870
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.194013833999634,
      "learning_rate": 0.0005084262477471649,
      "loss": 3.129,
      "step": 58871
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.9910683631896973,
      "learning_rate": 0.0005084233056020498,
      "loss": 3.0415,
      "step": 58872
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.148789882659912,
      "learning_rate": 0.0005084203634181847,
      "loss": 3.0511,
      "step": 58873
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.374674677848816,
      "learning_rate": 0.0005084174211955704,
      "loss": 2.9781,
      "step": 58874
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0003504753112793,
      "learning_rate": 0.0005084144789342075,
      "loss": 3.0059,
      "step": 58875
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.6877143383026123,
      "learning_rate": 0.0005084115366340962,
      "loss": 2.9404,
      "step": 58876
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.626103401184082,
      "learning_rate": 0.0005084085942952375,
      "loss": 3.2403,
      "step": 58877
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4223735332489014,
      "learning_rate": 0.0005084056519176316,
      "loss": 3.1704,
      "step": 58878
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6986041069030762,
      "learning_rate": 0.000508402709501279,
      "loss": 3.0622,
      "step": 58879
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.666346549987793,
      "learning_rate": 0.0005083997670461806,
      "loss": 3.0721,
      "step": 58880
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5514822006225586,
      "learning_rate": 0.0005083968245523367,
      "loss": 3.1989,
      "step": 58881
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5062633752822876,
      "learning_rate": 0.0005083938820197479,
      "loss": 3.1161,
      "step": 58882
    },
    {
      "epoch": 0.77,
      "grad_norm": 4.197857856750488,
      "learning_rate": 0.0005083909394484147,
      "loss": 3.2326,
      "step": 58883
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.045168161392212,
      "learning_rate": 0.0005083879968383377,
      "loss": 2.91,
      "step": 58884
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.9238407611846924,
      "learning_rate": 0.0005083850541895175,
      "loss": 2.9791,
      "step": 58885
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7978346347808838,
      "learning_rate": 0.0005083821115019545,
      "loss": 3.1712,
      "step": 58886
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.6747474670410156,
      "learning_rate": 0.0005083791687756494,
      "loss": 3.2147,
      "step": 58887
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2374444007873535,
      "learning_rate": 0.0005083762260106026,
      "loss": 2.8759,
      "step": 58888
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8293139934539795,
      "learning_rate": 0.0005083732832068148,
      "loss": 3.1492,
      "step": 58889
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8749802112579346,
      "learning_rate": 0.0005083703403642865,
      "loss": 3.0373,
      "step": 58890
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5538747310638428,
      "learning_rate": 0.0005083673974830182,
      "loss": 3.1678,
      "step": 58891
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.538499116897583,
      "learning_rate": 0.0005083644545630104,
      "loss": 3.0303,
      "step": 58892
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.521466612815857,
      "learning_rate": 0.0005083615116042638,
      "loss": 3.0712,
      "step": 58893
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3340455293655396,
      "learning_rate": 0.0005083585686067787,
      "loss": 2.9989,
      "step": 58894
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3542261123657227,
      "learning_rate": 0.000508355625570556,
      "loss": 2.984,
      "step": 58895
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5658639669418335,
      "learning_rate": 0.0005083526824955961,
      "loss": 2.8997,
      "step": 58896
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2869036197662354,
      "learning_rate": 0.0005083497393818993,
      "loss": 3.0251,
      "step": 58897
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0098745822906494,
      "learning_rate": 0.0005083467962294665,
      "loss": 3.2366,
      "step": 58898
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8222512006759644,
      "learning_rate": 0.000508343853038298,
      "loss": 3.0382,
      "step": 58899
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3989174365997314,
      "learning_rate": 0.0005083409098083944,
      "loss": 3.1775,
      "step": 58900
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8972065448760986,
      "learning_rate": 0.0005083379665397564,
      "loss": 3.1937,
      "step": 58901
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4502723217010498,
      "learning_rate": 0.0005083350232323844,
      "loss": 3.0266,
      "step": 58902
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4878456592559814,
      "learning_rate": 0.000508332079886279,
      "loss": 2.9869,
      "step": 58903
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.286752700805664,
      "learning_rate": 0.0005083291365014408,
      "loss": 3.0897,
      "step": 58904
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8860422372817993,
      "learning_rate": 0.0005083261930778702,
      "loss": 3.1118,
      "step": 58905
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4527702331542969,
      "learning_rate": 0.0005083232496155678,
      "loss": 2.9631,
      "step": 58906
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6426968574523926,
      "learning_rate": 0.0005083203061145343,
      "loss": 3.0816,
      "step": 58907
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.1863937377929688,
      "learning_rate": 0.00050831736257477,
      "loss": 3.095,
      "step": 58908
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.604384183883667,
      "learning_rate": 0.0005083144189962756,
      "loss": 3.1266,
      "step": 58909
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9627989530563354,
      "learning_rate": 0.0005083114753790516,
      "loss": 3.0214,
      "step": 58910
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4519386291503906,
      "learning_rate": 0.0005083085317230986,
      "loss": 3.0535,
      "step": 58911
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5079076290130615,
      "learning_rate": 0.0005083055880284171,
      "loss": 3.0767,
      "step": 58912
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.2210500240325928,
      "learning_rate": 0.0005083026442950077,
      "loss": 3.1343,
      "step": 58913
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.636776328086853,
      "learning_rate": 0.0005082997005228709,
      "loss": 3.083,
      "step": 58914
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6282706260681152,
      "learning_rate": 0.0005082967567120073,
      "loss": 2.8593,
      "step": 58915
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0605263710021973,
      "learning_rate": 0.0005082938128624172,
      "loss": 3.2423,
      "step": 58916
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.2751801013946533,
      "learning_rate": 0.0005082908689741014,
      "loss": 3.0574,
      "step": 58917
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9930251836776733,
      "learning_rate": 0.0005082879250470605,
      "loss": 2.7308,
      "step": 58918
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0618157386779785,
      "learning_rate": 0.000508284981081295,
      "loss": 2.9193,
      "step": 58919
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2313241958618164,
      "learning_rate": 0.0005082820370768053,
      "loss": 3.2501,
      "step": 58920
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7415313720703125,
      "learning_rate": 0.000508279093033592,
      "loss": 3.0763,
      "step": 58921
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4137598276138306,
      "learning_rate": 0.0005082761489516557,
      "loss": 3.3431,
      "step": 58922
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0597548484802246,
      "learning_rate": 0.000508273204830997,
      "loss": 2.8933,
      "step": 58923
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7657153606414795,
      "learning_rate": 0.0005082702606716162,
      "loss": 3.2393,
      "step": 58924
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7513821125030518,
      "learning_rate": 0.0005082673164735144,
      "loss": 3.0601,
      "step": 58925
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5327599048614502,
      "learning_rate": 0.0005082643722366915,
      "loss": 3.328,
      "step": 58926
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9980090856552124,
      "learning_rate": 0.0005082614279611484,
      "loss": 2.9329,
      "step": 58927
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4268879890441895,
      "learning_rate": 0.0005082584836468854,
      "loss": 2.9305,
      "step": 58928
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5051060914993286,
      "learning_rate": 0.0005082555392939034,
      "loss": 3.1256,
      "step": 58929
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6280567646026611,
      "learning_rate": 0.0005082525949022027,
      "loss": 2.852,
      "step": 58930
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.7875094413757324,
      "learning_rate": 0.0005082496504717839,
      "loss": 2.8429,
      "step": 58931
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.082699775695801,
      "learning_rate": 0.0005082467060026476,
      "loss": 3.1604,
      "step": 58932
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2319438457489014,
      "learning_rate": 0.0005082437614947942,
      "loss": 3.1519,
      "step": 58933
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.053568124771118,
      "learning_rate": 0.0005082408169482244,
      "loss": 2.9915,
      "step": 58934
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2773761749267578,
      "learning_rate": 0.0005082378723629389,
      "loss": 2.8722,
      "step": 58935
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.091594696044922,
      "learning_rate": 0.0005082349277389379,
      "loss": 3.1269,
      "step": 58936
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.0472936630249023,
      "learning_rate": 0.000508231983076222,
      "loss": 3.0646,
      "step": 58937
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5818672180175781,
      "learning_rate": 0.000508229038374792,
      "loss": 3.4511,
      "step": 58938
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7820944786071777,
      "learning_rate": 0.0005082260936346482,
      "loss": 3.147,
      "step": 58939
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5031195878982544,
      "learning_rate": 0.0005082231488557912,
      "loss": 3.1021,
      "step": 58940
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5252652168273926,
      "learning_rate": 0.0005082202040382217,
      "loss": 3.2392,
      "step": 58941
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8911241292953491,
      "learning_rate": 0.0005082172591819401,
      "loss": 3.2238,
      "step": 58942
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.597328782081604,
      "learning_rate": 0.0005082143142869469,
      "loss": 3.122,
      "step": 58943
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.599454402923584,
      "learning_rate": 0.0005082113693532428,
      "loss": 3.2898,
      "step": 58944
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3614678382873535,
      "learning_rate": 0.0005082084243808283,
      "loss": 3.0571,
      "step": 58945
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7838342189788818,
      "learning_rate": 0.0005082054793697039,
      "loss": 3.2573,
      "step": 58946
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.068303346633911,
      "learning_rate": 0.0005082025343198702,
      "loss": 2.9977,
      "step": 58947
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.716516137123108,
      "learning_rate": 0.0005081995892313277,
      "loss": 2.8656,
      "step": 58948
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.0394673347473145,
      "learning_rate": 0.0005081966441040769,
      "loss": 3.043,
      "step": 58949
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2687973976135254,
      "learning_rate": 0.0005081936989381185,
      "loss": 3.0293,
      "step": 58950
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9814006090164185,
      "learning_rate": 0.0005081907537334529,
      "loss": 2.827,
      "step": 58951
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6211230754852295,
      "learning_rate": 0.0005081878084900807,
      "loss": 3.0257,
      "step": 58952
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2260284423828125,
      "learning_rate": 0.0005081848632080024,
      "loss": 3.1967,
      "step": 58953
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.528684377670288,
      "learning_rate": 0.0005081819178872188,
      "loss": 3.0314,
      "step": 58954
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2169272899627686,
      "learning_rate": 0.0005081789725277301,
      "loss": 3.2075,
      "step": 58955
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7011311054229736,
      "learning_rate": 0.0005081760271295371,
      "loss": 3.2608,
      "step": 58956
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8313474655151367,
      "learning_rate": 0.0005081730816926402,
      "loss": 2.913,
      "step": 58957
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9239819049835205,
      "learning_rate": 0.0005081701362170399,
      "loss": 3.1253,
      "step": 58958
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.656293272972107,
      "learning_rate": 0.0005081671907027369,
      "loss": 2.9923,
      "step": 58959
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1425626277923584,
      "learning_rate": 0.0005081642451497319,
      "loss": 2.8761,
      "step": 58960
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6076377630233765,
      "learning_rate": 0.0005081612995580249,
      "loss": 3.0912,
      "step": 58961
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.31736159324646,
      "learning_rate": 0.0005081583539276169,
      "loss": 3.1102,
      "step": 58962
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5212963819503784,
      "learning_rate": 0.0005081554082585084,
      "loss": 2.8099,
      "step": 58963
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6194589138031006,
      "learning_rate": 0.0005081524625506999,
      "loss": 3.2128,
      "step": 58964
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3070770502090454,
      "learning_rate": 0.0005081495168041918,
      "loss": 3.0177,
      "step": 58965
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.503413438796997,
      "learning_rate": 0.0005081465710189849,
      "loss": 2.811,
      "step": 58966
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5038306713104248,
      "learning_rate": 0.0005081436251950796,
      "loss": 3.1507,
      "step": 58967
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6812756061553955,
      "learning_rate": 0.0005081406793324765,
      "loss": 3.0499,
      "step": 58968
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6817702054977417,
      "learning_rate": 0.000508137733431176,
      "loss": 2.9019,
      "step": 58969
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0072436332702637,
      "learning_rate": 0.0005081347874911789,
      "loss": 3.094,
      "step": 58970
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8330721855163574,
      "learning_rate": 0.0005081318415124854,
      "loss": 3.0529,
      "step": 58971
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6515048742294312,
      "learning_rate": 0.0005081288954950965,
      "loss": 3.0006,
      "step": 58972
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.465591549873352,
      "learning_rate": 0.0005081259494390124,
      "loss": 3.0267,
      "step": 58973
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5248247385025024,
      "learning_rate": 0.0005081230033442338,
      "loss": 2.8243,
      "step": 58974
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1034224033355713,
      "learning_rate": 0.0005081200572107611,
      "loss": 2.7441,
      "step": 58975
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7822086811065674,
      "learning_rate": 0.0005081171110385951,
      "loss": 2.9161,
      "step": 58976
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9628322124481201,
      "learning_rate": 0.0005081141648277361,
      "loss": 3.1266,
      "step": 58977
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9485455751419067,
      "learning_rate": 0.0005081112185781848,
      "loss": 3.0801,
      "step": 58978
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8538484573364258,
      "learning_rate": 0.0005081082722899416,
      "loss": 3.0475,
      "step": 58979
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.094438076019287,
      "learning_rate": 0.0005081053259630074,
      "loss": 2.9171,
      "step": 58980
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5848673582077026,
      "learning_rate": 0.0005081023795973822,
      "loss": 2.7654,
      "step": 58981
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9511722326278687,
      "learning_rate": 0.000508099433193067,
      "loss": 2.6525,
      "step": 58982
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7413991689682007,
      "learning_rate": 0.0005080964867500621,
      "loss": 2.9427,
      "step": 58983
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5592520236968994,
      "learning_rate": 0.0005080935402683682,
      "loss": 2.9281,
      "step": 58984
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.710219383239746,
      "learning_rate": 0.0005080905937479858,
      "loss": 3.1253,
      "step": 58985
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5642701387405396,
      "learning_rate": 0.0005080876471889154,
      "loss": 3.0515,
      "step": 58986
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.578084945678711,
      "learning_rate": 0.0005080847005911576,
      "loss": 2.9754,
      "step": 58987
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.112088918685913,
      "learning_rate": 0.0005080817539547129,
      "loss": 2.8637,
      "step": 58988
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4313526153564453,
      "learning_rate": 0.0005080788072795817,
      "loss": 2.8407,
      "step": 58989
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4518898725509644,
      "learning_rate": 0.000508075860565765,
      "loss": 3.2475,
      "step": 58990
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7485719919204712,
      "learning_rate": 0.0005080729138132629,
      "loss": 3.1255,
      "step": 58991
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2999202013015747,
      "learning_rate": 0.0005080699670220762,
      "loss": 2.9532,
      "step": 58992
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7910542488098145,
      "learning_rate": 0.0005080670201922054,
      "loss": 3.021,
      "step": 58993
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.606296420097351,
      "learning_rate": 0.0005080640733236509,
      "loss": 3.3345,
      "step": 58994
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4066376686096191,
      "learning_rate": 0.0005080611264164134,
      "loss": 3.0992,
      "step": 58995
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5802315473556519,
      "learning_rate": 0.0005080581794704935,
      "loss": 3.1392,
      "step": 58996
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.786290168762207,
      "learning_rate": 0.0005080552324858915,
      "loss": 3.0019,
      "step": 58997
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6475703716278076,
      "learning_rate": 0.0005080522854626082,
      "loss": 2.8908,
      "step": 58998
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.501272439956665,
      "learning_rate": 0.0005080493384006441,
      "loss": 2.9812,
      "step": 58999
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.792378544807434,
      "learning_rate": 0.0005080463912999996,
      "loss": 3.1044,
      "step": 59000
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4510126113891602,
      "learning_rate": 0.0005080434441606754,
      "loss": 2.8317,
      "step": 59001
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7451168298721313,
      "learning_rate": 0.000508040496982672,
      "loss": 3.0846,
      "step": 59002
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.279292583465576,
      "learning_rate": 0.00050803754976599,
      "loss": 2.9855,
      "step": 59003
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0122997760772705,
      "learning_rate": 0.0005080346025106298,
      "loss": 3.3016,
      "step": 59004
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9170663356781006,
      "learning_rate": 0.0005080316552165921,
      "loss": 2.9007,
      "step": 59005
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5552488565444946,
      "learning_rate": 0.0005080287078838774,
      "loss": 2.9676,
      "step": 59006
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5860002040863037,
      "learning_rate": 0.0005080257605124862,
      "loss": 3.0131,
      "step": 59007
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4053150415420532,
      "learning_rate": 0.000508022813102419,
      "loss": 3.0039,
      "step": 59008
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5003734827041626,
      "learning_rate": 0.0005080198656536765,
      "loss": 3.076,
      "step": 59009
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5943964719772339,
      "learning_rate": 0.0005080169181662592,
      "loss": 2.9429,
      "step": 59010
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7205554246902466,
      "learning_rate": 0.0005080139706401676,
      "loss": 2.8864,
      "step": 59011
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6884534358978271,
      "learning_rate": 0.0005080110230754023,
      "loss": 3.1099,
      "step": 59012
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.485912561416626,
      "learning_rate": 0.0005080080754719638,
      "loss": 3.0558,
      "step": 59013
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1016361713409424,
      "learning_rate": 0.0005080051278298527,
      "loss": 3.0467,
      "step": 59014
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5819308757781982,
      "learning_rate": 0.0005080021801490695,
      "loss": 2.8936,
      "step": 59015
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5465916395187378,
      "learning_rate": 0.0005079992324296148,
      "loss": 3.2261,
      "step": 59016
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.119332790374756,
      "learning_rate": 0.0005079962846714891,
      "loss": 3.076,
      "step": 59017
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.196845531463623,
      "learning_rate": 0.000507993336874693,
      "loss": 3.2021,
      "step": 59018
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7204089164733887,
      "learning_rate": 0.0005079903890392269,
      "loss": 3.043,
      "step": 59019
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6760640144348145,
      "learning_rate": 0.0005079874411650915,
      "loss": 2.9384,
      "step": 59020
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5659533739089966,
      "learning_rate": 0.0005079844932522874,
      "loss": 3.1026,
      "step": 59021
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.43365478515625,
      "learning_rate": 0.0005079815453008149,
      "loss": 3.2953,
      "step": 59022
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9312609434127808,
      "learning_rate": 0.0005079785973106748,
      "loss": 2.9184,
      "step": 59023
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.702555537223816,
      "learning_rate": 0.0005079756492818677,
      "loss": 3.1718,
      "step": 59024
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6610958576202393,
      "learning_rate": 0.0005079727012143938,
      "loss": 3.0417,
      "step": 59025
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.453298568725586,
      "learning_rate": 0.000507969753108254,
      "loss": 2.9755,
      "step": 59026
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.660210371017456,
      "learning_rate": 0.0005079668049634485,
      "loss": 2.9966,
      "step": 59027
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.572090744972229,
      "learning_rate": 0.0005079638567799782,
      "loss": 3.1255,
      "step": 59028
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5318264961242676,
      "learning_rate": 0.0005079609085578433,
      "loss": 2.9659,
      "step": 59029
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6898547410964966,
      "learning_rate": 0.0005079579602970448,
      "loss": 3.0457,
      "step": 59030
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4139964580535889,
      "learning_rate": 0.0005079550119975829,
      "loss": 2.9267,
      "step": 59031
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.362926959991455,
      "learning_rate": 0.0005079520636594581,
      "loss": 2.926,
      "step": 59032
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.870444655418396,
      "learning_rate": 0.0005079491152826713,
      "loss": 3.0558,
      "step": 59033
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9991875886917114,
      "learning_rate": 0.0005079461668672226,
      "loss": 2.9418,
      "step": 59034
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.7078046798706055,
      "learning_rate": 0.000507943218413113,
      "loss": 3.0218,
      "step": 59035
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0971765518188477,
      "learning_rate": 0.0005079402699203427,
      "loss": 3.2889,
      "step": 59036
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8800567388534546,
      "learning_rate": 0.0005079373213889124,
      "loss": 2.8701,
      "step": 59037
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9934816360473633,
      "learning_rate": 0.0005079343728188227,
      "loss": 2.9187,
      "step": 59038
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7434860467910767,
      "learning_rate": 0.0005079314242100741,
      "loss": 2.7724,
      "step": 59039
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6919760704040527,
      "learning_rate": 0.000507928475562667,
      "loss": 3.2997,
      "step": 59040
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.91070556640625,
      "learning_rate": 0.0005079255268766021,
      "loss": 3.007,
      "step": 59041
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3992477655410767,
      "learning_rate": 0.00050792257815188,
      "loss": 2.9656,
      "step": 59042
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4243849515914917,
      "learning_rate": 0.000507919629388501,
      "loss": 3.1957,
      "step": 59043
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8042081594467163,
      "learning_rate": 0.000507916680586466,
      "loss": 3.1955,
      "step": 59044
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6205182075500488,
      "learning_rate": 0.0005079137317457752,
      "loss": 3.032,
      "step": 59045
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9539228677749634,
      "learning_rate": 0.0005079107828664294,
      "loss": 2.9722,
      "step": 59046
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8415077924728394,
      "learning_rate": 0.000507907833948429,
      "loss": 3.0555,
      "step": 59047
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.475778341293335,
      "learning_rate": 0.0005079048849917748,
      "loss": 3.183,
      "step": 59048
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8669637441635132,
      "learning_rate": 0.000507901935996467,
      "loss": 3.1141,
      "step": 59049
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.062696933746338,
      "learning_rate": 0.0005078989869625063,
      "loss": 3.1817,
      "step": 59050
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6827521324157715,
      "learning_rate": 0.0005078960378898933,
      "loss": 2.902,
      "step": 59051
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3740414381027222,
      "learning_rate": 0.0005078930887786285,
      "loss": 3.2079,
      "step": 59052
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9497284889221191,
      "learning_rate": 0.0005078901396287123,
      "loss": 3.1356,
      "step": 59053
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4793496131896973,
      "learning_rate": 0.0005078871904401457,
      "loss": 3.0722,
      "step": 59054
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4802354574203491,
      "learning_rate": 0.0005078842412129288,
      "loss": 3.1446,
      "step": 59055
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5831023454666138,
      "learning_rate": 0.0005078812919470623,
      "loss": 2.9654,
      "step": 59056
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5015223026275635,
      "learning_rate": 0.0005078783426425467,
      "loss": 3.0405,
      "step": 59057
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.00416898727417,
      "learning_rate": 0.0005078753932993826,
      "loss": 2.9313,
      "step": 59058
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6425002813339233,
      "learning_rate": 0.0005078724439175705,
      "loss": 3.1172,
      "step": 59059
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.125936508178711,
      "learning_rate": 0.000507869494497111,
      "loss": 2.8439,
      "step": 59060
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5428853034973145,
      "learning_rate": 0.0005078665450380048,
      "loss": 2.9282,
      "step": 59061
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4018282890319824,
      "learning_rate": 0.0005078635955402521,
      "loss": 2.8737,
      "step": 59062
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6695479154586792,
      "learning_rate": 0.0005078606460038538,
      "loss": 3.1217,
      "step": 59063
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.533555030822754,
      "learning_rate": 0.00050785769642881,
      "loss": 3.3007,
      "step": 59064
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4860268831253052,
      "learning_rate": 0.0005078547468151219,
      "loss": 2.8855,
      "step": 59065
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7031513452529907,
      "learning_rate": 0.0005078517971627895,
      "loss": 3.2842,
      "step": 59066
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6141865253448486,
      "learning_rate": 0.0005078488474718134,
      "loss": 3.0358,
      "step": 59067
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.377529263496399,
      "learning_rate": 0.0005078458977421944,
      "loss": 3.0599,
      "step": 59068
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5023115873336792,
      "learning_rate": 0.000507842947973933,
      "loss": 3.1666,
      "step": 59069
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.084707736968994,
      "learning_rate": 0.0005078399981670296,
      "loss": 2.9455,
      "step": 59070
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3769395351409912,
      "learning_rate": 0.0005078370483214847,
      "loss": 2.9682,
      "step": 59071
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0459506511688232,
      "learning_rate": 0.0005078340984372992,
      "loss": 3.1732,
      "step": 59072
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4713151454925537,
      "learning_rate": 0.0005078311485144732,
      "loss": 3.0167,
      "step": 59073
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.049774646759033,
      "learning_rate": 0.0005078281985530076,
      "loss": 2.9434,
      "step": 59074
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.429078459739685,
      "learning_rate": 0.0005078252485529028,
      "loss": 3.1413,
      "step": 59075
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8631317615509033,
      "learning_rate": 0.0005078222985141594,
      "loss": 3.0798,
      "step": 59076
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.7912564277648926,
      "learning_rate": 0.0005078193484367778,
      "loss": 2.9945,
      "step": 59077
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.946485757827759,
      "learning_rate": 0.0005078163983207588,
      "loss": 2.8942,
      "step": 59078
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8308780193328857,
      "learning_rate": 0.0005078134481661026,
      "loss": 3.1199,
      "step": 59079
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.193391799926758,
      "learning_rate": 0.0005078104979728102,
      "loss": 3.0868,
      "step": 59080
    },
    {
      "epoch": 0.77,
      "grad_norm": 4.3534746170043945,
      "learning_rate": 0.0005078075477408817,
      "loss": 3.195,
      "step": 59081
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3715367317199707,
      "learning_rate": 0.000507804597470318,
      "loss": 2.9363,
      "step": 59082
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7803317308425903,
      "learning_rate": 0.0005078016471611194,
      "loss": 3.0672,
      "step": 59083
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8322105407714844,
      "learning_rate": 0.0005077986968132866,
      "loss": 3.0801,
      "step": 59084
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8356436491012573,
      "learning_rate": 0.0005077957464268201,
      "loss": 2.9362,
      "step": 59085
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.556166648864746,
      "learning_rate": 0.0005077927960017204,
      "loss": 3.0997,
      "step": 59086
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.61489737033844,
      "learning_rate": 0.0005077898455379882,
      "loss": 3.1079,
      "step": 59087
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.913283348083496,
      "learning_rate": 0.0005077868950356239,
      "loss": 3.1357,
      "step": 59088
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.576904058456421,
      "learning_rate": 0.0005077839444946281,
      "loss": 2.977,
      "step": 59089
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0875346660614014,
      "learning_rate": 0.0005077809939150012,
      "loss": 3.0818,
      "step": 59090
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3537688255310059,
      "learning_rate": 0.0005077780432967441,
      "loss": 3.1548,
      "step": 59091
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.026294708251953,
      "learning_rate": 0.0005077750926398571,
      "loss": 3.1051,
      "step": 59092
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4447576999664307,
      "learning_rate": 0.0005077721419443406,
      "loss": 2.9193,
      "step": 59093
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.094064235687256,
      "learning_rate": 0.0005077691912101956,
      "loss": 3.1983,
      "step": 59094
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6920506954193115,
      "learning_rate": 0.0005077662404374222,
      "loss": 2.8167,
      "step": 59095
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4403090476989746,
      "learning_rate": 0.0005077632896260211,
      "loss": 2.8465,
      "step": 59096
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.505383014678955,
      "learning_rate": 0.0005077603387759932,
      "loss": 2.9563,
      "step": 59097
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9416038990020752,
      "learning_rate": 0.0005077573878873384,
      "loss": 3.1639,
      "step": 59098
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7759085893630981,
      "learning_rate": 0.0005077544369600577,
      "loss": 2.9589,
      "step": 59099
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1322579383850098,
      "learning_rate": 0.0005077514859941515,
      "loss": 2.9302,
      "step": 59100
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.919184684753418,
      "learning_rate": 0.0005077485349896205,
      "loss": 3.1612,
      "step": 59101
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.344557523727417,
      "learning_rate": 0.000507745583946465,
      "loss": 3.0704,
      "step": 59102
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.04571270942688,
      "learning_rate": 0.0005077426328646856,
      "loss": 3.0659,
      "step": 59103
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.060272693634033,
      "learning_rate": 0.0005077396817442831,
      "loss": 3.0098,
      "step": 59104
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.481618881225586,
      "learning_rate": 0.0005077367305852577,
      "loss": 3.0858,
      "step": 59105
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.456752300262451,
      "learning_rate": 0.0005077337793876102,
      "loss": 2.9997,
      "step": 59106
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7927310466766357,
      "learning_rate": 0.0005077308281513412,
      "loss": 3.1528,
      "step": 59107
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5726958513259888,
      "learning_rate": 0.0005077278768764509,
      "loss": 3.0428,
      "step": 59108
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.061032772064209,
      "learning_rate": 0.0005077249255629403,
      "loss": 2.9555,
      "step": 59109
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5808619260787964,
      "learning_rate": 0.0005077219742108095,
      "loss": 3.1001,
      "step": 59110
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8740806579589844,
      "learning_rate": 0.0005077190228200593,
      "loss": 3.1961,
      "step": 59111
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4187827110290527,
      "learning_rate": 0.0005077160713906903,
      "loss": 2.9602,
      "step": 59112
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.675350546836853,
      "learning_rate": 0.0005077131199227029,
      "loss": 2.9705,
      "step": 59113
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8528833389282227,
      "learning_rate": 0.0005077101684160978,
      "loss": 3.0828,
      "step": 59114
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0112149715423584,
      "learning_rate": 0.0005077072168708754,
      "loss": 3.2865,
      "step": 59115
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.359770655632019,
      "learning_rate": 0.0005077042652870363,
      "loss": 3.0303,
      "step": 59116
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.513866662979126,
      "learning_rate": 0.000507701313664581,
      "loss": 2.9933,
      "step": 59117
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0664641857147217,
      "learning_rate": 0.0005076983620035102,
      "loss": 3.1254,
      "step": 59118
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5234042406082153,
      "learning_rate": 0.0005076954103038243,
      "loss": 3.1405,
      "step": 59119
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.484458088874817,
      "learning_rate": 0.0005076924585655239,
      "loss": 3.1411,
      "step": 59120
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2776812314987183,
      "learning_rate": 0.0005076895067886096,
      "loss": 3.0096,
      "step": 59121
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4114559888839722,
      "learning_rate": 0.0005076865549730818,
      "loss": 3.0934,
      "step": 59122
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8978381156921387,
      "learning_rate": 0.0005076836031189412,
      "loss": 3.1404,
      "step": 59123
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4680840969085693,
      "learning_rate": 0.0005076806512261885,
      "loss": 3.1854,
      "step": 59124
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.357649326324463,
      "learning_rate": 0.0005076776992948238,
      "loss": 2.9993,
      "step": 59125
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4416723251342773,
      "learning_rate": 0.0005076747473248478,
      "loss": 3.2574,
      "step": 59126
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.142226219177246,
      "learning_rate": 0.0005076717953162614,
      "loss": 3.0648,
      "step": 59127
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4100673198699951,
      "learning_rate": 0.0005076688432690648,
      "loss": 3.124,
      "step": 59128
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.217787265777588,
      "learning_rate": 0.0005076658911832586,
      "loss": 2.9465,
      "step": 59129
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4416965246200562,
      "learning_rate": 0.0005076629390588435,
      "loss": 2.9433,
      "step": 59130
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.051124334335327,
      "learning_rate": 0.0005076599868958199,
      "loss": 3.2645,
      "step": 59131
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.747562289237976,
      "learning_rate": 0.0005076570346941883,
      "loss": 2.9162,
      "step": 59132
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.022812604904175,
      "learning_rate": 0.0005076540824539493,
      "loss": 3.0218,
      "step": 59133
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.842827320098877,
      "learning_rate": 0.0005076511301751037,
      "loss": 3.0638,
      "step": 59134
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4129741191864014,
      "learning_rate": 0.0005076481778576517,
      "loss": 3.1286,
      "step": 59135
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6008785963058472,
      "learning_rate": 0.000507645225501594,
      "loss": 3.092,
      "step": 59136
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.929404854774475,
      "learning_rate": 0.000507642273106931,
      "loss": 2.8318,
      "step": 59137
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.1293935775756836,
      "learning_rate": 0.0005076393206736637,
      "loss": 2.9101,
      "step": 59138
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7582054138183594,
      "learning_rate": 0.0005076363682017921,
      "loss": 3.1328,
      "step": 59139
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4625554084777832,
      "learning_rate": 0.000507633415691317,
      "loss": 2.7867,
      "step": 59140
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8336294889450073,
      "learning_rate": 0.0005076304631422389,
      "loss": 2.9798,
      "step": 59141
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.626503348350525,
      "learning_rate": 0.0005076275105545585,
      "loss": 3.0623,
      "step": 59142
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9305202960968018,
      "learning_rate": 0.0005076245579282761,
      "loss": 3.19,
      "step": 59143
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.476056694984436,
      "learning_rate": 0.0005076216052633924,
      "loss": 3.0526,
      "step": 59144
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.809314489364624,
      "learning_rate": 0.000507618652559908,
      "loss": 3.0743,
      "step": 59145
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.430145025253296,
      "learning_rate": 0.0005076156998178234,
      "loss": 2.8016,
      "step": 59146
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.965108871459961,
      "learning_rate": 0.0005076127470371389,
      "loss": 3.1416,
      "step": 59147
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5634242296218872,
      "learning_rate": 0.0005076097942178556,
      "loss": 3.0003,
      "step": 59148
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5338413715362549,
      "learning_rate": 0.0005076068413599735,
      "loss": 3.0405,
      "step": 59149
    },
    {
      "epoch": 0.77,
      "grad_norm": 4.0666184425354,
      "learning_rate": 0.0005076038884634933,
      "loss": 3.1653,
      "step": 59150
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.744356632232666,
      "learning_rate": 0.0005076009355284158,
      "loss": 3.1215,
      "step": 59151
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6512387990951538,
      "learning_rate": 0.0005075979825547413,
      "loss": 3.0861,
      "step": 59152
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9195140600204468,
      "learning_rate": 0.0005075950295424705,
      "loss": 2.9757,
      "step": 59153
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.296691656112671,
      "learning_rate": 0.0005075920764916037,
      "loss": 2.8061,
      "step": 59154
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.718432903289795,
      "learning_rate": 0.0005075891234021416,
      "loss": 3.0448,
      "step": 59155
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7575410604476929,
      "learning_rate": 0.0005075861702740848,
      "loss": 2.9712,
      "step": 59156
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.9492053985595703,
      "learning_rate": 0.000507583217107434,
      "loss": 2.863,
      "step": 59157
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.114135503768921,
      "learning_rate": 0.0005075802639021895,
      "loss": 3.2642,
      "step": 59158
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9011887311935425,
      "learning_rate": 0.0005075773106583517,
      "loss": 3.2054,
      "step": 59159
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4541165828704834,
      "learning_rate": 0.0005075743573759215,
      "loss": 2.992,
      "step": 59160
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2429139614105225,
      "learning_rate": 0.0005075714040548993,
      "loss": 3.1483,
      "step": 59161
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.324105739593506,
      "learning_rate": 0.0005075684506952857,
      "loss": 2.8095,
      "step": 59162
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8751895427703857,
      "learning_rate": 0.0005075654972970812,
      "loss": 2.9012,
      "step": 59163
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6417795419692993,
      "learning_rate": 0.0005075625438602864,
      "loss": 3.0721,
      "step": 59164
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.059486150741577,
      "learning_rate": 0.0005075595903849016,
      "loss": 3.0952,
      "step": 59165
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.173245429992676,
      "learning_rate": 0.0005075566368709277,
      "loss": 3.0172,
      "step": 59166
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4293298721313477,
      "learning_rate": 0.0005075536833183652,
      "loss": 3.2808,
      "step": 59167
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7521424293518066,
      "learning_rate": 0.0005075507297272144,
      "loss": 3.2198,
      "step": 59168
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6076773405075073,
      "learning_rate": 0.0005075477760974761,
      "loss": 2.9287,
      "step": 59169
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3542174100875854,
      "learning_rate": 0.0005075448224291507,
      "loss": 2.835,
      "step": 59170
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9885859489440918,
      "learning_rate": 0.0005075418687222388,
      "loss": 3.0782,
      "step": 59171
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6135601997375488,
      "learning_rate": 0.0005075389149767409,
      "loss": 2.9574,
      "step": 59172
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0843141078948975,
      "learning_rate": 0.0005075359611926577,
      "loss": 2.5846,
      "step": 59173
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4299001693725586,
      "learning_rate": 0.0005075330073699895,
      "loss": 2.796,
      "step": 59174
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7331064939498901,
      "learning_rate": 0.0005075300535087371,
      "loss": 3.0576,
      "step": 59175
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.703535556793213,
      "learning_rate": 0.000507527099608901,
      "loss": 3.1763,
      "step": 59176
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.738784909248352,
      "learning_rate": 0.0005075241456704816,
      "loss": 2.9282,
      "step": 59177
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6091119050979614,
      "learning_rate": 0.0005075211916934795,
      "loss": 2.8762,
      "step": 59178
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5422953367233276,
      "learning_rate": 0.0005075182376778955,
      "loss": 3.0012,
      "step": 59179
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6904972791671753,
      "learning_rate": 0.0005075152836237298,
      "loss": 3.0886,
      "step": 59180
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8847155570983887,
      "learning_rate": 0.000507512329530983,
      "loss": 3.0597,
      "step": 59181
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.892223596572876,
      "learning_rate": 0.0005075093753996559,
      "loss": 3.0507,
      "step": 59182
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6918357610702515,
      "learning_rate": 0.0005075064212297489,
      "loss": 3.0739,
      "step": 59183
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0001330375671387,
      "learning_rate": 0.0005075034670212623,
      "loss": 3.0791,
      "step": 59184
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5733848810195923,
      "learning_rate": 0.0005075005127741972,
      "loss": 3.168,
      "step": 59185
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4706774950027466,
      "learning_rate": 0.0005074975584885537,
      "loss": 3.1864,
      "step": 59186
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.598296880722046,
      "learning_rate": 0.0005074946041643324,
      "loss": 3.2573,
      "step": 59187
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7178449630737305,
      "learning_rate": 0.0005074916498015341,
      "loss": 3.0069,
      "step": 59188
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6091951131820679,
      "learning_rate": 0.0005074886954001591,
      "loss": 2.8941,
      "step": 59189
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.803818464279175,
      "learning_rate": 0.000507485740960208,
      "loss": 3.0569,
      "step": 59190
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.562268614768982,
      "learning_rate": 0.0005074827864816814,
      "loss": 3.0944,
      "step": 59191
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7335729598999023,
      "learning_rate": 0.0005074798319645799,
      "loss": 3.17,
      "step": 59192
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.137474775314331,
      "learning_rate": 0.000507476877408904,
      "loss": 3.1516,
      "step": 59193
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9055356979370117,
      "learning_rate": 0.0005074739228146542,
      "loss": 3.0737,
      "step": 59194
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4090172052383423,
      "learning_rate": 0.0005074709681818309,
      "loss": 3.1054,
      "step": 59195
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2591419219970703,
      "learning_rate": 0.000507468013510435,
      "loss": 2.9523,
      "step": 59196
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6829299926757812,
      "learning_rate": 0.0005074650588004669,
      "loss": 3.0914,
      "step": 59197
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5862120389938354,
      "learning_rate": 0.0005074621040519271,
      "loss": 3.092,
      "step": 59198
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7711838483810425,
      "learning_rate": 0.0005074591492648161,
      "loss": 3.0583,
      "step": 59199
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3176543712615967,
      "learning_rate": 0.0005074561944391347,
      "loss": 3.1599,
      "step": 59200
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6020610332489014,
      "learning_rate": 0.0005074532395748831,
      "loss": 2.7807,
      "step": 59201
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4926718473434448,
      "learning_rate": 0.0005074502846720622,
      "loss": 3.0778,
      "step": 59202
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.161101818084717,
      "learning_rate": 0.0005074473297306722,
      "loss": 2.8504,
      "step": 59203
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.6859962940216064,
      "learning_rate": 0.000507444374750714,
      "loss": 3.1218,
      "step": 59204
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5874316692352295,
      "learning_rate": 0.0005074414197321879,
      "loss": 3.3553,
      "step": 59205
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7989102602005005,
      "learning_rate": 0.0005074384646750944,
      "loss": 3.144,
      "step": 59206
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8628790378570557,
      "learning_rate": 0.0005074355095794343,
      "loss": 3.0253,
      "step": 59207
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.074035167694092,
      "learning_rate": 0.000507432554445208,
      "loss": 3.139,
      "step": 59208
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.627677083015442,
      "learning_rate": 0.0005074295992724161,
      "loss": 3.0345,
      "step": 59209
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3696932792663574,
      "learning_rate": 0.0005074266440610591,
      "loss": 3.0889,
      "step": 59210
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.574324131011963,
      "learning_rate": 0.0005074236888111375,
      "loss": 2.9058,
      "step": 59211
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.391657590866089,
      "learning_rate": 0.0005074207335226521,
      "loss": 3.0311,
      "step": 59212
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.727857828140259,
      "learning_rate": 0.0005074177781956031,
      "loss": 2.9572,
      "step": 59213
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.852194905281067,
      "learning_rate": 0.0005074148228299913,
      "loss": 3.0095,
      "step": 59214
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8995102643966675,
      "learning_rate": 0.0005074118674258172,
      "loss": 2.9217,
      "step": 59215
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4910749197006226,
      "learning_rate": 0.0005074089119830812,
      "loss": 2.7686,
      "step": 59216
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5761841535568237,
      "learning_rate": 0.0005074059565017841,
      "loss": 2.9004,
      "step": 59217
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.724234700202942,
      "learning_rate": 0.0005074030009819261,
      "loss": 3.1358,
      "step": 59218
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4744160175323486,
      "learning_rate": 0.0005074000454235081,
      "loss": 2.9666,
      "step": 59219
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7251828908920288,
      "learning_rate": 0.0005073970898265306,
      "loss": 2.8293,
      "step": 59220
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8305624723434448,
      "learning_rate": 0.000507394134190994,
      "loss": 2.858,
      "step": 59221
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.675162672996521,
      "learning_rate": 0.000507391178516899,
      "loss": 2.9708,
      "step": 59222
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4317677021026611,
      "learning_rate": 0.000507388222804246,
      "loss": 3.1179,
      "step": 59223
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.339327335357666,
      "learning_rate": 0.0005073852670530356,
      "loss": 2.8257,
      "step": 59224
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5386770963668823,
      "learning_rate": 0.0005073823112632683,
      "loss": 2.9164,
      "step": 59225
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3833674192428589,
      "learning_rate": 0.0005073793554349448,
      "loss": 3.0057,
      "step": 59226
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3786894083023071,
      "learning_rate": 0.0005073763995680655,
      "loss": 3.2301,
      "step": 59227
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.514370083808899,
      "learning_rate": 0.0005073734436626311,
      "loss": 3.1706,
      "step": 59228
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5504677295684814,
      "learning_rate": 0.000507370487718642,
      "loss": 3.1511,
      "step": 59229
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3768806457519531,
      "learning_rate": 0.0005073675317360988,
      "loss": 2.8374,
      "step": 59230
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.397562861442566,
      "learning_rate": 0.0005073645757150021,
      "loss": 3.2278,
      "step": 59231
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6388740539550781,
      "learning_rate": 0.0005073616196553524,
      "loss": 2.9989,
      "step": 59232
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6279840469360352,
      "learning_rate": 0.0005073586635571503,
      "loss": 3.0442,
      "step": 59233
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4861730337142944,
      "learning_rate": 0.0005073557074203963,
      "loss": 2.8457,
      "step": 59234
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.608075499534607,
      "learning_rate": 0.000507352751245091,
      "loss": 2.9701,
      "step": 59235
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.9660141468048096,
      "learning_rate": 0.0005073497950312347,
      "loss": 3.0586,
      "step": 59236
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8654305934906006,
      "learning_rate": 0.0005073468387788283,
      "loss": 3.0989,
      "step": 59237
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5909326076507568,
      "learning_rate": 0.0005073438824878722,
      "loss": 2.8172,
      "step": 59238
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4427480697631836,
      "learning_rate": 0.000507340926158367,
      "loss": 3.1624,
      "step": 59239
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9270234107971191,
      "learning_rate": 0.0005073379697903131,
      "loss": 3.2003,
      "step": 59240
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3811298608779907,
      "learning_rate": 0.0005073350133837113,
      "loss": 3.2668,
      "step": 59241
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1335439682006836,
      "learning_rate": 0.0005073320569385619,
      "loss": 3.1833,
      "step": 59242
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0029690265655518,
      "learning_rate": 0.0005073291004548656,
      "loss": 2.756,
      "step": 59243
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7905395030975342,
      "learning_rate": 0.000507326143932623,
      "loss": 3.1401,
      "step": 59244
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.738142490386963,
      "learning_rate": 0.0005073231873718344,
      "loss": 3.0492,
      "step": 59245
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.421374559402466,
      "learning_rate": 0.0005073202307725005,
      "loss": 2.9489,
      "step": 59246
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.679319143295288,
      "learning_rate": 0.000507317274134622,
      "loss": 2.7985,
      "step": 59247
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5771048069000244,
      "learning_rate": 0.0005073143174581992,
      "loss": 2.9579,
      "step": 59248
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.02285099029541,
      "learning_rate": 0.0005073113607432328,
      "loss": 2.8184,
      "step": 59249
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.4011709690093994,
      "learning_rate": 0.0005073084039897232,
      "loss": 2.9487,
      "step": 59250
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6895620822906494,
      "learning_rate": 0.0005073054471976711,
      "loss": 3.0191,
      "step": 59251
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.784698009490967,
      "learning_rate": 0.0005073024903670771,
      "loss": 3.0524,
      "step": 59252
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.635640025138855,
      "learning_rate": 0.0005072995334979416,
      "loss": 3.1376,
      "step": 59253
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5996664762496948,
      "learning_rate": 0.0005072965765902652,
      "loss": 3.0512,
      "step": 59254
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.200608491897583,
      "learning_rate": 0.0005072936196440484,
      "loss": 3.2179,
      "step": 59255
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6224778890609741,
      "learning_rate": 0.000507290662659292,
      "loss": 3.0135,
      "step": 59256
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4967209100723267,
      "learning_rate": 0.0005072877056359961,
      "loss": 2.9757,
      "step": 59257
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7510954141616821,
      "learning_rate": 0.0005072847485741616,
      "loss": 3.1788,
      "step": 59258
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2845523357391357,
      "learning_rate": 0.0005072817914737891,
      "loss": 3.1222,
      "step": 59259
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.355069875717163,
      "learning_rate": 0.0005072788343348789,
      "loss": 2.8888,
      "step": 59260
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2732369899749756,
      "learning_rate": 0.0005072758771574315,
      "loss": 3.1145,
      "step": 59261
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5752941370010376,
      "learning_rate": 0.0005072729199414477,
      "loss": 2.9084,
      "step": 59262
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6032737493515015,
      "learning_rate": 0.000507269962686928,
      "loss": 3.0314,
      "step": 59263
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6347987651824951,
      "learning_rate": 0.0005072670053938729,
      "loss": 2.915,
      "step": 59264
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4297868013381958,
      "learning_rate": 0.0005072640480622829,
      "loss": 3.0806,
      "step": 59265
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.725014567375183,
      "learning_rate": 0.0005072610906921586,
      "loss": 2.9707,
      "step": 59266
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.611417531967163,
      "learning_rate": 0.0005072581332835007,
      "loss": 2.9918,
      "step": 59267
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.833180546760559,
      "learning_rate": 0.0005072551758363093,
      "loss": 3.1322,
      "step": 59268
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.649398922920227,
      "learning_rate": 0.0005072522183505856,
      "loss": 3.1278,
      "step": 59269
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.69571053981781,
      "learning_rate": 0.0005072492608263296,
      "loss": 3.1971,
      "step": 59270
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5343648195266724,
      "learning_rate": 0.0005072463032635421,
      "loss": 2.9525,
      "step": 59271
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.415609121322632,
      "learning_rate": 0.0005072433456622236,
      "loss": 2.9111,
      "step": 59272
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6353217363357544,
      "learning_rate": 0.0005072403880223746,
      "loss": 2.8207,
      "step": 59273
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6951816082000732,
      "learning_rate": 0.0005072374303439957,
      "loss": 3.0913,
      "step": 59274
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7523841857910156,
      "learning_rate": 0.0005072344726270875,
      "loss": 2.9388,
      "step": 59275
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.13539457321167,
      "learning_rate": 0.0005072315148716504,
      "loss": 2.8731,
      "step": 59276
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.49290132522583,
      "learning_rate": 0.0005072285570776852,
      "loss": 2.9179,
      "step": 59277
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8321912288665771,
      "learning_rate": 0.0005072255992451922,
      "loss": 3.1206,
      "step": 59278
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7709327936172485,
      "learning_rate": 0.0005072226413741721,
      "loss": 2.9232,
      "step": 59279
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0304012298583984,
      "learning_rate": 0.0005072196834646253,
      "loss": 3.0907,
      "step": 59280
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1850626468658447,
      "learning_rate": 0.0005072167255165525,
      "loss": 2.956,
      "step": 59281
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1309211254119873,
      "learning_rate": 0.0005072137675299544,
      "loss": 2.8097,
      "step": 59282
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8016945123672485,
      "learning_rate": 0.0005072108095048311,
      "loss": 2.9004,
      "step": 59283
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.783711552619934,
      "learning_rate": 0.0005072078514411835,
      "loss": 2.9705,
      "step": 59284
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9839503765106201,
      "learning_rate": 0.000507204893339012,
      "loss": 3.2286,
      "step": 59285
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4205173254013062,
      "learning_rate": 0.0005072019351983172,
      "loss": 3.0637,
      "step": 59286
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6494512557983398,
      "learning_rate": 0.0005071989770190997,
      "loss": 3.0196,
      "step": 59287
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7557276487350464,
      "learning_rate": 0.00050719601880136,
      "loss": 2.9338,
      "step": 59288
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.001213550567627,
      "learning_rate": 0.0005071930605450985,
      "loss": 3.028,
      "step": 59289
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.307276964187622,
      "learning_rate": 0.0005071901022503161,
      "loss": 2.833,
      "step": 59290
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7904598712921143,
      "learning_rate": 0.000507187143917013,
      "loss": 3.1098,
      "step": 59291
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.005413055419922,
      "learning_rate": 0.00050718418554519,
      "loss": 2.9486,
      "step": 59292
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8437076807022095,
      "learning_rate": 0.0005071812271348475,
      "loss": 2.818,
      "step": 59293
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5755553245544434,
      "learning_rate": 0.0005071782686859861,
      "loss": 2.8114,
      "step": 59294
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.835162401199341,
      "learning_rate": 0.0005071753101986064,
      "loss": 3.3094,
      "step": 59295
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6675987243652344,
      "learning_rate": 0.0005071723516727088,
      "loss": 3.0031,
      "step": 59296
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.42556631565094,
      "learning_rate": 0.000507169393108294,
      "loss": 3.1144,
      "step": 59297
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5537059307098389,
      "learning_rate": 0.0005071664345053625,
      "loss": 2.9592,
      "step": 59298
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6846396923065186,
      "learning_rate": 0.0005071634758639149,
      "loss": 2.8663,
      "step": 59299
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0084826946258545,
      "learning_rate": 0.0005071605171839517,
      "loss": 2.8892,
      "step": 59300
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.1593375205993652,
      "learning_rate": 0.0005071575584654735,
      "loss": 3.1916,
      "step": 59301
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.372197151184082,
      "learning_rate": 0.0005071545997084806,
      "loss": 2.9041,
      "step": 59302
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.206882953643799,
      "learning_rate": 0.0005071516409129739,
      "loss": 3.0628,
      "step": 59303
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7733025550842285,
      "learning_rate": 0.0005071486820789538,
      "loss": 2.8137,
      "step": 59304
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.874279260635376,
      "learning_rate": 0.0005071457232064207,
      "loss": 3.1467,
      "step": 59305
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.283298969268799,
      "learning_rate": 0.0005071427642953755,
      "loss": 3.0376,
      "step": 59306
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8366163969039917,
      "learning_rate": 0.0005071398053458184,
      "loss": 2.8719,
      "step": 59307
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6830631494522095,
      "learning_rate": 0.0005071368463577502,
      "loss": 2.7249,
      "step": 59308
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.5647366046905518,
      "learning_rate": 0.0005071338873311712,
      "loss": 2.8302,
      "step": 59309
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.7941462993621826,
      "learning_rate": 0.0005071309282660822,
      "loss": 3.2502,
      "step": 59310
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.06815242767334,
      "learning_rate": 0.0005071279691624837,
      "loss": 3.1767,
      "step": 59311
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.141416072845459,
      "learning_rate": 0.0005071250100203762,
      "loss": 3.045,
      "step": 59312
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2897660732269287,
      "learning_rate": 0.0005071220508397602,
      "loss": 3.0729,
      "step": 59313
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.0492441654205322,
      "learning_rate": 0.0005071190916206363,
      "loss": 3.1506,
      "step": 59314
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4922990798950195,
      "learning_rate": 0.000507116132363005,
      "loss": 3.1119,
      "step": 59315
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8099925518035889,
      "learning_rate": 0.000507113173066867,
      "loss": 2.8427,
      "step": 59316
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9145276546478271,
      "learning_rate": 0.0005071102137322227,
      "loss": 3.0424,
      "step": 59317
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.360837459564209,
      "learning_rate": 0.0005071072543590727,
      "loss": 3.2392,
      "step": 59318
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6615047454833984,
      "learning_rate": 0.0005071042949474176,
      "loss": 3.2043,
      "step": 59319
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.368373155593872,
      "learning_rate": 0.0005071013354972579,
      "loss": 2.86,
      "step": 59320
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3204824924468994,
      "learning_rate": 0.0005070983760085941,
      "loss": 2.8128,
      "step": 59321
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.252929449081421,
      "learning_rate": 0.0005070954164814269,
      "loss": 3.0542,
      "step": 59322
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6662981510162354,
      "learning_rate": 0.0005070924569157566,
      "loss": 3.1729,
      "step": 59323
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.525461435317993,
      "learning_rate": 0.000507089497311584,
      "loss": 3.1904,
      "step": 59324
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9084161520004272,
      "learning_rate": 0.0005070865376689095,
      "loss": 3.0021,
      "step": 59325
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8363686800003052,
      "learning_rate": 0.0005070835779877337,
      "loss": 3.2112,
      "step": 59326
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6015337705612183,
      "learning_rate": 0.0005070806182680573,
      "loss": 2.9871,
      "step": 59327
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.73091721534729,
      "learning_rate": 0.0005070776585098806,
      "loss": 2.9672,
      "step": 59328
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.1033473014831543,
      "learning_rate": 0.0005070746987132042,
      "loss": 3.3465,
      "step": 59329
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0138187408447266,
      "learning_rate": 0.0005070717388780287,
      "loss": 2.9729,
      "step": 59330
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8994003534317017,
      "learning_rate": 0.0005070687790043548,
      "loss": 3.0523,
      "step": 59331
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.123748540878296,
      "learning_rate": 0.0005070658190921829,
      "loss": 3.0255,
      "step": 59332
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5154571533203125,
      "learning_rate": 0.0005070628591415133,
      "loss": 3.4589,
      "step": 59333
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4208121299743652,
      "learning_rate": 0.000507059899152347,
      "loss": 3.2217,
      "step": 59334
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.578940510749817,
      "learning_rate": 0.0005070569391246844,
      "loss": 3.3043,
      "step": 59335
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4605491161346436,
      "learning_rate": 0.0005070539790585259,
      "loss": 2.9299,
      "step": 59336
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.4179611206054688,
      "learning_rate": 0.0005070510189538722,
      "loss": 3.0274,
      "step": 59337
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.092557430267334,
      "learning_rate": 0.0005070480588107238,
      "loss": 3.1027,
      "step": 59338
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5348825454711914,
      "learning_rate": 0.0005070450986290813,
      "loss": 2.9091,
      "step": 59339
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5146117210388184,
      "learning_rate": 0.0005070421384089452,
      "loss": 3.3091,
      "step": 59340
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9869674444198608,
      "learning_rate": 0.000507039178150316,
      "loss": 3.0008,
      "step": 59341
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.742226004600525,
      "learning_rate": 0.0005070362178531943,
      "loss": 3.1728,
      "step": 59342
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4781395196914673,
      "learning_rate": 0.0005070332575175808,
      "loss": 3.0825,
      "step": 59343
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.917776107788086,
      "learning_rate": 0.0005070302971434757,
      "loss": 3.1689,
      "step": 59344
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5239415168762207,
      "learning_rate": 0.0005070273367308799,
      "loss": 3.2996,
      "step": 59345
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3624967336654663,
      "learning_rate": 0.0005070243762797938,
      "loss": 3.0872,
      "step": 59346
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4298820495605469,
      "learning_rate": 0.000507021415790218,
      "loss": 3.0028,
      "step": 59347
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.325491428375244,
      "learning_rate": 0.0005070184552621529,
      "loss": 2.8904,
      "step": 59348
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5621626377105713,
      "learning_rate": 0.0005070154946955991,
      "loss": 2.79,
      "step": 59349
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4007725715637207,
      "learning_rate": 0.0005070125340905574,
      "loss": 3.1409,
      "step": 59350
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.460003614425659,
      "learning_rate": 0.000507009573447028,
      "loss": 2.9404,
      "step": 59351
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8033634424209595,
      "learning_rate": 0.0005070066127650117,
      "loss": 3.2614,
      "step": 59352
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4213569164276123,
      "learning_rate": 0.000507003652044509,
      "loss": 2.8919,
      "step": 59353
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.690396785736084,
      "learning_rate": 0.0005070006912855203,
      "loss": 2.8762,
      "step": 59354
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7798529863357544,
      "learning_rate": 0.0005069977304880464,
      "loss": 3.2054,
      "step": 59355
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4306036233901978,
      "learning_rate": 0.0005069947696520877,
      "loss": 3.1479,
      "step": 59356
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6471832990646362,
      "learning_rate": 0.0005069918087776447,
      "loss": 3.2316,
      "step": 59357
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6024316549301147,
      "learning_rate": 0.000506988847864718,
      "loss": 2.9246,
      "step": 59358
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7422746419906616,
      "learning_rate": 0.0005069858869133083,
      "loss": 2.809,
      "step": 59359
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4643305540084839,
      "learning_rate": 0.0005069829259234159,
      "loss": 3.0971,
      "step": 59360
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3717843294143677,
      "learning_rate": 0.0005069799648950414,
      "loss": 3.2148,
      "step": 59361
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4407559633255005,
      "learning_rate": 0.0005069770038281855,
      "loss": 3.0895,
      "step": 59362
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.068297863006592,
      "learning_rate": 0.0005069740427228486,
      "loss": 2.7288,
      "step": 59363
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6106739044189453,
      "learning_rate": 0.0005069710815790314,
      "loss": 2.9057,
      "step": 59364
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1125011444091797,
      "learning_rate": 0.0005069681203967345,
      "loss": 2.9885,
      "step": 59365
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4506025314331055,
      "learning_rate": 0.0005069651591759581,
      "loss": 2.9734,
      "step": 59366
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5527284145355225,
      "learning_rate": 0.000506962197916703,
      "loss": 2.8836,
      "step": 59367
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.785284399986267,
      "learning_rate": 0.0005069592366189697,
      "loss": 3.0601,
      "step": 59368
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3503923416137695,
      "learning_rate": 0.0005069562752827589,
      "loss": 3.067,
      "step": 59369
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1276543140411377,
      "learning_rate": 0.0005069533139080709,
      "loss": 3.065,
      "step": 59370
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7212773561477661,
      "learning_rate": 0.0005069503524949065,
      "loss": 3.1245,
      "step": 59371
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7902084589004517,
      "learning_rate": 0.000506947391043266,
      "loss": 2.8877,
      "step": 59372
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.531102180480957,
      "learning_rate": 0.0005069444295531502,
      "loss": 3.1544,
      "step": 59373
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6263848543167114,
      "learning_rate": 0.0005069414680245594,
      "loss": 2.8429,
      "step": 59374
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9748187065124512,
      "learning_rate": 0.0005069385064574943,
      "loss": 3.1137,
      "step": 59375
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4782629013061523,
      "learning_rate": 0.0005069355448519555,
      "loss": 3.1012,
      "step": 59376
    },
    {
      "epoch": 0.77,
      "grad_norm": 7.159919261932373,
      "learning_rate": 0.0005069325832079434,
      "loss": 3.1814,
      "step": 59377
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2487542629241943,
      "learning_rate": 0.0005069296215254586,
      "loss": 3.1558,
      "step": 59378
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3728232383728027,
      "learning_rate": 0.0005069266598045018,
      "loss": 3.1677,
      "step": 59379
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6304545402526855,
      "learning_rate": 0.0005069236980450733,
      "loss": 3.0106,
      "step": 59380
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3481003046035767,
      "learning_rate": 0.0005069207362471738,
      "loss": 3.1025,
      "step": 59381
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4746119976043701,
      "learning_rate": 0.000506917774410804,
      "loss": 3.0328,
      "step": 59382
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7506510019302368,
      "learning_rate": 0.0005069148125359641,
      "loss": 2.9029,
      "step": 59383
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4011027812957764,
      "learning_rate": 0.0005069118506226549,
      "loss": 3.1243,
      "step": 59384
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6832969188690186,
      "learning_rate": 0.0005069088886708767,
      "loss": 3.1882,
      "step": 59385
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.161673069000244,
      "learning_rate": 0.0005069059266806304,
      "loss": 2.8392,
      "step": 59386
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.334449529647827,
      "learning_rate": 0.0005069029646519164,
      "loss": 2.6035,
      "step": 59387
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4712660312652588,
      "learning_rate": 0.0005069000025847353,
      "loss": 2.9973,
      "step": 59388
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9256818294525146,
      "learning_rate": 0.0005068970404790875,
      "loss": 3.0039,
      "step": 59389
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.2466957569122314,
      "learning_rate": 0.0005068940783349736,
      "loss": 3.0919,
      "step": 59390
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3843510150909424,
      "learning_rate": 0.0005068911161523942,
      "loss": 3.1922,
      "step": 59391
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.69899582862854,
      "learning_rate": 0.0005068881539313499,
      "loss": 3.0422,
      "step": 59392
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7972118854522705,
      "learning_rate": 0.000506885191671841,
      "loss": 3.0293,
      "step": 59393
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.2481796741485596,
      "learning_rate": 0.0005068822293738684,
      "loss": 2.9654,
      "step": 59394
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8148090839385986,
      "learning_rate": 0.0005068792670374325,
      "loss": 3.1229,
      "step": 59395
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3575329780578613,
      "learning_rate": 0.0005068763046625338,
      "loss": 3.2029,
      "step": 59396
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6882703304290771,
      "learning_rate": 0.0005068733422491729,
      "loss": 2.7788,
      "step": 59397
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3523656129837036,
      "learning_rate": 0.0005068703797973503,
      "loss": 3.1494,
      "step": 59398
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9358270168304443,
      "learning_rate": 0.0005068674173070667,
      "loss": 3.0737,
      "step": 59399
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.547608733177185,
      "learning_rate": 0.0005068644547783224,
      "loss": 2.9685,
      "step": 59400
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6545549631118774,
      "learning_rate": 0.0005068614922111182,
      "loss": 2.7414,
      "step": 59401
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5635921955108643,
      "learning_rate": 0.0005068585296054545,
      "loss": 3.2284,
      "step": 59402
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1419951915740967,
      "learning_rate": 0.0005068555669613318,
      "loss": 3.1928,
      "step": 59403
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7054566144943237,
      "learning_rate": 0.000506852604278751,
      "loss": 2.8899,
      "step": 59404
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8745718002319336,
      "learning_rate": 0.0005068496415577121,
      "loss": 2.9636,
      "step": 59405
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.416128396987915,
      "learning_rate": 0.000506846678798216,
      "loss": 2.9683,
      "step": 59406
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4707847833633423,
      "learning_rate": 0.0005068437160002634,
      "loss": 2.7868,
      "step": 59407
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8539929389953613,
      "learning_rate": 0.0005068407531638545,
      "loss": 3.1581,
      "step": 59408
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.9023513793945312,
      "learning_rate": 0.0005068377902889901,
      "loss": 3.0482,
      "step": 59409
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.464829206466675,
      "learning_rate": 0.0005068348273756705,
      "loss": 2.9542,
      "step": 59410
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8632800579071045,
      "learning_rate": 0.0005068318644238965,
      "loss": 3.0696,
      "step": 59411
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6452966928482056,
      "learning_rate": 0.0005068289014336686,
      "loss": 3.2641,
      "step": 59412
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.257810115814209,
      "learning_rate": 0.0005068259384049872,
      "loss": 3.1138,
      "step": 59413
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7231334447860718,
      "learning_rate": 0.0005068229753378529,
      "loss": 3.1368,
      "step": 59414
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6924471855163574,
      "learning_rate": 0.0005068200122322665,
      "loss": 3.128,
      "step": 59415
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.857794761657715,
      "learning_rate": 0.0005068170490882282,
      "loss": 2.8976,
      "step": 59416
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1612350940704346,
      "learning_rate": 0.0005068140859057388,
      "loss": 2.972,
      "step": 59417
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6524823904037476,
      "learning_rate": 0.0005068111226847987,
      "loss": 2.9996,
      "step": 59418
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7632807493209839,
      "learning_rate": 0.0005068081594254085,
      "loss": 2.9301,
      "step": 59419
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8758108615875244,
      "learning_rate": 0.0005068051961275688,
      "loss": 3.042,
      "step": 59420
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9182182550430298,
      "learning_rate": 0.00050680223279128,
      "loss": 2.8234,
      "step": 59421
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6104750633239746,
      "learning_rate": 0.000506799269416543,
      "loss": 3.0344,
      "step": 59422
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.30255126953125,
      "learning_rate": 0.0005067963060033577,
      "loss": 3.0103,
      "step": 59423
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.9687182903289795,
      "learning_rate": 0.0005067933425517254,
      "loss": 3.1003,
      "step": 59424
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.393863558769226,
      "learning_rate": 0.0005067903790616463,
      "loss": 2.9186,
      "step": 59425
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.763249158859253,
      "learning_rate": 0.0005067874155331209,
      "loss": 3.0205,
      "step": 59426
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.658299446105957,
      "learning_rate": 0.0005067844519661499,
      "loss": 3.1353,
      "step": 59427
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6778723001480103,
      "learning_rate": 0.0005067814883607336,
      "loss": 2.8247,
      "step": 59428
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.284315586090088,
      "learning_rate": 0.0005067785247168729,
      "loss": 3.2528,
      "step": 59429
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.887399673461914,
      "learning_rate": 0.000506775561034568,
      "loss": 2.9689,
      "step": 59430
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9183721542358398,
      "learning_rate": 0.0005067725973138197,
      "loss": 2.9423,
      "step": 59431
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4748543500900269,
      "learning_rate": 0.0005067696335546285,
      "loss": 2.7854,
      "step": 59432
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4585049152374268,
      "learning_rate": 0.0005067666697569948,
      "loss": 3.1564,
      "step": 59433
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.00897479057312,
      "learning_rate": 0.0005067637059209194,
      "loss": 2.9487,
      "step": 59434
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8637421131134033,
      "learning_rate": 0.0005067607420464027,
      "loss": 2.9749,
      "step": 59435
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8002781867980957,
      "learning_rate": 0.0005067577781334452,
      "loss": 3.1203,
      "step": 59436
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8583210706710815,
      "learning_rate": 0.0005067548141820476,
      "loss": 3.1327,
      "step": 59437
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.047734022140503,
      "learning_rate": 0.0005067518501922104,
      "loss": 2.9918,
      "step": 59438
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8037910461425781,
      "learning_rate": 0.0005067488861639341,
      "loss": 3.1019,
      "step": 59439
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.692888617515564,
      "learning_rate": 0.0005067459220972193,
      "loss": 2.9663,
      "step": 59440
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.730540156364441,
      "learning_rate": 0.0005067429579920664,
      "loss": 3.041,
      "step": 59441
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4678046703338623,
      "learning_rate": 0.0005067399938484763,
      "loss": 3.2192,
      "step": 59442
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8575892448425293,
      "learning_rate": 0.0005067370296664491,
      "loss": 3.0989,
      "step": 59443
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0982513427734375,
      "learning_rate": 0.0005067340654459858,
      "loss": 2.9338,
      "step": 59444
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5529330968856812,
      "learning_rate": 0.0005067311011870867,
      "loss": 2.9586,
      "step": 59445
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.340087890625,
      "learning_rate": 0.0005067281368897523,
      "loss": 2.9653,
      "step": 59446
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.6782066822052,
      "learning_rate": 0.0005067251725539832,
      "loss": 3.0346,
      "step": 59447
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.2737934589385986,
      "learning_rate": 0.0005067222081797801,
      "loss": 2.9261,
      "step": 59448
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5346732139587402,
      "learning_rate": 0.0005067192437671433,
      "loss": 2.845,
      "step": 59449
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6545872688293457,
      "learning_rate": 0.0005067162793160736,
      "loss": 2.8693,
      "step": 59450
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7494691610336304,
      "learning_rate": 0.0005067133148265714,
      "loss": 2.9768,
      "step": 59451
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4616689682006836,
      "learning_rate": 0.0005067103502986374,
      "loss": 3.0295,
      "step": 59452
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7373814582824707,
      "learning_rate": 0.0005067073857322719,
      "loss": 2.9924,
      "step": 59453
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8080500364303589,
      "learning_rate": 0.0005067044211274757,
      "loss": 3.2048,
      "step": 59454
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4566383361816406,
      "learning_rate": 0.0005067014564842491,
      "loss": 2.9056,
      "step": 59455
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5337135791778564,
      "learning_rate": 0.000506698491802593,
      "loss": 2.9322,
      "step": 59456
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8612593412399292,
      "learning_rate": 0.0005066955270825076,
      "loss": 3.0668,
      "step": 59457
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7972043752670288,
      "learning_rate": 0.0005066925623239936,
      "loss": 2.9281,
      "step": 59458
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6748511791229248,
      "learning_rate": 0.0005066895975270517,
      "loss": 2.9396,
      "step": 59459
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7765862941741943,
      "learning_rate": 0.0005066866326916821,
      "loss": 3.1087,
      "step": 59460
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5318379402160645,
      "learning_rate": 0.0005066836678178857,
      "loss": 2.857,
      "step": 59461
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5604065656661987,
      "learning_rate": 0.0005066807029056628,
      "loss": 3.0674,
      "step": 59462
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.022559881210327,
      "learning_rate": 0.0005066777379550141,
      "loss": 3.0773,
      "step": 59463
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5486938953399658,
      "learning_rate": 0.0005066747729659401,
      "loss": 3.099,
      "step": 59464
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.44712495803833,
      "learning_rate": 0.0005066718079384414,
      "loss": 2.8988,
      "step": 59465
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6068689823150635,
      "learning_rate": 0.0005066688428725185,
      "loss": 2.9887,
      "step": 59466
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.820876955986023,
      "learning_rate": 0.0005066658777681718,
      "loss": 2.8119,
      "step": 59467
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.39617919921875,
      "learning_rate": 0.0005066629126254023,
      "loss": 3.0584,
      "step": 59468
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4416019916534424,
      "learning_rate": 0.0005066599474442101,
      "loss": 3.0251,
      "step": 59469
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0056309700012207,
      "learning_rate": 0.0005066569822245959,
      "loss": 3.1631,
      "step": 59470
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.554358720779419,
      "learning_rate": 0.0005066540169665604,
      "loss": 2.8815,
      "step": 59471
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4152653217315674,
      "learning_rate": 0.0005066510516701038,
      "loss": 3.0104,
      "step": 59472
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.617523431777954,
      "learning_rate": 0.000506648086335227,
      "loss": 2.9679,
      "step": 59473
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9803965091705322,
      "learning_rate": 0.0005066451209619305,
      "loss": 2.9504,
      "step": 59474
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5302647352218628,
      "learning_rate": 0.0005066421555502146,
      "loss": 3.062,
      "step": 59475
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8648954629898071,
      "learning_rate": 0.0005066391901000801,
      "loss": 2.9047,
      "step": 59476
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8345627784729004,
      "learning_rate": 0.0005066362246115275,
      "loss": 3.1457,
      "step": 59477
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8589463233947754,
      "learning_rate": 0.0005066332590845573,
      "loss": 3.2779,
      "step": 59478
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6486462354660034,
      "learning_rate": 0.0005066302935191701,
      "loss": 2.9683,
      "step": 59479
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.170117139816284,
      "learning_rate": 0.0005066273279153665,
      "loss": 2.9703,
      "step": 59480
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.343718409538269,
      "learning_rate": 0.0005066243622731468,
      "loss": 2.9957,
      "step": 59481
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.235727548599243,
      "learning_rate": 0.0005066213965925117,
      "loss": 2.9444,
      "step": 59482
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.266795039176941,
      "learning_rate": 0.000506618430873462,
      "loss": 2.6795,
      "step": 59483
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5523591041564941,
      "learning_rate": 0.0005066154651159979,
      "loss": 3.0285,
      "step": 59484
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.814563274383545,
      "learning_rate": 0.0005066124993201201,
      "loss": 3.0704,
      "step": 59485
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5945736169815063,
      "learning_rate": 0.0005066095334858292,
      "loss": 3.2107,
      "step": 59486
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.342898964881897,
      "learning_rate": 0.0005066065676131256,
      "loss": 3.0985,
      "step": 59487
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.3740310668945312,
      "learning_rate": 0.00050660360170201,
      "loss": 3.2174,
      "step": 59488
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.5715107917785645,
      "learning_rate": 0.000506600635752483,
      "loss": 2.9011,
      "step": 59489
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.5938467979431152,
      "learning_rate": 0.0005065976697645448,
      "loss": 3.1311,
      "step": 59490
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9359890222549438,
      "learning_rate": 0.0005065947037381964,
      "loss": 3.0392,
      "step": 59491
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.526127338409424,
      "learning_rate": 0.0005065917376734379,
      "loss": 2.971,
      "step": 59492
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.578859806060791,
      "learning_rate": 0.0005065887715702703,
      "loss": 3.1203,
      "step": 59493
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3262218236923218,
      "learning_rate": 0.0005065858054286939,
      "loss": 2.8109,
      "step": 59494
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8995490074157715,
      "learning_rate": 0.0005065828392487093,
      "loss": 3.0961,
      "step": 59495
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.974989414215088,
      "learning_rate": 0.000506579873030317,
      "loss": 2.9819,
      "step": 59496
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.612985610961914,
      "learning_rate": 0.0005065769067735177,
      "loss": 2.9756,
      "step": 59497
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6881012916564941,
      "learning_rate": 0.0005065739404783117,
      "loss": 3.1925,
      "step": 59498
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.8631138801574707,
      "learning_rate": 0.0005065709741446999,
      "loss": 2.8897,
      "step": 59499
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.8298133611679077,
      "learning_rate": 0.0005065680077726826,
      "loss": 2.8061,
      "step": 59500
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6131646633148193,
      "learning_rate": 0.0005065650413622603,
      "loss": 3.0436,
      "step": 59501
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.217454433441162,
      "learning_rate": 0.0005065620749134338,
      "loss": 2.9408,
      "step": 59502
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.198422431945801,
      "learning_rate": 0.0005065591084262034,
      "loss": 2.7516,
      "step": 59503
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5606542825698853,
      "learning_rate": 0.0005065561419005698,
      "loss": 3.1335,
      "step": 59504
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7404415607452393,
      "learning_rate": 0.0005065531753365335,
      "loss": 3.1781,
      "step": 59505
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.214444160461426,
      "learning_rate": 0.0005065502087340951,
      "loss": 3.2009,
      "step": 59506
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.3643901348114014,
      "learning_rate": 0.000506547242093255,
      "loss": 3.0291,
      "step": 59507
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4705181121826172,
      "learning_rate": 0.000506544275414014,
      "loss": 3.0086,
      "step": 59508
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.100220203399658,
      "learning_rate": 0.0005065413086963725,
      "loss": 2.8722,
      "step": 59509
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.4767050743103027,
      "learning_rate": 0.0005065383419403311,
      "loss": 2.8958,
      "step": 59510
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.533306360244751,
      "learning_rate": 0.0005065353751458903,
      "loss": 3.1337,
      "step": 59511
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.431275725364685,
      "learning_rate": 0.0005065324083130507,
      "loss": 2.9924,
      "step": 59512
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5739715099334717,
      "learning_rate": 0.0005065294414418127,
      "loss": 3.058,
      "step": 59513
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.0657289028167725,
      "learning_rate": 0.0005065264745321771,
      "loss": 3.2526,
      "step": 59514
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.1895275115966797,
      "learning_rate": 0.0005065235075841442,
      "loss": 3.0788,
      "step": 59515
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.5079597234725952,
      "learning_rate": 0.0005065205405977148,
      "loss": 3.0166,
      "step": 59516
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.577772617340088,
      "learning_rate": 0.0005065175735728894,
      "loss": 3.03,
      "step": 59517
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.9655154943466187,
      "learning_rate": 0.0005065146065096683,
      "loss": 3.2332,
      "step": 59518
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.6616345643997192,
      "learning_rate": 0.0005065116394080524,
      "loss": 3.1383,
      "step": 59519
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.057750701904297,
      "learning_rate": 0.000506508672268042,
      "loss": 3.0127,
      "step": 59520
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7048168182373047,
      "learning_rate": 0.0005065057050896378,
      "loss": 2.9427,
      "step": 59521
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.592615008354187,
      "learning_rate": 0.0005065027378728401,
      "loss": 3.2264,
      "step": 59522
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7134870290756226,
      "learning_rate": 0.00050649977061765,
      "loss": 3.1334,
      "step": 59523
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5099914073944092,
      "learning_rate": 0.0005064968033240674,
      "loss": 3.1245,
      "step": 59524
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5292025804519653,
      "learning_rate": 0.0005064938359920933,
      "loss": 2.9261,
      "step": 59525
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4433834552764893,
      "learning_rate": 0.0005064908686217279,
      "loss": 3.0334,
      "step": 59526
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4300007820129395,
      "learning_rate": 0.0005064879012129721,
      "loss": 3.0468,
      "step": 59527
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.713708758354187,
      "learning_rate": 0.0005064849337658262,
      "loss": 3.046,
      "step": 59528
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9541071653366089,
      "learning_rate": 0.0005064819662802912,
      "loss": 3.067,
      "step": 59529
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.904883623123169,
      "learning_rate": 0.0005064789987563669,
      "loss": 3.1581,
      "step": 59530
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7961405515670776,
      "learning_rate": 0.0005064760311940543,
      "loss": 3.0327,
      "step": 59531
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9580861330032349,
      "learning_rate": 0.0005064730635933541,
      "loss": 3.0873,
      "step": 59532
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6243643760681152,
      "learning_rate": 0.0005064700959542666,
      "loss": 3.2635,
      "step": 59533
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4754574298858643,
      "learning_rate": 0.0005064671282767923,
      "loss": 3.0579,
      "step": 59534
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.326629877090454,
      "learning_rate": 0.000506464160560932,
      "loss": 2.9621,
      "step": 59535
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0858967304229736,
      "learning_rate": 0.0005064611928066861,
      "loss": 2.9087,
      "step": 59536
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9881962537765503,
      "learning_rate": 0.000506458225014055,
      "loss": 2.9218,
      "step": 59537
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.770340919494629,
      "learning_rate": 0.0005064552571830397,
      "loss": 3.0918,
      "step": 59538
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.576996088027954,
      "learning_rate": 0.0005064522893136402,
      "loss": 3.3117,
      "step": 59539
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9901747703552246,
      "learning_rate": 0.0005064493214058574,
      "loss": 3.2152,
      "step": 59540
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4206256866455078,
      "learning_rate": 0.0005064463534596919,
      "loss": 3.2584,
      "step": 59541
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6060516834259033,
      "learning_rate": 0.000506443385475144,
      "loss": 2.7202,
      "step": 59542
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.951735734939575,
      "learning_rate": 0.0005064404174522144,
      "loss": 3.0485,
      "step": 59543
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8432955741882324,
      "learning_rate": 0.0005064374493909037,
      "loss": 3.1458,
      "step": 59544
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0566444396972656,
      "learning_rate": 0.0005064344812912123,
      "loss": 2.872,
      "step": 59545
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.697587490081787,
      "learning_rate": 0.0005064315131531409,
      "loss": 3.1334,
      "step": 59546
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5235260725021362,
      "learning_rate": 0.0005064285449766898,
      "loss": 3.1918,
      "step": 59547
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.119089126586914,
      "learning_rate": 0.0005064255767618599,
      "loss": 3.1254,
      "step": 59548
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5654199123382568,
      "learning_rate": 0.0005064226085086516,
      "loss": 3.1745,
      "step": 59549
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7266180515289307,
      "learning_rate": 0.0005064196402170654,
      "loss": 3.0652,
      "step": 59550
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8127593994140625,
      "learning_rate": 0.0005064166718871019,
      "loss": 3.0444,
      "step": 59551
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6609257459640503,
      "learning_rate": 0.0005064137035187616,
      "loss": 3.2185,
      "step": 59552
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6341403722763062,
      "learning_rate": 0.0005064107351120451,
      "loss": 2.9297,
      "step": 59553
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6854007244110107,
      "learning_rate": 0.0005064077666669531,
      "loss": 3.1113,
      "step": 59554
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3352967500686646,
      "learning_rate": 0.0005064047981834859,
      "loss": 3.3759,
      "step": 59555
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.385422945022583,
      "learning_rate": 0.000506401829661644,
      "loss": 2.7529,
      "step": 59556
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6089897155761719,
      "learning_rate": 0.0005063988611014284,
      "loss": 3.0368,
      "step": 59557
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6148945093154907,
      "learning_rate": 0.0005063958925028392,
      "loss": 3.3634,
      "step": 59558
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2950701713562012,
      "learning_rate": 0.0005063929238658771,
      "loss": 2.9679,
      "step": 59559
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7905200719833374,
      "learning_rate": 0.0005063899551905426,
      "loss": 2.8458,
      "step": 59560
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2857134342193604,
      "learning_rate": 0.0005063869864768364,
      "loss": 3.2739,
      "step": 59561
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5740222930908203,
      "learning_rate": 0.0005063840177247589,
      "loss": 3.0529,
      "step": 59562
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7487088441848755,
      "learning_rate": 0.0005063810489343108,
      "loss": 3.0482,
      "step": 59563
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.589514970779419,
      "learning_rate": 0.0005063780801054925,
      "loss": 3.0337,
      "step": 59564
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9346683025360107,
      "learning_rate": 0.0005063751112383047,
      "loss": 2.8659,
      "step": 59565
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0297462940216064,
      "learning_rate": 0.0005063721423327479,
      "loss": 2.9932,
      "step": 59566
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8518232107162476,
      "learning_rate": 0.0005063691733888224,
      "loss": 3.0682,
      "step": 59567
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3332998752593994,
      "learning_rate": 0.0005063662044065291,
      "loss": 3.1561,
      "step": 59568
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1534361839294434,
      "learning_rate": 0.0005063632353858685,
      "loss": 2.9107,
      "step": 59569
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4480475187301636,
      "learning_rate": 0.000506360266326841,
      "loss": 3.1366,
      "step": 59570
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7994002103805542,
      "learning_rate": 0.0005063572972294471,
      "loss": 3.1655,
      "step": 59571
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3468782901763916,
      "learning_rate": 0.0005063543280936878,
      "loss": 3.1435,
      "step": 59572
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.360792398452759,
      "learning_rate": 0.0005063513589195631,
      "loss": 2.9095,
      "step": 59573
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5414047241210938,
      "learning_rate": 0.0005063483897070739,
      "loss": 3.1279,
      "step": 59574
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.5751874446868896,
      "learning_rate": 0.0005063454204562206,
      "loss": 2.9959,
      "step": 59575
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9331820011138916,
      "learning_rate": 0.0005063424511670038,
      "loss": 2.905,
      "step": 59576
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2633609771728516,
      "learning_rate": 0.000506339481839424,
      "loss": 3.1084,
      "step": 59577
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4234168529510498,
      "learning_rate": 0.0005063365124734819,
      "loss": 3.1238,
      "step": 59578
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.295328378677368,
      "learning_rate": 0.0005063335430691778,
      "loss": 2.8719,
      "step": 59579
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3117828369140625,
      "learning_rate": 0.0005063305736265124,
      "loss": 3.1397,
      "step": 59580
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.02632737159729,
      "learning_rate": 0.0005063276041454863,
      "loss": 2.9328,
      "step": 59581
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5835211277008057,
      "learning_rate": 0.0005063246346261001,
      "loss": 3.0313,
      "step": 59582
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8862608671188354,
      "learning_rate": 0.0005063216650683541,
      "loss": 3.2349,
      "step": 59583
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4701987504959106,
      "learning_rate": 0.0005063186954722492,
      "loss": 2.9631,
      "step": 59584
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.004178047180176,
      "learning_rate": 0.0005063157258377856,
      "loss": 3.0038,
      "step": 59585
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.71812105178833,
      "learning_rate": 0.0005063127561649641,
      "loss": 3.0862,
      "step": 59586
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4398382902145386,
      "learning_rate": 0.000506309786453785,
      "loss": 2.6759,
      "step": 59587
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3909103870391846,
      "learning_rate": 0.0005063068167042492,
      "loss": 3.1754,
      "step": 59588
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7250206470489502,
      "learning_rate": 0.0005063038469163571,
      "loss": 3.0222,
      "step": 59589
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.394054651260376,
      "learning_rate": 0.0005063008770901091,
      "loss": 3.124,
      "step": 59590
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.022528886795044,
      "learning_rate": 0.0005062979072255059,
      "loss": 3.2962,
      "step": 59591
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.641963005065918,
      "learning_rate": 0.000506294937322548,
      "loss": 2.8342,
      "step": 59592
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.795128583908081,
      "learning_rate": 0.000506291967381236,
      "loss": 3.1223,
      "step": 59593
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4922362565994263,
      "learning_rate": 0.0005062889974015704,
      "loss": 3.1992,
      "step": 59594
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.6032626628875732,
      "learning_rate": 0.0005062860273835519,
      "loss": 2.98,
      "step": 59595
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.809084415435791,
      "learning_rate": 0.0005062830573271808,
      "loss": 2.7735,
      "step": 59596
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.872220516204834,
      "learning_rate": 0.0005062800872324579,
      "loss": 3.1218,
      "step": 59597
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8868993520736694,
      "learning_rate": 0.0005062771170993835,
      "loss": 2.9123,
      "step": 59598
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7071664333343506,
      "learning_rate": 0.0005062741469279584,
      "loss": 3.1619,
      "step": 59599
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.799798846244812,
      "learning_rate": 0.000506271176718183,
      "loss": 3.1303,
      "step": 59600
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.7226667404174805,
      "learning_rate": 0.0005062682064700579,
      "loss": 3.0879,
      "step": 59601
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8842681646347046,
      "learning_rate": 0.0005062652361835836,
      "loss": 3.0571,
      "step": 59602
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.485599994659424,
      "learning_rate": 0.0005062622658587607,
      "loss": 2.9185,
      "step": 59603
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7564359903335571,
      "learning_rate": 0.0005062592954955899,
      "loss": 3.3659,
      "step": 59604
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8079878091812134,
      "learning_rate": 0.0005062563250940715,
      "loss": 3.1678,
      "step": 59605
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6684870719909668,
      "learning_rate": 0.0005062533546542062,
      "loss": 3.2642,
      "step": 59606
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7091408967971802,
      "learning_rate": 0.0005062503841759945,
      "loss": 3.0658,
      "step": 59607
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7215436697006226,
      "learning_rate": 0.0005062474136594369,
      "loss": 2.7949,
      "step": 59608
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.483146071434021,
      "learning_rate": 0.000506244443104534,
      "loss": 3.1428,
      "step": 59609
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7754522562026978,
      "learning_rate": 0.0005062414725112864,
      "loss": 3.1943,
      "step": 59610
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0118868350982666,
      "learning_rate": 0.0005062385018796946,
      "loss": 2.9702,
      "step": 59611
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0244555473327637,
      "learning_rate": 0.0005062355312097592,
      "loss": 3.036,
      "step": 59612
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1324856281280518,
      "learning_rate": 0.0005062325605014806,
      "loss": 2.8845,
      "step": 59613
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.018601417541504,
      "learning_rate": 0.0005062295897548597,
      "loss": 3.0206,
      "step": 59614
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6908564567565918,
      "learning_rate": 0.0005062266189698967,
      "loss": 2.988,
      "step": 59615
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6907774209976196,
      "learning_rate": 0.0005062236481465923,
      "loss": 3.1241,
      "step": 59616
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.203284502029419,
      "learning_rate": 0.0005062206772849471,
      "loss": 2.8488,
      "step": 59617
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.268453359603882,
      "learning_rate": 0.0005062177063849615,
      "loss": 3.008,
      "step": 59618
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2553863525390625,
      "learning_rate": 0.0005062147354466362,
      "loss": 3.0008,
      "step": 59619
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4142245054244995,
      "learning_rate": 0.0005062117644699714,
      "loss": 3.0856,
      "step": 59620
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.6501219272613525,
      "learning_rate": 0.0005062087934549682,
      "loss": 3.0647,
      "step": 59621
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.72418212890625,
      "learning_rate": 0.000506205822401627,
      "loss": 2.9623,
      "step": 59622
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.690865159034729,
      "learning_rate": 0.0005062028513099481,
      "loss": 3.0898,
      "step": 59623
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6399730443954468,
      "learning_rate": 0.0005061998801799321,
      "loss": 3.3303,
      "step": 59624
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.2159698009490967,
      "learning_rate": 0.0005061969090115798,
      "loss": 3.1666,
      "step": 59625
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4089425802230835,
      "learning_rate": 0.0005061939378048915,
      "loss": 3.0481,
      "step": 59626
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1986405849456787,
      "learning_rate": 0.000506190966559868,
      "loss": 3.066,
      "step": 59627
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0480172634124756,
      "learning_rate": 0.0005061879952765096,
      "loss": 3.1139,
      "step": 59628
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.464625358581543,
      "learning_rate": 0.000506185023954817,
      "loss": 2.9124,
      "step": 59629
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3440444469451904,
      "learning_rate": 0.0005061820525947907,
      "loss": 3.1339,
      "step": 59630
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.091888666152954,
      "learning_rate": 0.0005061790811964312,
      "loss": 3.1916,
      "step": 59631
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.742741346359253,
      "learning_rate": 0.0005061761097597392,
      "loss": 3.1167,
      "step": 59632
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5649250745773315,
      "learning_rate": 0.0005061731382847151,
      "loss": 3.3282,
      "step": 59633
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8718152046203613,
      "learning_rate": 0.0005061701667713596,
      "loss": 3.2867,
      "step": 59634
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9430097341537476,
      "learning_rate": 0.0005061671952196731,
      "loss": 3.0737,
      "step": 59635
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.943444013595581,
      "learning_rate": 0.0005061642236296563,
      "loss": 2.7525,
      "step": 59636
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.709136724472046,
      "learning_rate": 0.0005061612520013096,
      "loss": 2.8536,
      "step": 59637
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.030698537826538,
      "learning_rate": 0.0005061582803346337,
      "loss": 3.1938,
      "step": 59638
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9974569082260132,
      "learning_rate": 0.000506155308629629,
      "loss": 3.0067,
      "step": 59639
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2028306722640991,
      "learning_rate": 0.0005061523368862963,
      "loss": 2.9361,
      "step": 59640
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5328712463378906,
      "learning_rate": 0.0005061493651046358,
      "loss": 3.0909,
      "step": 59641
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7204456329345703,
      "learning_rate": 0.0005061463932846484,
      "loss": 2.9839,
      "step": 59642
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4394493103027344,
      "learning_rate": 0.0005061434214263344,
      "loss": 3.0735,
      "step": 59643
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3386573791503906,
      "learning_rate": 0.0005061404495296945,
      "loss": 3.0334,
      "step": 59644
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4677708148956299,
      "learning_rate": 0.0005061374775947291,
      "loss": 2.9795,
      "step": 59645
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1524341106414795,
      "learning_rate": 0.0005061345056214389,
      "loss": 3.0418,
      "step": 59646
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.406740188598633,
      "learning_rate": 0.0005061315336098245,
      "loss": 3.1,
      "step": 59647
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2576870918273926,
      "learning_rate": 0.0005061285615598863,
      "loss": 2.9974,
      "step": 59648
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.358932375907898,
      "learning_rate": 0.0005061255894716248,
      "loss": 3.098,
      "step": 59649
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6662664413452148,
      "learning_rate": 0.0005061226173450408,
      "loss": 3.3033,
      "step": 59650
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7985656261444092,
      "learning_rate": 0.0005061196451801347,
      "loss": 3.2326,
      "step": 59651
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8206002712249756,
      "learning_rate": 0.000506116672976907,
      "loss": 2.9487,
      "step": 59652
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6582973003387451,
      "learning_rate": 0.0005061137007353583,
      "loss": 3.1224,
      "step": 59653
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7436742782592773,
      "learning_rate": 0.0005061107284554892,
      "loss": 3.0001,
      "step": 59654
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9820265769958496,
      "learning_rate": 0.0005061077561373003,
      "loss": 2.93,
      "step": 59655
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.478235960006714,
      "learning_rate": 0.000506104783780792,
      "loss": 2.9222,
      "step": 59656
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8610880374908447,
      "learning_rate": 0.000506101811385965,
      "loss": 3.3077,
      "step": 59657
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.398718237876892,
      "learning_rate": 0.0005060988389528197,
      "loss": 3.1168,
      "step": 59658
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.399664878845215,
      "learning_rate": 0.0005060958664813569,
      "loss": 2.9439,
      "step": 59659
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6376769542694092,
      "learning_rate": 0.0005060928939715768,
      "loss": 3.3815,
      "step": 59660
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4149110317230225,
      "learning_rate": 0.0005060899214234802,
      "loss": 3.0973,
      "step": 59661
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6284270286560059,
      "learning_rate": 0.0005060869488370675,
      "loss": 3.2093,
      "step": 59662
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3679683208465576,
      "learning_rate": 0.0005060839762123395,
      "loss": 3.1897,
      "step": 59663
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9464386701583862,
      "learning_rate": 0.0005060810035492965,
      "loss": 3.1167,
      "step": 59664
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.9694292545318604,
      "learning_rate": 0.0005060780308479392,
      "loss": 2.9684,
      "step": 59665
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5118597745895386,
      "learning_rate": 0.0005060750581082682,
      "loss": 2.955,
      "step": 59666
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5765622854232788,
      "learning_rate": 0.0005060720853302837,
      "loss": 3.0418,
      "step": 59667
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1877598762512207,
      "learning_rate": 0.0005060691125139868,
      "loss": 3.0993,
      "step": 59668
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.436425805091858,
      "learning_rate": 0.0005060661396593776,
      "loss": 3.1724,
      "step": 59669
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.626296043395996,
      "learning_rate": 0.0005060631667664568,
      "loss": 3.1726,
      "step": 59670
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3330698013305664,
      "learning_rate": 0.000506060193835225,
      "loss": 3.0363,
      "step": 59671
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8141789436340332,
      "learning_rate": 0.0005060572208656828,
      "loss": 3.1529,
      "step": 59672
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6537811756134033,
      "learning_rate": 0.0005060542478578305,
      "loss": 2.9895,
      "step": 59673
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.794822335243225,
      "learning_rate": 0.000506051274811669,
      "loss": 3.0583,
      "step": 59674
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.056027889251709,
      "learning_rate": 0.0005060483017271986,
      "loss": 2.9486,
      "step": 59675
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0052907466888428,
      "learning_rate": 0.0005060453286044199,
      "loss": 3.2768,
      "step": 59676
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5776376724243164,
      "learning_rate": 0.0005060423554433336,
      "loss": 2.7744,
      "step": 59677
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0160977840423584,
      "learning_rate": 0.00050603938224394,
      "loss": 3.2173,
      "step": 59678
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8353430032730103,
      "learning_rate": 0.0005060364090062399,
      "loss": 3.1,
      "step": 59679
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.871508002281189,
      "learning_rate": 0.0005060334357302336,
      "loss": 3.05,
      "step": 59680
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.114720106124878,
      "learning_rate": 0.0005060304624159219,
      "loss": 3.1004,
      "step": 59681
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.380579710006714,
      "learning_rate": 0.0005060274890633053,
      "loss": 2.9045,
      "step": 59682
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.006869077682495,
      "learning_rate": 0.0005060245156723842,
      "loss": 3.1947,
      "step": 59683
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6231595277786255,
      "learning_rate": 0.0005060215422431593,
      "loss": 2.9149,
      "step": 59684
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2745792865753174,
      "learning_rate": 0.000506018568775631,
      "loss": 2.8479,
      "step": 59685
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.546156167984009,
      "learning_rate": 0.0005060155952698001,
      "loss": 3.0312,
      "step": 59686
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7654142379760742,
      "learning_rate": 0.000506012621725667,
      "loss": 3.152,
      "step": 59687
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.413140296936035,
      "learning_rate": 0.0005060096481432323,
      "loss": 3.1772,
      "step": 59688
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.401451349258423,
      "learning_rate": 0.0005060066745224964,
      "loss": 3.0078,
      "step": 59689
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6798735857009888,
      "learning_rate": 0.00050600370086346,
      "loss": 2.8072,
      "step": 59690
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5316786766052246,
      "learning_rate": 0.0005060007271661236,
      "loss": 3.1357,
      "step": 59691
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5784106254577637,
      "learning_rate": 0.0005059977534304878,
      "loss": 3.0476,
      "step": 59692
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5029678344726562,
      "learning_rate": 0.0005059947796565532,
      "loss": 3.1144,
      "step": 59693
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4288297891616821,
      "learning_rate": 0.0005059918058443202,
      "loss": 2.6669,
      "step": 59694
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7263211011886597,
      "learning_rate": 0.0005059888319937895,
      "loss": 2.9443,
      "step": 59695
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6012250185012817,
      "learning_rate": 0.0005059858581049615,
      "loss": 3.2152,
      "step": 59696
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6379563808441162,
      "learning_rate": 0.0005059828841778369,
      "loss": 3.2082,
      "step": 59697
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.65406334400177,
      "learning_rate": 0.0005059799102124162,
      "loss": 3.0367,
      "step": 59698
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6048171520233154,
      "learning_rate": 0.0005059769362087,
      "loss": 2.9726,
      "step": 59699
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4187566041946411,
      "learning_rate": 0.0005059739621666887,
      "loss": 3.1022,
      "step": 59700
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4188586473464966,
      "learning_rate": 0.000505970988086383,
      "loss": 3.0591,
      "step": 59701
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.911578893661499,
      "learning_rate": 0.0005059680139677833,
      "loss": 3.0736,
      "step": 59702
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.879555583000183,
      "learning_rate": 0.0005059650398108904,
      "loss": 3.0156,
      "step": 59703
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6277188062667847,
      "learning_rate": 0.0005059620656157047,
      "loss": 3.1996,
      "step": 59704
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6597073078155518,
      "learning_rate": 0.0005059590913822266,
      "loss": 3.3162,
      "step": 59705
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.764032006263733,
      "learning_rate": 0.0005059561171104571,
      "loss": 2.9107,
      "step": 59706
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5434587001800537,
      "learning_rate": 0.0005059531428003961,
      "loss": 2.8794,
      "step": 59707
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5259757041931152,
      "learning_rate": 0.0005059501684520449,
      "loss": 2.8867,
      "step": 59708
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5385116338729858,
      "learning_rate": 0.0005059471940654035,
      "loss": 3.14,
      "step": 59709
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8551281690597534,
      "learning_rate": 0.0005059442196404725,
      "loss": 2.8936,
      "step": 59710
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.462341547012329,
      "learning_rate": 0.0005059412451772528,
      "loss": 2.975,
      "step": 59711
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6868990659713745,
      "learning_rate": 0.0005059382706757447,
      "loss": 3.1312,
      "step": 59712
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.445710301399231,
      "learning_rate": 0.0005059352961359488,
      "loss": 3.0367,
      "step": 59713
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5157098770141602,
      "learning_rate": 0.0005059323215578656,
      "loss": 2.8826,
      "step": 59714
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.402106761932373,
      "learning_rate": 0.0005059293469414956,
      "loss": 3.0634,
      "step": 59715
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.577142596244812,
      "learning_rate": 0.0005059263722868396,
      "loss": 2.9991,
      "step": 59716
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.336319923400879,
      "learning_rate": 0.0005059233975938978,
      "loss": 3.0766,
      "step": 59717
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2856117486953735,
      "learning_rate": 0.0005059204228626712,
      "loss": 3.1624,
      "step": 59718
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6857420206069946,
      "learning_rate": 0.00050591744809316,
      "loss": 3.0602,
      "step": 59719
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4725559949874878,
      "learning_rate": 0.0005059144732853648,
      "loss": 3.1717,
      "step": 59720
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5819646120071411,
      "learning_rate": 0.0005059114984392862,
      "loss": 3.1258,
      "step": 59721
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2496261596679688,
      "learning_rate": 0.0005059085235549249,
      "loss": 3.0082,
      "step": 59722
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3989083766937256,
      "learning_rate": 0.0005059055486322812,
      "loss": 3.2528,
      "step": 59723
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.871429681777954,
      "learning_rate": 0.0005059025736713558,
      "loss": 3.0713,
      "step": 59724
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4650557041168213,
      "learning_rate": 0.0005058995986721493,
      "loss": 2.9568,
      "step": 59725
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6012580394744873,
      "learning_rate": 0.0005058966236346621,
      "loss": 3.0533,
      "step": 59726
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4686139822006226,
      "learning_rate": 0.0005058936485588948,
      "loss": 3.3064,
      "step": 59727
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7336581945419312,
      "learning_rate": 0.000505890673444848,
      "loss": 3.2024,
      "step": 59728
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4013696908950806,
      "learning_rate": 0.0005058876982925223,
      "loss": 3.0821,
      "step": 59729
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3486435413360596,
      "learning_rate": 0.0005058847231019181,
      "loss": 3.2996,
      "step": 59730
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.542554259300232,
      "learning_rate": 0.0005058817478730361,
      "loss": 3.0434,
      "step": 59731
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.568856954574585,
      "learning_rate": 0.0005058787726058768,
      "loss": 3.2247,
      "step": 59732
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5289196968078613,
      "learning_rate": 0.0005058757973004408,
      "loss": 3.0978,
      "step": 59733
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7394583225250244,
      "learning_rate": 0.0005058728219567286,
      "loss": 2.9799,
      "step": 59734
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.635392665863037,
      "learning_rate": 0.0005058698465747406,
      "loss": 3.2266,
      "step": 59735
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9433320760726929,
      "learning_rate": 0.0005058668711544778,
      "loss": 2.9686,
      "step": 59736
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8758811950683594,
      "learning_rate": 0.0005058638956959402,
      "loss": 3.2615,
      "step": 59737
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7662423849105835,
      "learning_rate": 0.0005058609201991286,
      "loss": 3.1175,
      "step": 59738
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.529327869415283,
      "learning_rate": 0.0005058579446640437,
      "loss": 2.9753,
      "step": 59739
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4864680767059326,
      "learning_rate": 0.0005058549690906859,
      "loss": 3.1161,
      "step": 59740
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7823243141174316,
      "learning_rate": 0.0005058519934790557,
      "loss": 3.0934,
      "step": 59741
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2147696018218994,
      "learning_rate": 0.0005058490178291539,
      "loss": 3.1006,
      "step": 59742
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6601898670196533,
      "learning_rate": 0.0005058460421409807,
      "loss": 3.0224,
      "step": 59743
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8742438554763794,
      "learning_rate": 0.0005058430664145369,
      "loss": 2.9461,
      "step": 59744
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3870742321014404,
      "learning_rate": 0.0005058400906498231,
      "loss": 3.0494,
      "step": 59745
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8735815286636353,
      "learning_rate": 0.0005058371148468396,
      "loss": 3.2447,
      "step": 59746
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5197290182113647,
      "learning_rate": 0.000505834139005587,
      "loss": 2.9209,
      "step": 59747
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.073061943054199,
      "learning_rate": 0.0005058311631260662,
      "loss": 3.0362,
      "step": 59748
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3241326808929443,
      "learning_rate": 0.0005058281872082774,
      "loss": 3.0021,
      "step": 59749
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3601690530776978,
      "learning_rate": 0.0005058252112522212,
      "loss": 2.8128,
      "step": 59750
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5020760297775269,
      "learning_rate": 0.0005058222352578983,
      "loss": 3.2591,
      "step": 59751
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8607968091964722,
      "learning_rate": 0.0005058192592253091,
      "loss": 3.107,
      "step": 59752
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4455358982086182,
      "learning_rate": 0.0005058162831544542,
      "loss": 2.9403,
      "step": 59753
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6415973901748657,
      "learning_rate": 0.0005058133070453341,
      "loss": 2.9836,
      "step": 59754
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5461900234222412,
      "learning_rate": 0.0005058103308979495,
      "loss": 2.9123,
      "step": 59755
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4405200481414795,
      "learning_rate": 0.000505807354712301,
      "loss": 2.9326,
      "step": 59756
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6593072414398193,
      "learning_rate": 0.0005058043784883888,
      "loss": 2.9879,
      "step": 59757
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.610834002494812,
      "learning_rate": 0.0005058014022262139,
      "loss": 3.1234,
      "step": 59758
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4491060972213745,
      "learning_rate": 0.0005057984259257765,
      "loss": 3.0474,
      "step": 59759
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6742430925369263,
      "learning_rate": 0.0005057954495870774,
      "loss": 2.9226,
      "step": 59760
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9003530740737915,
      "learning_rate": 0.000505792473210117,
      "loss": 2.6799,
      "step": 59761
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5889843702316284,
      "learning_rate": 0.0005057894967948958,
      "loss": 2.7945,
      "step": 59762
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.56354820728302,
      "learning_rate": 0.0005057865203414144,
      "loss": 3.0383,
      "step": 59763
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4504567384719849,
      "learning_rate": 0.0005057835438496735,
      "loss": 3.0948,
      "step": 59764
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0626068115234375,
      "learning_rate": 0.0005057805673196736,
      "loss": 2.8676,
      "step": 59765
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5673351287841797,
      "learning_rate": 0.0005057775907514151,
      "loss": 2.9644,
      "step": 59766
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4091914892196655,
      "learning_rate": 0.0005057746141448988,
      "loss": 3.1473,
      "step": 59767
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.769998550415039,
      "learning_rate": 0.0005057716375001251,
      "loss": 3.1301,
      "step": 59768
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4065831899642944,
      "learning_rate": 0.0005057686608170945,
      "loss": 2.8996,
      "step": 59769
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5646589994430542,
      "learning_rate": 0.0005057656840958076,
      "loss": 3.0312,
      "step": 59770
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5173453092575073,
      "learning_rate": 0.000505762707336265,
      "loss": 3.2597,
      "step": 59771
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7079532146453857,
      "learning_rate": 0.0005057597305384672,
      "loss": 3.0716,
      "step": 59772
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.174586772918701,
      "learning_rate": 0.0005057567537024148,
      "loss": 2.7303,
      "step": 59773
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5555073022842407,
      "learning_rate": 0.0005057537768281084,
      "loss": 3.1509,
      "step": 59774
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9151637554168701,
      "learning_rate": 0.0005057507999155484,
      "loss": 2.8606,
      "step": 59775
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6679714918136597,
      "learning_rate": 0.0005057478229647355,
      "loss": 3.064,
      "step": 59776
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.206509828567505,
      "learning_rate": 0.0005057448459756702,
      "loss": 3.0932,
      "step": 59777
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7102521657943726,
      "learning_rate": 0.000505741868948353,
      "loss": 2.9537,
      "step": 59778
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4323612451553345,
      "learning_rate": 0.0005057388918827846,
      "loss": 3.4001,
      "step": 59779
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.674769401550293,
      "learning_rate": 0.0005057359147789652,
      "loss": 3.1272,
      "step": 59780
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6596224308013916,
      "learning_rate": 0.0005057329376368959,
      "loss": 2.895,
      "step": 59781
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.592393159866333,
      "learning_rate": 0.0005057299604565769,
      "loss": 3.0732,
      "step": 59782
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.53659987449646,
      "learning_rate": 0.0005057269832380088,
      "loss": 2.948,
      "step": 59783
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6447267532348633,
      "learning_rate": 0.0005057240059811921,
      "loss": 3.2078,
      "step": 59784
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4764001369476318,
      "learning_rate": 0.0005057210286861276,
      "loss": 2.9651,
      "step": 59785
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.517103910446167,
      "learning_rate": 0.0005057180513528155,
      "loss": 3.057,
      "step": 59786
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6878620386123657,
      "learning_rate": 0.0005057150739812565,
      "loss": 3.0209,
      "step": 59787
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4471687078475952,
      "learning_rate": 0.0005057120965714513,
      "loss": 2.9897,
      "step": 59788
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6465122699737549,
      "learning_rate": 0.0005057091191234004,
      "loss": 2.8368,
      "step": 59789
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9320117235183716,
      "learning_rate": 0.0005057061416371041,
      "loss": 3.0327,
      "step": 59790
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5651119947433472,
      "learning_rate": 0.0005057031641125632,
      "loss": 2.9806,
      "step": 59791
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8187061548233032,
      "learning_rate": 0.0005057001865497783,
      "loss": 3.0122,
      "step": 59792
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5449090003967285,
      "learning_rate": 0.0005056972089487497,
      "loss": 2.807,
      "step": 59793
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.0521092414855957,
      "learning_rate": 0.0005056942313094783,
      "loss": 2.8892,
      "step": 59794
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4800477027893066,
      "learning_rate": 0.0005056912536319643,
      "loss": 2.908,
      "step": 59795
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.5596015453338623,
      "learning_rate": 0.0005056882759162085,
      "loss": 3.0704,
      "step": 59796
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9013655185699463,
      "learning_rate": 0.0005056852981622114,
      "loss": 2.9351,
      "step": 59797
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8125745058059692,
      "learning_rate": 0.0005056823203699734,
      "loss": 3.0831,
      "step": 59798
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6009830236434937,
      "learning_rate": 0.0005056793425394952,
      "loss": 3.2388,
      "step": 59799
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4976545572280884,
      "learning_rate": 0.0005056763646707774,
      "loss": 2.9845,
      "step": 59800
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4860256910324097,
      "learning_rate": 0.0005056733867638205,
      "loss": 3.1428,
      "step": 59801
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5457117557525635,
      "learning_rate": 0.000505670408818625,
      "loss": 2.9671,
      "step": 59802
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8822988271713257,
      "learning_rate": 0.0005056674308351914,
      "loss": 2.9557,
      "step": 59803
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.4217162132263184,
      "learning_rate": 0.0005056644528135204,
      "loss": 2.9542,
      "step": 59804
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5459691286087036,
      "learning_rate": 0.0005056614747536125,
      "loss": 3.1838,
      "step": 59805
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.593224287033081,
      "learning_rate": 0.0005056584966554682,
      "loss": 2.9595,
      "step": 59806
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.434333324432373,
      "learning_rate": 0.0005056555185190883,
      "loss": 3.0806,
      "step": 59807
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.422229290008545,
      "learning_rate": 0.0005056525403444729,
      "loss": 2.9733,
      "step": 59808
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4571095705032349,
      "learning_rate": 0.000505649562131623,
      "loss": 2.9772,
      "step": 59809
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.651237964630127,
      "learning_rate": 0.0005056465838805389,
      "loss": 2.9222,
      "step": 59810
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8361870050430298,
      "learning_rate": 0.0005056436055912211,
      "loss": 2.9701,
      "step": 59811
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4840070009231567,
      "learning_rate": 0.0005056406272636704,
      "loss": 2.984,
      "step": 59812
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2838969230651855,
      "learning_rate": 0.0005056376488978872,
      "loss": 3.1268,
      "step": 59813
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5012459754943848,
      "learning_rate": 0.000505634670493872,
      "loss": 2.8407,
      "step": 59814
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3687715530395508,
      "learning_rate": 0.0005056316920516256,
      "loss": 3.0572,
      "step": 59815
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4482171535491943,
      "learning_rate": 0.0005056287135711483,
      "loss": 2.8111,
      "step": 59816
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6748533248901367,
      "learning_rate": 0.0005056257350524408,
      "loss": 3.065,
      "step": 59817
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8320422172546387,
      "learning_rate": 0.0005056227564955036,
      "loss": 2.8392,
      "step": 59818
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.530246615409851,
      "learning_rate": 0.0005056197779003371,
      "loss": 2.9813,
      "step": 59819
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7606369256973267,
      "learning_rate": 0.0005056167992669421,
      "loss": 3.0124,
      "step": 59820
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6974399089813232,
      "learning_rate": 0.000505613820595319,
      "loss": 2.943,
      "step": 59821
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4966621398925781,
      "learning_rate": 0.0005056108418854685,
      "loss": 2.9223,
      "step": 59822
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4493346214294434,
      "learning_rate": 0.000505607863137391,
      "loss": 3.1842,
      "step": 59823
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5869615077972412,
      "learning_rate": 0.0005056048843510871,
      "loss": 3.0906,
      "step": 59824
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5914762020111084,
      "learning_rate": 0.0005056019055265574,
      "loss": 3.0732,
      "step": 59825
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.809647798538208,
      "learning_rate": 0.0005055989266638024,
      "loss": 3.0584,
      "step": 59826
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9943562746047974,
      "learning_rate": 0.0005055959477628228,
      "loss": 3.0652,
      "step": 59827
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5537017583847046,
      "learning_rate": 0.000505592968823619,
      "loss": 2.9797,
      "step": 59828
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6774028539657593,
      "learning_rate": 0.0005055899898461914,
      "loss": 2.8917,
      "step": 59829
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6532483100891113,
      "learning_rate": 0.0005055870108305408,
      "loss": 3.2336,
      "step": 59830
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8640096187591553,
      "learning_rate": 0.000505584031776668,
      "loss": 3.0288,
      "step": 59831
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5247207880020142,
      "learning_rate": 0.0005055810526845729,
      "loss": 2.7881,
      "step": 59832
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6383706331253052,
      "learning_rate": 0.0005055780735542565,
      "loss": 3.0128,
      "step": 59833
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5424840450286865,
      "learning_rate": 0.0005055750943857193,
      "loss": 3.1254,
      "step": 59834
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4288424253463745,
      "learning_rate": 0.0005055721151789618,
      "loss": 3.0535,
      "step": 59835
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.092306137084961,
      "learning_rate": 0.0005055691359339845,
      "loss": 2.9214,
      "step": 59836
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4093127250671387,
      "learning_rate": 0.0005055661566507881,
      "loss": 3.3084,
      "step": 59837
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8215383291244507,
      "learning_rate": 0.0005055631773293731,
      "loss": 2.9859,
      "step": 59838
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.4453859329223633,
      "learning_rate": 0.0005055601979697398,
      "loss": 3.1171,
      "step": 59839
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1515538692474365,
      "learning_rate": 0.0005055572185718892,
      "loss": 2.9854,
      "step": 59840
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.559518814086914,
      "learning_rate": 0.0005055542391358216,
      "loss": 3.1993,
      "step": 59841
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5755637884140015,
      "learning_rate": 0.0005055512596615375,
      "loss": 3.3178,
      "step": 59842
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.450859546661377,
      "learning_rate": 0.0005055482801490376,
      "loss": 2.7636,
      "step": 59843
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7306724786758423,
      "learning_rate": 0.0005055453005983223,
      "loss": 3.1329,
      "step": 59844
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3468916416168213,
      "learning_rate": 0.0005055423210093923,
      "loss": 3.2409,
      "step": 59845
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6888052225112915,
      "learning_rate": 0.0005055393413822483,
      "loss": 2.8993,
      "step": 59846
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5881907939910889,
      "learning_rate": 0.0005055363617168905,
      "loss": 2.9543,
      "step": 59847
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.602882742881775,
      "learning_rate": 0.0005055333820133196,
      "loss": 2.8997,
      "step": 59848
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5732319355010986,
      "learning_rate": 0.0005055304022715361,
      "loss": 2.9779,
      "step": 59849
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7697389125823975,
      "learning_rate": 0.0005055274224915407,
      "loss": 3.1271,
      "step": 59850
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4032933712005615,
      "learning_rate": 0.0005055244426733339,
      "loss": 3.2358,
      "step": 59851
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.942234992980957,
      "learning_rate": 0.0005055214628169162,
      "loss": 3.0346,
      "step": 59852
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6634927988052368,
      "learning_rate": 0.0005055184829222881,
      "loss": 2.9482,
      "step": 59853
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5119645595550537,
      "learning_rate": 0.0005055155029894503,
      "loss": 2.7761,
      "step": 59854
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4427556991577148,
      "learning_rate": 0.0005055125230184033,
      "loss": 3.1199,
      "step": 59855
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6357643604278564,
      "learning_rate": 0.0005055095430091476,
      "loss": 3.1116,
      "step": 59856
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.763278603553772,
      "learning_rate": 0.0005055065629616839,
      "loss": 3.0444,
      "step": 59857
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4918630123138428,
      "learning_rate": 0.0005055035828760126,
      "loss": 2.9544,
      "step": 59858
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5750806331634521,
      "learning_rate": 0.0005055006027521343,
      "loss": 3.1144,
      "step": 59859
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5438876152038574,
      "learning_rate": 0.0005054976225900494,
      "loss": 2.9975,
      "step": 59860
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.9338135719299316,
      "learning_rate": 0.0005054946423897589,
      "loss": 2.8778,
      "step": 59861
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.7766916751861572,
      "learning_rate": 0.0005054916621512629,
      "loss": 3.0714,
      "step": 59862
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.85072922706604,
      "learning_rate": 0.0005054886818745621,
      "loss": 3.007,
      "step": 59863
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2117607593536377,
      "learning_rate": 0.0005054857015596572,
      "loss": 3.2679,
      "step": 59864
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8511453866958618,
      "learning_rate": 0.0005054827212065485,
      "loss": 3.0504,
      "step": 59865
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6268526315689087,
      "learning_rate": 0.0005054797408152367,
      "loss": 2.9473,
      "step": 59866
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6930181980133057,
      "learning_rate": 0.0005054767603857224,
      "loss": 2.8543,
      "step": 59867
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4612585306167603,
      "learning_rate": 0.0005054737799180061,
      "loss": 2.9083,
      "step": 59868
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3314940929412842,
      "learning_rate": 0.0005054707994120883,
      "loss": 3.0835,
      "step": 59869
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4760621786117554,
      "learning_rate": 0.0005054678188679696,
      "loss": 3.1241,
      "step": 59870
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3133628368377686,
      "learning_rate": 0.0005054648382856505,
      "loss": 2.8865,
      "step": 59871
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.613904595375061,
      "learning_rate": 0.0005054618576651317,
      "loss": 3.0223,
      "step": 59872
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3854944705963135,
      "learning_rate": 0.0005054588770064136,
      "loss": 2.936,
      "step": 59873
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4290800094604492,
      "learning_rate": 0.0005054558963094969,
      "loss": 2.9948,
      "step": 59874
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5327157974243164,
      "learning_rate": 0.0005054529155743819,
      "loss": 2.9048,
      "step": 59875
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.709236741065979,
      "learning_rate": 0.0005054499348010696,
      "loss": 2.9847,
      "step": 59876
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.886370062828064,
      "learning_rate": 0.0005054469539895601,
      "loss": 2.8194,
      "step": 59877
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2639827728271484,
      "learning_rate": 0.0005054439731398541,
      "loss": 3.0918,
      "step": 59878
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.7761409282684326,
      "learning_rate": 0.0005054409922519523,
      "loss": 3.2549,
      "step": 59879
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7724459171295166,
      "learning_rate": 0.0005054380113258551,
      "loss": 3.124,
      "step": 59880
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.495658278465271,
      "learning_rate": 0.000505435030361563,
      "loss": 3.0113,
      "step": 59881
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.002452850341797,
      "learning_rate": 0.0005054320493590768,
      "loss": 3.1685,
      "step": 59882
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.11238956451416,
      "learning_rate": 0.0005054290683183969,
      "loss": 3.1183,
      "step": 59883
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.748676300048828,
      "learning_rate": 0.0005054260872395237,
      "loss": 3.0462,
      "step": 59884
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6953479051589966,
      "learning_rate": 0.000505423106122458,
      "loss": 2.9446,
      "step": 59885
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7592155933380127,
      "learning_rate": 0.0005054201249672003,
      "loss": 2.9953,
      "step": 59886
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.7948927879333496,
      "learning_rate": 0.000505417143773751,
      "loss": 2.7081,
      "step": 59887
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.641945719718933,
      "learning_rate": 0.0005054141625421109,
      "loss": 3.0534,
      "step": 59888
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.720122218132019,
      "learning_rate": 0.0005054111812722805,
      "loss": 2.9466,
      "step": 59889
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.495640993118286,
      "learning_rate": 0.0005054081999642602,
      "loss": 3.0061,
      "step": 59890
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.029841661453247,
      "learning_rate": 0.0005054052186180505,
      "loss": 3.0186,
      "step": 59891
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7750351428985596,
      "learning_rate": 0.0005054022372336523,
      "loss": 3.0761,
      "step": 59892
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5093029737472534,
      "learning_rate": 0.0005053992558110658,
      "loss": 3.0823,
      "step": 59893
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2553505897521973,
      "learning_rate": 0.0005053962743502918,
      "loss": 2.833,
      "step": 59894
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7272841930389404,
      "learning_rate": 0.0005053932928513307,
      "loss": 2.7836,
      "step": 59895
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5089353322982788,
      "learning_rate": 0.0005053903113141832,
      "loss": 2.8932,
      "step": 59896
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.105651378631592,
      "learning_rate": 0.0005053873297388496,
      "loss": 3.0468,
      "step": 59897
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8971633911132812,
      "learning_rate": 0.0005053843481253308,
      "loss": 2.9672,
      "step": 59898
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5511325597763062,
      "learning_rate": 0.000505381366473627,
      "loss": 2.8507,
      "step": 59899
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0207808017730713,
      "learning_rate": 0.000505378384783739,
      "loss": 3.0932,
      "step": 59900
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.63567054271698,
      "learning_rate": 0.0005053754030556674,
      "loss": 2.902,
      "step": 59901
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7229816913604736,
      "learning_rate": 0.0005053724212894125,
      "loss": 2.9703,
      "step": 59902
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.708867073059082,
      "learning_rate": 0.000505369439484975,
      "loss": 2.8449,
      "step": 59903
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9565669298171997,
      "learning_rate": 0.0005053664576423555,
      "loss": 2.9307,
      "step": 59904
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4383831024169922,
      "learning_rate": 0.0005053634757615544,
      "loss": 3.2477,
      "step": 59905
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5495128631591797,
      "learning_rate": 0.0005053604938425725,
      "loss": 2.8297,
      "step": 59906
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.487828254699707,
      "learning_rate": 0.00050535751188541,
      "loss": 2.8363,
      "step": 59907
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4507036209106445,
      "learning_rate": 0.0005053545298900678,
      "loss": 2.9894,
      "step": 59908
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.550498127937317,
      "learning_rate": 0.0005053515478565463,
      "loss": 2.9268,
      "step": 59909
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8172225952148438,
      "learning_rate": 0.000505348565784846,
      "loss": 2.9712,
      "step": 59910
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.545079231262207,
      "learning_rate": 0.0005053455836749678,
      "loss": 3.3005,
      "step": 59911
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8832709789276123,
      "learning_rate": 0.0005053426015269116,
      "loss": 3.2697,
      "step": 59912
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6464803218841553,
      "learning_rate": 0.0005053396193406786,
      "loss": 3.2725,
      "step": 59913
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.566143751144409,
      "learning_rate": 0.0005053366371162689,
      "loss": 3.0262,
      "step": 59914
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8219518661499023,
      "learning_rate": 0.0005053336548536834,
      "loss": 3.1528,
      "step": 59915
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8073011636734009,
      "learning_rate": 0.0005053306725529224,
      "loss": 3.0133,
      "step": 59916
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8476481437683105,
      "learning_rate": 0.0005053276902139866,
      "loss": 3.062,
      "step": 59917
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8012984991073608,
      "learning_rate": 0.0005053247078368765,
      "loss": 3.1396,
      "step": 59918
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.226205348968506,
      "learning_rate": 0.0005053217254215927,
      "loss": 2.8516,
      "step": 59919
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.256101369857788,
      "learning_rate": 0.0005053187429681356,
      "loss": 3.1509,
      "step": 59920
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6150941848754883,
      "learning_rate": 0.000505315760476506,
      "loss": 3.0245,
      "step": 59921
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8934592008590698,
      "learning_rate": 0.0005053127779467043,
      "loss": 3.165,
      "step": 59922
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8373591899871826,
      "learning_rate": 0.0005053097953787311,
      "loss": 3.0069,
      "step": 59923
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8657019138336182,
      "learning_rate": 0.0005053068127725868,
      "loss": 3.1679,
      "step": 59924
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9960962533950806,
      "learning_rate": 0.0005053038301282722,
      "loss": 2.9341,
      "step": 59925
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9861482381820679,
      "learning_rate": 0.0005053008474457877,
      "loss": 3.0999,
      "step": 59926
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3805853128433228,
      "learning_rate": 0.0005052978647251339,
      "loss": 2.9721,
      "step": 59927
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7227275371551514,
      "learning_rate": 0.0005052948819663114,
      "loss": 3.2295,
      "step": 59928
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8351049423217773,
      "learning_rate": 0.0005052918991693206,
      "loss": 2.9517,
      "step": 59929
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.598823070526123,
      "learning_rate": 0.0005052889163341623,
      "loss": 3.0599,
      "step": 59930
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4639289379119873,
      "learning_rate": 0.0005052859334608368,
      "loss": 3.0382,
      "step": 59931
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4056951999664307,
      "learning_rate": 0.0005052829505493449,
      "loss": 3.1016,
      "step": 59932
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.5248758792877197,
      "learning_rate": 0.0005052799675996869,
      "loss": 3.0645,
      "step": 59933
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.515601396560669,
      "learning_rate": 0.0005052769846118634,
      "loss": 3.2329,
      "step": 59934
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6828961372375488,
      "learning_rate": 0.0005052740015858752,
      "loss": 2.9829,
      "step": 59935
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4443711042404175,
      "learning_rate": 0.0005052710185217226,
      "loss": 2.9553,
      "step": 59936
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4146610498428345,
      "learning_rate": 0.0005052680354194064,
      "loss": 2.9561,
      "step": 59937
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2634127140045166,
      "learning_rate": 0.0005052650522789268,
      "loss": 2.9649,
      "step": 59938
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.611016869544983,
      "learning_rate": 0.0005052620691002847,
      "loss": 2.7933,
      "step": 59939
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6307846307754517,
      "learning_rate": 0.0005052590858834804,
      "loss": 3.0008,
      "step": 59940
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5338743925094604,
      "learning_rate": 0.0005052561026285146,
      "loss": 3.2448,
      "step": 59941
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.468229055404663,
      "learning_rate": 0.0005052531193353878,
      "loss": 3.1699,
      "step": 59942
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5937951803207397,
      "learning_rate": 0.0005052501360041006,
      "loss": 2.8429,
      "step": 59943
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5080921649932861,
      "learning_rate": 0.0005052471526346535,
      "loss": 2.9003,
      "step": 59944
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3388830423355103,
      "learning_rate": 0.000505244169227047,
      "loss": 3.1012,
      "step": 59945
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5227981805801392,
      "learning_rate": 0.000505241185781282,
      "loss": 3.1306,
      "step": 59946
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4331697225570679,
      "learning_rate": 0.0005052382022973586,
      "loss": 3.2042,
      "step": 59947
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6116812229156494,
      "learning_rate": 0.0005052352187752776,
      "loss": 3.0402,
      "step": 59948
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.212372064590454,
      "learning_rate": 0.0005052322352150394,
      "loss": 2.9986,
      "step": 59949
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6152442693710327,
      "learning_rate": 0.0005052292516166447,
      "loss": 3.0033,
      "step": 59950
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6998717784881592,
      "learning_rate": 0.0005052262679800941,
      "loss": 3.0049,
      "step": 59951
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.419468402862549,
      "learning_rate": 0.0005052232843053879,
      "loss": 2.7562,
      "step": 59952
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.530790328979492,
      "learning_rate": 0.0005052203005925269,
      "loss": 3.1272,
      "step": 59953
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.361114978790283,
      "learning_rate": 0.0005052173168415115,
      "loss": 2.9122,
      "step": 59954
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.2397899627685547,
      "learning_rate": 0.0005052143330523425,
      "loss": 3.0679,
      "step": 59955
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.247105598449707,
      "learning_rate": 0.0005052113492250202,
      "loss": 3.0813,
      "step": 59956
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6667414903640747,
      "learning_rate": 0.0005052083653595452,
      "loss": 3.068,
      "step": 59957
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1699862480163574,
      "learning_rate": 0.0005052053814559181,
      "loss": 3.1625,
      "step": 59958
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.460814356803894,
      "learning_rate": 0.0005052023975141394,
      "loss": 2.9559,
      "step": 59959
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2625665664672852,
      "learning_rate": 0.0005051994135342098,
      "loss": 3.1314,
      "step": 59960
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0854265689849854,
      "learning_rate": 0.0005051964295161297,
      "loss": 3.0478,
      "step": 59961
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6387156248092651,
      "learning_rate": 0.0005051934454598997,
      "loss": 3.051,
      "step": 59962
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3680319786071777,
      "learning_rate": 0.0005051904613655204,
      "loss": 3.2251,
      "step": 59963
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4898923635482788,
      "learning_rate": 0.0005051874772329922,
      "loss": 2.9335,
      "step": 59964
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8508262634277344,
      "learning_rate": 0.000505184493062316,
      "loss": 2.95,
      "step": 59965
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4518638849258423,
      "learning_rate": 0.0005051815088534921,
      "loss": 3.0239,
      "step": 59966
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3132898807525635,
      "learning_rate": 0.0005051785246065209,
      "loss": 3.0451,
      "step": 59967
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7629483938217163,
      "learning_rate": 0.0005051755403214032,
      "loss": 3.0014,
      "step": 59968
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5100291967391968,
      "learning_rate": 0.0005051725559981396,
      "loss": 3.0901,
      "step": 59969
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4586809873580933,
      "learning_rate": 0.0005051695716367304,
      "loss": 3.1872,
      "step": 59970
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4655123949050903,
      "learning_rate": 0.0005051665872371763,
      "loss": 3.1488,
      "step": 59971
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7008318901062012,
      "learning_rate": 0.0005051636027994781,
      "loss": 2.9463,
      "step": 59972
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4039744138717651,
      "learning_rate": 0.0005051606183236359,
      "loss": 2.9666,
      "step": 59973
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7183661460876465,
      "learning_rate": 0.0005051576338096506,
      "loss": 2.9465,
      "step": 59974
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6201670169830322,
      "learning_rate": 0.0005051546492575225,
      "loss": 3.022,
      "step": 59975
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6144369840621948,
      "learning_rate": 0.0005051516646672522,
      "loss": 3.0486,
      "step": 59976
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5789175033569336,
      "learning_rate": 0.0005051486800388405,
      "loss": 3.0316,
      "step": 59977
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4397436380386353,
      "learning_rate": 0.0005051456953722878,
      "loss": 3.1142,
      "step": 59978
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5479432344436646,
      "learning_rate": 0.0005051427106675946,
      "loss": 3.0594,
      "step": 59979
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6843924522399902,
      "learning_rate": 0.0005051397259247615,
      "loss": 2.8346,
      "step": 59980
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.47592031955719,
      "learning_rate": 0.000505136741143789,
      "loss": 3.1464,
      "step": 59981
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8211665153503418,
      "learning_rate": 0.0005051337563246777,
      "loss": 3.2177,
      "step": 59982
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7549331188201904,
      "learning_rate": 0.0005051307714674283,
      "loss": 3.2445,
      "step": 59983
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4837971925735474,
      "learning_rate": 0.0005051277865720411,
      "loss": 3.1951,
      "step": 59984
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4778810739517212,
      "learning_rate": 0.0005051248016385168,
      "loss": 3.0089,
      "step": 59985
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.060727834701538,
      "learning_rate": 0.000505121816666856,
      "loss": 2.9129,
      "step": 59986
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4997873306274414,
      "learning_rate": 0.0005051188316570591,
      "loss": 3.0492,
      "step": 59987
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0814433097839355,
      "learning_rate": 0.0005051158466091267,
      "loss": 3.051,
      "step": 59988
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.540891408920288,
      "learning_rate": 0.0005051128615230594,
      "loss": 3.0212,
      "step": 59989
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6298915147781372,
      "learning_rate": 0.0005051098763988578,
      "loss": 3.2013,
      "step": 59990
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3219518661499023,
      "learning_rate": 0.0005051068912365224,
      "loss": 3.002,
      "step": 59991
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4830611944198608,
      "learning_rate": 0.0005051039060360537,
      "loss": 2.9434,
      "step": 59992
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3787977695465088,
      "learning_rate": 0.0005051009207974524,
      "loss": 3.1771,
      "step": 59993
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1334705352783203,
      "learning_rate": 0.0005050979355207189,
      "loss": 2.9215,
      "step": 59994
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.606387734413147,
      "learning_rate": 0.0005050949502058539,
      "loss": 3.0084,
      "step": 59995
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6274635791778564,
      "learning_rate": 0.0005050919648528578,
      "loss": 3.0538,
      "step": 59996
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.937068223953247,
      "learning_rate": 0.0005050889794617312,
      "loss": 2.9578,
      "step": 59997
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5956579446792603,
      "learning_rate": 0.0005050859940324748,
      "loss": 3.0501,
      "step": 59998
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9101290702819824,
      "learning_rate": 0.000505083008565089,
      "loss": 3.1419,
      "step": 59999
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4532090425491333,
      "learning_rate": 0.0005050800230595743,
      "loss": 2.9592,
      "step": 60000
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2353463172912598,
      "learning_rate": 0.0005050770375159315,
      "loss": 3.34,
      "step": 60001
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.243844985961914,
      "learning_rate": 0.000505074051934161,
      "loss": 2.8082,
      "step": 60002
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5327491760253906,
      "learning_rate": 0.0005050710663142633,
      "loss": 3.0502,
      "step": 60003
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9391403198242188,
      "learning_rate": 0.000505068080656239,
      "loss": 2.9855,
      "step": 60004
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.734423279762268,
      "learning_rate": 0.0005050650949600887,
      "loss": 3.1629,
      "step": 60005
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6476012468338013,
      "learning_rate": 0.0005050621092258129,
      "loss": 2.8962,
      "step": 60006
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.006145477294922,
      "learning_rate": 0.0005050591234534122,
      "loss": 3.1942,
      "step": 60007
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5309572219848633,
      "learning_rate": 0.000505056137642887,
      "loss": 3.0355,
      "step": 60008
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4947896003723145,
      "learning_rate": 0.0005050531517942382,
      "loss": 3.3027,
      "step": 60009
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4914186000823975,
      "learning_rate": 0.0005050501659074661,
      "loss": 2.797,
      "step": 60010
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.710931658744812,
      "learning_rate": 0.0005050471799825713,
      "loss": 2.9663,
      "step": 60011
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.665086269378662,
      "learning_rate": 0.0005050441940195543,
      "loss": 2.9414,
      "step": 60012
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1611478328704834,
      "learning_rate": 0.0005050412080184159,
      "loss": 2.9846,
      "step": 60013
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6611762046813965,
      "learning_rate": 0.0005050382219791563,
      "loss": 3.3377,
      "step": 60014
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0690059661865234,
      "learning_rate": 0.0005050352359017762,
      "loss": 3.125,
      "step": 60015
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.2097790241241455,
      "learning_rate": 0.0005050322497862763,
      "loss": 2.935,
      "step": 60016
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.79634690284729,
      "learning_rate": 0.0005050292636326569,
      "loss": 3.1646,
      "step": 60017
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5490893125534058,
      "learning_rate": 0.0005050262774409188,
      "loss": 3.1093,
      "step": 60018
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9180541038513184,
      "learning_rate": 0.0005050232912110624,
      "loss": 3.0521,
      "step": 60019
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.778311729431152,
      "learning_rate": 0.0005050203049430883,
      "loss": 2.749,
      "step": 60020
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.663386344909668,
      "learning_rate": 0.0005050173186369971,
      "loss": 2.9631,
      "step": 60021
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4932290315628052,
      "learning_rate": 0.0005050143322927892,
      "loss": 3.1398,
      "step": 60022
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3662707805633545,
      "learning_rate": 0.0005050113459104654,
      "loss": 3.0662,
      "step": 60023
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.026700019836426,
      "learning_rate": 0.000505008359490026,
      "loss": 3.1052,
      "step": 60024
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6723402738571167,
      "learning_rate": 0.0005050053730314718,
      "loss": 3.0191,
      "step": 60025
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6566262245178223,
      "learning_rate": 0.0005050023865348031,
      "loss": 3.066,
      "step": 60026
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.39359974861145,
      "learning_rate": 0.0005049994000000206,
      "loss": 3.1356,
      "step": 60027
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6507954597473145,
      "learning_rate": 0.000504996413427125,
      "loss": 3.0124,
      "step": 60028
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.318729877471924,
      "learning_rate": 0.0005049934268161165,
      "loss": 2.8825,
      "step": 60029
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.571349620819092,
      "learning_rate": 0.000504990440166996,
      "loss": 2.8073,
      "step": 60030
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7856279611587524,
      "learning_rate": 0.0005049874534797639,
      "loss": 2.9809,
      "step": 60031
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5976394414901733,
      "learning_rate": 0.0005049844667544206,
      "loss": 3.0552,
      "step": 60032
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.737180233001709,
      "learning_rate": 0.000504981479990967,
      "loss": 3.196,
      "step": 60033
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5784640312194824,
      "learning_rate": 0.0005049784931894034,
      "loss": 3.0288,
      "step": 60034
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.010755777359009,
      "learning_rate": 0.0005049755063497304,
      "loss": 2.8321,
      "step": 60035
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4860492944717407,
      "learning_rate": 0.0005049725194719487,
      "loss": 3.1727,
      "step": 60036
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4718456268310547,
      "learning_rate": 0.0005049695325560586,
      "loss": 3.0882,
      "step": 60037
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2430310249328613,
      "learning_rate": 0.0005049665456020608,
      "loss": 3.1885,
      "step": 60038
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8799580335617065,
      "learning_rate": 0.0005049635586099559,
      "loss": 3.2053,
      "step": 60039
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7423126697540283,
      "learning_rate": 0.0005049605715797444,
      "loss": 3.1617,
      "step": 60040
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8744781017303467,
      "learning_rate": 0.0005049575845114269,
      "loss": 2.9189,
      "step": 60041
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.78408682346344,
      "learning_rate": 0.0005049545974050039,
      "loss": 2.8196,
      "step": 60042
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5493395328521729,
      "learning_rate": 0.000504951610260476,
      "loss": 3.0733,
      "step": 60043
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6854737997055054,
      "learning_rate": 0.0005049486230778436,
      "loss": 2.6738,
      "step": 60044
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8698257207870483,
      "learning_rate": 0.0005049456358571076,
      "loss": 3.0603,
      "step": 60045
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5006074905395508,
      "learning_rate": 0.0005049426485982681,
      "loss": 3.1547,
      "step": 60046
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.279423713684082,
      "learning_rate": 0.000504939661301326,
      "loss": 3.0215,
      "step": 60047
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7882401943206787,
      "learning_rate": 0.0005049366739662818,
      "loss": 3.1409,
      "step": 60048
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3943692445755005,
      "learning_rate": 0.0005049336865931359,
      "loss": 3.1364,
      "step": 60049
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2210371494293213,
      "learning_rate": 0.000504930699181889,
      "loss": 2.862,
      "step": 60050
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4521348476409912,
      "learning_rate": 0.0005049277117325417,
      "loss": 2.7723,
      "step": 60051
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5499085187911987,
      "learning_rate": 0.0005049247242450943,
      "loss": 3.1595,
      "step": 60052
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6371959447860718,
      "learning_rate": 0.0005049217367195476,
      "loss": 2.8556,
      "step": 60053
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5879663228988647,
      "learning_rate": 0.0005049187491559022,
      "loss": 2.8656,
      "step": 60054
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0008299350738525,
      "learning_rate": 0.0005049157615541584,
      "loss": 3.1623,
      "step": 60055
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4813127517700195,
      "learning_rate": 0.0005049127739143169,
      "loss": 2.9121,
      "step": 60056
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7592411041259766,
      "learning_rate": 0.0005049097862363782,
      "loss": 3.0539,
      "step": 60057
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0979971885681152,
      "learning_rate": 0.000504906798520343,
      "loss": 3.3319,
      "step": 60058
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5671868324279785,
      "learning_rate": 0.0005049038107662118,
      "loss": 3.0736,
      "step": 60059
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5458811521530151,
      "learning_rate": 0.000504900822973985,
      "loss": 3.1496,
      "step": 60060
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6600698232650757,
      "learning_rate": 0.0005048978351436633,
      "loss": 3.1542,
      "step": 60061
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.655055284500122,
      "learning_rate": 0.0005048948472752473,
      "loss": 3.2625,
      "step": 60062
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7552008628845215,
      "learning_rate": 0.0005048918593687375,
      "loss": 3.093,
      "step": 60063
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6084295511245728,
      "learning_rate": 0.0005048888714241342,
      "loss": 2.8381,
      "step": 60064
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.725595235824585,
      "learning_rate": 0.0005048858834414384,
      "loss": 3.0705,
      "step": 60065
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.89767324924469,
      "learning_rate": 0.0005048828954206504,
      "loss": 3.0928,
      "step": 60066
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.513015031814575,
      "learning_rate": 0.0005048799073617709,
      "loss": 2.8723,
      "step": 60067
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5988690853118896,
      "learning_rate": 0.0005048769192648002,
      "loss": 3.1502,
      "step": 60068
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4668526649475098,
      "learning_rate": 0.000504873931129739,
      "loss": 3.0199,
      "step": 60069
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.0881004333496094,
      "learning_rate": 0.000504870942956588,
      "loss": 2.9047,
      "step": 60070
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.9566917419433594,
      "learning_rate": 0.0005048679547453475,
      "loss": 3.0211,
      "step": 60071
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.436748743057251,
      "learning_rate": 0.0005048649664960183,
      "loss": 3.0643,
      "step": 60072
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.614762544631958,
      "learning_rate": 0.0005048619782086007,
      "loss": 2.8118,
      "step": 60073
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8083841800689697,
      "learning_rate": 0.0005048589898830955,
      "loss": 3.0279,
      "step": 60074
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.29063081741333,
      "learning_rate": 0.0005048560015195031,
      "loss": 3.1554,
      "step": 60075
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2021543979644775,
      "learning_rate": 0.0005048530131178242,
      "loss": 3.0924,
      "step": 60076
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3536638021469116,
      "learning_rate": 0.0005048500246780591,
      "loss": 3.0926,
      "step": 60077
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.8888700008392334,
      "learning_rate": 0.0005048470362002086,
      "loss": 3.0683,
      "step": 60078
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.441505432128906,
      "learning_rate": 0.0005048440476842731,
      "loss": 3.0303,
      "step": 60079
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3059728145599365,
      "learning_rate": 0.0005048410591302534,
      "loss": 3.1371,
      "step": 60080
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0210859775543213,
      "learning_rate": 0.0005048380705381497,
      "loss": 2.9987,
      "step": 60081
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7455600500106812,
      "learning_rate": 0.0005048350819079629,
      "loss": 3.0276,
      "step": 60082
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.229482889175415,
      "learning_rate": 0.0005048320932396932,
      "loss": 3.0944,
      "step": 60083
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5390396118164062,
      "learning_rate": 0.0005048291045333414,
      "loss": 2.9569,
      "step": 60084
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6092610359191895,
      "learning_rate": 0.0005048261157889081,
      "loss": 3.3184,
      "step": 60085
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.0747971534729004,
      "learning_rate": 0.0005048231270063937,
      "loss": 3.2143,
      "step": 60086
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.0866076946258545,
      "learning_rate": 0.0005048201381857989,
      "loss": 2.9902,
      "step": 60087
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.272014617919922,
      "learning_rate": 0.000504817149327124,
      "loss": 2.786,
      "step": 60088
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8734948635101318,
      "learning_rate": 0.0005048141604303697,
      "loss": 2.9621,
      "step": 60089
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.374504804611206,
      "learning_rate": 0.0005048111714955368,
      "loss": 3.053,
      "step": 60090
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.303337335586548,
      "learning_rate": 0.0005048081825226255,
      "loss": 3.0258,
      "step": 60091
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.761575698852539,
      "learning_rate": 0.0005048051935116365,
      "loss": 2.8504,
      "step": 60092
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1472928524017334,
      "learning_rate": 0.0005048022044625704,
      "loss": 2.9651,
      "step": 60093
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8970483541488647,
      "learning_rate": 0.0005047992153754277,
      "loss": 2.9957,
      "step": 60094
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7007564306259155,
      "learning_rate": 0.000504796226250209,
      "loss": 3.2798,
      "step": 60095
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.564176321029663,
      "learning_rate": 0.0005047932370869147,
      "loss": 3.1407,
      "step": 60096
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.675364375114441,
      "learning_rate": 0.0005047902478855455,
      "loss": 2.6051,
      "step": 60097
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3956085443496704,
      "learning_rate": 0.000504787258646102,
      "loss": 3.2051,
      "step": 60098
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.826758623123169,
      "learning_rate": 0.0005047842693685846,
      "loss": 3.0283,
      "step": 60099
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3128108978271484,
      "learning_rate": 0.000504781280052994,
      "loss": 3.0593,
      "step": 60100
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4358478784561157,
      "learning_rate": 0.0005047782906993306,
      "loss": 2.9744,
      "step": 60101
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5434973239898682,
      "learning_rate": 0.0005047753013075951,
      "loss": 3.1576,
      "step": 60102
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8683650493621826,
      "learning_rate": 0.0005047723118777881,
      "loss": 2.9517,
      "step": 60103
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.936137318611145,
      "learning_rate": 0.0005047693224099099,
      "loss": 3.081,
      "step": 60104
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0980892181396484,
      "learning_rate": 0.0005047663329039615,
      "loss": 3.0386,
      "step": 60105
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5410765409469604,
      "learning_rate": 0.0005047633433599428,
      "loss": 3.07,
      "step": 60106
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9880492687225342,
      "learning_rate": 0.0005047603537778549,
      "loss": 2.7142,
      "step": 60107
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.493168354034424,
      "learning_rate": 0.0005047573641576983,
      "loss": 3.0311,
      "step": 60108
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6568989753723145,
      "learning_rate": 0.0005047543744994733,
      "loss": 3.0954,
      "step": 60109
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6799684762954712,
      "learning_rate": 0.0005047513848031805,
      "loss": 2.9225,
      "step": 60110
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.643764853477478,
      "learning_rate": 0.0005047483950688208,
      "loss": 2.9931,
      "step": 60111
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5022058486938477,
      "learning_rate": 0.0005047454052963944,
      "loss": 3.0632,
      "step": 60112
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4345718622207642,
      "learning_rate": 0.0005047424154859018,
      "loss": 2.9754,
      "step": 60113
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7283265590667725,
      "learning_rate": 0.000504739425637344,
      "loss": 2.8495,
      "step": 60114
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7972002029418945,
      "learning_rate": 0.0005047364357507211,
      "loss": 2.9915,
      "step": 60115
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3455383777618408,
      "learning_rate": 0.000504733445826034,
      "loss": 2.9601,
      "step": 60116
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6920201778411865,
      "learning_rate": 0.0005047304558632829,
      "loss": 3.0244,
      "step": 60117
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7494773864746094,
      "learning_rate": 0.0005047274658624687,
      "loss": 2.9471,
      "step": 60118
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0903475284576416,
      "learning_rate": 0.0005047244758235916,
      "loss": 3.0847,
      "step": 60119
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6090041399002075,
      "learning_rate": 0.0005047214857466525,
      "loss": 2.9119,
      "step": 60120
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5309535264968872,
      "learning_rate": 0.0005047184956316517,
      "loss": 3.0945,
      "step": 60121
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7853577136993408,
      "learning_rate": 0.00050471550547859,
      "loss": 3.0103,
      "step": 60122
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8107359409332275,
      "learning_rate": 0.0005047125152874678,
      "loss": 2.9867,
      "step": 60123
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0416741371154785,
      "learning_rate": 0.0005047095250582856,
      "loss": 2.9542,
      "step": 60124
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4832803010940552,
      "learning_rate": 0.0005047065347910441,
      "loss": 2.988,
      "step": 60125
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1566481590270996,
      "learning_rate": 0.0005047035444857437,
      "loss": 3.0021,
      "step": 60126
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6935069561004639,
      "learning_rate": 0.0005047005541423853,
      "loss": 2.8807,
      "step": 60127
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4719223976135254,
      "learning_rate": 0.0005046975637609689,
      "loss": 2.966,
      "step": 60128
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3710602521896362,
      "learning_rate": 0.0005046945733414955,
      "loss": 2.9811,
      "step": 60129
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4991118907928467,
      "learning_rate": 0.0005046915828839655,
      "loss": 3.0957,
      "step": 60130
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.629907488822937,
      "learning_rate": 0.0005046885923883794,
      "loss": 2.8435,
      "step": 60131
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3429489135742188,
      "learning_rate": 0.000504685601854738,
      "loss": 3.0521,
      "step": 60132
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.531956672668457,
      "learning_rate": 0.0005046826112830415,
      "loss": 3.0237,
      "step": 60133
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.501645565032959,
      "learning_rate": 0.0005046796206732908,
      "loss": 3.1839,
      "step": 60134
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.406879186630249,
      "learning_rate": 0.0005046766300254862,
      "loss": 3.0958,
      "step": 60135
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.9478201866149902,
      "learning_rate": 0.0005046736393396283,
      "loss": 2.7993,
      "step": 60136
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.2799365520477295,
      "learning_rate": 0.0005046706486157178,
      "loss": 2.989,
      "step": 60137
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6563249826431274,
      "learning_rate": 0.0005046676578537552,
      "loss": 3.1437,
      "step": 60138
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7672494649887085,
      "learning_rate": 0.000504664667053741,
      "loss": 2.9352,
      "step": 60139
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.825054168701172,
      "learning_rate": 0.0005046616762156757,
      "loss": 3.1642,
      "step": 60140
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.9256527423858643,
      "learning_rate": 0.0005046586853395599,
      "loss": 2.876,
      "step": 60141
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8708547353744507,
      "learning_rate": 0.0005046556944253943,
      "loss": 3.0512,
      "step": 60142
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2112536430358887,
      "learning_rate": 0.0005046527034731792,
      "loss": 3.0074,
      "step": 60143
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.4557480812072754,
      "learning_rate": 0.0005046497124829155,
      "loss": 2.7601,
      "step": 60144
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.6156179904937744,
      "learning_rate": 0.0005046467214546034,
      "loss": 2.8085,
      "step": 60145
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.0314364433288574,
      "learning_rate": 0.0005046437303882436,
      "loss": 2.9428,
      "step": 60146
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9550814628601074,
      "learning_rate": 0.0005046407392838367,
      "loss": 3.3465,
      "step": 60147
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8560824394226074,
      "learning_rate": 0.0005046377481413833,
      "loss": 2.9606,
      "step": 60148
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.319711923599243,
      "learning_rate": 0.0005046347569608838,
      "loss": 3.2686,
      "step": 60149
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8411564826965332,
      "learning_rate": 0.0005046317657423389,
      "loss": 3.1132,
      "step": 60150
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4166256189346313,
      "learning_rate": 0.0005046287744857491,
      "loss": 3.2147,
      "step": 60151
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8855578899383545,
      "learning_rate": 0.0005046257831911149,
      "loss": 3.1163,
      "step": 60152
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.7759478092193604,
      "learning_rate": 0.0005046227918584368,
      "loss": 3.067,
      "step": 60153
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5823103189468384,
      "learning_rate": 0.0005046198004877156,
      "loss": 3.1466,
      "step": 60154
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5944617986679077,
      "learning_rate": 0.0005046168090789518,
      "loss": 3.0886,
      "step": 60155
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8748241662979126,
      "learning_rate": 0.0005046138176321456,
      "loss": 3.0545,
      "step": 60156
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3347971439361572,
      "learning_rate": 0.0005046108261472981,
      "loss": 3.1495,
      "step": 60157
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4563181400299072,
      "learning_rate": 0.0005046078346244094,
      "loss": 3.1757,
      "step": 60158
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9003874063491821,
      "learning_rate": 0.0005046048430634803,
      "loss": 3.0088,
      "step": 60159
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6889119148254395,
      "learning_rate": 0.0005046018514645113,
      "loss": 2.8011,
      "step": 60160
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.31569766998291,
      "learning_rate": 0.000504598859827503,
      "loss": 2.827,
      "step": 60161
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8006964921951294,
      "learning_rate": 0.0005045958681524558,
      "loss": 3.2177,
      "step": 60162
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.527555465698242,
      "learning_rate": 0.0005045928764393705,
      "loss": 3.0827,
      "step": 60163
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5765626430511475,
      "learning_rate": 0.0005045898846882474,
      "loss": 2.8995,
      "step": 60164
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.532118082046509,
      "learning_rate": 0.0005045868928990874,
      "loss": 3.033,
      "step": 60165
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.116083860397339,
      "learning_rate": 0.0005045839010718906,
      "loss": 3.2539,
      "step": 60166
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8636096715927124,
      "learning_rate": 0.0005045809092066579,
      "loss": 2.8918,
      "step": 60167
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6179280281066895,
      "learning_rate": 0.0005045779173033897,
      "loss": 2.6436,
      "step": 60168
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.8988804817199707,
      "learning_rate": 0.0005045749253620866,
      "loss": 2.928,
      "step": 60169
    },
    {
      "epoch": 0.78,
      "grad_norm": 4.030591011047363,
      "learning_rate": 0.0005045719333827491,
      "loss": 2.8006,
      "step": 60170
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5671309232711792,
      "learning_rate": 0.000504568941365378,
      "loss": 2.9246,
      "step": 60171
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.348909616470337,
      "learning_rate": 0.0005045659493099735,
      "loss": 3.1565,
      "step": 60172
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.8722662925720215,
      "learning_rate": 0.0005045629572165364,
      "loss": 2.8447,
      "step": 60173
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.9031357765197754,
      "learning_rate": 0.0005045599650850672,
      "loss": 3.1781,
      "step": 60174
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.421176791191101,
      "learning_rate": 0.0005045569729155664,
      "loss": 3.1218,
      "step": 60175
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4896012544631958,
      "learning_rate": 0.0005045539807080346,
      "loss": 3.063,
      "step": 60176
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.9102513790130615,
      "learning_rate": 0.0005045509884624724,
      "loss": 2.9873,
      "step": 60177
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4909456968307495,
      "learning_rate": 0.0005045479961788803,
      "loss": 2.8717,
      "step": 60178
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3275073766708374,
      "learning_rate": 0.0005045450038572588,
      "loss": 3.0101,
      "step": 60179
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1083717346191406,
      "learning_rate": 0.0005045420114976086,
      "loss": 3.0367,
      "step": 60180
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.0252773761749268,
      "learning_rate": 0.0005045390190999301,
      "loss": 2.957,
      "step": 60181
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.51681387424469,
      "learning_rate": 0.000504536026664224,
      "loss": 2.9934,
      "step": 60182
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4220778942108154,
      "learning_rate": 0.0005045330341904908,
      "loss": 3.0927,
      "step": 60183
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.30500864982605,
      "learning_rate": 0.000504530041678731,
      "loss": 3.1525,
      "step": 60184
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6879717111587524,
      "learning_rate": 0.0005045270491289452,
      "loss": 3.0718,
      "step": 60185
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.154052495956421,
      "learning_rate": 0.000504524056541134,
      "loss": 2.9759,
      "step": 60186
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4194819927215576,
      "learning_rate": 0.0005045210639152979,
      "loss": 3.2148,
      "step": 60187
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9264554977416992,
      "learning_rate": 0.0005045180712514375,
      "loss": 2.9321,
      "step": 60188
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.222597122192383,
      "learning_rate": 0.0005045150785495533,
      "loss": 2.9589,
      "step": 60189
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5541744232177734,
      "learning_rate": 0.000504512085809646,
      "loss": 3.1262,
      "step": 60190
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.59895920753479,
      "learning_rate": 0.0005045090930317159,
      "loss": 2.5869,
      "step": 60191
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5422449111938477,
      "learning_rate": 0.0005045061002157637,
      "loss": 3.0444,
      "step": 60192
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3735430240631104,
      "learning_rate": 0.00050450310736179,
      "loss": 3.1078,
      "step": 60193
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8595929145812988,
      "learning_rate": 0.0005045001144697954,
      "loss": 2.9822,
      "step": 60194
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.065000057220459,
      "learning_rate": 0.0005044971215397802,
      "loss": 3.0834,
      "step": 60195
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.1967568397521973,
      "learning_rate": 0.0005044941285717451,
      "loss": 2.9392,
      "step": 60196
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.25073504447937,
      "learning_rate": 0.0005044911355656909,
      "loss": 3.0788,
      "step": 60197
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.564485549926758,
      "learning_rate": 0.0005044881425216178,
      "loss": 2.787,
      "step": 60198
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.062701940536499,
      "learning_rate": 0.0005044851494395264,
      "loss": 3.1372,
      "step": 60199
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9935455322265625,
      "learning_rate": 0.0005044821563194175,
      "loss": 3.1887,
      "step": 60200
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.075866222381592,
      "learning_rate": 0.0005044791631612915,
      "loss": 2.9513,
      "step": 60201
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7418233156204224,
      "learning_rate": 0.000504476169965149,
      "loss": 3.0007,
      "step": 60202
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3329871892929077,
      "learning_rate": 0.0005044731767309904,
      "loss": 3.1657,
      "step": 60203
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7145992517471313,
      "learning_rate": 0.0005044701834588164,
      "loss": 2.9663,
      "step": 60204
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4223021268844604,
      "learning_rate": 0.0005044671901486276,
      "loss": 3.2156,
      "step": 60205
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4844127893447876,
      "learning_rate": 0.0005044641968004245,
      "loss": 2.95,
      "step": 60206
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7278755903244019,
      "learning_rate": 0.0005044612034142076,
      "loss": 3.1238,
      "step": 60207
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.391304612159729,
      "learning_rate": 0.0005044582099899775,
      "loss": 3.0395,
      "step": 60208
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.5098726749420166,
      "learning_rate": 0.0005044552165277348,
      "loss": 3.0453,
      "step": 60209
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.797532081604004,
      "learning_rate": 0.0005044522230274798,
      "loss": 2.9368,
      "step": 60210
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.499577283859253,
      "learning_rate": 0.0005044492294892135,
      "loss": 2.9498,
      "step": 60211
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.453351378440857,
      "learning_rate": 0.0005044462359129363,
      "loss": 2.8439,
      "step": 60212
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6153535842895508,
      "learning_rate": 0.0005044432422986484,
      "loss": 2.9926,
      "step": 60213
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.4265024662017822,
      "learning_rate": 0.0005044402486463509,
      "loss": 3.0499,
      "step": 60214
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7165441513061523,
      "learning_rate": 0.0005044372549560441,
      "loss": 3.0883,
      "step": 60215
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7419921159744263,
      "learning_rate": 0.0005044342612277284,
      "loss": 3.0173,
      "step": 60216
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8242512941360474,
      "learning_rate": 0.0005044312674614045,
      "loss": 3.3023,
      "step": 60217
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6785962581634521,
      "learning_rate": 0.000504428273657073,
      "loss": 2.6482,
      "step": 60218
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7422308921813965,
      "learning_rate": 0.0005044252798147346,
      "loss": 2.8949,
      "step": 60219
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.092996835708618,
      "learning_rate": 0.0005044222859343895,
      "loss": 3.0184,
      "step": 60220
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6287651062011719,
      "learning_rate": 0.0005044192920160385,
      "loss": 3.213,
      "step": 60221
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2484536170959473,
      "learning_rate": 0.000504416298059682,
      "loss": 2.9096,
      "step": 60222
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3658044338226318,
      "learning_rate": 0.0005044133040653209,
      "loss": 2.9079,
      "step": 60223
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.2887067794799805,
      "learning_rate": 0.0005044103100329552,
      "loss": 2.9287,
      "step": 60224
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6522942781448364,
      "learning_rate": 0.000504407315962586,
      "loss": 3.0039,
      "step": 60225
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7983043193817139,
      "learning_rate": 0.0005044043218542134,
      "loss": 3.1525,
      "step": 60226
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.693024754524231,
      "learning_rate": 0.0005044013277078383,
      "loss": 3.0416,
      "step": 60227
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.421968698501587,
      "learning_rate": 0.0005043983335234612,
      "loss": 3.0279,
      "step": 60228
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.391703724861145,
      "learning_rate": 0.0005043953393010825,
      "loss": 3.0431,
      "step": 60229
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.855931043624878,
      "learning_rate": 0.0005043923450407029,
      "loss": 2.8495,
      "step": 60230
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5268206596374512,
      "learning_rate": 0.0005043893507423229,
      "loss": 2.9677,
      "step": 60231
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5522222518920898,
      "learning_rate": 0.0005043863564059431,
      "loss": 3.1485,
      "step": 60232
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.463919758796692,
      "learning_rate": 0.0005043833620315639,
      "loss": 2.956,
      "step": 60233
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7878366708755493,
      "learning_rate": 0.0005043803676191861,
      "loss": 3.224,
      "step": 60234
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5277599096298218,
      "learning_rate": 0.00050437737316881,
      "loss": 3.2478,
      "step": 60235
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.636112093925476,
      "learning_rate": 0.0005043743786804364,
      "loss": 2.8142,
      "step": 60236
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8495962619781494,
      "learning_rate": 0.0005043713841540656,
      "loss": 3.1965,
      "step": 60237
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.289888381958008,
      "learning_rate": 0.0005043683895896985,
      "loss": 2.9661,
      "step": 60238
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4076504707336426,
      "learning_rate": 0.0005043653949873354,
      "loss": 2.9247,
      "step": 60239
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6350107192993164,
      "learning_rate": 0.0005043624003469769,
      "loss": 2.7513,
      "step": 60240
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9230148792266846,
      "learning_rate": 0.0005043594056686235,
      "loss": 3.0732,
      "step": 60241
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.886745810508728,
      "learning_rate": 0.0005043564109522759,
      "loss": 3.0724,
      "step": 60242
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7253355979919434,
      "learning_rate": 0.0005043534161979345,
      "loss": 2.9581,
      "step": 60243
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4487115144729614,
      "learning_rate": 0.0005043504214056001,
      "loss": 2.8254,
      "step": 60244
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6493631601333618,
      "learning_rate": 0.000504347426575273,
      "loss": 3.0804,
      "step": 60245
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.463606357574463,
      "learning_rate": 0.0005043444317069538,
      "loss": 2.9562,
      "step": 60246
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9368082284927368,
      "learning_rate": 0.0005043414368006432,
      "loss": 3.0496,
      "step": 60247
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9634939432144165,
      "learning_rate": 0.0005043384418563417,
      "loss": 3.1746,
      "step": 60248
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.69926917552948,
      "learning_rate": 0.0005043354468740497,
      "loss": 3.3722,
      "step": 60249
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.102084159851074,
      "learning_rate": 0.000504332451853768,
      "loss": 3.1268,
      "step": 60250
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.087932586669922,
      "learning_rate": 0.000504329456795497,
      "loss": 3.101,
      "step": 60251
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4210247993469238,
      "learning_rate": 0.0005043264616992373,
      "loss": 3.0615,
      "step": 60252
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.338953971862793,
      "learning_rate": 0.0005043234665649894,
      "loss": 3.2283,
      "step": 60253
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.126288652420044,
      "learning_rate": 0.000504320471392754,
      "loss": 2.9957,
      "step": 60254
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.738221287727356,
      "learning_rate": 0.0005043174761825315,
      "loss": 3.0366,
      "step": 60255
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7087833881378174,
      "learning_rate": 0.0005043144809343226,
      "loss": 3.1254,
      "step": 60256
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.531519889831543,
      "learning_rate": 0.0005043114856481277,
      "loss": 3.3465,
      "step": 60257
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7313395738601685,
      "learning_rate": 0.0005043084903239474,
      "loss": 3.3115,
      "step": 60258
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5466688871383667,
      "learning_rate": 0.0005043054949617824,
      "loss": 3.1858,
      "step": 60259
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6149249076843262,
      "learning_rate": 0.0005043024995616331,
      "loss": 2.9853,
      "step": 60260
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7246166467666626,
      "learning_rate": 0.0005042995041235002,
      "loss": 2.8943,
      "step": 60261
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.537617564201355,
      "learning_rate": 0.000504296508647384,
      "loss": 2.9901,
      "step": 60262
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7295562028884888,
      "learning_rate": 0.0005042935131332854,
      "loss": 3.0453,
      "step": 60263
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7904187440872192,
      "learning_rate": 0.0005042905175812047,
      "loss": 2.9038,
      "step": 60264
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5768672227859497,
      "learning_rate": 0.0005042875219911424,
      "loss": 3.1977,
      "step": 60265
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4332739114761353,
      "learning_rate": 0.0005042845263630994,
      "loss": 3.2515,
      "step": 60266
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.63450026512146,
      "learning_rate": 0.000504281530697076,
      "loss": 3.0324,
      "step": 60267
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5141193866729736,
      "learning_rate": 0.0005042785349930728,
      "loss": 3.1934,
      "step": 60268
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.664249300956726,
      "learning_rate": 0.0005042755392510903,
      "loss": 3.0442,
      "step": 60269
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.3899071216583252,
      "learning_rate": 0.0005042725434711293,
      "loss": 3.2382,
      "step": 60270
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.6484719514846802,
      "learning_rate": 0.00050426954765319,
      "loss": 2.871,
      "step": 60271
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4458400011062622,
      "learning_rate": 0.0005042665517972731,
      "loss": 3.1049,
      "step": 60272
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.479634165763855,
      "learning_rate": 0.0005042635559033793,
      "loss": 3.2042,
      "step": 60273
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.6324808597564697,
      "learning_rate": 0.000504260559971509,
      "loss": 3.2267,
      "step": 60274
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.0029211044311523,
      "learning_rate": 0.0005042575640016629,
      "loss": 2.9444,
      "step": 60275
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4266612529754639,
      "learning_rate": 0.0005042545679938414,
      "loss": 2.7742,
      "step": 60276
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.5827810764312744,
      "learning_rate": 0.000504251571948045,
      "loss": 3.3647,
      "step": 60277
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.9312248229980469,
      "learning_rate": 0.0005042485758642746,
      "loss": 2.9574,
      "step": 60278
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8075034618377686,
      "learning_rate": 0.0005042455797425304,
      "loss": 3.0047,
      "step": 60279
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8195831775665283,
      "learning_rate": 0.0005042425835828131,
      "loss": 3.0835,
      "step": 60280
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3843469619750977,
      "learning_rate": 0.0005042395873851233,
      "loss": 3.0739,
      "step": 60281
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.8279271125793457,
      "learning_rate": 0.0005042365911494614,
      "loss": 3.1657,
      "step": 60282
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4876281023025513,
      "learning_rate": 0.0005042335948758282,
      "loss": 3.0455,
      "step": 60283
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.7733381986618042,
      "learning_rate": 0.0005042305985642239,
      "loss": 2.8662,
      "step": 60284
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.3794445991516113,
      "learning_rate": 0.0005042276022146495,
      "loss": 2.9592,
      "step": 60285
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.4451428651809692,
      "learning_rate": 0.0005042246058271053,
      "loss": 2.9657,
      "step": 60286
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.5306947231292725,
      "learning_rate": 0.0005042216094015918,
      "loss": 3.1204,
      "step": 60287
    },
    {
      "epoch": 0.78,
      "grad_norm": 1.742489218711853,
      "learning_rate": 0.0005042186129381097,
      "loss": 2.9705,
      "step": 60288
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.784045934677124,
      "learning_rate": 0.0005042156164366596,
      "loss": 3.001,
      "step": 60289
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.42190420627594,
      "learning_rate": 0.0005042126198972417,
      "loss": 2.8596,
      "step": 60290
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9052584171295166,
      "learning_rate": 0.0005042096233198571,
      "loss": 3.1672,
      "step": 60291
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0057952404022217,
      "learning_rate": 0.0005042066267045059,
      "loss": 3.025,
      "step": 60292
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6073030233383179,
      "learning_rate": 0.0005042036300511889,
      "loss": 2.9893,
      "step": 60293
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0944557189941406,
      "learning_rate": 0.0005042006333599066,
      "loss": 3.2839,
      "step": 60294
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.785377025604248,
      "learning_rate": 0.0005041976366306595,
      "loss": 2.9438,
      "step": 60295
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5658538341522217,
      "learning_rate": 0.0005041946398634483,
      "loss": 3.0116,
      "step": 60296
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6425219774246216,
      "learning_rate": 0.0005041916430582734,
      "loss": 3.1951,
      "step": 60297
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6622284650802612,
      "learning_rate": 0.0005041886462151355,
      "loss": 3.0461,
      "step": 60298
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7968958616256714,
      "learning_rate": 0.0005041856493340349,
      "loss": 3.0391,
      "step": 60299
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8212703466415405,
      "learning_rate": 0.0005041826524149725,
      "loss": 2.9026,
      "step": 60300
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4941837787628174,
      "learning_rate": 0.0005041796554579485,
      "loss": 2.948,
      "step": 60301
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.432023048400879,
      "learning_rate": 0.0005041766584629638,
      "loss": 3.2331,
      "step": 60302
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4670445919036865,
      "learning_rate": 0.0005041736614300187,
      "loss": 3.0561,
      "step": 60303
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8868154287338257,
      "learning_rate": 0.000504170664359114,
      "loss": 3.0456,
      "step": 60304
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8369503021240234,
      "learning_rate": 0.00050416766725025,
      "loss": 3.0555,
      "step": 60305
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.108325719833374,
      "learning_rate": 0.0005041646701034275,
      "loss": 3.0567,
      "step": 60306
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4873464107513428,
      "learning_rate": 0.0005041616729186468,
      "loss": 2.968,
      "step": 60307
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.625618577003479,
      "learning_rate": 0.0005041586756959086,
      "loss": 3.0379,
      "step": 60308
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5895521640777588,
      "learning_rate": 0.0005041556784352137,
      "loss": 3.1506,
      "step": 60309
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.613412380218506,
      "learning_rate": 0.0005041526811365622,
      "loss": 3.0149,
      "step": 60310
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3092384338378906,
      "learning_rate": 0.0005041496837999548,
      "loss": 3.2081,
      "step": 60311
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3984112739562988,
      "learning_rate": 0.0005041466864253921,
      "loss": 2.9095,
      "step": 60312
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5893882513046265,
      "learning_rate": 0.0005041436890128748,
      "loss": 3.1038,
      "step": 60313
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.210557460784912,
      "learning_rate": 0.0005041406915624033,
      "loss": 3.0959,
      "step": 60314
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5078319311141968,
      "learning_rate": 0.0005041376940739783,
      "loss": 2.8036,
      "step": 60315
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5734690427780151,
      "learning_rate": 0.0005041346965476,
      "loss": 2.8775,
      "step": 60316
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6003779172897339,
      "learning_rate": 0.0005041316989832695,
      "loss": 3.2019,
      "step": 60317
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1178226470947266,
      "learning_rate": 0.0005041287013809869,
      "loss": 3.2874,
      "step": 60318
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4966977834701538,
      "learning_rate": 0.0005041257037407529,
      "loss": 3.1971,
      "step": 60319
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.411461114883423,
      "learning_rate": 0.0005041227060625682,
      "loss": 2.9783,
      "step": 60320
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7234193086624146,
      "learning_rate": 0.0005041197083464331,
      "loss": 2.9699,
      "step": 60321
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.265135645866394,
      "learning_rate": 0.0005041167105923483,
      "loss": 3.059,
      "step": 60322
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.348624587059021,
      "learning_rate": 0.0005041137128003144,
      "loss": 2.9087,
      "step": 60323
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.415880799293518,
      "learning_rate": 0.000504110714970332,
      "loss": 3.162,
      "step": 60324
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8629426956176758,
      "learning_rate": 0.0005041077171024014,
      "loss": 2.9073,
      "step": 60325
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6625994443893433,
      "learning_rate": 0.0005041047191965233,
      "loss": 3.1836,
      "step": 60326
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6137535572052002,
      "learning_rate": 0.0005041017212526985,
      "loss": 2.9183,
      "step": 60327
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4125815629959106,
      "learning_rate": 0.0005040987232709271,
      "loss": 3.2088,
      "step": 60328
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8638590574264526,
      "learning_rate": 0.0005040957252512101,
      "loss": 3.2305,
      "step": 60329
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4943021535873413,
      "learning_rate": 0.0005040927271935477,
      "loss": 3.044,
      "step": 60330
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.491766095161438,
      "learning_rate": 0.0005040897290979406,
      "loss": 2.9784,
      "step": 60331
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9102944135665894,
      "learning_rate": 0.0005040867309643894,
      "loss": 3.3638,
      "step": 60332
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5085703134536743,
      "learning_rate": 0.0005040837327928947,
      "loss": 2.9966,
      "step": 60333
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5516343116760254,
      "learning_rate": 0.000504080734583457,
      "loss": 3.0095,
      "step": 60334
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4029154777526855,
      "learning_rate": 0.0005040777363360768,
      "loss": 2.6839,
      "step": 60335
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.980614185333252,
      "learning_rate": 0.0005040747380507547,
      "loss": 2.9292,
      "step": 60336
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8150889873504639,
      "learning_rate": 0.0005040717397274912,
      "loss": 2.9837,
      "step": 60337
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0622177124023438,
      "learning_rate": 0.0005040687413662869,
      "loss": 3.0438,
      "step": 60338
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.526151418685913,
      "learning_rate": 0.0005040657429671424,
      "loss": 2.9532,
      "step": 60339
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7102265357971191,
      "learning_rate": 0.0005040627445300583,
      "loss": 3.0289,
      "step": 60340
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8247108459472656,
      "learning_rate": 0.000504059746055035,
      "loss": 3.0388,
      "step": 60341
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.578776240348816,
      "learning_rate": 0.0005040567475420732,
      "loss": 3.2126,
      "step": 60342
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7994306087493896,
      "learning_rate": 0.0005040537489911733,
      "loss": 3.336,
      "step": 60343
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3695214986801147,
      "learning_rate": 0.000504050750402336,
      "loss": 3.0987,
      "step": 60344
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8391484022140503,
      "learning_rate": 0.0005040477517755618,
      "loss": 3.2019,
      "step": 60345
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7649120092391968,
      "learning_rate": 0.0005040447531108513,
      "loss": 3.1805,
      "step": 60346
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6037768125534058,
      "learning_rate": 0.0005040417544082051,
      "loss": 2.9731,
      "step": 60347
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5314995050430298,
      "learning_rate": 0.0005040387556676236,
      "loss": 3.1036,
      "step": 60348
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5481951236724854,
      "learning_rate": 0.0005040357568891074,
      "loss": 2.935,
      "step": 60349
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5040379762649536,
      "learning_rate": 0.0005040327580726572,
      "loss": 3.1819,
      "step": 60350
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0987651348114014,
      "learning_rate": 0.0005040297592182733,
      "loss": 3.1138,
      "step": 60351
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.011861801147461,
      "learning_rate": 0.0005040267603259567,
      "loss": 3.0204,
      "step": 60352
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9975672960281372,
      "learning_rate": 0.0005040237613957075,
      "loss": 2.8976,
      "step": 60353
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4491223096847534,
      "learning_rate": 0.0005040207624275264,
      "loss": 3.3354,
      "step": 60354
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8032256364822388,
      "learning_rate": 0.000504017763421414,
      "loss": 3.1389,
      "step": 60355
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6377705335617065,
      "learning_rate": 0.0005040147643773709,
      "loss": 2.9267,
      "step": 60356
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7503129243850708,
      "learning_rate": 0.0005040117652953975,
      "loss": 3.0644,
      "step": 60357
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5944442749023438,
      "learning_rate": 0.0005040087661754946,
      "loss": 3.1682,
      "step": 60358
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5636858940124512,
      "learning_rate": 0.0005040057670176626,
      "loss": 3.1373,
      "step": 60359
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.773056983947754,
      "learning_rate": 0.0005040027678219019,
      "loss": 2.8445,
      "step": 60360
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.445426106452942,
      "learning_rate": 0.0005039997685882135,
      "loss": 2.9782,
      "step": 60361
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4825551509857178,
      "learning_rate": 0.0005039967693165976,
      "loss": 2.9876,
      "step": 60362
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6082018613815308,
      "learning_rate": 0.0005039937700070548,
      "loss": 3.3757,
      "step": 60363
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.643730640411377,
      "learning_rate": 0.0005039907706595857,
      "loss": 2.9062,
      "step": 60364
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.602614402770996,
      "learning_rate": 0.0005039877712741908,
      "loss": 3.0046,
      "step": 60365
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0758652687072754,
      "learning_rate": 0.0005039847718508708,
      "loss": 3.1777,
      "step": 60366
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6136715412139893,
      "learning_rate": 0.0005039817723896262,
      "loss": 3.1048,
      "step": 60367
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.854720950126648,
      "learning_rate": 0.0005039787728904577,
      "loss": 3.0026,
      "step": 60368
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.144331693649292,
      "learning_rate": 0.0005039757733533654,
      "loss": 3.0699,
      "step": 60369
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.682982325553894,
      "learning_rate": 0.0005039727737783503,
      "loss": 3.1002,
      "step": 60370
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.718796730041504,
      "learning_rate": 0.0005039697741654128,
      "loss": 3.0273,
      "step": 60371
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7723112106323242,
      "learning_rate": 0.0005039667745145535,
      "loss": 3.3796,
      "step": 60372
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9336801767349243,
      "learning_rate": 0.0005039637748257728,
      "loss": 2.9534,
      "step": 60373
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7528215646743774,
      "learning_rate": 0.0005039607750990714,
      "loss": 3.1482,
      "step": 60374
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8770568370819092,
      "learning_rate": 0.0005039577753344499,
      "loss": 3.0658,
      "step": 60375
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2971863746643066,
      "learning_rate": 0.0005039547755319089,
      "loss": 3.0476,
      "step": 60376
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0120413303375244,
      "learning_rate": 0.0005039517756914488,
      "loss": 3.0456,
      "step": 60377
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6318565607070923,
      "learning_rate": 0.0005039487758130701,
      "loss": 2.9652,
      "step": 60378
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.765894889831543,
      "learning_rate": 0.0005039457758967737,
      "loss": 2.9401,
      "step": 60379
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5993880033493042,
      "learning_rate": 0.0005039427759425597,
      "loss": 3.0868,
      "step": 60380
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4732474088668823,
      "learning_rate": 0.000503939775950429,
      "loss": 3.1958,
      "step": 60381
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5586634874343872,
      "learning_rate": 0.0005039367759203821,
      "loss": 2.8927,
      "step": 60382
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.241206407546997,
      "learning_rate": 0.0005039337758524193,
      "loss": 3.1061,
      "step": 60383
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6111336946487427,
      "learning_rate": 0.0005039307757465415,
      "loss": 3.0276,
      "step": 60384
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5333874225616455,
      "learning_rate": 0.0005039277756027492,
      "loss": 2.8954,
      "step": 60385
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6215239763259888,
      "learning_rate": 0.0005039247754210428,
      "loss": 2.9758,
      "step": 60386
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4141836166381836,
      "learning_rate": 0.0005039217752014229,
      "loss": 3.1139,
      "step": 60387
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5868828296661377,
      "learning_rate": 0.00050391877494389,
      "loss": 3.1405,
      "step": 60388
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6482964754104614,
      "learning_rate": 0.0005039157746484449,
      "loss": 3.0808,
      "step": 60389
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.54252028465271,
      "learning_rate": 0.000503912774315088,
      "loss": 3.0239,
      "step": 60390
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4735281467437744,
      "learning_rate": 0.0005039097739438198,
      "loss": 2.9811,
      "step": 60391
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6490931510925293,
      "learning_rate": 0.0005039067735346409,
      "loss": 3.2131,
      "step": 60392
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7351815700531006,
      "learning_rate": 0.000503903773087552,
      "loss": 3.0032,
      "step": 60393
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6049166917800903,
      "learning_rate": 0.0005039007726025534,
      "loss": 3.1002,
      "step": 60394
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1003127098083496,
      "learning_rate": 0.000503897772079646,
      "loss": 3.0831,
      "step": 60395
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9765348434448242,
      "learning_rate": 0.00050389477151883,
      "loss": 2.9027,
      "step": 60396
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8936797380447388,
      "learning_rate": 0.0005038917709201061,
      "loss": 3.3259,
      "step": 60397
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.234102487564087,
      "learning_rate": 0.0005038887702834749,
      "loss": 2.8838,
      "step": 60398
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.561835765838623,
      "learning_rate": 0.0005038857696089369,
      "loss": 3.0629,
      "step": 60399
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6134512424468994,
      "learning_rate": 0.0005038827688964928,
      "loss": 2.9308,
      "step": 60400
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.7145028114318848,
      "learning_rate": 0.000503879768146143,
      "loss": 3.1494,
      "step": 60401
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5743935108184814,
      "learning_rate": 0.0005038767673578881,
      "loss": 3.1617,
      "step": 60402
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5996476411819458,
      "learning_rate": 0.0005038737665317286,
      "loss": 3.2805,
      "step": 60403
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.397243857383728,
      "learning_rate": 0.0005038707656676651,
      "loss": 3.0552,
      "step": 60404
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5212055444717407,
      "learning_rate": 0.0005038677647656983,
      "loss": 3.1311,
      "step": 60405
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5460565090179443,
      "learning_rate": 0.0005038647638258286,
      "loss": 3.0886,
      "step": 60406
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5988143682479858,
      "learning_rate": 0.0005038617628480565,
      "loss": 3.1365,
      "step": 60407
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.5757079124450684,
      "learning_rate": 0.0005038587618323827,
      "loss": 2.797,
      "step": 60408
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3465523719787598,
      "learning_rate": 0.0005038557607788078,
      "loss": 3.1068,
      "step": 60409
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5383059978485107,
      "learning_rate": 0.0005038527596873321,
      "loss": 3.1162,
      "step": 60410
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.017987012863159,
      "learning_rate": 0.0005038497585579564,
      "loss": 2.9528,
      "step": 60411
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.0373611450195312,
      "learning_rate": 0.0005038467573906813,
      "loss": 2.9944,
      "step": 60412
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5099632740020752,
      "learning_rate": 0.000503843756185507,
      "loss": 2.9766,
      "step": 60413
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.853347897529602,
      "learning_rate": 0.0005038407549424345,
      "loss": 3.2411,
      "step": 60414
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.9393131732940674,
      "learning_rate": 0.000503837753661464,
      "loss": 2.8762,
      "step": 60415
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.20796275138855,
      "learning_rate": 0.0005038347523425963,
      "loss": 3.0655,
      "step": 60416
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.70195472240448,
      "learning_rate": 0.0005038317509858318,
      "loss": 3.0153,
      "step": 60417
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3049604892730713,
      "learning_rate": 0.0005038287495911714,
      "loss": 3.1949,
      "step": 60418
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.2707459926605225,
      "learning_rate": 0.0005038257481586151,
      "loss": 2.9971,
      "step": 60419
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9608262777328491,
      "learning_rate": 0.0005038227466881639,
      "loss": 2.8589,
      "step": 60420
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.103240966796875,
      "learning_rate": 0.000503819745179818,
      "loss": 2.6356,
      "step": 60421
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.3551740646362305,
      "learning_rate": 0.0005038167436335784,
      "loss": 2.8783,
      "step": 60422
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.8623082637786865,
      "learning_rate": 0.0005038137420494453,
      "loss": 3.2617,
      "step": 60423
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.638790488243103,
      "learning_rate": 0.0005038107404274193,
      "loss": 3.0593,
      "step": 60424
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.026963949203491,
      "learning_rate": 0.0005038077387675013,
      "loss": 3.267,
      "step": 60425
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.309988021850586,
      "learning_rate": 0.0005038047370696914,
      "loss": 3.1012,
      "step": 60426
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.011399507522583,
      "learning_rate": 0.0005038017353339902,
      "loss": 2.9923,
      "step": 60427
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7272249460220337,
      "learning_rate": 0.0005037987335603987,
      "loss": 2.8879,
      "step": 60428
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.687506675720215,
      "learning_rate": 0.000503795731748917,
      "loss": 3.0499,
      "step": 60429
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.634929895401001,
      "learning_rate": 0.0005037927298995459,
      "loss": 3.1843,
      "step": 60430
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4003369808197021,
      "learning_rate": 0.0005037897280122859,
      "loss": 2.9763,
      "step": 60431
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.070838451385498,
      "learning_rate": 0.0005037867260871376,
      "loss": 2.9472,
      "step": 60432
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4795281887054443,
      "learning_rate": 0.0005037837241241013,
      "loss": 3.1044,
      "step": 60433
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.300917387008667,
      "learning_rate": 0.000503780722123178,
      "loss": 3.0398,
      "step": 60434
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5147922039031982,
      "learning_rate": 0.0005037777200843679,
      "loss": 3.1614,
      "step": 60435
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4379289150238037,
      "learning_rate": 0.0005037747180076716,
      "loss": 2.9577,
      "step": 60436
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4434031248092651,
      "learning_rate": 0.0005037717158930898,
      "loss": 3.1115,
      "step": 60437
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8827956914901733,
      "learning_rate": 0.000503768713740623,
      "loss": 2.9914,
      "step": 60438
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4397183656692505,
      "learning_rate": 0.0005037657115502717,
      "loss": 3.0928,
      "step": 60439
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7506675720214844,
      "learning_rate": 0.0005037627093220366,
      "loss": 2.9752,
      "step": 60440
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.632230281829834,
      "learning_rate": 0.000503759707055918,
      "loss": 2.9951,
      "step": 60441
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6076648235321045,
      "learning_rate": 0.0005037567047519168,
      "loss": 3.051,
      "step": 60442
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4100085496902466,
      "learning_rate": 0.0005037537024100334,
      "loss": 3.0874,
      "step": 60443
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3435938358306885,
      "learning_rate": 0.0005037507000302681,
      "loss": 2.8424,
      "step": 60444
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9602302312850952,
      "learning_rate": 0.0005037476976126219,
      "loss": 2.8122,
      "step": 60445
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0921311378479004,
      "learning_rate": 0.0005037446951570952,
      "loss": 2.9152,
      "step": 60446
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.58729088306427,
      "learning_rate": 0.0005037416926636884,
      "loss": 3.088,
      "step": 60447
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6814111471176147,
      "learning_rate": 0.0005037386901324022,
      "loss": 2.926,
      "step": 60448
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5037375688552856,
      "learning_rate": 0.0005037356875632372,
      "loss": 2.9849,
      "step": 60449
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.308262586593628,
      "learning_rate": 0.0005037326849561938,
      "loss": 3.0641,
      "step": 60450
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.516862154006958,
      "learning_rate": 0.0005037296823112727,
      "loss": 3.2694,
      "step": 60451
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5745222568511963,
      "learning_rate": 0.0005037266796284743,
      "loss": 3.0017,
      "step": 60452
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.442960262298584,
      "learning_rate": 0.0005037236769077993,
      "loss": 3.0816,
      "step": 60453
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.452133059501648,
      "learning_rate": 0.0005037206741492483,
      "loss": 3.1293,
      "step": 60454
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4879376888275146,
      "learning_rate": 0.0005037176713528217,
      "loss": 3.0998,
      "step": 60455
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3831712007522583,
      "learning_rate": 0.0005037146685185203,
      "loss": 2.9424,
      "step": 60456
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5961657762527466,
      "learning_rate": 0.0005037116656463443,
      "loss": 3.168,
      "step": 60457
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.18540620803833,
      "learning_rate": 0.0005037086627362946,
      "loss": 3.1144,
      "step": 60458
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7687345743179321,
      "learning_rate": 0.0005037056597883715,
      "loss": 3.0503,
      "step": 60459
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4632138013839722,
      "learning_rate": 0.0005037026568025757,
      "loss": 3.216,
      "step": 60460
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8442422151565552,
      "learning_rate": 0.0005036996537789079,
      "loss": 3.042,
      "step": 60461
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6374340057373047,
      "learning_rate": 0.0005036966507173683,
      "loss": 3.0709,
      "step": 60462
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.092477321624756,
      "learning_rate": 0.0005036936476179577,
      "loss": 3.0497,
      "step": 60463
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7872991561889648,
      "learning_rate": 0.0005036906444806766,
      "loss": 3.0327,
      "step": 60464
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.5282669067382812,
      "learning_rate": 0.0005036876413055256,
      "loss": 2.9395,
      "step": 60465
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7770671844482422,
      "learning_rate": 0.0005036846380925052,
      "loss": 2.9933,
      "step": 60466
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4593569040298462,
      "learning_rate": 0.0005036816348416159,
      "loss": 3.0382,
      "step": 60467
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7201682329177856,
      "learning_rate": 0.0005036786315528584,
      "loss": 2.9774,
      "step": 60468
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.646336555480957,
      "learning_rate": 0.0005036756282262332,
      "loss": 3.1674,
      "step": 60469
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4656686782836914,
      "learning_rate": 0.0005036726248617408,
      "loss": 3.3118,
      "step": 60470
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.445208191871643,
      "learning_rate": 0.0005036696214593819,
      "loss": 3.0406,
      "step": 60471
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5167800188064575,
      "learning_rate": 0.000503666618019157,
      "loss": 3.0785,
      "step": 60472
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6909924745559692,
      "learning_rate": 0.0005036636145410666,
      "loss": 2.9668,
      "step": 60473
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5478873252868652,
      "learning_rate": 0.0005036606110251113,
      "loss": 2.9967,
      "step": 60474
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0577399730682373,
      "learning_rate": 0.0005036576074712916,
      "loss": 2.9949,
      "step": 60475
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5696845054626465,
      "learning_rate": 0.0005036546038796081,
      "loss": 3.2493,
      "step": 60476
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.031593084335327,
      "learning_rate": 0.0005036516002500614,
      "loss": 2.9132,
      "step": 60477
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5265705585479736,
      "learning_rate": 0.0005036485965826521,
      "loss": 3.0575,
      "step": 60478
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7656099796295166,
      "learning_rate": 0.0005036455928773805,
      "loss": 3.1351,
      "step": 60479
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6173268556594849,
      "learning_rate": 0.0005036425891342475,
      "loss": 2.9517,
      "step": 60480
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6708166599273682,
      "learning_rate": 0.0005036395853532533,
      "loss": 3.0579,
      "step": 60481
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5530158281326294,
      "learning_rate": 0.0005036365815343989,
      "loss": 3.2607,
      "step": 60482
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3700772523880005,
      "learning_rate": 0.0005036335776776845,
      "loss": 2.9288,
      "step": 60483
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.633026361465454,
      "learning_rate": 0.0005036305737831108,
      "loss": 2.968,
      "step": 60484
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5601403713226318,
      "learning_rate": 0.0005036275698506782,
      "loss": 3.0369,
      "step": 60485
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0077621936798096,
      "learning_rate": 0.0005036245658803875,
      "loss": 2.934,
      "step": 60486
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4089248180389404,
      "learning_rate": 0.0005036215618722392,
      "loss": 3.1597,
      "step": 60487
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8234888315200806,
      "learning_rate": 0.0005036185578262337,
      "loss": 3.1427,
      "step": 60488
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6145597696304321,
      "learning_rate": 0.0005036155537423718,
      "loss": 3.2568,
      "step": 60489
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.748605489730835,
      "learning_rate": 0.0005036125496206539,
      "loss": 2.9094,
      "step": 60490
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6536004543304443,
      "learning_rate": 0.0005036095454610805,
      "loss": 3.2452,
      "step": 60491
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.414764165878296,
      "learning_rate": 0.0005036065412636523,
      "loss": 3.2183,
      "step": 60492
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6751525402069092,
      "learning_rate": 0.0005036035370283697,
      "loss": 3.0271,
      "step": 60493
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6801906824111938,
      "learning_rate": 0.0005036005327552335,
      "loss": 2.876,
      "step": 60494
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5825754404067993,
      "learning_rate": 0.000503597528444244,
      "loss": 3.0123,
      "step": 60495
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.540099859237671,
      "learning_rate": 0.0005035945240954021,
      "loss": 3.0084,
      "step": 60496
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4024161100387573,
      "learning_rate": 0.0005035915197087078,
      "loss": 3.0765,
      "step": 60497
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.467366099357605,
      "learning_rate": 0.0005035885152841623,
      "loss": 2.7464,
      "step": 60498
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5318924188613892,
      "learning_rate": 0.0005035855108217656,
      "loss": 3.0127,
      "step": 60499
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5963789224624634,
      "learning_rate": 0.0005035825063215186,
      "loss": 2.8242,
      "step": 60500
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.497896432876587,
      "learning_rate": 0.0005035795017834219,
      "loss": 3.1171,
      "step": 60501
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4318873882293701,
      "learning_rate": 0.0005035764972074758,
      "loss": 2.9596,
      "step": 60502
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7969270944595337,
      "learning_rate": 0.0005035734925936809,
      "loss": 3.1584,
      "step": 60503
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.0414068698883057,
      "learning_rate": 0.0005035704879420381,
      "loss": 2.7867,
      "step": 60504
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5493824481964111,
      "learning_rate": 0.0005035674832525475,
      "loss": 3.0552,
      "step": 60505
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7862495183944702,
      "learning_rate": 0.00050356447852521,
      "loss": 3.063,
      "step": 60506
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.445066213607788,
      "learning_rate": 0.0005035614737600259,
      "loss": 2.9447,
      "step": 60507
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4444407224655151,
      "learning_rate": 0.0005035584689569958,
      "loss": 3.0972,
      "step": 60508
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.283632755279541,
      "learning_rate": 0.0005035554641161205,
      "loss": 3.1998,
      "step": 60509
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6171767711639404,
      "learning_rate": 0.0005035524592374004,
      "loss": 2.8885,
      "step": 60510
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5818990468978882,
      "learning_rate": 0.000503549454320836,
      "loss": 3.1263,
      "step": 60511
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.911838173866272,
      "learning_rate": 0.0005035464493664279,
      "loss": 3.0726,
      "step": 60512
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4717720746994019,
      "learning_rate": 0.0005035434443741767,
      "loss": 3.0838,
      "step": 60513
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8994228839874268,
      "learning_rate": 0.0005035404393440829,
      "loss": 3.119,
      "step": 60514
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7106986045837402,
      "learning_rate": 0.0005035374342761471,
      "loss": 3.0339,
      "step": 60515
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5649663209915161,
      "learning_rate": 0.00050353442917037,
      "loss": 3.0939,
      "step": 60516
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.831828236579895,
      "learning_rate": 0.0005035314240267518,
      "loss": 3.0563,
      "step": 60517
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7276273965835571,
      "learning_rate": 0.0005035284188452933,
      "loss": 2.8672,
      "step": 60518
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.827078104019165,
      "learning_rate": 0.0005035254136259951,
      "loss": 3.1204,
      "step": 60519
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.485503911972046,
      "learning_rate": 0.0005035224083688576,
      "loss": 2.9784,
      "step": 60520
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9564892053604126,
      "learning_rate": 0.0005035194030738815,
      "loss": 2.9447,
      "step": 60521
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4768564701080322,
      "learning_rate": 0.0005035163977410673,
      "loss": 3.0878,
      "step": 60522
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9117406606674194,
      "learning_rate": 0.0005035133923704156,
      "loss": 2.9702,
      "step": 60523
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6198060512542725,
      "learning_rate": 0.0005035103869619268,
      "loss": 2.9142,
      "step": 60524
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5494577884674072,
      "learning_rate": 0.0005035073815156017,
      "loss": 3.0028,
      "step": 60525
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6463857889175415,
      "learning_rate": 0.0005035043760314407,
      "loss": 3.146,
      "step": 60526
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.651473045349121,
      "learning_rate": 0.0005035013705094444,
      "loss": 2.7713,
      "step": 60527
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9134957790374756,
      "learning_rate": 0.0005034983649496133,
      "loss": 3.1062,
      "step": 60528
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.374733567237854,
      "learning_rate": 0.000503495359351948,
      "loss": 2.7729,
      "step": 60529
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0312132835388184,
      "learning_rate": 0.0005034923537164492,
      "loss": 2.8021,
      "step": 60530
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9855947494506836,
      "learning_rate": 0.0005034893480431172,
      "loss": 3.0725,
      "step": 60531
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.270404577255249,
      "learning_rate": 0.0005034863423319528,
      "loss": 3.0251,
      "step": 60532
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.023266077041626,
      "learning_rate": 0.0005034833365829563,
      "loss": 2.7372,
      "step": 60533
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.39540696144104,
      "learning_rate": 0.0005034803307961285,
      "loss": 3.2555,
      "step": 60534
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.427147150039673,
      "learning_rate": 0.0005034773249714699,
      "loss": 2.9711,
      "step": 60535
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8582913875579834,
      "learning_rate": 0.000503474319108981,
      "loss": 3.0207,
      "step": 60536
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.870976209640503,
      "learning_rate": 0.0005034713132086624,
      "loss": 2.9444,
      "step": 60537
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.7581369876861572,
      "learning_rate": 0.0005034683072705146,
      "loss": 2.9954,
      "step": 60538
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.720640182495117,
      "learning_rate": 0.0005034653012945381,
      "loss": 3.08,
      "step": 60539
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8568522930145264,
      "learning_rate": 0.0005034622952807338,
      "loss": 3.0585,
      "step": 60540
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.583937168121338,
      "learning_rate": 0.0005034592892291018,
      "loss": 2.9028,
      "step": 60541
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6269841194152832,
      "learning_rate": 0.0005034562831396429,
      "loss": 3.1478,
      "step": 60542
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.715604543685913,
      "learning_rate": 0.0005034532770123578,
      "loss": 3.0782,
      "step": 60543
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0390751361846924,
      "learning_rate": 0.0005034502708472468,
      "loss": 2.9435,
      "step": 60544
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4315263032913208,
      "learning_rate": 0.0005034472646443106,
      "loss": 2.9526,
      "step": 60545
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4813232421875,
      "learning_rate": 0.0005034442584035496,
      "loss": 3.1155,
      "step": 60546
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.58217191696167,
      "learning_rate": 0.0005034412521249645,
      "loss": 3.0377,
      "step": 60547
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.891695737838745,
      "learning_rate": 0.0005034382458085558,
      "loss": 3.1478,
      "step": 60548
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5239520072937012,
      "learning_rate": 0.0005034352394543243,
      "loss": 3.0652,
      "step": 60549
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0075459480285645,
      "learning_rate": 0.0005034322330622702,
      "loss": 2.8237,
      "step": 60550
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.631690740585327,
      "learning_rate": 0.0005034292266323942,
      "loss": 3.1674,
      "step": 60551
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2350969314575195,
      "learning_rate": 0.0005034262201646969,
      "loss": 2.9612,
      "step": 60552
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8736430406570435,
      "learning_rate": 0.0005034232136591787,
      "loss": 2.8678,
      "step": 60553
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2333614826202393,
      "learning_rate": 0.0005034202071158404,
      "loss": 3.0966,
      "step": 60554
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.941056966781616,
      "learning_rate": 0.0005034172005346824,
      "loss": 3.084,
      "step": 60555
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.402411699295044,
      "learning_rate": 0.0005034141939157054,
      "loss": 3.1693,
      "step": 60556
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7061939239501953,
      "learning_rate": 0.0005034111872589098,
      "loss": 2.9923,
      "step": 60557
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4140325784683228,
      "learning_rate": 0.0005034081805642962,
      "loss": 2.7474,
      "step": 60558
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.6461637020111084,
      "learning_rate": 0.0005034051738318652,
      "loss": 2.9833,
      "step": 60559
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.739469051361084,
      "learning_rate": 0.0005034021670616173,
      "loss": 3.159,
      "step": 60560
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6828166246414185,
      "learning_rate": 0.0005033991602535531,
      "loss": 3.0565,
      "step": 60561
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.454824686050415,
      "learning_rate": 0.0005033961534076733,
      "loss": 2.9597,
      "step": 60562
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.06013822555542,
      "learning_rate": 0.0005033931465239782,
      "loss": 2.9119,
      "step": 60563
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.720841884613037,
      "learning_rate": 0.0005033901396024684,
      "loss": 2.8493,
      "step": 60564
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6695994138717651,
      "learning_rate": 0.0005033871326431446,
      "loss": 3.105,
      "step": 60565
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6961251497268677,
      "learning_rate": 0.0005033841256460073,
      "loss": 2.9681,
      "step": 60566
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.7259275913238525,
      "learning_rate": 0.0005033811186110571,
      "loss": 2.9629,
      "step": 60567
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.117342233657837,
      "learning_rate": 0.0005033781115382945,
      "loss": 2.9996,
      "step": 60568
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4120628833770752,
      "learning_rate": 0.0005033751044277201,
      "loss": 3.0809,
      "step": 60569
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5507540702819824,
      "learning_rate": 0.0005033720972793343,
      "loss": 3.1355,
      "step": 60570
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5829728841781616,
      "learning_rate": 0.0005033690900931378,
      "loss": 3.4382,
      "step": 60571
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.362230062484741,
      "learning_rate": 0.0005033660828691313,
      "loss": 3.0756,
      "step": 60572
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5577561855316162,
      "learning_rate": 0.0005033630756073151,
      "loss": 3.1374,
      "step": 60573
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4712904691696167,
      "learning_rate": 0.0005033600683076898,
      "loss": 3.0444,
      "step": 60574
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6704955101013184,
      "learning_rate": 0.0005033570609702561,
      "loss": 3.1229,
      "step": 60575
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.412825107574463,
      "learning_rate": 0.0005033540535950145,
      "loss": 3.1132,
      "step": 60576
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5648326873779297,
      "learning_rate": 0.0005033510461819656,
      "loss": 3.1109,
      "step": 60577
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4459747076034546,
      "learning_rate": 0.0005033480387311098,
      "loss": 2.9264,
      "step": 60578
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.419753074645996,
      "learning_rate": 0.0005033450312424479,
      "loss": 2.9496,
      "step": 60579
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6793159246444702,
      "learning_rate": 0.0005033420237159802,
      "loss": 3.0378,
      "step": 60580
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.617295742034912,
      "learning_rate": 0.0005033390161517074,
      "loss": 2.8061,
      "step": 60581
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0923871994018555,
      "learning_rate": 0.0005033360085496299,
      "loss": 3.1147,
      "step": 60582
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.1311028003692627,
      "learning_rate": 0.0005033330009097485,
      "loss": 2.9427,
      "step": 60583
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.353613018989563,
      "learning_rate": 0.0005033299932320637,
      "loss": 3.2754,
      "step": 60584
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5494178533554077,
      "learning_rate": 0.000503326985516576,
      "loss": 3.0308,
      "step": 60585
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9408104419708252,
      "learning_rate": 0.000503323977763286,
      "loss": 3.1933,
      "step": 60586
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.897965908050537,
      "learning_rate": 0.0005033209699721943,
      "loss": 2.9302,
      "step": 60587
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4761569499969482,
      "learning_rate": 0.0005033179621433012,
      "loss": 3.1453,
      "step": 60588
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4229952096939087,
      "learning_rate": 0.0005033149542766076,
      "loss": 2.8957,
      "step": 60589
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4352695941925049,
      "learning_rate": 0.0005033119463721139,
      "loss": 2.7965,
      "step": 60590
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5057686567306519,
      "learning_rate": 0.0005033089384298206,
      "loss": 2.8158,
      "step": 60591
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.612534999847412,
      "learning_rate": 0.0005033059304497283,
      "loss": 2.9606,
      "step": 60592
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5746527910232544,
      "learning_rate": 0.0005033029224318377,
      "loss": 2.972,
      "step": 60593
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8601828813552856,
      "learning_rate": 0.0005032999143761493,
      "loss": 3.1514,
      "step": 60594
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.769313097000122,
      "learning_rate": 0.0005032969062826635,
      "loss": 3.038,
      "step": 60595
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4673781394958496,
      "learning_rate": 0.0005032938981513809,
      "loss": 3.0727,
      "step": 60596
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5941499471664429,
      "learning_rate": 0.0005032908899823023,
      "loss": 3.2149,
      "step": 60597
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3155534267425537,
      "learning_rate": 0.000503287881775428,
      "loss": 3.208,
      "step": 60598
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.328040361404419,
      "learning_rate": 0.0005032848735307587,
      "loss": 2.957,
      "step": 60599
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.437185764312744,
      "learning_rate": 0.0005032818652482947,
      "loss": 2.986,
      "step": 60600
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4106523990631104,
      "learning_rate": 0.000503278856928037,
      "loss": 2.973,
      "step": 60601
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.737256646156311,
      "learning_rate": 0.0005032758485699858,
      "loss": 3.2653,
      "step": 60602
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.737799048423767,
      "learning_rate": 0.0005032728401741419,
      "loss": 3.0926,
      "step": 60603
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.647258996963501,
      "learning_rate": 0.0005032698317405057,
      "loss": 3.0015,
      "step": 60604
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1230711936950684,
      "learning_rate": 0.0005032668232690776,
      "loss": 3.0269,
      "step": 60605
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6952141523361206,
      "learning_rate": 0.0005032638147598585,
      "loss": 3.1459,
      "step": 60606
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4533319473266602,
      "learning_rate": 0.0005032608062128489,
      "loss": 3.0811,
      "step": 60607
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.0075793266296387,
      "learning_rate": 0.0005032577976280492,
      "loss": 2.7998,
      "step": 60608
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7102317810058594,
      "learning_rate": 0.0005032547890054601,
      "loss": 3.257,
      "step": 60609
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.7796199321746826,
      "learning_rate": 0.0005032517803450821,
      "loss": 3.1528,
      "step": 60610
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2467100620269775,
      "learning_rate": 0.0005032487716469156,
      "loss": 2.8722,
      "step": 60611
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3556346893310547,
      "learning_rate": 0.0005032457629109614,
      "loss": 3.0973,
      "step": 60612
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.526623487472534,
      "learning_rate": 0.0005032427541372201,
      "loss": 3.1087,
      "step": 60613
    },
    {
      "epoch": 0.79,
      "grad_norm": 4.596007347106934,
      "learning_rate": 0.0005032397453256921,
      "loss": 2.8427,
      "step": 60614
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.480071544647217,
      "learning_rate": 0.0005032367364763778,
      "loss": 3.0172,
      "step": 60615
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6615768671035767,
      "learning_rate": 0.0005032337275892782,
      "loss": 3.1094,
      "step": 60616
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4174509048461914,
      "learning_rate": 0.0005032307186643935,
      "loss": 3.2691,
      "step": 60617
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9247032403945923,
      "learning_rate": 0.0005032277097017243,
      "loss": 2.987,
      "step": 60618
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6137971878051758,
      "learning_rate": 0.0005032247007012713,
      "loss": 3.1361,
      "step": 60619
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.437404990196228,
      "learning_rate": 0.0005032216916630351,
      "loss": 3.0791,
      "step": 60620
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.336362361907959,
      "learning_rate": 0.000503218682587016,
      "loss": 2.8957,
      "step": 60621
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0235939025878906,
      "learning_rate": 0.0005032156734732147,
      "loss": 3.1246,
      "step": 60622
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5813466310501099,
      "learning_rate": 0.0005032126643216319,
      "loss": 3.3363,
      "step": 60623
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4957327842712402,
      "learning_rate": 0.000503209655132268,
      "loss": 3.0879,
      "step": 60624
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7541112899780273,
      "learning_rate": 0.0005032066459051235,
      "loss": 3.1119,
      "step": 60625
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5420154333114624,
      "learning_rate": 0.0005032036366401992,
      "loss": 2.8697,
      "step": 60626
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.465108871459961,
      "learning_rate": 0.0005032006273374954,
      "loss": 2.9594,
      "step": 60627
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.728218913078308,
      "learning_rate": 0.0005031976179970127,
      "loss": 2.8273,
      "step": 60628
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.435188889503479,
      "learning_rate": 0.0005031946086187519,
      "loss": 3.0568,
      "step": 60629
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3645524978637695,
      "learning_rate": 0.0005031915992027132,
      "loss": 3.2272,
      "step": 60630
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.539936900138855,
      "learning_rate": 0.0005031885897488975,
      "loss": 3.1733,
      "step": 60631
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5277093648910522,
      "learning_rate": 0.0005031855802573052,
      "loss": 3.0455,
      "step": 60632
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5096123218536377,
      "learning_rate": 0.0005031825707279367,
      "loss": 3.2004,
      "step": 60633
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7322421073913574,
      "learning_rate": 0.0005031795611607928,
      "loss": 3.2063,
      "step": 60634
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3645013570785522,
      "learning_rate": 0.0005031765515558741,
      "loss": 2.839,
      "step": 60635
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4555500745773315,
      "learning_rate": 0.000503173541913181,
      "loss": 2.9598,
      "step": 60636
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9484584331512451,
      "learning_rate": 0.0005031705322327141,
      "loss": 3.1262,
      "step": 60637
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.959136724472046,
      "learning_rate": 0.0005031675225144739,
      "loss": 3.0065,
      "step": 60638
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5344524383544922,
      "learning_rate": 0.000503164512758461,
      "loss": 3.1437,
      "step": 60639
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6403698921203613,
      "learning_rate": 0.000503161502964676,
      "loss": 2.9318,
      "step": 60640
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4470173120498657,
      "learning_rate": 0.0005031584931331196,
      "loss": 2.8425,
      "step": 60641
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.321903944015503,
      "learning_rate": 0.000503155483263792,
      "loss": 2.995,
      "step": 60642
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7319507598876953,
      "learning_rate": 0.000503152473356694,
      "loss": 2.9282,
      "step": 60643
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1571097373962402,
      "learning_rate": 0.0005031494634118263,
      "loss": 2.8991,
      "step": 60644
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6493260860443115,
      "learning_rate": 0.000503146453429189,
      "loss": 2.6838,
      "step": 60645
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4955708980560303,
      "learning_rate": 0.0005031434434087832,
      "loss": 3.0678,
      "step": 60646
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4581571817398071,
      "learning_rate": 0.000503140433350609,
      "loss": 3.1906,
      "step": 60647
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0755679607391357,
      "learning_rate": 0.0005031374232546673,
      "loss": 3.1703,
      "step": 60648
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.448952317237854,
      "learning_rate": 0.0005031344131209585,
      "loss": 3.2843,
      "step": 60649
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.708760142326355,
      "learning_rate": 0.000503131402949483,
      "loss": 3.2355,
      "step": 60650
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.689443588256836,
      "learning_rate": 0.0005031283927402417,
      "loss": 2.987,
      "step": 60651
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5690463781356812,
      "learning_rate": 0.000503125382493235,
      "loss": 3.2346,
      "step": 60652
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4936152696609497,
      "learning_rate": 0.0005031223722084633,
      "loss": 3.0465,
      "step": 60653
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9751030206680298,
      "learning_rate": 0.0005031193618859276,
      "loss": 3.0678,
      "step": 60654
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.537887692451477,
      "learning_rate": 0.0005031163515256279,
      "loss": 2.8841,
      "step": 60655
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7611703872680664,
      "learning_rate": 0.0005031133411275652,
      "loss": 3.1839,
      "step": 60656
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5827752351760864,
      "learning_rate": 0.0005031103306917399,
      "loss": 2.9557,
      "step": 60657
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.407409429550171,
      "learning_rate": 0.0005031073202181525,
      "loss": 3.2013,
      "step": 60658
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8450738191604614,
      "learning_rate": 0.0005031043097068036,
      "loss": 2.9482,
      "step": 60659
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6574299335479736,
      "learning_rate": 0.0005031012991576938,
      "loss": 3.1561,
      "step": 60660
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9191253185272217,
      "learning_rate": 0.0005030982885708236,
      "loss": 2.875,
      "step": 60661
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6200528144836426,
      "learning_rate": 0.0005030952779461937,
      "loss": 3.0165,
      "step": 60662
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5908185243606567,
      "learning_rate": 0.0005030922672838045,
      "loss": 3.393,
      "step": 60663
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7011096477508545,
      "learning_rate": 0.0005030892565836565,
      "loss": 2.8782,
      "step": 60664
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5493415594100952,
      "learning_rate": 0.0005030862458457505,
      "loss": 2.7905,
      "step": 60665
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6324121952056885,
      "learning_rate": 0.0005030832350700869,
      "loss": 3.0998,
      "step": 60666
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5668295621871948,
      "learning_rate": 0.0005030802242566662,
      "loss": 2.9138,
      "step": 60667
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4953769445419312,
      "learning_rate": 0.0005030772134054892,
      "loss": 2.9606,
      "step": 60668
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4108086824417114,
      "learning_rate": 0.0005030742025165562,
      "loss": 3.0118,
      "step": 60669
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.603698968887329,
      "learning_rate": 0.000503071191589868,
      "loss": 3.01,
      "step": 60670
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.689821720123291,
      "learning_rate": 0.0005030681806254251,
      "loss": 3.0159,
      "step": 60671
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8105076551437378,
      "learning_rate": 0.0005030651696232277,
      "loss": 3.0795,
      "step": 60672
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8756375312805176,
      "learning_rate": 0.0005030621585832768,
      "loss": 2.9934,
      "step": 60673
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.115532159805298,
      "learning_rate": 0.0005030591475055729,
      "loss": 2.9229,
      "step": 60674
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6811256408691406,
      "learning_rate": 0.0005030561363901163,
      "loss": 2.941,
      "step": 60675
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6778842210769653,
      "learning_rate": 0.0005030531252369078,
      "loss": 3.1452,
      "step": 60676
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.604131817817688,
      "learning_rate": 0.000503050114045948,
      "loss": 2.8607,
      "step": 60677
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4170763492584229,
      "learning_rate": 0.0005030471028172373,
      "loss": 3.3007,
      "step": 60678
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4446995258331299,
      "learning_rate": 0.0005030440915507762,
      "loss": 2.931,
      "step": 60679
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5123271942138672,
      "learning_rate": 0.0005030410802465655,
      "loss": 3.191,
      "step": 60680
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6167078018188477,
      "learning_rate": 0.0005030380689046056,
      "loss": 3.0609,
      "step": 60681
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.024082899093628,
      "learning_rate": 0.000503035057524897,
      "loss": 2.9467,
      "step": 60682
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6619755029678345,
      "learning_rate": 0.0005030320461074405,
      "loss": 3.0951,
      "step": 60683
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7511738538742065,
      "learning_rate": 0.0005030290346522364,
      "loss": 2.8522,
      "step": 60684
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.481914758682251,
      "learning_rate": 0.0005030260231592854,
      "loss": 2.8479,
      "step": 60685
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4992824792861938,
      "learning_rate": 0.0005030230116285881,
      "loss": 3.1148,
      "step": 60686
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7183743715286255,
      "learning_rate": 0.0005030200000601448,
      "loss": 3.4301,
      "step": 60687
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8356584310531616,
      "learning_rate": 0.0005030169884539564,
      "loss": 3.0268,
      "step": 60688
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3765133619308472,
      "learning_rate": 0.0005030139768100232,
      "loss": 2.9599,
      "step": 60689
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6108561754226685,
      "learning_rate": 0.000503010965128346,
      "loss": 3.0206,
      "step": 60690
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5589511394500732,
      "learning_rate": 0.0005030079534089251,
      "loss": 2.5669,
      "step": 60691
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7058563232421875,
      "learning_rate": 0.0005030049416517614,
      "loss": 2.7946,
      "step": 60692
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1587698459625244,
      "learning_rate": 0.0005030019298568551,
      "loss": 2.8316,
      "step": 60693
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8307374715805054,
      "learning_rate": 0.0005029989180242068,
      "loss": 2.8211,
      "step": 60694
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8327937126159668,
      "learning_rate": 0.0005029959061538174,
      "loss": 2.872,
      "step": 60695
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4263759851455688,
      "learning_rate": 0.0005029928942456871,
      "loss": 2.9163,
      "step": 60696
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.579818606376648,
      "learning_rate": 0.0005029898822998166,
      "loss": 3.399,
      "step": 60697
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.703514575958252,
      "learning_rate": 0.0005029868703162064,
      "loss": 2.9539,
      "step": 60698
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1036856174468994,
      "learning_rate": 0.0005029838582948572,
      "loss": 3.2246,
      "step": 60699
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5752774477005005,
      "learning_rate": 0.0005029808462357695,
      "loss": 3.3435,
      "step": 60700
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.532238006591797,
      "learning_rate": 0.0005029778341389437,
      "loss": 3.0324,
      "step": 60701
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6174495220184326,
      "learning_rate": 0.0005029748220043806,
      "loss": 2.8355,
      "step": 60702
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4980930089950562,
      "learning_rate": 0.0005029718098320806,
      "loss": 2.8488,
      "step": 60703
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5001534223556519,
      "learning_rate": 0.0005029687976220443,
      "loss": 3.141,
      "step": 60704
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.000823736190796,
      "learning_rate": 0.0005029657853742723,
      "loss": 3.096,
      "step": 60705
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6048214435577393,
      "learning_rate": 0.0005029627730887651,
      "loss": 2.8704,
      "step": 60706
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3198460340499878,
      "learning_rate": 0.0005029597607655233,
      "loss": 3.1477,
      "step": 60707
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5533955097198486,
      "learning_rate": 0.0005029567484045475,
      "loss": 3.1125,
      "step": 60708
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.202972173690796,
      "learning_rate": 0.0005029537360058381,
      "loss": 2.8544,
      "step": 60709
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7313576936721802,
      "learning_rate": 0.0005029507235693958,
      "loss": 2.892,
      "step": 60710
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.649065613746643,
      "learning_rate": 0.0005029477110952212,
      "loss": 3.0997,
      "step": 60711
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4685701131820679,
      "learning_rate": 0.0005029446985833147,
      "loss": 2.9899,
      "step": 60712
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.531327486038208,
      "learning_rate": 0.000502941686033677,
      "loss": 3.0509,
      "step": 60713
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6165841817855835,
      "learning_rate": 0.0005029386734463087,
      "loss": 2.8781,
      "step": 60714
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5589393377304077,
      "learning_rate": 0.0005029356608212101,
      "loss": 3.2006,
      "step": 60715
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8002557754516602,
      "learning_rate": 0.000502932648158382,
      "loss": 3.5119,
      "step": 60716
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.124232769012451,
      "learning_rate": 0.000502929635457825,
      "loss": 3.1434,
      "step": 60717
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2052180767059326,
      "learning_rate": 0.0005029266227195395,
      "loss": 3.1513,
      "step": 60718
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4623109102249146,
      "learning_rate": 0.000502923609943526,
      "loss": 3.1242,
      "step": 60719
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4288570880889893,
      "learning_rate": 0.0005029205971297852,
      "loss": 3.0505,
      "step": 60720
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8544138669967651,
      "learning_rate": 0.0005029175842783176,
      "loss": 3.1709,
      "step": 60721
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6080468893051147,
      "learning_rate": 0.0005029145713891239,
      "loss": 3.1687,
      "step": 60722
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4470895528793335,
      "learning_rate": 0.0005029115584622045,
      "loss": 3.1991,
      "step": 60723
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5975390672683716,
      "learning_rate": 0.0005029085454975599,
      "loss": 3.1667,
      "step": 60724
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1249423027038574,
      "learning_rate": 0.000502905532495191,
      "loss": 3.4198,
      "step": 60725
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7128307819366455,
      "learning_rate": 0.000502902519455098,
      "loss": 2.8433,
      "step": 60726
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8006691932678223,
      "learning_rate": 0.0005028995063772816,
      "loss": 2.9138,
      "step": 60727
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3430912494659424,
      "learning_rate": 0.0005028964932617424,
      "loss": 2.9015,
      "step": 60728
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5523300170898438,
      "learning_rate": 0.0005028934801084809,
      "loss": 3.077,
      "step": 60729
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4721524715423584,
      "learning_rate": 0.0005028904669174976,
      "loss": 2.993,
      "step": 60730
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1491153240203857,
      "learning_rate": 0.0005028874536887931,
      "loss": 3.2227,
      "step": 60731
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5240485668182373,
      "learning_rate": 0.0005028844404223682,
      "loss": 3.0183,
      "step": 60732
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4841077327728271,
      "learning_rate": 0.000502881427118223,
      "loss": 3.0624,
      "step": 60733
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4003105163574219,
      "learning_rate": 0.0005028784137763585,
      "loss": 2.8989,
      "step": 60734
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9620025157928467,
      "learning_rate": 0.000502875400396775,
      "loss": 2.9171,
      "step": 60735
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6283096075057983,
      "learning_rate": 0.0005028723869794731,
      "loss": 2.9296,
      "step": 60736
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.280423641204834,
      "learning_rate": 0.0005028693735244534,
      "loss": 2.9991,
      "step": 60737
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7831389904022217,
      "learning_rate": 0.0005028663600317165,
      "loss": 2.7854,
      "step": 60738
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.402572751045227,
      "learning_rate": 0.000502863346501263,
      "loss": 2.8967,
      "step": 60739
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4551539421081543,
      "learning_rate": 0.0005028603329330933,
      "loss": 3.0536,
      "step": 60740
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5703729391098022,
      "learning_rate": 0.000502857319327208,
      "loss": 3.1397,
      "step": 60741
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.558544397354126,
      "learning_rate": 0.0005028543056836076,
      "loss": 3.0147,
      "step": 60742
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.6484930515289307,
      "learning_rate": 0.0005028512920022929,
      "loss": 3.0993,
      "step": 60743
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4904464483261108,
      "learning_rate": 0.0005028482782832642,
      "loss": 2.9382,
      "step": 60744
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8478411436080933,
      "learning_rate": 0.0005028452645265224,
      "loss": 3.0471,
      "step": 60745
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8942852020263672,
      "learning_rate": 0.0005028422507320677,
      "loss": 2.8924,
      "step": 60746
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2976460456848145,
      "learning_rate": 0.0005028392368999008,
      "loss": 2.9708,
      "step": 60747
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3345739841461182,
      "learning_rate": 0.0005028362230300222,
      "loss": 2.7795,
      "step": 60748
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4727604389190674,
      "learning_rate": 0.0005028332091224325,
      "loss": 2.9614,
      "step": 60749
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7141108512878418,
      "learning_rate": 0.0005028301951771324,
      "loss": 3.095,
      "step": 60750
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.022263765335083,
      "learning_rate": 0.0005028271811941222,
      "loss": 3.1653,
      "step": 60751
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4183331727981567,
      "learning_rate": 0.0005028241671734026,
      "loss": 2.9365,
      "step": 60752
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6612690687179565,
      "learning_rate": 0.0005028211531149743,
      "loss": 3.006,
      "step": 60753
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5274624824523926,
      "learning_rate": 0.0005028181390188377,
      "loss": 3.2825,
      "step": 60754
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9941965341567993,
      "learning_rate": 0.0005028151248849933,
      "loss": 3.1718,
      "step": 60755
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5970345735549927,
      "learning_rate": 0.0005028121107134417,
      "loss": 3.3877,
      "step": 60756
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7113534212112427,
      "learning_rate": 0.0005028090965041837,
      "loss": 3.0345,
      "step": 60757
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.024237632751465,
      "learning_rate": 0.0005028060822572194,
      "loss": 3.039,
      "step": 60758
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2575483322143555,
      "learning_rate": 0.0005028030679725498,
      "loss": 3.079,
      "step": 60759
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.568006157875061,
      "learning_rate": 0.0005028000536501753,
      "loss": 3.1358,
      "step": 60760
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.450304627418518,
      "learning_rate": 0.0005027970392900964,
      "loss": 3.0181,
      "step": 60761
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.014343023300171,
      "learning_rate": 0.0005027940248923137,
      "loss": 2.9512,
      "step": 60762
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7984929084777832,
      "learning_rate": 0.0005027910104568277,
      "loss": 3.0443,
      "step": 60763
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4911935329437256,
      "learning_rate": 0.0005027879959836391,
      "loss": 3.1099,
      "step": 60764
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8094629049301147,
      "learning_rate": 0.0005027849814727483,
      "loss": 2.9199,
      "step": 60765
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7245577573776245,
      "learning_rate": 0.000502781966924156,
      "loss": 3.1267,
      "step": 60766
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2494447231292725,
      "learning_rate": 0.0005027789523378627,
      "loss": 3.1829,
      "step": 60767
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3638525009155273,
      "learning_rate": 0.0005027759377138689,
      "loss": 2.9906,
      "step": 60768
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7306420803070068,
      "learning_rate": 0.0005027729230521753,
      "loss": 3.0959,
      "step": 60769
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2636492252349854,
      "learning_rate": 0.0005027699083527823,
      "loss": 3.1159,
      "step": 60770
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2911581993103027,
      "learning_rate": 0.0005027668936156906,
      "loss": 3.1919,
      "step": 60771
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5925227403640747,
      "learning_rate": 0.0005027638788409009,
      "loss": 2.945,
      "step": 60772
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.51576566696167,
      "learning_rate": 0.0005027608640284134,
      "loss": 3.2808,
      "step": 60773
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6263175010681152,
      "learning_rate": 0.0005027578491782288,
      "loss": 2.876,
      "step": 60774
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7894808053970337,
      "learning_rate": 0.0005027548342903477,
      "loss": 2.7211,
      "step": 60775
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.79877769947052,
      "learning_rate": 0.0005027518193647707,
      "loss": 3.1763,
      "step": 60776
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5612341165542603,
      "learning_rate": 0.0005027488044014982,
      "loss": 2.8144,
      "step": 60777
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6426674127578735,
      "learning_rate": 0.000502745789400531,
      "loss": 2.995,
      "step": 60778
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3719027042388916,
      "learning_rate": 0.0005027427743618695,
      "loss": 3.1692,
      "step": 60779
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1016557216644287,
      "learning_rate": 0.0005027397592855142,
      "loss": 3.1361,
      "step": 60780
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.415600299835205,
      "learning_rate": 0.0005027367441714658,
      "loss": 3.151,
      "step": 60781
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0874156951904297,
      "learning_rate": 0.0005027337290197248,
      "loss": 2.9449,
      "step": 60782
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.5372440814971924,
      "learning_rate": 0.0005027307138302918,
      "loss": 3.1546,
      "step": 60783
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5959981679916382,
      "learning_rate": 0.0005027276986031674,
      "loss": 3.036,
      "step": 60784
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9891071319580078,
      "learning_rate": 0.000502724683338352,
      "loss": 3.1418,
      "step": 60785
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.710591435432434,
      "learning_rate": 0.0005027216680358463,
      "loss": 2.9783,
      "step": 60786
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9402201175689697,
      "learning_rate": 0.0005027186526956508,
      "loss": 3.1181,
      "step": 60787
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.682409405708313,
      "learning_rate": 0.0005027156373177661,
      "loss": 3.2307,
      "step": 60788
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4971569776535034,
      "learning_rate": 0.0005027126219021926,
      "loss": 2.9518,
      "step": 60789
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7958078384399414,
      "learning_rate": 0.0005027096064489312,
      "loss": 3.3989,
      "step": 60790
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1975131034851074,
      "learning_rate": 0.0005027065909579821,
      "loss": 3.082,
      "step": 60791
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8939697742462158,
      "learning_rate": 0.0005027035754293461,
      "loss": 3.1263,
      "step": 60792
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.9155659675598145,
      "learning_rate": 0.0005027005598630237,
      "loss": 3.0612,
      "step": 60793
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.061955213546753,
      "learning_rate": 0.0005026975442590153,
      "loss": 3.0927,
      "step": 60794
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5934057235717773,
      "learning_rate": 0.0005026945286173217,
      "loss": 2.6538,
      "step": 60795
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3702149391174316,
      "learning_rate": 0.0005026915129379434,
      "loss": 2.9206,
      "step": 60796
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.641542673110962,
      "learning_rate": 0.000502688497220881,
      "loss": 3.1547,
      "step": 60797
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5372885465621948,
      "learning_rate": 0.0005026854814661348,
      "loss": 2.8917,
      "step": 60798
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.324201822280884,
      "learning_rate": 0.0005026824656737056,
      "loss": 2.968,
      "step": 60799
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.5372586250305176,
      "learning_rate": 0.0005026794498435938,
      "loss": 2.9925,
      "step": 60800
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.422033429145813,
      "learning_rate": 0.0005026764339758003,
      "loss": 3.0872,
      "step": 60801
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3464293479919434,
      "learning_rate": 0.0005026734180703252,
      "loss": 3.1482,
      "step": 60802
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8216418027877808,
      "learning_rate": 0.0005026704021271694,
      "loss": 2.9465,
      "step": 60803
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7591382265090942,
      "learning_rate": 0.0005026673861463334,
      "loss": 3.1588,
      "step": 60804
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.9043362140655518,
      "learning_rate": 0.0005026643701278176,
      "loss": 3.0452,
      "step": 60805
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6199570894241333,
      "learning_rate": 0.0005026613540716227,
      "loss": 3.0247,
      "step": 60806
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7818089723587036,
      "learning_rate": 0.0005026583379777492,
      "loss": 2.7184,
      "step": 60807
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8973063230514526,
      "learning_rate": 0.0005026553218461978,
      "loss": 3.1504,
      "step": 60808
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4032599925994873,
      "learning_rate": 0.0005026523056769689,
      "loss": 3.1655,
      "step": 60809
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.944922685623169,
      "learning_rate": 0.000502649289470063,
      "loss": 3.0897,
      "step": 60810
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4921993017196655,
      "learning_rate": 0.000502646273225481,
      "loss": 2.8618,
      "step": 60811
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.034019708633423,
      "learning_rate": 0.000502643256943223,
      "loss": 2.9574,
      "step": 60812
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.259714126586914,
      "learning_rate": 0.0005026402406232899,
      "loss": 2.9433,
      "step": 60813
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0591421127319336,
      "learning_rate": 0.0005026372242656822,
      "loss": 3.0375,
      "step": 60814
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5795665979385376,
      "learning_rate": 0.0005026342078704003,
      "loss": 3.1817,
      "step": 60815
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9604729413986206,
      "learning_rate": 0.0005026311914374449,
      "loss": 3.2298,
      "step": 60816
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2992022037506104,
      "learning_rate": 0.0005026281749668166,
      "loss": 3.1727,
      "step": 60817
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4980931282043457,
      "learning_rate": 0.0005026251584585159,
      "loss": 3.1216,
      "step": 60818
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.800994634628296,
      "learning_rate": 0.0005026221419125433,
      "loss": 2.9457,
      "step": 60819
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4765735864639282,
      "learning_rate": 0.0005026191253288994,
      "loss": 3.0585,
      "step": 60820
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6040878295898438,
      "learning_rate": 0.0005026161087075849,
      "loss": 2.9964,
      "step": 60821
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4255307912826538,
      "learning_rate": 0.0005026130920486001,
      "loss": 3.1504,
      "step": 60822
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.754178524017334,
      "learning_rate": 0.0005026100753519457,
      "loss": 3.0376,
      "step": 60823
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5485612154006958,
      "learning_rate": 0.0005026070586176224,
      "loss": 2.8708,
      "step": 60824
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6823774576187134,
      "learning_rate": 0.0005026040418456306,
      "loss": 3.1818,
      "step": 60825
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9557172060012817,
      "learning_rate": 0.0005026010250359708,
      "loss": 2.949,
      "step": 60826
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3568406105041504,
      "learning_rate": 0.0005025980081886437,
      "loss": 3.2237,
      "step": 60827
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.070690155029297,
      "learning_rate": 0.0005025949913036498,
      "loss": 3.2288,
      "step": 60828
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.828477144241333,
      "learning_rate": 0.0005025919743809896,
      "loss": 3.1064,
      "step": 60829
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8486032485961914,
      "learning_rate": 0.0005025889574206638,
      "loss": 3.1545,
      "step": 60830
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.608535647392273,
      "learning_rate": 0.0005025859404226728,
      "loss": 2.9546,
      "step": 60831
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.760308027267456,
      "learning_rate": 0.0005025829233870175,
      "loss": 2.8313,
      "step": 60832
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0337071418762207,
      "learning_rate": 0.0005025799063136979,
      "loss": 2.7493,
      "step": 60833
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.404116153717041,
      "learning_rate": 0.0005025768892027151,
      "loss": 2.9608,
      "step": 60834
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6841403245925903,
      "learning_rate": 0.0005025738720540692,
      "loss": 2.8932,
      "step": 60835
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4439375400543213,
      "learning_rate": 0.0005025708548677613,
      "loss": 2.9917,
      "step": 60836
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.5412700176239014,
      "learning_rate": 0.0005025678376437913,
      "loss": 3.0854,
      "step": 60837
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4786781072616577,
      "learning_rate": 0.0005025648203821604,
      "loss": 3.081,
      "step": 60838
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4809409379959106,
      "learning_rate": 0.0005025618030828688,
      "loss": 3.3315,
      "step": 60839
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1239309310913086,
      "learning_rate": 0.000502558785745917,
      "loss": 2.8803,
      "step": 60840
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.791294813156128,
      "learning_rate": 0.0005025557683713058,
      "loss": 2.6651,
      "step": 60841
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6031922101974487,
      "learning_rate": 0.0005025527509590357,
      "loss": 3.0645,
      "step": 60842
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8950358629226685,
      "learning_rate": 0.0005025497335091071,
      "loss": 3.0843,
      "step": 60843
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5414704084396362,
      "learning_rate": 0.0005025467160215208,
      "loss": 3.0484,
      "step": 60844
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.261090040206909,
      "learning_rate": 0.0005025436984962772,
      "loss": 2.9832,
      "step": 60845
    },
    {
      "epoch": 0.79,
      "grad_norm": 4.72695779800415,
      "learning_rate": 0.000502540680933377,
      "loss": 3.1633,
      "step": 60846
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4429051876068115,
      "learning_rate": 0.0005025376633328206,
      "loss": 3.0277,
      "step": 60847
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4126670360565186,
      "learning_rate": 0.0005025346456946086,
      "loss": 2.9288,
      "step": 60848
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.479557514190674,
      "learning_rate": 0.0005025316280187415,
      "loss": 3.0991,
      "step": 60849
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4479689598083496,
      "learning_rate": 0.00050252861030522,
      "loss": 2.9325,
      "step": 60850
    },
    {
      "epoch": 0.79,
      "grad_norm": 4.6024065017700195,
      "learning_rate": 0.0005025255925540448,
      "loss": 2.798,
      "step": 60851
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7910197973251343,
      "learning_rate": 0.000502522574765216,
      "loss": 3.1762,
      "step": 60852
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.6170690059661865,
      "learning_rate": 0.0005025195569387346,
      "loss": 2.9332,
      "step": 60853
    },
    {
      "epoch": 0.79,
      "grad_norm": 4.243627071380615,
      "learning_rate": 0.000502516539074601,
      "loss": 3.3444,
      "step": 60854
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.0018863677978516,
      "learning_rate": 0.0005025135211728156,
      "loss": 3.1467,
      "step": 60855
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.783791422843933,
      "learning_rate": 0.0005025105032333792,
      "loss": 3.0166,
      "step": 60856
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9906245470046997,
      "learning_rate": 0.0005025074852562923,
      "loss": 3.1325,
      "step": 60857
    },
    {
      "epoch": 0.79,
      "grad_norm": 4.271674156188965,
      "learning_rate": 0.0005025044672415554,
      "loss": 2.7902,
      "step": 60858
    },
    {
      "epoch": 0.79,
      "grad_norm": 5.048919677734375,
      "learning_rate": 0.0005025014491891691,
      "loss": 2.9625,
      "step": 60859
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6233175992965698,
      "learning_rate": 0.000502498431099134,
      "loss": 3.1517,
      "step": 60860
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.83174467086792,
      "learning_rate": 0.0005024954129714506,
      "loss": 2.8285,
      "step": 60861
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.58198618888855,
      "learning_rate": 0.0005024923948061193,
      "loss": 2.8971,
      "step": 60862
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.5220532417297363,
      "learning_rate": 0.0005024893766031412,
      "loss": 3.3363,
      "step": 60863
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3873567581176758,
      "learning_rate": 0.0005024863583625163,
      "loss": 2.9339,
      "step": 60864
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2777040004730225,
      "learning_rate": 0.0005024833400842453,
      "loss": 2.8364,
      "step": 60865
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7979083061218262,
      "learning_rate": 0.0005024803217683289,
      "loss": 3.221,
      "step": 60866
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2866885662078857,
      "learning_rate": 0.0005024773034147676,
      "loss": 2.8487,
      "step": 60867
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4094082117080688,
      "learning_rate": 0.000502474285023562,
      "loss": 3.0891,
      "step": 60868
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5613058805465698,
      "learning_rate": 0.0005024712665947126,
      "loss": 2.8096,
      "step": 60869
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2757387161254883,
      "learning_rate": 0.0005024682481282198,
      "loss": 3.1184,
      "step": 60870
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.6393771171569824,
      "learning_rate": 0.0005024652296240845,
      "loss": 3.0053,
      "step": 60871
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8277195692062378,
      "learning_rate": 0.000502462211082307,
      "loss": 2.9165,
      "step": 60872
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4944252967834473,
      "learning_rate": 0.0005024591925028879,
      "loss": 2.8993,
      "step": 60873
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.4980671405792236,
      "learning_rate": 0.0005024561738858279,
      "loss": 2.7782,
      "step": 60874
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.337977409362793,
      "learning_rate": 0.0005024531552311276,
      "loss": 2.9157,
      "step": 60875
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.532078504562378,
      "learning_rate": 0.0005024501365387873,
      "loss": 3.1717,
      "step": 60876
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9451216459274292,
      "learning_rate": 0.0005024471178088077,
      "loss": 3.2422,
      "step": 60877
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.05777907371521,
      "learning_rate": 0.0005024440990411893,
      "loss": 2.9027,
      "step": 60878
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3021669387817383,
      "learning_rate": 0.0005024410802359328,
      "loss": 3.2156,
      "step": 60879
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.820082426071167,
      "learning_rate": 0.0005024380613930387,
      "loss": 2.9405,
      "step": 60880
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.542202353477478,
      "learning_rate": 0.0005024350425125075,
      "loss": 3.1454,
      "step": 60881
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.03300404548645,
      "learning_rate": 0.0005024320235943398,
      "loss": 3.3947,
      "step": 60882
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.050017833709717,
      "learning_rate": 0.0005024290046385362,
      "loss": 3.0187,
      "step": 60883
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.926239490509033,
      "learning_rate": 0.0005024259856450973,
      "loss": 3.1482,
      "step": 60884
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4584869146347046,
      "learning_rate": 0.0005024229666140234,
      "loss": 2.9831,
      "step": 60885
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.8987491130828857,
      "learning_rate": 0.0005024199475453154,
      "loss": 2.8819,
      "step": 60886
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.7598161697387695,
      "learning_rate": 0.0005024169284389737,
      "loss": 3.1689,
      "step": 60887
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0305137634277344,
      "learning_rate": 0.0005024139092949987,
      "loss": 3.1142,
      "step": 60888
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.109923839569092,
      "learning_rate": 0.0005024108901133914,
      "loss": 2.8546,
      "step": 60889
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5332103967666626,
      "learning_rate": 0.0005024078708941519,
      "loss": 2.9042,
      "step": 60890
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.773068428039551,
      "learning_rate": 0.000502404851637281,
      "loss": 3.0593,
      "step": 60891
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.558772563934326,
      "learning_rate": 0.0005024018323427793,
      "loss": 3.2334,
      "step": 60892
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3942426443099976,
      "learning_rate": 0.0005023988130106471,
      "loss": 2.7043,
      "step": 60893
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.525153398513794,
      "learning_rate": 0.0005023957936408853,
      "loss": 3.0098,
      "step": 60894
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3945127725601196,
      "learning_rate": 0.0005023927742334942,
      "loss": 2.8962,
      "step": 60895
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.5881447792053223,
      "learning_rate": 0.0005023897547884747,
      "loss": 2.9053,
      "step": 60896
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.318019390106201,
      "learning_rate": 0.0005023867353058268,
      "loss": 3.0401,
      "step": 60897
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2938392162322998,
      "learning_rate": 0.0005023837157855516,
      "loss": 3.0333,
      "step": 60898
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.5703797340393066,
      "learning_rate": 0.0005023806962276493,
      "loss": 2.8717,
      "step": 60899
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6321979761123657,
      "learning_rate": 0.0005023776766321208,
      "loss": 3.193,
      "step": 60900
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5884755849838257,
      "learning_rate": 0.0005023746569989663,
      "loss": 2.8754,
      "step": 60901
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9790922403335571,
      "learning_rate": 0.0005023716373281866,
      "loss": 2.9128,
      "step": 60902
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.961193323135376,
      "learning_rate": 0.0005023686176197821,
      "loss": 2.9759,
      "step": 60903
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7747609615325928,
      "learning_rate": 0.0005023655978737536,
      "loss": 2.9035,
      "step": 60904
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.527190089225769,
      "learning_rate": 0.0005023625780901014,
      "loss": 2.885,
      "step": 60905
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3717849254608154,
      "learning_rate": 0.0005023595582688263,
      "loss": 3.025,
      "step": 60906
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6645969152450562,
      "learning_rate": 0.0005023565384099286,
      "loss": 3.0163,
      "step": 60907
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.708311676979065,
      "learning_rate": 0.0005023535185134091,
      "loss": 2.9485,
      "step": 60908
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6362652778625488,
      "learning_rate": 0.0005023504985792683,
      "loss": 3.0116,
      "step": 60909
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7185328006744385,
      "learning_rate": 0.0005023474786075065,
      "loss": 3.0919,
      "step": 60910
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3477836847305298,
      "learning_rate": 0.0005023444585981246,
      "loss": 3.0046,
      "step": 60911
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.482691526412964,
      "learning_rate": 0.0005023414385511232,
      "loss": 3.025,
      "step": 60912
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.283498525619507,
      "learning_rate": 0.0005023384184665025,
      "loss": 2.8164,
      "step": 60913
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.504448652267456,
      "learning_rate": 0.0005023353983442633,
      "loss": 3.184,
      "step": 60914
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6380615234375,
      "learning_rate": 0.0005023323781844063,
      "loss": 2.8776,
      "step": 60915
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.644456148147583,
      "learning_rate": 0.0005023293579869318,
      "loss": 3.1304,
      "step": 60916
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.335726261138916,
      "learning_rate": 0.0005023263377518404,
      "loss": 3.2208,
      "step": 60917
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3620301485061646,
      "learning_rate": 0.0005023233174791327,
      "loss": 2.9313,
      "step": 60918
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5448261499404907,
      "learning_rate": 0.0005023202971688093,
      "loss": 3.0733,
      "step": 60919
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6897051334381104,
      "learning_rate": 0.0005023172768208708,
      "loss": 3.0078,
      "step": 60920
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6937570571899414,
      "learning_rate": 0.0005023142564353177,
      "loss": 3.1206,
      "step": 60921
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6070436239242554,
      "learning_rate": 0.0005023112360121504,
      "loss": 3.0131,
      "step": 60922
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2914741039276123,
      "learning_rate": 0.0005023082155513698,
      "loss": 2.791,
      "step": 60923
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.605966329574585,
      "learning_rate": 0.0005023051950529762,
      "loss": 3.334,
      "step": 60924
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5312546491622925,
      "learning_rate": 0.0005023021745169703,
      "loss": 2.9285,
      "step": 60925
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.509405493736267,
      "learning_rate": 0.0005022991539433525,
      "loss": 2.984,
      "step": 60926
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6306641101837158,
      "learning_rate": 0.0005022961333321236,
      "loss": 3.0902,
      "step": 60927
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.526997685432434,
      "learning_rate": 0.0005022931126832839,
      "loss": 3.1947,
      "step": 60928
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.788697361946106,
      "learning_rate": 0.0005022900919968342,
      "loss": 2.8759,
      "step": 60929
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9290722608566284,
      "learning_rate": 0.0005022870712727749,
      "loss": 3.4098,
      "step": 60930
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5437053442001343,
      "learning_rate": 0.0005022840505111067,
      "loss": 3.0112,
      "step": 60931
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.2647639513015747,
      "learning_rate": 0.0005022810297118299,
      "loss": 2.9852,
      "step": 60932
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4691070318222046,
      "learning_rate": 0.0005022780088749455,
      "loss": 2.7729,
      "step": 60933
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.549959421157837,
      "learning_rate": 0.0005022749880004535,
      "loss": 2.9715,
      "step": 60934
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6832175254821777,
      "learning_rate": 0.0005022719670883549,
      "loss": 2.8398,
      "step": 60935
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6193091869354248,
      "learning_rate": 0.0005022689461386502,
      "loss": 3.1457,
      "step": 60936
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5481230020523071,
      "learning_rate": 0.0005022659251513398,
      "loss": 3.2661,
      "step": 60937
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3495601415634155,
      "learning_rate": 0.0005022629041264243,
      "loss": 3.0488,
      "step": 60938
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7633832693099976,
      "learning_rate": 0.0005022598830639045,
      "loss": 3.1072,
      "step": 60939
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.499685287475586,
      "learning_rate": 0.0005022568619637806,
      "loss": 2.9719,
      "step": 60940
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6540312767028809,
      "learning_rate": 0.0005022538408260533,
      "loss": 3.0265,
      "step": 60941
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.7364981174468994,
      "learning_rate": 0.0005022508196507233,
      "loss": 2.7552,
      "step": 60942
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.004067897796631,
      "learning_rate": 0.0005022477984377911,
      "loss": 2.7229,
      "step": 60943
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0406413078308105,
      "learning_rate": 0.000502244777187257,
      "loss": 2.9088,
      "step": 60944
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9491124153137207,
      "learning_rate": 0.000502241755899122,
      "loss": 3.0318,
      "step": 60945
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.233839273452759,
      "learning_rate": 0.0005022387345733863,
      "loss": 3.1195,
      "step": 60946
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.651319980621338,
      "learning_rate": 0.0005022357132100507,
      "loss": 2.8783,
      "step": 60947
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0959582328796387,
      "learning_rate": 0.0005022326918091156,
      "loss": 2.9404,
      "step": 60948
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8011846542358398,
      "learning_rate": 0.0005022296703705817,
      "loss": 3.1213,
      "step": 60949
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9561554193496704,
      "learning_rate": 0.0005022266488944494,
      "loss": 3.2429,
      "step": 60950
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3572328090667725,
      "learning_rate": 0.0005022236273807194,
      "loss": 3.0578,
      "step": 60951
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.447354793548584,
      "learning_rate": 0.0005022206058293922,
      "loss": 2.9026,
      "step": 60952
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4697209596633911,
      "learning_rate": 0.0005022175842404683,
      "loss": 3.1499,
      "step": 60953
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7384873628616333,
      "learning_rate": 0.0005022145626139484,
      "loss": 3.0049,
      "step": 60954
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.759130597114563,
      "learning_rate": 0.000502211540949833,
      "loss": 2.9881,
      "step": 60955
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1467955112457275,
      "learning_rate": 0.0005022085192481227,
      "loss": 2.6915,
      "step": 60956
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0563271045684814,
      "learning_rate": 0.0005022054975088179,
      "loss": 3.0757,
      "step": 60957
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4026249647140503,
      "learning_rate": 0.0005022024757319194,
      "loss": 3.2088,
      "step": 60958
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.663667678833008,
      "learning_rate": 0.0005021994539174276,
      "loss": 3.2649,
      "step": 60959
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7257261276245117,
      "learning_rate": 0.000502196432065343,
      "loss": 2.9786,
      "step": 60960
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.633451223373413,
      "learning_rate": 0.0005021934101756665,
      "loss": 2.7544,
      "step": 60961
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.594146728515625,
      "learning_rate": 0.0005021903882483982,
      "loss": 3.0841,
      "step": 60962
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6202318668365479,
      "learning_rate": 0.000502187366283539,
      "loss": 3.2409,
      "step": 60963
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.557584285736084,
      "learning_rate": 0.0005021843442810893,
      "loss": 2.9134,
      "step": 60964
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2335727214813232,
      "learning_rate": 0.0005021813222410498,
      "loss": 3.0566,
      "step": 60965
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7133829593658447,
      "learning_rate": 0.0005021783001634209,
      "loss": 2.734,
      "step": 60966
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5142998695373535,
      "learning_rate": 0.0005021752780482033,
      "loss": 3.0074,
      "step": 60967
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.551951289176941,
      "learning_rate": 0.0005021722558953974,
      "loss": 3.0441,
      "step": 60968
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5784485340118408,
      "learning_rate": 0.0005021692337050039,
      "loss": 2.9504,
      "step": 60969
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4921525716781616,
      "learning_rate": 0.0005021662114770234,
      "loss": 3.1659,
      "step": 60970
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7412370443344116,
      "learning_rate": 0.0005021631892114563,
      "loss": 2.9314,
      "step": 60971
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4830608367919922,
      "learning_rate": 0.0005021601669083032,
      "loss": 3.1014,
      "step": 60972
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6916875839233398,
      "learning_rate": 0.0005021571445675649,
      "loss": 3.0733,
      "step": 60973
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5542086362838745,
      "learning_rate": 0.0005021541221892417,
      "loss": 2.8348,
      "step": 60974
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.3867487907409668,
      "learning_rate": 0.0005021510997733342,
      "loss": 3.0761,
      "step": 60975
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.690632939338684,
      "learning_rate": 0.0005021480773198429,
      "loss": 3.2478,
      "step": 60976
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5762073993682861,
      "learning_rate": 0.0005021450548287685,
      "loss": 3.0663,
      "step": 60977
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8407013416290283,
      "learning_rate": 0.0005021420323001116,
      "loss": 3.0356,
      "step": 60978
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4620556831359863,
      "learning_rate": 0.0005021390097338727,
      "loss": 3.0678,
      "step": 60979
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5344202518463135,
      "learning_rate": 0.0005021359871300523,
      "loss": 3.01,
      "step": 60980
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7007859945297241,
      "learning_rate": 0.0005021329644886509,
      "loss": 3.1629,
      "step": 60981
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.484812617301941,
      "learning_rate": 0.0005021299418096693,
      "loss": 3.0876,
      "step": 60982
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.588457465171814,
      "learning_rate": 0.0005021269190931079,
      "loss": 3.1807,
      "step": 60983
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.78659188747406,
      "learning_rate": 0.0005021238963389672,
      "loss": 3.1632,
      "step": 60984
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5401263236999512,
      "learning_rate": 0.000502120873547248,
      "loss": 3.035,
      "step": 60985
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4963630437850952,
      "learning_rate": 0.0005021178507179506,
      "loss": 3.2396,
      "step": 60986
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.653737187385559,
      "learning_rate": 0.0005021148278510757,
      "loss": 3.0292,
      "step": 60987
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.619718313217163,
      "learning_rate": 0.0005021118049466239,
      "loss": 3.1093,
      "step": 60988
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5834035873413086,
      "learning_rate": 0.0005021087820045957,
      "loss": 3.1648,
      "step": 60989
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.536982536315918,
      "learning_rate": 0.0005021057590249915,
      "loss": 2.9021,
      "step": 60990
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7631111145019531,
      "learning_rate": 0.0005021027360078121,
      "loss": 2.9952,
      "step": 60991
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7194037437438965,
      "learning_rate": 0.0005020997129530581,
      "loss": 2.9146,
      "step": 60992
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.635620355606079,
      "learning_rate": 0.0005020966898607299,
      "loss": 2.9962,
      "step": 60993
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.393606185913086,
      "learning_rate": 0.000502093666730828,
      "loss": 3.0852,
      "step": 60994
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.3002641201019287,
      "learning_rate": 0.0005020906435633532,
      "loss": 2.752,
      "step": 60995
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.845724105834961,
      "learning_rate": 0.0005020876203583059,
      "loss": 2.88,
      "step": 60996
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.013002395629883,
      "learning_rate": 0.0005020845971156867,
      "loss": 3.0412,
      "step": 60997
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7172995805740356,
      "learning_rate": 0.0005020815738354962,
      "loss": 2.9645,
      "step": 60998
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.007941722869873,
      "learning_rate": 0.0005020785505177348,
      "loss": 3.1168,
      "step": 60999
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7680045366287231,
      "learning_rate": 0.0005020755271624033,
      "loss": 3.0161,
      "step": 61000
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.5773565769195557,
      "learning_rate": 0.000502072503769502,
      "loss": 3.1551,
      "step": 61001
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.762350082397461,
      "learning_rate": 0.0005020694803390317,
      "loss": 2.8857,
      "step": 61002
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4032783508300781,
      "learning_rate": 0.000502066456870993,
      "loss": 3.0759,
      "step": 61003
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7384823560714722,
      "learning_rate": 0.0005020634333653862,
      "loss": 2.8991,
      "step": 61004
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9338802099227905,
      "learning_rate": 0.000502060409822212,
      "loss": 3.1913,
      "step": 61005
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.6333091259002686,
      "learning_rate": 0.0005020573862414709,
      "loss": 2.917,
      "step": 61006
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.269260048866272,
      "learning_rate": 0.0005020543626231636,
      "loss": 2.9955,
      "step": 61007
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.2816967964172363,
      "learning_rate": 0.0005020513389672905,
      "loss": 2.9907,
      "step": 61008
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0569498538970947,
      "learning_rate": 0.0005020483152738523,
      "loss": 2.861,
      "step": 61009
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8496164083480835,
      "learning_rate": 0.0005020452915428495,
      "loss": 2.9483,
      "step": 61010
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1959149837493896,
      "learning_rate": 0.0005020422677742827,
      "loss": 2.9543,
      "step": 61011
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8869150876998901,
      "learning_rate": 0.0005020392439681525,
      "loss": 3.1993,
      "step": 61012
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.059879779815674,
      "learning_rate": 0.0005020362201244591,
      "loss": 2.9419,
      "step": 61013
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5384495258331299,
      "learning_rate": 0.0005020331962432036,
      "loss": 3.0125,
      "step": 61014
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.064366102218628,
      "learning_rate": 0.0005020301723243862,
      "loss": 2.9813,
      "step": 61015
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7763348817825317,
      "learning_rate": 0.0005020271483680077,
      "loss": 2.8633,
      "step": 61016
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.0986552238464355,
      "learning_rate": 0.0005020241243740684,
      "loss": 3.1267,
      "step": 61017
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5168205499649048,
      "learning_rate": 0.0005020211003425692,
      "loss": 3.107,
      "step": 61018
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.003788948059082,
      "learning_rate": 0.0005020180762735102,
      "loss": 3.1988,
      "step": 61019
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9140268564224243,
      "learning_rate": 0.0005020150521668924,
      "loss": 3.0453,
      "step": 61020
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5891749858856201,
      "learning_rate": 0.0005020120280227161,
      "loss": 2.9538,
      "step": 61021
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5006316900253296,
      "learning_rate": 0.0005020090038409821,
      "loss": 3.0992,
      "step": 61022
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.232278347015381,
      "learning_rate": 0.0005020059796216907,
      "loss": 2.8931,
      "step": 61023
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6412839889526367,
      "learning_rate": 0.0005020029553648426,
      "loss": 2.8197,
      "step": 61024
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8334507942199707,
      "learning_rate": 0.0005019999310704383,
      "loss": 3.1856,
      "step": 61025
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7371548414230347,
      "learning_rate": 0.0005019969067384784,
      "loss": 3.1772,
      "step": 61026
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9238412380218506,
      "learning_rate": 0.0005019938823689634,
      "loss": 3.2209,
      "step": 61027
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.266386032104492,
      "learning_rate": 0.0005019908579618941,
      "loss": 2.9078,
      "step": 61028
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.478982925415039,
      "learning_rate": 0.0005019878335172708,
      "loss": 3.0577,
      "step": 61029
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5633727312088013,
      "learning_rate": 0.0005019848090350942,
      "loss": 3.2036,
      "step": 61030
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.688982367515564,
      "learning_rate": 0.0005019817845153648,
      "loss": 2.9634,
      "step": 61031
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8198662996292114,
      "learning_rate": 0.000501978759958083,
      "loss": 3.2811,
      "step": 61032
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8522087335586548,
      "learning_rate": 0.0005019757353632497,
      "loss": 2.9214,
      "step": 61033
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4369373321533203,
      "learning_rate": 0.0005019727107308653,
      "loss": 2.7667,
      "step": 61034
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8368123769760132,
      "learning_rate": 0.0005019696860609303,
      "loss": 3.0934,
      "step": 61035
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5647828578948975,
      "learning_rate": 0.0005019666613534454,
      "loss": 2.8633,
      "step": 61036
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.341829776763916,
      "learning_rate": 0.0005019636366084111,
      "loss": 3.0656,
      "step": 61037
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5889803171157837,
      "learning_rate": 0.0005019606118258277,
      "loss": 2.8854,
      "step": 61038
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8194729089736938,
      "learning_rate": 0.0005019575870056963,
      "loss": 3.1341,
      "step": 61039
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5096838474273682,
      "learning_rate": 0.000501954562148017,
      "loss": 3.11,
      "step": 61040
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4076112508773804,
      "learning_rate": 0.0005019515372527906,
      "loss": 2.9359,
      "step": 61041
    },
    {
      "epoch": 0.79,
      "grad_norm": 2.1031012535095215,
      "learning_rate": 0.0005019485123200177,
      "loss": 3.0259,
      "step": 61042
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.609940767288208,
      "learning_rate": 0.0005019454873496986,
      "loss": 3.1424,
      "step": 61043
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4177534580230713,
      "learning_rate": 0.0005019424623418341,
      "loss": 3.0064,
      "step": 61044
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5880831480026245,
      "learning_rate": 0.0005019394372964246,
      "loss": 2.9755,
      "step": 61045
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.9297235012054443,
      "learning_rate": 0.0005019364122134708,
      "loss": 3.0613,
      "step": 61046
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6141515970230103,
      "learning_rate": 0.0005019333870929732,
      "loss": 2.9523,
      "step": 61047
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.7611973285675049,
      "learning_rate": 0.0005019303619349325,
      "loss": 3.1743,
      "step": 61048
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5464686155319214,
      "learning_rate": 0.000501927336739349,
      "loss": 2.9133,
      "step": 61049
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4661691188812256,
      "learning_rate": 0.0005019243115062233,
      "loss": 2.9733,
      "step": 61050
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5437053442001343,
      "learning_rate": 0.0005019212862355563,
      "loss": 3.0407,
      "step": 61051
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.6527888774871826,
      "learning_rate": 0.0005019182609273482,
      "loss": 3.056,
      "step": 61052
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4985960721969604,
      "learning_rate": 0.0005019152355815996,
      "loss": 3.1435,
      "step": 61053
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.5255308151245117,
      "learning_rate": 0.0005019122101983113,
      "loss": 3.0424,
      "step": 61054
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.8660377264022827,
      "learning_rate": 0.0005019091847774836,
      "loss": 2.8279,
      "step": 61055
    },
    {
      "epoch": 0.79,
      "grad_norm": 1.4823503494262695,
      "learning_rate": 0.0005019061593191172,
      "loss": 3.3666,
      "step": 61056
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.668250322341919,
      "learning_rate": 0.0005019031338232127,
      "loss": 2.8663,
      "step": 61057
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7073349952697754,
      "learning_rate": 0.0005019001082897705,
      "loss": 2.9945,
      "step": 61058
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7368285655975342,
      "learning_rate": 0.0005018970827187913,
      "loss": 2.9842,
      "step": 61059
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.809114933013916,
      "learning_rate": 0.0005018940571102756,
      "loss": 2.8657,
      "step": 61060
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4721715450286865,
      "learning_rate": 0.0005018910314642241,
      "loss": 3.1664,
      "step": 61061
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6759907007217407,
      "learning_rate": 0.0005018880057806371,
      "loss": 3.0198,
      "step": 61062
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.53074049949646,
      "learning_rate": 0.0005018849800595153,
      "loss": 3.177,
      "step": 61063
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8166249990463257,
      "learning_rate": 0.0005018819543008594,
      "loss": 3.162,
      "step": 61064
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.88553786277771,
      "learning_rate": 0.0005018789285046697,
      "loss": 3.023,
      "step": 61065
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4232417345046997,
      "learning_rate": 0.000501875902670947,
      "loss": 3.2011,
      "step": 61066
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.242368459701538,
      "learning_rate": 0.0005018728767996917,
      "loss": 3.0813,
      "step": 61067
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.703689455986023,
      "learning_rate": 0.0005018698508909045,
      "loss": 3.2775,
      "step": 61068
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7713544368743896,
      "learning_rate": 0.0005018668249445857,
      "loss": 3.0247,
      "step": 61069
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.9708197116851807,
      "learning_rate": 0.0005018637989607362,
      "loss": 3.0075,
      "step": 61070
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7069406509399414,
      "learning_rate": 0.0005018607729393564,
      "loss": 2.9889,
      "step": 61071
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.894327998161316,
      "learning_rate": 0.0005018577468804469,
      "loss": 3.4196,
      "step": 61072
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4130001068115234,
      "learning_rate": 0.0005018547207840081,
      "loss": 2.8029,
      "step": 61073
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7908798456192017,
      "learning_rate": 0.0005018516946500408,
      "loss": 2.9735,
      "step": 61074
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9233434200286865,
      "learning_rate": 0.0005018486684785455,
      "loss": 2.8578,
      "step": 61075
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.906522274017334,
      "learning_rate": 0.0005018456422695226,
      "loss": 3.127,
      "step": 61076
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.2161593437194824,
      "learning_rate": 0.0005018426160229729,
      "loss": 3.0758,
      "step": 61077
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9846534729003906,
      "learning_rate": 0.0005018395897388968,
      "loss": 3.2372,
      "step": 61078
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5818865299224854,
      "learning_rate": 0.0005018365634172949,
      "loss": 3.1086,
      "step": 61079
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4926111698150635,
      "learning_rate": 0.0005018335370581678,
      "loss": 3.1309,
      "step": 61080
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4038753509521484,
      "learning_rate": 0.0005018305106615161,
      "loss": 3.1665,
      "step": 61081
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8839973211288452,
      "learning_rate": 0.0005018274842273401,
      "loss": 2.9115,
      "step": 61082
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7660855054855347,
      "learning_rate": 0.0005018244577556407,
      "loss": 2.9045,
      "step": 61083
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8956513404846191,
      "learning_rate": 0.0005018214312464183,
      "loss": 3.2814,
      "step": 61084
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7078964710235596,
      "learning_rate": 0.0005018184046996735,
      "loss": 3.0367,
      "step": 61085
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.486404299736023,
      "learning_rate": 0.0005018153781154069,
      "loss": 2.8914,
      "step": 61086
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.1828463077545166,
      "learning_rate": 0.000501812351493619,
      "loss": 3.1727,
      "step": 61087
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6350338459014893,
      "learning_rate": 0.0005018093248343101,
      "loss": 3.0304,
      "step": 61088
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2354788780212402,
      "learning_rate": 0.0005018062981374814,
      "loss": 3.106,
      "step": 61089
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.45705509185791,
      "learning_rate": 0.0005018032714031329,
      "loss": 3.187,
      "step": 61090
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.058410406112671,
      "learning_rate": 0.0005018002446312654,
      "loss": 3.0626,
      "step": 61091
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4574811458587646,
      "learning_rate": 0.0005017972178218796,
      "loss": 3.001,
      "step": 61092
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4505133628845215,
      "learning_rate": 0.0005017941909749756,
      "loss": 2.9712,
      "step": 61093
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5529677867889404,
      "learning_rate": 0.0005017911640905544,
      "loss": 2.8236,
      "step": 61094
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5025644302368164,
      "learning_rate": 0.0005017881371686164,
      "loss": 3.1187,
      "step": 61095
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7005324363708496,
      "learning_rate": 0.0005017851102091623,
      "loss": 2.9484,
      "step": 61096
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.749051809310913,
      "learning_rate": 0.0005017820832121923,
      "loss": 3.0048,
      "step": 61097
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.893030047416687,
      "learning_rate": 0.0005017790561777073,
      "loss": 2.9652,
      "step": 61098
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6621806621551514,
      "learning_rate": 0.0005017760291057078,
      "loss": 3.0538,
      "step": 61099
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.644779086112976,
      "learning_rate": 0.0005017730019961944,
      "loss": 2.9956,
      "step": 61100
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6038612127304077,
      "learning_rate": 0.0005017699748491674,
      "loss": 3.206,
      "step": 61101
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5625678300857544,
      "learning_rate": 0.0005017669476646276,
      "loss": 2.9054,
      "step": 61102
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4803380966186523,
      "learning_rate": 0.0005017639204425756,
      "loss": 3.1481,
      "step": 61103
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.488986611366272,
      "learning_rate": 0.0005017608931830119,
      "loss": 2.9499,
      "step": 61104
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6033916473388672,
      "learning_rate": 0.0005017578658859369,
      "loss": 2.8502,
      "step": 61105
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.704416513442993,
      "learning_rate": 0.0005017548385513514,
      "loss": 2.9131,
      "step": 61106
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4550858736038208,
      "learning_rate": 0.000501751811179256,
      "loss": 2.9996,
      "step": 61107
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5220725536346436,
      "learning_rate": 0.0005017487837696508,
      "loss": 3.1452,
      "step": 61108
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.437218427658081,
      "learning_rate": 0.000501745756322537,
      "loss": 3.3806,
      "step": 61109
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.2103452682495117,
      "learning_rate": 0.0005017427288379147,
      "loss": 2.8792,
      "step": 61110
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.9741227626800537,
      "learning_rate": 0.0005017397013157846,
      "loss": 3.0694,
      "step": 61111
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.8206207752227783,
      "learning_rate": 0.0005017366737561474,
      "loss": 3.0668,
      "step": 61112
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.638994812965393,
      "learning_rate": 0.0005017336461590034,
      "loss": 3.1628,
      "step": 61113
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.413906097412109,
      "learning_rate": 0.0005017306185243534,
      "loss": 2.85,
      "step": 61114
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3045904636383057,
      "learning_rate": 0.0005017275908521979,
      "loss": 3.0207,
      "step": 61115
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8355900049209595,
      "learning_rate": 0.0005017245631425374,
      "loss": 2.9872,
      "step": 61116
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5873022079467773,
      "learning_rate": 0.0005017215353953725,
      "loss": 3.2057,
      "step": 61117
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.059788227081299,
      "learning_rate": 0.0005017185076107038,
      "loss": 2.9617,
      "step": 61118
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.682936668395996,
      "learning_rate": 0.0005017154797885317,
      "loss": 2.905,
      "step": 61119
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7242300510406494,
      "learning_rate": 0.0005017124519288569,
      "loss": 3.1106,
      "step": 61120
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.841593623161316,
      "learning_rate": 0.0005017094240316801,
      "loss": 3.1655,
      "step": 61121
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8292584419250488,
      "learning_rate": 0.0005017063960970016,
      "loss": 3.1197,
      "step": 61122
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7481825351715088,
      "learning_rate": 0.0005017033681248221,
      "loss": 3.1567,
      "step": 61123
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.462451934814453,
      "learning_rate": 0.000501700340115142,
      "loss": 2.9102,
      "step": 61124
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4291468858718872,
      "learning_rate": 0.0005016973120679622,
      "loss": 3.2715,
      "step": 61125
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4578607082366943,
      "learning_rate": 0.000501694283983283,
      "loss": 2.9042,
      "step": 61126
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.606407642364502,
      "learning_rate": 0.0005016912558611049,
      "loss": 2.8793,
      "step": 61127
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7176539897918701,
      "learning_rate": 0.0005016882277014288,
      "loss": 3.049,
      "step": 61128
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5928858518600464,
      "learning_rate": 0.0005016851995042549,
      "loss": 3.193,
      "step": 61129
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5661953687667847,
      "learning_rate": 0.000501682171269584,
      "loss": 3.2341,
      "step": 61130
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4132306575775146,
      "learning_rate": 0.0005016791429974165,
      "loss": 3.0731,
      "step": 61131
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8862463235855103,
      "learning_rate": 0.0005016761146877532,
      "loss": 3.1173,
      "step": 61132
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.651428461074829,
      "learning_rate": 0.0005016730863405943,
      "loss": 3.0956,
      "step": 61133
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3360884189605713,
      "learning_rate": 0.0005016700579559407,
      "loss": 2.937,
      "step": 61134
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.829792857170105,
      "learning_rate": 0.0005016670295337927,
      "loss": 2.9128,
      "step": 61135
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.50557279586792,
      "learning_rate": 0.000501664001074151,
      "loss": 3.4077,
      "step": 61136
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.261174440383911,
      "learning_rate": 0.0005016609725770163,
      "loss": 2.9661,
      "step": 61137
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2831590175628662,
      "learning_rate": 0.0005016579440423889,
      "loss": 3.0729,
      "step": 61138
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4283695220947266,
      "learning_rate": 0.0005016549154702695,
      "loss": 3.0903,
      "step": 61139
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4528571367263794,
      "learning_rate": 0.0005016518868606587,
      "loss": 3.1914,
      "step": 61140
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7503936290740967,
      "learning_rate": 0.0005016488582135571,
      "loss": 2.9327,
      "step": 61141
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0852019786834717,
      "learning_rate": 0.000501645829528965,
      "loss": 3.0313,
      "step": 61142
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8314216136932373,
      "learning_rate": 0.0005016428008068832,
      "loss": 3.1427,
      "step": 61143
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6617181301116943,
      "learning_rate": 0.0005016397720473122,
      "loss": 3.2068,
      "step": 61144
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.453966736793518,
      "learning_rate": 0.0005016367432502525,
      "loss": 3.0318,
      "step": 61145
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3663402795791626,
      "learning_rate": 0.0005016337144157048,
      "loss": 3.0641,
      "step": 61146
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5695897340774536,
      "learning_rate": 0.0005016306855436694,
      "loss": 2.9256,
      "step": 61147
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.48831307888031,
      "learning_rate": 0.0005016276566341473,
      "loss": 3.1473,
      "step": 61148
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6065099239349365,
      "learning_rate": 0.0005016246276871387,
      "loss": 3.1106,
      "step": 61149
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4343217611312866,
      "learning_rate": 0.0005016215987026443,
      "loss": 3.1402,
      "step": 61150
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5384868383407593,
      "learning_rate": 0.0005016185696806646,
      "loss": 2.8389,
      "step": 61151
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5414748191833496,
      "learning_rate": 0.0005016155406212003,
      "loss": 3.116,
      "step": 61152
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.726254940032959,
      "learning_rate": 0.0005016125115242517,
      "loss": 2.9064,
      "step": 61153
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8497755527496338,
      "learning_rate": 0.0005016094823898196,
      "loss": 3.1553,
      "step": 61154
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5095205307006836,
      "learning_rate": 0.0005016064532179045,
      "loss": 3.1048,
      "step": 61155
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5228464603424072,
      "learning_rate": 0.0005016034240085071,
      "loss": 3.3098,
      "step": 61156
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.232410192489624,
      "learning_rate": 0.0005016003947616276,
      "loss": 3.0858,
      "step": 61157
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.620911717414856,
      "learning_rate": 0.0005015973654772668,
      "loss": 3.2388,
      "step": 61158
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2307729721069336,
      "learning_rate": 0.0005015943361554255,
      "loss": 2.9108,
      "step": 61159
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6401305198669434,
      "learning_rate": 0.0005015913067961037,
      "loss": 3.3291,
      "step": 61160
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.631916046142578,
      "learning_rate": 0.0005015882773993024,
      "loss": 3.0897,
      "step": 61161
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.761364698410034,
      "learning_rate": 0.000501585247965022,
      "loss": 2.9854,
      "step": 61162
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.440408706665039,
      "learning_rate": 0.0005015822184932632,
      "loss": 3.1244,
      "step": 61163
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4745264053344727,
      "learning_rate": 0.0005015791889840263,
      "loss": 3.0358,
      "step": 61164
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5925096273422241,
      "learning_rate": 0.0005015761594373121,
      "loss": 3.3507,
      "step": 61165
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5592414140701294,
      "learning_rate": 0.0005015731298531212,
      "loss": 2.9006,
      "step": 61166
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0245132446289062,
      "learning_rate": 0.0005015701002314539,
      "loss": 3.1176,
      "step": 61167
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5531585216522217,
      "learning_rate": 0.000501567070572311,
      "loss": 3.0797,
      "step": 61168
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6028765439987183,
      "learning_rate": 0.0005015640408756928,
      "loss": 3.0465,
      "step": 61169
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7401180267333984,
      "learning_rate": 0.0005015610111416002,
      "loss": 2.9195,
      "step": 61170
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5680546760559082,
      "learning_rate": 0.0005015579813700337,
      "loss": 2.8162,
      "step": 61171
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5839920043945312,
      "learning_rate": 0.0005015549515609936,
      "loss": 2.9581,
      "step": 61172
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4735783338546753,
      "learning_rate": 0.0005015519217144806,
      "loss": 3.2085,
      "step": 61173
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.515647292137146,
      "learning_rate": 0.0005015488918304954,
      "loss": 3.0455,
      "step": 61174
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.398500680923462,
      "learning_rate": 0.0005015458619090384,
      "loss": 2.8515,
      "step": 61175
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3161280155181885,
      "learning_rate": 0.0005015428319501103,
      "loss": 2.8599,
      "step": 61176
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.468487024307251,
      "learning_rate": 0.0005015398019537115,
      "loss": 2.9069,
      "step": 61177
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7910921573638916,
      "learning_rate": 0.0005015367719198427,
      "loss": 2.9529,
      "step": 61178
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6814285516738892,
      "learning_rate": 0.0005015337418485044,
      "loss": 3.1905,
      "step": 61179
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.38624107837677,
      "learning_rate": 0.0005015307117396972,
      "loss": 2.8534,
      "step": 61180
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.347275495529175,
      "learning_rate": 0.0005015276815934214,
      "loss": 2.9366,
      "step": 61181
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7894513607025146,
      "learning_rate": 0.0005015246514096781,
      "loss": 3.0008,
      "step": 61182
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.729349136352539,
      "learning_rate": 0.0005015216211884674,
      "loss": 3.2071,
      "step": 61183
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.274426221847534,
      "learning_rate": 0.0005015185909297901,
      "loss": 2.9542,
      "step": 61184
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4778481721878052,
      "learning_rate": 0.0005015155606336467,
      "loss": 2.893,
      "step": 61185
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5157334804534912,
      "learning_rate": 0.0005015125303000377,
      "loss": 3.0635,
      "step": 61186
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4745627641677856,
      "learning_rate": 0.0005015094999289638,
      "loss": 3.0304,
      "step": 61187
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8812657594680786,
      "learning_rate": 0.0005015064695204253,
      "loss": 3.0218,
      "step": 61188
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.683089017868042,
      "learning_rate": 0.0005015034390744231,
      "loss": 2.9263,
      "step": 61189
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5500894784927368,
      "learning_rate": 0.0005015004085909576,
      "loss": 2.9479,
      "step": 61190
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6024765968322754,
      "learning_rate": 0.0005014973780700293,
      "loss": 2.947,
      "step": 61191
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5678738355636597,
      "learning_rate": 0.0005014943475116389,
      "loss": 3.1145,
      "step": 61192
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0584323406219482,
      "learning_rate": 0.0005014913169157869,
      "loss": 3.1573,
      "step": 61193
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.9821207523345947,
      "learning_rate": 0.0005014882862824738,
      "loss": 2.9814,
      "step": 61194
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.227560043334961,
      "learning_rate": 0.0005014852556117003,
      "loss": 2.8294,
      "step": 61195
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7010424137115479,
      "learning_rate": 0.0005014822249034668,
      "loss": 3.1638,
      "step": 61196
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.6634340286254883,
      "learning_rate": 0.000501479194157774,
      "loss": 3.0458,
      "step": 61197
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6281609535217285,
      "learning_rate": 0.0005014761633746225,
      "loss": 2.9961,
      "step": 61198
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.778852939605713,
      "learning_rate": 0.0005014731325540126,
      "loss": 2.9335,
      "step": 61199
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8370648622512817,
      "learning_rate": 0.0005014701016959451,
      "loss": 2.9727,
      "step": 61200
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6680864095687866,
      "learning_rate": 0.0005014670708004206,
      "loss": 3.2448,
      "step": 61201
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8181302547454834,
      "learning_rate": 0.0005014640398674395,
      "loss": 2.8247,
      "step": 61202
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6885569095611572,
      "learning_rate": 0.0005014610088970023,
      "loss": 2.9,
      "step": 61203
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7961984872817993,
      "learning_rate": 0.0005014579778891099,
      "loss": 3.2009,
      "step": 61204
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4517548084259033,
      "learning_rate": 0.0005014549468437626,
      "loss": 3.1964,
      "step": 61205
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3723385334014893,
      "learning_rate": 0.0005014519157609611,
      "loss": 2.9704,
      "step": 61206
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5597268342971802,
      "learning_rate": 0.0005014488846407058,
      "loss": 2.9607,
      "step": 61207
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6739230155944824,
      "learning_rate": 0.0005014458534829974,
      "loss": 3.0057,
      "step": 61208
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.167698860168457,
      "learning_rate": 0.0005014428222878363,
      "loss": 3.0832,
      "step": 61209
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.653242588043213,
      "learning_rate": 0.0005014397910552232,
      "loss": 3.1507,
      "step": 61210
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7079023122787476,
      "learning_rate": 0.0005014367597851587,
      "loss": 3.1003,
      "step": 61211
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.163485527038574,
      "learning_rate": 0.0005014337284776432,
      "loss": 2.9335,
      "step": 61212
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9141653776168823,
      "learning_rate": 0.0005014306971326775,
      "loss": 3.1495,
      "step": 61213
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9915733337402344,
      "learning_rate": 0.000501427665750262,
      "loss": 2.9015,
      "step": 61214
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8919662237167358,
      "learning_rate": 0.0005014246343303973,
      "loss": 3.0768,
      "step": 61215
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6808054447174072,
      "learning_rate": 0.000501421602873084,
      "loss": 2.8476,
      "step": 61216
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8655847311019897,
      "learning_rate": 0.0005014185713783225,
      "loss": 3.14,
      "step": 61217
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.468419075012207,
      "learning_rate": 0.0005014155398461135,
      "loss": 2.8592,
      "step": 61218
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9037518501281738,
      "learning_rate": 0.0005014125082764577,
      "loss": 2.9452,
      "step": 61219
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.008436441421509,
      "learning_rate": 0.0005014094766693553,
      "loss": 3.0526,
      "step": 61220
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5797970294952393,
      "learning_rate": 0.0005014064450248072,
      "loss": 3.1413,
      "step": 61221
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7243937253952026,
      "learning_rate": 0.0005014034133428137,
      "loss": 3.1724,
      "step": 61222
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3927026987075806,
      "learning_rate": 0.0005014003816233757,
      "loss": 3.3269,
      "step": 61223
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3146440982818604,
      "learning_rate": 0.0005013973498664935,
      "loss": 3.0983,
      "step": 61224
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7019309997558594,
      "learning_rate": 0.0005013943180721678,
      "loss": 3.0659,
      "step": 61225
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5501887798309326,
      "learning_rate": 0.0005013912862403989,
      "loss": 3.1494,
      "step": 61226
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3874958753585815,
      "learning_rate": 0.0005013882543711878,
      "loss": 3.0636,
      "step": 61227
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4950450658798218,
      "learning_rate": 0.0005013852224645347,
      "loss": 2.9965,
      "step": 61228
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.0242154598236084,
      "learning_rate": 0.0005013821905204402,
      "loss": 3.1277,
      "step": 61229
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2488412857055664,
      "learning_rate": 0.0005013791585389051,
      "loss": 3.1353,
      "step": 61230
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.726572036743164,
      "learning_rate": 0.0005013761265199297,
      "loss": 2.8201,
      "step": 61231
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.711321473121643,
      "learning_rate": 0.0005013730944635148,
      "loss": 3.0659,
      "step": 61232
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0996763706207275,
      "learning_rate": 0.0005013700623696608,
      "loss": 2.7979,
      "step": 61233
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6334091424942017,
      "learning_rate": 0.0005013670302383682,
      "loss": 3.1071,
      "step": 61234
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9222297668457031,
      "learning_rate": 0.0005013639980696377,
      "loss": 3.0845,
      "step": 61235
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4986881017684937,
      "learning_rate": 0.0005013609658634699,
      "loss": 2.8476,
      "step": 61236
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5309549570083618,
      "learning_rate": 0.0005013579336198652,
      "loss": 3.0275,
      "step": 61237
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4222490787506104,
      "learning_rate": 0.0005013549013388244,
      "loss": 2.8637,
      "step": 61238
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6855010986328125,
      "learning_rate": 0.0005013518690203478,
      "loss": 2.9352,
      "step": 61239
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5096291303634644,
      "learning_rate": 0.0005013488366644362,
      "loss": 3.0786,
      "step": 61240
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.523969054222107,
      "learning_rate": 0.0005013458042710899,
      "loss": 3.2065,
      "step": 61241
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3708117008209229,
      "learning_rate": 0.0005013427718403098,
      "loss": 2.9413,
      "step": 61242
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4149168729782104,
      "learning_rate": 0.000501339739372096,
      "loss": 3.1309,
      "step": 61243
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9001219272613525,
      "learning_rate": 0.0005013367068664496,
      "loss": 2.9245,
      "step": 61244
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4024873971939087,
      "learning_rate": 0.0005013336743233709,
      "loss": 2.9258,
      "step": 61245
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.682518482208252,
      "learning_rate": 0.0005013306417428603,
      "loss": 2.9064,
      "step": 61246
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4887464046478271,
      "learning_rate": 0.0005013276091249187,
      "loss": 3.0899,
      "step": 61247
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.444115161895752,
      "learning_rate": 0.0005013245764695464,
      "loss": 3.0065,
      "step": 61248
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.39091157913208,
      "learning_rate": 0.0005013215437767441,
      "loss": 2.9549,
      "step": 61249
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8104147911071777,
      "learning_rate": 0.0005013185110465122,
      "loss": 3.1479,
      "step": 61250
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6381382942199707,
      "learning_rate": 0.0005013154782788516,
      "loss": 3.1566,
      "step": 61251
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9611669778823853,
      "learning_rate": 0.0005013124454737625,
      "loss": 2.9919,
      "step": 61252
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5091851949691772,
      "learning_rate": 0.0005013094126312457,
      "loss": 2.835,
      "step": 61253
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9212697744369507,
      "learning_rate": 0.0005013063797513016,
      "loss": 3.0976,
      "step": 61254
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.863204836845398,
      "learning_rate": 0.0005013033468339309,
      "loss": 2.9305,
      "step": 61255
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7786319255828857,
      "learning_rate": 0.0005013003138791342,
      "loss": 2.7667,
      "step": 61256
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8758518695831299,
      "learning_rate": 0.0005012972808869118,
      "loss": 3.0131,
      "step": 61257
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3721967935562134,
      "learning_rate": 0.0005012942478572644,
      "loss": 3.027,
      "step": 61258
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7262998819351196,
      "learning_rate": 0.0005012912147901929,
      "loss": 3.1705,
      "step": 61259
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0239453315734863,
      "learning_rate": 0.0005012881816856972,
      "loss": 3.0315,
      "step": 61260
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4216132164001465,
      "learning_rate": 0.0005012851485437784,
      "loss": 3.1122,
      "step": 61261
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5121573209762573,
      "learning_rate": 0.000501282115364437,
      "loss": 3.1341,
      "step": 61262
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4470313787460327,
      "learning_rate": 0.0005012790821476733,
      "loss": 3.2009,
      "step": 61263
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8340693712234497,
      "learning_rate": 0.000501276048893488,
      "loss": 3.0325,
      "step": 61264
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.446221113204956,
      "learning_rate": 0.0005012730156018818,
      "loss": 2.9251,
      "step": 61265
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5101557970046997,
      "learning_rate": 0.0005012699822728551,
      "loss": 3.0788,
      "step": 61266
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5582733154296875,
      "learning_rate": 0.0005012669489064084,
      "loss": 2.8323,
      "step": 61267
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.417197585105896,
      "learning_rate": 0.0005012639155025425,
      "loss": 3.0828,
      "step": 61268
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5616323947906494,
      "learning_rate": 0.0005012608820612579,
      "loss": 3.0603,
      "step": 61269
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.431047797203064,
      "learning_rate": 0.000501257848582555,
      "loss": 3.1296,
      "step": 61270
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3579297065734863,
      "learning_rate": 0.0005012548150664345,
      "loss": 3.15,
      "step": 61271
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.590377688407898,
      "learning_rate": 0.0005012517815128969,
      "loss": 2.8615,
      "step": 61272
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6694695949554443,
      "learning_rate": 0.0005012487479219428,
      "loss": 3.1633,
      "step": 61273
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6974687576293945,
      "learning_rate": 0.0005012457142935727,
      "loss": 2.8043,
      "step": 61274
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6985889673233032,
      "learning_rate": 0.0005012426806277874,
      "loss": 3.0806,
      "step": 61275
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.516700267791748,
      "learning_rate": 0.0005012396469245871,
      "loss": 3.0163,
      "step": 61276
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6973966360092163,
      "learning_rate": 0.0005012366131839726,
      "loss": 3.2003,
      "step": 61277
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0224673748016357,
      "learning_rate": 0.0005012335794059446,
      "loss": 3.2279,
      "step": 61278
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6153018474578857,
      "learning_rate": 0.0005012305455905033,
      "loss": 3.2648,
      "step": 61279
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4188096523284912,
      "learning_rate": 0.0005012275117376495,
      "loss": 3.0703,
      "step": 61280
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7859914302825928,
      "learning_rate": 0.0005012244778473836,
      "loss": 3.0456,
      "step": 61281
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.697337031364441,
      "learning_rate": 0.0005012214439197064,
      "loss": 2.9897,
      "step": 61282
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3147099018096924,
      "learning_rate": 0.0005012184099546183,
      "loss": 2.7584,
      "step": 61283
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.519179344177246,
      "learning_rate": 0.0005012153759521199,
      "loss": 2.9172,
      "step": 61284
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.59531569480896,
      "learning_rate": 0.0005012123419122116,
      "loss": 2.8743,
      "step": 61285
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.393908143043518,
      "learning_rate": 0.0005012093078348944,
      "loss": 3.2256,
      "step": 61286
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3296563625335693,
      "learning_rate": 0.0005012062737201683,
      "loss": 2.7858,
      "step": 61287
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7086395025253296,
      "learning_rate": 0.0005012032395680344,
      "loss": 2.9774,
      "step": 61288
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7768175601959229,
      "learning_rate": 0.0005012002053784929,
      "loss": 2.8885,
      "step": 61289
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3306243419647217,
      "learning_rate": 0.0005011971711515445,
      "loss": 3.0128,
      "step": 61290
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5671383142471313,
      "learning_rate": 0.0005011941368871899,
      "loss": 3.1583,
      "step": 61291
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0953369140625,
      "learning_rate": 0.0005011911025854293,
      "loss": 2.9982,
      "step": 61292
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2409508228302,
      "learning_rate": 0.0005011880682462636,
      "loss": 2.8994,
      "step": 61293
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9120283126831055,
      "learning_rate": 0.0005011850338696932,
      "loss": 2.9301,
      "step": 61294
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5749601125717163,
      "learning_rate": 0.0005011819994557187,
      "loss": 3.0981,
      "step": 61295
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6771466732025146,
      "learning_rate": 0.0005011789650043407,
      "loss": 2.957,
      "step": 61296
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7151055335998535,
      "learning_rate": 0.0005011759305155597,
      "loss": 2.9927,
      "step": 61297
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9862017631530762,
      "learning_rate": 0.0005011728959893763,
      "loss": 3.0937,
      "step": 61298
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.871794581413269,
      "learning_rate": 0.000501169861425791,
      "loss": 2.882,
      "step": 61299
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.240201950073242,
      "learning_rate": 0.0005011668268248046,
      "loss": 3.0384,
      "step": 61300
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.475602388381958,
      "learning_rate": 0.0005011637921864174,
      "loss": 2.6737,
      "step": 61301
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.003277063369751,
      "learning_rate": 0.00050116075751063,
      "loss": 3.0367,
      "step": 61302
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7161492109298706,
      "learning_rate": 0.000501157722797443,
      "loss": 2.9705,
      "step": 61303
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3681609630584717,
      "learning_rate": 0.0005011546880468571,
      "loss": 2.8857,
      "step": 61304
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0204317569732666,
      "learning_rate": 0.0005011516532588726,
      "loss": 2.9175,
      "step": 61305
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3629138469696045,
      "learning_rate": 0.0005011486184334903,
      "loss": 2.9231,
      "step": 61306
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8221051692962646,
      "learning_rate": 0.0005011455835707107,
      "loss": 3.0019,
      "step": 61307
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.520300030708313,
      "learning_rate": 0.0005011425486705342,
      "loss": 2.8705,
      "step": 61308
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7607719898223877,
      "learning_rate": 0.0005011395137329617,
      "loss": 3.1178,
      "step": 61309
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5293114185333252,
      "learning_rate": 0.0005011364787579934,
      "loss": 3.1207,
      "step": 61310
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4890543222427368,
      "learning_rate": 0.0005011334437456302,
      "loss": 3.0974,
      "step": 61311
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5510838031768799,
      "learning_rate": 0.0005011304086958723,
      "loss": 2.9299,
      "step": 61312
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4004985094070435,
      "learning_rate": 0.0005011273736087205,
      "loss": 3.0501,
      "step": 61313
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5222303867340088,
      "learning_rate": 0.0005011243384841756,
      "loss": 2.8804,
      "step": 61314
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.738996982574463,
      "learning_rate": 0.0005011213033222376,
      "loss": 3.0426,
      "step": 61315
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3812313079833984,
      "learning_rate": 0.0005011182681229074,
      "loss": 3.2935,
      "step": 61316
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5564019680023193,
      "learning_rate": 0.0005011152328861856,
      "loss": 3.1333,
      "step": 61317
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.789583444595337,
      "learning_rate": 0.0005011121976120726,
      "loss": 2.9949,
      "step": 61318
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9935321807861328,
      "learning_rate": 0.000501109162300569,
      "loss": 3.1505,
      "step": 61319
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8027499914169312,
      "learning_rate": 0.0005011061269516755,
      "loss": 2.9381,
      "step": 61320
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.89083731174469,
      "learning_rate": 0.0005011030915653927,
      "loss": 2.9106,
      "step": 61321
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.666658639907837,
      "learning_rate": 0.0005011000561417208,
      "loss": 2.9926,
      "step": 61322
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6074426174163818,
      "learning_rate": 0.0005010970206806608,
      "loss": 3.0039,
      "step": 61323
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.807199478149414,
      "learning_rate": 0.0005010939851822129,
      "loss": 3.325,
      "step": 61324
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.089508533477783,
      "learning_rate": 0.000501090949646378,
      "loss": 2.7613,
      "step": 61325
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.213907480239868,
      "learning_rate": 0.0005010879140731564,
      "loss": 3.052,
      "step": 61326
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5923372507095337,
      "learning_rate": 0.0005010848784625486,
      "loss": 3.1054,
      "step": 61327
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9185757637023926,
      "learning_rate": 0.0005010818428145556,
      "loss": 2.9072,
      "step": 61328
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.568388819694519,
      "learning_rate": 0.0005010788071291776,
      "loss": 2.9444,
      "step": 61329
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5294296741485596,
      "learning_rate": 0.0005010757714064152,
      "loss": 3.0596,
      "step": 61330
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8249454498291016,
      "learning_rate": 0.000501072735646269,
      "loss": 3.0551,
      "step": 61331
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2347092628479004,
      "learning_rate": 0.0005010696998487397,
      "loss": 2.9477,
      "step": 61332
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5024559497833252,
      "learning_rate": 0.0005010666640138276,
      "loss": 3.0568,
      "step": 61333
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7498754262924194,
      "learning_rate": 0.0005010636281415335,
      "loss": 3.1928,
      "step": 61334
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2936110496520996,
      "learning_rate": 0.0005010605922318579,
      "loss": 3.0915,
      "step": 61335
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9749754667282104,
      "learning_rate": 0.0005010575562848014,
      "loss": 3.0895,
      "step": 61336
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.7031466960906982,
      "learning_rate": 0.0005010545203003643,
      "loss": 2.9157,
      "step": 61337
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4447206258773804,
      "learning_rate": 0.0005010514842785476,
      "loss": 3.148,
      "step": 61338
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6242872476577759,
      "learning_rate": 0.0005010484482193514,
      "loss": 3.137,
      "step": 61339
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6679219007492065,
      "learning_rate": 0.0005010454121227766,
      "loss": 2.9023,
      "step": 61340
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8750196695327759,
      "learning_rate": 0.0005010423759888238,
      "loss": 3.1388,
      "step": 61341
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6613720655441284,
      "learning_rate": 0.0005010393398174932,
      "loss": 3.1216,
      "step": 61342
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.279048204421997,
      "learning_rate": 0.0005010363036087856,
      "loss": 3.0059,
      "step": 61343
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7777056694030762,
      "learning_rate": 0.0005010332673627018,
      "loss": 2.9229,
      "step": 61344
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1575794219970703,
      "learning_rate": 0.000501030231079242,
      "loss": 2.7675,
      "step": 61345
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7587132453918457,
      "learning_rate": 0.0005010271947584068,
      "loss": 2.9411,
      "step": 61346
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1240556240081787,
      "learning_rate": 0.0005010241584001969,
      "loss": 3.1064,
      "step": 61347
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.109910726547241,
      "learning_rate": 0.0005010211220046128,
      "loss": 3.2081,
      "step": 61348
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8269915580749512,
      "learning_rate": 0.0005010180855716551,
      "loss": 3.1994,
      "step": 61349
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7730495929718018,
      "learning_rate": 0.0005010150491013244,
      "loss": 3.1329,
      "step": 61350
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4442319869995117,
      "learning_rate": 0.0005010120125936211,
      "loss": 3.1373,
      "step": 61351
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2521603107452393,
      "learning_rate": 0.000501008976048546,
      "loss": 3.1382,
      "step": 61352
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6002146005630493,
      "learning_rate": 0.0005010059394660994,
      "loss": 3.1198,
      "step": 61353
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7023744583129883,
      "learning_rate": 0.0005010029028462821,
      "loss": 2.9587,
      "step": 61354
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3922135829925537,
      "learning_rate": 0.0005009998661890945,
      "loss": 3.1657,
      "step": 61355
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3757760524749756,
      "learning_rate": 0.0005009968294945373,
      "loss": 3.2129,
      "step": 61356
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4308483600616455,
      "learning_rate": 0.0005009937927626109,
      "loss": 2.9383,
      "step": 61357
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.35221266746521,
      "learning_rate": 0.0005009907559933161,
      "loss": 3.1015,
      "step": 61358
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.6444761753082275,
      "learning_rate": 0.0005009877191866531,
      "loss": 3.0265,
      "step": 61359
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.7608375549316406,
      "learning_rate": 0.0005009846823426229,
      "loss": 3.1243,
      "step": 61360
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4220234155654907,
      "learning_rate": 0.0005009816454612258,
      "loss": 2.9906,
      "step": 61361
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.1505966186523438,
      "learning_rate": 0.0005009786085424623,
      "loss": 3.064,
      "step": 61362
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.6150851249694824,
      "learning_rate": 0.0005009755715863334,
      "loss": 2.8286,
      "step": 61363
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7575013637542725,
      "learning_rate": 0.000500972534592839,
      "loss": 2.8477,
      "step": 61364
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9265788793563843,
      "learning_rate": 0.0005009694975619801,
      "loss": 3.0343,
      "step": 61365
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.635291337966919,
      "learning_rate": 0.0005009664604937573,
      "loss": 2.9636,
      "step": 61366
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.595410108566284,
      "learning_rate": 0.000500963423388171,
      "loss": 3.0542,
      "step": 61367
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.414957284927368,
      "learning_rate": 0.0005009603862452217,
      "loss": 3.1191,
      "step": 61368
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4651775360107422,
      "learning_rate": 0.0005009573490649101,
      "loss": 2.9861,
      "step": 61369
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5700095891952515,
      "learning_rate": 0.0005009543118472369,
      "loss": 3.2287,
      "step": 61370
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0271193981170654,
      "learning_rate": 0.0005009512745922023,
      "loss": 3.0058,
      "step": 61371
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2895960807800293,
      "learning_rate": 0.0005009482372998072,
      "loss": 2.975,
      "step": 61372
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.29459547996521,
      "learning_rate": 0.000500945199970052,
      "loss": 2.897,
      "step": 61373
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5374350547790527,
      "learning_rate": 0.0005009421626029372,
      "loss": 2.9581,
      "step": 61374
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.403334379196167,
      "learning_rate": 0.0005009391251984635,
      "loss": 3.0203,
      "step": 61375
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9227254390716553,
      "learning_rate": 0.0005009360877566315,
      "loss": 2.9207,
      "step": 61376
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4932979345321655,
      "learning_rate": 0.0005009330502774416,
      "loss": 3.1371,
      "step": 61377
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.913995385169983,
      "learning_rate": 0.0005009300127608945,
      "loss": 2.9924,
      "step": 61378
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8593411445617676,
      "learning_rate": 0.0005009269752069906,
      "loss": 2.9518,
      "step": 61379
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8535218238830566,
      "learning_rate": 0.0005009239376157307,
      "loss": 2.8941,
      "step": 61380
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0884439945220947,
      "learning_rate": 0.0005009208999871153,
      "loss": 2.8697,
      "step": 61381
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0048046112060547,
      "learning_rate": 0.0005009178623211449,
      "loss": 3.2373,
      "step": 61382
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9875952005386353,
      "learning_rate": 0.00050091482461782,
      "loss": 3.0433,
      "step": 61383
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7040135860443115,
      "learning_rate": 0.0005009117868771413,
      "loss": 2.8491,
      "step": 61384
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3351805210113525,
      "learning_rate": 0.0005009087490991092,
      "loss": 3.1202,
      "step": 61385
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4670965671539307,
      "learning_rate": 0.0005009057112837245,
      "loss": 2.9468,
      "step": 61386
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8597325086593628,
      "learning_rate": 0.0005009026734309876,
      "loss": 3.2567,
      "step": 61387
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2621057033538818,
      "learning_rate": 0.0005008996355408991,
      "loss": 3.3001,
      "step": 61388
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7805358171463013,
      "learning_rate": 0.0005008965976134594,
      "loss": 3.1112,
      "step": 61389
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.6481335163116455,
      "learning_rate": 0.0005008935596486694,
      "loss": 3.1165,
      "step": 61390
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5393997430801392,
      "learning_rate": 0.0005008905216465295,
      "loss": 3.1496,
      "step": 61391
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6152156591415405,
      "learning_rate": 0.0005008874836070403,
      "loss": 2.9765,
      "step": 61392
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.915103793144226,
      "learning_rate": 0.0005008844455302022,
      "loss": 3.1619,
      "step": 61393
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6649583578109741,
      "learning_rate": 0.000500881407416016,
      "loss": 3.13,
      "step": 61394
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.474223017692566,
      "learning_rate": 0.000500878369264482,
      "loss": 3.0648,
      "step": 61395
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.452972650527954,
      "learning_rate": 0.0005008753310756011,
      "loss": 3.2716,
      "step": 61396
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.213975429534912,
      "learning_rate": 0.0005008722928493736,
      "loss": 3.1358,
      "step": 61397
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0791776180267334,
      "learning_rate": 0.0005008692545858001,
      "loss": 2.7682,
      "step": 61398
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.028871536254883,
      "learning_rate": 0.0005008662162848814,
      "loss": 3.2613,
      "step": 61399
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.398191213607788,
      "learning_rate": 0.0005008631779466177,
      "loss": 2.9988,
      "step": 61400
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8307969570159912,
      "learning_rate": 0.0005008601395710097,
      "loss": 3.2201,
      "step": 61401
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.41042160987854,
      "learning_rate": 0.0005008571011580581,
      "loss": 2.7299,
      "step": 61402
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.557400107383728,
      "learning_rate": 0.0005008540627077635,
      "loss": 2.9824,
      "step": 61403
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6520206928253174,
      "learning_rate": 0.0005008510242201261,
      "loss": 3.0703,
      "step": 61404
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7086141109466553,
      "learning_rate": 0.0005008479856951468,
      "loss": 3.2295,
      "step": 61405
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5455536842346191,
      "learning_rate": 0.0005008449471328261,
      "loss": 2.9624,
      "step": 61406
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4002299308776855,
      "learning_rate": 0.0005008419085331645,
      "loss": 3.0933,
      "step": 61407
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.646784782409668,
      "learning_rate": 0.0005008388698961627,
      "loss": 2.9569,
      "step": 61408
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0666515827178955,
      "learning_rate": 0.000500835831221821,
      "loss": 3.0639,
      "step": 61409
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.769871473312378,
      "learning_rate": 0.0005008327925101402,
      "loss": 3.1611,
      "step": 61410
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.245497941970825,
      "learning_rate": 0.0005008297537611208,
      "loss": 3.1124,
      "step": 61411
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.026914119720459,
      "learning_rate": 0.0005008267149747633,
      "loss": 3.1323,
      "step": 61412
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4674432277679443,
      "learning_rate": 0.0005008236761510685,
      "loss": 2.9027,
      "step": 61413
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.785606861114502,
      "learning_rate": 0.0005008206372900365,
      "loss": 2.8463,
      "step": 61414
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2960445880889893,
      "learning_rate": 0.0005008175983916684,
      "loss": 3.0287,
      "step": 61415
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.399993658065796,
      "learning_rate": 0.0005008145594559644,
      "loss": 3.0119,
      "step": 61416
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4550896883010864,
      "learning_rate": 0.0005008115204829253,
      "loss": 3.0569,
      "step": 61417
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4796428680419922,
      "learning_rate": 0.0005008084814725514,
      "loss": 3.0501,
      "step": 61418
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5021871328353882,
      "learning_rate": 0.0005008054424248434,
      "loss": 2.9357,
      "step": 61419
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5753639936447144,
      "learning_rate": 0.000500802403339802,
      "loss": 3.0065,
      "step": 61420
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5909920930862427,
      "learning_rate": 0.0005007993642174276,
      "loss": 2.933,
      "step": 61421
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5998274087905884,
      "learning_rate": 0.0005007963250577208,
      "loss": 3.1433,
      "step": 61422
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4293354749679565,
      "learning_rate": 0.0005007932858606822,
      "loss": 2.966,
      "step": 61423
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8508754968643188,
      "learning_rate": 0.0005007902466263123,
      "loss": 3.0351,
      "step": 61424
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4397908449172974,
      "learning_rate": 0.0005007872073546117,
      "loss": 3.051,
      "step": 61425
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7925735712051392,
      "learning_rate": 0.000500784168045581,
      "loss": 2.8578,
      "step": 61426
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5426501035690308,
      "learning_rate": 0.0005007811286992207,
      "loss": 3.0505,
      "step": 61427
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3508409261703491,
      "learning_rate": 0.0005007780893155314,
      "loss": 3.0887,
      "step": 61428
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.618937373161316,
      "learning_rate": 0.0005007750498945137,
      "loss": 3.1853,
      "step": 61429
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.072911262512207,
      "learning_rate": 0.0005007720104361681,
      "loss": 2.7338,
      "step": 61430
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.282168984413147,
      "learning_rate": 0.0005007689709404952,
      "loss": 3.1775,
      "step": 61431
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5614851713180542,
      "learning_rate": 0.0005007659314074956,
      "loss": 3.0205,
      "step": 61432
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7002733945846558,
      "learning_rate": 0.0005007628918371698,
      "loss": 3.0617,
      "step": 61433
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8158907890319824,
      "learning_rate": 0.0005007598522295183,
      "loss": 3.0971,
      "step": 61434
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3267425298690796,
      "learning_rate": 0.0005007568125845419,
      "loss": 3.1716,
      "step": 61435
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.852806806564331,
      "learning_rate": 0.000500753772902241,
      "loss": 2.9125,
      "step": 61436
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5530271530151367,
      "learning_rate": 0.0005007507331826161,
      "loss": 3.1418,
      "step": 61437
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7066662311553955,
      "learning_rate": 0.0005007476934256679,
      "loss": 2.8282,
      "step": 61438
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.078423023223877,
      "learning_rate": 0.0005007446536313969,
      "loss": 3.0698,
      "step": 61439
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6624236106872559,
      "learning_rate": 0.0005007416137998036,
      "loss": 3.1825,
      "step": 61440
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6862225532531738,
      "learning_rate": 0.0005007385739308887,
      "loss": 3.029,
      "step": 61441
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1485402584075928,
      "learning_rate": 0.0005007355340246528,
      "loss": 3.0852,
      "step": 61442
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0489699840545654,
      "learning_rate": 0.0005007324940810963,
      "loss": 3.1335,
      "step": 61443
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1963906288146973,
      "learning_rate": 0.0005007294541002199,
      "loss": 2.9904,
      "step": 61444
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.142643690109253,
      "learning_rate": 0.000500726414082024,
      "loss": 2.9221,
      "step": 61445
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.6046862602233887,
      "learning_rate": 0.0005007233740265093,
      "loss": 3.0146,
      "step": 61446
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7929916381835938,
      "learning_rate": 0.0005007203339336764,
      "loss": 3.216,
      "step": 61447
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4460108280181885,
      "learning_rate": 0.0005007172938035257,
      "loss": 2.7472,
      "step": 61448
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5045887231826782,
      "learning_rate": 0.000500714253636058,
      "loss": 3.2317,
      "step": 61449
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5193860530853271,
      "learning_rate": 0.0005007112134312736,
      "loss": 2.701,
      "step": 61450
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.965807318687439,
      "learning_rate": 0.0005007081731891732,
      "loss": 2.944,
      "step": 61451
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5987247228622437,
      "learning_rate": 0.0005007051329097575,
      "loss": 3.0196,
      "step": 61452
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2929880619049072,
      "learning_rate": 0.0005007020925930268,
      "loss": 3.0051,
      "step": 61453
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.6663098335266113,
      "learning_rate": 0.0005006990522389818,
      "loss": 3.06,
      "step": 61454
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6129953861236572,
      "learning_rate": 0.000500696011847623,
      "loss": 2.838,
      "step": 61455
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7032135725021362,
      "learning_rate": 0.0005006929714189512,
      "loss": 3.0311,
      "step": 61456
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1879026889801025,
      "learning_rate": 0.0005006899309529668,
      "loss": 3.1718,
      "step": 61457
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6506717205047607,
      "learning_rate": 0.0005006868904496701,
      "loss": 2.9019,
      "step": 61458
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.196314573287964,
      "learning_rate": 0.0005006838499090622,
      "loss": 2.839,
      "step": 61459
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9243742227554321,
      "learning_rate": 0.0005006808093311432,
      "loss": 3.1027,
      "step": 61460
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.542931079864502,
      "learning_rate": 0.000500677768715914,
      "loss": 2.921,
      "step": 61461
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6534065008163452,
      "learning_rate": 0.0005006747280633749,
      "loss": 3.0653,
      "step": 61462
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.613266944885254,
      "learning_rate": 0.0005006716873735265,
      "loss": 3.05,
      "step": 61463
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9310662746429443,
      "learning_rate": 0.0005006686466463695,
      "loss": 3.0473,
      "step": 61464
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6311007738113403,
      "learning_rate": 0.0005006656058819045,
      "loss": 3.1274,
      "step": 61465
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4503896236419678,
      "learning_rate": 0.000500662565080132,
      "loss": 2.8054,
      "step": 61466
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.857265830039978,
      "learning_rate": 0.0005006595242410523,
      "loss": 2.9657,
      "step": 61467
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5928008556365967,
      "learning_rate": 0.0005006564833646665,
      "loss": 2.8965,
      "step": 61468
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.083047866821289,
      "learning_rate": 0.0005006534424509748,
      "loss": 3.0155,
      "step": 61469
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9305834770202637,
      "learning_rate": 0.0005006504014999777,
      "loss": 3.0824,
      "step": 61470
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5900028944015503,
      "learning_rate": 0.0005006473605116761,
      "loss": 2.835,
      "step": 61471
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.485372543334961,
      "learning_rate": 0.0005006443194860703,
      "loss": 2.9239,
      "step": 61472
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8810663223266602,
      "learning_rate": 0.0005006412784231608,
      "loss": 3.2256,
      "step": 61473
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7017943859100342,
      "learning_rate": 0.0005006382373229485,
      "loss": 3.0232,
      "step": 61474
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2289183139801025,
      "learning_rate": 0.0005006351961854336,
      "loss": 3.0099,
      "step": 61475
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.896153450012207,
      "learning_rate": 0.0005006321550106169,
      "loss": 2.9432,
      "step": 61476
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.548427700996399,
      "learning_rate": 0.0005006291137984989,
      "loss": 3.1294,
      "step": 61477
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4345967769622803,
      "learning_rate": 0.0005006260725490801,
      "loss": 3.3691,
      "step": 61478
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.442333459854126,
      "learning_rate": 0.0005006230312623613,
      "loss": 2.9351,
      "step": 61479
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4214102029800415,
      "learning_rate": 0.0005006199899383426,
      "loss": 3.1025,
      "step": 61480
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5673233270645142,
      "learning_rate": 0.000500616948577025,
      "loss": 3.0863,
      "step": 61481
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4556262493133545,
      "learning_rate": 0.000500613907178409,
      "loss": 2.8936,
      "step": 61482
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5946283340454102,
      "learning_rate": 0.000500610865742495,
      "loss": 3.2104,
      "step": 61483
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2212719917297363,
      "learning_rate": 0.0005006078242692837,
      "loss": 3.2293,
      "step": 61484
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.889483094215393,
      "learning_rate": 0.0005006047827587756,
      "loss": 3.253,
      "step": 61485
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5362727642059326,
      "learning_rate": 0.0005006017412109712,
      "loss": 3.0287,
      "step": 61486
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.876754879951477,
      "learning_rate": 0.0005005986996258713,
      "loss": 3.0454,
      "step": 61487
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6663068532943726,
      "learning_rate": 0.0005005956580034763,
      "loss": 2.989,
      "step": 61488
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6994141340255737,
      "learning_rate": 0.0005005926163437868,
      "loss": 3.0409,
      "step": 61489
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.544040560722351,
      "learning_rate": 0.0005005895746468032,
      "loss": 2.9332,
      "step": 61490
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5672178268432617,
      "learning_rate": 0.0005005865329125263,
      "loss": 3.0682,
      "step": 61491
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3903312683105469,
      "learning_rate": 0.0005005834911409565,
      "loss": 3.1386,
      "step": 61492
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7561546564102173,
      "learning_rate": 0.0005005804493320946,
      "loss": 2.8218,
      "step": 61493
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4108201265335083,
      "learning_rate": 0.0005005774074859409,
      "loss": 3.1525,
      "step": 61494
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.63040292263031,
      "learning_rate": 0.0005005743656024961,
      "loss": 3.0725,
      "step": 61495
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4490230083465576,
      "learning_rate": 0.0005005713236817606,
      "loss": 3.0969,
      "step": 61496
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.561722993850708,
      "learning_rate": 0.0005005682817237353,
      "loss": 2.8759,
      "step": 61497
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8417704105377197,
      "learning_rate": 0.0005005652397284205,
      "loss": 2.8092,
      "step": 61498
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6318891048431396,
      "learning_rate": 0.0005005621976958168,
      "loss": 3.1342,
      "step": 61499
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3265442848205566,
      "learning_rate": 0.0005005591556259249,
      "loss": 3.0391,
      "step": 61500
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4574811458587646,
      "learning_rate": 0.0005005561135187452,
      "loss": 3.0437,
      "step": 61501
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5764378309249878,
      "learning_rate": 0.0005005530713742784,
      "loss": 2.9052,
      "step": 61502
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7248131036758423,
      "learning_rate": 0.0005005500291925249,
      "loss": 3.1327,
      "step": 61503
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6541799306869507,
      "learning_rate": 0.0005005469869734854,
      "loss": 2.9723,
      "step": 61504
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7592824697494507,
      "learning_rate": 0.0005005439447171604,
      "loss": 2.9721,
      "step": 61505
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8355677127838135,
      "learning_rate": 0.0005005409024235505,
      "loss": 3.067,
      "step": 61506
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.706507682800293,
      "learning_rate": 0.0005005378600926563,
      "loss": 2.9038,
      "step": 61507
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.826631784439087,
      "learning_rate": 0.0005005348177244784,
      "loss": 2.7513,
      "step": 61508
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5767332315444946,
      "learning_rate": 0.0005005317753190171,
      "loss": 3.0771,
      "step": 61509
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0567996501922607,
      "learning_rate": 0.0005005287328762733,
      "loss": 3.0264,
      "step": 61510
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5684350728988647,
      "learning_rate": 0.0005005256903962474,
      "loss": 3.1116,
      "step": 61511
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4982166290283203,
      "learning_rate": 0.00050052264787894,
      "loss": 2.8606,
      "step": 61512
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5057497024536133,
      "learning_rate": 0.0005005196053243517,
      "loss": 2.9207,
      "step": 61513
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.409276008605957,
      "learning_rate": 0.0005005165627324828,
      "loss": 3.0767,
      "step": 61514
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3284947872161865,
      "learning_rate": 0.0005005135201033344,
      "loss": 3.0521,
      "step": 61515
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5195579528808594,
      "learning_rate": 0.0005005104774369065,
      "loss": 3.0265,
      "step": 61516
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6100106239318848,
      "learning_rate": 0.0005005074347332,
      "loss": 3.0695,
      "step": 61517
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5787644386291504,
      "learning_rate": 0.0005005043919922154,
      "loss": 2.9514,
      "step": 61518
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8779548406600952,
      "learning_rate": 0.0005005013492139532,
      "loss": 3.015,
      "step": 61519
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4672799110412598,
      "learning_rate": 0.0005004983063984141,
      "loss": 2.9561,
      "step": 61520
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4800173044204712,
      "learning_rate": 0.0005004952635455985,
      "loss": 2.8608,
      "step": 61521
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.153259754180908,
      "learning_rate": 0.0005004922206555072,
      "loss": 3.1752,
      "step": 61522
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5535407066345215,
      "learning_rate": 0.0005004891777281404,
      "loss": 2.8194,
      "step": 61523
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7333753108978271,
      "learning_rate": 0.000500486134763499,
      "loss": 3.067,
      "step": 61524
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.6697304248809814,
      "learning_rate": 0.0005004830917615834,
      "loss": 2.8748,
      "step": 61525
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.960401773452759,
      "learning_rate": 0.0005004800487223943,
      "loss": 3.0074,
      "step": 61526
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6004353761672974,
      "learning_rate": 0.0005004770056459321,
      "loss": 3.1093,
      "step": 61527
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7362749576568604,
      "learning_rate": 0.0005004739625321974,
      "loss": 2.9294,
      "step": 61528
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.239596366882324,
      "learning_rate": 0.0005004709193811908,
      "loss": 2.6983,
      "step": 61529
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5639067888259888,
      "learning_rate": 0.0005004678761929129,
      "loss": 3.4111,
      "step": 61530
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0376977920532227,
      "learning_rate": 0.0005004648329673643,
      "loss": 3.0057,
      "step": 61531
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5269235372543335,
      "learning_rate": 0.0005004617897045455,
      "loss": 2.9835,
      "step": 61532
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.686706304550171,
      "learning_rate": 0.000500458746404457,
      "loss": 2.9082,
      "step": 61533
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.103278875350952,
      "learning_rate": 0.0005004557030670995,
      "loss": 2.8638,
      "step": 61534
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6688363552093506,
      "learning_rate": 0.0005004526596924734,
      "loss": 3.1957,
      "step": 61535
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7143572568893433,
      "learning_rate": 0.0005004496162805794,
      "loss": 3.1215,
      "step": 61536
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4634865522384644,
      "learning_rate": 0.000500446572831418,
      "loss": 3.3164,
      "step": 61537
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.769920825958252,
      "learning_rate": 0.0005004435293449899,
      "loss": 2.9019,
      "step": 61538
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.416527271270752,
      "learning_rate": 0.0005004404858212955,
      "loss": 3.1655,
      "step": 61539
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4717458486557007,
      "learning_rate": 0.0005004374422603355,
      "loss": 3.145,
      "step": 61540
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.655194878578186,
      "learning_rate": 0.0005004343986621104,
      "loss": 3.0575,
      "step": 61541
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.999518871307373,
      "learning_rate": 0.0005004313550266206,
      "loss": 3.0055,
      "step": 61542
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.028611660003662,
      "learning_rate": 0.000500428311353867,
      "loss": 3.0468,
      "step": 61543
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7275559902191162,
      "learning_rate": 0.0005004252676438499,
      "loss": 3.1709,
      "step": 61544
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8983206748962402,
      "learning_rate": 0.0005004222238965699,
      "loss": 3.1886,
      "step": 61545
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9499551057815552,
      "learning_rate": 0.0005004191801120278,
      "loss": 2.8897,
      "step": 61546
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7209498882293701,
      "learning_rate": 0.0005004161362902238,
      "loss": 3.0008,
      "step": 61547
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0461833477020264,
      "learning_rate": 0.000500413092431159,
      "loss": 2.928,
      "step": 61548
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6769609451293945,
      "learning_rate": 0.0005004100485348333,
      "loss": 3.0951,
      "step": 61549
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.680681586265564,
      "learning_rate": 0.0005004070046012477,
      "loss": 2.9043,
      "step": 61550
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.808134913444519,
      "learning_rate": 0.0005004039606304025,
      "loss": 3.1206,
      "step": 61551
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.848124384880066,
      "learning_rate": 0.0005004009166222986,
      "loss": 3.2026,
      "step": 61552
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7731167078018188,
      "learning_rate": 0.0005003978725769364,
      "loss": 3.1099,
      "step": 61553
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9515326023101807,
      "learning_rate": 0.0005003948284943164,
      "loss": 3.1534,
      "step": 61554
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.519844651222229,
      "learning_rate": 0.0005003917843744392,
      "loss": 3.0779,
      "step": 61555
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.396850824356079,
      "learning_rate": 0.0005003887402173054,
      "loss": 3.023,
      "step": 61556
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5701417922973633,
      "learning_rate": 0.0005003856960229155,
      "loss": 3.2579,
      "step": 61557
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9932801723480225,
      "learning_rate": 0.0005003826517912702,
      "loss": 2.9576,
      "step": 61558
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.256110906600952,
      "learning_rate": 0.0005003796075223699,
      "loss": 3.0281,
      "step": 61559
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.853643536567688,
      "learning_rate": 0.0005003765632162153,
      "loss": 3.039,
      "step": 61560
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.511910319328308,
      "learning_rate": 0.0005003735188728069,
      "loss": 3.0882,
      "step": 61561
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6610565185546875,
      "learning_rate": 0.0005003704744921453,
      "loss": 3.0269,
      "step": 61562
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3826172351837158,
      "learning_rate": 0.000500367430074231,
      "loss": 3.253,
      "step": 61563
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.102036714553833,
      "learning_rate": 0.0005003643856190647,
      "loss": 3.2257,
      "step": 61564
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.317211151123047,
      "learning_rate": 0.0005003613411266468,
      "loss": 3.1116,
      "step": 61565
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7101107835769653,
      "learning_rate": 0.000500358296596978,
      "loss": 2.8721,
      "step": 61566
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6728270053863525,
      "learning_rate": 0.0005003552520300588,
      "loss": 3.1571,
      "step": 61567
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5945287942886353,
      "learning_rate": 0.0005003522074258897,
      "loss": 2.8231,
      "step": 61568
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.229518175125122,
      "learning_rate": 0.0005003491627844714,
      "loss": 3.1826,
      "step": 61569
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6165896654129028,
      "learning_rate": 0.0005003461181058045,
      "loss": 3.0152,
      "step": 61570
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8155816793441772,
      "learning_rate": 0.0005003430733898893,
      "loss": 3.3198,
      "step": 61571
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6521806716918945,
      "learning_rate": 0.0005003400286367266,
      "loss": 2.6696,
      "step": 61572
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8772404193878174,
      "learning_rate": 0.000500336983846317,
      "loss": 2.8874,
      "step": 61573
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8602445125579834,
      "learning_rate": 0.0005003339390186608,
      "loss": 2.9688,
      "step": 61574
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6184645891189575,
      "learning_rate": 0.000500330894153759,
      "loss": 2.9927,
      "step": 61575
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3333947658538818,
      "learning_rate": 0.0005003278492516117,
      "loss": 2.8899,
      "step": 61576
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4694570302963257,
      "learning_rate": 0.0005003248043122197,
      "loss": 2.8728,
      "step": 61577
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7147345542907715,
      "learning_rate": 0.0005003217593355835,
      "loss": 2.8604,
      "step": 61578
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7607126235961914,
      "learning_rate": 0.0005003187143217038,
      "loss": 3.0216,
      "step": 61579
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8059239387512207,
      "learning_rate": 0.0005003156692705811,
      "loss": 3.0474,
      "step": 61580
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.6533877849578857,
      "learning_rate": 0.0005003126241822158,
      "loss": 2.9005,
      "step": 61581
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5039398670196533,
      "learning_rate": 0.0005003095790566086,
      "loss": 3.048,
      "step": 61582
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0330846309661865,
      "learning_rate": 0.0005003065338937602,
      "loss": 3.1907,
      "step": 61583
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7845555543899536,
      "learning_rate": 0.0005003034886936709,
      "loss": 3.2094,
      "step": 61584
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3176906108856201,
      "learning_rate": 0.0005003004434563415,
      "loss": 3.1011,
      "step": 61585
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6704238653182983,
      "learning_rate": 0.0005002973981817725,
      "loss": 3.1683,
      "step": 61586
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7019875049591064,
      "learning_rate": 0.0005002943528699643,
      "loss": 2.9921,
      "step": 61587
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7401241064071655,
      "learning_rate": 0.0005002913075209177,
      "loss": 3.0061,
      "step": 61588
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0271413326263428,
      "learning_rate": 0.0005002882621346331,
      "loss": 3.2663,
      "step": 61589
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.827399730682373,
      "learning_rate": 0.0005002852167111112,
      "loss": 3.0984,
      "step": 61590
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1837704181671143,
      "learning_rate": 0.0005002821712503524,
      "loss": 2.8064,
      "step": 61591
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4695793390274048,
      "learning_rate": 0.0005002791257523575,
      "loss": 3.1287,
      "step": 61592
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.6142618656158447,
      "learning_rate": 0.0005002760802171269,
      "loss": 3.0507,
      "step": 61593
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.2713546752929688,
      "learning_rate": 0.0005002730346446611,
      "loss": 3.0908,
      "step": 61594
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9956552982330322,
      "learning_rate": 0.0005002699890349608,
      "loss": 2.9402,
      "step": 61595
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5661581754684448,
      "learning_rate": 0.0005002669433880265,
      "loss": 2.8158,
      "step": 61596
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.465353488922119,
      "learning_rate": 0.000500263897703859,
      "loss": 3.0012,
      "step": 61597
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8823028802871704,
      "learning_rate": 0.0005002608519824584,
      "loss": 3.2543,
      "step": 61598
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.979720950126648,
      "learning_rate": 0.0005002578062238256,
      "loss": 2.7691,
      "step": 61599
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5105094909667969,
      "learning_rate": 0.0005002547604279612,
      "loss": 3.3135,
      "step": 61600
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.422962188720703,
      "learning_rate": 0.0005002517145948655,
      "loss": 3.2217,
      "step": 61601
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.331134080886841,
      "learning_rate": 0.0005002486687245393,
      "loss": 3.2853,
      "step": 61602
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.2209537029266357,
      "learning_rate": 0.0005002456228169832,
      "loss": 3.3123,
      "step": 61603
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4766006469726562,
      "learning_rate": 0.0005002425768721975,
      "loss": 3.0222,
      "step": 61604
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.0755910873413086,
      "learning_rate": 0.000500239530890183,
      "loss": 2.8141,
      "step": 61605
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7222330570220947,
      "learning_rate": 0.0005002364848709402,
      "loss": 3.0824,
      "step": 61606
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1423304080963135,
      "learning_rate": 0.0005002334388144697,
      "loss": 2.9769,
      "step": 61607
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5036321878433228,
      "learning_rate": 0.0005002303927207719,
      "loss": 3.0952,
      "step": 61608
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.5232150554656982,
      "learning_rate": 0.0005002273465898475,
      "loss": 3.2047,
      "step": 61609
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.691418170928955,
      "learning_rate": 0.0005002243004216971,
      "loss": 2.8855,
      "step": 61610
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8567438125610352,
      "learning_rate": 0.0005002212542163212,
      "loss": 3.1362,
      "step": 61611
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.586313009262085,
      "learning_rate": 0.0005002182079737205,
      "loss": 3.3601,
      "step": 61612
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.5020694732666016,
      "learning_rate": 0.0005002151616938953,
      "loss": 3.3425,
      "step": 61613
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7868449687957764,
      "learning_rate": 0.0005002121153768464,
      "loss": 3.1626,
      "step": 61614
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5448484420776367,
      "learning_rate": 0.0005002090690225744,
      "loss": 2.8104,
      "step": 61615
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.626356601715088,
      "learning_rate": 0.0005002060226310796,
      "loss": 2.9813,
      "step": 61616
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.1785287857055664,
      "learning_rate": 0.0005002029762023627,
      "loss": 2.8219,
      "step": 61617
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7462292909622192,
      "learning_rate": 0.0005001999297364244,
      "loss": 2.9262,
      "step": 61618
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9424114227294922,
      "learning_rate": 0.000500196883233265,
      "loss": 3.0346,
      "step": 61619
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.1972784996032715,
      "learning_rate": 0.0005001938366928853,
      "loss": 3.1096,
      "step": 61620
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8243542909622192,
      "learning_rate": 0.0005001907901152859,
      "loss": 3.1132,
      "step": 61621
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.474380612373352,
      "learning_rate": 0.0005001877435004672,
      "loss": 3.0198,
      "step": 61622
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9033993482589722,
      "learning_rate": 0.0005001846968484297,
      "loss": 3.1409,
      "step": 61623
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7506364583969116,
      "learning_rate": 0.0005001816501591742,
      "loss": 2.7889,
      "step": 61624
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9276299476623535,
      "learning_rate": 0.0005001786034327012,
      "loss": 3.0668,
      "step": 61625
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.585212230682373,
      "learning_rate": 0.0005001755566690111,
      "loss": 3.1262,
      "step": 61626
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4199650287628174,
      "learning_rate": 0.0005001725098681046,
      "loss": 3.0989,
      "step": 61627
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4387843608856201,
      "learning_rate": 0.0005001694630299823,
      "loss": 3.2266,
      "step": 61628
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.882165789604187,
      "learning_rate": 0.0005001664161546447,
      "loss": 2.7289,
      "step": 61629
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7507963180541992,
      "learning_rate": 0.0005001633692420923,
      "loss": 3.0842,
      "step": 61630
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7660731077194214,
      "learning_rate": 0.0005001603222923259,
      "loss": 3.1713,
      "step": 61631
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.626487374305725,
      "learning_rate": 0.0005001572753053459,
      "loss": 2.9408,
      "step": 61632
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9537858963012695,
      "learning_rate": 0.0005001542282811528,
      "loss": 3.0754,
      "step": 61633
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4796438217163086,
      "learning_rate": 0.0005001511812197472,
      "loss": 2.7567,
      "step": 61634
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7297834157943726,
      "learning_rate": 0.0005001481341211298,
      "loss": 2.974,
      "step": 61635
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5285121202468872,
      "learning_rate": 0.0005001450869853011,
      "loss": 2.9114,
      "step": 61636
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4733555316925049,
      "learning_rate": 0.0005001420398122616,
      "loss": 2.9661,
      "step": 61637
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9765878915786743,
      "learning_rate": 0.0005001389926020119,
      "loss": 3.146,
      "step": 61638
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8131482601165771,
      "learning_rate": 0.0005001359453545527,
      "loss": 3.1071,
      "step": 61639
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7048654556274414,
      "learning_rate": 0.0005001328980698844,
      "loss": 2.9622,
      "step": 61640
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8747806549072266,
      "learning_rate": 0.0005001298507480076,
      "loss": 2.757,
      "step": 61641
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9613840579986572,
      "learning_rate": 0.0005001268033889228,
      "loss": 2.9095,
      "step": 61642
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5437736511230469,
      "learning_rate": 0.0005001237559926308,
      "loss": 3.0485,
      "step": 61643
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3629233837127686,
      "learning_rate": 0.0005001207085591319,
      "loss": 3.0445,
      "step": 61644
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2991455793380737,
      "learning_rate": 0.0005001176610884268,
      "loss": 3.0659,
      "step": 61645
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3306149244308472,
      "learning_rate": 0.0005001146135805162,
      "loss": 3.0325,
      "step": 61646
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6398935317993164,
      "learning_rate": 0.0005001115660354004,
      "loss": 3.1739,
      "step": 61647
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6334012746810913,
      "learning_rate": 0.00050010851845308,
      "loss": 2.9678,
      "step": 61648
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3528136014938354,
      "learning_rate": 0.0005001054708335558,
      "loss": 3.2908,
      "step": 61649
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.517300009727478,
      "learning_rate": 0.0005001024231768282,
      "loss": 3.1804,
      "step": 61650
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5160787105560303,
      "learning_rate": 0.0005000993754828976,
      "loss": 2.9874,
      "step": 61651
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8493489027023315,
      "learning_rate": 0.000500096327751765,
      "loss": 3.1032,
      "step": 61652
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.389085531234741,
      "learning_rate": 0.0005000932799834306,
      "loss": 2.9531,
      "step": 61653
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.892529010772705,
      "learning_rate": 0.000500090232177895,
      "loss": 2.8947,
      "step": 61654
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4645599126815796,
      "learning_rate": 0.000500087184335159,
      "loss": 2.7527,
      "step": 61655
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4434646368026733,
      "learning_rate": 0.0005000841364552229,
      "loss": 3.1057,
      "step": 61656
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5057076215744019,
      "learning_rate": 0.0005000810885380873,
      "loss": 2.9871,
      "step": 61657
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8338721990585327,
      "learning_rate": 0.0005000780405837531,
      "loss": 3.0642,
      "step": 61658
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7942314147949219,
      "learning_rate": 0.0005000749925922204,
      "loss": 3.132,
      "step": 61659
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5998653173446655,
      "learning_rate": 0.0005000719445634901,
      "loss": 3.1,
      "step": 61660
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5670826435089111,
      "learning_rate": 0.0005000688964975627,
      "loss": 3.1263,
      "step": 61661
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5936237573623657,
      "learning_rate": 0.0005000658483944385,
      "loss": 3.0496,
      "step": 61662
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4976270198822021,
      "learning_rate": 0.0005000628002541185,
      "loss": 2.8402,
      "step": 61663
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9994250535964966,
      "learning_rate": 0.000500059752076603,
      "loss": 2.9109,
      "step": 61664
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.998391032218933,
      "learning_rate": 0.0005000567038618925,
      "loss": 3.3409,
      "step": 61665
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3910620212554932,
      "learning_rate": 0.0005000536556099877,
      "loss": 3.0642,
      "step": 61666
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2723183631896973,
      "learning_rate": 0.0005000506073208893,
      "loss": 2.8802,
      "step": 61667
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1945650577545166,
      "learning_rate": 0.0005000475589945977,
      "loss": 3.2768,
      "step": 61668
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9348231554031372,
      "learning_rate": 0.0005000445106311133,
      "loss": 2.931,
      "step": 61669
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.786328673362732,
      "learning_rate": 0.000500041462230437,
      "loss": 3.0427,
      "step": 61670
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6430919170379639,
      "learning_rate": 0.0005000384137925692,
      "loss": 2.9717,
      "step": 61671
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8206230401992798,
      "learning_rate": 0.0005000353653175104,
      "loss": 3.2173,
      "step": 61672
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6829001903533936,
      "learning_rate": 0.0005000323168052614,
      "loss": 3.031,
      "step": 61673
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.147744655609131,
      "learning_rate": 0.0005000292682558224,
      "loss": 3.3633,
      "step": 61674
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.3463306427001953,
      "learning_rate": 0.0005000262196691943,
      "loss": 2.8938,
      "step": 61675
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.189865827560425,
      "learning_rate": 0.0005000231710453776,
      "loss": 2.9425,
      "step": 61676
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.353767991065979,
      "learning_rate": 0.0005000201223843727,
      "loss": 3.243,
      "step": 61677
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4998221397399902,
      "learning_rate": 0.0005000170736861804,
      "loss": 3.2567,
      "step": 61678
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.255171060562134,
      "learning_rate": 0.0005000140249508012,
      "loss": 3.137,
      "step": 61679
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.126812696456909,
      "learning_rate": 0.0005000109761782354,
      "loss": 2.8609,
      "step": 61680
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4551526308059692,
      "learning_rate": 0.000500007927368484,
      "loss": 2.9386,
      "step": 61681
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2853634357452393,
      "learning_rate": 0.0005000048785215471,
      "loss": 2.8032,
      "step": 61682
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6881814002990723,
      "learning_rate": 0.0005000018296374257,
      "loss": 3.2369,
      "step": 61683
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.8917322158813477,
      "learning_rate": 0.0004999987807161202,
      "loss": 3.1254,
      "step": 61684
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.409056544303894,
      "learning_rate": 0.0004999957317576312,
      "loss": 2.9093,
      "step": 61685
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4202117919921875,
      "learning_rate": 0.000499992682761959,
      "loss": 3.0895,
      "step": 61686
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5367237329483032,
      "learning_rate": 0.0004999896337291046,
      "loss": 2.8929,
      "step": 61687
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4164788722991943,
      "learning_rate": 0.0004999865846590682,
      "loss": 2.9527,
      "step": 61688
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5739580392837524,
      "learning_rate": 0.0004999835355518507,
      "loss": 2.9706,
      "step": 61689
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.436784029006958,
      "learning_rate": 0.0004999804864074523,
      "loss": 2.9815,
      "step": 61690
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9966930150985718,
      "learning_rate": 0.0004999774372258738,
      "loss": 2.9007,
      "step": 61691
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0474836826324463,
      "learning_rate": 0.0004999743880071157,
      "loss": 2.8997,
      "step": 61692
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4191226959228516,
      "learning_rate": 0.0004999713387511787,
      "loss": 3.0812,
      "step": 61693
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.33277428150177,
      "learning_rate": 0.0004999682894580633,
      "loss": 3.0761,
      "step": 61694
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.678514003753662,
      "learning_rate": 0.0004999652401277698,
      "loss": 3.0039,
      "step": 61695
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6039243936538696,
      "learning_rate": 0.0004999621907602991,
      "loss": 2.94,
      "step": 61696
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8913265466690063,
      "learning_rate": 0.0004999591413556516,
      "loss": 3.0033,
      "step": 61697
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9924452304840088,
      "learning_rate": 0.0004999560919138281,
      "loss": 2.8357,
      "step": 61698
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.701902985572815,
      "learning_rate": 0.0004999530424348287,
      "loss": 2.9491,
      "step": 61699
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5396274328231812,
      "learning_rate": 0.0004999499929186545,
      "loss": 2.9199,
      "step": 61700
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8204017877578735,
      "learning_rate": 0.0004999469433653057,
      "loss": 3.2623,
      "step": 61701
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4784756898880005,
      "learning_rate": 0.000499943893774783,
      "loss": 2.9577,
      "step": 61702
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6787710189819336,
      "learning_rate": 0.0004999408441470869,
      "loss": 3.235,
      "step": 61703
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.360268235206604,
      "learning_rate": 0.0004999377944822182,
      "loss": 2.9652,
      "step": 61704
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.285748243331909,
      "learning_rate": 0.0004999347447801773,
      "loss": 2.7919,
      "step": 61705
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.988852858543396,
      "learning_rate": 0.0004999316950409645,
      "loss": 3.0794,
      "step": 61706
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4652810096740723,
      "learning_rate": 0.0004999286452645808,
      "loss": 2.9893,
      "step": 61707
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7470818758010864,
      "learning_rate": 0.0004999255954510266,
      "loss": 3.0058,
      "step": 61708
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6362779140472412,
      "learning_rate": 0.0004999225456003023,
      "loss": 3.0057,
      "step": 61709
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9440770149230957,
      "learning_rate": 0.0004999194957124089,
      "loss": 3.0745,
      "step": 61710
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4949690103530884,
      "learning_rate": 0.0004999164457873465,
      "loss": 2.8435,
      "step": 61711
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.777822494506836,
      "learning_rate": 0.0004999133958251159,
      "loss": 3.0045,
      "step": 61712
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.928040623664856,
      "learning_rate": 0.0004999103458257177,
      "loss": 3.1265,
      "step": 61713
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3787047863006592,
      "learning_rate": 0.0004999072957891522,
      "loss": 3.1116,
      "step": 61714
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3977863788604736,
      "learning_rate": 0.0004999042457154202,
      "loss": 3.0985,
      "step": 61715
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.823001742362976,
      "learning_rate": 0.0004999011956045223,
      "loss": 2.8576,
      "step": 61716
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4377590417861938,
      "learning_rate": 0.0004998981454564591,
      "loss": 2.8761,
      "step": 61717
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4755946397781372,
      "learning_rate": 0.0004998950952712309,
      "loss": 2.9448,
      "step": 61718
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9869118928909302,
      "learning_rate": 0.0004998920450488385,
      "loss": 2.9431,
      "step": 61719
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6173701286315918,
      "learning_rate": 0.0004998889947892824,
      "loss": 3.1145,
      "step": 61720
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7028790712356567,
      "learning_rate": 0.0004998859444925631,
      "loss": 3.0176,
      "step": 61721
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7496665716171265,
      "learning_rate": 0.0004998828941586813,
      "loss": 3.2057,
      "step": 61722
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9172776937484741,
      "learning_rate": 0.0004998798437876375,
      "loss": 2.8409,
      "step": 61723
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7107486724853516,
      "learning_rate": 0.0004998767933794321,
      "loss": 3.1486,
      "step": 61724
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.90925133228302,
      "learning_rate": 0.0004998737429340661,
      "loss": 2.905,
      "step": 61725
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.151601791381836,
      "learning_rate": 0.0004998706924515396,
      "loss": 2.9967,
      "step": 61726
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0153706073760986,
      "learning_rate": 0.0004998676419318535,
      "loss": 2.9588,
      "step": 61727
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5770149230957031,
      "learning_rate": 0.0004998645913750081,
      "loss": 3.1827,
      "step": 61728
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.166078805923462,
      "learning_rate": 0.0004998615407810042,
      "loss": 2.9401,
      "step": 61729
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.129678726196289,
      "learning_rate": 0.0004998584901498422,
      "loss": 3.0555,
      "step": 61730
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.443690061569214,
      "learning_rate": 0.0004998554394815228,
      "loss": 2.9447,
      "step": 61731
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2333409786224365,
      "learning_rate": 0.0004998523887760465,
      "loss": 3.0019,
      "step": 61732
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.846203327178955,
      "learning_rate": 0.0004998493380334138,
      "loss": 2.8419,
      "step": 61733
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8733583688735962,
      "learning_rate": 0.0004998462872536254,
      "loss": 2.7641,
      "step": 61734
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.124133825302124,
      "learning_rate": 0.0004998432364366817,
      "loss": 3.1208,
      "step": 61735
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3492381572723389,
      "learning_rate": 0.0004998401855825835,
      "loss": 3.2594,
      "step": 61736
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.827043056488037,
      "learning_rate": 0.0004998371346913312,
      "loss": 2.9728,
      "step": 61737
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.243736982345581,
      "learning_rate": 0.0004998340837629253,
      "loss": 3.1282,
      "step": 61738
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.8011348247528076,
      "learning_rate": 0.0004998310327973666,
      "loss": 2.9541,
      "step": 61739
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7113100290298462,
      "learning_rate": 0.0004998279817946554,
      "loss": 2.8939,
      "step": 61740
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.420325517654419,
      "learning_rate": 0.0004998249307547925,
      "loss": 3.2449,
      "step": 61741
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.044995069503784,
      "learning_rate": 0.0004998218796777784,
      "loss": 3.1927,
      "step": 61742
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0471816062927246,
      "learning_rate": 0.0004998188285636136,
      "loss": 3.2528,
      "step": 61743
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5942376852035522,
      "learning_rate": 0.0004998157774122987,
      "loss": 3.2339,
      "step": 61744
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.211634635925293,
      "learning_rate": 0.0004998127262238343,
      "loss": 2.9174,
      "step": 61745
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1333963871002197,
      "learning_rate": 0.0004998096749982211,
      "loss": 2.9442,
      "step": 61746
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0504345893859863,
      "learning_rate": 0.0004998066237354592,
      "loss": 3.0466,
      "step": 61747
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3001518249511719,
      "learning_rate": 0.0004998035724355498,
      "loss": 2.7329,
      "step": 61748
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9783042669296265,
      "learning_rate": 0.0004998005210984929,
      "loss": 3.1599,
      "step": 61749
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.406595468521118,
      "learning_rate": 0.0004997974697242894,
      "loss": 3.1151,
      "step": 61750
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.894126296043396,
      "learning_rate": 0.0004997944183129396,
      "loss": 2.9459,
      "step": 61751
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8470531702041626,
      "learning_rate": 0.0004997913668644446,
      "loss": 3.1147,
      "step": 61752
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.358999252319336,
      "learning_rate": 0.0004997883153788044,
      "loss": 3.3509,
      "step": 61753
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7570812702178955,
      "learning_rate": 0.0004997852638560197,
      "loss": 3.0322,
      "step": 61754
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3559664487838745,
      "learning_rate": 0.0004997822122960912,
      "loss": 3.0981,
      "step": 61755
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.1173410415649414,
      "learning_rate": 0.0004997791606990195,
      "loss": 3.0022,
      "step": 61756
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7661526203155518,
      "learning_rate": 0.0004997761090648051,
      "loss": 3.0478,
      "step": 61757
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.32454252243042,
      "learning_rate": 0.0004997730573934484,
      "loss": 3.1776,
      "step": 61758
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3664408922195435,
      "learning_rate": 0.0004997700056849503,
      "loss": 3.1017,
      "step": 61759
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3113218545913696,
      "learning_rate": 0.0004997669539393111,
      "loss": 3.0269,
      "step": 61760
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6679974794387817,
      "learning_rate": 0.0004997639021565314,
      "loss": 2.9961,
      "step": 61761
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.4059031009674072,
      "learning_rate": 0.0004997608503366119,
      "loss": 2.8533,
      "step": 61762
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4041202068328857,
      "learning_rate": 0.0004997577984795531,
      "loss": 3.1176,
      "step": 61763
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7822257280349731,
      "learning_rate": 0.0004997547465853554,
      "loss": 2.886,
      "step": 61764
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4093515872955322,
      "learning_rate": 0.0004997516946540197,
      "loss": 3.0471,
      "step": 61765
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6246981620788574,
      "learning_rate": 0.0004997486426855463,
      "loss": 3.0377,
      "step": 61766
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2905353307724,
      "learning_rate": 0.000499745590679936,
      "loss": 3.2014,
      "step": 61767
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6106945276260376,
      "learning_rate": 0.0004997425386371891,
      "loss": 2.9644,
      "step": 61768
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.080070734024048,
      "learning_rate": 0.0004997394865573063,
      "loss": 3.0188,
      "step": 61769
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4091038703918457,
      "learning_rate": 0.0004997364344402881,
      "loss": 2.9115,
      "step": 61770
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8098697662353516,
      "learning_rate": 0.0004997333822861352,
      "loss": 2.6935,
      "step": 61771
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4821668863296509,
      "learning_rate": 0.000499730330094848,
      "loss": 3.12,
      "step": 61772
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.3780622482299805,
      "learning_rate": 0.0004997272778664273,
      "loss": 2.8335,
      "step": 61773
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7753134965896606,
      "learning_rate": 0.0004997242256008734,
      "loss": 3.0979,
      "step": 61774
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6108330488204956,
      "learning_rate": 0.0004997211732981871,
      "loss": 2.9079,
      "step": 61775
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.846286416053772,
      "learning_rate": 0.0004997181209583689,
      "loss": 3.0799,
      "step": 61776
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8368556499481201,
      "learning_rate": 0.0004997150685814193,
      "loss": 2.9895,
      "step": 61777
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.19142484664917,
      "learning_rate": 0.0004997120161673388,
      "loss": 3.2501,
      "step": 61778
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.0747745037078857,
      "learning_rate": 0.0004997089637161281,
      "loss": 3.1061,
      "step": 61779
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5165165662765503,
      "learning_rate": 0.0004997059112277877,
      "loss": 2.9816,
      "step": 61780
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.2563282251358032,
      "learning_rate": 0.0004997028587023183,
      "loss": 2.9321,
      "step": 61781
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6285076141357422,
      "learning_rate": 0.0004996998061397203,
      "loss": 3.1074,
      "step": 61782
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5792986154556274,
      "learning_rate": 0.0004996967535399945,
      "loss": 2.845,
      "step": 61783
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.491456151008606,
      "learning_rate": 0.000499693700903141,
      "loss": 3.1949,
      "step": 61784
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6651065349578857,
      "learning_rate": 0.0004996906482291609,
      "loss": 3.2161,
      "step": 61785
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5334457159042358,
      "learning_rate": 0.0004996875955180545,
      "loss": 3.0873,
      "step": 61786
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.708968162536621,
      "learning_rate": 0.0004996845427698224,
      "loss": 3.1647,
      "step": 61787
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7709996700286865,
      "learning_rate": 0.0004996814899844651,
      "loss": 2.8781,
      "step": 61788
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.632093071937561,
      "learning_rate": 0.0004996784371619833,
      "loss": 3.1513,
      "step": 61789
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2454473972320557,
      "learning_rate": 0.0004996753843023776,
      "loss": 3.232,
      "step": 61790
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.7624499797821045,
      "learning_rate": 0.0004996723314056483,
      "loss": 3.1083,
      "step": 61791
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6976622343063354,
      "learning_rate": 0.0004996692784717963,
      "loss": 3.0919,
      "step": 61792
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.113083600997925,
      "learning_rate": 0.000499666225500822,
      "loss": 2.9992,
      "step": 61793
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2654175758361816,
      "learning_rate": 0.0004996631724927258,
      "loss": 3.1553,
      "step": 61794
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5563069581985474,
      "learning_rate": 0.0004996601194475086,
      "loss": 2.9007,
      "step": 61795
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6495866775512695,
      "learning_rate": 0.0004996570663651708,
      "loss": 3.0333,
      "step": 61796
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9692999124526978,
      "learning_rate": 0.000499654013245713,
      "loss": 2.8891,
      "step": 61797
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5069655179977417,
      "learning_rate": 0.0004996509600891357,
      "loss": 3.1016,
      "step": 61798
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5346877574920654,
      "learning_rate": 0.0004996479068954396,
      "loss": 3.1966,
      "step": 61799
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9548815488815308,
      "learning_rate": 0.000499644853664625,
      "loss": 2.8019,
      "step": 61800
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8691036701202393,
      "learning_rate": 0.0004996418003966929,
      "loss": 3.0855,
      "step": 61801
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7616757154464722,
      "learning_rate": 0.0004996387470916434,
      "loss": 2.9503,
      "step": 61802
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.385485291481018,
      "learning_rate": 0.0004996356937494774,
      "loss": 2.7627,
      "step": 61803
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.3678135871887207,
      "learning_rate": 0.0004996326403701954,
      "loss": 2.9282,
      "step": 61804
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6946730613708496,
      "learning_rate": 0.0004996295869537979,
      "loss": 2.9405,
      "step": 61805
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.628584623336792,
      "learning_rate": 0.0004996265335002854,
      "loss": 3.0333,
      "step": 61806
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5019124746322632,
      "learning_rate": 0.0004996234800096586,
      "loss": 2.9939,
      "step": 61807
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.472321629524231,
      "learning_rate": 0.000499620426481918,
      "loss": 2.971,
      "step": 61808
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.658989667892456,
      "learning_rate": 0.0004996173729170643,
      "loss": 3.2221,
      "step": 61809
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5902352333068848,
      "learning_rate": 0.0004996143193150979,
      "loss": 2.7673,
      "step": 61810
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.225403070449829,
      "learning_rate": 0.0004996112656760193,
      "loss": 3.0952,
      "step": 61811
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8397352695465088,
      "learning_rate": 0.0004996082119998294,
      "loss": 3.0574,
      "step": 61812
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.688855767250061,
      "learning_rate": 0.0004996051582865285,
      "loss": 3.0425,
      "step": 61813
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7700259685516357,
      "learning_rate": 0.0004996021045361172,
      "loss": 2.977,
      "step": 61814
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5691646337509155,
      "learning_rate": 0.0004995990507485962,
      "loss": 3.0878,
      "step": 61815
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4993094205856323,
      "learning_rate": 0.0004995959969239658,
      "loss": 3.1447,
      "step": 61816
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6773799657821655,
      "learning_rate": 0.0004995929430622269,
      "loss": 2.9566,
      "step": 61817
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7849154472351074,
      "learning_rate": 0.0004995898891633798,
      "loss": 3.1007,
      "step": 61818
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.8285566568374634,
      "learning_rate": 0.0004995868352274252,
      "loss": 2.9133,
      "step": 61819
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.2161777019500732,
      "learning_rate": 0.0004995837812543637,
      "loss": 2.7581,
      "step": 61820
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.7711801528930664,
      "learning_rate": 0.0004995807272441958,
      "loss": 3.2544,
      "step": 61821
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.6242938041687012,
      "learning_rate": 0.000499577673196922,
      "loss": 2.9963,
      "step": 61822
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.5398412942886353,
      "learning_rate": 0.0004995746191125429,
      "loss": 3.1239,
      "step": 61823
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.0891709327697754,
      "learning_rate": 0.0004995715649910592,
      "loss": 2.8474,
      "step": 61824
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2036731243133545,
      "learning_rate": 0.0004995685108324714,
      "loss": 2.9785,
      "step": 61825
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3506860733032227,
      "learning_rate": 0.00049956545663678,
      "loss": 3.1966,
      "step": 61826
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4482239484786987,
      "learning_rate": 0.0004995624024039856,
      "loss": 2.929,
      "step": 61827
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0486695766448975,
      "learning_rate": 0.0004995593481340888,
      "loss": 2.9547,
      "step": 61828
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7555174827575684,
      "learning_rate": 0.0004995562938270902,
      "loss": 3.0661,
      "step": 61829
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7917448282241821,
      "learning_rate": 0.0004995532394829903,
      "loss": 2.9876,
      "step": 61830
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4065983295440674,
      "learning_rate": 0.0004995501851017897,
      "loss": 2.9672,
      "step": 61831
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4265305995941162,
      "learning_rate": 0.0004995471306834888,
      "loss": 2.8831,
      "step": 61832
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5299954414367676,
      "learning_rate": 0.0004995440762280885,
      "loss": 3.2127,
      "step": 61833
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7160760164260864,
      "learning_rate": 0.0004995410217355891,
      "loss": 3.1408,
      "step": 61834
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.428096055984497,
      "learning_rate": 0.0004995379672059914,
      "loss": 2.9608,
      "step": 61835
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6743531227111816,
      "learning_rate": 0.0004995349126392957,
      "loss": 2.9706,
      "step": 61836
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6366844177246094,
      "learning_rate": 0.0004995318580355026,
      "loss": 3.2821,
      "step": 61837
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3453248739242554,
      "learning_rate": 0.0004995288033946129,
      "loss": 2.9273,
      "step": 61838
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4899497032165527,
      "learning_rate": 0.000499525748716627,
      "loss": 2.7442,
      "step": 61839
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6962324380874634,
      "learning_rate": 0.0004995226940015454,
      "loss": 3.2607,
      "step": 61840
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.8002970218658447,
      "learning_rate": 0.000499519639249369,
      "loss": 3.1755,
      "step": 61841
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4507172107696533,
      "learning_rate": 0.0004995165844600979,
      "loss": 3.3509,
      "step": 61842
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7643420696258545,
      "learning_rate": 0.000499513529633733,
      "loss": 2.8496,
      "step": 61843
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6142927408218384,
      "learning_rate": 0.0004995104747702748,
      "loss": 3.0117,
      "step": 61844
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5009108781814575,
      "learning_rate": 0.0004995074198697238,
      "loss": 3.0153,
      "step": 61845
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4336820840835571,
      "learning_rate": 0.0004995043649320805,
      "loss": 2.9769,
      "step": 61846
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5735684633255005,
      "learning_rate": 0.0004995013099573458,
      "loss": 3.201,
      "step": 61847
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2581405639648438,
      "learning_rate": 0.0004994982549455199,
      "loss": 2.9906,
      "step": 61848
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.573853611946106,
      "learning_rate": 0.0004994951998966035,
      "loss": 2.843,
      "step": 61849
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.793660044670105,
      "learning_rate": 0.0004994921448105971,
      "loss": 3.1493,
      "step": 61850
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4042441844940186,
      "learning_rate": 0.0004994890896875016,
      "loss": 2.9618,
      "step": 61851
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4319051504135132,
      "learning_rate": 0.000499486034527317,
      "loss": 2.8931,
      "step": 61852
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.545375943183899,
      "learning_rate": 0.0004994829793300443,
      "loss": 3.0648,
      "step": 61853
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6905534267425537,
      "learning_rate": 0.000499479924095684,
      "loss": 3.0059,
      "step": 61854
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6558512449264526,
      "learning_rate": 0.0004994768688242366,
      "loss": 2.9377,
      "step": 61855
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7938636541366577,
      "learning_rate": 0.0004994738135157027,
      "loss": 2.942,
      "step": 61856
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6507750749588013,
      "learning_rate": 0.0004994707581700829,
      "loss": 3.2134,
      "step": 61857
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4467827081680298,
      "learning_rate": 0.0004994677027873776,
      "loss": 3.0126,
      "step": 61858
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4906288385391235,
      "learning_rate": 0.0004994646473675876,
      "loss": 3.0558,
      "step": 61859
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3595921993255615,
      "learning_rate": 0.0004994615919107132,
      "loss": 3.2411,
      "step": 61860
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3954617977142334,
      "learning_rate": 0.0004994585364167553,
      "loss": 3.1637,
      "step": 61861
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7168588638305664,
      "learning_rate": 0.0004994554808857143,
      "loss": 3.2076,
      "step": 61862
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7436435222625732,
      "learning_rate": 0.0004994524253175907,
      "loss": 3.0665,
      "step": 61863
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7905035018920898,
      "learning_rate": 0.000499449369712385,
      "loss": 3.0728,
      "step": 61864
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6245836019515991,
      "learning_rate": 0.0004994463140700981,
      "loss": 3.0467,
      "step": 61865
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4844236373901367,
      "learning_rate": 0.0004994432583907303,
      "loss": 3.0023,
      "step": 61866
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2290196418762207,
      "learning_rate": 0.0004994402026742822,
      "loss": 2.9511,
      "step": 61867
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.015824794769287,
      "learning_rate": 0.0004994371469207544,
      "loss": 2.8678,
      "step": 61868
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5825022459030151,
      "learning_rate": 0.0004994340911301475,
      "loss": 2.8788,
      "step": 61869
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0755462646484375,
      "learning_rate": 0.000499431035302462,
      "loss": 3.1776,
      "step": 61870
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.25411057472229,
      "learning_rate": 0.0004994279794376985,
      "loss": 3.1064,
      "step": 61871
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3653336763381958,
      "learning_rate": 0.0004994249235358575,
      "loss": 3.1044,
      "step": 61872
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3378204107284546,
      "learning_rate": 0.0004994218675969398,
      "loss": 3.0094,
      "step": 61873
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.48699951171875,
      "learning_rate": 0.0004994188116209458,
      "loss": 2.8331,
      "step": 61874
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7577261924743652,
      "learning_rate": 0.000499415755607876,
      "loss": 2.9577,
      "step": 61875
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6770966053009033,
      "learning_rate": 0.000499412699557731,
      "loss": 3.129,
      "step": 61876
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.009617328643799,
      "learning_rate": 0.0004994096434705116,
      "loss": 3.3337,
      "step": 61877
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9491376876831055,
      "learning_rate": 0.0004994065873462181,
      "loss": 2.9804,
      "step": 61878
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9324254989624023,
      "learning_rate": 0.000499403531184851,
      "loss": 2.9591,
      "step": 61879
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5568088293075562,
      "learning_rate": 0.0004994004749864112,
      "loss": 3.2661,
      "step": 61880
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8618659973144531,
      "learning_rate": 0.000499397418750899,
      "loss": 2.8041,
      "step": 61881
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9839372634887695,
      "learning_rate": 0.0004993943624783152,
      "loss": 2.9328,
      "step": 61882
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1710727214813232,
      "learning_rate": 0.00049939130616866,
      "loss": 3.0823,
      "step": 61883
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.551953673362732,
      "learning_rate": 0.0004993882498219343,
      "loss": 3.158,
      "step": 61884
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4397704601287842,
      "learning_rate": 0.0004993851934381386,
      "loss": 3.013,
      "step": 61885
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3833866119384766,
      "learning_rate": 0.0004993821370172734,
      "loss": 3.0722,
      "step": 61886
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.060030937194824,
      "learning_rate": 0.0004993790805593394,
      "loss": 3.1707,
      "step": 61887
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.604241967201233,
      "learning_rate": 0.0004993760240643368,
      "loss": 3.1584,
      "step": 61888
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9651657342910767,
      "learning_rate": 0.0004993729675322666,
      "loss": 2.9969,
      "step": 61889
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5521107912063599,
      "learning_rate": 0.0004993699109631292,
      "loss": 3.2237,
      "step": 61890
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8003456592559814,
      "learning_rate": 0.0004993668543569253,
      "loss": 2.964,
      "step": 61891
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0407869815826416,
      "learning_rate": 0.0004993637977136552,
      "loss": 3.0925,
      "step": 61892
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.192657709121704,
      "learning_rate": 0.0004993607410333195,
      "loss": 3.0425,
      "step": 61893
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.469465732574463,
      "learning_rate": 0.000499357684315919,
      "loss": 3.1183,
      "step": 61894
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.817760944366455,
      "learning_rate": 0.0004993546275614541,
      "loss": 2.9901,
      "step": 61895
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5457218885421753,
      "learning_rate": 0.0004993515707699254,
      "loss": 2.9238,
      "step": 61896
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1003024578094482,
      "learning_rate": 0.0004993485139413335,
      "loss": 3.113,
      "step": 61897
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.440399408340454,
      "learning_rate": 0.0004993454570756791,
      "loss": 2.8521,
      "step": 61898
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4594804048538208,
      "learning_rate": 0.0004993424001729624,
      "loss": 3.0128,
      "step": 61899
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8163858652114868,
      "learning_rate": 0.0004993393432331843,
      "loss": 3.0061,
      "step": 61900
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.600184440612793,
      "learning_rate": 0.0004993362862563452,
      "loss": 3.1708,
      "step": 61901
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6266837120056152,
      "learning_rate": 0.0004993332292424458,
      "loss": 2.8589,
      "step": 61902
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9690507650375366,
      "learning_rate": 0.0004993301721914864,
      "loss": 2.8082,
      "step": 61903
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.608642578125,
      "learning_rate": 0.000499327115103468,
      "loss": 3.2029,
      "step": 61904
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0986714363098145,
      "learning_rate": 0.0004993240579783907,
      "loss": 2.7389,
      "step": 61905
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.681330919265747,
      "learning_rate": 0.0004993210008162554,
      "loss": 3.0134,
      "step": 61906
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6384830474853516,
      "learning_rate": 0.0004993179436170626,
      "loss": 2.9697,
      "step": 61907
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7092338800430298,
      "learning_rate": 0.0004993148863808129,
      "loss": 2.7431,
      "step": 61908
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5052661895751953,
      "learning_rate": 0.0004993118291075067,
      "loss": 3.0702,
      "step": 61909
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3919731378555298,
      "learning_rate": 0.0004993087717971446,
      "loss": 2.9987,
      "step": 61910
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5543333292007446,
      "learning_rate": 0.0004993057144497274,
      "loss": 2.963,
      "step": 61911
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5804463624954224,
      "learning_rate": 0.0004993026570652554,
      "loss": 3.0281,
      "step": 61912
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7304819822311401,
      "learning_rate": 0.0004992995996437293,
      "loss": 3.1925,
      "step": 61913
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.35710608959198,
      "learning_rate": 0.0004992965421851496,
      "loss": 2.9968,
      "step": 61914
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7852009534835815,
      "learning_rate": 0.000499293484689517,
      "loss": 3.1876,
      "step": 61915
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5058097839355469,
      "learning_rate": 0.0004992904271568319,
      "loss": 3.1147,
      "step": 61916
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9138981103897095,
      "learning_rate": 0.0004992873695870949,
      "loss": 2.9424,
      "step": 61917
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9764338731765747,
      "learning_rate": 0.0004992843119803068,
      "loss": 2.8379,
      "step": 61918
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.449587345123291,
      "learning_rate": 0.0004992812543364679,
      "loss": 3.1445,
      "step": 61919
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.387848138809204,
      "learning_rate": 0.0004992781966555788,
      "loss": 3.1403,
      "step": 61920
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4079809188842773,
      "learning_rate": 0.0004992751389376401,
      "loss": 2.9208,
      "step": 61921
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7384669780731201,
      "learning_rate": 0.0004992720811826524,
      "loss": 3.1797,
      "step": 61922
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.726686954498291,
      "learning_rate": 0.0004992690233906164,
      "loss": 2.9831,
      "step": 61923
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.697668433189392,
      "learning_rate": 0.0004992659655615324,
      "loss": 3.138,
      "step": 61924
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6826951503753662,
      "learning_rate": 0.0004992629076954011,
      "loss": 2.8283,
      "step": 61925
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5936005115509033,
      "learning_rate": 0.0004992598497922231,
      "loss": 3.0387,
      "step": 61926
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7794677019119263,
      "learning_rate": 0.0004992567918519989,
      "loss": 2.9656,
      "step": 61927
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5239812135696411,
      "learning_rate": 0.0004992537338747291,
      "loss": 2.88,
      "step": 61928
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6539485454559326,
      "learning_rate": 0.0004992506758604143,
      "loss": 3.046,
      "step": 61929
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6548237800598145,
      "learning_rate": 0.000499247617809055,
      "loss": 2.9226,
      "step": 61930
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5797791481018066,
      "learning_rate": 0.0004992445597206518,
      "loss": 3.1276,
      "step": 61931
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8667972087860107,
      "learning_rate": 0.0004992415015952054,
      "loss": 2.9758,
      "step": 61932
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.682000994682312,
      "learning_rate": 0.0004992384434327162,
      "loss": 3.0491,
      "step": 61933
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.50678288936615,
      "learning_rate": 0.0004992353852331847,
      "loss": 3.0638,
      "step": 61934
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8264639377593994,
      "learning_rate": 0.0004992323269966116,
      "loss": 2.932,
      "step": 61935
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.768213152885437,
      "learning_rate": 0.0004992292687229975,
      "loss": 3.0977,
      "step": 61936
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0743160247802734,
      "learning_rate": 0.0004992262104123429,
      "loss": 3.0284,
      "step": 61937
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.602691650390625,
      "learning_rate": 0.0004992231520646484,
      "loss": 2.7709,
      "step": 61938
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5987552404403687,
      "learning_rate": 0.0004992200936799146,
      "loss": 3.047,
      "step": 61939
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1733739376068115,
      "learning_rate": 0.0004992170352581419,
      "loss": 2.6809,
      "step": 61940
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9872835874557495,
      "learning_rate": 0.0004992139767993311,
      "loss": 2.8799,
      "step": 61941
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6068843603134155,
      "learning_rate": 0.0004992109183034827,
      "loss": 2.7606,
      "step": 61942
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5483428239822388,
      "learning_rate": 0.0004992078597705972,
      "loss": 3.1467,
      "step": 61943
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3506412506103516,
      "learning_rate": 0.0004992048012006751,
      "loss": 3.1794,
      "step": 61944
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.309295415878296,
      "learning_rate": 0.0004992017425937171,
      "loss": 2.7377,
      "step": 61945
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7101869583129883,
      "learning_rate": 0.0004991986839497237,
      "loss": 3.0348,
      "step": 61946
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6010905504226685,
      "learning_rate": 0.0004991956252686957,
      "loss": 3.241,
      "step": 61947
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.7816503047943115,
      "learning_rate": 0.0004991925665506333,
      "loss": 3.0439,
      "step": 61948
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.273564100265503,
      "learning_rate": 0.0004991895077955373,
      "loss": 3.0776,
      "step": 61949
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.58545982837677,
      "learning_rate": 0.0004991864490034082,
      "loss": 2.9805,
      "step": 61950
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.698240876197815,
      "learning_rate": 0.0004991833901742466,
      "loss": 3.2821,
      "step": 61951
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.238896608352661,
      "learning_rate": 0.000499180331308053,
      "loss": 3.0933,
      "step": 61952
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.0877978801727295,
      "learning_rate": 0.000499177272404828,
      "loss": 3.2737,
      "step": 61953
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.454235553741455,
      "learning_rate": 0.0004991742134645722,
      "loss": 3.0766,
      "step": 61954
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9031319618225098,
      "learning_rate": 0.0004991711544872862,
      "loss": 3.0536,
      "step": 61955
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.7055978775024414,
      "learning_rate": 0.0004991680954729705,
      "loss": 2.9597,
      "step": 61956
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.509073257446289,
      "learning_rate": 0.0004991650364216258,
      "loss": 3.3739,
      "step": 61957
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4467966556549072,
      "learning_rate": 0.0004991619773332523,
      "loss": 3.2151,
      "step": 61958
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.721631646156311,
      "learning_rate": 0.000499158918207851,
      "loss": 3.1083,
      "step": 61959
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1202337741851807,
      "learning_rate": 0.0004991558590454224,
      "loss": 3.1089,
      "step": 61960
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9302939176559448,
      "learning_rate": 0.0004991527998459668,
      "loss": 3.0296,
      "step": 61961
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4025919437408447,
      "learning_rate": 0.0004991497406094849,
      "loss": 3.0893,
      "step": 61962
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.911180019378662,
      "learning_rate": 0.0004991466813359774,
      "loss": 3.0924,
      "step": 61963
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5385090112686157,
      "learning_rate": 0.0004991436220254448,
      "loss": 3.2487,
      "step": 61964
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5996354818344116,
      "learning_rate": 0.0004991405626778876,
      "loss": 2.856,
      "step": 61965
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.523892879486084,
      "learning_rate": 0.0004991375032933064,
      "loss": 2.7779,
      "step": 61966
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9766669273376465,
      "learning_rate": 0.0004991344438717018,
      "loss": 3.184,
      "step": 61967
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4109117984771729,
      "learning_rate": 0.0004991313844130743,
      "loss": 2.8522,
      "step": 61968
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6373164653778076,
      "learning_rate": 0.0004991283249174246,
      "loss": 2.9689,
      "step": 61969
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.418644905090332,
      "learning_rate": 0.0004991252653847531,
      "loss": 3.0631,
      "step": 61970
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.8076369762420654,
      "learning_rate": 0.0004991222058150605,
      "loss": 3.0317,
      "step": 61971
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2849178314208984,
      "learning_rate": 0.0004991191462083473,
      "loss": 2.8821,
      "step": 61972
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.004147529602051,
      "learning_rate": 0.0004991160865646142,
      "loss": 3.1433,
      "step": 61973
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6044764518737793,
      "learning_rate": 0.0004991130268838615,
      "loss": 3.0474,
      "step": 61974
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5129474401474,
      "learning_rate": 0.00049910996716609,
      "loss": 2.9861,
      "step": 61975
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6020538806915283,
      "learning_rate": 0.0004991069074113001,
      "loss": 3.0452,
      "step": 61976
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7794469594955444,
      "learning_rate": 0.0004991038476194926,
      "loss": 3.0729,
      "step": 61977
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6329654455184937,
      "learning_rate": 0.000499100787790668,
      "loss": 2.9685,
      "step": 61978
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0140573978424072,
      "learning_rate": 0.0004990977279248266,
      "loss": 3.3017,
      "step": 61979
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1301305294036865,
      "learning_rate": 0.0004990946680219693,
      "loss": 3.1227,
      "step": 61980
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.2353012561798096,
      "learning_rate": 0.0004990916080820965,
      "loss": 2.9748,
      "step": 61981
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3760842084884644,
      "learning_rate": 0.0004990885481052089,
      "loss": 3.2631,
      "step": 61982
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9209479093551636,
      "learning_rate": 0.0004990854880913069,
      "loss": 2.9613,
      "step": 61983
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.220214605331421,
      "learning_rate": 0.0004990824280403912,
      "loss": 3.1474,
      "step": 61984
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6381932497024536,
      "learning_rate": 0.0004990793679524623,
      "loss": 3.0716,
      "step": 61985
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5210016965866089,
      "learning_rate": 0.0004990763078275207,
      "loss": 2.9259,
      "step": 61986
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9854544401168823,
      "learning_rate": 0.0004990732476655671,
      "loss": 3.009,
      "step": 61987
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.320269227027893,
      "learning_rate": 0.0004990701874666021,
      "loss": 3.1055,
      "step": 61988
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4625588655471802,
      "learning_rate": 0.0004990671272306262,
      "loss": 2.8867,
      "step": 61989
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.442093014717102,
      "learning_rate": 0.0004990640669576399,
      "loss": 3.1457,
      "step": 61990
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8841639757156372,
      "learning_rate": 0.0004990610066476438,
      "loss": 3.0749,
      "step": 61991
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.231759786605835,
      "learning_rate": 0.0004990579463006386,
      "loss": 3.067,
      "step": 61992
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5716551542282104,
      "learning_rate": 0.0004990548859166248,
      "loss": 3.1185,
      "step": 61993
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.447001338005066,
      "learning_rate": 0.0004990518254956028,
      "loss": 2.897,
      "step": 61994
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3921111822128296,
      "learning_rate": 0.0004990487650375733,
      "loss": 3.2127,
      "step": 61995
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.925640344619751,
      "learning_rate": 0.000499045704542537,
      "loss": 2.9696,
      "step": 61996
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.294687032699585,
      "learning_rate": 0.0004990426440104942,
      "loss": 2.9583,
      "step": 61997
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3940658569335938,
      "learning_rate": 0.0004990395834414457,
      "loss": 2.9836,
      "step": 61998
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.413789987564087,
      "learning_rate": 0.0004990365228353921,
      "loss": 3.148,
      "step": 61999
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6873500347137451,
      "learning_rate": 0.0004990334621923336,
      "loss": 3.1004,
      "step": 62000
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4184415340423584,
      "learning_rate": 0.0004990304015122712,
      "loss": 3.1857,
      "step": 62001
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6119024753570557,
      "learning_rate": 0.0004990273407952053,
      "loss": 2.8722,
      "step": 62002
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5417511463165283,
      "learning_rate": 0.0004990242800411364,
      "loss": 3.1795,
      "step": 62003
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.256383180618286,
      "learning_rate": 0.000499021219250065,
      "loss": 2.9361,
      "step": 62004
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.765838861465454,
      "learning_rate": 0.000499018158421992,
      "loss": 3.0493,
      "step": 62005
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.133601665496826,
      "learning_rate": 0.0004990150975569177,
      "loss": 3.1608,
      "step": 62006
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9839115142822266,
      "learning_rate": 0.0004990120366548427,
      "loss": 3.1594,
      "step": 62007
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.855466365814209,
      "learning_rate": 0.0004990089757157678,
      "loss": 2.9998,
      "step": 62008
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3403457403182983,
      "learning_rate": 0.0004990059147396932,
      "loss": 3.0572,
      "step": 62009
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4803998470306396,
      "learning_rate": 0.0004990028537266197,
      "loss": 3.0738,
      "step": 62010
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.144188165664673,
      "learning_rate": 0.0004989997926765477,
      "loss": 2.9256,
      "step": 62011
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.696548342704773,
      "learning_rate": 0.000498996731589478,
      "loss": 3.0268,
      "step": 62012
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8311755657196045,
      "learning_rate": 0.000498993670465411,
      "loss": 3.1968,
      "step": 62013
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.029987096786499,
      "learning_rate": 0.0004989906093043474,
      "loss": 3.053,
      "step": 62014
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7792716026306152,
      "learning_rate": 0.0004989875481062877,
      "loss": 3.3154,
      "step": 62015
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7424877882003784,
      "learning_rate": 0.0004989844868712323,
      "loss": 3.2205,
      "step": 62016
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2188501358032227,
      "learning_rate": 0.0004989814255991821,
      "loss": 2.8034,
      "step": 62017
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.016744613647461,
      "learning_rate": 0.0004989783642901373,
      "loss": 2.9369,
      "step": 62018
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4534001350402832,
      "learning_rate": 0.0004989753029440988,
      "loss": 3.2792,
      "step": 62019
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8597300052642822,
      "learning_rate": 0.000498972241561067,
      "loss": 3.1629,
      "step": 62020
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.758293628692627,
      "learning_rate": 0.0004989691801410425,
      "loss": 3.0506,
      "step": 62021
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7253972291946411,
      "learning_rate": 0.0004989661186840259,
      "loss": 2.9099,
      "step": 62022
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5146188735961914,
      "learning_rate": 0.0004989630571900177,
      "loss": 3.0334,
      "step": 62023
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8497477769851685,
      "learning_rate": 0.0004989599956590185,
      "loss": 2.9491,
      "step": 62024
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4111049175262451,
      "learning_rate": 0.000498956934091029,
      "loss": 2.972,
      "step": 62025
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2832460403442383,
      "learning_rate": 0.0004989538724860496,
      "loss": 2.9334,
      "step": 62026
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.002789258956909,
      "learning_rate": 0.0004989508108440809,
      "loss": 3.1296,
      "step": 62027
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7418965101242065,
      "learning_rate": 0.0004989477491651235,
      "loss": 3.1548,
      "step": 62028
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6211897134780884,
      "learning_rate": 0.000498944687449178,
      "loss": 3.2882,
      "step": 62029
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0371816158294678,
      "learning_rate": 0.0004989416256962447,
      "loss": 2.983,
      "step": 62030
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4907492399215698,
      "learning_rate": 0.0004989385639063246,
      "loss": 3.1208,
      "step": 62031
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3516473770141602,
      "learning_rate": 0.000498935502079418,
      "loss": 3.1649,
      "step": 62032
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6004972457885742,
      "learning_rate": 0.0004989324402155256,
      "loss": 3.0468,
      "step": 62033
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4480046033859253,
      "learning_rate": 0.0004989293783146478,
      "loss": 2.9022,
      "step": 62034
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.465809941291809,
      "learning_rate": 0.0004989263163767855,
      "loss": 3.1871,
      "step": 62035
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4980292320251465,
      "learning_rate": 0.0004989232544019388,
      "loss": 2.936,
      "step": 62036
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6908906698226929,
      "learning_rate": 0.0004989201923901086,
      "loss": 3.1773,
      "step": 62037
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.188204288482666,
      "learning_rate": 0.0004989171303412954,
      "loss": 2.9528,
      "step": 62038
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3632441759109497,
      "learning_rate": 0.0004989140682554998,
      "loss": 2.8706,
      "step": 62039
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.106703519821167,
      "learning_rate": 0.0004989110061327221,
      "loss": 3.2046,
      "step": 62040
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.479107141494751,
      "learning_rate": 0.0004989079439729633,
      "loss": 2.9741,
      "step": 62041
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.120487689971924,
      "learning_rate": 0.0004989048817762236,
      "loss": 2.9467,
      "step": 62042
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6000670194625854,
      "learning_rate": 0.0004989018195425039,
      "loss": 3.2438,
      "step": 62043
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4294323921203613,
      "learning_rate": 0.0004988987572718045,
      "loss": 2.9877,
      "step": 62044
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9479615688323975,
      "learning_rate": 0.0004988956949641261,
      "loss": 3.0413,
      "step": 62045
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4401216506958008,
      "learning_rate": 0.0004988926326194693,
      "loss": 3.1698,
      "step": 62046
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7057709693908691,
      "learning_rate": 0.0004988895702378345,
      "loss": 2.9622,
      "step": 62047
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.708884596824646,
      "learning_rate": 0.0004988865078192225,
      "loss": 3.0821,
      "step": 62048
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7673075199127197,
      "learning_rate": 0.0004988834453636335,
      "loss": 3.2222,
      "step": 62049
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9456297159194946,
      "learning_rate": 0.0004988803828710684,
      "loss": 3.0418,
      "step": 62050
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5940372943878174,
      "learning_rate": 0.0004988773203415278,
      "loss": 3.1322,
      "step": 62051
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7684636116027832,
      "learning_rate": 0.0004988742577750121,
      "loss": 2.9379,
      "step": 62052
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2798879146575928,
      "learning_rate": 0.000498871195171522,
      "loss": 3.1386,
      "step": 62053
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3469371795654297,
      "learning_rate": 0.0004988681325310579,
      "loss": 3.1206,
      "step": 62054
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.077280282974243,
      "learning_rate": 0.0004988650698536205,
      "loss": 2.8593,
      "step": 62055
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.892541766166687,
      "learning_rate": 0.0004988620071392103,
      "loss": 3.1935,
      "step": 62056
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8702563047409058,
      "learning_rate": 0.000498858944387828,
      "loss": 2.9374,
      "step": 62057
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4179775714874268,
      "learning_rate": 0.0004988558815994739,
      "loss": 3.1084,
      "step": 62058
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.412696361541748,
      "learning_rate": 0.0004988528187741488,
      "loss": 2.9568,
      "step": 62059
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.2403087615966797,
      "learning_rate": 0.0004988497559118532,
      "loss": 2.9689,
      "step": 62060
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3945497274398804,
      "learning_rate": 0.0004988466930125877,
      "loss": 3.2061,
      "step": 62061
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.421220064163208,
      "learning_rate": 0.0004988436300763528,
      "loss": 3.1178,
      "step": 62062
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3371386528015137,
      "learning_rate": 0.0004988405671031491,
      "loss": 3.1934,
      "step": 62063
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0782980918884277,
      "learning_rate": 0.0004988375040929772,
      "loss": 2.9016,
      "step": 62064
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5494626760482788,
      "learning_rate": 0.0004988344410458377,
      "loss": 2.9421,
      "step": 62065
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.624427080154419,
      "learning_rate": 0.0004988313779617312,
      "loss": 3.022,
      "step": 62066
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4516682624816895,
      "learning_rate": 0.0004988283148406581,
      "loss": 3.0036,
      "step": 62067
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.78020179271698,
      "learning_rate": 0.0004988252516826191,
      "loss": 3.0966,
      "step": 62068
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.577303409576416,
      "learning_rate": 0.0004988221884876146,
      "loss": 3.0472,
      "step": 62069
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6856344938278198,
      "learning_rate": 0.0004988191252556454,
      "loss": 2.9361,
      "step": 62070
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2456674575805664,
      "learning_rate": 0.000498816061986712,
      "loss": 3.0853,
      "step": 62071
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5177737474441528,
      "learning_rate": 0.000498812998680815,
      "loss": 2.9417,
      "step": 62072
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8721834421157837,
      "learning_rate": 0.0004988099353379548,
      "loss": 3.0576,
      "step": 62073
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5752264261245728,
      "learning_rate": 0.0004988068719581321,
      "loss": 3.1096,
      "step": 62074
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4635573625564575,
      "learning_rate": 0.0004988038085413475,
      "loss": 2.9778,
      "step": 62075
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.504385232925415,
      "learning_rate": 0.0004988007450876014,
      "loss": 3.1633,
      "step": 62076
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4176764488220215,
      "learning_rate": 0.0004987976815968946,
      "loss": 2.9562,
      "step": 62077
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.654103398323059,
      "learning_rate": 0.0004987946180692275,
      "loss": 3.0891,
      "step": 62078
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.477691888809204,
      "learning_rate": 0.0004987915545046007,
      "loss": 2.9326,
      "step": 62079
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.689908742904663,
      "learning_rate": 0.0004987884909030149,
      "loss": 3.0956,
      "step": 62080
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4258882999420166,
      "learning_rate": 0.0004987854272644705,
      "loss": 3.0282,
      "step": 62081
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5132800340652466,
      "learning_rate": 0.0004987823635889682,
      "loss": 3.0113,
      "step": 62082
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9461560249328613,
      "learning_rate": 0.0004987792998765085,
      "loss": 3.2795,
      "step": 62083
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.830195665359497,
      "learning_rate": 0.0004987762361270919,
      "loss": 3.036,
      "step": 62084
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3686445951461792,
      "learning_rate": 0.000498773172340719,
      "loss": 3.0217,
      "step": 62085
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0818276405334473,
      "learning_rate": 0.0004987701085173905,
      "loss": 3.233,
      "step": 62086
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.089773654937744,
      "learning_rate": 0.000498767044657107,
      "loss": 3.1582,
      "step": 62087
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5842355489730835,
      "learning_rate": 0.0004987639807598689,
      "loss": 2.9707,
      "step": 62088
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2140352725982666,
      "learning_rate": 0.0004987609168256766,
      "loss": 3.1561,
      "step": 62089
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5466481447219849,
      "learning_rate": 0.0004987578528545311,
      "loss": 3.125,
      "step": 62090
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.656772494316101,
      "learning_rate": 0.0004987547888464327,
      "loss": 3.0684,
      "step": 62091
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4617655277252197,
      "learning_rate": 0.0004987517248013821,
      "loss": 3.0843,
      "step": 62092
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5889006853103638,
      "learning_rate": 0.0004987486607193797,
      "loss": 2.8458,
      "step": 62093
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5442614555358887,
      "learning_rate": 0.0004987455966004263,
      "loss": 3.029,
      "step": 62094
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5376787185668945,
      "learning_rate": 0.0004987425324445222,
      "loss": 3.1403,
      "step": 62095
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6461879014968872,
      "learning_rate": 0.0004987394682516682,
      "loss": 2.9591,
      "step": 62096
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7631080150604248,
      "learning_rate": 0.0004987364040218647,
      "loss": 3.308,
      "step": 62097
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4477251768112183,
      "learning_rate": 0.0004987333397551124,
      "loss": 2.8184,
      "step": 62098
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9474804401397705,
      "learning_rate": 0.000498730275451412,
      "loss": 2.8815,
      "step": 62099
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4720048904418945,
      "learning_rate": 0.0004987272111107637,
      "loss": 3.0025,
      "step": 62100
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7688889503479004,
      "learning_rate": 0.0004987241467331682,
      "loss": 3.0342,
      "step": 62101
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.702939748764038,
      "learning_rate": 0.0004987210823186262,
      "loss": 3.1639,
      "step": 62102
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6291873455047607,
      "learning_rate": 0.0004987180178671383,
      "loss": 2.8619,
      "step": 62103
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.051534414291382,
      "learning_rate": 0.0004987149533787049,
      "loss": 2.754,
      "step": 62104
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.427121162414551,
      "learning_rate": 0.0004987118888533265,
      "loss": 3.2853,
      "step": 62105
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5950465202331543,
      "learning_rate": 0.000498708824291004,
      "loss": 2.9964,
      "step": 62106
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6936891078948975,
      "learning_rate": 0.0004987057596917377,
      "loss": 3.3082,
      "step": 62107
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.931781530380249,
      "learning_rate": 0.0004987026950555283,
      "loss": 2.9351,
      "step": 62108
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7225302457809448,
      "learning_rate": 0.0004986996303823763,
      "loss": 3.0973,
      "step": 62109
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5854933261871338,
      "learning_rate": 0.0004986965656722822,
      "loss": 2.966,
      "step": 62110
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6408402919769287,
      "learning_rate": 0.0004986935009252467,
      "loss": 3.0504,
      "step": 62111
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7326452732086182,
      "learning_rate": 0.0004986904361412704,
      "loss": 3.0745,
      "step": 62112
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8846932649612427,
      "learning_rate": 0.0004986873713203537,
      "loss": 2.967,
      "step": 62113
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4634099006652832,
      "learning_rate": 0.0004986843064624973,
      "loss": 2.9946,
      "step": 62114
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6058841943740845,
      "learning_rate": 0.0004986812415677018,
      "loss": 3.1863,
      "step": 62115
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2392959594726562,
      "learning_rate": 0.0004986781766359676,
      "loss": 2.7898,
      "step": 62116
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.559626579284668,
      "learning_rate": 0.0004986751116672954,
      "loss": 3.2879,
      "step": 62117
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6915215253829956,
      "learning_rate": 0.0004986720466616858,
      "loss": 2.9208,
      "step": 62118
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5718642473220825,
      "learning_rate": 0.0004986689816191393,
      "loss": 2.8829,
      "step": 62119
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7847274541854858,
      "learning_rate": 0.0004986659165396565,
      "loss": 2.9586,
      "step": 62120
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7185183763504028,
      "learning_rate": 0.000498662851423238,
      "loss": 3.1861,
      "step": 62121
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8486084938049316,
      "learning_rate": 0.0004986597862698842,
      "loss": 3.3223,
      "step": 62122
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6665951013565063,
      "learning_rate": 0.000498656721079596,
      "loss": 3.1023,
      "step": 62123
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5000277757644653,
      "learning_rate": 0.0004986536558523735,
      "loss": 2.876,
      "step": 62124
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7565912008285522,
      "learning_rate": 0.0004986505905882177,
      "loss": 3.1744,
      "step": 62125
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0046114921569824,
      "learning_rate": 0.000498647525287129,
      "loss": 3.1748,
      "step": 62126
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7390786409378052,
      "learning_rate": 0.000498644459949108,
      "loss": 3.0721,
      "step": 62127
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7670490741729736,
      "learning_rate": 0.0004986413945741551,
      "loss": 3.1008,
      "step": 62128
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1418020725250244,
      "learning_rate": 0.000498638329162271,
      "loss": 3.2813,
      "step": 62129
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.758650779724121,
      "learning_rate": 0.0004986352637134565,
      "loss": 2.9105,
      "step": 62130
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6807252168655396,
      "learning_rate": 0.0004986321982277119,
      "loss": 2.8516,
      "step": 62131
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5200879573822021,
      "learning_rate": 0.0004986291327050377,
      "loss": 2.9241,
      "step": 62132
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.438306450843811,
      "learning_rate": 0.0004986260671454347,
      "loss": 2.8996,
      "step": 62133
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5314342975616455,
      "learning_rate": 0.0004986230015489033,
      "loss": 2.8715,
      "step": 62134
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2855933904647827,
      "learning_rate": 0.0004986199359154442,
      "loss": 2.855,
      "step": 62135
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6173863410949707,
      "learning_rate": 0.0004986168702450578,
      "loss": 2.9631,
      "step": 62136
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4638367891311646,
      "learning_rate": 0.0004986138045377449,
      "loss": 2.8925,
      "step": 62137
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8030558824539185,
      "learning_rate": 0.0004986107387935059,
      "loss": 2.8159,
      "step": 62138
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.701238751411438,
      "learning_rate": 0.0004986076730123414,
      "loss": 3.3282,
      "step": 62139
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.615814208984375,
      "learning_rate": 0.000498604607194252,
      "loss": 3.0078,
      "step": 62140
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9584908485412598,
      "learning_rate": 0.0004986015413392382,
      "loss": 2.6529,
      "step": 62141
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6542370319366455,
      "learning_rate": 0.0004985984754473006,
      "loss": 3.1193,
      "step": 62142
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.585585594177246,
      "learning_rate": 0.0004985954095184399,
      "loss": 3.1025,
      "step": 62143
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6156522035598755,
      "learning_rate": 0.0004985923435526564,
      "loss": 2.8983,
      "step": 62144
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.154428482055664,
      "learning_rate": 0.000498589277549951,
      "loss": 3.0546,
      "step": 62145
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5331971645355225,
      "learning_rate": 0.0004985862115103242,
      "loss": 3.0108,
      "step": 62146
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9801369905471802,
      "learning_rate": 0.0004985831454337763,
      "loss": 2.9373,
      "step": 62147
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4657773971557617,
      "learning_rate": 0.0004985800793203081,
      "loss": 3.0707,
      "step": 62148
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9454896450042725,
      "learning_rate": 0.0004985770131699201,
      "loss": 2.7833,
      "step": 62149
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9848346710205078,
      "learning_rate": 0.0004985739469826129,
      "loss": 2.8865,
      "step": 62150
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9988051652908325,
      "learning_rate": 0.0004985708807583871,
      "loss": 3.1426,
      "step": 62151
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.976324439048767,
      "learning_rate": 0.0004985678144972431,
      "loss": 2.9819,
      "step": 62152
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9357556104660034,
      "learning_rate": 0.0004985647481991816,
      "loss": 2.9025,
      "step": 62153
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2630422115325928,
      "learning_rate": 0.0004985616818642031,
      "loss": 2.9342,
      "step": 62154
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7780169248580933,
      "learning_rate": 0.0004985586154923086,
      "loss": 2.9855,
      "step": 62155
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.7508413791656494,
      "learning_rate": 0.0004985555490834979,
      "loss": 2.8904,
      "step": 62156
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.727262258529663,
      "learning_rate": 0.0004985524826377722,
      "loss": 3.0343,
      "step": 62157
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2724578380584717,
      "learning_rate": 0.0004985494161551318,
      "loss": 2.8979,
      "step": 62158
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3000192642211914,
      "learning_rate": 0.0004985463496355773,
      "loss": 3.142,
      "step": 62159
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5089153051376343,
      "learning_rate": 0.0004985432830791091,
      "loss": 2.9909,
      "step": 62160
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7531872987747192,
      "learning_rate": 0.0004985402164857282,
      "loss": 3.0976,
      "step": 62161
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.089745044708252,
      "learning_rate": 0.0004985371498554349,
      "loss": 2.7674,
      "step": 62162
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7392477989196777,
      "learning_rate": 0.0004985340831882298,
      "loss": 3.1262,
      "step": 62163
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.5613646507263184,
      "learning_rate": 0.0004985310164841134,
      "loss": 2.9685,
      "step": 62164
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.8173820972442627,
      "learning_rate": 0.0004985279497430864,
      "loss": 3.2092,
      "step": 62165
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6205525398254395,
      "learning_rate": 0.0004985248829651492,
      "loss": 3.148,
      "step": 62166
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.363647937774658,
      "learning_rate": 0.0004985218161503026,
      "loss": 2.9576,
      "step": 62167
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.548080563545227,
      "learning_rate": 0.000498518749298547,
      "loss": 3.1045,
      "step": 62168
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.949045419692993,
      "learning_rate": 0.0004985156824098831,
      "loss": 3.0528,
      "step": 62169
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.544330596923828,
      "learning_rate": 0.0004985126154843113,
      "loss": 2.874,
      "step": 62170
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6543776988983154,
      "learning_rate": 0.0004985095485218322,
      "loss": 3.2607,
      "step": 62171
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3884243965148926,
      "learning_rate": 0.0004985064815224464,
      "loss": 3.2303,
      "step": 62172
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.645401120185852,
      "learning_rate": 0.0004985034144861547,
      "loss": 3.2747,
      "step": 62173
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0279221534729004,
      "learning_rate": 0.0004985003474129573,
      "loss": 3.1838,
      "step": 62174
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.7502307891845703,
      "learning_rate": 0.000498497280302855,
      "loss": 3.0235,
      "step": 62175
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.474259614944458,
      "learning_rate": 0.0004984942131558484,
      "loss": 3.396,
      "step": 62176
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7361313104629517,
      "learning_rate": 0.0004984911459719378,
      "loss": 3.0873,
      "step": 62177
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.824196219444275,
      "learning_rate": 0.000498488078751124,
      "loss": 2.9683,
      "step": 62178
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5272061824798584,
      "learning_rate": 0.0004984850114934076,
      "loss": 3.1244,
      "step": 62179
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4016969203948975,
      "learning_rate": 0.000498481944198789,
      "loss": 3.2099,
      "step": 62180
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5648860931396484,
      "learning_rate": 0.0004984788768672689,
      "loss": 3.162,
      "step": 62181
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.482474684715271,
      "learning_rate": 0.0004984758094988478,
      "loss": 3.0385,
      "step": 62182
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7530779838562012,
      "learning_rate": 0.0004984727420935263,
      "loss": 3.1621,
      "step": 62183
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7835174798965454,
      "learning_rate": 0.000498469674651305,
      "loss": 2.9839,
      "step": 62184
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.7762277126312256,
      "learning_rate": 0.0004984666071721845,
      "loss": 3.0638,
      "step": 62185
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.364417552947998,
      "learning_rate": 0.0004984635396561652,
      "loss": 2.8628,
      "step": 62186
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.371952772140503,
      "learning_rate": 0.0004984604721032478,
      "loss": 3.1042,
      "step": 62187
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4882920980453491,
      "learning_rate": 0.000498457404513433,
      "loss": 2.9802,
      "step": 62188
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.040591239929199,
      "learning_rate": 0.0004984543368867211,
      "loss": 3.0266,
      "step": 62189
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4689350128173828,
      "learning_rate": 0.0004984512692231128,
      "loss": 3.117,
      "step": 62190
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.283023715019226,
      "learning_rate": 0.0004984482015226086,
      "loss": 3.1731,
      "step": 62191
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9841433763504028,
      "learning_rate": 0.0004984451337852092,
      "loss": 2.9491,
      "step": 62192
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4358851909637451,
      "learning_rate": 0.0004984420660109151,
      "loss": 2.9366,
      "step": 62193
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6569725275039673,
      "learning_rate": 0.0004984389981997269,
      "loss": 2.8192,
      "step": 62194
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7663148641586304,
      "learning_rate": 0.0004984359303516451,
      "loss": 3.2309,
      "step": 62195
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.916764497756958,
      "learning_rate": 0.0004984328624666703,
      "loss": 2.961,
      "step": 62196
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6914678812026978,
      "learning_rate": 0.0004984297945448032,
      "loss": 3.0582,
      "step": 62197
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4504268169403076,
      "learning_rate": 0.0004984267265860441,
      "loss": 3.122,
      "step": 62198
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.447804570198059,
      "learning_rate": 0.0004984236585903938,
      "loss": 3.077,
      "step": 62199
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.364863634109497,
      "learning_rate": 0.0004984205905578528,
      "loss": 2.8964,
      "step": 62200
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5590184926986694,
      "learning_rate": 0.0004984175224884217,
      "loss": 3.255,
      "step": 62201
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5242023468017578,
      "learning_rate": 0.0004984144543821009,
      "loss": 2.9619,
      "step": 62202
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7262647151947021,
      "learning_rate": 0.0004984113862388913,
      "loss": 3.3228,
      "step": 62203
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5692474842071533,
      "learning_rate": 0.0004984083180587932,
      "loss": 3.0071,
      "step": 62204
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4486786127090454,
      "learning_rate": 0.0004984052498418071,
      "loss": 3.1623,
      "step": 62205
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4479782581329346,
      "learning_rate": 0.0004984021815879339,
      "loss": 3.0195,
      "step": 62206
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9951893091201782,
      "learning_rate": 0.000498399113297174,
      "loss": 3.032,
      "step": 62207
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5316188335418701,
      "learning_rate": 0.0004983960449695279,
      "loss": 2.7922,
      "step": 62208
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8374230861663818,
      "learning_rate": 0.0004983929766049963,
      "loss": 3.0189,
      "step": 62209
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.857996940612793,
      "learning_rate": 0.0004983899082035795,
      "loss": 2.9924,
      "step": 62210
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0140116214752197,
      "learning_rate": 0.0004983868397652784,
      "loss": 3.1728,
      "step": 62211
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9173052310943604,
      "learning_rate": 0.0004983837712900935,
      "loss": 2.8997,
      "step": 62212
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.039163112640381,
      "learning_rate": 0.0004983807027780252,
      "loss": 3.0712,
      "step": 62213
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.695079803466797,
      "learning_rate": 0.0004983776342290742,
      "loss": 2.8231,
      "step": 62214
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9245268106460571,
      "learning_rate": 0.0004983745656432411,
      "loss": 2.9676,
      "step": 62215
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.103539228439331,
      "learning_rate": 0.0004983714970205265,
      "loss": 3.1283,
      "step": 62216
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5998343229293823,
      "learning_rate": 0.0004983684283609307,
      "loss": 3.2248,
      "step": 62217
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8215303421020508,
      "learning_rate": 0.0004983653596644546,
      "loss": 2.9006,
      "step": 62218
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0567004680633545,
      "learning_rate": 0.0004983622909310987,
      "loss": 2.8746,
      "step": 62219
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9762136936187744,
      "learning_rate": 0.0004983592221608634,
      "loss": 3.1059,
      "step": 62220
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8164535760879517,
      "learning_rate": 0.0004983561533537493,
      "loss": 3.116,
      "step": 62221
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.83329176902771,
      "learning_rate": 0.0004983530845097573,
      "loss": 3.0507,
      "step": 62222
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6472288370132446,
      "learning_rate": 0.0004983500156288875,
      "loss": 2.8788,
      "step": 62223
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.408361554145813,
      "learning_rate": 0.0004983469467111407,
      "loss": 2.9929,
      "step": 62224
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.030363082885742,
      "learning_rate": 0.0004983438777565175,
      "loss": 2.7833,
      "step": 62225
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5817404985427856,
      "learning_rate": 0.0004983408087650185,
      "loss": 2.733,
      "step": 62226
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5129286050796509,
      "learning_rate": 0.000498337739736644,
      "loss": 2.8794,
      "step": 62227
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4600071907043457,
      "learning_rate": 0.000498334670671395,
      "loss": 3.026,
      "step": 62228
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7697153091430664,
      "learning_rate": 0.0004983316015692717,
      "loss": 2.9608,
      "step": 62229
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7369135618209839,
      "learning_rate": 0.0004983285324302748,
      "loss": 3.0449,
      "step": 62230
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4977061748504639,
      "learning_rate": 0.000498325463254405,
      "loss": 3.0285,
      "step": 62231
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5753986835479736,
      "learning_rate": 0.0004983223940416627,
      "loss": 3.1761,
      "step": 62232
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4290443658828735,
      "learning_rate": 0.0004983193247920485,
      "loss": 3.2247,
      "step": 62233
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4050931930541992,
      "learning_rate": 0.0004983162555055631,
      "loss": 3.1063,
      "step": 62234
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.55820631980896,
      "learning_rate": 0.0004983131861822068,
      "loss": 3.1847,
      "step": 62235
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8729219436645508,
      "learning_rate": 0.0004983101168219804,
      "loss": 2.7773,
      "step": 62236
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5268230438232422,
      "learning_rate": 0.0004983070474248845,
      "loss": 3.2138,
      "step": 62237
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5015162229537964,
      "learning_rate": 0.0004983039779909194,
      "loss": 3.1801,
      "step": 62238
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.169351577758789,
      "learning_rate": 0.0004983009085200859,
      "loss": 3.1182,
      "step": 62239
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6791930198669434,
      "learning_rate": 0.0004982978390123846,
      "loss": 2.9981,
      "step": 62240
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4281302690505981,
      "learning_rate": 0.0004982947694678159,
      "loss": 2.9898,
      "step": 62241
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5168659687042236,
      "learning_rate": 0.0004982916998863805,
      "loss": 3.3603,
      "step": 62242
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.171592950820923,
      "learning_rate": 0.000498288630268079,
      "loss": 3.1925,
      "step": 62243
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.742871046066284,
      "learning_rate": 0.0004982855606129118,
      "loss": 2.7913,
      "step": 62244
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0869028568267822,
      "learning_rate": 0.0004982824909208796,
      "loss": 3.1942,
      "step": 62245
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5589796304702759,
      "learning_rate": 0.0004982794211919829,
      "loss": 3.0835,
      "step": 62246
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7022408246994019,
      "learning_rate": 0.0004982763514262224,
      "loss": 2.8636,
      "step": 62247
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9206856489181519,
      "learning_rate": 0.0004982732816235985,
      "loss": 3.1734,
      "step": 62248
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8146347999572754,
      "learning_rate": 0.0004982702117841119,
      "loss": 2.9608,
      "step": 62249
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.0499844551086426,
      "learning_rate": 0.0004982671419077631,
      "loss": 3.1742,
      "step": 62250
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7464903593063354,
      "learning_rate": 0.0004982640719945527,
      "loss": 2.841,
      "step": 62251
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6160465478897095,
      "learning_rate": 0.0004982610020444814,
      "loss": 3.0366,
      "step": 62252
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7087936401367188,
      "learning_rate": 0.0004982579320575495,
      "loss": 2.9922,
      "step": 62253
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0589487552642822,
      "learning_rate": 0.0004982548620337577,
      "loss": 2.8995,
      "step": 62254
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.218061923980713,
      "learning_rate": 0.0004982517919731066,
      "loss": 3.148,
      "step": 62255
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6220402717590332,
      "learning_rate": 0.0004982487218755968,
      "loss": 2.8288,
      "step": 62256
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4302504062652588,
      "learning_rate": 0.0004982456517412287,
      "loss": 3.1758,
      "step": 62257
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.91164231300354,
      "learning_rate": 0.0004982425815700031,
      "loss": 2.9775,
      "step": 62258
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9816590547561646,
      "learning_rate": 0.0004982395113619204,
      "loss": 2.8491,
      "step": 62259
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7433462142944336,
      "learning_rate": 0.0004982364411169813,
      "loss": 3.0573,
      "step": 62260
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8212333917617798,
      "learning_rate": 0.0004982333708351863,
      "loss": 2.7472,
      "step": 62261
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6951515674591064,
      "learning_rate": 0.0004982303005165359,
      "loss": 3.1712,
      "step": 62262
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.6000919342041016,
      "learning_rate": 0.0004982272301610308,
      "loss": 2.9559,
      "step": 62263
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7048072814941406,
      "learning_rate": 0.0004982241597686715,
      "loss": 2.9798,
      "step": 62264
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4914816617965698,
      "learning_rate": 0.0004982210893394587,
      "loss": 3.1514,
      "step": 62265
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1571667194366455,
      "learning_rate": 0.0004982180188733926,
      "loss": 3.0092,
      "step": 62266
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.5813262462615967,
      "learning_rate": 0.0004982149483704742,
      "loss": 3.0179,
      "step": 62267
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.874690055847168,
      "learning_rate": 0.0004982118778307039,
      "loss": 3.0355,
      "step": 62268
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.338548183441162,
      "learning_rate": 0.0004982088072540822,
      "loss": 3.0932,
      "step": 62269
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1055245399475098,
      "learning_rate": 0.0004982057366406099,
      "loss": 3.049,
      "step": 62270
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5556385517120361,
      "learning_rate": 0.0004982026659902872,
      "loss": 3.1202,
      "step": 62271
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5948129892349243,
      "learning_rate": 0.000498199595303115,
      "loss": 2.9553,
      "step": 62272
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1248433589935303,
      "learning_rate": 0.0004981965245790937,
      "loss": 3.0173,
      "step": 62273
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6261556148529053,
      "learning_rate": 0.000498193453818224,
      "loss": 2.9749,
      "step": 62274
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5857977867126465,
      "learning_rate": 0.0004981903830205063,
      "loss": 3.1227,
      "step": 62275
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8025617599487305,
      "learning_rate": 0.0004981873121859414,
      "loss": 2.908,
      "step": 62276
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8931374549865723,
      "learning_rate": 0.0004981842413145296,
      "loss": 3.0227,
      "step": 62277
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.425901174545288,
      "learning_rate": 0.0004981811704062717,
      "loss": 3.0318,
      "step": 62278
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3665236234664917,
      "learning_rate": 0.0004981780994611682,
      "loss": 2.9667,
      "step": 62279
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8182313442230225,
      "learning_rate": 0.0004981750284792196,
      "loss": 2.9975,
      "step": 62280
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6184027194976807,
      "learning_rate": 0.0004981719574604264,
      "loss": 3.0597,
      "step": 62281
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1214005947113037,
      "learning_rate": 0.0004981688864047894,
      "loss": 3.4046,
      "step": 62282
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.715534210205078,
      "learning_rate": 0.0004981658153123092,
      "loss": 3.0779,
      "step": 62283
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.024122714996338,
      "learning_rate": 0.000498162744182986,
      "loss": 2.8295,
      "step": 62284
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9430763721466064,
      "learning_rate": 0.0004981596730168207,
      "loss": 3.0083,
      "step": 62285
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.6035470962524414,
      "learning_rate": 0.000498156601813814,
      "loss": 3.1928,
      "step": 62286
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2109763622283936,
      "learning_rate": 0.0004981535305739659,
      "loss": 2.9711,
      "step": 62287
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4653786420822144,
      "learning_rate": 0.0004981504592972774,
      "loss": 2.8566,
      "step": 62288
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2357394695281982,
      "learning_rate": 0.0004981473879837492,
      "loss": 2.8412,
      "step": 62289
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.999785304069519,
      "learning_rate": 0.0004981443166333814,
      "loss": 3.0084,
      "step": 62290
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3795262575149536,
      "learning_rate": 0.0004981412452461749,
      "loss": 2.9575,
      "step": 62291
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.351943254470825,
      "learning_rate": 0.0004981381738221304,
      "loss": 2.9336,
      "step": 62292
    },
    {
      "epoch": 0.81,
      "grad_norm": 4.345566749572754,
      "learning_rate": 0.000498135102361248,
      "loss": 2.8921,
      "step": 62293
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.647720217704773,
      "learning_rate": 0.0004981320308635286,
      "loss": 3.1593,
      "step": 62294
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3778562545776367,
      "learning_rate": 0.0004981289593289728,
      "loss": 2.9137,
      "step": 62295
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.294931173324585,
      "learning_rate": 0.0004981258877575811,
      "loss": 3.1043,
      "step": 62296
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7159945964813232,
      "learning_rate": 0.000498122816149354,
      "loss": 3.0636,
      "step": 62297
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8290749788284302,
      "learning_rate": 0.0004981197445042922,
      "loss": 2.9739,
      "step": 62298
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5941604375839233,
      "learning_rate": 0.000498116672822396,
      "loss": 3.2137,
      "step": 62299
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.474770188331604,
      "learning_rate": 0.0004981136011036664,
      "loss": 2.9454,
      "step": 62300
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8583130836486816,
      "learning_rate": 0.0004981105293481037,
      "loss": 3.0513,
      "step": 62301
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5855419635772705,
      "learning_rate": 0.0004981074575557084,
      "loss": 2.9313,
      "step": 62302
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4426673650741577,
      "learning_rate": 0.0004981043857264813,
      "loss": 3.0191,
      "step": 62303
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6636074781417847,
      "learning_rate": 0.0004981013138604228,
      "loss": 2.8905,
      "step": 62304
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6507846117019653,
      "learning_rate": 0.0004980982419575335,
      "loss": 2.7642,
      "step": 62305
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5154417753219604,
      "learning_rate": 0.000498095170017814,
      "loss": 3.3021,
      "step": 62306
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.7403526306152344,
      "learning_rate": 0.000498092098041265,
      "loss": 2.9551,
      "step": 62307
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7218571901321411,
      "learning_rate": 0.0004980890260278868,
      "loss": 2.9023,
      "step": 62308
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6608645915985107,
      "learning_rate": 0.0004980859539776802,
      "loss": 2.9592,
      "step": 62309
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6945562362670898,
      "learning_rate": 0.0004980828818906456,
      "loss": 2.9449,
      "step": 62310
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4374542236328125,
      "learning_rate": 0.0004980798097667837,
      "loss": 2.9559,
      "step": 62311
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3232344388961792,
      "learning_rate": 0.000498076737606095,
      "loss": 3.1422,
      "step": 62312
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3409639596939087,
      "learning_rate": 0.0004980736654085802,
      "loss": 3.0112,
      "step": 62313
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7003599405288696,
      "learning_rate": 0.0004980705931742396,
      "loss": 2.9948,
      "step": 62314
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6398491859436035,
      "learning_rate": 0.0004980675209030741,
      "loss": 3.1796,
      "step": 62315
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.460747241973877,
      "learning_rate": 0.0004980644485950839,
      "loss": 3.1129,
      "step": 62316
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6382131576538086,
      "learning_rate": 0.0004980613762502699,
      "loss": 3.289,
      "step": 62317
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8111153841018677,
      "learning_rate": 0.0004980583038686326,
      "loss": 2.9902,
      "step": 62318
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6335968971252441,
      "learning_rate": 0.0004980552314501725,
      "loss": 3.0282,
      "step": 62319
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.4577395915985107,
      "learning_rate": 0.0004980521589948901,
      "loss": 2.8668,
      "step": 62320
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.835099458694458,
      "learning_rate": 0.0004980490865027861,
      "loss": 2.9611,
      "step": 62321
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7687526941299438,
      "learning_rate": 0.0004980460139738612,
      "loss": 2.9356,
      "step": 62322
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7949817180633545,
      "learning_rate": 0.0004980429414081156,
      "loss": 3.0263,
      "step": 62323
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0336127281188965,
      "learning_rate": 0.0004980398688055501,
      "loss": 3.0587,
      "step": 62324
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.253235101699829,
      "learning_rate": 0.0004980367961661654,
      "loss": 2.9421,
      "step": 62325
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3314976692199707,
      "learning_rate": 0.0004980337234899619,
      "loss": 2.9285,
      "step": 62326
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4631239175796509,
      "learning_rate": 0.00049803065077694,
      "loss": 3.0747,
      "step": 62327
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0083420276641846,
      "learning_rate": 0.0004980275780271006,
      "loss": 3.1867,
      "step": 62328
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5652375221252441,
      "learning_rate": 0.0004980245052404442,
      "loss": 2.9656,
      "step": 62329
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7382644414901733,
      "learning_rate": 0.0004980214324169711,
      "loss": 2.9695,
      "step": 62330
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.192711591720581,
      "learning_rate": 0.0004980183595566823,
      "loss": 3.0186,
      "step": 62331
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1481406688690186,
      "learning_rate": 0.000498015286659578,
      "loss": 3.1429,
      "step": 62332
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6909594535827637,
      "learning_rate": 0.000498012213725659,
      "loss": 3.1701,
      "step": 62333
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.429673194885254,
      "learning_rate": 0.0004980091407549257,
      "loss": 3.0961,
      "step": 62334
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.230886936187744,
      "learning_rate": 0.0004980060677473789,
      "loss": 2.7178,
      "step": 62335
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.767528772354126,
      "learning_rate": 0.0004980029947030191,
      "loss": 2.8689,
      "step": 62336
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.733779788017273,
      "learning_rate": 0.0004979999216218466,
      "loss": 3.0616,
      "step": 62337
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5286005735397339,
      "learning_rate": 0.0004979968485038622,
      "loss": 2.9995,
      "step": 62338
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.737908363342285,
      "learning_rate": 0.0004979937753490665,
      "loss": 2.6645,
      "step": 62339
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8120973110198975,
      "learning_rate": 0.00049799070215746,
      "loss": 3.1388,
      "step": 62340
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2660439014434814,
      "learning_rate": 0.0004979876289290433,
      "loss": 2.6803,
      "step": 62341
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5573046207427979,
      "learning_rate": 0.000497984555663817,
      "loss": 3.1122,
      "step": 62342
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.544558048248291,
      "learning_rate": 0.0004979814823617816,
      "loss": 3.137,
      "step": 62343
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5076193809509277,
      "learning_rate": 0.0004979784090229377,
      "loss": 2.9808,
      "step": 62344
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.796208143234253,
      "learning_rate": 0.0004979753356472859,
      "loss": 2.9081,
      "step": 62345
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.122872829437256,
      "learning_rate": 0.0004979722622348267,
      "loss": 3.1698,
      "step": 62346
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6878993511199951,
      "learning_rate": 0.0004979691887855608,
      "loss": 3.1585,
      "step": 62347
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6861848831176758,
      "learning_rate": 0.0004979661152994886,
      "loss": 3.2269,
      "step": 62348
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9505091905593872,
      "learning_rate": 0.0004979630417766108,
      "loss": 3.3714,
      "step": 62349
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8068785667419434,
      "learning_rate": 0.0004979599682169279,
      "loss": 2.7414,
      "step": 62350
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.004561424255371,
      "learning_rate": 0.0004979568946204406,
      "loss": 3.1286,
      "step": 62351
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4155998229980469,
      "learning_rate": 0.0004979538209871493,
      "loss": 3.1062,
      "step": 62352
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7116360664367676,
      "learning_rate": 0.0004979507473170547,
      "loss": 3.2318,
      "step": 62353
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4604222774505615,
      "learning_rate": 0.0004979476736101573,
      "loss": 3.0733,
      "step": 62354
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.280883550643921,
      "learning_rate": 0.0004979445998664576,
      "loss": 2.8909,
      "step": 62355
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5463011264801025,
      "learning_rate": 0.0004979415260859564,
      "loss": 3.096,
      "step": 62356
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5202556848526,
      "learning_rate": 0.000497938452268654,
      "loss": 2.8847,
      "step": 62357
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5540803670883179,
      "learning_rate": 0.0004979353784145512,
      "loss": 2.8921,
      "step": 62358
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8084022998809814,
      "learning_rate": 0.0004979323045236486,
      "loss": 3.0354,
      "step": 62359
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5880851745605469,
      "learning_rate": 0.0004979292305959465,
      "loss": 3.2582,
      "step": 62360
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.419382095336914,
      "learning_rate": 0.0004979261566314454,
      "loss": 3.278,
      "step": 62361
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.59113347530365,
      "learning_rate": 0.0004979230826301465,
      "loss": 2.8177,
      "step": 62362
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.545627474784851,
      "learning_rate": 0.0004979200085920497,
      "loss": 3.3296,
      "step": 62363
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4027855396270752,
      "learning_rate": 0.0004979169345171559,
      "loss": 2.9429,
      "step": 62364
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5699412822723389,
      "learning_rate": 0.0004979138604054656,
      "loss": 3.2471,
      "step": 62365
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.063002347946167,
      "learning_rate": 0.0004979107862569795,
      "loss": 3.0537,
      "step": 62366
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8942183256149292,
      "learning_rate": 0.000497907712071698,
      "loss": 3.2351,
      "step": 62367
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.878388524055481,
      "learning_rate": 0.0004979046378496215,
      "loss": 3.1113,
      "step": 62368
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.620672583580017,
      "learning_rate": 0.000497901563590751,
      "loss": 2.9236,
      "step": 62369
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9366177320480347,
      "learning_rate": 0.0004978984892950869,
      "loss": 2.8653,
      "step": 62370
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4181753396987915,
      "learning_rate": 0.0004978954149626296,
      "loss": 2.9094,
      "step": 62371
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6967381238937378,
      "learning_rate": 0.0004978923405933799,
      "loss": 3.0145,
      "step": 62372
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7771672010421753,
      "learning_rate": 0.0004978892661873383,
      "loss": 3.0094,
      "step": 62373
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7907990217208862,
      "learning_rate": 0.0004978861917445053,
      "loss": 2.7641,
      "step": 62374
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8108389377593994,
      "learning_rate": 0.0004978831172648815,
      "loss": 3.3177,
      "step": 62375
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6107102632522583,
      "learning_rate": 0.0004978800427484674,
      "loss": 2.8454,
      "step": 62376
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5749366283416748,
      "learning_rate": 0.0004978769681952638,
      "loss": 3.076,
      "step": 62377
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.402799129486084,
      "learning_rate": 0.0004978738936052711,
      "loss": 2.8854,
      "step": 62378
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5966166257858276,
      "learning_rate": 0.0004978708189784899,
      "loss": 3.0471,
      "step": 62379
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.469216227531433,
      "learning_rate": 0.0004978677443149208,
      "loss": 2.9943,
      "step": 62380
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8915773630142212,
      "learning_rate": 0.0004978646696145644,
      "loss": 3.0776,
      "step": 62381
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5048468112945557,
      "learning_rate": 0.0004978615948774211,
      "loss": 3.1554,
      "step": 62382
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.013833522796631,
      "learning_rate": 0.0004978585201034917,
      "loss": 3.0144,
      "step": 62383
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6316615343093872,
      "learning_rate": 0.0004978554452927767,
      "loss": 3.1997,
      "step": 62384
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3776899576187134,
      "learning_rate": 0.0004978523704452766,
      "loss": 2.9894,
      "step": 62385
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.927822232246399,
      "learning_rate": 0.0004978492955609919,
      "loss": 3.1656,
      "step": 62386
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9495782852172852,
      "learning_rate": 0.0004978462206399234,
      "loss": 2.9873,
      "step": 62387
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5146795511245728,
      "learning_rate": 0.0004978431456820715,
      "loss": 2.9751,
      "step": 62388
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.8444597721099854,
      "learning_rate": 0.0004978400706874369,
      "loss": 3.1645,
      "step": 62389
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.057877779006958,
      "learning_rate": 0.0004978369956560199,
      "loss": 3.0112,
      "step": 62390
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9097654819488525,
      "learning_rate": 0.0004978339205878214,
      "loss": 3.1225,
      "step": 62391
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2161221504211426,
      "learning_rate": 0.0004978308454828419,
      "loss": 2.8808,
      "step": 62392
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.095766067504883,
      "learning_rate": 0.0004978277703410818,
      "loss": 2.9885,
      "step": 62393
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.310875177383423,
      "learning_rate": 0.0004978246951625418,
      "loss": 2.7434,
      "step": 62394
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5122851133346558,
      "learning_rate": 0.0004978216199472225,
      "loss": 3.0046,
      "step": 62395
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.8552093505859375,
      "learning_rate": 0.0004978185446951244,
      "loss": 2.9592,
      "step": 62396
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7977896928787231,
      "learning_rate": 0.0004978154694062481,
      "loss": 3.0008,
      "step": 62397
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.1336727142333984,
      "learning_rate": 0.0004978123940805943,
      "loss": 3.0611,
      "step": 62398
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8924109935760498,
      "learning_rate": 0.0004978093187181633,
      "loss": 3.009,
      "step": 62399
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9473077058792114,
      "learning_rate": 0.0004978062433189559,
      "loss": 3.05,
      "step": 62400
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.5517148971557617,
      "learning_rate": 0.0004978031678829725,
      "loss": 2.9093,
      "step": 62401
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8589187860488892,
      "learning_rate": 0.0004978000924102138,
      "loss": 2.9806,
      "step": 62402
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4532315731048584,
      "learning_rate": 0.0004977970169006803,
      "loss": 2.9231,
      "step": 62403
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.624231219291687,
      "learning_rate": 0.0004977939413543727,
      "loss": 3.139,
      "step": 62404
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.060884475708008,
      "learning_rate": 0.0004977908657712913,
      "loss": 2.9816,
      "step": 62405
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.536872386932373,
      "learning_rate": 0.0004977877901514369,
      "loss": 3.2991,
      "step": 62406
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3601092100143433,
      "learning_rate": 0.00049778471449481,
      "loss": 2.9727,
      "step": 62407
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.113219976425171,
      "learning_rate": 0.0004977816388014113,
      "loss": 3.0251,
      "step": 62408
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9775443077087402,
      "learning_rate": 0.0004977785630712412,
      "loss": 3.1754,
      "step": 62409
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5010223388671875,
      "learning_rate": 0.0004977754873043003,
      "loss": 3.0944,
      "step": 62410
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1341652870178223,
      "learning_rate": 0.0004977724115005893,
      "loss": 3.038,
      "step": 62411
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3313872814178467,
      "learning_rate": 0.0004977693356601086,
      "loss": 2.9099,
      "step": 62412
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4613152742385864,
      "learning_rate": 0.0004977662597828589,
      "loss": 3.1281,
      "step": 62413
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7389394044876099,
      "learning_rate": 0.0004977631838688406,
      "loss": 2.9205,
      "step": 62414
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7081276178359985,
      "learning_rate": 0.0004977601079180545,
      "loss": 3.2458,
      "step": 62415
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3334975242614746,
      "learning_rate": 0.000497757031930501,
      "loss": 3.532,
      "step": 62416
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3550264835357666,
      "learning_rate": 0.0004977539559061807,
      "loss": 3.01,
      "step": 62417
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3281464576721191,
      "learning_rate": 0.0004977508798450944,
      "loss": 3.0236,
      "step": 62418
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2224440574645996,
      "learning_rate": 0.0004977478037472423,
      "loss": 3.0565,
      "step": 62419
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2181239128112793,
      "learning_rate": 0.0004977447276126251,
      "loss": 3.3257,
      "step": 62420
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.097529172897339,
      "learning_rate": 0.0004977416514412436,
      "loss": 3.1195,
      "step": 62421
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4652209281921387,
      "learning_rate": 0.0004977385752330981,
      "loss": 3.0837,
      "step": 62422
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.388463258743286,
      "learning_rate": 0.0004977354989881893,
      "loss": 2.9871,
      "step": 62423
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.0444388389587402,
      "learning_rate": 0.0004977324227065177,
      "loss": 3.0039,
      "step": 62424
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.904727816581726,
      "learning_rate": 0.0004977293463880839,
      "loss": 2.9672,
      "step": 62425
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7318525314331055,
      "learning_rate": 0.0004977262700328885,
      "loss": 3.1058,
      "step": 62426
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.8987655639648438,
      "learning_rate": 0.0004977231936409321,
      "loss": 3.1063,
      "step": 62427
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.6560330390930176,
      "learning_rate": 0.0004977201172122152,
      "loss": 2.8496,
      "step": 62428
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0296547412872314,
      "learning_rate": 0.0004977170407467384,
      "loss": 2.7323,
      "step": 62429
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9360278844833374,
      "learning_rate": 0.0004977139642445023,
      "loss": 2.9042,
      "step": 62430
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9416555166244507,
      "learning_rate": 0.0004977108877055074,
      "loss": 2.9657,
      "step": 62431
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0710551738739014,
      "learning_rate": 0.0004977078111297542,
      "loss": 3.0363,
      "step": 62432
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5984710454940796,
      "learning_rate": 0.0004977047345172435,
      "loss": 2.937,
      "step": 62433
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4752227067947388,
      "learning_rate": 0.0004977016578679758,
      "loss": 3.0442,
      "step": 62434
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.826547145843506,
      "learning_rate": 0.0004976985811819516,
      "loss": 3.0671,
      "step": 62435
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9957616329193115,
      "learning_rate": 0.0004976955044591715,
      "loss": 2.9335,
      "step": 62436
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7856336832046509,
      "learning_rate": 0.0004976924276996361,
      "loss": 2.9082,
      "step": 62437
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.904691219329834,
      "learning_rate": 0.0004976893509033458,
      "loss": 2.92,
      "step": 62438
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1541497707366943,
      "learning_rate": 0.0004976862740703015,
      "loss": 2.9973,
      "step": 62439
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2811617851257324,
      "learning_rate": 0.0004976831972005035,
      "loss": 2.7736,
      "step": 62440
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6275066137313843,
      "learning_rate": 0.0004976801202939526,
      "loss": 3.0431,
      "step": 62441
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5571564435958862,
      "learning_rate": 0.000497677043350649,
      "loss": 3.017,
      "step": 62442
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5880006551742554,
      "learning_rate": 0.0004976739663705937,
      "loss": 2.7602,
      "step": 62443
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4071484804153442,
      "learning_rate": 0.000497670889353787,
      "loss": 2.8907,
      "step": 62444
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5081357955932617,
      "learning_rate": 0.0004976678123002295,
      "loss": 3.1689,
      "step": 62445
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4404356479644775,
      "learning_rate": 0.0004976647352099218,
      "loss": 2.9533,
      "step": 62446
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5891534090042114,
      "learning_rate": 0.0004976616580828646,
      "loss": 3.2055,
      "step": 62447
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.219766616821289,
      "learning_rate": 0.0004976585809190583,
      "loss": 3.1253,
      "step": 62448
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7769476175308228,
      "learning_rate": 0.0004976555037185036,
      "loss": 3.1912,
      "step": 62449
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2707302570343018,
      "learning_rate": 0.000497652426481201,
      "loss": 3.0997,
      "step": 62450
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.98567795753479,
      "learning_rate": 0.0004976493492071511,
      "loss": 2.8494,
      "step": 62451
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3864823579788208,
      "learning_rate": 0.0004976462718963544,
      "loss": 3.0723,
      "step": 62452
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.152414560317993,
      "learning_rate": 0.0004976431945488116,
      "loss": 2.8984,
      "step": 62453
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7941315174102783,
      "learning_rate": 0.000497640117164523,
      "loss": 2.906,
      "step": 62454
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0574495792388916,
      "learning_rate": 0.0004976370397434896,
      "loss": 2.9945,
      "step": 62455
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6531221866607666,
      "learning_rate": 0.0004976339622857117,
      "loss": 2.8682,
      "step": 62456
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.211193323135376,
      "learning_rate": 0.0004976308847911898,
      "loss": 3.0882,
      "step": 62457
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8720388412475586,
      "learning_rate": 0.0004976278072599248,
      "loss": 2.9977,
      "step": 62458
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4673775434494019,
      "learning_rate": 0.0004976247296919169,
      "loss": 2.9832,
      "step": 62459
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9967010021209717,
      "learning_rate": 0.0004976216520871669,
      "loss": 3.0852,
      "step": 62460
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3197576999664307,
      "learning_rate": 0.0004976185744456752,
      "loss": 3.0937,
      "step": 62461
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.892776608467102,
      "learning_rate": 0.0004976154967674425,
      "loss": 3.2357,
      "step": 62462
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4831839799880981,
      "learning_rate": 0.0004976124190524694,
      "loss": 3.1764,
      "step": 62463
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2724881172180176,
      "learning_rate": 0.0004976093413007564,
      "loss": 3.0848,
      "step": 62464
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0035948753356934,
      "learning_rate": 0.0004976062635123041,
      "loss": 3.0762,
      "step": 62465
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6910966634750366,
      "learning_rate": 0.000497603185687113,
      "loss": 3.0429,
      "step": 62466
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5541346073150635,
      "learning_rate": 0.0004976001078251838,
      "loss": 3.0343,
      "step": 62467
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.769268274307251,
      "learning_rate": 0.0004975970299265171,
      "loss": 2.931,
      "step": 62468
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1053502559661865,
      "learning_rate": 0.0004975939519911134,
      "loss": 2.945,
      "step": 62469
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5021353960037231,
      "learning_rate": 0.0004975908740189731,
      "loss": 3.1818,
      "step": 62470
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5621142387390137,
      "learning_rate": 0.000497587796010097,
      "loss": 2.9053,
      "step": 62471
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3728361129760742,
      "learning_rate": 0.0004975847179644855,
      "loss": 3.0433,
      "step": 62472
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4426023960113525,
      "learning_rate": 0.0004975816398821393,
      "loss": 2.8724,
      "step": 62473
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3714640140533447,
      "learning_rate": 0.0004975785617630591,
      "loss": 2.8497,
      "step": 62474
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2609853744506836,
      "learning_rate": 0.0004975754836072452,
      "loss": 3.0596,
      "step": 62475
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.162942886352539,
      "learning_rate": 0.0004975724054146983,
      "loss": 2.9284,
      "step": 62476
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4371589422225952,
      "learning_rate": 0.000497569327185419,
      "loss": 3.2228,
      "step": 62477
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4952999353408813,
      "learning_rate": 0.0004975662489194077,
      "loss": 2.8713,
      "step": 62478
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.0437088012695312,
      "learning_rate": 0.0004975631706166652,
      "loss": 3.0999,
      "step": 62479
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8647751808166504,
      "learning_rate": 0.000497560092277192,
      "loss": 3.1021,
      "step": 62480
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6524708271026611,
      "learning_rate": 0.0004975570139009887,
      "loss": 3.0079,
      "step": 62481
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7295033931732178,
      "learning_rate": 0.0004975539354880556,
      "loss": 3.0815,
      "step": 62482
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9521039724349976,
      "learning_rate": 0.0004975508570383937,
      "loss": 3.1418,
      "step": 62483
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.408856987953186,
      "learning_rate": 0.0004975477785520033,
      "loss": 3.239,
      "step": 62484
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5147290229797363,
      "learning_rate": 0.0004975447000288851,
      "loss": 3.1022,
      "step": 62485
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6111921072006226,
      "learning_rate": 0.0004975416214690395,
      "loss": 3.0935,
      "step": 62486
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0919077396392822,
      "learning_rate": 0.0004975385428724673,
      "loss": 2.8401,
      "step": 62487
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.584981083869934,
      "learning_rate": 0.0004975354642391688,
      "loss": 2.9398,
      "step": 62488
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6818420886993408,
      "learning_rate": 0.0004975323855691448,
      "loss": 3.1758,
      "step": 62489
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8485782146453857,
      "learning_rate": 0.0004975293068623959,
      "loss": 3.1326,
      "step": 62490
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6659120321273804,
      "learning_rate": 0.0004975262281189225,
      "loss": 2.895,
      "step": 62491
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0026819705963135,
      "learning_rate": 0.0004975231493387252,
      "loss": 3.1897,
      "step": 62492
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1351583003997803,
      "learning_rate": 0.0004975200705218048,
      "loss": 3.1134,
      "step": 62493
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3368005752563477,
      "learning_rate": 0.0004975169916681614,
      "loss": 3.0562,
      "step": 62494
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9863768815994263,
      "learning_rate": 0.000497513912777796,
      "loss": 3.0311,
      "step": 62495
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0581107139587402,
      "learning_rate": 0.0004975108338507091,
      "loss": 3.0814,
      "step": 62496
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.621576189994812,
      "learning_rate": 0.0004975077548869011,
      "loss": 3.1703,
      "step": 62497
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6194366216659546,
      "learning_rate": 0.0004975046758863729,
      "loss": 3.0634,
      "step": 62498
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.0368552207946777,
      "learning_rate": 0.0004975015968491246,
      "loss": 2.9749,
      "step": 62499
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.2382954359054565,
      "learning_rate": 0.000497498517775157,
      "loss": 2.9221,
      "step": 62500
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4770057201385498,
      "learning_rate": 0.0004974954386644709,
      "loss": 3.0149,
      "step": 62501
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.75825834274292,
      "learning_rate": 0.0004974923595170666,
      "loss": 2.8468,
      "step": 62502
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.098445415496826,
      "learning_rate": 0.0004974892803329447,
      "loss": 3.2471,
      "step": 62503
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8551652431488037,
      "learning_rate": 0.0004974862011121059,
      "loss": 2.9412,
      "step": 62504
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.573945999145508,
      "learning_rate": 0.0004974831218545506,
      "loss": 2.8668,
      "step": 62505
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.311753034591675,
      "learning_rate": 0.0004974800425602795,
      "loss": 3.2617,
      "step": 62506
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1809425354003906,
      "learning_rate": 0.000497476963229293,
      "loss": 3.2091,
      "step": 62507
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7166303396224976,
      "learning_rate": 0.0004974738838615921,
      "loss": 3.0183,
      "step": 62508
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8398844003677368,
      "learning_rate": 0.0004974708044571768,
      "loss": 3.0267,
      "step": 62509
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.377220869064331,
      "learning_rate": 0.0004974677250160481,
      "loss": 3.1194,
      "step": 62510
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.1358208656311035,
      "learning_rate": 0.0004974646455382064,
      "loss": 2.9693,
      "step": 62511
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6420904397964478,
      "learning_rate": 0.0004974615660236523,
      "loss": 3.0222,
      "step": 62512
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7606600522994995,
      "learning_rate": 0.0004974584864723862,
      "loss": 2.8163,
      "step": 62513
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4885581731796265,
      "learning_rate": 0.000497455406884409,
      "loss": 3.1572,
      "step": 62514
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2706282138824463,
      "learning_rate": 0.0004974523272597212,
      "loss": 2.9403,
      "step": 62515
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1541857719421387,
      "learning_rate": 0.000497449247598323,
      "loss": 3.2133,
      "step": 62516
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5669405460357666,
      "learning_rate": 0.0004974461679002156,
      "loss": 3.1072,
      "step": 62517
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9183052778244019,
      "learning_rate": 0.0004974430881653989,
      "loss": 3.1513,
      "step": 62518
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.574918031692505,
      "learning_rate": 0.0004974400083938741,
      "loss": 3.043,
      "step": 62519
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.062941074371338,
      "learning_rate": 0.0004974369285856413,
      "loss": 2.9326,
      "step": 62520
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5143979787826538,
      "learning_rate": 0.0004974338487407012,
      "loss": 3.1906,
      "step": 62521
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.317976713180542,
      "learning_rate": 0.0004974307688590545,
      "loss": 3.1022,
      "step": 62522
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.665257453918457,
      "learning_rate": 0.0004974276889407018,
      "loss": 2.9105,
      "step": 62523
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1078543663024902,
      "learning_rate": 0.0004974246089856435,
      "loss": 2.9522,
      "step": 62524
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6861971616744995,
      "learning_rate": 0.0004974215289938801,
      "loss": 2.8599,
      "step": 62525
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.512157917022705,
      "learning_rate": 0.0004974184489654125,
      "loss": 2.8869,
      "step": 62526
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9559268951416016,
      "learning_rate": 0.0004974153689002409,
      "loss": 3.0548,
      "step": 62527
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9477579593658447,
      "learning_rate": 0.0004974122887983662,
      "loss": 2.9837,
      "step": 62528
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9047441482543945,
      "learning_rate": 0.0004974092086597889,
      "loss": 3.1482,
      "step": 62529
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.856745958328247,
      "learning_rate": 0.0004974061284845093,
      "loss": 3.0366,
      "step": 62530
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.653093099594116,
      "learning_rate": 0.0004974030482725282,
      "loss": 2.9986,
      "step": 62531
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.835628628730774,
      "learning_rate": 0.0004973999680238463,
      "loss": 2.9047,
      "step": 62532
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2891366481781006,
      "learning_rate": 0.0004973968877384639,
      "loss": 3.0821,
      "step": 62533
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.3235056400299072,
      "learning_rate": 0.0004973938074163817,
      "loss": 3.0769,
      "step": 62534
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6764079332351685,
      "learning_rate": 0.0004973907270576004,
      "loss": 3.0051,
      "step": 62535
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1920056343078613,
      "learning_rate": 0.0004973876466621202,
      "loss": 3.1098,
      "step": 62536
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5927448272705078,
      "learning_rate": 0.0004973845662299421,
      "loss": 3.1386,
      "step": 62537
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1569182872772217,
      "learning_rate": 0.0004973814857610664,
      "loss": 3.2856,
      "step": 62538
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.061469793319702,
      "learning_rate": 0.0004973784052554938,
      "loss": 3.2947,
      "step": 62539
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.534485936164856,
      "learning_rate": 0.0004973753247132247,
      "loss": 2.8931,
      "step": 62540
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.460915207862854,
      "learning_rate": 0.0004973722441342601,
      "loss": 3.2694,
      "step": 62541
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9265127182006836,
      "learning_rate": 0.0004973691635186,
      "loss": 3.143,
      "step": 62542
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.8783187866210938,
      "learning_rate": 0.0004973660828662453,
      "loss": 3.0152,
      "step": 62543
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5970594882965088,
      "learning_rate": 0.0004973630021771966,
      "loss": 3.0631,
      "step": 62544
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9543695449829102,
      "learning_rate": 0.0004973599214514543,
      "loss": 2.8276,
      "step": 62545
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9915074110031128,
      "learning_rate": 0.0004973568406890191,
      "loss": 3.1732,
      "step": 62546
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5967808961868286,
      "learning_rate": 0.0004973537598898915,
      "loss": 3.1062,
      "step": 62547
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.624572992324829,
      "learning_rate": 0.0004973506790540722,
      "loss": 3.0184,
      "step": 62548
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6188910007476807,
      "learning_rate": 0.0004973475981815616,
      "loss": 3.0473,
      "step": 62549
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4365122318267822,
      "learning_rate": 0.0004973445172723604,
      "loss": 3.1573,
      "step": 62550
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.365950345993042,
      "learning_rate": 0.0004973414363264692,
      "loss": 3.1461,
      "step": 62551
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6061402559280396,
      "learning_rate": 0.0004973383553438884,
      "loss": 3.1972,
      "step": 62552
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.71306312084198,
      "learning_rate": 0.0004973352743246187,
      "loss": 3.0986,
      "step": 62553
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.230602502822876,
      "learning_rate": 0.0004973321932686606,
      "loss": 2.9234,
      "step": 62554
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.670192003250122,
      "learning_rate": 0.0004973291121760147,
      "loss": 3.0407,
      "step": 62555
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8878806829452515,
      "learning_rate": 0.0004973260310466817,
      "loss": 3.3118,
      "step": 62556
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7902570962905884,
      "learning_rate": 0.0004973229498806621,
      "loss": 2.9198,
      "step": 62557
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3623058795928955,
      "learning_rate": 0.0004973198686779563,
      "loss": 2.9813,
      "step": 62558
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.786644697189331,
      "learning_rate": 0.000497316787438565,
      "loss": 2.8524,
      "step": 62559
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7394064664840698,
      "learning_rate": 0.0004973137061624889,
      "loss": 2.9607,
      "step": 62560
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.163105010986328,
      "learning_rate": 0.0004973106248497285,
      "loss": 3.144,
      "step": 62561
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7548959255218506,
      "learning_rate": 0.0004973075435002843,
      "loss": 2.9098,
      "step": 62562
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7043644189834595,
      "learning_rate": 0.0004973044621141568,
      "loss": 2.8906,
      "step": 62563
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7651652097702026,
      "learning_rate": 0.0004973013806913466,
      "loss": 3.0862,
      "step": 62564
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.3482506275177002,
      "learning_rate": 0.0004972982992318545,
      "loss": 2.9291,
      "step": 62565
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4702720642089844,
      "learning_rate": 0.000497295217735681,
      "loss": 3.1424,
      "step": 62566
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.70073664188385,
      "learning_rate": 0.0004972921362028265,
      "loss": 3.2308,
      "step": 62567
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7995460033416748,
      "learning_rate": 0.0004972890546332916,
      "loss": 3.2509,
      "step": 62568
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6050418615341187,
      "learning_rate": 0.0004972859730270771,
      "loss": 2.8534,
      "step": 62569
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6163663864135742,
      "learning_rate": 0.0004972828913841832,
      "loss": 3.0269,
      "step": 62570
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9124865531921387,
      "learning_rate": 0.0004972798097046108,
      "loss": 3.1642,
      "step": 62571
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.594916820526123,
      "learning_rate": 0.0004972767279883604,
      "loss": 3.1306,
      "step": 62572
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.597764253616333,
      "learning_rate": 0.0004972736462354325,
      "loss": 3.079,
      "step": 62573
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7889245748519897,
      "learning_rate": 0.0004972705644458277,
      "loss": 2.9462,
      "step": 62574
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.4124737977981567,
      "learning_rate": 0.0004972674826195466,
      "loss": 3.1609,
      "step": 62575
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.6924697160720825,
      "learning_rate": 0.0004972644007565897,
      "loss": 3.1607,
      "step": 62576
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.5053327083587646,
      "learning_rate": 0.0004972613188569576,
      "loss": 3.3257,
      "step": 62577
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9931666851043701,
      "learning_rate": 0.0004972582369206511,
      "loss": 2.7982,
      "step": 62578
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8733609914779663,
      "learning_rate": 0.0004972551549476703,
      "loss": 3.1028,
      "step": 62579
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7502470016479492,
      "learning_rate": 0.0004972520729380161,
      "loss": 2.9875,
      "step": 62580
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9743417501449585,
      "learning_rate": 0.0004972489908916891,
      "loss": 2.8878,
      "step": 62581
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.2099838256835938,
      "learning_rate": 0.0004972459088086898,
      "loss": 2.9535,
      "step": 62582
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.984098196029663,
      "learning_rate": 0.0004972428266890186,
      "loss": 3.0586,
      "step": 62583
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.1164042949676514,
      "learning_rate": 0.0004972397445326765,
      "loss": 3.076,
      "step": 62584
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8289909362792969,
      "learning_rate": 0.0004972366623396637,
      "loss": 3.0621,
      "step": 62585
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.8162037134170532,
      "learning_rate": 0.0004972335801099808,
      "loss": 3.1137,
      "step": 62586
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.7633986473083496,
      "learning_rate": 0.0004972304978436285,
      "loss": 3.0234,
      "step": 62587
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.535957098007202,
      "learning_rate": 0.0004972274155406074,
      "loss": 3.0206,
      "step": 62588
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.820907473564148,
      "learning_rate": 0.0004972243332009179,
      "loss": 2.8968,
      "step": 62589
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.5408166646957397,
      "learning_rate": 0.0004972212508245607,
      "loss": 3.2165,
      "step": 62590
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.7212077379226685,
      "learning_rate": 0.0004972181684115364,
      "loss": 2.962,
      "step": 62591
    },
    {
      "epoch": 0.81,
      "grad_norm": 1.9198992252349854,
      "learning_rate": 0.0004972150859618455,
      "loss": 3.0851,
      "step": 62592
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5882835388183594,
      "learning_rate": 0.0004972120034754886,
      "loss": 3.2414,
      "step": 62593
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6449111700057983,
      "learning_rate": 0.0004972089209524662,
      "loss": 3.1093,
      "step": 62594
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0797486305236816,
      "learning_rate": 0.0004972058383927789,
      "loss": 2.961,
      "step": 62595
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6884654760360718,
      "learning_rate": 0.0004972027557964274,
      "loss": 3.0018,
      "step": 62596
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.928629755973816,
      "learning_rate": 0.0004971996731634122,
      "loss": 3.0103,
      "step": 62597
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8231946229934692,
      "learning_rate": 0.0004971965904937339,
      "loss": 3.063,
      "step": 62598
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.554447889328003,
      "learning_rate": 0.0004971935077873928,
      "loss": 3.0814,
      "step": 62599
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.751796841621399,
      "learning_rate": 0.00049719042504439,
      "loss": 2.8736,
      "step": 62600
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1774704456329346,
      "learning_rate": 0.0004971873422647256,
      "loss": 3.1589,
      "step": 62601
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5579358339309692,
      "learning_rate": 0.0004971842594484004,
      "loss": 2.9741,
      "step": 62602
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6699286699295044,
      "learning_rate": 0.0004971811765954149,
      "loss": 3.1501,
      "step": 62603
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3840519189834595,
      "learning_rate": 0.0004971780937057697,
      "loss": 2.8837,
      "step": 62604
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7657910585403442,
      "learning_rate": 0.0004971750107794654,
      "loss": 2.8658,
      "step": 62605
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5545933246612549,
      "learning_rate": 0.0004971719278165025,
      "loss": 2.8288,
      "step": 62606
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4931443929672241,
      "learning_rate": 0.0004971688448168817,
      "loss": 2.9262,
      "step": 62607
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.6368415355682373,
      "learning_rate": 0.0004971657617806035,
      "loss": 3.1421,
      "step": 62608
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.409525156021118,
      "learning_rate": 0.0004971626787076683,
      "loss": 3.0828,
      "step": 62609
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6492187976837158,
      "learning_rate": 0.000497159595598077,
      "loss": 2.9291,
      "step": 62610
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7071893215179443,
      "learning_rate": 0.00049715651245183,
      "loss": 3.1671,
      "step": 62611
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2488090991973877,
      "learning_rate": 0.0004971534292689278,
      "loss": 3.2306,
      "step": 62612
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.86655855178833,
      "learning_rate": 0.000497150346049371,
      "loss": 2.9745,
      "step": 62613
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6546440124511719,
      "learning_rate": 0.0004971472627931603,
      "loss": 2.9359,
      "step": 62614
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.320399284362793,
      "learning_rate": 0.0004971441795002962,
      "loss": 3.1893,
      "step": 62615
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.028085470199585,
      "learning_rate": 0.0004971410961707793,
      "loss": 3.1527,
      "step": 62616
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6930301189422607,
      "learning_rate": 0.0004971380128046102,
      "loss": 3.0017,
      "step": 62617
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0764458179473877,
      "learning_rate": 0.0004971349294017893,
      "loss": 2.9003,
      "step": 62618
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5293445587158203,
      "learning_rate": 0.0004971318459623174,
      "loss": 3.2525,
      "step": 62619
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.489629864692688,
      "learning_rate": 0.000497128762486195,
      "loss": 2.7166,
      "step": 62620
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9348797798156738,
      "learning_rate": 0.0004971256789734226,
      "loss": 3.1306,
      "step": 62621
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5057374238967896,
      "learning_rate": 0.0004971225954240007,
      "loss": 2.9852,
      "step": 62622
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8662399053573608,
      "learning_rate": 0.00049711951183793,
      "loss": 2.7355,
      "step": 62623
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2417614459991455,
      "learning_rate": 0.0004971164282152113,
      "loss": 2.9669,
      "step": 62624
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4693278074264526,
      "learning_rate": 0.0004971133445558447,
      "loss": 2.9801,
      "step": 62625
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0298070907592773,
      "learning_rate": 0.000497110260859831,
      "loss": 3.0221,
      "step": 62626
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.686629056930542,
      "learning_rate": 0.0004971071771271709,
      "loss": 3.0764,
      "step": 62627
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7381948232650757,
      "learning_rate": 0.0004971040933578648,
      "loss": 2.9375,
      "step": 62628
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.914811611175537,
      "learning_rate": 0.0004971010095519134,
      "loss": 2.8988,
      "step": 62629
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9469510316848755,
      "learning_rate": 0.0004970979257093172,
      "loss": 2.9114,
      "step": 62630
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6585865020751953,
      "learning_rate": 0.0004970948418300767,
      "loss": 3.1812,
      "step": 62631
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0323293209075928,
      "learning_rate": 0.0004970917579141925,
      "loss": 3.1253,
      "step": 62632
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.0527188777923584,
      "learning_rate": 0.0004970886739616653,
      "loss": 2.8873,
      "step": 62633
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2775368690490723,
      "learning_rate": 0.0004970855899724955,
      "loss": 2.9458,
      "step": 62634
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.805507779121399,
      "learning_rate": 0.0004970825059466839,
      "loss": 3.1893,
      "step": 62635
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3934310674667358,
      "learning_rate": 0.000497079421884231,
      "loss": 3.0702,
      "step": 62636
    },
    {
      "epoch": 0.82,
      "grad_norm": 4.006123065948486,
      "learning_rate": 0.0004970763377851372,
      "loss": 3.1703,
      "step": 62637
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0189974308013916,
      "learning_rate": 0.0004970732536494031,
      "loss": 2.8896,
      "step": 62638
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6516691446304321,
      "learning_rate": 0.0004970701694770295,
      "loss": 2.9715,
      "step": 62639
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9581559896469116,
      "learning_rate": 0.0004970670852680168,
      "loss": 2.9156,
      "step": 62640
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9811912775039673,
      "learning_rate": 0.0004970640010223655,
      "loss": 3.0823,
      "step": 62641
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.563800573348999,
      "learning_rate": 0.0004970609167400764,
      "loss": 2.9485,
      "step": 62642
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5129213333129883,
      "learning_rate": 0.00049705783242115,
      "loss": 3.1429,
      "step": 62643
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.479413390159607,
      "learning_rate": 0.0004970547480655867,
      "loss": 2.9482,
      "step": 62644
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0819056034088135,
      "learning_rate": 0.0004970516636733874,
      "loss": 3.0414,
      "step": 62645
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.847550868988037,
      "learning_rate": 0.0004970485792445523,
      "loss": 3.2517,
      "step": 62646
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.308668613433838,
      "learning_rate": 0.0004970454947790823,
      "loss": 3.444,
      "step": 62647
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1742570400238037,
      "learning_rate": 0.0004970424102769776,
      "loss": 2.9919,
      "step": 62648
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.431197166442871,
      "learning_rate": 0.0004970393257382391,
      "loss": 2.9533,
      "step": 62649
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4356433153152466,
      "learning_rate": 0.0004970362411628673,
      "loss": 2.801,
      "step": 62650
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6635562181472778,
      "learning_rate": 0.0004970331565508628,
      "loss": 3.1294,
      "step": 62651
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4859169721603394,
      "learning_rate": 0.0004970300719022261,
      "loss": 3.0112,
      "step": 62652
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5515849590301514,
      "learning_rate": 0.0004970269872169576,
      "loss": 3.0165,
      "step": 62653
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2656216621398926,
      "learning_rate": 0.0004970239024950583,
      "loss": 3.0536,
      "step": 62654
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6824499368667603,
      "learning_rate": 0.0004970208177365285,
      "loss": 3.2457,
      "step": 62655
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7718532085418701,
      "learning_rate": 0.0004970177329413687,
      "loss": 3.0196,
      "step": 62656
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5443371534347534,
      "learning_rate": 0.0004970146481095797,
      "loss": 3.0678,
      "step": 62657
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.582322597503662,
      "learning_rate": 0.0004970115632411618,
      "loss": 3.1103,
      "step": 62658
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.561293601989746,
      "learning_rate": 0.0004970084783361158,
      "loss": 2.5531,
      "step": 62659
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6112459897994995,
      "learning_rate": 0.0004970053933944423,
      "loss": 3.0058,
      "step": 62660
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7566028833389282,
      "learning_rate": 0.0004970023084161417,
      "loss": 3.1595,
      "step": 62661
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9510810375213623,
      "learning_rate": 0.0004969992234012147,
      "loss": 3.0836,
      "step": 62662
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6356550455093384,
      "learning_rate": 0.0004969961383496618,
      "loss": 2.9493,
      "step": 62663
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.973151206970215,
      "learning_rate": 0.0004969930532614836,
      "loss": 2.7822,
      "step": 62664
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4085843563079834,
      "learning_rate": 0.0004969899681366806,
      "loss": 2.8283,
      "step": 62665
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9131451845169067,
      "learning_rate": 0.0004969868829752535,
      "loss": 2.9286,
      "step": 62666
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6305983066558838,
      "learning_rate": 0.0004969837977772028,
      "loss": 2.8412,
      "step": 62667
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2935638427734375,
      "learning_rate": 0.0004969807125425292,
      "loss": 2.9994,
      "step": 62668
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8692859411239624,
      "learning_rate": 0.000496977627271233,
      "loss": 2.827,
      "step": 62669
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4970142841339111,
      "learning_rate": 0.0004969745419633151,
      "loss": 2.941,
      "step": 62670
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.074427604675293,
      "learning_rate": 0.0004969714566187758,
      "loss": 3.0819,
      "step": 62671
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.733851432800293,
      "learning_rate": 0.0004969683712376159,
      "loss": 2.8201,
      "step": 62672
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8996391296386719,
      "learning_rate": 0.0004969652858198358,
      "loss": 3.1478,
      "step": 62673
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5203734636306763,
      "learning_rate": 0.0004969622003654361,
      "loss": 3.133,
      "step": 62674
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.061466932296753,
      "learning_rate": 0.0004969591148744174,
      "loss": 3.1683,
      "step": 62675
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6018339395523071,
      "learning_rate": 0.0004969560293467803,
      "loss": 3.2458,
      "step": 62676
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6177423000335693,
      "learning_rate": 0.0004969529437825254,
      "loss": 3.1994,
      "step": 62677
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5183361768722534,
      "learning_rate": 0.0004969498581816532,
      "loss": 2.9724,
      "step": 62678
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.181792736053467,
      "learning_rate": 0.0004969467725441644,
      "loss": 3.0414,
      "step": 62679
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6784435510635376,
      "learning_rate": 0.0004969436868700593,
      "loss": 3.2553,
      "step": 62680
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5916942358016968,
      "learning_rate": 0.0004969406011593389,
      "loss": 2.9004,
      "step": 62681
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8841588497161865,
      "learning_rate": 0.0004969375154120033,
      "loss": 2.9935,
      "step": 62682
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.284846782684326,
      "learning_rate": 0.0004969344296280534,
      "loss": 3.1004,
      "step": 62683
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5320050716400146,
      "learning_rate": 0.0004969313438074896,
      "loss": 3.2736,
      "step": 62684
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8712795972824097,
      "learning_rate": 0.0004969282579503126,
      "loss": 3.0784,
      "step": 62685
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7579880952835083,
      "learning_rate": 0.0004969251720565227,
      "loss": 2.8598,
      "step": 62686
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8112996816635132,
      "learning_rate": 0.000496922086126121,
      "loss": 3.219,
      "step": 62687
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0276830196380615,
      "learning_rate": 0.0004969190001591077,
      "loss": 3.1856,
      "step": 62688
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8845906257629395,
      "learning_rate": 0.0004969159141554834,
      "loss": 3.2709,
      "step": 62689
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.761285662651062,
      "learning_rate": 0.0004969128281152488,
      "loss": 3.1317,
      "step": 62690
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5107159614562988,
      "learning_rate": 0.0004969097420384043,
      "loss": 3.0568,
      "step": 62691
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5979297161102295,
      "learning_rate": 0.0004969066559249505,
      "loss": 2.9729,
      "step": 62692
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4061579704284668,
      "learning_rate": 0.0004969035697748881,
      "loss": 3.2168,
      "step": 62693
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4207441806793213,
      "learning_rate": 0.0004969004835882176,
      "loss": 3.064,
      "step": 62694
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.027574300765991,
      "learning_rate": 0.0004968973973649396,
      "loss": 3.0509,
      "step": 62695
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.769739031791687,
      "learning_rate": 0.0004968943111050546,
      "loss": 2.901,
      "step": 62696
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.635353446006775,
      "learning_rate": 0.0004968912248085633,
      "loss": 2.8574,
      "step": 62697
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6874727010726929,
      "learning_rate": 0.0004968881384754662,
      "loss": 2.8255,
      "step": 62698
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7530583143234253,
      "learning_rate": 0.0004968850521057639,
      "loss": 3.1472,
      "step": 62699
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5646703243255615,
      "learning_rate": 0.0004968819656994568,
      "loss": 3.1731,
      "step": 62700
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7893580198287964,
      "learning_rate": 0.0004968788792565458,
      "loss": 2.9253,
      "step": 62701
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6711666584014893,
      "learning_rate": 0.0004968757927770312,
      "loss": 3.0598,
      "step": 62702
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4910129308700562,
      "learning_rate": 0.0004968727062609138,
      "loss": 3.2004,
      "step": 62703
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4847769737243652,
      "learning_rate": 0.000496869619708194,
      "loss": 2.88,
      "step": 62704
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4517061710357666,
      "learning_rate": 0.0004968665331188723,
      "loss": 2.9668,
      "step": 62705
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1451308727264404,
      "learning_rate": 0.0004968634464929495,
      "loss": 3.0423,
      "step": 62706
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5634980201721191,
      "learning_rate": 0.0004968603598304261,
      "loss": 2.727,
      "step": 62707
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5810840129852295,
      "learning_rate": 0.0004968572731313026,
      "loss": 3.0015,
      "step": 62708
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8800941705703735,
      "learning_rate": 0.0004968541863955796,
      "loss": 3.2374,
      "step": 62709
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4466643333435059,
      "learning_rate": 0.0004968510996232578,
      "loss": 2.6681,
      "step": 62710
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.477349877357483,
      "learning_rate": 0.0004968480128143374,
      "loss": 2.996,
      "step": 62711
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4064525365829468,
      "learning_rate": 0.0004968449259688195,
      "loss": 3.0934,
      "step": 62712
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3431001901626587,
      "learning_rate": 0.0004968418390867042,
      "loss": 3.0283,
      "step": 62713
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.980589747428894,
      "learning_rate": 0.0004968387521679925,
      "loss": 3.1141,
      "step": 62714
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4425932168960571,
      "learning_rate": 0.0004968356652126847,
      "loss": 3.0036,
      "step": 62715
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.46329927444458,
      "learning_rate": 0.0004968325782207814,
      "loss": 3.2292,
      "step": 62716
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6434481143951416,
      "learning_rate": 0.0004968294911922832,
      "loss": 3.2323,
      "step": 62717
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.641492247581482,
      "learning_rate": 0.0004968264041271907,
      "loss": 3.1169,
      "step": 62718
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.473759651184082,
      "learning_rate": 0.0004968233170255045,
      "loss": 3.0373,
      "step": 62719
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4127370119094849,
      "learning_rate": 0.000496820229887225,
      "loss": 2.8551,
      "step": 62720
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8297756910324097,
      "learning_rate": 0.000496817142712353,
      "loss": 3.0041,
      "step": 62721
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6667418479919434,
      "learning_rate": 0.0004968140555008889,
      "loss": 3.2971,
      "step": 62722
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6175737380981445,
      "learning_rate": 0.0004968109682528335,
      "loss": 3.1055,
      "step": 62723
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9094730615615845,
      "learning_rate": 0.0004968078809681871,
      "loss": 3.1085,
      "step": 62724
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.816741704940796,
      "learning_rate": 0.0004968047936469503,
      "loss": 2.9918,
      "step": 62725
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.9807395935058594,
      "learning_rate": 0.000496801706289124,
      "loss": 2.8508,
      "step": 62726
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1125314235687256,
      "learning_rate": 0.0004967986188947085,
      "loss": 3.2492,
      "step": 62727
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6603326797485352,
      "learning_rate": 0.0004967955314637044,
      "loss": 2.9045,
      "step": 62728
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.923738479614258,
      "learning_rate": 0.0004967924439961123,
      "loss": 3.2027,
      "step": 62729
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.61642587184906,
      "learning_rate": 0.0004967893564919327,
      "loss": 3.0681,
      "step": 62730
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.464697003364563,
      "learning_rate": 0.0004967862689511663,
      "loss": 3.0494,
      "step": 62731
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.3283843994140625,
      "learning_rate": 0.0004967831813738136,
      "loss": 3.1395,
      "step": 62732
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4703348875045776,
      "learning_rate": 0.0004967800937598752,
      "loss": 2.982,
      "step": 62733
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6346478462219238,
      "learning_rate": 0.0004967770061093516,
      "loss": 2.9719,
      "step": 62734
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7839475870132446,
      "learning_rate": 0.0004967739184222436,
      "loss": 2.9184,
      "step": 62735
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3575799465179443,
      "learning_rate": 0.0004967708306985515,
      "loss": 2.7966,
      "step": 62736
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.380974292755127,
      "learning_rate": 0.000496767742938276,
      "loss": 2.9908,
      "step": 62737
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.494346022605896,
      "learning_rate": 0.0004967646551414176,
      "loss": 3.0956,
      "step": 62738
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7059428691864014,
      "learning_rate": 0.000496761567307977,
      "loss": 2.9037,
      "step": 62739
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9212726354599,
      "learning_rate": 0.0004967584794379548,
      "loss": 2.8167,
      "step": 62740
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.586044430732727,
      "learning_rate": 0.0004967553915313513,
      "loss": 3.1899,
      "step": 62741
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5845448970794678,
      "learning_rate": 0.0004967523035881675,
      "loss": 2.9024,
      "step": 62742
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.552585244178772,
      "learning_rate": 0.0004967492156084034,
      "loss": 3.0218,
      "step": 62743
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7996217012405396,
      "learning_rate": 0.0004967461275920601,
      "loss": 2.9178,
      "step": 62744
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8670812845230103,
      "learning_rate": 0.0004967430395391381,
      "loss": 2.7524,
      "step": 62745
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2790439128875732,
      "learning_rate": 0.0004967399514496378,
      "loss": 3.0009,
      "step": 62746
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3945063352584839,
      "learning_rate": 0.0004967368633235597,
      "loss": 2.9435,
      "step": 62747
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.514316439628601,
      "learning_rate": 0.0004967337751609046,
      "loss": 2.918,
      "step": 62748
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4983681440353394,
      "learning_rate": 0.0004967306869616729,
      "loss": 3.0171,
      "step": 62749
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.415004849433899,
      "learning_rate": 0.0004967275987258653,
      "loss": 3.1833,
      "step": 62750
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4285959005355835,
      "learning_rate": 0.0004967245104534824,
      "loss": 2.9303,
      "step": 62751
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5458415746688843,
      "learning_rate": 0.0004967214221445245,
      "loss": 2.9267,
      "step": 62752
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8523037433624268,
      "learning_rate": 0.0004967183337989925,
      "loss": 3.0084,
      "step": 62753
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.71824049949646,
      "learning_rate": 0.0004967152454168869,
      "loss": 3.0893,
      "step": 62754
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.813974618911743,
      "learning_rate": 0.0004967121569982081,
      "loss": 3.0747,
      "step": 62755
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5171279907226562,
      "learning_rate": 0.0004967090685429569,
      "loss": 3.1238,
      "step": 62756
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8983571529388428,
      "learning_rate": 0.0004967059800511337,
      "loss": 2.9762,
      "step": 62757
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.501446008682251,
      "learning_rate": 0.0004967028915227392,
      "loss": 3.1876,
      "step": 62758
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5153865814208984,
      "learning_rate": 0.0004966998029577739,
      "loss": 3.101,
      "step": 62759
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5762062072753906,
      "learning_rate": 0.0004966967143562384,
      "loss": 3.0497,
      "step": 62760
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5512083768844604,
      "learning_rate": 0.0004966936257181333,
      "loss": 3.1016,
      "step": 62761
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.748716950416565,
      "learning_rate": 0.0004966905370434591,
      "loss": 2.966,
      "step": 62762
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4271308183670044,
      "learning_rate": 0.0004966874483322164,
      "loss": 3.2871,
      "step": 62763
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5634273290634155,
      "learning_rate": 0.0004966843595844059,
      "loss": 2.962,
      "step": 62764
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6042717695236206,
      "learning_rate": 0.0004966812708000279,
      "loss": 3.1643,
      "step": 62765
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6535314321517944,
      "learning_rate": 0.0004966781819790832,
      "loss": 3.0609,
      "step": 62766
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6156352758407593,
      "learning_rate": 0.0004966750931215723,
      "loss": 2.9957,
      "step": 62767
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7705668210983276,
      "learning_rate": 0.0004966720042274959,
      "loss": 2.8015,
      "step": 62768
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.937663197517395,
      "learning_rate": 0.0004966689152968544,
      "loss": 2.9655,
      "step": 62769
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6166990995407104,
      "learning_rate": 0.0004966658263296483,
      "loss": 2.9069,
      "step": 62770
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.594807744026184,
      "learning_rate": 0.0004966627373258786,
      "loss": 3.2311,
      "step": 62771
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3813965320587158,
      "learning_rate": 0.0004966596482855452,
      "loss": 3.1212,
      "step": 62772
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6913034915924072,
      "learning_rate": 0.0004966565592086494,
      "loss": 2.9338,
      "step": 62773
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3909434080123901,
      "learning_rate": 0.0004966534700951913,
      "loss": 2.9841,
      "step": 62774
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1324446201324463,
      "learning_rate": 0.0004966503809451715,
      "loss": 3.1244,
      "step": 62775
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6850613355636597,
      "learning_rate": 0.0004966472917585909,
      "loss": 2.9735,
      "step": 62776
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6137772798538208,
      "learning_rate": 0.0004966442025354498,
      "loss": 2.7867,
      "step": 62777
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6225382089614868,
      "learning_rate": 0.0004966411132757486,
      "loss": 3.0575,
      "step": 62778
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1302599906921387,
      "learning_rate": 0.0004966380239794884,
      "loss": 3.0337,
      "step": 62779
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5236454010009766,
      "learning_rate": 0.0004966349346466693,
      "loss": 2.7563,
      "step": 62780
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4355342388153076,
      "learning_rate": 0.0004966318452772922,
      "loss": 2.9848,
      "step": 62781
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7248070240020752,
      "learning_rate": 0.0004966287558713574,
      "loss": 2.972,
      "step": 62782
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7987879514694214,
      "learning_rate": 0.0004966256664288656,
      "loss": 3.2262,
      "step": 62783
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7538303136825562,
      "learning_rate": 0.0004966225769498175,
      "loss": 2.9171,
      "step": 62784
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.972270131111145,
      "learning_rate": 0.0004966194874342134,
      "loss": 2.9015,
      "step": 62785
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6410021781921387,
      "learning_rate": 0.0004966163978820542,
      "loss": 3.147,
      "step": 62786
    },
    {
      "epoch": 0.82,
      "grad_norm": 4.631828308105469,
      "learning_rate": 0.0004966133082933402,
      "loss": 2.9516,
      "step": 62787
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.619694471359253,
      "learning_rate": 0.000496610218668072,
      "loss": 2.7791,
      "step": 62788
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7211554050445557,
      "learning_rate": 0.0004966071290062504,
      "loss": 2.7288,
      "step": 62789
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.795818567276001,
      "learning_rate": 0.0004966040393078757,
      "loss": 2.9166,
      "step": 62790
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5169787406921387,
      "learning_rate": 0.0004966009495729487,
      "loss": 3.1918,
      "step": 62791
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.6192662715911865,
      "learning_rate": 0.0004965978598014698,
      "loss": 2.9546,
      "step": 62792
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5748106241226196,
      "learning_rate": 0.0004965947699934396,
      "loss": 3.0656,
      "step": 62793
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.8817827701568604,
      "learning_rate": 0.0004965916801488589,
      "loss": 2.8836,
      "step": 62794
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.8239479064941406,
      "learning_rate": 0.000496588590267728,
      "loss": 3.1407,
      "step": 62795
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1788840293884277,
      "learning_rate": 0.0004965855003500474,
      "loss": 2.8445,
      "step": 62796
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4185245037078857,
      "learning_rate": 0.0004965824103958182,
      "loss": 3.067,
      "step": 62797
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.388227939605713,
      "learning_rate": 0.0004965793204050403,
      "loss": 2.924,
      "step": 62798
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4977805614471436,
      "learning_rate": 0.0004965762303777147,
      "loss": 3.1007,
      "step": 62799
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5709357261657715,
      "learning_rate": 0.000496573140313842,
      "loss": 3.2414,
      "step": 62800
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.532155752182007,
      "learning_rate": 0.0004965700502134224,
      "loss": 2.8689,
      "step": 62801
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7521106004714966,
      "learning_rate": 0.0004965669600764569,
      "loss": 2.9252,
      "step": 62802
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.494664430618286,
      "learning_rate": 0.0004965638699029459,
      "loss": 2.9892,
      "step": 62803
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.7048137187957764,
      "learning_rate": 0.0004965607796928898,
      "loss": 3.0535,
      "step": 62804
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.660646915435791,
      "learning_rate": 0.0004965576894462895,
      "loss": 2.8245,
      "step": 62805
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0496973991394043,
      "learning_rate": 0.0004965545991631454,
      "loss": 2.9394,
      "step": 62806
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2302000522613525,
      "learning_rate": 0.000496551508843458,
      "loss": 3.1939,
      "step": 62807
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.407584547996521,
      "learning_rate": 0.0004965484184872279,
      "loss": 2.9296,
      "step": 62808
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4804108142852783,
      "learning_rate": 0.0004965453280944559,
      "loss": 3.1616,
      "step": 62809
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7441445589065552,
      "learning_rate": 0.0004965422376651424,
      "loss": 2.942,
      "step": 62810
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4625691175460815,
      "learning_rate": 0.0004965391471992879,
      "loss": 3.2372,
      "step": 62811
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.2934949398040771,
      "learning_rate": 0.0004965360566968932,
      "loss": 3.0131,
      "step": 62812
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4122788906097412,
      "learning_rate": 0.0004965329661579586,
      "loss": 3.2486,
      "step": 62813
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8426949977874756,
      "learning_rate": 0.0004965298755824849,
      "loss": 3.0028,
      "step": 62814
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8070334196090698,
      "learning_rate": 0.0004965267849704726,
      "loss": 3.2146,
      "step": 62815
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.774930477142334,
      "learning_rate": 0.0004965236943219221,
      "loss": 3.2214,
      "step": 62816
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7328612804412842,
      "learning_rate": 0.0004965206036368343,
      "loss": 2.9005,
      "step": 62817
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8758312463760376,
      "learning_rate": 0.0004965175129152095,
      "loss": 3.1987,
      "step": 62818
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1373767852783203,
      "learning_rate": 0.0004965144221570485,
      "loss": 3.2153,
      "step": 62819
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.850914716720581,
      "learning_rate": 0.0004965113313623517,
      "loss": 2.9733,
      "step": 62820
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.722434401512146,
      "learning_rate": 0.0004965082405311196,
      "loss": 2.8925,
      "step": 62821
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5563981533050537,
      "learning_rate": 0.0004965051496633531,
      "loss": 2.927,
      "step": 62822
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7741787433624268,
      "learning_rate": 0.0004965020587590525,
      "loss": 3.0255,
      "step": 62823
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.2031984329223633,
      "learning_rate": 0.0004964989678182185,
      "loss": 2.9267,
      "step": 62824
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5692284107208252,
      "learning_rate": 0.0004964958768408517,
      "loss": 2.7956,
      "step": 62825
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6387195587158203,
      "learning_rate": 0.0004964927858269524,
      "loss": 3.0966,
      "step": 62826
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.222860813140869,
      "learning_rate": 0.0004964896947765215,
      "loss": 2.9272,
      "step": 62827
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.728458881378174,
      "learning_rate": 0.0004964866036895595,
      "loss": 2.9717,
      "step": 62828
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4225822687149048,
      "learning_rate": 0.0004964835125660669,
      "loss": 2.8675,
      "step": 62829
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6491421461105347,
      "learning_rate": 0.0004964804214060443,
      "loss": 2.8588,
      "step": 62830
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.341476559638977,
      "learning_rate": 0.0004964773302094923,
      "loss": 3.0862,
      "step": 62831
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4602694511413574,
      "learning_rate": 0.0004964742389764114,
      "loss": 2.9981,
      "step": 62832
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3650906085968018,
      "learning_rate": 0.0004964711477068023,
      "loss": 2.8926,
      "step": 62833
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4564263820648193,
      "learning_rate": 0.0004964680564006654,
      "loss": 3.2666,
      "step": 62834
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4804762601852417,
      "learning_rate": 0.0004964649650580015,
      "loss": 3.0703,
      "step": 62835
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5951197147369385,
      "learning_rate": 0.000496461873678811,
      "loss": 3.0259,
      "step": 62836
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7151744365692139,
      "learning_rate": 0.0004964587822630945,
      "loss": 2.8357,
      "step": 62837
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.373865008354187,
      "learning_rate": 0.0004964556908108527,
      "loss": 2.8115,
      "step": 62838
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5488790273666382,
      "learning_rate": 0.000496452599322086,
      "loss": 3.2869,
      "step": 62839
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4310468435287476,
      "learning_rate": 0.000496449507796795,
      "loss": 3.1066,
      "step": 62840
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5836995840072632,
      "learning_rate": 0.0004964464162349804,
      "loss": 3.0811,
      "step": 62841
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9721908569335938,
      "learning_rate": 0.0004964433246366427,
      "loss": 3.1954,
      "step": 62842
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4432942867279053,
      "learning_rate": 0.0004964402330017826,
      "loss": 3.1687,
      "step": 62843
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5659326314926147,
      "learning_rate": 0.0004964371413304004,
      "loss": 3.1564,
      "step": 62844
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8463255167007446,
      "learning_rate": 0.0004964340496224968,
      "loss": 3.0245,
      "step": 62845
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7331613302230835,
      "learning_rate": 0.0004964309578780725,
      "loss": 3.1786,
      "step": 62846
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8474586009979248,
      "learning_rate": 0.0004964278660971278,
      "loss": 3.0105,
      "step": 62847
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5893837213516235,
      "learning_rate": 0.0004964247742796637,
      "loss": 3.2189,
      "step": 62848
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.995651125907898,
      "learning_rate": 0.0004964216824256805,
      "loss": 2.9456,
      "step": 62849
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8559893369674683,
      "learning_rate": 0.0004964185905351787,
      "loss": 2.9607,
      "step": 62850
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7508999109268188,
      "learning_rate": 0.0004964154986081589,
      "loss": 3.0436,
      "step": 62851
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8742049932479858,
      "learning_rate": 0.000496412406644622,
      "loss": 2.9533,
      "step": 62852
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6029255390167236,
      "learning_rate": 0.0004964093146445681,
      "loss": 2.7602,
      "step": 62853
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5968105792999268,
      "learning_rate": 0.000496406222607998,
      "loss": 3.0126,
      "step": 62854
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.783565640449524,
      "learning_rate": 0.0004964031305349124,
      "loss": 3.0148,
      "step": 62855
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6892971992492676,
      "learning_rate": 0.0004964000384253117,
      "loss": 3.121,
      "step": 62856
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5791187286376953,
      "learning_rate": 0.0004963969462791965,
      "loss": 2.9076,
      "step": 62857
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1299753189086914,
      "learning_rate": 0.0004963938540965675,
      "loss": 2.6919,
      "step": 62858
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.787070870399475,
      "learning_rate": 0.000496390761877425,
      "loss": 3.1938,
      "step": 62859
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9084683656692505,
      "learning_rate": 0.0004963876696217699,
      "loss": 2.9983,
      "step": 62860
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.787188172340393,
      "learning_rate": 0.0004963845773296026,
      "loss": 2.9214,
      "step": 62861
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8120859861373901,
      "learning_rate": 0.0004963814850009236,
      "loss": 3.1329,
      "step": 62862
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9581451416015625,
      "learning_rate": 0.0004963783926357337,
      "loss": 2.9688,
      "step": 62863
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.87054443359375,
      "learning_rate": 0.0004963753002340332,
      "loss": 2.988,
      "step": 62864
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4917505979537964,
      "learning_rate": 0.0004963722077958229,
      "loss": 3.0654,
      "step": 62865
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8174458742141724,
      "learning_rate": 0.0004963691153211034,
      "loss": 3.0623,
      "step": 62866
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7481391429901123,
      "learning_rate": 0.0004963660228098751,
      "loss": 2.8495,
      "step": 62867
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8262064456939697,
      "learning_rate": 0.0004963629302621387,
      "loss": 2.9039,
      "step": 62868
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5280054807662964,
      "learning_rate": 0.0004963598376778946,
      "loss": 3.1464,
      "step": 62869
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.311828136444092,
      "learning_rate": 0.0004963567450571436,
      "loss": 2.9642,
      "step": 62870
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4877979755401611,
      "learning_rate": 0.0004963536523998861,
      "loss": 2.9358,
      "step": 62871
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.690717101097107,
      "learning_rate": 0.0004963505597061228,
      "loss": 3.0033,
      "step": 62872
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5453014373779297,
      "learning_rate": 0.0004963474669758542,
      "loss": 3.0866,
      "step": 62873
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.422043800354004,
      "learning_rate": 0.000496344374209081,
      "loss": 3.1469,
      "step": 62874
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3745871782302856,
      "learning_rate": 0.0004963412814058036,
      "loss": 3.0594,
      "step": 62875
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.562105655670166,
      "learning_rate": 0.0004963381885660226,
      "loss": 3.0638,
      "step": 62876
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.055905818939209,
      "learning_rate": 0.0004963350956897387,
      "loss": 3.1478,
      "step": 62877
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7321715354919434,
      "learning_rate": 0.0004963320027769523,
      "loss": 3.0896,
      "step": 62878
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8100050687789917,
      "learning_rate": 0.0004963289098276643,
      "loss": 3.095,
      "step": 62879
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6033718585968018,
      "learning_rate": 0.0004963258168418748,
      "loss": 3.2951,
      "step": 62880
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5331000089645386,
      "learning_rate": 0.0004963227238195848,
      "loss": 3.2411,
      "step": 62881
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6835170984268188,
      "learning_rate": 0.0004963196307607946,
      "loss": 3.0204,
      "step": 62882
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1528830528259277,
      "learning_rate": 0.0004963165376655048,
      "loss": 2.9883,
      "step": 62883
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5727567672729492,
      "learning_rate": 0.0004963134445337161,
      "loss": 3.1663,
      "step": 62884
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.222975969314575,
      "learning_rate": 0.0004963103513654292,
      "loss": 3.2957,
      "step": 62885
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7031335830688477,
      "learning_rate": 0.0004963072581606443,
      "loss": 3.0676,
      "step": 62886
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.574091911315918,
      "learning_rate": 0.0004963041649193623,
      "loss": 2.806,
      "step": 62887
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8353520631790161,
      "learning_rate": 0.0004963010716415835,
      "loss": 3.0987,
      "step": 62888
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7836154699325562,
      "learning_rate": 0.0004962979783273088,
      "loss": 3.1372,
      "step": 62889
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8772419691085815,
      "learning_rate": 0.0004962948849765384,
      "loss": 3.1191,
      "step": 62890
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4651005268096924,
      "learning_rate": 0.0004962917915892732,
      "loss": 2.9421,
      "step": 62891
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5948002338409424,
      "learning_rate": 0.0004962886981655137,
      "loss": 3.0242,
      "step": 62892
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9345289468765259,
      "learning_rate": 0.0004962856047052603,
      "loss": 3.2584,
      "step": 62893
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7310255765914917,
      "learning_rate": 0.0004962825112085139,
      "loss": 2.9904,
      "step": 62894
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7414618730545044,
      "learning_rate": 0.0004962794176752747,
      "loss": 2.9574,
      "step": 62895
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9941413402557373,
      "learning_rate": 0.0004962763241055435,
      "loss": 3.1553,
      "step": 62896
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.591294527053833,
      "learning_rate": 0.0004962732304993209,
      "loss": 3.1416,
      "step": 62897
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6288684606552124,
      "learning_rate": 0.0004962701368566073,
      "loss": 3.1355,
      "step": 62898
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4884241819381714,
      "learning_rate": 0.0004962670431774034,
      "loss": 2.9527,
      "step": 62899
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9109165668487549,
      "learning_rate": 0.0004962639494617097,
      "loss": 3.0384,
      "step": 62900
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5032581090927124,
      "learning_rate": 0.0004962608557095269,
      "loss": 3.1279,
      "step": 62901
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4794496297836304,
      "learning_rate": 0.0004962577619208556,
      "loss": 3.1505,
      "step": 62902
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.596331238746643,
      "learning_rate": 0.000496254668095696,
      "loss": 3.2879,
      "step": 62903
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8206901550292969,
      "learning_rate": 0.0004962515742340492,
      "loss": 3.0538,
      "step": 62904
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3075878620147705,
      "learning_rate": 0.0004962484803359154,
      "loss": 2.9659,
      "step": 62905
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.511303424835205,
      "learning_rate": 0.0004962453864012953,
      "loss": 3.0187,
      "step": 62906
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6872702836990356,
      "learning_rate": 0.0004962422924301896,
      "loss": 3.189,
      "step": 62907
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8607733249664307,
      "learning_rate": 0.0004962391984225987,
      "loss": 3.1283,
      "step": 62908
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3539247512817383,
      "learning_rate": 0.0004962361043785231,
      "loss": 2.9795,
      "step": 62909
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1363625526428223,
      "learning_rate": 0.0004962330102979637,
      "loss": 3.1967,
      "step": 62910
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.6269307136535645,
      "learning_rate": 0.0004962299161809207,
      "loss": 3.1072,
      "step": 62911
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8245625495910645,
      "learning_rate": 0.0004962268220273949,
      "loss": 2.929,
      "step": 62912
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.768152117729187,
      "learning_rate": 0.000496223727837387,
      "loss": 2.8834,
      "step": 62913
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7987356185913086,
      "learning_rate": 0.0004962206336108973,
      "loss": 3.2472,
      "step": 62914
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5671809911727905,
      "learning_rate": 0.0004962175393479263,
      "loss": 2.8063,
      "step": 62915
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.420941710472107,
      "learning_rate": 0.000496214445048475,
      "loss": 3.1558,
      "step": 62916
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6170978546142578,
      "learning_rate": 0.0004962113507125436,
      "loss": 3.242,
      "step": 62917
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4389687776565552,
      "learning_rate": 0.0004962082563401329,
      "loss": 3.1471,
      "step": 62918
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.475710153579712,
      "learning_rate": 0.0004962051619312433,
      "loss": 3.0911,
      "step": 62919
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.906907081604004,
      "learning_rate": 0.0004962020674858755,
      "loss": 3.1663,
      "step": 62920
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6974765062332153,
      "learning_rate": 0.0004961989730040301,
      "loss": 2.9758,
      "step": 62921
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7776542901992798,
      "learning_rate": 0.0004961958784857075,
      "loss": 2.8606,
      "step": 62922
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.681444764137268,
      "learning_rate": 0.0004961927839309084,
      "loss": 3.0841,
      "step": 62923
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.633498191833496,
      "learning_rate": 0.0004961896893396333,
      "loss": 3.0283,
      "step": 62924
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5953965187072754,
      "learning_rate": 0.000496186594711883,
      "loss": 2.8514,
      "step": 62925
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4204031229019165,
      "learning_rate": 0.0004961835000476578,
      "loss": 3.0866,
      "step": 62926
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4454063177108765,
      "learning_rate": 0.0004961804053469583,
      "loss": 2.9912,
      "step": 62927
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5409959554672241,
      "learning_rate": 0.0004961773106097853,
      "loss": 3.1139,
      "step": 62928
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.977208137512207,
      "learning_rate": 0.0004961742158361392,
      "loss": 3.1189,
      "step": 62929
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6103966236114502,
      "learning_rate": 0.0004961711210260206,
      "loss": 3.0353,
      "step": 62930
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3825700283050537,
      "learning_rate": 0.0004961680261794301,
      "loss": 2.9944,
      "step": 62931
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.917240858078003,
      "learning_rate": 0.0004961649312963683,
      "loss": 2.6027,
      "step": 62932
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.499678373336792,
      "learning_rate": 0.0004961618363768356,
      "loss": 2.9878,
      "step": 62933
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3409072160720825,
      "learning_rate": 0.0004961587414208327,
      "loss": 2.9407,
      "step": 62934
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.518526315689087,
      "learning_rate": 0.0004961556464283604,
      "loss": 3.1083,
      "step": 62935
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3213523626327515,
      "learning_rate": 0.000496152551399419,
      "loss": 3.0634,
      "step": 62936
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3744113445281982,
      "learning_rate": 0.0004961494563340091,
      "loss": 2.9534,
      "step": 62937
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8845540285110474,
      "learning_rate": 0.0004961463612321313,
      "loss": 3.239,
      "step": 62938
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4861191511154175,
      "learning_rate": 0.000496143266093786,
      "loss": 3.3321,
      "step": 62939
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.479657769203186,
      "learning_rate": 0.0004961401709189742,
      "loss": 3.3885,
      "step": 62940
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8381778001785278,
      "learning_rate": 0.0004961370757076962,
      "loss": 3.0336,
      "step": 62941
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0518407821655273,
      "learning_rate": 0.0004961339804599525,
      "loss": 3.0613,
      "step": 62942
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1634676456451416,
      "learning_rate": 0.000496130885175744,
      "loss": 2.985,
      "step": 62943
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4090453386306763,
      "learning_rate": 0.000496127789855071,
      "loss": 2.9867,
      "step": 62944
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8839895725250244,
      "learning_rate": 0.000496124694497934,
      "loss": 3.0978,
      "step": 62945
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6472744941711426,
      "learning_rate": 0.0004961215991043339,
      "loss": 3.0695,
      "step": 62946
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5908572673797607,
      "learning_rate": 0.0004961185036742711,
      "loss": 2.8303,
      "step": 62947
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.394352912902832,
      "learning_rate": 0.000496115408207746,
      "loss": 2.8753,
      "step": 62948
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7442420721054077,
      "learning_rate": 0.0004961123127047595,
      "loss": 3.236,
      "step": 62949
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.546769142150879,
      "learning_rate": 0.0004961092171653119,
      "loss": 3.0221,
      "step": 62950
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6040657758712769,
      "learning_rate": 0.0004961061215894039,
      "loss": 3.0881,
      "step": 62951
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7987617254257202,
      "learning_rate": 0.0004961030259770361,
      "loss": 3.2071,
      "step": 62952
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7290959358215332,
      "learning_rate": 0.0004960999303282091,
      "loss": 3.1979,
      "step": 62953
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7823469638824463,
      "learning_rate": 0.0004960968346429234,
      "loss": 3.1734,
      "step": 62954
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9945083856582642,
      "learning_rate": 0.0004960937389211794,
      "loss": 2.95,
      "step": 62955
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5875226259231567,
      "learning_rate": 0.0004960906431629782,
      "loss": 3.1576,
      "step": 62956
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6261167526245117,
      "learning_rate": 0.0004960875473683198,
      "loss": 3.1677,
      "step": 62957
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6560128927230835,
      "learning_rate": 0.0004960844515372052,
      "loss": 3.0063,
      "step": 62958
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9348559379577637,
      "learning_rate": 0.0004960813556696346,
      "loss": 2.9246,
      "step": 62959
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5261290073394775,
      "learning_rate": 0.0004960782597656089,
      "loss": 2.8487,
      "step": 62960
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4549715518951416,
      "learning_rate": 0.0004960751638251286,
      "loss": 3.1554,
      "step": 62961
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4599872827529907,
      "learning_rate": 0.0004960720678481941,
      "loss": 2.8831,
      "step": 62962
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3682398796081543,
      "learning_rate": 0.0004960689718348062,
      "loss": 3.1682,
      "step": 62963
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.425565481185913,
      "learning_rate": 0.0004960658757849653,
      "loss": 2.9233,
      "step": 62964
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4886915683746338,
      "learning_rate": 0.0004960627796986722,
      "loss": 3.0402,
      "step": 62965
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.640604853630066,
      "learning_rate": 0.0004960596835759272,
      "loss": 2.8907,
      "step": 62966
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3607211112976074,
      "learning_rate": 0.0004960565874167311,
      "loss": 3.2193,
      "step": 62967
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3384451866149902,
      "learning_rate": 0.0004960534912210842,
      "loss": 3.3882,
      "step": 62968
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7047154903411865,
      "learning_rate": 0.0004960503949889874,
      "loss": 2.9595,
      "step": 62969
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.641805648803711,
      "learning_rate": 0.0004960472987204411,
      "loss": 3.1197,
      "step": 62970
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7750831842422485,
      "learning_rate": 0.0004960442024154459,
      "loss": 2.9973,
      "step": 62971
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8582119941711426,
      "learning_rate": 0.0004960411060740024,
      "loss": 2.7642,
      "step": 62972
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6586565971374512,
      "learning_rate": 0.0004960380096961111,
      "loss": 3.1351,
      "step": 62973
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2972729206085205,
      "learning_rate": 0.0004960349132817727,
      "loss": 3.0022,
      "step": 62974
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.256603956222534,
      "learning_rate": 0.0004960318168309877,
      "loss": 3.091,
      "step": 62975
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4449950456619263,
      "learning_rate": 0.0004960287203437567,
      "loss": 2.9606,
      "step": 62976
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6095199584960938,
      "learning_rate": 0.0004960256238200802,
      "loss": 3.1798,
      "step": 62977
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4410661458969116,
      "learning_rate": 0.0004960225272599588,
      "loss": 3.001,
      "step": 62978
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.895246148109436,
      "learning_rate": 0.0004960194306633932,
      "loss": 3.1146,
      "step": 62979
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5989934206008911,
      "learning_rate": 0.0004960163340303839,
      "loss": 3.0107,
      "step": 62980
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.51981782913208,
      "learning_rate": 0.0004960132373609314,
      "loss": 2.9531,
      "step": 62981
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2062437534332275,
      "learning_rate": 0.0004960101406550364,
      "loss": 3.2179,
      "step": 62982
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.89053213596344,
      "learning_rate": 0.0004960070439126993,
      "loss": 2.9661,
      "step": 62983
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.826995611190796,
      "learning_rate": 0.000496003947133921,
      "loss": 3.1716,
      "step": 62984
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.329611897468567,
      "learning_rate": 0.0004960008503187016,
      "loss": 3.0053,
      "step": 62985
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5845484733581543,
      "learning_rate": 0.0004959977534670422,
      "loss": 2.9856,
      "step": 62986
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.460220217704773,
      "learning_rate": 0.0004959946565789429,
      "loss": 2.8561,
      "step": 62987
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3868591785430908,
      "learning_rate": 0.0004959915596544046,
      "loss": 3.1445,
      "step": 62988
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3335771560668945,
      "learning_rate": 0.0004959884626934279,
      "loss": 3.1749,
      "step": 62989
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4244154691696167,
      "learning_rate": 0.0004959853656960131,
      "loss": 2.8341,
      "step": 62990
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.808329463005066,
      "learning_rate": 0.0004959822686621608,
      "loss": 2.882,
      "step": 62991
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8640379905700684,
      "learning_rate": 0.0004959791715918719,
      "loss": 3.064,
      "step": 62992
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.388275384902954,
      "learning_rate": 0.0004959760744851466,
      "loss": 3.0253,
      "step": 62993
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2648415565490723,
      "learning_rate": 0.0004959729773419858,
      "loss": 2.9591,
      "step": 62994
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3190423250198364,
      "learning_rate": 0.0004959698801623899,
      "loss": 3.1243,
      "step": 62995
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4012246131896973,
      "learning_rate": 0.0004959667829463595,
      "loss": 2.6294,
      "step": 62996
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4129929542541504,
      "learning_rate": 0.0004959636856938951,
      "loss": 3.1872,
      "step": 62997
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.65311861038208,
      "learning_rate": 0.0004959605884049974,
      "loss": 3.0549,
      "step": 62998
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.141094446182251,
      "learning_rate": 0.0004959574910796669,
      "loss": 3.2257,
      "step": 62999
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4481083154678345,
      "learning_rate": 0.0004959543937179043,
      "loss": 3.2517,
      "step": 63000
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.57023024559021,
      "learning_rate": 0.00049595129631971,
      "loss": 3.2106,
      "step": 63001
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7474819421768188,
      "learning_rate": 0.0004959481988850847,
      "loss": 3.2107,
      "step": 63002
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3663105964660645,
      "learning_rate": 0.0004959451014140288,
      "loss": 3.0622,
      "step": 63003
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1532983779907227,
      "learning_rate": 0.0004959420039065431,
      "loss": 3.0771,
      "step": 63004
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6432007551193237,
      "learning_rate": 0.0004959389063626281,
      "loss": 3.0436,
      "step": 63005
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.1424732208251953,
      "learning_rate": 0.0004959358087822844,
      "loss": 3.0904,
      "step": 63006
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4333536624908447,
      "learning_rate": 0.0004959327111655125,
      "loss": 2.9404,
      "step": 63007
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6736371517181396,
      "learning_rate": 0.0004959296135123129,
      "loss": 3.1178,
      "step": 63008
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0351765155792236,
      "learning_rate": 0.0004959265158226864,
      "loss": 3.1866,
      "step": 63009
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.090474843978882,
      "learning_rate": 0.0004959234180966334,
      "loss": 2.943,
      "step": 63010
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1316306591033936,
      "learning_rate": 0.0004959203203341547,
      "loss": 2.8265,
      "step": 63011
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.660379409790039,
      "learning_rate": 0.0004959172225352504,
      "loss": 3.0939,
      "step": 63012
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2960965633392334,
      "learning_rate": 0.0004959141246999217,
      "loss": 3.1123,
      "step": 63013
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8001141548156738,
      "learning_rate": 0.0004959110268281687,
      "loss": 3.0782,
      "step": 63014
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7416311502456665,
      "learning_rate": 0.0004959079289199922,
      "loss": 2.8867,
      "step": 63015
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7673547267913818,
      "learning_rate": 0.0004959048309753928,
      "loss": 2.7054,
      "step": 63016
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.9380409717559814,
      "learning_rate": 0.0004959017329943708,
      "loss": 3.0104,
      "step": 63017
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0097498893737793,
      "learning_rate": 0.0004958986349769271,
      "loss": 2.9674,
      "step": 63018
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.909462332725525,
      "learning_rate": 0.0004958955369230621,
      "loss": 3.06,
      "step": 63019
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1273975372314453,
      "learning_rate": 0.0004958924388327764,
      "loss": 3.0835,
      "step": 63020
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6605080366134644,
      "learning_rate": 0.0004958893407060707,
      "loss": 3.0497,
      "step": 63021
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3534969091415405,
      "learning_rate": 0.0004958862425429454,
      "loss": 2.9812,
      "step": 63022
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5595107078552246,
      "learning_rate": 0.0004958831443434012,
      "loss": 2.7134,
      "step": 63023
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4497867822647095,
      "learning_rate": 0.0004958800461074385,
      "loss": 2.7994,
      "step": 63024
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4487816095352173,
      "learning_rate": 0.0004958769478350581,
      "loss": 3.1591,
      "step": 63025
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.400944471359253,
      "learning_rate": 0.0004958738495262605,
      "loss": 3.0074,
      "step": 63026
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5213264226913452,
      "learning_rate": 0.0004958707511810462,
      "loss": 2.9523,
      "step": 63027
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1630239486694336,
      "learning_rate": 0.0004958676527994159,
      "loss": 2.9507,
      "step": 63028
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4688609838485718,
      "learning_rate": 0.00049586455438137,
      "loss": 3.0098,
      "step": 63029
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5453580617904663,
      "learning_rate": 0.0004958614559269093,
      "loss": 3.0657,
      "step": 63030
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.428815484046936,
      "learning_rate": 0.0004958583574360342,
      "loss": 3.144,
      "step": 63031
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7854712009429932,
      "learning_rate": 0.0004958552589087453,
      "loss": 3.009,
      "step": 63032
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4050755500793457,
      "learning_rate": 0.0004958521603450432,
      "loss": 2.9504,
      "step": 63033
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5244269371032715,
      "learning_rate": 0.0004958490617449285,
      "loss": 3.0131,
      "step": 63034
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2173218727111816,
      "learning_rate": 0.0004958459631084018,
      "loss": 3.1277,
      "step": 63035
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6903949975967407,
      "learning_rate": 0.0004958428644354636,
      "loss": 3.1559,
      "step": 63036
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5665966272354126,
      "learning_rate": 0.0004958397657261147,
      "loss": 3.0934,
      "step": 63037
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.027240514755249,
      "learning_rate": 0.0004958366669803552,
      "loss": 2.7967,
      "step": 63038
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.08514142036438,
      "learning_rate": 0.0004958335681981862,
      "loss": 3.0838,
      "step": 63039
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.612309694290161,
      "learning_rate": 0.0004958304693796079,
      "loss": 2.8361,
      "step": 63040
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3465850353240967,
      "learning_rate": 0.000495827370524621,
      "loss": 3.1048,
      "step": 63041
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0914833545684814,
      "learning_rate": 0.0004958242716332262,
      "loss": 3.4801,
      "step": 63042
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.313807487487793,
      "learning_rate": 0.000495821172705424,
      "loss": 2.7345,
      "step": 63043
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.424864649772644,
      "learning_rate": 0.0004958180737412148,
      "loss": 3.034,
      "step": 63044
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1744086742401123,
      "learning_rate": 0.0004958149747405993,
      "loss": 3.2167,
      "step": 63045
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0950140953063965,
      "learning_rate": 0.0004958118757035782,
      "loss": 3.2079,
      "step": 63046
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.120314598083496,
      "learning_rate": 0.000495808776630152,
      "loss": 3.0031,
      "step": 63047
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4427963495254517,
      "learning_rate": 0.0004958056775203213,
      "loss": 2.606,
      "step": 63048
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.035874605178833,
      "learning_rate": 0.0004958025783740865,
      "loss": 3.0279,
      "step": 63049
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.7313711643218994,
      "learning_rate": 0.0004957994791914483,
      "loss": 2.9931,
      "step": 63050
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.899648666381836,
      "learning_rate": 0.0004957963799724074,
      "loss": 2.9914,
      "step": 63051
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8629376888275146,
      "learning_rate": 0.0004957932807169642,
      "loss": 2.9718,
      "step": 63052
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.2855470180511475,
      "learning_rate": 0.0004957901814251194,
      "loss": 3.0687,
      "step": 63053
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.195951223373413,
      "learning_rate": 0.0004957870820968734,
      "loss": 3.1507,
      "step": 63054
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4529776573181152,
      "learning_rate": 0.0004957839827322269,
      "loss": 2.951,
      "step": 63055
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.442460060119629,
      "learning_rate": 0.0004957808833311806,
      "loss": 3.0249,
      "step": 63056
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7903271913528442,
      "learning_rate": 0.0004957777838937347,
      "loss": 2.9364,
      "step": 63057
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8526227474212646,
      "learning_rate": 0.0004957746844198902,
      "loss": 3.0132,
      "step": 63058
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6434651613235474,
      "learning_rate": 0.0004957715849096474,
      "loss": 3.2219,
      "step": 63059
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.7676782608032227,
      "learning_rate": 0.000495768485363007,
      "loss": 3.0704,
      "step": 63060
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.019944190979004,
      "learning_rate": 0.0004957653857799696,
      "loss": 3.0125,
      "step": 63061
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6043559312820435,
      "learning_rate": 0.0004957622861605357,
      "loss": 3.1714,
      "step": 63062
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5819239616394043,
      "learning_rate": 0.0004957591865047058,
      "loss": 3.0147,
      "step": 63063
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.306962013244629,
      "learning_rate": 0.0004957560868124806,
      "loss": 2.9376,
      "step": 63064
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.656586170196533,
      "learning_rate": 0.0004957529870838607,
      "loss": 3.2546,
      "step": 63065
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6148240566253662,
      "learning_rate": 0.0004957498873188466,
      "loss": 3.1095,
      "step": 63066
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8469191789627075,
      "learning_rate": 0.0004957467875174389,
      "loss": 3.3194,
      "step": 63067
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7242087125778198,
      "learning_rate": 0.0004957436876796382,
      "loss": 2.947,
      "step": 63068
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5356128215789795,
      "learning_rate": 0.000495740587805445,
      "loss": 2.9513,
      "step": 63069
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4801632165908813,
      "learning_rate": 0.0004957374878948599,
      "loss": 3.0554,
      "step": 63070
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6581330299377441,
      "learning_rate": 0.0004957343879478836,
      "loss": 3.0618,
      "step": 63071
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0686159133911133,
      "learning_rate": 0.0004957312879645165,
      "loss": 2.8343,
      "step": 63072
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4195308685302734,
      "learning_rate": 0.0004957281879447594,
      "loss": 3.0499,
      "step": 63073
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5662063360214233,
      "learning_rate": 0.0004957250878886126,
      "loss": 3.0757,
      "step": 63074
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.787807822227478,
      "learning_rate": 0.0004957219877960769,
      "loss": 3.1061,
      "step": 63075
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6350597143173218,
      "learning_rate": 0.0004957188876671527,
      "loss": 3.0793,
      "step": 63076
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3945534229278564,
      "learning_rate": 0.0004957157875018407,
      "loss": 3.1462,
      "step": 63077
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.916314125061035,
      "learning_rate": 0.0004957126873001415,
      "loss": 3.0124,
      "step": 63078
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7941821813583374,
      "learning_rate": 0.0004957095870620555,
      "loss": 2.9469,
      "step": 63079
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7175942659378052,
      "learning_rate": 0.0004957064867875836,
      "loss": 3.1918,
      "step": 63080
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4421234130859375,
      "learning_rate": 0.000495703386476726,
      "loss": 2.9404,
      "step": 63081
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3993109464645386,
      "learning_rate": 0.0004957002861294835,
      "loss": 3.1005,
      "step": 63082
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8182519674301147,
      "learning_rate": 0.0004956971857458565,
      "loss": 3.0878,
      "step": 63083
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3074779510498047,
      "learning_rate": 0.0004956940853258459,
      "loss": 2.8857,
      "step": 63084
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6895941495895386,
      "learning_rate": 0.000495690984869452,
      "loss": 2.9614,
      "step": 63085
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5751245021820068,
      "learning_rate": 0.0004956878843766754,
      "loss": 3.1076,
      "step": 63086
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4126800298690796,
      "learning_rate": 0.0004956847838475167,
      "loss": 3.054,
      "step": 63087
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7503963708877563,
      "learning_rate": 0.0004956816832819766,
      "loss": 2.9474,
      "step": 63088
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8256161212921143,
      "learning_rate": 0.0004956785826800555,
      "loss": 2.8981,
      "step": 63089
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5764758586883545,
      "learning_rate": 0.0004956754820417541,
      "loss": 3.133,
      "step": 63090
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3535993099212646,
      "learning_rate": 0.000495672381367073,
      "loss": 3.0265,
      "step": 63091
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8630306720733643,
      "learning_rate": 0.0004956692806560127,
      "loss": 2.8829,
      "step": 63092
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8626775741577148,
      "learning_rate": 0.0004956661799085736,
      "loss": 2.7068,
      "step": 63093
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4139372110366821,
      "learning_rate": 0.0004956630791247566,
      "loss": 2.9656,
      "step": 63094
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.361424446105957,
      "learning_rate": 0.0004956599783045621,
      "loss": 3.0872,
      "step": 63095
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5136144161224365,
      "learning_rate": 0.0004956568774479908,
      "loss": 2.933,
      "step": 63096
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6462284326553345,
      "learning_rate": 0.0004956537765550431,
      "loss": 3.1053,
      "step": 63097
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1862425804138184,
      "learning_rate": 0.0004956506756257197,
      "loss": 3.0595,
      "step": 63098
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.8462929725646973,
      "learning_rate": 0.0004956475746600212,
      "loss": 3.1702,
      "step": 63099
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.808133602142334,
      "learning_rate": 0.0004956444736579481,
      "loss": 2.7597,
      "step": 63100
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8449959754943848,
      "learning_rate": 0.000495641372619501,
      "loss": 3.0411,
      "step": 63101
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6120940446853638,
      "learning_rate": 0.0004956382715446805,
      "loss": 3.0678,
      "step": 63102
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.986094355583191,
      "learning_rate": 0.000495635170433487,
      "loss": 3.2269,
      "step": 63103
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.478066325187683,
      "learning_rate": 0.0004956320692859214,
      "loss": 2.9187,
      "step": 63104
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2372279167175293,
      "learning_rate": 0.000495628968101984,
      "loss": 3.0206,
      "step": 63105
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.46089506149292,
      "learning_rate": 0.0004956258668816756,
      "loss": 2.8221,
      "step": 63106
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5670578479766846,
      "learning_rate": 0.0004956227656249965,
      "loss": 3.2114,
      "step": 63107
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8739389181137085,
      "learning_rate": 0.0004956196643319476,
      "loss": 3.1158,
      "step": 63108
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.422673463821411,
      "learning_rate": 0.0004956165630025292,
      "loss": 2.7889,
      "step": 63109
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0800483226776123,
      "learning_rate": 0.000495613461636742,
      "loss": 2.9606,
      "step": 63110
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4569199085235596,
      "learning_rate": 0.0004956103602345866,
      "loss": 2.8646,
      "step": 63111
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.375645160675049,
      "learning_rate": 0.0004956072587960636,
      "loss": 3.206,
      "step": 63112
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.6092329025268555,
      "learning_rate": 0.0004956041573211733,
      "loss": 2.9221,
      "step": 63113
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5425223112106323,
      "learning_rate": 0.0004956010558099167,
      "loss": 3.242,
      "step": 63114
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5020627975463867,
      "learning_rate": 0.0004955979542622941,
      "loss": 3.0422,
      "step": 63115
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9408891201019287,
      "learning_rate": 0.0004955948526783061,
      "loss": 3.2537,
      "step": 63116
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.784220576286316,
      "learning_rate": 0.0004955917510579535,
      "loss": 3.1758,
      "step": 63117
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4204578399658203,
      "learning_rate": 0.0004955886494012366,
      "loss": 3.0242,
      "step": 63118
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3228375911712646,
      "learning_rate": 0.0004955855477081561,
      "loss": 2.8266,
      "step": 63119
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2762677669525146,
      "learning_rate": 0.0004955824459787125,
      "loss": 2.8531,
      "step": 63120
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.893843412399292,
      "learning_rate": 0.0004955793442129064,
      "loss": 2.8477,
      "step": 63121
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.779164433479309,
      "learning_rate": 0.0004955762424107385,
      "loss": 2.9863,
      "step": 63122
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.4618141651153564,
      "learning_rate": 0.0004955731405722093,
      "loss": 2.9681,
      "step": 63123
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.9872210025787354,
      "learning_rate": 0.0004955700386973192,
      "loss": 3.1049,
      "step": 63124
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.861020565032959,
      "learning_rate": 0.000495566936786069,
      "loss": 2.8092,
      "step": 63125
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.494887113571167,
      "learning_rate": 0.0004955638348384593,
      "loss": 2.9267,
      "step": 63126
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.003040313720703,
      "learning_rate": 0.0004955607328544906,
      "loss": 2.8522,
      "step": 63127
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2972781658172607,
      "learning_rate": 0.0004955576308341633,
      "loss": 3.0021,
      "step": 63128
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0963313579559326,
      "learning_rate": 0.0004955545287774783,
      "loss": 2.9818,
      "step": 63129
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0774879455566406,
      "learning_rate": 0.000495551426684436,
      "loss": 3.0914,
      "step": 63130
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.56498646736145,
      "learning_rate": 0.000495548324555037,
      "loss": 2.9833,
      "step": 63131
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.216249704360962,
      "learning_rate": 0.0004955452223892818,
      "loss": 3.0136,
      "step": 63132
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.209859609603882,
      "learning_rate": 0.0004955421201871712,
      "loss": 3.0315,
      "step": 63133
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.142030954360962,
      "learning_rate": 0.0004955390179487055,
      "loss": 2.8227,
      "step": 63134
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.0741145610809326,
      "learning_rate": 0.0004955359156738855,
      "loss": 2.948,
      "step": 63135
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8655356168746948,
      "learning_rate": 0.0004955328133627115,
      "loss": 3.1646,
      "step": 63136
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.815839171409607,
      "learning_rate": 0.0004955297110151844,
      "loss": 3.1528,
      "step": 63137
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.0687499046325684,
      "learning_rate": 0.0004955266086313047,
      "loss": 2.718,
      "step": 63138
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.3541600704193115,
      "learning_rate": 0.0004955235062110728,
      "loss": 2.8595,
      "step": 63139
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4517385959625244,
      "learning_rate": 0.0004955204037544894,
      "loss": 2.8207,
      "step": 63140
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.084413766860962,
      "learning_rate": 0.0004955173012615552,
      "loss": 2.9815,
      "step": 63141
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.156433343887329,
      "learning_rate": 0.0004955141987322705,
      "loss": 2.9958,
      "step": 63142
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5852158069610596,
      "learning_rate": 0.000495511096166636,
      "loss": 2.8113,
      "step": 63143
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0431418418884277,
      "learning_rate": 0.0004955079935646524,
      "loss": 3.0939,
      "step": 63144
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.1962409019470215,
      "learning_rate": 0.00049550489092632,
      "loss": 3.1221,
      "step": 63145
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9526853561401367,
      "learning_rate": 0.0004955017882516397,
      "loss": 2.9236,
      "step": 63146
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.459397554397583,
      "learning_rate": 0.000495498685540612,
      "loss": 3.1811,
      "step": 63147
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.282865285873413,
      "learning_rate": 0.0004954955827932373,
      "loss": 3.0051,
      "step": 63148
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6157844066619873,
      "learning_rate": 0.0004954924800095162,
      "loss": 3.0341,
      "step": 63149
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6636450290679932,
      "learning_rate": 0.0004954893771894495,
      "loss": 2.9681,
      "step": 63150
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.37583327293396,
      "learning_rate": 0.0004954862743330376,
      "loss": 3.1238,
      "step": 63151
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6641712188720703,
      "learning_rate": 0.000495483171440281,
      "loss": 3.105,
      "step": 63152
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7651499509811401,
      "learning_rate": 0.0004954800685111806,
      "loss": 2.9102,
      "step": 63153
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.0563149452209473,
      "learning_rate": 0.0004954769655457366,
      "loss": 2.7612,
      "step": 63154
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.4382238388061523,
      "learning_rate": 0.0004954738625439497,
      "loss": 2.9381,
      "step": 63155
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.685807228088379,
      "learning_rate": 0.0004954707595058207,
      "loss": 2.9226,
      "step": 63156
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5474143028259277,
      "learning_rate": 0.0004954676564313499,
      "loss": 2.973,
      "step": 63157
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3641269207000732,
      "learning_rate": 0.000495464553320538,
      "loss": 3.112,
      "step": 63158
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5686962604522705,
      "learning_rate": 0.0004954614501733854,
      "loss": 3.1663,
      "step": 63159
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2235517501831055,
      "learning_rate": 0.000495458346989893,
      "loss": 2.9842,
      "step": 63160
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1882755756378174,
      "learning_rate": 0.0004954552437700612,
      "loss": 3.0389,
      "step": 63161
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.781682014465332,
      "learning_rate": 0.0004954521405138905,
      "loss": 3.0719,
      "step": 63162
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.8373684883117676,
      "learning_rate": 0.0004954490372213817,
      "loss": 3.0266,
      "step": 63163
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.09759783744812,
      "learning_rate": 0.000495445933892535,
      "loss": 3.0153,
      "step": 63164
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2294325828552246,
      "learning_rate": 0.0004954428305273513,
      "loss": 3.1252,
      "step": 63165
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8088507652282715,
      "learning_rate": 0.0004954397271258312,
      "loss": 2.9087,
      "step": 63166
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8894039392471313,
      "learning_rate": 0.0004954366236879751,
      "loss": 3.1432,
      "step": 63167
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1035923957824707,
      "learning_rate": 0.0004954335202137836,
      "loss": 3.0845,
      "step": 63168
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4563941955566406,
      "learning_rate": 0.0004954304167032573,
      "loss": 3.1317,
      "step": 63169
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8152194023132324,
      "learning_rate": 0.0004954273131563969,
      "loss": 2.95,
      "step": 63170
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7610629796981812,
      "learning_rate": 0.0004954242095732028,
      "loss": 2.9682,
      "step": 63171
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3995767831802368,
      "learning_rate": 0.0004954211059536757,
      "loss": 3.1294,
      "step": 63172
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8923512697219849,
      "learning_rate": 0.0004954180022978161,
      "loss": 3.2254,
      "step": 63173
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4669029712677002,
      "learning_rate": 0.0004954148986056246,
      "loss": 3.0133,
      "step": 63174
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3638298511505127,
      "learning_rate": 0.0004954117948771018,
      "loss": 2.9654,
      "step": 63175
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.575374960899353,
      "learning_rate": 0.0004954086911122483,
      "loss": 2.9779,
      "step": 63176
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4944398403167725,
      "learning_rate": 0.0004954055873110645,
      "loss": 2.9737,
      "step": 63177
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.229807734489441,
      "learning_rate": 0.0004954024834735513,
      "loss": 2.9406,
      "step": 63178
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3799203634262085,
      "learning_rate": 0.0004953993795997089,
      "loss": 2.8631,
      "step": 63179
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2393598556518555,
      "learning_rate": 0.0004953962756895381,
      "loss": 3.1694,
      "step": 63180
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1705164909362793,
      "learning_rate": 0.0004953931717430396,
      "loss": 2.8383,
      "step": 63181
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6899805068969727,
      "learning_rate": 0.0004953900677602136,
      "loss": 3.0504,
      "step": 63182
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.907362461090088,
      "learning_rate": 0.0004953869637410611,
      "loss": 2.9382,
      "step": 63183
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4749211072921753,
      "learning_rate": 0.0004953838596855823,
      "loss": 3.1583,
      "step": 63184
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6166046857833862,
      "learning_rate": 0.0004953807555937781,
      "loss": 2.9947,
      "step": 63185
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4303288459777832,
      "learning_rate": 0.0004953776514656489,
      "loss": 3.0134,
      "step": 63186
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4166871309280396,
      "learning_rate": 0.0004953745473011953,
      "loss": 3.0806,
      "step": 63187
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6455076932907104,
      "learning_rate": 0.0004953714431004178,
      "loss": 2.8444,
      "step": 63188
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9928349256515503,
      "learning_rate": 0.0004953683388633171,
      "loss": 2.9207,
      "step": 63189
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8573940992355347,
      "learning_rate": 0.0004953652345898938,
      "loss": 3.2508,
      "step": 63190
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.533869981765747,
      "learning_rate": 0.0004953621302801485,
      "loss": 3.227,
      "step": 63191
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.484399437904358,
      "learning_rate": 0.0004953590259340814,
      "loss": 3.3328,
      "step": 63192
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2392687797546387,
      "learning_rate": 0.0004953559215516936,
      "loss": 2.898,
      "step": 63193
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5901570320129395,
      "learning_rate": 0.0004953528171329854,
      "loss": 2.8824,
      "step": 63194
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.176539659500122,
      "learning_rate": 0.0004953497126779574,
      "loss": 3.1828,
      "step": 63195
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.921399712562561,
      "learning_rate": 0.0004953466081866103,
      "loss": 3.3169,
      "step": 63196
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.820529818534851,
      "learning_rate": 0.0004953435036589444,
      "loss": 3.0859,
      "step": 63197
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6037181615829468,
      "learning_rate": 0.0004953403990949605,
      "loss": 2.9449,
      "step": 63198
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3997836112976074,
      "learning_rate": 0.0004953372944946592,
      "loss": 2.8429,
      "step": 63199
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.656562089920044,
      "learning_rate": 0.000495334189858041,
      "loss": 2.9383,
      "step": 63200
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.631239414215088,
      "learning_rate": 0.0004953310851851064,
      "loss": 3.1615,
      "step": 63201
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6274678707122803,
      "learning_rate": 0.0004953279804758562,
      "loss": 3.0867,
      "step": 63202
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.79922354221344,
      "learning_rate": 0.0004953248757302907,
      "loss": 3.0669,
      "step": 63203
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.334460496902466,
      "learning_rate": 0.0004953217709484108,
      "loss": 3.0236,
      "step": 63204
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8148553371429443,
      "learning_rate": 0.0004953186661302168,
      "loss": 2.8278,
      "step": 63205
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1080219745635986,
      "learning_rate": 0.0004953155612757094,
      "loss": 3.0524,
      "step": 63206
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.893724203109741,
      "learning_rate": 0.000495312456384889,
      "loss": 3.1352,
      "step": 63207
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9609313011169434,
      "learning_rate": 0.0004953093514577565,
      "loss": 2.9046,
      "step": 63208
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.539856195449829,
      "learning_rate": 0.0004953062464943122,
      "loss": 2.9526,
      "step": 63209
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0663673877716064,
      "learning_rate": 0.0004953031414945568,
      "loss": 2.905,
      "step": 63210
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.180628538131714,
      "learning_rate": 0.000495300036458491,
      "loss": 3.0053,
      "step": 63211
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.353905439376831,
      "learning_rate": 0.000495296931386115,
      "loss": 2.9835,
      "step": 63212
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8714202642440796,
      "learning_rate": 0.0004952938262774298,
      "loss": 3.1879,
      "step": 63213
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.293041944503784,
      "learning_rate": 0.0004952907211324357,
      "loss": 3.1778,
      "step": 63214
    },
    {
      "epoch": 0.82,
      "grad_norm": 4.817876815795898,
      "learning_rate": 0.0004952876159511335,
      "loss": 2.9428,
      "step": 63215
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8458107709884644,
      "learning_rate": 0.0004952845107335235,
      "loss": 3.206,
      "step": 63216
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9509942531585693,
      "learning_rate": 0.0004952814054796065,
      "loss": 3.0908,
      "step": 63217
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.114664316177368,
      "learning_rate": 0.0004952783001893829,
      "loss": 3.1975,
      "step": 63218
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9076508283615112,
      "learning_rate": 0.0004952751948628535,
      "loss": 2.9748,
      "step": 63219
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1965172290802,
      "learning_rate": 0.0004952720895000187,
      "loss": 3.0422,
      "step": 63220
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.582397699356079,
      "learning_rate": 0.0004952689841008791,
      "loss": 2.8113,
      "step": 63221
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7491025924682617,
      "learning_rate": 0.0004952658786654354,
      "loss": 3.0592,
      "step": 63222
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.83708655834198,
      "learning_rate": 0.0004952627731936879,
      "loss": 3.2419,
      "step": 63223
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4730814695358276,
      "learning_rate": 0.0004952596676856376,
      "loss": 3.0405,
      "step": 63224
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4960342645645142,
      "learning_rate": 0.0004952565621412847,
      "loss": 3.2297,
      "step": 63225
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4849534034729004,
      "learning_rate": 0.00049525345656063,
      "loss": 2.9956,
      "step": 63226
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.458041787147522,
      "learning_rate": 0.000495250350943674,
      "loss": 3.1655,
      "step": 63227
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5047690868377686,
      "learning_rate": 0.0004952472452904171,
      "loss": 3.1566,
      "step": 63228
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.060713529586792,
      "learning_rate": 0.0004952441396008602,
      "loss": 2.7095,
      "step": 63229
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.9172825813293457,
      "learning_rate": 0.0004952410338750038,
      "loss": 2.9589,
      "step": 63230
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0950918197631836,
      "learning_rate": 0.0004952379281128482,
      "loss": 3.0566,
      "step": 63231
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6424129009246826,
      "learning_rate": 0.0004952348223143943,
      "loss": 3.1286,
      "step": 63232
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.7022061347961426,
      "learning_rate": 0.0004952317164796426,
      "loss": 3.0668,
      "step": 63233
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6332151889801025,
      "learning_rate": 0.0004952286106085935,
      "loss": 3.0514,
      "step": 63234
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9198203086853027,
      "learning_rate": 0.0004952255047012479,
      "loss": 3.1655,
      "step": 63235
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1544690132141113,
      "learning_rate": 0.0004952223987576062,
      "loss": 2.7562,
      "step": 63236
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5015904903411865,
      "learning_rate": 0.0004952192927776688,
      "loss": 3.0388,
      "step": 63237
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.725021481513977,
      "learning_rate": 0.0004952161867614364,
      "loss": 2.91,
      "step": 63238
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6887186765670776,
      "learning_rate": 0.0004952130807089099,
      "loss": 3.02,
      "step": 63239
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5094135999679565,
      "learning_rate": 0.0004952099746200894,
      "loss": 3.1069,
      "step": 63240
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.605249285697937,
      "learning_rate": 0.0004952068684949758,
      "loss": 2.9706,
      "step": 63241
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9830719232559204,
      "learning_rate": 0.0004952037623335695,
      "loss": 2.9223,
      "step": 63242
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.799104928970337,
      "learning_rate": 0.0004952006561358711,
      "loss": 3.0876,
      "step": 63243
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5936071872711182,
      "learning_rate": 0.0004951975499018813,
      "loss": 3.0427,
      "step": 63244
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.432504653930664,
      "learning_rate": 0.0004951944436316005,
      "loss": 3.1384,
      "step": 63245
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.692052960395813,
      "learning_rate": 0.0004951913373250293,
      "loss": 3.0806,
      "step": 63246
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8183599710464478,
      "learning_rate": 0.0004951882309821685,
      "loss": 3.0075,
      "step": 63247
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.800499677658081,
      "learning_rate": 0.0004951851246030185,
      "loss": 2.9737,
      "step": 63248
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5712354183197021,
      "learning_rate": 0.0004951820181875798,
      "loss": 3.0065,
      "step": 63249
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.469055652618408,
      "learning_rate": 0.0004951789117358532,
      "loss": 2.9674,
      "step": 63250
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.40999174118042,
      "learning_rate": 0.0004951758052478392,
      "loss": 3.0089,
      "step": 63251
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0574023723602295,
      "learning_rate": 0.0004951726987235382,
      "loss": 3.0738,
      "step": 63252
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0460641384124756,
      "learning_rate": 0.0004951695921629509,
      "loss": 3.0909,
      "step": 63253
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4305450916290283,
      "learning_rate": 0.0004951664855660779,
      "loss": 2.9044,
      "step": 63254
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.540036916732788,
      "learning_rate": 0.0004951633789329199,
      "loss": 3.1936,
      "step": 63255
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3610363006591797,
      "learning_rate": 0.0004951602722634772,
      "loss": 3.1203,
      "step": 63256
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4068224430084229,
      "learning_rate": 0.0004951571655577506,
      "loss": 3.0497,
      "step": 63257
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8053126335144043,
      "learning_rate": 0.0004951540588157405,
      "loss": 3.1329,
      "step": 63258
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9841843843460083,
      "learning_rate": 0.0004951509520374478,
      "loss": 2.8705,
      "step": 63259
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.240419387817383,
      "learning_rate": 0.0004951478452228726,
      "loss": 2.9049,
      "step": 63260
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6971923112869263,
      "learning_rate": 0.0004951447383720159,
      "loss": 3.2519,
      "step": 63261
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7710161209106445,
      "learning_rate": 0.000495141631484878,
      "loss": 3.062,
      "step": 63262
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9871560335159302,
      "learning_rate": 0.0004951385245614596,
      "loss": 3.2058,
      "step": 63263
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6939260959625244,
      "learning_rate": 0.0004951354176017613,
      "loss": 3.0236,
      "step": 63264
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.113057851791382,
      "learning_rate": 0.0004951323106057837,
      "loss": 3.0828,
      "step": 63265
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6015613079071045,
      "learning_rate": 0.0004951292035735273,
      "loss": 2.8888,
      "step": 63266
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3841116428375244,
      "learning_rate": 0.0004951260965049926,
      "loss": 3.0822,
      "step": 63267
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8650376796722412,
      "learning_rate": 0.0004951229894001803,
      "loss": 2.7799,
      "step": 63268
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5259594917297363,
      "learning_rate": 0.000495119882259091,
      "loss": 2.8622,
      "step": 63269
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.5448684692382812,
      "learning_rate": 0.0004951167750817252,
      "loss": 3.0511,
      "step": 63270
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6368056535720825,
      "learning_rate": 0.0004951136678680835,
      "loss": 3.1551,
      "step": 63271
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4548035860061646,
      "learning_rate": 0.0004951105606181666,
      "loss": 3.0267,
      "step": 63272
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.663687825202942,
      "learning_rate": 0.0004951074533319749,
      "loss": 2.9823,
      "step": 63273
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.721522331237793,
      "learning_rate": 0.000495104346009509,
      "loss": 3.3223,
      "step": 63274
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6510372161865234,
      "learning_rate": 0.0004951012386507695,
      "loss": 2.9266,
      "step": 63275
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.558286190032959,
      "learning_rate": 0.0004950981312557569,
      "loss": 3.205,
      "step": 63276
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6802233457565308,
      "learning_rate": 0.0004950950238244721,
      "loss": 3.1498,
      "step": 63277
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4845833778381348,
      "learning_rate": 0.0004950919163569154,
      "loss": 3.0881,
      "step": 63278
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3926434516906738,
      "learning_rate": 0.0004950888088530875,
      "loss": 3.0988,
      "step": 63279
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.7573838233947754,
      "learning_rate": 0.0004950857013129886,
      "loss": 2.9022,
      "step": 63280
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8043279647827148,
      "learning_rate": 0.0004950825937366199,
      "loss": 3.086,
      "step": 63281
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6741533279418945,
      "learning_rate": 0.0004950794861239815,
      "loss": 3.0698,
      "step": 63282
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7353947162628174,
      "learning_rate": 0.0004950763784750742,
      "loss": 2.9628,
      "step": 63283
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.236833095550537,
      "learning_rate": 0.0004950732707898985,
      "loss": 2.9069,
      "step": 63284
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6803513765335083,
      "learning_rate": 0.000495070163068455,
      "loss": 3.0038,
      "step": 63285
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7905598878860474,
      "learning_rate": 0.0004950670553107442,
      "loss": 2.9732,
      "step": 63286
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0086288452148438,
      "learning_rate": 0.0004950639475167669,
      "loss": 2.912,
      "step": 63287
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6828581094741821,
      "learning_rate": 0.0004950608396865234,
      "loss": 3.0382,
      "step": 63288
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9331032037734985,
      "learning_rate": 0.0004950577318200144,
      "loss": 2.847,
      "step": 63289
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7239652872085571,
      "learning_rate": 0.0004950546239172407,
      "loss": 2.9883,
      "step": 63290
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7140564918518066,
      "learning_rate": 0.0004950515159782025,
      "loss": 3.0919,
      "step": 63291
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.186663866043091,
      "learning_rate": 0.0004950484080029006,
      "loss": 3.2537,
      "step": 63292
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7947535514831543,
      "learning_rate": 0.0004950452999913354,
      "loss": 2.8402,
      "step": 63293
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.538205623626709,
      "learning_rate": 0.0004950421919435076,
      "loss": 3.0694,
      "step": 63294
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.314030647277832,
      "learning_rate": 0.000495039083859418,
      "loss": 2.8699,
      "step": 63295
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.477112889289856,
      "learning_rate": 0.0004950359757390668,
      "loss": 3.0717,
      "step": 63296
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.7541725635528564,
      "learning_rate": 0.0004950328675824547,
      "loss": 2.9076,
      "step": 63297
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.6361539363861084,
      "learning_rate": 0.0004950297593895823,
      "loss": 2.8632,
      "step": 63298
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6455658674240112,
      "learning_rate": 0.0004950266511604502,
      "loss": 3.0052,
      "step": 63299
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3906636238098145,
      "learning_rate": 0.000495023542895059,
      "loss": 3.1124,
      "step": 63300
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.060041904449463,
      "learning_rate": 0.0004950204345934092,
      "loss": 2.796,
      "step": 63301
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.540282130241394,
      "learning_rate": 0.0004950173262555015,
      "loss": 2.8499,
      "step": 63302
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4934883117675781,
      "learning_rate": 0.0004950142178813363,
      "loss": 2.824,
      "step": 63303
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.9929888248443604,
      "learning_rate": 0.0004950111094709144,
      "loss": 3.1256,
      "step": 63304
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4981597661972046,
      "learning_rate": 0.0004950080010242362,
      "loss": 3.0654,
      "step": 63305
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1171956062316895,
      "learning_rate": 0.0004950048925413023,
      "loss": 2.9866,
      "step": 63306
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.871801733970642,
      "learning_rate": 0.0004950017840221133,
      "loss": 3.0808,
      "step": 63307
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6513859033584595,
      "learning_rate": 0.0004949986754666698,
      "loss": 2.9304,
      "step": 63308
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.4189560413360596,
      "learning_rate": 0.0004949955668749724,
      "loss": 3.2518,
      "step": 63309
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.1960361003875732,
      "learning_rate": 0.0004949924582470216,
      "loss": 2.9593,
      "step": 63310
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8796958923339844,
      "learning_rate": 0.000494989349582818,
      "loss": 3.1812,
      "step": 63311
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3237639665603638,
      "learning_rate": 0.0004949862408823623,
      "loss": 3.083,
      "step": 63312
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7379379272460938,
      "learning_rate": 0.0004949831321456549,
      "loss": 3.0272,
      "step": 63313
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6159933805465698,
      "learning_rate": 0.0004949800233726965,
      "loss": 2.9726,
      "step": 63314
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6635773181915283,
      "learning_rate": 0.0004949769145634875,
      "loss": 3.2763,
      "step": 63315
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5528680086135864,
      "learning_rate": 0.0004949738057180288,
      "loss": 2.9654,
      "step": 63316
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.3779079914093018,
      "learning_rate": 0.0004949706968363206,
      "loss": 2.9291,
      "step": 63317
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4008533954620361,
      "learning_rate": 0.0004949675879183639,
      "loss": 3.1372,
      "step": 63318
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.625401496887207,
      "learning_rate": 0.0004949644789641588,
      "loss": 2.888,
      "step": 63319
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6442482471466064,
      "learning_rate": 0.0004949613699737063,
      "loss": 3.3196,
      "step": 63320
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.8201874494552612,
      "learning_rate": 0.0004949582609470067,
      "loss": 2.8973,
      "step": 63321
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.580336093902588,
      "learning_rate": 0.0004949551518840607,
      "loss": 3.1234,
      "step": 63322
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5343362092971802,
      "learning_rate": 0.0004949520427848689,
      "loss": 3.0487,
      "step": 63323
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4911147356033325,
      "learning_rate": 0.0004949489336494319,
      "loss": 3.137,
      "step": 63324
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3790092468261719,
      "learning_rate": 0.0004949458244777501,
      "loss": 3.0596,
      "step": 63325
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4721940755844116,
      "learning_rate": 0.0004949427152698241,
      "loss": 2.8296,
      "step": 63326
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6010441780090332,
      "learning_rate": 0.0004949396060256548,
      "loss": 2.8988,
      "step": 63327
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4639577865600586,
      "learning_rate": 0.0004949364967452423,
      "loss": 3.1472,
      "step": 63328
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5179908275604248,
      "learning_rate": 0.0004949333874285876,
      "loss": 2.9459,
      "step": 63329
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3297507762908936,
      "learning_rate": 0.0004949302780756911,
      "loss": 3.0577,
      "step": 63330
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4774292707443237,
      "learning_rate": 0.0004949271686865534,
      "loss": 2.9216,
      "step": 63331
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.646584391593933,
      "learning_rate": 0.000494924059261175,
      "loss": 3.1479,
      "step": 63332
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4350879192352295,
      "learning_rate": 0.0004949209497995565,
      "loss": 3.1832,
      "step": 63333
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3879536390304565,
      "learning_rate": 0.0004949178403016985,
      "loss": 2.9635,
      "step": 63334
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7081369161605835,
      "learning_rate": 0.0004949147307676016,
      "loss": 3.0914,
      "step": 63335
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3588606119155884,
      "learning_rate": 0.0004949116211972666,
      "loss": 3.1627,
      "step": 63336
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5292195081710815,
      "learning_rate": 0.0004949085115906936,
      "loss": 3.0432,
      "step": 63337
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6258342266082764,
      "learning_rate": 0.0004949054019478835,
      "loss": 2.9526,
      "step": 63338
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6550782918930054,
      "learning_rate": 0.0004949022922688368,
      "loss": 2.9424,
      "step": 63339
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6030464172363281,
      "learning_rate": 0.0004948991825535541,
      "loss": 2.872,
      "step": 63340
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2477645874023438,
      "learning_rate": 0.0004948960728020359,
      "loss": 2.8971,
      "step": 63341
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.4153614044189453,
      "learning_rate": 0.000494892963014283,
      "loss": 2.8766,
      "step": 63342
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.1684629917144775,
      "learning_rate": 0.0004948898531902956,
      "loss": 3.1315,
      "step": 63343
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.3346269130706787,
      "learning_rate": 0.0004948867433300746,
      "loss": 2.6865,
      "step": 63344
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.315732717514038,
      "learning_rate": 0.0004948836334336205,
      "loss": 3.1377,
      "step": 63345
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.134375810623169,
      "learning_rate": 0.000494880523500934,
      "loss": 2.9149,
      "step": 63346
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5009602308273315,
      "learning_rate": 0.0004948774135320153,
      "loss": 2.8588,
      "step": 63347
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.198115825653076,
      "learning_rate": 0.0004948743035268653,
      "loss": 3.0642,
      "step": 63348
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.0184810161590576,
      "learning_rate": 0.0004948711934854845,
      "loss": 3.1888,
      "step": 63349
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5366036891937256,
      "learning_rate": 0.0004948680834078734,
      "loss": 3.0406,
      "step": 63350
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.7370949983596802,
      "learning_rate": 0.0004948649732940326,
      "loss": 2.8916,
      "step": 63351
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2814483642578125,
      "learning_rate": 0.000494861863143963,
      "loss": 3.0374,
      "step": 63352
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.5691490173339844,
      "learning_rate": 0.0004948587529576646,
      "loss": 2.9446,
      "step": 63353
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.6152300834655762,
      "learning_rate": 0.0004948556427351383,
      "loss": 3.0002,
      "step": 63354
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.69105863571167,
      "learning_rate": 0.0004948525324763849,
      "loss": 3.0613,
      "step": 63355
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.614607572555542,
      "learning_rate": 0.0004948494221814045,
      "loss": 2.9898,
      "step": 63356
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2710812091827393,
      "learning_rate": 0.0004948463118501981,
      "loss": 2.9969,
      "step": 63357
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.2867414951324463,
      "learning_rate": 0.0004948432014827659,
      "loss": 2.9237,
      "step": 63358
    },
    {
      "epoch": 0.82,
      "grad_norm": 1.712647557258606,
      "learning_rate": 0.0004948400910791087,
      "loss": 2.8934,
      "step": 63359
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.193375825881958,
      "learning_rate": 0.0004948369806392272,
      "loss": 3.0498,
      "step": 63360
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.693737268447876,
      "learning_rate": 0.0004948338701631218,
      "loss": 3.0674,
      "step": 63361
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6508593559265137,
      "learning_rate": 0.0004948307596507929,
      "loss": 3.0687,
      "step": 63362
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4811750650405884,
      "learning_rate": 0.0004948276491022415,
      "loss": 2.9222,
      "step": 63363
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1979188919067383,
      "learning_rate": 0.000494824538517468,
      "loss": 3.0116,
      "step": 63364
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.8445184230804443,
      "learning_rate": 0.0004948214278964727,
      "loss": 3.245,
      "step": 63365
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5846365690231323,
      "learning_rate": 0.0004948183172392565,
      "loss": 2.7124,
      "step": 63366
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4443066120147705,
      "learning_rate": 0.00049481520654582,
      "loss": 3.1946,
      "step": 63367
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6189244985580444,
      "learning_rate": 0.0004948120958161637,
      "loss": 3.003,
      "step": 63368
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.218113660812378,
      "learning_rate": 0.000494808985050288,
      "loss": 2.6872,
      "step": 63369
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.034646987915039,
      "learning_rate": 0.0004948058742481937,
      "loss": 3.2664,
      "step": 63370
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.840574026107788,
      "learning_rate": 0.0004948027634098813,
      "loss": 3.307,
      "step": 63371
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7851942777633667,
      "learning_rate": 0.0004947996525353515,
      "loss": 3.1766,
      "step": 63372
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.556997299194336,
      "learning_rate": 0.0004947965416246046,
      "loss": 2.9381,
      "step": 63373
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.161804676055908,
      "learning_rate": 0.0004947934306776414,
      "loss": 2.9338,
      "step": 63374
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4892851114273071,
      "learning_rate": 0.0004947903196944625,
      "loss": 3.0654,
      "step": 63375
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8176409006118774,
      "learning_rate": 0.0004947872086750683,
      "loss": 3.2235,
      "step": 63376
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6479281187057495,
      "learning_rate": 0.0004947840976194596,
      "loss": 3.0555,
      "step": 63377
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7539982795715332,
      "learning_rate": 0.0004947809865276367,
      "loss": 2.8001,
      "step": 63378
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7541550397872925,
      "learning_rate": 0.0004947778753996005,
      "loss": 2.9301,
      "step": 63379
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.502625584602356,
      "learning_rate": 0.0004947747642353513,
      "loss": 2.8512,
      "step": 63380
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7591526508331299,
      "learning_rate": 0.0004947716530348898,
      "loss": 3.2844,
      "step": 63381
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.667421579360962,
      "learning_rate": 0.0004947685417982167,
      "loss": 3.1307,
      "step": 63382
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.4783987998962402,
      "learning_rate": 0.0004947654305253323,
      "loss": 2.9206,
      "step": 63383
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0897376537323,
      "learning_rate": 0.0004947623192162374,
      "loss": 3.03,
      "step": 63384
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.42695152759552,
      "learning_rate": 0.0004947592078709326,
      "loss": 2.8375,
      "step": 63385
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.061774492263794,
      "learning_rate": 0.0004947560964894183,
      "loss": 2.9545,
      "step": 63386
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3269805908203125,
      "learning_rate": 0.000494752985071695,
      "loss": 2.8544,
      "step": 63387
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2426509857177734,
      "learning_rate": 0.0004947498736177638,
      "loss": 2.9197,
      "step": 63388
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.717355489730835,
      "learning_rate": 0.0004947467621276246,
      "loss": 3.1451,
      "step": 63389
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.925673007965088,
      "learning_rate": 0.0004947436506012784,
      "loss": 2.9443,
      "step": 63390
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.3046460151672363,
      "learning_rate": 0.0004947405390387258,
      "loss": 2.9539,
      "step": 63391
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.449431896209717,
      "learning_rate": 0.0004947374274399671,
      "loss": 3.1078,
      "step": 63392
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9135788679122925,
      "learning_rate": 0.0004947343158050031,
      "loss": 2.8037,
      "step": 63393
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9523842334747314,
      "learning_rate": 0.0004947312041338343,
      "loss": 2.8408,
      "step": 63394
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.7100272178649902,
      "learning_rate": 0.0004947280924264613,
      "loss": 2.9973,
      "step": 63395
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7326507568359375,
      "learning_rate": 0.0004947249806828846,
      "loss": 2.8565,
      "step": 63396
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.410017728805542,
      "learning_rate": 0.000494721868903105,
      "loss": 2.9989,
      "step": 63397
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4732081890106201,
      "learning_rate": 0.0004947187570871228,
      "loss": 3.0797,
      "step": 63398
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.9850945472717285,
      "learning_rate": 0.0004947156452349388,
      "loss": 3.1199,
      "step": 63399
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.6023941040039062,
      "learning_rate": 0.0004947125333465535,
      "loss": 3.11,
      "step": 63400
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.1338725090026855,
      "learning_rate": 0.0004947094214219674,
      "loss": 3.0042,
      "step": 63401
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.771223783493042,
      "learning_rate": 0.0004947063094611811,
      "loss": 3.0359,
      "step": 63402
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.11959171295166,
      "learning_rate": 0.0004947031974641953,
      "loss": 3.3388,
      "step": 63403
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.307495594024658,
      "learning_rate": 0.0004947000854310105,
      "loss": 2.977,
      "step": 63404
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2917990684509277,
      "learning_rate": 0.0004946969733616273,
      "loss": 3.0869,
      "step": 63405
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4019362926483154,
      "learning_rate": 0.0004946938612560461,
      "loss": 3.1116,
      "step": 63406
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4134453535079956,
      "learning_rate": 0.0004946907491142678,
      "loss": 3.0031,
      "step": 63407
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3240275382995605,
      "learning_rate": 0.0004946876369362927,
      "loss": 2.991,
      "step": 63408
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.6392688751220703,
      "learning_rate": 0.0004946845247221217,
      "loss": 2.957,
      "step": 63409
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.155329942703247,
      "learning_rate": 0.000494681412471755,
      "loss": 2.8742,
      "step": 63410
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.732223391532898,
      "learning_rate": 0.0004946783001851933,
      "loss": 2.942,
      "step": 63411
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6135804653167725,
      "learning_rate": 0.0004946751878624374,
      "loss": 3.3178,
      "step": 63412
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1460487842559814,
      "learning_rate": 0.0004946720755034875,
      "loss": 3.0073,
      "step": 63413
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4825217723846436,
      "learning_rate": 0.0004946689631083446,
      "loss": 3.078,
      "step": 63414
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.381087064743042,
      "learning_rate": 0.000494665850677009,
      "loss": 3.0798,
      "step": 63415
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.340823173522949,
      "learning_rate": 0.0004946627382094813,
      "loss": 3.1153,
      "step": 63416
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.745765209197998,
      "learning_rate": 0.000494659625705762,
      "loss": 3.179,
      "step": 63417
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6660735607147217,
      "learning_rate": 0.000494656513165852,
      "loss": 3.1482,
      "step": 63418
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6372939348220825,
      "learning_rate": 0.0004946534005897516,
      "loss": 2.6682,
      "step": 63419
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.396881103515625,
      "learning_rate": 0.0004946502879774616,
      "loss": 2.8726,
      "step": 63420
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3767757415771484,
      "learning_rate": 0.0004946471753289822,
      "loss": 3.0406,
      "step": 63421
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5398204326629639,
      "learning_rate": 0.0004946440626443144,
      "loss": 2.9269,
      "step": 63422
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2209551334381104,
      "learning_rate": 0.0004946409499234584,
      "loss": 2.9554,
      "step": 63423
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.9508321285247803,
      "learning_rate": 0.0004946378371664151,
      "loss": 3.2216,
      "step": 63424
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.905877709388733,
      "learning_rate": 0.000494634724373185,
      "loss": 3.0338,
      "step": 63425
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.524254322052002,
      "learning_rate": 0.0004946316115437686,
      "loss": 2.958,
      "step": 63426
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.551657199859619,
      "learning_rate": 0.0004946284986781664,
      "loss": 3.1781,
      "step": 63427
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7074230909347534,
      "learning_rate": 0.0004946253857763792,
      "loss": 3.1218,
      "step": 63428
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.66397225856781,
      "learning_rate": 0.0004946222728384075,
      "loss": 2.9749,
      "step": 63429
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.565340518951416,
      "learning_rate": 0.0004946191598642518,
      "loss": 2.9459,
      "step": 63430
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.738875150680542,
      "learning_rate": 0.0004946160468539127,
      "loss": 2.9547,
      "step": 63431
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4571096897125244,
      "learning_rate": 0.0004946129338073907,
      "loss": 3.2491,
      "step": 63432
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.495172142982483,
      "learning_rate": 0.0004946098207246867,
      "loss": 3.0808,
      "step": 63433
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5894347429275513,
      "learning_rate": 0.0004946067076058009,
      "loss": 3.1933,
      "step": 63434
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4641222953796387,
      "learning_rate": 0.0004946035944507341,
      "loss": 3.1581,
      "step": 63435
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8438618183135986,
      "learning_rate": 0.0004946004812594868,
      "loss": 2.986,
      "step": 63436
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9151993989944458,
      "learning_rate": 0.0004945973680320597,
      "loss": 2.8229,
      "step": 63437
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5070124864578247,
      "learning_rate": 0.0004945942547684532,
      "loss": 2.7769,
      "step": 63438
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.595117449760437,
      "learning_rate": 0.0004945911414686679,
      "loss": 3.1766,
      "step": 63439
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6774871349334717,
      "learning_rate": 0.0004945880281327045,
      "loss": 3.0977,
      "step": 63440
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6138978004455566,
      "learning_rate": 0.0004945849147605635,
      "loss": 2.9108,
      "step": 63441
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5504108667373657,
      "learning_rate": 0.0004945818013522454,
      "loss": 2.8955,
      "step": 63442
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.665002703666687,
      "learning_rate": 0.000494578687907751,
      "loss": 2.8846,
      "step": 63443
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6685421466827393,
      "learning_rate": 0.0004945755744270807,
      "loss": 3.1183,
      "step": 63444
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.15102219581604,
      "learning_rate": 0.0004945724609102351,
      "loss": 3.2472,
      "step": 63445
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3889814615249634,
      "learning_rate": 0.0004945693473572149,
      "loss": 2.7466,
      "step": 63446
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4513089656829834,
      "learning_rate": 0.0004945662337680205,
      "loss": 2.9255,
      "step": 63447
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6149327754974365,
      "learning_rate": 0.0004945631201426526,
      "loss": 3.0925,
      "step": 63448
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4564322233200073,
      "learning_rate": 0.0004945600064811117,
      "loss": 3.087,
      "step": 63449
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5656453371047974,
      "learning_rate": 0.0004945568927833985,
      "loss": 3.1099,
      "step": 63450
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4870140552520752,
      "learning_rate": 0.0004945537790495134,
      "loss": 3.2479,
      "step": 63451
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.734073281288147,
      "learning_rate": 0.0004945506652794571,
      "loss": 3.0126,
      "step": 63452
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4212710857391357,
      "learning_rate": 0.0004945475514732302,
      "loss": 3.3088,
      "step": 63453
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6011772155761719,
      "learning_rate": 0.0004945444376308331,
      "loss": 3.2268,
      "step": 63454
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4737200736999512,
      "learning_rate": 0.0004945413237522668,
      "loss": 3.0042,
      "step": 63455
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9158862829208374,
      "learning_rate": 0.0004945382098375315,
      "loss": 3.0381,
      "step": 63456
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6549655199050903,
      "learning_rate": 0.0004945350958866277,
      "loss": 3.1419,
      "step": 63457
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8988436460494995,
      "learning_rate": 0.0004945319818995563,
      "loss": 2.9282,
      "step": 63458
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7103168964385986,
      "learning_rate": 0.0004945288678763177,
      "loss": 3.0532,
      "step": 63459
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9520028829574585,
      "learning_rate": 0.0004945257538169125,
      "loss": 3.2474,
      "step": 63460
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.729770541191101,
      "learning_rate": 0.0004945226397213414,
      "loss": 2.9435,
      "step": 63461
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5958763360977173,
      "learning_rate": 0.0004945195255896047,
      "loss": 3.1816,
      "step": 63462
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6558786630630493,
      "learning_rate": 0.0004945164114217032,
      "loss": 2.9626,
      "step": 63463
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7386131286621094,
      "learning_rate": 0.0004945132972176375,
      "loss": 2.9631,
      "step": 63464
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7364277839660645,
      "learning_rate": 0.000494510182977408,
      "loss": 2.8626,
      "step": 63465
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9262338876724243,
      "learning_rate": 0.0004945070687010154,
      "loss": 3.3251,
      "step": 63466
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4613524675369263,
      "learning_rate": 0.0004945039543884604,
      "loss": 3.08,
      "step": 63467
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6131765842437744,
      "learning_rate": 0.0004945008400397433,
      "loss": 2.9044,
      "step": 63468
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7469147443771362,
      "learning_rate": 0.0004944977256548649,
      "loss": 3.0736,
      "step": 63469
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.823586106300354,
      "learning_rate": 0.0004944946112338257,
      "loss": 3.0038,
      "step": 63470
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6855852603912354,
      "learning_rate": 0.0004944914967766261,
      "loss": 3.2463,
      "step": 63471
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.570618748664856,
      "learning_rate": 0.0004944883822832671,
      "loss": 2.8189,
      "step": 63472
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6581753492355347,
      "learning_rate": 0.000494485267753749,
      "loss": 3.1776,
      "step": 63473
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.052032709121704,
      "learning_rate": 0.0004944821531880723,
      "loss": 2.9834,
      "step": 63474
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7684144973754883,
      "learning_rate": 0.0004944790385862378,
      "loss": 2.9568,
      "step": 63475
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9865612983703613,
      "learning_rate": 0.0004944759239482459,
      "loss": 3.0039,
      "step": 63476
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5867388248443604,
      "learning_rate": 0.0004944728092740973,
      "loss": 2.9132,
      "step": 63477
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5708369016647339,
      "learning_rate": 0.0004944696945637925,
      "loss": 3.0958,
      "step": 63478
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3001620769500732,
      "learning_rate": 0.0004944665798173322,
      "loss": 3.1562,
      "step": 63479
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3141403198242188,
      "learning_rate": 0.0004944634650347169,
      "loss": 3.1322,
      "step": 63480
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.973204255104065,
      "learning_rate": 0.000494460350215947,
      "loss": 3.044,
      "step": 63481
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.479349136352539,
      "learning_rate": 0.0004944572353610234,
      "loss": 3.1672,
      "step": 63482
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9313788414001465,
      "learning_rate": 0.0004944541204699465,
      "loss": 3.0283,
      "step": 63483
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.774247407913208,
      "learning_rate": 0.0004944510055427169,
      "loss": 3.0123,
      "step": 63484
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4465221166610718,
      "learning_rate": 0.0004944478905793351,
      "loss": 2.993,
      "step": 63485
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5151265859603882,
      "learning_rate": 0.0004944447755798019,
      "loss": 3.161,
      "step": 63486
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4914898872375488,
      "learning_rate": 0.0004944416605441176,
      "loss": 3.1128,
      "step": 63487
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4696837663650513,
      "learning_rate": 0.000494438545472283,
      "loss": 2.9177,
      "step": 63488
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6652382612228394,
      "learning_rate": 0.0004944354303642987,
      "loss": 3.2356,
      "step": 63489
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3197752237319946,
      "learning_rate": 0.0004944323152201651,
      "loss": 3.2514,
      "step": 63490
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.534913420677185,
      "learning_rate": 0.0004944292000398829,
      "loss": 3.1766,
      "step": 63491
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4704631567001343,
      "learning_rate": 0.0004944260848234526,
      "loss": 3.1093,
      "step": 63492
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5909940004348755,
      "learning_rate": 0.0004944229695708748,
      "loss": 3.0729,
      "step": 63493
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7598223686218262,
      "learning_rate": 0.0004944198542821501,
      "loss": 3.1373,
      "step": 63494
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4316232204437256,
      "learning_rate": 0.0004944167389572792,
      "loss": 3.088,
      "step": 63495
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.174074411392212,
      "learning_rate": 0.0004944136235962624,
      "loss": 2.8577,
      "step": 63496
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1464173793792725,
      "learning_rate": 0.0004944105081991005,
      "loss": 3.1853,
      "step": 63497
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.8451759815216064,
      "learning_rate": 0.000494407392765794,
      "loss": 2.9417,
      "step": 63498
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7749723196029663,
      "learning_rate": 0.0004944042772963435,
      "loss": 2.9808,
      "step": 63499
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4357833862304688,
      "learning_rate": 0.0004944011617907496,
      "loss": 2.9568,
      "step": 63500
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4679791927337646,
      "learning_rate": 0.0004943980462490128,
      "loss": 2.9661,
      "step": 63501
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9277501106262207,
      "learning_rate": 0.0004943949306711338,
      "loss": 3.3863,
      "step": 63502
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7578630447387695,
      "learning_rate": 0.0004943918150571131,
      "loss": 3.0051,
      "step": 63503
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8111555576324463,
      "learning_rate": 0.0004943886994069513,
      "loss": 3.2905,
      "step": 63504
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.585296869277954,
      "learning_rate": 0.0004943855837206489,
      "loss": 2.892,
      "step": 63505
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.849668025970459,
      "learning_rate": 0.0004943824679982066,
      "loss": 3.4423,
      "step": 63506
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6749746799468994,
      "learning_rate": 0.000494379352239625,
      "loss": 3.2841,
      "step": 63507
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0039772987365723,
      "learning_rate": 0.0004943762364449046,
      "loss": 3.0238,
      "step": 63508
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5847526788711548,
      "learning_rate": 0.0004943731206140459,
      "loss": 3.019,
      "step": 63509
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3452788591384888,
      "learning_rate": 0.0004943700047470496,
      "loss": 2.9292,
      "step": 63510
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.00020694732666,
      "learning_rate": 0.0004943668888439162,
      "loss": 3.1151,
      "step": 63511
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8122835159301758,
      "learning_rate": 0.0004943637729046465,
      "loss": 2.9821,
      "step": 63512
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.744459867477417,
      "learning_rate": 0.0004943606569292407,
      "loss": 3.2069,
      "step": 63513
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.573085069656372,
      "learning_rate": 0.0004943575409176997,
      "loss": 3.1391,
      "step": 63514
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.408738374710083,
      "learning_rate": 0.000494354424870024,
      "loss": 3.0117,
      "step": 63515
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5549485683441162,
      "learning_rate": 0.000494351308786214,
      "loss": 2.9578,
      "step": 63516
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.011439323425293,
      "learning_rate": 0.0004943481926662704,
      "loss": 2.7647,
      "step": 63517
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6897214651107788,
      "learning_rate": 0.000494345076510194,
      "loss": 2.9717,
      "step": 63518
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.269691228866577,
      "learning_rate": 0.0004943419603179851,
      "loss": 3.1954,
      "step": 63519
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7738046646118164,
      "learning_rate": 0.0004943388440896443,
      "loss": 3.1127,
      "step": 63520
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.445786714553833,
      "learning_rate": 0.0004943357278251722,
      "loss": 3.1966,
      "step": 63521
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0521159172058105,
      "learning_rate": 0.0004943326115245696,
      "loss": 3.1622,
      "step": 63522
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5311806201934814,
      "learning_rate": 0.0004943294951878368,
      "loss": 2.9589,
      "step": 63523
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6277527809143066,
      "learning_rate": 0.0004943263788149745,
      "loss": 3.2079,
      "step": 63524
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2290918827056885,
      "learning_rate": 0.0004943232624059832,
      "loss": 3.1228,
      "step": 63525
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3462239503860474,
      "learning_rate": 0.0004943201459608636,
      "loss": 2.8799,
      "step": 63526
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6199108362197876,
      "learning_rate": 0.0004943170294796162,
      "loss": 2.8947,
      "step": 63527
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7359592914581299,
      "learning_rate": 0.0004943139129622416,
      "loss": 3.1624,
      "step": 63528
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.937766194343567,
      "learning_rate": 0.0004943107964087404,
      "loss": 2.9982,
      "step": 63529
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7308446168899536,
      "learning_rate": 0.000494307679819113,
      "loss": 3.0289,
      "step": 63530
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2683587074279785,
      "learning_rate": 0.0004943045631933603,
      "loss": 2.9621,
      "step": 63531
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8431365489959717,
      "learning_rate": 0.0004943014465314826,
      "loss": 3.0707,
      "step": 63532
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6487876176834106,
      "learning_rate": 0.0004942983298334807,
      "loss": 3.1508,
      "step": 63533
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.550392508506775,
      "learning_rate": 0.0004942952130993551,
      "loss": 3.0443,
      "step": 63534
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6574864387512207,
      "learning_rate": 0.0004942920963291063,
      "loss": 3.1135,
      "step": 63535
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4956965446472168,
      "learning_rate": 0.0004942889795227349,
      "loss": 3.0027,
      "step": 63536
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4707552194595337,
      "learning_rate": 0.0004942858626802416,
      "loss": 3.0226,
      "step": 63537
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4304640293121338,
      "learning_rate": 0.0004942827458016267,
      "loss": 3.0719,
      "step": 63538
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9055169820785522,
      "learning_rate": 0.000494279628886891,
      "loss": 3.0153,
      "step": 63539
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.847902774810791,
      "learning_rate": 0.0004942765119360353,
      "loss": 2.6845,
      "step": 63540
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.452382206916809,
      "learning_rate": 0.0004942733949490597,
      "loss": 2.6124,
      "step": 63541
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.58865225315094,
      "learning_rate": 0.0004942702779259651,
      "loss": 3.2281,
      "step": 63542
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.028691291809082,
      "learning_rate": 0.0004942671608667519,
      "loss": 2.8871,
      "step": 63543
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.000687599182129,
      "learning_rate": 0.000494264043771421,
      "loss": 2.9843,
      "step": 63544
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0887391567230225,
      "learning_rate": 0.0004942609266399725,
      "loss": 2.9411,
      "step": 63545
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.054875612258911,
      "learning_rate": 0.0004942578094724073,
      "loss": 3.1943,
      "step": 63546
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5902031660079956,
      "learning_rate": 0.0004942546922687259,
      "loss": 3.0734,
      "step": 63547
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6190989017486572,
      "learning_rate": 0.0004942515750289289,
      "loss": 3.3535,
      "step": 63548
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.506862998008728,
      "learning_rate": 0.0004942484577530169,
      "loss": 3.0691,
      "step": 63549
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8771932125091553,
      "learning_rate": 0.0004942453404409904,
      "loss": 2.949,
      "step": 63550
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5568193197250366,
      "learning_rate": 0.00049424222309285,
      "loss": 2.8825,
      "step": 63551
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.651881694793701,
      "learning_rate": 0.0004942391057085964,
      "loss": 3.136,
      "step": 63552
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7195290327072144,
      "learning_rate": 0.00049423598828823,
      "loss": 3.069,
      "step": 63553
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5952798128128052,
      "learning_rate": 0.0004942328708317515,
      "loss": 3.0331,
      "step": 63554
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4783400297164917,
      "learning_rate": 0.0004942297533391613,
      "loss": 2.8732,
      "step": 63555
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4636577367782593,
      "learning_rate": 0.0004942266358104603,
      "loss": 3.0629,
      "step": 63556
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7583717107772827,
      "learning_rate": 0.0004942235182456489,
      "loss": 2.957,
      "step": 63557
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.43220055103302,
      "learning_rate": 0.0004942204006447276,
      "loss": 2.9669,
      "step": 63558
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.536586046218872,
      "learning_rate": 0.0004942172830076972,
      "loss": 2.9442,
      "step": 63559
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6598844528198242,
      "learning_rate": 0.000494214165334558,
      "loss": 3.0302,
      "step": 63560
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6701483726501465,
      "learning_rate": 0.0004942110476253107,
      "loss": 2.9154,
      "step": 63561
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.703980803489685,
      "learning_rate": 0.000494207929879956,
      "loss": 2.9622,
      "step": 63562
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0198590755462646,
      "learning_rate": 0.0004942048120984943,
      "loss": 2.952,
      "step": 63563
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2013118267059326,
      "learning_rate": 0.0004942016942809263,
      "loss": 3.0381,
      "step": 63564
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4989980459213257,
      "learning_rate": 0.0004941985764272526,
      "loss": 3.0199,
      "step": 63565
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6537476778030396,
      "learning_rate": 0.0004941954585374736,
      "loss": 3.0609,
      "step": 63566
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4962687492370605,
      "learning_rate": 0.00049419234061159,
      "loss": 3.0145,
      "step": 63567
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4120885133743286,
      "learning_rate": 0.0004941892226496026,
      "loss": 2.9963,
      "step": 63568
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8879671096801758,
      "learning_rate": 0.0004941861046515115,
      "loss": 3.081,
      "step": 63569
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8484883308410645,
      "learning_rate": 0.0004941829866173176,
      "loss": 3.3368,
      "step": 63570
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.014556646347046,
      "learning_rate": 0.0004941798685470215,
      "loss": 3.1415,
      "step": 63571
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.53300940990448,
      "learning_rate": 0.0004941767504406236,
      "loss": 2.9676,
      "step": 63572
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.550408959388733,
      "learning_rate": 0.0004941736322981247,
      "loss": 3.0478,
      "step": 63573
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4242743253707886,
      "learning_rate": 0.0004941705141195251,
      "loss": 3.1783,
      "step": 63574
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6230223178863525,
      "learning_rate": 0.0004941673959048256,
      "loss": 3.0992,
      "step": 63575
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8454458713531494,
      "learning_rate": 0.0004941642776540268,
      "loss": 3.0158,
      "step": 63576
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.391742706298828,
      "learning_rate": 0.0004941611593671291,
      "loss": 3.0397,
      "step": 63577
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5749354362487793,
      "learning_rate": 0.0004941580410441331,
      "loss": 3.183,
      "step": 63578
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7269951105117798,
      "learning_rate": 0.0004941549226850396,
      "loss": 2.9555,
      "step": 63579
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0948028564453125,
      "learning_rate": 0.000494151804289849,
      "loss": 3.0777,
      "step": 63580
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.8067305088043213,
      "learning_rate": 0.0004941486858585619,
      "loss": 2.9416,
      "step": 63581
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9258815050125122,
      "learning_rate": 0.0004941455673911789,
      "loss": 2.8523,
      "step": 63582
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8513842821121216,
      "learning_rate": 0.0004941424488877005,
      "loss": 2.9089,
      "step": 63583
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9266467094421387,
      "learning_rate": 0.0004941393303481274,
      "loss": 2.8354,
      "step": 63584
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.843174695968628,
      "learning_rate": 0.0004941362117724602,
      "loss": 3.0718,
      "step": 63585
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8234342336654663,
      "learning_rate": 0.0004941330931606994,
      "loss": 2.955,
      "step": 63586
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0757014751434326,
      "learning_rate": 0.0004941299745128456,
      "loss": 2.9747,
      "step": 63587
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.687485933303833,
      "learning_rate": 0.0004941268558288993,
      "loss": 3.2192,
      "step": 63588
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4601390361785889,
      "learning_rate": 0.0004941237371088612,
      "loss": 3.0701,
      "step": 63589
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.030677556991577,
      "learning_rate": 0.0004941206183527319,
      "loss": 3.066,
      "step": 63590
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6208215951919556,
      "learning_rate": 0.0004941174995605117,
      "loss": 3.0884,
      "step": 63591
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5434261560440063,
      "learning_rate": 0.0004941143807322017,
      "loss": 2.9335,
      "step": 63592
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7640416622161865,
      "learning_rate": 0.000494111261867802,
      "loss": 2.9647,
      "step": 63593
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.329721450805664,
      "learning_rate": 0.0004941081429673133,
      "loss": 3.1078,
      "step": 63594
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4751927852630615,
      "learning_rate": 0.0004941050240307363,
      "loss": 3.0481,
      "step": 63595
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4735989570617676,
      "learning_rate": 0.0004941019050580715,
      "loss": 3.0313,
      "step": 63596
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4507811069488525,
      "learning_rate": 0.0004940987860493194,
      "loss": 2.9786,
      "step": 63597
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6350631713867188,
      "learning_rate": 0.0004940956670044809,
      "loss": 3.1131,
      "step": 63598
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7368543148040771,
      "learning_rate": 0.0004940925479235561,
      "loss": 3.0981,
      "step": 63599
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.648231863975525,
      "learning_rate": 0.000494089428806546,
      "loss": 3.063,
      "step": 63600
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4976483583450317,
      "learning_rate": 0.0004940863096534509,
      "loss": 2.9538,
      "step": 63601
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3087496757507324,
      "learning_rate": 0.0004940831904642716,
      "loss": 2.9343,
      "step": 63602
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.8280019760131836,
      "learning_rate": 0.0004940800712390084,
      "loss": 2.9271,
      "step": 63603
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.654765009880066,
      "learning_rate": 0.0004940769519776623,
      "loss": 2.8774,
      "step": 63604
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.170027256011963,
      "learning_rate": 0.0004940738326802335,
      "loss": 2.93,
      "step": 63605
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7992832660675049,
      "learning_rate": 0.0004940707133467227,
      "loss": 3.1524,
      "step": 63606
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.634163737297058,
      "learning_rate": 0.0004940675939771304,
      "loss": 3.3453,
      "step": 63607
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.729935646057129,
      "learning_rate": 0.0004940644745714575,
      "loss": 3.2855,
      "step": 63608
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7280813455581665,
      "learning_rate": 0.0004940613551297042,
      "loss": 3.1083,
      "step": 63609
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6916003227233887,
      "learning_rate": 0.0004940582356518713,
      "loss": 3.2135,
      "step": 63610
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1954822540283203,
      "learning_rate": 0.0004940551161379592,
      "loss": 3.0616,
      "step": 63611
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2245161533355713,
      "learning_rate": 0.0004940519965879686,
      "loss": 2.8987,
      "step": 63612
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.8778131008148193,
      "learning_rate": 0.0004940488770019002,
      "loss": 3.0065,
      "step": 63613
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6740964651107788,
      "learning_rate": 0.0004940457573797544,
      "loss": 2.991,
      "step": 63614
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6838005781173706,
      "learning_rate": 0.0004940426377215318,
      "loss": 3.0815,
      "step": 63615
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6146754026412964,
      "learning_rate": 0.000494039518027233,
      "loss": 3.0096,
      "step": 63616
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7513189315795898,
      "learning_rate": 0.0004940363982968587,
      "loss": 3.0892,
      "step": 63617
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4742329120635986,
      "learning_rate": 0.0004940332785304093,
      "loss": 2.9812,
      "step": 63618
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5566550493240356,
      "learning_rate": 0.0004940301587278853,
      "loss": 2.7662,
      "step": 63619
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.650112271308899,
      "learning_rate": 0.0004940270388892877,
      "loss": 3.1387,
      "step": 63620
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5509138107299805,
      "learning_rate": 0.0004940239190146166,
      "loss": 3.256,
      "step": 63621
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.799371600151062,
      "learning_rate": 0.0004940207991038729,
      "loss": 3.0614,
      "step": 63622
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4341700077056885,
      "learning_rate": 0.000494017679157057,
      "loss": 2.957,
      "step": 63623
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4498454332351685,
      "learning_rate": 0.0004940145591741696,
      "loss": 2.9672,
      "step": 63624
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9860059022903442,
      "learning_rate": 0.0004940114391552113,
      "loss": 3.173,
      "step": 63625
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.847866177558899,
      "learning_rate": 0.0004940083191001825,
      "loss": 2.9069,
      "step": 63626
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.601685881614685,
      "learning_rate": 0.000494005199009084,
      "loss": 3.1006,
      "step": 63627
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7950663566589355,
      "learning_rate": 0.0004940020788819161,
      "loss": 2.9535,
      "step": 63628
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.687530755996704,
      "learning_rate": 0.0004939989587186797,
      "loss": 2.8686,
      "step": 63629
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5581077337265015,
      "learning_rate": 0.0004939958385193751,
      "loss": 2.9967,
      "step": 63630
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9120519161224365,
      "learning_rate": 0.0004939927182840031,
      "loss": 3.1341,
      "step": 63631
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.514097213745117,
      "learning_rate": 0.0004939895980125643,
      "loss": 2.8972,
      "step": 63632
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5766264200210571,
      "learning_rate": 0.000493986477705059,
      "loss": 2.757,
      "step": 63633
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.554105281829834,
      "learning_rate": 0.000493983357361488,
      "loss": 3.0608,
      "step": 63634
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6424269676208496,
      "learning_rate": 0.0004939802369818518,
      "loss": 2.8213,
      "step": 63635
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.43869948387146,
      "learning_rate": 0.000493977116566151,
      "loss": 3.0061,
      "step": 63636
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6400285959243774,
      "learning_rate": 0.0004939739961143863,
      "loss": 3.1061,
      "step": 63637
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4959617853164673,
      "learning_rate": 0.0004939708756265581,
      "loss": 3.4064,
      "step": 63638
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0041160583496094,
      "learning_rate": 0.000493967755102667,
      "loss": 2.8256,
      "step": 63639
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3170245885849,
      "learning_rate": 0.0004939646345427138,
      "loss": 3.2398,
      "step": 63640
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7902717590332031,
      "learning_rate": 0.0004939615139466988,
      "loss": 2.9684,
      "step": 63641
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7860809564590454,
      "learning_rate": 0.0004939583933146227,
      "loss": 2.7813,
      "step": 63642
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2913360595703125,
      "learning_rate": 0.000493955272646486,
      "loss": 3.293,
      "step": 63643
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2579227685928345,
      "learning_rate": 0.0004939521519422896,
      "loss": 2.9134,
      "step": 63644
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.511659860610962,
      "learning_rate": 0.0004939490312020336,
      "loss": 3.1758,
      "step": 63645
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4348397254943848,
      "learning_rate": 0.0004939459104257188,
      "loss": 3.1401,
      "step": 63646
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7490595579147339,
      "learning_rate": 0.000493942789613346,
      "loss": 2.9973,
      "step": 63647
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5083791017532349,
      "learning_rate": 0.0004939396687649154,
      "loss": 2.9427,
      "step": 63648
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6813952922821045,
      "learning_rate": 0.0004939365478804278,
      "loss": 3.2325,
      "step": 63649
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4278686046600342,
      "learning_rate": 0.0004939334269598837,
      "loss": 2.9662,
      "step": 63650
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5768084526062012,
      "learning_rate": 0.0004939303060032838,
      "loss": 3.094,
      "step": 63651
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.288100481033325,
      "learning_rate": 0.0004939271850106285,
      "loss": 2.9296,
      "step": 63652
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.372615337371826,
      "learning_rate": 0.0004939240639819186,
      "loss": 3.2054,
      "step": 63653
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6389307975769043,
      "learning_rate": 0.0004939209429171544,
      "loss": 3.1831,
      "step": 63654
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.822397232055664,
      "learning_rate": 0.0004939178218163367,
      "loss": 2.9462,
      "step": 63655
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.092881679534912,
      "learning_rate": 0.0004939147006794661,
      "loss": 3.1148,
      "step": 63656
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6354589462280273,
      "learning_rate": 0.000493911579506543,
      "loss": 2.8671,
      "step": 63657
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2817816734313965,
      "learning_rate": 0.0004939084582975681,
      "loss": 3.229,
      "step": 63658
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0427095890045166,
      "learning_rate": 0.000493905337052542,
      "loss": 2.9047,
      "step": 63659
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4794013500213623,
      "learning_rate": 0.0004939022157714652,
      "loss": 2.8877,
      "step": 63660
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8374427556991577,
      "learning_rate": 0.0004938990944543383,
      "loss": 3.2112,
      "step": 63661
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6902270317077637,
      "learning_rate": 0.000493895973101162,
      "loss": 2.9986,
      "step": 63662
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9668253660202026,
      "learning_rate": 0.0004938928517119367,
      "loss": 2.9632,
      "step": 63663
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.476589322090149,
      "learning_rate": 0.000493889730286663,
      "loss": 3.261,
      "step": 63664
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5280132293701172,
      "learning_rate": 0.0004938866088253416,
      "loss": 3.0799,
      "step": 63665
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4678943157196045,
      "learning_rate": 0.0004938834873279731,
      "loss": 2.9172,
      "step": 63666
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6813489198684692,
      "learning_rate": 0.0004938803657945579,
      "loss": 2.9108,
      "step": 63667
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5106585025787354,
      "learning_rate": 0.0004938772442250966,
      "loss": 2.6419,
      "step": 63668
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7854069471359253,
      "learning_rate": 0.00049387412261959,
      "loss": 3.0222,
      "step": 63669
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5367807149887085,
      "learning_rate": 0.0004938710009780385,
      "loss": 2.9732,
      "step": 63670
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4228923320770264,
      "learning_rate": 0.0004938678793004427,
      "loss": 2.977,
      "step": 63671
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6659730672836304,
      "learning_rate": 0.0004938647575868032,
      "loss": 2.9735,
      "step": 63672
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3622238636016846,
      "learning_rate": 0.0004938616358371206,
      "loss": 2.832,
      "step": 63673
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5274064540863037,
      "learning_rate": 0.0004938585140513955,
      "loss": 3.1092,
      "step": 63674
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.120199680328369,
      "learning_rate": 0.0004938553922296284,
      "loss": 3.0831,
      "step": 63675
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6931607723236084,
      "learning_rate": 0.0004938522703718199,
      "loss": 3.3005,
      "step": 63676
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.129373788833618,
      "learning_rate": 0.0004938491484779707,
      "loss": 3.1679,
      "step": 63677
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9188727140426636,
      "learning_rate": 0.0004938460265480812,
      "loss": 3.0,
      "step": 63678
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5929189920425415,
      "learning_rate": 0.000493842904582152,
      "loss": 2.8977,
      "step": 63679
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.48720645904541,
      "learning_rate": 0.0004938397825801839,
      "loss": 3.0982,
      "step": 63680
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.685286045074463,
      "learning_rate": 0.0004938366605421773,
      "loss": 3.1231,
      "step": 63681
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7992713451385498,
      "learning_rate": 0.0004938335384681327,
      "loss": 2.9524,
      "step": 63682
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9229909181594849,
      "learning_rate": 0.0004938304163580509,
      "loss": 3.0566,
      "step": 63683
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8479690551757812,
      "learning_rate": 0.0004938272942119322,
      "loss": 3.099,
      "step": 63684
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4105219841003418,
      "learning_rate": 0.0004938241720297776,
      "loss": 2.882,
      "step": 63685
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9396073818206787,
      "learning_rate": 0.0004938210498115872,
      "loss": 3.0059,
      "step": 63686
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3474735021591187,
      "learning_rate": 0.0004938179275573619,
      "loss": 3.0577,
      "step": 63687
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7365707159042358,
      "learning_rate": 0.0004938148052671022,
      "loss": 3.3205,
      "step": 63688
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6158795356750488,
      "learning_rate": 0.0004938116829408088,
      "loss": 3.0832,
      "step": 63689
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4806619882583618,
      "learning_rate": 0.0004938085605784818,
      "loss": 3.0808,
      "step": 63690
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6746916770935059,
      "learning_rate": 0.0004938054381801224,
      "loss": 3.1852,
      "step": 63691
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9183096885681152,
      "learning_rate": 0.0004938023157457309,
      "loss": 3.1032,
      "step": 63692
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.998741865158081,
      "learning_rate": 0.0004937991932753078,
      "loss": 3.2053,
      "step": 63693
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4504024982452393,
      "learning_rate": 0.0004937960707688538,
      "loss": 3.2879,
      "step": 63694
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.640478491783142,
      "learning_rate": 0.0004937929482263694,
      "loss": 3.0777,
      "step": 63695
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.114309072494507,
      "learning_rate": 0.0004937898256478554,
      "loss": 2.9071,
      "step": 63696
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.568444848060608,
      "learning_rate": 0.0004937867030333121,
      "loss": 2.9182,
      "step": 63697
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6350172758102417,
      "learning_rate": 0.0004937835803827402,
      "loss": 2.6906,
      "step": 63698
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5455405712127686,
      "learning_rate": 0.0004937804576961403,
      "loss": 2.9758,
      "step": 63699
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8670026063919067,
      "learning_rate": 0.0004937773349735128,
      "loss": 3.1599,
      "step": 63700
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8796947002410889,
      "learning_rate": 0.0004937742122148586,
      "loss": 3.3539,
      "step": 63701
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.032266855239868,
      "learning_rate": 0.0004937710894201781,
      "loss": 3.0828,
      "step": 63702
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5059711933135986,
      "learning_rate": 0.0004937679665894719,
      "loss": 3.0175,
      "step": 63703
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8755271434783936,
      "learning_rate": 0.0004937648437227406,
      "loss": 3.1895,
      "step": 63704
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8484777212142944,
      "learning_rate": 0.0004937617208199848,
      "loss": 2.8416,
      "step": 63705
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.613708257675171,
      "learning_rate": 0.0004937585978812048,
      "loss": 3.0015,
      "step": 63706
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4129570722579956,
      "learning_rate": 0.0004937554749064015,
      "loss": 3.1636,
      "step": 63707
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.716389536857605,
      "learning_rate": 0.0004937523518955756,
      "loss": 2.9619,
      "step": 63708
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5043151378631592,
      "learning_rate": 0.0004937492288487273,
      "loss": 2.8982,
      "step": 63709
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.26991605758667,
      "learning_rate": 0.0004937461057658574,
      "loss": 3.0244,
      "step": 63710
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.490268588066101,
      "learning_rate": 0.0004937429826469664,
      "loss": 3.1799,
      "step": 63711
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8100603818893433,
      "learning_rate": 0.0004937398594920551,
      "loss": 2.9749,
      "step": 63712
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4990395307540894,
      "learning_rate": 0.0004937367363011237,
      "loss": 3.071,
      "step": 63713
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7171974182128906,
      "learning_rate": 0.000493733613074173,
      "loss": 2.8644,
      "step": 63714
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4545117616653442,
      "learning_rate": 0.0004937304898112037,
      "loss": 3.0235,
      "step": 63715
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6514545679092407,
      "learning_rate": 0.0004937273665122162,
      "loss": 2.877,
      "step": 63716
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.744848370552063,
      "learning_rate": 0.0004937242431772111,
      "loss": 3.0066,
      "step": 63717
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4808062314987183,
      "learning_rate": 0.000493721119806189,
      "loss": 3.3864,
      "step": 63718
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5707085132598877,
      "learning_rate": 0.0004937179963991504,
      "loss": 2.8399,
      "step": 63719
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2951699495315552,
      "learning_rate": 0.0004937148729560961,
      "loss": 3.0511,
      "step": 63720
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1822478771209717,
      "learning_rate": 0.0004937117494770265,
      "loss": 2.8356,
      "step": 63721
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.936832308769226,
      "learning_rate": 0.0004937086259619423,
      "loss": 3.0435,
      "step": 63722
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2320899963378906,
      "learning_rate": 0.0004937055024108439,
      "loss": 2.8282,
      "step": 63723
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9439424276351929,
      "learning_rate": 0.0004937023788237321,
      "loss": 3.3739,
      "step": 63724
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0137267112731934,
      "learning_rate": 0.0004936992552006073,
      "loss": 2.9925,
      "step": 63725
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7265293598175049,
      "learning_rate": 0.0004936961315414702,
      "loss": 3.0628,
      "step": 63726
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.652769684791565,
      "learning_rate": 0.0004936930078463213,
      "loss": 2.9958,
      "step": 63727
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6925362348556519,
      "learning_rate": 0.0004936898841151611,
      "loss": 3.2385,
      "step": 63728
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7224165201187134,
      "learning_rate": 0.0004936867603479904,
      "loss": 3.3401,
      "step": 63729
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2692339420318604,
      "learning_rate": 0.0004936836365448098,
      "loss": 2.8974,
      "step": 63730
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.212799072265625,
      "learning_rate": 0.0004936805127056195,
      "loss": 2.9666,
      "step": 63731
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6851437091827393,
      "learning_rate": 0.0004936773888304205,
      "loss": 3.2271,
      "step": 63732
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.17275333404541,
      "learning_rate": 0.0004936742649192132,
      "loss": 3.0008,
      "step": 63733
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.578086495399475,
      "learning_rate": 0.0004936711409719982,
      "loss": 2.9934,
      "step": 63734
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.654091238975525,
      "learning_rate": 0.0004936680169887761,
      "loss": 2.868,
      "step": 63735
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7895352840423584,
      "learning_rate": 0.0004936648929695474,
      "loss": 2.8751,
      "step": 63736
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3305394649505615,
      "learning_rate": 0.0004936617689143127,
      "loss": 3.208,
      "step": 63737
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3194156885147095,
      "learning_rate": 0.0004936586448230728,
      "loss": 3.0395,
      "step": 63738
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0688226222991943,
      "learning_rate": 0.000493655520695828,
      "loss": 3.0054,
      "step": 63739
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4801567792892456,
      "learning_rate": 0.0004936523965325788,
      "loss": 3.1115,
      "step": 63740
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5706802606582642,
      "learning_rate": 0.0004936492723333263,
      "loss": 2.9028,
      "step": 63741
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.1079485416412354,
      "learning_rate": 0.0004936461480980705,
      "loss": 3.0452,
      "step": 63742
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.682906985282898,
      "learning_rate": 0.0004936430238268123,
      "loss": 3.2038,
      "step": 63743
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7369683980941772,
      "learning_rate": 0.0004936398995195523,
      "loss": 2.7697,
      "step": 63744
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8813914060592651,
      "learning_rate": 0.0004936367751762908,
      "loss": 2.9327,
      "step": 63745
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.900937795639038,
      "learning_rate": 0.0004936336507970287,
      "loss": 2.6828,
      "step": 63746
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.735056161880493,
      "learning_rate": 0.0004936305263817664,
      "loss": 2.8112,
      "step": 63747
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.073915958404541,
      "learning_rate": 0.0004936274019305046,
      "loss": 2.9462,
      "step": 63748
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0874030590057373,
      "learning_rate": 0.0004936242774432436,
      "loss": 2.9937,
      "step": 63749
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0379648208618164,
      "learning_rate": 0.0004936211529199845,
      "loss": 3.025,
      "step": 63750
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.810600519180298,
      "learning_rate": 0.0004936180283607274,
      "loss": 3.0018,
      "step": 63751
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.1738836765289307,
      "learning_rate": 0.000493614903765473,
      "loss": 2.9645,
      "step": 63752
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8629134893417358,
      "learning_rate": 0.0004936117791342221,
      "loss": 2.9195,
      "step": 63753
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.265392541885376,
      "learning_rate": 0.0004936086544669751,
      "loss": 2.8151,
      "step": 63754
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.7345595359802246,
      "learning_rate": 0.0004936055297637325,
      "loss": 3.0008,
      "step": 63755
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.5512735843658447,
      "learning_rate": 0.000493602405024495,
      "loss": 3.1023,
      "step": 63756
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7266862392425537,
      "learning_rate": 0.0004935992802492631,
      "loss": 3.0821,
      "step": 63757
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.0825819969177246,
      "learning_rate": 0.0004935961554380375,
      "loss": 3.0244,
      "step": 63758
    },
    {
      "epoch": 0.83,
      "grad_norm": 4.525210380554199,
      "learning_rate": 0.0004935930305908187,
      "loss": 3.0075,
      "step": 63759
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.800300121307373,
      "learning_rate": 0.0004935899057076073,
      "loss": 2.9931,
      "step": 63760
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7021541595458984,
      "learning_rate": 0.0004935867807884039,
      "loss": 3.0602,
      "step": 63761
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7979869842529297,
      "learning_rate": 0.000493583655833209,
      "loss": 2.9567,
      "step": 63762
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.626929521560669,
      "learning_rate": 0.0004935805308420234,
      "loss": 3.1833,
      "step": 63763
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4829466342926025,
      "learning_rate": 0.0004935774058148474,
      "loss": 3.102,
      "step": 63764
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3864167928695679,
      "learning_rate": 0.0004935742807516818,
      "loss": 3.0283,
      "step": 63765
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.569120168685913,
      "learning_rate": 0.000493571155652527,
      "loss": 3.0506,
      "step": 63766
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4113134145736694,
      "learning_rate": 0.0004935680305173837,
      "loss": 2.7918,
      "step": 63767
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.636854648590088,
      "learning_rate": 0.0004935649053462524,
      "loss": 3.0043,
      "step": 63768
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6565903425216675,
      "learning_rate": 0.0004935617801391338,
      "loss": 3.1589,
      "step": 63769
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4705286026000977,
      "learning_rate": 0.0004935586548960284,
      "loss": 2.9686,
      "step": 63770
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.931081771850586,
      "learning_rate": 0.0004935555296169368,
      "loss": 3.0688,
      "step": 63771
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.9018683433532715,
      "learning_rate": 0.0004935524043018595,
      "loss": 2.7554,
      "step": 63772
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.516127586364746,
      "learning_rate": 0.0004935492789507973,
      "loss": 3.0723,
      "step": 63773
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6859427690505981,
      "learning_rate": 0.0004935461535637506,
      "loss": 3.0377,
      "step": 63774
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6748127937316895,
      "learning_rate": 0.0004935430281407198,
      "loss": 3.0841,
      "step": 63775
    },
    {
      "epoch": 0.83,
      "grad_norm": 4.956175804138184,
      "learning_rate": 0.000493539902681706,
      "loss": 2.9945,
      "step": 63776
    },
    {
      "epoch": 0.83,
      "grad_norm": 5.627432823181152,
      "learning_rate": 0.0004935367771867092,
      "loss": 2.8955,
      "step": 63777
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5663001537323,
      "learning_rate": 0.0004935336516557305,
      "loss": 2.7412,
      "step": 63778
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.655421495437622,
      "learning_rate": 0.0004935305260887701,
      "loss": 2.9817,
      "step": 63779
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.5368764400482178,
      "learning_rate": 0.0004935274004858288,
      "loss": 2.8743,
      "step": 63780
    },
    {
      "epoch": 0.83,
      "grad_norm": 4.053040027618408,
      "learning_rate": 0.0004935242748469071,
      "loss": 2.861,
      "step": 63781
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.856309652328491,
      "learning_rate": 0.0004935211491720055,
      "loss": 3.053,
      "step": 63782
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9944818019866943,
      "learning_rate": 0.0004935180234611246,
      "loss": 3.0918,
      "step": 63783
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9022996425628662,
      "learning_rate": 0.0004935148977142652,
      "loss": 3.0515,
      "step": 63784
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.5201475620269775,
      "learning_rate": 0.0004935117719314277,
      "loss": 2.9873,
      "step": 63785
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.4379584789276123,
      "learning_rate": 0.0004935086461126128,
      "loss": 3.0426,
      "step": 63786
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.070073366165161,
      "learning_rate": 0.0004935055202578209,
      "loss": 2.9064,
      "step": 63787
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7256238460540771,
      "learning_rate": 0.0004935023943670526,
      "loss": 2.9238,
      "step": 63788
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8555434942245483,
      "learning_rate": 0.0004934992684403086,
      "loss": 3.1111,
      "step": 63789
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.536588430404663,
      "learning_rate": 0.0004934961424775894,
      "loss": 3.1125,
      "step": 63790
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8614510297775269,
      "learning_rate": 0.0004934930164788957,
      "loss": 3.2217,
      "step": 63791
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7494248151779175,
      "learning_rate": 0.000493489890444228,
      "loss": 2.9146,
      "step": 63792
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6343984603881836,
      "learning_rate": 0.0004934867643735868,
      "loss": 2.983,
      "step": 63793
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.8330161571502686,
      "learning_rate": 0.0004934836382669728,
      "loss": 3.2274,
      "step": 63794
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8233131170272827,
      "learning_rate": 0.0004934805121243864,
      "loss": 2.9309,
      "step": 63795
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.612985610961914,
      "learning_rate": 0.0004934773859458286,
      "loss": 2.8172,
      "step": 63796
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7004921436309814,
      "learning_rate": 0.0004934742597312994,
      "loss": 3.0383,
      "step": 63797
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9775675535202026,
      "learning_rate": 0.0004934711334807999,
      "loss": 2.8667,
      "step": 63798
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0447802543640137,
      "learning_rate": 0.0004934680071943305,
      "loss": 3.0029,
      "step": 63799
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.420440912246704,
      "learning_rate": 0.0004934648808718916,
      "loss": 2.8405,
      "step": 63800
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.750819444656372,
      "learning_rate": 0.000493461754513484,
      "loss": 3.1684,
      "step": 63801
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.09128475189209,
      "learning_rate": 0.000493458628119108,
      "loss": 2.9932,
      "step": 63802
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.444674015045166,
      "learning_rate": 0.0004934555016887646,
      "loss": 3.2726,
      "step": 63803
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7907625436782837,
      "learning_rate": 0.0004934523752224543,
      "loss": 2.9162,
      "step": 63804
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5244956016540527,
      "learning_rate": 0.0004934492487201772,
      "loss": 3.2966,
      "step": 63805
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.86346435546875,
      "learning_rate": 0.0004934461221819344,
      "loss": 3.093,
      "step": 63806
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4290807247161865,
      "learning_rate": 0.0004934429956077263,
      "loss": 3.1721,
      "step": 63807
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2201322317123413,
      "learning_rate": 0.0004934398689975536,
      "loss": 2.6804,
      "step": 63808
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8147449493408203,
      "learning_rate": 0.0004934367423514168,
      "loss": 3.2906,
      "step": 63809
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4027875661849976,
      "learning_rate": 0.0004934336156693164,
      "loss": 3.1173,
      "step": 63810
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0936403274536133,
      "learning_rate": 0.000493430488951253,
      "loss": 2.7946,
      "step": 63811
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.70059072971344,
      "learning_rate": 0.0004934273621972271,
      "loss": 3.0741,
      "step": 63812
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.612842082977295,
      "learning_rate": 0.0004934242354072396,
      "loss": 3.338,
      "step": 63813
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4545491933822632,
      "learning_rate": 0.0004934211085812908,
      "loss": 3.0137,
      "step": 63814
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.579533576965332,
      "learning_rate": 0.0004934179817193814,
      "loss": 2.9868,
      "step": 63815
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.410516381263733,
      "learning_rate": 0.0004934148548215119,
      "loss": 3.1989,
      "step": 63816
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3887923955917358,
      "learning_rate": 0.000493411727887683,
      "loss": 3.0326,
      "step": 63817
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5052721500396729,
      "learning_rate": 0.0004934086009178951,
      "loss": 3.0496,
      "step": 63818
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9452420473098755,
      "learning_rate": 0.000493405473912149,
      "loss": 3.1865,
      "step": 63819
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4754629135131836,
      "learning_rate": 0.0004934023468704451,
      "loss": 3.1171,
      "step": 63820
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.449763059616089,
      "learning_rate": 0.0004933992197927841,
      "loss": 2.7929,
      "step": 63821
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.50667142868042,
      "learning_rate": 0.0004933960926791664,
      "loss": 3.0841,
      "step": 63822
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4843018054962158,
      "learning_rate": 0.0004933929655295929,
      "loss": 2.9646,
      "step": 63823
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6022052764892578,
      "learning_rate": 0.000493389838344064,
      "loss": 2.9655,
      "step": 63824
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4970943927764893,
      "learning_rate": 0.0004933867111225802,
      "loss": 2.6777,
      "step": 63825
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.000006914138794,
      "learning_rate": 0.0004933835838651421,
      "loss": 2.8622,
      "step": 63826
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6997418403625488,
      "learning_rate": 0.0004933804565717504,
      "loss": 3.0388,
      "step": 63827
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6641403436660767,
      "learning_rate": 0.0004933773292424056,
      "loss": 3.3228,
      "step": 63828
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8114079236984253,
      "learning_rate": 0.0004933742018771083,
      "loss": 2.9794,
      "step": 63829
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7720584869384766,
      "learning_rate": 0.0004933710744758591,
      "loss": 3.1542,
      "step": 63830
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6677138805389404,
      "learning_rate": 0.0004933679470386586,
      "loss": 3.2152,
      "step": 63831
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.741412878036499,
      "learning_rate": 0.0004933648195655073,
      "loss": 3.0958,
      "step": 63832
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8470388650894165,
      "learning_rate": 0.0004933616920564059,
      "loss": 3.05,
      "step": 63833
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8093067407608032,
      "learning_rate": 0.0004933585645113549,
      "loss": 3.0979,
      "step": 63834
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5537534952163696,
      "learning_rate": 0.0004933554369303549,
      "loss": 3.0537,
      "step": 63835
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8152493238449097,
      "learning_rate": 0.0004933523093134064,
      "loss": 2.8389,
      "step": 63836
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6419603824615479,
      "learning_rate": 0.0004933491816605101,
      "loss": 2.9129,
      "step": 63837
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.907025933265686,
      "learning_rate": 0.0004933460539716665,
      "loss": 3.2649,
      "step": 63838
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4782015085220337,
      "learning_rate": 0.0004933429262468763,
      "loss": 3.1235,
      "step": 63839
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9563918113708496,
      "learning_rate": 0.0004933397984861399,
      "loss": 2.802,
      "step": 63840
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8495495319366455,
      "learning_rate": 0.0004933366706894581,
      "loss": 3.0149,
      "step": 63841
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8428411483764648,
      "learning_rate": 0.0004933335428568312,
      "loss": 2.8006,
      "step": 63842
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.373765468597412,
      "learning_rate": 0.0004933304149882601,
      "loss": 3.1509,
      "step": 63843
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5306233167648315,
      "learning_rate": 0.0004933272870837451,
      "loss": 2.8707,
      "step": 63844
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9450842142105103,
      "learning_rate": 0.000493324159143287,
      "loss": 2.976,
      "step": 63845
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.793914556503296,
      "learning_rate": 0.0004933210311668863,
      "loss": 3.168,
      "step": 63846
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9220304489135742,
      "learning_rate": 0.0004933179031545435,
      "loss": 2.9805,
      "step": 63847
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4565832614898682,
      "learning_rate": 0.0004933147751062594,
      "loss": 3.0617,
      "step": 63848
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6927707195281982,
      "learning_rate": 0.0004933116470220342,
      "loss": 3.0278,
      "step": 63849
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9211164712905884,
      "learning_rate": 0.0004933085189018689,
      "loss": 2.7984,
      "step": 63850
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1177120208740234,
      "learning_rate": 0.0004933053907457639,
      "loss": 2.895,
      "step": 63851
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8511980772018433,
      "learning_rate": 0.0004933022625537196,
      "loss": 3.1435,
      "step": 63852
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.63289475440979,
      "learning_rate": 0.0004932991343257369,
      "loss": 2.7662,
      "step": 63853
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5420279502868652,
      "learning_rate": 0.0004932960060618163,
      "loss": 2.9278,
      "step": 63854
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5711580514907837,
      "learning_rate": 0.0004932928777619582,
      "loss": 2.9456,
      "step": 63855
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.48032808303833,
      "learning_rate": 0.0004932897494261633,
      "loss": 3.1368,
      "step": 63856
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2953068017959595,
      "learning_rate": 0.0004932866210544323,
      "loss": 2.934,
      "step": 63857
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.195420265197754,
      "learning_rate": 0.0004932834926467656,
      "loss": 3.185,
      "step": 63858
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5218719244003296,
      "learning_rate": 0.0004932803642031637,
      "loss": 2.9699,
      "step": 63859
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5413552522659302,
      "learning_rate": 0.0004932772357236275,
      "loss": 3.1699,
      "step": 63860
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.647484540939331,
      "learning_rate": 0.0004932741072081575,
      "loss": 3.0389,
      "step": 63861
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6924588680267334,
      "learning_rate": 0.0004932709786567541,
      "loss": 2.9229,
      "step": 63862
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7680470943450928,
      "learning_rate": 0.000493267850069418,
      "loss": 2.9604,
      "step": 63863
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0327370166778564,
      "learning_rate": 0.0004932647214461497,
      "loss": 3.1819,
      "step": 63864
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9688116312026978,
      "learning_rate": 0.0004932615927869498,
      "loss": 3.2924,
      "step": 63865
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.067246913909912,
      "learning_rate": 0.0004932584640918191,
      "loss": 3.3086,
      "step": 63866
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5526137351989746,
      "learning_rate": 0.0004932553353607578,
      "loss": 2.9313,
      "step": 63867
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6564117670059204,
      "learning_rate": 0.0004932522065937668,
      "loss": 2.8824,
      "step": 63868
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.02286958694458,
      "learning_rate": 0.0004932490777908465,
      "loss": 3.0444,
      "step": 63869
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.527631402015686,
      "learning_rate": 0.0004932459489519977,
      "loss": 3.1206,
      "step": 63870
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4715092182159424,
      "learning_rate": 0.0004932428200772207,
      "loss": 2.9588,
      "step": 63871
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7914304733276367,
      "learning_rate": 0.0004932396911665162,
      "loss": 2.9646,
      "step": 63872
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.02074933052063,
      "learning_rate": 0.0004932365622198849,
      "loss": 3.0785,
      "step": 63873
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4934523105621338,
      "learning_rate": 0.0004932334332373271,
      "loss": 2.801,
      "step": 63874
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.799460530281067,
      "learning_rate": 0.0004932303042188437,
      "loss": 3.0287,
      "step": 63875
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3929812908172607,
      "learning_rate": 0.0004932271751644351,
      "loss": 3.1536,
      "step": 63876
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6672077178955078,
      "learning_rate": 0.0004932240460741019,
      "loss": 2.8156,
      "step": 63877
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0629351139068604,
      "learning_rate": 0.0004932209169478447,
      "loss": 3.0809,
      "step": 63878
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4520164728164673,
      "learning_rate": 0.0004932177877856642,
      "loss": 2.9907,
      "step": 63879
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.393479585647583,
      "learning_rate": 0.0004932146585875607,
      "loss": 2.9873,
      "step": 63880
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.686903476715088,
      "learning_rate": 0.0004932115293535351,
      "loss": 3.1239,
      "step": 63881
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.51902174949646,
      "learning_rate": 0.0004932084000835878,
      "loss": 2.9084,
      "step": 63882
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5129059553146362,
      "learning_rate": 0.0004932052707777193,
      "loss": 3.0376,
      "step": 63883
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4874879121780396,
      "learning_rate": 0.0004932021414359303,
      "loss": 2.9627,
      "step": 63884
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.542864441871643,
      "learning_rate": 0.0004931990120582214,
      "loss": 2.9906,
      "step": 63885
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.410022497177124,
      "learning_rate": 0.0004931958826445932,
      "loss": 3.0617,
      "step": 63886
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7553170919418335,
      "learning_rate": 0.0004931927531950463,
      "loss": 3.2046,
      "step": 63887
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9747024774551392,
      "learning_rate": 0.0004931896237095812,
      "loss": 3.1595,
      "step": 63888
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0641613006591797,
      "learning_rate": 0.0004931864941881985,
      "loss": 2.6871,
      "step": 63889
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5753374099731445,
      "learning_rate": 0.0004931833646308985,
      "loss": 3.0466,
      "step": 63890
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9315487146377563,
      "learning_rate": 0.0004931802350376824,
      "loss": 3.0429,
      "step": 63891
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5180225372314453,
      "learning_rate": 0.0004931771054085504,
      "loss": 3.0613,
      "step": 63892
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4966156482696533,
      "learning_rate": 0.000493173975743503,
      "loss": 2.9038,
      "step": 63893
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4739824533462524,
      "learning_rate": 0.0004931708460425411,
      "loss": 3.2529,
      "step": 63894
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6745518445968628,
      "learning_rate": 0.0004931677163056649,
      "loss": 3.023,
      "step": 63895
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4263039827346802,
      "learning_rate": 0.0004931645865328753,
      "loss": 3.1899,
      "step": 63896
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6767663955688477,
      "learning_rate": 0.0004931614567241727,
      "loss": 3.0234,
      "step": 63897
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7033315896987915,
      "learning_rate": 0.0004931583268795577,
      "loss": 2.9607,
      "step": 63898
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4702916145324707,
      "learning_rate": 0.0004931551969990309,
      "loss": 2.968,
      "step": 63899
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8359390497207642,
      "learning_rate": 0.0004931520670825931,
      "loss": 3.1749,
      "step": 63900
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6875916719436646,
      "learning_rate": 0.0004931489371302445,
      "loss": 3.1071,
      "step": 63901
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1421875953674316,
      "learning_rate": 0.0004931458071419859,
      "loss": 3.0137,
      "step": 63902
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.820495843887329,
      "learning_rate": 0.000493142677117818,
      "loss": 3.1349,
      "step": 63903
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5258558988571167,
      "learning_rate": 0.0004931395470577411,
      "loss": 2.9316,
      "step": 63904
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8404533863067627,
      "learning_rate": 0.0004931364169617558,
      "loss": 3.052,
      "step": 63905
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6287258863449097,
      "learning_rate": 0.000493133286829863,
      "loss": 3.2453,
      "step": 63906
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5234901905059814,
      "learning_rate": 0.000493130156662063,
      "loss": 3.2198,
      "step": 63907
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0247271060943604,
      "learning_rate": 0.0004931270264583563,
      "loss": 3.2369,
      "step": 63908
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.828477144241333,
      "learning_rate": 0.0004931238962187439,
      "loss": 2.9412,
      "step": 63909
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.476019024848938,
      "learning_rate": 0.000493120765943226,
      "loss": 3.0264,
      "step": 63910
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.563549280166626,
      "learning_rate": 0.0004931176356318033,
      "loss": 2.9513,
      "step": 63911
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4882346391677856,
      "learning_rate": 0.0004931145052844765,
      "loss": 3.2466,
      "step": 63912
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0083062648773193,
      "learning_rate": 0.0004931113749012459,
      "loss": 3.1095,
      "step": 63913
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.902672052383423,
      "learning_rate": 0.0004931082444821123,
      "loss": 2.8896,
      "step": 63914
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9381022453308105,
      "learning_rate": 0.0004931051140270763,
      "loss": 2.8737,
      "step": 63915
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.44831120967865,
      "learning_rate": 0.0004931019835361383,
      "loss": 2.9188,
      "step": 63916
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.8212075233459473,
      "learning_rate": 0.000493098853009299,
      "loss": 2.8878,
      "step": 63917
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.871626138687134,
      "learning_rate": 0.0004930957224465592,
      "loss": 2.7024,
      "step": 63918
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.468759775161743,
      "learning_rate": 0.0004930925918479191,
      "loss": 3.0491,
      "step": 63919
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.524756669998169,
      "learning_rate": 0.0004930894612133794,
      "loss": 3.1302,
      "step": 63920
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7905526161193848,
      "learning_rate": 0.0004930863305429408,
      "loss": 2.8396,
      "step": 63921
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5967791080474854,
      "learning_rate": 0.0004930831998366037,
      "loss": 2.9735,
      "step": 63922
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6213172674179077,
      "learning_rate": 0.0004930800690943689,
      "loss": 3.0998,
      "step": 63923
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6087948083877563,
      "learning_rate": 0.0004930769383162368,
      "loss": 3.0412,
      "step": 63924
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6677887439727783,
      "learning_rate": 0.0004930738075022081,
      "loss": 2.7786,
      "step": 63925
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.6226654052734375,
      "learning_rate": 0.0004930706766522833,
      "loss": 2.8601,
      "step": 63926
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9410570859909058,
      "learning_rate": 0.0004930675457664631,
      "loss": 2.8583,
      "step": 63927
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7794710397720337,
      "learning_rate": 0.0004930644148447479,
      "loss": 3.0259,
      "step": 63928
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.782588243484497,
      "learning_rate": 0.0004930612838871384,
      "loss": 3.0905,
      "step": 63929
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.183748245239258,
      "learning_rate": 0.0004930581528936352,
      "loss": 2.9117,
      "step": 63930
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6372158527374268,
      "learning_rate": 0.0004930550218642388,
      "loss": 2.9796,
      "step": 63931
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4956997632980347,
      "learning_rate": 0.0004930518907989499,
      "loss": 3.1419,
      "step": 63932
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2813870906829834,
      "learning_rate": 0.0004930487596977689,
      "loss": 3.1128,
      "step": 63933
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4388461112976074,
      "learning_rate": 0.0004930456285606966,
      "loss": 3.1555,
      "step": 63934
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.7709105014801025,
      "learning_rate": 0.0004930424973877334,
      "loss": 2.938,
      "step": 63935
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.122696876525879,
      "learning_rate": 0.00049303936617888,
      "loss": 3.2688,
      "step": 63936
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.087343215942383,
      "learning_rate": 0.0004930362349341369,
      "loss": 3.209,
      "step": 63937
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.2681100368499756,
      "learning_rate": 0.0004930331036535048,
      "loss": 3.1065,
      "step": 63938
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.9111289978027344,
      "learning_rate": 0.0004930299723369841,
      "loss": 3.2117,
      "step": 63939
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.482862949371338,
      "learning_rate": 0.0004930268409845755,
      "loss": 3.4166,
      "step": 63940
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.85015070438385,
      "learning_rate": 0.0004930237095962796,
      "loss": 2.9927,
      "step": 63941
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.769923448562622,
      "learning_rate": 0.000493020578172097,
      "loss": 3.116,
      "step": 63942
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3885138034820557,
      "learning_rate": 0.0004930174467120282,
      "loss": 3.2311,
      "step": 63943
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9382013082504272,
      "learning_rate": 0.0004930143152160737,
      "loss": 3.1084,
      "step": 63944
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7225438356399536,
      "learning_rate": 0.0004930111836842343,
      "loss": 3.0762,
      "step": 63945
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.663569450378418,
      "learning_rate": 0.0004930080521165104,
      "loss": 2.8027,
      "step": 63946
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5404000282287598,
      "learning_rate": 0.0004930049205129028,
      "loss": 2.9634,
      "step": 63947
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6499983072280884,
      "learning_rate": 0.0004930017888734119,
      "loss": 2.8524,
      "step": 63948
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8998918533325195,
      "learning_rate": 0.0004929986571980383,
      "loss": 2.9757,
      "step": 63949
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6626964807510376,
      "learning_rate": 0.0004929955254867824,
      "loss": 3.1377,
      "step": 63950
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6221671104431152,
      "learning_rate": 0.0004929923937396453,
      "loss": 3.272,
      "step": 63951
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4598721265792847,
      "learning_rate": 0.0004929892619566272,
      "loss": 2.5697,
      "step": 63952
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.412442922592163,
      "learning_rate": 0.0004929861301377287,
      "loss": 3.021,
      "step": 63953
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4552347660064697,
      "learning_rate": 0.0004929829982829504,
      "loss": 3.0653,
      "step": 63954
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9627248048782349,
      "learning_rate": 0.000492979866392293,
      "loss": 2.8188,
      "step": 63955
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7505104541778564,
      "learning_rate": 0.0004929767344657569,
      "loss": 3.0364,
      "step": 63956
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.456372022628784,
      "learning_rate": 0.0004929736025033427,
      "loss": 3.144,
      "step": 63957
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.201247215270996,
      "learning_rate": 0.0004929704705050514,
      "loss": 3.2527,
      "step": 63958
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5100479125976562,
      "learning_rate": 0.000492967338470883,
      "loss": 2.8441,
      "step": 63959
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7934991121292114,
      "learning_rate": 0.0004929642064008383,
      "loss": 2.995,
      "step": 63960
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0981669425964355,
      "learning_rate": 0.0004929610742949181,
      "loss": 3.1763,
      "step": 63961
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9625952243804932,
      "learning_rate": 0.0004929579421531227,
      "loss": 3.0324,
      "step": 63962
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7751977443695068,
      "learning_rate": 0.0004929548099754527,
      "loss": 3.0289,
      "step": 63963
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6339282989501953,
      "learning_rate": 0.0004929516777619088,
      "loss": 3.1173,
      "step": 63964
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8231130838394165,
      "learning_rate": 0.0004929485455124916,
      "loss": 3.0765,
      "step": 63965
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6286122798919678,
      "learning_rate": 0.0004929454132272015,
      "loss": 3.0133,
      "step": 63966
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5191066265106201,
      "learning_rate": 0.0004929422809060394,
      "loss": 3.1885,
      "step": 63967
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7922571897506714,
      "learning_rate": 0.0004929391485490055,
      "loss": 3.1458,
      "step": 63968
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9871549606323242,
      "learning_rate": 0.0004929360161561007,
      "loss": 3.0183,
      "step": 63969
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8089121580123901,
      "learning_rate": 0.0004929328837273254,
      "loss": 3.2731,
      "step": 63970
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6438369750976562,
      "learning_rate": 0.0004929297512626802,
      "loss": 3.2012,
      "step": 63971
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0400941371917725,
      "learning_rate": 0.0004929266187621657,
      "loss": 3.125,
      "step": 63972
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.312897205352783,
      "learning_rate": 0.0004929234862257827,
      "loss": 3.0575,
      "step": 63973
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.57487154006958,
      "learning_rate": 0.0004929203536535315,
      "loss": 3.0345,
      "step": 63974
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.509244680404663,
      "learning_rate": 0.0004929172210454126,
      "loss": 3.1047,
      "step": 63975
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8898800611495972,
      "learning_rate": 0.0004929140884014268,
      "loss": 2.9035,
      "step": 63976
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.688671827316284,
      "learning_rate": 0.0004929109557215747,
      "loss": 2.8972,
      "step": 63977
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4214130640029907,
      "learning_rate": 0.0004929078230058568,
      "loss": 2.9429,
      "step": 63978
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7547235488891602,
      "learning_rate": 0.0004929046902542736,
      "loss": 3.082,
      "step": 63979
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.0179128646850586,
      "learning_rate": 0.000492901557466826,
      "loss": 2.9866,
      "step": 63980
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5529701709747314,
      "learning_rate": 0.0004928984246435141,
      "loss": 3.0998,
      "step": 63981
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6129316091537476,
      "learning_rate": 0.0004928952917843389,
      "loss": 3.0741,
      "step": 63982
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7335968017578125,
      "learning_rate": 0.0004928921588893008,
      "loss": 3.0911,
      "step": 63983
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8322738409042358,
      "learning_rate": 0.0004928890259584004,
      "loss": 3.0439,
      "step": 63984
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7835495471954346,
      "learning_rate": 0.0004928858929916383,
      "loss": 3.038,
      "step": 63985
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7137365341186523,
      "learning_rate": 0.000492882759989015,
      "loss": 2.95,
      "step": 63986
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4881364107131958,
      "learning_rate": 0.0004928796269505312,
      "loss": 3.1675,
      "step": 63987
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6051267385482788,
      "learning_rate": 0.0004928764938761875,
      "loss": 3.0248,
      "step": 63988
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7794318199157715,
      "learning_rate": 0.0004928733607659844,
      "loss": 3.0913,
      "step": 63989
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.519288420677185,
      "learning_rate": 0.0004928702276199223,
      "loss": 3.2623,
      "step": 63990
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9547512531280518,
      "learning_rate": 0.0004928670944380023,
      "loss": 3.0477,
      "step": 63991
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2493534088134766,
      "learning_rate": 0.0004928639612202245,
      "loss": 2.9965,
      "step": 63992
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.199314832687378,
      "learning_rate": 0.0004928608279665896,
      "loss": 3.1535,
      "step": 63993
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3809757232666016,
      "learning_rate": 0.0004928576946770984,
      "loss": 3.0578,
      "step": 63994
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.491908311843872,
      "learning_rate": 0.000492854561351751,
      "loss": 2.9008,
      "step": 63995
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4816193580627441,
      "learning_rate": 0.0004928514279905486,
      "loss": 3.3324,
      "step": 63996
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3457467555999756,
      "learning_rate": 0.0004928482945934915,
      "loss": 3.1835,
      "step": 63997
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.8578073978424072,
      "learning_rate": 0.00049284516116058,
      "loss": 2.8756,
      "step": 63998
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6032006740570068,
      "learning_rate": 0.0004928420276918152,
      "loss": 3.1702,
      "step": 63999
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4804250001907349,
      "learning_rate": 0.0004928388941871973,
      "loss": 3.2015,
      "step": 64000
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7246161699295044,
      "learning_rate": 0.000492835760646727,
      "loss": 2.8716,
      "step": 64001
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7338677644729614,
      "learning_rate": 0.000492832627070405,
      "loss": 2.9983,
      "step": 64002
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.289337158203125,
      "learning_rate": 0.0004928294934582316,
      "loss": 2.991,
      "step": 64003
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6914433240890503,
      "learning_rate": 0.0004928263598102077,
      "loss": 3.0992,
      "step": 64004
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8848496675491333,
      "learning_rate": 0.0004928232261263337,
      "loss": 3.0632,
      "step": 64005
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3265798091888428,
      "learning_rate": 0.0004928200924066102,
      "loss": 3.1524,
      "step": 64006
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.984850287437439,
      "learning_rate": 0.0004928169586510378,
      "loss": 3.0222,
      "step": 64007
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.060421943664551,
      "learning_rate": 0.0004928138248596171,
      "loss": 2.7543,
      "step": 64008
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5677986145019531,
      "learning_rate": 0.0004928106910323486,
      "loss": 3.1381,
      "step": 64009
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9364545345306396,
      "learning_rate": 0.0004928075571692331,
      "loss": 2.9984,
      "step": 64010
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3950117826461792,
      "learning_rate": 0.0004928044232702709,
      "loss": 3.0471,
      "step": 64011
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5094883441925049,
      "learning_rate": 0.0004928012893354629,
      "loss": 3.3115,
      "step": 64012
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.6280457973480225,
      "learning_rate": 0.0004927981553648094,
      "loss": 2.777,
      "step": 64013
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6211597919464111,
      "learning_rate": 0.000492795021358311,
      "loss": 2.9563,
      "step": 64014
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.693801760673523,
      "learning_rate": 0.0004927918873159684,
      "loss": 3.2137,
      "step": 64015
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9070664644241333,
      "learning_rate": 0.0004927887532377822,
      "loss": 3.1108,
      "step": 64016
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8193563222885132,
      "learning_rate": 0.0004927856191237528,
      "loss": 3.049,
      "step": 64017
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3475286960601807,
      "learning_rate": 0.0004927824849738811,
      "loss": 2.8908,
      "step": 64018
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.2943878173828125,
      "learning_rate": 0.0004927793507881674,
      "loss": 2.9345,
      "step": 64019
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3652399778366089,
      "learning_rate": 0.0004927762165666124,
      "loss": 2.9619,
      "step": 64020
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4757945537567139,
      "learning_rate": 0.0004927730823092168,
      "loss": 3.1092,
      "step": 64021
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.343347430229187,
      "learning_rate": 0.0004927699480159808,
      "loss": 2.9412,
      "step": 64022
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4351195096969604,
      "learning_rate": 0.0004927668136869055,
      "loss": 2.9234,
      "step": 64023
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5668140649795532,
      "learning_rate": 0.0004927636793219909,
      "loss": 2.9233,
      "step": 64024
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5669984817504883,
      "learning_rate": 0.0004927605449212381,
      "loss": 2.9906,
      "step": 64025
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4953643083572388,
      "learning_rate": 0.0004927574104846474,
      "loss": 2.9623,
      "step": 64026
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2595643997192383,
      "learning_rate": 0.0004927542760122195,
      "loss": 2.775,
      "step": 64027
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3552542924880981,
      "learning_rate": 0.000492751141503955,
      "loss": 3.2502,
      "step": 64028
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9632771015167236,
      "learning_rate": 0.0004927480069598542,
      "loss": 2.906,
      "step": 64029
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5348563194274902,
      "learning_rate": 0.0004927448723799181,
      "loss": 3.0997,
      "step": 64030
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8132202625274658,
      "learning_rate": 0.0004927417377641471,
      "loss": 2.9197,
      "step": 64031
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.371362328529358,
      "learning_rate": 0.0004927386031125416,
      "loss": 2.8525,
      "step": 64032
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5197862386703491,
      "learning_rate": 0.0004927354684251024,
      "loss": 2.9477,
      "step": 64033
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4945991039276123,
      "learning_rate": 0.0004927323337018302,
      "loss": 2.9614,
      "step": 64034
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5582382678985596,
      "learning_rate": 0.0004927291989427254,
      "loss": 2.8097,
      "step": 64035
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8070124387741089,
      "learning_rate": 0.0004927260641477884,
      "loss": 2.9869,
      "step": 64036
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4685063362121582,
      "learning_rate": 0.0004927229293170202,
      "loss": 3.2956,
      "step": 64037
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.002772808074951,
      "learning_rate": 0.0004927197944504211,
      "loss": 2.9307,
      "step": 64038
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.670236349105835,
      "learning_rate": 0.0004927166595479917,
      "loss": 2.9743,
      "step": 64039
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4997056722640991,
      "learning_rate": 0.0004927135246097327,
      "loss": 2.902,
      "step": 64040
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5309884548187256,
      "learning_rate": 0.0004927103896356446,
      "loss": 3.0179,
      "step": 64041
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.556612730026245,
      "learning_rate": 0.0004927072546257279,
      "loss": 3.2872,
      "step": 64042
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8414199352264404,
      "learning_rate": 0.0004927041195799835,
      "loss": 2.9055,
      "step": 64043
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7225607633590698,
      "learning_rate": 0.0004927009844984116,
      "loss": 3.2421,
      "step": 64044
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.385099172592163,
      "learning_rate": 0.0004926978493810131,
      "loss": 3.1214,
      "step": 64045
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1565747261047363,
      "learning_rate": 0.0004926947142277882,
      "loss": 3.1194,
      "step": 64046
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.756772518157959,
      "learning_rate": 0.0004926915790387379,
      "loss": 3.3066,
      "step": 64047
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.564523458480835,
      "learning_rate": 0.0004926884438138625,
      "loss": 2.8819,
      "step": 64048
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9743554592132568,
      "learning_rate": 0.0004926853085531628,
      "loss": 3.076,
      "step": 64049
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.126141309738159,
      "learning_rate": 0.0004926821732566391,
      "loss": 3.1787,
      "step": 64050
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.5893442630767822,
      "learning_rate": 0.0004926790379242923,
      "loss": 2.9109,
      "step": 64051
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.467165946960449,
      "learning_rate": 0.0004926759025561228,
      "loss": 3.0265,
      "step": 64052
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.9633476734161377,
      "learning_rate": 0.0004926727671521312,
      "loss": 2.7458,
      "step": 64053
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4756439924240112,
      "learning_rate": 0.0004926696317123179,
      "loss": 2.9962,
      "step": 64054
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.514601707458496,
      "learning_rate": 0.000492666496236684,
      "loss": 2.916,
      "step": 64055
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9913125038146973,
      "learning_rate": 0.0004926633607252295,
      "loss": 2.9632,
      "step": 64056
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6635265350341797,
      "learning_rate": 0.0004926602251779554,
      "loss": 3.0012,
      "step": 64057
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.722719669342041,
      "learning_rate": 0.0004926570895948621,
      "loss": 2.659,
      "step": 64058
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3918484449386597,
      "learning_rate": 0.0004926539539759502,
      "loss": 3.1073,
      "step": 64059
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.79374361038208,
      "learning_rate": 0.0004926508183212202,
      "loss": 3.1109,
      "step": 64060
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.914610743522644,
      "learning_rate": 0.0004926476826306729,
      "loss": 2.8842,
      "step": 64061
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4391440153121948,
      "learning_rate": 0.0004926445469043087,
      "loss": 3.0753,
      "step": 64062
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.270486831665039,
      "learning_rate": 0.0004926414111421283,
      "loss": 3.0711,
      "step": 64063
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.781591773033142,
      "learning_rate": 0.0004926382753441323,
      "loss": 3.0647,
      "step": 64064
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.8157782554626465,
      "learning_rate": 0.000492635139510321,
      "loss": 2.8587,
      "step": 64065
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2789013385772705,
      "learning_rate": 0.0004926320036406953,
      "loss": 3.0423,
      "step": 64066
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.543846607208252,
      "learning_rate": 0.0004926288677352557,
      "loss": 3.1094,
      "step": 64067
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9891794919967651,
      "learning_rate": 0.0004926257317940027,
      "loss": 2.9328,
      "step": 64068
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6334102153778076,
      "learning_rate": 0.000492622595816937,
      "loss": 3.0579,
      "step": 64069
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.602639079093933,
      "learning_rate": 0.000492619459804059,
      "loss": 3.0649,
      "step": 64070
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2639565467834473,
      "learning_rate": 0.0004926163237553695,
      "loss": 3.1076,
      "step": 64071
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.283655881881714,
      "learning_rate": 0.000492613187670869,
      "loss": 2.9629,
      "step": 64072
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6766430139541626,
      "learning_rate": 0.0004926100515505581,
      "loss": 3.133,
      "step": 64073
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.620753288269043,
      "learning_rate": 0.0004926069153944373,
      "loss": 3.003,
      "step": 64074
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.5045690536499023,
      "learning_rate": 0.0004926037792025071,
      "loss": 3.2872,
      "step": 64075
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.690857410430908,
      "learning_rate": 0.0004926006429747684,
      "loss": 3.1832,
      "step": 64076
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.442287564277649,
      "learning_rate": 0.0004925975067112216,
      "loss": 3.1406,
      "step": 64077
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.744794487953186,
      "learning_rate": 0.0004925943704118673,
      "loss": 3.1569,
      "step": 64078
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4983527660369873,
      "learning_rate": 0.0004925912340767061,
      "loss": 3.1726,
      "step": 64079
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7272146940231323,
      "learning_rate": 0.0004925880977057384,
      "loss": 3.053,
      "step": 64080
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6943014860153198,
      "learning_rate": 0.000492584961298965,
      "loss": 3.198,
      "step": 64081
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1842801570892334,
      "learning_rate": 0.0004925818248563865,
      "loss": 3.1685,
      "step": 64082
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.8634912967681885,
      "learning_rate": 0.0004925786883780033,
      "loss": 3.0886,
      "step": 64083
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.869576096534729,
      "learning_rate": 0.0004925755518638161,
      "loss": 3.0006,
      "step": 64084
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.391631841659546,
      "learning_rate": 0.0004925724153138256,
      "loss": 3.2291,
      "step": 64085
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6444634199142456,
      "learning_rate": 0.0004925692787280321,
      "loss": 2.7866,
      "step": 64086
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.1000218391418457,
      "learning_rate": 0.0004925661421064364,
      "loss": 3.0495,
      "step": 64087
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4978605508804321,
      "learning_rate": 0.000492563005449039,
      "loss": 3.0666,
      "step": 64088
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4476007223129272,
      "learning_rate": 0.0004925598687558405,
      "loss": 3.0001,
      "step": 64089
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7451469898223877,
      "learning_rate": 0.0004925567320268415,
      "loss": 3.1722,
      "step": 64090
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.418898820877075,
      "learning_rate": 0.0004925535952620426,
      "loss": 2.9177,
      "step": 64091
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4565644264221191,
      "learning_rate": 0.0004925504584614444,
      "loss": 3.0994,
      "step": 64092
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.481818675994873,
      "learning_rate": 0.0004925473216250472,
      "loss": 3.2809,
      "step": 64093
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.1516175270080566,
      "learning_rate": 0.000492544184752852,
      "loss": 3.0942,
      "step": 64094
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9103537797927856,
      "learning_rate": 0.0004925410478448592,
      "loss": 3.129,
      "step": 64095
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7353365421295166,
      "learning_rate": 0.0004925379109010694,
      "loss": 3.1574,
      "step": 64096
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4094926118850708,
      "learning_rate": 0.0004925347739214831,
      "loss": 3.067,
      "step": 64097
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.306260347366333,
      "learning_rate": 0.000492531636906101,
      "loss": 3.0737,
      "step": 64098
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6839067935943604,
      "learning_rate": 0.0004925284998549236,
      "loss": 2.969,
      "step": 64099
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5606393814086914,
      "learning_rate": 0.0004925253627679515,
      "loss": 2.9138,
      "step": 64100
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.404563069343567,
      "learning_rate": 0.0004925222256451854,
      "loss": 2.8733,
      "step": 64101
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6856309175491333,
      "learning_rate": 0.0004925190884866257,
      "loss": 2.8127,
      "step": 64102
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.8430792093276978,
      "learning_rate": 0.000492515951292273,
      "loss": 2.9283,
      "step": 64103
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2618439197540283,
      "learning_rate": 0.0004925128140621281,
      "loss": 3.123,
      "step": 64104
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9293911457061768,
      "learning_rate": 0.0004925096767961914,
      "loss": 2.8841,
      "step": 64105
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6288731098175049,
      "learning_rate": 0.0004925065394944635,
      "loss": 2.9322,
      "step": 64106
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7937514781951904,
      "learning_rate": 0.0004925034021569449,
      "loss": 3.0941,
      "step": 64107
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.502284288406372,
      "learning_rate": 0.0004925002647836364,
      "loss": 3.0783,
      "step": 64108
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.409610629081726,
      "learning_rate": 0.0004924971273745385,
      "loss": 2.848,
      "step": 64109
    },
    {
      "epoch": 0.83,
      "grad_norm": 7.559243202209473,
      "learning_rate": 0.0004924939899296516,
      "loss": 3.0889,
      "step": 64110
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.3214621543884277,
      "learning_rate": 0.0004924908524489766,
      "loss": 3.108,
      "step": 64111
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7200839519500732,
      "learning_rate": 0.0004924877149325138,
      "loss": 3.0721,
      "step": 64112
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.72085440158844,
      "learning_rate": 0.000492484577380264,
      "loss": 2.8697,
      "step": 64113
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.850858449935913,
      "learning_rate": 0.0004924814397922275,
      "loss": 2.9484,
      "step": 64114
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.7082490921020508,
      "learning_rate": 0.0004924783021684053,
      "loss": 3.0595,
      "step": 64115
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.77265465259552,
      "learning_rate": 0.0004924751645087976,
      "loss": 2.8708,
      "step": 64116
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5931035280227661,
      "learning_rate": 0.0004924720268134051,
      "loss": 3.2364,
      "step": 64117
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.3335392475128174,
      "learning_rate": 0.0004924688890822285,
      "loss": 3.1687,
      "step": 64118
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.220301628112793,
      "learning_rate": 0.0004924657513152683,
      "loss": 2.9745,
      "step": 64119
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.663286805152893,
      "learning_rate": 0.0004924626135125251,
      "loss": 2.8694,
      "step": 64120
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.9055622816085815,
      "learning_rate": 0.0004924594756739994,
      "loss": 2.8199,
      "step": 64121
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.2902305126190186,
      "learning_rate": 0.000492456337799692,
      "loss": 3.0419,
      "step": 64122
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4505128860473633,
      "learning_rate": 0.0004924531998896031,
      "loss": 3.0829,
      "step": 64123
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.628308653831482,
      "learning_rate": 0.0004924500619437337,
      "loss": 2.8088,
      "step": 64124
    },
    {
      "epoch": 0.83,
      "grad_norm": 2.4098963737487793,
      "learning_rate": 0.0004924469239620842,
      "loss": 3.1922,
      "step": 64125
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.114922523498535,
      "learning_rate": 0.000492443785944655,
      "loss": 3.1466,
      "step": 64126
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.5396788120269775,
      "learning_rate": 0.0004924406478914471,
      "loss": 2.8936,
      "step": 64127
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.433657169342041,
      "learning_rate": 0.0004924375098024607,
      "loss": 3.0825,
      "step": 64128
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3039939403533936,
      "learning_rate": 0.0004924343716776967,
      "loss": 2.9766,
      "step": 64129
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1144559383392334,
      "learning_rate": 0.0004924312335171553,
      "loss": 3.0723,
      "step": 64130
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7152117490768433,
      "learning_rate": 0.0004924280953208375,
      "loss": 3.1859,
      "step": 64131
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6816879510879517,
      "learning_rate": 0.0004924249570887435,
      "loss": 2.8186,
      "step": 64132
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.967103362083435,
      "learning_rate": 0.0004924218188208741,
      "loss": 2.8638,
      "step": 64133
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0464863777160645,
      "learning_rate": 0.0004924186805172299,
      "loss": 2.9414,
      "step": 64134
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4550511837005615,
      "learning_rate": 0.0004924155421778115,
      "loss": 3.0894,
      "step": 64135
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.187748670578003,
      "learning_rate": 0.0004924124038026194,
      "loss": 3.0532,
      "step": 64136
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3535068035125732,
      "learning_rate": 0.0004924092653916541,
      "loss": 3.1742,
      "step": 64137
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5815693140029907,
      "learning_rate": 0.0004924061269449164,
      "loss": 3.0407,
      "step": 64138
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.891418695449829,
      "learning_rate": 0.0004924029884624067,
      "loss": 2.944,
      "step": 64139
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.858255624771118,
      "learning_rate": 0.0004923998499441257,
      "loss": 3.0739,
      "step": 64140
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3906867504119873,
      "learning_rate": 0.0004923967113900739,
      "loss": 2.9082,
      "step": 64141
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7745919227600098,
      "learning_rate": 0.0004923935728002518,
      "loss": 3.1174,
      "step": 64142
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9904685020446777,
      "learning_rate": 0.0004923904341746604,
      "loss": 3.0436,
      "step": 64143
    },
    {
      "epoch": 0.84,
      "grad_norm": 4.915799140930176,
      "learning_rate": 0.0004923872955132996,
      "loss": 2.8103,
      "step": 64144
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.994826555252075,
      "learning_rate": 0.0004923841568161707,
      "loss": 3.1233,
      "step": 64145
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.816945195198059,
      "learning_rate": 0.0004923810180832739,
      "loss": 3.1968,
      "step": 64146
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.618664503097534,
      "learning_rate": 0.0004923778793146097,
      "loss": 2.9608,
      "step": 64147
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.601266622543335,
      "learning_rate": 0.0004923747405101789,
      "loss": 2.9857,
      "step": 64148
    },
    {
      "epoch": 0.84,
      "grad_norm": 4.203941822052002,
      "learning_rate": 0.000492371601669982,
      "loss": 3.065,
      "step": 64149
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1406829357147217,
      "learning_rate": 0.0004923684627940197,
      "loss": 3.3193,
      "step": 64150
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.401811122894287,
      "learning_rate": 0.0004923653238822922,
      "loss": 3.0002,
      "step": 64151
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.387908458709717,
      "learning_rate": 0.0004923621849348005,
      "loss": 3.1522,
      "step": 64152
    },
    {
      "epoch": 0.84,
      "grad_norm": 4.99926233291626,
      "learning_rate": 0.0004923590459515451,
      "loss": 2.8161,
      "step": 64153
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.9027764797210693,
      "learning_rate": 0.0004923559069325265,
      "loss": 3.068,
      "step": 64154
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.520043134689331,
      "learning_rate": 0.0004923527678777453,
      "loss": 2.946,
      "step": 64155
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4960204362869263,
      "learning_rate": 0.0004923496287872021,
      "loss": 2.9708,
      "step": 64156
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.663856029510498,
      "learning_rate": 0.0004923464896608974,
      "loss": 3.0247,
      "step": 64157
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.7740256786346436,
      "learning_rate": 0.0004923433504988319,
      "loss": 3.1431,
      "step": 64158
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3330373764038086,
      "learning_rate": 0.0004923402113010061,
      "loss": 3.1466,
      "step": 64159
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.25607430934906,
      "learning_rate": 0.0004923370720674206,
      "loss": 3.0696,
      "step": 64160
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8472732305526733,
      "learning_rate": 0.0004923339327980762,
      "loss": 3.0554,
      "step": 64161
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8152045011520386,
      "learning_rate": 0.0004923307934929731,
      "loss": 3.1632,
      "step": 64162
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9735320806503296,
      "learning_rate": 0.0004923276541521121,
      "loss": 2.9465,
      "step": 64163
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.348322868347168,
      "learning_rate": 0.0004923245147754939,
      "loss": 3.0985,
      "step": 64164
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4769885540008545,
      "learning_rate": 0.0004923213753631188,
      "loss": 3.122,
      "step": 64165
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7791450023651123,
      "learning_rate": 0.0004923182359149876,
      "loss": 2.855,
      "step": 64166
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2782864570617676,
      "learning_rate": 0.0004923150964311007,
      "loss": 2.902,
      "step": 64167
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5345264673233032,
      "learning_rate": 0.0004923119569114589,
      "loss": 2.8129,
      "step": 64168
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7129151821136475,
      "learning_rate": 0.0004923088173560626,
      "loss": 2.9679,
      "step": 64169
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3452931642532349,
      "learning_rate": 0.0004923056777649125,
      "loss": 3.0828,
      "step": 64170
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7583842277526855,
      "learning_rate": 0.0004923025381380092,
      "loss": 3.0656,
      "step": 64171
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7139908075332642,
      "learning_rate": 0.0004922993984753532,
      "loss": 3.0919,
      "step": 64172
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3089818954467773,
      "learning_rate": 0.000492296258776945,
      "loss": 3.0134,
      "step": 64173
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.612669825553894,
      "learning_rate": 0.0004922931190427855,
      "loss": 3.1419,
      "step": 64174
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7553025484085083,
      "learning_rate": 0.000492289979272875,
      "loss": 3.146,
      "step": 64175
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5679938793182373,
      "learning_rate": 0.000492286839467214,
      "loss": 3.0092,
      "step": 64176
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7614887952804565,
      "learning_rate": 0.0004922836996258034,
      "loss": 3.0029,
      "step": 64177
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3802542686462402,
      "learning_rate": 0.0004922805597486437,
      "loss": 3.1701,
      "step": 64178
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9369572401046753,
      "learning_rate": 0.0004922774198357352,
      "loss": 3.1747,
      "step": 64179
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6737658977508545,
      "learning_rate": 0.0004922742798870789,
      "loss": 2.8841,
      "step": 64180
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.340085744857788,
      "learning_rate": 0.0004922711399026751,
      "loss": 3.1417,
      "step": 64181
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5830414295196533,
      "learning_rate": 0.0004922679998825244,
      "loss": 3.0927,
      "step": 64182
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5833418369293213,
      "learning_rate": 0.0004922648598266276,
      "loss": 2.9578,
      "step": 64183
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.558610200881958,
      "learning_rate": 0.0004922617197349851,
      "loss": 3.0574,
      "step": 64184
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0903217792510986,
      "learning_rate": 0.0004922585796075974,
      "loss": 3.0801,
      "step": 64185
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9082425832748413,
      "learning_rate": 0.0004922554394444653,
      "loss": 3.1023,
      "step": 64186
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.494507074356079,
      "learning_rate": 0.0004922522992455891,
      "loss": 3.1079,
      "step": 64187
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6987637281417847,
      "learning_rate": 0.0004922491590109698,
      "loss": 3.0221,
      "step": 64188
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7693300247192383,
      "learning_rate": 0.0004922460187406077,
      "loss": 3.1941,
      "step": 64189
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.074183702468872,
      "learning_rate": 0.0004922428784345034,
      "loss": 2.8287,
      "step": 64190
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5889238119125366,
      "learning_rate": 0.0004922397380926575,
      "loss": 2.9088,
      "step": 64191
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.416706085205078,
      "learning_rate": 0.0004922365977150708,
      "loss": 3.1115,
      "step": 64192
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.288228750228882,
      "learning_rate": 0.0004922334573017434,
      "loss": 3.0308,
      "step": 64193
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3643888235092163,
      "learning_rate": 0.0004922303168526762,
      "loss": 2.9896,
      "step": 64194
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5862400531768799,
      "learning_rate": 0.0004922271763678699,
      "loss": 3.0296,
      "step": 64195
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2608004808425903,
      "learning_rate": 0.0004922240358473249,
      "loss": 3.017,
      "step": 64196
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3456714153289795,
      "learning_rate": 0.0004922208952910419,
      "loss": 2.9267,
      "step": 64197
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7220407724380493,
      "learning_rate": 0.0004922177546990212,
      "loss": 3.1796,
      "step": 64198
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4350756406784058,
      "learning_rate": 0.0004922146140712637,
      "loss": 2.9181,
      "step": 64199
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.696238398551941,
      "learning_rate": 0.00049221147340777,
      "loss": 2.979,
      "step": 64200
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2698193788528442,
      "learning_rate": 0.0004922083327085404,
      "loss": 2.915,
      "step": 64201
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.716922640800476,
      "learning_rate": 0.0004922051919735756,
      "loss": 3.2396,
      "step": 64202
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3850111961364746,
      "learning_rate": 0.0004922020512028763,
      "loss": 2.9202,
      "step": 64203
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7745823860168457,
      "learning_rate": 0.0004921989103964431,
      "loss": 2.9925,
      "step": 64204
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1012563705444336,
      "learning_rate": 0.0004921957695542764,
      "loss": 2.9411,
      "step": 64205
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9669501781463623,
      "learning_rate": 0.0004921926286763769,
      "loss": 2.8517,
      "step": 64206
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.522374153137207,
      "learning_rate": 0.0004921894877627452,
      "loss": 3.2343,
      "step": 64207
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.736061692237854,
      "learning_rate": 0.0004921863468133818,
      "loss": 2.9147,
      "step": 64208
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8722844123840332,
      "learning_rate": 0.0004921832058282872,
      "loss": 2.7542,
      "step": 64209
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6771061420440674,
      "learning_rate": 0.0004921800648074623,
      "loss": 3.345,
      "step": 64210
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6228123903274536,
      "learning_rate": 0.0004921769237509075,
      "loss": 3.1472,
      "step": 64211
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7090750932693481,
      "learning_rate": 0.0004921737826586233,
      "loss": 3.1195,
      "step": 64212
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7350184917449951,
      "learning_rate": 0.0004921706415306104,
      "loss": 3.1216,
      "step": 64213
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.632022738456726,
      "learning_rate": 0.0004921675003668693,
      "loss": 2.8748,
      "step": 64214
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5363497734069824,
      "learning_rate": 0.0004921643591674008,
      "loss": 2.9139,
      "step": 64215
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9461716413497925,
      "learning_rate": 0.0004921612179322052,
      "loss": 3.1364,
      "step": 64216
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.671708106994629,
      "learning_rate": 0.0004921580766612831,
      "loss": 3.0852,
      "step": 64217
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.455217719078064,
      "learning_rate": 0.0004921549353546353,
      "loss": 2.9136,
      "step": 64218
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1072628498077393,
      "learning_rate": 0.0004921517940122622,
      "loss": 2.938,
      "step": 64219
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.184202194213867,
      "learning_rate": 0.0004921486526341646,
      "loss": 3.0118,
      "step": 64220
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9945311546325684,
      "learning_rate": 0.0004921455112203429,
      "loss": 3.1577,
      "step": 64221
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.660475254058838,
      "learning_rate": 0.0004921423697707976,
      "loss": 2.9512,
      "step": 64222
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.7586700916290283,
      "learning_rate": 0.0004921392282855295,
      "loss": 2.8996,
      "step": 64223
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8691866397857666,
      "learning_rate": 0.000492136086764539,
      "loss": 3.1502,
      "step": 64224
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6570310592651367,
      "learning_rate": 0.0004921329452078269,
      "loss": 3.0542,
      "step": 64225
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5806736946105957,
      "learning_rate": 0.0004921298036153936,
      "loss": 3.1081,
      "step": 64226
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6814570426940918,
      "learning_rate": 0.0004921266619872397,
      "loss": 3.0155,
      "step": 64227
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.49811851978302,
      "learning_rate": 0.0004921235203233659,
      "loss": 3.0339,
      "step": 64228
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1372814178466797,
      "learning_rate": 0.0004921203786237727,
      "loss": 2.986,
      "step": 64229
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.8843135833740234,
      "learning_rate": 0.0004921172368884607,
      "loss": 3.1302,
      "step": 64230
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8558452129364014,
      "learning_rate": 0.0004921140951174304,
      "loss": 3.0944,
      "step": 64231
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1074419021606445,
      "learning_rate": 0.0004921109533106826,
      "loss": 2.886,
      "step": 64232
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.080150842666626,
      "learning_rate": 0.0004921078114682176,
      "loss": 3.0817,
      "step": 64233
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5438176393508911,
      "learning_rate": 0.0004921046695900362,
      "loss": 3.1808,
      "step": 64234
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5191644430160522,
      "learning_rate": 0.0004921015276761389,
      "loss": 3.1041,
      "step": 64235
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6431595087051392,
      "learning_rate": 0.0004920983857265264,
      "loss": 3.1852,
      "step": 64236
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.278580904006958,
      "learning_rate": 0.000492095243741199,
      "loss": 3.3369,
      "step": 64237
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.9552001953125,
      "learning_rate": 0.0004920921017201577,
      "loss": 3.0926,
      "step": 64238
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.572622537612915,
      "learning_rate": 0.0004920889596634027,
      "loss": 2.8253,
      "step": 64239
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.224928140640259,
      "learning_rate": 0.0004920858175709347,
      "loss": 3.0609,
      "step": 64240
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.7325661182403564,
      "learning_rate": 0.0004920826754427544,
      "loss": 3.0139,
      "step": 64241
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3194708824157715,
      "learning_rate": 0.0004920795332788622,
      "loss": 3.054,
      "step": 64242
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1461236476898193,
      "learning_rate": 0.000492076391079259,
      "loss": 2.8114,
      "step": 64243
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8098543882369995,
      "learning_rate": 0.000492073248843945,
      "loss": 3.0551,
      "step": 64244
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.945013999938965,
      "learning_rate": 0.0004920701065729209,
      "loss": 3.0435,
      "step": 64245
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7776798009872437,
      "learning_rate": 0.0004920669642661875,
      "loss": 3.1383,
      "step": 64246
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4131436347961426,
      "learning_rate": 0.0004920638219237453,
      "loss": 3.0738,
      "step": 64247
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0447897911071777,
      "learning_rate": 0.0004920606795455946,
      "loss": 2.9133,
      "step": 64248
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.046609878540039,
      "learning_rate": 0.0004920575371317363,
      "loss": 2.9489,
      "step": 64249
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.146392345428467,
      "learning_rate": 0.0004920543946821708,
      "loss": 3.0073,
      "step": 64250
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5230071544647217,
      "learning_rate": 0.0004920512521968987,
      "loss": 2.9301,
      "step": 64251
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4149837493896484,
      "learning_rate": 0.0004920481096759208,
      "loss": 3.1124,
      "step": 64252
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.954063057899475,
      "learning_rate": 0.0004920449671192375,
      "loss": 3.1851,
      "step": 64253
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8571956157684326,
      "learning_rate": 0.0004920418245268495,
      "loss": 2.9661,
      "step": 64254
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.011603593826294,
      "learning_rate": 0.0004920386818987572,
      "loss": 2.9329,
      "step": 64255
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1448841094970703,
      "learning_rate": 0.0004920355392349612,
      "loss": 3.0878,
      "step": 64256
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7955437898635864,
      "learning_rate": 0.0004920323965354622,
      "loss": 3.259,
      "step": 64257
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1070363521575928,
      "learning_rate": 0.0004920292538002608,
      "loss": 3.0048,
      "step": 64258
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7731696367263794,
      "learning_rate": 0.0004920261110293577,
      "loss": 2.9481,
      "step": 64259
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4106671810150146,
      "learning_rate": 0.0004920229682227531,
      "loss": 3.3109,
      "step": 64260
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6151803731918335,
      "learning_rate": 0.0004920198253804478,
      "loss": 2.8483,
      "step": 64261
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.453503131866455,
      "learning_rate": 0.0004920166825024425,
      "loss": 3.1113,
      "step": 64262
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.529407024383545,
      "learning_rate": 0.0004920135395887378,
      "loss": 3.1843,
      "step": 64263
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6548337936401367,
      "learning_rate": 0.0004920103966393339,
      "loss": 2.9258,
      "step": 64264
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.409645438194275,
      "learning_rate": 0.0004920072536542318,
      "loss": 2.8887,
      "step": 64265
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5078243017196655,
      "learning_rate": 0.0004920041106334318,
      "loss": 3.1395,
      "step": 64266
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.908292531967163,
      "learning_rate": 0.0004920009675769347,
      "loss": 3.0516,
      "step": 64267
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4549293518066406,
      "learning_rate": 0.000491997824484741,
      "loss": 2.8091,
      "step": 64268
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.592802882194519,
      "learning_rate": 0.0004919946813568512,
      "loss": 3.0215,
      "step": 64269
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4537172317504883,
      "learning_rate": 0.000491991538193266,
      "loss": 2.9257,
      "step": 64270
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7269028425216675,
      "learning_rate": 0.000491988394993986,
      "loss": 3.0109,
      "step": 64271
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7043218612670898,
      "learning_rate": 0.0004919852517590117,
      "loss": 3.0891,
      "step": 64272
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5717170238494873,
      "learning_rate": 0.0004919821084883437,
      "loss": 2.8024,
      "step": 64273
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6047688722610474,
      "learning_rate": 0.0004919789651819827,
      "loss": 2.7876,
      "step": 64274
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.75750732421875,
      "learning_rate": 0.0004919758218399291,
      "loss": 3.129,
      "step": 64275
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.881682276725769,
      "learning_rate": 0.0004919726784621836,
      "loss": 2.8235,
      "step": 64276
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.953579068183899,
      "learning_rate": 0.0004919695350487467,
      "loss": 3.147,
      "step": 64277
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5599721670150757,
      "learning_rate": 0.000491966391599619,
      "loss": 3.1478,
      "step": 64278
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2534778118133545,
      "learning_rate": 0.0004919632481148013,
      "loss": 3.2626,
      "step": 64279
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.1790878772735596,
      "learning_rate": 0.0004919601045942939,
      "loss": 2.7487,
      "step": 64280
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8302125930786133,
      "learning_rate": 0.0004919569610380975,
      "loss": 3.0721,
      "step": 64281
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.659773826599121,
      "learning_rate": 0.0004919538174462127,
      "loss": 2.9536,
      "step": 64282
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.310232639312744,
      "learning_rate": 0.00049195067381864,
      "loss": 3.0981,
      "step": 64283
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.82401704788208,
      "learning_rate": 0.0004919475301553801,
      "loss": 2.794,
      "step": 64284
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6421735286712646,
      "learning_rate": 0.0004919443864564335,
      "loss": 3.0624,
      "step": 64285
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7725591659545898,
      "learning_rate": 0.0004919412427218007,
      "loss": 3.2827,
      "step": 64286
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4225529432296753,
      "learning_rate": 0.0004919380989514826,
      "loss": 3.0039,
      "step": 64287
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6902607679367065,
      "learning_rate": 0.0004919349551454795,
      "loss": 2.8819,
      "step": 64288
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8584198951721191,
      "learning_rate": 0.0004919318113037922,
      "loss": 3.1552,
      "step": 64289
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.421832799911499,
      "learning_rate": 0.000491928667426421,
      "loss": 2.729,
      "step": 64290
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5228323936462402,
      "learning_rate": 0.0004919255235133667,
      "loss": 3.0436,
      "step": 64291
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5751985311508179,
      "learning_rate": 0.0004919223795646297,
      "loss": 3.1869,
      "step": 64292
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.603821039199829,
      "learning_rate": 0.0004919192355802108,
      "loss": 3.2385,
      "step": 64293
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4354015588760376,
      "learning_rate": 0.0004919160915601105,
      "loss": 3.1709,
      "step": 64294
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4940205812454224,
      "learning_rate": 0.0004919129475043294,
      "loss": 3.0749,
      "step": 64295
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.8248064517974854,
      "learning_rate": 0.000491909803412868,
      "loss": 2.8872,
      "step": 64296
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.19022798538208,
      "learning_rate": 0.000491906659285727,
      "loss": 2.8926,
      "step": 64297
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4304765462875366,
      "learning_rate": 0.0004919035151229069,
      "loss": 3.0816,
      "step": 64298
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6797897815704346,
      "learning_rate": 0.0004919003709244084,
      "loss": 3.1317,
      "step": 64299
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9668883085250854,
      "learning_rate": 0.0004918972266902319,
      "loss": 3.1016,
      "step": 64300
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5268994569778442,
      "learning_rate": 0.0004918940824203781,
      "loss": 2.9053,
      "step": 64301
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5412548780441284,
      "learning_rate": 0.0004918909381148476,
      "loss": 3.1642,
      "step": 64302
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9878135919570923,
      "learning_rate": 0.0004918877937736409,
      "loss": 3.0288,
      "step": 64303
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4479950666427612,
      "learning_rate": 0.0004918846493967586,
      "loss": 2.9445,
      "step": 64304
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6762220859527588,
      "learning_rate": 0.0004918815049842014,
      "loss": 2.9429,
      "step": 64305
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5533573627471924,
      "learning_rate": 0.0004918783605359698,
      "loss": 2.9047,
      "step": 64306
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8034213781356812,
      "learning_rate": 0.0004918752160520644,
      "loss": 2.9905,
      "step": 64307
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6347521543502808,
      "learning_rate": 0.0004918720715324857,
      "loss": 3.0635,
      "step": 64308
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5214006900787354,
      "learning_rate": 0.0004918689269772345,
      "loss": 3.3056,
      "step": 64309
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6292542219161987,
      "learning_rate": 0.0004918657823863111,
      "loss": 2.8836,
      "step": 64310
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.5812125205993652,
      "learning_rate": 0.0004918626377597163,
      "loss": 3.1216,
      "step": 64311
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0066099166870117,
      "learning_rate": 0.0004918594930974507,
      "loss": 3.0389,
      "step": 64312
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6427867412567139,
      "learning_rate": 0.0004918563483995146,
      "loss": 3.0026,
      "step": 64313
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3486216068267822,
      "learning_rate": 0.0004918532036659089,
      "loss": 3.1272,
      "step": 64314
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.175316095352173,
      "learning_rate": 0.000491850058896634,
      "loss": 3.0123,
      "step": 64315
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5945510864257812,
      "learning_rate": 0.0004918469140916907,
      "loss": 3.1377,
      "step": 64316
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6563692092895508,
      "learning_rate": 0.0004918437692510792,
      "loss": 2.918,
      "step": 64317
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3421293497085571,
      "learning_rate": 0.0004918406243748006,
      "loss": 3.2141,
      "step": 64318
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9403156042099,
      "learning_rate": 0.000491837479462855,
      "loss": 3.0006,
      "step": 64319
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2910609245300293,
      "learning_rate": 0.0004918343345152433,
      "loss": 3.0646,
      "step": 64320
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9169082641601562,
      "learning_rate": 0.0004918311895319659,
      "loss": 2.7829,
      "step": 64321
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8070613145828247,
      "learning_rate": 0.0004918280445130235,
      "loss": 3.0968,
      "step": 64322
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.272155523300171,
      "learning_rate": 0.0004918248994584167,
      "loss": 2.7603,
      "step": 64323
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2657105922698975,
      "learning_rate": 0.000491821754368146,
      "loss": 3.1874,
      "step": 64324
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5568406581878662,
      "learning_rate": 0.0004918186092422119,
      "loss": 3.1413,
      "step": 64325
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9134396314620972,
      "learning_rate": 0.0004918154640806152,
      "loss": 3.0141,
      "step": 64326
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.121509313583374,
      "learning_rate": 0.0004918123188833564,
      "loss": 3.1282,
      "step": 64327
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.468252182006836,
      "learning_rate": 0.000491809173650436,
      "loss": 3.1565,
      "step": 64328
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.4058804512023926,
      "learning_rate": 0.0004918060283818547,
      "loss": 3.2296,
      "step": 64329
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.751911997795105,
      "learning_rate": 0.000491802883077613,
      "loss": 3.0594,
      "step": 64330
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5892606973648071,
      "learning_rate": 0.0004917997377377116,
      "loss": 3.1521,
      "step": 64331
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9275953769683838,
      "learning_rate": 0.0004917965923621509,
      "loss": 3.2052,
      "step": 64332
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.894728422164917,
      "learning_rate": 0.0004917934469509317,
      "loss": 2.7517,
      "step": 64333
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.773439407348633,
      "learning_rate": 0.0004917903015040544,
      "loss": 3.0797,
      "step": 64334
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6082264184951782,
      "learning_rate": 0.0004917871560215197,
      "loss": 2.8099,
      "step": 64335
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8757939338684082,
      "learning_rate": 0.0004917840105033282,
      "loss": 3.031,
      "step": 64336
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.090333938598633,
      "learning_rate": 0.0004917808649494804,
      "loss": 2.9304,
      "step": 64337
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2761099338531494,
      "learning_rate": 0.000491777719359977,
      "loss": 3.1279,
      "step": 64338
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5372329950332642,
      "learning_rate": 0.0004917745737348183,
      "loss": 3.2354,
      "step": 64339
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.42500376701355,
      "learning_rate": 0.0004917714280740052,
      "loss": 3.203,
      "step": 64340
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3572187423706055,
      "learning_rate": 0.0004917682823775382,
      "loss": 3.1254,
      "step": 64341
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4631602764129639,
      "learning_rate": 0.0004917651366454177,
      "loss": 3.0129,
      "step": 64342
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5936169624328613,
      "learning_rate": 0.0004917619908776446,
      "loss": 2.9156,
      "step": 64343
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9249577522277832,
      "learning_rate": 0.0004917588450742193,
      "loss": 3.1364,
      "step": 64344
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.442425012588501,
      "learning_rate": 0.0004917556992351424,
      "loss": 2.9113,
      "step": 64345
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0137031078338623,
      "learning_rate": 0.0004917525533604145,
      "loss": 2.8509,
      "step": 64346
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.6842901706695557,
      "learning_rate": 0.0004917494074500362,
      "loss": 3.0346,
      "step": 64347
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9259499311447144,
      "learning_rate": 0.000491746261504008,
      "loss": 2.881,
      "step": 64348
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7555272579193115,
      "learning_rate": 0.0004917431155223305,
      "loss": 3.0269,
      "step": 64349
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6260563135147095,
      "learning_rate": 0.0004917399695050045,
      "loss": 3.3547,
      "step": 64350
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6316412687301636,
      "learning_rate": 0.0004917368234520303,
      "loss": 2.882,
      "step": 64351
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6691259145736694,
      "learning_rate": 0.0004917336773634087,
      "loss": 3.0842,
      "step": 64352
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.052760124206543,
      "learning_rate": 0.0004917305312391401,
      "loss": 3.1756,
      "step": 64353
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5435763597488403,
      "learning_rate": 0.0004917273850792252,
      "loss": 3.0629,
      "step": 64354
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.000478982925415,
      "learning_rate": 0.0004917242388836646,
      "loss": 3.2118,
      "step": 64355
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5566675662994385,
      "learning_rate": 0.0004917210926524588,
      "loss": 3.0121,
      "step": 64356
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5830307006835938,
      "learning_rate": 0.0004917179463856085,
      "loss": 3.0553,
      "step": 64357
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3164501190185547,
      "learning_rate": 0.0004917148000831141,
      "loss": 3.047,
      "step": 64358
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.133429765701294,
      "learning_rate": 0.0004917116537449764,
      "loss": 2.9047,
      "step": 64359
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5191245079040527,
      "learning_rate": 0.0004917085073711958,
      "loss": 2.9359,
      "step": 64360
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.6391711235046387,
      "learning_rate": 0.000491705360961773,
      "loss": 3.1315,
      "step": 64361
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.849014163017273,
      "learning_rate": 0.0004917022145167085,
      "loss": 3.2113,
      "step": 64362
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7952535152435303,
      "learning_rate": 0.0004916990680360031,
      "loss": 2.9152,
      "step": 64363
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.465458393096924,
      "learning_rate": 0.0004916959215196571,
      "loss": 3.1015,
      "step": 64364
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8724082708358765,
      "learning_rate": 0.0004916927749676713,
      "loss": 3.0647,
      "step": 64365
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6860544681549072,
      "learning_rate": 0.000491689628380046,
      "loss": 3.0181,
      "step": 64366
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.9039037227630615,
      "learning_rate": 0.0004916864817567822,
      "loss": 2.8801,
      "step": 64367
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8518927097320557,
      "learning_rate": 0.0004916833350978801,
      "loss": 3.0198,
      "step": 64368
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5762861967086792,
      "learning_rate": 0.0004916801884033407,
      "loss": 3.3207,
      "step": 64369
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5820214748382568,
      "learning_rate": 0.0004916770416731641,
      "loss": 2.8633,
      "step": 64370
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6993533372879028,
      "learning_rate": 0.0004916738949073511,
      "loss": 3.183,
      "step": 64371
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.350756049156189,
      "learning_rate": 0.0004916707481059026,
      "loss": 3.0853,
      "step": 64372
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6256405115127563,
      "learning_rate": 0.0004916676012688186,
      "loss": 3.1291,
      "step": 64373
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.969712495803833,
      "learning_rate": 0.0004916644543961,
      "loss": 3.0186,
      "step": 64374
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2292287349700928,
      "learning_rate": 0.0004916613074877474,
      "loss": 2.9454,
      "step": 64375
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.568284511566162,
      "learning_rate": 0.0004916581605437614,
      "loss": 2.9889,
      "step": 64376
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6182979345321655,
      "learning_rate": 0.0004916550135641425,
      "loss": 2.8898,
      "step": 64377
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.514008045196533,
      "learning_rate": 0.0004916518665488913,
      "loss": 2.8549,
      "step": 64378
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6461577415466309,
      "learning_rate": 0.0004916487194980084,
      "loss": 2.9708,
      "step": 64379
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3880107402801514,
      "learning_rate": 0.0004916455724114944,
      "loss": 2.9223,
      "step": 64380
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.890331745147705,
      "learning_rate": 0.0004916424252893499,
      "loss": 2.6978,
      "step": 64381
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5884356498718262,
      "learning_rate": 0.0004916392781315754,
      "loss": 2.8146,
      "step": 64382
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2158713340759277,
      "learning_rate": 0.0004916361309381715,
      "loss": 3.1397,
      "step": 64383
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8892356157302856,
      "learning_rate": 0.000491632983709139,
      "loss": 2.845,
      "step": 64384
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.9204049110412598,
      "learning_rate": 0.0004916298364444781,
      "loss": 3.0326,
      "step": 64385
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8763744831085205,
      "learning_rate": 0.0004916266891441896,
      "loss": 3.2079,
      "step": 64386
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8566367626190186,
      "learning_rate": 0.0004916235418082743,
      "loss": 2.9083,
      "step": 64387
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.037659168243408,
      "learning_rate": 0.0004916203944367324,
      "loss": 3.1035,
      "step": 64388
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5632957220077515,
      "learning_rate": 0.0004916172470295647,
      "loss": 3.1969,
      "step": 64389
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.705966830253601,
      "learning_rate": 0.0004916140995867718,
      "loss": 3.1613,
      "step": 64390
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6715734004974365,
      "learning_rate": 0.0004916109521083541,
      "loss": 3.2226,
      "step": 64391
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.485739827156067,
      "learning_rate": 0.0004916078045943123,
      "loss": 3.027,
      "step": 64392
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4012168645858765,
      "learning_rate": 0.0004916046570446472,
      "loss": 2.9416,
      "step": 64393
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3997358083724976,
      "learning_rate": 0.0004916015094593589,
      "loss": 3.1415,
      "step": 64394
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9744915962219238,
      "learning_rate": 0.0004915983618384484,
      "loss": 2.9767,
      "step": 64395
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.488217830657959,
      "learning_rate": 0.0004915952141819162,
      "loss": 3.2379,
      "step": 64396
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6099153757095337,
      "learning_rate": 0.0004915920664897627,
      "loss": 3.0108,
      "step": 64397
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0381600856781006,
      "learning_rate": 0.0004915889187619887,
      "loss": 2.8329,
      "step": 64398
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.600084900856018,
      "learning_rate": 0.0004915857709985947,
      "loss": 3.0341,
      "step": 64399
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5422415733337402,
      "learning_rate": 0.0004915826231995813,
      "loss": 3.2646,
      "step": 64400
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.732613205909729,
      "learning_rate": 0.000491579475364949,
      "loss": 2.9533,
      "step": 64401
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.519666075706482,
      "learning_rate": 0.0004915763274946985,
      "loss": 3.088,
      "step": 64402
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8999933004379272,
      "learning_rate": 0.0004915731795888304,
      "loss": 3.1228,
      "step": 64403
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3814198970794678,
      "learning_rate": 0.0004915700316473452,
      "loss": 3.3222,
      "step": 64404
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.025447130203247,
      "learning_rate": 0.0004915668836702436,
      "loss": 3.0713,
      "step": 64405
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6748130321502686,
      "learning_rate": 0.0004915637356575259,
      "loss": 3.1554,
      "step": 64406
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1105613708496094,
      "learning_rate": 0.0004915605876091929,
      "loss": 3.1219,
      "step": 64407
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9940133094787598,
      "learning_rate": 0.0004915574395252454,
      "loss": 2.9884,
      "step": 64408
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6072232723236084,
      "learning_rate": 0.0004915542914056836,
      "loss": 2.9998,
      "step": 64409
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4147471189498901,
      "learning_rate": 0.0004915511432505083,
      "loss": 2.9902,
      "step": 64410
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6605528593063354,
      "learning_rate": 0.0004915479950597199,
      "loss": 3.073,
      "step": 64411
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4554487466812134,
      "learning_rate": 0.0004915448468333192,
      "loss": 3.2615,
      "step": 64412
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7975966930389404,
      "learning_rate": 0.0004915416985713067,
      "loss": 2.8846,
      "step": 64413
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6730303764343262,
      "learning_rate": 0.0004915385502736829,
      "loss": 3.059,
      "step": 64414
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8220103979110718,
      "learning_rate": 0.0004915354019404486,
      "loss": 3.1115,
      "step": 64415
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9213584661483765,
      "learning_rate": 0.0004915322535716043,
      "loss": 3.0676,
      "step": 64416
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7286208868026733,
      "learning_rate": 0.0004915291051671503,
      "loss": 2.966,
      "step": 64417
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7088016271591187,
      "learning_rate": 0.0004915259567270877,
      "loss": 3.186,
      "step": 64418
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7626547813415527,
      "learning_rate": 0.0004915228082514165,
      "loss": 2.8312,
      "step": 64419
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.771071195602417,
      "learning_rate": 0.0004915196597401378,
      "loss": 3.1827,
      "step": 64420
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3778938055038452,
      "learning_rate": 0.0004915165111932519,
      "loss": 3.098,
      "step": 64421
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5723804235458374,
      "learning_rate": 0.0004915133626107595,
      "loss": 3.1163,
      "step": 64422
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4288097620010376,
      "learning_rate": 0.0004915102139926613,
      "loss": 3.3763,
      "step": 64423
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4137232303619385,
      "learning_rate": 0.0004915070653389575,
      "loss": 3.0957,
      "step": 64424
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.856021523475647,
      "learning_rate": 0.0004915039166496491,
      "loss": 2.8653,
      "step": 64425
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3776425123214722,
      "learning_rate": 0.0004915007679247364,
      "loss": 3.1451,
      "step": 64426
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.505579948425293,
      "learning_rate": 0.00049149761916422,
      "loss": 3.054,
      "step": 64427
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5586258172988892,
      "learning_rate": 0.0004914944703681008,
      "loss": 2.8699,
      "step": 64428
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6486823558807373,
      "learning_rate": 0.0004914913215363789,
      "loss": 3.3783,
      "step": 64429
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.60200834274292,
      "learning_rate": 0.0004914881726690554,
      "loss": 3.1527,
      "step": 64430
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4439823627471924,
      "learning_rate": 0.0004914850237661305,
      "loss": 2.9494,
      "step": 64431
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8611685037612915,
      "learning_rate": 0.000491481874827605,
      "loss": 3.2121,
      "step": 64432
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8564049005508423,
      "learning_rate": 0.0004914787258534794,
      "loss": 3.0342,
      "step": 64433
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8085838556289673,
      "learning_rate": 0.0004914755768437543,
      "loss": 3.0083,
      "step": 64434
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.458086609840393,
      "learning_rate": 0.0004914724277984301,
      "loss": 2.9751,
      "step": 64435
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.44174325466156,
      "learning_rate": 0.0004914692787175078,
      "loss": 3.0113,
      "step": 64436
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4277029037475586,
      "learning_rate": 0.0004914661296009877,
      "loss": 3.2181,
      "step": 64437
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9975262880325317,
      "learning_rate": 0.0004914629804488703,
      "loss": 3.1229,
      "step": 64438
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.688767910003662,
      "learning_rate": 0.0004914598312611563,
      "loss": 2.8473,
      "step": 64439
    },
    {
      "epoch": 0.84,
      "grad_norm": 7.750646591186523,
      "learning_rate": 0.0004914566820378465,
      "loss": 2.9835,
      "step": 64440
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.598698616027832,
      "learning_rate": 0.0004914535327789411,
      "loss": 2.8949,
      "step": 64441
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3876123428344727,
      "learning_rate": 0.000491450383484441,
      "loss": 3.0102,
      "step": 64442
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.191582441329956,
      "learning_rate": 0.0004914472341543466,
      "loss": 3.0599,
      "step": 64443
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7337509393692017,
      "learning_rate": 0.0004914440847886586,
      "loss": 3.0851,
      "step": 64444
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4172358512878418,
      "learning_rate": 0.0004914409353873775,
      "loss": 2.7274,
      "step": 64445
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5491795539855957,
      "learning_rate": 0.000491437785950504,
      "loss": 3.2018,
      "step": 64446
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.55836820602417,
      "learning_rate": 0.0004914346364780384,
      "loss": 2.925,
      "step": 64447
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.052335500717163,
      "learning_rate": 0.0004914314869699816,
      "loss": 3.121,
      "step": 64448
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7208507061004639,
      "learning_rate": 0.0004914283374263341,
      "loss": 3.0085,
      "step": 64449
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5803041458129883,
      "learning_rate": 0.0004914251878470964,
      "loss": 3.142,
      "step": 64450
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5516165494918823,
      "learning_rate": 0.0004914220382322692,
      "loss": 2.9215,
      "step": 64451
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6348506212234497,
      "learning_rate": 0.0004914188885818529,
      "loss": 3.1483,
      "step": 64452
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8904691934585571,
      "learning_rate": 0.0004914157388958484,
      "loss": 3.2538,
      "step": 64453
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3101719617843628,
      "learning_rate": 0.000491412589174256,
      "loss": 3.117,
      "step": 64454
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0767993927001953,
      "learning_rate": 0.0004914094394170765,
      "loss": 3.0564,
      "step": 64455
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9064699411392212,
      "learning_rate": 0.0004914062896243102,
      "loss": 3.0929,
      "step": 64456
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8619917631149292,
      "learning_rate": 0.000491403139795958,
      "loss": 3.2158,
      "step": 64457
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2902579307556152,
      "learning_rate": 0.0004913999899320203,
      "loss": 2.787,
      "step": 64458
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1981754302978516,
      "learning_rate": 0.0004913968400324976,
      "loss": 3.0745,
      "step": 64459
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5625314712524414,
      "learning_rate": 0.0004913936900973907,
      "loss": 2.9278,
      "step": 64460
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5810610055923462,
      "learning_rate": 0.0004913905401267002,
      "loss": 2.9847,
      "step": 64461
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2587803602218628,
      "learning_rate": 0.0004913873901204264,
      "loss": 3.1415,
      "step": 64462
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.915154218673706,
      "learning_rate": 0.0004913842400785702,
      "loss": 3.0045,
      "step": 64463
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.465401291847229,
      "learning_rate": 0.0004913810900011321,
      "loss": 2.9042,
      "step": 64464
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6456129550933838,
      "learning_rate": 0.0004913779398881126,
      "loss": 3.1309,
      "step": 64465
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0555198192596436,
      "learning_rate": 0.0004913747897395122,
      "loss": 2.9116,
      "step": 64466
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3913511037826538,
      "learning_rate": 0.0004913716395553318,
      "loss": 2.9952,
      "step": 64467
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5010993480682373,
      "learning_rate": 0.0004913684893355717,
      "loss": 3.1364,
      "step": 64468
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7045278549194336,
      "learning_rate": 0.0004913653390802325,
      "loss": 3.2622,
      "step": 64469
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6635907888412476,
      "learning_rate": 0.0004913621887893151,
      "loss": 2.9982,
      "step": 64470
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7907451391220093,
      "learning_rate": 0.0004913590384628196,
      "loss": 3.0437,
      "step": 64471
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5368685722351074,
      "learning_rate": 0.0004913558881007471,
      "loss": 3.1749,
      "step": 64472
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7080721855163574,
      "learning_rate": 0.0004913527377030978,
      "loss": 3.1206,
      "step": 64473
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.549436092376709,
      "learning_rate": 0.0004913495872698724,
      "loss": 2.9698,
      "step": 64474
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7127519845962524,
      "learning_rate": 0.0004913464368010716,
      "loss": 2.9078,
      "step": 64475
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.47181236743927,
      "learning_rate": 0.0004913432862966958,
      "loss": 3.0632,
      "step": 64476
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5669639110565186,
      "learning_rate": 0.0004913401357567456,
      "loss": 3.1346,
      "step": 64477
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4146887063980103,
      "learning_rate": 0.0004913369851812217,
      "loss": 3.164,
      "step": 64478
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0476200580596924,
      "learning_rate": 0.0004913338345701248,
      "loss": 2.9309,
      "step": 64479
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0482230186462402,
      "learning_rate": 0.0004913306839234551,
      "loss": 2.9554,
      "step": 64480
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6595977544784546,
      "learning_rate": 0.0004913275332412136,
      "loss": 3.2904,
      "step": 64481
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4386990070343018,
      "learning_rate": 0.0004913243825234006,
      "loss": 2.93,
      "step": 64482
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.572608232498169,
      "learning_rate": 0.0004913212317700168,
      "loss": 3.0418,
      "step": 64483
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3002620935440063,
      "learning_rate": 0.0004913180809810628,
      "loss": 3.0628,
      "step": 64484
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8066461086273193,
      "learning_rate": 0.0004913149301565391,
      "loss": 2.9829,
      "step": 64485
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9143304824829102,
      "learning_rate": 0.0004913117792964463,
      "loss": 2.908,
      "step": 64486
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7313871383666992,
      "learning_rate": 0.0004913086284007852,
      "loss": 2.7998,
      "step": 64487
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.063811779022217,
      "learning_rate": 0.0004913054774695561,
      "loss": 2.9661,
      "step": 64488
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.825973391532898,
      "learning_rate": 0.0004913023265027597,
      "loss": 3.1681,
      "step": 64489
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5386807918548584,
      "learning_rate": 0.0004912991755003966,
      "loss": 2.9908,
      "step": 64490
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9572679996490479,
      "learning_rate": 0.0004912960244624674,
      "loss": 3.3026,
      "step": 64491
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8999079465866089,
      "learning_rate": 0.0004912928733889726,
      "loss": 3.1491,
      "step": 64492
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3210415840148926,
      "learning_rate": 0.0004912897222799129,
      "loss": 3.0399,
      "step": 64493
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0943281650543213,
      "learning_rate": 0.0004912865711352888,
      "loss": 2.9624,
      "step": 64494
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.671217441558838,
      "learning_rate": 0.0004912834199551009,
      "loss": 2.8855,
      "step": 64495
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.483525276184082,
      "learning_rate": 0.0004912802687393499,
      "loss": 2.8092,
      "step": 64496
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.499031662940979,
      "learning_rate": 0.0004912771174880362,
      "loss": 2.9969,
      "step": 64497
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5629078149795532,
      "learning_rate": 0.0004912739662011604,
      "loss": 3.2256,
      "step": 64498
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9092469215393066,
      "learning_rate": 0.0004912708148787233,
      "loss": 2.9515,
      "step": 64499
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5785332918167114,
      "learning_rate": 0.0004912676635207253,
      "loss": 3.0265,
      "step": 64500
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7887324094772339,
      "learning_rate": 0.000491264512127167,
      "loss": 3.0546,
      "step": 64501
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.612474203109741,
      "learning_rate": 0.000491261360698049,
      "loss": 2.9679,
      "step": 64502
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.479954719543457,
      "learning_rate": 0.000491258209233372,
      "loss": 2.967,
      "step": 64503
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5612032413482666,
      "learning_rate": 0.0004912550577331364,
      "loss": 2.8955,
      "step": 64504
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6653685569763184,
      "learning_rate": 0.0004912519061973429,
      "loss": 3.1293,
      "step": 64505
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4874814748764038,
      "learning_rate": 0.0004912487546259921,
      "loss": 2.9947,
      "step": 64506
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5317548513412476,
      "learning_rate": 0.0004912456030190845,
      "loss": 3.0468,
      "step": 64507
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6135329008102417,
      "learning_rate": 0.0004912424513766208,
      "loss": 3.0826,
      "step": 64508
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5599969625473022,
      "learning_rate": 0.0004912392996986015,
      "loss": 3.0807,
      "step": 64509
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.712098479270935,
      "learning_rate": 0.000491236147985027,
      "loss": 2.9835,
      "step": 64510
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.607569694519043,
      "learning_rate": 0.0004912329962358983,
      "loss": 3.0159,
      "step": 64511
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4462847709655762,
      "learning_rate": 0.0004912298444512158,
      "loss": 2.9232,
      "step": 64512
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.651161551475525,
      "learning_rate": 0.00049122669263098,
      "loss": 2.9418,
      "step": 64513
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6021360158920288,
      "learning_rate": 0.0004912235407751915,
      "loss": 3.1327,
      "step": 64514
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5464516878128052,
      "learning_rate": 0.000491220388883851,
      "loss": 3.0373,
      "step": 64515
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.945587158203125,
      "learning_rate": 0.000491217236956959,
      "loss": 2.958,
      "step": 64516
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7052428722381592,
      "learning_rate": 0.0004912140849945161,
      "loss": 2.9387,
      "step": 64517
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6465834379196167,
      "learning_rate": 0.0004912109329965229,
      "loss": 2.9115,
      "step": 64518
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5231101512908936,
      "learning_rate": 0.0004912077809629801,
      "loss": 3.0552,
      "step": 64519
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.61491858959198,
      "learning_rate": 0.0004912046288938879,
      "loss": 2.8634,
      "step": 64520
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.625900149345398,
      "learning_rate": 0.0004912014767892473,
      "loss": 2.9471,
      "step": 64521
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.53946852684021,
      "learning_rate": 0.0004911983246490588,
      "loss": 3.2205,
      "step": 64522
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.282106637954712,
      "learning_rate": 0.0004911951724733228,
      "loss": 3.0058,
      "step": 64523
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.59961998462677,
      "learning_rate": 0.0004911920202620401,
      "loss": 3.0519,
      "step": 64524
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.251009702682495,
      "learning_rate": 0.0004911888680152111,
      "loss": 2.8681,
      "step": 64525
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8114757537841797,
      "learning_rate": 0.0004911857157328365,
      "loss": 3.0339,
      "step": 64526
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3825920820236206,
      "learning_rate": 0.000491182563414917,
      "loss": 3.1179,
      "step": 64527
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.443451166152954,
      "learning_rate": 0.0004911794110614529,
      "loss": 3.191,
      "step": 64528
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5446295738220215,
      "learning_rate": 0.000491176258672445,
      "loss": 3.0784,
      "step": 64529
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6301792860031128,
      "learning_rate": 0.0004911731062478939,
      "loss": 2.927,
      "step": 64530
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7357869148254395,
      "learning_rate": 0.0004911699537878,
      "loss": 3.1298,
      "step": 64531
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.84373140335083,
      "learning_rate": 0.0004911668012921639,
      "loss": 3.0735,
      "step": 64532
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6337436437606812,
      "learning_rate": 0.0004911636487609865,
      "loss": 2.6641,
      "step": 64533
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4423284530639648,
      "learning_rate": 0.000491160496194268,
      "loss": 2.8793,
      "step": 64534
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7239408493041992,
      "learning_rate": 0.0004911573435920092,
      "loss": 3.1087,
      "step": 64535
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2626440525054932,
      "learning_rate": 0.0004911541909542107,
      "loss": 3.1005,
      "step": 64536
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3782479763031006,
      "learning_rate": 0.000491151038280873,
      "loss": 2.8976,
      "step": 64537
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8046458959579468,
      "learning_rate": 0.0004911478855719967,
      "loss": 3.0406,
      "step": 64538
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8767503499984741,
      "learning_rate": 0.0004911447328275825,
      "loss": 3.0113,
      "step": 64539
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4531530141830444,
      "learning_rate": 0.0004911415800476308,
      "loss": 2.8144,
      "step": 64540
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6502838134765625,
      "learning_rate": 0.0004911384272321423,
      "loss": 2.983,
      "step": 64541
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4571744203567505,
      "learning_rate": 0.0004911352743811175,
      "loss": 3.0629,
      "step": 64542
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7255573272705078,
      "learning_rate": 0.000491132121494557,
      "loss": 2.944,
      "step": 64543
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6645491123199463,
      "learning_rate": 0.0004911289685724616,
      "loss": 3.1019,
      "step": 64544
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6066657304763794,
      "learning_rate": 0.0004911258156148316,
      "loss": 2.8429,
      "step": 64545
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6877155303955078,
      "learning_rate": 0.0004911226626216677,
      "loss": 3.2078,
      "step": 64546
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7007300853729248,
      "learning_rate": 0.0004911195095929704,
      "loss": 2.8998,
      "step": 64547
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5449298620224,
      "learning_rate": 0.0004911163565287406,
      "loss": 2.9043,
      "step": 64548
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.561435341835022,
      "learning_rate": 0.0004911132034289785,
      "loss": 3.2688,
      "step": 64549
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5879034996032715,
      "learning_rate": 0.0004911100502936849,
      "loss": 3.1125,
      "step": 64550
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5208485126495361,
      "learning_rate": 0.0004911068971228604,
      "loss": 3.1495,
      "step": 64551
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6446579694747925,
      "learning_rate": 0.0004911037439165053,
      "loss": 3.1813,
      "step": 64552
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4457036256790161,
      "learning_rate": 0.0004911005906746206,
      "loss": 2.717,
      "step": 64553
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.703805923461914,
      "learning_rate": 0.0004910974373972067,
      "loss": 3.2556,
      "step": 64554
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.717887282371521,
      "learning_rate": 0.0004910942840842641,
      "loss": 2.886,
      "step": 64555
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.614388108253479,
      "learning_rate": 0.0004910911307357935,
      "loss": 3.0161,
      "step": 64556
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.975666880607605,
      "learning_rate": 0.0004910879773517955,
      "loss": 3.0764,
      "step": 64557
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5044898986816406,
      "learning_rate": 0.0004910848239322705,
      "loss": 3.3947,
      "step": 64558
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.159473180770874,
      "learning_rate": 0.0004910816704772194,
      "loss": 3.2928,
      "step": 64559
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8774597644805908,
      "learning_rate": 0.0004910785169866426,
      "loss": 3.392,
      "step": 64560
    },
    {
      "epoch": 0.84,
      "grad_norm": 9.625289916992188,
      "learning_rate": 0.0004910753634605405,
      "loss": 3.2286,
      "step": 64561
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8837463855743408,
      "learning_rate": 0.0004910722098989141,
      "loss": 3.1094,
      "step": 64562
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6632750034332275,
      "learning_rate": 0.0004910690563017636,
      "loss": 3.1323,
      "step": 64563
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.049405813217163,
      "learning_rate": 0.0004910659026690899,
      "loss": 3.1992,
      "step": 64564
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6964002847671509,
      "learning_rate": 0.0004910627490008933,
      "loss": 2.9431,
      "step": 64565
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9199258089065552,
      "learning_rate": 0.0004910595952971746,
      "loss": 3.0379,
      "step": 64566
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8033007383346558,
      "learning_rate": 0.0004910564415579343,
      "loss": 2.7767,
      "step": 64567
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4723353385925293,
      "learning_rate": 0.000491053287783173,
      "loss": 3.0948,
      "step": 64568
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6218969821929932,
      "learning_rate": 0.0004910501339728913,
      "loss": 2.975,
      "step": 64569
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0945401191711426,
      "learning_rate": 0.0004910469801270898,
      "loss": 3.0246,
      "step": 64570
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8474899530410767,
      "learning_rate": 0.000491043826245769,
      "loss": 2.852,
      "step": 64571
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5332062244415283,
      "learning_rate": 0.0004910406723289296,
      "loss": 3.2182,
      "step": 64572
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.547431468963623,
      "learning_rate": 0.0004910375183765722,
      "loss": 2.9862,
      "step": 64573
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.743586540222168,
      "learning_rate": 0.0004910343643886972,
      "loss": 3.1239,
      "step": 64574
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.300311803817749,
      "learning_rate": 0.0004910312103653053,
      "loss": 2.7041,
      "step": 64575
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4472579956054688,
      "learning_rate": 0.0004910280563063972,
      "loss": 3.193,
      "step": 64576
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4857513904571533,
      "learning_rate": 0.0004910249022119733,
      "loss": 3.0746,
      "step": 64577
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2121920585632324,
      "learning_rate": 0.0004910217480820342,
      "loss": 3.0722,
      "step": 64578
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.522059679031372,
      "learning_rate": 0.0004910185939165807,
      "loss": 3.1551,
      "step": 64579
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5011143684387207,
      "learning_rate": 0.000491015439715613,
      "loss": 3.0418,
      "step": 64580
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5103126764297485,
      "learning_rate": 0.0004910122854791321,
      "loss": 2.6761,
      "step": 64581
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.519002079963684,
      "learning_rate": 0.0004910091312071385,
      "loss": 3.0716,
      "step": 64582
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5536919832229614,
      "learning_rate": 0.0004910059768996326,
      "loss": 3.0935,
      "step": 64583
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9137778282165527,
      "learning_rate": 0.0004910028225566151,
      "loss": 3.0128,
      "step": 64584
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8892089128494263,
      "learning_rate": 0.0004909996681780865,
      "loss": 2.899,
      "step": 64585
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7372512817382812,
      "learning_rate": 0.0004909965137640475,
      "loss": 3.013,
      "step": 64586
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9139796495437622,
      "learning_rate": 0.0004909933593144986,
      "loss": 2.9645,
      "step": 64587
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1104612350463867,
      "learning_rate": 0.0004909902048294405,
      "loss": 2.9804,
      "step": 64588
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.646741509437561,
      "learning_rate": 0.0004909870503088737,
      "loss": 3.0893,
      "step": 64589
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.569381833076477,
      "learning_rate": 0.0004909838957527988,
      "loss": 3.1059,
      "step": 64590
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8342201709747314,
      "learning_rate": 0.0004909807411612164,
      "loss": 3.0588,
      "step": 64591
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4993999004364014,
      "learning_rate": 0.0004909775865341271,
      "loss": 3.1549,
      "step": 64592
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1987111568450928,
      "learning_rate": 0.0004909744318715313,
      "loss": 3.2086,
      "step": 64593
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6694731712341309,
      "learning_rate": 0.0004909712771734299,
      "loss": 2.9886,
      "step": 64594
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8378344774246216,
      "learning_rate": 0.0004909681224398234,
      "loss": 3.0695,
      "step": 64595
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6327908039093018,
      "learning_rate": 0.0004909649676707121,
      "loss": 3.0043,
      "step": 64596
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9504767656326294,
      "learning_rate": 0.000490961812866097,
      "loss": 2.8225,
      "step": 64597
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8962693214416504,
      "learning_rate": 0.0004909586580259784,
      "loss": 3.0861,
      "step": 64598
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7704073190689087,
      "learning_rate": 0.000490955503150357,
      "loss": 3.1466,
      "step": 64599
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3296407461166382,
      "learning_rate": 0.0004909523482392334,
      "loss": 3.0029,
      "step": 64600
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3776592016220093,
      "learning_rate": 0.0004909491932926081,
      "loss": 3.0683,
      "step": 64601
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9030920267105103,
      "learning_rate": 0.0004909460383104818,
      "loss": 3.3042,
      "step": 64602
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9237934350967407,
      "learning_rate": 0.0004909428832928549,
      "loss": 3.0852,
      "step": 64603
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6503311395645142,
      "learning_rate": 0.0004909397282397282,
      "loss": 2.9099,
      "step": 64604
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6079981327056885,
      "learning_rate": 0.0004909365731511023,
      "loss": 2.9204,
      "step": 64605
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2933149337768555,
      "learning_rate": 0.0004909334180269777,
      "loss": 3.1919,
      "step": 64606
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5081357955932617,
      "learning_rate": 0.0004909302628673547,
      "loss": 3.1327,
      "step": 64607
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.749620795249939,
      "learning_rate": 0.0004909271076722344,
      "loss": 3.0211,
      "step": 64608
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5986125469207764,
      "learning_rate": 0.000490923952441617,
      "loss": 2.9882,
      "step": 64609
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6274259090423584,
      "learning_rate": 0.0004909207971755033,
      "loss": 3.1279,
      "step": 64610
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6537572145462036,
      "learning_rate": 0.0004909176418738939,
      "loss": 3.1419,
      "step": 64611
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8948034048080444,
      "learning_rate": 0.0004909144865367892,
      "loss": 2.9625,
      "step": 64612
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7084280252456665,
      "learning_rate": 0.0004909113311641899,
      "loss": 3.0378,
      "step": 64613
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3917081356048584,
      "learning_rate": 0.0004909081757560966,
      "loss": 2.8368,
      "step": 64614
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8717389106750488,
      "learning_rate": 0.00049090502031251,
      "loss": 2.9588,
      "step": 64615
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5363847017288208,
      "learning_rate": 0.0004909018648334303,
      "loss": 3.0768,
      "step": 64616
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4740248918533325,
      "learning_rate": 0.0004908987093188586,
      "loss": 2.8143,
      "step": 64617
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4051238298416138,
      "learning_rate": 0.000490895553768795,
      "loss": 2.9467,
      "step": 64618
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5691041946411133,
      "learning_rate": 0.0004908923981832404,
      "loss": 3.1614,
      "step": 64619
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.52097749710083,
      "learning_rate": 0.0004908892425621953,
      "loss": 2.9519,
      "step": 64620
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.456228494644165,
      "learning_rate": 0.0004908860869056604,
      "loss": 3.1601,
      "step": 64621
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.192373514175415,
      "learning_rate": 0.000490882931213636,
      "loss": 2.8841,
      "step": 64622
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5460186004638672,
      "learning_rate": 0.0004908797754861229,
      "loss": 3.0245,
      "step": 64623
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.238346576690674,
      "learning_rate": 0.0004908766197231217,
      "loss": 2.9644,
      "step": 64624
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.8236896991729736,
      "learning_rate": 0.0004908734639246329,
      "loss": 3.0906,
      "step": 64625
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.627995491027832,
      "learning_rate": 0.0004908703080906572,
      "loss": 3.1853,
      "step": 64626
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2192583084106445,
      "learning_rate": 0.0004908671522211949,
      "loss": 3.1596,
      "step": 64627
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.816937208175659,
      "learning_rate": 0.000490863996316247,
      "loss": 3.0426,
      "step": 64628
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9272613525390625,
      "learning_rate": 0.0004908608403758138,
      "loss": 3.2092,
      "step": 64629
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.566096305847168,
      "learning_rate": 0.000490857684399896,
      "loss": 2.8745,
      "step": 64630
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.658487558364868,
      "learning_rate": 0.0004908545283884941,
      "loss": 3.2514,
      "step": 64631
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.213968515396118,
      "learning_rate": 0.0004908513723416088,
      "loss": 3.2096,
      "step": 64632
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9698584079742432,
      "learning_rate": 0.0004908482162592406,
      "loss": 3.0937,
      "step": 64633
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5831142663955688,
      "learning_rate": 0.0004908450601413901,
      "loss": 3.0428,
      "step": 64634
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.615168571472168,
      "learning_rate": 0.0004908419039880579,
      "loss": 3.1622,
      "step": 64635
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7325026988983154,
      "learning_rate": 0.0004908387477992446,
      "loss": 3.1508,
      "step": 64636
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6923277378082275,
      "learning_rate": 0.0004908355915749508,
      "loss": 3.3562,
      "step": 64637
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3398027420043945,
      "learning_rate": 0.000490832435315177,
      "loss": 2.8333,
      "step": 64638
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.6791884899139404,
      "learning_rate": 0.0004908292790199238,
      "loss": 3.2653,
      "step": 64639
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.057297706604004,
      "learning_rate": 0.000490826122689192,
      "loss": 2.8952,
      "step": 64640
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6831554174423218,
      "learning_rate": 0.0004908229663229819,
      "loss": 3.0778,
      "step": 64641
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.893058180809021,
      "learning_rate": 0.0004908198099212943,
      "loss": 3.0093,
      "step": 64642
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2808845043182373,
      "learning_rate": 0.0004908166534841296,
      "loss": 2.9432,
      "step": 64643
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6569337844848633,
      "learning_rate": 0.0004908134970114886,
      "loss": 3.174,
      "step": 64644
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4772993326187134,
      "learning_rate": 0.0004908103405033716,
      "loss": 3.0678,
      "step": 64645
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.4454638957977295,
      "learning_rate": 0.0004908071839597795,
      "loss": 2.9245,
      "step": 64646
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.856369972229004,
      "learning_rate": 0.0004908040273807127,
      "loss": 3.1252,
      "step": 64647
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5824713706970215,
      "learning_rate": 0.0004908008707661718,
      "loss": 3.2365,
      "step": 64648
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.683096170425415,
      "learning_rate": 0.0004907977141161574,
      "loss": 2.9726,
      "step": 64649
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0100467205047607,
      "learning_rate": 0.0004907945574306702,
      "loss": 2.9386,
      "step": 64650
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9813984632492065,
      "learning_rate": 0.0004907914007097105,
      "loss": 3.122,
      "step": 64651
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6885826587677002,
      "learning_rate": 0.0004907882439532794,
      "loss": 2.7642,
      "step": 64652
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4776238203048706,
      "learning_rate": 0.0004907850871613768,
      "loss": 3.0559,
      "step": 64653
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4518282413482666,
      "learning_rate": 0.0004907819303340039,
      "loss": 3.1402,
      "step": 64654
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8579230308532715,
      "learning_rate": 0.000490778773471161,
      "loss": 2.8593,
      "step": 64655
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6004317998886108,
      "learning_rate": 0.0004907756165728487,
      "loss": 3.3477,
      "step": 64656
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9189642667770386,
      "learning_rate": 0.0004907724596390676,
      "loss": 3.2081,
      "step": 64657
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8203846216201782,
      "learning_rate": 0.0004907693026698183,
      "loss": 3.0444,
      "step": 64658
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.964110255241394,
      "learning_rate": 0.0004907661456651013,
      "loss": 3.0932,
      "step": 64659
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6209137439727783,
      "learning_rate": 0.0004907629886249174,
      "loss": 3.0088,
      "step": 64660
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7247921228408813,
      "learning_rate": 0.0004907598315492671,
      "loss": 2.8616,
      "step": 64661
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9262571334838867,
      "learning_rate": 0.0004907566744381508,
      "loss": 3.2525,
      "step": 64662
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.570591688156128,
      "learning_rate": 0.0004907535172915693,
      "loss": 3.2768,
      "step": 64663
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6443721055984497,
      "learning_rate": 0.000490750360109523,
      "loss": 2.883,
      "step": 64664
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3545674085617065,
      "learning_rate": 0.0004907472028920128,
      "loss": 3.2,
      "step": 64665
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.50259268283844,
      "learning_rate": 0.000490744045639039,
      "loss": 3.0615,
      "step": 64666
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3769747018814087,
      "learning_rate": 0.0004907408883506023,
      "loss": 2.9255,
      "step": 64667
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.745530366897583,
      "learning_rate": 0.0004907377310267032,
      "loss": 2.944,
      "step": 64668
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5507558584213257,
      "learning_rate": 0.0004907345736673425,
      "loss": 3.0174,
      "step": 64669
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4609861373901367,
      "learning_rate": 0.0004907314162725206,
      "loss": 2.9247,
      "step": 64670
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8731757402420044,
      "learning_rate": 0.0004907282588422381,
      "loss": 3.0835,
      "step": 64671
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6124054193496704,
      "learning_rate": 0.0004907251013764955,
      "loss": 3.0462,
      "step": 64672
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.970617651939392,
      "learning_rate": 0.0004907219438752936,
      "loss": 2.9021,
      "step": 64673
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8012430667877197,
      "learning_rate": 0.000490718786338633,
      "loss": 2.7177,
      "step": 64674
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3543260097503662,
      "learning_rate": 0.000490715628766514,
      "loss": 3.0846,
      "step": 64675
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6559505462646484,
      "learning_rate": 0.0004907124711589375,
      "loss": 3.0253,
      "step": 64676
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.676318645477295,
      "learning_rate": 0.000490709313515904,
      "loss": 3.2197,
      "step": 64677
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9179564714431763,
      "learning_rate": 0.0004907061558374139,
      "loss": 3.1979,
      "step": 64678
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.631552815437317,
      "learning_rate": 0.0004907029981234679,
      "loss": 2.9254,
      "step": 64679
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8304495811462402,
      "learning_rate": 0.0004906998403740667,
      "loss": 2.769,
      "step": 64680
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6299928426742554,
      "learning_rate": 0.0004906966825892107,
      "loss": 3.1148,
      "step": 64681
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.452890157699585,
      "learning_rate": 0.0004906935247689007,
      "loss": 3.0095,
      "step": 64682
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5729293823242188,
      "learning_rate": 0.0004906903669131371,
      "loss": 3.1045,
      "step": 64683
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2983551025390625,
      "learning_rate": 0.0004906872090219206,
      "loss": 2.9726,
      "step": 64684
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.778470516204834,
      "learning_rate": 0.0004906840510952517,
      "loss": 3.0924,
      "step": 64685
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4725546836853027,
      "learning_rate": 0.0004906808931331312,
      "loss": 3.0262,
      "step": 64686
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7068393230438232,
      "learning_rate": 0.0004906777351355593,
      "loss": 2.9539,
      "step": 64687
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9391204118728638,
      "learning_rate": 0.0004906745771025371,
      "loss": 3.3279,
      "step": 64688
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9477286338806152,
      "learning_rate": 0.0004906714190340646,
      "loss": 2.8684,
      "step": 64689
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4794580936431885,
      "learning_rate": 0.0004906682609301428,
      "loss": 2.9897,
      "step": 64690
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8000578880310059,
      "learning_rate": 0.0004906651027907723,
      "loss": 3.2259,
      "step": 64691
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6482532024383545,
      "learning_rate": 0.0004906619446159533,
      "loss": 2.9467,
      "step": 64692
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7528973817825317,
      "learning_rate": 0.0004906587864056867,
      "loss": 2.8666,
      "step": 64693
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.389615774154663,
      "learning_rate": 0.0004906556281599731,
      "loss": 2.9149,
      "step": 64694
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.944542646408081,
      "learning_rate": 0.0004906524698788131,
      "loss": 3.3139,
      "step": 64695
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9166978597640991,
      "learning_rate": 0.0004906493115622071,
      "loss": 2.9908,
      "step": 64696
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2504161596298218,
      "learning_rate": 0.0004906461532101558,
      "loss": 2.786,
      "step": 64697
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6421629190444946,
      "learning_rate": 0.0004906429948226599,
      "loss": 3.2372,
      "step": 64698
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.836141586303711,
      "learning_rate": 0.0004906398363997197,
      "loss": 2.9058,
      "step": 64699
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0116231441497803,
      "learning_rate": 0.000490636677941336,
      "loss": 2.6596,
      "step": 64700
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4729650020599365,
      "learning_rate": 0.0004906335194475094,
      "loss": 2.8651,
      "step": 64701
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0321576595306396,
      "learning_rate": 0.0004906303609182404,
      "loss": 3.1375,
      "step": 64702
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.0780253410339355,
      "learning_rate": 0.0004906272023535295,
      "loss": 2.9876,
      "step": 64703
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5633224248886108,
      "learning_rate": 0.0004906240437533776,
      "loss": 3.0903,
      "step": 64704
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5420551300048828,
      "learning_rate": 0.000490620885117785,
      "loss": 3.2843,
      "step": 64705
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9971470832824707,
      "learning_rate": 0.0004906177264467525,
      "loss": 2.8792,
      "step": 64706
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5655003786087036,
      "learning_rate": 0.0004906145677402805,
      "loss": 2.9986,
      "step": 64707
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.558544397354126,
      "learning_rate": 0.0004906114089983695,
      "loss": 3.1466,
      "step": 64708
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4197081327438354,
      "learning_rate": 0.0004906082502210204,
      "loss": 3.1766,
      "step": 64709
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.815966844558716,
      "learning_rate": 0.0004906050914082336,
      "loss": 3.1581,
      "step": 64710
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.074251890182495,
      "learning_rate": 0.0004906019325600096,
      "loss": 2.9587,
      "step": 64711
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8148280382156372,
      "learning_rate": 0.0004905987736763493,
      "loss": 3.1224,
      "step": 64712
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.539750337600708,
      "learning_rate": 0.0004905956147572529,
      "loss": 3.0959,
      "step": 64713
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4461570978164673,
      "learning_rate": 0.0004905924558027213,
      "loss": 3.1246,
      "step": 64714
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5351629257202148,
      "learning_rate": 0.000490589296812755,
      "loss": 3.1532,
      "step": 64715
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7244457006454468,
      "learning_rate": 0.0004905861377873545,
      "loss": 2.9342,
      "step": 64716
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.765905737876892,
      "learning_rate": 0.0004905829787265204,
      "loss": 2.933,
      "step": 64717
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.168971300125122,
      "learning_rate": 0.0004905798196302533,
      "loss": 2.8904,
      "step": 64718
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6584051847457886,
      "learning_rate": 0.000490576660498554,
      "loss": 3.2248,
      "step": 64719
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7433192729949951,
      "learning_rate": 0.0004905735013314227,
      "loss": 2.8534,
      "step": 64720
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.475803256034851,
      "learning_rate": 0.0004905703421288603,
      "loss": 2.9847,
      "step": 64721
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8396055698394775,
      "learning_rate": 0.0004905671828908672,
      "loss": 3.147,
      "step": 64722
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6335725784301758,
      "learning_rate": 0.000490564023617444,
      "loss": 2.9682,
      "step": 64723
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.520692229270935,
      "learning_rate": 0.0004905608643085915,
      "loss": 2.9557,
      "step": 64724
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5679078102111816,
      "learning_rate": 0.0004905577049643101,
      "loss": 2.9,
      "step": 64725
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.761252760887146,
      "learning_rate": 0.0004905545455846004,
      "loss": 3.0021,
      "step": 64726
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7678786516189575,
      "learning_rate": 0.000490551386169463,
      "loss": 2.9283,
      "step": 64727
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5779515504837036,
      "learning_rate": 0.0004905482267188985,
      "loss": 3.1643,
      "step": 64728
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7602925300598145,
      "learning_rate": 0.0004905450672329076,
      "loss": 3.024,
      "step": 64729
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9439725875854492,
      "learning_rate": 0.0004905419077114907,
      "loss": 3.1849,
      "step": 64730
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.5203139781951904,
      "learning_rate": 0.0004905387481546484,
      "loss": 3.0567,
      "step": 64731
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5590953826904297,
      "learning_rate": 0.0004905355885623814,
      "loss": 3.0247,
      "step": 64732
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9204705953598022,
      "learning_rate": 0.0004905324289346903,
      "loss": 3.0047,
      "step": 64733
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7365080118179321,
      "learning_rate": 0.0004905292692715755,
      "loss": 3.0814,
      "step": 64734
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.65396249294281,
      "learning_rate": 0.0004905261095730377,
      "loss": 2.9396,
      "step": 64735
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1764886379241943,
      "learning_rate": 0.0004905229498390777,
      "loss": 3.0714,
      "step": 64736
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1082773208618164,
      "learning_rate": 0.0004905197900696958,
      "loss": 2.9552,
      "step": 64737
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.507861375808716,
      "learning_rate": 0.0004905166302648926,
      "loss": 2.9159,
      "step": 64738
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5955206155776978,
      "learning_rate": 0.0004905134704246688,
      "loss": 3.0145,
      "step": 64739
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.746067762374878,
      "learning_rate": 0.000490510310549025,
      "loss": 2.9874,
      "step": 64740
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2274909019470215,
      "learning_rate": 0.0004905071506379617,
      "loss": 3.0793,
      "step": 64741
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.022757053375244,
      "learning_rate": 0.0004905039906914795,
      "loss": 3.1401,
      "step": 64742
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0528030395507812,
      "learning_rate": 0.000490500830709579,
      "loss": 3.0405,
      "step": 64743
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.552181601524353,
      "learning_rate": 0.0004904976706922608,
      "loss": 3.1971,
      "step": 64744
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1928279399871826,
      "learning_rate": 0.0004904945106395254,
      "loss": 2.981,
      "step": 64745
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4984798431396484,
      "learning_rate": 0.0004904913505513736,
      "loss": 3.1078,
      "step": 64746
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.726156234741211,
      "learning_rate": 0.0004904881904278059,
      "loss": 3.109,
      "step": 64747
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5805327892303467,
      "learning_rate": 0.0004904850302688227,
      "loss": 3.0789,
      "step": 64748
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3412686586380005,
      "learning_rate": 0.0004904818700744249,
      "loss": 3.1018,
      "step": 64749
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9920734167099,
      "learning_rate": 0.0004904787098446128,
      "loss": 3.2341,
      "step": 64750
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7408380508422852,
      "learning_rate": 0.0004904755495793872,
      "loss": 3.527,
      "step": 64751
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0779335498809814,
      "learning_rate": 0.0004904723892787487,
      "loss": 3.1556,
      "step": 64752
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9175243377685547,
      "learning_rate": 0.0004904692289426976,
      "loss": 3.0787,
      "step": 64753
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7036666870117188,
      "learning_rate": 0.0004904660685712347,
      "loss": 3.1461,
      "step": 64754
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4500054121017456,
      "learning_rate": 0.0004904629081643607,
      "loss": 3.0817,
      "step": 64755
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.541793942451477,
      "learning_rate": 0.0004904597477220758,
      "loss": 3.0252,
      "step": 64756
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.3247729539871216,
      "learning_rate": 0.000490456587244381,
      "loss": 2.8266,
      "step": 64757
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.558428406715393,
      "learning_rate": 0.0004904534267312767,
      "loss": 3.1944,
      "step": 64758
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.4963486194610596,
      "learning_rate": 0.0004904502661827636,
      "loss": 3.1847,
      "step": 64759
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.979098081588745,
      "learning_rate": 0.000490447105598842,
      "loss": 3.0563,
      "step": 64760
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.977047085762024,
      "learning_rate": 0.0004904439449795128,
      "loss": 2.5983,
      "step": 64761
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8601336479187012,
      "learning_rate": 0.0004904407843247766,
      "loss": 3.1181,
      "step": 64762
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2436726093292236,
      "learning_rate": 0.0004904376236346337,
      "loss": 3.3017,
      "step": 64763
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4976043701171875,
      "learning_rate": 0.000490434462909085,
      "loss": 3.1612,
      "step": 64764
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9937349557876587,
      "learning_rate": 0.0004904313021481308,
      "loss": 3.1056,
      "step": 64765
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.712049961090088,
      "learning_rate": 0.0004904281413517719,
      "loss": 2.9265,
      "step": 64766
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9384263753890991,
      "learning_rate": 0.0004904249805200088,
      "loss": 2.912,
      "step": 64767
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4378023147583008,
      "learning_rate": 0.000490421819652842,
      "loss": 3.0908,
      "step": 64768
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2629506587982178,
      "learning_rate": 0.0004904186587502723,
      "loss": 2.868,
      "step": 64769
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.35357666015625,
      "learning_rate": 0.0004904154978123001,
      "loss": 2.7356,
      "step": 64770
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7635815143585205,
      "learning_rate": 0.0004904123368389262,
      "loss": 3.0031,
      "step": 64771
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7332667112350464,
      "learning_rate": 0.0004904091758301509,
      "loss": 2.8648,
      "step": 64772
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3810980319976807,
      "learning_rate": 0.0004904060147859749,
      "loss": 3.0608,
      "step": 64773
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5069469213485718,
      "learning_rate": 0.000490402853706399,
      "loss": 2.9413,
      "step": 64774
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5762674808502197,
      "learning_rate": 0.0004903996925914235,
      "loss": 2.9488,
      "step": 64775
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4579969644546509,
      "learning_rate": 0.0004903965314410492,
      "loss": 2.9705,
      "step": 64776
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6998339891433716,
      "learning_rate": 0.0004903933702552765,
      "loss": 3.0462,
      "step": 64777
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6438987255096436,
      "learning_rate": 0.0004903902090341062,
      "loss": 2.8568,
      "step": 64778
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.655194640159607,
      "learning_rate": 0.0004903870477775387,
      "loss": 3.0618,
      "step": 64779
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.489284873008728,
      "learning_rate": 0.0004903838864855746,
      "loss": 3.1347,
      "step": 64780
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3459255695343018,
      "learning_rate": 0.0004903807251582146,
      "loss": 2.9777,
      "step": 64781
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.94199538230896,
      "learning_rate": 0.0004903775637954592,
      "loss": 2.9345,
      "step": 64782
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9216654300689697,
      "learning_rate": 0.000490374402397309,
      "loss": 2.9876,
      "step": 64783
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6987947225570679,
      "learning_rate": 0.0004903712409637646,
      "loss": 3.1305,
      "step": 64784
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5724753141403198,
      "learning_rate": 0.0004903680794948267,
      "loss": 3.1508,
      "step": 64785
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5665532350540161,
      "learning_rate": 0.0004903649179904957,
      "loss": 3.0422,
      "step": 64786
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.548255205154419,
      "learning_rate": 0.0004903617564507722,
      "loss": 3.1785,
      "step": 64787
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.399690866470337,
      "learning_rate": 0.0004903585948756569,
      "loss": 2.833,
      "step": 64788
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2330973148345947,
      "learning_rate": 0.0004903554332651505,
      "loss": 3.2867,
      "step": 64789
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5198309421539307,
      "learning_rate": 0.0004903522716192532,
      "loss": 3.1656,
      "step": 64790
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8849018812179565,
      "learning_rate": 0.0004903491099379659,
      "loss": 3.0155,
      "step": 64791
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4934556484222412,
      "learning_rate": 0.0004903459482212891,
      "loss": 2.9406,
      "step": 64792
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9530260562896729,
      "learning_rate": 0.0004903427864692236,
      "loss": 2.974,
      "step": 64793
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4749932289123535,
      "learning_rate": 0.0004903396246817695,
      "loss": 3.1123,
      "step": 64794
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5350492000579834,
      "learning_rate": 0.0004903364628589279,
      "loss": 2.8524,
      "step": 64795
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7612464427947998,
      "learning_rate": 0.0004903333010006989,
      "loss": 3.0866,
      "step": 64796
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0390264987945557,
      "learning_rate": 0.0004903301391070836,
      "loss": 3.0381,
      "step": 64797
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8953644037246704,
      "learning_rate": 0.0004903269771780822,
      "loss": 3.2081,
      "step": 64798
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6236575841903687,
      "learning_rate": 0.0004903238152136955,
      "loss": 3.1851,
      "step": 64799
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8515886068344116,
      "learning_rate": 0.0004903206532139239,
      "loss": 2.694,
      "step": 64800
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.029967784881592,
      "learning_rate": 0.0004903174911787681,
      "loss": 3.004,
      "step": 64801
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.727489709854126,
      "learning_rate": 0.0004903143291082288,
      "loss": 3.2242,
      "step": 64802
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4708828926086426,
      "learning_rate": 0.0004903111670023064,
      "loss": 3.1493,
      "step": 64803
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6689937114715576,
      "learning_rate": 0.0004903080048610015,
      "loss": 3.0623,
      "step": 64804
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4409294128417969,
      "learning_rate": 0.0004903048426843148,
      "loss": 2.9651,
      "step": 64805
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4772132635116577,
      "learning_rate": 0.0004903016804722469,
      "loss": 3.173,
      "step": 64806
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.2587642669677734,
      "learning_rate": 0.0004902985182247983,
      "loss": 3.0585,
      "step": 64807
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6668956279754639,
      "learning_rate": 0.0004902953559419696,
      "loss": 2.9853,
      "step": 64808
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8195008039474487,
      "learning_rate": 0.0004902921936237614,
      "loss": 3.1162,
      "step": 64809
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.700413703918457,
      "learning_rate": 0.0004902890312701743,
      "loss": 2.8644,
      "step": 64810
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5625154972076416,
      "learning_rate": 0.0004902858688812088,
      "loss": 3.0374,
      "step": 64811
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6385784149169922,
      "learning_rate": 0.0004902827064568656,
      "loss": 3.0986,
      "step": 64812
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4013445377349854,
      "learning_rate": 0.0004902795439971453,
      "loss": 3.1526,
      "step": 64813
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.744817018508911,
      "learning_rate": 0.0004902763815020485,
      "loss": 3.32,
      "step": 64814
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6293781995773315,
      "learning_rate": 0.0004902732189715756,
      "loss": 3.0502,
      "step": 64815
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.659070611000061,
      "learning_rate": 0.0004902700564057274,
      "loss": 3.0121,
      "step": 64816
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9451324939727783,
      "learning_rate": 0.0004902668938045044,
      "loss": 3.0477,
      "step": 64817
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.2789411544799805,
      "learning_rate": 0.000490263731167907,
      "loss": 2.9878,
      "step": 64818
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4968191385269165,
      "learning_rate": 0.0004902605684959363,
      "loss": 3.0739,
      "step": 64819
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9861564636230469,
      "learning_rate": 0.0004902574057885923,
      "loss": 2.9982,
      "step": 64820
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5161100625991821,
      "learning_rate": 0.0004902542430458761,
      "loss": 2.9275,
      "step": 64821
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.205457925796509,
      "learning_rate": 0.0004902510802677879,
      "loss": 3.0315,
      "step": 64822
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3230369091033936,
      "learning_rate": 0.0004902479174543285,
      "loss": 3.198,
      "step": 64823
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4160629510879517,
      "learning_rate": 0.0004902447546054983,
      "loss": 2.9905,
      "step": 64824
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3801941871643066,
      "learning_rate": 0.0004902415917212981,
      "loss": 3.0034,
      "step": 64825
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8280236721038818,
      "learning_rate": 0.0004902384288017283,
      "loss": 3.0117,
      "step": 64826
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6053305864334106,
      "learning_rate": 0.0004902352658467898,
      "loss": 2.9224,
      "step": 64827
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5695675611495972,
      "learning_rate": 0.0004902321028564828,
      "loss": 3.0372,
      "step": 64828
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6954818964004517,
      "learning_rate": 0.0004902289398308081,
      "loss": 3.2595,
      "step": 64829
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0639147758483887,
      "learning_rate": 0.0004902257767697662,
      "loss": 3.0165,
      "step": 64830
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6181790828704834,
      "learning_rate": 0.0004902226136733578,
      "loss": 3.41,
      "step": 64831
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7489302158355713,
      "learning_rate": 0.0004902194505415834,
      "loss": 3.1371,
      "step": 64832
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7954550981521606,
      "learning_rate": 0.0004902162873744436,
      "loss": 3.1232,
      "step": 64833
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.0073354244232178,
      "learning_rate": 0.000490213124171939,
      "loss": 2.9107,
      "step": 64834
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.659166693687439,
      "learning_rate": 0.0004902099609340703,
      "loss": 3.0166,
      "step": 64835
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8738499879837036,
      "learning_rate": 0.0004902067976608379,
      "loss": 3.0047,
      "step": 64836
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3649086952209473,
      "learning_rate": 0.0004902036343522424,
      "loss": 2.9218,
      "step": 64837
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1174347400665283,
      "learning_rate": 0.0004902004710082846,
      "loss": 2.8293,
      "step": 64838
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.623023748397827,
      "learning_rate": 0.0004901973076289648,
      "loss": 2.8797,
      "step": 64839
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.871764898300171,
      "learning_rate": 0.0004901941442142837,
      "loss": 3.1859,
      "step": 64840
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5295079946517944,
      "learning_rate": 0.0004901909807642421,
      "loss": 2.7009,
      "step": 64841
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3281826972961426,
      "learning_rate": 0.0004901878172788403,
      "loss": 2.9029,
      "step": 64842
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1729671955108643,
      "learning_rate": 0.000490184653758079,
      "loss": 2.9632,
      "step": 64843
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5086119174957275,
      "learning_rate": 0.0004901814902019588,
      "loss": 2.8888,
      "step": 64844
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.56522798538208,
      "learning_rate": 0.0004901783266104803,
      "loss": 3.0809,
      "step": 64845
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6473705768585205,
      "learning_rate": 0.000490175162983644,
      "loss": 2.8235,
      "step": 64846
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.09334135055542,
      "learning_rate": 0.0004901719993214505,
      "loss": 3.2127,
      "step": 64847
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.814194679260254,
      "learning_rate": 0.0004901688356239006,
      "loss": 3.1721,
      "step": 64848
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6692261695861816,
      "learning_rate": 0.0004901656718909945,
      "loss": 3.3374,
      "step": 64849
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.4420855045318604,
      "learning_rate": 0.0004901625081227333,
      "loss": 3.1395,
      "step": 64850
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5132488012313843,
      "learning_rate": 0.000490159344319117,
      "loss": 3.1049,
      "step": 64851
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.1273906230926514,
      "learning_rate": 0.0004901561804801466,
      "loss": 3.2241,
      "step": 64852
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6380101442337036,
      "learning_rate": 0.0004901530166058227,
      "loss": 3.0636,
      "step": 64853
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6483443975448608,
      "learning_rate": 0.0004901498526961457,
      "loss": 3.0632,
      "step": 64854
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5655180215835571,
      "learning_rate": 0.0004901466887511162,
      "loss": 2.975,
      "step": 64855
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9829213619232178,
      "learning_rate": 0.0004901435247707349,
      "loss": 3.0946,
      "step": 64856
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6066203117370605,
      "learning_rate": 0.0004901403607550022,
      "loss": 2.9011,
      "step": 64857
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8625645637512207,
      "learning_rate": 0.000490137196703919,
      "loss": 3.1116,
      "step": 64858
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7840967178344727,
      "learning_rate": 0.0004901340326174856,
      "loss": 3.1481,
      "step": 64859
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.303047776222229,
      "learning_rate": 0.0004901308684957027,
      "loss": 2.8615,
      "step": 64860
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5947908163070679,
      "learning_rate": 0.0004901277043385708,
      "loss": 2.8543,
      "step": 64861
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.889692544937134,
      "learning_rate": 0.0004901245401460907,
      "loss": 3.0752,
      "step": 64862
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9525407552719116,
      "learning_rate": 0.0004901213759182628,
      "loss": 2.9906,
      "step": 64863
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.458359479904175,
      "learning_rate": 0.0004901182116550878,
      "loss": 3.0883,
      "step": 64864
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3578858375549316,
      "learning_rate": 0.0004901150473565662,
      "loss": 2.8774,
      "step": 64865
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.778923511505127,
      "learning_rate": 0.0004901118830226986,
      "loss": 3.2992,
      "step": 64866
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6296772956848145,
      "learning_rate": 0.0004901087186534856,
      "loss": 3.2153,
      "step": 64867
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6615142822265625,
      "learning_rate": 0.0004901055542489279,
      "loss": 3.0426,
      "step": 64868
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.279506206512451,
      "learning_rate": 0.000490102389809026,
      "loss": 3.0632,
      "step": 64869
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.451308012008667,
      "learning_rate": 0.0004900992253337803,
      "loss": 2.9889,
      "step": 64870
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.894426703453064,
      "learning_rate": 0.0004900960608231917,
      "loss": 2.8961,
      "step": 64871
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.471516728401184,
      "learning_rate": 0.0004900928962772607,
      "loss": 2.885,
      "step": 64872
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8746507167816162,
      "learning_rate": 0.0004900897316959876,
      "loss": 3.1294,
      "step": 64873
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.5872377157211304,
      "learning_rate": 0.0004900865670793735,
      "loss": 3.0106,
      "step": 64874
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7692537307739258,
      "learning_rate": 0.0004900834024274186,
      "loss": 3.0691,
      "step": 64875
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8297367095947266,
      "learning_rate": 0.0004900802377401236,
      "loss": 3.0579,
      "step": 64876
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.683898687362671,
      "learning_rate": 0.0004900770730174892,
      "loss": 2.9573,
      "step": 64877
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.598590612411499,
      "learning_rate": 0.0004900739082595157,
      "loss": 2.9611,
      "step": 64878
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.6798615455627441,
      "learning_rate": 0.0004900707434662041,
      "loss": 3.105,
      "step": 64879
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9501110315322876,
      "learning_rate": 0.0004900675786375546,
      "loss": 2.9964,
      "step": 64880
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.032132625579834,
      "learning_rate": 0.0004900644137735679,
      "loss": 3.2803,
      "step": 64881
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4045307636260986,
      "learning_rate": 0.0004900612488742447,
      "loss": 3.457,
      "step": 64882
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.961277961730957,
      "learning_rate": 0.0004900580839395857,
      "loss": 2.9878,
      "step": 64883
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.7978171110153198,
      "learning_rate": 0.0004900549189695911,
      "loss": 3.1018,
      "step": 64884
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.4434205293655396,
      "learning_rate": 0.0004900517539642618,
      "loss": 3.1093,
      "step": 64885
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.121534824371338,
      "learning_rate": 0.0004900485889235982,
      "loss": 3.1953,
      "step": 64886
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.159137725830078,
      "learning_rate": 0.0004900454238476012,
      "loss": 2.8943,
      "step": 64887
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.335908055305481,
      "learning_rate": 0.0004900422587362709,
      "loss": 3.234,
      "step": 64888
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.8385387659072876,
      "learning_rate": 0.0004900390935896082,
      "loss": 2.7707,
      "step": 64889
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.3984901905059814,
      "learning_rate": 0.0004900359284076138,
      "loss": 3.1448,
      "step": 64890
    },
    {
      "epoch": 0.84,
      "grad_norm": 5.679217338562012,
      "learning_rate": 0.000490032763190288,
      "loss": 3.1443,
      "step": 64891
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.7069544792175293,
      "learning_rate": 0.0004900295979376316,
      "loss": 3.1902,
      "step": 64892
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.587908387184143,
      "learning_rate": 0.0004900264326496451,
      "loss": 3.0488,
      "step": 64893
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.100107192993164,
      "learning_rate": 0.0004900232673263291,
      "loss": 3.0089,
      "step": 64894
    },
    {
      "epoch": 0.84,
      "grad_norm": 2.581592559814453,
      "learning_rate": 0.0004900201019676842,
      "loss": 3.1231,
      "step": 64895
    },
    {
      "epoch": 0.84,
      "grad_norm": 1.9508508443832397,
      "learning_rate": 0.0004900169365737109,
      "loss": 3.1184,
      "step": 64896
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4426889419555664,
      "learning_rate": 0.0004900137711444098,
      "loss": 3.0639,
      "step": 64897
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.196908950805664,
      "learning_rate": 0.0004900106056797816,
      "loss": 2.9556,
      "step": 64898
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5564762353897095,
      "learning_rate": 0.000490007440179827,
      "loss": 3.1736,
      "step": 64899
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.717288613319397,
      "learning_rate": 0.0004900042746445463,
      "loss": 3.0499,
      "step": 64900
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5766818523406982,
      "learning_rate": 0.0004900011090739402,
      "loss": 3.0048,
      "step": 64901
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8029472827911377,
      "learning_rate": 0.0004899979434680095,
      "loss": 3.2566,
      "step": 64902
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.277888298034668,
      "learning_rate": 0.0004899947778267544,
      "loss": 2.8539,
      "step": 64903
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0848636627197266,
      "learning_rate": 0.0004899916121501758,
      "loss": 2.7274,
      "step": 64904
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.570624589920044,
      "learning_rate": 0.0004899884464382741,
      "loss": 3.0732,
      "step": 64905
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4996204376220703,
      "learning_rate": 0.0004899852806910499,
      "loss": 3.0542,
      "step": 64906
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.823447823524475,
      "learning_rate": 0.000489982114908504,
      "loss": 3.1434,
      "step": 64907
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0474441051483154,
      "learning_rate": 0.0004899789490906367,
      "loss": 2.963,
      "step": 64908
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8947540521621704,
      "learning_rate": 0.0004899757832374488,
      "loss": 2.9676,
      "step": 64909
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.7130942344665527,
      "learning_rate": 0.0004899726173489408,
      "loss": 3.0639,
      "step": 64910
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2968556880950928,
      "learning_rate": 0.0004899694514251133,
      "loss": 2.9842,
      "step": 64911
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4237051010131836,
      "learning_rate": 0.0004899662854659668,
      "loss": 3.0745,
      "step": 64912
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5483238697052002,
      "learning_rate": 0.0004899631194715022,
      "loss": 2.9173,
      "step": 64913
    },
    {
      "epoch": 0.85,
      "grad_norm": 4.04383659362793,
      "learning_rate": 0.0004899599534417198,
      "loss": 2.963,
      "step": 64914
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.002199411392212,
      "learning_rate": 0.0004899567873766201,
      "loss": 2.9717,
      "step": 64915
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0047190189361572,
      "learning_rate": 0.000489953621276204,
      "loss": 3.0765,
      "step": 64916
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.0256052017211914,
      "learning_rate": 0.0004899504551404718,
      "loss": 2.9117,
      "step": 64917
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3928277492523193,
      "learning_rate": 0.0004899472889694244,
      "loss": 3.2138,
      "step": 64918
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4675257205963135,
      "learning_rate": 0.0004899441227630621,
      "loss": 3.2158,
      "step": 64919
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0492098331451416,
      "learning_rate": 0.0004899409565213858,
      "loss": 3.1339,
      "step": 64920
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.353320360183716,
      "learning_rate": 0.0004899377902443956,
      "loss": 2.9965,
      "step": 64921
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7001436948776245,
      "learning_rate": 0.0004899346239320925,
      "loss": 2.8787,
      "step": 64922
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6940385103225708,
      "learning_rate": 0.000489931457584477,
      "loss": 2.8513,
      "step": 64923
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9822415113449097,
      "learning_rate": 0.0004899282912015496,
      "loss": 3.0347,
      "step": 64924
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7396631240844727,
      "learning_rate": 0.000489925124783311,
      "loss": 3.2854,
      "step": 64925
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6675209999084473,
      "learning_rate": 0.0004899219583297617,
      "loss": 3.1501,
      "step": 64926
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6358729600906372,
      "learning_rate": 0.0004899187918409024,
      "loss": 2.8892,
      "step": 64927
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.444205641746521,
      "learning_rate": 0.0004899156253167336,
      "loss": 3.0026,
      "step": 64928
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4617165327072144,
      "learning_rate": 0.0004899124587572557,
      "loss": 2.9676,
      "step": 64929
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5789451599121094,
      "learning_rate": 0.0004899092921624697,
      "loss": 2.9378,
      "step": 64930
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5931406021118164,
      "learning_rate": 0.0004899061255323759,
      "loss": 3.1706,
      "step": 64931
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5123428106307983,
      "learning_rate": 0.000489902958866975,
      "loss": 3.0686,
      "step": 64932
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3802249431610107,
      "learning_rate": 0.0004898997921662675,
      "loss": 3.1275,
      "step": 64933
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0534725189208984,
      "learning_rate": 0.000489896625430254,
      "loss": 3.1162,
      "step": 64934
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4500517845153809,
      "learning_rate": 0.0004898934586589353,
      "loss": 3.009,
      "step": 64935
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1550133228302,
      "learning_rate": 0.0004898902918523117,
      "loss": 2.9119,
      "step": 64936
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5488884449005127,
      "learning_rate": 0.0004898871250103839,
      "loss": 2.9511,
      "step": 64937
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7542545795440674,
      "learning_rate": 0.0004898839581331527,
      "loss": 3.2099,
      "step": 64938
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1248416900634766,
      "learning_rate": 0.0004898807912206183,
      "loss": 3.0835,
      "step": 64939
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4526491165161133,
      "learning_rate": 0.0004898776242727814,
      "loss": 3.1685,
      "step": 64940
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.600540280342102,
      "learning_rate": 0.0004898744572896428,
      "loss": 2.9646,
      "step": 64941
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6146278381347656,
      "learning_rate": 0.000489871290271203,
      "loss": 3.1391,
      "step": 64942
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5820435285568237,
      "learning_rate": 0.0004898681232174623,
      "loss": 3.042,
      "step": 64943
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.719422459602356,
      "learning_rate": 0.0004898649561284218,
      "loss": 3.0714,
      "step": 64944
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7684460878372192,
      "learning_rate": 0.0004898617890040818,
      "loss": 2.9595,
      "step": 64945
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7295098304748535,
      "learning_rate": 0.0004898586218444427,
      "loss": 3.0239,
      "step": 64946
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7600303888320923,
      "learning_rate": 0.0004898554546495055,
      "loss": 3.1228,
      "step": 64947
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.650840401649475,
      "learning_rate": 0.0004898522874192706,
      "loss": 3.2411,
      "step": 64948
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7487967014312744,
      "learning_rate": 0.0004898491201537384,
      "loss": 3.2132,
      "step": 64949
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4935009479522705,
      "learning_rate": 0.0004898459528529098,
      "loss": 3.3123,
      "step": 64950
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4713635444641113,
      "learning_rate": 0.0004898427855167852,
      "loss": 3.0358,
      "step": 64951
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9811455011367798,
      "learning_rate": 0.0004898396181453652,
      "loss": 2.7669,
      "step": 64952
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4925410747528076,
      "learning_rate": 0.0004898364507386505,
      "loss": 3.0105,
      "step": 64953
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7258179187774658,
      "learning_rate": 0.0004898332832966416,
      "loss": 2.9327,
      "step": 64954
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.564595103263855,
      "learning_rate": 0.0004898301158193391,
      "loss": 3.0993,
      "step": 64955
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.884228229522705,
      "learning_rate": 0.0004898269483067436,
      "loss": 3.0537,
      "step": 64956
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.596707820892334,
      "learning_rate": 0.0004898237807588557,
      "loss": 2.9874,
      "step": 64957
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6801173686981201,
      "learning_rate": 0.000489820613175676,
      "loss": 2.9838,
      "step": 64958
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.50221848487854,
      "learning_rate": 0.0004898174455572051,
      "loss": 2.8533,
      "step": 64959
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.464071273803711,
      "learning_rate": 0.0004898142779034434,
      "loss": 3.1318,
      "step": 64960
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4031130075454712,
      "learning_rate": 0.0004898111102143918,
      "loss": 3.0395,
      "step": 64961
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5168582201004028,
      "learning_rate": 0.0004898079424900507,
      "loss": 3.0035,
      "step": 64962
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6975157260894775,
      "learning_rate": 0.0004898047747304206,
      "loss": 3.0286,
      "step": 64963
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.82272207736969,
      "learning_rate": 0.0004898016069355024,
      "loss": 3.0564,
      "step": 64964
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5203181505203247,
      "learning_rate": 0.0004897984391052963,
      "loss": 2.9191,
      "step": 64965
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7220596075057983,
      "learning_rate": 0.0004897952712398032,
      "loss": 3.2671,
      "step": 64966
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.378499746322632,
      "learning_rate": 0.0004897921033390235,
      "loss": 3.2185,
      "step": 64967
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.380491018295288,
      "learning_rate": 0.000489788935402958,
      "loss": 3.009,
      "step": 64968
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6546651124954224,
      "learning_rate": 0.000489785767431607,
      "loss": 3.1698,
      "step": 64969
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.0780584812164307,
      "learning_rate": 0.0004897825994249714,
      "loss": 3.0229,
      "step": 64970
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.402904748916626,
      "learning_rate": 0.0004897794313830515,
      "loss": 3.1266,
      "step": 64971
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0092246532440186,
      "learning_rate": 0.0004897762633058482,
      "loss": 3.0199,
      "step": 64972
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4406368732452393,
      "learning_rate": 0.0004897730951933618,
      "loss": 2.9666,
      "step": 64973
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1945128440856934,
      "learning_rate": 0.0004897699270455929,
      "loss": 3.288,
      "step": 64974
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5009114742279053,
      "learning_rate": 0.0004897667588625422,
      "loss": 3.0039,
      "step": 64975
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.504273772239685,
      "learning_rate": 0.0004897635906442105,
      "loss": 2.9185,
      "step": 64976
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3342320919036865,
      "learning_rate": 0.000489760422390598,
      "loss": 2.8347,
      "step": 64977
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0522053241729736,
      "learning_rate": 0.0004897572541017054,
      "loss": 3.1479,
      "step": 64978
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.2252299785614014,
      "learning_rate": 0.0004897540857775335,
      "loss": 3.0108,
      "step": 64979
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.488494873046875,
      "learning_rate": 0.0004897509174180826,
      "loss": 2.8153,
      "step": 64980
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0331671237945557,
      "learning_rate": 0.0004897477490233534,
      "loss": 2.7611,
      "step": 64981
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3905012607574463,
      "learning_rate": 0.0004897445805933467,
      "loss": 2.9406,
      "step": 64982
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.792656421661377,
      "learning_rate": 0.0004897414121280627,
      "loss": 2.9254,
      "step": 64983
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7914454936981201,
      "learning_rate": 0.0004897382436275024,
      "loss": 2.9849,
      "step": 64984
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5273357629776,
      "learning_rate": 0.000489735075091666,
      "loss": 2.9417,
      "step": 64985
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4090321063995361,
      "learning_rate": 0.0004897319065205542,
      "loss": 3.0243,
      "step": 64986
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4129496812820435,
      "learning_rate": 0.000489728737914168,
      "loss": 3.3083,
      "step": 64987
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5711005926132202,
      "learning_rate": 0.0004897255692725074,
      "loss": 2.768,
      "step": 64988
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.890159249305725,
      "learning_rate": 0.0004897224005955733,
      "loss": 3.0729,
      "step": 64989
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4362623691558838,
      "learning_rate": 0.000489719231883366,
      "loss": 2.8024,
      "step": 64990
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6454122066497803,
      "learning_rate": 0.0004897160631358866,
      "loss": 2.7749,
      "step": 64991
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8621898889541626,
      "learning_rate": 0.0004897128943531353,
      "loss": 3.0111,
      "step": 64992
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4406036138534546,
      "learning_rate": 0.0004897097255351128,
      "loss": 2.9395,
      "step": 64993
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0416879653930664,
      "learning_rate": 0.0004897065566818197,
      "loss": 2.9139,
      "step": 64994
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9453476667404175,
      "learning_rate": 0.0004897033877932565,
      "loss": 3.0968,
      "step": 64995
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7045613527297974,
      "learning_rate": 0.0004897002188694239,
      "loss": 3.0714,
      "step": 64996
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4588204622268677,
      "learning_rate": 0.0004896970499103225,
      "loss": 3.1047,
      "step": 64997
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1628856658935547,
      "learning_rate": 0.0004896938809159527,
      "loss": 3.1468,
      "step": 64998
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7692761421203613,
      "learning_rate": 0.0004896907118863154,
      "loss": 3.1793,
      "step": 64999
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.467076063156128,
      "learning_rate": 0.0004896875428214109,
      "loss": 3.1394,
      "step": 65000
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.558258533477783,
      "learning_rate": 0.0004896843737212399,
      "loss": 3.1752,
      "step": 65001
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.633413314819336,
      "learning_rate": 0.0004896812045858031,
      "loss": 3.0037,
      "step": 65002
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6359623670578003,
      "learning_rate": 0.000489678035415101,
      "loss": 2.827,
      "step": 65003
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.403701901435852,
      "learning_rate": 0.000489674866209134,
      "loss": 2.9542,
      "step": 65004
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4107004404067993,
      "learning_rate": 0.000489671696967903,
      "loss": 2.9419,
      "step": 65005
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4115828275680542,
      "learning_rate": 0.0004896685276914083,
      "loss": 3.1268,
      "step": 65006
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5616216659545898,
      "learning_rate": 0.0004896653583796507,
      "loss": 2.9746,
      "step": 65007
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0308890342712402,
      "learning_rate": 0.0004896621890326309,
      "loss": 3.1751,
      "step": 65008
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.763171672821045,
      "learning_rate": 0.0004896590196503492,
      "loss": 2.9761,
      "step": 65009
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.913272738456726,
      "learning_rate": 0.0004896558502328062,
      "loss": 2.8876,
      "step": 65010
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1543338298797607,
      "learning_rate": 0.0004896526807800028,
      "loss": 3.0362,
      "step": 65011
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.460283875465393,
      "learning_rate": 0.0004896495112919392,
      "loss": 3.151,
      "step": 65012
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7513198852539062,
      "learning_rate": 0.0004896463417686164,
      "loss": 2.9783,
      "step": 65013
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5624382495880127,
      "learning_rate": 0.0004896431722100345,
      "loss": 3.0197,
      "step": 65014
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3596456050872803,
      "learning_rate": 0.0004896400026161945,
      "loss": 2.885,
      "step": 65015
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.612815260887146,
      "learning_rate": 0.0004896368329870968,
      "loss": 2.9585,
      "step": 65016
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.450931429862976,
      "learning_rate": 0.0004896336633227422,
      "loss": 3.0114,
      "step": 65017
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5963795185089111,
      "learning_rate": 0.000489630493623131,
      "loss": 3.1608,
      "step": 65018
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6815506219863892,
      "learning_rate": 0.0004896273238882638,
      "loss": 3.2473,
      "step": 65019
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5400294065475464,
      "learning_rate": 0.0004896241541181415,
      "loss": 2.8491,
      "step": 65020
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1147210597991943,
      "learning_rate": 0.0004896209843127643,
      "loss": 3.2654,
      "step": 65021
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5238583087921143,
      "learning_rate": 0.000489617814472133,
      "loss": 3.0296,
      "step": 65022
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8124665021896362,
      "learning_rate": 0.0004896146445962483,
      "loss": 2.9515,
      "step": 65023
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6471514701843262,
      "learning_rate": 0.0004896114746851106,
      "loss": 3.0297,
      "step": 65024
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.632185935974121,
      "learning_rate": 0.0004896083047387205,
      "loss": 3.1469,
      "step": 65025
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4339500665664673,
      "learning_rate": 0.0004896051347570787,
      "loss": 3.2405,
      "step": 65026
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6392961740493774,
      "learning_rate": 0.0004896019647401857,
      "loss": 2.9447,
      "step": 65027
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4415977001190186,
      "learning_rate": 0.0004895987946880421,
      "loss": 2.9511,
      "step": 65028
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6934809684753418,
      "learning_rate": 0.0004895956246006484,
      "loss": 2.7748,
      "step": 65029
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5036389827728271,
      "learning_rate": 0.0004895924544780055,
      "loss": 3.3925,
      "step": 65030
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0113561153411865,
      "learning_rate": 0.0004895892843201138,
      "loss": 3.0585,
      "step": 65031
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7052602767944336,
      "learning_rate": 0.0004895861141269737,
      "loss": 3.1595,
      "step": 65032
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8734980821609497,
      "learning_rate": 0.0004895829438985862,
      "loss": 2.8139,
      "step": 65033
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9993733167648315,
      "learning_rate": 0.0004895797736349513,
      "loss": 3.0276,
      "step": 65034
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.725968360900879,
      "learning_rate": 0.0004895766033360702,
      "loss": 3.0888,
      "step": 65035
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5229663848876953,
      "learning_rate": 0.0004895734330019432,
      "loss": 2.948,
      "step": 65036
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8928266763687134,
      "learning_rate": 0.0004895702626325708,
      "loss": 3.1169,
      "step": 65037
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.015665054321289,
      "learning_rate": 0.0004895670922279538,
      "loss": 3.0369,
      "step": 65038
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.470604181289673,
      "learning_rate": 0.0004895639217880926,
      "loss": 3.001,
      "step": 65039
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5599055290222168,
      "learning_rate": 0.0004895607513129881,
      "loss": 3.293,
      "step": 65040
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3490334749221802,
      "learning_rate": 0.0004895575808026405,
      "loss": 3.1732,
      "step": 65041
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.859836220741272,
      "learning_rate": 0.0004895544102570507,
      "loss": 2.8791,
      "step": 65042
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4905816316604614,
      "learning_rate": 0.000489551239676219,
      "loss": 3.1406,
      "step": 65043
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1923253536224365,
      "learning_rate": 0.0004895480690601463,
      "loss": 2.8877,
      "step": 65044
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7659903764724731,
      "learning_rate": 0.000489544898408833,
      "loss": 2.9925,
      "step": 65045
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.883152723312378,
      "learning_rate": 0.0004895417277222796,
      "loss": 2.877,
      "step": 65046
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8766628503799438,
      "learning_rate": 0.0004895385570004869,
      "loss": 3.1949,
      "step": 65047
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0528085231781006,
      "learning_rate": 0.0004895353862434554,
      "loss": 3.091,
      "step": 65048
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.274839401245117,
      "learning_rate": 0.0004895322154511857,
      "loss": 2.7549,
      "step": 65049
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.796952486038208,
      "learning_rate": 0.0004895290446236784,
      "loss": 3.0157,
      "step": 65050
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9614530801773071,
      "learning_rate": 0.0004895258737609341,
      "loss": 3.0516,
      "step": 65051
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8417197465896606,
      "learning_rate": 0.0004895227028629533,
      "loss": 2.8223,
      "step": 65052
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6691670417785645,
      "learning_rate": 0.0004895195319297367,
      "loss": 3.031,
      "step": 65053
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7154077291488647,
      "learning_rate": 0.0004895163609612848,
      "loss": 3.1826,
      "step": 65054
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4924486875534058,
      "learning_rate": 0.0004895131899575983,
      "loss": 3.2266,
      "step": 65055
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7091856002807617,
      "learning_rate": 0.0004895100189186777,
      "loss": 2.8347,
      "step": 65056
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5748623609542847,
      "learning_rate": 0.0004895068478445236,
      "loss": 3.065,
      "step": 65057
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5385162830352783,
      "learning_rate": 0.0004895036767351365,
      "loss": 3.1204,
      "step": 65058
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.522737741470337,
      "learning_rate": 0.0004895005055905171,
      "loss": 3.1465,
      "step": 65059
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4143900871276855,
      "learning_rate": 0.0004894973344106661,
      "loss": 2.8773,
      "step": 65060
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8373544216156006,
      "learning_rate": 0.0004894941631955839,
      "loss": 3.0787,
      "step": 65061
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5562032461166382,
      "learning_rate": 0.0004894909919452712,
      "loss": 3.2322,
      "step": 65062
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.032651424407959,
      "learning_rate": 0.0004894878206597285,
      "loss": 2.8095,
      "step": 65063
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.862001419067383,
      "learning_rate": 0.0004894846493389564,
      "loss": 3.0603,
      "step": 65064
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4355850219726562,
      "learning_rate": 0.0004894814779829557,
      "loss": 3.202,
      "step": 65065
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9276007413864136,
      "learning_rate": 0.0004894783065917266,
      "loss": 2.8998,
      "step": 65066
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.4716882705688477,
      "learning_rate": 0.00048947513516527,
      "loss": 2.9779,
      "step": 65067
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7624914646148682,
      "learning_rate": 0.0004894719637035864,
      "loss": 3.2638,
      "step": 65068
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5874803066253662,
      "learning_rate": 0.0004894687922066764,
      "loss": 3.1238,
      "step": 65069
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6880954504013062,
      "learning_rate": 0.0004894656206745406,
      "loss": 2.8108,
      "step": 65070
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5383549928665161,
      "learning_rate": 0.0004894624491071795,
      "loss": 3.4017,
      "step": 65071
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6446045637130737,
      "learning_rate": 0.0004894592775045939,
      "loss": 3.0842,
      "step": 65072
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8389172554016113,
      "learning_rate": 0.000489456105866784,
      "loss": 3.0749,
      "step": 65073
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8268071413040161,
      "learning_rate": 0.0004894529341937508,
      "loss": 3.0573,
      "step": 65074
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4436185359954834,
      "learning_rate": 0.0004894497624854946,
      "loss": 3.0886,
      "step": 65075
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7192301750183105,
      "learning_rate": 0.0004894465907420163,
      "loss": 2.86,
      "step": 65076
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9702328443527222,
      "learning_rate": 0.0004894434189633162,
      "loss": 3.0659,
      "step": 65077
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6589926481246948,
      "learning_rate": 0.000489440247149395,
      "loss": 3.206,
      "step": 65078
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3793723583221436,
      "learning_rate": 0.0004894370753002532,
      "loss": 3.1763,
      "step": 65079
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4575401544570923,
      "learning_rate": 0.0004894339034158915,
      "loss": 3.2733,
      "step": 65080
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.528687596321106,
      "learning_rate": 0.0004894307314963106,
      "loss": 3.2101,
      "step": 65081
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5437144041061401,
      "learning_rate": 0.0004894275595415109,
      "loss": 3.0417,
      "step": 65082
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4683127403259277,
      "learning_rate": 0.000489424387551493,
      "loss": 3.0577,
      "step": 65083
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4950875043869019,
      "learning_rate": 0.0004894212155262574,
      "loss": 3.0446,
      "step": 65084
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8373043537139893,
      "learning_rate": 0.000489418043465805,
      "loss": 3.1043,
      "step": 65085
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.258814811706543,
      "learning_rate": 0.0004894148713701362,
      "loss": 2.7817,
      "step": 65086
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.035773515701294,
      "learning_rate": 0.0004894116992392515,
      "loss": 2.9498,
      "step": 65087
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7332549095153809,
      "learning_rate": 0.0004894085270731517,
      "loss": 3.08,
      "step": 65088
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8015505075454712,
      "learning_rate": 0.0004894053548718372,
      "loss": 3.0055,
      "step": 65089
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7192631959915161,
      "learning_rate": 0.0004894021826353087,
      "loss": 2.9983,
      "step": 65090
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5533372163772583,
      "learning_rate": 0.0004893990103635667,
      "loss": 3.0642,
      "step": 65091
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.106208562850952,
      "learning_rate": 0.000489395838056612,
      "loss": 3.1355,
      "step": 65092
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3786606788635254,
      "learning_rate": 0.0004893926657144449,
      "loss": 3.145,
      "step": 65093
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.629152774810791,
      "learning_rate": 0.0004893894933370662,
      "loss": 2.942,
      "step": 65094
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.333920955657959,
      "learning_rate": 0.0004893863209244764,
      "loss": 3.0337,
      "step": 65095
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5735983848571777,
      "learning_rate": 0.0004893831484766761,
      "loss": 2.7536,
      "step": 65096
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.5459601879119873,
      "learning_rate": 0.000489379975993666,
      "loss": 2.8246,
      "step": 65097
    },
    {
      "epoch": 0.85,
      "grad_norm": 4.285601615905762,
      "learning_rate": 0.0004893768034754462,
      "loss": 2.9444,
      "step": 65098
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9987473487854004,
      "learning_rate": 0.0004893736309220181,
      "loss": 3.0184,
      "step": 65099
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9652175903320312,
      "learning_rate": 0.0004893704583333817,
      "loss": 3.0584,
      "step": 65100
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0726635456085205,
      "learning_rate": 0.0004893672857095378,
      "loss": 2.9797,
      "step": 65101
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.352874517440796,
      "learning_rate": 0.000489364113050487,
      "loss": 3.0709,
      "step": 65102
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5433111190795898,
      "learning_rate": 0.0004893609403562297,
      "loss": 3.0518,
      "step": 65103
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6405590772628784,
      "learning_rate": 0.0004893577676267667,
      "loss": 3.0179,
      "step": 65104
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.8368990421295166,
      "learning_rate": 0.0004893545948620985,
      "loss": 2.9163,
      "step": 65105
    },
    {
      "epoch": 0.85,
      "grad_norm": 5.271059513092041,
      "learning_rate": 0.0004893514220622258,
      "loss": 3.072,
      "step": 65106
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.83744478225708,
      "learning_rate": 0.000489348249227149,
      "loss": 2.9566,
      "step": 65107
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5022025108337402,
      "learning_rate": 0.0004893450763568687,
      "loss": 2.9229,
      "step": 65108
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.6591904163360596,
      "learning_rate": 0.0004893419034513858,
      "loss": 3.1187,
      "step": 65109
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.7743070125579834,
      "learning_rate": 0.0004893387305107005,
      "loss": 3.1497,
      "step": 65110
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.692753791809082,
      "learning_rate": 0.0004893355575348136,
      "loss": 2.8472,
      "step": 65111
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8010673522949219,
      "learning_rate": 0.0004893323845237257,
      "loss": 3.0592,
      "step": 65112
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.807835578918457,
      "learning_rate": 0.0004893292114774373,
      "loss": 3.0938,
      "step": 65113
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.103861093521118,
      "learning_rate": 0.000489326038395949,
      "loss": 3.2327,
      "step": 65114
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0295116901397705,
      "learning_rate": 0.0004893228652792614,
      "loss": 2.9037,
      "step": 65115
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2666068077087402,
      "learning_rate": 0.000489319692127375,
      "loss": 3.1256,
      "step": 65116
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.7902188301086426,
      "learning_rate": 0.0004893165189402908,
      "loss": 3.1949,
      "step": 65117
    },
    {
      "epoch": 0.85,
      "grad_norm": 4.953124046325684,
      "learning_rate": 0.0004893133457180089,
      "loss": 2.8912,
      "step": 65118
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.875365972518921,
      "learning_rate": 0.0004893101724605301,
      "loss": 2.896,
      "step": 65119
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7506717443466187,
      "learning_rate": 0.0004893069991678548,
      "loss": 3.0211,
      "step": 65120
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4065499305725098,
      "learning_rate": 0.000489303825839984,
      "loss": 3.0318,
      "step": 65121
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2181971073150635,
      "learning_rate": 0.000489300652476918,
      "loss": 2.9643,
      "step": 65122
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.524850845336914,
      "learning_rate": 0.0004892974790786573,
      "loss": 2.8212,
      "step": 65123
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6019558906555176,
      "learning_rate": 0.0004892943056452028,
      "loss": 3.1927,
      "step": 65124
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3592168092727661,
      "learning_rate": 0.0004892911321765547,
      "loss": 3.0251,
      "step": 65125
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.317958116531372,
      "learning_rate": 0.000489287958672714,
      "loss": 3.1123,
      "step": 65126
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4986865520477295,
      "learning_rate": 0.000489284785133681,
      "loss": 2.9003,
      "step": 65127
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.076915979385376,
      "learning_rate": 0.0004892816115594564,
      "loss": 3.3037,
      "step": 65128
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7639423608779907,
      "learning_rate": 0.0004892784379500408,
      "loss": 2.9178,
      "step": 65129
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.119640827178955,
      "learning_rate": 0.0004892752643054348,
      "loss": 3.2091,
      "step": 65130
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5611140727996826,
      "learning_rate": 0.000489272090625639,
      "loss": 3.1395,
      "step": 65131
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7338533401489258,
      "learning_rate": 0.0004892689169106538,
      "loss": 3.1367,
      "step": 65132
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.532196044921875,
      "learning_rate": 0.00048926574316048,
      "loss": 2.9197,
      "step": 65133
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5387918949127197,
      "learning_rate": 0.000489262569375118,
      "loss": 2.777,
      "step": 65134
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5333045721054077,
      "learning_rate": 0.0004892593955545687,
      "loss": 2.8898,
      "step": 65135
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.145852565765381,
      "learning_rate": 0.0004892562216988324,
      "loss": 3.0458,
      "step": 65136
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.308607339859009,
      "learning_rate": 0.0004892530478079099,
      "loss": 2.7782,
      "step": 65137
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5619795322418213,
      "learning_rate": 0.0004892498738818015,
      "loss": 2.8512,
      "step": 65138
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8927419185638428,
      "learning_rate": 0.0004892466999205082,
      "loss": 3.0879,
      "step": 65139
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.373854398727417,
      "learning_rate": 0.0004892435259240302,
      "loss": 3.1322,
      "step": 65140
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.564536452293396,
      "learning_rate": 0.0004892403518923684,
      "loss": 3.0354,
      "step": 65141
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4091541767120361,
      "learning_rate": 0.0004892371778255231,
      "loss": 2.9864,
      "step": 65142
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4217700958251953,
      "learning_rate": 0.0004892340037234951,
      "loss": 2.9743,
      "step": 65143
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.551900863647461,
      "learning_rate": 0.000489230829586285,
      "loss": 3.1944,
      "step": 65144
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6057430505752563,
      "learning_rate": 0.0004892276554138933,
      "loss": 3.0997,
      "step": 65145
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.873774528503418,
      "learning_rate": 0.0004892244812063205,
      "loss": 3.1716,
      "step": 65146
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4542816877365112,
      "learning_rate": 0.0004892213069635673,
      "loss": 2.9549,
      "step": 65147
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.228898525238037,
      "learning_rate": 0.0004892181326856343,
      "loss": 3.113,
      "step": 65148
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4951694011688232,
      "learning_rate": 0.0004892149583725222,
      "loss": 2.9735,
      "step": 65149
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4504132270812988,
      "learning_rate": 0.0004892117840242314,
      "loss": 3.2387,
      "step": 65150
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6026370525360107,
      "learning_rate": 0.0004892086096407624,
      "loss": 3.0568,
      "step": 65151
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.398061990737915,
      "learning_rate": 0.0004892054352221162,
      "loss": 2.9014,
      "step": 65152
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5599441528320312,
      "learning_rate": 0.000489202260768293,
      "loss": 2.9669,
      "step": 65153
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9711171388626099,
      "learning_rate": 0.0004891990862792936,
      "loss": 3.0026,
      "step": 65154
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6566486358642578,
      "learning_rate": 0.0004891959117551184,
      "loss": 3.3079,
      "step": 65155
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5009360313415527,
      "learning_rate": 0.0004891927371957682,
      "loss": 3.1323,
      "step": 65156
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4558770656585693,
      "learning_rate": 0.0004891895626012436,
      "loss": 3.1932,
      "step": 65157
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7790522575378418,
      "learning_rate": 0.0004891863879715448,
      "loss": 3.13,
      "step": 65158
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7471126317977905,
      "learning_rate": 0.0004891832133066729,
      "loss": 2.7684,
      "step": 65159
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.00602650642395,
      "learning_rate": 0.0004891800386066282,
      "loss": 3.0973,
      "step": 65160
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.363354206085205,
      "learning_rate": 0.0004891768638714114,
      "loss": 2.8457,
      "step": 65161
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.587096929550171,
      "learning_rate": 0.0004891736891010231,
      "loss": 2.9494,
      "step": 65162
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.439716100692749,
      "learning_rate": 0.0004891705142954637,
      "loss": 2.9226,
      "step": 65163
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.701383113861084,
      "learning_rate": 0.0004891673394547341,
      "loss": 3.1025,
      "step": 65164
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.005901336669922,
      "learning_rate": 0.0004891641645788344,
      "loss": 2.9349,
      "step": 65165
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.971571922302246,
      "learning_rate": 0.0004891609896677658,
      "loss": 3.0153,
      "step": 65166
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3617008924484253,
      "learning_rate": 0.0004891578147215286,
      "loss": 3.0241,
      "step": 65167
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8583414554595947,
      "learning_rate": 0.0004891546397401233,
      "loss": 3.2788,
      "step": 65168
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0929532051086426,
      "learning_rate": 0.0004891514647235506,
      "loss": 2.9927,
      "step": 65169
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4856232404708862,
      "learning_rate": 0.0004891482896718112,
      "loss": 2.9418,
      "step": 65170
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5136796236038208,
      "learning_rate": 0.0004891451145849053,
      "loss": 3.2074,
      "step": 65171
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.1048269271850586,
      "learning_rate": 0.0004891419394628339,
      "loss": 2.8662,
      "step": 65172
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3998451232910156,
      "learning_rate": 0.0004891387643055975,
      "loss": 2.9712,
      "step": 65173
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8309974670410156,
      "learning_rate": 0.0004891355891131965,
      "loss": 2.9884,
      "step": 65174
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1024065017700195,
      "learning_rate": 0.0004891324138856318,
      "loss": 2.8281,
      "step": 65175
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5993075370788574,
      "learning_rate": 0.0004891292386229037,
      "loss": 3.243,
      "step": 65176
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.628956913948059,
      "learning_rate": 0.0004891260633250128,
      "loss": 3.0591,
      "step": 65177
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1086928844451904,
      "learning_rate": 0.00048912288799196,
      "loss": 3.02,
      "step": 65178
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.326021671295166,
      "learning_rate": 0.0004891197126237455,
      "loss": 3.0907,
      "step": 65179
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0151875019073486,
      "learning_rate": 0.0004891165372203702,
      "loss": 3.0256,
      "step": 65180
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6790646314620972,
      "learning_rate": 0.0004891133617818345,
      "loss": 2.9439,
      "step": 65181
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6805953979492188,
      "learning_rate": 0.0004891101863081391,
      "loss": 2.9119,
      "step": 65182
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.8070738315582275,
      "learning_rate": 0.0004891070107992846,
      "loss": 3.0808,
      "step": 65183
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.744848608970642,
      "learning_rate": 0.0004891038352552714,
      "loss": 3.0746,
      "step": 65184
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1848034858703613,
      "learning_rate": 0.0004891006596761003,
      "loss": 3.0895,
      "step": 65185
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.7646124362945557,
      "learning_rate": 0.0004890974840617719,
      "loss": 3.2665,
      "step": 65186
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9761505126953125,
      "learning_rate": 0.0004890943084122865,
      "loss": 3.1433,
      "step": 65187
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.728837251663208,
      "learning_rate": 0.0004890911327276452,
      "loss": 3.0567,
      "step": 65188
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7159507274627686,
      "learning_rate": 0.000489087957007848,
      "loss": 3.2752,
      "step": 65189
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.7523717880249023,
      "learning_rate": 0.0004890847812528959,
      "loss": 2.7379,
      "step": 65190
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7827622890472412,
      "learning_rate": 0.0004890816054627894,
      "loss": 3.1546,
      "step": 65191
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4996342658996582,
      "learning_rate": 0.0004890784296375291,
      "loss": 2.8495,
      "step": 65192
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.914998173713684,
      "learning_rate": 0.0004890752537771154,
      "loss": 3.1534,
      "step": 65193
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4744832515716553,
      "learning_rate": 0.0004890720778815491,
      "loss": 3.1121,
      "step": 65194
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3803048133850098,
      "learning_rate": 0.0004890689019508308,
      "loss": 3.0221,
      "step": 65195
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8788508176803589,
      "learning_rate": 0.0004890657259849608,
      "loss": 3.1734,
      "step": 65196
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9527771472930908,
      "learning_rate": 0.0004890625499839402,
      "loss": 3.1898,
      "step": 65197
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.753039002418518,
      "learning_rate": 0.0004890593739477691,
      "loss": 3.165,
      "step": 65198
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.795379638671875,
      "learning_rate": 0.0004890561978764485,
      "loss": 3.1113,
      "step": 65199
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4886057376861572,
      "learning_rate": 0.0004890530217699787,
      "loss": 2.8262,
      "step": 65200
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5061287879943848,
      "learning_rate": 0.0004890498456283603,
      "loss": 3.0301,
      "step": 65201
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2220481634140015,
      "learning_rate": 0.000489046669451594,
      "loss": 2.853,
      "step": 65202
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6204532384872437,
      "learning_rate": 0.0004890434932396804,
      "loss": 2.7183,
      "step": 65203
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9286651611328125,
      "learning_rate": 0.0004890403169926199,
      "loss": 3.2181,
      "step": 65204
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.62226402759552,
      "learning_rate": 0.0004890371407104134,
      "loss": 3.0761,
      "step": 65205
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.52158784866333,
      "learning_rate": 0.0004890339643930613,
      "loss": 3.084,
      "step": 65206
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.520322561264038,
      "learning_rate": 0.0004890307880405641,
      "loss": 2.9487,
      "step": 65207
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6795095205307007,
      "learning_rate": 0.0004890276116529226,
      "loss": 3.0481,
      "step": 65208
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5504969358444214,
      "learning_rate": 0.0004890244352301373,
      "loss": 3.0279,
      "step": 65209
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4302631616592407,
      "learning_rate": 0.0004890212587722088,
      "loss": 3.0043,
      "step": 65210
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5660057067871094,
      "learning_rate": 0.0004890180822791376,
      "loss": 3.0181,
      "step": 65211
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0992753505706787,
      "learning_rate": 0.0004890149057509245,
      "loss": 2.8873,
      "step": 65212
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3477356433868408,
      "learning_rate": 0.0004890117291875699,
      "loss": 3.098,
      "step": 65213
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4350789785385132,
      "learning_rate": 0.0004890085525890745,
      "loss": 2.9252,
      "step": 65214
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.946690320968628,
      "learning_rate": 0.0004890053759554387,
      "loss": 2.9314,
      "step": 65215
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4927417039871216,
      "learning_rate": 0.0004890021992866633,
      "loss": 3.0457,
      "step": 65216
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5522198677062988,
      "learning_rate": 0.0004889990225827489,
      "loss": 2.9624,
      "step": 65217
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.002258062362671,
      "learning_rate": 0.000488995845843696,
      "loss": 2.9265,
      "step": 65218
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.747133731842041,
      "learning_rate": 0.0004889926690695051,
      "loss": 2.9338,
      "step": 65219
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5185481309890747,
      "learning_rate": 0.000488989492260177,
      "loss": 2.9652,
      "step": 65220
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.112786054611206,
      "learning_rate": 0.0004889863154157122,
      "loss": 3.1043,
      "step": 65221
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3390729427337646,
      "learning_rate": 0.0004889831385361111,
      "loss": 2.954,
      "step": 65222
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0028092861175537,
      "learning_rate": 0.0004889799616213746,
      "loss": 3.1725,
      "step": 65223
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5443980693817139,
      "learning_rate": 0.0004889767846715031,
      "loss": 2.9272,
      "step": 65224
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6480908393859863,
      "learning_rate": 0.0004889736076864974,
      "loss": 2.9668,
      "step": 65225
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.437164545059204,
      "learning_rate": 0.0004889704306663577,
      "loss": 3.0791,
      "step": 65226
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5217254161834717,
      "learning_rate": 0.000488967253611085,
      "loss": 3.0966,
      "step": 65227
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.63340425491333,
      "learning_rate": 0.0004889640765206797,
      "loss": 2.9868,
      "step": 65228
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7269115447998047,
      "learning_rate": 0.0004889608993951424,
      "loss": 3.1376,
      "step": 65229
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.121821403503418,
      "learning_rate": 0.0004889577222344738,
      "loss": 3.1581,
      "step": 65230
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7309678792953491,
      "learning_rate": 0.0004889545450386742,
      "loss": 3.1085,
      "step": 65231
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2036423683166504,
      "learning_rate": 0.0004889513678077445,
      "loss": 3.1207,
      "step": 65232
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7618415355682373,
      "learning_rate": 0.0004889481905416851,
      "loss": 2.768,
      "step": 65233
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9179694652557373,
      "learning_rate": 0.0004889450132404968,
      "loss": 3.1727,
      "step": 65234
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7639261484146118,
      "learning_rate": 0.00048894183590418,
      "loss": 2.9952,
      "step": 65235
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.802842855453491,
      "learning_rate": 0.0004889386585327352,
      "loss": 2.8323,
      "step": 65236
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5107094049453735,
      "learning_rate": 0.0004889354811261633,
      "loss": 2.9797,
      "step": 65237
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5082592964172363,
      "learning_rate": 0.0004889323036844647,
      "loss": 2.9471,
      "step": 65238
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.850510835647583,
      "learning_rate": 0.00048892912620764,
      "loss": 3.3701,
      "step": 65239
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6276090145111084,
      "learning_rate": 0.0004889259486956898,
      "loss": 3.0924,
      "step": 65240
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5167747735977173,
      "learning_rate": 0.0004889227711486146,
      "loss": 2.876,
      "step": 65241
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.598832368850708,
      "learning_rate": 0.0004889195935664153,
      "loss": 2.8647,
      "step": 65242
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9325250387191772,
      "learning_rate": 0.0004889164159490922,
      "loss": 3.0198,
      "step": 65243
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7561675310134888,
      "learning_rate": 0.0004889132382966459,
      "loss": 3.0073,
      "step": 65244
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0410163402557373,
      "learning_rate": 0.0004889100606090771,
      "loss": 3.1643,
      "step": 65245
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7305306196212769,
      "learning_rate": 0.0004889068828863864,
      "loss": 2.9572,
      "step": 65246
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0157718658447266,
      "learning_rate": 0.0004889037051285743,
      "loss": 3.3668,
      "step": 65247
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4615172147750854,
      "learning_rate": 0.0004889005273356414,
      "loss": 3.0407,
      "step": 65248
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4164259433746338,
      "learning_rate": 0.0004888973495075884,
      "loss": 2.9145,
      "step": 65249
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3728930950164795,
      "learning_rate": 0.0004888941716444158,
      "loss": 3.0974,
      "step": 65250
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6786015033721924,
      "learning_rate": 0.0004888909937461242,
      "loss": 2.9315,
      "step": 65251
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.786247730255127,
      "learning_rate": 0.0004888878158127143,
      "loss": 3.0987,
      "step": 65252
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8063501119613647,
      "learning_rate": 0.0004888846378441864,
      "loss": 3.0899,
      "step": 65253
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9186584949493408,
      "learning_rate": 0.0004888814598405413,
      "loss": 3.0272,
      "step": 65254
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7617815732955933,
      "learning_rate": 0.0004888782818017797,
      "loss": 2.8819,
      "step": 65255
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6808998584747314,
      "learning_rate": 0.0004888751037279021,
      "loss": 3.0063,
      "step": 65256
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7900959253311157,
      "learning_rate": 0.0004888719256189089,
      "loss": 3.0246,
      "step": 65257
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9429688453674316,
      "learning_rate": 0.0004888687474748009,
      "loss": 2.8802,
      "step": 65258
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.757538914680481,
      "learning_rate": 0.0004888655692955786,
      "loss": 2.9747,
      "step": 65259
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.971766471862793,
      "learning_rate": 0.0004888623910812426,
      "loss": 2.8807,
      "step": 65260
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.834118127822876,
      "learning_rate": 0.0004888592128317935,
      "loss": 3.0251,
      "step": 65261
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6827832460403442,
      "learning_rate": 0.000488856034547232,
      "loss": 3.2628,
      "step": 65262
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.041532278060913,
      "learning_rate": 0.0004888528562275586,
      "loss": 2.9594,
      "step": 65263
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4075332880020142,
      "learning_rate": 0.0004888496778727739,
      "loss": 2.9467,
      "step": 65264
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8544211387634277,
      "learning_rate": 0.0004888464994828785,
      "loss": 3.1547,
      "step": 65265
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9862103462219238,
      "learning_rate": 0.0004888433210578729,
      "loss": 2.9846,
      "step": 65266
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6281473636627197,
      "learning_rate": 0.0004888401425977579,
      "loss": 3.1066,
      "step": 65267
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6141510009765625,
      "learning_rate": 0.0004888369641025337,
      "loss": 2.9825,
      "step": 65268
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.910960078239441,
      "learning_rate": 0.0004888337855722013,
      "loss": 2.9722,
      "step": 65269
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5152671337127686,
      "learning_rate": 0.0004888306070067611,
      "loss": 3.0086,
      "step": 65270
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.544609785079956,
      "learning_rate": 0.0004888274284062137,
      "loss": 3.171,
      "step": 65271
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8204407691955566,
      "learning_rate": 0.0004888242497705598,
      "loss": 2.9374,
      "step": 65272
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6954797506332397,
      "learning_rate": 0.0004888210710997998,
      "loss": 3.0736,
      "step": 65273
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7429215908050537,
      "learning_rate": 0.0004888178923939346,
      "loss": 2.7623,
      "step": 65274
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5458500385284424,
      "learning_rate": 0.0004888147136529644,
      "loss": 3.1083,
      "step": 65275
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2777481079101562,
      "learning_rate": 0.00048881153487689,
      "loss": 3.0242,
      "step": 65276
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7514952421188354,
      "learning_rate": 0.0004888083560657119,
      "loss": 2.8227,
      "step": 65277
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7471646070480347,
      "learning_rate": 0.000488805177219431,
      "loss": 3.0023,
      "step": 65278
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.548966884613037,
      "learning_rate": 0.0004888019983380475,
      "loss": 3.2442,
      "step": 65279
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.955246090888977,
      "learning_rate": 0.0004887988194215621,
      "loss": 3.0157,
      "step": 65280
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1463944911956787,
      "learning_rate": 0.0004887956404699755,
      "loss": 2.7521,
      "step": 65281
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.610489845275879,
      "learning_rate": 0.0004887924614832883,
      "loss": 2.9323,
      "step": 65282
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5252577066421509,
      "learning_rate": 0.0004887892824615009,
      "loss": 3.1312,
      "step": 65283
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.539419174194336,
      "learning_rate": 0.000488786103404614,
      "loss": 3.0666,
      "step": 65284
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.678868055343628,
      "learning_rate": 0.0004887829243126284,
      "loss": 2.9122,
      "step": 65285
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5323630571365356,
      "learning_rate": 0.0004887797451855443,
      "loss": 2.9992,
      "step": 65286
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4844493865966797,
      "learning_rate": 0.0004887765660233625,
      "loss": 3.0471,
      "step": 65287
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1159746646881104,
      "learning_rate": 0.0004887733868260836,
      "loss": 3.1473,
      "step": 65288
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5434050559997559,
      "learning_rate": 0.0004887702075937081,
      "loss": 3.3064,
      "step": 65289
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9335590600967407,
      "learning_rate": 0.0004887670283262367,
      "loss": 2.9951,
      "step": 65290
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5220853090286255,
      "learning_rate": 0.00048876384902367,
      "loss": 3.1444,
      "step": 65291
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4726717472076416,
      "learning_rate": 0.0004887606696860084,
      "loss": 2.9637,
      "step": 65292
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9153244495391846,
      "learning_rate": 0.0004887574903132528,
      "loss": 2.9955,
      "step": 65293
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6012252569198608,
      "learning_rate": 0.0004887543109054036,
      "loss": 2.8331,
      "step": 65294
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.994035005569458,
      "learning_rate": 0.0004887511314624613,
      "loss": 3.152,
      "step": 65295
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9721667766571045,
      "learning_rate": 0.0004887479519844267,
      "loss": 2.8338,
      "step": 65296
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.46330988407135,
      "learning_rate": 0.0004887447724713002,
      "loss": 2.8808,
      "step": 65297
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.619858503341675,
      "learning_rate": 0.0004887415929230826,
      "loss": 3.0447,
      "step": 65298
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8059585094451904,
      "learning_rate": 0.0004887384133397743,
      "loss": 3.1128,
      "step": 65299
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7299997806549072,
      "learning_rate": 0.0004887352337213759,
      "loss": 3.2782,
      "step": 65300
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.5397887229919434,
      "learning_rate": 0.0004887320540678882,
      "loss": 3.2312,
      "step": 65301
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.327286720275879,
      "learning_rate": 0.0004887288743793116,
      "loss": 2.8255,
      "step": 65302
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5893256664276123,
      "learning_rate": 0.0004887256946556468,
      "loss": 3.0062,
      "step": 65303
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6574913263320923,
      "learning_rate": 0.0004887225148968941,
      "loss": 3.1284,
      "step": 65304
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8056870698928833,
      "learning_rate": 0.0004887193351030546,
      "loss": 2.9705,
      "step": 65305
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8718292713165283,
      "learning_rate": 0.0004887161552741284,
      "loss": 3.1147,
      "step": 65306
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7384141683578491,
      "learning_rate": 0.0004887129754101165,
      "loss": 2.8475,
      "step": 65307
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9038764238357544,
      "learning_rate": 0.0004887097955110192,
      "loss": 3.0504,
      "step": 65308
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7326180934906006,
      "learning_rate": 0.0004887066155768371,
      "loss": 2.7939,
      "step": 65309
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.390885591506958,
      "learning_rate": 0.0004887034356075709,
      "loss": 3.0256,
      "step": 65310
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7417455911636353,
      "learning_rate": 0.0004887002556032212,
      "loss": 2.9133,
      "step": 65311
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9065803289413452,
      "learning_rate": 0.0004886970755637886,
      "loss": 3.2548,
      "step": 65312
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9261337518692017,
      "learning_rate": 0.0004886938954892737,
      "loss": 2.9386,
      "step": 65313
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6720986366271973,
      "learning_rate": 0.000488690715379677,
      "loss": 3.0295,
      "step": 65314
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6038587093353271,
      "learning_rate": 0.000488687535234999,
      "loss": 3.2407,
      "step": 65315
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7422548532485962,
      "learning_rate": 0.0004886843550552407,
      "loss": 2.9739,
      "step": 65316
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8885281085968018,
      "learning_rate": 0.0004886811748404022,
      "loss": 3.132,
      "step": 65317
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6499308347702026,
      "learning_rate": 0.0004886779945904843,
      "loss": 2.8417,
      "step": 65318
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6981878280639648,
      "learning_rate": 0.0004886748143054878,
      "loss": 3.2989,
      "step": 65319
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5371127128601074,
      "learning_rate": 0.0004886716339854129,
      "loss": 2.7996,
      "step": 65320
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.0548489093780518,
      "learning_rate": 0.0004886684536302604,
      "loss": 3.079,
      "step": 65321
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0600476264953613,
      "learning_rate": 0.000488665273240031,
      "loss": 2.8739,
      "step": 65322
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8971318006515503,
      "learning_rate": 0.0004886620928147249,
      "loss": 3.0293,
      "step": 65323
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5689018964767456,
      "learning_rate": 0.0004886589123543432,
      "loss": 2.9558,
      "step": 65324
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9855799674987793,
      "learning_rate": 0.0004886557318588863,
      "loss": 2.8548,
      "step": 65325
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6509947776794434,
      "learning_rate": 0.0004886525513283545,
      "loss": 2.9457,
      "step": 65326
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6409051418304443,
      "learning_rate": 0.0004886493707627487,
      "loss": 3.2579,
      "step": 65327
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.722970962524414,
      "learning_rate": 0.0004886461901620696,
      "loss": 2.8836,
      "step": 65328
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8841608762741089,
      "learning_rate": 0.0004886430095263174,
      "loss": 3.2571,
      "step": 65329
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.8055648803710938,
      "learning_rate": 0.0004886398288554929,
      "loss": 2.9045,
      "step": 65330
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5943429470062256,
      "learning_rate": 0.0004886366481495969,
      "loss": 3.2238,
      "step": 65331
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3437867164611816,
      "learning_rate": 0.0004886334674086296,
      "loss": 3.1455,
      "step": 65332
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4089200496673584,
      "learning_rate": 0.0004886302866325918,
      "loss": 2.8447,
      "step": 65333
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.425431489944458,
      "learning_rate": 0.0004886271058214841,
      "loss": 2.9811,
      "step": 65334
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6139127016067505,
      "learning_rate": 0.0004886239249753072,
      "loss": 3.0709,
      "step": 65335
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7568122148513794,
      "learning_rate": 0.0004886207440940613,
      "loss": 3.307,
      "step": 65336
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7943300008773804,
      "learning_rate": 0.0004886175631777475,
      "loss": 2.9628,
      "step": 65337
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5399916172027588,
      "learning_rate": 0.0004886143822263659,
      "loss": 3.0046,
      "step": 65338
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6272456645965576,
      "learning_rate": 0.0004886112012399174,
      "loss": 3.2133,
      "step": 65339
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.906590223312378,
      "learning_rate": 0.0004886080202184025,
      "loss": 3.3159,
      "step": 65340
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0881154537200928,
      "learning_rate": 0.0004886048391618219,
      "loss": 3.1656,
      "step": 65341
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.617133855819702,
      "learning_rate": 0.000488601658070176,
      "loss": 2.9224,
      "step": 65342
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5156999826431274,
      "learning_rate": 0.0004885984769434657,
      "loss": 3.1563,
      "step": 65343
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.05869197845459,
      "learning_rate": 0.0004885952957816911,
      "loss": 3.043,
      "step": 65344
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.5005111694335938,
      "learning_rate": 0.0004885921145848533,
      "loss": 3.0176,
      "step": 65345
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1051182746887207,
      "learning_rate": 0.0004885889333529525,
      "loss": 3.1448,
      "step": 65346
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4702844619750977,
      "learning_rate": 0.0004885857520859894,
      "loss": 3.2404,
      "step": 65347
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4987844228744507,
      "learning_rate": 0.0004885825707839649,
      "loss": 2.9993,
      "step": 65348
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4014861583709717,
      "learning_rate": 0.0004885793894468792,
      "loss": 2.9065,
      "step": 65349
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1001505851745605,
      "learning_rate": 0.000488576208074733,
      "loss": 3.0175,
      "step": 65350
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.642997145652771,
      "learning_rate": 0.0004885730266675271,
      "loss": 3.0946,
      "step": 65351
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9012447595596313,
      "learning_rate": 0.0004885698452252616,
      "loss": 2.6828,
      "step": 65352
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.681119680404663,
      "learning_rate": 0.0004885666637479376,
      "loss": 3.0864,
      "step": 65353
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5743762254714966,
      "learning_rate": 0.0004885634822355555,
      "loss": 3.1778,
      "step": 65354
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8415777683258057,
      "learning_rate": 0.000488560300688116,
      "loss": 2.8695,
      "step": 65355
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6168464422225952,
      "learning_rate": 0.0004885571191056194,
      "loss": 3.0545,
      "step": 65356
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.971386194229126,
      "learning_rate": 0.0004885539374880665,
      "loss": 2.7994,
      "step": 65357
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5382722616195679,
      "learning_rate": 0.0004885507558354578,
      "loss": 3.1638,
      "step": 65358
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6687514781951904,
      "learning_rate": 0.0004885475741477942,
      "loss": 3.1011,
      "step": 65359
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.319532871246338,
      "learning_rate": 0.0004885443924250759,
      "loss": 3.2602,
      "step": 65360
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0151307582855225,
      "learning_rate": 0.0004885412106673036,
      "loss": 3.0031,
      "step": 65361
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3268887996673584,
      "learning_rate": 0.000488538028874478,
      "loss": 3.1838,
      "step": 65362
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.850325584411621,
      "learning_rate": 0.0004885348470465994,
      "loss": 3.0288,
      "step": 65363
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.128016710281372,
      "learning_rate": 0.0004885316651836688,
      "loss": 3.0189,
      "step": 65364
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5449018478393555,
      "learning_rate": 0.0004885284832856867,
      "loss": 3.1858,
      "step": 65365
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6427754163742065,
      "learning_rate": 0.0004885253013526535,
      "loss": 3.1518,
      "step": 65366
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.743309736251831,
      "learning_rate": 0.0004885221193845699,
      "loss": 2.8198,
      "step": 65367
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5132076740264893,
      "learning_rate": 0.0004885189373814364,
      "loss": 2.9929,
      "step": 65368
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6602588891983032,
      "learning_rate": 0.0004885157553432538,
      "loss": 3.4829,
      "step": 65369
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5295783281326294,
      "learning_rate": 0.0004885125732700225,
      "loss": 2.7663,
      "step": 65370
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.789476990699768,
      "learning_rate": 0.0004885093911617432,
      "loss": 3.1356,
      "step": 65371
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9600311517715454,
      "learning_rate": 0.0004885062090184163,
      "loss": 2.8804,
      "step": 65372
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7010126113891602,
      "learning_rate": 0.0004885030268400426,
      "loss": 3.1346,
      "step": 65373
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5690549612045288,
      "learning_rate": 0.0004884998446266228,
      "loss": 3.1291,
      "step": 65374
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5081278085708618,
      "learning_rate": 0.0004884966623781571,
      "loss": 3.2728,
      "step": 65375
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.656686782836914,
      "learning_rate": 0.0004884934800946463,
      "loss": 3.1636,
      "step": 65376
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.718729853630066,
      "learning_rate": 0.0004884902977760913,
      "loss": 3.0177,
      "step": 65377
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6757947206497192,
      "learning_rate": 0.0004884871154224921,
      "loss": 3.0202,
      "step": 65378
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7883620262145996,
      "learning_rate": 0.0004884839330338497,
      "loss": 2.9452,
      "step": 65379
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4606093168258667,
      "learning_rate": 0.0004884807506101646,
      "loss": 3.0679,
      "step": 65380
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8176602125167847,
      "learning_rate": 0.0004884775681514373,
      "loss": 2.8259,
      "step": 65381
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.763323426246643,
      "learning_rate": 0.0004884743856576686,
      "loss": 2.9058,
      "step": 65382
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6869300603866577,
      "learning_rate": 0.0004884712031288588,
      "loss": 3.0696,
      "step": 65383
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6210992336273193,
      "learning_rate": 0.0004884680205650087,
      "loss": 3.1588,
      "step": 65384
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5687731504440308,
      "learning_rate": 0.0004884648379661188,
      "loss": 2.8484,
      "step": 65385
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4639317989349365,
      "learning_rate": 0.0004884616553321898,
      "loss": 2.9706,
      "step": 65386
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4876930713653564,
      "learning_rate": 0.0004884584726632221,
      "loss": 3.3219,
      "step": 65387
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.727202296257019,
      "learning_rate": 0.0004884552899592165,
      "loss": 2.9303,
      "step": 65388
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.656935691833496,
      "learning_rate": 0.0004884521072201736,
      "loss": 2.7849,
      "step": 65389
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.410428762435913,
      "learning_rate": 0.0004884489244460937,
      "loss": 2.9681,
      "step": 65390
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6246920824050903,
      "learning_rate": 0.0004884457416369777,
      "loss": 3.2479,
      "step": 65391
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0596282482147217,
      "learning_rate": 0.0004884425587928261,
      "loss": 3.2182,
      "step": 65392
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4968775510787964,
      "learning_rate": 0.0004884393759136394,
      "loss": 3.2871,
      "step": 65393
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.920595407485962,
      "learning_rate": 0.0004884361929994183,
      "loss": 3.1162,
      "step": 65394
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3363916873931885,
      "learning_rate": 0.0004884330100501634,
      "loss": 3.0407,
      "step": 65395
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7717289924621582,
      "learning_rate": 0.0004884298270658752,
      "loss": 2.9314,
      "step": 65396
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.921196699142456,
      "learning_rate": 0.0004884266440465543,
      "loss": 3.0096,
      "step": 65397
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1903350353240967,
      "learning_rate": 0.0004884234609922013,
      "loss": 3.0032,
      "step": 65398
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.476581335067749,
      "learning_rate": 0.0004884202779028169,
      "loss": 3.0307,
      "step": 65399
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8881968259811401,
      "learning_rate": 0.0004884170947784016,
      "loss": 3.1322,
      "step": 65400
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4361543655395508,
      "learning_rate": 0.000488413911618956,
      "loss": 3.1584,
      "step": 65401
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6861075162887573,
      "learning_rate": 0.0004884107284244808,
      "loss": 3.2384,
      "step": 65402
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4632633924484253,
      "learning_rate": 0.0004884075451949764,
      "loss": 2.9774,
      "step": 65403
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8958567380905151,
      "learning_rate": 0.0004884043619304435,
      "loss": 2.8067,
      "step": 65404
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.404766321182251,
      "learning_rate": 0.0004884011786308826,
      "loss": 2.9327,
      "step": 65405
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7231422662734985,
      "learning_rate": 0.0004883979952962944,
      "loss": 3.0618,
      "step": 65406
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.452584981918335,
      "learning_rate": 0.0004883948119266795,
      "loss": 2.7838,
      "step": 65407
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.573613166809082,
      "learning_rate": 0.0004883916285220384,
      "loss": 3.1184,
      "step": 65408
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9423534870147705,
      "learning_rate": 0.0004883884450823718,
      "loss": 2.9671,
      "step": 65409
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.017267942428589,
      "learning_rate": 0.0004883852616076801,
      "loss": 2.9328,
      "step": 65410
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.539872646331787,
      "learning_rate": 0.0004883820780979641,
      "loss": 3.1129,
      "step": 65411
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5941827297210693,
      "learning_rate": 0.0004883788945532244,
      "loss": 2.9595,
      "step": 65412
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2482736110687256,
      "learning_rate": 0.0004883757109734614,
      "loss": 3.1801,
      "step": 65413
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6021744012832642,
      "learning_rate": 0.0004883725273586759,
      "loss": 2.9328,
      "step": 65414
    },
    {
      "epoch": 0.85,
      "grad_norm": 4.148270130157471,
      "learning_rate": 0.0004883693437088682,
      "loss": 2.849,
      "step": 65415
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.4352355003356934,
      "learning_rate": 0.0004883661600240392,
      "loss": 3.163,
      "step": 65416
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1918487548828125,
      "learning_rate": 0.0004883629763041895,
      "loss": 3.0832,
      "step": 65417
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.243513345718384,
      "learning_rate": 0.0004883597925493193,
      "loss": 2.8878,
      "step": 65418
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2150542736053467,
      "learning_rate": 0.0004883566087594298,
      "loss": 2.9193,
      "step": 65419
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.810535430908203,
      "learning_rate": 0.000488353424934521,
      "loss": 2.8762,
      "step": 65420
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.0742132663726807,
      "learning_rate": 0.0004883502410745938,
      "loss": 2.9868,
      "step": 65421
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.864479422569275,
      "learning_rate": 0.0004883470571796488,
      "loss": 3.2053,
      "step": 65422
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.891514778137207,
      "learning_rate": 0.0004883438732496865,
      "loss": 3.0205,
      "step": 65423
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.2764368057250977,
      "learning_rate": 0.0004883406892847074,
      "loss": 3.2003,
      "step": 65424
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.042604684829712,
      "learning_rate": 0.0004883375052847122,
      "loss": 2.9175,
      "step": 65425
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.641648054122925,
      "learning_rate": 0.0004883343212497017,
      "loss": 2.9624,
      "step": 65426
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5963140726089478,
      "learning_rate": 0.0004883311371796762,
      "loss": 3.1968,
      "step": 65427
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.5057740211486816,
      "learning_rate": 0.0004883279530746363,
      "loss": 2.885,
      "step": 65428
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7800655364990234,
      "learning_rate": 0.0004883247689345828,
      "loss": 2.9888,
      "step": 65429
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4413858652114868,
      "learning_rate": 0.0004883215847595162,
      "loss": 2.7556,
      "step": 65430
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8561341762542725,
      "learning_rate": 0.0004883184005494369,
      "loss": 2.9704,
      "step": 65431
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.906114101409912,
      "learning_rate": 0.0004883152163043458,
      "loss": 3.054,
      "step": 65432
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.897469162940979,
      "learning_rate": 0.0004883120320242433,
      "loss": 3.2595,
      "step": 65433
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8497768640518188,
      "learning_rate": 0.00048830884770913,
      "loss": 3.125,
      "step": 65434
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2640366554260254,
      "learning_rate": 0.0004883056633590066,
      "loss": 2.8565,
      "step": 65435
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6211928129196167,
      "learning_rate": 0.0004883024789738735,
      "loss": 2.8137,
      "step": 65436
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3916015625,
      "learning_rate": 0.0004882992945537315,
      "loss": 3.0704,
      "step": 65437
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.139718770980835,
      "learning_rate": 0.0004882961100985811,
      "loss": 3.1338,
      "step": 65438
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3190860748291016,
      "learning_rate": 0.0004882929256084229,
      "loss": 3.0172,
      "step": 65439
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5398262739181519,
      "learning_rate": 0.0004882897410832575,
      "loss": 3.0533,
      "step": 65440
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.47087824344635,
      "learning_rate": 0.0004882865565230855,
      "loss": 2.9419,
      "step": 65441
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9867613315582275,
      "learning_rate": 0.0004882833719279075,
      "loss": 3.145,
      "step": 65442
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7653566598892212,
      "learning_rate": 0.000488280187297724,
      "loss": 3.2643,
      "step": 65443
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9995652437210083,
      "learning_rate": 0.00048827700263253564,
      "loss": 3.238,
      "step": 65444
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6094189882278442,
      "learning_rate": 0.00048827381793234306,
      "loss": 3.0915,
      "step": 65445
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2461857795715332,
      "learning_rate": 0.0004882706331971469,
      "loss": 3.1055,
      "step": 65446
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4439382553100586,
      "learning_rate": 0.0004882674484269476,
      "loss": 3.0225,
      "step": 65447
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9826362133026123,
      "learning_rate": 0.0004882642636217458,
      "loss": 2.9856,
      "step": 65448
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4381580352783203,
      "learning_rate": 0.0004882610787815422,
      "loss": 3.2276,
      "step": 65449
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8768250942230225,
      "learning_rate": 0.00048825789390633727,
      "loss": 3.0414,
      "step": 65450
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6389011144638062,
      "learning_rate": 0.00048825470899613164,
      "loss": 3.0102,
      "step": 65451
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5202977657318115,
      "learning_rate": 0.00048825152405092585,
      "loss": 2.97,
      "step": 65452
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1215248107910156,
      "learning_rate": 0.0004882483390707206,
      "loss": 2.9516,
      "step": 65453
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.113773822784424,
      "learning_rate": 0.0004882451540555165,
      "loss": 3.0565,
      "step": 65454
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5925196409225464,
      "learning_rate": 0.000488241969005314,
      "loss": 2.8113,
      "step": 65455
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3219025135040283,
      "learning_rate": 0.00048823878392011377,
      "loss": 3.1916,
      "step": 65456
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1495680809020996,
      "learning_rate": 0.00048823559879991645,
      "loss": 3.0644,
      "step": 65457
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.118130922317505,
      "learning_rate": 0.00048823241364472243,
      "loss": 3.0451,
      "step": 65458
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.6816437244415283,
      "learning_rate": 0.00048822922845453265,
      "loss": 2.8895,
      "step": 65459
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8008627891540527,
      "learning_rate": 0.00048822604322934737,
      "loss": 3.188,
      "step": 65460
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5737906694412231,
      "learning_rate": 0.00048822285796916735,
      "loss": 3.1125,
      "step": 65461
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7362818717956543,
      "learning_rate": 0.0004882196726739932,
      "loss": 3.115,
      "step": 65462
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.074284791946411,
      "learning_rate": 0.00048821648734382537,
      "loss": 3.0679,
      "step": 65463
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5226576328277588,
      "learning_rate": 0.00048821330197866455,
      "loss": 3.2007,
      "step": 65464
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6261625289916992,
      "learning_rate": 0.0004882101165785113,
      "loss": 3.1582,
      "step": 65465
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.665832757949829,
      "learning_rate": 0.00048820693114336633,
      "loss": 3.0695,
      "step": 65466
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8882373571395874,
      "learning_rate": 0.00048820374567323013,
      "loss": 3.0362,
      "step": 65467
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3745239973068237,
      "learning_rate": 0.00048820056016810325,
      "loss": 3.0411,
      "step": 65468
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5662933588027954,
      "learning_rate": 0.00048819737462798635,
      "loss": 2.9567,
      "step": 65469
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.921522855758667,
      "learning_rate": 0.00048819418905287997,
      "loss": 3.0758,
      "step": 65470
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6891214847564697,
      "learning_rate": 0.0004881910034427848,
      "loss": 3.0314,
      "step": 65471
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0351204872131348,
      "learning_rate": 0.00048818781779770136,
      "loss": 3.3194,
      "step": 65472
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4637209177017212,
      "learning_rate": 0.0004881846321176303,
      "loss": 3.0992,
      "step": 65473
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3913758993148804,
      "learning_rate": 0.00048818144640257205,
      "loss": 3.0272,
      "step": 65474
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6359659433364868,
      "learning_rate": 0.00048817826065252737,
      "loss": 2.8507,
      "step": 65475
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4997893571853638,
      "learning_rate": 0.0004881750748674968,
      "loss": 3.1207,
      "step": 65476
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4883356094360352,
      "learning_rate": 0.00048817188904748095,
      "loss": 3.1277,
      "step": 65477
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.586343765258789,
      "learning_rate": 0.0004881687031924803,
      "loss": 2.9958,
      "step": 65478
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3852028846740723,
      "learning_rate": 0.0004881655173024957,
      "loss": 3.2861,
      "step": 65479
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5777301788330078,
      "learning_rate": 0.00048816233137752746,
      "loss": 2.9975,
      "step": 65480
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4022595882415771,
      "learning_rate": 0.0004881591454175763,
      "loss": 3.0167,
      "step": 65481
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8260390758514404,
      "learning_rate": 0.00048815595942264283,
      "loss": 3.0434,
      "step": 65482
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7016955614089966,
      "learning_rate": 0.0004881527733927276,
      "loss": 3.03,
      "step": 65483
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3723777532577515,
      "learning_rate": 0.00048814958732783123,
      "loss": 2.9248,
      "step": 65484
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5521187782287598,
      "learning_rate": 0.0004881464012279543,
      "loss": 2.6419,
      "step": 65485
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.2918360233306885,
      "learning_rate": 0.0004881432150930974,
      "loss": 2.9319,
      "step": 65486
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8208770751953125,
      "learning_rate": 0.00048814002892326116,
      "loss": 3.1292,
      "step": 65487
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7839945554733276,
      "learning_rate": 0.0004881368427184461,
      "loss": 2.8902,
      "step": 65488
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.614944577217102,
      "learning_rate": 0.0004881336564786529,
      "loss": 2.9868,
      "step": 65489
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.537933349609375,
      "learning_rate": 0.000488130470203882,
      "loss": 3.0274,
      "step": 65490
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.477254033088684,
      "learning_rate": 0.00048812728389413415,
      "loss": 3.1538,
      "step": 65491
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6184076070785522,
      "learning_rate": 0.0004881240975494098,
      "loss": 2.8403,
      "step": 65492
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.469550609588623,
      "learning_rate": 0.00048812091116970976,
      "loss": 2.9725,
      "step": 65493
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4972398281097412,
      "learning_rate": 0.0004881177247550344,
      "loss": 3.0321,
      "step": 65494
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5141078233718872,
      "learning_rate": 0.00048811453830538453,
      "loss": 3.074,
      "step": 65495
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4754735231399536,
      "learning_rate": 0.00048811135182076046,
      "loss": 3.1206,
      "step": 65496
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7309235334396362,
      "learning_rate": 0.000488108165301163,
      "loss": 3.0849,
      "step": 65497
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0697386264801025,
      "learning_rate": 0.0004881049787465928,
      "loss": 3.0163,
      "step": 65498
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4139838218688965,
      "learning_rate": 0.0004881017921570502,
      "loss": 2.9954,
      "step": 65499
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.608623743057251,
      "learning_rate": 0.0004880986055325359,
      "loss": 2.8365,
      "step": 65500
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.953262448310852,
      "learning_rate": 0.0004880954188730506,
      "loss": 3.0123,
      "step": 65501
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.8675801753997803,
      "learning_rate": 0.00048809223217859485,
      "loss": 3.2213,
      "step": 65502
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7505857944488525,
      "learning_rate": 0.00048808904544916904,
      "loss": 3.131,
      "step": 65503
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7898362874984741,
      "learning_rate": 0.00048808585868477403,
      "loss": 3.1795,
      "step": 65504
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7244054079055786,
      "learning_rate": 0.00048808267188541035,
      "loss": 2.8292,
      "step": 65505
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.712571382522583,
      "learning_rate": 0.0004880794850510785,
      "loss": 3.0266,
      "step": 65506
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6411852836608887,
      "learning_rate": 0.0004880762981817791,
      "loss": 2.8771,
      "step": 65507
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6646194458007812,
      "learning_rate": 0.00048807311127751276,
      "loss": 3.0131,
      "step": 65508
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5391044616699219,
      "learning_rate": 0.0004880699243382801,
      "loss": 2.7548,
      "step": 65509
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.919893503189087,
      "learning_rate": 0.00048806673736408174,
      "loss": 2.8055,
      "step": 65510
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0210180282592773,
      "learning_rate": 0.0004880635503549182,
      "loss": 3.0538,
      "step": 65511
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6820908784866333,
      "learning_rate": 0.0004880603633107901,
      "loss": 2.9611,
      "step": 65512
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.823197841644287,
      "learning_rate": 0.000488057176231698,
      "loss": 3.1434,
      "step": 65513
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4539765119552612,
      "learning_rate": 0.00048805398911764257,
      "loss": 3.167,
      "step": 65514
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3625340461730957,
      "learning_rate": 0.0004880508019686243,
      "loss": 3.009,
      "step": 65515
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4743072986602783,
      "learning_rate": 0.0004880476147846438,
      "loss": 2.8779,
      "step": 65516
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3787165880203247,
      "learning_rate": 0.00048804442756570184,
      "loss": 3.1884,
      "step": 65517
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4017772674560547,
      "learning_rate": 0.00048804124031179877,
      "loss": 2.9904,
      "step": 65518
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6487419605255127,
      "learning_rate": 0.00048803805302293526,
      "loss": 3.0636,
      "step": 65519
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7311642169952393,
      "learning_rate": 0.000488034865699112,
      "loss": 2.9772,
      "step": 65520
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.763695240020752,
      "learning_rate": 0.0004880316783403295,
      "loss": 3.0286,
      "step": 65521
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5332748889923096,
      "learning_rate": 0.0004880284909465883,
      "loss": 3.0133,
      "step": 65522
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6663365364074707,
      "learning_rate": 0.000488025303517889,
      "loss": 2.8667,
      "step": 65523
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.696595549583435,
      "learning_rate": 0.0004880221160542324,
      "loss": 3.1208,
      "step": 65524
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7575074434280396,
      "learning_rate": 0.0004880189285556188,
      "loss": 3.0114,
      "step": 65525
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.041254758834839,
      "learning_rate": 0.00048801574102204907,
      "loss": 3.2936,
      "step": 65526
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9176987409591675,
      "learning_rate": 0.0004880125534535236,
      "loss": 3.1321,
      "step": 65527
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6063129901885986,
      "learning_rate": 0.00048800936585004305,
      "loss": 2.9154,
      "step": 65528
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.819083571434021,
      "learning_rate": 0.00048800617821160794,
      "loss": 3.1966,
      "step": 65529
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9164427518844604,
      "learning_rate": 0.000488002990538219,
      "loss": 2.7921,
      "step": 65530
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.416439175605774,
      "learning_rate": 0.0004879998028298768,
      "loss": 3.0287,
      "step": 65531
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6067694425582886,
      "learning_rate": 0.0004879966150865818,
      "loss": 3.0998,
      "step": 65532
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4428375959396362,
      "learning_rate": 0.00048799342730833475,
      "loss": 3.0952,
      "step": 65533
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3773170709609985,
      "learning_rate": 0.0004879902394951362,
      "loss": 3.0632,
      "step": 65534
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7027974128723145,
      "learning_rate": 0.00048798705164698666,
      "loss": 3.0391,
      "step": 65535
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.074523687362671,
      "learning_rate": 0.00048798386376388674,
      "loss": 3.0572,
      "step": 65536
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7007801532745361,
      "learning_rate": 0.0004879806758458371,
      "loss": 2.9906,
      "step": 65537
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.663123369216919,
      "learning_rate": 0.00048797748789283833,
      "loss": 3.0519,
      "step": 65538
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4172720909118652,
      "learning_rate": 0.000487974299904891,
      "loss": 3.2406,
      "step": 65539
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6909809112548828,
      "learning_rate": 0.00048797111188199566,
      "loss": 3.0154,
      "step": 65540
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6732878684997559,
      "learning_rate": 0.000487967923824153,
      "loss": 2.9757,
      "step": 65541
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7607784271240234,
      "learning_rate": 0.0004879647357313635,
      "loss": 2.8338,
      "step": 65542
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.893373727798462,
      "learning_rate": 0.00048796154760362783,
      "loss": 2.9948,
      "step": 65543
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0307886600494385,
      "learning_rate": 0.00048795835944094655,
      "loss": 2.9409,
      "step": 65544
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4012608528137207,
      "learning_rate": 0.0004879551712433203,
      "loss": 2.8291,
      "step": 65545
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6914640665054321,
      "learning_rate": 0.0004879519830107496,
      "loss": 2.867,
      "step": 65546
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6316651105880737,
      "learning_rate": 0.0004879487947432351,
      "loss": 3.0901,
      "step": 65547
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4065618515014648,
      "learning_rate": 0.00048794560644077725,
      "loss": 2.8551,
      "step": 65548
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.32865309715271,
      "learning_rate": 0.0004879424181033769,
      "loss": 3.0654,
      "step": 65549
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8717174530029297,
      "learning_rate": 0.0004879392297310345,
      "loss": 3.025,
      "step": 65550
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3423668146133423,
      "learning_rate": 0.00048793604132375064,
      "loss": 3.3753,
      "step": 65551
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9675867557525635,
      "learning_rate": 0.00048793285288152593,
      "loss": 2.9261,
      "step": 65552
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0389647483825684,
      "learning_rate": 0.00048792966440436097,
      "loss": 2.9654,
      "step": 65553
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9427515268325806,
      "learning_rate": 0.00048792647589225623,
      "loss": 3.1786,
      "step": 65554
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.016890048980713,
      "learning_rate": 0.0004879232873452125,
      "loss": 2.8156,
      "step": 65555
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.522812008857727,
      "learning_rate": 0.0004879200987632303,
      "loss": 2.8341,
      "step": 65556
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3707544803619385,
      "learning_rate": 0.0004879169101463101,
      "loss": 3.2508,
      "step": 65557
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5085424184799194,
      "learning_rate": 0.0004879137214944527,
      "loss": 2.8859,
      "step": 65558
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7236881256103516,
      "learning_rate": 0.0004879105328076585,
      "loss": 2.9753,
      "step": 65559
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4392595291137695,
      "learning_rate": 0.0004879073440859283,
      "loss": 3.0939,
      "step": 65560
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4923310279846191,
      "learning_rate": 0.0004879041553292625,
      "loss": 2.9,
      "step": 65561
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2226674556732178,
      "learning_rate": 0.0004879009665376618,
      "loss": 2.7745,
      "step": 65562
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.968607783317566,
      "learning_rate": 0.0004878977777111268,
      "loss": 2.844,
      "step": 65563
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.512345552444458,
      "learning_rate": 0.000487894588849658,
      "loss": 3.2182,
      "step": 65564
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.585490345954895,
      "learning_rate": 0.000487891399953256,
      "loss": 2.9618,
      "step": 65565
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6056605577468872,
      "learning_rate": 0.0004878882110219216,
      "loss": 2.9722,
      "step": 65566
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.583554983139038,
      "learning_rate": 0.0004878850220556551,
      "loss": 3.0044,
      "step": 65567
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.481994867324829,
      "learning_rate": 0.0004878818330544573,
      "loss": 3.0462,
      "step": 65568
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.239591360092163,
      "learning_rate": 0.0004878786440183287,
      "loss": 2.9623,
      "step": 65569
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6397203207015991,
      "learning_rate": 0.0004878754549472699,
      "loss": 2.866,
      "step": 65570
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5131869316101074,
      "learning_rate": 0.0004878722658412815,
      "loss": 3.0645,
      "step": 65571
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4883860349655151,
      "learning_rate": 0.00048786907670036404,
      "loss": 3.1735,
      "step": 65572
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6377363204956055,
      "learning_rate": 0.0004878658875245183,
      "loss": 3.149,
      "step": 65573
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4373505115509033,
      "learning_rate": 0.00048786269831374465,
      "loss": 2.7844,
      "step": 65574
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6153271198272705,
      "learning_rate": 0.00048785950906804384,
      "loss": 2.8788,
      "step": 65575
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8929710388183594,
      "learning_rate": 0.0004878563197874164,
      "loss": 3.1295,
      "step": 65576
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.278170108795166,
      "learning_rate": 0.0004878531304718629,
      "loss": 3.1823,
      "step": 65577
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7073805332183838,
      "learning_rate": 0.00048784994112138395,
      "loss": 2.8461,
      "step": 65578
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3343232870101929,
      "learning_rate": 0.0004878467517359801,
      "loss": 3.0828,
      "step": 65579
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6557894945144653,
      "learning_rate": 0.0004878435623156521,
      "loss": 2.8285,
      "step": 65580
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4987822771072388,
      "learning_rate": 0.00048784037286040034,
      "loss": 2.8545,
      "step": 65581
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7540247440338135,
      "learning_rate": 0.00048783718337022555,
      "loss": 3.1638,
      "step": 65582
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.040957450866699,
      "learning_rate": 0.00048783399384512834,
      "loss": 2.9943,
      "step": 65583
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4269992113113403,
      "learning_rate": 0.0004878308042851092,
      "loss": 3.152,
      "step": 65584
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.2163302898406982,
      "learning_rate": 0.00048782761469016875,
      "loss": 2.8556,
      "step": 65585
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5373772382736206,
      "learning_rate": 0.0004878244250603076,
      "loss": 2.9058,
      "step": 65586
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.811721920967102,
      "learning_rate": 0.00048782123539552636,
      "loss": 3.0629,
      "step": 65587
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7092231512069702,
      "learning_rate": 0.00048781804569582554,
      "loss": 2.9689,
      "step": 65588
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.617600440979004,
      "learning_rate": 0.00048781485596120593,
      "loss": 3.1047,
      "step": 65589
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7165383100509644,
      "learning_rate": 0.00048781166619166796,
      "loss": 3.0597,
      "step": 65590
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5735745429992676,
      "learning_rate": 0.0004878084763872122,
      "loss": 3.1336,
      "step": 65591
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.563421368598938,
      "learning_rate": 0.0004878052865478393,
      "loss": 2.8394,
      "step": 65592
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9811863899230957,
      "learning_rate": 0.0004878020966735499,
      "loss": 3.1832,
      "step": 65593
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.128373384475708,
      "learning_rate": 0.00048779890676434444,
      "loss": 2.842,
      "step": 65594
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8579983711242676,
      "learning_rate": 0.00048779571682022374,
      "loss": 3.0212,
      "step": 65595
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.923430323600769,
      "learning_rate": 0.00048779252684118826,
      "loss": 2.9516,
      "step": 65596
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1319141387939453,
      "learning_rate": 0.00048778933682723855,
      "loss": 2.8942,
      "step": 65597
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7627884149551392,
      "learning_rate": 0.0004877861467783753,
      "loss": 3.2079,
      "step": 65598
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.477486252784729,
      "learning_rate": 0.00048778295669459906,
      "loss": 2.978,
      "step": 65599
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3978161811828613,
      "learning_rate": 0.0004877797665759104,
      "loss": 2.9912,
      "step": 65600
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1270828247070312,
      "learning_rate": 0.00048777657642230994,
      "loss": 2.9544,
      "step": 65601
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.649492621421814,
      "learning_rate": 0.0004877733862337983,
      "loss": 2.9773,
      "step": 65602
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9826091527938843,
      "learning_rate": 0.0004877701960103759,
      "loss": 3.1427,
      "step": 65603
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5128191709518433,
      "learning_rate": 0.0004877670057520436,
      "loss": 2.9688,
      "step": 65604
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.102520227432251,
      "learning_rate": 0.0004877638154588019,
      "loss": 3.0376,
      "step": 65605
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8907610177993774,
      "learning_rate": 0.0004877606251306513,
      "loss": 3.1345,
      "step": 65606
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1214473247528076,
      "learning_rate": 0.0004877574347675925,
      "loss": 3.0182,
      "step": 65607
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.464654803276062,
      "learning_rate": 0.000487754244369626,
      "loss": 3.0612,
      "step": 65608
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5374640226364136,
      "learning_rate": 0.00048775105393675255,
      "loss": 2.8966,
      "step": 65609
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4679421186447144,
      "learning_rate": 0.00048774786346897245,
      "loss": 2.9508,
      "step": 65610
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9382911920547485,
      "learning_rate": 0.00048774467296628667,
      "loss": 2.7089,
      "step": 65611
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5878663063049316,
      "learning_rate": 0.00048774148242869544,
      "loss": 3.0867,
      "step": 65612
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5889604091644287,
      "learning_rate": 0.0004877382918561997,
      "loss": 3.162,
      "step": 65613
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9285900592803955,
      "learning_rate": 0.0004877351012487997,
      "loss": 2.8794,
      "step": 65614
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8361011743545532,
      "learning_rate": 0.0004877319106064963,
      "loss": 2.9796,
      "step": 65615
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9497206211090088,
      "learning_rate": 0.0004877287199292899,
      "loss": 3.0835,
      "step": 65616
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8588882684707642,
      "learning_rate": 0.0004877255292171813,
      "loss": 3.3054,
      "step": 65617
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5722253322601318,
      "learning_rate": 0.0004877223384701709,
      "loss": 2.8899,
      "step": 65618
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3766920566558838,
      "learning_rate": 0.0004877191476882594,
      "loss": 2.8623,
      "step": 65619
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.370570182800293,
      "learning_rate": 0.0004877159568714474,
      "loss": 2.9088,
      "step": 65620
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.0653605461120605,
      "learning_rate": 0.00048771276601973544,
      "loss": 2.8569,
      "step": 65621
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6610826253890991,
      "learning_rate": 0.00048770957513312414,
      "loss": 3.0687,
      "step": 65622
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6457868814468384,
      "learning_rate": 0.0004877063842116141,
      "loss": 2.9853,
      "step": 65623
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.792140245437622,
      "learning_rate": 0.00048770319325520587,
      "loss": 3.0696,
      "step": 65624
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6032074689865112,
      "learning_rate": 0.0004877000022639001,
      "loss": 3.2945,
      "step": 65625
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8911594152450562,
      "learning_rate": 0.0004876968112376973,
      "loss": 2.8837,
      "step": 65626
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.664733648300171,
      "learning_rate": 0.00048769362017659815,
      "loss": 2.9117,
      "step": 65627
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7678468227386475,
      "learning_rate": 0.0004876904290806032,
      "loss": 2.9017,
      "step": 65628
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6197792291641235,
      "learning_rate": 0.0004876872379497131,
      "loss": 3.075,
      "step": 65629
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5031661987304688,
      "learning_rate": 0.0004876840467839284,
      "loss": 2.8343,
      "step": 65630
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7061731815338135,
      "learning_rate": 0.00048768085558324964,
      "loss": 2.7433,
      "step": 65631
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3626956939697266,
      "learning_rate": 0.00048767766434767746,
      "loss": 3.1353,
      "step": 65632
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6390407085418701,
      "learning_rate": 0.0004876744730772125,
      "loss": 3.194,
      "step": 65633
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.836178183555603,
      "learning_rate": 0.0004876712817718553,
      "loss": 2.8065,
      "step": 65634
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3627866506576538,
      "learning_rate": 0.00048766809043160645,
      "loss": 2.9409,
      "step": 65635
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6304670572280884,
      "learning_rate": 0.0004876648990564667,
      "loss": 3.017,
      "step": 65636
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.3776142597198486,
      "learning_rate": 0.0004876617076464363,
      "loss": 3.1864,
      "step": 65637
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9834004640579224,
      "learning_rate": 0.00048765851620151613,
      "loss": 2.9876,
      "step": 65638
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6440573930740356,
      "learning_rate": 0.00048765532472170675,
      "loss": 2.9793,
      "step": 65639
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5251471996307373,
      "learning_rate": 0.00048765213320700867,
      "loss": 3.0869,
      "step": 65640
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.484932780265808,
      "learning_rate": 0.0004876489416574224,
      "loss": 2.9548,
      "step": 65641
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7691402435302734,
      "learning_rate": 0.0004876457500729488,
      "loss": 3.0884,
      "step": 65642
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.9959536790847778,
      "learning_rate": 0.00048764255845358827,
      "loss": 3.4543,
      "step": 65643
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6957685947418213,
      "learning_rate": 0.00048763936679934144,
      "loss": 3.1434,
      "step": 65644
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8674969673156738,
      "learning_rate": 0.0004876361751102089,
      "loss": 3.2684,
      "step": 65645
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5758322477340698,
      "learning_rate": 0.0004876329833861913,
      "loss": 3.3839,
      "step": 65646
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.1320834159851074,
      "learning_rate": 0.0004876297916272891,
      "loss": 3.1342,
      "step": 65647
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5679835081100464,
      "learning_rate": 0.00048762659983350314,
      "loss": 3.0968,
      "step": 65648
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.670467734336853,
      "learning_rate": 0.00048762340800483366,
      "loss": 3.293,
      "step": 65649
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.56881844997406,
      "learning_rate": 0.0004876202161412815,
      "loss": 2.9797,
      "step": 65650
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8927079439163208,
      "learning_rate": 0.00048761702424284724,
      "loss": 3.3175,
      "step": 65651
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.5303765535354614,
      "learning_rate": 0.0004876138323095315,
      "loss": 3.1652,
      "step": 65652
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6363463401794434,
      "learning_rate": 0.0004876106403413346,
      "loss": 2.9273,
      "step": 65653
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.633967876434326,
      "learning_rate": 0.00048760744833825756,
      "loss": 3.3682,
      "step": 65654
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.955073595046997,
      "learning_rate": 0.0004876042563003007,
      "loss": 3.054,
      "step": 65655
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.8402413129806519,
      "learning_rate": 0.00048760106422746454,
      "loss": 2.9076,
      "step": 65656
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.7628145217895508,
      "learning_rate": 0.00048759787211974994,
      "loss": 2.9328,
      "step": 65657
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.300549268722534,
      "learning_rate": 0.0004875946799771573,
      "loss": 3.2821,
      "step": 65658
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.6000984907150269,
      "learning_rate": 0.00048759148779968725,
      "loss": 2.8837,
      "step": 65659
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.083850622177124,
      "learning_rate": 0.0004875882955873405,
      "loss": 3.2529,
      "step": 65660
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.3181400299072266,
      "learning_rate": 0.00048758510334011746,
      "loss": 2.7988,
      "step": 65661
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.97074556350708,
      "learning_rate": 0.0004875819110580188,
      "loss": 2.9619,
      "step": 65662
    },
    {
      "epoch": 0.85,
      "grad_norm": 1.4879274368286133,
      "learning_rate": 0.00048757871874104515,
      "loss": 3.2504,
      "step": 65663
    },
    {
      "epoch": 0.85,
      "grad_norm": 3.6339914798736572,
      "learning_rate": 0.000487575526389197,
      "loss": 3.0203,
      "step": 65664
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.882063865661621,
      "learning_rate": 0.0004875723340024751,
      "loss": 2.9374,
      "step": 65665
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6739221811294556,
      "learning_rate": 0.00048756914158088,
      "loss": 2.8665,
      "step": 65666
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.100922107696533,
      "learning_rate": 0.0004875659491244122,
      "loss": 2.9631,
      "step": 65667
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1554243564605713,
      "learning_rate": 0.0004875627566330723,
      "loss": 3.1045,
      "step": 65668
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5002994537353516,
      "learning_rate": 0.00048755956410686103,
      "loss": 3.0737,
      "step": 65669
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5614268779754639,
      "learning_rate": 0.0004875563715457789,
      "loss": 3.0832,
      "step": 65670
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.283590078353882,
      "learning_rate": 0.0004875531789498264,
      "loss": 3.3779,
      "step": 65671
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9749181270599365,
      "learning_rate": 0.0004875499863190043,
      "loss": 3.1344,
      "step": 65672
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3786486387252808,
      "learning_rate": 0.00048754679365331304,
      "loss": 2.8475,
      "step": 65673
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4767934083938599,
      "learning_rate": 0.0004875436009527534,
      "loss": 3.0673,
      "step": 65674
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.840618371963501,
      "learning_rate": 0.0004875404082173259,
      "loss": 3.0058,
      "step": 65675
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8534499406814575,
      "learning_rate": 0.00048753721544703095,
      "loss": 2.9772,
      "step": 65676
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.46238374710083,
      "learning_rate": 0.00048753402264186936,
      "loss": 3.0503,
      "step": 65677
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7444676160812378,
      "learning_rate": 0.0004875308298018417,
      "loss": 2.8601,
      "step": 65678
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6606121063232422,
      "learning_rate": 0.00048752763692694844,
      "loss": 2.702,
      "step": 65679
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7044998407363892,
      "learning_rate": 0.0004875244440171903,
      "loss": 2.9761,
      "step": 65680
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.396643042564392,
      "learning_rate": 0.0004875212510725678,
      "loss": 3.1003,
      "step": 65681
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8796294927597046,
      "learning_rate": 0.00048751805809308155,
      "loss": 3.212,
      "step": 65682
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6062612533569336,
      "learning_rate": 0.00048751486507873215,
      "loss": 3.2601,
      "step": 65683
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5239430665969849,
      "learning_rate": 0.00048751167202952035,
      "loss": 2.8018,
      "step": 65684
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5725011825561523,
      "learning_rate": 0.0004875084789454464,
      "loss": 3.2146,
      "step": 65685
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.056013584136963,
      "learning_rate": 0.0004875052858265111,
      "loss": 2.6244,
      "step": 65686
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8349369764328003,
      "learning_rate": 0.00048750209267271505,
      "loss": 3.0392,
      "step": 65687
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7863479852676392,
      "learning_rate": 0.0004874988994840589,
      "loss": 3.0155,
      "step": 65688
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4847832918167114,
      "learning_rate": 0.00048749570626054306,
      "loss": 3.2743,
      "step": 65689
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5761545896530151,
      "learning_rate": 0.0004874925130021684,
      "loss": 2.9388,
      "step": 65690
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3651039600372314,
      "learning_rate": 0.00048748931970893513,
      "loss": 3.0463,
      "step": 65691
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5165996551513672,
      "learning_rate": 0.0004874861263808442,
      "loss": 3.1315,
      "step": 65692
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7779852151870728,
      "learning_rate": 0.000487482933017896,
      "loss": 2.9718,
      "step": 65693
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.888528823852539,
      "learning_rate": 0.0004874797396200911,
      "loss": 3.0385,
      "step": 65694
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.787799596786499,
      "learning_rate": 0.0004874765461874303,
      "loss": 3.0455,
      "step": 65695
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.253030776977539,
      "learning_rate": 0.0004874733527199141,
      "loss": 2.9209,
      "step": 65696
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5899090766906738,
      "learning_rate": 0.00048747015921754295,
      "loss": 3.0048,
      "step": 65697
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.476812481880188,
      "learning_rate": 0.00048746696568031765,
      "loss": 2.9767,
      "step": 65698
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6520050764083862,
      "learning_rate": 0.00048746377210823866,
      "loss": 3.1387,
      "step": 65699
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5387018918991089,
      "learning_rate": 0.0004874605785013067,
      "loss": 2.906,
      "step": 65700
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5496633052825928,
      "learning_rate": 0.00048745738485952213,
      "loss": 2.7867,
      "step": 65701
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9879553318023682,
      "learning_rate": 0.0004874541911828857,
      "loss": 2.9249,
      "step": 65702
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.82942795753479,
      "learning_rate": 0.0004874509974713981,
      "loss": 2.9237,
      "step": 65703
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0721547603607178,
      "learning_rate": 0.0004874478037250598,
      "loss": 3.1732,
      "step": 65704
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7499521970748901,
      "learning_rate": 0.0004874446099438714,
      "loss": 2.9927,
      "step": 65705
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.667553186416626,
      "learning_rate": 0.0004874414161278335,
      "loss": 3.1209,
      "step": 65706
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4457027912139893,
      "learning_rate": 0.0004874382222769467,
      "loss": 2.8546,
      "step": 65707
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.173786163330078,
      "learning_rate": 0.00048743502839121157,
      "loss": 2.6887,
      "step": 65708
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6178455352783203,
      "learning_rate": 0.00048743183447062886,
      "loss": 3.3154,
      "step": 65709
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7051568031311035,
      "learning_rate": 0.0004874286405151989,
      "loss": 2.9487,
      "step": 65710
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.8706047534942627,
      "learning_rate": 0.0004874254465249225,
      "loss": 3.1195,
      "step": 65711
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6042269468307495,
      "learning_rate": 0.0004874222524998001,
      "loss": 3.0365,
      "step": 65712
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5211198329925537,
      "learning_rate": 0.0004874190584398324,
      "loss": 2.8887,
      "step": 65713
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5140225887298584,
      "learning_rate": 0.00048741586434501987,
      "loss": 3.029,
      "step": 65714
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.726484775543213,
      "learning_rate": 0.0004874126702153634,
      "loss": 2.9599,
      "step": 65715
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5106027126312256,
      "learning_rate": 0.00048740947605086323,
      "loss": 3.0404,
      "step": 65716
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3655660152435303,
      "learning_rate": 0.0004874062818515201,
      "loss": 3.0661,
      "step": 65717
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.715165138244629,
      "learning_rate": 0.0004874030876173347,
      "loss": 3.1355,
      "step": 65718
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7267138957977295,
      "learning_rate": 0.0004873998933483074,
      "loss": 2.9462,
      "step": 65719
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4882166385650635,
      "learning_rate": 0.00048739669904443895,
      "loss": 3.1828,
      "step": 65720
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.531847596168518,
      "learning_rate": 0.00048739350470573,
      "loss": 2.9454,
      "step": 65721
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.478623628616333,
      "learning_rate": 0.000487390310332181,
      "loss": 2.9907,
      "step": 65722
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4426307678222656,
      "learning_rate": 0.0004873871159237926,
      "loss": 3.1983,
      "step": 65723
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.793758749961853,
      "learning_rate": 0.00048738392148056544,
      "loss": 3.2048,
      "step": 65724
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5333458185195923,
      "learning_rate": 0.0004873807270025,
      "loss": 3.0521,
      "step": 65725
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5560847520828247,
      "learning_rate": 0.00048737753248959706,
      "loss": 3.066,
      "step": 65726
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0437116622924805,
      "learning_rate": 0.00048737433794185695,
      "loss": 3.1108,
      "step": 65727
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5894826650619507,
      "learning_rate": 0.0004873711433592805,
      "loss": 3.1181,
      "step": 65728
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4664583206176758,
      "learning_rate": 0.0004873679487418682,
      "loss": 2.941,
      "step": 65729
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0100929737091064,
      "learning_rate": 0.0004873647540896207,
      "loss": 3.0467,
      "step": 65730
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.700884222984314,
      "learning_rate": 0.0004873615594025385,
      "loss": 3.0117,
      "step": 65731
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9489085674285889,
      "learning_rate": 0.0004873583646806223,
      "loss": 3.1209,
      "step": 65732
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5882186889648438,
      "learning_rate": 0.00048735516992387263,
      "loss": 2.9488,
      "step": 65733
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.125295400619507,
      "learning_rate": 0.0004873519751322901,
      "loss": 2.8363,
      "step": 65734
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1774046421051025,
      "learning_rate": 0.0004873487803058753,
      "loss": 2.9774,
      "step": 65735
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.516491413116455,
      "learning_rate": 0.0004873455854446288,
      "loss": 3.0835,
      "step": 65736
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0316519737243652,
      "learning_rate": 0.0004873423905485512,
      "loss": 2.8475,
      "step": 65737
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.8054308891296387,
      "learning_rate": 0.0004873391956176431,
      "loss": 3.346,
      "step": 65738
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.053276538848877,
      "learning_rate": 0.0004873360006519053,
      "loss": 2.7416,
      "step": 65739
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.966647982597351,
      "learning_rate": 0.00048733280565133795,
      "loss": 3.0945,
      "step": 65740
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0379769802093506,
      "learning_rate": 0.000487329610615942,
      "loss": 2.8972,
      "step": 65741
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6625391244888306,
      "learning_rate": 0.000487326415545718,
      "loss": 3.1876,
      "step": 65742
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.607859492301941,
      "learning_rate": 0.0004873232204406664,
      "loss": 2.9513,
      "step": 65743
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.277775764465332,
      "learning_rate": 0.00048732002530078785,
      "loss": 3.1239,
      "step": 65744
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6297885179519653,
      "learning_rate": 0.000487316830126083,
      "loss": 3.1582,
      "step": 65745
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.479271411895752,
      "learning_rate": 0.00048731363491655245,
      "loss": 2.9919,
      "step": 65746
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7416576147079468,
      "learning_rate": 0.0004873104396721968,
      "loss": 3.018,
      "step": 65747
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.421877384185791,
      "learning_rate": 0.00048730724439301654,
      "loss": 3.0816,
      "step": 65748
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2756950855255127,
      "learning_rate": 0.0004873040490790123,
      "loss": 2.9352,
      "step": 65749
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3613404035568237,
      "learning_rate": 0.00048730085373018477,
      "loss": 3.2373,
      "step": 65750
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.441846489906311,
      "learning_rate": 0.00048729765834653444,
      "loss": 3.1032,
      "step": 65751
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2955236434936523,
      "learning_rate": 0.000487294462928062,
      "loss": 2.9955,
      "step": 65752
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.53952157497406,
      "learning_rate": 0.00048729126747476786,
      "loss": 3.1745,
      "step": 65753
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9422481060028076,
      "learning_rate": 0.0004872880719866528,
      "loss": 2.7426,
      "step": 65754
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0458052158355713,
      "learning_rate": 0.00048728487646371735,
      "loss": 3.2849,
      "step": 65755
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.75849449634552,
      "learning_rate": 0.00048728168090596215,
      "loss": 3.3212,
      "step": 65756
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4677579402923584,
      "learning_rate": 0.00048727848531338774,
      "loss": 3.2569,
      "step": 65757
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9759317636489868,
      "learning_rate": 0.0004872752896859946,
      "loss": 3.1145,
      "step": 65758
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.020329713821411,
      "learning_rate": 0.00048727209402378357,
      "loss": 2.8682,
      "step": 65759
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6451259851455688,
      "learning_rate": 0.00048726889832675506,
      "loss": 3.0056,
      "step": 65760
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0363078117370605,
      "learning_rate": 0.0004872657025949098,
      "loss": 2.9219,
      "step": 65761
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2321155071258545,
      "learning_rate": 0.0004872625068282483,
      "loss": 2.9385,
      "step": 65762
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6738756895065308,
      "learning_rate": 0.0004872593110267711,
      "loss": 2.9484,
      "step": 65763
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6240371465682983,
      "learning_rate": 0.000487256115190479,
      "loss": 3.0893,
      "step": 65764
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6133626699447632,
      "learning_rate": 0.0004872529193193723,
      "loss": 2.8165,
      "step": 65765
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.381711721420288,
      "learning_rate": 0.00048724972341345174,
      "loss": 3.3768,
      "step": 65766
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.48269522190094,
      "learning_rate": 0.00048724652747271805,
      "loss": 2.992,
      "step": 65767
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6069729328155518,
      "learning_rate": 0.00048724333149717156,
      "loss": 2.9547,
      "step": 65768
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.413710117340088,
      "learning_rate": 0.00048724013548681307,
      "loss": 3.0391,
      "step": 65769
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5689148902893066,
      "learning_rate": 0.0004872369394416432,
      "loss": 2.952,
      "step": 65770
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5160491466522217,
      "learning_rate": 0.0004872337433616623,
      "loss": 2.9236,
      "step": 65771
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7306275367736816,
      "learning_rate": 0.00048723054724687115,
      "loss": 3.0828,
      "step": 65772
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5600178241729736,
      "learning_rate": 0.00048722735109727036,
      "loss": 2.6467,
      "step": 65773
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.428518295288086,
      "learning_rate": 0.0004872241549128604,
      "loss": 3.105,
      "step": 65774
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5657933950424194,
      "learning_rate": 0.0004872209586936419,
      "loss": 2.9348,
      "step": 65775
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.817680597305298,
      "learning_rate": 0.0004872177624396157,
      "loss": 2.9969,
      "step": 65776
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6486653089523315,
      "learning_rate": 0.000487214566150782,
      "loss": 3.0522,
      "step": 65777
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5687148571014404,
      "learning_rate": 0.00048721136982714166,
      "loss": 3.0156,
      "step": 65778
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5863981246948242,
      "learning_rate": 0.0004872081734686951,
      "loss": 3.0418,
      "step": 65779
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8450679779052734,
      "learning_rate": 0.0004872049770754431,
      "loss": 2.9269,
      "step": 65780
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3106364011764526,
      "learning_rate": 0.0004872017806473861,
      "loss": 3.2377,
      "step": 65781
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8408453464508057,
      "learning_rate": 0.0004871985841845248,
      "loss": 3.0813,
      "step": 65782
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7432159185409546,
      "learning_rate": 0.0004871953876868597,
      "loss": 2.8334,
      "step": 65783
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5519366264343262,
      "learning_rate": 0.0004871921911543915,
      "loss": 3.0296,
      "step": 65784
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4639718532562256,
      "learning_rate": 0.0004871889945871207,
      "loss": 3.1873,
      "step": 65785
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.272139549255371,
      "learning_rate": 0.00048718579798504795,
      "loss": 3.1009,
      "step": 65786
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3951876163482666,
      "learning_rate": 0.0004871826013481738,
      "loss": 3.154,
      "step": 65787
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4419488906860352,
      "learning_rate": 0.0004871794046764989,
      "loss": 3.2673,
      "step": 65788
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5872538089752197,
      "learning_rate": 0.0004871762079700238,
      "loss": 3.1626,
      "step": 65789
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5634385347366333,
      "learning_rate": 0.0004871730112287491,
      "loss": 2.9484,
      "step": 65790
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.531546711921692,
      "learning_rate": 0.0004871698144526754,
      "loss": 3.1496,
      "step": 65791
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.858737826347351,
      "learning_rate": 0.00048716661764180335,
      "loss": 2.9121,
      "step": 65792
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5446083545684814,
      "learning_rate": 0.00048716342079613344,
      "loss": 3.0955,
      "step": 65793
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5163174867630005,
      "learning_rate": 0.00048716022391566633,
      "loss": 2.9393,
      "step": 65794
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4303125143051147,
      "learning_rate": 0.00048715702700040256,
      "loss": 2.9517,
      "step": 65795
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3092142343521118,
      "learning_rate": 0.0004871538300503429,
      "loss": 3.0225,
      "step": 65796
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7376512289047241,
      "learning_rate": 0.00048715063306548767,
      "loss": 3.428,
      "step": 65797
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.606740951538086,
      "learning_rate": 0.0004871474360458377,
      "loss": 2.7979,
      "step": 65798
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.352089285850525,
      "learning_rate": 0.0004871442389913934,
      "loss": 2.8572,
      "step": 65799
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3171660900115967,
      "learning_rate": 0.0004871410419021555,
      "loss": 2.9647,
      "step": 65800
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6655443906784058,
      "learning_rate": 0.0004871378447781246,
      "loss": 3.0824,
      "step": 65801
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5656193494796753,
      "learning_rate": 0.00048713464761930114,
      "loss": 3.129,
      "step": 65802
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1495351791381836,
      "learning_rate": 0.00048713145042568587,
      "loss": 2.8027,
      "step": 65803
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7740474939346313,
      "learning_rate": 0.0004871282531972793,
      "loss": 3.0568,
      "step": 65804
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5531021356582642,
      "learning_rate": 0.00048712505593408206,
      "loss": 3.1091,
      "step": 65805
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1197848320007324,
      "learning_rate": 0.0004871218586360948,
      "loss": 3.1834,
      "step": 65806
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0794477462768555,
      "learning_rate": 0.0004871186613033181,
      "loss": 3.1063,
      "step": 65807
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8488526344299316,
      "learning_rate": 0.0004871154639357523,
      "loss": 2.9897,
      "step": 65808
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5233676433563232,
      "learning_rate": 0.00048711226653339843,
      "loss": 3.0212,
      "step": 65809
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1648268699645996,
      "learning_rate": 0.00048710906909625677,
      "loss": 2.9552,
      "step": 65810
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7846771478652954,
      "learning_rate": 0.00048710587162432794,
      "loss": 2.7998,
      "step": 65811
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.2337000370025635,
      "learning_rate": 0.0004871026741176127,
      "loss": 3.1396,
      "step": 65812
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.305612564086914,
      "learning_rate": 0.0004870994765761114,
      "loss": 3.1576,
      "step": 65813
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5060691833496094,
      "learning_rate": 0.0004870962789998249,
      "loss": 3.1747,
      "step": 65814
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8593841791152954,
      "learning_rate": 0.00048709308138875367,
      "loss": 2.8335,
      "step": 65815
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7158643007278442,
      "learning_rate": 0.00048708988374289826,
      "loss": 3.1903,
      "step": 65816
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0758044719696045,
      "learning_rate": 0.0004870866860622593,
      "loss": 3.0108,
      "step": 65817
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.0587117671966553,
      "learning_rate": 0.0004870834883468374,
      "loss": 2.9638,
      "step": 65818
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.242555618286133,
      "learning_rate": 0.00048708029059663316,
      "loss": 2.9404,
      "step": 65819
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0325567722320557,
      "learning_rate": 0.00048707709281164713,
      "loss": 2.8619,
      "step": 65820
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.6954166889190674,
      "learning_rate": 0.00048707389499188,
      "loss": 3.0478,
      "step": 65821
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.568741798400879,
      "learning_rate": 0.0004870706971373323,
      "loss": 3.0941,
      "step": 65822
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4959547519683838,
      "learning_rate": 0.0004870674992480046,
      "loss": 3.0874,
      "step": 65823
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4380707740783691,
      "learning_rate": 0.00048706430132389747,
      "loss": 2.8514,
      "step": 65824
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8081306219100952,
      "learning_rate": 0.00048706110336501165,
      "loss": 3.0916,
      "step": 65825
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4595346450805664,
      "learning_rate": 0.00048705790537134755,
      "loss": 3.1767,
      "step": 65826
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5213338136672974,
      "learning_rate": 0.0004870547073429059,
      "loss": 3.0387,
      "step": 65827
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8269673585891724,
      "learning_rate": 0.0004870515092796872,
      "loss": 3.1085,
      "step": 65828
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9724111557006836,
      "learning_rate": 0.0004870483111816922,
      "loss": 3.1658,
      "step": 65829
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.484900712966919,
      "learning_rate": 0.00048704511304892127,
      "loss": 3.1135,
      "step": 65830
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6142542362213135,
      "learning_rate": 0.0004870419148813752,
      "loss": 3.0286,
      "step": 65831
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5372321605682373,
      "learning_rate": 0.0004870387166790545,
      "loss": 2.8725,
      "step": 65832
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7656886577606201,
      "learning_rate": 0.00048703551844195975,
      "loss": 3.1504,
      "step": 65833
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3472055196762085,
      "learning_rate": 0.0004870323201700916,
      "loss": 2.8709,
      "step": 65834
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3691445589065552,
      "learning_rate": 0.0004870291218634506,
      "loss": 3.2622,
      "step": 65835
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8102200031280518,
      "learning_rate": 0.0004870259235220373,
      "loss": 3.1563,
      "step": 65836
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3281668424606323,
      "learning_rate": 0.00048702272514585237,
      "loss": 3.0165,
      "step": 65837
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.463077187538147,
      "learning_rate": 0.0004870195267348964,
      "loss": 2.9856,
      "step": 65838
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0002362728118896,
      "learning_rate": 0.00048701632828917,
      "loss": 2.889,
      "step": 65839
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7183489799499512,
      "learning_rate": 0.0004870131298086737,
      "loss": 2.9877,
      "step": 65840
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.626429557800293,
      "learning_rate": 0.0004870099312934081,
      "loss": 3.0422,
      "step": 65841
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.253000497817993,
      "learning_rate": 0.0004870067327433739,
      "loss": 3.0511,
      "step": 65842
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5727177858352661,
      "learning_rate": 0.0004870035341585716,
      "loss": 2.9934,
      "step": 65843
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4941842555999756,
      "learning_rate": 0.0004870003355390018,
      "loss": 3.0412,
      "step": 65844
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4908496141433716,
      "learning_rate": 0.0004869971368846651,
      "loss": 3.1879,
      "step": 65845
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.506662130355835,
      "learning_rate": 0.00048699393819556216,
      "loss": 3.0138,
      "step": 65846
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4531172513961792,
      "learning_rate": 0.00048699073947169343,
      "loss": 3.1442,
      "step": 65847
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7300227880477905,
      "learning_rate": 0.0004869875407130596,
      "loss": 2.9627,
      "step": 65848
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7614079713821411,
      "learning_rate": 0.0004869843419196613,
      "loss": 3.1797,
      "step": 65849
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6726444959640503,
      "learning_rate": 0.0004869811430914991,
      "loss": 3.1728,
      "step": 65850
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7708719968795776,
      "learning_rate": 0.00048697794422857346,
      "loss": 3.0503,
      "step": 65851
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4440233707427979,
      "learning_rate": 0.00048697474533088516,
      "loss": 3.059,
      "step": 65852
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0945041179656982,
      "learning_rate": 0.00048697154639843474,
      "loss": 2.8037,
      "step": 65853
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4800225496292114,
      "learning_rate": 0.0004869683474312228,
      "loss": 3.0574,
      "step": 65854
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.609354019165039,
      "learning_rate": 0.00048696514842924986,
      "loss": 3.1923,
      "step": 65855
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.047804832458496,
      "learning_rate": 0.00048696194939251664,
      "loss": 3.0479,
      "step": 65856
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5665514469146729,
      "learning_rate": 0.0004869587503210236,
      "loss": 3.2006,
      "step": 65857
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5060220956802368,
      "learning_rate": 0.00048695555121477143,
      "loss": 2.9349,
      "step": 65858
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8137578964233398,
      "learning_rate": 0.00048695235207376063,
      "loss": 3.0673,
      "step": 65859
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.755897045135498,
      "learning_rate": 0.00048694915289799193,
      "loss": 2.8457,
      "step": 65860
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6648001670837402,
      "learning_rate": 0.0004869459536874658,
      "loss": 3.0062,
      "step": 65861
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9014805555343628,
      "learning_rate": 0.000486942754442183,
      "loss": 3.1752,
      "step": 65862
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5830210447311401,
      "learning_rate": 0.00048693955516214394,
      "loss": 3.1033,
      "step": 65863
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.320723056793213,
      "learning_rate": 0.0004869363558473493,
      "loss": 2.9484,
      "step": 65864
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0897645950317383,
      "learning_rate": 0.00048693315649779963,
      "loss": 3.1998,
      "step": 65865
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.605266809463501,
      "learning_rate": 0.0004869299571134956,
      "loss": 3.0302,
      "step": 65866
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9562585353851318,
      "learning_rate": 0.0004869267576944378,
      "loss": 3.233,
      "step": 65867
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.902494192123413,
      "learning_rate": 0.0004869235582406267,
      "loss": 2.9028,
      "step": 65868
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.026431083679199,
      "learning_rate": 0.00048692035875206295,
      "loss": 3.1971,
      "step": 65869
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5671768188476562,
      "learning_rate": 0.0004869171592287473,
      "loss": 2.8172,
      "step": 65870
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.7038722038269043,
      "learning_rate": 0.0004869139596706801,
      "loss": 2.7847,
      "step": 65871
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8791136741638184,
      "learning_rate": 0.00048691076007786215,
      "loss": 3.0225,
      "step": 65872
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4127941131591797,
      "learning_rate": 0.000486907560450294,
      "loss": 2.9349,
      "step": 65873
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5922778844833374,
      "learning_rate": 0.00048690436078797607,
      "loss": 2.8619,
      "step": 65874
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1128618717193604,
      "learning_rate": 0.0004869011610909092,
      "loss": 2.9374,
      "step": 65875
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.533774733543396,
      "learning_rate": 0.0004868979613590938,
      "loss": 2.8346,
      "step": 65876
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6202573776245117,
      "learning_rate": 0.0004868947615925306,
      "loss": 3.0929,
      "step": 65877
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4750481843948364,
      "learning_rate": 0.0004868915617912201,
      "loss": 3.3246,
      "step": 65878
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7094610929489136,
      "learning_rate": 0.000486888361955163,
      "loss": 3.1759,
      "step": 65879
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.512917160987854,
      "learning_rate": 0.0004868851620843597,
      "loss": 3.1243,
      "step": 65880
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9290649890899658,
      "learning_rate": 0.00048688196217881106,
      "loss": 2.8715,
      "step": 65881
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.113584280014038,
      "learning_rate": 0.00048687876223851747,
      "loss": 2.9785,
      "step": 65882
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.452038049697876,
      "learning_rate": 0.0004868755622634795,
      "loss": 3.1432,
      "step": 65883
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.36749005317688,
      "learning_rate": 0.00048687236225369797,
      "loss": 3.1091,
      "step": 65884
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4804391860961914,
      "learning_rate": 0.0004868691622091733,
      "loss": 2.9836,
      "step": 65885
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7235395908355713,
      "learning_rate": 0.0004868659621299061,
      "loss": 3.0197,
      "step": 65886
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8089873790740967,
      "learning_rate": 0.000486862762015897,
      "loss": 3.0133,
      "step": 65887
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7093863487243652,
      "learning_rate": 0.0004868595618671467,
      "loss": 2.9912,
      "step": 65888
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7732477188110352,
      "learning_rate": 0.0004868563616836555,
      "loss": 3.1469,
      "step": 65889
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0837624073028564,
      "learning_rate": 0.00048685316146542424,
      "loss": 3.0861,
      "step": 65890
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7773923873901367,
      "learning_rate": 0.00048684996121245346,
      "loss": 2.9023,
      "step": 65891
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5888292789459229,
      "learning_rate": 0.0004868467609247438,
      "loss": 3.0594,
      "step": 65892
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9931621551513672,
      "learning_rate": 0.0004868435606022957,
      "loss": 3.0016,
      "step": 65893
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0564796924591064,
      "learning_rate": 0.0004868403602451099,
      "loss": 3.1286,
      "step": 65894
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5687962770462036,
      "learning_rate": 0.00048683715985318695,
      "loss": 3.1196,
      "step": 65895
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5505709648132324,
      "learning_rate": 0.0004868339594265275,
      "loss": 2.9943,
      "step": 65896
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5187194347381592,
      "learning_rate": 0.00048683075896513195,
      "loss": 2.9884,
      "step": 65897
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7233270406723022,
      "learning_rate": 0.0004868275584690012,
      "loss": 2.9813,
      "step": 65898
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4422760009765625,
      "learning_rate": 0.0004868243579381356,
      "loss": 2.9436,
      "step": 65899
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.15797758102417,
      "learning_rate": 0.00048682115737253587,
      "loss": 2.8757,
      "step": 65900
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0471208095550537,
      "learning_rate": 0.0004868179567722025,
      "loss": 2.8031,
      "step": 65901
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5720415115356445,
      "learning_rate": 0.0004868147561371361,
      "loss": 3.1945,
      "step": 65902
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2589147090911865,
      "learning_rate": 0.0004868115554673374,
      "loss": 2.9116,
      "step": 65903
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.641972780227661,
      "learning_rate": 0.00048680835476280695,
      "loss": 3.0243,
      "step": 65904
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.556154489517212,
      "learning_rate": 0.0004868051540235452,
      "loss": 2.9182,
      "step": 65905
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4757888317108154,
      "learning_rate": 0.0004868019532495528,
      "loss": 3.0412,
      "step": 65906
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.574188470840454,
      "learning_rate": 0.00048679875244083055,
      "loss": 2.9015,
      "step": 65907
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3244372606277466,
      "learning_rate": 0.00048679555159737877,
      "loss": 3.0521,
      "step": 65908
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7796663045883179,
      "learning_rate": 0.0004867923507191982,
      "loss": 3.0029,
      "step": 65909
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5082389116287231,
      "learning_rate": 0.00048678914980628945,
      "loss": 3.2267,
      "step": 65910
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5066934823989868,
      "learning_rate": 0.000486785948858653,
      "loss": 3.0155,
      "step": 65911
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6492199897766113,
      "learning_rate": 0.00048678274787628955,
      "loss": 2.908,
      "step": 65912
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.581599473953247,
      "learning_rate": 0.0004867795468591997,
      "loss": 2.9346,
      "step": 65913
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.714104413986206,
      "learning_rate": 0.00048677634580738394,
      "loss": 2.9652,
      "step": 65914
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.590147852897644,
      "learning_rate": 0.0004867731447208429,
      "loss": 3.0124,
      "step": 65915
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5352762937545776,
      "learning_rate": 0.0004867699435995774,
      "loss": 2.9204,
      "step": 65916
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6775026321411133,
      "learning_rate": 0.00048676674244358764,
      "loss": 2.9515,
      "step": 65917
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.995064616203308,
      "learning_rate": 0.00048676354125287447,
      "loss": 2.9676,
      "step": 65918
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7331188917160034,
      "learning_rate": 0.0004867603400274384,
      "loss": 3.058,
      "step": 65919
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5247825384140015,
      "learning_rate": 0.00048675713876728015,
      "loss": 3.1474,
      "step": 65920
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.838456153869629,
      "learning_rate": 0.0004867539374724001,
      "loss": 2.8017,
      "step": 65921
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1086885929107666,
      "learning_rate": 0.0004867507361427991,
      "loss": 2.9999,
      "step": 65922
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6409796476364136,
      "learning_rate": 0.00048674753477847757,
      "loss": 3.259,
      "step": 65923
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7337650060653687,
      "learning_rate": 0.00048674433337943615,
      "loss": 2.9122,
      "step": 65924
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6183241605758667,
      "learning_rate": 0.00048674113194567537,
      "loss": 3.1949,
      "step": 65925
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3892818689346313,
      "learning_rate": 0.000486737930477196,
      "loss": 2.9836,
      "step": 65926
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6916942596435547,
      "learning_rate": 0.00048673472897399836,
      "loss": 3.0097,
      "step": 65927
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7065765857696533,
      "learning_rate": 0.0004867315274360833,
      "loss": 3.1494,
      "step": 65928
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7963615655899048,
      "learning_rate": 0.00048672832586345137,
      "loss": 3.2095,
      "step": 65929
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.651442289352417,
      "learning_rate": 0.0004867251242561031,
      "loss": 2.9571,
      "step": 65930
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.691776990890503,
      "learning_rate": 0.00048672192261403906,
      "loss": 2.9385,
      "step": 65931
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.225398063659668,
      "learning_rate": 0.0004867187209372599,
      "loss": 3.0304,
      "step": 65932
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.060448408126831,
      "learning_rate": 0.00048671551922576626,
      "loss": 2.9698,
      "step": 65933
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.742398977279663,
      "learning_rate": 0.00048671231747955863,
      "loss": 3.0741,
      "step": 65934
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1153171062469482,
      "learning_rate": 0.00048670911569863773,
      "loss": 3.2408,
      "step": 65935
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.560140371322632,
      "learning_rate": 0.00048670591388300403,
      "loss": 2.9304,
      "step": 65936
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.157311201095581,
      "learning_rate": 0.0004867027120326581,
      "loss": 3.135,
      "step": 65937
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.841830849647522,
      "learning_rate": 0.00048669951014760075,
      "loss": 2.8183,
      "step": 65938
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.235771417617798,
      "learning_rate": 0.00048669630822783237,
      "loss": 2.6879,
      "step": 65939
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.070387601852417,
      "learning_rate": 0.00048669310627335357,
      "loss": 3.1161,
      "step": 65940
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6669962406158447,
      "learning_rate": 0.0004866899042841651,
      "loss": 2.904,
      "step": 65941
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7846276760101318,
      "learning_rate": 0.00048668670226026736,
      "loss": 3.1917,
      "step": 65942
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7312566041946411,
      "learning_rate": 0.0004866835002016611,
      "loss": 2.9158,
      "step": 65943
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5750067234039307,
      "learning_rate": 0.0004866802981083468,
      "loss": 3.2346,
      "step": 65944
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.491512656211853,
      "learning_rate": 0.0004866770959803251,
      "loss": 2.9167,
      "step": 65945
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1170647144317627,
      "learning_rate": 0.00048667389381759667,
      "loss": 3.2734,
      "step": 65946
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.471619725227356,
      "learning_rate": 0.000486670691620162,
      "loss": 3.0276,
      "step": 65947
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8074545860290527,
      "learning_rate": 0.00048666748938802173,
      "loss": 2.8929,
      "step": 65948
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.9131290912628174,
      "learning_rate": 0.0004866642871211765,
      "loss": 2.9876,
      "step": 65949
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5231969356536865,
      "learning_rate": 0.0004866610848196268,
      "loss": 3.0263,
      "step": 65950
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.544250726699829,
      "learning_rate": 0.0004866578824833732,
      "loss": 3.2001,
      "step": 65951
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9211260080337524,
      "learning_rate": 0.0004866546801124165,
      "loss": 2.8882,
      "step": 65952
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1843175888061523,
      "learning_rate": 0.00048665147770675724,
      "loss": 2.819,
      "step": 65953
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4214162826538086,
      "learning_rate": 0.0004866482752663958,
      "loss": 2.8702,
      "step": 65954
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.658772349357605,
      "learning_rate": 0.000486645072791333,
      "loss": 3.2187,
      "step": 65955
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7307177782058716,
      "learning_rate": 0.00048664187028156925,
      "loss": 2.8107,
      "step": 65956
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.532336711883545,
      "learning_rate": 0.00048663866773710537,
      "loss": 3.1869,
      "step": 65957
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.463944673538208,
      "learning_rate": 0.0004866354651579418,
      "loss": 2.9797,
      "step": 65958
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1175124645233154,
      "learning_rate": 0.00048663226254407917,
      "loss": 2.9244,
      "step": 65959
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.570670247077942,
      "learning_rate": 0.00048662905989551805,
      "loss": 3.2528,
      "step": 65960
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.816778302192688,
      "learning_rate": 0.00048662585721225915,
      "loss": 3.05,
      "step": 65961
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.867257833480835,
      "learning_rate": 0.0004866226544943029,
      "loss": 2.9728,
      "step": 65962
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8979989290237427,
      "learning_rate": 0.00048661945174165,
      "loss": 3.2826,
      "step": 65963
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.193640947341919,
      "learning_rate": 0.000486616248954301,
      "loss": 3.0522,
      "step": 65964
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6870352029800415,
      "learning_rate": 0.00048661304613225654,
      "loss": 3.1918,
      "step": 65965
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.054856538772583,
      "learning_rate": 0.0004866098432755172,
      "loss": 2.8212,
      "step": 65966
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.8388237953186035,
      "learning_rate": 0.0004866066403840836,
      "loss": 2.9536,
      "step": 65967
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.890773892402649,
      "learning_rate": 0.0004866034374579562,
      "loss": 3.2385,
      "step": 65968
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5644160509109497,
      "learning_rate": 0.00048660023449713575,
      "loss": 3.0315,
      "step": 65969
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9090571403503418,
      "learning_rate": 0.00048659703150162283,
      "loss": 2.8466,
      "step": 65970
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8107068538665771,
      "learning_rate": 0.000486593828471418,
      "loss": 2.9338,
      "step": 65971
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.456794023513794,
      "learning_rate": 0.00048659062540652183,
      "loss": 2.839,
      "step": 65972
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.437890887260437,
      "learning_rate": 0.00048658742230693494,
      "loss": 3.0401,
      "step": 65973
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.8579416275024414,
      "learning_rate": 0.0004865842191726579,
      "loss": 3.1252,
      "step": 65974
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4004225730895996,
      "learning_rate": 0.00048658101600369136,
      "loss": 3.0781,
      "step": 65975
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7200875282287598,
      "learning_rate": 0.0004865778128000359,
      "loss": 3.2289,
      "step": 65976
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9457776546478271,
      "learning_rate": 0.00048657460956169206,
      "loss": 3.2744,
      "step": 65977
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.7568650245666504,
      "learning_rate": 0.0004865714062886605,
      "loss": 3.1309,
      "step": 65978
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.805375576019287,
      "learning_rate": 0.00048656820298094184,
      "loss": 3.1294,
      "step": 65979
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6156816482543945,
      "learning_rate": 0.0004865649996385366,
      "loss": 2.8678,
      "step": 65980
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.275559186935425,
      "learning_rate": 0.0004865617962614454,
      "loss": 3.0588,
      "step": 65981
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.8957085609436035,
      "learning_rate": 0.0004865585928496688,
      "loss": 3.0438,
      "step": 65982
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5747617483139038,
      "learning_rate": 0.0004865553894032075,
      "loss": 3.0885,
      "step": 65983
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5624268054962158,
      "learning_rate": 0.00048655218592206205,
      "loss": 2.9383,
      "step": 65984
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.7117855548858643,
      "learning_rate": 0.00048654898240623287,
      "loss": 3.111,
      "step": 65985
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.6788604259490967,
      "learning_rate": 0.00048654577885572087,
      "loss": 3.0669,
      "step": 65986
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.909095048904419,
      "learning_rate": 0.0004865425752705265,
      "loss": 2.8201,
      "step": 65987
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5657118558883667,
      "learning_rate": 0.00048653937165065017,
      "loss": 3.1439,
      "step": 65988
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2386343479156494,
      "learning_rate": 0.0004865361679960928,
      "loss": 3.0242,
      "step": 65989
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.6894493103027344,
      "learning_rate": 0.00048653296430685477,
      "loss": 3.1313,
      "step": 65990
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.540877103805542,
      "learning_rate": 0.00048652976058293683,
      "loss": 2.8571,
      "step": 65991
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6060603857040405,
      "learning_rate": 0.0004865265568243394,
      "loss": 2.883,
      "step": 65992
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.6420931816101074,
      "learning_rate": 0.0004865233530310632,
      "loss": 2.9661,
      "step": 65993
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9976528882980347,
      "learning_rate": 0.00048652014920310874,
      "loss": 2.9165,
      "step": 65994
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.971995234489441,
      "learning_rate": 0.00048651694534047673,
      "loss": 3.0698,
      "step": 65995
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.368327021598816,
      "learning_rate": 0.0004865137414431677,
      "loss": 2.8618,
      "step": 65996
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9680302143096924,
      "learning_rate": 0.0004865105375111822,
      "loss": 2.9133,
      "step": 65997
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8219304084777832,
      "learning_rate": 0.0004865073335445209,
      "loss": 3.1037,
      "step": 65998
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4393149614334106,
      "learning_rate": 0.00048650412954318436,
      "loss": 2.9399,
      "step": 65999
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4825176000595093,
      "learning_rate": 0.0004865009255071731,
      "loss": 3.2237,
      "step": 66000
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1155810356140137,
      "learning_rate": 0.00048649772143648793,
      "loss": 3.1898,
      "step": 66001
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3767361640930176,
      "learning_rate": 0.0004864945173311293,
      "loss": 3.2107,
      "step": 66002
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8592878580093384,
      "learning_rate": 0.0004864913131910977,
      "loss": 3.3752,
      "step": 66003
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1630403995513916,
      "learning_rate": 0.000486488109016394,
      "loss": 3.0158,
      "step": 66004
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5988311767578125,
      "learning_rate": 0.00048648490480701845,
      "loss": 3.0019,
      "step": 66005
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8415826559066772,
      "learning_rate": 0.000486481700562972,
      "loss": 3.0268,
      "step": 66006
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.768704891204834,
      "learning_rate": 0.0004864784962842551,
      "loss": 2.8576,
      "step": 66007
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0279293060302734,
      "learning_rate": 0.0004864752919708682,
      "loss": 3.0599,
      "step": 66008
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9797667264938354,
      "learning_rate": 0.000486472087622812,
      "loss": 3.1914,
      "step": 66009
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7351864576339722,
      "learning_rate": 0.00048646888324008723,
      "loss": 3.231,
      "step": 66010
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5763393640518188,
      "learning_rate": 0.00048646567882269434,
      "loss": 3.1026,
      "step": 66011
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.866797924041748,
      "learning_rate": 0.0004864624743706339,
      "loss": 3.086,
      "step": 66012
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.5948069095611572,
      "learning_rate": 0.0004864592698839067,
      "loss": 2.9908,
      "step": 66013
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1969833374023438,
      "learning_rate": 0.0004864560653625131,
      "loss": 3.0689,
      "step": 66014
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.472715139389038,
      "learning_rate": 0.00048645286080645385,
      "loss": 3.1247,
      "step": 66015
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.430657982826233,
      "learning_rate": 0.0004864496562157295,
      "loss": 3.1241,
      "step": 66016
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.782554864883423,
      "learning_rate": 0.00048644645159034056,
      "loss": 2.8484,
      "step": 66017
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5870739221572876,
      "learning_rate": 0.00048644324693028777,
      "loss": 3.0375,
      "step": 66018
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4374563694000244,
      "learning_rate": 0.00048644004223557163,
      "loss": 2.9745,
      "step": 66019
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.391527771949768,
      "learning_rate": 0.0004864368375061928,
      "loss": 2.9789,
      "step": 66020
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5007225275039673,
      "learning_rate": 0.0004864336327421518,
      "loss": 3.2445,
      "step": 66021
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.523737907409668,
      "learning_rate": 0.00048643042794344933,
      "loss": 3.0083,
      "step": 66022
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0111446380615234,
      "learning_rate": 0.00048642722311008585,
      "loss": 3.1546,
      "step": 66023
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.503078818321228,
      "learning_rate": 0.0004864240182420621,
      "loss": 3.1137,
      "step": 66024
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5641006231307983,
      "learning_rate": 0.0004864208133393786,
      "loss": 2.9828,
      "step": 66025
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6596144437789917,
      "learning_rate": 0.0004864176084020359,
      "loss": 2.9507,
      "step": 66026
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.056903123855591,
      "learning_rate": 0.0004864144034300346,
      "loss": 2.981,
      "step": 66027
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.690922498703003,
      "learning_rate": 0.00048641119842337546,
      "loss": 2.765,
      "step": 66028
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5370776653289795,
      "learning_rate": 0.0004864079933820589,
      "loss": 3.092,
      "step": 66029
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4189008474349976,
      "learning_rate": 0.00048640478830608557,
      "loss": 3.1339,
      "step": 66030
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9169394969940186,
      "learning_rate": 0.00048640158319545614,
      "loss": 2.9863,
      "step": 66031
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5556296110153198,
      "learning_rate": 0.0004863983780501711,
      "loss": 3.3131,
      "step": 66032
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9110021591186523,
      "learning_rate": 0.00048639517287023113,
      "loss": 2.9707,
      "step": 66033
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7717666625976562,
      "learning_rate": 0.00048639196765563663,
      "loss": 3.085,
      "step": 66034
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7746834754943848,
      "learning_rate": 0.0004863887624063885,
      "loss": 3.0256,
      "step": 66035
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.703856110572815,
      "learning_rate": 0.00048638555712248707,
      "loss": 2.8724,
      "step": 66036
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.425534725189209,
      "learning_rate": 0.0004863823518039331,
      "loss": 3.1917,
      "step": 66037
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.562207579612732,
      "learning_rate": 0.00048637914645072714,
      "loss": 3.1395,
      "step": 66038
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7422398328781128,
      "learning_rate": 0.00048637594106286977,
      "loss": 2.9984,
      "step": 66039
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6994251012802124,
      "learning_rate": 0.0004863727356403616,
      "loss": 3.1662,
      "step": 66040
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5474653244018555,
      "learning_rate": 0.0004863695301832032,
      "loss": 3.0091,
      "step": 66041
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6777738332748413,
      "learning_rate": 0.0004863663246913952,
      "loss": 3.0716,
      "step": 66042
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3991881608963013,
      "learning_rate": 0.00048636311916493814,
      "loss": 3.0261,
      "step": 66043
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6164189577102661,
      "learning_rate": 0.00048635991360383273,
      "loss": 3.3217,
      "step": 66044
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7539299726486206,
      "learning_rate": 0.00048635670800807943,
      "loss": 3.0398,
      "step": 66045
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5047593116760254,
      "learning_rate": 0.000486353502377679,
      "loss": 2.9739,
      "step": 66046
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.595341444015503,
      "learning_rate": 0.0004863502967126318,
      "loss": 3.1421,
      "step": 66047
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9309278726577759,
      "learning_rate": 0.00048634709101293865,
      "loss": 2.7271,
      "step": 66048
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.9777090549468994,
      "learning_rate": 0.0004863438852786,
      "loss": 3.0274,
      "step": 66049
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9415730237960815,
      "learning_rate": 0.0004863406795096166,
      "loss": 2.9983,
      "step": 66050
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5436859130859375,
      "learning_rate": 0.0004863374737059889,
      "loss": 3.144,
      "step": 66051
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7794593572616577,
      "learning_rate": 0.00048633426786771753,
      "loss": 3.0931,
      "step": 66052
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4343574047088623,
      "learning_rate": 0.0004863310619948031,
      "loss": 2.9624,
      "step": 66053
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6113801002502441,
      "learning_rate": 0.0004863278560872462,
      "loss": 3.0561,
      "step": 66054
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4126907587051392,
      "learning_rate": 0.00048632465014504745,
      "loss": 3.1079,
      "step": 66055
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.26297926902771,
      "learning_rate": 0.00048632144416820743,
      "loss": 2.835,
      "step": 66056
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.016160726547241,
      "learning_rate": 0.00048631823815672683,
      "loss": 3.0333,
      "step": 66057
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4068950414657593,
      "learning_rate": 0.00048631503211060597,
      "loss": 3.1157,
      "step": 66058
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.9562339782714844,
      "learning_rate": 0.0004863118260298458,
      "loss": 3.1535,
      "step": 66059
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.713564872741699,
      "learning_rate": 0.00048630861991444656,
      "loss": 3.0129,
      "step": 66060
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8911375999450684,
      "learning_rate": 0.0004863054137644092,
      "loss": 3.2989,
      "step": 66061
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3613436222076416,
      "learning_rate": 0.00048630220757973403,
      "loss": 3.1454,
      "step": 66062
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.891566514968872,
      "learning_rate": 0.00048629900136042185,
      "loss": 3.2763,
      "step": 66063
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.941802740097046,
      "learning_rate": 0.0004862957951064731,
      "loss": 3.112,
      "step": 66064
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4281318187713623,
      "learning_rate": 0.0004862925888178885,
      "loss": 3.0827,
      "step": 66065
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2495639324188232,
      "learning_rate": 0.00048628938249466863,
      "loss": 2.965,
      "step": 66066
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0339910984039307,
      "learning_rate": 0.00048628617613681395,
      "loss": 3.207,
      "step": 66067
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5523258447647095,
      "learning_rate": 0.0004862829697443252,
      "loss": 3.1249,
      "step": 66068
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.300793170928955,
      "learning_rate": 0.00048627976331720285,
      "loss": 2.8545,
      "step": 66069
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5018771886825562,
      "learning_rate": 0.00048627655685544766,
      "loss": 3.1205,
      "step": 66070
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6437546014785767,
      "learning_rate": 0.0004862733503590602,
      "loss": 2.9533,
      "step": 66071
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.241929531097412,
      "learning_rate": 0.0004862701438280409,
      "loss": 3.0659,
      "step": 66072
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.081773042678833,
      "learning_rate": 0.0004862669372623905,
      "loss": 2.9469,
      "step": 66073
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.496188998222351,
      "learning_rate": 0.0004862637306621096,
      "loss": 3.0595,
      "step": 66074
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.999085545539856,
      "learning_rate": 0.00048626052402719866,
      "loss": 3.0812,
      "step": 66075
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.134105682373047,
      "learning_rate": 0.0004862573173576585,
      "loss": 3.1078,
      "step": 66076
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5126533508300781,
      "learning_rate": 0.0004862541106534895,
      "loss": 2.8308,
      "step": 66077
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9074394702911377,
      "learning_rate": 0.0004862509039146923,
      "loss": 3.1768,
      "step": 66078
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.274793863296509,
      "learning_rate": 0.00048624769714126766,
      "loss": 3.2216,
      "step": 66079
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.006511688232422,
      "learning_rate": 0.000486244490333216,
      "loss": 3.0901,
      "step": 66080
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.773436188697815,
      "learning_rate": 0.00048624128349053795,
      "loss": 3.0842,
      "step": 66081
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4618605375289917,
      "learning_rate": 0.00048623807661323415,
      "loss": 2.8362,
      "step": 66082
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5820245742797852,
      "learning_rate": 0.0004862348697013052,
      "loss": 2.976,
      "step": 66083
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4430062770843506,
      "learning_rate": 0.00048623166275475163,
      "loss": 3.0616,
      "step": 66084
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.01059627532959,
      "learning_rate": 0.0004862284557735741,
      "loss": 3.364,
      "step": 66085
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5972100496292114,
      "learning_rate": 0.00048622524875777324,
      "loss": 2.8557,
      "step": 66086
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.728893518447876,
      "learning_rate": 0.00048622204170734956,
      "loss": 2.9095,
      "step": 66087
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3190224170684814,
      "learning_rate": 0.00048621883462230363,
      "loss": 3.0734,
      "step": 66088
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2541491985321045,
      "learning_rate": 0.0004862156275026362,
      "loss": 3.0804,
      "step": 66089
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.011709213256836,
      "learning_rate": 0.00048621242034834763,
      "loss": 2.62,
      "step": 66090
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9274905920028687,
      "learning_rate": 0.0004862092131594388,
      "loss": 3.0264,
      "step": 66091
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3624637126922607,
      "learning_rate": 0.00048620600593591006,
      "loss": 3.1592,
      "step": 66092
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6098499298095703,
      "learning_rate": 0.0004862027986777622,
      "loss": 3.0179,
      "step": 66093
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4879074096679688,
      "learning_rate": 0.0004861995913849956,
      "loss": 3.005,
      "step": 66094
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4019644260406494,
      "learning_rate": 0.00048619638405761106,
      "loss": 3.0981,
      "step": 66095
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.728242039680481,
      "learning_rate": 0.00048619317669560915,
      "loss": 2.9102,
      "step": 66096
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9797312021255493,
      "learning_rate": 0.0004861899692989903,
      "loss": 2.9487,
      "step": 66097
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.68979012966156,
      "learning_rate": 0.0004861867618677553,
      "loss": 3.0979,
      "step": 66098
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6523785591125488,
      "learning_rate": 0.00048618355440190465,
      "loss": 3.2112,
      "step": 66099
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.263755202293396,
      "learning_rate": 0.0004861803469014389,
      "loss": 3.2752,
      "step": 66100
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6390560865402222,
      "learning_rate": 0.0004861771393663588,
      "loss": 3.0305,
      "step": 66101
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9832972288131714,
      "learning_rate": 0.0004861739317966648,
      "loss": 3.2147,
      "step": 66102
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.523206353187561,
      "learning_rate": 0.0004861707241923576,
      "loss": 2.9234,
      "step": 66103
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9190322160720825,
      "learning_rate": 0.00048616751655343763,
      "loss": 2.8355,
      "step": 66104
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6120244264602661,
      "learning_rate": 0.0004861643088799058,
      "loss": 2.8259,
      "step": 66105
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.6934549808502197,
      "learning_rate": 0.00048616110117176236,
      "loss": 3.1957,
      "step": 66106
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7579046487808228,
      "learning_rate": 0.00048615789342900805,
      "loss": 3.007,
      "step": 66107
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.91366708278656,
      "learning_rate": 0.0004861546856516435,
      "loss": 3.073,
      "step": 66108
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4569364786148071,
      "learning_rate": 0.00048615147783966935,
      "loss": 3.0708,
      "step": 66109
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.712374448776245,
      "learning_rate": 0.00048614826999308604,
      "loss": 2.9343,
      "step": 66110
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.611877918243408,
      "learning_rate": 0.00048614506211189435,
      "loss": 3.1931,
      "step": 66111
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.202103614807129,
      "learning_rate": 0.0004861418541960947,
      "loss": 3.0955,
      "step": 66112
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.88167142868042,
      "learning_rate": 0.00048613864624568777,
      "loss": 3.3084,
      "step": 66113
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9246021509170532,
      "learning_rate": 0.00048613543826067423,
      "loss": 2.939,
      "step": 66114
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.008685350418091,
      "learning_rate": 0.00048613223024105453,
      "loss": 3.1525,
      "step": 66115
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0400586128234863,
      "learning_rate": 0.0004861290221868293,
      "loss": 3.077,
      "step": 66116
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.1713287830352783,
      "learning_rate": 0.0004861258140979993,
      "loss": 2.7225,
      "step": 66117
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.809464454650879,
      "learning_rate": 0.00048612260597456487,
      "loss": 3.0561,
      "step": 66118
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6303178071975708,
      "learning_rate": 0.00048611939781652676,
      "loss": 3.2159,
      "step": 66119
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6571420431137085,
      "learning_rate": 0.00048611618962388564,
      "loss": 3.055,
      "step": 66120
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5974171161651611,
      "learning_rate": 0.0004861129813966419,
      "loss": 2.6117,
      "step": 66121
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5187712907791138,
      "learning_rate": 0.00048610977313479623,
      "loss": 2.7589,
      "step": 66122
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4197485446929932,
      "learning_rate": 0.0004861065648383493,
      "loss": 2.9876,
      "step": 66123
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9976718425750732,
      "learning_rate": 0.00048610335650730154,
      "loss": 3.0571,
      "step": 66124
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2597298622131348,
      "learning_rate": 0.0004861001481416537,
      "loss": 3.2286,
      "step": 66125
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8995661735534668,
      "learning_rate": 0.00048609693974140653,
      "loss": 3.0166,
      "step": 66126
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5204520225524902,
      "learning_rate": 0.00048609373130656017,
      "loss": 2.9024,
      "step": 66127
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.361415386199951,
      "learning_rate": 0.0004860905228371155,
      "loss": 2.9825,
      "step": 66128
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3594472408294678,
      "learning_rate": 0.00048608731433307325,
      "loss": 3.0634,
      "step": 66129
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7564795017242432,
      "learning_rate": 0.0004860841057944338,
      "loss": 3.3412,
      "step": 66130
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6451671123504639,
      "learning_rate": 0.00048608089722119774,
      "loss": 2.8989,
      "step": 66131
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4663450717926025,
      "learning_rate": 0.0004860776886133658,
      "loss": 3.2076,
      "step": 66132
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.8210527896881104,
      "learning_rate": 0.0004860744799709384,
      "loss": 3.101,
      "step": 66133
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.716858148574829,
      "learning_rate": 0.00048607127129391623,
      "loss": 3.0553,
      "step": 66134
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.845687747001648,
      "learning_rate": 0.00048606806258230006,
      "loss": 2.7085,
      "step": 66135
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.579561710357666,
      "learning_rate": 0.0004860648538360902,
      "loss": 2.9718,
      "step": 66136
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.325706958770752,
      "learning_rate": 0.0004860616450552874,
      "loss": 3.1423,
      "step": 66137
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.483332633972168,
      "learning_rate": 0.00048605843623989223,
      "loss": 2.9454,
      "step": 66138
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5397919416427612,
      "learning_rate": 0.00048605522738990524,
      "loss": 3.0261,
      "step": 66139
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.844943046569824,
      "learning_rate": 0.00048605201850532715,
      "loss": 2.9285,
      "step": 66140
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.277707576751709,
      "learning_rate": 0.0004860488095861584,
      "loss": 3.0176,
      "step": 66141
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1033120155334473,
      "learning_rate": 0.00048604560063239977,
      "loss": 2.7272,
      "step": 66142
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9051848649978638,
      "learning_rate": 0.00048604239164405167,
      "loss": 3.0239,
      "step": 66143
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.515332818031311,
      "learning_rate": 0.0004860391826211148,
      "loss": 3.0169,
      "step": 66144
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.001314401626587,
      "learning_rate": 0.0004860359735635897,
      "loss": 3.342,
      "step": 66145
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.7058937549591064,
      "learning_rate": 0.00048603276447147707,
      "loss": 3.251,
      "step": 66146
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.068755626678467,
      "learning_rate": 0.0004860295553447774,
      "loss": 3.1006,
      "step": 66147
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8441330194473267,
      "learning_rate": 0.0004860263461834913,
      "loss": 2.9846,
      "step": 66148
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.9723992347717285,
      "learning_rate": 0.0004860231369876194,
      "loss": 2.8041,
      "step": 66149
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0908658504486084,
      "learning_rate": 0.00048601992775716235,
      "loss": 3.068,
      "step": 66150
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6221457719802856,
      "learning_rate": 0.00048601671849212054,
      "loss": 3.012,
      "step": 66151
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8726037740707397,
      "learning_rate": 0.0004860135091924949,
      "loss": 2.9914,
      "step": 66152
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5987625122070312,
      "learning_rate": 0.0004860102998582857,
      "loss": 3.2609,
      "step": 66153
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6529152393341064,
      "learning_rate": 0.0004860070904894937,
      "loss": 2.9035,
      "step": 66154
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.381779670715332,
      "learning_rate": 0.0004860038810861195,
      "loss": 3.0622,
      "step": 66155
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7592687606811523,
      "learning_rate": 0.00048600067164816373,
      "loss": 3.2929,
      "step": 66156
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9231336116790771,
      "learning_rate": 0.0004859974621756268,
      "loss": 3.0537,
      "step": 66157
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7934446334838867,
      "learning_rate": 0.00048599425266850944,
      "loss": 3.3617,
      "step": 66158
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.31527841091156,
      "learning_rate": 0.0004859910431268123,
      "loss": 2.8891,
      "step": 66159
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.716574788093567,
      "learning_rate": 0.0004859878335505359,
      "loss": 2.9538,
      "step": 66160
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5028713941574097,
      "learning_rate": 0.00048598462393968086,
      "loss": 3.0688,
      "step": 66161
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.51810884475708,
      "learning_rate": 0.0004859814142942477,
      "loss": 3.2298,
      "step": 66162
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7267898321151733,
      "learning_rate": 0.0004859782046142372,
      "loss": 3.2349,
      "step": 66163
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.72658371925354,
      "learning_rate": 0.00048597499489964977,
      "loss": 2.8208,
      "step": 66164
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.7866852283477783,
      "learning_rate": 0.00048597178515048605,
      "loss": 2.8117,
      "step": 66165
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7888449430465698,
      "learning_rate": 0.0004859685753667467,
      "loss": 2.7987,
      "step": 66166
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7079395055770874,
      "learning_rate": 0.0004859653655484323,
      "loss": 3.1554,
      "step": 66167
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5497976541519165,
      "learning_rate": 0.0004859621556955434,
      "loss": 2.9993,
      "step": 66168
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.673105001449585,
      "learning_rate": 0.00048595894580808066,
      "loss": 3.1378,
      "step": 66169
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4227659702301025,
      "learning_rate": 0.0004859557358860446,
      "loss": 2.8178,
      "step": 66170
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1307036876678467,
      "learning_rate": 0.0004859525259294358,
      "loss": 3.0219,
      "step": 66171
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9223761558532715,
      "learning_rate": 0.00048594931593825506,
      "loss": 3.1331,
      "step": 66172
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5337541103363037,
      "learning_rate": 0.0004859461059125028,
      "loss": 3.2288,
      "step": 66173
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3644474744796753,
      "learning_rate": 0.0004859428958521795,
      "loss": 3.0984,
      "step": 66174
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3130834102630615,
      "learning_rate": 0.00048593968575728614,
      "loss": 3.0005,
      "step": 66175
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.714950680732727,
      "learning_rate": 0.0004859364756278229,
      "loss": 3.0424,
      "step": 66176
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8420329093933105,
      "learning_rate": 0.0004859332654637906,
      "loss": 2.8646,
      "step": 66177
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6945147514343262,
      "learning_rate": 0.0004859300552651898,
      "loss": 2.9681,
      "step": 66178
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.055868625640869,
      "learning_rate": 0.0004859268450320212,
      "loss": 2.9327,
      "step": 66179
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5242339372634888,
      "learning_rate": 0.00048592363476428516,
      "loss": 2.8972,
      "step": 66180
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.567110300064087,
      "learning_rate": 0.00048592042446198247,
      "loss": 3.1229,
      "step": 66181
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.503848910331726,
      "learning_rate": 0.0004859172141251136,
      "loss": 3.0108,
      "step": 66182
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5087534189224243,
      "learning_rate": 0.00048591400375367917,
      "loss": 3.1519,
      "step": 66183
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0433545112609863,
      "learning_rate": 0.00048591079334768,
      "loss": 2.9156,
      "step": 66184
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5738123655319214,
      "learning_rate": 0.00048590758290711634,
      "loss": 2.8937,
      "step": 66185
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9353617429733276,
      "learning_rate": 0.000485904372431989,
      "loss": 3.13,
      "step": 66186
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6819318532943726,
      "learning_rate": 0.00048590116192229866,
      "loss": 3.0542,
      "step": 66187
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.2731235027313232,
      "learning_rate": 0.00048589795137804557,
      "loss": 2.9275,
      "step": 66188
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.55410635471344,
      "learning_rate": 0.00048589474079923067,
      "loss": 2.901,
      "step": 66189
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5099029541015625,
      "learning_rate": 0.0004858915301858544,
      "loss": 2.9329,
      "step": 66190
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.966295838356018,
      "learning_rate": 0.00048588831953791736,
      "loss": 3.0759,
      "step": 66191
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.833131790161133,
      "learning_rate": 0.0004858851088554202,
      "loss": 2.9667,
      "step": 66192
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.368206262588501,
      "learning_rate": 0.00048588189813836345,
      "loss": 3.0722,
      "step": 66193
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4602816104888916,
      "learning_rate": 0.00048587868738674775,
      "loss": 2.9184,
      "step": 66194
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8653230667114258,
      "learning_rate": 0.00048587547660057376,
      "loss": 3.0407,
      "step": 66195
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.466396689414978,
      "learning_rate": 0.00048587226577984197,
      "loss": 3.2267,
      "step": 66196
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.20383620262146,
      "learning_rate": 0.00048586905492455296,
      "loss": 2.8704,
      "step": 66197
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6816082000732422,
      "learning_rate": 0.0004858658440347075,
      "loss": 3.3657,
      "step": 66198
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9669963121414185,
      "learning_rate": 0.0004858626331103061,
      "loss": 2.8189,
      "step": 66199
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5319494009017944,
      "learning_rate": 0.00048585942215134914,
      "loss": 2.9651,
      "step": 66200
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.508771538734436,
      "learning_rate": 0.0004858562111578375,
      "loss": 2.9817,
      "step": 66201
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6111080646514893,
      "learning_rate": 0.00048585300012977175,
      "loss": 3.1212,
      "step": 66202
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4118527173995972,
      "learning_rate": 0.00048584978906715233,
      "loss": 2.9816,
      "step": 66203
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0468904972076416,
      "learning_rate": 0.00048584657796997994,
      "loss": 2.9579,
      "step": 66204
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4349064826965332,
      "learning_rate": 0.0004858433668382552,
      "loss": 3.0488,
      "step": 66205
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9181969165802002,
      "learning_rate": 0.00048584015567197864,
      "loss": 2.9162,
      "step": 66206
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6921757459640503,
      "learning_rate": 0.0004858369444711509,
      "loss": 2.9294,
      "step": 66207
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.752246141433716,
      "learning_rate": 0.0004858337332357726,
      "loss": 2.9543,
      "step": 66208
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4533628225326538,
      "learning_rate": 0.0004858305219658442,
      "loss": 3.0839,
      "step": 66209
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.824777364730835,
      "learning_rate": 0.0004858273106613664,
      "loss": 2.9407,
      "step": 66210
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.5224831104278564,
      "learning_rate": 0.00048582409932233995,
      "loss": 2.9224,
      "step": 66211
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.05452299118042,
      "learning_rate": 0.00048582088794876517,
      "loss": 2.9979,
      "step": 66212
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9397045373916626,
      "learning_rate": 0.0004858176765406428,
      "loss": 2.7796,
      "step": 66213
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.519059658050537,
      "learning_rate": 0.0004858144650979734,
      "loss": 3.0338,
      "step": 66214
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0641958713531494,
      "learning_rate": 0.0004858112536207576,
      "loss": 3.0668,
      "step": 66215
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.986204981803894,
      "learning_rate": 0.00048580804210899593,
      "loss": 3.1932,
      "step": 66216
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9765552282333374,
      "learning_rate": 0.0004858048305626892,
      "loss": 3.1693,
      "step": 66217
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3934683799743652,
      "learning_rate": 0.0004858016189818377,
      "loss": 2.8928,
      "step": 66218
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8786864280700684,
      "learning_rate": 0.0004857984073664422,
      "loss": 3.0301,
      "step": 66219
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6537896394729614,
      "learning_rate": 0.00048579519571650325,
      "loss": 3.27,
      "step": 66220
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4362152814865112,
      "learning_rate": 0.00048579198403202157,
      "loss": 2.98,
      "step": 66221
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9053062200546265,
      "learning_rate": 0.0004857887723129975,
      "loss": 3.0455,
      "step": 66222
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3933370113372803,
      "learning_rate": 0.0004857855605594319,
      "loss": 2.924,
      "step": 66223
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.44809091091156,
      "learning_rate": 0.00048578234877132517,
      "loss": 3.0509,
      "step": 66224
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5762042999267578,
      "learning_rate": 0.0004857791369486781,
      "loss": 2.9215,
      "step": 66225
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6767222881317139,
      "learning_rate": 0.0004857759250914911,
      "loss": 3.086,
      "step": 66226
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4092084169387817,
      "learning_rate": 0.00048577271319976496,
      "loss": 3.1571,
      "step": 66227
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0488409996032715,
      "learning_rate": 0.00048576950127349997,
      "loss": 3.0377,
      "step": 66228
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.669223666191101,
      "learning_rate": 0.00048576628931269706,
      "loss": 2.8867,
      "step": 66229
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4976991415023804,
      "learning_rate": 0.0004857630773173567,
      "loss": 3.1762,
      "step": 66230
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2706611156463623,
      "learning_rate": 0.0004857598652874794,
      "loss": 2.7543,
      "step": 66231
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3971540927886963,
      "learning_rate": 0.0004857566532230659,
      "loss": 2.9502,
      "step": 66232
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2687864303588867,
      "learning_rate": 0.00048575344112411674,
      "loss": 2.9685,
      "step": 66233
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.678347110748291,
      "learning_rate": 0.00048575022899063246,
      "loss": 3.0813,
      "step": 66234
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9014941453933716,
      "learning_rate": 0.00048574701682261375,
      "loss": 2.9656,
      "step": 66235
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8507486581802368,
      "learning_rate": 0.00048574380462006113,
      "loss": 3.1848,
      "step": 66236
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7642275094985962,
      "learning_rate": 0.0004857405923829752,
      "loss": 2.9674,
      "step": 66237
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.573388695716858,
      "learning_rate": 0.00048573738011135664,
      "loss": 3.1578,
      "step": 66238
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.620166540145874,
      "learning_rate": 0.00048573416780520596,
      "loss": 3.3615,
      "step": 66239
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.366519570350647,
      "learning_rate": 0.0004857309554645239,
      "loss": 2.9889,
      "step": 66240
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.741881251335144,
      "learning_rate": 0.0004857277430893108,
      "loss": 3.0154,
      "step": 66241
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.548261046409607,
      "learning_rate": 0.00048572453067956755,
      "loss": 3.0777,
      "step": 66242
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5673972368240356,
      "learning_rate": 0.0004857213182352944,
      "loss": 3.127,
      "step": 66243
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4133347272872925,
      "learning_rate": 0.0004857181057564924,
      "loss": 3.0877,
      "step": 66244
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3021862506866455,
      "learning_rate": 0.00048571489324316167,
      "loss": 2.9909,
      "step": 66245
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4514919519424438,
      "learning_rate": 0.00048571168069530313,
      "loss": 2.9637,
      "step": 66246
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7410434484481812,
      "learning_rate": 0.0004857084681129173,
      "loss": 2.8453,
      "step": 66247
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9768418073654175,
      "learning_rate": 0.0004857052554960048,
      "loss": 3.1517,
      "step": 66248
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3871804475784302,
      "learning_rate": 0.00048570204284456613,
      "loss": 2.9202,
      "step": 66249
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6167746782302856,
      "learning_rate": 0.0004856988301586019,
      "loss": 3.0735,
      "step": 66250
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.737546443939209,
      "learning_rate": 0.0004856956174381129,
      "loss": 2.6178,
      "step": 66251
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.884300947189331,
      "learning_rate": 0.00048569240468309945,
      "loss": 2.9118,
      "step": 66252
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6700806617736816,
      "learning_rate": 0.0004856891918935623,
      "loss": 3.04,
      "step": 66253
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9874454736709595,
      "learning_rate": 0.00048568597906950204,
      "loss": 3.0225,
      "step": 66254
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.425285816192627,
      "learning_rate": 0.0004856827662109193,
      "loss": 2.9711,
      "step": 66255
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.458279013633728,
      "learning_rate": 0.0004856795533178145,
      "loss": 2.952,
      "step": 66256
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.0462260246276855,
      "learning_rate": 0.0004856763403901885,
      "loss": 3.1292,
      "step": 66257
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8758777379989624,
      "learning_rate": 0.0004856731274280417,
      "loss": 2.7499,
      "step": 66258
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4099699258804321,
      "learning_rate": 0.0004856699144313747,
      "loss": 2.8223,
      "step": 66259
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5050005912780762,
      "learning_rate": 0.0004856667014001883,
      "loss": 3.1731,
      "step": 66260
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1024062633514404,
      "learning_rate": 0.0004856634883344829,
      "loss": 3.0339,
      "step": 66261
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9200032949447632,
      "learning_rate": 0.00048566027523425917,
      "loss": 3.0313,
      "step": 66262
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5473840236663818,
      "learning_rate": 0.00048565706209951766,
      "loss": 3.269,
      "step": 66263
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9756957292556763,
      "learning_rate": 0.00048565384893025894,
      "loss": 2.8019,
      "step": 66264
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.403996467590332,
      "learning_rate": 0.0004856506357264837,
      "loss": 3.1869,
      "step": 66265
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.638363242149353,
      "learning_rate": 0.0004856474224881925,
      "loss": 3.1686,
      "step": 66266
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5938911437988281,
      "learning_rate": 0.000485644209215386,
      "loss": 3.0488,
      "step": 66267
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9834301471710205,
      "learning_rate": 0.0004856409959080647,
      "loss": 3.241,
      "step": 66268
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.344300627708435,
      "learning_rate": 0.0004856377825662293,
      "loss": 2.8386,
      "step": 66269
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4975621700286865,
      "learning_rate": 0.00048563456918988016,
      "loss": 2.9347,
      "step": 66270
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.592370867729187,
      "learning_rate": 0.0004856313557790182,
      "loss": 3.0244,
      "step": 66271
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6640170812606812,
      "learning_rate": 0.00048562814233364383,
      "loss": 2.8625,
      "step": 66272
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.83621346950531,
      "learning_rate": 0.0004856249288537577,
      "loss": 3.1558,
      "step": 66273
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.772972822189331,
      "learning_rate": 0.00048562171533936033,
      "loss": 2.9955,
      "step": 66274
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.566442847251892,
      "learning_rate": 0.0004856185017904524,
      "loss": 3.2255,
      "step": 66275
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7356804609298706,
      "learning_rate": 0.0004856152882070345,
      "loss": 2.9925,
      "step": 66276
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.738008737564087,
      "learning_rate": 0.0004856120745891072,
      "loss": 2.9541,
      "step": 66277
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7889817953109741,
      "learning_rate": 0.00048560886093667113,
      "loss": 2.9658,
      "step": 66278
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6400974988937378,
      "learning_rate": 0.00048560564724972686,
      "loss": 2.811,
      "step": 66279
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5216927528381348,
      "learning_rate": 0.00048560243352827504,
      "loss": 3.0222,
      "step": 66280
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.017306089401245,
      "learning_rate": 0.00048559921977231615,
      "loss": 2.9124,
      "step": 66281
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.6908671855926514,
      "learning_rate": 0.0004855960059818509,
      "loss": 2.9859,
      "step": 66282
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7702080011367798,
      "learning_rate": 0.0004855927921568799,
      "loss": 2.9524,
      "step": 66283
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9081426858901978,
      "learning_rate": 0.00048558957829740365,
      "loss": 2.9344,
      "step": 66284
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.455198049545288,
      "learning_rate": 0.00048558636440342273,
      "loss": 2.9333,
      "step": 66285
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.580398678779602,
      "learning_rate": 0.0004855831504749379,
      "loss": 3.123,
      "step": 66286
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.931368112564087,
      "learning_rate": 0.0004855799365119496,
      "loss": 3.0902,
      "step": 66287
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2094626426696777,
      "learning_rate": 0.0004855767225144585,
      "loss": 2.84,
      "step": 66288
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6649870872497559,
      "learning_rate": 0.00048557350848246517,
      "loss": 3.0707,
      "step": 66289
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6032367944717407,
      "learning_rate": 0.00048557029441597023,
      "loss": 3.1156,
      "step": 66290
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0749199390411377,
      "learning_rate": 0.00048556708031497435,
      "loss": 3.0812,
      "step": 66291
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0988402366638184,
      "learning_rate": 0.0004855638661794779,
      "loss": 2.9281,
      "step": 66292
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.123724937438965,
      "learning_rate": 0.00048556065200948173,
      "loss": 3.138,
      "step": 66293
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.965362310409546,
      "learning_rate": 0.00048555743780498634,
      "loss": 3.1038,
      "step": 66294
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3595738410949707,
      "learning_rate": 0.00048555422356599224,
      "loss": 2.8739,
      "step": 66295
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.528159260749817,
      "learning_rate": 0.0004855510092925002,
      "loss": 2.9224,
      "step": 66296
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.041166067123413,
      "learning_rate": 0.0004855477949845107,
      "loss": 2.943,
      "step": 66297
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.066593885421753,
      "learning_rate": 0.0004855445806420243,
      "loss": 3.1085,
      "step": 66298
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.518488883972168,
      "learning_rate": 0.00048554136626504166,
      "loss": 3.0837,
      "step": 66299
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0199801921844482,
      "learning_rate": 0.0004855381518535635,
      "loss": 3.2001,
      "step": 66300
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1022653579711914,
      "learning_rate": 0.00048553493740759017,
      "loss": 3.0174,
      "step": 66301
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.9849965572357178,
      "learning_rate": 0.0004855317229271224,
      "loss": 2.9006,
      "step": 66302
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7254751920700073,
      "learning_rate": 0.0004855285084121609,
      "loss": 3.013,
      "step": 66303
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.7250404357910156,
      "learning_rate": 0.000485525293862706,
      "loss": 3.0082,
      "step": 66304
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3370141983032227,
      "learning_rate": 0.00048552207927875853,
      "loss": 2.9936,
      "step": 66305
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7401398420333862,
      "learning_rate": 0.000485518864660319,
      "loss": 2.9532,
      "step": 66306
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.063011884689331,
      "learning_rate": 0.000485515650007388,
      "loss": 2.8497,
      "step": 66307
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6081129312515259,
      "learning_rate": 0.00048551243531996606,
      "loss": 3.1724,
      "step": 66308
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9694198369979858,
      "learning_rate": 0.000485509220598054,
      "loss": 3.1004,
      "step": 66309
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7250852584838867,
      "learning_rate": 0.00048550600584165216,
      "loss": 3.0324,
      "step": 66310
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6627880334854126,
      "learning_rate": 0.00048550279105076137,
      "loss": 3.0368,
      "step": 66311
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.54508638381958,
      "learning_rate": 0.000485499576225382,
      "loss": 3.0957,
      "step": 66312
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.8665406703948975,
      "learning_rate": 0.0004854963613655148,
      "loss": 2.8922,
      "step": 66313
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8410086631774902,
      "learning_rate": 0.00048549314647116027,
      "loss": 3.2022,
      "step": 66314
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.254425287246704,
      "learning_rate": 0.0004854899315423191,
      "loss": 2.9906,
      "step": 66315
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8676623106002808,
      "learning_rate": 0.00048548671657899187,
      "loss": 2.8301,
      "step": 66316
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5112919807434082,
      "learning_rate": 0.0004854835015811792,
      "loss": 3.0118,
      "step": 66317
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3925647735595703,
      "learning_rate": 0.0004854802865488816,
      "loss": 3.0459,
      "step": 66318
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8362071514129639,
      "learning_rate": 0.00048547707148209975,
      "loss": 3.2489,
      "step": 66319
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5322455167770386,
      "learning_rate": 0.00048547385638083413,
      "loss": 3.0377,
      "step": 66320
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.3096860647201538,
      "learning_rate": 0.0004854706412450854,
      "loss": 2.8454,
      "step": 66321
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5790311098098755,
      "learning_rate": 0.0004854674260748543,
      "loss": 3.0867,
      "step": 66322
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4458138942718506,
      "learning_rate": 0.00048546421087014113,
      "loss": 3.0589,
      "step": 66323
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8982664346694946,
      "learning_rate": 0.0004854609956309468,
      "loss": 3.1757,
      "step": 66324
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7605243921279907,
      "learning_rate": 0.00048545778035727176,
      "loss": 3.0498,
      "step": 66325
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.798288106918335,
      "learning_rate": 0.00048545456504911656,
      "loss": 3.1967,
      "step": 66326
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8681495189666748,
      "learning_rate": 0.00048545134970648195,
      "loss": 2.8566,
      "step": 66327
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9803653955459595,
      "learning_rate": 0.0004854481343293684,
      "loss": 2.8335,
      "step": 66328
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5272274017333984,
      "learning_rate": 0.0004854449189177765,
      "loss": 2.9888,
      "step": 66329
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.714439630508423,
      "learning_rate": 0.00048544170347170697,
      "loss": 3.0517,
      "step": 66330
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.179718017578125,
      "learning_rate": 0.00048543848799116016,
      "loss": 2.9015,
      "step": 66331
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6279774904251099,
      "learning_rate": 0.000485435272476137,
      "loss": 3.2464,
      "step": 66332
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.818772554397583,
      "learning_rate": 0.0004854320569266379,
      "loss": 2.9909,
      "step": 66333
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.778128147125244,
      "learning_rate": 0.0004854288413426634,
      "loss": 2.9729,
      "step": 66334
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.377166748046875,
      "learning_rate": 0.0004854256257242142,
      "loss": 3.2115,
      "step": 66335
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8345180749893188,
      "learning_rate": 0.00048542241007129094,
      "loss": 2.763,
      "step": 66336
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9430346488952637,
      "learning_rate": 0.0004854191943838941,
      "loss": 3.1134,
      "step": 66337
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.1805691719055176,
      "learning_rate": 0.0004854159786620243,
      "loss": 3.1807,
      "step": 66338
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7840545177459717,
      "learning_rate": 0.0004854127629056822,
      "loss": 3.065,
      "step": 66339
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8880475759506226,
      "learning_rate": 0.00048540954711486845,
      "loss": 3.0632,
      "step": 66340
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.550519347190857,
      "learning_rate": 0.00048540633128958346,
      "loss": 2.9383,
      "step": 66341
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7774769067764282,
      "learning_rate": 0.00048540311542982805,
      "loss": 3.18,
      "step": 66342
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7475558519363403,
      "learning_rate": 0.0004853998995356026,
      "loss": 3.0182,
      "step": 66343
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4539637565612793,
      "learning_rate": 0.00048539668360690776,
      "loss": 2.8342,
      "step": 66344
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3604891300201416,
      "learning_rate": 0.0004853934676437443,
      "loss": 2.8693,
      "step": 66345
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6406431198120117,
      "learning_rate": 0.0004853902516461126,
      "loss": 3.1684,
      "step": 66346
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5142290592193604,
      "learning_rate": 0.00048538703561401336,
      "loss": 2.9445,
      "step": 66347
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5775929689407349,
      "learning_rate": 0.00048538381954744725,
      "loss": 3.2978,
      "step": 66348
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.793055534362793,
      "learning_rate": 0.00048538060344641474,
      "loss": 2.7817,
      "step": 66349
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.661839485168457,
      "learning_rate": 0.00048537738731091646,
      "loss": 3.1624,
      "step": 66350
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4255075454711914,
      "learning_rate": 0.00048537417114095303,
      "loss": 2.8903,
      "step": 66351
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.75637686252594,
      "learning_rate": 0.00048537095493652503,
      "loss": 2.8678,
      "step": 66352
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4818466901779175,
      "learning_rate": 0.0004853677386976332,
      "loss": 3.0509,
      "step": 66353
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8372348546981812,
      "learning_rate": 0.00048536452242427784,
      "loss": 3.0728,
      "step": 66354
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5543828010559082,
      "learning_rate": 0.0004853613061164598,
      "loss": 3.1121,
      "step": 66355
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.946289300918579,
      "learning_rate": 0.00048535808977417954,
      "loss": 2.9908,
      "step": 66356
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7406028509140015,
      "learning_rate": 0.0004853548733974378,
      "loss": 2.9617,
      "step": 66357
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9204328060150146,
      "learning_rate": 0.000485351656986235,
      "loss": 3.1073,
      "step": 66358
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9699586629867554,
      "learning_rate": 0.0004853484405405719,
      "loss": 3.3914,
      "step": 66359
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5249067544937134,
      "learning_rate": 0.00048534522406044894,
      "loss": 3.1229,
      "step": 66360
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.270568370819092,
      "learning_rate": 0.00048534200754586686,
      "loss": 2.9973,
      "step": 66361
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7711013555526733,
      "learning_rate": 0.00048533879099682623,
      "loss": 3.1677,
      "step": 66362
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5553075075149536,
      "learning_rate": 0.0004853355744133276,
      "loss": 2.7387,
      "step": 66363
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.930785894393921,
      "learning_rate": 0.0004853323577953716,
      "loss": 2.992,
      "step": 66364
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4657953977584839,
      "learning_rate": 0.00048532914114295876,
      "loss": 3.0857,
      "step": 66365
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6758450269699097,
      "learning_rate": 0.0004853259244560898,
      "loss": 3.0923,
      "step": 66366
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.68429696559906,
      "learning_rate": 0.00048532270773476516,
      "loss": 3.1606,
      "step": 66367
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7418251037597656,
      "learning_rate": 0.0004853194909789856,
      "loss": 3.2749,
      "step": 66368
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5015830993652344,
      "learning_rate": 0.00048531627418875163,
      "loss": 3.0155,
      "step": 66369
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6716749668121338,
      "learning_rate": 0.00048531305736406393,
      "loss": 3.1965,
      "step": 66370
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.3727996349334717,
      "learning_rate": 0.00048530984050492294,
      "loss": 3.0911,
      "step": 66371
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5433251857757568,
      "learning_rate": 0.00048530662361132937,
      "loss": 3.2512,
      "step": 66372
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5083891153335571,
      "learning_rate": 0.0004853034066832838,
      "loss": 2.9592,
      "step": 66373
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8497987985610962,
      "learning_rate": 0.00048530018972078686,
      "loss": 3.0643,
      "step": 66374
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6767420768737793,
      "learning_rate": 0.0004852969727238391,
      "loss": 2.9425,
      "step": 66375
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.734914779663086,
      "learning_rate": 0.00048529375569244116,
      "loss": 3.1202,
      "step": 66376
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6238173246383667,
      "learning_rate": 0.0004852905386265936,
      "loss": 2.8947,
      "step": 66377
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.372974157333374,
      "learning_rate": 0.00048528732152629706,
      "loss": 3.0264,
      "step": 66378
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7451494932174683,
      "learning_rate": 0.00048528410439155204,
      "loss": 3.1456,
      "step": 66379
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6135035753250122,
      "learning_rate": 0.00048528088722235926,
      "loss": 3.2312,
      "step": 66380
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0099871158599854,
      "learning_rate": 0.0004852776700187192,
      "loss": 3.078,
      "step": 66381
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.843173861503601,
      "learning_rate": 0.0004852744527806326,
      "loss": 3.2409,
      "step": 66382
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7935153245925903,
      "learning_rate": 0.00048527123550809996,
      "loss": 2.9046,
      "step": 66383
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4270857572555542,
      "learning_rate": 0.0004852680182011219,
      "loss": 3.0832,
      "step": 66384
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7094554901123047,
      "learning_rate": 0.00048526480085969904,
      "loss": 3.152,
      "step": 66385
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5461440086364746,
      "learning_rate": 0.00048526158348383185,
      "loss": 3.0901,
      "step": 66386
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.065556049346924,
      "learning_rate": 0.00048525836607352113,
      "loss": 3.0257,
      "step": 66387
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6968097686767578,
      "learning_rate": 0.00048525514862876747,
      "loss": 3.1554,
      "step": 66388
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.317862033843994,
      "learning_rate": 0.0004852519311495712,
      "loss": 2.9691,
      "step": 66389
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.5574049949645996,
      "learning_rate": 0.00048524871363593314,
      "loss": 3.3148,
      "step": 66390
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.356283187866211,
      "learning_rate": 0.000485245496087854,
      "loss": 3.0755,
      "step": 66391
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.640907883644104,
      "learning_rate": 0.00048524227850533407,
      "loss": 2.9926,
      "step": 66392
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.754684567451477,
      "learning_rate": 0.00048523906088837414,
      "loss": 3.0534,
      "step": 66393
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5983580350875854,
      "learning_rate": 0.0004852358432369748,
      "loss": 2.9865,
      "step": 66394
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.527786135673523,
      "learning_rate": 0.0004852326255511366,
      "loss": 2.8777,
      "step": 66395
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6072081327438354,
      "learning_rate": 0.00048522940783086006,
      "loss": 3.3596,
      "step": 66396
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8837989568710327,
      "learning_rate": 0.000485226190076146,
      "loss": 3.3639,
      "step": 66397
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7860546112060547,
      "learning_rate": 0.0004852229722869949,
      "loss": 3.1643,
      "step": 66398
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.647594928741455,
      "learning_rate": 0.0004852197544634073,
      "loss": 2.8251,
      "step": 66399
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8248426914215088,
      "learning_rate": 0.0004852165366053839,
      "loss": 3.1269,
      "step": 66400
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.4822874069213867,
      "learning_rate": 0.0004852133187129252,
      "loss": 2.8622,
      "step": 66401
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.417677879333496,
      "learning_rate": 0.0004852101007860319,
      "loss": 3.0346,
      "step": 66402
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.307386875152588,
      "learning_rate": 0.0004852068828247045,
      "loss": 2.9619,
      "step": 66403
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.985097646713257,
      "learning_rate": 0.00048520366482894366,
      "loss": 3.0327,
      "step": 66404
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.014434337615967,
      "learning_rate": 0.00048520044679874994,
      "loss": 2.948,
      "step": 66405
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.2016119956970215,
      "learning_rate": 0.00048519722873412396,
      "loss": 2.9816,
      "step": 66406
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.0990755558013916,
      "learning_rate": 0.00048519401063506633,
      "loss": 2.9716,
      "step": 66407
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.9254016876220703,
      "learning_rate": 0.0004851907925015777,
      "loss": 3.055,
      "step": 66408
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4835362434387207,
      "learning_rate": 0.0004851875743336586,
      "loss": 3.2621,
      "step": 66409
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7265197038650513,
      "learning_rate": 0.00048518435613130955,
      "loss": 2.9332,
      "step": 66410
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.321075439453125,
      "learning_rate": 0.0004851811378945312,
      "loss": 3.0409,
      "step": 66411
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9825595617294312,
      "learning_rate": 0.00048517791962332437,
      "loss": 3.1946,
      "step": 66412
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.657163381576538,
      "learning_rate": 0.0004851747013176893,
      "loss": 2.9374,
      "step": 66413
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.244541645050049,
      "learning_rate": 0.00048517148297762676,
      "loss": 3.1662,
      "step": 66414
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.6112823486328125,
      "learning_rate": 0.0004851682646031374,
      "loss": 3.0694,
      "step": 66415
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.9091342687606812,
      "learning_rate": 0.00048516504619422175,
      "loss": 3.0643,
      "step": 66416
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4432622194290161,
      "learning_rate": 0.00048516182775088044,
      "loss": 3.1621,
      "step": 66417
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.52935791015625,
      "learning_rate": 0.0004851586092731141,
      "loss": 3.0238,
      "step": 66418
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.4917938709259033,
      "learning_rate": 0.0004851553907609232,
      "loss": 3.0935,
      "step": 66419
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.23715877532959,
      "learning_rate": 0.00048515217221430844,
      "loss": 3.0624,
      "step": 66420
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.4530574083328247,
      "learning_rate": 0.0004851489536332704,
      "loss": 3.1133,
      "step": 66421
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6693782806396484,
      "learning_rate": 0.00048514573501780967,
      "loss": 3.0207,
      "step": 66422
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7947766780853271,
      "learning_rate": 0.00048514251636792686,
      "loss": 2.9977,
      "step": 66423
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.0537803173065186,
      "learning_rate": 0.0004851392976836226,
      "loss": 3.1148,
      "step": 66424
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6410126686096191,
      "learning_rate": 0.00048513607896489734,
      "loss": 2.9065,
      "step": 66425
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5734107494354248,
      "learning_rate": 0.00048513286021175186,
      "loss": 3.1677,
      "step": 66426
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.7969050407409668,
      "learning_rate": 0.00048512964142418666,
      "loss": 3.0412,
      "step": 66427
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.5590358972549438,
      "learning_rate": 0.0004851264226022024,
      "loss": 3.0504,
      "step": 66428
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6017876863479614,
      "learning_rate": 0.00048512320374579965,
      "loss": 2.9567,
      "step": 66429
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.6131072044372559,
      "learning_rate": 0.000485119984854979,
      "loss": 3.048,
      "step": 66430
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.092155933380127,
      "learning_rate": 0.000485116765929741,
      "loss": 3.1298,
      "step": 66431
    },
    {
      "epoch": 0.86,
      "grad_norm": 1.8155378103256226,
      "learning_rate": 0.0004851135469700863,
      "loss": 3.111,
      "step": 66432
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4622429609298706,
      "learning_rate": 0.00048511032797601556,
      "loss": 2.9285,
      "step": 66433
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8605443239212036,
      "learning_rate": 0.00048510710894752925,
      "loss": 3.0724,
      "step": 66434
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3966670036315918,
      "learning_rate": 0.0004851038898846281,
      "loss": 3.0158,
      "step": 66435
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8526796102523804,
      "learning_rate": 0.0004851006707873126,
      "loss": 3.075,
      "step": 66436
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.612570881843567,
      "learning_rate": 0.0004850974516555834,
      "loss": 3.18,
      "step": 66437
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.8355464935302734,
      "learning_rate": 0.0004850942324894411,
      "loss": 2.9309,
      "step": 66438
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.202824115753174,
      "learning_rate": 0.00048509101328888626,
      "loss": 3.0601,
      "step": 66439
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4073776006698608,
      "learning_rate": 0.00048508779405391955,
      "loss": 3.1786,
      "step": 66440
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.766014814376831,
      "learning_rate": 0.00048508457478454157,
      "loss": 3.2313,
      "step": 66441
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7025572061538696,
      "learning_rate": 0.00048508135548075274,
      "loss": 3.0809,
      "step": 66442
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.422046184539795,
      "learning_rate": 0.00048507813614255383,
      "loss": 3.0435,
      "step": 66443
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.810925841331482,
      "learning_rate": 0.0004850749167699455,
      "loss": 2.9678,
      "step": 66444
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.954483151435852,
      "learning_rate": 0.0004850716973629281,
      "loss": 2.9847,
      "step": 66445
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.207777500152588,
      "learning_rate": 0.0004850684779215025,
      "loss": 3.0145,
      "step": 66446
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.8936080932617188,
      "learning_rate": 0.0004850652584456691,
      "loss": 3.0407,
      "step": 66447
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.452075719833374,
      "learning_rate": 0.0004850620389354286,
      "loss": 3.0377,
      "step": 66448
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.927625060081482,
      "learning_rate": 0.00048505881939078164,
      "loss": 2.8655,
      "step": 66449
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.671508550643921,
      "learning_rate": 0.0004850555998117287,
      "loss": 3.0043,
      "step": 66450
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8392090797424316,
      "learning_rate": 0.00048505238019827036,
      "loss": 3.0695,
      "step": 66451
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4350039958953857,
      "learning_rate": 0.00048504916055040737,
      "loss": 3.1657,
      "step": 66452
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4648611545562744,
      "learning_rate": 0.00048504594086814026,
      "loss": 2.842,
      "step": 66453
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9805104732513428,
      "learning_rate": 0.0004850427211514696,
      "loss": 3.3067,
      "step": 66454
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.591901421546936,
      "learning_rate": 0.00048503950140039596,
      "loss": 3.3113,
      "step": 66455
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4369279146194458,
      "learning_rate": 0.00048503628161491996,
      "loss": 3.0346,
      "step": 66456
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.1525895595550537,
      "learning_rate": 0.00048503306179504234,
      "loss": 2.9267,
      "step": 66457
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.512711763381958,
      "learning_rate": 0.0004850298419407635,
      "loss": 3.0654,
      "step": 66458
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5922414064407349,
      "learning_rate": 0.0004850266220520841,
      "loss": 3.1393,
      "step": 66459
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0544211864471436,
      "learning_rate": 0.00048502340212900485,
      "loss": 3.1063,
      "step": 66460
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3507461547851562,
      "learning_rate": 0.00048502018217152625,
      "loss": 2.9684,
      "step": 66461
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6184402704238892,
      "learning_rate": 0.00048501696217964884,
      "loss": 2.963,
      "step": 66462
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4429149627685547,
      "learning_rate": 0.0004850137421533733,
      "loss": 3.1381,
      "step": 66463
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.71967613697052,
      "learning_rate": 0.00048501052209270015,
      "loss": 3.077,
      "step": 66464
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0336382389068604,
      "learning_rate": 0.00048500730199763016,
      "loss": 3.2039,
      "step": 66465
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1940839290618896,
      "learning_rate": 0.00048500408186816376,
      "loss": 3.2346,
      "step": 66466
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8434041738510132,
      "learning_rate": 0.0004850008617043017,
      "loss": 2.8907,
      "step": 66467
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6109590530395508,
      "learning_rate": 0.0004849976415060444,
      "loss": 2.7463,
      "step": 66468
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5870054960250854,
      "learning_rate": 0.0004849944212733925,
      "loss": 3.0659,
      "step": 66469
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6382793188095093,
      "learning_rate": 0.0004849912010063468,
      "loss": 2.9492,
      "step": 66470
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.569409966468811,
      "learning_rate": 0.00048498798070490757,
      "loss": 3.3991,
      "step": 66471
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6955325603485107,
      "learning_rate": 0.0004849847603690757,
      "loss": 2.8475,
      "step": 66472
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2359132766723633,
      "learning_rate": 0.0004849815399988516,
      "loss": 2.9383,
      "step": 66473
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6311010122299194,
      "learning_rate": 0.00048497831959423595,
      "loss": 3.2173,
      "step": 66474
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.271328926086426,
      "learning_rate": 0.00048497509915522945,
      "loss": 3.038,
      "step": 66475
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4992903470993042,
      "learning_rate": 0.00048497187868183254,
      "loss": 2.7814,
      "step": 66476
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.564529299736023,
      "learning_rate": 0.00048496865817404576,
      "loss": 3.0497,
      "step": 66477
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5350935459136963,
      "learning_rate": 0.0004849654376318699,
      "loss": 3.0662,
      "step": 66478
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.565572738647461,
      "learning_rate": 0.00048496221705530545,
      "loss": 3.1486,
      "step": 66479
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9894059896469116,
      "learning_rate": 0.000484958996444353,
      "loss": 2.8865,
      "step": 66480
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.567559838294983,
      "learning_rate": 0.0004849557757990132,
      "loss": 2.9876,
      "step": 66481
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.668138027191162,
      "learning_rate": 0.0004849525551192867,
      "loss": 3.0613,
      "step": 66482
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1696557998657227,
      "learning_rate": 0.000484949334405174,
      "loss": 3.0069,
      "step": 66483
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.770986795425415,
      "learning_rate": 0.0004849461136566757,
      "loss": 2.9182,
      "step": 66484
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.6992146968841553,
      "learning_rate": 0.0004849428928737925,
      "loss": 2.9851,
      "step": 66485
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.294552803039551,
      "learning_rate": 0.0004849396720565248,
      "loss": 3.1213,
      "step": 66486
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.1059372425079346,
      "learning_rate": 0.00048493645120487337,
      "loss": 3.155,
      "step": 66487
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.529411792755127,
      "learning_rate": 0.0004849332303188388,
      "loss": 2.9106,
      "step": 66488
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.222079277038574,
      "learning_rate": 0.0004849300093984216,
      "loss": 2.9428,
      "step": 66489
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2775990962982178,
      "learning_rate": 0.0004849267884436224,
      "loss": 3.274,
      "step": 66490
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1458587646484375,
      "learning_rate": 0.00048492356745444195,
      "loss": 2.9137,
      "step": 66491
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5849679708480835,
      "learning_rate": 0.00048492034643088063,
      "loss": 2.85,
      "step": 66492
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6080158948898315,
      "learning_rate": 0.00048491712537293905,
      "loss": 3.0894,
      "step": 66493
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0702571868896484,
      "learning_rate": 0.000484913904280618,
      "loss": 3.1051,
      "step": 66494
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4775886535644531,
      "learning_rate": 0.00048491068315391795,
      "loss": 3.148,
      "step": 66495
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6780022382736206,
      "learning_rate": 0.00048490746199283946,
      "loss": 3.185,
      "step": 66496
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5729080438613892,
      "learning_rate": 0.00048490424079738326,
      "loss": 3.0534,
      "step": 66497
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1567556858062744,
      "learning_rate": 0.0004849010195675498,
      "loss": 3.0639,
      "step": 66498
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3848803043365479,
      "learning_rate": 0.0004848977983033398,
      "loss": 2.9879,
      "step": 66499
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7968796491622925,
      "learning_rate": 0.0004848945770047538,
      "loss": 2.9399,
      "step": 66500
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.97370445728302,
      "learning_rate": 0.00048489135567179235,
      "loss": 2.954,
      "step": 66501
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6554428339004517,
      "learning_rate": 0.00048488813430445617,
      "loss": 3.0165,
      "step": 66502
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4567276239395142,
      "learning_rate": 0.0004848849129027458,
      "loss": 3.1348,
      "step": 66503
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9229021072387695,
      "learning_rate": 0.00048488169146666176,
      "loss": 2.8904,
      "step": 66504
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9359710216522217,
      "learning_rate": 0.0004848784699962049,
      "loss": 3.2506,
      "step": 66505
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4864829778671265,
      "learning_rate": 0.00048487524849137543,
      "loss": 3.1273,
      "step": 66506
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2849286794662476,
      "learning_rate": 0.0004848720269521743,
      "loss": 3.2494,
      "step": 66507
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3000552654266357,
      "learning_rate": 0.00048486880537860187,
      "loss": 2.914,
      "step": 66508
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6518317461013794,
      "learning_rate": 0.0004848655837706589,
      "loss": 2.9929,
      "step": 66509
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.481812834739685,
      "learning_rate": 0.0004848623621283459,
      "loss": 3.098,
      "step": 66510
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5225330591201782,
      "learning_rate": 0.00048485914045166346,
      "loss": 3.1244,
      "step": 66511
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5016776323318481,
      "learning_rate": 0.00048485591874061236,
      "loss": 2.8846,
      "step": 66512
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7423627376556396,
      "learning_rate": 0.000484852696995193,
      "loss": 3.0125,
      "step": 66513
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.467376708984375,
      "learning_rate": 0.00048484947521540596,
      "loss": 2.8212,
      "step": 66514
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8753570318222046,
      "learning_rate": 0.00048484625340125184,
      "loss": 3.0906,
      "step": 66515
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8883806467056274,
      "learning_rate": 0.0004848430315527315,
      "loss": 2.9943,
      "step": 66516
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4070719480514526,
      "learning_rate": 0.00048483980966984526,
      "loss": 2.9672,
      "step": 66517
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7440366744995117,
      "learning_rate": 0.0004848365877525939,
      "loss": 3.019,
      "step": 66518
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.3861007690429688,
      "learning_rate": 0.00048483336580097783,
      "loss": 2.9817,
      "step": 66519
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5677905082702637,
      "learning_rate": 0.0004848301438149978,
      "loss": 3.0275,
      "step": 66520
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5490506887435913,
      "learning_rate": 0.0004848269217946543,
      "loss": 2.9789,
      "step": 66521
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.446842908859253,
      "learning_rate": 0.00048482369973994806,
      "loss": 2.8622,
      "step": 66522
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.514237642288208,
      "learning_rate": 0.0004848204776508795,
      "loss": 3.0442,
      "step": 66523
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9182157516479492,
      "learning_rate": 0.0004848172555274494,
      "loss": 2.9799,
      "step": 66524
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0881659984588623,
      "learning_rate": 0.0004848140333696583,
      "loss": 3.0016,
      "step": 66525
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.913707733154297,
      "learning_rate": 0.00048481081117750675,
      "loss": 2.958,
      "step": 66526
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1092283725738525,
      "learning_rate": 0.00048480758895099536,
      "loss": 3.0033,
      "step": 66527
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6413944959640503,
      "learning_rate": 0.0004848043666901248,
      "loss": 3.1868,
      "step": 66528
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.652026653289795,
      "learning_rate": 0.00048480114439489565,
      "loss": 2.9261,
      "step": 66529
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5675898790359497,
      "learning_rate": 0.00048479792206530836,
      "loss": 3.0131,
      "step": 66530
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5287243127822876,
      "learning_rate": 0.0004847946997013637,
      "loss": 2.7929,
      "step": 66531
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5347180366516113,
      "learning_rate": 0.00048479147730306235,
      "loss": 3.0671,
      "step": 66532
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5198098421096802,
      "learning_rate": 0.00048478825487040456,
      "loss": 3.0844,
      "step": 66533
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4573142528533936,
      "learning_rate": 0.00048478503240339133,
      "loss": 2.8854,
      "step": 66534
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5620155334472656,
      "learning_rate": 0.00048478180990202293,
      "loss": 2.9084,
      "step": 66535
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5607110261917114,
      "learning_rate": 0.0004847785873663002,
      "loss": 3.0004,
      "step": 66536
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5880285501480103,
      "learning_rate": 0.00048477536479622356,
      "loss": 2.9845,
      "step": 66537
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.370182752609253,
      "learning_rate": 0.0004847721421917937,
      "loss": 3.0446,
      "step": 66538
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8055686950683594,
      "learning_rate": 0.00048476891955301127,
      "loss": 2.9876,
      "step": 66539
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9030300378799438,
      "learning_rate": 0.00048476569687987684,
      "loss": 2.871,
      "step": 66540
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9189947843551636,
      "learning_rate": 0.00048476247417239093,
      "loss": 3.079,
      "step": 66541
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3854219913482666,
      "learning_rate": 0.00048475925143055414,
      "loss": 2.9334,
      "step": 66542
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7663066387176514,
      "learning_rate": 0.00048475602865436716,
      "loss": 2.8678,
      "step": 66543
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.079714059829712,
      "learning_rate": 0.00048475280584383054,
      "loss": 2.9037,
      "step": 66544
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6241528987884521,
      "learning_rate": 0.0004847495829989449,
      "loss": 3.125,
      "step": 66545
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.562268853187561,
      "learning_rate": 0.0004847463601197109,
      "loss": 2.9798,
      "step": 66546
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.656201720237732,
      "learning_rate": 0.0004847431372061289,
      "loss": 3.0222,
      "step": 66547
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5238310098648071,
      "learning_rate": 0.0004847399142581998,
      "loss": 3.1078,
      "step": 66548
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6157442331314087,
      "learning_rate": 0.000484736691275924,
      "loss": 2.8833,
      "step": 66549
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6263396739959717,
      "learning_rate": 0.00048473346825930215,
      "loss": 3.0324,
      "step": 66550
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7272553443908691,
      "learning_rate": 0.0004847302452083349,
      "loss": 3.3445,
      "step": 66551
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5702924728393555,
      "learning_rate": 0.00048472702212302277,
      "loss": 3.1203,
      "step": 66552
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5698628425598145,
      "learning_rate": 0.0004847237990033664,
      "loss": 3.1022,
      "step": 66553
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9438884258270264,
      "learning_rate": 0.00048472057584936646,
      "loss": 2.7654,
      "step": 66554
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.300339460372925,
      "learning_rate": 0.00048471735266102345,
      "loss": 2.7934,
      "step": 66555
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2612199783325195,
      "learning_rate": 0.00048471412943833795,
      "loss": 3.0831,
      "step": 66556
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6239758729934692,
      "learning_rate": 0.0004847109061813106,
      "loss": 2.9247,
      "step": 66557
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5250416994094849,
      "learning_rate": 0.0004847076828899421,
      "loss": 3.0627,
      "step": 66558
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.78864586353302,
      "learning_rate": 0.00048470445956423283,
      "loss": 3.2831,
      "step": 66559
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7742234468460083,
      "learning_rate": 0.00048470123620418363,
      "loss": 2.9132,
      "step": 66560
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6968624591827393,
      "learning_rate": 0.0004846980128097949,
      "loss": 3.1898,
      "step": 66561
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4735989570617676,
      "learning_rate": 0.0004846947893810674,
      "loss": 2.8772,
      "step": 66562
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6765308380126953,
      "learning_rate": 0.0004846915659180016,
      "loss": 3.0147,
      "step": 66563
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.700161337852478,
      "learning_rate": 0.0004846883424205981,
      "loss": 3.0257,
      "step": 66564
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.644102931022644,
      "learning_rate": 0.00048468511888885763,
      "loss": 3.0271,
      "step": 66565
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6782010793685913,
      "learning_rate": 0.00048468189532278075,
      "loss": 3.1026,
      "step": 66566
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8056247234344482,
      "learning_rate": 0.00048467867172236794,
      "loss": 3.0507,
      "step": 66567
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8092049360275269,
      "learning_rate": 0.0004846754480876199,
      "loss": 2.6553,
      "step": 66568
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2743818759918213,
      "learning_rate": 0.00048467222441853715,
      "loss": 3.2556,
      "step": 66569
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.221144676208496,
      "learning_rate": 0.0004846690007151204,
      "loss": 2.9808,
      "step": 66570
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5668728351593018,
      "learning_rate": 0.00048466577697737026,
      "loss": 3.1409,
      "step": 66571
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5234105587005615,
      "learning_rate": 0.0004846625532052872,
      "loss": 2.8246,
      "step": 66572
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0066452026367188,
      "learning_rate": 0.0004846593293988719,
      "loss": 3.0539,
      "step": 66573
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2992303371429443,
      "learning_rate": 0.00048465610555812493,
      "loss": 2.9453,
      "step": 66574
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.857469081878662,
      "learning_rate": 0.0004846528816830469,
      "loss": 2.9558,
      "step": 66575
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.3977227210998535,
      "learning_rate": 0.00048464965777363843,
      "loss": 2.7533,
      "step": 66576
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5576473474502563,
      "learning_rate": 0.0004846464338299002,
      "loss": 2.9807,
      "step": 66577
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5428178310394287,
      "learning_rate": 0.0004846432098518325,
      "loss": 3.0117,
      "step": 66578
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5994595289230347,
      "learning_rate": 0.00048463998583943627,
      "loss": 2.7946,
      "step": 66579
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5701608657836914,
      "learning_rate": 0.00048463676179271196,
      "loss": 2.764,
      "step": 66580
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7830561399459839,
      "learning_rate": 0.00048463353771166024,
      "loss": 3.0277,
      "step": 66581
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.192084312438965,
      "learning_rate": 0.00048463031359628154,
      "loss": 3.049,
      "step": 66582
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3917784690856934,
      "learning_rate": 0.0004846270894465767,
      "loss": 3.3132,
      "step": 66583
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4732154607772827,
      "learning_rate": 0.0004846238652625462,
      "loss": 3.0812,
      "step": 66584
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.273561716079712,
      "learning_rate": 0.00048462064104419055,
      "loss": 3.1124,
      "step": 66585
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4054505825042725,
      "learning_rate": 0.0004846174167915105,
      "loss": 2.977,
      "step": 66586
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5121965408325195,
      "learning_rate": 0.00048461419250450655,
      "loss": 3.1767,
      "step": 66587
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5204143524169922,
      "learning_rate": 0.00048461096818317936,
      "loss": 2.9822,
      "step": 66588
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5807874202728271,
      "learning_rate": 0.0004846077438275295,
      "loss": 3.274,
      "step": 66589
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5730351209640503,
      "learning_rate": 0.0004846045194375576,
      "loss": 3.1711,
      "step": 66590
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5789086818695068,
      "learning_rate": 0.00048460129501326423,
      "loss": 3.2051,
      "step": 66591
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.773687481880188,
      "learning_rate": 0.00048459807055465,
      "loss": 2.8133,
      "step": 66592
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6806378364562988,
      "learning_rate": 0.00048459484606171545,
      "loss": 3.0235,
      "step": 66593
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3724949359893799,
      "learning_rate": 0.00048459162153446125,
      "loss": 2.8207,
      "step": 66594
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3834447860717773,
      "learning_rate": 0.000484588396972888,
      "loss": 2.9496,
      "step": 66595
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.546876311302185,
      "learning_rate": 0.00048458517237699625,
      "loss": 2.9064,
      "step": 66596
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5211336612701416,
      "learning_rate": 0.0004845819477467867,
      "loss": 3.048,
      "step": 66597
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9098745584487915,
      "learning_rate": 0.0004845787230822598,
      "loss": 2.8374,
      "step": 66598
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5915931463241577,
      "learning_rate": 0.00048457549838341624,
      "loss": 3.0095,
      "step": 66599
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0825772285461426,
      "learning_rate": 0.0004845722736502567,
      "loss": 2.8804,
      "step": 66600
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7579407691955566,
      "learning_rate": 0.0004845690488827816,
      "loss": 2.7897,
      "step": 66601
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.574479341506958,
      "learning_rate": 0.0004845658240809917,
      "loss": 2.9303,
      "step": 66602
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.827100992202759,
      "learning_rate": 0.00048456259924488745,
      "loss": 3.3496,
      "step": 66603
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.082291841506958,
      "learning_rate": 0.00048455937437446965,
      "loss": 2.9243,
      "step": 66604
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6884808540344238,
      "learning_rate": 0.0004845561494697387,
      "loss": 3.2264,
      "step": 66605
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9240237474441528,
      "learning_rate": 0.0004845529245306953,
      "loss": 2.9698,
      "step": 66606
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.300161361694336,
      "learning_rate": 0.00048454969955734,
      "loss": 3.1108,
      "step": 66607
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9403622150421143,
      "learning_rate": 0.0004845464745496734,
      "loss": 3.117,
      "step": 66608
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7967133522033691,
      "learning_rate": 0.0004845432495076961,
      "loss": 3.0653,
      "step": 66609
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.7303922176361084,
      "learning_rate": 0.00048454002443140893,
      "loss": 2.6491,
      "step": 66610
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7246623039245605,
      "learning_rate": 0.00048453679932081205,
      "loss": 2.9182,
      "step": 66611
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5681217908859253,
      "learning_rate": 0.0004845335741759064,
      "loss": 3.2842,
      "step": 66612
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5655733346939087,
      "learning_rate": 0.0004845303489966925,
      "loss": 3.0545,
      "step": 66613
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1998586654663086,
      "learning_rate": 0.00048452712378317084,
      "loss": 3.1791,
      "step": 66614
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5314946174621582,
      "learning_rate": 0.00048452389853534216,
      "loss": 3.3013,
      "step": 66615
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6501710414886475,
      "learning_rate": 0.0004845206732532071,
      "loss": 2.9391,
      "step": 66616
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.090818405151367,
      "learning_rate": 0.00048451744793676603,
      "loss": 3.0288,
      "step": 66617
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2592780590057373,
      "learning_rate": 0.0004845142225860197,
      "loss": 2.9784,
      "step": 66618
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4558346271514893,
      "learning_rate": 0.00048451099720096867,
      "loss": 3.233,
      "step": 66619
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0173676013946533,
      "learning_rate": 0.0004845077717816136,
      "loss": 3.1128,
      "step": 66620
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.902488946914673,
      "learning_rate": 0.00048450454632795507,
      "loss": 3.2324,
      "step": 66621
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6443547010421753,
      "learning_rate": 0.00048450132083999365,
      "loss": 3.1244,
      "step": 66622
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3733378648757935,
      "learning_rate": 0.00048449809531773,
      "loss": 3.0637,
      "step": 66623
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5476473569869995,
      "learning_rate": 0.00048449486976116454,
      "loss": 2.9385,
      "step": 66624
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1085493564605713,
      "learning_rate": 0.00048449164417029814,
      "loss": 3.0248,
      "step": 66625
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.161008596420288,
      "learning_rate": 0.00048448841854513116,
      "loss": 2.9499,
      "step": 66626
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.086243152618408,
      "learning_rate": 0.00048448519288566436,
      "loss": 2.8865,
      "step": 66627
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5530985593795776,
      "learning_rate": 0.0004844819671918982,
      "loss": 2.7617,
      "step": 66628
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.687273621559143,
      "learning_rate": 0.0004844787414638335,
      "loss": 3.1891,
      "step": 66629
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5581088066101074,
      "learning_rate": 0.00048447551570147057,
      "loss": 2.9798,
      "step": 66630
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8709536790847778,
      "learning_rate": 0.0004844722899048103,
      "loss": 2.8241,
      "step": 66631
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4756468534469604,
      "learning_rate": 0.0004844690640738531,
      "loss": 3.2047,
      "step": 66632
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6969977617263794,
      "learning_rate": 0.0004844658382085996,
      "loss": 2.9282,
      "step": 66633
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7770365476608276,
      "learning_rate": 0.00048446261230905037,
      "loss": 3.2202,
      "step": 66634
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3561608791351318,
      "learning_rate": 0.0004844593863752061,
      "loss": 3.1309,
      "step": 66635
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.269519329071045,
      "learning_rate": 0.0004844561604070674,
      "loss": 3.3059,
      "step": 66636
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7450125217437744,
      "learning_rate": 0.0004844529344046347,
      "loss": 3.0392,
      "step": 66637
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.693276047706604,
      "learning_rate": 0.00048444970836790884,
      "loss": 3.2441,
      "step": 66638
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3611721992492676,
      "learning_rate": 0.0004844464822968902,
      "loss": 2.9011,
      "step": 66639
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7098325490951538,
      "learning_rate": 0.0004844432561915796,
      "loss": 2.9925,
      "step": 66640
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5463435649871826,
      "learning_rate": 0.0004844400300519774,
      "loss": 2.9451,
      "step": 66641
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6207977533340454,
      "learning_rate": 0.00048443680387808436,
      "loss": 3.0419,
      "step": 66642
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.656649112701416,
      "learning_rate": 0.00048443357766990105,
      "loss": 2.9767,
      "step": 66643
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4248645305633545,
      "learning_rate": 0.00048443035142742806,
      "loss": 3.0701,
      "step": 66644
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5777934789657593,
      "learning_rate": 0.00048442712515066593,
      "loss": 2.8177,
      "step": 66645
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.579109787940979,
      "learning_rate": 0.0004844238988396153,
      "loss": 2.9161,
      "step": 66646
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6864092350006104,
      "learning_rate": 0.00048442067249427695,
      "loss": 3.0236,
      "step": 66647
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.842684268951416,
      "learning_rate": 0.00048441744611465114,
      "loss": 2.9339,
      "step": 66648
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5264734029769897,
      "learning_rate": 0.00048441421970073874,
      "loss": 3.0286,
      "step": 66649
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0128257274627686,
      "learning_rate": 0.0004844109932525402,
      "loss": 3.061,
      "step": 66650
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7406893968582153,
      "learning_rate": 0.0004844077667700562,
      "loss": 3.001,
      "step": 66651
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.269127607345581,
      "learning_rate": 0.0004844045402532874,
      "loss": 3.0315,
      "step": 66652
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4963151216506958,
      "learning_rate": 0.00048440131370223425,
      "loss": 3.12,
      "step": 66653
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5282413959503174,
      "learning_rate": 0.00048439808711689733,
      "loss": 2.9247,
      "step": 66654
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7000795602798462,
      "learning_rate": 0.00048439486049727735,
      "loss": 3.055,
      "step": 66655
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.664446473121643,
      "learning_rate": 0.00048439163384337495,
      "loss": 2.7621,
      "step": 66656
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.922041177749634,
      "learning_rate": 0.0004843884071551906,
      "loss": 2.8376,
      "step": 66657
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6415884494781494,
      "learning_rate": 0.0004843851804327251,
      "loss": 2.9964,
      "step": 66658
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3995096683502197,
      "learning_rate": 0.0004843819536759788,
      "loss": 3.1385,
      "step": 66659
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6227343082427979,
      "learning_rate": 0.0004843787268849524,
      "loss": 3.0295,
      "step": 66660
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7815767526626587,
      "learning_rate": 0.0004843755000596466,
      "loss": 3.0326,
      "step": 66661
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.885740041732788,
      "learning_rate": 0.0004843722732000619,
      "loss": 2.6657,
      "step": 66662
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0714643001556396,
      "learning_rate": 0.00048436904630619885,
      "loss": 2.9091,
      "step": 66663
    },
    {
      "epoch": 0.87,
      "grad_norm": 4.065666198730469,
      "learning_rate": 0.0004843658193780581,
      "loss": 3.3195,
      "step": 66664
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5494102239608765,
      "learning_rate": 0.0004843625924156403,
      "loss": 3.27,
      "step": 66665
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.153258800506592,
      "learning_rate": 0.00048435936541894604,
      "loss": 2.871,
      "step": 66666
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.778748631477356,
      "learning_rate": 0.0004843561383879759,
      "loss": 3.0297,
      "step": 66667
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5637245178222656,
      "learning_rate": 0.00048435291132273045,
      "loss": 3.0693,
      "step": 66668
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4733405113220215,
      "learning_rate": 0.00048434968422321034,
      "loss": 3.1055,
      "step": 66669
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0678818225860596,
      "learning_rate": 0.000484346457089416,
      "loss": 3.0195,
      "step": 66670
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2993500232696533,
      "learning_rate": 0.0004843432299213483,
      "loss": 3.1215,
      "step": 66671
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.341493844985962,
      "learning_rate": 0.00048434000271900773,
      "loss": 2.8955,
      "step": 66672
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0590620040893555,
      "learning_rate": 0.00048433677548239486,
      "loss": 2.7311,
      "step": 66673
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5698537826538086,
      "learning_rate": 0.0004843335482115103,
      "loss": 2.8815,
      "step": 66674
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5447688102722168,
      "learning_rate": 0.00048433032090635465,
      "loss": 3.1031,
      "step": 66675
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6401020288467407,
      "learning_rate": 0.0004843270935669285,
      "loss": 2.9577,
      "step": 66676
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.020556926727295,
      "learning_rate": 0.0004843238661932325,
      "loss": 2.9445,
      "step": 66677
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6582318544387817,
      "learning_rate": 0.00048432063878526714,
      "loss": 3.0882,
      "step": 66678
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5213209390640259,
      "learning_rate": 0.0004843174113430331,
      "loss": 3.4072,
      "step": 66679
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.088245153427124,
      "learning_rate": 0.00048431418386653106,
      "loss": 2.9871,
      "step": 66680
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5559455156326294,
      "learning_rate": 0.0004843109563557615,
      "loss": 2.7951,
      "step": 66681
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.143395185470581,
      "learning_rate": 0.00048430772881072505,
      "loss": 2.7104,
      "step": 66682
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4292746782302856,
      "learning_rate": 0.0004843045012314223,
      "loss": 3.1373,
      "step": 66683
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.54513418674469,
      "learning_rate": 0.0004843012736178539,
      "loss": 2.9956,
      "step": 66684
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3920094966888428,
      "learning_rate": 0.00048429804597002036,
      "loss": 2.841,
      "step": 66685
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.440859794616699,
      "learning_rate": 0.00048429481828792236,
      "loss": 3.1884,
      "step": 66686
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.541853666305542,
      "learning_rate": 0.00048429159057156043,
      "loss": 3.0385,
      "step": 66687
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8189196586608887,
      "learning_rate": 0.0004842883628209353,
      "loss": 3.0204,
      "step": 66688
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.101529121398926,
      "learning_rate": 0.00048428513503604744,
      "loss": 3.297,
      "step": 66689
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2283058166503906,
      "learning_rate": 0.00048428190721689745,
      "loss": 2.7586,
      "step": 66690
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5519514083862305,
      "learning_rate": 0.000484278679363486,
      "loss": 2.9988,
      "step": 66691
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7851011753082275,
      "learning_rate": 0.00048427545147581376,
      "loss": 3.1096,
      "step": 66692
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.6723008155822754,
      "learning_rate": 0.00048427222355388114,
      "loss": 2.7536,
      "step": 66693
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.006192922592163,
      "learning_rate": 0.0004842689955976888,
      "loss": 3.0379,
      "step": 66694
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5111541748046875,
      "learning_rate": 0.00048426576760723743,
      "loss": 3.0113,
      "step": 66695
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.424165964126587,
      "learning_rate": 0.00048426253958252765,
      "loss": 2.9517,
      "step": 66696
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7633771896362305,
      "learning_rate": 0.00048425931152355995,
      "loss": 3.1524,
      "step": 66697
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0614426136016846,
      "learning_rate": 0.00048425608343033487,
      "loss": 2.9929,
      "step": 66698
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4213989973068237,
      "learning_rate": 0.0004842528553028532,
      "loss": 3.2533,
      "step": 66699
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5048043727874756,
      "learning_rate": 0.0004842496271411154,
      "loss": 2.953,
      "step": 66700
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0294268131256104,
      "learning_rate": 0.0004842463989451221,
      "loss": 2.9979,
      "step": 66701
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.5458245277404785,
      "learning_rate": 0.00048424317071487396,
      "loss": 3.0227,
      "step": 66702
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.824137568473816,
      "learning_rate": 0.0004842399424503715,
      "loss": 2.8809,
      "step": 66703
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0015740394592285,
      "learning_rate": 0.0004842367141516153,
      "loss": 2.7096,
      "step": 66704
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5493255853652954,
      "learning_rate": 0.0004842334858186062,
      "loss": 3.0236,
      "step": 66705
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3837511539459229,
      "learning_rate": 0.00048423025745134447,
      "loss": 3.0731,
      "step": 66706
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5918399095535278,
      "learning_rate": 0.0004842270290498309,
      "loss": 2.9453,
      "step": 66707
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.509971022605896,
      "learning_rate": 0.000484223800614066,
      "loss": 3.008,
      "step": 66708
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5636658668518066,
      "learning_rate": 0.0004842205721440506,
      "loss": 2.9488,
      "step": 66709
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.811540961265564,
      "learning_rate": 0.0004842173436397849,
      "loss": 2.9779,
      "step": 66710
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4638386964797974,
      "learning_rate": 0.0004842141151012699,
      "loss": 2.9388,
      "step": 66711
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8578742742538452,
      "learning_rate": 0.0004842108865285059,
      "loss": 3.0466,
      "step": 66712
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7982665300369263,
      "learning_rate": 0.0004842076579214936,
      "loss": 3.0701,
      "step": 66713
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7461711168289185,
      "learning_rate": 0.0004842044292802337,
      "loss": 3.1901,
      "step": 66714
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0012261867523193,
      "learning_rate": 0.00048420120060472665,
      "loss": 2.9361,
      "step": 66715
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.140035390853882,
      "learning_rate": 0.00048419797189497315,
      "loss": 3.2438,
      "step": 66716
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6960829496383667,
      "learning_rate": 0.0004841947431509738,
      "loss": 3.02,
      "step": 66717
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.69527006149292,
      "learning_rate": 0.00048419151437272906,
      "loss": 3.0642,
      "step": 66718
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7045425176620483,
      "learning_rate": 0.0004841882855602397,
      "loss": 3.1819,
      "step": 66719
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7174125909805298,
      "learning_rate": 0.00048418505671350633,
      "loss": 2.9116,
      "step": 66720
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3388177156448364,
      "learning_rate": 0.00048418182783252945,
      "loss": 3.0578,
      "step": 66721
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.408778429031372,
      "learning_rate": 0.00048417859891730967,
      "loss": 3.0408,
      "step": 66722
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6884773969650269,
      "learning_rate": 0.0004841753699678476,
      "loss": 3.0321,
      "step": 66723
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3710018396377563,
      "learning_rate": 0.00048417214098414385,
      "loss": 3.0452,
      "step": 66724
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.546587347984314,
      "learning_rate": 0.00048416891196619905,
      "loss": 2.9353,
      "step": 66725
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3658530712127686,
      "learning_rate": 0.00048416568291401375,
      "loss": 2.8774,
      "step": 66726
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.432721734046936,
      "learning_rate": 0.0004841624538275886,
      "loss": 3.128,
      "step": 66727
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.646959662437439,
      "learning_rate": 0.0004841592247069242,
      "loss": 3.0917,
      "step": 66728
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5954519510269165,
      "learning_rate": 0.00048415599555202103,
      "loss": 2.892,
      "step": 66729
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.606926679611206,
      "learning_rate": 0.00048415276636287985,
      "loss": 3.2732,
      "step": 66730
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5405988693237305,
      "learning_rate": 0.00048414953713950113,
      "loss": 3.159,
      "step": 66731
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5385514497756958,
      "learning_rate": 0.0004841463078818857,
      "loss": 2.9884,
      "step": 66732
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.555700659751892,
      "learning_rate": 0.0004841430785900338,
      "loss": 3.1859,
      "step": 66733
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5008900165557861,
      "learning_rate": 0.0004841398492639463,
      "loss": 3.0272,
      "step": 66734
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6657416820526123,
      "learning_rate": 0.0004841366199036238,
      "loss": 3.1205,
      "step": 66735
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5921577215194702,
      "learning_rate": 0.0004841333905090667,
      "loss": 3.3106,
      "step": 66736
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.670710802078247,
      "learning_rate": 0.0004841301610802758,
      "loss": 3.0996,
      "step": 66737
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.671743631362915,
      "learning_rate": 0.00048412693161725164,
      "loss": 3.2851,
      "step": 66738
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5309059619903564,
      "learning_rate": 0.0004841237021199948,
      "loss": 3.1662,
      "step": 66739
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5354796648025513,
      "learning_rate": 0.00048412047258850587,
      "loss": 3.2495,
      "step": 66740
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.5504565238952637,
      "learning_rate": 0.0004841172430227854,
      "loss": 3.1132,
      "step": 66741
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5408741235733032,
      "learning_rate": 0.0004841140134228342,
      "loss": 3.2831,
      "step": 66742
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.572313666343689,
      "learning_rate": 0.00048411078378865263,
      "loss": 3.045,
      "step": 66743
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.189716339111328,
      "learning_rate": 0.0004841075541202414,
      "loss": 3.0492,
      "step": 66744
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.742012619972229,
      "learning_rate": 0.00048410432441760114,
      "loss": 2.866,
      "step": 66745
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8388047218322754,
      "learning_rate": 0.0004841010946807324,
      "loss": 2.9928,
      "step": 66746
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9447404146194458,
      "learning_rate": 0.00048409786490963584,
      "loss": 2.7768,
      "step": 66747
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.135793685913086,
      "learning_rate": 0.0004840946351043118,
      "loss": 3.0201,
      "step": 66748
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6882352828979492,
      "learning_rate": 0.0004840914052647613,
      "loss": 2.8383,
      "step": 66749
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7069092988967896,
      "learning_rate": 0.0004840881753909847,
      "loss": 3.1168,
      "step": 66750
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7978920936584473,
      "learning_rate": 0.0004840849454829826,
      "loss": 3.0712,
      "step": 66751
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5048080682754517,
      "learning_rate": 0.00048408171554075564,
      "loss": 2.9654,
      "step": 66752
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.634620189666748,
      "learning_rate": 0.0004840784855643044,
      "loss": 3.1374,
      "step": 66753
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6770069599151611,
      "learning_rate": 0.00048407525555362957,
      "loss": 2.9217,
      "step": 66754
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5767287015914917,
      "learning_rate": 0.0004840720255087315,
      "loss": 3.3587,
      "step": 66755
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0052011013031006,
      "learning_rate": 0.0004840687954296112,
      "loss": 2.7254,
      "step": 66756
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.6238653659820557,
      "learning_rate": 0.0004840655653162689,
      "loss": 3.1008,
      "step": 66757
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.267697334289551,
      "learning_rate": 0.00048406233516870535,
      "loss": 2.9636,
      "step": 66758
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8530014753341675,
      "learning_rate": 0.0004840591049869211,
      "loss": 3.1267,
      "step": 66759
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5014324188232422,
      "learning_rate": 0.0004840558747709169,
      "loss": 2.9958,
      "step": 66760
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9208649396896362,
      "learning_rate": 0.0004840526445206931,
      "loss": 3.0395,
      "step": 66761
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.598495364189148,
      "learning_rate": 0.0004840494142362505,
      "loss": 3.0953,
      "step": 66762
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.549451470375061,
      "learning_rate": 0.00048404618391758973,
      "loss": 3.1544,
      "step": 66763
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8174066543579102,
      "learning_rate": 0.0004840429535647112,
      "loss": 3.1568,
      "step": 66764
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.456234097480774,
      "learning_rate": 0.0004840397231776156,
      "loss": 3.3817,
      "step": 66765
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7570452690124512,
      "learning_rate": 0.0004840364927563036,
      "loss": 3.0126,
      "step": 66766
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0708279609680176,
      "learning_rate": 0.00048403326230077575,
      "loss": 3.1099,
      "step": 66767
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5209894180297852,
      "learning_rate": 0.0004840300318110325,
      "loss": 3.2715,
      "step": 66768
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.631638526916504,
      "learning_rate": 0.0004840268012870748,
      "loss": 2.8865,
      "step": 66769
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.458837628364563,
      "learning_rate": 0.0004840235707289029,
      "loss": 2.9219,
      "step": 66770
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6959139108657837,
      "learning_rate": 0.0004840203401365176,
      "loss": 3.1592,
      "step": 66771
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4613416194915771,
      "learning_rate": 0.00048401710950991947,
      "loss": 3.019,
      "step": 66772
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3745068311691284,
      "learning_rate": 0.000484013878849109,
      "loss": 2.9041,
      "step": 66773
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.07746958732605,
      "learning_rate": 0.00048401064815408693,
      "loss": 2.8837,
      "step": 66774
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7492105960845947,
      "learning_rate": 0.0004840074174248538,
      "loss": 2.976,
      "step": 66775
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6754827499389648,
      "learning_rate": 0.00048400418666141024,
      "loss": 2.9519,
      "step": 66776
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7088648080825806,
      "learning_rate": 0.00048400095586375675,
      "loss": 3.0471,
      "step": 66777
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7403923273086548,
      "learning_rate": 0.0004839977250318942,
      "loss": 2.9007,
      "step": 66778
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6688075065612793,
      "learning_rate": 0.0004839944941658228,
      "loss": 3.1487,
      "step": 66779
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7295955419540405,
      "learning_rate": 0.0004839912632655434,
      "loss": 2.9631,
      "step": 66780
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0169827938079834,
      "learning_rate": 0.00048398803233105664,
      "loss": 3.0253,
      "step": 66781
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0239880084991455,
      "learning_rate": 0.0004839848013623629,
      "loss": 2.9553,
      "step": 66782
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.560880184173584,
      "learning_rate": 0.0004839815703594629,
      "loss": 2.8926,
      "step": 66783
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.418044924736023,
      "learning_rate": 0.0004839783393223574,
      "loss": 3.1694,
      "step": 66784
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5616710186004639,
      "learning_rate": 0.0004839751082510468,
      "loss": 2.9799,
      "step": 66785
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5229442119598389,
      "learning_rate": 0.0004839718771455317,
      "loss": 3.0902,
      "step": 66786
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.231069326400757,
      "learning_rate": 0.00048396864600581285,
      "loss": 2.8698,
      "step": 66787
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4354859590530396,
      "learning_rate": 0.00048396541483189067,
      "loss": 3.0912,
      "step": 66788
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4653680324554443,
      "learning_rate": 0.0004839621836237659,
      "loss": 2.5721,
      "step": 66789
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6512904167175293,
      "learning_rate": 0.00048395895238143906,
      "loss": 2.8325,
      "step": 66790
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7411972284317017,
      "learning_rate": 0.00048395572110491075,
      "loss": 3.011,
      "step": 66791
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6902110576629639,
      "learning_rate": 0.0004839524897941816,
      "loss": 3.0462,
      "step": 66792
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5512598752975464,
      "learning_rate": 0.0004839492584492523,
      "loss": 3.1225,
      "step": 66793
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6413464546203613,
      "learning_rate": 0.0004839460270701233,
      "loss": 3.0128,
      "step": 66794
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5419225692749023,
      "learning_rate": 0.00048394279565679523,
      "loss": 3.1717,
      "step": 66795
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.778626799583435,
      "learning_rate": 0.00048393956420926873,
      "loss": 2.8718,
      "step": 66796
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.923734188079834,
      "learning_rate": 0.00048393633272754443,
      "loss": 2.8877,
      "step": 66797
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.6428678035736084,
      "learning_rate": 0.0004839331012116229,
      "loss": 3.2282,
      "step": 66798
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.044090747833252,
      "learning_rate": 0.00048392986966150467,
      "loss": 2.9916,
      "step": 66799
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.027928590774536,
      "learning_rate": 0.00048392663807719045,
      "loss": 3.0851,
      "step": 66800
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.789386749267578,
      "learning_rate": 0.0004839234064586808,
      "loss": 3.0906,
      "step": 66801
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0325279235839844,
      "learning_rate": 0.0004839201748059763,
      "loss": 2.921,
      "step": 66802
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.806667685508728,
      "learning_rate": 0.00048391694311907755,
      "loss": 3.1737,
      "step": 66803
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8983725309371948,
      "learning_rate": 0.0004839137113979852,
      "loss": 3.2688,
      "step": 66804
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9655369520187378,
      "learning_rate": 0.00048391047964269985,
      "loss": 2.7335,
      "step": 66805
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.092353582382202,
      "learning_rate": 0.0004839072478532221,
      "loss": 2.9547,
      "step": 66806
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.5231802463531494,
      "learning_rate": 0.0004839040160295524,
      "loss": 3.1629,
      "step": 66807
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.9200382232666016,
      "learning_rate": 0.00048390078417169156,
      "loss": 3.0275,
      "step": 66808
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4208147525787354,
      "learning_rate": 0.0004838975522796401,
      "loss": 2.7644,
      "step": 66809
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5500054359436035,
      "learning_rate": 0.0004838943203533985,
      "loss": 3.0586,
      "step": 66810
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6002919673919678,
      "learning_rate": 0.00048389108839296755,
      "loss": 2.9092,
      "step": 66811
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4808862209320068,
      "learning_rate": 0.00048388785639834776,
      "loss": 3.0614,
      "step": 66812
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6560423374176025,
      "learning_rate": 0.0004838846243695398,
      "loss": 3.1368,
      "step": 66813
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.584791660308838,
      "learning_rate": 0.00048388139230654415,
      "loss": 3.1442,
      "step": 66814
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4582064151763916,
      "learning_rate": 0.00048387816020936154,
      "loss": 3.076,
      "step": 66815
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7886652946472168,
      "learning_rate": 0.0004838749280779925,
      "loss": 3.1437,
      "step": 66816
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4439654350280762,
      "learning_rate": 0.00048387169591243757,
      "loss": 2.8045,
      "step": 66817
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5549944639205933,
      "learning_rate": 0.00048386846371269745,
      "loss": 3.0608,
      "step": 66818
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7749232053756714,
      "learning_rate": 0.0004838652314787727,
      "loss": 3.0086,
      "step": 66819
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.649381160736084,
      "learning_rate": 0.00048386199921066406,
      "loss": 3.1819,
      "step": 66820
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6910080909729004,
      "learning_rate": 0.00048385876690837186,
      "loss": 2.9218,
      "step": 66821
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6425808668136597,
      "learning_rate": 0.00048385553457189694,
      "loss": 2.8636,
      "step": 66822
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7582744359970093,
      "learning_rate": 0.00048385230220123973,
      "loss": 3.0572,
      "step": 66823
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1229398250579834,
      "learning_rate": 0.0004838490697964009,
      "loss": 3.0644,
      "step": 66824
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8646272420883179,
      "learning_rate": 0.0004838458373573811,
      "loss": 2.8862,
      "step": 66825
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.403088092803955,
      "learning_rate": 0.00048384260488418084,
      "loss": 2.6513,
      "step": 66826
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7437924146652222,
      "learning_rate": 0.0004838393723768009,
      "loss": 2.9564,
      "step": 66827
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6297274827957153,
      "learning_rate": 0.0004838361398352416,
      "loss": 3.051,
      "step": 66828
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3376392126083374,
      "learning_rate": 0.00048383290725950376,
      "loss": 2.9689,
      "step": 66829
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6702649593353271,
      "learning_rate": 0.00048382967464958795,
      "loss": 3.0963,
      "step": 66830
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3320462703704834,
      "learning_rate": 0.0004838264420054946,
      "loss": 2.8081,
      "step": 66831
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5398883819580078,
      "learning_rate": 0.0004838232093272246,
      "loss": 3.0591,
      "step": 66832
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6215893030166626,
      "learning_rate": 0.00048381997661477833,
      "loss": 3.238,
      "step": 66833
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2126832008361816,
      "learning_rate": 0.0004838167438681565,
      "loss": 2.8938,
      "step": 66834
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.142432928085327,
      "learning_rate": 0.00048381351108735954,
      "loss": 2.8135,
      "step": 66835
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8091896772384644,
      "learning_rate": 0.0004838102782723883,
      "loss": 3.2489,
      "step": 66836
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6091426610946655,
      "learning_rate": 0.0004838070454232432,
      "loss": 3.3469,
      "step": 66837
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.703687310218811,
      "learning_rate": 0.0004838038125399249,
      "loss": 2.9149,
      "step": 66838
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5477595329284668,
      "learning_rate": 0.00048380057962243403,
      "loss": 2.9687,
      "step": 66839
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5732955932617188,
      "learning_rate": 0.0004837973466707712,
      "loss": 2.9441,
      "step": 66840
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5578612089157104,
      "learning_rate": 0.00048379411368493685,
      "loss": 3.0401,
      "step": 66841
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6755971908569336,
      "learning_rate": 0.0004837908806649318,
      "loss": 2.9092,
      "step": 66842
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6431487798690796,
      "learning_rate": 0.0004837876476107566,
      "loss": 2.9541,
      "step": 66843
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.0574538707733154,
      "learning_rate": 0.00048378441452241175,
      "loss": 2.884,
      "step": 66844
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.5662167072296143,
      "learning_rate": 0.0004837811813998979,
      "loss": 3.1108,
      "step": 66845
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6111164093017578,
      "learning_rate": 0.0004837779482432156,
      "loss": 3.1364,
      "step": 66846
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.093851089477539,
      "learning_rate": 0.0004837747150523656,
      "loss": 3.029,
      "step": 66847
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.394282579421997,
      "learning_rate": 0.0004837714818273484,
      "loss": 3.0387,
      "step": 66848
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9747189283370972,
      "learning_rate": 0.0004837682485681646,
      "loss": 3.1613,
      "step": 66849
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9635313749313354,
      "learning_rate": 0.00048376501527481477,
      "loss": 3.054,
      "step": 66850
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.27728271484375,
      "learning_rate": 0.0004837617819472997,
      "loss": 3.1009,
      "step": 66851
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.147146701812744,
      "learning_rate": 0.0004837585485856197,
      "loss": 3.0763,
      "step": 66852
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0225181579589844,
      "learning_rate": 0.00048375531518977553,
      "loss": 2.8091,
      "step": 66853
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2458481788635254,
      "learning_rate": 0.0004837520817597678,
      "loss": 2.8433,
      "step": 66854
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7495229244232178,
      "learning_rate": 0.0004837488482955971,
      "loss": 3.1479,
      "step": 66855
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.398003101348877,
      "learning_rate": 0.00048374561479726404,
      "loss": 3.0017,
      "step": 66856
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.8433470726013184,
      "learning_rate": 0.00048374238126476916,
      "loss": 2.8904,
      "step": 66857
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.9149208068847656,
      "learning_rate": 0.0004837391476981131,
      "loss": 2.8987,
      "step": 66858
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6517927646636963,
      "learning_rate": 0.00048373591409729645,
      "loss": 2.9485,
      "step": 66859
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.410670042037964,
      "learning_rate": 0.00048373268046231986,
      "loss": 3.1156,
      "step": 66860
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.0113067626953125,
      "learning_rate": 0.00048372944679318396,
      "loss": 2.8889,
      "step": 66861
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3231875896453857,
      "learning_rate": 0.00048372621308988924,
      "loss": 2.9981,
      "step": 66862
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.559877634048462,
      "learning_rate": 0.00048372297935243635,
      "loss": 3.0314,
      "step": 66863
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9608484506607056,
      "learning_rate": 0.00048371974558082583,
      "loss": 3.0879,
      "step": 66864
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.135434627532959,
      "learning_rate": 0.00048371651177505834,
      "loss": 3.1802,
      "step": 66865
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9746123552322388,
      "learning_rate": 0.0004837132779351346,
      "loss": 3.2217,
      "step": 66866
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7115658521652222,
      "learning_rate": 0.000483710044061055,
      "loss": 3.1736,
      "step": 66867
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.607959270477295,
      "learning_rate": 0.0004837068101528202,
      "loss": 3.2411,
      "step": 66868
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.0066206455230713,
      "learning_rate": 0.00048370357621043097,
      "loss": 2.9214,
      "step": 66869
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.3586673736572266,
      "learning_rate": 0.00048370034223388765,
      "loss": 3.1911,
      "step": 66870
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.656579852104187,
      "learning_rate": 0.00048369710822319104,
      "loss": 3.0456,
      "step": 66871
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9718375205993652,
      "learning_rate": 0.00048369387417834163,
      "loss": 3.0444,
      "step": 66872
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5499393939971924,
      "learning_rate": 0.0004836906400993401,
      "loss": 3.1702,
      "step": 66873
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0547871589660645,
      "learning_rate": 0.00048368740598618695,
      "loss": 2.8796,
      "step": 66874
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.9322142601013184,
      "learning_rate": 0.000483684171838883,
      "loss": 3.1717,
      "step": 66875
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6982115507125854,
      "learning_rate": 0.0004836809376574285,
      "loss": 3.0358,
      "step": 66876
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6274582147598267,
      "learning_rate": 0.00048367770344182433,
      "loss": 3.0905,
      "step": 66877
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1824405193328857,
      "learning_rate": 0.000483674469192071,
      "loss": 3.1182,
      "step": 66878
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8088345527648926,
      "learning_rate": 0.0004836712349081691,
      "loss": 2.9987,
      "step": 66879
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0056140422821045,
      "learning_rate": 0.0004836680005901193,
      "loss": 2.9562,
      "step": 66880
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5325195789337158,
      "learning_rate": 0.0004836647662379222,
      "loss": 3.0741,
      "step": 66881
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1920485496520996,
      "learning_rate": 0.0004836615318515782,
      "loss": 3.083,
      "step": 66882
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.029494047164917,
      "learning_rate": 0.00048365829743108815,
      "loss": 3.0426,
      "step": 66883
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.093780517578125,
      "learning_rate": 0.00048365506297645255,
      "loss": 3.2165,
      "step": 66884
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5854655504226685,
      "learning_rate": 0.000483651828487672,
      "loss": 3.1339,
      "step": 66885
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8521429300308228,
      "learning_rate": 0.0004836485939647471,
      "loss": 3.2087,
      "step": 66886
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8372769355773926,
      "learning_rate": 0.0004836453594076785,
      "loss": 2.8527,
      "step": 66887
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.397186279296875,
      "learning_rate": 0.00048364212481646673,
      "loss": 3.0898,
      "step": 66888
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7824369668960571,
      "learning_rate": 0.0004836388901911124,
      "loss": 2.9622,
      "step": 66889
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.554997444152832,
      "learning_rate": 0.00048363565553161613,
      "loss": 2.8944,
      "step": 66890
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.9980766773223877,
      "learning_rate": 0.0004836324208379785,
      "loss": 2.9415,
      "step": 66891
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7762125730514526,
      "learning_rate": 0.00048362918611020024,
      "loss": 2.8516,
      "step": 66892
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3775413036346436,
      "learning_rate": 0.0004836259513482818,
      "loss": 2.9179,
      "step": 66893
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.821683168411255,
      "learning_rate": 0.0004836227165522238,
      "loss": 2.9912,
      "step": 66894
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.721534252166748,
      "learning_rate": 0.0004836194817220269,
      "loss": 3.0851,
      "step": 66895
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4778029918670654,
      "learning_rate": 0.00048361624685769165,
      "loss": 2.9243,
      "step": 66896
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5437718629837036,
      "learning_rate": 0.00048361301195921875,
      "loss": 3.3036,
      "step": 66897
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6706901788711548,
      "learning_rate": 0.00048360977702660866,
      "loss": 2.8184,
      "step": 66898
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.755725383758545,
      "learning_rate": 0.00048360654205986203,
      "loss": 3.0138,
      "step": 66899
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.146458387374878,
      "learning_rate": 0.00048360330705897946,
      "loss": 3.0483,
      "step": 66900
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.912071943283081,
      "learning_rate": 0.00048360007202396166,
      "loss": 3.1579,
      "step": 66901
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.175499677658081,
      "learning_rate": 0.00048359683695480904,
      "loss": 2.9094,
      "step": 66902
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7273192405700684,
      "learning_rate": 0.00048359360185152244,
      "loss": 2.9905,
      "step": 66903
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4860957860946655,
      "learning_rate": 0.0004835903667141022,
      "loss": 2.9903,
      "step": 66904
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5891921520233154,
      "learning_rate": 0.00048358713154254905,
      "loss": 2.842,
      "step": 66905
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.689376711845398,
      "learning_rate": 0.00048358389633686367,
      "loss": 2.9778,
      "step": 66906
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2651753425598145,
      "learning_rate": 0.0004835806610970464,
      "loss": 2.966,
      "step": 66907
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5312137603759766,
      "learning_rate": 0.0004835774258230982,
      "loss": 3.3354,
      "step": 66908
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3917754888534546,
      "learning_rate": 0.00048357419051501947,
      "loss": 2.8024,
      "step": 66909
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.6741931438446045,
      "learning_rate": 0.00048357095517281075,
      "loss": 3.0252,
      "step": 66910
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.297839641571045,
      "learning_rate": 0.0004835677197964728,
      "loss": 3.161,
      "step": 66911
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.064317226409912,
      "learning_rate": 0.0004835644843860061,
      "loss": 2.8543,
      "step": 66912
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.092681646347046,
      "learning_rate": 0.00048356124894141135,
      "loss": 2.9581,
      "step": 66913
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7184544801712036,
      "learning_rate": 0.00048355801346268906,
      "loss": 2.8978,
      "step": 66914
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5075130462646484,
      "learning_rate": 0.0004835547779498399,
      "loss": 2.9825,
      "step": 66915
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7027037143707275,
      "learning_rate": 0.0004835515424028644,
      "loss": 3.0454,
      "step": 66916
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.6373298168182373,
      "learning_rate": 0.00048354830682176324,
      "loss": 3.1182,
      "step": 66917
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9161890745162964,
      "learning_rate": 0.00048354507120653697,
      "loss": 2.959,
      "step": 66918
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6707960367202759,
      "learning_rate": 0.0004835418355571862,
      "loss": 2.8547,
      "step": 66919
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.315861463546753,
      "learning_rate": 0.00048353859987371146,
      "loss": 3.2136,
      "step": 66920
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.536853551864624,
      "learning_rate": 0.00048353536415611363,
      "loss": 2.8366,
      "step": 66921
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5878926515579224,
      "learning_rate": 0.0004835321284043929,
      "loss": 2.9961,
      "step": 66922
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9538177251815796,
      "learning_rate": 0.0004835288926185502,
      "loss": 3.0539,
      "step": 66923
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.026401996612549,
      "learning_rate": 0.000483525656798586,
      "loss": 2.9551,
      "step": 66924
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9790494441986084,
      "learning_rate": 0.0004835224209445009,
      "loss": 3.0851,
      "step": 66925
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7411479949951172,
      "learning_rate": 0.0004835191850562955,
      "loss": 3.1481,
      "step": 66926
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.538025975227356,
      "learning_rate": 0.00048351594913397043,
      "loss": 3.1472,
      "step": 66927
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4617278575897217,
      "learning_rate": 0.00048351271317752633,
      "loss": 3.1328,
      "step": 66928
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9622961282730103,
      "learning_rate": 0.00048350947718696374,
      "loss": 3.1232,
      "step": 66929
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7439098358154297,
      "learning_rate": 0.00048350624116228326,
      "loss": 2.9792,
      "step": 66930
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6800074577331543,
      "learning_rate": 0.00048350300510348554,
      "loss": 2.7953,
      "step": 66931
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8669602870941162,
      "learning_rate": 0.000483499769010571,
      "loss": 3.0906,
      "step": 66932
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1217305660247803,
      "learning_rate": 0.0004834965328835406,
      "loss": 3.015,
      "step": 66933
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7980320453643799,
      "learning_rate": 0.00048349329672239463,
      "loss": 2.8771,
      "step": 66934
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3207783699035645,
      "learning_rate": 0.0004834900605271337,
      "loss": 3.0507,
      "step": 66935
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7929754257202148,
      "learning_rate": 0.0004834868242977586,
      "loss": 2.8168,
      "step": 66936
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.530320405960083,
      "learning_rate": 0.0004834835880342698,
      "loss": 3.0829,
      "step": 66937
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5517287254333496,
      "learning_rate": 0.000483480351736668,
      "loss": 2.8996,
      "step": 66938
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6498295068740845,
      "learning_rate": 0.0004834771154049537,
      "loss": 3.0234,
      "step": 66939
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9518041610717773,
      "learning_rate": 0.00048347387903912756,
      "loss": 3.009,
      "step": 66940
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6274168491363525,
      "learning_rate": 0.00048347064263919013,
      "loss": 2.9253,
      "step": 66941
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1899046897888184,
      "learning_rate": 0.0004834674062051421,
      "loss": 3.113,
      "step": 66942
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8327723741531372,
      "learning_rate": 0.000483464169736984,
      "loss": 2.9697,
      "step": 66943
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.465584397315979,
      "learning_rate": 0.0004834609332347163,
      "loss": 3.1037,
      "step": 66944
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.623548984527588,
      "learning_rate": 0.00048345769669833993,
      "loss": 2.8919,
      "step": 66945
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.877234697341919,
      "learning_rate": 0.0004834544601278553,
      "loss": 3.2287,
      "step": 66946
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.466964602470398,
      "learning_rate": 0.0004834512235232629,
      "loss": 3.0781,
      "step": 66947
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4837841987609863,
      "learning_rate": 0.00048344798688456357,
      "loss": 3.0568,
      "step": 66948
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.571205735206604,
      "learning_rate": 0.00048344475021175776,
      "loss": 3.047,
      "step": 66949
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1579132080078125,
      "learning_rate": 0.0004834415135048461,
      "loss": 3.1048,
      "step": 66950
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8709897994995117,
      "learning_rate": 0.0004834382767638292,
      "loss": 3.1158,
      "step": 66951
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4128867387771606,
      "learning_rate": 0.00048343503998870765,
      "loss": 3.0483,
      "step": 66952
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7185382843017578,
      "learning_rate": 0.00048343180317948206,
      "loss": 3.0924,
      "step": 66953
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6015610694885254,
      "learning_rate": 0.0004834285663361531,
      "loss": 3.1129,
      "step": 66954
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5751276016235352,
      "learning_rate": 0.00048342532945872126,
      "loss": 2.9253,
      "step": 66955
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.676926612854004,
      "learning_rate": 0.00048342209254718715,
      "loss": 2.9767,
      "step": 66956
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.893310308456421,
      "learning_rate": 0.00048341885560155147,
      "loss": 2.983,
      "step": 66957
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.154250144958496,
      "learning_rate": 0.0004834156186218148,
      "loss": 3.1752,
      "step": 66958
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.6068975925445557,
      "learning_rate": 0.0004834123816079776,
      "loss": 2.9391,
      "step": 66959
    },
    {
      "epoch": 0.87,
      "grad_norm": 7.286377429962158,
      "learning_rate": 0.0004834091445600406,
      "loss": 3.0467,
      "step": 66960
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.088524341583252,
      "learning_rate": 0.00048340590747800443,
      "loss": 2.9287,
      "step": 66961
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5977526903152466,
      "learning_rate": 0.00048340267036186966,
      "loss": 3.1568,
      "step": 66962
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.94994056224823,
      "learning_rate": 0.0004833994332116368,
      "loss": 2.8729,
      "step": 66963
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5546362400054932,
      "learning_rate": 0.00048339619602730656,
      "loss": 3.0753,
      "step": 66964
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7403820753097534,
      "learning_rate": 0.00048339295880887943,
      "loss": 3.0228,
      "step": 66965
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6570806503295898,
      "learning_rate": 0.0004833897215563562,
      "loss": 3.1215,
      "step": 66966
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.826546311378479,
      "learning_rate": 0.0004833864842697373,
      "loss": 3.2345,
      "step": 66967
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6434860229492188,
      "learning_rate": 0.0004833832469490234,
      "loss": 3.0349,
      "step": 66968
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.729502558708191,
      "learning_rate": 0.0004833800095942151,
      "loss": 3.0776,
      "step": 66969
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5183666944503784,
      "learning_rate": 0.00048337677220531303,
      "loss": 2.9185,
      "step": 66970
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5671181678771973,
      "learning_rate": 0.0004833735347823177,
      "loss": 2.9379,
      "step": 66971
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.7238609790802,
      "learning_rate": 0.0004833702973252297,
      "loss": 3.0146,
      "step": 66972
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4206089973449707,
      "learning_rate": 0.0004833670598340499,
      "loss": 3.0112,
      "step": 66973
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5284929275512695,
      "learning_rate": 0.0004833638223087786,
      "loss": 2.987,
      "step": 66974
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8629833459854126,
      "learning_rate": 0.0004833605847494164,
      "loss": 3.2282,
      "step": 66975
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8714030981063843,
      "learning_rate": 0.0004833573471559641,
      "loss": 2.7097,
      "step": 66976
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0717830657958984,
      "learning_rate": 0.0004833541095284222,
      "loss": 3.1133,
      "step": 66977
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5994317531585693,
      "learning_rate": 0.0004833508718667913,
      "loss": 3.0931,
      "step": 66978
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8834725618362427,
      "learning_rate": 0.0004833476341710721,
      "loss": 2.9963,
      "step": 66979
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5438326597213745,
      "learning_rate": 0.00048334439644126504,
      "loss": 3.0414,
      "step": 66980
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.584193229675293,
      "learning_rate": 0.00048334115867737076,
      "loss": 2.7916,
      "step": 66981
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3337699174880981,
      "learning_rate": 0.00048333792087938997,
      "loss": 2.9741,
      "step": 66982
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.194145441055298,
      "learning_rate": 0.00048333468304732316,
      "loss": 3.1031,
      "step": 66983
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3560142517089844,
      "learning_rate": 0.00048333144518117094,
      "loss": 2.9737,
      "step": 66984
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6775859594345093,
      "learning_rate": 0.00048332820728093394,
      "loss": 3.2447,
      "step": 66985
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.457366943359375,
      "learning_rate": 0.0004833249693466129,
      "loss": 2.8059,
      "step": 66986
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.051347494125366,
      "learning_rate": 0.00048332173137820815,
      "loss": 3.0972,
      "step": 66987
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3499906063079834,
      "learning_rate": 0.00048331849337572043,
      "loss": 3.0992,
      "step": 66988
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5156644582748413,
      "learning_rate": 0.0004833152553391505,
      "loss": 3.1374,
      "step": 66989
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.111091375350952,
      "learning_rate": 0.0004833120172684986,
      "loss": 3.0478,
      "step": 66990
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.571860671043396,
      "learning_rate": 0.0004833087791637656,
      "loss": 3.3645,
      "step": 66991
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5219612121582031,
      "learning_rate": 0.0004833055410249521,
      "loss": 3.0032,
      "step": 66992
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7527754306793213,
      "learning_rate": 0.0004833023028520586,
      "loss": 3.2089,
      "step": 66993
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4135442972183228,
      "learning_rate": 0.0004832990646450857,
      "loss": 2.9693,
      "step": 66994
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.865409255027771,
      "learning_rate": 0.00048329582640403414,
      "loss": 2.8973,
      "step": 66995
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.422123432159424,
      "learning_rate": 0.0004832925881289043,
      "loss": 2.8798,
      "step": 66996
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5265554189682007,
      "learning_rate": 0.000483289349819697,
      "loss": 2.7539,
      "step": 66997
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6015704870224,
      "learning_rate": 0.00048328611147641277,
      "loss": 2.8047,
      "step": 66998
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.857730507850647,
      "learning_rate": 0.0004832828730990522,
      "loss": 3.0711,
      "step": 66999
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.536417841911316,
      "learning_rate": 0.0004832796346876158,
      "loss": 3.2526,
      "step": 67000
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.676648497581482,
      "learning_rate": 0.00048327639624210427,
      "loss": 3.0135,
      "step": 67001
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6109821796417236,
      "learning_rate": 0.0004832731577625182,
      "loss": 3.1169,
      "step": 67002
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5617600679397583,
      "learning_rate": 0.0004832699192488582,
      "loss": 3.2135,
      "step": 67003
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.380967378616333,
      "learning_rate": 0.00048326668070112485,
      "loss": 3.0061,
      "step": 67004
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.600726842880249,
      "learning_rate": 0.0004832634421193188,
      "loss": 2.893,
      "step": 67005
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.645324945449829,
      "learning_rate": 0.00048326020350344067,
      "loss": 3.1794,
      "step": 67006
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3140606880187988,
      "learning_rate": 0.000483256964853491,
      "loss": 3.1835,
      "step": 67007
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7905941009521484,
      "learning_rate": 0.00048325372616947026,
      "loss": 2.9774,
      "step": 67008
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.636214017868042,
      "learning_rate": 0.00048325048745137935,
      "loss": 2.8934,
      "step": 67009
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.172024965286255,
      "learning_rate": 0.0004832472486992186,
      "loss": 3.1053,
      "step": 67010
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9308010339736938,
      "learning_rate": 0.00048324400991298875,
      "loss": 2.9099,
      "step": 67011
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5257647037506104,
      "learning_rate": 0.0004832407710926904,
      "loss": 2.8913,
      "step": 67012
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.180772304534912,
      "learning_rate": 0.00048323753223832413,
      "loss": 3.0434,
      "step": 67013
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9833705425262451,
      "learning_rate": 0.0004832342933498905,
      "loss": 3.1155,
      "step": 67014
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.375594139099121,
      "learning_rate": 0.0004832310544273903,
      "loss": 2.9297,
      "step": 67015
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7241853475570679,
      "learning_rate": 0.00048322781547082387,
      "loss": 3.311,
      "step": 67016
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9158210754394531,
      "learning_rate": 0.0004832245764801919,
      "loss": 2.9205,
      "step": 67017
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5562763214111328,
      "learning_rate": 0.0004832213374554951,
      "loss": 3.0122,
      "step": 67018
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6701796054840088,
      "learning_rate": 0.00048321809839673395,
      "loss": 2.8267,
      "step": 67019
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8733786344528198,
      "learning_rate": 0.0004832148593039091,
      "loss": 3.2363,
      "step": 67020
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5759944915771484,
      "learning_rate": 0.00048321162017702117,
      "loss": 3.0124,
      "step": 67021
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.55220365524292,
      "learning_rate": 0.0004832083810160708,
      "loss": 2.7859,
      "step": 67022
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6792850494384766,
      "learning_rate": 0.00048320514182105843,
      "loss": 3.0004,
      "step": 67023
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4597827196121216,
      "learning_rate": 0.00048320190259198477,
      "loss": 2.974,
      "step": 67024
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.997794270515442,
      "learning_rate": 0.0004831986633288505,
      "loss": 2.7338,
      "step": 67025
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8512481451034546,
      "learning_rate": 0.00048319542403165604,
      "loss": 2.8682,
      "step": 67026
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5760055780410767,
      "learning_rate": 0.0004831921847004021,
      "loss": 2.8542,
      "step": 67027
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.612699270248413,
      "learning_rate": 0.0004831889453350894,
      "loss": 2.9345,
      "step": 67028
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4360111951828003,
      "learning_rate": 0.00048318570593571837,
      "loss": 3.2839,
      "step": 67029
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9767757654190063,
      "learning_rate": 0.00048318246650228957,
      "loss": 3.2229,
      "step": 67030
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7324426174163818,
      "learning_rate": 0.0004831792270348038,
      "loss": 3.1457,
      "step": 67031
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5401586294174194,
      "learning_rate": 0.0004831759875332615,
      "loss": 2.8199,
      "step": 67032
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7925105094909668,
      "learning_rate": 0.00048317274799766324,
      "loss": 3.1133,
      "step": 67033
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6839911937713623,
      "learning_rate": 0.00048316950842800985,
      "loss": 2.9363,
      "step": 67034
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9235546588897705,
      "learning_rate": 0.0004831662688243018,
      "loss": 2.9014,
      "step": 67035
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4458611011505127,
      "learning_rate": 0.0004831630291865396,
      "loss": 3.1044,
      "step": 67036
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7369329929351807,
      "learning_rate": 0.0004831597895147239,
      "loss": 3.1177,
      "step": 67037
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5540189743041992,
      "learning_rate": 0.0004831565498088554,
      "loss": 3.1596,
      "step": 67038
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6173973083496094,
      "learning_rate": 0.00048315331006893467,
      "loss": 2.9334,
      "step": 67039
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.792721152305603,
      "learning_rate": 0.00048315007029496224,
      "loss": 2.8099,
      "step": 67040
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5561490058898926,
      "learning_rate": 0.00048314683048693877,
      "loss": 2.9886,
      "step": 67041
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8596904277801514,
      "learning_rate": 0.0004831435906448648,
      "loss": 3.0382,
      "step": 67042
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4847277402877808,
      "learning_rate": 0.00048314035076874105,
      "loss": 2.9376,
      "step": 67043
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.593921184539795,
      "learning_rate": 0.000483137110858568,
      "loss": 2.8657,
      "step": 67044
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8142555952072144,
      "learning_rate": 0.00048313387091434635,
      "loss": 3.0731,
      "step": 67045
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2592897415161133,
      "learning_rate": 0.0004831306309360766,
      "loss": 2.8895,
      "step": 67046
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6586743593215942,
      "learning_rate": 0.0004831273909237594,
      "loss": 3.1438,
      "step": 67047
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5240306854248047,
      "learning_rate": 0.00048312415087739546,
      "loss": 2.8749,
      "step": 67048
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.987215518951416,
      "learning_rate": 0.0004831209107969852,
      "loss": 2.9142,
      "step": 67049
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.5027475357055664,
      "learning_rate": 0.0004831176706825294,
      "loss": 3.2777,
      "step": 67050
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.624571681022644,
      "learning_rate": 0.00048311443053402844,
      "loss": 2.9566,
      "step": 67051
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7449806928634644,
      "learning_rate": 0.00048311119035148315,
      "loss": 2.9876,
      "step": 67052
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2206099033355713,
      "learning_rate": 0.000483107950134894,
      "loss": 3.2183,
      "step": 67053
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.649872064590454,
      "learning_rate": 0.00048310470988426155,
      "loss": 3.147,
      "step": 67054
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7378568649291992,
      "learning_rate": 0.0004831014695995866,
      "loss": 3.0763,
      "step": 67055
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.743288278579712,
      "learning_rate": 0.0004830982292808695,
      "loss": 3.0843,
      "step": 67056
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.726844072341919,
      "learning_rate": 0.00048309498892811116,
      "loss": 2.9518,
      "step": 67057
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.2910891771316528,
      "learning_rate": 0.00048309174854131183,
      "loss": 3.1927,
      "step": 67058
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1068482398986816,
      "learning_rate": 0.0004830885081204724,
      "loss": 3.0753,
      "step": 67059
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9828810691833496,
      "learning_rate": 0.0004830852676655933,
      "loss": 3.0685,
      "step": 67060
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7850452661514282,
      "learning_rate": 0.0004830820271766752,
      "loss": 2.9945,
      "step": 67061
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6436866521835327,
      "learning_rate": 0.00048307878665371874,
      "loss": 2.9052,
      "step": 67062
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3918120861053467,
      "learning_rate": 0.00048307554609672445,
      "loss": 2.9935,
      "step": 67063
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5860227346420288,
      "learning_rate": 0.000483072305505693,
      "loss": 3.1987,
      "step": 67064
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3252460956573486,
      "learning_rate": 0.00048306906488062483,
      "loss": 3.0107,
      "step": 67065
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6064684391021729,
      "learning_rate": 0.00048306582422152076,
      "loss": 3.1123,
      "step": 67066
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9792002439498901,
      "learning_rate": 0.00048306258352838135,
      "loss": 2.7785,
      "step": 67067
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5779386758804321,
      "learning_rate": 0.00048305934280120706,
      "loss": 3.0794,
      "step": 67068
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.815232515335083,
      "learning_rate": 0.0004830561020399986,
      "loss": 3.0744,
      "step": 67069
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0989186763763428,
      "learning_rate": 0.0004830528612447566,
      "loss": 2.8087,
      "step": 67070
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.8269307613372803,
      "learning_rate": 0.00048304962041548157,
      "loss": 3.1229,
      "step": 67071
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7402069568634033,
      "learning_rate": 0.0004830463795521742,
      "loss": 3.2649,
      "step": 67072
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9127790927886963,
      "learning_rate": 0.000483043138654835,
      "loss": 2.8035,
      "step": 67073
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9188776016235352,
      "learning_rate": 0.0004830398977234647,
      "loss": 2.9427,
      "step": 67074
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.409144401550293,
      "learning_rate": 0.0004830366567580637,
      "loss": 3.2387,
      "step": 67075
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.000258684158325,
      "learning_rate": 0.0004830334157586329,
      "loss": 3.0159,
      "step": 67076
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.117774486541748,
      "learning_rate": 0.0004830301747251726,
      "loss": 3.0389,
      "step": 67077
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.9999399185180664,
      "learning_rate": 0.0004830269336576835,
      "loss": 2.9604,
      "step": 67078
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.436810255050659,
      "learning_rate": 0.00048302369255616635,
      "loss": 3.1483,
      "step": 67079
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5450787544250488,
      "learning_rate": 0.0004830204514206216,
      "loss": 3.268,
      "step": 67080
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.148460865020752,
      "learning_rate": 0.00048301721025104994,
      "loss": 3.2162,
      "step": 67081
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.155099391937256,
      "learning_rate": 0.0004830139690474519,
      "loss": 3.1569,
      "step": 67082
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3647737503051758,
      "learning_rate": 0.000483010727809828,
      "loss": 3.0182,
      "step": 67083
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.959821105003357,
      "learning_rate": 0.00048300748653817903,
      "loss": 2.9448,
      "step": 67084
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8849749565124512,
      "learning_rate": 0.00048300424523250557,
      "loss": 3.2406,
      "step": 67085
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6086292266845703,
      "learning_rate": 0.00048300100389280816,
      "loss": 3.0519,
      "step": 67086
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4922630786895752,
      "learning_rate": 0.0004829977625190873,
      "loss": 3.1409,
      "step": 67087
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.530206322669983,
      "learning_rate": 0.00048299452111134385,
      "loss": 3.369,
      "step": 67088
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7342227697372437,
      "learning_rate": 0.0004829912796695782,
      "loss": 2.8342,
      "step": 67089
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4445743560791016,
      "learning_rate": 0.0004829880381937909,
      "loss": 3.0117,
      "step": 67090
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.571643590927124,
      "learning_rate": 0.0004829847966839829,
      "loss": 2.8833,
      "step": 67091
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.538663625717163,
      "learning_rate": 0.0004829815551401544,
      "loss": 2.9995,
      "step": 67092
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5176081657409668,
      "learning_rate": 0.0004829783135623062,
      "loss": 2.8772,
      "step": 67093
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4926871061325073,
      "learning_rate": 0.00048297507195043884,
      "loss": 2.8811,
      "step": 67094
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5852712392807007,
      "learning_rate": 0.0004829718303045531,
      "loss": 3.197,
      "step": 67095
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3034712076187134,
      "learning_rate": 0.0004829685886246493,
      "loss": 2.8539,
      "step": 67096
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.411208987236023,
      "learning_rate": 0.00048296534691072825,
      "loss": 3.0477,
      "step": 67097
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4249762296676636,
      "learning_rate": 0.00048296210516279056,
      "loss": 2.9778,
      "step": 67098
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4160915613174438,
      "learning_rate": 0.0004829588633808366,
      "loss": 3.097,
      "step": 67099
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.47319495677948,
      "learning_rate": 0.0004829556215648672,
      "loss": 3.2531,
      "step": 67100
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.531223177909851,
      "learning_rate": 0.000482952379714883,
      "loss": 2.8906,
      "step": 67101
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5020310878753662,
      "learning_rate": 0.00048294913783088444,
      "loss": 2.9598,
      "step": 67102
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7756248712539673,
      "learning_rate": 0.00048294589591287213,
      "loss": 2.6881,
      "step": 67103
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1935582160949707,
      "learning_rate": 0.0004829426539608467,
      "loss": 2.8723,
      "step": 67104
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5748587846755981,
      "learning_rate": 0.00048293941197480886,
      "loss": 2.8968,
      "step": 67105
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.271514415740967,
      "learning_rate": 0.0004829361699547591,
      "loss": 2.9929,
      "step": 67106
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7553777694702148,
      "learning_rate": 0.0004829329279006981,
      "loss": 3.0754,
      "step": 67107
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.3970255851745605,
      "learning_rate": 0.0004829296858126263,
      "loss": 2.7523,
      "step": 67108
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1131186485290527,
      "learning_rate": 0.00048292644369054454,
      "loss": 3.0815,
      "step": 67109
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9987043142318726,
      "learning_rate": 0.0004829232015344532,
      "loss": 2.9438,
      "step": 67110
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.208423137664795,
      "learning_rate": 0.0004829199593443531,
      "loss": 3.0159,
      "step": 67111
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.771679401397705,
      "learning_rate": 0.00048291671712024454,
      "loss": 2.9416,
      "step": 67112
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2918121814727783,
      "learning_rate": 0.0004829134748621285,
      "loss": 2.9984,
      "step": 67113
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4805654287338257,
      "learning_rate": 0.0004829102325700053,
      "loss": 3.2023,
      "step": 67114
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.63996160030365,
      "learning_rate": 0.0004829069902438757,
      "loss": 2.842,
      "step": 67115
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9401715993881226,
      "learning_rate": 0.0004829037478837402,
      "loss": 2.7455,
      "step": 67116
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.86585533618927,
      "learning_rate": 0.0004829005054895994,
      "loss": 3.1147,
      "step": 67117
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5063526630401611,
      "learning_rate": 0.000482897263061454,
      "loss": 3.1674,
      "step": 67118
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5253140926361084,
      "learning_rate": 0.0004828940205993045,
      "loss": 3.1225,
      "step": 67119
    },
    {
      "epoch": 0.87,
      "grad_norm": 5.455012798309326,
      "learning_rate": 0.00048289077810315155,
      "loss": 2.6854,
      "step": 67120
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1379950046539307,
      "learning_rate": 0.0004828875355729958,
      "loss": 3.0773,
      "step": 67121
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7150099277496338,
      "learning_rate": 0.00048288429300883784,
      "loss": 2.9064,
      "step": 67122
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.4382996559143066,
      "learning_rate": 0.00048288105041067813,
      "loss": 3.1747,
      "step": 67123
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4948973655700684,
      "learning_rate": 0.00048287780777851746,
      "loss": 3.1744,
      "step": 67124
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.88731050491333,
      "learning_rate": 0.00048287456511235627,
      "loss": 3.1458,
      "step": 67125
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3916465044021606,
      "learning_rate": 0.0004828713224121953,
      "loss": 3.1057,
      "step": 67126
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.943625569343567,
      "learning_rate": 0.00048286807967803507,
      "loss": 3.2265,
      "step": 67127
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.28189754486084,
      "learning_rate": 0.0004828648369098763,
      "loss": 3.3399,
      "step": 67128
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7086048126220703,
      "learning_rate": 0.00048286159410771936,
      "loss": 3.1596,
      "step": 67129
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3570517301559448,
      "learning_rate": 0.0004828583512715651,
      "loss": 3.0,
      "step": 67130
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1260783672332764,
      "learning_rate": 0.000482855108401414,
      "loss": 2.9181,
      "step": 67131
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2635087966918945,
      "learning_rate": 0.00048285186549726664,
      "loss": 3.0742,
      "step": 67132
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8291596174240112,
      "learning_rate": 0.0004828486225591238,
      "loss": 3.0023,
      "step": 67133
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8068946599960327,
      "learning_rate": 0.0004828453795869859,
      "loss": 2.9563,
      "step": 67134
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.583176612854004,
      "learning_rate": 0.0004828421365808535,
      "loss": 3.0078,
      "step": 67135
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7919127941131592,
      "learning_rate": 0.00048283889354072733,
      "loss": 2.8968,
      "step": 67136
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5607707500457764,
      "learning_rate": 0.000482835650466608,
      "loss": 3.0286,
      "step": 67137
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.5438144207000732,
      "learning_rate": 0.00048283240735849604,
      "loss": 3.0203,
      "step": 67138
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9040321111679077,
      "learning_rate": 0.0004828291642163921,
      "loss": 3.1308,
      "step": 67139
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6581618785858154,
      "learning_rate": 0.0004828259210402968,
      "loss": 2.725,
      "step": 67140
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7208304405212402,
      "learning_rate": 0.00048282267783021063,
      "loss": 3.0178,
      "step": 67141
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4074612855911255,
      "learning_rate": 0.00048281943458613433,
      "loss": 3.1276,
      "step": 67142
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5911160707473755,
      "learning_rate": 0.0004828161913080685,
      "loss": 3.0735,
      "step": 67143
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8160392045974731,
      "learning_rate": 0.00048281294799601356,
      "loss": 3.1362,
      "step": 67144
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4618638753890991,
      "learning_rate": 0.0004828097046499702,
      "loss": 3.0002,
      "step": 67145
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5333033800125122,
      "learning_rate": 0.0004828064612699393,
      "loss": 3.1433,
      "step": 67146
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4435616731643677,
      "learning_rate": 0.00048280321785592104,
      "loss": 3.0641,
      "step": 67147
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6040658950805664,
      "learning_rate": 0.0004827999744079163,
      "loss": 2.9061,
      "step": 67148
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4534406661987305,
      "learning_rate": 0.00048279673092592554,
      "loss": 3.0958,
      "step": 67149
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.692819595336914,
      "learning_rate": 0.0004827934874099495,
      "loss": 2.8977,
      "step": 67150
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.473172903060913,
      "learning_rate": 0.0004827902438599886,
      "loss": 2.8422,
      "step": 67151
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4367622137069702,
      "learning_rate": 0.0004827870002760436,
      "loss": 3.1794,
      "step": 67152
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.4513864517211914,
      "learning_rate": 0.000482783756658115,
      "loss": 3.0455,
      "step": 67153
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.447357416152954,
      "learning_rate": 0.00048278051300620347,
      "loss": 2.9518,
      "step": 67154
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5664421319961548,
      "learning_rate": 0.0004827772693203096,
      "loss": 2.9598,
      "step": 67155
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6259632110595703,
      "learning_rate": 0.00048277402560043393,
      "loss": 3.1523,
      "step": 67156
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7241733074188232,
      "learning_rate": 0.0004827707818465772,
      "loss": 3.0012,
      "step": 67157
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7586846351623535,
      "learning_rate": 0.00048276753805873997,
      "loss": 3.0489,
      "step": 67158
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4963274002075195,
      "learning_rate": 0.00048276429423692264,
      "loss": 2.7853,
      "step": 67159
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6761109828948975,
      "learning_rate": 0.00048276105038112615,
      "loss": 3.1573,
      "step": 67160
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8521329164505005,
      "learning_rate": 0.0004827578064913508,
      "loss": 2.9861,
      "step": 67161
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7019262313842773,
      "learning_rate": 0.0004827545625675975,
      "loss": 3.1521,
      "step": 67162
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5778180360794067,
      "learning_rate": 0.00048275131860986653,
      "loss": 2.7766,
      "step": 67163
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.2059149742126465,
      "learning_rate": 0.00048274807461815857,
      "loss": 2.8741,
      "step": 67164
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.622115135192871,
      "learning_rate": 0.0004827448305924744,
      "loss": 3.2267,
      "step": 67165
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5623174905776978,
      "learning_rate": 0.0004827415865328145,
      "loss": 3.3072,
      "step": 67166
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5882117748260498,
      "learning_rate": 0.0004827383424391795,
      "loss": 2.7874,
      "step": 67167
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6270369291305542,
      "learning_rate": 0.00048273509831157,
      "loss": 2.9733,
      "step": 67168
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.133263111114502,
      "learning_rate": 0.0004827318541499866,
      "loss": 2.9467,
      "step": 67169
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.823452353477478,
      "learning_rate": 0.0004827286099544299,
      "loss": 2.9534,
      "step": 67170
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.879420518875122,
      "learning_rate": 0.0004827253657249005,
      "loss": 3.0439,
      "step": 67171
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1470789909362793,
      "learning_rate": 0.000482722121461399,
      "loss": 3.0285,
      "step": 67172
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9477707147598267,
      "learning_rate": 0.00048271887716392595,
      "loss": 2.9324,
      "step": 67173
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7160402536392212,
      "learning_rate": 0.00048271563283248204,
      "loss": 2.9444,
      "step": 67174
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8024471998214722,
      "learning_rate": 0.00048271238846706786,
      "loss": 3.1002,
      "step": 67175
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.0370442867279053,
      "learning_rate": 0.00048270914406768405,
      "loss": 2.8114,
      "step": 67176
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5529497861862183,
      "learning_rate": 0.00048270589963433116,
      "loss": 3.0561,
      "step": 67177
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6696127653121948,
      "learning_rate": 0.00048270265516700973,
      "loss": 2.9495,
      "step": 67178
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.1200056076049805,
      "learning_rate": 0.00048269941066572046,
      "loss": 3.2267,
      "step": 67179
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6909763813018799,
      "learning_rate": 0.0004826961661304639,
      "loss": 3.0017,
      "step": 67180
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.100318670272827,
      "learning_rate": 0.0004826929215612407,
      "loss": 2.9799,
      "step": 67181
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6023622751235962,
      "learning_rate": 0.00048268967695805137,
      "loss": 3.2754,
      "step": 67182
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7921864986419678,
      "learning_rate": 0.00048268643232089666,
      "loss": 3.1988,
      "step": 67183
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8480509519577026,
      "learning_rate": 0.0004826831876497771,
      "loss": 3.1229,
      "step": 67184
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.9050885438919067,
      "learning_rate": 0.0004826799429446932,
      "loss": 3.1668,
      "step": 67185
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.810311198234558,
      "learning_rate": 0.0004826766982056458,
      "loss": 2.9769,
      "step": 67186
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5485645532608032,
      "learning_rate": 0.0004826734534326352,
      "loss": 3.3128,
      "step": 67187
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5899595022201538,
      "learning_rate": 0.00048267020862566225,
      "loss": 2.9704,
      "step": 67188
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7133629322052002,
      "learning_rate": 0.00048266696378472735,
      "loss": 3.0358,
      "step": 67189
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3950560092926025,
      "learning_rate": 0.00048266371890983133,
      "loss": 3.0139,
      "step": 67190
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8648622035980225,
      "learning_rate": 0.00048266047400097464,
      "loss": 3.0337,
      "step": 67191
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.4352620840072632,
      "learning_rate": 0.0004826572290581579,
      "loss": 2.946,
      "step": 67192
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6912988424301147,
      "learning_rate": 0.00048265398408138175,
      "loss": 2.691,
      "step": 67193
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5719070434570312,
      "learning_rate": 0.0004826507390706467,
      "loss": 3.0037,
      "step": 67194
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.7214164733886719,
      "learning_rate": 0.0004826474940259536,
      "loss": 3.0363,
      "step": 67195
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.3138655424118042,
      "learning_rate": 0.0004826442489473028,
      "loss": 2.8097,
      "step": 67196
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.5823086500167847,
      "learning_rate": 0.00048264100383469495,
      "loss": 3.2376,
      "step": 67197
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.8854601383209229,
      "learning_rate": 0.0004826377586881307,
      "loss": 3.1338,
      "step": 67198
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6866765022277832,
      "learning_rate": 0.00048263451350761063,
      "loss": 3.1337,
      "step": 67199
    },
    {
      "epoch": 0.87,
      "grad_norm": 1.6425336599349976,
      "learning_rate": 0.0004826312682931354,
      "loss": 2.8174,
      "step": 67200
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.024953842163086,
      "learning_rate": 0.0004826280230447056,
      "loss": 3.2285,
      "step": 67201
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4147076606750488,
      "learning_rate": 0.00048262477776232174,
      "loss": 3.059,
      "step": 67202
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.518605351448059,
      "learning_rate": 0.00048262153244598446,
      "loss": 3.0575,
      "step": 67203
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6702423095703125,
      "learning_rate": 0.0004826182870956945,
      "loss": 3.0695,
      "step": 67204
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4922075271606445,
      "learning_rate": 0.0004826150417114522,
      "loss": 3.091,
      "step": 67205
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8613739013671875,
      "learning_rate": 0.0004826117962932584,
      "loss": 3.2084,
      "step": 67206
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8362972736358643,
      "learning_rate": 0.00048260855084111365,
      "loss": 2.9179,
      "step": 67207
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1397933959960938,
      "learning_rate": 0.00048260530535501844,
      "loss": 3.0305,
      "step": 67208
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6415984630584717,
      "learning_rate": 0.00048260205983497356,
      "loss": 3.1814,
      "step": 67209
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.344318151473999,
      "learning_rate": 0.00048259881428097944,
      "loss": 3.0845,
      "step": 67210
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8498928546905518,
      "learning_rate": 0.0004825955686930368,
      "loss": 3.1847,
      "step": 67211
    },
    {
      "epoch": 0.88,
      "grad_norm": 4.882019996643066,
      "learning_rate": 0.0004825923230711462,
      "loss": 3.1385,
      "step": 67212
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.313538074493408,
      "learning_rate": 0.00048258907741530817,
      "loss": 3.1228,
      "step": 67213
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8814243078231812,
      "learning_rate": 0.00048258583172552336,
      "loss": 3.0331,
      "step": 67214
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.655730128288269,
      "learning_rate": 0.00048258258600179256,
      "loss": 2.7601,
      "step": 67215
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.469801187515259,
      "learning_rate": 0.0004825793402441161,
      "loss": 2.9143,
      "step": 67216
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5657073259353638,
      "learning_rate": 0.0004825760944524947,
      "loss": 2.9705,
      "step": 67217
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8801628351211548,
      "learning_rate": 0.0004825728486269289,
      "loss": 2.9942,
      "step": 67218
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7171379327774048,
      "learning_rate": 0.0004825696027674195,
      "loss": 3.2047,
      "step": 67219
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6810142993927002,
      "learning_rate": 0.00048256635687396693,
      "loss": 3.2161,
      "step": 67220
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.859474778175354,
      "learning_rate": 0.0004825631109465718,
      "loss": 3.0622,
      "step": 67221
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6031954288482666,
      "learning_rate": 0.0004825598649852347,
      "loss": 2.872,
      "step": 67222
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6148138046264648,
      "learning_rate": 0.0004825566189899563,
      "loss": 3.1363,
      "step": 67223
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9064538478851318,
      "learning_rate": 0.00048255337296073715,
      "loss": 3.1594,
      "step": 67224
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8453367948532104,
      "learning_rate": 0.00048255012689757794,
      "loss": 3.1929,
      "step": 67225
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5314300060272217,
      "learning_rate": 0.00048254688080047925,
      "loss": 2.9539,
      "step": 67226
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4052926301956177,
      "learning_rate": 0.0004825436346694415,
      "loss": 2.9337,
      "step": 67227
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6773828268051147,
      "learning_rate": 0.0004825403885044655,
      "loss": 3.054,
      "step": 67228
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.524824857711792,
      "learning_rate": 0.0004825371423055519,
      "loss": 3.0507,
      "step": 67229
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4918711185455322,
      "learning_rate": 0.00048253389607270117,
      "loss": 3.1159,
      "step": 67230
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6482183933258057,
      "learning_rate": 0.00048253064980591383,
      "loss": 3.0074,
      "step": 67231
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3397773504257202,
      "learning_rate": 0.00048252740350519075,
      "loss": 3.1386,
      "step": 67232
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9510104656219482,
      "learning_rate": 0.00048252415717053225,
      "loss": 2.9274,
      "step": 67233
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4675636291503906,
      "learning_rate": 0.0004825209108019391,
      "loss": 3.0841,
      "step": 67234
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6153196096420288,
      "learning_rate": 0.0004825176643994119,
      "loss": 3.1684,
      "step": 67235
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0388903617858887,
      "learning_rate": 0.00048251441796295116,
      "loss": 2.8189,
      "step": 67236
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4336800575256348,
      "learning_rate": 0.0004825111714925576,
      "loss": 3.3009,
      "step": 67237
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9084807634353638,
      "learning_rate": 0.00048250792498823176,
      "loss": 3.0269,
      "step": 67238
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3403905630111694,
      "learning_rate": 0.0004825046784499743,
      "loss": 3.2766,
      "step": 67239
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.184727668762207,
      "learning_rate": 0.00048250143187778563,
      "loss": 2.944,
      "step": 67240
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7795695066452026,
      "learning_rate": 0.00048249818527166665,
      "loss": 3.0046,
      "step": 67241
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6670562028884888,
      "learning_rate": 0.0004824949386316178,
      "loss": 3.2618,
      "step": 67242
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8814256191253662,
      "learning_rate": 0.00048249169195763956,
      "loss": 2.9022,
      "step": 67243
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8908565044403076,
      "learning_rate": 0.00048248844524973285,
      "loss": 3.0509,
      "step": 67244
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.552901268005371,
      "learning_rate": 0.000482485198507898,
      "loss": 2.9229,
      "step": 67245
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8490915298461914,
      "learning_rate": 0.00048248195173213564,
      "loss": 2.8103,
      "step": 67246
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6130270957946777,
      "learning_rate": 0.00048247870492244653,
      "loss": 2.9992,
      "step": 67247
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6666725873947144,
      "learning_rate": 0.0004824754580788312,
      "loss": 2.9826,
      "step": 67248
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7631137371063232,
      "learning_rate": 0.00048247221120129014,
      "loss": 2.9108,
      "step": 67249
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8211772441864014,
      "learning_rate": 0.00048246896428982415,
      "loss": 3.0641,
      "step": 67250
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.587028980255127,
      "learning_rate": 0.00048246571734443376,
      "loss": 2.8007,
      "step": 67251
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.54069983959198,
      "learning_rate": 0.0004824624703651194,
      "loss": 2.9591,
      "step": 67252
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7786753177642822,
      "learning_rate": 0.0004824592233518819,
      "loss": 3.1407,
      "step": 67253
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.652160406112671,
      "learning_rate": 0.0004824559763047218,
      "loss": 3.1604,
      "step": 67254
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7206565141677856,
      "learning_rate": 0.00048245272922363967,
      "loss": 2.7507,
      "step": 67255
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6830095052719116,
      "learning_rate": 0.0004824494821086362,
      "loss": 3.1211,
      "step": 67256
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9894503355026245,
      "learning_rate": 0.0004824462349597118,
      "loss": 2.9543,
      "step": 67257
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.319178342819214,
      "learning_rate": 0.0004824429877768674,
      "loss": 2.9733,
      "step": 67258
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8312747478485107,
      "learning_rate": 0.00048243974056010324,
      "loss": 3.1488,
      "step": 67259
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7953139543533325,
      "learning_rate": 0.00048243649330942005,
      "loss": 2.9284,
      "step": 67260
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6921815872192383,
      "learning_rate": 0.00048243324602481857,
      "loss": 2.945,
      "step": 67261
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9715148210525513,
      "learning_rate": 0.0004824299987062993,
      "loss": 2.9474,
      "step": 67262
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.921702265739441,
      "learning_rate": 0.0004824267513538629,
      "loss": 2.8299,
      "step": 67263
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.607267379760742,
      "learning_rate": 0.0004824235039675098,
      "loss": 2.7763,
      "step": 67264
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.760620594024658,
      "learning_rate": 0.0004824202565472408,
      "loss": 3.0998,
      "step": 67265
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7400983572006226,
      "learning_rate": 0.0004824170090930564,
      "loss": 3.1601,
      "step": 67266
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0485446453094482,
      "learning_rate": 0.0004824137616049573,
      "loss": 2.8637,
      "step": 67267
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3678128719329834,
      "learning_rate": 0.000482410514082944,
      "loss": 2.9281,
      "step": 67268
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4386565685272217,
      "learning_rate": 0.0004824072665270171,
      "loss": 3.277,
      "step": 67269
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4470789432525635,
      "learning_rate": 0.0004824040189371773,
      "loss": 3.1323,
      "step": 67270
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5146269798278809,
      "learning_rate": 0.0004824007713134251,
      "loss": 3.1095,
      "step": 67271
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6346722841262817,
      "learning_rate": 0.0004823975236557612,
      "loss": 3.0604,
      "step": 67272
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.7653300762176514,
      "learning_rate": 0.0004823942759641861,
      "loss": 3.0839,
      "step": 67273
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5016990900039673,
      "learning_rate": 0.0004823910282387006,
      "loss": 3.1299,
      "step": 67274
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2115559577941895,
      "learning_rate": 0.000482387780479305,
      "loss": 3.1423,
      "step": 67275
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.669790744781494,
      "learning_rate": 0.0004823845326860001,
      "loss": 2.9102,
      "step": 67276
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6095292568206787,
      "learning_rate": 0.00048238128485878656,
      "loss": 2.7884,
      "step": 67277
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.1061036586761475,
      "learning_rate": 0.00048237803699766486,
      "loss": 3.0896,
      "step": 67278
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7696495056152344,
      "learning_rate": 0.0004823747891026356,
      "loss": 2.968,
      "step": 67279
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2578871250152588,
      "learning_rate": 0.00048237154117369954,
      "loss": 2.8631,
      "step": 67280
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6804612874984741,
      "learning_rate": 0.00048236829321085696,
      "loss": 3.1052,
      "step": 67281
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7989470958709717,
      "learning_rate": 0.00048236504521410883,
      "loss": 3.2318,
      "step": 67282
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3859953880310059,
      "learning_rate": 0.0004823617971834556,
      "loss": 2.8418,
      "step": 67283
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5151774883270264,
      "learning_rate": 0.0004823585491188978,
      "loss": 3.0819,
      "step": 67284
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.800736904144287,
      "learning_rate": 0.00048235530102043613,
      "loss": 2.8933,
      "step": 67285
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.653998613357544,
      "learning_rate": 0.0004823520528880712,
      "loss": 3.1254,
      "step": 67286
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6156368255615234,
      "learning_rate": 0.0004823488047218035,
      "loss": 2.7273,
      "step": 67287
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3974803686141968,
      "learning_rate": 0.00048234555652163375,
      "loss": 3.3037,
      "step": 67288
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4986505508422852,
      "learning_rate": 0.00048234230828756255,
      "loss": 2.8827,
      "step": 67289
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9940119981765747,
      "learning_rate": 0.00048233906001959046,
      "loss": 2.932,
      "step": 67290
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.723518967628479,
      "learning_rate": 0.0004823358117177181,
      "loss": 2.9774,
      "step": 67291
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.721431851387024,
      "learning_rate": 0.00048233256338194594,
      "loss": 2.839,
      "step": 67292
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.044296979904175,
      "learning_rate": 0.00048232931501227493,
      "loss": 3.0033,
      "step": 67293
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5914409160614014,
      "learning_rate": 0.0004823260666087054,
      "loss": 2.9003,
      "step": 67294
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8087835311889648,
      "learning_rate": 0.0004823228181712379,
      "loss": 2.8228,
      "step": 67295
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9199333190917969,
      "learning_rate": 0.00048231956969987326,
      "loss": 3.3203,
      "step": 67296
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9839547872543335,
      "learning_rate": 0.0004823163211946119,
      "loss": 3.0106,
      "step": 67297
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5053220987319946,
      "learning_rate": 0.0004823130726554545,
      "loss": 3.2188,
      "step": 67298
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8426668643951416,
      "learning_rate": 0.0004823098240824017,
      "loss": 2.8948,
      "step": 67299
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7775769233703613,
      "learning_rate": 0.00048230657547545413,
      "loss": 3.0525,
      "step": 67300
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4087436199188232,
      "learning_rate": 0.00048230332683461217,
      "loss": 2.8474,
      "step": 67301
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4993362426757812,
      "learning_rate": 0.0004823000781598767,
      "loss": 3.1461,
      "step": 67302
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2317802906036377,
      "learning_rate": 0.00048229682945124814,
      "loss": 3.0997,
      "step": 67303
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4718836545944214,
      "learning_rate": 0.0004822935807087272,
      "loss": 3.1774,
      "step": 67304
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.739845633506775,
      "learning_rate": 0.0004822903319323144,
      "loss": 3.0895,
      "step": 67305
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3975284099578857,
      "learning_rate": 0.0004822870831220104,
      "loss": 3.1646,
      "step": 67306
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2575459480285645,
      "learning_rate": 0.0004822838342778158,
      "loss": 3.24,
      "step": 67307
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5841021537780762,
      "learning_rate": 0.0004822805853997312,
      "loss": 3.1929,
      "step": 67308
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4582960605621338,
      "learning_rate": 0.0004822773364877572,
      "loss": 2.8173,
      "step": 67309
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.132007122039795,
      "learning_rate": 0.00048227408754189434,
      "loss": 2.9961,
      "step": 67310
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.104417085647583,
      "learning_rate": 0.00048227083856214336,
      "loss": 3.1892,
      "step": 67311
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.87779700756073,
      "learning_rate": 0.0004822675895485047,
      "loss": 2.9846,
      "step": 67312
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9226311445236206,
      "learning_rate": 0.0004822643405009792,
      "loss": 3.1199,
      "step": 67313
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.890374779701233,
      "learning_rate": 0.0004822610914195673,
      "loss": 3.104,
      "step": 67314
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6456382274627686,
      "learning_rate": 0.0004822578423042695,
      "loss": 3.0895,
      "step": 67315
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7960944175720215,
      "learning_rate": 0.0004822545931550866,
      "loss": 3.0977,
      "step": 67316
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4055495262145996,
      "learning_rate": 0.00048225134397201905,
      "loss": 3.1733,
      "step": 67317
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6032944917678833,
      "learning_rate": 0.00048224809475506773,
      "loss": 2.7506,
      "step": 67318
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8815114498138428,
      "learning_rate": 0.00048224484550423287,
      "loss": 3.1739,
      "step": 67319
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5849448442459106,
      "learning_rate": 0.0004822415962195154,
      "loss": 2.9248,
      "step": 67320
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.821295142173767,
      "learning_rate": 0.00048223834690091556,
      "loss": 3.1303,
      "step": 67321
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7480007410049438,
      "learning_rate": 0.0004822350975484343,
      "loss": 2.8734,
      "step": 67322
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9785332679748535,
      "learning_rate": 0.0004822318481620721,
      "loss": 2.9393,
      "step": 67323
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6827507019042969,
      "learning_rate": 0.00048222859874182957,
      "loss": 2.9322,
      "step": 67324
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2793992757797241,
      "learning_rate": 0.0004822253492877073,
      "loss": 3.3558,
      "step": 67325
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8967185020446777,
      "learning_rate": 0.00048222209979970586,
      "loss": 3.0367,
      "step": 67326
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8310555219650269,
      "learning_rate": 0.00048221885027782593,
      "loss": 3.0716,
      "step": 67327
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4620333909988403,
      "learning_rate": 0.00048221560072206804,
      "loss": 3.1384,
      "step": 67328
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6728547811508179,
      "learning_rate": 0.0004822123511324329,
      "loss": 2.7883,
      "step": 67329
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5234991312026978,
      "learning_rate": 0.00048220910150892094,
      "loss": 3.0498,
      "step": 67330
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7339892387390137,
      "learning_rate": 0.000482205851851533,
      "loss": 2.8203,
      "step": 67331
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.725602149963379,
      "learning_rate": 0.00048220260216026944,
      "loss": 3.1201,
      "step": 67332
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5897327661514282,
      "learning_rate": 0.000482199352435131,
      "loss": 2.7456,
      "step": 67333
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4097390174865723,
      "learning_rate": 0.00048219610267611826,
      "loss": 2.9613,
      "step": 67334
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.3183677196502686,
      "learning_rate": 0.00048219285288323186,
      "loss": 2.8932,
      "step": 67335
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0803802013397217,
      "learning_rate": 0.0004821896030564724,
      "loss": 2.9041,
      "step": 67336
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2335197925567627,
      "learning_rate": 0.00048218635319584037,
      "loss": 2.8318,
      "step": 67337
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.176316499710083,
      "learning_rate": 0.00048218310330133647,
      "loss": 2.8648,
      "step": 67338
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6356430053710938,
      "learning_rate": 0.0004821798533729614,
      "loss": 2.9405,
      "step": 67339
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8652055263519287,
      "learning_rate": 0.0004821766034107155,
      "loss": 2.8314,
      "step": 67340
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4753490686416626,
      "learning_rate": 0.00048217335341459963,
      "loss": 3.059,
      "step": 67341
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4199020862579346,
      "learning_rate": 0.00048217010338461426,
      "loss": 2.9945,
      "step": 67342
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9071611166000366,
      "learning_rate": 0.00048216685332076,
      "loss": 3.0101,
      "step": 67343
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5469629764556885,
      "learning_rate": 0.00048216360322303757,
      "loss": 2.9821,
      "step": 67344
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7547236680984497,
      "learning_rate": 0.00048216035309144744,
      "loss": 2.9799,
      "step": 67345
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8552496433258057,
      "learning_rate": 0.00048215710292599025,
      "loss": 2.9212,
      "step": 67346
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8957127332687378,
      "learning_rate": 0.0004821538527266666,
      "loss": 3.0642,
      "step": 67347
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9672820568084717,
      "learning_rate": 0.00048215060249347713,
      "loss": 3.1469,
      "step": 67348
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.220010995864868,
      "learning_rate": 0.00048214735222642234,
      "loss": 2.7377,
      "step": 67349
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.412192463874817,
      "learning_rate": 0.00048214410192550314,
      "loss": 3.0226,
      "step": 67350
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7668589353561401,
      "learning_rate": 0.0004821408515907197,
      "loss": 2.999,
      "step": 67351
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.83526611328125,
      "learning_rate": 0.00048213760122207294,
      "loss": 3.0108,
      "step": 67352
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6733405590057373,
      "learning_rate": 0.0004821343508195634,
      "loss": 3.094,
      "step": 67353
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7056366205215454,
      "learning_rate": 0.00048213110038319157,
      "loss": 2.9168,
      "step": 67354
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7738697528839111,
      "learning_rate": 0.0004821278499129581,
      "loss": 3.0007,
      "step": 67355
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.532727837562561,
      "learning_rate": 0.00048212459940886364,
      "loss": 3.0828,
      "step": 67356
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5823769569396973,
      "learning_rate": 0.00048212134887090885,
      "loss": 2.9362,
      "step": 67357
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5837856531143188,
      "learning_rate": 0.0004821180982990942,
      "loss": 2.9885,
      "step": 67358
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3534674644470215,
      "learning_rate": 0.00048211484769342046,
      "loss": 2.9486,
      "step": 67359
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6129528284072876,
      "learning_rate": 0.000482111597053888,
      "loss": 2.9549,
      "step": 67360
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8575901985168457,
      "learning_rate": 0.00048210834638049755,
      "loss": 3.0235,
      "step": 67361
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0929083824157715,
      "learning_rate": 0.0004821050956732498,
      "loss": 2.9184,
      "step": 67362
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.191028356552124,
      "learning_rate": 0.0004821018449321453,
      "loss": 3.0307,
      "step": 67363
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.549054741859436,
      "learning_rate": 0.0004820985941571846,
      "loss": 3.1527,
      "step": 67364
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.73102068901062,
      "learning_rate": 0.0004820953433483683,
      "loss": 2.9643,
      "step": 67365
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.2178916931152344,
      "learning_rate": 0.00048209209250569705,
      "loss": 3.0534,
      "step": 67366
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.762331247329712,
      "learning_rate": 0.0004820888416291715,
      "loss": 3.0818,
      "step": 67367
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8159027099609375,
      "learning_rate": 0.00048208559071879207,
      "loss": 3.239,
      "step": 67368
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.83081316947937,
      "learning_rate": 0.0004820823397745596,
      "loss": 2.9547,
      "step": 67369
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7682067155838013,
      "learning_rate": 0.00048207908879647457,
      "loss": 3.0563,
      "step": 67370
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.640792727470398,
      "learning_rate": 0.0004820758377845376,
      "loss": 2.7044,
      "step": 67371
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4707624912261963,
      "learning_rate": 0.0004820725867387493,
      "loss": 2.9101,
      "step": 67372
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9809738397598267,
      "learning_rate": 0.0004820693356591102,
      "loss": 3.1849,
      "step": 67373
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7987147569656372,
      "learning_rate": 0.000482066084545621,
      "loss": 3.0166,
      "step": 67374
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8879477977752686,
      "learning_rate": 0.00048206283339828236,
      "loss": 3.12,
      "step": 67375
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7307559251785278,
      "learning_rate": 0.0004820595822170947,
      "loss": 3.1174,
      "step": 67376
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.551024317741394,
      "learning_rate": 0.00048205633100205885,
      "loss": 3.0922,
      "step": 67377
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.5358073711395264,
      "learning_rate": 0.0004820530797531752,
      "loss": 3.2421,
      "step": 67378
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6064200401306152,
      "learning_rate": 0.00048204982847044445,
      "loss": 3.0153,
      "step": 67379
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.760787010192871,
      "learning_rate": 0.00048204657715386715,
      "loss": 3.1434,
      "step": 67380
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.602334976196289,
      "learning_rate": 0.00048204332580344404,
      "loss": 2.9715,
      "step": 67381
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5818198919296265,
      "learning_rate": 0.00048204007441917567,
      "loss": 2.8527,
      "step": 67382
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.539017677307129,
      "learning_rate": 0.0004820368230010625,
      "loss": 2.8905,
      "step": 67383
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.869645118713379,
      "learning_rate": 0.00048203357154910534,
      "loss": 2.9457,
      "step": 67384
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7466562986373901,
      "learning_rate": 0.0004820303200633046,
      "loss": 3.115,
      "step": 67385
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.769426941871643,
      "learning_rate": 0.0004820270685436611,
      "loss": 3.0413,
      "step": 67386
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5289658308029175,
      "learning_rate": 0.0004820238169901753,
      "loss": 3.0374,
      "step": 67387
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7917002439498901,
      "learning_rate": 0.00048202056540284786,
      "loss": 3.113,
      "step": 67388
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7815793752670288,
      "learning_rate": 0.0004820173137816793,
      "loss": 2.9784,
      "step": 67389
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7199935913085938,
      "learning_rate": 0.00048201406212667035,
      "loss": 3.0448,
      "step": 67390
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8594635725021362,
      "learning_rate": 0.0004820108104378215,
      "loss": 2.9041,
      "step": 67391
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4798994064331055,
      "learning_rate": 0.00048200755871513336,
      "loss": 3.0151,
      "step": 67392
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2590432167053223,
      "learning_rate": 0.00048200430695860666,
      "loss": 2.8904,
      "step": 67393
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.245176315307617,
      "learning_rate": 0.0004820010551682419,
      "loss": 2.9291,
      "step": 67394
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8010542392730713,
      "learning_rate": 0.0004819978033440397,
      "loss": 2.9155,
      "step": 67395
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4903008937835693,
      "learning_rate": 0.0004819945514860007,
      "loss": 3.1248,
      "step": 67396
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.505778193473816,
      "learning_rate": 0.0004819912995941254,
      "loss": 3.1266,
      "step": 67397
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5224404335021973,
      "learning_rate": 0.0004819880476684145,
      "loss": 3.097,
      "step": 67398
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.845481038093567,
      "learning_rate": 0.0004819847957088687,
      "loss": 3.1073,
      "step": 67399
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6715635061264038,
      "learning_rate": 0.0004819815437154884,
      "loss": 2.9811,
      "step": 67400
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.758581519126892,
      "learning_rate": 0.0004819782916882744,
      "loss": 2.8786,
      "step": 67401
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7547177076339722,
      "learning_rate": 0.0004819750396272271,
      "loss": 3.0595,
      "step": 67402
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.817601442337036,
      "learning_rate": 0.00048197178753234715,
      "loss": 2.9646,
      "step": 67403
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1845896244049072,
      "learning_rate": 0.00048196853540363526,
      "loss": 3.0009,
      "step": 67404
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.631956934928894,
      "learning_rate": 0.00048196528324109204,
      "loss": 3.2396,
      "step": 67405
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.0910489559173584,
      "learning_rate": 0.00048196203104471807,
      "loss": 2.9593,
      "step": 67406
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.256420135498047,
      "learning_rate": 0.00048195877881451374,
      "loss": 2.9179,
      "step": 67407
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.9179248809814453,
      "learning_rate": 0.00048195552655048003,
      "loss": 3.1222,
      "step": 67408
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6563353538513184,
      "learning_rate": 0.00048195227425261726,
      "loss": 3.0445,
      "step": 67409
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.5485739707946777,
      "learning_rate": 0.0004819490219209261,
      "loss": 3.0425,
      "step": 67410
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3210983276367188,
      "learning_rate": 0.00048194576955540725,
      "loss": 3.4939,
      "step": 67411
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1319658756256104,
      "learning_rate": 0.0004819425171560612,
      "loss": 3.1456,
      "step": 67412
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3693410158157349,
      "learning_rate": 0.00048193926472288865,
      "loss": 2.9401,
      "step": 67413
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.824450731277466,
      "learning_rate": 0.0004819360122558902,
      "loss": 3.1025,
      "step": 67414
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1860768795013428,
      "learning_rate": 0.00048193275975506633,
      "loss": 2.9186,
      "step": 67415
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5672847032546997,
      "learning_rate": 0.00048192950722041766,
      "loss": 3.0916,
      "step": 67416
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.93772292137146,
      "learning_rate": 0.000481926254651945,
      "loss": 3.0769,
      "step": 67417
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8975650072097778,
      "learning_rate": 0.0004819230020496488,
      "loss": 2.9761,
      "step": 67418
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8028663396835327,
      "learning_rate": 0.00048191974941352953,
      "loss": 3.0728,
      "step": 67419
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8865978717803955,
      "learning_rate": 0.00048191649674358814,
      "loss": 3.0596,
      "step": 67420
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.389954090118408,
      "learning_rate": 0.00048191324403982485,
      "loss": 3.2196,
      "step": 67421
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0404279232025146,
      "learning_rate": 0.0004819099913022406,
      "loss": 2.8439,
      "step": 67422
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3436336517333984,
      "learning_rate": 0.00048190673853083576,
      "loss": 3.0599,
      "step": 67423
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4739019870758057,
      "learning_rate": 0.00048190348572561115,
      "loss": 2.9535,
      "step": 67424
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.883375644683838,
      "learning_rate": 0.00048190023288656713,
      "loss": 3.2427,
      "step": 67425
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.499558448791504,
      "learning_rate": 0.00048189698001370446,
      "loss": 2.8864,
      "step": 67426
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7741267681121826,
      "learning_rate": 0.0004818937271070236,
      "loss": 3.0729,
      "step": 67427
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.227073907852173,
      "learning_rate": 0.0004818904741665255,
      "loss": 3.1765,
      "step": 67428
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.058978796005249,
      "learning_rate": 0.0004818872211922103,
      "loss": 3.1331,
      "step": 67429
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.321856737136841,
      "learning_rate": 0.000481883968184079,
      "loss": 2.9844,
      "step": 67430
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6598306894302368,
      "learning_rate": 0.00048188071514213194,
      "loss": 3.0282,
      "step": 67431
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.330711603164673,
      "learning_rate": 0.00048187746206636983,
      "loss": 2.7406,
      "step": 67432
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6177397966384888,
      "learning_rate": 0.00048187420895679325,
      "loss": 2.9621,
      "step": 67433
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5207831859588623,
      "learning_rate": 0.0004818709558134029,
      "loss": 3.054,
      "step": 67434
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0437843799591064,
      "learning_rate": 0.0004818677026361992,
      "loss": 3.0168,
      "step": 67435
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8243393898010254,
      "learning_rate": 0.0004818644494251829,
      "loss": 2.9726,
      "step": 67436
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6760274171829224,
      "learning_rate": 0.0004818611961803546,
      "loss": 3.0535,
      "step": 67437
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.831428050994873,
      "learning_rate": 0.00048185794290171485,
      "loss": 2.8925,
      "step": 67438
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2671620845794678,
      "learning_rate": 0.00048185468958926436,
      "loss": 2.9695,
      "step": 67439
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6442475318908691,
      "learning_rate": 0.00048185143624300357,
      "loss": 3.0155,
      "step": 67440
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5840665102005005,
      "learning_rate": 0.0004818481828629332,
      "loss": 2.9206,
      "step": 67441
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7848178148269653,
      "learning_rate": 0.00048184492944905374,
      "loss": 2.9152,
      "step": 67442
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.05309796333313,
      "learning_rate": 0.0004818416760013659,
      "loss": 3.1628,
      "step": 67443
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.870043158531189,
      "learning_rate": 0.0004818384225198703,
      "loss": 3.1851,
      "step": 67444
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8024089336395264,
      "learning_rate": 0.0004818351690045675,
      "loss": 2.9978,
      "step": 67445
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.145212173461914,
      "learning_rate": 0.0004818319154554581,
      "loss": 2.835,
      "step": 67446
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8150426149368286,
      "learning_rate": 0.0004818286618725427,
      "loss": 3.1961,
      "step": 67447
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8947077989578247,
      "learning_rate": 0.0004818254082558219,
      "loss": 3.0134,
      "step": 67448
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4178152084350586,
      "learning_rate": 0.00048182215460529636,
      "loss": 2.9183,
      "step": 67449
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4521516561508179,
      "learning_rate": 0.00048181890092096664,
      "loss": 2.9493,
      "step": 67450
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9756052494049072,
      "learning_rate": 0.0004818156472028334,
      "loss": 3.2012,
      "step": 67451
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8047140836715698,
      "learning_rate": 0.0004818123934508971,
      "loss": 3.0085,
      "step": 67452
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4303473234176636,
      "learning_rate": 0.0004818091396651585,
      "loss": 2.9481,
      "step": 67453
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.379439353942871,
      "learning_rate": 0.0004818058858456182,
      "loss": 2.9199,
      "step": 67454
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.498948335647583,
      "learning_rate": 0.0004818026319922767,
      "loss": 3.0231,
      "step": 67455
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6296204328536987,
      "learning_rate": 0.00048179937810513464,
      "loss": 2.8739,
      "step": 67456
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.791185736656189,
      "learning_rate": 0.0004817961241841927,
      "loss": 2.9706,
      "step": 67457
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.428956389427185,
      "learning_rate": 0.0004817928702294514,
      "loss": 2.8729,
      "step": 67458
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6359472274780273,
      "learning_rate": 0.00048178961624091137,
      "loss": 3.0562,
      "step": 67459
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8021456003189087,
      "learning_rate": 0.0004817863622185732,
      "loss": 3.1677,
      "step": 67460
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7241986989974976,
      "learning_rate": 0.00048178310816243756,
      "loss": 2.9734,
      "step": 67461
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6835600137710571,
      "learning_rate": 0.000481779854072505,
      "loss": 3.0409,
      "step": 67462
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5219106674194336,
      "learning_rate": 0.00048177659994877615,
      "loss": 3.1131,
      "step": 67463
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4750653505325317,
      "learning_rate": 0.0004817733457912515,
      "loss": 2.9726,
      "step": 67464
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5517481565475464,
      "learning_rate": 0.00048177009159993176,
      "loss": 3.276,
      "step": 67465
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8799974918365479,
      "learning_rate": 0.0004817668373748177,
      "loss": 3.1768,
      "step": 67466
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7741808891296387,
      "learning_rate": 0.00048176358311590963,
      "loss": 3.0775,
      "step": 67467
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.287139892578125,
      "learning_rate": 0.00048176032882320816,
      "loss": 3.1903,
      "step": 67468
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.90147864818573,
      "learning_rate": 0.00048175707449671433,
      "loss": 3.1787,
      "step": 67469
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8205076456069946,
      "learning_rate": 0.00048175382013642814,
      "loss": 3.2139,
      "step": 67470
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5621311664581299,
      "learning_rate": 0.00048175056574235056,
      "loss": 2.937,
      "step": 67471
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5962796211242676,
      "learning_rate": 0.00048174731131448215,
      "loss": 2.9135,
      "step": 67472
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5663026571273804,
      "learning_rate": 0.0004817440568528235,
      "loss": 2.7834,
      "step": 67473
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.612117052078247,
      "learning_rate": 0.0004817408023573751,
      "loss": 3.0039,
      "step": 67474
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.651984691619873,
      "learning_rate": 0.0004817375478281378,
      "loss": 2.9386,
      "step": 67475
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4468916654586792,
      "learning_rate": 0.000481734293265112,
      "loss": 3.2837,
      "step": 67476
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7061777114868164,
      "learning_rate": 0.0004817310386682984,
      "loss": 3.4011,
      "step": 67477
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7474262714385986,
      "learning_rate": 0.00048172778403769755,
      "loss": 3.2756,
      "step": 67478
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.786320447921753,
      "learning_rate": 0.00048172452937330994,
      "loss": 2.9339,
      "step": 67479
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.602777361869812,
      "learning_rate": 0.00048172127467513643,
      "loss": 3.3677,
      "step": 67480
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2915492057800293,
      "learning_rate": 0.00048171801994317755,
      "loss": 3.0639,
      "step": 67481
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.678076148033142,
      "learning_rate": 0.00048171476517743384,
      "loss": 3.0931,
      "step": 67482
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5029966831207275,
      "learning_rate": 0.0004817115103779058,
      "loss": 3.1804,
      "step": 67483
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7429935932159424,
      "learning_rate": 0.0004817082555445943,
      "loss": 2.6905,
      "step": 67484
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0496490001678467,
      "learning_rate": 0.00048170500067749975,
      "loss": 3.0168,
      "step": 67485
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7916356325149536,
      "learning_rate": 0.0004817017457766228,
      "loss": 2.8044,
      "step": 67486
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8347879648208618,
      "learning_rate": 0.0004816984908419641,
      "loss": 2.8633,
      "step": 67487
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6405149698257446,
      "learning_rate": 0.0004816952358735242,
      "loss": 3.0623,
      "step": 67488
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5846997499465942,
      "learning_rate": 0.00048169198087130374,
      "loss": 3.2231,
      "step": 67489
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4305037260055542,
      "learning_rate": 0.0004816887258353033,
      "loss": 3.2956,
      "step": 67490
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6965371370315552,
      "learning_rate": 0.00048168547076552344,
      "loss": 2.9514,
      "step": 67491
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5031651258468628,
      "learning_rate": 0.0004816822156619649,
      "loss": 3.0848,
      "step": 67492
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6701995134353638,
      "learning_rate": 0.00048167896052462813,
      "loss": 2.6931,
      "step": 67493
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7556476593017578,
      "learning_rate": 0.00048167570535351397,
      "loss": 3.1567,
      "step": 67494
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6318657398223877,
      "learning_rate": 0.00048167245014862274,
      "loss": 3.1229,
      "step": 67495
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6000581979751587,
      "learning_rate": 0.00048166919490995515,
      "loss": 3.1582,
      "step": 67496
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5265352725982666,
      "learning_rate": 0.0004816659396375119,
      "loss": 2.8876,
      "step": 67497
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4368337392807007,
      "learning_rate": 0.00048166268433129345,
      "loss": 2.8138,
      "step": 67498
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8325769901275635,
      "learning_rate": 0.00048165942899130055,
      "loss": 2.971,
      "step": 67499
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8059260845184326,
      "learning_rate": 0.0004816561736175337,
      "loss": 3.11,
      "step": 67500
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8184263706207275,
      "learning_rate": 0.0004816529182099935,
      "loss": 3.1331,
      "step": 67501
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8551205396652222,
      "learning_rate": 0.0004816496627686807,
      "loss": 3.0226,
      "step": 67502
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.562656044960022,
      "learning_rate": 0.0004816464072935958,
      "loss": 2.9736,
      "step": 67503
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9187031984329224,
      "learning_rate": 0.00048164315178473926,
      "loss": 2.8871,
      "step": 67504
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.524051308631897,
      "learning_rate": 0.0004816398962421119,
      "loss": 3.1142,
      "step": 67505
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.057605028152466,
      "learning_rate": 0.00048163664066571435,
      "loss": 2.8444,
      "step": 67506
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4152939319610596,
      "learning_rate": 0.00048163338505554694,
      "loss": 2.9335,
      "step": 67507
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5543673038482666,
      "learning_rate": 0.0004816301294116106,
      "loss": 2.8927,
      "step": 67508
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5573863983154297,
      "learning_rate": 0.00048162687373390573,
      "loss": 2.8231,
      "step": 67509
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7091646194458008,
      "learning_rate": 0.00048162361802243307,
      "loss": 3.1161,
      "step": 67510
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.584919810295105,
      "learning_rate": 0.000481620362277193,
      "loss": 2.8906,
      "step": 67511
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6267781257629395,
      "learning_rate": 0.00048161710649818646,
      "loss": 3.2376,
      "step": 67512
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9803831577301025,
      "learning_rate": 0.00048161385068541375,
      "loss": 2.8456,
      "step": 67513
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4226727485656738,
      "learning_rate": 0.0004816105948388756,
      "loss": 2.9803,
      "step": 67514
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4373435974121094,
      "learning_rate": 0.0004816073389585727,
      "loss": 3.1636,
      "step": 67515
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6857296228408813,
      "learning_rate": 0.00048160408304450536,
      "loss": 2.9428,
      "step": 67516
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4594449996948242,
      "learning_rate": 0.00048160082709667457,
      "loss": 3.1174,
      "step": 67517
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6366080045700073,
      "learning_rate": 0.00048159757111508074,
      "loss": 3.1724,
      "step": 67518
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4737614393234253,
      "learning_rate": 0.0004815943150997245,
      "loss": 2.9477,
      "step": 67519
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7715790271759033,
      "learning_rate": 0.00048159105905060644,
      "loss": 3.2032,
      "step": 67520
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8677479028701782,
      "learning_rate": 0.0004815878029677271,
      "loss": 3.0449,
      "step": 67521
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9741424322128296,
      "learning_rate": 0.00048158454685108727,
      "loss": 3.212,
      "step": 67522
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6939237117767334,
      "learning_rate": 0.00048158129070068734,
      "loss": 2.7767,
      "step": 67523
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6709020137786865,
      "learning_rate": 0.0004815780345165281,
      "loss": 3.1787,
      "step": 67524
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7027771472930908,
      "learning_rate": 0.00048157477829861,
      "loss": 3.0395,
      "step": 67525
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6704553365707397,
      "learning_rate": 0.0004815715220469338,
      "loss": 2.9775,
      "step": 67526
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0566837787628174,
      "learning_rate": 0.00048156826576150003,
      "loss": 3.1029,
      "step": 67527
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.516102910041809,
      "learning_rate": 0.0004815650094423092,
      "loss": 2.8158,
      "step": 67528
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0823028087615967,
      "learning_rate": 0.00048156175308936206,
      "loss": 2.8703,
      "step": 67529
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.793128490447998,
      "learning_rate": 0.0004815584967026592,
      "loss": 2.9887,
      "step": 67530
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3511931896209717,
      "learning_rate": 0.0004815552402822012,
      "loss": 3.1029,
      "step": 67531
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.966287851333618,
      "learning_rate": 0.00048155198382798857,
      "loss": 2.9679,
      "step": 67532
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9544124603271484,
      "learning_rate": 0.0004815487273400221,
      "loss": 3.2016,
      "step": 67533
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0078749656677246,
      "learning_rate": 0.0004815454708183022,
      "loss": 3.0732,
      "step": 67534
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5614725351333618,
      "learning_rate": 0.0004815422142628296,
      "loss": 2.901,
      "step": 67535
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.565133571624756,
      "learning_rate": 0.0004815389576736049,
      "loss": 2.6398,
      "step": 67536
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4456110000610352,
      "learning_rate": 0.0004815357010506287,
      "loss": 3.0041,
      "step": 67537
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8326517343521118,
      "learning_rate": 0.00048153244439390146,
      "loss": 2.8254,
      "step": 67538
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6854699850082397,
      "learning_rate": 0.000481529187703424,
      "loss": 3.086,
      "step": 67539
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9991096258163452,
      "learning_rate": 0.0004815259309791969,
      "loss": 3.1184,
      "step": 67540
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8998024463653564,
      "learning_rate": 0.00048152267422122054,
      "loss": 2.9656,
      "step": 67541
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.052278757095337,
      "learning_rate": 0.00048151941742949583,
      "loss": 3.2581,
      "step": 67542
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.609631061553955,
      "learning_rate": 0.0004815161606040232,
      "loss": 2.8974,
      "step": 67543
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.677595853805542,
      "learning_rate": 0.0004815129037448033,
      "loss": 3.0424,
      "step": 67544
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5976619720458984,
      "learning_rate": 0.00048150964685183677,
      "loss": 3.0868,
      "step": 67545
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4367141723632812,
      "learning_rate": 0.00048150638992512407,
      "loss": 3.1632,
      "step": 67546
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6297118663787842,
      "learning_rate": 0.0004815031329646659,
      "loss": 2.9545,
      "step": 67547
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2818405628204346,
      "learning_rate": 0.000481499875970463,
      "loss": 3.3007,
      "step": 67548
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.912153959274292,
      "learning_rate": 0.0004814966189425157,
      "loss": 3.1269,
      "step": 67549
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6691879034042358,
      "learning_rate": 0.00048149336188082483,
      "loss": 2.9857,
      "step": 67550
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.235285520553589,
      "learning_rate": 0.0004814901047853909,
      "loss": 3.1959,
      "step": 67551
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.330620288848877,
      "learning_rate": 0.0004814868476562145,
      "loss": 3.0373,
      "step": 67552
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9693081378936768,
      "learning_rate": 0.00048148359049329635,
      "loss": 3.2669,
      "step": 67553
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5399354696273804,
      "learning_rate": 0.00048148033329663693,
      "loss": 3.0783,
      "step": 67554
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7458488941192627,
      "learning_rate": 0.00048147707606623687,
      "loss": 3.0539,
      "step": 67555
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.842026710510254,
      "learning_rate": 0.0004814738188020968,
      "loss": 2.7251,
      "step": 67556
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6470407247543335,
      "learning_rate": 0.0004814705615042174,
      "loss": 2.9315,
      "step": 67557
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.134345769882202,
      "learning_rate": 0.0004814673041725991,
      "loss": 2.856,
      "step": 67558
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4777178764343262,
      "learning_rate": 0.0004814640468072426,
      "loss": 3.0361,
      "step": 67559
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.3483567237854004,
      "learning_rate": 0.00048146078940814856,
      "loss": 3.0614,
      "step": 67560
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2499752044677734,
      "learning_rate": 0.0004814575319753176,
      "loss": 3.1013,
      "step": 67561
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.783978819847107,
      "learning_rate": 0.0004814542745087501,
      "loss": 3.0425,
      "step": 67562
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4875274896621704,
      "learning_rate": 0.00048145101700844693,
      "loss": 3.1725,
      "step": 67563
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.058745861053467,
      "learning_rate": 0.0004814477594744086,
      "loss": 2.8046,
      "step": 67564
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.4914119243621826,
      "learning_rate": 0.00048144450190663564,
      "loss": 3.1642,
      "step": 67565
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6298131942749023,
      "learning_rate": 0.00048144124430512874,
      "loss": 2.8445,
      "step": 67566
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0474274158477783,
      "learning_rate": 0.0004814379866698885,
      "loss": 2.786,
      "step": 67567
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7704195976257324,
      "learning_rate": 0.00048143472900091555,
      "loss": 3.3429,
      "step": 67568
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.163846731185913,
      "learning_rate": 0.00048143147129821036,
      "loss": 3.0161,
      "step": 67569
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4136090278625488,
      "learning_rate": 0.00048142821356177373,
      "loss": 3.0803,
      "step": 67570
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2807724475860596,
      "learning_rate": 0.0004814249557916061,
      "loss": 3.3348,
      "step": 67571
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3666915893554688,
      "learning_rate": 0.00048142169798770826,
      "loss": 2.9306,
      "step": 67572
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5235134363174438,
      "learning_rate": 0.0004814184401500806,
      "loss": 2.8145,
      "step": 67573
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6960084438323975,
      "learning_rate": 0.00048141518227872386,
      "loss": 2.9553,
      "step": 67574
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6333959102630615,
      "learning_rate": 0.0004814119243736386,
      "loss": 3.1137,
      "step": 67575
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.302062749862671,
      "learning_rate": 0.00048140866643482547,
      "loss": 2.8233,
      "step": 67576
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.472082495689392,
      "learning_rate": 0.000481405408462285,
      "loss": 3.1187,
      "step": 67577
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6052641868591309,
      "learning_rate": 0.00048140215045601784,
      "loss": 3.117,
      "step": 67578
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.954998254776001,
      "learning_rate": 0.0004813988924160246,
      "loss": 2.9354,
      "step": 67579
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0324454307556152,
      "learning_rate": 0.00048139563434230597,
      "loss": 2.9453,
      "step": 67580
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6551786661148071,
      "learning_rate": 0.00048139237623486236,
      "loss": 3.1008,
      "step": 67581
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.75960111618042,
      "learning_rate": 0.0004813891180936946,
      "loss": 3.1744,
      "step": 67582
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.059443950653076,
      "learning_rate": 0.000481385859918803,
      "loss": 2.9848,
      "step": 67583
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4964743852615356,
      "learning_rate": 0.00048138260171018844,
      "loss": 2.9082,
      "step": 67584
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3964039087295532,
      "learning_rate": 0.0004813793434678515,
      "loss": 2.8958,
      "step": 67585
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.561342477798462,
      "learning_rate": 0.00048137608519179263,
      "loss": 3.0811,
      "step": 67586
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5881109237670898,
      "learning_rate": 0.0004813728268820125,
      "loss": 3.0334,
      "step": 67587
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.437546730041504,
      "learning_rate": 0.00048136956853851183,
      "loss": 3.0891,
      "step": 67588
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5400583744049072,
      "learning_rate": 0.00048136631016129107,
      "loss": 3.0207,
      "step": 67589
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3431476354599,
      "learning_rate": 0.00048136305175035086,
      "loss": 3.0208,
      "step": 67590
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6452218294143677,
      "learning_rate": 0.0004813597933056919,
      "loss": 3.0589,
      "step": 67591
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5590753555297852,
      "learning_rate": 0.0004813565348273147,
      "loss": 3.1087,
      "step": 67592
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5339641571044922,
      "learning_rate": 0.0004813532763152199,
      "loss": 2.9111,
      "step": 67593
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4570032358169556,
      "learning_rate": 0.0004813500177694081,
      "loss": 2.822,
      "step": 67594
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.806969165802002,
      "learning_rate": 0.0004813467591898799,
      "loss": 2.8729,
      "step": 67595
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1416893005371094,
      "learning_rate": 0.000481343500576636,
      "loss": 2.9458,
      "step": 67596
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6907376050949097,
      "learning_rate": 0.00048134024192967684,
      "loss": 2.9138,
      "step": 67597
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.808262586593628,
      "learning_rate": 0.0004813369832490031,
      "loss": 3.0716,
      "step": 67598
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6817091703414917,
      "learning_rate": 0.00048133372453461543,
      "loss": 3.0877,
      "step": 67599
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8617874383926392,
      "learning_rate": 0.0004813304657865144,
      "loss": 2.7979,
      "step": 67600
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.7009940147399902,
      "learning_rate": 0.0004813272070047006,
      "loss": 2.7137,
      "step": 67601
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7897422313690186,
      "learning_rate": 0.00048132394818917456,
      "loss": 3.1559,
      "step": 67602
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4407293796539307,
      "learning_rate": 0.0004813206893399371,
      "loss": 3.0572,
      "step": 67603
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7390207052230835,
      "learning_rate": 0.00048131743045698867,
      "loss": 3.0135,
      "step": 67604
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6945101022720337,
      "learning_rate": 0.0004813141715403299,
      "loss": 3.0848,
      "step": 67605
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2649295330047607,
      "learning_rate": 0.0004813109125899614,
      "loss": 2.8806,
      "step": 67606
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.00727915763855,
      "learning_rate": 0.0004813076536058837,
      "loss": 3.2026,
      "step": 67607
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8335150480270386,
      "learning_rate": 0.0004813043945880976,
      "loss": 2.9798,
      "step": 67608
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.193429946899414,
      "learning_rate": 0.00048130113553660354,
      "loss": 2.9564,
      "step": 67609
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.770572304725647,
      "learning_rate": 0.00048129787645140223,
      "loss": 2.9086,
      "step": 67610
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0618433952331543,
      "learning_rate": 0.0004812946173324941,
      "loss": 2.8078,
      "step": 67611
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5291862487792969,
      "learning_rate": 0.00048129135817988,
      "loss": 2.9071,
      "step": 67612
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.757948637008667,
      "learning_rate": 0.0004812880989935604,
      "loss": 3.2284,
      "step": 67613
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8857033252716064,
      "learning_rate": 0.00048128483977353593,
      "loss": 2.8881,
      "step": 67614
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2046401500701904,
      "learning_rate": 0.00048128158051980715,
      "loss": 3.0038,
      "step": 67615
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5447726249694824,
      "learning_rate": 0.00048127832123237465,
      "loss": 3.0881,
      "step": 67616
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6029045581817627,
      "learning_rate": 0.00048127506191123913,
      "loss": 3.0261,
      "step": 67617
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.346586227416992,
      "learning_rate": 0.0004812718025564012,
      "loss": 2.9976,
      "step": 67618
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8235359191894531,
      "learning_rate": 0.0004812685431678614,
      "loss": 3.2297,
      "step": 67619
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4434192180633545,
      "learning_rate": 0.0004812652837456203,
      "loss": 3.1886,
      "step": 67620
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4320106506347656,
      "learning_rate": 0.0004812620242896786,
      "loss": 2.8126,
      "step": 67621
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6912733316421509,
      "learning_rate": 0.00048125876480003686,
      "loss": 3.2085,
      "step": 67622
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5011630058288574,
      "learning_rate": 0.0004812555052766957,
      "loss": 2.9954,
      "step": 67623
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5887922048568726,
      "learning_rate": 0.00048125224571965577,
      "loss": 3.0557,
      "step": 67624
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2041943073272705,
      "learning_rate": 0.0004812489861289175,
      "loss": 3.3013,
      "step": 67625
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9217960834503174,
      "learning_rate": 0.00048124572650448173,
      "loss": 2.9802,
      "step": 67626
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.555182933807373,
      "learning_rate": 0.0004812424668463489,
      "loss": 3.0349,
      "step": 67627
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5531089305877686,
      "learning_rate": 0.0004812392071545198,
      "loss": 2.992,
      "step": 67628
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4950717687606812,
      "learning_rate": 0.00048123594742899474,
      "loss": 2.992,
      "step": 67629
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.50117564201355,
      "learning_rate": 0.00048123268766977453,
      "loss": 2.9761,
      "step": 67630
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1202778816223145,
      "learning_rate": 0.0004812294278768599,
      "loss": 3.1241,
      "step": 67631
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6924595832824707,
      "learning_rate": 0.0004812261680502511,
      "loss": 3.0972,
      "step": 67632
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3788424730300903,
      "learning_rate": 0.000481222908189949,
      "loss": 3.1308,
      "step": 67633
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2579336166381836,
      "learning_rate": 0.0004812196482959542,
      "loss": 3.0607,
      "step": 67634
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5933270454406738,
      "learning_rate": 0.00048121638836826726,
      "loss": 3.0714,
      "step": 67635
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.453607439994812,
      "learning_rate": 0.00048121312840688867,
      "loss": 3.1265,
      "step": 67636
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2239906787872314,
      "learning_rate": 0.00048120986841181915,
      "loss": 3.1,
      "step": 67637
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.801177740097046,
      "learning_rate": 0.00048120660838305934,
      "loss": 3.0887,
      "step": 67638
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3752124309539795,
      "learning_rate": 0.0004812033483206098,
      "loss": 2.8286,
      "step": 67639
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8773013353347778,
      "learning_rate": 0.00048120008822447117,
      "loss": 3.1252,
      "step": 67640
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6281239986419678,
      "learning_rate": 0.00048119682809464394,
      "loss": 2.9988,
      "step": 67641
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3288376331329346,
      "learning_rate": 0.00048119356793112887,
      "loss": 2.8739,
      "step": 67642
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0425782203674316,
      "learning_rate": 0.00048119030773392644,
      "loss": 2.8584,
      "step": 67643
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8755582571029663,
      "learning_rate": 0.0004811870475030373,
      "loss": 3.0356,
      "step": 67644
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5474709272384644,
      "learning_rate": 0.0004811837872384622,
      "loss": 2.9861,
      "step": 67645
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1333303451538086,
      "learning_rate": 0.0004811805269402015,
      "loss": 3.2281,
      "step": 67646
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8398432731628418,
      "learning_rate": 0.00048117726660825593,
      "loss": 3.0864,
      "step": 67647
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.106151819229126,
      "learning_rate": 0.0004811740062426261,
      "loss": 2.9438,
      "step": 67648
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9815093278884888,
      "learning_rate": 0.0004811707458433126,
      "loss": 2.8616,
      "step": 67649
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3298887014389038,
      "learning_rate": 0.00048116748541031596,
      "loss": 3.0115,
      "step": 67650
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.2762796878814697,
      "learning_rate": 0.0004811642249436369,
      "loss": 3.4236,
      "step": 67651
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5305500030517578,
      "learning_rate": 0.0004811609644432761,
      "loss": 3.1107,
      "step": 67652
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6776419878005981,
      "learning_rate": 0.00048115770390923403,
      "loss": 3.2126,
      "step": 67653
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6142055988311768,
      "learning_rate": 0.00048115444334151126,
      "loss": 3.0526,
      "step": 67654
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7973721027374268,
      "learning_rate": 0.00048115118274010845,
      "loss": 3.0436,
      "step": 67655
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3560116291046143,
      "learning_rate": 0.00048114792210502625,
      "loss": 3.0944,
      "step": 67656
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0003292560577393,
      "learning_rate": 0.00048114466143626527,
      "loss": 2.9902,
      "step": 67657
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.4144415855407715,
      "learning_rate": 0.000481141400733826,
      "loss": 3.2013,
      "step": 67658
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6244593858718872,
      "learning_rate": 0.00048113813999770916,
      "loss": 3.1577,
      "step": 67659
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1195671558380127,
      "learning_rate": 0.0004811348792279154,
      "loss": 2.8981,
      "step": 67660
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6178920269012451,
      "learning_rate": 0.0004811316184244451,
      "loss": 3.0573,
      "step": 67661
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9212595224380493,
      "learning_rate": 0.00048112835758729906,
      "loss": 3.1314,
      "step": 67662
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4632543325424194,
      "learning_rate": 0.00048112509671647787,
      "loss": 2.9595,
      "step": 67663
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8205012083053589,
      "learning_rate": 0.00048112183581198217,
      "loss": 3.0671,
      "step": 67664
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.695082664489746,
      "learning_rate": 0.0004811185748738124,
      "loss": 2.8696,
      "step": 67665
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4516899585723877,
      "learning_rate": 0.00048111531390196925,
      "loss": 2.7475,
      "step": 67666
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.0813441276550293,
      "learning_rate": 0.00048111205289645345,
      "loss": 3.0384,
      "step": 67667
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.2105910778045654,
      "learning_rate": 0.0004811087918572654,
      "loss": 3.1296,
      "step": 67668
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4571163654327393,
      "learning_rate": 0.0004811055307844058,
      "loss": 3.107,
      "step": 67669
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6935819387435913,
      "learning_rate": 0.00048110226967787537,
      "loss": 2.8504,
      "step": 67670
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5375901460647583,
      "learning_rate": 0.00048109900853767455,
      "loss": 2.9372,
      "step": 67671
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4957165718078613,
      "learning_rate": 0.000481095747363804,
      "loss": 3.1802,
      "step": 67672
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8345712423324585,
      "learning_rate": 0.0004810924861562644,
      "loss": 3.0837,
      "step": 67673
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4416046142578125,
      "learning_rate": 0.00048108922491505617,
      "loss": 2.966,
      "step": 67674
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4703961610794067,
      "learning_rate": 0.0004810859636401801,
      "loss": 3.057,
      "step": 67675
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2979049682617188,
      "learning_rate": 0.0004810827023316367,
      "loss": 3.3386,
      "step": 67676
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4773993492126465,
      "learning_rate": 0.00048107944098942665,
      "loss": 3.1699,
      "step": 67677
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3390262126922607,
      "learning_rate": 0.0004810761796135505,
      "loss": 3.1781,
      "step": 67678
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5370513200759888,
      "learning_rate": 0.0004810729182040089,
      "loss": 3.0253,
      "step": 67679
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8780416250228882,
      "learning_rate": 0.00048106965676080236,
      "loss": 2.9068,
      "step": 67680
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4832683801651,
      "learning_rate": 0.0004810663952839315,
      "loss": 2.9662,
      "step": 67681
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4189505577087402,
      "learning_rate": 0.0004810631337733972,
      "loss": 2.7444,
      "step": 67682
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4450182914733887,
      "learning_rate": 0.00048105987222919966,
      "loss": 3.1958,
      "step": 67683
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6301884651184082,
      "learning_rate": 0.00048105661065133966,
      "loss": 3.2275,
      "step": 67684
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7754262685775757,
      "learning_rate": 0.00048105334903981794,
      "loss": 2.796,
      "step": 67685
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5094871520996094,
      "learning_rate": 0.00048105008739463494,
      "loss": 3.0923,
      "step": 67686
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4170852899551392,
      "learning_rate": 0.00048104682571579126,
      "loss": 2.7031,
      "step": 67687
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.88908851146698,
      "learning_rate": 0.00048104356400328755,
      "loss": 2.7039,
      "step": 67688
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.748699903488159,
      "learning_rate": 0.0004810403022571245,
      "loss": 3.1028,
      "step": 67689
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5221850872039795,
      "learning_rate": 0.0004810370404773026,
      "loss": 2.8859,
      "step": 67690
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.741767168045044,
      "learning_rate": 0.0004810337786638225,
      "loss": 2.9054,
      "step": 67691
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2127768993377686,
      "learning_rate": 0.00048103051681668485,
      "loss": 3.0244,
      "step": 67692
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8334167003631592,
      "learning_rate": 0.00048102725493589013,
      "loss": 3.1783,
      "step": 67693
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6306946277618408,
      "learning_rate": 0.000481023993021439,
      "loss": 3.1717,
      "step": 67694
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1097512245178223,
      "learning_rate": 0.0004810207310733322,
      "loss": 2.6425,
      "step": 67695
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.097485303878784,
      "learning_rate": 0.00048101746909157025,
      "loss": 3.0032,
      "step": 67696
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8388361930847168,
      "learning_rate": 0.0004810142070761536,
      "loss": 3.1248,
      "step": 67697
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3139195442199707,
      "learning_rate": 0.00048101094502708304,
      "loss": 3.0443,
      "step": 67698
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8775124549865723,
      "learning_rate": 0.0004810076829443592,
      "loss": 2.8354,
      "step": 67699
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.72779381275177,
      "learning_rate": 0.0004810044208279825,
      "loss": 2.8089,
      "step": 67700
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8868738412857056,
      "learning_rate": 0.00048100115867795375,
      "loss": 2.8966,
      "step": 67701
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6427865028381348,
      "learning_rate": 0.00048099789649427345,
      "loss": 2.9302,
      "step": 67702
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.193631649017334,
      "learning_rate": 0.00048099463427694227,
      "loss": 3.1681,
      "step": 67703
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5844889879226685,
      "learning_rate": 0.00048099137202596066,
      "loss": 3.0648,
      "step": 67704
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7535264492034912,
      "learning_rate": 0.0004809881097413294,
      "loss": 3.181,
      "step": 67705
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9654754400253296,
      "learning_rate": 0.00048098484742304905,
      "loss": 2.8198,
      "step": 67706
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8661668300628662,
      "learning_rate": 0.00048098158507112025,
      "loss": 2.9886,
      "step": 67707
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4730719327926636,
      "learning_rate": 0.0004809783226855434,
      "loss": 2.9589,
      "step": 67708
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3844280242919922,
      "learning_rate": 0.00048097506026631936,
      "loss": 3.2217,
      "step": 67709
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2015116214752197,
      "learning_rate": 0.0004809717978134487,
      "loss": 2.9209,
      "step": 67710
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0138297080993652,
      "learning_rate": 0.00048096853532693184,
      "loss": 3.1022,
      "step": 67711
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0857763290405273,
      "learning_rate": 0.00048096527280676955,
      "loss": 2.8407,
      "step": 67712
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9983056783676147,
      "learning_rate": 0.0004809620102529625,
      "loss": 2.9258,
      "step": 67713
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5889968872070312,
      "learning_rate": 0.0004809587476655111,
      "loss": 2.9342,
      "step": 67714
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6266307830810547,
      "learning_rate": 0.000480955485044416,
      "loss": 3.0601,
      "step": 67715
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6969690322875977,
      "learning_rate": 0.00048095222238967795,
      "loss": 3.1396,
      "step": 67716
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5196062326431274,
      "learning_rate": 0.0004809489597012974,
      "loss": 2.945,
      "step": 67717
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3569893836975098,
      "learning_rate": 0.00048094569697927517,
      "loss": 2.8695,
      "step": 67718
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4994151592254639,
      "learning_rate": 0.00048094243422361165,
      "loss": 3.4253,
      "step": 67719
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0612118244171143,
      "learning_rate": 0.00048093917143430745,
      "loss": 3.1324,
      "step": 67720
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.588387370109558,
      "learning_rate": 0.0004809359086113632,
      "loss": 3.0381,
      "step": 67721
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4695477485656738,
      "learning_rate": 0.0004809326457547797,
      "loss": 3.007,
      "step": 67722
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6416596174240112,
      "learning_rate": 0.00048092938286455733,
      "loss": 3.0007,
      "step": 67723
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.735985279083252,
      "learning_rate": 0.00048092611994069676,
      "loss": 3.2195,
      "step": 67724
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9350594282150269,
      "learning_rate": 0.00048092285698319865,
      "loss": 3.2208,
      "step": 67725
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.63626229763031,
      "learning_rate": 0.00048091959399206355,
      "loss": 2.9438,
      "step": 67726
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.7569119930267334,
      "learning_rate": 0.00048091633096729205,
      "loss": 2.8156,
      "step": 67727
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.396735668182373,
      "learning_rate": 0.0004809130679088848,
      "loss": 3.0252,
      "step": 67728
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6866652965545654,
      "learning_rate": 0.00048090980481684247,
      "loss": 3.0928,
      "step": 67729
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1839022636413574,
      "learning_rate": 0.00048090654169116547,
      "loss": 2.9292,
      "step": 67730
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.061328411102295,
      "learning_rate": 0.0004809032785318546,
      "loss": 3.2588,
      "step": 67731
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.1695690155029297,
      "learning_rate": 0.00048090001533891035,
      "loss": 3.0448,
      "step": 67732
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3860392570495605,
      "learning_rate": 0.00048089675211233343,
      "loss": 3.0884,
      "step": 67733
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8250439167022705,
      "learning_rate": 0.00048089348885212434,
      "loss": 3.1461,
      "step": 67734
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.174370288848877,
      "learning_rate": 0.0004808902255582838,
      "loss": 3.256,
      "step": 67735
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3738317489624023,
      "learning_rate": 0.0004808869622308123,
      "loss": 2.9138,
      "step": 67736
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6437040567398071,
      "learning_rate": 0.00048088369886971057,
      "loss": 2.9229,
      "step": 67737
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3677443265914917,
      "learning_rate": 0.00048088043547497904,
      "loss": 2.8333,
      "step": 67738
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.724561095237732,
      "learning_rate": 0.00048087717204661843,
      "loss": 3.1005,
      "step": 67739
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.871713876724243,
      "learning_rate": 0.0004808739085846294,
      "loss": 3.1618,
      "step": 67740
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.845692753791809,
      "learning_rate": 0.0004808706450890125,
      "loss": 2.7373,
      "step": 67741
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6673252582550049,
      "learning_rate": 0.00048086738155976827,
      "loss": 2.9906,
      "step": 67742
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.482786178588867,
      "learning_rate": 0.0004808641179968974,
      "loss": 3.0028,
      "step": 67743
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.59881591796875,
      "learning_rate": 0.00048086085440040053,
      "loss": 2.8531,
      "step": 67744
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4336941242218018,
      "learning_rate": 0.0004808575907702782,
      "loss": 3.0789,
      "step": 67745
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.280831813812256,
      "learning_rate": 0.00048085432710653106,
      "loss": 3.0079,
      "step": 67746
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8864398002624512,
      "learning_rate": 0.00048085106340915956,
      "loss": 3.0116,
      "step": 67747
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.460714340209961,
      "learning_rate": 0.00048084779967816447,
      "loss": 3.1115,
      "step": 67748
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8109327554702759,
      "learning_rate": 0.00048084453591354647,
      "loss": 2.9894,
      "step": 67749
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.7605092525482178,
      "learning_rate": 0.0004808412721153059,
      "loss": 2.7874,
      "step": 67750
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.361081838607788,
      "learning_rate": 0.0004808380082834437,
      "loss": 3.0462,
      "step": 67751
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4774737358093262,
      "learning_rate": 0.0004808347444179602,
      "loss": 3.1286,
      "step": 67752
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6022573709487915,
      "learning_rate": 0.00048083148051885613,
      "loss": 2.9137,
      "step": 67753
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1907358169555664,
      "learning_rate": 0.00048082821658613204,
      "loss": 3.0469,
      "step": 67754
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8766236305236816,
      "learning_rate": 0.0004808249526197886,
      "loss": 2.8469,
      "step": 67755
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6355692148208618,
      "learning_rate": 0.0004808216886198264,
      "loss": 3.0324,
      "step": 67756
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7106539011001587,
      "learning_rate": 0.000480818424586246,
      "loss": 3.0011,
      "step": 67757
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.915712594985962,
      "learning_rate": 0.00048081516051904803,
      "loss": 3.0763,
      "step": 67758
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.236797571182251,
      "learning_rate": 0.0004808118964182332,
      "loss": 2.9899,
      "step": 67759
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3627731800079346,
      "learning_rate": 0.0004808086322838019,
      "loss": 2.9058,
      "step": 67760
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7301549911499023,
      "learning_rate": 0.0004808053681157549,
      "loss": 3.035,
      "step": 67761
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.7283997535705566,
      "learning_rate": 0.00048080210391409283,
      "loss": 2.8695,
      "step": 67762
    },
    {
      "epoch": 0.88,
      "grad_norm": 5.83772087097168,
      "learning_rate": 0.0004807988396788162,
      "loss": 3.1421,
      "step": 67763
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6912132501602173,
      "learning_rate": 0.00048079557540992566,
      "loss": 3.1087,
      "step": 67764
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6820405721664429,
      "learning_rate": 0.0004807923111074218,
      "loss": 2.981,
      "step": 67765
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4897663593292236,
      "learning_rate": 0.00048078904677130517,
      "loss": 3.0036,
      "step": 67766
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7208998203277588,
      "learning_rate": 0.00048078578240157654,
      "loss": 2.8609,
      "step": 67767
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9012531042099,
      "learning_rate": 0.0004807825179982365,
      "loss": 3.0492,
      "step": 67768
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.798521637916565,
      "learning_rate": 0.0004807792535612854,
      "loss": 2.9807,
      "step": 67769
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9520111083984375,
      "learning_rate": 0.0004807759890907241,
      "loss": 3.2423,
      "step": 67770
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.79207444190979,
      "learning_rate": 0.00048077272458655316,
      "loss": 3.1809,
      "step": 67771
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6257929801940918,
      "learning_rate": 0.0004807694600487731,
      "loss": 3.0514,
      "step": 67772
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6100929975509644,
      "learning_rate": 0.00048076619547738464,
      "loss": 2.8674,
      "step": 67773
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.517608880996704,
      "learning_rate": 0.00048076293087238826,
      "loss": 2.7629,
      "step": 67774
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.882953405380249,
      "learning_rate": 0.0004807596662337847,
      "loss": 2.885,
      "step": 67775
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6591130495071411,
      "learning_rate": 0.00048075640156157445,
      "loss": 3.0499,
      "step": 67776
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.521721601486206,
      "learning_rate": 0.0004807531368557582,
      "loss": 3.1017,
      "step": 67777
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6062068939208984,
      "learning_rate": 0.0004807498721163366,
      "loss": 2.8055,
      "step": 67778
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5492916107177734,
      "learning_rate": 0.00048074660734331013,
      "loss": 2.9292,
      "step": 67779
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.368404746055603,
      "learning_rate": 0.00048074334253667947,
      "loss": 2.9961,
      "step": 67780
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7290565967559814,
      "learning_rate": 0.0004807400776964452,
      "loss": 3.0225,
      "step": 67781
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5012376308441162,
      "learning_rate": 0.0004807368128226079,
      "loss": 3.0333,
      "step": 67782
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1726555824279785,
      "learning_rate": 0.00048073354791516816,
      "loss": 2.9392,
      "step": 67783
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.614405870437622,
      "learning_rate": 0.0004807302829741268,
      "loss": 2.9862,
      "step": 67784
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7245227098464966,
      "learning_rate": 0.00048072701799948414,
      "loss": 3.0729,
      "step": 67785
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8516695499420166,
      "learning_rate": 0.00048072375299124104,
      "loss": 3.1688,
      "step": 67786
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6281304359436035,
      "learning_rate": 0.00048072048794939787,
      "loss": 2.9703,
      "step": 67787
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5110868215560913,
      "learning_rate": 0.0004807172228739554,
      "loss": 2.8873,
      "step": 67788
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2804771661758423,
      "learning_rate": 0.00048071395776491416,
      "loss": 3.3173,
      "step": 67789
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6052814722061157,
      "learning_rate": 0.0004807106926222748,
      "loss": 3.152,
      "step": 67790
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8088338375091553,
      "learning_rate": 0.0004807074274460379,
      "loss": 2.9095,
      "step": 67791
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9552921056747437,
      "learning_rate": 0.00048070416223620416,
      "loss": 3.1223,
      "step": 67792
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6457414627075195,
      "learning_rate": 0.000480700896992774,
      "loss": 3.1692,
      "step": 67793
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5406134128570557,
      "learning_rate": 0.0004806976317157481,
      "loss": 2.9659,
      "step": 67794
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9332172870635986,
      "learning_rate": 0.00048069436640512726,
      "loss": 3.2245,
      "step": 67795
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6314829587936401,
      "learning_rate": 0.00048069110106091175,
      "loss": 3.0658,
      "step": 67796
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4887603521347046,
      "learning_rate": 0.00048068783568310246,
      "loss": 3.3101,
      "step": 67797
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6061248779296875,
      "learning_rate": 0.0004806845702716999,
      "loss": 2.8949,
      "step": 67798
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.038076400756836,
      "learning_rate": 0.00048068130482670457,
      "loss": 3.0926,
      "step": 67799
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7100874185562134,
      "learning_rate": 0.00048067803934811733,
      "loss": 3.1865,
      "step": 67800
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.14285945892334,
      "learning_rate": 0.0004806747738359385,
      "loss": 3.2309,
      "step": 67801
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.392803430557251,
      "learning_rate": 0.00048067150829016886,
      "loss": 3.0737,
      "step": 67802
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.391894817352295,
      "learning_rate": 0.00048066824271080904,
      "loss": 3.0973,
      "step": 67803
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3841618299484253,
      "learning_rate": 0.0004806649770978595,
      "loss": 3.001,
      "step": 67804
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.591775894165039,
      "learning_rate": 0.000480661711451321,
      "loss": 3.0617,
      "step": 67805
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.536404848098755,
      "learning_rate": 0.000480658445771194,
      "loss": 3.2293,
      "step": 67806
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.3470656871795654,
      "learning_rate": 0.0004806551800574792,
      "loss": 3.3117,
      "step": 67807
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.048816680908203,
      "learning_rate": 0.00048065191431017723,
      "loss": 3.2642,
      "step": 67808
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6544742584228516,
      "learning_rate": 0.0004806486485292886,
      "loss": 2.9762,
      "step": 67809
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.250243902206421,
      "learning_rate": 0.000480645382714814,
      "loss": 2.9206,
      "step": 67810
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5566887855529785,
      "learning_rate": 0.00048064211686675405,
      "loss": 3.0056,
      "step": 67811
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.197726011276245,
      "learning_rate": 0.0004806388509851093,
      "loss": 3.0471,
      "step": 67812
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.198594331741333,
      "learning_rate": 0.00048063558506988044,
      "loss": 2.9968,
      "step": 67813
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.137037515640259,
      "learning_rate": 0.0004806323191210679,
      "loss": 2.9332,
      "step": 67814
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7408045530319214,
      "learning_rate": 0.00048062905313867244,
      "loss": 3.0367,
      "step": 67815
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6726150512695312,
      "learning_rate": 0.0004806257871226947,
      "loss": 2.7635,
      "step": 67816
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.294161081314087,
      "learning_rate": 0.0004806225210731351,
      "loss": 3.0714,
      "step": 67817
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.384812831878662,
      "learning_rate": 0.00048061925498999434,
      "loss": 3.098,
      "step": 67818
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.2446494102478027,
      "learning_rate": 0.0004806159888732733,
      "loss": 2.8631,
      "step": 67819
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.882383108139038,
      "learning_rate": 0.0004806127227229721,
      "loss": 3.1443,
      "step": 67820
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4377084970474243,
      "learning_rate": 0.00048060945653909164,
      "loss": 3.1708,
      "step": 67821
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.483330488204956,
      "learning_rate": 0.00048060619032163254,
      "loss": 3.1699,
      "step": 67822
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3884434700012207,
      "learning_rate": 0.0004806029240705952,
      "loss": 3.2102,
      "step": 67823
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7686413526535034,
      "learning_rate": 0.00048059965778598053,
      "loss": 3.0008,
      "step": 67824
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7138748168945312,
      "learning_rate": 0.00048059639146778887,
      "loss": 3.3617,
      "step": 67825
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6725062131881714,
      "learning_rate": 0.00048059312511602103,
      "loss": 3.008,
      "step": 67826
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4580327272415161,
      "learning_rate": 0.00048058985873067735,
      "loss": 3.0512,
      "step": 67827
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7564833164215088,
      "learning_rate": 0.0004805865923117587,
      "loss": 3.048,
      "step": 67828
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4075134992599487,
      "learning_rate": 0.00048058332585926565,
      "loss": 2.8739,
      "step": 67829
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5578186511993408,
      "learning_rate": 0.00048058005937319867,
      "loss": 3.0289,
      "step": 67830
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8929011821746826,
      "learning_rate": 0.00048057679285355844,
      "loss": 2.8176,
      "step": 67831
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3292545080184937,
      "learning_rate": 0.00048057352630034573,
      "loss": 3.1787,
      "step": 67832
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.673507571220398,
      "learning_rate": 0.00048057025971356087,
      "loss": 3.0068,
      "step": 67833
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4363152980804443,
      "learning_rate": 0.00048056699309320455,
      "loss": 3.1611,
      "step": 67834
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6788917779922485,
      "learning_rate": 0.0004805637264392775,
      "loss": 2.7009,
      "step": 67835
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7606412172317505,
      "learning_rate": 0.00048056045975178015,
      "loss": 3.0167,
      "step": 67836
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6992299556732178,
      "learning_rate": 0.0004805571930307133,
      "loss": 2.6142,
      "step": 67837
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0686557292938232,
      "learning_rate": 0.0004805539262760774,
      "loss": 3.1948,
      "step": 67838
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.549755573272705,
      "learning_rate": 0.0004805506594878731,
      "loss": 3.2097,
      "step": 67839
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.7838473320007324,
      "learning_rate": 0.0004805473926661011,
      "loss": 3.0176,
      "step": 67840
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6617615222930908,
      "learning_rate": 0.0004805441258107619,
      "loss": 3.1675,
      "step": 67841
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.843510627746582,
      "learning_rate": 0.0004805408589218561,
      "loss": 2.9925,
      "step": 67842
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8102473020553589,
      "learning_rate": 0.0004805375919993844,
      "loss": 3.2359,
      "step": 67843
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4901530742645264,
      "learning_rate": 0.0004805343250433474,
      "loss": 3.0918,
      "step": 67844
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7025507688522339,
      "learning_rate": 0.00048053105805374554,
      "loss": 3.2764,
      "step": 67845
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.5256404876708984,
      "learning_rate": 0.0004805277910305796,
      "loss": 2.9814,
      "step": 67846
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.204207420349121,
      "learning_rate": 0.00048052452397385017,
      "loss": 3.1697,
      "step": 67847
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7535271644592285,
      "learning_rate": 0.0004805212568835578,
      "loss": 2.8421,
      "step": 67848
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3338794708251953,
      "learning_rate": 0.0004805179897597031,
      "loss": 3.0503,
      "step": 67849
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.918440341949463,
      "learning_rate": 0.0004805147226022867,
      "loss": 3.2883,
      "step": 67850
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7232332229614258,
      "learning_rate": 0.0004805114554113092,
      "loss": 3.1913,
      "step": 67851
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1088783740997314,
      "learning_rate": 0.0004805081881867712,
      "loss": 3.213,
      "step": 67852
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7445803880691528,
      "learning_rate": 0.0004805049209286734,
      "loss": 3.0782,
      "step": 67853
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9914743900299072,
      "learning_rate": 0.0004805016536370163,
      "loss": 2.846,
      "step": 67854
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9904032945632935,
      "learning_rate": 0.00048049838631180047,
      "loss": 3.2506,
      "step": 67855
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9978406429290771,
      "learning_rate": 0.0004804951189530267,
      "loss": 3.0692,
      "step": 67856
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.776182174682617,
      "learning_rate": 0.00048049185156069543,
      "loss": 2.8583,
      "step": 67857
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.372746467590332,
      "learning_rate": 0.00048048858413480725,
      "loss": 3.324,
      "step": 67858
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7934468984603882,
      "learning_rate": 0.00048048531667536297,
      "loss": 3.1896,
      "step": 67859
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6552932262420654,
      "learning_rate": 0.00048048204918236295,
      "loss": 2.9922,
      "step": 67860
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3614497184753418,
      "learning_rate": 0.00048047878165580785,
      "loss": 3.2078,
      "step": 67861
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6988762617111206,
      "learning_rate": 0.0004804755140956984,
      "loss": 3.1531,
      "step": 67862
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2936556339263916,
      "learning_rate": 0.0004804722465020352,
      "loss": 3.1593,
      "step": 67863
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.943679690361023,
      "learning_rate": 0.0004804689788748188,
      "loss": 3.0261,
      "step": 67864
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5113890171051025,
      "learning_rate": 0.00048046571121404975,
      "loss": 2.9286,
      "step": 67865
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6761746406555176,
      "learning_rate": 0.0004804624435197288,
      "loss": 2.7293,
      "step": 67866
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4563566446304321,
      "learning_rate": 0.0004804591757918564,
      "loss": 3.0092,
      "step": 67867
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.791312336921692,
      "learning_rate": 0.00048045590803043325,
      "loss": 2.9023,
      "step": 67868
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7719508409500122,
      "learning_rate": 0.00048045264023546,
      "loss": 3.2707,
      "step": 67869
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4687539339065552,
      "learning_rate": 0.0004804493724069371,
      "loss": 2.9103,
      "step": 67870
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4245452880859375,
      "learning_rate": 0.00048044610454486527,
      "loss": 2.8278,
      "step": 67871
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5354790687561035,
      "learning_rate": 0.00048044283664924514,
      "loss": 2.8369,
      "step": 67872
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.881861925125122,
      "learning_rate": 0.0004804395687200773,
      "loss": 3.0631,
      "step": 67873
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.7386088371276855,
      "learning_rate": 0.00048043630075736226,
      "loss": 3.0475,
      "step": 67874
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9571342468261719,
      "learning_rate": 0.0004804330327611008,
      "loss": 3.0088,
      "step": 67875
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.318443775177002,
      "learning_rate": 0.00048042976473129336,
      "loss": 3.1705,
      "step": 67876
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.849280834197998,
      "learning_rate": 0.00048042649666794066,
      "loss": 3.0092,
      "step": 67877
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7077500820159912,
      "learning_rate": 0.00048042322857104323,
      "loss": 2.9199,
      "step": 67878
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.605560302734375,
      "learning_rate": 0.0004804199604406018,
      "loss": 2.9644,
      "step": 67879
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2801389694213867,
      "learning_rate": 0.0004804166922766169,
      "loss": 3.1268,
      "step": 67880
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9390188455581665,
      "learning_rate": 0.000480413424079089,
      "loss": 3.0542,
      "step": 67881
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1751646995544434,
      "learning_rate": 0.0004804101558480189,
      "loss": 3.0482,
      "step": 67882
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.005431652069092,
      "learning_rate": 0.00048040688758340717,
      "loss": 3.1112,
      "step": 67883
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.888036847114563,
      "learning_rate": 0.0004804036192852544,
      "loss": 2.8275,
      "step": 67884
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6671202182769775,
      "learning_rate": 0.0004804003509535611,
      "loss": 3.366,
      "step": 67885
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8527991771698,
      "learning_rate": 0.0004803970825883281,
      "loss": 3.0116,
      "step": 67886
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5163527727127075,
      "learning_rate": 0.0004803938141895558,
      "loss": 3.1516,
      "step": 67887
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.16079044342041,
      "learning_rate": 0.00048039054575724487,
      "loss": 3.1703,
      "step": 67888
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4690067768096924,
      "learning_rate": 0.00048038727729139604,
      "loss": 2.9975,
      "step": 67889
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8489116430282593,
      "learning_rate": 0.00048038400879200975,
      "loss": 2.9601,
      "step": 67890
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6484675407409668,
      "learning_rate": 0.00048038074025908665,
      "loss": 2.9618,
      "step": 67891
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7391849756240845,
      "learning_rate": 0.0004803774716926273,
      "loss": 3.0989,
      "step": 67892
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.6889991760253906,
      "learning_rate": 0.0004803742030926325,
      "loss": 3.0604,
      "step": 67893
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5126228332519531,
      "learning_rate": 0.00048037093445910264,
      "loss": 2.9616,
      "step": 67894
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6399667263031006,
      "learning_rate": 0.0004803676657920384,
      "loss": 3.0422,
      "step": 67895
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9244242906570435,
      "learning_rate": 0.0004803643970914405,
      "loss": 3.2537,
      "step": 67896
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2126755714416504,
      "learning_rate": 0.0004803611283573094,
      "loss": 3.2231,
      "step": 67897
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.732985258102417,
      "learning_rate": 0.00048035785958964577,
      "loss": 3.0212,
      "step": 67898
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.48858642578125,
      "learning_rate": 0.0004803545907884503,
      "loss": 3.205,
      "step": 67899
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8523666858673096,
      "learning_rate": 0.00048035132195372335,
      "loss": 3.1004,
      "step": 67900
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.582990050315857,
      "learning_rate": 0.0004803480530854658,
      "loss": 2.9498,
      "step": 67901
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5132991075515747,
      "learning_rate": 0.0004803447841836781,
      "loss": 2.8908,
      "step": 67902
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1014745235443115,
      "learning_rate": 0.00048034151524836086,
      "loss": 2.8698,
      "step": 67903
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.497376799583435,
      "learning_rate": 0.0004803382462795147,
      "loss": 3.1292,
      "step": 67904
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5779616832733154,
      "learning_rate": 0.00048033497727714046,
      "loss": 3.3102,
      "step": 67905
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6508710384368896,
      "learning_rate": 0.00048033170824123835,
      "loss": 3.1493,
      "step": 67906
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.738857388496399,
      "learning_rate": 0.0004803284391718092,
      "loss": 3.0778,
      "step": 67907
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8127437829971313,
      "learning_rate": 0.0004803251700688537,
      "loss": 3.0918,
      "step": 67908
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5800857543945312,
      "learning_rate": 0.0004803219009323722,
      "loss": 3.022,
      "step": 67909
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.4340384006500244,
      "learning_rate": 0.0004803186317623655,
      "loss": 2.9104,
      "step": 67910
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7605031728744507,
      "learning_rate": 0.00048031536255883423,
      "loss": 3.1336,
      "step": 67911
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8816046714782715,
      "learning_rate": 0.00048031209332177884,
      "loss": 2.9391,
      "step": 67912
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.722575306892395,
      "learning_rate": 0.0004803088240512001,
      "loss": 3.1454,
      "step": 67913
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0384936332702637,
      "learning_rate": 0.0004803055547470985,
      "loss": 2.8803,
      "step": 67914
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.647031545639038,
      "learning_rate": 0.0004803022854094747,
      "loss": 2.885,
      "step": 67915
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8606057167053223,
      "learning_rate": 0.00048029901603832925,
      "loss": 2.8946,
      "step": 67916
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1131527423858643,
      "learning_rate": 0.00048029574663366293,
      "loss": 3.2194,
      "step": 67917
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.441949725151062,
      "learning_rate": 0.00048029247719547617,
      "loss": 3.1891,
      "step": 67918
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.3672021627426147,
      "learning_rate": 0.00048028920772376967,
      "loss": 2.9272,
      "step": 67919
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.882377028465271,
      "learning_rate": 0.0004802859382185439,
      "loss": 2.6985,
      "step": 67920
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9774072170257568,
      "learning_rate": 0.00048028266867979964,
      "loss": 2.7129,
      "step": 67921
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8275429010391235,
      "learning_rate": 0.00048027939910753745,
      "loss": 2.8104,
      "step": 67922
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.585832357406616,
      "learning_rate": 0.0004802761295017579,
      "loss": 2.9951,
      "step": 67923
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8478479385375977,
      "learning_rate": 0.0004802728598624615,
      "loss": 2.9874,
      "step": 67924
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.8382893800735474,
      "learning_rate": 0.0004802695901896492,
      "loss": 3.0817,
      "step": 67925
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4182446002960205,
      "learning_rate": 0.0004802663204833212,
      "loss": 2.9944,
      "step": 67926
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4339481592178345,
      "learning_rate": 0.00048026305074347835,
      "loss": 2.919,
      "step": 67927
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.651395559310913,
      "learning_rate": 0.00048025978097012117,
      "loss": 3.2373,
      "step": 67928
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6595357656478882,
      "learning_rate": 0.0004802565111632504,
      "loss": 3.0592,
      "step": 67929
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6968034505844116,
      "learning_rate": 0.0004802532413228664,
      "loss": 2.8513,
      "step": 67930
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6389261484146118,
      "learning_rate": 0.00048024997144896996,
      "loss": 3.0639,
      "step": 67931
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6671671867370605,
      "learning_rate": 0.0004802467015415617,
      "loss": 2.9229,
      "step": 67932
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6573480367660522,
      "learning_rate": 0.00048024343160064215,
      "loss": 2.9784,
      "step": 67933
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.806252121925354,
      "learning_rate": 0.0004802401616262119,
      "loss": 2.8448,
      "step": 67934
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.0417091846466064,
      "learning_rate": 0.00048023689161827166,
      "loss": 2.8826,
      "step": 67935
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4911631345748901,
      "learning_rate": 0.0004802336215768219,
      "loss": 3.2593,
      "step": 67936
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5392334461212158,
      "learning_rate": 0.00048023035150186337,
      "loss": 2.891,
      "step": 67937
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6436792612075806,
      "learning_rate": 0.0004802270813933967,
      "loss": 3.107,
      "step": 67938
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.576557993888855,
      "learning_rate": 0.00048022381125142227,
      "loss": 2.9154,
      "step": 67939
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.02034854888916,
      "learning_rate": 0.00048022054107594095,
      "loss": 2.9172,
      "step": 67940
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9714205265045166,
      "learning_rate": 0.00048021727086695323,
      "loss": 2.8222,
      "step": 67941
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6523133516311646,
      "learning_rate": 0.0004802140006244596,
      "loss": 3.0426,
      "step": 67942
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9226295948028564,
      "learning_rate": 0.0004802107303484608,
      "loss": 2.86,
      "step": 67943
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7335480451583862,
      "learning_rate": 0.00048020746003895747,
      "loss": 3.0105,
      "step": 67944
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.580603837966919,
      "learning_rate": 0.00048020418969595016,
      "loss": 3.1541,
      "step": 67945
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.9793484210968018,
      "learning_rate": 0.0004802009193194395,
      "loss": 2.9503,
      "step": 67946
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.660881757736206,
      "learning_rate": 0.00048019764890942616,
      "loss": 2.741,
      "step": 67947
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7160476446151733,
      "learning_rate": 0.00048019437846591055,
      "loss": 3.0905,
      "step": 67948
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.9913326501846313,
      "learning_rate": 0.0004801911079888934,
      "loss": 2.9737,
      "step": 67949
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7380728721618652,
      "learning_rate": 0.0004801878374783754,
      "loss": 3.0377,
      "step": 67950
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4820057153701782,
      "learning_rate": 0.000480184566934357,
      "loss": 2.8733,
      "step": 67951
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5889480113983154,
      "learning_rate": 0.00048018129635683896,
      "loss": 3.1426,
      "step": 67952
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.649977684020996,
      "learning_rate": 0.0004801780257458218,
      "loss": 3.0771,
      "step": 67953
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.1284217834472656,
      "learning_rate": 0.0004801747551013061,
      "loss": 2.9112,
      "step": 67954
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8417110443115234,
      "learning_rate": 0.0004801714844232926,
      "loss": 3.0257,
      "step": 67955
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.479142189025879,
      "learning_rate": 0.0004801682137117818,
      "loss": 3.0243,
      "step": 67956
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.755788803100586,
      "learning_rate": 0.00048016494296677426,
      "loss": 3.0942,
      "step": 67957
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.217494010925293,
      "learning_rate": 0.0004801616721882707,
      "loss": 2.9615,
      "step": 67958
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5602059364318848,
      "learning_rate": 0.00048015840137627164,
      "loss": 3.229,
      "step": 67959
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.4872907400131226,
      "learning_rate": 0.0004801551305307779,
      "loss": 2.9923,
      "step": 67960
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.6392524242401123,
      "learning_rate": 0.00048015185965178964,
      "loss": 3.2102,
      "step": 67961
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.2335313558578491,
      "learning_rate": 0.00048014858873930797,
      "loss": 2.9139,
      "step": 67962
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.663801670074463,
      "learning_rate": 0.00048014531779333317,
      "loss": 3.1621,
      "step": 67963
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.5909117460250854,
      "learning_rate": 0.000480142046813866,
      "loss": 3.0484,
      "step": 67964
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.823279619216919,
      "learning_rate": 0.0004801387758009071,
      "loss": 2.7666,
      "step": 67965
    },
    {
      "epoch": 0.88,
      "grad_norm": 1.7121117115020752,
      "learning_rate": 0.00048013550475445684,
      "loss": 3.0614,
      "step": 67966
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.3423104286193848,
      "learning_rate": 0.0004801322336745161,
      "loss": 2.921,
      "step": 67967
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.8338327407836914,
      "learning_rate": 0.00048012896256108533,
      "loss": 2.852,
      "step": 67968
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.096910238265991,
      "learning_rate": 0.0004801256914141652,
      "loss": 3.0301,
      "step": 67969
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8194564580917358,
      "learning_rate": 0.0004801224202337563,
      "loss": 3.0892,
      "step": 67970
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.762289047241211,
      "learning_rate": 0.0004801191490198592,
      "loss": 2.9906,
      "step": 67971
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4662096500396729,
      "learning_rate": 0.00048011587777247464,
      "loss": 3.1158,
      "step": 67972
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9502002000808716,
      "learning_rate": 0.00048011260649160313,
      "loss": 2.8083,
      "step": 67973
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.416117787361145,
      "learning_rate": 0.00048010933517724516,
      "loss": 2.8978,
      "step": 67974
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6062161922454834,
      "learning_rate": 0.0004801060638294016,
      "loss": 3.3422,
      "step": 67975
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.672202467918396,
      "learning_rate": 0.00048010279244807287,
      "loss": 3.0751,
      "step": 67976
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.151923418045044,
      "learning_rate": 0.0004800995210332596,
      "loss": 2.9902,
      "step": 67977
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.8097782135009766,
      "learning_rate": 0.0004800962495849625,
      "loss": 2.9506,
      "step": 67978
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9285513162612915,
      "learning_rate": 0.0004800929781031821,
      "loss": 3.0386,
      "step": 67979
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5704195499420166,
      "learning_rate": 0.000480089706587919,
      "loss": 2.7738,
      "step": 67980
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.421210765838623,
      "learning_rate": 0.00048008643503917386,
      "loss": 3.1589,
      "step": 67981
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.9925522804260254,
      "learning_rate": 0.00048008316345694716,
      "loss": 2.8715,
      "step": 67982
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7203049659729004,
      "learning_rate": 0.00048007989184123965,
      "loss": 3.0344,
      "step": 67983
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6877200603485107,
      "learning_rate": 0.0004800766201920519,
      "loss": 2.9638,
      "step": 67984
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.7117807865142822,
      "learning_rate": 0.00048007334850938446,
      "loss": 2.8787,
      "step": 67985
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4351935386657715,
      "learning_rate": 0.00048007007679323803,
      "loss": 3.0587,
      "step": 67986
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7333695888519287,
      "learning_rate": 0.0004800668050436132,
      "loss": 3.1474,
      "step": 67987
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.351219654083252,
      "learning_rate": 0.0004800635332605105,
      "loss": 3.0383,
      "step": 67988
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.680338144302368,
      "learning_rate": 0.0004800602614439306,
      "loss": 2.8941,
      "step": 67989
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3877766132354736,
      "learning_rate": 0.00048005698959387416,
      "loss": 2.8847,
      "step": 67990
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7802337408065796,
      "learning_rate": 0.0004800537177103416,
      "loss": 2.9067,
      "step": 67991
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.481132745742798,
      "learning_rate": 0.00048005044579333377,
      "loss": 2.9604,
      "step": 67992
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8841577768325806,
      "learning_rate": 0.0004800471738428511,
      "loss": 2.9012,
      "step": 67993
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.016317844390869,
      "learning_rate": 0.00048004390185889424,
      "loss": 2.899,
      "step": 67994
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8904391527175903,
      "learning_rate": 0.0004800406298414639,
      "loss": 3.1833,
      "step": 67995
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6910327672958374,
      "learning_rate": 0.00048003735779056063,
      "loss": 3.1689,
      "step": 67996
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.581660270690918,
      "learning_rate": 0.0004800340857061849,
      "loss": 2.89,
      "step": 67997
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.845147967338562,
      "learning_rate": 0.0004800308135883375,
      "loss": 3.1029,
      "step": 67998
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7123048305511475,
      "learning_rate": 0.000480027541437019,
      "loss": 2.9075,
      "step": 67999
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7422438859939575,
      "learning_rate": 0.0004800242692522299,
      "loss": 2.8806,
      "step": 68000
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6949989795684814,
      "learning_rate": 0.00048002099703397096,
      "loss": 2.8854,
      "step": 68001
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.489686131477356,
      "learning_rate": 0.0004800177247822427,
      "loss": 3.3784,
      "step": 68002
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4304978847503662,
      "learning_rate": 0.0004800144524970457,
      "loss": 3.433,
      "step": 68003
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5465071201324463,
      "learning_rate": 0.00048001118017838065,
      "loss": 3.0692,
      "step": 68004
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.002725839614868,
      "learning_rate": 0.0004800079078262482,
      "loss": 2.8677,
      "step": 68005
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.992087721824646,
      "learning_rate": 0.0004800046354406488,
      "loss": 3.2518,
      "step": 68006
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5152860879898071,
      "learning_rate": 0.00048000136302158315,
      "loss": 3.1439,
      "step": 68007
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4639461040496826,
      "learning_rate": 0.0004799980905690518,
      "loss": 2.8814,
      "step": 68008
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6369657516479492,
      "learning_rate": 0.00047999481808305553,
      "loss": 2.6456,
      "step": 68009
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8596285581588745,
      "learning_rate": 0.00047999154556359477,
      "loss": 3.0928,
      "step": 68010
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0006837844848633,
      "learning_rate": 0.0004799882730106701,
      "loss": 2.957,
      "step": 68011
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6275376081466675,
      "learning_rate": 0.0004799850004242823,
      "loss": 3.1074,
      "step": 68012
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.797469973564148,
      "learning_rate": 0.00047998172780443194,
      "loss": 3.1015,
      "step": 68013
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.425283670425415,
      "learning_rate": 0.0004799784551511194,
      "loss": 3.0126,
      "step": 68014
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9257463216781616,
      "learning_rate": 0.0004799751824643456,
      "loss": 3.0907,
      "step": 68015
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5174736976623535,
      "learning_rate": 0.00047997190974411103,
      "loss": 2.9907,
      "step": 68016
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.208085536956787,
      "learning_rate": 0.00047996863699041625,
      "loss": 2.8169,
      "step": 68017
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.658865451812744,
      "learning_rate": 0.0004799653642032619,
      "loss": 3.0679,
      "step": 68018
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.537199020385742,
      "learning_rate": 0.0004799620913826486,
      "loss": 3.101,
      "step": 68019
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4575577974319458,
      "learning_rate": 0.00047995881852857695,
      "loss": 2.8947,
      "step": 68020
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.9901983737945557,
      "learning_rate": 0.0004799555456410475,
      "loss": 3.1943,
      "step": 68021
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.0665900707244873,
      "learning_rate": 0.000479952272720061,
      "loss": 3.2299,
      "step": 68022
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.9073867797851562,
      "learning_rate": 0.00047994899976561796,
      "loss": 2.7548,
      "step": 68023
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7700068950653076,
      "learning_rate": 0.00047994572677771897,
      "loss": 3.142,
      "step": 68024
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2805283069610596,
      "learning_rate": 0.0004799424537563647,
      "loss": 3.0366,
      "step": 68025
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.863400936126709,
      "learning_rate": 0.0004799391807015558,
      "loss": 2.8387,
      "step": 68026
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3208391666412354,
      "learning_rate": 0.0004799359076132927,
      "loss": 2.9868,
      "step": 68027
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5402770042419434,
      "learning_rate": 0.00047993263449157617,
      "loss": 2.9417,
      "step": 68028
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7289838790893555,
      "learning_rate": 0.00047992936133640674,
      "loss": 3.1526,
      "step": 68029
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.7831907272338867,
      "learning_rate": 0.00047992608814778506,
      "loss": 2.9825,
      "step": 68030
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6141865253448486,
      "learning_rate": 0.00047992281492571174,
      "loss": 2.8002,
      "step": 68031
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.41929030418396,
      "learning_rate": 0.0004799195416701873,
      "loss": 2.8467,
      "step": 68032
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4954668283462524,
      "learning_rate": 0.0004799162683812125,
      "loss": 3.2102,
      "step": 68033
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.175748825073242,
      "learning_rate": 0.00047991299505878784,
      "loss": 2.9792,
      "step": 68034
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.167267322540283,
      "learning_rate": 0.0004799097217029139,
      "loss": 2.8745,
      "step": 68035
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0344252586364746,
      "learning_rate": 0.0004799064483135915,
      "loss": 3.2109,
      "step": 68036
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0178864002227783,
      "learning_rate": 0.0004799031748908209,
      "loss": 3.0136,
      "step": 68037
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3758037090301514,
      "learning_rate": 0.000479899901434603,
      "loss": 3.0421,
      "step": 68038
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.303135633468628,
      "learning_rate": 0.0004798966279449384,
      "loss": 2.8757,
      "step": 68039
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.436847448348999,
      "learning_rate": 0.0004798933544218275,
      "loss": 3.0038,
      "step": 68040
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.175814151763916,
      "learning_rate": 0.0004798900808652711,
      "loss": 2.9185,
      "step": 68041
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7120897769927979,
      "learning_rate": 0.00047988680727526975,
      "loss": 2.9886,
      "step": 68042
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.6870367527008057,
      "learning_rate": 0.00047988353365182397,
      "loss": 2.9075,
      "step": 68043
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.543774366378784,
      "learning_rate": 0.00047988025999493446,
      "loss": 3.1384,
      "step": 68044
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6213939189910889,
      "learning_rate": 0.0004798769863046018,
      "loss": 2.924,
      "step": 68045
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4737443923950195,
      "learning_rate": 0.0004798737125808267,
      "loss": 3.043,
      "step": 68046
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1064929962158203,
      "learning_rate": 0.0004798704388236096,
      "loss": 2.8294,
      "step": 68047
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6883447170257568,
      "learning_rate": 0.0004798671650329512,
      "loss": 2.6332,
      "step": 68048
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1513781547546387,
      "learning_rate": 0.00047986389120885213,
      "loss": 3.0407,
      "step": 68049
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9023640155792236,
      "learning_rate": 0.000479860617351313,
      "loss": 3.1835,
      "step": 68050
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5900837182998657,
      "learning_rate": 0.00047985734346033427,
      "loss": 3.0554,
      "step": 68051
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.177842617034912,
      "learning_rate": 0.0004798540695359168,
      "loss": 3.0584,
      "step": 68052
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.384211540222168,
      "learning_rate": 0.000479850795578061,
      "loss": 3.1643,
      "step": 68053
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6317607164382935,
      "learning_rate": 0.0004798475215867676,
      "loss": 2.9871,
      "step": 68054
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5158352851867676,
      "learning_rate": 0.00047984424756203705,
      "loss": 3.0734,
      "step": 68055
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5303171873092651,
      "learning_rate": 0.00047984097350387006,
      "loss": 2.9417,
      "step": 68056
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4809645414352417,
      "learning_rate": 0.0004798376994122673,
      "loss": 3.0703,
      "step": 68057
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6702775955200195,
      "learning_rate": 0.00047983442528722926,
      "loss": 2.9745,
      "step": 68058
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5216385126113892,
      "learning_rate": 0.00047983115112875665,
      "loss": 3.1432,
      "step": 68059
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.136505603790283,
      "learning_rate": 0.00047982787693685007,
      "loss": 3.1626,
      "step": 68060
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0762689113616943,
      "learning_rate": 0.00047982460271151,
      "loss": 3.0078,
      "step": 68061
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.501786708831787,
      "learning_rate": 0.00047982132845273713,
      "loss": 2.9968,
      "step": 68062
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9398987293243408,
      "learning_rate": 0.00047981805416053225,
      "loss": 3.0905,
      "step": 68063
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.921147108078003,
      "learning_rate": 0.00047981477983489567,
      "loss": 3.0206,
      "step": 68064
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.523079752922058,
      "learning_rate": 0.00047981150547582815,
      "loss": 2.8098,
      "step": 68065
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9081957340240479,
      "learning_rate": 0.0004798082310833303,
      "loss": 3.1588,
      "step": 68066
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.563483238220215,
      "learning_rate": 0.00047980495665740267,
      "loss": 3.1628,
      "step": 68067
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1587836742401123,
      "learning_rate": 0.0004798016821980459,
      "loss": 3.1392,
      "step": 68068
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.762585163116455,
      "learning_rate": 0.0004797984077052606,
      "loss": 2.728,
      "step": 68069
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.848002552986145,
      "learning_rate": 0.0004797951331790474,
      "loss": 3.1388,
      "step": 68070
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3980027437210083,
      "learning_rate": 0.00047979185861940694,
      "loss": 3.2204,
      "step": 68071
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6770416498184204,
      "learning_rate": 0.00047978858402633975,
      "loss": 3.0275,
      "step": 68072
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8914543390274048,
      "learning_rate": 0.00047978530939984644,
      "loss": 3.094,
      "step": 68073
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0611870288848877,
      "learning_rate": 0.0004797820347399276,
      "loss": 2.9492,
      "step": 68074
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6292831897735596,
      "learning_rate": 0.000479778760046584,
      "loss": 2.8586,
      "step": 68075
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3368350267410278,
      "learning_rate": 0.00047977548531981607,
      "loss": 2.9355,
      "step": 68076
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2229814529418945,
      "learning_rate": 0.0004797722105596245,
      "loss": 2.9647,
      "step": 68077
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2844173908233643,
      "learning_rate": 0.00047976893576600986,
      "loss": 3.1226,
      "step": 68078
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5381730794906616,
      "learning_rate": 0.0004797656609389728,
      "loss": 2.9198,
      "step": 68079
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6711411476135254,
      "learning_rate": 0.0004797623860785139,
      "loss": 3.2055,
      "step": 68080
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.529254198074341,
      "learning_rate": 0.00047975911118463383,
      "loss": 3.1156,
      "step": 68081
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.494048833847046,
      "learning_rate": 0.0004797558362573331,
      "loss": 3.2294,
      "step": 68082
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.710060954093933,
      "learning_rate": 0.00047975256129661234,
      "loss": 2.9033,
      "step": 68083
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.447225332260132,
      "learning_rate": 0.00047974928630247225,
      "loss": 2.7324,
      "step": 68084
    },
    {
      "epoch": 0.89,
      "grad_norm": 4.14482307434082,
      "learning_rate": 0.0004797460112749133,
      "loss": 2.8106,
      "step": 68085
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8310457468032837,
      "learning_rate": 0.00047974273621393624,
      "loss": 3.1304,
      "step": 68086
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.60606050491333,
      "learning_rate": 0.0004797394611195416,
      "loss": 2.9405,
      "step": 68087
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2447621822357178,
      "learning_rate": 0.00047973618599173,
      "loss": 2.9093,
      "step": 68088
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.3576877117156982,
      "learning_rate": 0.000479732910830502,
      "loss": 2.9543,
      "step": 68089
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5627920627593994,
      "learning_rate": 0.00047972963563585827,
      "loss": 3.1162,
      "step": 68090
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7256847620010376,
      "learning_rate": 0.0004797263604077995,
      "loss": 2.9491,
      "step": 68091
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8371723890304565,
      "learning_rate": 0.0004797230851463262,
      "loss": 2.8097,
      "step": 68092
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9162518978118896,
      "learning_rate": 0.00047971980985143887,
      "loss": 2.9805,
      "step": 68093
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6089447736740112,
      "learning_rate": 0.00047971653452313825,
      "loss": 3.117,
      "step": 68094
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.789052963256836,
      "learning_rate": 0.000479713259161425,
      "loss": 3.1572,
      "step": 68095
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.252025604248047,
      "learning_rate": 0.00047970998376629963,
      "loss": 3.0178,
      "step": 68096
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0016322135925293,
      "learning_rate": 0.0004797067083377628,
      "loss": 2.9664,
      "step": 68097
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.650094985961914,
      "learning_rate": 0.0004797034328758151,
      "loss": 2.9345,
      "step": 68098
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6907875537872314,
      "learning_rate": 0.00047970015738045714,
      "loss": 3.0578,
      "step": 68099
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5680865049362183,
      "learning_rate": 0.00047969688185168954,
      "loss": 2.7237,
      "step": 68100
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6769849061965942,
      "learning_rate": 0.00047969360628951285,
      "loss": 2.9851,
      "step": 68101
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7179092168807983,
      "learning_rate": 0.0004796903306939278,
      "loss": 2.8942,
      "step": 68102
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4209322929382324,
      "learning_rate": 0.0004796870550649349,
      "loss": 2.85,
      "step": 68103
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7564291954040527,
      "learning_rate": 0.0004796837794025347,
      "loss": 3.0146,
      "step": 68104
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5968561172485352,
      "learning_rate": 0.00047968050370672793,
      "loss": 3.033,
      "step": 68105
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4698784351348877,
      "learning_rate": 0.00047967722797751525,
      "loss": 2.9805,
      "step": 68106
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.727772831916809,
      "learning_rate": 0.0004796739522148971,
      "loss": 3.2776,
      "step": 68107
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7110515832901,
      "learning_rate": 0.0004796706764188742,
      "loss": 3.2204,
      "step": 68108
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.470202088356018,
      "learning_rate": 0.00047966740058944717,
      "loss": 3.1953,
      "step": 68109
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7215335369110107,
      "learning_rate": 0.00047966412472661655,
      "loss": 2.9654,
      "step": 68110
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6508055925369263,
      "learning_rate": 0.00047966084883038296,
      "loss": 3.0476,
      "step": 68111
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6296803951263428,
      "learning_rate": 0.000479657572900747,
      "loss": 3.0583,
      "step": 68112
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5260560512542725,
      "learning_rate": 0.0004796542969377093,
      "loss": 3.1727,
      "step": 68113
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.641724944114685,
      "learning_rate": 0.0004796510209412706,
      "loss": 3.0714,
      "step": 68114
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5964423418045044,
      "learning_rate": 0.0004796477449114313,
      "loss": 2.9063,
      "step": 68115
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.558680534362793,
      "learning_rate": 0.00047964446884819203,
      "loss": 3.1791,
      "step": 68116
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.796754002571106,
      "learning_rate": 0.0004796411927515535,
      "loss": 3.122,
      "step": 68117
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.547045350074768,
      "learning_rate": 0.0004796379166215163,
      "loss": 3.1334,
      "step": 68118
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4741594791412354,
      "learning_rate": 0.00047963464045808104,
      "loss": 3.0152,
      "step": 68119
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6916521787643433,
      "learning_rate": 0.0004796313642612483,
      "loss": 3.1741,
      "step": 68120
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.560927391052246,
      "learning_rate": 0.0004796280880310187,
      "loss": 2.6575,
      "step": 68121
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.580386996269226,
      "learning_rate": 0.00047962481176739284,
      "loss": 3.117,
      "step": 68122
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.54710054397583,
      "learning_rate": 0.0004796215354703713,
      "loss": 2.8443,
      "step": 68123
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1089253425598145,
      "learning_rate": 0.0004796182591399548,
      "loss": 3.1028,
      "step": 68124
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7872644662857056,
      "learning_rate": 0.0004796149827761438,
      "loss": 3.0598,
      "step": 68125
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8103251457214355,
      "learning_rate": 0.000479611706378939,
      "loss": 2.993,
      "step": 68126
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6382695436477661,
      "learning_rate": 0.000479608429948341,
      "loss": 2.8724,
      "step": 68127
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7189639806747437,
      "learning_rate": 0.0004796051534843505,
      "loss": 3.1325,
      "step": 68128
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.509405255317688,
      "learning_rate": 0.0004796018769869679,
      "loss": 3.0149,
      "step": 68129
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3873310089111328,
      "learning_rate": 0.00047959860045619396,
      "loss": 3.1185,
      "step": 68130
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.922767162322998,
      "learning_rate": 0.00047959532389202917,
      "loss": 2.7314,
      "step": 68131
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5269266366958618,
      "learning_rate": 0.0004795920472944743,
      "loss": 2.8053,
      "step": 68132
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5436333417892456,
      "learning_rate": 0.0004795887706635299,
      "loss": 3.1668,
      "step": 68133
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5771442651748657,
      "learning_rate": 0.00047958549399919657,
      "loss": 2.8401,
      "step": 68134
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.539759635925293,
      "learning_rate": 0.0004795822173014747,
      "loss": 3.0173,
      "step": 68135
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5441765785217285,
      "learning_rate": 0.00047957894057036534,
      "loss": 3.1884,
      "step": 68136
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6939550638198853,
      "learning_rate": 0.0004795756638058688,
      "loss": 3.0429,
      "step": 68137
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.67554771900177,
      "learning_rate": 0.00047957238700798577,
      "loss": 3.0223,
      "step": 68138
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.492636799812317,
      "learning_rate": 0.00047956911017671676,
      "loss": 3.1034,
      "step": 68139
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2004244327545166,
      "learning_rate": 0.0004795658333120626,
      "loss": 2.9157,
      "step": 68140
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4601463079452515,
      "learning_rate": 0.00047956255641402363,
      "loss": 3.0349,
      "step": 68141
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6474883556365967,
      "learning_rate": 0.0004795592794826007,
      "loss": 3.0658,
      "step": 68142
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7680615186691284,
      "learning_rate": 0.0004795560025177942,
      "loss": 2.9412,
      "step": 68143
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.644694209098816,
      "learning_rate": 0.0004795527255196049,
      "loss": 2.8757,
      "step": 68144
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4944754838943481,
      "learning_rate": 0.00047954944848803334,
      "loss": 2.9417,
      "step": 68145
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.798223614692688,
      "learning_rate": 0.00047954617142308023,
      "loss": 3.3423,
      "step": 68146
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.601731538772583,
      "learning_rate": 0.000479542894324746,
      "loss": 2.9935,
      "step": 68147
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6852248907089233,
      "learning_rate": 0.0004795396171930314,
      "loss": 3.1086,
      "step": 68148
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.217029333114624,
      "learning_rate": 0.00047953634002793704,
      "loss": 2.8711,
      "step": 68149
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6362637281417847,
      "learning_rate": 0.00047953306282946334,
      "loss": 2.9622,
      "step": 68150
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7353748083114624,
      "learning_rate": 0.0004795297855976112,
      "loss": 2.8051,
      "step": 68151
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8370569944381714,
      "learning_rate": 0.00047952650833238105,
      "loss": 3.3536,
      "step": 68152
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.762345314025879,
      "learning_rate": 0.00047952323103377353,
      "loss": 2.8437,
      "step": 68153
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.535409688949585,
      "learning_rate": 0.00047951995370178923,
      "loss": 2.9262,
      "step": 68154
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.275341749191284,
      "learning_rate": 0.00047951667633642874,
      "loss": 3.08,
      "step": 68155
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.486863613128662,
      "learning_rate": 0.0004795133989376928,
      "loss": 2.7482,
      "step": 68156
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.588244915008545,
      "learning_rate": 0.00047951012150558196,
      "loss": 2.9422,
      "step": 68157
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3496732711791992,
      "learning_rate": 0.0004795068440400967,
      "loss": 3.0873,
      "step": 68158
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.871909737586975,
      "learning_rate": 0.0004795035665412377,
      "loss": 3.0417,
      "step": 68159
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8293288946151733,
      "learning_rate": 0.0004795002890090057,
      "loss": 3.0802,
      "step": 68160
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5211843252182007,
      "learning_rate": 0.0004794970114434012,
      "loss": 3.0553,
      "step": 68161
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.591681718826294,
      "learning_rate": 0.00047949373384442473,
      "loss": 3.0958,
      "step": 68162
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6277341842651367,
      "learning_rate": 0.000479490456212077,
      "loss": 3.133,
      "step": 68163
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4031078815460205,
      "learning_rate": 0.0004794871785463587,
      "loss": 2.8299,
      "step": 68164
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4808051586151123,
      "learning_rate": 0.0004794839008472703,
      "loss": 2.9214,
      "step": 68165
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9059886932373047,
      "learning_rate": 0.00047948062311481247,
      "loss": 3.0854,
      "step": 68166
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4382379055023193,
      "learning_rate": 0.0004794773453489858,
      "loss": 3.1309,
      "step": 68167
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6428054571151733,
      "learning_rate": 0.0004794740675497908,
      "loss": 2.9315,
      "step": 68168
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.088695764541626,
      "learning_rate": 0.0004794707897172283,
      "loss": 2.8454,
      "step": 68169
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5290968418121338,
      "learning_rate": 0.00047946751185129884,
      "loss": 2.8496,
      "step": 68170
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.8960063457489014,
      "learning_rate": 0.00047946423395200293,
      "loss": 2.6955,
      "step": 68171
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5990480184555054,
      "learning_rate": 0.00047946095601934113,
      "loss": 2.9249,
      "step": 68172
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6963917016983032,
      "learning_rate": 0.0004794576780533143,
      "loss": 3.0139,
      "step": 68173
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.625136137008667,
      "learning_rate": 0.0004794544000539228,
      "loss": 2.9563,
      "step": 68174
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.90997314453125,
      "learning_rate": 0.0004794511220211673,
      "loss": 2.983,
      "step": 68175
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4082214832305908,
      "learning_rate": 0.0004794478439550486,
      "loss": 3.0365,
      "step": 68176
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6586205959320068,
      "learning_rate": 0.00047944456585556703,
      "loss": 3.2048,
      "step": 68177
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.292778253555298,
      "learning_rate": 0.00047944128772272335,
      "loss": 3.0333,
      "step": 68178
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5311286449432373,
      "learning_rate": 0.0004794380095565183,
      "loss": 3.0226,
      "step": 68179
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0380733013153076,
      "learning_rate": 0.0004794347313569521,
      "loss": 3.067,
      "step": 68180
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2022864818573,
      "learning_rate": 0.00047943145312402577,
      "loss": 3.2383,
      "step": 68181
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.689408302307129,
      "learning_rate": 0.00047942817485773957,
      "loss": 3.0832,
      "step": 68182
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4115785360336304,
      "learning_rate": 0.0004794248965580945,
      "loss": 3.0764,
      "step": 68183
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4610583782196045,
      "learning_rate": 0.00047942161822509075,
      "loss": 3.0101,
      "step": 68184
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6989479064941406,
      "learning_rate": 0.00047941833985872933,
      "loss": 2.9848,
      "step": 68185
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5752540826797485,
      "learning_rate": 0.0004794150614590106,
      "loss": 2.7902,
      "step": 68186
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4940606355667114,
      "learning_rate": 0.00047941178302593506,
      "loss": 2.9853,
      "step": 68187
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8963193893432617,
      "learning_rate": 0.0004794085045595036,
      "loss": 3.0924,
      "step": 68188
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4095513820648193,
      "learning_rate": 0.00047940522605971676,
      "loss": 2.9113,
      "step": 68189
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6838998794555664,
      "learning_rate": 0.00047940194752657506,
      "loss": 2.9496,
      "step": 68190
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5011420249938965,
      "learning_rate": 0.0004793986689600792,
      "loss": 3.1569,
      "step": 68191
    },
    {
      "epoch": 0.89,
      "grad_norm": 4.207890033721924,
      "learning_rate": 0.00047939539036022963,
      "loss": 2.8055,
      "step": 68192
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4557448625564575,
      "learning_rate": 0.0004793921117270271,
      "loss": 2.9976,
      "step": 68193
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6708347797393799,
      "learning_rate": 0.0004793888330604723,
      "loss": 2.8738,
      "step": 68194
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.915822148323059,
      "learning_rate": 0.0004793855543605656,
      "loss": 2.9463,
      "step": 68195
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4894479513168335,
      "learning_rate": 0.0004793822756273078,
      "loss": 3.0237,
      "step": 68196
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4968891143798828,
      "learning_rate": 0.0004793789968606995,
      "loss": 3.2882,
      "step": 68197
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.410010576248169,
      "learning_rate": 0.00047937571806074117,
      "loss": 3.0074,
      "step": 68198
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8844467401504517,
      "learning_rate": 0.0004793724392274335,
      "loss": 3.0807,
      "step": 68199
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.800950765609741,
      "learning_rate": 0.00047936916036077717,
      "loss": 2.6895,
      "step": 68200
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.660329818725586,
      "learning_rate": 0.00047936588146077274,
      "loss": 2.96,
      "step": 68201
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1716275215148926,
      "learning_rate": 0.0004793626025274207,
      "loss": 3.0948,
      "step": 68202
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0028655529022217,
      "learning_rate": 0.00047935932356072184,
      "loss": 3.0251,
      "step": 68203
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7718055248260498,
      "learning_rate": 0.0004793560445606767,
      "loss": 3.0053,
      "step": 68204
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6336649656295776,
      "learning_rate": 0.0004793527655272858,
      "loss": 3.0231,
      "step": 68205
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5074119567871094,
      "learning_rate": 0.00047934948646054996,
      "loss": 3.2237,
      "step": 68206
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.1014771461486816,
      "learning_rate": 0.0004793462073604697,
      "loss": 2.9984,
      "step": 68207
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.9257869720458984,
      "learning_rate": 0.0004793429282270454,
      "loss": 2.8173,
      "step": 68208
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3515164852142334,
      "learning_rate": 0.00047933964906027803,
      "loss": 3.1834,
      "step": 68209
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2495503425598145,
      "learning_rate": 0.000479336369860168,
      "loss": 3.282,
      "step": 68210
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1297476291656494,
      "learning_rate": 0.0004793330906267159,
      "loss": 3.2093,
      "step": 68211
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6848959922790527,
      "learning_rate": 0.0004793298113599224,
      "loss": 2.9865,
      "step": 68212
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1615869998931885,
      "learning_rate": 0.0004793265320597882,
      "loss": 2.9872,
      "step": 68213
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.937288761138916,
      "learning_rate": 0.0004793232527263137,
      "loss": 2.8862,
      "step": 68214
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4051542282104492,
      "learning_rate": 0.00047931997335949974,
      "loss": 3.0855,
      "step": 68215
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4207013845443726,
      "learning_rate": 0.00047931669395934667,
      "loss": 2.9194,
      "step": 68216
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2308096885681152,
      "learning_rate": 0.00047931341452585525,
      "loss": 3.0816,
      "step": 68217
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.189208745956421,
      "learning_rate": 0.00047931013505902624,
      "loss": 2.9922,
      "step": 68218
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.78542160987854,
      "learning_rate": 0.00047930685555886,
      "loss": 2.8331,
      "step": 68219
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.806662082672119,
      "learning_rate": 0.0004793035760253572,
      "loss": 3.3282,
      "step": 68220
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4215199947357178,
      "learning_rate": 0.0004793002964585185,
      "loss": 2.8176,
      "step": 68221
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.245515823364258,
      "learning_rate": 0.0004792970168583445,
      "loss": 3.017,
      "step": 68222
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3263843059539795,
      "learning_rate": 0.00047929373722483575,
      "loss": 3.0246,
      "step": 68223
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3522703647613525,
      "learning_rate": 0.00047929045755799296,
      "loss": 2.8632,
      "step": 68224
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.7570409774780273,
      "learning_rate": 0.0004792871778578167,
      "loss": 3.0941,
      "step": 68225
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.650516152381897,
      "learning_rate": 0.00047928389812430756,
      "loss": 3.1464,
      "step": 68226
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5226460695266724,
      "learning_rate": 0.0004792806183574661,
      "loss": 3.0995,
      "step": 68227
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2233307361602783,
      "learning_rate": 0.00047927733855729305,
      "loss": 3.0394,
      "step": 68228
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7143503427505493,
      "learning_rate": 0.0004792740587237889,
      "loss": 3.0432,
      "step": 68229
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5505149364471436,
      "learning_rate": 0.0004792707788569544,
      "loss": 2.9573,
      "step": 68230
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.954793930053711,
      "learning_rate": 0.0004792674989567901,
      "loss": 2.989,
      "step": 68231
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.327528715133667,
      "learning_rate": 0.00047926421902329654,
      "loss": 2.8643,
      "step": 68232
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0420055389404297,
      "learning_rate": 0.0004792609390564743,
      "loss": 3.0357,
      "step": 68233
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.349809169769287,
      "learning_rate": 0.0004792576590563242,
      "loss": 3.2028,
      "step": 68234
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.003188371658325,
      "learning_rate": 0.0004792543790228466,
      "loss": 3.0143,
      "step": 68235
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6385581493377686,
      "learning_rate": 0.0004792510989560423,
      "loss": 3.0042,
      "step": 68236
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5103586912155151,
      "learning_rate": 0.00047924781885591187,
      "loss": 3.1731,
      "step": 68237
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6080316305160522,
      "learning_rate": 0.0004792445387224558,
      "loss": 3.0229,
      "step": 68238
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5606673955917358,
      "learning_rate": 0.0004792412585556748,
      "loss": 2.7627,
      "step": 68239
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4588788747787476,
      "learning_rate": 0.00047923797835556955,
      "loss": 2.9718,
      "step": 68240
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.795591950416565,
      "learning_rate": 0.00047923469812214043,
      "loss": 3.0753,
      "step": 68241
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5441375970840454,
      "learning_rate": 0.0004792314178553883,
      "loss": 2.9095,
      "step": 68242
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4221372604370117,
      "learning_rate": 0.00047922813755531366,
      "loss": 3.1472,
      "step": 68243
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7309179306030273,
      "learning_rate": 0.00047922485722191714,
      "loss": 2.9981,
      "step": 68244
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6746594905853271,
      "learning_rate": 0.0004792215768551993,
      "loss": 3.1434,
      "step": 68245
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4916248321533203,
      "learning_rate": 0.0004792182964551608,
      "loss": 3.154,
      "step": 68246
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0193071365356445,
      "learning_rate": 0.00047921501602180217,
      "loss": 3.1386,
      "step": 68247
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4290037155151367,
      "learning_rate": 0.00047921173555512414,
      "loss": 3.0991,
      "step": 68248
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.301527261734009,
      "learning_rate": 0.0004792084550551273,
      "loss": 3.1875,
      "step": 68249
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0519137382507324,
      "learning_rate": 0.0004792051745218122,
      "loss": 3.0282,
      "step": 68250
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4997992515563965,
      "learning_rate": 0.0004792018939551795,
      "loss": 3.1571,
      "step": 68251
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7574464082717896,
      "learning_rate": 0.00047919861335522974,
      "loss": 3.0433,
      "step": 68252
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.176896572113037,
      "learning_rate": 0.0004791953327219636,
      "loss": 2.9883,
      "step": 68253
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.894169569015503,
      "learning_rate": 0.00047919205205538163,
      "loss": 3.0056,
      "step": 68254
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5172137022018433,
      "learning_rate": 0.00047918877135548455,
      "loss": 3.2515,
      "step": 68255
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.166037082672119,
      "learning_rate": 0.00047918549062227287,
      "loss": 3.0438,
      "step": 68256
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6745558977127075,
      "learning_rate": 0.0004791822098557471,
      "loss": 3.0044,
      "step": 68257
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4079773426055908,
      "learning_rate": 0.0004791789290559082,
      "loss": 2.9946,
      "step": 68258
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3400617837905884,
      "learning_rate": 0.00047917564822275643,
      "loss": 2.9623,
      "step": 68259
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5454926490783691,
      "learning_rate": 0.00047917236735629245,
      "loss": 3.0055,
      "step": 68260
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6971912384033203,
      "learning_rate": 0.00047916908645651707,
      "loss": 2.987,
      "step": 68261
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.4749257564544678,
      "learning_rate": 0.0004791658055234307,
      "loss": 3.1953,
      "step": 68262
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.043591260910034,
      "learning_rate": 0.00047916252455703403,
      "loss": 3.1743,
      "step": 68263
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.686269760131836,
      "learning_rate": 0.0004791592435573278,
      "loss": 3.0363,
      "step": 68264
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.2769250869750977,
      "learning_rate": 0.0004791559625243123,
      "loss": 3.0912,
      "step": 68265
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1244916915893555,
      "learning_rate": 0.0004791526814579884,
      "loss": 2.985,
      "step": 68266
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6117969751358032,
      "learning_rate": 0.00047914940035835666,
      "loss": 3.0838,
      "step": 68267
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0246763229370117,
      "learning_rate": 0.00047914611922541763,
      "loss": 2.9766,
      "step": 68268
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.789879322052002,
      "learning_rate": 0.000479142838059172,
      "loss": 3.1852,
      "step": 68269
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.152214527130127,
      "learning_rate": 0.0004791395568596202,
      "loss": 3.2673,
      "step": 68270
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6632587909698486,
      "learning_rate": 0.00047913627562676313,
      "loss": 3.1931,
      "step": 68271
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1410629749298096,
      "learning_rate": 0.00047913299436060113,
      "loss": 3.1109,
      "step": 68272
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.0521037578582764,
      "learning_rate": 0.000479129713061135,
      "loss": 3.0149,
      "step": 68273
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6489962339401245,
      "learning_rate": 0.0004791264317283653,
      "loss": 3.2459,
      "step": 68274
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.639883041381836,
      "learning_rate": 0.0004791231503622926,
      "loss": 3.1574,
      "step": 68275
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1389544010162354,
      "learning_rate": 0.00047911986896291743,
      "loss": 3.032,
      "step": 68276
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9907571077346802,
      "learning_rate": 0.0004791165875302406,
      "loss": 3.0068,
      "step": 68277
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.694385290145874,
      "learning_rate": 0.00047911330606426256,
      "loss": 2.8888,
      "step": 68278
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4955538511276245,
      "learning_rate": 0.000479110024564984,
      "loss": 3.0028,
      "step": 68279
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4986289739608765,
      "learning_rate": 0.00047910674303240544,
      "loss": 3.0196,
      "step": 68280
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5564792156219482,
      "learning_rate": 0.0004791034614665276,
      "loss": 3.0824,
      "step": 68281
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6223949193954468,
      "learning_rate": 0.0004791001798673511,
      "loss": 2.9621,
      "step": 68282
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.816736102104187,
      "learning_rate": 0.0004790968982348765,
      "loss": 3.045,
      "step": 68283
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4200505018234253,
      "learning_rate": 0.00047909361656910435,
      "loss": 3.1903,
      "step": 68284
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5869659185409546,
      "learning_rate": 0.00047909033487003534,
      "loss": 2.8474,
      "step": 68285
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4746195077896118,
      "learning_rate": 0.00047908705313767,
      "loss": 3.0147,
      "step": 68286
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.674122929573059,
      "learning_rate": 0.000479083771372009,
      "loss": 3.0672,
      "step": 68287
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6754637956619263,
      "learning_rate": 0.00047908048957305305,
      "loss": 2.753,
      "step": 68288
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5866557359695435,
      "learning_rate": 0.00047907720774080254,
      "loss": 3.0093,
      "step": 68289
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6991204023361206,
      "learning_rate": 0.00047907392587525824,
      "loss": 3.205,
      "step": 68290
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8459548950195312,
      "learning_rate": 0.00047907064397642075,
      "loss": 3.1289,
      "step": 68291
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4772889614105225,
      "learning_rate": 0.00047906736204429066,
      "loss": 3.2803,
      "step": 68292
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9378271102905273,
      "learning_rate": 0.00047906408007886847,
      "loss": 3.0144,
      "step": 68293
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3668909072875977,
      "learning_rate": 0.000479060798080155,
      "loss": 3.2006,
      "step": 68294
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6005983352661133,
      "learning_rate": 0.0004790575160481507,
      "loss": 3.2366,
      "step": 68295
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.985433578491211,
      "learning_rate": 0.00047905423398285623,
      "loss": 2.7179,
      "step": 68296
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8369743824005127,
      "learning_rate": 0.0004790509518842721,
      "loss": 2.7798,
      "step": 68297
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.420778512954712,
      "learning_rate": 0.0004790476697523991,
      "loss": 3.0268,
      "step": 68298
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3738930225372314,
      "learning_rate": 0.00047904438758723783,
      "loss": 3.1087,
      "step": 68299
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5776084661483765,
      "learning_rate": 0.0004790411053887887,
      "loss": 3.0298,
      "step": 68300
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6418415307998657,
      "learning_rate": 0.0004790378231570526,
      "loss": 3.2231,
      "step": 68301
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5394498109817505,
      "learning_rate": 0.0004790345408920299,
      "loss": 3.0515,
      "step": 68302
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6883033514022827,
      "learning_rate": 0.0004790312585937213,
      "loss": 2.9474,
      "step": 68303
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4474669694900513,
      "learning_rate": 0.00047902797626212743,
      "loss": 3.2512,
      "step": 68304
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6463679075241089,
      "learning_rate": 0.00047902469389724884,
      "loss": 3.1371,
      "step": 68305
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0913665294647217,
      "learning_rate": 0.00047902141149908616,
      "loss": 2.9714,
      "step": 68306
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.893591284751892,
      "learning_rate": 0.00047901812906764015,
      "loss": 2.8384,
      "step": 68307
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7532906532287598,
      "learning_rate": 0.00047901484660291114,
      "loss": 2.9693,
      "step": 68308
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5101615190505981,
      "learning_rate": 0.00047901156410489994,
      "loss": 2.9576,
      "step": 68309
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6266368627548218,
      "learning_rate": 0.00047900828157360716,
      "loss": 3.186,
      "step": 68310
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.435060977935791,
      "learning_rate": 0.00047900499900903337,
      "loss": 3.0125,
      "step": 68311
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6887471675872803,
      "learning_rate": 0.0004790017164111791,
      "loss": 2.8656,
      "step": 68312
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4871933460235596,
      "learning_rate": 0.00047899843378004504,
      "loss": 2.9232,
      "step": 68313
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4461195468902588,
      "learning_rate": 0.0004789951511156318,
      "loss": 3.1473,
      "step": 68314
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3397891521453857,
      "learning_rate": 0.00047899186841794,
      "loss": 2.9983,
      "step": 68315
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9022469520568848,
      "learning_rate": 0.0004789885856869703,
      "loss": 2.7784,
      "step": 68316
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7383449077606201,
      "learning_rate": 0.00047898530292272317,
      "loss": 3.006,
      "step": 68317
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9496400356292725,
      "learning_rate": 0.0004789820201251992,
      "loss": 3.1915,
      "step": 68318
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.6357929706573486,
      "learning_rate": 0.0004789787372943992,
      "loss": 3.007,
      "step": 68319
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.132181406021118,
      "learning_rate": 0.0004789754544303237,
      "loss": 3.1302,
      "step": 68320
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6254931688308716,
      "learning_rate": 0.0004789721715329732,
      "loss": 3.1395,
      "step": 68321
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7124464511871338,
      "learning_rate": 0.0004789688886023485,
      "loss": 3.0846,
      "step": 68322
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1460494995117188,
      "learning_rate": 0.00047896560563845007,
      "loss": 2.8978,
      "step": 68323
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0858070850372314,
      "learning_rate": 0.0004789623226412785,
      "loss": 2.8888,
      "step": 68324
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5823558568954468,
      "learning_rate": 0.0004789590396108345,
      "loss": 2.9465,
      "step": 68325
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1009199619293213,
      "learning_rate": 0.0004789557565471186,
      "loss": 3.1148,
      "step": 68326
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.69364595413208,
      "learning_rate": 0.0004789524734501314,
      "loss": 3.0075,
      "step": 68327
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.8251349925994873,
      "learning_rate": 0.00047894919031987374,
      "loss": 3.1574,
      "step": 68328
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5706979036331177,
      "learning_rate": 0.0004789459071563459,
      "loss": 3.0819,
      "step": 68329
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.502028465270996,
      "learning_rate": 0.0004789426239595487,
      "loss": 3.0364,
      "step": 68330
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2635364532470703,
      "learning_rate": 0.00047893934072948266,
      "loss": 3.129,
      "step": 68331
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.493074893951416,
      "learning_rate": 0.00047893605746614844,
      "loss": 3.0443,
      "step": 68332
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.03538179397583,
      "learning_rate": 0.0004789327741695465,
      "loss": 2.8857,
      "step": 68333
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.002328634262085,
      "learning_rate": 0.0004789294908396778,
      "loss": 3.1334,
      "step": 68334
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7756950855255127,
      "learning_rate": 0.00047892620747654263,
      "loss": 3.1411,
      "step": 68335
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8869271278381348,
      "learning_rate": 0.0004789229240801417,
      "loss": 3.0937,
      "step": 68336
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.285871982574463,
      "learning_rate": 0.00047891964065047555,
      "loss": 2.7908,
      "step": 68337
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5241690874099731,
      "learning_rate": 0.00047891635718754497,
      "loss": 3.4074,
      "step": 68338
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3166446685791016,
      "learning_rate": 0.0004789130736913504,
      "loss": 2.9228,
      "step": 68339
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8566219806671143,
      "learning_rate": 0.00047890979016189253,
      "loss": 3.1047,
      "step": 68340
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4353874921798706,
      "learning_rate": 0.000478906506599172,
      "loss": 2.9396,
      "step": 68341
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0542547702789307,
      "learning_rate": 0.00047890322300318924,
      "loss": 3.1412,
      "step": 68342
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4829418659210205,
      "learning_rate": 0.00047889993937394514,
      "loss": 2.9753,
      "step": 68343
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8526049852371216,
      "learning_rate": 0.0004788966557114401,
      "loss": 2.9984,
      "step": 68344
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5243823528289795,
      "learning_rate": 0.0004788933720156747,
      "loss": 3.0113,
      "step": 68345
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.489919900894165,
      "learning_rate": 0.0004788900882866499,
      "loss": 3.2674,
      "step": 68346
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7166764736175537,
      "learning_rate": 0.0004788868045243658,
      "loss": 3.1445,
      "step": 68347
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4033310413360596,
      "learning_rate": 0.0004788835207288234,
      "loss": 3.0287,
      "step": 68348
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5698858499526978,
      "learning_rate": 0.0004788802369000232,
      "loss": 2.9138,
      "step": 68349
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6012049913406372,
      "learning_rate": 0.0004788769530379657,
      "loss": 3.2191,
      "step": 68350
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5333491563796997,
      "learning_rate": 0.0004788736691426516,
      "loss": 2.8302,
      "step": 68351
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6078999042510986,
      "learning_rate": 0.00047887038521408155,
      "loss": 2.8575,
      "step": 68352
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8964457511901855,
      "learning_rate": 0.00047886710125225604,
      "loss": 3.1409,
      "step": 68353
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.800197958946228,
      "learning_rate": 0.0004788638172571759,
      "loss": 3.3336,
      "step": 68354
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8030049800872803,
      "learning_rate": 0.00047886053322884146,
      "loss": 3.0708,
      "step": 68355
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3586218357086182,
      "learning_rate": 0.0004788572491672536,
      "loss": 3.1749,
      "step": 68356
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5992077589035034,
      "learning_rate": 0.0004788539650724127,
      "loss": 3.0537,
      "step": 68357
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.681728720664978,
      "learning_rate": 0.0004788506809443195,
      "loss": 3.0871,
      "step": 68358
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8207488059997559,
      "learning_rate": 0.00047884739678297466,
      "loss": 3.0199,
      "step": 68359
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3729217052459717,
      "learning_rate": 0.00047884411258837864,
      "loss": 3.0319,
      "step": 68360
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4801000356674194,
      "learning_rate": 0.00047884082836053204,
      "loss": 3.0051,
      "step": 68361
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8222360610961914,
      "learning_rate": 0.00047883754409943567,
      "loss": 3.0695,
      "step": 68362
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.344729423522949,
      "learning_rate": 0.00047883425980509007,
      "loss": 2.9355,
      "step": 68363
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.421219825744629,
      "learning_rate": 0.0004788309754774956,
      "loss": 3.0457,
      "step": 68364
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0225722789764404,
      "learning_rate": 0.00047882769111665325,
      "loss": 3.0161,
      "step": 68365
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5041083097457886,
      "learning_rate": 0.00047882440672256343,
      "loss": 2.9792,
      "step": 68366
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.260270118713379,
      "learning_rate": 0.0004788211222952267,
      "loss": 3.0625,
      "step": 68367
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.524951457977295,
      "learning_rate": 0.0004788178378346439,
      "loss": 3.0467,
      "step": 68368
    },
    {
      "epoch": 0.89,
      "grad_norm": 4.005552768707275,
      "learning_rate": 0.0004788145533408154,
      "loss": 2.9623,
      "step": 68369
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3770363330841064,
      "learning_rate": 0.0004788112688137419,
      "loss": 2.9741,
      "step": 68370
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8617607355117798,
      "learning_rate": 0.000478807984253424,
      "loss": 3.1416,
      "step": 68371
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9725773334503174,
      "learning_rate": 0.0004788046996598623,
      "loss": 3.0809,
      "step": 68372
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.526240348815918,
      "learning_rate": 0.0004788014150330574,
      "loss": 2.959,
      "step": 68373
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5708891153335571,
      "learning_rate": 0.0004787981303730101,
      "loss": 3.2237,
      "step": 68374
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6815341711044312,
      "learning_rate": 0.00047879484567972075,
      "loss": 2.7824,
      "step": 68375
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4970951080322266,
      "learning_rate": 0.00047879156095318994,
      "loss": 2.9106,
      "step": 68376
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4960018396377563,
      "learning_rate": 0.00047878827619341857,
      "loss": 3.1412,
      "step": 68377
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.851436734199524,
      "learning_rate": 0.00047878499140040714,
      "loss": 2.8523,
      "step": 68378
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6277412176132202,
      "learning_rate": 0.00047878170657415596,
      "loss": 2.8274,
      "step": 68379
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4658678770065308,
      "learning_rate": 0.0004787784217146661,
      "loss": 3.0055,
      "step": 68380
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.773452877998352,
      "learning_rate": 0.00047877513682193786,
      "loss": 3.0342,
      "step": 68381
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.725202202796936,
      "learning_rate": 0.000478771851895972,
      "loss": 3.1342,
      "step": 68382
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1449196338653564,
      "learning_rate": 0.0004787685669367691,
      "loss": 2.9292,
      "step": 68383
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.504967451095581,
      "learning_rate": 0.00047876528194432973,
      "loss": 2.8408,
      "step": 68384
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9074585437774658,
      "learning_rate": 0.0004787619969186544,
      "loss": 2.7644,
      "step": 68385
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.643218755722046,
      "learning_rate": 0.0004787587118597441,
      "loss": 3.0105,
      "step": 68386
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6992493867874146,
      "learning_rate": 0.00047875542676759894,
      "loss": 2.9609,
      "step": 68387
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9362597465515137,
      "learning_rate": 0.0004787521416422198,
      "loss": 2.9913,
      "step": 68388
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.646533489227295,
      "learning_rate": 0.0004787488564836073,
      "loss": 3.1066,
      "step": 68389
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5385005474090576,
      "learning_rate": 0.00047874557129176204,
      "loss": 2.9479,
      "step": 68390
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8475890159606934,
      "learning_rate": 0.0004787422860666846,
      "loss": 2.7091,
      "step": 68391
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4472700357437134,
      "learning_rate": 0.0004787390008083756,
      "loss": 2.892,
      "step": 68392
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5012660026550293,
      "learning_rate": 0.0004787357155168356,
      "loss": 2.9671,
      "step": 68393
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.746764898300171,
      "learning_rate": 0.0004787324301920653,
      "loss": 3.0472,
      "step": 68394
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5548784732818604,
      "learning_rate": 0.0004787291448340653,
      "loss": 3.033,
      "step": 68395
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7836281061172485,
      "learning_rate": 0.0004787258594428361,
      "loss": 2.9547,
      "step": 68396
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.616300106048584,
      "learning_rate": 0.00047872257401837846,
      "loss": 3.1887,
      "step": 68397
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6245558261871338,
      "learning_rate": 0.0004787192885606929,
      "loss": 3.0894,
      "step": 68398
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3251795768737793,
      "learning_rate": 0.00047871600306978,
      "loss": 2.9647,
      "step": 68399
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5197829008102417,
      "learning_rate": 0.0004787127175456404,
      "loss": 3.204,
      "step": 68400
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5375351905822754,
      "learning_rate": 0.0004787094319882748,
      "loss": 3.1103,
      "step": 68401
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.538051724433899,
      "learning_rate": 0.00047870614639768376,
      "loss": 3.1109,
      "step": 68402
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4616243839263916,
      "learning_rate": 0.0004787028607738678,
      "loss": 3.024,
      "step": 68403
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4519973993301392,
      "learning_rate": 0.00047869957511682766,
      "loss": 3.2787,
      "step": 68404
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5576889514923096,
      "learning_rate": 0.0004786962894265639,
      "loss": 3.1695,
      "step": 68405
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5317766666412354,
      "learning_rate": 0.0004786930037030771,
      "loss": 2.9969,
      "step": 68406
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1076653003692627,
      "learning_rate": 0.00047868971794636785,
      "loss": 2.9157,
      "step": 68407
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3839550018310547,
      "learning_rate": 0.0004786864321564369,
      "loss": 3.0393,
      "step": 68408
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5380021333694458,
      "learning_rate": 0.0004786831463332848,
      "loss": 2.9628,
      "step": 68409
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9629236459732056,
      "learning_rate": 0.00047867986047691203,
      "loss": 3.0829,
      "step": 68410
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9954338073730469,
      "learning_rate": 0.0004786765745873193,
      "loss": 3.3062,
      "step": 68411
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5803827047348022,
      "learning_rate": 0.0004786732886645073,
      "loss": 3.0718,
      "step": 68412
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.1748669147491455,
      "learning_rate": 0.00047867000270847656,
      "loss": 2.9329,
      "step": 68413
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4698119163513184,
      "learning_rate": 0.00047866671671922763,
      "loss": 3.3055,
      "step": 68414
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6929079294204712,
      "learning_rate": 0.0004786634306967612,
      "loss": 2.9318,
      "step": 68415
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6005887985229492,
      "learning_rate": 0.0004786601446410779,
      "loss": 2.8823,
      "step": 68416
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6838923692703247,
      "learning_rate": 0.00047865685855217824,
      "loss": 2.9963,
      "step": 68417
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.550560712814331,
      "learning_rate": 0.0004786535724300629,
      "loss": 3.0862,
      "step": 68418
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3395003080368042,
      "learning_rate": 0.0004786502862747326,
      "loss": 3.2889,
      "step": 68419
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5682774782180786,
      "learning_rate": 0.0004786470000861878,
      "loss": 3.1789,
      "step": 68420
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5728528499603271,
      "learning_rate": 0.00047864371386442905,
      "loss": 3.0751,
      "step": 68421
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4628068208694458,
      "learning_rate": 0.0004786404276094572,
      "loss": 2.9001,
      "step": 68422
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6943422555923462,
      "learning_rate": 0.0004786371413212726,
      "loss": 2.8137,
      "step": 68423
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8424402475357056,
      "learning_rate": 0.00047863385499987603,
      "loss": 3.1408,
      "step": 68424
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.527385950088501,
      "learning_rate": 0.00047863056864526815,
      "loss": 3.0721,
      "step": 68425
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7511495351791382,
      "learning_rate": 0.0004786272822574493,
      "loss": 2.7512,
      "step": 68426
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.768486738204956,
      "learning_rate": 0.00047862399583642034,
      "loss": 2.8158,
      "step": 68427
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.8539602756500244,
      "learning_rate": 0.0004786207093821818,
      "loss": 3.0304,
      "step": 68428
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6123356819152832,
      "learning_rate": 0.0004786174228947344,
      "loss": 3.1179,
      "step": 68429
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1858389377593994,
      "learning_rate": 0.0004786141363740785,
      "loss": 3.0673,
      "step": 68430
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.772077798843384,
      "learning_rate": 0.00047861084982021493,
      "loss": 3.1501,
      "step": 68431
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7854496240615845,
      "learning_rate": 0.0004786075632331443,
      "loss": 3.0004,
      "step": 68432
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.030102014541626,
      "learning_rate": 0.00047860427661286704,
      "loss": 3.0523,
      "step": 68433
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.264303207397461,
      "learning_rate": 0.00047860098995938387,
      "loss": 3.1014,
      "step": 68434
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.615592122077942,
      "learning_rate": 0.00047859770327269554,
      "loss": 3.0867,
      "step": 68435
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8383458852767944,
      "learning_rate": 0.00047859441655280233,
      "loss": 3.1515,
      "step": 68436
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.131422758102417,
      "learning_rate": 0.0004785911297997052,
      "loss": 2.9769,
      "step": 68437
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9383728504180908,
      "learning_rate": 0.0004785878430134046,
      "loss": 2.9943,
      "step": 68438
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6734013557434082,
      "learning_rate": 0.0004785845561939011,
      "loss": 2.905,
      "step": 68439
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7368525266647339,
      "learning_rate": 0.00047858126934119535,
      "loss": 2.9222,
      "step": 68440
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9027423858642578,
      "learning_rate": 0.00047857798245528807,
      "loss": 3.0295,
      "step": 68441
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7910041809082031,
      "learning_rate": 0.0004785746955361796,
      "loss": 3.0667,
      "step": 68442
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1474862098693848,
      "learning_rate": 0.0004785714085838708,
      "loss": 2.8345,
      "step": 68443
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1407546997070312,
      "learning_rate": 0.00047856812159836226,
      "loss": 3.0787,
      "step": 68444
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.593615770339966,
      "learning_rate": 0.00047856483457965447,
      "loss": 3.2124,
      "step": 68445
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.0574758052825928,
      "learning_rate": 0.0004785615475277481,
      "loss": 2.9814,
      "step": 68446
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4372894763946533,
      "learning_rate": 0.00047855826044264387,
      "loss": 3.1454,
      "step": 68447
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.7482194900512695,
      "learning_rate": 0.00047855497332434214,
      "loss": 2.9417,
      "step": 68448
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.5359432697296143,
      "learning_rate": 0.0004785516861728438,
      "loss": 2.938,
      "step": 68449
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.579244613647461,
      "learning_rate": 0.00047854839898814924,
      "loss": 3.0979,
      "step": 68450
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.067558526992798,
      "learning_rate": 0.0004785451117702592,
      "loss": 3.0627,
      "step": 68451
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1690821647644043,
      "learning_rate": 0.0004785418245191743,
      "loss": 3.2317,
      "step": 68452
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6093186140060425,
      "learning_rate": 0.0004785385372348951,
      "loss": 3.0307,
      "step": 68453
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5566856861114502,
      "learning_rate": 0.0004785352499174221,
      "loss": 3.0735,
      "step": 68454
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5690735578536987,
      "learning_rate": 0.0004785319625667561,
      "loss": 2.9629,
      "step": 68455
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.626929521560669,
      "learning_rate": 0.00047852867518289764,
      "loss": 3.2423,
      "step": 68456
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7209844589233398,
      "learning_rate": 0.00047852538776584736,
      "loss": 2.9998,
      "step": 68457
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8066785335540771,
      "learning_rate": 0.00047852210031560586,
      "loss": 3.3036,
      "step": 68458
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1821250915527344,
      "learning_rate": 0.00047851881283217363,
      "loss": 2.8051,
      "step": 68459
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5341954231262207,
      "learning_rate": 0.0004785155253155514,
      "loss": 2.9658,
      "step": 68460
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1992063522338867,
      "learning_rate": 0.0004785122377657399,
      "loss": 2.9046,
      "step": 68461
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.613295316696167,
      "learning_rate": 0.0004785089501827395,
      "loss": 2.992,
      "step": 68462
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.493516683578491,
      "learning_rate": 0.0004785056625665509,
      "loss": 3.1103,
      "step": 68463
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.474350094795227,
      "learning_rate": 0.0004785023749171747,
      "loss": 2.7966,
      "step": 68464
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.093209743499756,
      "learning_rate": 0.0004784990872346116,
      "loss": 3.0317,
      "step": 68465
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8703283071517944,
      "learning_rate": 0.00047849579951886215,
      "loss": 2.9094,
      "step": 68466
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7981667518615723,
      "learning_rate": 0.00047849251176992696,
      "loss": 2.9761,
      "step": 68467
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.945183277130127,
      "learning_rate": 0.0004784892239878066,
      "loss": 3.1939,
      "step": 68468
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9993005990982056,
      "learning_rate": 0.0004784859361725018,
      "loss": 2.9543,
      "step": 68469
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4467220306396484,
      "learning_rate": 0.00047848264832401304,
      "loss": 3.143,
      "step": 68470
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5757896900177002,
      "learning_rate": 0.000478479360442341,
      "loss": 2.6182,
      "step": 68471
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1027517318725586,
      "learning_rate": 0.00047847607252748627,
      "loss": 2.7353,
      "step": 68472
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5100488662719727,
      "learning_rate": 0.00047847278457944945,
      "loss": 3.0083,
      "step": 68473
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5287001132965088,
      "learning_rate": 0.0004784694965982312,
      "loss": 3.1033,
      "step": 68474
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7200177907943726,
      "learning_rate": 0.00047846620858383215,
      "loss": 2.8398,
      "step": 68475
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.939134955406189,
      "learning_rate": 0.0004784629205362527,
      "loss": 3.0291,
      "step": 68476
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0709152221679688,
      "learning_rate": 0.0004784596324554939,
      "loss": 3.0542,
      "step": 68477
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.81026029586792,
      "learning_rate": 0.0004784563443415558,
      "loss": 3.3292,
      "step": 68478
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.7361109256744385,
      "learning_rate": 0.0004784530561944394,
      "loss": 3.0175,
      "step": 68479
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.386789083480835,
      "learning_rate": 0.0004784497680141453,
      "loss": 2.8856,
      "step": 68480
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.538659930229187,
      "learning_rate": 0.0004784464798006739,
      "loss": 3.0006,
      "step": 68481
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9884697198867798,
      "learning_rate": 0.00047844319155402595,
      "loss": 2.8871,
      "step": 68482
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.7117674350738525,
      "learning_rate": 0.0004784399032742021,
      "loss": 3.0206,
      "step": 68483
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.051828384399414,
      "learning_rate": 0.0004784366149612029,
      "loss": 2.8423,
      "step": 68484
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.727291226387024,
      "learning_rate": 0.00047843332661502886,
      "loss": 3.2094,
      "step": 68485
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2169458866119385,
      "learning_rate": 0.00047843003823568086,
      "loss": 2.8723,
      "step": 68486
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4305320978164673,
      "learning_rate": 0.00047842674982315924,
      "loss": 3.217,
      "step": 68487
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.307511329650879,
      "learning_rate": 0.00047842346137746467,
      "loss": 3.192,
      "step": 68488
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.788370132446289,
      "learning_rate": 0.0004784201728985979,
      "loss": 2.8956,
      "step": 68489
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7114859819412231,
      "learning_rate": 0.00047841688438655944,
      "loss": 3.2307,
      "step": 68490
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.54897141456604,
      "learning_rate": 0.00047841359584134996,
      "loss": 2.9235,
      "step": 68491
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4986493587493896,
      "learning_rate": 0.00047841030726296997,
      "loss": 3.2771,
      "step": 68492
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9286725521087646,
      "learning_rate": 0.00047840701865142006,
      "loss": 3.0109,
      "step": 68493
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6200536489486694,
      "learning_rate": 0.000478403730006701,
      "loss": 3.2059,
      "step": 68494
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4848204851150513,
      "learning_rate": 0.0004784004413288133,
      "loss": 3.0993,
      "step": 68495
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.545287013053894,
      "learning_rate": 0.00047839715261775766,
      "loss": 2.9008,
      "step": 68496
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4604127407073975,
      "learning_rate": 0.0004783938638735346,
      "loss": 3.0165,
      "step": 68497
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4916754961013794,
      "learning_rate": 0.0004783905750961447,
      "loss": 3.2681,
      "step": 68498
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4465848207473755,
      "learning_rate": 0.00047838728628558865,
      "loss": 3.1096,
      "step": 68499
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8331843614578247,
      "learning_rate": 0.00047838399744186703,
      "loss": 2.7933,
      "step": 68500
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6958880424499512,
      "learning_rate": 0.00047838070856498044,
      "loss": 2.782,
      "step": 68501
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1371922492980957,
      "learning_rate": 0.0004783774196549295,
      "loss": 3.1325,
      "step": 68502
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.730202555656433,
      "learning_rate": 0.00047837413071171483,
      "loss": 3.0499,
      "step": 68503
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.783496856689453,
      "learning_rate": 0.00047837084173533713,
      "loss": 3.1235,
      "step": 68504
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5436265468597412,
      "learning_rate": 0.0004783675527257968,
      "loss": 2.9058,
      "step": 68505
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.481688976287842,
      "learning_rate": 0.0004783642636830946,
      "loss": 3.1,
      "step": 68506
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1267762184143066,
      "learning_rate": 0.00047836097460723123,
      "loss": 3.1225,
      "step": 68507
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.962428092956543,
      "learning_rate": 0.00047835768549820714,
      "loss": 3.0017,
      "step": 68508
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.739143967628479,
      "learning_rate": 0.00047835439635602293,
      "loss": 3.0963,
      "step": 68509
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5119303464889526,
      "learning_rate": 0.0004783511071806792,
      "loss": 3.1467,
      "step": 68510
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7220277786254883,
      "learning_rate": 0.00047834781797217687,
      "loss": 3.1238,
      "step": 68511
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5485612154006958,
      "learning_rate": 0.0004783445287305161,
      "loss": 3.2401,
      "step": 68512
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5605820417404175,
      "learning_rate": 0.00047834123945569786,
      "loss": 2.9379,
      "step": 68513
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6136865615844727,
      "learning_rate": 0.0004783379501477225,
      "loss": 3.3337,
      "step": 68514
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0436036586761475,
      "learning_rate": 0.00047833466080659083,
      "loss": 3.3931,
      "step": 68515
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.335574150085449,
      "learning_rate": 0.0004783313714323033,
      "loss": 3.088,
      "step": 68516
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.018389940261841,
      "learning_rate": 0.00047832808202486073,
      "loss": 2.9472,
      "step": 68517
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7150229215621948,
      "learning_rate": 0.0004783247925842635,
      "loss": 2.8671,
      "step": 68518
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4447532892227173,
      "learning_rate": 0.0004783215031105124,
      "loss": 2.9608,
      "step": 68519
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5758501291275024,
      "learning_rate": 0.0004783182136036079,
      "loss": 3.231,
      "step": 68520
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5338969230651855,
      "learning_rate": 0.0004783149240635507,
      "loss": 3.019,
      "step": 68521
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5911972522735596,
      "learning_rate": 0.0004783116344903413,
      "loss": 3.0026,
      "step": 68522
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.512395977973938,
      "learning_rate": 0.0004783083448839805,
      "loss": 3.0573,
      "step": 68523
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.857670783996582,
      "learning_rate": 0.0004783050552444688,
      "loss": 2.9316,
      "step": 68524
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.405643343925476,
      "learning_rate": 0.00047830176557180684,
      "loss": 3.1107,
      "step": 68525
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6286665201187134,
      "learning_rate": 0.00047829847586599526,
      "loss": 2.9401,
      "step": 68526
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3685071468353271,
      "learning_rate": 0.0004782951861270345,
      "loss": 3.0922,
      "step": 68527
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4717657566070557,
      "learning_rate": 0.0004782918963549254,
      "loss": 2.9557,
      "step": 68528
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.463468313217163,
      "learning_rate": 0.00047828860654966845,
      "loss": 3.0091,
      "step": 68529
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5075987577438354,
      "learning_rate": 0.0004782853167112642,
      "loss": 2.9623,
      "step": 68530
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9237143993377686,
      "learning_rate": 0.00047828202683971346,
      "loss": 3.0782,
      "step": 68531
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5945500135421753,
      "learning_rate": 0.0004782787369350167,
      "loss": 3.2735,
      "step": 68532
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4279571771621704,
      "learning_rate": 0.00047827544699717453,
      "loss": 3.2273,
      "step": 68533
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.642460823059082,
      "learning_rate": 0.0004782721570261876,
      "loss": 3.3051,
      "step": 68534
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9703301191329956,
      "learning_rate": 0.00047826886702205654,
      "loss": 2.9387,
      "step": 68535
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6140106916427612,
      "learning_rate": 0.0004782655769847819,
      "loss": 2.7826,
      "step": 68536
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3782366514205933,
      "learning_rate": 0.0004782622869143643,
      "loss": 3.02,
      "step": 68537
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5806752443313599,
      "learning_rate": 0.0004782589968108044,
      "loss": 3.1151,
      "step": 68538
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8418219089508057,
      "learning_rate": 0.0004782557066741028,
      "loss": 3.068,
      "step": 68539
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.656836986541748,
      "learning_rate": 0.0004782524165042601,
      "loss": 3.2121,
      "step": 68540
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8182048797607422,
      "learning_rate": 0.00047824912630127687,
      "loss": 2.8888,
      "step": 68541
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8164680004119873,
      "learning_rate": 0.00047824583606515377,
      "loss": 3.075,
      "step": 68542
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.000701904296875,
      "learning_rate": 0.0004782425457958915,
      "loss": 3.2142,
      "step": 68543
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7314354181289673,
      "learning_rate": 0.00047823925549349045,
      "loss": 3.0654,
      "step": 68544
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6190611124038696,
      "learning_rate": 0.0004782359651579514,
      "loss": 3.1139,
      "step": 68545
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8587560653686523,
      "learning_rate": 0.0004782326747892749,
      "loss": 3.1284,
      "step": 68546
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2569169998168945,
      "learning_rate": 0.0004782293843874617,
      "loss": 2.9556,
      "step": 68547
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6096371412277222,
      "learning_rate": 0.0004782260939525121,
      "loss": 3.1049,
      "step": 68548
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.524876356124878,
      "learning_rate": 0.00047822280348442704,
      "loss": 2.9853,
      "step": 68549
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.252462387084961,
      "learning_rate": 0.000478219512983207,
      "loss": 3.1268,
      "step": 68550
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.817193627357483,
      "learning_rate": 0.0004782162224488526,
      "loss": 3.0286,
      "step": 68551
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8349813222885132,
      "learning_rate": 0.00047821293188136433,
      "loss": 3.0673,
      "step": 68552
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6874719858169556,
      "learning_rate": 0.00047820964128074303,
      "loss": 3.0811,
      "step": 68553
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5216856002807617,
      "learning_rate": 0.00047820635064698905,
      "loss": 3.118,
      "step": 68554
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3381235599517822,
      "learning_rate": 0.0004782030599801032,
      "loss": 2.828,
      "step": 68555
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.906426429748535,
      "learning_rate": 0.0004781997692800862,
      "loss": 2.9136,
      "step": 68556
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6023555994033813,
      "learning_rate": 0.0004781964785469383,
      "loss": 2.9433,
      "step": 68557
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6426283121109009,
      "learning_rate": 0.00047819318778066035,
      "loss": 2.932,
      "step": 68558
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4581592082977295,
      "learning_rate": 0.00047818989698125296,
      "loss": 3.1544,
      "step": 68559
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5428107976913452,
      "learning_rate": 0.0004781866061487168,
      "loss": 2.7987,
      "step": 68560
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7593207359313965,
      "learning_rate": 0.0004781833152830522,
      "loss": 3.031,
      "step": 68561
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8409630060195923,
      "learning_rate": 0.00047818002438426,
      "loss": 2.8154,
      "step": 68562
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1469902992248535,
      "learning_rate": 0.0004781767334523409,
      "loss": 3.0184,
      "step": 68563
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8719120025634766,
      "learning_rate": 0.00047817344248729526,
      "loss": 2.9808,
      "step": 68564
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3660212755203247,
      "learning_rate": 0.00047817015148912386,
      "loss": 2.9001,
      "step": 68565
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.0079996585845947,
      "learning_rate": 0.0004781668604578273,
      "loss": 2.7751,
      "step": 68566
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5563044548034668,
      "learning_rate": 0.0004781635693934061,
      "loss": 3.1422,
      "step": 68567
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8568456172943115,
      "learning_rate": 0.00047816027829586095,
      "loss": 3.1577,
      "step": 68568
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4186389446258545,
      "learning_rate": 0.0004781569871651924,
      "loss": 2.9326,
      "step": 68569
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5103034973144531,
      "learning_rate": 0.00047815369600140114,
      "loss": 2.9107,
      "step": 68570
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.717092514038086,
      "learning_rate": 0.0004781504048044877,
      "loss": 2.9628,
      "step": 68571
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9956250190734863,
      "learning_rate": 0.0004781471135744529,
      "loss": 2.878,
      "step": 68572
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.830080509185791,
      "learning_rate": 0.000478143822311297,
      "loss": 2.9607,
      "step": 68573
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6216508150100708,
      "learning_rate": 0.0004781405310150208,
      "loss": 3.1612,
      "step": 68574
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.206556558609009,
      "learning_rate": 0.00047813723968562503,
      "loss": 2.7946,
      "step": 68575
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5156174898147583,
      "learning_rate": 0.0004781339483231102,
      "loss": 3.1252,
      "step": 68576
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6911036968231201,
      "learning_rate": 0.00047813065692747683,
      "loss": 2.7827,
      "step": 68577
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.587666630744934,
      "learning_rate": 0.0004781273654987256,
      "loss": 3.0824,
      "step": 68578
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5950109958648682,
      "learning_rate": 0.0004781240740368571,
      "loss": 2.9848,
      "step": 68579
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5689473152160645,
      "learning_rate": 0.000478120782541872,
      "loss": 3.0014,
      "step": 68580
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.164706230163574,
      "learning_rate": 0.00047811749101377095,
      "loss": 3.0531,
      "step": 68581
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7746964693069458,
      "learning_rate": 0.0004781141994525545,
      "loss": 2.8938,
      "step": 68582
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.627303957939148,
      "learning_rate": 0.00047811090785822313,
      "loss": 3.0596,
      "step": 68583
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6433708667755127,
      "learning_rate": 0.0004781076162307777,
      "loss": 3.0301,
      "step": 68584
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6071981191635132,
      "learning_rate": 0.00047810432457021866,
      "loss": 3.3538,
      "step": 68585
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6594576835632324,
      "learning_rate": 0.00047810103287654663,
      "loss": 2.9822,
      "step": 68586
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4061572551727295,
      "learning_rate": 0.00047809774114976236,
      "loss": 3.1588,
      "step": 68587
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8285155296325684,
      "learning_rate": 0.0004780944493898663,
      "loss": 2.9154,
      "step": 68588
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8402503728866577,
      "learning_rate": 0.00047809115759685905,
      "loss": 2.9549,
      "step": 68589
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5963586568832397,
      "learning_rate": 0.0004780878657707413,
      "loss": 2.961,
      "step": 68590
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6175613403320312,
      "learning_rate": 0.00047808457391151373,
      "loss": 3.1505,
      "step": 68591
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.587043285369873,
      "learning_rate": 0.0004780812820191769,
      "loss": 2.9741,
      "step": 68592
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5512219667434692,
      "learning_rate": 0.0004780779900937314,
      "loss": 3.0044,
      "step": 68593
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8713027238845825,
      "learning_rate": 0.00047807469813517775,
      "loss": 3.0401,
      "step": 68594
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.8853495121002197,
      "learning_rate": 0.0004780714061435167,
      "loss": 3.0055,
      "step": 68595
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2682416439056396,
      "learning_rate": 0.00047806811411874885,
      "loss": 3.3178,
      "step": 68596
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4531842470169067,
      "learning_rate": 0.00047806482206087466,
      "loss": 3.0067,
      "step": 68597
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.153496265411377,
      "learning_rate": 0.00047806152996989496,
      "loss": 3.1352,
      "step": 68598
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6122450828552246,
      "learning_rate": 0.0004780582378458103,
      "loss": 3.1738,
      "step": 68599
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8467984199523926,
      "learning_rate": 0.0004780549456886211,
      "loss": 2.9752,
      "step": 68600
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8385534286499023,
      "learning_rate": 0.00047805165349832827,
      "loss": 3.0743,
      "step": 68601
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.60359787940979,
      "learning_rate": 0.00047804836127493224,
      "loss": 3.074,
      "step": 68602
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.031369924545288,
      "learning_rate": 0.0004780450690184336,
      "loss": 3.1342,
      "step": 68603
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4642986059188843,
      "learning_rate": 0.00047804177672883307,
      "loss": 3.1078,
      "step": 68604
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6967376470565796,
      "learning_rate": 0.0004780384844061312,
      "loss": 2.9403,
      "step": 68605
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.095402956008911,
      "learning_rate": 0.00047803519205032863,
      "loss": 3.1733,
      "step": 68606
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0055785179138184,
      "learning_rate": 0.0004780318996614259,
      "loss": 3.1715,
      "step": 68607
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8094249963760376,
      "learning_rate": 0.00047802860723942375,
      "loss": 2.9179,
      "step": 68608
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.6278421878814697,
      "learning_rate": 0.00047802531478432266,
      "loss": 3.1118,
      "step": 68609
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.1649017333984375,
      "learning_rate": 0.0004780220222961233,
      "loss": 3.1883,
      "step": 68610
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.9489986896514893,
      "learning_rate": 0.0004780187297748264,
      "loss": 3.1653,
      "step": 68611
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9888807535171509,
      "learning_rate": 0.0004780154372204323,
      "loss": 3.1005,
      "step": 68612
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4564077854156494,
      "learning_rate": 0.0004780121446329419,
      "loss": 2.9261,
      "step": 68613
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3729727268218994,
      "learning_rate": 0.0004780088520123556,
      "loss": 2.9193,
      "step": 68614
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.417331576347351,
      "learning_rate": 0.00047800555935867417,
      "loss": 2.9328,
      "step": 68615
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5758639574050903,
      "learning_rate": 0.00047800226667189805,
      "loss": 2.8812,
      "step": 68616
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.737448811531067,
      "learning_rate": 0.000477998973952028,
      "loss": 3.0552,
      "step": 68617
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8534516096115112,
      "learning_rate": 0.00047799568119906465,
      "loss": 3.1536,
      "step": 68618
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2840733528137207,
      "learning_rate": 0.00047799238841300846,
      "loss": 3.1819,
      "step": 68619
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8966642618179321,
      "learning_rate": 0.00047798909559386015,
      "loss": 2.9943,
      "step": 68620
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9146963357925415,
      "learning_rate": 0.0004779858027416203,
      "loss": 2.9952,
      "step": 68621
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.8024258613586426,
      "learning_rate": 0.00047798250985628946,
      "loss": 3.1364,
      "step": 68622
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.582242965698242,
      "learning_rate": 0.0004779792169378684,
      "loss": 3.0019,
      "step": 68623
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.023369789123535,
      "learning_rate": 0.0004779759239863576,
      "loss": 3.123,
      "step": 68624
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.921071171760559,
      "learning_rate": 0.0004779726310017578,
      "loss": 3.2088,
      "step": 68625
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7079607248306274,
      "learning_rate": 0.0004779693379840694,
      "loss": 3.0869,
      "step": 68626
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3390969038009644,
      "learning_rate": 0.0004779660449332932,
      "loss": 3.2434,
      "step": 68627
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5601825714111328,
      "learning_rate": 0.0004779627518494298,
      "loss": 2.9702,
      "step": 68628
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1496822834014893,
      "learning_rate": 0.00047795945873247965,
      "loss": 3.1263,
      "step": 68629
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4827512502670288,
      "learning_rate": 0.0004779561655824436,
      "loss": 3.1671,
      "step": 68630
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5593323707580566,
      "learning_rate": 0.00047795287239932213,
      "loss": 3.0903,
      "step": 68631
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6622520685195923,
      "learning_rate": 0.00047794957918311583,
      "loss": 3.0208,
      "step": 68632
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6057796478271484,
      "learning_rate": 0.00047794628593382526,
      "loss": 3.1143,
      "step": 68633
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4404749870300293,
      "learning_rate": 0.00047794299265145124,
      "loss": 2.9939,
      "step": 68634
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5917680263519287,
      "learning_rate": 0.00047793969933599426,
      "loss": 3.0502,
      "step": 68635
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.735780954360962,
      "learning_rate": 0.00047793640598745485,
      "loss": 2.979,
      "step": 68636
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6724296808242798,
      "learning_rate": 0.0004779331126058338,
      "loss": 3.1376,
      "step": 68637
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7090178728103638,
      "learning_rate": 0.00047792981919113147,
      "loss": 2.8394,
      "step": 68638
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.25168514251709,
      "learning_rate": 0.0004779265257433487,
      "loss": 3.0583,
      "step": 68639
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5138881206512451,
      "learning_rate": 0.0004779232322624861,
      "loss": 2.9485,
      "step": 68640
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.35599946975708,
      "learning_rate": 0.00047791993874854417,
      "loss": 3.1153,
      "step": 68641
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.1121156215667725,
      "learning_rate": 0.0004779166452015236,
      "loss": 2.8617,
      "step": 68642
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7710074186325073,
      "learning_rate": 0.0004779133516214249,
      "loss": 2.6567,
      "step": 68643
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8905773162841797,
      "learning_rate": 0.00047791005800824877,
      "loss": 2.9952,
      "step": 68644
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6474636793136597,
      "learning_rate": 0.00047790676436199585,
      "loss": 2.7924,
      "step": 68645
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6221085786819458,
      "learning_rate": 0.0004779034706826666,
      "loss": 2.9928,
      "step": 68646
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9949992895126343,
      "learning_rate": 0.0004779001769702618,
      "loss": 2.9545,
      "step": 68647
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.05759596824646,
      "learning_rate": 0.00047789688322478205,
      "loss": 3.1309,
      "step": 68648
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.135438919067383,
      "learning_rate": 0.0004778935894462278,
      "loss": 3.1716,
      "step": 68649
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6974875926971436,
      "learning_rate": 0.00047789029563459983,
      "loss": 3.1735,
      "step": 68650
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.5619518756866455,
      "learning_rate": 0.00047788700178989877,
      "loss": 3.198,
      "step": 68651
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.156510353088379,
      "learning_rate": 0.00047788370791212504,
      "loss": 3.0434,
      "step": 68652
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4480371475219727,
      "learning_rate": 0.00047788041400127936,
      "loss": 3.1096,
      "step": 68653
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0950305461883545,
      "learning_rate": 0.0004778771200573625,
      "loss": 2.9212,
      "step": 68654
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.8740553855895996,
      "learning_rate": 0.0004778738260803748,
      "loss": 2.8821,
      "step": 68655
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.707352638244629,
      "learning_rate": 0.00047787053207031704,
      "loss": 2.9457,
      "step": 68656
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8262348175048828,
      "learning_rate": 0.00047786723802718985,
      "loss": 3.2062,
      "step": 68657
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.4584875106811523,
      "learning_rate": 0.0004778639439509936,
      "loss": 3.0967,
      "step": 68658
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.030998706817627,
      "learning_rate": 0.00047786064984172924,
      "loss": 2.957,
      "step": 68659
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.88731849193573,
      "learning_rate": 0.0004778573556993972,
      "loss": 2.8052,
      "step": 68660
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.651808500289917,
      "learning_rate": 0.0004778540615239981,
      "loss": 2.9041,
      "step": 68661
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7249913215637207,
      "learning_rate": 0.0004778507673155326,
      "loss": 3.1043,
      "step": 68662
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.0754075050354004,
      "learning_rate": 0.0004778474730740013,
      "loss": 3.0182,
      "step": 68663
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7781720161437988,
      "learning_rate": 0.00047784417879940475,
      "loss": 2.7618,
      "step": 68664
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4519139528274536,
      "learning_rate": 0.0004778408844917436,
      "loss": 3.2242,
      "step": 68665
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3735941648483276,
      "learning_rate": 0.00047783759015101855,
      "loss": 3.2242,
      "step": 68666
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7134301662445068,
      "learning_rate": 0.0004778342957772301,
      "loss": 3.1058,
      "step": 68667
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3665833473205566,
      "learning_rate": 0.00047783100137037887,
      "loss": 3.0141,
      "step": 68668
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3584710359573364,
      "learning_rate": 0.0004778277069304655,
      "loss": 2.9776,
      "step": 68669
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6528681516647339,
      "learning_rate": 0.0004778244124574906,
      "loss": 3.199,
      "step": 68670
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9560381174087524,
      "learning_rate": 0.00047782111795145483,
      "loss": 2.9421,
      "step": 68671
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6859729290008545,
      "learning_rate": 0.0004778178234123587,
      "loss": 3.082,
      "step": 68672
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3774504661560059,
      "learning_rate": 0.0004778145288402029,
      "loss": 3.0798,
      "step": 68673
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6325732469558716,
      "learning_rate": 0.00047781123423498804,
      "loss": 3.1931,
      "step": 68674
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.35888671875,
      "learning_rate": 0.0004778079395967147,
      "loss": 3.0014,
      "step": 68675
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5291837453842163,
      "learning_rate": 0.00047780464492538346,
      "loss": 2.9185,
      "step": 68676
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6010488271713257,
      "learning_rate": 0.0004778013502209951,
      "loss": 2.8414,
      "step": 68677
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9035303592681885,
      "learning_rate": 0.00047779805548355003,
      "loss": 3.141,
      "step": 68678
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8959096670150757,
      "learning_rate": 0.00047779476071304895,
      "loss": 3.1042,
      "step": 68679
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6604517698287964,
      "learning_rate": 0.0004777914659094925,
      "loss": 3.1477,
      "step": 68680
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6408175230026245,
      "learning_rate": 0.0004777881710728812,
      "loss": 2.847,
      "step": 68681
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6942167282104492,
      "learning_rate": 0.00047778487620321586,
      "loss": 3.0132,
      "step": 68682
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5358039140701294,
      "learning_rate": 0.00047778158130049676,
      "loss": 3.1516,
      "step": 68683
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.478318691253662,
      "learning_rate": 0.00047777828636472484,
      "loss": 2.8978,
      "step": 68684
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5573818683624268,
      "learning_rate": 0.0004777749913959005,
      "loss": 2.8368,
      "step": 68685
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5781430006027222,
      "learning_rate": 0.00047777169639402447,
      "loss": 3.0047,
      "step": 68686
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4864119291305542,
      "learning_rate": 0.00047776840135909743,
      "loss": 2.9581,
      "step": 68687
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3806519508361816,
      "learning_rate": 0.0004777651062911198,
      "loss": 2.9504,
      "step": 68688
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4005335569381714,
      "learning_rate": 0.00047776181119009225,
      "loss": 2.9214,
      "step": 68689
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.896949052810669,
      "learning_rate": 0.00047775851605601547,
      "loss": 3.0658,
      "step": 68690
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4534631967544556,
      "learning_rate": 0.00047775522088889,
      "loss": 2.9464,
      "step": 68691
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8346351385116577,
      "learning_rate": 0.00047775192568871644,
      "loss": 3.0646,
      "step": 68692
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3311971426010132,
      "learning_rate": 0.00047774863045549554,
      "loss": 2.7639,
      "step": 68693
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5641655921936035,
      "learning_rate": 0.00047774533518922773,
      "loss": 3.0239,
      "step": 68694
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4659022092819214,
      "learning_rate": 0.00047774203988991377,
      "loss": 2.9754,
      "step": 68695
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9671132564544678,
      "learning_rate": 0.0004777387445575541,
      "loss": 2.7236,
      "step": 68696
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.2466654777526855,
      "learning_rate": 0.00047773544919214953,
      "loss": 3.0997,
      "step": 68697
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6609293222427368,
      "learning_rate": 0.0004777321537937006,
      "loss": 2.9337,
      "step": 68698
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.703081727027893,
      "learning_rate": 0.0004777288583622079,
      "loss": 2.9157,
      "step": 68699
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.4858074188232422,
      "learning_rate": 0.0004777255628976721,
      "loss": 3.2123,
      "step": 68700
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.914062261581421,
      "learning_rate": 0.00047772226740009364,
      "loss": 2.7902,
      "step": 68701
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3047187328338623,
      "learning_rate": 0.00047771897186947327,
      "loss": 3.0354,
      "step": 68702
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2393903732299805,
      "learning_rate": 0.0004777156763058117,
      "loss": 2.9191,
      "step": 68703
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.2101738452911377,
      "learning_rate": 0.00047771238070910935,
      "loss": 3.16,
      "step": 68704
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.7695908546447754,
      "learning_rate": 0.00047770908507936694,
      "loss": 3.2301,
      "step": 68705
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6381850242614746,
      "learning_rate": 0.00047770578941658503,
      "loss": 3.057,
      "step": 68706
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9244776964187622,
      "learning_rate": 0.00047770249372076424,
      "loss": 3.1038,
      "step": 68707
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3376290798187256,
      "learning_rate": 0.00047769919799190525,
      "loss": 2.8743,
      "step": 68708
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7956122159957886,
      "learning_rate": 0.0004776959022300086,
      "loss": 3.0018,
      "step": 68709
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5734717845916748,
      "learning_rate": 0.00047769260643507495,
      "loss": 2.8949,
      "step": 68710
    },
    {
      "epoch": 0.89,
      "grad_norm": 2.3346543312072754,
      "learning_rate": 0.00047768931060710487,
      "loss": 2.89,
      "step": 68711
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8891878128051758,
      "learning_rate": 0.00047768601474609904,
      "loss": 2.8807,
      "step": 68712
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6972315311431885,
      "learning_rate": 0.00047768271885205796,
      "loss": 2.9566,
      "step": 68713
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5200856924057007,
      "learning_rate": 0.00047767942292498226,
      "loss": 2.9295,
      "step": 68714
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.876602053642273,
      "learning_rate": 0.0004776761269648727,
      "loss": 3.1234,
      "step": 68715
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7338060140609741,
      "learning_rate": 0.00047767283097172974,
      "loss": 3.1993,
      "step": 68716
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6781132221221924,
      "learning_rate": 0.00047766953494555417,
      "loss": 3.2984,
      "step": 68717
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.3192837238311768,
      "learning_rate": 0.0004776662388863463,
      "loss": 3.0174,
      "step": 68718
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.7053433656692505,
      "learning_rate": 0.00047766294279410706,
      "loss": 2.8039,
      "step": 68719
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6468369960784912,
      "learning_rate": 0.0004776596466688369,
      "loss": 2.8836,
      "step": 68720
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.8303511142730713,
      "learning_rate": 0.0004776563505105364,
      "loss": 2.9642,
      "step": 68721
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6904610395431519,
      "learning_rate": 0.0004776530543192063,
      "loss": 3.0943,
      "step": 68722
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.605894923210144,
      "learning_rate": 0.00047764975809484706,
      "loss": 2.874,
      "step": 68723
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.389947772026062,
      "learning_rate": 0.0004776464618374594,
      "loss": 3.1139,
      "step": 68724
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6616322994232178,
      "learning_rate": 0.00047764316554704394,
      "loss": 3.0701,
      "step": 68725
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.573121428489685,
      "learning_rate": 0.0004776398692236012,
      "loss": 2.7904,
      "step": 68726
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5272243022918701,
      "learning_rate": 0.00047763657286713197,
      "loss": 3.2154,
      "step": 68727
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.719267725944519,
      "learning_rate": 0.0004776332764776367,
      "loss": 2.8509,
      "step": 68728
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.419234275817871,
      "learning_rate": 0.00047762998005511603,
      "loss": 3.2219,
      "step": 68729
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6356897354125977,
      "learning_rate": 0.0004776266835995706,
      "loss": 2.9925,
      "step": 68730
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6233150959014893,
      "learning_rate": 0.000477623387111001,
      "loss": 2.9955,
      "step": 68731
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.5834012031555176,
      "learning_rate": 0.0004776200905894078,
      "loss": 3.1227,
      "step": 68732
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.434908151626587,
      "learning_rate": 0.00047761679403479185,
      "loss": 2.8451,
      "step": 68733
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.9309911727905273,
      "learning_rate": 0.00047761349744715345,
      "loss": 3.4114,
      "step": 68734
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.3758881092071533,
      "learning_rate": 0.00047761020082649335,
      "loss": 3.0295,
      "step": 68735
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.6569145917892456,
      "learning_rate": 0.00047760690417281225,
      "loss": 3.1852,
      "step": 68736
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5523426532745361,
      "learning_rate": 0.0004776036074861106,
      "loss": 2.9992,
      "step": 68737
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5722129344940186,
      "learning_rate": 0.00047760031076638914,
      "loss": 2.7451,
      "step": 68738
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3138816356658936,
      "learning_rate": 0.0004775970140136484,
      "loss": 3.0275,
      "step": 68739
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5991179943084717,
      "learning_rate": 0.000477593717227889,
      "loss": 3.0843,
      "step": 68740
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8955998420715332,
      "learning_rate": 0.00047759042040911156,
      "loss": 2.9874,
      "step": 68741
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6768014430999756,
      "learning_rate": 0.0004775871235573168,
      "loss": 3.1057,
      "step": 68742
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.444503903388977,
      "learning_rate": 0.00047758382667250515,
      "loss": 2.9882,
      "step": 68743
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6933950185775757,
      "learning_rate": 0.0004775805297546773,
      "loss": 3.2367,
      "step": 68744
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9248336553573608,
      "learning_rate": 0.000477577232803834,
      "loss": 2.9378,
      "step": 68745
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8585001230239868,
      "learning_rate": 0.0004775739358199757,
      "loss": 3.0836,
      "step": 68746
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.575770854949951,
      "learning_rate": 0.000477570638803103,
      "loss": 3.015,
      "step": 68747
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.557929277420044,
      "learning_rate": 0.0004775673417532166,
      "loss": 3.1277,
      "step": 68748
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6733218431472778,
      "learning_rate": 0.00047756404467031707,
      "loss": 3.1344,
      "step": 68749
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.737790584564209,
      "learning_rate": 0.00047756074755440496,
      "loss": 3.071,
      "step": 68750
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.099989175796509,
      "learning_rate": 0.00047755745040548107,
      "loss": 2.9661,
      "step": 68751
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7724343538284302,
      "learning_rate": 0.0004775541532235458,
      "loss": 3.124,
      "step": 68752
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7888339757919312,
      "learning_rate": 0.0004775508560085999,
      "loss": 2.9639,
      "step": 68753
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2449581623077393,
      "learning_rate": 0.00047754755876064397,
      "loss": 3.3033,
      "step": 68754
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6134151220321655,
      "learning_rate": 0.0004775442614796786,
      "loss": 2.8629,
      "step": 68755
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.218601942062378,
      "learning_rate": 0.00047754096416570435,
      "loss": 3.0187,
      "step": 68756
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4293406009674072,
      "learning_rate": 0.0004775376668187219,
      "loss": 3.0472,
      "step": 68757
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.0493791103363037,
      "learning_rate": 0.00047753436943873194,
      "loss": 2.9279,
      "step": 68758
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.939579486846924,
      "learning_rate": 0.0004775310720257349,
      "loss": 3.0033,
      "step": 68759
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8554953336715698,
      "learning_rate": 0.00047752777457973146,
      "loss": 3.0118,
      "step": 68760
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6123147010803223,
      "learning_rate": 0.00047752447710072227,
      "loss": 3.0651,
      "step": 68761
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5878002643585205,
      "learning_rate": 0.000477521179588708,
      "loss": 3.0647,
      "step": 68762
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8375409841537476,
      "learning_rate": 0.0004775178820436891,
      "loss": 3.194,
      "step": 68763
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4829328060150146,
      "learning_rate": 0.00047751458446566636,
      "loss": 3.0183,
      "step": 68764
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5698398351669312,
      "learning_rate": 0.00047751128685464026,
      "loss": 2.7611,
      "step": 68765
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.867468237876892,
      "learning_rate": 0.0004775079892106115,
      "loss": 3.2779,
      "step": 68766
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.446362018585205,
      "learning_rate": 0.0004775046915335806,
      "loss": 2.9926,
      "step": 68767
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1807663440704346,
      "learning_rate": 0.0004775013938235482,
      "loss": 3.0492,
      "step": 68768
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7566330432891846,
      "learning_rate": 0.000477498096080515,
      "loss": 3.0075,
      "step": 68769
    },
    {
      "epoch": 0.9,
      "grad_norm": 4.270345687866211,
      "learning_rate": 0.00047749479830448157,
      "loss": 2.9086,
      "step": 68770
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.0279059410095215,
      "learning_rate": 0.0004774915004954485,
      "loss": 3.0366,
      "step": 68771
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.458183765411377,
      "learning_rate": 0.0004774882026534164,
      "loss": 3.0191,
      "step": 68772
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3677051067352295,
      "learning_rate": 0.0004774849047783858,
      "loss": 2.913,
      "step": 68773
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.141604423522949,
      "learning_rate": 0.0004774816068703575,
      "loss": 3.0871,
      "step": 68774
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.9177019596099854,
      "learning_rate": 0.00047747830892933197,
      "loss": 3.0078,
      "step": 68775
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.5602903366088867,
      "learning_rate": 0.00047747501095530995,
      "loss": 3.2321,
      "step": 68776
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0318329334259033,
      "learning_rate": 0.00047747171294829197,
      "loss": 2.963,
      "step": 68777
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0962226390838623,
      "learning_rate": 0.00047746841490827857,
      "loss": 2.8633,
      "step": 68778
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.32275652885437,
      "learning_rate": 0.0004774651168352705,
      "loss": 3.0475,
      "step": 68779
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9733121395111084,
      "learning_rate": 0.00047746181872926823,
      "loss": 3.0126,
      "step": 68780
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9602832794189453,
      "learning_rate": 0.00047745852059027255,
      "loss": 3.0107,
      "step": 68781
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.554250955581665,
      "learning_rate": 0.00047745522241828404,
      "loss": 3.2516,
      "step": 68782
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5815925598144531,
      "learning_rate": 0.00047745192421330316,
      "loss": 3.2733,
      "step": 68783
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1450388431549072,
      "learning_rate": 0.00047744862597533054,
      "loss": 2.6558,
      "step": 68784
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4378827810287476,
      "learning_rate": 0.000477445327704367,
      "loss": 3.0066,
      "step": 68785
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3502103090286255,
      "learning_rate": 0.000477442029400413,
      "loss": 3.0596,
      "step": 68786
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7679747343063354,
      "learning_rate": 0.0004774387310634691,
      "loss": 2.9755,
      "step": 68787
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5628135204315186,
      "learning_rate": 0.0004774354326935362,
      "loss": 2.859,
      "step": 68788
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6090232133865356,
      "learning_rate": 0.00047743213429061446,
      "loss": 3.1639,
      "step": 68789
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0825695991516113,
      "learning_rate": 0.00047742883585470485,
      "loss": 2.8672,
      "step": 68790
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0562548637390137,
      "learning_rate": 0.0004774255373858079,
      "loss": 2.9405,
      "step": 68791
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.736350178718567,
      "learning_rate": 0.0004774222388839241,
      "loss": 3.2042,
      "step": 68792
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4916795492172241,
      "learning_rate": 0.00047741894034905423,
      "loss": 2.7971,
      "step": 68793
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.492984414100647,
      "learning_rate": 0.00047741564178119885,
      "loss": 3.4556,
      "step": 68794
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0428972244262695,
      "learning_rate": 0.0004774123431803584,
      "loss": 3.0644,
      "step": 68795
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3959319591522217,
      "learning_rate": 0.0004774090445465337,
      "loss": 2.8761,
      "step": 68796
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4538553953170776,
      "learning_rate": 0.00047740574587972547,
      "loss": 3.3883,
      "step": 68797
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.732485055923462,
      "learning_rate": 0.000477402447179934,
      "loss": 3.2395,
      "step": 68798
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.914185881614685,
      "learning_rate": 0.00047739914844716015,
      "loss": 3.0355,
      "step": 68799
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.351945400238037,
      "learning_rate": 0.0004773958496814044,
      "loss": 3.1652,
      "step": 68800
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5566598176956177,
      "learning_rate": 0.00047739255088266743,
      "loss": 3.1797,
      "step": 68801
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3071115016937256,
      "learning_rate": 0.0004773892520509498,
      "loss": 3.136,
      "step": 68802
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2125730514526367,
      "learning_rate": 0.00047738595318625224,
      "loss": 2.9098,
      "step": 68803
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6773271560668945,
      "learning_rate": 0.0004773826542885752,
      "loss": 3.1121,
      "step": 68804
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2391271591186523,
      "learning_rate": 0.00047737935535791947,
      "loss": 3.1348,
      "step": 68805
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.003330707550049,
      "learning_rate": 0.0004773760563942855,
      "loss": 2.9297,
      "step": 68806
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.57463538646698,
      "learning_rate": 0.000477372757397674,
      "loss": 2.95,
      "step": 68807
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7801824808120728,
      "learning_rate": 0.00047736945836808557,
      "loss": 2.996,
      "step": 68808
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5578415393829346,
      "learning_rate": 0.00047736615930552077,
      "loss": 3.0184,
      "step": 68809
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4724043607711792,
      "learning_rate": 0.0004773628602099802,
      "loss": 2.9933,
      "step": 68810
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5890370607376099,
      "learning_rate": 0.0004773595610814646,
      "loss": 3.084,
      "step": 68811
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5856328010559082,
      "learning_rate": 0.00047735626191997455,
      "loss": 3.0536,
      "step": 68812
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7321616411209106,
      "learning_rate": 0.0004773529627255106,
      "loss": 2.8485,
      "step": 68813
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.868190884590149,
      "learning_rate": 0.00047734966349807334,
      "loss": 2.7466,
      "step": 68814
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9082976579666138,
      "learning_rate": 0.0004773463642376635,
      "loss": 2.8485,
      "step": 68815
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.666070580482483,
      "learning_rate": 0.0004773430649442816,
      "loss": 3.135,
      "step": 68816
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4597868919372559,
      "learning_rate": 0.0004773397656179282,
      "loss": 3.0084,
      "step": 68817
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0600502490997314,
      "learning_rate": 0.0004773364662586041,
      "loss": 2.9524,
      "step": 68818
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4225013256073,
      "learning_rate": 0.00047733316686630965,
      "loss": 2.8283,
      "step": 68819
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.747771978378296,
      "learning_rate": 0.0004773298674410457,
      "loss": 2.7622,
      "step": 68820
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9193904399871826,
      "learning_rate": 0.00047732656798281283,
      "loss": 2.8696,
      "step": 68821
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.016507148742676,
      "learning_rate": 0.00047732326849161163,
      "loss": 3.1739,
      "step": 68822
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6731232404708862,
      "learning_rate": 0.00047731996896744255,
      "loss": 2.8201,
      "step": 68823
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7825286388397217,
      "learning_rate": 0.0004773166694103065,
      "loss": 2.8238,
      "step": 68824
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.659722924232483,
      "learning_rate": 0.00047731336982020375,
      "loss": 3.2557,
      "step": 68825
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6945542097091675,
      "learning_rate": 0.00047731007019713526,
      "loss": 3.3314,
      "step": 68826
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5305252075195312,
      "learning_rate": 0.00047730677054110145,
      "loss": 2.9169,
      "step": 68827
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7409098148345947,
      "learning_rate": 0.00047730347085210296,
      "loss": 3.0313,
      "step": 68828
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4595766067504883,
      "learning_rate": 0.00047730017113014033,
      "loss": 2.94,
      "step": 68829
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6063979864120483,
      "learning_rate": 0.0004772968713752142,
      "loss": 3.1141,
      "step": 68830
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.364572286605835,
      "learning_rate": 0.00047729357158732543,
      "loss": 3.0888,
      "step": 68831
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6069049835205078,
      "learning_rate": 0.00047729027176647435,
      "loss": 3.1566,
      "step": 68832
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4563415050506592,
      "learning_rate": 0.0004772869719126616,
      "loss": 2.7532,
      "step": 68833
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5932047367095947,
      "learning_rate": 0.00047728367202588796,
      "loss": 3.127,
      "step": 68834
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5429887771606445,
      "learning_rate": 0.0004772803721061539,
      "loss": 3.1874,
      "step": 68835
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6080471277236938,
      "learning_rate": 0.00047727707215346,
      "loss": 2.9966,
      "step": 68836
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.70829439163208,
      "learning_rate": 0.00047727377216780713,
      "loss": 3.1602,
      "step": 68837
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4978957176208496,
      "learning_rate": 0.00047727047214919557,
      "loss": 3.339,
      "step": 68838
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5777653455734253,
      "learning_rate": 0.0004772671720976261,
      "loss": 2.9998,
      "step": 68839
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5676089525222778,
      "learning_rate": 0.0004772638720130994,
      "loss": 2.7805,
      "step": 68840
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5399818420410156,
      "learning_rate": 0.0004772605718956158,
      "loss": 2.8182,
      "step": 68841
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1666276454925537,
      "learning_rate": 0.00047725727174517625,
      "loss": 2.9584,
      "step": 68842
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9465337991714478,
      "learning_rate": 0.00047725397156178124,
      "loss": 2.9054,
      "step": 68843
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.172779083251953,
      "learning_rate": 0.0004772506713454313,
      "loss": 2.8211,
      "step": 68844
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7141491174697876,
      "learning_rate": 0.0004772473710961272,
      "loss": 3.0803,
      "step": 68845
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7102113962173462,
      "learning_rate": 0.00047724407081386946,
      "loss": 2.7762,
      "step": 68846
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8785210847854614,
      "learning_rate": 0.0004772407704986587,
      "loss": 2.8282,
      "step": 68847
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6653648614883423,
      "learning_rate": 0.00047723747015049545,
      "loss": 3.1889,
      "step": 68848
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7543408870697021,
      "learning_rate": 0.0004772341697693805,
      "loss": 3.0417,
      "step": 68849
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4488399028778076,
      "learning_rate": 0.00047723086935531434,
      "loss": 3.2252,
      "step": 68850
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.9053046703338623,
      "learning_rate": 0.00047722756890829763,
      "loss": 3.3483,
      "step": 68851
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.450930118560791,
      "learning_rate": 0.00047722426842833094,
      "loss": 3.1985,
      "step": 68852
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5172687768936157,
      "learning_rate": 0.0004772209679154149,
      "loss": 3.1346,
      "step": 68853
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.610402226448059,
      "learning_rate": 0.00047721766736955013,
      "loss": 3.0604,
      "step": 68854
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.07025408744812,
      "learning_rate": 0.0004772143667907374,
      "loss": 2.8876,
      "step": 68855
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.822644829750061,
      "learning_rate": 0.0004772110661789771,
      "loss": 2.7771,
      "step": 68856
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4456002712249756,
      "learning_rate": 0.00047720776553426976,
      "loss": 2.8768,
      "step": 68857
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6308082342147827,
      "learning_rate": 0.0004772044648566163,
      "loss": 3.2637,
      "step": 68858
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.615142822265625,
      "learning_rate": 0.0004772011641460172,
      "loss": 3.1954,
      "step": 68859
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.65966796875,
      "learning_rate": 0.000477197863402473,
      "loss": 2.9676,
      "step": 68860
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7700809240341187,
      "learning_rate": 0.0004771945626259844,
      "loss": 3.0961,
      "step": 68861
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6172691583633423,
      "learning_rate": 0.00047719126181655196,
      "loss": 2.817,
      "step": 68862
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3255887031555176,
      "learning_rate": 0.00047718796097417633,
      "loss": 3.2108,
      "step": 68863
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7166064977645874,
      "learning_rate": 0.0004771846600988581,
      "loss": 3.0113,
      "step": 68864
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1640779972076416,
      "learning_rate": 0.0004771813591905979,
      "loss": 3.1749,
      "step": 68865
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.7128920555114746,
      "learning_rate": 0.0004771780582493964,
      "loss": 3.0661,
      "step": 68866
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.7278835773468018,
      "learning_rate": 0.0004771747572752541,
      "loss": 2.7645,
      "step": 68867
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.19390606880188,
      "learning_rate": 0.0004771714562681716,
      "loss": 3.1849,
      "step": 68868
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5469753742218018,
      "learning_rate": 0.0004771681552281497,
      "loss": 3.0138,
      "step": 68869
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.6404037475585938,
      "learning_rate": 0.0004771648541551889,
      "loss": 3.088,
      "step": 68870
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.6573915481567383,
      "learning_rate": 0.0004771615530492897,
      "loss": 3.3502,
      "step": 68871
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9659533500671387,
      "learning_rate": 0.00047715825191045293,
      "loss": 2.9499,
      "step": 68872
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1702535152435303,
      "learning_rate": 0.00047715495073867906,
      "loss": 3.1021,
      "step": 68873
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.877330780029297,
      "learning_rate": 0.0004771516495339687,
      "loss": 2.8285,
      "step": 68874
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9981780052185059,
      "learning_rate": 0.00047714834829632255,
      "loss": 3.0356,
      "step": 68875
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4738471508026123,
      "learning_rate": 0.00047714504702574116,
      "loss": 3.0783,
      "step": 68876
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.044670820236206,
      "learning_rate": 0.0004771417457222252,
      "loss": 2.8354,
      "step": 68877
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3880207538604736,
      "learning_rate": 0.00047713844438577514,
      "loss": 3.1072,
      "step": 68878
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.066664934158325,
      "learning_rate": 0.0004771351430163918,
      "loss": 3.0176,
      "step": 68879
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.011300563812256,
      "learning_rate": 0.0004771318416140757,
      "loss": 2.8838,
      "step": 68880
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4157835245132446,
      "learning_rate": 0.00047712854017882734,
      "loss": 2.935,
      "step": 68881
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7431668043136597,
      "learning_rate": 0.00047712523871064756,
      "loss": 3.0529,
      "step": 68882
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.768749475479126,
      "learning_rate": 0.0004771219372095368,
      "loss": 2.8276,
      "step": 68883
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6302436590194702,
      "learning_rate": 0.00047711863567549576,
      "loss": 3.1259,
      "step": 68884
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6161930561065674,
      "learning_rate": 0.000477115334108525,
      "loss": 3.1209,
      "step": 68885
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6687352657318115,
      "learning_rate": 0.0004771120325086251,
      "loss": 3.0502,
      "step": 68886
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4956352710723877,
      "learning_rate": 0.0004771087308757968,
      "loss": 3.0244,
      "step": 68887
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.939916968345642,
      "learning_rate": 0.00047710542921004063,
      "loss": 3.1021,
      "step": 68888
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.526263952255249,
      "learning_rate": 0.00047710212751135724,
      "loss": 2.94,
      "step": 68889
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6105724573135376,
      "learning_rate": 0.0004770988257797471,
      "loss": 3.0143,
      "step": 68890
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9251313209533691,
      "learning_rate": 0.0004770955240152111,
      "loss": 2.9164,
      "step": 68891
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0480635166168213,
      "learning_rate": 0.00047709222221774966,
      "loss": 3.3215,
      "step": 68892
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6010828018188477,
      "learning_rate": 0.0004770889203873634,
      "loss": 3.03,
      "step": 68893
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.365480661392212,
      "learning_rate": 0.00047708561852405297,
      "loss": 2.9192,
      "step": 68894
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.46610164642334,
      "learning_rate": 0.000477082316627819,
      "loss": 3.0655,
      "step": 68895
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9188843965530396,
      "learning_rate": 0.00047707901469866215,
      "loss": 3.0524,
      "step": 68896
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.678658366203308,
      "learning_rate": 0.00047707571273658284,
      "loss": 3.3007,
      "step": 68897
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.378203868865967,
      "learning_rate": 0.00047707241074158185,
      "loss": 3.0268,
      "step": 68898
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4586434364318848,
      "learning_rate": 0.00047706910871365976,
      "loss": 3.0302,
      "step": 68899
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0439515113830566,
      "learning_rate": 0.00047706580665281723,
      "loss": 3.1384,
      "step": 68900
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3820041418075562,
      "learning_rate": 0.0004770625045590548,
      "loss": 3.1508,
      "step": 68901
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.142573833465576,
      "learning_rate": 0.00047705920243237306,
      "loss": 2.8829,
      "step": 68902
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.486178159713745,
      "learning_rate": 0.0004770559002727727,
      "loss": 2.7848,
      "step": 68903
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8548500537872314,
      "learning_rate": 0.0004770525980802544,
      "loss": 3.0157,
      "step": 68904
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0330185890197754,
      "learning_rate": 0.00047704929585481857,
      "loss": 2.8716,
      "step": 68905
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2276408672332764,
      "learning_rate": 0.0004770459935964659,
      "loss": 3.1772,
      "step": 68906
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.025678873062134,
      "learning_rate": 0.0004770426913051971,
      "loss": 3.247,
      "step": 68907
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.429357647895813,
      "learning_rate": 0.0004770393889810128,
      "loss": 3.1753,
      "step": 68908
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9495735168457031,
      "learning_rate": 0.0004770360866239134,
      "loss": 2.8991,
      "step": 68909
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9656485319137573,
      "learning_rate": 0.00047703278423389967,
      "loss": 2.9731,
      "step": 68910
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1617190837860107,
      "learning_rate": 0.00047702948181097227,
      "loss": 2.9304,
      "step": 68911
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3889203071594238,
      "learning_rate": 0.0004770261793551317,
      "loss": 3.0923,
      "step": 68912
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1042230129241943,
      "learning_rate": 0.0004770228768663786,
      "loss": 3.3404,
      "step": 68913
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4429070949554443,
      "learning_rate": 0.0004770195743447137,
      "loss": 2.9989,
      "step": 68914
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7827385663986206,
      "learning_rate": 0.00047701627179013746,
      "loss": 2.8723,
      "step": 68915
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5794830322265625,
      "learning_rate": 0.00047701296920265056,
      "loss": 3.3103,
      "step": 68916
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7026385068893433,
      "learning_rate": 0.0004770096665822536,
      "loss": 3.0567,
      "step": 68917
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.744153380393982,
      "learning_rate": 0.0004770063639289472,
      "loss": 2.7635,
      "step": 68918
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.29929518699646,
      "learning_rate": 0.000477003061242732,
      "loss": 3.036,
      "step": 68919
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.535702109336853,
      "learning_rate": 0.00047699975852360854,
      "loss": 3.0118,
      "step": 68920
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7187061309814453,
      "learning_rate": 0.0004769964557715776,
      "loss": 2.765,
      "step": 68921
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.9719269275665283,
      "learning_rate": 0.00047699315298663957,
      "loss": 2.99,
      "step": 68922
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3626179695129395,
      "learning_rate": 0.00047698985016879516,
      "loss": 3.0497,
      "step": 68923
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5640795230865479,
      "learning_rate": 0.000476986547318045,
      "loss": 3.0935,
      "step": 68924
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.7822275161743164,
      "learning_rate": 0.0004769832444343897,
      "loss": 3.0906,
      "step": 68925
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8868404626846313,
      "learning_rate": 0.00047697994151782994,
      "loss": 3.1216,
      "step": 68926
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2607173919677734,
      "learning_rate": 0.0004769766385683662,
      "loss": 3.1342,
      "step": 68927
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6633880138397217,
      "learning_rate": 0.00047697333558599926,
      "loss": 2.9785,
      "step": 68928
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.8688318729400635,
      "learning_rate": 0.00047697003257072954,
      "loss": 3.1245,
      "step": 68929
    },
    {
      "epoch": 0.9,
      "grad_norm": 4.483025074005127,
      "learning_rate": 0.0004769667295225578,
      "loss": 3.0537,
      "step": 68930
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.107179641723633,
      "learning_rate": 0.0004769634264414846,
      "loss": 3.3176,
      "step": 68931
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5709961652755737,
      "learning_rate": 0.00047696012332751054,
      "loss": 2.9661,
      "step": 68932
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.5450313091278076,
      "learning_rate": 0.00047695682018063625,
      "loss": 3.1787,
      "step": 68933
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.996992349624634,
      "learning_rate": 0.0004769535170008623,
      "loss": 2.8136,
      "step": 68934
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.170527219772339,
      "learning_rate": 0.00047695021378818943,
      "loss": 3.0592,
      "step": 68935
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.403709650039673,
      "learning_rate": 0.0004769469105426182,
      "loss": 2.9858,
      "step": 68936
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8976212739944458,
      "learning_rate": 0.0004769436072641491,
      "loss": 3.009,
      "step": 68937
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.055659294128418,
      "learning_rate": 0.0004769403039527829,
      "loss": 3.0422,
      "step": 68938
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.7871646881103516,
      "learning_rate": 0.00047693700060852016,
      "loss": 2.7938,
      "step": 68939
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5774075984954834,
      "learning_rate": 0.0004769336972313615,
      "loss": 3.0526,
      "step": 68940
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2993392944335938,
      "learning_rate": 0.0004769303938213074,
      "loss": 3.0421,
      "step": 68941
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5657875537872314,
      "learning_rate": 0.00047692709037835875,
      "loss": 3.0269,
      "step": 68942
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.0912203788757324,
      "learning_rate": 0.00047692378690251605,
      "loss": 3.0161,
      "step": 68943
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1092417240142822,
      "learning_rate": 0.00047692048339377974,
      "loss": 2.9362,
      "step": 68944
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7090343236923218,
      "learning_rate": 0.00047691717985215063,
      "loss": 2.7869,
      "step": 68945
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5682101249694824,
      "learning_rate": 0.00047691387627762927,
      "loss": 3.2037,
      "step": 68946
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.379101037979126,
      "learning_rate": 0.0004769105726702163,
      "loss": 2.9873,
      "step": 68947
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4436774253845215,
      "learning_rate": 0.0004769072690299123,
      "loss": 2.9562,
      "step": 68948
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5872150659561157,
      "learning_rate": 0.000476903965356718,
      "loss": 3.0936,
      "step": 68949
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.056771993637085,
      "learning_rate": 0.00047690066165063374,
      "loss": 3.0229,
      "step": 68950
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.8782997131347656,
      "learning_rate": 0.0004768973579116603,
      "loss": 2.9878,
      "step": 68951
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7259074449539185,
      "learning_rate": 0.00047689405413979855,
      "loss": 2.7872,
      "step": 68952
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3548672199249268,
      "learning_rate": 0.00047689075033504865,
      "loss": 3.0409,
      "step": 68953
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.6981167793273926,
      "learning_rate": 0.0004768874464974114,
      "loss": 2.9255,
      "step": 68954
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.169412136077881,
      "learning_rate": 0.0004768841426268876,
      "loss": 2.9806,
      "step": 68955
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9807440042495728,
      "learning_rate": 0.00047688083872347755,
      "loss": 2.887,
      "step": 68956
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.6864984035491943,
      "learning_rate": 0.00047687753478718205,
      "loss": 3.0966,
      "step": 68957
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4049274921417236,
      "learning_rate": 0.00047687423081800166,
      "loss": 2.9897,
      "step": 68958
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6056169271469116,
      "learning_rate": 0.00047687092681593705,
      "loss": 3.0464,
      "step": 68959
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5169154405593872,
      "learning_rate": 0.00047686762278098874,
      "loss": 3.0195,
      "step": 68960
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5856126546859741,
      "learning_rate": 0.00047686431871315745,
      "loss": 2.9471,
      "step": 68961
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3047484159469604,
      "learning_rate": 0.00047686101461244377,
      "loss": 3.1063,
      "step": 68962
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.174976110458374,
      "learning_rate": 0.00047685771047884824,
      "loss": 3.0425,
      "step": 68963
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6906952857971191,
      "learning_rate": 0.0004768544063123715,
      "loss": 3.0173,
      "step": 68964
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6300630569458008,
      "learning_rate": 0.0004768511021130143,
      "loss": 3.1237,
      "step": 68965
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4475218057632446,
      "learning_rate": 0.00047684779788077704,
      "loss": 3.0275,
      "step": 68966
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6787431240081787,
      "learning_rate": 0.0004768444936156604,
      "loss": 3.1339,
      "step": 68967
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.684104561805725,
      "learning_rate": 0.0004768411893176652,
      "loss": 3.0362,
      "step": 68968
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5156745910644531,
      "learning_rate": 0.0004768378849867918,
      "loss": 3.1488,
      "step": 68969
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5228381156921387,
      "learning_rate": 0.0004768345806230409,
      "loss": 2.7214,
      "step": 68970
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8111844062805176,
      "learning_rate": 0.0004768312762264131,
      "loss": 3.2333,
      "step": 68971
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7059673070907593,
      "learning_rate": 0.000476827971796909,
      "loss": 2.9939,
      "step": 68972
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4664362668991089,
      "learning_rate": 0.0004768246673345292,
      "loss": 2.9453,
      "step": 68973
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1009936332702637,
      "learning_rate": 0.0004768213628392744,
      "loss": 2.8224,
      "step": 68974
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5988982915878296,
      "learning_rate": 0.0004768180583111453,
      "loss": 2.9037,
      "step": 68975
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.488702654838562,
      "learning_rate": 0.0004768147537501422,
      "loss": 2.9677,
      "step": 68976
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8976757526397705,
      "learning_rate": 0.00047681144915626605,
      "loss": 3.0565,
      "step": 68977
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2375659942626953,
      "learning_rate": 0.0004768081445295172,
      "loss": 2.8358,
      "step": 68978
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5665701627731323,
      "learning_rate": 0.0004768048398698964,
      "loss": 3.0687,
      "step": 68979
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8268444538116455,
      "learning_rate": 0.0004768015351774043,
      "loss": 3.1351,
      "step": 68980
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8320456743240356,
      "learning_rate": 0.00047679823045204136,
      "loss": 2.9366,
      "step": 68981
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8713033199310303,
      "learning_rate": 0.00047679492569380845,
      "loss": 3.1822,
      "step": 68982
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5458059310913086,
      "learning_rate": 0.0004767916209027059,
      "loss": 2.7752,
      "step": 68983
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5602270364761353,
      "learning_rate": 0.0004767883160787345,
      "loss": 3.1684,
      "step": 68984
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7332220077514648,
      "learning_rate": 0.0004767850112218947,
      "loss": 3.1253,
      "step": 68985
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9455620050430298,
      "learning_rate": 0.00047678170633218736,
      "loss": 2.9516,
      "step": 68986
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5896469354629517,
      "learning_rate": 0.00047677840140961293,
      "loss": 3.0482,
      "step": 68987
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4732309579849243,
      "learning_rate": 0.00047677509645417204,
      "loss": 3.11,
      "step": 68988
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3954888582229614,
      "learning_rate": 0.0004767717914658654,
      "loss": 3.28,
      "step": 68989
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.546280860900879,
      "learning_rate": 0.00047676848644469347,
      "loss": 3.0629,
      "step": 68990
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.631935477256775,
      "learning_rate": 0.00047676518139065687,
      "loss": 3.266,
      "step": 68991
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6129616498947144,
      "learning_rate": 0.0004767618763037564,
      "loss": 2.844,
      "step": 68992
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4602160453796387,
      "learning_rate": 0.00047675857118399255,
      "loss": 3.0056,
      "step": 68993
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9927239418029785,
      "learning_rate": 0.0004767552660313659,
      "loss": 2.9234,
      "step": 68994
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.99631667137146,
      "learning_rate": 0.0004767519608458771,
      "loss": 2.8131,
      "step": 68995
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.582112431526184,
      "learning_rate": 0.0004767486556275269,
      "loss": 3.1392,
      "step": 68996
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.652772068977356,
      "learning_rate": 0.0004767453503763157,
      "loss": 2.8732,
      "step": 68997
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8572784662246704,
      "learning_rate": 0.00047674204509224417,
      "loss": 3.1898,
      "step": 68998
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9440186023712158,
      "learning_rate": 0.000476738739775313,
      "loss": 3.1336,
      "step": 68999
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8396133184432983,
      "learning_rate": 0.00047673543442552273,
      "loss": 3.241,
      "step": 69000
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0344669818878174,
      "learning_rate": 0.0004767321290428741,
      "loss": 2.8208,
      "step": 69001
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8011201620101929,
      "learning_rate": 0.00047672882362736753,
      "loss": 3.1726,
      "step": 69002
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.729429841041565,
      "learning_rate": 0.0004767255181790038,
      "loss": 2.9837,
      "step": 69003
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.299793243408203,
      "learning_rate": 0.00047672221269778344,
      "loss": 3.0837,
      "step": 69004
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.554546594619751,
      "learning_rate": 0.000476718907183707,
      "loss": 3.0333,
      "step": 69005
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6404579877853394,
      "learning_rate": 0.00047671560163677524,
      "loss": 3.0094,
      "step": 69006
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0923166275024414,
      "learning_rate": 0.00047671229605698883,
      "loss": 3.1203,
      "step": 69007
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.6939101219177246,
      "learning_rate": 0.00047670899044434807,
      "loss": 2.9123,
      "step": 69008
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5600453615188599,
      "learning_rate": 0.00047670568479885386,
      "loss": 2.8765,
      "step": 69009
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3079884052276611,
      "learning_rate": 0.0004767023791205068,
      "loss": 2.9812,
      "step": 69010
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.037667751312256,
      "learning_rate": 0.0004766990734093073,
      "loss": 3.1338,
      "step": 69011
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.678574800491333,
      "learning_rate": 0.0004766957676652562,
      "loss": 3.1631,
      "step": 69012
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.56011962890625,
      "learning_rate": 0.000476692461888354,
      "loss": 2.9884,
      "step": 69013
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2193756103515625,
      "learning_rate": 0.0004766891560786013,
      "loss": 3.0282,
      "step": 69014
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8566529750823975,
      "learning_rate": 0.0004766858502359988,
      "loss": 3.0785,
      "step": 69015
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7576855421066284,
      "learning_rate": 0.00047668254436054706,
      "loss": 2.9995,
      "step": 69016
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.454344630241394,
      "learning_rate": 0.0004766792384522467,
      "loss": 3.1396,
      "step": 69017
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6443054676055908,
      "learning_rate": 0.00047667593251109826,
      "loss": 3.0248,
      "step": 69018
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8045005798339844,
      "learning_rate": 0.0004766726265371025,
      "loss": 3.0604,
      "step": 69019
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4885059595108032,
      "learning_rate": 0.0004766693205302599,
      "loss": 3.2185,
      "step": 69020
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9282817840576172,
      "learning_rate": 0.0004766660144905713,
      "loss": 3.1985,
      "step": 69021
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8627333641052246,
      "learning_rate": 0.000476662708418037,
      "loss": 3.2617,
      "step": 69022
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6976076364517212,
      "learning_rate": 0.0004766594023126578,
      "loss": 3.2538,
      "step": 69023
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.214954376220703,
      "learning_rate": 0.00047665609617443435,
      "loss": 3.0163,
      "step": 69024
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5864579677581787,
      "learning_rate": 0.0004766527900033671,
      "loss": 2.9094,
      "step": 69025
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5464891195297241,
      "learning_rate": 0.00047664948379945676,
      "loss": 2.938,
      "step": 69026
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5395530462265015,
      "learning_rate": 0.000476646177562704,
      "loss": 2.9354,
      "step": 69027
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.663224458694458,
      "learning_rate": 0.0004766428712931094,
      "loss": 2.9358,
      "step": 69028
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9353798627853394,
      "learning_rate": 0.00047663956499067355,
      "loss": 3.1417,
      "step": 69029
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6593095064163208,
      "learning_rate": 0.000476636258655397,
      "loss": 3.1028,
      "step": 69030
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4871827363967896,
      "learning_rate": 0.00047663295228728055,
      "loss": 3.075,
      "step": 69031
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0120532512664795,
      "learning_rate": 0.0004766296458863246,
      "loss": 2.9344,
      "step": 69032
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8464499711990356,
      "learning_rate": 0.0004766263394525299,
      "loss": 3.1245,
      "step": 69033
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7844935655593872,
      "learning_rate": 0.00047662303298589697,
      "loss": 2.873,
      "step": 69034
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5754380226135254,
      "learning_rate": 0.0004766197264864266,
      "loss": 3.0336,
      "step": 69035
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.547491431236267,
      "learning_rate": 0.0004766164199541192,
      "loss": 3.1268,
      "step": 69036
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.279036283493042,
      "learning_rate": 0.0004766131133889755,
      "loss": 2.9153,
      "step": 69037
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.5424509048461914,
      "learning_rate": 0.0004766098067909961,
      "loss": 2.892,
      "step": 69038
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7635843753814697,
      "learning_rate": 0.0004766065001601816,
      "loss": 3.0018,
      "step": 69039
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0317418575286865,
      "learning_rate": 0.0004766031934965326,
      "loss": 3.0704,
      "step": 69040
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8199676275253296,
      "learning_rate": 0.00047659988680004976,
      "loss": 3.1965,
      "step": 69041
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7085120677947998,
      "learning_rate": 0.00047659658007073366,
      "loss": 2.9522,
      "step": 69042
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7844873666763306,
      "learning_rate": 0.0004765932733085849,
      "loss": 2.8502,
      "step": 69043
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.875030994415283,
      "learning_rate": 0.0004765899665136041,
      "loss": 2.9557,
      "step": 69044
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4600200653076172,
      "learning_rate": 0.000476586659685792,
      "loss": 3.0677,
      "step": 69045
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4370481967926025,
      "learning_rate": 0.00047658335282514897,
      "loss": 3.2767,
      "step": 69046
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.117635726928711,
      "learning_rate": 0.0004765800459316759,
      "loss": 3.1388,
      "step": 69047
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8136149644851685,
      "learning_rate": 0.0004765767390053731,
      "loss": 2.9474,
      "step": 69048
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.608188509941101,
      "learning_rate": 0.0004765734320462415,
      "loss": 2.8272,
      "step": 69049
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.416126012802124,
      "learning_rate": 0.0004765701250542815,
      "loss": 3.0168,
      "step": 69050
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8832696676254272,
      "learning_rate": 0.00047656681802949374,
      "loss": 3.0257,
      "step": 69051
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5229859352111816,
      "learning_rate": 0.0004765635109718789,
      "loss": 3.0992,
      "step": 69052
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6885403394699097,
      "learning_rate": 0.00047656020388143767,
      "loss": 3.0034,
      "step": 69053
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7015184164047241,
      "learning_rate": 0.0004765568967581705,
      "loss": 3.1806,
      "step": 69054
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.623452067375183,
      "learning_rate": 0.000476553589602078,
      "loss": 2.9502,
      "step": 69055
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.49577796459198,
      "learning_rate": 0.00047655028241316095,
      "loss": 3.3441,
      "step": 69056
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2667219638824463,
      "learning_rate": 0.0004765469751914198,
      "loss": 3.1147,
      "step": 69057
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.519083261489868,
      "learning_rate": 0.0004765436679368553,
      "loss": 2.9597,
      "step": 69058
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.586047887802124,
      "learning_rate": 0.000476540360649468,
      "loss": 2.9898,
      "step": 69059
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4932057857513428,
      "learning_rate": 0.0004765370533292584,
      "loss": 3.1131,
      "step": 69060
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8338215351104736,
      "learning_rate": 0.0004765337459762273,
      "loss": 2.927,
      "step": 69061
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.306347370147705,
      "learning_rate": 0.0004765304385903753,
      "loss": 2.9201,
      "step": 69062
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4718049764633179,
      "learning_rate": 0.0004765271311717029,
      "loss": 3.2764,
      "step": 69063
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6632171869277954,
      "learning_rate": 0.00047652382372021077,
      "loss": 3.0925,
      "step": 69064
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1404800415039062,
      "learning_rate": 0.0004765205162358996,
      "loss": 3.0528,
      "step": 69065
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.49077308177948,
      "learning_rate": 0.00047651720871876984,
      "loss": 3.1134,
      "step": 69066
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.657312035560608,
      "learning_rate": 0.0004765139011688222,
      "loss": 2.9641,
      "step": 69067
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4770007133483887,
      "learning_rate": 0.0004765105935860574,
      "loss": 2.9404,
      "step": 69068
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8375192880630493,
      "learning_rate": 0.00047650728597047585,
      "loss": 3.0562,
      "step": 69069
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7637624740600586,
      "learning_rate": 0.00047650397832207827,
      "loss": 3.1339,
      "step": 69070
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6628268957138062,
      "learning_rate": 0.00047650067064086536,
      "loss": 3.0861,
      "step": 69071
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.330457091331482,
      "learning_rate": 0.00047649736292683755,
      "loss": 3.1533,
      "step": 69072
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6110258102416992,
      "learning_rate": 0.00047649405517999555,
      "loss": 2.9641,
      "step": 69073
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5650736093521118,
      "learning_rate": 0.00047649074740034006,
      "loss": 3.035,
      "step": 69074
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7496896982192993,
      "learning_rate": 0.00047648743958787147,
      "loss": 3.123,
      "step": 69075
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9882357120513916,
      "learning_rate": 0.0004764841317425907,
      "loss": 2.9213,
      "step": 69076
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4693639278411865,
      "learning_rate": 0.00047648082386449804,
      "loss": 3.2344,
      "step": 69077
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.403612494468689,
      "learning_rate": 0.00047647751595359435,
      "loss": 2.9798,
      "step": 69078
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6052366495132446,
      "learning_rate": 0.0004764742080098801,
      "loss": 2.884,
      "step": 69079
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5201140642166138,
      "learning_rate": 0.0004764709000333561,
      "loss": 2.9947,
      "step": 69080
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8828507661819458,
      "learning_rate": 0.0004764675920240227,
      "loss": 3.0358,
      "step": 69081
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7407935857772827,
      "learning_rate": 0.00047646428398188067,
      "loss": 3.0284,
      "step": 69082
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6086102724075317,
      "learning_rate": 0.0004764609759069306,
      "loss": 2.952,
      "step": 69083
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7591071128845215,
      "learning_rate": 0.0004764576677991731,
      "loss": 3.0654,
      "step": 69084
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7298377752304077,
      "learning_rate": 0.00047645435965860885,
      "loss": 3.1381,
      "step": 69085
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6994081735610962,
      "learning_rate": 0.0004764510514852384,
      "loss": 3.1252,
      "step": 69086
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.306516170501709,
      "learning_rate": 0.0004764477432790623,
      "loss": 2.8322,
      "step": 69087
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9107521772384644,
      "learning_rate": 0.0004764444350400812,
      "loss": 3.2466,
      "step": 69088
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6950528621673584,
      "learning_rate": 0.00047644112676829587,
      "loss": 3.0236,
      "step": 69089
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.179046630859375,
      "learning_rate": 0.00047643781846370666,
      "loss": 3.0292,
      "step": 69090
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0160045623779297,
      "learning_rate": 0.0004764345101263145,
      "loss": 2.9712,
      "step": 69091
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.805877923965454,
      "learning_rate": 0.0004764312017561197,
      "loss": 2.9307,
      "step": 69092
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5080018043518066,
      "learning_rate": 0.00047642789335312306,
      "loss": 2.8272,
      "step": 69093
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4168754816055298,
      "learning_rate": 0.0004764245849173251,
      "loss": 3.1772,
      "step": 69094
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4799216985702515,
      "learning_rate": 0.00047642127644872647,
      "loss": 3.0339,
      "step": 69095
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.584568738937378,
      "learning_rate": 0.00047641796794732784,
      "loss": 2.9409,
      "step": 69096
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5631580352783203,
      "learning_rate": 0.00047641465941312977,
      "loss": 2.9971,
      "step": 69097
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.805448293685913,
      "learning_rate": 0.00047641135084613284,
      "loss": 3.0902,
      "step": 69098
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6005183458328247,
      "learning_rate": 0.0004764080422463379,
      "loss": 3.1777,
      "step": 69099
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8927581310272217,
      "learning_rate": 0.0004764047336137452,
      "loss": 3.204,
      "step": 69100
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.690870761871338,
      "learning_rate": 0.0004764014249483556,
      "loss": 3.0923,
      "step": 69101
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9533495903015137,
      "learning_rate": 0.0004763981162501696,
      "loss": 2.9562,
      "step": 69102
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7204079627990723,
      "learning_rate": 0.00047639480751918785,
      "loss": 3.1103,
      "step": 69103
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9578750133514404,
      "learning_rate": 0.00047639149875541105,
      "loss": 2.5708,
      "step": 69104
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9949922561645508,
      "learning_rate": 0.0004763881899588396,
      "loss": 3.0851,
      "step": 69105
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7972605228424072,
      "learning_rate": 0.0004763848811294744,
      "loss": 3.0774,
      "step": 69106
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.75612473487854,
      "learning_rate": 0.00047638157226731584,
      "loss": 3.1177,
      "step": 69107
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4895458221435547,
      "learning_rate": 0.0004763782633723647,
      "loss": 3.1526,
      "step": 69108
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.653043508529663,
      "learning_rate": 0.00047637495444462143,
      "loss": 3.0652,
      "step": 69109
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.089271306991577,
      "learning_rate": 0.00047637164548408676,
      "loss": 2.6587,
      "step": 69110
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.800376534461975,
      "learning_rate": 0.00047636833649076127,
      "loss": 3.3294,
      "step": 69111
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6184087991714478,
      "learning_rate": 0.0004763650274646455,
      "loss": 3.1494,
      "step": 69112
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5946089029312134,
      "learning_rate": 0.00047636171840574024,
      "loss": 2.7885,
      "step": 69113
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2286217212677,
      "learning_rate": 0.00047635840931404595,
      "loss": 3.1229,
      "step": 69114
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.047156810760498,
      "learning_rate": 0.0004763551001895634,
      "loss": 2.9942,
      "step": 69115
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6598190069198608,
      "learning_rate": 0.0004763517910322931,
      "loss": 3.0006,
      "step": 69116
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.083911180496216,
      "learning_rate": 0.00047634848184223557,
      "loss": 3.0497,
      "step": 69117
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7093455791473389,
      "learning_rate": 0.0004763451726193915,
      "loss": 2.8256,
      "step": 69118
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6042166948318481,
      "learning_rate": 0.0004763418633637617,
      "loss": 2.9103,
      "step": 69119
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5225796699523926,
      "learning_rate": 0.0004763385540753465,
      "loss": 3.0505,
      "step": 69120
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3545308113098145,
      "learning_rate": 0.0004763352447541466,
      "loss": 3.2007,
      "step": 69121
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.6246888637542725,
      "learning_rate": 0.00047633193540016274,
      "loss": 3.3285,
      "step": 69122
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.107407569885254,
      "learning_rate": 0.00047632862601339546,
      "loss": 3.124,
      "step": 69123
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7370965480804443,
      "learning_rate": 0.0004763253165938453,
      "loss": 3.2022,
      "step": 69124
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.628200054168701,
      "learning_rate": 0.00047632200714151286,
      "loss": 2.7002,
      "step": 69125
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5995147228240967,
      "learning_rate": 0.000476318697656399,
      "loss": 3.2366,
      "step": 69126
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0601229667663574,
      "learning_rate": 0.0004763153881385041,
      "loss": 2.7543,
      "step": 69127
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.004941701889038,
      "learning_rate": 0.00047631207858782874,
      "loss": 2.9248,
      "step": 69128
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.899561643600464,
      "learning_rate": 0.00047630876900437385,
      "loss": 3.1082,
      "step": 69129
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.383362293243408,
      "learning_rate": 0.0004763054593881396,
      "loss": 3.2032,
      "step": 69130
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8814178705215454,
      "learning_rate": 0.000476302149739127,
      "loss": 3.1661,
      "step": 69131
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.716896414756775,
      "learning_rate": 0.0004762988400573364,
      "loss": 3.0036,
      "step": 69132
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.9980990886688232,
      "learning_rate": 0.00047629553034276857,
      "loss": 3.398,
      "step": 69133
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.562492609024048,
      "learning_rate": 0.000476292220595424,
      "loss": 3.0254,
      "step": 69134
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7180876731872559,
      "learning_rate": 0.0004762889108153035,
      "loss": 3.0834,
      "step": 69135
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.657261848449707,
      "learning_rate": 0.00047628560100240747,
      "loss": 3.0378,
      "step": 69136
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0406830310821533,
      "learning_rate": 0.0004762822911567367,
      "loss": 3.3542,
      "step": 69137
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6213847398757935,
      "learning_rate": 0.00047627898127829165,
      "loss": 3.0699,
      "step": 69138
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8409314155578613,
      "learning_rate": 0.0004762756713670731,
      "loss": 3.019,
      "step": 69139
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.725054383277893,
      "learning_rate": 0.0004762723614230815,
      "loss": 2.909,
      "step": 69140
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5316908359527588,
      "learning_rate": 0.0004762690514463175,
      "loss": 2.9804,
      "step": 69141
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3709309101104736,
      "learning_rate": 0.0004762657414367819,
      "loss": 3.0698,
      "step": 69142
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6732306480407715,
      "learning_rate": 0.000476262431394475,
      "loss": 3.1783,
      "step": 69143
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9213379621505737,
      "learning_rate": 0.0004762591213193977,
      "loss": 3.1831,
      "step": 69144
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6493730545043945,
      "learning_rate": 0.0004762558112115505,
      "loss": 3.0217,
      "step": 69145
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.205063819885254,
      "learning_rate": 0.00047625250107093396,
      "loss": 2.8952,
      "step": 69146
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3799197673797607,
      "learning_rate": 0.00047624919089754884,
      "loss": 3.1183,
      "step": 69147
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5996767282485962,
      "learning_rate": 0.0004762458806913956,
      "loss": 3.034,
      "step": 69148
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.156679391860962,
      "learning_rate": 0.00047624257045247485,
      "loss": 3.1929,
      "step": 69149
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.8501675128936768,
      "learning_rate": 0.00047623926018078744,
      "loss": 2.8449,
      "step": 69150
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.479927897453308,
      "learning_rate": 0.0004762359498763337,
      "loss": 3.1758,
      "step": 69151
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6025093793869019,
      "learning_rate": 0.0004762326395391144,
      "loss": 3.2281,
      "step": 69152
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7945314645767212,
      "learning_rate": 0.00047622932916913016,
      "loss": 2.8012,
      "step": 69153
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8607232570648193,
      "learning_rate": 0.0004762260187663816,
      "loss": 2.9996,
      "step": 69154
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0948703289031982,
      "learning_rate": 0.00047622270833086916,
      "loss": 2.8864,
      "step": 69155
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9003722667694092,
      "learning_rate": 0.00047621939786259373,
      "loss": 2.9491,
      "step": 69156
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7402386665344238,
      "learning_rate": 0.00047621608736155577,
      "loss": 2.9353,
      "step": 69157
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.496192455291748,
      "learning_rate": 0.0004762127768277558,
      "loss": 3.098,
      "step": 69158
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.616273283958435,
      "learning_rate": 0.00047620946626119465,
      "loss": 2.94,
      "step": 69159
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3774853944778442,
      "learning_rate": 0.0004762061556618728,
      "loss": 2.9314,
      "step": 69160
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3979545831680298,
      "learning_rate": 0.00047620284502979094,
      "loss": 2.783,
      "step": 69161
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4029555320739746,
      "learning_rate": 0.0004761995343649496,
      "loss": 3.0809,
      "step": 69162
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8344579935073853,
      "learning_rate": 0.0004761962236673494,
      "loss": 2.8497,
      "step": 69163
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5561301708221436,
      "learning_rate": 0.0004761929129369911,
      "loss": 2.9684,
      "step": 69164
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6365793943405151,
      "learning_rate": 0.0004761896021738752,
      "loss": 3.1525,
      "step": 69165
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0709173679351807,
      "learning_rate": 0.00047618629137800226,
      "loss": 3.1272,
      "step": 69166
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4363819360733032,
      "learning_rate": 0.00047618298054937294,
      "loss": 2.8279,
      "step": 69167
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4992048740386963,
      "learning_rate": 0.000476179669687988,
      "loss": 3.1159,
      "step": 69168
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.01139760017395,
      "learning_rate": 0.0004761763587938478,
      "loss": 2.9839,
      "step": 69169
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.231971025466919,
      "learning_rate": 0.00047617304786695317,
      "loss": 3.0817,
      "step": 69170
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9472945928573608,
      "learning_rate": 0.0004761697369073046,
      "loss": 3.2851,
      "step": 69171
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8690589666366577,
      "learning_rate": 0.0004761664259149029,
      "loss": 3.3156,
      "step": 69172
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4259216785430908,
      "learning_rate": 0.00047616311488974826,
      "loss": 3.1424,
      "step": 69173
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.286669969558716,
      "learning_rate": 0.0004761598038318418,
      "loss": 3.0902,
      "step": 69174
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.731835126876831,
      "learning_rate": 0.00047615649274118383,
      "loss": 2.924,
      "step": 69175
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6365416049957275,
      "learning_rate": 0.00047615318161777507,
      "loss": 3.1573,
      "step": 69176
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.764960527420044,
      "learning_rate": 0.0004761498704616161,
      "loss": 3.1757,
      "step": 69177
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4518301486968994,
      "learning_rate": 0.0004761465592727075,
      "loss": 2.7119,
      "step": 69178
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6258254051208496,
      "learning_rate": 0.0004761432480510499,
      "loss": 2.8504,
      "step": 69179
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.492531657218933,
      "learning_rate": 0.000476139936796644,
      "loss": 3.2248,
      "step": 69180
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5075585842132568,
      "learning_rate": 0.00047613662550949045,
      "loss": 2.9391,
      "step": 69181
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7631195783615112,
      "learning_rate": 0.00047613331418958966,
      "loss": 3.1421,
      "step": 69182
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.439301609992981,
      "learning_rate": 0.00047613000283694233,
      "loss": 3.1084,
      "step": 69183
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.520807147026062,
      "learning_rate": 0.0004761266914515493,
      "loss": 3.2093,
      "step": 69184
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.195880651473999,
      "learning_rate": 0.0004761233800334108,
      "loss": 2.9645,
      "step": 69185
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4015681743621826,
      "learning_rate": 0.0004761200685825277,
      "loss": 2.9923,
      "step": 69186
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3547313213348389,
      "learning_rate": 0.00047611675709890056,
      "loss": 3.1234,
      "step": 69187
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6219713687896729,
      "learning_rate": 0.00047611344558253,
      "loss": 3.0688,
      "step": 69188
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.535298228263855,
      "learning_rate": 0.0004761101340334166,
      "loss": 3.0466,
      "step": 69189
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6038745641708374,
      "learning_rate": 0.000476106822451561,
      "loss": 3.0881,
      "step": 69190
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8674657344818115,
      "learning_rate": 0.00047610351083696384,
      "loss": 2.7646,
      "step": 69191
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4579533338546753,
      "learning_rate": 0.00047610019918962565,
      "loss": 2.9758,
      "step": 69192
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7770916223526,
      "learning_rate": 0.00047609688750954726,
      "loss": 2.993,
      "step": 69193
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8914934396743774,
      "learning_rate": 0.000476093575796729,
      "loss": 3.1075,
      "step": 69194
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9173294305801392,
      "learning_rate": 0.0004760902640511717,
      "loss": 2.8574,
      "step": 69195
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5085653066635132,
      "learning_rate": 0.0004760869522728758,
      "loss": 3.0417,
      "step": 69196
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.984580159187317,
      "learning_rate": 0.00047608364046184206,
      "loss": 2.966,
      "step": 69197
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.165301561355591,
      "learning_rate": 0.00047608032861807105,
      "loss": 3.0691,
      "step": 69198
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5222342014312744,
      "learning_rate": 0.0004760770167415634,
      "loss": 3.0199,
      "step": 69199
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9182567596435547,
      "learning_rate": 0.00047607370483231966,
      "loss": 2.8488,
      "step": 69200
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6803027391433716,
      "learning_rate": 0.00047607039289034053,
      "loss": 3.2776,
      "step": 69201
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0624008178710938,
      "learning_rate": 0.0004760670809156267,
      "loss": 3.0243,
      "step": 69202
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.142476797103882,
      "learning_rate": 0.0004760637689081785,
      "loss": 3.074,
      "step": 69203
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0913455486297607,
      "learning_rate": 0.00047606045686799677,
      "loss": 2.9129,
      "step": 69204
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9123547077178955,
      "learning_rate": 0.0004760571447950821,
      "loss": 3.013,
      "step": 69205
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.2933835983276367,
      "learning_rate": 0.00047605383268943514,
      "loss": 2.7407,
      "step": 69206
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8866199254989624,
      "learning_rate": 0.00047605052055105636,
      "loss": 3.1231,
      "step": 69207
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0732052326202393,
      "learning_rate": 0.00047604720837994644,
      "loss": 3.1138,
      "step": 69208
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2063755989074707,
      "learning_rate": 0.0004760438961761061,
      "loss": 2.9697,
      "step": 69209
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.872850775718689,
      "learning_rate": 0.00047604058393953583,
      "loss": 3.1174,
      "step": 69210
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9970580339431763,
      "learning_rate": 0.0004760372716702363,
      "loss": 3.1534,
      "step": 69211
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1867995262145996,
      "learning_rate": 0.0004760339593682082,
      "loss": 2.8581,
      "step": 69212
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.097445249557495,
      "learning_rate": 0.000476030647033452,
      "loss": 3.2258,
      "step": 69213
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.858552098274231,
      "learning_rate": 0.00047602733466596835,
      "loss": 3.1693,
      "step": 69214
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7416778802871704,
      "learning_rate": 0.00047602402226575794,
      "loss": 2.772,
      "step": 69215
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5949108600616455,
      "learning_rate": 0.0004760207098328213,
      "loss": 2.9073,
      "step": 69216
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5681251287460327,
      "learning_rate": 0.0004760173973671591,
      "loss": 2.8745,
      "step": 69217
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4253183603286743,
      "learning_rate": 0.000476014084868772,
      "loss": 3.0985,
      "step": 69218
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2808971405029297,
      "learning_rate": 0.0004760107723376604,
      "loss": 2.944,
      "step": 69219
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8380500078201294,
      "learning_rate": 0.00047600745977382527,
      "loss": 2.7865,
      "step": 69220
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6859365701675415,
      "learning_rate": 0.00047600414717726693,
      "loss": 2.9069,
      "step": 69221
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1624538898468018,
      "learning_rate": 0.0004760008345479861,
      "loss": 2.9873,
      "step": 69222
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5049927234649658,
      "learning_rate": 0.0004759975218859835,
      "loss": 3.2048,
      "step": 69223
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4950751066207886,
      "learning_rate": 0.0004759942091912595,
      "loss": 3.1042,
      "step": 69224
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4601967334747314,
      "learning_rate": 0.0004759908964638149,
      "loss": 3.0556,
      "step": 69225
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8105741739273071,
      "learning_rate": 0.0004759875837036503,
      "loss": 3.2159,
      "step": 69226
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.533653974533081,
      "learning_rate": 0.0004759842709107662,
      "loss": 3.2353,
      "step": 69227
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2174830436706543,
      "learning_rate": 0.0004759809580851634,
      "loss": 3.0467,
      "step": 69228
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.043856143951416,
      "learning_rate": 0.0004759776452268424,
      "loss": 2.8579,
      "step": 69229
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.985755443572998,
      "learning_rate": 0.0004759743323358038,
      "loss": 2.7464,
      "step": 69230
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7787550687789917,
      "learning_rate": 0.0004759710194120482,
      "loss": 3.0417,
      "step": 69231
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4831063747406006,
      "learning_rate": 0.0004759677064555763,
      "loss": 3.0402,
      "step": 69232
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7529993057250977,
      "learning_rate": 0.0004759643934663887,
      "loss": 2.9149,
      "step": 69233
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0926413536071777,
      "learning_rate": 0.00047596108044448604,
      "loss": 2.8362,
      "step": 69234
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.986404538154602,
      "learning_rate": 0.0004759577673898689,
      "loss": 3.1496,
      "step": 69235
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2471628189086914,
      "learning_rate": 0.00047595445430253787,
      "loss": 3.1727,
      "step": 69236
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7885671854019165,
      "learning_rate": 0.00047595114118249355,
      "loss": 3.0972,
      "step": 69237
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.355968713760376,
      "learning_rate": 0.0004759478280297366,
      "loss": 2.9984,
      "step": 69238
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.846726655960083,
      "learning_rate": 0.00047594451484426765,
      "loss": 3.1034,
      "step": 69239
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.563151240348816,
      "learning_rate": 0.00047594120162608726,
      "loss": 3.1,
      "step": 69240
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8835980892181396,
      "learning_rate": 0.00047593788837519606,
      "loss": 2.9241,
      "step": 69241
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0253849029541016,
      "learning_rate": 0.00047593457509159475,
      "loss": 3.1933,
      "step": 69242
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.267260789871216,
      "learning_rate": 0.0004759312617752838,
      "loss": 2.999,
      "step": 69243
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5414884090423584,
      "learning_rate": 0.000475927948426264,
      "loss": 2.9838,
      "step": 69244
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.051426649093628,
      "learning_rate": 0.0004759246350445358,
      "loss": 3.1349,
      "step": 69245
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6677402257919312,
      "learning_rate": 0.0004759213216301,
      "loss": 3.1238,
      "step": 69246
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4478713274002075,
      "learning_rate": 0.000475918008182957,
      "loss": 3.0529,
      "step": 69247
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2929977178573608,
      "learning_rate": 0.00047591469470310756,
      "loss": 3.0832,
      "step": 69248
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.563893437385559,
      "learning_rate": 0.0004759113811905523,
      "loss": 2.9001,
      "step": 69249
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.00683856010437,
      "learning_rate": 0.00047590806764529175,
      "loss": 2.9,
      "step": 69250
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5869030952453613,
      "learning_rate": 0.0004759047540673265,
      "loss": 2.9342,
      "step": 69251
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6332337856292725,
      "learning_rate": 0.00047590144045665735,
      "loss": 2.9893,
      "step": 69252
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2422726154327393,
      "learning_rate": 0.0004758981268132847,
      "loss": 3.1249,
      "step": 69253
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8481618165969849,
      "learning_rate": 0.0004758948131372093,
      "loss": 2.9923,
      "step": 69254
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6861209869384766,
      "learning_rate": 0.0004758914994284318,
      "loss": 2.9692,
      "step": 69255
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.0385935306549072,
      "learning_rate": 0.0004758881856869527,
      "loss": 2.9192,
      "step": 69256
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7416527271270752,
      "learning_rate": 0.00047588487191277265,
      "loss": 3.3583,
      "step": 69257
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5748838186264038,
      "learning_rate": 0.0004758815581058923,
      "loss": 3.0697,
      "step": 69258
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4706515073776245,
      "learning_rate": 0.00047587824426631225,
      "loss": 3.0059,
      "step": 69259
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.574430465698242,
      "learning_rate": 0.00047587493039403313,
      "loss": 2.9347,
      "step": 69260
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5370240211486816,
      "learning_rate": 0.0004758716164890555,
      "loss": 2.8192,
      "step": 69261
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0519216060638428,
      "learning_rate": 0.00047586830255138006,
      "loss": 3.0776,
      "step": 69262
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3880105018615723,
      "learning_rate": 0.0004758649885810074,
      "loss": 3.0381,
      "step": 69263
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.778985023498535,
      "learning_rate": 0.000475861674577938,
      "loss": 2.9896,
      "step": 69264
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8693983554840088,
      "learning_rate": 0.0004758583605421727,
      "loss": 2.6173,
      "step": 69265
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7638121843338013,
      "learning_rate": 0.000475855046473712,
      "loss": 3.0145,
      "step": 69266
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1902270317077637,
      "learning_rate": 0.0004758517323725565,
      "loss": 2.9461,
      "step": 69267
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3948160409927368,
      "learning_rate": 0.0004758484182387068,
      "loss": 2.9126,
      "step": 69268
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4302723407745361,
      "learning_rate": 0.00047584510407216367,
      "loss": 2.8834,
      "step": 69269
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5094726085662842,
      "learning_rate": 0.0004758417898729276,
      "loss": 3.182,
      "step": 69270
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.731203079223633,
      "learning_rate": 0.00047583847564099916,
      "loss": 2.9753,
      "step": 69271
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.199704170227051,
      "learning_rate": 0.0004758351613763791,
      "loss": 3.3304,
      "step": 69272
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9309309720993042,
      "learning_rate": 0.00047583184707906793,
      "loss": 3.0065,
      "step": 69273
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.45084547996521,
      "learning_rate": 0.00047582853274906624,
      "loss": 2.7969,
      "step": 69274
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.495007038116455,
      "learning_rate": 0.00047582521838637477,
      "loss": 3.2248,
      "step": 69275
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.046814441680908,
      "learning_rate": 0.00047582190399099406,
      "loss": 2.8655,
      "step": 69276
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.422563910484314,
      "learning_rate": 0.0004758185895629247,
      "loss": 3.12,
      "step": 69277
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7384650707244873,
      "learning_rate": 0.0004758152751021675,
      "loss": 2.8532,
      "step": 69278
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5773918628692627,
      "learning_rate": 0.0004758119606087227,
      "loss": 2.9359,
      "step": 69279
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5390241146087646,
      "learning_rate": 0.00047580864608259127,
      "loss": 3.0325,
      "step": 69280
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.020822048187256,
      "learning_rate": 0.0004758053315237737,
      "loss": 3.0439,
      "step": 69281
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3930814266204834,
      "learning_rate": 0.0004758020169322706,
      "loss": 2.8408,
      "step": 69282
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3537665605545044,
      "learning_rate": 0.0004757987023080825,
      "loss": 3.0101,
      "step": 69283
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3561956882476807,
      "learning_rate": 0.0004757953876512102,
      "loss": 2.8357,
      "step": 69284
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9114047288894653,
      "learning_rate": 0.0004757920729616542,
      "loss": 3.133,
      "step": 69285
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9244188070297241,
      "learning_rate": 0.00047578875823941504,
      "loss": 3.298,
      "step": 69286
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.038520097732544,
      "learning_rate": 0.0004757854434844935,
      "loss": 3.1897,
      "step": 69287
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7137361764907837,
      "learning_rate": 0.0004757821286968901,
      "loss": 2.8698,
      "step": 69288
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3578379154205322,
      "learning_rate": 0.00047577881387660554,
      "loss": 3.221,
      "step": 69289
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7982146739959717,
      "learning_rate": 0.00047577549902364034,
      "loss": 2.9175,
      "step": 69290
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3101612329483032,
      "learning_rate": 0.0004757721841379952,
      "loss": 3.1338,
      "step": 69291
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8282185792922974,
      "learning_rate": 0.00047576886921967064,
      "loss": 3.1924,
      "step": 69292
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1929502487182617,
      "learning_rate": 0.0004757655542686674,
      "loss": 3.1451,
      "step": 69293
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9639662504196167,
      "learning_rate": 0.00047576223928498594,
      "loss": 3.1368,
      "step": 69294
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0316824913024902,
      "learning_rate": 0.00047575892426862703,
      "loss": 2.8715,
      "step": 69295
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9048655033111572,
      "learning_rate": 0.0004757556092195912,
      "loss": 3.1694,
      "step": 69296
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.694867491722107,
      "learning_rate": 0.00047575229413787906,
      "loss": 2.976,
      "step": 69297
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0561840534210205,
      "learning_rate": 0.00047574897902349125,
      "loss": 2.8,
      "step": 69298
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.366833448410034,
      "learning_rate": 0.0004757456638764284,
      "loss": 2.7847,
      "step": 69299
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4138946533203125,
      "learning_rate": 0.0004757423486966911,
      "loss": 3.1251,
      "step": 69300
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5203044414520264,
      "learning_rate": 0.00047573903348428,
      "loss": 3.2325,
      "step": 69301
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4393247365951538,
      "learning_rate": 0.00047573571823919574,
      "loss": 3.0282,
      "step": 69302
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.646296977996826,
      "learning_rate": 0.0004757324029614388,
      "loss": 2.8502,
      "step": 69303
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.835211753845215,
      "learning_rate": 0.0004757290876510099,
      "loss": 2.9781,
      "step": 69304
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5208094120025635,
      "learning_rate": 0.0004757257723079097,
      "loss": 3.1636,
      "step": 69305
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6661937236785889,
      "learning_rate": 0.0004757224569321388,
      "loss": 3.0432,
      "step": 69306
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.408296585083008,
      "learning_rate": 0.0004757191415236977,
      "loss": 2.972,
      "step": 69307
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.6144073009490967,
      "learning_rate": 0.0004757158260825871,
      "loss": 3.146,
      "step": 69308
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8499014377593994,
      "learning_rate": 0.00047571251060880767,
      "loss": 3.0645,
      "step": 69309
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8543975353240967,
      "learning_rate": 0.00047570919510235983,
      "loss": 2.9993,
      "step": 69310
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9693496227264404,
      "learning_rate": 0.0004757058795632445,
      "loss": 3.0807,
      "step": 69311
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8292937278747559,
      "learning_rate": 0.0004757025639914621,
      "loss": 3.0989,
      "step": 69312
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3436784744262695,
      "learning_rate": 0.00047569924838701316,
      "loss": 2.7168,
      "step": 69313
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5180323123931885,
      "learning_rate": 0.00047569593274989847,
      "loss": 3.1044,
      "step": 69314
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0933358669281006,
      "learning_rate": 0.00047569261708011866,
      "loss": 3.0913,
      "step": 69315
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5564191341400146,
      "learning_rate": 0.0004756893013776742,
      "loss": 3.0007,
      "step": 69316
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7395738363265991,
      "learning_rate": 0.00047568598564256577,
      "loss": 2.6411,
      "step": 69317
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9781911373138428,
      "learning_rate": 0.0004756826698747941,
      "loss": 2.9491,
      "step": 69318
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9851129055023193,
      "learning_rate": 0.00047567935407435963,
      "loss": 3.094,
      "step": 69319
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8034528493881226,
      "learning_rate": 0.000475676038241263,
      "loss": 2.8724,
      "step": 69320
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7415969371795654,
      "learning_rate": 0.000475672722375505,
      "loss": 3.1388,
      "step": 69321
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8008695840835571,
      "learning_rate": 0.00047566940647708606,
      "loss": 3.0707,
      "step": 69322
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8595813512802124,
      "learning_rate": 0.00047566609054600686,
      "loss": 3.0375,
      "step": 69323
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6857860088348389,
      "learning_rate": 0.0004756627745822681,
      "loss": 3.1491,
      "step": 69324
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8763759136199951,
      "learning_rate": 0.00047565945858587015,
      "loss": 3.3341,
      "step": 69325
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1280953884124756,
      "learning_rate": 0.0004756561425568139,
      "loss": 2.9365,
      "step": 69326
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.7214622497558594,
      "learning_rate": 0.00047565282649509986,
      "loss": 3.1462,
      "step": 69327
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8391693830490112,
      "learning_rate": 0.0004756495104007286,
      "loss": 2.9791,
      "step": 69328
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5858803987503052,
      "learning_rate": 0.00047564619427370084,
      "loss": 3.1404,
      "step": 69329
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.828045129776001,
      "learning_rate": 0.00047564287811401707,
      "loss": 2.9156,
      "step": 69330
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.5104117393493652,
      "learning_rate": 0.000475639561921678,
      "loss": 3.0146,
      "step": 69331
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5922751426696777,
      "learning_rate": 0.00047563624569668423,
      "loss": 2.9213,
      "step": 69332
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.593310594558716,
      "learning_rate": 0.00047563292943903634,
      "loss": 3.0426,
      "step": 69333
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.2710647583007812,
      "learning_rate": 0.000475629613148735,
      "loss": 3.014,
      "step": 69334
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2528247833251953,
      "learning_rate": 0.00047562629682578074,
      "loss": 3.0892,
      "step": 69335
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9917917251586914,
      "learning_rate": 0.0004756229804701743,
      "loss": 3.0512,
      "step": 69336
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0440313816070557,
      "learning_rate": 0.00047561966408191624,
      "loss": 3.1993,
      "step": 69337
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.423672676086426,
      "learning_rate": 0.0004756163476610072,
      "loss": 2.9735,
      "step": 69338
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9999449253082275,
      "learning_rate": 0.0004756130312074477,
      "loss": 3.2149,
      "step": 69339
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1765811443328857,
      "learning_rate": 0.0004756097147212384,
      "loss": 3.3985,
      "step": 69340
    },
    {
      "epoch": 0.9,
      "grad_norm": 4.480102062225342,
      "learning_rate": 0.00047560639820238,
      "loss": 3.1186,
      "step": 69341
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.6259288787841797,
      "learning_rate": 0.000475603081650873,
      "loss": 2.8638,
      "step": 69342
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9564343690872192,
      "learning_rate": 0.0004755997650667181,
      "loss": 2.9085,
      "step": 69343
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6062417030334473,
      "learning_rate": 0.00047559644844991593,
      "loss": 3.3158,
      "step": 69344
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.7922251224517822,
      "learning_rate": 0.00047559313180046704,
      "loss": 2.7476,
      "step": 69345
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.039539337158203,
      "learning_rate": 0.000475589815118372,
      "loss": 2.6303,
      "step": 69346
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.578672409057617,
      "learning_rate": 0.0004755864984036315,
      "loss": 3.1791,
      "step": 69347
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.2891054153442383,
      "learning_rate": 0.0004755831816562463,
      "loss": 3.0404,
      "step": 69348
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.555283308029175,
      "learning_rate": 0.0004755798648762168,
      "loss": 3.0783,
      "step": 69349
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.053378105163574,
      "learning_rate": 0.0004755765480635437,
      "loss": 2.9749,
      "step": 69350
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1058785915374756,
      "learning_rate": 0.0004755732312182276,
      "loss": 3.1027,
      "step": 69351
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6409248113632202,
      "learning_rate": 0.0004755699143402691,
      "loss": 2.8829,
      "step": 69352
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8265854120254517,
      "learning_rate": 0.0004755665974296688,
      "loss": 3.0448,
      "step": 69353
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.10192608833313,
      "learning_rate": 0.00047556328048642735,
      "loss": 3.1555,
      "step": 69354
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8481990098953247,
      "learning_rate": 0.0004755599635105455,
      "loss": 2.9356,
      "step": 69355
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.762097954750061,
      "learning_rate": 0.00047555664650202365,
      "loss": 3.0279,
      "step": 69356
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9003546237945557,
      "learning_rate": 0.0004755533294608625,
      "loss": 3.0237,
      "step": 69357
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9128551483154297,
      "learning_rate": 0.0004755500123870627,
      "loss": 2.9134,
      "step": 69358
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3916943073272705,
      "learning_rate": 0.00047554669528062477,
      "loss": 3.1546,
      "step": 69359
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.622158408164978,
      "learning_rate": 0.0004755433781415495,
      "loss": 3.1668,
      "step": 69360
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8199206590652466,
      "learning_rate": 0.0004755400609698374,
      "loss": 3.2607,
      "step": 69361
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4220350980758667,
      "learning_rate": 0.00047553674376548896,
      "loss": 3.0914,
      "step": 69362
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4229074716567993,
      "learning_rate": 0.000475533426528505,
      "loss": 3.1932,
      "step": 69363
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6512267589569092,
      "learning_rate": 0.00047553010925888606,
      "loss": 2.951,
      "step": 69364
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7165088653564453,
      "learning_rate": 0.0004755267919566328,
      "loss": 3.1089,
      "step": 69365
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6169912815093994,
      "learning_rate": 0.00047552347462174576,
      "loss": 3.1894,
      "step": 69366
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.630662202835083,
      "learning_rate": 0.00047552015725422565,
      "loss": 3.1709,
      "step": 69367
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.297520875930786,
      "learning_rate": 0.000475516839854073,
      "loss": 3.1375,
      "step": 69368
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.20780348777771,
      "learning_rate": 0.0004755135224212884,
      "loss": 2.9152,
      "step": 69369
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7679526805877686,
      "learning_rate": 0.00047551020495587254,
      "loss": 2.9206,
      "step": 69370
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1701855659484863,
      "learning_rate": 0.00047550688745782603,
      "loss": 3.1105,
      "step": 69371
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6117364168167114,
      "learning_rate": 0.00047550356992714953,
      "loss": 2.7942,
      "step": 69372
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7077140808105469,
      "learning_rate": 0.0004755002523638436,
      "loss": 3.0343,
      "step": 69373
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8429588079452515,
      "learning_rate": 0.00047549693476790875,
      "loss": 3.0812,
      "step": 69374
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.562669038772583,
      "learning_rate": 0.0004754936171393458,
      "loss": 2.8978,
      "step": 69375
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5052152872085571,
      "learning_rate": 0.00047549029947815524,
      "loss": 3.2717,
      "step": 69376
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6170353889465332,
      "learning_rate": 0.0004754869817843377,
      "loss": 2.8919,
      "step": 69377
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6360938549041748,
      "learning_rate": 0.00047548366405789387,
      "loss": 2.9887,
      "step": 69378
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7496765851974487,
      "learning_rate": 0.0004754803462988243,
      "loss": 3.1647,
      "step": 69379
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0023787021636963,
      "learning_rate": 0.00047547702850712963,
      "loss": 2.9569,
      "step": 69380
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9689942598342896,
      "learning_rate": 0.0004754737106828104,
      "loss": 3.1143,
      "step": 69381
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.497301459312439,
      "learning_rate": 0.0004754703928258674,
      "loss": 2.9435,
      "step": 69382
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3818764686584473,
      "learning_rate": 0.0004754670749363011,
      "loss": 2.9946,
      "step": 69383
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.107609272003174,
      "learning_rate": 0.0004754637570141121,
      "loss": 3.0011,
      "step": 69384
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.266373634338379,
      "learning_rate": 0.00047546043905930116,
      "loss": 3.357,
      "step": 69385
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9896488189697266,
      "learning_rate": 0.0004754571210718687,
      "loss": 3.0515,
      "step": 69386
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.831795573234558,
      "learning_rate": 0.0004754538030518156,
      "loss": 3.0378,
      "step": 69387
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.620273470878601,
      "learning_rate": 0.0004754504849991422,
      "loss": 3.1257,
      "step": 69388
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.814515471458435,
      "learning_rate": 0.00047544716691384927,
      "loss": 3.0419,
      "step": 69389
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0085198879241943,
      "learning_rate": 0.00047544384879593743,
      "loss": 2.9672,
      "step": 69390
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9236594438552856,
      "learning_rate": 0.0004754405306454073,
      "loss": 2.9777,
      "step": 69391
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7589669227600098,
      "learning_rate": 0.0004754372124622595,
      "loss": 3.0433,
      "step": 69392
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5664114952087402,
      "learning_rate": 0.00047543389424649444,
      "loss": 3.2211,
      "step": 69393
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.495839238166809,
      "learning_rate": 0.000475430575998113,
      "loss": 3.139,
      "step": 69394
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9575319290161133,
      "learning_rate": 0.0004754272577171157,
      "loss": 3.0583,
      "step": 69395
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4070152044296265,
      "learning_rate": 0.00047542393940350317,
      "loss": 3.1788,
      "step": 69396
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.4083950519561768,
      "learning_rate": 0.0004754206210572761,
      "loss": 2.882,
      "step": 69397
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6982107162475586,
      "learning_rate": 0.0004754173026784349,
      "loss": 3.0538,
      "step": 69398
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.618490219116211,
      "learning_rate": 0.0004754139842669803,
      "loss": 2.987,
      "step": 69399
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5671885013580322,
      "learning_rate": 0.00047541066582291303,
      "loss": 3.0083,
      "step": 69400
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6996867656707764,
      "learning_rate": 0.0004754073473462335,
      "loss": 3.0179,
      "step": 69401
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.93921959400177,
      "learning_rate": 0.00047540402883694256,
      "loss": 2.9099,
      "step": 69402
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.60582435131073,
      "learning_rate": 0.0004754007102950406,
      "loss": 3.0898,
      "step": 69403
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.714076280593872,
      "learning_rate": 0.00047539739172052837,
      "loss": 3.0978,
      "step": 69404
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2205817699432373,
      "learning_rate": 0.00047539407311340647,
      "loss": 2.9084,
      "step": 69405
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9356104135513306,
      "learning_rate": 0.0004753907544736755,
      "loss": 2.7808,
      "step": 69406
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5250812768936157,
      "learning_rate": 0.000475387435801336,
      "loss": 3.1326,
      "step": 69407
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.032965898513794,
      "learning_rate": 0.0004753841170963887,
      "loss": 3.0823,
      "step": 69408
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8251407146453857,
      "learning_rate": 0.00047538079835883436,
      "loss": 2.9903,
      "step": 69409
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4199867248535156,
      "learning_rate": 0.00047537747958867316,
      "loss": 2.893,
      "step": 69410
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6181589365005493,
      "learning_rate": 0.0004753741607859061,
      "loss": 2.8068,
      "step": 69411
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4359177350997925,
      "learning_rate": 0.00047537084195053373,
      "loss": 3.0063,
      "step": 69412
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.095188617706299,
      "learning_rate": 0.00047536752308255647,
      "loss": 3.0774,
      "step": 69413
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4849988222122192,
      "learning_rate": 0.00047536420418197513,
      "loss": 2.9893,
      "step": 69414
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5527539253234863,
      "learning_rate": 0.0004753608852487903,
      "loss": 3.2543,
      "step": 69415
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5553922653198242,
      "learning_rate": 0.00047535756628300257,
      "loss": 2.9728,
      "step": 69416
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.473453164100647,
      "learning_rate": 0.00047535424728461254,
      "loss": 3.2103,
      "step": 69417
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5288830995559692,
      "learning_rate": 0.0004753509282536208,
      "loss": 3.1176,
      "step": 69418
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6646369695663452,
      "learning_rate": 0.0004753476091900281,
      "loss": 2.894,
      "step": 69419
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7888754606246948,
      "learning_rate": 0.00047534429009383496,
      "loss": 3.118,
      "step": 69420
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7758287191390991,
      "learning_rate": 0.0004753409709650419,
      "loss": 2.9933,
      "step": 69421
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6339201927185059,
      "learning_rate": 0.0004753376518036498,
      "loss": 3.2135,
      "step": 69422
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5812668800354004,
      "learning_rate": 0.00047533433260965905,
      "loss": 3.1085,
      "step": 69423
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1407175064086914,
      "learning_rate": 0.00047533101338307027,
      "loss": 3.1794,
      "step": 69424
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.1693832874298096,
      "learning_rate": 0.00047532769412388424,
      "loss": 2.8743,
      "step": 69425
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.866952657699585,
      "learning_rate": 0.0004753243748321015,
      "loss": 3.0321,
      "step": 69426
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.898679256439209,
      "learning_rate": 0.0004753210555077225,
      "loss": 3.1891,
      "step": 69427
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3298356533050537,
      "learning_rate": 0.00047531773615074816,
      "loss": 2.9458,
      "step": 69428
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6590229272842407,
      "learning_rate": 0.0004753144167611789,
      "loss": 3.1977,
      "step": 69429
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4898589849472046,
      "learning_rate": 0.00047531109733901534,
      "loss": 3.1833,
      "step": 69430
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4589747190475464,
      "learning_rate": 0.0004753077778842582,
      "loss": 3.0897,
      "step": 69431
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8763083219528198,
      "learning_rate": 0.00047530445839690796,
      "loss": 3.3421,
      "step": 69432
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.7999842166900635,
      "learning_rate": 0.0004753011388769654,
      "loss": 2.7808,
      "step": 69433
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9995707273483276,
      "learning_rate": 0.00047529781932443096,
      "loss": 3.0233,
      "step": 69434
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8194360733032227,
      "learning_rate": 0.0004752944997393054,
      "loss": 3.1486,
      "step": 69435
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7794109582901,
      "learning_rate": 0.0004752911801215893,
      "loss": 3.0956,
      "step": 69436
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5770972967147827,
      "learning_rate": 0.0004752878604712832,
      "loss": 2.881,
      "step": 69437
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0016634464263916,
      "learning_rate": 0.0004752845407883878,
      "loss": 3.1501,
      "step": 69438
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4862957000732422,
      "learning_rate": 0.00047528122107290367,
      "loss": 3.1067,
      "step": 69439
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0695836544036865,
      "learning_rate": 0.0004752779013248316,
      "loss": 2.8568,
      "step": 69440
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.824280023574829,
      "learning_rate": 0.00047527458154417195,
      "loss": 3.1897,
      "step": 69441
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3936160802841187,
      "learning_rate": 0.0004752712617309254,
      "loss": 3.2264,
      "step": 69442
    },
    {
      "epoch": 0.9,
      "grad_norm": 3.0354726314544678,
      "learning_rate": 0.0004752679418850927,
      "loss": 2.9466,
      "step": 69443
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.524592399597168,
      "learning_rate": 0.0004752646220066743,
      "loss": 2.9897,
      "step": 69444
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3879899978637695,
      "learning_rate": 0.00047526130209567095,
      "loss": 3.1528,
      "step": 69445
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.984349489212036,
      "learning_rate": 0.0004752579821520832,
      "loss": 2.9734,
      "step": 69446
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.739443302154541,
      "learning_rate": 0.0004752546621759117,
      "loss": 3.0314,
      "step": 69447
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6695977449417114,
      "learning_rate": 0.00047525134216715707,
      "loss": 3.0614,
      "step": 69448
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.163616895675659,
      "learning_rate": 0.0004752480221258199,
      "loss": 3.0047,
      "step": 69449
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.50750732421875,
      "learning_rate": 0.00047524470205190076,
      "loss": 2.9515,
      "step": 69450
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2614853382110596,
      "learning_rate": 0.0004752413819454004,
      "loss": 3.0775,
      "step": 69451
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2418618202209473,
      "learning_rate": 0.00047523806180631924,
      "loss": 2.9275,
      "step": 69452
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8940831422805786,
      "learning_rate": 0.0004752347416346581,
      "loss": 3.0506,
      "step": 69453
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6152102947235107,
      "learning_rate": 0.0004752314214304175,
      "loss": 3.0209,
      "step": 69454
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6123952865600586,
      "learning_rate": 0.00047522810119359805,
      "loss": 2.8349,
      "step": 69455
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6521638631820679,
      "learning_rate": 0.00047522478092420047,
      "loss": 3.1414,
      "step": 69456
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7059497833251953,
      "learning_rate": 0.00047522146062222516,
      "loss": 3.1624,
      "step": 69457
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8877893686294556,
      "learning_rate": 0.00047521814028767304,
      "loss": 2.9454,
      "step": 69458
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6713162660598755,
      "learning_rate": 0.00047521481992054444,
      "loss": 3.1057,
      "step": 69459
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.895390510559082,
      "learning_rate": 0.00047521149952084007,
      "loss": 2.949,
      "step": 69460
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.310572862625122,
      "learning_rate": 0.0004752081790885607,
      "loss": 3.0436,
      "step": 69461
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7245509624481201,
      "learning_rate": 0.00047520485862370665,
      "loss": 3.0058,
      "step": 69462
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4444001913070679,
      "learning_rate": 0.00047520153812627885,
      "loss": 3.0499,
      "step": 69463
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4707392454147339,
      "learning_rate": 0.0004751982175962778,
      "loss": 3.0773,
      "step": 69464
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5559146404266357,
      "learning_rate": 0.00047519489703370406,
      "loss": 3.042,
      "step": 69465
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.441856861114502,
      "learning_rate": 0.0004751915764385582,
      "loss": 3.0208,
      "step": 69466
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.487405776977539,
      "learning_rate": 0.00047518825581084097,
      "loss": 3.2013,
      "step": 69467
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6930351257324219,
      "learning_rate": 0.000475184935150553,
      "loss": 2.9902,
      "step": 69468
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5743284225463867,
      "learning_rate": 0.0004751816144576947,
      "loss": 2.9274,
      "step": 69469
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5473259687423706,
      "learning_rate": 0.00047517829373226695,
      "loss": 3.024,
      "step": 69470
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7284400463104248,
      "learning_rate": 0.00047517497297427014,
      "loss": 2.862,
      "step": 69471
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7220563888549805,
      "learning_rate": 0.0004751716521837051,
      "loss": 3.0857,
      "step": 69472
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4730721712112427,
      "learning_rate": 0.0004751683313605724,
      "loss": 2.9741,
      "step": 69473
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.4454843997955322,
      "learning_rate": 0.00047516501050487247,
      "loss": 2.9956,
      "step": 69474
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3892300128936768,
      "learning_rate": 0.0004751616896166061,
      "loss": 2.9087,
      "step": 69475
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7162678241729736,
      "learning_rate": 0.0004751583686957739,
      "loss": 3.0527,
      "step": 69476
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8170104026794434,
      "learning_rate": 0.0004751550477423764,
      "loss": 2.8957,
      "step": 69477
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5045188665390015,
      "learning_rate": 0.0004751517267564143,
      "loss": 3.0701,
      "step": 69478
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4761654138565063,
      "learning_rate": 0.0004751484057378882,
      "loss": 3.0154,
      "step": 69479
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0835163593292236,
      "learning_rate": 0.00047514508468679865,
      "loss": 3.0065,
      "step": 69480
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7898154258728027,
      "learning_rate": 0.0004751417636031463,
      "loss": 2.9924,
      "step": 69481
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4359697103500366,
      "learning_rate": 0.0004751384424869319,
      "loss": 2.9609,
      "step": 69482
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4903240203857422,
      "learning_rate": 0.0004751351213381559,
      "loss": 2.8476,
      "step": 69483
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5149829387664795,
      "learning_rate": 0.000475131800156819,
      "loss": 2.8737,
      "step": 69484
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.622158408164978,
      "learning_rate": 0.0004751284789429218,
      "loss": 2.9658,
      "step": 69485
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7160835266113281,
      "learning_rate": 0.00047512515769646484,
      "loss": 3.0721,
      "step": 69486
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.6464694738388062,
      "learning_rate": 0.0004751218364174488,
      "loss": 3.1283,
      "step": 69487
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.6840362548828125,
      "learning_rate": 0.00047511851510587447,
      "loss": 3.2253,
      "step": 69488
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.9224568605422974,
      "learning_rate": 0.00047511519376174213,
      "loss": 3.197,
      "step": 69489
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.3202593326568604,
      "learning_rate": 0.0004751118723850526,
      "loss": 2.9092,
      "step": 69490
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.2721328735351562,
      "learning_rate": 0.00047510855097580656,
      "loss": 3.0206,
      "step": 69491
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8858212232589722,
      "learning_rate": 0.0004751052295340044,
      "loss": 3.0278,
      "step": 69492
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5779064893722534,
      "learning_rate": 0.0004751019080596469,
      "loss": 2.9741,
      "step": 69493
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.7972604036331177,
      "learning_rate": 0.00047509858655273474,
      "loss": 3.1785,
      "step": 69494
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0796592235565186,
      "learning_rate": 0.0004750952650132684,
      "loss": 2.9549,
      "step": 69495
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.70704984664917,
      "learning_rate": 0.00047509194344124854,
      "loss": 3.2355,
      "step": 69496
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.466861367225647,
      "learning_rate": 0.0004750886218366758,
      "loss": 2.7181,
      "step": 69497
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.500619888305664,
      "learning_rate": 0.0004750853001995508,
      "loss": 2.9375,
      "step": 69498
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.5730031728744507,
      "learning_rate": 0.0004750819785298741,
      "loss": 3.0699,
      "step": 69499
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.472078800201416,
      "learning_rate": 0.00047507865682764634,
      "loss": 2.9691,
      "step": 69500
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0271353721618652,
      "learning_rate": 0.0004750753350928682,
      "loss": 2.9785,
      "step": 69501
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.8549156188964844,
      "learning_rate": 0.0004750720133255402,
      "loss": 2.979,
      "step": 69502
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.0180156230926514,
      "learning_rate": 0.00047506869152566305,
      "loss": 3.0029,
      "step": 69503
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.4521896839141846,
      "learning_rate": 0.00047506536969323733,
      "loss": 2.9405,
      "step": 69504
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.701522946357727,
      "learning_rate": 0.00047506204782826365,
      "loss": 3.0971,
      "step": 69505
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5172322988510132,
      "learning_rate": 0.0004750587259307426,
      "loss": 3.0738,
      "step": 69506
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7789732217788696,
      "learning_rate": 0.0004750554040006748,
      "loss": 2.9271,
      "step": 69507
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8817747831344604,
      "learning_rate": 0.000475052082038061,
      "loss": 3.0158,
      "step": 69508
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6416070461273193,
      "learning_rate": 0.0004750487600429016,
      "loss": 2.8424,
      "step": 69509
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6685611009597778,
      "learning_rate": 0.00047504543801519736,
      "loss": 3.2237,
      "step": 69510
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.459684133529663,
      "learning_rate": 0.00047504211595494883,
      "loss": 3.0891,
      "step": 69511
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.719766616821289,
      "learning_rate": 0.0004750387938621568,
      "loss": 3.1372,
      "step": 69512
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5266402959823608,
      "learning_rate": 0.00047503547173682165,
      "loss": 3.0178,
      "step": 69513
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.363903522491455,
      "learning_rate": 0.00047503214957894414,
      "loss": 2.9189,
      "step": 69514
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4475507736206055,
      "learning_rate": 0.00047502882738852483,
      "loss": 3.1296,
      "step": 69515
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7509909868240356,
      "learning_rate": 0.00047502550516556434,
      "loss": 3.0222,
      "step": 69516
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5328491926193237,
      "learning_rate": 0.00047502218291006336,
      "loss": 2.9041,
      "step": 69517
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5400145053863525,
      "learning_rate": 0.00047501886062202244,
      "loss": 3.3302,
      "step": 69518
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5303924083709717,
      "learning_rate": 0.0004750155383014422,
      "loss": 3.172,
      "step": 69519
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.294405698776245,
      "learning_rate": 0.0004750122159483232,
      "loss": 3.1204,
      "step": 69520
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9721953868865967,
      "learning_rate": 0.0004750088935626662,
      "loss": 3.1707,
      "step": 69521
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5965884923934937,
      "learning_rate": 0.00047500557114447175,
      "loss": 2.945,
      "step": 69522
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4765373468399048,
      "learning_rate": 0.00047500224869374044,
      "loss": 3.1891,
      "step": 69523
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.965462565422058,
      "learning_rate": 0.0004749989262104729,
      "loss": 2.9696,
      "step": 69524
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6536444425582886,
      "learning_rate": 0.0004749956036946698,
      "loss": 2.9983,
      "step": 69525
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2612555027008057,
      "learning_rate": 0.0004749922811463316,
      "loss": 2.9087,
      "step": 69526
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7327899932861328,
      "learning_rate": 0.0004749889585654592,
      "loss": 3.3185,
      "step": 69527
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5698988437652588,
      "learning_rate": 0.00047498563595205293,
      "loss": 3.0202,
      "step": 69528
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.698434591293335,
      "learning_rate": 0.0004749823133061136,
      "loss": 3.0781,
      "step": 69529
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6474967002868652,
      "learning_rate": 0.00047497899062764164,
      "loss": 3.0467,
      "step": 69530
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.651803970336914,
      "learning_rate": 0.0004749756679166379,
      "loss": 3.2488,
      "step": 69531
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6834501028060913,
      "learning_rate": 0.0004749723451731029,
      "loss": 3.0962,
      "step": 69532
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8123937845230103,
      "learning_rate": 0.0004749690223970371,
      "loss": 3.1008,
      "step": 69533
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.734433889389038,
      "learning_rate": 0.00047496569958844137,
      "loss": 3.2593,
      "step": 69534
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5959044694900513,
      "learning_rate": 0.0004749623767473162,
      "loss": 3.1496,
      "step": 69535
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5825884342193604,
      "learning_rate": 0.00047495905387366215,
      "loss": 2.9736,
      "step": 69536
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8439019918441772,
      "learning_rate": 0.00047495573096748,
      "loss": 2.972,
      "step": 69537
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6085145473480225,
      "learning_rate": 0.00047495240802877017,
      "loss": 3.1666,
      "step": 69538
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.395562767982483,
      "learning_rate": 0.0004749490850575335,
      "loss": 3.2404,
      "step": 69539
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8384045362472534,
      "learning_rate": 0.0004749457620537704,
      "loss": 3.1473,
      "step": 69540
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.446720004081726,
      "learning_rate": 0.0004749424390174816,
      "loss": 2.812,
      "step": 69541
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5649003982543945,
      "learning_rate": 0.00047493911594866773,
      "loss": 3.491,
      "step": 69542
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5609478950500488,
      "learning_rate": 0.00047493579284732935,
      "loss": 3.135,
      "step": 69543
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.668194055557251,
      "learning_rate": 0.00047493246971346714,
      "loss": 3.0362,
      "step": 69544
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7569770812988281,
      "learning_rate": 0.0004749291465470816,
      "loss": 3.0932,
      "step": 69545
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5769298076629639,
      "learning_rate": 0.00047492582334817356,
      "loss": 2.862,
      "step": 69546
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.571254849433899,
      "learning_rate": 0.00047492250011674345,
      "loss": 3.4385,
      "step": 69547
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7740846872329712,
      "learning_rate": 0.0004749191768527919,
      "loss": 2.9945,
      "step": 69548
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8441606760025024,
      "learning_rate": 0.0004749158535563196,
      "loss": 3.0274,
      "step": 69549
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6173701286315918,
      "learning_rate": 0.0004749125302273272,
      "loss": 2.7314,
      "step": 69550
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.591652274131775,
      "learning_rate": 0.0004749092068658152,
      "loss": 2.9341,
      "step": 69551
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.660372257232666,
      "learning_rate": 0.0004749058834717843,
      "loss": 3.1501,
      "step": 69552
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7493828535079956,
      "learning_rate": 0.000474902560045235,
      "loss": 2.9151,
      "step": 69553
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2701289653778076,
      "learning_rate": 0.0004748992365861681,
      "loss": 2.8627,
      "step": 69554
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6922683715820312,
      "learning_rate": 0.0004748959130945842,
      "loss": 2.9893,
      "step": 69555
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6561806201934814,
      "learning_rate": 0.0004748925895704837,
      "loss": 2.8962,
      "step": 69556
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5411869287490845,
      "learning_rate": 0.0004748892660138674,
      "loss": 2.7636,
      "step": 69557
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0451762676239014,
      "learning_rate": 0.000474885942424736,
      "loss": 2.93,
      "step": 69558
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9376977682113647,
      "learning_rate": 0.00047488261880308986,
      "loss": 3.2339,
      "step": 69559
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9462271928787231,
      "learning_rate": 0.00047487929514892986,
      "loss": 3.0233,
      "step": 69560
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.934003233909607,
      "learning_rate": 0.00047487597146225643,
      "loss": 3.1339,
      "step": 69561
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0672125816345215,
      "learning_rate": 0.0004748726477430702,
      "loss": 2.9534,
      "step": 69562
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.599043607711792,
      "learning_rate": 0.0004748693239913719,
      "loss": 2.869,
      "step": 69563
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.183511972427368,
      "learning_rate": 0.00047486600020716214,
      "loss": 3.1398,
      "step": 69564
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9185420274734497,
      "learning_rate": 0.0004748626763904414,
      "loss": 3.0856,
      "step": 69565
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9923069477081299,
      "learning_rate": 0.0004748593525412105,
      "loss": 2.9416,
      "step": 69566
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.536558747291565,
      "learning_rate": 0.0004748560286594699,
      "loss": 3.1592,
      "step": 69567
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4315968751907349,
      "learning_rate": 0.0004748527047452203,
      "loss": 3.2904,
      "step": 69568
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8447036743164062,
      "learning_rate": 0.00047484938079846216,
      "loss": 2.9513,
      "step": 69569
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5333210229873657,
      "learning_rate": 0.0004748460568191963,
      "loss": 3.1515,
      "step": 69570
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.048955202102661,
      "learning_rate": 0.00047484273280742325,
      "loss": 3.1494,
      "step": 69571
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4426523447036743,
      "learning_rate": 0.0004748394087631436,
      "loss": 3.098,
      "step": 69572
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.01251220703125,
      "learning_rate": 0.00047483608468635804,
      "loss": 3.0123,
      "step": 69573
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3451390266418457,
      "learning_rate": 0.00047483276057706716,
      "loss": 3.1021,
      "step": 69574
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3641183376312256,
      "learning_rate": 0.0004748294364352715,
      "loss": 3.1448,
      "step": 69575
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6156131029129028,
      "learning_rate": 0.00047482611226097186,
      "loss": 3.0583,
      "step": 69576
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7591187953948975,
      "learning_rate": 0.0004748227880541687,
      "loss": 3.2841,
      "step": 69577
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7498128414154053,
      "learning_rate": 0.0004748194638148627,
      "loss": 3.1961,
      "step": 69578
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5541361570358276,
      "learning_rate": 0.0004748161395430544,
      "loss": 3.0756,
      "step": 69579
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8170725107192993,
      "learning_rate": 0.0004748128152387445,
      "loss": 3.0451,
      "step": 69580
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6245094537734985,
      "learning_rate": 0.0004748094909019336,
      "loss": 3.0187,
      "step": 69581
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1086783409118652,
      "learning_rate": 0.0004748061665326223,
      "loss": 2.9663,
      "step": 69582
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.53594970703125,
      "learning_rate": 0.00047480284213081135,
      "loss": 3.2152,
      "step": 69583
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9442428350448608,
      "learning_rate": 0.00047479951769650115,
      "loss": 3.0766,
      "step": 69584
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8152886629104614,
      "learning_rate": 0.0004747961932296924,
      "loss": 2.9668,
      "step": 69585
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7732338905334473,
      "learning_rate": 0.0004747928687303858,
      "loss": 3.2281,
      "step": 69586
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.10431170463562,
      "learning_rate": 0.0004747895441985819,
      "loss": 3.007,
      "step": 69587
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4872149229049683,
      "learning_rate": 0.00047478621963428124,
      "loss": 3.0507,
      "step": 69588
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5277713537216187,
      "learning_rate": 0.00047478289503748465,
      "loss": 2.974,
      "step": 69589
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.635434627532959,
      "learning_rate": 0.00047477957040819256,
      "loss": 3.1591,
      "step": 69590
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6671032905578613,
      "learning_rate": 0.00047477624574640564,
      "loss": 2.8681,
      "step": 69591
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6184571981430054,
      "learning_rate": 0.00047477292105212453,
      "loss": 3.1349,
      "step": 69592
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6621348857879639,
      "learning_rate": 0.00047476959632534977,
      "loss": 3.0736,
      "step": 69593
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.422908067703247,
      "learning_rate": 0.00047476627156608207,
      "loss": 3.0761,
      "step": 69594
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8165565729141235,
      "learning_rate": 0.00047476294677432213,
      "loss": 2.8678,
      "step": 69595
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9574300050735474,
      "learning_rate": 0.00047475962195007033,
      "loss": 3.1292,
      "step": 69596
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2609596252441406,
      "learning_rate": 0.0004747562970933275,
      "loss": 3.1967,
      "step": 69597
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5499343872070312,
      "learning_rate": 0.00047475297220409414,
      "loss": 3.0449,
      "step": 69598
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.53414785861969,
      "learning_rate": 0.0004747496472823709,
      "loss": 2.9879,
      "step": 69599
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.42574143409729,
      "learning_rate": 0.00047474632232815837,
      "loss": 3.0191,
      "step": 69600
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4710321426391602,
      "learning_rate": 0.00047474299734145725,
      "loss": 3.0099,
      "step": 69601
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5289274454116821,
      "learning_rate": 0.0004747396723222681,
      "loss": 2.9502,
      "step": 69602
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.9575345516204834,
      "learning_rate": 0.00047473634727059153,
      "loss": 2.7854,
      "step": 69603
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4590603113174438,
      "learning_rate": 0.00047473302218642823,
      "loss": 3.0594,
      "step": 69604
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5013045072555542,
      "learning_rate": 0.00047472969706977865,
      "loss": 3.0341,
      "step": 69605
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8002268075942993,
      "learning_rate": 0.00047472637192064366,
      "loss": 2.9311,
      "step": 69606
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4855825901031494,
      "learning_rate": 0.00047472304673902365,
      "loss": 2.9695,
      "step": 69607
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4942104816436768,
      "learning_rate": 0.0004747197215249193,
      "loss": 2.9879,
      "step": 69608
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5343221426010132,
      "learning_rate": 0.00047471639627833136,
      "loss": 3.0142,
      "step": 69609
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.555053472518921,
      "learning_rate": 0.0004747130709992603,
      "loss": 3.4078,
      "step": 69610
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4535717964172363,
      "learning_rate": 0.00047470974568770676,
      "loss": 3.1269,
      "step": 69611
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8772428035736084,
      "learning_rate": 0.0004747064203436714,
      "loss": 2.8987,
      "step": 69612
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.424619197845459,
      "learning_rate": 0.0004747030949671548,
      "loss": 3.255,
      "step": 69613
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8892161846160889,
      "learning_rate": 0.0004746997695581576,
      "loss": 3.1517,
      "step": 69614
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.462576985359192,
      "learning_rate": 0.00047469644411668037,
      "loss": 3.1126,
      "step": 69615
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8953560590744019,
      "learning_rate": 0.0004746931186427239,
      "loss": 3.2172,
      "step": 69616
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6893117427825928,
      "learning_rate": 0.0004746897931362886,
      "loss": 3.1546,
      "step": 69617
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.207002639770508,
      "learning_rate": 0.00047468646759737514,
      "loss": 3.0354,
      "step": 69618
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.941172480583191,
      "learning_rate": 0.00047468314202598423,
      "loss": 3.0626,
      "step": 69619
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5695157051086426,
      "learning_rate": 0.0004746798164221164,
      "loss": 3.2424,
      "step": 69620
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.7607333660125732,
      "learning_rate": 0.0004746764907857723,
      "loss": 3.1948,
      "step": 69621
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4412026405334473,
      "learning_rate": 0.0004746731651169525,
      "loss": 3.0009,
      "step": 69622
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5611523389816284,
      "learning_rate": 0.00047466983941565774,
      "loss": 2.9782,
      "step": 69623
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.588127851486206,
      "learning_rate": 0.00047466651368188853,
      "loss": 2.8331,
      "step": 69624
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9189589023590088,
      "learning_rate": 0.0004746631879156455,
      "loss": 3.1305,
      "step": 69625
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9330885410308838,
      "learning_rate": 0.0004746598621169293,
      "loss": 3.0528,
      "step": 69626
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6121773719787598,
      "learning_rate": 0.0004746565362857405,
      "loss": 3.0012,
      "step": 69627
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.141930103302002,
      "learning_rate": 0.00047465321042207984,
      "loss": 3.0256,
      "step": 69628
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3317995071411133,
      "learning_rate": 0.00047464988452594777,
      "loss": 3.0341,
      "step": 69629
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5125974416732788,
      "learning_rate": 0.000474646558597345,
      "loss": 2.9376,
      "step": 69630
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.104438304901123,
      "learning_rate": 0.00047464323263627213,
      "loss": 2.9323,
      "step": 69631
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.300539255142212,
      "learning_rate": 0.00047463990664272987,
      "loss": 2.9603,
      "step": 69632
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9050098657608032,
      "learning_rate": 0.00047463658061671856,
      "loss": 3.0453,
      "step": 69633
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7436001300811768,
      "learning_rate": 0.0004746332545582392,
      "loss": 3.2755,
      "step": 69634
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1680076122283936,
      "learning_rate": 0.0004746299284672922,
      "loss": 2.9074,
      "step": 69635
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9864059686660767,
      "learning_rate": 0.0004746266023438782,
      "loss": 3.134,
      "step": 69636
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9372525215148926,
      "learning_rate": 0.00047462327618799774,
      "loss": 3.1001,
      "step": 69637
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7451684474945068,
      "learning_rate": 0.0004746199499996516,
      "loss": 2.9919,
      "step": 69638
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.805038571357727,
      "learning_rate": 0.0004746166237788402,
      "loss": 2.8848,
      "step": 69639
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8624985218048096,
      "learning_rate": 0.0004746132975255644,
      "loss": 2.8049,
      "step": 69640
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7396436929702759,
      "learning_rate": 0.00047460997123982464,
      "loss": 3.2225,
      "step": 69641
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.140483856201172,
      "learning_rate": 0.0004746066449216216,
      "loss": 3.0377,
      "step": 69642
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3762716054916382,
      "learning_rate": 0.00047460331857095584,
      "loss": 3.1046,
      "step": 69643
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.741299033164978,
      "learning_rate": 0.0004745999921878281,
      "loss": 2.9381,
      "step": 69644
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.804853081703186,
      "learning_rate": 0.0004745966657722389,
      "loss": 3.0641,
      "step": 69645
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.795728325843811,
      "learning_rate": 0.0004745933393241889,
      "loss": 3.2794,
      "step": 69646
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7615934610366821,
      "learning_rate": 0.00047459001284367864,
      "loss": 2.8174,
      "step": 69647
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7707902193069458,
      "learning_rate": 0.0004745866863307089,
      "loss": 3.2695,
      "step": 69648
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7580214738845825,
      "learning_rate": 0.00047458335978528,
      "loss": 3.1756,
      "step": 69649
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.079744815826416,
      "learning_rate": 0.000474580033207393,
      "loss": 3.1662,
      "step": 69650
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7213788032531738,
      "learning_rate": 0.00047457670659704817,
      "loss": 3.1494,
      "step": 69651
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5729140043258667,
      "learning_rate": 0.0004745733799542462,
      "loss": 2.9379,
      "step": 69652
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7776762247085571,
      "learning_rate": 0.0004745700532789879,
      "loss": 2.8574,
      "step": 69653
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5712536573410034,
      "learning_rate": 0.0004745667265712735,
      "loss": 2.8907,
      "step": 69654
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.727182149887085,
      "learning_rate": 0.00047456339983110394,
      "loss": 2.9903,
      "step": 69655
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.662868857383728,
      "learning_rate": 0.00047456007305847985,
      "loss": 3.0008,
      "step": 69656
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3206040859222412,
      "learning_rate": 0.00047455674625340163,
      "loss": 3.3562,
      "step": 69657
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.981814980506897,
      "learning_rate": 0.00047455341941587004,
      "loss": 3.2536,
      "step": 69658
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7429263591766357,
      "learning_rate": 0.00047455009254588574,
      "loss": 3.3588,
      "step": 69659
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9099550247192383,
      "learning_rate": 0.00047454676564344925,
      "loss": 3.1105,
      "step": 69660
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4424331188201904,
      "learning_rate": 0.00047454343870856114,
      "loss": 3.2457,
      "step": 69661
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5278874635696411,
      "learning_rate": 0.0004745401117412222,
      "loss": 3.0484,
      "step": 69662
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8499712944030762,
      "learning_rate": 0.0004745367847414329,
      "loss": 3.1683,
      "step": 69663
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.549928903579712,
      "learning_rate": 0.000474533457709194,
      "loss": 2.8651,
      "step": 69664
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5972460508346558,
      "learning_rate": 0.000474530130644506,
      "loss": 3.1797,
      "step": 69665
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4782216548919678,
      "learning_rate": 0.00047452680354736956,
      "loss": 3.1082,
      "step": 69666
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.420349359512329,
      "learning_rate": 0.00047452347641778524,
      "loss": 2.907,
      "step": 69667
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.633908748626709,
      "learning_rate": 0.00047452014925575384,
      "loss": 3.027,
      "step": 69668
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8473674058914185,
      "learning_rate": 0.0004745168220612757,
      "loss": 3.142,
      "step": 69669
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.1456782817840576,
      "learning_rate": 0.00047451349483435164,
      "loss": 2.8949,
      "step": 69670
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8163018226623535,
      "learning_rate": 0.0004745101675749823,
      "loss": 3.0316,
      "step": 69671
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1619226932525635,
      "learning_rate": 0.00047450684028316805,
      "loss": 2.8193,
      "step": 69672
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.812115430831909,
      "learning_rate": 0.0004745035129589098,
      "loss": 2.7617,
      "step": 69673
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6304289102554321,
      "learning_rate": 0.00047450018560220807,
      "loss": 3.0975,
      "step": 69674
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5915684700012207,
      "learning_rate": 0.00047449685821306347,
      "loss": 3.0434,
      "step": 69675
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6552839279174805,
      "learning_rate": 0.0004744935307914765,
      "loss": 3.0716,
      "step": 69676
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0592026710510254,
      "learning_rate": 0.000474490203337448,
      "loss": 3.1913,
      "step": 69677
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7436153888702393,
      "learning_rate": 0.0004744868758509784,
      "loss": 2.7942,
      "step": 69678
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8867239952087402,
      "learning_rate": 0.0004744835483320685,
      "loss": 3.0698,
      "step": 69679
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3052728176116943,
      "learning_rate": 0.0004744802207807188,
      "loss": 2.8317,
      "step": 69680
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8749611377716064,
      "learning_rate": 0.00047447689319692976,
      "loss": 3.0226,
      "step": 69681
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3756762742996216,
      "learning_rate": 0.00047447356558070233,
      "loss": 2.944,
      "step": 69682
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7434011697769165,
      "learning_rate": 0.000474470237932037,
      "loss": 3.0246,
      "step": 69683
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.284120798110962,
      "learning_rate": 0.00047446691025093425,
      "loss": 3.2576,
      "step": 69684
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4157984256744385,
      "learning_rate": 0.0004744635825373949,
      "loss": 3.0274,
      "step": 69685
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.659483551979065,
      "learning_rate": 0.00047446025479141936,
      "loss": 3.0351,
      "step": 69686
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.50879967212677,
      "learning_rate": 0.0004744569270130085,
      "loss": 2.9897,
      "step": 69687
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7204772233963013,
      "learning_rate": 0.0004744535992021627,
      "loss": 3.0397,
      "step": 69688
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5051145553588867,
      "learning_rate": 0.00047445027135888275,
      "loss": 3.0,
      "step": 69689
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4546878337860107,
      "learning_rate": 0.00047444694348316915,
      "loss": 3.137,
      "step": 69690
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.7060928344726562,
      "learning_rate": 0.00047444361557502266,
      "loss": 3.0232,
      "step": 69691
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8481789827346802,
      "learning_rate": 0.00047444028763444373,
      "loss": 3.141,
      "step": 69692
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0217247009277344,
      "learning_rate": 0.000474436959661433,
      "loss": 2.9397,
      "step": 69693
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.637986421585083,
      "learning_rate": 0.00047443363165599125,
      "loss": 3.0488,
      "step": 69694
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4187915325164795,
      "learning_rate": 0.000474430303618119,
      "loss": 3.1025,
      "step": 69695
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5217498540878296,
      "learning_rate": 0.00047442697554781686,
      "loss": 3.1757,
      "step": 69696
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6182007789611816,
      "learning_rate": 0.00047442364744508536,
      "loss": 3.0562,
      "step": 69697
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.456652283668518,
      "learning_rate": 0.0004744203193099253,
      "loss": 2.9822,
      "step": 69698
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7382465600967407,
      "learning_rate": 0.00047441699114233717,
      "loss": 3.0306,
      "step": 69699
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4817869663238525,
      "learning_rate": 0.0004744136629423216,
      "loss": 3.0142,
      "step": 69700
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.320318579673767,
      "learning_rate": 0.00047441033470987937,
      "loss": 2.9287,
      "step": 69701
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6288516521453857,
      "learning_rate": 0.00047440700644501085,
      "loss": 3.1425,
      "step": 69702
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6515676975250244,
      "learning_rate": 0.0004744036781477167,
      "loss": 3.1452,
      "step": 69703
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3478730916976929,
      "learning_rate": 0.0004744003498179978,
      "loss": 2.9372,
      "step": 69704
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8954936265945435,
      "learning_rate": 0.0004743970214558545,
      "loss": 2.924,
      "step": 69705
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4496748447418213,
      "learning_rate": 0.00047439369306128754,
      "loss": 2.9475,
      "step": 69706
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9513394832611084,
      "learning_rate": 0.0004743903646342974,
      "loss": 3.0705,
      "step": 69707
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.211146354675293,
      "learning_rate": 0.0004743870361748849,
      "loss": 2.9431,
      "step": 69708
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1430020332336426,
      "learning_rate": 0.0004743837076830505,
      "loss": 2.9313,
      "step": 69709
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4337605237960815,
      "learning_rate": 0.0004743803791587949,
      "loss": 3.1722,
      "step": 69710
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7451956272125244,
      "learning_rate": 0.0004743770506021187,
      "loss": 3.0022,
      "step": 69711
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1669225692749023,
      "learning_rate": 0.00047437372201302255,
      "loss": 2.9013,
      "step": 69712
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6819521188735962,
      "learning_rate": 0.000474370393391507,
      "loss": 2.9819,
      "step": 69713
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6318727731704712,
      "learning_rate": 0.0004743670647375726,
      "loss": 3.2792,
      "step": 69714
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4342200756073,
      "learning_rate": 0.0004743637360512202,
      "loss": 3.14,
      "step": 69715
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.977299451828003,
      "learning_rate": 0.0004743604073324502,
      "loss": 3.0985,
      "step": 69716
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8548624515533447,
      "learning_rate": 0.0004743570785812634,
      "loss": 2.9675,
      "step": 69717
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6743942499160767,
      "learning_rate": 0.0004743537497976603,
      "loss": 2.9711,
      "step": 69718
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5608537197113037,
      "learning_rate": 0.00047435042098164154,
      "loss": 2.6351,
      "step": 69719
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6158016920089722,
      "learning_rate": 0.00047434709213320776,
      "loss": 2.9212,
      "step": 69720
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7000272274017334,
      "learning_rate": 0.00047434376325235954,
      "loss": 2.9536,
      "step": 69721
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6759099960327148,
      "learning_rate": 0.0004743404343390976,
      "loss": 3.0196,
      "step": 69722
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7724021673202515,
      "learning_rate": 0.00047433710539342236,
      "loss": 2.9119,
      "step": 69723
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5538275241851807,
      "learning_rate": 0.0004743337764153346,
      "loss": 2.8543,
      "step": 69724
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.596392273902893,
      "learning_rate": 0.00047433044740483493,
      "loss": 3.0706,
      "step": 69725
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.02725887298584,
      "learning_rate": 0.00047432711836192397,
      "loss": 3.0291,
      "step": 69726
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7429441213607788,
      "learning_rate": 0.00047432378928660224,
      "loss": 3.0578,
      "step": 69727
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6919984817504883,
      "learning_rate": 0.0004743204601788704,
      "loss": 3.2225,
      "step": 69728
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8069325685501099,
      "learning_rate": 0.0004743171310387292,
      "loss": 3.3098,
      "step": 69729
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1452507972717285,
      "learning_rate": 0.0004743138018661791,
      "loss": 3.0857,
      "step": 69730
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6328452825546265,
      "learning_rate": 0.0004743104726612207,
      "loss": 2.9169,
      "step": 69731
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.284816265106201,
      "learning_rate": 0.0004743071434238548,
      "loss": 3.0548,
      "step": 69732
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.614189624786377,
      "learning_rate": 0.0004743038141540819,
      "loss": 2.8749,
      "step": 69733
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6706864833831787,
      "learning_rate": 0.0004743004848519026,
      "loss": 2.8993,
      "step": 69734
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4566733837127686,
      "learning_rate": 0.00047429715551731755,
      "loss": 2.9909,
      "step": 69735
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3559117317199707,
      "learning_rate": 0.0004742938261503274,
      "loss": 3.0767,
      "step": 69736
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4561700820922852,
      "learning_rate": 0.0004742904967509327,
      "loss": 2.8932,
      "step": 69737
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.30971360206604,
      "learning_rate": 0.00047428716731913413,
      "loss": 3.2803,
      "step": 69738
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7753071784973145,
      "learning_rate": 0.0004742838378549323,
      "loss": 3.067,
      "step": 69739
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4842113256454468,
      "learning_rate": 0.00047428050835832776,
      "loss": 3.062,
      "step": 69740
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7741377353668213,
      "learning_rate": 0.0004742771788293212,
      "loss": 3.2642,
      "step": 69741
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5003862380981445,
      "learning_rate": 0.00047427384926791325,
      "loss": 3.0239,
      "step": 69742
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.849882960319519,
      "learning_rate": 0.0004742705196741044,
      "loss": 2.9142,
      "step": 69743
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7492345571517944,
      "learning_rate": 0.00047426719004789556,
      "loss": 3.1563,
      "step": 69744
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.754055142402649,
      "learning_rate": 0.00047426386038928705,
      "loss": 2.8486,
      "step": 69745
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7481043338775635,
      "learning_rate": 0.0004742605306982796,
      "loss": 2.9813,
      "step": 69746
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8002794981002808,
      "learning_rate": 0.00047425720097487385,
      "loss": 3.0102,
      "step": 69747
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5532232522964478,
      "learning_rate": 0.0004742538712190704,
      "loss": 2.7964,
      "step": 69748
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5295358896255493,
      "learning_rate": 0.0004742505414308698,
      "loss": 3.2005,
      "step": 69749
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9541165828704834,
      "learning_rate": 0.00047424721161027283,
      "loss": 2.8709,
      "step": 69750
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.7188186645507812,
      "learning_rate": 0.00047424388175728,
      "loss": 3.0721,
      "step": 69751
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6374567747116089,
      "learning_rate": 0.0004742405518718919,
      "loss": 3.3602,
      "step": 69752
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.69222092628479,
      "learning_rate": 0.00047423722195410925,
      "loss": 2.7535,
      "step": 69753
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7521215677261353,
      "learning_rate": 0.0004742338920039325,
      "loss": 3.0511,
      "step": 69754
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9881210327148438,
      "learning_rate": 0.0004742305620213624,
      "loss": 2.9345,
      "step": 69755
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.729407787322998,
      "learning_rate": 0.0004742272320063997,
      "loss": 3.0619,
      "step": 69756
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4794764518737793,
      "learning_rate": 0.00047422390195904474,
      "loss": 2.9724,
      "step": 69757
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5150132179260254,
      "learning_rate": 0.0004742205718792983,
      "loss": 3.2672,
      "step": 69758
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2693357467651367,
      "learning_rate": 0.00047421724176716095,
      "loss": 3.1884,
      "step": 69759
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.174180269241333,
      "learning_rate": 0.0004742139116226333,
      "loss": 3.2812,
      "step": 69760
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.439883828163147,
      "learning_rate": 0.000474210581445716,
      "loss": 3.1727,
      "step": 69761
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4435317516326904,
      "learning_rate": 0.0004742072512364097,
      "loss": 3.0602,
      "step": 69762
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8038560152053833,
      "learning_rate": 0.000474203920994715,
      "loss": 3.0725,
      "step": 69763
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.867180585861206,
      "learning_rate": 0.0004742005907206325,
      "loss": 3.1154,
      "step": 69764
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3305765390396118,
      "learning_rate": 0.0004741972604141628,
      "loss": 3.2606,
      "step": 69765
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8584415912628174,
      "learning_rate": 0.00047419393007530657,
      "loss": 2.8928,
      "step": 69766
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5710490942001343,
      "learning_rate": 0.0004741905997040644,
      "loss": 3.0116,
      "step": 69767
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7291862964630127,
      "learning_rate": 0.0004741872693004368,
      "loss": 3.0014,
      "step": 69768
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7875210046768188,
      "learning_rate": 0.00047418393886442466,
      "loss": 3.0713,
      "step": 69769
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4057183265686035,
      "learning_rate": 0.0004741806083960283,
      "loss": 3.1341,
      "step": 69770
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.723327875137329,
      "learning_rate": 0.00047417727789524853,
      "loss": 2.9304,
      "step": 69771
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.319814682006836,
      "learning_rate": 0.000474173947362086,
      "loss": 3.1066,
      "step": 69772
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.013847589492798,
      "learning_rate": 0.00047417061679654114,
      "loss": 2.8321,
      "step": 69773
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.158478260040283,
      "learning_rate": 0.00047416728619861467,
      "loss": 3.3097,
      "step": 69774
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9236114025115967,
      "learning_rate": 0.0004741639555683073,
      "loss": 3.0104,
      "step": 69775
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5233663320541382,
      "learning_rate": 0.0004741606249056195,
      "loss": 3.0149,
      "step": 69776
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7813862562179565,
      "learning_rate": 0.000474157294210552,
      "loss": 2.7549,
      "step": 69777
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8985735177993774,
      "learning_rate": 0.00047415396348310536,
      "loss": 3.2068,
      "step": 69778
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7818752527236938,
      "learning_rate": 0.00047415063272328013,
      "loss": 2.8326,
      "step": 69779
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.10079288482666,
      "learning_rate": 0.0004741473019310771,
      "loss": 3.0607,
      "step": 69780
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6419553756713867,
      "learning_rate": 0.0004741439711064967,
      "loss": 2.9228,
      "step": 69781
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4269671440124512,
      "learning_rate": 0.00047414064024953976,
      "loss": 2.9433,
      "step": 69782
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.731075644493103,
      "learning_rate": 0.00047413730936020674,
      "loss": 3.0707,
      "step": 69783
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.794862151145935,
      "learning_rate": 0.00047413397843849825,
      "loss": 3.1999,
      "step": 69784
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6427597999572754,
      "learning_rate": 0.00047413064748441506,
      "loss": 3.2416,
      "step": 69785
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5883522033691406,
      "learning_rate": 0.00047412731649795775,
      "loss": 2.9681,
      "step": 69786
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4733881950378418,
      "learning_rate": 0.0004741239854791267,
      "loss": 2.8886,
      "step": 69787
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5570752620697021,
      "learning_rate": 0.00047412065442792284,
      "loss": 3.1296,
      "step": 69788
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7363513708114624,
      "learning_rate": 0.00047411732334434655,
      "loss": 3.2606,
      "step": 69789
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8296085596084595,
      "learning_rate": 0.0004741139922283987,
      "loss": 3.1062,
      "step": 69790
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5359809398651123,
      "learning_rate": 0.00047411066108007976,
      "loss": 2.951,
      "step": 69791
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8042559623718262,
      "learning_rate": 0.0004741073298993903,
      "loss": 3.1131,
      "step": 69792
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7842791080474854,
      "learning_rate": 0.0004741039986863311,
      "loss": 3.2403,
      "step": 69793
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6943320035934448,
      "learning_rate": 0.0004741006674409025,
      "loss": 3.0285,
      "step": 69794
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5205949544906616,
      "learning_rate": 0.0004740973361631054,
      "loss": 2.8739,
      "step": 69795
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6176475286483765,
      "learning_rate": 0.00047409400485294036,
      "loss": 3.0748,
      "step": 69796
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7882304191589355,
      "learning_rate": 0.00047409067351040785,
      "loss": 2.8016,
      "step": 69797
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7616548538208008,
      "learning_rate": 0.0004740873421355087,
      "loss": 2.9756,
      "step": 69798
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.257719039916992,
      "learning_rate": 0.0004740840107282435,
      "loss": 3.0098,
      "step": 69799
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0028560161590576,
      "learning_rate": 0.00047408067928861265,
      "loss": 2.7578,
      "step": 69800
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.834276556968689,
      "learning_rate": 0.0004740773478166169,
      "loss": 2.9888,
      "step": 69801
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.94013512134552,
      "learning_rate": 0.00047407401631225694,
      "loss": 3.194,
      "step": 69802
    },
    {
      "epoch": 0.91,
      "grad_norm": 4.624435901641846,
      "learning_rate": 0.00047407068477553336,
      "loss": 3.2506,
      "step": 69803
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.085920572280884,
      "learning_rate": 0.00047406735320644673,
      "loss": 3.0536,
      "step": 69804
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.781055212020874,
      "learning_rate": 0.00047406402160499775,
      "loss": 2.9365,
      "step": 69805
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.843888759613037,
      "learning_rate": 0.00047406068997118684,
      "loss": 3.1463,
      "step": 69806
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.249659538269043,
      "learning_rate": 0.0004740573583050148,
      "loss": 3.1763,
      "step": 69807
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9277573823928833,
      "learning_rate": 0.00047405402660648233,
      "loss": 2.8714,
      "step": 69808
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7903696298599243,
      "learning_rate": 0.0004740506948755899,
      "loss": 2.8465,
      "step": 69809
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9290344715118408,
      "learning_rate": 0.00047404736311233806,
      "loss": 2.9855,
      "step": 69810
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.62571382522583,
      "learning_rate": 0.0004740440313167276,
      "loss": 3.1807,
      "step": 69811
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7035808563232422,
      "learning_rate": 0.000474040699488759,
      "loss": 3.0321,
      "step": 69812
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.599789023399353,
      "learning_rate": 0.00047403736762843303,
      "loss": 3.0575,
      "step": 69813
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.584031343460083,
      "learning_rate": 0.0004740340357357503,
      "loss": 3.0055,
      "step": 69814
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.1981074810028076,
      "learning_rate": 0.0004740307038107112,
      "loss": 3.2565,
      "step": 69815
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6493597030639648,
      "learning_rate": 0.00047402737185331655,
      "loss": 3.0404,
      "step": 69816
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4909173250198364,
      "learning_rate": 0.000474024039863567,
      "loss": 2.9431,
      "step": 69817
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3771567344665527,
      "learning_rate": 0.00047402070784146303,
      "loss": 3.1759,
      "step": 69818
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.6667604446411133,
      "learning_rate": 0.0004740173757870053,
      "loss": 3.0334,
      "step": 69819
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4638447761535645,
      "learning_rate": 0.00047401404370019454,
      "loss": 2.9805,
      "step": 69820
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.620604157447815,
      "learning_rate": 0.0004740107115810312,
      "loss": 2.9865,
      "step": 69821
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5120151042938232,
      "learning_rate": 0.00047400737942951605,
      "loss": 2.9626,
      "step": 69822
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7147212028503418,
      "learning_rate": 0.0004740040472456496,
      "loss": 2.9324,
      "step": 69823
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8131253719329834,
      "learning_rate": 0.0004740007150294325,
      "loss": 3.2071,
      "step": 69824
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.181177854537964,
      "learning_rate": 0.0004739973827808654,
      "loss": 2.9351,
      "step": 69825
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5498249530792236,
      "learning_rate": 0.00047399405049994894,
      "loss": 3.2621,
      "step": 69826
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5040398836135864,
      "learning_rate": 0.00047399071818668366,
      "loss": 3.088,
      "step": 69827
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7694590091705322,
      "learning_rate": 0.00047398738584107024,
      "loss": 3.1297,
      "step": 69828
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5279662609100342,
      "learning_rate": 0.00047398405346310926,
      "loss": 3.054,
      "step": 69829
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5932921171188354,
      "learning_rate": 0.00047398072105280144,
      "loss": 2.9878,
      "step": 69830
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7136894464492798,
      "learning_rate": 0.0004739773886101472,
      "loss": 2.9687,
      "step": 69831
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5014371871948242,
      "learning_rate": 0.0004739740561351474,
      "loss": 3.0968,
      "step": 69832
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4973464012145996,
      "learning_rate": 0.00047397072362780247,
      "loss": 2.9723,
      "step": 69833
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2883200645446777,
      "learning_rate": 0.00047396739108811306,
      "loss": 2.8773,
      "step": 69834
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7378871440887451,
      "learning_rate": 0.00047396405851607984,
      "loss": 3.1081,
      "step": 69835
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.004462718963623,
      "learning_rate": 0.0004739607259117035,
      "loss": 3.0962,
      "step": 69836
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2613322734832764,
      "learning_rate": 0.0004739573932749845,
      "loss": 3.1216,
      "step": 69837
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7031073570251465,
      "learning_rate": 0.00047395406060592356,
      "loss": 2.8694,
      "step": 69838
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3132081031799316,
      "learning_rate": 0.0004739507279045213,
      "loss": 2.9356,
      "step": 69839
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.194213390350342,
      "learning_rate": 0.00047394739517077827,
      "loss": 2.7181,
      "step": 69840
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6531181335449219,
      "learning_rate": 0.00047394406240469514,
      "loss": 2.8221,
      "step": 69841
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8018369674682617,
      "learning_rate": 0.0004739407296062726,
      "loss": 3.3806,
      "step": 69842
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1616110801696777,
      "learning_rate": 0.00047393739677551117,
      "loss": 3.0914,
      "step": 69843
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.889094591140747,
      "learning_rate": 0.00047393406391241146,
      "loss": 3.2629,
      "step": 69844
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.685334324836731,
      "learning_rate": 0.0004739307310169741,
      "loss": 2.805,
      "step": 69845
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9241688251495361,
      "learning_rate": 0.00047392739808919975,
      "loss": 3.1436,
      "step": 69846
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9419708251953125,
      "learning_rate": 0.00047392406512908905,
      "loss": 2.8548,
      "step": 69847
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.80112886428833,
      "learning_rate": 0.00047392073213664256,
      "loss": 3.2289,
      "step": 69848
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.873763918876648,
      "learning_rate": 0.00047391739911186093,
      "loss": 3.0072,
      "step": 69849
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.70883047580719,
      "learning_rate": 0.00047391406605474477,
      "loss": 2.8836,
      "step": 69850
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6505978107452393,
      "learning_rate": 0.0004739107329652947,
      "loss": 3.1578,
      "step": 69851
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.403285264968872,
      "learning_rate": 0.00047390739984351136,
      "loss": 3.2139,
      "step": 69852
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.409210443496704,
      "learning_rate": 0.0004739040666893953,
      "loss": 3.02,
      "step": 69853
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.670822024345398,
      "learning_rate": 0.0004739007335029473,
      "loss": 2.9227,
      "step": 69854
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8714690208435059,
      "learning_rate": 0.00047389740028416777,
      "loss": 3.2968,
      "step": 69855
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6498910188674927,
      "learning_rate": 0.00047389406703305737,
      "loss": 2.9012,
      "step": 69856
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4083898067474365,
      "learning_rate": 0.0004738907337496169,
      "loss": 3.1754,
      "step": 69857
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9604789018630981,
      "learning_rate": 0.00047388740043384687,
      "loss": 3.1471,
      "step": 69858
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5114363431930542,
      "learning_rate": 0.00047388406708574776,
      "loss": 3.1513,
      "step": 69859
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6922588348388672,
      "learning_rate": 0.0004738807337053205,
      "loss": 3.0074,
      "step": 69860
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6405906677246094,
      "learning_rate": 0.0004738774002925654,
      "loss": 2.9788,
      "step": 69861
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5382099151611328,
      "learning_rate": 0.0004738740668474832,
      "loss": 3.0254,
      "step": 69862
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5506644248962402,
      "learning_rate": 0.0004738707333700746,
      "loss": 2.8886,
      "step": 69863
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5065518617630005,
      "learning_rate": 0.00047386739986034015,
      "loss": 3.0361,
      "step": 69864
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6970443725585938,
      "learning_rate": 0.0004738640663182804,
      "loss": 3.0205,
      "step": 69865
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6060417890548706,
      "learning_rate": 0.0004738607327438961,
      "loss": 2.7898,
      "step": 69866
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6696091890335083,
      "learning_rate": 0.0004738573991371878,
      "loss": 3.0632,
      "step": 69867
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.666813373565674,
      "learning_rate": 0.00047385406549815606,
      "loss": 2.9049,
      "step": 69868
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.7997829914093018,
      "learning_rate": 0.00047385073182680163,
      "loss": 2.8659,
      "step": 69869
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4566110372543335,
      "learning_rate": 0.000473847398123125,
      "loss": 3.1841,
      "step": 69870
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.5268683433532715,
      "learning_rate": 0.000473844064387127,
      "loss": 3.1291,
      "step": 69871
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.6402440071105957,
      "learning_rate": 0.000473840730618808,
      "loss": 2.9847,
      "step": 69872
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2776174545288086,
      "learning_rate": 0.0004738373968181688,
      "loss": 2.9535,
      "step": 69873
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4853500127792358,
      "learning_rate": 0.0004738340629852099,
      "loss": 3.0838,
      "step": 69874
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.5887513160705566,
      "learning_rate": 0.000473830729119932,
      "loss": 3.0943,
      "step": 69875
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.0526556968688965,
      "learning_rate": 0.00047382739522233556,
      "loss": 3.0536,
      "step": 69876
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7142274379730225,
      "learning_rate": 0.0004738240612924214,
      "loss": 3.302,
      "step": 69877
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1398446559906006,
      "learning_rate": 0.0004738207273301901,
      "loss": 2.8438,
      "step": 69878
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.3849146366119385,
      "learning_rate": 0.00047381739333564224,
      "loss": 3.0243,
      "step": 69879
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.578836679458618,
      "learning_rate": 0.0004738140593087784,
      "loss": 2.9819,
      "step": 69880
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.657899260520935,
      "learning_rate": 0.0004738107252495994,
      "loss": 2.9828,
      "step": 69881
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.472116708755493,
      "learning_rate": 0.0004738073911581055,
      "loss": 2.9386,
      "step": 69882
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.8456344604492188,
      "learning_rate": 0.00047380405703429757,
      "loss": 2.9527,
      "step": 69883
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4736933708190918,
      "learning_rate": 0.00047380072287817623,
      "loss": 3.1486,
      "step": 69884
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.7550368309020996,
      "learning_rate": 0.0004737973886897421,
      "loss": 3.1862,
      "step": 69885
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.420973062515259,
      "learning_rate": 0.00047379405446899565,
      "loss": 2.8734,
      "step": 69886
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.065951347351074,
      "learning_rate": 0.0004737907202159377,
      "loss": 2.9963,
      "step": 69887
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5383551120758057,
      "learning_rate": 0.00047378738593056873,
      "loss": 3.2485,
      "step": 69888
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.9589788913726807,
      "learning_rate": 0.00047378405161288935,
      "loss": 3.079,
      "step": 69889
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3590087890625,
      "learning_rate": 0.00047378071726290037,
      "loss": 3.1003,
      "step": 69890
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.928973436355591,
      "learning_rate": 0.00047377738288060216,
      "loss": 2.9956,
      "step": 69891
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.649139165878296,
      "learning_rate": 0.0004737740484659954,
      "loss": 3.1929,
      "step": 69892
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.9152767658233643,
      "learning_rate": 0.000473770714019081,
      "loss": 2.8377,
      "step": 69893
    },
    {
      "epoch": 0.91,
      "grad_norm": 4.541345596313477,
      "learning_rate": 0.0004737673795398591,
      "loss": 2.8829,
      "step": 69894
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4606412649154663,
      "learning_rate": 0.00047376404502833067,
      "loss": 2.6517,
      "step": 69895
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8703466653823853,
      "learning_rate": 0.00047376071048449615,
      "loss": 2.805,
      "step": 69896
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4227739572525024,
      "learning_rate": 0.0004737573759083564,
      "loss": 3.0018,
      "step": 69897
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8449115753173828,
      "learning_rate": 0.00047375404129991174,
      "loss": 2.9923,
      "step": 69898
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.864946961402893,
      "learning_rate": 0.000473750706659163,
      "loss": 2.8599,
      "step": 69899
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.556982159614563,
      "learning_rate": 0.00047374737198611065,
      "loss": 3.0893,
      "step": 69900
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5302778482437134,
      "learning_rate": 0.0004737440372807554,
      "loss": 3.3204,
      "step": 69901
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6501846313476562,
      "learning_rate": 0.00047374070254309786,
      "loss": 2.7854,
      "step": 69902
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5799204111099243,
      "learning_rate": 0.0004737373677731387,
      "loss": 3.4684,
      "step": 69903
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4040319919586182,
      "learning_rate": 0.0004737340329708784,
      "loss": 2.8821,
      "step": 69904
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.197888135910034,
      "learning_rate": 0.0004737306981363177,
      "loss": 3.307,
      "step": 69905
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9380158185958862,
      "learning_rate": 0.0004737273632694572,
      "loss": 3.1812,
      "step": 69906
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.563740611076355,
      "learning_rate": 0.0004737240283702975,
      "loss": 2.7761,
      "step": 69907
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5196099281311035,
      "learning_rate": 0.0004737206934388392,
      "loss": 2.6968,
      "step": 69908
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8763182163238525,
      "learning_rate": 0.0004737173584750829,
      "loss": 2.9522,
      "step": 69909
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.410648226737976,
      "learning_rate": 0.00047371402347902935,
      "loss": 2.844,
      "step": 69910
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6332927942276,
      "learning_rate": 0.0004737106884506791,
      "loss": 3.0044,
      "step": 69911
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5553416013717651,
      "learning_rate": 0.00047370735339003265,
      "loss": 3.026,
      "step": 69912
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6502805948257446,
      "learning_rate": 0.0004737040182970908,
      "loss": 3.044,
      "step": 69913
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4732959270477295,
      "learning_rate": 0.000473700683171854,
      "loss": 3.1049,
      "step": 69914
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8346529006958008,
      "learning_rate": 0.00047369734801432306,
      "loss": 2.9375,
      "step": 69915
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5094847679138184,
      "learning_rate": 0.0004736940128244985,
      "loss": 3.1267,
      "step": 69916
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7716089487075806,
      "learning_rate": 0.00047369067760238097,
      "loss": 3.0969,
      "step": 69917
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8829373121261597,
      "learning_rate": 0.00047368734234797097,
      "loss": 2.9619,
      "step": 69918
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.748131513595581,
      "learning_rate": 0.00047368400706126925,
      "loss": 3.0543,
      "step": 69919
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.347705602645874,
      "learning_rate": 0.0004736806717422765,
      "loss": 2.9822,
      "step": 69920
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9133453369140625,
      "learning_rate": 0.0004736773363909931,
      "loss": 2.9213,
      "step": 69921
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3337914943695068,
      "learning_rate": 0.00047367400100741983,
      "loss": 2.9418,
      "step": 69922
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9555684328079224,
      "learning_rate": 0.0004736706655915573,
      "loss": 2.9691,
      "step": 69923
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8396177291870117,
      "learning_rate": 0.00047366733014340615,
      "loss": 3.1542,
      "step": 69924
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4962908029556274,
      "learning_rate": 0.00047366399466296694,
      "loss": 3.2336,
      "step": 69925
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.791101098060608,
      "learning_rate": 0.0004736606591502403,
      "loss": 2.9661,
      "step": 69926
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4813450574874878,
      "learning_rate": 0.00047365732360522687,
      "loss": 2.9084,
      "step": 69927
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.549806833267212,
      "learning_rate": 0.0004736539880279273,
      "loss": 3.1975,
      "step": 69928
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2692534923553467,
      "learning_rate": 0.0004736506524183421,
      "loss": 2.7458,
      "step": 69929
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.833402156829834,
      "learning_rate": 0.0004736473167764721,
      "loss": 2.9347,
      "step": 69930
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.586988687515259,
      "learning_rate": 0.00047364398110231765,
      "loss": 2.9425,
      "step": 69931
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6977087259292603,
      "learning_rate": 0.00047364064539587953,
      "loss": 3.0596,
      "step": 69932
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5677310228347778,
      "learning_rate": 0.0004736373096571584,
      "loss": 3.0452,
      "step": 69933
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.550711154937744,
      "learning_rate": 0.00047363397388615477,
      "loss": 3.0611,
      "step": 69934
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7432951927185059,
      "learning_rate": 0.0004736306380828693,
      "loss": 3.2616,
      "step": 69935
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0500264167785645,
      "learning_rate": 0.00047362730224730266,
      "loss": 3.1674,
      "step": 69936
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6665359735488892,
      "learning_rate": 0.00047362396637945536,
      "loss": 3.0942,
      "step": 69937
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.606157660484314,
      "learning_rate": 0.0004736206304793281,
      "loss": 3.0973,
      "step": 69938
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7602351903915405,
      "learning_rate": 0.00047361729454692154,
      "loss": 2.8258,
      "step": 69939
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7424176931381226,
      "learning_rate": 0.0004736139585822362,
      "loss": 2.9697,
      "step": 69940
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6101429462432861,
      "learning_rate": 0.00047361062258527274,
      "loss": 2.868,
      "step": 69941
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7503117322921753,
      "learning_rate": 0.0004736072865560319,
      "loss": 3.1944,
      "step": 69942
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0620737075805664,
      "learning_rate": 0.00047360395049451405,
      "loss": 2.9995,
      "step": 69943
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7114057540893555,
      "learning_rate": 0.00047360061440072,
      "loss": 3.0956,
      "step": 69944
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5699095726013184,
      "learning_rate": 0.0004735972782746504,
      "loss": 3.2538,
      "step": 69945
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2451515197753906,
      "learning_rate": 0.0004735939421163056,
      "loss": 3.0317,
      "step": 69946
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9397006034851074,
      "learning_rate": 0.0004735906059256865,
      "loss": 2.8768,
      "step": 69947
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.1337337493896484,
      "learning_rate": 0.00047358726970279377,
      "loss": 2.8703,
      "step": 69948
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7341010570526123,
      "learning_rate": 0.00047358393344762776,
      "loss": 3.0188,
      "step": 69949
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6660969257354736,
      "learning_rate": 0.00047358059716018917,
      "loss": 3.234,
      "step": 69950
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0484707355499268,
      "learning_rate": 0.00047357726084047876,
      "loss": 2.866,
      "step": 69951
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.672847867012024,
      "learning_rate": 0.00047357392448849707,
      "loss": 2.8705,
      "step": 69952
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6882412433624268,
      "learning_rate": 0.0004735705881042446,
      "loss": 3.1116,
      "step": 69953
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8687294721603394,
      "learning_rate": 0.0004735672516877222,
      "loss": 3.1807,
      "step": 69954
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3291270732879639,
      "learning_rate": 0.0004735639152389303,
      "loss": 2.9411,
      "step": 69955
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.375058889389038,
      "learning_rate": 0.00047356057875786963,
      "loss": 2.8566,
      "step": 69956
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1640892028808594,
      "learning_rate": 0.0004735572422445407,
      "loss": 2.913,
      "step": 69957
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.652725338935852,
      "learning_rate": 0.00047355390569894427,
      "loss": 3.1704,
      "step": 69958
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8381060361862183,
      "learning_rate": 0.0004735505691210809,
      "loss": 3.0629,
      "step": 69959
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4400006532669067,
      "learning_rate": 0.00047354723251095125,
      "loss": 3.3457,
      "step": 69960
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5656956434249878,
      "learning_rate": 0.0004735438958685558,
      "loss": 3.0819,
      "step": 69961
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7016007900238037,
      "learning_rate": 0.00047354055919389523,
      "loss": 2.9656,
      "step": 69962
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8060903549194336,
      "learning_rate": 0.0004735372224869703,
      "loss": 2.9264,
      "step": 69963
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7343138456344604,
      "learning_rate": 0.0004735338857477815,
      "loss": 3.0624,
      "step": 69964
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9736404418945312,
      "learning_rate": 0.0004735305489763294,
      "loss": 3.0246,
      "step": 69965
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.839323878288269,
      "learning_rate": 0.00047352721217261477,
      "loss": 3.1619,
      "step": 69966
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.936033010482788,
      "learning_rate": 0.00047352387533663813,
      "loss": 3.0275,
      "step": 69967
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9335963726043701,
      "learning_rate": 0.0004735205384684002,
      "loss": 3.1143,
      "step": 69968
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9355385303497314,
      "learning_rate": 0.00047351720156790144,
      "loss": 2.9724,
      "step": 69969
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.488374948501587,
      "learning_rate": 0.0004735138646351425,
      "loss": 2.9155,
      "step": 69970
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7414661645889282,
      "learning_rate": 0.00047351052767012424,
      "loss": 3.0713,
      "step": 69971
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6483086347579956,
      "learning_rate": 0.00047350719067284694,
      "loss": 2.9899,
      "step": 69972
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5645966529846191,
      "learning_rate": 0.00047350385364331143,
      "loss": 3.1178,
      "step": 69973
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5754932165145874,
      "learning_rate": 0.00047350051658151825,
      "loss": 3.183,
      "step": 69974
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6889917850494385,
      "learning_rate": 0.0004734971794874681,
      "loss": 3.0806,
      "step": 69975
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.693001389503479,
      "learning_rate": 0.00047349384236116163,
      "loss": 2.9112,
      "step": 69976
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6869630813598633,
      "learning_rate": 0.0004734905052025992,
      "loss": 3.0439,
      "step": 69977
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6714659929275513,
      "learning_rate": 0.00047348716801178165,
      "loss": 3.0055,
      "step": 69978
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6462318897247314,
      "learning_rate": 0.0004734838307887096,
      "loss": 2.9284,
      "step": 69979
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6629458665847778,
      "learning_rate": 0.0004734804935333837,
      "loss": 2.9625,
      "step": 69980
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4379562139511108,
      "learning_rate": 0.00047347715624580436,
      "loss": 3.091,
      "step": 69981
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6157970428466797,
      "learning_rate": 0.00047347381892597247,
      "loss": 3.2636,
      "step": 69982
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9865623712539673,
      "learning_rate": 0.0004734704815738885,
      "loss": 2.7777,
      "step": 69983
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5739473104476929,
      "learning_rate": 0.00047346714418955297,
      "loss": 3.1265,
      "step": 69984
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7931506633758545,
      "learning_rate": 0.00047346380677296677,
      "loss": 2.959,
      "step": 69985
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8484392166137695,
      "learning_rate": 0.00047346046932413027,
      "loss": 3.0389,
      "step": 69986
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6901962757110596,
      "learning_rate": 0.00047345713184304423,
      "loss": 2.9057,
      "step": 69987
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.456083059310913,
      "learning_rate": 0.00047345379432970936,
      "loss": 2.9383,
      "step": 69988
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6431913375854492,
      "learning_rate": 0.000473450456784126,
      "loss": 3.0287,
      "step": 69989
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9322843551635742,
      "learning_rate": 0.0004734471192062949,
      "loss": 3.0414,
      "step": 69990
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.537032127380371,
      "learning_rate": 0.0004734437815962169,
      "loss": 3.4198,
      "step": 69991
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5777381658554077,
      "learning_rate": 0.0004734404439538922,
      "loss": 3.1258,
      "step": 69992
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6465873718261719,
      "learning_rate": 0.00047343710627932177,
      "loss": 2.8891,
      "step": 69993
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4962830543518066,
      "learning_rate": 0.0004734337685725061,
      "loss": 3.1587,
      "step": 69994
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.91348397731781,
      "learning_rate": 0.00047343043083344585,
      "loss": 3.1961,
      "step": 69995
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5943948030471802,
      "learning_rate": 0.0004734270930621415,
      "loss": 2.9342,
      "step": 69996
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.096163511276245,
      "learning_rate": 0.00047342375525859384,
      "loss": 3.0812,
      "step": 69997
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8945645093917847,
      "learning_rate": 0.0004734204174228034,
      "loss": 2.9712,
      "step": 69998
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6190142631530762,
      "learning_rate": 0.00047341707955477093,
      "loss": 3.03,
      "step": 69999
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.636597156524658,
      "learning_rate": 0.0004734137416544969,
      "loss": 3.0521,
      "step": 70000
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9552719593048096,
      "learning_rate": 0.00047341040372198196,
      "loss": 2.8954,
      "step": 70001
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0764269828796387,
      "learning_rate": 0.00047340706575722667,
      "loss": 2.9405,
      "step": 70002
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9386035203933716,
      "learning_rate": 0.00047340372776023186,
      "loss": 3.1264,
      "step": 70003
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.4836645126342773,
      "learning_rate": 0.000473400389730998,
      "loss": 2.9814,
      "step": 70004
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8918592929840088,
      "learning_rate": 0.0004733970516695258,
      "loss": 3.0967,
      "step": 70005
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0186166763305664,
      "learning_rate": 0.00047339371357581564,
      "loss": 3.0079,
      "step": 70006
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1969549655914307,
      "learning_rate": 0.00047339037544986837,
      "loss": 2.9796,
      "step": 70007
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7931536436080933,
      "learning_rate": 0.00047338703729168463,
      "loss": 3.1337,
      "step": 70008
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.798025131225586,
      "learning_rate": 0.00047338369910126496,
      "loss": 3.1628,
      "step": 70009
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.618438720703125,
      "learning_rate": 0.00047338036087860995,
      "loss": 2.9678,
      "step": 70010
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5502207279205322,
      "learning_rate": 0.00047337702262372026,
      "loss": 3.0215,
      "step": 70011
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9732030630111694,
      "learning_rate": 0.0004733736843365966,
      "loss": 3.1073,
      "step": 70012
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.729203462600708,
      "learning_rate": 0.00047337034601723936,
      "loss": 2.9726,
      "step": 70013
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7537678480148315,
      "learning_rate": 0.00047336700766564935,
      "loss": 3.1531,
      "step": 70014
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6496094465255737,
      "learning_rate": 0.00047336366928182725,
      "loss": 2.7138,
      "step": 70015
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.009202718734741,
      "learning_rate": 0.00047336033086577335,
      "loss": 2.9359,
      "step": 70016
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7522637844085693,
      "learning_rate": 0.0004733569924174886,
      "loss": 2.7395,
      "step": 70017
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7670457363128662,
      "learning_rate": 0.00047335365393697367,
      "loss": 3.1126,
      "step": 70018
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5827113389968872,
      "learning_rate": 0.00047335031542422877,
      "loss": 2.9201,
      "step": 70019
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.790113925933838,
      "learning_rate": 0.00047334697687925493,
      "loss": 3.0593,
      "step": 70020
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6654987335205078,
      "learning_rate": 0.00047334363830205253,
      "loss": 3.1668,
      "step": 70021
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7992810010910034,
      "learning_rate": 0.0004733402996926224,
      "loss": 3.1468,
      "step": 70022
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5030741691589355,
      "learning_rate": 0.0004733369610509648,
      "loss": 3.0094,
      "step": 70023
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5740084648132324,
      "learning_rate": 0.0004733336223770808,
      "loss": 3.2415,
      "step": 70024
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.347006916999817,
      "learning_rate": 0.00047333028367097077,
      "loss": 2.9028,
      "step": 70025
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.857187032699585,
      "learning_rate": 0.00047332694493263527,
      "loss": 2.9553,
      "step": 70026
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.581030249595642,
      "learning_rate": 0.0004733236061620751,
      "loss": 2.9779,
      "step": 70027
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6467260122299194,
      "learning_rate": 0.0004733202673592909,
      "loss": 2.9452,
      "step": 70028
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8554974794387817,
      "learning_rate": 0.000473316928524283,
      "loss": 2.724,
      "step": 70029
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.852121114730835,
      "learning_rate": 0.00047331358965705224,
      "loss": 3.0814,
      "step": 70030
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9238632917404175,
      "learning_rate": 0.0004733102507575993,
      "loss": 2.8437,
      "step": 70031
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2399697303771973,
      "learning_rate": 0.0004733069118259247,
      "loss": 3.3222,
      "step": 70032
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8064422607421875,
      "learning_rate": 0.0004733035728620291,
      "loss": 3.0728,
      "step": 70033
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.020038366317749,
      "learning_rate": 0.00047330023386591303,
      "loss": 3.1257,
      "step": 70034
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6903364658355713,
      "learning_rate": 0.00047329689483757714,
      "loss": 2.8912,
      "step": 70035
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6025168895721436,
      "learning_rate": 0.0004732935557770221,
      "loss": 3.0104,
      "step": 70036
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.546872854232788,
      "learning_rate": 0.0004732902166842487,
      "loss": 3.2476,
      "step": 70037
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6897614002227783,
      "learning_rate": 0.00047328687755925715,
      "loss": 2.9057,
      "step": 70038
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9690319299697876,
      "learning_rate": 0.0004732835384020484,
      "loss": 3.0327,
      "step": 70039
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8686411380767822,
      "learning_rate": 0.000473280199212623,
      "loss": 2.961,
      "step": 70040
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.937703251838684,
      "learning_rate": 0.00047327685999098144,
      "loss": 2.9248,
      "step": 70041
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1891722679138184,
      "learning_rate": 0.0004732735207371245,
      "loss": 3.2307,
      "step": 70042
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9271944761276245,
      "learning_rate": 0.0004732701814510527,
      "loss": 2.9738,
      "step": 70043
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.34346604347229,
      "learning_rate": 0.0004732668421327668,
      "loss": 3.1131,
      "step": 70044
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.90290367603302,
      "learning_rate": 0.00047326350278226723,
      "loss": 3.0641,
      "step": 70045
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0240676403045654,
      "learning_rate": 0.00047326016339955477,
      "loss": 2.7394,
      "step": 70046
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.0353684425354004,
      "learning_rate": 0.0004732568239846299,
      "loss": 3.0872,
      "step": 70047
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1868202686309814,
      "learning_rate": 0.00047325348453749333,
      "loss": 2.8417,
      "step": 70048
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7212398052215576,
      "learning_rate": 0.00047325014505814576,
      "loss": 3.1226,
      "step": 70049
    },
    {
      "epoch": 0.91,
      "grad_norm": 5.863486289978027,
      "learning_rate": 0.0004732468055465875,
      "loss": 2.7724,
      "step": 70050
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.8849987983703613,
      "learning_rate": 0.0004732434660028196,
      "loss": 2.7585,
      "step": 70051
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.5063931941986084,
      "learning_rate": 0.00047324012642684245,
      "loss": 2.9934,
      "step": 70052
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.856086254119873,
      "learning_rate": 0.00047323678681865665,
      "loss": 3.0586,
      "step": 70053
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.953575849533081,
      "learning_rate": 0.00047323344717826283,
      "loss": 2.9594,
      "step": 70054
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.62814998626709,
      "learning_rate": 0.0004732301075056617,
      "loss": 3.0527,
      "step": 70055
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5348286628723145,
      "learning_rate": 0.0004732267678008538,
      "loss": 2.7056,
      "step": 70056
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.714322805404663,
      "learning_rate": 0.00047322342806383974,
      "loss": 3.2412,
      "step": 70057
    },
    {
      "epoch": 0.91,
      "grad_norm": 4.106449604034424,
      "learning_rate": 0.0004732200882946202,
      "loss": 2.9269,
      "step": 70058
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.091115951538086,
      "learning_rate": 0.0004732167484931958,
      "loss": 2.9267,
      "step": 70059
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7558881044387817,
      "learning_rate": 0.00047321340865956707,
      "loss": 3.0328,
      "step": 70060
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8890936374664307,
      "learning_rate": 0.0004732100687937348,
      "loss": 2.924,
      "step": 70061
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7234442234039307,
      "learning_rate": 0.00047320672889569944,
      "loss": 3.2746,
      "step": 70062
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.512820243835449,
      "learning_rate": 0.0004732033889654617,
      "loss": 2.9032,
      "step": 70063
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0254058837890625,
      "learning_rate": 0.0004732000490030222,
      "loss": 2.9278,
      "step": 70064
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.8691325187683105,
      "learning_rate": 0.0004731967090083815,
      "loss": 3.1023,
      "step": 70065
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6299073696136475,
      "learning_rate": 0.00047319336898154026,
      "loss": 3.189,
      "step": 70066
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.829878330230713,
      "learning_rate": 0.0004731900289224991,
      "loss": 3.2341,
      "step": 70067
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.816440463066101,
      "learning_rate": 0.0004731866888312587,
      "loss": 3.0217,
      "step": 70068
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.9010863304138184,
      "learning_rate": 0.0004731833487078195,
      "loss": 3.1545,
      "step": 70069
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.70647931098938,
      "learning_rate": 0.00047318000855218237,
      "loss": 3.0466,
      "step": 70070
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.056255578994751,
      "learning_rate": 0.00047317666836434776,
      "loss": 3.2632,
      "step": 70071
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4468775987625122,
      "learning_rate": 0.0004731733281443164,
      "loss": 3.0018,
      "step": 70072
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8351755142211914,
      "learning_rate": 0.0004731699878920888,
      "loss": 3.155,
      "step": 70073
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.115139961242676,
      "learning_rate": 0.0004731666476076656,
      "loss": 3.1052,
      "step": 70074
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2354798316955566,
      "learning_rate": 0.0004731633072910475,
      "loss": 2.8943,
      "step": 70075
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5996291637420654,
      "learning_rate": 0.00047315996694223506,
      "loss": 3.0741,
      "step": 70076
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9120088815689087,
      "learning_rate": 0.0004731566265612289,
      "loss": 2.8507,
      "step": 70077
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.911445379257202,
      "learning_rate": 0.0004731532861480297,
      "loss": 3.1276,
      "step": 70078
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9032925367355347,
      "learning_rate": 0.000473149945702638,
      "loss": 2.8838,
      "step": 70079
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6165821552276611,
      "learning_rate": 0.00047314660522505445,
      "loss": 3.0615,
      "step": 70080
    },
    {
      "epoch": 0.91,
      "grad_norm": 4.47102689743042,
      "learning_rate": 0.0004731432647152797,
      "loss": 2.7295,
      "step": 70081
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.185558319091797,
      "learning_rate": 0.00047313992417331434,
      "loss": 3.1654,
      "step": 70082
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6152329444885254,
      "learning_rate": 0.00047313658359915894,
      "loss": 2.8848,
      "step": 70083
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5892186164855957,
      "learning_rate": 0.0004731332429928142,
      "loss": 2.9172,
      "step": 70084
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7375855445861816,
      "learning_rate": 0.0004731299023542808,
      "loss": 2.9485,
      "step": 70085
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0589826107025146,
      "learning_rate": 0.00047312656168355924,
      "loss": 2.9662,
      "step": 70086
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.7662672996520996,
      "learning_rate": 0.00047312322098065016,
      "loss": 2.9675,
      "step": 70087
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0709099769592285,
      "learning_rate": 0.00047311988024555427,
      "loss": 2.9427,
      "step": 70088
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.672594428062439,
      "learning_rate": 0.00047311653947827205,
      "loss": 2.9954,
      "step": 70089
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.701301336288452,
      "learning_rate": 0.00047311319867880425,
      "loss": 3.359,
      "step": 70090
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.572475790977478,
      "learning_rate": 0.0004731098578471514,
      "loss": 3.1218,
      "step": 70091
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6408007144927979,
      "learning_rate": 0.00047310651698331417,
      "loss": 3.3367,
      "step": 70092
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3564711809158325,
      "learning_rate": 0.0004731031760872932,
      "loss": 2.8048,
      "step": 70093
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1623308658599854,
      "learning_rate": 0.000473099835159089,
      "loss": 3.0484,
      "step": 70094
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.5577738285064697,
      "learning_rate": 0.0004730964941987024,
      "loss": 2.9709,
      "step": 70095
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7811224460601807,
      "learning_rate": 0.0004730931532061338,
      "loss": 3.0614,
      "step": 70096
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6811071634292603,
      "learning_rate": 0.0004730898121813839,
      "loss": 2.9978,
      "step": 70097
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0344135761260986,
      "learning_rate": 0.0004730864711244534,
      "loss": 3.0209,
      "step": 70098
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.964111566543579,
      "learning_rate": 0.00047308313003534286,
      "loss": 2.8878,
      "step": 70099
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6172086000442505,
      "learning_rate": 0.00047307978891405285,
      "loss": 2.9854,
      "step": 70100
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7618016004562378,
      "learning_rate": 0.00047307644776058415,
      "loss": 2.9414,
      "step": 70101
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5800056457519531,
      "learning_rate": 0.0004730731065749371,
      "loss": 3.0583,
      "step": 70102
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6105390787124634,
      "learning_rate": 0.00047306976535711265,
      "loss": 2.8803,
      "step": 70103
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4707354307174683,
      "learning_rate": 0.00047306642410711115,
      "loss": 3.0442,
      "step": 70104
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0958595275878906,
      "learning_rate": 0.0004730630828249334,
      "loss": 3.0621,
      "step": 70105
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7564566135406494,
      "learning_rate": 0.00047305974151057995,
      "loss": 3.2248,
      "step": 70106
    },
    {
      "epoch": 0.91,
      "grad_norm": 7.0172038078308105,
      "learning_rate": 0.00047305640016405144,
      "loss": 3.2022,
      "step": 70107
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.010493755340576,
      "learning_rate": 0.00047305305878534836,
      "loss": 3.1654,
      "step": 70108
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7944265604019165,
      "learning_rate": 0.0004730497173744716,
      "loss": 3.1164,
      "step": 70109
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5820462703704834,
      "learning_rate": 0.0004730463759314216,
      "loss": 3.1941,
      "step": 70110
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6067097187042236,
      "learning_rate": 0.00047304303445619894,
      "loss": 3.117,
      "step": 70111
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5682611465454102,
      "learning_rate": 0.00047303969294880437,
      "loss": 2.7845,
      "step": 70112
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.71996009349823,
      "learning_rate": 0.00047303635140923843,
      "loss": 3.341,
      "step": 70113
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1663360595703125,
      "learning_rate": 0.0004730330098375018,
      "loss": 3.1062,
      "step": 70114
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.5258212089538574,
      "learning_rate": 0.000473029668233595,
      "loss": 3.008,
      "step": 70115
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.697890043258667,
      "learning_rate": 0.0004730263265975188,
      "loss": 2.7488,
      "step": 70116
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6498136520385742,
      "learning_rate": 0.0004730229849292737,
      "loss": 2.7866,
      "step": 70117
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3361144065856934,
      "learning_rate": 0.0004730196432288604,
      "loss": 2.8972,
      "step": 70118
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5708372592926025,
      "learning_rate": 0.0004730163014962795,
      "loss": 3.1335,
      "step": 70119
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4037649631500244,
      "learning_rate": 0.0004730129597315315,
      "loss": 3.0028,
      "step": 70120
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4573709964752197,
      "learning_rate": 0.0004730096179346171,
      "loss": 3.0148,
      "step": 70121
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8149441480636597,
      "learning_rate": 0.0004730062761055371,
      "loss": 3.1687,
      "step": 70122
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.468476414680481,
      "learning_rate": 0.0004730029342442919,
      "loss": 3.0602,
      "step": 70123
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.530545949935913,
      "learning_rate": 0.00047299959235088215,
      "loss": 2.9492,
      "step": 70124
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.281716823577881,
      "learning_rate": 0.0004729962504253086,
      "loss": 3.3356,
      "step": 70125
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7067452669143677,
      "learning_rate": 0.0004729929084675716,
      "loss": 3.1326,
      "step": 70126
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7328258752822876,
      "learning_rate": 0.0004729895664776721,
      "loss": 2.9768,
      "step": 70127
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4838330745697021,
      "learning_rate": 0.0004729862244556106,
      "loss": 2.9533,
      "step": 70128
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.609236240386963,
      "learning_rate": 0.0004729828824013877,
      "loss": 2.8322,
      "step": 70129
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.242748260498047,
      "learning_rate": 0.0004729795403150039,
      "loss": 2.9917,
      "step": 70130
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5119922161102295,
      "learning_rate": 0.00047297619819645996,
      "loss": 2.7255,
      "step": 70131
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.6607329845428467,
      "learning_rate": 0.0004729728560457566,
      "loss": 3.0089,
      "step": 70132
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5724024772644043,
      "learning_rate": 0.00047296951386289413,
      "loss": 2.764,
      "step": 70133
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6929398775100708,
      "learning_rate": 0.0004729661716478735,
      "loss": 2.8825,
      "step": 70134
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3156596422195435,
      "learning_rate": 0.00047296282940069515,
      "loss": 3.0293,
      "step": 70135
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7533200979232788,
      "learning_rate": 0.00047295948712135975,
      "loss": 3.1002,
      "step": 70136
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.996307373046875,
      "learning_rate": 0.00047295614480986794,
      "loss": 3.1933,
      "step": 70137
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3311541080474854,
      "learning_rate": 0.0004729528024662203,
      "loss": 3.0837,
      "step": 70138
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6777019500732422,
      "learning_rate": 0.0004729494600904175,
      "loss": 3.0305,
      "step": 70139
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5561916828155518,
      "learning_rate": 0.0004729461176824601,
      "loss": 2.8799,
      "step": 70140
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.591940999031067,
      "learning_rate": 0.00047294277524234874,
      "loss": 3.0733,
      "step": 70141
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3814617395401,
      "learning_rate": 0.00047293943277008406,
      "loss": 3.1434,
      "step": 70142
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.568350911140442,
      "learning_rate": 0.00047293609026566677,
      "loss": 2.9738,
      "step": 70143
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7597061395645142,
      "learning_rate": 0.0004729327477290972,
      "loss": 3.1723,
      "step": 70144
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8325200080871582,
      "learning_rate": 0.00047292940516037636,
      "loss": 2.8928,
      "step": 70145
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6283308267593384,
      "learning_rate": 0.0004729260625595046,
      "loss": 3.0996,
      "step": 70146
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9381040334701538,
      "learning_rate": 0.00047292271992648255,
      "loss": 3.2395,
      "step": 70147
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.570665717124939,
      "learning_rate": 0.0004729193772613109,
      "loss": 3.3414,
      "step": 70148
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.583867073059082,
      "learning_rate": 0.0004729160345639905,
      "loss": 3.1942,
      "step": 70149
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7883073091506958,
      "learning_rate": 0.00047291269183452156,
      "loss": 2.9567,
      "step": 70150
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6922177076339722,
      "learning_rate": 0.0004729093490729049,
      "loss": 2.9145,
      "step": 70151
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.783600330352783,
      "learning_rate": 0.0004729060062791412,
      "loss": 2.9683,
      "step": 70152
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8191043138504028,
      "learning_rate": 0.00047290266345323086,
      "loss": 3.2334,
      "step": 70153
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2875449657440186,
      "learning_rate": 0.00047289932059517474,
      "loss": 3.034,
      "step": 70154
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9226723909378052,
      "learning_rate": 0.00047289597770497344,
      "loss": 3.3694,
      "step": 70155
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.022188901901245,
      "learning_rate": 0.00047289263478262743,
      "loss": 2.8499,
      "step": 70156
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8904485702514648,
      "learning_rate": 0.00047288929182813744,
      "loss": 3.1543,
      "step": 70157
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6577396392822266,
      "learning_rate": 0.0004728859488415041,
      "loss": 2.9354,
      "step": 70158
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8138954639434814,
      "learning_rate": 0.00047288260582272797,
      "loss": 2.8079,
      "step": 70159
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.0491042137145996,
      "learning_rate": 0.0004728792627718096,
      "loss": 2.846,
      "step": 70160
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.024661064147949,
      "learning_rate": 0.0004728759196887499,
      "loss": 3.1651,
      "step": 70161
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.7725682258605957,
      "learning_rate": 0.0004728725765735492,
      "loss": 2.9321,
      "step": 70162
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.5320589542388916,
      "learning_rate": 0.0004728692334262082,
      "loss": 3.2251,
      "step": 70163
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.293933391571045,
      "learning_rate": 0.00047286589024672757,
      "loss": 3.0085,
      "step": 70164
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3860759735107422,
      "learning_rate": 0.0004728625470351079,
      "loss": 3.0559,
      "step": 70165
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2083845138549805,
      "learning_rate": 0.0004728592037913499,
      "loss": 2.9856,
      "step": 70166
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.8246657848358154,
      "learning_rate": 0.00047285586051545403,
      "loss": 3.0378,
      "step": 70167
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.325791597366333,
      "learning_rate": 0.000472852517207421,
      "loss": 3.0029,
      "step": 70168
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2159619331359863,
      "learning_rate": 0.00047284917386725145,
      "loss": 3.1862,
      "step": 70169
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4522151947021484,
      "learning_rate": 0.0004728458304949459,
      "loss": 3.1069,
      "step": 70170
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6519826650619507,
      "learning_rate": 0.0004728424870905052,
      "loss": 2.895,
      "step": 70171
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5324172973632812,
      "learning_rate": 0.00047283914365392964,
      "loss": 2.9058,
      "step": 70172
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7141200304031372,
      "learning_rate": 0.0004728358001852202,
      "loss": 3.187,
      "step": 70173
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5248470306396484,
      "learning_rate": 0.0004728324566843772,
      "loss": 3.0172,
      "step": 70174
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.465929627418518,
      "learning_rate": 0.00047282911315140136,
      "loss": 3.1041,
      "step": 70175
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4003283977508545,
      "learning_rate": 0.00047282576958629336,
      "loss": 3.1617,
      "step": 70176
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.3906071186065674,
      "learning_rate": 0.0004728224259890539,
      "loss": 3.28,
      "step": 70177
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5032646656036377,
      "learning_rate": 0.0004728190823596833,
      "loss": 3.111,
      "step": 70178
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7509896755218506,
      "learning_rate": 0.0004728157386981825,
      "loss": 2.9195,
      "step": 70179
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8517603874206543,
      "learning_rate": 0.00047281239500455193,
      "loss": 2.9331,
      "step": 70180
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5125930309295654,
      "learning_rate": 0.00047280905127879233,
      "loss": 2.9632,
      "step": 70181
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.598419189453125,
      "learning_rate": 0.0004728057075209043,
      "loss": 3.1392,
      "step": 70182
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5580434799194336,
      "learning_rate": 0.0004728023637308883,
      "loss": 3.1879,
      "step": 70183
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.285242795944214,
      "learning_rate": 0.00047279901990874516,
      "loss": 3.1998,
      "step": 70184
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.824983835220337,
      "learning_rate": 0.0004727956760544754,
      "loss": 3.1289,
      "step": 70185
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4723225831985474,
      "learning_rate": 0.00047279233216807965,
      "loss": 3.0202,
      "step": 70186
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5940510034561157,
      "learning_rate": 0.00047278898824955855,
      "loss": 3.0783,
      "step": 70187
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.528005599975586,
      "learning_rate": 0.0004727856442989127,
      "loss": 2.8897,
      "step": 70188
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6425689458847046,
      "learning_rate": 0.0004727823003161428,
      "loss": 3.2835,
      "step": 70189
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9190152883529663,
      "learning_rate": 0.0004727789563012494,
      "loss": 2.9969,
      "step": 70190
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5821055173873901,
      "learning_rate": 0.0004727756122542331,
      "loss": 2.969,
      "step": 70191
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7811651229858398,
      "learning_rate": 0.00047277226817509454,
      "loss": 2.8114,
      "step": 70192
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8642823696136475,
      "learning_rate": 0.00047276892406383433,
      "loss": 2.9501,
      "step": 70193
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7341563701629639,
      "learning_rate": 0.0004727655799204531,
      "loss": 3.1452,
      "step": 70194
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5765142440795898,
      "learning_rate": 0.0004727622357449516,
      "loss": 3.1613,
      "step": 70195
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7309205532073975,
      "learning_rate": 0.00047275889153733033,
      "loss": 3.0234,
      "step": 70196
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.0301408767700195,
      "learning_rate": 0.0004727555472975898,
      "loss": 3.0357,
      "step": 70197
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9505442380905151,
      "learning_rate": 0.00047275220302573083,
      "loss": 3.1389,
      "step": 70198
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7726761102676392,
      "learning_rate": 0.000472748858721754,
      "loss": 2.7066,
      "step": 70199
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6468696594238281,
      "learning_rate": 0.00047274551438565983,
      "loss": 3.1562,
      "step": 70200
    },
    {
      "epoch": 0.91,
      "grad_norm": 5.166282653808594,
      "learning_rate": 0.0004727421700174491,
      "loss": 2.8064,
      "step": 70201
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.359097957611084,
      "learning_rate": 0.00047273882561712225,
      "loss": 2.9828,
      "step": 70202
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5171805620193481,
      "learning_rate": 0.00047273548118467994,
      "loss": 3.215,
      "step": 70203
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3476920127868652,
      "learning_rate": 0.00047273213672012303,
      "loss": 3.2274,
      "step": 70204
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.7348155975341797,
      "learning_rate": 0.00047272879222345183,
      "loss": 3.0832,
      "step": 70205
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0918824672698975,
      "learning_rate": 0.00047272544769466705,
      "loss": 3.131,
      "step": 70206
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.566718339920044,
      "learning_rate": 0.0004727221031337694,
      "loss": 3.046,
      "step": 70207
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.6909196376800537,
      "learning_rate": 0.00047271875854075946,
      "loss": 3.0091,
      "step": 70208
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.492353916168213,
      "learning_rate": 0.0004727154139156378,
      "loss": 2.9359,
      "step": 70209
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.0631542205810547,
      "learning_rate": 0.00047271206925840513,
      "loss": 2.9642,
      "step": 70210
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7150527238845825,
      "learning_rate": 0.000472708724569062,
      "loss": 3.2241,
      "step": 70211
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.771627426147461,
      "learning_rate": 0.00047270537984760906,
      "loss": 3.0111,
      "step": 70212
    },
    {
      "epoch": 0.91,
      "grad_norm": 4.159314155578613,
      "learning_rate": 0.000472702035094047,
      "loss": 3.229,
      "step": 70213
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4220170974731445,
      "learning_rate": 0.0004726986903083763,
      "loss": 2.5528,
      "step": 70214
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4521907567977905,
      "learning_rate": 0.00047269534549059764,
      "loss": 2.8185,
      "step": 70215
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.1544923782348633,
      "learning_rate": 0.0004726920006407117,
      "loss": 3.0807,
      "step": 70216
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7756909132003784,
      "learning_rate": 0.00047268865575871894,
      "loss": 2.9397,
      "step": 70217
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8806275129318237,
      "learning_rate": 0.0004726853108446202,
      "loss": 2.9828,
      "step": 70218
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8523945808410645,
      "learning_rate": 0.00047268196589841607,
      "loss": 3.0693,
      "step": 70219
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.559399962425232,
      "learning_rate": 0.00047267862092010703,
      "loss": 3.0579,
      "step": 70220
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.9937810897827148,
      "learning_rate": 0.00047267527590969373,
      "loss": 3.0395,
      "step": 70221
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4280164241790771,
      "learning_rate": 0.0004726719308671769,
      "loss": 3.1798,
      "step": 70222
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.648156762123108,
      "learning_rate": 0.000472668585792557,
      "loss": 3.0849,
      "step": 70223
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6565510034561157,
      "learning_rate": 0.0004726652406858349,
      "loss": 2.9865,
      "step": 70224
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4405722618103027,
      "learning_rate": 0.00047266189554701104,
      "loss": 3.2153,
      "step": 70225
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7509039640426636,
      "learning_rate": 0.000472658550376086,
      "loss": 2.8925,
      "step": 70226
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.4069156646728516,
      "learning_rate": 0.0004726552051730605,
      "loss": 3.2807,
      "step": 70227
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7651968002319336,
      "learning_rate": 0.0004726518599379352,
      "loss": 3.0115,
      "step": 70228
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6470082998275757,
      "learning_rate": 0.0004726485146707106,
      "loss": 3.0952,
      "step": 70229
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6142486333847046,
      "learning_rate": 0.00047264516937138736,
      "loss": 2.8842,
      "step": 70230
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.392439365386963,
      "learning_rate": 0.00047264182403996614,
      "loss": 3.0665,
      "step": 70231
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1620121002197266,
      "learning_rate": 0.0004726384786764476,
      "loss": 3.1253,
      "step": 70232
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.1291849613189697,
      "learning_rate": 0.0004726351332808323,
      "loss": 3.2177,
      "step": 70233
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.188673734664917,
      "learning_rate": 0.0004726317878531208,
      "loss": 2.9607,
      "step": 70234
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.337028741836548,
      "learning_rate": 0.0004726284423933139,
      "loss": 2.8248,
      "step": 70235
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.486443281173706,
      "learning_rate": 0.000472625096901412,
      "loss": 3.1571,
      "step": 70236
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.762625813484192,
      "learning_rate": 0.0004726217513774159,
      "loss": 3.0688,
      "step": 70237
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.441805362701416,
      "learning_rate": 0.00047261840582132615,
      "loss": 3.1202,
      "step": 70238
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.229257345199585,
      "learning_rate": 0.00047261506023314336,
      "loss": 3.4113,
      "step": 70239
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8111783266067505,
      "learning_rate": 0.0004726117146128681,
      "loss": 3.1302,
      "step": 70240
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4006500244140625,
      "learning_rate": 0.00047260836896050127,
      "loss": 2.9844,
      "step": 70241
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6362229585647583,
      "learning_rate": 0.0004726050232760431,
      "loss": 3.1832,
      "step": 70242
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4736181497573853,
      "learning_rate": 0.00047260167755949447,
      "loss": 3.0437,
      "step": 70243
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.478289246559143,
      "learning_rate": 0.000472598331810856,
      "loss": 3.3128,
      "step": 70244
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.568117618560791,
      "learning_rate": 0.0004725949860301281,
      "loss": 2.8732,
      "step": 70245
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7387545108795166,
      "learning_rate": 0.0004725916402173116,
      "loss": 3.0824,
      "step": 70246
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5169953107833862,
      "learning_rate": 0.00047258829437240706,
      "loss": 2.8646,
      "step": 70247
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.881867527961731,
      "learning_rate": 0.00047258494849541504,
      "loss": 3.0529,
      "step": 70248
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.7538725137710571,
      "learning_rate": 0.0004725816025863363,
      "loss": 3.0433,
      "step": 70249
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.722440242767334,
      "learning_rate": 0.0004725782566451714,
      "loss": 2.9997,
      "step": 70250
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6817450523376465,
      "learning_rate": 0.0004725749106719209,
      "loss": 3.1375,
      "step": 70251
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6841272115707397,
      "learning_rate": 0.00047257156466658543,
      "loss": 3.0858,
      "step": 70252
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.970503807067871,
      "learning_rate": 0.00047256821862916575,
      "loss": 2.9882,
      "step": 70253
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5799998044967651,
      "learning_rate": 0.0004725648725596623,
      "loss": 3.1857,
      "step": 70254
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.3046796321868896,
      "learning_rate": 0.00047256152645807573,
      "loss": 2.9446,
      "step": 70255
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6704683303833008,
      "learning_rate": 0.00047255818032440687,
      "loss": 3.0289,
      "step": 70256
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5407236814498901,
      "learning_rate": 0.000472554834158656,
      "loss": 3.1034,
      "step": 70257
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6421408653259277,
      "learning_rate": 0.0004725514879608241,
      "loss": 2.9024,
      "step": 70258
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8315215110778809,
      "learning_rate": 0.00047254814173091156,
      "loss": 3.2154,
      "step": 70259
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5216038227081299,
      "learning_rate": 0.000472544795468919,
      "loss": 3.1397,
      "step": 70260
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5354636907577515,
      "learning_rate": 0.0004725414491748472,
      "loss": 3.168,
      "step": 70261
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5363675355911255,
      "learning_rate": 0.0004725381028486966,
      "loss": 3.1398,
      "step": 70262
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.8432196378707886,
      "learning_rate": 0.00047253475649046796,
      "loss": 2.9623,
      "step": 70263
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.505052924156189,
      "learning_rate": 0.00047253141010016183,
      "loss": 3.0762,
      "step": 70264
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.113269090652466,
      "learning_rate": 0.0004725280636777789,
      "loss": 2.7166,
      "step": 70265
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6075762510299683,
      "learning_rate": 0.0004725247172233197,
      "loss": 3.0002,
      "step": 70266
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.825056552886963,
      "learning_rate": 0.00047252137073678497,
      "loss": 3.076,
      "step": 70267
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.65204918384552,
      "learning_rate": 0.0004725180242181752,
      "loss": 3.0336,
      "step": 70268
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.4236130714416504,
      "learning_rate": 0.000472514677667491,
      "loss": 3.1526,
      "step": 70269
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5181260108947754,
      "learning_rate": 0.0004725113310847332,
      "loss": 3.1484,
      "step": 70270
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.5334275960922241,
      "learning_rate": 0.00047250798446990224,
      "loss": 3.0338,
      "step": 70271
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6942659616470337,
      "learning_rate": 0.0004725046378229988,
      "loss": 2.9805,
      "step": 70272
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.623720645904541,
      "learning_rate": 0.00047250129114402345,
      "loss": 3.0385,
      "step": 70273
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.592724323272705,
      "learning_rate": 0.00047249794443297686,
      "loss": 3.0603,
      "step": 70274
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7868175506591797,
      "learning_rate": 0.0004724945976898597,
      "loss": 2.7592,
      "step": 70275
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6539736986160278,
      "learning_rate": 0.00047249125091467246,
      "loss": 3.0011,
      "step": 70276
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7941128015518188,
      "learning_rate": 0.00047248790410741595,
      "loss": 2.9398,
      "step": 70277
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.009052038192749,
      "learning_rate": 0.00047248455726809057,
      "loss": 3.043,
      "step": 70278
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5579733848571777,
      "learning_rate": 0.0004724812103966971,
      "loss": 2.9151,
      "step": 70279
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.650883197784424,
      "learning_rate": 0.0004724778634932361,
      "loss": 2.9163,
      "step": 70280
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6857318878173828,
      "learning_rate": 0.00047247451655770825,
      "loss": 3.021,
      "step": 70281
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6325273513793945,
      "learning_rate": 0.00047247116959011406,
      "loss": 2.8496,
      "step": 70282
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.695165991783142,
      "learning_rate": 0.00047246782259045437,
      "loss": 2.6004,
      "step": 70283
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9467319250106812,
      "learning_rate": 0.0004724644755587295,
      "loss": 3.0061,
      "step": 70284
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.458228588104248,
      "learning_rate": 0.0004724611284949403,
      "loss": 2.825,
      "step": 70285
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6987665891647339,
      "learning_rate": 0.0004724577813990873,
      "loss": 2.8604,
      "step": 70286
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.036867141723633,
      "learning_rate": 0.00047245443427117113,
      "loss": 3.1122,
      "step": 70287
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6548576354980469,
      "learning_rate": 0.0004724510871111925,
      "loss": 2.8464,
      "step": 70288
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4846243858337402,
      "learning_rate": 0.00047244773991915185,
      "loss": 3.0243,
      "step": 70289
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8244322538375854,
      "learning_rate": 0.00047244439269505,
      "loss": 2.6826,
      "step": 70290
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8439338207244873,
      "learning_rate": 0.0004724410454388874,
      "loss": 3.2036,
      "step": 70291
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.835546851158142,
      "learning_rate": 0.00047243769815066486,
      "loss": 3.0466,
      "step": 70292
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6840122938156128,
      "learning_rate": 0.00047243435083038285,
      "loss": 2.9716,
      "step": 70293
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.714725136756897,
      "learning_rate": 0.00047243100347804195,
      "loss": 3.1105,
      "step": 70294
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5291273593902588,
      "learning_rate": 0.000472427656093643,
      "loss": 2.8845,
      "step": 70295
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8317854404449463,
      "learning_rate": 0.0004724243086771864,
      "loss": 3.2072,
      "step": 70296
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.754411220550537,
      "learning_rate": 0.00047242096122867297,
      "loss": 3.1382,
      "step": 70297
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7008894681930542,
      "learning_rate": 0.00047241761374810316,
      "loss": 3.0429,
      "step": 70298
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.9151432514190674,
      "learning_rate": 0.00047241426623547767,
      "loss": 2.8745,
      "step": 70299
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.807283878326416,
      "learning_rate": 0.00047241091869079703,
      "loss": 3.0477,
      "step": 70300
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5708909034729004,
      "learning_rate": 0.00047240757111406207,
      "loss": 3.1197,
      "step": 70301
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5745899677276611,
      "learning_rate": 0.00047240422350527326,
      "loss": 3.0493,
      "step": 70302
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4994289875030518,
      "learning_rate": 0.0004724008758644312,
      "loss": 2.8993,
      "step": 70303
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4679627418518066,
      "learning_rate": 0.0004723975281915366,
      "loss": 2.986,
      "step": 70304
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7629047632217407,
      "learning_rate": 0.00047239418048659003,
      "loss": 3.1043,
      "step": 70305
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3485099077224731,
      "learning_rate": 0.00047239083274959215,
      "loss": 3.047,
      "step": 70306
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5755800008773804,
      "learning_rate": 0.0004723874849805435,
      "loss": 3.1018,
      "step": 70307
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.460113763809204,
      "learning_rate": 0.0004723841371794449,
      "loss": 3.1336,
      "step": 70308
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.677071213722229,
      "learning_rate": 0.00047238078934629677,
      "loss": 3.0673,
      "step": 70309
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8290709257125854,
      "learning_rate": 0.0004723774414810997,
      "loss": 3.0135,
      "step": 70310
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5369925498962402,
      "learning_rate": 0.0004723740935838545,
      "loss": 3.2334,
      "step": 70311
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5937539339065552,
      "learning_rate": 0.0004723707456545617,
      "loss": 3.0618,
      "step": 70312
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8887351751327515,
      "learning_rate": 0.0004723673976932219,
      "loss": 3.2641,
      "step": 70313
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0356593132019043,
      "learning_rate": 0.0004723640496998358,
      "loss": 3.2138,
      "step": 70314
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0779521465301514,
      "learning_rate": 0.0004723607016744039,
      "loss": 2.7821,
      "step": 70315
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8058805465698242,
      "learning_rate": 0.00047235735361692694,
      "loss": 2.9839,
      "step": 70316
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4652892351150513,
      "learning_rate": 0.00047235400552740544,
      "loss": 2.9503,
      "step": 70317
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5902653932571411,
      "learning_rate": 0.0004723506574058401,
      "loss": 3.0013,
      "step": 70318
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7293599843978882,
      "learning_rate": 0.00047234730925223154,
      "loss": 3.1191,
      "step": 70319
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.99102783203125,
      "learning_rate": 0.0004723439610665804,
      "loss": 3.3213,
      "step": 70320
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6154426336288452,
      "learning_rate": 0.0004723406128488872,
      "loss": 3.0571,
      "step": 70321
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.839043140411377,
      "learning_rate": 0.0004723372645991526,
      "loss": 3.2247,
      "step": 70322
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6165874004364014,
      "learning_rate": 0.00047233391631737735,
      "loss": 2.9405,
      "step": 70323
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.178187131881714,
      "learning_rate": 0.0004723305680035619,
      "loss": 3.1783,
      "step": 70324
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7894786596298218,
      "learning_rate": 0.0004723272196577069,
      "loss": 3.1442,
      "step": 70325
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7728981971740723,
      "learning_rate": 0.00047232387127981313,
      "loss": 3.1313,
      "step": 70326
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5705463886260986,
      "learning_rate": 0.00047232052286988105,
      "loss": 2.8078,
      "step": 70327
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7144140005111694,
      "learning_rate": 0.00047231717442791134,
      "loss": 3.0568,
      "step": 70328
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5209227800369263,
      "learning_rate": 0.0004723138259539046,
      "loss": 2.9676,
      "step": 70329
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8488078117370605,
      "learning_rate": 0.0004723104774478615,
      "loss": 2.9389,
      "step": 70330
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9464051723480225,
      "learning_rate": 0.0004723071289097825,
      "loss": 3.1039,
      "step": 70331
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.1441221237182617,
      "learning_rate": 0.0004723037803396685,
      "loss": 3.1375,
      "step": 70332
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1488852500915527,
      "learning_rate": 0.0004723004317375199,
      "loss": 2.9572,
      "step": 70333
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.07351016998291,
      "learning_rate": 0.0004722970831033374,
      "loss": 2.989,
      "step": 70334
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8661987781524658,
      "learning_rate": 0.0004722937344371218,
      "loss": 2.8471,
      "step": 70335
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.042771816253662,
      "learning_rate": 0.00047229038573887334,
      "loss": 2.9792,
      "step": 70336
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8595889806747437,
      "learning_rate": 0.0004722870370085928,
      "loss": 3.0854,
      "step": 70337
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2202565670013428,
      "learning_rate": 0.00047228368824628103,
      "loss": 3.0832,
      "step": 70338
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9198349714279175,
      "learning_rate": 0.00047228033945193834,
      "loss": 3.2714,
      "step": 70339
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.067192792892456,
      "learning_rate": 0.0004722769906255655,
      "loss": 2.9644,
      "step": 70340
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6295980215072632,
      "learning_rate": 0.0004722736417671632,
      "loss": 2.8643,
      "step": 70341
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6269423961639404,
      "learning_rate": 0.00047227029287673193,
      "loss": 3.2893,
      "step": 70342
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.1931893825531006,
      "learning_rate": 0.00047226694395427227,
      "loss": 3.0306,
      "step": 70343
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.079712152481079,
      "learning_rate": 0.0004722635949997851,
      "loss": 3.256,
      "step": 70344
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8723262548446655,
      "learning_rate": 0.00047226024601327074,
      "loss": 3.1509,
      "step": 70345
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.732978343963623,
      "learning_rate": 0.00047225689699472996,
      "loss": 3.035,
      "step": 70346
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4776638746261597,
      "learning_rate": 0.0004722535479441635,
      "loss": 3.1395,
      "step": 70347
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.110743522644043,
      "learning_rate": 0.0004722501988615717,
      "loss": 3.1455,
      "step": 70348
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7407716512680054,
      "learning_rate": 0.0004722468497469554,
      "loss": 3.0648,
      "step": 70349
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8705308437347412,
      "learning_rate": 0.0004722435006003151,
      "loss": 2.8966,
      "step": 70350
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9902379512786865,
      "learning_rate": 0.0004722401514216516,
      "loss": 2.9382,
      "step": 70351
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.399686336517334,
      "learning_rate": 0.00047223680221096526,
      "loss": 3.2565,
      "step": 70352
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5582295656204224,
      "learning_rate": 0.00047223345296825705,
      "loss": 3.1016,
      "step": 70353
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.807294487953186,
      "learning_rate": 0.0004722301036935272,
      "loss": 3.1653,
      "step": 70354
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7904882431030273,
      "learning_rate": 0.0004722267543867766,
      "loss": 2.9784,
      "step": 70355
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.311605215072632,
      "learning_rate": 0.0004722234050480058,
      "loss": 3.2098,
      "step": 70356
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.6193692684173584,
      "learning_rate": 0.0004722200556772154,
      "loss": 3.2101,
      "step": 70357
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8692246675491333,
      "learning_rate": 0.00047221670627440606,
      "loss": 3.1356,
      "step": 70358
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.3718812465667725,
      "learning_rate": 0.00047221335683957837,
      "loss": 3.2373,
      "step": 70359
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8540252447128296,
      "learning_rate": 0.00047221000737273294,
      "loss": 3.074,
      "step": 70360
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3451000452041626,
      "learning_rate": 0.0004722066578738704,
      "loss": 3.0272,
      "step": 70361
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7420454025268555,
      "learning_rate": 0.00047220330834299147,
      "loss": 2.8843,
      "step": 70362
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.739372730255127,
      "learning_rate": 0.0004721999587800967,
      "loss": 3.1714,
      "step": 70363
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5634177923202515,
      "learning_rate": 0.0004721966091851866,
      "loss": 3.1464,
      "step": 70364
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.606643795967102,
      "learning_rate": 0.000472193259558262,
      "loss": 3.1834,
      "step": 70365
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.408959150314331,
      "learning_rate": 0.0004721899098993234,
      "loss": 3.3672,
      "step": 70366
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.232058525085449,
      "learning_rate": 0.00047218656020837137,
      "loss": 3.0552,
      "step": 70367
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6327329874038696,
      "learning_rate": 0.0004721832104854067,
      "loss": 3.2027,
      "step": 70368
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3351014852523804,
      "learning_rate": 0.0004721798607304299,
      "loss": 3.0995,
      "step": 70369
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6308704614639282,
      "learning_rate": 0.0004721765109434416,
      "loss": 3.1473,
      "step": 70370
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5516287088394165,
      "learning_rate": 0.00047217316112444245,
      "loss": 3.1098,
      "step": 70371
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.601318597793579,
      "learning_rate": 0.0004721698112734331,
      "loss": 3.1954,
      "step": 70372
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6763854026794434,
      "learning_rate": 0.0004721664613904141,
      "loss": 3.0464,
      "step": 70373
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7637150287628174,
      "learning_rate": 0.00047216311147538605,
      "loss": 3.1473,
      "step": 70374
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5011454820632935,
      "learning_rate": 0.0004721597615283497,
      "loss": 2.9535,
      "step": 70375
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.635514259338379,
      "learning_rate": 0.0004721564115493056,
      "loss": 2.801,
      "step": 70376
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.084887981414795,
      "learning_rate": 0.0004721530615382542,
      "loss": 3.1005,
      "step": 70377
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5819957256317139,
      "learning_rate": 0.0004721497114951966,
      "loss": 3.0894,
      "step": 70378
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1003313064575195,
      "learning_rate": 0.00047214636142013286,
      "loss": 2.9719,
      "step": 70379
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8000752925872803,
      "learning_rate": 0.00047214301131306405,
      "loss": 3.1802,
      "step": 70380
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7812987565994263,
      "learning_rate": 0.00047213966117399045,
      "loss": 2.9681,
      "step": 70381
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.787356972694397,
      "learning_rate": 0.00047213631100291294,
      "loss": 3.0158,
      "step": 70382
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9138600826263428,
      "learning_rate": 0.00047213296079983195,
      "loss": 3.2969,
      "step": 70383
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5158473253250122,
      "learning_rate": 0.00047212961056474834,
      "loss": 2.9528,
      "step": 70384
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.86431086063385,
      "learning_rate": 0.00047212626029766245,
      "loss": 3.0731,
      "step": 70385
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6962774991989136,
      "learning_rate": 0.0004721229099985751,
      "loss": 3.0253,
      "step": 70386
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.621030569076538,
      "learning_rate": 0.0004721195596674868,
      "loss": 3.3642,
      "step": 70387
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.112046003341675,
      "learning_rate": 0.00047211620930439827,
      "loss": 2.9135,
      "step": 70388
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4539376497268677,
      "learning_rate": 0.00047211285890931006,
      "loss": 3.0621,
      "step": 70389
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6425466537475586,
      "learning_rate": 0.00047210950848222287,
      "loss": 3.1557,
      "step": 70390
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8703464269638062,
      "learning_rate": 0.0004721061580231372,
      "loss": 3.2029,
      "step": 70391
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7957665920257568,
      "learning_rate": 0.0004721028075320538,
      "loss": 2.953,
      "step": 70392
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5068014860153198,
      "learning_rate": 0.0004720994570089732,
      "loss": 3.0691,
      "step": 70393
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5936882495880127,
      "learning_rate": 0.00047209610645389613,
      "loss": 2.9215,
      "step": 70394
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2061171531677246,
      "learning_rate": 0.0004720927558668231,
      "loss": 2.8178,
      "step": 70395
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4986770153045654,
      "learning_rate": 0.0004720894052477548,
      "loss": 3.1625,
      "step": 70396
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6087833642959595,
      "learning_rate": 0.0004720860545966918,
      "loss": 2.9635,
      "step": 70397
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3272597789764404,
      "learning_rate": 0.00047208270391363476,
      "loss": 3.0449,
      "step": 70398
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6432393789291382,
      "learning_rate": 0.0004720793531985843,
      "loss": 3.0454,
      "step": 70399
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5385329723358154,
      "learning_rate": 0.000472076002451541,
      "loss": 3.0784,
      "step": 70400
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.874410629272461,
      "learning_rate": 0.00047207265167250554,
      "loss": 2.9734,
      "step": 70401
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.445332407951355,
      "learning_rate": 0.00047206930086147867,
      "loss": 2.9139,
      "step": 70402
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.395328164100647,
      "learning_rate": 0.0004720659500184607,
      "loss": 3.0438,
      "step": 70403
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3501081466674805,
      "learning_rate": 0.00047206259914345244,
      "loss": 2.7465,
      "step": 70404
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7042226791381836,
      "learning_rate": 0.0004720592482364546,
      "loss": 3.1682,
      "step": 70405
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.606336832046509,
      "learning_rate": 0.00047205589729746754,
      "loss": 2.8176,
      "step": 70406
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7340141534805298,
      "learning_rate": 0.00047205254632649217,
      "loss": 3.2485,
      "step": 70407
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7560060024261475,
      "learning_rate": 0.00047204919532352894,
      "loss": 2.9752,
      "step": 70408
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6770293712615967,
      "learning_rate": 0.0004720458442885785,
      "loss": 2.9324,
      "step": 70409
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5878000259399414,
      "learning_rate": 0.00047204249322164147,
      "loss": 2.8961,
      "step": 70410
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.680259346961975,
      "learning_rate": 0.0004720391421227186,
      "loss": 2.8195,
      "step": 70411
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6045082807540894,
      "learning_rate": 0.00047203579099181034,
      "loss": 3.2757,
      "step": 70412
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7068179845809937,
      "learning_rate": 0.00047203243982891737,
      "loss": 2.8693,
      "step": 70413
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5683305263519287,
      "learning_rate": 0.0004720290886340404,
      "loss": 2.8082,
      "step": 70414
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5666987895965576,
      "learning_rate": 0.0004720257374071798,
      "loss": 3.3083,
      "step": 70415
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8418221473693848,
      "learning_rate": 0.00047202238614833645,
      "loss": 3.0055,
      "step": 70416
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4875619411468506,
      "learning_rate": 0.000472019034857511,
      "loss": 3.1788,
      "step": 70417
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.776896357536316,
      "learning_rate": 0.0004720156835347038,
      "loss": 2.9711,
      "step": 70418
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5300986766815186,
      "learning_rate": 0.00047201233217991574,
      "loss": 3.1328,
      "step": 70419
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6411449909210205,
      "learning_rate": 0.0004720089807931473,
      "loss": 2.9916,
      "step": 70420
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7197660207748413,
      "learning_rate": 0.00047200562937439923,
      "loss": 3.0368,
      "step": 70421
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4484329223632812,
      "learning_rate": 0.00047200227792367194,
      "loss": 3.0632,
      "step": 70422
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7190618515014648,
      "learning_rate": 0.00047199892644096626,
      "loss": 2.9952,
      "step": 70423
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4600270986557007,
      "learning_rate": 0.00047199557492628275,
      "loss": 2.918,
      "step": 70424
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.965772271156311,
      "learning_rate": 0.0004719922233796219,
      "loss": 3.0162,
      "step": 70425
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3203167915344238,
      "learning_rate": 0.0004719888718009846,
      "loss": 3.0224,
      "step": 70426
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.476325511932373,
      "learning_rate": 0.0004719855201903712,
      "loss": 3.2397,
      "step": 70427
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2152907848358154,
      "learning_rate": 0.00047198216854778247,
      "loss": 3.1003,
      "step": 70428
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8459534645080566,
      "learning_rate": 0.000471978816873219,
      "loss": 2.991,
      "step": 70429
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7954546213150024,
      "learning_rate": 0.00047197546516668145,
      "loss": 3.0502,
      "step": 70430
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3842248916625977,
      "learning_rate": 0.0004719721134281705,
      "loss": 2.911,
      "step": 70431
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4955250024795532,
      "learning_rate": 0.00047196876165768654,
      "loss": 2.8828,
      "step": 70432
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.714545726776123,
      "learning_rate": 0.0004719654098552304,
      "loss": 3.0608,
      "step": 70433
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.457764983177185,
      "learning_rate": 0.00047196205802080266,
      "loss": 3.057,
      "step": 70434
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4528368711471558,
      "learning_rate": 0.00047195870615440396,
      "loss": 2.9652,
      "step": 70435
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9927031993865967,
      "learning_rate": 0.0004719553542560348,
      "loss": 3.1878,
      "step": 70436
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.516169548034668,
      "learning_rate": 0.00047195200232569593,
      "loss": 3.2407,
      "step": 70437
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.97979736328125,
      "learning_rate": 0.00047194865036338796,
      "loss": 3.0865,
      "step": 70438
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8653337955474854,
      "learning_rate": 0.0004719452983691115,
      "loss": 3.0678,
      "step": 70439
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8440208435058594,
      "learning_rate": 0.00047194194634286716,
      "loss": 3.1966,
      "step": 70440
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5918893814086914,
      "learning_rate": 0.00047193859428465554,
      "loss": 3.1479,
      "step": 70441
    },
    {
      "epoch": 0.92,
      "grad_norm": 4.03282356262207,
      "learning_rate": 0.0004719352421944773,
      "loss": 3.0252,
      "step": 70442
    },
    {
      "epoch": 0.92,
      "grad_norm": 4.086690902709961,
      "learning_rate": 0.00047193189007233304,
      "loss": 2.7646,
      "step": 70443
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.421691417694092,
      "learning_rate": 0.0004719285379182234,
      "loss": 2.8571,
      "step": 70444
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5950130224227905,
      "learning_rate": 0.00047192518573214895,
      "loss": 2.785,
      "step": 70445
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.535569190979004,
      "learning_rate": 0.00047192183351411047,
      "loss": 2.8813,
      "step": 70446
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.8822479248046875,
      "learning_rate": 0.00047191848126410836,
      "loss": 2.9846,
      "step": 70447
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.410170078277588,
      "learning_rate": 0.0004719151289821435,
      "loss": 2.8922,
      "step": 70448
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7246495485305786,
      "learning_rate": 0.0004719117766682163,
      "loss": 3.0808,
      "step": 70449
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.376955509185791,
      "learning_rate": 0.0004719084243223274,
      "loss": 3.3426,
      "step": 70450
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.711674928665161,
      "learning_rate": 0.00047190507194447756,
      "loss": 2.8153,
      "step": 70451
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.857040524482727,
      "learning_rate": 0.00047190171953466725,
      "loss": 2.8512,
      "step": 70452
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6512439250946045,
      "learning_rate": 0.0004718983670928972,
      "loss": 2.9363,
      "step": 70453
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5697050094604492,
      "learning_rate": 0.0004718950146191681,
      "loss": 2.7352,
      "step": 70454
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.741539478302002,
      "learning_rate": 0.0004718916621134803,
      "loss": 2.8403,
      "step": 70455
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5824395418167114,
      "learning_rate": 0.0004718883095758347,
      "loss": 3.0767,
      "step": 70456
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.935729503631592,
      "learning_rate": 0.00047188495700623185,
      "loss": 2.8101,
      "step": 70457
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7414309978485107,
      "learning_rate": 0.00047188160440467226,
      "loss": 3.0696,
      "step": 70458
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5833743810653687,
      "learning_rate": 0.0004718782517711566,
      "loss": 3.1681,
      "step": 70459
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9243016242980957,
      "learning_rate": 0.00047187489910568563,
      "loss": 3.1125,
      "step": 70460
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.641021728515625,
      "learning_rate": 0.0004718715464082598,
      "loss": 3.2704,
      "step": 70461
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8271076679229736,
      "learning_rate": 0.00047186819367887986,
      "loss": 2.9508,
      "step": 70462
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.498471736907959,
      "learning_rate": 0.0004718648409175464,
      "loss": 3.0772,
      "step": 70463
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.739546775817871,
      "learning_rate": 0.0004718614881242599,
      "loss": 3.0796,
      "step": 70464
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7358261346817017,
      "learning_rate": 0.0004718581352990212,
      "loss": 3.219,
      "step": 70465
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4344631433486938,
      "learning_rate": 0.00047185478244183085,
      "loss": 2.7835,
      "step": 70466
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3343172073364258,
      "learning_rate": 0.0004718514295526894,
      "loss": 2.9657,
      "step": 70467
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3427289724349976,
      "learning_rate": 0.00047184807663159747,
      "loss": 2.9918,
      "step": 70468
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8813434839248657,
      "learning_rate": 0.00047184472367855585,
      "loss": 3.1704,
      "step": 70469
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.607787847518921,
      "learning_rate": 0.000471841370693565,
      "loss": 2.9089,
      "step": 70470
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4986436367034912,
      "learning_rate": 0.00047183801767662555,
      "loss": 2.7847,
      "step": 70471
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8740580081939697,
      "learning_rate": 0.0004718346646277383,
      "loss": 2.9234,
      "step": 70472
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.736073613166809,
      "learning_rate": 0.00047183131154690365,
      "loss": 2.9698,
      "step": 70473
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.792297601699829,
      "learning_rate": 0.00047182795843412234,
      "loss": 2.9235,
      "step": 70474
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7219816446304321,
      "learning_rate": 0.00047182460528939497,
      "loss": 3.2986,
      "step": 70475
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7877967357635498,
      "learning_rate": 0.0004718212521127221,
      "loss": 3.0148,
      "step": 70476
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6036396026611328,
      "learning_rate": 0.00047181789890410444,
      "loss": 2.999,
      "step": 70477
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5745830535888672,
      "learning_rate": 0.00047181454566354265,
      "loss": 2.9603,
      "step": 70478
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4450263977050781,
      "learning_rate": 0.0004718111923910373,
      "loss": 2.7799,
      "step": 70479
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1426937580108643,
      "learning_rate": 0.00047180783908658887,
      "loss": 3.0757,
      "step": 70480
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6280429363250732,
      "learning_rate": 0.0004718044857501983,
      "loss": 3.0417,
      "step": 70481
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6391805410385132,
      "learning_rate": 0.000471801132381866,
      "loss": 3.0534,
      "step": 70482
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.520914077758789,
      "learning_rate": 0.0004717977789815925,
      "loss": 3.1182,
      "step": 70483
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.228623867034912,
      "learning_rate": 0.0004717944255493786,
      "loss": 3.0957,
      "step": 70484
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.344038963317871,
      "learning_rate": 0.0004717910720852249,
      "loss": 2.9437,
      "step": 70485
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.318568229675293,
      "learning_rate": 0.000471787718589132,
      "loss": 3.2605,
      "step": 70486
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7302474975585938,
      "learning_rate": 0.00047178436506110054,
      "loss": 3.0522,
      "step": 70487
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9264028072357178,
      "learning_rate": 0.0004717810115011311,
      "loss": 2.8497,
      "step": 70488
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.6656603813171387,
      "learning_rate": 0.0004717776579092243,
      "loss": 3.2386,
      "step": 70489
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.991949439048767,
      "learning_rate": 0.0004717743042853808,
      "loss": 2.9682,
      "step": 70490
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.626806616783142,
      "learning_rate": 0.0004717709506296012,
      "loss": 3.0,
      "step": 70491
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.154280424118042,
      "learning_rate": 0.0004717675969418862,
      "loss": 2.9149,
      "step": 70492
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6770124435424805,
      "learning_rate": 0.00047176424322223636,
      "loss": 2.9178,
      "step": 70493
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5537086725234985,
      "learning_rate": 0.0004717608894706523,
      "loss": 3.0543,
      "step": 70494
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.550979733467102,
      "learning_rate": 0.0004717575356871346,
      "loss": 3.1559,
      "step": 70495
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.796032667160034,
      "learning_rate": 0.0004717541818716839,
      "loss": 3.1646,
      "step": 70496
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7658090591430664,
      "learning_rate": 0.00047175082802430093,
      "loss": 3.2597,
      "step": 70497
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5439982414245605,
      "learning_rate": 0.0004717474741449862,
      "loss": 3.2363,
      "step": 70498
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.010519027709961,
      "learning_rate": 0.0004717441202337404,
      "loss": 3.0279,
      "step": 70499
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.2274301052093506,
      "learning_rate": 0.00047174076629056416,
      "loss": 2.837,
      "step": 70500
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7845442295074463,
      "learning_rate": 0.000471737412315458,
      "loss": 3.0246,
      "step": 70501
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.544994831085205,
      "learning_rate": 0.0004717340583084226,
      "loss": 2.8147,
      "step": 70502
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2504069805145264,
      "learning_rate": 0.00047173070426945874,
      "loss": 2.9529,
      "step": 70503
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.441345453262329,
      "learning_rate": 0.0004717273501985668,
      "loss": 2.9959,
      "step": 70504
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7119311094284058,
      "learning_rate": 0.00047172399609574745,
      "loss": 2.9487,
      "step": 70505
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2377383708953857,
      "learning_rate": 0.0004717206419610014,
      "loss": 2.6948,
      "step": 70506
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8672888278961182,
      "learning_rate": 0.00047171728779432924,
      "loss": 3.1804,
      "step": 70507
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4061304330825806,
      "learning_rate": 0.00047171393359573166,
      "loss": 2.9658,
      "step": 70508
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6353286504745483,
      "learning_rate": 0.00047171057936520915,
      "loss": 2.938,
      "step": 70509
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5960584878921509,
      "learning_rate": 0.00047170722510276247,
      "loss": 2.9827,
      "step": 70510
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7749450206756592,
      "learning_rate": 0.00047170387080839205,
      "loss": 2.8946,
      "step": 70511
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9154750108718872,
      "learning_rate": 0.0004717005164820988,
      "loss": 3.3012,
      "step": 70512
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.570641040802002,
      "learning_rate": 0.0004716971621238831,
      "loss": 3.1551,
      "step": 70513
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6238433122634888,
      "learning_rate": 0.0004716938077337457,
      "loss": 3.0168,
      "step": 70514
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7054418325424194,
      "learning_rate": 0.00047169045331168716,
      "loss": 3.0686,
      "step": 70515
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7569797039031982,
      "learning_rate": 0.00047168709885770803,
      "loss": 3.0785,
      "step": 70516
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5491540431976318,
      "learning_rate": 0.0004716837443718091,
      "loss": 3.1204,
      "step": 70517
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.102863073348999,
      "learning_rate": 0.0004716803898539909,
      "loss": 2.9541,
      "step": 70518
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.614234209060669,
      "learning_rate": 0.00047167703530425414,
      "loss": 2.9262,
      "step": 70519
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.059600830078125,
      "learning_rate": 0.00047167368072259926,
      "loss": 3.1917,
      "step": 70520
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4811650514602661,
      "learning_rate": 0.00047167032610902714,
      "loss": 3.0794,
      "step": 70521
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.1950063705444336,
      "learning_rate": 0.00047166697146353823,
      "loss": 3.1769,
      "step": 70522
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.970752477645874,
      "learning_rate": 0.0004716636167861331,
      "loss": 2.9682,
      "step": 70523
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5245685577392578,
      "learning_rate": 0.0004716602620768126,
      "loss": 3.055,
      "step": 70524
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4452096223831177,
      "learning_rate": 0.00047165690733557715,
      "loss": 3.3273,
      "step": 70525
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9227986335754395,
      "learning_rate": 0.0004716535525624274,
      "loss": 2.8803,
      "step": 70526
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6282596588134766,
      "learning_rate": 0.00047165019775736414,
      "loss": 3.0564,
      "step": 70527
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.417466163635254,
      "learning_rate": 0.0004716468429203877,
      "loss": 2.8087,
      "step": 70528
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.590352177619934,
      "learning_rate": 0.00047164348805149895,
      "loss": 3.1519,
      "step": 70529
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4888155460357666,
      "learning_rate": 0.0004716401331506984,
      "loss": 2.9684,
      "step": 70530
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0294816493988037,
      "learning_rate": 0.0004716367782179868,
      "loss": 3.0713,
      "step": 70531
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5808078050613403,
      "learning_rate": 0.00047163342325336454,
      "loss": 2.9365,
      "step": 70532
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.549620270729065,
      "learning_rate": 0.0004716300682568325,
      "loss": 3.1349,
      "step": 70533
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8920726776123047,
      "learning_rate": 0.00047162671322839117,
      "loss": 3.1235,
      "step": 70534
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4191906452178955,
      "learning_rate": 0.00047162335816804124,
      "loss": 2.8629,
      "step": 70535
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3674694299697876,
      "learning_rate": 0.0004716200030757832,
      "loss": 2.9318,
      "step": 70536
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7585866451263428,
      "learning_rate": 0.00047161664795161777,
      "loss": 2.9848,
      "step": 70537
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7543288469314575,
      "learning_rate": 0.0004716132927955456,
      "loss": 3.0372,
      "step": 70538
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4266719818115234,
      "learning_rate": 0.0004716099376075674,
      "loss": 3.0246,
      "step": 70539
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5766338109970093,
      "learning_rate": 0.00047160658238768345,
      "loss": 3.0786,
      "step": 70540
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4227547645568848,
      "learning_rate": 0.00047160322713589464,
      "loss": 3.2391,
      "step": 70541
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.433176040649414,
      "learning_rate": 0.00047159987185220166,
      "loss": 3.1948,
      "step": 70542
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.862851619720459,
      "learning_rate": 0.00047159651653660495,
      "loss": 3.394,
      "step": 70543
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.928802251815796,
      "learning_rate": 0.0004715931611891052,
      "loss": 3.1439,
      "step": 70544
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8665324449539185,
      "learning_rate": 0.00047158980580970316,
      "loss": 3.1576,
      "step": 70545
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.091869831085205,
      "learning_rate": 0.0004715864503983992,
      "loss": 3.0144,
      "step": 70546
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9943666458129883,
      "learning_rate": 0.0004715830949551941,
      "loss": 3.114,
      "step": 70547
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4325306415557861,
      "learning_rate": 0.0004715797394800885,
      "loss": 2.9841,
      "step": 70548
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6049842834472656,
      "learning_rate": 0.000471576383973083,
      "loss": 2.9611,
      "step": 70549
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3787384033203125,
      "learning_rate": 0.00047157302843417817,
      "loss": 2.9281,
      "step": 70550
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.323495864868164,
      "learning_rate": 0.00047156967286337474,
      "loss": 3.0672,
      "step": 70551
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7111579179763794,
      "learning_rate": 0.0004715663172606732,
      "loss": 2.8429,
      "step": 70552
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4521124362945557,
      "learning_rate": 0.00047156296162607426,
      "loss": 3.1393,
      "step": 70553
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.0586488246917725,
      "learning_rate": 0.00047155960595957855,
      "loss": 3.0245,
      "step": 70554
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5085363388061523,
      "learning_rate": 0.00047155625026118665,
      "loss": 3.0038,
      "step": 70555
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.811688184738159,
      "learning_rate": 0.00047155289453089914,
      "loss": 2.9718,
      "step": 70556
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.054798126220703,
      "learning_rate": 0.0004715495387687169,
      "loss": 2.8144,
      "step": 70557
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5482993125915527,
      "learning_rate": 0.00047154618297464015,
      "loss": 3.0464,
      "step": 70558
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5259369611740112,
      "learning_rate": 0.0004715428271486698,
      "loss": 3.1715,
      "step": 70559
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1581621170043945,
      "learning_rate": 0.0004715394712908065,
      "loss": 2.7111,
      "step": 70560
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7148070335388184,
      "learning_rate": 0.0004715361154010507,
      "loss": 3.2827,
      "step": 70561
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9407356977462769,
      "learning_rate": 0.000471532759479403,
      "loss": 2.9824,
      "step": 70562
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5945696830749512,
      "learning_rate": 0.00047152940352586424,
      "loss": 3.3265,
      "step": 70563
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.522555947303772,
      "learning_rate": 0.00047152604754043497,
      "loss": 2.9066,
      "step": 70564
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5080724954605103,
      "learning_rate": 0.0004715226915231156,
      "loss": 3.2503,
      "step": 70565
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0440144538879395,
      "learning_rate": 0.00047151933547390706,
      "loss": 2.9584,
      "step": 70566
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.594817042350769,
      "learning_rate": 0.0004715159793928098,
      "loss": 2.9813,
      "step": 70567
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1223409175872803,
      "learning_rate": 0.0004715126232798245,
      "loss": 3.0612,
      "step": 70568
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.704828143119812,
      "learning_rate": 0.0004715092671349517,
      "loss": 2.8808,
      "step": 70569
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5929290056228638,
      "learning_rate": 0.0004715059109581922,
      "loss": 3.0851,
      "step": 70570
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7159837484359741,
      "learning_rate": 0.00047150255474954643,
      "loss": 3.082,
      "step": 70571
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5455235242843628,
      "learning_rate": 0.00047149919850901506,
      "loss": 3.0412,
      "step": 70572
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.942464828491211,
      "learning_rate": 0.0004714958422365988,
      "loss": 2.8552,
      "step": 70573
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8428120613098145,
      "learning_rate": 0.00047149248593229826,
      "loss": 3.0113,
      "step": 70574
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0211527347564697,
      "learning_rate": 0.000471489129596114,
      "loss": 2.8265,
      "step": 70575
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9043811559677124,
      "learning_rate": 0.0004714857732280467,
      "loss": 2.8446,
      "step": 70576
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7560027837753296,
      "learning_rate": 0.00047148241682809695,
      "loss": 2.9801,
      "step": 70577
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5828713178634644,
      "learning_rate": 0.0004714790603962653,
      "loss": 2.9446,
      "step": 70578
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7106982469558716,
      "learning_rate": 0.0004714757039325526,
      "loss": 3.1426,
      "step": 70579
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8607549667358398,
      "learning_rate": 0.0004714723474369592,
      "loss": 3.0323,
      "step": 70580
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5731747150421143,
      "learning_rate": 0.000471468990909486,
      "loss": 3.1829,
      "step": 70581
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.1603434085845947,
      "learning_rate": 0.0004714656343501333,
      "loss": 2.8293,
      "step": 70582
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1876864433288574,
      "learning_rate": 0.000471462277758902,
      "loss": 2.7687,
      "step": 70583
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5068974494934082,
      "learning_rate": 0.0004714589211357926,
      "loss": 3.0574,
      "step": 70584
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7857085466384888,
      "learning_rate": 0.0004714555644808058,
      "loss": 2.9938,
      "step": 70585
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.7716305255889893,
      "learning_rate": 0.0004714522077939421,
      "loss": 2.9055,
      "step": 70586
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.730099678039551,
      "learning_rate": 0.00047144885107520225,
      "loss": 2.9031,
      "step": 70587
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7236154079437256,
      "learning_rate": 0.0004714454943245868,
      "loss": 2.8793,
      "step": 70588
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5790343284606934,
      "learning_rate": 0.0004714421375420964,
      "loss": 3.0091,
      "step": 70589
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0832431316375732,
      "learning_rate": 0.00047143878072773167,
      "loss": 3.0501,
      "step": 70590
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.5665042400360107,
      "learning_rate": 0.00047143542388149325,
      "loss": 2.8942,
      "step": 70591
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.9948794841766357,
      "learning_rate": 0.0004714320670033817,
      "loss": 2.957,
      "step": 70592
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9231579303741455,
      "learning_rate": 0.00047142871009339774,
      "loss": 2.9053,
      "step": 70593
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.643211841583252,
      "learning_rate": 0.00047142535315154206,
      "loss": 2.9894,
      "step": 70594
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.687246322631836,
      "learning_rate": 0.00047142199617781496,
      "loss": 3.0816,
      "step": 70595
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.094994306564331,
      "learning_rate": 0.00047141863917221736,
      "loss": 3.0738,
      "step": 70596
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6133170127868652,
      "learning_rate": 0.0004714152821347498,
      "loss": 2.949,
      "step": 70597
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0159029960632324,
      "learning_rate": 0.00047141192506541287,
      "loss": 3.1473,
      "step": 70598
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.053734302520752,
      "learning_rate": 0.00047140856796420733,
      "loss": 3.2236,
      "step": 70599
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5971788167953491,
      "learning_rate": 0.00047140521083113363,
      "loss": 3.0498,
      "step": 70600
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7196706533432007,
      "learning_rate": 0.0004714018536661924,
      "loss": 2.9414,
      "step": 70601
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6619296073913574,
      "learning_rate": 0.00047139849646938436,
      "loss": 3.156,
      "step": 70602
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7031364440917969,
      "learning_rate": 0.00047139513924071027,
      "loss": 2.9904,
      "step": 70603
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5862382650375366,
      "learning_rate": 0.00047139178198017044,
      "loss": 3.2208,
      "step": 70604
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3149956464767456,
      "learning_rate": 0.0004713884246877657,
      "loss": 3.1994,
      "step": 70605
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6482255458831787,
      "learning_rate": 0.00047138506736349656,
      "loss": 3.1099,
      "step": 70606
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.0273423194885254,
      "learning_rate": 0.0004713817100073637,
      "loss": 3.1793,
      "step": 70607
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1182734966278076,
      "learning_rate": 0.0004713783526193678,
      "loss": 3.0338,
      "step": 70608
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6851006746292114,
      "learning_rate": 0.00047137499519950943,
      "loss": 3.1604,
      "step": 70609
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.414137363433838,
      "learning_rate": 0.00047137163774778915,
      "loss": 3.1107,
      "step": 70610
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7803250551223755,
      "learning_rate": 0.0004713682802642076,
      "loss": 3.1033,
      "step": 70611
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5767371654510498,
      "learning_rate": 0.0004713649227487656,
      "loss": 3.2444,
      "step": 70612
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6172020435333252,
      "learning_rate": 0.0004713615652014635,
      "loss": 3.232,
      "step": 70613
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4831690788269043,
      "learning_rate": 0.0004713582076223021,
      "loss": 3.0994,
      "step": 70614
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.870767831802368,
      "learning_rate": 0.000471354850011282,
      "loss": 2.9725,
      "step": 70615
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9520585536956787,
      "learning_rate": 0.00047135149236840387,
      "loss": 3.2968,
      "step": 70616
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6017811298370361,
      "learning_rate": 0.0004713481346936681,
      "loss": 3.0933,
      "step": 70617
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5617709159851074,
      "learning_rate": 0.0004713447769870755,
      "loss": 3.1958,
      "step": 70618
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8339998722076416,
      "learning_rate": 0.00047134141924862677,
      "loss": 2.9175,
      "step": 70619
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5622307062149048,
      "learning_rate": 0.00047133806147832245,
      "loss": 2.9703,
      "step": 70620
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5444186925888062,
      "learning_rate": 0.00047133470367616303,
      "loss": 3.0075,
      "step": 70621
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6788430213928223,
      "learning_rate": 0.00047133134584214926,
      "loss": 3.1201,
      "step": 70622
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6323552131652832,
      "learning_rate": 0.0004713279879762819,
      "loss": 3.2585,
      "step": 70623
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5267409086227417,
      "learning_rate": 0.00047132463007856134,
      "loss": 3.0507,
      "step": 70624
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5631812810897827,
      "learning_rate": 0.0004713212721489883,
      "loss": 3.1052,
      "step": 70625
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5453033447265625,
      "learning_rate": 0.0004713179141875634,
      "loss": 3.0245,
      "step": 70626
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9135223627090454,
      "learning_rate": 0.00047131455619428726,
      "loss": 2.9472,
      "step": 70627
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.031851053237915,
      "learning_rate": 0.00047131119816916047,
      "loss": 3.129,
      "step": 70628
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.525795578956604,
      "learning_rate": 0.0004713078401121837,
      "loss": 3.1316,
      "step": 70629
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.168668270111084,
      "learning_rate": 0.00047130448202335767,
      "loss": 3.0196,
      "step": 70630
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4952888488769531,
      "learning_rate": 0.00047130112390268285,
      "loss": 2.788,
      "step": 70631
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5822296142578125,
      "learning_rate": 0.0004712977657501599,
      "loss": 2.8345,
      "step": 70632
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5801446437835693,
      "learning_rate": 0.00047129440756578947,
      "loss": 3.1639,
      "step": 70633
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.171538829803467,
      "learning_rate": 0.00047129104934957226,
      "loss": 2.9253,
      "step": 70634
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8019770383834839,
      "learning_rate": 0.0004712876911015086,
      "loss": 3.0893,
      "step": 70635
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4798439741134644,
      "learning_rate": 0.0004712843328215995,
      "loss": 3.0183,
      "step": 70636
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7339016199111938,
      "learning_rate": 0.00047128097450984535,
      "loss": 3.3454,
      "step": 70637
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4234225749969482,
      "learning_rate": 0.0004712776161662469,
      "loss": 3.1795,
      "step": 70638
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9841405153274536,
      "learning_rate": 0.0004712742577908046,
      "loss": 2.8066,
      "step": 70639
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6533477306365967,
      "learning_rate": 0.00047127089938351935,
      "loss": 3.2007,
      "step": 70640
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4072787761688232,
      "learning_rate": 0.0004712675409443915,
      "loss": 2.9453,
      "step": 70641
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3926103115081787,
      "learning_rate": 0.0004712641824734217,
      "loss": 3.321,
      "step": 70642
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9380853176116943,
      "learning_rate": 0.00047126082397061075,
      "loss": 3.0169,
      "step": 70643
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5630288124084473,
      "learning_rate": 0.0004712574654359591,
      "loss": 2.9603,
      "step": 70644
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2590250968933105,
      "learning_rate": 0.00047125410686946754,
      "loss": 2.9161,
      "step": 70645
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5261048078536987,
      "learning_rate": 0.0004712507482711366,
      "loss": 3.1135,
      "step": 70646
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7211637496948242,
      "learning_rate": 0.0004712473896409669,
      "loss": 3.0982,
      "step": 70647
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8000831604003906,
      "learning_rate": 0.00047124403097895904,
      "loss": 3.1899,
      "step": 70648
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8929080963134766,
      "learning_rate": 0.0004712406722851138,
      "loss": 2.8352,
      "step": 70649
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5275729894638062,
      "learning_rate": 0.0004712373135594316,
      "loss": 2.9885,
      "step": 70650
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4979684352874756,
      "learning_rate": 0.0004712339548019131,
      "loss": 2.9891,
      "step": 70651
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9306269884109497,
      "learning_rate": 0.00047123059601255913,
      "loss": 2.6445,
      "step": 70652
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.182741641998291,
      "learning_rate": 0.00047122723719137,
      "loss": 3.0908,
      "step": 70653
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4749644994735718,
      "learning_rate": 0.00047122387833834655,
      "loss": 2.8681,
      "step": 70654
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.210880994796753,
      "learning_rate": 0.00047122051945348945,
      "loss": 3.0556,
      "step": 70655
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5484232902526855,
      "learning_rate": 0.00047121716053679904,
      "loss": 3.0611,
      "step": 70656
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4056193828582764,
      "learning_rate": 0.0004712138015882763,
      "loss": 3.0235,
      "step": 70657
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5571987628936768,
      "learning_rate": 0.0004712104426079216,
      "loss": 3.0146,
      "step": 70658
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.35990571975708,
      "learning_rate": 0.0004712070835957356,
      "loss": 3.1894,
      "step": 70659
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8343071937561035,
      "learning_rate": 0.000471203724551719,
      "loss": 3.231,
      "step": 70660
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.559356927871704,
      "learning_rate": 0.0004712003654758724,
      "loss": 3.0039,
      "step": 70661
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8345121145248413,
      "learning_rate": 0.00047119700636819647,
      "loss": 2.9446,
      "step": 70662
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6670695543289185,
      "learning_rate": 0.00047119364722869166,
      "loss": 3.2655,
      "step": 70663
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8002842664718628,
      "learning_rate": 0.00047119028805735886,
      "loss": 3.001,
      "step": 70664
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0120182037353516,
      "learning_rate": 0.00047118692885419854,
      "loss": 3.1614,
      "step": 70665
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0708208084106445,
      "learning_rate": 0.0004711835696192113,
      "loss": 2.998,
      "step": 70666
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9012582302093506,
      "learning_rate": 0.0004711802103523978,
      "loss": 3.0082,
      "step": 70667
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.65683913230896,
      "learning_rate": 0.0004711768510537587,
      "loss": 3.0143,
      "step": 70668
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7053723335266113,
      "learning_rate": 0.00047117349172329454,
      "loss": 2.7333,
      "step": 70669
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4724043607711792,
      "learning_rate": 0.000471170132361006,
      "loss": 3.1027,
      "step": 70670
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5625940561294556,
      "learning_rate": 0.0004711667729668937,
      "loss": 3.0202,
      "step": 70671
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.481526494026184,
      "learning_rate": 0.00047116341354095834,
      "loss": 3.0771,
      "step": 70672
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.708088755607605,
      "learning_rate": 0.00047116005408320036,
      "loss": 3.0826,
      "step": 70673
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4399067163467407,
      "learning_rate": 0.00047115669459362055,
      "loss": 2.9337,
      "step": 70674
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7469927072525024,
      "learning_rate": 0.00047115333507221946,
      "loss": 3.0948,
      "step": 70675
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6677933931350708,
      "learning_rate": 0.0004711499755189978,
      "loss": 2.9929,
      "step": 70676
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.388159990310669,
      "learning_rate": 0.00047114661593395605,
      "loss": 3.0447,
      "step": 70677
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4218336343765259,
      "learning_rate": 0.0004711432563170949,
      "loss": 3.0964,
      "step": 70678
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0758354663848877,
      "learning_rate": 0.00047113989666841517,
      "loss": 3.1412,
      "step": 70679
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.703316330909729,
      "learning_rate": 0.0004711365369879171,
      "loss": 2.8362,
      "step": 70680
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8801432847976685,
      "learning_rate": 0.00047113317727560163,
      "loss": 3.203,
      "step": 70681
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5408921241760254,
      "learning_rate": 0.00047112981753146926,
      "loss": 3.1165,
      "step": 70682
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4409518241882324,
      "learning_rate": 0.00047112645775552054,
      "loss": 3.3594,
      "step": 70683
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7575721740722656,
      "learning_rate": 0.00047112309794775624,
      "loss": 3.16,
      "step": 70684
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5136202573776245,
      "learning_rate": 0.000471119738108177,
      "loss": 3.2671,
      "step": 70685
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.727781891822815,
      "learning_rate": 0.00047111637823678325,
      "loss": 3.2671,
      "step": 70686
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8452646732330322,
      "learning_rate": 0.0004711130183335757,
      "loss": 3.0352,
      "step": 70687
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6010140180587769,
      "learning_rate": 0.0004711096583985552,
      "loss": 3.0718,
      "step": 70688
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5988781452178955,
      "learning_rate": 0.0004711062984317221,
      "loss": 3.2478,
      "step": 70689
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.739139199256897,
      "learning_rate": 0.000471102938433077,
      "loss": 2.9571,
      "step": 70690
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5264166593551636,
      "learning_rate": 0.00047109957840262077,
      "loss": 3.0394,
      "step": 70691
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5401225090026855,
      "learning_rate": 0.00047109621834035383,
      "loss": 2.9617,
      "step": 70692
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5155452489852905,
      "learning_rate": 0.00047109285824627683,
      "loss": 3.2246,
      "step": 70693
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7893624305725098,
      "learning_rate": 0.0004710894981203906,
      "loss": 2.8534,
      "step": 70694
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.603652000427246,
      "learning_rate": 0.0004710861379626955,
      "loss": 3.0273,
      "step": 70695
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3746957778930664,
      "learning_rate": 0.00047108277777319215,
      "loss": 3.0461,
      "step": 70696
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6174085140228271,
      "learning_rate": 0.00047107941755188137,
      "loss": 3.1528,
      "step": 70697
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4689549207687378,
      "learning_rate": 0.00047107605729876373,
      "loss": 2.9877,
      "step": 70698
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.709381341934204,
      "learning_rate": 0.0004710726970138398,
      "loss": 2.9772,
      "step": 70699
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.710719108581543,
      "learning_rate": 0.0004710693366971103,
      "loss": 3.0881,
      "step": 70700
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6171789169311523,
      "learning_rate": 0.0004710659763485757,
      "loss": 2.7841,
      "step": 70701
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.943293809890747,
      "learning_rate": 0.0004710626159682367,
      "loss": 3.1507,
      "step": 70702
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.675327181816101,
      "learning_rate": 0.00047105925555609394,
      "loss": 3.2484,
      "step": 70703
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4006946086883545,
      "learning_rate": 0.00047105589511214815,
      "loss": 2.9576,
      "step": 70704
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6011998653411865,
      "learning_rate": 0.0004710525346363997,
      "loss": 3.1341,
      "step": 70705
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.4703917503356934,
      "learning_rate": 0.00047104917412884933,
      "loss": 2.8859,
      "step": 70706
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1505789756774902,
      "learning_rate": 0.00047104581358949776,
      "loss": 2.928,
      "step": 70707
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0942039489746094,
      "learning_rate": 0.00047104245301834557,
      "loss": 2.9886,
      "step": 70708
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.6470377445220947,
      "learning_rate": 0.0004710390924153933,
      "loss": 3.0759,
      "step": 70709
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.893113374710083,
      "learning_rate": 0.0004710357317806417,
      "loss": 2.8511,
      "step": 70710
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3600480556488037,
      "learning_rate": 0.0004710323711140913,
      "loss": 3.17,
      "step": 70711
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7849245071411133,
      "learning_rate": 0.0004710290104157427,
      "loss": 3.2609,
      "step": 70712
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5134587287902832,
      "learning_rate": 0.0004710256496855966,
      "loss": 2.8822,
      "step": 70713
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5569891929626465,
      "learning_rate": 0.0004710222889236536,
      "loss": 3.1226,
      "step": 70714
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4167155027389526,
      "learning_rate": 0.0004710189281299144,
      "loss": 2.9804,
      "step": 70715
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5664578676223755,
      "learning_rate": 0.0004710155673043795,
      "loss": 3.0594,
      "step": 70716
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5935680866241455,
      "learning_rate": 0.0004710122064470496,
      "loss": 2.8596,
      "step": 70717
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2500057220458984,
      "learning_rate": 0.00047100884555792525,
      "loss": 2.8783,
      "step": 70718
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8416637182235718,
      "learning_rate": 0.0004710054846370072,
      "loss": 3.1132,
      "step": 70719
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.993800401687622,
      "learning_rate": 0.0004710021236842959,
      "loss": 3.0356,
      "step": 70720
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9423545598983765,
      "learning_rate": 0.00047099876269979214,
      "loss": 2.9449,
      "step": 70721
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5150525569915771,
      "learning_rate": 0.00047099540168349656,
      "loss": 3.0425,
      "step": 70722
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8983540534973145,
      "learning_rate": 0.0004709920406354096,
      "loss": 3.0779,
      "step": 70723
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.818137288093567,
      "learning_rate": 0.000470988679555532,
      "loss": 2.9769,
      "step": 70724
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3359735012054443,
      "learning_rate": 0.0004709853184438644,
      "loss": 3.0738,
      "step": 70725
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5705828666687012,
      "learning_rate": 0.0004709819573004074,
      "loss": 3.0604,
      "step": 70726
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1514358520507812,
      "learning_rate": 0.00047097859612516165,
      "loss": 3.0184,
      "step": 70727
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2247140407562256,
      "learning_rate": 0.00047097523491812775,
      "loss": 3.0873,
      "step": 70728
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8387826681137085,
      "learning_rate": 0.00047097187367930635,
      "loss": 3.0019,
      "step": 70729
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7250864505767822,
      "learning_rate": 0.000470968512408698,
      "loss": 2.8913,
      "step": 70730
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.819183349609375,
      "learning_rate": 0.0004709651511063034,
      "loss": 3.1485,
      "step": 70731
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.23392391204834,
      "learning_rate": 0.00047096178977212307,
      "loss": 3.0268,
      "step": 70732
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.898071050643921,
      "learning_rate": 0.00047095842840615777,
      "loss": 3.1389,
      "step": 70733
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.41692054271698,
      "learning_rate": 0.00047095506700840816,
      "loss": 2.8664,
      "step": 70734
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.865188479423523,
      "learning_rate": 0.0004709517055788747,
      "loss": 3.1366,
      "step": 70735
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9240103960037231,
      "learning_rate": 0.000470948344117558,
      "loss": 3.1612,
      "step": 70736
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8556468486785889,
      "learning_rate": 0.00047094498262445896,
      "loss": 3.2272,
      "step": 70737
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7247439622879028,
      "learning_rate": 0.0004709416210995779,
      "loss": 3.012,
      "step": 70738
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.0079352855682373,
      "learning_rate": 0.0004709382595429156,
      "loss": 3.0173,
      "step": 70739
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9540400505065918,
      "learning_rate": 0.00047093489795447266,
      "loss": 3.172,
      "step": 70740
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3568779230117798,
      "learning_rate": 0.00047093153633424964,
      "loss": 2.8639,
      "step": 70741
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.265418291091919,
      "learning_rate": 0.00047092817468224725,
      "loss": 2.8491,
      "step": 70742
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.965871810913086,
      "learning_rate": 0.0004709248129984661,
      "loss": 2.9688,
      "step": 70743
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8906571865081787,
      "learning_rate": 0.00047092145128290684,
      "loss": 2.8695,
      "step": 70744
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5985219478607178,
      "learning_rate": 0.00047091808953556995,
      "loss": 3.2085,
      "step": 70745
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.347168445587158,
      "learning_rate": 0.0004709147277564563,
      "loss": 2.9337,
      "step": 70746
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9192304611206055,
      "learning_rate": 0.00047091136594556624,
      "loss": 3.058,
      "step": 70747
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.117716073989868,
      "learning_rate": 0.0004709080041029006,
      "loss": 3.0773,
      "step": 70748
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6661715507507324,
      "learning_rate": 0.00047090464222845994,
      "loss": 3.1814,
      "step": 70749
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.9091250896453857,
      "learning_rate": 0.0004709012803222449,
      "loss": 3.193,
      "step": 70750
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7658028602600098,
      "learning_rate": 0.00047089791838425603,
      "loss": 3.1525,
      "step": 70751
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5674915313720703,
      "learning_rate": 0.00047089455641449404,
      "loss": 3.0776,
      "step": 70752
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3232181072235107,
      "learning_rate": 0.00047089119441295945,
      "loss": 2.978,
      "step": 70753
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0716991424560547,
      "learning_rate": 0.000470887832379653,
      "loss": 3.1107,
      "step": 70754
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.105496406555176,
      "learning_rate": 0.00047088447031457534,
      "loss": 2.9952,
      "step": 70755
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7387317419052124,
      "learning_rate": 0.000470881108217727,
      "loss": 2.7463,
      "step": 70756
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1989991664886475,
      "learning_rate": 0.00047087774608910865,
      "loss": 3.0431,
      "step": 70757
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.274512767791748,
      "learning_rate": 0.000470874383928721,
      "loss": 2.5919,
      "step": 70758
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6984506845474243,
      "learning_rate": 0.0004708710217365644,
      "loss": 2.7928,
      "step": 70759
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6364268064498901,
      "learning_rate": 0.0004708676595126397,
      "loss": 2.8636,
      "step": 70760
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3282999992370605,
      "learning_rate": 0.00047086429725694756,
      "loss": 3.1926,
      "step": 70761
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6510193347930908,
      "learning_rate": 0.0004708609349694884,
      "loss": 3.0174,
      "step": 70762
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.885035991668701,
      "learning_rate": 0.00047085757265026296,
      "loss": 2.8206,
      "step": 70763
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9519063234329224,
      "learning_rate": 0.000470854210299272,
      "loss": 3.1152,
      "step": 70764
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5305030345916748,
      "learning_rate": 0.00047085084791651595,
      "loss": 3.0432,
      "step": 70765
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3753741979599,
      "learning_rate": 0.00047084748550199547,
      "loss": 3.1181,
      "step": 70766
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.448055624961853,
      "learning_rate": 0.00047084412305571123,
      "loss": 2.9627,
      "step": 70767
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.664824366569519,
      "learning_rate": 0.00047084076057766393,
      "loss": 3.0376,
      "step": 70768
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1063263416290283,
      "learning_rate": 0.000470837398067854,
      "loss": 2.9713,
      "step": 70769
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.574657917022705,
      "learning_rate": 0.0004708340355262822,
      "loss": 2.9345,
      "step": 70770
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3887871503829956,
      "learning_rate": 0.00047083067295294916,
      "loss": 3.2274,
      "step": 70771
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4973260164260864,
      "learning_rate": 0.0004708273103478554,
      "loss": 2.9849,
      "step": 70772
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5047601461410522,
      "learning_rate": 0.0004708239477110017,
      "loss": 3.1321,
      "step": 70773
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4894384145736694,
      "learning_rate": 0.0004708205850423886,
      "loss": 3.0143,
      "step": 70774
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5049821138381958,
      "learning_rate": 0.0004708172223420167,
      "loss": 2.9345,
      "step": 70775
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8204333782196045,
      "learning_rate": 0.00047081385960988663,
      "loss": 2.9899,
      "step": 70776
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0582098960876465,
      "learning_rate": 0.00047081049684599913,
      "loss": 3.0552,
      "step": 70777
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6074573993682861,
      "learning_rate": 0.00047080713405035465,
      "loss": 3.3555,
      "step": 70778
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5992709398269653,
      "learning_rate": 0.0004708037712229539,
      "loss": 2.8932,
      "step": 70779
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1423254013061523,
      "learning_rate": 0.00047080040836379754,
      "loss": 3.0135,
      "step": 70780
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8217617273330688,
      "learning_rate": 0.00047079704547288616,
      "loss": 2.9245,
      "step": 70781
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8604850769042969,
      "learning_rate": 0.00047079368255022034,
      "loss": 3.0683,
      "step": 70782
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8396180868148804,
      "learning_rate": 0.00047079031959580084,
      "loss": 3.0546,
      "step": 70783
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8933606147766113,
      "learning_rate": 0.0004707869566096281,
      "loss": 3.0663,
      "step": 70784
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6091382503509521,
      "learning_rate": 0.00047078359359170287,
      "loss": 2.8572,
      "step": 70785
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7942737340927124,
      "learning_rate": 0.0004707802305420258,
      "loss": 2.9174,
      "step": 70786
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9907801151275635,
      "learning_rate": 0.0004707768674605974,
      "loss": 3.1098,
      "step": 70787
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.908685326576233,
      "learning_rate": 0.0004707735043474184,
      "loss": 3.2023,
      "step": 70788
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8364989757537842,
      "learning_rate": 0.0004707701412024893,
      "loss": 3.2548,
      "step": 70789
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0078094005584717,
      "learning_rate": 0.0004707667780258109,
      "loss": 2.7205,
      "step": 70790
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5322651863098145,
      "learning_rate": 0.00047076341481738364,
      "loss": 2.9703,
      "step": 70791
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5703150033950806,
      "learning_rate": 0.0004707600515772083,
      "loss": 3.2371,
      "step": 70792
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7866473197937012,
      "learning_rate": 0.00047075668830528545,
      "loss": 2.9297,
      "step": 70793
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4672695398330688,
      "learning_rate": 0.00047075332500161565,
      "loss": 2.977,
      "step": 70794
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5635225772857666,
      "learning_rate": 0.00047074996166619965,
      "loss": 2.9452,
      "step": 70795
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7023688554763794,
      "learning_rate": 0.00047074659829903805,
      "loss": 3.2055,
      "step": 70796
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.49339759349823,
      "learning_rate": 0.00047074323490013133,
      "loss": 3.062,
      "step": 70797
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4532809257507324,
      "learning_rate": 0.00047073987146948026,
      "loss": 3.0968,
      "step": 70798
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7131284475326538,
      "learning_rate": 0.00047073650800708543,
      "loss": 2.7238,
      "step": 70799
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6503785848617554,
      "learning_rate": 0.0004707331445129475,
      "loss": 3.1016,
      "step": 70800
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4268971681594849,
      "learning_rate": 0.00047072978098706704,
      "loss": 3.2405,
      "step": 70801
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7393704652786255,
      "learning_rate": 0.0004707264174294446,
      "loss": 3.0838,
      "step": 70802
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8440629243850708,
      "learning_rate": 0.000470723053840081,
      "loss": 3.2774,
      "step": 70803
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7154401540756226,
      "learning_rate": 0.00047071969021897673,
      "loss": 2.9422,
      "step": 70804
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4407339096069336,
      "learning_rate": 0.0004707163265661323,
      "loss": 3.2056,
      "step": 70805
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.878387689590454,
      "learning_rate": 0.00047071296288154867,
      "loss": 3.1259,
      "step": 70806
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.165637493133545,
      "learning_rate": 0.00047070959916522626,
      "loss": 3.1622,
      "step": 70807
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.699937582015991,
      "learning_rate": 0.0004707062354171657,
      "loss": 3.1359,
      "step": 70808
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7768505811691284,
      "learning_rate": 0.00047070287163736757,
      "loss": 2.9467,
      "step": 70809
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6028521060943604,
      "learning_rate": 0.00047069950782583264,
      "loss": 3.0782,
      "step": 70810
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0857596397399902,
      "learning_rate": 0.0004706961439825614,
      "loss": 2.9644,
      "step": 70811
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8417896032333374,
      "learning_rate": 0.00047069278010755444,
      "loss": 3.0937,
      "step": 70812
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.9040942192077637,
      "learning_rate": 0.00047068941620081263,
      "loss": 2.6245,
      "step": 70813
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6908559799194336,
      "learning_rate": 0.00047068605226233633,
      "loss": 3.1446,
      "step": 70814
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.21002459526062,
      "learning_rate": 0.0004706826882921263,
      "loss": 3.2024,
      "step": 70815
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.831745147705078,
      "learning_rate": 0.0004706793242901832,
      "loss": 3.1088,
      "step": 70816
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.9286141395568848,
      "learning_rate": 0.0004706759602565075,
      "loss": 2.9698,
      "step": 70817
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9244962930679321,
      "learning_rate": 0.0004706725961910999,
      "loss": 3.1291,
      "step": 70818
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.410832405090332,
      "learning_rate": 0.00047066923209396113,
      "loss": 2.8481,
      "step": 70819
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7959753274917603,
      "learning_rate": 0.00047066586796509167,
      "loss": 3.0248,
      "step": 70820
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.173288583755493,
      "learning_rate": 0.0004706625038044922,
      "loss": 3.1417,
      "step": 70821
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7948174476623535,
      "learning_rate": 0.0004706591396121634,
      "loss": 3.1269,
      "step": 70822
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3252735137939453,
      "learning_rate": 0.0004706557753881058,
      "loss": 3.3003,
      "step": 70823
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3791940212249756,
      "learning_rate": 0.0004706524111323201,
      "loss": 2.7729,
      "step": 70824
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7600139379501343,
      "learning_rate": 0.00047064904684480696,
      "loss": 2.9535,
      "step": 70825
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4983983039855957,
      "learning_rate": 0.00047064568252556683,
      "loss": 3.0352,
      "step": 70826
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0822558403015137,
      "learning_rate": 0.00047064231817460045,
      "loss": 3.1386,
      "step": 70827
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.6848931312561035,
      "learning_rate": 0.00047063895379190847,
      "loss": 2.975,
      "step": 70828
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5907319784164429,
      "learning_rate": 0.00047063558937749154,
      "loss": 3.066,
      "step": 70829
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8373044729232788,
      "learning_rate": 0.0004706322249313501,
      "loss": 2.9543,
      "step": 70830
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.2438514232635498,
      "learning_rate": 0.000470628860453485,
      "loss": 2.707,
      "step": 70831
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6221107244491577,
      "learning_rate": 0.0004706254959438968,
      "loss": 2.8999,
      "step": 70832
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0330698490142822,
      "learning_rate": 0.00047062213140258605,
      "loss": 3.3647,
      "step": 70833
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6702473163604736,
      "learning_rate": 0.0004706187668295534,
      "loss": 3.0194,
      "step": 70834
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.196444272994995,
      "learning_rate": 0.0004706154022247996,
      "loss": 2.9959,
      "step": 70835
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8833194971084595,
      "learning_rate": 0.0004706120375883251,
      "loss": 3.1604,
      "step": 70836
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5627868175506592,
      "learning_rate": 0.00047060867292013055,
      "loss": 3.1276,
      "step": 70837
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.842929482460022,
      "learning_rate": 0.0004706053082202168,
      "loss": 2.7995,
      "step": 70838
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.113039970397949,
      "learning_rate": 0.00047060194348858415,
      "loss": 3.0313,
      "step": 70839
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9308887720108032,
      "learning_rate": 0.0004705985787252334,
      "loss": 3.0034,
      "step": 70840
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.557140588760376,
      "learning_rate": 0.0004705952139301652,
      "loss": 2.9777,
      "step": 70841
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.1663100719451904,
      "learning_rate": 0.0004705918491033802,
      "loss": 2.8903,
      "step": 70842
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9760326147079468,
      "learning_rate": 0.00047058848424487876,
      "loss": 3.0615,
      "step": 70843
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.614626169204712,
      "learning_rate": 0.0004705851193546618,
      "loss": 3.3127,
      "step": 70844
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7486190795898438,
      "learning_rate": 0.00047058175443272986,
      "loss": 3.0856,
      "step": 70845
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.5158205032348633,
      "learning_rate": 0.0004705783894790836,
      "loss": 3.026,
      "step": 70846
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0533409118652344,
      "learning_rate": 0.0004705750244937235,
      "loss": 3.1875,
      "step": 70847
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6479034423828125,
      "learning_rate": 0.0004705716594766503,
      "loss": 2.9268,
      "step": 70848
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6319005489349365,
      "learning_rate": 0.0004705682944278646,
      "loss": 3.165,
      "step": 70849
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3179506063461304,
      "learning_rate": 0.0004705649293473671,
      "loss": 2.9853,
      "step": 70850
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5550665855407715,
      "learning_rate": 0.0004705615642351583,
      "loss": 3.1391,
      "step": 70851
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3580681085586548,
      "learning_rate": 0.00047055819909123893,
      "loss": 3.0462,
      "step": 70852
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5404229164123535,
      "learning_rate": 0.00047055483391560956,
      "loss": 2.914,
      "step": 70853
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6171852350234985,
      "learning_rate": 0.0004705514687082708,
      "loss": 2.6579,
      "step": 70854
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8797187805175781,
      "learning_rate": 0.0004705481034692233,
      "loss": 3.0158,
      "step": 70855
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5227222442626953,
      "learning_rate": 0.00047054473819846783,
      "loss": 3.0248,
      "step": 70856
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5324907302856445,
      "learning_rate": 0.0004705413728960047,
      "loss": 2.9997,
      "step": 70857
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3761063814163208,
      "learning_rate": 0.00047053800756183473,
      "loss": 3.1476,
      "step": 70858
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3810917139053345,
      "learning_rate": 0.0004705346421959586,
      "loss": 2.9395,
      "step": 70859
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8529977798461914,
      "learning_rate": 0.0004705312767983768,
      "loss": 3.0971,
      "step": 70860
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7587107419967651,
      "learning_rate": 0.00047052791136909005,
      "loss": 2.937,
      "step": 70861
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7854282855987549,
      "learning_rate": 0.000470524545908099,
      "loss": 3.0204,
      "step": 70862
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.576963186264038,
      "learning_rate": 0.00047052118041540403,
      "loss": 2.8385,
      "step": 70863
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5844695568084717,
      "learning_rate": 0.00047051781489100607,
      "loss": 2.9531,
      "step": 70864
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.116457462310791,
      "learning_rate": 0.00047051444933490576,
      "loss": 3.0098,
      "step": 70865
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9437313079833984,
      "learning_rate": 0.00047051108374710344,
      "loss": 2.8708,
      "step": 70866
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6791644096374512,
      "learning_rate": 0.00047050771812759987,
      "loss": 3.0219,
      "step": 70867
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.740999460220337,
      "learning_rate": 0.0004705043524763958,
      "loss": 2.836,
      "step": 70868
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.865067481994629,
      "learning_rate": 0.0004705009867934917,
      "loss": 3.0194,
      "step": 70869
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4472014904022217,
      "learning_rate": 0.00047049762107888814,
      "loss": 2.9131,
      "step": 70870
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.743107557296753,
      "learning_rate": 0.0004704942553325861,
      "loss": 3.2575,
      "step": 70871
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6208034753799438,
      "learning_rate": 0.00047049088955458576,
      "loss": 2.9692,
      "step": 70872
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6080842018127441,
      "learning_rate": 0.00047048752374488796,
      "loss": 3.1912,
      "step": 70873
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0558700561523438,
      "learning_rate": 0.00047048415790349336,
      "loss": 3.1258,
      "step": 70874
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5593780279159546,
      "learning_rate": 0.0004704807920304026,
      "loss": 2.8936,
      "step": 70875
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4123704433441162,
      "learning_rate": 0.0004704774261256161,
      "loss": 3.17,
      "step": 70876
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4784996509552002,
      "learning_rate": 0.00047047406018913473,
      "loss": 3.0001,
      "step": 70877
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1544721126556396,
      "learning_rate": 0.00047047069422095894,
      "loss": 3.0694,
      "step": 70878
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5022515058517456,
      "learning_rate": 0.00047046732822108954,
      "loss": 2.9711,
      "step": 70879
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.901330828666687,
      "learning_rate": 0.00047046396218952696,
      "loss": 2.9253,
      "step": 70880
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7117029428482056,
      "learning_rate": 0.0004704605961262719,
      "loss": 2.9493,
      "step": 70881
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.5749590396881104,
      "learning_rate": 0.000470457230031325,
      "loss": 2.928,
      "step": 70882
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7883412837982178,
      "learning_rate": 0.0004704538639046869,
      "loss": 3.0725,
      "step": 70883
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5207165479660034,
      "learning_rate": 0.0004704504977463583,
      "loss": 3.0674,
      "step": 70884
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6531482934951782,
      "learning_rate": 0.00047044713155633964,
      "loss": 3.1876,
      "step": 70885
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.878358244895935,
      "learning_rate": 0.0004704437653346317,
      "loss": 3.1647,
      "step": 70886
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8986471891403198,
      "learning_rate": 0.00047044039908123494,
      "loss": 3.2181,
      "step": 70887
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6997506618499756,
      "learning_rate": 0.00047043703279615016,
      "loss": 2.9737,
      "step": 70888
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5583571195602417,
      "learning_rate": 0.00047043366647937795,
      "loss": 3.1131,
      "step": 70889
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6314576864242554,
      "learning_rate": 0.00047043030013091885,
      "loss": 3.0588,
      "step": 70890
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3578846454620361,
      "learning_rate": 0.00047042693375077356,
      "loss": 2.9023,
      "step": 70891
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.607996940612793,
      "learning_rate": 0.00047042356733894273,
      "loss": 2.8993,
      "step": 70892
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4722261428833008,
      "learning_rate": 0.00047042020089542685,
      "loss": 3.111,
      "step": 70893
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6046154499053955,
      "learning_rate": 0.0004704168344202267,
      "loss": 2.7621,
      "step": 70894
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4240328073501587,
      "learning_rate": 0.0004704134679133428,
      "loss": 3.2468,
      "step": 70895
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4239978790283203,
      "learning_rate": 0.00047041010137477586,
      "loss": 3.2971,
      "step": 70896
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8558449745178223,
      "learning_rate": 0.00047040673480452645,
      "loss": 3.147,
      "step": 70897
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.711658000946045,
      "learning_rate": 0.0004704033682025952,
      "loss": 3.0591,
      "step": 70898
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4649059772491455,
      "learning_rate": 0.0004704000015689828,
      "loss": 3.1747,
      "step": 70899
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4798880815505981,
      "learning_rate": 0.0004703966349036898,
      "loss": 2.803,
      "step": 70900
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7565038204193115,
      "learning_rate": 0.00047039326820671676,
      "loss": 3.2079,
      "step": 70901
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8465245962142944,
      "learning_rate": 0.0004703899014780645,
      "loss": 3.0135,
      "step": 70902
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.627931833267212,
      "learning_rate": 0.00047038653471773357,
      "loss": 2.9901,
      "step": 70903
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.630281925201416,
      "learning_rate": 0.00047038316792572443,
      "loss": 3.2129,
      "step": 70904
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4091174602508545,
      "learning_rate": 0.000470379801102038,
      "loss": 3.0615,
      "step": 70905
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.517752170562744,
      "learning_rate": 0.00047037643424667464,
      "loss": 2.8992,
      "step": 70906
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9393219947814941,
      "learning_rate": 0.0004703730673596351,
      "loss": 3.0584,
      "step": 70907
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3226120471954346,
      "learning_rate": 0.00047036970044092005,
      "loss": 3.0953,
      "step": 70908
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6602636575698853,
      "learning_rate": 0.00047036633349053,
      "loss": 3.2051,
      "step": 70909
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6526836156845093,
      "learning_rate": 0.00047036296650846565,
      "loss": 3.0944,
      "step": 70910
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3520265817642212,
      "learning_rate": 0.00047035959949472757,
      "loss": 2.711,
      "step": 70911
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5687909126281738,
      "learning_rate": 0.0004703562324493165,
      "loss": 3.2528,
      "step": 70912
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5734130144119263,
      "learning_rate": 0.00047035286537223286,
      "loss": 3.1616,
      "step": 70913
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5759732723236084,
      "learning_rate": 0.0004703494982634776,
      "loss": 3.0076,
      "step": 70914
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4482944011688232,
      "learning_rate": 0.00047034613112305096,
      "loss": 2.8968,
      "step": 70915
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.50955069065094,
      "learning_rate": 0.0004703427639509539,
      "loss": 3.1851,
      "step": 70916
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5257794857025146,
      "learning_rate": 0.00047033939674718687,
      "loss": 3.1578,
      "step": 70917
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7414542436599731,
      "learning_rate": 0.0004703360295117505,
      "loss": 2.8807,
      "step": 70918
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6291614770889282,
      "learning_rate": 0.00047033266224464547,
      "loss": 3.0531,
      "step": 70919
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.69748854637146,
      "learning_rate": 0.0004703292949458724,
      "loss": 2.9845,
      "step": 70920
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7298308610916138,
      "learning_rate": 0.0004703259276154318,
      "loss": 3.1125,
      "step": 70921
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7076754570007324,
      "learning_rate": 0.00047032256025332454,
      "loss": 3.1329,
      "step": 70922
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5993373394012451,
      "learning_rate": 0.000470319192859551,
      "loss": 3.0521,
      "step": 70923
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7304701805114746,
      "learning_rate": 0.00047031582543411197,
      "loss": 2.813,
      "step": 70924
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5311565399169922,
      "learning_rate": 0.0004703124579770079,
      "loss": 3.1075,
      "step": 70925
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6573604345321655,
      "learning_rate": 0.0004703090904882396,
      "loss": 3.1474,
      "step": 70926
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8301821947097778,
      "learning_rate": 0.00047030572296780765,
      "loss": 3.1676,
      "step": 70927
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9738341569900513,
      "learning_rate": 0.0004703023554157126,
      "loss": 2.9026,
      "step": 70928
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1652414798736572,
      "learning_rate": 0.0004702989878319552,
      "loss": 3.0111,
      "step": 70929
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.720076560974121,
      "learning_rate": 0.000470295620216536,
      "loss": 3.1824,
      "step": 70930
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2218194007873535,
      "learning_rate": 0.00047029225256945556,
      "loss": 2.8762,
      "step": 70931
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.527964472770691,
      "learning_rate": 0.0004702888848907146,
      "loss": 3.0928,
      "step": 70932
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7605695724487305,
      "learning_rate": 0.00047028551718031374,
      "loss": 2.9254,
      "step": 70933
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.800157308578491,
      "learning_rate": 0.0004702821494382536,
      "loss": 2.8806,
      "step": 70934
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.543250560760498,
      "learning_rate": 0.00047027878166453483,
      "loss": 3.1954,
      "step": 70935
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0494346618652344,
      "learning_rate": 0.00047027541385915786,
      "loss": 2.6903,
      "step": 70936
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7708739042282104,
      "learning_rate": 0.00047027204602212365,
      "loss": 2.9338,
      "step": 70937
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5497969388961792,
      "learning_rate": 0.00047026867815343253,
      "loss": 3.1259,
      "step": 70938
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4581011533737183,
      "learning_rate": 0.0004702653102530853,
      "loss": 3.0268,
      "step": 70939
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1534950733184814,
      "learning_rate": 0.0004702619423210825,
      "loss": 2.9169,
      "step": 70940
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8648414611816406,
      "learning_rate": 0.00047025857435742494,
      "loss": 2.923,
      "step": 70941
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7526816129684448,
      "learning_rate": 0.00047025520636211293,
      "loss": 3.025,
      "step": 70942
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.275228977203369,
      "learning_rate": 0.0004702518383351473,
      "loss": 3.4611,
      "step": 70943
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6318998336791992,
      "learning_rate": 0.0004702484702765287,
      "loss": 2.869,
      "step": 70944
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7873046398162842,
      "learning_rate": 0.0004702451021862576,
      "loss": 3.2634,
      "step": 70945
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8353307247161865,
      "learning_rate": 0.0004702417340643348,
      "loss": 3.1374,
      "step": 70946
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6317659616470337,
      "learning_rate": 0.0004702383659107609,
      "loss": 3.1515,
      "step": 70947
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5251976251602173,
      "learning_rate": 0.0004702349977255363,
      "loss": 2.8778,
      "step": 70948
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4537396430969238,
      "learning_rate": 0.00047023162950866193,
      "loss": 2.9397,
      "step": 70949
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9187499284744263,
      "learning_rate": 0.00047022826126013825,
      "loss": 2.9486,
      "step": 70950
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8392839431762695,
      "learning_rate": 0.00047022489297996595,
      "loss": 2.8953,
      "step": 70951
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4566919803619385,
      "learning_rate": 0.0004702215246681456,
      "loss": 2.8265,
      "step": 70952
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8721168041229248,
      "learning_rate": 0.00047021815632467787,
      "loss": 3.245,
      "step": 70953
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3182132244110107,
      "learning_rate": 0.0004702147879495633,
      "loss": 3.164,
      "step": 70954
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7248843908309937,
      "learning_rate": 0.0004702114195428027,
      "loss": 3.1924,
      "step": 70955
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6029821634292603,
      "learning_rate": 0.0004702080511043965,
      "loss": 2.9044,
      "step": 70956
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4874305725097656,
      "learning_rate": 0.0004702046826343455,
      "loss": 2.9541,
      "step": 70957
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.6293156147003174,
      "learning_rate": 0.0004702013141326502,
      "loss": 2.9206,
      "step": 70958
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.693555235862732,
      "learning_rate": 0.00047019794559931125,
      "loss": 3.244,
      "step": 70959
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.441535234451294,
      "learning_rate": 0.00047019457703432923,
      "loss": 2.6942,
      "step": 70960
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0183513164520264,
      "learning_rate": 0.0004701912084377049,
      "loss": 2.8824,
      "step": 70961
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7776683568954468,
      "learning_rate": 0.00047018783980943884,
      "loss": 3.0363,
      "step": 70962
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7325174808502197,
      "learning_rate": 0.00047018447114953156,
      "loss": 3.1235,
      "step": 70963
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7963049411773682,
      "learning_rate": 0.0004701811024579838,
      "loss": 3.2388,
      "step": 70964
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7838977575302124,
      "learning_rate": 0.00047017773373479613,
      "loss": 3.0606,
      "step": 70965
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5403003692626953,
      "learning_rate": 0.0004701743649799693,
      "loss": 2.7977,
      "step": 70966
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5888375043869019,
      "learning_rate": 0.0004701709961935038,
      "loss": 2.8763,
      "step": 70967
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0912585258483887,
      "learning_rate": 0.00047016762737540023,
      "loss": 3.1251,
      "step": 70968
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0310847759246826,
      "learning_rate": 0.00047016425852565945,
      "loss": 3.0373,
      "step": 70969
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4098646640777588,
      "learning_rate": 0.0004701608896442817,
      "loss": 3.3051,
      "step": 70970
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4690920114517212,
      "learning_rate": 0.00047015752073126797,
      "loss": 2.8735,
      "step": 70971
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7841640710830688,
      "learning_rate": 0.0004701541517866188,
      "loss": 2.7414,
      "step": 70972
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5639684200286865,
      "learning_rate": 0.00047015078281033464,
      "loss": 2.9447,
      "step": 70973
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2132229804992676,
      "learning_rate": 0.00047014741380241624,
      "loss": 2.8956,
      "step": 70974
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0397863388061523,
      "learning_rate": 0.0004701440447628643,
      "loss": 2.8839,
      "step": 70975
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7123223543167114,
      "learning_rate": 0.00047014067569167933,
      "loss": 2.8883,
      "step": 70976
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9281306266784668,
      "learning_rate": 0.000470137306588862,
      "loss": 3.0559,
      "step": 70977
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.04194712638855,
      "learning_rate": 0.000470133937454413,
      "loss": 3.3993,
      "step": 70978
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.795588970184326,
      "learning_rate": 0.0004701305682883327,
      "loss": 3.081,
      "step": 70979
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6552118062973022,
      "learning_rate": 0.00047012719909062217,
      "loss": 3.0738,
      "step": 70980
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3996031284332275,
      "learning_rate": 0.0004701238298612816,
      "loss": 2.9844,
      "step": 70981
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9894657135009766,
      "learning_rate": 0.0004701204606003119,
      "loss": 3.0846,
      "step": 70982
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5711071491241455,
      "learning_rate": 0.0004701170913077135,
      "loss": 3.0582,
      "step": 70983
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3424139022827148,
      "learning_rate": 0.00047011372198348724,
      "loss": 3.0224,
      "step": 70984
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6188112497329712,
      "learning_rate": 0.0004701103526276335,
      "loss": 2.9268,
      "step": 70985
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8343406915664673,
      "learning_rate": 0.00047010698324015306,
      "loss": 3.3696,
      "step": 70986
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8081239461898804,
      "learning_rate": 0.0004701036138210466,
      "loss": 2.8848,
      "step": 70987
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7325578927993774,
      "learning_rate": 0.00047010024437031466,
      "loss": 2.9763,
      "step": 70988
    },
    {
      "epoch": 0.92,
      "grad_norm": 4.07610559463501,
      "learning_rate": 0.0004700968748879578,
      "loss": 3.0353,
      "step": 70989
    },
    {
      "epoch": 0.92,
      "grad_norm": 4.805378437042236,
      "learning_rate": 0.00047009350537397676,
      "loss": 3.1487,
      "step": 70990
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.5822741985321045,
      "learning_rate": 0.0004700901358283721,
      "loss": 2.9337,
      "step": 70991
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5820441246032715,
      "learning_rate": 0.00047008676625114444,
      "loss": 2.7779,
      "step": 70992
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4185569286346436,
      "learning_rate": 0.00047008339664229463,
      "loss": 3.2284,
      "step": 70993
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7486140727996826,
      "learning_rate": 0.0004700800270018229,
      "loss": 2.7764,
      "step": 70994
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.782944440841675,
      "learning_rate": 0.0004700766573297302,
      "loss": 2.9937,
      "step": 70995
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5350465774536133,
      "learning_rate": 0.00047007328762601697,
      "loss": 2.8765,
      "step": 70996
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3300423622131348,
      "learning_rate": 0.0004700699178906839,
      "loss": 2.8895,
      "step": 70997
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7591168880462646,
      "learning_rate": 0.0004700665481237317,
      "loss": 3.0362,
      "step": 70998
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8304980993270874,
      "learning_rate": 0.00047006317832516085,
      "loss": 3.0137,
      "step": 70999
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.7158641815185547,
      "learning_rate": 0.00047005980849497203,
      "loss": 3.0677,
      "step": 71000
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6158804893493652,
      "learning_rate": 0.0004700564386331659,
      "loss": 2.9155,
      "step": 71001
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.005255937576294,
      "learning_rate": 0.0004700530687397431,
      "loss": 2.9372,
      "step": 71002
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.0157296657562256,
      "learning_rate": 0.00047004969881470424,
      "loss": 3.007,
      "step": 71003
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9355801343917847,
      "learning_rate": 0.0004700463288580499,
      "loss": 3.0752,
      "step": 71004
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9148588180541992,
      "learning_rate": 0.0004700429588697807,
      "loss": 3.0703,
      "step": 71005
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5120316743850708,
      "learning_rate": 0.0004700395888498974,
      "loss": 3.2028,
      "step": 71006
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8143318891525269,
      "learning_rate": 0.00047003621879840047,
      "loss": 2.9945,
      "step": 71007
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.579722285270691,
      "learning_rate": 0.0004700328487152906,
      "loss": 2.9458,
      "step": 71008
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4611423015594482,
      "learning_rate": 0.00047002947860056837,
      "loss": 3.2089,
      "step": 71009
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.4344544410705566,
      "learning_rate": 0.00047002610845423443,
      "loss": 2.9532,
      "step": 71010
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.810154914855957,
      "learning_rate": 0.0004700227382762896,
      "loss": 2.9602,
      "step": 71011
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6431877613067627,
      "learning_rate": 0.00047001936806673417,
      "loss": 3.0209,
      "step": 71012
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.697321891784668,
      "learning_rate": 0.000470015997825569,
      "loss": 2.942,
      "step": 71013
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6581459045410156,
      "learning_rate": 0.0004700126275527946,
      "loss": 2.8402,
      "step": 71014
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8652900457382202,
      "learning_rate": 0.0004700092572484117,
      "loss": 2.9611,
      "step": 71015
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.9391682147979736,
      "learning_rate": 0.00047000588691242085,
      "loss": 2.9038,
      "step": 71016
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.8345344066619873,
      "learning_rate": 0.00047000251654482267,
      "loss": 3.159,
      "step": 71017
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.026811122894287,
      "learning_rate": 0.0004699991461456179,
      "loss": 3.099,
      "step": 71018
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.774301290512085,
      "learning_rate": 0.0004699957757148069,
      "loss": 3.0236,
      "step": 71019
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.0321881771087646,
      "learning_rate": 0.0004699924052523907,
      "loss": 3.287,
      "step": 71020
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.461317777633667,
      "learning_rate": 0.00046998903475836955,
      "loss": 3.1632,
      "step": 71021
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.7624006271362305,
      "learning_rate": 0.00046998566423274424,
      "loss": 3.0272,
      "step": 71022
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4921011924743652,
      "learning_rate": 0.0004699822936755154,
      "loss": 3.0164,
      "step": 71023
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.6407536268234253,
      "learning_rate": 0.0004699789230866837,
      "loss": 3.0348,
      "step": 71024
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.032514810562134,
      "learning_rate": 0.0004699755524662497,
      "loss": 2.8289,
      "step": 71025
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.079237461090088,
      "learning_rate": 0.000469972181814214,
      "loss": 2.7579,
      "step": 71026
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.4021284580230713,
      "learning_rate": 0.0004699688111305772,
      "loss": 3.1853,
      "step": 71027
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.1359076499938965,
      "learning_rate": 0.00046996544041534013,
      "loss": 3.1342,
      "step": 71028
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8153812885284424,
      "learning_rate": 0.0004699620696685032,
      "loss": 3.0075,
      "step": 71029
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.020829200744629,
      "learning_rate": 0.0004699586988900671,
      "loss": 3.1607,
      "step": 71030
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.540502905845642,
      "learning_rate": 0.00046995532808003247,
      "loss": 2.9399,
      "step": 71031
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.3048408031463623,
      "learning_rate": 0.00046995195723839996,
      "loss": 3.148,
      "step": 71032
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.390458345413208,
      "learning_rate": 0.00046994858636517016,
      "loss": 3.2038,
      "step": 71033
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.5612293481826782,
      "learning_rate": 0.0004699452154603438,
      "loss": 2.991,
      "step": 71034
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.819090723991394,
      "learning_rate": 0.0004699418445239212,
      "loss": 3.162,
      "step": 71035
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.3060460090637207,
      "learning_rate": 0.00046993847355590346,
      "loss": 2.8928,
      "step": 71036
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.3484221696853638,
      "learning_rate": 0.00046993510255629076,
      "loss": 2.9081,
      "step": 71037
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.727663278579712,
      "learning_rate": 0.0004699317315250839,
      "loss": 3.1433,
      "step": 71038
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.2836008071899414,
      "learning_rate": 0.0004699283604622837,
      "loss": 3.1117,
      "step": 71039
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.8225204944610596,
      "learning_rate": 0.0004699249893678905,
      "loss": 3.1422,
      "step": 71040
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4997241497039795,
      "learning_rate": 0.00046992161824190503,
      "loss": 3.0819,
      "step": 71041
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7788187265396118,
      "learning_rate": 0.00046991824708432793,
      "loss": 3.0456,
      "step": 71042
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6414183378219604,
      "learning_rate": 0.0004699148758951599,
      "loss": 3.0143,
      "step": 71043
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5524675846099854,
      "learning_rate": 0.00046991150467440144,
      "loss": 3.2373,
      "step": 71044
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5233652591705322,
      "learning_rate": 0.00046990813342205325,
      "loss": 3.0768,
      "step": 71045
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4936118125915527,
      "learning_rate": 0.0004699047621381158,
      "loss": 3.2236,
      "step": 71046
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6565117835998535,
      "learning_rate": 0.00046990139082258995,
      "loss": 2.7655,
      "step": 71047
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4396520853042603,
      "learning_rate": 0.00046989801947547625,
      "loss": 3.1858,
      "step": 71048
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.006613254547119,
      "learning_rate": 0.00046989464809677525,
      "loss": 3.0239,
      "step": 71049
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.652773380279541,
      "learning_rate": 0.0004698912766864876,
      "loss": 3.1515,
      "step": 71050
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8407806158065796,
      "learning_rate": 0.000469887905244614,
      "loss": 3.1848,
      "step": 71051
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6673330068588257,
      "learning_rate": 0.00046988453377115505,
      "loss": 3.0705,
      "step": 71052
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.198051691055298,
      "learning_rate": 0.0004698811622661113,
      "loss": 2.9142,
      "step": 71053
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0404038429260254,
      "learning_rate": 0.0004698777907294835,
      "loss": 2.992,
      "step": 71054
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9321178197860718,
      "learning_rate": 0.0004698744191612722,
      "loss": 2.9177,
      "step": 71055
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0411899089813232,
      "learning_rate": 0.000469871047561478,
      "loss": 2.8822,
      "step": 71056
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6760581731796265,
      "learning_rate": 0.00046986767593010156,
      "loss": 2.9554,
      "step": 71057
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3699003458023071,
      "learning_rate": 0.00046986430426714356,
      "loss": 3.319,
      "step": 71058
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8762664794921875,
      "learning_rate": 0.0004698609325726046,
      "loss": 3.1886,
      "step": 71059
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7437936067581177,
      "learning_rate": 0.0004698575608464852,
      "loss": 3.0589,
      "step": 71060
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.755740761756897,
      "learning_rate": 0.00046985418908878615,
      "loss": 3.056,
      "step": 71061
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.500929355621338,
      "learning_rate": 0.0004698508172995079,
      "loss": 3.0659,
      "step": 71062
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4839051961898804,
      "learning_rate": 0.00046984744547865127,
      "loss": 2.8509,
      "step": 71063
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8371714353561401,
      "learning_rate": 0.0004698440736262168,
      "loss": 3.2155,
      "step": 71064
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5326484441757202,
      "learning_rate": 0.00046984070174220505,
      "loss": 2.9978,
      "step": 71065
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4927045106887817,
      "learning_rate": 0.0004698373298266168,
      "loss": 2.9259,
      "step": 71066
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9687658548355103,
      "learning_rate": 0.0004698339578794525,
      "loss": 2.934,
      "step": 71067
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9902594089508057,
      "learning_rate": 0.00046983058590071284,
      "loss": 3.1071,
      "step": 71068
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1349375247955322,
      "learning_rate": 0.00046982721389039853,
      "loss": 3.0224,
      "step": 71069
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7872908115386963,
      "learning_rate": 0.00046982384184851014,
      "loss": 2.6535,
      "step": 71070
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5508654117584229,
      "learning_rate": 0.00046982046977504826,
      "loss": 3.0028,
      "step": 71071
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5955922603607178,
      "learning_rate": 0.00046981709767001355,
      "loss": 3.1945,
      "step": 71072
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.77707040309906,
      "learning_rate": 0.0004698137255334067,
      "loss": 3.1754,
      "step": 71073
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6902168989181519,
      "learning_rate": 0.0004698103533652281,
      "loss": 2.9937,
      "step": 71074
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5084822177886963,
      "learning_rate": 0.00046980698116547875,
      "loss": 2.9237,
      "step": 71075
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.685901165008545,
      "learning_rate": 0.000469803608934159,
      "loss": 3.0645,
      "step": 71076
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.040332317352295,
      "learning_rate": 0.0004698002366712695,
      "loss": 3.1514,
      "step": 71077
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8981859683990479,
      "learning_rate": 0.000469796864376811,
      "loss": 3.053,
      "step": 71078
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.111280918121338,
      "learning_rate": 0.000469793492050784,
      "loss": 2.9248,
      "step": 71079
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5535073280334473,
      "learning_rate": 0.0004697901196931892,
      "loss": 2.9749,
      "step": 71080
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.230164051055908,
      "learning_rate": 0.0004697867473040273,
      "loss": 2.9134,
      "step": 71081
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.453479766845703,
      "learning_rate": 0.00046978337488329876,
      "loss": 3.1643,
      "step": 71082
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.418543815612793,
      "learning_rate": 0.00046978000243100423,
      "loss": 3.1559,
      "step": 71083
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3762338161468506,
      "learning_rate": 0.00046977662994714456,
      "loss": 2.8657,
      "step": 71084
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5394680500030518,
      "learning_rate": 0.0004697732574317201,
      "loss": 3.1241,
      "step": 71085
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.5013418197631836,
      "learning_rate": 0.0004697698848847316,
      "loss": 2.9976,
      "step": 71086
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3437037467956543,
      "learning_rate": 0.0004697665123061797,
      "loss": 3.172,
      "step": 71087
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9135284423828125,
      "learning_rate": 0.00046976313969606493,
      "loss": 3.0893,
      "step": 71088
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.425473213195801,
      "learning_rate": 0.000469759767054388,
      "loss": 2.7526,
      "step": 71089
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.234440803527832,
      "learning_rate": 0.00046975639438114964,
      "loss": 2.9465,
      "step": 71090
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.826378345489502,
      "learning_rate": 0.00046975302167635017,
      "loss": 3.1929,
      "step": 71091
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.145836114883423,
      "learning_rate": 0.0004697496489399905,
      "loss": 3.2189,
      "step": 71092
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.7662813663482666,
      "learning_rate": 0.0004697462761720712,
      "loss": 2.6801,
      "step": 71093
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9809215068817139,
      "learning_rate": 0.00046974290337259287,
      "loss": 2.9691,
      "step": 71094
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0510175228118896,
      "learning_rate": 0.00046973953054155605,
      "loss": 2.792,
      "step": 71095
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.080695152282715,
      "learning_rate": 0.0004697361576789615,
      "loss": 2.8768,
      "step": 71096
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5386017560958862,
      "learning_rate": 0.0004697327847848098,
      "loss": 3.0665,
      "step": 71097
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5114458799362183,
      "learning_rate": 0.0004697294118591016,
      "loss": 3.0215,
      "step": 71098
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8294886350631714,
      "learning_rate": 0.00046972603890183743,
      "loss": 3.0098,
      "step": 71099
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9010826349258423,
      "learning_rate": 0.00046972266591301803,
      "loss": 3.1015,
      "step": 71100
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.5813913345336914,
      "learning_rate": 0.000469719292892644,
      "loss": 2.8871,
      "step": 71101
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.048490047454834,
      "learning_rate": 0.00046971591984071586,
      "loss": 2.8927,
      "step": 71102
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4302613735198975,
      "learning_rate": 0.00046971254675723444,
      "loss": 3.0313,
      "step": 71103
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4868147373199463,
      "learning_rate": 0.0004697091736422002,
      "loss": 3.0466,
      "step": 71104
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8966708183288574,
      "learning_rate": 0.0004697058004956138,
      "loss": 2.908,
      "step": 71105
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0443227291107178,
      "learning_rate": 0.00046970242731747596,
      "loss": 2.9266,
      "step": 71106
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.9197635650634766,
      "learning_rate": 0.00046969905410778717,
      "loss": 2.9293,
      "step": 71107
    },
    {
      "epoch": 0.93,
      "grad_norm": 4.032812118530273,
      "learning_rate": 0.0004696956808665481,
      "loss": 2.8673,
      "step": 71108
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0005085468292236,
      "learning_rate": 0.00046969230759375945,
      "loss": 2.9619,
      "step": 71109
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4182053804397583,
      "learning_rate": 0.0004696889342894218,
      "loss": 3.1142,
      "step": 71110
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4418020248413086,
      "learning_rate": 0.00046968556095353575,
      "loss": 3.0191,
      "step": 71111
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.520733594894409,
      "learning_rate": 0.000469682187586102,
      "loss": 2.9369,
      "step": 71112
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.721811056137085,
      "learning_rate": 0.0004696788141871211,
      "loss": 2.8344,
      "step": 71113
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8709383010864258,
      "learning_rate": 0.0004696754407565936,
      "loss": 2.722,
      "step": 71114
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.904076099395752,
      "learning_rate": 0.00046967206729452035,
      "loss": 3.3076,
      "step": 71115
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0142197608947754,
      "learning_rate": 0.0004696686938009019,
      "loss": 2.9193,
      "step": 71116
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4820823669433594,
      "learning_rate": 0.00046966532027573873,
      "loss": 3.1705,
      "step": 71117
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9591363668441772,
      "learning_rate": 0.00046966194671903163,
      "loss": 3.1055,
      "step": 71118
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4967292547225952,
      "learning_rate": 0.00046965857313078114,
      "loss": 3.0948,
      "step": 71119
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3981339931488037,
      "learning_rate": 0.00046965519951098795,
      "loss": 2.9919,
      "step": 71120
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4784672260284424,
      "learning_rate": 0.00046965182585965266,
      "loss": 2.9411,
      "step": 71121
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5360901355743408,
      "learning_rate": 0.00046964845217677587,
      "loss": 2.9113,
      "step": 71122
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7054636478424072,
      "learning_rate": 0.0004696450784623582,
      "loss": 2.7399,
      "step": 71123
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1620230674743652,
      "learning_rate": 0.00046964170471640044,
      "loss": 3.2067,
      "step": 71124
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7736637592315674,
      "learning_rate": 0.000469638330938903,
      "loss": 2.8809,
      "step": 71125
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.631446361541748,
      "learning_rate": 0.00046963495712986654,
      "loss": 3.0676,
      "step": 71126
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6603991985321045,
      "learning_rate": 0.0004696315832892918,
      "loss": 3.2428,
      "step": 71127
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.555300235748291,
      "learning_rate": 0.00046962820941717933,
      "loss": 3.1033,
      "step": 71128
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5627156496047974,
      "learning_rate": 0.0004696248355135298,
      "loss": 2.9282,
      "step": 71129
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8405890464782715,
      "learning_rate": 0.0004696214615783437,
      "loss": 2.9258,
      "step": 71130
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3731729984283447,
      "learning_rate": 0.0004696180876116219,
      "loss": 3.0531,
      "step": 71131
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4432798624038696,
      "learning_rate": 0.00046961471361336483,
      "loss": 2.9462,
      "step": 71132
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5257207155227661,
      "learning_rate": 0.00046961133958357327,
      "loss": 3.2786,
      "step": 71133
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7142057418823242,
      "learning_rate": 0.0004696079655222476,
      "loss": 3.0447,
      "step": 71134
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.05716609954834,
      "learning_rate": 0.00046960459142938874,
      "loss": 2.771,
      "step": 71135
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5509974956512451,
      "learning_rate": 0.0004696012173049971,
      "loss": 3.1019,
      "step": 71136
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4387569427490234,
      "learning_rate": 0.0004695978431490735,
      "loss": 2.8792,
      "step": 71137
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5970276594161987,
      "learning_rate": 0.0004695944689616184,
      "loss": 3.051,
      "step": 71138
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.441301107406616,
      "learning_rate": 0.00046959109474263246,
      "loss": 3.0436,
      "step": 71139
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5348796844482422,
      "learning_rate": 0.00046958772049211637,
      "loss": 3.2284,
      "step": 71140
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5480287075042725,
      "learning_rate": 0.0004695843462100707,
      "loss": 3.1206,
      "step": 71141
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0159432888031006,
      "learning_rate": 0.00046958097189649616,
      "loss": 3.0034,
      "step": 71142
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3523741960525513,
      "learning_rate": 0.00046957759755139323,
      "loss": 3.071,
      "step": 71143
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5360665321350098,
      "learning_rate": 0.00046957422317476263,
      "loss": 2.9771,
      "step": 71144
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6575607061386108,
      "learning_rate": 0.000469570848766605,
      "loss": 3.3141,
      "step": 71145
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2757222652435303,
      "learning_rate": 0.000469567474326921,
      "loss": 2.8379,
      "step": 71146
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.618531584739685,
      "learning_rate": 0.0004695640998557111,
      "loss": 3.1519,
      "step": 71147
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5586674213409424,
      "learning_rate": 0.00046956072535297614,
      "loss": 3.0464,
      "step": 71148
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.3711564540863037,
      "learning_rate": 0.0004695573508187166,
      "loss": 3.0782,
      "step": 71149
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6955451965332031,
      "learning_rate": 0.00046955397625293314,
      "loss": 2.8863,
      "step": 71150
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5053071975708008,
      "learning_rate": 0.00046955060165562636,
      "loss": 3.346,
      "step": 71151
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.218336582183838,
      "learning_rate": 0.00046954722702679695,
      "loss": 2.8889,
      "step": 71152
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0167367458343506,
      "learning_rate": 0.0004695438523664455,
      "loss": 3.0629,
      "step": 71153
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7537039518356323,
      "learning_rate": 0.00046954047767457276,
      "loss": 2.8185,
      "step": 71154
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.426055908203125,
      "learning_rate": 0.0004695371029511791,
      "loss": 3.1516,
      "step": 71155
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9624364376068115,
      "learning_rate": 0.0004695337281962654,
      "loss": 3.0885,
      "step": 71156
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4317924976348877,
      "learning_rate": 0.00046953035340983206,
      "loss": 2.8143,
      "step": 71157
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3015379905700684,
      "learning_rate": 0.0004695269785918799,
      "loss": 3.418,
      "step": 71158
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3035712242126465,
      "learning_rate": 0.0004695236037424095,
      "loss": 3.0636,
      "step": 71159
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.631977915763855,
      "learning_rate": 0.0004695202288614215,
      "loss": 3.0132,
      "step": 71160
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6911922693252563,
      "learning_rate": 0.00046951685394891636,
      "loss": 2.751,
      "step": 71161
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8951773643493652,
      "learning_rate": 0.00046951347900489485,
      "loss": 3.0117,
      "step": 71162
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.493549108505249,
      "learning_rate": 0.0004695101040293577,
      "loss": 3.192,
      "step": 71163
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.8142290115356445,
      "learning_rate": 0.0004695067290223053,
      "loss": 3.0583,
      "step": 71164
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.791588306427002,
      "learning_rate": 0.0004695033539837384,
      "loss": 3.1205,
      "step": 71165
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.2833752632141113,
      "learning_rate": 0.0004694999789136577,
      "loss": 3.1299,
      "step": 71166
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7051217555999756,
      "learning_rate": 0.0004694966038120638,
      "loss": 2.9427,
      "step": 71167
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3062288761138916,
      "learning_rate": 0.0004694932286789572,
      "loss": 3.0431,
      "step": 71168
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5377583503723145,
      "learning_rate": 0.0004694898535143386,
      "loss": 2.9402,
      "step": 71169
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7949914932250977,
      "learning_rate": 0.0004694864783182087,
      "loss": 3.1617,
      "step": 71170
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.099029541015625,
      "learning_rate": 0.00046948310309056807,
      "loss": 3.2005,
      "step": 71171
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.136383533477783,
      "learning_rate": 0.00046947972783141723,
      "loss": 3.0664,
      "step": 71172
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7486590147018433,
      "learning_rate": 0.000469476352540757,
      "loss": 2.972,
      "step": 71173
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1958789825439453,
      "learning_rate": 0.0004694729772185878,
      "loss": 3.1951,
      "step": 71174
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.821057915687561,
      "learning_rate": 0.0004694696018649104,
      "loss": 3.3243,
      "step": 71175
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.340103030204773,
      "learning_rate": 0.00046946622647972563,
      "loss": 2.9298,
      "step": 71176
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.1465375423431396,
      "learning_rate": 0.00046946285106303366,
      "loss": 3.0889,
      "step": 71177
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4333512783050537,
      "learning_rate": 0.00046945947561483536,
      "loss": 3.0934,
      "step": 71178
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6623843908309937,
      "learning_rate": 0.00046945610013513145,
      "loss": 2.8709,
      "step": 71179
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7206007242202759,
      "learning_rate": 0.00046945272462392237,
      "loss": 2.9219,
      "step": 71180
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4355450868606567,
      "learning_rate": 0.00046944934908120886,
      "loss": 3.185,
      "step": 71181
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.148930788040161,
      "learning_rate": 0.0004694459735069915,
      "loss": 3.038,
      "step": 71182
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4308301210403442,
      "learning_rate": 0.000469442597901271,
      "loss": 3.188,
      "step": 71183
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.435131311416626,
      "learning_rate": 0.00046943922226404776,
      "loss": 3.0663,
      "step": 71184
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6303088665008545,
      "learning_rate": 0.00046943584659532285,
      "loss": 2.9638,
      "step": 71185
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6517767906188965,
      "learning_rate": 0.00046943247089509634,
      "loss": 2.8612,
      "step": 71186
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2692718505859375,
      "learning_rate": 0.00046942909516336926,
      "loss": 3.1203,
      "step": 71187
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.943014144897461,
      "learning_rate": 0.0004694257194001421,
      "loss": 3.1203,
      "step": 71188
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8175315856933594,
      "learning_rate": 0.0004694223436054155,
      "loss": 3.0408,
      "step": 71189
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.782306432723999,
      "learning_rate": 0.00046941896777919005,
      "loss": 2.8389,
      "step": 71190
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9022750854492188,
      "learning_rate": 0.0004694155919214665,
      "loss": 3.1327,
      "step": 71191
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3913493156433105,
      "learning_rate": 0.0004694122160322453,
      "loss": 3.0768,
      "step": 71192
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5033067464828491,
      "learning_rate": 0.0004694088401115272,
      "loss": 3.0968,
      "step": 71193
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9690848588943481,
      "learning_rate": 0.0004694054641593128,
      "loss": 2.9978,
      "step": 71194
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8133814334869385,
      "learning_rate": 0.0004694020881756028,
      "loss": 3.2905,
      "step": 71195
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4450490474700928,
      "learning_rate": 0.00046939871216039765,
      "loss": 3.0231,
      "step": 71196
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9831236600875854,
      "learning_rate": 0.00046939533611369813,
      "loss": 2.845,
      "step": 71197
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5058009624481201,
      "learning_rate": 0.00046939196003550486,
      "loss": 2.8872,
      "step": 71198
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.446142554283142,
      "learning_rate": 0.0004693885839258184,
      "loss": 3.1209,
      "step": 71199
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7042118310928345,
      "learning_rate": 0.0004693852077846393,
      "loss": 2.919,
      "step": 71200
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4737228155136108,
      "learning_rate": 0.00046938183161196847,
      "loss": 2.8951,
      "step": 71201
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7347623109817505,
      "learning_rate": 0.0004693784554078062,
      "loss": 3.1146,
      "step": 71202
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.600541114807129,
      "learning_rate": 0.00046937507917215344,
      "loss": 3.1737,
      "step": 71203
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6515034437179565,
      "learning_rate": 0.0004693717029050105,
      "loss": 3.0696,
      "step": 71204
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.715826392173767,
      "learning_rate": 0.00046936832660637824,
      "loss": 3.0339,
      "step": 71205
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.911383032798767,
      "learning_rate": 0.0004693649502762572,
      "loss": 2.9669,
      "step": 71206
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6138869524002075,
      "learning_rate": 0.000469361573914648,
      "loss": 3.0386,
      "step": 71207
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.485602617263794,
      "learning_rate": 0.0004693581975215513,
      "loss": 3.221,
      "step": 71208
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3346198797225952,
      "learning_rate": 0.0004693548210969677,
      "loss": 2.8855,
      "step": 71209
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0348732471466064,
      "learning_rate": 0.00046935144464089784,
      "loss": 3.1657,
      "step": 71210
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6772792339324951,
      "learning_rate": 0.00046934806815334236,
      "loss": 2.575,
      "step": 71211
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6325700283050537,
      "learning_rate": 0.0004693446916343019,
      "loss": 3.0014,
      "step": 71212
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.1072723865509033,
      "learning_rate": 0.0004693413150837771,
      "loss": 2.9756,
      "step": 71213
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.676395297050476,
      "learning_rate": 0.0004693379385017685,
      "loss": 3.01,
      "step": 71214
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7137999534606934,
      "learning_rate": 0.00046933456188827675,
      "loss": 2.7557,
      "step": 71215
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.715264081954956,
      "learning_rate": 0.0004693311852433026,
      "loss": 3.1972,
      "step": 71216
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6911404132843018,
      "learning_rate": 0.00046932780856684647,
      "loss": 3.0841,
      "step": 71217
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4617515802383423,
      "learning_rate": 0.0004693244318589092,
      "loss": 3.1564,
      "step": 71218
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5620319843292236,
      "learning_rate": 0.00046932105511949136,
      "loss": 3.1867,
      "step": 71219
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.613670825958252,
      "learning_rate": 0.00046931767834859335,
      "loss": 3.102,
      "step": 71220
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.358685851097107,
      "learning_rate": 0.0004693143015462162,
      "loss": 3.0324,
      "step": 71221
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0634958744049072,
      "learning_rate": 0.0004693109247123602,
      "loss": 2.8905,
      "step": 71222
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7781946659088135,
      "learning_rate": 0.00046930754784702607,
      "loss": 3.026,
      "step": 71223
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6689964532852173,
      "learning_rate": 0.00046930417095021463,
      "loss": 3.1442,
      "step": 71224
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4192496538162231,
      "learning_rate": 0.0004693007940219262,
      "loss": 2.9461,
      "step": 71225
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4973665475845337,
      "learning_rate": 0.00046929741706216153,
      "loss": 3.0932,
      "step": 71226
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4401450157165527,
      "learning_rate": 0.00046929404007092144,
      "loss": 3.0897,
      "step": 71227
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6046526432037354,
      "learning_rate": 0.0004692906630482063,
      "loss": 2.8697,
      "step": 71228
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6724298000335693,
      "learning_rate": 0.0004692872859940168,
      "loss": 3.0046,
      "step": 71229
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.827437162399292,
      "learning_rate": 0.0004692839089083536,
      "loss": 3.2989,
      "step": 71230
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5719741582870483,
      "learning_rate": 0.00046928053179121743,
      "loss": 3.0234,
      "step": 71231
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.453233003616333,
      "learning_rate": 0.0004692771546426087,
      "loss": 2.9335,
      "step": 71232
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9181805849075317,
      "learning_rate": 0.00046927377746252814,
      "loss": 2.9228,
      "step": 71233
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.326993465423584,
      "learning_rate": 0.00046927040025097655,
      "loss": 2.868,
      "step": 71234
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5537028312683105,
      "learning_rate": 0.0004692670230079542,
      "loss": 3.078,
      "step": 71235
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5817137956619263,
      "learning_rate": 0.000469263645733462,
      "loss": 2.9325,
      "step": 71236
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4974840879440308,
      "learning_rate": 0.0004692602684275006,
      "loss": 3.1515,
      "step": 71237
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4856940507888794,
      "learning_rate": 0.00046925689109007036,
      "loss": 2.9177,
      "step": 71238
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.707562804222107,
      "learning_rate": 0.00046925351372117214,
      "loss": 2.9901,
      "step": 71239
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5928444862365723,
      "learning_rate": 0.00046925013632080647,
      "loss": 3.246,
      "step": 71240
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3612878322601318,
      "learning_rate": 0.00046924675888897405,
      "loss": 3.1836,
      "step": 71241
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5457664728164673,
      "learning_rate": 0.00046924338142567543,
      "loss": 3.1242,
      "step": 71242
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5148357152938843,
      "learning_rate": 0.0004692400039309113,
      "loss": 3.0886,
      "step": 71243
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9874546527862549,
      "learning_rate": 0.0004692366264046822,
      "loss": 3.2844,
      "step": 71244
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0183587074279785,
      "learning_rate": 0.0004692332488469888,
      "loss": 3.0944,
      "step": 71245
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6569222211837769,
      "learning_rate": 0.0004692298712578318,
      "loss": 3.0953,
      "step": 71246
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4336140155792236,
      "learning_rate": 0.0004692264936372118,
      "loss": 3.3391,
      "step": 71247
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.929884910583496,
      "learning_rate": 0.0004692231159851294,
      "loss": 2.958,
      "step": 71248
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.02087664604187,
      "learning_rate": 0.0004692197383015852,
      "loss": 3.1211,
      "step": 71249
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8113709688186646,
      "learning_rate": 0.0004692163605865798,
      "loss": 3.042,
      "step": 71250
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7759947776794434,
      "learning_rate": 0.000469212982840114,
      "loss": 2.9605,
      "step": 71251
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7092771530151367,
      "learning_rate": 0.00046920960506218823,
      "loss": 2.9269,
      "step": 71252
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4569883346557617,
      "learning_rate": 0.00046920622725280317,
      "loss": 3.1534,
      "step": 71253
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6030523777008057,
      "learning_rate": 0.00046920284941195955,
      "loss": 3.0263,
      "step": 71254
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.779718279838562,
      "learning_rate": 0.00046919947153965797,
      "loss": 3.0851,
      "step": 71255
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.361571788787842,
      "learning_rate": 0.00046919609363589885,
      "loss": 3.0971,
      "step": 71256
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6177672147750854,
      "learning_rate": 0.00046919271570068313,
      "loss": 2.8862,
      "step": 71257
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9255300760269165,
      "learning_rate": 0.0004691893377340112,
      "loss": 3.1636,
      "step": 71258
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.183823823928833,
      "learning_rate": 0.00046918595973588387,
      "loss": 3.0567,
      "step": 71259
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4518568515777588,
      "learning_rate": 0.00046918258170630157,
      "loss": 3.0018,
      "step": 71260
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7862179279327393,
      "learning_rate": 0.00046917920364526516,
      "loss": 2.8313,
      "step": 71261
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3157143592834473,
      "learning_rate": 0.000469175825552775,
      "loss": 2.9399,
      "step": 71262
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.689692497253418,
      "learning_rate": 0.0004691724474288319,
      "loss": 3.0597,
      "step": 71263
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.026963233947754,
      "learning_rate": 0.00046916906927343656,
      "loss": 2.9511,
      "step": 71264
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7346816062927246,
      "learning_rate": 0.00046916569108658944,
      "loss": 3.0429,
      "step": 71265
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7102519273757935,
      "learning_rate": 0.00046916231286829113,
      "loss": 2.9155,
      "step": 71266
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1591742038726807,
      "learning_rate": 0.0004691589346185425,
      "loss": 3.2072,
      "step": 71267
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.808106780052185,
      "learning_rate": 0.00046915555633734394,
      "loss": 2.9643,
      "step": 71268
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7580039501190186,
      "learning_rate": 0.00046915217802469614,
      "loss": 3.08,
      "step": 71269
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.3520877361297607,
      "learning_rate": 0.00046914879968059986,
      "loss": 2.8393,
      "step": 71270
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7171778678894043,
      "learning_rate": 0.00046914542130505553,
      "loss": 2.9784,
      "step": 71271
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4563663005828857,
      "learning_rate": 0.0004691420428980639,
      "loss": 3.1558,
      "step": 71272
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.498534083366394,
      "learning_rate": 0.00046913866445962563,
      "loss": 3.1312,
      "step": 71273
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9347686767578125,
      "learning_rate": 0.00046913528598974125,
      "loss": 3.0528,
      "step": 71274
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7982676029205322,
      "learning_rate": 0.0004691319074884114,
      "loss": 2.902,
      "step": 71275
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.850149154663086,
      "learning_rate": 0.0004691285289556368,
      "loss": 2.8909,
      "step": 71276
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6214746236801147,
      "learning_rate": 0.00046912515039141794,
      "loss": 3.0274,
      "step": 71277
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8777726888656616,
      "learning_rate": 0.0004691217717957556,
      "loss": 3.0001,
      "step": 71278
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.68463134765625,
      "learning_rate": 0.00046911839316865023,
      "loss": 3.1324,
      "step": 71279
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7994180917739868,
      "learning_rate": 0.00046911501451010263,
      "loss": 3.0769,
      "step": 71280
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6716252565383911,
      "learning_rate": 0.0004691116358201133,
      "loss": 3.089,
      "step": 71281
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6731817722320557,
      "learning_rate": 0.00046910825709868293,
      "loss": 3.0028,
      "step": 71282
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.944065809249878,
      "learning_rate": 0.00046910487834581223,
      "loss": 2.9551,
      "step": 71283
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4580692052841187,
      "learning_rate": 0.00046910149956150163,
      "loss": 2.9958,
      "step": 71284
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.5888285636901855,
      "learning_rate": 0.000469098120745752,
      "loss": 3.3062,
      "step": 71285
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9568065404891968,
      "learning_rate": 0.0004690947418985637,
      "loss": 3.194,
      "step": 71286
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4809231758117676,
      "learning_rate": 0.00046909136301993755,
      "loss": 2.9079,
      "step": 71287
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.912733793258667,
      "learning_rate": 0.00046908798410987413,
      "loss": 3.0791,
      "step": 71288
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4217594861984253,
      "learning_rate": 0.0004690846051683741,
      "loss": 2.9413,
      "step": 71289
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4036191701889038,
      "learning_rate": 0.00046908122619543797,
      "loss": 2.9341,
      "step": 71290
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9272292852401733,
      "learning_rate": 0.0004690778471910665,
      "loss": 3.3006,
      "step": 71291
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7037584781646729,
      "learning_rate": 0.00046907446815526026,
      "loss": 3.1143,
      "step": 71292
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7828989028930664,
      "learning_rate": 0.0004690710890880199,
      "loss": 2.9804,
      "step": 71293
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6480482816696167,
      "learning_rate": 0.000469067709989346,
      "loss": 3.0796,
      "step": 71294
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8499550819396973,
      "learning_rate": 0.0004690643308592392,
      "loss": 2.953,
      "step": 71295
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4779069423675537,
      "learning_rate": 0.00046906095169770023,
      "loss": 2.9462,
      "step": 71296
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.611240029335022,
      "learning_rate": 0.0004690575725047296,
      "loss": 2.9509,
      "step": 71297
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3673970699310303,
      "learning_rate": 0.00046905419328032794,
      "loss": 3.1533,
      "step": 71298
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8612804412841797,
      "learning_rate": 0.000469050814024496,
      "loss": 3.0375,
      "step": 71299
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.0402817726135254,
      "learning_rate": 0.00046904743473723427,
      "loss": 2.7312,
      "step": 71300
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5446220636367798,
      "learning_rate": 0.00046904405541854335,
      "loss": 3.1933,
      "step": 71301
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4797980785369873,
      "learning_rate": 0.00046904067606842406,
      "loss": 2.9332,
      "step": 71302
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9184646606445312,
      "learning_rate": 0.0004690372966868769,
      "loss": 3.0116,
      "step": 71303
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6324152946472168,
      "learning_rate": 0.0004690339172739025,
      "loss": 2.8739,
      "step": 71304
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6286828517913818,
      "learning_rate": 0.00046903053782950153,
      "loss": 3.0218,
      "step": 71305
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6661750078201294,
      "learning_rate": 0.0004690271583536745,
      "loss": 3.0954,
      "step": 71306
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9851276874542236,
      "learning_rate": 0.0004690237788464223,
      "loss": 2.8789,
      "step": 71307
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3560434579849243,
      "learning_rate": 0.00046902039930774524,
      "loss": 3.1241,
      "step": 71308
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9226734638214111,
      "learning_rate": 0.00046901701973764415,
      "loss": 3.1216,
      "step": 71309
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.653186321258545,
      "learning_rate": 0.00046901364013611957,
      "loss": 3.132,
      "step": 71310
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6369249820709229,
      "learning_rate": 0.0004690102605031722,
      "loss": 3.083,
      "step": 71311
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.523369312286377,
      "learning_rate": 0.0004690068808388026,
      "loss": 2.9638,
      "step": 71312
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.394263505935669,
      "learning_rate": 0.0004690035011430115,
      "loss": 3.2178,
      "step": 71313
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4484055042266846,
      "learning_rate": 0.0004690001214157994,
      "loss": 2.9412,
      "step": 71314
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6427677869796753,
      "learning_rate": 0.000468996741657167,
      "loss": 3.0043,
      "step": 71315
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4526666402816772,
      "learning_rate": 0.00046899336186711495,
      "loss": 3.0677,
      "step": 71316
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7994815111160278,
      "learning_rate": 0.0004689899820456438,
      "loss": 2.9692,
      "step": 71317
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4430352449417114,
      "learning_rate": 0.0004689866021927542,
      "loss": 2.9221,
      "step": 71318
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.453608751296997,
      "learning_rate": 0.00046898322230844684,
      "loss": 2.9774,
      "step": 71319
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.520609736442566,
      "learning_rate": 0.0004689798423927224,
      "loss": 2.8385,
      "step": 71320
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.256899833679199,
      "learning_rate": 0.00046897646244558125,
      "loss": 3.0923,
      "step": 71321
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.621933937072754,
      "learning_rate": 0.0004689730824670243,
      "loss": 2.9445,
      "step": 71322
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8042292594909668,
      "learning_rate": 0.000468969702457052,
      "loss": 3.1595,
      "step": 71323
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.915231943130493,
      "learning_rate": 0.000468966322415665,
      "loss": 3.4727,
      "step": 71324
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7702239751815796,
      "learning_rate": 0.0004689629423428642,
      "loss": 2.7706,
      "step": 71325
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6373801231384277,
      "learning_rate": 0.00046895956223864976,
      "loss": 3.1558,
      "step": 71326
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.639845132827759,
      "learning_rate": 0.0004689561821030226,
      "loss": 3.0278,
      "step": 71327
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.600520133972168,
      "learning_rate": 0.0004689528019359833,
      "loss": 3.0284,
      "step": 71328
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7932324409484863,
      "learning_rate": 0.00046894942173753256,
      "loss": 2.9261,
      "step": 71329
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.116532564163208,
      "learning_rate": 0.0004689460415076708,
      "loss": 2.7914,
      "step": 71330
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7071864604949951,
      "learning_rate": 0.0004689426612463989,
      "loss": 3.0042,
      "step": 71331
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.2288520336151123,
      "learning_rate": 0.00046893928095371735,
      "loss": 2.9915,
      "step": 71332
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5098744630813599,
      "learning_rate": 0.0004689359006296267,
      "loss": 3.2895,
      "step": 71333
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9460302591323853,
      "learning_rate": 0.00046893252027412785,
      "loss": 2.8602,
      "step": 71334
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7300790548324585,
      "learning_rate": 0.0004689291398872211,
      "loss": 2.9971,
      "step": 71335
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.941038966178894,
      "learning_rate": 0.00046892575946890733,
      "loss": 3.0481,
      "step": 71336
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.507479190826416,
      "learning_rate": 0.00046892237901918706,
      "loss": 2.8658,
      "step": 71337
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6620646715164185,
      "learning_rate": 0.0004689189985380609,
      "loss": 3.2318,
      "step": 71338
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.281172752380371,
      "learning_rate": 0.00046891561802552955,
      "loss": 2.911,
      "step": 71339
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.762805461883545,
      "learning_rate": 0.00046891223748159357,
      "loss": 3.0601,
      "step": 71340
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.514554500579834,
      "learning_rate": 0.00046890885690625356,
      "loss": 2.8199,
      "step": 71341
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8324978351593018,
      "learning_rate": 0.0004689054762995103,
      "loss": 2.9332,
      "step": 71342
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5926079750061035,
      "learning_rate": 0.00046890209566136435,
      "loss": 3.2791,
      "step": 71343
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5813010931015015,
      "learning_rate": 0.00046889871499181617,
      "loss": 2.9271,
      "step": 71344
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5046685934066772,
      "learning_rate": 0.00046889533429086657,
      "loss": 2.9326,
      "step": 71345
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.882076621055603,
      "learning_rate": 0.00046889195355851625,
      "loss": 2.9732,
      "step": 71346
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8817758560180664,
      "learning_rate": 0.00046888857279476554,
      "loss": 3.0328,
      "step": 71347
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4294331073760986,
      "learning_rate": 0.0004688851919996155,
      "loss": 3.1801,
      "step": 71348
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.151076316833496,
      "learning_rate": 0.0004688818111730663,
      "loss": 3.0149,
      "step": 71349
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5405809879302979,
      "learning_rate": 0.00046887843031511887,
      "loss": 2.9486,
      "step": 71350
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.488295555114746,
      "learning_rate": 0.0004688750494257737,
      "loss": 3.1943,
      "step": 71351
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6929165124893188,
      "learning_rate": 0.00046887166850503154,
      "loss": 3.0963,
      "step": 71352
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5300147533416748,
      "learning_rate": 0.00046886828755289295,
      "loss": 3.0305,
      "step": 71353
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8550992012023926,
      "learning_rate": 0.0004688649065693586,
      "loss": 2.8782,
      "step": 71354
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7779518365859985,
      "learning_rate": 0.000468861525554429,
      "loss": 3.2567,
      "step": 71355
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4069857597351074,
      "learning_rate": 0.0004688581445081048,
      "loss": 3.0804,
      "step": 71356
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5344098806381226,
      "learning_rate": 0.00046885476343038677,
      "loss": 3.0713,
      "step": 71357
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5553032159805298,
      "learning_rate": 0.00046885138232127555,
      "loss": 3.1112,
      "step": 71358
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8206467628479004,
      "learning_rate": 0.0004688480011807714,
      "loss": 2.9019,
      "step": 71359
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5358366966247559,
      "learning_rate": 0.0004688446200088754,
      "loss": 2.9933,
      "step": 71360
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9886199235916138,
      "learning_rate": 0.000468841238805588,
      "loss": 3.266,
      "step": 71361
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8371824026107788,
      "learning_rate": 0.0004688378575709098,
      "loss": 3.2311,
      "step": 71362
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6098471879959106,
      "learning_rate": 0.0004688344763048414,
      "loss": 3.3283,
      "step": 71363
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7226264476776123,
      "learning_rate": 0.0004688310950073836,
      "loss": 3.2667,
      "step": 71364
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8854479789733887,
      "learning_rate": 0.00046882771367853684,
      "loss": 3.1739,
      "step": 71365
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0706710815429688,
      "learning_rate": 0.00046882433231830185,
      "loss": 3.2699,
      "step": 71366
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9830362796783447,
      "learning_rate": 0.00046882095092667914,
      "loss": 3.1501,
      "step": 71367
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7327899932861328,
      "learning_rate": 0.00046881756950366955,
      "loss": 2.9335,
      "step": 71368
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.34889817237854,
      "learning_rate": 0.00046881418804927354,
      "loss": 2.9515,
      "step": 71369
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5839763879776,
      "learning_rate": 0.00046881080656349173,
      "loss": 2.9899,
      "step": 71370
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3757070302963257,
      "learning_rate": 0.0004688074250463249,
      "loss": 3.0574,
      "step": 71371
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1415953636169434,
      "learning_rate": 0.00046880404349777355,
      "loss": 3.0729,
      "step": 71372
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.85975980758667,
      "learning_rate": 0.0004688006619178383,
      "loss": 2.8567,
      "step": 71373
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.400608777999878,
      "learning_rate": 0.0004687972803065199,
      "loss": 2.8673,
      "step": 71374
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3993520736694336,
      "learning_rate": 0.0004687938986638188,
      "loss": 2.912,
      "step": 71375
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5265421867370605,
      "learning_rate": 0.00046879051698973576,
      "loss": 3.2537,
      "step": 71376
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4097721576690674,
      "learning_rate": 0.00046878713528427144,
      "loss": 3.1266,
      "step": 71377
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.360358715057373,
      "learning_rate": 0.00046878375354742635,
      "loss": 2.8242,
      "step": 71378
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4587376117706299,
      "learning_rate": 0.0004687803717792012,
      "loss": 3.1278,
      "step": 71379
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.39065420627594,
      "learning_rate": 0.0004687769899795966,
      "loss": 3.2358,
      "step": 71380
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4915194511413574,
      "learning_rate": 0.0004687736081486131,
      "loss": 2.9541,
      "step": 71381
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5383044481277466,
      "learning_rate": 0.0004687702262862515,
      "loss": 2.9162,
      "step": 71382
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5912070274353027,
      "learning_rate": 0.00046876684439251226,
      "loss": 3.1187,
      "step": 71383
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.572782278060913,
      "learning_rate": 0.0004687634624673961,
      "loss": 3.0654,
      "step": 71384
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3897788524627686,
      "learning_rate": 0.0004687600805109036,
      "loss": 2.9507,
      "step": 71385
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6267869472503662,
      "learning_rate": 0.0004687566985230355,
      "loss": 2.9639,
      "step": 71386
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8962897062301636,
      "learning_rate": 0.0004687533165037922,
      "loss": 2.9097,
      "step": 71387
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.584192156791687,
      "learning_rate": 0.0004687499344531746,
      "loss": 3.1121,
      "step": 71388
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.067591905593872,
      "learning_rate": 0.0004687465523711832,
      "loss": 2.9973,
      "step": 71389
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7149934768676758,
      "learning_rate": 0.00046874317025781857,
      "loss": 2.7644,
      "step": 71390
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8532280921936035,
      "learning_rate": 0.00046873978811308133,
      "loss": 3.0018,
      "step": 71391
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4583029747009277,
      "learning_rate": 0.0004687364059369724,
      "loss": 2.9429,
      "step": 71392
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9735660552978516,
      "learning_rate": 0.00046873302372949194,
      "loss": 3.2361,
      "step": 71393
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0555260181427,
      "learning_rate": 0.00046872964149064097,
      "loss": 2.7613,
      "step": 71394
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5676971673965454,
      "learning_rate": 0.00046872625922042,
      "loss": 2.9769,
      "step": 71395
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.003236770629883,
      "learning_rate": 0.00046872287691882957,
      "loss": 2.9597,
      "step": 71396
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0214974880218506,
      "learning_rate": 0.00046871949458587034,
      "loss": 3.0592,
      "step": 71397
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9405996799468994,
      "learning_rate": 0.0004687161122215431,
      "loss": 2.9628,
      "step": 71398
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3755109310150146,
      "learning_rate": 0.0004687127298258482,
      "loss": 3.0123,
      "step": 71399
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.26672625541687,
      "learning_rate": 0.0004687093473987865,
      "loss": 3.1044,
      "step": 71400
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4845575094223022,
      "learning_rate": 0.00046870596494035855,
      "loss": 3.0101,
      "step": 71401
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.356892704963684,
      "learning_rate": 0.00046870258245056487,
      "loss": 3.0211,
      "step": 71402
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1008214950561523,
      "learning_rate": 0.0004686991999294063,
      "loss": 3.3867,
      "step": 71403
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.041102647781372,
      "learning_rate": 0.0004686958173768834,
      "loss": 3.2162,
      "step": 71404
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7007280588150024,
      "learning_rate": 0.00046869243479299663,
      "loss": 3.2137,
      "step": 71405
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.244774580001831,
      "learning_rate": 0.0004686890521777469,
      "loss": 2.9046,
      "step": 71406
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.262073278427124,
      "learning_rate": 0.0004686856695311346,
      "loss": 3.0201,
      "step": 71407
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1859116554260254,
      "learning_rate": 0.00046868228685316047,
      "loss": 3.0104,
      "step": 71408
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.528082013130188,
      "learning_rate": 0.0004686789041438251,
      "loss": 3.2108,
      "step": 71409
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5639145374298096,
      "learning_rate": 0.0004686755214031292,
      "loss": 3.4254,
      "step": 71410
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5658334493637085,
      "learning_rate": 0.00046867213863107326,
      "loss": 2.9978,
      "step": 71411
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5727465152740479,
      "learning_rate": 0.000468668755827658,
      "loss": 2.8578,
      "step": 71412
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4358855485916138,
      "learning_rate": 0.00046866537299288414,
      "loss": 3.1092,
      "step": 71413
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6408308744430542,
      "learning_rate": 0.0004686619901267521,
      "loss": 3.0515,
      "step": 71414
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7503032684326172,
      "learning_rate": 0.00046865860722926264,
      "loss": 3.0909,
      "step": 71415
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5840963125228882,
      "learning_rate": 0.0004686552243004163,
      "loss": 2.8531,
      "step": 71416
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.823822021484375,
      "learning_rate": 0.00046865184134021386,
      "loss": 3.1508,
      "step": 71417
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6439803838729858,
      "learning_rate": 0.0004686484583486558,
      "loss": 2.9285,
      "step": 71418
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.642945647239685,
      "learning_rate": 0.0004686450753257428,
      "loss": 2.8718,
      "step": 71419
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0609428882598877,
      "learning_rate": 0.0004686416922714755,
      "loss": 2.9767,
      "step": 71420
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7544665336608887,
      "learning_rate": 0.00046863830918585455,
      "loss": 2.781,
      "step": 71421
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9248194694519043,
      "learning_rate": 0.00046863492606888065,
      "loss": 3.1072,
      "step": 71422
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9396864175796509,
      "learning_rate": 0.00046863154292055417,
      "loss": 2.8421,
      "step": 71423
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.293842315673828,
      "learning_rate": 0.000468628159740876,
      "loss": 2.8708,
      "step": 71424
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5844229459762573,
      "learning_rate": 0.0004686247765298466,
      "loss": 3.0614,
      "step": 71425
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5315918922424316,
      "learning_rate": 0.0004686213932874668,
      "loss": 3.4093,
      "step": 71426
    },
    {
      "epoch": 0.93,
      "grad_norm": 4.601168155670166,
      "learning_rate": 0.00046861801001373693,
      "loss": 2.908,
      "step": 71427
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.8660833835601807,
      "learning_rate": 0.0004686146267086579,
      "loss": 2.8448,
      "step": 71428
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.556523323059082,
      "learning_rate": 0.0004686112433722302,
      "loss": 2.9975,
      "step": 71429
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.2772650718688965,
      "learning_rate": 0.0004686078600044545,
      "loss": 3.097,
      "step": 71430
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2362515926361084,
      "learning_rate": 0.0004686044766053314,
      "loss": 2.8559,
      "step": 71431
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5719496011734009,
      "learning_rate": 0.00046860109317486157,
      "loss": 2.7229,
      "step": 71432
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4775112867355347,
      "learning_rate": 0.00046859770971304556,
      "loss": 2.8071,
      "step": 71433
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6661649942398071,
      "learning_rate": 0.000468594326219884,
      "loss": 2.9798,
      "step": 71434
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4784717559814453,
      "learning_rate": 0.0004685909426953777,
      "loss": 2.9486,
      "step": 71435
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.622654676437378,
      "learning_rate": 0.0004685875591395272,
      "loss": 2.9233,
      "step": 71436
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5927854776382446,
      "learning_rate": 0.000468584175552333,
      "loss": 3.0517,
      "step": 71437
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5386229753494263,
      "learning_rate": 0.00046858079193379585,
      "loss": 2.9819,
      "step": 71438
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7411134243011475,
      "learning_rate": 0.0004685774082839163,
      "loss": 3.1249,
      "step": 71439
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0144031047821045,
      "learning_rate": 0.0004685740246026951,
      "loss": 2.8477,
      "step": 71440
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7145898342132568,
      "learning_rate": 0.00046857064089013276,
      "loss": 2.8324,
      "step": 71441
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.445669412612915,
      "learning_rate": 0.00046856725714622996,
      "loss": 2.9471,
      "step": 71442
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4621188640594482,
      "learning_rate": 0.0004685638733709873,
      "loss": 2.9624,
      "step": 71443
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.832958459854126,
      "learning_rate": 0.0004685604895644055,
      "loss": 2.8406,
      "step": 71444
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6398919820785522,
      "learning_rate": 0.00046855710572648506,
      "loss": 2.9521,
      "step": 71445
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.564315915107727,
      "learning_rate": 0.0004685537218572267,
      "loss": 3.1426,
      "step": 71446
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6073874235153198,
      "learning_rate": 0.0004685503379566311,
      "loss": 2.9511,
      "step": 71447
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8088324069976807,
      "learning_rate": 0.0004685469540246987,
      "loss": 2.9181,
      "step": 71448
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.890295147895813,
      "learning_rate": 0.0004685435700614302,
      "loss": 3.0995,
      "step": 71449
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8146246671676636,
      "learning_rate": 0.00046854018606682637,
      "loss": 3.0483,
      "step": 71450
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.423589825630188,
      "learning_rate": 0.00046853680204088767,
      "loss": 2.8222,
      "step": 71451
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5872730016708374,
      "learning_rate": 0.00046853341798361477,
      "loss": 3.0329,
      "step": 71452
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1777966022491455,
      "learning_rate": 0.00046853003389500853,
      "loss": 2.778,
      "step": 71453
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8421874046325684,
      "learning_rate": 0.0004685266497750692,
      "loss": 2.9238,
      "step": 71454
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.682111382484436,
      "learning_rate": 0.0004685232656237976,
      "loss": 2.9724,
      "step": 71455
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4612609148025513,
      "learning_rate": 0.00046851988144119437,
      "loss": 2.909,
      "step": 71456
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6767833232879639,
      "learning_rate": 0.00046851649722726006,
      "loss": 2.9134,
      "step": 71457
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.732194662094116,
      "learning_rate": 0.0004685131129819954,
      "loss": 2.987,
      "step": 71458
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5302294492721558,
      "learning_rate": 0.0004685097287054009,
      "loss": 2.701,
      "step": 71459
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.641855001449585,
      "learning_rate": 0.00046850634439747743,
      "loss": 3.033,
      "step": 71460
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2175004482269287,
      "learning_rate": 0.00046850296005822534,
      "loss": 2.9595,
      "step": 71461
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4903767108917236,
      "learning_rate": 0.00046849957568764533,
      "loss": 2.9949,
      "step": 71462
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0872113704681396,
      "learning_rate": 0.00046849619128573817,
      "loss": 2.8525,
      "step": 71463
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.632402777671814,
      "learning_rate": 0.0004684928068525043,
      "loss": 3.1875,
      "step": 71464
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.854509711265564,
      "learning_rate": 0.0004684894223879445,
      "loss": 3.0391,
      "step": 71465
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7659220695495605,
      "learning_rate": 0.00046848603789205923,
      "loss": 2.8967,
      "step": 71466
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8809847831726074,
      "learning_rate": 0.00046848265336484936,
      "loss": 3.0094,
      "step": 71467
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6435741186141968,
      "learning_rate": 0.0004684792688063153,
      "loss": 2.7885,
      "step": 71468
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6594247817993164,
      "learning_rate": 0.0004684758842164578,
      "loss": 3.1869,
      "step": 71469
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7099573612213135,
      "learning_rate": 0.0004684724995952774,
      "loss": 3.1219,
      "step": 71470
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7708619832992554,
      "learning_rate": 0.00046846911494277487,
      "loss": 2.8718,
      "step": 71471
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2920546531677246,
      "learning_rate": 0.00046846573025895074,
      "loss": 3.0846,
      "step": 71472
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5388315916061401,
      "learning_rate": 0.0004684623455438055,
      "loss": 2.8826,
      "step": 71473
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.829420328140259,
      "learning_rate": 0.0004684589607973401,
      "loss": 2.7923,
      "step": 71474
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7070722579956055,
      "learning_rate": 0.000468455576019555,
      "loss": 3.0037,
      "step": 71475
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2656917572021484,
      "learning_rate": 0.0004684521912104507,
      "loss": 3.1462,
      "step": 71476
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7114989757537842,
      "learning_rate": 0.00046844880637002804,
      "loss": 2.7877,
      "step": 71477
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.039835214614868,
      "learning_rate": 0.00046844542149828756,
      "loss": 3.1979,
      "step": 71478
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8311338424682617,
      "learning_rate": 0.0004684420365952299,
      "loss": 3.1755,
      "step": 71479
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7387120723724365,
      "learning_rate": 0.0004684386516608557,
      "loss": 3.0846,
      "step": 71480
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7505828142166138,
      "learning_rate": 0.0004684352666951656,
      "loss": 3.1647,
      "step": 71481
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8362040519714355,
      "learning_rate": 0.00046843188169816007,
      "loss": 3.0954,
      "step": 71482
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.9459354877471924,
      "learning_rate": 0.00046842849666984003,
      "loss": 3.2749,
      "step": 71483
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.168497323989868,
      "learning_rate": 0.00046842511161020585,
      "loss": 2.9652,
      "step": 71484
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6974287033081055,
      "learning_rate": 0.0004684217265192583,
      "loss": 2.982,
      "step": 71485
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.0122079849243164,
      "learning_rate": 0.00046841834139699803,
      "loss": 3.0839,
      "step": 71486
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.56216299533844,
      "learning_rate": 0.00046841495624342554,
      "loss": 3.0505,
      "step": 71487
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5017154216766357,
      "learning_rate": 0.0004684115710585415,
      "loss": 3.3314,
      "step": 71488
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9898914098739624,
      "learning_rate": 0.0004684081858423467,
      "loss": 2.9614,
      "step": 71489
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.514369249343872,
      "learning_rate": 0.0004684048005948415,
      "loss": 3.0295,
      "step": 71490
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7047823667526245,
      "learning_rate": 0.0004684014153160267,
      "loss": 2.9064,
      "step": 71491
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8227133750915527,
      "learning_rate": 0.00046839803000590295,
      "loss": 3.0218,
      "step": 71492
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.124851942062378,
      "learning_rate": 0.00046839464466447075,
      "loss": 3.1986,
      "step": 71493
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.3145298957824707,
      "learning_rate": 0.0004683912592917309,
      "loss": 2.8587,
      "step": 71494
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.439833402633667,
      "learning_rate": 0.0004683878738876839,
      "loss": 3.0244,
      "step": 71495
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.192509651184082,
      "learning_rate": 0.0004683844884523304,
      "loss": 3.2409,
      "step": 71496
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.549290418624878,
      "learning_rate": 0.000468381102985671,
      "loss": 3.0712,
      "step": 71497
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6245763301849365,
      "learning_rate": 0.0004683777174877064,
      "loss": 3.1486,
      "step": 71498
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9814831018447876,
      "learning_rate": 0.00046837433195843727,
      "loss": 2.9979,
      "step": 71499
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9511239528656006,
      "learning_rate": 0.0004683709463978641,
      "loss": 3.0226,
      "step": 71500
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4879807233810425,
      "learning_rate": 0.0004683675608059876,
      "loss": 3.1765,
      "step": 71501
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7296077013015747,
      "learning_rate": 0.0004683641751828085,
      "loss": 3.0292,
      "step": 71502
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.893052577972412,
      "learning_rate": 0.0004683607895283272,
      "loss": 2.9093,
      "step": 71503
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6746634244918823,
      "learning_rate": 0.0004683574038425445,
      "loss": 2.9596,
      "step": 71504
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4077802896499634,
      "learning_rate": 0.000468354018125461,
      "loss": 3.0081,
      "step": 71505
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8991966247558594,
      "learning_rate": 0.0004683506323770772,
      "loss": 2.9925,
      "step": 71506
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9419472217559814,
      "learning_rate": 0.00046834724659739396,
      "loss": 2.9323,
      "step": 71507
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3284982442855835,
      "learning_rate": 0.0004683438607864118,
      "loss": 3.1007,
      "step": 71508
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9179621934890747,
      "learning_rate": 0.0004683404749441313,
      "loss": 3.0579,
      "step": 71509
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.768547773361206,
      "learning_rate": 0.0004683370890705531,
      "loss": 3.08,
      "step": 71510
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4519422054290771,
      "learning_rate": 0.00046833370316567784,
      "loss": 3.1614,
      "step": 71511
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4913281202316284,
      "learning_rate": 0.0004683303172295062,
      "loss": 3.1311,
      "step": 71512
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9192553758621216,
      "learning_rate": 0.0004683269312620388,
      "loss": 2.8829,
      "step": 71513
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.622841238975525,
      "learning_rate": 0.00046832354526327624,
      "loss": 2.7709,
      "step": 71514
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.699181318283081,
      "learning_rate": 0.0004683201592332191,
      "loss": 3.2389,
      "step": 71515
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9777946472167969,
      "learning_rate": 0.0004683167731718681,
      "loss": 3.2524,
      "step": 71516
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5487022399902344,
      "learning_rate": 0.0004683133870792239,
      "loss": 3.0126,
      "step": 71517
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.604386806488037,
      "learning_rate": 0.00046831000095528695,
      "loss": 2.9782,
      "step": 71518
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.471325397491455,
      "learning_rate": 0.00046830661480005807,
      "loss": 3.0041,
      "step": 71519
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9204710721969604,
      "learning_rate": 0.0004683032286135378,
      "loss": 2.9778,
      "step": 71520
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9453346729278564,
      "learning_rate": 0.0004682998423957267,
      "loss": 3.0308,
      "step": 71521
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.611743450164795,
      "learning_rate": 0.00046829645614662556,
      "loss": 3.1316,
      "step": 71522
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6438956260681152,
      "learning_rate": 0.000468293069866235,
      "loss": 3.0835,
      "step": 71523
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6862378120422363,
      "learning_rate": 0.0004682896835545555,
      "loss": 2.889,
      "step": 71524
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4537338018417358,
      "learning_rate": 0.00046828629721158775,
      "loss": 2.9449,
      "step": 71525
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.201551675796509,
      "learning_rate": 0.00046828291083733246,
      "loss": 2.9771,
      "step": 71526
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0148658752441406,
      "learning_rate": 0.0004682795244317902,
      "loss": 2.9772,
      "step": 71527
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.80891752243042,
      "learning_rate": 0.0004682761379949615,
      "loss": 3.2609,
      "step": 71528
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7955716848373413,
      "learning_rate": 0.0004682727515268472,
      "loss": 2.7391,
      "step": 71529
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.048387289047241,
      "learning_rate": 0.0004682693650274477,
      "loss": 2.9312,
      "step": 71530
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9425748586654663,
      "learning_rate": 0.00046826597849676386,
      "loss": 3.047,
      "step": 71531
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.522993564605713,
      "learning_rate": 0.0004682625919347962,
      "loss": 2.9056,
      "step": 71532
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7888010740280151,
      "learning_rate": 0.0004682592053415453,
      "loss": 2.9759,
      "step": 71533
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.650211215019226,
      "learning_rate": 0.0004682558187170118,
      "loss": 3.111,
      "step": 71534
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9247448444366455,
      "learning_rate": 0.0004682524320611965,
      "loss": 2.945,
      "step": 71535
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.794926404953003,
      "learning_rate": 0.00046824904537409983,
      "loss": 2.98,
      "step": 71536
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.894875407218933,
      "learning_rate": 0.00046824565865572247,
      "loss": 3.2511,
      "step": 71537
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5205559730529785,
      "learning_rate": 0.0004682422719060651,
      "loss": 3.3043,
      "step": 71538
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7387300729751587,
      "learning_rate": 0.00046823888512512825,
      "loss": 3.091,
      "step": 71539
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8303461074829102,
      "learning_rate": 0.0004682354983129126,
      "loss": 2.9421,
      "step": 71540
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4557119607925415,
      "learning_rate": 0.00046823211146941894,
      "loss": 3.0028,
      "step": 71541
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.5817654132843018,
      "learning_rate": 0.00046822872459464763,
      "loss": 3.0705,
      "step": 71542
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4438556432724,
      "learning_rate": 0.0004682253376885995,
      "loss": 3.0269,
      "step": 71543
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5612791776657104,
      "learning_rate": 0.000468221950751275,
      "loss": 3.237,
      "step": 71544
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.358227491378784,
      "learning_rate": 0.0004682185637826749,
      "loss": 3.0045,
      "step": 71545
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.543842077255249,
      "learning_rate": 0.0004682151767827998,
      "loss": 3.2012,
      "step": 71546
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6077386140823364,
      "learning_rate": 0.00046821178975165037,
      "loss": 3.0114,
      "step": 71547
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0473318099975586,
      "learning_rate": 0.0004682084026892272,
      "loss": 2.9494,
      "step": 71548
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5594255924224854,
      "learning_rate": 0.00046820501559553086,
      "loss": 3.1879,
      "step": 71549
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4735326766967773,
      "learning_rate": 0.000468201628470562,
      "loss": 2.9809,
      "step": 71550
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.469904661178589,
      "learning_rate": 0.00046819824131432135,
      "loss": 2.937,
      "step": 71551
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5536936521530151,
      "learning_rate": 0.00046819485412680944,
      "loss": 2.9573,
      "step": 71552
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.503831386566162,
      "learning_rate": 0.00046819146690802696,
      "loss": 3.0778,
      "step": 71553
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4567177295684814,
      "learning_rate": 0.00046818807965797447,
      "loss": 3.1174,
      "step": 71554
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0015506744384766,
      "learning_rate": 0.00046818469237665266,
      "loss": 2.6297,
      "step": 71555
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6168206930160522,
      "learning_rate": 0.00046818130506406225,
      "loss": 2.809,
      "step": 71556
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9227503538131714,
      "learning_rate": 0.0004681779177202036,
      "loss": 3.0501,
      "step": 71557
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.832589864730835,
      "learning_rate": 0.0004681745303450775,
      "loss": 3.0305,
      "step": 71558
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6138395071029663,
      "learning_rate": 0.00046817114293868473,
      "loss": 3.146,
      "step": 71559
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.025616407394409,
      "learning_rate": 0.00046816775550102564,
      "loss": 2.9443,
      "step": 71560
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8324166536331177,
      "learning_rate": 0.000468164368032101,
      "loss": 3.0379,
      "step": 71561
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4239331483840942,
      "learning_rate": 0.0004681609805319115,
      "loss": 3.3556,
      "step": 71562
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.501528739929199,
      "learning_rate": 0.00046815759300045763,
      "loss": 2.821,
      "step": 71563
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.334056854248047,
      "learning_rate": 0.0004681542054377401,
      "loss": 2.9664,
      "step": 71564
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.0177857875823975,
      "learning_rate": 0.00046815081784375953,
      "loss": 2.9207,
      "step": 71565
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.384651780128479,
      "learning_rate": 0.0004681474302185166,
      "loss": 2.9627,
      "step": 71566
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6014026403427124,
      "learning_rate": 0.0004681440425620119,
      "loss": 2.8951,
      "step": 71567
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5480684041976929,
      "learning_rate": 0.00046814065487424595,
      "loss": 2.9688,
      "step": 71568
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.447716236114502,
      "learning_rate": 0.00046813726715521953,
      "loss": 3.0194,
      "step": 71569
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5527769327163696,
      "learning_rate": 0.0004681338794049332,
      "loss": 2.9526,
      "step": 71570
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.004868268966675,
      "learning_rate": 0.00046813049162338765,
      "loss": 3.0741,
      "step": 71571
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4710766077041626,
      "learning_rate": 0.0004681271038105835,
      "loss": 3.0176,
      "step": 71572
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7778475284576416,
      "learning_rate": 0.00046812371596652124,
      "loss": 3.0784,
      "step": 71573
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7698359489440918,
      "learning_rate": 0.00046812032809120165,
      "loss": 3.1079,
      "step": 71574
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2925653457641602,
      "learning_rate": 0.00046811694018462533,
      "loss": 3.1341,
      "step": 71575
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6054409742355347,
      "learning_rate": 0.0004681135522467929,
      "loss": 3.1311,
      "step": 71576
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6361470222473145,
      "learning_rate": 0.0004681101642777049,
      "loss": 3.1672,
      "step": 71577
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.945953607559204,
      "learning_rate": 0.00046810677627736225,
      "loss": 2.962,
      "step": 71578
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4539666175842285,
      "learning_rate": 0.0004681033882457653,
      "loss": 3.2691,
      "step": 71579
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1924147605895996,
      "learning_rate": 0.0004681000001829146,
      "loss": 3.2572,
      "step": 71580
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7352415323257446,
      "learning_rate": 0.0004680966120888111,
      "loss": 2.9355,
      "step": 71581
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4240564107894897,
      "learning_rate": 0.00046809322396345514,
      "loss": 3.0814,
      "step": 71582
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5017151832580566,
      "learning_rate": 0.0004680898358068476,
      "loss": 2.8884,
      "step": 71583
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.530794620513916,
      "learning_rate": 0.00046808644761898896,
      "loss": 3.0499,
      "step": 71584
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.02353572845459,
      "learning_rate": 0.00046808305939987987,
      "loss": 3.0914,
      "step": 71585
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8622825145721436,
      "learning_rate": 0.000468079671149521,
      "loss": 3.0016,
      "step": 71586
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9122172594070435,
      "learning_rate": 0.0004680762828679129,
      "loss": 2.9192,
      "step": 71587
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.055865526199341,
      "learning_rate": 0.0004680728945550562,
      "loss": 2.9457,
      "step": 71588
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7463802099227905,
      "learning_rate": 0.00046806950621095157,
      "loss": 3.1897,
      "step": 71589
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8710888624191284,
      "learning_rate": 0.00046806611783559984,
      "loss": 2.9521,
      "step": 71590
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6170198917388916,
      "learning_rate": 0.0004680627294290012,
      "loss": 2.779,
      "step": 71591
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.479520797729492,
      "learning_rate": 0.0004680593409911566,
      "loss": 2.9037,
      "step": 71592
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6951041221618652,
      "learning_rate": 0.00046805595252206664,
      "loss": 3.183,
      "step": 71593
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6097044944763184,
      "learning_rate": 0.0004680525640217319,
      "loss": 3.3918,
      "step": 71594
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.811091423034668,
      "learning_rate": 0.000468049175490153,
      "loss": 3.1931,
      "step": 71595
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3323814868927,
      "learning_rate": 0.00046804578692733064,
      "loss": 2.972,
      "step": 71596
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.300720691680908,
      "learning_rate": 0.0004680423983332653,
      "loss": 2.767,
      "step": 71597
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4786101579666138,
      "learning_rate": 0.0004680390097079578,
      "loss": 3.0542,
      "step": 71598
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.173962354660034,
      "learning_rate": 0.00046803562105140865,
      "loss": 2.745,
      "step": 71599
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.728716254234314,
      "learning_rate": 0.0004680322323636185,
      "loss": 2.9579,
      "step": 71600
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6131230592727661,
      "learning_rate": 0.00046802884364458794,
      "loss": 2.9866,
      "step": 71601
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.064011812210083,
      "learning_rate": 0.0004680254548943177,
      "loss": 3.0895,
      "step": 71602
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5049316883087158,
      "learning_rate": 0.0004680220661128083,
      "loss": 3.0705,
      "step": 71603
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.402335524559021,
      "learning_rate": 0.00046801867730006044,
      "loss": 3.1748,
      "step": 71604
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4378702640533447,
      "learning_rate": 0.00046801528845607485,
      "loss": 2.9853,
      "step": 71605
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.965891718864441,
      "learning_rate": 0.0004680118995808519,
      "loss": 3.1393,
      "step": 71606
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1743533611297607,
      "learning_rate": 0.0004680085106743924,
      "loss": 3.0995,
      "step": 71607
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7426389455795288,
      "learning_rate": 0.000468005121736697,
      "loss": 2.9598,
      "step": 71608
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0708422660827637,
      "learning_rate": 0.00046800173276776623,
      "loss": 3.1286,
      "step": 71609
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7187507152557373,
      "learning_rate": 0.00046799834376760077,
      "loss": 3.1523,
      "step": 71610
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.245654582977295,
      "learning_rate": 0.00046799495473620134,
      "loss": 2.8437,
      "step": 71611
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7378700971603394,
      "learning_rate": 0.0004679915656735684,
      "loss": 2.9183,
      "step": 71612
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4532876014709473,
      "learning_rate": 0.00046798817657970265,
      "loss": 2.9121,
      "step": 71613
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.805294990539551,
      "learning_rate": 0.00046798478745460473,
      "loss": 3.3486,
      "step": 71614
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4144246578216553,
      "learning_rate": 0.0004679813982982753,
      "loss": 2.9953,
      "step": 71615
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7272173166275024,
      "learning_rate": 0.0004679780091107148,
      "loss": 2.9619,
      "step": 71616
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3275182247161865,
      "learning_rate": 0.00046797461989192427,
      "loss": 3.1224,
      "step": 71617
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4582794904708862,
      "learning_rate": 0.0004679712306419039,
      "loss": 2.97,
      "step": 71618
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5759658813476562,
      "learning_rate": 0.0004679678413606545,
      "loss": 3.128,
      "step": 71619
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8034958839416504,
      "learning_rate": 0.0004679644520481769,
      "loss": 3.003,
      "step": 71620
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7189961671829224,
      "learning_rate": 0.0004679610627044713,
      "loss": 3.1111,
      "step": 71621
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.514519691467285,
      "learning_rate": 0.00046795767332953874,
      "loss": 3.0998,
      "step": 71622
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.144987106323242,
      "learning_rate": 0.0004679542839233796,
      "loss": 2.8173,
      "step": 71623
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7962467670440674,
      "learning_rate": 0.0004679508944859947,
      "loss": 2.9524,
      "step": 71624
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6100651025772095,
      "learning_rate": 0.0004679475050173843,
      "loss": 3.0949,
      "step": 71625
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8831114768981934,
      "learning_rate": 0.00046794411551754955,
      "loss": 2.9405,
      "step": 71626
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.48586905002594,
      "learning_rate": 0.00046794072598649073,
      "loss": 3.0807,
      "step": 71627
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.241356134414673,
      "learning_rate": 0.0004679373364242085,
      "loss": 3.0411,
      "step": 71628
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.715374231338501,
      "learning_rate": 0.00046793394683070365,
      "loss": 3.0394,
      "step": 71629
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.060012102127075,
      "learning_rate": 0.0004679305572059766,
      "loss": 3.1917,
      "step": 71630
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.589835524559021,
      "learning_rate": 0.00046792716755002814,
      "loss": 3.0731,
      "step": 71631
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7117395401000977,
      "learning_rate": 0.00046792377786285884,
      "loss": 3.2647,
      "step": 71632
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5672492980957031,
      "learning_rate": 0.0004679203881444695,
      "loss": 3.0622,
      "step": 71633
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.596906065940857,
      "learning_rate": 0.00046791699839486035,
      "loss": 3.2677,
      "step": 71634
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.85452401638031,
      "learning_rate": 0.0004679136086140323,
      "loss": 3.0205,
      "step": 71635
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0556838512420654,
      "learning_rate": 0.00046791021880198606,
      "loss": 2.9067,
      "step": 71636
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7569013833999634,
      "learning_rate": 0.0004679068289587221,
      "loss": 2.8919,
      "step": 71637
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.645337462425232,
      "learning_rate": 0.00046790343908424106,
      "loss": 3.0388,
      "step": 71638
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5056698322296143,
      "learning_rate": 0.0004679000491785436,
      "loss": 2.8832,
      "step": 71639
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9812190532684326,
      "learning_rate": 0.0004678966592416304,
      "loss": 3.4095,
      "step": 71640
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.828542947769165,
      "learning_rate": 0.000467893269273502,
      "loss": 3.0529,
      "step": 71641
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8322346210479736,
      "learning_rate": 0.00046788987927415905,
      "loss": 3.2556,
      "step": 71642
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2746541500091553,
      "learning_rate": 0.0004678864892436022,
      "loss": 2.9707,
      "step": 71643
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7160190343856812,
      "learning_rate": 0.0004678830991818322,
      "loss": 3.1293,
      "step": 71644
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5100098848342896,
      "learning_rate": 0.00046787970908884947,
      "loss": 2.9948,
      "step": 71645
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.5077152252197266,
      "learning_rate": 0.00046787631896465465,
      "loss": 2.9722,
      "step": 71646
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.421875476837158,
      "learning_rate": 0.00046787292880924857,
      "loss": 3.1518,
      "step": 71647
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7632085084915161,
      "learning_rate": 0.00046786953862263173,
      "loss": 2.9079,
      "step": 71648
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4352993965148926,
      "learning_rate": 0.0004678661484048047,
      "loss": 2.8547,
      "step": 71649
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.248591184616089,
      "learning_rate": 0.0004678627581557683,
      "loss": 3.1923,
      "step": 71650
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6466562747955322,
      "learning_rate": 0.000467859367875523,
      "loss": 3.3112,
      "step": 71651
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9326385259628296,
      "learning_rate": 0.00046785597756406945,
      "loss": 3.3056,
      "step": 71652
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2047908306121826,
      "learning_rate": 0.0004678525872214083,
      "loss": 3.3666,
      "step": 71653
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.000434160232544,
      "learning_rate": 0.0004678491968475403,
      "loss": 2.9452,
      "step": 71654
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.0041019916534424,
      "learning_rate": 0.00046784580644246575,
      "loss": 3.3119,
      "step": 71655
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1123993396759033,
      "learning_rate": 0.00046784241600618566,
      "loss": 3.1204,
      "step": 71656
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6178836822509766,
      "learning_rate": 0.0004678390255387005,
      "loss": 3.1946,
      "step": 71657
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.265092372894287,
      "learning_rate": 0.0004678356350400108,
      "loss": 3.0227,
      "step": 71658
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.3392982482910156,
      "learning_rate": 0.0004678322445101173,
      "loss": 3.1924,
      "step": 71659
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.274644136428833,
      "learning_rate": 0.00046782885394902076,
      "loss": 2.9737,
      "step": 71660
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2855582237243652,
      "learning_rate": 0.00046782546335672154,
      "loss": 2.9148,
      "step": 71661
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7376140356063843,
      "learning_rate": 0.0004678220727332204,
      "loss": 3.1216,
      "step": 71662
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.372199296951294,
      "learning_rate": 0.00046781868207851807,
      "loss": 3.1543,
      "step": 71663
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0678043365478516,
      "learning_rate": 0.00046781529139261496,
      "loss": 3.1405,
      "step": 71664
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6138781309127808,
      "learning_rate": 0.0004678119006755118,
      "loss": 3.078,
      "step": 71665
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.386301040649414,
      "learning_rate": 0.0004678085099272093,
      "loss": 3.0181,
      "step": 71666
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9877195358276367,
      "learning_rate": 0.00046780511914770804,
      "loss": 2.9617,
      "step": 71667
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2529540061950684,
      "learning_rate": 0.0004678017283370086,
      "loss": 3.0268,
      "step": 71668
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7547577619552612,
      "learning_rate": 0.00046779833749511173,
      "loss": 3.026,
      "step": 71669
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.427879810333252,
      "learning_rate": 0.0004677949466220179,
      "loss": 3.1043,
      "step": 71670
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.0275068283081055,
      "learning_rate": 0.0004677915557177279,
      "loss": 2.9585,
      "step": 71671
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5963069200515747,
      "learning_rate": 0.00046778816478224223,
      "loss": 2.6685,
      "step": 71672
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7682068347930908,
      "learning_rate": 0.0004677847738155616,
      "loss": 2.8018,
      "step": 71673
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7676259279251099,
      "learning_rate": 0.0004677813828176866,
      "loss": 3.2953,
      "step": 71674
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0608346462249756,
      "learning_rate": 0.00046777799178861787,
      "loss": 2.7795,
      "step": 71675
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5952686071395874,
      "learning_rate": 0.000467774600728356,
      "loss": 3.1273,
      "step": 71676
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.861376166343689,
      "learning_rate": 0.0004677712096369017,
      "loss": 3.1249,
      "step": 71677
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0313825607299805,
      "learning_rate": 0.0004677678185142556,
      "loss": 3.088,
      "step": 71678
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9028840065002441,
      "learning_rate": 0.0004677644273604183,
      "loss": 3.2253,
      "step": 71679
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6011993885040283,
      "learning_rate": 0.0004677610361753903,
      "loss": 3.0414,
      "step": 71680
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.802744746208191,
      "learning_rate": 0.00046775764495917245,
      "loss": 2.9479,
      "step": 71681
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.7832987308502197,
      "learning_rate": 0.00046775425371176527,
      "loss": 3.265,
      "step": 71682
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8321863412857056,
      "learning_rate": 0.0004677508624331695,
      "loss": 3.0173,
      "step": 71683
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7012437582015991,
      "learning_rate": 0.00046774747112338555,
      "loss": 3.0256,
      "step": 71684
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.8384437561035156,
      "learning_rate": 0.0004677440797824143,
      "loss": 2.9197,
      "step": 71685
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8609764575958252,
      "learning_rate": 0.0004677406884102561,
      "loss": 3.2862,
      "step": 71686
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5998833179473877,
      "learning_rate": 0.0004677372970069119,
      "loss": 3.0729,
      "step": 71687
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5724912881851196,
      "learning_rate": 0.0004677339055723821,
      "loss": 3.0702,
      "step": 71688
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.855567216873169,
      "learning_rate": 0.0004677305141066674,
      "loss": 3.1425,
      "step": 71689
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.530231237411499,
      "learning_rate": 0.0004677271226097684,
      "loss": 2.9035,
      "step": 71690
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4601906538009644,
      "learning_rate": 0.0004677237310816858,
      "loss": 3.1137,
      "step": 71691
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6338881254196167,
      "learning_rate": 0.0004677203395224202,
      "loss": 3.0165,
      "step": 71692
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7413511276245117,
      "learning_rate": 0.00046771694793197225,
      "loss": 2.8946,
      "step": 71693
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6922237873077393,
      "learning_rate": 0.0004677135563103425,
      "loss": 3.1907,
      "step": 71694
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.482744812965393,
      "learning_rate": 0.0004677101646575316,
      "loss": 3.1047,
      "step": 71695
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.639410376548767,
      "learning_rate": 0.0004677067729735403,
      "loss": 2.8974,
      "step": 71696
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8655879497528076,
      "learning_rate": 0.0004677033812583691,
      "loss": 3.1126,
      "step": 71697
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3465819358825684,
      "learning_rate": 0.0004676999895120187,
      "loss": 3.2364,
      "step": 71698
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7408814430236816,
      "learning_rate": 0.0004676965977344896,
      "loss": 3.0611,
      "step": 71699
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7042858600616455,
      "learning_rate": 0.00046769320592578265,
      "loss": 2.8141,
      "step": 71700
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6606591939926147,
      "learning_rate": 0.0004676898140858984,
      "loss": 3.0644,
      "step": 71701
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.732367753982544,
      "learning_rate": 0.00046768642221483735,
      "loss": 3.1056,
      "step": 71702
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7739313840866089,
      "learning_rate": 0.00046768303031260034,
      "loss": 3.1195,
      "step": 71703
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8091247081756592,
      "learning_rate": 0.00046767963837918776,
      "loss": 2.9212,
      "step": 71704
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6171538829803467,
      "learning_rate": 0.00046767624641460046,
      "loss": 2.8913,
      "step": 71705
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0010035037994385,
      "learning_rate": 0.000467672854418839,
      "loss": 2.7635,
      "step": 71706
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5849031209945679,
      "learning_rate": 0.0004676694623919039,
      "loss": 2.687,
      "step": 71707
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3771870136260986,
      "learning_rate": 0.0004676660703337959,
      "loss": 3.0006,
      "step": 71708
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0892815589904785,
      "learning_rate": 0.00046766267824451565,
      "loss": 3.1691,
      "step": 71709
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.1220340728759766,
      "learning_rate": 0.0004676592861240637,
      "loss": 2.9182,
      "step": 71710
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6767224073410034,
      "learning_rate": 0.0004676558939724407,
      "loss": 3.0377,
      "step": 71711
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.639352560043335,
      "learning_rate": 0.00046765250178964747,
      "loss": 2.9446,
      "step": 71712
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9633837938308716,
      "learning_rate": 0.00046764910957568435,
      "loss": 2.9623,
      "step": 71713
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1988134384155273,
      "learning_rate": 0.0004676457173305521,
      "loss": 3.0424,
      "step": 71714
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6834105253219604,
      "learning_rate": 0.0004676423250542514,
      "loss": 3.1817,
      "step": 71715
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.314382791519165,
      "learning_rate": 0.00046763893274678274,
      "loss": 2.9701,
      "step": 71716
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5373550653457642,
      "learning_rate": 0.0004676355404081469,
      "loss": 2.9645,
      "step": 71717
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.592532992362976,
      "learning_rate": 0.00046763214803834444,
      "loss": 3.1204,
      "step": 71718
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.646370768547058,
      "learning_rate": 0.00046762875563737596,
      "loss": 2.9702,
      "step": 71719
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.545090675354004,
      "learning_rate": 0.00046762536320524214,
      "loss": 2.9075,
      "step": 71720
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.302229881286621,
      "learning_rate": 0.00046762197074194363,
      "loss": 2.9019,
      "step": 71721
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6936489343643188,
      "learning_rate": 0.00046761857824748103,
      "loss": 3.1554,
      "step": 71722
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3888252973556519,
      "learning_rate": 0.00046761518572185494,
      "loss": 3.0297,
      "step": 71723
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.444077491760254,
      "learning_rate": 0.00046761179316506606,
      "loss": 3.1022,
      "step": 71724
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.707653522491455,
      "learning_rate": 0.00046760840057711504,
      "loss": 2.9173,
      "step": 71725
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4387997388839722,
      "learning_rate": 0.0004676050079580023,
      "loss": 3.0743,
      "step": 71726
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.14827561378479,
      "learning_rate": 0.0004676016153077287,
      "loss": 3.1763,
      "step": 71727
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7847381830215454,
      "learning_rate": 0.00046759822262629484,
      "loss": 3.0167,
      "step": 71728
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6256449222564697,
      "learning_rate": 0.0004675948299137013,
      "loss": 3.096,
      "step": 71729
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8020278215408325,
      "learning_rate": 0.0004675914371699487,
      "loss": 3.1057,
      "step": 71730
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.765809416770935,
      "learning_rate": 0.00046758804439503757,
      "loss": 2.8852,
      "step": 71731
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.1478817462921143,
      "learning_rate": 0.0004675846515889688,
      "loss": 2.7597,
      "step": 71732
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4078502655029297,
      "learning_rate": 0.00046758125875174286,
      "loss": 3.0883,
      "step": 71733
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8943544626235962,
      "learning_rate": 0.0004675778658833604,
      "loss": 2.8491,
      "step": 71734
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3498353958129883,
      "learning_rate": 0.0004675744729838221,
      "loss": 2.9927,
      "step": 71735
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.812283754348755,
      "learning_rate": 0.0004675710800531285,
      "loss": 2.9684,
      "step": 71736
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0484046936035156,
      "learning_rate": 0.0004675676870912802,
      "loss": 3.0309,
      "step": 71737
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4314699172973633,
      "learning_rate": 0.0004675642940982779,
      "loss": 3.1533,
      "step": 71738
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.186168670654297,
      "learning_rate": 0.0004675609010741224,
      "loss": 2.9947,
      "step": 71739
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.6904873847961426,
      "learning_rate": 0.00046755750801881404,
      "loss": 3.0573,
      "step": 71740
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6486282348632812,
      "learning_rate": 0.00046755411493235353,
      "loss": 2.8085,
      "step": 71741
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3686625957489014,
      "learning_rate": 0.00046755072181474167,
      "loss": 3.16,
      "step": 71742
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.562981367111206,
      "learning_rate": 0.0004675473286659789,
      "loss": 2.9275,
      "step": 71743
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8696287870407104,
      "learning_rate": 0.00046754393548606584,
      "loss": 3.192,
      "step": 71744
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.131148338317871,
      "learning_rate": 0.0004675405422750034,
      "loss": 3.0019,
      "step": 71745
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7175711393356323,
      "learning_rate": 0.0004675371490327918,
      "loss": 2.9764,
      "step": 71746
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.402794361114502,
      "learning_rate": 0.0004675337557594321,
      "loss": 3.2203,
      "step": 71747
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.86173415184021,
      "learning_rate": 0.00046753036245492454,
      "loss": 2.9424,
      "step": 71748
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3928959369659424,
      "learning_rate": 0.00046752696911927,
      "loss": 3.2238,
      "step": 71749
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5206472873687744,
      "learning_rate": 0.00046752357575246894,
      "loss": 3.153,
      "step": 71750
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7130868434906006,
      "learning_rate": 0.00046752018235452224,
      "loss": 2.9788,
      "step": 71751
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6707929372787476,
      "learning_rate": 0.0004675167889254303,
      "loss": 3.1793,
      "step": 71752
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.3453665971755981,
      "learning_rate": 0.00046751339546519376,
      "loss": 2.8264,
      "step": 71753
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6821770668029785,
      "learning_rate": 0.00046751000197381345,
      "loss": 2.8714,
      "step": 71754
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3436174392700195,
      "learning_rate": 0.00046750660845128975,
      "loss": 3.0777,
      "step": 71755
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.512852430343628,
      "learning_rate": 0.0004675032148976235,
      "loss": 3.1032,
      "step": 71756
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9198267459869385,
      "learning_rate": 0.0004674998213128152,
      "loss": 3.0518,
      "step": 71757
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5305830240249634,
      "learning_rate": 0.0004674964276968655,
      "loss": 2.874,
      "step": 71758
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5832562446594238,
      "learning_rate": 0.00046749303404977497,
      "loss": 2.7936,
      "step": 71759
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5453625917434692,
      "learning_rate": 0.00046748964037154455,
      "loss": 2.7893,
      "step": 71760
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5840790271759033,
      "learning_rate": 0.0004674862466621745,
      "loss": 2.9554,
      "step": 71761
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5967200994491577,
      "learning_rate": 0.0004674828529216656,
      "loss": 3.0536,
      "step": 71762
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6169430017471313,
      "learning_rate": 0.0004674794591500185,
      "loss": 2.9907,
      "step": 71763
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7418440580368042,
      "learning_rate": 0.0004674760653472338,
      "loss": 3.0414,
      "step": 71764
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.932350754737854,
      "learning_rate": 0.00046747267151331216,
      "loss": 3.0286,
      "step": 71765
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7297773361206055,
      "learning_rate": 0.0004674692776482541,
      "loss": 2.8149,
      "step": 71766
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.854831337928772,
      "learning_rate": 0.0004674658837520604,
      "loss": 2.8981,
      "step": 71767
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.064929485321045,
      "learning_rate": 0.00046746248982473164,
      "loss": 3.0644,
      "step": 71768
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6560420989990234,
      "learning_rate": 0.0004674590958662684,
      "loss": 3.1841,
      "step": 71769
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.8491989374160767,
      "learning_rate": 0.00046745570187667145,
      "loss": 3.0695,
      "step": 71770
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4540917873382568,
      "learning_rate": 0.00046745230785594124,
      "loss": 2.9331,
      "step": 71771
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.534110426902771,
      "learning_rate": 0.0004674489138040785,
      "loss": 2.7434,
      "step": 71772
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6892473697662354,
      "learning_rate": 0.00046744551972108386,
      "loss": 3.0202,
      "step": 71773
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6300469636917114,
      "learning_rate": 0.00046744212560695793,
      "loss": 3.0876,
      "step": 71774
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.734095811843872,
      "learning_rate": 0.00046743873146170135,
      "loss": 3.0805,
      "step": 71775
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.56519615650177,
      "learning_rate": 0.00046743533728531474,
      "loss": 3.0976,
      "step": 71776
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7172008752822876,
      "learning_rate": 0.0004674319430777988,
      "loss": 3.092,
      "step": 71777
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.356198787689209,
      "learning_rate": 0.00046742854883915396,
      "loss": 3.0875,
      "step": 71778
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.720744252204895,
      "learning_rate": 0.0004674251545693811,
      "loss": 3.1011,
      "step": 71779
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.7096896171569824,
      "learning_rate": 0.00046742176026848075,
      "loss": 3.2853,
      "step": 71780
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.724816083908081,
      "learning_rate": 0.0004674183659364535,
      "loss": 2.7973,
      "step": 71781
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.45276939868927,
      "learning_rate": 0.0004674149715733001,
      "loss": 3.1917,
      "step": 71782
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.2807356119155884,
      "learning_rate": 0.000467411577179021,
      "loss": 3.1328,
      "step": 71783
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8226592540740967,
      "learning_rate": 0.00046740818275361687,
      "loss": 2.7852,
      "step": 71784
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.0694828033447266,
      "learning_rate": 0.00046740478829708856,
      "loss": 3.0701,
      "step": 71785
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.624977946281433,
      "learning_rate": 0.00046740139380943644,
      "loss": 3.205,
      "step": 71786
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.166337728500366,
      "learning_rate": 0.00046739799929066116,
      "loss": 3.0544,
      "step": 71787
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.912388563156128,
      "learning_rate": 0.0004673946047407636,
      "loss": 2.8983,
      "step": 71788
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.4543095827102661,
      "learning_rate": 0.0004673912101597441,
      "loss": 2.7844,
      "step": 71789
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5714977979660034,
      "learning_rate": 0.00046738781554760356,
      "loss": 2.8703,
      "step": 71790
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5068354606628418,
      "learning_rate": 0.0004673844209043424,
      "loss": 3.1774,
      "step": 71791
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2092831134796143,
      "learning_rate": 0.0004673810262299612,
      "loss": 2.8492,
      "step": 71792
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.9460153579711914,
      "learning_rate": 0.0004673776315244608,
      "loss": 3.2149,
      "step": 71793
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5499730110168457,
      "learning_rate": 0.00046737423678784183,
      "loss": 2.9296,
      "step": 71794
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.682433843612671,
      "learning_rate": 0.0004673708420201047,
      "loss": 3.0563,
      "step": 71795
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2652411460876465,
      "learning_rate": 0.0004673674472212501,
      "loss": 3.2583,
      "step": 71796
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.3711562156677246,
      "learning_rate": 0.000467364052391279,
      "loss": 2.8839,
      "step": 71797
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.6086541414260864,
      "learning_rate": 0.00046736065753019153,
      "loss": 2.9666,
      "step": 71798
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.8895182609558105,
      "learning_rate": 0.00046735726263798856,
      "loss": 3.302,
      "step": 71799
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.643355131149292,
      "learning_rate": 0.0004673538677146709,
      "loss": 2.9868,
      "step": 71800
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.5034446716308594,
      "learning_rate": 0.0004673504727602388,
      "loss": 3.096,
      "step": 71801
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.502467393875122,
      "learning_rate": 0.00046734707777469316,
      "loss": 2.9154,
      "step": 71802
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.490074872970581,
      "learning_rate": 0.00046734368275803457,
      "loss": 3.2299,
      "step": 71803
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.2131896018981934,
      "learning_rate": 0.00046734028771026366,
      "loss": 3.072,
      "step": 71804
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.066409111022949,
      "learning_rate": 0.000467336892631381,
      "loss": 3.2143,
      "step": 71805
    },
    {
      "epoch": 0.93,
      "grad_norm": 1.665126085281372,
      "learning_rate": 0.0004673334975213872,
      "loss": 3.3419,
      "step": 71806
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.4206652641296387,
      "learning_rate": 0.000467330102380283,
      "loss": 3.0884,
      "step": 71807
    },
    {
      "epoch": 0.93,
      "grad_norm": 2.098212718963623,
      "learning_rate": 0.0004673267072080689,
      "loss": 3.0297,
      "step": 71808
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.028135299682617,
      "learning_rate": 0.0004673233120047457,
      "loss": 3.0637,
      "step": 71809
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3825550079345703,
      "learning_rate": 0.0004673199167703139,
      "loss": 2.6677,
      "step": 71810
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.213590383529663,
      "learning_rate": 0.00046731652150477416,
      "loss": 3.0909,
      "step": 71811
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.313131093978882,
      "learning_rate": 0.00046731312620812714,
      "loss": 2.9797,
      "step": 71812
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.220592737197876,
      "learning_rate": 0.0004673097308803734,
      "loss": 3.0677,
      "step": 71813
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6411422491073608,
      "learning_rate": 0.0004673063355215137,
      "loss": 3.0364,
      "step": 71814
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0044682025909424,
      "learning_rate": 0.0004673029401315485,
      "loss": 3.3178,
      "step": 71815
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6019163131713867,
      "learning_rate": 0.0004672995447104786,
      "loss": 2.8731,
      "step": 71816
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9941948652267456,
      "learning_rate": 0.0004672961492583045,
      "loss": 3.0405,
      "step": 71817
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9356293678283691,
      "learning_rate": 0.000467292753775027,
      "loss": 3.1024,
      "step": 71818
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6811461448669434,
      "learning_rate": 0.00046728935826064654,
      "loss": 3.0669,
      "step": 71819
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.949331045150757,
      "learning_rate": 0.0004672859627151638,
      "loss": 3.1418,
      "step": 71820
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5039066076278687,
      "learning_rate": 0.0004672825671385795,
      "loss": 3.1616,
      "step": 71821
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5520399808883667,
      "learning_rate": 0.0004672791715308942,
      "loss": 3.0827,
      "step": 71822
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.438323736190796,
      "learning_rate": 0.00046727577589210844,
      "loss": 2.9255,
      "step": 71823
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6236073970794678,
      "learning_rate": 0.0004672723802222232,
      "loss": 2.9844,
      "step": 71824
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5859500169754028,
      "learning_rate": 0.00046726898452123864,
      "loss": 3.0672,
      "step": 71825
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4304733276367188,
      "learning_rate": 0.00046726558878915563,
      "loss": 3.1924,
      "step": 71826
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8787258863449097,
      "learning_rate": 0.00046726219302597495,
      "loss": 3.0509,
      "step": 71827
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7392960786819458,
      "learning_rate": 0.00046725879723169696,
      "loss": 2.88,
      "step": 71828
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5896310806274414,
      "learning_rate": 0.00046725540140632243,
      "loss": 2.8619,
      "step": 71829
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.210623025894165,
      "learning_rate": 0.0004672520055498519,
      "loss": 3.2202,
      "step": 71830
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.590674877166748,
      "learning_rate": 0.0004672486096622862,
      "loss": 2.7108,
      "step": 71831
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.33603036403656,
      "learning_rate": 0.0004672452137436257,
      "loss": 3.224,
      "step": 71832
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0195977687835693,
      "learning_rate": 0.00046724181779387116,
      "loss": 2.7959,
      "step": 71833
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0724823474884033,
      "learning_rate": 0.0004672384218130233,
      "loss": 3.0381,
      "step": 71834
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5300030708312988,
      "learning_rate": 0.0004672350258010826,
      "loss": 2.8779,
      "step": 71835
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.5302233695983887,
      "learning_rate": 0.0004672316297580497,
      "loss": 2.7691,
      "step": 71836
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.071092367172241,
      "learning_rate": 0.0004672282336839254,
      "loss": 2.9986,
      "step": 71837
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8353948593139648,
      "learning_rate": 0.0004672248375787102,
      "loss": 2.825,
      "step": 71838
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6995936632156372,
      "learning_rate": 0.0004672214414424047,
      "loss": 2.7066,
      "step": 71839
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.570210337638855,
      "learning_rate": 0.0004672180452750096,
      "loss": 3.1395,
      "step": 71840
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5473860502243042,
      "learning_rate": 0.00046721464907652547,
      "loss": 3.0778,
      "step": 71841
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.070817232131958,
      "learning_rate": 0.000467211252846953,
      "loss": 3.0353,
      "step": 71842
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.748668909072876,
      "learning_rate": 0.0004672078565862928,
      "loss": 2.8665,
      "step": 71843
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8438854217529297,
      "learning_rate": 0.0004672044602945455,
      "loss": 2.9937,
      "step": 71844
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.486527442932129,
      "learning_rate": 0.0004672010639717117,
      "loss": 2.9221,
      "step": 71845
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6432948112487793,
      "learning_rate": 0.00046719766761779214,
      "loss": 3.1585,
      "step": 71846
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6974327564239502,
      "learning_rate": 0.0004671942712327873,
      "loss": 3.1028,
      "step": 71847
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7580829858779907,
      "learning_rate": 0.000467190874816698,
      "loss": 2.9299,
      "step": 71848
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5181362628936768,
      "learning_rate": 0.0004671874783695247,
      "loss": 2.9408,
      "step": 71849
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0141043663024902,
      "learning_rate": 0.0004671840818912681,
      "loss": 3.1162,
      "step": 71850
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5194228887557983,
      "learning_rate": 0.00046718068538192877,
      "loss": 3.061,
      "step": 71851
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.137376308441162,
      "learning_rate": 0.0004671772888415075,
      "loss": 2.9868,
      "step": 71852
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.693784236907959,
      "learning_rate": 0.0004671738922700047,
      "loss": 2.9266,
      "step": 71853
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8580219745635986,
      "learning_rate": 0.00046717049566742116,
      "loss": 3.0149,
      "step": 71854
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9895894527435303,
      "learning_rate": 0.0004671670990337576,
      "loss": 3.0918,
      "step": 71855
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.293168783187866,
      "learning_rate": 0.0004671637023690143,
      "loss": 3.01,
      "step": 71856
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5167756080627441,
      "learning_rate": 0.0004671603056731922,
      "loss": 3.1093,
      "step": 71857
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7440496683120728,
      "learning_rate": 0.00046715690894629186,
      "loss": 3.1485,
      "step": 71858
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6265984773635864,
      "learning_rate": 0.00046715351218831394,
      "loss": 3.1833,
      "step": 71859
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.448211193084717,
      "learning_rate": 0.00046715011539925894,
      "loss": 3.0729,
      "step": 71860
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8356513977050781,
      "learning_rate": 0.00046714671857912767,
      "loss": 2.9711,
      "step": 71861
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7163689136505127,
      "learning_rate": 0.0004671433217279206,
      "loss": 3.1507,
      "step": 71862
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.1830811500549316,
      "learning_rate": 0.00046713992484563843,
      "loss": 2.6496,
      "step": 71863
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7259901762008667,
      "learning_rate": 0.0004671365279322818,
      "loss": 3.055,
      "step": 71864
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.741176724433899,
      "learning_rate": 0.0004671331309878513,
      "loss": 3.1047,
      "step": 71865
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8335283994674683,
      "learning_rate": 0.0004671297340123477,
      "loss": 2.9816,
      "step": 71866
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1401305198669434,
      "learning_rate": 0.0004671263370057715,
      "loss": 3.0467,
      "step": 71867
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.683120608329773,
      "learning_rate": 0.0004671229399681233,
      "loss": 2.9938,
      "step": 71868
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5498263835906982,
      "learning_rate": 0.00046711954289940376,
      "loss": 2.8944,
      "step": 71869
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7280776500701904,
      "learning_rate": 0.00046711614579961365,
      "loss": 3.1211,
      "step": 71870
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8427627086639404,
      "learning_rate": 0.0004671127486687534,
      "loss": 2.8611,
      "step": 71871
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6547406911849976,
      "learning_rate": 0.00046710935150682376,
      "loss": 2.9253,
      "step": 71872
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6281100511550903,
      "learning_rate": 0.0004671059543138254,
      "loss": 3.2383,
      "step": 71873
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7495847940444946,
      "learning_rate": 0.0004671025570897589,
      "loss": 3.3448,
      "step": 71874
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6749900579452515,
      "learning_rate": 0.0004670991598346248,
      "loss": 2.9822,
      "step": 71875
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9132436513900757,
      "learning_rate": 0.00046709576254842387,
      "loss": 3.2111,
      "step": 71876
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7232557535171509,
      "learning_rate": 0.00046709236523115663,
      "loss": 2.8614,
      "step": 71877
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8517245054244995,
      "learning_rate": 0.00046708896788282374,
      "loss": 2.9475,
      "step": 71878
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8218473196029663,
      "learning_rate": 0.0004670855705034259,
      "loss": 3.0152,
      "step": 71879
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.204951286315918,
      "learning_rate": 0.0004670821730929638,
      "loss": 3.0169,
      "step": 71880
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4845601320266724,
      "learning_rate": 0.00046707877565143783,
      "loss": 3.0084,
      "step": 71881
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5828862190246582,
      "learning_rate": 0.0004670753781788488,
      "loss": 3.185,
      "step": 71882
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7715504169464111,
      "learning_rate": 0.00046707198067519735,
      "loss": 3.1764,
      "step": 71883
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5624897480010986,
      "learning_rate": 0.000467068583140484,
      "loss": 2.8972,
      "step": 71884
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7278050184249878,
      "learning_rate": 0.0004670651855747094,
      "loss": 3.0756,
      "step": 71885
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8049379587173462,
      "learning_rate": 0.00046706178797787436,
      "loss": 2.9438,
      "step": 71886
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.520656943321228,
      "learning_rate": 0.00046705839034997935,
      "loss": 3.0945,
      "step": 71887
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2365825176239014,
      "learning_rate": 0.00046705499269102503,
      "loss": 3.1392,
      "step": 71888
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.396554708480835,
      "learning_rate": 0.00046705159500101196,
      "loss": 3.1506,
      "step": 71889
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7361165285110474,
      "learning_rate": 0.00046704819727994083,
      "loss": 3.1258,
      "step": 71890
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9465234279632568,
      "learning_rate": 0.0004670447995278124,
      "loss": 3.1348,
      "step": 71891
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5301761627197266,
      "learning_rate": 0.0004670414017446271,
      "loss": 3.0857,
      "step": 71892
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.594228982925415,
      "learning_rate": 0.00046703800393038566,
      "loss": 3.0077,
      "step": 71893
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4284306764602661,
      "learning_rate": 0.0004670346060850888,
      "loss": 3.2768,
      "step": 71894
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5732355117797852,
      "learning_rate": 0.00046703120820873695,
      "loss": 3.2268,
      "step": 71895
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5508899688720703,
      "learning_rate": 0.0004670278103013308,
      "loss": 3.2506,
      "step": 71896
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9152692556381226,
      "learning_rate": 0.00046702441236287114,
      "loss": 2.9927,
      "step": 71897
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6581296920776367,
      "learning_rate": 0.00046702101439335844,
      "loss": 3.2546,
      "step": 71898
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.540677785873413,
      "learning_rate": 0.0004670176163927934,
      "loss": 2.9069,
      "step": 71899
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9728710651397705,
      "learning_rate": 0.00046701421836117666,
      "loss": 3.0854,
      "step": 71900
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5799928903579712,
      "learning_rate": 0.0004670108202985088,
      "loss": 3.1881,
      "step": 71901
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.499255657196045,
      "learning_rate": 0.00046700742220479047,
      "loss": 2.9801,
      "step": 71902
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9580782651901245,
      "learning_rate": 0.00046700402408002224,
      "loss": 3.2377,
      "step": 71903
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.593366265296936,
      "learning_rate": 0.00046700062592420496,
      "loss": 2.9427,
      "step": 71904
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6908072233200073,
      "learning_rate": 0.000466997227737339,
      "loss": 3.2856,
      "step": 71905
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.015270233154297,
      "learning_rate": 0.00046699382951942507,
      "loss": 2.9105,
      "step": 71906
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6002874374389648,
      "learning_rate": 0.0004669904312704639,
      "loss": 3.0907,
      "step": 71907
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8730398416519165,
      "learning_rate": 0.0004669870329904561,
      "loss": 3.3377,
      "step": 71908
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5008280277252197,
      "learning_rate": 0.0004669836346794022,
      "loss": 3.1174,
      "step": 71909
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7467807531356812,
      "learning_rate": 0.0004669802363373029,
      "loss": 2.9619,
      "step": 71910
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4071807861328125,
      "learning_rate": 0.0004669768379641588,
      "loss": 3.1396,
      "step": 71911
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6151766777038574,
      "learning_rate": 0.0004669734395599706,
      "loss": 2.9451,
      "step": 71912
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4881250858306885,
      "learning_rate": 0.00046697004112473885,
      "loss": 2.9813,
      "step": 71913
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.035886764526367,
      "learning_rate": 0.0004669666426584642,
      "loss": 3.0501,
      "step": 71914
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.38719642162323,
      "learning_rate": 0.0004669632441611473,
      "loss": 2.949,
      "step": 71915
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4807130098342896,
      "learning_rate": 0.0004669598456327888,
      "loss": 3.0856,
      "step": 71916
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4382176399230957,
      "learning_rate": 0.0004669564470733893,
      "loss": 3.062,
      "step": 71917
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.492906093597412,
      "learning_rate": 0.0004669530484829495,
      "loss": 2.9579,
      "step": 71918
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6485098600387573,
      "learning_rate": 0.00046694964986147,
      "loss": 3.2995,
      "step": 71919
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5977119207382202,
      "learning_rate": 0.0004669462512089513,
      "loss": 2.9003,
      "step": 71920
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8102893829345703,
      "learning_rate": 0.00046694285252539417,
      "loss": 3.0828,
      "step": 71921
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.27095627784729,
      "learning_rate": 0.0004669394538107993,
      "loss": 3.1366,
      "step": 71922
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5209912061691284,
      "learning_rate": 0.00046693605506516715,
      "loss": 3.0442,
      "step": 71923
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6494110822677612,
      "learning_rate": 0.0004669326562884984,
      "loss": 3.1364,
      "step": 71924
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4562842845916748,
      "learning_rate": 0.00046692925748079384,
      "loss": 2.8931,
      "step": 71925
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4261815547943115,
      "learning_rate": 0.00046692585864205387,
      "loss": 2.8894,
      "step": 71926
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2105636596679688,
      "learning_rate": 0.00046692245977227926,
      "loss": 3.0391,
      "step": 71927
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.160447120666504,
      "learning_rate": 0.00046691906087147067,
      "loss": 2.9683,
      "step": 71928
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7336088418960571,
      "learning_rate": 0.00046691566193962864,
      "loss": 2.973,
      "step": 71929
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.041961431503296,
      "learning_rate": 0.00046691226297675375,
      "loss": 2.7853,
      "step": 71930
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6648460626602173,
      "learning_rate": 0.0004669088639828469,
      "loss": 3.2479,
      "step": 71931
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6201770305633545,
      "learning_rate": 0.00046690546495790843,
      "loss": 3.1058,
      "step": 71932
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.667685627937317,
      "learning_rate": 0.00046690206590193907,
      "loss": 2.7655,
      "step": 71933
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6146268844604492,
      "learning_rate": 0.00046689866681493946,
      "loss": 2.8907,
      "step": 71934
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1834707260131836,
      "learning_rate": 0.0004668952676969103,
      "loss": 3.3203,
      "step": 71935
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.658650517463684,
      "learning_rate": 0.00046689186854785206,
      "loss": 2.9875,
      "step": 71936
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4999507665634155,
      "learning_rate": 0.0004668884693677656,
      "loss": 3.0311,
      "step": 71937
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8505247831344604,
      "learning_rate": 0.00046688507015665143,
      "loss": 3.3963,
      "step": 71938
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.577866792678833,
      "learning_rate": 0.00046688167091451003,
      "loss": 2.9916,
      "step": 71939
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8789007663726807,
      "learning_rate": 0.0004668782716413423,
      "loss": 2.9545,
      "step": 71940
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3302148580551147,
      "learning_rate": 0.0004668748723371487,
      "loss": 2.9721,
      "step": 71941
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.852611541748047,
      "learning_rate": 0.00046687147300192994,
      "loss": 2.8556,
      "step": 71942
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5622512102127075,
      "learning_rate": 0.0004668680736356866,
      "loss": 2.9442,
      "step": 71943
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7266606092453003,
      "learning_rate": 0.00046686467423841927,
      "loss": 3.1643,
      "step": 71944
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4434127807617188,
      "learning_rate": 0.00046686127481012876,
      "loss": 2.9273,
      "step": 71945
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3750624656677246,
      "learning_rate": 0.00046685787535081553,
      "loss": 3.2237,
      "step": 71946
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5260682106018066,
      "learning_rate": 0.0004668544758604803,
      "loss": 2.9531,
      "step": 71947
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.444968581199646,
      "learning_rate": 0.0004668510763391236,
      "loss": 3.3292,
      "step": 71948
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7847070693969727,
      "learning_rate": 0.0004668476767867462,
      "loss": 3.1647,
      "step": 71949
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7152488231658936,
      "learning_rate": 0.00046684427720334866,
      "loss": 3.0271,
      "step": 71950
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0576181411743164,
      "learning_rate": 0.00046684087758893164,
      "loss": 2.77,
      "step": 71951
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7186951637268066,
      "learning_rate": 0.00046683747794349566,
      "loss": 3.2374,
      "step": 71952
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0563056468963623,
      "learning_rate": 0.00046683407826704157,
      "loss": 2.9328,
      "step": 71953
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.807214617729187,
      "learning_rate": 0.00046683067855956975,
      "loss": 3.1627,
      "step": 71954
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6695988178253174,
      "learning_rate": 0.00046682727882108103,
      "loss": 3.2378,
      "step": 71955
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6956572532653809,
      "learning_rate": 0.00046682387905157605,
      "loss": 2.9517,
      "step": 71956
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8079180717468262,
      "learning_rate": 0.0004668204792510552,
      "loss": 3.0934,
      "step": 71957
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6074343919754028,
      "learning_rate": 0.0004668170794195194,
      "loss": 3.1254,
      "step": 71958
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5020692348480225,
      "learning_rate": 0.0004668136795569691,
      "loss": 3.1356,
      "step": 71959
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.460793137550354,
      "learning_rate": 0.000466810279663405,
      "loss": 3.0914,
      "step": 71960
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5812195539474487,
      "learning_rate": 0.00046680687973882774,
      "loss": 2.9637,
      "step": 71961
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6276073455810547,
      "learning_rate": 0.0004668034797832379,
      "loss": 2.98,
      "step": 71962
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8360928297042847,
      "learning_rate": 0.00046680007979663606,
      "loss": 2.9389,
      "step": 71963
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7557249069213867,
      "learning_rate": 0.000466796679779023,
      "loss": 3.165,
      "step": 71964
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8635576963424683,
      "learning_rate": 0.00046679327973039945,
      "loss": 3.0262,
      "step": 71965
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.253401041030884,
      "learning_rate": 0.00046678987965076575,
      "loss": 3.543,
      "step": 71966
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6137406826019287,
      "learning_rate": 0.0004667864795401226,
      "loss": 3.1174,
      "step": 71967
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6142010688781738,
      "learning_rate": 0.0004667830793984708,
      "loss": 3.002,
      "step": 71968
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.685630202293396,
      "learning_rate": 0.00046677967922581075,
      "loss": 3.0503,
      "step": 71969
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.564409852027893,
      "learning_rate": 0.00046677627902214335,
      "loss": 2.9733,
      "step": 71970
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4995418787002563,
      "learning_rate": 0.0004667728787874691,
      "loss": 2.9249,
      "step": 71971
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0174529552459717,
      "learning_rate": 0.0004667694785217885,
      "loss": 3.1208,
      "step": 71972
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.037019968032837,
      "learning_rate": 0.00046676607822510236,
      "loss": 3.1235,
      "step": 71973
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.70249342918396,
      "learning_rate": 0.0004667626778974113,
      "loss": 3.0122,
      "step": 71974
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.464970111846924,
      "learning_rate": 0.0004667592775387158,
      "loss": 2.9152,
      "step": 71975
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.9233968257904053,
      "learning_rate": 0.0004667558771490167,
      "loss": 2.9057,
      "step": 71976
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.7576100826263428,
      "learning_rate": 0.00046675247672831456,
      "loss": 3.1043,
      "step": 71977
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.116234302520752,
      "learning_rate": 0.0004667490762766099,
      "loss": 3.0658,
      "step": 71978
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.539748430252075,
      "learning_rate": 0.00046674567579390344,
      "loss": 2.7458,
      "step": 71979
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.219116449356079,
      "learning_rate": 0.0004667422752801959,
      "loss": 3.0922,
      "step": 71980
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.191645622253418,
      "learning_rate": 0.00046673887473548773,
      "loss": 3.1278,
      "step": 71981
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.119309186935425,
      "learning_rate": 0.00046673547415977966,
      "loss": 2.8271,
      "step": 71982
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.590670347213745,
      "learning_rate": 0.0004667320735530724,
      "loss": 2.7991,
      "step": 71983
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0395309925079346,
      "learning_rate": 0.00046672867291536646,
      "loss": 3.2477,
      "step": 71984
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.400537371635437,
      "learning_rate": 0.00046672527224666247,
      "loss": 3.0992,
      "step": 71985
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6683608293533325,
      "learning_rate": 0.00046672187154696125,
      "loss": 2.8064,
      "step": 71986
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5615639686584473,
      "learning_rate": 0.00046671847081626317,
      "loss": 2.9673,
      "step": 71987
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8930777311325073,
      "learning_rate": 0.0004667150700545689,
      "loss": 3.2161,
      "step": 71988
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.675959587097168,
      "learning_rate": 0.0004667116692618793,
      "loss": 2.8609,
      "step": 71989
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6252999305725098,
      "learning_rate": 0.0004667082684381948,
      "loss": 3.1652,
      "step": 71990
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.107872486114502,
      "learning_rate": 0.00046670486758351607,
      "loss": 3.2542,
      "step": 71991
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8351324796676636,
      "learning_rate": 0.0004667014666978437,
      "loss": 2.8958,
      "step": 71992
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8987116813659668,
      "learning_rate": 0.0004666980657811785,
      "loss": 2.9263,
      "step": 71993
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9146023988723755,
      "learning_rate": 0.0004666946648335209,
      "loss": 2.7042,
      "step": 71994
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.073457956314087,
      "learning_rate": 0.0004666912638548717,
      "loss": 3.1046,
      "step": 71995
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6933865547180176,
      "learning_rate": 0.00046668786284523127,
      "loss": 2.9706,
      "step": 71996
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2041873931884766,
      "learning_rate": 0.0004666844618046006,
      "loss": 3.1911,
      "step": 71997
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0088603496551514,
      "learning_rate": 0.0004666810607329801,
      "loss": 2.8425,
      "step": 71998
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.2748498916625977,
      "learning_rate": 0.00046667765963037043,
      "loss": 3.1769,
      "step": 71999
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5881643295288086,
      "learning_rate": 0.00046667425849677226,
      "loss": 2.8321,
      "step": 72000
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6871004104614258,
      "learning_rate": 0.0004666708573321862,
      "loss": 3.0633,
      "step": 72001
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.08249831199646,
      "learning_rate": 0.0004666674561366128,
      "loss": 3.0554,
      "step": 72002
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3653976917266846,
      "learning_rate": 0.0004666640549100528,
      "loss": 2.9277,
      "step": 72003
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6737031936645508,
      "learning_rate": 0.0004666606536525069,
      "loss": 3.0832,
      "step": 72004
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3875768184661865,
      "learning_rate": 0.00046665725236397557,
      "loss": 2.9995,
      "step": 72005
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7179924249649048,
      "learning_rate": 0.0004666538510444595,
      "loss": 3.0356,
      "step": 72006
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7067344188690186,
      "learning_rate": 0.0004666504496939594,
      "loss": 2.8866,
      "step": 72007
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.935316562652588,
      "learning_rate": 0.0004666470483124758,
      "loss": 3.059,
      "step": 72008
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.552223563194275,
      "learning_rate": 0.0004666436469000093,
      "loss": 3.1926,
      "step": 72009
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.64754319190979,
      "learning_rate": 0.00046664024545656067,
      "loss": 3.1463,
      "step": 72010
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7594201564788818,
      "learning_rate": 0.0004666368439821304,
      "loss": 3.1344,
      "step": 72011
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3911941051483154,
      "learning_rate": 0.00046663344247671926,
      "loss": 2.9517,
      "step": 72012
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4256987571716309,
      "learning_rate": 0.00046663004094032784,
      "loss": 3.087,
      "step": 72013
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2985708713531494,
      "learning_rate": 0.00046662663937295675,
      "loss": 2.9902,
      "step": 72014
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6400370597839355,
      "learning_rate": 0.00046662323777460653,
      "loss": 2.921,
      "step": 72015
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5257476568222046,
      "learning_rate": 0.00046661983614527806,
      "loss": 2.8553,
      "step": 72016
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7435228824615479,
      "learning_rate": 0.0004666164344849717,
      "loss": 2.8673,
      "step": 72017
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.28298282623291,
      "learning_rate": 0.0004666130327936882,
      "loss": 2.8922,
      "step": 72018
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9179179668426514,
      "learning_rate": 0.00046660963107142823,
      "loss": 3.0869,
      "step": 72019
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.991734027862549,
      "learning_rate": 0.00046660622931819236,
      "loss": 2.8336,
      "step": 72020
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.515750765800476,
      "learning_rate": 0.0004666028275339812,
      "loss": 3.1828,
      "step": 72021
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8777649402618408,
      "learning_rate": 0.00046659942571879555,
      "loss": 2.992,
      "step": 72022
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.541229009628296,
      "learning_rate": 0.00046659602387263586,
      "loss": 3.0682,
      "step": 72023
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4047871828079224,
      "learning_rate": 0.0004665926219955028,
      "loss": 3.0479,
      "step": 72024
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.172898769378662,
      "learning_rate": 0.0004665892200873971,
      "loss": 2.9646,
      "step": 72025
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.726438045501709,
      "learning_rate": 0.00046658581814831927,
      "loss": 3.2692,
      "step": 72026
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.533911943435669,
      "learning_rate": 0.00046658241617827,
      "loss": 3.0949,
      "step": 72027
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6065882444381714,
      "learning_rate": 0.0004665790141772499,
      "loss": 3.136,
      "step": 72028
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1724162101745605,
      "learning_rate": 0.00046657561214525966,
      "loss": 2.996,
      "step": 72029
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7025959491729736,
      "learning_rate": 0.00046657221008229976,
      "loss": 2.9899,
      "step": 72030
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7122342586517334,
      "learning_rate": 0.000466568807988371,
      "loss": 3.2086,
      "step": 72031
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.469733476638794,
      "learning_rate": 0.000466565405863474,
      "loss": 2.8834,
      "step": 72032
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5019525289535522,
      "learning_rate": 0.0004665620037076093,
      "loss": 3.0403,
      "step": 72033
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.494338035583496,
      "learning_rate": 0.0004665586015207776,
      "loss": 3.0378,
      "step": 72034
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0371992588043213,
      "learning_rate": 0.00046655519930297956,
      "loss": 2.945,
      "step": 72035
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7257568836212158,
      "learning_rate": 0.00046655179705421565,
      "loss": 3.0805,
      "step": 72036
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9901742935180664,
      "learning_rate": 0.0004665483947744866,
      "loss": 2.9603,
      "step": 72037
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7769396305084229,
      "learning_rate": 0.00046654499246379317,
      "loss": 2.9252,
      "step": 72038
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.585221767425537,
      "learning_rate": 0.0004665415901221359,
      "loss": 3.2888,
      "step": 72039
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4852566719055176,
      "learning_rate": 0.00046653818774951533,
      "loss": 2.9768,
      "step": 72040
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.592960834503174,
      "learning_rate": 0.0004665347853459322,
      "loss": 3.1052,
      "step": 72041
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5829341411590576,
      "learning_rate": 0.00046653138291138707,
      "loss": 3.1926,
      "step": 72042
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5064926147460938,
      "learning_rate": 0.0004665279804458807,
      "loss": 3.0805,
      "step": 72043
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6809748411178589,
      "learning_rate": 0.00046652457794941354,
      "loss": 2.9368,
      "step": 72044
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6001126766204834,
      "learning_rate": 0.00046652117542198636,
      "loss": 3.1621,
      "step": 72045
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5939843654632568,
      "learning_rate": 0.0004665177728635998,
      "loss": 2.9375,
      "step": 72046
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5047608613967896,
      "learning_rate": 0.00046651437027425436,
      "loss": 3.2035,
      "step": 72047
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8068742752075195,
      "learning_rate": 0.0004665109676539508,
      "loss": 2.8088,
      "step": 72048
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4076732397079468,
      "learning_rate": 0.0004665075650026896,
      "loss": 3.0269,
      "step": 72049
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8734253644943237,
      "learning_rate": 0.00046650416232047164,
      "loss": 2.9318,
      "step": 72050
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9872291088104248,
      "learning_rate": 0.0004665007596072973,
      "loss": 3.1516,
      "step": 72051
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.084036111831665,
      "learning_rate": 0.00046649735686316745,
      "loss": 3.1667,
      "step": 72052
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9266440868377686,
      "learning_rate": 0.00046649395408808263,
      "loss": 3.2354,
      "step": 72053
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.435922622680664,
      "learning_rate": 0.0004664905512820433,
      "loss": 2.8801,
      "step": 72054
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.442911148071289,
      "learning_rate": 0.00046648714844505033,
      "loss": 3.0526,
      "step": 72055
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3075430393218994,
      "learning_rate": 0.0004664837455771042,
      "loss": 3.0243,
      "step": 72056
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9904303550720215,
      "learning_rate": 0.00046648034267820555,
      "loss": 3.2446,
      "step": 72057
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5196857452392578,
      "learning_rate": 0.0004664769397483551,
      "loss": 2.8864,
      "step": 72058
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4237911701202393,
      "learning_rate": 0.00046647353678755355,
      "loss": 3.1291,
      "step": 72059
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.6751818656921387,
      "learning_rate": 0.00046647013379580134,
      "loss": 3.215,
      "step": 72060
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8244584798812866,
      "learning_rate": 0.00046646673077309917,
      "loss": 3.0287,
      "step": 72061
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.796573281288147,
      "learning_rate": 0.00046646332771944774,
      "loss": 2.8645,
      "step": 72062
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7034517526626587,
      "learning_rate": 0.00046645992463484765,
      "loss": 2.9182,
      "step": 72063
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.208534002304077,
      "learning_rate": 0.0004664565215192994,
      "loss": 2.9997,
      "step": 72064
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6197524070739746,
      "learning_rate": 0.0004664531183728039,
      "loss": 3.1514,
      "step": 72065
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7487326860427856,
      "learning_rate": 0.00046644971519536145,
      "loss": 3.1199,
      "step": 72066
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4402750730514526,
      "learning_rate": 0.00046644631198697296,
      "loss": 3.1278,
      "step": 72067
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5873805284500122,
      "learning_rate": 0.000466442908747639,
      "loss": 3.0575,
      "step": 72068
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.646090269088745,
      "learning_rate": 0.00046643950547736013,
      "loss": 3.163,
      "step": 72069
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8702402114868164,
      "learning_rate": 0.00046643610217613694,
      "loss": 3.0373,
      "step": 72070
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.431027889251709,
      "learning_rate": 0.00046643269884397024,
      "loss": 3.0164,
      "step": 72071
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7152581214904785,
      "learning_rate": 0.00046642929548086046,
      "loss": 3.0929,
      "step": 72072
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.71620512008667,
      "learning_rate": 0.00046642589208680836,
      "loss": 3.0392,
      "step": 72073
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5736721754074097,
      "learning_rate": 0.00046642248866181465,
      "loss": 2.834,
      "step": 72074
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6740094423294067,
      "learning_rate": 0.0004664190852058797,
      "loss": 3.081,
      "step": 72075
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7769756317138672,
      "learning_rate": 0.0004664156817190044,
      "loss": 3.1308,
      "step": 72076
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4896880388259888,
      "learning_rate": 0.0004664122782011892,
      "loss": 2.9433,
      "step": 72077
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.14896297454834,
      "learning_rate": 0.000466408874652435,
      "loss": 2.8158,
      "step": 72078
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6659657955169678,
      "learning_rate": 0.00046640547107274204,
      "loss": 3.1954,
      "step": 72079
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.532424807548523,
      "learning_rate": 0.00046640206746211124,
      "loss": 3.238,
      "step": 72080
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6052634716033936,
      "learning_rate": 0.0004663986638205432,
      "loss": 2.9314,
      "step": 72081
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8874521255493164,
      "learning_rate": 0.00046639526014803846,
      "loss": 3.0199,
      "step": 72082
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7280690670013428,
      "learning_rate": 0.0004663918564445977,
      "loss": 3.0196,
      "step": 72083
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1539523601531982,
      "learning_rate": 0.0004663884527102215,
      "loss": 2.9298,
      "step": 72084
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4848066568374634,
      "learning_rate": 0.0004663850489449107,
      "loss": 2.8943,
      "step": 72085
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.365986704826355,
      "learning_rate": 0.00046638164514866564,
      "loss": 2.8686,
      "step": 72086
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.693713903427124,
      "learning_rate": 0.00046637824132148714,
      "loss": 2.7723,
      "step": 72087
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8562904596328735,
      "learning_rate": 0.00046637483746337575,
      "loss": 3.1387,
      "step": 72088
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6228420734405518,
      "learning_rate": 0.0004663714335743323,
      "loss": 2.8093,
      "step": 72089
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4100723266601562,
      "learning_rate": 0.00046636802965435703,
      "loss": 3.0675,
      "step": 72090
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.1274752616882324,
      "learning_rate": 0.0004663646257034509,
      "loss": 2.7536,
      "step": 72091
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.45500910282135,
      "learning_rate": 0.0004663612217216144,
      "loss": 2.9228,
      "step": 72092
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4268693923950195,
      "learning_rate": 0.0004663578177088483,
      "loss": 3.2571,
      "step": 72093
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6061959266662598,
      "learning_rate": 0.00046635441366515306,
      "loss": 3.1069,
      "step": 72094
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.303553819656372,
      "learning_rate": 0.00046635100959052954,
      "loss": 3.291,
      "step": 72095
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0368459224700928,
      "learning_rate": 0.0004663476054849781,
      "loss": 3.0547,
      "step": 72096
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4633591175079346,
      "learning_rate": 0.00046634420134849944,
      "loss": 3.1138,
      "step": 72097
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4254443645477295,
      "learning_rate": 0.00046634079718109434,
      "loss": 2.9269,
      "step": 72098
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7810157537460327,
      "learning_rate": 0.0004663373929827634,
      "loss": 2.9952,
      "step": 72099
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6395227909088135,
      "learning_rate": 0.0004663339887535071,
      "loss": 2.996,
      "step": 72100
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1263253688812256,
      "learning_rate": 0.00046633058449332623,
      "loss": 3.2939,
      "step": 72101
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9883352518081665,
      "learning_rate": 0.00046632718020222136,
      "loss": 2.9456,
      "step": 72102
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4950731992721558,
      "learning_rate": 0.0004663237758801931,
      "loss": 2.9781,
      "step": 72103
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6199485063552856,
      "learning_rate": 0.00046632037152724207,
      "loss": 3.1344,
      "step": 72104
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8745702505111694,
      "learning_rate": 0.000466316967143369,
      "loss": 3.2241,
      "step": 72105
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5557032823562622,
      "learning_rate": 0.0004663135627285745,
      "loss": 2.777,
      "step": 72106
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8684006929397583,
      "learning_rate": 0.00046631015828285904,
      "loss": 3.0831,
      "step": 72107
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6069626808166504,
      "learning_rate": 0.0004663067538062236,
      "loss": 2.9663,
      "step": 72108
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2386977672576904,
      "learning_rate": 0.00046630334929866844,
      "loss": 3.1149,
      "step": 72109
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5097635984420776,
      "learning_rate": 0.0004662999447601944,
      "loss": 3.0212,
      "step": 72110
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9662988185882568,
      "learning_rate": 0.000466296540190802,
      "loss": 2.7048,
      "step": 72111
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.790118932723999,
      "learning_rate": 0.00046629313559049196,
      "loss": 3.2501,
      "step": 72112
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7560327053070068,
      "learning_rate": 0.0004662897309592649,
      "loss": 3.1144,
      "step": 72113
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7769712209701538,
      "learning_rate": 0.0004662863262971215,
      "loss": 2.8963,
      "step": 72114
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4615204334259033,
      "learning_rate": 0.00046628292160406215,
      "loss": 3.0432,
      "step": 72115
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.1066737174987793,
      "learning_rate": 0.00046627951688008783,
      "loss": 2.9337,
      "step": 72116
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.996584177017212,
      "learning_rate": 0.000466276112125199,
      "loss": 3.1092,
      "step": 72117
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9443726539611816,
      "learning_rate": 0.00046627270733939625,
      "loss": 2.9958,
      "step": 72118
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.114692449569702,
      "learning_rate": 0.00046626930252268025,
      "loss": 3.0857,
      "step": 72119
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2223968505859375,
      "learning_rate": 0.00046626589767505175,
      "loss": 3.3764,
      "step": 72120
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5387989282608032,
      "learning_rate": 0.0004662624927965112,
      "loss": 2.9114,
      "step": 72121
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.492525815963745,
      "learning_rate": 0.00046625908788705924,
      "loss": 2.9255,
      "step": 72122
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.065488576889038,
      "learning_rate": 0.00046625568294669675,
      "loss": 2.9702,
      "step": 72123
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.489912748336792,
      "learning_rate": 0.00046625227797542415,
      "loss": 2.9598,
      "step": 72124
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.543301820755005,
      "learning_rate": 0.000466248872973242,
      "loss": 2.9204,
      "step": 72125
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7420305013656616,
      "learning_rate": 0.0004662454679401511,
      "loss": 2.827,
      "step": 72126
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7195838689804077,
      "learning_rate": 0.00046624206287615205,
      "loss": 2.9916,
      "step": 72127
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5659581422805786,
      "learning_rate": 0.0004662386577812455,
      "loss": 2.8724,
      "step": 72128
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9189238548278809,
      "learning_rate": 0.00046623525265543196,
      "loss": 2.9554,
      "step": 72129
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.570570945739746,
      "learning_rate": 0.00046623184749871216,
      "loss": 2.9244,
      "step": 72130
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5559931993484497,
      "learning_rate": 0.0004662284423110868,
      "loss": 3.0587,
      "step": 72131
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8537651300430298,
      "learning_rate": 0.0004662250370925564,
      "loss": 3.0528,
      "step": 72132
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7379693984985352,
      "learning_rate": 0.0004662216318431216,
      "loss": 2.9312,
      "step": 72133
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5623011589050293,
      "learning_rate": 0.0004662182265627831,
      "loss": 3.077,
      "step": 72134
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.453720211982727,
      "learning_rate": 0.00046621482125154144,
      "loss": 3.0323,
      "step": 72135
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7246352434158325,
      "learning_rate": 0.0004662114159093973,
      "loss": 3.0311,
      "step": 72136
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.833883285522461,
      "learning_rate": 0.0004662080105363513,
      "loss": 3.0719,
      "step": 72137
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3660975694656372,
      "learning_rate": 0.00046620460513240427,
      "loss": 3.2333,
      "step": 72138
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6880292892456055,
      "learning_rate": 0.0004662011996975565,
      "loss": 2.9527,
      "step": 72139
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5332436561584473,
      "learning_rate": 0.00046619779423180886,
      "loss": 3.0011,
      "step": 72140
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6945626735687256,
      "learning_rate": 0.0004661943887351619,
      "loss": 3.1579,
      "step": 72141
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6366037130355835,
      "learning_rate": 0.00046619098320761624,
      "loss": 2.8591,
      "step": 72142
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7479422092437744,
      "learning_rate": 0.00046618757764917254,
      "loss": 3.2413,
      "step": 72143
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6186615228652954,
      "learning_rate": 0.0004661841720598315,
      "loss": 3.0332,
      "step": 72144
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2926275730133057,
      "learning_rate": 0.00046618076643959355,
      "loss": 3.2343,
      "step": 72145
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8898494243621826,
      "learning_rate": 0.0004661773607884595,
      "loss": 3.2654,
      "step": 72146
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.466138482093811,
      "learning_rate": 0.0004661739551064301,
      "loss": 2.9997,
      "step": 72147
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5843998193740845,
      "learning_rate": 0.00046617054939350564,
      "loss": 3.0813,
      "step": 72148
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.653902530670166,
      "learning_rate": 0.00046616714364968706,
      "loss": 2.9287,
      "step": 72149
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.171680450439453,
      "learning_rate": 0.0004661637378749748,
      "loss": 2.9594,
      "step": 72150
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5972450971603394,
      "learning_rate": 0.0004661603320693696,
      "loss": 3.0312,
      "step": 72151
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.6499886512756348,
      "learning_rate": 0.00046615692623287204,
      "loss": 3.1299,
      "step": 72152
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6013274192810059,
      "learning_rate": 0.00046615352036548277,
      "loss": 2.9863,
      "step": 72153
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.258262872695923,
      "learning_rate": 0.00046615011446720235,
      "loss": 2.9298,
      "step": 72154
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9849908351898193,
      "learning_rate": 0.0004661467085380316,
      "loss": 3.0186,
      "step": 72155
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.5897533893585205,
      "learning_rate": 0.000466143302577971,
      "loss": 3.0627,
      "step": 72156
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9212498664855957,
      "learning_rate": 0.0004661398965870212,
      "loss": 3.067,
      "step": 72157
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8115980625152588,
      "learning_rate": 0.0004661364905651829,
      "loss": 2.9663,
      "step": 72158
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.352116584777832,
      "learning_rate": 0.00046613308451245674,
      "loss": 3.09,
      "step": 72159
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5652573108673096,
      "learning_rate": 0.00046612967842884324,
      "loss": 2.8005,
      "step": 72160
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5497287511825562,
      "learning_rate": 0.00046612627231434303,
      "loss": 2.9997,
      "step": 72161
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9058476686477661,
      "learning_rate": 0.0004661228661689569,
      "loss": 3.1556,
      "step": 72162
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.777951955795288,
      "learning_rate": 0.0004661194599926853,
      "loss": 3.0952,
      "step": 72163
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.7535016536712646,
      "learning_rate": 0.0004661160537855291,
      "loss": 3.0648,
      "step": 72164
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.623653769493103,
      "learning_rate": 0.0004661126475474887,
      "loss": 2.9534,
      "step": 72165
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.375593423843384,
      "learning_rate": 0.0004661092412785648,
      "loss": 2.9432,
      "step": 72166
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.404442548751831,
      "learning_rate": 0.0004661058349787581,
      "loss": 2.9159,
      "step": 72167
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.911611557006836,
      "learning_rate": 0.00046610242864806916,
      "loss": 3.0749,
      "step": 72168
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.15099835395813,
      "learning_rate": 0.0004660990222864987,
      "loss": 2.8683,
      "step": 72169
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3243775367736816,
      "learning_rate": 0.00046609561589404727,
      "loss": 3.0688,
      "step": 72170
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7691800594329834,
      "learning_rate": 0.0004660922094707155,
      "loss": 2.8005,
      "step": 72171
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5431486368179321,
      "learning_rate": 0.00046608880301650407,
      "loss": 3.0066,
      "step": 72172
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0609967708587646,
      "learning_rate": 0.00046608539653141364,
      "loss": 3.1686,
      "step": 72173
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.435262680053711,
      "learning_rate": 0.00046608199001544476,
      "loss": 3.3814,
      "step": 72174
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0911781787872314,
      "learning_rate": 0.0004660785834685981,
      "loss": 2.8534,
      "step": 72175
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5814422369003296,
      "learning_rate": 0.0004660751768908742,
      "loss": 3.0938,
      "step": 72176
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4336442947387695,
      "learning_rate": 0.000466071770282274,
      "loss": 2.9823,
      "step": 72177
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6384949684143066,
      "learning_rate": 0.0004660683636427978,
      "loss": 3.1169,
      "step": 72178
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5479001998901367,
      "learning_rate": 0.00046606495697244636,
      "loss": 3.1574,
      "step": 72179
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4879043102264404,
      "learning_rate": 0.00046606155027122034,
      "loss": 2.8222,
      "step": 72180
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7358406782150269,
      "learning_rate": 0.00046605814353912036,
      "loss": 3.0658,
      "step": 72181
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.3710129261016846,
      "learning_rate": 0.0004660547367761469,
      "loss": 2.9896,
      "step": 72182
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.564305543899536,
      "learning_rate": 0.0004660513299823008,
      "loss": 2.9146,
      "step": 72183
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.555163025856018,
      "learning_rate": 0.00046604792315758274,
      "loss": 2.8444,
      "step": 72184
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6745874881744385,
      "learning_rate": 0.0004660445163019931,
      "loss": 3.0457,
      "step": 72185
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.804043769836426,
      "learning_rate": 0.0004660411094155327,
      "loss": 3.4742,
      "step": 72186
    },
    {
      "epoch": 0.94,
      "grad_norm": 4.756807804107666,
      "learning_rate": 0.00046603770249820217,
      "loss": 3.1763,
      "step": 72187
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.8789827823638916,
      "learning_rate": 0.0004660342955500021,
      "loss": 2.697,
      "step": 72188
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.584739089012146,
      "learning_rate": 0.00046603088857093296,
      "loss": 3.2729,
      "step": 72189
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7596285343170166,
      "learning_rate": 0.0004660274815609957,
      "loss": 2.8726,
      "step": 72190
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.822852849960327,
      "learning_rate": 0.0004660240745201908,
      "loss": 3.2069,
      "step": 72191
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.114006996154785,
      "learning_rate": 0.0004660206674485188,
      "loss": 3.1979,
      "step": 72192
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7207849025726318,
      "learning_rate": 0.0004660172603459805,
      "loss": 2.9104,
      "step": 72193
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6805306673049927,
      "learning_rate": 0.0004660138532125764,
      "loss": 3.208,
      "step": 72194
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8592777252197266,
      "learning_rate": 0.0004660104460483072,
      "loss": 2.8448,
      "step": 72195
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9866114854812622,
      "learning_rate": 0.0004660070388531736,
      "loss": 3.1764,
      "step": 72196
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6334890127182007,
      "learning_rate": 0.00046600363162717606,
      "loss": 3.2305,
      "step": 72197
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6000763177871704,
      "learning_rate": 0.00046600022437031535,
      "loss": 3.1148,
      "step": 72198
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3885934352874756,
      "learning_rate": 0.000465996817082592,
      "loss": 2.7591,
      "step": 72199
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3565382957458496,
      "learning_rate": 0.0004659934097640068,
      "loss": 3.123,
      "step": 72200
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8759821653366089,
      "learning_rate": 0.0004659900024145603,
      "loss": 2.9914,
      "step": 72201
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5566738843917847,
      "learning_rate": 0.0004659865950342531,
      "loss": 3.1189,
      "step": 72202
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5677552223205566,
      "learning_rate": 0.0004659831876230858,
      "loss": 2.9357,
      "step": 72203
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.803359031677246,
      "learning_rate": 0.0004659797801810591,
      "loss": 3.0259,
      "step": 72204
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.646234154701233,
      "learning_rate": 0.0004659763727081737,
      "loss": 2.815,
      "step": 72205
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6157543659210205,
      "learning_rate": 0.00046597296520443015,
      "loss": 2.9638,
      "step": 72206
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.291447401046753,
      "learning_rate": 0.00046596955766982904,
      "loss": 2.8041,
      "step": 72207
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2410004138946533,
      "learning_rate": 0.00046596615010437116,
      "loss": 3.0579,
      "step": 72208
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8502082824707031,
      "learning_rate": 0.00046596274250805693,
      "loss": 3.042,
      "step": 72209
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7139540910720825,
      "learning_rate": 0.0004659593348808871,
      "loss": 3.029,
      "step": 72210
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2334136962890625,
      "learning_rate": 0.00046595592722286234,
      "loss": 3.018,
      "step": 72211
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.721368670463562,
      "learning_rate": 0.00046595251953398316,
      "loss": 2.9777,
      "step": 72212
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6002793312072754,
      "learning_rate": 0.0004659491118142504,
      "loss": 3.0152,
      "step": 72213
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5511714220046997,
      "learning_rate": 0.0004659457040636645,
      "loss": 3.1399,
      "step": 72214
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3367445468902588,
      "learning_rate": 0.0004659422962822262,
      "loss": 3.1211,
      "step": 72215
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.615836262702942,
      "learning_rate": 0.000465938888469936,
      "loss": 3.0019,
      "step": 72216
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7575082778930664,
      "learning_rate": 0.00046593548062679466,
      "loss": 3.0581,
      "step": 72217
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3508051633834839,
      "learning_rate": 0.00046593207275280287,
      "loss": 3.069,
      "step": 72218
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4135236740112305,
      "learning_rate": 0.0004659286648479611,
      "loss": 2.9536,
      "step": 72219
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6682145595550537,
      "learning_rate": 0.0004659252569122701,
      "loss": 3.0294,
      "step": 72220
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4000940322875977,
      "learning_rate": 0.00046592184894573045,
      "loss": 2.864,
      "step": 72221
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.794736385345459,
      "learning_rate": 0.0004659184409483427,
      "loss": 3.2687,
      "step": 72222
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.00217342376709,
      "learning_rate": 0.00046591503292010773,
      "loss": 2.9786,
      "step": 72223
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1387977600097656,
      "learning_rate": 0.00046591162486102594,
      "loss": 3.187,
      "step": 72224
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3185940980911255,
      "learning_rate": 0.00046590821677109805,
      "loss": 3.1155,
      "step": 72225
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.039602756500244,
      "learning_rate": 0.0004659048086503247,
      "loss": 2.9404,
      "step": 72226
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.085782289505005,
      "learning_rate": 0.00046590140049870653,
      "loss": 3.0251,
      "step": 72227
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7259633541107178,
      "learning_rate": 0.00046589799231624415,
      "loss": 3.362,
      "step": 72228
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.833256483078003,
      "learning_rate": 0.0004658945841029382,
      "loss": 2.7786,
      "step": 72229
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4772088527679443,
      "learning_rate": 0.00046589117585878935,
      "loss": 3.205,
      "step": 72230
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7332462072372437,
      "learning_rate": 0.0004658877675837981,
      "loss": 3.01,
      "step": 72231
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9096662998199463,
      "learning_rate": 0.0004658843592779652,
      "loss": 3.066,
      "step": 72232
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.571338176727295,
      "learning_rate": 0.0004658809509412914,
      "loss": 2.8629,
      "step": 72233
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5562862157821655,
      "learning_rate": 0.0004658775425737771,
      "loss": 3.0385,
      "step": 72234
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8434160947799683,
      "learning_rate": 0.00046587413417542307,
      "loss": 2.7048,
      "step": 72235
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.502026915550232,
      "learning_rate": 0.00046587072574622994,
      "loss": 3.0013,
      "step": 72236
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6777887344360352,
      "learning_rate": 0.00046586731728619815,
      "loss": 2.8494,
      "step": 72237
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.724410057067871,
      "learning_rate": 0.0004658639087953286,
      "loss": 3.1717,
      "step": 72238
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6947888135910034,
      "learning_rate": 0.00046586050027362186,
      "loss": 3.0117,
      "step": 72239
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6768277883529663,
      "learning_rate": 0.0004658570917210785,
      "loss": 2.7663,
      "step": 72240
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9221755266189575,
      "learning_rate": 0.0004658536831376991,
      "loss": 3.021,
      "step": 72241
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.09788179397583,
      "learning_rate": 0.0004658502745234845,
      "loss": 2.8712,
      "step": 72242
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5849568843841553,
      "learning_rate": 0.00046584686587843504,
      "loss": 2.841,
      "step": 72243
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.002445936203003,
      "learning_rate": 0.00046584345720255166,
      "loss": 2.9355,
      "step": 72244
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.388986110687256,
      "learning_rate": 0.0004658400484958348,
      "loss": 3.1556,
      "step": 72245
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.616204261779785,
      "learning_rate": 0.00046583663975828516,
      "loss": 3.0238,
      "step": 72246
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5054824352264404,
      "learning_rate": 0.0004658332309899033,
      "loss": 2.946,
      "step": 72247
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5936740636825562,
      "learning_rate": 0.00046582982219069,
      "loss": 3.2086,
      "step": 72248
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.797849178314209,
      "learning_rate": 0.0004658264133606457,
      "loss": 3.0932,
      "step": 72249
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6060110330581665,
      "learning_rate": 0.0004658230044997712,
      "loss": 2.9499,
      "step": 72250
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.463689923286438,
      "learning_rate": 0.00046581959560806706,
      "loss": 3.0612,
      "step": 72251
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5703169107437134,
      "learning_rate": 0.00046581618668553403,
      "loss": 2.9509,
      "step": 72252
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6369963884353638,
      "learning_rate": 0.0004658127777321725,
      "loss": 2.8289,
      "step": 72253
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5240005254745483,
      "learning_rate": 0.00046580936874798335,
      "loss": 3.0877,
      "step": 72254
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9561995267868042,
      "learning_rate": 0.00046580595973296694,
      "loss": 3.1764,
      "step": 72255
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.618532419204712,
      "learning_rate": 0.0004658025506871243,
      "loss": 3.0125,
      "step": 72256
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6350488662719727,
      "learning_rate": 0.00046579914161045565,
      "loss": 3.1821,
      "step": 72257
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8505957126617432,
      "learning_rate": 0.00046579573250296195,
      "loss": 3.0622,
      "step": 72258
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4883586168289185,
      "learning_rate": 0.0004657923233646436,
      "loss": 3.0898,
      "step": 72259
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5461976528167725,
      "learning_rate": 0.00046578891419550133,
      "loss": 3.1493,
      "step": 72260
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8309024572372437,
      "learning_rate": 0.00046578550499553576,
      "loss": 2.7489,
      "step": 72261
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.897358775138855,
      "learning_rate": 0.00046578209576474757,
      "loss": 3.1279,
      "step": 72262
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7933169603347778,
      "learning_rate": 0.00046577868650313735,
      "loss": 3.0473,
      "step": 72263
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8822212219238281,
      "learning_rate": 0.00046577527721070576,
      "loss": 3.1522,
      "step": 72264
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5632562637329102,
      "learning_rate": 0.0004657718678874534,
      "loss": 3.1054,
      "step": 72265
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8249112367630005,
      "learning_rate": 0.00046576845853338095,
      "loss": 2.9728,
      "step": 72266
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.26379656791687,
      "learning_rate": 0.000465765049148489,
      "loss": 3.0214,
      "step": 72267
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.698641300201416,
      "learning_rate": 0.0004657616397327782,
      "loss": 3.0696,
      "step": 72268
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8649016618728638,
      "learning_rate": 0.00046575823028624914,
      "loss": 2.9251,
      "step": 72269
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6881543397903442,
      "learning_rate": 0.0004657548208089025,
      "loss": 3.1332,
      "step": 72270
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5136871337890625,
      "learning_rate": 0.00046575141130073894,
      "loss": 2.8748,
      "step": 72271
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.325577735900879,
      "learning_rate": 0.0004657480017617591,
      "loss": 3.1258,
      "step": 72272
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.989540696144104,
      "learning_rate": 0.0004657445921919635,
      "loss": 2.886,
      "step": 72273
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9005342721939087,
      "learning_rate": 0.00046574118259135294,
      "loss": 2.899,
      "step": 72274
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7126197814941406,
      "learning_rate": 0.00046573777295992787,
      "loss": 2.9399,
      "step": 72275
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7390865087509155,
      "learning_rate": 0.00046573436329768906,
      "loss": 3.1346,
      "step": 72276
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2630860805511475,
      "learning_rate": 0.0004657309536046371,
      "loss": 2.8197,
      "step": 72277
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.712597131729126,
      "learning_rate": 0.0004657275438807727,
      "loss": 2.9454,
      "step": 72278
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1554088592529297,
      "learning_rate": 0.00046572413412609634,
      "loss": 3.2168,
      "step": 72279
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6686984300613403,
      "learning_rate": 0.00046572072434060874,
      "loss": 2.8551,
      "step": 72280
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8936506509780884,
      "learning_rate": 0.00046571731452431055,
      "loss": 3.1367,
      "step": 72281
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7010899782180786,
      "learning_rate": 0.0004657139046772024,
      "loss": 3.0239,
      "step": 72282
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6810309886932373,
      "learning_rate": 0.00046571049479928486,
      "loss": 2.9985,
      "step": 72283
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9730995893478394,
      "learning_rate": 0.00046570708489055866,
      "loss": 3.0458,
      "step": 72284
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.759440302848816,
      "learning_rate": 0.0004657036749510243,
      "loss": 3.2514,
      "step": 72285
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9818605184555054,
      "learning_rate": 0.00046570026498068257,
      "loss": 3.1765,
      "step": 72286
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.623077630996704,
      "learning_rate": 0.00046569685497953404,
      "loss": 3.2672,
      "step": 72287
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8759452104568481,
      "learning_rate": 0.0004656934449475793,
      "loss": 3.0625,
      "step": 72288
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9960881471633911,
      "learning_rate": 0.00046569003488481904,
      "loss": 3.0074,
      "step": 72289
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.049880266189575,
      "learning_rate": 0.00046568662479125393,
      "loss": 3.0622,
      "step": 72290
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0665183067321777,
      "learning_rate": 0.00046568321466688447,
      "loss": 3.1568,
      "step": 72291
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.588808536529541,
      "learning_rate": 0.00046567980451171136,
      "loss": 2.8628,
      "step": 72292
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5784441232681274,
      "learning_rate": 0.00046567639432573536,
      "loss": 2.9755,
      "step": 72293
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6252630949020386,
      "learning_rate": 0.00046567298410895685,
      "loss": 2.9909,
      "step": 72294
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5003304481506348,
      "learning_rate": 0.00046566957386137665,
      "loss": 3.0098,
      "step": 72295
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8243621587753296,
      "learning_rate": 0.0004656661635829954,
      "loss": 2.9185,
      "step": 72296
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7527105808258057,
      "learning_rate": 0.00046566275327381365,
      "loss": 2.8957,
      "step": 72297
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4985194206237793,
      "learning_rate": 0.00046565934293383204,
      "loss": 3.1039,
      "step": 72298
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4291019439697266,
      "learning_rate": 0.00046565593256305124,
      "loss": 3.0868,
      "step": 72299
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.970883846282959,
      "learning_rate": 0.00046565252216147193,
      "loss": 2.9478,
      "step": 72300
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6816295385360718,
      "learning_rate": 0.00046564911172909467,
      "loss": 2.9654,
      "step": 72301
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.5421664714813232,
      "learning_rate": 0.00046564570126592,
      "loss": 3.0006,
      "step": 72302
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.4377057552337646,
      "learning_rate": 0.0004656422907719488,
      "loss": 3.1198,
      "step": 72303
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.9050350189208984,
      "learning_rate": 0.0004656388802471815,
      "loss": 2.6835,
      "step": 72304
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2279212474823,
      "learning_rate": 0.0004656354696916189,
      "loss": 3.034,
      "step": 72305
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.125514030456543,
      "learning_rate": 0.0004656320591052615,
      "loss": 3.275,
      "step": 72306
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2147858142852783,
      "learning_rate": 0.0004656286484881099,
      "loss": 3.1565,
      "step": 72307
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9124466180801392,
      "learning_rate": 0.0004656252378401649,
      "loss": 3.0306,
      "step": 72308
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7913752794265747,
      "learning_rate": 0.0004656218271614269,
      "loss": 2.9965,
      "step": 72309
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.657097339630127,
      "learning_rate": 0.0004656184164518968,
      "loss": 2.8635,
      "step": 72310
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5430577993392944,
      "learning_rate": 0.00046561500571157515,
      "loss": 2.8759,
      "step": 72311
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.029245138168335,
      "learning_rate": 0.00046561159494046245,
      "loss": 3.1054,
      "step": 72312
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5690544843673706,
      "learning_rate": 0.0004656081841385595,
      "loss": 3.0876,
      "step": 72313
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7622829675674438,
      "learning_rate": 0.0004656047733058668,
      "loss": 2.9358,
      "step": 72314
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6272884607315063,
      "learning_rate": 0.0004656013624423851,
      "loss": 2.9622,
      "step": 72315
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3880537748336792,
      "learning_rate": 0.00046559795154811493,
      "loss": 3.0231,
      "step": 72316
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7749089002609253,
      "learning_rate": 0.0004655945406230569,
      "loss": 2.8729,
      "step": 72317
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.251790761947632,
      "learning_rate": 0.0004655911296672119,
      "loss": 3.0229,
      "step": 72318
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6996573209762573,
      "learning_rate": 0.00046558771868058027,
      "loss": 3.0392,
      "step": 72319
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0320706367492676,
      "learning_rate": 0.0004655843076631628,
      "loss": 3.2053,
      "step": 72320
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.626491904258728,
      "learning_rate": 0.0004655808966149601,
      "loss": 2.9046,
      "step": 72321
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8967081308364868,
      "learning_rate": 0.0004655774855359727,
      "loss": 2.7892,
      "step": 72322
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.351887822151184,
      "learning_rate": 0.0004655740744262014,
      "loss": 3.0548,
      "step": 72323
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5104787349700928,
      "learning_rate": 0.00046557066328564673,
      "loss": 2.7139,
      "step": 72324
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.406412959098816,
      "learning_rate": 0.0004655672521143093,
      "loss": 3.0551,
      "step": 72325
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2300119400024414,
      "learning_rate": 0.00046556384091218987,
      "loss": 3.1119,
      "step": 72326
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.055523157119751,
      "learning_rate": 0.000465560429679289,
      "loss": 2.8082,
      "step": 72327
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4682987928390503,
      "learning_rate": 0.00046555701841560727,
      "loss": 2.8077,
      "step": 72328
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7028664350509644,
      "learning_rate": 0.00046555360712114536,
      "loss": 2.8612,
      "step": 72329
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5296597480773926,
      "learning_rate": 0.0004655501957959039,
      "loss": 2.9165,
      "step": 72330
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.473771572113037,
      "learning_rate": 0.0004655467844398836,
      "loss": 3.4321,
      "step": 72331
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.543819785118103,
      "learning_rate": 0.00046554337305308496,
      "loss": 3.0614,
      "step": 72332
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9662280082702637,
      "learning_rate": 0.0004655399616355088,
      "loss": 3.0672,
      "step": 72333
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9875446557998657,
      "learning_rate": 0.0004655365501871555,
      "loss": 2.8776,
      "step": 72334
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7689865827560425,
      "learning_rate": 0.0004655331387080259,
      "loss": 2.8064,
      "step": 72335
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.544655442237854,
      "learning_rate": 0.0004655297271981206,
      "loss": 2.9677,
      "step": 72336
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4924148321151733,
      "learning_rate": 0.0004655263156574401,
      "loss": 3.0927,
      "step": 72337
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.635850429534912,
      "learning_rate": 0.0004655229040859851,
      "loss": 2.9907,
      "step": 72338
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.364295244216919,
      "learning_rate": 0.0004655194924837564,
      "loss": 3.1803,
      "step": 72339
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7124664783477783,
      "learning_rate": 0.00046551608085075444,
      "loss": 3.0405,
      "step": 72340
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7865238189697266,
      "learning_rate": 0.0004655126691869799,
      "loss": 2.8121,
      "step": 72341
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.01741361618042,
      "learning_rate": 0.0004655092574924335,
      "loss": 3.0546,
      "step": 72342
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6032112836837769,
      "learning_rate": 0.0004655058457671157,
      "loss": 3.0483,
      "step": 72343
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5434143543243408,
      "learning_rate": 0.00046550243401102733,
      "loss": 2.8996,
      "step": 72344
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.4390554428100586,
      "learning_rate": 0.00046549902222416887,
      "loss": 2.9317,
      "step": 72345
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6820193529129028,
      "learning_rate": 0.00046549561040654114,
      "loss": 2.7675,
      "step": 72346
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3529930114746094,
      "learning_rate": 0.0004654921985581445,
      "loss": 2.758,
      "step": 72347
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7111130952835083,
      "learning_rate": 0.0004654887866789798,
      "loss": 3.0449,
      "step": 72348
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.637610673904419,
      "learning_rate": 0.0004654853747690475,
      "loss": 2.9313,
      "step": 72349
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.117377281188965,
      "learning_rate": 0.00046548196282834853,
      "loss": 3.1138,
      "step": 72350
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.521519660949707,
      "learning_rate": 0.0004654785508568833,
      "loss": 3.2504,
      "step": 72351
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6594712734222412,
      "learning_rate": 0.0004654751388546524,
      "loss": 3.153,
      "step": 72352
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9591209888458252,
      "learning_rate": 0.00046547172682165664,
      "loss": 3.196,
      "step": 72353
    },
    {
      "epoch": 0.94,
      "grad_norm": 5.078249454498291,
      "learning_rate": 0.00046546831475789653,
      "loss": 3.0316,
      "step": 72354
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6576964855194092,
      "learning_rate": 0.0004654649026633727,
      "loss": 3.0048,
      "step": 72355
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5148271322250366,
      "learning_rate": 0.0004654614905380858,
      "loss": 3.0031,
      "step": 72356
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.866884231567383,
      "learning_rate": 0.00046545807838203657,
      "loss": 2.9786,
      "step": 72357
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.734917163848877,
      "learning_rate": 0.00046545466619522547,
      "loss": 3.146,
      "step": 72358
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.167109727859497,
      "learning_rate": 0.0004654512539776533,
      "loss": 3.0354,
      "step": 72359
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.910621166229248,
      "learning_rate": 0.0004654478417293206,
      "loss": 2.9584,
      "step": 72360
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.596935749053955,
      "learning_rate": 0.00046544442945022806,
      "loss": 2.8303,
      "step": 72361
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0899908542633057,
      "learning_rate": 0.00046544101714037617,
      "loss": 3.0579,
      "step": 72362
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7765823602676392,
      "learning_rate": 0.0004654376047997658,
      "loss": 3.1812,
      "step": 72363
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.079559803009033,
      "learning_rate": 0.00046543419242839736,
      "loss": 3.0436,
      "step": 72364
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7869826555252075,
      "learning_rate": 0.0004654307800262716,
      "loss": 3.1727,
      "step": 72365
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5840264558792114,
      "learning_rate": 0.0004654273675933891,
      "loss": 2.8508,
      "step": 72366
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6532864570617676,
      "learning_rate": 0.00046542395512975065,
      "loss": 3.3077,
      "step": 72367
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.651912808418274,
      "learning_rate": 0.00046542054263535665,
      "loss": 3.1184,
      "step": 72368
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7736341953277588,
      "learning_rate": 0.0004654171301102079,
      "loss": 3.2221,
      "step": 72369
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.639156460762024,
      "learning_rate": 0.000465413717554305,
      "loss": 2.8798,
      "step": 72370
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7124594449996948,
      "learning_rate": 0.00046541030496764855,
      "loss": 3.3329,
      "step": 72371
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4229559898376465,
      "learning_rate": 0.00046540689235023913,
      "loss": 2.9987,
      "step": 72372
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7167690992355347,
      "learning_rate": 0.0004654034797020775,
      "loss": 2.9067,
      "step": 72373
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5872770547866821,
      "learning_rate": 0.0004654000670231643,
      "loss": 2.8473,
      "step": 72374
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9241043329238892,
      "learning_rate": 0.0004653966543135,
      "loss": 2.9645,
      "step": 72375
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4440422058105469,
      "learning_rate": 0.00046539324157308546,
      "loss": 3.0449,
      "step": 72376
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.696640133857727,
      "learning_rate": 0.00046538982880192116,
      "loss": 2.8539,
      "step": 72377
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.682935357093811,
      "learning_rate": 0.0004653864160000077,
      "loss": 3.113,
      "step": 72378
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7830471992492676,
      "learning_rate": 0.0004653830031673459,
      "loss": 3.0272,
      "step": 72379
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6276189088821411,
      "learning_rate": 0.00046537959030393613,
      "loss": 2.9597,
      "step": 72380
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.821391224861145,
      "learning_rate": 0.00046537617740977923,
      "loss": 3.2187,
      "step": 72381
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.404158115386963,
      "learning_rate": 0.0004653727644848758,
      "loss": 3.0138,
      "step": 72382
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4792096614837646,
      "learning_rate": 0.00046536935152922645,
      "loss": 3.0761,
      "step": 72383
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.575413703918457,
      "learning_rate": 0.00046536593854283175,
      "loss": 3.1167,
      "step": 72384
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9827985763549805,
      "learning_rate": 0.0004653625255256925,
      "loss": 3.0722,
      "step": 72385
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4872276782989502,
      "learning_rate": 0.00046535911247780917,
      "loss": 3.2262,
      "step": 72386
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8726121187210083,
      "learning_rate": 0.00046535569939918254,
      "loss": 2.9327,
      "step": 72387
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.401347041130066,
      "learning_rate": 0.00046535228628981313,
      "loss": 3.1001,
      "step": 72388
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.948671817779541,
      "learning_rate": 0.00046534887314970155,
      "loss": 2.8133,
      "step": 72389
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4935123920440674,
      "learning_rate": 0.0004653454599788485,
      "loss": 3.0766,
      "step": 72390
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9982855319976807,
      "learning_rate": 0.00046534204677725464,
      "loss": 3.1526,
      "step": 72391
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.634468913078308,
      "learning_rate": 0.00046533863354492055,
      "loss": 2.8187,
      "step": 72392
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7268885374069214,
      "learning_rate": 0.00046533522028184684,
      "loss": 2.8945,
      "step": 72393
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0488595962524414,
      "learning_rate": 0.0004653318069880343,
      "loss": 2.9263,
      "step": 72394
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.804622769355774,
      "learning_rate": 0.00046532839366348347,
      "loss": 2.8682,
      "step": 72395
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.427180051803589,
      "learning_rate": 0.0004653249803081949,
      "loss": 3.3729,
      "step": 72396
    },
    {
      "epoch": 0.94,
      "grad_norm": 4.136999607086182,
      "learning_rate": 0.00046532156692216935,
      "loss": 2.9377,
      "step": 72397
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0093796253204346,
      "learning_rate": 0.0004653181535054073,
      "loss": 2.9782,
      "step": 72398
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5550209283828735,
      "learning_rate": 0.0004653147400579096,
      "loss": 3.0289,
      "step": 72399
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.8939521312713623,
      "learning_rate": 0.0004653113265796767,
      "loss": 2.7185,
      "step": 72400
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.9354794025421143,
      "learning_rate": 0.00046530791307070925,
      "loss": 3.1019,
      "step": 72401
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.214353322982788,
      "learning_rate": 0.00046530449953100807,
      "loss": 3.0303,
      "step": 72402
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.833277940750122,
      "learning_rate": 0.0004653010859605736,
      "loss": 2.9943,
      "step": 72403
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5023505687713623,
      "learning_rate": 0.00046529767235940653,
      "loss": 3.15,
      "step": 72404
    },
    {
      "epoch": 0.94,
      "grad_norm": 5.782943248748779,
      "learning_rate": 0.00046529425872750746,
      "loss": 2.8146,
      "step": 72405
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8493237495422363,
      "learning_rate": 0.00046529084506487716,
      "loss": 3.2108,
      "step": 72406
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5103726387023926,
      "learning_rate": 0.0004652874313715161,
      "loss": 2.8442,
      "step": 72407
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8786429166793823,
      "learning_rate": 0.000465284017647425,
      "loss": 3.0583,
      "step": 72408
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3070435523986816,
      "learning_rate": 0.00046528060389260453,
      "loss": 3.1087,
      "step": 72409
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0767180919647217,
      "learning_rate": 0.0004652771901070552,
      "loss": 2.9768,
      "step": 72410
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4412791728973389,
      "learning_rate": 0.00046527377629077774,
      "loss": 3.0119,
      "step": 72411
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9799108505249023,
      "learning_rate": 0.0004652703624437728,
      "loss": 3.0211,
      "step": 72412
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0949337482452393,
      "learning_rate": 0.00046526694856604097,
      "loss": 2.9565,
      "step": 72413
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.274646520614624,
      "learning_rate": 0.0004652635346575829,
      "loss": 2.7953,
      "step": 72414
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9303251504898071,
      "learning_rate": 0.0004652601207183993,
      "loss": 3.1941,
      "step": 72415
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.169659376144409,
      "learning_rate": 0.00046525670674849056,
      "loss": 3.0299,
      "step": 72416
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8198204040527344,
      "learning_rate": 0.00046525329274785753,
      "loss": 3.1319,
      "step": 72417
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.9265880584716797,
      "learning_rate": 0.0004652498787165008,
      "loss": 2.9882,
      "step": 72418
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7010862827301025,
      "learning_rate": 0.00046524646465442106,
      "loss": 3.2318,
      "step": 72419
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.7685766220092773,
      "learning_rate": 0.0004652430505616188,
      "loss": 3.1054,
      "step": 72420
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6655070781707764,
      "learning_rate": 0.0004652396364380948,
      "loss": 2.8462,
      "step": 72421
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.543309211730957,
      "learning_rate": 0.00046523622228384964,
      "loss": 2.9365,
      "step": 72422
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.8573925495147705,
      "learning_rate": 0.0004652328080988839,
      "loss": 3.0301,
      "step": 72423
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0478882789611816,
      "learning_rate": 0.00046522939388319825,
      "loss": 3.1537,
      "step": 72424
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.823744773864746,
      "learning_rate": 0.0004652259796367934,
      "loss": 2.8593,
      "step": 72425
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7244460582733154,
      "learning_rate": 0.0004652225653596699,
      "loss": 3.0543,
      "step": 72426
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2452661991119385,
      "learning_rate": 0.0004652191510518284,
      "loss": 2.9887,
      "step": 72427
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7879526615142822,
      "learning_rate": 0.0004652157367132695,
      "loss": 3.1035,
      "step": 72428
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5182045698165894,
      "learning_rate": 0.00046521232234399395,
      "loss": 3.242,
      "step": 72429
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5355346202850342,
      "learning_rate": 0.00046520890794400226,
      "loss": 3.0892,
      "step": 72430
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.712614893913269,
      "learning_rate": 0.00046520549351329507,
      "loss": 3.0631,
      "step": 72431
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.103541612625122,
      "learning_rate": 0.00046520207905187313,
      "loss": 2.9066,
      "step": 72432
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.79829740524292,
      "learning_rate": 0.000465198664559737,
      "loss": 2.6825,
      "step": 72433
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9374064207077026,
      "learning_rate": 0.0004651952500368873,
      "loss": 2.9297,
      "step": 72434
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0367045402526855,
      "learning_rate": 0.0004651918354833248,
      "loss": 3.0213,
      "step": 72435
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3271732330322266,
      "learning_rate": 0.00046518842089904983,
      "loss": 2.9579,
      "step": 72436
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8346670866012573,
      "learning_rate": 0.00046518500628406334,
      "loss": 3.1875,
      "step": 72437
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9356937408447266,
      "learning_rate": 0.0004651815916383658,
      "loss": 3.2995,
      "step": 72438
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.839249849319458,
      "learning_rate": 0.00046517817696195785,
      "loss": 3.1615,
      "step": 72439
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.804661512374878,
      "learning_rate": 0.00046517476225484014,
      "loss": 2.8895,
      "step": 72440
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.689780592918396,
      "learning_rate": 0.0004651713475170134,
      "loss": 2.7582,
      "step": 72441
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.947805643081665,
      "learning_rate": 0.0004651679327484781,
      "loss": 3.1181,
      "step": 72442
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.3858325481414795,
      "learning_rate": 0.00046516451794923516,
      "loss": 3.0888,
      "step": 72443
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.7264442443847656,
      "learning_rate": 0.0004651611031192848,
      "loss": 3.0887,
      "step": 72444
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4268344640731812,
      "learning_rate": 0.00046515768825862794,
      "loss": 2.9943,
      "step": 72445
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.0649120807647705,
      "learning_rate": 0.0004651542733672652,
      "loss": 3.1234,
      "step": 72446
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0977978706359863,
      "learning_rate": 0.00046515085844519715,
      "loss": 2.9194,
      "step": 72447
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9309650659561157,
      "learning_rate": 0.0004651474434924244,
      "loss": 2.8029,
      "step": 72448
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1344943046569824,
      "learning_rate": 0.00046514402850894766,
      "loss": 3.1135,
      "step": 72449
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5406956672668457,
      "learning_rate": 0.0004651406134947675,
      "loss": 2.9451,
      "step": 72450
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.584584355354309,
      "learning_rate": 0.0004651371984498845,
      "loss": 2.9583,
      "step": 72451
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7295445203781128,
      "learning_rate": 0.0004651337833742995,
      "loss": 2.9765,
      "step": 72452
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9101066589355469,
      "learning_rate": 0.00046513036826801305,
      "loss": 2.9002,
      "step": 72453
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5787497758865356,
      "learning_rate": 0.00046512695313102566,
      "loss": 2.8194,
      "step": 72454
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5383989810943604,
      "learning_rate": 0.00046512353796333806,
      "loss": 3.2477,
      "step": 72455
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4470360279083252,
      "learning_rate": 0.0004651201227649509,
      "loss": 2.8264,
      "step": 72456
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6773600578308105,
      "learning_rate": 0.00046511670753586476,
      "loss": 3.0576,
      "step": 72457
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6024951934814453,
      "learning_rate": 0.00046511329227608036,
      "loss": 2.9534,
      "step": 72458
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7249929904937744,
      "learning_rate": 0.00046510987698559825,
      "loss": 2.8185,
      "step": 72459
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.030230760574341,
      "learning_rate": 0.000465106461664419,
      "loss": 2.8341,
      "step": 72460
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8148066997528076,
      "learning_rate": 0.00046510304631254356,
      "loss": 3.1283,
      "step": 72461
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0682246685028076,
      "learning_rate": 0.0004650996309299722,
      "loss": 2.995,
      "step": 72462
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.974420428276062,
      "learning_rate": 0.0004650962155167057,
      "loss": 3.0548,
      "step": 72463
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6182491779327393,
      "learning_rate": 0.0004650928000727448,
      "loss": 3.289,
      "step": 72464
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0082662105560303,
      "learning_rate": 0.00046508938459808994,
      "loss": 3.1634,
      "step": 72465
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6020230054855347,
      "learning_rate": 0.00046508596909274183,
      "loss": 3.2104,
      "step": 72466
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7255032062530518,
      "learning_rate": 0.0004650825535567012,
      "loss": 3.1201,
      "step": 72467
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6274622678756714,
      "learning_rate": 0.00046507913798996854,
      "loss": 3.1173,
      "step": 72468
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.857538938522339,
      "learning_rate": 0.00046507572239254456,
      "loss": 3.0479,
      "step": 72469
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.08566951751709,
      "learning_rate": 0.00046507230676442996,
      "loss": 2.7931,
      "step": 72470
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6706284284591675,
      "learning_rate": 0.00046506889110562523,
      "loss": 2.9336,
      "step": 72471
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9211751222610474,
      "learning_rate": 0.00046506547541613107,
      "loss": 2.9787,
      "step": 72472
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1842429637908936,
      "learning_rate": 0.0004650620596959482,
      "loss": 3.0974,
      "step": 72473
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9446327686309814,
      "learning_rate": 0.0004650586439450771,
      "loss": 2.7484,
      "step": 72474
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6788272857666016,
      "learning_rate": 0.0004650552281635185,
      "loss": 3.0862,
      "step": 72475
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.009965181350708,
      "learning_rate": 0.000465051812351273,
      "loss": 3.0226,
      "step": 72476
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4710135459899902,
      "learning_rate": 0.0004650483965083413,
      "loss": 3.0386,
      "step": 72477
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.466185212135315,
      "learning_rate": 0.0004650449806347239,
      "loss": 3.0926,
      "step": 72478
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7514480352401733,
      "learning_rate": 0.00046504156473042163,
      "loss": 3.2636,
      "step": 72479
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0013065338134766,
      "learning_rate": 0.00046503814879543493,
      "loss": 3.1327,
      "step": 72480
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6779539585113525,
      "learning_rate": 0.0004650347328297646,
      "loss": 3.0436,
      "step": 72481
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9100697040557861,
      "learning_rate": 0.00046503131683341115,
      "loss": 3.1133,
      "step": 72482
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0852105617523193,
      "learning_rate": 0.0004650279008063753,
      "loss": 3.0765,
      "step": 72483
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7684571743011475,
      "learning_rate": 0.00046502448474865764,
      "loss": 3.008,
      "step": 72484
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.907858967781067,
      "learning_rate": 0.0004650210686602587,
      "loss": 3.1357,
      "step": 72485
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6442028284072876,
      "learning_rate": 0.00046501765254117937,
      "loss": 2.8957,
      "step": 72486
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9192386865615845,
      "learning_rate": 0.0004650142363914201,
      "loss": 2.8672,
      "step": 72487
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8464926481246948,
      "learning_rate": 0.0004650108202109816,
      "loss": 3.2322,
      "step": 72488
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1371443271636963,
      "learning_rate": 0.00046500740399986446,
      "loss": 2.9636,
      "step": 72489
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5139098167419434,
      "learning_rate": 0.00046500398775806926,
      "loss": 3.0444,
      "step": 72490
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7796205282211304,
      "learning_rate": 0.0004650005714855968,
      "loss": 3.126,
      "step": 72491
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7767517566680908,
      "learning_rate": 0.0004649971551824476,
      "loss": 3.1958,
      "step": 72492
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.364205002784729,
      "learning_rate": 0.00046499373884862223,
      "loss": 3.0766,
      "step": 72493
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5273690223693848,
      "learning_rate": 0.0004649903224841215,
      "loss": 2.993,
      "step": 72494
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5248091220855713,
      "learning_rate": 0.00046498690608894587,
      "loss": 3.0317,
      "step": 72495
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4228334426879883,
      "learning_rate": 0.0004649834896630961,
      "loss": 2.9841,
      "step": 72496
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7571102380752563,
      "learning_rate": 0.00046498007320657284,
      "loss": 3.128,
      "step": 72497
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8184642791748047,
      "learning_rate": 0.0004649766567193766,
      "loss": 3.1923,
      "step": 72498
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.056220054626465,
      "learning_rate": 0.00046497324020150813,
      "loss": 3.1934,
      "step": 72499
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.982227087020874,
      "learning_rate": 0.00046496982365296796,
      "loss": 3.1086,
      "step": 72500
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9335237741470337,
      "learning_rate": 0.0004649664070737569,
      "loss": 2.9969,
      "step": 72501
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8854858875274658,
      "learning_rate": 0.0004649629904638754,
      "loss": 2.9157,
      "step": 72502
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.8700149059295654,
      "learning_rate": 0.00046495957382332407,
      "loss": 2.7787,
      "step": 72503
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8423879146575928,
      "learning_rate": 0.0004649561571521038,
      "loss": 3.1087,
      "step": 72504
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.403602123260498,
      "learning_rate": 0.000464952740450215,
      "loss": 3.0473,
      "step": 72505
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1894302368164062,
      "learning_rate": 0.0004649493237176584,
      "loss": 2.9133,
      "step": 72506
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8342559337615967,
      "learning_rate": 0.00046494590695443454,
      "loss": 3.0738,
      "step": 72507
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.548518419265747,
      "learning_rate": 0.0004649424901605442,
      "loss": 3.1705,
      "step": 72508
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7365663051605225,
      "learning_rate": 0.0004649390733359879,
      "loss": 2.9358,
      "step": 72509
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.043393850326538,
      "learning_rate": 0.0004649356564807663,
      "loss": 3.0797,
      "step": 72510
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6303043365478516,
      "learning_rate": 0.00046493223959488006,
      "loss": 2.7872,
      "step": 72511
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7722042798995972,
      "learning_rate": 0.00046492882267832984,
      "loss": 3.4193,
      "step": 72512
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0437116622924805,
      "learning_rate": 0.0004649254057311163,
      "loss": 3.0211,
      "step": 72513
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8606268167495728,
      "learning_rate": 0.0004649219887532398,
      "loss": 3.1394,
      "step": 72514
    },
    {
      "epoch": 0.94,
      "grad_norm": 5.266709327697754,
      "learning_rate": 0.00046491857174470126,
      "loss": 2.9653,
      "step": 72515
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.7398509979248047,
      "learning_rate": 0.0004649151547055014,
      "loss": 3.0562,
      "step": 72516
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8523533344268799,
      "learning_rate": 0.00046491173763564054,
      "loss": 2.723,
      "step": 72517
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.679915428161621,
      "learning_rate": 0.0004649083205351195,
      "loss": 3.0585,
      "step": 72518
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.7314226627349854,
      "learning_rate": 0.00046490490340393893,
      "loss": 3.071,
      "step": 72519
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.7965657711029053,
      "learning_rate": 0.00046490148624209947,
      "loss": 3.1375,
      "step": 72520
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4659340381622314,
      "learning_rate": 0.0004648980690496016,
      "loss": 3.2659,
      "step": 72521
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6867485046386719,
      "learning_rate": 0.00046489465182644616,
      "loss": 3.2838,
      "step": 72522
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.520566463470459,
      "learning_rate": 0.00046489123457263366,
      "loss": 3.2227,
      "step": 72523
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.9950206279754639,
      "learning_rate": 0.0004648878172881647,
      "loss": 2.9616,
      "step": 72524
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.3358328342437744,
      "learning_rate": 0.0004648843999730401,
      "loss": 3.1487,
      "step": 72525
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.702845573425293,
      "learning_rate": 0.0004648809826272603,
      "loss": 2.9902,
      "step": 72526
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3901615142822266,
      "learning_rate": 0.00046487756525082606,
      "loss": 3.1334,
      "step": 72527
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.415801525115967,
      "learning_rate": 0.00046487414784373795,
      "loss": 3.0681,
      "step": 72528
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0374255180358887,
      "learning_rate": 0.0004648707304059965,
      "loss": 3.0084,
      "step": 72529
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.798879623413086,
      "learning_rate": 0.0004648673129376026,
      "loss": 3.2169,
      "step": 72530
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.353153705596924,
      "learning_rate": 0.0004648638954385568,
      "loss": 2.8289,
      "step": 72531
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6902694702148438,
      "learning_rate": 0.0004648604779088596,
      "loss": 3.0003,
      "step": 72532
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5743350982666016,
      "learning_rate": 0.0004648570603485118,
      "loss": 2.8562,
      "step": 72533
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.731476068496704,
      "learning_rate": 0.00046485364275751387,
      "loss": 3.2222,
      "step": 72534
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2079825401306152,
      "learning_rate": 0.0004648502251358666,
      "loss": 2.9834,
      "step": 72535
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.808602213859558,
      "learning_rate": 0.0004648468074835705,
      "loss": 3.1127,
      "step": 72536
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.55830979347229,
      "learning_rate": 0.0004648433898006264,
      "loss": 3.0046,
      "step": 72537
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5662708282470703,
      "learning_rate": 0.00046483997208703466,
      "loss": 3.1829,
      "step": 72538
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7821033000946045,
      "learning_rate": 0.0004648365543427961,
      "loss": 2.8782,
      "step": 72539
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.338878631591797,
      "learning_rate": 0.00046483313656791133,
      "loss": 2.9801,
      "step": 72540
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.4062600135803223,
      "learning_rate": 0.00046482971876238093,
      "loss": 2.9193,
      "step": 72541
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.083418846130371,
      "learning_rate": 0.0004648263009262056,
      "loss": 2.913,
      "step": 72542
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.148036241531372,
      "learning_rate": 0.00046482288305938597,
      "loss": 3.0383,
      "step": 72543
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.8041906356811523,
      "learning_rate": 0.00046481946516192265,
      "loss": 3.0274,
      "step": 72544
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1517677307128906,
      "learning_rate": 0.0004648160472338162,
      "loss": 3.0963,
      "step": 72545
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7606717348098755,
      "learning_rate": 0.0004648126292750675,
      "loss": 3.1439,
      "step": 72546
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5420784950256348,
      "learning_rate": 0.0004648092112856769,
      "loss": 3.0378,
      "step": 72547
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.2019593715667725,
      "learning_rate": 0.0004648057932656451,
      "loss": 2.9933,
      "step": 72548
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8319191932678223,
      "learning_rate": 0.0004648023752149729,
      "loss": 3.0018,
      "step": 72549
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1248693466186523,
      "learning_rate": 0.00046479895713366084,
      "loss": 3.139,
      "step": 72550
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.0809555053710938,
      "learning_rate": 0.0004647955390217094,
      "loss": 2.8805,
      "step": 72551
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4421790838241577,
      "learning_rate": 0.00046479212087911954,
      "loss": 3.0981,
      "step": 72552
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5668896436691284,
      "learning_rate": 0.0004647887027058917,
      "loss": 2.8645,
      "step": 72553
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4254733324050903,
      "learning_rate": 0.00046478528450202643,
      "loss": 2.7635,
      "step": 72554
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.8060946464538574,
      "learning_rate": 0.0004647818662675245,
      "loss": 3.0523,
      "step": 72555
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.684800386428833,
      "learning_rate": 0.0004647784480023865,
      "loss": 3.2536,
      "step": 72556
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.5953288078308105,
      "learning_rate": 0.00046477502970661305,
      "loss": 2.7796,
      "step": 72557
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.3540656566619873,
      "learning_rate": 0.00046477161138020487,
      "loss": 3.1032,
      "step": 72558
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.3557207584381104,
      "learning_rate": 0.0004647681930231625,
      "loss": 2.97,
      "step": 72559
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.5912115573883057,
      "learning_rate": 0.00046476477463548667,
      "loss": 3.0828,
      "step": 72560
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.4969912767410278,
      "learning_rate": 0.00046476135621717794,
      "loss": 3.2708,
      "step": 72561
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.872150421142578,
      "learning_rate": 0.0004647579377682369,
      "loss": 2.9272,
      "step": 72562
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.1136727333068848,
      "learning_rate": 0.0004647545192886642,
      "loss": 2.8994,
      "step": 72563
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.2555129528045654,
      "learning_rate": 0.0004647511007784607,
      "loss": 2.979,
      "step": 72564
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.72175931930542,
      "learning_rate": 0.0004647476822376268,
      "loss": 3.158,
      "step": 72565
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1294033527374268,
      "learning_rate": 0.00046474426366616313,
      "loss": 2.7759,
      "step": 72566
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.355769395828247,
      "learning_rate": 0.00046474084506407037,
      "loss": 2.8788,
      "step": 72567
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.358712673187256,
      "learning_rate": 0.00046473742643134933,
      "loss": 3.0492,
      "step": 72568
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.983469009399414,
      "learning_rate": 0.0004647340077680004,
      "loss": 2.9292,
      "step": 72569
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.6467549800872803,
      "learning_rate": 0.0004647305890740243,
      "loss": 3.1564,
      "step": 72570
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.456890344619751,
      "learning_rate": 0.0004647271703494217,
      "loss": 3.0739,
      "step": 72571
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.5356624126434326,
      "learning_rate": 0.0004647237515941932,
      "loss": 3.0115,
      "step": 72572
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6577707529067993,
      "learning_rate": 0.0004647203328083394,
      "loss": 2.9861,
      "step": 72573
    },
    {
      "epoch": 0.94,
      "grad_norm": 2.1961517333984375,
      "learning_rate": 0.00046471691399186104,
      "loss": 3.0728,
      "step": 72574
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.7343599796295166,
      "learning_rate": 0.0004647134951447587,
      "loss": 2.9578,
      "step": 72575
    },
    {
      "epoch": 0.94,
      "grad_norm": 1.6646168231964111,
      "learning_rate": 0.00046471007626703296,
      "loss": 3.0932,
      "step": 72576
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.321319580078125,
      "learning_rate": 0.00046470665735868453,
      "loss": 2.9698,
      "step": 72577
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.071127414703369,
      "learning_rate": 0.00046470323841971405,
      "loss": 2.929,
      "step": 72578
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5932639837265015,
      "learning_rate": 0.0004646998194501221,
      "loss": 3.0072,
      "step": 72579
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7584021091461182,
      "learning_rate": 0.00046469640044990935,
      "loss": 2.7593,
      "step": 72580
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2571349143981934,
      "learning_rate": 0.00046469298141907643,
      "loss": 2.9649,
      "step": 72581
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.8123984336853027,
      "learning_rate": 0.0004646895623576239,
      "loss": 2.8889,
      "step": 72582
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3872069120407104,
      "learning_rate": 0.0004646861432655526,
      "loss": 3.2276,
      "step": 72583
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5356191396713257,
      "learning_rate": 0.000464682724142863,
      "loss": 3.1922,
      "step": 72584
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7831143140792847,
      "learning_rate": 0.00046467930498955576,
      "loss": 2.8293,
      "step": 72585
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7932698726654053,
      "learning_rate": 0.00046467588580563154,
      "loss": 3.1382,
      "step": 72586
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1130194664001465,
      "learning_rate": 0.00046467246659109097,
      "loss": 2.859,
      "step": 72587
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8765865564346313,
      "learning_rate": 0.00046466904734593465,
      "loss": 3.4191,
      "step": 72588
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5547422170639038,
      "learning_rate": 0.0004646656280701633,
      "loss": 2.9797,
      "step": 72589
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9939903020858765,
      "learning_rate": 0.00046466220876377744,
      "loss": 2.9764,
      "step": 72590
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8329248428344727,
      "learning_rate": 0.00046465878942677775,
      "loss": 2.9898,
      "step": 72591
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9289714097976685,
      "learning_rate": 0.00046465537005916497,
      "loss": 2.8408,
      "step": 72592
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7760815620422363,
      "learning_rate": 0.00046465195066093957,
      "loss": 2.8714,
      "step": 72593
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.497196912765503,
      "learning_rate": 0.0004646485312321023,
      "loss": 2.9796,
      "step": 72594
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4819555282592773,
      "learning_rate": 0.00046464511177265376,
      "loss": 3.0644,
      "step": 72595
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7266277074813843,
      "learning_rate": 0.00046464169228259454,
      "loss": 3.0154,
      "step": 72596
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6635539531707764,
      "learning_rate": 0.00046463827276192537,
      "loss": 3.1256,
      "step": 72597
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4684247970581055,
      "learning_rate": 0.00046463485321064684,
      "loss": 3.0834,
      "step": 72598
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.043206214904785,
      "learning_rate": 0.0004646314336287596,
      "loss": 3.1331,
      "step": 72599
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.472674012184143,
      "learning_rate": 0.0004646280140162642,
      "loss": 3.1715,
      "step": 72600
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9257863759994507,
      "learning_rate": 0.00046462459437316144,
      "loss": 3.1895,
      "step": 72601
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1983773708343506,
      "learning_rate": 0.0004646211746994518,
      "loss": 2.9199,
      "step": 72602
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6349290609359741,
      "learning_rate": 0.0004646177549951359,
      "loss": 3.1998,
      "step": 72603
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5034406185150146,
      "learning_rate": 0.00046461433526021463,
      "loss": 3.1966,
      "step": 72604
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.746755838394165,
      "learning_rate": 0.0004646109154946883,
      "loss": 3.0267,
      "step": 72605
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.648020625114441,
      "learning_rate": 0.0004646074956985578,
      "loss": 3.1387,
      "step": 72606
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4962776899337769,
      "learning_rate": 0.0004646040758718236,
      "loss": 3.0364,
      "step": 72607
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4381816387176514,
      "learning_rate": 0.0004646006560144864,
      "loss": 3.1085,
      "step": 72608
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7737987041473389,
      "learning_rate": 0.0004645972361265468,
      "loss": 3.1019,
      "step": 72609
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0893986225128174,
      "learning_rate": 0.0004645938162080055,
      "loss": 2.8235,
      "step": 72610
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.5126092433929443,
      "learning_rate": 0.0004645903962588631,
      "loss": 2.984,
      "step": 72611
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3717708587646484,
      "learning_rate": 0.0004645869762791202,
      "loss": 3.0692,
      "step": 72612
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8294607400894165,
      "learning_rate": 0.00046458355626877753,
      "loss": 3.3425,
      "step": 72613
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5144827365875244,
      "learning_rate": 0.0004645801362278356,
      "loss": 3.1007,
      "step": 72614
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6363534927368164,
      "learning_rate": 0.00046457671615629526,
      "loss": 2.9253,
      "step": 72615
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6253730058670044,
      "learning_rate": 0.0004645732960541569,
      "loss": 2.8737,
      "step": 72616
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8176889419555664,
      "learning_rate": 0.0004645698759214213,
      "loss": 3.0779,
      "step": 72617
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.734342098236084,
      "learning_rate": 0.000464566455758089,
      "loss": 2.7975,
      "step": 72618
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4575369358062744,
      "learning_rate": 0.00046456303556416064,
      "loss": 3.1208,
      "step": 72619
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.877938151359558,
      "learning_rate": 0.00046455961533963707,
      "loss": 3.1512,
      "step": 72620
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5435192584991455,
      "learning_rate": 0.0004645561950845186,
      "loss": 3.0251,
      "step": 72621
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.539598822593689,
      "learning_rate": 0.00046455277479880613,
      "loss": 2.9631,
      "step": 72622
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6768238544464111,
      "learning_rate": 0.0004645493544825002,
      "loss": 3.1114,
      "step": 72623
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6302804946899414,
      "learning_rate": 0.0004645459341356013,
      "loss": 2.9507,
      "step": 72624
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9905436038970947,
      "learning_rate": 0.00046454251375811035,
      "loss": 3.1076,
      "step": 72625
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.816141366958618,
      "learning_rate": 0.0004645390933500278,
      "loss": 2.7542,
      "step": 72626
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0624454021453857,
      "learning_rate": 0.0004645356729113543,
      "loss": 3.039,
      "step": 72627
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4781029224395752,
      "learning_rate": 0.0004645322524420906,
      "loss": 2.9547,
      "step": 72628
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.035694122314453,
      "learning_rate": 0.00046452883194223713,
      "loss": 2.9688,
      "step": 72629
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0387351512908936,
      "learning_rate": 0.00046452541141179463,
      "loss": 3.2115,
      "step": 72630
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9391703605651855,
      "learning_rate": 0.0004645219908507638,
      "loss": 3.1731,
      "step": 72631
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0647826194763184,
      "learning_rate": 0.0004645185702591453,
      "loss": 3.1294,
      "step": 72632
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8159937858581543,
      "learning_rate": 0.00046451514963693965,
      "loss": 2.8381,
      "step": 72633
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8349100351333618,
      "learning_rate": 0.0004645117289841475,
      "loss": 2.8734,
      "step": 72634
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9401612281799316,
      "learning_rate": 0.00046450830830076955,
      "loss": 2.9685,
      "step": 72635
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5913710594177246,
      "learning_rate": 0.0004645048875868063,
      "loss": 2.9735,
      "step": 72636
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5234043598175049,
      "learning_rate": 0.00046450146684225857,
      "loss": 3.0022,
      "step": 72637
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1045100688934326,
      "learning_rate": 0.000464498046067127,
      "loss": 3.1355,
      "step": 72638
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7526932954788208,
      "learning_rate": 0.000464494625261412,
      "loss": 3.1129,
      "step": 72639
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7990820407867432,
      "learning_rate": 0.00046449120442511435,
      "loss": 3.0086,
      "step": 72640
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8172012567520142,
      "learning_rate": 0.00046448778355823476,
      "loss": 3.0585,
      "step": 72641
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6464593410491943,
      "learning_rate": 0.00046448436266077373,
      "loss": 2.8224,
      "step": 72642
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7553322315216064,
      "learning_rate": 0.00046448094173273197,
      "loss": 2.8894,
      "step": 72643
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7862783670425415,
      "learning_rate": 0.0004644775207741101,
      "loss": 3.1849,
      "step": 72644
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.2708799839019775,
      "learning_rate": 0.00046447409978490873,
      "loss": 3.0519,
      "step": 72645
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7507380247116089,
      "learning_rate": 0.00046447067876512855,
      "loss": 3.1902,
      "step": 72646
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4105585813522339,
      "learning_rate": 0.0004644672577147702,
      "loss": 3.0164,
      "step": 72647
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8187583684921265,
      "learning_rate": 0.0004644638366338342,
      "loss": 3.1015,
      "step": 72648
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0385711193084717,
      "learning_rate": 0.00046446041552232134,
      "loss": 3.1366,
      "step": 72649
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5709757804870605,
      "learning_rate": 0.0004644569943802321,
      "loss": 2.9907,
      "step": 72650
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7653846740722656,
      "learning_rate": 0.0004644535732075672,
      "loss": 3.0018,
      "step": 72651
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8698980808258057,
      "learning_rate": 0.00046445015200432735,
      "loss": 2.9534,
      "step": 72652
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5963267087936401,
      "learning_rate": 0.0004644467307705132,
      "loss": 3.1299,
      "step": 72653
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8216302394866943,
      "learning_rate": 0.00046444330950612514,
      "loss": 3.0899,
      "step": 72654
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1228840351104736,
      "learning_rate": 0.00046443988821116394,
      "loss": 3.0167,
      "step": 72655
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5458190441131592,
      "learning_rate": 0.00046443646688563035,
      "loss": 3.0235,
      "step": 72656
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1491429805755615,
      "learning_rate": 0.00046443304552952486,
      "loss": 3.0352,
      "step": 72657
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4486547708511353,
      "learning_rate": 0.0004644296241428482,
      "loss": 2.9105,
      "step": 72658
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9480177164077759,
      "learning_rate": 0.000464426202725601,
      "loss": 2.992,
      "step": 72659
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.194330930709839,
      "learning_rate": 0.0004644227812777839,
      "loss": 2.8423,
      "step": 72660
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.561842203140259,
      "learning_rate": 0.00046441935979939735,
      "loss": 3.118,
      "step": 72661
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.714439868927002,
      "learning_rate": 0.00046441593829044223,
      "loss": 2.9843,
      "step": 72662
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4849605560302734,
      "learning_rate": 0.0004644125167509191,
      "loss": 2.8415,
      "step": 72663
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8071539402008057,
      "learning_rate": 0.0004644090951808286,
      "loss": 3.1256,
      "step": 72664
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7068756818771362,
      "learning_rate": 0.0004644056735801712,
      "loss": 2.6675,
      "step": 72665
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.271458148956299,
      "learning_rate": 0.0004644022519489478,
      "loss": 3.1126,
      "step": 72666
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1275534629821777,
      "learning_rate": 0.00046439883028715894,
      "loss": 3.102,
      "step": 72667
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5456624031066895,
      "learning_rate": 0.0004643954085948053,
      "loss": 3.0709,
      "step": 72668
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6757174730300903,
      "learning_rate": 0.00046439198687188725,
      "loss": 3.2394,
      "step": 72669
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.552855134010315,
      "learning_rate": 0.0004643885651184057,
      "loss": 2.8471,
      "step": 72670
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5169339179992676,
      "learning_rate": 0.0004643851433343613,
      "loss": 2.9819,
      "step": 72671
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6511964797973633,
      "learning_rate": 0.00046438172151975446,
      "loss": 3.1443,
      "step": 72672
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.084397315979004,
      "learning_rate": 0.00046437829967458603,
      "loss": 2.9293,
      "step": 72673
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6671638488769531,
      "learning_rate": 0.0004643748777988566,
      "loss": 3.2287,
      "step": 72674
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8034533262252808,
      "learning_rate": 0.00046437145589256674,
      "loss": 2.9592,
      "step": 72675
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.505920648574829,
      "learning_rate": 0.00046436803395571714,
      "loss": 2.8956,
      "step": 72676
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7245010137557983,
      "learning_rate": 0.00046436461198830843,
      "loss": 3.3588,
      "step": 72677
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.422971487045288,
      "learning_rate": 0.00046436118999034116,
      "loss": 3.0288,
      "step": 72678
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6227418184280396,
      "learning_rate": 0.0004643577679618161,
      "loss": 3.1116,
      "step": 72679
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.475669026374817,
      "learning_rate": 0.00046435434590273386,
      "loss": 3.0709,
      "step": 72680
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7765586376190186,
      "learning_rate": 0.000464350923813095,
      "loss": 3.089,
      "step": 72681
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4775911569595337,
      "learning_rate": 0.00046434750169290027,
      "loss": 2.9515,
      "step": 72682
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.228473663330078,
      "learning_rate": 0.0004643440795421502,
      "loss": 2.9881,
      "step": 72683
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7782065868377686,
      "learning_rate": 0.0004643406573608454,
      "loss": 3.0573,
      "step": 72684
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5193718671798706,
      "learning_rate": 0.00046433723514898665,
      "loss": 2.8133,
      "step": 72685
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4744107723236084,
      "learning_rate": 0.0004643338129065745,
      "loss": 3.0678,
      "step": 72686
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7217788696289062,
      "learning_rate": 0.0004643303906336095,
      "loss": 2.7375,
      "step": 72687
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7709760665893555,
      "learning_rate": 0.0004643269683300924,
      "loss": 3.1969,
      "step": 72688
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8070799112319946,
      "learning_rate": 0.00046432354599602394,
      "loss": 3.0486,
      "step": 72689
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.396979570388794,
      "learning_rate": 0.00046432012363140454,
      "loss": 3.0097,
      "step": 72690
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4115126132965088,
      "learning_rate": 0.0004643167012362349,
      "loss": 3.0103,
      "step": 72691
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2645580768585205,
      "learning_rate": 0.00046431327881051574,
      "loss": 3.0803,
      "step": 72692
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5167418718338013,
      "learning_rate": 0.00046430985635424765,
      "loss": 3.0547,
      "step": 72693
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4780466556549072,
      "learning_rate": 0.0004643064338674312,
      "loss": 2.9064,
      "step": 72694
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6836382150650024,
      "learning_rate": 0.0004643030113500671,
      "loss": 3.0985,
      "step": 72695
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4259799718856812,
      "learning_rate": 0.00046429958880215595,
      "loss": 3.0158,
      "step": 72696
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6009228229522705,
      "learning_rate": 0.00046429616622369844,
      "loss": 2.994,
      "step": 72697
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.095250129699707,
      "learning_rate": 0.00046429274361469515,
      "loss": 3.0635,
      "step": 72698
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.083040475845337,
      "learning_rate": 0.0004642893209751468,
      "loss": 2.9325,
      "step": 72699
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4500892162322998,
      "learning_rate": 0.0004642858983050538,
      "loss": 2.8119,
      "step": 72700
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8785834312438965,
      "learning_rate": 0.0004642824756044171,
      "loss": 3.079,
      "step": 72701
    },
    {
      "epoch": 0.95,
      "grad_norm": 5.145229816436768,
      "learning_rate": 0.00046427905287323715,
      "loss": 2.8204,
      "step": 72702
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.0308754444122314,
      "learning_rate": 0.00046427563011151467,
      "loss": 3.0319,
      "step": 72703
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.558516502380371,
      "learning_rate": 0.00046427220731925015,
      "loss": 3.076,
      "step": 72704
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.54140305519104,
      "learning_rate": 0.00046426878449644436,
      "loss": 2.9233,
      "step": 72705
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.9323487281799316,
      "learning_rate": 0.000464265361643098,
      "loss": 3.1757,
      "step": 72706
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.6839516162872314,
      "learning_rate": 0.00046426193875921147,
      "loss": 3.0758,
      "step": 72707
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7568272352218628,
      "learning_rate": 0.00046425851584478567,
      "loss": 2.8795,
      "step": 72708
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6839865446090698,
      "learning_rate": 0.000464255092899821,
      "loss": 3.2089,
      "step": 72709
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5349452495574951,
      "learning_rate": 0.0004642516699243182,
      "loss": 3.2211,
      "step": 72710
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2218570709228516,
      "learning_rate": 0.000464248246918278,
      "loss": 3.1294,
      "step": 72711
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6741158962249756,
      "learning_rate": 0.0004642448238817009,
      "loss": 2.9424,
      "step": 72712
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6149436235427856,
      "learning_rate": 0.00046424140081458763,
      "loss": 3.0464,
      "step": 72713
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.509338140487671,
      "learning_rate": 0.00046423797771693876,
      "loss": 2.6465,
      "step": 72714
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.39861798286438,
      "learning_rate": 0.0004642345545887549,
      "loss": 3.0331,
      "step": 72715
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.4138638973236084,
      "learning_rate": 0.0004642311314300368,
      "loss": 3.2174,
      "step": 72716
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.592335820198059,
      "learning_rate": 0.0004642277082407851,
      "loss": 3.1984,
      "step": 72717
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1207075119018555,
      "learning_rate": 0.0004642242850210003,
      "loss": 3.1244,
      "step": 72718
    },
    {
      "epoch": 0.95,
      "grad_norm": 4.1412739753723145,
      "learning_rate": 0.000464220861770683,
      "loss": 3.0039,
      "step": 72719
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.9943759441375732,
      "learning_rate": 0.0004642174384898341,
      "loss": 3.0857,
      "step": 72720
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6064709424972534,
      "learning_rate": 0.000464214015178454,
      "loss": 3.081,
      "step": 72721
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3700886964797974,
      "learning_rate": 0.00046421059183654337,
      "loss": 2.981,
      "step": 72722
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.282057523727417,
      "learning_rate": 0.000464207168464103,
      "loss": 3.0845,
      "step": 72723
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4510531425476074,
      "learning_rate": 0.00046420374506113334,
      "loss": 3.2822,
      "step": 72724
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.560781478881836,
      "learning_rate": 0.0004642003216276351,
      "loss": 2.9775,
      "step": 72725
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6572016477584839,
      "learning_rate": 0.00046419689816360897,
      "loss": 2.9975,
      "step": 72726
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.180788993835449,
      "learning_rate": 0.00046419347466905553,
      "loss": 2.9869,
      "step": 72727
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.74556827545166,
      "learning_rate": 0.0004641900511439754,
      "loss": 3.1338,
      "step": 72728
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.801985502243042,
      "learning_rate": 0.0004641866275883693,
      "loss": 3.0978,
      "step": 72729
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.200324535369873,
      "learning_rate": 0.0004641832040022377,
      "loss": 3.0671,
      "step": 72730
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.078828811645508,
      "learning_rate": 0.0004641797803855814,
      "loss": 3.1134,
      "step": 72731
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.2619788646698,
      "learning_rate": 0.00046417635673840104,
      "loss": 3.0955,
      "step": 72732
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0539584159851074,
      "learning_rate": 0.00046417293306069707,
      "loss": 2.949,
      "step": 72733
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4937652349472046,
      "learning_rate": 0.0004641695093524703,
      "loss": 3.1314,
      "step": 72734
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8784124851226807,
      "learning_rate": 0.0004641660856137214,
      "loss": 2.8339,
      "step": 72735
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3697707653045654,
      "learning_rate": 0.00046416266184445083,
      "loss": 3.0306,
      "step": 72736
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6085102558135986,
      "learning_rate": 0.0004641592380446593,
      "loss": 3.2985,
      "step": 72737
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8674037456512451,
      "learning_rate": 0.00046415581421434764,
      "loss": 3.1725,
      "step": 72738
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4956622123718262,
      "learning_rate": 0.0004641523903535161,
      "loss": 3.0528,
      "step": 72739
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.323678731918335,
      "learning_rate": 0.00046414896646216567,
      "loss": 3.2238,
      "step": 72740
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5864394903182983,
      "learning_rate": 0.0004641455425402969,
      "loss": 3.0834,
      "step": 72741
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5134904384613037,
      "learning_rate": 0.00046414211858791023,
      "loss": 3.3363,
      "step": 72742
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7185533046722412,
      "learning_rate": 0.00046413869460500644,
      "loss": 3.0763,
      "step": 72743
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7202894687652588,
      "learning_rate": 0.0004641352705915863,
      "loss": 3.0377,
      "step": 72744
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.654441237449646,
      "learning_rate": 0.0004641318465476502,
      "loss": 2.9653,
      "step": 72745
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9179173707962036,
      "learning_rate": 0.00046412842247319896,
      "loss": 3.0163,
      "step": 72746
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.754117727279663,
      "learning_rate": 0.00046412499836823314,
      "loss": 2.7973,
      "step": 72747
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5242199897766113,
      "learning_rate": 0.00046412157423275337,
      "loss": 3.0026,
      "step": 72748
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8829900026321411,
      "learning_rate": 0.0004641181500667602,
      "loss": 2.9227,
      "step": 72749
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3056970834732056,
      "learning_rate": 0.00046411472587025454,
      "loss": 2.8559,
      "step": 72750
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5919053554534912,
      "learning_rate": 0.0004641113016432367,
      "loss": 2.9468,
      "step": 72751
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.263517141342163,
      "learning_rate": 0.0004641078773857076,
      "loss": 2.9378,
      "step": 72752
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5245678424835205,
      "learning_rate": 0.0004641044530976677,
      "loss": 3.0267,
      "step": 72753
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7954541444778442,
      "learning_rate": 0.00046410102877911773,
      "loss": 2.7934,
      "step": 72754
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3820840120315552,
      "learning_rate": 0.0004640976044300581,
      "loss": 2.9689,
      "step": 72755
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7024376392364502,
      "learning_rate": 0.0004640941800504899,
      "loss": 3.115,
      "step": 72756
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8998606204986572,
      "learning_rate": 0.0004640907556404133,
      "loss": 2.982,
      "step": 72757
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9006552696228027,
      "learning_rate": 0.0004640873311998292,
      "loss": 2.7897,
      "step": 72758
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5081878900527954,
      "learning_rate": 0.00046408390672873816,
      "loss": 3.1678,
      "step": 72759
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4248360395431519,
      "learning_rate": 0.00046408048222714083,
      "loss": 3.1202,
      "step": 72760
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7790147066116333,
      "learning_rate": 0.0004640770576950378,
      "loss": 3.1067,
      "step": 72761
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8205366134643555,
      "learning_rate": 0.0004640736331324298,
      "loss": 3.0089,
      "step": 72762
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4235048294067383,
      "learning_rate": 0.00046407020853931735,
      "loss": 3.0539,
      "step": 72763
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7153117656707764,
      "learning_rate": 0.0004640667839157012,
      "loss": 2.9522,
      "step": 72764
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3402913808822632,
      "learning_rate": 0.00046406335926158193,
      "loss": 3.2259,
      "step": 72765
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1639716625213623,
      "learning_rate": 0.0004640599345769602,
      "loss": 3.0659,
      "step": 72766
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6286234855651855,
      "learning_rate": 0.0004640565098618366,
      "loss": 3.2089,
      "step": 72767
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.771129846572876,
      "learning_rate": 0.0004640530851162118,
      "loss": 2.9279,
      "step": 72768
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7312138080596924,
      "learning_rate": 0.00046404966034008655,
      "loss": 3.0256,
      "step": 72769
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.850454568862915,
      "learning_rate": 0.0004640462355334612,
      "loss": 3.0758,
      "step": 72770
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9253675937652588,
      "learning_rate": 0.0004640428106963366,
      "loss": 3.1341,
      "step": 72771
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.1740126609802246,
      "learning_rate": 0.00046403938582871345,
      "loss": 3.1611,
      "step": 72772
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.410390853881836,
      "learning_rate": 0.00046403596093059216,
      "loss": 3.0681,
      "step": 72773
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4639848470687866,
      "learning_rate": 0.00046403253600197356,
      "loss": 2.982,
      "step": 72774
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.9494268894195557,
      "learning_rate": 0.0004640291110428582,
      "loss": 3.1091,
      "step": 72775
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.4706008434295654,
      "learning_rate": 0.00046402568605324667,
      "loss": 2.9846,
      "step": 72776
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7535574436187744,
      "learning_rate": 0.00046402226103313973,
      "loss": 2.9823,
      "step": 72777
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7178736925125122,
      "learning_rate": 0.000464018835982538,
      "loss": 2.9878,
      "step": 72778
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.622544765472412,
      "learning_rate": 0.00046401541090144196,
      "loss": 2.8854,
      "step": 72779
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.086827039718628,
      "learning_rate": 0.00046401198578985243,
      "loss": 2.8968,
      "step": 72780
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5691503286361694,
      "learning_rate": 0.00046400856064777004,
      "loss": 2.8716,
      "step": 72781
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6325764656066895,
      "learning_rate": 0.00046400513547519527,
      "loss": 3.0373,
      "step": 72782
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9714540243148804,
      "learning_rate": 0.0004640017102721288,
      "loss": 3.2336,
      "step": 72783
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1088101863861084,
      "learning_rate": 0.00046399828503857144,
      "loss": 2.9563,
      "step": 72784
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.510738730430603,
      "learning_rate": 0.0004639948597745236,
      "loss": 2.8908,
      "step": 72785
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6371570825576782,
      "learning_rate": 0.0004639914344799861,
      "loss": 3.1709,
      "step": 72786
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7034882307052612,
      "learning_rate": 0.0004639880091549595,
      "loss": 2.8259,
      "step": 72787
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5781331062316895,
      "learning_rate": 0.0004639845837994443,
      "loss": 3.1748,
      "step": 72788
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3690763711929321,
      "learning_rate": 0.0004639811584134414,
      "loss": 3.1277,
      "step": 72789
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5553110837936401,
      "learning_rate": 0.00046397773299695124,
      "loss": 2.9386,
      "step": 72790
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5980393886566162,
      "learning_rate": 0.0004639743075499746,
      "loss": 3.1547,
      "step": 72791
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5279182195663452,
      "learning_rate": 0.000463970882072512,
      "loss": 3.0,
      "step": 72792
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.550392985343933,
      "learning_rate": 0.00046396745656456407,
      "loss": 2.8086,
      "step": 72793
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6425082683563232,
      "learning_rate": 0.00046396403102613157,
      "loss": 2.9377,
      "step": 72794
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8490169048309326,
      "learning_rate": 0.00046396060545721504,
      "loss": 3.009,
      "step": 72795
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5630764961242676,
      "learning_rate": 0.0004639571798578152,
      "loss": 2.8614,
      "step": 72796
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4478919506072998,
      "learning_rate": 0.00046395375422793246,
      "loss": 2.9965,
      "step": 72797
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6752175092697144,
      "learning_rate": 0.00046395032856756777,
      "loss": 3.1593,
      "step": 72798
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6976503133773804,
      "learning_rate": 0.0004639469028767216,
      "loss": 3.1564,
      "step": 72799
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.159811496734619,
      "learning_rate": 0.0004639434771553945,
      "loss": 2.9975,
      "step": 72800
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5490703582763672,
      "learning_rate": 0.00046394005140358726,
      "loss": 3.2197,
      "step": 72801
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.807878851890564,
      "learning_rate": 0.00046393662562130056,
      "loss": 3.2579,
      "step": 72802
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6870023012161255,
      "learning_rate": 0.0004639331998085349,
      "loss": 3.2359,
      "step": 72803
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.633299708366394,
      "learning_rate": 0.00046392977396529086,
      "loss": 2.8333,
      "step": 72804
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0749382972717285,
      "learning_rate": 0.0004639263480915694,
      "loss": 3.017,
      "step": 72805
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.681829571723938,
      "learning_rate": 0.0004639229221873707,
      "loss": 2.925,
      "step": 72806
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.677956223487854,
      "learning_rate": 0.00046391949625269576,
      "loss": 3.0896,
      "step": 72807
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0808157920837402,
      "learning_rate": 0.00046391607028754513,
      "loss": 3.0471,
      "step": 72808
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6865520477294922,
      "learning_rate": 0.00046391264429191927,
      "loss": 3.2552,
      "step": 72809
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9191677570343018,
      "learning_rate": 0.000463909218265819,
      "loss": 2.8189,
      "step": 72810
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9069401025772095,
      "learning_rate": 0.00046390579220924506,
      "loss": 2.8909,
      "step": 72811
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6651833057403564,
      "learning_rate": 0.0004639023661221978,
      "loss": 3.0109,
      "step": 72812
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.084503412246704,
      "learning_rate": 0.00046389894000467796,
      "loss": 3.2418,
      "step": 72813
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.839246153831482,
      "learning_rate": 0.00046389551385668634,
      "loss": 3.0671,
      "step": 72814
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.378528594970703,
      "learning_rate": 0.00046389208767822333,
      "loss": 3.1749,
      "step": 72815
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5724258422851562,
      "learning_rate": 0.00046388866146928974,
      "loss": 3.096,
      "step": 72816
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.0903398990631104,
      "learning_rate": 0.00046388523522988617,
      "loss": 2.8561,
      "step": 72817
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6500244140625,
      "learning_rate": 0.00046388180896001326,
      "loss": 3.1976,
      "step": 72818
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.010953903198242,
      "learning_rate": 0.00046387838265967156,
      "loss": 3.0774,
      "step": 72819
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9310290813446045,
      "learning_rate": 0.00046387495632886183,
      "loss": 3.0941,
      "step": 72820
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2235703468322754,
      "learning_rate": 0.0004638715299675846,
      "loss": 2.8854,
      "step": 72821
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6270248889923096,
      "learning_rate": 0.00046386810357584065,
      "loss": 3.0166,
      "step": 72822
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.051205635070801,
      "learning_rate": 0.00046386467715363045,
      "loss": 3.0552,
      "step": 72823
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.448781132698059,
      "learning_rate": 0.0004638612507009547,
      "loss": 3.0639,
      "step": 72824
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3212532997131348,
      "learning_rate": 0.00046385782421781414,
      "loss": 3.0833,
      "step": 72825
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7929339408874512,
      "learning_rate": 0.0004638543977042093,
      "loss": 2.8924,
      "step": 72826
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7798832654953003,
      "learning_rate": 0.0004638509711601407,
      "loss": 3.2332,
      "step": 72827
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.962834358215332,
      "learning_rate": 0.0004638475445856092,
      "loss": 2.9556,
      "step": 72828
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6939094066619873,
      "learning_rate": 0.0004638441179806154,
      "loss": 2.8377,
      "step": 72829
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.589339017868042,
      "learning_rate": 0.0004638406913451598,
      "loss": 2.9754,
      "step": 72830
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8800930976867676,
      "learning_rate": 0.0004638372646792432,
      "loss": 2.9307,
      "step": 72831
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9840949773788452,
      "learning_rate": 0.00046383383798286607,
      "loss": 3.1966,
      "step": 72832
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.146249294281006,
      "learning_rate": 0.00046383041125602927,
      "loss": 3.1015,
      "step": 72833
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6121017932891846,
      "learning_rate": 0.00046382698449873316,
      "loss": 3.0786,
      "step": 72834
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5120270252227783,
      "learning_rate": 0.00046382355771097856,
      "loss": 3.1977,
      "step": 72835
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.168231725692749,
      "learning_rate": 0.0004638201308927661,
      "loss": 3.0234,
      "step": 72836
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.2628670930862427,
      "learning_rate": 0.00046381670404409644,
      "loss": 2.8167,
      "step": 72837
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.654194951057434,
      "learning_rate": 0.00046381327716497005,
      "loss": 2.7753,
      "step": 72838
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5650999546051025,
      "learning_rate": 0.0004638098502553878,
      "loss": 3.1377,
      "step": 72839
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4988951683044434,
      "learning_rate": 0.00046380642331535004,
      "loss": 2.9944,
      "step": 72840
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6405800580978394,
      "learning_rate": 0.0004638029963448577,
      "loss": 2.9823,
      "step": 72841
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7829536199569702,
      "learning_rate": 0.0004637995693439113,
      "loss": 2.8479,
      "step": 72842
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.402571678161621,
      "learning_rate": 0.0004637961423125114,
      "loss": 2.9484,
      "step": 72843
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8968422412872314,
      "learning_rate": 0.00046379271525065876,
      "loss": 2.7938,
      "step": 72844
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0694820880889893,
      "learning_rate": 0.00046378928815835394,
      "loss": 3.0073,
      "step": 72845
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4570468664169312,
      "learning_rate": 0.00046378586103559756,
      "loss": 3.1842,
      "step": 72846
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.703366756439209,
      "learning_rate": 0.00046378243388239034,
      "loss": 3.0642,
      "step": 72847
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7820316553115845,
      "learning_rate": 0.0004637790066987329,
      "loss": 2.8908,
      "step": 72848
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5311521291732788,
      "learning_rate": 0.00046377557948462585,
      "loss": 3.0902,
      "step": 72849
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0262012481689453,
      "learning_rate": 0.0004637721522400697,
      "loss": 2.8405,
      "step": 72850
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.931159496307373,
      "learning_rate": 0.0004637687249650654,
      "loss": 3.0357,
      "step": 72851
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4840781688690186,
      "learning_rate": 0.0004637652976596133,
      "loss": 2.9519,
      "step": 72852
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.050861120223999,
      "learning_rate": 0.00046376187032371406,
      "loss": 2.7988,
      "step": 72853
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.523611307144165,
      "learning_rate": 0.00046375844295736863,
      "loss": 3.1256,
      "step": 72854
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4704387187957764,
      "learning_rate": 0.00046375501556057723,
      "loss": 2.8456,
      "step": 72855
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.998153805732727,
      "learning_rate": 0.0004637515881333407,
      "loss": 2.8882,
      "step": 72856
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.753406047821045,
      "learning_rate": 0.0004637481606756597,
      "loss": 2.8362,
      "step": 72857
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6163301467895508,
      "learning_rate": 0.0004637447331875348,
      "loss": 3.1462,
      "step": 72858
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8826947212219238,
      "learning_rate": 0.00046374130566896675,
      "loss": 3.039,
      "step": 72859
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7232598066329956,
      "learning_rate": 0.00046373787811995596,
      "loss": 2.8439,
      "step": 72860
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9256953001022339,
      "learning_rate": 0.00046373445054050333,
      "loss": 3.0533,
      "step": 72861
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.835708498954773,
      "learning_rate": 0.0004637310229306093,
      "loss": 2.9973,
      "step": 72862
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0892088413238525,
      "learning_rate": 0.00046372759529027463,
      "loss": 3.1063,
      "step": 72863
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5521221160888672,
      "learning_rate": 0.0004637241676194998,
      "loss": 3.2112,
      "step": 72864
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2604570388793945,
      "learning_rate": 0.00046372073991828564,
      "loss": 3.2631,
      "step": 72865
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.244751214981079,
      "learning_rate": 0.00046371731218663277,
      "loss": 3.1091,
      "step": 72866
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5412204265594482,
      "learning_rate": 0.00046371388442454167,
      "loss": 3.0412,
      "step": 72867
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7265534400939941,
      "learning_rate": 0.0004637104566320131,
      "loss": 3.0959,
      "step": 72868
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7282047271728516,
      "learning_rate": 0.00046370702880904765,
      "loss": 3.149,
      "step": 72869
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5598105192184448,
      "learning_rate": 0.00046370360095564597,
      "loss": 3.123,
      "step": 72870
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6755235195159912,
      "learning_rate": 0.00046370017307180867,
      "loss": 3.0804,
      "step": 72871
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6626416444778442,
      "learning_rate": 0.0004636967451575365,
      "loss": 3.0309,
      "step": 72872
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.81858491897583,
      "learning_rate": 0.00046369331721283,
      "loss": 3.2431,
      "step": 72873
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6125047206878662,
      "learning_rate": 0.0004636898892376897,
      "loss": 3.092,
      "step": 72874
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4278136491775513,
      "learning_rate": 0.0004636864612321165,
      "loss": 3.1098,
      "step": 72875
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5250455141067505,
      "learning_rate": 0.00046368303319611084,
      "loss": 3.0599,
      "step": 72876
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6085355281829834,
      "learning_rate": 0.00046367960512967346,
      "loss": 2.8116,
      "step": 72877
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5731956958770752,
      "learning_rate": 0.00046367617703280497,
      "loss": 3.1646,
      "step": 72878
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.617751121520996,
      "learning_rate": 0.00046367274890550585,
      "loss": 2.9384,
      "step": 72879
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.606467604637146,
      "learning_rate": 0.00046366932074777697,
      "loss": 3.1543,
      "step": 72880
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.449448823928833,
      "learning_rate": 0.000463665892559619,
      "loss": 3.1994,
      "step": 72881
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.781599521636963,
      "learning_rate": 0.0004636624643410322,
      "loss": 3.1267,
      "step": 72882
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2995388507843018,
      "learning_rate": 0.0004636590360920176,
      "loss": 3.2978,
      "step": 72883
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6341328620910645,
      "learning_rate": 0.0004636556078125757,
      "loss": 2.9476,
      "step": 72884
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3367605209350586,
      "learning_rate": 0.0004636521795027071,
      "loss": 3.2846,
      "step": 72885
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4343317747116089,
      "learning_rate": 0.0004636487511624124,
      "loss": 3.1124,
      "step": 72886
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5578852891921997,
      "learning_rate": 0.00046364532279169246,
      "loss": 3.0866,
      "step": 72887
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4871082305908203,
      "learning_rate": 0.00046364189439054767,
      "loss": 2.9304,
      "step": 72888
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1066129207611084,
      "learning_rate": 0.00046363846595897875,
      "loss": 3.0583,
      "step": 72889
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6280626058578491,
      "learning_rate": 0.0004636350374969865,
      "loss": 3.12,
      "step": 72890
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.1851985454559326,
      "learning_rate": 0.0004636316090045712,
      "loss": 3.2662,
      "step": 72891
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.944873094558716,
      "learning_rate": 0.0004636281804817338,
      "loss": 3.0442,
      "step": 72892
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.606631875038147,
      "learning_rate": 0.0004636247519284748,
      "loss": 3.146,
      "step": 72893
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.831875801086426,
      "learning_rate": 0.00046362132334479495,
      "loss": 3.1873,
      "step": 72894
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.7847540378570557,
      "learning_rate": 0.0004636178947306947,
      "loss": 3.0177,
      "step": 72895
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.488938331604004,
      "learning_rate": 0.00046361446608617486,
      "loss": 3.1325,
      "step": 72896
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3613502979278564,
      "learning_rate": 0.00046361103741123595,
      "loss": 2.9891,
      "step": 72897
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9087313413619995,
      "learning_rate": 0.0004636076087058787,
      "loss": 2.9883,
      "step": 72898
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.497917413711548,
      "learning_rate": 0.0004636041799701037,
      "loss": 3.153,
      "step": 72899
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0457589626312256,
      "learning_rate": 0.0004636007512039117,
      "loss": 2.8882,
      "step": 72900
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6012747287750244,
      "learning_rate": 0.000463597322407303,
      "loss": 3.1819,
      "step": 72901
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.395779848098755,
      "learning_rate": 0.0004635938935802786,
      "loss": 3.1523,
      "step": 72902
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.663511276245117,
      "learning_rate": 0.0004635904647228391,
      "loss": 2.9333,
      "step": 72903
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7734869718551636,
      "learning_rate": 0.0004635870358349849,
      "loss": 3.2703,
      "step": 72904
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3866345882415771,
      "learning_rate": 0.00046358360691671686,
      "loss": 2.8773,
      "step": 72905
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.7253053188323975,
      "learning_rate": 0.0004635801779680355,
      "loss": 3.1147,
      "step": 72906
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.200584650039673,
      "learning_rate": 0.0004635767489889415,
      "loss": 3.0257,
      "step": 72907
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.799193263053894,
      "learning_rate": 0.0004635733199794355,
      "loss": 3.1734,
      "step": 72908
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8963385820388794,
      "learning_rate": 0.00046356989093951817,
      "loss": 2.9817,
      "step": 72909
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.4180688858032227,
      "learning_rate": 0.0004635664618691901,
      "loss": 2.9942,
      "step": 72910
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.121309280395508,
      "learning_rate": 0.00046356303276845186,
      "loss": 2.9947,
      "step": 72911
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5930850505828857,
      "learning_rate": 0.0004635596036373042,
      "loss": 3.0051,
      "step": 72912
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2070682048797607,
      "learning_rate": 0.00046355617447574777,
      "loss": 3.1673,
      "step": 72913
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.496368646621704,
      "learning_rate": 0.0004635527452837831,
      "loss": 3.04,
      "step": 72914
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5999433994293213,
      "learning_rate": 0.0004635493160614109,
      "loss": 3.0311,
      "step": 72915
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7050764560699463,
      "learning_rate": 0.00046354588680863184,
      "loss": 3.0744,
      "step": 72916
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.412878155708313,
      "learning_rate": 0.0004635424575254464,
      "loss": 3.1673,
      "step": 72917
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.080298662185669,
      "learning_rate": 0.0004635390282118555,
      "loss": 2.9069,
      "step": 72918
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6444743871688843,
      "learning_rate": 0.0004635355988678595,
      "loss": 3.0314,
      "step": 72919
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.590842604637146,
      "learning_rate": 0.00046353216949345915,
      "loss": 3.0415,
      "step": 72920
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.521802306175232,
      "learning_rate": 0.0004635287400886551,
      "loss": 3.2008,
      "step": 72921
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6681891679763794,
      "learning_rate": 0.000463525310653448,
      "loss": 3.077,
      "step": 72922
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.242056131362915,
      "learning_rate": 0.00046352188118783835,
      "loss": 2.7821,
      "step": 72923
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.939982533454895,
      "learning_rate": 0.00046351845169182706,
      "loss": 3.3487,
      "step": 72924
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5997414588928223,
      "learning_rate": 0.0004635150221654145,
      "loss": 3.2519,
      "step": 72925
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.264212131500244,
      "learning_rate": 0.0004635115926086014,
      "loss": 2.9503,
      "step": 72926
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.170478105545044,
      "learning_rate": 0.00046350816302138844,
      "loss": 2.865,
      "step": 72927
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.207282066345215,
      "learning_rate": 0.0004635047334037762,
      "loss": 2.6966,
      "step": 72928
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7972060441970825,
      "learning_rate": 0.0004635013037557654,
      "loss": 2.9586,
      "step": 72929
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.160003662109375,
      "learning_rate": 0.00046349787407735655,
      "loss": 3.08,
      "step": 72930
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6842498779296875,
      "learning_rate": 0.00046349444436855037,
      "loss": 2.7321,
      "step": 72931
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5220106840133667,
      "learning_rate": 0.00046349101462934755,
      "loss": 3.0655,
      "step": 72932
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8323136568069458,
      "learning_rate": 0.00046348758485974863,
      "loss": 3.0613,
      "step": 72933
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.906752586364746,
      "learning_rate": 0.0004634841550597543,
      "loss": 2.7438,
      "step": 72934
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6131075620651245,
      "learning_rate": 0.00046348072522936517,
      "loss": 3.0091,
      "step": 72935
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6493500471115112,
      "learning_rate": 0.0004634772953685819,
      "loss": 3.0879,
      "step": 72936
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5453078746795654,
      "learning_rate": 0.000463473865477405,
      "loss": 2.7868,
      "step": 72937
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2472310066223145,
      "learning_rate": 0.00046347043555583533,
      "loss": 3.194,
      "step": 72938
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4703969955444336,
      "learning_rate": 0.0004634670056038735,
      "loss": 2.9254,
      "step": 72939
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.623775601387024,
      "learning_rate": 0.00046346357562151985,
      "loss": 3.0846,
      "step": 72940
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.89899480342865,
      "learning_rate": 0.0004634601456087754,
      "loss": 2.9452,
      "step": 72941
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5610377788543701,
      "learning_rate": 0.0004634567155656406,
      "loss": 2.9535,
      "step": 72942
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.185164213180542,
      "learning_rate": 0.0004634532854921161,
      "loss": 2.7789,
      "step": 72943
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7099167108535767,
      "learning_rate": 0.00046344985538820254,
      "loss": 2.8603,
      "step": 72944
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8889058828353882,
      "learning_rate": 0.0004634464252539006,
      "loss": 3.0074,
      "step": 72945
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9509916305541992,
      "learning_rate": 0.0004634429950892108,
      "loss": 2.9174,
      "step": 72946
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4541635513305664,
      "learning_rate": 0.0004634395648941339,
      "loss": 3.0888,
      "step": 72947
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.080853223800659,
      "learning_rate": 0.00046343613466867054,
      "loss": 3.0215,
      "step": 72948
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8695809841156006,
      "learning_rate": 0.0004634327044128213,
      "loss": 3.1874,
      "step": 72949
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2122933864593506,
      "learning_rate": 0.0004634292741265868,
      "loss": 3.1521,
      "step": 72950
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2947607040405273,
      "learning_rate": 0.00046342584380996776,
      "loss": 3.0551,
      "step": 72951
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4470595121383667,
      "learning_rate": 0.0004634224134629648,
      "loss": 3.3381,
      "step": 72952
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3831852674484253,
      "learning_rate": 0.00046341898308557833,
      "loss": 3.266,
      "step": 72953
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7625659704208374,
      "learning_rate": 0.00046341555267780945,
      "loss": 2.8345,
      "step": 72954
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.732745885848999,
      "learning_rate": 0.0004634121222396584,
      "loss": 3.0129,
      "step": 72955
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4552494287490845,
      "learning_rate": 0.0004634086917711259,
      "loss": 3.0341,
      "step": 72956
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3248229026794434,
      "learning_rate": 0.00046340526127221275,
      "loss": 2.9902,
      "step": 72957
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8234469890594482,
      "learning_rate": 0.0004634018307429195,
      "loss": 2.8948,
      "step": 72958
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8452948331832886,
      "learning_rate": 0.00046339840018324663,
      "loss": 2.9714,
      "step": 72959
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1877214908599854,
      "learning_rate": 0.00046339496959319496,
      "loss": 3.0817,
      "step": 72960
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.908894658088684,
      "learning_rate": 0.0004633915389727651,
      "loss": 3.0518,
      "step": 72961
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8129031658172607,
      "learning_rate": 0.00046338810832195765,
      "loss": 2.9568,
      "step": 72962
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8000766038894653,
      "learning_rate": 0.00046338467764077336,
      "loss": 3.124,
      "step": 72963
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0602731704711914,
      "learning_rate": 0.0004633812469292127,
      "loss": 2.825,
      "step": 72964
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5038042068481445,
      "learning_rate": 0.00046337781618727635,
      "loss": 3.1173,
      "step": 72965
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8879404067993164,
      "learning_rate": 0.00046337438541496504,
      "loss": 3.082,
      "step": 72966
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.3433195352554321,
      "learning_rate": 0.0004633709546122793,
      "loss": 3.2221,
      "step": 72967
    },
    {
      "epoch": 0.95,
      "grad_norm": 4.9051008224487305,
      "learning_rate": 0.0004633675237792199,
      "loss": 3.2932,
      "step": 72968
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.223031759262085,
      "learning_rate": 0.00046336409291578733,
      "loss": 3.1362,
      "step": 72969
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7582308053970337,
      "learning_rate": 0.0004633606620219823,
      "loss": 3.1391,
      "step": 72970
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.228271484375,
      "learning_rate": 0.00046335723109780535,
      "loss": 2.8687,
      "step": 72971
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8857016563415527,
      "learning_rate": 0.00046335380014325727,
      "loss": 3.0369,
      "step": 72972
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.490140438079834,
      "learning_rate": 0.00046335036915833873,
      "loss": 3.1776,
      "step": 72973
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8051432371139526,
      "learning_rate": 0.0004633469381430502,
      "loss": 3.1551,
      "step": 72974
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7648752927780151,
      "learning_rate": 0.0004633435070973924,
      "loss": 3.4236,
      "step": 72975
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1050429344177246,
      "learning_rate": 0.000463340076021366,
      "loss": 2.9386,
      "step": 72976
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2819857597351074,
      "learning_rate": 0.0004633366449149715,
      "loss": 3.0941,
      "step": 72977
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8483402729034424,
      "learning_rate": 0.00046333321377820967,
      "loss": 2.9906,
      "step": 72978
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7528737783432007,
      "learning_rate": 0.00046332978261108115,
      "loss": 3.1024,
      "step": 72979
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8709501028060913,
      "learning_rate": 0.0004633263514135865,
      "loss": 3.0413,
      "step": 72980
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.598938226699829,
      "learning_rate": 0.0004633229201857264,
      "loss": 3.1819,
      "step": 72981
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7342585325241089,
      "learning_rate": 0.0004633194889275015,
      "loss": 3.0231,
      "step": 72982
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7248196601867676,
      "learning_rate": 0.0004633160576389124,
      "loss": 3.2547,
      "step": 72983
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.864302158355713,
      "learning_rate": 0.0004633126263199598,
      "loss": 2.814,
      "step": 72984
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.827771544456482,
      "learning_rate": 0.0004633091949706443,
      "loss": 2.7967,
      "step": 72985
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7496167421340942,
      "learning_rate": 0.00046330576359096645,
      "loss": 3.0252,
      "step": 72986
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5812087059020996,
      "learning_rate": 0.0004633023321809271,
      "loss": 3.042,
      "step": 72987
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.425579071044922,
      "learning_rate": 0.0004632989007405267,
      "loss": 3.1416,
      "step": 72988
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5595111846923828,
      "learning_rate": 0.0004632954692697659,
      "loss": 3.2919,
      "step": 72989
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8855807781219482,
      "learning_rate": 0.00046329203776864544,
      "loss": 2.775,
      "step": 72990
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2193593978881836,
      "learning_rate": 0.000463288606237166,
      "loss": 2.8494,
      "step": 72991
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.59139883518219,
      "learning_rate": 0.00046328517467532804,
      "loss": 3.1506,
      "step": 72992
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6095869541168213,
      "learning_rate": 0.00046328174308313226,
      "loss": 2.9036,
      "step": 72993
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3343496322631836,
      "learning_rate": 0.0004632783114605794,
      "loss": 2.8848,
      "step": 72994
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5774343013763428,
      "learning_rate": 0.00046327487980767,
      "loss": 2.9157,
      "step": 72995
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1030685901641846,
      "learning_rate": 0.0004632714481244046,
      "loss": 2.944,
      "step": 72996
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.638611078262329,
      "learning_rate": 0.0004632680164107841,
      "loss": 3.2164,
      "step": 72997
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.701147437095642,
      "learning_rate": 0.00046326458466680896,
      "loss": 3.0894,
      "step": 72998
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4989869594573975,
      "learning_rate": 0.0004632611528924798,
      "loss": 2.8626,
      "step": 72999
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7602862119674683,
      "learning_rate": 0.00046325772108779734,
      "loss": 2.9443,
      "step": 73000
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6924176216125488,
      "learning_rate": 0.0004632542892527622,
      "loss": 2.992,
      "step": 73001
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.42277193069458,
      "learning_rate": 0.000463250857387375,
      "loss": 3.2,
      "step": 73002
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7926188707351685,
      "learning_rate": 0.00046324742549163635,
      "loss": 3.0148,
      "step": 73003
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6010421514511108,
      "learning_rate": 0.00046324399356554696,
      "loss": 3.027,
      "step": 73004
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6646416187286377,
      "learning_rate": 0.00046324056160910743,
      "loss": 3.1633,
      "step": 73005
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.901741623878479,
      "learning_rate": 0.00046323712962231844,
      "loss": 3.1725,
      "step": 73006
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8885571956634521,
      "learning_rate": 0.00046323369760518045,
      "loss": 3.025,
      "step": 73007
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6973592042922974,
      "learning_rate": 0.00046323026555769435,
      "loss": 3.1865,
      "step": 73008
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5557715892791748,
      "learning_rate": 0.0004632268334798606,
      "loss": 3.1318,
      "step": 73009
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5864815711975098,
      "learning_rate": 0.00046322340137168,
      "loss": 2.9543,
      "step": 73010
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2908830642700195,
      "learning_rate": 0.00046321996923315295,
      "loss": 3.1329,
      "step": 73011
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7250494956970215,
      "learning_rate": 0.00046321653706428033,
      "loss": 3.2489,
      "step": 73012
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.45857834815979,
      "learning_rate": 0.0004632131048650626,
      "loss": 3.1602,
      "step": 73013
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.79855477809906,
      "learning_rate": 0.00046320967263550054,
      "loss": 3.2837,
      "step": 73014
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7892060279846191,
      "learning_rate": 0.0004632062403755947,
      "loss": 3.0125,
      "step": 73015
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6362946033477783,
      "learning_rate": 0.0004632028080853458,
      "loss": 2.8991,
      "step": 73016
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8987925052642822,
      "learning_rate": 0.00046319937576475435,
      "loss": 3.0602,
      "step": 73017
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.076563596725464,
      "learning_rate": 0.000463195943413821,
      "loss": 2.7811,
      "step": 73018
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7470289468765259,
      "learning_rate": 0.0004631925110325465,
      "loss": 3.0646,
      "step": 73019
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9446908235549927,
      "learning_rate": 0.0004631890786209314,
      "loss": 3.0344,
      "step": 73020
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.642373561859131,
      "learning_rate": 0.00046318564617897645,
      "loss": 3.0757,
      "step": 73021
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2015347480773926,
      "learning_rate": 0.0004631822137066822,
      "loss": 2.921,
      "step": 73022
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6240564584732056,
      "learning_rate": 0.0004631787812040492,
      "loss": 2.876,
      "step": 73023
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.833188533782959,
      "learning_rate": 0.0004631753486710783,
      "loss": 3.1608,
      "step": 73024
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.3319222927093506,
      "learning_rate": 0.00046317191610776995,
      "loss": 3.0658,
      "step": 73025
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7070404291152954,
      "learning_rate": 0.0004631684835141248,
      "loss": 2.8946,
      "step": 73026
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6404122114181519,
      "learning_rate": 0.0004631650508901437,
      "loss": 3.0872,
      "step": 73027
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4807868003845215,
      "learning_rate": 0.000463161618235827,
      "loss": 3.0463,
      "step": 73028
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.391718864440918,
      "learning_rate": 0.0004631581855511756,
      "loss": 2.8673,
      "step": 73029
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4604604244232178,
      "learning_rate": 0.00046315475283619,
      "loss": 3.286,
      "step": 73030
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7208747863769531,
      "learning_rate": 0.0004631513200908707,
      "loss": 3.1621,
      "step": 73031
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8181804418563843,
      "learning_rate": 0.0004631478873152186,
      "loss": 2.8738,
      "step": 73032
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.310502290725708,
      "learning_rate": 0.00046314445450923425,
      "loss": 2.7619,
      "step": 73033
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0019989013671875,
      "learning_rate": 0.00046314102167291826,
      "loss": 3.0675,
      "step": 73034
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9086769819259644,
      "learning_rate": 0.0004631375888062712,
      "loss": 2.8882,
      "step": 73035
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.41929030418396,
      "learning_rate": 0.0004631341559092938,
      "loss": 2.8648,
      "step": 73036
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.213747501373291,
      "learning_rate": 0.00046313072298198674,
      "loss": 2.7465,
      "step": 73037
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.959228754043579,
      "learning_rate": 0.00046312729002435054,
      "loss": 2.7989,
      "step": 73038
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.642802357673645,
      "learning_rate": 0.000463123857036386,
      "loss": 3.217,
      "step": 73039
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4831247329711914,
      "learning_rate": 0.00046312042401809356,
      "loss": 2.8632,
      "step": 73040
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3621623516082764,
      "learning_rate": 0.00046311699096947403,
      "loss": 2.9776,
      "step": 73041
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.840723991394043,
      "learning_rate": 0.00046311355789052785,
      "loss": 3.0942,
      "step": 73042
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.447975993156433,
      "learning_rate": 0.0004631101247812559,
      "loss": 3.1197,
      "step": 73043
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.583251714706421,
      "learning_rate": 0.0004631066916416586,
      "loss": 3.1775,
      "step": 73044
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6900488138198853,
      "learning_rate": 0.00046310325847173677,
      "loss": 3.0217,
      "step": 73045
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4428536891937256,
      "learning_rate": 0.000463099825271491,
      "loss": 3.2925,
      "step": 73046
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6548309326171875,
      "learning_rate": 0.00046309639204092183,
      "loss": 3.0952,
      "step": 73047
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6434768438339233,
      "learning_rate": 0.0004630929587800299,
      "loss": 2.9863,
      "step": 73048
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7401493787765503,
      "learning_rate": 0.00046308952548881595,
      "loss": 3.1417,
      "step": 73049
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8508403301239014,
      "learning_rate": 0.0004630860921672806,
      "loss": 3.2182,
      "step": 73050
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7712656259536743,
      "learning_rate": 0.0004630826588154245,
      "loss": 3.1487,
      "step": 73051
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7968186140060425,
      "learning_rate": 0.00046307922543324827,
      "loss": 3.0686,
      "step": 73052
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.603962779045105,
      "learning_rate": 0.0004630757920207524,
      "loss": 2.9896,
      "step": 73053
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8279651403427124,
      "learning_rate": 0.0004630723585779378,
      "loss": 3.0125,
      "step": 73054
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6075810194015503,
      "learning_rate": 0.00046306892510480495,
      "loss": 3.3165,
      "step": 73055
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9190388917922974,
      "learning_rate": 0.00046306549160135443,
      "loss": 3.1519,
      "step": 73056
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.387800931930542,
      "learning_rate": 0.000463062058067587,
      "loss": 3.1172,
      "step": 73057
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.091165542602539,
      "learning_rate": 0.0004630586245035033,
      "loss": 2.7998,
      "step": 73058
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.561762809753418,
      "learning_rate": 0.0004630551909091038,
      "loss": 3.1312,
      "step": 73059
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.437371015548706,
      "learning_rate": 0.0004630517572843894,
      "loss": 3.0997,
      "step": 73060
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.776618003845215,
      "learning_rate": 0.00046304832362936054,
      "loss": 2.9716,
      "step": 73061
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.8608386516571045,
      "learning_rate": 0.0004630448899440179,
      "loss": 3.1612,
      "step": 73062
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5234408378601074,
      "learning_rate": 0.00046304145622836214,
      "loss": 2.901,
      "step": 73063
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.7183854579925537,
      "learning_rate": 0.0004630380224823939,
      "loss": 3.2957,
      "step": 73064
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.265583038330078,
      "learning_rate": 0.0004630345887061139,
      "loss": 3.007,
      "step": 73065
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8507053852081299,
      "learning_rate": 0.0004630311548995226,
      "loss": 2.7995,
      "step": 73066
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4368253946304321,
      "learning_rate": 0.00046302772106262077,
      "loss": 3.0415,
      "step": 73067
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4457035064697266,
      "learning_rate": 0.0004630242871954089,
      "loss": 2.8761,
      "step": 73068
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3363099098205566,
      "learning_rate": 0.00046302085329788794,
      "loss": 3.0096,
      "step": 73069
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.106750011444092,
      "learning_rate": 0.00046301741937005824,
      "loss": 3.185,
      "step": 73070
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5924285650253296,
      "learning_rate": 0.00046301398541192043,
      "loss": 2.9591,
      "step": 73071
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1082701683044434,
      "learning_rate": 0.0004630105514234753,
      "loss": 3.2372,
      "step": 73072
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8369559049606323,
      "learning_rate": 0.0004630071174047235,
      "loss": 3.1337,
      "step": 73073
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3031928539276123,
      "learning_rate": 0.00046300368335566554,
      "loss": 3.1323,
      "step": 73074
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7279123067855835,
      "learning_rate": 0.0004630002492763021,
      "loss": 3.0226,
      "step": 73075
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6556808948516846,
      "learning_rate": 0.00046299681516663387,
      "loss": 3.1696,
      "step": 73076
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.444361925125122,
      "learning_rate": 0.0004629933810266615,
      "loss": 2.7225,
      "step": 73077
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.431151270866394,
      "learning_rate": 0.0004629899468563855,
      "loss": 2.9162,
      "step": 73078
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6661691665649414,
      "learning_rate": 0.0004629865126558066,
      "loss": 2.9637,
      "step": 73079
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0995445251464844,
      "learning_rate": 0.0004629830784249255,
      "loss": 2.9379,
      "step": 73080
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.7991209030151367,
      "learning_rate": 0.00046297964416374266,
      "loss": 3.2038,
      "step": 73081
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6776357889175415,
      "learning_rate": 0.00046297620987225893,
      "loss": 2.8177,
      "step": 73082
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2169651985168457,
      "learning_rate": 0.0004629727755504748,
      "loss": 3.1839,
      "step": 73083
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.0922367572784424,
      "learning_rate": 0.0004629693411983909,
      "loss": 3.0058,
      "step": 73084
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.863074541091919,
      "learning_rate": 0.00046296590681600805,
      "loss": 2.7385,
      "step": 73085
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6161425113677979,
      "learning_rate": 0.0004629624724033267,
      "loss": 3.07,
      "step": 73086
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0224404335021973,
      "learning_rate": 0.0004629590379603475,
      "loss": 3.0057,
      "step": 73087
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.552361011505127,
      "learning_rate": 0.0004629556034870712,
      "loss": 3.2453,
      "step": 73088
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2466683387756348,
      "learning_rate": 0.0004629521689834984,
      "loss": 3.0607,
      "step": 73089
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6054869890213013,
      "learning_rate": 0.0004629487344496297,
      "loss": 3.0653,
      "step": 73090
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9184092283248901,
      "learning_rate": 0.00046294529988546567,
      "loss": 3.1936,
      "step": 73091
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.3218400478363037,
      "learning_rate": 0.0004629418652910071,
      "loss": 2.6749,
      "step": 73092
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5372908115386963,
      "learning_rate": 0.0004629384306662546,
      "loss": 2.8544,
      "step": 73093
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.561862587928772,
      "learning_rate": 0.00046293499601120865,
      "loss": 3.017,
      "step": 73094
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.635126829147339,
      "learning_rate": 0.00046293156132587015,
      "loss": 3.2385,
      "step": 73095
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9435193538665771,
      "learning_rate": 0.00046292812661023953,
      "loss": 2.935,
      "step": 73096
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.628926157951355,
      "learning_rate": 0.00046292469186431744,
      "loss": 3.0063,
      "step": 73097
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7428525686264038,
      "learning_rate": 0.0004629212570881047,
      "loss": 3.0936,
      "step": 73098
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.484647512435913,
      "learning_rate": 0.0004629178222816017,
      "loss": 3.4679,
      "step": 73099
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6199722290039062,
      "learning_rate": 0.0004629143874448093,
      "loss": 3.0608,
      "step": 73100
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.553581953048706,
      "learning_rate": 0.000462910952577728,
      "loss": 2.9645,
      "step": 73101
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7129981517791748,
      "learning_rate": 0.0004629075176803585,
      "loss": 3.0456,
      "step": 73102
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5363165140151978,
      "learning_rate": 0.0004629040827527014,
      "loss": 3.0687,
      "step": 73103
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5467705726623535,
      "learning_rate": 0.0004629006477947573,
      "loss": 3.1107,
      "step": 73104
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6653201580047607,
      "learning_rate": 0.00046289721280652694,
      "loss": 3.0577,
      "step": 73105
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6488851308822632,
      "learning_rate": 0.0004628937777880109,
      "loss": 3.1677,
      "step": 73106
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.425917625427246,
      "learning_rate": 0.0004628903427392099,
      "loss": 3.1662,
      "step": 73107
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.721214771270752,
      "learning_rate": 0.00046288690766012444,
      "loss": 2.9187,
      "step": 73108
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5885099172592163,
      "learning_rate": 0.00046288347255075523,
      "loss": 3.0016,
      "step": 73109
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.890085220336914,
      "learning_rate": 0.00046288003741110296,
      "loss": 3.1005,
      "step": 73110
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.657112956047058,
      "learning_rate": 0.0004628766022411681,
      "loss": 3.0813,
      "step": 73111
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6581310033798218,
      "learning_rate": 0.0004628731670409516,
      "loss": 2.6873,
      "step": 73112
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.891278624534607,
      "learning_rate": 0.0004628697318104538,
      "loss": 2.8923,
      "step": 73113
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8503397703170776,
      "learning_rate": 0.0004628662965496754,
      "loss": 3.2442,
      "step": 73114
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7290771007537842,
      "learning_rate": 0.00046286286125861713,
      "loss": 2.9822,
      "step": 73115
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7318981885910034,
      "learning_rate": 0.00046285942593727957,
      "loss": 2.8511,
      "step": 73116
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3238420486450195,
      "learning_rate": 0.00046285599058566333,
      "loss": 2.9231,
      "step": 73117
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.678549289703369,
      "learning_rate": 0.00046285255520376906,
      "loss": 2.9649,
      "step": 73118
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0088722705841064,
      "learning_rate": 0.0004628491197915975,
      "loss": 2.9132,
      "step": 73119
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0663068294525146,
      "learning_rate": 0.00046284568434914927,
      "loss": 3.0382,
      "step": 73120
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2229580879211426,
      "learning_rate": 0.0004628422488764248,
      "loss": 2.9382,
      "step": 73121
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.792739748954773,
      "learning_rate": 0.000462838813373425,
      "loss": 3.0877,
      "step": 73122
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6012980937957764,
      "learning_rate": 0.00046283537784015037,
      "loss": 3.0918,
      "step": 73123
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.19821834564209,
      "learning_rate": 0.00046283194227660155,
      "loss": 2.8518,
      "step": 73124
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6719194650650024,
      "learning_rate": 0.00046282850668277926,
      "loss": 3.0934,
      "step": 73125
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4400463104248047,
      "learning_rate": 0.000462825071058684,
      "loss": 3.1212,
      "step": 73126
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5499705076217651,
      "learning_rate": 0.0004628216354043165,
      "loss": 2.9922,
      "step": 73127
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6169353723526,
      "learning_rate": 0.0004628181997196774,
      "loss": 2.7896,
      "step": 73128
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6913049221038818,
      "learning_rate": 0.0004628147640047673,
      "loss": 2.9435,
      "step": 73129
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5799931287765503,
      "learning_rate": 0.00046281132825958687,
      "loss": 2.9153,
      "step": 73130
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5658364295959473,
      "learning_rate": 0.0004628078924841368,
      "loss": 3.0598,
      "step": 73131
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7502665519714355,
      "learning_rate": 0.0004628044566784176,
      "loss": 2.88,
      "step": 73132
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6118444204330444,
      "learning_rate": 0.00046280102084242993,
      "loss": 3.0036,
      "step": 73133
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.684807300567627,
      "learning_rate": 0.00046279758497617463,
      "loss": 3.1093,
      "step": 73134
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0613040924072266,
      "learning_rate": 0.00046279414907965204,
      "loss": 2.8563,
      "step": 73135
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4026904106140137,
      "learning_rate": 0.00046279071315286297,
      "loss": 3.1415,
      "step": 73136
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8613674640655518,
      "learning_rate": 0.0004627872771958081,
      "loss": 2.9299,
      "step": 73137
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8108292818069458,
      "learning_rate": 0.000462783841208488,
      "loss": 3.2806,
      "step": 73138
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.488088607788086,
      "learning_rate": 0.0004627804051909032,
      "loss": 2.8375,
      "step": 73139
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.48118257522583,
      "learning_rate": 0.0004627769691430546,
      "loss": 2.8436,
      "step": 73140
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9097931385040283,
      "learning_rate": 0.00046277353306494265,
      "loss": 3.0783,
      "step": 73141
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9454083442687988,
      "learning_rate": 0.0004627700969565679,
      "loss": 2.7196,
      "step": 73142
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7169028520584106,
      "learning_rate": 0.0004627666608179313,
      "loss": 3.0123,
      "step": 73143
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8126085996627808,
      "learning_rate": 0.0004627632246490332,
      "loss": 3.1418,
      "step": 73144
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.439448356628418,
      "learning_rate": 0.0004627597884498743,
      "loss": 3.0986,
      "step": 73145
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.198122262954712,
      "learning_rate": 0.0004627563522204555,
      "loss": 2.87,
      "step": 73146
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7071212530136108,
      "learning_rate": 0.00046275291596077693,
      "loss": 3.0034,
      "step": 73147
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.808595895767212,
      "learning_rate": 0.00046274947967083965,
      "loss": 2.8424,
      "step": 73148
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5900181531906128,
      "learning_rate": 0.0004627460433506442,
      "loss": 3.0438,
      "step": 73149
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0395772457122803,
      "learning_rate": 0.0004627426070001912,
      "loss": 2.9062,
      "step": 73150
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1268136501312256,
      "learning_rate": 0.0004627391706194812,
      "loss": 3.1378,
      "step": 73151
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7242549657821655,
      "learning_rate": 0.00046273573420851504,
      "loss": 3.1905,
      "step": 73152
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9988075494766235,
      "learning_rate": 0.0004627322977672931,
      "loss": 3.1994,
      "step": 73153
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4872822761535645,
      "learning_rate": 0.0004627288612958162,
      "loss": 3.1939,
      "step": 73154
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6589618921279907,
      "learning_rate": 0.00046272542479408504,
      "loss": 2.8918,
      "step": 73155
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2440922260284424,
      "learning_rate": 0.0004627219882621,
      "loss": 3.0328,
      "step": 73156
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.360230803489685,
      "learning_rate": 0.00046271855169986194,
      "loss": 3.0296,
      "step": 73157
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.044085741043091,
      "learning_rate": 0.00046271511510737143,
      "loss": 3.0094,
      "step": 73158
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4812785387039185,
      "learning_rate": 0.00046271167848462903,
      "loss": 3.1082,
      "step": 73159
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5306048393249512,
      "learning_rate": 0.00046270824183163554,
      "loss": 3.1098,
      "step": 73160
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4904866218566895,
      "learning_rate": 0.00046270480514839157,
      "loss": 3.1266,
      "step": 73161
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9062166213989258,
      "learning_rate": 0.00046270136843489754,
      "loss": 2.9255,
      "step": 73162
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5123895406723022,
      "learning_rate": 0.0004626979316911544,
      "loss": 3.1812,
      "step": 73163
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6796587705612183,
      "learning_rate": 0.0004626944949171626,
      "loss": 3.1558,
      "step": 73164
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.514359712600708,
      "learning_rate": 0.0004626910581129228,
      "loss": 2.9564,
      "step": 73165
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0793981552124023,
      "learning_rate": 0.00046268762127843564,
      "loss": 3.028,
      "step": 73166
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5884441137313843,
      "learning_rate": 0.00046268418441370186,
      "loss": 3.0373,
      "step": 73167
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5729939937591553,
      "learning_rate": 0.0004626807475187219,
      "loss": 3.1381,
      "step": 73168
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4870874881744385,
      "learning_rate": 0.00046267731059349666,
      "loss": 3.1289,
      "step": 73169
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3313851356506348,
      "learning_rate": 0.0004626738736380266,
      "loss": 3.008,
      "step": 73170
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.655182957649231,
      "learning_rate": 0.00046267043665231233,
      "loss": 3.0775,
      "step": 73171
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2258965969085693,
      "learning_rate": 0.0004626669996363546,
      "loss": 2.8193,
      "step": 73172
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2034215927124023,
      "learning_rate": 0.00046266356259015397,
      "loss": 2.8347,
      "step": 73173
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9635249376296997,
      "learning_rate": 0.0004626601255137112,
      "loss": 3.1364,
      "step": 73174
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.537559151649475,
      "learning_rate": 0.00046265668840702673,
      "loss": 3.0725,
      "step": 73175
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2798912525177,
      "learning_rate": 0.0004626532512701014,
      "loss": 2.8848,
      "step": 73176
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4803756475448608,
      "learning_rate": 0.0004626498141029357,
      "loss": 3.119,
      "step": 73177
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.575387716293335,
      "learning_rate": 0.0004626463769055303,
      "loss": 2.8771,
      "step": 73178
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4394973516464233,
      "learning_rate": 0.00046264293967788594,
      "loss": 3.0385,
      "step": 73179
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7367911338806152,
      "learning_rate": 0.0004626395024200032,
      "loss": 2.8983,
      "step": 73180
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4669493436813354,
      "learning_rate": 0.0004626360651318827,
      "loss": 3.1557,
      "step": 73181
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6013628244400024,
      "learning_rate": 0.000462632627813525,
      "loss": 3.1691,
      "step": 73182
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8747040033340454,
      "learning_rate": 0.00046262919046493093,
      "loss": 3.0209,
      "step": 73183
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6932733058929443,
      "learning_rate": 0.00046262575308610095,
      "loss": 2.9143,
      "step": 73184
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7154219150543213,
      "learning_rate": 0.00046262231567703577,
      "loss": 3.0463,
      "step": 73185
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.322340488433838,
      "learning_rate": 0.0004626188782377361,
      "loss": 2.771,
      "step": 73186
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5785596370697021,
      "learning_rate": 0.00046261544076820247,
      "loss": 2.7764,
      "step": 73187
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8375327587127686,
      "learning_rate": 0.0004626120032684355,
      "loss": 2.8108,
      "step": 73188
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8156977891921997,
      "learning_rate": 0.000462608565738436,
      "loss": 2.9883,
      "step": 73189
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7702863216400146,
      "learning_rate": 0.0004626051281782045,
      "loss": 2.8722,
      "step": 73190
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.497023582458496,
      "learning_rate": 0.0004626016905877415,
      "loss": 2.9419,
      "step": 73191
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.742663025856018,
      "learning_rate": 0.00046259825296704785,
      "loss": 3.2209,
      "step": 73192
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7939205169677734,
      "learning_rate": 0.00046259481531612417,
      "loss": 3.0275,
      "step": 73193
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.633229374885559,
      "learning_rate": 0.000462591377634971,
      "loss": 2.7227,
      "step": 73194
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6880106925964355,
      "learning_rate": 0.00046258793992358903,
      "loss": 3.2704,
      "step": 73195
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4727752208709717,
      "learning_rate": 0.0004625845021819789,
      "loss": 3.0242,
      "step": 73196
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.139000654220581,
      "learning_rate": 0.0004625810644101413,
      "loss": 3.1215,
      "step": 73197
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.775966763496399,
      "learning_rate": 0.00046257762660807666,
      "loss": 3.24,
      "step": 73198
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6625343561172485,
      "learning_rate": 0.0004625741887757858,
      "loss": 3.2039,
      "step": 73199
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.142488718032837,
      "learning_rate": 0.00046257075091326943,
      "loss": 2.9293,
      "step": 73200
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1063995361328125,
      "learning_rate": 0.00046256731302052805,
      "loss": 2.892,
      "step": 73201
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6076228618621826,
      "learning_rate": 0.0004625638750975623,
      "loss": 3.1243,
      "step": 73202
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.157968759536743,
      "learning_rate": 0.0004625604371443729,
      "loss": 3.1703,
      "step": 73203
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0355494022369385,
      "learning_rate": 0.00046255699916096044,
      "loss": 2.8924,
      "step": 73204
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9160529375076294,
      "learning_rate": 0.00046255356114732553,
      "loss": 2.8282,
      "step": 73205
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.045809268951416,
      "learning_rate": 0.0004625501231034689,
      "loss": 3.0813,
      "step": 73206
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.627639651298523,
      "learning_rate": 0.00046254668502939116,
      "loss": 3.0991,
      "step": 73207
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6218692064285278,
      "learning_rate": 0.00046254324692509283,
      "loss": 3.0136,
      "step": 73208
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8074398040771484,
      "learning_rate": 0.0004625398087905747,
      "loss": 2.9955,
      "step": 73209
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4101076126098633,
      "learning_rate": 0.00046253637062583736,
      "loss": 2.9645,
      "step": 73210
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6741900444030762,
      "learning_rate": 0.00046253293243088135,
      "loss": 3.1157,
      "step": 73211
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.68876051902771,
      "learning_rate": 0.0004625294942057075,
      "loss": 2.9953,
      "step": 73212
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6947592496871948,
      "learning_rate": 0.0004625260559503163,
      "loss": 2.7507,
      "step": 73213
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4564765691757202,
      "learning_rate": 0.00046252261766470843,
      "loss": 3.0237,
      "step": 73214
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8617409467697144,
      "learning_rate": 0.0004625191793488846,
      "loss": 3.3006,
      "step": 73215
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8138757944107056,
      "learning_rate": 0.0004625157410028454,
      "loss": 2.8836,
      "step": 73216
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3768844604492188,
      "learning_rate": 0.0004625123026265913,
      "loss": 3.0689,
      "step": 73217
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4070866107940674,
      "learning_rate": 0.0004625088642201232,
      "loss": 3.0329,
      "step": 73218
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.944079041481018,
      "learning_rate": 0.00046250542578344164,
      "loss": 2.9959,
      "step": 73219
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4259161949157715,
      "learning_rate": 0.00046250198731654724,
      "loss": 3.2311,
      "step": 73220
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.740007162094116,
      "learning_rate": 0.0004624985488194407,
      "loss": 3.1486,
      "step": 73221
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.328601121902466,
      "learning_rate": 0.0004624951102921225,
      "loss": 3.0209,
      "step": 73222
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4762539863586426,
      "learning_rate": 0.00046249167173459355,
      "loss": 2.6862,
      "step": 73223
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.713443636894226,
      "learning_rate": 0.0004624882331468542,
      "loss": 3.2275,
      "step": 73224
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1018388271331787,
      "learning_rate": 0.00046248479452890527,
      "loss": 3.1768,
      "step": 73225
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.5831587314605713,
      "learning_rate": 0.00046248135588074734,
      "loss": 3.2803,
      "step": 73226
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7354334592819214,
      "learning_rate": 0.00046247791720238103,
      "loss": 3.1021,
      "step": 73227
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.544258713722229,
      "learning_rate": 0.00046247447849380707,
      "loss": 3.0934,
      "step": 73228
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9674677848815918,
      "learning_rate": 0.000462471039755026,
      "loss": 3.1156,
      "step": 73229
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.5317139625549316,
      "learning_rate": 0.0004624676009860385,
      "loss": 2.8694,
      "step": 73230
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2788164615631104,
      "learning_rate": 0.0004624641621868452,
      "loss": 3.1681,
      "step": 73231
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6008942127227783,
      "learning_rate": 0.0004624607233574468,
      "loss": 3.1597,
      "step": 73232
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.328643798828125,
      "learning_rate": 0.0004624572844978438,
      "loss": 2.861,
      "step": 73233
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.861972451210022,
      "learning_rate": 0.0004624538456080369,
      "loss": 3.1108,
      "step": 73234
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.101263999938965,
      "learning_rate": 0.00046245040668802683,
      "loss": 2.7413,
      "step": 73235
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7305703163146973,
      "learning_rate": 0.00046244696773781424,
      "loss": 2.8606,
      "step": 73236
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.078385591506958,
      "learning_rate": 0.00046244352875739956,
      "loss": 2.8349,
      "step": 73237
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1498970985412598,
      "learning_rate": 0.0004624400897467837,
      "loss": 2.7672,
      "step": 73238
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.5596706867218018,
      "learning_rate": 0.000462436650705967,
      "loss": 2.966,
      "step": 73239
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8552377223968506,
      "learning_rate": 0.0004624332116349504,
      "loss": 3.0522,
      "step": 73240
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4649882316589355,
      "learning_rate": 0.0004624297725337343,
      "loss": 2.8593,
      "step": 73241
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3321926593780518,
      "learning_rate": 0.00046242633340231947,
      "loss": 2.9623,
      "step": 73242
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9181016683578491,
      "learning_rate": 0.0004624228942407065,
      "loss": 3.1972,
      "step": 73243
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3534204959869385,
      "learning_rate": 0.00046241945504889607,
      "loss": 3.0688,
      "step": 73244
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.088878870010376,
      "learning_rate": 0.0004624160158268888,
      "loss": 3.0585,
      "step": 73245
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.00142240524292,
      "learning_rate": 0.00046241257657468534,
      "loss": 3.1848,
      "step": 73246
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7246184349060059,
      "learning_rate": 0.0004624091372922863,
      "loss": 3.0516,
      "step": 73247
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6578943729400635,
      "learning_rate": 0.0004624056979796923,
      "loss": 2.9212,
      "step": 73248
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.887382984161377,
      "learning_rate": 0.0004624022586369041,
      "loss": 2.8139,
      "step": 73249
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.2606115341186523,
      "learning_rate": 0.0004623988192639222,
      "loss": 2.9186,
      "step": 73250
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5231930017471313,
      "learning_rate": 0.0004623953798607473,
      "loss": 3.0096,
      "step": 73251
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.442986488342285,
      "learning_rate": 0.00046239194042737994,
      "loss": 3.0141,
      "step": 73252
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.518838405609131,
      "learning_rate": 0.0004623885009638211,
      "loss": 2.938,
      "step": 73253
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7953180074691772,
      "learning_rate": 0.000462385061470071,
      "loss": 3.0107,
      "step": 73254
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7730985879898071,
      "learning_rate": 0.0004623816219461304,
      "loss": 2.9408,
      "step": 73255
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7111319303512573,
      "learning_rate": 0.0004623781823920001,
      "loss": 3.0666,
      "step": 73256
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7510945796966553,
      "learning_rate": 0.0004623747428076806,
      "loss": 3.2789,
      "step": 73257
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4532872438430786,
      "learning_rate": 0.00046237130319317255,
      "loss": 3.1639,
      "step": 73258
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.696293830871582,
      "learning_rate": 0.0004623678635484767,
      "loss": 3.0141,
      "step": 73259
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6626574993133545,
      "learning_rate": 0.00046236442387359344,
      "loss": 2.8472,
      "step": 73260
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7082141637802124,
      "learning_rate": 0.0004623609841685237,
      "loss": 2.6108,
      "step": 73261
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6370762586593628,
      "learning_rate": 0.000462357544433268,
      "loss": 2.9141,
      "step": 73262
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6000334024429321,
      "learning_rate": 0.0004623541046678268,
      "loss": 3.3017,
      "step": 73263
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2005198001861572,
      "learning_rate": 0.000462350664872201,
      "loss": 2.9635,
      "step": 73264
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0139999389648438,
      "learning_rate": 0.00046234722504639124,
      "loss": 2.8715,
      "step": 73265
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9921936988830566,
      "learning_rate": 0.000462343785190398,
      "loss": 2.994,
      "step": 73266
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8905688524246216,
      "learning_rate": 0.00046234034530422195,
      "loss": 2.772,
      "step": 73267
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4690508842468262,
      "learning_rate": 0.0004623369053878638,
      "loss": 2.9944,
      "step": 73268
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7417222261428833,
      "learning_rate": 0.0004623334654413241,
      "loss": 2.8003,
      "step": 73269
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.717606782913208,
      "learning_rate": 0.0004623300254646036,
      "loss": 3.0132,
      "step": 73270
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5964275598526,
      "learning_rate": 0.00046232658545770287,
      "loss": 3.037,
      "step": 73271
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.672548770904541,
      "learning_rate": 0.0004623231454206226,
      "loss": 3.0384,
      "step": 73272
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6291128396987915,
      "learning_rate": 0.0004623197053533633,
      "loss": 2.8849,
      "step": 73273
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5862581729888916,
      "learning_rate": 0.0004623162652559258,
      "loss": 3.0369,
      "step": 73274
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7700347900390625,
      "learning_rate": 0.0004623128251283106,
      "loss": 2.8996,
      "step": 73275
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5810412168502808,
      "learning_rate": 0.0004623093849705184,
      "loss": 3.0716,
      "step": 73276
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7257200479507446,
      "learning_rate": 0.00046230594478254976,
      "loss": 2.9645,
      "step": 73277
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7907663583755493,
      "learning_rate": 0.0004623025045644055,
      "loss": 3.1713,
      "step": 73278
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7877748012542725,
      "learning_rate": 0.000462299064316086,
      "loss": 3.096,
      "step": 73279
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7054842710494995,
      "learning_rate": 0.00046229562403759215,
      "loss": 3.0407,
      "step": 73280
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6708587408065796,
      "learning_rate": 0.00046229218372892443,
      "loss": 3.1764,
      "step": 73281
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5079361200332642,
      "learning_rate": 0.00046228874339008353,
      "loss": 2.8995,
      "step": 73282
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8440457582473755,
      "learning_rate": 0.0004622853030210701,
      "loss": 2.7753,
      "step": 73283
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8683580160140991,
      "learning_rate": 0.0004622818626218848,
      "loss": 3.2189,
      "step": 73284
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9931786060333252,
      "learning_rate": 0.0004622784221925282,
      "loss": 2.9744,
      "step": 73285
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.519176721572876,
      "learning_rate": 0.000462274981733001,
      "loss": 3.1537,
      "step": 73286
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5538525581359863,
      "learning_rate": 0.0004622715412433038,
      "loss": 3.2119,
      "step": 73287
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.868881106376648,
      "learning_rate": 0.0004622681007234372,
      "loss": 2.9738,
      "step": 73288
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.67940354347229,
      "learning_rate": 0.00046226466017340205,
      "loss": 3.0585,
      "step": 73289
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.3225457668304443,
      "learning_rate": 0.00046226121959319866,
      "loss": 2.9337,
      "step": 73290
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6918201446533203,
      "learning_rate": 0.0004622577789828279,
      "loss": 3.2601,
      "step": 73291
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.561411142349243,
      "learning_rate": 0.0004622543383422905,
      "loss": 3.0719,
      "step": 73292
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.9454087018966675,
      "learning_rate": 0.0004622508976715868,
      "loss": 3.2928,
      "step": 73293
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7176835536956787,
      "learning_rate": 0.0004622474569707176,
      "loss": 2.8765,
      "step": 73294
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5289870500564575,
      "learning_rate": 0.0004622440162396836,
      "loss": 3.0214,
      "step": 73295
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5743223428726196,
      "learning_rate": 0.00046224057547848533,
      "loss": 2.9988,
      "step": 73296
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.4572370052337646,
      "learning_rate": 0.0004622371346871235,
      "loss": 2.7914,
      "step": 73297
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.490397572517395,
      "learning_rate": 0.00046223369386559865,
      "loss": 2.9857,
      "step": 73298
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8172236680984497,
      "learning_rate": 0.00046223025301391163,
      "loss": 3.0974,
      "step": 73299
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0810019969940186,
      "learning_rate": 0.00046222681213206287,
      "loss": 3.0037,
      "step": 73300
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.910043478012085,
      "learning_rate": 0.00046222337122005307,
      "loss": 3.0484,
      "step": 73301
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.143054246902466,
      "learning_rate": 0.0004622199302778829,
      "loss": 2.6837,
      "step": 73302
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.634765863418579,
      "learning_rate": 0.000462216489305553,
      "loss": 3.2499,
      "step": 73303
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.482158899307251,
      "learning_rate": 0.00046221304830306397,
      "loss": 2.9706,
      "step": 73304
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.277702808380127,
      "learning_rate": 0.0004622096072704165,
      "loss": 3.0393,
      "step": 73305
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1915299892425537,
      "learning_rate": 0.00046220616620761115,
      "loss": 3.153,
      "step": 73306
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0527899265289307,
      "learning_rate": 0.0004622027251146486,
      "loss": 3.0895,
      "step": 73307
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.0356578826904297,
      "learning_rate": 0.00046219928399152956,
      "loss": 2.984,
      "step": 73308
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.02498459815979,
      "learning_rate": 0.0004621958428382546,
      "loss": 3.0802,
      "step": 73309
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5878055095672607,
      "learning_rate": 0.00046219240165482436,
      "loss": 2.6868,
      "step": 73310
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5017608404159546,
      "learning_rate": 0.00046218896044123955,
      "loss": 2.9876,
      "step": 73311
    },
    {
      "epoch": 0.95,
      "grad_norm": 6.170851707458496,
      "learning_rate": 0.0004621855191975007,
      "loss": 3.0528,
      "step": 73312
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.7751080989837646,
      "learning_rate": 0.0004621820779236085,
      "loss": 2.9471,
      "step": 73313
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.2406628131866455,
      "learning_rate": 0.00046217863661956354,
      "loss": 2.9479,
      "step": 73314
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5933204889297485,
      "learning_rate": 0.0004621751952853666,
      "loss": 3.107,
      "step": 73315
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.5737481117248535,
      "learning_rate": 0.00046217175392101816,
      "loss": 2.9407,
      "step": 73316
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0572965145111084,
      "learning_rate": 0.00046216831252651893,
      "loss": 2.9592,
      "step": 73317
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8657152652740479,
      "learning_rate": 0.0004621648711018696,
      "loss": 2.8587,
      "step": 73318
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7399080991744995,
      "learning_rate": 0.0004621614296470707,
      "loss": 3.2134,
      "step": 73319
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.0181937217712402,
      "learning_rate": 0.00046215798816212297,
      "loss": 2.8539,
      "step": 73320
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.8721798658370972,
      "learning_rate": 0.000462154546647027,
      "loss": 3.1473,
      "step": 73321
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.6782796382904053,
      "learning_rate": 0.00046215110510178345,
      "loss": 3.339,
      "step": 73322
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6134804487228394,
      "learning_rate": 0.00046214766352639295,
      "loss": 3.1258,
      "step": 73323
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.941680669784546,
      "learning_rate": 0.0004621442219208562,
      "loss": 3.1539,
      "step": 73324
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.7811834812164307,
      "learning_rate": 0.00046214078028517365,
      "loss": 2.886,
      "step": 73325
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4977362155914307,
      "learning_rate": 0.0004621373386193461,
      "loss": 3.0247,
      "step": 73326
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5083454847335815,
      "learning_rate": 0.0004621338969233742,
      "loss": 3.0602,
      "step": 73327
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.294881582260132,
      "learning_rate": 0.00046213045519725857,
      "loss": 3.1634,
      "step": 73328
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.281508445739746,
      "learning_rate": 0.0004621270134409998,
      "loss": 3.0447,
      "step": 73329
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.772626280784607,
      "learning_rate": 0.0004621235716545985,
      "loss": 2.9316,
      "step": 73330
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.114788770675659,
      "learning_rate": 0.0004621201298380554,
      "loss": 3.0237,
      "step": 73331
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.429644823074341,
      "learning_rate": 0.0004621166879913711,
      "loss": 3.193,
      "step": 73332
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5112773180007935,
      "learning_rate": 0.00046211324611454633,
      "loss": 3.124,
      "step": 73333
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5501465797424316,
      "learning_rate": 0.0004621098042075816,
      "loss": 2.9252,
      "step": 73334
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.5841927528381348,
      "learning_rate": 0.0004621063622704776,
      "loss": 3.0694,
      "step": 73335
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.1644036769866943,
      "learning_rate": 0.0004621029203032349,
      "loss": 2.9527,
      "step": 73336
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.5827494859695435,
      "learning_rate": 0.0004620994783058543,
      "loss": 2.8859,
      "step": 73337
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7256877422332764,
      "learning_rate": 0.00046209603627833635,
      "loss": 3.0075,
      "step": 73338
    },
    {
      "epoch": 0.95,
      "grad_norm": 2.960629940032959,
      "learning_rate": 0.00046209259422068166,
      "loss": 2.8852,
      "step": 73339
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6440746784210205,
      "learning_rate": 0.0004620891521328908,
      "loss": 3.2081,
      "step": 73340
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.7602403163909912,
      "learning_rate": 0.0004620857100149646,
      "loss": 2.9897,
      "step": 73341
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.6037598848342896,
      "learning_rate": 0.00046208226786690357,
      "loss": 2.9078,
      "step": 73342
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4818366765975952,
      "learning_rate": 0.0004620788256887084,
      "loss": 3.2055,
      "step": 73343
    },
    {
      "epoch": 0.95,
      "grad_norm": 1.4232637882232666,
      "learning_rate": 0.00046207538348037977,
      "loss": 2.9635,
      "step": 73344
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7431094646453857,
      "learning_rate": 0.0004620719412419182,
      "loss": 3.123,
      "step": 73345
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5975180864334106,
      "learning_rate": 0.0004620684989733244,
      "loss": 3.0399,
      "step": 73346
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4893101453781128,
      "learning_rate": 0.00046206505667459915,
      "loss": 3.0048,
      "step": 73347
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4314241409301758,
      "learning_rate": 0.0004620616143457428,
      "loss": 3.0509,
      "step": 73348
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9531652927398682,
      "learning_rate": 0.0004620581719867561,
      "loss": 3.0264,
      "step": 73349
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5078507661819458,
      "learning_rate": 0.00046205472959763984,
      "loss": 3.214,
      "step": 73350
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.500556230545044,
      "learning_rate": 0.0004620512871783945,
      "loss": 3.0085,
      "step": 73351
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4420976638793945,
      "learning_rate": 0.0004620478447290207,
      "loss": 3.0155,
      "step": 73352
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5558016300201416,
      "learning_rate": 0.0004620444022495193,
      "loss": 3.2509,
      "step": 73353
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8117622137069702,
      "learning_rate": 0.0004620409597398907,
      "loss": 3.2396,
      "step": 73354
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4599722623825073,
      "learning_rate": 0.0004620375172001356,
      "loss": 2.9469,
      "step": 73355
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7767486572265625,
      "learning_rate": 0.0004620340746302547,
      "loss": 3.0407,
      "step": 73356
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5915967226028442,
      "learning_rate": 0.00046203063203024855,
      "loss": 3.2981,
      "step": 73357
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.568069577217102,
      "learning_rate": 0.0004620271894001179,
      "loss": 3.1714,
      "step": 73358
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.180418014526367,
      "learning_rate": 0.0004620237467398634,
      "loss": 3.0941,
      "step": 73359
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5411123037338257,
      "learning_rate": 0.00046202030404948557,
      "loss": 3.101,
      "step": 73360
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8505134582519531,
      "learning_rate": 0.000462016861328985,
      "loss": 2.9848,
      "step": 73361
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9492101669311523,
      "learning_rate": 0.00046201341857836256,
      "loss": 2.9003,
      "step": 73362
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1107070446014404,
      "learning_rate": 0.00046200997579761875,
      "loss": 2.8133,
      "step": 73363
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7853281497955322,
      "learning_rate": 0.0004620065329867542,
      "loss": 3.0938,
      "step": 73364
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4936045408248901,
      "learning_rate": 0.00046200309014576965,
      "loss": 3.0223,
      "step": 73365
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.724015235900879,
      "learning_rate": 0.00046199964727466555,
      "loss": 2.9689,
      "step": 73366
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.809610366821289,
      "learning_rate": 0.00046199620437344276,
      "loss": 3.1139,
      "step": 73367
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.003225564956665,
      "learning_rate": 0.00046199276144210174,
      "loss": 2.9964,
      "step": 73368
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4615219831466675,
      "learning_rate": 0.0004619893184806433,
      "loss": 2.9015,
      "step": 73369
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4477444887161255,
      "learning_rate": 0.0004619858754890679,
      "loss": 3.0268,
      "step": 73370
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9423727989196777,
      "learning_rate": 0.0004619824324673763,
      "loss": 2.7904,
      "step": 73371
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7122472524642944,
      "learning_rate": 0.00046197898941556916,
      "loss": 3.0305,
      "step": 73372
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6197292804718018,
      "learning_rate": 0.00046197554633364694,
      "loss": 2.6973,
      "step": 73373
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.58082914352417,
      "learning_rate": 0.0004619721032216105,
      "loss": 3.009,
      "step": 73374
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4009160995483398,
      "learning_rate": 0.00046196866007946043,
      "loss": 2.8827,
      "step": 73375
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5062851905822754,
      "learning_rate": 0.0004619652169071973,
      "loss": 2.9454,
      "step": 73376
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8814343214035034,
      "learning_rate": 0.00046196177370482166,
      "loss": 2.9339,
      "step": 73377
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.962270975112915,
      "learning_rate": 0.0004619583304723344,
      "loss": 3.0435,
      "step": 73378
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.626707673072815,
      "learning_rate": 0.000461954887209736,
      "loss": 2.9486,
      "step": 73379
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3370935916900635,
      "learning_rate": 0.00046195144391702705,
      "loss": 3.0712,
      "step": 73380
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8510448932647705,
      "learning_rate": 0.00046194800059420844,
      "loss": 3.0152,
      "step": 73381
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6435341835021973,
      "learning_rate": 0.0004619445572412805,
      "loss": 2.9931,
      "step": 73382
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.171570301055908,
      "learning_rate": 0.000461941113858244,
      "loss": 2.9821,
      "step": 73383
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.747637987136841,
      "learning_rate": 0.0004619376704450997,
      "loss": 2.9305,
      "step": 73384
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6566921472549438,
      "learning_rate": 0.00046193422700184804,
      "loss": 3.1933,
      "step": 73385
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.636179208755493,
      "learning_rate": 0.0004619307835284898,
      "loss": 2.9408,
      "step": 73386
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.931243658065796,
      "learning_rate": 0.0004619273400250255,
      "loss": 3.2668,
      "step": 73387
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5938856601715088,
      "learning_rate": 0.000461923896491456,
      "loss": 3.2048,
      "step": 73388
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7280213832855225,
      "learning_rate": 0.00046192045292778165,
      "loss": 3.1186,
      "step": 73389
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.767273426055908,
      "learning_rate": 0.00046191700933400325,
      "loss": 2.9231,
      "step": 73390
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.620195150375366,
      "learning_rate": 0.0004619135657101215,
      "loss": 2.9917,
      "step": 73391
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.003290891647339,
      "learning_rate": 0.00046191012205613685,
      "loss": 2.9881,
      "step": 73392
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3374032974243164,
      "learning_rate": 0.00046190667837205015,
      "loss": 3.2045,
      "step": 73393
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.02126407623291,
      "learning_rate": 0.0004619032346578618,
      "loss": 2.9636,
      "step": 73394
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8952592611312866,
      "learning_rate": 0.00046189979091357274,
      "loss": 3.2673,
      "step": 73395
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8205715417861938,
      "learning_rate": 0.00046189634713918344,
      "loss": 2.9331,
      "step": 73396
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4727299213409424,
      "learning_rate": 0.0004618929033346944,
      "loss": 3.0831,
      "step": 73397
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5158665180206299,
      "learning_rate": 0.0004618894595001065,
      "loss": 2.9945,
      "step": 73398
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6952248811721802,
      "learning_rate": 0.0004618860156354204,
      "loss": 3.04,
      "step": 73399
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4658565521240234,
      "learning_rate": 0.00046188257174063654,
      "loss": 3.1214,
      "step": 73400
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4330967664718628,
      "learning_rate": 0.0004618791278157557,
      "loss": 3.0493,
      "step": 73401
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8139219284057617,
      "learning_rate": 0.00046187568386077844,
      "loss": 2.918,
      "step": 73402
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7346477508544922,
      "learning_rate": 0.00046187223987570534,
      "loss": 2.9064,
      "step": 73403
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6520484685897827,
      "learning_rate": 0.00046186879586053724,
      "loss": 2.9045,
      "step": 73404
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.417840838432312,
      "learning_rate": 0.0004618653518152747,
      "loss": 3.1191,
      "step": 73405
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.794108510017395,
      "learning_rate": 0.00046186190773991827,
      "loss": 2.9902,
      "step": 73406
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5723990201950073,
      "learning_rate": 0.0004618584636344687,
      "loss": 2.9429,
      "step": 73407
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2701597213745117,
      "learning_rate": 0.0004618550194989266,
      "loss": 2.9283,
      "step": 73408
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5419634580612183,
      "learning_rate": 0.0004618515753332926,
      "loss": 3.1136,
      "step": 73409
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6072793006896973,
      "learning_rate": 0.0004618481311375672,
      "loss": 3.0698,
      "step": 73410
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.692274570465088,
      "learning_rate": 0.0004618446869117514,
      "loss": 2.9436,
      "step": 73411
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.292149782180786,
      "learning_rate": 0.0004618412426558455,
      "loss": 3.1232,
      "step": 73412
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8857914209365845,
      "learning_rate": 0.0004618377983698502,
      "loss": 3.171,
      "step": 73413
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3307790756225586,
      "learning_rate": 0.00046183435405376634,
      "loss": 3.0371,
      "step": 73414
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.713192343711853,
      "learning_rate": 0.0004618309097075943,
      "loss": 2.7646,
      "step": 73415
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6822230815887451,
      "learning_rate": 0.0004618274653313348,
      "loss": 2.7765,
      "step": 73416
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6968783140182495,
      "learning_rate": 0.0004618240209249887,
      "loss": 3.114,
      "step": 73417
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.696993350982666,
      "learning_rate": 0.0004618205764885564,
      "loss": 3.1729,
      "step": 73418
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7140779495239258,
      "learning_rate": 0.0004618171320220385,
      "loss": 3.0057,
      "step": 73419
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6269570589065552,
      "learning_rate": 0.00046181368752543577,
      "loss": 2.7114,
      "step": 73420
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9793676137924194,
      "learning_rate": 0.00046181024299874883,
      "loss": 3.0268,
      "step": 73421
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5340301990509033,
      "learning_rate": 0.0004618067984419783,
      "loss": 2.7061,
      "step": 73422
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5848966836929321,
      "learning_rate": 0.00046180335385512494,
      "loss": 3.3607,
      "step": 73423
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2324070930480957,
      "learning_rate": 0.0004617999092381891,
      "loss": 3.0381,
      "step": 73424
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5985357761383057,
      "learning_rate": 0.0004617964645911718,
      "loss": 3.102,
      "step": 73425
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.537926197052002,
      "learning_rate": 0.00046179301991407343,
      "loss": 2.8937,
      "step": 73426
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.609165906906128,
      "learning_rate": 0.0004617895752068946,
      "loss": 3.268,
      "step": 73427
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4410508871078491,
      "learning_rate": 0.00046178613046963607,
      "loss": 2.8983,
      "step": 73428
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.667511224746704,
      "learning_rate": 0.0004617826857022984,
      "loss": 2.9255,
      "step": 73429
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.582940697669983,
      "learning_rate": 0.00046177924090488236,
      "loss": 2.9363,
      "step": 73430
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.167390823364258,
      "learning_rate": 0.00046177579607738846,
      "loss": 3.1635,
      "step": 73431
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.732319712638855,
      "learning_rate": 0.0004617723512198174,
      "loss": 3.0611,
      "step": 73432
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.444488763809204,
      "learning_rate": 0.0004617689063321698,
      "loss": 3.3242,
      "step": 73433
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.144575595855713,
      "learning_rate": 0.0004617654614144463,
      "loss": 2.8671,
      "step": 73434
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9928853511810303,
      "learning_rate": 0.00046176201646664755,
      "loss": 2.9841,
      "step": 73435
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.628434419631958,
      "learning_rate": 0.0004617585714887742,
      "loss": 2.9747,
      "step": 73436
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.931708335876465,
      "learning_rate": 0.00046175512648082684,
      "loss": 2.9314,
      "step": 73437
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.905120849609375,
      "learning_rate": 0.0004617516814428062,
      "loss": 2.9037,
      "step": 73438
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.859877109527588,
      "learning_rate": 0.00046174823637471293,
      "loss": 3.1464,
      "step": 73439
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.567346215248108,
      "learning_rate": 0.00046174479127654753,
      "loss": 3.1055,
      "step": 73440
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5008349418640137,
      "learning_rate": 0.00046174134614831064,
      "loss": 3.0693,
      "step": 73441
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.613391637802124,
      "learning_rate": 0.00046173790099000313,
      "loss": 3.03,
      "step": 73442
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6988441944122314,
      "learning_rate": 0.00046173445580162533,
      "loss": 3.1309,
      "step": 73443
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9345195293426514,
      "learning_rate": 0.00046173101058317816,
      "loss": 2.9686,
      "step": 73444
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.117429733276367,
      "learning_rate": 0.0004617275653346621,
      "loss": 2.9901,
      "step": 73445
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5044848918914795,
      "learning_rate": 0.0004617241200560779,
      "loss": 3.0037,
      "step": 73446
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8509474992752075,
      "learning_rate": 0.00046172067474742605,
      "loss": 2.9706,
      "step": 73447
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6755328178405762,
      "learning_rate": 0.00046171722940870736,
      "loss": 2.9824,
      "step": 73448
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6624988317489624,
      "learning_rate": 0.0004617137840399223,
      "loss": 3.2018,
      "step": 73449
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.536186695098877,
      "learning_rate": 0.00046171033864107156,
      "loss": 3.1202,
      "step": 73450
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5402302742004395,
      "learning_rate": 0.000461706893212156,
      "loss": 3.002,
      "step": 73451
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5877686738967896,
      "learning_rate": 0.0004617034477531759,
      "loss": 3.1628,
      "step": 73452
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.621448040008545,
      "learning_rate": 0.0004617000022641321,
      "loss": 2.9553,
      "step": 73453
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4978690147399902,
      "learning_rate": 0.00046169655674502524,
      "loss": 2.9833,
      "step": 73454
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.027074098587036,
      "learning_rate": 0.00046169311119585596,
      "loss": 3.1053,
      "step": 73455
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.239443778991699,
      "learning_rate": 0.0004616896656166248,
      "loss": 3.2337,
      "step": 73456
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7261466979980469,
      "learning_rate": 0.00046168622000733266,
      "loss": 2.9976,
      "step": 73457
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6271179914474487,
      "learning_rate": 0.0004616827743679799,
      "loss": 3.0397,
      "step": 73458
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8521696329116821,
      "learning_rate": 0.00046167932869856717,
      "loss": 2.8147,
      "step": 73459
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5009149312973022,
      "learning_rate": 0.00046167588299909527,
      "loss": 3.0706,
      "step": 73460
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.52101731300354,
      "learning_rate": 0.0004616724372695648,
      "loss": 3.1706,
      "step": 73461
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.7407619953155518,
      "learning_rate": 0.0004616689915099763,
      "loss": 3.0524,
      "step": 73462
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9175715446472168,
      "learning_rate": 0.00046166554572033057,
      "loss": 2.862,
      "step": 73463
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.603656530380249,
      "learning_rate": 0.0004616620999006282,
      "loss": 2.9136,
      "step": 73464
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8138140439987183,
      "learning_rate": 0.00046165865405086966,
      "loss": 2.9267,
      "step": 73465
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.567378044128418,
      "learning_rate": 0.00046165520817105586,
      "loss": 3.1891,
      "step": 73466
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8821687698364258,
      "learning_rate": 0.0004616517622611872,
      "loss": 3.1303,
      "step": 73467
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2798047065734863,
      "learning_rate": 0.00046164831632126437,
      "loss": 3.1594,
      "step": 73468
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5136622190475464,
      "learning_rate": 0.0004616448703512882,
      "loss": 3.1032,
      "step": 73469
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3465834856033325,
      "learning_rate": 0.00046164142435125915,
      "loss": 2.8328,
      "step": 73470
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.879906177520752,
      "learning_rate": 0.0004616379783211779,
      "loss": 3.2124,
      "step": 73471
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7944257259368896,
      "learning_rate": 0.0004616345322610451,
      "loss": 3.0602,
      "step": 73472
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.436807632446289,
      "learning_rate": 0.0004616310861708614,
      "loss": 2.7919,
      "step": 73473
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.94080650806427,
      "learning_rate": 0.0004616276400506274,
      "loss": 2.8175,
      "step": 73474
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.546800136566162,
      "learning_rate": 0.0004616241939003438,
      "loss": 3.3478,
      "step": 73475
    },
    {
      "epoch": 0.96,
      "grad_norm": 4.966271877288818,
      "learning_rate": 0.0004616207477200113,
      "loss": 3.0405,
      "step": 73476
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4434173107147217,
      "learning_rate": 0.0004616173015096303,
      "loss": 2.9864,
      "step": 73477
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6456499099731445,
      "learning_rate": 0.00046161385526920164,
      "loss": 3.0751,
      "step": 73478
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2219767570495605,
      "learning_rate": 0.0004616104089987259,
      "loss": 2.8063,
      "step": 73479
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.011214256286621,
      "learning_rate": 0.00046160696269820375,
      "loss": 3.0499,
      "step": 73480
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9278349876403809,
      "learning_rate": 0.00046160351636763587,
      "loss": 3.0064,
      "step": 73481
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4395893812179565,
      "learning_rate": 0.00046160007000702275,
      "loss": 3.3831,
      "step": 73482
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.568205714225769,
      "learning_rate": 0.0004615966236163652,
      "loss": 3.0532,
      "step": 73483
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0455684661865234,
      "learning_rate": 0.0004615931771956638,
      "loss": 3.2731,
      "step": 73484
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5292497873306274,
      "learning_rate": 0.00046158973074491904,
      "loss": 2.9405,
      "step": 73485
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3659632205963135,
      "learning_rate": 0.0004615862842641318,
      "loss": 2.971,
      "step": 73486
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4068634510040283,
      "learning_rate": 0.0004615828377533027,
      "loss": 3.0684,
      "step": 73487
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.245842456817627,
      "learning_rate": 0.0004615793912124322,
      "loss": 2.9275,
      "step": 73488
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4811841249465942,
      "learning_rate": 0.00046157594464152097,
      "loss": 3.0088,
      "step": 73489
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5240603685379028,
      "learning_rate": 0.00046157249804056993,
      "loss": 3.166,
      "step": 73490
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9814473390579224,
      "learning_rate": 0.00046156905140957934,
      "loss": 3.1892,
      "step": 73491
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.61370050907135,
      "learning_rate": 0.0004615656047485501,
      "loss": 3.3911,
      "step": 73492
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4788947105407715,
      "learning_rate": 0.00046156215805748273,
      "loss": 3.1123,
      "step": 73493
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5180768966674805,
      "learning_rate": 0.0004615587113363779,
      "loss": 3.0607,
      "step": 73494
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.321732997894287,
      "learning_rate": 0.00046155526458523626,
      "loss": 3.1165,
      "step": 73495
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.58107328414917,
      "learning_rate": 0.0004615518178040585,
      "loss": 3.109,
      "step": 73496
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4935599565505981,
      "learning_rate": 0.00046154837099284516,
      "loss": 2.8406,
      "step": 73497
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.600573182106018,
      "learning_rate": 0.0004615449241515969,
      "loss": 3.0586,
      "step": 73498
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5543267726898193,
      "learning_rate": 0.0004615414772803144,
      "loss": 3.0114,
      "step": 73499
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3965067863464355,
      "learning_rate": 0.0004615380303789984,
      "loss": 3.0353,
      "step": 73500
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6042245626449585,
      "learning_rate": 0.00046153458344764923,
      "loss": 3.2153,
      "step": 73501
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7226027250289917,
      "learning_rate": 0.00046153113648626787,
      "loss": 3.018,
      "step": 73502
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.603186011314392,
      "learning_rate": 0.0004615276894948548,
      "loss": 3.0261,
      "step": 73503
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5326489210128784,
      "learning_rate": 0.00046152424247341065,
      "loss": 3.0202,
      "step": 73504
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.852195382118225,
      "learning_rate": 0.00046152079542193615,
      "loss": 2.7193,
      "step": 73505
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6550631523132324,
      "learning_rate": 0.00046151734834043185,
      "loss": 3.1071,
      "step": 73506
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8451896905899048,
      "learning_rate": 0.0004615139012288984,
      "loss": 3.4229,
      "step": 73507
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4501352310180664,
      "learning_rate": 0.0004615104540873365,
      "loss": 2.8994,
      "step": 73508
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5324621200561523,
      "learning_rate": 0.00046150700691574684,
      "loss": 3.0587,
      "step": 73509
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8327724933624268,
      "learning_rate": 0.0004615035597141299,
      "loss": 3.2382,
      "step": 73510
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0819671154022217,
      "learning_rate": 0.00046150011248248627,
      "loss": 3.0912,
      "step": 73511
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5985201597213745,
      "learning_rate": 0.00046149666522081695,
      "loss": 2.8066,
      "step": 73512
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.076528787612915,
      "learning_rate": 0.00046149321792912217,
      "loss": 2.9902,
      "step": 73513
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5706144571304321,
      "learning_rate": 0.0004614897706074029,
      "loss": 2.9459,
      "step": 73514
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1390039920806885,
      "learning_rate": 0.0004614863232556596,
      "loss": 2.8292,
      "step": 73515
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9045406579971313,
      "learning_rate": 0.0004614828758738929,
      "loss": 3.093,
      "step": 73516
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0451245307922363,
      "learning_rate": 0.0004614794284621034,
      "loss": 3.1084,
      "step": 73517
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.9515271186828613,
      "learning_rate": 0.000461475981020292,
      "loss": 2.9151,
      "step": 73518
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7324687242507935,
      "learning_rate": 0.00046147253354845904,
      "loss": 3.1526,
      "step": 73519
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.21097469329834,
      "learning_rate": 0.00046146908604660533,
      "loss": 3.0513,
      "step": 73520
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6142640113830566,
      "learning_rate": 0.0004614656385147315,
      "loss": 3.1335,
      "step": 73521
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5093026161193848,
      "learning_rate": 0.0004614621909528381,
      "loss": 3.0808,
      "step": 73522
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6442041397094727,
      "learning_rate": 0.0004614587433609259,
      "loss": 3.1025,
      "step": 73523
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.641937494277954,
      "learning_rate": 0.0004614552957389954,
      "loss": 2.7466,
      "step": 73524
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3631441593170166,
      "learning_rate": 0.00046145184808704735,
      "loss": 3.0654,
      "step": 73525
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7123830318450928,
      "learning_rate": 0.0004614484004050824,
      "loss": 3.0572,
      "step": 73526
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3547234535217285,
      "learning_rate": 0.00046144495269310115,
      "loss": 2.9177,
      "step": 73527
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4517667293548584,
      "learning_rate": 0.0004614415049511041,
      "loss": 3.0221,
      "step": 73528
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5419548749923706,
      "learning_rate": 0.00046143805717909213,
      "loss": 3.0109,
      "step": 73529
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4907350540161133,
      "learning_rate": 0.0004614346093770657,
      "loss": 3.1642,
      "step": 73530
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9756158590316772,
      "learning_rate": 0.0004614311615450256,
      "loss": 3.2262,
      "step": 73531
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7500466108322144,
      "learning_rate": 0.0004614277136829724,
      "loss": 3.0711,
      "step": 73532
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6735727787017822,
      "learning_rate": 0.0004614242657909068,
      "loss": 3.0776,
      "step": 73533
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9461926221847534,
      "learning_rate": 0.00046142081786882923,
      "loss": 3.0977,
      "step": 73534
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9373139142990112,
      "learning_rate": 0.00046141736991674057,
      "loss": 3.0516,
      "step": 73535
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.704370379447937,
      "learning_rate": 0.0004614139219346414,
      "loss": 3.179,
      "step": 73536
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.709730863571167,
      "learning_rate": 0.00046141047392253223,
      "loss": 3.2119,
      "step": 73537
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9372501373291016,
      "learning_rate": 0.0004614070258804139,
      "loss": 3.0304,
      "step": 73538
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7762106657028198,
      "learning_rate": 0.000461403577808287,
      "loss": 3.0687,
      "step": 73539
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.02579927444458,
      "learning_rate": 0.000461400129706152,
      "loss": 2.9826,
      "step": 73540
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4553351402282715,
      "learning_rate": 0.0004613966815740098,
      "loss": 3.124,
      "step": 73541
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7624315023422241,
      "learning_rate": 0.00046139323341186085,
      "loss": 3.2532,
      "step": 73542
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.554302215576172,
      "learning_rate": 0.00046138978521970575,
      "loss": 2.7687,
      "step": 73543
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.7181508541107178,
      "learning_rate": 0.0004613863369975454,
      "loss": 3.1042,
      "step": 73544
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5660203695297241,
      "learning_rate": 0.0004613828887453802,
      "loss": 2.997,
      "step": 73545
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4959516525268555,
      "learning_rate": 0.00046137944046321087,
      "loss": 3.012,
      "step": 73546
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3814773559570312,
      "learning_rate": 0.0004613759921510381,
      "loss": 3.0096,
      "step": 73547
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1250438690185547,
      "learning_rate": 0.00046137254380886246,
      "loss": 3.0433,
      "step": 73548
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5313472747802734,
      "learning_rate": 0.00046136909543668464,
      "loss": 2.9475,
      "step": 73549
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5601274967193604,
      "learning_rate": 0.0004613656470345052,
      "loss": 3.0734,
      "step": 73550
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.9278340339660645,
      "learning_rate": 0.0004613621986023249,
      "loss": 2.986,
      "step": 73551
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.060819149017334,
      "learning_rate": 0.0004613587501401443,
      "loss": 3.1581,
      "step": 73552
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.03637957572937,
      "learning_rate": 0.0004613553016479641,
      "loss": 3.255,
      "step": 73553
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.438704252243042,
      "learning_rate": 0.00046135185312578495,
      "loss": 2.811,
      "step": 73554
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5649343729019165,
      "learning_rate": 0.00046134840457360724,
      "loss": 2.8093,
      "step": 73555
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.127575159072876,
      "learning_rate": 0.00046134495599143194,
      "loss": 2.9423,
      "step": 73556
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9463850259780884,
      "learning_rate": 0.0004613415073792596,
      "loss": 2.8082,
      "step": 73557
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5315454006195068,
      "learning_rate": 0.0004613380587370908,
      "loss": 2.9477,
      "step": 73558
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9809881448745728,
      "learning_rate": 0.0004613346100649262,
      "loss": 2.8823,
      "step": 73559
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7793595790863037,
      "learning_rate": 0.00046133116136276646,
      "loss": 2.9717,
      "step": 73560
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5258400440216064,
      "learning_rate": 0.0004613277126306122,
      "loss": 2.7623,
      "step": 73561
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6901150941848755,
      "learning_rate": 0.00046132426386846407,
      "loss": 3.2982,
      "step": 73562
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6215126514434814,
      "learning_rate": 0.00046132081507632277,
      "loss": 2.7141,
      "step": 73563
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.796409845352173,
      "learning_rate": 0.00046131736625418886,
      "loss": 3.0888,
      "step": 73564
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.958611011505127,
      "learning_rate": 0.000461313917402063,
      "loss": 3.1227,
      "step": 73565
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.985329270362854,
      "learning_rate": 0.00046131046851994575,
      "loss": 2.9064,
      "step": 73566
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.638474941253662,
      "learning_rate": 0.0004613070196078379,
      "loss": 2.9501,
      "step": 73567
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.00162672996521,
      "learning_rate": 0.00046130357066574007,
      "loss": 2.8601,
      "step": 73568
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9343783855438232,
      "learning_rate": 0.0004613001216936528,
      "loss": 3.1494,
      "step": 73569
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7124038934707642,
      "learning_rate": 0.00046129667269157685,
      "loss": 3.1243,
      "step": 73570
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5599242448806763,
      "learning_rate": 0.0004612932236595128,
      "loss": 2.942,
      "step": 73571
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5367088317871094,
      "learning_rate": 0.00046128977459746124,
      "loss": 2.7878,
      "step": 73572
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7066948413848877,
      "learning_rate": 0.0004612863255054229,
      "loss": 3.1247,
      "step": 73573
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5880118608474731,
      "learning_rate": 0.0004612828763833984,
      "loss": 2.9494,
      "step": 73574
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9052808284759521,
      "learning_rate": 0.0004612794272313884,
      "loss": 3.2846,
      "step": 73575
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.10544490814209,
      "learning_rate": 0.0004612759780493934,
      "loss": 3.0983,
      "step": 73576
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6214244365692139,
      "learning_rate": 0.0004612725288374143,
      "loss": 2.9931,
      "step": 73577
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.543850302696228,
      "learning_rate": 0.0004612690795954514,
      "loss": 2.9618,
      "step": 73578
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5921258926391602,
      "learning_rate": 0.0004612656303235057,
      "loss": 3.0774,
      "step": 73579
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6778004169464111,
      "learning_rate": 0.00046126218102157765,
      "loss": 2.9598,
      "step": 73580
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7480614185333252,
      "learning_rate": 0.0004612587316896678,
      "loss": 2.9073,
      "step": 73581
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8163349628448486,
      "learning_rate": 0.00046125528232777703,
      "loss": 3.1741,
      "step": 73582
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6728203296661377,
      "learning_rate": 0.00046125183293590583,
      "loss": 2.7807,
      "step": 73583
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7990975379943848,
      "learning_rate": 0.00046124838351405486,
      "loss": 2.9589,
      "step": 73584
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.637602686882019,
      "learning_rate": 0.00046124493406222484,
      "loss": 2.9382,
      "step": 73585
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6384013891220093,
      "learning_rate": 0.00046124148458041625,
      "loss": 2.814,
      "step": 73586
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.190425395965576,
      "learning_rate": 0.00046123803506862985,
      "loss": 3.163,
      "step": 73587
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.622611165046692,
      "learning_rate": 0.0004612345855268663,
      "loss": 3.1701,
      "step": 73588
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9256374835968018,
      "learning_rate": 0.0004612311359551261,
      "loss": 3.3503,
      "step": 73589
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.268075466156006,
      "learning_rate": 0.00046122768635341007,
      "loss": 2.9813,
      "step": 73590
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.155033588409424,
      "learning_rate": 0.00046122423672171877,
      "loss": 2.8221,
      "step": 73591
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6750483512878418,
      "learning_rate": 0.0004612207870600528,
      "loss": 2.8864,
      "step": 73592
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6787593364715576,
      "learning_rate": 0.00046121733736841287,
      "loss": 2.9412,
      "step": 73593
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6646888256072998,
      "learning_rate": 0.00046121388764679963,
      "loss": 2.9291,
      "step": 73594
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.580986738204956,
      "learning_rate": 0.00046121043789521356,
      "loss": 3.0001,
      "step": 73595
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5105293989181519,
      "learning_rate": 0.00046120698811365554,
      "loss": 2.8137,
      "step": 73596
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.571959376335144,
      "learning_rate": 0.0004612035383021261,
      "loss": 2.9997,
      "step": 73597
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6163711547851562,
      "learning_rate": 0.0004612000884606258,
      "loss": 2.9411,
      "step": 73598
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5377833843231201,
      "learning_rate": 0.0004611966385891554,
      "loss": 3.2372,
      "step": 73599
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4292473793029785,
      "learning_rate": 0.0004611931886877155,
      "loss": 2.9822,
      "step": 73600
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7741458415985107,
      "learning_rate": 0.00046118973875630676,
      "loss": 2.9533,
      "step": 73601
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.128474712371826,
      "learning_rate": 0.0004611862887949298,
      "loss": 2.796,
      "step": 73602
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7940796613693237,
      "learning_rate": 0.00046118283880358527,
      "loss": 3.0499,
      "step": 73603
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3249049186706543,
      "learning_rate": 0.0004611793887822738,
      "loss": 3.0164,
      "step": 73604
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.678542971611023,
      "learning_rate": 0.000461175938730996,
      "loss": 3.1359,
      "step": 73605
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7337896823883057,
      "learning_rate": 0.0004611724886497526,
      "loss": 3.0196,
      "step": 73606
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6483325958251953,
      "learning_rate": 0.0004611690385385442,
      "loss": 3.1146,
      "step": 73607
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6052278280258179,
      "learning_rate": 0.0004611655883973714,
      "loss": 2.9353,
      "step": 73608
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6440372467041016,
      "learning_rate": 0.0004611621382262349,
      "loss": 2.7283,
      "step": 73609
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5718408823013306,
      "learning_rate": 0.0004611586880251353,
      "loss": 3.1373,
      "step": 73610
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6981230974197388,
      "learning_rate": 0.00046115523779407326,
      "loss": 3.0619,
      "step": 73611
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.495222806930542,
      "learning_rate": 0.00046115178753304944,
      "loss": 3.1957,
      "step": 73612
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7753132581710815,
      "learning_rate": 0.0004611483372420644,
      "loss": 2.8995,
      "step": 73613
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7352348566055298,
      "learning_rate": 0.0004611448869211189,
      "loss": 3.0642,
      "step": 73614
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.082637071609497,
      "learning_rate": 0.0004611414365702135,
      "loss": 3.2348,
      "step": 73615
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0824477672576904,
      "learning_rate": 0.0004611379861893489,
      "loss": 2.9011,
      "step": 73616
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9744445085525513,
      "learning_rate": 0.00046113453577852564,
      "loss": 2.9325,
      "step": 73617
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.499874472618103,
      "learning_rate": 0.00046113108533774445,
      "loss": 2.9923,
      "step": 73618
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7979682683944702,
      "learning_rate": 0.000461127634867006,
      "loss": 3.0589,
      "step": 73619
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5845873355865479,
      "learning_rate": 0.00046112418436631083,
      "loss": 3.1742,
      "step": 73620
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8359090089797974,
      "learning_rate": 0.0004611207338356597,
      "loss": 2.999,
      "step": 73621
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.474676251411438,
      "learning_rate": 0.0004611172832750531,
      "loss": 3.0129,
      "step": 73622
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7342921495437622,
      "learning_rate": 0.00046111383268449175,
      "loss": 3.0424,
      "step": 73623
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6674882173538208,
      "learning_rate": 0.0004611103820639764,
      "loss": 3.1571,
      "step": 73624
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7184247970581055,
      "learning_rate": 0.0004611069314135075,
      "loss": 2.966,
      "step": 73625
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0352694988250732,
      "learning_rate": 0.0004611034807330858,
      "loss": 2.8873,
      "step": 73626
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4082889556884766,
      "learning_rate": 0.000461100030022712,
      "loss": 3.292,
      "step": 73627
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5405693054199219,
      "learning_rate": 0.0004610965792823865,
      "loss": 3.066,
      "step": 73628
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.615936517715454,
      "learning_rate": 0.00046109312851211017,
      "loss": 3.0752,
      "step": 73629
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7875092029571533,
      "learning_rate": 0.00046108967771188366,
      "loss": 3.2535,
      "step": 73630
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4773762226104736,
      "learning_rate": 0.0004610862268817075,
      "loss": 2.9976,
      "step": 73631
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2018541097640991,
      "learning_rate": 0.0004610827760215824,
      "loss": 2.9857,
      "step": 73632
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.090912342071533,
      "learning_rate": 0.00046107932513150883,
      "loss": 3.2806,
      "step": 73633
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.58771812915802,
      "learning_rate": 0.0004610758742114878,
      "loss": 3.1371,
      "step": 73634
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7287603616714478,
      "learning_rate": 0.00046107242326151956,
      "loss": 2.8561,
      "step": 73635
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5694738626480103,
      "learning_rate": 0.00046106897228160497,
      "loss": 3.0572,
      "step": 73636
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.186962366104126,
      "learning_rate": 0.0004610655212717446,
      "loss": 3.0438,
      "step": 73637
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9640601873397827,
      "learning_rate": 0.00046106207023193906,
      "loss": 3.101,
      "step": 73638
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5675190687179565,
      "learning_rate": 0.0004610586191621891,
      "loss": 3.1243,
      "step": 73639
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1675446033477783,
      "learning_rate": 0.00046105516806249527,
      "loss": 3.054,
      "step": 73640
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.979103684425354,
      "learning_rate": 0.00046105171693285827,
      "loss": 3.1101,
      "step": 73641
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.028301239013672,
      "learning_rate": 0.0004610482657732787,
      "loss": 2.7977,
      "step": 73642
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6455391645431519,
      "learning_rate": 0.00046104481458375735,
      "loss": 2.9757,
      "step": 73643
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3918824195861816,
      "learning_rate": 0.0004610413633642946,
      "loss": 3.1529,
      "step": 73644
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6823945045471191,
      "learning_rate": 0.00046103791211489113,
      "loss": 2.961,
      "step": 73645
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6041693687438965,
      "learning_rate": 0.00046103446083554787,
      "loss": 3.0747,
      "step": 73646
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.851085901260376,
      "learning_rate": 0.0004610310095262651,
      "loss": 3.0508,
      "step": 73647
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.693535327911377,
      "learning_rate": 0.0004610275581870437,
      "loss": 2.9317,
      "step": 73648
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6287598609924316,
      "learning_rate": 0.0004610241068178843,
      "loss": 2.8589,
      "step": 73649
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0676181316375732,
      "learning_rate": 0.0004610206554187874,
      "loss": 2.9532,
      "step": 73650
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.38736093044281,
      "learning_rate": 0.00046101720398975367,
      "loss": 2.7366,
      "step": 73651
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.82185697555542,
      "learning_rate": 0.00046101375253078386,
      "loss": 2.8629,
      "step": 73652
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7440367937088013,
      "learning_rate": 0.00046101030104187863,
      "loss": 2.9328,
      "step": 73653
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8918936252593994,
      "learning_rate": 0.0004610068495230384,
      "loss": 2.8562,
      "step": 73654
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6619985103607178,
      "learning_rate": 0.00046100339797426403,
      "loss": 3.2421,
      "step": 73655
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4432426691055298,
      "learning_rate": 0.00046099994639555613,
      "loss": 3.1909,
      "step": 73656
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.715743899345398,
      "learning_rate": 0.00046099649478691524,
      "loss": 2.6937,
      "step": 73657
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5177942514419556,
      "learning_rate": 0.00046099304314834214,
      "loss": 3.1522,
      "step": 73658
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6428776979446411,
      "learning_rate": 0.00046098959147983725,
      "loss": 2.5873,
      "step": 73659
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4451532363891602,
      "learning_rate": 0.0004609861397814015,
      "loss": 2.9302,
      "step": 73660
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.229843854904175,
      "learning_rate": 0.0004609826880530353,
      "loss": 3.2195,
      "step": 73661
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8629229068756104,
      "learning_rate": 0.00046097923629473945,
      "loss": 3.2315,
      "step": 73662
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.975445032119751,
      "learning_rate": 0.00046097578450651446,
      "loss": 3.3372,
      "step": 73663
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8416765928268433,
      "learning_rate": 0.00046097233268836105,
      "loss": 3.0915,
      "step": 73664
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.040414571762085,
      "learning_rate": 0.00046096888084027986,
      "loss": 2.9123,
      "step": 73665
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7557941675186157,
      "learning_rate": 0.0004609654289622715,
      "loss": 3.1805,
      "step": 73666
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.829965591430664,
      "learning_rate": 0.00046096197705433665,
      "loss": 3.1383,
      "step": 73667
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6316797733306885,
      "learning_rate": 0.0004609585251164759,
      "loss": 2.9703,
      "step": 73668
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6769695281982422,
      "learning_rate": 0.00046095507314868994,
      "loss": 3.0747,
      "step": 73669
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3168883323669434,
      "learning_rate": 0.0004609516211509794,
      "loss": 2.9444,
      "step": 73670
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5487539768218994,
      "learning_rate": 0.0004609481691233449,
      "loss": 2.8916,
      "step": 73671
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8514841794967651,
      "learning_rate": 0.00046094471706578714,
      "loss": 3.0564,
      "step": 73672
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0367071628570557,
      "learning_rate": 0.00046094126497830663,
      "loss": 3.1808,
      "step": 73673
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6915149688720703,
      "learning_rate": 0.0004609378128609041,
      "loss": 3.0354,
      "step": 73674
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9102668762207031,
      "learning_rate": 0.00046093436071358024,
      "loss": 2.9882,
      "step": 73675
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8177276849746704,
      "learning_rate": 0.0004609309085363358,
      "loss": 3.0907,
      "step": 73676
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5944135189056396,
      "learning_rate": 0.00046092745632917104,
      "loss": 3.2905,
      "step": 73677
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6605010032653809,
      "learning_rate": 0.0004609240040920869,
      "loss": 2.9106,
      "step": 73678
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.11942720413208,
      "learning_rate": 0.00046092055182508397,
      "loss": 2.9722,
      "step": 73679
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2593443393707275,
      "learning_rate": 0.00046091709952816287,
      "loss": 3.0003,
      "step": 73680
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4129525423049927,
      "learning_rate": 0.0004609136472013242,
      "loss": 3.0462,
      "step": 73681
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.4621806144714355,
      "learning_rate": 0.0004609101948445687,
      "loss": 2.9151,
      "step": 73682
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6255518198013306,
      "learning_rate": 0.0004609067424578969,
      "loss": 3.1807,
      "step": 73683
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0419933795928955,
      "learning_rate": 0.0004609032900413095,
      "loss": 2.9695,
      "step": 73684
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.183685779571533,
      "learning_rate": 0.0004608998375948073,
      "loss": 2.959,
      "step": 73685
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9135913848876953,
      "learning_rate": 0.0004608963851183906,
      "loss": 2.8718,
      "step": 73686
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7853702306747437,
      "learning_rate": 0.0004608929326120603,
      "loss": 2.8839,
      "step": 73687
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3262300491333008,
      "learning_rate": 0.00046088948007581696,
      "loss": 2.8623,
      "step": 73688
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8333491086959839,
      "learning_rate": 0.0004608860275096612,
      "loss": 2.8253,
      "step": 73689
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.313488721847534,
      "learning_rate": 0.00046088257491359364,
      "loss": 3.1807,
      "step": 73690
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5549792051315308,
      "learning_rate": 0.0004608791222876151,
      "loss": 3.2308,
      "step": 73691
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.736147165298462,
      "learning_rate": 0.000460875669631726,
      "loss": 3.1001,
      "step": 73692
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1541640758514404,
      "learning_rate": 0.0004608722169459271,
      "loss": 3.1229,
      "step": 73693
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.296938419342041,
      "learning_rate": 0.0004608687642302191,
      "loss": 3.04,
      "step": 73694
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8647946119308472,
      "learning_rate": 0.0004608653114846025,
      "loss": 3.0996,
      "step": 73695
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8328779935836792,
      "learning_rate": 0.00046086185870907793,
      "loss": 3.0519,
      "step": 73696
    },
    {
      "epoch": 0.96,
      "grad_norm": 4.288272857666016,
      "learning_rate": 0.0004608584059036462,
      "loss": 3.0126,
      "step": 73697
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3170697689056396,
      "learning_rate": 0.0004608549530683078,
      "loss": 2.9761,
      "step": 73698
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7922115325927734,
      "learning_rate": 0.0004608515002030634,
      "loss": 3.0942,
      "step": 73699
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.313084125518799,
      "learning_rate": 0.00046084804730791375,
      "loss": 3.0767,
      "step": 73700
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1925907135009766,
      "learning_rate": 0.0004608445943828594,
      "loss": 2.9778,
      "step": 73701
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6964248418807983,
      "learning_rate": 0.00046084114142790094,
      "loss": 3.115,
      "step": 73702
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6932662725448608,
      "learning_rate": 0.00046083768844303914,
      "loss": 3.2289,
      "step": 73703
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.180229902267456,
      "learning_rate": 0.0004608342354282746,
      "loss": 2.8883,
      "step": 73704
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.857451915740967,
      "learning_rate": 0.00046083078238360784,
      "loss": 3.0698,
      "step": 73705
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.8853564262390137,
      "learning_rate": 0.00046082732930903964,
      "loss": 3.2499,
      "step": 73706
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6298537254333496,
      "learning_rate": 0.00046082387620457067,
      "loss": 3.0584,
      "step": 73707
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.589242458343506,
      "learning_rate": 0.0004608204230702015,
      "loss": 3.2181,
      "step": 73708
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.884117603302002,
      "learning_rate": 0.0004608169699059327,
      "loss": 2.854,
      "step": 73709
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.881602168083191,
      "learning_rate": 0.00046081351671176503,
      "loss": 3.0918,
      "step": 73710
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6420613527297974,
      "learning_rate": 0.00046081006348769907,
      "loss": 2.9483,
      "step": 73711
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2137627601623535,
      "learning_rate": 0.00046080661023373547,
      "loss": 3.067,
      "step": 73712
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9611233472824097,
      "learning_rate": 0.00046080315694987494,
      "loss": 2.912,
      "step": 73713
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6299928426742554,
      "learning_rate": 0.0004607997036361181,
      "loss": 3.1515,
      "step": 73714
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7349532842636108,
      "learning_rate": 0.0004607962502924655,
      "loss": 3.0032,
      "step": 73715
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8154679536819458,
      "learning_rate": 0.00046079279691891786,
      "loss": 3.2836,
      "step": 73716
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8655979633331299,
      "learning_rate": 0.0004607893435154757,
      "loss": 3.1252,
      "step": 73717
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5911682844161987,
      "learning_rate": 0.00046078589008213984,
      "loss": 3.3218,
      "step": 73718
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5887855291366577,
      "learning_rate": 0.00046078243661891097,
      "loss": 2.8861,
      "step": 73719
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.393726110458374,
      "learning_rate": 0.00046077898312578945,
      "loss": 2.9227,
      "step": 73720
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.600447177886963,
      "learning_rate": 0.0004607755296027761,
      "loss": 2.878,
      "step": 73721
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9320522546768188,
      "learning_rate": 0.00046077207604987167,
      "loss": 2.9521,
      "step": 73722
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9043394327163696,
      "learning_rate": 0.0004607686224670766,
      "loss": 2.9734,
      "step": 73723
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7002872228622437,
      "learning_rate": 0.00046076516885439154,
      "loss": 3.1025,
      "step": 73724
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3416733741760254,
      "learning_rate": 0.0004607617152118173,
      "loss": 2.7509,
      "step": 73725
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6238605976104736,
      "learning_rate": 0.0004607582615393543,
      "loss": 2.8451,
      "step": 73726
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6060254573822021,
      "learning_rate": 0.00046075480783700336,
      "loss": 3.0189,
      "step": 73727
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8825738430023193,
      "learning_rate": 0.00046075135410476517,
      "loss": 3.0451,
      "step": 73728
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.419600009918213,
      "learning_rate": 0.0004607479003426402,
      "loss": 2.9647,
      "step": 73729
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.836456537246704,
      "learning_rate": 0.00046074444655062907,
      "loss": 2.9849,
      "step": 73730
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7265520095825195,
      "learning_rate": 0.00046074099272873267,
      "loss": 2.9639,
      "step": 73731
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.67417573928833,
      "learning_rate": 0.0004607375388769513,
      "loss": 2.8989,
      "step": 73732
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.816943883895874,
      "learning_rate": 0.00046073408499528595,
      "loss": 3.1394,
      "step": 73733
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.689225912094116,
      "learning_rate": 0.00046073063108373704,
      "loss": 3.1446,
      "step": 73734
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8216297626495361,
      "learning_rate": 0.00046072717714230524,
      "loss": 3.2404,
      "step": 73735
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7768878936767578,
      "learning_rate": 0.0004607237231709913,
      "loss": 2.7575,
      "step": 73736
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.541295051574707,
      "learning_rate": 0.00046072026916979574,
      "loss": 3.1435,
      "step": 73737
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6211693286895752,
      "learning_rate": 0.0004607168151387193,
      "loss": 2.9286,
      "step": 73738
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.563947081565857,
      "learning_rate": 0.0004607133610777625,
      "loss": 2.8134,
      "step": 73739
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4095302820205688,
      "learning_rate": 0.000460709906986926,
      "loss": 3.0363,
      "step": 73740
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.59555983543396,
      "learning_rate": 0.0004607064528662107,
      "loss": 3.1414,
      "step": 73741
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5429646968841553,
      "learning_rate": 0.0004607029987156169,
      "loss": 3.0002,
      "step": 73742
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8459904193878174,
      "learning_rate": 0.0004606995445351453,
      "loss": 3.028,
      "step": 73743
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7703698873519897,
      "learning_rate": 0.00046069609032479666,
      "loss": 3.1004,
      "step": 73744
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5131767988204956,
      "learning_rate": 0.00046069263608457175,
      "loss": 2.9956,
      "step": 73745
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5091484785079956,
      "learning_rate": 0.00046068918181447083,
      "loss": 3.1655,
      "step": 73746
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.623210072517395,
      "learning_rate": 0.00046068572751449484,
      "loss": 3.0983,
      "step": 73747
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8517659902572632,
      "learning_rate": 0.00046068227318464436,
      "loss": 2.8721,
      "step": 73748
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.676927089691162,
      "learning_rate": 0.00046067881882492004,
      "loss": 2.9901,
      "step": 73749
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1565301418304443,
      "learning_rate": 0.0004606753644353224,
      "loss": 3.214,
      "step": 73750
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.621217131614685,
      "learning_rate": 0.0004606719100158523,
      "loss": 2.8748,
      "step": 73751
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0870962142944336,
      "learning_rate": 0.0004606684555665102,
      "loss": 2.9831,
      "step": 73752
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.703550934791565,
      "learning_rate": 0.0004606650010872968,
      "loss": 3.1706,
      "step": 73753
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.128307580947876,
      "learning_rate": 0.00046066154657821266,
      "loss": 2.8979,
      "step": 73754
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.39950692653656,
      "learning_rate": 0.0004606580920392587,
      "loss": 2.681,
      "step": 73755
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8991968631744385,
      "learning_rate": 0.0004606546374704352,
      "loss": 2.9819,
      "step": 73756
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4752646684646606,
      "learning_rate": 0.000460651182871743,
      "loss": 2.9753,
      "step": 73757
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.961412787437439,
      "learning_rate": 0.0004606477282431828,
      "loss": 3.0686,
      "step": 73758
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.495097041130066,
      "learning_rate": 0.0004606442735847551,
      "loss": 3.0386,
      "step": 73759
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2023303508758545,
      "learning_rate": 0.00046064081889646047,
      "loss": 2.8847,
      "step": 73760
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4854270219802856,
      "learning_rate": 0.0004606373641782999,
      "loss": 3.0474,
      "step": 73761
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6337940692901611,
      "learning_rate": 0.0004606339094302737,
      "loss": 2.9282,
      "step": 73762
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.219054937362671,
      "learning_rate": 0.0004606304546523826,
      "loss": 2.9747,
      "step": 73763
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.588890790939331,
      "learning_rate": 0.0004606269998446274,
      "loss": 3.1866,
      "step": 73764
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8825891017913818,
      "learning_rate": 0.00046062354500700845,
      "loss": 3.032,
      "step": 73765
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8153111934661865,
      "learning_rate": 0.0004606200901395266,
      "loss": 2.9036,
      "step": 73766
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5317442417144775,
      "learning_rate": 0.00046061663524218243,
      "loss": 3.0258,
      "step": 73767
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1068115234375,
      "learning_rate": 0.0004606131803149766,
      "loss": 3.12,
      "step": 73768
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5340532064437866,
      "learning_rate": 0.0004606097253579098,
      "loss": 2.9926,
      "step": 73769
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7363752126693726,
      "learning_rate": 0.00046060627037098256,
      "loss": 3.1475,
      "step": 73770
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.2961108684539795,
      "learning_rate": 0.0004606028153541956,
      "loss": 3.0698,
      "step": 73771
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7609037160873413,
      "learning_rate": 0.00046059936030754965,
      "loss": 3.1836,
      "step": 73772
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8444693088531494,
      "learning_rate": 0.00046059590523104513,
      "loss": 3.1035,
      "step": 73773
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2678496837615967,
      "learning_rate": 0.0004605924501246828,
      "loss": 3.1232,
      "step": 73774
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7910969257354736,
      "learning_rate": 0.0004605889949884633,
      "loss": 3.0233,
      "step": 73775
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5620545148849487,
      "learning_rate": 0.0004605855398223874,
      "loss": 3.0497,
      "step": 73776
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5424292087554932,
      "learning_rate": 0.00046058208462645544,
      "loss": 3.0268,
      "step": 73777
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7731502056121826,
      "learning_rate": 0.00046057862940066836,
      "loss": 3.1212,
      "step": 73778
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.802711844444275,
      "learning_rate": 0.00046057517414502665,
      "loss": 3.1287,
      "step": 73779
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.647508144378662,
      "learning_rate": 0.000460571718859531,
      "loss": 3.0606,
      "step": 73780
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4370349645614624,
      "learning_rate": 0.000460568263544182,
      "loss": 2.9589,
      "step": 73781
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.51080322265625,
      "learning_rate": 0.0004605648081989803,
      "loss": 2.974,
      "step": 73782
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7784874439239502,
      "learning_rate": 0.0004605613528239266,
      "loss": 2.9981,
      "step": 73783
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6382019519805908,
      "learning_rate": 0.00046055789741902153,
      "loss": 3.1381,
      "step": 73784
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7907445430755615,
      "learning_rate": 0.00046055444198426566,
      "loss": 3.0579,
      "step": 73785
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.959681749343872,
      "learning_rate": 0.00046055098651965975,
      "loss": 2.9182,
      "step": 73786
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.586143732070923,
      "learning_rate": 0.0004605475310252044,
      "loss": 3.0172,
      "step": 73787
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.717545509338379,
      "learning_rate": 0.0004605440755009002,
      "loss": 2.9437,
      "step": 73788
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6366890668869019,
      "learning_rate": 0.0004605406199467478,
      "loss": 3.0339,
      "step": 73789
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8887783288955688,
      "learning_rate": 0.0004605371643627479,
      "loss": 3.1951,
      "step": 73790
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5522197484970093,
      "learning_rate": 0.0004605337087489011,
      "loss": 2.9747,
      "step": 73791
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5998681783676147,
      "learning_rate": 0.00046053025310520814,
      "loss": 3.1342,
      "step": 73792
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4148973226547241,
      "learning_rate": 0.0004605267974316694,
      "loss": 3.0527,
      "step": 73793
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6432572603225708,
      "learning_rate": 0.0004605233417282858,
      "loss": 3.0079,
      "step": 73794
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6315529346466064,
      "learning_rate": 0.000460519885995058,
      "loss": 2.9423,
      "step": 73795
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5443154573440552,
      "learning_rate": 0.0004605164302319863,
      "loss": 3.0107,
      "step": 73796
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6023333072662354,
      "learning_rate": 0.00046051297443907164,
      "loss": 2.6225,
      "step": 73797
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7237592935562134,
      "learning_rate": 0.00046050951861631467,
      "loss": 2.7008,
      "step": 73798
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7608362436294556,
      "learning_rate": 0.00046050606276371584,
      "loss": 3.2591,
      "step": 73799
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8701571226119995,
      "learning_rate": 0.000460502606881276,
      "loss": 3.0031,
      "step": 73800
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.918555498123169,
      "learning_rate": 0.0004604991509689957,
      "loss": 2.8681,
      "step": 73801
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.9393391609191895,
      "learning_rate": 0.00046049569502687546,
      "loss": 3.045,
      "step": 73802
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7547434568405151,
      "learning_rate": 0.00046049223905491606,
      "loss": 3.015,
      "step": 73803
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8074085712432861,
      "learning_rate": 0.0004604887830531183,
      "loss": 3.1602,
      "step": 73804
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2305514812469482,
      "learning_rate": 0.00046048532702148244,
      "loss": 2.9297,
      "step": 73805
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7819162607192993,
      "learning_rate": 0.0004604818709600094,
      "loss": 3.065,
      "step": 73806
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6447628736495972,
      "learning_rate": 0.0004604784148686998,
      "loss": 2.9042,
      "step": 73807
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.105659246444702,
      "learning_rate": 0.00046047495874755416,
      "loss": 3.1604,
      "step": 73808
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5556349754333496,
      "learning_rate": 0.00046047150259657325,
      "loss": 2.9901,
      "step": 73809
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7057608366012573,
      "learning_rate": 0.00046046804641575764,
      "loss": 2.9343,
      "step": 73810
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8179560899734497,
      "learning_rate": 0.00046046459020510804,
      "loss": 3.0685,
      "step": 73811
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5864533185958862,
      "learning_rate": 0.0004604611339646249,
      "loss": 3.07,
      "step": 73812
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7795429229736328,
      "learning_rate": 0.0004604576776943092,
      "loss": 3.1156,
      "step": 73813
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8205609321594238,
      "learning_rate": 0.00046045422139416126,
      "loss": 3.0595,
      "step": 73814
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7381857633590698,
      "learning_rate": 0.0004604507650641818,
      "loss": 3.0348,
      "step": 73815
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6829371452331543,
      "learning_rate": 0.0004604473087043716,
      "loss": 2.9871,
      "step": 73816
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7573107481002808,
      "learning_rate": 0.0004604438523147312,
      "loss": 3.1382,
      "step": 73817
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7071552276611328,
      "learning_rate": 0.00046044039589526124,
      "loss": 3.1137,
      "step": 73818
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5636671781539917,
      "learning_rate": 0.0004604369394459625,
      "loss": 2.9237,
      "step": 73819
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8081597089767456,
      "learning_rate": 0.0004604334829668354,
      "loss": 2.9845,
      "step": 73820
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.099883556365967,
      "learning_rate": 0.00046043002645788065,
      "loss": 3.0489,
      "step": 73821
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5649378299713135,
      "learning_rate": 0.00046042656991909906,
      "loss": 2.9046,
      "step": 73822
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.9113519191741943,
      "learning_rate": 0.000460423113350491,
      "loss": 2.8216,
      "step": 73823
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7683892250061035,
      "learning_rate": 0.00046041965675205725,
      "loss": 2.9829,
      "step": 73824
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.862924575805664,
      "learning_rate": 0.00046041620012379856,
      "loss": 3.0417,
      "step": 73825
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.608003854751587,
      "learning_rate": 0.0004604127434657155,
      "loss": 3.0906,
      "step": 73826
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.875349998474121,
      "learning_rate": 0.0004604092867778086,
      "loss": 2.9544,
      "step": 73827
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.497989535331726,
      "learning_rate": 0.0004604058300600786,
      "loss": 2.8824,
      "step": 73828
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7532625198364258,
      "learning_rate": 0.00046040237331252613,
      "loss": 2.913,
      "step": 73829
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8644351959228516,
      "learning_rate": 0.00046039891653515175,
      "loss": 3.0758,
      "step": 73830
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.558923602104187,
      "learning_rate": 0.00046039545972795633,
      "loss": 2.8623,
      "step": 73831
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.527249336242676,
      "learning_rate": 0.00046039200289094026,
      "loss": 2.9328,
      "step": 73832
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5185610055923462,
      "learning_rate": 0.0004603885460241044,
      "loss": 3.0814,
      "step": 73833
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.965440034866333,
      "learning_rate": 0.0004603850891274492,
      "loss": 3.0516,
      "step": 73834
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2146856784820557,
      "learning_rate": 0.0004603816322009753,
      "loss": 2.8227,
      "step": 73835
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7149789333343506,
      "learning_rate": 0.0004603781752446836,
      "loss": 2.9717,
      "step": 73836
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6849391460418701,
      "learning_rate": 0.00046037471825857446,
      "loss": 2.8938,
      "step": 73837
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5413535833358765,
      "learning_rate": 0.00046037126124264866,
      "loss": 3.0614,
      "step": 73838
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6316497325897217,
      "learning_rate": 0.00046036780419690687,
      "loss": 3.1409,
      "step": 73839
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.250296115875244,
      "learning_rate": 0.0004603643471213496,
      "loss": 3.225,
      "step": 73840
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8236300945281982,
      "learning_rate": 0.00046036089001597763,
      "loss": 2.8215,
      "step": 73841
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.513657808303833,
      "learning_rate": 0.0004603574328807915,
      "loss": 2.8625,
      "step": 73842
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6922798156738281,
      "learning_rate": 0.00046035397571579184,
      "loss": 2.8733,
      "step": 73843
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5935240983963013,
      "learning_rate": 0.0004603505185209795,
      "loss": 3.0219,
      "step": 73844
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.61197829246521,
      "learning_rate": 0.0004603470612963549,
      "loss": 2.8759,
      "step": 73845
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5274016857147217,
      "learning_rate": 0.00046034360404191874,
      "loss": 3.0589,
      "step": 73846
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4278357028961182,
      "learning_rate": 0.0004603401467576717,
      "loss": 3.0195,
      "step": 73847
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.473310112953186,
      "learning_rate": 0.00046033668944361433,
      "loss": 3.0084,
      "step": 73848
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6242502927780151,
      "learning_rate": 0.0004603332320997474,
      "loss": 2.7763,
      "step": 73849
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9926252365112305,
      "learning_rate": 0.00046032977472607153,
      "loss": 3.1168,
      "step": 73850
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1769087314605713,
      "learning_rate": 0.00046032631732258726,
      "loss": 3.2456,
      "step": 73851
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7899150848388672,
      "learning_rate": 0.00046032285988929534,
      "loss": 2.8194,
      "step": 73852
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6233817338943481,
      "learning_rate": 0.00046031940242619637,
      "loss": 3.0928,
      "step": 73853
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5224487781524658,
      "learning_rate": 0.000460315944933291,
      "loss": 3.0312,
      "step": 73854
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5859341621398926,
      "learning_rate": 0.0004603124874105799,
      "loss": 2.9442,
      "step": 73855
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6085665225982666,
      "learning_rate": 0.0004603090298580636,
      "loss": 3.1257,
      "step": 73856
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5232058763504028,
      "learning_rate": 0.00046030557227574284,
      "loss": 3.1524,
      "step": 73857
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.04787540435791,
      "learning_rate": 0.0004603021146636183,
      "loss": 2.9207,
      "step": 73858
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6527920961380005,
      "learning_rate": 0.0004602986570216906,
      "loss": 2.8622,
      "step": 73859
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7178497314453125,
      "learning_rate": 0.00046029519934996026,
      "loss": 3.0123,
      "step": 73860
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0615358352661133,
      "learning_rate": 0.0004602917416484281,
      "loss": 2.9499,
      "step": 73861
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7015571594238281,
      "learning_rate": 0.00046028828391709464,
      "loss": 2.821,
      "step": 73862
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.0514168739318848,
      "learning_rate": 0.00046028482615596047,
      "loss": 2.8681,
      "step": 73863
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7313135862350464,
      "learning_rate": 0.0004602813683650265,
      "loss": 3.1825,
      "step": 73864
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5558533668518066,
      "learning_rate": 0.00046027791054429316,
      "loss": 2.7346,
      "step": 73865
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.702614188194275,
      "learning_rate": 0.000460274452693761,
      "loss": 2.8576,
      "step": 73866
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7390174865722656,
      "learning_rate": 0.00046027099481343094,
      "loss": 3.0724,
      "step": 73867
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5691838264465332,
      "learning_rate": 0.00046026753690330344,
      "loss": 3.1946,
      "step": 73868
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4193602800369263,
      "learning_rate": 0.00046026407896337907,
      "loss": 3.1726,
      "step": 73869
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5165555477142334,
      "learning_rate": 0.00046026062099365866,
      "loss": 3.1111,
      "step": 73870
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1435234546661377,
      "learning_rate": 0.00046025716299414286,
      "loss": 3.2026,
      "step": 73871
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6504805088043213,
      "learning_rate": 0.0004602537049648321,
      "loss": 3.2725,
      "step": 73872
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9200499057769775,
      "learning_rate": 0.00046025024690572725,
      "loss": 3.1729,
      "step": 73873
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5766535997390747,
      "learning_rate": 0.0004602467888168288,
      "loss": 3.2056,
      "step": 73874
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4829177856445312,
      "learning_rate": 0.0004602433306981375,
      "loss": 3.1489,
      "step": 73875
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8611950874328613,
      "learning_rate": 0.0004602398725496538,
      "loss": 2.8586,
      "step": 73876
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6799428462982178,
      "learning_rate": 0.00046023641437137867,
      "loss": 3.0484,
      "step": 73877
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1772077083587646,
      "learning_rate": 0.00046023295616331246,
      "loss": 2.818,
      "step": 73878
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6981613636016846,
      "learning_rate": 0.0004602294979254559,
      "loss": 3.2389,
      "step": 73879
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.7412028312683105,
      "learning_rate": 0.0004602260396578097,
      "loss": 3.1195,
      "step": 73880
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.9821958541870117,
      "learning_rate": 0.00046022258136037443,
      "loss": 3.0031,
      "step": 73881
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5740625858306885,
      "learning_rate": 0.00046021912303315077,
      "loss": 2.7613,
      "step": 73882
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8149940967559814,
      "learning_rate": 0.0004602156646761393,
      "loss": 3.0515,
      "step": 73883
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8494986295700073,
      "learning_rate": 0.0004602122062893408,
      "loss": 3.0921,
      "step": 73884
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5018000602722168,
      "learning_rate": 0.0004602087478727558,
      "loss": 3.1912,
      "step": 73885
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.235208034515381,
      "learning_rate": 0.0004602052894263849,
      "loss": 2.8632,
      "step": 73886
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.704206109046936,
      "learning_rate": 0.0004602018309502289,
      "loss": 3.1281,
      "step": 73887
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3923301696777344,
      "learning_rate": 0.00046019837244428834,
      "loss": 2.8652,
      "step": 73888
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6598151922225952,
      "learning_rate": 0.0004601949139085639,
      "loss": 3.2392,
      "step": 73889
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.743659496307373,
      "learning_rate": 0.00046019145534305616,
      "loss": 3.0549,
      "step": 73890
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5886398553848267,
      "learning_rate": 0.00046018799674776574,
      "loss": 3.1861,
      "step": 73891
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6123981475830078,
      "learning_rate": 0.00046018453812269347,
      "loss": 3.1029,
      "step": 73892
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.346117377281189,
      "learning_rate": 0.0004601810794678398,
      "loss": 2.9256,
      "step": 73893
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.452936053276062,
      "learning_rate": 0.0004601776207832055,
      "loss": 2.9749,
      "step": 73894
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6849148273468018,
      "learning_rate": 0.0004601741620687911,
      "loss": 2.8113,
      "step": 73895
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.753139615058899,
      "learning_rate": 0.0004601707033245973,
      "loss": 2.9233,
      "step": 73896
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9438105821609497,
      "learning_rate": 0.00046016724455062475,
      "loss": 2.7853,
      "step": 73897
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5814011096954346,
      "learning_rate": 0.0004601637857468742,
      "loss": 2.7743,
      "step": 73898
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.061887741088867,
      "learning_rate": 0.00046016032691334604,
      "loss": 3.2036,
      "step": 73899
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9442880153656006,
      "learning_rate": 0.00046015686805004106,
      "loss": 2.8385,
      "step": 73900
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5039498805999756,
      "learning_rate": 0.0004601534091569599,
      "loss": 3.0525,
      "step": 73901
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.037397623062134,
      "learning_rate": 0.00046014995023410324,
      "loss": 3.0477,
      "step": 73902
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.721832275390625,
      "learning_rate": 0.00046014649128147165,
      "loss": 3.0922,
      "step": 73903
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.785943865776062,
      "learning_rate": 0.00046014303229906575,
      "loss": 2.8759,
      "step": 73904
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6272006034851074,
      "learning_rate": 0.0004601395732868864,
      "loss": 3.0427,
      "step": 73905
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5164228677749634,
      "learning_rate": 0.0004601361142449339,
      "loss": 3.0622,
      "step": 73906
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7720799446105957,
      "learning_rate": 0.0004601326551732091,
      "loss": 2.9746,
      "step": 73907
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1863837242126465,
      "learning_rate": 0.00046012919607171283,
      "loss": 3.2155,
      "step": 73908
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.060103178024292,
      "learning_rate": 0.0004601257369404453,
      "loss": 3.0,
      "step": 73909
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9630072116851807,
      "learning_rate": 0.0004601222777794074,
      "loss": 3.2244,
      "step": 73910
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9681730270385742,
      "learning_rate": 0.0004601188185885999,
      "loss": 3.0937,
      "step": 73911
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.653451681137085,
      "learning_rate": 0.00046011535936802306,
      "loss": 2.9382,
      "step": 73912
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6359553337097168,
      "learning_rate": 0.0004601119001176779,
      "loss": 3.0146,
      "step": 73913
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1279408931732178,
      "learning_rate": 0.00046010844083756495,
      "loss": 3.07,
      "step": 73914
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6152920722961426,
      "learning_rate": 0.0004601049815276847,
      "loss": 2.9046,
      "step": 73915
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7757469415664673,
      "learning_rate": 0.0004601015221880379,
      "loss": 2.9,
      "step": 73916
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.571501612663269,
      "learning_rate": 0.00046009806281862534,
      "loss": 2.8976,
      "step": 73917
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.0167200565338135,
      "learning_rate": 0.00046009460341944747,
      "loss": 3.0255,
      "step": 73918
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0914881229400635,
      "learning_rate": 0.000460091143990505,
      "loss": 2.9832,
      "step": 73919
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5416970252990723,
      "learning_rate": 0.0004600876845317984,
      "loss": 3.0722,
      "step": 73920
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.216270685195923,
      "learning_rate": 0.00046008422504332873,
      "loss": 2.9889,
      "step": 73921
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.014479637145996,
      "learning_rate": 0.00046008076552509625,
      "loss": 3.0817,
      "step": 73922
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8248809576034546,
      "learning_rate": 0.0004600773059771017,
      "loss": 3.0068,
      "step": 73923
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6581101417541504,
      "learning_rate": 0.0004600738463993458,
      "loss": 2.8589,
      "step": 73924
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2697839736938477,
      "learning_rate": 0.00046007038679182923,
      "loss": 3.0748,
      "step": 73925
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6914604902267456,
      "learning_rate": 0.00046006692715455243,
      "loss": 3.045,
      "step": 73926
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7993098497390747,
      "learning_rate": 0.00046006346748751623,
      "loss": 3.1485,
      "step": 73927
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6065305471420288,
      "learning_rate": 0.0004600600077907212,
      "loss": 2.9451,
      "step": 73928
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5374929904937744,
      "learning_rate": 0.0004600565480641679,
      "loss": 3.1441,
      "step": 73929
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8375434875488281,
      "learning_rate": 0.0004600530883078572,
      "loss": 3.1724,
      "step": 73930
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8049001693725586,
      "learning_rate": 0.0004600496285217896,
      "loss": 3.1815,
      "step": 73931
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2970900535583496,
      "learning_rate": 0.0004600461687059657,
      "loss": 2.9083,
      "step": 73932
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6963125467300415,
      "learning_rate": 0.0004600427088603862,
      "loss": 2.8308,
      "step": 73933
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7971556186676025,
      "learning_rate": 0.00046003924898505185,
      "loss": 3.1884,
      "step": 73934
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.416694164276123,
      "learning_rate": 0.00046003578907996307,
      "loss": 3.1593,
      "step": 73935
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7372028827667236,
      "learning_rate": 0.00046003232914512064,
      "loss": 3.1219,
      "step": 73936
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9510136842727661,
      "learning_rate": 0.00046002886918052516,
      "loss": 2.9673,
      "step": 73937
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.25816011428833,
      "learning_rate": 0.0004600254091861773,
      "loss": 3.0787,
      "step": 73938
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.296684741973877,
      "learning_rate": 0.0004600219491620777,
      "loss": 2.9783,
      "step": 73939
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.39425790309906,
      "learning_rate": 0.000460018489108227,
      "loss": 2.7523,
      "step": 73940
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7430386543273926,
      "learning_rate": 0.00046001502902462583,
      "loss": 3.0285,
      "step": 73941
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.270970106124878,
      "learning_rate": 0.0004600115689112749,
      "loss": 2.6697,
      "step": 73942
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.56911301612854,
      "learning_rate": 0.00046000810876817473,
      "loss": 3.2042,
      "step": 73943
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.405146837234497,
      "learning_rate": 0.00046000464859532607,
      "loss": 2.7464,
      "step": 73944
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6491745710372925,
      "learning_rate": 0.0004600011883927295,
      "loss": 3.0732,
      "step": 73945
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8039824962615967,
      "learning_rate": 0.0004599977281603857,
      "loss": 3.0052,
      "step": 73946
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5848132371902466,
      "learning_rate": 0.0004599942678982953,
      "loss": 3.1123,
      "step": 73947
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.999379277229309,
      "learning_rate": 0.00045999080760645894,
      "loss": 3.2408,
      "step": 73948
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.376209259033203,
      "learning_rate": 0.0004599873472848773,
      "loss": 3.0304,
      "step": 73949
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6619465351104736,
      "learning_rate": 0.00045998388693355103,
      "loss": 2.8811,
      "step": 73950
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5219321250915527,
      "learning_rate": 0.0004599804265524806,
      "loss": 2.8688,
      "step": 73951
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.738529086112976,
      "learning_rate": 0.00045997696614166685,
      "loss": 2.9592,
      "step": 73952
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.474951148033142,
      "learning_rate": 0.00045997350570111046,
      "loss": 3.021,
      "step": 73953
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8689923286437988,
      "learning_rate": 0.0004599700452308119,
      "loss": 2.9323,
      "step": 73954
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6658340692520142,
      "learning_rate": 0.00045996658473077185,
      "loss": 3.1407,
      "step": 73955
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6229990720748901,
      "learning_rate": 0.00045996312420099105,
      "loss": 3.0585,
      "step": 73956
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6650540828704834,
      "learning_rate": 0.00045995966364147003,
      "loss": 3.1156,
      "step": 73957
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7009590864181519,
      "learning_rate": 0.00045995620305220946,
      "loss": 3.0467,
      "step": 73958
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7000758647918701,
      "learning_rate": 0.00045995274243321014,
      "loss": 2.7595,
      "step": 73959
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.1937143802642822,
      "learning_rate": 0.00045994928178447257,
      "loss": 3.1538,
      "step": 73960
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6228104829788208,
      "learning_rate": 0.0004599458211059973,
      "loss": 2.7872,
      "step": 73961
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6789153814315796,
      "learning_rate": 0.00045994236039778515,
      "loss": 2.8691,
      "step": 73962
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5166598558425903,
      "learning_rate": 0.00045993889965983666,
      "loss": 2.9121,
      "step": 73963
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4002801179885864,
      "learning_rate": 0.0004599354388921525,
      "loss": 3.1707,
      "step": 73964
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8915678262710571,
      "learning_rate": 0.00045993197809473347,
      "loss": 3.1516,
      "step": 73965
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5337142944335938,
      "learning_rate": 0.00045992851726757997,
      "loss": 2.9978,
      "step": 73966
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7190585136413574,
      "learning_rate": 0.0004599250564106926,
      "loss": 2.8569,
      "step": 73967
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8066450357437134,
      "learning_rate": 0.00045992159552407235,
      "loss": 3.1701,
      "step": 73968
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6575406789779663,
      "learning_rate": 0.0004599181346077196,
      "loss": 3.0377,
      "step": 73969
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4983724355697632,
      "learning_rate": 0.00045991467366163506,
      "loss": 3.1087,
      "step": 73970
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7715860605239868,
      "learning_rate": 0.00045991121268581923,
      "loss": 3.0048,
      "step": 73971
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4368878602981567,
      "learning_rate": 0.00045990775168027305,
      "loss": 3.0233,
      "step": 73972
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9328657388687134,
      "learning_rate": 0.0004599042906449969,
      "loss": 3.123,
      "step": 73973
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9730719327926636,
      "learning_rate": 0.00045990082957999157,
      "loss": 3.3764,
      "step": 73974
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5100566148757935,
      "learning_rate": 0.0004598973684852577,
      "loss": 3.0948,
      "step": 73975
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.630545735359192,
      "learning_rate": 0.0004598939073607958,
      "loss": 3.1226,
      "step": 73976
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9683902263641357,
      "learning_rate": 0.0004598904462066066,
      "loss": 3.1935,
      "step": 73977
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6654378175735474,
      "learning_rate": 0.00045988698502269086,
      "loss": 2.9879,
      "step": 73978
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6005674600601196,
      "learning_rate": 0.00045988352380904905,
      "loss": 2.8088,
      "step": 73979
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7213709354400635,
      "learning_rate": 0.00045988006256568177,
      "loss": 3.1337,
      "step": 73980
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7314656972885132,
      "learning_rate": 0.0004598766012925899,
      "loss": 2.8685,
      "step": 73981
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8766264915466309,
      "learning_rate": 0.0004598731399897739,
      "loss": 3.1989,
      "step": 73982
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.032912015914917,
      "learning_rate": 0.00045986967865723453,
      "loss": 3.3155,
      "step": 73983
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5931936502456665,
      "learning_rate": 0.00045986621729497234,
      "loss": 3.1944,
      "step": 73984
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.562734603881836,
      "learning_rate": 0.00045986275590298795,
      "loss": 3.0912,
      "step": 73985
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.806227445602417,
      "learning_rate": 0.0004598592944812821,
      "loss": 3.0009,
      "step": 73986
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3517870903015137,
      "learning_rate": 0.0004598558330298554,
      "loss": 3.1717,
      "step": 73987
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5412302017211914,
      "learning_rate": 0.0004598523715487085,
      "loss": 2.8477,
      "step": 73988
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5449645519256592,
      "learning_rate": 0.00045984891003784186,
      "loss": 3.2185,
      "step": 73989
    },
    {
      "epoch": 0.96,
      "grad_norm": 4.403118133544922,
      "learning_rate": 0.0004598454484972565,
      "loss": 2.9264,
      "step": 73990
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.3709957599639893,
      "learning_rate": 0.00045984198692695276,
      "loss": 2.788,
      "step": 73991
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9924875497817993,
      "learning_rate": 0.0004598385253269313,
      "loss": 3.0991,
      "step": 73992
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5665087699890137,
      "learning_rate": 0.00045983506369719305,
      "loss": 3.0533,
      "step": 73993
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.267174243927002,
      "learning_rate": 0.00045983160203773836,
      "loss": 3.1471,
      "step": 73994
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.368926763534546,
      "learning_rate": 0.00045982814034856785,
      "loss": 2.985,
      "step": 73995
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7181364297866821,
      "learning_rate": 0.0004598246786296824,
      "loss": 3.0768,
      "step": 73996
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8077812194824219,
      "learning_rate": 0.0004598212168810825,
      "loss": 2.8193,
      "step": 73997
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6323540210723877,
      "learning_rate": 0.0004598177551027688,
      "loss": 3.2092,
      "step": 73998
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.7167065143585205,
      "learning_rate": 0.0004598142932947419,
      "loss": 3.0786,
      "step": 73999
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6967031955718994,
      "learning_rate": 0.0004598108314570026,
      "loss": 3.2608,
      "step": 74000
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3769335746765137,
      "learning_rate": 0.00045980736958955136,
      "loss": 3.0148,
      "step": 74001
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.8086235523223877,
      "learning_rate": 0.00045980390769238903,
      "loss": 2.8793,
      "step": 74002
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2515769004821777,
      "learning_rate": 0.00045980044576551603,
      "loss": 3.1005,
      "step": 74003
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5642415285110474,
      "learning_rate": 0.0004597969838089332,
      "loss": 3.1848,
      "step": 74004
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5980982780456543,
      "learning_rate": 0.000459793521822641,
      "loss": 3.1792,
      "step": 74005
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.5784990787506104,
      "learning_rate": 0.0004597900598066402,
      "loss": 2.967,
      "step": 74006
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0470640659332275,
      "learning_rate": 0.0004597865977609314,
      "loss": 3.0377,
      "step": 74007
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2405431270599365,
      "learning_rate": 0.0004597831356855153,
      "loss": 2.8666,
      "step": 74008
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6290634870529175,
      "learning_rate": 0.0004597796735803925,
      "loss": 2.8405,
      "step": 74009
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.108916997909546,
      "learning_rate": 0.00045977621144556365,
      "loss": 2.8845,
      "step": 74010
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4892735481262207,
      "learning_rate": 0.00045977274928102926,
      "loss": 3.0693,
      "step": 74011
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.409881591796875,
      "learning_rate": 0.0004597692870867902,
      "loss": 2.9294,
      "step": 74012
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9350414276123047,
      "learning_rate": 0.0004597658248628471,
      "loss": 2.8506,
      "step": 74013
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.464994192123413,
      "learning_rate": 0.0004597623626092004,
      "loss": 2.9219,
      "step": 74014
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1941864490509033,
      "learning_rate": 0.00045975890032585087,
      "loss": 3.0003,
      "step": 74015
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7367844581604004,
      "learning_rate": 0.00045975543801279916,
      "loss": 2.9739,
      "step": 74016
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5267293453216553,
      "learning_rate": 0.0004597519756700459,
      "loss": 2.9522,
      "step": 74017
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9633749723434448,
      "learning_rate": 0.0004597485132975917,
      "loss": 2.9445,
      "step": 74018
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9024039506912231,
      "learning_rate": 0.00045974505089543734,
      "loss": 2.9947,
      "step": 74019
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5155607461929321,
      "learning_rate": 0.0004597415884635833,
      "loss": 3.4494,
      "step": 74020
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6087965965270996,
      "learning_rate": 0.0004597381260020302,
      "loss": 2.9358,
      "step": 74021
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5546095371246338,
      "learning_rate": 0.0004597346635107788,
      "loss": 3.2686,
      "step": 74022
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.894052505493164,
      "learning_rate": 0.0004597312009898299,
      "loss": 3.0942,
      "step": 74023
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.418723702430725,
      "learning_rate": 0.0004597277384391837,
      "loss": 3.1261,
      "step": 74024
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8989371061325073,
      "learning_rate": 0.0004597242758588412,
      "loss": 2.8526,
      "step": 74025
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5818594694137573,
      "learning_rate": 0.000459720813248803,
      "loss": 2.8978,
      "step": 74026
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9281307458877563,
      "learning_rate": 0.00045971735060906956,
      "loss": 2.8755,
      "step": 74027
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5881253480911255,
      "learning_rate": 0.00045971388793964173,
      "loss": 3.0204,
      "step": 74028
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8798067569732666,
      "learning_rate": 0.00045971042524052015,
      "loss": 3.1528,
      "step": 74029
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4941868782043457,
      "learning_rate": 0.00045970696251170526,
      "loss": 2.666,
      "step": 74030
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7044621706008911,
      "learning_rate": 0.00045970349975319786,
      "loss": 2.9198,
      "step": 74031
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6827399730682373,
      "learning_rate": 0.00045970003696499856,
      "loss": 2.898,
      "step": 74032
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5237642526626587,
      "learning_rate": 0.00045969657414710806,
      "loss": 3.109,
      "step": 74033
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3021202087402344,
      "learning_rate": 0.0004596931112995269,
      "loss": 3.0075,
      "step": 74034
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.4963099956512451,
      "learning_rate": 0.00045968964842225577,
      "loss": 3.1726,
      "step": 74035
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.682263970375061,
      "learning_rate": 0.0004596861855152954,
      "loss": 2.9567,
      "step": 74036
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2776379585266113,
      "learning_rate": 0.00045968272257864627,
      "loss": 2.9622,
      "step": 74037
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.222832679748535,
      "learning_rate": 0.00045967925961230914,
      "loss": 3.0685,
      "step": 74038
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7836415767669678,
      "learning_rate": 0.00045967579661628465,
      "loss": 2.9056,
      "step": 74039
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1775801181793213,
      "learning_rate": 0.00045967233359057335,
      "loss": 2.83,
      "step": 74040
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0948586463928223,
      "learning_rate": 0.00045966887053517595,
      "loss": 2.9696,
      "step": 74041
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6546417474746704,
      "learning_rate": 0.0004596654074500932,
      "loss": 2.8897,
      "step": 74042
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8990768194198608,
      "learning_rate": 0.00045966194433532553,
      "loss": 2.9861,
      "step": 74043
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.915984034538269,
      "learning_rate": 0.00045965848119087376,
      "loss": 2.9478,
      "step": 74044
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.695967435836792,
      "learning_rate": 0.00045965501801673844,
      "loss": 3.0296,
      "step": 74045
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3965474367141724,
      "learning_rate": 0.00045965155481292016,
      "loss": 2.958,
      "step": 74046
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7211506366729736,
      "learning_rate": 0.0004596480915794197,
      "loss": 3.1357,
      "step": 74047
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8289016485214233,
      "learning_rate": 0.0004596446283162377,
      "loss": 3.1651,
      "step": 74048
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7963297367095947,
      "learning_rate": 0.0004596411650233747,
      "loss": 2.9176,
      "step": 74049
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3161396980285645,
      "learning_rate": 0.0004596377017008314,
      "loss": 2.8432,
      "step": 74050
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.657386064529419,
      "learning_rate": 0.00045963423834860853,
      "loss": 2.9136,
      "step": 74051
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.644643783569336,
      "learning_rate": 0.00045963077496670646,
      "loss": 3.0053,
      "step": 74052
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.836055874824524,
      "learning_rate": 0.0004596273115551261,
      "loss": 3.0641,
      "step": 74053
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7618581056594849,
      "learning_rate": 0.0004596238481138681,
      "loss": 2.9842,
      "step": 74054
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7231277227401733,
      "learning_rate": 0.0004596203846429328,
      "loss": 3.1427,
      "step": 74055
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5432180166244507,
      "learning_rate": 0.0004596169211423212,
      "loss": 2.9843,
      "step": 74056
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6508548259735107,
      "learning_rate": 0.00045961345761203387,
      "loss": 3.109,
      "step": 74057
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.1805548667907715,
      "learning_rate": 0.0004596099940520712,
      "loss": 3.1682,
      "step": 74058
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8204196691513062,
      "learning_rate": 0.0004596065304624341,
      "loss": 3.0466,
      "step": 74059
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.475477933883667,
      "learning_rate": 0.00045960306684312326,
      "loss": 3.1795,
      "step": 74060
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2428371906280518,
      "learning_rate": 0.00045959960319413905,
      "loss": 2.9443,
      "step": 74061
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5045307874679565,
      "learning_rate": 0.00045959613951548233,
      "loss": 2.9269,
      "step": 74062
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.812572956085205,
      "learning_rate": 0.0004595926758071536,
      "loss": 3.0455,
      "step": 74063
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.699133038520813,
      "learning_rate": 0.0004595892120691537,
      "loss": 3.033,
      "step": 74064
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0125300884246826,
      "learning_rate": 0.00045958574830148305,
      "loss": 3.0048,
      "step": 74065
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7903791666030884,
      "learning_rate": 0.0004595822845041424,
      "loss": 2.9698,
      "step": 74066
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.42255699634552,
      "learning_rate": 0.00045957882067713246,
      "loss": 3.0049,
      "step": 74067
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3824816942214966,
      "learning_rate": 0.0004595753568204537,
      "loss": 2.8583,
      "step": 74068
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.284442663192749,
      "learning_rate": 0.00045957189293410695,
      "loss": 3.0609,
      "step": 74069
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5730407238006592,
      "learning_rate": 0.0004595684290180928,
      "loss": 2.7228,
      "step": 74070
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5314202308654785,
      "learning_rate": 0.0004595649650724118,
      "loss": 3.0476,
      "step": 74071
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9765814542770386,
      "learning_rate": 0.00045956150109706476,
      "loss": 3.0584,
      "step": 74072
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.6927008628845215,
      "learning_rate": 0.0004595580370920521,
      "loss": 3.1907,
      "step": 74073
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9751191139221191,
      "learning_rate": 0.00045955457305737454,
      "loss": 3.0035,
      "step": 74074
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7171984910964966,
      "learning_rate": 0.0004595511089930329,
      "loss": 3.1968,
      "step": 74075
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6535037755966187,
      "learning_rate": 0.00045954764489902766,
      "loss": 3.0966,
      "step": 74076
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.55392324924469,
      "learning_rate": 0.00045954418077535957,
      "loss": 3.0437,
      "step": 74077
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8870283365249634,
      "learning_rate": 0.00045954071662202917,
      "loss": 3.1217,
      "step": 74078
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5405389070510864,
      "learning_rate": 0.0004595372524390371,
      "loss": 3.1467,
      "step": 74079
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2426981925964355,
      "learning_rate": 0.00045953378822638404,
      "loss": 3.2239,
      "step": 74080
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0546364784240723,
      "learning_rate": 0.0004595303239840707,
      "loss": 2.9284,
      "step": 74081
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8389172554016113,
      "learning_rate": 0.00045952685971209755,
      "loss": 3.1622,
      "step": 74082
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8561890125274658,
      "learning_rate": 0.0004595233954104654,
      "loss": 2.7355,
      "step": 74083
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8552786111831665,
      "learning_rate": 0.00045951993107917494,
      "loss": 3.1441,
      "step": 74084
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5592862367630005,
      "learning_rate": 0.00045951646671822655,
      "loss": 2.8595,
      "step": 74085
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.314438581466675,
      "learning_rate": 0.0004595130023276211,
      "loss": 3.1348,
      "step": 74086
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.316037178039551,
      "learning_rate": 0.0004595095379073592,
      "loss": 3.0061,
      "step": 74087
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.3541589975357056,
      "learning_rate": 0.0004595060734574415,
      "loss": 2.9456,
      "step": 74088
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5563774108886719,
      "learning_rate": 0.00045950260897786855,
      "loss": 2.8587,
      "step": 74089
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6307175159454346,
      "learning_rate": 0.00045949914446864103,
      "loss": 3.1303,
      "step": 74090
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6610584259033203,
      "learning_rate": 0.00045949567992975966,
      "loss": 3.0685,
      "step": 74091
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7780354022979736,
      "learning_rate": 0.00045949221536122493,
      "loss": 3.0937,
      "step": 74092
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8459479808807373,
      "learning_rate": 0.00045948875076303765,
      "loss": 2.8481,
      "step": 74093
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6428868770599365,
      "learning_rate": 0.00045948528613519847,
      "loss": 3.0911,
      "step": 74094
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3294947147369385,
      "learning_rate": 0.00045948182147770793,
      "loss": 2.9242,
      "step": 74095
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7359601259231567,
      "learning_rate": 0.00045947835679056664,
      "loss": 3.2084,
      "step": 74096
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.9200115203857422,
      "learning_rate": 0.00045947489207377534,
      "loss": 3.0396,
      "step": 74097
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.8682228326797485,
      "learning_rate": 0.0004594714273273348,
      "loss": 2.9105,
      "step": 74098
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.5797607898712158,
      "learning_rate": 0.0004594679625512453,
      "loss": 3.0549,
      "step": 74099
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7778788805007935,
      "learning_rate": 0.00045946449774550776,
      "loss": 2.904,
      "step": 74100
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.110733985900879,
      "learning_rate": 0.0004594610329101229,
      "loss": 2.8205,
      "step": 74101
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.464991569519043,
      "learning_rate": 0.0004594575680450911,
      "loss": 3.0381,
      "step": 74102
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.3724420070648193,
      "learning_rate": 0.000459454103150413,
      "loss": 3.0519,
      "step": 74103
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.3503541946411133,
      "learning_rate": 0.0004594506382260895,
      "loss": 3.0866,
      "step": 74104
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7426936626434326,
      "learning_rate": 0.00045944717327212124,
      "loss": 2.8914,
      "step": 74105
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.0297677516937256,
      "learning_rate": 0.0004594437082885085,
      "loss": 3.0344,
      "step": 74106
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.7036733627319336,
      "learning_rate": 0.00045944024327525235,
      "loss": 2.8963,
      "step": 74107
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.916924238204956,
      "learning_rate": 0.0004594367782323531,
      "loss": 3.1062,
      "step": 74108
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2358434200286865,
      "learning_rate": 0.0004594333131598117,
      "loss": 3.1164,
      "step": 74109
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.883018970489502,
      "learning_rate": 0.0004594298480576286,
      "loss": 2.7419,
      "step": 74110
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.9580917358398438,
      "learning_rate": 0.00045942638292580443,
      "loss": 3.0727,
      "step": 74111
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.357692837715149,
      "learning_rate": 0.00045942291776433986,
      "loss": 3.1008,
      "step": 74112
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.987895965576172,
      "learning_rate": 0.00045941945257323564,
      "loss": 2.692,
      "step": 74113
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.030444622039795,
      "learning_rate": 0.00045941598735249226,
      "loss": 3.1821,
      "step": 74114
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.117607831954956,
      "learning_rate": 0.00045941252210211054,
      "loss": 3.0885,
      "step": 74115
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.503835678100586,
      "learning_rate": 0.00045940905682209096,
      "loss": 2.9563,
      "step": 74116
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4276468753814697,
      "learning_rate": 0.00045940559151243416,
      "loss": 3.0619,
      "step": 74117
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5523871183395386,
      "learning_rate": 0.00045940212617314103,
      "loss": 3.2087,
      "step": 74118
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8445137739181519,
      "learning_rate": 0.0004593986608042119,
      "loss": 3.1494,
      "step": 74119
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.1394853591918945,
      "learning_rate": 0.0004593951954056475,
      "loss": 3.0159,
      "step": 74120
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7043027877807617,
      "learning_rate": 0.0004593917299774487,
      "loss": 3.0035,
      "step": 74121
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3905229568481445,
      "learning_rate": 0.00045938826451961577,
      "loss": 2.8729,
      "step": 74122
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6276881694793701,
      "learning_rate": 0.00045938479903214965,
      "loss": 2.8577,
      "step": 74123
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5920614004135132,
      "learning_rate": 0.0004593813335150509,
      "loss": 2.8528,
      "step": 74124
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4867799282073975,
      "learning_rate": 0.0004593778679683201,
      "loss": 2.8595,
      "step": 74125
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4240092039108276,
      "learning_rate": 0.00045937440239195805,
      "loss": 2.8579,
      "step": 74126
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.193113327026367,
      "learning_rate": 0.0004593709367859653,
      "loss": 3.1149,
      "step": 74127
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8561617136001587,
      "learning_rate": 0.0004593674711503424,
      "loss": 3.1272,
      "step": 74128
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9244030714035034,
      "learning_rate": 0.00045936400548509,
      "loss": 3.0061,
      "step": 74129
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.583819031715393,
      "learning_rate": 0.0004593605397902089,
      "loss": 2.8025,
      "step": 74130
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6583384275436401,
      "learning_rate": 0.00045935707406569967,
      "loss": 2.9575,
      "step": 74131
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5202007293701172,
      "learning_rate": 0.000459353608311563,
      "loss": 2.7946,
      "step": 74132
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7364712953567505,
      "learning_rate": 0.0004593501425277995,
      "loss": 2.9837,
      "step": 74133
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5635673999786377,
      "learning_rate": 0.00045934667671440973,
      "loss": 2.9634,
      "step": 74134
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9143120050430298,
      "learning_rate": 0.00045934321087139437,
      "loss": 3.0628,
      "step": 74135
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7044214010238647,
      "learning_rate": 0.0004593397449987542,
      "loss": 3.2194,
      "step": 74136
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4951469898223877,
      "learning_rate": 0.00045933627909648976,
      "loss": 3.0085,
      "step": 74137
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.0189125537872314,
      "learning_rate": 0.0004593328131646016,
      "loss": 2.9061,
      "step": 74138
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7650620937347412,
      "learning_rate": 0.0004593293472030906,
      "loss": 3.0727,
      "step": 74139
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5743008852005005,
      "learning_rate": 0.00045932588121195715,
      "loss": 3.1754,
      "step": 74140
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1404333114624023,
      "learning_rate": 0.00045932241519120197,
      "loss": 3.2046,
      "step": 74141
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8783992528915405,
      "learning_rate": 0.0004593189491408259,
      "loss": 3.0881,
      "step": 74142
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5838719606399536,
      "learning_rate": 0.0004593154830608293,
      "loss": 2.8335,
      "step": 74143
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.942545771598816,
      "learning_rate": 0.000459312016951213,
      "loss": 2.8433,
      "step": 74144
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0166542530059814,
      "learning_rate": 0.00045930855081197764,
      "loss": 3.0955,
      "step": 74145
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9499295949935913,
      "learning_rate": 0.00045930508464312375,
      "loss": 3.2138,
      "step": 74146
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2126548290252686,
      "learning_rate": 0.00045930161844465206,
      "loss": 2.8425,
      "step": 74147
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7217274904251099,
      "learning_rate": 0.00045929815221656316,
      "loss": 2.9703,
      "step": 74148
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5747365951538086,
      "learning_rate": 0.00045929468595885765,
      "loss": 3.1689,
      "step": 74149
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7427223920822144,
      "learning_rate": 0.00045929121967153635,
      "loss": 3.1993,
      "step": 74150
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9991955757141113,
      "learning_rate": 0.0004592877533545999,
      "loss": 2.9619,
      "step": 74151
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5831378698349,
      "learning_rate": 0.0004592842870080487,
      "loss": 2.9039,
      "step": 74152
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.365100383758545,
      "learning_rate": 0.0004592808206318836,
      "loss": 2.8574,
      "step": 74153
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1519887447357178,
      "learning_rate": 0.0004592773542261052,
      "loss": 2.8355,
      "step": 74154
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4270765781402588,
      "learning_rate": 0.0004592738877907141,
      "loss": 3.1201,
      "step": 74155
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9724690914154053,
      "learning_rate": 0.000459270421325711,
      "loss": 2.9795,
      "step": 74156
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5837137699127197,
      "learning_rate": 0.0004592669548310965,
      "loss": 2.7469,
      "step": 74157
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7420403957366943,
      "learning_rate": 0.0004592634883068713,
      "loss": 2.8772,
      "step": 74158
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8200613260269165,
      "learning_rate": 0.00045926002175303597,
      "loss": 3.0893,
      "step": 74159
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6062970161437988,
      "learning_rate": 0.0004592565551695913,
      "loss": 2.8919,
      "step": 74160
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7021528482437134,
      "learning_rate": 0.00045925308855653766,
      "loss": 2.7652,
      "step": 74161
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5282080173492432,
      "learning_rate": 0.000459249621913876,
      "loss": 3.0907,
      "step": 74162
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6756561994552612,
      "learning_rate": 0.0004592461552416068,
      "loss": 3.1718,
      "step": 74163
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.655804991722107,
      "learning_rate": 0.0004592426885397307,
      "loss": 2.9829,
      "step": 74164
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4906671047210693,
      "learning_rate": 0.0004592392218082484,
      "loss": 3.2625,
      "step": 74165
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5502846240997314,
      "learning_rate": 0.0004592357550471605,
      "loss": 2.9206,
      "step": 74166
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6147286891937256,
      "learning_rate": 0.0004592322882564677,
      "loss": 3.2028,
      "step": 74167
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3173983097076416,
      "learning_rate": 0.0004592288214361706,
      "loss": 2.996,
      "step": 74168
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.208566904067993,
      "learning_rate": 0.00045922535458626986,
      "loss": 2.979,
      "step": 74169
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5748540163040161,
      "learning_rate": 0.0004592218877067661,
      "loss": 3.0516,
      "step": 74170
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6512587070465088,
      "learning_rate": 0.00045921842079766007,
      "loss": 3.2488,
      "step": 74171
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.094791889190674,
      "learning_rate": 0.0004592149538589522,
      "loss": 3.0775,
      "step": 74172
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.491333246231079,
      "learning_rate": 0.00045921148689064337,
      "loss": 3.2483,
      "step": 74173
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.757041096687317,
      "learning_rate": 0.00045920801989273406,
      "loss": 2.8128,
      "step": 74174
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.689915657043457,
      "learning_rate": 0.00045920455286522496,
      "loss": 2.9334,
      "step": 74175
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9324359893798828,
      "learning_rate": 0.00045920108580811675,
      "loss": 2.9815,
      "step": 74176
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.1948562860488892,
      "learning_rate": 0.0004591976187214101,
      "loss": 3.1162,
      "step": 74177
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2257211208343506,
      "learning_rate": 0.0004591941516051055,
      "loss": 2.9428,
      "step": 74178
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.008527994155884,
      "learning_rate": 0.0004591906844592038,
      "loss": 3.1616,
      "step": 74179
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7320003509521484,
      "learning_rate": 0.00045918721728370547,
      "loss": 2.9716,
      "step": 74180
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3899948596954346,
      "learning_rate": 0.0004591837500786113,
      "loss": 3.1056,
      "step": 74181
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9400298595428467,
      "learning_rate": 0.0004591802828439219,
      "loss": 2.9352,
      "step": 74182
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7513258457183838,
      "learning_rate": 0.00045917681557963784,
      "loss": 3.0002,
      "step": 74183
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4945647716522217,
      "learning_rate": 0.0004591733482857597,
      "loss": 3.2508,
      "step": 74184
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.441982626914978,
      "learning_rate": 0.0004591698809622883,
      "loss": 3.0855,
      "step": 74185
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7784581184387207,
      "learning_rate": 0.00045916641360922424,
      "loss": 3.1791,
      "step": 74186
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9970340728759766,
      "learning_rate": 0.0004591629462265682,
      "loss": 3.0983,
      "step": 74187
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3920034170150757,
      "learning_rate": 0.00045915947881432066,
      "loss": 2.9791,
      "step": 74188
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3277833461761475,
      "learning_rate": 0.0004591560113724824,
      "loss": 3.1619,
      "step": 74189
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3240411281585693,
      "learning_rate": 0.000459152543901054,
      "loss": 3.0811,
      "step": 74190
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7836308479309082,
      "learning_rate": 0.00045914907640003626,
      "loss": 2.8133,
      "step": 74191
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4116921424865723,
      "learning_rate": 0.0004591456088694296,
      "loss": 3.0361,
      "step": 74192
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7250075340270996,
      "learning_rate": 0.00045914214130923477,
      "loss": 2.9349,
      "step": 74193
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2180986404418945,
      "learning_rate": 0.00045913867371945245,
      "loss": 2.9931,
      "step": 74194
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6843940019607544,
      "learning_rate": 0.0004591352061000832,
      "loss": 2.9558,
      "step": 74195
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2932565212249756,
      "learning_rate": 0.00045913173845112766,
      "loss": 3.0841,
      "step": 74196
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.031532049179077,
      "learning_rate": 0.0004591282707725867,
      "loss": 2.8224,
      "step": 74197
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.901210904121399,
      "learning_rate": 0.00045912480306446073,
      "loss": 2.8828,
      "step": 74198
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0138845443725586,
      "learning_rate": 0.0004591213353267503,
      "loss": 2.7233,
      "step": 74199
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3790948390960693,
      "learning_rate": 0.0004591178675594564,
      "loss": 3.1618,
      "step": 74200
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.614224910736084,
      "learning_rate": 0.0004591143997625794,
      "loss": 2.9398,
      "step": 74201
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1429810523986816,
      "learning_rate": 0.00045911093193612,
      "loss": 3.0543,
      "step": 74202
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.986976146697998,
      "learning_rate": 0.000459107464080079,
      "loss": 3.2817,
      "step": 74203
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.7544610500335693,
      "learning_rate": 0.0004591039961944568,
      "loss": 2.8463,
      "step": 74204
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5279772281646729,
      "learning_rate": 0.0004591005282792542,
      "loss": 3.0694,
      "step": 74205
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4700236320495605,
      "learning_rate": 0.00045909706033447185,
      "loss": 3.1333,
      "step": 74206
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0098519325256348,
      "learning_rate": 0.0004590935923601103,
      "loss": 2.9097,
      "step": 74207
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.606936454772949,
      "learning_rate": 0.00045909012435617027,
      "loss": 2.7875,
      "step": 74208
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5570521354675293,
      "learning_rate": 0.00045908665632265236,
      "loss": 2.8834,
      "step": 74209
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0287132263183594,
      "learning_rate": 0.0004590831882595572,
      "loss": 2.9049,
      "step": 74210
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.631690740585327,
      "learning_rate": 0.00045907972016688553,
      "loss": 3.2265,
      "step": 74211
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6127022504806519,
      "learning_rate": 0.000459076252044638,
      "loss": 2.8384,
      "step": 74212
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.310504913330078,
      "learning_rate": 0.000459072783892815,
      "loss": 3.1456,
      "step": 74213
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.15116286277771,
      "learning_rate": 0.00045906931571141756,
      "loss": 2.8355,
      "step": 74214
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.661614179611206,
      "learning_rate": 0.0004590658475004461,
      "loss": 3.0799,
      "step": 74215
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.576549768447876,
      "learning_rate": 0.00045906237925990125,
      "loss": 3.0861,
      "step": 74216
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5440019369125366,
      "learning_rate": 0.00045905891098978364,
      "loss": 3.0506,
      "step": 74217
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.693833351135254,
      "learning_rate": 0.0004590554426900941,
      "loss": 3.1115,
      "step": 74218
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3335405588150024,
      "learning_rate": 0.00045905197436083304,
      "loss": 3.1358,
      "step": 74219
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7906851768493652,
      "learning_rate": 0.0004590485060020013,
      "loss": 2.7466,
      "step": 74220
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6304622888565063,
      "learning_rate": 0.0004590450376135995,
      "loss": 3.0404,
      "step": 74221
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.231489419937134,
      "learning_rate": 0.00045904156919562806,
      "loss": 3.1121,
      "step": 74222
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0559067726135254,
      "learning_rate": 0.00045903810074808787,
      "loss": 3.2414,
      "step": 74223
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7276561260223389,
      "learning_rate": 0.0004590346322709796,
      "loss": 3.0496,
      "step": 74224
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8973697423934937,
      "learning_rate": 0.00045903116376430354,
      "loss": 2.8378,
      "step": 74225
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.415604591369629,
      "learning_rate": 0.00045902769522806076,
      "loss": 2.9122,
      "step": 74226
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5026341676712036,
      "learning_rate": 0.00045902422666225167,
      "loss": 3.0941,
      "step": 74227
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5253057479858398,
      "learning_rate": 0.00045902075806687705,
      "loss": 2.9874,
      "step": 74228
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.414808988571167,
      "learning_rate": 0.00045901728944193736,
      "loss": 3.1382,
      "step": 74229
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5686856508255005,
      "learning_rate": 0.0004590138207874335,
      "loss": 2.9953,
      "step": 74230
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7090543508529663,
      "learning_rate": 0.0004590103521033658,
      "loss": 3.1878,
      "step": 74231
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.974289059638977,
      "learning_rate": 0.00045900688338973514,
      "loss": 3.0381,
      "step": 74232
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5276206731796265,
      "learning_rate": 0.0004590034146465421,
      "loss": 3.0118,
      "step": 74233
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6805764436721802,
      "learning_rate": 0.00045899994587378747,
      "loss": 3.1456,
      "step": 74234
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6213021278381348,
      "learning_rate": 0.00045899647707147156,
      "loss": 2.8646,
      "step": 74235
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.706604242324829,
      "learning_rate": 0.00045899300823959525,
      "loss": 2.8775,
      "step": 74236
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6690036058425903,
      "learning_rate": 0.00045898953937815914,
      "loss": 3.0737,
      "step": 74237
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5241079330444336,
      "learning_rate": 0.0004589860704871639,
      "loss": 3.0023,
      "step": 74238
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7915788888931274,
      "learning_rate": 0.0004589826015666101,
      "loss": 3.0786,
      "step": 74239
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.701198697090149,
      "learning_rate": 0.0004589791326164985,
      "loss": 2.9907,
      "step": 74240
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.379011869430542,
      "learning_rate": 0.0004589756636368296,
      "loss": 2.9869,
      "step": 74241
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3539904356002808,
      "learning_rate": 0.00045897219462760416,
      "loss": 2.984,
      "step": 74242
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5798442363739014,
      "learning_rate": 0.00045896872558882283,
      "loss": 3.1066,
      "step": 74243
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9122923612594604,
      "learning_rate": 0.0004589652565204862,
      "loss": 2.9776,
      "step": 74244
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4877625703811646,
      "learning_rate": 0.00045896178742259493,
      "loss": 3.3818,
      "step": 74245
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7248374223709106,
      "learning_rate": 0.0004589583182951496,
      "loss": 2.926,
      "step": 74246
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5591827630996704,
      "learning_rate": 0.000458954849138151,
      "loss": 2.9514,
      "step": 74247
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0180375576019287,
      "learning_rate": 0.0004589513799515996,
      "loss": 3.1472,
      "step": 74248
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5785939693450928,
      "learning_rate": 0.00045894791073549616,
      "loss": 2.9244,
      "step": 74249
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.614078402519226,
      "learning_rate": 0.0004589444414898414,
      "loss": 2.9475,
      "step": 74250
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.499633550643921,
      "learning_rate": 0.00045894097221463575,
      "loss": 3.0404,
      "step": 74251
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.246927499771118,
      "learning_rate": 0.00045893750290988005,
      "loss": 2.8573,
      "step": 74252
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.848544955253601,
      "learning_rate": 0.00045893403357557486,
      "loss": 2.9541,
      "step": 74253
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8358654975891113,
      "learning_rate": 0.00045893056421172077,
      "loss": 2.8099,
      "step": 74254
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.134565830230713,
      "learning_rate": 0.0004589270948183186,
      "loss": 3.1549,
      "step": 74255
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6084035634994507,
      "learning_rate": 0.00045892362539536876,
      "loss": 2.9945,
      "step": 74256
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4443546533584595,
      "learning_rate": 0.00045892015594287204,
      "loss": 3.1636,
      "step": 74257
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5259360074996948,
      "learning_rate": 0.0004589166864608291,
      "loss": 3.0596,
      "step": 74258
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.64651620388031,
      "learning_rate": 0.00045891321694924056,
      "loss": 2.7592,
      "step": 74259
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.584883213043213,
      "learning_rate": 0.00045890974740810705,
      "loss": 3.0588,
      "step": 74260
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4281752109527588,
      "learning_rate": 0.0004589062778374293,
      "loss": 3.0824,
      "step": 74261
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4922200441360474,
      "learning_rate": 0.0004589028082372077,
      "loss": 3.1529,
      "step": 74262
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6454423666000366,
      "learning_rate": 0.0004588993386074431,
      "loss": 3.2505,
      "step": 74263
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.602341651916504,
      "learning_rate": 0.0004588958689481363,
      "loss": 3.2449,
      "step": 74264
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7986705303192139,
      "learning_rate": 0.0004588923992592875,
      "loss": 2.92,
      "step": 74265
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6753681898117065,
      "learning_rate": 0.0004588889295408978,
      "loss": 3.0761,
      "step": 74266
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6877546310424805,
      "learning_rate": 0.0004588854597929675,
      "loss": 2.7591,
      "step": 74267
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6620583534240723,
      "learning_rate": 0.0004588819900154975,
      "loss": 2.892,
      "step": 74268
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6224349737167358,
      "learning_rate": 0.00045887852020848834,
      "loss": 3.0525,
      "step": 74269
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0791923999786377,
      "learning_rate": 0.0004588750503719406,
      "loss": 3.0629,
      "step": 74270
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.723805546760559,
      "learning_rate": 0.000458871580505855,
      "loss": 2.7801,
      "step": 74271
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9970622062683105,
      "learning_rate": 0.00045886811061023215,
      "loss": 3.2112,
      "step": 74272
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6799755096435547,
      "learning_rate": 0.00045886464068507283,
      "loss": 3.0838,
      "step": 74273
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1035120487213135,
      "learning_rate": 0.0004588611707303775,
      "loss": 3.2914,
      "step": 74274
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7033244371414185,
      "learning_rate": 0.0004588577007461469,
      "loss": 3.1353,
      "step": 74275
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.826401948928833,
      "learning_rate": 0.00045885423073238163,
      "loss": 3.2777,
      "step": 74276
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.248666524887085,
      "learning_rate": 0.0004588507606890823,
      "loss": 3.058,
      "step": 74277
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3049609661102295,
      "learning_rate": 0.00045884729061624967,
      "loss": 3.1169,
      "step": 74278
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5454872846603394,
      "learning_rate": 0.0004588438205138844,
      "loss": 3.2494,
      "step": 74279
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.574191093444824,
      "learning_rate": 0.00045884035038198695,
      "loss": 2.879,
      "step": 74280
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0144753456115723,
      "learning_rate": 0.0004588368802205582,
      "loss": 3.1462,
      "step": 74281
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9166862964630127,
      "learning_rate": 0.0004588334100295986,
      "loss": 3.3055,
      "step": 74282
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.137716293334961,
      "learning_rate": 0.0004588299398091089,
      "loss": 2.9065,
      "step": 74283
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.773474931716919,
      "learning_rate": 0.0004588264695590897,
      "loss": 2.9983,
      "step": 74284
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.0989797115325928,
      "learning_rate": 0.0004588229992795417,
      "loss": 3.0664,
      "step": 74285
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7102347612380981,
      "learning_rate": 0.0004588195289704654,
      "loss": 3.0079,
      "step": 74286
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.470205545425415,
      "learning_rate": 0.00045881605863186165,
      "loss": 3.0549,
      "step": 74287
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1957294940948486,
      "learning_rate": 0.000458812588263731,
      "loss": 3.1749,
      "step": 74288
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.028228759765625,
      "learning_rate": 0.000458809117866074,
      "loss": 3.0526,
      "step": 74289
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6577626466751099,
      "learning_rate": 0.00045880564743889146,
      "loss": 2.9711,
      "step": 74290
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.7618408203125,
      "learning_rate": 0.00045880217698218397,
      "loss": 2.9736,
      "step": 74291
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.597142457962036,
      "learning_rate": 0.00045879870649595213,
      "loss": 2.811,
      "step": 74292
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8904439210891724,
      "learning_rate": 0.0004587952359801966,
      "loss": 2.8683,
      "step": 74293
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5934383869171143,
      "learning_rate": 0.0004587917654349181,
      "loss": 2.9434,
      "step": 74294
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8089607954025269,
      "learning_rate": 0.0004587882948601171,
      "loss": 2.9871,
      "step": 74295
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.2195684909820557,
      "learning_rate": 0.00045878482425579443,
      "loss": 3.1366,
      "step": 74296
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.14141583442688,
      "learning_rate": 0.00045878135362195074,
      "loss": 3.1727,
      "step": 74297
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7710323333740234,
      "learning_rate": 0.0004587778829585864,
      "loss": 3.146,
      "step": 74298
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9049313068389893,
      "learning_rate": 0.0004587744122657024,
      "loss": 2.993,
      "step": 74299
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.3012781143188477,
      "learning_rate": 0.00045877094154329916,
      "loss": 3.1446,
      "step": 74300
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5815844535827637,
      "learning_rate": 0.0004587674707913775,
      "loss": 2.9467,
      "step": 74301
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.6627912521362305,
      "learning_rate": 0.0004587640000099379,
      "loss": 3.1284,
      "step": 74302
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.393167734146118,
      "learning_rate": 0.0004587605291989811,
      "loss": 3.2927,
      "step": 74303
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2903478145599365,
      "learning_rate": 0.0004587570583585078,
      "loss": 3.1515,
      "step": 74304
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8655747175216675,
      "learning_rate": 0.00045875358748851845,
      "loss": 2.7975,
      "step": 74305
    },
    {
      "epoch": 0.97,
      "grad_norm": 4.342624664306641,
      "learning_rate": 0.00045875011658901384,
      "loss": 3.0343,
      "step": 74306
    },
    {
      "epoch": 0.97,
      "grad_norm": 4.026228904724121,
      "learning_rate": 0.00045874664565999466,
      "loss": 3.0803,
      "step": 74307
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.649836540222168,
      "learning_rate": 0.00045874317470146137,
      "loss": 2.9046,
      "step": 74308
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7449207305908203,
      "learning_rate": 0.00045873970371341474,
      "loss": 2.9213,
      "step": 74309
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.4487807750701904,
      "learning_rate": 0.0004587362326958555,
      "loss": 2.9628,
      "step": 74310
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5551953315734863,
      "learning_rate": 0.0004587327616487841,
      "loss": 3.1368,
      "step": 74311
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.106860399246216,
      "learning_rate": 0.0004587292905722013,
      "loss": 3.0028,
      "step": 74312
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9495952129364014,
      "learning_rate": 0.00045872581946610776,
      "loss": 2.8722,
      "step": 74313
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.0292835235595703,
      "learning_rate": 0.0004587223483305041,
      "loss": 2.8822,
      "step": 74314
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7756627798080444,
      "learning_rate": 0.0004587188771653909,
      "loss": 3.0658,
      "step": 74315
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6004647016525269,
      "learning_rate": 0.000458715405970769,
      "loss": 3.0207,
      "step": 74316
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.736894130706787,
      "learning_rate": 0.00045871193474663875,
      "loss": 3.0839,
      "step": 74317
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.867567300796509,
      "learning_rate": 0.00045870846349300103,
      "loss": 2.904,
      "step": 74318
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1798324584960938,
      "learning_rate": 0.00045870499220985637,
      "loss": 2.9885,
      "step": 74319
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3888278007507324,
      "learning_rate": 0.0004587015208972055,
      "loss": 3.0245,
      "step": 74320
    },
    {
      "epoch": 0.97,
      "grad_norm": 4.350532054901123,
      "learning_rate": 0.000458698049555049,
      "loss": 2.7679,
      "step": 74321
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3736112117767334,
      "learning_rate": 0.0004586945781833876,
      "loss": 3.0602,
      "step": 74322
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5595157146453857,
      "learning_rate": 0.0004586911067822218,
      "loss": 3.1379,
      "step": 74323
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8635410070419312,
      "learning_rate": 0.0004586876353515523,
      "loss": 3.05,
      "step": 74324
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7115190029144287,
      "learning_rate": 0.0004586841638913799,
      "loss": 3.1654,
      "step": 74325
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6356467008590698,
      "learning_rate": 0.00045868069240170503,
      "loss": 3.2253,
      "step": 74326
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9772247076034546,
      "learning_rate": 0.00045867722088252847,
      "loss": 2.9842,
      "step": 74327
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6578912734985352,
      "learning_rate": 0.0004586737493338508,
      "loss": 3.2527,
      "step": 74328
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6281404495239258,
      "learning_rate": 0.0004586702777556726,
      "loss": 3.0459,
      "step": 74329
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0448052883148193,
      "learning_rate": 0.00045866680614799476,
      "loss": 3.2005,
      "step": 74330
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8349422216415405,
      "learning_rate": 0.0004586633345108177,
      "loss": 3.0307,
      "step": 74331
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7784972190856934,
      "learning_rate": 0.00045865986284414204,
      "loss": 3.0413,
      "step": 74332
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.138087511062622,
      "learning_rate": 0.0004586563911479686,
      "loss": 3.031,
      "step": 74333
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9925545454025269,
      "learning_rate": 0.00045865291942229794,
      "loss": 3.0363,
      "step": 74334
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.061422824859619,
      "learning_rate": 0.0004586494476671307,
      "loss": 2.9836,
      "step": 74335
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4919613599777222,
      "learning_rate": 0.0004586459758824675,
      "loss": 2.9112,
      "step": 74336
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.69203782081604,
      "learning_rate": 0.0004586425040683091,
      "loss": 3.1338,
      "step": 74337
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0025763511657715,
      "learning_rate": 0.000458639032224656,
      "loss": 2.8737,
      "step": 74338
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5363867282867432,
      "learning_rate": 0.0004586355603515089,
      "loss": 3.1675,
      "step": 74339
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.220900774002075,
      "learning_rate": 0.0004586320884488685,
      "loss": 3.0549,
      "step": 74340
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5400218963623047,
      "learning_rate": 0.0004586286165167354,
      "loss": 3.1605,
      "step": 74341
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5945608615875244,
      "learning_rate": 0.00045862514455511017,
      "loss": 3.1008,
      "step": 74342
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3763010501861572,
      "learning_rate": 0.00045862167256399363,
      "loss": 3.0525,
      "step": 74343
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.337841272354126,
      "learning_rate": 0.00045861820054338625,
      "loss": 2.773,
      "step": 74344
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8085285425186157,
      "learning_rate": 0.0004586147284932888,
      "loss": 2.9587,
      "step": 74345
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4711408615112305,
      "learning_rate": 0.0004586112564137019,
      "loss": 2.8765,
      "step": 74346
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7017440795898438,
      "learning_rate": 0.0004586077843046261,
      "loss": 2.838,
      "step": 74347
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9643815755844116,
      "learning_rate": 0.0004586043121660622,
      "loss": 3.0226,
      "step": 74348
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6488702297210693,
      "learning_rate": 0.0004586008399980107,
      "loss": 2.9153,
      "step": 74349
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.389753580093384,
      "learning_rate": 0.00045859736780047225,
      "loss": 3.0118,
      "step": 74350
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9560070037841797,
      "learning_rate": 0.0004585938955734477,
      "loss": 2.9608,
      "step": 74351
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.912053108215332,
      "learning_rate": 0.00045859042331693753,
      "loss": 3.0618,
      "step": 74352
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8456995487213135,
      "learning_rate": 0.00045858695103094234,
      "loss": 3.0805,
      "step": 74353
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.521885871887207,
      "learning_rate": 0.0004585834787154628,
      "loss": 3.0963,
      "step": 74354
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.2431397438049316,
      "learning_rate": 0.00045858000637049977,
      "loss": 2.9741,
      "step": 74355
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.1768999099731445,
      "learning_rate": 0.0004585765339960536,
      "loss": 2.7879,
      "step": 74356
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.5866644382476807,
      "learning_rate": 0.00045857306159212505,
      "loss": 2.9761,
      "step": 74357
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2524044513702393,
      "learning_rate": 0.00045856958915871474,
      "loss": 2.9801,
      "step": 74358
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.57974910736084,
      "learning_rate": 0.00045856611669582346,
      "loss": 3.4389,
      "step": 74359
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5475013256073,
      "learning_rate": 0.0004585626442034517,
      "loss": 3.1402,
      "step": 74360
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.140408992767334,
      "learning_rate": 0.0004585591716816001,
      "loss": 2.874,
      "step": 74361
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.100548505783081,
      "learning_rate": 0.00045855569913026944,
      "loss": 3.0482,
      "step": 74362
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.680453062057495,
      "learning_rate": 0.00045855222654946027,
      "loss": 2.9887,
      "step": 74363
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9018430709838867,
      "learning_rate": 0.00045854875393917317,
      "loss": 2.8339,
      "step": 74364
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6577280759811401,
      "learning_rate": 0.00045854528129940893,
      "loss": 2.9871,
      "step": 74365
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.6612818241119385,
      "learning_rate": 0.0004585418086301681,
      "loss": 3.1723,
      "step": 74366
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4291419982910156,
      "learning_rate": 0.0004585383359314514,
      "loss": 3.0252,
      "step": 74367
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8899050951004028,
      "learning_rate": 0.0004585348632032594,
      "loss": 2.9309,
      "step": 74368
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9858770370483398,
      "learning_rate": 0.00045853139044559277,
      "loss": 3.0083,
      "step": 74369
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.60505211353302,
      "learning_rate": 0.0004585279176584522,
      "loss": 3.1412,
      "step": 74370
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.187293767929077,
      "learning_rate": 0.00045852444484183825,
      "loss": 3.1139,
      "step": 74371
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4501235485076904,
      "learning_rate": 0.0004585209719957516,
      "loss": 2.9848,
      "step": 74372
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7623952627182007,
      "learning_rate": 0.00045851749912019296,
      "loss": 2.9203,
      "step": 74373
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9488422870635986,
      "learning_rate": 0.0004585140262151629,
      "loss": 2.8437,
      "step": 74374
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.43369722366333,
      "learning_rate": 0.0004585105532806621,
      "loss": 2.8955,
      "step": 74375
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4254227876663208,
      "learning_rate": 0.00045850708031669116,
      "loss": 3.0189,
      "step": 74376
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.028221607208252,
      "learning_rate": 0.00045850360732325085,
      "loss": 3.1556,
      "step": 74377
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4753291606903076,
      "learning_rate": 0.0004585001343003416,
      "loss": 3.1175,
      "step": 74378
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4249513149261475,
      "learning_rate": 0.00045849666124796434,
      "loss": 2.865,
      "step": 74379
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.994093418121338,
      "learning_rate": 0.0004584931881661195,
      "loss": 3.1198,
      "step": 74380
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4513187408447266,
      "learning_rate": 0.00045848971505480765,
      "loss": 3.0732,
      "step": 74381
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8027373552322388,
      "learning_rate": 0.00045848624191402974,
      "loss": 3.003,
      "step": 74382
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4668364524841309,
      "learning_rate": 0.0004584827687437862,
      "loss": 3.2471,
      "step": 74383
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.6181631088256836,
      "learning_rate": 0.0004584792955440777,
      "loss": 3.1566,
      "step": 74384
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.3744754791259766,
      "learning_rate": 0.0004584758223149049,
      "loss": 3.0541,
      "step": 74385
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.0666987895965576,
      "learning_rate": 0.00045847234905626845,
      "loss": 2.8236,
      "step": 74386
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5272881984710693,
      "learning_rate": 0.00045846887576816905,
      "loss": 2.7645,
      "step": 74387
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2431013584136963,
      "learning_rate": 0.00045846540245060726,
      "loss": 2.9243,
      "step": 74388
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4478163719177246,
      "learning_rate": 0.0004584619291035838,
      "loss": 3.2255,
      "step": 74389
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6828476190567017,
      "learning_rate": 0.0004584584557270992,
      "loss": 3.0849,
      "step": 74390
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5051789283752441,
      "learning_rate": 0.00045845498232115423,
      "loss": 3.0706,
      "step": 74391
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8460235595703125,
      "learning_rate": 0.0004584515088857494,
      "loss": 3.0499,
      "step": 74392
    },
    {
      "epoch": 0.97,
      "grad_norm": 4.186798572540283,
      "learning_rate": 0.0004584480354208856,
      "loss": 2.9546,
      "step": 74393
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6578282117843628,
      "learning_rate": 0.0004584445619265632,
      "loss": 2.9793,
      "step": 74394
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.108164072036743,
      "learning_rate": 0.00045844108840278306,
      "loss": 2.7959,
      "step": 74395
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3608741760253906,
      "learning_rate": 0.00045843761484954564,
      "loss": 3.0322,
      "step": 74396
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9497385025024414,
      "learning_rate": 0.0004584341412668517,
      "loss": 3.0407,
      "step": 74397
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.624497652053833,
      "learning_rate": 0.0004584306676547019,
      "loss": 3.0116,
      "step": 74398
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8536497354507446,
      "learning_rate": 0.0004584271940130968,
      "loss": 3.1177,
      "step": 74399
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8119159936904907,
      "learning_rate": 0.00045842372034203706,
      "loss": 2.8202,
      "step": 74400
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.457329511642456,
      "learning_rate": 0.0004584202466415235,
      "loss": 3.1111,
      "step": 74401
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.41787052154541,
      "learning_rate": 0.00045841677291155654,
      "loss": 2.9349,
      "step": 74402
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6631397008895874,
      "learning_rate": 0.00045841329915213684,
      "loss": 3.0835,
      "step": 74403
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3755775690078735,
      "learning_rate": 0.0004584098253632652,
      "loss": 3.1818,
      "step": 74404
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4815926551818848,
      "learning_rate": 0.0004584063515449422,
      "loss": 2.9058,
      "step": 74405
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9281237125396729,
      "learning_rate": 0.0004584028776971685,
      "loss": 2.9493,
      "step": 74406
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4250106811523438,
      "learning_rate": 0.0004583994038199446,
      "loss": 3.0432,
      "step": 74407
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4658057689666748,
      "learning_rate": 0.0004583959299132714,
      "loss": 2.7552,
      "step": 74408
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5714187622070312,
      "learning_rate": 0.0004583924559771493,
      "loss": 3.1075,
      "step": 74409
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0921967029571533,
      "learning_rate": 0.0004583889820115791,
      "loss": 3.1861,
      "step": 74410
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3431379795074463,
      "learning_rate": 0.00045838550801656134,
      "loss": 3.255,
      "step": 74411
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.013181209564209,
      "learning_rate": 0.0004583820339920967,
      "loss": 3.1433,
      "step": 74412
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.877172589302063,
      "learning_rate": 0.00045837855993818605,
      "loss": 3.1943,
      "step": 74413
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5420020818710327,
      "learning_rate": 0.0004583750858548296,
      "loss": 2.8775,
      "step": 74414
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4924743175506592,
      "learning_rate": 0.00045837161174202835,
      "loss": 2.9968,
      "step": 74415
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7955774068832397,
      "learning_rate": 0.0004583681375997828,
      "loss": 3.0278,
      "step": 74416
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4647072553634644,
      "learning_rate": 0.00045836466342809366,
      "loss": 3.1059,
      "step": 74417
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5328584909439087,
      "learning_rate": 0.0004583611892269615,
      "loss": 3.1332,
      "step": 74418
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6736502647399902,
      "learning_rate": 0.000458357714996387,
      "loss": 2.8902,
      "step": 74419
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7179824113845825,
      "learning_rate": 0.0004583542407363709,
      "loss": 2.7181,
      "step": 74420
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6080660820007324,
      "learning_rate": 0.0004583507664469136,
      "loss": 3.0041,
      "step": 74421
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3204832077026367,
      "learning_rate": 0.00045834729212801607,
      "loss": 2.8523,
      "step": 74422
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7189266681671143,
      "learning_rate": 0.0004583438177796786,
      "loss": 3.0757,
      "step": 74423
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6694334745407104,
      "learning_rate": 0.00045834034340190224,
      "loss": 2.8975,
      "step": 74424
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.539198398590088,
      "learning_rate": 0.0004583368689946873,
      "loss": 3.0138,
      "step": 74425
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.560422420501709,
      "learning_rate": 0.0004583333945580346,
      "loss": 2.904,
      "step": 74426
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4369951486587524,
      "learning_rate": 0.0004583299200919447,
      "loss": 3.0441,
      "step": 74427
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7405643463134766,
      "learning_rate": 0.0004583264455964182,
      "loss": 3.0973,
      "step": 74428
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.276590347290039,
      "learning_rate": 0.0004583229710714559,
      "loss": 3.1479,
      "step": 74429
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.455594778060913,
      "learning_rate": 0.00045831949651705837,
      "loss": 2.9543,
      "step": 74430
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0953636169433594,
      "learning_rate": 0.0004583160219332263,
      "loss": 3.3219,
      "step": 74431
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2295877933502197,
      "learning_rate": 0.0004583125473199602,
      "loss": 2.9709,
      "step": 74432
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8334325551986694,
      "learning_rate": 0.0004583090726772609,
      "loss": 3.0151,
      "step": 74433
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4551303386688232,
      "learning_rate": 0.0004583055980051289,
      "loss": 3.0183,
      "step": 74434
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2513515949249268,
      "learning_rate": 0.00045830212330356495,
      "loss": 3.1203,
      "step": 74435
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5466495752334595,
      "learning_rate": 0.0004582986485725696,
      "loss": 3.3338,
      "step": 74436
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7481476068496704,
      "learning_rate": 0.00045829517381214355,
      "loss": 3.1346,
      "step": 74437
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.128673553466797,
      "learning_rate": 0.0004582916990222874,
      "loss": 2.6834,
      "step": 74438
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0642149448394775,
      "learning_rate": 0.0004582882242030019,
      "loss": 2.9007,
      "step": 74439
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0829389095306396,
      "learning_rate": 0.00045828474935428754,
      "loss": 3.0912,
      "step": 74440
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9864377975463867,
      "learning_rate": 0.00045828127447614526,
      "loss": 2.9558,
      "step": 74441
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6350016593933105,
      "learning_rate": 0.0004582777995685753,
      "loss": 3.0221,
      "step": 74442
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.752179741859436,
      "learning_rate": 0.0004582743246315786,
      "loss": 2.9363,
      "step": 74443
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.391505718231201,
      "learning_rate": 0.00045827084966515577,
      "loss": 3.0285,
      "step": 74444
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.348889112472534,
      "learning_rate": 0.0004582673746693073,
      "loss": 2.9639,
      "step": 74445
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6631237268447876,
      "learning_rate": 0.00045826389964403395,
      "loss": 3.0126,
      "step": 74446
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5156149864196777,
      "learning_rate": 0.00045826042458933634,
      "loss": 2.9143,
      "step": 74447
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8761303424835205,
      "learning_rate": 0.0004582569495052152,
      "loss": 2.9896,
      "step": 74448
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7238874435424805,
      "learning_rate": 0.00045825347439167107,
      "loss": 3.0338,
      "step": 74449
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3729491233825684,
      "learning_rate": 0.00045824999924870465,
      "loss": 3.1533,
      "step": 74450
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9163140058517456,
      "learning_rate": 0.0004582465240763165,
      "loss": 3.2304,
      "step": 74451
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.034562349319458,
      "learning_rate": 0.00045824304887450736,
      "loss": 2.9414,
      "step": 74452
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5027154684066772,
      "learning_rate": 0.00045823957364327783,
      "loss": 3.1918,
      "step": 74453
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5391746759414673,
      "learning_rate": 0.00045823609838262864,
      "loss": 3.043,
      "step": 74454
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8263531923294067,
      "learning_rate": 0.0004582326230925604,
      "loss": 2.9003,
      "step": 74455
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.032871723175049,
      "learning_rate": 0.0004582291477730736,
      "loss": 3.192,
      "step": 74456
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.571131706237793,
      "learning_rate": 0.0004582256724241691,
      "loss": 3.1837,
      "step": 74457
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5242643356323242,
      "learning_rate": 0.0004582221970458475,
      "loss": 2.8548,
      "step": 74458
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6873122453689575,
      "learning_rate": 0.0004582187216381094,
      "loss": 3.0399,
      "step": 74459
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4945619106292725,
      "learning_rate": 0.00045821524620095534,
      "loss": 3.0449,
      "step": 74460
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8439527750015259,
      "learning_rate": 0.0004582117707343861,
      "loss": 2.9679,
      "step": 74461
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6209449768066406,
      "learning_rate": 0.00045820829523840245,
      "loss": 2.8474,
      "step": 74462
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3119252920150757,
      "learning_rate": 0.00045820481971300476,
      "loss": 2.9993,
      "step": 74463
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5732685327529907,
      "learning_rate": 0.0004582013441581938,
      "loss": 3.0026,
      "step": 74464
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.990899682044983,
      "learning_rate": 0.0004581978685739703,
      "loss": 3.0243,
      "step": 74465
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7041794061660767,
      "learning_rate": 0.00045819439296033483,
      "loss": 2.9633,
      "step": 74466
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0584652423858643,
      "learning_rate": 0.00045819091731728797,
      "loss": 2.9142,
      "step": 74467
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6320778131484985,
      "learning_rate": 0.0004581874416448305,
      "loss": 2.8451,
      "step": 74468
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5273367166519165,
      "learning_rate": 0.00045818396594296296,
      "loss": 2.8885,
      "step": 74469
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4487848281860352,
      "learning_rate": 0.00045818049021168606,
      "loss": 2.9912,
      "step": 74470
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7585755586624146,
      "learning_rate": 0.0004581770144510004,
      "loss": 3.1658,
      "step": 74471
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5390201807022095,
      "learning_rate": 0.0004581735386609066,
      "loss": 2.9593,
      "step": 74472
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1841037273406982,
      "learning_rate": 0.0004581700628414054,
      "loss": 2.8117,
      "step": 74473
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9027515649795532,
      "learning_rate": 0.00045816658699249745,
      "loss": 3.3369,
      "step": 74474
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9317928552627563,
      "learning_rate": 0.0004581631111141833,
      "loss": 3.0654,
      "step": 74475
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.493194341659546,
      "learning_rate": 0.00045815963520646363,
      "loss": 3.0252,
      "step": 74476
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6021385192871094,
      "learning_rate": 0.00045815615926933913,
      "loss": 2.9146,
      "step": 74477
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5170974731445312,
      "learning_rate": 0.0004581526833028104,
      "loss": 3.0355,
      "step": 74478
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.501509189605713,
      "learning_rate": 0.0004581492073068781,
      "loss": 3.2257,
      "step": 74479
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1250879764556885,
      "learning_rate": 0.0004581457312815429,
      "loss": 3.0041,
      "step": 74480
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7248647212982178,
      "learning_rate": 0.0004581422552268053,
      "loss": 3.0091,
      "step": 74481
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7200477123260498,
      "learning_rate": 0.0004581387791426662,
      "loss": 2.8353,
      "step": 74482
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.705663800239563,
      "learning_rate": 0.00045813530302912613,
      "loss": 3.0349,
      "step": 74483
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4729982614517212,
      "learning_rate": 0.0004581318268861856,
      "loss": 3.1135,
      "step": 74484
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.693282961845398,
      "learning_rate": 0.00045812835071384547,
      "loss": 3.1583,
      "step": 74485
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7232106924057007,
      "learning_rate": 0.00045812487451210636,
      "loss": 3.2295,
      "step": 74486
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.269594669342041,
      "learning_rate": 0.0004581213982809687,
      "loss": 2.9942,
      "step": 74487
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5590882301330566,
      "learning_rate": 0.00045811792202043333,
      "loss": 2.8021,
      "step": 74488
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8380848169326782,
      "learning_rate": 0.00045811444573050093,
      "loss": 2.7671,
      "step": 74489
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6519948244094849,
      "learning_rate": 0.000458110969411172,
      "loss": 3.1533,
      "step": 74490
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.220235586166382,
      "learning_rate": 0.00045810749306244723,
      "loss": 3.1357,
      "step": 74491
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6268433332443237,
      "learning_rate": 0.0004581040166843274,
      "loss": 2.8945,
      "step": 74492
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8658037185668945,
      "learning_rate": 0.00045810054027681295,
      "loss": 3.1918,
      "step": 74493
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6095105409622192,
      "learning_rate": 0.0004580970638399047,
      "loss": 3.2325,
      "step": 74494
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8614307641983032,
      "learning_rate": 0.00045809358737360317,
      "loss": 2.9831,
      "step": 74495
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6602381467819214,
      "learning_rate": 0.00045809011087790904,
      "loss": 3.1609,
      "step": 74496
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.86259126663208,
      "learning_rate": 0.000458086634352823,
      "loss": 2.7694,
      "step": 74497
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6270214319229126,
      "learning_rate": 0.00045808315779834576,
      "loss": 3.2772,
      "step": 74498
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4232771396636963,
      "learning_rate": 0.00045807968121447775,
      "loss": 3.0335,
      "step": 74499
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.071302890777588,
      "learning_rate": 0.0004580762046012197,
      "loss": 3.1106,
      "step": 74500
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.617978572845459,
      "learning_rate": 0.00045807272795857245,
      "loss": 2.9548,
      "step": 74501
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5630642175674438,
      "learning_rate": 0.00045806925128653647,
      "loss": 3.0165,
      "step": 74502
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5042529106140137,
      "learning_rate": 0.00045806577458511237,
      "loss": 2.7428,
      "step": 74503
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5939010381698608,
      "learning_rate": 0.00045806229785430087,
      "loss": 3.1069,
      "step": 74504
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.238783121109009,
      "learning_rate": 0.00045805882109410255,
      "loss": 2.8766,
      "step": 74505
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3812625408172607,
      "learning_rate": 0.0004580553443045182,
      "loss": 3.1063,
      "step": 74506
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9255728721618652,
      "learning_rate": 0.0004580518674855484,
      "loss": 3.0403,
      "step": 74507
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2205288410186768,
      "learning_rate": 0.0004580483906371937,
      "loss": 3.1066,
      "step": 74508
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6965718269348145,
      "learning_rate": 0.0004580449137594549,
      "loss": 2.9519,
      "step": 74509
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4861090183258057,
      "learning_rate": 0.0004580414368523325,
      "loss": 2.9676,
      "step": 74510
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7605185508728027,
      "learning_rate": 0.0004580379599158272,
      "loss": 2.9585,
      "step": 74511
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.160707950592041,
      "learning_rate": 0.00045803448294993976,
      "loss": 2.8198,
      "step": 74512
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.848003625869751,
      "learning_rate": 0.0004580310059546706,
      "loss": 3.1452,
      "step": 74513
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.1799354553222656,
      "learning_rate": 0.00045802752893002056,
      "loss": 3.1404,
      "step": 74514
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8332661390304565,
      "learning_rate": 0.00045802405187599023,
      "loss": 3.0861,
      "step": 74515
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.580333709716797,
      "learning_rate": 0.0004580205747925802,
      "loss": 3.1436,
      "step": 74516
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.038597822189331,
      "learning_rate": 0.0004580170976797912,
      "loss": 2.9605,
      "step": 74517
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.066681385040283,
      "learning_rate": 0.0004580136205376238,
      "loss": 3.0862,
      "step": 74518
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.729353427886963,
      "learning_rate": 0.0004580101433660787,
      "loss": 3.1199,
      "step": 74519
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1740448474884033,
      "learning_rate": 0.00045800666616515657,
      "loss": 3.076,
      "step": 74520
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5837459564208984,
      "learning_rate": 0.00045800318893485806,
      "loss": 3.2496,
      "step": 74521
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1206586360931396,
      "learning_rate": 0.0004579997116751836,
      "loss": 2.8976,
      "step": 74522
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.958585262298584,
      "learning_rate": 0.0004579962343861342,
      "loss": 3.1522,
      "step": 74523
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.067322015762329,
      "learning_rate": 0.00045799275706771023,
      "loss": 3.0148,
      "step": 74524
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6612277030944824,
      "learning_rate": 0.0004579892797199124,
      "loss": 3.1368,
      "step": 74525
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.923567056655884,
      "learning_rate": 0.0004579858023427415,
      "loss": 2.9848,
      "step": 74526
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7017593383789062,
      "learning_rate": 0.0004579823249361979,
      "loss": 3.1678,
      "step": 74527
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5390245914459229,
      "learning_rate": 0.00045797884750028246,
      "loss": 3.1498,
      "step": 74528
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.59360671043396,
      "learning_rate": 0.0004579753700349958,
      "loss": 3.0543,
      "step": 74529
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.57899808883667,
      "learning_rate": 0.0004579718925403385,
      "loss": 2.755,
      "step": 74530
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4559788703918457,
      "learning_rate": 0.00045796841501631125,
      "loss": 2.8632,
      "step": 74531
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.0173404216766357,
      "learning_rate": 0.00045796493746291475,
      "loss": 3.0619,
      "step": 74532
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.2969508171081543,
      "learning_rate": 0.00045796145988014953,
      "loss": 3.1863,
      "step": 74533
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.572889804840088,
      "learning_rate": 0.00045795798226801627,
      "loss": 3.0564,
      "step": 74534
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1870429515838623,
      "learning_rate": 0.00045795450462651576,
      "loss": 3.0766,
      "step": 74535
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8709640502929688,
      "learning_rate": 0.00045795102695564833,
      "loss": 2.908,
      "step": 74536
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.490572214126587,
      "learning_rate": 0.00045794754925541497,
      "loss": 3.136,
      "step": 74537
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3052414655685425,
      "learning_rate": 0.0004579440715258161,
      "loss": 2.9523,
      "step": 74538
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6670949459075928,
      "learning_rate": 0.0004579405937668525,
      "loss": 2.9983,
      "step": 74539
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.532910704612732,
      "learning_rate": 0.0004579371159785248,
      "loss": 3.0213,
      "step": 74540
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4125069379806519,
      "learning_rate": 0.00045793363816083347,
      "loss": 3.2404,
      "step": 74541
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.524956464767456,
      "learning_rate": 0.0004579301603137794,
      "loss": 2.8819,
      "step": 74542
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6583266258239746,
      "learning_rate": 0.00045792668243736307,
      "loss": 2.9474,
      "step": 74543
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5581711530685425,
      "learning_rate": 0.0004579232045315853,
      "loss": 3.0007,
      "step": 74544
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7269269227981567,
      "learning_rate": 0.00045791972659644647,
      "loss": 2.9997,
      "step": 74545
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.907595157623291,
      "learning_rate": 0.00045791624863194755,
      "loss": 3.1447,
      "step": 74546
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9367481470108032,
      "learning_rate": 0.0004579127706380889,
      "loss": 3.218,
      "step": 74547
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5047563314437866,
      "learning_rate": 0.00045790929261487135,
      "loss": 2.9934,
      "step": 74548
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3823434114456177,
      "learning_rate": 0.0004579058145622954,
      "loss": 3.0698,
      "step": 74549
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8656893968582153,
      "learning_rate": 0.00045790233648036185,
      "loss": 3.2181,
      "step": 74550
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.391793966293335,
      "learning_rate": 0.0004578988583690713,
      "loss": 3.0105,
      "step": 74551
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.619123935699463,
      "learning_rate": 0.00045789538022842426,
      "loss": 3.0961,
      "step": 74552
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6517412662506104,
      "learning_rate": 0.0004578919020584216,
      "loss": 2.9365,
      "step": 74553
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8219962120056152,
      "learning_rate": 0.0004578884238590638,
      "loss": 3.1912,
      "step": 74554
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.714822769165039,
      "learning_rate": 0.00045788494563035157,
      "loss": 3.2571,
      "step": 74555
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7206979990005493,
      "learning_rate": 0.00045788146737228556,
      "loss": 3.0975,
      "step": 74556
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5287920236587524,
      "learning_rate": 0.0004578779890848664,
      "loss": 3.1314,
      "step": 74557
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7025614976882935,
      "learning_rate": 0.00045787451076809474,
      "loss": 2.7072,
      "step": 74558
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.656869649887085,
      "learning_rate": 0.0004578710324219712,
      "loss": 3.2059,
      "step": 74559
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.570772647857666,
      "learning_rate": 0.0004578675540464965,
      "loss": 3.2895,
      "step": 74560
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1283180713653564,
      "learning_rate": 0.00045786407564167125,
      "loss": 2.7578,
      "step": 74561
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4962726831436157,
      "learning_rate": 0.000457860597207496,
      "loss": 3.1163,
      "step": 74562
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9340461492538452,
      "learning_rate": 0.0004578571187439716,
      "loss": 3.0356,
      "step": 74563
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6687685251235962,
      "learning_rate": 0.00045785364025109854,
      "loss": 3.0378,
      "step": 74564
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5777472257614136,
      "learning_rate": 0.00045785016172887746,
      "loss": 3.1327,
      "step": 74565
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.687109112739563,
      "learning_rate": 0.0004578466831773091,
      "loss": 2.8487,
      "step": 74566
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8278082609176636,
      "learning_rate": 0.000457843204596394,
      "loss": 3.0815,
      "step": 74567
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4604686498641968,
      "learning_rate": 0.00045783972598613295,
      "loss": 2.9728,
      "step": 74568
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6798474788665771,
      "learning_rate": 0.00045783624734652655,
      "loss": 2.99,
      "step": 74569
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.678617000579834,
      "learning_rate": 0.0004578327686775753,
      "loss": 3.0992,
      "step": 74570
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.487493634223938,
      "learning_rate": 0.00045782928997928,
      "loss": 2.8537,
      "step": 74571
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.435482144355774,
      "learning_rate": 0.00045782581125164125,
      "loss": 3.1729,
      "step": 74572
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8890539407730103,
      "learning_rate": 0.0004578223324946597,
      "loss": 3.0593,
      "step": 74573
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.182722806930542,
      "learning_rate": 0.00045781885370833604,
      "loss": 3.0742,
      "step": 74574
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8539992570877075,
      "learning_rate": 0.0004578153748926709,
      "loss": 2.849,
      "step": 74575
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7146918773651123,
      "learning_rate": 0.00045781189604766486,
      "loss": 3.2379,
      "step": 74576
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.04952335357666,
      "learning_rate": 0.0004578084171733186,
      "loss": 3.1292,
      "step": 74577
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4370968341827393,
      "learning_rate": 0.0004578049382696328,
      "loss": 2.9062,
      "step": 74578
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5366456508636475,
      "learning_rate": 0.000457801459336608,
      "loss": 3.1361,
      "step": 74579
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0362794399261475,
      "learning_rate": 0.000457797980374245,
      "loss": 3.2153,
      "step": 74580
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5082476139068604,
      "learning_rate": 0.0004577945013825444,
      "loss": 3.1087,
      "step": 74581
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.671578049659729,
      "learning_rate": 0.0004577910223615068,
      "loss": 2.8966,
      "step": 74582
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.541467308998108,
      "learning_rate": 0.0004577875433111328,
      "loss": 3.1684,
      "step": 74583
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5576027631759644,
      "learning_rate": 0.00045778406423142333,
      "loss": 3.0838,
      "step": 74584
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5590740442276,
      "learning_rate": 0.0004577805851223786,
      "loss": 3.0808,
      "step": 74585
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4762368202209473,
      "learning_rate": 0.00045777710598399955,
      "loss": 2.8462,
      "step": 74586
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.817107915878296,
      "learning_rate": 0.0004577736268162868,
      "loss": 2.9976,
      "step": 74587
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5624476671218872,
      "learning_rate": 0.0004577701476192409,
      "loss": 2.9888,
      "step": 74588
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4054993391036987,
      "learning_rate": 0.0004577666683928625,
      "loss": 3.2388,
      "step": 74589
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.399981141090393,
      "learning_rate": 0.0004577631891371525,
      "loss": 2.9429,
      "step": 74590
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7609807252883911,
      "learning_rate": 0.00045775970985211114,
      "loss": 2.8846,
      "step": 74591
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.399092674255371,
      "learning_rate": 0.0004577562305377393,
      "loss": 3.0065,
      "step": 74592
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.030498743057251,
      "learning_rate": 0.00045775275119403776,
      "loss": 2.8374,
      "step": 74593
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7382100820541382,
      "learning_rate": 0.00045774927182100686,
      "loss": 3.0282,
      "step": 74594
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9404035806655884,
      "learning_rate": 0.00045774579241864737,
      "loss": 2.9156,
      "step": 74595
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9107494354248047,
      "learning_rate": 0.00045774231298696005,
      "loss": 2.9437,
      "step": 74596
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6592187881469727,
      "learning_rate": 0.00045773883352594544,
      "loss": 2.9692,
      "step": 74597
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.121263027191162,
      "learning_rate": 0.0004577353540356042,
      "loss": 3.0379,
      "step": 74598
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.326915979385376,
      "learning_rate": 0.000457731874515937,
      "loss": 2.9513,
      "step": 74599
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.022217035293579,
      "learning_rate": 0.0004577283949669443,
      "loss": 2.8555,
      "step": 74600
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4658498764038086,
      "learning_rate": 0.0004577249153886271,
      "loss": 2.8493,
      "step": 74601
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8742183446884155,
      "learning_rate": 0.00045772143578098586,
      "loss": 3.0092,
      "step": 74602
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7776252031326294,
      "learning_rate": 0.0004577179561440211,
      "loss": 2.9497,
      "step": 74603
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.072207450866699,
      "learning_rate": 0.0004577144764777337,
      "loss": 2.9706,
      "step": 74604
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7823634147644043,
      "learning_rate": 0.00045771099678212413,
      "loss": 3.065,
      "step": 74605
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7580044269561768,
      "learning_rate": 0.00045770751705719314,
      "loss": 3.0505,
      "step": 74606
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.960423469543457,
      "learning_rate": 0.00045770403730294136,
      "loss": 2.9267,
      "step": 74607
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.1764304637908936,
      "learning_rate": 0.0004577005575193695,
      "loss": 2.9331,
      "step": 74608
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2423086166381836,
      "learning_rate": 0.000457697077706478,
      "loss": 3.0953,
      "step": 74609
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8301217555999756,
      "learning_rate": 0.0004576935978642677,
      "loss": 3.1036,
      "step": 74610
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.5262763500213623,
      "learning_rate": 0.00045769011799273913,
      "loss": 3.0001,
      "step": 74611
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3682878017425537,
      "learning_rate": 0.0004576866380918931,
      "loss": 2.9679,
      "step": 74612
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.835030436515808,
      "learning_rate": 0.00045768315816173004,
      "loss": 3.0034,
      "step": 74613
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.456877589225769,
      "learning_rate": 0.0004576796782022508,
      "loss": 3.0337,
      "step": 74614
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9303092956542969,
      "learning_rate": 0.0004576761982134558,
      "loss": 2.8769,
      "step": 74615
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7684836387634277,
      "learning_rate": 0.00045767271819534586,
      "loss": 3.0391,
      "step": 74616
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.4236831665039062,
      "learning_rate": 0.00045766923814792164,
      "loss": 2.8381,
      "step": 74617
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7168011665344238,
      "learning_rate": 0.0004576657580711837,
      "loss": 3.0076,
      "step": 74618
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7390625476837158,
      "learning_rate": 0.0004576622779651327,
      "loss": 2.9566,
      "step": 74619
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.494755983352661,
      "learning_rate": 0.0004576587978297694,
      "loss": 3.1515,
      "step": 74620
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4381496906280518,
      "learning_rate": 0.00045765531766509427,
      "loss": 3.0126,
      "step": 74621
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5460454225540161,
      "learning_rate": 0.00045765183747110803,
      "loss": 2.9175,
      "step": 74622
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4950039386749268,
      "learning_rate": 0.0004576483572478113,
      "loss": 3.01,
      "step": 74623
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.6994879245758057,
      "learning_rate": 0.0004576448769952049,
      "loss": 2.8668,
      "step": 74624
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.313464641571045,
      "learning_rate": 0.0004576413967132893,
      "loss": 2.8461,
      "step": 74625
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1985208988189697,
      "learning_rate": 0.00045763791640206507,
      "loss": 3.0125,
      "step": 74626
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.277207612991333,
      "learning_rate": 0.0004576344360615332,
      "loss": 3.0039,
      "step": 74627
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6332273483276367,
      "learning_rate": 0.0004576309556916939,
      "loss": 2.9786,
      "step": 74628
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.547548770904541,
      "learning_rate": 0.0004576274752925481,
      "loss": 2.7953,
      "step": 74629
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.550523042678833,
      "learning_rate": 0.00045762399486409637,
      "loss": 2.9541,
      "step": 74630
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5670418739318848,
      "learning_rate": 0.00045762051440633946,
      "loss": 2.9371,
      "step": 74631
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8633339405059814,
      "learning_rate": 0.00045761703391927777,
      "loss": 2.7961,
      "step": 74632
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9387613534927368,
      "learning_rate": 0.00045761355340291214,
      "loss": 3.1276,
      "step": 74633
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.541943907737732,
      "learning_rate": 0.00045761007285724314,
      "loss": 2.9313,
      "step": 74634
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8789697885513306,
      "learning_rate": 0.00045760659228227156,
      "loss": 3.0702,
      "step": 74635
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8785040378570557,
      "learning_rate": 0.0004576031116779979,
      "loss": 3.0852,
      "step": 74636
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.865852952003479,
      "learning_rate": 0.0004575996310444228,
      "loss": 3.1853,
      "step": 74637
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6914808750152588,
      "learning_rate": 0.0004575961503815471,
      "loss": 2.8579,
      "step": 74638
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4840291738510132,
      "learning_rate": 0.00045759266968937117,
      "loss": 2.9666,
      "step": 74639
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6696715354919434,
      "learning_rate": 0.0004575891889678958,
      "loss": 2.9808,
      "step": 74640
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8119796514511108,
      "learning_rate": 0.00045758570821712155,
      "loss": 2.9969,
      "step": 74641
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4399089813232422,
      "learning_rate": 0.0004575822274370493,
      "loss": 3.1717,
      "step": 74642
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8885694742202759,
      "learning_rate": 0.0004575787466276795,
      "loss": 2.9226,
      "step": 74643
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6855307817459106,
      "learning_rate": 0.00045757526578901276,
      "loss": 3.0025,
      "step": 74644
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0250494480133057,
      "learning_rate": 0.0004575717849210499,
      "loss": 3.0034,
      "step": 74645
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.6484742164611816,
      "learning_rate": 0.0004575683040237914,
      "loss": 2.9237,
      "step": 74646
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9659475088119507,
      "learning_rate": 0.0004575648230972379,
      "loss": 2.8382,
      "step": 74647
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.045384168624878,
      "learning_rate": 0.00045756134214139035,
      "loss": 3.065,
      "step": 74648
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9099386930465698,
      "learning_rate": 0.000457557861156249,
      "loss": 3.0945,
      "step": 74649
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.046116352081299,
      "learning_rate": 0.0004575543801418147,
      "loss": 3.2827,
      "step": 74650
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5627305507659912,
      "learning_rate": 0.0004575508990980881,
      "loss": 3.0168,
      "step": 74651
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9478023052215576,
      "learning_rate": 0.0004575474180250698,
      "loss": 2.9358,
      "step": 74652
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6432204246520996,
      "learning_rate": 0.0004575439369227604,
      "loss": 3.1328,
      "step": 74653
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1180579662323,
      "learning_rate": 0.00045754045579116077,
      "loss": 3.1219,
      "step": 74654
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6744908094406128,
      "learning_rate": 0.0004575369746302712,
      "loss": 2.9332,
      "step": 74655
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4431774616241455,
      "learning_rate": 0.00045753349344009264,
      "loss": 2.9513,
      "step": 74656
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0845675468444824,
      "learning_rate": 0.00045753001222062565,
      "loss": 2.9508,
      "step": 74657
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0635619163513184,
      "learning_rate": 0.00045752653097187084,
      "loss": 3.0764,
      "step": 74658
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6508800983428955,
      "learning_rate": 0.0004575230496938288,
      "loss": 2.853,
      "step": 74659
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7056928873062134,
      "learning_rate": 0.00045751956838650034,
      "loss": 3.0087,
      "step": 74660
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7845664024353027,
      "learning_rate": 0.000457516087049886,
      "loss": 2.8354,
      "step": 74661
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7040653228759766,
      "learning_rate": 0.0004575126056839864,
      "loss": 3.0287,
      "step": 74662
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9734727144241333,
      "learning_rate": 0.0004575091242888023,
      "loss": 2.7478,
      "step": 74663
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.305904746055603,
      "learning_rate": 0.00045750564286433417,
      "loss": 3.1215,
      "step": 74664
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5385725498199463,
      "learning_rate": 0.00045750216141058286,
      "loss": 3.1164,
      "step": 74665
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.482836961746216,
      "learning_rate": 0.00045749867992754897,
      "loss": 2.7419,
      "step": 74666
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0068535804748535,
      "learning_rate": 0.000457495198415233,
      "loss": 3.242,
      "step": 74667
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7142714262008667,
      "learning_rate": 0.00045749171687363573,
      "loss": 3.2058,
      "step": 74668
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2708311080932617,
      "learning_rate": 0.0004574882353027578,
      "loss": 2.916,
      "step": 74669
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4768394231796265,
      "learning_rate": 0.0004574847537025998,
      "loss": 3.2764,
      "step": 74670
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7114297151565552,
      "learning_rate": 0.0004574812720731625,
      "loss": 3.1338,
      "step": 74671
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5759707689285278,
      "learning_rate": 0.0004574777904144463,
      "loss": 3.0291,
      "step": 74672
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.081324815750122,
      "learning_rate": 0.00045747430872645215,
      "loss": 3.007,
      "step": 74673
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8533363342285156,
      "learning_rate": 0.00045747082700918045,
      "loss": 2.922,
      "step": 74674
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6044583320617676,
      "learning_rate": 0.00045746734526263205,
      "loss": 3.2529,
      "step": 74675
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1110270023345947,
      "learning_rate": 0.00045746386348680747,
      "loss": 2.8829,
      "step": 74676
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.280195474624634,
      "learning_rate": 0.00045746038168170737,
      "loss": 2.9962,
      "step": 74677
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8206069469451904,
      "learning_rate": 0.0004574568998473324,
      "loss": 3.104,
      "step": 74678
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7021262645721436,
      "learning_rate": 0.00045745341798368323,
      "loss": 2.8921,
      "step": 74679
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9295053482055664,
      "learning_rate": 0.0004574499360907605,
      "loss": 3.0578,
      "step": 74680
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5257686376571655,
      "learning_rate": 0.00045744645416856487,
      "loss": 2.9867,
      "step": 74681
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0913493633270264,
      "learning_rate": 0.00045744297221709694,
      "loss": 3.033,
      "step": 74682
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5702444314956665,
      "learning_rate": 0.0004574394902363574,
      "loss": 3.18,
      "step": 74683
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3748704195022583,
      "learning_rate": 0.0004574360082263468,
      "loss": 3.1336,
      "step": 74684
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.482638955116272,
      "learning_rate": 0.00045743252618706605,
      "loss": 3.0541,
      "step": 74685
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8205251693725586,
      "learning_rate": 0.0004574290441185155,
      "loss": 3.1799,
      "step": 74686
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.574439525604248,
      "learning_rate": 0.00045742556202069595,
      "loss": 3.0252,
      "step": 74687
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8680137395858765,
      "learning_rate": 0.0004574220798936081,
      "loss": 2.7419,
      "step": 74688
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1789767742156982,
      "learning_rate": 0.00045741859773725234,
      "loss": 3.0791,
      "step": 74689
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.639981985092163,
      "learning_rate": 0.00045741511555162965,
      "loss": 3.0074,
      "step": 74690
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.433310627937317,
      "learning_rate": 0.0004574116333367404,
      "loss": 3.1318,
      "step": 74691
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0448694229125977,
      "learning_rate": 0.00045740815109258535,
      "loss": 3.1793,
      "step": 74692
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8981045484542847,
      "learning_rate": 0.0004574046688191654,
      "loss": 2.823,
      "step": 74693
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8695080280303955,
      "learning_rate": 0.00045740118651648067,
      "loss": 3.1575,
      "step": 74694
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8484342098236084,
      "learning_rate": 0.0004573977041845322,
      "loss": 2.9731,
      "step": 74695
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.206317901611328,
      "learning_rate": 0.00045739422182332053,
      "loss": 3.177,
      "step": 74696
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5212366580963135,
      "learning_rate": 0.00045739073943284623,
      "loss": 2.8919,
      "step": 74697
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8875916004180908,
      "learning_rate": 0.00045738725701311003,
      "loss": 2.9468,
      "step": 74698
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9640878438949585,
      "learning_rate": 0.00045738377456411267,
      "loss": 2.8616,
      "step": 74699
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.009939670562744,
      "learning_rate": 0.00045738029208585465,
      "loss": 3.1026,
      "step": 74700
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0071706771850586,
      "learning_rate": 0.0004573768095783366,
      "loss": 3.1892,
      "step": 74701
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7045586109161377,
      "learning_rate": 0.00045737332704155927,
      "loss": 3.0757,
      "step": 74702
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1069018840789795,
      "learning_rate": 0.0004573698444755233,
      "loss": 2.7358,
      "step": 74703
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5160789489746094,
      "learning_rate": 0.00045736636188022924,
      "loss": 2.8672,
      "step": 74704
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.7269742488861084,
      "learning_rate": 0.0004573628792556778,
      "loss": 2.7356,
      "step": 74705
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3957204818725586,
      "learning_rate": 0.00045735939660186984,
      "loss": 3.0317,
      "step": 74706
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6061652898788452,
      "learning_rate": 0.00045735591391880554,
      "loss": 3.0319,
      "step": 74707
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0389747619628906,
      "learning_rate": 0.0004573524312064859,
      "loss": 3.1239,
      "step": 74708
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1761295795440674,
      "learning_rate": 0.00045734894846491154,
      "loss": 2.8861,
      "step": 74709
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6636039018630981,
      "learning_rate": 0.00045734546569408286,
      "loss": 2.8087,
      "step": 74710
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8542622327804565,
      "learning_rate": 0.00045734198289400083,
      "loss": 3.0339,
      "step": 74711
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6214959621429443,
      "learning_rate": 0.000457338500064666,
      "loss": 3.2243,
      "step": 74712
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.08064341545105,
      "learning_rate": 0.00045733501720607886,
      "loss": 3.004,
      "step": 74713
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8063125610351562,
      "learning_rate": 0.0004573315343182402,
      "loss": 2.9741,
      "step": 74714
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8228049278259277,
      "learning_rate": 0.0004573280514011506,
      "loss": 3.0632,
      "step": 74715
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9515548944473267,
      "learning_rate": 0.0004573245684548108,
      "loss": 3.0526,
      "step": 74716
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5658806562423706,
      "learning_rate": 0.0004573210854792214,
      "loss": 2.9268,
      "step": 74717
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6433680057525635,
      "learning_rate": 0.000457317602474383,
      "loss": 3.0482,
      "step": 74718
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5051097869873047,
      "learning_rate": 0.00045731411944029636,
      "loss": 2.9759,
      "step": 74719
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4255670309066772,
      "learning_rate": 0.0004573106363769619,
      "loss": 3.0974,
      "step": 74720
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1955394744873047,
      "learning_rate": 0.0004573071532843806,
      "loss": 3.0327,
      "step": 74721
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2477757930755615,
      "learning_rate": 0.00045730367016255283,
      "loss": 2.7044,
      "step": 74722
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5882152318954468,
      "learning_rate": 0.00045730018701147944,
      "loss": 2.9065,
      "step": 74723
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.71322762966156,
      "learning_rate": 0.0004572967038311609,
      "loss": 3.1134,
      "step": 74724
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.486245632171631,
      "learning_rate": 0.0004572932206215978,
      "loss": 3.0472,
      "step": 74725
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.500919222831726,
      "learning_rate": 0.00045728973738279116,
      "loss": 2.9644,
      "step": 74726
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.499227523803711,
      "learning_rate": 0.00045728625411474124,
      "loss": 3.0867,
      "step": 74727
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8737683296203613,
      "learning_rate": 0.0004572827708174489,
      "loss": 2.9069,
      "step": 74728
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.216809034347534,
      "learning_rate": 0.00045727928749091475,
      "loss": 3.1379,
      "step": 74729
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.514629602432251,
      "learning_rate": 0.0004572758041351393,
      "loss": 2.9509,
      "step": 74730
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4202909469604492,
      "learning_rate": 0.00045727232075012345,
      "loss": 2.9316,
      "step": 74731
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.172027587890625,
      "learning_rate": 0.00045726883733586757,
      "loss": 2.8969,
      "step": 74732
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.552334189414978,
      "learning_rate": 0.0004572653538923726,
      "loss": 2.9584,
      "step": 74733
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.150447368621826,
      "learning_rate": 0.00045726187041963886,
      "loss": 2.9663,
      "step": 74734
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1826655864715576,
      "learning_rate": 0.00045725838691766733,
      "loss": 3.285,
      "step": 74735
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.101851463317871,
      "learning_rate": 0.00045725490338645845,
      "loss": 2.9736,
      "step": 74736
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7410056591033936,
      "learning_rate": 0.0004572514198260128,
      "loss": 3.0663,
      "step": 74737
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.986588478088379,
      "learning_rate": 0.0004572479362363313,
      "loss": 2.8199,
      "step": 74738
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5516471862792969,
      "learning_rate": 0.00045724445261741435,
      "loss": 2.8213,
      "step": 74739
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9662081003189087,
      "learning_rate": 0.00045724096896926266,
      "loss": 3.0728,
      "step": 74740
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.738266110420227,
      "learning_rate": 0.000457237485291877,
      "loss": 2.9347,
      "step": 74741
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7010447978973389,
      "learning_rate": 0.00045723400158525785,
      "loss": 3.0561,
      "step": 74742
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6594361066818237,
      "learning_rate": 0.0004572305178494059,
      "loss": 3.0736,
      "step": 74743
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7420910596847534,
      "learning_rate": 0.0004572270340843219,
      "loss": 3.1713,
      "step": 74744
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6017695665359497,
      "learning_rate": 0.00045722355029000647,
      "loss": 2.8816,
      "step": 74745
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.379160761833191,
      "learning_rate": 0.00045722006646646015,
      "loss": 2.9888,
      "step": 74746
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7831369638442993,
      "learning_rate": 0.0004572165826136836,
      "loss": 2.9662,
      "step": 74747
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.368984580039978,
      "learning_rate": 0.00045721309873167766,
      "loss": 2.9751,
      "step": 74748
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.363497018814087,
      "learning_rate": 0.0004572096148204427,
      "loss": 3.0889,
      "step": 74749
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3664625883102417,
      "learning_rate": 0.0004572061308799796,
      "loss": 3.1171,
      "step": 74750
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4888120889663696,
      "learning_rate": 0.00045720264691028893,
      "loss": 3.1687,
      "step": 74751
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5995540618896484,
      "learning_rate": 0.0004571991629113712,
      "loss": 2.9679,
      "step": 74752
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5314524173736572,
      "learning_rate": 0.0004571956788832272,
      "loss": 3.1914,
      "step": 74753
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3452024459838867,
      "learning_rate": 0.00045719219482585763,
      "loss": 3.2221,
      "step": 74754
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8540335893630981,
      "learning_rate": 0.00045718871073926304,
      "loss": 2.8607,
      "step": 74755
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6752971410751343,
      "learning_rate": 0.00045718522662344406,
      "loss": 3.0586,
      "step": 74756
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.553640365600586,
      "learning_rate": 0.0004571817424784015,
      "loss": 3.0158,
      "step": 74757
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9300131797790527,
      "learning_rate": 0.00045717825830413576,
      "loss": 2.7849,
      "step": 74758
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.370224714279175,
      "learning_rate": 0.00045717477410064765,
      "loss": 3.1965,
      "step": 74759
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1378958225250244,
      "learning_rate": 0.00045717128986793784,
      "loss": 3.0553,
      "step": 74760
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6998887062072754,
      "learning_rate": 0.00045716780560600684,
      "loss": 3.1185,
      "step": 74761
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9979698657989502,
      "learning_rate": 0.0004571643213148554,
      "loss": 3.0846,
      "step": 74762
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2608203887939453,
      "learning_rate": 0.00045716083699448416,
      "loss": 3.0243,
      "step": 74763
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7312037944793701,
      "learning_rate": 0.0004571573526448938,
      "loss": 3.1754,
      "step": 74764
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6482295989990234,
      "learning_rate": 0.0004571538682660848,
      "loss": 3.1275,
      "step": 74765
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7060682773590088,
      "learning_rate": 0.0004571503838580579,
      "loss": 2.8405,
      "step": 74766
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.817000389099121,
      "learning_rate": 0.000457146899420814,
      "loss": 3.321,
      "step": 74767
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8437097072601318,
      "learning_rate": 0.00045714341495435335,
      "loss": 3.0021,
      "step": 74768
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.025846242904663,
      "learning_rate": 0.0004571399304586768,
      "loss": 3.0671,
      "step": 74769
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7367812395095825,
      "learning_rate": 0.00045713644593378504,
      "loss": 3.0182,
      "step": 74770
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8648351430892944,
      "learning_rate": 0.00045713296137967857,
      "loss": 2.8479,
      "step": 74771
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4468615055084229,
      "learning_rate": 0.00045712947679635817,
      "loss": 2.8292,
      "step": 74772
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.469994306564331,
      "learning_rate": 0.0004571259921838244,
      "loss": 2.9633,
      "step": 74773
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.942272424697876,
      "learning_rate": 0.00045712250754207794,
      "loss": 3.1086,
      "step": 74774
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2297539710998535,
      "learning_rate": 0.0004571190228711194,
      "loss": 3.2277,
      "step": 74775
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5994303226470947,
      "learning_rate": 0.0004571155381709496,
      "loss": 3.0639,
      "step": 74776
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3941166400909424,
      "learning_rate": 0.00045711205344156896,
      "loss": 2.8232,
      "step": 74777
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0168018341064453,
      "learning_rate": 0.0004571085686829782,
      "loss": 2.9056,
      "step": 74778
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5292145013809204,
      "learning_rate": 0.0004571050838951781,
      "loss": 2.9039,
      "step": 74779
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8348008394241333,
      "learning_rate": 0.00045710159907816914,
      "loss": 2.9117,
      "step": 74780
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.836705207824707,
      "learning_rate": 0.000457098114231952,
      "loss": 3.1161,
      "step": 74781
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3433218002319336,
      "learning_rate": 0.00045709462935652735,
      "loss": 3.1672,
      "step": 74782
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0652096271514893,
      "learning_rate": 0.00045709114445189583,
      "loss": 3.1192,
      "step": 74783
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9512903690338135,
      "learning_rate": 0.0004570876595180582,
      "loss": 3.1207,
      "step": 74784
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7494169473648071,
      "learning_rate": 0.000457084174555015,
      "loss": 2.9798,
      "step": 74785
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.870461344718933,
      "learning_rate": 0.00045708068956276676,
      "loss": 3.1198,
      "step": 74786
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.036249876022339,
      "learning_rate": 0.0004570772045413143,
      "loss": 3.1864,
      "step": 74787
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.209603786468506,
      "learning_rate": 0.0004570737194906583,
      "loss": 3.1433,
      "step": 74788
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.3327356576919556,
      "learning_rate": 0.0004570702344107992,
      "loss": 3.0974,
      "step": 74789
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.5579235553741455,
      "learning_rate": 0.0004570667493017379,
      "loss": 3.2125,
      "step": 74790
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.3266987800598145,
      "learning_rate": 0.00045706326416347496,
      "loss": 3.0289,
      "step": 74791
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4701391458511353,
      "learning_rate": 0.0004570597789960109,
      "loss": 2.9442,
      "step": 74792
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9555416107177734,
      "learning_rate": 0.0004570562937993464,
      "loss": 3.1796,
      "step": 74793
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.014427661895752,
      "learning_rate": 0.00045705280857348237,
      "loss": 3.473,
      "step": 74794
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.599567174911499,
      "learning_rate": 0.0004570493233184191,
      "loss": 3.0938,
      "step": 74795
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4676783084869385,
      "learning_rate": 0.0004570458380341574,
      "loss": 2.8894,
      "step": 74796
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.424513816833496,
      "learning_rate": 0.000457042352720698,
      "loss": 3.0264,
      "step": 74797
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.58543062210083,
      "learning_rate": 0.00045703886737804135,
      "loss": 2.8693,
      "step": 74798
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5537477731704712,
      "learning_rate": 0.0004570353820061883,
      "loss": 2.9165,
      "step": 74799
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7565743923187256,
      "learning_rate": 0.0004570318966051394,
      "loss": 2.8548,
      "step": 74800
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5022886991500854,
      "learning_rate": 0.00045702841117489524,
      "loss": 3.1352,
      "step": 74801
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8069628477096558,
      "learning_rate": 0.0004570249257154566,
      "loss": 3.1577,
      "step": 74802
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.463993787765503,
      "learning_rate": 0.00045702144022682407,
      "loss": 3.0612,
      "step": 74803
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4814727306365967,
      "learning_rate": 0.0004570179547089982,
      "loss": 2.9234,
      "step": 74804
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5442560911178589,
      "learning_rate": 0.0004570144691619799,
      "loss": 3.0524,
      "step": 74805
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.157148599624634,
      "learning_rate": 0.0004570109835857694,
      "loss": 2.7929,
      "step": 74806
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0343174934387207,
      "learning_rate": 0.0004570074979803678,
      "loss": 2.9832,
      "step": 74807
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7978906631469727,
      "learning_rate": 0.0004570040123457755,
      "loss": 2.8413,
      "step": 74808
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5718340873718262,
      "learning_rate": 0.00045700052668199315,
      "loss": 2.9865,
      "step": 74809
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1462223529815674,
      "learning_rate": 0.0004569970409890215,
      "loss": 3.1207,
      "step": 74810
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.3186347484588623,
      "learning_rate": 0.0004569935552668611,
      "loss": 3.056,
      "step": 74811
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8172937631607056,
      "learning_rate": 0.0004569900695155126,
      "loss": 3.3968,
      "step": 74812
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.4869277477264404,
      "learning_rate": 0.00045698658373497663,
      "loss": 3.152,
      "step": 74813
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5127569437026978,
      "learning_rate": 0.000456983097925254,
      "loss": 3.1351,
      "step": 74814
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.6335678100585938,
      "learning_rate": 0.0004569796120863452,
      "loss": 3.1386,
      "step": 74815
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8813064098358154,
      "learning_rate": 0.000456976126218251,
      "loss": 3.2018,
      "step": 74816
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8870742321014404,
      "learning_rate": 0.0004569726403209719,
      "loss": 3.2687,
      "step": 74817
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8776960372924805,
      "learning_rate": 0.0004569691543945087,
      "loss": 2.9587,
      "step": 74818
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.7066211700439453,
      "learning_rate": 0.00045696566843886187,
      "loss": 3.5483,
      "step": 74819
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.614138126373291,
      "learning_rate": 0.0004569621824540322,
      "loss": 2.9546,
      "step": 74820
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.63233482837677,
      "learning_rate": 0.0004569586964400203,
      "loss": 3.0457,
      "step": 74821
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6547518968582153,
      "learning_rate": 0.0004569552103968268,
      "loss": 3.047,
      "step": 74822
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.658289670944214,
      "learning_rate": 0.00045695172432445234,
      "loss": 3.3229,
      "step": 74823
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.9937517642974854,
      "learning_rate": 0.0004569482382228977,
      "loss": 3.0119,
      "step": 74824
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.6531457901000977,
      "learning_rate": 0.0004569447520921634,
      "loss": 3.4525,
      "step": 74825
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6596165895462036,
      "learning_rate": 0.00045694126593224995,
      "loss": 2.9572,
      "step": 74826
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.186748743057251,
      "learning_rate": 0.00045693777974315826,
      "loss": 2.6077,
      "step": 74827
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.478895902633667,
      "learning_rate": 0.0004569342935248889,
      "loss": 3.2502,
      "step": 74828
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8961418867111206,
      "learning_rate": 0.0004569308072774425,
      "loss": 2.9149,
      "step": 74829
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2876389026641846,
      "learning_rate": 0.0004569273210008197,
      "loss": 2.8637,
      "step": 74830
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.777646541595459,
      "learning_rate": 0.00045692383469502114,
      "loss": 2.9895,
      "step": 74831
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4878700971603394,
      "learning_rate": 0.0004569203483600474,
      "loss": 3.0021,
      "step": 74832
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0428526401519775,
      "learning_rate": 0.0004569168619958992,
      "loss": 3.1567,
      "step": 74833
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1712286472320557,
      "learning_rate": 0.0004569133756025774,
      "loss": 3.2205,
      "step": 74834
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.698463797569275,
      "learning_rate": 0.00045690988918008224,
      "loss": 3.0982,
      "step": 74835
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6830679178237915,
      "learning_rate": 0.0004569064027284146,
      "loss": 3.0659,
      "step": 74836
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.426816463470459,
      "learning_rate": 0.0004569029162475752,
      "loss": 2.9199,
      "step": 74837
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0018460750579834,
      "learning_rate": 0.00045689942973756447,
      "loss": 3.1164,
      "step": 74838
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.646754503250122,
      "learning_rate": 0.00045689594319838324,
      "loss": 3.1607,
      "step": 74839
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6560269594192505,
      "learning_rate": 0.00045689245663003205,
      "loss": 2.9284,
      "step": 74840
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.735009789466858,
      "learning_rate": 0.0004568889700325116,
      "loss": 3.1325,
      "step": 74841
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5531668663024902,
      "learning_rate": 0.00045688548340582245,
      "loss": 3.0627,
      "step": 74842
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2429349422454834,
      "learning_rate": 0.0004568819967499655,
      "loss": 3.1685,
      "step": 74843
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5282249450683594,
      "learning_rate": 0.00045687851006494116,
      "loss": 3.3047,
      "step": 74844
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6164854764938354,
      "learning_rate": 0.0004568750233507501,
      "loss": 2.8152,
      "step": 74845
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6633377075195312,
      "learning_rate": 0.00045687153660739304,
      "loss": 3.2084,
      "step": 74846
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.747683048248291,
      "learning_rate": 0.00045686804983487064,
      "loss": 3.1225,
      "step": 74847
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0461020469665527,
      "learning_rate": 0.00045686456303318347,
      "loss": 3.1751,
      "step": 74848
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9634394645690918,
      "learning_rate": 0.0004568610762023322,
      "loss": 2.8764,
      "step": 74849
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5832576751708984,
      "learning_rate": 0.0004568575893423175,
      "loss": 2.9385,
      "step": 74850
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.085080623626709,
      "learning_rate": 0.00045685410245314,
      "loss": 3.1869,
      "step": 74851
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8682026863098145,
      "learning_rate": 0.0004568506155348004,
      "loss": 3.0356,
      "step": 74852
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.5011279582977295,
      "learning_rate": 0.00045684712858729925,
      "loss": 2.8857,
      "step": 74853
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4538389444351196,
      "learning_rate": 0.0004568436416106373,
      "loss": 2.9207,
      "step": 74854
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7474812269210815,
      "learning_rate": 0.00045684015460481525,
      "loss": 2.7394,
      "step": 74855
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2967400550842285,
      "learning_rate": 0.0004568366675698335,
      "loss": 2.921,
      "step": 74856
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7164205312728882,
      "learning_rate": 0.0004568331805056929,
      "loss": 3.0365,
      "step": 74857
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.047921895980835,
      "learning_rate": 0.0004568296934123941,
      "loss": 2.8836,
      "step": 74858
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.930180072784424,
      "learning_rate": 0.0004568262062899376,
      "loss": 2.9678,
      "step": 74859
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8412784337997437,
      "learning_rate": 0.00045682271913832426,
      "loss": 2.9337,
      "step": 74860
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.1135594844818115,
      "learning_rate": 0.00045681923195755464,
      "loss": 2.8919,
      "step": 74861
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2605981826782227,
      "learning_rate": 0.0004568157447476293,
      "loss": 3.0919,
      "step": 74862
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9879460334777832,
      "learning_rate": 0.00045681225750854884,
      "loss": 3.1519,
      "step": 74863
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.0122265815734863,
      "learning_rate": 0.0004568087702403142,
      "loss": 2.9927,
      "step": 74864
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.9303677082061768,
      "learning_rate": 0.0004568052829429258,
      "loss": 3.0086,
      "step": 74865
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.8527392148971558,
      "learning_rate": 0.00045680179561638425,
      "loss": 3.1462,
      "step": 74866
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.231781482696533,
      "learning_rate": 0.0004567983082606904,
      "loss": 2.9285,
      "step": 74867
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.591385841369629,
      "learning_rate": 0.00045679482087584473,
      "loss": 2.9126,
      "step": 74868
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7632421255111694,
      "learning_rate": 0.00045679133346184794,
      "loss": 3.2616,
      "step": 74869
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.702701210975647,
      "learning_rate": 0.00045678784601870074,
      "loss": 3.1381,
      "step": 74870
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8213276863098145,
      "learning_rate": 0.0004567843585464036,
      "loss": 2.8639,
      "step": 74871
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.048731565475464,
      "learning_rate": 0.00045678087104495735,
      "loss": 3.047,
      "step": 74872
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.379142999649048,
      "learning_rate": 0.00045677738351436264,
      "loss": 3.2421,
      "step": 74873
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.707033634185791,
      "learning_rate": 0.00045677389595462,
      "loss": 3.1999,
      "step": 74874
    },
    {
      "epoch": 0.97,
      "grad_norm": 3.346942901611328,
      "learning_rate": 0.0004567704083657301,
      "loss": 3.0581,
      "step": 74875
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.035820722579956,
      "learning_rate": 0.00045676692074769374,
      "loss": 3.0998,
      "step": 74876
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.4776270389556885,
      "learning_rate": 0.0004567634331005113,
      "loss": 2.7966,
      "step": 74877
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.0339839458465576,
      "learning_rate": 0.0004567599454241836,
      "loss": 2.9281,
      "step": 74878
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.6400172710418701,
      "learning_rate": 0.0004567564577187114,
      "loss": 3.1419,
      "step": 74879
    },
    {
      "epoch": 0.97,
      "grad_norm": 1.7985715866088867,
      "learning_rate": 0.0004567529699840951,
      "loss": 3.0337,
      "step": 74880
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8548117876052856,
      "learning_rate": 0.0004567494822203355,
      "loss": 3.0177,
      "step": 74881
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9287053346633911,
      "learning_rate": 0.00045674599442743324,
      "loss": 3.0147,
      "step": 74882
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0831868648529053,
      "learning_rate": 0.0004567425066053889,
      "loss": 3.0472,
      "step": 74883
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2147815227508545,
      "learning_rate": 0.0004567390187542031,
      "loss": 3.2568,
      "step": 74884
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.0659494400024414,
      "learning_rate": 0.0004567355308738767,
      "loss": 3.3403,
      "step": 74885
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.746044397354126,
      "learning_rate": 0.00045673204296441014,
      "loss": 2.9318,
      "step": 74886
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8171335458755493,
      "learning_rate": 0.00045672855502580407,
      "loss": 3.0227,
      "step": 74887
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0781731605529785,
      "learning_rate": 0.0004567250670580593,
      "loss": 3.3987,
      "step": 74888
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.546133041381836,
      "learning_rate": 0.0004567215790611764,
      "loss": 3.2086,
      "step": 74889
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4937630891799927,
      "learning_rate": 0.00045671809103515587,
      "loss": 3.081,
      "step": 74890
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3183581829071045,
      "learning_rate": 0.00045671460297999855,
      "loss": 2.8671,
      "step": 74891
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3672711849212646,
      "learning_rate": 0.00045671111489570504,
      "loss": 3.0325,
      "step": 74892
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6434993743896484,
      "learning_rate": 0.00045670762678227603,
      "loss": 3.0406,
      "step": 74893
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.494042992591858,
      "learning_rate": 0.000456704138639712,
      "loss": 2.6314,
      "step": 74894
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3357808589935303,
      "learning_rate": 0.00045670065046801375,
      "loss": 2.9169,
      "step": 74895
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.421900510787964,
      "learning_rate": 0.00045669716226718195,
      "loss": 3.2221,
      "step": 74896
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9256833791732788,
      "learning_rate": 0.0004566936740372172,
      "loss": 3.1902,
      "step": 74897
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6203625202178955,
      "learning_rate": 0.00045669018577812004,
      "loss": 2.9388,
      "step": 74898
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1418399810791016,
      "learning_rate": 0.0004566866974898913,
      "loss": 3.0588,
      "step": 74899
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.875748634338379,
      "learning_rate": 0.00045668320917253145,
      "loss": 3.0894,
      "step": 74900
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7496051788330078,
      "learning_rate": 0.00045667972082604136,
      "loss": 2.9693,
      "step": 74901
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4503716230392456,
      "learning_rate": 0.0004566762324504214,
      "loss": 2.9058,
      "step": 74902
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.01242995262146,
      "learning_rate": 0.0004566727440456726,
      "loss": 2.9958,
      "step": 74903
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.6938159465789795,
      "learning_rate": 0.00045666925561179517,
      "loss": 3.0765,
      "step": 74904
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5724029541015625,
      "learning_rate": 0.00045666576714879,
      "loss": 2.9888,
      "step": 74905
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0038132667541504,
      "learning_rate": 0.0004566622786566578,
      "loss": 2.9491,
      "step": 74906
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.507188320159912,
      "learning_rate": 0.00045665879013539914,
      "loss": 2.8041,
      "step": 74907
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.6700186729431152,
      "learning_rate": 0.00045665530158501454,
      "loss": 3.0158,
      "step": 74908
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5686427354812622,
      "learning_rate": 0.00045665181300550474,
      "loss": 2.9829,
      "step": 74909
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.893221378326416,
      "learning_rate": 0.0004566483243968705,
      "loss": 2.8648,
      "step": 74910
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.968369483947754,
      "learning_rate": 0.0004566448357591124,
      "loss": 2.8255,
      "step": 74911
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.95882248878479,
      "learning_rate": 0.00045664134709223097,
      "loss": 2.7938,
      "step": 74912
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.475502371788025,
      "learning_rate": 0.00045663785839622706,
      "loss": 2.9837,
      "step": 74913
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7197036743164062,
      "learning_rate": 0.00045663436967110116,
      "loss": 3.0676,
      "step": 74914
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3557485342025757,
      "learning_rate": 0.00045663088091685397,
      "loss": 2.8897,
      "step": 74915
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.835533618927002,
      "learning_rate": 0.00045662739213348614,
      "loss": 2.7971,
      "step": 74916
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7343881130218506,
      "learning_rate": 0.0004566239033209984,
      "loss": 3.1086,
      "step": 74917
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.703926682472229,
      "learning_rate": 0.0004566204144793912,
      "loss": 2.8397,
      "step": 74918
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6928703784942627,
      "learning_rate": 0.00045661692560866547,
      "loss": 3.1828,
      "step": 74919
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5838699340820312,
      "learning_rate": 0.0004566134367088215,
      "loss": 3.1199,
      "step": 74920
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.563731074333191,
      "learning_rate": 0.0004566099477798603,
      "loss": 3.1939,
      "step": 74921
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6465624570846558,
      "learning_rate": 0.00045660645882178224,
      "loss": 2.9646,
      "step": 74922
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2768566608428955,
      "learning_rate": 0.00045660296983458814,
      "loss": 2.87,
      "step": 74923
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5874700546264648,
      "learning_rate": 0.00045659948081827855,
      "loss": 3.0364,
      "step": 74924
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9036567211151123,
      "learning_rate": 0.00045659599177285423,
      "loss": 2.7584,
      "step": 74925
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9393969774246216,
      "learning_rate": 0.00045659250269831567,
      "loss": 3.1403,
      "step": 74926
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9634079933166504,
      "learning_rate": 0.0004565890135946636,
      "loss": 3.0009,
      "step": 74927
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.900829792022705,
      "learning_rate": 0.00045658552446189884,
      "loss": 3.0056,
      "step": 74928
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3732059001922607,
      "learning_rate": 0.0004565820353000218,
      "loss": 2.7833,
      "step": 74929
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8407478332519531,
      "learning_rate": 0.00045657854610903307,
      "loss": 3.0246,
      "step": 74930
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.439521312713623,
      "learning_rate": 0.0004565750568889336,
      "loss": 3.0283,
      "step": 74931
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8483164310455322,
      "learning_rate": 0.0004565715676397238,
      "loss": 2.9467,
      "step": 74932
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0580050945281982,
      "learning_rate": 0.0004565680783614044,
      "loss": 3.0105,
      "step": 74933
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5979325771331787,
      "learning_rate": 0.0004565645890539761,
      "loss": 3.0579,
      "step": 74934
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4714828729629517,
      "learning_rate": 0.00045656109971743935,
      "loss": 2.9629,
      "step": 74935
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.074185371398926,
      "learning_rate": 0.000456557610351795,
      "loss": 2.9576,
      "step": 74936
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.926462173461914,
      "learning_rate": 0.0004565541209570437,
      "loss": 2.9294,
      "step": 74937
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8205254077911377,
      "learning_rate": 0.0004565506315331859,
      "loss": 2.8054,
      "step": 74938
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1164710521698,
      "learning_rate": 0.0004565471420802225,
      "loss": 3.1651,
      "step": 74939
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.734236478805542,
      "learning_rate": 0.00045654365259815397,
      "loss": 3.2243,
      "step": 74940
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8979911804199219,
      "learning_rate": 0.000456540163086981,
      "loss": 2.9935,
      "step": 74941
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.956334114074707,
      "learning_rate": 0.0004565366735467043,
      "loss": 2.8896,
      "step": 74942
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9211326837539673,
      "learning_rate": 0.00045653318397732455,
      "loss": 3.0567,
      "step": 74943
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7931941747665405,
      "learning_rate": 0.00045652969437884225,
      "loss": 2.9718,
      "step": 74944
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2077150344848633,
      "learning_rate": 0.0004565262047512581,
      "loss": 3.1469,
      "step": 74945
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.674615740776062,
      "learning_rate": 0.0004565227150945728,
      "loss": 2.9216,
      "step": 74946
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.002342462539673,
      "learning_rate": 0.000456519225408787,
      "loss": 2.9303,
      "step": 74947
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9066921472549438,
      "learning_rate": 0.00045651573569390126,
      "loss": 3.0062,
      "step": 74948
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4453147649765015,
      "learning_rate": 0.00045651224594991637,
      "loss": 3.0286,
      "step": 74949
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8982203006744385,
      "learning_rate": 0.00045650875617683285,
      "loss": 3.2047,
      "step": 74950
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6714797019958496,
      "learning_rate": 0.00045650526637465135,
      "loss": 3.0668,
      "step": 74951
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4235270023345947,
      "learning_rate": 0.00045650177654337273,
      "loss": 3.0888,
      "step": 74952
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9863038063049316,
      "learning_rate": 0.0004564982866829973,
      "loss": 2.6641,
      "step": 74953
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.634656548500061,
      "learning_rate": 0.00045649479679352596,
      "loss": 2.9876,
      "step": 74954
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.186772108078003,
      "learning_rate": 0.0004564913068749593,
      "loss": 2.8908,
      "step": 74955
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7091082334518433,
      "learning_rate": 0.0004564878169272979,
      "loss": 2.9728,
      "step": 74956
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9738165140151978,
      "learning_rate": 0.00045648432695054243,
      "loss": 2.9438,
      "step": 74957
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.580200433731079,
      "learning_rate": 0.0004564808369446937,
      "loss": 2.744,
      "step": 74958
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.1463236808776855,
      "learning_rate": 0.00045647734690975217,
      "loss": 3.0442,
      "step": 74959
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8707222938537598,
      "learning_rate": 0.0004564738568457185,
      "loss": 3.0171,
      "step": 74960
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.700545310974121,
      "learning_rate": 0.00045647036675259346,
      "loss": 2.965,
      "step": 74961
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9237220287322998,
      "learning_rate": 0.0004564668766303776,
      "loss": 3.2395,
      "step": 74962
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6638435125350952,
      "learning_rate": 0.00045646338647907145,
      "loss": 2.9114,
      "step": 74963
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9768308401107788,
      "learning_rate": 0.000456459896298676,
      "loss": 2.9578,
      "step": 74964
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.02669620513916,
      "learning_rate": 0.0004564564060891916,
      "loss": 2.9248,
      "step": 74965
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.147975206375122,
      "learning_rate": 0.00045645291585061907,
      "loss": 2.9351,
      "step": 74966
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5145248174667358,
      "learning_rate": 0.0004564494255829589,
      "loss": 3.0035,
      "step": 74967
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6629095077514648,
      "learning_rate": 0.0004564459352862119,
      "loss": 3.0844,
      "step": 74968
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8095788955688477,
      "learning_rate": 0.0004564424449603786,
      "loss": 2.9748,
      "step": 74969
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7654316425323486,
      "learning_rate": 0.00045643895460545977,
      "loss": 2.9833,
      "step": 74970
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.1648402214050293,
      "learning_rate": 0.00045643546422145597,
      "loss": 2.7538,
      "step": 74971
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6073628664016724,
      "learning_rate": 0.0004564319738083678,
      "loss": 3.1481,
      "step": 74972
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0132229328155518,
      "learning_rate": 0.00045642848336619596,
      "loss": 2.9532,
      "step": 74973
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7102855443954468,
      "learning_rate": 0.0004564249928949412,
      "loss": 2.7566,
      "step": 74974
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4974284172058105,
      "learning_rate": 0.00045642150239460404,
      "loss": 2.7471,
      "step": 74975
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4637823104858398,
      "learning_rate": 0.00045641801186518515,
      "loss": 2.7748,
      "step": 74976
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6231591701507568,
      "learning_rate": 0.0004564145213066853,
      "loss": 2.9732,
      "step": 74977
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6448487043380737,
      "learning_rate": 0.00045641103071910493,
      "loss": 2.9888,
      "step": 74978
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5594546794891357,
      "learning_rate": 0.0004564075401024448,
      "loss": 3.0253,
      "step": 74979
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5064473152160645,
      "learning_rate": 0.0004564040494567056,
      "loss": 3.0945,
      "step": 74980
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7227894067764282,
      "learning_rate": 0.0004564005587818879,
      "loss": 3.0497,
      "step": 74981
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8418190479278564,
      "learning_rate": 0.00045639706807799234,
      "loss": 3.1103,
      "step": 74982
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8854669332504272,
      "learning_rate": 0.00045639357734501964,
      "loss": 3.1345,
      "step": 74983
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.620702862739563,
      "learning_rate": 0.00045639008658297054,
      "loss": 3.0393,
      "step": 74984
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.669217586517334,
      "learning_rate": 0.00045638659579184547,
      "loss": 2.8149,
      "step": 74985
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9700264930725098,
      "learning_rate": 0.0004563831049716452,
      "loss": 3.0555,
      "step": 74986
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3654779195785522,
      "learning_rate": 0.00045637961412237034,
      "loss": 3.1371,
      "step": 74987
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.683401107788086,
      "learning_rate": 0.0004563761232440216,
      "loss": 3.1724,
      "step": 74988
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5323673486709595,
      "learning_rate": 0.00045637263233659956,
      "loss": 3.3558,
      "step": 74989
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5909122228622437,
      "learning_rate": 0.0004563691414001048,
      "loss": 3.1354,
      "step": 74990
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.939023733139038,
      "learning_rate": 0.00045636565043453823,
      "loss": 3.208,
      "step": 74991
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.9857254028320312,
      "learning_rate": 0.0004563621594399003,
      "loss": 2.9969,
      "step": 74992
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7941641807556152,
      "learning_rate": 0.0004563586684161916,
      "loss": 2.9285,
      "step": 74993
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9667011499404907,
      "learning_rate": 0.00045635517736341294,
      "loss": 2.8944,
      "step": 74994
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.493430256843567,
      "learning_rate": 0.0004563516862815649,
      "loss": 3.1595,
      "step": 74995
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2439095973968506,
      "learning_rate": 0.00045634819517064806,
      "loss": 2.885,
      "step": 74996
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6474848985671997,
      "learning_rate": 0.0004563447040306632,
      "loss": 3.0881,
      "step": 74997
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.120469570159912,
      "learning_rate": 0.00045634121286161096,
      "loss": 3.2689,
      "step": 74998
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7545626163482666,
      "learning_rate": 0.0004563377216634919,
      "loss": 3.1713,
      "step": 74999
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7066073417663574,
      "learning_rate": 0.00045633423043630667,
      "loss": 2.7063,
      "step": 75000
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0334296226501465,
      "learning_rate": 0.00045633073918005594,
      "loss": 3.344,
      "step": 75001
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7121081352233887,
      "learning_rate": 0.00045632724789474046,
      "loss": 2.9694,
      "step": 75002
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.3153018951416016,
      "learning_rate": 0.0004563237565803607,
      "loss": 2.854,
      "step": 75003
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7013462781906128,
      "learning_rate": 0.00045632026523691747,
      "loss": 2.9817,
      "step": 75004
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5361655950546265,
      "learning_rate": 0.00045631677386441137,
      "loss": 2.6951,
      "step": 75005
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4280060529708862,
      "learning_rate": 0.00045631328246284296,
      "loss": 2.9683,
      "step": 75006
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6665306091308594,
      "learning_rate": 0.00045630979103221305,
      "loss": 3.1011,
      "step": 75007
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7303000688552856,
      "learning_rate": 0.0004563062995725221,
      "loss": 3.1296,
      "step": 75008
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5440466403961182,
      "learning_rate": 0.0004563028080837709,
      "loss": 3.0372,
      "step": 75009
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0419270992279053,
      "learning_rate": 0.00045629931656596004,
      "loss": 2.9884,
      "step": 75010
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6684036254882812,
      "learning_rate": 0.0004562958250190902,
      "loss": 2.9936,
      "step": 75011
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.349154472351074,
      "learning_rate": 0.00045629233344316204,
      "loss": 3.0154,
      "step": 75012
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9148271083831787,
      "learning_rate": 0.00045628884183817616,
      "loss": 3.065,
      "step": 75013
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5418428182601929,
      "learning_rate": 0.0004562853502041333,
      "loss": 3.0665,
      "step": 75014
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0228569507598877,
      "learning_rate": 0.0004562818585410339,
      "loss": 3.1402,
      "step": 75015
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.0065972805023193,
      "learning_rate": 0.00045627836684887887,
      "loss": 3.1266,
      "step": 75016
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.2409045696258545,
      "learning_rate": 0.0004562748751276687,
      "loss": 2.9555,
      "step": 75017
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7511945962905884,
      "learning_rate": 0.00045627138337740405,
      "loss": 3.0213,
      "step": 75018
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5570716857910156,
      "learning_rate": 0.00045626789159808567,
      "loss": 3.076,
      "step": 75019
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.150104284286499,
      "learning_rate": 0.0004562643997897141,
      "loss": 2.855,
      "step": 75020
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3991243839263916,
      "learning_rate": 0.00045626090795228996,
      "loss": 3.0158,
      "step": 75021
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3387999534606934,
      "learning_rate": 0.00045625741608581414,
      "loss": 2.9703,
      "step": 75022
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1400959491729736,
      "learning_rate": 0.000456253924190287,
      "loss": 2.8404,
      "step": 75023
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8644163608551025,
      "learning_rate": 0.0004562504322657093,
      "loss": 3.0849,
      "step": 75024
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8999547958374023,
      "learning_rate": 0.0004562469403120818,
      "loss": 2.9219,
      "step": 75025
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6484744548797607,
      "learning_rate": 0.0004562434483294049,
      "loss": 3.2002,
      "step": 75026
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.728593587875366,
      "learning_rate": 0.0004562399563176794,
      "loss": 3.1478,
      "step": 75027
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.991786241531372,
      "learning_rate": 0.00045623646427690604,
      "loss": 3.155,
      "step": 75028
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7348711490631104,
      "learning_rate": 0.00045623297220708534,
      "loss": 3.3188,
      "step": 75029
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6465988159179688,
      "learning_rate": 0.000456229480108218,
      "loss": 3.0931,
      "step": 75030
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.279484272003174,
      "learning_rate": 0.0004562259879803046,
      "loss": 3.2411,
      "step": 75031
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.204763412475586,
      "learning_rate": 0.00045622249582334587,
      "loss": 2.9893,
      "step": 75032
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4937753677368164,
      "learning_rate": 0.0004562190036373424,
      "loss": 3.0736,
      "step": 75033
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9167418479919434,
      "learning_rate": 0.0004562155114222948,
      "loss": 3.028,
      "step": 75034
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.0137834548950195,
      "learning_rate": 0.000456212019178204,
      "loss": 3.045,
      "step": 75035
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5693175792694092,
      "learning_rate": 0.00045620852690507024,
      "loss": 2.9446,
      "step": 75036
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7490079402923584,
      "learning_rate": 0.00045620503460289444,
      "loss": 3.0305,
      "step": 75037
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.518849492073059,
      "learning_rate": 0.00045620154227167726,
      "loss": 3.1111,
      "step": 75038
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0815486907958984,
      "learning_rate": 0.0004561980499114191,
      "loss": 3.0156,
      "step": 75039
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5332497358322144,
      "learning_rate": 0.0004561945575221209,
      "loss": 2.9161,
      "step": 75040
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4441603422164917,
      "learning_rate": 0.0004561910651037832,
      "loss": 3.1916,
      "step": 75041
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.815887689590454,
      "learning_rate": 0.00045618757265640655,
      "loss": 3.0496,
      "step": 75042
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8908122777938843,
      "learning_rate": 0.0004561840801799917,
      "loss": 3.0311,
      "step": 75043
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7595129013061523,
      "learning_rate": 0.0004561805876745392,
      "loss": 3.0837,
      "step": 75044
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8844810724258423,
      "learning_rate": 0.0004561770951400499,
      "loss": 2.9478,
      "step": 75045
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.867773175239563,
      "learning_rate": 0.0004561736025765243,
      "loss": 3.1071,
      "step": 75046
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8786382675170898,
      "learning_rate": 0.00045617010998396317,
      "loss": 2.8781,
      "step": 75047
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4436769485473633,
      "learning_rate": 0.0004561666173623669,
      "loss": 2.7859,
      "step": 75048
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6069225072860718,
      "learning_rate": 0.00045616312471173637,
      "loss": 2.9129,
      "step": 75049
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6152843236923218,
      "learning_rate": 0.0004561596320320722,
      "loss": 3.0982,
      "step": 75050
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6032819747924805,
      "learning_rate": 0.000456156139323375,
      "loss": 2.9953,
      "step": 75051
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.248145580291748,
      "learning_rate": 0.0004561526465856453,
      "loss": 2.9797,
      "step": 75052
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6536153554916382,
      "learning_rate": 0.0004561491538188841,
      "loss": 2.8892,
      "step": 75053
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7278159856796265,
      "learning_rate": 0.0004561456610230917,
      "loss": 2.8772,
      "step": 75054
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.840228796005249,
      "learning_rate": 0.0004561421681982689,
      "loss": 3.2293,
      "step": 75055
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.890076756477356,
      "learning_rate": 0.0004561386753444163,
      "loss": 3.1248,
      "step": 75056
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8166608810424805,
      "learning_rate": 0.0004561351824615346,
      "loss": 3.1024,
      "step": 75057
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6112035512924194,
      "learning_rate": 0.00045613168954962435,
      "loss": 3.1113,
      "step": 75058
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5123425722122192,
      "learning_rate": 0.0004561281966086864,
      "loss": 3.0565,
      "step": 75059
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.604576826095581,
      "learning_rate": 0.00045612470363872115,
      "loss": 2.8255,
      "step": 75060
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0723981857299805,
      "learning_rate": 0.0004561212106397293,
      "loss": 3.025,
      "step": 75061
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.537245273590088,
      "learning_rate": 0.00045611771761171183,
      "loss": 2.9791,
      "step": 75062
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8362828493118286,
      "learning_rate": 0.000456114224554669,
      "loss": 2.9746,
      "step": 75063
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6690477132797241,
      "learning_rate": 0.00045611073146860153,
      "loss": 3.2063,
      "step": 75064
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.012040138244629,
      "learning_rate": 0.0004561072383535102,
      "loss": 3.0796,
      "step": 75065
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.506314992904663,
      "learning_rate": 0.0004561037452093955,
      "loss": 2.7628,
      "step": 75066
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6443421840667725,
      "learning_rate": 0.0004561002520362582,
      "loss": 3.0859,
      "step": 75067
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6706287860870361,
      "learning_rate": 0.00045609675883409893,
      "loss": 2.894,
      "step": 75068
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7537161111831665,
      "learning_rate": 0.00045609326560291843,
      "loss": 2.9795,
      "step": 75069
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.474071741104126,
      "learning_rate": 0.00045608977234271714,
      "loss": 2.8478,
      "step": 75070
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5866382122039795,
      "learning_rate": 0.0004560862790534958,
      "loss": 2.9642,
      "step": 75071
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.1933043003082275,
      "learning_rate": 0.00045608278573525514,
      "loss": 2.7695,
      "step": 75072
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5562176704406738,
      "learning_rate": 0.0004560792923879957,
      "loss": 2.9899,
      "step": 75073
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.755628228187561,
      "learning_rate": 0.0004560757990117182,
      "loss": 2.9765,
      "step": 75074
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.035090923309326,
      "learning_rate": 0.0004560723056064232,
      "loss": 2.9363,
      "step": 75075
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.342796802520752,
      "learning_rate": 0.00045606881217211153,
      "loss": 3.1574,
      "step": 75076
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6976765394210815,
      "learning_rate": 0.0004560653187087836,
      "loss": 3.1153,
      "step": 75077
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4742845296859741,
      "learning_rate": 0.0004560618252164402,
      "loss": 2.9963,
      "step": 75078
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.89391827583313,
      "learning_rate": 0.000456058331695082,
      "loss": 2.5418,
      "step": 75079
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.429715871810913,
      "learning_rate": 0.0004560548381447096,
      "loss": 3.107,
      "step": 75080
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9023183584213257,
      "learning_rate": 0.00045605134456532366,
      "loss": 3.0755,
      "step": 75081
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5475949048995972,
      "learning_rate": 0.0004560478509569248,
      "loss": 3.012,
      "step": 75082
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.876467704772949,
      "learning_rate": 0.00045604435731951383,
      "loss": 2.9452,
      "step": 75083
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.681734561920166,
      "learning_rate": 0.00045604086365309116,
      "loss": 3.0772,
      "step": 75084
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.9997334480285645,
      "learning_rate": 0.0004560373699576575,
      "loss": 3.1086,
      "step": 75085
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5578545331954956,
      "learning_rate": 0.00045603387623321365,
      "loss": 2.9766,
      "step": 75086
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7439377307891846,
      "learning_rate": 0.0004560303824797601,
      "loss": 2.9472,
      "step": 75087
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8545305728912354,
      "learning_rate": 0.0004560268886972975,
      "loss": 3.071,
      "step": 75088
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5050342082977295,
      "learning_rate": 0.0004560233948858267,
      "loss": 3.0203,
      "step": 75089
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.577235460281372,
      "learning_rate": 0.00045601990104534814,
      "loss": 2.906,
      "step": 75090
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.443346619606018,
      "learning_rate": 0.0004560164071758624,
      "loss": 3.0873,
      "step": 75091
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6582798957824707,
      "learning_rate": 0.0004560129132773705,
      "loss": 2.9069,
      "step": 75092
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9729397296905518,
      "learning_rate": 0.0004560094193498727,
      "loss": 3.0229,
      "step": 75093
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.614842414855957,
      "learning_rate": 0.00045600592539336984,
      "loss": 2.904,
      "step": 75094
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6867239475250244,
      "learning_rate": 0.0004560024314078626,
      "loss": 2.9188,
      "step": 75095
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5235451459884644,
      "learning_rate": 0.0004559989373933515,
      "loss": 3.1681,
      "step": 75096
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.18627667427063,
      "learning_rate": 0.00045599544334983726,
      "loss": 2.788,
      "step": 75097
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5457764863967896,
      "learning_rate": 0.00045599194927732053,
      "loss": 3.031,
      "step": 75098
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7282824516296387,
      "learning_rate": 0.00045598845517580194,
      "loss": 2.9266,
      "step": 75099
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.75350284576416,
      "learning_rate": 0.00045598496104528214,
      "loss": 3.0656,
      "step": 75100
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.669540286064148,
      "learning_rate": 0.00045598146688576177,
      "loss": 3.0785,
      "step": 75101
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1387617588043213,
      "learning_rate": 0.0004559779726972416,
      "loss": 2.9856,
      "step": 75102
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.524440050125122,
      "learning_rate": 0.00045597447847972204,
      "loss": 2.887,
      "step": 75103
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.72710120677948,
      "learning_rate": 0.000455970984233204,
      "loss": 3.1392,
      "step": 75104
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.834521770477295,
      "learning_rate": 0.00045596748995768797,
      "loss": 2.9964,
      "step": 75105
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5683284997940063,
      "learning_rate": 0.00045596399565317464,
      "loss": 2.8866,
      "step": 75106
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2011501789093018,
      "learning_rate": 0.0004559605013196646,
      "loss": 2.6481,
      "step": 75107
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7811344861984253,
      "learning_rate": 0.00045595700695715864,
      "loss": 3.0333,
      "step": 75108
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.017059803009033,
      "learning_rate": 0.00045595351256565733,
      "loss": 2.7966,
      "step": 75109
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6288543939590454,
      "learning_rate": 0.00045595001814516126,
      "loss": 3.0879,
      "step": 75110
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3701341152191162,
      "learning_rate": 0.00045594652369567123,
      "loss": 3.0662,
      "step": 75111
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9951262474060059,
      "learning_rate": 0.00045594302921718775,
      "loss": 3.0692,
      "step": 75112
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.545598268508911,
      "learning_rate": 0.0004559395347097114,
      "loss": 3.0467,
      "step": 75113
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.17611026763916,
      "learning_rate": 0.0004559360401732431,
      "loss": 2.8744,
      "step": 75114
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0622386932373047,
      "learning_rate": 0.0004559325456077833,
      "loss": 2.9971,
      "step": 75115
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.437991142272949,
      "learning_rate": 0.00045592905101333266,
      "loss": 2.7892,
      "step": 75116
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9783635139465332,
      "learning_rate": 0.000455925556389892,
      "loss": 3.2912,
      "step": 75117
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.854882836341858,
      "learning_rate": 0.00045592206173746165,
      "loss": 2.8691,
      "step": 75118
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5166869163513184,
      "learning_rate": 0.00045591856705604256,
      "loss": 3.0911,
      "step": 75119
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.656101107597351,
      "learning_rate": 0.0004559150723456353,
      "loss": 2.9451,
      "step": 75120
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.420511245727539,
      "learning_rate": 0.0004559115776062404,
      "loss": 3.23,
      "step": 75121
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.472198724746704,
      "learning_rate": 0.0004559080828378586,
      "loss": 3.0502,
      "step": 75122
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7469936609268188,
      "learning_rate": 0.0004559045880404906,
      "loss": 2.9758,
      "step": 75123
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5252453088760376,
      "learning_rate": 0.0004559010932141369,
      "loss": 3.2068,
      "step": 75124
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4324144124984741,
      "learning_rate": 0.00045589759835879825,
      "loss": 3.049,
      "step": 75125
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.532631516456604,
      "learning_rate": 0.0004558941034744754,
      "loss": 3.0838,
      "step": 75126
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.5025746822357178,
      "learning_rate": 0.0004558906085611688,
      "loss": 2.8677,
      "step": 75127
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5188933610916138,
      "learning_rate": 0.00045588711361887926,
      "loss": 2.928,
      "step": 75128
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7008026838302612,
      "learning_rate": 0.00045588361864760734,
      "loss": 3.0553,
      "step": 75129
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8698047399520874,
      "learning_rate": 0.0004558801236473537,
      "loss": 2.8568,
      "step": 75130
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6768159866333008,
      "learning_rate": 0.00045587662861811895,
      "loss": 2.8755,
      "step": 75131
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6332587003707886,
      "learning_rate": 0.0004558731335599039,
      "loss": 2.999,
      "step": 75132
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7905681133270264,
      "learning_rate": 0.000455869638472709,
      "loss": 2.8904,
      "step": 75133
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5011122226715088,
      "learning_rate": 0.00045586614335653506,
      "loss": 3.3187,
      "step": 75134
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6404409408569336,
      "learning_rate": 0.0004558626482113827,
      "loss": 3.046,
      "step": 75135
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4359784126281738,
      "learning_rate": 0.00045585915303725244,
      "loss": 3.0775,
      "step": 75136
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8334153890609741,
      "learning_rate": 0.000455855657834145,
      "loss": 3.222,
      "step": 75137
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7277295589447021,
      "learning_rate": 0.00045585216260206113,
      "loss": 3.0484,
      "step": 75138
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4979161024093628,
      "learning_rate": 0.0004558486673410014,
      "loss": 2.9504,
      "step": 75139
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7223376035690308,
      "learning_rate": 0.00045584517205096643,
      "loss": 3.0482,
      "step": 75140
    },
    {
      "epoch": 0.98,
      "grad_norm": 14.1654691696167,
      "learning_rate": 0.00045584167673195684,
      "loss": 2.8991,
      "step": 75141
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1837689876556396,
      "learning_rate": 0.0004558381813839734,
      "loss": 3.1457,
      "step": 75142
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6930632591247559,
      "learning_rate": 0.0004558346860070167,
      "loss": 2.7875,
      "step": 75143
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5502952337265015,
      "learning_rate": 0.00045583119060108745,
      "loss": 3.1038,
      "step": 75144
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7771663665771484,
      "learning_rate": 0.00045582769516618613,
      "loss": 2.9471,
      "step": 75145
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.5647079944610596,
      "learning_rate": 0.00045582419970231357,
      "loss": 2.9869,
      "step": 75146
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0868513584136963,
      "learning_rate": 0.0004558207042094703,
      "loss": 2.939,
      "step": 75147
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.917890191078186,
      "learning_rate": 0.0004558172086876571,
      "loss": 3.2611,
      "step": 75148
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.781387448310852,
      "learning_rate": 0.0004558137131368745,
      "loss": 3.1546,
      "step": 75149
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2997422218322754,
      "learning_rate": 0.00045581021755712316,
      "loss": 2.9419,
      "step": 75150
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.9670233726501465,
      "learning_rate": 0.00045580672194840373,
      "loss": 3.0326,
      "step": 75151
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8025177717208862,
      "learning_rate": 0.000455803226310717,
      "loss": 3.0353,
      "step": 75152
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.657259225845337,
      "learning_rate": 0.0004557997306440635,
      "loss": 2.978,
      "step": 75153
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2604687213897705,
      "learning_rate": 0.0004557962349484438,
      "loss": 3.0434,
      "step": 75154
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.261273145675659,
      "learning_rate": 0.00045579273922385867,
      "loss": 2.8984,
      "step": 75155
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5793765783309937,
      "learning_rate": 0.00045578924347030876,
      "loss": 2.9221,
      "step": 75156
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7342967987060547,
      "learning_rate": 0.00045578574768779465,
      "loss": 2.9165,
      "step": 75157
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.123061180114746,
      "learning_rate": 0.000455782251876317,
      "loss": 3.0402,
      "step": 75158
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.195744752883911,
      "learning_rate": 0.00045577875603587653,
      "loss": 3.0332,
      "step": 75159
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8221943378448486,
      "learning_rate": 0.0004557752601664739,
      "loss": 3.0755,
      "step": 75160
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.569046139717102,
      "learning_rate": 0.0004557717642681097,
      "loss": 2.9473,
      "step": 75161
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4876829385757446,
      "learning_rate": 0.00045576826834078444,
      "loss": 3.0418,
      "step": 75162
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3756844997406006,
      "learning_rate": 0.000455764772384499,
      "loss": 3.1897,
      "step": 75163
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.543529510498047,
      "learning_rate": 0.00045576127639925407,
      "loss": 3.1731,
      "step": 75164
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8197294473648071,
      "learning_rate": 0.00045575778038505003,
      "loss": 2.8787,
      "step": 75165
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.5867421627044678,
      "learning_rate": 0.0004557542843418877,
      "loss": 2.7904,
      "step": 75166
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5027977228164673,
      "learning_rate": 0.0004557507882697678,
      "loss": 3.117,
      "step": 75167
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3765605688095093,
      "learning_rate": 0.0004557472921686909,
      "loss": 3.0908,
      "step": 75168
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6530358791351318,
      "learning_rate": 0.00045574379603865756,
      "loss": 2.9517,
      "step": 75169
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9885731935501099,
      "learning_rate": 0.0004557402998796684,
      "loss": 2.823,
      "step": 75170
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6401875019073486,
      "learning_rate": 0.0004557368036917243,
      "loss": 2.9473,
      "step": 75171
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5840860605239868,
      "learning_rate": 0.0004557333074748258,
      "loss": 3.1104,
      "step": 75172
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6348702907562256,
      "learning_rate": 0.0004557298112289735,
      "loss": 2.7244,
      "step": 75173
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4778800010681152,
      "learning_rate": 0.0004557263149541681,
      "loss": 3.2052,
      "step": 75174
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9333142042160034,
      "learning_rate": 0.00045572281865041025,
      "loss": 2.8851,
      "step": 75175
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.570672631263733,
      "learning_rate": 0.0004557193223177007,
      "loss": 2.7237,
      "step": 75176
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.838651418685913,
      "learning_rate": 0.0004557158259560398,
      "loss": 2.974,
      "step": 75177
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5943219661712646,
      "learning_rate": 0.0004557123295654285,
      "loss": 2.8875,
      "step": 75178
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4649900197982788,
      "learning_rate": 0.0004557088331458673,
      "loss": 3.0851,
      "step": 75179
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7361643314361572,
      "learning_rate": 0.00045570533669735683,
      "loss": 2.9268,
      "step": 75180
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9813711643218994,
      "learning_rate": 0.0004557018402198979,
      "loss": 2.8302,
      "step": 75181
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.706313133239746,
      "learning_rate": 0.000455698343713491,
      "loss": 2.9502,
      "step": 75182
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5300698280334473,
      "learning_rate": 0.00045569484717813686,
      "loss": 2.8458,
      "step": 75183
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5962369441986084,
      "learning_rate": 0.0004556913506138362,
      "loss": 2.7245,
      "step": 75184
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.398582100868225,
      "learning_rate": 0.0004556878540205894,
      "loss": 2.9873,
      "step": 75185
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.742752194404602,
      "learning_rate": 0.00045568435739839737,
      "loss": 3.1749,
      "step": 75186
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1700897216796875,
      "learning_rate": 0.0004556808607472607,
      "loss": 2.8962,
      "step": 75187
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.601592540740967,
      "learning_rate": 0.00045567736406718,
      "loss": 2.9923,
      "step": 75188
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6318588256835938,
      "learning_rate": 0.0004556738673581559,
      "loss": 3.2945,
      "step": 75189
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5853904485702515,
      "learning_rate": 0.00045567037062018925,
      "loss": 2.8812,
      "step": 75190
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.991831660270691,
      "learning_rate": 0.0004556668738532804,
      "loss": 3.1688,
      "step": 75191
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.331120014190674,
      "learning_rate": 0.0004556633770574301,
      "loss": 3.0142,
      "step": 75192
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8546360731124878,
      "learning_rate": 0.0004556598802326392,
      "loss": 3.0111,
      "step": 75193
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.748119831085205,
      "learning_rate": 0.0004556563833789081,
      "loss": 2.7204,
      "step": 75194
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.114678382873535,
      "learning_rate": 0.00045565288649623744,
      "loss": 3.2281,
      "step": 75195
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.6504039764404297,
      "learning_rate": 0.0004556493895846281,
      "loss": 3.1648,
      "step": 75196
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6796008348464966,
      "learning_rate": 0.0004556458926440806,
      "loss": 2.9753,
      "step": 75197
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4465454816818237,
      "learning_rate": 0.0004556423956745955,
      "loss": 3.1,
      "step": 75198
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7076516151428223,
      "learning_rate": 0.0004556388986761736,
      "loss": 3.0484,
      "step": 75199
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.02170467376709,
      "learning_rate": 0.0004556354016488155,
      "loss": 2.9387,
      "step": 75200
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.218395471572876,
      "learning_rate": 0.0004556319045925218,
      "loss": 3.0312,
      "step": 75201
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4248303174972534,
      "learning_rate": 0.0004556284075072933,
      "loss": 2.9697,
      "step": 75202
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4979374408721924,
      "learning_rate": 0.00045562491039313036,
      "loss": 2.9409,
      "step": 75203
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8697646856307983,
      "learning_rate": 0.00045562141325003393,
      "loss": 2.8468,
      "step": 75204
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7976772785186768,
      "learning_rate": 0.0004556179160780046,
      "loss": 2.7908,
      "step": 75205
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.637966513633728,
      "learning_rate": 0.0004556144188770428,
      "loss": 3.1927,
      "step": 75206
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7736836671829224,
      "learning_rate": 0.00045561092164714945,
      "loss": 3.1125,
      "step": 75207
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.961195468902588,
      "learning_rate": 0.00045560742438832506,
      "loss": 2.9831,
      "step": 75208
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.47978138923645,
      "learning_rate": 0.00045560392710057037,
      "loss": 2.9702,
      "step": 75209
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.763863444328308,
      "learning_rate": 0.0004556004297838858,
      "loss": 3.0221,
      "step": 75210
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3714628219604492,
      "learning_rate": 0.00045559693243827243,
      "loss": 2.8345,
      "step": 75211
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5641796588897705,
      "learning_rate": 0.0004555934350637305,
      "loss": 2.9508,
      "step": 75212
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.2740823030471802,
      "learning_rate": 0.00045558993766026075,
      "loss": 3.1975,
      "step": 75213
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3797687292099,
      "learning_rate": 0.0004555864402278641,
      "loss": 3.0268,
      "step": 75214
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9908403158187866,
      "learning_rate": 0.0004555829427665408,
      "loss": 2.9811,
      "step": 75215
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5459957122802734,
      "learning_rate": 0.0004555794452762917,
      "loss": 3.07,
      "step": 75216
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3520922660827637,
      "learning_rate": 0.0004555759477571176,
      "loss": 3.3065,
      "step": 75217
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.358768343925476,
      "learning_rate": 0.0004555724502090189,
      "loss": 3.0428,
      "step": 75218
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.546156883239746,
      "learning_rate": 0.0004555689526319963,
      "loss": 3.0462,
      "step": 75219
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0234744548797607,
      "learning_rate": 0.00045556545502605064,
      "loss": 2.8268,
      "step": 75220
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.728275179862976,
      "learning_rate": 0.0004555619573911822,
      "loss": 2.9936,
      "step": 75221
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3121724128723145,
      "learning_rate": 0.000455558459727392,
      "loss": 2.9675,
      "step": 75222
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.390188455581665,
      "learning_rate": 0.00045555496203468054,
      "loss": 2.8789,
      "step": 75223
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6436669826507568,
      "learning_rate": 0.0004555514643130485,
      "loss": 3.0455,
      "step": 75224
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7227650880813599,
      "learning_rate": 0.0004555479665624965,
      "loss": 3.1284,
      "step": 75225
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5323750972747803,
      "learning_rate": 0.00045554446878302515,
      "loss": 2.8539,
      "step": 75226
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8169403076171875,
      "learning_rate": 0.00045554097097463516,
      "loss": 2.894,
      "step": 75227
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6420038938522339,
      "learning_rate": 0.00045553747313732717,
      "loss": 3.1426,
      "step": 75228
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4017722606658936,
      "learning_rate": 0.0004555339752711018,
      "loss": 3.1943,
      "step": 75229
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5918805599212646,
      "learning_rate": 0.0004555304773759598,
      "loss": 3.0581,
      "step": 75230
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0792782306671143,
      "learning_rate": 0.00045552697945190173,
      "loss": 2.826,
      "step": 75231
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5528064966201782,
      "learning_rate": 0.00045552348149892824,
      "loss": 2.9938,
      "step": 75232
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.016331911087036,
      "learning_rate": 0.00045551998351703993,
      "loss": 2.9451,
      "step": 75233
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6248831748962402,
      "learning_rate": 0.0004555164855062376,
      "loss": 2.9068,
      "step": 75234
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5350333452224731,
      "learning_rate": 0.00045551298746652184,
      "loss": 3.0784,
      "step": 75235
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.786794662475586,
      "learning_rate": 0.00045550948939789324,
      "loss": 2.9243,
      "step": 75236
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5844197273254395,
      "learning_rate": 0.0004555059913003525,
      "loss": 3.1967,
      "step": 75237
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.695842981338501,
      "learning_rate": 0.0004555024931739002,
      "loss": 3.0663,
      "step": 75238
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.4468045234680176,
      "learning_rate": 0.00045549899501853716,
      "loss": 3.0428,
      "step": 75239
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.279229164123535,
      "learning_rate": 0.0004554954968342639,
      "loss": 2.9847,
      "step": 75240
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.278377056121826,
      "learning_rate": 0.000455491998621081,
      "loss": 2.7625,
      "step": 75241
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6790637969970703,
      "learning_rate": 0.0004554885003789893,
      "loss": 3.0747,
      "step": 75242
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.530199408531189,
      "learning_rate": 0.0004554850021079893,
      "loss": 3.14,
      "step": 75243
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8296738862991333,
      "learning_rate": 0.00045548150380808166,
      "loss": 3.008,
      "step": 75244
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.165271520614624,
      "learning_rate": 0.0004554780054792672,
      "loss": 2.8231,
      "step": 75245
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.970280408859253,
      "learning_rate": 0.00045547450712154634,
      "loss": 2.9056,
      "step": 75246
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6162688732147217,
      "learning_rate": 0.0004554710087349199,
      "loss": 3.3336,
      "step": 75247
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1887454986572266,
      "learning_rate": 0.0004554675103193884,
      "loss": 2.9331,
      "step": 75248
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8725999593734741,
      "learning_rate": 0.00045546401187495267,
      "loss": 3.0382,
      "step": 75249
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6025162935256958,
      "learning_rate": 0.00045546051340161314,
      "loss": 2.959,
      "step": 75250
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4889910221099854,
      "learning_rate": 0.00045545701489937057,
      "loss": 2.9043,
      "step": 75251
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8158626556396484,
      "learning_rate": 0.0004554535163682256,
      "loss": 2.9474,
      "step": 75252
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6596990823745728,
      "learning_rate": 0.00045545001780817896,
      "loss": 2.9403,
      "step": 75253
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6011656522750854,
      "learning_rate": 0.0004554465192192311,
      "loss": 3.182,
      "step": 75254
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6872444152832031,
      "learning_rate": 0.0004554430206013829,
      "loss": 3.1176,
      "step": 75255
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.031050682067871,
      "learning_rate": 0.0004554395219546349,
      "loss": 2.9109,
      "step": 75256
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2690587043762207,
      "learning_rate": 0.0004554360232789878,
      "loss": 3.1279,
      "step": 75257
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.678497552871704,
      "learning_rate": 0.0004554325245744421,
      "loss": 3.2302,
      "step": 75258
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.820884346961975,
      "learning_rate": 0.00045542902584099865,
      "loss": 2.9459,
      "step": 75259
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.334066152572632,
      "learning_rate": 0.000455425527078658,
      "loss": 2.8751,
      "step": 75260
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7258825302124023,
      "learning_rate": 0.0004554220282874207,
      "loss": 3.0847,
      "step": 75261
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7150380611419678,
      "learning_rate": 0.00045541852946728764,
      "loss": 2.7196,
      "step": 75262
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7840723991394043,
      "learning_rate": 0.0004554150306182593,
      "loss": 2.8984,
      "step": 75263
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7455215454101562,
      "learning_rate": 0.0004554115317403364,
      "loss": 2.7931,
      "step": 75264
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7552826404571533,
      "learning_rate": 0.00045540803283351946,
      "loss": 2.9938,
      "step": 75265
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.395386815071106,
      "learning_rate": 0.00045540453389780934,
      "loss": 3.0441,
      "step": 75266
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5221256017684937,
      "learning_rate": 0.0004554010349332066,
      "loss": 3.0519,
      "step": 75267
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4860317707061768,
      "learning_rate": 0.00045539753593971184,
      "loss": 3.0467,
      "step": 75268
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5695432424545288,
      "learning_rate": 0.0004553940369173257,
      "loss": 2.6919,
      "step": 75269
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6742708683013916,
      "learning_rate": 0.00045539053786604886,
      "loss": 2.99,
      "step": 75270
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7180042266845703,
      "learning_rate": 0.000455387038785882,
      "loss": 2.9929,
      "step": 75271
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6099499464035034,
      "learning_rate": 0.00045538353967682587,
      "loss": 2.9517,
      "step": 75272
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5289924144744873,
      "learning_rate": 0.000455380040538881,
      "loss": 2.6725,
      "step": 75273
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9144090414047241,
      "learning_rate": 0.00045537654137204784,
      "loss": 2.9722,
      "step": 75274
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0896897315979004,
      "learning_rate": 0.0004553730421763275,
      "loss": 3.2052,
      "step": 75275
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8535808324813843,
      "learning_rate": 0.0004553695429517202,
      "loss": 3.0044,
      "step": 75276
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8748842477798462,
      "learning_rate": 0.00045536604369822686,
      "loss": 3.183,
      "step": 75277
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.53441321849823,
      "learning_rate": 0.00045536254441584797,
      "loss": 2.9901,
      "step": 75278
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5137791633605957,
      "learning_rate": 0.0004553590451045843,
      "loss": 2.8897,
      "step": 75279
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6609926223754883,
      "learning_rate": 0.00045535554576443647,
      "loss": 2.6685,
      "step": 75280
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7287497520446777,
      "learning_rate": 0.0004553520463954051,
      "loss": 2.9692,
      "step": 75281
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2018721103668213,
      "learning_rate": 0.0004553485469974908,
      "loss": 3.0358,
      "step": 75282
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5911824703216553,
      "learning_rate": 0.00045534504757069425,
      "loss": 2.9522,
      "step": 75283
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4805750846862793,
      "learning_rate": 0.0004553415481150162,
      "loss": 2.7512,
      "step": 75284
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3224899768829346,
      "learning_rate": 0.0004553380486304572,
      "loss": 3.254,
      "step": 75285
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7555052042007446,
      "learning_rate": 0.0004553345491170179,
      "loss": 2.8566,
      "step": 75286
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.862696886062622,
      "learning_rate": 0.00045533104957469906,
      "loss": 2.9626,
      "step": 75287
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.472113609313965,
      "learning_rate": 0.00045532755000350124,
      "loss": 3.1126,
      "step": 75288
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9612563848495483,
      "learning_rate": 0.000455324050403425,
      "loss": 2.9844,
      "step": 75289
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7546792030334473,
      "learning_rate": 0.0004553205507744711,
      "loss": 3.0329,
      "step": 75290
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.445364236831665,
      "learning_rate": 0.0004553170511166403,
      "loss": 3.0415,
      "step": 75291
    },
    {
      "epoch": 0.98,
      "grad_norm": 5.820186138153076,
      "learning_rate": 0.0004553135514299329,
      "loss": 2.9939,
      "step": 75292
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.6025867462158203,
      "learning_rate": 0.00045531005171435,
      "loss": 3.2032,
      "step": 75293
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.6295268535614014,
      "learning_rate": 0.00045530655196989193,
      "loss": 2.9025,
      "step": 75294
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7469024658203125,
      "learning_rate": 0.0004553030521965594,
      "loss": 3.0732,
      "step": 75295
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.074453592300415,
      "learning_rate": 0.00045529955239435323,
      "loss": 3.2407,
      "step": 75296
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.9292798042297363,
      "learning_rate": 0.0004552960525632739,
      "loss": 3.1371,
      "step": 75297
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.0094897747039795,
      "learning_rate": 0.000455292552703322,
      "loss": 3.1678,
      "step": 75298
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8399617671966553,
      "learning_rate": 0.00045528905281449843,
      "loss": 2.8123,
      "step": 75299
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5083057880401611,
      "learning_rate": 0.00045528555289680363,
      "loss": 3.0957,
      "step": 75300
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.3949010372161865,
      "learning_rate": 0.00045528205295023827,
      "loss": 2.9725,
      "step": 75301
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.2023932933807373,
      "learning_rate": 0.00045527855297480303,
      "loss": 3.1806,
      "step": 75302
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7275915145874023,
      "learning_rate": 0.00045527505297049864,
      "loss": 3.1865,
      "step": 75303
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4089523553848267,
      "learning_rate": 0.00045527155293732563,
      "loss": 2.9511,
      "step": 75304
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6778037548065186,
      "learning_rate": 0.00045526805287528476,
      "loss": 3.0623,
      "step": 75305
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.112088918685913,
      "learning_rate": 0.0004552645527843767,
      "loss": 2.958,
      "step": 75306
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.400325298309326,
      "learning_rate": 0.00045526105266460194,
      "loss": 2.9511,
      "step": 75307
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5789670944213867,
      "learning_rate": 0.00045525755251596113,
      "loss": 3.1188,
      "step": 75308
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3113250732421875,
      "learning_rate": 0.0004552540523384552,
      "loss": 2.7613,
      "step": 75309
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7467390298843384,
      "learning_rate": 0.00045525055213208444,
      "loss": 3.067,
      "step": 75310
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.741127371788025,
      "learning_rate": 0.0004552470518968497,
      "loss": 3.044,
      "step": 75311
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.8929100036621094,
      "learning_rate": 0.00045524355163275177,
      "loss": 2.8509,
      "step": 75312
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.01200532913208,
      "learning_rate": 0.000455240051339791,
      "loss": 3.0094,
      "step": 75313
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6191495656967163,
      "learning_rate": 0.00045523655101796815,
      "loss": 2.9001,
      "step": 75314
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5500050783157349,
      "learning_rate": 0.000455233050667284,
      "loss": 2.9812,
      "step": 75315
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5747106075286865,
      "learning_rate": 0.00045522955028773896,
      "loss": 3.025,
      "step": 75316
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4067249298095703,
      "learning_rate": 0.0004552260498793339,
      "loss": 2.983,
      "step": 75317
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.641944169998169,
      "learning_rate": 0.0004552225494420694,
      "loss": 3.0803,
      "step": 75318
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8140865564346313,
      "learning_rate": 0.000455219048975946,
      "loss": 3.1617,
      "step": 75319
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.593824028968811,
      "learning_rate": 0.00045521554848096456,
      "loss": 3.0491,
      "step": 75320
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.603430986404419,
      "learning_rate": 0.0004552120479571256,
      "loss": 3.2242,
      "step": 75321
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6146918535232544,
      "learning_rate": 0.00045520854740442975,
      "loss": 3.0582,
      "step": 75322
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6268874406814575,
      "learning_rate": 0.00045520504682287764,
      "loss": 2.9493,
      "step": 75323
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0689823627471924,
      "learning_rate": 0.00045520154621247013,
      "loss": 3.0173,
      "step": 75324
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.809922456741333,
      "learning_rate": 0.00045519804557320767,
      "loss": 3.0691,
      "step": 75325
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8347874879837036,
      "learning_rate": 0.0004551945449050909,
      "loss": 3.0877,
      "step": 75326
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8099117279052734,
      "learning_rate": 0.00045519104420812064,
      "loss": 3.1843,
      "step": 75327
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.837204098701477,
      "learning_rate": 0.0004551875434822974,
      "loss": 2.8921,
      "step": 75328
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7583366632461548,
      "learning_rate": 0.0004551840427276218,
      "loss": 2.8577,
      "step": 75329
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1812992095947266,
      "learning_rate": 0.0004551805419440946,
      "loss": 3.1977,
      "step": 75330
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5940208435058594,
      "learning_rate": 0.0004551770411317165,
      "loss": 3.2776,
      "step": 75331
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.649444580078125,
      "learning_rate": 0.0004551735402904879,
      "loss": 2.9195,
      "step": 75332
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8230093717575073,
      "learning_rate": 0.0004551700394204097,
      "loss": 2.9423,
      "step": 75333
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6308581829071045,
      "learning_rate": 0.0004551665385214824,
      "loss": 3.1055,
      "step": 75334
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1807191371917725,
      "learning_rate": 0.00045516303759370676,
      "loss": 2.9369,
      "step": 75335
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5418440103530884,
      "learning_rate": 0.00045515953663708336,
      "loss": 2.9842,
      "step": 75336
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5402753353118896,
      "learning_rate": 0.0004551560356516129,
      "loss": 2.8948,
      "step": 75337
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0238821506500244,
      "learning_rate": 0.000455152534637296,
      "loss": 2.8995,
      "step": 75338
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.800608515739441,
      "learning_rate": 0.00045514903359413324,
      "loss": 2.8577,
      "step": 75339
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8307241201400757,
      "learning_rate": 0.0004551455325221254,
      "loss": 2.854,
      "step": 75340
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5138256549835205,
      "learning_rate": 0.00045514203142127315,
      "loss": 3.0143,
      "step": 75341
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8704614639282227,
      "learning_rate": 0.000455138530291577,
      "loss": 2.886,
      "step": 75342
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.871363878250122,
      "learning_rate": 0.00045513502913303763,
      "loss": 3.0954,
      "step": 75343
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.80308198928833,
      "learning_rate": 0.00045513152794565584,
      "loss": 3.24,
      "step": 75344
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.210482120513916,
      "learning_rate": 0.0004551280267294321,
      "loss": 2.898,
      "step": 75345
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6412442922592163,
      "learning_rate": 0.0004551245254843671,
      "loss": 3.0236,
      "step": 75346
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.901252269744873,
      "learning_rate": 0.00045512102421046155,
      "loss": 3.1507,
      "step": 75347
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7201591730117798,
      "learning_rate": 0.0004551175229077161,
      "loss": 3.2967,
      "step": 75348
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.855708360671997,
      "learning_rate": 0.0004551140215761313,
      "loss": 2.9613,
      "step": 75349
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.501354217529297,
      "learning_rate": 0.00045511052021570797,
      "loss": 3.008,
      "step": 75350
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.666386604309082,
      "learning_rate": 0.00045510701882644665,
      "loss": 3.1194,
      "step": 75351
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4155473709106445,
      "learning_rate": 0.000455103517408348,
      "loss": 2.9741,
      "step": 75352
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6505595445632935,
      "learning_rate": 0.00045510001596141264,
      "loss": 3.0433,
      "step": 75353
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6879922151565552,
      "learning_rate": 0.0004550965144856413,
      "loss": 3.0675,
      "step": 75354
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6308974027633667,
      "learning_rate": 0.00045509301298103454,
      "loss": 2.8564,
      "step": 75355
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5257679224014282,
      "learning_rate": 0.0004550895114475931,
      "loss": 3.0,
      "step": 75356
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.487286925315857,
      "learning_rate": 0.0004550860098853176,
      "loss": 2.883,
      "step": 75357
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.857598066329956,
      "learning_rate": 0.0004550825082942087,
      "loss": 3.0097,
      "step": 75358
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6656056642532349,
      "learning_rate": 0.00045507900667426693,
      "loss": 3.0765,
      "step": 75359
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.711717128753662,
      "learning_rate": 0.0004550755050254932,
      "loss": 2.7927,
      "step": 75360
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.474569082260132,
      "learning_rate": 0.00045507200334788794,
      "loss": 2.9045,
      "step": 75361
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8479686975479126,
      "learning_rate": 0.00045506850164145184,
      "loss": 3.0495,
      "step": 75362
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3007469177246094,
      "learning_rate": 0.00045506499990618555,
      "loss": 3.0098,
      "step": 75363
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7626107931137085,
      "learning_rate": 0.0004550614981420898,
      "loss": 3.1696,
      "step": 75364
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.4627556800842285,
      "learning_rate": 0.0004550579963491652,
      "loss": 2.9522,
      "step": 75365
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5769028663635254,
      "learning_rate": 0.0004550544945274124,
      "loss": 3.2251,
      "step": 75366
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8726390600204468,
      "learning_rate": 0.000455050992676832,
      "loss": 3.1135,
      "step": 75367
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9315072298049927,
      "learning_rate": 0.0004550474907974247,
      "loss": 2.8388,
      "step": 75368
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3680862188339233,
      "learning_rate": 0.00045504398888919116,
      "loss": 3.2292,
      "step": 75369
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7627865076065063,
      "learning_rate": 0.000455040486952132,
      "loss": 3.1247,
      "step": 75370
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6668621301651,
      "learning_rate": 0.00045503698498624786,
      "loss": 3.2125,
      "step": 75371
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5354598760604858,
      "learning_rate": 0.00045503348299153943,
      "loss": 3.2796,
      "step": 75372
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8329402208328247,
      "learning_rate": 0.0004550299809680075,
      "loss": 2.794,
      "step": 75373
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.035507917404175,
      "learning_rate": 0.0004550264789156524,
      "loss": 3.0226,
      "step": 75374
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6518014669418335,
      "learning_rate": 0.000455022976834475,
      "loss": 2.9325,
      "step": 75375
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4986248016357422,
      "learning_rate": 0.0004550194747244759,
      "loss": 2.8843,
      "step": 75376
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6504721641540527,
      "learning_rate": 0.0004550159725856557,
      "loss": 3.0652,
      "step": 75377
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.694326639175415,
      "learning_rate": 0.00045501247041801517,
      "loss": 3.0493,
      "step": 75378
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.841287612915039,
      "learning_rate": 0.0004550089682215549,
      "loss": 3.08,
      "step": 75379
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6098055839538574,
      "learning_rate": 0.00045500546599627556,
      "loss": 2.8909,
      "step": 75380
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7153589725494385,
      "learning_rate": 0.00045500196374217766,
      "loss": 3.2086,
      "step": 75381
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.736598253250122,
      "learning_rate": 0.00045499846145926205,
      "loss": 3.3367,
      "step": 75382
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.46316397190094,
      "learning_rate": 0.0004549949591475293,
      "loss": 3.017,
      "step": 75383
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6497303247451782,
      "learning_rate": 0.00045499145680698007,
      "loss": 3.2028,
      "step": 75384
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4967308044433594,
      "learning_rate": 0.00045498795443761505,
      "loss": 3.0056,
      "step": 75385
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9113292694091797,
      "learning_rate": 0.00045498445203943477,
      "loss": 3.1706,
      "step": 75386
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5066797733306885,
      "learning_rate": 0.00045498094961243996,
      "loss": 3.0063,
      "step": 75387
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.712965726852417,
      "learning_rate": 0.00045497744715663124,
      "loss": 3.0193,
      "step": 75388
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.034973621368408,
      "learning_rate": 0.0004549739446720093,
      "loss": 3.0202,
      "step": 75389
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9897602796554565,
      "learning_rate": 0.00045497044215857485,
      "loss": 3.0019,
      "step": 75390
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.818241834640503,
      "learning_rate": 0.00045496693961632844,
      "loss": 2.8824,
      "step": 75391
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.856699824333191,
      "learning_rate": 0.00045496343704527076,
      "loss": 2.9002,
      "step": 75392
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6055678129196167,
      "learning_rate": 0.00045495993444540236,
      "loss": 3.0487,
      "step": 75393
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2524757385253906,
      "learning_rate": 0.00045495643181672405,
      "loss": 3.0027,
      "step": 75394
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.625907301902771,
      "learning_rate": 0.00045495292915923637,
      "loss": 3.0878,
      "step": 75395
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5773963928222656,
      "learning_rate": 0.00045494942647294004,
      "loss": 2.9803,
      "step": 75396
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9042459726333618,
      "learning_rate": 0.0004549459237578358,
      "loss": 2.8978,
      "step": 75397
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.504739761352539,
      "learning_rate": 0.0004549424210139241,
      "loss": 3.1001,
      "step": 75398
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3883979320526123,
      "learning_rate": 0.0004549389182412056,
      "loss": 3.0337,
      "step": 75399
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.524475574493408,
      "learning_rate": 0.0004549354154396811,
      "loss": 2.8765,
      "step": 75400
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6980780363082886,
      "learning_rate": 0.0004549319126093512,
      "loss": 3.1068,
      "step": 75401
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4305661916732788,
      "learning_rate": 0.0004549284097502165,
      "loss": 3.0375,
      "step": 75402
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3926126956939697,
      "learning_rate": 0.0004549249068622777,
      "loss": 3.2097,
      "step": 75403
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.473425030708313,
      "learning_rate": 0.00045492140394553544,
      "loss": 3.0515,
      "step": 75404
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4782803058624268,
      "learning_rate": 0.0004549179009999903,
      "loss": 3.247,
      "step": 75405
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3726378679275513,
      "learning_rate": 0.00045491439802564315,
      "loss": 3.3687,
      "step": 75406
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6612266302108765,
      "learning_rate": 0.0004549108950224944,
      "loss": 3.2145,
      "step": 75407
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.5797300338745117,
      "learning_rate": 0.00045490739199054473,
      "loss": 2.8218,
      "step": 75408
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7698076963424683,
      "learning_rate": 0.00045490388892979494,
      "loss": 2.9723,
      "step": 75409
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.9065802097320557,
      "learning_rate": 0.00045490038584024555,
      "loss": 2.9904,
      "step": 75410
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.9584851264953613,
      "learning_rate": 0.00045489688272189725,
      "loss": 3.0284,
      "step": 75411
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6693204641342163,
      "learning_rate": 0.00045489337957475075,
      "loss": 2.9913,
      "step": 75412
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6675552129745483,
      "learning_rate": 0.0004548898763988066,
      "loss": 2.7795,
      "step": 75413
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5270365476608276,
      "learning_rate": 0.0004548863731940655,
      "loss": 3.0029,
      "step": 75414
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5716853141784668,
      "learning_rate": 0.00045488286996052814,
      "loss": 3.1821,
      "step": 75415
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4942233562469482,
      "learning_rate": 0.0004548793666981951,
      "loss": 3.1167,
      "step": 75416
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1470067501068115,
      "learning_rate": 0.000454875863407067,
      "loss": 3.1006,
      "step": 75417
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7325031757354736,
      "learning_rate": 0.00045487236008714466,
      "loss": 3.0546,
      "step": 75418
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4711637496948242,
      "learning_rate": 0.0004548688567384286,
      "loss": 2.9823,
      "step": 75419
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9152796268463135,
      "learning_rate": 0.00045486535336091945,
      "loss": 2.8257,
      "step": 75420
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3866307735443115,
      "learning_rate": 0.00045486184995461784,
      "loss": 2.981,
      "step": 75421
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.464601993560791,
      "learning_rate": 0.0004548583465195246,
      "loss": 2.938,
      "step": 75422
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.475634217262268,
      "learning_rate": 0.0004548548430556403,
      "loss": 3.2201,
      "step": 75423
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.634446620941162,
      "learning_rate": 0.0004548513395629655,
      "loss": 2.8734,
      "step": 75424
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8984698057174683,
      "learning_rate": 0.000454847836041501,
      "loss": 2.7362,
      "step": 75425
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.733436107635498,
      "learning_rate": 0.0004548443324912472,
      "loss": 2.9327,
      "step": 75426
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8284001350402832,
      "learning_rate": 0.00045484082891220496,
      "loss": 2.6778,
      "step": 75427
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6279194355010986,
      "learning_rate": 0.000454837325304375,
      "loss": 3.0563,
      "step": 75428
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.8633744716644287,
      "learning_rate": 0.00045483382166775773,
      "loss": 3.0094,
      "step": 75429
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7518187761306763,
      "learning_rate": 0.0004548303180023541,
      "loss": 2.7895,
      "step": 75430
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6187405586242676,
      "learning_rate": 0.0004548268143081644,
      "loss": 3.1957,
      "step": 75431
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.963239073753357,
      "learning_rate": 0.00045482331058518954,
      "loss": 3.2209,
      "step": 75432
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5463167428970337,
      "learning_rate": 0.00045481980683343017,
      "loss": 3.137,
      "step": 75433
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8753981590270996,
      "learning_rate": 0.0004548163030528868,
      "loss": 3.1772,
      "step": 75434
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0505530834198,
      "learning_rate": 0.0004548127992435602,
      "loss": 2.9189,
      "step": 75435
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0029213428497314,
      "learning_rate": 0.00045480929540545094,
      "loss": 3.1428,
      "step": 75436
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.641498327255249,
      "learning_rate": 0.00045480579153855976,
      "loss": 3.1335,
      "step": 75437
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6621804237365723,
      "learning_rate": 0.00045480228764288724,
      "loss": 3.0825,
      "step": 75438
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.226149797439575,
      "learning_rate": 0.00045479878371843397,
      "loss": 2.868,
      "step": 75439
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.422113060951233,
      "learning_rate": 0.0004547952797652008,
      "loss": 3.1041,
      "step": 75440
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4582090377807617,
      "learning_rate": 0.0004547917757831882,
      "loss": 3.0189,
      "step": 75441
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.4748072624206543,
      "learning_rate": 0.0004547882717723969,
      "loss": 3.0507,
      "step": 75442
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.468015432357788,
      "learning_rate": 0.0004547847677328276,
      "loss": 3.2069,
      "step": 75443
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6577644348144531,
      "learning_rate": 0.00045478126366448073,
      "loss": 3.1162,
      "step": 75444
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.634006381034851,
      "learning_rate": 0.0004547777595673572,
      "loss": 3.0645,
      "step": 75445
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8618078231811523,
      "learning_rate": 0.00045477425544145767,
      "loss": 2.9644,
      "step": 75446
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8453632593154907,
      "learning_rate": 0.0004547707512867826,
      "loss": 3.1093,
      "step": 75447
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6892504692077637,
      "learning_rate": 0.0004547672471033326,
      "loss": 2.9743,
      "step": 75448
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.760175108909607,
      "learning_rate": 0.0004547637428911086,
      "loss": 3.145,
      "step": 75449
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.017436981201172,
      "learning_rate": 0.0004547602386501111,
      "loss": 3.1106,
      "step": 75450
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7248027324676514,
      "learning_rate": 0.00045475673438034067,
      "loss": 3.0562,
      "step": 75451
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.093543291091919,
      "learning_rate": 0.0004547532300817981,
      "loss": 3.0814,
      "step": 75452
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6112194061279297,
      "learning_rate": 0.000454749725754484,
      "loss": 2.9994,
      "step": 75453
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8288859128952026,
      "learning_rate": 0.000454746221398399,
      "loss": 3.0975,
      "step": 75454
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6541906595230103,
      "learning_rate": 0.0004547427170135437,
      "loss": 2.8587,
      "step": 75455
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6555095911026,
      "learning_rate": 0.00045473921259991885,
      "loss": 3.2167,
      "step": 75456
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9926340579986572,
      "learning_rate": 0.00045473570815752497,
      "loss": 3.2861,
      "step": 75457
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5940465927124023,
      "learning_rate": 0.00045473220368636287,
      "loss": 2.9874,
      "step": 75458
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6207250356674194,
      "learning_rate": 0.0004547286991864332,
      "loss": 3.0455,
      "step": 75459
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7991584539413452,
      "learning_rate": 0.0004547251946577364,
      "loss": 2.9904,
      "step": 75460
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6950709819793701,
      "learning_rate": 0.0004547216901002734,
      "loss": 2.7896,
      "step": 75461
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.839712381362915,
      "learning_rate": 0.0004547181855140446,
      "loss": 3.0887,
      "step": 75462
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5882805585861206,
      "learning_rate": 0.0004547146808990508,
      "loss": 3.063,
      "step": 75463
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8555325269699097,
      "learning_rate": 0.00045471117625529275,
      "loss": 2.9818,
      "step": 75464
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4785763025283813,
      "learning_rate": 0.0004547076715827709,
      "loss": 2.8274,
      "step": 75465
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5323160886764526,
      "learning_rate": 0.00045470416688148585,
      "loss": 3.1286,
      "step": 75466
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9758641719818115,
      "learning_rate": 0.00045470066215143855,
      "loss": 2.9694,
      "step": 75467
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8026012182235718,
      "learning_rate": 0.00045469715739262937,
      "loss": 3.0383,
      "step": 75468
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.602687954902649,
      "learning_rate": 0.0004546936526050591,
      "loss": 3.1256,
      "step": 75469
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1979782581329346,
      "learning_rate": 0.00045469014778872836,
      "loss": 2.8506,
      "step": 75470
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7654532194137573,
      "learning_rate": 0.0004546866429436378,
      "loss": 3.1916,
      "step": 75471
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5939456224441528,
      "learning_rate": 0.000454683138069788,
      "loss": 3.0254,
      "step": 75472
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.812596321105957,
      "learning_rate": 0.00045467963316717986,
      "loss": 3.0914,
      "step": 75473
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6725114583969116,
      "learning_rate": 0.00045467612823581373,
      "loss": 3.0879,
      "step": 75474
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.171323299407959,
      "learning_rate": 0.0004546726232756904,
      "loss": 3.0147,
      "step": 75475
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7670446634292603,
      "learning_rate": 0.0004546691182868105,
      "loss": 2.7742,
      "step": 75476
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.521071195602417,
      "learning_rate": 0.00045466561326917474,
      "loss": 3.0077,
      "step": 75477
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.550436019897461,
      "learning_rate": 0.00045466210822278364,
      "loss": 3.0455,
      "step": 75478
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.898149847984314,
      "learning_rate": 0.00045465860314763805,
      "loss": 3.2326,
      "step": 75479
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.616750955581665,
      "learning_rate": 0.00045465509804373843,
      "loss": 3.143,
      "step": 75480
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.696471095085144,
      "learning_rate": 0.0004546515929110855,
      "loss": 3.0521,
      "step": 75481
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8533129692077637,
      "learning_rate": 0.00045464808774967996,
      "loss": 2.9685,
      "step": 75482
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.606205940246582,
      "learning_rate": 0.0004546445825595224,
      "loss": 3.1715,
      "step": 75483
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.903497338294983,
      "learning_rate": 0.0004546410773406135,
      "loss": 2.9675,
      "step": 75484
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5329043865203857,
      "learning_rate": 0.0004546375720929539,
      "loss": 2.7657,
      "step": 75485
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.3916771411895752,
      "learning_rate": 0.00045463406681654425,
      "loss": 3.0902,
      "step": 75486
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5324935913085938,
      "learning_rate": 0.00045463056151138516,
      "loss": 3.1502,
      "step": 75487
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4474395513534546,
      "learning_rate": 0.0004546270561774775,
      "loss": 2.977,
      "step": 75488
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4516538381576538,
      "learning_rate": 0.0004546235508148216,
      "loss": 3.2512,
      "step": 75489
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6775496006011963,
      "learning_rate": 0.00045462004542341826,
      "loss": 3.006,
      "step": 75490
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5057517290115356,
      "learning_rate": 0.0004546165400032682,
      "loss": 2.9927,
      "step": 75491
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5673880577087402,
      "learning_rate": 0.000454613034554372,
      "loss": 3.1704,
      "step": 75492
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4411215782165527,
      "learning_rate": 0.00045460952907673026,
      "loss": 3.0071,
      "step": 75493
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0048816204071045,
      "learning_rate": 0.00045460602357034384,
      "loss": 2.8537,
      "step": 75494
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.269184112548828,
      "learning_rate": 0.00045460251803521306,
      "loss": 2.9198,
      "step": 75495
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.5309550762176514,
      "learning_rate": 0.0004545990124713388,
      "loss": 3.0069,
      "step": 75496
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8546080589294434,
      "learning_rate": 0.00045459550687872174,
      "loss": 2.9972,
      "step": 75497
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9273208379745483,
      "learning_rate": 0.0004545920012573624,
      "loss": 2.8041,
      "step": 75498
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.404125928878784,
      "learning_rate": 0.00045458849560726147,
      "loss": 3.2326,
      "step": 75499
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.426699638366699,
      "learning_rate": 0.00045458498992841963,
      "loss": 2.8698,
      "step": 75500
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6773478984832764,
      "learning_rate": 0.00045458148422083754,
      "loss": 3.0075,
      "step": 75501
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.007328748703003,
      "learning_rate": 0.00045457797848451586,
      "loss": 3.2679,
      "step": 75502
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2513036727905273,
      "learning_rate": 0.0004545744727194551,
      "loss": 3.0179,
      "step": 75503
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6589281558990479,
      "learning_rate": 0.00045457096692565625,
      "loss": 3.0719,
      "step": 75504
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1694893836975098,
      "learning_rate": 0.00045456746110311957,
      "loss": 2.9609,
      "step": 75505
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4169807434082031,
      "learning_rate": 0.0004545639552518459,
      "loss": 2.8819,
      "step": 75506
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2644174098968506,
      "learning_rate": 0.0004545604493718359,
      "loss": 2.8686,
      "step": 75507
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.727921485900879,
      "learning_rate": 0.00045455694346309015,
      "loss": 3.2222,
      "step": 75508
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.8317461013793945,
      "learning_rate": 0.0004545534375256094,
      "loss": 3.0403,
      "step": 75509
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.0321547985076904,
      "learning_rate": 0.0004545499315593943,
      "loss": 2.8579,
      "step": 75510
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.270221710205078,
      "learning_rate": 0.0004545464255644453,
      "loss": 3.0891,
      "step": 75511
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7462143898010254,
      "learning_rate": 0.00045454291954076325,
      "loss": 2.9746,
      "step": 75512
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.516653537750244,
      "learning_rate": 0.00045453941348834887,
      "loss": 3.3198,
      "step": 75513
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1937966346740723,
      "learning_rate": 0.0004545359074072026,
      "loss": 2.9235,
      "step": 75514
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7648717164993286,
      "learning_rate": 0.0004545324012973252,
      "loss": 3.1151,
      "step": 75515
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7781919240951538,
      "learning_rate": 0.00045452889515871733,
      "loss": 3.0057,
      "step": 75516
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9425116777420044,
      "learning_rate": 0.0004545253889913796,
      "loss": 3.1461,
      "step": 75517
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6464961767196655,
      "learning_rate": 0.0004545218827953127,
      "loss": 3.2229,
      "step": 75518
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.435699701309204,
      "learning_rate": 0.00045451837657051723,
      "loss": 2.9111,
      "step": 75519
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2559525966644287,
      "learning_rate": 0.0004545148703169939,
      "loss": 2.8144,
      "step": 75520
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3855435848236084,
      "learning_rate": 0.0004545113640347434,
      "loss": 2.9225,
      "step": 75521
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8155494928359985,
      "learning_rate": 0.0004545078577237662,
      "loss": 3.0325,
      "step": 75522
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5169955492019653,
      "learning_rate": 0.00045450435138406307,
      "loss": 3.1385,
      "step": 75523
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8560422658920288,
      "learning_rate": 0.0004545008450156347,
      "loss": 3.0357,
      "step": 75524
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.398704171180725,
      "learning_rate": 0.0004544973386184818,
      "loss": 2.8407,
      "step": 75525
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.502132534980774,
      "learning_rate": 0.00045449383219260485,
      "loss": 2.8057,
      "step": 75526
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.510138750076294,
      "learning_rate": 0.00045449032573800454,
      "loss": 3.1802,
      "step": 75527
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7529116868972778,
      "learning_rate": 0.00045448681925468166,
      "loss": 2.7936,
      "step": 75528
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.849177122116089,
      "learning_rate": 0.00045448331274263674,
      "loss": 3.2231,
      "step": 75529
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.110468626022339,
      "learning_rate": 0.0004544798062018704,
      "loss": 3.2177,
      "step": 75530
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8590184450149536,
      "learning_rate": 0.0004544762996323834,
      "loss": 2.9671,
      "step": 75531
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.4012181758880615,
      "learning_rate": 0.0004544727930341763,
      "loss": 2.8747,
      "step": 75532
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.43915057182312,
      "learning_rate": 0.00045446928640724985,
      "loss": 3.1206,
      "step": 75533
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.4356768131256104,
      "learning_rate": 0.0004544657797516046,
      "loss": 2.9138,
      "step": 75534
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0653562545776367,
      "learning_rate": 0.0004544622730672412,
      "loss": 3.1044,
      "step": 75535
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.9428672790527344,
      "learning_rate": 0.0004544587663541604,
      "loss": 3.0137,
      "step": 75536
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.104724168777466,
      "learning_rate": 0.00045445525961236284,
      "loss": 3.0205,
      "step": 75537
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.901968002319336,
      "learning_rate": 0.00045445175284184906,
      "loss": 2.9695,
      "step": 75538
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.13834285736084,
      "learning_rate": 0.0004544482460426198,
      "loss": 3.0258,
      "step": 75539
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9984426498413086,
      "learning_rate": 0.0004544447392146758,
      "loss": 3.0702,
      "step": 75540
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.8956525325775146,
      "learning_rate": 0.0004544412323580175,
      "loss": 2.9584,
      "step": 75541
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.5946216583251953,
      "learning_rate": 0.0004544377254726457,
      "loss": 3.0027,
      "step": 75542
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5315958261489868,
      "learning_rate": 0.00045443421855856093,
      "loss": 3.0186,
      "step": 75543
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8572696447372437,
      "learning_rate": 0.000454430711615764,
      "loss": 3.1387,
      "step": 75544
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.8654823303222656,
      "learning_rate": 0.00045442720464425546,
      "loss": 3.0528,
      "step": 75545
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6147278547286987,
      "learning_rate": 0.00045442369764403604,
      "loss": 3.0008,
      "step": 75546
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6988027095794678,
      "learning_rate": 0.00045442019061510625,
      "loss": 2.9149,
      "step": 75547
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3015360832214355,
      "learning_rate": 0.0004544166835574669,
      "loss": 3.0154,
      "step": 75548
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.878096103668213,
      "learning_rate": 0.00045441317647111856,
      "loss": 2.8413,
      "step": 75549
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6647151708602905,
      "learning_rate": 0.00045440966935606186,
      "loss": 2.8838,
      "step": 75550
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7840148210525513,
      "learning_rate": 0.0004544061622122975,
      "loss": 3.213,
      "step": 75551
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7701197862625122,
      "learning_rate": 0.0004544026550398261,
      "loss": 2.8285,
      "step": 75552
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.602682113647461,
      "learning_rate": 0.0004543991478386484,
      "loss": 3.0242,
      "step": 75553
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.120326280593872,
      "learning_rate": 0.00045439564060876494,
      "loss": 3.1508,
      "step": 75554
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3216888904571533,
      "learning_rate": 0.00045439213335017646,
      "loss": 3.027,
      "step": 75555
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7716350555419922,
      "learning_rate": 0.0004543886260628835,
      "loss": 2.9186,
      "step": 75556
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.973878264427185,
      "learning_rate": 0.00045438511874688686,
      "loss": 3.0966,
      "step": 75557
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4216079711914062,
      "learning_rate": 0.0004543816114021871,
      "loss": 2.987,
      "step": 75558
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6019952297210693,
      "learning_rate": 0.0004543781040287848,
      "loss": 2.826,
      "step": 75559
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5437520742416382,
      "learning_rate": 0.0004543745966266807,
      "loss": 3.0609,
      "step": 75560
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5040276050567627,
      "learning_rate": 0.0004543710891958756,
      "loss": 3.0241,
      "step": 75561
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4662116765975952,
      "learning_rate": 0.00045436758173636986,
      "loss": 2.8551,
      "step": 75562
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0359997749328613,
      "learning_rate": 0.00045436407424816433,
      "loss": 2.8037,
      "step": 75563
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6985217332839966,
      "learning_rate": 0.00045436056673125953,
      "loss": 2.8957,
      "step": 75564
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.068742275238037,
      "learning_rate": 0.00045435705918565623,
      "loss": 2.8296,
      "step": 75565
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4608755111694336,
      "learning_rate": 0.00045435355161135507,
      "loss": 3.1778,
      "step": 75566
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7622405290603638,
      "learning_rate": 0.00045435004400835665,
      "loss": 2.8663,
      "step": 75567
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6384903192520142,
      "learning_rate": 0.0004543465363766617,
      "loss": 2.9454,
      "step": 75568
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7721275091171265,
      "learning_rate": 0.00045434302871627075,
      "loss": 3.2301,
      "step": 75569
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6122630834579468,
      "learning_rate": 0.00045433952102718445,
      "loss": 3.1065,
      "step": 75570
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5121338367462158,
      "learning_rate": 0.0004543360133094036,
      "loss": 2.9924,
      "step": 75571
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6170181035995483,
      "learning_rate": 0.0004543325055629288,
      "loss": 2.8261,
      "step": 75572
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.889503836631775,
      "learning_rate": 0.0004543289977877606,
      "loss": 2.9018,
      "step": 75573
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7340941429138184,
      "learning_rate": 0.00045432548998389986,
      "loss": 2.8717,
      "step": 75574
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5835715532302856,
      "learning_rate": 0.00045432198215134695,
      "loss": 3.1396,
      "step": 75575
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3039679527282715,
      "learning_rate": 0.00045431847429010274,
      "loss": 2.8633,
      "step": 75576
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5132578611373901,
      "learning_rate": 0.0004543149664001678,
      "loss": 2.9006,
      "step": 75577
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.041914939880371,
      "learning_rate": 0.0004543114584815428,
      "loss": 2.8566,
      "step": 75578
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5621832609176636,
      "learning_rate": 0.00045430795053422835,
      "loss": 3.0551,
      "step": 75579
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7774691581726074,
      "learning_rate": 0.0004543044425582252,
      "loss": 3.0656,
      "step": 75580
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7564562559127808,
      "learning_rate": 0.00045430093455353394,
      "loss": 3.017,
      "step": 75581
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7723972797393799,
      "learning_rate": 0.0004542974265201552,
      "loss": 3.0822,
      "step": 75582
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.342373847961426,
      "learning_rate": 0.0004542939184580896,
      "loss": 3.0478,
      "step": 75583
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1285312175750732,
      "learning_rate": 0.0004542904103673379,
      "loss": 3.045,
      "step": 75584
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.029245615005493,
      "learning_rate": 0.0004542869022479007,
      "loss": 3.1572,
      "step": 75585
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.502742052078247,
      "learning_rate": 0.00045428339409977873,
      "loss": 3.1611,
      "step": 75586
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.592079997062683,
      "learning_rate": 0.00045427988592297244,
      "loss": 3.0628,
      "step": 75587
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5540822744369507,
      "learning_rate": 0.0004542763777174826,
      "loss": 2.8662,
      "step": 75588
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7974889278411865,
      "learning_rate": 0.00045427286948331,
      "loss": 2.9287,
      "step": 75589
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7927030324935913,
      "learning_rate": 0.0004542693612204551,
      "loss": 2.7735,
      "step": 75590
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7236034870147705,
      "learning_rate": 0.00045426585292891855,
      "loss": 2.9478,
      "step": 75591
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5613958835601807,
      "learning_rate": 0.00045426234460870115,
      "loss": 2.8455,
      "step": 75592
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1982147693634033,
      "learning_rate": 0.0004542588362598034,
      "loss": 3.0521,
      "step": 75593
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9622505903244019,
      "learning_rate": 0.000454255327882226,
      "loss": 3.1528,
      "step": 75594
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5730106830596924,
      "learning_rate": 0.0004542518194759697,
      "loss": 3.0279,
      "step": 75595
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.828731656074524,
      "learning_rate": 0.0004542483110410351,
      "loss": 3.0817,
      "step": 75596
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.1575870513916016,
      "learning_rate": 0.00045424480257742273,
      "loss": 3.0569,
      "step": 75597
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0875744819641113,
      "learning_rate": 0.0004542412940851335,
      "loss": 3.0321,
      "step": 75598
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5803627967834473,
      "learning_rate": 0.0004542377855641677,
      "loss": 2.9431,
      "step": 75599
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8638943433761597,
      "learning_rate": 0.00045423427701452626,
      "loss": 2.9634,
      "step": 75600
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.8969435691833496,
      "learning_rate": 0.00045423076843620976,
      "loss": 3.2271,
      "step": 75601
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4276596307754517,
      "learning_rate": 0.0004542272598292189,
      "loss": 2.792,
      "step": 75602
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.496290922164917,
      "learning_rate": 0.0004542237511935542,
      "loss": 3.1731,
      "step": 75603
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4930036067962646,
      "learning_rate": 0.00045422024252921646,
      "loss": 2.8009,
      "step": 75604
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7093173265457153,
      "learning_rate": 0.00045421673383620623,
      "loss": 2.8812,
      "step": 75605
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9600931406021118,
      "learning_rate": 0.0004542132251145242,
      "loss": 2.8139,
      "step": 75606
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7628906965255737,
      "learning_rate": 0.000454209716364171,
      "loss": 3.1429,
      "step": 75607
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9553351402282715,
      "learning_rate": 0.00045420620758514734,
      "loss": 3.0281,
      "step": 75608
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7243256568908691,
      "learning_rate": 0.0004542026987774539,
      "loss": 3.0425,
      "step": 75609
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.3123679161071777,
      "learning_rate": 0.00045419918994109114,
      "loss": 2.9317,
      "step": 75610
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9000532627105713,
      "learning_rate": 0.00045419568107605987,
      "loss": 3.3122,
      "step": 75611
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6212137937545776,
      "learning_rate": 0.0004541921721823607,
      "loss": 3.0499,
      "step": 75612
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.7324929237365723,
      "learning_rate": 0.00045418866325999435,
      "loss": 2.9428,
      "step": 75613
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5945847034454346,
      "learning_rate": 0.0004541851543089614,
      "loss": 3.0207,
      "step": 75614
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6467012166976929,
      "learning_rate": 0.0004541816453292625,
      "loss": 2.964,
      "step": 75615
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7865527868270874,
      "learning_rate": 0.0004541781363208984,
      "loss": 2.959,
      "step": 75616
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8840914964675903,
      "learning_rate": 0.00045417462728386955,
      "loss": 3.0238,
      "step": 75617
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.602556586265564,
      "learning_rate": 0.0004541711182181768,
      "loss": 2.904,
      "step": 75618
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.79945707321167,
      "learning_rate": 0.0004541676091238208,
      "loss": 3.0651,
      "step": 75619
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8046754598617554,
      "learning_rate": 0.000454164100000802,
      "loss": 3.1348,
      "step": 75620
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.991044282913208,
      "learning_rate": 0.0004541605908491212,
      "loss": 3.1701,
      "step": 75621
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6311190128326416,
      "learning_rate": 0.00045415708166877906,
      "loss": 2.981,
      "step": 75622
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.535664677619934,
      "learning_rate": 0.00045415357245977625,
      "loss": 3.0678,
      "step": 75623
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4323121309280396,
      "learning_rate": 0.00045415006322211337,
      "loss": 3.1908,
      "step": 75624
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8604806661605835,
      "learning_rate": 0.00045414655395579107,
      "loss": 3.0189,
      "step": 75625
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7039748430252075,
      "learning_rate": 0.00045414304466081,
      "loss": 2.7475,
      "step": 75626
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6100174188613892,
      "learning_rate": 0.00045413953533717075,
      "loss": 3.1598,
      "step": 75627
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4575130939483643,
      "learning_rate": 0.00045413602598487425,
      "loss": 3.2013,
      "step": 75628
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4168258905410767,
      "learning_rate": 0.0004541325166039208,
      "loss": 3.0402,
      "step": 75629
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.689801573753357,
      "learning_rate": 0.00045412900719431123,
      "loss": 3.0131,
      "step": 75630
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5221469402313232,
      "learning_rate": 0.00045412549775604617,
      "loss": 2.9884,
      "step": 75631
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4805184602737427,
      "learning_rate": 0.0004541219882891263,
      "loss": 2.9126,
      "step": 75632
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7170493602752686,
      "learning_rate": 0.0004541184787935522,
      "loss": 2.932,
      "step": 75633
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.8576549291610718,
      "learning_rate": 0.0004541149692693246,
      "loss": 2.9162,
      "step": 75634
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.9877073764801025,
      "learning_rate": 0.00045411145971644416,
      "loss": 3.0789,
      "step": 75635
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.674443244934082,
      "learning_rate": 0.0004541079501349114,
      "loss": 3.0247,
      "step": 75636
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7176837921142578,
      "learning_rate": 0.0004541044405247271,
      "loss": 3.1874,
      "step": 75637
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6435199975967407,
      "learning_rate": 0.0004541009308858919,
      "loss": 2.9412,
      "step": 75638
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.2888669967651367,
      "learning_rate": 0.00045409742121840644,
      "loss": 3.0329,
      "step": 75639
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7381552457809448,
      "learning_rate": 0.00045409391152227127,
      "loss": 3.1494,
      "step": 75640
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.6964622735977173,
      "learning_rate": 0.00045409040179748725,
      "loss": 3.0041,
      "step": 75641
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.7086249589920044,
      "learning_rate": 0.00045408689204405486,
      "loss": 3.0258,
      "step": 75642
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4651676416397095,
      "learning_rate": 0.0004540833822619748,
      "loss": 3.006,
      "step": 75643
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4550272226333618,
      "learning_rate": 0.0004540798724512478,
      "loss": 2.9517,
      "step": 75644
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5087864398956299,
      "learning_rate": 0.00045407636261187426,
      "loss": 3.1189,
      "step": 75645
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.5956741571426392,
      "learning_rate": 0.0004540728527438551,
      "loss": 3.2753,
      "step": 75646
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.0204975605010986,
      "learning_rate": 0.000454069342847191,
      "loss": 2.8986,
      "step": 75647
    },
    {
      "epoch": 0.98,
      "grad_norm": 1.4351418018341064,
      "learning_rate": 0.0004540658329218825,
      "loss": 3.0785,
      "step": 75648
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.836591362953186,
      "learning_rate": 0.00045406232296793005,
      "loss": 2.9016,
      "step": 75649
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5802444219589233,
      "learning_rate": 0.0004540588129853347,
      "loss": 2.677,
      "step": 75650
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2983551025390625,
      "learning_rate": 0.00045405530297409685,
      "loss": 3.1569,
      "step": 75651
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0812854766845703,
      "learning_rate": 0.0004540517929342172,
      "loss": 3.1446,
      "step": 75652
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7856076955795288,
      "learning_rate": 0.00045404828286569644,
      "loss": 3.2825,
      "step": 75653
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5755771398544312,
      "learning_rate": 0.0004540447727685351,
      "loss": 3.1221,
      "step": 75654
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8524065017700195,
      "learning_rate": 0.00045404126264273403,
      "loss": 2.9342,
      "step": 75655
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6418757438659668,
      "learning_rate": 0.00045403775248829386,
      "loss": 2.9015,
      "step": 75656
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5162580013275146,
      "learning_rate": 0.000454034242305215,
      "loss": 3.1079,
      "step": 75657
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8235257863998413,
      "learning_rate": 0.00045403073209349825,
      "loss": 3.1139,
      "step": 75658
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5676923990249634,
      "learning_rate": 0.00045402722185314444,
      "loss": 3.0938,
      "step": 75659
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4257761240005493,
      "learning_rate": 0.0004540237115841539,
      "loss": 3.2587,
      "step": 75660
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7954356670379639,
      "learning_rate": 0.00045402020128652756,
      "loss": 3.0527,
      "step": 75661
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5238704681396484,
      "learning_rate": 0.00045401669096026596,
      "loss": 3.2014,
      "step": 75662
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9678913354873657,
      "learning_rate": 0.00045401318060536965,
      "loss": 2.9225,
      "step": 75663
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.884548544883728,
      "learning_rate": 0.00045400967022183946,
      "loss": 3.3284,
      "step": 75664
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6219459772109985,
      "learning_rate": 0.000454006159809676,
      "loss": 2.9938,
      "step": 75665
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.438116431236267,
      "learning_rate": 0.0004540026493688798,
      "loss": 3.0707,
      "step": 75666
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4933645725250244,
      "learning_rate": 0.0004539991388994516,
      "loss": 2.906,
      "step": 75667
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.985695481300354,
      "learning_rate": 0.0004539956284013921,
      "loss": 3.0714,
      "step": 75668
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5165226459503174,
      "learning_rate": 0.0004539921178747019,
      "loss": 3.0346,
      "step": 75669
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0767722129821777,
      "learning_rate": 0.0004539886073193817,
      "loss": 3.0921,
      "step": 75670
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.629693627357483,
      "learning_rate": 0.000453985096735432,
      "loss": 2.8742,
      "step": 75671
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.771702766418457,
      "learning_rate": 0.00045398158612285365,
      "loss": 2.8559,
      "step": 75672
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7607883214950562,
      "learning_rate": 0.0004539780754816472,
      "loss": 3.0677,
      "step": 75673
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9989099502563477,
      "learning_rate": 0.0004539745648118133,
      "loss": 2.9222,
      "step": 75674
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7442779541015625,
      "learning_rate": 0.00045397105411335264,
      "loss": 3.2988,
      "step": 75675
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.267225503921509,
      "learning_rate": 0.00045396754338626583,
      "loss": 3.1217,
      "step": 75676
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5570393800735474,
      "learning_rate": 0.0004539640326305536,
      "loss": 3.0357,
      "step": 75677
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6729217767715454,
      "learning_rate": 0.0004539605218462165,
      "loss": 2.9271,
      "step": 75678
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4999295473098755,
      "learning_rate": 0.00045395701103325526,
      "loss": 3.1505,
      "step": 75679
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.485513210296631,
      "learning_rate": 0.00045395350019167047,
      "loss": 3.0002,
      "step": 75680
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8383160829544067,
      "learning_rate": 0.00045394998932146285,
      "loss": 3.1052,
      "step": 75681
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7736142873764038,
      "learning_rate": 0.000453946478422633,
      "loss": 2.8559,
      "step": 75682
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8500020503997803,
      "learning_rate": 0.00045394296749518166,
      "loss": 2.935,
      "step": 75683
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.677612066268921,
      "learning_rate": 0.00045393945653910935,
      "loss": 3.2654,
      "step": 75684
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0792934894561768,
      "learning_rate": 0.0004539359455544168,
      "loss": 2.966,
      "step": 75685
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8450851440429688,
      "learning_rate": 0.0004539324345411046,
      "loss": 3.1806,
      "step": 75686
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0976173877716064,
      "learning_rate": 0.0004539289234991736,
      "loss": 3.046,
      "step": 75687
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.9619247913360596,
      "learning_rate": 0.00045392541242862417,
      "loss": 2.7712,
      "step": 75688
    },
    {
      "epoch": 0.99,
      "grad_norm": 4.718533039093018,
      "learning_rate": 0.0004539219013294571,
      "loss": 3.0437,
      "step": 75689
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.459419846534729,
      "learning_rate": 0.00045391839020167315,
      "loss": 2.978,
      "step": 75690
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6234369277954102,
      "learning_rate": 0.0004539148790452728,
      "loss": 2.8553,
      "step": 75691
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.6613314151763916,
      "learning_rate": 0.0004539113678602568,
      "loss": 2.9027,
      "step": 75692
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2217891216278076,
      "learning_rate": 0.0004539078566466257,
      "loss": 2.916,
      "step": 75693
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8045501708984375,
      "learning_rate": 0.00045390434540438026,
      "loss": 3.2324,
      "step": 75694
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2272608280181885,
      "learning_rate": 0.0004539008341335211,
      "loss": 3.1149,
      "step": 75695
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.923798680305481,
      "learning_rate": 0.00045389732283404886,
      "loss": 3.1287,
      "step": 75696
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0027832984924316,
      "learning_rate": 0.0004538938115059643,
      "loss": 3.0152,
      "step": 75697
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7753554582595825,
      "learning_rate": 0.00045389030014926783,
      "loss": 3.1376,
      "step": 75698
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3410357236862183,
      "learning_rate": 0.0004538867887639603,
      "loss": 3.1188,
      "step": 75699
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4080612659454346,
      "learning_rate": 0.0004538832773500424,
      "loss": 2.965,
      "step": 75700
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5774009227752686,
      "learning_rate": 0.0004538797659075145,
      "loss": 2.9419,
      "step": 75701
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9935051202774048,
      "learning_rate": 0.0004538762544363776,
      "loss": 2.8913,
      "step": 75702
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6717034578323364,
      "learning_rate": 0.0004538727429366322,
      "loss": 3.151,
      "step": 75703
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2027361392974854,
      "learning_rate": 0.00045386923140827885,
      "loss": 2.9951,
      "step": 75704
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3433899879455566,
      "learning_rate": 0.00045386571985131844,
      "loss": 3.2272,
      "step": 75705
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.359346389770508,
      "learning_rate": 0.0004538622082657514,
      "loss": 3.0864,
      "step": 75706
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9109854698181152,
      "learning_rate": 0.00045385869665157846,
      "loss": 2.9649,
      "step": 75707
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6889591217041016,
      "learning_rate": 0.0004538551850088004,
      "loss": 2.8415,
      "step": 75708
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.638088583946228,
      "learning_rate": 0.00045385167333741763,
      "loss": 2.9842,
      "step": 75709
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1165854930877686,
      "learning_rate": 0.00045384816163743095,
      "loss": 2.9812,
      "step": 75710
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4546105861663818,
      "learning_rate": 0.0004538446499088411,
      "loss": 2.8693,
      "step": 75711
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1971864700317383,
      "learning_rate": 0.00045384113815164845,
      "loss": 2.8827,
      "step": 75712
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6508066654205322,
      "learning_rate": 0.0004538376263658539,
      "loss": 2.9526,
      "step": 75713
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6405425071716309,
      "learning_rate": 0.0004538341145514581,
      "loss": 3.0294,
      "step": 75714
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9151933193206787,
      "learning_rate": 0.00045383060270846155,
      "loss": 3.2498,
      "step": 75715
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7728359699249268,
      "learning_rate": 0.00045382709083686507,
      "loss": 3.0066,
      "step": 75716
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5194592475891113,
      "learning_rate": 0.00045382357893666923,
      "loss": 3.13,
      "step": 75717
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.682043194770813,
      "learning_rate": 0.0004538200670078746,
      "loss": 3.2679,
      "step": 75718
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3745723962783813,
      "learning_rate": 0.0004538165550504819,
      "loss": 3.1444,
      "step": 75719
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6237651109695435,
      "learning_rate": 0.0004538130430644919,
      "loss": 3.2382,
      "step": 75720
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9136580228805542,
      "learning_rate": 0.0004538095310499051,
      "loss": 2.7945,
      "step": 75721
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5552453994750977,
      "learning_rate": 0.00045380601900672215,
      "loss": 2.8883,
      "step": 75722
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4935117959976196,
      "learning_rate": 0.00045380250693494384,
      "loss": 2.9956,
      "step": 75723
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2332178354263306,
      "learning_rate": 0.0004537989948345708,
      "loss": 2.9651,
      "step": 75724
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7149608135223389,
      "learning_rate": 0.00045379548270560346,
      "loss": 3.1548,
      "step": 75725
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8614405393600464,
      "learning_rate": 0.00045379197054804275,
      "loss": 2.9371,
      "step": 75726
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9826279878616333,
      "learning_rate": 0.0004537884583618892,
      "loss": 3.1462,
      "step": 75727
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2152724266052246,
      "learning_rate": 0.0004537849461471434,
      "loss": 2.8977,
      "step": 75728
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5417505502700806,
      "learning_rate": 0.00045378143390380617,
      "loss": 3.2719,
      "step": 75729
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3845643997192383,
      "learning_rate": 0.000453777921631878,
      "loss": 3.0566,
      "step": 75730
    },
    {
      "epoch": 0.99,
      "grad_norm": 4.340196132659912,
      "learning_rate": 0.00045377440933135966,
      "loss": 2.8973,
      "step": 75731
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5083892345428467,
      "learning_rate": 0.0004537708970022517,
      "loss": 3.0705,
      "step": 75732
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7759250402450562,
      "learning_rate": 0.00045376738464455486,
      "loss": 2.9443,
      "step": 75733
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.254595994949341,
      "learning_rate": 0.0004537638722582697,
      "loss": 3.0976,
      "step": 75734
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.960641622543335,
      "learning_rate": 0.00045376035984339704,
      "loss": 3.1461,
      "step": 75735
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.089954376220703,
      "learning_rate": 0.0004537568473999373,
      "loss": 3.2557,
      "step": 75736
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6075513362884521,
      "learning_rate": 0.00045375333492789146,
      "loss": 3.0121,
      "step": 75737
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.8704512119293213,
      "learning_rate": 0.00045374982242725985,
      "loss": 2.933,
      "step": 75738
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.366703748703003,
      "learning_rate": 0.0004537463098980432,
      "loss": 3.0327,
      "step": 75739
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.9636151790618896,
      "learning_rate": 0.00045374279734024226,
      "loss": 3.1049,
      "step": 75740
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5328651666641235,
      "learning_rate": 0.00045373928475385763,
      "loss": 2.8692,
      "step": 75741
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1347155570983887,
      "learning_rate": 0.00045373577213888997,
      "loss": 3.2694,
      "step": 75742
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.2227799892425537,
      "learning_rate": 0.0004537322594953399,
      "loss": 3.1127,
      "step": 75743
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.8024141788482666,
      "learning_rate": 0.0004537287468232081,
      "loss": 2.8267,
      "step": 75744
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2469005584716797,
      "learning_rate": 0.00045372523412249525,
      "loss": 3.0764,
      "step": 75745
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0275182723999023,
      "learning_rate": 0.000453721721393202,
      "loss": 3.117,
      "step": 75746
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2478532791137695,
      "learning_rate": 0.00045371820863532894,
      "loss": 3.0672,
      "step": 75747
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0685153007507324,
      "learning_rate": 0.0004537146958488768,
      "loss": 2.9999,
      "step": 75748
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5912936925888062,
      "learning_rate": 0.0004537111830338461,
      "loss": 2.986,
      "step": 75749
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5792430639266968,
      "learning_rate": 0.0004537076701902377,
      "loss": 2.9689,
      "step": 75750
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.709919810295105,
      "learning_rate": 0.00045370415731805217,
      "loss": 2.8707,
      "step": 75751
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4381016492843628,
      "learning_rate": 0.00045370064441729,
      "loss": 3.0774,
      "step": 75752
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5079313516616821,
      "learning_rate": 0.00045369713148795207,
      "loss": 2.9877,
      "step": 75753
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5632511377334595,
      "learning_rate": 0.00045369361853003895,
      "loss": 2.9331,
      "step": 75754
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.593694806098938,
      "learning_rate": 0.00045369010554355125,
      "loss": 2.8461,
      "step": 75755
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8182114362716675,
      "learning_rate": 0.00045368659252848967,
      "loss": 2.8842,
      "step": 75756
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8841263055801392,
      "learning_rate": 0.0004536830794848548,
      "loss": 2.9071,
      "step": 75757
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9195435047149658,
      "learning_rate": 0.0004536795664126474,
      "loss": 2.9362,
      "step": 75758
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.860710859298706,
      "learning_rate": 0.0004536760533118681,
      "loss": 3.1619,
      "step": 75759
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.6089062690734863,
      "learning_rate": 0.00045367254018251744,
      "loss": 3.1106,
      "step": 75760
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6106828451156616,
      "learning_rate": 0.0004536690270245962,
      "loss": 2.9564,
      "step": 75761
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.59531307220459,
      "learning_rate": 0.000453665513838105,
      "loss": 2.839,
      "step": 75762
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1349847316741943,
      "learning_rate": 0.00045366200062304444,
      "loss": 3.0154,
      "step": 75763
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1440823078155518,
      "learning_rate": 0.0004536584873794152,
      "loss": 2.7734,
      "step": 75764
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.585606098175049,
      "learning_rate": 0.000453654974107218,
      "loss": 3.0183,
      "step": 75765
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5345423221588135,
      "learning_rate": 0.00045365146080645344,
      "loss": 2.9927,
      "step": 75766
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5170636177062988,
      "learning_rate": 0.0004536479474771221,
      "loss": 3.1639,
      "step": 75767
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.313753604888916,
      "learning_rate": 0.00045364443411922484,
      "loss": 3.109,
      "step": 75768
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6484942436218262,
      "learning_rate": 0.00045364092073276206,
      "loss": 2.9744,
      "step": 75769
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4635294675827026,
      "learning_rate": 0.00045363740731773456,
      "loss": 2.9555,
      "step": 75770
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6190288066864014,
      "learning_rate": 0.00045363389387414294,
      "loss": 2.859,
      "step": 75771
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.849407434463501,
      "learning_rate": 0.0004536303804019879,
      "loss": 2.9398,
      "step": 75772
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5097243785858154,
      "learning_rate": 0.0004536268669012701,
      "loss": 2.9251,
      "step": 75773
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.403423547744751,
      "learning_rate": 0.0004536233533719902,
      "loss": 2.9379,
      "step": 75774
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4136955738067627,
      "learning_rate": 0.00045361983981414883,
      "loss": 3.0711,
      "step": 75775
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6841713190078735,
      "learning_rate": 0.0004536163262277465,
      "loss": 3.0231,
      "step": 75776
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2509679794311523,
      "learning_rate": 0.000453612812612784,
      "loss": 2.8701,
      "step": 75777
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.013334035873413,
      "learning_rate": 0.00045360929896926216,
      "loss": 3.0344,
      "step": 75778
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5649704933166504,
      "learning_rate": 0.0004536057852971813,
      "loss": 3.0175,
      "step": 75779
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5351271629333496,
      "learning_rate": 0.0004536022715965423,
      "loss": 3.0459,
      "step": 75780
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.802776575088501,
      "learning_rate": 0.00045359875786734564,
      "loss": 3.0203,
      "step": 75781
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7583889961242676,
      "learning_rate": 0.0004535952441095922,
      "loss": 3.3205,
      "step": 75782
    },
    {
      "epoch": 0.99,
      "grad_norm": 4.347951412200928,
      "learning_rate": 0.0004535917303232824,
      "loss": 2.9538,
      "step": 75783
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0253419876098633,
      "learning_rate": 0.00045358821650841704,
      "loss": 2.9631,
      "step": 75784
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.999248504638672,
      "learning_rate": 0.0004535847026649967,
      "loss": 2.9991,
      "step": 75785
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.9327261447906494,
      "learning_rate": 0.0004535811887930221,
      "loss": 3.0974,
      "step": 75786
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.2502241134643555,
      "learning_rate": 0.0004535776748924938,
      "loss": 2.8838,
      "step": 75787
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.711607575416565,
      "learning_rate": 0.00045357416096341257,
      "loss": 2.8552,
      "step": 75788
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5835989713668823,
      "learning_rate": 0.0004535706470057791,
      "loss": 2.9089,
      "step": 75789
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.527564764022827,
      "learning_rate": 0.00045356713301959376,
      "loss": 3.1027,
      "step": 75790
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7680914402008057,
      "learning_rate": 0.0004535636190048575,
      "loss": 3.0248,
      "step": 75791
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5934386253356934,
      "learning_rate": 0.0004535601049615708,
      "loss": 2.9006,
      "step": 75792
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4833450317382812,
      "learning_rate": 0.0004535565908897345,
      "loss": 3.2497,
      "step": 75793
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5899651050567627,
      "learning_rate": 0.000453553076789349,
      "loss": 2.8807,
      "step": 75794
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7994011640548706,
      "learning_rate": 0.0004535495626604151,
      "loss": 3.2611,
      "step": 75795
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8486472368240356,
      "learning_rate": 0.00045354604850293353,
      "loss": 3.3005,
      "step": 75796
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.343794584274292,
      "learning_rate": 0.00045354253431690477,
      "loss": 3.127,
      "step": 75797
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.551323413848877,
      "learning_rate": 0.00045353902010232956,
      "loss": 2.8153,
      "step": 75798
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1492960453033447,
      "learning_rate": 0.0004535355058592086,
      "loss": 2.8508,
      "step": 75799
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4127163887023926,
      "learning_rate": 0.00045353199158754246,
      "loss": 2.7634,
      "step": 75800
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3712854385375977,
      "learning_rate": 0.0004535284772873318,
      "loss": 3.2978,
      "step": 75801
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4584035873413086,
      "learning_rate": 0.00045352496295857737,
      "loss": 2.8746,
      "step": 75802
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5122251510620117,
      "learning_rate": 0.0004535214486012797,
      "loss": 2.9412,
      "step": 75803
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.106492280960083,
      "learning_rate": 0.00045351793421543945,
      "loss": 2.9886,
      "step": 75804
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7093749046325684,
      "learning_rate": 0.0004535144198010574,
      "loss": 3.3469,
      "step": 75805
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0601966381073,
      "learning_rate": 0.0004535109053581341,
      "loss": 3.2453,
      "step": 75806
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9313725233078003,
      "learning_rate": 0.0004535073908866701,
      "loss": 2.7162,
      "step": 75807
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2662498950958252,
      "learning_rate": 0.0004535038763866664,
      "loss": 2.9177,
      "step": 75808
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5655434131622314,
      "learning_rate": 0.0004535003618581233,
      "loss": 2.8939,
      "step": 75809
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5964175462722778,
      "learning_rate": 0.00045349684730104157,
      "loss": 3.1607,
      "step": 75810
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.738725185394287,
      "learning_rate": 0.00045349333271542196,
      "loss": 3.0415,
      "step": 75811
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4101650714874268,
      "learning_rate": 0.00045348981810126496,
      "loss": 3.1919,
      "step": 75812
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9995673894882202,
      "learning_rate": 0.00045348630345857133,
      "loss": 2.8665,
      "step": 75813
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.467755675315857,
      "learning_rate": 0.00045348278878734176,
      "loss": 2.8737,
      "step": 75814
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6715320348739624,
      "learning_rate": 0.00045347927408757676,
      "loss": 2.9282,
      "step": 75815
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.6307342052459717,
      "learning_rate": 0.0004534757593592771,
      "loss": 3.1153,
      "step": 75816
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.533718466758728,
      "learning_rate": 0.0004534722446024434,
      "loss": 3.0274,
      "step": 75817
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8853676319122314,
      "learning_rate": 0.0004534687298170763,
      "loss": 3.0093,
      "step": 75818
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3230762481689453,
      "learning_rate": 0.00045346521500317646,
      "loss": 2.9287,
      "step": 75819
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5195027589797974,
      "learning_rate": 0.00045346170016074454,
      "loss": 2.9294,
      "step": 75820
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9830735921859741,
      "learning_rate": 0.0004534581852897812,
      "loss": 2.8092,
      "step": 75821
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8901115655899048,
      "learning_rate": 0.0004534546703902872,
      "loss": 3.1156,
      "step": 75822
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8550416231155396,
      "learning_rate": 0.00045345115546226297,
      "loss": 3.1894,
      "step": 75823
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7381352186203003,
      "learning_rate": 0.00045344764050570923,
      "loss": 2.8621,
      "step": 75824
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8563551902770996,
      "learning_rate": 0.0004534441255206267,
      "loss": 2.8847,
      "step": 75825
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6845694780349731,
      "learning_rate": 0.00045344061050701614,
      "loss": 2.912,
      "step": 75826
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.756327748298645,
      "learning_rate": 0.000453437095464878,
      "loss": 3.1569,
      "step": 75827
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6604965925216675,
      "learning_rate": 0.0004534335803942129,
      "loss": 3.2012,
      "step": 75828
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6313790082931519,
      "learning_rate": 0.0004534300652950217,
      "loss": 2.8031,
      "step": 75829
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7663311958312988,
      "learning_rate": 0.00045342655016730493,
      "loss": 3.0297,
      "step": 75830
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7628248929977417,
      "learning_rate": 0.0004534230350110633,
      "loss": 3.1005,
      "step": 75831
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.624359369277954,
      "learning_rate": 0.0004534195198262974,
      "loss": 2.9531,
      "step": 75832
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.907867670059204,
      "learning_rate": 0.00045341600461300794,
      "loss": 3.2078,
      "step": 75833
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.006666660308838,
      "learning_rate": 0.0004534124893711955,
      "loss": 2.9633,
      "step": 75834
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5324969291687012,
      "learning_rate": 0.00045340897410086077,
      "loss": 3.0227,
      "step": 75835
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6890887022018433,
      "learning_rate": 0.00045340545880200446,
      "loss": 2.8389,
      "step": 75836
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4923847913742065,
      "learning_rate": 0.00045340194347462723,
      "loss": 2.9396,
      "step": 75837
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.131253957748413,
      "learning_rate": 0.0004533984281187295,
      "loss": 3.0251,
      "step": 75838
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6806223392486572,
      "learning_rate": 0.00045339491273431237,
      "loss": 3.072,
      "step": 75839
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5521477460861206,
      "learning_rate": 0.000453391397321376,
      "loss": 3.2576,
      "step": 75840
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7308087348937988,
      "learning_rate": 0.0004533878818799214,
      "loss": 3.0045,
      "step": 75841
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8675068616867065,
      "learning_rate": 0.0004533843664099491,
      "loss": 3.1913,
      "step": 75842
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.388761281967163,
      "learning_rate": 0.0004533808509114597,
      "loss": 2.9895,
      "step": 75843
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6410927772521973,
      "learning_rate": 0.0004533773353844539,
      "loss": 3.0379,
      "step": 75844
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.63741934299469,
      "learning_rate": 0.0004533738198289324,
      "loss": 2.9253,
      "step": 75845
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8275550603866577,
      "learning_rate": 0.0004533703042448958,
      "loss": 3.035,
      "step": 75846
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7361620664596558,
      "learning_rate": 0.0004533667886323447,
      "loss": 3.1167,
      "step": 75847
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4292244911193848,
      "learning_rate": 0.00045336327299128,
      "loss": 2.9435,
      "step": 75848
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1825525760650635,
      "learning_rate": 0.000453359757321702,
      "loss": 3.049,
      "step": 75849
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.838728666305542,
      "learning_rate": 0.00045335624162361155,
      "loss": 3.1241,
      "step": 75850
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5655136108398438,
      "learning_rate": 0.00045335272589700935,
      "loss": 2.913,
      "step": 75851
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7417583465576172,
      "learning_rate": 0.00045334921014189594,
      "loss": 2.9674,
      "step": 75852
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.827307939529419,
      "learning_rate": 0.00045334569435827196,
      "loss": 2.9732,
      "step": 75853
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5301203727722168,
      "learning_rate": 0.00045334217854613824,
      "loss": 2.9643,
      "step": 75854
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.019256353378296,
      "learning_rate": 0.00045333866270549526,
      "loss": 2.8523,
      "step": 75855
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4250564575195312,
      "learning_rate": 0.0004533351468363436,
      "loss": 3.1363,
      "step": 75856
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3522236347198486,
      "learning_rate": 0.0004533316309386843,
      "loss": 3.1944,
      "step": 75857
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6705060005187988,
      "learning_rate": 0.00045332811501251754,
      "loss": 3.1306,
      "step": 75858
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.61509108543396,
      "learning_rate": 0.0004533245990578443,
      "loss": 3.0581,
      "step": 75859
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6084879636764526,
      "learning_rate": 0.00045332108307466506,
      "loss": 2.9126,
      "step": 75860
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7401095628738403,
      "learning_rate": 0.00045331756706298057,
      "loss": 3.0659,
      "step": 75861
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6720857620239258,
      "learning_rate": 0.0004533140510227914,
      "loss": 2.8823,
      "step": 75862
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.098484516143799,
      "learning_rate": 0.0004533105349540984,
      "loss": 2.9825,
      "step": 75863
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7927571535110474,
      "learning_rate": 0.00045330701885690195,
      "loss": 2.9246,
      "step": 75864
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8668893575668335,
      "learning_rate": 0.0004533035027312028,
      "loss": 3.1143,
      "step": 75865
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2355151176452637,
      "learning_rate": 0.0004532999865770017,
      "loss": 3.2315,
      "step": 75866
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8260524272918701,
      "learning_rate": 0.0004532964703942992,
      "loss": 3.0002,
      "step": 75867
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8260841369628906,
      "learning_rate": 0.0004532929541830961,
      "loss": 3.0045,
      "step": 75868
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5783783197402954,
      "learning_rate": 0.0004532894379433928,
      "loss": 2.8756,
      "step": 75869
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5648155212402344,
      "learning_rate": 0.0004532859216751902,
      "loss": 2.9801,
      "step": 75870
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6790231466293335,
      "learning_rate": 0.00045328240537848886,
      "loss": 2.8975,
      "step": 75871
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6591606140136719,
      "learning_rate": 0.00045327888905328937,
      "loss": 3.1621,
      "step": 75872
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5670427083969116,
      "learning_rate": 0.00045327537269959243,
      "loss": 2.9429,
      "step": 75873
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6416600942611694,
      "learning_rate": 0.0004532718563173988,
      "loss": 2.7312,
      "step": 75874
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.454646110534668,
      "learning_rate": 0.00045326833990670895,
      "loss": 2.9795,
      "step": 75875
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7415213584899902,
      "learning_rate": 0.00045326482346752365,
      "loss": 2.7505,
      "step": 75876
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8235149383544922,
      "learning_rate": 0.0004532613069998435,
      "loss": 3.1236,
      "step": 75877
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9702997207641602,
      "learning_rate": 0.0004532577905036692,
      "loss": 3.0222,
      "step": 75878
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6021502017974854,
      "learning_rate": 0.00045325427397900137,
      "loss": 2.975,
      "step": 75879
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5904853343963623,
      "learning_rate": 0.0004532507574258407,
      "loss": 2.7981,
      "step": 75880
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.627845048904419,
      "learning_rate": 0.00045324724084418785,
      "loss": 2.8082,
      "step": 75881
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5203548669815063,
      "learning_rate": 0.00045324372423404343,
      "loss": 2.7763,
      "step": 75882
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.200486183166504,
      "learning_rate": 0.0004532402075954081,
      "loss": 2.8122,
      "step": 75883
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6658514738082886,
      "learning_rate": 0.0004532366909282825,
      "loss": 2.9519,
      "step": 75884
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8802647590637207,
      "learning_rate": 0.0004532331742326673,
      "loss": 3.0274,
      "step": 75885
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5960378646850586,
      "learning_rate": 0.00045322965750856315,
      "loss": 3.0711,
      "step": 75886
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4442365169525146,
      "learning_rate": 0.0004532261407559708,
      "loss": 3.1705,
      "step": 75887
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.742833137512207,
      "learning_rate": 0.0004532226239748907,
      "loss": 2.8957,
      "step": 75888
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.537870168685913,
      "learning_rate": 0.0004532191071653237,
      "loss": 3.2131,
      "step": 75889
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6245933771133423,
      "learning_rate": 0.0004532155903272704,
      "loss": 2.9531,
      "step": 75890
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6696910858154297,
      "learning_rate": 0.0004532120734607314,
      "loss": 2.9957,
      "step": 75891
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8096575736999512,
      "learning_rate": 0.00045320855656570734,
      "loss": 3.2415,
      "step": 75892
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4702916145324707,
      "learning_rate": 0.000453205039642199,
      "loss": 2.913,
      "step": 75893
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.765653133392334,
      "learning_rate": 0.00045320152269020684,
      "loss": 2.7861,
      "step": 75894
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7023006677627563,
      "learning_rate": 0.00045319800570973177,
      "loss": 2.9329,
      "step": 75895
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.0208568572998047,
      "learning_rate": 0.0004531944887007742,
      "loss": 3.0114,
      "step": 75896
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.295850992202759,
      "learning_rate": 0.0004531909716633349,
      "loss": 3.1623,
      "step": 75897
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3179517984390259,
      "learning_rate": 0.0004531874545974144,
      "loss": 2.9588,
      "step": 75898
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8789467811584473,
      "learning_rate": 0.00045318393750301366,
      "loss": 3.1347,
      "step": 75899
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1570451259613037,
      "learning_rate": 0.00045318042038013307,
      "loss": 3.1512,
      "step": 75900
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0799570083618164,
      "learning_rate": 0.0004531769032287732,
      "loss": 3.1524,
      "step": 75901
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1001834869384766,
      "learning_rate": 0.000453173386048935,
      "loss": 2.9699,
      "step": 75902
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.220263719558716,
      "learning_rate": 0.000453169868840619,
      "loss": 2.9538,
      "step": 75903
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4317039251327515,
      "learning_rate": 0.00045316635160382577,
      "loss": 2.926,
      "step": 75904
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7321045398712158,
      "learning_rate": 0.000453162834338556,
      "loss": 3.1044,
      "step": 75905
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.8624000549316406,
      "learning_rate": 0.0004531593170448104,
      "loss": 3.1937,
      "step": 75906
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5598260164260864,
      "learning_rate": 0.0004531557997225896,
      "loss": 3.0953,
      "step": 75907
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6577891111373901,
      "learning_rate": 0.0004531522823718942,
      "loss": 3.0663,
      "step": 75908
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5813486576080322,
      "learning_rate": 0.00045314876499272504,
      "loss": 2.9651,
      "step": 75909
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1858279705047607,
      "learning_rate": 0.0004531452475850825,
      "loss": 2.928,
      "step": 75910
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5395941734313965,
      "learning_rate": 0.0004531417301489674,
      "loss": 2.9426,
      "step": 75911
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8507376909255981,
      "learning_rate": 0.00045313821268438037,
      "loss": 3.0418,
      "step": 75912
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4822933673858643,
      "learning_rate": 0.000453134695191322,
      "loss": 2.7107,
      "step": 75913
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6862976551055908,
      "learning_rate": 0.0004531311776697931,
      "loss": 2.8537,
      "step": 75914
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.1254208087921143,
      "learning_rate": 0.00045312766011979417,
      "loss": 2.915,
      "step": 75915
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.872094750404358,
      "learning_rate": 0.00045312414254132586,
      "loss": 3.1181,
      "step": 75916
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7715548276901245,
      "learning_rate": 0.0004531206249343889,
      "loss": 2.9974,
      "step": 75917
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8465389013290405,
      "learning_rate": 0.0004531171072989841,
      "loss": 3.0104,
      "step": 75918
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7315062284469604,
      "learning_rate": 0.0004531135896351118,
      "loss": 2.9334,
      "step": 75919
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6851481199264526,
      "learning_rate": 0.00045311007194277276,
      "loss": 3.175,
      "step": 75920
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7267966270446777,
      "learning_rate": 0.0004531065542219677,
      "loss": 3.0984,
      "step": 75921
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4532318115234375,
      "learning_rate": 0.00045310303647269726,
      "loss": 2.8039,
      "step": 75922
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2266712188720703,
      "learning_rate": 0.0004530995186949621,
      "loss": 2.9327,
      "step": 75923
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8015857934951782,
      "learning_rate": 0.0004530960008887628,
      "loss": 3.1968,
      "step": 75924
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.413837194442749,
      "learning_rate": 0.0004530924830541,
      "loss": 2.8641,
      "step": 75925
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.6800904273986816,
      "learning_rate": 0.0004530889651909745,
      "loss": 2.9578,
      "step": 75926
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.112795829772949,
      "learning_rate": 0.0004530854472993869,
      "loss": 2.9744,
      "step": 75927
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7422821521759033,
      "learning_rate": 0.0004530819293793378,
      "loss": 3.187,
      "step": 75928
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.418057918548584,
      "learning_rate": 0.0004530784114308278,
      "loss": 3.2037,
      "step": 75929
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6557397842407227,
      "learning_rate": 0.00045307489345385775,
      "loss": 2.9868,
      "step": 75930
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4923696517944336,
      "learning_rate": 0.0004530713754484282,
      "loss": 3.0979,
      "step": 75931
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4410032033920288,
      "learning_rate": 0.0004530678574145396,
      "loss": 3.0486,
      "step": 75932
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.887643575668335,
      "learning_rate": 0.000453064339352193,
      "loss": 3.2062,
      "step": 75933
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0096023082733154,
      "learning_rate": 0.0004530608212613888,
      "loss": 2.9008,
      "step": 75934
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4181231260299683,
      "learning_rate": 0.00045305730314212765,
      "loss": 2.7585,
      "step": 75935
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8039902448654175,
      "learning_rate": 0.0004530537849944103,
      "loss": 3.3327,
      "step": 75936
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7577455043792725,
      "learning_rate": 0.00045305026681823735,
      "loss": 3.1567,
      "step": 75937
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.486631155014038,
      "learning_rate": 0.00045304674861360944,
      "loss": 3.2331,
      "step": 75938
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3944635391235352,
      "learning_rate": 0.00045304323038052735,
      "loss": 2.9384,
      "step": 75939
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2447867393493652,
      "learning_rate": 0.0004530397121189915,
      "loss": 3.1102,
      "step": 75940
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.180814266204834,
      "learning_rate": 0.00045303619382900274,
      "loss": 2.9966,
      "step": 75941
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5891658067703247,
      "learning_rate": 0.00045303267551056173,
      "loss": 3.0967,
      "step": 75942
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7439475059509277,
      "learning_rate": 0.000453029157163669,
      "loss": 3.164,
      "step": 75943
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7365459203720093,
      "learning_rate": 0.0004530256387883252,
      "loss": 3.1312,
      "step": 75944
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.853716492652893,
      "learning_rate": 0.0004530221203845311,
      "loss": 2.9678,
      "step": 75945
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5414193868637085,
      "learning_rate": 0.0004530186019522873,
      "loss": 2.9898,
      "step": 75946
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4790071249008179,
      "learning_rate": 0.0004530150834915944,
      "loss": 2.9405,
      "step": 75947
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.568124532699585,
      "learning_rate": 0.0004530115650024532,
      "loss": 3.0342,
      "step": 75948
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5142383575439453,
      "learning_rate": 0.0004530080464848642,
      "loss": 3.1248,
      "step": 75949
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4478719234466553,
      "learning_rate": 0.0004530045279388282,
      "loss": 2.9767,
      "step": 75950
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.358574390411377,
      "learning_rate": 0.00045300100936434575,
      "loss": 2.873,
      "step": 75951
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5815423727035522,
      "learning_rate": 0.00045299749076141736,
      "loss": 2.9458,
      "step": 75952
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8046972751617432,
      "learning_rate": 0.00045299397213004406,
      "loss": 3.0326,
      "step": 75953
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6021276712417603,
      "learning_rate": 0.0004529904534702261,
      "loss": 2.9878,
      "step": 75954
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6896765232086182,
      "learning_rate": 0.0004529869347819645,
      "loss": 3.1804,
      "step": 75955
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4751179218292236,
      "learning_rate": 0.0004529834160652596,
      "loss": 3.0089,
      "step": 75956
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.910240650177002,
      "learning_rate": 0.00045297989732011226,
      "loss": 3.0416,
      "step": 75957
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0841941833496094,
      "learning_rate": 0.0004529763785465231,
      "loss": 3.0828,
      "step": 75958
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5185747146606445,
      "learning_rate": 0.0004529728597444928,
      "loss": 2.9287,
      "step": 75959
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.581506371498108,
      "learning_rate": 0.0004529693409140218,
      "loss": 3.1427,
      "step": 75960
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6512360572814941,
      "learning_rate": 0.000452965822055111,
      "loss": 3.0335,
      "step": 75961
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.703508973121643,
      "learning_rate": 0.0004529623031677609,
      "loss": 2.8876,
      "step": 75962
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4643378257751465,
      "learning_rate": 0.00045295878425197233,
      "loss": 2.9723,
      "step": 75963
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.791715145111084,
      "learning_rate": 0.0004529552653077458,
      "loss": 2.8311,
      "step": 75964
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4695143699645996,
      "learning_rate": 0.0004529517463350819,
      "loss": 2.961,
      "step": 75965
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4168827533721924,
      "learning_rate": 0.00045294822733398145,
      "loss": 2.8942,
      "step": 75966
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.744423270225525,
      "learning_rate": 0.0004529447083044451,
      "loss": 2.7862,
      "step": 75967
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9562735557556152,
      "learning_rate": 0.0004529411892464733,
      "loss": 3.0495,
      "step": 75968
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8023526668548584,
      "learning_rate": 0.0004529376701600669,
      "loss": 2.9789,
      "step": 75969
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5694516897201538,
      "learning_rate": 0.00045293415104522667,
      "loss": 3.0256,
      "step": 75970
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.365053415298462,
      "learning_rate": 0.00045293063190195284,
      "loss": 3.1432,
      "step": 75971
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4664973020553589,
      "learning_rate": 0.00045292711273024647,
      "loss": 3.0728,
      "step": 75972
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5778491497039795,
      "learning_rate": 0.0004529235935301081,
      "loss": 3.1773,
      "step": 75973
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2178139686584473,
      "learning_rate": 0.0004529200743015383,
      "loss": 3.0802,
      "step": 75974
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2770907878875732,
      "learning_rate": 0.00045291655504453766,
      "loss": 2.989,
      "step": 75975
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5744426250457764,
      "learning_rate": 0.00045291303575910715,
      "loss": 2.8978,
      "step": 75976
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7920998334884644,
      "learning_rate": 0.00045290951644524706,
      "loss": 3.0494,
      "step": 75977
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.07621693611145,
      "learning_rate": 0.0004529059971029582,
      "loss": 2.9326,
      "step": 75978
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6378382444381714,
      "learning_rate": 0.00045290247773224133,
      "loss": 3.0649,
      "step": 75979
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7028366327285767,
      "learning_rate": 0.00045289895833309693,
      "loss": 2.7994,
      "step": 75980
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6469271183013916,
      "learning_rate": 0.0004528954389055257,
      "loss": 3.0584,
      "step": 75981
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6471939086914062,
      "learning_rate": 0.0004528919194495285,
      "loss": 2.9021,
      "step": 75982
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7647227048873901,
      "learning_rate": 0.00045288839996510566,
      "loss": 3.3543,
      "step": 75983
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7024903297424316,
      "learning_rate": 0.00045288488045225794,
      "loss": 3.1701,
      "step": 75984
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.566192388534546,
      "learning_rate": 0.00045288136091098613,
      "loss": 3.1125,
      "step": 75985
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6831285953521729,
      "learning_rate": 0.0004528778413412907,
      "loss": 3.042,
      "step": 75986
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7563666105270386,
      "learning_rate": 0.0004528743217431725,
      "loss": 2.8997,
      "step": 75987
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5006746053695679,
      "learning_rate": 0.00045287080211663203,
      "loss": 3.3015,
      "step": 75988
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3562583923339844,
      "learning_rate": 0.00045286728246167,
      "loss": 3.0024,
      "step": 75989
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0362398624420166,
      "learning_rate": 0.000452863762778287,
      "loss": 3.0922,
      "step": 75990
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4816679954528809,
      "learning_rate": 0.0004528602430664838,
      "loss": 3.0588,
      "step": 75991
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6036365032196045,
      "learning_rate": 0.000452856723326261,
      "loss": 3.0951,
      "step": 75992
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7393602132797241,
      "learning_rate": 0.00045285320355761913,
      "loss": 3.1841,
      "step": 75993
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.608262300491333,
      "learning_rate": 0.0004528496837605592,
      "loss": 2.8794,
      "step": 75994
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3340150117874146,
      "learning_rate": 0.00045284616393508146,
      "loss": 3.2339,
      "step": 75995
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.649698257446289,
      "learning_rate": 0.0004528426440811867,
      "loss": 3.0874,
      "step": 75996
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4622917175292969,
      "learning_rate": 0.00045283912419887576,
      "loss": 3.0501,
      "step": 75997
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.060349464416504,
      "learning_rate": 0.000452835604288149,
      "loss": 3.217,
      "step": 75998
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3830723762512207,
      "learning_rate": 0.00045283208434900723,
      "loss": 2.9895,
      "step": 75999
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4476238489151,
      "learning_rate": 0.0004528285643814512,
      "loss": 3.1149,
      "step": 76000
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.501846194267273,
      "learning_rate": 0.00045282504438548136,
      "loss": 3.0098,
      "step": 76001
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2466559410095215,
      "learning_rate": 0.00045282152436109844,
      "loss": 2.8879,
      "step": 76002
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5756202936172485,
      "learning_rate": 0.00045281800430830324,
      "loss": 2.9507,
      "step": 76003
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7045741081237793,
      "learning_rate": 0.00045281448422709614,
      "loss": 3.014,
      "step": 76004
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.490830183029175,
      "learning_rate": 0.000452810964117478,
      "loss": 2.9659,
      "step": 76005
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.340224504470825,
      "learning_rate": 0.0004528074439794495,
      "loss": 3.3841,
      "step": 76006
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2452714443206787,
      "learning_rate": 0.0004528039238130111,
      "loss": 2.8784,
      "step": 76007
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4487087726593018,
      "learning_rate": 0.00045280040361816363,
      "loss": 2.874,
      "step": 76008
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.700251579284668,
      "learning_rate": 0.0004527968833949077,
      "loss": 3.1775,
      "step": 76009
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.184964418411255,
      "learning_rate": 0.0004527933631432439,
      "loss": 2.9705,
      "step": 76010
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7979179620742798,
      "learning_rate": 0.00045278984286317286,
      "loss": 3.2254,
      "step": 76011
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6876600980758667,
      "learning_rate": 0.00045278632255469546,
      "loss": 2.9521,
      "step": 76012
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.826773762702942,
      "learning_rate": 0.00045278280221781213,
      "loss": 2.8655,
      "step": 76013
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8696092367172241,
      "learning_rate": 0.0004527792818525235,
      "loss": 2.98,
      "step": 76014
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8770127296447754,
      "learning_rate": 0.0004527757614588304,
      "loss": 2.9724,
      "step": 76015
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6856281757354736,
      "learning_rate": 0.0004527722410367334,
      "loss": 2.8763,
      "step": 76016
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4486202001571655,
      "learning_rate": 0.00045276872058623314,
      "loss": 3.0764,
      "step": 76017
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3714054822921753,
      "learning_rate": 0.00045276520010733033,
      "loss": 3.0408,
      "step": 76018
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5463764667510986,
      "learning_rate": 0.00045276167960002554,
      "loss": 3.2659,
      "step": 76019
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.2474030256271362,
      "learning_rate": 0.0004527581590643195,
      "loss": 3.0308,
      "step": 76020
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4496818780899048,
      "learning_rate": 0.00045275463850021286,
      "loss": 2.9201,
      "step": 76021
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7611191272735596,
      "learning_rate": 0.0004527511179077062,
      "loss": 2.9129,
      "step": 76022
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5582998991012573,
      "learning_rate": 0.0004527475972868002,
      "loss": 2.9592,
      "step": 76023
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6819813251495361,
      "learning_rate": 0.00045274407663749565,
      "loss": 3.1646,
      "step": 76024
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2088675498962402,
      "learning_rate": 0.00045274055595979294,
      "loss": 3.2393,
      "step": 76025
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.382401943206787,
      "learning_rate": 0.00045273703525369296,
      "loss": 3.163,
      "step": 76026
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5252280235290527,
      "learning_rate": 0.0004527335145191963,
      "loss": 2.9806,
      "step": 76027
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6327000856399536,
      "learning_rate": 0.00045272999375630356,
      "loss": 2.981,
      "step": 76028
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.579591989517212,
      "learning_rate": 0.00045272647296501543,
      "loss": 2.971,
      "step": 76029
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0357279777526855,
      "learning_rate": 0.00045272295214533265,
      "loss": 2.925,
      "step": 76030
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4040329456329346,
      "learning_rate": 0.0004527194312972557,
      "loss": 2.8105,
      "step": 76031
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6226840019226074,
      "learning_rate": 0.00045271591042078533,
      "loss": 3.245,
      "step": 76032
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0701522827148438,
      "learning_rate": 0.0004527123895159222,
      "loss": 3.1836,
      "step": 76033
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7898389101028442,
      "learning_rate": 0.00045270886858266696,
      "loss": 2.8318,
      "step": 76034
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6064457893371582,
      "learning_rate": 0.00045270534762102026,
      "loss": 3.1022,
      "step": 76035
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7247761487960815,
      "learning_rate": 0.00045270182663098275,
      "loss": 2.8927,
      "step": 76036
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7656118869781494,
      "learning_rate": 0.0004526983056125551,
      "loss": 2.8994,
      "step": 76037
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1702470779418945,
      "learning_rate": 0.0004526947845657379,
      "loss": 3.1938,
      "step": 76038
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.071765661239624,
      "learning_rate": 0.00045269126349053193,
      "loss": 3.0852,
      "step": 76039
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.57138991355896,
      "learning_rate": 0.00045268774238693774,
      "loss": 2.9097,
      "step": 76040
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6636477708816528,
      "learning_rate": 0.00045268422125495605,
      "loss": 2.9792,
      "step": 76041
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.971247911453247,
      "learning_rate": 0.00045268070009458746,
      "loss": 2.9614,
      "step": 76042
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9109280109405518,
      "learning_rate": 0.0004526771789058326,
      "loss": 2.9463,
      "step": 76043
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0983517169952393,
      "learning_rate": 0.0004526736576886922,
      "loss": 3.0946,
      "step": 76044
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7755602598190308,
      "learning_rate": 0.0004526701364431669,
      "loss": 2.9985,
      "step": 76045
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6527842283248901,
      "learning_rate": 0.0004526666151692573,
      "loss": 3.1472,
      "step": 76046
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.582177996635437,
      "learning_rate": 0.0004526630938669642,
      "loss": 2.9919,
      "step": 76047
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3519155979156494,
      "learning_rate": 0.000452659572536288,
      "loss": 2.9502,
      "step": 76048
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2744970321655273,
      "learning_rate": 0.00045265605117722957,
      "loss": 3.1853,
      "step": 76049
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7483127117156982,
      "learning_rate": 0.0004526525297897895,
      "loss": 2.9289,
      "step": 76050
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7254600524902344,
      "learning_rate": 0.00045264900837396847,
      "loss": 2.8948,
      "step": 76051
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.190680503845215,
      "learning_rate": 0.00045264548692976707,
      "loss": 2.8951,
      "step": 76052
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5910491943359375,
      "learning_rate": 0.00045264196545718603,
      "loss": 3.1456,
      "step": 76053
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8579835891723633,
      "learning_rate": 0.00045263844395622595,
      "loss": 2.9701,
      "step": 76054
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.365135431289673,
      "learning_rate": 0.0004526349224268875,
      "loss": 3.1673,
      "step": 76055
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1924331188201904,
      "learning_rate": 0.00045263140086917135,
      "loss": 3.0658,
      "step": 76056
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.592302680015564,
      "learning_rate": 0.0004526278792830781,
      "loss": 2.9911,
      "step": 76057
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.6124789714813232,
      "learning_rate": 0.0004526243576686085,
      "loss": 2.9382,
      "step": 76058
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.550170660018921,
      "learning_rate": 0.0004526208360257631,
      "loss": 2.947,
      "step": 76059
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5258965492248535,
      "learning_rate": 0.0004526173143545426,
      "loss": 2.7115,
      "step": 76060
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8942879438400269,
      "learning_rate": 0.0004526137926549477,
      "loss": 2.9608,
      "step": 76061
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.434816837310791,
      "learning_rate": 0.000452610270926979,
      "loss": 3.0233,
      "step": 76062
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.724326252937317,
      "learning_rate": 0.0004526067491706372,
      "loss": 3.0505,
      "step": 76063
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.794074535369873,
      "learning_rate": 0.0004526032273859229,
      "loss": 3.0797,
      "step": 76064
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4779489040374756,
      "learning_rate": 0.00045259970557283674,
      "loss": 3.1082,
      "step": 76065
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0458428859710693,
      "learning_rate": 0.0004525961837313795,
      "loss": 3.1141,
      "step": 76066
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7238832712173462,
      "learning_rate": 0.0004525926618615517,
      "loss": 3.0663,
      "step": 76067
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6679883003234863,
      "learning_rate": 0.00045258913996335404,
      "loss": 3.1682,
      "step": 76068
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5797410011291504,
      "learning_rate": 0.0004525856180367872,
      "loss": 3.0049,
      "step": 76069
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.912713646888733,
      "learning_rate": 0.0004525820960818518,
      "loss": 3.1971,
      "step": 76070
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5443570613861084,
      "learning_rate": 0.0004525785740985485,
      "loss": 2.9356,
      "step": 76071
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.163783550262451,
      "learning_rate": 0.0004525750520868779,
      "loss": 2.8575,
      "step": 76072
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6431318521499634,
      "learning_rate": 0.0004525715300468408,
      "loss": 3.0791,
      "step": 76073
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8261985778808594,
      "learning_rate": 0.00045256800797843777,
      "loss": 2.8789,
      "step": 76074
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8997306823730469,
      "learning_rate": 0.00045256448588166956,
      "loss": 2.8433,
      "step": 76075
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6378214359283447,
      "learning_rate": 0.00045256096375653656,
      "loss": 3.2757,
      "step": 76076
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.040661334991455,
      "learning_rate": 0.00045255744160303964,
      "loss": 3.0308,
      "step": 76077
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7153897285461426,
      "learning_rate": 0.0004525539194211795,
      "loss": 3.1715,
      "step": 76078
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.745330810546875,
      "learning_rate": 0.0004525503972109567,
      "loss": 3.1126,
      "step": 76079
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2943015098571777,
      "learning_rate": 0.00045254687497237175,
      "loss": 3.2838,
      "step": 76080
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.440285563468933,
      "learning_rate": 0.00045254335270542554,
      "loss": 2.9176,
      "step": 76081
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.765519618988037,
      "learning_rate": 0.0004525398304101187,
      "loss": 3.0866,
      "step": 76082
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.200425624847412,
      "learning_rate": 0.0004525363080864517,
      "loss": 2.8467,
      "step": 76083
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9026981592178345,
      "learning_rate": 0.0004525327857344254,
      "loss": 2.9973,
      "step": 76084
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.614684820175171,
      "learning_rate": 0.00045252926335404047,
      "loss": 3.0635,
      "step": 76085
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.860300302505493,
      "learning_rate": 0.00045252574094529737,
      "loss": 3.0749,
      "step": 76086
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6351041793823242,
      "learning_rate": 0.00045252221850819685,
      "loss": 2.9975,
      "step": 76087
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6500046253204346,
      "learning_rate": 0.0004525186960427397,
      "loss": 3.1034,
      "step": 76088
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6150175333023071,
      "learning_rate": 0.0004525151735489262,
      "loss": 2.8027,
      "step": 76089
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.62753427028656,
      "learning_rate": 0.0004525116510267574,
      "loss": 3.0984,
      "step": 76090
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7810460329055786,
      "learning_rate": 0.0004525081284762338,
      "loss": 2.9397,
      "step": 76091
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5253714323043823,
      "learning_rate": 0.000452504605897356,
      "loss": 3.1397,
      "step": 76092
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7056833505630493,
      "learning_rate": 0.00045250108329012475,
      "loss": 3.2905,
      "step": 76093
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6391754150390625,
      "learning_rate": 0.0004524975606545407,
      "loss": 3.1168,
      "step": 76094
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.846060872077942,
      "learning_rate": 0.0004524940379906045,
      "loss": 2.9096,
      "step": 76095
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4735732078552246,
      "learning_rate": 0.0004524905152983167,
      "loss": 3.2684,
      "step": 76096
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6985801458358765,
      "learning_rate": 0.00045248699257767813,
      "loss": 3.1723,
      "step": 76097
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5171186923980713,
      "learning_rate": 0.00045248346982868924,
      "loss": 3.1764,
      "step": 76098
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7058299779891968,
      "learning_rate": 0.0004524799470513509,
      "loss": 3.1187,
      "step": 76099
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5894136428833008,
      "learning_rate": 0.0004524764242456636,
      "loss": 2.9948,
      "step": 76100
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2010788917541504,
      "learning_rate": 0.00045247290141162805,
      "loss": 3.0573,
      "step": 76101
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6361987590789795,
      "learning_rate": 0.0004524693785492449,
      "loss": 2.7865,
      "step": 76102
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7790935039520264,
      "learning_rate": 0.00045246585565851484,
      "loss": 2.9702,
      "step": 76103
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9118322134017944,
      "learning_rate": 0.0004524623327394385,
      "loss": 2.9681,
      "step": 76104
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9763591289520264,
      "learning_rate": 0.00045245880979201647,
      "loss": 3.1884,
      "step": 76105
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5429930686950684,
      "learning_rate": 0.00045245528681624945,
      "loss": 3.1047,
      "step": 76106
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9403904676437378,
      "learning_rate": 0.0004524517638121383,
      "loss": 3.1253,
      "step": 76107
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.080071210861206,
      "learning_rate": 0.0004524482407796834,
      "loss": 3.1105,
      "step": 76108
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.505316972732544,
      "learning_rate": 0.0004524447177188854,
      "loss": 2.8353,
      "step": 76109
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8747544288635254,
      "learning_rate": 0.0004524411946297452,
      "loss": 3.1331,
      "step": 76110
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.667586326599121,
      "learning_rate": 0.0004524376715122632,
      "loss": 2.929,
      "step": 76111
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8140649795532227,
      "learning_rate": 0.0004524341483664401,
      "loss": 3.0732,
      "step": 76112
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.718216896057129,
      "learning_rate": 0.0004524306251922768,
      "loss": 3.0534,
      "step": 76113
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.666182279586792,
      "learning_rate": 0.00045242710198977365,
      "loss": 2.9537,
      "step": 76114
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2109999656677246,
      "learning_rate": 0.00045242357875893143,
      "loss": 3.0508,
      "step": 76115
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1025593280792236,
      "learning_rate": 0.0004524200554997508,
      "loss": 3.066,
      "step": 76116
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5610508918762207,
      "learning_rate": 0.00045241653221223243,
      "loss": 2.8849,
      "step": 76117
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.373073101043701,
      "learning_rate": 0.0004524130088963769,
      "loss": 3.042,
      "step": 76118
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0581107139587402,
      "learning_rate": 0.000452409485552185,
      "loss": 2.8339,
      "step": 76119
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.9640653133392334,
      "learning_rate": 0.0004524059621796572,
      "loss": 2.8842,
      "step": 76120
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.030759572982788,
      "learning_rate": 0.0004524024387787943,
      "loss": 2.9573,
      "step": 76121
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4861717224121094,
      "learning_rate": 0.0004523989153495969,
      "loss": 2.9662,
      "step": 76122
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3555773496627808,
      "learning_rate": 0.00045239539189206564,
      "loss": 2.888,
      "step": 76123
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0398073196411133,
      "learning_rate": 0.00045239186840620124,
      "loss": 2.7668,
      "step": 76124
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3581812381744385,
      "learning_rate": 0.00045238834489200435,
      "loss": 2.7868,
      "step": 76125
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.641984224319458,
      "learning_rate": 0.00045238482134947553,
      "loss": 3.2435,
      "step": 76126
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9309900999069214,
      "learning_rate": 0.0004523812977786155,
      "loss": 3.1711,
      "step": 76127
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3735923767089844,
      "learning_rate": 0.000452377774179425,
      "loss": 3.0096,
      "step": 76128
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7670989036560059,
      "learning_rate": 0.0004523742505519045,
      "loss": 3.0321,
      "step": 76129
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7111109495162964,
      "learning_rate": 0.0004523707268960547,
      "loss": 2.9315,
      "step": 76130
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8720436096191406,
      "learning_rate": 0.00045236720321187643,
      "loss": 2.9773,
      "step": 76131
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.045107126235962,
      "learning_rate": 0.00045236367949937013,
      "loss": 3.2392,
      "step": 76132
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.995969533920288,
      "learning_rate": 0.00045236015575853656,
      "loss": 2.9565,
      "step": 76133
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.468002200126648,
      "learning_rate": 0.00045235663198937636,
      "loss": 3.1617,
      "step": 76134
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7767866849899292,
      "learning_rate": 0.0004523531081918903,
      "loss": 3.0228,
      "step": 76135
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5786527395248413,
      "learning_rate": 0.00045234958436607875,
      "loss": 3.1501,
      "step": 76136
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.2594027519226074,
      "learning_rate": 0.0004523460605119427,
      "loss": 3.0777,
      "step": 76137
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1101396083831787,
      "learning_rate": 0.00045234253662948244,
      "loss": 2.9282,
      "step": 76138
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7576258182525635,
      "learning_rate": 0.000452339012718699,
      "loss": 2.9696,
      "step": 76139
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3501124382019043,
      "learning_rate": 0.0004523354887795928,
      "loss": 3.2411,
      "step": 76140
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5571281909942627,
      "learning_rate": 0.00045233196481216456,
      "loss": 2.9622,
      "step": 76141
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.123265266418457,
      "learning_rate": 0.000452328440816415,
      "loss": 3.126,
      "step": 76142
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4593592882156372,
      "learning_rate": 0.00045232491679234456,
      "loss": 3.1098,
      "step": 76143
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7459686994552612,
      "learning_rate": 0.00045232139273995416,
      "loss": 3.0075,
      "step": 76144
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5226258039474487,
      "learning_rate": 0.00045231786865924424,
      "loss": 2.9957,
      "step": 76145
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2033090591430664,
      "learning_rate": 0.0004523143445502157,
      "loss": 2.9933,
      "step": 76146
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6406962871551514,
      "learning_rate": 0.00045231082041286886,
      "loss": 3.2327,
      "step": 76147
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6852881908416748,
      "learning_rate": 0.0004523072962472047,
      "loss": 3.0319,
      "step": 76148
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.697439432144165,
      "learning_rate": 0.00045230377205322374,
      "loss": 2.9925,
      "step": 76149
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9101401567459106,
      "learning_rate": 0.00045230024783092656,
      "loss": 2.9972,
      "step": 76150
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.713165044784546,
      "learning_rate": 0.000452296723580314,
      "loss": 3.096,
      "step": 76151
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1594879627227783,
      "learning_rate": 0.0004522931993013865,
      "loss": 2.9954,
      "step": 76152
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7150506973266602,
      "learning_rate": 0.00045228967499414487,
      "loss": 2.913,
      "step": 76153
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.838028073310852,
      "learning_rate": 0.0004522861506585896,
      "loss": 2.8872,
      "step": 76154
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9672555923461914,
      "learning_rate": 0.0004522826262947217,
      "loss": 3.2507,
      "step": 76155
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.708752989768982,
      "learning_rate": 0.00045227910190254135,
      "loss": 3.0378,
      "step": 76156
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6077598333358765,
      "learning_rate": 0.00045227557748204957,
      "loss": 3.1602,
      "step": 76157
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6165257692337036,
      "learning_rate": 0.00045227205303324694,
      "loss": 3.0182,
      "step": 76158
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9936177730560303,
      "learning_rate": 0.0004522685285561339,
      "loss": 3.0243,
      "step": 76159
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0060274600982666,
      "learning_rate": 0.0004522650040507113,
      "loss": 2.9654,
      "step": 76160
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9978256225585938,
      "learning_rate": 0.00045226147951697986,
      "loss": 3.1365,
      "step": 76161
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5508674383163452,
      "learning_rate": 0.00045225795495494,
      "loss": 2.9303,
      "step": 76162
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.721177577972412,
      "learning_rate": 0.00045225443036459257,
      "loss": 3.1458,
      "step": 76163
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5873281955718994,
      "learning_rate": 0.0004522509057459383,
      "loss": 2.9117,
      "step": 76164
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7105385065078735,
      "learning_rate": 0.00045224738109897754,
      "loss": 2.9891,
      "step": 76165
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.617057204246521,
      "learning_rate": 0.00045224385642371115,
      "loss": 2.7437,
      "step": 76166
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5741748809814453,
      "learning_rate": 0.0004522403317201398,
      "loss": 2.679,
      "step": 76167
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.052290678024292,
      "learning_rate": 0.00045223680698826404,
      "loss": 2.9566,
      "step": 76168
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9945504665374756,
      "learning_rate": 0.0004522332822280846,
      "loss": 3.0818,
      "step": 76169
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.899167776107788,
      "learning_rate": 0.00045222975743960215,
      "loss": 3.1239,
      "step": 76170
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5829198360443115,
      "learning_rate": 0.00045222623262281735,
      "loss": 2.837,
      "step": 76171
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4695916175842285,
      "learning_rate": 0.00045222270777773075,
      "loss": 3.0579,
      "step": 76172
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0667078495025635,
      "learning_rate": 0.0004522191829043431,
      "loss": 3.0833,
      "step": 76173
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.474124789237976,
      "learning_rate": 0.00045221565800265503,
      "loss": 3.3319,
      "step": 76174
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8942968845367432,
      "learning_rate": 0.0004522121330726672,
      "loss": 2.9585,
      "step": 76175
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.828365683555603,
      "learning_rate": 0.0004522086081143802,
      "loss": 2.9641,
      "step": 76176
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.549258828163147,
      "learning_rate": 0.0004522050831277949,
      "loss": 2.8156,
      "step": 76177
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7319551706314087,
      "learning_rate": 0.0004522015581129117,
      "loss": 2.8546,
      "step": 76178
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0415353775024414,
      "learning_rate": 0.0004521980330697313,
      "loss": 2.9466,
      "step": 76179
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7626253366470337,
      "learning_rate": 0.0004521945079982545,
      "loss": 2.8173,
      "step": 76180
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7899653911590576,
      "learning_rate": 0.00045219098289848184,
      "loss": 3.1978,
      "step": 76181
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0832254886627197,
      "learning_rate": 0.000452187457770414,
      "loss": 3.0362,
      "step": 76182
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9037820100784302,
      "learning_rate": 0.00045218393261405173,
      "loss": 3.1127,
      "step": 76183
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6853299140930176,
      "learning_rate": 0.0004521804074293954,
      "loss": 3.0879,
      "step": 76184
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3971705436706543,
      "learning_rate": 0.000452176882216446,
      "loss": 2.9731,
      "step": 76185
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5540467500686646,
      "learning_rate": 0.00045217335697520406,
      "loss": 2.9014,
      "step": 76186
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.3311278820037842,
      "learning_rate": 0.00045216983170567014,
      "loss": 3.1374,
      "step": 76187
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8521265983581543,
      "learning_rate": 0.000452166306407845,
      "loss": 3.0132,
      "step": 76188
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6163966655731201,
      "learning_rate": 0.0004521627810817293,
      "loss": 2.8642,
      "step": 76189
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.600536823272705,
      "learning_rate": 0.0004521592557273237,
      "loss": 3.2441,
      "step": 76190
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.196110486984253,
      "learning_rate": 0.0004521557303446287,
      "loss": 3.1189,
      "step": 76191
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4868478775024414,
      "learning_rate": 0.0004521522049336452,
      "loss": 3.0613,
      "step": 76192
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5515987873077393,
      "learning_rate": 0.00045214867949437367,
      "loss": 3.0578,
      "step": 76193
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8479949235916138,
      "learning_rate": 0.0004521451540268148,
      "loss": 3.0426,
      "step": 76194
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8538645505905151,
      "learning_rate": 0.0004521416285309694,
      "loss": 3.1012,
      "step": 76195
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6883522272109985,
      "learning_rate": 0.00045213810300683785,
      "loss": 3.0786,
      "step": 76196
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7936476469039917,
      "learning_rate": 0.000452134577454421,
      "loss": 3.3065,
      "step": 76197
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9410898685455322,
      "learning_rate": 0.0004521310518737195,
      "loss": 2.8023,
      "step": 76198
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5842666625976562,
      "learning_rate": 0.00045212752626473397,
      "loss": 2.9614,
      "step": 76199
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2122743129730225,
      "learning_rate": 0.000452124000627465,
      "loss": 3.0191,
      "step": 76200
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8960398435592651,
      "learning_rate": 0.0004521204749619134,
      "loss": 2.8653,
      "step": 76201
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.764338493347168,
      "learning_rate": 0.00045211694926807963,
      "loss": 2.8991,
      "step": 76202
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0864624977111816,
      "learning_rate": 0.0004521134235459645,
      "loss": 3.0697,
      "step": 76203
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7449692487716675,
      "learning_rate": 0.0004521098977955686,
      "loss": 3.1919,
      "step": 76204
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7080132961273193,
      "learning_rate": 0.0004521063720168927,
      "loss": 3.119,
      "step": 76205
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7387850284576416,
      "learning_rate": 0.0004521028462099372,
      "loss": 2.9607,
      "step": 76206
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7993154525756836,
      "learning_rate": 0.00045209932037470297,
      "loss": 2.9149,
      "step": 76207
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0429794788360596,
      "learning_rate": 0.0004520957945111905,
      "loss": 3.0736,
      "step": 76208
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.7681968212127686,
      "learning_rate": 0.00045209226861940067,
      "loss": 2.9295,
      "step": 76209
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.9323959350585938,
      "learning_rate": 0.00045208874269933407,
      "loss": 2.9585,
      "step": 76210
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0998075008392334,
      "learning_rate": 0.0004520852167509912,
      "loss": 2.8413,
      "step": 76211
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7026362419128418,
      "learning_rate": 0.0004520816907743729,
      "loss": 3.1831,
      "step": 76212
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.856971025466919,
      "learning_rate": 0.00045207816476947965,
      "loss": 2.9095,
      "step": 76213
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5290143489837646,
      "learning_rate": 0.0004520746387363123,
      "loss": 2.9986,
      "step": 76214
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.134882688522339,
      "learning_rate": 0.0004520711126748713,
      "loss": 3.1749,
      "step": 76215
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8381603956222534,
      "learning_rate": 0.0004520675865851575,
      "loss": 2.9793,
      "step": 76216
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.437473773956299,
      "learning_rate": 0.00045206406046717143,
      "loss": 3.0303,
      "step": 76217
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4470252990722656,
      "learning_rate": 0.0004520605343209137,
      "loss": 3.1882,
      "step": 76218
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9534809589385986,
      "learning_rate": 0.00045205700814638516,
      "loss": 2.8002,
      "step": 76219
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6593587398529053,
      "learning_rate": 0.00045205348194358633,
      "loss": 2.8338,
      "step": 76220
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.508741855621338,
      "learning_rate": 0.00045204995571251785,
      "loss": 2.8689,
      "step": 76221
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.039330244064331,
      "learning_rate": 0.0004520464294531804,
      "loss": 2.799,
      "step": 76222
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3790433406829834,
      "learning_rate": 0.00045204290316557466,
      "loss": 3.0366,
      "step": 76223
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0998618602752686,
      "learning_rate": 0.0004520393768497013,
      "loss": 2.9553,
      "step": 76224
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.849045753479004,
      "learning_rate": 0.0004520358505055609,
      "loss": 2.8602,
      "step": 76225
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5425236225128174,
      "learning_rate": 0.0004520323241331542,
      "loss": 2.9888,
      "step": 76226
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.391632556915283,
      "learning_rate": 0.00045202879773248184,
      "loss": 2.924,
      "step": 76227
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8528141975402832,
      "learning_rate": 0.00045202527130354446,
      "loss": 2.8726,
      "step": 76228
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6525945663452148,
      "learning_rate": 0.0004520217448463427,
      "loss": 2.9689,
      "step": 76229
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.848015308380127,
      "learning_rate": 0.0004520182183608772,
      "loss": 2.8932,
      "step": 76230
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.198796510696411,
      "learning_rate": 0.00045201469184714873,
      "loss": 3.2332,
      "step": 76231
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.016963481903076,
      "learning_rate": 0.0004520111653051578,
      "loss": 3.1327,
      "step": 76232
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.854950189590454,
      "learning_rate": 0.000452007638734905,
      "loss": 2.6032,
      "step": 76233
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.2333741188049316,
      "learning_rate": 0.00045200411213639136,
      "loss": 2.8756,
      "step": 76234
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.1211776733398438,
      "learning_rate": 0.0004520005855096171,
      "loss": 2.8517,
      "step": 76235
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6617259979248047,
      "learning_rate": 0.00045199705885458314,
      "loss": 3.0623,
      "step": 76236
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.61631178855896,
      "learning_rate": 0.00045199353217129,
      "loss": 3.1652,
      "step": 76237
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.309910535812378,
      "learning_rate": 0.00045199000545973844,
      "loss": 2.7822,
      "step": 76238
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.130373954772949,
      "learning_rate": 0.0004519864787199291,
      "loss": 2.8887,
      "step": 76239
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5739166736602783,
      "learning_rate": 0.0004519829519518626,
      "loss": 2.9744,
      "step": 76240
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6924257278442383,
      "learning_rate": 0.0004519794251555396,
      "loss": 2.8256,
      "step": 76241
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6203804016113281,
      "learning_rate": 0.00045197589833096064,
      "loss": 2.9197,
      "step": 76242
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0395243167877197,
      "learning_rate": 0.00045197237147812655,
      "loss": 2.8877,
      "step": 76243
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6220487356185913,
      "learning_rate": 0.00045196884459703794,
      "loss": 3.0347,
      "step": 76244
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4868336915969849,
      "learning_rate": 0.0004519653176876955,
      "loss": 3.1194,
      "step": 76245
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.524558663368225,
      "learning_rate": 0.0004519617907500998,
      "loss": 2.9113,
      "step": 76246
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5184048414230347,
      "learning_rate": 0.0004519582637842516,
      "loss": 2.9579,
      "step": 76247
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6985710859298706,
      "learning_rate": 0.0004519547367901514,
      "loss": 3.0904,
      "step": 76248
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5699492692947388,
      "learning_rate": 0.00045195120976779994,
      "loss": 3.0537,
      "step": 76249
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.3403220176696777,
      "learning_rate": 0.00045194768271719795,
      "loss": 2.9294,
      "step": 76250
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8207238912582397,
      "learning_rate": 0.000451944155638346,
      "loss": 3.0144,
      "step": 76251
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5560483932495117,
      "learning_rate": 0.00045194062853124464,
      "loss": 3.1035,
      "step": 76252
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5816409587860107,
      "learning_rate": 0.00045193710139589485,
      "loss": 3.1598,
      "step": 76253
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.968413233757019,
      "learning_rate": 0.00045193357423229703,
      "loss": 3.0054,
      "step": 76254
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8510820865631104,
      "learning_rate": 0.00045193004704045174,
      "loss": 3.0654,
      "step": 76255
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.342806816101074,
      "learning_rate": 0.00045192651982036,
      "loss": 2.9062,
      "step": 76256
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.24333119392395,
      "learning_rate": 0.0004519229925720221,
      "loss": 3.1212,
      "step": 76257
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.80838143825531,
      "learning_rate": 0.00045191946529543885,
      "loss": 3.0937,
      "step": 76258
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1163203716278076,
      "learning_rate": 0.000451915937990611,
      "loss": 3.1162,
      "step": 76259
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0529181957244873,
      "learning_rate": 0.00045191241065753907,
      "loss": 3.1418,
      "step": 76260
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5739853382110596,
      "learning_rate": 0.0004519088832962237,
      "loss": 2.7403,
      "step": 76261
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5178414583206177,
      "learning_rate": 0.00045190535590666574,
      "loss": 2.9179,
      "step": 76262
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4888967275619507,
      "learning_rate": 0.00045190182848886556,
      "loss": 3.1425,
      "step": 76263
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7080661058425903,
      "learning_rate": 0.000451898301042824,
      "loss": 2.9672,
      "step": 76264
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7490870952606201,
      "learning_rate": 0.00045189477356854174,
      "loss": 2.8497,
      "step": 76265
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4886856079101562,
      "learning_rate": 0.00045189124606601934,
      "loss": 3.0224,
      "step": 76266
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.081519842147827,
      "learning_rate": 0.0004518877185352575,
      "loss": 3.0799,
      "step": 76267
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.833965539932251,
      "learning_rate": 0.00045188419097625683,
      "loss": 3.0298,
      "step": 76268
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9123272895812988,
      "learning_rate": 0.000451880663389018,
      "loss": 2.7401,
      "step": 76269
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3093457221984863,
      "learning_rate": 0.00045187713577354177,
      "loss": 2.9084,
      "step": 76270
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5006580352783203,
      "learning_rate": 0.00045187360812982865,
      "loss": 2.9576,
      "step": 76271
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.6547188758850098,
      "learning_rate": 0.0004518700804578794,
      "loss": 3.1401,
      "step": 76272
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.59373140335083,
      "learning_rate": 0.00045186655275769464,
      "loss": 2.9739,
      "step": 76273
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.646953821182251,
      "learning_rate": 0.000451863025029275,
      "loss": 2.9125,
      "step": 76274
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4021852016448975,
      "learning_rate": 0.0004518594972726211,
      "loss": 3.0739,
      "step": 76275
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5769702196121216,
      "learning_rate": 0.00045185596948773367,
      "loss": 2.9515,
      "step": 76276
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8471641540527344,
      "learning_rate": 0.0004518524416746135,
      "loss": 2.8246,
      "step": 76277
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.454237937927246,
      "learning_rate": 0.0004518489138332609,
      "loss": 2.9044,
      "step": 76278
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3196871280670166,
      "learning_rate": 0.0004518453859636768,
      "loss": 3.0414,
      "step": 76279
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5847961902618408,
      "learning_rate": 0.0004518418580658619,
      "loss": 3.025,
      "step": 76280
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7912713289260864,
      "learning_rate": 0.0004518383301398165,
      "loss": 3.0059,
      "step": 76281
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9969245195388794,
      "learning_rate": 0.0004518348021855416,
      "loss": 3.1542,
      "step": 76282
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.280977725982666,
      "learning_rate": 0.00045183127420303775,
      "loss": 3.1243,
      "step": 76283
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5437268018722534,
      "learning_rate": 0.0004518277461923056,
      "loss": 3.0481,
      "step": 76284
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.860628604888916,
      "learning_rate": 0.0004518242181533458,
      "loss": 3.1784,
      "step": 76285
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0905966758728027,
      "learning_rate": 0.00045182069008615903,
      "loss": 2.9592,
      "step": 76286
    },
    {
      "epoch": 0.99,
      "grad_norm": 4.212475299835205,
      "learning_rate": 0.0004518171619907459,
      "loss": 2.8117,
      "step": 76287
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7703897953033447,
      "learning_rate": 0.00045181363386710704,
      "loss": 3.0359,
      "step": 76288
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4726568460464478,
      "learning_rate": 0.00045181010571524333,
      "loss": 3.0487,
      "step": 76289
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.659501075744629,
      "learning_rate": 0.0004518065775351551,
      "loss": 2.9959,
      "step": 76290
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.925377130508423,
      "learning_rate": 0.0004518030493268432,
      "loss": 2.7518,
      "step": 76291
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6629343032836914,
      "learning_rate": 0.00045179952109030823,
      "loss": 3.23,
      "step": 76292
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9395583868026733,
      "learning_rate": 0.0004517959928255509,
      "loss": 2.9578,
      "step": 76293
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.090423583984375,
      "learning_rate": 0.00045179246453257177,
      "loss": 3.0765,
      "step": 76294
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4742655754089355,
      "learning_rate": 0.0004517889362113717,
      "loss": 3.2636,
      "step": 76295
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.038353204727173,
      "learning_rate": 0.0004517854078619511,
      "loss": 2.9609,
      "step": 76296
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.035111427307129,
      "learning_rate": 0.0004517818794843107,
      "loss": 3.2101,
      "step": 76297
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.376425266265869,
      "learning_rate": 0.00045177835107845126,
      "loss": 2.9376,
      "step": 76298
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6158934831619263,
      "learning_rate": 0.0004517748226443733,
      "loss": 3.2332,
      "step": 76299
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.16247296333313,
      "learning_rate": 0.00045177129418207747,
      "loss": 2.7897,
      "step": 76300
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.126129388809204,
      "learning_rate": 0.0004517677656915646,
      "loss": 2.8657,
      "step": 76301
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.478235125541687,
      "learning_rate": 0.00045176423717283525,
      "loss": 2.9676,
      "step": 76302
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7338402271270752,
      "learning_rate": 0.00045176070862589,
      "loss": 3.13,
      "step": 76303
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.377065896987915,
      "learning_rate": 0.00045175718005072954,
      "loss": 3.0234,
      "step": 76304
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5722496509552002,
      "learning_rate": 0.0004517536514473547,
      "loss": 3.0898,
      "step": 76305
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6123645305633545,
      "learning_rate": 0.00045175012281576586,
      "loss": 3.1533,
      "step": 76306
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7208788394927979,
      "learning_rate": 0.00045174659415596384,
      "loss": 3.0004,
      "step": 76307
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.986315131187439,
      "learning_rate": 0.0004517430654679493,
      "loss": 3.2041,
      "step": 76308
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9110558032989502,
      "learning_rate": 0.0004517395367517228,
      "loss": 3.1909,
      "step": 76309
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8380290269851685,
      "learning_rate": 0.0004517360080072851,
      "loss": 3.0225,
      "step": 76310
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.311833143234253,
      "learning_rate": 0.0004517324792346368,
      "loss": 3.0197,
      "step": 76311
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4300235509872437,
      "learning_rate": 0.0004517289504337786,
      "loss": 3.1857,
      "step": 76312
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7523534297943115,
      "learning_rate": 0.0004517254216047111,
      "loss": 3.0162,
      "step": 76313
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5183779001235962,
      "learning_rate": 0.0004517218927474349,
      "loss": 2.8197,
      "step": 76314
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5242828130722046,
      "learning_rate": 0.00045171836386195086,
      "loss": 3.0391,
      "step": 76315
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.571273684501648,
      "learning_rate": 0.0004517148349482595,
      "loss": 2.8208,
      "step": 76316
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5392669439315796,
      "learning_rate": 0.00045171130600636135,
      "loss": 2.9503,
      "step": 76317
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6128349304199219,
      "learning_rate": 0.0004517077770362573,
      "loss": 3.0954,
      "step": 76318
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5601708889007568,
      "learning_rate": 0.00045170424803794797,
      "loss": 3.2313,
      "step": 76319
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9241031408309937,
      "learning_rate": 0.0004517007190114339,
      "loss": 2.9802,
      "step": 76320
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7298527956008911,
      "learning_rate": 0.0004516971899567157,
      "loss": 3.2563,
      "step": 76321
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5756813287734985,
      "learning_rate": 0.0004516936608737942,
      "loss": 2.9544,
      "step": 76322
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0631775856018066,
      "learning_rate": 0.0004516901317626701,
      "loss": 2.9018,
      "step": 76323
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.662939190864563,
      "learning_rate": 0.00045168660262334383,
      "loss": 3.2587,
      "step": 76324
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4412872791290283,
      "learning_rate": 0.00045168307345581617,
      "loss": 3.0064,
      "step": 76325
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4531772136688232,
      "learning_rate": 0.0004516795442600878,
      "loss": 2.6974,
      "step": 76326
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5779129266738892,
      "learning_rate": 0.00045167601503615936,
      "loss": 2.8258,
      "step": 76327
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7969015836715698,
      "learning_rate": 0.0004516724857840314,
      "loss": 2.9844,
      "step": 76328
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.919679045677185,
      "learning_rate": 0.00045166895650370476,
      "loss": 2.9578,
      "step": 76329
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4947569370269775,
      "learning_rate": 0.0004516654271951799,
      "loss": 3.068,
      "step": 76330
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.578599214553833,
      "learning_rate": 0.0004516618978584576,
      "loss": 3.0069,
      "step": 76331
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.979594111442566,
      "learning_rate": 0.0004516583684935386,
      "loss": 2.8723,
      "step": 76332
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6156620979309082,
      "learning_rate": 0.00045165483910042327,
      "loss": 3.3026,
      "step": 76333
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8650611639022827,
      "learning_rate": 0.00045165130967911256,
      "loss": 2.7083,
      "step": 76334
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.075568675994873,
      "learning_rate": 0.000451647780229607,
      "loss": 2.8283,
      "step": 76335
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.141000747680664,
      "learning_rate": 0.00045164425075190715,
      "loss": 3.1553,
      "step": 76336
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6104170083999634,
      "learning_rate": 0.0004516407212460139,
      "loss": 3.061,
      "step": 76337
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.437557339668274,
      "learning_rate": 0.00045163719171192776,
      "loss": 3.215,
      "step": 76338
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5953006744384766,
      "learning_rate": 0.00045163366214964935,
      "loss": 3.2913,
      "step": 76339
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9287655353546143,
      "learning_rate": 0.0004516301325591794,
      "loss": 2.961,
      "step": 76340
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.585188388824463,
      "learning_rate": 0.0004516266029405186,
      "loss": 3.1695,
      "step": 76341
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.1712727546691895,
      "learning_rate": 0.0004516230732936675,
      "loss": 3.1374,
      "step": 76342
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.3211071491241455,
      "learning_rate": 0.00045161954361862673,
      "loss": 3.0884,
      "step": 76343
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7892823219299316,
      "learning_rate": 0.0004516160139153972,
      "loss": 2.9546,
      "step": 76344
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.1928157806396484,
      "learning_rate": 0.0004516124841839793,
      "loss": 3.224,
      "step": 76345
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.059103012084961,
      "learning_rate": 0.0004516089544243737,
      "loss": 3.1946,
      "step": 76346
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.3147945404052734,
      "learning_rate": 0.00045160542463658127,
      "loss": 2.5674,
      "step": 76347
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.911165714263916,
      "learning_rate": 0.0004516018948206025,
      "loss": 3.1694,
      "step": 76348
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7746793031692505,
      "learning_rate": 0.00045159836497643796,
      "loss": 3.1233,
      "step": 76349
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.6866519451141357,
      "learning_rate": 0.00045159483510408857,
      "loss": 3.0896,
      "step": 76350
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.411402463912964,
      "learning_rate": 0.00045159130520355475,
      "loss": 3.1835,
      "step": 76351
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8069688081741333,
      "learning_rate": 0.0004515877752748372,
      "loss": 3.1999,
      "step": 76352
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9488664865493774,
      "learning_rate": 0.0004515842453179368,
      "loss": 2.8567,
      "step": 76353
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8539483547210693,
      "learning_rate": 0.00045158071533285386,
      "loss": 3.2042,
      "step": 76354
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.117621898651123,
      "learning_rate": 0.00045157718531958925,
      "loss": 3.1712,
      "step": 76355
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5635687112808228,
      "learning_rate": 0.00045157365527814363,
      "loss": 2.9758,
      "step": 76356
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.683663845062256,
      "learning_rate": 0.00045157012520851753,
      "loss": 2.9893,
      "step": 76357
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.504885673522949,
      "learning_rate": 0.0004515665951107117,
      "loss": 2.8982,
      "step": 76358
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.535788655281067,
      "learning_rate": 0.0004515630649847269,
      "loss": 3.15,
      "step": 76359
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5291268825531006,
      "learning_rate": 0.0004515595348305635,
      "loss": 3.187,
      "step": 76360
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5727436542510986,
      "learning_rate": 0.0004515560046482224,
      "loss": 2.7834,
      "step": 76361
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.854756474494934,
      "learning_rate": 0.0004515524744377042,
      "loss": 2.9803,
      "step": 76362
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4343416690826416,
      "learning_rate": 0.00045154894419900947,
      "loss": 3.1828,
      "step": 76363
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.934677243232727,
      "learning_rate": 0.0004515454139321389,
      "loss": 3.1263,
      "step": 76364
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8340262174606323,
      "learning_rate": 0.0004515418836370933,
      "loss": 3.0153,
      "step": 76365
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.652441382408142,
      "learning_rate": 0.00045153835331387314,
      "loss": 3.0451,
      "step": 76366
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7480098009109497,
      "learning_rate": 0.0004515348229624791,
      "loss": 2.8945,
      "step": 76367
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6320254802703857,
      "learning_rate": 0.0004515312925829119,
      "loss": 3.0819,
      "step": 76368
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.064286947250366,
      "learning_rate": 0.0004515277621751723,
      "loss": 2.9584,
      "step": 76369
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8130981922149658,
      "learning_rate": 0.00045152423173926067,
      "loss": 3.0865,
      "step": 76370
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.72994065284729,
      "learning_rate": 0.00045152070127517786,
      "loss": 3.0989,
      "step": 76371
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7902594804763794,
      "learning_rate": 0.0004515171707829245,
      "loss": 3.0281,
      "step": 76372
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6365729570388794,
      "learning_rate": 0.00045151364026250123,
      "loss": 2.933,
      "step": 76373
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5635883808135986,
      "learning_rate": 0.00045151010971390866,
      "loss": 2.8534,
      "step": 76374
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5378440618515015,
      "learning_rate": 0.0004515065791371477,
      "loss": 3.095,
      "step": 76375
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.952828049659729,
      "learning_rate": 0.00045150304853221857,
      "loss": 3.0146,
      "step": 76376
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.609752893447876,
      "learning_rate": 0.0004514995178991223,
      "loss": 2.9093,
      "step": 76377
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8921457529067993,
      "learning_rate": 0.00045149598723785933,
      "loss": 2.964,
      "step": 76378
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4860386848449707,
      "learning_rate": 0.00045149245654843047,
      "loss": 2.945,
      "step": 76379
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.9335075616836548,
      "learning_rate": 0.00045148892583083626,
      "loss": 3.121,
      "step": 76380
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4821990728378296,
      "learning_rate": 0.0004514853950850774,
      "loss": 2.9286,
      "step": 76381
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5375312566757202,
      "learning_rate": 0.0004514818643111546,
      "loss": 3.1625,
      "step": 76382
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.0843608379364014,
      "learning_rate": 0.0004514783335090684,
      "loss": 3.1431,
      "step": 76383
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6385071277618408,
      "learning_rate": 0.00045147480267881956,
      "loss": 2.843,
      "step": 76384
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8811230659484863,
      "learning_rate": 0.00045147127182040874,
      "loss": 3.1508,
      "step": 76385
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5514696836471558,
      "learning_rate": 0.0004514677409338364,
      "loss": 3.0621,
      "step": 76386
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.5799386501312256,
      "learning_rate": 0.00045146421001910345,
      "loss": 3.0957,
      "step": 76387
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5893489122390747,
      "learning_rate": 0.0004514606790762104,
      "loss": 2.9469,
      "step": 76388
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.524456262588501,
      "learning_rate": 0.00045145714810515796,
      "loss": 3.3713,
      "step": 76389
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.693684697151184,
      "learning_rate": 0.0004514536171059469,
      "loss": 2.8121,
      "step": 76390
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.571424126625061,
      "learning_rate": 0.0004514500860785776,
      "loss": 3.0682,
      "step": 76391
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5363856554031372,
      "learning_rate": 0.0004514465550230509,
      "loss": 3.0042,
      "step": 76392
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6873204708099365,
      "learning_rate": 0.0004514430239393675,
      "loss": 2.6861,
      "step": 76393
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6032798290252686,
      "learning_rate": 0.0004514394928275279,
      "loss": 2.9379,
      "step": 76394
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6941535472869873,
      "learning_rate": 0.00045143596168753286,
      "loss": 2.9902,
      "step": 76395
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5173420906066895,
      "learning_rate": 0.0004514324305193831,
      "loss": 2.94,
      "step": 76396
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7825117111206055,
      "learning_rate": 0.0004514288993230791,
      "loss": 3.1231,
      "step": 76397
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.4753177165985107,
      "learning_rate": 0.00045142536809862155,
      "loss": 3.0798,
      "step": 76398
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6705073118209839,
      "learning_rate": 0.00045142183684601124,
      "loss": 3.0165,
      "step": 76399
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6940803527832031,
      "learning_rate": 0.00045141830556524876,
      "loss": 3.1894,
      "step": 76400
    },
    {
      "epoch": 0.99,
      "grad_norm": 2.062011480331421,
      "learning_rate": 0.00045141477425633474,
      "loss": 3.0325,
      "step": 76401
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7864116430282593,
      "learning_rate": 0.0004514112429192699,
      "loss": 3.0899,
      "step": 76402
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5703065395355225,
      "learning_rate": 0.00045140771155405474,
      "loss": 2.9954,
      "step": 76403
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7003753185272217,
      "learning_rate": 0.0004514041801606902,
      "loss": 2.8094,
      "step": 76404
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5524747371673584,
      "learning_rate": 0.0004514006487391766,
      "loss": 3.1149,
      "step": 76405
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6017800569534302,
      "learning_rate": 0.0004513971172895148,
      "loss": 3.044,
      "step": 76406
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.6464427709579468,
      "learning_rate": 0.0004513935858117056,
      "loss": 3.0736,
      "step": 76407
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.406213641166687,
      "learning_rate": 0.0004513900543057492,
      "loss": 2.8864,
      "step": 76408
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.4503470659255981,
      "learning_rate": 0.0004513865227716467,
      "loss": 3.0694,
      "step": 76409
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8680588006973267,
      "learning_rate": 0.00045138299120939853,
      "loss": 2.8904,
      "step": 76410
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7225682735443115,
      "learning_rate": 0.00045137945961900547,
      "loss": 2.7215,
      "step": 76411
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7771464586257935,
      "learning_rate": 0.000451375928000468,
      "loss": 3.3322,
      "step": 76412
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8936238288879395,
      "learning_rate": 0.00045137239635378694,
      "loss": 3.0853,
      "step": 76413
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5590633153915405,
      "learning_rate": 0.000451368864678963,
      "loss": 2.9704,
      "step": 76414
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.8544468879699707,
      "learning_rate": 0.0004513653329759966,
      "loss": 3.2591,
      "step": 76415
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.5253369808197021,
      "learning_rate": 0.00045136180124488853,
      "loss": 3.1457,
      "step": 76416
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5959826707839966,
      "learning_rate": 0.0004513582694856395,
      "loss": 3.2379,
      "step": 76417
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6963826417922974,
      "learning_rate": 0.00045135473769825014,
      "loss": 2.8005,
      "step": 76418
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1103460788726807,
      "learning_rate": 0.000451351205882721,
      "loss": 2.9732,
      "step": 76419
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8448807001113892,
      "learning_rate": 0.0004513476740390528,
      "loss": 3.0296,
      "step": 76420
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.802764892578125,
      "learning_rate": 0.0004513441421672462,
      "loss": 3.2167,
      "step": 76421
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8071844577789307,
      "learning_rate": 0.00045134061026730197,
      "loss": 3.2853,
      "step": 76422
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5465176105499268,
      "learning_rate": 0.00045133707833922064,
      "loss": 2.9976,
      "step": 76423
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7690248489379883,
      "learning_rate": 0.0004513335463830028,
      "loss": 3.0459,
      "step": 76424
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.030531644821167,
      "learning_rate": 0.0004513300143986492,
      "loss": 3.0057,
      "step": 76425
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8972384929656982,
      "learning_rate": 0.0004513264823861606,
      "loss": 2.999,
      "step": 76426
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.553919553756714,
      "learning_rate": 0.00045132295034553755,
      "loss": 2.7709,
      "step": 76427
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6336523294448853,
      "learning_rate": 0.0004513194182767806,
      "loss": 3.0584,
      "step": 76428
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6251884698867798,
      "learning_rate": 0.00045131588617989063,
      "loss": 2.8843,
      "step": 76429
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5339981317520142,
      "learning_rate": 0.0004513123540548681,
      "loss": 2.9794,
      "step": 76430
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1453359127044678,
      "learning_rate": 0.00045130882190171377,
      "loss": 3.0411,
      "step": 76431
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8575420379638672,
      "learning_rate": 0.0004513052897204283,
      "loss": 2.9375,
      "step": 76432
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7817902565002441,
      "learning_rate": 0.0004513017575110123,
      "loss": 2.8767,
      "step": 76433
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7537394762039185,
      "learning_rate": 0.00045129822527346637,
      "loss": 2.9336,
      "step": 76434
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.13919734954834,
      "learning_rate": 0.0004512946930077913,
      "loss": 3.1872,
      "step": 76435
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7626941204071045,
      "learning_rate": 0.00045129116071398773,
      "loss": 3.0224,
      "step": 76436
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3570547103881836,
      "learning_rate": 0.00045128762839205625,
      "loss": 3.0741,
      "step": 76437
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.088353395462036,
      "learning_rate": 0.0004512840960419975,
      "loss": 2.9173,
      "step": 76438
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6198457479476929,
      "learning_rate": 0.0004512805636638123,
      "loss": 2.9432,
      "step": 76439
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3927077054977417,
      "learning_rate": 0.0004512770312575011,
      "loss": 3.1756,
      "step": 76440
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.653874158859253,
      "learning_rate": 0.00045127349882306464,
      "loss": 3.0782,
      "step": 76441
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8302334547042847,
      "learning_rate": 0.00045126996636050363,
      "loss": 3.006,
      "step": 76442
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.156456232070923,
      "learning_rate": 0.0004512664338698187,
      "loss": 3.0015,
      "step": 76443
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2404768466949463,
      "learning_rate": 0.00045126290135101034,
      "loss": 3.1136,
      "step": 76444
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4130303859710693,
      "learning_rate": 0.0004512593688040795,
      "loss": 3.0995,
      "step": 76445
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.866516351699829,
      "learning_rate": 0.00045125583622902663,
      "loss": 3.082,
      "step": 76446
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6427370309829712,
      "learning_rate": 0.00045125230362585246,
      "loss": 2.9777,
      "step": 76447
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8773949146270752,
      "learning_rate": 0.00045124877099455765,
      "loss": 3.0257,
      "step": 76448
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7281368970870972,
      "learning_rate": 0.0004512452383351428,
      "loss": 3.1473,
      "step": 76449
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.058924674987793,
      "learning_rate": 0.00045124170564760855,
      "loss": 2.8025,
      "step": 76450
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6854138374328613,
      "learning_rate": 0.0004512381729319558,
      "loss": 3.1202,
      "step": 76451
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.737621545791626,
      "learning_rate": 0.00045123464018818475,
      "loss": 2.9518,
      "step": 76452
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4724334478378296,
      "learning_rate": 0.00045123110741629647,
      "loss": 3.2071,
      "step": 76453
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.742034673690796,
      "learning_rate": 0.00045122757461629157,
      "loss": 3.0499,
      "step": 76454
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.346095561981201,
      "learning_rate": 0.0004512240417881705,
      "loss": 2.8841,
      "step": 76455
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.953338623046875,
      "learning_rate": 0.00045122050893193404,
      "loss": 3.0949,
      "step": 76456
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9623918533325195,
      "learning_rate": 0.0004512169760475828,
      "loss": 3.0569,
      "step": 76457
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7209599018096924,
      "learning_rate": 0.00045121344313511747,
      "loss": 2.9315,
      "step": 76458
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5484747886657715,
      "learning_rate": 0.00045120991019453877,
      "loss": 2.9832,
      "step": 76459
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.405053973197937,
      "learning_rate": 0.00045120637722584724,
      "loss": 3.1547,
      "step": 76460
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4774713516235352,
      "learning_rate": 0.0004512028442290436,
      "loss": 3.0565,
      "step": 76461
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5249305963516235,
      "learning_rate": 0.00045119931120412855,
      "loss": 3.0434,
      "step": 76462
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8029221296310425,
      "learning_rate": 0.0004511957781511027,
      "loss": 2.7235,
      "step": 76463
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7961275577545166,
      "learning_rate": 0.00045119224506996665,
      "loss": 2.8383,
      "step": 76464
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4209363460540771,
      "learning_rate": 0.00045118871196072105,
      "loss": 3.0034,
      "step": 76465
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7505664825439453,
      "learning_rate": 0.00045118517882336665,
      "loss": 2.7522,
      "step": 76466
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4187507629394531,
      "learning_rate": 0.00045118164565790414,
      "loss": 3.0257,
      "step": 76467
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0110931396484375,
      "learning_rate": 0.00045117811246433407,
      "loss": 3.1668,
      "step": 76468
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8155745267868042,
      "learning_rate": 0.0004511745792426571,
      "loss": 3.0596,
      "step": 76469
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3890352249145508,
      "learning_rate": 0.00045117104599287395,
      "loss": 3.004,
      "step": 76470
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0109052658081055,
      "learning_rate": 0.00045116751271498525,
      "loss": 2.8691,
      "step": 76471
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.552933931350708,
      "learning_rate": 0.0004511639794089916,
      "loss": 3.0614,
      "step": 76472
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6990771293640137,
      "learning_rate": 0.0004511604460748938,
      "loss": 3.013,
      "step": 76473
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.951339602470398,
      "learning_rate": 0.00045115691271269245,
      "loss": 3.0253,
      "step": 76474
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6544616222381592,
      "learning_rate": 0.0004511533793223881,
      "loss": 3.1954,
      "step": 76475
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4386237859725952,
      "learning_rate": 0.0004511498459039815,
      "loss": 3.1897,
      "step": 76476
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7319062948226929,
      "learning_rate": 0.00045114631245747323,
      "loss": 2.9698,
      "step": 76477
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4730339050292969,
      "learning_rate": 0.00045114277898286413,
      "loss": 2.8692,
      "step": 76478
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6844884157180786,
      "learning_rate": 0.00045113924548015463,
      "loss": 3.103,
      "step": 76479
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9407113790512085,
      "learning_rate": 0.00045113571194934553,
      "loss": 2.909,
      "step": 76480
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3548223972320557,
      "learning_rate": 0.00045113217839043754,
      "loss": 3.1082,
      "step": 76481
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9783878326416016,
      "learning_rate": 0.00045112864480343105,
      "loss": 3.0475,
      "step": 76482
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.699880599975586,
      "learning_rate": 0.00045112511118832697,
      "loss": 3.1203,
      "step": 76483
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7322115898132324,
      "learning_rate": 0.00045112157754512584,
      "loss": 3.0943,
      "step": 76484
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.435706853866577,
      "learning_rate": 0.0004511180438738285,
      "loss": 3.1473,
      "step": 76485
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6085103750228882,
      "learning_rate": 0.0004511145101744353,
      "loss": 3.1691,
      "step": 76486
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5853040218353271,
      "learning_rate": 0.0004511109764469471,
      "loss": 2.9919,
      "step": 76487
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8386201858520508,
      "learning_rate": 0.00045110744269136465,
      "loss": 3.0352,
      "step": 76488
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8856778144836426,
      "learning_rate": 0.0004511039089076883,
      "loss": 3.1316,
      "step": 76489
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6781717538833618,
      "learning_rate": 0.00045110037509591897,
      "loss": 2.997,
      "step": 76490
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6206490993499756,
      "learning_rate": 0.00045109684125605717,
      "loss": 2.9052,
      "step": 76491
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.580841541290283,
      "learning_rate": 0.0004510933073881037,
      "loss": 3.211,
      "step": 76492
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6712135076522827,
      "learning_rate": 0.00045108977349205904,
      "loss": 2.9695,
      "step": 76493
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4460320472717285,
      "learning_rate": 0.000451086239567924,
      "loss": 3.0644,
      "step": 76494
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.738041639328003,
      "learning_rate": 0.00045108270561569906,
      "loss": 2.7531,
      "step": 76495
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5028553009033203,
      "learning_rate": 0.0004510791716353852,
      "loss": 3.0345,
      "step": 76496
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.915635108947754,
      "learning_rate": 0.00045107563762698274,
      "loss": 3.0274,
      "step": 76497
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7765612602233887,
      "learning_rate": 0.0004510721035904924,
      "loss": 3.3091,
      "step": 76498
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8329288959503174,
      "learning_rate": 0.00045106856952591514,
      "loss": 2.9834,
      "step": 76499
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.827033758163452,
      "learning_rate": 0.0004510650354332511,
      "loss": 2.903,
      "step": 76500
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7001625299453735,
      "learning_rate": 0.0004510615013125014,
      "loss": 3.0841,
      "step": 76501
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5728182792663574,
      "learning_rate": 0.00045105796716366647,
      "loss": 2.761,
      "step": 76502
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.077671527862549,
      "learning_rate": 0.00045105443298674704,
      "loss": 2.888,
      "step": 76503
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6192926168441772,
      "learning_rate": 0.0004510508987817437,
      "loss": 2.9569,
      "step": 76504
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5145119428634644,
      "learning_rate": 0.00045104736454865715,
      "loss": 3.0339,
      "step": 76505
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8166086673736572,
      "learning_rate": 0.0004510438302874881,
      "loss": 2.9729,
      "step": 76506
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5022401809692383,
      "learning_rate": 0.00045104029599823703,
      "loss": 2.9713,
      "step": 76507
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.477738618850708,
      "learning_rate": 0.00045103676168090475,
      "loss": 2.859,
      "step": 76508
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6176960468292236,
      "learning_rate": 0.00045103322733549195,
      "loss": 3.1224,
      "step": 76509
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6698321104049683,
      "learning_rate": 0.00045102969296199923,
      "loss": 2.7857,
      "step": 76510
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6331219673156738,
      "learning_rate": 0.0004510261585604272,
      "loss": 3.2804,
      "step": 76511
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5579142570495605,
      "learning_rate": 0.0004510226241307765,
      "loss": 2.8425,
      "step": 76512
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8975048065185547,
      "learning_rate": 0.00045101908967304794,
      "loss": 2.8936,
      "step": 76513
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.927573800086975,
      "learning_rate": 0.00045101555518724197,
      "loss": 2.9613,
      "step": 76514
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6586312055587769,
      "learning_rate": 0.00045101202067335955,
      "loss": 2.9325,
      "step": 76515
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5521570444107056,
      "learning_rate": 0.00045100848613140106,
      "loss": 2.761,
      "step": 76516
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6555922031402588,
      "learning_rate": 0.00045100495156136716,
      "loss": 3.2473,
      "step": 76517
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6197913885116577,
      "learning_rate": 0.00045100141696325865,
      "loss": 2.9946,
      "step": 76518
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6386407613754272,
      "learning_rate": 0.0004509978823370761,
      "loss": 3.2704,
      "step": 76519
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.391431212425232,
      "learning_rate": 0.0004509943476828202,
      "loss": 3.0326,
      "step": 76520
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7408323287963867,
      "learning_rate": 0.00045099081300049165,
      "loss": 3.0896,
      "step": 76521
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4752839803695679,
      "learning_rate": 0.00045098727829009103,
      "loss": 3.0694,
      "step": 76522
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3671960830688477,
      "learning_rate": 0.00045098374355161895,
      "loss": 2.9797,
      "step": 76523
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8430284261703491,
      "learning_rate": 0.00045098020878507627,
      "loss": 2.9154,
      "step": 76524
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5209875106811523,
      "learning_rate": 0.0004509766739904634,
      "loss": 3.1029,
      "step": 76525
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6056501865386963,
      "learning_rate": 0.0004509731391677811,
      "loss": 3.054,
      "step": 76526
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5419520139694214,
      "learning_rate": 0.0004509696043170302,
      "loss": 3.1798,
      "step": 76527
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7210882902145386,
      "learning_rate": 0.00045096606943821106,
      "loss": 3.0316,
      "step": 76528
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2998433113098145,
      "learning_rate": 0.00045096253453132454,
      "loss": 3.1296,
      "step": 76529
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1439318656921387,
      "learning_rate": 0.0004509589995963712,
      "loss": 2.9997,
      "step": 76530
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7865070104599,
      "learning_rate": 0.0004509554646333518,
      "loss": 2.839,
      "step": 76531
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4970247745513916,
      "learning_rate": 0.0004509519296422669,
      "loss": 2.8675,
      "step": 76532
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5510149002075195,
      "learning_rate": 0.0004509483946231172,
      "loss": 3.2067,
      "step": 76533
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.822708249092102,
      "learning_rate": 0.0004509448595759033,
      "loss": 2.9874,
      "step": 76534
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.54899263381958,
      "learning_rate": 0.0004509413245006259,
      "loss": 3.347,
      "step": 76535
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1166903972625732,
      "learning_rate": 0.0004509377893972857,
      "loss": 3.1465,
      "step": 76536
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7563586235046387,
      "learning_rate": 0.0004509342542658833,
      "loss": 2.9151,
      "step": 76537
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8462432622909546,
      "learning_rate": 0.0004509307191064193,
      "loss": 2.9206,
      "step": 76538
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.783334732055664,
      "learning_rate": 0.00045092718391889455,
      "loss": 2.9754,
      "step": 76539
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8785864114761353,
      "learning_rate": 0.00045092364870330954,
      "loss": 3.0184,
      "step": 76540
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.517685890197754,
      "learning_rate": 0.000450920113459665,
      "loss": 3.2886,
      "step": 76541
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9149597883224487,
      "learning_rate": 0.00045091657818796153,
      "loss": 3.0163,
      "step": 76542
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0450711250305176,
      "learning_rate": 0.0004509130428881998,
      "loss": 3.0137,
      "step": 76543
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9679490327835083,
      "learning_rate": 0.0004509095075603805,
      "loss": 3.1086,
      "step": 76544
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.156470537185669,
      "learning_rate": 0.0004509059722045043,
      "loss": 2.9502,
      "step": 76545
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8125460147857666,
      "learning_rate": 0.00045090243682057183,
      "loss": 3.0761,
      "step": 76546
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9761296510696411,
      "learning_rate": 0.0004508989014085837,
      "loss": 2.8908,
      "step": 76547
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.658068060874939,
      "learning_rate": 0.0004508953659685407,
      "loss": 2.792,
      "step": 76548
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7140239477157593,
      "learning_rate": 0.0004508918305004433,
      "loss": 3.0428,
      "step": 76549
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.617393732070923,
      "learning_rate": 0.0004508882950042923,
      "loss": 3.0394,
      "step": 76550
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.678532361984253,
      "learning_rate": 0.0004508847594800884,
      "loss": 3.0374,
      "step": 76551
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.961455225944519,
      "learning_rate": 0.0004508812239278321,
      "loss": 2.9957,
      "step": 76552
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.628619909286499,
      "learning_rate": 0.0004508776883475241,
      "loss": 2.8371,
      "step": 76553
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7581933736801147,
      "learning_rate": 0.00045087415273916515,
      "loss": 3.1485,
      "step": 76554
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7465602159500122,
      "learning_rate": 0.00045087061710275575,
      "loss": 2.8012,
      "step": 76555
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.7439725399017334,
      "learning_rate": 0.00045086708143829673,
      "loss": 3.1044,
      "step": 76556
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8895968198776245,
      "learning_rate": 0.0004508635457457887,
      "loss": 2.9335,
      "step": 76557
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7123397588729858,
      "learning_rate": 0.00045086001002523225,
      "loss": 2.9664,
      "step": 76558
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.729006290435791,
      "learning_rate": 0.0004508564742766281,
      "loss": 2.8795,
      "step": 76559
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7359533309936523,
      "learning_rate": 0.00045085293849997686,
      "loss": 3.1072,
      "step": 76560
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.084449291229248,
      "learning_rate": 0.00045084940269527927,
      "loss": 2.7525,
      "step": 76561
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7958766222000122,
      "learning_rate": 0.00045084586686253587,
      "loss": 2.9695,
      "step": 76562
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7653868198394775,
      "learning_rate": 0.00045084233100174735,
      "loss": 3.0818,
      "step": 76563
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.372422695159912,
      "learning_rate": 0.00045083879511291443,
      "loss": 3.077,
      "step": 76564
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.743787169456482,
      "learning_rate": 0.00045083525919603775,
      "loss": 2.8165,
      "step": 76565
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7517058849334717,
      "learning_rate": 0.0004508317232511179,
      "loss": 3.064,
      "step": 76566
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.954739570617676,
      "learning_rate": 0.0004508281872781556,
      "loss": 3.0034,
      "step": 76567
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7124029397964478,
      "learning_rate": 0.00045082465127715157,
      "loss": 3.0794,
      "step": 76568
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4731066226959229,
      "learning_rate": 0.00045082111524810626,
      "loss": 3.1146,
      "step": 76569
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5027753114700317,
      "learning_rate": 0.0004508175791910206,
      "loss": 3.0029,
      "step": 76570
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.633031964302063,
      "learning_rate": 0.00045081404310589495,
      "loss": 3.0519,
      "step": 76571
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4783083200454712,
      "learning_rate": 0.0004508105069927302,
      "loss": 3.084,
      "step": 76572
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9116246700286865,
      "learning_rate": 0.00045080697085152693,
      "loss": 2.8916,
      "step": 76573
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.629298448562622,
      "learning_rate": 0.00045080343468228577,
      "loss": 3.2654,
      "step": 76574
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5613081455230713,
      "learning_rate": 0.00045079989848500747,
      "loss": 3.0092,
      "step": 76575
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8126145601272583,
      "learning_rate": 0.00045079636225969257,
      "loss": 3.0792,
      "step": 76576
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4141266345977783,
      "learning_rate": 0.0004507928260063418,
      "loss": 3.001,
      "step": 76577
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0193734169006348,
      "learning_rate": 0.00045078928972495583,
      "loss": 2.8211,
      "step": 76578
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7622032165527344,
      "learning_rate": 0.0004507857534155352,
      "loss": 3.0053,
      "step": 76579
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3467817306518555,
      "learning_rate": 0.0004507822170780807,
      "loss": 2.9747,
      "step": 76580
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8880449533462524,
      "learning_rate": 0.00045077868071259294,
      "loss": 2.946,
      "step": 76581
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8775525093078613,
      "learning_rate": 0.00045077514431907253,
      "loss": 3.0925,
      "step": 76582
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7818963527679443,
      "learning_rate": 0.0004507716078975203,
      "loss": 3.1096,
      "step": 76583
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.698704957962036,
      "learning_rate": 0.0004507680714479367,
      "loss": 2.9918,
      "step": 76584
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6746017932891846,
      "learning_rate": 0.00045076453497032247,
      "loss": 3.111,
      "step": 76585
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5598726272583008,
      "learning_rate": 0.0004507609984646782,
      "loss": 2.9423,
      "step": 76586
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.02262282371521,
      "learning_rate": 0.00045075746193100477,
      "loss": 2.7574,
      "step": 76587
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.941319465637207,
      "learning_rate": 0.00045075392536930253,
      "loss": 2.9759,
      "step": 76588
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.686169147491455,
      "learning_rate": 0.00045075038877957237,
      "loss": 3.0963,
      "step": 76589
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.181648015975952,
      "learning_rate": 0.0004507468521618148,
      "loss": 3.1323,
      "step": 76590
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.354729652404785,
      "learning_rate": 0.00045074331551603065,
      "loss": 3.1979,
      "step": 76591
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.216864585876465,
      "learning_rate": 0.00045073977884222045,
      "loss": 2.9606,
      "step": 76592
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4895061254501343,
      "learning_rate": 0.0004507362421403848,
      "loss": 2.8662,
      "step": 76593
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7303544282913208,
      "learning_rate": 0.0004507327054105245,
      "loss": 3.0381,
      "step": 76594
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8084328174591064,
      "learning_rate": 0.0004507291686526401,
      "loss": 3.1153,
      "step": 76595
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.372058391571045,
      "learning_rate": 0.0004507256318667323,
      "loss": 2.9892,
      "step": 76596
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5542945861816406,
      "learning_rate": 0.0004507220950528019,
      "loss": 3.1556,
      "step": 76597
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7855287790298462,
      "learning_rate": 0.00045071855821084925,
      "loss": 2.9357,
      "step": 76598
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9999960660934448,
      "learning_rate": 0.00045071502134087527,
      "loss": 2.9385,
      "step": 76599
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5325957536697388,
      "learning_rate": 0.0004507114844428805,
      "loss": 3.1267,
      "step": 76600
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2961351871490479,
      "learning_rate": 0.00045070794751686554,
      "loss": 2.9235,
      "step": 76601
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8635773658752441,
      "learning_rate": 0.00045070441056283125,
      "loss": 2.7878,
      "step": 76602
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4755476713180542,
      "learning_rate": 0.00045070087358077814,
      "loss": 3.0427,
      "step": 76603
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5531599521636963,
      "learning_rate": 0.0004506973365707068,
      "loss": 3.1961,
      "step": 76604
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.639807939529419,
      "learning_rate": 0.00045069379953261807,
      "loss": 3.0543,
      "step": 76605
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7721058130264282,
      "learning_rate": 0.00045069026246651246,
      "loss": 3.1049,
      "step": 76606
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5152066946029663,
      "learning_rate": 0.00045068672537239073,
      "loss": 2.9127,
      "step": 76607
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.118584632873535,
      "learning_rate": 0.0004506831882502535,
      "loss": 3.0211,
      "step": 76608
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5556281805038452,
      "learning_rate": 0.00045067965110010146,
      "loss": 3.0689,
      "step": 76609
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5658499002456665,
      "learning_rate": 0.0004506761139219351,
      "loss": 2.9777,
      "step": 76610
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.189068555831909,
      "learning_rate": 0.00045067257671575523,
      "loss": 3.1466,
      "step": 76611
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5652408599853516,
      "learning_rate": 0.0004506690394815626,
      "loss": 2.9259,
      "step": 76612
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5976431369781494,
      "learning_rate": 0.0004506655022193577,
      "loss": 3.1132,
      "step": 76613
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9557479619979858,
      "learning_rate": 0.00045066196492914113,
      "loss": 3.2418,
      "step": 76614
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5086135864257812,
      "learning_rate": 0.0004506584276109139,
      "loss": 2.9372,
      "step": 76615
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5226058959960938,
      "learning_rate": 0.00045065489026467625,
      "loss": 3.2212,
      "step": 76616
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.60182785987854,
      "learning_rate": 0.00045065135289042895,
      "loss": 3.2245,
      "step": 76617
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7064818143844604,
      "learning_rate": 0.00045064781548817285,
      "loss": 3.1063,
      "step": 76618
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4175795316696167,
      "learning_rate": 0.0004506442780579084,
      "loss": 2.9279,
      "step": 76619
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.816986083984375,
      "learning_rate": 0.0004506407405996364,
      "loss": 3.095,
      "step": 76620
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.640436053276062,
      "learning_rate": 0.0004506372031133574,
      "loss": 3.0575,
      "step": 76621
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3277168273925781,
      "learning_rate": 0.0004506336655990721,
      "loss": 2.783,
      "step": 76622
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6097302436828613,
      "learning_rate": 0.00045063012805678115,
      "loss": 3.1053,
      "step": 76623
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8551709651947021,
      "learning_rate": 0.00045062659048648526,
      "loss": 2.9729,
      "step": 76624
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7750458717346191,
      "learning_rate": 0.00045062305288818503,
      "loss": 3.1172,
      "step": 76625
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7231509685516357,
      "learning_rate": 0.00045061951526188104,
      "loss": 3.0867,
      "step": 76626
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1438846588134766,
      "learning_rate": 0.00045061597760757423,
      "loss": 3.0396,
      "step": 76627
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6648244857788086,
      "learning_rate": 0.0004506124399252649,
      "loss": 3.1591,
      "step": 76628
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6762105226516724,
      "learning_rate": 0.0004506089022149539,
      "loss": 3.0913,
      "step": 76629
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6866942644119263,
      "learning_rate": 0.000450605364476642,
      "loss": 2.8478,
      "step": 76630
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.39182710647583,
      "learning_rate": 0.00045060182671032955,
      "loss": 3.445,
      "step": 76631
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.062575101852417,
      "learning_rate": 0.0004505982889160174,
      "loss": 2.9462,
      "step": 76632
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2679407596588135,
      "learning_rate": 0.0004505947510937063,
      "loss": 2.9751,
      "step": 76633
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1896886825561523,
      "learning_rate": 0.00045059121324339665,
      "loss": 2.895,
      "step": 76634
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.033720016479492,
      "learning_rate": 0.00045058767536508933,
      "loss": 2.9378,
      "step": 76635
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.52388596534729,
      "learning_rate": 0.0004505841374587849,
      "loss": 3.0944,
      "step": 76636
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6533087491989136,
      "learning_rate": 0.0004505805995244841,
      "loss": 3.0009,
      "step": 76637
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.513796329498291,
      "learning_rate": 0.00045057706156218744,
      "loss": 2.9292,
      "step": 76638
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6531420946121216,
      "learning_rate": 0.0004505735235718957,
      "loss": 2.8527,
      "step": 76639
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6490864753723145,
      "learning_rate": 0.0004505699855536095,
      "loss": 2.9677,
      "step": 76640
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.359023332595825,
      "learning_rate": 0.0004505664475073294,
      "loss": 3.2963,
      "step": 76641
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0261759757995605,
      "learning_rate": 0.00045056290943305626,
      "loss": 2.9595,
      "step": 76642
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7422300577163696,
      "learning_rate": 0.00045055937133079067,
      "loss": 2.9985,
      "step": 76643
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2779533863067627,
      "learning_rate": 0.00045055583320053314,
      "loss": 2.9017,
      "step": 76644
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4794459342956543,
      "learning_rate": 0.00045055229504228445,
      "loss": 3.0399,
      "step": 76645
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6607356071472168,
      "learning_rate": 0.0004505487568560454,
      "loss": 2.9652,
      "step": 76646
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4775264263153076,
      "learning_rate": 0.0004505452186418163,
      "loss": 2.9746,
      "step": 76647
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8072530031204224,
      "learning_rate": 0.0004505416803995981,
      "loss": 3.1728,
      "step": 76648
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7657159566879272,
      "learning_rate": 0.00045053814212939143,
      "loss": 2.8905,
      "step": 76649
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8002429008483887,
      "learning_rate": 0.0004505346038311968,
      "loss": 3.1535,
      "step": 76650
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.563612461090088,
      "learning_rate": 0.0004505310655050149,
      "loss": 3.0334,
      "step": 76651
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.304832696914673,
      "learning_rate": 0.0004505275271508465,
      "loss": 2.8899,
      "step": 76652
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.489250898361206,
      "learning_rate": 0.0004505239887686921,
      "loss": 3.032,
      "step": 76653
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6377590894699097,
      "learning_rate": 0.00045052045035855247,
      "loss": 2.7791,
      "step": 76654
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.019134521484375,
      "learning_rate": 0.00045051691192042837,
      "loss": 2.8929,
      "step": 76655
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.137901544570923,
      "learning_rate": 0.00045051337345432024,
      "loss": 3.1165,
      "step": 76656
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7093862295150757,
      "learning_rate": 0.0004505098349602288,
      "loss": 3.2067,
      "step": 76657
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.072329044342041,
      "learning_rate": 0.0004505062964381549,
      "loss": 2.986,
      "step": 76658
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3118298053741455,
      "learning_rate": 0.0004505027578880989,
      "loss": 3.1108,
      "step": 76659
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0888946056365967,
      "learning_rate": 0.00045049921931006163,
      "loss": 3.0261,
      "step": 76660
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.554317593574524,
      "learning_rate": 0.00045049568070404367,
      "loss": 3.0878,
      "step": 76661
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.926368474960327,
      "learning_rate": 0.0004504921420700457,
      "loss": 2.9123,
      "step": 76662
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.058559894561768,
      "learning_rate": 0.00045048860340806855,
      "loss": 2.9729,
      "step": 76663
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6773500442504883,
      "learning_rate": 0.0004504850647181125,
      "loss": 2.9382,
      "step": 76664
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7465417385101318,
      "learning_rate": 0.00045048152600017866,
      "loss": 2.9367,
      "step": 76665
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.365731954574585,
      "learning_rate": 0.00045047798725426745,
      "loss": 3.1136,
      "step": 76666
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.761622428894043,
      "learning_rate": 0.0004504744484803794,
      "loss": 2.8448,
      "step": 76667
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4991856813430786,
      "learning_rate": 0.0004504709096785154,
      "loss": 3.2125,
      "step": 76668
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6120976209640503,
      "learning_rate": 0.000450467370848676,
      "loss": 3.0494,
      "step": 76669
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8648697137832642,
      "learning_rate": 0.00045046383199086185,
      "loss": 2.9933,
      "step": 76670
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7420518398284912,
      "learning_rate": 0.0004504602931050736,
      "loss": 3.0371,
      "step": 76671
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.827296495437622,
      "learning_rate": 0.00045045675419131206,
      "loss": 3.0303,
      "step": 76672
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3969476222991943,
      "learning_rate": 0.0004504532152495777,
      "loss": 3.0741,
      "step": 76673
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5754708051681519,
      "learning_rate": 0.0004504496762798712,
      "loss": 3.1355,
      "step": 76674
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4939814805984497,
      "learning_rate": 0.0004504461372821933,
      "loss": 2.7569,
      "step": 76675
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1117591857910156,
      "learning_rate": 0.0004504425982565447,
      "loss": 3.1751,
      "step": 76676
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9426037073135376,
      "learning_rate": 0.00045043905920292594,
      "loss": 2.9872,
      "step": 76677
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4720373153686523,
      "learning_rate": 0.0004504355201213376,
      "loss": 3.0419,
      "step": 76678
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8750486373901367,
      "learning_rate": 0.0004504319810117806,
      "loss": 2.706,
      "step": 76679
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.0924510955810547,
      "learning_rate": 0.0004504284418742554,
      "loss": 2.8852,
      "step": 76680
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0206387042999268,
      "learning_rate": 0.00045042490270876277,
      "loss": 2.9135,
      "step": 76681
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6804918050765991,
      "learning_rate": 0.00045042136351530326,
      "loss": 3.1219,
      "step": 76682
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9479464292526245,
      "learning_rate": 0.00045041782429387756,
      "loss": 2.8683,
      "step": 76683
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.016423463821411,
      "learning_rate": 0.00045041428504448644,
      "loss": 2.9219,
      "step": 76684
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5525990724563599,
      "learning_rate": 0.00045041074576713044,
      "loss": 3.2066,
      "step": 76685
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6623951196670532,
      "learning_rate": 0.00045040720646181016,
      "loss": 3.0143,
      "step": 76686
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6797189712524414,
      "learning_rate": 0.00045040366712852635,
      "loss": 2.9181,
      "step": 76687
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5787427425384521,
      "learning_rate": 0.0004504001277672797,
      "loss": 3.0654,
      "step": 76688
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7053382396697998,
      "learning_rate": 0.0004503965883780708,
      "loss": 2.9345,
      "step": 76689
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8053874969482422,
      "learning_rate": 0.0004503930489609004,
      "loss": 3.1089,
      "step": 76690
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0121612548828125,
      "learning_rate": 0.000450389509515769,
      "loss": 2.9411,
      "step": 76691
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5825177431106567,
      "learning_rate": 0.00045038597004267746,
      "loss": 2.9882,
      "step": 76692
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8660622835159302,
      "learning_rate": 0.0004503824305416262,
      "loss": 3.0827,
      "step": 76693
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0773253440856934,
      "learning_rate": 0.00045037889101261615,
      "loss": 3.0485,
      "step": 76694
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7169431447982788,
      "learning_rate": 0.00045037535145564766,
      "loss": 2.832,
      "step": 76695
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.956636667251587,
      "learning_rate": 0.0004503718118707217,
      "loss": 3.2801,
      "step": 76696
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4856373071670532,
      "learning_rate": 0.00045036827225783876,
      "loss": 3.1014,
      "step": 76697
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1929666996002197,
      "learning_rate": 0.00045036473261699943,
      "loss": 2.8952,
      "step": 76698
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.9264020919799805,
      "learning_rate": 0.00045036119294820454,
      "loss": 2.8794,
      "step": 76699
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.552173376083374,
      "learning_rate": 0.0004503576532514547,
      "loss": 2.9976,
      "step": 76700
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.527649998664856,
      "learning_rate": 0.0004503541135267504,
      "loss": 3.1189,
      "step": 76701
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1426632404327393,
      "learning_rate": 0.0004503505737740925,
      "loss": 2.9981,
      "step": 76702
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.834104537963867,
      "learning_rate": 0.0004503470339934816,
      "loss": 2.8975,
      "step": 76703
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8078194856643677,
      "learning_rate": 0.0004503434941849184,
      "loss": 3.0387,
      "step": 76704
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0602259635925293,
      "learning_rate": 0.00045033995434840343,
      "loss": 3.0942,
      "step": 76705
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.217618465423584,
      "learning_rate": 0.0004503364144839374,
      "loss": 3.1803,
      "step": 76706
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.541245698928833,
      "learning_rate": 0.00045033287459152107,
      "loss": 2.9374,
      "step": 76707
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.169224500656128,
      "learning_rate": 0.00045032933467115506,
      "loss": 2.9664,
      "step": 76708
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.172645092010498,
      "learning_rate": 0.00045032579472283985,
      "loss": 2.9856,
      "step": 76709
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.506054162979126,
      "learning_rate": 0.0004503222547465763,
      "loss": 2.9581,
      "step": 76710
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5428102016448975,
      "learning_rate": 0.00045031871474236507,
      "loss": 3.1442,
      "step": 76711
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0435168743133545,
      "learning_rate": 0.0004503151747102066,
      "loss": 2.8339,
      "step": 76712
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0471909046173096,
      "learning_rate": 0.00045031163465010185,
      "loss": 2.9248,
      "step": 76713
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3369065523147583,
      "learning_rate": 0.00045030809456205124,
      "loss": 2.9568,
      "step": 76714
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4935981035232544,
      "learning_rate": 0.00045030455444605553,
      "loss": 3.0909,
      "step": 76715
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2177302837371826,
      "learning_rate": 0.00045030101430211544,
      "loss": 3.0432,
      "step": 76716
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9147282838821411,
      "learning_rate": 0.0004502974741302315,
      "loss": 2.7325,
      "step": 76717
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5692527294158936,
      "learning_rate": 0.0004502939339304044,
      "loss": 2.7281,
      "step": 76718
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9160873889923096,
      "learning_rate": 0.0004502903937026349,
      "loss": 3.0038,
      "step": 76719
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1425552368164062,
      "learning_rate": 0.0004502868534469235,
      "loss": 2.7892,
      "step": 76720
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.179189443588257,
      "learning_rate": 0.000450283313163271,
      "loss": 2.7876,
      "step": 76721
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6855103969573975,
      "learning_rate": 0.000450279772851678,
      "loss": 3.0698,
      "step": 76722
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1969964504241943,
      "learning_rate": 0.00045027623251214506,
      "loss": 3.0218,
      "step": 76723
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.9872376918792725,
      "learning_rate": 0.000450272692144673,
      "loss": 2.8177,
      "step": 76724
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.392194390296936,
      "learning_rate": 0.0004502691517492624,
      "loss": 3.2073,
      "step": 76725
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9551403522491455,
      "learning_rate": 0.0004502656113259139,
      "loss": 2.9686,
      "step": 76726
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9953651428222656,
      "learning_rate": 0.00045026207087462826,
      "loss": 3.1176,
      "step": 76727
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9480429887771606,
      "learning_rate": 0.00045025853039540597,
      "loss": 2.7743,
      "step": 76728
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.825268030166626,
      "learning_rate": 0.0004502549898882478,
      "loss": 2.9202,
      "step": 76729
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4481241703033447,
      "learning_rate": 0.00045025144935315444,
      "loss": 3.1116,
      "step": 76730
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.292182445526123,
      "learning_rate": 0.00045024790879012646,
      "loss": 2.7862,
      "step": 76731
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8175007104873657,
      "learning_rate": 0.0004502443681991646,
      "loss": 3.1792,
      "step": 76732
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2829153537750244,
      "learning_rate": 0.0004502408275802694,
      "loss": 3.0014,
      "step": 76733
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.358273983001709,
      "learning_rate": 0.0004502372869334417,
      "loss": 3.0274,
      "step": 76734
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.240485668182373,
      "learning_rate": 0.000450233746258682,
      "loss": 3.2529,
      "step": 76735
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.173781156539917,
      "learning_rate": 0.00045023020555599104,
      "loss": 3.0742,
      "step": 76736
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.446220636367798,
      "learning_rate": 0.0004502266648253694,
      "loss": 2.9022,
      "step": 76737
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.100518226623535,
      "learning_rate": 0.0004502231240668178,
      "loss": 2.9962,
      "step": 76738
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.0311026573181152,
      "learning_rate": 0.0004502195832803369,
      "loss": 2.9125,
      "step": 76739
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5832111835479736,
      "learning_rate": 0.00045021604246592734,
      "loss": 2.9285,
      "step": 76740
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6239311695098877,
      "learning_rate": 0.00045021250162358974,
      "loss": 3.0477,
      "step": 76741
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5618141889572144,
      "learning_rate": 0.00045020896075332483,
      "loss": 3.1425,
      "step": 76742
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.152333974838257,
      "learning_rate": 0.0004502054198551333,
      "loss": 3.0068,
      "step": 76743
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.927542209625244,
      "learning_rate": 0.0004502018789290156,
      "loss": 2.9846,
      "step": 76744
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.612043023109436,
      "learning_rate": 0.0004501983379749727,
      "loss": 2.8467,
      "step": 76745
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7233637571334839,
      "learning_rate": 0.000450194796993005,
      "loss": 3.1468,
      "step": 76746
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.950951099395752,
      "learning_rate": 0.00045019125598311325,
      "loss": 2.7955,
      "step": 76747
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.505776882171631,
      "learning_rate": 0.00045018771494529817,
      "loss": 3.0688,
      "step": 76748
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.776047706604004,
      "learning_rate": 0.00045018417387956034,
      "loss": 3.228,
      "step": 76749
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6267485618591309,
      "learning_rate": 0.0004501806327859004,
      "loss": 3.0952,
      "step": 76750
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2791080474853516,
      "learning_rate": 0.00045017709166431905,
      "loss": 2.8012,
      "step": 76751
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6555838584899902,
      "learning_rate": 0.00045017355051481695,
      "loss": 3.1174,
      "step": 76752
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7301064729690552,
      "learning_rate": 0.0004501700093373948,
      "loss": 2.9208,
      "step": 76753
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.25736403465271,
      "learning_rate": 0.0004501664681320531,
      "loss": 2.8273,
      "step": 76754
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9899662733078003,
      "learning_rate": 0.00045016292689879274,
      "loss": 2.9116,
      "step": 76755
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.747562885284424,
      "learning_rate": 0.0004501593856376142,
      "loss": 3.1245,
      "step": 76756
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.477469563484192,
      "learning_rate": 0.00045015584434851823,
      "loss": 3.1856,
      "step": 76757
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.927472472190857,
      "learning_rate": 0.0004501523030315054,
      "loss": 2.7486,
      "step": 76758
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5225211381912231,
      "learning_rate": 0.00045014876168657643,
      "loss": 3.1521,
      "step": 76759
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5147411823272705,
      "learning_rate": 0.00045014522031373204,
      "loss": 3.1434,
      "step": 76760
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6348388195037842,
      "learning_rate": 0.0004501416789129727,
      "loss": 3.0446,
      "step": 76761
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.618538737297058,
      "learning_rate": 0.0004501381374842993,
      "loss": 3.1975,
      "step": 76762
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.94746994972229,
      "learning_rate": 0.0004501345960277123,
      "loss": 3.0161,
      "step": 76763
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7192109823226929,
      "learning_rate": 0.0004501310545432125,
      "loss": 3.1109,
      "step": 76764
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.028467893600464,
      "learning_rate": 0.0004501275130308005,
      "loss": 2.9027,
      "step": 76765
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2384846210479736,
      "learning_rate": 0.00045012397149047696,
      "loss": 3.0593,
      "step": 76766
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6609883308410645,
      "learning_rate": 0.00045012042992224253,
      "loss": 3.1727,
      "step": 76767
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.963600993156433,
      "learning_rate": 0.0004501168883260979,
      "loss": 2.9554,
      "step": 76768
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.341686725616455,
      "learning_rate": 0.0004501133467020436,
      "loss": 3.1197,
      "step": 76769
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6446188688278198,
      "learning_rate": 0.0004501098050500805,
      "loss": 3.1485,
      "step": 76770
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6875964403152466,
      "learning_rate": 0.00045010626337020924,
      "loss": 3.1083,
      "step": 76771
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.716422200202942,
      "learning_rate": 0.0004501027216624302,
      "loss": 3.1333,
      "step": 76772
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8080443143844604,
      "learning_rate": 0.0004500991799267443,
      "loss": 3.1311,
      "step": 76773
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6581684350967407,
      "learning_rate": 0.0004500956381631522,
      "loss": 3.2587,
      "step": 76774
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0722713470458984,
      "learning_rate": 0.00045009209637165444,
      "loss": 2.9333,
      "step": 76775
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5034964084625244,
      "learning_rate": 0.00045008855455225174,
      "loss": 3.1497,
      "step": 76776
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6222749948501587,
      "learning_rate": 0.0004500850127049447,
      "loss": 3.072,
      "step": 76777
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5570237636566162,
      "learning_rate": 0.0004500814708297341,
      "loss": 3.2143,
      "step": 76778
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9464142322540283,
      "learning_rate": 0.0004500779289266204,
      "loss": 2.8092,
      "step": 76779
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.149876356124878,
      "learning_rate": 0.0004500743869956045,
      "loss": 3.0757,
      "step": 76780
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.477709174156189,
      "learning_rate": 0.0004500708450366869,
      "loss": 3.1163,
      "step": 76781
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6365797519683838,
      "learning_rate": 0.00045006730304986825,
      "loss": 2.9972,
      "step": 76782
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4269933700561523,
      "learning_rate": 0.00045006376103514935,
      "loss": 3.1594,
      "step": 76783
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.025327205657959,
      "learning_rate": 0.00045006021899253066,
      "loss": 2.902,
      "step": 76784
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.639209270477295,
      "learning_rate": 0.00045005667692201297,
      "loss": 3.2175,
      "step": 76785
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2707619667053223,
      "learning_rate": 0.000450053134823597,
      "loss": 2.8492,
      "step": 76786
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.224553108215332,
      "learning_rate": 0.0004500495926972832,
      "loss": 3.1581,
      "step": 76787
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9987865686416626,
      "learning_rate": 0.0004500460505430724,
      "loss": 3.0614,
      "step": 76788
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9396319389343262,
      "learning_rate": 0.0004500425083609653,
      "loss": 3.0613,
      "step": 76789
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9404066801071167,
      "learning_rate": 0.0004500389661509623,
      "loss": 3.044,
      "step": 76790
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6488350629806519,
      "learning_rate": 0.0004500354239130643,
      "loss": 2.8954,
      "step": 76791
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0917365550994873,
      "learning_rate": 0.0004500318816472719,
      "loss": 2.8965,
      "step": 76792
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.768526077270508,
      "learning_rate": 0.0004500283393535857,
      "loss": 3.1498,
      "step": 76793
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9762734174728394,
      "learning_rate": 0.00045002479703200635,
      "loss": 2.9014,
      "step": 76794
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5559825897216797,
      "learning_rate": 0.00045002125468253477,
      "loss": 3.1433,
      "step": 76795
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.7701964378356934,
      "learning_rate": 0.0004500177123051712,
      "loss": 2.9573,
      "step": 76796
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4804832935333252,
      "learning_rate": 0.0004500141698999166,
      "loss": 2.9305,
      "step": 76797
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9840023517608643,
      "learning_rate": 0.0004500106274667715,
      "loss": 3.1441,
      "step": 76798
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5706238746643066,
      "learning_rate": 0.0004500070850057366,
      "loss": 2.8442,
      "step": 76799
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7168084383010864,
      "learning_rate": 0.00045000354251681254,
      "loss": 3.1385,
      "step": 76800
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9677283763885498,
      "learning_rate": 0.00045,
      "loss": 3.176,
      "step": 76801
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4420772790908813,
      "learning_rate": 0.0004499964574552996,
      "loss": 2.7439,
      "step": 76802
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3725430965423584,
      "learning_rate": 0.00044999291488271204,
      "loss": 2.917,
      "step": 76803
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8272984027862549,
      "learning_rate": 0.000449989372282238,
      "loss": 3.0621,
      "step": 76804
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.712158203125,
      "learning_rate": 0.00044998582965387806,
      "loss": 3.1196,
      "step": 76805
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.54080069065094,
      "learning_rate": 0.000449982286997633,
      "loss": 3.0253,
      "step": 76806
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9115393161773682,
      "learning_rate": 0.0004499787443135033,
      "loss": 2.9354,
      "step": 76807
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6780668497085571,
      "learning_rate": 0.00044997520160148976,
      "loss": 2.9001,
      "step": 76808
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6783795356750488,
      "learning_rate": 0.00044997165886159307,
      "loss": 3.0189,
      "step": 76809
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.797951579093933,
      "learning_rate": 0.0004499681160938138,
      "loss": 2.7779,
      "step": 76810
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4781672954559326,
      "learning_rate": 0.00044996457329815257,
      "loss": 2.9472,
      "step": 76811
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6002895832061768,
      "learning_rate": 0.00044996103047461,
      "loss": 2.9486,
      "step": 76812
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2489686012268066,
      "learning_rate": 0.00044995748762318696,
      "loss": 3.1149,
      "step": 76813
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.611607313156128,
      "learning_rate": 0.00044995394474388404,
      "loss": 2.9647,
      "step": 76814
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.654074788093567,
      "learning_rate": 0.00044995040183670173,
      "loss": 3.0628,
      "step": 76815
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.850969672203064,
      "learning_rate": 0.00044994685890164095,
      "loss": 3.0228,
      "step": 76816
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.722325325012207,
      "learning_rate": 0.00044994331593870213,
      "loss": 2.5779,
      "step": 76817
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6438933610916138,
      "learning_rate": 0.000449939772947886,
      "loss": 2.9583,
      "step": 76818
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.637480616569519,
      "learning_rate": 0.0004499362299291933,
      "loss": 3.0406,
      "step": 76819
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3633065223693848,
      "learning_rate": 0.00044993268688262456,
      "loss": 2.9414,
      "step": 76820
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7639819383621216,
      "learning_rate": 0.00044992914380818054,
      "loss": 3.2362,
      "step": 76821
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6141133308410645,
      "learning_rate": 0.00044992560070586184,
      "loss": 2.9241,
      "step": 76822
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4733011722564697,
      "learning_rate": 0.0004499220575756692,
      "loss": 3.0458,
      "step": 76823
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4469239711761475,
      "learning_rate": 0.00044991851441760313,
      "loss": 3.1058,
      "step": 76824
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8920187950134277,
      "learning_rate": 0.00044991497123166447,
      "loss": 2.924,
      "step": 76825
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6375110149383545,
      "learning_rate": 0.00044991142801785374,
      "loss": 2.9551,
      "step": 76826
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.537234306335449,
      "learning_rate": 0.0004499078847761716,
      "loss": 2.8019,
      "step": 76827
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5729146003723145,
      "learning_rate": 0.0004499043415066188,
      "loss": 3.0772,
      "step": 76828
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5054290294647217,
      "learning_rate": 0.000449900798209196,
      "loss": 3.1737,
      "step": 76829
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.7997429370880127,
      "learning_rate": 0.00044989725488390374,
      "loss": 3.0244,
      "step": 76830
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.172323226928711,
      "learning_rate": 0.0004498937115307428,
      "loss": 3.0401,
      "step": 76831
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9589331150054932,
      "learning_rate": 0.0004498901681497138,
      "loss": 3.0349,
      "step": 76832
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.597028374671936,
      "learning_rate": 0.0004498866247408173,
      "loss": 3.0262,
      "step": 76833
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.158111810684204,
      "learning_rate": 0.00044988308130405414,
      "loss": 2.9934,
      "step": 76834
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3631473779678345,
      "learning_rate": 0.00044987953783942483,
      "loss": 2.9543,
      "step": 76835
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.911925196647644,
      "learning_rate": 0.00044987599434693007,
      "loss": 3.2078,
      "step": 76836
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.507864236831665,
      "learning_rate": 0.00044987245082657055,
      "loss": 2.9493,
      "step": 76837
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.519670009613037,
      "learning_rate": 0.00044986890727834704,
      "loss": 3.0243,
      "step": 76838
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4516263008117676,
      "learning_rate": 0.0004498653637022599,
      "loss": 3.3967,
      "step": 76839
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.763276219367981,
      "learning_rate": 0.00044986182009831,
      "loss": 3.0162,
      "step": 76840
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3216493129730225,
      "learning_rate": 0.000449858276466498,
      "loss": 3.027,
      "step": 76841
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7804253101348877,
      "learning_rate": 0.00044985473280682446,
      "loss": 2.8982,
      "step": 76842
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6870144605636597,
      "learning_rate": 0.00044985118911929013,
      "loss": 2.931,
      "step": 76843
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5083367824554443,
      "learning_rate": 0.0004498476454038956,
      "loss": 2.8825,
      "step": 76844
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.003552198410034,
      "learning_rate": 0.0004498441016606416,
      "loss": 3.1176,
      "step": 76845
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9892164468765259,
      "learning_rate": 0.0004498405578895288,
      "loss": 3.0518,
      "step": 76846
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1224708557128906,
      "learning_rate": 0.00044983701409055774,
      "loss": 2.8379,
      "step": 76847
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5311219692230225,
      "learning_rate": 0.0004498334702637292,
      "loss": 2.9264,
      "step": 76848
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.053286552429199,
      "learning_rate": 0.0004498299264090438,
      "loss": 2.6051,
      "step": 76849
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.9956533908843994,
      "learning_rate": 0.0004498263825265021,
      "loss": 3.063,
      "step": 76850
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.693407416343689,
      "learning_rate": 0.0004498228386161049,
      "loss": 3.0901,
      "step": 76851
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6331064701080322,
      "learning_rate": 0.0004498192946778529,
      "loss": 3.0969,
      "step": 76852
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8790172338485718,
      "learning_rate": 0.0004498157507117465,
      "loss": 3.1295,
      "step": 76853
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9366365671157837,
      "learning_rate": 0.00044981220671778656,
      "loss": 2.9915,
      "step": 76854
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5503419637680054,
      "learning_rate": 0.0004498086626959737,
      "loss": 3.3001,
      "step": 76855
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.899644374847412,
      "learning_rate": 0.0004498051186463087,
      "loss": 3.218,
      "step": 76856
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8463143110275269,
      "learning_rate": 0.000449801574568792,
      "loss": 3.0182,
      "step": 76857
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.9931907653808594,
      "learning_rate": 0.00044979803046342443,
      "loss": 3.1006,
      "step": 76858
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5917876958847046,
      "learning_rate": 0.0004497944863302065,
      "loss": 3.0955,
      "step": 76859
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4681148529052734,
      "learning_rate": 0.000449790942169139,
      "loss": 2.9117,
      "step": 76860
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.714347004890442,
      "learning_rate": 0.00044978739798022245,
      "loss": 3.0129,
      "step": 76861
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.656613826751709,
      "learning_rate": 0.00044978385376345776,
      "loss": 2.946,
      "step": 76862
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7495081424713135,
      "learning_rate": 0.0004497803095188453,
      "loss": 2.8964,
      "step": 76863
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1057798862457275,
      "learning_rate": 0.00044977676524638583,
      "loss": 3.307,
      "step": 76864
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8866665363311768,
      "learning_rate": 0.0004497732209460801,
      "loss": 3.0783,
      "step": 76865
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6225621700286865,
      "learning_rate": 0.00044976967661792866,
      "loss": 2.7862,
      "step": 76866
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.138610363006592,
      "learning_rate": 0.00044976613226193224,
      "loss": 2.9325,
      "step": 76867
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5960183143615723,
      "learning_rate": 0.00044976258787809157,
      "loss": 2.769,
      "step": 76868
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.873860478401184,
      "learning_rate": 0.000449759043466407,
      "loss": 3.1068,
      "step": 76869
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4915980100631714,
      "learning_rate": 0.00044975549902687953,
      "loss": 2.9832,
      "step": 76870
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7243953943252563,
      "learning_rate": 0.00044975195455950964,
      "loss": 3.0813,
      "step": 76871
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.610400676727295,
      "learning_rate": 0.00044974841006429803,
      "loss": 2.9625,
      "step": 76872
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4293071031570435,
      "learning_rate": 0.00044974486554124536,
      "loss": 2.9729,
      "step": 76873
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.934463620185852,
      "learning_rate": 0.0004497413209903524,
      "loss": 2.8945,
      "step": 76874
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4072906970977783,
      "learning_rate": 0.0004497377764116196,
      "loss": 2.8448,
      "step": 76875
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4998234510421753,
      "learning_rate": 0.0004497342318050476,
      "loss": 2.9329,
      "step": 76876
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4881539344787598,
      "learning_rate": 0.00044973068717063736,
      "loss": 2.9912,
      "step": 76877
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8625701665878296,
      "learning_rate": 0.0004497271425083894,
      "loss": 3.0691,
      "step": 76878
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5742889642715454,
      "learning_rate": 0.0004497235978183042,
      "loss": 3.1399,
      "step": 76879
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7328442335128784,
      "learning_rate": 0.0004497200531003826,
      "loss": 2.8494,
      "step": 76880
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9301234483718872,
      "learning_rate": 0.00044971650835462523,
      "loss": 2.9806,
      "step": 76881
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.92510187625885,
      "learning_rate": 0.0004497129635810327,
      "loss": 3.2418,
      "step": 76882
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6632459163665771,
      "learning_rate": 0.0004497094187796058,
      "loss": 2.9798,
      "step": 76883
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6173206567764282,
      "learning_rate": 0.000449705873950345,
      "loss": 2.8962,
      "step": 76884
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6154513359069824,
      "learning_rate": 0.00044970232909325103,
      "loss": 3.0294,
      "step": 76885
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7312822341918945,
      "learning_rate": 0.00044969878420832464,
      "loss": 3.0262,
      "step": 76886
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.606095790863037,
      "learning_rate": 0.00044969523929556645,
      "loss": 2.8028,
      "step": 76887
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5806336402893066,
      "learning_rate": 0.00044969169435497705,
      "loss": 3.0701,
      "step": 76888
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9955981969833374,
      "learning_rate": 0.0004496881493865571,
      "loss": 3.093,
      "step": 76889
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4367951154708862,
      "learning_rate": 0.00044968460439030733,
      "loss": 2.9609,
      "step": 76890
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6310207843780518,
      "learning_rate": 0.00044968105936622836,
      "loss": 3.074,
      "step": 76891
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9022084474563599,
      "learning_rate": 0.0004496775143143209,
      "loss": 3.1635,
      "step": 76892
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7255566120147705,
      "learning_rate": 0.0004496739692345855,
      "loss": 2.9898,
      "step": 76893
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4036294221878052,
      "learning_rate": 0.0004496704241270229,
      "loss": 2.8065,
      "step": 76894
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6124051809310913,
      "learning_rate": 0.0004496668789916338,
      "loss": 2.972,
      "step": 76895
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8620893955230713,
      "learning_rate": 0.0004496633338284187,
      "loss": 3.032,
      "step": 76896
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7688955068588257,
      "learning_rate": 0.00044965978863737837,
      "loss": 3.0964,
      "step": 76897
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.506007194519043,
      "learning_rate": 0.0004496562434185136,
      "loss": 3.0878,
      "step": 76898
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.465374708175659,
      "learning_rate": 0.00044965269817182475,
      "loss": 3.1274,
      "step": 76899
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7261062860488892,
      "learning_rate": 0.0004496491528973127,
      "loss": 2.666,
      "step": 76900
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0520987510681152,
      "learning_rate": 0.0004496456075949781,
      "loss": 3.1064,
      "step": 76901
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6956790685653687,
      "learning_rate": 0.00044964206226482143,
      "loss": 2.8884,
      "step": 76902
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6632113456726074,
      "learning_rate": 0.0004496385169068436,
      "loss": 3.2753,
      "step": 76903
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8275011777877808,
      "learning_rate": 0.00044963497152104506,
      "loss": 2.7191,
      "step": 76904
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.259610414505005,
      "learning_rate": 0.0004496314261074266,
      "loss": 2.8648,
      "step": 76905
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6620936393737793,
      "learning_rate": 0.00044962788066598883,
      "loss": 3.0396,
      "step": 76906
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5616875886917114,
      "learning_rate": 0.0004496243351967324,
      "loss": 2.8922,
      "step": 76907
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5183676481246948,
      "learning_rate": 0.000449620789699658,
      "loss": 2.9262,
      "step": 76908
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7821331024169922,
      "learning_rate": 0.00044961724417476626,
      "loss": 2.9623,
      "step": 76909
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.837817668914795,
      "learning_rate": 0.00044961369862205774,
      "loss": 2.9747,
      "step": 76910
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8210442066192627,
      "learning_rate": 0.00044961015304153336,
      "loss": 3.2309,
      "step": 76911
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8784350156784058,
      "learning_rate": 0.00044960660743319366,
      "loss": 2.8658,
      "step": 76912
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9429932832717896,
      "learning_rate": 0.00044960306179703914,
      "loss": 2.9825,
      "step": 76913
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.0555970668792725,
      "learning_rate": 0.0004495995161330707,
      "loss": 3.0249,
      "step": 76914
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.947289228439331,
      "learning_rate": 0.00044959597044128875,
      "loss": 3.2723,
      "step": 76915
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.744097113609314,
      "learning_rate": 0.0004495924247216942,
      "loss": 3.0038,
      "step": 76916
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.400813102722168,
      "learning_rate": 0.00044958887897428757,
      "loss": 3.0867,
      "step": 76917
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7433322668075562,
      "learning_rate": 0.00044958533319906953,
      "loss": 3.0681,
      "step": 76918
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.752091646194458,
      "learning_rate": 0.00044958178739604073,
      "loss": 3.1287,
      "step": 76919
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.7152440547943115,
      "learning_rate": 0.0004495782415652019,
      "loss": 2.9552,
      "step": 76920
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1709420680999756,
      "learning_rate": 0.0004495746957065536,
      "loss": 2.9532,
      "step": 76921
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7162232398986816,
      "learning_rate": 0.00044957114982009655,
      "loss": 3.0046,
      "step": 76922
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9405165910720825,
      "learning_rate": 0.0004495676039058315,
      "loss": 3.1533,
      "step": 76923
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7804124355316162,
      "learning_rate": 0.0004495640579637589,
      "loss": 2.9973,
      "step": 76924
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.56438410282135,
      "learning_rate": 0.00044956051199387953,
      "loss": 3.0305,
      "step": 76925
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5502283573150635,
      "learning_rate": 0.0004495569659961941,
      "loss": 3.0332,
      "step": 76926
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2576682567596436,
      "learning_rate": 0.0004495534199707031,
      "loss": 2.9441,
      "step": 76927
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5954926013946533,
      "learning_rate": 0.0004495498739174073,
      "loss": 3.0869,
      "step": 76928
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.42077374458313,
      "learning_rate": 0.0004495463278363074,
      "loss": 2.715,
      "step": 76929
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9173896312713623,
      "learning_rate": 0.0004495427817274041,
      "loss": 3.1499,
      "step": 76930
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6075706481933594,
      "learning_rate": 0.0004495392355906979,
      "loss": 3.1304,
      "step": 76931
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1670920848846436,
      "learning_rate": 0.00044953568942618946,
      "loss": 2.965,
      "step": 76932
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3015048503875732,
      "learning_rate": 0.00044953214323387966,
      "loss": 2.7368,
      "step": 76933
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.60071861743927,
      "learning_rate": 0.00044952859701376886,
      "loss": 3.0731,
      "step": 76934
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.170166492462158,
      "learning_rate": 0.000449525050765858,
      "loss": 3.0149,
      "step": 76935
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0937609672546387,
      "learning_rate": 0.00044952150449014755,
      "loss": 2.7102,
      "step": 76936
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5540101528167725,
      "learning_rate": 0.00044951795818663824,
      "loss": 3.2034,
      "step": 76937
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.683762788772583,
      "learning_rate": 0.00044951441185533074,
      "loss": 3.0968,
      "step": 76938
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7926501035690308,
      "learning_rate": 0.00044951086549622566,
      "loss": 2.9561,
      "step": 76939
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0714070796966553,
      "learning_rate": 0.00044950731910932363,
      "loss": 2.994,
      "step": 76940
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4523801803588867,
      "learning_rate": 0.00044950377269462543,
      "loss": 3.0077,
      "step": 76941
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4365557432174683,
      "learning_rate": 0.0004495002262521317,
      "loss": 3.1806,
      "step": 76942
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6980395317077637,
      "learning_rate": 0.000449496679781843,
      "loss": 2.841,
      "step": 76943
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9804373979568481,
      "learning_rate": 0.0004494931332837601,
      "loss": 2.9121,
      "step": 76944
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.9299368858337402,
      "learning_rate": 0.00044948958675788356,
      "loss": 3.0827,
      "step": 76945
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.308176040649414,
      "learning_rate": 0.000449486040204214,
      "loss": 2.9966,
      "step": 76946
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.733821988105774,
      "learning_rate": 0.0004494824936227523,
      "loss": 3.1197,
      "step": 76947
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.165419101715088,
      "learning_rate": 0.00044947894701349893,
      "loss": 2.9058,
      "step": 76948
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6592429876327515,
      "learning_rate": 0.00044947540037645455,
      "loss": 2.8713,
      "step": 76949
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.475126028060913,
      "learning_rate": 0.00044947185371162,
      "loss": 3.1468,
      "step": 76950
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.3693299293518066,
      "learning_rate": 0.00044946830701899574,
      "loss": 2.8606,
      "step": 76951
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.463038921356201,
      "learning_rate": 0.00044946476029858245,
      "loss": 2.913,
      "step": 76952
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0827219486236572,
      "learning_rate": 0.00044946121355038095,
      "loss": 3.0218,
      "step": 76953
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.3282649517059326,
      "learning_rate": 0.0004494576667743917,
      "loss": 2.9402,
      "step": 76954
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6844488382339478,
      "learning_rate": 0.0004494541199706155,
      "loss": 3.092,
      "step": 76955
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6316784620285034,
      "learning_rate": 0.000449450573139053,
      "loss": 2.8161,
      "step": 76956
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.532869815826416,
      "learning_rate": 0.00044944702627970466,
      "loss": 3.1936,
      "step": 76957
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7916450500488281,
      "learning_rate": 0.00044944347939257143,
      "loss": 3.0583,
      "step": 76958
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.340991497039795,
      "learning_rate": 0.0004494399324776538,
      "loss": 3.0678,
      "step": 76959
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.978447198867798,
      "learning_rate": 0.00044943638553495243,
      "loss": 2.9939,
      "step": 76960
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.490927219390869,
      "learning_rate": 0.000449432838564468,
      "loss": 2.9768,
      "step": 76961
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6942533254623413,
      "learning_rate": 0.00044942929156620125,
      "loss": 2.9768,
      "step": 76962
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.513535976409912,
      "learning_rate": 0.00044942574454015277,
      "loss": 2.7799,
      "step": 76963
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.803142547607422,
      "learning_rate": 0.0004494221974863232,
      "loss": 3.0666,
      "step": 76964
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8640480041503906,
      "learning_rate": 0.0004494186504047133,
      "loss": 2.9023,
      "step": 76965
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.0642478466033936,
      "learning_rate": 0.00044941510329532357,
      "loss": 3.1286,
      "step": 76966
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3921635150909424,
      "learning_rate": 0.00044941155615815467,
      "loss": 2.9953,
      "step": 76967
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.6488561630249023,
      "learning_rate": 0.0004494080089932075,
      "loss": 3.158,
      "step": 76968
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6261204481124878,
      "learning_rate": 0.0004494044618004825,
      "loss": 3.1071,
      "step": 76969
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6627224683761597,
      "learning_rate": 0.00044940091457998035,
      "loss": 3.0023,
      "step": 76970
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8560690879821777,
      "learning_rate": 0.0004493973673317018,
      "loss": 3.0594,
      "step": 76971
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.14042592048645,
      "learning_rate": 0.0004493938200556475,
      "loss": 3.138,
      "step": 76972
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9484751224517822,
      "learning_rate": 0.00044939027275181796,
      "loss": 3.2474,
      "step": 76973
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2307597398757935,
      "learning_rate": 0.000449386725420214,
      "loss": 3.061,
      "step": 76974
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.800903558731079,
      "learning_rate": 0.00044938317806083623,
      "loss": 3.1707,
      "step": 76975
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2288618087768555,
      "learning_rate": 0.0004493796306736853,
      "loss": 3.0956,
      "step": 76976
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7716368436813354,
      "learning_rate": 0.0004493760832587619,
      "loss": 3.1737,
      "step": 76977
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.573635220527649,
      "learning_rate": 0.00044937253581606667,
      "loss": 3.1072,
      "step": 76978
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3679033517837524,
      "learning_rate": 0.00044936898834560025,
      "loss": 3.1952,
      "step": 76979
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7133195400238037,
      "learning_rate": 0.0004493654408473633,
      "loss": 3.0683,
      "step": 76980
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6381433010101318,
      "learning_rate": 0.0004493618933213565,
      "loss": 2.9058,
      "step": 76981
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6018728017807007,
      "learning_rate": 0.00044935834576758055,
      "loss": 3.0226,
      "step": 76982
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9593029022216797,
      "learning_rate": 0.000449354798186036,
      "loss": 3.0918,
      "step": 76983
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0693962574005127,
      "learning_rate": 0.0004493512505767236,
      "loss": 2.8948,
      "step": 76984
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1881473064422607,
      "learning_rate": 0.000449347702939644,
      "loss": 3.0274,
      "step": 76985
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8150291442871094,
      "learning_rate": 0.0004493441552747978,
      "loss": 3.0363,
      "step": 76986
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5117887258529663,
      "learning_rate": 0.0004493406075821858,
      "loss": 3.0263,
      "step": 76987
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.111020803451538,
      "learning_rate": 0.00044933705986180845,
      "loss": 2.9055,
      "step": 76988
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4513177871704102,
      "learning_rate": 0.0004493335121136665,
      "loss": 2.7389,
      "step": 76989
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8917436599731445,
      "learning_rate": 0.0004493299643377608,
      "loss": 2.8728,
      "step": 76990
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.237746477127075,
      "learning_rate": 0.00044932641653409167,
      "loss": 2.9961,
      "step": 76991
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.2861884832382202,
      "learning_rate": 0.00044932286870266,
      "loss": 3.1299,
      "step": 76992
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5326656103134155,
      "learning_rate": 0.00044931932084346643,
      "loss": 2.8835,
      "step": 76993
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6559526920318604,
      "learning_rate": 0.00044931577295651156,
      "loss": 3.0699,
      "step": 76994
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5026246309280396,
      "learning_rate": 0.00044931222504179596,
      "loss": 2.8701,
      "step": 76995
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4919317960739136,
      "learning_rate": 0.00044930867709932056,
      "loss": 3.0221,
      "step": 76996
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.907811164855957,
      "learning_rate": 0.0004493051291290858,
      "loss": 3.0218,
      "step": 76997
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5339932441711426,
      "learning_rate": 0.00044930158113109234,
      "loss": 3.1919,
      "step": 76998
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9426007270812988,
      "learning_rate": 0.00044929803310534105,
      "loss": 2.9571,
      "step": 76999
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5723936557769775,
      "learning_rate": 0.0004492944850518323,
      "loss": 3.0013,
      "step": 77000
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6676527261734009,
      "learning_rate": 0.0004492909369705669,
      "loss": 3.0158,
      "step": 77001
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8962458372116089,
      "learning_rate": 0.00044928738886154554,
      "loss": 2.8822,
      "step": 77002
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5367249250411987,
      "learning_rate": 0.0004492838407247688,
      "loss": 2.8756,
      "step": 77003
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5840085744857788,
      "learning_rate": 0.0004492802925602374,
      "loss": 3.1731,
      "step": 77004
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8611481189727783,
      "learning_rate": 0.00044927674436795196,
      "loss": 3.1695,
      "step": 77005
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8143815994262695,
      "learning_rate": 0.00044927319614791316,
      "loss": 2.9011,
      "step": 77006
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5458405017852783,
      "learning_rate": 0.00044926964790012165,
      "loss": 3.1136,
      "step": 77007
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.660410761833191,
      "learning_rate": 0.00044926609962457815,
      "loss": 2.9615,
      "step": 77008
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8048007488250732,
      "learning_rate": 0.0004492625513212832,
      "loss": 2.9129,
      "step": 77009
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4509907960891724,
      "learning_rate": 0.0004492590029902375,
      "loss": 2.6318,
      "step": 77010
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5067790746688843,
      "learning_rate": 0.00044925545463144183,
      "loss": 2.7729,
      "step": 77011
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6991465091705322,
      "learning_rate": 0.00044925190624489663,
      "loss": 3.0743,
      "step": 77012
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8272651433944702,
      "learning_rate": 0.0004492483578306028,
      "loss": 2.8789,
      "step": 77013
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8018876314163208,
      "learning_rate": 0.00044924480938856085,
      "loss": 3.2731,
      "step": 77014
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9619536399841309,
      "learning_rate": 0.0004492412609187714,
      "loss": 3.1624,
      "step": 77015
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3391032218933105,
      "learning_rate": 0.00044923771242123524,
      "loss": 2.9946,
      "step": 77016
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3246965408325195,
      "learning_rate": 0.0004492341638959529,
      "loss": 3.2241,
      "step": 77017
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.080993890762329,
      "learning_rate": 0.0004492306153429253,
      "loss": 2.8714,
      "step": 77018
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.031813383102417,
      "learning_rate": 0.0004492270667621528,
      "loss": 3.0408,
      "step": 77019
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.3116776943206787,
      "learning_rate": 0.0004492235181536361,
      "loss": 3.1557,
      "step": 77020
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5567883253097534,
      "learning_rate": 0.00044921996951737605,
      "loss": 3.0729,
      "step": 77021
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.108999729156494,
      "learning_rate": 0.0004492164208533731,
      "loss": 3.0563,
      "step": 77022
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.709177017211914,
      "learning_rate": 0.00044921287216162807,
      "loss": 3.1893,
      "step": 77023
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5827947854995728,
      "learning_rate": 0.00044920932344214145,
      "loss": 3.1038,
      "step": 77024
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8019964694976807,
      "learning_rate": 0.00044920577469491417,
      "loss": 2.9787,
      "step": 77025
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.785912275314331,
      "learning_rate": 0.00044920222591994654,
      "loss": 2.9396,
      "step": 77026
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.912937879562378,
      "learning_rate": 0.0004491986771172395,
      "loss": 3.0754,
      "step": 77027
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9327441453933716,
      "learning_rate": 0.0004491951282867936,
      "loss": 3.1531,
      "step": 77028
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.64590585231781,
      "learning_rate": 0.00044919157942860947,
      "loss": 3.0821,
      "step": 77029
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0318105220794678,
      "learning_rate": 0.00044918803054268784,
      "loss": 3.1303,
      "step": 77030
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4634772539138794,
      "learning_rate": 0.00044918448162902934,
      "loss": 2.7708,
      "step": 77031
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.492049217224121,
      "learning_rate": 0.0004491809326876346,
      "loss": 2.9608,
      "step": 77032
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.404322862625122,
      "learning_rate": 0.00044917738371850433,
      "loss": 2.6011,
      "step": 77033
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1272659301757812,
      "learning_rate": 0.0004491738347216392,
      "loss": 2.9684,
      "step": 77034
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6375601291656494,
      "learning_rate": 0.0004491702856970398,
      "loss": 2.8735,
      "step": 77035
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.671008586883545,
      "learning_rate": 0.00044916673664470685,
      "loss": 3.0562,
      "step": 77036
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4752674102783203,
      "learning_rate": 0.00044916318756464094,
      "loss": 3.0582,
      "step": 77037
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4929052591323853,
      "learning_rate": 0.0004491596384568428,
      "loss": 3.2876,
      "step": 77038
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.641282320022583,
      "learning_rate": 0.0004491560893213132,
      "loss": 2.8479,
      "step": 77039
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.416987419128418,
      "learning_rate": 0.0004491525401580525,
      "loss": 3.1047,
      "step": 77040
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9803062677383423,
      "learning_rate": 0.00044914899096706157,
      "loss": 2.9438,
      "step": 77041
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9353671073913574,
      "learning_rate": 0.00044914544174834106,
      "loss": 3.1609,
      "step": 77042
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.292236566543579,
      "learning_rate": 0.0004491418925018915,
      "loss": 2.8026,
      "step": 77043
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.442615032196045,
      "learning_rate": 0.00044913834322771374,
      "loss": 2.7065,
      "step": 77044
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8175283670425415,
      "learning_rate": 0.00044913479392580836,
      "loss": 2.8361,
      "step": 77045
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5640090703964233,
      "learning_rate": 0.00044913124459617596,
      "loss": 3.0903,
      "step": 77046
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.996835470199585,
      "learning_rate": 0.00044912769523881727,
      "loss": 2.9609,
      "step": 77047
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8050248622894287,
      "learning_rate": 0.0004491241458537329,
      "loss": 3.0263,
      "step": 77048
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9067888259887695,
      "learning_rate": 0.00044912059644092357,
      "loss": 3.0687,
      "step": 77049
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6097486019134521,
      "learning_rate": 0.00044911704700038987,
      "loss": 2.9971,
      "step": 77050
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.5876107215881348,
      "learning_rate": 0.0004491134975321326,
      "loss": 3.1455,
      "step": 77051
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.007855176925659,
      "learning_rate": 0.0004491099480361522,
      "loss": 3.2016,
      "step": 77052
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5496639013290405,
      "learning_rate": 0.0004491063985124494,
      "loss": 3.2557,
      "step": 77053
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6537246704101562,
      "learning_rate": 0.000449102848961025,
      "loss": 3.0184,
      "step": 77054
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.0246076583862305,
      "learning_rate": 0.0004490992993818796,
      "loss": 3.2412,
      "step": 77055
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.617967128753662,
      "learning_rate": 0.0004490957497750137,
      "loss": 3.1018,
      "step": 77056
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8036766052246094,
      "learning_rate": 0.0004490922001404282,
      "loss": 2.8887,
      "step": 77057
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.049089193344116,
      "learning_rate": 0.0004490886504781236,
      "loss": 3.1272,
      "step": 77058
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.528275966644287,
      "learning_rate": 0.00044908510078810063,
      "loss": 2.8851,
      "step": 77059
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.25561261177063,
      "learning_rate": 0.00044908155107035996,
      "loss": 3.1162,
      "step": 77060
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3538997173309326,
      "learning_rate": 0.0004490780013249021,
      "loss": 3.1557,
      "step": 77061
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.648319959640503,
      "learning_rate": 0.0004490744515517279,
      "loss": 3.0016,
      "step": 77062
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3004238605499268,
      "learning_rate": 0.00044907090175083794,
      "loss": 2.955,
      "step": 77063
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.070258617401123,
      "learning_rate": 0.0004490673519222329,
      "loss": 2.9703,
      "step": 77064
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3835463523864746,
      "learning_rate": 0.0004490638020659134,
      "loss": 2.974,
      "step": 77065
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.107107162475586,
      "learning_rate": 0.00044906025218188013,
      "loss": 3.1056,
      "step": 77066
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4901175498962402,
      "learning_rate": 0.0004490567022701337,
      "loss": 3.0466,
      "step": 77067
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.608608603477478,
      "learning_rate": 0.0004490531523306748,
      "loss": 3.0503,
      "step": 77068
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.779905080795288,
      "learning_rate": 0.00044904960236350427,
      "loss": 2.9606,
      "step": 77069
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.622668981552124,
      "learning_rate": 0.0004490460523686225,
      "loss": 3.1344,
      "step": 77070
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.0695676803588867,
      "learning_rate": 0.0004490425023460302,
      "loss": 3.027,
      "step": 77071
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.776408314704895,
      "learning_rate": 0.0004490389522957282,
      "loss": 2.8179,
      "step": 77072
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6250874996185303,
      "learning_rate": 0.00044903540221771695,
      "loss": 3.1069,
      "step": 77073
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9066152572631836,
      "learning_rate": 0.00044903185211199725,
      "loss": 2.988,
      "step": 77074
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3901032209396362,
      "learning_rate": 0.0004490283019785697,
      "loss": 2.8781,
      "step": 77075
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6798431873321533,
      "learning_rate": 0.000449024751817435,
      "loss": 2.8853,
      "step": 77076
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5310434103012085,
      "learning_rate": 0.0004490212016285938,
      "loss": 3.21,
      "step": 77077
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9932856559753418,
      "learning_rate": 0.00044901765141204666,
      "loss": 3.0082,
      "step": 77078
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6481035947799683,
      "learning_rate": 0.0004490141011677944,
      "loss": 2.8994,
      "step": 77079
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.420300006866455,
      "learning_rate": 0.00044901055089583753,
      "loss": 3.1492,
      "step": 77080
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8034732341766357,
      "learning_rate": 0.0004490070005961769,
      "loss": 3.1334,
      "step": 77081
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4303029775619507,
      "learning_rate": 0.00044900345026881297,
      "loss": 2.8355,
      "step": 77082
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9509249925613403,
      "learning_rate": 0.0004489998999137465,
      "loss": 2.9485,
      "step": 77083
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.58172345161438,
      "learning_rate": 0.0004489963495309782,
      "loss": 2.8514,
      "step": 77084
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.746667504310608,
      "learning_rate": 0.0004489927991205086,
      "loss": 3.0375,
      "step": 77085
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4471521377563477,
      "learning_rate": 0.00044898924868233845,
      "loss": 2.9424,
      "step": 77086
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8321237564086914,
      "learning_rate": 0.00044898569821646836,
      "loss": 2.9597,
      "step": 77087
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9662690162658691,
      "learning_rate": 0.0004489821477228991,
      "loss": 2.8551,
      "step": 77088
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6374475955963135,
      "learning_rate": 0.00044897859720163113,
      "loss": 3.159,
      "step": 77089
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.760896921157837,
      "learning_rate": 0.0004489750466526653,
      "loss": 3.0033,
      "step": 77090
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.6148903369903564,
      "learning_rate": 0.00044897149607600216,
      "loss": 2.9809,
      "step": 77091
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6758573055267334,
      "learning_rate": 0.0004489679454716424,
      "loss": 2.8226,
      "step": 77092
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6856523752212524,
      "learning_rate": 0.00044896439483958675,
      "loss": 3.0559,
      "step": 77093
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8933035135269165,
      "learning_rate": 0.0004489608441798357,
      "loss": 3.2575,
      "step": 77094
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.044190883636475,
      "learning_rate": 0.0004489572934923901,
      "loss": 2.982,
      "step": 77095
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3930113315582275,
      "learning_rate": 0.00044895374277725055,
      "loss": 3.0518,
      "step": 77096
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8427945375442505,
      "learning_rate": 0.00044895019203441765,
      "loss": 2.9264,
      "step": 77097
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.521550178527832,
      "learning_rate": 0.000448946641263892,
      "loss": 3.1722,
      "step": 77098
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.173487901687622,
      "learning_rate": 0.0004489430904656745,
      "loss": 2.7701,
      "step": 77099
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.566708564758301,
      "learning_rate": 0.00044893953963976563,
      "loss": 3.1661,
      "step": 77100
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0876941680908203,
      "learning_rate": 0.00044893598878616603,
      "loss": 2.8758,
      "step": 77101
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1338794231414795,
      "learning_rate": 0.0004489324379048765,
      "loss": 3.3282,
      "step": 77102
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.7525863647460938,
      "learning_rate": 0.0004489288869958976,
      "loss": 2.7956,
      "step": 77103
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7802072763442993,
      "learning_rate": 0.00044892533605923,
      "loss": 3.0711,
      "step": 77104
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7051117420196533,
      "learning_rate": 0.00044892178509487433,
      "loss": 3.0123,
      "step": 77105
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.452441692352295,
      "learning_rate": 0.0004489182341028314,
      "loss": 3.0447,
      "step": 77106
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2745139598846436,
      "learning_rate": 0.00044891468308310166,
      "loss": 2.7465,
      "step": 77107
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5628231763839722,
      "learning_rate": 0.00044891113203568585,
      "loss": 2.973,
      "step": 77108
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1917455196380615,
      "learning_rate": 0.0004489075809605847,
      "loss": 2.8823,
      "step": 77109
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8375271558761597,
      "learning_rate": 0.0004489040298577988,
      "loss": 3.0398,
      "step": 77110
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4799013137817383,
      "learning_rate": 0.00044890047872732884,
      "loss": 2.8348,
      "step": 77111
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.020782709121704,
      "learning_rate": 0.00044889692756917545,
      "loss": 3.2904,
      "step": 77112
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.7398488521575928,
      "learning_rate": 0.00044889337638333933,
      "loss": 3.2681,
      "step": 77113
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6235122680664062,
      "learning_rate": 0.00044888982516982115,
      "loss": 2.9739,
      "step": 77114
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.639460802078247,
      "learning_rate": 0.0004488862739286215,
      "loss": 2.9399,
      "step": 77115
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6016649007797241,
      "learning_rate": 0.0004488827226597411,
      "loss": 3.1381,
      "step": 77116
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8129162788391113,
      "learning_rate": 0.0004488791713631806,
      "loss": 3.0369,
      "step": 77117
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5659754276275635,
      "learning_rate": 0.00044887562003894066,
      "loss": 3.1465,
      "step": 77118
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5936492681503296,
      "learning_rate": 0.00044887206868702184,
      "loss": 3.0386,
      "step": 77119
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5660542249679565,
      "learning_rate": 0.0004488685173074249,
      "loss": 3.014,
      "step": 77120
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2441909313201904,
      "learning_rate": 0.00044886496590015065,
      "loss": 3.096,
      "step": 77121
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8203356266021729,
      "learning_rate": 0.00044886141446519946,
      "loss": 3.0137,
      "step": 77122
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.737895131111145,
      "learning_rate": 0.0004488578630025721,
      "loss": 3.0132,
      "step": 77123
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5749822854995728,
      "learning_rate": 0.00044885431151226937,
      "loss": 3.1745,
      "step": 77124
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8047449588775635,
      "learning_rate": 0.00044885075999429177,
      "loss": 2.8919,
      "step": 77125
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.819993257522583,
      "learning_rate": 0.00044884720844863996,
      "loss": 2.8462,
      "step": 77126
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4338202476501465,
      "learning_rate": 0.0004488436568753147,
      "loss": 3.1753,
      "step": 77127
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8976002931594849,
      "learning_rate": 0.00044884010527431655,
      "loss": 3.1121,
      "step": 77128
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4649052619934082,
      "learning_rate": 0.00044883655364564624,
      "loss": 3.0467,
      "step": 77129
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1747615337371826,
      "learning_rate": 0.00044883300198930444,
      "loss": 2.8245,
      "step": 77130
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4227935075759888,
      "learning_rate": 0.00044882945030529173,
      "loss": 3.3051,
      "step": 77131
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6269463300704956,
      "learning_rate": 0.0004488258985936088,
      "loss": 3.1328,
      "step": 77132
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4254839420318604,
      "learning_rate": 0.00044882234685425634,
      "loss": 2.794,
      "step": 77133
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5042953491210938,
      "learning_rate": 0.000448818795087235,
      "loss": 3.2161,
      "step": 77134
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4872565269470215,
      "learning_rate": 0.00044881524329254543,
      "loss": 3.0008,
      "step": 77135
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6603459119796753,
      "learning_rate": 0.0004488116914701883,
      "loss": 2.8252,
      "step": 77136
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6186109781265259,
      "learning_rate": 0.0004488081396201643,
      "loss": 3.068,
      "step": 77137
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.0255849361419678,
      "learning_rate": 0.000448804587742474,
      "loss": 2.7012,
      "step": 77138
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.035815954208374,
      "learning_rate": 0.00044880103583711824,
      "loss": 2.7636,
      "step": 77139
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.794682264328003,
      "learning_rate": 0.0004487974839040974,
      "loss": 3.0268,
      "step": 77140
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8085936307907104,
      "learning_rate": 0.00044879393194341237,
      "loss": 2.9982,
      "step": 77141
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5708973407745361,
      "learning_rate": 0.0004487903799550638,
      "loss": 3.0525,
      "step": 77142
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6771082878112793,
      "learning_rate": 0.0004487868279390522,
      "loss": 3.1632,
      "step": 77143
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9326975345611572,
      "learning_rate": 0.00044878327589537834,
      "loss": 2.8473,
      "step": 77144
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5251246690750122,
      "learning_rate": 0.0004487797238240429,
      "loss": 3.0294,
      "step": 77145
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7773363590240479,
      "learning_rate": 0.00044877617172504647,
      "loss": 2.8944,
      "step": 77146
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.9315825700759888,
      "learning_rate": 0.0004487726195983897,
      "loss": 3.0533,
      "step": 77147
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3892797231674194,
      "learning_rate": 0.00044876906744407334,
      "loss": 2.8844,
      "step": 77148
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6582485437393188,
      "learning_rate": 0.00044876551526209797,
      "loss": 3.0149,
      "step": 77149
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7638673782348633,
      "learning_rate": 0.0004487619630524643,
      "loss": 3.1703,
      "step": 77150
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7670445442199707,
      "learning_rate": 0.000448758410815173,
      "loss": 3.1956,
      "step": 77151
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8282049894332886,
      "learning_rate": 0.0004487548585502247,
      "loss": 3.0859,
      "step": 77152
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.695629596710205,
      "learning_rate": 0.00044875130625762,
      "loss": 3.0654,
      "step": 77153
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5801060199737549,
      "learning_rate": 0.00044874775393735975,
      "loss": 3.1242,
      "step": 77154
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.440298080444336,
      "learning_rate": 0.0004487442015894444,
      "loss": 2.8065,
      "step": 77155
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.259812593460083,
      "learning_rate": 0.00044874064921387465,
      "loss": 3.1492,
      "step": 77156
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.398486852645874,
      "learning_rate": 0.0004487370968106513,
      "loss": 3.143,
      "step": 77157
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.37368106842041,
      "learning_rate": 0.00044873354437977486,
      "loss": 3.0903,
      "step": 77158
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8679991960525513,
      "learning_rate": 0.000448729991921246,
      "loss": 3.1022,
      "step": 77159
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.986612558364868,
      "learning_rate": 0.0004487264394350655,
      "loss": 2.8849,
      "step": 77160
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2774691581726074,
      "learning_rate": 0.00044872288692123397,
      "loss": 3.1096,
      "step": 77161
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4613966941833496,
      "learning_rate": 0.000448719334379752,
      "loss": 2.9793,
      "step": 77162
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.2220475673675537,
      "learning_rate": 0.0004487157818106204,
      "loss": 3.0905,
      "step": 77163
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.5794036388397217,
      "learning_rate": 0.00044871222921383957,
      "loss": 2.8205,
      "step": 77164
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6252853870391846,
      "learning_rate": 0.0004487086765894104,
      "loss": 3.2953,
      "step": 77165
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6505415439605713,
      "learning_rate": 0.0004487051239373335,
      "loss": 2.9651,
      "step": 77166
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7065597772598267,
      "learning_rate": 0.00044870157125760944,
      "loss": 3.174,
      "step": 77167
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.6717758178710938,
      "learning_rate": 0.000448698018550239,
      "loss": 3.1691,
      "step": 77168
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.8657019138336182,
      "learning_rate": 0.0004486944658152228,
      "loss": 3.1033,
      "step": 77169
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.1809988021850586,
      "learning_rate": 0.0004486909130525615,
      "loss": 3.0835,
      "step": 77170
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.699040412902832,
      "learning_rate": 0.00044868736026225576,
      "loss": 2.8763,
      "step": 77171
    },
    {
      "epoch": 1.0,
      "grad_norm": 4.691980361938477,
      "learning_rate": 0.0004486838074443062,
      "loss": 3.0671,
      "step": 77172
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.156799793243408,
      "learning_rate": 0.00044868025459871355,
      "loss": 3.1917,
      "step": 77173
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.679347038269043,
      "learning_rate": 0.00044867670172547836,
      "loss": 3.0549,
      "step": 77174
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.2045514583587646,
      "learning_rate": 0.00044867314882460137,
      "loss": 3.0131,
      "step": 77175
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.3207457065582275,
      "learning_rate": 0.0004486695958960833,
      "loss": 3.0263,
      "step": 77176
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.4834916591644287,
      "learning_rate": 0.00044866604293992474,
      "loss": 3.0541,
      "step": 77177
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.5974247455596924,
      "learning_rate": 0.00044866248995612635,
      "loss": 2.8984,
      "step": 77178
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.5162417888641357,
      "learning_rate": 0.00044865893694468877,
      "loss": 2.9743,
      "step": 77179
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.2727928161621094,
      "learning_rate": 0.0004486553839056127,
      "loss": 2.8806,
      "step": 77180
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6321066617965698,
      "learning_rate": 0.0004486518308388988,
      "loss": 3.0047,
      "step": 77181
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6427180767059326,
      "learning_rate": 0.00044864827774454774,
      "loss": 2.9434,
      "step": 77182
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.4672372341156006,
      "learning_rate": 0.0004486447246225601,
      "loss": 3.0074,
      "step": 77183
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.6992641687393188,
      "learning_rate": 0.00044864117147293666,
      "loss": 3.048,
      "step": 77184
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.7100192308425903,
      "learning_rate": 0.000448637618295678,
      "loss": 3.2111,
      "step": 77185
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4900976419448853,
      "learning_rate": 0.00044863406509078476,
      "loss": 2.9598,
      "step": 77186
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5370758771896362,
      "learning_rate": 0.0004486305118582577,
      "loss": 3.1549,
      "step": 77187
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.446447730064392,
      "learning_rate": 0.00044862695859809737,
      "loss": 2.9945,
      "step": 77188
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5350826978683472,
      "learning_rate": 0.0004486234053103045,
      "loss": 3.0916,
      "step": 77189
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6339516639709473,
      "learning_rate": 0.00044861985199487974,
      "loss": 2.7258,
      "step": 77190
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8719731569290161,
      "learning_rate": 0.00044861629865182375,
      "loss": 2.9246,
      "step": 77191
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7580835819244385,
      "learning_rate": 0.0004486127452811372,
      "loss": 3.0317,
      "step": 77192
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8429332971572876,
      "learning_rate": 0.00044860919188282065,
      "loss": 2.9598,
      "step": 77193
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.361325979232788,
      "learning_rate": 0.0004486056384568749,
      "loss": 2.9679,
      "step": 77194
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7079216241836548,
      "learning_rate": 0.00044860208500330066,
      "loss": 2.8817,
      "step": 77195
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.595307469367981,
      "learning_rate": 0.0004485985315220984,
      "loss": 2.93,
      "step": 77196
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.6393542289733887,
      "learning_rate": 0.00044859497801326875,
      "loss": 3.2271,
      "step": 77197
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.043560266494751,
      "learning_rate": 0.00044859142447681267,
      "loss": 2.8627,
      "step": 77198
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3581666946411133,
      "learning_rate": 0.00044858787091273057,
      "loss": 3.1933,
      "step": 77199
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8974547386169434,
      "learning_rate": 0.0004485843173210231,
      "loss": 2.9212,
      "step": 77200
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6888453960418701,
      "learning_rate": 0.00044858076370169115,
      "loss": 3.1455,
      "step": 77201
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7328828573226929,
      "learning_rate": 0.0004485772100547352,
      "loss": 2.8825,
      "step": 77202
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.709057092666626,
      "learning_rate": 0.0004485736563801558,
      "loss": 2.9586,
      "step": 77203
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5947660207748413,
      "learning_rate": 0.00044857010267795394,
      "loss": 2.9547,
      "step": 77204
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7548487186431885,
      "learning_rate": 0.00044856654894813,
      "loss": 3.2314,
      "step": 77205
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6086279153823853,
      "learning_rate": 0.00044856299519068475,
      "loss": 2.9822,
      "step": 77206
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7528412342071533,
      "learning_rate": 0.0004485594414056188,
      "loss": 3.0213,
      "step": 77207
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7272508144378662,
      "learning_rate": 0.00044855588759293286,
      "loss": 3.015,
      "step": 77208
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6861634254455566,
      "learning_rate": 0.0004485523337526276,
      "loss": 2.7052,
      "step": 77209
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5746625661849976,
      "learning_rate": 0.00044854877988470364,
      "loss": 2.9346,
      "step": 77210
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.00677752494812,
      "learning_rate": 0.0004485452259891617,
      "loss": 3.0424,
      "step": 77211
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5927881002426147,
      "learning_rate": 0.00044854167206600235,
      "loss": 3.0079,
      "step": 77212
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5942991971969604,
      "learning_rate": 0.0004485381181152263,
      "loss": 2.8603,
      "step": 77213
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.783511996269226,
      "learning_rate": 0.0004485345641368342,
      "loss": 3.1261,
      "step": 77214
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5507049560546875,
      "learning_rate": 0.0004485310101308268,
      "loss": 2.8536,
      "step": 77215
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5699832439422607,
      "learning_rate": 0.0004485274560972046,
      "loss": 2.9391,
      "step": 77216
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.835814356803894,
      "learning_rate": 0.0004485239020359683,
      "loss": 3.0823,
      "step": 77217
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.59079110622406,
      "learning_rate": 0.00044852034794711876,
      "loss": 3.0933,
      "step": 77218
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.600241780281067,
      "learning_rate": 0.00044851679383065636,
      "loss": 2.8324,
      "step": 77219
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5461417436599731,
      "learning_rate": 0.00044851323968658194,
      "loss": 2.8107,
      "step": 77220
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5790029764175415,
      "learning_rate": 0.0004485096855148961,
      "loss": 3.1673,
      "step": 77221
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.956562876701355,
      "learning_rate": 0.0004485061313155995,
      "loss": 2.6571,
      "step": 77222
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7295722961425781,
      "learning_rate": 0.0004485025770886928,
      "loss": 2.7948,
      "step": 77223
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5403895378112793,
      "learning_rate": 0.00044849902283417665,
      "loss": 2.9736,
      "step": 77224
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.742433786392212,
      "learning_rate": 0.00044849546855205174,
      "loss": 3.0441,
      "step": 77225
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.477525234222412,
      "learning_rate": 0.0004484919142423187,
      "loss": 3.0111,
      "step": 77226
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4974843263626099,
      "learning_rate": 0.00044848835990497825,
      "loss": 2.8569,
      "step": 77227
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.720906138420105,
      "learning_rate": 0.00044848480554003097,
      "loss": 2.883,
      "step": 77228
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.558512806892395,
      "learning_rate": 0.00044848125114747763,
      "loss": 2.9427,
      "step": 77229
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.428444266319275,
      "learning_rate": 0.0004484776967273188,
      "loss": 2.9834,
      "step": 77230
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7851812839508057,
      "learning_rate": 0.0004484741422795551,
      "loss": 2.9133,
      "step": 77231
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5203421115875244,
      "learning_rate": 0.00044847058780418724,
      "loss": 3.1966,
      "step": 77232
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4593831300735474,
      "learning_rate": 0.00044846703330121603,
      "loss": 3.0821,
      "step": 77233
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0986125469207764,
      "learning_rate": 0.00044846347877064197,
      "loss": 2.8606,
      "step": 77234
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9859126806259155,
      "learning_rate": 0.00044845992421246563,
      "loss": 3.1597,
      "step": 77235
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5615088939666748,
      "learning_rate": 0.00044845636962668784,
      "loss": 2.8268,
      "step": 77236
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1998417377471924,
      "learning_rate": 0.00044845281501330924,
      "loss": 2.9909,
      "step": 77237
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4697215557098389,
      "learning_rate": 0.00044844926037233043,
      "loss": 3.2215,
      "step": 77238
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4756386280059814,
      "learning_rate": 0.00044844570570375206,
      "loss": 3.2375,
      "step": 77239
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6131722927093506,
      "learning_rate": 0.000448442151007575,
      "loss": 3.0622,
      "step": 77240
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5410513877868652,
      "learning_rate": 0.0004484385962837995,
      "loss": 2.8066,
      "step": 77241
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.609782099723816,
      "learning_rate": 0.00044843504153242665,
      "loss": 3.1519,
      "step": 77242
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8931807279586792,
      "learning_rate": 0.0004484314867534569,
      "loss": 3.0088,
      "step": 77243
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5905455350875854,
      "learning_rate": 0.00044842793194689086,
      "loss": 2.8024,
      "step": 77244
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6358920335769653,
      "learning_rate": 0.0004484243771127293,
      "loss": 3.0882,
      "step": 77245
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6364848613739014,
      "learning_rate": 0.00044842082225097287,
      "loss": 2.9797,
      "step": 77246
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.219960927963257,
      "learning_rate": 0.00044841726736162215,
      "loss": 3.0722,
      "step": 77247
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.591471552848816,
      "learning_rate": 0.0004484137124446779,
      "loss": 3.3278,
      "step": 77248
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6126734018325806,
      "learning_rate": 0.00044841015750014075,
      "loss": 3.0867,
      "step": 77249
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9245216846466064,
      "learning_rate": 0.0004484066025280113,
      "loss": 2.9332,
      "step": 77250
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.493929147720337,
      "learning_rate": 0.0004484030475282903,
      "loss": 2.9462,
      "step": 77251
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7566145658493042,
      "learning_rate": 0.0004483994925009784,
      "loss": 3.0881,
      "step": 77252
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7408593893051147,
      "learning_rate": 0.0004483959374460762,
      "loss": 2.9871,
      "step": 77253
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.757939100265503,
      "learning_rate": 0.00044839238236358436,
      "loss": 3.1601,
      "step": 77254
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7639212608337402,
      "learning_rate": 0.00044838882725350363,
      "loss": 2.9477,
      "step": 77255
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5120205879211426,
      "learning_rate": 0.00044838527211583457,
      "loss": 2.95,
      "step": 77256
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9305319786071777,
      "learning_rate": 0.00044838171695057793,
      "loss": 2.8964,
      "step": 77257
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3018922805786133,
      "learning_rate": 0.00044837816175773436,
      "loss": 3.1105,
      "step": 77258
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.122349262237549,
      "learning_rate": 0.0004483746065373044,
      "loss": 3.1971,
      "step": 77259
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9266656637191772,
      "learning_rate": 0.0004483710512892888,
      "loss": 3.1758,
      "step": 77260
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8253450393676758,
      "learning_rate": 0.0004483674960136883,
      "loss": 2.909,
      "step": 77261
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.554959535598755,
      "learning_rate": 0.00044836394071050343,
      "loss": 3.1725,
      "step": 77262
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.9542860984802246,
      "learning_rate": 0.00044836038537973493,
      "loss": 2.943,
      "step": 77263
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.434826374053955,
      "learning_rate": 0.0004483568300213834,
      "loss": 3.046,
      "step": 77264
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.762613296508789,
      "learning_rate": 0.00044835327463544955,
      "loss": 2.9738,
      "step": 77265
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.8357250690460205,
      "learning_rate": 0.00044834971922193403,
      "loss": 2.9085,
      "step": 77266
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7112088203430176,
      "learning_rate": 0.00044834616378083755,
      "loss": 2.8138,
      "step": 77267
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.910229206085205,
      "learning_rate": 0.0004483426083121606,
      "loss": 3.1686,
      "step": 77268
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.577182650566101,
      "learning_rate": 0.00044833905281590407,
      "loss": 3.0792,
      "step": 77269
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7819167375564575,
      "learning_rate": 0.00044833549729206843,
      "loss": 2.8924,
      "step": 77270
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5466381311416626,
      "learning_rate": 0.0004483319417406545,
      "loss": 2.9849,
      "step": 77271
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0028560161590576,
      "learning_rate": 0.00044832838616166277,
      "loss": 3.0468,
      "step": 77272
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5676223039627075,
      "learning_rate": 0.00044832483055509405,
      "loss": 3.2154,
      "step": 77273
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0429298877716064,
      "learning_rate": 0.00044832127492094897,
      "loss": 3.113,
      "step": 77274
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.2831697463989258,
      "learning_rate": 0.000448317719259228,
      "loss": 2.9286,
      "step": 77275
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.6111268997192383,
      "learning_rate": 0.00044831416356993223,
      "loss": 3.1907,
      "step": 77276
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7569706439971924,
      "learning_rate": 0.0004483106078530619,
      "loss": 2.7154,
      "step": 77277
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4544121026992798,
      "learning_rate": 0.0004483070521086178,
      "loss": 3.1364,
      "step": 77278
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4044361114501953,
      "learning_rate": 0.0004483034963366007,
      "loss": 3.2295,
      "step": 77279
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6122592687606812,
      "learning_rate": 0.00044829994053701116,
      "loss": 2.9306,
      "step": 77280
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6459991931915283,
      "learning_rate": 0.00044829638470984985,
      "loss": 3.1901,
      "step": 77281
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7779829502105713,
      "learning_rate": 0.0004482928288551174,
      "loss": 3.1115,
      "step": 77282
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0432748794555664,
      "learning_rate": 0.00044828927297281454,
      "loss": 2.9898,
      "step": 77283
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5945196151733398,
      "learning_rate": 0.00044828571706294194,
      "loss": 3.3537,
      "step": 77284
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5066477060317993,
      "learning_rate": 0.0004482821611255002,
      "loss": 2.9142,
      "step": 77285
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3515774011611938,
      "learning_rate": 0.00044827860516049006,
      "loss": 2.8839,
      "step": 77286
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6082348823547363,
      "learning_rate": 0.0004482750491679121,
      "loss": 3.0661,
      "step": 77287
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9733068943023682,
      "learning_rate": 0.00044827149314776696,
      "loss": 2.9456,
      "step": 77288
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5239943265914917,
      "learning_rate": 0.00044826793710005535,
      "loss": 3.1287,
      "step": 77289
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8968929052352905,
      "learning_rate": 0.00044826438102477806,
      "loss": 2.8999,
      "step": 77290
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7407969236373901,
      "learning_rate": 0.00044826082492193547,
      "loss": 2.8368,
      "step": 77291
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.596419095993042,
      "learning_rate": 0.00044825726879152845,
      "loss": 2.8526,
      "step": 77292
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.805381178855896,
      "learning_rate": 0.00044825371263355754,
      "loss": 2.9558,
      "step": 77293
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6364641189575195,
      "learning_rate": 0.00044825015644802356,
      "loss": 3.0225,
      "step": 77294
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7512612342834473,
      "learning_rate": 0.0004482466002349271,
      "loss": 3.0752,
      "step": 77295
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7739769220352173,
      "learning_rate": 0.00044824304399426865,
      "loss": 2.9728,
      "step": 77296
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4025564193725586,
      "learning_rate": 0.00044823948772604923,
      "loss": 3.0898,
      "step": 77297
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6975430250167847,
      "learning_rate": 0.0004482359314302692,
      "loss": 2.81,
      "step": 77298
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6739639043807983,
      "learning_rate": 0.0004482323751069292,
      "loss": 3.0139,
      "step": 77299
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.014173984527588,
      "learning_rate": 0.0004482288187560301,
      "loss": 2.998,
      "step": 77300
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8446905612945557,
      "learning_rate": 0.00044822526237757253,
      "loss": 3.0489,
      "step": 77301
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.20090651512146,
      "learning_rate": 0.000448221705971557,
      "loss": 2.7542,
      "step": 77302
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.2042267322540283,
      "learning_rate": 0.0004482181495379842,
      "loss": 2.952,
      "step": 77303
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6499125957489014,
      "learning_rate": 0.000448214593076855,
      "loss": 2.9555,
      "step": 77304
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.635096788406372,
      "learning_rate": 0.0004482110365881698,
      "loss": 2.9426,
      "step": 77305
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5521206855773926,
      "learning_rate": 0.00044820748007192937,
      "loss": 2.957,
      "step": 77306
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8256522417068481,
      "learning_rate": 0.0004482039235281345,
      "loss": 3.1683,
      "step": 77307
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6285709142684937,
      "learning_rate": 0.0004482003669567856,
      "loss": 3.0311,
      "step": 77308
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8670083284378052,
      "learning_rate": 0.0004481968103578835,
      "loss": 2.807,
      "step": 77309
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.616318464279175,
      "learning_rate": 0.00044819325373142877,
      "loss": 3.0389,
      "step": 77310
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6960570812225342,
      "learning_rate": 0.00044818969707742214,
      "loss": 2.9926,
      "step": 77311
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6054600477218628,
      "learning_rate": 0.0004481861403958642,
      "loss": 2.9494,
      "step": 77312
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8994172811508179,
      "learning_rate": 0.00044818258368675576,
      "loss": 3.0175,
      "step": 77313
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.52561616897583,
      "learning_rate": 0.0004481790269500973,
      "loss": 2.9803,
      "step": 77314
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6341710090637207,
      "learning_rate": 0.0004481754701858896,
      "loss": 2.991,
      "step": 77315
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8308402299880981,
      "learning_rate": 0.0004481719133941333,
      "loss": 2.808,
      "step": 77316
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7368954420089722,
      "learning_rate": 0.000448168356574829,
      "loss": 2.9127,
      "step": 77317
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8238128423690796,
      "learning_rate": 0.00044816479972797744,
      "loss": 2.9779,
      "step": 77318
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6743360757827759,
      "learning_rate": 0.0004481612428535793,
      "loss": 2.9827,
      "step": 77319
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.801872968673706,
      "learning_rate": 0.00044815768595163504,
      "loss": 2.8548,
      "step": 77320
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4937108755111694,
      "learning_rate": 0.00044815412902214554,
      "loss": 3.0846,
      "step": 77321
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2843875885009766,
      "learning_rate": 0.00044815057206511145,
      "loss": 2.8203,
      "step": 77322
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4873285293579102,
      "learning_rate": 0.0004481470150805333,
      "loss": 3.0096,
      "step": 77323
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.865457534790039,
      "learning_rate": 0.00044814345806841184,
      "loss": 3.3022,
      "step": 77324
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.503451347351074,
      "learning_rate": 0.0004481399010287478,
      "loss": 3.2256,
      "step": 77325
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6603628396987915,
      "learning_rate": 0.00044813634396154167,
      "loss": 2.978,
      "step": 77326
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5156701803207397,
      "learning_rate": 0.0004481327868667941,
      "loss": 3.2927,
      "step": 77327
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6905049085617065,
      "learning_rate": 0.00044812922974450604,
      "loss": 3.3577,
      "step": 77328
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5356115102767944,
      "learning_rate": 0.0004481256725946778,
      "loss": 3.0799,
      "step": 77329
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.971823811531067,
      "learning_rate": 0.0004481221154173103,
      "loss": 3.0586,
      "step": 77330
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2446765899658203,
      "learning_rate": 0.0004481185582124041,
      "loss": 3.0703,
      "step": 77331
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.747092843055725,
      "learning_rate": 0.00044811500097995983,
      "loss": 3.0337,
      "step": 77332
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.589978814125061,
      "learning_rate": 0.0004481114437199782,
      "loss": 2.9507,
      "step": 77333
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8187297582626343,
      "learning_rate": 0.00044810788643245993,
      "loss": 3.1172,
      "step": 77334
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6708787679672241,
      "learning_rate": 0.0004481043291174055,
      "loss": 2.786,
      "step": 77335
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.503806233406067,
      "learning_rate": 0.0004481007717748157,
      "loss": 3.0546,
      "step": 77336
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.5432655811309814,
      "learning_rate": 0.0004480972144046912,
      "loss": 2.9265,
      "step": 77337
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6349326372146606,
      "learning_rate": 0.0004480936570070326,
      "loss": 3.1758,
      "step": 77338
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9002938270568848,
      "learning_rate": 0.0004480900995818406,
      "loss": 3.0037,
      "step": 77339
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.858175039291382,
      "learning_rate": 0.0004480865421291159,
      "loss": 2.9084,
      "step": 77340
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7647731304168701,
      "learning_rate": 0.000448082984648859,
      "loss": 2.8,
      "step": 77341
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6512306928634644,
      "learning_rate": 0.0004480794271410708,
      "loss": 3.0737,
      "step": 77342
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5555566549301147,
      "learning_rate": 0.0004480758696057518,
      "loss": 2.9756,
      "step": 77343
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.102546453475952,
      "learning_rate": 0.0004480723120429027,
      "loss": 3.1983,
      "step": 77344
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6766858100891113,
      "learning_rate": 0.00044806875445252414,
      "loss": 2.7588,
      "step": 77345
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6655385494232178,
      "learning_rate": 0.0004480651968346169,
      "loss": 2.9243,
      "step": 77346
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7611029148101807,
      "learning_rate": 0.00044806163918918143,
      "loss": 2.9432,
      "step": 77347
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0115880966186523,
      "learning_rate": 0.0004480580815162185,
      "loss": 2.9689,
      "step": 77348
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.480707049369812,
      "learning_rate": 0.0004480545238157289,
      "loss": 3.047,
      "step": 77349
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8628413677215576,
      "learning_rate": 0.00044805096608771305,
      "loss": 2.8909,
      "step": 77350
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4629768133163452,
      "learning_rate": 0.00044804740833217176,
      "loss": 2.9852,
      "step": 77351
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6440961360931396,
      "learning_rate": 0.0004480438505491057,
      "loss": 3.0338,
      "step": 77352
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5684620141983032,
      "learning_rate": 0.0004480402927385155,
      "loss": 2.9311,
      "step": 77353
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7461637258529663,
      "learning_rate": 0.00044803673490040173,
      "loss": 2.7565,
      "step": 77354
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.053074836730957,
      "learning_rate": 0.00044803317703476524,
      "loss": 3.0884,
      "step": 77355
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5597789287567139,
      "learning_rate": 0.0004480296191416065,
      "loss": 3.0423,
      "step": 77356
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.700887680053711,
      "learning_rate": 0.0004480260612209263,
      "loss": 2.9447,
      "step": 77357
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.156163215637207,
      "learning_rate": 0.00044802250327272527,
      "loss": 3.3212,
      "step": 77358
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6975927352905273,
      "learning_rate": 0.00044801894529700404,
      "loss": 3.0696,
      "step": 77359
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.521426796913147,
      "learning_rate": 0.0004480153872937633,
      "loss": 3.1822,
      "step": 77360
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9777637720108032,
      "learning_rate": 0.0004480118292630037,
      "loss": 2.9754,
      "step": 77361
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.618076205253601,
      "learning_rate": 0.00044800827120472595,
      "loss": 3.1952,
      "step": 77362
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5315134525299072,
      "learning_rate": 0.00044800471311893066,
      "loss": 2.8982,
      "step": 77363
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3682295083999634,
      "learning_rate": 0.0004480011550056185,
      "loss": 2.8421,
      "step": 77364
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.438351035118103,
      "learning_rate": 0.00044799759686479007,
      "loss": 2.8336,
      "step": 77365
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.682088851928711,
      "learning_rate": 0.0004479940386964461,
      "loss": 3.1855,
      "step": 77366
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.197249174118042,
      "learning_rate": 0.0004479904805005873,
      "loss": 2.8918,
      "step": 77367
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.764707088470459,
      "learning_rate": 0.0004479869222772143,
      "loss": 3.1288,
      "step": 77368
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.930986762046814,
      "learning_rate": 0.00044798336402632763,
      "loss": 2.9033,
      "step": 77369
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6390352249145508,
      "learning_rate": 0.0004479798057479281,
      "loss": 2.905,
      "step": 77370
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7807233333587646,
      "learning_rate": 0.00044797624744201636,
      "loss": 2.993,
      "step": 77371
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.724999189376831,
      "learning_rate": 0.00044797268910859304,
      "loss": 2.959,
      "step": 77372
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9226657152175903,
      "learning_rate": 0.0004479691307476588,
      "loss": 2.9065,
      "step": 77373
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.204228401184082,
      "learning_rate": 0.00044796557235921426,
      "loss": 2.882,
      "step": 77374
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1271426677703857,
      "learning_rate": 0.0004479620139432602,
      "loss": 3.1684,
      "step": 77375
    },
    {
      "epoch": 1.01,
      "grad_norm": 4.21415901184082,
      "learning_rate": 0.0004479584554997972,
      "loss": 2.8938,
      "step": 77376
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3208189010620117,
      "learning_rate": 0.0004479548970288259,
      "loss": 2.802,
      "step": 77377
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.281815767288208,
      "learning_rate": 0.000447951338530347,
      "loss": 2.9356,
      "step": 77378
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.733001232147217,
      "learning_rate": 0.00044794778000436115,
      "loss": 2.7895,
      "step": 77379
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.166954278945923,
      "learning_rate": 0.000447944221450869,
      "loss": 2.9733,
      "step": 77380
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.9588255882263184,
      "learning_rate": 0.00044794066286987116,
      "loss": 2.9803,
      "step": 77381
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0432300567626953,
      "learning_rate": 0.0004479371042613685,
      "loss": 3.0488,
      "step": 77382
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.6982264518737793,
      "learning_rate": 0.0004479335456253615,
      "loss": 3.027,
      "step": 77383
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7324271202087402,
      "learning_rate": 0.0004479299869618508,
      "loss": 3.0805,
      "step": 77384
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6442065238952637,
      "learning_rate": 0.00044792642827083716,
      "loss": 3.0959,
      "step": 77385
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4157981872558594,
      "learning_rate": 0.00044792286955232127,
      "loss": 2.7964,
      "step": 77386
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.867724895477295,
      "learning_rate": 0.0004479193108063037,
      "loss": 2.7486,
      "step": 77387
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8053432703018188,
      "learning_rate": 0.000447915752032785,
      "loss": 2.7448,
      "step": 77388
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.953626275062561,
      "learning_rate": 0.0004479121932317661,
      "loss": 2.9055,
      "step": 77389
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7174021005630493,
      "learning_rate": 0.0004479086344032475,
      "loss": 3.0489,
      "step": 77390
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.552590012550354,
      "learning_rate": 0.0004479050755472299,
      "loss": 3.187,
      "step": 77391
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8405977487564087,
      "learning_rate": 0.000447901516663714,
      "loss": 3.1696,
      "step": 77392
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5718357563018799,
      "learning_rate": 0.0004478979577527004,
      "loss": 3.0856,
      "step": 77393
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5408971309661865,
      "learning_rate": 0.0004478943988141897,
      "loss": 3.0361,
      "step": 77394
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4889966249465942,
      "learning_rate": 0.00044789083984818275,
      "loss": 3.0004,
      "step": 77395
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4349424839019775,
      "learning_rate": 0.00044788728085468,
      "loss": 3.143,
      "step": 77396
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.518027901649475,
      "learning_rate": 0.00044788372183368223,
      "loss": 3.0976,
      "step": 77397
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.534633994102478,
      "learning_rate": 0.0004478801627851901,
      "loss": 2.8473,
      "step": 77398
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.517648458480835,
      "learning_rate": 0.00044787660370920426,
      "loss": 3.1626,
      "step": 77399
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7907540798187256,
      "learning_rate": 0.0004478730446057254,
      "loss": 3.0794,
      "step": 77400
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.6574413776397705,
      "learning_rate": 0.00044786948547475414,
      "loss": 3.154,
      "step": 77401
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.493621587753296,
      "learning_rate": 0.0004478659263162911,
      "loss": 3.0569,
      "step": 77402
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5333900451660156,
      "learning_rate": 0.000447862367130337,
      "loss": 3.1182,
      "step": 77403
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1085848808288574,
      "learning_rate": 0.00044785880791689255,
      "loss": 2.9524,
      "step": 77404
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.743995189666748,
      "learning_rate": 0.00044785524867595834,
      "loss": 3.2132,
      "step": 77405
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6090222597122192,
      "learning_rate": 0.000447851689407535,
      "loss": 3.1141,
      "step": 77406
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7048850059509277,
      "learning_rate": 0.00044784813011162334,
      "loss": 3.0062,
      "step": 77407
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.990313172340393,
      "learning_rate": 0.00044784457078822385,
      "loss": 2.9745,
      "step": 77408
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.660405158996582,
      "learning_rate": 0.0004478410114373372,
      "loss": 2.8059,
      "step": 77409
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4312682151794434,
      "learning_rate": 0.00044783745205896424,
      "loss": 3.1302,
      "step": 77410
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4253426790237427,
      "learning_rate": 0.0004478338926531055,
      "loss": 3.0579,
      "step": 77411
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0654823780059814,
      "learning_rate": 0.00044783033321976154,
      "loss": 3.1363,
      "step": 77412
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.7646899223327637,
      "learning_rate": 0.0004478267737589332,
      "loss": 3.1834,
      "step": 77413
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7812730073928833,
      "learning_rate": 0.0004478232142706211,
      "loss": 3.119,
      "step": 77414
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4775478839874268,
      "learning_rate": 0.00044781965475482573,
      "loss": 3.1656,
      "step": 77415
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8971307277679443,
      "learning_rate": 0.0004478160952115481,
      "loss": 3.0544,
      "step": 77416
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6268357038497925,
      "learning_rate": 0.0004478125356407885,
      "loss": 2.9131,
      "step": 77417
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8220833539962769,
      "learning_rate": 0.00044780897604254784,
      "loss": 2.9085,
      "step": 77418
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.675488829612732,
      "learning_rate": 0.0004478054164168267,
      "loss": 3.0171,
      "step": 77419
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2772631645202637,
      "learning_rate": 0.0004478018567636257,
      "loss": 2.9124,
      "step": 77420
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.769992470741272,
      "learning_rate": 0.00044779829708294566,
      "loss": 3.0455,
      "step": 77421
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5663750171661377,
      "learning_rate": 0.00044779473737478705,
      "loss": 2.682,
      "step": 77422
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.504725694656372,
      "learning_rate": 0.0004477911776391506,
      "loss": 2.7925,
      "step": 77423
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.132370948791504,
      "learning_rate": 0.000447787617876037,
      "loss": 3.1657,
      "step": 77424
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5378801822662354,
      "learning_rate": 0.0004477840580854469,
      "loss": 3.0402,
      "step": 77425
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5381921529769897,
      "learning_rate": 0.0004477804982673809,
      "loss": 2.9893,
      "step": 77426
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7080224752426147,
      "learning_rate": 0.0004477769384218397,
      "loss": 2.9971,
      "step": 77427
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4567921161651611,
      "learning_rate": 0.00044777337854882407,
      "loss": 3.2092,
      "step": 77428
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5984830856323242,
      "learning_rate": 0.00044776981864833455,
      "loss": 2.8155,
      "step": 77429
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0544583797454834,
      "learning_rate": 0.0004477662587203718,
      "loss": 3.0547,
      "step": 77430
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8649964332580566,
      "learning_rate": 0.0004477626987649365,
      "loss": 3.175,
      "step": 77431
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0262906551361084,
      "learning_rate": 0.00044775913878202943,
      "loss": 3.0794,
      "step": 77432
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.195080280303955,
      "learning_rate": 0.000447755578771651,
      "loss": 2.9512,
      "step": 77433
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2306253910064697,
      "learning_rate": 0.00044775201873380215,
      "loss": 3.0104,
      "step": 77434
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4526783227920532,
      "learning_rate": 0.00044774845866848333,
      "loss": 3.0644,
      "step": 77435
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.037400245666504,
      "learning_rate": 0.00044774489857569527,
      "loss": 2.6798,
      "step": 77436
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.290635347366333,
      "learning_rate": 0.0004477413384554386,
      "loss": 2.6091,
      "step": 77437
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.5419907569885254,
      "learning_rate": 0.0004477377783077142,
      "loss": 3.2375,
      "step": 77438
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7591958045959473,
      "learning_rate": 0.00044773421813252244,
      "loss": 2.9656,
      "step": 77439
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5745106935501099,
      "learning_rate": 0.00044773065792986404,
      "loss": 3.0307,
      "step": 77440
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8840522766113281,
      "learning_rate": 0.0004477270976997399,
      "loss": 2.9722,
      "step": 77441
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0457944869995117,
      "learning_rate": 0.0004477235374421503,
      "loss": 2.9459,
      "step": 77442
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9746849536895752,
      "learning_rate": 0.00044771997715709625,
      "loss": 3.2161,
      "step": 77443
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.5603373050689697,
      "learning_rate": 0.0004477164168445783,
      "loss": 3.1273,
      "step": 77444
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7452945709228516,
      "learning_rate": 0.000447712856504597,
      "loss": 3.0565,
      "step": 77445
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.989118218421936,
      "learning_rate": 0.00044770929613715307,
      "loss": 2.8468,
      "step": 77446
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.86440110206604,
      "learning_rate": 0.00044770573574224725,
      "loss": 3.1002,
      "step": 77447
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.531414270401001,
      "learning_rate": 0.0004477021753198801,
      "loss": 3.1596,
      "step": 77448
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8963912725448608,
      "learning_rate": 0.00044769861487005234,
      "loss": 2.8706,
      "step": 77449
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.882908582687378,
      "learning_rate": 0.00044769505439276467,
      "loss": 3.0139,
      "step": 77450
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7243613004684448,
      "learning_rate": 0.0004476914938880176,
      "loss": 2.9516,
      "step": 77451
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7706692218780518,
      "learning_rate": 0.00044768793335581196,
      "loss": 3.1787,
      "step": 77452
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7163156270980835,
      "learning_rate": 0.0004476843727961484,
      "loss": 3.0365,
      "step": 77453
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5127192735671997,
      "learning_rate": 0.00044768081220902746,
      "loss": 3.113,
      "step": 77454
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4426932334899902,
      "learning_rate": 0.00044767725159444987,
      "loss": 2.8449,
      "step": 77455
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4263734817504883,
      "learning_rate": 0.0004476736909524163,
      "loss": 2.8749,
      "step": 77456
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4773231744766235,
      "learning_rate": 0.0004476701302829274,
      "loss": 3.0684,
      "step": 77457
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7018139362335205,
      "learning_rate": 0.00044766656958598384,
      "loss": 2.9393,
      "step": 77458
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7909314632415771,
      "learning_rate": 0.0004476630088615863,
      "loss": 2.974,
      "step": 77459
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.572850227355957,
      "learning_rate": 0.0004476594481097354,
      "loss": 2.8873,
      "step": 77460
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6027737855911255,
      "learning_rate": 0.00044765588733043177,
      "loss": 3.1001,
      "step": 77461
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8852287530899048,
      "learning_rate": 0.00044765232652367614,
      "loss": 3.2543,
      "step": 77462
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7672711610794067,
      "learning_rate": 0.0004476487656894692,
      "loss": 2.889,
      "step": 77463
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1626362800598145,
      "learning_rate": 0.00044764520482781155,
      "loss": 2.9357,
      "step": 77464
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7162898778915405,
      "learning_rate": 0.0004476416439387039,
      "loss": 3.0941,
      "step": 77465
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.6843764781951904,
      "learning_rate": 0.0004476380830221468,
      "loss": 2.7726,
      "step": 77466
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0899453163146973,
      "learning_rate": 0.0004476345220781411,
      "loss": 3.0015,
      "step": 77467
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5627418756484985,
      "learning_rate": 0.00044763096110668726,
      "loss": 3.0943,
      "step": 77468
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.12186598777771,
      "learning_rate": 0.000447627400107786,
      "loss": 2.8659,
      "step": 77469
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7139310836791992,
      "learning_rate": 0.0004476238390814382,
      "loss": 3.0554,
      "step": 77470
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.635744094848633,
      "learning_rate": 0.0004476202780276442,
      "loss": 3.0444,
      "step": 77471
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.173548460006714,
      "learning_rate": 0.0004476167169464048,
      "loss": 3.224,
      "step": 77472
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0042712688446045,
      "learning_rate": 0.0004476131558377208,
      "loss": 3.0935,
      "step": 77473
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.592530608177185,
      "learning_rate": 0.00044760959470159257,
      "loss": 3.0759,
      "step": 77474
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.697834849357605,
      "learning_rate": 0.000447606033538021,
      "loss": 2.9787,
      "step": 77475
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7685877084732056,
      "learning_rate": 0.0004476024723470067,
      "loss": 2.8831,
      "step": 77476
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6007345914840698,
      "learning_rate": 0.00044759891112855027,
      "loss": 3.0575,
      "step": 77477
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6095918416976929,
      "learning_rate": 0.00044759534988265243,
      "loss": 2.996,
      "step": 77478
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6262956857681274,
      "learning_rate": 0.00044759178860931387,
      "loss": 3.1718,
      "step": 77479
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.695035696029663,
      "learning_rate": 0.00044758822730853523,
      "loss": 3.0496,
      "step": 77480
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6276979446411133,
      "learning_rate": 0.000447584665980317,
      "loss": 3.0168,
      "step": 77481
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8297936916351318,
      "learning_rate": 0.00044758110462466013,
      "loss": 3.0198,
      "step": 77482
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7612236738204956,
      "learning_rate": 0.0004475775432415651,
      "loss": 3.2215,
      "step": 77483
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.633493423461914,
      "learning_rate": 0.00044757398183103265,
      "loss": 3.105,
      "step": 77484
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6625406742095947,
      "learning_rate": 0.0004475704203930634,
      "loss": 2.8894,
      "step": 77485
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4027780294418335,
      "learning_rate": 0.00044756685892765805,
      "loss": 2.9232,
      "step": 77486
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.487473726272583,
      "learning_rate": 0.00044756329743481717,
      "loss": 2.9998,
      "step": 77487
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.565768837928772,
      "learning_rate": 0.00044755973591454153,
      "loss": 3.1264,
      "step": 77488
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.805541515350342,
      "learning_rate": 0.00044755617436683173,
      "loss": 2.9767,
      "step": 77489
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9939465522766113,
      "learning_rate": 0.0004475526127916884,
      "loss": 2.8872,
      "step": 77490
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.543432354927063,
      "learning_rate": 0.0004475490511891123,
      "loss": 2.8882,
      "step": 77491
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.460987091064453,
      "learning_rate": 0.0004475454895591042,
      "loss": 2.9135,
      "step": 77492
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8201907873153687,
      "learning_rate": 0.0004475419279016644,
      "loss": 3.0434,
      "step": 77493
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5307657718658447,
      "learning_rate": 0.0004475383662167938,
      "loss": 2.9857,
      "step": 77494
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0036895275115967,
      "learning_rate": 0.00044753480450449316,
      "loss": 3.1072,
      "step": 77495
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6356101036071777,
      "learning_rate": 0.00044753124276476286,
      "loss": 2.9901,
      "step": 77496
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6702837944030762,
      "learning_rate": 0.00044752768099760377,
      "loss": 2.958,
      "step": 77497
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9213862419128418,
      "learning_rate": 0.0004475241192030166,
      "loss": 2.9063,
      "step": 77498
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7236272096633911,
      "learning_rate": 0.0004475205573810018,
      "loss": 2.8246,
      "step": 77499
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3261539936065674,
      "learning_rate": 0.00044751699553156015,
      "loss": 2.8844,
      "step": 77500
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3229923248291016,
      "learning_rate": 0.00044751343365469237,
      "loss": 2.7864,
      "step": 77501
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.171032190322876,
      "learning_rate": 0.000447509871750399,
      "loss": 2.997,
      "step": 77502
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7816827297210693,
      "learning_rate": 0.0004475063098186807,
      "loss": 3.1603,
      "step": 77503
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8812718391418457,
      "learning_rate": 0.00044750274785953825,
      "loss": 3.0045,
      "step": 77504
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9144384860992432,
      "learning_rate": 0.0004474991858729723,
      "loss": 2.955,
      "step": 77505
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4612910747528076,
      "learning_rate": 0.0004474956238589834,
      "loss": 3.1894,
      "step": 77506
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3470802307128906,
      "learning_rate": 0.0004474920618175723,
      "loss": 3.1736,
      "step": 77507
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2030835151672363,
      "learning_rate": 0.00044748849974873974,
      "loss": 3.2084,
      "step": 77508
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.310429573059082,
      "learning_rate": 0.0004474849376524862,
      "loss": 2.7425,
      "step": 77509
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6301677227020264,
      "learning_rate": 0.00044748137552881237,
      "loss": 2.951,
      "step": 77510
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9287420511245728,
      "learning_rate": 0.00044747781337771907,
      "loss": 2.9727,
      "step": 77511
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3223824501037598,
      "learning_rate": 0.00044747425119920684,
      "loss": 3.1283,
      "step": 77512
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.668810486793518,
      "learning_rate": 0.0004474706889932763,
      "loss": 2.838,
      "step": 77513
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.472373366355896,
      "learning_rate": 0.00044746712675992824,
      "loss": 2.9869,
      "step": 77514
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3821927309036255,
      "learning_rate": 0.00044746356449916323,
      "loss": 3.0335,
      "step": 77515
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.2288730144500732,
      "learning_rate": 0.0004474600022109819,
      "loss": 2.8525,
      "step": 77516
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.771602988243103,
      "learning_rate": 0.00044745643989538514,
      "loss": 2.9045,
      "step": 77517
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.626855492591858,
      "learning_rate": 0.0004474528775523733,
      "loss": 2.8469,
      "step": 77518
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4740493297576904,
      "learning_rate": 0.00044744931518194716,
      "loss": 2.9202,
      "step": 77519
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6109448671340942,
      "learning_rate": 0.00044744575278410755,
      "loss": 2.9352,
      "step": 77520
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0607986450195312,
      "learning_rate": 0.00044744219035885486,
      "loss": 3.1816,
      "step": 77521
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.502244234085083,
      "learning_rate": 0.0004474386279061899,
      "loss": 2.9703,
      "step": 77522
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6448050737380981,
      "learning_rate": 0.0004474350654261134,
      "loss": 2.9764,
      "step": 77523
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5339990854263306,
      "learning_rate": 0.00044743150291862594,
      "loss": 3.1892,
      "step": 77524
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0401687622070312,
      "learning_rate": 0.0004474279403837281,
      "loss": 3.1928,
      "step": 77525
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.1465113162994385,
      "learning_rate": 0.00044742437782142067,
      "loss": 2.8905,
      "step": 77526
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3924607038497925,
      "learning_rate": 0.00044742081523170427,
      "loss": 3.0816,
      "step": 77527
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4935355186462402,
      "learning_rate": 0.00044741725261457946,
      "loss": 2.9847,
      "step": 77528
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.4145727157592773,
      "learning_rate": 0.00044741368997004715,
      "loss": 3.3262,
      "step": 77529
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.3949034214019775,
      "learning_rate": 0.0004474101272981078,
      "loss": 3.2106,
      "step": 77530
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3016252517700195,
      "learning_rate": 0.000447406564598762,
      "loss": 2.7917,
      "step": 77531
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.320366621017456,
      "learning_rate": 0.00044740300187201076,
      "loss": 2.9905,
      "step": 77532
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1200575828552246,
      "learning_rate": 0.0004473994391178544,
      "loss": 3.1554,
      "step": 77533
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.7037353515625,
      "learning_rate": 0.0004473958763362937,
      "loss": 3.1983,
      "step": 77534
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8123606443405151,
      "learning_rate": 0.0004473923135273294,
      "loss": 3.0436,
      "step": 77535
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.463193893432617,
      "learning_rate": 0.000447388750690962,
      "loss": 2.7222,
      "step": 77536
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0808396339416504,
      "learning_rate": 0.00044738518782719224,
      "loss": 2.9601,
      "step": 77537
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5146546363830566,
      "learning_rate": 0.0004473816249360208,
      "loss": 2.9182,
      "step": 77538
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.638274073600769,
      "learning_rate": 0.00044737806201744837,
      "loss": 2.7986,
      "step": 77539
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4253425598144531,
      "learning_rate": 0.0004473744990714755,
      "loss": 3.2822,
      "step": 77540
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4518184661865234,
      "learning_rate": 0.000447370936098103,
      "loss": 3.0464,
      "step": 77541
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4894676208496094,
      "learning_rate": 0.00044736737309733146,
      "loss": 3.0091,
      "step": 77542
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.611387848854065,
      "learning_rate": 0.0004473638100691615,
      "loss": 2.9748,
      "step": 77543
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3229830265045166,
      "learning_rate": 0.0004473602470135939,
      "loss": 3.184,
      "step": 77544
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1675496101379395,
      "learning_rate": 0.00044735668393062914,
      "loss": 3.0048,
      "step": 77545
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.732222557067871,
      "learning_rate": 0.00044735312082026807,
      "loss": 3.0959,
      "step": 77546
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8809581995010376,
      "learning_rate": 0.00044734955768251127,
      "loss": 2.9797,
      "step": 77547
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.210318088531494,
      "learning_rate": 0.0004473459945173594,
      "loss": 3.0043,
      "step": 77548
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.551822543144226,
      "learning_rate": 0.00044734243132481315,
      "loss": 3.0224,
      "step": 77549
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5706239938735962,
      "learning_rate": 0.0004473388681048731,
      "loss": 2.9168,
      "step": 77550
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.084164619445801,
      "learning_rate": 0.00044733530485754,
      "loss": 3.137,
      "step": 77551
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.191206216812134,
      "learning_rate": 0.00044733174158281456,
      "loss": 3.0247,
      "step": 77552
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1056597232818604,
      "learning_rate": 0.00044732817828069726,
      "loss": 2.7876,
      "step": 77553
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1107373237609863,
      "learning_rate": 0.00044732461495118885,
      "loss": 3.1862,
      "step": 77554
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8332630395889282,
      "learning_rate": 0.0004473210515942902,
      "loss": 3.2302,
      "step": 77555
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.74924635887146,
      "learning_rate": 0.0004473174882100016,
      "loss": 3.1038,
      "step": 77556
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4947025775909424,
      "learning_rate": 0.00044731392479832393,
      "loss": 3.0279,
      "step": 77557
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.7291529178619385,
      "learning_rate": 0.0004473103613592579,
      "loss": 2.9483,
      "step": 77558
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.751038670539856,
      "learning_rate": 0.000447306797892804,
      "loss": 3.0927,
      "step": 77559
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.7926626205444336,
      "learning_rate": 0.000447303234398963,
      "loss": 2.8708,
      "step": 77560
    },
    {
      "epoch": 1.01,
      "grad_norm": 5.286623001098633,
      "learning_rate": 0.00044729967087773554,
      "loss": 2.879,
      "step": 77561
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8290338516235352,
      "learning_rate": 0.00044729610732912234,
      "loss": 2.9865,
      "step": 77562
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.30037522315979,
      "learning_rate": 0.0004472925437531241,
      "loss": 2.7373,
      "step": 77563
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6779032945632935,
      "learning_rate": 0.0004472889801497412,
      "loss": 3.3344,
      "step": 77564
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6559104919433594,
      "learning_rate": 0.00044728541651897465,
      "loss": 3.0453,
      "step": 77565
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.635646104812622,
      "learning_rate": 0.0004472818528608249,
      "loss": 2.8715,
      "step": 77566
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7701555490493774,
      "learning_rate": 0.0004472782891752926,
      "loss": 2.9884,
      "step": 77567
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.8735945224761963,
      "learning_rate": 0.00044727472546237865,
      "loss": 3.1865,
      "step": 77568
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.444153070449829,
      "learning_rate": 0.0004472711617220835,
      "loss": 3.1397,
      "step": 77569
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.719099760055542,
      "learning_rate": 0.0004472675979544078,
      "loss": 3.1396,
      "step": 77570
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1184089183807373,
      "learning_rate": 0.0004472640341593522,
      "loss": 2.7953,
      "step": 77571
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.292447805404663,
      "learning_rate": 0.00044726047033691763,
      "loss": 3.1063,
      "step": 77572
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9060462713241577,
      "learning_rate": 0.0004472569064871044,
      "loss": 3.1572,
      "step": 77573
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5957837104797363,
      "learning_rate": 0.0004472533426099134,
      "loss": 3.163,
      "step": 77574
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8353098630905151,
      "learning_rate": 0.00044724977870534526,
      "loss": 3.0298,
      "step": 77575
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.081859588623047,
      "learning_rate": 0.0004472462147734005,
      "loss": 2.8616,
      "step": 77576
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.523329019546509,
      "learning_rate": 0.00044724265081407997,
      "loss": 2.742,
      "step": 77577
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.668134093284607,
      "learning_rate": 0.00044723908682738425,
      "loss": 3.1639,
      "step": 77578
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.170048713684082,
      "learning_rate": 0.000447235522813314,
      "loss": 3.0799,
      "step": 77579
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.239898920059204,
      "learning_rate": 0.0004472319587718698,
      "loss": 3.0867,
      "step": 77580
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1505839824676514,
      "learning_rate": 0.00044722839470305256,
      "loss": 2.8273,
      "step": 77581
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.1555299758911133,
      "learning_rate": 0.00044722483060686267,
      "loss": 2.9895,
      "step": 77582
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.206942319869995,
      "learning_rate": 0.0004472212664833009,
      "loss": 3.0081,
      "step": 77583
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.124525785446167,
      "learning_rate": 0.00044721770233236796,
      "loss": 3.0859,
      "step": 77584
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.837281346321106,
      "learning_rate": 0.0004472141381540645,
      "loss": 3.0116,
      "step": 77585
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6503885984420776,
      "learning_rate": 0.00044721057394839106,
      "loss": 3.0958,
      "step": 77586
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.85689640045166,
      "learning_rate": 0.0004472070097153485,
      "loss": 3.1098,
      "step": 77587
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.7784385681152344,
      "learning_rate": 0.00044720344545493733,
      "loss": 2.932,
      "step": 77588
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.598606824874878,
      "learning_rate": 0.00044719988116715824,
      "loss": 2.7667,
      "step": 77589
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.213132381439209,
      "learning_rate": 0.00044719631685201193,
      "loss": 2.9929,
      "step": 77590
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.109760284423828,
      "learning_rate": 0.00044719275250949906,
      "loss": 3.1361,
      "step": 77591
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.143728256225586,
      "learning_rate": 0.0004471891881396203,
      "loss": 2.7238,
      "step": 77592
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4888683557510376,
      "learning_rate": 0.0004471856237423763,
      "loss": 3.092,
      "step": 77593
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6430652141571045,
      "learning_rate": 0.0004471820593177677,
      "loss": 2.9304,
      "step": 77594
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.774336814880371,
      "learning_rate": 0.00044717849486579505,
      "loss": 2.8649,
      "step": 77595
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0106027126312256,
      "learning_rate": 0.00044717493038645933,
      "loss": 3.1104,
      "step": 77596
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5122112035751343,
      "learning_rate": 0.00044717136587976085,
      "loss": 3.072,
      "step": 77597
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7657033205032349,
      "learning_rate": 0.0004471678013457005,
      "loss": 2.8392,
      "step": 77598
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.923865556716919,
      "learning_rate": 0.0004471642367842789,
      "loss": 2.9595,
      "step": 77599
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5964442491531372,
      "learning_rate": 0.0004471606721954967,
      "loss": 3.1246,
      "step": 77600
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8391709327697754,
      "learning_rate": 0.0004471571075793545,
      "loss": 3.0604,
      "step": 77601
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1560983657836914,
      "learning_rate": 0.00044715354293585306,
      "loss": 3.0252,
      "step": 77602
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7946196794509888,
      "learning_rate": 0.0004471499782649929,
      "loss": 2.8864,
      "step": 77603
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5307667255401611,
      "learning_rate": 0.0004471464135667749,
      "loss": 3.1799,
      "step": 77604
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4693573713302612,
      "learning_rate": 0.0004471428488411996,
      "loss": 2.7953,
      "step": 77605
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5151292085647583,
      "learning_rate": 0.00044713928408826766,
      "loss": 2.9706,
      "step": 77606
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4107006788253784,
      "learning_rate": 0.0004471357193079797,
      "loss": 3.1329,
      "step": 77607
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0007686614990234,
      "learning_rate": 0.0004471321545003365,
      "loss": 3.0807,
      "step": 77608
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5832244157791138,
      "learning_rate": 0.00044712858966533856,
      "loss": 2.9557,
      "step": 77609
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6323691606521606,
      "learning_rate": 0.0004471250248029867,
      "loss": 3.2236,
      "step": 77610
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8621379137039185,
      "learning_rate": 0.00044712145991328153,
      "loss": 3.3129,
      "step": 77611
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8673769235610962,
      "learning_rate": 0.00044711789499622364,
      "loss": 2.9867,
      "step": 77612
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.587742567062378,
      "learning_rate": 0.00044711433005181387,
      "loss": 3.2097,
      "step": 77613
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0022709369659424,
      "learning_rate": 0.0004471107650800527,
      "loss": 3.0009,
      "step": 77614
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7141168117523193,
      "learning_rate": 0.00044710720008094083,
      "loss": 3.2466,
      "step": 77615
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5478577613830566,
      "learning_rate": 0.00044710363505447897,
      "loss": 3.3131,
      "step": 77616
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7941335439682007,
      "learning_rate": 0.00044710007000066783,
      "loss": 2.9142,
      "step": 77617
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8385133743286133,
      "learning_rate": 0.0004470965049195079,
      "loss": 2.9141,
      "step": 77618
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.771208643913269,
      "learning_rate": 0.000447092939811,
      "loss": 3.0106,
      "step": 77619
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.802680492401123,
      "learning_rate": 0.0004470893746751449,
      "loss": 2.9681,
      "step": 77620
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5252244472503662,
      "learning_rate": 0.00044708580951194284,
      "loss": 3.1016,
      "step": 77621
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.145137071609497,
      "learning_rate": 0.0004470822443213949,
      "loss": 2.9953,
      "step": 77622
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6856939792633057,
      "learning_rate": 0.00044707867910350163,
      "loss": 3.0216,
      "step": 77623
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4691026210784912,
      "learning_rate": 0.0004470751138582636,
      "loss": 2.9599,
      "step": 77624
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9551578760147095,
      "learning_rate": 0.0004470715485856815,
      "loss": 2.8989,
      "step": 77625
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8075065612792969,
      "learning_rate": 0.0004470679832857561,
      "loss": 3.1565,
      "step": 77626
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6273454427719116,
      "learning_rate": 0.0004470644179584879,
      "loss": 3.1415,
      "step": 77627
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6963316202163696,
      "learning_rate": 0.00044706085260387774,
      "loss": 2.9341,
      "step": 77628
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4221527576446533,
      "learning_rate": 0.0004470572872219261,
      "loss": 2.7212,
      "step": 77629
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.608814001083374,
      "learning_rate": 0.00044705372181263374,
      "loss": 3.0445,
      "step": 77630
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5878806114196777,
      "learning_rate": 0.0004470501563760014,
      "loss": 3.1268,
      "step": 77631
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.712130546569824,
      "learning_rate": 0.00044704659091202964,
      "loss": 2.8582,
      "step": 77632
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3287482261657715,
      "learning_rate": 0.00044704302542071913,
      "loss": 3.2035,
      "step": 77633
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7974615097045898,
      "learning_rate": 0.0004470394599020705,
      "loss": 3.1876,
      "step": 77634
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.4561867713928223,
      "learning_rate": 0.00044703589435608445,
      "loss": 3.1901,
      "step": 77635
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7655436992645264,
      "learning_rate": 0.0004470323287827618,
      "loss": 3.0828,
      "step": 77636
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7271068096160889,
      "learning_rate": 0.00044702876318210294,
      "loss": 2.8511,
      "step": 77637
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.778157114982605,
      "learning_rate": 0.0004470251975541086,
      "loss": 3.0309,
      "step": 77638
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.66729736328125,
      "learning_rate": 0.0004470216318987796,
      "loss": 2.8351,
      "step": 77639
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.149414539337158,
      "learning_rate": 0.00044701806621611645,
      "loss": 3.0323,
      "step": 77640
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7102710008621216,
      "learning_rate": 0.0004470145005061199,
      "loss": 3.0547,
      "step": 77641
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4876266717910767,
      "learning_rate": 0.0004470109347687906,
      "loss": 3.0376,
      "step": 77642
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5143795013427734,
      "learning_rate": 0.0004470073690041292,
      "loss": 3.2462,
      "step": 77643
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8201324939727783,
      "learning_rate": 0.0004470038032121363,
      "loss": 3.1411,
      "step": 77644
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8430027961730957,
      "learning_rate": 0.0004470002373928126,
      "loss": 3.2006,
      "step": 77645
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7339913845062256,
      "learning_rate": 0.0004469966715461588,
      "loss": 2.8062,
      "step": 77646
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7282954454421997,
      "learning_rate": 0.0004469931056721757,
      "loss": 3.0607,
      "step": 77647
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.560149908065796,
      "learning_rate": 0.00044698953977086354,
      "loss": 3.391,
      "step": 77648
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0836849212646484,
      "learning_rate": 0.0004469859738422234,
      "loss": 3.0837,
      "step": 77649
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5066970586776733,
      "learning_rate": 0.0004469824078862558,
      "loss": 3.0587,
      "step": 77650
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4353982210159302,
      "learning_rate": 0.00044697884190296136,
      "loss": 3.2171,
      "step": 77651
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.020423173904419,
      "learning_rate": 0.0004469752758923408,
      "loss": 2.9575,
      "step": 77652
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9357739686965942,
      "learning_rate": 0.0004469717098543947,
      "loss": 3.354,
      "step": 77653
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.052029848098755,
      "learning_rate": 0.0004469681437891239,
      "loss": 3.0432,
      "step": 77654
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0053582191467285,
      "learning_rate": 0.00044696457769652885,
      "loss": 2.996,
      "step": 77655
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0564064979553223,
      "learning_rate": 0.0004469610115766104,
      "loss": 3.0303,
      "step": 77656
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.394636392593384,
      "learning_rate": 0.00044695744542936906,
      "loss": 3.1129,
      "step": 77657
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.44889235496521,
      "learning_rate": 0.00044695387925480556,
      "loss": 3.0611,
      "step": 77658
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4556337594985962,
      "learning_rate": 0.00044695031305292055,
      "loss": 2.6851,
      "step": 77659
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.740217685699463,
      "learning_rate": 0.0004469467468237147,
      "loss": 3.0964,
      "step": 77660
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.5456864833831787,
      "learning_rate": 0.00044694318056718874,
      "loss": 2.9164,
      "step": 77661
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6982874870300293,
      "learning_rate": 0.00044693961428334314,
      "loss": 3.21,
      "step": 77662
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5689948797225952,
      "learning_rate": 0.00044693604797217885,
      "loss": 2.8502,
      "step": 77663
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5656707286834717,
      "learning_rate": 0.0004469324816336963,
      "loss": 2.9735,
      "step": 77664
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8713632822036743,
      "learning_rate": 0.0004469289152678962,
      "loss": 3.2059,
      "step": 77665
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.060450553894043,
      "learning_rate": 0.0004469253488747793,
      "loss": 3.0671,
      "step": 77666
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.134261131286621,
      "learning_rate": 0.00044692178245434613,
      "loss": 2.9007,
      "step": 77667
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.879514455795288,
      "learning_rate": 0.0004469182160065975,
      "loss": 2.8481,
      "step": 77668
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5324960947036743,
      "learning_rate": 0.000446914649531534,
      "loss": 3.1561,
      "step": 77669
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.096085786819458,
      "learning_rate": 0.0004469110830291563,
      "loss": 2.9166,
      "step": 77670
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7438277006149292,
      "learning_rate": 0.00044690751649946496,
      "loss": 3.0502,
      "step": 77671
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.959960699081421,
      "learning_rate": 0.00044690394994246086,
      "loss": 3.1979,
      "step": 77672
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.560086488723755,
      "learning_rate": 0.00044690038335814446,
      "loss": 3.12,
      "step": 77673
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6347168684005737,
      "learning_rate": 0.0004468968167465165,
      "loss": 2.9928,
      "step": 77674
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4470767974853516,
      "learning_rate": 0.0004468932501075777,
      "loss": 3.0471,
      "step": 77675
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2670717239379883,
      "learning_rate": 0.0004468896834413287,
      "loss": 2.8604,
      "step": 77676
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5826244354248047,
      "learning_rate": 0.00044688611674777005,
      "loss": 3.1773,
      "step": 77677
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.281445026397705,
      "learning_rate": 0.00044688255002690256,
      "loss": 2.9379,
      "step": 77678
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5307481288909912,
      "learning_rate": 0.0004468789832787268,
      "loss": 2.884,
      "step": 77679
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1297340393066406,
      "learning_rate": 0.0004468754165032434,
      "loss": 3.3096,
      "step": 77680
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7877594232559204,
      "learning_rate": 0.0004468718497004532,
      "loss": 3.2005,
      "step": 77681
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0207104682922363,
      "learning_rate": 0.0004468682828703567,
      "loss": 2.9251,
      "step": 77682
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.930423617362976,
      "learning_rate": 0.0004468647160129546,
      "loss": 2.994,
      "step": 77683
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.749629259109497,
      "learning_rate": 0.0004468611491282477,
      "loss": 3.0781,
      "step": 77684
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6170711517333984,
      "learning_rate": 0.00044685758221623637,
      "loss": 3.3114,
      "step": 77685
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5980842113494873,
      "learning_rate": 0.00044685401527692155,
      "loss": 3.184,
      "step": 77686
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9138048887252808,
      "learning_rate": 0.0004468504483103038,
      "loss": 3.0058,
      "step": 77687
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.0878543853759766,
      "learning_rate": 0.0004468468813163837,
      "loss": 2.8835,
      "step": 77688
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5924464464187622,
      "learning_rate": 0.00044684331429516194,
      "loss": 3.0075,
      "step": 77689
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9486632347106934,
      "learning_rate": 0.0004468397472466394,
      "loss": 3.0532,
      "step": 77690
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7276718616485596,
      "learning_rate": 0.00044683618017081657,
      "loss": 3.0191,
      "step": 77691
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8734214305877686,
      "learning_rate": 0.000446832613067694,
      "loss": 3.1948,
      "step": 77692
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.773329257965088,
      "learning_rate": 0.0004468290459372726,
      "loss": 2.8619,
      "step": 77693
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.000098943710327,
      "learning_rate": 0.00044682547877955277,
      "loss": 3.1427,
      "step": 77694
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7615909576416016,
      "learning_rate": 0.0004468219115945354,
      "loss": 3.1765,
      "step": 77695
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.604533076286316,
      "learning_rate": 0.000446818344382221,
      "loss": 3.0816,
      "step": 77696
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.633655071258545,
      "learning_rate": 0.0004468147771426104,
      "loss": 3.2374,
      "step": 77697
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4583874940872192,
      "learning_rate": 0.00044681120987570414,
      "loss": 3.0487,
      "step": 77698
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5062973499298096,
      "learning_rate": 0.0004468076425815029,
      "loss": 3.0365,
      "step": 77699
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5375659465789795,
      "learning_rate": 0.0004468040752600073,
      "loss": 3.2499,
      "step": 77700
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5995372533798218,
      "learning_rate": 0.00044680050791121804,
      "loss": 2.9985,
      "step": 77701
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.47077476978302,
      "learning_rate": 0.0004467969405351358,
      "loss": 3.0379,
      "step": 77702
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.5345542430877686,
      "learning_rate": 0.0004467933731317613,
      "loss": 2.9033,
      "step": 77703
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7283166646957397,
      "learning_rate": 0.0004467898057010951,
      "loss": 2.9224,
      "step": 77704
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7650773525238037,
      "learning_rate": 0.0004467862382431379,
      "loss": 3.0304,
      "step": 77705
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8217015266418457,
      "learning_rate": 0.0004467826707578904,
      "loss": 3.2567,
      "step": 77706
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8171718120574951,
      "learning_rate": 0.00044677910324535325,
      "loss": 3.0842,
      "step": 77707
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6151232719421387,
      "learning_rate": 0.00044677553570552703,
      "loss": 2.9176,
      "step": 77708
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6627461910247803,
      "learning_rate": 0.0004467719681384125,
      "loss": 2.9194,
      "step": 77709
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6185170412063599,
      "learning_rate": 0.0004467684005440103,
      "loss": 2.9472,
      "step": 77710
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6859239339828491,
      "learning_rate": 0.000446764832922321,
      "loss": 3.0131,
      "step": 77711
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8438351154327393,
      "learning_rate": 0.0004467612652733455,
      "loss": 3.2334,
      "step": 77712
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.1787712574005127,
      "learning_rate": 0.00044675769759708415,
      "loss": 3.131,
      "step": 77713
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6252816915512085,
      "learning_rate": 0.0004467541298935379,
      "loss": 3.2277,
      "step": 77714
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7556812763214111,
      "learning_rate": 0.0004467505621627072,
      "loss": 2.9386,
      "step": 77715
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6864278316497803,
      "learning_rate": 0.00044674699440459283,
      "loss": 3.1536,
      "step": 77716
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8820334672927856,
      "learning_rate": 0.0004467434266191954,
      "loss": 3.0697,
      "step": 77717
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0341453552246094,
      "learning_rate": 0.0004467398588065157,
      "loss": 3.0895,
      "step": 77718
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2250726222991943,
      "learning_rate": 0.0004467362909665542,
      "loss": 2.9632,
      "step": 77719
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7887420654296875,
      "learning_rate": 0.00044673272309931163,
      "loss": 3.199,
      "step": 77720
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5690490007400513,
      "learning_rate": 0.00044672915520478873,
      "loss": 3.2625,
      "step": 77721
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.88546085357666,
      "learning_rate": 0.0004467255872829861,
      "loss": 3.0167,
      "step": 77722
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.096053123474121,
      "learning_rate": 0.0004467220193339044,
      "loss": 2.9966,
      "step": 77723
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.706658959388733,
      "learning_rate": 0.00044671845135754436,
      "loss": 3.0307,
      "step": 77724
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2302639484405518,
      "learning_rate": 0.0004467148833539066,
      "loss": 3.0185,
      "step": 77725
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7672487497329712,
      "learning_rate": 0.00044671131532299165,
      "loss": 2.9776,
      "step": 77726
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5091891288757324,
      "learning_rate": 0.00044670774726480035,
      "loss": 3.0441,
      "step": 77727
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.54900062084198,
      "learning_rate": 0.00044670417917933334,
      "loss": 2.8536,
      "step": 77728
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2851359844207764,
      "learning_rate": 0.0004467006110665913,
      "loss": 2.9849,
      "step": 77729
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.775091290473938,
      "learning_rate": 0.00044669704292657475,
      "loss": 3.0117,
      "step": 77730
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0601837635040283,
      "learning_rate": 0.00044669347475928447,
      "loss": 2.8989,
      "step": 77731
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3773393630981445,
      "learning_rate": 0.00044668990656472114,
      "loss": 2.9391,
      "step": 77732
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7514564990997314,
      "learning_rate": 0.00044668633834288535,
      "loss": 3.0568,
      "step": 77733
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5732319355010986,
      "learning_rate": 0.0004466827700937778,
      "loss": 2.8283,
      "step": 77734
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9955205917358398,
      "learning_rate": 0.00044667920181739923,
      "loss": 2.775,
      "step": 77735
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.9499738216400146,
      "learning_rate": 0.00044667563351375014,
      "loss": 2.7634,
      "step": 77736
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.411537528038025,
      "learning_rate": 0.00044667206518283125,
      "loss": 3.1848,
      "step": 77737
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7114365100860596,
      "learning_rate": 0.00044666849682464333,
      "loss": 3.0522,
      "step": 77738
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.139849901199341,
      "learning_rate": 0.000446664928439187,
      "loss": 2.9832,
      "step": 77739
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.495377779006958,
      "learning_rate": 0.00044666136002646285,
      "loss": 3.0477,
      "step": 77740
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.708475112915039,
      "learning_rate": 0.0004466577915864716,
      "loss": 3.0442,
      "step": 77741
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5288758277893066,
      "learning_rate": 0.0004466542231192139,
      "loss": 3.3045,
      "step": 77742
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8207873106002808,
      "learning_rate": 0.0004466506546246904,
      "loss": 2.9189,
      "step": 77743
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.0356340408325195,
      "learning_rate": 0.0004466470861029017,
      "loss": 2.8372,
      "step": 77744
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.720433235168457,
      "learning_rate": 0.00044664351755384867,
      "loss": 2.8535,
      "step": 77745
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.651353597640991,
      "learning_rate": 0.00044663994897753173,
      "loss": 3.0259,
      "step": 77746
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3962202072143555,
      "learning_rate": 0.0004466363803739517,
      "loss": 2.8873,
      "step": 77747
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7457705736160278,
      "learning_rate": 0.0004466328117431093,
      "loss": 3.0338,
      "step": 77748
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9896304607391357,
      "learning_rate": 0.0004466292430850049,
      "loss": 3.1206,
      "step": 77749
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1972084045410156,
      "learning_rate": 0.0004466256743996395,
      "loss": 2.8645,
      "step": 77750
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.809274673461914,
      "learning_rate": 0.00044662210568701365,
      "loss": 3.1865,
      "step": 77751
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6434603929519653,
      "learning_rate": 0.0004466185369471278,
      "loss": 2.9863,
      "step": 77752
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4897762537002563,
      "learning_rate": 0.00044661496817998287,
      "loss": 2.9268,
      "step": 77753
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.565706491470337,
      "learning_rate": 0.00044661139938557954,
      "loss": 2.9968,
      "step": 77754
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5517066717147827,
      "learning_rate": 0.00044660783056391833,
      "loss": 3.2274,
      "step": 77755
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5716795921325684,
      "learning_rate": 0.0004466042617149999,
      "loss": 2.859,
      "step": 77756
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5623877048492432,
      "learning_rate": 0.0004466006928388251,
      "loss": 2.9694,
      "step": 77757
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5889992713928223,
      "learning_rate": 0.0004465971239353944,
      "loss": 3.2741,
      "step": 77758
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.087491989135742,
      "learning_rate": 0.0004465935550047085,
      "loss": 3.0799,
      "step": 77759
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8887735605239868,
      "learning_rate": 0.0004465899860467681,
      "loss": 3.0137,
      "step": 77760
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5739500522613525,
      "learning_rate": 0.00044658641706157387,
      "loss": 3.0835,
      "step": 77761
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8841992616653442,
      "learning_rate": 0.0004465828480491264,
      "loss": 3.0062,
      "step": 77762
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8950265645980835,
      "learning_rate": 0.0004465792790094265,
      "loss": 3.0982,
      "step": 77763
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3928557634353638,
      "learning_rate": 0.0004465757099424747,
      "loss": 2.7554,
      "step": 77764
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8162736892700195,
      "learning_rate": 0.0004465721408482717,
      "loss": 3.0971,
      "step": 77765
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.410039186477661,
      "learning_rate": 0.00044656857172681823,
      "loss": 3.0664,
      "step": 77766
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6405577659606934,
      "learning_rate": 0.0004465650025781148,
      "loss": 2.7708,
      "step": 77767
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5635942220687866,
      "learning_rate": 0.00044656143340216224,
      "loss": 3.1524,
      "step": 77768
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4951444864273071,
      "learning_rate": 0.000446557864198961,
      "loss": 2.9851,
      "step": 77769
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7669528722763062,
      "learning_rate": 0.0004465542949685121,
      "loss": 2.8914,
      "step": 77770
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5495774745941162,
      "learning_rate": 0.0004465507257108159,
      "loss": 3.152,
      "step": 77771
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0806519985198975,
      "learning_rate": 0.00044654715642587317,
      "loss": 3.1462,
      "step": 77772
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6281365156173706,
      "learning_rate": 0.0004465435871136846,
      "loss": 3.1008,
      "step": 77773
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1067774295806885,
      "learning_rate": 0.0004465400177742507,
      "loss": 2.9405,
      "step": 77774
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6976463794708252,
      "learning_rate": 0.00044653644840757223,
      "loss": 2.8411,
      "step": 77775
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4029732942581177,
      "learning_rate": 0.00044653287901365004,
      "loss": 3.1135,
      "step": 77776
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9305838346481323,
      "learning_rate": 0.00044652930959248444,
      "loss": 3.0899,
      "step": 77777
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8922090530395508,
      "learning_rate": 0.0004465257401440763,
      "loss": 2.9583,
      "step": 77778
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7783403396606445,
      "learning_rate": 0.0004465221706684264,
      "loss": 3.0777,
      "step": 77779
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7091264724731445,
      "learning_rate": 0.00044651860116553513,
      "loss": 3.0405,
      "step": 77780
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8821327686309814,
      "learning_rate": 0.00044651503163540337,
      "loss": 2.8692,
      "step": 77781
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.680851936340332,
      "learning_rate": 0.0004465114620780317,
      "loss": 3.1945,
      "step": 77782
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.831394076347351,
      "learning_rate": 0.0004465078924934207,
      "loss": 3.0391,
      "step": 77783
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6773360967636108,
      "learning_rate": 0.0004465043228815711,
      "loss": 3.0864,
      "step": 77784
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8708890676498413,
      "learning_rate": 0.00044650075324248364,
      "loss": 3.047,
      "step": 77785
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0438899993896484,
      "learning_rate": 0.000446497183576159,
      "loss": 2.8977,
      "step": 77786
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8979547023773193,
      "learning_rate": 0.00044649361388259765,
      "loss": 3.0744,
      "step": 77787
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.823188304901123,
      "learning_rate": 0.0004464900441618004,
      "loss": 2.8976,
      "step": 77788
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.7814438343048096,
      "learning_rate": 0.00044648647441376793,
      "loss": 2.98,
      "step": 77789
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9678171873092651,
      "learning_rate": 0.0004464829046385008,
      "loss": 2.9074,
      "step": 77790
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4693247079849243,
      "learning_rate": 0.0004464793348359998,
      "loss": 3.0852,
      "step": 77791
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6479421854019165,
      "learning_rate": 0.00044647576500626547,
      "loss": 2.9844,
      "step": 77792
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8027756214141846,
      "learning_rate": 0.0004464721951492985,
      "loss": 2.9501,
      "step": 77793
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8301668167114258,
      "learning_rate": 0.0004464686252650997,
      "loss": 2.9337,
      "step": 77794
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.670783519744873,
      "learning_rate": 0.00044646505535366957,
      "loss": 2.9343,
      "step": 77795
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.4638545513153076,
      "learning_rate": 0.0004464614854150088,
      "loss": 2.9959,
      "step": 77796
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4781248569488525,
      "learning_rate": 0.00044645791544911813,
      "loss": 2.8725,
      "step": 77797
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0939369201660156,
      "learning_rate": 0.00044645434545599805,
      "loss": 3.1699,
      "step": 77798
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.2081658840179443,
      "learning_rate": 0.00044645077543564943,
      "loss": 2.9396,
      "step": 77799
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.8535704612731934,
      "learning_rate": 0.0004464472053880728,
      "loss": 3.1839,
      "step": 77800
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6601051092147827,
      "learning_rate": 0.0004464436353132689,
      "loss": 2.976,
      "step": 77801
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.554042100906372,
      "learning_rate": 0.00044644006521123836,
      "loss": 2.9186,
      "step": 77802
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8115092515945435,
      "learning_rate": 0.00044643649508198185,
      "loss": 2.8018,
      "step": 77803
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.014063835144043,
      "learning_rate": 0.0004464329249255001,
      "loss": 3.1757,
      "step": 77804
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.59480881690979,
      "learning_rate": 0.00044642935474179353,
      "loss": 3.028,
      "step": 77805
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6674778461456299,
      "learning_rate": 0.0004464257845308632,
      "loss": 3.0073,
      "step": 77806
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7275843620300293,
      "learning_rate": 0.0004464222142927094,
      "loss": 2.9373,
      "step": 77807
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1080217361450195,
      "learning_rate": 0.00044641864402733295,
      "loss": 2.9901,
      "step": 77808
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.873528003692627,
      "learning_rate": 0.0004464150737347346,
      "loss": 3.2652,
      "step": 77809
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6656169891357422,
      "learning_rate": 0.00044641150341491486,
      "loss": 2.8252,
      "step": 77810
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6582330465316772,
      "learning_rate": 0.00044640793306787444,
      "loss": 3.2498,
      "step": 77811
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6888720989227295,
      "learning_rate": 0.00044640436269361404,
      "loss": 2.9165,
      "step": 77812
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6199232339859009,
      "learning_rate": 0.0004464007922921343,
      "loss": 2.9232,
      "step": 77813
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7641602754592896,
      "learning_rate": 0.0004463972218634359,
      "loss": 3.0465,
      "step": 77814
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.572832465171814,
      "learning_rate": 0.00044639365140751946,
      "loss": 2.9067,
      "step": 77815
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.672551155090332,
      "learning_rate": 0.00044639008092438575,
      "loss": 3.114,
      "step": 77816
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6857821941375732,
      "learning_rate": 0.0004463865104140353,
      "loss": 2.9468,
      "step": 77817
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.802087426185608,
      "learning_rate": 0.0004463829398764688,
      "loss": 3.1625,
      "step": 77818
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3086233139038086,
      "learning_rate": 0.00044637936931168703,
      "loss": 3.007,
      "step": 77819
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5819432735443115,
      "learning_rate": 0.0004463757987196906,
      "loss": 3.0306,
      "step": 77820
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2461929321289062,
      "learning_rate": 0.0004463722281004801,
      "loss": 2.8257,
      "step": 77821
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6294758319854736,
      "learning_rate": 0.00044636865745405614,
      "loss": 3.1275,
      "step": 77822
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6645729541778564,
      "learning_rate": 0.00044636508678041966,
      "loss": 3.0707,
      "step": 77823
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7761015892028809,
      "learning_rate": 0.00044636151607957105,
      "loss": 3.0026,
      "step": 77824
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.957988977432251,
      "learning_rate": 0.00044635794535151103,
      "loss": 3.0487,
      "step": 77825
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7582520246505737,
      "learning_rate": 0.00044635437459624035,
      "loss": 3.0972,
      "step": 77826
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3190250396728516,
      "learning_rate": 0.00044635080381375973,
      "loss": 2.9048,
      "step": 77827
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9318387508392334,
      "learning_rate": 0.0004463472330040696,
      "loss": 3.0963,
      "step": 77828
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4993101358413696,
      "learning_rate": 0.00044634366216717074,
      "loss": 2.9731,
      "step": 77829
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2085063457489014,
      "learning_rate": 0.00044634009130306394,
      "loss": 2.9784,
      "step": 77830
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.905085802078247,
      "learning_rate": 0.0004463365204117497,
      "loss": 2.8523,
      "step": 77831
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5426768064498901,
      "learning_rate": 0.00044633294949322875,
      "loss": 2.9314,
      "step": 77832
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.554429054260254,
      "learning_rate": 0.00044632937854750175,
      "loss": 3.0828,
      "step": 77833
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1117520332336426,
      "learning_rate": 0.00044632580757456936,
      "loss": 3.1528,
      "step": 77834
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.255890369415283,
      "learning_rate": 0.00044632223657443216,
      "loss": 2.9988,
      "step": 77835
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8238554000854492,
      "learning_rate": 0.00044631866554709093,
      "loss": 2.7642,
      "step": 77836
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.5681469440460205,
      "learning_rate": 0.0004463150944925464,
      "loss": 2.9081,
      "step": 77837
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.1170995235443115,
      "learning_rate": 0.000446311523410799,
      "loss": 3.1267,
      "step": 77838
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5461703538894653,
      "learning_rate": 0.0004463079523018496,
      "loss": 3.236,
      "step": 77839
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.896048903465271,
      "learning_rate": 0.0004463043811656988,
      "loss": 3.0722,
      "step": 77840
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.543128490447998,
      "learning_rate": 0.00044630081000234716,
      "loss": 2.8877,
      "step": 77841
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4886301755905151,
      "learning_rate": 0.0004462972388117955,
      "loss": 3.2044,
      "step": 77842
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6839313507080078,
      "learning_rate": 0.0004462936675940445,
      "loss": 3.2482,
      "step": 77843
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8755453824996948,
      "learning_rate": 0.0004462900963490946,
      "loss": 3.0087,
      "step": 77844
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5597057342529297,
      "learning_rate": 0.00044628652507694666,
      "loss": 3.0608,
      "step": 77845
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.144892454147339,
      "learning_rate": 0.00044628295377760143,
      "loss": 2.6951,
      "step": 77846
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3478076457977295,
      "learning_rate": 0.00044627938245105927,
      "loss": 2.7729,
      "step": 77847
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7222775220870972,
      "learning_rate": 0.000446275811097321,
      "loss": 2.9806,
      "step": 77848
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6124391555786133,
      "learning_rate": 0.00044627223971638747,
      "loss": 3.1132,
      "step": 77849
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5484026670455933,
      "learning_rate": 0.00044626866830825904,
      "loss": 3.0557,
      "step": 77850
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.644011378288269,
      "learning_rate": 0.0004462650968729365,
      "loss": 3.0535,
      "step": 77851
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4960130453109741,
      "learning_rate": 0.00044626152541042056,
      "loss": 2.7116,
      "step": 77852
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8238426446914673,
      "learning_rate": 0.00044625795392071177,
      "loss": 3.3448,
      "step": 77853
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4910593032836914,
      "learning_rate": 0.00044625438240381097,
      "loss": 3.1433,
      "step": 77854
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4647274017333984,
      "learning_rate": 0.0004462508108597186,
      "loss": 2.9698,
      "step": 77855
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.6030771732330322,
      "learning_rate": 0.00044624723928843557,
      "loss": 2.881,
      "step": 77856
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3959565162658691,
      "learning_rate": 0.0004462436676899623,
      "loss": 3.1562,
      "step": 77857
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.735609531402588,
      "learning_rate": 0.0004462400960642997,
      "loss": 3.1684,
      "step": 77858
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.8536267280578613,
      "learning_rate": 0.0004462365244114482,
      "loss": 2.9736,
      "step": 77859
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4837589263916016,
      "learning_rate": 0.00044623295273140854,
      "loss": 3.1168,
      "step": 77860
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.657026767730713,
      "learning_rate": 0.0004462293810241816,
      "loss": 3.0618,
      "step": 77861
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.597932815551758,
      "learning_rate": 0.00044622580928976765,
      "loss": 3.2711,
      "step": 77862
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6460033655166626,
      "learning_rate": 0.0004462222375281677,
      "loss": 3.1727,
      "step": 77863
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9919339418411255,
      "learning_rate": 0.0004462186657393822,
      "loss": 2.9614,
      "step": 77864
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9460989236831665,
      "learning_rate": 0.00044621509392341193,
      "loss": 3.1246,
      "step": 77865
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4643540382385254,
      "learning_rate": 0.00044621152208025746,
      "loss": 3.1247,
      "step": 77866
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6153521537780762,
      "learning_rate": 0.0004462079502099196,
      "loss": 3.0955,
      "step": 77867
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.417386293411255,
      "learning_rate": 0.00044620437831239884,
      "loss": 2.8782,
      "step": 77868
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6780508756637573,
      "learning_rate": 0.00044620080638769593,
      "loss": 3.2003,
      "step": 77869
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6737314462661743,
      "learning_rate": 0.0004461972344358116,
      "loss": 3.0858,
      "step": 77870
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.553438663482666,
      "learning_rate": 0.0004461936624567463,
      "loss": 2.8981,
      "step": 77871
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.856937050819397,
      "learning_rate": 0.000446190090450501,
      "loss": 3.1529,
      "step": 77872
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.51698899269104,
      "learning_rate": 0.00044618651841707614,
      "loss": 3.1108,
      "step": 77873
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8250516653060913,
      "learning_rate": 0.00044618294635647245,
      "loss": 2.9821,
      "step": 77874
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.3879504203796387,
      "learning_rate": 0.00044617937426869053,
      "loss": 2.8751,
      "step": 77875
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8294752836227417,
      "learning_rate": 0.00044617580215373123,
      "loss": 3.0774,
      "step": 77876
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7804057598114014,
      "learning_rate": 0.00044617223001159505,
      "loss": 2.9764,
      "step": 77877
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5872942209243774,
      "learning_rate": 0.00044616865784228264,
      "loss": 2.7879,
      "step": 77878
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.440596342086792,
      "learning_rate": 0.00044616508564579476,
      "loss": 2.9495,
      "step": 77879
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.37888765335083,
      "learning_rate": 0.000446161513422132,
      "loss": 2.7729,
      "step": 77880
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4568023681640625,
      "learning_rate": 0.000446157941171295,
      "loss": 3.1134,
      "step": 77881
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.9862749576568604,
      "learning_rate": 0.00044615436889328466,
      "loss": 2.9607,
      "step": 77882
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3382375240325928,
      "learning_rate": 0.0004461507965881013,
      "loss": 3.1424,
      "step": 77883
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.826910138130188,
      "learning_rate": 0.00044614722425574574,
      "loss": 2.8136,
      "step": 77884
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.806822657585144,
      "learning_rate": 0.00044614365189621884,
      "loss": 2.9467,
      "step": 77885
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.287062644958496,
      "learning_rate": 0.0004461400795095209,
      "loss": 2.8976,
      "step": 77886
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0157229900360107,
      "learning_rate": 0.00044613650709565277,
      "loss": 3.1927,
      "step": 77887
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6944241523742676,
      "learning_rate": 0.00044613293465461517,
      "loss": 3.0052,
      "step": 77888
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9309903383255005,
      "learning_rate": 0.00044612936218640866,
      "loss": 2.7003,
      "step": 77889
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4889715909957886,
      "learning_rate": 0.00044612578969103393,
      "loss": 3.1201,
      "step": 77890
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6909416913986206,
      "learning_rate": 0.0004461222171684917,
      "loss": 2.8995,
      "step": 77891
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6641300916671753,
      "learning_rate": 0.0004461186446187826,
      "loss": 2.9715,
      "step": 77892
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.828985333442688,
      "learning_rate": 0.00044611507204190724,
      "loss": 3.2364,
      "step": 77893
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8147283792495728,
      "learning_rate": 0.0004461114994378664,
      "loss": 3.2711,
      "step": 77894
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.3093512058258057,
      "learning_rate": 0.0004461079268066606,
      "loss": 3.1507,
      "step": 77895
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7391438484191895,
      "learning_rate": 0.00044610435414829056,
      "loss": 2.8824,
      "step": 77896
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2459776401519775,
      "learning_rate": 0.0004461007814627571,
      "loss": 2.9919,
      "step": 77897
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6452194452285767,
      "learning_rate": 0.0004460972087500606,
      "loss": 3.013,
      "step": 77898
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.4390413761138916,
      "learning_rate": 0.0004460936360102019,
      "loss": 3.0612,
      "step": 77899
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5944836139678955,
      "learning_rate": 0.0004460900632431817,
      "loss": 3.1579,
      "step": 77900
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4253315925598145,
      "learning_rate": 0.0004460864904490006,
      "loss": 3.1684,
      "step": 77901
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5733633041381836,
      "learning_rate": 0.0004460829176276592,
      "loss": 3.1572,
      "step": 77902
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7419787645339966,
      "learning_rate": 0.00044607934477915824,
      "loss": 2.8645,
      "step": 77903
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9281132221221924,
      "learning_rate": 0.0004460757719034984,
      "loss": 3.0732,
      "step": 77904
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6665138006210327,
      "learning_rate": 0.0004460721990006803,
      "loss": 3.1609,
      "step": 77905
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.7614433765411377,
      "learning_rate": 0.0004460686260707046,
      "loss": 2.9617,
      "step": 77906
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.571442127227783,
      "learning_rate": 0.00044606505311357196,
      "loss": 3.0746,
      "step": 77907
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.174370288848877,
      "learning_rate": 0.00044606148012928316,
      "loss": 3.0842,
      "step": 77908
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.983397126197815,
      "learning_rate": 0.00044605790711783866,
      "loss": 2.7696,
      "step": 77909
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5256562232971191,
      "learning_rate": 0.0004460543340792393,
      "loss": 3.2486,
      "step": 77910
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9003170728683472,
      "learning_rate": 0.0004460507610134857,
      "loss": 3.0154,
      "step": 77911
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4846237897872925,
      "learning_rate": 0.00044604718792057855,
      "loss": 3.1958,
      "step": 77912
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4252841472625732,
      "learning_rate": 0.00044604361480051835,
      "loss": 2.8762,
      "step": 77913
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4382556676864624,
      "learning_rate": 0.000446040041653306,
      "loss": 3.0554,
      "step": 77914
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0177114009857178,
      "learning_rate": 0.000446036468478942,
      "loss": 2.9898,
      "step": 77915
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4886538982391357,
      "learning_rate": 0.0004460328952774271,
      "loss": 3.1232,
      "step": 77916
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.176703691482544,
      "learning_rate": 0.00044602932204876187,
      "loss": 3.1127,
      "step": 77917
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8146799802780151,
      "learning_rate": 0.00044602574879294704,
      "loss": 3.0382,
      "step": 77918
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.662057638168335,
      "learning_rate": 0.0004460221755099833,
      "loss": 3.05,
      "step": 77919
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.2335283756256104,
      "learning_rate": 0.0004460186021998712,
      "loss": 2.9244,
      "step": 77920
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.153170347213745,
      "learning_rate": 0.0004460150288626116,
      "loss": 2.901,
      "step": 77921
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6401467323303223,
      "learning_rate": 0.00044601145549820503,
      "loss": 2.9403,
      "step": 77922
    },
    {
      "epoch": 1.01,
      "grad_norm": 4.052353382110596,
      "learning_rate": 0.0004460078821066521,
      "loss": 3.0891,
      "step": 77923
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.659353494644165,
      "learning_rate": 0.00044600430868795357,
      "loss": 3.1651,
      "step": 77924
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.859948992729187,
      "learning_rate": 0.00044600073524211014,
      "loss": 2.8564,
      "step": 77925
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.228850841522217,
      "learning_rate": 0.00044599716176912235,
      "loss": 2.9266,
      "step": 77926
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.4972589015960693,
      "learning_rate": 0.0004459935882689909,
      "loss": 3.23,
      "step": 77927
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.0866403579711914,
      "learning_rate": 0.00044599001474171664,
      "loss": 2.8802,
      "step": 77928
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.0772006511688232,
      "learning_rate": 0.0004459864411873,
      "loss": 3.0047,
      "step": 77929
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.9158958196640015,
      "learning_rate": 0.0004459828676057417,
      "loss": 3.0585,
      "step": 77930
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.636136531829834,
      "learning_rate": 0.0004459792939970425,
      "loss": 2.6662,
      "step": 77931
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.562415361404419,
      "learning_rate": 0.0004459757203612029,
      "loss": 2.8357,
      "step": 77932
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.40111243724823,
      "learning_rate": 0.00044597214669822366,
      "loss": 2.9882,
      "step": 77933
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.4477561712265015,
      "learning_rate": 0.0004459685730081056,
      "loss": 2.9521,
      "step": 77934
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5753470659255981,
      "learning_rate": 0.00044596499929084904,
      "loss": 3.1332,
      "step": 77935
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.2242071628570557,
      "learning_rate": 0.0004459614255464549,
      "loss": 3.0149,
      "step": 77936
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.8101274967193604,
      "learning_rate": 0.0004459578517749238,
      "loss": 3.0356,
      "step": 77937
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7027850151062012,
      "learning_rate": 0.0004459542779762563,
      "loss": 3.0511,
      "step": 77938
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.7439539432525635,
      "learning_rate": 0.0004459507041504532,
      "loss": 3.3579,
      "step": 77939
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.050819158554077,
      "learning_rate": 0.00044594713029751516,
      "loss": 2.9423,
      "step": 77940
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.539902687072754,
      "learning_rate": 0.0004459435564174427,
      "loss": 2.845,
      "step": 77941
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5421209335327148,
      "learning_rate": 0.00044593998251023665,
      "loss": 3.0546,
      "step": 77942
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.116698741912842,
      "learning_rate": 0.0004459364085758976,
      "loss": 3.0663,
      "step": 77943
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.794398546218872,
      "learning_rate": 0.0004459328346144262,
      "loss": 2.8666,
      "step": 77944
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5801191329956055,
      "learning_rate": 0.0004459292606258231,
      "loss": 3.0292,
      "step": 77945
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.287163019180298,
      "learning_rate": 0.00044592568661008905,
      "loss": 2.8172,
      "step": 77946
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.034834623336792,
      "learning_rate": 0.00044592211256722455,
      "loss": 3.1266,
      "step": 77947
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6099039316177368,
      "learning_rate": 0.0004459185384972305,
      "loss": 2.9994,
      "step": 77948
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6091125011444092,
      "learning_rate": 0.00044591496440010735,
      "loss": 3.2081,
      "step": 77949
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.6073628664016724,
      "learning_rate": 0.0004459113902758559,
      "loss": 3.0277,
      "step": 77950
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.065290927886963,
      "learning_rate": 0.0004459078161244767,
      "loss": 2.9663,
      "step": 77951
    },
    {
      "epoch": 1.01,
      "grad_norm": 1.5523114204406738,
      "learning_rate": 0.00044590424194597064,
      "loss": 3.1702,
      "step": 77952
    },
    {
      "epoch": 1.01,
      "grad_norm": 3.083648443222046,
      "learning_rate": 0.0004459006677403381,
      "loss": 2.6474,
      "step": 77953
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.4546937942504883,
      "learning_rate": 0.0004458970935075799,
      "loss": 2.8171,
      "step": 77954
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9023367166519165,
      "learning_rate": 0.00044589351924769675,
      "loss": 2.9912,
      "step": 77955
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.409388542175293,
      "learning_rate": 0.0004458899449606891,
      "loss": 2.827,
      "step": 77956
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1079659461975098,
      "learning_rate": 0.0004458863706465578,
      "loss": 3.017,
      "step": 77957
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.462477207183838,
      "learning_rate": 0.0004458827963053036,
      "loss": 2.9879,
      "step": 77958
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.853023886680603,
      "learning_rate": 0.00044587922193692684,
      "loss": 2.9181,
      "step": 77959
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.616027593612671,
      "learning_rate": 0.00044587564754142845,
      "loss": 3.1413,
      "step": 77960
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.7545411586761475,
      "learning_rate": 0.0004458720731188091,
      "loss": 3.0238,
      "step": 77961
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.941620945930481,
      "learning_rate": 0.0004458684986690693,
      "loss": 3.111,
      "step": 77962
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7536240816116333,
      "learning_rate": 0.0004458649241922098,
      "loss": 3.1085,
      "step": 77963
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0000970363616943,
      "learning_rate": 0.0004458613496882313,
      "loss": 3.151,
      "step": 77964
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4867737293243408,
      "learning_rate": 0.0004458577751571344,
      "loss": 3.1678,
      "step": 77965
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.495314121246338,
      "learning_rate": 0.0004458542005989198,
      "loss": 2.8751,
      "step": 77966
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.089778423309326,
      "learning_rate": 0.0004458506260135881,
      "loss": 2.9765,
      "step": 77967
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.334106683731079,
      "learning_rate": 0.0004458470514011401,
      "loss": 3.2491,
      "step": 77968
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8185514211654663,
      "learning_rate": 0.00044584347676157626,
      "loss": 3.0407,
      "step": 77969
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5317261219024658,
      "learning_rate": 0.00044583990209489744,
      "loss": 2.9253,
      "step": 77970
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3438503742218018,
      "learning_rate": 0.00044583632740110424,
      "loss": 2.8693,
      "step": 77971
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7882461547851562,
      "learning_rate": 0.00044583275268019726,
      "loss": 3.0191,
      "step": 77972
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4337517023086548,
      "learning_rate": 0.00044582917793217726,
      "loss": 2.9139,
      "step": 77973
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8825092315673828,
      "learning_rate": 0.0004458256031570449,
      "loss": 3.0595,
      "step": 77974
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7146618366241455,
      "learning_rate": 0.00044582202835480073,
      "loss": 2.8947,
      "step": 77975
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7148363590240479,
      "learning_rate": 0.00044581845352544545,
      "loss": 3.1422,
      "step": 77976
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0948543548583984,
      "learning_rate": 0.00044581487866897997,
      "loss": 2.7392,
      "step": 77977
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9751756191253662,
      "learning_rate": 0.0004458113037854046,
      "loss": 3.1868,
      "step": 77978
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7627923488616943,
      "learning_rate": 0.0004458077288747201,
      "loss": 3.2556,
      "step": 77979
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.329777956008911,
      "learning_rate": 0.00044580415393692734,
      "loss": 2.8837,
      "step": 77980
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.290245532989502,
      "learning_rate": 0.00044580057897202673,
      "loss": 3.333,
      "step": 77981
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2357029914855957,
      "learning_rate": 0.0004457970039800191,
      "loss": 3.066,
      "step": 77982
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.097348690032959,
      "learning_rate": 0.00044579342896090504,
      "loss": 2.9748,
      "step": 77983
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.593214511871338,
      "learning_rate": 0.0004457898539146852,
      "loss": 3.0111,
      "step": 77984
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9014652967453003,
      "learning_rate": 0.00044578627884136026,
      "loss": 3.0433,
      "step": 77985
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4396672248840332,
      "learning_rate": 0.000445782703740931,
      "loss": 2.7983,
      "step": 77986
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4806101322174072,
      "learning_rate": 0.00044577912861339786,
      "loss": 3.1503,
      "step": 77987
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7598652839660645,
      "learning_rate": 0.00044577555345876176,
      "loss": 3.1314,
      "step": 77988
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3490777015686035,
      "learning_rate": 0.00044577197827702316,
      "loss": 3.0114,
      "step": 77989
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8689117431640625,
      "learning_rate": 0.00044576840306818276,
      "loss": 3.0691,
      "step": 77990
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5427806377410889,
      "learning_rate": 0.00044576482783224126,
      "loss": 3.0839,
      "step": 77991
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7283118963241577,
      "learning_rate": 0.00044576125256919936,
      "loss": 3.0458,
      "step": 77992
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6597222089767456,
      "learning_rate": 0.0004457576772790578,
      "loss": 2.9256,
      "step": 77993
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1375861167907715,
      "learning_rate": 0.000445754101961817,
      "loss": 2.9401,
      "step": 77994
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0528485774993896,
      "learning_rate": 0.0004457505266174777,
      "loss": 3.0836,
      "step": 77995
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8152862787246704,
      "learning_rate": 0.0004457469512460408,
      "loss": 3.187,
      "step": 77996
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.081815004348755,
      "learning_rate": 0.0004457433758475067,
      "loss": 2.9997,
      "step": 77997
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.4697744846343994,
      "learning_rate": 0.00044573980042187614,
      "loss": 2.7924,
      "step": 77998
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7855924367904663,
      "learning_rate": 0.0004457362249691498,
      "loss": 3.0331,
      "step": 77999
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.013963460922241,
      "learning_rate": 0.00044573264948932843,
      "loss": 2.8349,
      "step": 78000
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.068906307220459,
      "learning_rate": 0.00044572907398241255,
      "loss": 3.1715,
      "step": 78001
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6033375263214111,
      "learning_rate": 0.00044572549844840284,
      "loss": 2.8682,
      "step": 78002
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.140280246734619,
      "learning_rate": 0.0004457219228873001,
      "loss": 2.9549,
      "step": 78003
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6689475774765015,
      "learning_rate": 0.00044571834729910484,
      "loss": 2.9786,
      "step": 78004
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7303811311721802,
      "learning_rate": 0.00044571477168381775,
      "loss": 3.0073,
      "step": 78005
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3691099882125854,
      "learning_rate": 0.0004457111960414396,
      "loss": 3.1085,
      "step": 78006
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5167829990386963,
      "learning_rate": 0.000445707620371971,
      "loss": 3.1288,
      "step": 78007
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8300468921661377,
      "learning_rate": 0.00044570404467541257,
      "loss": 3.0597,
      "step": 78008
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6174970865249634,
      "learning_rate": 0.000445700468951765,
      "loss": 2.952,
      "step": 78009
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4785144329071045,
      "learning_rate": 0.000445696893201029,
      "loss": 2.7947,
      "step": 78010
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.13643479347229,
      "learning_rate": 0.00044569331742320517,
      "loss": 2.9608,
      "step": 78011
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1849029064178467,
      "learning_rate": 0.0004456897416182942,
      "loss": 2.8778,
      "step": 78012
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9702646732330322,
      "learning_rate": 0.00044568616578629684,
      "loss": 3.394,
      "step": 78013
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8416916131973267,
      "learning_rate": 0.0004456825899272135,
      "loss": 2.9474,
      "step": 78014
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.462386131286621,
      "learning_rate": 0.00044567901404104517,
      "loss": 3.0198,
      "step": 78015
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7419990301132202,
      "learning_rate": 0.0004456754381277923,
      "loss": 2.8153,
      "step": 78016
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8807388544082642,
      "learning_rate": 0.0004456718621874557,
      "loss": 2.8884,
      "step": 78017
    },
    {
      "epoch": 1.02,
      "grad_norm": 4.469662189483643,
      "learning_rate": 0.00044566828622003577,
      "loss": 3.2721,
      "step": 78018
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6240181922912598,
      "learning_rate": 0.0004456647102255335,
      "loss": 3.101,
      "step": 78019
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8698636293411255,
      "learning_rate": 0.0004456611342039494,
      "loss": 3.1929,
      "step": 78020
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0938656330108643,
      "learning_rate": 0.00044565755815528406,
      "loss": 3.1178,
      "step": 78021
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.363535165786743,
      "learning_rate": 0.00044565398207953836,
      "loss": 2.9148,
      "step": 78022
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1464648246765137,
      "learning_rate": 0.00044565040597671277,
      "loss": 3.0827,
      "step": 78023
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4991557598114014,
      "learning_rate": 0.000445646829846808,
      "loss": 3.0139,
      "step": 78024
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5186446905136108,
      "learning_rate": 0.00044564325368982475,
      "loss": 2.7769,
      "step": 78025
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9247548580169678,
      "learning_rate": 0.0004456396775057637,
      "loss": 2.9189,
      "step": 78026
    },
    {
      "epoch": 1.02,
      "grad_norm": 4.131593704223633,
      "learning_rate": 0.00044563610129462537,
      "loss": 3.0281,
      "step": 78027
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8891854286193848,
      "learning_rate": 0.0004456325250564107,
      "loss": 3.1013,
      "step": 78028
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4734513759613037,
      "learning_rate": 0.00044562894879112005,
      "loss": 3.1219,
      "step": 78029
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5935404300689697,
      "learning_rate": 0.00044562537249875434,
      "loss": 3.115,
      "step": 78030
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3220486640930176,
      "learning_rate": 0.00044562179617931404,
      "loss": 3.2212,
      "step": 78031
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.006742000579834,
      "learning_rate": 0.0004456182198327999,
      "loss": 3.0174,
      "step": 78032
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4202184677124023,
      "learning_rate": 0.00044561464345921266,
      "loss": 3.1599,
      "step": 78033
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7315367460250854,
      "learning_rate": 0.0004456110670585529,
      "loss": 3.1317,
      "step": 78034
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5658130645751953,
      "learning_rate": 0.0004456074906308213,
      "loss": 2.9749,
      "step": 78035
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6461081504821777,
      "learning_rate": 0.0004456039141760184,
      "loss": 2.7769,
      "step": 78036
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9203978776931763,
      "learning_rate": 0.0004456003376941451,
      "loss": 3.0582,
      "step": 78037
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.055023193359375,
      "learning_rate": 0.000445596761185202,
      "loss": 3.0162,
      "step": 78038
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9225399494171143,
      "learning_rate": 0.00044559318464918953,
      "loss": 3.0484,
      "step": 78039
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3899587392807007,
      "learning_rate": 0.0004455896080861086,
      "loss": 3.0259,
      "step": 78040
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.6528143882751465,
      "learning_rate": 0.0004455860314959599,
      "loss": 2.9483,
      "step": 78041
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.823831558227539,
      "learning_rate": 0.000445582454878744,
      "loss": 3.2176,
      "step": 78042
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6186274290084839,
      "learning_rate": 0.00044557887823446147,
      "loss": 2.9085,
      "step": 78043
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6213979721069336,
      "learning_rate": 0.0004455753015631132,
      "loss": 3.0275,
      "step": 78044
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5264463424682617,
      "learning_rate": 0.00044557172486469964,
      "loss": 3.0681,
      "step": 78045
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.792951226234436,
      "learning_rate": 0.00044556814813922157,
      "loss": 2.9584,
      "step": 78046
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6739047765731812,
      "learning_rate": 0.0004455645713866797,
      "loss": 3.0943,
      "step": 78047
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.822473406791687,
      "learning_rate": 0.0004455609946070746,
      "loss": 3.0126,
      "step": 78048
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.225785255432129,
      "learning_rate": 0.0004455574178004069,
      "loss": 3.0482,
      "step": 78049
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8646793365478516,
      "learning_rate": 0.00044555384096667743,
      "loss": 2.9897,
      "step": 78050
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6343159675598145,
      "learning_rate": 0.0004455502641058867,
      "loss": 3.2547,
      "step": 78051
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9147371053695679,
      "learning_rate": 0.00044554668721803536,
      "loss": 2.9575,
      "step": 78052
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.308671712875366,
      "learning_rate": 0.00044554311030312423,
      "loss": 2.9939,
      "step": 78053
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.710797667503357,
      "learning_rate": 0.0004455395333611539,
      "loss": 2.9942,
      "step": 78054
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2230706214904785,
      "learning_rate": 0.00044553595639212494,
      "loss": 3.1355,
      "step": 78055
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.043783187866211,
      "learning_rate": 0.0004455323793960382,
      "loss": 3.056,
      "step": 78056
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8564225435256958,
      "learning_rate": 0.00044552880237289414,
      "loss": 3.0943,
      "step": 78057
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6483981609344482,
      "learning_rate": 0.0004455252253226936,
      "loss": 2.9091,
      "step": 78058
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9443227052688599,
      "learning_rate": 0.00044552164824543714,
      "loss": 2.9683,
      "step": 78059
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6719478368759155,
      "learning_rate": 0.0004455180711411255,
      "loss": 2.8951,
      "step": 78060
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9981485605239868,
      "learning_rate": 0.00044551449400975925,
      "loss": 3.0413,
      "step": 78061
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.924820899963379,
      "learning_rate": 0.0004455109168513392,
      "loss": 3.0192,
      "step": 78062
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.932779312133789,
      "learning_rate": 0.0004455073396658658,
      "loss": 3.2057,
      "step": 78063
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8070707321166992,
      "learning_rate": 0.00044550376245333986,
      "loss": 3.2357,
      "step": 78064
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6908366680145264,
      "learning_rate": 0.0004455001852137621,
      "loss": 3.2787,
      "step": 78065
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1279869079589844,
      "learning_rate": 0.00044549660794713304,
      "loss": 2.6663,
      "step": 78066
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.794046640396118,
      "learning_rate": 0.00044549303065345345,
      "loss": 3.0516,
      "step": 78067
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5650898218154907,
      "learning_rate": 0.00044548945333272404,
      "loss": 2.894,
      "step": 78068
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0753440856933594,
      "learning_rate": 0.0004454858759849452,
      "loss": 2.8879,
      "step": 78069
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1311838626861572,
      "learning_rate": 0.0004454822986101179,
      "loss": 3.0384,
      "step": 78070
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4246227741241455,
      "learning_rate": 0.0004454787212082428,
      "loss": 2.9518,
      "step": 78071
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5059258937835693,
      "learning_rate": 0.0004454751437793203,
      "loss": 3.0162,
      "step": 78072
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.557344675064087,
      "learning_rate": 0.0004454715663233512,
      "loss": 3.0319,
      "step": 78073
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.31558895111084,
      "learning_rate": 0.00044546798884033634,
      "loss": 3.2228,
      "step": 78074
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.051915168762207,
      "learning_rate": 0.0004454644113302761,
      "loss": 2.858,
      "step": 78075
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7892600297927856,
      "learning_rate": 0.00044546083379317134,
      "loss": 3.1114,
      "step": 78076
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6042237281799316,
      "learning_rate": 0.0004454572562290227,
      "loss": 3.0275,
      "step": 78077
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5880473852157593,
      "learning_rate": 0.0004454536786378307,
      "loss": 2.9487,
      "step": 78078
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4283654689788818,
      "learning_rate": 0.0004454501010195963,
      "loss": 3.0931,
      "step": 78079
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.616190791130066,
      "learning_rate": 0.0004454465233743198,
      "loss": 3.2933,
      "step": 78080
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.72697913646698,
      "learning_rate": 0.00044544294570200216,
      "loss": 3.0504,
      "step": 78081
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.145256280899048,
      "learning_rate": 0.0004454393680026439,
      "loss": 2.7276,
      "step": 78082
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.772963285446167,
      "learning_rate": 0.0004454357902762457,
      "loss": 3.0183,
      "step": 78083
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6703699827194214,
      "learning_rate": 0.0004454322125228082,
      "loss": 3.1611,
      "step": 78084
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5752642154693604,
      "learning_rate": 0.00044542863474233215,
      "loss": 3.1113,
      "step": 78085
    },
    {
      "epoch": 1.02,
      "grad_norm": 6.452057361602783,
      "learning_rate": 0.00044542505693481813,
      "loss": 3.2705,
      "step": 78086
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4664759635925293,
      "learning_rate": 0.00044542147910026694,
      "loss": 2.8587,
      "step": 78087
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6276906728744507,
      "learning_rate": 0.00044541790123867915,
      "loss": 3.146,
      "step": 78088
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.946794867515564,
      "learning_rate": 0.0004454143233500554,
      "loss": 2.9709,
      "step": 78089
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7554998397827148,
      "learning_rate": 0.00044541074543439626,
      "loss": 2.9262,
      "step": 78090
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7492575645446777,
      "learning_rate": 0.0004454071674917026,
      "loss": 3.0177,
      "step": 78091
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6045509576797485,
      "learning_rate": 0.00044540358952197505,
      "loss": 3.0461,
      "step": 78092
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5321669578552246,
      "learning_rate": 0.0004454000115252142,
      "loss": 3.0249,
      "step": 78093
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.219019889831543,
      "learning_rate": 0.0004453964335014207,
      "loss": 3.2497,
      "step": 78094
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.838754653930664,
      "learning_rate": 0.00044539285545059536,
      "loss": 2.9792,
      "step": 78095
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6901997327804565,
      "learning_rate": 0.0004453892773727387,
      "loss": 3.1203,
      "step": 78096
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.951228141784668,
      "learning_rate": 0.0004453856992678514,
      "loss": 2.8864,
      "step": 78097
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9277623891830444,
      "learning_rate": 0.00044538212113593415,
      "loss": 2.9244,
      "step": 78098
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.62524676322937,
      "learning_rate": 0.0004453785429769877,
      "loss": 2.8822,
      "step": 78099
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.339434862136841,
      "learning_rate": 0.0004453749647910125,
      "loss": 3.0817,
      "step": 78100
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1694324016571045,
      "learning_rate": 0.0004453713865780095,
      "loss": 3.1233,
      "step": 78101
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.6789445877075195,
      "learning_rate": 0.0004453678083379791,
      "loss": 3.1144,
      "step": 78102
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.883739709854126,
      "learning_rate": 0.0004453642300709221,
      "loss": 2.9772,
      "step": 78103
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4634430408477783,
      "learning_rate": 0.00044536065177683916,
      "loss": 2.9143,
      "step": 78104
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.847618818283081,
      "learning_rate": 0.00044535707345573093,
      "loss": 3.0609,
      "step": 78105
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.445101022720337,
      "learning_rate": 0.0004453534951075981,
      "loss": 3.0704,
      "step": 78106
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7589311599731445,
      "learning_rate": 0.0004453499167324413,
      "loss": 3.2444,
      "step": 78107
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5471434593200684,
      "learning_rate": 0.00044534633833026127,
      "loss": 2.95,
      "step": 78108
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2997982501983643,
      "learning_rate": 0.0004453427599010584,
      "loss": 2.9958,
      "step": 78109
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6805057525634766,
      "learning_rate": 0.00044533918144483377,
      "loss": 3.1886,
      "step": 78110
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.621537685394287,
      "learning_rate": 0.00044533560296158777,
      "loss": 3.1325,
      "step": 78111
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8169660568237305,
      "learning_rate": 0.0004453320244513212,
      "loss": 3.0661,
      "step": 78112
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.555814504623413,
      "learning_rate": 0.0004453284459140346,
      "loss": 2.9947,
      "step": 78113
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8586504459381104,
      "learning_rate": 0.0004453248673497287,
      "loss": 3.0832,
      "step": 78114
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6267825365066528,
      "learning_rate": 0.0004453212887584042,
      "loss": 2.952,
      "step": 78115
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7180243730545044,
      "learning_rate": 0.0004453177101400617,
      "loss": 3.2012,
      "step": 78116
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.720655918121338,
      "learning_rate": 0.0004453141314947019,
      "loss": 2.9894,
      "step": 78117
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6010355949401855,
      "learning_rate": 0.00044531055282232544,
      "loss": 3.3021,
      "step": 78118
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8824175596237183,
      "learning_rate": 0.00044530697412293295,
      "loss": 2.8245,
      "step": 78119
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.248344659805298,
      "learning_rate": 0.00044530339539652534,
      "loss": 3.178,
      "step": 78120
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1089096069335938,
      "learning_rate": 0.00044529981664310293,
      "loss": 3.2514,
      "step": 78121
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.240732192993164,
      "learning_rate": 0.00044529623786266655,
      "loss": 3.1245,
      "step": 78122
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9599307775497437,
      "learning_rate": 0.00044529265905521694,
      "loss": 3.2141,
      "step": 78123
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7592328786849976,
      "learning_rate": 0.0004452890802207546,
      "loss": 3.0555,
      "step": 78124
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7589646577835083,
      "learning_rate": 0.00044528550135928033,
      "loss": 2.8566,
      "step": 78125
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9086021184921265,
      "learning_rate": 0.00044528192247079475,
      "loss": 2.8719,
      "step": 78126
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.947009563446045,
      "learning_rate": 0.0004452783435552985,
      "loss": 3.0151,
      "step": 78127
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3922134637832642,
      "learning_rate": 0.0004452747646127922,
      "loss": 3.148,
      "step": 78128
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1992475986480713,
      "learning_rate": 0.00044527118564327664,
      "loss": 2.933,
      "step": 78129
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6950087547302246,
      "learning_rate": 0.0004452676066467524,
      "loss": 3.0011,
      "step": 78130
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5181881189346313,
      "learning_rate": 0.00044526402762322024,
      "loss": 2.8794,
      "step": 78131
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5647497177124023,
      "learning_rate": 0.0004452604485726807,
      "loss": 3.0444,
      "step": 78132
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7249820232391357,
      "learning_rate": 0.00044525686949513447,
      "loss": 3.0523,
      "step": 78133
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.4403789043426514,
      "learning_rate": 0.0004452532903905823,
      "loss": 3.1112,
      "step": 78134
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6377718448638916,
      "learning_rate": 0.00044524971125902476,
      "loss": 3.1807,
      "step": 78135
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3252089023590088,
      "learning_rate": 0.00044524613210046257,
      "loss": 2.9481,
      "step": 78136
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1885440349578857,
      "learning_rate": 0.00044524255291489637,
      "loss": 2.8893,
      "step": 78137
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5166914463043213,
      "learning_rate": 0.0004452389737023269,
      "loss": 2.9303,
      "step": 78138
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0414814949035645,
      "learning_rate": 0.0004452353944627547,
      "loss": 3.1643,
      "step": 78139
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1829473972320557,
      "learning_rate": 0.0004452318151961805,
      "loss": 2.998,
      "step": 78140
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0892350673675537,
      "learning_rate": 0.00044522823590260505,
      "loss": 2.9565,
      "step": 78141
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.800527572631836,
      "learning_rate": 0.00044522465658202885,
      "loss": 3.2693,
      "step": 78142
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4375375509262085,
      "learning_rate": 0.0004452210772344526,
      "loss": 3.1329,
      "step": 78143
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.403786301612854,
      "learning_rate": 0.0004452174978598771,
      "loss": 3.0439,
      "step": 78144
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5975526571273804,
      "learning_rate": 0.0004452139184583029,
      "loss": 3.2685,
      "step": 78145
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.49764084815979,
      "learning_rate": 0.00044521033902973064,
      "loss": 3.1718,
      "step": 78146
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6028121709823608,
      "learning_rate": 0.0004452067595741612,
      "loss": 2.8426,
      "step": 78147
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6251331567764282,
      "learning_rate": 0.0004452031800915949,
      "loss": 2.9202,
      "step": 78148
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7579482793807983,
      "learning_rate": 0.0004451996005820326,
      "loss": 2.8968,
      "step": 78149
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2657382488250732,
      "learning_rate": 0.00044519602104547507,
      "loss": 2.7366,
      "step": 78150
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7363499402999878,
      "learning_rate": 0.0004451924414819228,
      "loss": 2.9463,
      "step": 78151
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5468525886535645,
      "learning_rate": 0.0004451888618913765,
      "loss": 2.8492,
      "step": 78152
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9968485832214355,
      "learning_rate": 0.0004451852822738368,
      "loss": 2.8964,
      "step": 78153
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4722744226455688,
      "learning_rate": 0.0004451817026293045,
      "loss": 3.0956,
      "step": 78154
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1756038665771484,
      "learning_rate": 0.00044517812295778015,
      "loss": 2.8639,
      "step": 78155
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3982340097427368,
      "learning_rate": 0.0004451745432592645,
      "loss": 3.0648,
      "step": 78156
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7976908683776855,
      "learning_rate": 0.0004451709635337581,
      "loss": 3.0994,
      "step": 78157
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9156432151794434,
      "learning_rate": 0.0004451673837812617,
      "loss": 3.3332,
      "step": 78158
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3993180990219116,
      "learning_rate": 0.00044516380400177596,
      "loss": 3.2894,
      "step": 78159
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5672248601913452,
      "learning_rate": 0.0004451602241953015,
      "loss": 3.115,
      "step": 78160
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5579358339309692,
      "learning_rate": 0.00044515664436183894,
      "loss": 2.9624,
      "step": 78161
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4929139614105225,
      "learning_rate": 0.00044515306450138915,
      "loss": 2.8024,
      "step": 78162
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6286340951919556,
      "learning_rate": 0.0004451494846139526,
      "loss": 3.1493,
      "step": 78163
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7479841709136963,
      "learning_rate": 0.00044514590469953,
      "loss": 3.101,
      "step": 78164
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.763920545578003,
      "learning_rate": 0.00044514232475812213,
      "loss": 3.0168,
      "step": 78165
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5523988008499146,
      "learning_rate": 0.0004451387447897295,
      "loss": 3.0848,
      "step": 78166
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9555562734603882,
      "learning_rate": 0.00044513516479435285,
      "loss": 2.9032,
      "step": 78167
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.4077305793762207,
      "learning_rate": 0.00044513158477199274,
      "loss": 2.802,
      "step": 78168
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6820827722549438,
      "learning_rate": 0.0004451280047226501,
      "loss": 2.8247,
      "step": 78169
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7209851741790771,
      "learning_rate": 0.0004451244246463253,
      "loss": 3.156,
      "step": 78170
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.0030322074890137,
      "learning_rate": 0.00044512084454301914,
      "loss": 3.1081,
      "step": 78171
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0034544467926025,
      "learning_rate": 0.0004451172644127323,
      "loss": 3.0253,
      "step": 78172
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7548850774765015,
      "learning_rate": 0.0004451136842554654,
      "loss": 3.2513,
      "step": 78173
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.566787838935852,
      "learning_rate": 0.0004451101040712191,
      "loss": 2.8506,
      "step": 78174
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.540919542312622,
      "learning_rate": 0.0004451065238599942,
      "loss": 3.0688,
      "step": 78175
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.636833667755127,
      "learning_rate": 0.00044510294362179123,
      "loss": 3.0431,
      "step": 78176
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6567401885986328,
      "learning_rate": 0.0004450993633566108,
      "loss": 3.1445,
      "step": 78177
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7823766469955444,
      "learning_rate": 0.00044509578306445373,
      "loss": 2.9572,
      "step": 78178
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4318302869796753,
      "learning_rate": 0.0004450922027453206,
      "loss": 2.96,
      "step": 78179
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.114675998687744,
      "learning_rate": 0.00044508862239921214,
      "loss": 2.9579,
      "step": 78180
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3712329864501953,
      "learning_rate": 0.00044508504202612885,
      "loss": 2.8579,
      "step": 78181
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5816291570663452,
      "learning_rate": 0.00044508146162607163,
      "loss": 3.2222,
      "step": 78182
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7165194749832153,
      "learning_rate": 0.00044507788119904087,
      "loss": 2.8455,
      "step": 78183
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.021392822265625,
      "learning_rate": 0.0004450743007450376,
      "loss": 3.1373,
      "step": 78184
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7818019390106201,
      "learning_rate": 0.00044507072026406214,
      "loss": 3.0837,
      "step": 78185
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9383057355880737,
      "learning_rate": 0.0004450671397561153,
      "loss": 3.0169,
      "step": 78186
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7278157472610474,
      "learning_rate": 0.0004450635592211978,
      "loss": 2.8064,
      "step": 78187
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.064258575439453,
      "learning_rate": 0.0004450599786593102,
      "loss": 2.8765,
      "step": 78188
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3224425315856934,
      "learning_rate": 0.0004450563980704532,
      "loss": 2.884,
      "step": 78189
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8588298559188843,
      "learning_rate": 0.00044505281745462757,
      "loss": 2.7418,
      "step": 78190
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.90196692943573,
      "learning_rate": 0.0004450492368118338,
      "loss": 3.2399,
      "step": 78191
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.091811180114746,
      "learning_rate": 0.0004450456561420727,
      "loss": 2.9247,
      "step": 78192
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5297951698303223,
      "learning_rate": 0.00044504207544534484,
      "loss": 3.0283,
      "step": 78193
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.958528757095337,
      "learning_rate": 0.0004450384947216509,
      "loss": 3.3574,
      "step": 78194
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7860149145126343,
      "learning_rate": 0.0004450349139709916,
      "loss": 3.1028,
      "step": 78195
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0892045497894287,
      "learning_rate": 0.00044503133319336757,
      "loss": 3.1249,
      "step": 78196
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8663591146469116,
      "learning_rate": 0.00044502775238877945,
      "loss": 3.0575,
      "step": 78197
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5072470903396606,
      "learning_rate": 0.00044502417155722787,
      "loss": 2.879,
      "step": 78198
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.161940813064575,
      "learning_rate": 0.00044502059069871365,
      "loss": 3.1127,
      "step": 78199
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.531185269355774,
      "learning_rate": 0.0004450170098132374,
      "loss": 2.9886,
      "step": 78200
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.494119644165039,
      "learning_rate": 0.0004450134289007996,
      "loss": 2.9826,
      "step": 78201
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2368013858795166,
      "learning_rate": 0.00044500984796140124,
      "loss": 3.2002,
      "step": 78202
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.169382095336914,
      "learning_rate": 0.00044500626699504273,
      "loss": 2.8671,
      "step": 78203
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.64999520778656,
      "learning_rate": 0.00044500268600172483,
      "loss": 3.1548,
      "step": 78204
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2040467262268066,
      "learning_rate": 0.0004449991049814482,
      "loss": 2.8828,
      "step": 78205
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4148705005645752,
      "learning_rate": 0.0004449955239342134,
      "loss": 3.0356,
      "step": 78206
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.6151490211486816,
      "learning_rate": 0.00044499194286002134,
      "loss": 3.006,
      "step": 78207
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6491751670837402,
      "learning_rate": 0.0004449883617588725,
      "loss": 3.1573,
      "step": 78208
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.851386547088623,
      "learning_rate": 0.0004449847806307676,
      "loss": 3.0172,
      "step": 78209
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.47459876537323,
      "learning_rate": 0.0004449811994757072,
      "loss": 3.0269,
      "step": 78210
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.777296543121338,
      "learning_rate": 0.0004449776182936921,
      "loss": 2.9728,
      "step": 78211
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5017383098602295,
      "learning_rate": 0.000444974037084723,
      "loss": 3.2128,
      "step": 78212
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7155681848526,
      "learning_rate": 0.00044497045584880043,
      "loss": 3.1316,
      "step": 78213
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.433054208755493,
      "learning_rate": 0.00044496687458592515,
      "loss": 3.1901,
      "step": 78214
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8051388263702393,
      "learning_rate": 0.00044496329329609774,
      "loss": 3.0264,
      "step": 78215
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5297001600265503,
      "learning_rate": 0.00044495971197931896,
      "loss": 2.8182,
      "step": 78216
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4802325963974,
      "learning_rate": 0.00044495613063558936,
      "loss": 2.9442,
      "step": 78217
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3828046321868896,
      "learning_rate": 0.00044495254926490974,
      "loss": 2.9409,
      "step": 78218
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4897884130477905,
      "learning_rate": 0.0004449489678672807,
      "loss": 3.0476,
      "step": 78219
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6007230281829834,
      "learning_rate": 0.0004449453864427029,
      "loss": 3.016,
      "step": 78220
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1587440967559814,
      "learning_rate": 0.00044494180499117704,
      "loss": 3.2256,
      "step": 78221
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6131809949874878,
      "learning_rate": 0.0004449382235127037,
      "loss": 2.9515,
      "step": 78222
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6719635725021362,
      "learning_rate": 0.00044493464200728367,
      "loss": 3.122,
      "step": 78223
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5965908765792847,
      "learning_rate": 0.0004449310604749175,
      "loss": 2.8706,
      "step": 78224
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.068697452545166,
      "learning_rate": 0.00044492747891560603,
      "loss": 2.9796,
      "step": 78225
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7483054399490356,
      "learning_rate": 0.0004449238973293498,
      "loss": 2.9454,
      "step": 78226
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.874982476234436,
      "learning_rate": 0.00044492031571614937,
      "loss": 3.0092,
      "step": 78227
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6777894496917725,
      "learning_rate": 0.0004449167340760055,
      "loss": 2.788,
      "step": 78228
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7628048658370972,
      "learning_rate": 0.00044491315240891894,
      "loss": 2.9078,
      "step": 78229
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2278897762298584,
      "learning_rate": 0.0004449095707148903,
      "loss": 3.0497,
      "step": 78230
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.191879987716675,
      "learning_rate": 0.0004449059889939202,
      "loss": 2.7507,
      "step": 78231
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4690920114517212,
      "learning_rate": 0.0004449024072460093,
      "loss": 3.0407,
      "step": 78232
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.34360933303833,
      "learning_rate": 0.0004448988254711584,
      "loss": 2.8707,
      "step": 78233
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8970202207565308,
      "learning_rate": 0.000444895243669368,
      "loss": 3.4435,
      "step": 78234
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.220538377761841,
      "learning_rate": 0.0004448916618406389,
      "loss": 2.9623,
      "step": 78235
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.834824800491333,
      "learning_rate": 0.0004448880799849717,
      "loss": 3.1672,
      "step": 78236
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7973175048828125,
      "learning_rate": 0.00044488449810236696,
      "loss": 2.8325,
      "step": 78237
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.84802508354187,
      "learning_rate": 0.0004448809161928256,
      "loss": 3.0048,
      "step": 78238
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6437100172042847,
      "learning_rate": 0.0004448773342563481,
      "loss": 2.9873,
      "step": 78239
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6665256023406982,
      "learning_rate": 0.0004448737522929352,
      "loss": 2.8,
      "step": 78240
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8857845067977905,
      "learning_rate": 0.00044487017030258745,
      "loss": 3.0935,
      "step": 78241
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5060019493103027,
      "learning_rate": 0.00044486658828530565,
      "loss": 3.0023,
      "step": 78242
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8884228467941284,
      "learning_rate": 0.00044486300624109037,
      "loss": 3.0122,
      "step": 78243
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5412334203720093,
      "learning_rate": 0.00044485942416994237,
      "loss": 3.1495,
      "step": 78244
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.203012466430664,
      "learning_rate": 0.00044485584207186235,
      "loss": 2.8566,
      "step": 78245
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8341987133026123,
      "learning_rate": 0.0004448522599468507,
      "loss": 3.0427,
      "step": 78246
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7967945337295532,
      "learning_rate": 0.0004448486777949084,
      "loss": 3.1464,
      "step": 78247
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.700886607170105,
      "learning_rate": 0.00044484509561603606,
      "loss": 2.9071,
      "step": 78248
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.64719820022583,
      "learning_rate": 0.0004448415134102341,
      "loss": 2.8877,
      "step": 78249
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7602412700653076,
      "learning_rate": 0.0004448379311775035,
      "loss": 2.7969,
      "step": 78250
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.617664098739624,
      "learning_rate": 0.00044483434891784483,
      "loss": 2.7458,
      "step": 78251
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3767818212509155,
      "learning_rate": 0.0004448307666312586,
      "loss": 3.1215,
      "step": 78252
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.470047950744629,
      "learning_rate": 0.00044482718431774565,
      "loss": 3.0652,
      "step": 78253
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.55619215965271,
      "learning_rate": 0.0004448236019773066,
      "loss": 3.1786,
      "step": 78254
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5714542865753174,
      "learning_rate": 0.00044482001960994216,
      "loss": 2.9021,
      "step": 78255
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6647467613220215,
      "learning_rate": 0.00044481643721565285,
      "loss": 2.5811,
      "step": 78256
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.336664080619812,
      "learning_rate": 0.0004448128547944395,
      "loss": 2.9194,
      "step": 78257
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.798169732093811,
      "learning_rate": 0.00044480927234630263,
      "loss": 2.8874,
      "step": 78258
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.5197150707244873,
      "learning_rate": 0.000444805689871243,
      "loss": 2.8371,
      "step": 78259
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5760966539382935,
      "learning_rate": 0.00044480210736926127,
      "loss": 2.939,
      "step": 78260
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8489125967025757,
      "learning_rate": 0.00044479852484035823,
      "loss": 2.9382,
      "step": 78261
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.6845521926879883,
      "learning_rate": 0.00044479494228453424,
      "loss": 3.1923,
      "step": 78262
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6789551973342896,
      "learning_rate": 0.00044479135970179015,
      "loss": 3.0715,
      "step": 78263
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7009069919586182,
      "learning_rate": 0.0004447877770921267,
      "loss": 3.1965,
      "step": 78264
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.395019769668579,
      "learning_rate": 0.00044478419445554443,
      "loss": 3.1314,
      "step": 78265
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.543241500854492,
      "learning_rate": 0.00044478061179204403,
      "loss": 2.7578,
      "step": 78266
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9015265703201294,
      "learning_rate": 0.00044477702910162615,
      "loss": 2.9946,
      "step": 78267
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.7353668212890625,
      "learning_rate": 0.0004447734463842916,
      "loss": 3.1447,
      "step": 78268
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6477835178375244,
      "learning_rate": 0.00044476986364004085,
      "loss": 2.9767,
      "step": 78269
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5576059818267822,
      "learning_rate": 0.00044476628086887463,
      "loss": 3.1215,
      "step": 78270
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1710281372070312,
      "learning_rate": 0.00044476269807079363,
      "loss": 2.8209,
      "step": 78271
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2014191150665283,
      "learning_rate": 0.00044475911524579856,
      "loss": 3.093,
      "step": 78272
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5202000141143799,
      "learning_rate": 0.00044475553239389,
      "loss": 2.9371,
      "step": 78273
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1662180423736572,
      "learning_rate": 0.00044475194951506863,
      "loss": 3.0882,
      "step": 78274
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7779076099395752,
      "learning_rate": 0.0004447483666093353,
      "loss": 3.0903,
      "step": 78275
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6322582960128784,
      "learning_rate": 0.0004447447836766904,
      "loss": 3.0629,
      "step": 78276
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.492620825767517,
      "learning_rate": 0.00044474120071713466,
      "loss": 3.1574,
      "step": 78277
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.4961159229278564,
      "learning_rate": 0.00044473761773066887,
      "loss": 3.3044,
      "step": 78278
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6827987432479858,
      "learning_rate": 0.0004447340347172936,
      "loss": 3.4357,
      "step": 78279
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.808503270149231,
      "learning_rate": 0.00044473045167700955,
      "loss": 2.8946,
      "step": 78280
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9391709566116333,
      "learning_rate": 0.00044472686860981737,
      "loss": 3.1571,
      "step": 78281
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7094027996063232,
      "learning_rate": 0.0004447232855157178,
      "loss": 3.2545,
      "step": 78282
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9377236366271973,
      "learning_rate": 0.00044471970239471124,
      "loss": 2.8756,
      "step": 78283
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1932051181793213,
      "learning_rate": 0.00044471611924679874,
      "loss": 2.9952,
      "step": 78284
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6098856925964355,
      "learning_rate": 0.00044471253607198074,
      "loss": 3.1418,
      "step": 78285
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0640907287597656,
      "learning_rate": 0.00044470895287025794,
      "loss": 3.1187,
      "step": 78286
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8196876049041748,
      "learning_rate": 0.0004447053696416311,
      "loss": 2.8564,
      "step": 78287
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.312506675720215,
      "learning_rate": 0.00044470178638610064,
      "loss": 2.7636,
      "step": 78288
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1371352672576904,
      "learning_rate": 0.0004446982031036674,
      "loss": 3.3481,
      "step": 78289
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9899413585662842,
      "learning_rate": 0.00044469461979433216,
      "loss": 3.0257,
      "step": 78290
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7529915571212769,
      "learning_rate": 0.0004446910364580954,
      "loss": 3.1509,
      "step": 78291
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.563008427619934,
      "learning_rate": 0.0004446874530949578,
      "loss": 3.1292,
      "step": 78292
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8404194116592407,
      "learning_rate": 0.00044468386970492014,
      "loss": 2.9914,
      "step": 78293
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9189832210540771,
      "learning_rate": 0.000444680286287983,
      "loss": 2.8621,
      "step": 78294
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.346853494644165,
      "learning_rate": 0.000444676702844147,
      "loss": 3.0612,
      "step": 78295
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.9591498374938965,
      "learning_rate": 0.000444673119373413,
      "loss": 3.0164,
      "step": 78296
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4752055406570435,
      "learning_rate": 0.00044466953587578143,
      "loss": 3.1806,
      "step": 78297
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6234086751937866,
      "learning_rate": 0.00044466595235125305,
      "loss": 2.8255,
      "step": 78298
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.4810707569122314,
      "learning_rate": 0.00044466236879982864,
      "loss": 3.0145,
      "step": 78299
    },
    {
      "epoch": 1.02,
      "grad_norm": 4.4372406005859375,
      "learning_rate": 0.0004446587852215086,
      "loss": 3.0198,
      "step": 78300
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0831921100616455,
      "learning_rate": 0.00044465520161629394,
      "loss": 3.0331,
      "step": 78301
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9160298109054565,
      "learning_rate": 0.00044465161798418505,
      "loss": 3.0322,
      "step": 78302
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.7724432945251465,
      "learning_rate": 0.0004446480343251827,
      "loss": 2.8344,
      "step": 78303
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6670244932174683,
      "learning_rate": 0.00044464445063928756,
      "loss": 2.8743,
      "step": 78304
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4973145723342896,
      "learning_rate": 0.00044464086692650026,
      "loss": 3.1175,
      "step": 78305
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8159040212631226,
      "learning_rate": 0.0004446372831868216,
      "loss": 3.149,
      "step": 78306
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.679098129272461,
      "learning_rate": 0.000444633699420252,
      "loss": 3.1443,
      "step": 78307
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3509578704833984,
      "learning_rate": 0.0004446301156267924,
      "loss": 2.8718,
      "step": 78308
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.450423240661621,
      "learning_rate": 0.0004446265318064432,
      "loss": 2.9021,
      "step": 78309
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.458778977394104,
      "learning_rate": 0.0004446229479592053,
      "loss": 2.8703,
      "step": 78310
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.831063151359558,
      "learning_rate": 0.0004446193640850792,
      "loss": 2.8406,
      "step": 78311
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5576311349868774,
      "learning_rate": 0.00044461578018406566,
      "loss": 3.0099,
      "step": 78312
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6668226718902588,
      "learning_rate": 0.0004446121962561653,
      "loss": 2.8822,
      "step": 78313
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.825848937034607,
      "learning_rate": 0.00044460861230137874,
      "loss": 3.1001,
      "step": 78314
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7120769023895264,
      "learning_rate": 0.0004446050283197068,
      "loss": 3.0934,
      "step": 78315
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.692812442779541,
      "learning_rate": 0.00044460144431115,
      "loss": 2.9648,
      "step": 78316
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.001582145690918,
      "learning_rate": 0.00044459786027570915,
      "loss": 2.8367,
      "step": 78317
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.4014711380004883,
      "learning_rate": 0.0004445942762133848,
      "loss": 2.8966,
      "step": 78318
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5593878030776978,
      "learning_rate": 0.0004445906921241776,
      "loss": 3.0866,
      "step": 78319
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5091205835342407,
      "learning_rate": 0.0004445871080080883,
      "loss": 3.0029,
      "step": 78320
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.587087869644165,
      "learning_rate": 0.00044458352386511754,
      "loss": 2.7503,
      "step": 78321
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7180291414260864,
      "learning_rate": 0.0004445799396952659,
      "loss": 2.936,
      "step": 78322
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8239061832427979,
      "learning_rate": 0.00044457635549853417,
      "loss": 2.8086,
      "step": 78323
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7908763885498047,
      "learning_rate": 0.00044457277127492295,
      "loss": 3.0432,
      "step": 78324
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8164511919021606,
      "learning_rate": 0.00044456918702443296,
      "loss": 3.0573,
      "step": 78325
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4405144453048706,
      "learning_rate": 0.0004445656027470648,
      "loss": 3.169,
      "step": 78326
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4521890878677368,
      "learning_rate": 0.0004445620184428192,
      "loss": 2.9103,
      "step": 78327
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1274702548980713,
      "learning_rate": 0.00044455843411169674,
      "loss": 2.8361,
      "step": 78328
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7826300859451294,
      "learning_rate": 0.0004445548497536981,
      "loss": 3.0558,
      "step": 78329
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.039444923400879,
      "learning_rate": 0.0004445512653688241,
      "loss": 3.0233,
      "step": 78330
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.357491374015808,
      "learning_rate": 0.0004445476809570752,
      "loss": 3.1483,
      "step": 78331
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.852052927017212,
      "learning_rate": 0.00044454409651845214,
      "loss": 3.0372,
      "step": 78332
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3414251804351807,
      "learning_rate": 0.00044454051205295575,
      "loss": 3.0137,
      "step": 78333
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6135790348052979,
      "learning_rate": 0.00044453692756058643,
      "loss": 2.9869,
      "step": 78334
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.937687873840332,
      "learning_rate": 0.0004445333430413449,
      "loss": 3.2251,
      "step": 78335
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6306949853897095,
      "learning_rate": 0.00044452975849523205,
      "loss": 3.1939,
      "step": 78336
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9288808107376099,
      "learning_rate": 0.00044452617392224834,
      "loss": 2.9818,
      "step": 78337
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.907613754272461,
      "learning_rate": 0.00044452258932239447,
      "loss": 2.8909,
      "step": 78338
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.033167600631714,
      "learning_rate": 0.00044451900469567114,
      "loss": 2.7811,
      "step": 78339
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.649656057357788,
      "learning_rate": 0.000444515420042079,
      "loss": 3.0185,
      "step": 78340
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5325896739959717,
      "learning_rate": 0.00044451183536161866,
      "loss": 2.8708,
      "step": 78341
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.7072551250457764,
      "learning_rate": 0.00044450825065429087,
      "loss": 2.9982,
      "step": 78342
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5572868585586548,
      "learning_rate": 0.00044450466592009627,
      "loss": 3.2086,
      "step": 78343
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1990976333618164,
      "learning_rate": 0.0004445010811590355,
      "loss": 3.0708,
      "step": 78344
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.2019660472869873,
      "learning_rate": 0.0004444974963711093,
      "loss": 3.0624,
      "step": 78345
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5778732299804688,
      "learning_rate": 0.0004444939115563183,
      "loss": 2.9161,
      "step": 78346
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0294995307922363,
      "learning_rate": 0.0004444903267146631,
      "loss": 2.9755,
      "step": 78347
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.682470440864563,
      "learning_rate": 0.0004444867418461445,
      "loss": 3.0596,
      "step": 78348
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8403241634368896,
      "learning_rate": 0.00044448315695076295,
      "loss": 3.1324,
      "step": 78349
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6548731327056885,
      "learning_rate": 0.0004444795720285194,
      "loss": 2.9518,
      "step": 78350
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7028868198394775,
      "learning_rate": 0.0004444759870794143,
      "loss": 3.0606,
      "step": 78351
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.159895420074463,
      "learning_rate": 0.0004444724021034483,
      "loss": 2.9426,
      "step": 78352
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.580292224884033,
      "learning_rate": 0.0004444688171006223,
      "loss": 3.2678,
      "step": 78353
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6511045694351196,
      "learning_rate": 0.0004444652320709368,
      "loss": 2.9482,
      "step": 78354
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.192241668701172,
      "learning_rate": 0.00044446164701439237,
      "loss": 3.2258,
      "step": 78355
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0343148708343506,
      "learning_rate": 0.0004444580619309899,
      "loss": 2.7335,
      "step": 78356
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.20257830619812,
      "learning_rate": 0.00044445447682073,
      "loss": 2.9746,
      "step": 78357
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.072463035583496,
      "learning_rate": 0.00044445089168361317,
      "loss": 3.2278,
      "step": 78358
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3933818340301514,
      "learning_rate": 0.00044444730651964023,
      "loss": 2.9621,
      "step": 78359
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6047347784042358,
      "learning_rate": 0.0004444437213288118,
      "loss": 2.9526,
      "step": 78360
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.803785800933838,
      "learning_rate": 0.00044444013611112855,
      "loss": 2.9527,
      "step": 78361
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9475313425064087,
      "learning_rate": 0.00044443655086659115,
      "loss": 2.824,
      "step": 78362
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.817221164703369,
      "learning_rate": 0.00044443296559520034,
      "loss": 3.2204,
      "step": 78363
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.8802683353424072,
      "learning_rate": 0.00044442938029695667,
      "loss": 3.0108,
      "step": 78364
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.83912992477417,
      "learning_rate": 0.00044442579497186083,
      "loss": 3.024,
      "step": 78365
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0867691040039062,
      "learning_rate": 0.00044442220961991353,
      "loss": 3.2664,
      "step": 78366
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.9842164516448975,
      "learning_rate": 0.0004444186242411154,
      "loss": 3.1754,
      "step": 78367
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8053791522979736,
      "learning_rate": 0.0004444150388354671,
      "loss": 3.095,
      "step": 78368
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4807004928588867,
      "learning_rate": 0.00044441145340296944,
      "loss": 3.1037,
      "step": 78369
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.495032787322998,
      "learning_rate": 0.00044440786794362286,
      "loss": 2.955,
      "step": 78370
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.6330478191375732,
      "learning_rate": 0.00044440428245742813,
      "loss": 3.0076,
      "step": 78371
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1806561946868896,
      "learning_rate": 0.0004444006969443859,
      "loss": 3.0057,
      "step": 78372
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.4958221912384033,
      "learning_rate": 0.0004443971114044969,
      "loss": 2.8769,
      "step": 78373
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6576443910598755,
      "learning_rate": 0.00044439352583776175,
      "loss": 3.0666,
      "step": 78374
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4628016948699951,
      "learning_rate": 0.0004443899402441811,
      "loss": 3.1521,
      "step": 78375
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8782051801681519,
      "learning_rate": 0.00044438635462375566,
      "loss": 3.0197,
      "step": 78376
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6496000289916992,
      "learning_rate": 0.000444382768976486,
      "loss": 3.0269,
      "step": 78377
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.469199776649475,
      "learning_rate": 0.0004443791833023729,
      "loss": 2.9352,
      "step": 78378
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.600444793701172,
      "learning_rate": 0.0004443755976014171,
      "loss": 3.1516,
      "step": 78379
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8600140810012817,
      "learning_rate": 0.000444372011873619,
      "loss": 3.0507,
      "step": 78380
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4335886240005493,
      "learning_rate": 0.00044436842611897946,
      "loss": 2.975,
      "step": 78381
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5267693996429443,
      "learning_rate": 0.0004443648403374991,
      "loss": 2.7207,
      "step": 78382
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0126953125,
      "learning_rate": 0.00044436125452917865,
      "loss": 2.8923,
      "step": 78383
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0181877613067627,
      "learning_rate": 0.00044435766869401866,
      "loss": 3.0459,
      "step": 78384
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.92481529712677,
      "learning_rate": 0.00044435408283201983,
      "loss": 2.866,
      "step": 78385
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6911635398864746,
      "learning_rate": 0.00044435049694318293,
      "loss": 3.156,
      "step": 78386
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.798668622970581,
      "learning_rate": 0.00044434691102750845,
      "loss": 2.9719,
      "step": 78387
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.087005853652954,
      "learning_rate": 0.00044434332508499726,
      "loss": 3.0563,
      "step": 78388
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.837194800376892,
      "learning_rate": 0.00044433973911564984,
      "loss": 2.9956,
      "step": 78389
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5768495798110962,
      "learning_rate": 0.000444336153119467,
      "loss": 2.9886,
      "step": 78390
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5365536212921143,
      "learning_rate": 0.0004443325670964493,
      "loss": 3.2579,
      "step": 78391
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.687220573425293,
      "learning_rate": 0.00044432898104659744,
      "loss": 3.0265,
      "step": 78392
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.928040623664856,
      "learning_rate": 0.0004443253949699121,
      "loss": 3.0807,
      "step": 78393
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.73350191116333,
      "learning_rate": 0.0004443218088663941,
      "loss": 2.9321,
      "step": 78394
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2001469135284424,
      "learning_rate": 0.0004443182227360437,
      "loss": 3.2643,
      "step": 78395
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6252737045288086,
      "learning_rate": 0.0004443146365788619,
      "loss": 3.128,
      "step": 78396
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5547127723693848,
      "learning_rate": 0.00044431105039484934,
      "loss": 3.1064,
      "step": 78397
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7423983812332153,
      "learning_rate": 0.0004443074641840067,
      "loss": 2.9654,
      "step": 78398
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7710422277450562,
      "learning_rate": 0.0004443038779463344,
      "loss": 3.0852,
      "step": 78399
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5771187543869019,
      "learning_rate": 0.00044430029168183343,
      "loss": 3.134,
      "step": 78400
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5041918754577637,
      "learning_rate": 0.00044429670539050426,
      "loss": 3.1404,
      "step": 78401
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8059556484222412,
      "learning_rate": 0.0004442931190723476,
      "loss": 2.718,
      "step": 78402
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5703480243682861,
      "learning_rate": 0.0004442895327273641,
      "loss": 3.0457,
      "step": 78403
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6017189025878906,
      "learning_rate": 0.00044428594635555446,
      "loss": 3.0451,
      "step": 78404
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9644345045089722,
      "learning_rate": 0.0004442823599569194,
      "loss": 2.8937,
      "step": 78405
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.362729072570801,
      "learning_rate": 0.0004442787735314595,
      "loss": 3.0028,
      "step": 78406
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5506953001022339,
      "learning_rate": 0.00044427518707917545,
      "loss": 2.946,
      "step": 78407
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9055348634719849,
      "learning_rate": 0.0004442716006000679,
      "loss": 3.0021,
      "step": 78408
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0693204402923584,
      "learning_rate": 0.0004442680140941376,
      "loss": 2.9233,
      "step": 78409
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6634386777877808,
      "learning_rate": 0.00044426442756138505,
      "loss": 3.0268,
      "step": 78410
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6729460954666138,
      "learning_rate": 0.00044426084100181103,
      "loss": 3.0359,
      "step": 78411
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6768745183944702,
      "learning_rate": 0.00044425725441541624,
      "loss": 2.8112,
      "step": 78412
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.687873363494873,
      "learning_rate": 0.00044425366780220126,
      "loss": 2.9827,
      "step": 78413
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.277276039123535,
      "learning_rate": 0.00044425008116216687,
      "loss": 3.0675,
      "step": 78414
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3966516256332397,
      "learning_rate": 0.00044424649449531365,
      "loss": 3.266,
      "step": 78415
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6960208415985107,
      "learning_rate": 0.00044424290780164225,
      "loss": 2.7823,
      "step": 78416
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.660951852798462,
      "learning_rate": 0.0004442393210811534,
      "loss": 2.8001,
      "step": 78417
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9510880708694458,
      "learning_rate": 0.00044423573433384776,
      "loss": 2.9089,
      "step": 78418
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8888300657272339,
      "learning_rate": 0.0004442321475597259,
      "loss": 3.025,
      "step": 78419
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.169731855392456,
      "learning_rate": 0.0004442285607587886,
      "loss": 3.1385,
      "step": 78420
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.7867085933685303,
      "learning_rate": 0.0004442249739310366,
      "loss": 3.0255,
      "step": 78421
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8260444402694702,
      "learning_rate": 0.0004442213870764703,
      "loss": 2.9483,
      "step": 78422
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9110198020935059,
      "learning_rate": 0.0004442178001950905,
      "loss": 3.0475,
      "step": 78423
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.068981409072876,
      "learning_rate": 0.00044421421328689803,
      "loss": 2.7569,
      "step": 78424
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0832762718200684,
      "learning_rate": 0.00044421062635189326,
      "loss": 3.3976,
      "step": 78425
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4400194883346558,
      "learning_rate": 0.0004442070393900771,
      "loss": 2.9381,
      "step": 78426
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.208616018295288,
      "learning_rate": 0.0004442034524014502,
      "loss": 2.9713,
      "step": 78427
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8463735580444336,
      "learning_rate": 0.00044419986538601306,
      "loss": 3.0168,
      "step": 78428
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8299461603164673,
      "learning_rate": 0.0004441962783437664,
      "loss": 2.9632,
      "step": 78429
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6135413646697998,
      "learning_rate": 0.0004441926912747111,
      "loss": 3.1381,
      "step": 78430
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7716012001037598,
      "learning_rate": 0.00044418910417884746,
      "loss": 3.012,
      "step": 78431
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.2721483707427979,
      "learning_rate": 0.00044418551705617646,
      "loss": 3.1975,
      "step": 78432
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.419236183166504,
      "learning_rate": 0.0004441819299066986,
      "loss": 2.7635,
      "step": 78433
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.328102707862854,
      "learning_rate": 0.0004441783427304146,
      "loss": 2.8438,
      "step": 78434
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5449217557907104,
      "learning_rate": 0.00044417475552732514,
      "loss": 3.2299,
      "step": 78435
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0026800632476807,
      "learning_rate": 0.0004441711682974309,
      "loss": 3.2376,
      "step": 78436
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5192646980285645,
      "learning_rate": 0.00044416758104073243,
      "loss": 2.9378,
      "step": 78437
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6978636980056763,
      "learning_rate": 0.0004441639937572306,
      "loss": 2.9231,
      "step": 78438
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7267837524414062,
      "learning_rate": 0.0004441604064469259,
      "loss": 3.0811,
      "step": 78439
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5130985975265503,
      "learning_rate": 0.0004441568191098191,
      "loss": 3.0787,
      "step": 78440
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5652836561203003,
      "learning_rate": 0.00044415323174591085,
      "loss": 3.1235,
      "step": 78441
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7876319885253906,
      "learning_rate": 0.00044414964435520165,
      "loss": 2.9571,
      "step": 78442
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7728058099746704,
      "learning_rate": 0.0004441460569376924,
      "loss": 3.174,
      "step": 78443
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6516636610031128,
      "learning_rate": 0.00044414246949338364,
      "loss": 3.1822,
      "step": 78444
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7758140563964844,
      "learning_rate": 0.0004441388820222762,
      "loss": 3.1129,
      "step": 78445
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4710935354232788,
      "learning_rate": 0.0004441352945243705,
      "loss": 2.9762,
      "step": 78446
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.507731556892395,
      "learning_rate": 0.00044413170699966737,
      "loss": 2.7729,
      "step": 78447
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5679599046707153,
      "learning_rate": 0.0004441281194481674,
      "loss": 3.0942,
      "step": 78448
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5485399961471558,
      "learning_rate": 0.00044412453186987134,
      "loss": 2.9552,
      "step": 78449
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6798555850982666,
      "learning_rate": 0.00044412094426477975,
      "loss": 3.1098,
      "step": 78450
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8259968757629395,
      "learning_rate": 0.0004441173566328934,
      "loss": 3.2629,
      "step": 78451
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6238514184951782,
      "learning_rate": 0.0004441137689742129,
      "loss": 2.7541,
      "step": 78452
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7848879098892212,
      "learning_rate": 0.00044411018128873896,
      "loss": 3.201,
      "step": 78453
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.017914056777954,
      "learning_rate": 0.00044410659357647215,
      "loss": 3.2251,
      "step": 78454
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6083030700683594,
      "learning_rate": 0.00044410300583741334,
      "loss": 3.1649,
      "step": 78455
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5625520944595337,
      "learning_rate": 0.00044409941807156293,
      "loss": 3.0587,
      "step": 78456
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3342795372009277,
      "learning_rate": 0.0004440958302789217,
      "loss": 2.8528,
      "step": 78457
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.520869255065918,
      "learning_rate": 0.00044409224245949046,
      "loss": 3.0002,
      "step": 78458
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6612797975540161,
      "learning_rate": 0.00044408865461326966,
      "loss": 3.1124,
      "step": 78459
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.506027340888977,
      "learning_rate": 0.0004440850667402601,
      "loss": 2.9109,
      "step": 78460
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4452674388885498,
      "learning_rate": 0.0004440814788404624,
      "loss": 3.139,
      "step": 78461
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6037710905075073,
      "learning_rate": 0.0004440778909138772,
      "loss": 3.3394,
      "step": 78462
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5163836479187012,
      "learning_rate": 0.0004440743029605052,
      "loss": 3.0338,
      "step": 78463
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9961113929748535,
      "learning_rate": 0.00044407071498034714,
      "loss": 3.1775,
      "step": 78464
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5001007318496704,
      "learning_rate": 0.0004440671269734035,
      "loss": 2.7669,
      "step": 78465
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.886250615119934,
      "learning_rate": 0.00044406353893967513,
      "loss": 3.2971,
      "step": 78466
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5128027200698853,
      "learning_rate": 0.0004440599508791627,
      "loss": 3.149,
      "step": 78467
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6781179904937744,
      "learning_rate": 0.00044405636279186666,
      "loss": 3.1027,
      "step": 78468
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2769615650177,
      "learning_rate": 0.00044405277467778786,
      "loss": 3.1757,
      "step": 78469
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1710331439971924,
      "learning_rate": 0.00044404918653692704,
      "loss": 3.0235,
      "step": 78470
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8316763639450073,
      "learning_rate": 0.00044404559836928463,
      "loss": 2.8085,
      "step": 78471
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1395061016082764,
      "learning_rate": 0.0004440420101748615,
      "loss": 3.1065,
      "step": 78472
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7369942665100098,
      "learning_rate": 0.0004440384219536582,
      "loss": 3.0394,
      "step": 78473
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.888216257095337,
      "learning_rate": 0.00044403483370567546,
      "loss": 2.9635,
      "step": 78474
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5448402166366577,
      "learning_rate": 0.00044403124543091393,
      "loss": 3.1038,
      "step": 78475
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.068974733352661,
      "learning_rate": 0.0004440276571293743,
      "loss": 3.0778,
      "step": 78476
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6515752077102661,
      "learning_rate": 0.00044402406880105714,
      "loss": 2.9358,
      "step": 78477
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.727872610092163,
      "learning_rate": 0.0004440204804459632,
      "loss": 3.2003,
      "step": 78478
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.1255884170532227,
      "learning_rate": 0.0004440168920640932,
      "loss": 3.1228,
      "step": 78479
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1975014209747314,
      "learning_rate": 0.0004440133036554477,
      "loss": 2.9583,
      "step": 78480
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5051687955856323,
      "learning_rate": 0.00044400971522002734,
      "loss": 2.9036,
      "step": 78481
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6248430013656616,
      "learning_rate": 0.000444006126757833,
      "loss": 2.9002,
      "step": 78482
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5714792013168335,
      "learning_rate": 0.0004440025382688651,
      "loss": 2.719,
      "step": 78483
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4453845024108887,
      "learning_rate": 0.0004439989497531244,
      "loss": 3.0452,
      "step": 78484
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6700383424758911,
      "learning_rate": 0.0004439953612106117,
      "loss": 3.0304,
      "step": 78485
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.588667631149292,
      "learning_rate": 0.0004439917726413274,
      "loss": 2.8822,
      "step": 78486
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4794583320617676,
      "learning_rate": 0.0004439881840452724,
      "loss": 2.8026,
      "step": 78487
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9489636421203613,
      "learning_rate": 0.0004439845954224472,
      "loss": 3.2852,
      "step": 78488
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5075745582580566,
      "learning_rate": 0.0004439810067728526,
      "loss": 2.9175,
      "step": 78489
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6372833251953125,
      "learning_rate": 0.00044397741809648915,
      "loss": 2.9901,
      "step": 78490
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6341421604156494,
      "learning_rate": 0.0004439738293933577,
      "loss": 3.1548,
      "step": 78491
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6080912351608276,
      "learning_rate": 0.00044397024066345875,
      "loss": 3.0161,
      "step": 78492
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4073635339736938,
      "learning_rate": 0.00044396665190679294,
      "loss": 3.2797,
      "step": 78493
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.667099714279175,
      "learning_rate": 0.0004439630631233611,
      "loss": 2.7979,
      "step": 78494
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0974113941192627,
      "learning_rate": 0.0004439594743131638,
      "loss": 2.9924,
      "step": 78495
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8609418869018555,
      "learning_rate": 0.00044395588547620164,
      "loss": 3.0979,
      "step": 78496
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8059942722320557,
      "learning_rate": 0.0004439522966124754,
      "loss": 2.8577,
      "step": 78497
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0092999935150146,
      "learning_rate": 0.0004439487077219858,
      "loss": 3.2507,
      "step": 78498
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4979816675186157,
      "learning_rate": 0.00044394511880473326,
      "loss": 3.0895,
      "step": 78499
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9892693758010864,
      "learning_rate": 0.0004439415298607187,
      "loss": 3.1741,
      "step": 78500
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.122516393661499,
      "learning_rate": 0.0004439379408899427,
      "loss": 2.6625,
      "step": 78501
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6666436195373535,
      "learning_rate": 0.0004439343518924059,
      "loss": 3.0968,
      "step": 78502
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0082497596740723,
      "learning_rate": 0.000443930762868109,
      "loss": 3.2047,
      "step": 78503
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5113563537597656,
      "learning_rate": 0.00044392717381705265,
      "loss": 3.0143,
      "step": 78504
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6993180513381958,
      "learning_rate": 0.0004439235847392375,
      "loss": 3.0834,
      "step": 78505
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5428904294967651,
      "learning_rate": 0.0004439199956346642,
      "loss": 2.8948,
      "step": 78506
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8937103748321533,
      "learning_rate": 0.0004439164065033336,
      "loss": 2.9642,
      "step": 78507
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.895161747932434,
      "learning_rate": 0.00044391281734524604,
      "loss": 2.9251,
      "step": 78508
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6032848358154297,
      "learning_rate": 0.00044390922816040244,
      "loss": 2.978,
      "step": 78509
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9037322998046875,
      "learning_rate": 0.00044390563894880345,
      "loss": 3.1308,
      "step": 78510
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5156238079071045,
      "learning_rate": 0.00044390204971044966,
      "loss": 2.8226,
      "step": 78511
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6600319147109985,
      "learning_rate": 0.00044389846044534176,
      "loss": 3.013,
      "step": 78512
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9245902299880981,
      "learning_rate": 0.0004438948711534805,
      "loss": 2.9728,
      "step": 78513
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5779775381088257,
      "learning_rate": 0.00044389128183486634,
      "loss": 3.0791,
      "step": 78514
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5889990329742432,
      "learning_rate": 0.0004438876924895001,
      "loss": 3.0382,
      "step": 78515
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6177237033843994,
      "learning_rate": 0.0004438841031173825,
      "loss": 2.9549,
      "step": 78516
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3695858716964722,
      "learning_rate": 0.00044388051371851403,
      "loss": 2.931,
      "step": 78517
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7761797904968262,
      "learning_rate": 0.0004438769242928955,
      "loss": 2.9339,
      "step": 78518
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6071321964263916,
      "learning_rate": 0.00044387333484052755,
      "loss": 3.2087,
      "step": 78519
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8735322952270508,
      "learning_rate": 0.0004438697453614108,
      "loss": 2.9377,
      "step": 78520
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.833827018737793,
      "learning_rate": 0.0004438661558555459,
      "loss": 2.87,
      "step": 78521
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1689975261688232,
      "learning_rate": 0.00044386256632293375,
      "loss": 3.3552,
      "step": 78522
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8478552103042603,
      "learning_rate": 0.00044385897676357464,
      "loss": 3.0188,
      "step": 78523
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9704837799072266,
      "learning_rate": 0.0004438553871774695,
      "loss": 2.9952,
      "step": 78524
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.131450891494751,
      "learning_rate": 0.00044385179756461894,
      "loss": 2.7827,
      "step": 78525
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7201794385910034,
      "learning_rate": 0.0004438482079250236,
      "loss": 2.9747,
      "step": 78526
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6226080656051636,
      "learning_rate": 0.0004438446182586841,
      "loss": 3.1038,
      "step": 78527
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.991422176361084,
      "learning_rate": 0.00044384102856560127,
      "loss": 3.3103,
      "step": 78528
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.92415189743042,
      "learning_rate": 0.0004438374388457757,
      "loss": 3.0521,
      "step": 78529
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5011149644851685,
      "learning_rate": 0.0004438338490992079,
      "loss": 2.9275,
      "step": 78530
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5687158107757568,
      "learning_rate": 0.0004438302593258988,
      "loss": 3.1132,
      "step": 78531
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2964611053466797,
      "learning_rate": 0.00044382666952584885,
      "loss": 2.9799,
      "step": 78532
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5057966709136963,
      "learning_rate": 0.00044382307969905886,
      "loss": 3.1241,
      "step": 78533
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.515231966972351,
      "learning_rate": 0.00044381948984552946,
      "loss": 3.0232,
      "step": 78534
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1067891120910645,
      "learning_rate": 0.00044381589996526124,
      "loss": 3.3375,
      "step": 78535
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.112394094467163,
      "learning_rate": 0.000443812310058255,
      "loss": 3.1127,
      "step": 78536
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5475765466690063,
      "learning_rate": 0.0004438087201245113,
      "loss": 3.0444,
      "step": 78537
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.706282138824463,
      "learning_rate": 0.00044380513016403084,
      "loss": 3.1216,
      "step": 78538
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4159969091415405,
      "learning_rate": 0.00044380154017681426,
      "loss": 3.3321,
      "step": 78539
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9203318357467651,
      "learning_rate": 0.00044379795016286235,
      "loss": 2.9811,
      "step": 78540
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7908222675323486,
      "learning_rate": 0.00044379436012217564,
      "loss": 2.9408,
      "step": 78541
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4779118299484253,
      "learning_rate": 0.00044379077005475483,
      "loss": 3.053,
      "step": 78542
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.747911810874939,
      "learning_rate": 0.0004437871799606006,
      "loss": 3.0112,
      "step": 78543
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6500550508499146,
      "learning_rate": 0.0004437835898397136,
      "loss": 2.8825,
      "step": 78544
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7451863288879395,
      "learning_rate": 0.00044377999969209456,
      "loss": 2.8406,
      "step": 78545
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0355167388916016,
      "learning_rate": 0.00044377640951774415,
      "loss": 2.7602,
      "step": 78546
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4000797271728516,
      "learning_rate": 0.00044377281931666285,
      "loss": 3.0205,
      "step": 78547
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1967885494232178,
      "learning_rate": 0.0004437692290888515,
      "loss": 3.234,
      "step": 78548
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7662750482559204,
      "learning_rate": 0.00044376563883431085,
      "loss": 3.3288,
      "step": 78549
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6777153015136719,
      "learning_rate": 0.00044376204855304147,
      "loss": 2.8387,
      "step": 78550
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.212353467941284,
      "learning_rate": 0.0004437584582450439,
      "loss": 2.7672,
      "step": 78551
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6272189617156982,
      "learning_rate": 0.00044375486791031895,
      "loss": 2.8574,
      "step": 78552
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.165156126022339,
      "learning_rate": 0.00044375127754886723,
      "loss": 2.8135,
      "step": 78553
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8131911754608154,
      "learning_rate": 0.00044374768716068944,
      "loss": 3.0111,
      "step": 78554
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7271746397018433,
      "learning_rate": 0.00044374409674578633,
      "loss": 3.046,
      "step": 78555
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.758666515350342,
      "learning_rate": 0.0004437405063041584,
      "loss": 3.1711,
      "step": 78556
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6929854154586792,
      "learning_rate": 0.00044373691583580636,
      "loss": 2.9837,
      "step": 78557
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.592213749885559,
      "learning_rate": 0.000443733325340731,
      "loss": 3.3153,
      "step": 78558
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7628217935562134,
      "learning_rate": 0.0004437297348189328,
      "loss": 2.9399,
      "step": 78559
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4835046529769897,
      "learning_rate": 0.00044372614427041265,
      "loss": 3.144,
      "step": 78560
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.2885751724243164,
      "learning_rate": 0.00044372255369517106,
      "loss": 3.0156,
      "step": 78561
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5855642557144165,
      "learning_rate": 0.0004437189630932087,
      "loss": 3.0467,
      "step": 78562
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.181370735168457,
      "learning_rate": 0.00044371537246452625,
      "loss": 3.0586,
      "step": 78563
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6412676572799683,
      "learning_rate": 0.0004437117818091245,
      "loss": 3.0042,
      "step": 78564
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8184016942977905,
      "learning_rate": 0.00044370819112700394,
      "loss": 3.1195,
      "step": 78565
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6856415271759033,
      "learning_rate": 0.00044370460041816533,
      "loss": 2.9169,
      "step": 78566
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0039865970611572,
      "learning_rate": 0.00044370100968260933,
      "loss": 2.8792,
      "step": 78567
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.603740692138672,
      "learning_rate": 0.0004436974189203366,
      "loss": 2.889,
      "step": 78568
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5179393291473389,
      "learning_rate": 0.0004436938281313477,
      "loss": 3.2486,
      "step": 78569
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.849988341331482,
      "learning_rate": 0.00044369023731564354,
      "loss": 2.8375,
      "step": 78570
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8044174909591675,
      "learning_rate": 0.00044368664647322466,
      "loss": 3.1681,
      "step": 78571
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3175384998321533,
      "learning_rate": 0.0004436830556040916,
      "loss": 3.0197,
      "step": 78572
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9536387920379639,
      "learning_rate": 0.0004436794647082452,
      "loss": 2.9937,
      "step": 78573
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4497570991516113,
      "learning_rate": 0.00044367587378568615,
      "loss": 3.0672,
      "step": 78574
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4140757322311401,
      "learning_rate": 0.00044367228283641503,
      "loss": 2.5737,
      "step": 78575
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.7199912071228027,
      "learning_rate": 0.0004436686918604325,
      "loss": 3.0727,
      "step": 78576
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7331349849700928,
      "learning_rate": 0.0004436651008577392,
      "loss": 2.7713,
      "step": 78577
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.8754069805145264,
      "learning_rate": 0.0004436615098283359,
      "loss": 3.0478,
      "step": 78578
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8515188694000244,
      "learning_rate": 0.00044365791877222315,
      "loss": 2.9539,
      "step": 78579
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.420880913734436,
      "learning_rate": 0.0004436543276894017,
      "loss": 3.0982,
      "step": 78580
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6330636739730835,
      "learning_rate": 0.0004436507365798722,
      "loss": 3.1119,
      "step": 78581
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.671874761581421,
      "learning_rate": 0.00044364714544363536,
      "loss": 2.9904,
      "step": 78582
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.4205472469329834,
      "learning_rate": 0.00044364355428069184,
      "loss": 3.1689,
      "step": 78583
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8618578910827637,
      "learning_rate": 0.00044363996309104215,
      "loss": 3.0007,
      "step": 78584
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5611469745635986,
      "learning_rate": 0.00044363637187468715,
      "loss": 2.9534,
      "step": 78585
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.688501000404358,
      "learning_rate": 0.00044363278063162744,
      "loss": 2.9947,
      "step": 78586
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1723647117614746,
      "learning_rate": 0.00044362918936186366,
      "loss": 2.9015,
      "step": 78587
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5611125230789185,
      "learning_rate": 0.0004436255980653965,
      "loss": 3.277,
      "step": 78588
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6569780111312866,
      "learning_rate": 0.00044362200674222664,
      "loss": 2.938,
      "step": 78589
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7976806163787842,
      "learning_rate": 0.00044361841539235474,
      "loss": 2.8018,
      "step": 78590
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5756871700286865,
      "learning_rate": 0.0004436148240157815,
      "loss": 3.0532,
      "step": 78591
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9211097955703735,
      "learning_rate": 0.00044361123261250755,
      "loss": 3.0065,
      "step": 78592
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0501298904418945,
      "learning_rate": 0.0004436076411825335,
      "loss": 2.9277,
      "step": 78593
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6607730388641357,
      "learning_rate": 0.0004436040497258601,
      "loss": 3.0953,
      "step": 78594
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.177769184112549,
      "learning_rate": 0.0004436004582424881,
      "loss": 2.7615,
      "step": 78595
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8366966247558594,
      "learning_rate": 0.0004435968667324179,
      "loss": 3.0485,
      "step": 78596
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9907158613204956,
      "learning_rate": 0.00044359327519565033,
      "loss": 3.2198,
      "step": 78597
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.400730013847351,
      "learning_rate": 0.00044358968363218627,
      "loss": 2.9053,
      "step": 78598
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0439202785491943,
      "learning_rate": 0.00044358609204202596,
      "loss": 3.0137,
      "step": 78599
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8699899911880493,
      "learning_rate": 0.00044358250042517036,
      "loss": 2.8346,
      "step": 78600
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8249162435531616,
      "learning_rate": 0.0004435789087816201,
      "loss": 2.8233,
      "step": 78601
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.755314826965332,
      "learning_rate": 0.00044357531711137577,
      "loss": 3.1098,
      "step": 78602
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.8962020874023438,
      "learning_rate": 0.00044357172541443807,
      "loss": 3.1241,
      "step": 78603
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6571704149246216,
      "learning_rate": 0.00044356813369080775,
      "loss": 3.2093,
      "step": 78604
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6186623573303223,
      "learning_rate": 0.0004435645419404853,
      "loss": 3.0475,
      "step": 78605
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.783824920654297,
      "learning_rate": 0.0004435609501634715,
      "loss": 3.1996,
      "step": 78606
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.108856201171875,
      "learning_rate": 0.0004435573583597671,
      "loss": 3.0437,
      "step": 78607
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4646005630493164,
      "learning_rate": 0.0004435537665293726,
      "loss": 2.8762,
      "step": 78608
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.460197925567627,
      "learning_rate": 0.00044355017467228875,
      "loss": 3.054,
      "step": 78609
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.716902256011963,
      "learning_rate": 0.0004435465827885163,
      "loss": 3.1011,
      "step": 78610
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0513622760772705,
      "learning_rate": 0.00044354299087805577,
      "loss": 2.9043,
      "step": 78611
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5130034685134888,
      "learning_rate": 0.0004435393989409078,
      "loss": 2.7031,
      "step": 78612
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1527984142303467,
      "learning_rate": 0.0004435358069770733,
      "loss": 2.8342,
      "step": 78613
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.032348871231079,
      "learning_rate": 0.0004435322149865527,
      "loss": 2.9233,
      "step": 78614
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6511781215667725,
      "learning_rate": 0.0004435286229693468,
      "loss": 2.8251,
      "step": 78615
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.7606399059295654,
      "learning_rate": 0.0004435250309254561,
      "loss": 2.9316,
      "step": 78616
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.684798002243042,
      "learning_rate": 0.0004435214388548815,
      "loss": 2.7368,
      "step": 78617
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.067415714263916,
      "learning_rate": 0.00044351784675762353,
      "loss": 3.0779,
      "step": 78618
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0074026584625244,
      "learning_rate": 0.0004435142546336829,
      "loss": 2.9036,
      "step": 78619
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6539698839187622,
      "learning_rate": 0.0004435106624830602,
      "loss": 3.1113,
      "step": 78620
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5130517482757568,
      "learning_rate": 0.0004435070703057562,
      "loss": 3.0966,
      "step": 78621
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4319636821746826,
      "learning_rate": 0.00044350347810177154,
      "loss": 3.065,
      "step": 78622
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.513790488243103,
      "learning_rate": 0.00044349988587110686,
      "loss": 2.8035,
      "step": 78623
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3951460123062134,
      "learning_rate": 0.00044349629361376284,
      "loss": 3.0763,
      "step": 78624
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1853137016296387,
      "learning_rate": 0.0004434927013297402,
      "loss": 2.8954,
      "step": 78625
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.256988048553467,
      "learning_rate": 0.0004434891090190395,
      "loss": 2.9262,
      "step": 78626
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3291921615600586,
      "learning_rate": 0.00044348551668166146,
      "loss": 2.9817,
      "step": 78627
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.812469959259033,
      "learning_rate": 0.0004434819243176068,
      "loss": 2.9333,
      "step": 78628
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9257887601852417,
      "learning_rate": 0.0004434783319268761,
      "loss": 2.8637,
      "step": 78629
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6446115970611572,
      "learning_rate": 0.00044347473950947,
      "loss": 3.0147,
      "step": 78630
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8182260990142822,
      "learning_rate": 0.0004434711470653894,
      "loss": 3.1073,
      "step": 78631
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6712497472763062,
      "learning_rate": 0.00044346755459463466,
      "loss": 2.9082,
      "step": 78632
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9667978286743164,
      "learning_rate": 0.00044346396209720676,
      "loss": 3.0593,
      "step": 78633
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3459272384643555,
      "learning_rate": 0.00044346036957310607,
      "loss": 3.0971,
      "step": 78634
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.6351938247680664,
      "learning_rate": 0.0004434567770223334,
      "loss": 2.8904,
      "step": 78635
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0236258506774902,
      "learning_rate": 0.00044345318444488944,
      "loss": 3.157,
      "step": 78636
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6267807483673096,
      "learning_rate": 0.0004434495918407748,
      "loss": 3.0615,
      "step": 78637
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.46290922164917,
      "learning_rate": 0.0004434459992099902,
      "loss": 3.0437,
      "step": 78638
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.740133285522461,
      "learning_rate": 0.00044344240655253627,
      "loss": 3.0361,
      "step": 78639
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6617830991744995,
      "learning_rate": 0.0004434388138684137,
      "loss": 2.9909,
      "step": 78640
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7476584911346436,
      "learning_rate": 0.00044343522115762314,
      "loss": 3.1093,
      "step": 78641
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.765462040901184,
      "learning_rate": 0.0004434316284201652,
      "loss": 3.1658,
      "step": 78642
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6201186180114746,
      "learning_rate": 0.00044342803565604067,
      "loss": 3.2556,
      "step": 78643
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6309218406677246,
      "learning_rate": 0.00044342444286525027,
      "loss": 3.0856,
      "step": 78644
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5484623908996582,
      "learning_rate": 0.0004434208500477944,
      "loss": 3.1421,
      "step": 78645
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3820348978042603,
      "learning_rate": 0.00044341725720367387,
      "loss": 3.073,
      "step": 78646
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6586421728134155,
      "learning_rate": 0.00044341366433288946,
      "loss": 3.0374,
      "step": 78647
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7401927709579468,
      "learning_rate": 0.0004434100714354417,
      "loss": 3.0087,
      "step": 78648
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6749389171600342,
      "learning_rate": 0.00044340647851133134,
      "loss": 3.1901,
      "step": 78649
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7256499528884888,
      "learning_rate": 0.00044340288556055904,
      "loss": 3.0138,
      "step": 78650
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8778350353240967,
      "learning_rate": 0.00044339929258312534,
      "loss": 3.0242,
      "step": 78651
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8427445888519287,
      "learning_rate": 0.000443395699579031,
      "loss": 3.1884,
      "step": 78652
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8107812404632568,
      "learning_rate": 0.00044339210654827685,
      "loss": 3.0261,
      "step": 78653
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.943678379058838,
      "learning_rate": 0.00044338851349086324,
      "loss": 2.9477,
      "step": 78654
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5240308046340942,
      "learning_rate": 0.00044338492040679105,
      "loss": 3.0466,
      "step": 78655
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2538273334503174,
      "learning_rate": 0.0004433813272960609,
      "loss": 3.1499,
      "step": 78656
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.71629798412323,
      "learning_rate": 0.0004433777341586735,
      "loss": 2.9545,
      "step": 78657
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.823754072189331,
      "learning_rate": 0.0004433741409946293,
      "loss": 2.9771,
      "step": 78658
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8024734258651733,
      "learning_rate": 0.0004433705478039293,
      "loss": 2.8616,
      "step": 78659
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6295721530914307,
      "learning_rate": 0.000443366954586574,
      "loss": 3.0576,
      "step": 78660
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8083406686782837,
      "learning_rate": 0.00044336336134256406,
      "loss": 2.8549,
      "step": 78661
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4829645156860352,
      "learning_rate": 0.0004433597680719002,
      "loss": 3.266,
      "step": 78662
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5920692682266235,
      "learning_rate": 0.00044335617477458294,
      "loss": 2.8764,
      "step": 78663
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5888614654541016,
      "learning_rate": 0.0004433525814506131,
      "loss": 3.0092,
      "step": 78664
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6997666358947754,
      "learning_rate": 0.00044334898809999135,
      "loss": 3.2471,
      "step": 78665
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9375998973846436,
      "learning_rate": 0.0004433453947227183,
      "loss": 3.0422,
      "step": 78666
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6800954341888428,
      "learning_rate": 0.0004433418013187946,
      "loss": 2.7352,
      "step": 78667
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1003377437591553,
      "learning_rate": 0.00044333820788822106,
      "loss": 3.0227,
      "step": 78668
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5705901384353638,
      "learning_rate": 0.0004433346144309982,
      "loss": 2.9723,
      "step": 78669
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7342349290847778,
      "learning_rate": 0.00044333102094712667,
      "loss": 3.1607,
      "step": 78670
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7338831424713135,
      "learning_rate": 0.00044332742743660725,
      "loss": 3.2524,
      "step": 78671
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7021863460540771,
      "learning_rate": 0.00044332383389944046,
      "loss": 3.1669,
      "step": 78672
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3389365673065186,
      "learning_rate": 0.0004433202403356272,
      "loss": 3.1295,
      "step": 78673
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5161645412445068,
      "learning_rate": 0.00044331664674516797,
      "loss": 3.0753,
      "step": 78674
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.3816356658935547,
      "learning_rate": 0.0004433130531280634,
      "loss": 3.1025,
      "step": 78675
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5792182683944702,
      "learning_rate": 0.0004433094594843143,
      "loss": 2.8035,
      "step": 78676
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.899904727935791,
      "learning_rate": 0.0004433058658139212,
      "loss": 2.9705,
      "step": 78677
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7043638229370117,
      "learning_rate": 0.00044330227211688487,
      "loss": 3.1402,
      "step": 78678
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0281822681427,
      "learning_rate": 0.000443298678393206,
      "loss": 3.0246,
      "step": 78679
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.130359411239624,
      "learning_rate": 0.0004432950846428851,
      "loss": 2.9196,
      "step": 78680
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9341195821762085,
      "learning_rate": 0.000443291490865923,
      "loss": 2.9583,
      "step": 78681
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.0026845932006836,
      "learning_rate": 0.00044328789706232026,
      "loss": 2.9954,
      "step": 78682
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5924293994903564,
      "learning_rate": 0.00044328430323207764,
      "loss": 3.0436,
      "step": 78683
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.064234972000122,
      "learning_rate": 0.00044328070937519573,
      "loss": 3.0166,
      "step": 78684
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7199203968048096,
      "learning_rate": 0.0004432771154916753,
      "loss": 2.9962,
      "step": 78685
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.048226833343506,
      "learning_rate": 0.0004432735215815169,
      "loss": 3.2636,
      "step": 78686
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.8636908531188965,
      "learning_rate": 0.0004432699276447213,
      "loss": 2.834,
      "step": 78687
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7160378694534302,
      "learning_rate": 0.0004432663336812889,
      "loss": 2.8397,
      "step": 78688
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.2328529357910156,
      "learning_rate": 0.00044326273969122086,
      "loss": 2.9138,
      "step": 78689
    },
    {
      "epoch": 1.02,
      "grad_norm": 3.2327091693878174,
      "learning_rate": 0.0004432591456745174,
      "loss": 2.9452,
      "step": 78690
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8302332162857056,
      "learning_rate": 0.00044325555163117947,
      "loss": 3.1819,
      "step": 78691
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6689850091934204,
      "learning_rate": 0.00044325195756120766,
      "loss": 2.9798,
      "step": 78692
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9879666566848755,
      "learning_rate": 0.0004432483634646024,
      "loss": 3.0222,
      "step": 78693
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.1672003269195557,
      "learning_rate": 0.00044324476934136473,
      "loss": 2.8512,
      "step": 78694
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.5809595584869385,
      "learning_rate": 0.00044324117519149513,
      "loss": 2.9755,
      "step": 78695
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9163762331008911,
      "learning_rate": 0.0004432375810149943,
      "loss": 3.1301,
      "step": 78696
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.4994975328445435,
      "learning_rate": 0.0004432339868118628,
      "loss": 3.0881,
      "step": 78697
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.087198257446289,
      "learning_rate": 0.00044323039258210145,
      "loss": 3.0287,
      "step": 78698
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6075432300567627,
      "learning_rate": 0.00044322679832571095,
      "loss": 2.883,
      "step": 78699
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9876209497451782,
      "learning_rate": 0.0004432232040426919,
      "loss": 3.0278,
      "step": 78700
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.104497194290161,
      "learning_rate": 0.0004432196097330448,
      "loss": 3.0042,
      "step": 78701
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7295985221862793,
      "learning_rate": 0.00044321601539677063,
      "loss": 3.003,
      "step": 78702
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.3244147300720215,
      "learning_rate": 0.0004432124210338698,
      "loss": 2.971,
      "step": 78703
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.053992986679077,
      "learning_rate": 0.000443208826644343,
      "loss": 2.9874,
      "step": 78704
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.538668155670166,
      "learning_rate": 0.0004432052322281911,
      "loss": 2.9693,
      "step": 78705
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.68887197971344,
      "learning_rate": 0.00044320163778541465,
      "loss": 2.9365,
      "step": 78706
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.747581958770752,
      "learning_rate": 0.00044319804331601425,
      "loss": 2.9915,
      "step": 78707
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.859657049179077,
      "learning_rate": 0.00044319444881999067,
      "loss": 2.8933,
      "step": 78708
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6507258415222168,
      "learning_rate": 0.00044319085429734444,
      "loss": 2.8141,
      "step": 78709
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.6214077472686768,
      "learning_rate": 0.00044318725974807645,
      "loss": 2.7649,
      "step": 78710
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8371057510375977,
      "learning_rate": 0.0004431836651721873,
      "loss": 3.2889,
      "step": 78711
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.886199951171875,
      "learning_rate": 0.0004431800705696775,
      "loss": 3.1526,
      "step": 78712
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.026153564453125,
      "learning_rate": 0.0004431764759405478,
      "loss": 2.9653,
      "step": 78713
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.7224518060684204,
      "learning_rate": 0.0004431728812847989,
      "loss": 2.8275,
      "step": 78714
    },
    {
      "epoch": 1.02,
      "grad_norm": 4.375796794891357,
      "learning_rate": 0.0004431692866024315,
      "loss": 2.9493,
      "step": 78715
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.012462615966797,
      "learning_rate": 0.0004431656918934462,
      "loss": 3.0739,
      "step": 78716
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.714950680732727,
      "learning_rate": 0.0004431620971578437,
      "loss": 2.982,
      "step": 78717
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.8834593296051025,
      "learning_rate": 0.0004431585023956247,
      "loss": 2.8667,
      "step": 78718
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.9522205591201782,
      "learning_rate": 0.0004431549076067898,
      "loss": 2.8343,
      "step": 78719
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.5653125047683716,
      "learning_rate": 0.00044315131279133973,
      "loss": 2.9701,
      "step": 78720
    },
    {
      "epoch": 1.02,
      "grad_norm": 1.807324767112732,
      "learning_rate": 0.0004431477179492751,
      "loss": 3.0331,
      "step": 78721
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7705607414245605,
      "learning_rate": 0.0004431441230805966,
      "loss": 3.0435,
      "step": 78722
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2680463790893555,
      "learning_rate": 0.0004431405281853049,
      "loss": 3.1027,
      "step": 78723
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.511095643043518,
      "learning_rate": 0.0004431369332634007,
      "loss": 2.996,
      "step": 78724
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.687066674232483,
      "learning_rate": 0.00044313333831488464,
      "loss": 3.1221,
      "step": 78725
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5546011924743652,
      "learning_rate": 0.0004431297433397574,
      "loss": 2.8558,
      "step": 78726
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9192143678665161,
      "learning_rate": 0.0004431261483380196,
      "loss": 2.9158,
      "step": 78727
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4795734882354736,
      "learning_rate": 0.00044312255330967196,
      "loss": 2.8961,
      "step": 78728
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8583621978759766,
      "learning_rate": 0.0004431189582547152,
      "loss": 2.9208,
      "step": 78729
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.320096492767334,
      "learning_rate": 0.00044311536317314987,
      "loss": 3.0595,
      "step": 78730
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6104596853256226,
      "learning_rate": 0.00044311176806497663,
      "loss": 2.9194,
      "step": 78731
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5191223621368408,
      "learning_rate": 0.0004431081729301963,
      "loss": 2.6694,
      "step": 78732
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.270493745803833,
      "learning_rate": 0.0004431045777688094,
      "loss": 3.0307,
      "step": 78733
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5402958393096924,
      "learning_rate": 0.0004431009825808167,
      "loss": 3.1892,
      "step": 78734
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8316086530685425,
      "learning_rate": 0.00044309738736621884,
      "loss": 3.1113,
      "step": 78735
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.434904098510742,
      "learning_rate": 0.00044309379212501644,
      "loss": 3.1752,
      "step": 78736
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7956186532974243,
      "learning_rate": 0.0004430901968572102,
      "loss": 3.1677,
      "step": 78737
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7204161882400513,
      "learning_rate": 0.00044308660156280085,
      "loss": 2.9481,
      "step": 78738
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9481933116912842,
      "learning_rate": 0.0004430830062417889,
      "loss": 2.9296,
      "step": 78739
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0736284255981445,
      "learning_rate": 0.0004430794108941752,
      "loss": 3.0252,
      "step": 78740
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8211578130722046,
      "learning_rate": 0.00044307581551996034,
      "loss": 3.0679,
      "step": 78741
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7040623426437378,
      "learning_rate": 0.0004430722201191449,
      "loss": 2.8985,
      "step": 78742
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.2692322731018066,
      "learning_rate": 0.00044306862469172974,
      "loss": 2.9433,
      "step": 78743
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.279658317565918,
      "learning_rate": 0.0004430650292377154,
      "loss": 2.8898,
      "step": 78744
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.125246047973633,
      "learning_rate": 0.0004430614337571025,
      "loss": 3.0456,
      "step": 78745
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.052705764770508,
      "learning_rate": 0.0004430578382498918,
      "loss": 3.2059,
      "step": 78746
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4688533544540405,
      "learning_rate": 0.000443054242716084,
      "loss": 2.9206,
      "step": 78747
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5596911907196045,
      "learning_rate": 0.0004430506471556797,
      "loss": 2.8068,
      "step": 78748
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.831786870956421,
      "learning_rate": 0.0004430470515686795,
      "loss": 3.1134,
      "step": 78749
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.998380422592163,
      "learning_rate": 0.0004430434559550843,
      "loss": 2.8437,
      "step": 78750
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8209846019744873,
      "learning_rate": 0.0004430398603148945,
      "loss": 3.008,
      "step": 78751
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8140873908996582,
      "learning_rate": 0.00044303626464811094,
      "loss": 2.8674,
      "step": 78752
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7798796892166138,
      "learning_rate": 0.00044303266895473427,
      "loss": 2.9822,
      "step": 78753
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.985323190689087,
      "learning_rate": 0.00044302907323476504,
      "loss": 2.9011,
      "step": 78754
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.052868366241455,
      "learning_rate": 0.000443025477488204,
      "loss": 2.9339,
      "step": 78755
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5138680934906006,
      "learning_rate": 0.000443021881715052,
      "loss": 2.9903,
      "step": 78756
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8297785520553589,
      "learning_rate": 0.0004430182859153093,
      "loss": 3.1641,
      "step": 78757
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6251616477966309,
      "learning_rate": 0.00044301469008897694,
      "loss": 3.1004,
      "step": 78758
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4147435426712036,
      "learning_rate": 0.0004430110942360554,
      "loss": 3.221,
      "step": 78759
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5657984018325806,
      "learning_rate": 0.00044300749835654547,
      "loss": 2.963,
      "step": 78760
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5343947410583496,
      "learning_rate": 0.0004430039024504477,
      "loss": 2.9303,
      "step": 78761
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6459362506866455,
      "learning_rate": 0.0004430003065177628,
      "loss": 3.1182,
      "step": 78762
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5287550687789917,
      "learning_rate": 0.0004429967105584914,
      "loss": 3.0048,
      "step": 78763
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2224459648132324,
      "learning_rate": 0.0004429931145726343,
      "loss": 3.0445,
      "step": 78764
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6816160678863525,
      "learning_rate": 0.00044298951856019204,
      "loss": 2.928,
      "step": 78765
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.500478982925415,
      "learning_rate": 0.00044298592252116527,
      "loss": 2.9408,
      "step": 78766
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4238122701644897,
      "learning_rate": 0.0004429823264555548,
      "loss": 2.958,
      "step": 78767
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0999979972839355,
      "learning_rate": 0.00044297873036336117,
      "loss": 3.0621,
      "step": 78768
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8673080205917358,
      "learning_rate": 0.00044297513424458514,
      "loss": 3.0681,
      "step": 78769
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6269961595535278,
      "learning_rate": 0.0004429715380992273,
      "loss": 2.9512,
      "step": 78770
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6713099479675293,
      "learning_rate": 0.00044296794192728834,
      "loss": 3.0062,
      "step": 78771
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7340909242630005,
      "learning_rate": 0.000442964345728769,
      "loss": 3.0093,
      "step": 78772
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4543375968933105,
      "learning_rate": 0.0004429607495036698,
      "loss": 3.3691,
      "step": 78773
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.6144747734069824,
      "learning_rate": 0.00044295715325199156,
      "loss": 2.8905,
      "step": 78774
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.88491952419281,
      "learning_rate": 0.0004429535569737349,
      "loss": 3.0495,
      "step": 78775
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9182194471359253,
      "learning_rate": 0.00044294996066890043,
      "loss": 3.0311,
      "step": 78776
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.091306686401367,
      "learning_rate": 0.0004429463643374889,
      "loss": 3.0858,
      "step": 78777
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5568740367889404,
      "learning_rate": 0.00044294276797950095,
      "loss": 2.9762,
      "step": 78778
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3320860862731934,
      "learning_rate": 0.0004429391715949372,
      "loss": 2.9878,
      "step": 78779
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4041168689727783,
      "learning_rate": 0.00044293557518379836,
      "loss": 2.9513,
      "step": 78780
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7297608852386475,
      "learning_rate": 0.00044293197874608513,
      "loss": 3.0634,
      "step": 78781
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9429539442062378,
      "learning_rate": 0.00044292838228179816,
      "loss": 2.7747,
      "step": 78782
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.698177456855774,
      "learning_rate": 0.00044292478579093805,
      "loss": 3.2941,
      "step": 78783
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.6073007583618164,
      "learning_rate": 0.0004429211892735056,
      "loss": 3.1054,
      "step": 78784
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8944309949874878,
      "learning_rate": 0.00044291759272950136,
      "loss": 3.0644,
      "step": 78785
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4471383094787598,
      "learning_rate": 0.000442913996158926,
      "loss": 3.0687,
      "step": 78786
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.950143814086914,
      "learning_rate": 0.0004429103995617803,
      "loss": 3.0023,
      "step": 78787
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.815737009048462,
      "learning_rate": 0.0004429068029380648,
      "loss": 2.7851,
      "step": 78788
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8017168045043945,
      "learning_rate": 0.00044290320628778023,
      "loss": 3.0392,
      "step": 78789
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8644843101501465,
      "learning_rate": 0.0004428996096109273,
      "loss": 3.0551,
      "step": 78790
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.6792023181915283,
      "learning_rate": 0.00044289601290750663,
      "loss": 3.0008,
      "step": 78791
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8348584175109863,
      "learning_rate": 0.0004428924161775189,
      "loss": 3.0028,
      "step": 78792
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.250195026397705,
      "learning_rate": 0.0004428888194209647,
      "loss": 2.8448,
      "step": 78793
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.923190951347351,
      "learning_rate": 0.00044288522263784486,
      "loss": 3.2212,
      "step": 78794
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.728821039199829,
      "learning_rate": 0.0004428816258281599,
      "loss": 2.6679,
      "step": 78795
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7383842468261719,
      "learning_rate": 0.00044287802899191055,
      "loss": 3.214,
      "step": 78796
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.98142671585083,
      "learning_rate": 0.0004428744321290975,
      "loss": 3.0182,
      "step": 78797
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8139275312423706,
      "learning_rate": 0.0004428708352397214,
      "loss": 3.3236,
      "step": 78798
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.629590392112732,
      "learning_rate": 0.0004428672383237829,
      "loss": 3.2032,
      "step": 78799
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5693095922470093,
      "learning_rate": 0.0004428636413812827,
      "loss": 3.0759,
      "step": 78800
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6012200117111206,
      "learning_rate": 0.00044286004441222146,
      "loss": 3.0147,
      "step": 78801
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9937266111373901,
      "learning_rate": 0.0004428564474165998,
      "loss": 3.0674,
      "step": 78802
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5646402835845947,
      "learning_rate": 0.0004428528503944185,
      "loss": 2.7878,
      "step": 78803
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.091057538986206,
      "learning_rate": 0.00044284925334567803,
      "loss": 2.7156,
      "step": 78804
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9168388843536377,
      "learning_rate": 0.0004428456562703794,
      "loss": 2.9671,
      "step": 78805
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.446697473526001,
      "learning_rate": 0.0004428420591685228,
      "loss": 3.0114,
      "step": 78806
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2759594917297363,
      "learning_rate": 0.0004428384620401094,
      "loss": 2.8705,
      "step": 78807
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9399815797805786,
      "learning_rate": 0.00044283486488513955,
      "loss": 3.0811,
      "step": 78808
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.567190647125244,
      "learning_rate": 0.00044283126770361397,
      "loss": 3.0131,
      "step": 78809
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8706486225128174,
      "learning_rate": 0.00044282767049553336,
      "loss": 2.9163,
      "step": 78810
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.614038348197937,
      "learning_rate": 0.00044282407326089846,
      "loss": 3.1966,
      "step": 78811
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4650688171386719,
      "learning_rate": 0.0004428204759997098,
      "loss": 3.0664,
      "step": 78812
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4436287879943848,
      "learning_rate": 0.0004428168787119681,
      "loss": 3.0732,
      "step": 78813
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.723983645439148,
      "learning_rate": 0.0004428132813976741,
      "loss": 2.9665,
      "step": 78814
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7827355861663818,
      "learning_rate": 0.0004428096840568284,
      "loss": 3.2736,
      "step": 78815
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.227024555206299,
      "learning_rate": 0.0004428060866894317,
      "loss": 2.8527,
      "step": 78816
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.829809308052063,
      "learning_rate": 0.0004428024892954847,
      "loss": 3.2361,
      "step": 78817
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0441296100616455,
      "learning_rate": 0.0004427988918749879,
      "loss": 2.8129,
      "step": 78818
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.718981146812439,
      "learning_rate": 0.00044279529442794216,
      "loss": 3.0298,
      "step": 78819
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.975243091583252,
      "learning_rate": 0.0004427916969543481,
      "loss": 3.0868,
      "step": 78820
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.104140520095825,
      "learning_rate": 0.0004427880994542063,
      "loss": 3.0379,
      "step": 78821
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.415895938873291,
      "learning_rate": 0.00044278450192751756,
      "loss": 2.9698,
      "step": 78822
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6980582475662231,
      "learning_rate": 0.0004427809043742825,
      "loss": 2.9342,
      "step": 78823
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6412301063537598,
      "learning_rate": 0.0004427773067945017,
      "loss": 3.0433,
      "step": 78824
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.198594331741333,
      "learning_rate": 0.0004427737091881759,
      "loss": 3.2093,
      "step": 78825
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7830709218978882,
      "learning_rate": 0.00044277011155530584,
      "loss": 3.137,
      "step": 78826
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0915935039520264,
      "learning_rate": 0.00044276651389589213,
      "loss": 2.9775,
      "step": 78827
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7231500148773193,
      "learning_rate": 0.00044276291620993537,
      "loss": 3.083,
      "step": 78828
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5771931409835815,
      "learning_rate": 0.0004427593184974363,
      "loss": 3.2562,
      "step": 78829
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.408076763153076,
      "learning_rate": 0.00044275572075839554,
      "loss": 3.0064,
      "step": 78830
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.835951566696167,
      "learning_rate": 0.00044275212299281383,
      "loss": 3.0943,
      "step": 78831
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5772770643234253,
      "learning_rate": 0.0004427485252006919,
      "loss": 2.8904,
      "step": 78832
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6388905048370361,
      "learning_rate": 0.00044274492738203023,
      "loss": 2.9013,
      "step": 78833
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7583855390548706,
      "learning_rate": 0.0004427413295368296,
      "loss": 2.8431,
      "step": 78834
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.7682933807373047,
      "learning_rate": 0.0004427377316650907,
      "loss": 3.0587,
      "step": 78835
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3110932111740112,
      "learning_rate": 0.0004427341337668141,
      "loss": 3.1703,
      "step": 78836
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7957388162612915,
      "learning_rate": 0.00044273053584200056,
      "loss": 3.1549,
      "step": 78837
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.418060064315796,
      "learning_rate": 0.00044272693789065067,
      "loss": 2.9989,
      "step": 78838
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.450326919555664,
      "learning_rate": 0.00044272333991276526,
      "loss": 2.9787,
      "step": 78839
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.688065767288208,
      "learning_rate": 0.0004427197419083447,
      "loss": 2.8823,
      "step": 78840
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4517154693603516,
      "learning_rate": 0.00044271614387739,
      "loss": 2.8875,
      "step": 78841
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5560531616210938,
      "learning_rate": 0.00044271254581990165,
      "loss": 3.006,
      "step": 78842
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1580116748809814,
      "learning_rate": 0.0004427089477358803,
      "loss": 2.9272,
      "step": 78843
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.901893138885498,
      "learning_rate": 0.0004427053496253267,
      "loss": 3.0814,
      "step": 78844
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.660826563835144,
      "learning_rate": 0.0004427017514882415,
      "loss": 3.0664,
      "step": 78845
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5151705741882324,
      "learning_rate": 0.00044269815332462534,
      "loss": 3.2128,
      "step": 78846
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7812161445617676,
      "learning_rate": 0.00044269455513447885,
      "loss": 3.0821,
      "step": 78847
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.919536828994751,
      "learning_rate": 0.00044269095691780284,
      "loss": 2.9,
      "step": 78848
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5702449083328247,
      "learning_rate": 0.0004426873586745978,
      "loss": 3.0138,
      "step": 78849
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3221633434295654,
      "learning_rate": 0.00044268376040486454,
      "loss": 3.1959,
      "step": 78850
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.0208024978637695,
      "learning_rate": 0.0004426801621086037,
      "loss": 3.1055,
      "step": 78851
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2055373191833496,
      "learning_rate": 0.00044267656378581586,
      "loss": 3.1328,
      "step": 78852
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.6899542808532715,
      "learning_rate": 0.0004426729654365017,
      "loss": 2.7816,
      "step": 78853
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0501303672790527,
      "learning_rate": 0.0004426693670606621,
      "loss": 2.9735,
      "step": 78854
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1440436840057373,
      "learning_rate": 0.00044266576865829746,
      "loss": 3.0869,
      "step": 78855
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.85163152217865,
      "learning_rate": 0.00044266217022940856,
      "loss": 2.9276,
      "step": 78856
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.238699436187744,
      "learning_rate": 0.0004426585717739961,
      "loss": 2.926,
      "step": 78857
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.308384656906128,
      "learning_rate": 0.0004426549732920607,
      "loss": 3.0102,
      "step": 78858
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8284997940063477,
      "learning_rate": 0.00044265137478360315,
      "loss": 3.1015,
      "step": 78859
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.674578070640564,
      "learning_rate": 0.0004426477762486239,
      "loss": 2.9013,
      "step": 78860
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.200514554977417,
      "learning_rate": 0.00044264417768712376,
      "loss": 2.7514,
      "step": 78861
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.991379737854004,
      "learning_rate": 0.00044264057909910336,
      "loss": 2.8958,
      "step": 78862
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4865082502365112,
      "learning_rate": 0.0004426369804845635,
      "loss": 2.9049,
      "step": 78863
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8294295072555542,
      "learning_rate": 0.0004426333818435045,
      "loss": 2.7925,
      "step": 78864
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.708724021911621,
      "learning_rate": 0.0004426297831759275,
      "loss": 3.2052,
      "step": 78865
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5798683166503906,
      "learning_rate": 0.0004426261844818328,
      "loss": 2.8955,
      "step": 78866
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8311693668365479,
      "learning_rate": 0.00044262258576122126,
      "loss": 3.0357,
      "step": 78867
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7801347970962524,
      "learning_rate": 0.00044261898701409343,
      "loss": 3.02,
      "step": 78868
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4816668033599854,
      "learning_rate": 0.0004426153882404501,
      "loss": 3.0155,
      "step": 78869
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.69941782951355,
      "learning_rate": 0.00044261178944029187,
      "loss": 3.22,
      "step": 78870
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.075958490371704,
      "learning_rate": 0.00044260819061361933,
      "loss": 3.021,
      "step": 78871
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.12429141998291,
      "learning_rate": 0.00044260459176043333,
      "loss": 3.0406,
      "step": 78872
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8736401796340942,
      "learning_rate": 0.0004426009928807344,
      "loss": 3.3019,
      "step": 78873
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.007080554962158,
      "learning_rate": 0.00044259739397452323,
      "loss": 3.1957,
      "step": 78874
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.843393087387085,
      "learning_rate": 0.0004425937950418006,
      "loss": 2.8851,
      "step": 78875
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9777976274490356,
      "learning_rate": 0.00044259019608256704,
      "loss": 3.1708,
      "step": 78876
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1983304023742676,
      "learning_rate": 0.0004425865970968232,
      "loss": 2.9632,
      "step": 78877
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8623626232147217,
      "learning_rate": 0.0004425829980845699,
      "loss": 2.9969,
      "step": 78878
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4600298404693604,
      "learning_rate": 0.00044257939904580776,
      "loss": 3.0176,
      "step": 78879
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5157485008239746,
      "learning_rate": 0.00044257579998053734,
      "loss": 3.0562,
      "step": 78880
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5789704322814941,
      "learning_rate": 0.0004425722008887594,
      "loss": 2.6397,
      "step": 78881
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0286223888397217,
      "learning_rate": 0.0004425686017704746,
      "loss": 2.8448,
      "step": 78882
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9955295324325562,
      "learning_rate": 0.0004425650026256837,
      "loss": 3.1906,
      "step": 78883
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.401766061782837,
      "learning_rate": 0.0004425614034543872,
      "loss": 3.0598,
      "step": 78884
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.495809316635132,
      "learning_rate": 0.0004425578042565858,
      "loss": 2.9536,
      "step": 78885
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7756468057632446,
      "learning_rate": 0.0004425542050322803,
      "loss": 2.9487,
      "step": 78886
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5297266244888306,
      "learning_rate": 0.00044255060578147126,
      "loss": 2.9467,
      "step": 78887
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7100688219070435,
      "learning_rate": 0.0004425470065041593,
      "loss": 3.1136,
      "step": 78888
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6705461740493774,
      "learning_rate": 0.00044254340720034516,
      "loss": 2.9169,
      "step": 78889
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.820554494857788,
      "learning_rate": 0.00044253980787002965,
      "loss": 3.1619,
      "step": 78890
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9099737405776978,
      "learning_rate": 0.0004425362085132132,
      "loss": 3.0668,
      "step": 78891
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0694684982299805,
      "learning_rate": 0.00044253260912989657,
      "loss": 2.8754,
      "step": 78892
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.235910654067993,
      "learning_rate": 0.0004425290097200805,
      "loss": 3.0547,
      "step": 78893
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.82555091381073,
      "learning_rate": 0.0004425254102837655,
      "loss": 3.2638,
      "step": 78894
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.281155824661255,
      "learning_rate": 0.00044252181082095234,
      "loss": 3.0117,
      "step": 78895
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6606968641281128,
      "learning_rate": 0.00044251821133164184,
      "loss": 3.0378,
      "step": 78896
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6009925603866577,
      "learning_rate": 0.0004425146118158344,
      "loss": 3.2997,
      "step": 78897
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.654792070388794,
      "learning_rate": 0.0004425110122735308,
      "loss": 2.9344,
      "step": 78898
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.292466163635254,
      "learning_rate": 0.00044250741270473174,
      "loss": 2.869,
      "step": 78899
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6398953199386597,
      "learning_rate": 0.00044250381310943787,
      "loss": 3.0531,
      "step": 78900
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.620144248008728,
      "learning_rate": 0.00044250021348764984,
      "loss": 3.0471,
      "step": 78901
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7154390811920166,
      "learning_rate": 0.0004424966138393683,
      "loss": 2.9072,
      "step": 78902
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4052668809890747,
      "learning_rate": 0.000442493014164594,
      "loss": 3.1129,
      "step": 78903
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5021405220031738,
      "learning_rate": 0.00044248941446332744,
      "loss": 2.8594,
      "step": 78904
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.748908519744873,
      "learning_rate": 0.0004424858147355695,
      "loss": 3.0864,
      "step": 78905
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.593135952949524,
      "learning_rate": 0.00044248221498132084,
      "loss": 2.9509,
      "step": 78906
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4538934230804443,
      "learning_rate": 0.00044247861520058194,
      "loss": 2.8957,
      "step": 78907
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4172191619873047,
      "learning_rate": 0.00044247501539335354,
      "loss": 3.2428,
      "step": 78908
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6182093620300293,
      "learning_rate": 0.00044247141555963645,
      "loss": 3.0093,
      "step": 78909
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4956979751586914,
      "learning_rate": 0.00044246781569943117,
      "loss": 2.8108,
      "step": 78910
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8538775444030762,
      "learning_rate": 0.00044246421581273845,
      "loss": 2.7068,
      "step": 78911
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0271456241607666,
      "learning_rate": 0.00044246061589955894,
      "loss": 2.9283,
      "step": 78912
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7543296813964844,
      "learning_rate": 0.0004424570159598933,
      "loss": 3.0908,
      "step": 78913
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7330125570297241,
      "learning_rate": 0.0004424534159937422,
      "loss": 2.9061,
      "step": 78914
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1499805450439453,
      "learning_rate": 0.0004424498160011064,
      "loss": 3.0297,
      "step": 78915
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7323315143585205,
      "learning_rate": 0.00044244621598198646,
      "loss": 3.2128,
      "step": 78916
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9366117715835571,
      "learning_rate": 0.00044244261593638304,
      "loss": 3.0674,
      "step": 78917
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1818058490753174,
      "learning_rate": 0.00044243901586429695,
      "loss": 2.9386,
      "step": 78918
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5185306072235107,
      "learning_rate": 0.0004424354157657286,
      "loss": 3.0219,
      "step": 78919
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6417694091796875,
      "learning_rate": 0.0004424318156406789,
      "loss": 3.0407,
      "step": 78920
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6801186800003052,
      "learning_rate": 0.0004424282154891484,
      "loss": 2.9842,
      "step": 78921
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7662925720214844,
      "learning_rate": 0.0004424246153111379,
      "loss": 2.9853,
      "step": 78922
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6407378911972046,
      "learning_rate": 0.0004424210151066479,
      "loss": 3.0153,
      "step": 78923
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.1785080432891846,
      "learning_rate": 0.00044241741487567913,
      "loss": 2.8608,
      "step": 78924
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.046602249145508,
      "learning_rate": 0.0004424138146182323,
      "loss": 2.8485,
      "step": 78925
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.551124095916748,
      "learning_rate": 0.000442410214334308,
      "loss": 3.0947,
      "step": 78926
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9004861116409302,
      "learning_rate": 0.0004424066140239071,
      "loss": 3.2682,
      "step": 78927
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1758921146392822,
      "learning_rate": 0.00044240301368703,
      "loss": 3.0405,
      "step": 78928
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.8277831077575684,
      "learning_rate": 0.00044239941332367753,
      "loss": 2.9341,
      "step": 78929
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.470733165740967,
      "learning_rate": 0.00044239581293385033,
      "loss": 2.8033,
      "step": 78930
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6777504682540894,
      "learning_rate": 0.000442392212517549,
      "loss": 3.097,
      "step": 78931
    },
    {
      "epoch": 1.03,
      "grad_norm": 5.0101189613342285,
      "learning_rate": 0.0004423886120747743,
      "loss": 2.8145,
      "step": 78932
    },
    {
      "epoch": 1.03,
      "grad_norm": 6.547985076904297,
      "learning_rate": 0.000442385011605527,
      "loss": 3.133,
      "step": 78933
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.9534921646118164,
      "learning_rate": 0.0004423814111098075,
      "loss": 3.1873,
      "step": 78934
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.290076971054077,
      "learning_rate": 0.00044237781058761654,
      "loss": 2.9476,
      "step": 78935
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8770252466201782,
      "learning_rate": 0.00044237421003895497,
      "loss": 3.2138,
      "step": 78936
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.4091968536376953,
      "learning_rate": 0.00044237060946382336,
      "loss": 3.0819,
      "step": 78937
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.7770864963531494,
      "learning_rate": 0.00044236700886222225,
      "loss": 2.8638,
      "step": 78938
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.1200881004333496,
      "learning_rate": 0.00044236340823415257,
      "loss": 3.179,
      "step": 78939
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3804223537445068,
      "learning_rate": 0.00044235980757961475,
      "loss": 3.0785,
      "step": 78940
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.567042589187622,
      "learning_rate": 0.0004423562068986096,
      "loss": 2.9148,
      "step": 78941
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5588324069976807,
      "learning_rate": 0.0004423526061911378,
      "loss": 3.004,
      "step": 78942
    },
    {
      "epoch": 1.03,
      "grad_norm": 4.3475823402404785,
      "learning_rate": 0.0004423490054571998,
      "loss": 2.8051,
      "step": 78943
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6952390670776367,
      "learning_rate": 0.0004423454046967965,
      "loss": 3.028,
      "step": 78944
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7439717054367065,
      "learning_rate": 0.00044234180390992866,
      "loss": 2.957,
      "step": 78945
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.1550586223602295,
      "learning_rate": 0.0004423382030965966,
      "loss": 3.1364,
      "step": 78946
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.2060132026672363,
      "learning_rate": 0.0004423346022568012,
      "loss": 3.0537,
      "step": 78947
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5927727222442627,
      "learning_rate": 0.00044233100139054324,
      "loss": 3.0853,
      "step": 78948
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.379697561264038,
      "learning_rate": 0.00044232740049782307,
      "loss": 2.8866,
      "step": 78949
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.9048540592193604,
      "learning_rate": 0.00044232379957864174,
      "loss": 2.9592,
      "step": 78950
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.6645357608795166,
      "learning_rate": 0.00044232019863299965,
      "loss": 3.1959,
      "step": 78951
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.650831937789917,
      "learning_rate": 0.0004423165976608975,
      "loss": 2.8641,
      "step": 78952
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5274815559387207,
      "learning_rate": 0.00044231299666233604,
      "loss": 3.1482,
      "step": 78953
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7533243894577026,
      "learning_rate": 0.00044230939563731593,
      "loss": 2.9275,
      "step": 78954
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.0489678382873535,
      "learning_rate": 0.00044230579458583776,
      "loss": 3.3111,
      "step": 78955
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.849575996398926,
      "learning_rate": 0.00044230219350790235,
      "loss": 2.8482,
      "step": 78956
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.616487979888916,
      "learning_rate": 0.00044229859240351024,
      "loss": 2.8904,
      "step": 78957
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2017619609832764,
      "learning_rate": 0.0004422949912726621,
      "loss": 3.0435,
      "step": 78958
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2169501781463623,
      "learning_rate": 0.0004422913901153586,
      "loss": 3.0682,
      "step": 78959
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.7206246852874756,
      "learning_rate": 0.0004422877889316006,
      "loss": 2.9541,
      "step": 78960
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.637602686882019,
      "learning_rate": 0.0004422841877213885,
      "loss": 3.2227,
      "step": 78961
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6356971263885498,
      "learning_rate": 0.00044228058648472306,
      "loss": 3.1059,
      "step": 78962
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7288967370986938,
      "learning_rate": 0.00044227698522160495,
      "loss": 3.0467,
      "step": 78963
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6710036993026733,
      "learning_rate": 0.000442273383932035,
      "loss": 3.085,
      "step": 78964
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7532551288604736,
      "learning_rate": 0.0004422697826160136,
      "loss": 3.0459,
      "step": 78965
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.599164366722107,
      "learning_rate": 0.0004422661812735416,
      "loss": 2.8949,
      "step": 78966
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.540022850036621,
      "learning_rate": 0.00044226257990461973,
      "loss": 3.1656,
      "step": 78967
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.356008529663086,
      "learning_rate": 0.0004422589785092484,
      "loss": 3.0019,
      "step": 78968
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6061385869979858,
      "learning_rate": 0.00044225537708742857,
      "loss": 2.9746,
      "step": 78969
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5658982992172241,
      "learning_rate": 0.0004422517756391607,
      "loss": 2.985,
      "step": 78970
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4141299724578857,
      "learning_rate": 0.0004422481741644457,
      "loss": 3.0989,
      "step": 78971
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3608087301254272,
      "learning_rate": 0.00044224457266328393,
      "loss": 3.0199,
      "step": 78972
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.032790422439575,
      "learning_rate": 0.00044224097113567616,
      "loss": 3.0372,
      "step": 78973
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9322808980941772,
      "learning_rate": 0.00044223736958162323,
      "loss": 2.836,
      "step": 78974
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.769636631011963,
      "learning_rate": 0.0004422337680011257,
      "loss": 2.9138,
      "step": 78975
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.614593744277954,
      "learning_rate": 0.00044223016639418413,
      "loss": 3.284,
      "step": 78976
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8619978427886963,
      "learning_rate": 0.00044222656476079926,
      "loss": 3.1027,
      "step": 78977
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8951678276062012,
      "learning_rate": 0.00044222296310097195,
      "loss": 2.9598,
      "step": 78978
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.391119122505188,
      "learning_rate": 0.00044221936141470257,
      "loss": 2.932,
      "step": 78979
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.374645471572876,
      "learning_rate": 0.00044221575970199194,
      "loss": 3.0682,
      "step": 78980
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1046323776245117,
      "learning_rate": 0.0004422121579628407,
      "loss": 2.8933,
      "step": 78981
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5325191020965576,
      "learning_rate": 0.00044220855619724964,
      "loss": 2.7465,
      "step": 78982
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5869988203048706,
      "learning_rate": 0.0004422049544052192,
      "loss": 3.0886,
      "step": 78983
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8242850303649902,
      "learning_rate": 0.0004422013525867503,
      "loss": 3.1004,
      "step": 78984
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8922197818756104,
      "learning_rate": 0.00044219775074184344,
      "loss": 3.1334,
      "step": 78985
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.83004629611969,
      "learning_rate": 0.0004421941488704993,
      "loss": 3.0759,
      "step": 78986
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.23258113861084,
      "learning_rate": 0.00044219054697271855,
      "loss": 2.8206,
      "step": 78987
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.962488889694214,
      "learning_rate": 0.00044218694504850203,
      "loss": 2.9687,
      "step": 78988
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0473179817199707,
      "learning_rate": 0.00044218334309785017,
      "loss": 3.0872,
      "step": 78989
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7608320713043213,
      "learning_rate": 0.0004421797411207637,
      "loss": 3.0071,
      "step": 78990
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1467666625976562,
      "learning_rate": 0.00044217613911724346,
      "loss": 3.0982,
      "step": 78991
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4193227291107178,
      "learning_rate": 0.0004421725370872899,
      "loss": 3.0163,
      "step": 78992
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7375856637954712,
      "learning_rate": 0.0004421689350309038,
      "loss": 3.0604,
      "step": 78993
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4881185293197632,
      "learning_rate": 0.00044216533294808583,
      "loss": 2.8786,
      "step": 78994
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.408771276473999,
      "learning_rate": 0.0004421617308388366,
      "loss": 3.1825,
      "step": 78995
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.421880006790161,
      "learning_rate": 0.00044215812870315686,
      "loss": 3.0423,
      "step": 78996
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5650554895401,
      "learning_rate": 0.0004421545265410473,
      "loss": 3.1788,
      "step": 78997
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.738789677619934,
      "learning_rate": 0.0004421509243525084,
      "loss": 2.9541,
      "step": 78998
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7067815065383911,
      "learning_rate": 0.000442147322137541,
      "loss": 2.971,
      "step": 78999
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8930057287216187,
      "learning_rate": 0.0004421437198961458,
      "loss": 3.1317,
      "step": 79000
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8287216424942017,
      "learning_rate": 0.0004421401176283233,
      "loss": 2.9938,
      "step": 79001
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6693511009216309,
      "learning_rate": 0.00044213651533407433,
      "loss": 2.9719,
      "step": 79002
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.27258563041687,
      "learning_rate": 0.0004421329130133995,
      "loss": 3.1145,
      "step": 79003
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3723256587982178,
      "learning_rate": 0.0004421293106662994,
      "loss": 2.9973,
      "step": 79004
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7827365398406982,
      "learning_rate": 0.0004421257082927749,
      "loss": 2.9704,
      "step": 79005
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7644282579421997,
      "learning_rate": 0.00044212210589282655,
      "loss": 2.7965,
      "step": 79006
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5845046043395996,
      "learning_rate": 0.00044211850346645497,
      "loss": 2.9383,
      "step": 79007
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8757072687149048,
      "learning_rate": 0.0004421149010136608,
      "loss": 3.1413,
      "step": 79008
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7961788177490234,
      "learning_rate": 0.00044211129853444485,
      "loss": 3.0629,
      "step": 79009
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.991222858428955,
      "learning_rate": 0.00044210769602880774,
      "loss": 2.9639,
      "step": 79010
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8091657161712646,
      "learning_rate": 0.00044210409349675013,
      "loss": 3.0134,
      "step": 79011
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.986335039138794,
      "learning_rate": 0.00044210049093827274,
      "loss": 2.8647,
      "step": 79012
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.076536178588867,
      "learning_rate": 0.00044209688835337605,
      "loss": 2.713,
      "step": 79013
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0494048595428467,
      "learning_rate": 0.00044209328574206094,
      "loss": 2.9944,
      "step": 79014
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4567002058029175,
      "learning_rate": 0.000442089683104328,
      "loss": 3.1439,
      "step": 79015
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.08127760887146,
      "learning_rate": 0.0004420860804401779,
      "loss": 2.8762,
      "step": 79016
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5605955123901367,
      "learning_rate": 0.0004420824777496113,
      "loss": 3.2572,
      "step": 79017
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8976168632507324,
      "learning_rate": 0.000442078875032629,
      "loss": 3.1976,
      "step": 79018
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0208964347839355,
      "learning_rate": 0.0004420752722892314,
      "loss": 2.8176,
      "step": 79019
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.957266092300415,
      "learning_rate": 0.00044207166951941933,
      "loss": 2.9761,
      "step": 79020
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.091104507446289,
      "learning_rate": 0.0004420680667231936,
      "loss": 2.9644,
      "step": 79021
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.315657615661621,
      "learning_rate": 0.0004420644639005546,
      "loss": 2.8666,
      "step": 79022
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.128309965133667,
      "learning_rate": 0.0004420608610515032,
      "loss": 2.9754,
      "step": 79023
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8487025499343872,
      "learning_rate": 0.00044205725817604005,
      "loss": 2.8326,
      "step": 79024
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.808244228363037,
      "learning_rate": 0.00044205365527416574,
      "loss": 2.758,
      "step": 79025
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.552943468093872,
      "learning_rate": 0.00044205005234588087,
      "loss": 3.0347,
      "step": 79026
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.80865740776062,
      "learning_rate": 0.00044204644939118635,
      "loss": 3.2466,
      "step": 79027
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5820515155792236,
      "learning_rate": 0.0004420428464100826,
      "loss": 2.8736,
      "step": 79028
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.584672451019287,
      "learning_rate": 0.0004420392434025705,
      "loss": 2.9033,
      "step": 79029
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.292064666748047,
      "learning_rate": 0.0004420356403686506,
      "loss": 2.8897,
      "step": 79030
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.5035948753356934,
      "learning_rate": 0.0004420320373083235,
      "loss": 3.069,
      "step": 79031
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1297850608825684,
      "learning_rate": 0.00044202843422159007,
      "loss": 2.9485,
      "step": 79032
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0111918449401855,
      "learning_rate": 0.00044202483110845084,
      "loss": 2.9594,
      "step": 79033
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0823111534118652,
      "learning_rate": 0.0004420212279689065,
      "loss": 3.0143,
      "step": 79034
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7295161485671997,
      "learning_rate": 0.00044201762480295775,
      "loss": 3.1133,
      "step": 79035
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4554917812347412,
      "learning_rate": 0.00044201402161060525,
      "loss": 3.2292,
      "step": 79036
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5035605430603027,
      "learning_rate": 0.00044201041839184965,
      "loss": 2.8745,
      "step": 79037
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.714277982711792,
      "learning_rate": 0.0004420068151466916,
      "loss": 3.2215,
      "step": 79038
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6616603136062622,
      "learning_rate": 0.0004420032118751318,
      "loss": 3.1109,
      "step": 79039
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6714189052581787,
      "learning_rate": 0.000441999608577171,
      "loss": 3.1605,
      "step": 79040
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1527276039123535,
      "learning_rate": 0.0004419960052528098,
      "loss": 3.1479,
      "step": 79041
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.858814001083374,
      "learning_rate": 0.00044199240190204874,
      "loss": 2.9272,
      "step": 79042
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.481300950050354,
      "learning_rate": 0.0004419887985248887,
      "loss": 3.0184,
      "step": 79043
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5450615882873535,
      "learning_rate": 0.0004419851951213302,
      "loss": 3.1071,
      "step": 79044
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4320862293243408,
      "learning_rate": 0.0004419815916913741,
      "loss": 3.0809,
      "step": 79045
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.770754337310791,
      "learning_rate": 0.0004419779882350209,
      "loss": 2.8277,
      "step": 79046
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.578325629234314,
      "learning_rate": 0.00044197438475227126,
      "loss": 2.9999,
      "step": 79047
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.002976894378662,
      "learning_rate": 0.0004419707812431259,
      "loss": 3.1962,
      "step": 79048
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7072553634643555,
      "learning_rate": 0.00044196717770758555,
      "loss": 2.973,
      "step": 79049
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6875497102737427,
      "learning_rate": 0.0004419635741456507,
      "loss": 2.884,
      "step": 79050
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8026010990142822,
      "learning_rate": 0.00044195997055732224,
      "loss": 3.0879,
      "step": 79051
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8998836278915405,
      "learning_rate": 0.00044195636694260073,
      "loss": 2.8669,
      "step": 79052
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8532650470733643,
      "learning_rate": 0.0004419527633014868,
      "loss": 3.0304,
      "step": 79053
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4402142763137817,
      "learning_rate": 0.0004419491596339812,
      "loss": 3.102,
      "step": 79054
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7765793800354004,
      "learning_rate": 0.00044194555594008456,
      "loss": 2.9756,
      "step": 79055
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.491316795349121,
      "learning_rate": 0.00044194195221979764,
      "loss": 2.9728,
      "step": 79056
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4897217750549316,
      "learning_rate": 0.000441938348473121,
      "loss": 2.8606,
      "step": 79057
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5795698165893555,
      "learning_rate": 0.0004419347447000553,
      "loss": 2.7331,
      "step": 79058
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8022613525390625,
      "learning_rate": 0.0004419311409006013,
      "loss": 3.1574,
      "step": 79059
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.615092396736145,
      "learning_rate": 0.0004419275370747596,
      "loss": 3.097,
      "step": 79060
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6353607177734375,
      "learning_rate": 0.00044192393322253087,
      "loss": 2.8268,
      "step": 79061
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5047472715377808,
      "learning_rate": 0.0004419203293439158,
      "loss": 2.9268,
      "step": 79062
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7621641159057617,
      "learning_rate": 0.0004419167254389151,
      "loss": 2.7896,
      "step": 79063
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7494690418243408,
      "learning_rate": 0.0004419131215075294,
      "loss": 3.2324,
      "step": 79064
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6629747152328491,
      "learning_rate": 0.0004419095175497593,
      "loss": 3.2329,
      "step": 79065
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5457099676132202,
      "learning_rate": 0.00044190591356560563,
      "loss": 2.9608,
      "step": 79066
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.818732500076294,
      "learning_rate": 0.00044190230955506895,
      "loss": 2.8793,
      "step": 79067
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4380664825439453,
      "learning_rate": 0.00044189870551814995,
      "loss": 3.3001,
      "step": 79068
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3534324169158936,
      "learning_rate": 0.0004418951014548493,
      "loss": 2.7023,
      "step": 79069
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.043808937072754,
      "learning_rate": 0.00044189149736516763,
      "loss": 2.9497,
      "step": 79070
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4208917617797852,
      "learning_rate": 0.0004418878932491057,
      "loss": 3.0066,
      "step": 79071
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6967357397079468,
      "learning_rate": 0.0004418842891066641,
      "loss": 3.0629,
      "step": 79072
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.043076515197754,
      "learning_rate": 0.00044188068493784355,
      "loss": 3.0711,
      "step": 79073
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2006947994232178,
      "learning_rate": 0.00044187708074264475,
      "loss": 3.0509,
      "step": 79074
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6248149871826172,
      "learning_rate": 0.0004418734765210682,
      "loss": 3.1106,
      "step": 79075
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.785383939743042,
      "learning_rate": 0.00044186987227311486,
      "loss": 3.2393,
      "step": 79076
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4507761001586914,
      "learning_rate": 0.0004418662679987851,
      "loss": 3.0455,
      "step": 79077
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9360560178756714,
      "learning_rate": 0.0004418626636980798,
      "loss": 3.0261,
      "step": 79078
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.121985673904419,
      "learning_rate": 0.0004418590593709995,
      "loss": 3.0047,
      "step": 79079
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.229132890701294,
      "learning_rate": 0.0004418554550175449,
      "loss": 3.2468,
      "step": 79080
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2116713523864746,
      "learning_rate": 0.0004418518506377168,
      "loss": 2.8313,
      "step": 79081
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.029679775238037,
      "learning_rate": 0.0004418482462315157,
      "loss": 3.1407,
      "step": 79082
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0916364192962646,
      "learning_rate": 0.0004418446417989423,
      "loss": 3.012,
      "step": 79083
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1744332313537598,
      "learning_rate": 0.00044184103733999736,
      "loss": 2.9255,
      "step": 79084
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6374037265777588,
      "learning_rate": 0.00044183743285468145,
      "loss": 3.001,
      "step": 79085
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.675600051879883,
      "learning_rate": 0.00044183382834299536,
      "loss": 3.1376,
      "step": 79086
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5797133445739746,
      "learning_rate": 0.0004418302238049396,
      "loss": 2.9118,
      "step": 79087
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8930139541625977,
      "learning_rate": 0.00044182661924051497,
      "loss": 2.9321,
      "step": 79088
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.543380618095398,
      "learning_rate": 0.00044182301464972207,
      "loss": 2.9746,
      "step": 79089
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.527721881866455,
      "learning_rate": 0.00044181941003256154,
      "loss": 3.2013,
      "step": 79090
    },
    {
      "epoch": 1.03,
      "grad_norm": 4.44970178604126,
      "learning_rate": 0.00044181580538903424,
      "loss": 2.7857,
      "step": 79091
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.164255380630493,
      "learning_rate": 0.00044181220071914056,
      "loss": 3.0383,
      "step": 79092
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.299795150756836,
      "learning_rate": 0.0004418085960228814,
      "loss": 2.6797,
      "step": 79093
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.762352705001831,
      "learning_rate": 0.0004418049913002574,
      "loss": 2.9448,
      "step": 79094
    },
    {
      "epoch": 1.03,
      "grad_norm": 4.789447784423828,
      "learning_rate": 0.00044180138655126904,
      "loss": 2.9342,
      "step": 79095
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1916182041168213,
      "learning_rate": 0.0004417977817759172,
      "loss": 2.9484,
      "step": 79096
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.8068454265594482,
      "learning_rate": 0.0004417941769742025,
      "loss": 3.0089,
      "step": 79097
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0475709438323975,
      "learning_rate": 0.0004417905721461255,
      "loss": 3.0844,
      "step": 79098
    },
    {
      "epoch": 1.03,
      "grad_norm": 4.1011962890625,
      "learning_rate": 0.00044178696729168696,
      "loss": 3.0569,
      "step": 79099
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.4063034057617188,
      "learning_rate": 0.0004417833624108877,
      "loss": 3.0073,
      "step": 79100
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3868253231048584,
      "learning_rate": 0.0004417797575037281,
      "loss": 3.3036,
      "step": 79101
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4886316061019897,
      "learning_rate": 0.00044177615257020897,
      "loss": 2.8202,
      "step": 79102
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.861060380935669,
      "learning_rate": 0.0004417725476103311,
      "loss": 3.0904,
      "step": 79103
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5000295639038086,
      "learning_rate": 0.0004417689426240949,
      "loss": 3.0146,
      "step": 79104
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.348482847213745,
      "learning_rate": 0.0004417653376115012,
      "loss": 2.967,
      "step": 79105
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8429888486862183,
      "learning_rate": 0.0004417617325725506,
      "loss": 3.0414,
      "step": 79106
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7784268856048584,
      "learning_rate": 0.00044175812750724396,
      "loss": 3.0377,
      "step": 79107
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.9660422801971436,
      "learning_rate": 0.00044175452241558175,
      "loss": 3.0951,
      "step": 79108
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.948244333267212,
      "learning_rate": 0.00044175091729756465,
      "loss": 3.2561,
      "step": 79109
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0864312648773193,
      "learning_rate": 0.0004417473121531934,
      "loss": 2.9615,
      "step": 79110
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9316960573196411,
      "learning_rate": 0.00044174370698246864,
      "loss": 2.9593,
      "step": 79111
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8380188941955566,
      "learning_rate": 0.00044174010178539113,
      "loss": 3.0108,
      "step": 79112
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3310325145721436,
      "learning_rate": 0.0004417364965619614,
      "loss": 2.9664,
      "step": 79113
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4545197486877441,
      "learning_rate": 0.00044173289131218017,
      "loss": 3.1006,
      "step": 79114
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5667306184768677,
      "learning_rate": 0.0004417292860360481,
      "loss": 3.0484,
      "step": 79115
    },
    {
      "epoch": 1.03,
      "grad_norm": 4.849137306213379,
      "learning_rate": 0.00044172568073356594,
      "loss": 3.0491,
      "step": 79116
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.284363269805908,
      "learning_rate": 0.0004417220754047343,
      "loss": 2.8925,
      "step": 79117
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0401759147644043,
      "learning_rate": 0.00044171847004955377,
      "loss": 3.3671,
      "step": 79118
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.811846137046814,
      "learning_rate": 0.00044171486466802523,
      "loss": 2.7943,
      "step": 79119
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8921267986297607,
      "learning_rate": 0.0004417112592601491,
      "loss": 3.0022,
      "step": 79120
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.062354564666748,
      "learning_rate": 0.0004417076538259262,
      "loss": 3.2026,
      "step": 79121
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.5644752979278564,
      "learning_rate": 0.0004417040483653572,
      "loss": 3.0214,
      "step": 79122
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7203421592712402,
      "learning_rate": 0.00044170044287844274,
      "loss": 2.9882,
      "step": 79123
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.126455307006836,
      "learning_rate": 0.0004416968373651834,
      "loss": 3.072,
      "step": 79124
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7361215353012085,
      "learning_rate": 0.0004416932318255801,
      "loss": 3.2415,
      "step": 79125
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6851685047149658,
      "learning_rate": 0.00044168962625963323,
      "loss": 3.0772,
      "step": 79126
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.921700358390808,
      "learning_rate": 0.0004416860206673437,
      "loss": 3.0025,
      "step": 79127
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0089070796966553,
      "learning_rate": 0.000441682415048712,
      "loss": 3.1279,
      "step": 79128
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.9829206466674805,
      "learning_rate": 0.00044167880940373884,
      "loss": 3.2417,
      "step": 79129
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.941652536392212,
      "learning_rate": 0.0004416752037324249,
      "loss": 2.8971,
      "step": 79130
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5804283618927002,
      "learning_rate": 0.00044167159803477094,
      "loss": 3.182,
      "step": 79131
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8345214128494263,
      "learning_rate": 0.0004416679923107775,
      "loss": 2.9507,
      "step": 79132
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.331422805786133,
      "learning_rate": 0.0004416643865604453,
      "loss": 2.9598,
      "step": 79133
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5616430044174194,
      "learning_rate": 0.0004416607807837751,
      "loss": 3.1684,
      "step": 79134
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3406474590301514,
      "learning_rate": 0.00044165717498076743,
      "loss": 3.0217,
      "step": 79135
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.823091745376587,
      "learning_rate": 0.00044165356915142295,
      "loss": 3.005,
      "step": 79136
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.6568868160247803,
      "learning_rate": 0.0004416499632957425,
      "loss": 2.9357,
      "step": 79137
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9748427867889404,
      "learning_rate": 0.0004416463574137266,
      "loss": 3.0021,
      "step": 79138
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5528037548065186,
      "learning_rate": 0.00044164275150537595,
      "loss": 3.0705,
      "step": 79139
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.553922176361084,
      "learning_rate": 0.0004416391455706913,
      "loss": 2.9258,
      "step": 79140
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7490583658218384,
      "learning_rate": 0.00044163553960967325,
      "loss": 3.0004,
      "step": 79141
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.272880792617798,
      "learning_rate": 0.0004416319336223224,
      "loss": 2.7563,
      "step": 79142
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6079784631729126,
      "learning_rate": 0.00044162832760863955,
      "loss": 3.0571,
      "step": 79143
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.198672294616699,
      "learning_rate": 0.0004416247215686254,
      "loss": 3.0395,
      "step": 79144
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.871772050857544,
      "learning_rate": 0.00044162111550228043,
      "loss": 2.9658,
      "step": 79145
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5083369016647339,
      "learning_rate": 0.0004416175094096054,
      "loss": 2.941,
      "step": 79146
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6796884536743164,
      "learning_rate": 0.00044161390329060116,
      "loss": 2.9836,
      "step": 79147
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0178871154785156,
      "learning_rate": 0.0004416102971452681,
      "loss": 2.9038,
      "step": 79148
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7453335523605347,
      "learning_rate": 0.000441606690973607,
      "loss": 2.827,
      "step": 79149
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6440672874450684,
      "learning_rate": 0.00044160308477561854,
      "loss": 3.1382,
      "step": 79150
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.611850380897522,
      "learning_rate": 0.00044159947855130344,
      "loss": 2.9381,
      "step": 79151
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7524406909942627,
      "learning_rate": 0.00044159587230066234,
      "loss": 3.1284,
      "step": 79152
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.538586974143982,
      "learning_rate": 0.0004415922660236959,
      "loss": 3.1681,
      "step": 79153
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.969236969947815,
      "learning_rate": 0.0004415886597204047,
      "loss": 2.9867,
      "step": 79154
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5769343376159668,
      "learning_rate": 0.0004415850533907896,
      "loss": 2.6651,
      "step": 79155
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.724008560180664,
      "learning_rate": 0.0004415814470348511,
      "loss": 3.1255,
      "step": 79156
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.683184027671814,
      "learning_rate": 0.00044157784065258993,
      "loss": 3.2955,
      "step": 79157
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.657504677772522,
      "learning_rate": 0.00044157423424400687,
      "loss": 3.0442,
      "step": 79158
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0619192123413086,
      "learning_rate": 0.00044157062780910236,
      "loss": 3.0843,
      "step": 79159
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.834375262260437,
      "learning_rate": 0.0004415670213478772,
      "loss": 3.0069,
      "step": 79160
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3200457096099854,
      "learning_rate": 0.0004415634148603322,
      "loss": 3.0077,
      "step": 79161
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5990843772888184,
      "learning_rate": 0.00044155980834646773,
      "loss": 3.3202,
      "step": 79162
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.965654730796814,
      "learning_rate": 0.0004415562018062847,
      "loss": 3.1237,
      "step": 79163
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.588987112045288,
      "learning_rate": 0.0004415525952397837,
      "loss": 2.8046,
      "step": 79164
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8135367631912231,
      "learning_rate": 0.0004415489886469654,
      "loss": 3.1216,
      "step": 79165
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.492608904838562,
      "learning_rate": 0.00044154538202783043,
      "loss": 3.1839,
      "step": 79166
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.754429340362549,
      "learning_rate": 0.00044154177538237955,
      "loss": 3.0181,
      "step": 79167
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8372801542282104,
      "learning_rate": 0.0004415381687106133,
      "loss": 3.1162,
      "step": 79168
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4597371816635132,
      "learning_rate": 0.0004415345620125325,
      "loss": 2.9992,
      "step": 79169
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8162111043930054,
      "learning_rate": 0.0004415309552881377,
      "loss": 2.9114,
      "step": 79170
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5464391708374023,
      "learning_rate": 0.0004415273485374297,
      "loss": 3.0818,
      "step": 79171
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8640611171722412,
      "learning_rate": 0.0004415237417604091,
      "loss": 3.0876,
      "step": 79172
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9426928758621216,
      "learning_rate": 0.0004415201349570765,
      "loss": 2.8327,
      "step": 79173
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5674420595169067,
      "learning_rate": 0.00044151652812743266,
      "loss": 2.9958,
      "step": 79174
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.099942684173584,
      "learning_rate": 0.00044151292127147824,
      "loss": 2.7251,
      "step": 79175
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7910633087158203,
      "learning_rate": 0.0004415093143892139,
      "loss": 2.6769,
      "step": 79176
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.696990728378296,
      "learning_rate": 0.0004415057074806403,
      "loss": 3.1239,
      "step": 79177
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4399826526641846,
      "learning_rate": 0.0004415021005457581,
      "loss": 3.1357,
      "step": 79178
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5810121297836304,
      "learning_rate": 0.000441498493584568,
      "loss": 3.1539,
      "step": 79179
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.761881947517395,
      "learning_rate": 0.0004414948865970707,
      "loss": 3.1678,
      "step": 79180
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5358455181121826,
      "learning_rate": 0.0004414912795832667,
      "loss": 3.1211,
      "step": 79181
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5581903457641602,
      "learning_rate": 0.0004414876725431569,
      "loss": 3.0854,
      "step": 79182
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7269951105117798,
      "learning_rate": 0.0004414840654767419,
      "loss": 2.7229,
      "step": 79183
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7175893783569336,
      "learning_rate": 0.0004414804583840223,
      "loss": 2.8285,
      "step": 79184
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2523317337036133,
      "learning_rate": 0.00044147685126499876,
      "loss": 3.0666,
      "step": 79185
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.599428415298462,
      "learning_rate": 0.00044147324411967213,
      "loss": 2.9652,
      "step": 79186
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6425637006759644,
      "learning_rate": 0.00044146963694804286,
      "loss": 2.885,
      "step": 79187
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.478609561920166,
      "learning_rate": 0.0004414660297501117,
      "loss": 2.7949,
      "step": 79188
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.738457202911377,
      "learning_rate": 0.0004414624225258794,
      "loss": 3.1735,
      "step": 79189
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7423681020736694,
      "learning_rate": 0.0004414588152753465,
      "loss": 3.043,
      "step": 79190
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7130357027053833,
      "learning_rate": 0.00044145520799851375,
      "loss": 3.0294,
      "step": 79191
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2769854068756104,
      "learning_rate": 0.0004414516006953819,
      "loss": 3.0806,
      "step": 79192
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0216641426086426,
      "learning_rate": 0.0004414479933659514,
      "loss": 3.2469,
      "step": 79193
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.053940534591675,
      "learning_rate": 0.0004414443860102231,
      "loss": 3.2073,
      "step": 79194
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4936442375183105,
      "learning_rate": 0.00044144077862819765,
      "loss": 3.0616,
      "step": 79195
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9443227052688599,
      "learning_rate": 0.0004414371712198756,
      "loss": 2.8925,
      "step": 79196
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8267654180526733,
      "learning_rate": 0.0004414335637852578,
      "loss": 2.7927,
      "step": 79197
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5175971984863281,
      "learning_rate": 0.0004414299563243448,
      "loss": 3.299,
      "step": 79198
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2171671390533447,
      "learning_rate": 0.0004414263488371373,
      "loss": 2.9734,
      "step": 79199
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8398481607437134,
      "learning_rate": 0.000441422741323636,
      "loss": 3.1229,
      "step": 79200
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7492369413375854,
      "learning_rate": 0.00044141913378384156,
      "loss": 3.1178,
      "step": 79201
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9467217922210693,
      "learning_rate": 0.0004414155262177545,
      "loss": 3.1239,
      "step": 79202
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6074895858764648,
      "learning_rate": 0.00044141191862537575,
      "loss": 3.1748,
      "step": 79203
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.244198799133301,
      "learning_rate": 0.00044140831100670586,
      "loss": 2.9142,
      "step": 79204
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0581257343292236,
      "learning_rate": 0.00044140470336174537,
      "loss": 2.8937,
      "step": 79205
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.260861873626709,
      "learning_rate": 0.0004414010956904952,
      "loss": 2.9807,
      "step": 79206
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.766392469406128,
      "learning_rate": 0.0004413974879929559,
      "loss": 2.981,
      "step": 79207
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.538109302520752,
      "learning_rate": 0.00044139388026912804,
      "loss": 2.8709,
      "step": 79208
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4989112615585327,
      "learning_rate": 0.00044139027251901246,
      "loss": 3.0147,
      "step": 79209
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.491288423538208,
      "learning_rate": 0.0004413866647426098,
      "loss": 3.1005,
      "step": 79210
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.803682565689087,
      "learning_rate": 0.00044138305693992055,
      "loss": 3.0151,
      "step": 79211
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.69536554813385,
      "learning_rate": 0.0004413794491109456,
      "loss": 3.1234,
      "step": 79212
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9596561193466187,
      "learning_rate": 0.0004413758412556856,
      "loss": 3.0156,
      "step": 79213
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.969553828239441,
      "learning_rate": 0.0004413722333741411,
      "loss": 3.0147,
      "step": 79214
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7037962675094604,
      "learning_rate": 0.00044136862546631284,
      "loss": 3.1944,
      "step": 79215
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2646701335906982,
      "learning_rate": 0.0004413650175322015,
      "loss": 2.796,
      "step": 79216
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6779552698135376,
      "learning_rate": 0.0004413614095718077,
      "loss": 2.9935,
      "step": 79217
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6324812173843384,
      "learning_rate": 0.00044135780158513217,
      "loss": 3.0734,
      "step": 79218
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.775485873222351,
      "learning_rate": 0.00044135419357217557,
      "loss": 3.0448,
      "step": 79219
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6295409202575684,
      "learning_rate": 0.00044135058553293856,
      "loss": 3.1455,
      "step": 79220
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7131441831588745,
      "learning_rate": 0.00044134697746742174,
      "loss": 3.117,
      "step": 79221
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6819158792495728,
      "learning_rate": 0.00044134336937562597,
      "loss": 3.1523,
      "step": 79222
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0323829650878906,
      "learning_rate": 0.0004413397612575517,
      "loss": 3.2096,
      "step": 79223
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.487052083015442,
      "learning_rate": 0.00044133615311319965,
      "loss": 2.9504,
      "step": 79224
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7623571157455444,
      "learning_rate": 0.00044133254494257073,
      "loss": 3.2375,
      "step": 79225
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0449600219726562,
      "learning_rate": 0.00044132893674566525,
      "loss": 2.829,
      "step": 79226
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5651417970657349,
      "learning_rate": 0.0004413253285224841,
      "loss": 3.0661,
      "step": 79227
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5903172492980957,
      "learning_rate": 0.00044132172027302796,
      "loss": 2.9216,
      "step": 79228
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.057715892791748,
      "learning_rate": 0.0004413181119972974,
      "loss": 3.071,
      "step": 79229
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7819762229919434,
      "learning_rate": 0.0004413145036952931,
      "loss": 2.9036,
      "step": 79230
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7670848369598389,
      "learning_rate": 0.00044131089536701576,
      "loss": 3.1933,
      "step": 79231
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.189258098602295,
      "learning_rate": 0.00044130728701246614,
      "loss": 2.7225,
      "step": 79232
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.345322847366333,
      "learning_rate": 0.00044130367863164484,
      "loss": 3.1201,
      "step": 79233
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.539933681488037,
      "learning_rate": 0.0004413000702245524,
      "loss": 3.0837,
      "step": 79234
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6299244165420532,
      "learning_rate": 0.00044129646179118964,
      "loss": 2.8846,
      "step": 79235
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.622986912727356,
      "learning_rate": 0.0004412928533315573,
      "loss": 3.0099,
      "step": 79236
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5071929693222046,
      "learning_rate": 0.0004412892448456559,
      "loss": 2.9524,
      "step": 79237
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4708162546157837,
      "learning_rate": 0.00044128563633348615,
      "loss": 3.2701,
      "step": 79238
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8049330711364746,
      "learning_rate": 0.00044128202779504874,
      "loss": 2.8364,
      "step": 79239
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6547633409500122,
      "learning_rate": 0.00044127841923034426,
      "loss": 2.8262,
      "step": 79240
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4327905178070068,
      "learning_rate": 0.00044127481063937357,
      "loss": 2.8987,
      "step": 79241
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.705366611480713,
      "learning_rate": 0.00044127120202213716,
      "loss": 3.0953,
      "step": 79242
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5057897567749023,
      "learning_rate": 0.0004412675933786358,
      "loss": 3.301,
      "step": 79243
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4253937005996704,
      "learning_rate": 0.00044126398470887006,
      "loss": 3.0316,
      "step": 79244
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5692380666732788,
      "learning_rate": 0.0004412603760128407,
      "loss": 3.0791,
      "step": 79245
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.623471975326538,
      "learning_rate": 0.0004412567672905484,
      "loss": 2.994,
      "step": 79246
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8630154132843018,
      "learning_rate": 0.0004412531585419939,
      "loss": 2.8894,
      "step": 79247
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6135908365249634,
      "learning_rate": 0.00044124954976717764,
      "loss": 2.8388,
      "step": 79248
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2100448608398438,
      "learning_rate": 0.0004412459409661004,
      "loss": 3.1828,
      "step": 79249
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.784131407737732,
      "learning_rate": 0.00044124233213876305,
      "loss": 3.1731,
      "step": 79250
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9675261974334717,
      "learning_rate": 0.00044123872328516586,
      "loss": 2.9362,
      "step": 79251
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8475791215896606,
      "learning_rate": 0.0004412351144053099,
      "loss": 3.0286,
      "step": 79252
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.260761022567749,
      "learning_rate": 0.00044123150549919555,
      "loss": 2.9874,
      "step": 79253
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.683196783065796,
      "learning_rate": 0.00044122789656682364,
      "loss": 2.8163,
      "step": 79254
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5773800611495972,
      "learning_rate": 0.0004412242876081948,
      "loss": 3.0674,
      "step": 79255
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.610235333442688,
      "learning_rate": 0.00044122067862330976,
      "loss": 2.7957,
      "step": 79256
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1598033905029297,
      "learning_rate": 0.000441217069612169,
      "loss": 3.2773,
      "step": 79257
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0089476108551025,
      "learning_rate": 0.0004412134605747734,
      "loss": 2.9071,
      "step": 79258
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.072248935699463,
      "learning_rate": 0.0004412098515111236,
      "loss": 3.2725,
      "step": 79259
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1612517833709717,
      "learning_rate": 0.0004412062424212201,
      "loss": 3.081,
      "step": 79260
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6957961320877075,
      "learning_rate": 0.00044120263330506374,
      "loss": 2.9002,
      "step": 79261
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5766552686691284,
      "learning_rate": 0.0004411990241626552,
      "loss": 3.0033,
      "step": 79262
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.108973979949951,
      "learning_rate": 0.0004411954149939951,
      "loss": 2.9228,
      "step": 79263
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.872123122215271,
      "learning_rate": 0.00044119180579908405,
      "loss": 3.0556,
      "step": 79264
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6021331548690796,
      "learning_rate": 0.0004411881965779228,
      "loss": 3.2893,
      "step": 79265
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4934641122817993,
      "learning_rate": 0.000441184587330512,
      "loss": 3.0262,
      "step": 79266
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5836058855056763,
      "learning_rate": 0.0004411809780568523,
      "loss": 2.7686,
      "step": 79267
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4942727088928223,
      "learning_rate": 0.0004411773687569444,
      "loss": 3.177,
      "step": 79268
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.461265206336975,
      "learning_rate": 0.00044117375943078903,
      "loss": 2.9903,
      "step": 79269
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.836286187171936,
      "learning_rate": 0.00044117015007838663,
      "loss": 3.2879,
      "step": 79270
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5827008485794067,
      "learning_rate": 0.00044116654069973824,
      "loss": 3.0847,
      "step": 79271
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8555761575698853,
      "learning_rate": 0.0004411629312948442,
      "loss": 3.2341,
      "step": 79272
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9108411073684692,
      "learning_rate": 0.0004411593218637053,
      "loss": 2.8809,
      "step": 79273
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8197492361068726,
      "learning_rate": 0.0004411557124063223,
      "loss": 3.2183,
      "step": 79274
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6441634893417358,
      "learning_rate": 0.0004411521029226957,
      "loss": 3.3047,
      "step": 79275
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.7247424125671387,
      "learning_rate": 0.00044114849341282626,
      "loss": 2.7822,
      "step": 79276
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4085897207260132,
      "learning_rate": 0.00044114488387671474,
      "loss": 3.0223,
      "step": 79277
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4693667888641357,
      "learning_rate": 0.00044114127431436163,
      "loss": 3.2862,
      "step": 79278
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6939783096313477,
      "learning_rate": 0.00044113766472576773,
      "loss": 3.0174,
      "step": 79279
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9089425802230835,
      "learning_rate": 0.0004411340551109337,
      "loss": 2.9442,
      "step": 79280
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.6766815185546875,
      "learning_rate": 0.00044113044546986014,
      "loss": 2.9265,
      "step": 79281
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.72683048248291,
      "learning_rate": 0.00044112683580254774,
      "loss": 3.11,
      "step": 79282
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7450932264328003,
      "learning_rate": 0.0004411232261089973,
      "loss": 3.0395,
      "step": 79283
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4546607732772827,
      "learning_rate": 0.00044111961638920933,
      "loss": 2.9317,
      "step": 79284
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.904845118522644,
      "learning_rate": 0.0004411160066431845,
      "loss": 2.9755,
      "step": 79285
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.13384747505188,
      "learning_rate": 0.00044111239687092367,
      "loss": 3.2522,
      "step": 79286
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.224364757537842,
      "learning_rate": 0.0004411087870724273,
      "loss": 2.9357,
      "step": 79287
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.113550901412964,
      "learning_rate": 0.0004411051772476962,
      "loss": 2.8576,
      "step": 79288
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.76774263381958,
      "learning_rate": 0.00044110156739673095,
      "loss": 3.021,
      "step": 79289
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5933172702789307,
      "learning_rate": 0.0004410979575195322,
      "loss": 3.3723,
      "step": 79290
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.790709137916565,
      "learning_rate": 0.0004410943476161007,
      "loss": 3.2829,
      "step": 79291
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2449254989624023,
      "learning_rate": 0.00044109073768643716,
      "loss": 3.1085,
      "step": 79292
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1071178913116455,
      "learning_rate": 0.0004410871277305421,
      "loss": 3.0284,
      "step": 79293
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4338395595550537,
      "learning_rate": 0.0004410835177484163,
      "loss": 3.0965,
      "step": 79294
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4977681636810303,
      "learning_rate": 0.0004410799077400605,
      "loss": 2.9719,
      "step": 79295
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5542259216308594,
      "learning_rate": 0.00044107629770547517,
      "loss": 2.9805,
      "step": 79296
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6363856792449951,
      "learning_rate": 0.0004410726876446612,
      "loss": 2.9869,
      "step": 79297
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9919785261154175,
      "learning_rate": 0.00044106907755761906,
      "loss": 2.8148,
      "step": 79298
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4663357734680176,
      "learning_rate": 0.00044106546744434954,
      "loss": 2.8064,
      "step": 79299
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2389209270477295,
      "learning_rate": 0.0004410618573048533,
      "loss": 2.8958,
      "step": 79300
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9639959335327148,
      "learning_rate": 0.000441058247139131,
      "loss": 3.0925,
      "step": 79301
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8786758184432983,
      "learning_rate": 0.0004410546369471834,
      "loss": 2.936,
      "step": 79302
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.259977102279663,
      "learning_rate": 0.0004410510267290109,
      "loss": 3.2295,
      "step": 79303
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0587894916534424,
      "learning_rate": 0.00044104741648461445,
      "loss": 3.081,
      "step": 79304
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6689571142196655,
      "learning_rate": 0.00044104380621399466,
      "loss": 2.7792,
      "step": 79305
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.134699583053589,
      "learning_rate": 0.00044104019591715204,
      "loss": 2.8708,
      "step": 79306
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.590397834777832,
      "learning_rate": 0.0004410365855940875,
      "loss": 2.8926,
      "step": 79307
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.284344434738159,
      "learning_rate": 0.00044103297524480153,
      "loss": 3.0511,
      "step": 79308
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3891404867172241,
      "learning_rate": 0.00044102936486929494,
      "loss": 2.8003,
      "step": 79309
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8645919561386108,
      "learning_rate": 0.0004410257544675682,
      "loss": 3.0365,
      "step": 79310
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.074244499206543,
      "learning_rate": 0.00044102214403962225,
      "loss": 3.0118,
      "step": 79311
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9091469049453735,
      "learning_rate": 0.00044101853358545755,
      "loss": 2.9521,
      "step": 79312
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8159072399139404,
      "learning_rate": 0.0004410149231050749,
      "loss": 3.0327,
      "step": 79313
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0254855155944824,
      "learning_rate": 0.0004410113125984749,
      "loss": 3.0329,
      "step": 79314
    },
    {
      "epoch": 1.03,
      "grad_norm": 5.297191143035889,
      "learning_rate": 0.00044100770206565815,
      "loss": 2.9268,
      "step": 79315
    },
    {
      "epoch": 1.03,
      "grad_norm": 4.636382102966309,
      "learning_rate": 0.00044100409150662546,
      "loss": 3.0265,
      "step": 79316
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.249591112136841,
      "learning_rate": 0.00044100048092137747,
      "loss": 2.8909,
      "step": 79317
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7103856801986694,
      "learning_rate": 0.00044099687030991477,
      "loss": 3.1197,
      "step": 79318
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7382699251174927,
      "learning_rate": 0.0004409932596722382,
      "loss": 2.9941,
      "step": 79319
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.8597121238708496,
      "learning_rate": 0.00044098964900834823,
      "loss": 2.9129,
      "step": 79320
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.676769971847534,
      "learning_rate": 0.00044098603831824564,
      "loss": 2.9583,
      "step": 79321
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5921578407287598,
      "learning_rate": 0.0004409824276019311,
      "loss": 3.0259,
      "step": 79322
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.049171209335327,
      "learning_rate": 0.00044097881685940517,
      "loss": 3.128,
      "step": 79323
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8391255140304565,
      "learning_rate": 0.0004409752060906688,
      "loss": 3.1129,
      "step": 79324
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.696408987045288,
      "learning_rate": 0.00044097159529572235,
      "loss": 3.0291,
      "step": 79325
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9883419275283813,
      "learning_rate": 0.0004409679844745666,
      "loss": 2.9088,
      "step": 79326
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5841031074523926,
      "learning_rate": 0.00044096437362720227,
      "loss": 3.2744,
      "step": 79327
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6353662014007568,
      "learning_rate": 0.00044096076275363007,
      "loss": 2.8967,
      "step": 79328
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.853255033493042,
      "learning_rate": 0.0004409571518538506,
      "loss": 2.9767,
      "step": 79329
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.4738473892211914,
      "learning_rate": 0.0004409535409278644,
      "loss": 2.838,
      "step": 79330
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.804376244544983,
      "learning_rate": 0.0004409499299756724,
      "loss": 3.2292,
      "step": 79331
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.984013557434082,
      "learning_rate": 0.0004409463189972751,
      "loss": 3.2245,
      "step": 79332
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6532378196716309,
      "learning_rate": 0.0004409427079926732,
      "loss": 3.2001,
      "step": 79333
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3671813011169434,
      "learning_rate": 0.0004409390969618674,
      "loss": 2.7697,
      "step": 79334
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.761810064315796,
      "learning_rate": 0.0004409354859048585,
      "loss": 3.211,
      "step": 79335
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7645244598388672,
      "learning_rate": 0.0004409318748216468,
      "loss": 2.9369,
      "step": 79336
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4783958196640015,
      "learning_rate": 0.00044092826371223336,
      "loss": 2.9588,
      "step": 79337
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3739949464797974,
      "learning_rate": 0.00044092465257661865,
      "loss": 2.8576,
      "step": 79338
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8816359043121338,
      "learning_rate": 0.00044092104141480343,
      "loss": 2.9609,
      "step": 79339
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.855899453163147,
      "learning_rate": 0.0004409174302267882,
      "loss": 3.0877,
      "step": 79340
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6720666885375977,
      "learning_rate": 0.00044091381901257393,
      "loss": 3.2076,
      "step": 79341
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8384373188018799,
      "learning_rate": 0.00044091020777216105,
      "loss": 2.9409,
      "step": 79342
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.3976197242736816,
      "learning_rate": 0.0004409065965055502,
      "loss": 2.9109,
      "step": 79343
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6430895328521729,
      "learning_rate": 0.00044090298521274235,
      "loss": 2.7619,
      "step": 79344
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9917213916778564,
      "learning_rate": 0.0004408993738937379,
      "loss": 2.9658,
      "step": 79345
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8578264713287354,
      "learning_rate": 0.0004408957625485375,
      "loss": 3.0076,
      "step": 79346
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7883976697921753,
      "learning_rate": 0.00044089215117714206,
      "loss": 2.926,
      "step": 79347
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.9796793460845947,
      "learning_rate": 0.000440888539779552,
      "loss": 2.9257,
      "step": 79348
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6991820335388184,
      "learning_rate": 0.00044088492835576813,
      "loss": 2.8263,
      "step": 79349
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7985615730285645,
      "learning_rate": 0.0004408813169057911,
      "loss": 2.9336,
      "step": 79350
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9686172008514404,
      "learning_rate": 0.0004408777054296215,
      "loss": 3.1055,
      "step": 79351
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.728520154953003,
      "learning_rate": 0.00044087409392726024,
      "loss": 3.0769,
      "step": 79352
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8057620525360107,
      "learning_rate": 0.00044087048239870773,
      "loss": 2.7531,
      "step": 79353
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8661130666732788,
      "learning_rate": 0.00044086687084396475,
      "loss": 2.9888,
      "step": 79354
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6644141674041748,
      "learning_rate": 0.0004408632592630319,
      "loss": 2.9524,
      "step": 79355
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.854910135269165,
      "learning_rate": 0.00044085964765591,
      "loss": 3.0761,
      "step": 79356
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5432956218719482,
      "learning_rate": 0.00044085603602259956,
      "loss": 2.9739,
      "step": 79357
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9301493167877197,
      "learning_rate": 0.0004408524243631014,
      "loss": 2.7241,
      "step": 79358
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7127224206924438,
      "learning_rate": 0.0004408488126774161,
      "loss": 2.9473,
      "step": 79359
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.310682773590088,
      "learning_rate": 0.00044084520096554427,
      "loss": 3.161,
      "step": 79360
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8458964824676514,
      "learning_rate": 0.0004408415892274867,
      "loss": 3.1086,
      "step": 79361
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4940619468688965,
      "learning_rate": 0.0004408379774632441,
      "loss": 2.9596,
      "step": 79362
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.698379635810852,
      "learning_rate": 0.000440834365672817,
      "loss": 2.9155,
      "step": 79363
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5597134828567505,
      "learning_rate": 0.0004408307538562061,
      "loss": 3.2051,
      "step": 79364
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6399279832839966,
      "learning_rate": 0.0004408271420134122,
      "loss": 3.0221,
      "step": 79365
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7249560356140137,
      "learning_rate": 0.00044082353014443575,
      "loss": 2.8099,
      "step": 79366
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0142555236816406,
      "learning_rate": 0.00044081991824927757,
      "loss": 2.8851,
      "step": 79367
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8860728740692139,
      "learning_rate": 0.00044081630632793845,
      "loss": 2.885,
      "step": 79368
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5791786909103394,
      "learning_rate": 0.00044081269438041873,
      "loss": 2.9559,
      "step": 79369
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5018304586410522,
      "learning_rate": 0.0004408090824067194,
      "loss": 3.0705,
      "step": 79370
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.2787020206451416,
      "learning_rate": 0.00044080547040684087,
      "loss": 3.1757,
      "step": 79371
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1622564792633057,
      "learning_rate": 0.0004408018583807841,
      "loss": 2.8491,
      "step": 79372
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7255960702896118,
      "learning_rate": 0.0004407982463285495,
      "loss": 3.0759,
      "step": 79373
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4108121395111084,
      "learning_rate": 0.00044079463425013786,
      "loss": 3.0852,
      "step": 79374
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7272372245788574,
      "learning_rate": 0.00044079102214555,
      "loss": 2.892,
      "step": 79375
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1046764850616455,
      "learning_rate": 0.0004407874100147862,
      "loss": 3.0652,
      "step": 79376
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8170018196105957,
      "learning_rate": 0.00044078379785784754,
      "loss": 2.8171,
      "step": 79377
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4839117527008057,
      "learning_rate": 0.0004407801856747344,
      "loss": 3.24,
      "step": 79378
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8178731203079224,
      "learning_rate": 0.00044077657346544764,
      "loss": 2.9251,
      "step": 79379
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0229382514953613,
      "learning_rate": 0.0004407729612299878,
      "loss": 3.0101,
      "step": 79380
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6824387311935425,
      "learning_rate": 0.00044076934896835566,
      "loss": 2.9592,
      "step": 79381
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6709250211715698,
      "learning_rate": 0.00044076573668055176,
      "loss": 3.0695,
      "step": 79382
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5002974271774292,
      "learning_rate": 0.00044076212436657697,
      "loss": 3.1488,
      "step": 79383
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.117547035217285,
      "learning_rate": 0.00044075851202643175,
      "loss": 3.0435,
      "step": 79384
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.196122884750366,
      "learning_rate": 0.0004407548996601169,
      "loss": 3.1473,
      "step": 79385
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7455034255981445,
      "learning_rate": 0.000440751287267633,
      "loss": 2.8665,
      "step": 79386
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.7125473022460938,
      "learning_rate": 0.0004407476748489809,
      "loss": 3.0535,
      "step": 79387
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.569746494293213,
      "learning_rate": 0.00044074406240416107,
      "loss": 3.0251,
      "step": 79388
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6632550954818726,
      "learning_rate": 0.0004407404499331742,
      "loss": 3.1533,
      "step": 79389
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7691729068756104,
      "learning_rate": 0.0004407368374360212,
      "loss": 2.9862,
      "step": 79390
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1205692291259766,
      "learning_rate": 0.00044073322491270246,
      "loss": 3.0406,
      "step": 79391
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8534889221191406,
      "learning_rate": 0.00044072961236321873,
      "loss": 3.2701,
      "step": 79392
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6292054653167725,
      "learning_rate": 0.00044072599978757076,
      "loss": 3.1549,
      "step": 79393
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.8495123386383057,
      "learning_rate": 0.0004407223871857591,
      "loss": 2.8763,
      "step": 79394
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8471565246582031,
      "learning_rate": 0.00044071877455778455,
      "loss": 3.1336,
      "step": 79395
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.33063006401062,
      "learning_rate": 0.00044071516190364777,
      "loss": 2.9675,
      "step": 79396
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.580296277999878,
      "learning_rate": 0.00044071154922334935,
      "loss": 2.6852,
      "step": 79397
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5206716060638428,
      "learning_rate": 0.0004407079365168899,
      "loss": 2.8858,
      "step": 79398
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4389030933380127,
      "learning_rate": 0.00044070432378427027,
      "loss": 2.9409,
      "step": 79399
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0223374366760254,
      "learning_rate": 0.00044070071102549107,
      "loss": 2.6965,
      "step": 79400
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4648979902267456,
      "learning_rate": 0.00044069709824055294,
      "loss": 3.2775,
      "step": 79401
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9052283763885498,
      "learning_rate": 0.0004406934854294566,
      "loss": 3.0036,
      "step": 79402
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6715277433395386,
      "learning_rate": 0.0004406898725922026,
      "loss": 2.8772,
      "step": 79403
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5467242002487183,
      "learning_rate": 0.0004406862597287917,
      "loss": 3.0588,
      "step": 79404
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9808056354522705,
      "learning_rate": 0.00044068264683922467,
      "loss": 2.9685,
      "step": 79405
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.508873462677002,
      "learning_rate": 0.00044067903392350195,
      "loss": 2.9642,
      "step": 79406
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8225160837173462,
      "learning_rate": 0.00044067542098162436,
      "loss": 2.8934,
      "step": 79407
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5884466171264648,
      "learning_rate": 0.00044067180801359257,
      "loss": 3.038,
      "step": 79408
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5981091260910034,
      "learning_rate": 0.0004406681950194073,
      "loss": 3.0978,
      "step": 79409
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.795695424079895,
      "learning_rate": 0.00044066458199906917,
      "loss": 3.007,
      "step": 79410
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6403969526290894,
      "learning_rate": 0.00044066096895257876,
      "loss": 3.1829,
      "step": 79411
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9006778001785278,
      "learning_rate": 0.0004406573558799369,
      "loss": 3.0173,
      "step": 79412
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7425862550735474,
      "learning_rate": 0.00044065374278114404,
      "loss": 3.1639,
      "step": 79413
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5693469047546387,
      "learning_rate": 0.0004406501296562011,
      "loss": 2.8446,
      "step": 79414
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.919306755065918,
      "learning_rate": 0.00044064651650510864,
      "loss": 3.1036,
      "step": 79415
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6986823081970215,
      "learning_rate": 0.00044064290332786736,
      "loss": 2.8416,
      "step": 79416
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8099702596664429,
      "learning_rate": 0.0004406392901244778,
      "loss": 2.9172,
      "step": 79417
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7197980880737305,
      "learning_rate": 0.00044063567689494085,
      "loss": 2.9208,
      "step": 79418
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.510690450668335,
      "learning_rate": 0.00044063206363925705,
      "loss": 3.0065,
      "step": 79419
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5094534158706665,
      "learning_rate": 0.0004406284503574271,
      "loss": 2.9877,
      "step": 79420
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.904557466506958,
      "learning_rate": 0.00044062483704945164,
      "loss": 2.9753,
      "step": 79421
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5795273780822754,
      "learning_rate": 0.0004406212237153314,
      "loss": 2.993,
      "step": 79422
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.028261423110962,
      "learning_rate": 0.00044061761035506697,
      "loss": 2.9168,
      "step": 79423
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5065977573394775,
      "learning_rate": 0.0004406139969686591,
      "loss": 3.1101,
      "step": 79424
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6334081888198853,
      "learning_rate": 0.00044061038355610845,
      "loss": 3.0256,
      "step": 79425
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0393178462982178,
      "learning_rate": 0.00044060677011741566,
      "loss": 2.974,
      "step": 79426
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8794684410095215,
      "learning_rate": 0.00044060315665258145,
      "loss": 3.1676,
      "step": 79427
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.689164400100708,
      "learning_rate": 0.0004405995431616064,
      "loss": 2.9395,
      "step": 79428
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6762388944625854,
      "learning_rate": 0.0004405959296444913,
      "loss": 2.7637,
      "step": 79429
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9116013050079346,
      "learning_rate": 0.0004405923161012367,
      "loss": 3.0845,
      "step": 79430
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.374372720718384,
      "learning_rate": 0.00044058870253184336,
      "loss": 2.9585,
      "step": 79431
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.770448923110962,
      "learning_rate": 0.000440585088936312,
      "loss": 2.8963,
      "step": 79432
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6017415523529053,
      "learning_rate": 0.0004405814753146431,
      "loss": 2.9052,
      "step": 79433
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5896457433700562,
      "learning_rate": 0.00044057786166683754,
      "loss": 3.042,
      "step": 79434
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0908851623535156,
      "learning_rate": 0.00044057424799289586,
      "loss": 3.1028,
      "step": 79435
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5123729705810547,
      "learning_rate": 0.0004405706342928188,
      "loss": 2.9726,
      "step": 79436
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6605491638183594,
      "learning_rate": 0.00044056702056660696,
      "loss": 3.309,
      "step": 79437
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7946966886520386,
      "learning_rate": 0.00044056340681426103,
      "loss": 2.8865,
      "step": 79438
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4233975410461426,
      "learning_rate": 0.0004405597930357818,
      "loss": 3.1436,
      "step": 79439
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7519773244857788,
      "learning_rate": 0.0004405561792311698,
      "loss": 3.1751,
      "step": 79440
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.507309079170227,
      "learning_rate": 0.0004405525654004258,
      "loss": 3.3141,
      "step": 79441
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9153614044189453,
      "learning_rate": 0.0004405489515435504,
      "loss": 2.9602,
      "step": 79442
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5067243576049805,
      "learning_rate": 0.0004405453376605443,
      "loss": 3.082,
      "step": 79443
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5396634340286255,
      "learning_rate": 0.00044054172375140806,
      "loss": 3.2221,
      "step": 79444
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.7384326457977295,
      "learning_rate": 0.00044053810981614265,
      "loss": 3.1548,
      "step": 79445
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3952202796936035,
      "learning_rate": 0.00044053449585474845,
      "loss": 3.2082,
      "step": 79446
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0425679683685303,
      "learning_rate": 0.0004405308818672262,
      "loss": 3.0102,
      "step": 79447
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.215749740600586,
      "learning_rate": 0.0004405272678535767,
      "loss": 3.1531,
      "step": 79448
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3776466846466064,
      "learning_rate": 0.0004405236538138005,
      "loss": 3.1186,
      "step": 79449
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.7499585151672363,
      "learning_rate": 0.00044052003974789824,
      "loss": 3.1771,
      "step": 79450
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7532302141189575,
      "learning_rate": 0.00044051642565587075,
      "loss": 3.1574,
      "step": 79451
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.054928779602051,
      "learning_rate": 0.00044051281153771854,
      "loss": 3.0091,
      "step": 79452
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7364659309387207,
      "learning_rate": 0.00044050919739344226,
      "loss": 3.1681,
      "step": 79453
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8671599626541138,
      "learning_rate": 0.00044050558322304283,
      "loss": 2.8968,
      "step": 79454
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.644524335861206,
      "learning_rate": 0.00044050196902652064,
      "loss": 3.0586,
      "step": 79455
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7438515424728394,
      "learning_rate": 0.00044049835480387654,
      "loss": 3.0734,
      "step": 79456
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6594949960708618,
      "learning_rate": 0.00044049474055511115,
      "loss": 2.9515,
      "step": 79457
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.117438316345215,
      "learning_rate": 0.0004404911262802251,
      "loss": 3.1569,
      "step": 79458
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9882960319519043,
      "learning_rate": 0.00044048751197921915,
      "loss": 3.2296,
      "step": 79459
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.1005749702453613,
      "learning_rate": 0.0004404838976520939,
      "loss": 3.2107,
      "step": 79460
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.1288201808929443,
      "learning_rate": 0.00044048028329884997,
      "loss": 3.0107,
      "step": 79461
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.3185579776763916,
      "learning_rate": 0.0004404766689194882,
      "loss": 2.7227,
      "step": 79462
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6524107456207275,
      "learning_rate": 0.0004404730545140091,
      "loss": 2.8865,
      "step": 79463
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.855554461479187,
      "learning_rate": 0.00044046944008241346,
      "loss": 3.0084,
      "step": 79464
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.6200878620147705,
      "learning_rate": 0.00044046582562470187,
      "loss": 2.7921,
      "step": 79465
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.3166449069976807,
      "learning_rate": 0.0004404622111408751,
      "loss": 2.9318,
      "step": 79466
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4753590822219849,
      "learning_rate": 0.00044045859663093364,
      "loss": 2.8968,
      "step": 79467
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6353936195373535,
      "learning_rate": 0.00044045498209487825,
      "loss": 3.1553,
      "step": 79468
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.067124605178833,
      "learning_rate": 0.00044045136753270974,
      "loss": 2.8923,
      "step": 79469
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.53191339969635,
      "learning_rate": 0.00044044775294442865,
      "loss": 3.0163,
      "step": 79470
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6534453630447388,
      "learning_rate": 0.00044044413833003564,
      "loss": 3.001,
      "step": 79471
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7746042013168335,
      "learning_rate": 0.00044044052368953147,
      "loss": 3.0617,
      "step": 79472
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9374336004257202,
      "learning_rate": 0.0004404369090229167,
      "loss": 3.1828,
      "step": 79473
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7794106006622314,
      "learning_rate": 0.00044043329433019206,
      "loss": 3.0825,
      "step": 79474
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8157198429107666,
      "learning_rate": 0.0004404296796113583,
      "loss": 2.7912,
      "step": 79475
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.9203418493270874,
      "learning_rate": 0.0004404260648664159,
      "loss": 2.9976,
      "step": 79476
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.623685598373413,
      "learning_rate": 0.0004404224500953656,
      "loss": 3.0905,
      "step": 79477
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.5117707252502441,
      "learning_rate": 0.0004404188352982083,
      "loss": 3.045,
      "step": 79478
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.309760332107544,
      "learning_rate": 0.00044041522047494434,
      "loss": 3.2043,
      "step": 79479
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8458632230758667,
      "learning_rate": 0.0004404116056255746,
      "loss": 2.9098,
      "step": 79480
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.292840003967285,
      "learning_rate": 0.0004404079907500997,
      "loss": 2.958,
      "step": 79481
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6796858310699463,
      "learning_rate": 0.0004404043758485203,
      "loss": 3.0868,
      "step": 79482
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4436392784118652,
      "learning_rate": 0.000440400760920837,
      "loss": 2.9726,
      "step": 79483
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.8330044746398926,
      "learning_rate": 0.00044039714596705065,
      "loss": 3.1796,
      "step": 79484
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.0936191082000732,
      "learning_rate": 0.00044039353098716176,
      "loss": 2.7744,
      "step": 79485
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.6252095699310303,
      "learning_rate": 0.0004403899159811711,
      "loss": 2.7985,
      "step": 79486
    },
    {
      "epoch": 1.03,
      "grad_norm": 2.769643783569336,
      "learning_rate": 0.0004403863009490793,
      "loss": 2.903,
      "step": 79487
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.7516441345214844,
      "learning_rate": 0.000440382685890887,
      "loss": 2.9421,
      "step": 79488
    },
    {
      "epoch": 1.03,
      "grad_norm": 1.4784425497055054,
      "learning_rate": 0.00044037907080659493,
      "loss": 3.1094,
      "step": 79489
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.715832233428955,
      "learning_rate": 0.00044037545569620376,
      "loss": 3.2551,
      "step": 79490
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5839481353759766,
      "learning_rate": 0.0004403718405597141,
      "loss": 2.962,
      "step": 79491
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4411473274230957,
      "learning_rate": 0.0004403682253971267,
      "loss": 3.1178,
      "step": 79492
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1117029190063477,
      "learning_rate": 0.0004403646102084422,
      "loss": 2.9445,
      "step": 79493
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.6354026794433594,
      "learning_rate": 0.0004403609949936613,
      "loss": 2.9578,
      "step": 79494
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.345595121383667,
      "learning_rate": 0.00044035737975278456,
      "loss": 3.3319,
      "step": 79495
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4707179069519043,
      "learning_rate": 0.00044035376448581275,
      "loss": 3.254,
      "step": 79496
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6254624128341675,
      "learning_rate": 0.0004403501491927466,
      "loss": 3.0766,
      "step": 79497
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8012288808822632,
      "learning_rate": 0.00044034653387358666,
      "loss": 3.0759,
      "step": 79498
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5107080936431885,
      "learning_rate": 0.0004403429185283336,
      "loss": 3.023,
      "step": 79499
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8940589427947998,
      "learning_rate": 0.0004403393031569882,
      "loss": 3.0497,
      "step": 79500
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6503602266311646,
      "learning_rate": 0.0004403356877595511,
      "loss": 3.0406,
      "step": 79501
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4351997375488281,
      "learning_rate": 0.0004403320723360229,
      "loss": 2.7547,
      "step": 79502
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.751044511795044,
      "learning_rate": 0.00044032845688640434,
      "loss": 3.0489,
      "step": 79503
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5836132764816284,
      "learning_rate": 0.000440324841410696,
      "loss": 3.0786,
      "step": 79504
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7921165227890015,
      "learning_rate": 0.0004403212259088987,
      "loss": 3.0586,
      "step": 79505
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5830600261688232,
      "learning_rate": 0.000440317610381013,
      "loss": 2.9555,
      "step": 79506
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9763073921203613,
      "learning_rate": 0.0004403139948270396,
      "loss": 3.0207,
      "step": 79507
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8567888736724854,
      "learning_rate": 0.00044031037924697927,
      "loss": 3.1466,
      "step": 79508
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.258211135864258,
      "learning_rate": 0.0004403067636408325,
      "loss": 3.0183,
      "step": 79509
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6855758428573608,
      "learning_rate": 0.0004403031480086001,
      "loss": 3.1318,
      "step": 79510
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6359679698944092,
      "learning_rate": 0.0004402995323502827,
      "loss": 3.1864,
      "step": 79511
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6131340265274048,
      "learning_rate": 0.00044029591666588095,
      "loss": 3.0064,
      "step": 79512
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5452308654785156,
      "learning_rate": 0.00044029230095539543,
      "loss": 2.6819,
      "step": 79513
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4373784065246582,
      "learning_rate": 0.00044028868521882704,
      "loss": 2.8703,
      "step": 79514
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6916502714157104,
      "learning_rate": 0.0004402850694561764,
      "loss": 2.9746,
      "step": 79515
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7671747207641602,
      "learning_rate": 0.000440281453667444,
      "loss": 3.0635,
      "step": 79516
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7648675441741943,
      "learning_rate": 0.0004402778378526307,
      "loss": 3.2405,
      "step": 79517
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.870245337486267,
      "learning_rate": 0.00044027422201173706,
      "loss": 3.0352,
      "step": 79518
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4090230464935303,
      "learning_rate": 0.0004402706061447638,
      "loss": 2.8966,
      "step": 79519
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2485246658325195,
      "learning_rate": 0.00044026699025171155,
      "loss": 2.6464,
      "step": 79520
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1501893997192383,
      "learning_rate": 0.0004402633743325812,
      "loss": 3.0539,
      "step": 79521
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3719303607940674,
      "learning_rate": 0.00044025975838737306,
      "loss": 2.9775,
      "step": 79522
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6228829622268677,
      "learning_rate": 0.000440256142416088,
      "loss": 2.9366,
      "step": 79523
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4622162580490112,
      "learning_rate": 0.0004402525264187267,
      "loss": 2.9896,
      "step": 79524
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.299597978591919,
      "learning_rate": 0.0004402489103952898,
      "loss": 3.1026,
      "step": 79525
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2048542499542236,
      "learning_rate": 0.00044024529434577803,
      "loss": 2.9481,
      "step": 79526
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6858803033828735,
      "learning_rate": 0.000440241678270192,
      "loss": 2.8242,
      "step": 79527
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6441283226013184,
      "learning_rate": 0.00044023806216853233,
      "loss": 3.1052,
      "step": 79528
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6395714282989502,
      "learning_rate": 0.00044023444604079985,
      "loss": 3.117,
      "step": 79529
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8627935647964478,
      "learning_rate": 0.0004402308298869951,
      "loss": 3.044,
      "step": 79530
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6213104724884033,
      "learning_rate": 0.00044022721370711875,
      "loss": 3.2221,
      "step": 79531
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0015885829925537,
      "learning_rate": 0.00044022359750117154,
      "loss": 3.2089,
      "step": 79532
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.435876727104187,
      "learning_rate": 0.00044021998126915414,
      "loss": 3.2113,
      "step": 79533
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4582061767578125,
      "learning_rate": 0.00044021636501106716,
      "loss": 3.0534,
      "step": 79534
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7802537679672241,
      "learning_rate": 0.0004402127487269113,
      "loss": 2.8007,
      "step": 79535
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6804041862487793,
      "learning_rate": 0.0004402091324166873,
      "loss": 2.9848,
      "step": 79536
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8291690349578857,
      "learning_rate": 0.00044020551608039575,
      "loss": 2.8037,
      "step": 79537
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6228588819503784,
      "learning_rate": 0.0004402018997180373,
      "loss": 2.9842,
      "step": 79538
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3860111236572266,
      "learning_rate": 0.0004401982833296128,
      "loss": 3.1,
      "step": 79539
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.922215461730957,
      "learning_rate": 0.00044019466691512266,
      "loss": 2.9417,
      "step": 79540
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.8074018955230713,
      "learning_rate": 0.0004401910504745677,
      "loss": 2.8873,
      "step": 79541
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.036170482635498,
      "learning_rate": 0.00044018743400794864,
      "loss": 3.0641,
      "step": 79542
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9521450996398926,
      "learning_rate": 0.00044018381751526606,
      "loss": 3.2533,
      "step": 79543
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5108214616775513,
      "learning_rate": 0.0004401802009965207,
      "loss": 2.869,
      "step": 79544
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7276785373687744,
      "learning_rate": 0.0004401765844517131,
      "loss": 3.1676,
      "step": 79545
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.0460712909698486,
      "learning_rate": 0.0004401729678808441,
      "loss": 2.9513,
      "step": 79546
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.8181962966918945,
      "learning_rate": 0.00044016935128391425,
      "loss": 2.9836,
      "step": 79547
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.1081936359405518,
      "learning_rate": 0.0004401657346609244,
      "loss": 2.9135,
      "step": 79548
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.190551280975342,
      "learning_rate": 0.0004401621180118749,
      "loss": 3.0119,
      "step": 79549
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7434353828430176,
      "learning_rate": 0.0004401585013367667,
      "loss": 2.8112,
      "step": 79550
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9770714044570923,
      "learning_rate": 0.0004401548846356005,
      "loss": 3.1729,
      "step": 79551
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3740389347076416,
      "learning_rate": 0.00044015126790837675,
      "loss": 2.8588,
      "step": 79552
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6752815246582031,
      "learning_rate": 0.0004401476511550962,
      "loss": 2.6919,
      "step": 79553
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5641082525253296,
      "learning_rate": 0.0004401440343757596,
      "loss": 3.0901,
      "step": 79554
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5337872505187988,
      "learning_rate": 0.0004401404175703676,
      "loss": 3.3619,
      "step": 79555
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9158885478973389,
      "learning_rate": 0.00044013680073892085,
      "loss": 3.0476,
      "step": 79556
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7685681581497192,
      "learning_rate": 0.00044013318388142,
      "loss": 2.8255,
      "step": 79557
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6713285446166992,
      "learning_rate": 0.00044012956699786575,
      "loss": 3.0364,
      "step": 79558
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4422602653503418,
      "learning_rate": 0.0004401259500882588,
      "loss": 3.315,
      "step": 79559
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8912262916564941,
      "learning_rate": 0.0004401223331525998,
      "loss": 3.1394,
      "step": 79560
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7746539115905762,
      "learning_rate": 0.00044011871619088934,
      "loss": 2.9507,
      "step": 79561
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8597713708877563,
      "learning_rate": 0.00044011509920312817,
      "loss": 3.1155,
      "step": 79562
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6597191095352173,
      "learning_rate": 0.00044011148218931704,
      "loss": 3.0282,
      "step": 79563
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8976348638534546,
      "learning_rate": 0.0004401078651494565,
      "loss": 2.8474,
      "step": 79564
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7213107347488403,
      "learning_rate": 0.0004401042480835473,
      "loss": 3.0667,
      "step": 79565
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.792792320251465,
      "learning_rate": 0.00044010063099159005,
      "loss": 2.7867,
      "step": 79566
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6650006771087646,
      "learning_rate": 0.00044009701387358536,
      "loss": 2.9839,
      "step": 79567
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7098499536514282,
      "learning_rate": 0.000440093396729534,
      "loss": 3.0503,
      "step": 79568
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.1905100345611572,
      "learning_rate": 0.00044008977955943676,
      "loss": 2.9113,
      "step": 79569
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1877455711364746,
      "learning_rate": 0.0004400861623632941,
      "loss": 2.9459,
      "step": 79570
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.465909242630005,
      "learning_rate": 0.0004400825451411068,
      "loss": 2.9753,
      "step": 79571
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.136188507080078,
      "learning_rate": 0.00044007892789287543,
      "loss": 3.0093,
      "step": 79572
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.004997968673706,
      "learning_rate": 0.00044007531061860094,
      "loss": 3.0985,
      "step": 79573
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.682365894317627,
      "learning_rate": 0.00044007169331828364,
      "loss": 2.9509,
      "step": 79574
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.578330636024475,
      "learning_rate": 0.0004400680759919244,
      "loss": 2.9229,
      "step": 79575
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2675387859344482,
      "learning_rate": 0.0004400644586395239,
      "loss": 2.9443,
      "step": 79576
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9557487964630127,
      "learning_rate": 0.0004400608412610827,
      "loss": 3.0609,
      "step": 79577
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3890917301177979,
      "learning_rate": 0.00044005722385660165,
      "loss": 3.211,
      "step": 79578
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.614902138710022,
      "learning_rate": 0.0004400536064260813,
      "loss": 3.1404,
      "step": 79579
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5995595455169678,
      "learning_rate": 0.00044004998896952224,
      "loss": 2.9248,
      "step": 79580
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5723042488098145,
      "learning_rate": 0.00044004637148692524,
      "loss": 3.1716,
      "step": 79581
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8191096782684326,
      "learning_rate": 0.0004400427539782911,
      "loss": 3.1091,
      "step": 79582
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.902740478515625,
      "learning_rate": 0.0004400391364436203,
      "loss": 3.2394,
      "step": 79583
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.64564049243927,
      "learning_rate": 0.0004400355188829136,
      "loss": 3.0941,
      "step": 79584
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3376333713531494,
      "learning_rate": 0.0004400319012961717,
      "loss": 3.2678,
      "step": 79585
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.064458131790161,
      "learning_rate": 0.0004400282836833951,
      "loss": 3.0223,
      "step": 79586
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.10683274269104,
      "learning_rate": 0.0004400246660445847,
      "loss": 3.2097,
      "step": 79587
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6040375232696533,
      "learning_rate": 0.000440021048379741,
      "loss": 2.8403,
      "step": 79588
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7066378593444824,
      "learning_rate": 0.0004400174306888648,
      "loss": 3.0337,
      "step": 79589
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.2347981929779053,
      "learning_rate": 0.0004400138129719567,
      "loss": 3.015,
      "step": 79590
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8633331060409546,
      "learning_rate": 0.0004400101952290174,
      "loss": 3.0811,
      "step": 79591
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5307916402816772,
      "learning_rate": 0.00044000657746004755,
      "loss": 3.0892,
      "step": 79592
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7680580615997314,
      "learning_rate": 0.00044000295966504783,
      "loss": 2.7007,
      "step": 79593
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.879006862640381,
      "learning_rate": 0.00043999934184401897,
      "loss": 3.1505,
      "step": 79594
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7958579063415527,
      "learning_rate": 0.0004399957239969615,
      "loss": 2.7723,
      "step": 79595
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5409660339355469,
      "learning_rate": 0.00043999210612387623,
      "loss": 3.1643,
      "step": 79596
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.8555967807769775,
      "learning_rate": 0.0004399884882247637,
      "loss": 2.9524,
      "step": 79597
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.94674015045166,
      "learning_rate": 0.0004399848702996248,
      "loss": 3.2161,
      "step": 79598
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.800842046737671,
      "learning_rate": 0.00043998125234846,
      "loss": 3.0219,
      "step": 79599
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.557608962059021,
      "learning_rate": 0.0004399776343712701,
      "loss": 2.9002,
      "step": 79600
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.938314437866211,
      "learning_rate": 0.00043997401636805566,
      "loss": 3.0631,
      "step": 79601
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7042086124420166,
      "learning_rate": 0.0004399703983388174,
      "loss": 2.9225,
      "step": 79602
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.35520076751709,
      "learning_rate": 0.00043996678028355605,
      "loss": 2.9131,
      "step": 79603
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9499173164367676,
      "learning_rate": 0.00043996316220227226,
      "loss": 3.0968,
      "step": 79604
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.360696792602539,
      "learning_rate": 0.00043995954409496653,
      "loss": 2.8555,
      "step": 79605
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9564034938812256,
      "learning_rate": 0.00043995592596163984,
      "loss": 3.1394,
      "step": 79606
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4985911846160889,
      "learning_rate": 0.0004399523078022927,
      "loss": 2.8474,
      "step": 79607
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5266237258911133,
      "learning_rate": 0.00043994868961692564,
      "loss": 2.9399,
      "step": 79608
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.732463002204895,
      "learning_rate": 0.0004399450714055396,
      "loss": 3.1005,
      "step": 79609
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.347360372543335,
      "learning_rate": 0.000439941453168135,
      "loss": 2.8914,
      "step": 79610
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9587589502334595,
      "learning_rate": 0.00043993783490471275,
      "loss": 2.8196,
      "step": 79611
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5615922212600708,
      "learning_rate": 0.0004399342166152734,
      "loss": 2.9542,
      "step": 79612
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.851267695426941,
      "learning_rate": 0.0004399305982998176,
      "loss": 2.9905,
      "step": 79613
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6482552289962769,
      "learning_rate": 0.0004399269799583461,
      "loss": 2.8362,
      "step": 79614
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7155388593673706,
      "learning_rate": 0.0004399233615908596,
      "loss": 3.1166,
      "step": 79615
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.838975429534912,
      "learning_rate": 0.00043991974319735857,
      "loss": 3.1281,
      "step": 79616
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8794976472854614,
      "learning_rate": 0.0004399161247778439,
      "loss": 2.9625,
      "step": 79617
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5920348167419434,
      "learning_rate": 0.00043991250633231623,
      "loss": 2.9163,
      "step": 79618
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3758552074432373,
      "learning_rate": 0.0004399088878607761,
      "loss": 3.1654,
      "step": 79619
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8202908039093018,
      "learning_rate": 0.00043990526936322425,
      "loss": 3.2332,
      "step": 79620
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5880697965621948,
      "learning_rate": 0.00043990165083966144,
      "loss": 2.9965,
      "step": 79621
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6446928977966309,
      "learning_rate": 0.0004398980322900882,
      "loss": 2.9992,
      "step": 79622
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8228800296783447,
      "learning_rate": 0.0004398944137145053,
      "loss": 2.8495,
      "step": 79623
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8523297309875488,
      "learning_rate": 0.00043989079511291346,
      "loss": 3.0843,
      "step": 79624
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9476686716079712,
      "learning_rate": 0.00043988717648531315,
      "loss": 2.9289,
      "step": 79625
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6467702388763428,
      "learning_rate": 0.00043988355783170527,
      "loss": 3.0549,
      "step": 79626
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0479743480682373,
      "learning_rate": 0.00043987993915209036,
      "loss": 3.0112,
      "step": 79627
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.072305679321289,
      "learning_rate": 0.0004398763204464691,
      "loss": 2.9428,
      "step": 79628
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8587806224822998,
      "learning_rate": 0.00043987270171484227,
      "loss": 2.8804,
      "step": 79629
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.9893791675567627,
      "learning_rate": 0.00043986908295721045,
      "loss": 3.0509,
      "step": 79630
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3047852516174316,
      "learning_rate": 0.0004398654641735743,
      "loss": 3.1329,
      "step": 79631
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3800705671310425,
      "learning_rate": 0.00043986184536393445,
      "loss": 3.0512,
      "step": 79632
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8300009965896606,
      "learning_rate": 0.00043985822652829173,
      "loss": 3.0301,
      "step": 79633
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.510468006134033,
      "learning_rate": 0.0004398546076666467,
      "loss": 2.9465,
      "step": 79634
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.165207624435425,
      "learning_rate": 0.00043985098877900007,
      "loss": 3.0387,
      "step": 79635
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5379054546356201,
      "learning_rate": 0.00043984736986535253,
      "loss": 3.1456,
      "step": 79636
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8277109861373901,
      "learning_rate": 0.00043984375092570463,
      "loss": 3.1156,
      "step": 79637
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.325486183166504,
      "learning_rate": 0.00043984013196005724,
      "loss": 2.9931,
      "step": 79638
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.357635736465454,
      "learning_rate": 0.0004398365129684109,
      "loss": 2.8251,
      "step": 79639
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4911091327667236,
      "learning_rate": 0.0004398328939507663,
      "loss": 3.0431,
      "step": 79640
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.020603656768799,
      "learning_rate": 0.00043982927490712406,
      "loss": 3.2129,
      "step": 79641
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4390041828155518,
      "learning_rate": 0.00043982565583748504,
      "loss": 3.0113,
      "step": 79642
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.94616961479187,
      "learning_rate": 0.0004398220367418497,
      "loss": 3.1819,
      "step": 79643
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6884982585906982,
      "learning_rate": 0.00043981841762021887,
      "loss": 3.2499,
      "step": 79644
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2709052562713623,
      "learning_rate": 0.00043981479847259307,
      "loss": 3.0729,
      "step": 79645
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.9818670749664307,
      "learning_rate": 0.00043981117929897315,
      "loss": 3.0695,
      "step": 79646
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0261895656585693,
      "learning_rate": 0.0004398075600993597,
      "loss": 2.8786,
      "step": 79647
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7352656126022339,
      "learning_rate": 0.00043980394087375326,
      "loss": 3.1287,
      "step": 79648
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5298306941986084,
      "learning_rate": 0.0004398003216221548,
      "loss": 2.8769,
      "step": 79649
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1038520336151123,
      "learning_rate": 0.0004397967023445647,
      "loss": 3.0713,
      "step": 79650
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1490163803100586,
      "learning_rate": 0.0004397930830409838,
      "loss": 2.787,
      "step": 79651
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.312343120574951,
      "learning_rate": 0.00043978946371141275,
      "loss": 2.9098,
      "step": 79652
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0694756507873535,
      "learning_rate": 0.0004397858443558521,
      "loss": 2.9298,
      "step": 79653
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7002646923065186,
      "learning_rate": 0.0004397822249743027,
      "loss": 2.7723,
      "step": 79654
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6657568216323853,
      "learning_rate": 0.0004397786055667651,
      "loss": 2.8619,
      "step": 79655
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6504948139190674,
      "learning_rate": 0.0004397749861332401,
      "loss": 2.9965,
      "step": 79656
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6764142513275146,
      "learning_rate": 0.0004397713666737282,
      "loss": 2.9765,
      "step": 79657
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6023142337799072,
      "learning_rate": 0.00043976774718823027,
      "loss": 3.0172,
      "step": 79658
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.9520320892333984,
      "learning_rate": 0.0004397641276767468,
      "loss": 2.8364,
      "step": 79659
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8985610008239746,
      "learning_rate": 0.00043976050813927857,
      "loss": 2.9661,
      "step": 79660
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3070473670959473,
      "learning_rate": 0.0004397568885758262,
      "loss": 2.8727,
      "step": 79661
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.246450662612915,
      "learning_rate": 0.00043975326898639043,
      "loss": 2.9827,
      "step": 79662
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.1677067279815674,
      "learning_rate": 0.00043974964937097184,
      "loss": 3.0064,
      "step": 79663
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8537875413894653,
      "learning_rate": 0.00043974602972957115,
      "loss": 3.2173,
      "step": 79664
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5686222314834595,
      "learning_rate": 0.00043974241006218903,
      "loss": 3.1721,
      "step": 79665
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.305938959121704,
      "learning_rate": 0.0004397387903688262,
      "loss": 3.0155,
      "step": 79666
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6819684505462646,
      "learning_rate": 0.00043973517064948333,
      "loss": 3.0191,
      "step": 79667
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1263082027435303,
      "learning_rate": 0.00043973155090416095,
      "loss": 3.0295,
      "step": 79668
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.9023003578186035,
      "learning_rate": 0.00043972793113285983,
      "loss": 3.1296,
      "step": 79669
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1644866466522217,
      "learning_rate": 0.00043972431133558075,
      "loss": 3.1967,
      "step": 79670
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1588387489318848,
      "learning_rate": 0.00043972069151232423,
      "loss": 3.0758,
      "step": 79671
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.75803804397583,
      "learning_rate": 0.000439717071663091,
      "loss": 2.9844,
      "step": 79672
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6346865892410278,
      "learning_rate": 0.00043971345178788173,
      "loss": 3.2548,
      "step": 79673
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.197808027267456,
      "learning_rate": 0.0004397098318866971,
      "loss": 2.8758,
      "step": 79674
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6192333698272705,
      "learning_rate": 0.0004397062119595377,
      "loss": 3.1372,
      "step": 79675
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7682486772537231,
      "learning_rate": 0.00043970259200640433,
      "loss": 2.9653,
      "step": 79676
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5924551486968994,
      "learning_rate": 0.0004396989720272977,
      "loss": 3.1251,
      "step": 79677
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.184697151184082,
      "learning_rate": 0.0004396953520222183,
      "loss": 2.9687,
      "step": 79678
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.230363368988037,
      "learning_rate": 0.00043969173199116693,
      "loss": 2.776,
      "step": 79679
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8733140230178833,
      "learning_rate": 0.0004396881119341442,
      "loss": 2.7756,
      "step": 79680
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.938706636428833,
      "learning_rate": 0.00043968449185115087,
      "loss": 2.925,
      "step": 79681
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5483829975128174,
      "learning_rate": 0.00043968087174218746,
      "loss": 2.9933,
      "step": 79682
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.36075758934021,
      "learning_rate": 0.00043967725160725475,
      "loss": 3.0483,
      "step": 79683
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6952954530715942,
      "learning_rate": 0.0004396736314463535,
      "loss": 3.0059,
      "step": 79684
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0217530727386475,
      "learning_rate": 0.00043967001125948425,
      "loss": 3.0529,
      "step": 79685
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.8605825901031494,
      "learning_rate": 0.00043966639104664766,
      "loss": 3.0275,
      "step": 79686
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7531605958938599,
      "learning_rate": 0.00043966277080784453,
      "loss": 3.165,
      "step": 79687
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7467705011367798,
      "learning_rate": 0.0004396591505430754,
      "loss": 3.0644,
      "step": 79688
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5756893157958984,
      "learning_rate": 0.00043965553025234096,
      "loss": 3.3165,
      "step": 79689
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5014731884002686,
      "learning_rate": 0.000439651909935642,
      "loss": 3.124,
      "step": 79690
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5247366428375244,
      "learning_rate": 0.00043964828959297916,
      "loss": 2.9785,
      "step": 79691
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8720911741256714,
      "learning_rate": 0.00043964466922435296,
      "loss": 2.9486,
      "step": 79692
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.234510898590088,
      "learning_rate": 0.0004396410488297641,
      "loss": 2.8328,
      "step": 79693
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9765843152999878,
      "learning_rate": 0.00043963742840921353,
      "loss": 2.6871,
      "step": 79694
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2215280532836914,
      "learning_rate": 0.0004396338079627016,
      "loss": 3.0225,
      "step": 79695
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4795811176300049,
      "learning_rate": 0.00043963018749022917,
      "loss": 3.2453,
      "step": 79696
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.152024269104004,
      "learning_rate": 0.00043962656699179686,
      "loss": 3.0308,
      "step": 79697
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.959869623184204,
      "learning_rate": 0.0004396229464674053,
      "loss": 3.0306,
      "step": 79698
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4479318857192993,
      "learning_rate": 0.00043961932591705515,
      "loss": 2.9506,
      "step": 79699
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5807664394378662,
      "learning_rate": 0.0004396157053407472,
      "loss": 3.1526,
      "step": 79700
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8302303552627563,
      "learning_rate": 0.0004396120847384821,
      "loss": 2.7289,
      "step": 79701
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3636480569839478,
      "learning_rate": 0.00043960846411026034,
      "loss": 2.943,
      "step": 79702
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4441194534301758,
      "learning_rate": 0.0004396048434560828,
      "loss": 3.094,
      "step": 79703
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6649188995361328,
      "learning_rate": 0.00043960122277595016,
      "loss": 2.9861,
      "step": 79704
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6062984466552734,
      "learning_rate": 0.00043959760206986294,
      "loss": 3.1942,
      "step": 79705
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7282681465148926,
      "learning_rate": 0.0004395939813378219,
      "loss": 3.0468,
      "step": 79706
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9203511476516724,
      "learning_rate": 0.0004395903605798277,
      "loss": 2.7955,
      "step": 79707
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0143613815307617,
      "learning_rate": 0.000439586739795881,
      "loss": 3.032,
      "step": 79708
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6418465375900269,
      "learning_rate": 0.0004395831189859825,
      "loss": 3.0326,
      "step": 79709
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6903624534606934,
      "learning_rate": 0.0004395794981501329,
      "loss": 2.9236,
      "step": 79710
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0621941089630127,
      "learning_rate": 0.0004395758772883328,
      "loss": 3.1918,
      "step": 79711
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.691381812095642,
      "learning_rate": 0.0004395722564005829,
      "loss": 3.0518,
      "step": 79712
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4226419925689697,
      "learning_rate": 0.00043956863548688397,
      "loss": 3.2062,
      "step": 79713
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.024193286895752,
      "learning_rate": 0.0004395650145472365,
      "loss": 3.0853,
      "step": 79714
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3410909175872803,
      "learning_rate": 0.0004395613935816413,
      "loss": 3.0719,
      "step": 79715
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8988544940948486,
      "learning_rate": 0.00043955777259009897,
      "loss": 2.9639,
      "step": 79716
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8058615922927856,
      "learning_rate": 0.00043955415157261024,
      "loss": 2.9147,
      "step": 79717
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7033122777938843,
      "learning_rate": 0.00043955053052917574,
      "loss": 3.1122,
      "step": 79718
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6246588230133057,
      "learning_rate": 0.00043954690945979623,
      "loss": 3.1422,
      "step": 79719
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3259811401367188,
      "learning_rate": 0.00043954328836447225,
      "loss": 3.162,
      "step": 79720
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7954192161560059,
      "learning_rate": 0.0004395396672432045,
      "loss": 3.0743,
      "step": 79721
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3116989135742188,
      "learning_rate": 0.00043953604609599386,
      "loss": 2.9924,
      "step": 79722
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8875491619110107,
      "learning_rate": 0.00043953242492284064,
      "loss": 2.8902,
      "step": 79723
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5430322885513306,
      "learning_rate": 0.00043952880372374577,
      "loss": 3.1234,
      "step": 79724
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4178926944732666,
      "learning_rate": 0.00043952518249870996,
      "loss": 3.195,
      "step": 79725
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8926422595977783,
      "learning_rate": 0.0004395215612477337,
      "loss": 2.7255,
      "step": 79726
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6820135116577148,
      "learning_rate": 0.00043951793997081776,
      "loss": 2.965,
      "step": 79727
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2732579708099365,
      "learning_rate": 0.00043951431866796285,
      "loss": 3.3143,
      "step": 79728
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5522701740264893,
      "learning_rate": 0.0004395106973391695,
      "loss": 2.8935,
      "step": 79729
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5067758560180664,
      "learning_rate": 0.0004395070759844384,
      "loss": 3.149,
      "step": 79730
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5525739192962646,
      "learning_rate": 0.0004395034546037705,
      "loss": 2.9828,
      "step": 79731
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5374482870101929,
      "learning_rate": 0.0004394998331971662,
      "loss": 2.8732,
      "step": 79732
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7335693836212158,
      "learning_rate": 0.0004394962117646262,
      "loss": 3.0509,
      "step": 79733
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.256575584411621,
      "learning_rate": 0.0004394925903061513,
      "loss": 2.8334,
      "step": 79734
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6346625089645386,
      "learning_rate": 0.00043948896882174205,
      "loss": 2.8294,
      "step": 79735
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6566293239593506,
      "learning_rate": 0.0004394853473113992,
      "loss": 2.9451,
      "step": 79736
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7270926237106323,
      "learning_rate": 0.0004394817257751234,
      "loss": 3.1603,
      "step": 79737
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7947160005569458,
      "learning_rate": 0.0004394781042129152,
      "loss": 3.1757,
      "step": 79738
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.810486912727356,
      "learning_rate": 0.0004394744826247755,
      "loss": 3.1316,
      "step": 79739
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6331005096435547,
      "learning_rate": 0.00043947086101070486,
      "loss": 3.2319,
      "step": 79740
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.30383038520813,
      "learning_rate": 0.0004394672393707039,
      "loss": 2.8332,
      "step": 79741
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7218139171600342,
      "learning_rate": 0.00043946361770477336,
      "loss": 3.0716,
      "step": 79742
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.217782974243164,
      "learning_rate": 0.0004394599960129139,
      "loss": 3.2488,
      "step": 79743
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2511706352233887,
      "learning_rate": 0.0004394563742951262,
      "loss": 3.1934,
      "step": 79744
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.219055414199829,
      "learning_rate": 0.00043945275255141095,
      "loss": 3.0548,
      "step": 79745
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.554789662361145,
      "learning_rate": 0.0004394491307817688,
      "loss": 2.9616,
      "step": 79746
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2600059509277344,
      "learning_rate": 0.0004394455089862004,
      "loss": 2.9824,
      "step": 79747
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.830713987350464,
      "learning_rate": 0.00043944188716470644,
      "loss": 3.4535,
      "step": 79748
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7588584423065186,
      "learning_rate": 0.0004394382653172877,
      "loss": 3.1937,
      "step": 79749
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.493505835533142,
      "learning_rate": 0.0004394346434439446,
      "loss": 3.2497,
      "step": 79750
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3314805030822754,
      "learning_rate": 0.00043943102154467804,
      "loss": 3.1181,
      "step": 79751
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5266716480255127,
      "learning_rate": 0.0004394273996194887,
      "loss": 3.1405,
      "step": 79752
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7903673648834229,
      "learning_rate": 0.000439423777668377,
      "loss": 2.9381,
      "step": 79753
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3071651458740234,
      "learning_rate": 0.00043942015569134387,
      "loss": 3.0159,
      "step": 79754
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0808627605438232,
      "learning_rate": 0.0004394165336883899,
      "loss": 3.0657,
      "step": 79755
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6198863983154297,
      "learning_rate": 0.0004394129116595158,
      "loss": 3.1695,
      "step": 79756
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.530055284500122,
      "learning_rate": 0.0004394092896047222,
      "loss": 2.7384,
      "step": 79757
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8998647928237915,
      "learning_rate": 0.0004394056675240098,
      "loss": 2.8336,
      "step": 79758
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8564873933792114,
      "learning_rate": 0.0004394020454173792,
      "loss": 3.229,
      "step": 79759
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6949951648712158,
      "learning_rate": 0.0004393984232848311,
      "loss": 3.0,
      "step": 79760
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8482794761657715,
      "learning_rate": 0.00043939480112636627,
      "loss": 3.1209,
      "step": 79761
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4330695867538452,
      "learning_rate": 0.0004393911789419854,
      "loss": 3.1362,
      "step": 79762
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6633573770523071,
      "learning_rate": 0.00043938755673168887,
      "loss": 3.0405,
      "step": 79763
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8620411157608032,
      "learning_rate": 0.0004393839344954777,
      "loss": 3.198,
      "step": 79764
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1018784046173096,
      "learning_rate": 0.0004393803122333524,
      "loss": 2.92,
      "step": 79765
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6228101253509521,
      "learning_rate": 0.00043937668994531367,
      "loss": 2.8643,
      "step": 79766
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.529611587524414,
      "learning_rate": 0.0004393730676313621,
      "loss": 2.751,
      "step": 79767
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.088320255279541,
      "learning_rate": 0.00043936944529149855,
      "loss": 2.8983,
      "step": 79768
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.244389295578003,
      "learning_rate": 0.0004393658229257236,
      "loss": 2.847,
      "step": 79769
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3670971393585205,
      "learning_rate": 0.00043936220053403783,
      "loss": 2.8736,
      "step": 79770
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7347674369812012,
      "learning_rate": 0.000439358578116442,
      "loss": 3.1309,
      "step": 79771
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.717659592628479,
      "learning_rate": 0.0004393549556729368,
      "loss": 3.1228,
      "step": 79772
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7147161960601807,
      "learning_rate": 0.000439351333203523,
      "loss": 3.1871,
      "step": 79773
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2611043453216553,
      "learning_rate": 0.000439347710708201,
      "loss": 3.0844,
      "step": 79774
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.903657078742981,
      "learning_rate": 0.0004393440881869717,
      "loss": 2.9625,
      "step": 79775
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0402212142944336,
      "learning_rate": 0.0004393404656398357,
      "loss": 2.9141,
      "step": 79776
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.8745028972625732,
      "learning_rate": 0.0004393368430667937,
      "loss": 2.8882,
      "step": 79777
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5446771383285522,
      "learning_rate": 0.00043933322046784627,
      "loss": 2.9882,
      "step": 79778
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0044105052948,
      "learning_rate": 0.00043932959784299415,
      "loss": 3.2009,
      "step": 79779
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1132564544677734,
      "learning_rate": 0.0004393259751922382,
      "loss": 3.2562,
      "step": 79780
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7062219381332397,
      "learning_rate": 0.0004393223525155788,
      "loss": 3.0112,
      "step": 79781
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.694801926612854,
      "learning_rate": 0.0004393187298130167,
      "loss": 2.9063,
      "step": 79782
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4040703773498535,
      "learning_rate": 0.0004393151070845527,
      "loss": 2.7934,
      "step": 79783
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6346293687820435,
      "learning_rate": 0.00043931148433018735,
      "loss": 3.1337,
      "step": 79784
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.259676694869995,
      "learning_rate": 0.0004393078615499214,
      "loss": 2.8121,
      "step": 79785
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.750821828842163,
      "learning_rate": 0.0004393042387437555,
      "loss": 3.0977,
      "step": 79786
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.00484037399292,
      "learning_rate": 0.00043930061591169025,
      "loss": 3.0184,
      "step": 79787
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6098923683166504,
      "learning_rate": 0.0004392969930537264,
      "loss": 3.1715,
      "step": 79788
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.028688669204712,
      "learning_rate": 0.0004392933701698647,
      "loss": 2.9279,
      "step": 79789
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.741093397140503,
      "learning_rate": 0.00043928974726010567,
      "loss": 3.0532,
      "step": 79790
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4757301807403564,
      "learning_rate": 0.00043928612432445,
      "loss": 3.1359,
      "step": 79791
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.628089427947998,
      "learning_rate": 0.0004392825013628985,
      "loss": 2.6754,
      "step": 79792
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.642589569091797,
      "learning_rate": 0.0004392788783754516,
      "loss": 2.9439,
      "step": 79793
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.486282467842102,
      "learning_rate": 0.0004392752553621103,
      "loss": 3.2047,
      "step": 79794
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.0985324382781982,
      "learning_rate": 0.0004392716323228751,
      "loss": 3.1772,
      "step": 79795
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2702560424804688,
      "learning_rate": 0.00043926800925774657,
      "loss": 3.0131,
      "step": 79796
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.773024320602417,
      "learning_rate": 0.0004392643861667255,
      "loss": 3.1433,
      "step": 79797
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.002629041671753,
      "learning_rate": 0.0004392607630498126,
      "loss": 2.9077,
      "step": 79798
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5229703187942505,
      "learning_rate": 0.0004392571399070084,
      "loss": 3.2266,
      "step": 79799
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.959787130355835,
      "learning_rate": 0.0004392535167383138,
      "loss": 2.9849,
      "step": 79800
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9093618392944336,
      "learning_rate": 0.0004392498935437293,
      "loss": 3.1391,
      "step": 79801
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9223239421844482,
      "learning_rate": 0.00043924627032325566,
      "loss": 3.1162,
      "step": 79802
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7846081256866455,
      "learning_rate": 0.0004392426470768934,
      "loss": 3.0357,
      "step": 79803
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7947999238967896,
      "learning_rate": 0.0004392390238046434,
      "loss": 2.9653,
      "step": 79804
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1478323936462402,
      "learning_rate": 0.00043923540050650617,
      "loss": 2.8153,
      "step": 79805
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7145981788635254,
      "learning_rate": 0.0004392317771824824,
      "loss": 2.8363,
      "step": 79806
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.810417652130127,
      "learning_rate": 0.00043922815383257293,
      "loss": 3.172,
      "step": 79807
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.940877676010132,
      "learning_rate": 0.00043922453045677827,
      "loss": 3.0053,
      "step": 79808
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.498533248901367,
      "learning_rate": 0.0004392209070550991,
      "loss": 2.8658,
      "step": 79809
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.71915340423584,
      "learning_rate": 0.00043921728362753627,
      "loss": 3.0759,
      "step": 79810
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.939855933189392,
      "learning_rate": 0.00043921366017409024,
      "loss": 3.1044,
      "step": 79811
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.025398015975952,
      "learning_rate": 0.00043921003669476164,
      "loss": 3.1756,
      "step": 79812
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.2920360565185547,
      "learning_rate": 0.0004392064131895515,
      "loss": 2.9471,
      "step": 79813
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.445258855819702,
      "learning_rate": 0.00043920278965846007,
      "loss": 2.9533,
      "step": 79814
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5633221864700317,
      "learning_rate": 0.0004391991661014883,
      "loss": 3.2034,
      "step": 79815
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.003612756729126,
      "learning_rate": 0.0004391955425186368,
      "loss": 2.9475,
      "step": 79816
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.156067132949829,
      "learning_rate": 0.00043919191890990614,
      "loss": 3.0265,
      "step": 79817
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0305418968200684,
      "learning_rate": 0.0004391882952752971,
      "loss": 2.9211,
      "step": 79818
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.575655221939087,
      "learning_rate": 0.00043918467161481044,
      "loss": 2.604,
      "step": 79819
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5692272186279297,
      "learning_rate": 0.00043918104792844655,
      "loss": 3.2187,
      "step": 79820
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.060788869857788,
      "learning_rate": 0.00043917742421620634,
      "loss": 2.9174,
      "step": 79821
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.047548770904541,
      "learning_rate": 0.0004391738004780905,
      "loss": 2.9733,
      "step": 79822
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6927564144134521,
      "learning_rate": 0.00043917017671409953,
      "loss": 3.0685,
      "step": 79823
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.348844528198242,
      "learning_rate": 0.0004391665529242342,
      "loss": 3.0808,
      "step": 79824
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.2508249282836914,
      "learning_rate": 0.00043916292910849527,
      "loss": 3.0599,
      "step": 79825
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3226006031036377,
      "learning_rate": 0.00043915930526688317,
      "loss": 2.8146,
      "step": 79826
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5434203147888184,
      "learning_rate": 0.0004391556813993988,
      "loss": 3.2304,
      "step": 79827
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.040309429168701,
      "learning_rate": 0.0004391520575060429,
      "loss": 2.9356,
      "step": 79828
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.803034782409668,
      "learning_rate": 0.0004391484335868158,
      "loss": 3.1026,
      "step": 79829
    },
    {
      "epoch": 1.04,
      "grad_norm": 5.129499435424805,
      "learning_rate": 0.00043914480964171845,
      "loss": 2.9781,
      "step": 79830
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5832583904266357,
      "learning_rate": 0.00043914118567075155,
      "loss": 3.1135,
      "step": 79831
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4114255905151367,
      "learning_rate": 0.0004391375616739156,
      "loss": 3.152,
      "step": 79832
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.8843584060668945,
      "learning_rate": 0.00043913393765121126,
      "loss": 3.0849,
      "step": 79833
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6129534244537354,
      "learning_rate": 0.0004391303136026395,
      "loss": 3.0765,
      "step": 79834
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4583536386489868,
      "learning_rate": 0.0004391266895282006,
      "loss": 2.9671,
      "step": 79835
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7114495038986206,
      "learning_rate": 0.0004391230654278955,
      "loss": 2.7874,
      "step": 79836
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4902899265289307,
      "learning_rate": 0.0004391194413017248,
      "loss": 3.0018,
      "step": 79837
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7782974243164062,
      "learning_rate": 0.0004391158171496892,
      "loss": 2.8247,
      "step": 79838
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.654861569404602,
      "learning_rate": 0.00043911219297178923,
      "loss": 3.1603,
      "step": 79839
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8910753726959229,
      "learning_rate": 0.00043910856876802574,
      "loss": 3.0224,
      "step": 79840
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8134820461273193,
      "learning_rate": 0.0004391049445383994,
      "loss": 2.974,
      "step": 79841
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.564108967781067,
      "learning_rate": 0.0004391013202829107,
      "loss": 3.0551,
      "step": 79842
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7553339004516602,
      "learning_rate": 0.00043909769600156053,
      "loss": 2.9826,
      "step": 79843
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8335224390029907,
      "learning_rate": 0.0004390940716943495,
      "loss": 2.9679,
      "step": 79844
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7938823699951172,
      "learning_rate": 0.0004390904473612782,
      "loss": 2.9212,
      "step": 79845
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6333670616149902,
      "learning_rate": 0.00043908682300234736,
      "loss": 3.0465,
      "step": 79846
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6173452138900757,
      "learning_rate": 0.00043908319861755773,
      "loss": 3.1433,
      "step": 79847
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9720814228057861,
      "learning_rate": 0.00043907957420690976,
      "loss": 2.778,
      "step": 79848
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7082865238189697,
      "learning_rate": 0.0004390759497704044,
      "loss": 3.2725,
      "step": 79849
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6047130823135376,
      "learning_rate": 0.00043907232530804216,
      "loss": 2.973,
      "step": 79850
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6076993942260742,
      "learning_rate": 0.0004390687008198237,
      "loss": 3.306,
      "step": 79851
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9018564224243164,
      "learning_rate": 0.0004390650763057498,
      "loss": 3.0751,
      "step": 79852
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.990242600440979,
      "learning_rate": 0.000439061451765821,
      "loss": 3.1498,
      "step": 79853
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8246829509735107,
      "learning_rate": 0.00043905782720003814,
      "loss": 3.0241,
      "step": 79854
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5861469507217407,
      "learning_rate": 0.00043905420260840187,
      "loss": 3.2897,
      "step": 79855
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6578999757766724,
      "learning_rate": 0.00043905057799091264,
      "loss": 2.9605,
      "step": 79856
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9853402376174927,
      "learning_rate": 0.00043904695334757136,
      "loss": 3.1367,
      "step": 79857
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8352810144424438,
      "learning_rate": 0.00043904332867837864,
      "loss": 3.1176,
      "step": 79858
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.993591547012329,
      "learning_rate": 0.00043903970398333507,
      "loss": 3.1926,
      "step": 79859
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6985667943954468,
      "learning_rate": 0.0004390360792624414,
      "loss": 2.963,
      "step": 79860
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9895848035812378,
      "learning_rate": 0.0004390324545156984,
      "loss": 2.9988,
      "step": 79861
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.545527458190918,
      "learning_rate": 0.0004390288297431066,
      "loss": 3.0768,
      "step": 79862
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5760133266448975,
      "learning_rate": 0.00043902520494466666,
      "loss": 2.9732,
      "step": 79863
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8597005605697632,
      "learning_rate": 0.00043902158012037936,
      "loss": 3.1246,
      "step": 79864
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7428282499313354,
      "learning_rate": 0.0004390179552702453,
      "loss": 2.9217,
      "step": 79865
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5803329944610596,
      "learning_rate": 0.0004390143303942652,
      "loss": 3.1773,
      "step": 79866
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5471668243408203,
      "learning_rate": 0.0004390107054924397,
      "loss": 2.8795,
      "step": 79867
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5245535373687744,
      "learning_rate": 0.00043900708056476953,
      "loss": 2.931,
      "step": 79868
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8127752542495728,
      "learning_rate": 0.00043900345561125525,
      "loss": 3.1624,
      "step": 79869
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6619035005569458,
      "learning_rate": 0.00043899983063189766,
      "loss": 2.7327,
      "step": 79870
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8037316799163818,
      "learning_rate": 0.00043899620562669735,
      "loss": 3.1738,
      "step": 79871
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6081278324127197,
      "learning_rate": 0.000438992580595655,
      "loss": 2.8799,
      "step": 79872
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7626731395721436,
      "learning_rate": 0.0004389889555387713,
      "loss": 3.0983,
      "step": 79873
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7583842277526855,
      "learning_rate": 0.000438985330456047,
      "loss": 2.9767,
      "step": 79874
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6475030183792114,
      "learning_rate": 0.0004389817053474827,
      "loss": 3.2814,
      "step": 79875
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.734138250350952,
      "learning_rate": 0.00043897808021307893,
      "loss": 3.1271,
      "step": 79876
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1814124584198,
      "learning_rate": 0.0004389744550528367,
      "loss": 3.1483,
      "step": 79877
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7402173280715942,
      "learning_rate": 0.0004389708298667564,
      "loss": 3.0008,
      "step": 79878
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.401211738586426,
      "learning_rate": 0.00043896720465483877,
      "loss": 2.8193,
      "step": 79879
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1401007175445557,
      "learning_rate": 0.0004389635794170846,
      "loss": 3.3383,
      "step": 79880
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2484560012817383,
      "learning_rate": 0.00043895995415349444,
      "loss": 3.278,
      "step": 79881
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5255851745605469,
      "learning_rate": 0.00043895632886406896,
      "loss": 2.9739,
      "step": 79882
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.710249423980713,
      "learning_rate": 0.00043895270354880894,
      "loss": 3.1024,
      "step": 79883
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9796315431594849,
      "learning_rate": 0.00043894907820771494,
      "loss": 3.0006,
      "step": 79884
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8543473482131958,
      "learning_rate": 0.0004389454528407877,
      "loss": 2.755,
      "step": 79885
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5803775787353516,
      "learning_rate": 0.00043894182744802794,
      "loss": 3.0843,
      "step": 79886
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6328939199447632,
      "learning_rate": 0.00043893820202943614,
      "loss": 3.1392,
      "step": 79887
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6371902227401733,
      "learning_rate": 0.0004389345765850132,
      "loss": 3.0612,
      "step": 79888
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5348985195159912,
      "learning_rate": 0.00043893095111475973,
      "loss": 3.099,
      "step": 79889
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5777684450149536,
      "learning_rate": 0.0004389273256186763,
      "loss": 2.7903,
      "step": 79890
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9749711751937866,
      "learning_rate": 0.00043892370009676363,
      "loss": 3.1798,
      "step": 79891
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4557321071624756,
      "learning_rate": 0.00043892007454902253,
      "loss": 3.1007,
      "step": 79892
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8087360858917236,
      "learning_rate": 0.0004389164489754535,
      "loss": 3.0826,
      "step": 79893
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5451362133026123,
      "learning_rate": 0.0004389128233760573,
      "loss": 3.0555,
      "step": 79894
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.062988042831421,
      "learning_rate": 0.00043890919775083456,
      "loss": 2.9358,
      "step": 79895
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.240865707397461,
      "learning_rate": 0.00043890557209978596,
      "loss": 2.989,
      "step": 79896
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.82603919506073,
      "learning_rate": 0.0004389019464229122,
      "loss": 3.1825,
      "step": 79897
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.514174461364746,
      "learning_rate": 0.000438898320720214,
      "loss": 2.8542,
      "step": 79898
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6113743782043457,
      "learning_rate": 0.00043889469499169197,
      "loss": 3.0407,
      "step": 79899
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.883338212966919,
      "learning_rate": 0.0004388910692373467,
      "loss": 2.9981,
      "step": 79900
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.866379976272583,
      "learning_rate": 0.00043888744345717913,
      "loss": 2.9559,
      "step": 79901
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5017520189285278,
      "learning_rate": 0.00043888381765118964,
      "loss": 3.0607,
      "step": 79902
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.687757134437561,
      "learning_rate": 0.000438880191819379,
      "loss": 2.8636,
      "step": 79903
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.660659909248352,
      "learning_rate": 0.00043887656596174794,
      "loss": 3.0817,
      "step": 79904
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.668199062347412,
      "learning_rate": 0.0004388729400782972,
      "loss": 2.88,
      "step": 79905
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.480369210243225,
      "learning_rate": 0.00043886931416902726,
      "loss": 2.8601,
      "step": 79906
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.287848711013794,
      "learning_rate": 0.00043886568823393895,
      "loss": 3.1033,
      "step": 79907
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.244189739227295,
      "learning_rate": 0.00043886206227303285,
      "loss": 3.2108,
      "step": 79908
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5035516023635864,
      "learning_rate": 0.00043885843628630963,
      "loss": 3.1114,
      "step": 79909
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5127991437911987,
      "learning_rate": 0.00043885481027377003,
      "loss": 3.172,
      "step": 79910
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.263827323913574,
      "learning_rate": 0.00043885118423541477,
      "loss": 3.0363,
      "step": 79911
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8109912872314453,
      "learning_rate": 0.0004388475581712444,
      "loss": 3.0051,
      "step": 79912
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.229673147201538,
      "learning_rate": 0.0004388439320812596,
      "loss": 2.9983,
      "step": 79913
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4784634113311768,
      "learning_rate": 0.0004388403059654612,
      "loss": 3.0151,
      "step": 79914
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.163696527481079,
      "learning_rate": 0.00043883667982384973,
      "loss": 3.0132,
      "step": 79915
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3676657676696777,
      "learning_rate": 0.00043883305365642584,
      "loss": 2.9447,
      "step": 79916
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.771731972694397,
      "learning_rate": 0.00043882942746319034,
      "loss": 3.1338,
      "step": 79917
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9279773235321045,
      "learning_rate": 0.0004388258012441438,
      "loss": 3.0566,
      "step": 79918
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.015571117401123,
      "learning_rate": 0.00043882217499928693,
      "loss": 2.9303,
      "step": 79919
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8192094564437866,
      "learning_rate": 0.0004388185487286204,
      "loss": 2.9827,
      "step": 79920
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5157067775726318,
      "learning_rate": 0.00043881492243214487,
      "loss": 3.0202,
      "step": 79921
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5968248844146729,
      "learning_rate": 0.00043881129610986097,
      "loss": 2.9279,
      "step": 79922
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7569947242736816,
      "learning_rate": 0.0004388076697617696,
      "loss": 3.0685,
      "step": 79923
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5208934545516968,
      "learning_rate": 0.00043880404338787104,
      "loss": 3.0284,
      "step": 79924
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7967441082000732,
      "learning_rate": 0.0004388004169881663,
      "loss": 3.3136,
      "step": 79925
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4308316707611084,
      "learning_rate": 0.00043879679056265595,
      "loss": 2.9729,
      "step": 79926
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3993232250213623,
      "learning_rate": 0.00043879316411134066,
      "loss": 3.0175,
      "step": 79927
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9943203926086426,
      "learning_rate": 0.0004387895376342211,
      "loss": 3.0659,
      "step": 79928
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6363073587417603,
      "learning_rate": 0.0004387859111312979,
      "loss": 3.2105,
      "step": 79929
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.508549928665161,
      "learning_rate": 0.00043878228460257184,
      "loss": 2.9437,
      "step": 79930
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7971512079238892,
      "learning_rate": 0.0004387786580480435,
      "loss": 3.1181,
      "step": 79931
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2821881771087646,
      "learning_rate": 0.0004387750314677136,
      "loss": 3.0508,
      "step": 79932
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.154395580291748,
      "learning_rate": 0.0004387714048615828,
      "loss": 3.2088,
      "step": 79933
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7626655101776123,
      "learning_rate": 0.0004387677782296517,
      "loss": 3.1577,
      "step": 79934
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4743030071258545,
      "learning_rate": 0.00043876415157192115,
      "loss": 2.8042,
      "step": 79935
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.329745054244995,
      "learning_rate": 0.0004387605248883917,
      "loss": 3.0658,
      "step": 79936
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.308055877685547,
      "learning_rate": 0.00043875689817906405,
      "loss": 3.0574,
      "step": 79937
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5171895027160645,
      "learning_rate": 0.0004387532714439389,
      "loss": 2.862,
      "step": 79938
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5251479148864746,
      "learning_rate": 0.0004387496446830168,
      "loss": 2.9453,
      "step": 79939
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8339650630950928,
      "learning_rate": 0.0004387460178962986,
      "loss": 3.077,
      "step": 79940
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.09722900390625,
      "learning_rate": 0.0004387423910837849,
      "loss": 3.1569,
      "step": 79941
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9309710264205933,
      "learning_rate": 0.00043873876424547635,
      "loss": 2.9186,
      "step": 79942
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.494702696800232,
      "learning_rate": 0.00043873513738137363,
      "loss": 2.8417,
      "step": 79943
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8510942459106445,
      "learning_rate": 0.0004387315104914774,
      "loss": 3.0756,
      "step": 79944
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0632505416870117,
      "learning_rate": 0.00043872788357578847,
      "loss": 2.8965,
      "step": 79945
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.044713258743286,
      "learning_rate": 0.0004387242566343074,
      "loss": 2.9135,
      "step": 79946
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8826179504394531,
      "learning_rate": 0.0004387206296670348,
      "loss": 3.1161,
      "step": 79947
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.1896309852600098,
      "learning_rate": 0.00043871700267397143,
      "loss": 2.8445,
      "step": 79948
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.7694787979125977,
      "learning_rate": 0.000438713375655118,
      "loss": 2.9609,
      "step": 79949
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1101672649383545,
      "learning_rate": 0.000438709748610475,
      "loss": 2.8987,
      "step": 79950
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5264052152633667,
      "learning_rate": 0.00043870612154004335,
      "loss": 2.9469,
      "step": 79951
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.570923089981079,
      "learning_rate": 0.0004387024944438236,
      "loss": 3.0877,
      "step": 79952
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.47819185256958,
      "learning_rate": 0.0004386988673218165,
      "loss": 2.8996,
      "step": 79953
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.95409095287323,
      "learning_rate": 0.0004386952401740225,
      "loss": 3.057,
      "step": 79954
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4993951320648193,
      "learning_rate": 0.00043869161300044257,
      "loss": 3.0279,
      "step": 79955
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6762973070144653,
      "learning_rate": 0.0004386879858010772,
      "loss": 2.9827,
      "step": 79956
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0892446041107178,
      "learning_rate": 0.00043868435857592715,
      "loss": 2.8372,
      "step": 79957
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2480359077453613,
      "learning_rate": 0.000438680731324993,
      "loss": 3.1072,
      "step": 79958
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5275212526321411,
      "learning_rate": 0.0004386771040482756,
      "loss": 2.8764,
      "step": 79959
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5920937061309814,
      "learning_rate": 0.00043867347674577545,
      "loss": 3.2102,
      "step": 79960
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1912479400634766,
      "learning_rate": 0.0004386698494174932,
      "loss": 2.8331,
      "step": 79961
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9096640348434448,
      "learning_rate": 0.0004386662220634297,
      "loss": 3.1067,
      "step": 79962
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5897377729415894,
      "learning_rate": 0.00043866259468358545,
      "loss": 3.0051,
      "step": 79963
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9916261434555054,
      "learning_rate": 0.0004386589672779613,
      "loss": 3.0342,
      "step": 79964
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0587363243103027,
      "learning_rate": 0.00043865533984655783,
      "loss": 2.9402,
      "step": 79965
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.494673728942871,
      "learning_rate": 0.00043865171238937565,
      "loss": 3.2022,
      "step": 79966
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4411957263946533,
      "learning_rate": 0.0004386480849064155,
      "loss": 3.104,
      "step": 79967
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8688089847564697,
      "learning_rate": 0.0004386444573976782,
      "loss": 3.1113,
      "step": 79968
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8996344804763794,
      "learning_rate": 0.00043864082986316404,
      "loss": 3.3411,
      "step": 79969
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3854361772537231,
      "learning_rate": 0.0004386372023028741,
      "loss": 3.0972,
      "step": 79970
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.640547275543213,
      "learning_rate": 0.00043863357471680877,
      "loss": 3.0059,
      "step": 79971
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3489079475402832,
      "learning_rate": 0.00043862994710496895,
      "loss": 3.1334,
      "step": 79972
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5235775709152222,
      "learning_rate": 0.00043862631946735514,
      "loss": 3.0869,
      "step": 79973
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6732351779937744,
      "learning_rate": 0.0004386226918039681,
      "loss": 3.0179,
      "step": 79974
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6078453063964844,
      "learning_rate": 0.0004386190641148085,
      "loss": 3.2541,
      "step": 79975
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6443232297897339,
      "learning_rate": 0.000438615436399877,
      "loss": 2.9901,
      "step": 79976
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7122306823730469,
      "learning_rate": 0.00043861180865917423,
      "loss": 2.9946,
      "step": 79977
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.091306447982788,
      "learning_rate": 0.00043860818089270105,
      "loss": 2.9822,
      "step": 79978
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.064413547515869,
      "learning_rate": 0.0004386045531004578,
      "loss": 3.0222,
      "step": 79979
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9099953174591064,
      "learning_rate": 0.0004386009252824455,
      "loss": 3.0269,
      "step": 79980
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0138726234436035,
      "learning_rate": 0.0004385972974386646,
      "loss": 3.2173,
      "step": 79981
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.353404998779297,
      "learning_rate": 0.00043859366956911584,
      "loss": 3.1829,
      "step": 79982
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.104487180709839,
      "learning_rate": 0.0004385900416737999,
      "loss": 2.8481,
      "step": 79983
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8991707563400269,
      "learning_rate": 0.0004385864137527175,
      "loss": 2.9588,
      "step": 79984
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9762885570526123,
      "learning_rate": 0.0004385827858058692,
      "loss": 3.312,
      "step": 79985
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9136325120925903,
      "learning_rate": 0.0004385791578332557,
      "loss": 2.9128,
      "step": 79986
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.0147526264190674,
      "learning_rate": 0.00043857552983487787,
      "loss": 2.8145,
      "step": 79987
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0469024181365967,
      "learning_rate": 0.0004385719018107362,
      "loss": 3.1187,
      "step": 79988
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6158711910247803,
      "learning_rate": 0.0004385682737608313,
      "loss": 3.0241,
      "step": 79989
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6202735900878906,
      "learning_rate": 0.00043856464568516404,
      "loss": 2.9239,
      "step": 79990
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.780020833015442,
      "learning_rate": 0.00043856101758373494,
      "loss": 2.8406,
      "step": 79991
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.619870901107788,
      "learning_rate": 0.00043855738945654477,
      "loss": 3.0249,
      "step": 79992
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2563681602478027,
      "learning_rate": 0.00043855376130359417,
      "loss": 3.1372,
      "step": 79993
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8576401472091675,
      "learning_rate": 0.00043855013312488374,
      "loss": 2.9845,
      "step": 79994
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.740476369857788,
      "learning_rate": 0.0004385465049204143,
      "loss": 2.8603,
      "step": 79995
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.9028236865997314,
      "learning_rate": 0.0004385428766901864,
      "loss": 2.816,
      "step": 79996
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6556793451309204,
      "learning_rate": 0.00043853924843420083,
      "loss": 3.1551,
      "step": 79997
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7865248918533325,
      "learning_rate": 0.00043853562015245813,
      "loss": 2.9214,
      "step": 79998
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2226550579071045,
      "learning_rate": 0.00043853199184495906,
      "loss": 3.1681,
      "step": 79999
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.213663101196289,
      "learning_rate": 0.0004385283635117043,
      "loss": 3.2457,
      "step": 80000
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8052427768707275,
      "learning_rate": 0.00043852473515269445,
      "loss": 3.0451,
      "step": 80001
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7531307935714722,
      "learning_rate": 0.0004385211067679303,
      "loss": 2.8402,
      "step": 80002
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.611763834953308,
      "learning_rate": 0.0004385174783574124,
      "loss": 3.2118,
      "step": 80003
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8370035886764526,
      "learning_rate": 0.00043851384992114157,
      "loss": 3.0983,
      "step": 80004
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9720284938812256,
      "learning_rate": 0.0004385102214591183,
      "loss": 2.9592,
      "step": 80005
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9260637760162354,
      "learning_rate": 0.00043850659297134343,
      "loss": 2.922,
      "step": 80006
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5351004600524902,
      "learning_rate": 0.0004385029644578176,
      "loss": 3.028,
      "step": 80007
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.615908622741699,
      "learning_rate": 0.00043849933591854143,
      "loss": 3.0251,
      "step": 80008
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.711921215057373,
      "learning_rate": 0.00043849570735351556,
      "loss": 2.9035,
      "step": 80009
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7802640199661255,
      "learning_rate": 0.00043849207876274076,
      "loss": 3.4279,
      "step": 80010
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2627294063568115,
      "learning_rate": 0.0004384884501462177,
      "loss": 2.8193,
      "step": 80011
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.694805383682251,
      "learning_rate": 0.000438484821503947,
      "loss": 3.1185,
      "step": 80012
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.469879150390625,
      "learning_rate": 0.0004384811928359293,
      "loss": 2.9576,
      "step": 80013
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6045665740966797,
      "learning_rate": 0.0004384775641421655,
      "loss": 2.698,
      "step": 80014
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5744513273239136,
      "learning_rate": 0.00043847393542265593,
      "loss": 2.9907,
      "step": 80015
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5614893436431885,
      "learning_rate": 0.00043847030667740146,
      "loss": 2.975,
      "step": 80016
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6279138326644897,
      "learning_rate": 0.0004384666779064028,
      "loss": 2.937,
      "step": 80017
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.592110514640808,
      "learning_rate": 0.0004384630491096606,
      "loss": 2.7943,
      "step": 80018
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.482399344444275,
      "learning_rate": 0.00043845942028717543,
      "loss": 2.8548,
      "step": 80019
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9848741292953491,
      "learning_rate": 0.0004384557914389481,
      "loss": 2.9465,
      "step": 80020
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8532582521438599,
      "learning_rate": 0.0004384521625649792,
      "loss": 3.0111,
      "step": 80021
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6875203847885132,
      "learning_rate": 0.0004384485336652694,
      "loss": 2.9502,
      "step": 80022
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.434738039970398,
      "learning_rate": 0.00043844490473981954,
      "loss": 3.0988,
      "step": 80023
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6740995645523071,
      "learning_rate": 0.00043844127578862997,
      "loss": 2.8364,
      "step": 80024
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4311134815216064,
      "learning_rate": 0.0004384376468117016,
      "loss": 3.0966,
      "step": 80025
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3440302610397339,
      "learning_rate": 0.0004384340178090351,
      "loss": 3.1207,
      "step": 80026
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9768520593643188,
      "learning_rate": 0.00043843038878063113,
      "loss": 3.0127,
      "step": 80027
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4574111700057983,
      "learning_rate": 0.00043842675972649033,
      "loss": 2.8171,
      "step": 80028
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0260536670684814,
      "learning_rate": 0.00043842313064661333,
      "loss": 3.069,
      "step": 80029
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4646238088607788,
      "learning_rate": 0.0004384195015410009,
      "loss": 3.0455,
      "step": 80030
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7756308317184448,
      "learning_rate": 0.0004384158724096537,
      "loss": 2.987,
      "step": 80031
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4360589981079102,
      "learning_rate": 0.0004384122432525723,
      "loss": 3.0739,
      "step": 80032
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6957049369812012,
      "learning_rate": 0.0004384086140697574,
      "loss": 3.0017,
      "step": 80033
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5650535821914673,
      "learning_rate": 0.0004384049848612099,
      "loss": 2.8,
      "step": 80034
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6525276899337769,
      "learning_rate": 0.00043840135562693013,
      "loss": 3.055,
      "step": 80035
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3727086782455444,
      "learning_rate": 0.000438397726366919,
      "loss": 2.8725,
      "step": 80036
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8274930715560913,
      "learning_rate": 0.00043839409708117724,
      "loss": 3.0923,
      "step": 80037
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4208357334136963,
      "learning_rate": 0.0004383904677697053,
      "loss": 3.0578,
      "step": 80038
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7057095766067505,
      "learning_rate": 0.0004383868384325039,
      "loss": 3.0205,
      "step": 80039
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4261767864227295,
      "learning_rate": 0.00043838320906957386,
      "loss": 3.2188,
      "step": 80040
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8423817157745361,
      "learning_rate": 0.0004383795796809157,
      "loss": 2.7672,
      "step": 80041
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.552268385887146,
      "learning_rate": 0.00043837595026653025,
      "loss": 3.0767,
      "step": 80042
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.787329912185669,
      "learning_rate": 0.00043837232082641803,
      "loss": 2.7334,
      "step": 80043
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5228345394134521,
      "learning_rate": 0.0004383686913605798,
      "loss": 2.9293,
      "step": 80044
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9409213066101074,
      "learning_rate": 0.0004383650618690163,
      "loss": 3.1416,
      "step": 80045
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7098044157028198,
      "learning_rate": 0.000438361432351728,
      "loss": 2.9341,
      "step": 80046
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0116379261016846,
      "learning_rate": 0.0004383578028087157,
      "loss": 2.8854,
      "step": 80047
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.404849052429199,
      "learning_rate": 0.00043835417323998015,
      "loss": 3.1016,
      "step": 80048
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.800575613975525,
      "learning_rate": 0.0004383505436455219,
      "loss": 3.0853,
      "step": 80049
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.856932520866394,
      "learning_rate": 0.0004383469140253417,
      "loss": 2.9016,
      "step": 80050
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7737706899642944,
      "learning_rate": 0.00043834328437944024,
      "loss": 2.8961,
      "step": 80051
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9462499618530273,
      "learning_rate": 0.0004383396547078181,
      "loss": 3.1029,
      "step": 80052
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0054819583892822,
      "learning_rate": 0.000438336025010476,
      "loss": 3.0919,
      "step": 80053
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7343212366104126,
      "learning_rate": 0.00043833239528741466,
      "loss": 3.0176,
      "step": 80054
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6960033178329468,
      "learning_rate": 0.00043832876553863464,
      "loss": 2.9386,
      "step": 80055
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6305606365203857,
      "learning_rate": 0.00043832513576413674,
      "loss": 3.0437,
      "step": 80056
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.780423641204834,
      "learning_rate": 0.0004383215059639216,
      "loss": 3.2108,
      "step": 80057
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4803475141525269,
      "learning_rate": 0.00043831787613798993,
      "loss": 3.0322,
      "step": 80058
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7996548414230347,
      "learning_rate": 0.00043831424628634226,
      "loss": 3.1043,
      "step": 80059
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7593469619750977,
      "learning_rate": 0.0004383106164089794,
      "loss": 2.9768,
      "step": 80060
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6701523065567017,
      "learning_rate": 0.000438306986505902,
      "loss": 3.0494,
      "step": 80061
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7381826639175415,
      "learning_rate": 0.00043830335657711063,
      "loss": 3.0064,
      "step": 80062
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9214198589324951,
      "learning_rate": 0.00043829972662260617,
      "loss": 3.0954,
      "step": 80063
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3656344413757324,
      "learning_rate": 0.0004382960966423891,
      "loss": 2.6666,
      "step": 80064
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2547028064727783,
      "learning_rate": 0.0004382924666364602,
      "loss": 2.8884,
      "step": 80065
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8608083724975586,
      "learning_rate": 0.0004382888366048202,
      "loss": 3.0936,
      "step": 80066
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5417851209640503,
      "learning_rate": 0.0004382852065474696,
      "loss": 3.0713,
      "step": 80067
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1673176288604736,
      "learning_rate": 0.0004382815764644092,
      "loss": 2.9399,
      "step": 80068
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6854091882705688,
      "learning_rate": 0.0004382779463556396,
      "loss": 3.1007,
      "step": 80069
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8208088874816895,
      "learning_rate": 0.0004382743162211616,
      "loss": 3.2276,
      "step": 80070
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8813821077346802,
      "learning_rate": 0.00043827068606097576,
      "loss": 3.1544,
      "step": 80071
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8735735416412354,
      "learning_rate": 0.00043826705587508277,
      "loss": 3.0016,
      "step": 80072
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.356900215148926,
      "learning_rate": 0.00043826342566348334,
      "loss": 3.0182,
      "step": 80073
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6109797954559326,
      "learning_rate": 0.0004382597954261781,
      "loss": 3.0638,
      "step": 80074
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6076380014419556,
      "learning_rate": 0.00043825616516316785,
      "loss": 3.1664,
      "step": 80075
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.446966290473938,
      "learning_rate": 0.0004382525348744531,
      "loss": 3.2655,
      "step": 80076
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.138343334197998,
      "learning_rate": 0.00043824890456003454,
      "loss": 3.2892,
      "step": 80077
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.875643014907837,
      "learning_rate": 0.000438245274219913,
      "loss": 3.0675,
      "step": 80078
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.76948082447052,
      "learning_rate": 0.000438241643854089,
      "loss": 2.8556,
      "step": 80079
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.884162664413452,
      "learning_rate": 0.00043823801346256323,
      "loss": 3.1392,
      "step": 80080
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.515753984451294,
      "learning_rate": 0.00043823438304533646,
      "loss": 3.0853,
      "step": 80081
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4902007579803467,
      "learning_rate": 0.00043823075260240933,
      "loss": 2.8575,
      "step": 80082
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.519606351852417,
      "learning_rate": 0.0004382271221337824,
      "loss": 2.9834,
      "step": 80083
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.57332706451416,
      "learning_rate": 0.0004382234916394566,
      "loss": 3.2851,
      "step": 80084
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6716179847717285,
      "learning_rate": 0.00043821986111943227,
      "loss": 3.1025,
      "step": 80085
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4160428047180176,
      "learning_rate": 0.0004382162305737103,
      "loss": 3.2174,
      "step": 80086
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1856281757354736,
      "learning_rate": 0.00043821260000229145,
      "loss": 3.0058,
      "step": 80087
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2773938179016113,
      "learning_rate": 0.00043820896940517613,
      "loss": 2.8577,
      "step": 80088
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5409358739852905,
      "learning_rate": 0.00043820533878236517,
      "loss": 2.9097,
      "step": 80089
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.520125389099121,
      "learning_rate": 0.00043820170813385926,
      "loss": 2.9708,
      "step": 80090
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7448323965072632,
      "learning_rate": 0.00043819807745965907,
      "loss": 2.8896,
      "step": 80091
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3768736124038696,
      "learning_rate": 0.0004381944467597652,
      "loss": 3.1427,
      "step": 80092
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4943972826004028,
      "learning_rate": 0.0004381908160341784,
      "loss": 2.9556,
      "step": 80093
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6619888544082642,
      "learning_rate": 0.00043818718528289936,
      "loss": 2.9393,
      "step": 80094
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4612040519714355,
      "learning_rate": 0.0004381835545059286,
      "loss": 2.8974,
      "step": 80095
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.746155023574829,
      "learning_rate": 0.000438179923703267,
      "loss": 3.1397,
      "step": 80096
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5124084949493408,
      "learning_rate": 0.00043817629287491516,
      "loss": 3.0145,
      "step": 80097
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.433031439781189,
      "learning_rate": 0.0004381726620208737,
      "loss": 3.1592,
      "step": 80098
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.513237714767456,
      "learning_rate": 0.00043816903114114335,
      "loss": 3.0307,
      "step": 80099
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.375227928161621,
      "learning_rate": 0.00043816540023572473,
      "loss": 2.9253,
      "step": 80100
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1759555339813232,
      "learning_rate": 0.00043816176930461855,
      "loss": 3.1859,
      "step": 80101
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.011857748031616,
      "learning_rate": 0.0004381581383478255,
      "loss": 3.0709,
      "step": 80102
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2022337913513184,
      "learning_rate": 0.0004381545073653463,
      "loss": 2.8968,
      "step": 80103
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.5745625495910645,
      "learning_rate": 0.0004381508763571815,
      "loss": 2.5915,
      "step": 80104
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.463200330734253,
      "learning_rate": 0.0004381472453233318,
      "loss": 3.1553,
      "step": 80105
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5811872482299805,
      "learning_rate": 0.000438143614263798,
      "loss": 3.0227,
      "step": 80106
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4928195476531982,
      "learning_rate": 0.00043813998317858075,
      "loss": 3.0923,
      "step": 80107
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5207034349441528,
      "learning_rate": 0.00043813635206768056,
      "loss": 3.1184,
      "step": 80108
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8320281505584717,
      "learning_rate": 0.0004381327209310983,
      "loss": 3.0319,
      "step": 80109
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1234474182128906,
      "learning_rate": 0.0004381290897688345,
      "loss": 2.9452,
      "step": 80110
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5966836214065552,
      "learning_rate": 0.0004381254585808899,
      "loss": 3.2376,
      "step": 80111
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.594949722290039,
      "learning_rate": 0.0004381218273672652,
      "loss": 3.0356,
      "step": 80112
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.528536796569824,
      "learning_rate": 0.0004381181961279611,
      "loss": 3.2086,
      "step": 80113
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7303118705749512,
      "learning_rate": 0.0004381145648629781,
      "loss": 2.6722,
      "step": 80114
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7242554426193237,
      "learning_rate": 0.00043811093357231707,
      "loss": 2.9966,
      "step": 80115
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2335493564605713,
      "learning_rate": 0.00043810730225597857,
      "loss": 3.2681,
      "step": 80116
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.568100690841675,
      "learning_rate": 0.0004381036709139633,
      "loss": 3.1831,
      "step": 80117
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.565303087234497,
      "learning_rate": 0.00043810003954627204,
      "loss": 2.8126,
      "step": 80118
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6070408821105957,
      "learning_rate": 0.00043809640815290526,
      "loss": 2.8733,
      "step": 80119
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.430802583694458,
      "learning_rate": 0.00043809277673386376,
      "loss": 2.9888,
      "step": 80120
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.06209135055542,
      "learning_rate": 0.0004380891452891483,
      "loss": 3.147,
      "step": 80121
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.667052149772644,
      "learning_rate": 0.00043808551381875946,
      "loss": 2.9206,
      "step": 80122
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5137975215911865,
      "learning_rate": 0.00043808188232269786,
      "loss": 3.1002,
      "step": 80123
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.5402045249938965,
      "learning_rate": 0.0004380782508009642,
      "loss": 3.0304,
      "step": 80124
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2375950813293457,
      "learning_rate": 0.0004380746192535592,
      "loss": 3.0876,
      "step": 80125
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6176185607910156,
      "learning_rate": 0.0004380709876804836,
      "loss": 2.9763,
      "step": 80126
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1927309036254883,
      "learning_rate": 0.0004380673560817379,
      "loss": 3.0727,
      "step": 80127
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.114837646484375,
      "learning_rate": 0.0004380637244573229,
      "loss": 3.1659,
      "step": 80128
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.6673150062561035,
      "learning_rate": 0.0004380600928072393,
      "loss": 2.947,
      "step": 80129
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.618870496749878,
      "learning_rate": 0.00043805646113148774,
      "loss": 2.8885,
      "step": 80130
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6570366621017456,
      "learning_rate": 0.00043805282943006876,
      "loss": 3.0902,
      "step": 80131
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4925105571746826,
      "learning_rate": 0.00043804919770298325,
      "loss": 2.8752,
      "step": 80132
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.964329957962036,
      "learning_rate": 0.0004380455659502318,
      "loss": 3.0145,
      "step": 80133
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.967055082321167,
      "learning_rate": 0.000438041934171815,
      "loss": 3.0389,
      "step": 80134
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8357945680618286,
      "learning_rate": 0.0004380383023677336,
      "loss": 3.1712,
      "step": 80135
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5966453552246094,
      "learning_rate": 0.0004380346705379884,
      "loss": 3.0628,
      "step": 80136
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7782185077667236,
      "learning_rate": 0.00043803103868257984,
      "loss": 3.0007,
      "step": 80137
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.11760950088501,
      "learning_rate": 0.00043802740680150867,
      "loss": 2.9684,
      "step": 80138
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6212607622146606,
      "learning_rate": 0.00043802377489477563,
      "loss": 3.0622,
      "step": 80139
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.8966286182403564,
      "learning_rate": 0.0004380201429623814,
      "loss": 3.0259,
      "step": 80140
    },
    {
      "epoch": 1.04,
      "grad_norm": 5.135875701904297,
      "learning_rate": 0.00043801651100432664,
      "loss": 2.9857,
      "step": 80141
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.7810540199279785,
      "learning_rate": 0.000438012879020612,
      "loss": 3.1689,
      "step": 80142
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4119975566864014,
      "learning_rate": 0.00043800924701123807,
      "loss": 3.0767,
      "step": 80143
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2613322734832764,
      "learning_rate": 0.0004380056149762057,
      "loss": 2.9449,
      "step": 80144
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1282668113708496,
      "learning_rate": 0.00043800198291551546,
      "loss": 3.1588,
      "step": 80145
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.3756308555603027,
      "learning_rate": 0.00043799835082916806,
      "loss": 3.1471,
      "step": 80146
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.949552059173584,
      "learning_rate": 0.0004379947187171641,
      "loss": 2.9092,
      "step": 80147
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0247395038604736,
      "learning_rate": 0.0004379910865795044,
      "loss": 2.9301,
      "step": 80148
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8961261510849,
      "learning_rate": 0.0004379874544161895,
      "loss": 2.9215,
      "step": 80149
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.040027141571045,
      "learning_rate": 0.00043798382222722015,
      "loss": 2.7298,
      "step": 80150
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7055128812789917,
      "learning_rate": 0.000437980190012597,
      "loss": 2.5856,
      "step": 80151
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9487428665161133,
      "learning_rate": 0.0004379765577723207,
      "loss": 2.6952,
      "step": 80152
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7770546674728394,
      "learning_rate": 0.00043797292550639197,
      "loss": 3.3346,
      "step": 80153
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3711540699005127,
      "learning_rate": 0.0004379692932148115,
      "loss": 3.0685,
      "step": 80154
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8935418128967285,
      "learning_rate": 0.0004379656608975799,
      "loss": 3.2876,
      "step": 80155
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0532941818237305,
      "learning_rate": 0.0004379620285546979,
      "loss": 3.1393,
      "step": 80156
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9730892181396484,
      "learning_rate": 0.00043795839618616613,
      "loss": 2.7654,
      "step": 80157
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5612058639526367,
      "learning_rate": 0.0004379547637919853,
      "loss": 2.8896,
      "step": 80158
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4781599044799805,
      "learning_rate": 0.0004379511313721561,
      "loss": 3.2804,
      "step": 80159
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7099764347076416,
      "learning_rate": 0.00043794749892667916,
      "loss": 2.8845,
      "step": 80160
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.307115077972412,
      "learning_rate": 0.00043794386645555514,
      "loss": 3.2196,
      "step": 80161
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6378885507583618,
      "learning_rate": 0.00043794023395878485,
      "loss": 2.7708,
      "step": 80162
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5456959009170532,
      "learning_rate": 0.0004379366014363688,
      "loss": 2.9206,
      "step": 80163
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2296950817108154,
      "learning_rate": 0.0004379329688883078,
      "loss": 2.8751,
      "step": 80164
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9278095960617065,
      "learning_rate": 0.0004379293363146023,
      "loss": 2.8597,
      "step": 80165
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5089987516403198,
      "learning_rate": 0.00043792570371525326,
      "loss": 3.1417,
      "step": 80166
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.309574604034424,
      "learning_rate": 0.00043792207109026116,
      "loss": 2.8865,
      "step": 80167
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0231950283050537,
      "learning_rate": 0.00043791843843962676,
      "loss": 2.8631,
      "step": 80168
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4858248233795166,
      "learning_rate": 0.00043791480576335076,
      "loss": 3.3256,
      "step": 80169
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.950932264328003,
      "learning_rate": 0.00043791117306143376,
      "loss": 2.8724,
      "step": 80170
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.027859687805176,
      "learning_rate": 0.00043790754033387646,
      "loss": 2.8667,
      "step": 80171
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.100216865539551,
      "learning_rate": 0.0004379039075806795,
      "loss": 2.9764,
      "step": 80172
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.822728157043457,
      "learning_rate": 0.0004379002748018438,
      "loss": 2.9653,
      "step": 80173
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1305766105651855,
      "learning_rate": 0.00043789664199736966,
      "loss": 3.0985,
      "step": 80174
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.13150954246521,
      "learning_rate": 0.00043789300916725795,
      "loss": 2.6697,
      "step": 80175
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2908449172973633,
      "learning_rate": 0.00043788937631150936,
      "loss": 2.8571,
      "step": 80176
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4820103645324707,
      "learning_rate": 0.00043788574343012454,
      "loss": 2.7296,
      "step": 80177
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6045140027999878,
      "learning_rate": 0.00043788211052310413,
      "loss": 2.8963,
      "step": 80178
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.242102861404419,
      "learning_rate": 0.00043787847759044885,
      "loss": 3.0885,
      "step": 80179
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.076295852661133,
      "learning_rate": 0.00043787484463215934,
      "loss": 3.015,
      "step": 80180
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0370266437530518,
      "learning_rate": 0.0004378712116482363,
      "loss": 3.0465,
      "step": 80181
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6571085453033447,
      "learning_rate": 0.00043786757863868047,
      "loss": 3.0547,
      "step": 80182
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8956258296966553,
      "learning_rate": 0.0004378639456034923,
      "loss": 3.1389,
      "step": 80183
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.941775918006897,
      "learning_rate": 0.0004378603125426727,
      "loss": 2.9836,
      "step": 80184
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7029814720153809,
      "learning_rate": 0.0004378566794562224,
      "loss": 3.0044,
      "step": 80185
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5045832395553589,
      "learning_rate": 0.0004378530463441417,
      "loss": 3.049,
      "step": 80186
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2905189990997314,
      "learning_rate": 0.0004378494132064316,
      "loss": 3.2403,
      "step": 80187
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.720816731452942,
      "learning_rate": 0.0004378457800430928,
      "loss": 3.2334,
      "step": 80188
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.78681480884552,
      "learning_rate": 0.0004378421468541258,
      "loss": 2.9267,
      "step": 80189
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9257789850234985,
      "learning_rate": 0.00043783851363953126,
      "loss": 2.9962,
      "step": 80190
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7760297060012817,
      "learning_rate": 0.00043783488039931005,
      "loss": 3.0219,
      "step": 80191
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6378824710845947,
      "learning_rate": 0.00043783124713346274,
      "loss": 3.019,
      "step": 80192
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.472127079963684,
      "learning_rate": 0.00043782761384198994,
      "loss": 3.2121,
      "step": 80193
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7193769216537476,
      "learning_rate": 0.0004378239805248924,
      "loss": 2.8266,
      "step": 80194
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.684225559234619,
      "learning_rate": 0.00043782034718217077,
      "loss": 2.8676,
      "step": 80195
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.4308385848999023,
      "learning_rate": 0.0004378167138138257,
      "loss": 3.1931,
      "step": 80196
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7518422603607178,
      "learning_rate": 0.000437813080419858,
      "loss": 3.114,
      "step": 80197
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8179839849472046,
      "learning_rate": 0.0004378094470002682,
      "loss": 3.1878,
      "step": 80198
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7389777898788452,
      "learning_rate": 0.00043780581355505697,
      "loss": 3.1029,
      "step": 80199
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5120254755020142,
      "learning_rate": 0.00043780218008422514,
      "loss": 3.0583,
      "step": 80200
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.796074390411377,
      "learning_rate": 0.0004377985465877732,
      "loss": 2.8954,
      "step": 80201
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6845365762710571,
      "learning_rate": 0.0004377949130657019,
      "loss": 2.8851,
      "step": 80202
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6286749839782715,
      "learning_rate": 0.00043779127951801207,
      "loss": 3.1476,
      "step": 80203
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.3584438562393188,
      "learning_rate": 0.00043778764594470406,
      "loss": 3.0981,
      "step": 80204
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7550305128097534,
      "learning_rate": 0.0004377840123457788,
      "loss": 2.9532,
      "step": 80205
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6916035413742065,
      "learning_rate": 0.0004377803787212369,
      "loss": 2.8267,
      "step": 80206
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.588971734046936,
      "learning_rate": 0.00043777674507107907,
      "loss": 3.1981,
      "step": 80207
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8471781015396118,
      "learning_rate": 0.0004377731113953058,
      "loss": 2.9821,
      "step": 80208
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.385571002960205,
      "learning_rate": 0.00043776947769391797,
      "loss": 2.7721,
      "step": 80209
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.552587628364563,
      "learning_rate": 0.00043776584396691627,
      "loss": 2.9326,
      "step": 80210
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.640804409980774,
      "learning_rate": 0.0004377622102143013,
      "loss": 3.0917,
      "step": 80211
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7403442859649658,
      "learning_rate": 0.00043775857643607363,
      "loss": 3.0273,
      "step": 80212
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.880130648612976,
      "learning_rate": 0.0004377549426322341,
      "loss": 2.8879,
      "step": 80213
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.776884913444519,
      "learning_rate": 0.0004377513088027833,
      "loss": 2.9059,
      "step": 80214
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.4337472915649414,
      "learning_rate": 0.00043774767494772195,
      "loss": 3.1354,
      "step": 80215
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.514080286026001,
      "learning_rate": 0.0004377440410670507,
      "loss": 3.134,
      "step": 80216
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7009727954864502,
      "learning_rate": 0.0004377404071607702,
      "loss": 2.9612,
      "step": 80217
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7090177536010742,
      "learning_rate": 0.0004377367732288813,
      "loss": 3.1321,
      "step": 80218
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5305060148239136,
      "learning_rate": 0.00043773313927138435,
      "loss": 3.077,
      "step": 80219
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.919480562210083,
      "learning_rate": 0.00043772950528828033,
      "loss": 3.1535,
      "step": 80220
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7348500490188599,
      "learning_rate": 0.0004377258712795698,
      "loss": 2.9581,
      "step": 80221
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6413379907608032,
      "learning_rate": 0.0004377222372452533,
      "loss": 3.3912,
      "step": 80222
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.8706520795822144,
      "learning_rate": 0.0004377186031853318,
      "loss": 3.2807,
      "step": 80223
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0698702335357666,
      "learning_rate": 0.00043771496909980574,
      "loss": 2.9009,
      "step": 80224
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.855265498161316,
      "learning_rate": 0.00043771133498867577,
      "loss": 2.9298,
      "step": 80225
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6249626874923706,
      "learning_rate": 0.0004377077008519428,
      "loss": 2.9189,
      "step": 80226
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6172045469284058,
      "learning_rate": 0.00043770406668960736,
      "loss": 2.9926,
      "step": 80227
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7031915187835693,
      "learning_rate": 0.00043770043250167007,
      "loss": 2.7758,
      "step": 80228
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.734668254852295,
      "learning_rate": 0.0004376967982881317,
      "loss": 3.1885,
      "step": 80229
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5692451000213623,
      "learning_rate": 0.00043769316404899294,
      "loss": 3.3587,
      "step": 80230
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7036737203598022,
      "learning_rate": 0.0004376895297842544,
      "loss": 2.8686,
      "step": 80231
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6234396696090698,
      "learning_rate": 0.0004376858954939167,
      "loss": 3.0776,
      "step": 80232
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.668670892715454,
      "learning_rate": 0.0004376822611779807,
      "loss": 2.8645,
      "step": 80233
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5332783460617065,
      "learning_rate": 0.00043767862683644684,
      "loss": 3.1502,
      "step": 80234
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.65521240234375,
      "learning_rate": 0.000437674992469316,
      "loss": 3.1584,
      "step": 80235
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.6780320405960083,
      "learning_rate": 0.0004376713580765888,
      "loss": 2.9228,
      "step": 80236
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5917600393295288,
      "learning_rate": 0.0004376677236582659,
      "loss": 2.7798,
      "step": 80237
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7204855680465698,
      "learning_rate": 0.0004376640892143479,
      "loss": 2.8421,
      "step": 80238
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7235066890716553,
      "learning_rate": 0.0004376604547448356,
      "loss": 2.9338,
      "step": 80239
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.4219425916671753,
      "learning_rate": 0.0004376568202497297,
      "loss": 2.8333,
      "step": 80240
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.592365026473999,
      "learning_rate": 0.0004376531857290307,
      "loss": 2.8903,
      "step": 80241
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9978264570236206,
      "learning_rate": 0.00043764955118273934,
      "loss": 2.7101,
      "step": 80242
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.706581473350525,
      "learning_rate": 0.00043764591661085646,
      "loss": 2.9601,
      "step": 80243
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.293229818344116,
      "learning_rate": 0.0004376422820133825,
      "loss": 3.0765,
      "step": 80244
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2068324089050293,
      "learning_rate": 0.0004376386473903183,
      "loss": 3.0873,
      "step": 80245
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.332765817642212,
      "learning_rate": 0.0004376350127416644,
      "loss": 2.9142,
      "step": 80246
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.3392772674560547,
      "learning_rate": 0.0004376313780674216,
      "loss": 2.974,
      "step": 80247
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.5865094661712646,
      "learning_rate": 0.0004376277433675905,
      "loss": 3.23,
      "step": 80248
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.0878586769104004,
      "learning_rate": 0.0004376241086421719,
      "loss": 2.9219,
      "step": 80249
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5898096561431885,
      "learning_rate": 0.0004376204738911663,
      "loss": 2.9153,
      "step": 80250
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.1558547019958496,
      "learning_rate": 0.0004376168391145745,
      "loss": 3.2052,
      "step": 80251
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.845924139022827,
      "learning_rate": 0.0004376132043123971,
      "loss": 3.075,
      "step": 80252
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.9629192352294922,
      "learning_rate": 0.0004376095694846348,
      "loss": 2.9212,
      "step": 80253
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.5391236543655396,
      "learning_rate": 0.00043760593463128825,
      "loss": 3.1909,
      "step": 80254
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.2172763347625732,
      "learning_rate": 0.0004376022997523583,
      "loss": 3.0859,
      "step": 80255
    },
    {
      "epoch": 1.04,
      "grad_norm": 2.1793692111968994,
      "learning_rate": 0.00043759866484784527,
      "loss": 3.0176,
      "step": 80256
    },
    {
      "epoch": 1.04,
      "grad_norm": 1.7808198928833008,
      "learning_rate": 0.00043759502991775023,
      "loss": 2.9812,
      "step": 80257
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4843015670776367,
      "learning_rate": 0.00043759139496207366,
      "loss": 2.9104,
      "step": 80258
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.420459508895874,
      "learning_rate": 0.0004375877599808162,
      "loss": 2.8694,
      "step": 80259
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.150120496749878,
      "learning_rate": 0.0004375841249739785,
      "loss": 3.0521,
      "step": 80260
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3523740768432617,
      "learning_rate": 0.00043758048994156146,
      "loss": 3.2559,
      "step": 80261
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.672310471534729,
      "learning_rate": 0.00043757685488356553,
      "loss": 3.1102,
      "step": 80262
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.235398292541504,
      "learning_rate": 0.0004375732197999915,
      "loss": 2.8729,
      "step": 80263
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5678004026412964,
      "learning_rate": 0.00043756958469084,
      "loss": 3.0857,
      "step": 80264
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8063468933105469,
      "learning_rate": 0.0004375659495561117,
      "loss": 3.0016,
      "step": 80265
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.549566626548767,
      "learning_rate": 0.00043756231439580725,
      "loss": 2.9798,
      "step": 80266
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5743545293807983,
      "learning_rate": 0.0004375586792099275,
      "loss": 3.1421,
      "step": 80267
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6688348054885864,
      "learning_rate": 0.0004375550439984729,
      "loss": 2.8579,
      "step": 80268
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0553646087646484,
      "learning_rate": 0.00043755140876144423,
      "loss": 2.9991,
      "step": 80269
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5308032035827637,
      "learning_rate": 0.0004375477734988422,
      "loss": 2.884,
      "step": 80270
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.424860954284668,
      "learning_rate": 0.0004375441382106674,
      "loss": 3.3145,
      "step": 80271
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.880025029182434,
      "learning_rate": 0.0004375405028969206,
      "loss": 3.1311,
      "step": 80272
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.806853175163269,
      "learning_rate": 0.0004375368675576024,
      "loss": 3.0273,
      "step": 80273
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9348000288009644,
      "learning_rate": 0.0004375332321927134,
      "loss": 2.9335,
      "step": 80274
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0868029594421387,
      "learning_rate": 0.00043752959680225446,
      "loss": 3.2152,
      "step": 80275
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.026291608810425,
      "learning_rate": 0.00043752596138622624,
      "loss": 3.1818,
      "step": 80276
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7689300775527954,
      "learning_rate": 0.0004375223259446293,
      "loss": 2.8852,
      "step": 80277
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.689854621887207,
      "learning_rate": 0.0004375186904774643,
      "loss": 2.906,
      "step": 80278
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6794394254684448,
      "learning_rate": 0.0004375150549847321,
      "loss": 2.7951,
      "step": 80279
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8144060373306274,
      "learning_rate": 0.0004375114194664331,
      "loss": 3.2515,
      "step": 80280
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8752590417861938,
      "learning_rate": 0.00043750778392256825,
      "loss": 3.3346,
      "step": 80281
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2818663120269775,
      "learning_rate": 0.00043750414835313806,
      "loss": 2.9152,
      "step": 80282
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6167317628860474,
      "learning_rate": 0.0004375005127581433,
      "loss": 2.8027,
      "step": 80283
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6644259691238403,
      "learning_rate": 0.00043749687713758446,
      "loss": 3.1359,
      "step": 80284
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5629783868789673,
      "learning_rate": 0.0004374932414914625,
      "loss": 3.0798,
      "step": 80285
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7300541400909424,
      "learning_rate": 0.00043748960581977786,
      "loss": 2.8204,
      "step": 80286
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5607819557189941,
      "learning_rate": 0.0004374859701225314,
      "loss": 3.0705,
      "step": 80287
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6059337854385376,
      "learning_rate": 0.0004374823343997237,
      "loss": 2.97,
      "step": 80288
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7348406314849854,
      "learning_rate": 0.00043747869865135534,
      "loss": 3.0855,
      "step": 80289
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0862224102020264,
      "learning_rate": 0.00043747506287742716,
      "loss": 2.998,
      "step": 80290
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.650109052658081,
      "learning_rate": 0.0004374714270779398,
      "loss": 3.183,
      "step": 80291
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.806634545326233,
      "learning_rate": 0.00043746779125289386,
      "loss": 2.9533,
      "step": 80292
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6944842338562012,
      "learning_rate": 0.00043746415540229003,
      "loss": 2.855,
      "step": 80293
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.2887873649597168,
      "learning_rate": 0.00043746051952612907,
      "loss": 2.9304,
      "step": 80294
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.023806095123291,
      "learning_rate": 0.0004374568836244116,
      "loss": 3.1026,
      "step": 80295
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.454856276512146,
      "learning_rate": 0.0004374532476971383,
      "loss": 3.0375,
      "step": 80296
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.905957818031311,
      "learning_rate": 0.0004374496117443098,
      "loss": 3.1928,
      "step": 80297
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7370102405548096,
      "learning_rate": 0.00043744597576592684,
      "loss": 3.1486,
      "step": 80298
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5661290884017944,
      "learning_rate": 0.00043744233976199015,
      "loss": 3.0534,
      "step": 80299
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.164451837539673,
      "learning_rate": 0.0004374387037325003,
      "loss": 3.2574,
      "step": 80300
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8170812129974365,
      "learning_rate": 0.00043743506767745795,
      "loss": 3.0678,
      "step": 80301
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5671367645263672,
      "learning_rate": 0.00043743143159686394,
      "loss": 2.9046,
      "step": 80302
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5172624588012695,
      "learning_rate": 0.0004374277954907187,
      "loss": 2.9555,
      "step": 80303
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.8014650344848633,
      "learning_rate": 0.0004374241593590231,
      "loss": 3.3522,
      "step": 80304
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.008328914642334,
      "learning_rate": 0.00043742052320177774,
      "loss": 3.0084,
      "step": 80305
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.175630569458008,
      "learning_rate": 0.0004374168870189833,
      "loss": 3.05,
      "step": 80306
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.791424036026001,
      "learning_rate": 0.0004374132508106405,
      "loss": 2.9558,
      "step": 80307
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.606839895248413,
      "learning_rate": 0.00043740961457675,
      "loss": 3.2341,
      "step": 80308
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6236367225646973,
      "learning_rate": 0.0004374059783173124,
      "loss": 3.1585,
      "step": 80309
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.855860471725464,
      "learning_rate": 0.0004374023420323285,
      "loss": 3.057,
      "step": 80310
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7855523824691772,
      "learning_rate": 0.0004373987057217988,
      "loss": 2.8602,
      "step": 80311
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6765810251235962,
      "learning_rate": 0.00043739506938572414,
      "loss": 3.0314,
      "step": 80312
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1069116592407227,
      "learning_rate": 0.0004373914330241052,
      "loss": 2.9217,
      "step": 80313
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.189709186553955,
      "learning_rate": 0.0004373877966369425,
      "loss": 2.7867,
      "step": 80314
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.402281403541565,
      "learning_rate": 0.0004373841602242369,
      "loss": 3.1367,
      "step": 80315
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8204580545425415,
      "learning_rate": 0.000437380523785989,
      "loss": 3.2906,
      "step": 80316
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.940417766571045,
      "learning_rate": 0.00043737688732219945,
      "loss": 2.6858,
      "step": 80317
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1564135551452637,
      "learning_rate": 0.0004373732508328689,
      "loss": 2.8607,
      "step": 80318
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4841688871383667,
      "learning_rate": 0.00043736961431799817,
      "loss": 3.0278,
      "step": 80319
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4820287227630615,
      "learning_rate": 0.00043736597777758774,
      "loss": 3.1303,
      "step": 80320
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4724401235580444,
      "learning_rate": 0.0004373623412116384,
      "loss": 3.1158,
      "step": 80321
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5204273462295532,
      "learning_rate": 0.00043735870462015085,
      "loss": 3.1316,
      "step": 80322
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0649659633636475,
      "learning_rate": 0.00043735506800312565,
      "loss": 3.0478,
      "step": 80323
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.191697835922241,
      "learning_rate": 0.0004373514313605636,
      "loss": 2.9617,
      "step": 80324
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4359519481658936,
      "learning_rate": 0.00043734779469246536,
      "loss": 2.9243,
      "step": 80325
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.121969223022461,
      "learning_rate": 0.00043734415799883146,
      "loss": 3.2036,
      "step": 80326
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.911195158958435,
      "learning_rate": 0.0004373405212796628,
      "loss": 3.1248,
      "step": 80327
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6635992527008057,
      "learning_rate": 0.00043733688453495995,
      "loss": 3.0205,
      "step": 80328
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.633069634437561,
      "learning_rate": 0.00043733324776472354,
      "loss": 2.7145,
      "step": 80329
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6096612215042114,
      "learning_rate": 0.00043732961096895417,
      "loss": 2.8924,
      "step": 80330
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7906239032745361,
      "learning_rate": 0.0004373259741476528,
      "loss": 2.9415,
      "step": 80331
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.801061987876892,
      "learning_rate": 0.00043732233730081984,
      "loss": 3.0454,
      "step": 80332
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.848771095275879,
      "learning_rate": 0.00043731870042845614,
      "loss": 2.9837,
      "step": 80333
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6516051292419434,
      "learning_rate": 0.0004373150635305623,
      "loss": 2.8841,
      "step": 80334
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.165940523147583,
      "learning_rate": 0.0004373114266071389,
      "loss": 3.145,
      "step": 80335
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2275912761688232,
      "learning_rate": 0.0004373077896581868,
      "loss": 2.914,
      "step": 80336
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8504762649536133,
      "learning_rate": 0.0004373041526837066,
      "loss": 2.979,
      "step": 80337
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5977298021316528,
      "learning_rate": 0.0004373005156836989,
      "loss": 3.2377,
      "step": 80338
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6969298124313354,
      "learning_rate": 0.0004372968786581645,
      "loss": 3.1178,
      "step": 80339
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.153852701187134,
      "learning_rate": 0.00043729324160710406,
      "loss": 3.1407,
      "step": 80340
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6257983446121216,
      "learning_rate": 0.00043728960453051814,
      "loss": 3.1347,
      "step": 80341
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.460472822189331,
      "learning_rate": 0.00043728596742840744,
      "loss": 2.9599,
      "step": 80342
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.683716058731079,
      "learning_rate": 0.0004372823303007728,
      "loss": 2.9642,
      "step": 80343
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1897330284118652,
      "learning_rate": 0.0004372786931476147,
      "loss": 2.9537,
      "step": 80344
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3276824951171875,
      "learning_rate": 0.0004372750559689339,
      "loss": 3.1,
      "step": 80345
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5514572858810425,
      "learning_rate": 0.00043727141876473115,
      "loss": 2.9618,
      "step": 80346
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.8486948013305664,
      "learning_rate": 0.000437267781535007,
      "loss": 2.7561,
      "step": 80347
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.5098876953125,
      "learning_rate": 0.0004372641442797622,
      "loss": 3.06,
      "step": 80348
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1185929775238037,
      "learning_rate": 0.00043726050699899744,
      "loss": 3.0595,
      "step": 80349
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.029121160507202,
      "learning_rate": 0.00043725686969271326,
      "loss": 3.0678,
      "step": 80350
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1064815521240234,
      "learning_rate": 0.0004372532323609105,
      "loss": 3.069,
      "step": 80351
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6503076553344727,
      "learning_rate": 0.0004372495950035898,
      "loss": 2.7446,
      "step": 80352
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5779842138290405,
      "learning_rate": 0.00043724595762075174,
      "loss": 3.1505,
      "step": 80353
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.358297824859619,
      "learning_rate": 0.00043724232021239703,
      "loss": 3.0108,
      "step": 80354
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.006160259246826,
      "learning_rate": 0.00043723868277852654,
      "loss": 2.9157,
      "step": 80355
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7501850128173828,
      "learning_rate": 0.0004372350453191407,
      "loss": 2.8592,
      "step": 80356
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8538119792938232,
      "learning_rate": 0.00043723140783424023,
      "loss": 3.0196,
      "step": 80357
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.041477918624878,
      "learning_rate": 0.00043722777032382593,
      "loss": 3.0519,
      "step": 80358
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.054337978363037,
      "learning_rate": 0.00043722413278789835,
      "loss": 2.8977,
      "step": 80359
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.377941370010376,
      "learning_rate": 0.0004372204952264582,
      "loss": 3.0028,
      "step": 80360
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.507695198059082,
      "learning_rate": 0.0004372168576395062,
      "loss": 3.0579,
      "step": 80361
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6624318361282349,
      "learning_rate": 0.000437213220027043,
      "loss": 2.9124,
      "step": 80362
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.838686943054199,
      "learning_rate": 0.00043720958238906924,
      "loss": 2.9258,
      "step": 80363
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.547566533088684,
      "learning_rate": 0.0004372059447255857,
      "loss": 2.8522,
      "step": 80364
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6306453943252563,
      "learning_rate": 0.00043720230703659296,
      "loss": 2.9816,
      "step": 80365
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.346834182739258,
      "learning_rate": 0.00043719866932209164,
      "loss": 2.9679,
      "step": 80366
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2090158462524414,
      "learning_rate": 0.00043719503158208263,
      "loss": 3.0957,
      "step": 80367
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5838966369628906,
      "learning_rate": 0.0004371913938165664,
      "loss": 3.2416,
      "step": 80368
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5237646102905273,
      "learning_rate": 0.00043718775602554367,
      "loss": 3.0121,
      "step": 80369
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3424617052078247,
      "learning_rate": 0.0004371841182090152,
      "loss": 3.019,
      "step": 80370
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7895969152450562,
      "learning_rate": 0.00043718048036698156,
      "loss": 3.2202,
      "step": 80371
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.777312994003296,
      "learning_rate": 0.0004371768424994435,
      "loss": 2.8668,
      "step": 80372
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.597743272781372,
      "learning_rate": 0.0004371732046064017,
      "loss": 2.8881,
      "step": 80373
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.8301401138305664,
      "learning_rate": 0.0004371695666878568,
      "loss": 2.9684,
      "step": 80374
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.6118619441986084,
      "learning_rate": 0.00043716592874380944,
      "loss": 3.1055,
      "step": 80375
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.226562023162842,
      "learning_rate": 0.0004371622907742604,
      "loss": 3.2649,
      "step": 80376
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8038232326507568,
      "learning_rate": 0.00043715865277921034,
      "loss": 3.0849,
      "step": 80377
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8744062185287476,
      "learning_rate": 0.00043715501475865984,
      "loss": 2.8478,
      "step": 80378
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.133411169052124,
      "learning_rate": 0.0004371513767126096,
      "loss": 2.9758,
      "step": 80379
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8119010925292969,
      "learning_rate": 0.0004371477386410604,
      "loss": 2.8966,
      "step": 80380
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.666210651397705,
      "learning_rate": 0.0004371441005440128,
      "loss": 2.7841,
      "step": 80381
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.316226005554199,
      "learning_rate": 0.0004371404624214676,
      "loss": 2.9139,
      "step": 80382
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2305572032928467,
      "learning_rate": 0.0004371368242734253,
      "loss": 3.3753,
      "step": 80383
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2359421253204346,
      "learning_rate": 0.0004371331860998867,
      "loss": 2.8967,
      "step": 80384
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9880707263946533,
      "learning_rate": 0.0004371295479008525,
      "loss": 3.0512,
      "step": 80385
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.7187695503234863,
      "learning_rate": 0.00043712590967632327,
      "loss": 3.058,
      "step": 80386
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.6551947593688965,
      "learning_rate": 0.00043712227142629975,
      "loss": 2.7722,
      "step": 80387
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7945985794067383,
      "learning_rate": 0.00043711863315078263,
      "loss": 3.02,
      "step": 80388
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.654156446456909,
      "learning_rate": 0.00043711499484977255,
      "loss": 3.0697,
      "step": 80389
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7431666851043701,
      "learning_rate": 0.0004371113565232702,
      "loss": 3.0077,
      "step": 80390
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.415483832359314,
      "learning_rate": 0.00043710771817127636,
      "loss": 2.9087,
      "step": 80391
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4382317066192627,
      "learning_rate": 0.00043710407979379154,
      "loss": 2.9238,
      "step": 80392
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5547122955322266,
      "learning_rate": 0.0004371004413908164,
      "loss": 2.8702,
      "step": 80393
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5526779890060425,
      "learning_rate": 0.0004370968029623518,
      "loss": 2.8838,
      "step": 80394
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.543320894241333,
      "learning_rate": 0.00043709316450839833,
      "loss": 3.155,
      "step": 80395
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0838043689727783,
      "learning_rate": 0.00043708952602895655,
      "loss": 2.8741,
      "step": 80396
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8690252304077148,
      "learning_rate": 0.0004370858875240273,
      "loss": 2.894,
      "step": 80397
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0375616550445557,
      "learning_rate": 0.00043708224899361125,
      "loss": 3.0048,
      "step": 80398
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8095479011535645,
      "learning_rate": 0.00043707861043770893,
      "loss": 3.1583,
      "step": 80399
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.626171588897705,
      "learning_rate": 0.0004370749718563212,
      "loss": 2.8854,
      "step": 80400
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5349916219711304,
      "learning_rate": 0.00043707133324944863,
      "loss": 3.1637,
      "step": 80401
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6350048780441284,
      "learning_rate": 0.0004370676946170918,
      "loss": 3.1206,
      "step": 80402
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.513973593711853,
      "learning_rate": 0.0004370640559592516,
      "loss": 2.9655,
      "step": 80403
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8567078113555908,
      "learning_rate": 0.0004370604172759287,
      "loss": 3.0979,
      "step": 80404
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5849597454071045,
      "learning_rate": 0.0004370567785671235,
      "loss": 3.1434,
      "step": 80405
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.660697102546692,
      "learning_rate": 0.00043705313983283684,
      "loss": 3.1794,
      "step": 80406
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.46381413936615,
      "learning_rate": 0.00043704950107306955,
      "loss": 3.2145,
      "step": 80407
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5642372369766235,
      "learning_rate": 0.0004370458622878221,
      "loss": 3.4306,
      "step": 80408
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.004390001296997,
      "learning_rate": 0.00043704222347709524,
      "loss": 2.75,
      "step": 80409
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.085562229156494,
      "learning_rate": 0.00043703858464088974,
      "loss": 3.1509,
      "step": 80410
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4952374696731567,
      "learning_rate": 0.00043703494577920603,
      "loss": 2.9472,
      "step": 80411
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8524730205535889,
      "learning_rate": 0.000437031306892045,
      "loss": 3.0304,
      "step": 80412
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1727406978607178,
      "learning_rate": 0.0004370276679794073,
      "loss": 3.2536,
      "step": 80413
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.565534234046936,
      "learning_rate": 0.0004370240290412935,
      "loss": 3.312,
      "step": 80414
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.8857533931732178,
      "learning_rate": 0.0004370203900777044,
      "loss": 3.0452,
      "step": 80415
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.201436996459961,
      "learning_rate": 0.00043701675108864064,
      "loss": 2.9923,
      "step": 80416
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.620195984840393,
      "learning_rate": 0.0004370131120741028,
      "loss": 2.8626,
      "step": 80417
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.83092999458313,
      "learning_rate": 0.0004370094730340917,
      "loss": 3.0278,
      "step": 80418
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1304454803466797,
      "learning_rate": 0.000437005833968608,
      "loss": 3.172,
      "step": 80419
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8040900230407715,
      "learning_rate": 0.00043700219487765224,
      "loss": 2.9737,
      "step": 80420
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8760199546813965,
      "learning_rate": 0.00043699855576122515,
      "loss": 3.0052,
      "step": 80421
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0383758544921875,
      "learning_rate": 0.00043699491661932754,
      "loss": 2.9278,
      "step": 80422
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.468055248260498,
      "learning_rate": 0.0004369912774519599,
      "loss": 2.9764,
      "step": 80423
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5726091861724854,
      "learning_rate": 0.0004369876382591231,
      "loss": 3.0793,
      "step": 80424
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5830422639846802,
      "learning_rate": 0.0004369839990408177,
      "loss": 2.9662,
      "step": 80425
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3933318853378296,
      "learning_rate": 0.00043698035979704427,
      "loss": 2.9446,
      "step": 80426
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6852831840515137,
      "learning_rate": 0.0004369767205278037,
      "loss": 2.9091,
      "step": 80427
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7248660326004028,
      "learning_rate": 0.0004369730812330966,
      "loss": 3.2867,
      "step": 80428
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5915710926055908,
      "learning_rate": 0.00043696944191292357,
      "loss": 2.943,
      "step": 80429
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7085134983062744,
      "learning_rate": 0.00043696580256728535,
      "loss": 3.1837,
      "step": 80430
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.395589828491211,
      "learning_rate": 0.0004369621631961826,
      "loss": 2.8759,
      "step": 80431
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7695906162261963,
      "learning_rate": 0.0004369585237996159,
      "loss": 2.9797,
      "step": 80432
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9053809642791748,
      "learning_rate": 0.0004369548843775861,
      "loss": 3.0462,
      "step": 80433
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4675445556640625,
      "learning_rate": 0.00043695124493009393,
      "loss": 3.0992,
      "step": 80434
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5176831483840942,
      "learning_rate": 0.0004369476054571398,
      "loss": 3.1177,
      "step": 80435
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6582869291305542,
      "learning_rate": 0.0004369439659587245,
      "loss": 3.07,
      "step": 80436
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.531517744064331,
      "learning_rate": 0.0004369403264348488,
      "loss": 3.255,
      "step": 80437
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8402745723724365,
      "learning_rate": 0.00043693668688551333,
      "loss": 3.0655,
      "step": 80438
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8718671798706055,
      "learning_rate": 0.0004369330473107187,
      "loss": 2.9083,
      "step": 80439
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0792665481567383,
      "learning_rate": 0.00043692940771046556,
      "loss": 3.021,
      "step": 80440
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7315363883972168,
      "learning_rate": 0.0004369257680847548,
      "loss": 2.8573,
      "step": 80441
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9792344570159912,
      "learning_rate": 0.0004369221284335869,
      "loss": 3.0732,
      "step": 80442
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7158725261688232,
      "learning_rate": 0.00043691848875696254,
      "loss": 3.0683,
      "step": 80443
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9823074340820312,
      "learning_rate": 0.00043691484905488255,
      "loss": 2.9526,
      "step": 80444
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6510403156280518,
      "learning_rate": 0.0004369112093273474,
      "loss": 3.031,
      "step": 80445
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.9401695728302,
      "learning_rate": 0.00043690756957435794,
      "loss": 2.9645,
      "step": 80446
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8998781442642212,
      "learning_rate": 0.00043690392979591473,
      "loss": 3.1243,
      "step": 80447
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0442047119140625,
      "learning_rate": 0.0004369002899920185,
      "loss": 2.787,
      "step": 80448
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.048476219177246,
      "learning_rate": 0.00043689665016266994,
      "loss": 2.9541,
      "step": 80449
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.061225414276123,
      "learning_rate": 0.0004368930103078698,
      "loss": 3.1242,
      "step": 80450
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6163922548294067,
      "learning_rate": 0.0004368893704276185,
      "loss": 3.1143,
      "step": 80451
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1585090160369873,
      "learning_rate": 0.0004368857305219169,
      "loss": 2.8955,
      "step": 80452
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.114522695541382,
      "learning_rate": 0.00043688209059076583,
      "loss": 2.9729,
      "step": 80453
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1390950679779053,
      "learning_rate": 0.00043687845063416564,
      "loss": 3.0566,
      "step": 80454
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7381787300109863,
      "learning_rate": 0.0004368748106521172,
      "loss": 3.0202,
      "step": 80455
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5850300788879395,
      "learning_rate": 0.00043687117064462123,
      "loss": 2.8408,
      "step": 80456
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.150240659713745,
      "learning_rate": 0.0004368675306116782,
      "loss": 2.8064,
      "step": 80457
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0050439834594727,
      "learning_rate": 0.00043686389055328894,
      "loss": 3.1777,
      "step": 80458
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0159406661987305,
      "learning_rate": 0.00043686025046945417,
      "loss": 3.1195,
      "step": 80459
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.667602062225342,
      "learning_rate": 0.00043685661036017446,
      "loss": 2.7141,
      "step": 80460
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7550861835479736,
      "learning_rate": 0.0004368529702254505,
      "loss": 2.9898,
      "step": 80461
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0141446590423584,
      "learning_rate": 0.000436849330065283,
      "loss": 2.9871,
      "step": 80462
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.6027212142944336,
      "learning_rate": 0.00043684568987967264,
      "loss": 3.0477,
      "step": 80463
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9398436546325684,
      "learning_rate": 0.00043684204966862,
      "loss": 2.8056,
      "step": 80464
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6970410346984863,
      "learning_rate": 0.000436838409432126,
      "loss": 2.9388,
      "step": 80465
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.669979214668274,
      "learning_rate": 0.00043683476917019107,
      "loss": 3.0409,
      "step": 80466
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9067143201828003,
      "learning_rate": 0.0004368311288828159,
      "loss": 3.1274,
      "step": 80467
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.53383469581604,
      "learning_rate": 0.0004368274885700014,
      "loss": 2.8384,
      "step": 80468
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6620445251464844,
      "learning_rate": 0.00043682384823174794,
      "loss": 2.8118,
      "step": 80469
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9287617206573486,
      "learning_rate": 0.0004368202078680564,
      "loss": 3.0815,
      "step": 80470
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8277877569198608,
      "learning_rate": 0.0004368165674789274,
      "loss": 2.9737,
      "step": 80471
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5447520017623901,
      "learning_rate": 0.00043681292706436166,
      "loss": 2.8004,
      "step": 80472
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.4483237266540527,
      "learning_rate": 0.00043680928662435974,
      "loss": 3.1754,
      "step": 80473
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.141136407852173,
      "learning_rate": 0.00043680564615892234,
      "loss": 3.0729,
      "step": 80474
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8789517879486084,
      "learning_rate": 0.0004368020056680504,
      "loss": 2.8893,
      "step": 80475
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.352931261062622,
      "learning_rate": 0.0004367983651517442,
      "loss": 3.2322,
      "step": 80476
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.422935128211975,
      "learning_rate": 0.00043679472461000465,
      "loss": 3.0721,
      "step": 80477
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4146604537963867,
      "learning_rate": 0.00043679108404283233,
      "loss": 3.1754,
      "step": 80478
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6524255275726318,
      "learning_rate": 0.0004367874434502281,
      "loss": 2.8661,
      "step": 80479
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5710464715957642,
      "learning_rate": 0.00043678380283219243,
      "loss": 3.1919,
      "step": 80480
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1715598106384277,
      "learning_rate": 0.000436780162188726,
      "loss": 3.0907,
      "step": 80481
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0303640365600586,
      "learning_rate": 0.00043677652151982965,
      "loss": 2.9001,
      "step": 80482
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7503374814987183,
      "learning_rate": 0.0004367728808255039,
      "loss": 3.1838,
      "step": 80483
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9616531133651733,
      "learning_rate": 0.00043676924010574956,
      "loss": 3.1256,
      "step": 80484
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6601828336715698,
      "learning_rate": 0.0004367655993605672,
      "loss": 3.0352,
      "step": 80485
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4074923992156982,
      "learning_rate": 0.0004367619585899575,
      "loss": 3.0593,
      "step": 80486
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6494845151901245,
      "learning_rate": 0.00043675831779392126,
      "loss": 2.9462,
      "step": 80487
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7574466466903687,
      "learning_rate": 0.00043675467697245897,
      "loss": 3.185,
      "step": 80488
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.932903528213501,
      "learning_rate": 0.0004367510361255715,
      "loss": 3.2251,
      "step": 80489
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8054025173187256,
      "learning_rate": 0.00043674739525325935,
      "loss": 3.0404,
      "step": 80490
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6122461557388306,
      "learning_rate": 0.0004367437543555232,
      "loss": 2.7519,
      "step": 80491
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.8800206184387207,
      "learning_rate": 0.000436740113432364,
      "loss": 2.9341,
      "step": 80492
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.612135410308838,
      "learning_rate": 0.00043673647248378215,
      "loss": 3.0411,
      "step": 80493
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6437188386917114,
      "learning_rate": 0.00043673283150977836,
      "loss": 2.9333,
      "step": 80494
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0932681560516357,
      "learning_rate": 0.00043672919051035344,
      "loss": 2.7318,
      "step": 80495
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.93826425075531,
      "learning_rate": 0.0004367255494855079,
      "loss": 3.0357,
      "step": 80496
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7548333406448364,
      "learning_rate": 0.00043672190843524255,
      "loss": 2.945,
      "step": 80497
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4506422281265259,
      "learning_rate": 0.00043671826735955805,
      "loss": 2.9981,
      "step": 80498
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.662000298500061,
      "learning_rate": 0.00043671462625845497,
      "loss": 2.8403,
      "step": 80499
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4585739374160767,
      "learning_rate": 0.0004367109851319341,
      "loss": 2.9333,
      "step": 80500
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6439919471740723,
      "learning_rate": 0.000436707343979996,
      "loss": 2.8765,
      "step": 80501
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7968354225158691,
      "learning_rate": 0.00043670370280264153,
      "loss": 3.2712,
      "step": 80502
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7065597772598267,
      "learning_rate": 0.0004367000615998712,
      "loss": 3.263,
      "step": 80503
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7115267515182495,
      "learning_rate": 0.0004366964203716858,
      "loss": 2.6491,
      "step": 80504
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6721782684326172,
      "learning_rate": 0.0004366927791180859,
      "loss": 3.2741,
      "step": 80505
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.372912883758545,
      "learning_rate": 0.0004366891378390722,
      "loss": 3.1102,
      "step": 80506
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7281168699264526,
      "learning_rate": 0.00043668549653464546,
      "loss": 2.8769,
      "step": 80507
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3227736949920654,
      "learning_rate": 0.00043668185520480635,
      "loss": 2.924,
      "step": 80508
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7035324573516846,
      "learning_rate": 0.0004366782138495555,
      "loss": 2.9726,
      "step": 80509
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3781229257583618,
      "learning_rate": 0.0004366745724688935,
      "loss": 3.0208,
      "step": 80510
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5145115852355957,
      "learning_rate": 0.0004366709310628212,
      "loss": 2.7815,
      "step": 80511
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.469128131866455,
      "learning_rate": 0.0004366672896313391,
      "loss": 2.8871,
      "step": 80512
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6808680295944214,
      "learning_rate": 0.00043666364817444804,
      "loss": 2.8569,
      "step": 80513
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9104498624801636,
      "learning_rate": 0.0004366600066921487,
      "loss": 3.0449,
      "step": 80514
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6285852193832397,
      "learning_rate": 0.00043665636518444155,
      "loss": 2.993,
      "step": 80515
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1440675258636475,
      "learning_rate": 0.00043665272365132744,
      "loss": 2.9499,
      "step": 80516
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7824136018753052,
      "learning_rate": 0.0004366490820928071,
      "loss": 2.7901,
      "step": 80517
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6958314180374146,
      "learning_rate": 0.00043664544050888105,
      "loss": 3.057,
      "step": 80518
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7076551914215088,
      "learning_rate": 0.00043664179889954996,
      "loss": 3.041,
      "step": 80519
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6867691278457642,
      "learning_rate": 0.0004366381572648147,
      "loss": 3.0281,
      "step": 80520
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.699822425842285,
      "learning_rate": 0.00043663451560467574,
      "loss": 3.0767,
      "step": 80521
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.759384274482727,
      "learning_rate": 0.0004366308739191339,
      "loss": 2.7649,
      "step": 80522
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6479989290237427,
      "learning_rate": 0.00043662723220818983,
      "loss": 3.0162,
      "step": 80523
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.889572024345398,
      "learning_rate": 0.0004366235904718441,
      "loss": 3.0604,
      "step": 80524
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6537760496139526,
      "learning_rate": 0.0004366199487100974,
      "loss": 3.0461,
      "step": 80525
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.325592279434204,
      "learning_rate": 0.0004366163069229507,
      "loss": 3.1161,
      "step": 80526
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8523691892623901,
      "learning_rate": 0.00043661266511040427,
      "loss": 3.0838,
      "step": 80527
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6542726755142212,
      "learning_rate": 0.000436609023272459,
      "loss": 3.0914,
      "step": 80528
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6418166160583496,
      "learning_rate": 0.00043660538140911557,
      "loss": 3.0202,
      "step": 80529
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.815659999847412,
      "learning_rate": 0.0004366017395203746,
      "loss": 3.0552,
      "step": 80530
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5380048751831055,
      "learning_rate": 0.0004365980976062368,
      "loss": 3.0241,
      "step": 80531
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.9439501762390137,
      "learning_rate": 0.00043659445566670283,
      "loss": 3.1591,
      "step": 80532
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7108088731765747,
      "learning_rate": 0.0004365908137017733,
      "loss": 2.9735,
      "step": 80533
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2465603351593018,
      "learning_rate": 0.0004365871717114491,
      "loss": 3.1306,
      "step": 80534
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8031316995620728,
      "learning_rate": 0.0004365835296957307,
      "loss": 3.4114,
      "step": 80535
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4186935424804688,
      "learning_rate": 0.0004365798876546188,
      "loss": 2.9924,
      "step": 80536
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.696150302886963,
      "learning_rate": 0.0004365762455881141,
      "loss": 3.0378,
      "step": 80537
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.322150707244873,
      "learning_rate": 0.00043657260349621744,
      "loss": 2.9143,
      "step": 80538
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7377800941467285,
      "learning_rate": 0.00043656896137892924,
      "loss": 2.976,
      "step": 80539
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9361099004745483,
      "learning_rate": 0.0004365653192362503,
      "loss": 2.9737,
      "step": 80540
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.812225341796875,
      "learning_rate": 0.0004365616770681814,
      "loss": 3.2552,
      "step": 80541
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.833827018737793,
      "learning_rate": 0.00043655803487472293,
      "loss": 3.1135,
      "step": 80542
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3675340414047241,
      "learning_rate": 0.0004365543926558758,
      "loss": 3.0899,
      "step": 80543
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8532718420028687,
      "learning_rate": 0.0004365507504116407,
      "loss": 2.9119,
      "step": 80544
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5366363525390625,
      "learning_rate": 0.00043654710814201826,
      "loss": 2.835,
      "step": 80545
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4475066661834717,
      "learning_rate": 0.000436543465847009,
      "loss": 2.9573,
      "step": 80546
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7366331815719604,
      "learning_rate": 0.0004365398235266138,
      "loss": 3.0405,
      "step": 80547
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.744201421737671,
      "learning_rate": 0.0004365361811808333,
      "loss": 3.0946,
      "step": 80548
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4027787446975708,
      "learning_rate": 0.00043653253880966816,
      "loss": 2.8727,
      "step": 80549
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5837682485580444,
      "learning_rate": 0.00043652889641311896,
      "loss": 3.2085,
      "step": 80550
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.01071834564209,
      "learning_rate": 0.0004365252539911865,
      "loss": 2.9619,
      "step": 80551
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6802091598510742,
      "learning_rate": 0.0004365216115438714,
      "loss": 3.0071,
      "step": 80552
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6835496425628662,
      "learning_rate": 0.00043651796907117447,
      "loss": 2.8773,
      "step": 80553
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8907804489135742,
      "learning_rate": 0.0004365143265730961,
      "loss": 3.0719,
      "step": 80554
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8464113473892212,
      "learning_rate": 0.0004365106840496372,
      "loss": 3.0605,
      "step": 80555
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0524110794067383,
      "learning_rate": 0.0004365070415007985,
      "loss": 2.966,
      "step": 80556
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6455903053283691,
      "learning_rate": 0.00043650339892658044,
      "loss": 3.0476,
      "step": 80557
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5648589134216309,
      "learning_rate": 0.0004364997563269838,
      "loss": 3.0968,
      "step": 80558
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4538317918777466,
      "learning_rate": 0.00043649611370200937,
      "loss": 2.9315,
      "step": 80559
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8067699670791626,
      "learning_rate": 0.00043649247105165777,
      "loss": 3.1846,
      "step": 80560
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6913573741912842,
      "learning_rate": 0.0004364888283759295,
      "loss": 2.951,
      "step": 80561
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5233614444732666,
      "learning_rate": 0.00043648518567482547,
      "loss": 3.0255,
      "step": 80562
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8258150815963745,
      "learning_rate": 0.0004364815429483463,
      "loss": 3.1381,
      "step": 80563
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3965522050857544,
      "learning_rate": 0.0004364779001964925,
      "loss": 2.6944,
      "step": 80564
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.338953733444214,
      "learning_rate": 0.000436474257419265,
      "loss": 3.0694,
      "step": 80565
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4177049398422241,
      "learning_rate": 0.00043647061461666426,
      "loss": 3.07,
      "step": 80566
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0654799938201904,
      "learning_rate": 0.00043646697178869123,
      "loss": 3.1132,
      "step": 80567
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.057873249053955,
      "learning_rate": 0.00043646332893534624,
      "loss": 3.0479,
      "step": 80568
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5827432870864868,
      "learning_rate": 0.0004364596860566302,
      "loss": 2.8522,
      "step": 80569
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.720592975616455,
      "learning_rate": 0.00043645604315254376,
      "loss": 2.9349,
      "step": 80570
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.638686180114746,
      "learning_rate": 0.0004364524002230875,
      "loss": 3.172,
      "step": 80571
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.00102162361145,
      "learning_rate": 0.0004364487572682623,
      "loss": 2.7948,
      "step": 80572
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7100257873535156,
      "learning_rate": 0.0004364451142880685,
      "loss": 2.5989,
      "step": 80573
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4398950338363647,
      "learning_rate": 0.00043644147128250714,
      "loss": 3.04,
      "step": 80574
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5499892234802246,
      "learning_rate": 0.00043643782825157867,
      "loss": 3.1149,
      "step": 80575
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1157479286193848,
      "learning_rate": 0.00043643418519528383,
      "loss": 2.9934,
      "step": 80576
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0881457328796387,
      "learning_rate": 0.0004364305421136233,
      "loss": 3.0026,
      "step": 80577
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3017280101776123,
      "learning_rate": 0.0004364268990065978,
      "loss": 2.87,
      "step": 80578
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.944342851638794,
      "learning_rate": 0.0004364232558742079,
      "loss": 3.0144,
      "step": 80579
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7490116357803345,
      "learning_rate": 0.00043641961271645435,
      "loss": 3.1654,
      "step": 80580
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.257840394973755,
      "learning_rate": 0.0004364159695333379,
      "loss": 2.8905,
      "step": 80581
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5896093845367432,
      "learning_rate": 0.00043641232632485907,
      "loss": 3.2372,
      "step": 80582
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5672539472579956,
      "learning_rate": 0.0004364086830910186,
      "loss": 2.9859,
      "step": 80583
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4757211208343506,
      "learning_rate": 0.00043640503983181727,
      "loss": 3.273,
      "step": 80584
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3308889865875244,
      "learning_rate": 0.0004364013965472556,
      "loss": 2.7863,
      "step": 80585
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5648545026779175,
      "learning_rate": 0.00043639775323733437,
      "loss": 3.1812,
      "step": 80586
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.375689744949341,
      "learning_rate": 0.0004363941099020542,
      "loss": 2.9771,
      "step": 80587
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6683845520019531,
      "learning_rate": 0.0004363904665414158,
      "loss": 3.0796,
      "step": 80588
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2308144569396973,
      "learning_rate": 0.00043638682315541973,
      "loss": 2.7705,
      "step": 80589
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7374982833862305,
      "learning_rate": 0.000436383179744067,
      "loss": 3.0611,
      "step": 80590
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6369130611419678,
      "learning_rate": 0.0004363795363073578,
      "loss": 3.0218,
      "step": 80591
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7259386777877808,
      "learning_rate": 0.00043637589284529327,
      "loss": 3.0348,
      "step": 80592
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.421181559562683,
      "learning_rate": 0.00043637224935787384,
      "loss": 3.2775,
      "step": 80593
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6354604959487915,
      "learning_rate": 0.0004363686058451002,
      "loss": 3.0436,
      "step": 80594
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.731527328491211,
      "learning_rate": 0.00043636496230697305,
      "loss": 3.1527,
      "step": 80595
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6834099292755127,
      "learning_rate": 0.00043636131874349315,
      "loss": 3.0694,
      "step": 80596
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5190184116363525,
      "learning_rate": 0.0004363576751546611,
      "loss": 2.9663,
      "step": 80597
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.002044677734375,
      "learning_rate": 0.00043635403154047755,
      "loss": 3.1954,
      "step": 80598
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.045060396194458,
      "learning_rate": 0.00043635038790094315,
      "loss": 3.0527,
      "step": 80599
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.801957845687866,
      "learning_rate": 0.0004363467442360587,
      "loss": 2.8253,
      "step": 80600
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6255617141723633,
      "learning_rate": 0.0004363431005458248,
      "loss": 3.0858,
      "step": 80601
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.0773143768310547,
      "learning_rate": 0.00043633945683024225,
      "loss": 3.1346,
      "step": 80602
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7023677825927734,
      "learning_rate": 0.00043633581308931147,
      "loss": 3.1204,
      "step": 80603
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1797239780426025,
      "learning_rate": 0.00043633216932303335,
      "loss": 3.1402,
      "step": 80604
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1007273197174072,
      "learning_rate": 0.0004363285255314086,
      "loss": 2.9596,
      "step": 80605
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.950427770614624,
      "learning_rate": 0.0004363248817144376,
      "loss": 3.1715,
      "step": 80606
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8043932914733887,
      "learning_rate": 0.00043632123787212135,
      "loss": 2.9914,
      "step": 80607
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4771641492843628,
      "learning_rate": 0.00043631759400446045,
      "loss": 3.009,
      "step": 80608
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.016892433166504,
      "learning_rate": 0.00043631395011145546,
      "loss": 3.0634,
      "step": 80609
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.793184995651245,
      "learning_rate": 0.00043631030619310714,
      "loss": 3.0166,
      "step": 80610
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4557933807373047,
      "learning_rate": 0.00043630666224941624,
      "loss": 2.855,
      "step": 80611
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.805777668952942,
      "learning_rate": 0.0004363030182803833,
      "loss": 2.9327,
      "step": 80612
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5482532978057861,
      "learning_rate": 0.000436299374286009,
      "loss": 3.1915,
      "step": 80613
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5513602495193481,
      "learning_rate": 0.0004362957302662942,
      "loss": 2.9325,
      "step": 80614
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6325087547302246,
      "learning_rate": 0.00043629208622123937,
      "loss": 2.9853,
      "step": 80615
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4812989234924316,
      "learning_rate": 0.0004362884421508452,
      "loss": 3.1986,
      "step": 80616
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4259501695632935,
      "learning_rate": 0.00043628479805511255,
      "loss": 3.0129,
      "step": 80617
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4824210405349731,
      "learning_rate": 0.000436281153934042,
      "loss": 3.2559,
      "step": 80618
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6756075620651245,
      "learning_rate": 0.0004362775097876341,
      "loss": 3.2326,
      "step": 80619
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6806105375289917,
      "learning_rate": 0.0004362738656158897,
      "loss": 2.8022,
      "step": 80620
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.730895757675171,
      "learning_rate": 0.00043627022141880944,
      "loss": 3.0015,
      "step": 80621
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.184192419052124,
      "learning_rate": 0.0004362665771963939,
      "loss": 2.9164,
      "step": 80622
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5797868967056274,
      "learning_rate": 0.00043626293294864394,
      "loss": 3.2404,
      "step": 80623
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6097767353057861,
      "learning_rate": 0.00043625928867556006,
      "loss": 3.1048,
      "step": 80624
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9386662244796753,
      "learning_rate": 0.00043625564437714293,
      "loss": 2.9551,
      "step": 80625
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5347262620925903,
      "learning_rate": 0.0004362520000533935,
      "loss": 2.9875,
      "step": 80626
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8002792596817017,
      "learning_rate": 0.00043624835570431206,
      "loss": 2.8915,
      "step": 80627
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.618420124053955,
      "learning_rate": 0.00043624471132989957,
      "loss": 2.9118,
      "step": 80628
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6439241170883179,
      "learning_rate": 0.00043624106693015665,
      "loss": 2.9861,
      "step": 80629
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6102958917617798,
      "learning_rate": 0.0004362374225050839,
      "loss": 2.8985,
      "step": 80630
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.625978946685791,
      "learning_rate": 0.000436233778054682,
      "loss": 2.9623,
      "step": 80631
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6566334962844849,
      "learning_rate": 0.0004362301335789518,
      "loss": 2.8333,
      "step": 80632
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8066068887710571,
      "learning_rate": 0.0004362264890778937,
      "loss": 2.7703,
      "step": 80633
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9043606519699097,
      "learning_rate": 0.00043622284455150853,
      "loss": 3.0214,
      "step": 80634
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3894102573394775,
      "learning_rate": 0.00043621919999979706,
      "loss": 2.9941,
      "step": 80635
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.286855459213257,
      "learning_rate": 0.0004362155554227598,
      "loss": 3.1537,
      "step": 80636
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.621942162513733,
      "learning_rate": 0.0004362119108203975,
      "loss": 2.8866,
      "step": 80637
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5733578205108643,
      "learning_rate": 0.0004362082661927109,
      "loss": 3.0218,
      "step": 80638
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.092689037322998,
      "learning_rate": 0.00043620462153970055,
      "loss": 2.997,
      "step": 80639
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7159156799316406,
      "learning_rate": 0.00043620097686136716,
      "loss": 3.0394,
      "step": 80640
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.500081181526184,
      "learning_rate": 0.0004361973321577115,
      "loss": 3.4012,
      "step": 80641
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8204350471496582,
      "learning_rate": 0.0004361936874287342,
      "loss": 3.1436,
      "step": 80642
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1249496936798096,
      "learning_rate": 0.00043619004267443585,
      "loss": 2.9119,
      "step": 80643
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3435990810394287,
      "learning_rate": 0.0004361863978948172,
      "loss": 3.1372,
      "step": 80644
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.746842861175537,
      "learning_rate": 0.000436182753089879,
      "loss": 3.1255,
      "step": 80645
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4909563064575195,
      "learning_rate": 0.00043617910825962183,
      "loss": 2.9573,
      "step": 80646
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.9243719577789307,
      "learning_rate": 0.0004361754634040464,
      "loss": 2.8724,
      "step": 80647
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6836490631103516,
      "learning_rate": 0.00043617181852315335,
      "loss": 3.1074,
      "step": 80648
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5925500392913818,
      "learning_rate": 0.00043616817361694343,
      "loss": 3.1646,
      "step": 80649
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.247734546661377,
      "learning_rate": 0.00043616452868541723,
      "loss": 3.2427,
      "step": 80650
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9550578594207764,
      "learning_rate": 0.00043616088372857545,
      "loss": 3.1493,
      "step": 80651
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5983465909957886,
      "learning_rate": 0.0004361572387464189,
      "loss": 3.0824,
      "step": 80652
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.698071002960205,
      "learning_rate": 0.00043615359373894803,
      "loss": 2.9092,
      "step": 80653
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4820663928985596,
      "learning_rate": 0.0004361499487061637,
      "loss": 2.9682,
      "step": 80654
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9150551557540894,
      "learning_rate": 0.00043614630364806654,
      "loss": 2.9752,
      "step": 80655
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7207369804382324,
      "learning_rate": 0.00043614265856465717,
      "loss": 3.0355,
      "step": 80656
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.811173439025879,
      "learning_rate": 0.00043613901345593634,
      "loss": 2.8421,
      "step": 80657
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.852912425994873,
      "learning_rate": 0.0004361353683219047,
      "loss": 2.7472,
      "step": 80658
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8427245616912842,
      "learning_rate": 0.00043613172316256296,
      "loss": 3.2537,
      "step": 80659
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.815936803817749,
      "learning_rate": 0.00043612807797791165,
      "loss": 2.9636,
      "step": 80660
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5658851861953735,
      "learning_rate": 0.0004361244327679516,
      "loss": 3.1618,
      "step": 80661
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.448498487472534,
      "learning_rate": 0.0004361207875326835,
      "loss": 2.8885,
      "step": 80662
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8579466342926025,
      "learning_rate": 0.00043611714227210797,
      "loss": 3.2579,
      "step": 80663
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8714982271194458,
      "learning_rate": 0.00043611349698622563,
      "loss": 2.973,
      "step": 80664
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6294442415237427,
      "learning_rate": 0.00043610985167503725,
      "loss": 2.8897,
      "step": 80665
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1813268661499023,
      "learning_rate": 0.00043610620633854354,
      "loss": 3.0872,
      "step": 80666
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5052508115768433,
      "learning_rate": 0.00043610256097674504,
      "loss": 3.1314,
      "step": 80667
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3813729286193848,
      "learning_rate": 0.0004360989155896425,
      "loss": 3.0113,
      "step": 80668
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4120032787323,
      "learning_rate": 0.0004360952701772367,
      "loss": 3.1613,
      "step": 80669
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6436235904693604,
      "learning_rate": 0.0004360916247395281,
      "loss": 2.982,
      "step": 80670
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.762125849723816,
      "learning_rate": 0.00043608797927651756,
      "loss": 3.0593,
      "step": 80671
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6557691097259521,
      "learning_rate": 0.00043608433378820573,
      "loss": 3.0491,
      "step": 80672
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.795731544494629,
      "learning_rate": 0.00043608068827459323,
      "loss": 3.1022,
      "step": 80673
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5751491785049438,
      "learning_rate": 0.00043607704273568076,
      "loss": 2.8698,
      "step": 80674
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.025357246398926,
      "learning_rate": 0.000436073397171469,
      "loss": 2.9672,
      "step": 80675
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.022216320037842,
      "learning_rate": 0.00043606975158195854,
      "loss": 3.2013,
      "step": 80676
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6647003889083862,
      "learning_rate": 0.00043606610596715024,
      "loss": 2.7593,
      "step": 80677
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.024282455444336,
      "learning_rate": 0.0004360624603270447,
      "loss": 2.896,
      "step": 80678
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.914334774017334,
      "learning_rate": 0.0004360588146616426,
      "loss": 3.0141,
      "step": 80679
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0935614109039307,
      "learning_rate": 0.0004360551689709444,
      "loss": 2.9552,
      "step": 80680
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.673926591873169,
      "learning_rate": 0.0004360515232549512,
      "loss": 3.1444,
      "step": 80681
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6466715335845947,
      "learning_rate": 0.00043604787751366336,
      "loss": 3.1744,
      "step": 80682
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8982423543930054,
      "learning_rate": 0.00043604423174708166,
      "loss": 3.0936,
      "step": 80683
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5194164514541626,
      "learning_rate": 0.0004360405859552068,
      "loss": 3.11,
      "step": 80684
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5991464853286743,
      "learning_rate": 0.00043603694013803936,
      "loss": 2.9251,
      "step": 80685
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8470033407211304,
      "learning_rate": 0.0004360332942955801,
      "loss": 3.1052,
      "step": 80686
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.270719289779663,
      "learning_rate": 0.0004360296484278297,
      "loss": 2.8105,
      "step": 80687
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6885358095169067,
      "learning_rate": 0.00043602600253478884,
      "loss": 3.1096,
      "step": 80688
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7592445611953735,
      "learning_rate": 0.00043602235661645815,
      "loss": 2.7948,
      "step": 80689
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5608643293380737,
      "learning_rate": 0.0004360187106728384,
      "loss": 3.0515,
      "step": 80690
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9224286079406738,
      "learning_rate": 0.00043601506470393014,
      "loss": 2.9142,
      "step": 80691
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.606076717376709,
      "learning_rate": 0.0004360114187097341,
      "loss": 2.9825,
      "step": 80692
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.391450047492981,
      "learning_rate": 0.00043600777269025106,
      "loss": 2.9509,
      "step": 80693
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5879710912704468,
      "learning_rate": 0.0004360041266454815,
      "loss": 3.1767,
      "step": 80694
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6065070629119873,
      "learning_rate": 0.0004360004805754262,
      "loss": 3.1048,
      "step": 80695
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.172542095184326,
      "learning_rate": 0.00043599683448008596,
      "loss": 2.9687,
      "step": 80696
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4812840223312378,
      "learning_rate": 0.00043599318835946125,
      "loss": 3.3264,
      "step": 80697
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6038626432418823,
      "learning_rate": 0.00043598954221355283,
      "loss": 3.1223,
      "step": 80698
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9270576238632202,
      "learning_rate": 0.00043598589604236146,
      "loss": 2.8507,
      "step": 80699
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9116584062576294,
      "learning_rate": 0.0004359822498458877,
      "loss": 3.0206,
      "step": 80700
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2371482849121094,
      "learning_rate": 0.00043597860362413224,
      "loss": 3.1291,
      "step": 80701
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1301794052124023,
      "learning_rate": 0.0004359749573770959,
      "loss": 3.1662,
      "step": 80702
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.053096055984497,
      "learning_rate": 0.0004359713111047791,
      "loss": 3.1145,
      "step": 80703
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.698058009147644,
      "learning_rate": 0.00043596766480718277,
      "loss": 3.232,
      "step": 80704
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0796844959259033,
      "learning_rate": 0.0004359640184843074,
      "loss": 2.9847,
      "step": 80705
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3241231441497803,
      "learning_rate": 0.0004359603721361539,
      "loss": 2.9683,
      "step": 80706
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7448030710220337,
      "learning_rate": 0.0004359567257627227,
      "loss": 2.6862,
      "step": 80707
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.389012336730957,
      "learning_rate": 0.00043595307936401454,
      "loss": 2.9703,
      "step": 80708
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.255686044692993,
      "learning_rate": 0.00043594943294003027,
      "loss": 2.9684,
      "step": 80709
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.373429298400879,
      "learning_rate": 0.0004359457864907703,
      "loss": 2.8649,
      "step": 80710
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8029797077178955,
      "learning_rate": 0.00043594214001623546,
      "loss": 3.2345,
      "step": 80711
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.2258107662200928,
      "learning_rate": 0.0004359384935164265,
      "loss": 2.9726,
      "step": 80712
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.289283275604248,
      "learning_rate": 0.00043593484699134394,
      "loss": 3.3598,
      "step": 80713
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.0906522274017334,
      "learning_rate": 0.00043593120044098854,
      "loss": 2.8893,
      "step": 80714
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.677024245262146,
      "learning_rate": 0.000435927553865361,
      "loss": 2.9893,
      "step": 80715
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.585700511932373,
      "learning_rate": 0.0004359239072644619,
      "loss": 2.9281,
      "step": 80716
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0686635971069336,
      "learning_rate": 0.00043592026063829194,
      "loss": 2.8308,
      "step": 80717
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.073505401611328,
      "learning_rate": 0.000435916613986852,
      "loss": 3.0238,
      "step": 80718
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3562753200531006,
      "learning_rate": 0.00043591296731014246,
      "loss": 2.9287,
      "step": 80719
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.7579410076141357,
      "learning_rate": 0.0004359093206081641,
      "loss": 2.7992,
      "step": 80720
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5900001525878906,
      "learning_rate": 0.00043590567388091775,
      "loss": 2.9468,
      "step": 80721
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6586154699325562,
      "learning_rate": 0.0004359020271284039,
      "loss": 2.8923,
      "step": 80722
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9076143503189087,
      "learning_rate": 0.0004358983803506233,
      "loss": 3.0723,
      "step": 80723
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.729801654815674,
      "learning_rate": 0.00043589473354757667,
      "loss": 3.06,
      "step": 80724
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8740681409835815,
      "learning_rate": 0.00043589108671926463,
      "loss": 3.0882,
      "step": 80725
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6937174797058105,
      "learning_rate": 0.00043588743986568786,
      "loss": 3.2558,
      "step": 80726
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8263343572616577,
      "learning_rate": 0.00043588379298684706,
      "loss": 3.1308,
      "step": 80727
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.858436107635498,
      "learning_rate": 0.0004358801460827429,
      "loss": 2.9609,
      "step": 80728
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6462304592132568,
      "learning_rate": 0.000435876499153376,
      "loss": 2.948,
      "step": 80729
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5791345834732056,
      "learning_rate": 0.0004358728521987472,
      "loss": 2.8584,
      "step": 80730
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6052650213241577,
      "learning_rate": 0.00043586920521885696,
      "loss": 2.8677,
      "step": 80731
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.100787878036499,
      "learning_rate": 0.0004358655582137061,
      "loss": 2.8995,
      "step": 80732
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8414324522018433,
      "learning_rate": 0.0004358619111832953,
      "loss": 3.0447,
      "step": 80733
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.636941909790039,
      "learning_rate": 0.0004358582641276252,
      "loss": 2.7915,
      "step": 80734
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.6621243953704834,
      "learning_rate": 0.0004358546170466965,
      "loss": 2.9844,
      "step": 80735
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5248615741729736,
      "learning_rate": 0.0004358509699405099,
      "loss": 2.9955,
      "step": 80736
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2244157791137695,
      "learning_rate": 0.00043584732280906594,
      "loss": 3.1263,
      "step": 80737
    },
    {
      "epoch": 1.05,
      "grad_norm": 4.023041725158691,
      "learning_rate": 0.0004358436756523654,
      "loss": 2.9329,
      "step": 80738
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8538602590560913,
      "learning_rate": 0.00043584002847040903,
      "loss": 2.9637,
      "step": 80739
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.226515293121338,
      "learning_rate": 0.0004358363812631974,
      "loss": 2.9187,
      "step": 80740
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.167330741882324,
      "learning_rate": 0.0004358327340307312,
      "loss": 3.0083,
      "step": 80741
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.1780714988708496,
      "learning_rate": 0.0004358290867730112,
      "loss": 2.8308,
      "step": 80742
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6980047225952148,
      "learning_rate": 0.0004358254394900379,
      "loss": 3.0323,
      "step": 80743
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8283065557479858,
      "learning_rate": 0.00043582179218181215,
      "loss": 3.4087,
      "step": 80744
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.6368165016174316,
      "learning_rate": 0.0004358181448483346,
      "loss": 3.0472,
      "step": 80745
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.171517848968506,
      "learning_rate": 0.0004358144974896058,
      "loss": 2.9406,
      "step": 80746
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.847767949104309,
      "learning_rate": 0.00043581085010562664,
      "loss": 3.0144,
      "step": 80747
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9539018869400024,
      "learning_rate": 0.00043580720269639756,
      "loss": 3.0161,
      "step": 80748
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4923393726348877,
      "learning_rate": 0.0004358035552619194,
      "loss": 2.7845,
      "step": 80749
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7277169227600098,
      "learning_rate": 0.0004357999078021928,
      "loss": 3.0026,
      "step": 80750
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2758946418762207,
      "learning_rate": 0.0004357962603172185,
      "loss": 2.7626,
      "step": 80751
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.015662908554077,
      "learning_rate": 0.00043579261280699705,
      "loss": 3.1819,
      "step": 80752
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.6554741859436035,
      "learning_rate": 0.00043578896527152915,
      "loss": 2.965,
      "step": 80753
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8868515491485596,
      "learning_rate": 0.0004357853177108156,
      "loss": 2.8278,
      "step": 80754
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5732191801071167,
      "learning_rate": 0.00043578167012485697,
      "loss": 2.9418,
      "step": 80755
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0378410816192627,
      "learning_rate": 0.0004357780225136539,
      "loss": 3.2089,
      "step": 80756
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9300482273101807,
      "learning_rate": 0.00043577437487720723,
      "loss": 2.8105,
      "step": 80757
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4355924129486084,
      "learning_rate": 0.00043577072721551744,
      "loss": 3.2197,
      "step": 80758
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.53367280960083,
      "learning_rate": 0.0004357670795285854,
      "loss": 2.7559,
      "step": 80759
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.4524478912353516,
      "learning_rate": 0.0004357634318164117,
      "loss": 3.2369,
      "step": 80760
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.473206877708435,
      "learning_rate": 0.000435759784078997,
      "loss": 3.1947,
      "step": 80761
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.048433542251587,
      "learning_rate": 0.00043575613631634193,
      "loss": 3.1305,
      "step": 80762
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.9850478172302246,
      "learning_rate": 0.0004357524885284473,
      "loss": 2.8153,
      "step": 80763
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5044214725494385,
      "learning_rate": 0.0004357488407153137,
      "loss": 2.9816,
      "step": 80764
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1632261276245117,
      "learning_rate": 0.00043574519287694176,
      "loss": 3.037,
      "step": 80765
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7635804414749146,
      "learning_rate": 0.00043574154501333233,
      "loss": 3.0068,
      "step": 80766
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.468514084815979,
      "learning_rate": 0.0004357378971244859,
      "loss": 2.9548,
      "step": 80767
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8104606866836548,
      "learning_rate": 0.00043573424921040325,
      "loss": 3.2307,
      "step": 80768
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.203958511352539,
      "learning_rate": 0.00043573060127108517,
      "loss": 2.9312,
      "step": 80769
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9285160303115845,
      "learning_rate": 0.00043572695330653207,
      "loss": 2.8996,
      "step": 80770
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.8633475303649902,
      "learning_rate": 0.00043572330531674476,
      "loss": 3.0594,
      "step": 80771
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.6103169918060303,
      "learning_rate": 0.000435719657301724,
      "loss": 3.0472,
      "step": 80772
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.064868211746216,
      "learning_rate": 0.0004357160092614704,
      "loss": 2.9347,
      "step": 80773
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6010758876800537,
      "learning_rate": 0.0004357123611959846,
      "loss": 3.0829,
      "step": 80774
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7246923446655273,
      "learning_rate": 0.0004357087131052672,
      "loss": 2.7806,
      "step": 80775
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.3452138900756836,
      "learning_rate": 0.00043570506498931915,
      "loss": 2.9906,
      "step": 80776
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8815737962722778,
      "learning_rate": 0.00043570141684814095,
      "loss": 3.1042,
      "step": 80777
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7192589044570923,
      "learning_rate": 0.00043569776868173325,
      "loss": 2.9782,
      "step": 80778
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.147028923034668,
      "learning_rate": 0.0004356941204900968,
      "loss": 2.9318,
      "step": 80779
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6908737421035767,
      "learning_rate": 0.0004356904722732322,
      "loss": 3.0013,
      "step": 80780
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4543535709381104,
      "learning_rate": 0.00043568682403114016,
      "loss": 3.1882,
      "step": 80781
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0948381423950195,
      "learning_rate": 0.00043568317576382143,
      "loss": 2.7505,
      "step": 80782
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2770392894744873,
      "learning_rate": 0.00043567952747127674,
      "loss": 2.8785,
      "step": 80783
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5655947923660278,
      "learning_rate": 0.0004356758791535065,
      "loss": 3.1507,
      "step": 80784
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4120900630950928,
      "learning_rate": 0.00043567223081051167,
      "loss": 2.8423,
      "step": 80785
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6207480430603027,
      "learning_rate": 0.00043566858244229275,
      "loss": 3.0181,
      "step": 80786
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.066899061203003,
      "learning_rate": 0.0004356649340488504,
      "loss": 2.9455,
      "step": 80787
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6220052242279053,
      "learning_rate": 0.0004356612856301855,
      "loss": 2.9329,
      "step": 80788
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.786102533340454,
      "learning_rate": 0.00043565763718629856,
      "loss": 2.9292,
      "step": 80789
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4421263933181763,
      "learning_rate": 0.0004356539887171903,
      "loss": 2.8814,
      "step": 80790
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9820181131362915,
      "learning_rate": 0.0004356503402228615,
      "loss": 2.9664,
      "step": 80791
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7460277080535889,
      "learning_rate": 0.0004356466917033126,
      "loss": 2.9207,
      "step": 80792
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4822287559509277,
      "learning_rate": 0.00043564304315854444,
      "loss": 3.0008,
      "step": 80793
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6817775964736938,
      "learning_rate": 0.00043563939458855777,
      "loss": 3.1189,
      "step": 80794
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5719630718231201,
      "learning_rate": 0.00043563574599335316,
      "loss": 3.1156,
      "step": 80795
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5640349388122559,
      "learning_rate": 0.0004356320973729312,
      "loss": 2.8105,
      "step": 80796
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5912061929702759,
      "learning_rate": 0.0004356284487272928,
      "loss": 3.2287,
      "step": 80797
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3338375091552734,
      "learning_rate": 0.00043562480005643843,
      "loss": 2.8924,
      "step": 80798
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5904003381729126,
      "learning_rate": 0.0004356211513603688,
      "loss": 3.1885,
      "step": 80799
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7314989566802979,
      "learning_rate": 0.0004356175026390848,
      "loss": 3.0602,
      "step": 80800
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4344151020050049,
      "learning_rate": 0.0004356138538925868,
      "loss": 2.9042,
      "step": 80801
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6537095308303833,
      "learning_rate": 0.00043561020512087565,
      "loss": 3.1204,
      "step": 80802
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7062673568725586,
      "learning_rate": 0.0004356065563239521,
      "loss": 3.1454,
      "step": 80803
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9684035778045654,
      "learning_rate": 0.0004356029075018166,
      "loss": 2.9641,
      "step": 80804
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0583763122558594,
      "learning_rate": 0.00043559925865446997,
      "loss": 3.025,
      "step": 80805
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0184166431427,
      "learning_rate": 0.000435595609781913,
      "loss": 3.1727,
      "step": 80806
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.674365520477295,
      "learning_rate": 0.00043559196088414616,
      "loss": 2.881,
      "step": 80807
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7498738765716553,
      "learning_rate": 0.0004355883119611702,
      "loss": 2.8981,
      "step": 80808
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.13185977935791,
      "learning_rate": 0.0004355846630129859,
      "loss": 2.7595,
      "step": 80809
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0822932720184326,
      "learning_rate": 0.0004355810140395938,
      "loss": 2.954,
      "step": 80810
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7765909433364868,
      "learning_rate": 0.00043557736504099454,
      "loss": 2.9355,
      "step": 80811
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.654916524887085,
      "learning_rate": 0.00043557371601718903,
      "loss": 3.0799,
      "step": 80812
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.0739800930023193,
      "learning_rate": 0.0004355700669681778,
      "loss": 2.9062,
      "step": 80813
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.548824429512024,
      "learning_rate": 0.00043556641789396143,
      "loss": 3.0906,
      "step": 80814
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.547913670539856,
      "learning_rate": 0.0004355627687945408,
      "loss": 3.1556,
      "step": 80815
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9507825374603271,
      "learning_rate": 0.00043555911966991647,
      "loss": 3.1197,
      "step": 80816
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4351627826690674,
      "learning_rate": 0.0004355554705200891,
      "loss": 3.0892,
      "step": 80817
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.022273063659668,
      "learning_rate": 0.00043555182134505946,
      "loss": 2.7975,
      "step": 80818
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7109923362731934,
      "learning_rate": 0.00043554817214482813,
      "loss": 3.1049,
      "step": 80819
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9116493463516235,
      "learning_rate": 0.00043554452291939583,
      "loss": 3.2045,
      "step": 80820
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.33064603805542,
      "learning_rate": 0.0004355408736687633,
      "loss": 3.1199,
      "step": 80821
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.655957579612732,
      "learning_rate": 0.0004355372243929311,
      "loss": 3.0605,
      "step": 80822
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1508450508117676,
      "learning_rate": 0.00043553357509190004,
      "loss": 2.8869,
      "step": 80823
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5862860679626465,
      "learning_rate": 0.00043552992576567065,
      "loss": 3.184,
      "step": 80824
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6382075548171997,
      "learning_rate": 0.00043552627641424375,
      "loss": 3.1121,
      "step": 80825
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5466328859329224,
      "learning_rate": 0.00043552262703761994,
      "loss": 3.0446,
      "step": 80826
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7294695377349854,
      "learning_rate": 0.0004355189776357999,
      "loss": 2.8146,
      "step": 80827
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.270540475845337,
      "learning_rate": 0.00043551532820878435,
      "loss": 2.9995,
      "step": 80828
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1016621589660645,
      "learning_rate": 0.0004355116787565739,
      "loss": 2.945,
      "step": 80829
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8414252996444702,
      "learning_rate": 0.00043550802927916934,
      "loss": 2.8582,
      "step": 80830
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9046962261199951,
      "learning_rate": 0.0004355043797765712,
      "loss": 3.0117,
      "step": 80831
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5675045251846313,
      "learning_rate": 0.00043550073024878035,
      "loss": 3.0551,
      "step": 80832
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7343312501907349,
      "learning_rate": 0.00043549708069579723,
      "loss": 3.1597,
      "step": 80833
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7305494546890259,
      "learning_rate": 0.00043549343111762273,
      "loss": 3.2326,
      "step": 80834
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.831292748451233,
      "learning_rate": 0.0004354897815142574,
      "loss": 2.7009,
      "step": 80835
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8505046367645264,
      "learning_rate": 0.00043548613188570197,
      "loss": 2.9579,
      "step": 80836
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.195096254348755,
      "learning_rate": 0.00043548248223195717,
      "loss": 2.9823,
      "step": 80837
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.578717589378357,
      "learning_rate": 0.00043547883255302353,
      "loss": 3.1457,
      "step": 80838
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6216654777526855,
      "learning_rate": 0.0004354751828489019,
      "loss": 3.0175,
      "step": 80839
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0108790397644043,
      "learning_rate": 0.0004354715331195928,
      "loss": 2.9869,
      "step": 80840
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8517060279846191,
      "learning_rate": 0.00043546788336509704,
      "loss": 3.1398,
      "step": 80841
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5268360376358032,
      "learning_rate": 0.00043546423358541516,
      "loss": 3.0013,
      "step": 80842
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0755105018615723,
      "learning_rate": 0.000435460583780548,
      "loss": 2.785,
      "step": 80843
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.948032021522522,
      "learning_rate": 0.0004354569339504961,
      "loss": 3.1596,
      "step": 80844
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8786253929138184,
      "learning_rate": 0.0004354532840952603,
      "loss": 3.0603,
      "step": 80845
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4081344604492188,
      "learning_rate": 0.00043544963421484115,
      "loss": 2.952,
      "step": 80846
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.9517579078674316,
      "learning_rate": 0.00043544598430923933,
      "loss": 2.8946,
      "step": 80847
    },
    {
      "epoch": 1.05,
      "grad_norm": 4.973272323608398,
      "learning_rate": 0.00043544233437845545,
      "loss": 2.9231,
      "step": 80848
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.9270472526550293,
      "learning_rate": 0.00043543868442249043,
      "loss": 3.169,
      "step": 80849
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7726408243179321,
      "learning_rate": 0.0004354350344413447,
      "loss": 3.064,
      "step": 80850
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.4913687705993652,
      "learning_rate": 0.00043543138443501904,
      "loss": 3.0353,
      "step": 80851
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.3142495155334473,
      "learning_rate": 0.0004354277344035143,
      "loss": 3.0466,
      "step": 80852
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.398721694946289,
      "learning_rate": 0.00043542408434683085,
      "loss": 3.0522,
      "step": 80853
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.433079481124878,
      "learning_rate": 0.00043542043426496947,
      "loss": 3.0744,
      "step": 80854
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.421555995941162,
      "learning_rate": 0.0004354167841579309,
      "loss": 2.9334,
      "step": 80855
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.0349104404449463,
      "learning_rate": 0.00043541313402571587,
      "loss": 3.3192,
      "step": 80856
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7045897245407104,
      "learning_rate": 0.0004354094838683249,
      "loss": 3.3477,
      "step": 80857
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7656053304672241,
      "learning_rate": 0.0004354058336857588,
      "loss": 3.0778,
      "step": 80858
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.828413248062134,
      "learning_rate": 0.00043540218347801825,
      "loss": 2.8518,
      "step": 80859
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.282193183898926,
      "learning_rate": 0.00043539853324510376,
      "loss": 3.2371,
      "step": 80860
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2185866832733154,
      "learning_rate": 0.00043539488298701616,
      "loss": 3.1559,
      "step": 80861
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.244502544403076,
      "learning_rate": 0.0004353912327037561,
      "loss": 2.9602,
      "step": 80862
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.263237953186035,
      "learning_rate": 0.00043538758239532427,
      "loss": 3.0998,
      "step": 80863
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7830013036727905,
      "learning_rate": 0.0004353839320617214,
      "loss": 3.1686,
      "step": 80864
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7619496583938599,
      "learning_rate": 0.000435380281702948,
      "loss": 2.857,
      "step": 80865
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5354238748550415,
      "learning_rate": 0.00043537663131900485,
      "loss": 3.0401,
      "step": 80866
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5459693670272827,
      "learning_rate": 0.0004353729809098927,
      "loss": 2.8798,
      "step": 80867
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6428580284118652,
      "learning_rate": 0.0004353693304756121,
      "loss": 3.1529,
      "step": 80868
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6310924291610718,
      "learning_rate": 0.00043536568001616377,
      "loss": 2.9313,
      "step": 80869
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0863306522369385,
      "learning_rate": 0.00043536202953154844,
      "loss": 2.6846,
      "step": 80870
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.039069414138794,
      "learning_rate": 0.0004353583790217667,
      "loss": 3.1832,
      "step": 80871
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.109646797180176,
      "learning_rate": 0.0004353547284868194,
      "loss": 3.0081,
      "step": 80872
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9333117008209229,
      "learning_rate": 0.0004353510779267071,
      "loss": 3.0628,
      "step": 80873
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9922492504119873,
      "learning_rate": 0.00043534742734143035,
      "loss": 3.1289,
      "step": 80874
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9395114183425903,
      "learning_rate": 0.00043534377673099,
      "loss": 3.1059,
      "step": 80875
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7586628198623657,
      "learning_rate": 0.0004353401260953868,
      "loss": 3.1846,
      "step": 80876
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7173941135406494,
      "learning_rate": 0.0004353364754346212,
      "loss": 3.1209,
      "step": 80877
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.659779667854309,
      "learning_rate": 0.00043533282474869395,
      "loss": 3.1587,
      "step": 80878
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6265842914581299,
      "learning_rate": 0.0004353291740376059,
      "loss": 3.098,
      "step": 80879
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6903070211410522,
      "learning_rate": 0.0004353255233013575,
      "loss": 3.1553,
      "step": 80880
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9107410907745361,
      "learning_rate": 0.0004353218725399496,
      "loss": 3.0658,
      "step": 80881
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6032482385635376,
      "learning_rate": 0.0004353182217533828,
      "loss": 3.083,
      "step": 80882
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.499815821647644,
      "learning_rate": 0.00043531457094165773,
      "loss": 2.8702,
      "step": 80883
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5834498405456543,
      "learning_rate": 0.0004353109201047752,
      "loss": 2.8514,
      "step": 80884
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5790752172470093,
      "learning_rate": 0.0004353072692427358,
      "loss": 2.9588,
      "step": 80885
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3186508417129517,
      "learning_rate": 0.00043530361835554017,
      "loss": 2.7246,
      "step": 80886
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7202595472335815,
      "learning_rate": 0.0004352999674431891,
      "loss": 3.0026,
      "step": 80887
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5936697721481323,
      "learning_rate": 0.0004352963165056832,
      "loss": 3.3097,
      "step": 80888
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6986902952194214,
      "learning_rate": 0.00043529266554302314,
      "loss": 2.9951,
      "step": 80889
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7012665271759033,
      "learning_rate": 0.00043528901455520963,
      "loss": 2.8721,
      "step": 80890
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3603148460388184,
      "learning_rate": 0.0004352853635422434,
      "loss": 2.7697,
      "step": 80891
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.6138947010040283,
      "learning_rate": 0.000435281712504125,
      "loss": 3.0988,
      "step": 80892
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.69728422164917,
      "learning_rate": 0.00043527806144085516,
      "loss": 3.0518,
      "step": 80893
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6570024490356445,
      "learning_rate": 0.00043527441035243463,
      "loss": 3.0822,
      "step": 80894
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5672061443328857,
      "learning_rate": 0.00043527075923886397,
      "loss": 2.8395,
      "step": 80895
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3008060455322266,
      "learning_rate": 0.00043526710810014394,
      "loss": 3.1308,
      "step": 80896
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8074023723602295,
      "learning_rate": 0.0004352634569362753,
      "loss": 2.9068,
      "step": 80897
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6186853647232056,
      "learning_rate": 0.0004352598057472585,
      "loss": 2.8342,
      "step": 80898
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.658839702606201,
      "learning_rate": 0.0004352561545330944,
      "loss": 2.6881,
      "step": 80899
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7893904447555542,
      "learning_rate": 0.00043525250329378367,
      "loss": 3.0707,
      "step": 80900
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8338618278503418,
      "learning_rate": 0.00043524885202932687,
      "loss": 3.1459,
      "step": 80901
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.259658098220825,
      "learning_rate": 0.0004352452007397248,
      "loss": 3.054,
      "step": 80902
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8034030199050903,
      "learning_rate": 0.0004352415494249781,
      "loss": 3.1331,
      "step": 80903
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.3876063823699951,
      "learning_rate": 0.0004352378980850874,
      "loss": 3.0006,
      "step": 80904
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.112898588180542,
      "learning_rate": 0.00043523424672005346,
      "loss": 3.0093,
      "step": 80905
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.281464099884033,
      "learning_rate": 0.00043523059532987686,
      "loss": 2.8099,
      "step": 80906
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7426927089691162,
      "learning_rate": 0.0004352269439145584,
      "loss": 2.7718,
      "step": 80907
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3194260597229004,
      "learning_rate": 0.0004352232924740987,
      "loss": 3.2432,
      "step": 80908
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3807146549224854,
      "learning_rate": 0.0004352196410084984,
      "loss": 2.9176,
      "step": 80909
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8032562732696533,
      "learning_rate": 0.0004352159895177583,
      "loss": 3.1338,
      "step": 80910
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6832443475723267,
      "learning_rate": 0.00043521233800187896,
      "loss": 3.0446,
      "step": 80911
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.389835834503174,
      "learning_rate": 0.000435208686460861,
      "loss": 3.0077,
      "step": 80912
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7901408672332764,
      "learning_rate": 0.00043520503489470527,
      "loss": 3.0247,
      "step": 80913
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6377618312835693,
      "learning_rate": 0.0004352013833034124,
      "loss": 2.9908,
      "step": 80914
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.975502610206604,
      "learning_rate": 0.000435197731686983,
      "loss": 2.7973,
      "step": 80915
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.804760217666626,
      "learning_rate": 0.0004351940800454177,
      "loss": 2.8239,
      "step": 80916
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9594720602035522,
      "learning_rate": 0.0004351904283787175,
      "loss": 2.6505,
      "step": 80917
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2411348819732666,
      "learning_rate": 0.0004351867766868826,
      "loss": 3.1585,
      "step": 80918
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7501567602157593,
      "learning_rate": 0.00043518312496991404,
      "loss": 3.1247,
      "step": 80919
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9846755266189575,
      "learning_rate": 0.00043517947322781246,
      "loss": 3.2254,
      "step": 80920
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9684555530548096,
      "learning_rate": 0.0004351758214605783,
      "loss": 2.912,
      "step": 80921
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5909558534622192,
      "learning_rate": 0.0004351721696682125,
      "loss": 3.1445,
      "step": 80922
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.486448049545288,
      "learning_rate": 0.00043516851785071564,
      "loss": 3.0987,
      "step": 80923
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.135805368423462,
      "learning_rate": 0.00043516486600808847,
      "loss": 3.4286,
      "step": 80924
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8456573486328125,
      "learning_rate": 0.0004351612141403315,
      "loss": 2.8137,
      "step": 80925
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0589852333068848,
      "learning_rate": 0.0004351575622474455,
      "loss": 3.0124,
      "step": 80926
    },
    {
      "epoch": 1.05,
      "grad_norm": 4.308307647705078,
      "learning_rate": 0.00043515391032943117,
      "loss": 2.8921,
      "step": 80927
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.306030511856079,
      "learning_rate": 0.00043515025838628923,
      "loss": 2.9857,
      "step": 80928
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7823952436447144,
      "learning_rate": 0.00043514660641802026,
      "loss": 3.1097,
      "step": 80929
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0748512744903564,
      "learning_rate": 0.00043514295442462496,
      "loss": 3.1875,
      "step": 80930
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5825493335723877,
      "learning_rate": 0.00043513930240610414,
      "loss": 3.0475,
      "step": 80931
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.649401307106018,
      "learning_rate": 0.00043513565036245824,
      "loss": 3.0244,
      "step": 80932
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.150299072265625,
      "learning_rate": 0.0004351319982936881,
      "loss": 2.8616,
      "step": 80933
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0952582359313965,
      "learning_rate": 0.0004351283461997945,
      "loss": 3.178,
      "step": 80934
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.261882781982422,
      "learning_rate": 0.0004351246940807779,
      "loss": 2.9097,
      "step": 80935
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.7210421562194824,
      "learning_rate": 0.00043512104193663896,
      "loss": 2.851,
      "step": 80936
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6697461605072021,
      "learning_rate": 0.00043511738976737864,
      "loss": 2.9875,
      "step": 80937
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.844009518623352,
      "learning_rate": 0.00043511373757299735,
      "loss": 3.0565,
      "step": 80938
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.5412492752075195,
      "learning_rate": 0.0004351100853534959,
      "loss": 2.8031,
      "step": 80939
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.425877571105957,
      "learning_rate": 0.00043510643310887493,
      "loss": 3.1028,
      "step": 80940
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.262636423110962,
      "learning_rate": 0.0004351027808391352,
      "loss": 3.0053,
      "step": 80941
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6563899517059326,
      "learning_rate": 0.00043509912854427715,
      "loss": 3.1303,
      "step": 80942
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6583466529846191,
      "learning_rate": 0.0004350954762243018,
      "loss": 2.835,
      "step": 80943
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.50363290309906,
      "learning_rate": 0.00043509182387920946,
      "loss": 2.9209,
      "step": 80944
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.3932459354400635,
      "learning_rate": 0.00043508817150900115,
      "loss": 3.1388,
      "step": 80945
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.665174126625061,
      "learning_rate": 0.00043508451911367736,
      "loss": 2.8258,
      "step": 80946
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1502723693847656,
      "learning_rate": 0.0004350808666932388,
      "loss": 2.8419,
      "step": 80947
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.9136581420898438,
      "learning_rate": 0.00043507721424768613,
      "loss": 2.8387,
      "step": 80948
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9918605089187622,
      "learning_rate": 0.00043507356177702004,
      "loss": 2.8878,
      "step": 80949
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7798957824707031,
      "learning_rate": 0.0004350699092812413,
      "loss": 2.9961,
      "step": 80950
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.033174753189087,
      "learning_rate": 0.0004350662567603505,
      "loss": 2.9615,
      "step": 80951
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.794699192047119,
      "learning_rate": 0.0004350626042143483,
      "loss": 3.309,
      "step": 80952
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0686285495758057,
      "learning_rate": 0.0004350589516432354,
      "loss": 2.9511,
      "step": 80953
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.108389139175415,
      "learning_rate": 0.0004350552990470125,
      "loss": 3.1525,
      "step": 80954
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.424072504043579,
      "learning_rate": 0.00043505164642568034,
      "loss": 3.0307,
      "step": 80955
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.077075481414795,
      "learning_rate": 0.0004350479937792395,
      "loss": 3.0721,
      "step": 80956
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6653709411621094,
      "learning_rate": 0.0004350443411076906,
      "loss": 3.0425,
      "step": 80957
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.79385507106781,
      "learning_rate": 0.00043504068841103454,
      "loss": 2.9308,
      "step": 80958
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5761194229125977,
      "learning_rate": 0.0004350370356892718,
      "loss": 3.0861,
      "step": 80959
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.1457645893096924,
      "learning_rate": 0.0004350333829424031,
      "loss": 2.9022,
      "step": 80960
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.755401849746704,
      "learning_rate": 0.0004350297301704292,
      "loss": 3.1601,
      "step": 80961
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8288315534591675,
      "learning_rate": 0.00043502607737335065,
      "loss": 2.8721,
      "step": 80962
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.730684518814087,
      "learning_rate": 0.0004350224245511682,
      "loss": 2.9451,
      "step": 80963
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7445313930511475,
      "learning_rate": 0.00043501877170388267,
      "loss": 3.2391,
      "step": 80964
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7001394033432007,
      "learning_rate": 0.00043501511883149447,
      "loss": 2.9749,
      "step": 80965
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7859606742858887,
      "learning_rate": 0.0004350114659340044,
      "loss": 2.9089,
      "step": 80966
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6629396677017212,
      "learning_rate": 0.00043500781301141324,
      "loss": 3.0806,
      "step": 80967
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7223894596099854,
      "learning_rate": 0.0004350041600637215,
      "loss": 2.8064,
      "step": 80968
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6994298696517944,
      "learning_rate": 0.00043500050709093,
      "loss": 3.2212,
      "step": 80969
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2150394916534424,
      "learning_rate": 0.0004349968540930394,
      "loss": 3.0349,
      "step": 80970
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.781854271888733,
      "learning_rate": 0.00043499320107005026,
      "loss": 2.7239,
      "step": 80971
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7593499422073364,
      "learning_rate": 0.00043498954802196336,
      "loss": 3.1877,
      "step": 80972
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.536094069480896,
      "learning_rate": 0.0004349858949487793,
      "loss": 3.2171,
      "step": 80973
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6924980878829956,
      "learning_rate": 0.0004349822418504989,
      "loss": 3.0462,
      "step": 80974
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7314263582229614,
      "learning_rate": 0.00043497858872712264,
      "loss": 2.9243,
      "step": 80975
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.507296085357666,
      "learning_rate": 0.0004349749355786514,
      "loss": 2.9632,
      "step": 80976
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0111641883850098,
      "learning_rate": 0.0004349712824050858,
      "loss": 3.1027,
      "step": 80977
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.0095982551574707,
      "learning_rate": 0.0004349676292064264,
      "loss": 2.8957,
      "step": 80978
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5885769128799438,
      "learning_rate": 0.00043496397598267404,
      "loss": 3.0389,
      "step": 80979
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6442484855651855,
      "learning_rate": 0.0004349603227338293,
      "loss": 3.067,
      "step": 80980
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3454062938690186,
      "learning_rate": 0.0004349566694598929,
      "loss": 3.041,
      "step": 80981
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6667230129241943,
      "learning_rate": 0.0004349530161608655,
      "loss": 3.0047,
      "step": 80982
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5842801332473755,
      "learning_rate": 0.0004349493628367478,
      "loss": 3.2566,
      "step": 80983
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.8674373626708984,
      "learning_rate": 0.00043494570948754044,
      "loss": 3.1076,
      "step": 80984
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.813833236694336,
      "learning_rate": 0.00043494205611324416,
      "loss": 2.98,
      "step": 80985
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5046452283859253,
      "learning_rate": 0.0004349384027138596,
      "loss": 3.1218,
      "step": 80986
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9981833696365356,
      "learning_rate": 0.0004349347492893875,
      "loss": 3.2801,
      "step": 80987
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.693784475326538,
      "learning_rate": 0.0004349310958398283,
      "loss": 2.9953,
      "step": 80988
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.470827341079712,
      "learning_rate": 0.000434927442365183,
      "loss": 2.9571,
      "step": 80989
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8829759359359741,
      "learning_rate": 0.00043492378886545217,
      "loss": 2.8961,
      "step": 80990
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4665204286575317,
      "learning_rate": 0.0004349201353406363,
      "loss": 2.9804,
      "step": 80991
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8513879776000977,
      "learning_rate": 0.00043491648179073636,
      "loss": 2.9325,
      "step": 80992
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.926310658454895,
      "learning_rate": 0.0004349128282157529,
      "loss": 3.0392,
      "step": 80993
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7102760076522827,
      "learning_rate": 0.0004349091746156865,
      "loss": 3.329,
      "step": 80994
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9667822122573853,
      "learning_rate": 0.0004349055209905381,
      "loss": 3.0499,
      "step": 80995
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7908287048339844,
      "learning_rate": 0.00043490186734030815,
      "loss": 2.8893,
      "step": 80996
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.3061490058898926,
      "learning_rate": 0.0004348982136649973,
      "loss": 3.0249,
      "step": 80997
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.862615942955017,
      "learning_rate": 0.0004348945599646065,
      "loss": 3.0182,
      "step": 80998
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.160508394241333,
      "learning_rate": 0.0004348909062391361,
      "loss": 2.826,
      "step": 80999
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.23305606842041,
      "learning_rate": 0.00043488725248858696,
      "loss": 2.8224,
      "step": 81000
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9251352548599243,
      "learning_rate": 0.0004348835987129598,
      "loss": 2.7745,
      "step": 81001
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8537166118621826,
      "learning_rate": 0.0004348799449122552,
      "loss": 3.104,
      "step": 81002
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8420604467391968,
      "learning_rate": 0.0004348762910864739,
      "loss": 2.836,
      "step": 81003
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9936946630477905,
      "learning_rate": 0.0004348726372356164,
      "loss": 2.757,
      "step": 81004
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7232294082641602,
      "learning_rate": 0.00043486898335968376,
      "loss": 2.9887,
      "step": 81005
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5220156908035278,
      "learning_rate": 0.00043486532945867626,
      "loss": 2.9821,
      "step": 81006
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5387662649154663,
      "learning_rate": 0.00043486167553259485,
      "loss": 2.8311,
      "step": 81007
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6995654106140137,
      "learning_rate": 0.00043485802158144013,
      "loss": 2.9929,
      "step": 81008
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.4664740562438965,
      "learning_rate": 0.00043485436760521264,
      "loss": 2.9188,
      "step": 81009
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.6037704944610596,
      "learning_rate": 0.0004348507136039132,
      "loss": 3.1622,
      "step": 81010
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8100744485855103,
      "learning_rate": 0.00043484705957754253,
      "loss": 2.9925,
      "step": 81011
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.8269637823104858,
      "learning_rate": 0.0004348434055261012,
      "loss": 2.9643,
      "step": 81012
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.4354891777038574,
      "learning_rate": 0.0004348397514495899,
      "loss": 3.197,
      "step": 81013
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.609035611152649,
      "learning_rate": 0.0004348360973480094,
      "loss": 2.8893,
      "step": 81014
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.7246294021606445,
      "learning_rate": 0.00043483244322136034,
      "loss": 3.1756,
      "step": 81015
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.634451985359192,
      "learning_rate": 0.0004348287890696434,
      "loss": 2.9809,
      "step": 81016
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.5972086191177368,
      "learning_rate": 0.0004348251348928592,
      "loss": 3.1633,
      "step": 81017
    },
    {
      "epoch": 1.05,
      "grad_norm": 3.0315463542938232,
      "learning_rate": 0.00043482148069100843,
      "loss": 3.1269,
      "step": 81018
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.450777053833008,
      "learning_rate": 0.00043481782646409193,
      "loss": 2.9333,
      "step": 81019
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.7068015336990356,
      "learning_rate": 0.0004348141722121101,
      "loss": 2.9317,
      "step": 81020
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.249133586883545,
      "learning_rate": 0.00043481051793506386,
      "loss": 2.9341,
      "step": 81021
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.332937717437744,
      "learning_rate": 0.00043480686363295376,
      "loss": 3.1795,
      "step": 81022
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.2377753257751465,
      "learning_rate": 0.00043480320930578054,
      "loss": 3.0275,
      "step": 81023
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9217314720153809,
      "learning_rate": 0.0004347995549535449,
      "loss": 3.0962,
      "step": 81024
    },
    {
      "epoch": 1.05,
      "grad_norm": 1.9651700258255005,
      "learning_rate": 0.00043479590057624745,
      "loss": 2.7442,
      "step": 81025
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.123253107070923,
      "learning_rate": 0.00043479224617388887,
      "loss": 3.0774,
      "step": 81026
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.5317816734313965,
      "learning_rate": 0.0004347885917464698,
      "loss": 3.0156,
      "step": 81027
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4012950658798218,
      "learning_rate": 0.00043478493729399114,
      "loss": 3.13,
      "step": 81028
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.365471124649048,
      "learning_rate": 0.0004347812828164533,
      "loss": 3.0584,
      "step": 81029
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1206679344177246,
      "learning_rate": 0.0004347776283138571,
      "loss": 3.2874,
      "step": 81030
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.893212080001831,
      "learning_rate": 0.00043477397378620326,
      "loss": 2.8546,
      "step": 81031
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4656847715377808,
      "learning_rate": 0.0004347703192334923,
      "loss": 3.0923,
      "step": 81032
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.682538390159607,
      "learning_rate": 0.000434766664655725,
      "loss": 2.8348,
      "step": 81033
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.416248321533203,
      "learning_rate": 0.00043476301005290215,
      "loss": 3.2038,
      "step": 81034
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.843767523765564,
      "learning_rate": 0.00043475935542502416,
      "loss": 3.0727,
      "step": 81035
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5385655164718628,
      "learning_rate": 0.000434755700772092,
      "loss": 2.9882,
      "step": 81036
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4117751121520996,
      "learning_rate": 0.0004347520460941061,
      "loss": 3.0411,
      "step": 81037
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1835315227508545,
      "learning_rate": 0.00043474839139106733,
      "loss": 2.9942,
      "step": 81038
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4632354974746704,
      "learning_rate": 0.0004347447366629762,
      "loss": 2.906,
      "step": 81039
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.673373818397522,
      "learning_rate": 0.00043474108190983356,
      "loss": 2.915,
      "step": 81040
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.530269980430603,
      "learning_rate": 0.00043473742713164,
      "loss": 3.0894,
      "step": 81041
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5610384941101074,
      "learning_rate": 0.00043473377232839614,
      "loss": 2.9886,
      "step": 81042
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2615838050842285,
      "learning_rate": 0.00043473011750010283,
      "loss": 2.857,
      "step": 81043
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3887319564819336,
      "learning_rate": 0.00043472646264676066,
      "loss": 3.1272,
      "step": 81044
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8283052444458008,
      "learning_rate": 0.00043472280776837017,
      "loss": 3.0068,
      "step": 81045
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.681873083114624,
      "learning_rate": 0.0004347191528649322,
      "loss": 2.9282,
      "step": 81046
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4662050008773804,
      "learning_rate": 0.0004347154979364475,
      "loss": 2.9258,
      "step": 81047
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7369072437286377,
      "learning_rate": 0.0004347118429829166,
      "loss": 3.2637,
      "step": 81048
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4771814346313477,
      "learning_rate": 0.0004347081880043402,
      "loss": 2.9153,
      "step": 81049
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5618399381637573,
      "learning_rate": 0.000434704533000719,
      "loss": 2.7666,
      "step": 81050
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4843485355377197,
      "learning_rate": 0.0004347008779720537,
      "loss": 3.0502,
      "step": 81051
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7212228775024414,
      "learning_rate": 0.0004346972229183449,
      "loss": 3.0008,
      "step": 81052
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4481569528579712,
      "learning_rate": 0.0004346935678395935,
      "loss": 2.9689,
      "step": 81053
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.826180100440979,
      "learning_rate": 0.00043468991273579985,
      "loss": 3.1975,
      "step": 81054
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5632481575012207,
      "learning_rate": 0.00043468625760696494,
      "loss": 3.055,
      "step": 81055
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.03680682182312,
      "learning_rate": 0.0004346826024530892,
      "loss": 2.9947,
      "step": 81056
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.715183138847351,
      "learning_rate": 0.00043467894727417346,
      "loss": 2.9563,
      "step": 81057
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6011216640472412,
      "learning_rate": 0.00043467529207021836,
      "loss": 3.0478,
      "step": 81058
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.736382246017456,
      "learning_rate": 0.0004346716368412247,
      "loss": 3.3133,
      "step": 81059
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1792125701904297,
      "learning_rate": 0.0004346679815871929,
      "loss": 3.0034,
      "step": 81060
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.744990348815918,
      "learning_rate": 0.0004346643263081238,
      "loss": 3.0341,
      "step": 81061
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6521306037902832,
      "learning_rate": 0.00043466067100401804,
      "loss": 2.7756,
      "step": 81062
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2996156215667725,
      "learning_rate": 0.00043465701567487635,
      "loss": 3.0202,
      "step": 81063
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.025972604751587,
      "learning_rate": 0.0004346533603206994,
      "loss": 2.7787,
      "step": 81064
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7655478715896606,
      "learning_rate": 0.00043464970494148784,
      "loss": 3.1169,
      "step": 81065
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6828662157058716,
      "learning_rate": 0.0004346460495372423,
      "loss": 3.1091,
      "step": 81066
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.071789026260376,
      "learning_rate": 0.0004346423941079636,
      "loss": 3.0875,
      "step": 81067
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2734785079956055,
      "learning_rate": 0.00043463873865365234,
      "loss": 2.8881,
      "step": 81068
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.8766090869903564,
      "learning_rate": 0.0004346350831743092,
      "loss": 3.224,
      "step": 81069
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6941041946411133,
      "learning_rate": 0.0004346314276699348,
      "loss": 2.9873,
      "step": 81070
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.606576442718506,
      "learning_rate": 0.0004346277721405299,
      "loss": 3.0475,
      "step": 81071
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.050973653793335,
      "learning_rate": 0.00043462411658609515,
      "loss": 3.1127,
      "step": 81072
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7066236734390259,
      "learning_rate": 0.0004346204610066312,
      "loss": 2.8274,
      "step": 81073
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8338664770126343,
      "learning_rate": 0.00043461680540213884,
      "loss": 3.1463,
      "step": 81074
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4365653991699219,
      "learning_rate": 0.0004346131497726186,
      "loss": 2.8794,
      "step": 81075
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.665372610092163,
      "learning_rate": 0.00043460949411807125,
      "loss": 3.0811,
      "step": 81076
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.1784355640411377,
      "learning_rate": 0.0004346058384384975,
      "loss": 3.0348,
      "step": 81077
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6009621620178223,
      "learning_rate": 0.00043460218273389797,
      "loss": 3.1261,
      "step": 81078
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5101242065429688,
      "learning_rate": 0.00043459852700427335,
      "loss": 3.0254,
      "step": 81079
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0151989459991455,
      "learning_rate": 0.0004345948712496243,
      "loss": 3.1616,
      "step": 81080
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8725926876068115,
      "learning_rate": 0.0004345912154699516,
      "loss": 3.1168,
      "step": 81081
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9579389095306396,
      "learning_rate": 0.0004345875596652557,
      "loss": 2.8443,
      "step": 81082
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4751213788986206,
      "learning_rate": 0.00043458390383553755,
      "loss": 3.1123,
      "step": 81083
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2210452556610107,
      "learning_rate": 0.0004345802479807977,
      "loss": 3.2115,
      "step": 81084
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0064897537231445,
      "learning_rate": 0.00043457659210103676,
      "loss": 3.0461,
      "step": 81085
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.013803243637085,
      "learning_rate": 0.00043457293619625565,
      "loss": 2.9958,
      "step": 81086
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.828462600708008,
      "learning_rate": 0.0004345692802664548,
      "loss": 3.0496,
      "step": 81087
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3201980590820312,
      "learning_rate": 0.00043456562431163487,
      "loss": 3.2125,
      "step": 81088
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6173595190048218,
      "learning_rate": 0.00043456196833179683,
      "loss": 2.8499,
      "step": 81089
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.938520908355713,
      "learning_rate": 0.0004345583123269411,
      "loss": 3.1391,
      "step": 81090
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.9407830238342285,
      "learning_rate": 0.0004345546562970684,
      "loss": 2.9693,
      "step": 81091
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4330683946609497,
      "learning_rate": 0.0004345510002421796,
      "loss": 3.0746,
      "step": 81092
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6653130054473877,
      "learning_rate": 0.00043454734416227504,
      "loss": 2.9892,
      "step": 81093
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.941273808479309,
      "learning_rate": 0.00043454368805735574,
      "loss": 3.0057,
      "step": 81094
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7055143117904663,
      "learning_rate": 0.0004345400319274221,
      "loss": 2.984,
      "step": 81095
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2867720127105713,
      "learning_rate": 0.000434536375772475,
      "loss": 2.9771,
      "step": 81096
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4760863780975342,
      "learning_rate": 0.00043453271959251503,
      "loss": 2.8667,
      "step": 81097
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.662942886352539,
      "learning_rate": 0.00043452906338754295,
      "loss": 3.2601,
      "step": 81098
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.574641466140747,
      "learning_rate": 0.0004345254071575593,
      "loss": 2.8486,
      "step": 81099
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6846097707748413,
      "learning_rate": 0.0004345217509025649,
      "loss": 2.9836,
      "step": 81100
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6838569641113281,
      "learning_rate": 0.00043451809462256033,
      "loss": 3.1556,
      "step": 81101
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.411155343055725,
      "learning_rate": 0.00043451443831754626,
      "loss": 3.1753,
      "step": 81102
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.572490930557251,
      "learning_rate": 0.0004345107819875234,
      "loss": 2.7716,
      "step": 81103
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.625213623046875,
      "learning_rate": 0.0004345071256324926,
      "loss": 3.2231,
      "step": 81104
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5436615943908691,
      "learning_rate": 0.0004345034692524543,
      "loss": 3.0866,
      "step": 81105
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.0552468299865723,
      "learning_rate": 0.00043449981284740926,
      "loss": 2.9933,
      "step": 81106
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.692437767982483,
      "learning_rate": 0.0004344961564173581,
      "loss": 2.9757,
      "step": 81107
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6234551668167114,
      "learning_rate": 0.00043449249996230173,
      "loss": 2.9754,
      "step": 81108
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.773209571838379,
      "learning_rate": 0.0004344888434822405,
      "loss": 2.881,
      "step": 81109
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2920563220977783,
      "learning_rate": 0.0004344851869771753,
      "loss": 3.1879,
      "step": 81110
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7907105684280396,
      "learning_rate": 0.00043448153044710683,
      "loss": 3.1975,
      "step": 81111
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6726579666137695,
      "learning_rate": 0.0004344778738920357,
      "loss": 2.9699,
      "step": 81112
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.696893572807312,
      "learning_rate": 0.00043447421731196255,
      "loss": 3.0254,
      "step": 81113
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5906270742416382,
      "learning_rate": 0.00043447056070688815,
      "loss": 3.1443,
      "step": 81114
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9508659839630127,
      "learning_rate": 0.0004344669040768131,
      "loss": 2.8042,
      "step": 81115
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0533320903778076,
      "learning_rate": 0.00043446324742173805,
      "loss": 2.9943,
      "step": 81116
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7727229595184326,
      "learning_rate": 0.00043445959074166386,
      "loss": 3.1873,
      "step": 81117
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8882354497909546,
      "learning_rate": 0.00043445593403659106,
      "loss": 2.8277,
      "step": 81118
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.599373698234558,
      "learning_rate": 0.0004344522773065203,
      "loss": 2.8905,
      "step": 81119
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.861510992050171,
      "learning_rate": 0.00043444862055145233,
      "loss": 3.0332,
      "step": 81120
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.151212215423584,
      "learning_rate": 0.00043444496377138787,
      "loss": 2.8676,
      "step": 81121
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.109102249145508,
      "learning_rate": 0.00043444130696632756,
      "loss": 3.1449,
      "step": 81122
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6093593835830688,
      "learning_rate": 0.00043443765013627205,
      "loss": 2.8383,
      "step": 81123
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7684842348098755,
      "learning_rate": 0.0004344339932812221,
      "loss": 2.9387,
      "step": 81124
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.187427520751953,
      "learning_rate": 0.00043443033640117826,
      "loss": 3.3974,
      "step": 81125
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5608994960784912,
      "learning_rate": 0.00043442667949614134,
      "loss": 3.1297,
      "step": 81126
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5149502754211426,
      "learning_rate": 0.00043442302256611193,
      "loss": 3.071,
      "step": 81127
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.591436743736267,
      "learning_rate": 0.00043441936561109073,
      "loss": 2.8947,
      "step": 81128
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4382833242416382,
      "learning_rate": 0.00043441570863107847,
      "loss": 3.1545,
      "step": 81129
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6054565906524658,
      "learning_rate": 0.00043441205162607577,
      "loss": 2.7436,
      "step": 81130
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5556397438049316,
      "learning_rate": 0.0004344083945960833,
      "loss": 3.0077,
      "step": 81131
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.757562279701233,
      "learning_rate": 0.0004344047375411018,
      "loss": 3.0874,
      "step": 81132
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7572226524353027,
      "learning_rate": 0.00043440108046113196,
      "loss": 3.1671,
      "step": 81133
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2562057971954346,
      "learning_rate": 0.0004343974233561744,
      "loss": 3.0907,
      "step": 81134
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.471477746963501,
      "learning_rate": 0.00043439376622622986,
      "loss": 2.8459,
      "step": 81135
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4763299226760864,
      "learning_rate": 0.00043439010907129886,
      "loss": 3.1125,
      "step": 81136
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.456679344177246,
      "learning_rate": 0.0004343864518913823,
      "loss": 2.7563,
      "step": 81137
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.087411642074585,
      "learning_rate": 0.0004343827946864807,
      "loss": 2.9019,
      "step": 81138
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8694688081741333,
      "learning_rate": 0.00043437913745659486,
      "loss": 3.3317,
      "step": 81139
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3031280040740967,
      "learning_rate": 0.00043437548020172536,
      "loss": 3.1281,
      "step": 81140
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.710963726043701,
      "learning_rate": 0.000434371822921873,
      "loss": 2.9214,
      "step": 81141
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.682807207107544,
      "learning_rate": 0.0004343681656170383,
      "loss": 2.9256,
      "step": 81142
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8767931461334229,
      "learning_rate": 0.000434364508287222,
      "loss": 3.0037,
      "step": 81143
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.7432870864868164,
      "learning_rate": 0.0004343608509324249,
      "loss": 3.108,
      "step": 81144
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1473584175109863,
      "learning_rate": 0.0004343571935526475,
      "loss": 3.0913,
      "step": 81145
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.818708062171936,
      "learning_rate": 0.0004343535361478906,
      "loss": 2.9669,
      "step": 81146
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8615596294403076,
      "learning_rate": 0.0004343498787181549,
      "loss": 3.3772,
      "step": 81147
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5416109561920166,
      "learning_rate": 0.0004343462212634409,
      "loss": 3.1007,
      "step": 81148
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0456454753875732,
      "learning_rate": 0.00043434256378374945,
      "loss": 2.9783,
      "step": 81149
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.183305263519287,
      "learning_rate": 0.0004343389062790812,
      "loss": 3.071,
      "step": 81150
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.56422758102417,
      "learning_rate": 0.0004343352487494368,
      "loss": 2.9734,
      "step": 81151
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.035478115081787,
      "learning_rate": 0.00043433159119481694,
      "loss": 3.1253,
      "step": 81152
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9063756465911865,
      "learning_rate": 0.0004343279336152223,
      "loss": 3.1995,
      "step": 81153
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.787379264831543,
      "learning_rate": 0.0004343242760106536,
      "loss": 3.0405,
      "step": 81154
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.037400245666504,
      "learning_rate": 0.00043432061838111143,
      "loss": 3.2872,
      "step": 81155
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6568338871002197,
      "learning_rate": 0.00043431696072659655,
      "loss": 3.2401,
      "step": 81156
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4559943675994873,
      "learning_rate": 0.00043431330304710957,
      "loss": 3.0141,
      "step": 81157
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.635727047920227,
      "learning_rate": 0.0004343096453426512,
      "loss": 2.8758,
      "step": 81158
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.184147357940674,
      "learning_rate": 0.00043430598761322217,
      "loss": 3.1547,
      "step": 81159
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.259139060974121,
      "learning_rate": 0.00043430232985882316,
      "loss": 3.0263,
      "step": 81160
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4788264036178589,
      "learning_rate": 0.00043429867207945476,
      "loss": 3.1399,
      "step": 81161
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6793053150177002,
      "learning_rate": 0.0004342950142751177,
      "loss": 2.8176,
      "step": 81162
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.6427090167999268,
      "learning_rate": 0.0004342913564458127,
      "loss": 2.7606,
      "step": 81163
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9909895658493042,
      "learning_rate": 0.00043428769859154035,
      "loss": 3.1427,
      "step": 81164
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.866574764251709,
      "learning_rate": 0.00043428404071230143,
      "loss": 2.6946,
      "step": 81165
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7530517578125,
      "learning_rate": 0.0004342803828080966,
      "loss": 3.0028,
      "step": 81166
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1467466354370117,
      "learning_rate": 0.0004342767248789264,
      "loss": 3.0659,
      "step": 81167
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.9162561893463135,
      "learning_rate": 0.00043427306692479177,
      "loss": 2.8016,
      "step": 81168
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3298534154891968,
      "learning_rate": 0.00043426940894569316,
      "loss": 3.104,
      "step": 81169
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.998862862586975,
      "learning_rate": 0.0004342657509416313,
      "loss": 2.998,
      "step": 81170
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1874582767486572,
      "learning_rate": 0.0004342620929126069,
      "loss": 2.8648,
      "step": 81171
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7668935060501099,
      "learning_rate": 0.00043425843485862066,
      "loss": 3.0417,
      "step": 81172
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.8023033142089844,
      "learning_rate": 0.0004342547767796733,
      "loss": 2.9626,
      "step": 81173
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5648157596588135,
      "learning_rate": 0.00043425111867576537,
      "loss": 3.14,
      "step": 81174
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6635384559631348,
      "learning_rate": 0.0004342474605468977,
      "loss": 2.9656,
      "step": 81175
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5640949010849,
      "learning_rate": 0.00043424380239307083,
      "loss": 3.1234,
      "step": 81176
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5357249975204468,
      "learning_rate": 0.0004342401442142855,
      "loss": 2.9104,
      "step": 81177
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6030224561691284,
      "learning_rate": 0.0004342364860105424,
      "loss": 3.1946,
      "step": 81178
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9535454511642456,
      "learning_rate": 0.00043423282778184226,
      "loss": 3.0475,
      "step": 81179
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.695027470588684,
      "learning_rate": 0.00043422916952818566,
      "loss": 2.8778,
      "step": 81180
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.736791729927063,
      "learning_rate": 0.0004342255112495733,
      "loss": 3.2679,
      "step": 81181
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.640275001525879,
      "learning_rate": 0.0004342218529460059,
      "loss": 2.8721,
      "step": 81182
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3548613786697388,
      "learning_rate": 0.0004342181946174841,
      "loss": 2.9915,
      "step": 81183
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5348780155181885,
      "learning_rate": 0.00043421453626400866,
      "loss": 3.1063,
      "step": 81184
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0469188690185547,
      "learning_rate": 0.0004342108778855802,
      "loss": 2.8581,
      "step": 81185
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8605140447616577,
      "learning_rate": 0.0004342072194821994,
      "loss": 3.1409,
      "step": 81186
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.023926019668579,
      "learning_rate": 0.00043420356105386686,
      "loss": 2.9943,
      "step": 81187
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.571150541305542,
      "learning_rate": 0.00043419990260058343,
      "loss": 2.9488,
      "step": 81188
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.008704900741577,
      "learning_rate": 0.00043419624412234975,
      "loss": 2.8974,
      "step": 81189
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5957987308502197,
      "learning_rate": 0.00043419258561916635,
      "loss": 3.3174,
      "step": 81190
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.554517149925232,
      "learning_rate": 0.00043418892709103405,
      "loss": 3.286,
      "step": 81191
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2070884704589844,
      "learning_rate": 0.00043418526853795356,
      "loss": 3.2433,
      "step": 81192
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5824302434921265,
      "learning_rate": 0.0004341816099599254,
      "loss": 2.7264,
      "step": 81193
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1044344902038574,
      "learning_rate": 0.00043417795135695036,
      "loss": 2.9181,
      "step": 81194
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8564709424972534,
      "learning_rate": 0.00043417429272902907,
      "loss": 3.0203,
      "step": 81195
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0713930130004883,
      "learning_rate": 0.0004341706340761624,
      "loss": 3.2707,
      "step": 81196
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9430509805679321,
      "learning_rate": 0.0004341669753983508,
      "loss": 3.0184,
      "step": 81197
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.591465473175049,
      "learning_rate": 0.00043416331669559493,
      "loss": 3.0629,
      "step": 81198
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5526983737945557,
      "learning_rate": 0.00043415965796789576,
      "loss": 3.0313,
      "step": 81199
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8313103914260864,
      "learning_rate": 0.0004341559992152536,
      "loss": 2.8073,
      "step": 81200
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.6787095069885254,
      "learning_rate": 0.00043415234043766936,
      "loss": 2.9079,
      "step": 81201
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1542985439300537,
      "learning_rate": 0.0004341486816351437,
      "loss": 2.7924,
      "step": 81202
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6140775680541992,
      "learning_rate": 0.0004341450228076772,
      "loss": 3.126,
      "step": 81203
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5563851594924927,
      "learning_rate": 0.0004341413639552707,
      "loss": 3.0554,
      "step": 81204
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7631629705429077,
      "learning_rate": 0.0004341377050779248,
      "loss": 2.9803,
      "step": 81205
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.9693706035614014,
      "learning_rate": 0.00043413404617564005,
      "loss": 2.8883,
      "step": 81206
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6900489330291748,
      "learning_rate": 0.0004341303872484173,
      "loss": 3.1045,
      "step": 81207
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7896116971969604,
      "learning_rate": 0.00043412672829625724,
      "loss": 3.3795,
      "step": 81208
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.312662124633789,
      "learning_rate": 0.0004341230693191604,
      "loss": 2.8423,
      "step": 81209
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6008625030517578,
      "learning_rate": 0.00043411941031712756,
      "loss": 3.036,
      "step": 81210
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7077573537826538,
      "learning_rate": 0.0004341157512901595,
      "loss": 2.9817,
      "step": 81211
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.030646562576294,
      "learning_rate": 0.0004341120922382567,
      "loss": 3.1029,
      "step": 81212
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6163182258605957,
      "learning_rate": 0.0004341084331614199,
      "loss": 3.1278,
      "step": 81213
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9147884845733643,
      "learning_rate": 0.0004341047740596499,
      "loss": 3.109,
      "step": 81214
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1652796268463135,
      "learning_rate": 0.0004341011149329472,
      "loss": 2.9769,
      "step": 81215
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5384169816970825,
      "learning_rate": 0.00043409745578131264,
      "loss": 3.3127,
      "step": 81216
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4635992050170898,
      "learning_rate": 0.0004340937966047468,
      "loss": 3.3103,
      "step": 81217
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.722076416015625,
      "learning_rate": 0.00043409013740325044,
      "loss": 2.9627,
      "step": 81218
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9272513389587402,
      "learning_rate": 0.00043408647817682407,
      "loss": 2.8782,
      "step": 81219
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8454340696334839,
      "learning_rate": 0.00043408281892546864,
      "loss": 3.2327,
      "step": 81220
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5123889446258545,
      "learning_rate": 0.0004340791596491846,
      "loss": 2.9613,
      "step": 81221
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3933372497558594,
      "learning_rate": 0.00043407550034797276,
      "loss": 3.146,
      "step": 81222
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7193230390548706,
      "learning_rate": 0.0004340718410218337,
      "loss": 3.0485,
      "step": 81223
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7498390674591064,
      "learning_rate": 0.0004340681816707682,
      "loss": 3.1808,
      "step": 81224
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5386226177215576,
      "learning_rate": 0.00043406452229477683,
      "loss": 3.1033,
      "step": 81225
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4987380504608154,
      "learning_rate": 0.0004340608628938604,
      "loss": 3.152,
      "step": 81226
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7256605625152588,
      "learning_rate": 0.00043405720346801946,
      "loss": 3.1679,
      "step": 81227
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.788808822631836,
      "learning_rate": 0.0004340535440172548,
      "loss": 2.7402,
      "step": 81228
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9093865156173706,
      "learning_rate": 0.0004340498845415671,
      "loss": 2.8859,
      "step": 81229
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.727662205696106,
      "learning_rate": 0.00043404622504095695,
      "loss": 2.8861,
      "step": 81230
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7722480297088623,
      "learning_rate": 0.000434042565515425,
      "loss": 2.703,
      "step": 81231
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.318859338760376,
      "learning_rate": 0.00043403890596497213,
      "loss": 2.856,
      "step": 81232
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.179961919784546,
      "learning_rate": 0.00043403524638959887,
      "loss": 3.0113,
      "step": 81233
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.602837324142456,
      "learning_rate": 0.00043403158678930587,
      "loss": 3.0389,
      "step": 81234
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5357098579406738,
      "learning_rate": 0.00043402792716409395,
      "loss": 2.992,
      "step": 81235
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6855813264846802,
      "learning_rate": 0.00043402426751396365,
      "loss": 2.8959,
      "step": 81236
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.203789472579956,
      "learning_rate": 0.0004340206078389157,
      "loss": 3.0172,
      "step": 81237
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.964041829109192,
      "learning_rate": 0.00043401694813895083,
      "loss": 3.1411,
      "step": 81238
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.674081563949585,
      "learning_rate": 0.0004340132884140696,
      "loss": 3.1116,
      "step": 81239
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.627794623374939,
      "learning_rate": 0.0004340096286642729,
      "loss": 2.8906,
      "step": 81240
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8965071439743042,
      "learning_rate": 0.00043400596888956114,
      "loss": 3.1165,
      "step": 81241
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.60683274269104,
      "learning_rate": 0.00043400230908993524,
      "loss": 2.902,
      "step": 81242
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.1891164779663086,
      "learning_rate": 0.00043399864926539573,
      "loss": 2.9878,
      "step": 81243
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.507101535797119,
      "learning_rate": 0.0004339949894159434,
      "loss": 3.0424,
      "step": 81244
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8619784116744995,
      "learning_rate": 0.00043399132954157887,
      "loss": 3.0329,
      "step": 81245
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.179260730743408,
      "learning_rate": 0.0004339876696423028,
      "loss": 3.0769,
      "step": 81246
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8294278383255005,
      "learning_rate": 0.0004339840097181158,
      "loss": 2.8703,
      "step": 81247
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6454789638519287,
      "learning_rate": 0.0004339803497690188,
      "loss": 3.0057,
      "step": 81248
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7026443481445312,
      "learning_rate": 0.0004339766897950122,
      "loss": 2.9766,
      "step": 81249
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0552821159362793,
      "learning_rate": 0.0004339730297960969,
      "loss": 2.8518,
      "step": 81250
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4181249141693115,
      "learning_rate": 0.0004339693697722734,
      "loss": 3.074,
      "step": 81251
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.729455828666687,
      "learning_rate": 0.0004339657097235425,
      "loss": 3.1045,
      "step": 81252
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6736873388290405,
      "learning_rate": 0.0004339620496499049,
      "loss": 3.1044,
      "step": 81253
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6524763107299805,
      "learning_rate": 0.00043395838955136113,
      "loss": 3.0548,
      "step": 81254
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6514043807983398,
      "learning_rate": 0.000433954729427912,
      "loss": 2.7943,
      "step": 81255
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9084861278533936,
      "learning_rate": 0.0004339510692795582,
      "loss": 2.8669,
      "step": 81256
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9399561882019043,
      "learning_rate": 0.00043394740910630033,
      "loss": 2.9673,
      "step": 81257
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.760667085647583,
      "learning_rate": 0.0004339437489081391,
      "loss": 2.9639,
      "step": 81258
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.876709222793579,
      "learning_rate": 0.0004339400886850752,
      "loss": 2.9085,
      "step": 81259
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5601171255111694,
      "learning_rate": 0.00043393642843710936,
      "loss": 2.9101,
      "step": 81260
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.335988759994507,
      "learning_rate": 0.00043393276816424217,
      "loss": 3.1476,
      "step": 81261
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0880391597747803,
      "learning_rate": 0.0004339291078664743,
      "loss": 3.113,
      "step": 81262
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6565982103347778,
      "learning_rate": 0.0004339254475438066,
      "loss": 3.0499,
      "step": 81263
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5686782598495483,
      "learning_rate": 0.00043392178719623954,
      "loss": 3.2046,
      "step": 81264
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6845898628234863,
      "learning_rate": 0.00043391812682377396,
      "loss": 3.0693,
      "step": 81265
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7155121564865112,
      "learning_rate": 0.0004339144664264104,
      "loss": 2.8847,
      "step": 81266
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4309442043304443,
      "learning_rate": 0.0004339108060041496,
      "loss": 3.24,
      "step": 81267
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7655376195907593,
      "learning_rate": 0.0004339071455569923,
      "loss": 3.0514,
      "step": 81268
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4123984575271606,
      "learning_rate": 0.0004339034850849392,
      "loss": 2.842,
      "step": 81269
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9536904096603394,
      "learning_rate": 0.00043389982458799084,
      "loss": 2.7911,
      "step": 81270
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.690413475036621,
      "learning_rate": 0.000433896164066148,
      "loss": 3.2706,
      "step": 81271
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6462241411209106,
      "learning_rate": 0.00043389250351941124,
      "loss": 2.8622,
      "step": 81272
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.710012435913086,
      "learning_rate": 0.00043388884294778146,
      "loss": 3.1733,
      "step": 81273
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4286770820617676,
      "learning_rate": 0.00043388518235125916,
      "loss": 2.8873,
      "step": 81274
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8291373252868652,
      "learning_rate": 0.0004338815217298451,
      "loss": 3.1691,
      "step": 81275
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.178400754928589,
      "learning_rate": 0.00043387786108353986,
      "loss": 3.1129,
      "step": 81276
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.062812089920044,
      "learning_rate": 0.00043387420041234427,
      "loss": 3.2304,
      "step": 81277
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.543572187423706,
      "learning_rate": 0.000433870539716259,
      "loss": 2.9283,
      "step": 81278
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1971912384033203,
      "learning_rate": 0.0004338668789952845,
      "loss": 3.0436,
      "step": 81279
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0100443363189697,
      "learning_rate": 0.00043386321824942167,
      "loss": 3.0419,
      "step": 81280
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7302532196044922,
      "learning_rate": 0.0004338595574786713,
      "loss": 2.8664,
      "step": 81281
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8227944374084473,
      "learning_rate": 0.0004338558966830338,
      "loss": 2.9773,
      "step": 81282
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.026395797729492,
      "learning_rate": 0.0004338522358625099,
      "loss": 2.8026,
      "step": 81283
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4180870056152344,
      "learning_rate": 0.0004338485750171005,
      "loss": 2.8802,
      "step": 81284
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.261916399002075,
      "learning_rate": 0.00043384491414680596,
      "loss": 3.001,
      "step": 81285
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3272993564605713,
      "learning_rate": 0.00043384125325162716,
      "loss": 3.3216,
      "step": 81286
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8852120637893677,
      "learning_rate": 0.00043383759233156483,
      "loss": 2.9011,
      "step": 81287
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8759998083114624,
      "learning_rate": 0.0004338339313866195,
      "loss": 3.0616,
      "step": 81288
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.551453709602356,
      "learning_rate": 0.0004338302704167919,
      "loss": 2.8455,
      "step": 81289
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6765567064285278,
      "learning_rate": 0.00043382660942208284,
      "loss": 3.0964,
      "step": 81290
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.286116123199463,
      "learning_rate": 0.00043382294840249276,
      "loss": 2.9846,
      "step": 81291
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6499826908111572,
      "learning_rate": 0.0004338192873580225,
      "loss": 3.0072,
      "step": 81292
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0341572761535645,
      "learning_rate": 0.00043381562628867277,
      "loss": 2.9903,
      "step": 81293
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.0019376277923584,
      "learning_rate": 0.0004338119651944441,
      "loss": 2.9753,
      "step": 81294
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.000648021697998,
      "learning_rate": 0.0004338083040753372,
      "loss": 2.8448,
      "step": 81295
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8940366506576538,
      "learning_rate": 0.000433804642931353,
      "loss": 3.0365,
      "step": 81296
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.333064317703247,
      "learning_rate": 0.0004338009817624919,
      "loss": 3.0125,
      "step": 81297
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6572849750518799,
      "learning_rate": 0.0004337973205687547,
      "loss": 3.215,
      "step": 81298
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7626572847366333,
      "learning_rate": 0.00043379365935014206,
      "loss": 3.2415,
      "step": 81299
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7794491052627563,
      "learning_rate": 0.0004337899981066546,
      "loss": 2.9206,
      "step": 81300
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.415210723876953,
      "learning_rate": 0.00043378633683829307,
      "loss": 2.8121,
      "step": 81301
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5239280462265015,
      "learning_rate": 0.0004337826755450581,
      "loss": 3.1024,
      "step": 81302
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7037776708602905,
      "learning_rate": 0.0004337790142269504,
      "loss": 3.0701,
      "step": 81303
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.228259801864624,
      "learning_rate": 0.0004337753528839707,
      "loss": 3.2656,
      "step": 81304
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6007664203643799,
      "learning_rate": 0.00043377169151611965,
      "loss": 2.7244,
      "step": 81305
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5630015134811401,
      "learning_rate": 0.000433768030123398,
      "loss": 3.0567,
      "step": 81306
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8494879007339478,
      "learning_rate": 0.0004337643687058062,
      "loss": 3.0162,
      "step": 81307
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6578696966171265,
      "learning_rate": 0.0004337607072633451,
      "loss": 3.0294,
      "step": 81308
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7417985200881958,
      "learning_rate": 0.00043375704579601536,
      "loss": 2.8654,
      "step": 81309
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9065982103347778,
      "learning_rate": 0.00043375338430381776,
      "loss": 2.8557,
      "step": 81310
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.814023733139038,
      "learning_rate": 0.0004337497227867528,
      "loss": 2.8733,
      "step": 81311
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8369783163070679,
      "learning_rate": 0.00043374606124482127,
      "loss": 2.9913,
      "step": 81312
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.508010745048523,
      "learning_rate": 0.0004337423996780238,
      "loss": 3.1509,
      "step": 81313
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4606804847717285,
      "learning_rate": 0.00043373873808636105,
      "loss": 2.8481,
      "step": 81314
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8868422508239746,
      "learning_rate": 0.0004337350764698338,
      "loss": 2.9184,
      "step": 81315
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.668166160583496,
      "learning_rate": 0.00043373141482844267,
      "loss": 3.0665,
      "step": 81316
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.969214916229248,
      "learning_rate": 0.0004337277531621883,
      "loss": 3.0263,
      "step": 81317
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6738523244857788,
      "learning_rate": 0.0004337240914710715,
      "loss": 3.0561,
      "step": 81318
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.35935115814209,
      "learning_rate": 0.00043372042975509286,
      "loss": 2.9598,
      "step": 81319
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.653489589691162,
      "learning_rate": 0.000433716768014253,
      "loss": 2.9025,
      "step": 81320
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.585815668106079,
      "learning_rate": 0.0004337131062485527,
      "loss": 2.9488,
      "step": 81321
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3431005477905273,
      "learning_rate": 0.0004337094444579926,
      "loss": 3.3531,
      "step": 81322
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5393948554992676,
      "learning_rate": 0.0004337057826425734,
      "loss": 3.2086,
      "step": 81323
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.61149263381958,
      "learning_rate": 0.0004337021208022958,
      "loss": 2.7802,
      "step": 81324
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9977107048034668,
      "learning_rate": 0.0004336984589371604,
      "loss": 2.9934,
      "step": 81325
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5728094577789307,
      "learning_rate": 0.000433694797047168,
      "loss": 3.0831,
      "step": 81326
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9241517782211304,
      "learning_rate": 0.00043369113513231917,
      "loss": 3.1974,
      "step": 81327
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8257399797439575,
      "learning_rate": 0.00043368747319261463,
      "loss": 2.9091,
      "step": 81328
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7483018636703491,
      "learning_rate": 0.00043368381122805503,
      "loss": 3.0596,
      "step": 81329
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6616935729980469,
      "learning_rate": 0.00043368014923864123,
      "loss": 3.0608,
      "step": 81330
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.91844642162323,
      "learning_rate": 0.00043367648722437356,
      "loss": 2.9485,
      "step": 81331
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6832849979400635,
      "learning_rate": 0.00043367282518525305,
      "loss": 2.8591,
      "step": 81332
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7421404123306274,
      "learning_rate": 0.00043366916312128023,
      "loss": 3.1551,
      "step": 81333
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.936926245689392,
      "learning_rate": 0.0004336655010324557,
      "loss": 3.0812,
      "step": 81334
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9389722347259521,
      "learning_rate": 0.00043366183891878026,
      "loss": 2.8673,
      "step": 81335
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6073616743087769,
      "learning_rate": 0.0004336581767802547,
      "loss": 3.0552,
      "step": 81336
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8213872909545898,
      "learning_rate": 0.0004336545146168794,
      "loss": 2.8521,
      "step": 81337
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6607016324996948,
      "learning_rate": 0.0004336508524286552,
      "loss": 3.0776,
      "step": 81338
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7940638065338135,
      "learning_rate": 0.000433647190215583,
      "loss": 3.0893,
      "step": 81339
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.600052833557129,
      "learning_rate": 0.000433643527977663,
      "loss": 2.8492,
      "step": 81340
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3970367908477783,
      "learning_rate": 0.00043363986571489624,
      "loss": 3.1156,
      "step": 81341
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0474443435668945,
      "learning_rate": 0.00043363620342728337,
      "loss": 3.2044,
      "step": 81342
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8414281606674194,
      "learning_rate": 0.0004336325411148249,
      "loss": 2.8858,
      "step": 81343
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8147813081741333,
      "learning_rate": 0.00043362887877752165,
      "loss": 2.893,
      "step": 81344
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.1907505989074707,
      "learning_rate": 0.0004336252164153744,
      "loss": 3.0839,
      "step": 81345
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0960562229156494,
      "learning_rate": 0.00043362155402838353,
      "loss": 2.9273,
      "step": 81346
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5671042203903198,
      "learning_rate": 0.0004336178916165499,
      "loss": 2.9942,
      "step": 81347
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.480926036834717,
      "learning_rate": 0.00043361422917987427,
      "loss": 2.9717,
      "step": 81348
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.435647487640381,
      "learning_rate": 0.0004336105667183572,
      "loss": 2.9098,
      "step": 81349
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.521085500717163,
      "learning_rate": 0.00043360690423199934,
      "loss": 3.0352,
      "step": 81350
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.756188988685608,
      "learning_rate": 0.00043360324172080154,
      "loss": 3.1034,
      "step": 81351
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.530007839202881,
      "learning_rate": 0.00043359957918476434,
      "loss": 2.9365,
      "step": 81352
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6027119159698486,
      "learning_rate": 0.0004335959166238884,
      "loss": 2.8247,
      "step": 81353
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.671873688697815,
      "learning_rate": 0.00043359225403817456,
      "loss": 3.0453,
      "step": 81354
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7875593900680542,
      "learning_rate": 0.00043358859142762334,
      "loss": 3.0939,
      "step": 81355
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2426369190216064,
      "learning_rate": 0.00043358492879223544,
      "loss": 3.0185,
      "step": 81356
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.884435772895813,
      "learning_rate": 0.0004335812661320116,
      "loss": 3.1536,
      "step": 81357
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1943342685699463,
      "learning_rate": 0.00043357760344695255,
      "loss": 3.254,
      "step": 81358
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.6066503524780273,
      "learning_rate": 0.00043357394073705887,
      "loss": 3.0591,
      "step": 81359
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9298211336135864,
      "learning_rate": 0.0004335702780023312,
      "loss": 2.9727,
      "step": 81360
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5342719554901123,
      "learning_rate": 0.00043356661524277033,
      "loss": 3.1418,
      "step": 81361
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.409223794937134,
      "learning_rate": 0.00043356295245837695,
      "loss": 3.1757,
      "step": 81362
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.2071914672851562,
      "learning_rate": 0.00043355928964915164,
      "loss": 3.1975,
      "step": 81363
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.7952637672424316,
      "learning_rate": 0.0004335556268150952,
      "loss": 2.9413,
      "step": 81364
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8681285381317139,
      "learning_rate": 0.0004335519639562082,
      "loss": 3.0122,
      "step": 81365
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.52490496635437,
      "learning_rate": 0.00043354830107249144,
      "loss": 3.0372,
      "step": 81366
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1695566177368164,
      "learning_rate": 0.0004335446381639454,
      "loss": 3.1986,
      "step": 81367
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6562470197677612,
      "learning_rate": 0.00043354097523057094,
      "loss": 3.2728,
      "step": 81368
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.184983015060425,
      "learning_rate": 0.0004335373122723688,
      "loss": 2.8189,
      "step": 81369
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4376583099365234,
      "learning_rate": 0.0004335336492893394,
      "loss": 2.9574,
      "step": 81370
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.963881015777588,
      "learning_rate": 0.00043352998628148366,
      "loss": 2.9594,
      "step": 81371
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7188403606414795,
      "learning_rate": 0.00043352632324880223,
      "loss": 3.0919,
      "step": 81372
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5112935304641724,
      "learning_rate": 0.0004335226601912956,
      "loss": 2.7295,
      "step": 81373
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6189345121383667,
      "learning_rate": 0.0004335189971089646,
      "loss": 3.0824,
      "step": 81374
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.285111904144287,
      "learning_rate": 0.00043351533400181,
      "loss": 3.1613,
      "step": 81375
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7786757946014404,
      "learning_rate": 0.0004335116708698323,
      "loss": 3.0688,
      "step": 81376
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.815812349319458,
      "learning_rate": 0.0004335080077130323,
      "loss": 2.8708,
      "step": 81377
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.112015724182129,
      "learning_rate": 0.0004335043445314106,
      "loss": 2.9322,
      "step": 81378
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7398500442504883,
      "learning_rate": 0.00043350068132496804,
      "loss": 2.9331,
      "step": 81379
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6836284399032593,
      "learning_rate": 0.0004334970180937051,
      "loss": 3.1391,
      "step": 81380
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.422682523727417,
      "learning_rate": 0.00043349335483762247,
      "loss": 3.2541,
      "step": 81381
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.6860439777374268,
      "learning_rate": 0.0004334896915567211,
      "loss": 3.0019,
      "step": 81382
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4299447536468506,
      "learning_rate": 0.0004334860282510012,
      "loss": 2.9147,
      "step": 81383
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9114859104156494,
      "learning_rate": 0.0004334823649204639,
      "loss": 2.9284,
      "step": 81384
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9518526792526245,
      "learning_rate": 0.00043347870156510977,
      "loss": 2.9914,
      "step": 81385
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0338070392608643,
      "learning_rate": 0.0004334750381849393,
      "loss": 3.205,
      "step": 81386
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2869224548339844,
      "learning_rate": 0.0004334713747799533,
      "loss": 3.0955,
      "step": 81387
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8900156021118164,
      "learning_rate": 0.0004334677113501526,
      "loss": 3.0787,
      "step": 81388
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.130542039871216,
      "learning_rate": 0.00043346404789553763,
      "loss": 2.9565,
      "step": 81389
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6990547180175781,
      "learning_rate": 0.0004334603844161091,
      "loss": 3.2677,
      "step": 81390
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8407173156738281,
      "learning_rate": 0.0004334567209118679,
      "loss": 3.0629,
      "step": 81391
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.335444450378418,
      "learning_rate": 0.0004334530573828145,
      "loss": 3.0132,
      "step": 81392
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4899110794067383,
      "learning_rate": 0.0004334493938289497,
      "loss": 3.0664,
      "step": 81393
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4876878261566162,
      "learning_rate": 0.0004334457302502741,
      "loss": 3.0811,
      "step": 81394
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6078184843063354,
      "learning_rate": 0.00043344206664678843,
      "loss": 3.0138,
      "step": 81395
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.715492606163025,
      "learning_rate": 0.0004334384030184933,
      "loss": 3.1989,
      "step": 81396
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7845077514648438,
      "learning_rate": 0.0004334347393653896,
      "loss": 2.9707,
      "step": 81397
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9187510013580322,
      "learning_rate": 0.00043343107568747773,
      "loss": 2.9698,
      "step": 81398
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8699052333831787,
      "learning_rate": 0.0004334274119847586,
      "loss": 2.9982,
      "step": 81399
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.481356143951416,
      "learning_rate": 0.00043342374825723276,
      "loss": 2.9764,
      "step": 81400
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8131475448608398,
      "learning_rate": 0.0004334200845049009,
      "loss": 2.8977,
      "step": 81401
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.793461799621582,
      "learning_rate": 0.0004334164207277637,
      "loss": 3.0848,
      "step": 81402
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7871214151382446,
      "learning_rate": 0.00043341275692582203,
      "loss": 3.1526,
      "step": 81403
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.489856243133545,
      "learning_rate": 0.00043340909309907624,
      "loss": 3.0165,
      "step": 81404
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5639227628707886,
      "learning_rate": 0.0004334054292475272,
      "loss": 2.8847,
      "step": 81405
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5770823955535889,
      "learning_rate": 0.0004334017653711757,
      "loss": 3.1742,
      "step": 81406
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4293338060379028,
      "learning_rate": 0.00043339810147002215,
      "loss": 2.9303,
      "step": 81407
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8149476051330566,
      "learning_rate": 0.0004333944375440674,
      "loss": 3.1452,
      "step": 81408
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7549890279769897,
      "learning_rate": 0.0004333907735933122,
      "loss": 3.2845,
      "step": 81409
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5330750942230225,
      "learning_rate": 0.00043338710961775707,
      "loss": 3.0531,
      "step": 81410
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.6466591358184814,
      "learning_rate": 0.0004333834456174027,
      "loss": 3.1336,
      "step": 81411
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4967514276504517,
      "learning_rate": 0.00043337978159224997,
      "loss": 3.0614,
      "step": 81412
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.981364130973816,
      "learning_rate": 0.00043337611754229926,
      "loss": 3.4493,
      "step": 81413
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0973196029663086,
      "learning_rate": 0.0004333724534675515,
      "loss": 3.1859,
      "step": 81414
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.0347750186920166,
      "learning_rate": 0.0004333687893680073,
      "loss": 3.2481,
      "step": 81415
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0204131603240967,
      "learning_rate": 0.00043336512524366733,
      "loss": 3.0743,
      "step": 81416
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1725456714630127,
      "learning_rate": 0.00043336146109453224,
      "loss": 2.9241,
      "step": 81417
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3986384868621826,
      "learning_rate": 0.00043335779692060274,
      "loss": 3.2327,
      "step": 81418
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7875014543533325,
      "learning_rate": 0.0004333541327218795,
      "loss": 3.0363,
      "step": 81419
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4705801010131836,
      "learning_rate": 0.0004333504684983632,
      "loss": 3.1689,
      "step": 81420
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.062373638153076,
      "learning_rate": 0.0004333468042500545,
      "loss": 3.0969,
      "step": 81421
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9784432649612427,
      "learning_rate": 0.00043334313997695417,
      "loss": 3.029,
      "step": 81422
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.676713466644287,
      "learning_rate": 0.0004333394756790628,
      "loss": 3.1942,
      "step": 81423
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8735026121139526,
      "learning_rate": 0.0004333358113563812,
      "loss": 2.8742,
      "step": 81424
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.946991205215454,
      "learning_rate": 0.00043333214700890985,
      "loss": 2.8922,
      "step": 81425
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8950247764587402,
      "learning_rate": 0.00043332848263664947,
      "loss": 3.0372,
      "step": 81426
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7497336864471436,
      "learning_rate": 0.000433324818239601,
      "loss": 3.1548,
      "step": 81427
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1474978923797607,
      "learning_rate": 0.0004333211538177648,
      "loss": 2.644,
      "step": 81428
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7828381061553955,
      "learning_rate": 0.0004333174893711417,
      "loss": 3.0303,
      "step": 81429
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5428847074508667,
      "learning_rate": 0.0004333138248997324,
      "loss": 3.0215,
      "step": 81430
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7041864395141602,
      "learning_rate": 0.0004333101604035375,
      "loss": 2.9222,
      "step": 81431
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2206993103027344,
      "learning_rate": 0.00043330649588255773,
      "loss": 2.7355,
      "step": 81432
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5953412055969238,
      "learning_rate": 0.00043330283133679383,
      "loss": 2.8177,
      "step": 81433
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.478761076927185,
      "learning_rate": 0.0004332991667662463,
      "loss": 3.1079,
      "step": 81434
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6574490070343018,
      "learning_rate": 0.000433295502170916,
      "loss": 2.9025,
      "step": 81435
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.838292121887207,
      "learning_rate": 0.00043329183755080365,
      "loss": 2.9113,
      "step": 81436
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5910043716430664,
      "learning_rate": 0.00043328817290590974,
      "loss": 2.9613,
      "step": 81437
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3643906116485596,
      "learning_rate": 0.000433284508236235,
      "loss": 3.2513,
      "step": 81438
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5453358888626099,
      "learning_rate": 0.0004332808435417802,
      "loss": 3.1777,
      "step": 81439
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7162750959396362,
      "learning_rate": 0.00043327717882254594,
      "loss": 3.1907,
      "step": 81440
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2929959297180176,
      "learning_rate": 0.00043327351407853297,
      "loss": 2.9238,
      "step": 81441
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.9041378498077393,
      "learning_rate": 0.00043326984930974185,
      "loss": 3.1325,
      "step": 81442
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3797686100006104,
      "learning_rate": 0.0004332661845161735,
      "loss": 2.8886,
      "step": 81443
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8505806922912598,
      "learning_rate": 0.0004332625196978284,
      "loss": 3.0438,
      "step": 81444
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1946468353271484,
      "learning_rate": 0.0004332588548547072,
      "loss": 3.0353,
      "step": 81445
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4846144914627075,
      "learning_rate": 0.0004332551899868108,
      "loss": 3.1573,
      "step": 81446
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6351523399353027,
      "learning_rate": 0.00043325152509413967,
      "loss": 3.1453,
      "step": 81447
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5343810319900513,
      "learning_rate": 0.0004332478601766945,
      "loss": 2.9377,
      "step": 81448
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6184284687042236,
      "learning_rate": 0.00043324419523447616,
      "loss": 2.9328,
      "step": 81449
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4961211681365967,
      "learning_rate": 0.0004332405302674852,
      "loss": 3.2607,
      "step": 81450
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4938695430755615,
      "learning_rate": 0.0004332368652757222,
      "loss": 2.8592,
      "step": 81451
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1639292240142822,
      "learning_rate": 0.000433233200259188,
      "loss": 3.0573,
      "step": 81452
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.71222722530365,
      "learning_rate": 0.0004332295352178833,
      "loss": 2.9116,
      "step": 81453
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2111077308654785,
      "learning_rate": 0.00043322587015180865,
      "loss": 2.9414,
      "step": 81454
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.894449234008789,
      "learning_rate": 0.0004332222050609648,
      "loss": 3.0339,
      "step": 81455
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.050830125808716,
      "learning_rate": 0.00043321853994535245,
      "loss": 3.1101,
      "step": 81456
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7868947982788086,
      "learning_rate": 0.00043321487480497224,
      "loss": 2.8762,
      "step": 81457
    },
    {
      "epoch": 1.06,
      "grad_norm": 4.120328903198242,
      "learning_rate": 0.00043321120963982484,
      "loss": 3.0131,
      "step": 81458
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3663735389709473,
      "learning_rate": 0.000433207544449911,
      "loss": 2.9898,
      "step": 81459
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4984958171844482,
      "learning_rate": 0.0004332038792352314,
      "loss": 3.1885,
      "step": 81460
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.365703582763672,
      "learning_rate": 0.00043320021399578665,
      "loss": 2.8957,
      "step": 81461
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.6694250106811523,
      "learning_rate": 0.00043319654873157743,
      "loss": 3.1217,
      "step": 81462
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3990514278411865,
      "learning_rate": 0.0004331928834426045,
      "loss": 3.011,
      "step": 81463
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5258793830871582,
      "learning_rate": 0.0004331892181288685,
      "loss": 2.9839,
      "step": 81464
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.6257269382476807,
      "learning_rate": 0.00043318555279037006,
      "loss": 3.0763,
      "step": 81465
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1709482669830322,
      "learning_rate": 0.00043318188742710997,
      "loss": 3.1417,
      "step": 81466
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5730267763137817,
      "learning_rate": 0.0004331782220390889,
      "loss": 3.2201,
      "step": 81467
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.022695541381836,
      "learning_rate": 0.00043317455662630737,
      "loss": 3.1142,
      "step": 81468
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7251070737838745,
      "learning_rate": 0.0004331708911887662,
      "loss": 3.1192,
      "step": 81469
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.667656660079956,
      "learning_rate": 0.0004331672257264661,
      "loss": 3.2198,
      "step": 81470
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4832310676574707,
      "learning_rate": 0.0004331635602394076,
      "loss": 2.931,
      "step": 81471
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.721086859703064,
      "learning_rate": 0.00043315989472759157,
      "loss": 2.9466,
      "step": 81472
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.081223249435425,
      "learning_rate": 0.0004331562291910186,
      "loss": 2.9831,
      "step": 81473
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5413919687271118,
      "learning_rate": 0.0004331525636296894,
      "loss": 2.8898,
      "step": 81474
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0169191360473633,
      "learning_rate": 0.00043314889804360455,
      "loss": 2.9107,
      "step": 81475
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.736504316329956,
      "learning_rate": 0.0004331452324327649,
      "loss": 3.0668,
      "step": 81476
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7937954664230347,
      "learning_rate": 0.0004331415667971709,
      "loss": 3.0503,
      "step": 81477
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8037638664245605,
      "learning_rate": 0.00043313790113682347,
      "loss": 2.9043,
      "step": 81478
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4859254360198975,
      "learning_rate": 0.0004331342354517232,
      "loss": 3.2151,
      "step": 81479
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7838501930236816,
      "learning_rate": 0.0004331305697418707,
      "loss": 2.8126,
      "step": 81480
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4457789659500122,
      "learning_rate": 0.0004331269040072667,
      "loss": 3.0849,
      "step": 81481
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6215646266937256,
      "learning_rate": 0.00043312323824791206,
      "loss": 3.0663,
      "step": 81482
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.5914108753204346,
      "learning_rate": 0.0004331195724638071,
      "loss": 2.932,
      "step": 81483
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.5583181381225586,
      "learning_rate": 0.00043311590665495275,
      "loss": 3.1758,
      "step": 81484
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6754494905471802,
      "learning_rate": 0.0004331122408213497,
      "loss": 3.1351,
      "step": 81485
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.9814517498016357,
      "learning_rate": 0.00043310857496299857,
      "loss": 3.2103,
      "step": 81486
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.7470476627349854,
      "learning_rate": 0.0004331049090798999,
      "loss": 2.8835,
      "step": 81487
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4073503017425537,
      "learning_rate": 0.00043310124317205473,
      "loss": 3.031,
      "step": 81488
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8901818990707397,
      "learning_rate": 0.0004330975772394634,
      "loss": 2.8845,
      "step": 81489
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.4897732734680176,
      "learning_rate": 0.00043309391128212675,
      "loss": 3.0422,
      "step": 81490
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.6594948768615723,
      "learning_rate": 0.00043309024530004543,
      "loss": 3.1883,
      "step": 81491
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.06227970123291,
      "learning_rate": 0.00043308657929322006,
      "loss": 3.0728,
      "step": 81492
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.804524302482605,
      "learning_rate": 0.0004330829132616514,
      "loss": 2.6649,
      "step": 81493
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.397507667541504,
      "learning_rate": 0.00043307924720534024,
      "loss": 3.0386,
      "step": 81494
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2721707820892334,
      "learning_rate": 0.00043307558112428703,
      "loss": 3.2011,
      "step": 81495
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.567476987838745,
      "learning_rate": 0.00043307191501849255,
      "loss": 2.8296,
      "step": 81496
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.334113597869873,
      "learning_rate": 0.00043306824888795755,
      "loss": 2.8217,
      "step": 81497
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.785768747329712,
      "learning_rate": 0.00043306458273268257,
      "loss": 3.1982,
      "step": 81498
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.457489252090454,
      "learning_rate": 0.0004330609165526684,
      "loss": 2.9132,
      "step": 81499
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5713564157485962,
      "learning_rate": 0.0004330572503479157,
      "loss": 2.898,
      "step": 81500
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4844300746917725,
      "learning_rate": 0.00043305358411842515,
      "loss": 2.8957,
      "step": 81501
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.091996908187866,
      "learning_rate": 0.00043304991786419743,
      "loss": 2.8174,
      "step": 81502
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9051653146743774,
      "learning_rate": 0.00043304625158523325,
      "loss": 2.9431,
      "step": 81503
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.516516089439392,
      "learning_rate": 0.0004330425852815332,
      "loss": 3.07,
      "step": 81504
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5165698528289795,
      "learning_rate": 0.00043303891895309803,
      "loss": 3.0935,
      "step": 81505
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8974539041519165,
      "learning_rate": 0.00043303525259992846,
      "loss": 2.9509,
      "step": 81506
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5602681636810303,
      "learning_rate": 0.0004330315862220251,
      "loss": 2.9661,
      "step": 81507
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5996655225753784,
      "learning_rate": 0.0004330279198193887,
      "loss": 2.9369,
      "step": 81508
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5959416627883911,
      "learning_rate": 0.0004330242533920198,
      "loss": 3.1998,
      "step": 81509
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5983697175979614,
      "learning_rate": 0.00043302058693991925,
      "loss": 2.9607,
      "step": 81510
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.61385977268219,
      "learning_rate": 0.00043301692046308763,
      "loss": 3.0409,
      "step": 81511
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9979932308197021,
      "learning_rate": 0.0004330132539615257,
      "loss": 3.2544,
      "step": 81512
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4119181632995605,
      "learning_rate": 0.00043300958743523407,
      "loss": 3.0414,
      "step": 81513
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6226295232772827,
      "learning_rate": 0.0004330059208842134,
      "loss": 3.2065,
      "step": 81514
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9620881080627441,
      "learning_rate": 0.00043300225430846446,
      "loss": 2.9614,
      "step": 81515
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.774788498878479,
      "learning_rate": 0.00043299858770798794,
      "loss": 3.0109,
      "step": 81516
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3828152418136597,
      "learning_rate": 0.0004329949210827844,
      "loss": 3.0478,
      "step": 81517
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4288283586502075,
      "learning_rate": 0.0004329912544328546,
      "loss": 2.9877,
      "step": 81518
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7033534049987793,
      "learning_rate": 0.0004329875877581993,
      "loss": 3.0937,
      "step": 81519
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5622416734695435,
      "learning_rate": 0.00043298392105881896,
      "loss": 3.0939,
      "step": 81520
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8370920419692993,
      "learning_rate": 0.00043298025433471444,
      "loss": 2.9963,
      "step": 81521
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2600505352020264,
      "learning_rate": 0.0004329765875858865,
      "loss": 3.0117,
      "step": 81522
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7599074840545654,
      "learning_rate": 0.00043297292081233556,
      "loss": 3.1539,
      "step": 81523
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6533747911453247,
      "learning_rate": 0.0004329692540140625,
      "loss": 2.9625,
      "step": 81524
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1827824115753174,
      "learning_rate": 0.00043296558719106796,
      "loss": 3.2112,
      "step": 81525
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8324644565582275,
      "learning_rate": 0.00043296192034335257,
      "loss": 3.139,
      "step": 81526
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5826035737991333,
      "learning_rate": 0.000432958253470917,
      "loss": 2.8798,
      "step": 81527
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8461275100708008,
      "learning_rate": 0.00043295458657376215,
      "loss": 3.0689,
      "step": 81528
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.174081325531006,
      "learning_rate": 0.0004329509196518884,
      "loss": 3.0907,
      "step": 81529
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9654271602630615,
      "learning_rate": 0.00043294725270529656,
      "loss": 2.9186,
      "step": 81530
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5651772022247314,
      "learning_rate": 0.00043294358573398744,
      "loss": 2.9499,
      "step": 81531
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8416850566864014,
      "learning_rate": 0.0004329399187379615,
      "loss": 3.1273,
      "step": 81532
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.443681001663208,
      "learning_rate": 0.00043293625171721947,
      "loss": 2.9369,
      "step": 81533
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5585445165634155,
      "learning_rate": 0.0004329325846717622,
      "loss": 3.0776,
      "step": 81534
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.7970504760742188,
      "learning_rate": 0.00043292891760159016,
      "loss": 3.0496,
      "step": 81535
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6819531917572021,
      "learning_rate": 0.0004329252505067041,
      "loss": 3.3874,
      "step": 81536
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9588418006896973,
      "learning_rate": 0.0004329215833871048,
      "loss": 2.965,
      "step": 81537
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.133774995803833,
      "learning_rate": 0.0004329179162427929,
      "loss": 2.9539,
      "step": 81538
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.240645408630371,
      "learning_rate": 0.000432914249073769,
      "loss": 2.9224,
      "step": 81539
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7934223413467407,
      "learning_rate": 0.0004329105818800338,
      "loss": 2.8859,
      "step": 81540
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2926127910614014,
      "learning_rate": 0.000432906914661588,
      "loss": 3.2981,
      "step": 81541
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.060847520828247,
      "learning_rate": 0.0004329032474184324,
      "loss": 3.0358,
      "step": 81542
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.443384051322937,
      "learning_rate": 0.0004328995801505675,
      "loss": 3.038,
      "step": 81543
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.696537971496582,
      "learning_rate": 0.0004328959128579941,
      "loss": 2.773,
      "step": 81544
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3078930377960205,
      "learning_rate": 0.0004328922455407128,
      "loss": 2.9953,
      "step": 81545
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3249878883361816,
      "learning_rate": 0.0004328885781987244,
      "loss": 2.9752,
      "step": 81546
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4865493774414062,
      "learning_rate": 0.0004328849108320294,
      "loss": 3.0575,
      "step": 81547
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4666885137557983,
      "learning_rate": 0.00043288124344062864,
      "loss": 3.1913,
      "step": 81548
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.997438669204712,
      "learning_rate": 0.00043287757602452275,
      "loss": 2.8394,
      "step": 81549
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8765004873275757,
      "learning_rate": 0.0004328739085837124,
      "loss": 2.9119,
      "step": 81550
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6278823614120483,
      "learning_rate": 0.00043287024111819824,
      "loss": 3.2153,
      "step": 81551
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.301193952560425,
      "learning_rate": 0.0004328665736279811,
      "loss": 2.9708,
      "step": 81552
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.631967306137085,
      "learning_rate": 0.00043286290611306154,
      "loss": 2.9214,
      "step": 81553
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6879003047943115,
      "learning_rate": 0.0004328592385734402,
      "loss": 2.9138,
      "step": 81554
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.794382095336914,
      "learning_rate": 0.0004328555710091178,
      "loss": 3.1801,
      "step": 81555
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5277612209320068,
      "learning_rate": 0.00043285190342009513,
      "loss": 3.0431,
      "step": 81556
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2616801261901855,
      "learning_rate": 0.0004328482358063727,
      "loss": 3.0366,
      "step": 81557
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.152177572250366,
      "learning_rate": 0.00043284456816795137,
      "loss": 3.0106,
      "step": 81558
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9475020170211792,
      "learning_rate": 0.0004328409005048316,
      "loss": 3.1191,
      "step": 81559
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5422899723052979,
      "learning_rate": 0.00043283723281701425,
      "loss": 2.8027,
      "step": 81560
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7134413719177246,
      "learning_rate": 0.0004328335651045,
      "loss": 2.8331,
      "step": 81561
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9482661485671997,
      "learning_rate": 0.00043282989736728944,
      "loss": 3.2158,
      "step": 81562
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6749039888381958,
      "learning_rate": 0.00043282622960538324,
      "loss": 3.0138,
      "step": 81563
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5730366706848145,
      "learning_rate": 0.0004328225618187823,
      "loss": 2.7875,
      "step": 81564
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5012474060058594,
      "learning_rate": 0.00043281889400748703,
      "loss": 3.0678,
      "step": 81565
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4421426057815552,
      "learning_rate": 0.00043281522617149817,
      "loss": 2.9988,
      "step": 81566
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5008978843688965,
      "learning_rate": 0.0004328115583108166,
      "loss": 2.9683,
      "step": 81567
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6438268423080444,
      "learning_rate": 0.0004328078904254427,
      "loss": 2.9379,
      "step": 81568
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.635215163230896,
      "learning_rate": 0.00043280422251537734,
      "loss": 3.015,
      "step": 81569
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6599074602127075,
      "learning_rate": 0.0004328005545806213,
      "loss": 3.219,
      "step": 81570
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.798672080039978,
      "learning_rate": 0.00043279688662117496,
      "loss": 3.0219,
      "step": 81571
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8505159616470337,
      "learning_rate": 0.0004327932186370392,
      "loss": 2.8869,
      "step": 81572
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.044286012649536,
      "learning_rate": 0.00043278955062821473,
      "loss": 2.7305,
      "step": 81573
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.71743905544281,
      "learning_rate": 0.0004327858825947022,
      "loss": 3.1718,
      "step": 81574
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.089829206466675,
      "learning_rate": 0.00043278221453650216,
      "loss": 2.9938,
      "step": 81575
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.671980857849121,
      "learning_rate": 0.00043277854645361547,
      "loss": 3.2231,
      "step": 81576
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6030272245407104,
      "learning_rate": 0.0004327748783460428,
      "loss": 3.0813,
      "step": 81577
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0992588996887207,
      "learning_rate": 0.00043277121021378473,
      "loss": 2.8913,
      "step": 81578
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6702522039413452,
      "learning_rate": 0.00043276754205684193,
      "loss": 2.8997,
      "step": 81579
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.840201497077942,
      "learning_rate": 0.0004327638738752152,
      "loss": 2.998,
      "step": 81580
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5641342401504517,
      "learning_rate": 0.0004327602056689051,
      "loss": 3.2095,
      "step": 81581
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4658331871032715,
      "learning_rate": 0.0004327565374379124,
      "loss": 2.9233,
      "step": 81582
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5058869123458862,
      "learning_rate": 0.0004327528691822378,
      "loss": 2.9816,
      "step": 81583
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.544340968132019,
      "learning_rate": 0.0004327492009018819,
      "loss": 2.8942,
      "step": 81584
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7572327852249146,
      "learning_rate": 0.0004327455325968454,
      "loss": 3.0143,
      "step": 81585
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.89152991771698,
      "learning_rate": 0.00043274186426712905,
      "loss": 2.9769,
      "step": 81586
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4367539882659912,
      "learning_rate": 0.0004327381959127334,
      "loss": 3.2185,
      "step": 81587
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5397770404815674,
      "learning_rate": 0.0004327345275336593,
      "loss": 3.1694,
      "step": 81588
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5785845518112183,
      "learning_rate": 0.0004327308591299073,
      "loss": 3.0401,
      "step": 81589
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3425568342208862,
      "learning_rate": 0.00043272719070147815,
      "loss": 3.2592,
      "step": 81590
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.042396306991577,
      "learning_rate": 0.00043272352224837246,
      "loss": 2.8381,
      "step": 81591
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.763667345046997,
      "learning_rate": 0.00043271985377059104,
      "loss": 3.1242,
      "step": 81592
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.334803342819214,
      "learning_rate": 0.0004327161852681344,
      "loss": 2.8652,
      "step": 81593
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8873860836029053,
      "learning_rate": 0.0004327125167410034,
      "loss": 3.1031,
      "step": 81594
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4938030242919922,
      "learning_rate": 0.0004327088481891986,
      "loss": 3.081,
      "step": 81595
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7736600637435913,
      "learning_rate": 0.0004327051796127207,
      "loss": 2.8136,
      "step": 81596
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8589696884155273,
      "learning_rate": 0.0004327015110115704,
      "loss": 2.9477,
      "step": 81597
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.701668381690979,
      "learning_rate": 0.00043269784238574846,
      "loss": 2.9576,
      "step": 81598
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7057009935379028,
      "learning_rate": 0.00043269417373525547,
      "loss": 3.1434,
      "step": 81599
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8478188514709473,
      "learning_rate": 0.00043269050506009206,
      "loss": 3.0242,
      "step": 81600
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.677522897720337,
      "learning_rate": 0.00043268683636025904,
      "loss": 2.8728,
      "step": 81601
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8554017543792725,
      "learning_rate": 0.00043268316763575706,
      "loss": 3.0164,
      "step": 81602
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.771859884262085,
      "learning_rate": 0.00043267949888658666,
      "loss": 3.0029,
      "step": 81603
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.9156529903411865,
      "learning_rate": 0.0004326758301127487,
      "loss": 3.0718,
      "step": 81604
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.684148907661438,
      "learning_rate": 0.0004326721613142438,
      "loss": 3.2047,
      "step": 81605
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4446237087249756,
      "learning_rate": 0.00043266849249107265,
      "loss": 3.109,
      "step": 81606
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9139208793640137,
      "learning_rate": 0.0004326648236432359,
      "loss": 3.1792,
      "step": 81607
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9895282983779907,
      "learning_rate": 0.00043266115477073424,
      "loss": 2.9799,
      "step": 81608
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5618748664855957,
      "learning_rate": 0.00043265748587356846,
      "loss": 2.999,
      "step": 81609
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7080659866333008,
      "learning_rate": 0.0004326538169517391,
      "loss": 2.9984,
      "step": 81610
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.53815758228302,
      "learning_rate": 0.0004326501480052468,
      "loss": 3.0957,
      "step": 81611
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5542641878128052,
      "learning_rate": 0.00043264647903409247,
      "loss": 3.0618,
      "step": 81612
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7413173913955688,
      "learning_rate": 0.0004326428100382766,
      "loss": 2.9815,
      "step": 81613
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.870333194732666,
      "learning_rate": 0.0004326391410177999,
      "loss": 3.0806,
      "step": 81614
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.790717363357544,
      "learning_rate": 0.00043263547197266305,
      "loss": 3.0988,
      "step": 81615
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4449517726898193,
      "learning_rate": 0.00043263180290286686,
      "loss": 2.9073,
      "step": 81616
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6742689609527588,
      "learning_rate": 0.0004326281338084119,
      "loss": 2.8706,
      "step": 81617
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.449963092803955,
      "learning_rate": 0.0004326244646892988,
      "loss": 3.1541,
      "step": 81618
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.145453929901123,
      "learning_rate": 0.0004326207955455284,
      "loss": 3.1423,
      "step": 81619
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7107689380645752,
      "learning_rate": 0.00043261712637710116,
      "loss": 2.9279,
      "step": 81620
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.360590934753418,
      "learning_rate": 0.00043261345718401795,
      "loss": 2.9211,
      "step": 81621
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6682379245758057,
      "learning_rate": 0.0004326097879662794,
      "loss": 2.8661,
      "step": 81622
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.634677767753601,
      "learning_rate": 0.00043260611872388623,
      "loss": 2.7278,
      "step": 81623
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7997058629989624,
      "learning_rate": 0.000432602449456839,
      "loss": 2.9101,
      "step": 81624
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7679280042648315,
      "learning_rate": 0.00043259878016513857,
      "loss": 3.033,
      "step": 81625
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8335535526275635,
      "learning_rate": 0.0004325951108487855,
      "loss": 3.1766,
      "step": 81626
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7113723754882812,
      "learning_rate": 0.00043259144150778044,
      "loss": 3.0736,
      "step": 81627
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6485309600830078,
      "learning_rate": 0.0004325877721421242,
      "loss": 2.8354,
      "step": 81628
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6973458528518677,
      "learning_rate": 0.0004325841027518172,
      "loss": 2.8829,
      "step": 81629
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8826314210891724,
      "learning_rate": 0.00043258043333686054,
      "loss": 2.9016,
      "step": 81630
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6302112340927124,
      "learning_rate": 0.00043257676389725464,
      "loss": 2.969,
      "step": 81631
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.284355878829956,
      "learning_rate": 0.0004325730944330001,
      "loss": 3.087,
      "step": 81632
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5972093343734741,
      "learning_rate": 0.0004325694249440978,
      "loss": 3.0032,
      "step": 81633
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.830444097518921,
      "learning_rate": 0.00043256575543054835,
      "loss": 2.9045,
      "step": 81634
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5350207090377808,
      "learning_rate": 0.0004325620858923524,
      "loss": 3.4039,
      "step": 81635
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9536715745925903,
      "learning_rate": 0.0004325584163295106,
      "loss": 3.1237,
      "step": 81636
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5427054166793823,
      "learning_rate": 0.00043255474674202377,
      "loss": 3.0817,
      "step": 81637
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3337016105651855,
      "learning_rate": 0.0004325510771298926,
      "loss": 2.9332,
      "step": 81638
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6383920907974243,
      "learning_rate": 0.00043254740749311746,
      "loss": 2.9394,
      "step": 81639
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0135908126831055,
      "learning_rate": 0.00043254373783169935,
      "loss": 3.2964,
      "step": 81640
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.9573049545288086,
      "learning_rate": 0.00043254006814563896,
      "loss": 3.0529,
      "step": 81641
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.089725971221924,
      "learning_rate": 0.00043253639843493676,
      "loss": 3.0884,
      "step": 81642
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8913825750350952,
      "learning_rate": 0.00043253272869959357,
      "loss": 3.0683,
      "step": 81643
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9599170684814453,
      "learning_rate": 0.00043252905893961005,
      "loss": 3.2436,
      "step": 81644
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2874679565429688,
      "learning_rate": 0.00043252538915498684,
      "loss": 2.9471,
      "step": 81645
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2912864685058594,
      "learning_rate": 0.0004325217193457247,
      "loss": 3.0505,
      "step": 81646
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.179558038711548,
      "learning_rate": 0.00043251804951182423,
      "loss": 3.0766,
      "step": 81647
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8258417844772339,
      "learning_rate": 0.0004325143796532862,
      "loss": 2.9422,
      "step": 81648
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8009315729141235,
      "learning_rate": 0.00043251070977011117,
      "loss": 3.281,
      "step": 81649
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.379964828491211,
      "learning_rate": 0.0004325070398623001,
      "loss": 2.7628,
      "step": 81650
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.873321533203125,
      "learning_rate": 0.0004325033699298533,
      "loss": 2.9825,
      "step": 81651
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.602451205253601,
      "learning_rate": 0.00043249969997277157,
      "loss": 2.9937,
      "step": 81652
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.992235541343689,
      "learning_rate": 0.0004324960299910558,
      "loss": 2.9435,
      "step": 81653
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8992186784744263,
      "learning_rate": 0.0004324923599847064,
      "loss": 2.9265,
      "step": 81654
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0017833709716797,
      "learning_rate": 0.00043248868995372417,
      "loss": 2.6571,
      "step": 81655
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.142559766769409,
      "learning_rate": 0.0004324850198981099,
      "loss": 3.3195,
      "step": 81656
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.91523277759552,
      "learning_rate": 0.00043248134981786404,
      "loss": 3.1182,
      "step": 81657
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8937746286392212,
      "learning_rate": 0.00043247767971298746,
      "loss": 3.0348,
      "step": 81658
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6245242357254028,
      "learning_rate": 0.0004324740095834808,
      "loss": 2.8939,
      "step": 81659
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.801731824874878,
      "learning_rate": 0.00043247033942934467,
      "loss": 2.9771,
      "step": 81660
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.065993070602417,
      "learning_rate": 0.00043246666925057976,
      "loss": 3.087,
      "step": 81661
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.941360354423523,
      "learning_rate": 0.0004324629990471869,
      "loss": 3.1478,
      "step": 81662
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8047797679901123,
      "learning_rate": 0.00043245932881916656,
      "loss": 2.8959,
      "step": 81663
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.8447344303131104,
      "learning_rate": 0.00043245565856651967,
      "loss": 2.9913,
      "step": 81664
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.962730050086975,
      "learning_rate": 0.0004324519882892467,
      "loss": 3.1725,
      "step": 81665
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5389783382415771,
      "learning_rate": 0.0004324483179873484,
      "loss": 2.9998,
      "step": 81666
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.003922462463379,
      "learning_rate": 0.00043244464766082545,
      "loss": 3.018,
      "step": 81667
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.853760242462158,
      "learning_rate": 0.0004324409773096785,
      "loss": 3.1032,
      "step": 81668
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.104684352874756,
      "learning_rate": 0.0004324373069339084,
      "loss": 3.0736,
      "step": 81669
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.936118483543396,
      "learning_rate": 0.0004324336365335156,
      "loss": 2.9986,
      "step": 81670
    },
    {
      "epoch": 1.06,
      "grad_norm": 5.061610221862793,
      "learning_rate": 0.00043242996610850094,
      "loss": 3.0506,
      "step": 81671
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.0523738861083984,
      "learning_rate": 0.000432426295658865,
      "loss": 2.9627,
      "step": 81672
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7006027698516846,
      "learning_rate": 0.0004324226251846085,
      "loss": 3.1,
      "step": 81673
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.2936922311782837,
      "learning_rate": 0.0004324189546857322,
      "loss": 2.8441,
      "step": 81674
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.566683053970337,
      "learning_rate": 0.00043241528416223667,
      "loss": 3.0011,
      "step": 81675
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.043808937072754,
      "learning_rate": 0.0004324116136141227,
      "loss": 2.8667,
      "step": 81676
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.77892005443573,
      "learning_rate": 0.00043240794304139085,
      "loss": 3.079,
      "step": 81677
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.7363576889038086,
      "learning_rate": 0.00043240427244404194,
      "loss": 2.9826,
      "step": 81678
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.742523193359375,
      "learning_rate": 0.0004324006018220765,
      "loss": 3.1635,
      "step": 81679
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.1742568016052246,
      "learning_rate": 0.00043239693117549526,
      "loss": 3.0643,
      "step": 81680
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6945754289627075,
      "learning_rate": 0.000432393260504299,
      "loss": 3.0309,
      "step": 81681
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9614912271499634,
      "learning_rate": 0.0004323895898084883,
      "loss": 2.9733,
      "step": 81682
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.93815541267395,
      "learning_rate": 0.00043238591908806393,
      "loss": 2.96,
      "step": 81683
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.732375979423523,
      "learning_rate": 0.00043238224834302646,
      "loss": 2.8398,
      "step": 81684
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9642196893692017,
      "learning_rate": 0.00043237857757337663,
      "loss": 3.1507,
      "step": 81685
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.343710422515869,
      "learning_rate": 0.00043237490677911514,
      "loss": 2.9349,
      "step": 81686
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.945194959640503,
      "learning_rate": 0.00043237123596024263,
      "loss": 3.3293,
      "step": 81687
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5991506576538086,
      "learning_rate": 0.0004323675651167599,
      "loss": 2.94,
      "step": 81688
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.939818859100342,
      "learning_rate": 0.0004323638942486674,
      "loss": 3.0936,
      "step": 81689
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9829092025756836,
      "learning_rate": 0.0004323602233559661,
      "loss": 3.1125,
      "step": 81690
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5485920906066895,
      "learning_rate": 0.00043235655243865646,
      "loss": 2.8802,
      "step": 81691
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.2917110919952393,
      "learning_rate": 0.00043235288149673926,
      "loss": 3.0849,
      "step": 81692
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9819459915161133,
      "learning_rate": 0.0004323492105302151,
      "loss": 3.0604,
      "step": 81693
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.423403739929199,
      "learning_rate": 0.00043234553953908474,
      "loss": 2.9307,
      "step": 81694
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7828478813171387,
      "learning_rate": 0.00043234186852334893,
      "loss": 2.965,
      "step": 81695
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9245612621307373,
      "learning_rate": 0.00043233819748300824,
      "loss": 3.1705,
      "step": 81696
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6313403844833374,
      "learning_rate": 0.00043233452641806326,
      "loss": 3.0065,
      "step": 81697
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.707597017288208,
      "learning_rate": 0.00043233085532851497,
      "loss": 3.0702,
      "step": 81698
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5801008939743042,
      "learning_rate": 0.0004323271842143638,
      "loss": 3.0093,
      "step": 81699
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6693671941757202,
      "learning_rate": 0.00043232351307561045,
      "loss": 2.969,
      "step": 81700
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.746310830116272,
      "learning_rate": 0.0004323198419122558,
      "loss": 2.942,
      "step": 81701
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4473397731781006,
      "learning_rate": 0.00043231617072430023,
      "loss": 3.1016,
      "step": 81702
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6167954206466675,
      "learning_rate": 0.0004323124995117447,
      "loss": 2.8723,
      "step": 81703
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8809667825698853,
      "learning_rate": 0.00043230882827458977,
      "loss": 3.2386,
      "step": 81704
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5012538433074951,
      "learning_rate": 0.00043230515701283607,
      "loss": 3.1089,
      "step": 81705
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8095686435699463,
      "learning_rate": 0.0004323014857264844,
      "loss": 3.0571,
      "step": 81706
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.035916805267334,
      "learning_rate": 0.0004322978144155353,
      "loss": 2.9306,
      "step": 81707
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8531523942947388,
      "learning_rate": 0.00043229414307998965,
      "loss": 3.1096,
      "step": 81708
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4579912424087524,
      "learning_rate": 0.00043229047171984797,
      "loss": 3.0171,
      "step": 81709
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7621285915374756,
      "learning_rate": 0.00043228680033511094,
      "loss": 3.1936,
      "step": 81710
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.610128402709961,
      "learning_rate": 0.00043228312892577944,
      "loss": 2.9471,
      "step": 81711
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0334036350250244,
      "learning_rate": 0.0004322794574918539,
      "loss": 3.1873,
      "step": 81712
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7588183879852295,
      "learning_rate": 0.0004322757860333351,
      "loss": 2.9604,
      "step": 81713
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5573872327804565,
      "learning_rate": 0.0004322721145502238,
      "loss": 2.9137,
      "step": 81714
    },
    {
      "epoch": 1.06,
      "grad_norm": 5.742395877838135,
      "learning_rate": 0.0004322684430425206,
      "loss": 3.0188,
      "step": 81715
    },
    {
      "epoch": 1.06,
      "grad_norm": 4.267041206359863,
      "learning_rate": 0.00043226477151022617,
      "loss": 2.9674,
      "step": 81716
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8004511594772339,
      "learning_rate": 0.00043226109995334126,
      "loss": 3.1518,
      "step": 81717
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0773110389709473,
      "learning_rate": 0.0004322574283718665,
      "loss": 2.9241,
      "step": 81718
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.7722604274749756,
      "learning_rate": 0.00043225375676580254,
      "loss": 2.8315,
      "step": 81719
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.310262441635132,
      "learning_rate": 0.00043225008513515014,
      "loss": 2.9726,
      "step": 81720
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.533564805984497,
      "learning_rate": 0.00043224641347991,
      "loss": 2.9527,
      "step": 81721
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.593503713607788,
      "learning_rate": 0.0004322427418000828,
      "loss": 3.2009,
      "step": 81722
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7654731273651123,
      "learning_rate": 0.00043223907009566897,
      "loss": 2.9659,
      "step": 81723
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6048076152801514,
      "learning_rate": 0.00043223539836666956,
      "loss": 3.0485,
      "step": 81724
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6794053316116333,
      "learning_rate": 0.000432231726613085,
      "loss": 3.0781,
      "step": 81725
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0949342250823975,
      "learning_rate": 0.00043222805483491617,
      "loss": 2.7971,
      "step": 81726
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7554093599319458,
      "learning_rate": 0.0004322243830321636,
      "loss": 3.3325,
      "step": 81727
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4651196002960205,
      "learning_rate": 0.000432220711204828,
      "loss": 3.1409,
      "step": 81728
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4796555042266846,
      "learning_rate": 0.0004322170393529102,
      "loss": 3.13,
      "step": 81729
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5007222890853882,
      "learning_rate": 0.00043221336747641066,
      "loss": 3.1688,
      "step": 81730
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.66705584526062,
      "learning_rate": 0.0004322096955753301,
      "loss": 2.8466,
      "step": 81731
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.031497001647949,
      "learning_rate": 0.0004322060236496694,
      "loss": 3.0847,
      "step": 81732
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0681090354919434,
      "learning_rate": 0.000432202351699429,
      "loss": 2.9915,
      "step": 81733
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.4402027130126953,
      "learning_rate": 0.0004321986797246096,
      "loss": 2.946,
      "step": 81734
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6549665927886963,
      "learning_rate": 0.00043219500772521214,
      "loss": 3.0334,
      "step": 81735
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5483086109161377,
      "learning_rate": 0.00043219133570123705,
      "loss": 2.9707,
      "step": 81736
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.15213942527771,
      "learning_rate": 0.00043218766365268516,
      "loss": 3.017,
      "step": 81737
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.5471575260162354,
      "learning_rate": 0.00043218399157955706,
      "loss": 3.0834,
      "step": 81738
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8841038942337036,
      "learning_rate": 0.0004321803194818534,
      "loss": 2.9726,
      "step": 81739
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6498550176620483,
      "learning_rate": 0.00043217664735957506,
      "loss": 3.0757,
      "step": 81740
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.785725474357605,
      "learning_rate": 0.00043217297521272246,
      "loss": 2.7525,
      "step": 81741
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.5984227657318115,
      "learning_rate": 0.0004321693030412964,
      "loss": 2.97,
      "step": 81742
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.7386465072631836,
      "learning_rate": 0.0004321656308452977,
      "loss": 2.9591,
      "step": 81743
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.063199996948242,
      "learning_rate": 0.0004321619586247269,
      "loss": 3.2529,
      "step": 81744
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8146497011184692,
      "learning_rate": 0.0004321582863795846,
      "loss": 2.9889,
      "step": 81745
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.60893714427948,
      "learning_rate": 0.0004321546141098715,
      "loss": 2.9201,
      "step": 81746
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4658480882644653,
      "learning_rate": 0.0004321509418155886,
      "loss": 3.049,
      "step": 81747
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5021603107452393,
      "learning_rate": 0.00043214726949673615,
      "loss": 2.8552,
      "step": 81748
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6180788278579712,
      "learning_rate": 0.0004321435971533151,
      "loss": 3.0921,
      "step": 81749
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.510994791984558,
      "learning_rate": 0.0004321399247853261,
      "loss": 3.089,
      "step": 81750
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.039916515350342,
      "learning_rate": 0.0004321362523927697,
      "loss": 2.963,
      "step": 81751
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8988670110702515,
      "learning_rate": 0.0004321325799756467,
      "loss": 3.0228,
      "step": 81752
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.506679892539978,
      "learning_rate": 0.00043212890753395786,
      "loss": 2.9795,
      "step": 81753
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.708228349685669,
      "learning_rate": 0.0004321252350677037,
      "loss": 3.0969,
      "step": 81754
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6544471979141235,
      "learning_rate": 0.00043212156257688486,
      "loss": 2.8816,
      "step": 81755
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4728621244430542,
      "learning_rate": 0.0004321178900615023,
      "loss": 2.9246,
      "step": 81756
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4782030582427979,
      "learning_rate": 0.0004321142175215565,
      "loss": 3.1764,
      "step": 81757
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.3705382347106934,
      "learning_rate": 0.000432110544957048,
      "loss": 2.6086,
      "step": 81758
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7505574226379395,
      "learning_rate": 0.00043210687236797785,
      "loss": 3.0668,
      "step": 81759
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.780959963798523,
      "learning_rate": 0.0004321031997543465,
      "loss": 2.9819,
      "step": 81760
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8010753393173218,
      "learning_rate": 0.00043209952711615457,
      "loss": 2.9949,
      "step": 81761
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6391068696975708,
      "learning_rate": 0.000432095854453403,
      "loss": 2.9295,
      "step": 81762
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7549713850021362,
      "learning_rate": 0.0004320921817660922,
      "loss": 3.1637,
      "step": 81763
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5844745635986328,
      "learning_rate": 0.00043208850905422294,
      "loss": 2.8504,
      "step": 81764
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4968228340148926,
      "learning_rate": 0.00043208483631779605,
      "loss": 2.8475,
      "step": 81765
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.160865306854248,
      "learning_rate": 0.00043208116355681204,
      "loss": 3.0027,
      "step": 81766
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7263306379318237,
      "learning_rate": 0.00043207749077127166,
      "loss": 2.7922,
      "step": 81767
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5965969562530518,
      "learning_rate": 0.0004320738179611756,
      "loss": 2.9914,
      "step": 81768
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.5962488651275635,
      "learning_rate": 0.00043207014512652446,
      "loss": 2.9593,
      "step": 81769
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.549043893814087,
      "learning_rate": 0.00043206647226731905,
      "loss": 2.9505,
      "step": 81770
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8927315473556519,
      "learning_rate": 0.00043206279938356,
      "loss": 3.0471,
      "step": 81771
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8472611904144287,
      "learning_rate": 0.00043205912647524785,
      "loss": 2.8878,
      "step": 81772
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.15547513961792,
      "learning_rate": 0.00043205545354238354,
      "loss": 3.0415,
      "step": 81773
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3683531284332275,
      "learning_rate": 0.0004320517805849676,
      "loss": 2.8865,
      "step": 81774
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.0620598793029785,
      "learning_rate": 0.00043204810760300076,
      "loss": 2.883,
      "step": 81775
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8939433097839355,
      "learning_rate": 0.0004320444345964836,
      "loss": 3.0129,
      "step": 81776
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4830174446105957,
      "learning_rate": 0.0004320407615654169,
      "loss": 3.377,
      "step": 81777
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.7761681079864502,
      "learning_rate": 0.00043203708850980145,
      "loss": 3.0013,
      "step": 81778
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.496280550956726,
      "learning_rate": 0.0004320334154296378,
      "loss": 3.0403,
      "step": 81779
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.871029019355774,
      "learning_rate": 0.0004320297423249265,
      "loss": 2.8306,
      "step": 81780
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.5772751569747925,
      "learning_rate": 0.0004320260691956686,
      "loss": 2.9676,
      "step": 81781
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8161827325820923,
      "learning_rate": 0.00043202239604186435,
      "loss": 3.0496,
      "step": 81782
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.875856876373291,
      "learning_rate": 0.0004320187228635147,
      "loss": 3.2084,
      "step": 81783
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.781648874282837,
      "learning_rate": 0.00043201504966062035,
      "loss": 2.9611,
      "step": 81784
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6343894004821777,
      "learning_rate": 0.0004320113764331818,
      "loss": 3.075,
      "step": 81785
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.765629768371582,
      "learning_rate": 0.00043200770318119994,
      "loss": 3.0327,
      "step": 81786
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.8179590702056885,
      "learning_rate": 0.00043200402990467535,
      "loss": 2.9913,
      "step": 81787
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.797695517539978,
      "learning_rate": 0.0004320003566036087,
      "loss": 3.1089,
      "step": 81788
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.6533740758895874,
      "learning_rate": 0.0004319966832780006,
      "loss": 3.2109,
      "step": 81789
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.4887793064117432,
      "learning_rate": 0.00043199300992785203,
      "loss": 2.9382,
      "step": 81790
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.3426144123077393,
      "learning_rate": 0.0004319893365531633,
      "loss": 2.8483,
      "step": 81791
    },
    {
      "epoch": 1.06,
      "grad_norm": 2.2258598804473877,
      "learning_rate": 0.00043198566315393527,
      "loss": 3.2454,
      "step": 81792
    },
    {
      "epoch": 1.06,
      "grad_norm": 1.9241914749145508,
      "learning_rate": 0.0004319819897301687,
      "loss": 3.0539,
      "step": 81793
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.607223391532898,
      "learning_rate": 0.0004319783162818641,
      "loss": 3.0195,
      "step": 81794
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5605053901672363,
      "learning_rate": 0.00043197464280902233,
      "loss": 3.0092,
      "step": 81795
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5786187648773193,
      "learning_rate": 0.0004319709693116439,
      "loss": 3.1212,
      "step": 81796
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.071209669113159,
      "learning_rate": 0.00043196729578972963,
      "loss": 3.0668,
      "step": 81797
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.778084635734558,
      "learning_rate": 0.00043196362224328007,
      "loss": 2.8715,
      "step": 81798
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6726690530776978,
      "learning_rate": 0.00043195994867229614,
      "loss": 2.9769,
      "step": 81799
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.102756977081299,
      "learning_rate": 0.0004319562750767782,
      "loss": 2.8357,
      "step": 81800
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3606834411621094,
      "learning_rate": 0.00043195260145672713,
      "loss": 3.1284,
      "step": 81801
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4930198192596436,
      "learning_rate": 0.0004319489278121436,
      "loss": 3.238,
      "step": 81802
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.887782096862793,
      "learning_rate": 0.00043194525414302833,
      "loss": 2.97,
      "step": 81803
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.137404441833496,
      "learning_rate": 0.00043194158044938184,
      "loss": 3.2276,
      "step": 81804
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5405594110488892,
      "learning_rate": 0.000431937906731205,
      "loss": 2.9944,
      "step": 81805
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5750465393066406,
      "learning_rate": 0.0004319342329884983,
      "loss": 2.9968,
      "step": 81806
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4500582218170166,
      "learning_rate": 0.00043193055922126274,
      "loss": 3.1189,
      "step": 81807
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6079446077346802,
      "learning_rate": 0.00043192688542949867,
      "loss": 3.1133,
      "step": 81808
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6620161533355713,
      "learning_rate": 0.00043192321161320685,
      "loss": 3.1924,
      "step": 81809
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7846169471740723,
      "learning_rate": 0.00043191953777238815,
      "loss": 3.0675,
      "step": 81810
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8400859832763672,
      "learning_rate": 0.000431915863907043,
      "loss": 2.937,
      "step": 81811
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.716081976890564,
      "learning_rate": 0.0004319121900171723,
      "loss": 3.0184,
      "step": 81812
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.083308219909668,
      "learning_rate": 0.0004319085161027765,
      "loss": 2.9769,
      "step": 81813
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8588682413101196,
      "learning_rate": 0.00043190484216385655,
      "loss": 2.9199,
      "step": 81814
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6831755638122559,
      "learning_rate": 0.00043190116820041296,
      "loss": 3.0612,
      "step": 81815
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7816336154937744,
      "learning_rate": 0.00043189749421244645,
      "loss": 3.0839,
      "step": 81816
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0365400314331055,
      "learning_rate": 0.00043189382019995774,
      "loss": 3.1216,
      "step": 81817
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.26436185836792,
      "learning_rate": 0.00043189014616294735,
      "loss": 3.1565,
      "step": 81818
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9815205335617065,
      "learning_rate": 0.0004318864721014162,
      "loss": 3.2281,
      "step": 81819
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.696709394454956,
      "learning_rate": 0.0004318827980153649,
      "loss": 3.2868,
      "step": 81820
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.035998821258545,
      "learning_rate": 0.00043187912390479395,
      "loss": 3.0845,
      "step": 81821
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5254149436950684,
      "learning_rate": 0.00043187544976970436,
      "loss": 2.9154,
      "step": 81822
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1357874870300293,
      "learning_rate": 0.00043187177561009656,
      "loss": 2.9549,
      "step": 81823
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.651916980743408,
      "learning_rate": 0.0004318681014259712,
      "loss": 2.8172,
      "step": 81824
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.7724595069885254,
      "learning_rate": 0.00043186442721732923,
      "loss": 2.9553,
      "step": 81825
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.786388874053955,
      "learning_rate": 0.0004318607529841711,
      "loss": 3.2406,
      "step": 81826
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.187727212905884,
      "learning_rate": 0.0004318570787264976,
      "loss": 3.1582,
      "step": 81827
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3511319160461426,
      "learning_rate": 0.00043185340444430937,
      "loss": 3.1203,
      "step": 81828
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6922224760055542,
      "learning_rate": 0.0004318497301376071,
      "loss": 3.1495,
      "step": 81829
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6834771633148193,
      "learning_rate": 0.00043184605580639146,
      "loss": 2.9907,
      "step": 81830
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.020521402359009,
      "learning_rate": 0.00043184238145066315,
      "loss": 3.0086,
      "step": 81831
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0554399490356445,
      "learning_rate": 0.0004318387070704229,
      "loss": 2.8195,
      "step": 81832
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6101309061050415,
      "learning_rate": 0.0004318350326656713,
      "loss": 2.9161,
      "step": 81833
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5889291763305664,
      "learning_rate": 0.00043183135823640905,
      "loss": 2.9904,
      "step": 81834
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7252784967422485,
      "learning_rate": 0.00043182768378263693,
      "loss": 3.144,
      "step": 81835
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.626951217651367,
      "learning_rate": 0.00043182400930435555,
      "loss": 2.958,
      "step": 81836
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6297600269317627,
      "learning_rate": 0.0004318203348015655,
      "loss": 3.1577,
      "step": 81837
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6750319004058838,
      "learning_rate": 0.00043181666027426773,
      "loss": 3.0159,
      "step": 81838
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.823891520500183,
      "learning_rate": 0.00043181298572246264,
      "loss": 3.0619,
      "step": 81839
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9182957410812378,
      "learning_rate": 0.000431809311146151,
      "loss": 3.0218,
      "step": 81840
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6383256912231445,
      "learning_rate": 0.00043180563654533354,
      "loss": 3.1718,
      "step": 81841
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7477731704711914,
      "learning_rate": 0.000431801961920011,
      "loss": 2.9354,
      "step": 81842
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6991277933120728,
      "learning_rate": 0.00043179828727018393,
      "loss": 3.1126,
      "step": 81843
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9194434881210327,
      "learning_rate": 0.00043179461259585305,
      "loss": 3.014,
      "step": 81844
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5085701942443848,
      "learning_rate": 0.00043179093789701915,
      "loss": 3.3142,
      "step": 81845
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.08774471282959,
      "learning_rate": 0.0004317872631736827,
      "loss": 2.9295,
      "step": 81846
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8740373849868774,
      "learning_rate": 0.0004317835884258446,
      "loss": 3.1863,
      "step": 81847
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5374571084976196,
      "learning_rate": 0.0004317799136535055,
      "loss": 3.0797,
      "step": 81848
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4957389831542969,
      "learning_rate": 0.00043177623885666587,
      "loss": 3.0147,
      "step": 81849
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.135221481323242,
      "learning_rate": 0.0004317725640353266,
      "loss": 3.2768,
      "step": 81850
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.098658323287964,
      "learning_rate": 0.0004317688891894884,
      "loss": 3.2384,
      "step": 81851
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4561030864715576,
      "learning_rate": 0.0004317652143191518,
      "loss": 3.1719,
      "step": 81852
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.926857590675354,
      "learning_rate": 0.00043176153942431757,
      "loss": 3.1074,
      "step": 81853
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8226284980773926,
      "learning_rate": 0.0004317578645049865,
      "loss": 3.1542,
      "step": 81854
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5926557779312134,
      "learning_rate": 0.000431754189561159,
      "loss": 3.2384,
      "step": 81855
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6148167848587036,
      "learning_rate": 0.0004317505145928359,
      "loss": 3.0485,
      "step": 81856
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.374678134918213,
      "learning_rate": 0.00043174683960001803,
      "loss": 3.1479,
      "step": 81857
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5595405101776123,
      "learning_rate": 0.00043174316458270587,
      "loss": 2.8476,
      "step": 81858
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7085535526275635,
      "learning_rate": 0.0004317394895409001,
      "loss": 2.7775,
      "step": 81859
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.059168577194214,
      "learning_rate": 0.00043173581447460157,
      "loss": 2.8786,
      "step": 81860
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.6809990406036377,
      "learning_rate": 0.0004317321393838108,
      "loss": 3.1902,
      "step": 81861
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9950512647628784,
      "learning_rate": 0.0004317284642685286,
      "loss": 3.1468,
      "step": 81862
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.083855152130127,
      "learning_rate": 0.0004317247891287556,
      "loss": 3.0072,
      "step": 81863
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3588128089904785,
      "learning_rate": 0.00043172111396449245,
      "loss": 3.085,
      "step": 81864
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.849387526512146,
      "learning_rate": 0.0004317174387757398,
      "loss": 3.1838,
      "step": 81865
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.736520528793335,
      "learning_rate": 0.0004317137635624985,
      "loss": 2.7535,
      "step": 81866
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7757599353790283,
      "learning_rate": 0.000431710088324769,
      "loss": 3.1842,
      "step": 81867
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9319919347763062,
      "learning_rate": 0.00043170641306255217,
      "loss": 2.8345,
      "step": 81868
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5811694860458374,
      "learning_rate": 0.00043170273777584867,
      "loss": 2.9313,
      "step": 81869
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.9502782821655273,
      "learning_rate": 0.0004316990624646591,
      "loss": 2.8553,
      "step": 81870
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9811748266220093,
      "learning_rate": 0.00043169538712898415,
      "loss": 2.9682,
      "step": 81871
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0386016368865967,
      "learning_rate": 0.0004316917117688246,
      "loss": 3.0957,
      "step": 81872
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3956825733184814,
      "learning_rate": 0.00043168803638418105,
      "loss": 2.7718,
      "step": 81873
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2713184356689453,
      "learning_rate": 0.0004316843609750542,
      "loss": 3.0384,
      "step": 81874
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.020181655883789,
      "learning_rate": 0.0004316806855414448,
      "loss": 3.0351,
      "step": 81875
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.297991991043091,
      "learning_rate": 0.00043167701008335347,
      "loss": 3.077,
      "step": 81876
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9983924627304077,
      "learning_rate": 0.0004316733346007808,
      "loss": 3.0208,
      "step": 81877
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7576990127563477,
      "learning_rate": 0.00043166965909372765,
      "loss": 3.1599,
      "step": 81878
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4218950271606445,
      "learning_rate": 0.00043166598356219466,
      "loss": 2.9615,
      "step": 81879
    },
    {
      "epoch": 1.07,
      "grad_norm": 4.290997505187988,
      "learning_rate": 0.0004316623080061824,
      "loss": 2.8944,
      "step": 81880
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.828302264213562,
      "learning_rate": 0.00043165863242569164,
      "loss": 3.1121,
      "step": 81881
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.631925106048584,
      "learning_rate": 0.0004316549568207231,
      "loss": 3.0368,
      "step": 81882
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5960125923156738,
      "learning_rate": 0.00043165128119127737,
      "loss": 2.9242,
      "step": 81883
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7936253547668457,
      "learning_rate": 0.00043164760553735524,
      "loss": 3.0546,
      "step": 81884
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.964769721031189,
      "learning_rate": 0.0004316439298589573,
      "loss": 2.9914,
      "step": 81885
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5445375442504883,
      "learning_rate": 0.0004316402541560842,
      "loss": 3.1936,
      "step": 81886
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1244897842407227,
      "learning_rate": 0.0004316365784287367,
      "loss": 3.1034,
      "step": 81887
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6345293521881104,
      "learning_rate": 0.0004316329026769156,
      "loss": 2.8873,
      "step": 81888
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5070945024490356,
      "learning_rate": 0.0004316292269006214,
      "loss": 3.0844,
      "step": 81889
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7913686037063599,
      "learning_rate": 0.0004316255510998548,
      "loss": 2.9154,
      "step": 81890
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7549517154693604,
      "learning_rate": 0.00043162187527461665,
      "loss": 2.9647,
      "step": 81891
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6224427223205566,
      "learning_rate": 0.00043161819942490735,
      "loss": 3.1194,
      "step": 81892
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.424113392829895,
      "learning_rate": 0.00043161452355072776,
      "loss": 3.2266,
      "step": 81893
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6515766382217407,
      "learning_rate": 0.00043161084765207865,
      "loss": 2.9574,
      "step": 81894
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.591008424758911,
      "learning_rate": 0.00043160717172896056,
      "loss": 3.1857,
      "step": 81895
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8978041410446167,
      "learning_rate": 0.00043160349578137425,
      "loss": 3.0784,
      "step": 81896
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.021921396255493,
      "learning_rate": 0.00043159981980932027,
      "loss": 3.0074,
      "step": 81897
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7898775339126587,
      "learning_rate": 0.0004315961438127994,
      "loss": 3.144,
      "step": 81898
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5856069326400757,
      "learning_rate": 0.0004315924677918124,
      "loss": 3.2977,
      "step": 81899
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9366899728775024,
      "learning_rate": 0.00043158879174635983,
      "loss": 2.9454,
      "step": 81900
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6130281686782837,
      "learning_rate": 0.00043158511567644245,
      "loss": 3.0975,
      "step": 81901
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5984822511672974,
      "learning_rate": 0.0004315814395820609,
      "loss": 2.9525,
      "step": 81902
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7031421661376953,
      "learning_rate": 0.0004315777634632158,
      "loss": 2.9426,
      "step": 81903
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.685373067855835,
      "learning_rate": 0.0004315740873199081,
      "loss": 2.8047,
      "step": 81904
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7626605033874512,
      "learning_rate": 0.00043157041115213815,
      "loss": 3.1445,
      "step": 81905
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8211206197738647,
      "learning_rate": 0.00043156673495990677,
      "loss": 2.6436,
      "step": 81906
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6318039894104004,
      "learning_rate": 0.0004315630587432147,
      "loss": 2.9755,
      "step": 81907
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.1201438903808594,
      "learning_rate": 0.00043155938250206255,
      "loss": 2.8497,
      "step": 81908
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.4910714626312256,
      "learning_rate": 0.000431555706236451,
      "loss": 3.0223,
      "step": 81909
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7739282846450806,
      "learning_rate": 0.0004315520299463808,
      "loss": 3.0434,
      "step": 81910
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9999560117721558,
      "learning_rate": 0.0004315483536318526,
      "loss": 2.9304,
      "step": 81911
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3579292297363281,
      "learning_rate": 0.0004315446772928671,
      "loss": 2.9549,
      "step": 81912
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5061618089675903,
      "learning_rate": 0.00043154100092942496,
      "loss": 2.9171,
      "step": 81913
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.605340838432312,
      "learning_rate": 0.00043153732454152675,
      "loss": 3.2372,
      "step": 81914
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.8975257873535156,
      "learning_rate": 0.00043153364812917344,
      "loss": 3.2505,
      "step": 81915
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6415694952011108,
      "learning_rate": 0.00043152997169236545,
      "loss": 2.9368,
      "step": 81916
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0358738899230957,
      "learning_rate": 0.00043152629523110345,
      "loss": 3.1159,
      "step": 81917
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.882359266281128,
      "learning_rate": 0.0004315226187453884,
      "loss": 3.0132,
      "step": 81918
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.4395363330841064,
      "learning_rate": 0.00043151894223522077,
      "loss": 2.9039,
      "step": 81919
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8792684078216553,
      "learning_rate": 0.0004315152657006012,
      "loss": 2.9585,
      "step": 81920
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.621926188468933,
      "learning_rate": 0.0004315115891415306,
      "loss": 3.0664,
      "step": 81921
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0669403076171875,
      "learning_rate": 0.0004315079125580094,
      "loss": 2.8436,
      "step": 81922
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9559454917907715,
      "learning_rate": 0.00043150423595003846,
      "loss": 2.9297,
      "step": 81923
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7105109691619873,
      "learning_rate": 0.0004315005593176184,
      "loss": 3.3816,
      "step": 81924
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.08536434173584,
      "learning_rate": 0.00043149688266074985,
      "loss": 2.9715,
      "step": 81925
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3290607929229736,
      "learning_rate": 0.0004314932059794336,
      "loss": 2.9307,
      "step": 81926
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2663044929504395,
      "learning_rate": 0.00043148952927367026,
      "loss": 2.8473,
      "step": 81927
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.0431928634643555,
      "learning_rate": 0.00043148585254346053,
      "loss": 2.7648,
      "step": 81928
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.777615547180176,
      "learning_rate": 0.0004314821757888051,
      "loss": 3.0886,
      "step": 81929
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2363016605377197,
      "learning_rate": 0.00043147849900970466,
      "loss": 2.9959,
      "step": 81930
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6230419874191284,
      "learning_rate": 0.0004314748222061599,
      "loss": 3.0004,
      "step": 81931
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.476978063583374,
      "learning_rate": 0.00043147114537817147,
      "loss": 2.9795,
      "step": 81932
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.0019800662994385,
      "learning_rate": 0.00043146746852574006,
      "loss": 3.0274,
      "step": 81933
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5868123769760132,
      "learning_rate": 0.00043146379164886635,
      "loss": 3.3301,
      "step": 81934
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.465908408164978,
      "learning_rate": 0.0004314601147475511,
      "loss": 2.9884,
      "step": 81935
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.216425657272339,
      "learning_rate": 0.0004314564378217949,
      "loss": 2.8151,
      "step": 81936
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.329106092453003,
      "learning_rate": 0.0004314527608715985,
      "loss": 3.2267,
      "step": 81937
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8414744138717651,
      "learning_rate": 0.00043144908389696244,
      "loss": 2.8489,
      "step": 81938
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6112849712371826,
      "learning_rate": 0.00043144540689788764,
      "loss": 3.074,
      "step": 81939
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7354100942611694,
      "learning_rate": 0.00043144172987437455,
      "loss": 3.0026,
      "step": 81940
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.618093729019165,
      "learning_rate": 0.00043143805282642403,
      "loss": 3.012,
      "step": 81941
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.2703258991241455,
      "learning_rate": 0.00043143437575403673,
      "loss": 3.1361,
      "step": 81942
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7593241930007935,
      "learning_rate": 0.0004314306986572132,
      "loss": 2.8452,
      "step": 81943
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7356356382369995,
      "learning_rate": 0.0004314270215359543,
      "loss": 2.8819,
      "step": 81944
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4541127681732178,
      "learning_rate": 0.0004314233443902606,
      "loss": 3.0535,
      "step": 81945
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5833665132522583,
      "learning_rate": 0.0004314196672201329,
      "loss": 3.0002,
      "step": 81946
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6169183254241943,
      "learning_rate": 0.00043141599002557166,
      "loss": 3.0438,
      "step": 81947
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9210938215255737,
      "learning_rate": 0.0004314123128065778,
      "loss": 3.0807,
      "step": 81948
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6865336894989014,
      "learning_rate": 0.0004314086355631519,
      "loss": 3.0556,
      "step": 81949
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7756823301315308,
      "learning_rate": 0.0004314049582952946,
      "loss": 3.2222,
      "step": 81950
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.058316230773926,
      "learning_rate": 0.00043140128100300676,
      "loss": 3.0522,
      "step": 81951
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.949912190437317,
      "learning_rate": 0.0004313976036862888,
      "loss": 2.8911,
      "step": 81952
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9886474609375,
      "learning_rate": 0.0004313939263451416,
      "loss": 3.1954,
      "step": 81953
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.658106565475464,
      "learning_rate": 0.00043139024897956583,
      "loss": 3.1961,
      "step": 81954
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.82747483253479,
      "learning_rate": 0.00043138657158956216,
      "loss": 3.0095,
      "step": 81955
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.508431077003479,
      "learning_rate": 0.0004313828941751311,
      "loss": 3.1777,
      "step": 81956
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5463200807571411,
      "learning_rate": 0.0004313792167362736,
      "loss": 3.0061,
      "step": 81957
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7584753036499023,
      "learning_rate": 0.0004313755392729902,
      "loss": 2.9791,
      "step": 81958
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.250218629837036,
      "learning_rate": 0.00043137186178528154,
      "loss": 3.0027,
      "step": 81959
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5197938680648804,
      "learning_rate": 0.00043136818427314856,
      "loss": 3.102,
      "step": 81960
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.706044316291809,
      "learning_rate": 0.00043136450673659154,
      "loss": 3.0593,
      "step": 81961
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5897502899169922,
      "learning_rate": 0.00043136082917561146,
      "loss": 2.836,
      "step": 81962
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6220052242279053,
      "learning_rate": 0.0004313571515902089,
      "loss": 3.1367,
      "step": 81963
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7869315147399902,
      "learning_rate": 0.00043135347398038466,
      "loss": 3.0302,
      "step": 81964
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9938308000564575,
      "learning_rate": 0.00043134979634613924,
      "loss": 3.162,
      "step": 81965
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5759931802749634,
      "learning_rate": 0.00043134611868747346,
      "loss": 2.7392,
      "step": 81966
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6983633041381836,
      "learning_rate": 0.00043134244100438793,
      "loss": 2.8241,
      "step": 81967
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5590533018112183,
      "learning_rate": 0.0004313387632968833,
      "loss": 2.6664,
      "step": 81968
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0353147983551025,
      "learning_rate": 0.0004313350855649604,
      "loss": 3.1284,
      "step": 81969
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.226320743560791,
      "learning_rate": 0.00043133140780861974,
      "loss": 3.0644,
      "step": 81970
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5025708675384521,
      "learning_rate": 0.00043132773002786223,
      "loss": 2.8217,
      "step": 81971
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5493327379226685,
      "learning_rate": 0.0004313240522226884,
      "loss": 3.1435,
      "step": 81972
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7213011980056763,
      "learning_rate": 0.00043132037439309884,
      "loss": 2.895,
      "step": 81973
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.740206241607666,
      "learning_rate": 0.0004313166965390944,
      "loss": 3.0736,
      "step": 81974
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.0424113273620605,
      "learning_rate": 0.00043131301866067566,
      "loss": 3.0576,
      "step": 81975
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5742363929748535,
      "learning_rate": 0.00043130934075784344,
      "loss": 2.97,
      "step": 81976
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6092286109924316,
      "learning_rate": 0.0004313056628305983,
      "loss": 3.0474,
      "step": 81977
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9055293798446655,
      "learning_rate": 0.0004313019848789409,
      "loss": 2.9872,
      "step": 81978
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7098551988601685,
      "learning_rate": 0.0004312983069028721,
      "loss": 3.1699,
      "step": 81979
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.852167010307312,
      "learning_rate": 0.00043129462890239233,
      "loss": 3.0381,
      "step": 81980
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8802050352096558,
      "learning_rate": 0.0004312909508775024,
      "loss": 2.9477,
      "step": 81981
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.386594533920288,
      "learning_rate": 0.0004312872728282031,
      "loss": 2.9538,
      "step": 81982
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6592893600463867,
      "learning_rate": 0.000431283594754495,
      "loss": 2.7094,
      "step": 81983
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4157161712646484,
      "learning_rate": 0.0004312799166563788,
      "loss": 2.944,
      "step": 81984
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5440973043441772,
      "learning_rate": 0.0004312762385338552,
      "loss": 3.0824,
      "step": 81985
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.729766607284546,
      "learning_rate": 0.0004312725603869248,
      "loss": 3.0458,
      "step": 81986
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7714122533798218,
      "learning_rate": 0.0004312688822155885,
      "loss": 2.9679,
      "step": 81987
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6545394659042358,
      "learning_rate": 0.00043126520401984666,
      "loss": 3.2793,
      "step": 81988
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.292273759841919,
      "learning_rate": 0.0004312615257997003,
      "loss": 2.9047,
      "step": 81989
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.800528883934021,
      "learning_rate": 0.00043125784755514986,
      "loss": 2.8798,
      "step": 81990
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5941808223724365,
      "learning_rate": 0.0004312541692861961,
      "loss": 3.0541,
      "step": 81991
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1281983852386475,
      "learning_rate": 0.0004312504909928397,
      "loss": 2.9926,
      "step": 81992
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.630043864250183,
      "learning_rate": 0.00043124681267508135,
      "loss": 2.908,
      "step": 81993
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1156485080718994,
      "learning_rate": 0.00043124313433292184,
      "loss": 3.0314,
      "step": 81994
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.3172669410705566,
      "learning_rate": 0.0004312394559663617,
      "loss": 2.9428,
      "step": 81995
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7267951965332031,
      "learning_rate": 0.00043123577757540166,
      "loss": 3.1145,
      "step": 81996
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7502975463867188,
      "learning_rate": 0.0004312320991600424,
      "loss": 3.019,
      "step": 81997
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.135869026184082,
      "learning_rate": 0.00043122842072028465,
      "loss": 2.9374,
      "step": 81998
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6523138284683228,
      "learning_rate": 0.00043122474225612903,
      "loss": 2.914,
      "step": 81999
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5110969543457031,
      "learning_rate": 0.00043122106376757626,
      "loss": 2.9233,
      "step": 82000
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5260530710220337,
      "learning_rate": 0.000431217385254627,
      "loss": 3.0784,
      "step": 82001
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4761847257614136,
      "learning_rate": 0.0004312137067172819,
      "loss": 2.858,
      "step": 82002
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4279485940933228,
      "learning_rate": 0.0004312100281555419,
      "loss": 3.1691,
      "step": 82003
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9542936086654663,
      "learning_rate": 0.0004312063495694073,
      "loss": 3.2027,
      "step": 82004
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.410041093826294,
      "learning_rate": 0.00043120267095887895,
      "loss": 2.9529,
      "step": 82005
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8929312229156494,
      "learning_rate": 0.00043119899232395764,
      "loss": 3.0203,
      "step": 82006
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6535505056381226,
      "learning_rate": 0.00043119531366464395,
      "loss": 3.0042,
      "step": 82007
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4732743501663208,
      "learning_rate": 0.0004311916349809385,
      "loss": 3.0783,
      "step": 82008
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.245063304901123,
      "learning_rate": 0.00043118795627284214,
      "loss": 3.0808,
      "step": 82009
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9873219728469849,
      "learning_rate": 0.0004311842775403554,
      "loss": 3.1585,
      "step": 82010
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6675359010696411,
      "learning_rate": 0.000431180598783479,
      "loss": 2.9924,
      "step": 82011
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1569125652313232,
      "learning_rate": 0.00043117692000221375,
      "loss": 2.9873,
      "step": 82012
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3638858795166016,
      "learning_rate": 0.0004311732411965602,
      "loss": 3.2024,
      "step": 82013
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8470746278762817,
      "learning_rate": 0.000431169562366519,
      "loss": 3.0872,
      "step": 82014
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2565524578094482,
      "learning_rate": 0.00043116588351209097,
      "loss": 3.0725,
      "step": 82015
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.396899461746216,
      "learning_rate": 0.0004311622046332767,
      "loss": 2.9331,
      "step": 82016
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5169506072998047,
      "learning_rate": 0.0004311585257300769,
      "loss": 3.1684,
      "step": 82017
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1599252223968506,
      "learning_rate": 0.0004311548468024923,
      "loss": 3.177,
      "step": 82018
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.2744626998901367,
      "learning_rate": 0.00043115116785052347,
      "loss": 3.0245,
      "step": 82019
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0213747024536133,
      "learning_rate": 0.0004311474888741712,
      "loss": 3.1434,
      "step": 82020
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7075085639953613,
      "learning_rate": 0.0004311438098734361,
      "loss": 3.072,
      "step": 82021
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3316102027893066,
      "learning_rate": 0.0004311401308483189,
      "loss": 2.9777,
      "step": 82022
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.65156626701355,
      "learning_rate": 0.0004311364517988203,
      "loss": 3.05,
      "step": 82023
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5798051357269287,
      "learning_rate": 0.000431132772724941,
      "loss": 3.2145,
      "step": 82024
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.7257556915283203,
      "learning_rate": 0.0004311290936266816,
      "loss": 3.1248,
      "step": 82025
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.777278184890747,
      "learning_rate": 0.0004311254145040428,
      "loss": 2.9737,
      "step": 82026
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8387593030929565,
      "learning_rate": 0.00043112173535702535,
      "loss": 3.0068,
      "step": 82027
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8171145915985107,
      "learning_rate": 0.00043111805618562985,
      "loss": 3.1237,
      "step": 82028
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.985129714012146,
      "learning_rate": 0.000431114376989857,
      "loss": 3.0403,
      "step": 82029
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9921058416366577,
      "learning_rate": 0.0004311106977697076,
      "loss": 2.9197,
      "step": 82030
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5660628080368042,
      "learning_rate": 0.0004311070185251822,
      "loss": 3.0002,
      "step": 82031
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.564516067504883,
      "learning_rate": 0.00043110333925628155,
      "loss": 3.0194,
      "step": 82032
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.7536466121673584,
      "learning_rate": 0.0004310996599630063,
      "loss": 3.0342,
      "step": 82033
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7098807096481323,
      "learning_rate": 0.0004310959806453572,
      "loss": 3.2188,
      "step": 82034
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7800103425979614,
      "learning_rate": 0.0004310923013033348,
      "loss": 2.9645,
      "step": 82035
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2905526161193848,
      "learning_rate": 0.0004310886219369399,
      "loss": 3.0105,
      "step": 82036
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.0248029232025146,
      "learning_rate": 0.0004310849425461732,
      "loss": 3.0006,
      "step": 82037
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7965160608291626,
      "learning_rate": 0.00043108126313103523,
      "loss": 3.1732,
      "step": 82038
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8035929203033447,
      "learning_rate": 0.0004310775836915268,
      "loss": 3.2398,
      "step": 82039
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.6954703330993652,
      "learning_rate": 0.0004310739042276486,
      "loss": 2.8078,
      "step": 82040
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.999316692352295,
      "learning_rate": 0.00043107022473940133,
      "loss": 3.2655,
      "step": 82041
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6604112386703491,
      "learning_rate": 0.0004310665452267856,
      "loss": 3.0068,
      "step": 82042
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9678897857666016,
      "learning_rate": 0.0004310628656898022,
      "loss": 2.9696,
      "step": 82043
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5830821990966797,
      "learning_rate": 0.00043105918612845165,
      "loss": 2.9797,
      "step": 82044
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8088122606277466,
      "learning_rate": 0.00043105550654273464,
      "loss": 2.9694,
      "step": 82045
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6244398355484009,
      "learning_rate": 0.0004310518269326521,
      "loss": 2.7673,
      "step": 82046
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2132487297058105,
      "learning_rate": 0.00043104814729820446,
      "loss": 3.3704,
      "step": 82047
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.2818989753723145,
      "learning_rate": 0.0004310444676393925,
      "loss": 2.8675,
      "step": 82048
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.64255690574646,
      "learning_rate": 0.000431040787956217,
      "loss": 3.0319,
      "step": 82049
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9153579473495483,
      "learning_rate": 0.0004310371082486784,
      "loss": 2.9149,
      "step": 82050
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7763348817825317,
      "learning_rate": 0.0004310334285167776,
      "loss": 2.9357,
      "step": 82051
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3671503067016602,
      "learning_rate": 0.0004310297487605152,
      "loss": 3.269,
      "step": 82052
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9140945672988892,
      "learning_rate": 0.0004310260689798919,
      "loss": 2.9968,
      "step": 82053
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.015590190887451,
      "learning_rate": 0.00043102238917490837,
      "loss": 2.8734,
      "step": 82054
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7214491367340088,
      "learning_rate": 0.00043101870934556533,
      "loss": 2.8792,
      "step": 82055
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8976188898086548,
      "learning_rate": 0.00043101502949186347,
      "loss": 2.7737,
      "step": 82056
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.9325194358825684,
      "learning_rate": 0.00043101134961380337,
      "loss": 2.9842,
      "step": 82057
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7779909372329712,
      "learning_rate": 0.0004310076697113858,
      "loss": 2.9534,
      "step": 82058
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.783278226852417,
      "learning_rate": 0.00043100398978461143,
      "loss": 3.0501,
      "step": 82059
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7725694179534912,
      "learning_rate": 0.00043100030983348093,
      "loss": 2.9266,
      "step": 82060
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.687941074371338,
      "learning_rate": 0.00043099662985799514,
      "loss": 3.0122,
      "step": 82061
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4570780992507935,
      "learning_rate": 0.0004309929498581544,
      "loss": 3.2377,
      "step": 82062
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.568796992301941,
      "learning_rate": 0.0004309892698339597,
      "loss": 3.1657,
      "step": 82063
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8163974285125732,
      "learning_rate": 0.0004309855897854117,
      "loss": 2.9611,
      "step": 82064
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6387056112289429,
      "learning_rate": 0.0004309819097125109,
      "loss": 2.8879,
      "step": 82065
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6689821481704712,
      "learning_rate": 0.00043097822961525806,
      "loss": 2.9824,
      "step": 82066
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4697648286819458,
      "learning_rate": 0.0004309745494936539,
      "loss": 3.2386,
      "step": 82067
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8001360893249512,
      "learning_rate": 0.0004309708693476992,
      "loss": 2.9305,
      "step": 82068
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7216930389404297,
      "learning_rate": 0.00043096718917739443,
      "loss": 3.1018,
      "step": 82069
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6455769538879395,
      "learning_rate": 0.0004309635089827404,
      "loss": 3.2886,
      "step": 82070
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9343944787979126,
      "learning_rate": 0.00043095982876373794,
      "loss": 2.8645,
      "step": 82071
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.151233673095703,
      "learning_rate": 0.0004309561485203874,
      "loss": 2.8398,
      "step": 82072
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7850327491760254,
      "learning_rate": 0.0004309524682526897,
      "loss": 3.0586,
      "step": 82073
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8686941862106323,
      "learning_rate": 0.0004309487879606455,
      "loss": 3.0618,
      "step": 82074
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.777450442314148,
      "learning_rate": 0.00043094510764425545,
      "loss": 3.203,
      "step": 82075
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8229553699493408,
      "learning_rate": 0.00043094142730352016,
      "loss": 3.0526,
      "step": 82076
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0848376750946045,
      "learning_rate": 0.0004309377469384404,
      "loss": 2.9658,
      "step": 82077
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.439346194267273,
      "learning_rate": 0.00043093406654901684,
      "loss": 2.8504,
      "step": 82078
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1300294399261475,
      "learning_rate": 0.00043093038613525023,
      "loss": 3.0177,
      "step": 82079
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1726694107055664,
      "learning_rate": 0.00043092670569714117,
      "loss": 2.6214,
      "step": 82080
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.554445743560791,
      "learning_rate": 0.0004309230252346903,
      "loss": 3.0408,
      "step": 82081
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.113811731338501,
      "learning_rate": 0.0004309193447478984,
      "loss": 3.1736,
      "step": 82082
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7591071128845215,
      "learning_rate": 0.00043091566423676615,
      "loss": 3.097,
      "step": 82083
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5087449550628662,
      "learning_rate": 0.00043091198370129416,
      "loss": 3.1389,
      "step": 82084
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6729509830474854,
      "learning_rate": 0.00043090830314148324,
      "loss": 3.0559,
      "step": 82085
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9306344985961914,
      "learning_rate": 0.000430904622557334,
      "loss": 3.2778,
      "step": 82086
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9034368991851807,
      "learning_rate": 0.000430900941948847,
      "loss": 2.9514,
      "step": 82087
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1624302864074707,
      "learning_rate": 0.00043089726131602315,
      "loss": 3.0114,
      "step": 82088
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.629299521446228,
      "learning_rate": 0.000430893580658863,
      "loss": 2.9612,
      "step": 82089
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6154686212539673,
      "learning_rate": 0.0004308898999773672,
      "loss": 2.9611,
      "step": 82090
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6824331283569336,
      "learning_rate": 0.00043088621927153665,
      "loss": 2.9977,
      "step": 82091
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4271966218948364,
      "learning_rate": 0.0004308825385413717,
      "loss": 3.096,
      "step": 82092
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4007256031036377,
      "learning_rate": 0.0004308788577868733,
      "loss": 3.1739,
      "step": 82093
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5864841938018799,
      "learning_rate": 0.0004308751770080421,
      "loss": 2.9513,
      "step": 82094
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7839832305908203,
      "learning_rate": 0.00043087149620487866,
      "loss": 2.8569,
      "step": 82095
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.238117218017578,
      "learning_rate": 0.00043086781537738376,
      "loss": 2.8754,
      "step": 82096
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1130003929138184,
      "learning_rate": 0.00043086413452555805,
      "loss": 2.6931,
      "step": 82097
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4663240909576416,
      "learning_rate": 0.00043086045364940226,
      "loss": 2.9544,
      "step": 82098
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7263743877410889,
      "learning_rate": 0.000430856772748917,
      "loss": 2.8554,
      "step": 82099
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5796629190444946,
      "learning_rate": 0.00043085309182410304,
      "loss": 3.2834,
      "step": 82100
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9446115493774414,
      "learning_rate": 0.000430849410874961,
      "loss": 2.7518,
      "step": 82101
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8229514360427856,
      "learning_rate": 0.00043084572990149153,
      "loss": 3.0591,
      "step": 82102
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9825012683868408,
      "learning_rate": 0.00043084204890369546,
      "loss": 3.0706,
      "step": 82103
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6184178590774536,
      "learning_rate": 0.0004308383678815733,
      "loss": 3.0035,
      "step": 82104
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7170974016189575,
      "learning_rate": 0.0004308346868351259,
      "loss": 3.0996,
      "step": 82105
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.711127519607544,
      "learning_rate": 0.0004308310057643538,
      "loss": 2.6914,
      "step": 82106
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.183202028274536,
      "learning_rate": 0.0004308273246692578,
      "loss": 2.9511,
      "step": 82107
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6931953430175781,
      "learning_rate": 0.00043082364354983847,
      "loss": 3.2073,
      "step": 82108
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5385547876358032,
      "learning_rate": 0.00043081996240609664,
      "loss": 3.0677,
      "step": 82109
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4819777011871338,
      "learning_rate": 0.00043081628123803285,
      "loss": 2.9097,
      "step": 82110
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7515878677368164,
      "learning_rate": 0.00043081260004564786,
      "loss": 3.177,
      "step": 82111
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.748875379562378,
      "learning_rate": 0.00043080891882894227,
      "loss": 3.179,
      "step": 82112
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9077208042144775,
      "learning_rate": 0.00043080523758791693,
      "loss": 2.7642,
      "step": 82113
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6121609210968018,
      "learning_rate": 0.00043080155632257234,
      "loss": 2.9805,
      "step": 82114
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8569916486740112,
      "learning_rate": 0.0004307978750329093,
      "loss": 2.9752,
      "step": 82115
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.129545211791992,
      "learning_rate": 0.00043079419371892856,
      "loss": 3.0598,
      "step": 82116
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6798614263534546,
      "learning_rate": 0.0004307905123806306,
      "loss": 2.9892,
      "step": 82117
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.801663875579834,
      "learning_rate": 0.00043078683101801623,
      "loss": 2.8563,
      "step": 82118
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.373269557952881,
      "learning_rate": 0.0004307831496310862,
      "loss": 3.2622,
      "step": 82119
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4727661609649658,
      "learning_rate": 0.00043077946821984105,
      "loss": 3.2927,
      "step": 82120
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9166520833969116,
      "learning_rate": 0.0004307757867842815,
      "loss": 2.9103,
      "step": 82121
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.261662244796753,
      "learning_rate": 0.00043077210532440827,
      "loss": 2.8776,
      "step": 82122
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5118188858032227,
      "learning_rate": 0.00043076842384022214,
      "loss": 3.1425,
      "step": 82123
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.054170608520508,
      "learning_rate": 0.00043076474233172353,
      "loss": 3.0162,
      "step": 82124
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.192986011505127,
      "learning_rate": 0.0004307610607989135,
      "loss": 2.9965,
      "step": 82125
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8333014249801636,
      "learning_rate": 0.0004307573792417923,
      "loss": 3.1483,
      "step": 82126
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.6282033920288086,
      "learning_rate": 0.00043075369766036093,
      "loss": 3.0639,
      "step": 82127
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1825623512268066,
      "learning_rate": 0.00043075001605461996,
      "loss": 3.0485,
      "step": 82128
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9090487957000732,
      "learning_rate": 0.00043074633442457015,
      "loss": 2.9016,
      "step": 82129
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6728601455688477,
      "learning_rate": 0.00043074265277021203,
      "loss": 2.808,
      "step": 82130
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.534198522567749,
      "learning_rate": 0.00043073897109154654,
      "loss": 2.9068,
      "step": 82131
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8622108697891235,
      "learning_rate": 0.00043073528938857405,
      "loss": 3.2416,
      "step": 82132
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7593499422073364,
      "learning_rate": 0.0004307316076612954,
      "loss": 3.1465,
      "step": 82133
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6355881690979004,
      "learning_rate": 0.0004307279259097114,
      "loss": 2.8015,
      "step": 82134
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0212020874023438,
      "learning_rate": 0.00043072424413382254,
      "loss": 2.9881,
      "step": 82135
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0252013206481934,
      "learning_rate": 0.00043072056233362953,
      "loss": 3.1183,
      "step": 82136
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8737809658050537,
      "learning_rate": 0.0004307168805091332,
      "loss": 3.038,
      "step": 82137
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.616981863975525,
      "learning_rate": 0.0004307131986603341,
      "loss": 2.8887,
      "step": 82138
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.641784429550171,
      "learning_rate": 0.0004307095167872329,
      "loss": 2.9288,
      "step": 82139
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4422762393951416,
      "learning_rate": 0.00043070583488983037,
      "loss": 3.0882,
      "step": 82140
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5392147302627563,
      "learning_rate": 0.0004307021529681272,
      "loss": 3.0677,
      "step": 82141
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8210396766662598,
      "learning_rate": 0.0004306984710221239,
      "loss": 2.9665,
      "step": 82142
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5797523260116577,
      "learning_rate": 0.00043069478905182146,
      "loss": 2.9716,
      "step": 82143
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8007392883300781,
      "learning_rate": 0.0004306911070572203,
      "loss": 2.7953,
      "step": 82144
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.905922770500183,
      "learning_rate": 0.00043068742503832124,
      "loss": 3.1917,
      "step": 82145
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9533227682113647,
      "learning_rate": 0.00043068374299512485,
      "loss": 3.0796,
      "step": 82146
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5495673418045044,
      "learning_rate": 0.0004306800609276319,
      "loss": 2.8221,
      "step": 82147
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.721078872680664,
      "learning_rate": 0.0004306763788358431,
      "loss": 3.2017,
      "step": 82148
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7410876750946045,
      "learning_rate": 0.00043067269671975914,
      "loss": 2.911,
      "step": 82149
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.198890209197998,
      "learning_rate": 0.0004306690145793805,
      "loss": 2.9965,
      "step": 82150
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5475411415100098,
      "learning_rate": 0.00043066533241470817,
      "loss": 3.093,
      "step": 82151
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5769423246383667,
      "learning_rate": 0.0004306616502257426,
      "loss": 3.1148,
      "step": 82152
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8439432382583618,
      "learning_rate": 0.00043065796801248466,
      "loss": 3.3543,
      "step": 82153
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3608956336975098,
      "learning_rate": 0.00043065428577493484,
      "loss": 2.7581,
      "step": 82154
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7861894369125366,
      "learning_rate": 0.0004306506035130939,
      "loss": 2.9555,
      "step": 82155
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6093026399612427,
      "learning_rate": 0.00043064692122696263,
      "loss": 2.9699,
      "step": 82156
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.157306432723999,
      "learning_rate": 0.0004306432389165416,
      "loss": 3.0184,
      "step": 82157
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7737748622894287,
      "learning_rate": 0.00043063955658183153,
      "loss": 3.1138,
      "step": 82158
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.246821880340576,
      "learning_rate": 0.0004306358742228331,
      "loss": 2.9896,
      "step": 82159
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8022462129592896,
      "learning_rate": 0.00043063219183954703,
      "loss": 3.056,
      "step": 82160
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8672873973846436,
      "learning_rate": 0.0004306285094319739,
      "loss": 2.8845,
      "step": 82161
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7771261930465698,
      "learning_rate": 0.00043062482700011444,
      "loss": 2.9546,
      "step": 82162
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.028982639312744,
      "learning_rate": 0.0004306211445439695,
      "loss": 3.0071,
      "step": 82163
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.618382453918457,
      "learning_rate": 0.0004306174620635395,
      "loss": 2.9967,
      "step": 82164
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9962044954299927,
      "learning_rate": 0.00043061377955882525,
      "loss": 2.9294,
      "step": 82165
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.122023820877075,
      "learning_rate": 0.0004306100970298275,
      "loss": 3.2592,
      "step": 82166
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.44033944606781,
      "learning_rate": 0.0004306064144765469,
      "loss": 2.9947,
      "step": 82167
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.080298662185669,
      "learning_rate": 0.000430602731898984,
      "loss": 3.1661,
      "step": 82168
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7303577661514282,
      "learning_rate": 0.0004305990492971396,
      "loss": 3.0441,
      "step": 82169
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.029597520828247,
      "learning_rate": 0.00043059536667101444,
      "loss": 3.0158,
      "step": 82170
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.9591524600982666,
      "learning_rate": 0.0004305916840206091,
      "loss": 2.9111,
      "step": 82171
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7289941310882568,
      "learning_rate": 0.00043058800134592427,
      "loss": 3.1148,
      "step": 82172
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0935277938842773,
      "learning_rate": 0.0004305843186469607,
      "loss": 3.0968,
      "step": 82173
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.505284309387207,
      "learning_rate": 0.0004305806359237191,
      "loss": 2.9939,
      "step": 82174
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7893115282058716,
      "learning_rate": 0.0004305769531762,
      "loss": 3.1227,
      "step": 82175
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.816199779510498,
      "learning_rate": 0.0004305732704044041,
      "loss": 3.2953,
      "step": 82176
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.177293539047241,
      "learning_rate": 0.00043056958760833233,
      "loss": 3.2122,
      "step": 82177
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6352187395095825,
      "learning_rate": 0.0004305659047879852,
      "loss": 3.1816,
      "step": 82178
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.833639144897461,
      "learning_rate": 0.00043056222194336327,
      "loss": 2.7481,
      "step": 82179
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9185515642166138,
      "learning_rate": 0.00043055853907446757,
      "loss": 3.087,
      "step": 82180
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.435089588165283,
      "learning_rate": 0.00043055485618129837,
      "loss": 3.2417,
      "step": 82181
    },
    {
      "epoch": 1.07,
      "grad_norm": 4.433612823486328,
      "learning_rate": 0.00043055117326385665,
      "loss": 2.9944,
      "step": 82182
    },
    {
      "epoch": 1.07,
      "grad_norm": 4.110608100891113,
      "learning_rate": 0.00043054749032214306,
      "loss": 2.8389,
      "step": 82183
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8612815141677856,
      "learning_rate": 0.00043054380735615803,
      "loss": 2.8811,
      "step": 82184
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.826080560684204,
      "learning_rate": 0.0004305401243659026,
      "loss": 2.8184,
      "step": 82185
    },
    {
      "epoch": 1.07,
      "grad_norm": 4.018339157104492,
      "learning_rate": 0.00043053644135137735,
      "loss": 2.6745,
      "step": 82186
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.015181064605713,
      "learning_rate": 0.0004305327583125828,
      "loss": 2.9621,
      "step": 82187
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0206525325775146,
      "learning_rate": 0.0004305290752495197,
      "loss": 3.206,
      "step": 82188
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3016107082366943,
      "learning_rate": 0.00043052539216218886,
      "loss": 2.957,
      "step": 82189
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8625248670578003,
      "learning_rate": 0.00043052170905059087,
      "loss": 3.2536,
      "step": 82190
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.419393539428711,
      "learning_rate": 0.0004305180259147265,
      "loss": 3.0349,
      "step": 82191
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7063785791397095,
      "learning_rate": 0.0004305143427545963,
      "loss": 3.0721,
      "step": 82192
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6728181838989258,
      "learning_rate": 0.000430510659570201,
      "loss": 3.0426,
      "step": 82193
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5469114780426025,
      "learning_rate": 0.0004305069763615413,
      "loss": 3.2387,
      "step": 82194
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.481258511543274,
      "learning_rate": 0.000430503293128618,
      "loss": 3.2695,
      "step": 82195
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7419055700302124,
      "learning_rate": 0.00043049960987143155,
      "loss": 3.3167,
      "step": 82196
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9569720029830933,
      "learning_rate": 0.0004304959265899828,
      "loss": 3.0262,
      "step": 82197
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5858495235443115,
      "learning_rate": 0.0004304922432842724,
      "loss": 2.8824,
      "step": 82198
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.868557095527649,
      "learning_rate": 0.000430488559954301,
      "loss": 2.8644,
      "step": 82199
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.187577247619629,
      "learning_rate": 0.00043048487660006935,
      "loss": 2.9685,
      "step": 82200
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6032116413116455,
      "learning_rate": 0.00043048119322157814,
      "loss": 3.0565,
      "step": 82201
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.2080771923065186,
      "learning_rate": 0.0004304775098188279,
      "loss": 3.0143,
      "step": 82202
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5870115756988525,
      "learning_rate": 0.0004304738263918195,
      "loss": 3.1963,
      "step": 82203
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.648435354232788,
      "learning_rate": 0.00043047014294055356,
      "loss": 2.8841,
      "step": 82204
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.559450626373291,
      "learning_rate": 0.0004304664594650307,
      "loss": 2.5922,
      "step": 82205
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9838265180587769,
      "learning_rate": 0.00043046277596525174,
      "loss": 3.1822,
      "step": 82206
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1229894161224365,
      "learning_rate": 0.00043045909244121723,
      "loss": 2.934,
      "step": 82207
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6625672578811646,
      "learning_rate": 0.0004304554088929279,
      "loss": 3.104,
      "step": 82208
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8981534242630005,
      "learning_rate": 0.00043045172532038447,
      "loss": 3.0226,
      "step": 82209
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6084470748901367,
      "learning_rate": 0.0004304480417235877,
      "loss": 3.1948,
      "step": 82210
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1757826805114746,
      "learning_rate": 0.000430444358102538,
      "loss": 3.0904,
      "step": 82211
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9180951118469238,
      "learning_rate": 0.0004304406744572363,
      "loss": 2.7939,
      "step": 82212
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.811357021331787,
      "learning_rate": 0.0004304369907876833,
      "loss": 3.0752,
      "step": 82213
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0621719360351562,
      "learning_rate": 0.00043043330709387955,
      "loss": 3.1728,
      "step": 82214
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4574205875396729,
      "learning_rate": 0.00043042962337582575,
      "loss": 3.0005,
      "step": 82215
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4475048780441284,
      "learning_rate": 0.0004304259396335227,
      "loss": 2.7186,
      "step": 82216
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6511452198028564,
      "learning_rate": 0.00043042225586697086,
      "loss": 3.149,
      "step": 82217
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0464468002319336,
      "learning_rate": 0.00043041857207617117,
      "loss": 3.0792,
      "step": 82218
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5898938179016113,
      "learning_rate": 0.0004304148882611242,
      "loss": 3.1514,
      "step": 82219
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6126397848129272,
      "learning_rate": 0.0004304112044218306,
      "loss": 3.0829,
      "step": 82220
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.114447593688965,
      "learning_rate": 0.0004304075205582911,
      "loss": 2.9579,
      "step": 82221
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.860661268234253,
      "learning_rate": 0.00043040383667050647,
      "loss": 2.9706,
      "step": 82222
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6530840396881104,
      "learning_rate": 0.0004304001527584772,
      "loss": 3.0897,
      "step": 82223
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5012006759643555,
      "learning_rate": 0.0004303964688222041,
      "loss": 2.8824,
      "step": 82224
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2718703746795654,
      "learning_rate": 0.00043039278486168794,
      "loss": 2.8415,
      "step": 82225
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.599010705947876,
      "learning_rate": 0.00043038910087692914,
      "loss": 2.9636,
      "step": 82226
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.268970489501953,
      "learning_rate": 0.0004303854168679286,
      "loss": 3.0332,
      "step": 82227
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5175228118896484,
      "learning_rate": 0.000430381732834687,
      "loss": 2.8927,
      "step": 82228
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.970530390739441,
      "learning_rate": 0.0004303780487772049,
      "loss": 2.9363,
      "step": 82229
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4391539096832275,
      "learning_rate": 0.0004303743646954831,
      "loss": 3.1403,
      "step": 82230
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.02927565574646,
      "learning_rate": 0.00043037068058952225,
      "loss": 3.2503,
      "step": 82231
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8883634805679321,
      "learning_rate": 0.00043036699645932305,
      "loss": 2.9617,
      "step": 82232
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0752551555633545,
      "learning_rate": 0.0004303633123048861,
      "loss": 3.0038,
      "step": 82233
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.584428071975708,
      "learning_rate": 0.00043035962812621223,
      "loss": 3.0498,
      "step": 82234
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8638249635696411,
      "learning_rate": 0.00043035594392330204,
      "loss": 2.9273,
      "step": 82235
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1127870082855225,
      "learning_rate": 0.00043035225969615613,
      "loss": 3.1199,
      "step": 82236
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.105975866317749,
      "learning_rate": 0.0004303485754447753,
      "loss": 3.0419,
      "step": 82237
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.341439962387085,
      "learning_rate": 0.0004303448911691603,
      "loss": 3.1626,
      "step": 82238
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9053274393081665,
      "learning_rate": 0.0004303412068693116,
      "loss": 3.1017,
      "step": 82239
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6404820680618286,
      "learning_rate": 0.00043033752254523005,
      "loss": 3.0358,
      "step": 82240
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.910841703414917,
      "learning_rate": 0.00043033383819691636,
      "loss": 2.7784,
      "step": 82241
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4851272106170654,
      "learning_rate": 0.0004303301538243711,
      "loss": 3.079,
      "step": 82242
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.704152226448059,
      "learning_rate": 0.00043032646942759497,
      "loss": 3.0692,
      "step": 82243
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.799940824508667,
      "learning_rate": 0.0004303227850065887,
      "loss": 3.1644,
      "step": 82244
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8598932027816772,
      "learning_rate": 0.00043031910056135305,
      "loss": 3.1436,
      "step": 82245
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9927195310592651,
      "learning_rate": 0.0004303154160918885,
      "loss": 3.1564,
      "step": 82246
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5529011487960815,
      "learning_rate": 0.0004303117315981959,
      "loss": 2.9139,
      "step": 82247
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.249267101287842,
      "learning_rate": 0.000430308047080276,
      "loss": 3.2549,
      "step": 82248
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.501457452774048,
      "learning_rate": 0.0004303043625381292,
      "loss": 2.9867,
      "step": 82249
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6274181604385376,
      "learning_rate": 0.0004303006779717565,
      "loss": 2.9723,
      "step": 82250
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6256777048110962,
      "learning_rate": 0.0004302969933811583,
      "loss": 3.0295,
      "step": 82251
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0981335639953613,
      "learning_rate": 0.0004302933087663355,
      "loss": 2.6769,
      "step": 82252
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3151652812957764,
      "learning_rate": 0.00043028962412728874,
      "loss": 3.1665,
      "step": 82253
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.034264326095581,
      "learning_rate": 0.0004302859394640186,
      "loss": 3.0463,
      "step": 82254
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6885554790496826,
      "learning_rate": 0.00043028225477652603,
      "loss": 2.8122,
      "step": 82255
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.6180005073547363,
      "learning_rate": 0.00043027857006481136,
      "loss": 3.1079,
      "step": 82256
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.851377010345459,
      "learning_rate": 0.0004302748853288755,
      "loss": 3.1209,
      "step": 82257
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.536485433578491,
      "learning_rate": 0.00043027120056871905,
      "loss": 2.9903,
      "step": 82258
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6502436399459839,
      "learning_rate": 0.00043026751578434277,
      "loss": 2.6852,
      "step": 82259
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.719696283340454,
      "learning_rate": 0.00043026383097574726,
      "loss": 3.0878,
      "step": 82260
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6205297708511353,
      "learning_rate": 0.0004302601461429332,
      "loss": 2.7587,
      "step": 82261
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8883267641067505,
      "learning_rate": 0.00043025646128590147,
      "loss": 2.9285,
      "step": 82262
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.54019832611084,
      "learning_rate": 0.00043025277640465254,
      "loss": 3.1297,
      "step": 82263
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6877508163452148,
      "learning_rate": 0.0004302490914991871,
      "loss": 3.0237,
      "step": 82264
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9608293771743774,
      "learning_rate": 0.000430245406569506,
      "loss": 2.74,
      "step": 82265
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7337944507598877,
      "learning_rate": 0.0004302417216156097,
      "loss": 2.9797,
      "step": 82266
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6730324029922485,
      "learning_rate": 0.000430238036637499,
      "loss": 3.1095,
      "step": 82267
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.265728235244751,
      "learning_rate": 0.0004302343516351747,
      "loss": 2.8386,
      "step": 82268
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.155104875564575,
      "learning_rate": 0.0004302306666086373,
      "loss": 3.1712,
      "step": 82269
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.162916898727417,
      "learning_rate": 0.00043022698155788757,
      "loss": 3.0628,
      "step": 82270
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.207157850265503,
      "learning_rate": 0.00043022329648292624,
      "loss": 3.1187,
      "step": 82271
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5412473678588867,
      "learning_rate": 0.00043021961138375387,
      "loss": 2.9489,
      "step": 82272
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7824130058288574,
      "learning_rate": 0.00043021592626037126,
      "loss": 2.9152,
      "step": 82273
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7459430694580078,
      "learning_rate": 0.000430212241112779,
      "loss": 3.3755,
      "step": 82274
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3521759510040283,
      "learning_rate": 0.00043020855594097794,
      "loss": 3.0278,
      "step": 82275
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8179517984390259,
      "learning_rate": 0.0004302048707449685,
      "loss": 3.1089,
      "step": 82276
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6454137563705444,
      "learning_rate": 0.00043020118552475166,
      "loss": 2.9214,
      "step": 82277
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4973541498184204,
      "learning_rate": 0.0004301975002803279,
      "loss": 3.0949,
      "step": 82278
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1141116619110107,
      "learning_rate": 0.0004301938150116979,
      "loss": 3.0485,
      "step": 82279
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9021251201629639,
      "learning_rate": 0.00043019012971886255,
      "loss": 3.2407,
      "step": 82280
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6172796487808228,
      "learning_rate": 0.00043018644440182224,
      "loss": 2.9957,
      "step": 82281
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7109917402267456,
      "learning_rate": 0.0004301827590605779,
      "loss": 3.1408,
      "step": 82282
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7375279664993286,
      "learning_rate": 0.0004301790736951301,
      "loss": 3.1783,
      "step": 82283
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6974220275878906,
      "learning_rate": 0.0004301753883054796,
      "loss": 3.3094,
      "step": 82284
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.036297082901001,
      "learning_rate": 0.000430171702891627,
      "loss": 2.9343,
      "step": 82285
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7542648315429688,
      "learning_rate": 0.0004301680174535731,
      "loss": 2.7205,
      "step": 82286
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2426741123199463,
      "learning_rate": 0.0004301643319913184,
      "loss": 2.7733,
      "step": 82287
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1577467918395996,
      "learning_rate": 0.0004301606465048637,
      "loss": 2.9202,
      "step": 82288
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.4822020530700684,
      "learning_rate": 0.0004301569609942098,
      "loss": 2.9385,
      "step": 82289
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8269548416137695,
      "learning_rate": 0.00043015327545935716,
      "loss": 3.1189,
      "step": 82290
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.559643030166626,
      "learning_rate": 0.00043014958990030656,
      "loss": 2.807,
      "step": 82291
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7252531051635742,
      "learning_rate": 0.00043014590431705873,
      "loss": 3.0949,
      "step": 82292
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6763256788253784,
      "learning_rate": 0.00043014221870961433,
      "loss": 3.0561,
      "step": 82293
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4858163595199585,
      "learning_rate": 0.000430138533077974,
      "loss": 2.9553,
      "step": 82294
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1155006885528564,
      "learning_rate": 0.0004301348474221386,
      "loss": 3.0338,
      "step": 82295
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9276001453399658,
      "learning_rate": 0.00043013116174210847,
      "loss": 3.1898,
      "step": 82296
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5748552083969116,
      "learning_rate": 0.00043012747603788455,
      "loss": 3.1828,
      "step": 82297
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6072624921798706,
      "learning_rate": 0.0004301237903094676,
      "loss": 3.201,
      "step": 82298
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9870632886886597,
      "learning_rate": 0.00043012010455685805,
      "loss": 3.1379,
      "step": 82299
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7103453874588013,
      "learning_rate": 0.0004301164187800568,
      "loss": 3.1761,
      "step": 82300
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8064014911651611,
      "learning_rate": 0.0004301127329790644,
      "loss": 2.8284,
      "step": 82301
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5706675052642822,
      "learning_rate": 0.0004301090471538816,
      "loss": 2.9963,
      "step": 82302
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7490274906158447,
      "learning_rate": 0.00043010536130450906,
      "loss": 2.9621,
      "step": 82303
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8732783794403076,
      "learning_rate": 0.00043010167543094745,
      "loss": 3.161,
      "step": 82304
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7549352645874023,
      "learning_rate": 0.0004300979895331975,
      "loss": 2.9484,
      "step": 82305
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9926131963729858,
      "learning_rate": 0.0004300943036112599,
      "loss": 3.2574,
      "step": 82306
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.4982783794403076,
      "learning_rate": 0.00043009061766513534,
      "loss": 3.1054,
      "step": 82307
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2154593467712402,
      "learning_rate": 0.00043008693169482447,
      "loss": 2.8972,
      "step": 82308
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.657594919204712,
      "learning_rate": 0.0004300832457003279,
      "loss": 3.4584,
      "step": 82309
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.823345184326172,
      "learning_rate": 0.0004300795596816465,
      "loss": 2.8259,
      "step": 82310
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.936806082725525,
      "learning_rate": 0.0004300758736387808,
      "loss": 2.9471,
      "step": 82311
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9208436012268066,
      "learning_rate": 0.00043007218757173157,
      "loss": 2.9606,
      "step": 82312
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7404351234436035,
      "learning_rate": 0.00043006850148049946,
      "loss": 2.7707,
      "step": 82313
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.6785335540771484,
      "learning_rate": 0.00043006481536508515,
      "loss": 2.7746,
      "step": 82314
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1839993000030518,
      "learning_rate": 0.00043006112922548933,
      "loss": 2.8549,
      "step": 82315
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9433375597000122,
      "learning_rate": 0.00043005744306171266,
      "loss": 3.031,
      "step": 82316
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8793522119522095,
      "learning_rate": 0.000430053756873756,
      "loss": 2.9473,
      "step": 82317
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.926841139793396,
      "learning_rate": 0.0004300500706616197,
      "loss": 3.0763,
      "step": 82318
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7167962789535522,
      "learning_rate": 0.00043004638442530475,
      "loss": 3.0333,
      "step": 82319
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6008248329162598,
      "learning_rate": 0.0004300426981648117,
      "loss": 3.0286,
      "step": 82320
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8548851013183594,
      "learning_rate": 0.0004300390118801413,
      "loss": 3.1199,
      "step": 82321
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.685347318649292,
      "learning_rate": 0.00043003532557129404,
      "loss": 3.3229,
      "step": 82322
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5967203378677368,
      "learning_rate": 0.000430031639238271,
      "loss": 3.1941,
      "step": 82323
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6376241445541382,
      "learning_rate": 0.00043002795288107237,
      "loss": 3.0596,
      "step": 82324
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1242024898529053,
      "learning_rate": 0.00043002426649969925,
      "loss": 3.0383,
      "step": 82325
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6431565284729004,
      "learning_rate": 0.00043002058009415216,
      "loss": 3.3265,
      "step": 82326
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.19952654838562,
      "learning_rate": 0.0004300168936644317,
      "loss": 2.9581,
      "step": 82327
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8133594989776611,
      "learning_rate": 0.0004300132072105387,
      "loss": 2.7967,
      "step": 82328
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4994423389434814,
      "learning_rate": 0.0004300095207324738,
      "loss": 3.0547,
      "step": 82329
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.606418251991272,
      "learning_rate": 0.0004300058342302377,
      "loss": 3.0712,
      "step": 82330
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4490877389907837,
      "learning_rate": 0.00043000214770383096,
      "loss": 2.9179,
      "step": 82331
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.728378415107727,
      "learning_rate": 0.00042999846115325443,
      "loss": 3.1231,
      "step": 82332
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5691736936569214,
      "learning_rate": 0.0004299947745785088,
      "loss": 2.967,
      "step": 82333
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.889337420463562,
      "learning_rate": 0.0004299910879795946,
      "loss": 3.3346,
      "step": 82334
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4792821407318115,
      "learning_rate": 0.0004299874013565126,
      "loss": 3.0852,
      "step": 82335
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6240500211715698,
      "learning_rate": 0.00042998371470926355,
      "loss": 2.9016,
      "step": 82336
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.6141116619110107,
      "learning_rate": 0.00042998002803784803,
      "loss": 3.2114,
      "step": 82337
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.953050971031189,
      "learning_rate": 0.0004299763413422667,
      "loss": 3.1003,
      "step": 82338
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8882391452789307,
      "learning_rate": 0.00042997265462252045,
      "loss": 2.8998,
      "step": 82339
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7270088195800781,
      "learning_rate": 0.00042996896787860975,
      "loss": 3.1236,
      "step": 82340
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9489182233810425,
      "learning_rate": 0.0004299652811105354,
      "loss": 3.1159,
      "step": 82341
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0033864974975586,
      "learning_rate": 0.000429961594318298,
      "loss": 3.2989,
      "step": 82342
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6479734182357788,
      "learning_rate": 0.0004299579075018984,
      "loss": 3.2864,
      "step": 82343
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7517926692962646,
      "learning_rate": 0.00042995422066133705,
      "loss": 2.914,
      "step": 82344
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2444345951080322,
      "learning_rate": 0.0004299505337966147,
      "loss": 2.969,
      "step": 82345
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.295349359512329,
      "learning_rate": 0.00042994684690773226,
      "loss": 2.7625,
      "step": 82346
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3389933109283447,
      "learning_rate": 0.0004299431599946902,
      "loss": 3.0946,
      "step": 82347
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6779295206069946,
      "learning_rate": 0.00042993947305748924,
      "loss": 2.9441,
      "step": 82348
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.619096279144287,
      "learning_rate": 0.0004299357860961301,
      "loss": 2.9403,
      "step": 82349
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7945210933685303,
      "learning_rate": 0.00042993209911061344,
      "loss": 3.2439,
      "step": 82350
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1795170307159424,
      "learning_rate": 0.00042992841210093985,
      "loss": 2.7959,
      "step": 82351
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7851853370666504,
      "learning_rate": 0.00042992472506711023,
      "loss": 2.9639,
      "step": 82352
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4479118585586548,
      "learning_rate": 0.00042992103800912514,
      "loss": 2.9555,
      "step": 82353
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6561861038208008,
      "learning_rate": 0.0004299173509269852,
      "loss": 3.2176,
      "step": 82354
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4981343746185303,
      "learning_rate": 0.00042991366382069126,
      "loss": 3.0403,
      "step": 82355
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6110904216766357,
      "learning_rate": 0.0004299099766902439,
      "loss": 3.0581,
      "step": 82356
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5557866096496582,
      "learning_rate": 0.0004299062895356438,
      "loss": 2.8687,
      "step": 82357
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5871727466583252,
      "learning_rate": 0.0004299026023568917,
      "loss": 3.1583,
      "step": 82358
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.651134729385376,
      "learning_rate": 0.00042989891515398824,
      "loss": 3.0449,
      "step": 82359
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5339977741241455,
      "learning_rate": 0.00042989522792693407,
      "loss": 2.8697,
      "step": 82360
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4723424911499023,
      "learning_rate": 0.00042989154067573,
      "loss": 2.9367,
      "step": 82361
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8626655340194702,
      "learning_rate": 0.00042988785340037657,
      "loss": 2.8491,
      "step": 82362
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6742998361587524,
      "learning_rate": 0.0004298841661008746,
      "loss": 3.0089,
      "step": 82363
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8425270318984985,
      "learning_rate": 0.0004298804787772246,
      "loss": 2.9169,
      "step": 82364
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6693774461746216,
      "learning_rate": 0.00042987679142942757,
      "loss": 3.1175,
      "step": 82365
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7850916385650635,
      "learning_rate": 0.0004298731040574838,
      "loss": 3.0407,
      "step": 82366
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0996971130371094,
      "learning_rate": 0.0004298694166613943,
      "loss": 3.1295,
      "step": 82367
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.077397584915161,
      "learning_rate": 0.0004298657292411595,
      "loss": 3.0614,
      "step": 82368
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.001561164855957,
      "learning_rate": 0.0004298620417967804,
      "loss": 3.0398,
      "step": 82369
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5484051704406738,
      "learning_rate": 0.0004298583543282573,
      "loss": 3.0501,
      "step": 82370
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4818394184112549,
      "learning_rate": 0.00042985466683559116,
      "loss": 2.9204,
      "step": 82371
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2646095752716064,
      "learning_rate": 0.0004298509793187826,
      "loss": 2.7513,
      "step": 82372
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0921096801757812,
      "learning_rate": 0.0004298472917778323,
      "loss": 3.1722,
      "step": 82373
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.740425705909729,
      "learning_rate": 0.00042984360421274083,
      "loss": 3.0869,
      "step": 82374
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.0651659965515137,
      "learning_rate": 0.0004298399166235091,
      "loss": 3.0294,
      "step": 82375
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5819909572601318,
      "learning_rate": 0.00042983622901013763,
      "loss": 3.0418,
      "step": 82376
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7862087488174438,
      "learning_rate": 0.0004298325413726271,
      "loss": 2.8824,
      "step": 82377
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7657840251922607,
      "learning_rate": 0.00042982885371097837,
      "loss": 3.0043,
      "step": 82378
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.629380464553833,
      "learning_rate": 0.00042982516602519197,
      "loss": 2.9394,
      "step": 82379
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3938860893249512,
      "learning_rate": 0.0004298214783152685,
      "loss": 3.3648,
      "step": 82380
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5627870559692383,
      "learning_rate": 0.00042981779058120893,
      "loss": 2.658,
      "step": 82381
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5947422981262207,
      "learning_rate": 0.0004298141028230137,
      "loss": 2.8903,
      "step": 82382
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8060044050216675,
      "learning_rate": 0.0004298104150406836,
      "loss": 3.0341,
      "step": 82383
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9030158519744873,
      "learning_rate": 0.0004298067272342193,
      "loss": 3.0711,
      "step": 82384
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8042988777160645,
      "learning_rate": 0.00042980303940362146,
      "loss": 3.0824,
      "step": 82385
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.286813735961914,
      "learning_rate": 0.00042979935154889077,
      "loss": 2.8324,
      "step": 82386
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6729423999786377,
      "learning_rate": 0.000429795663670028,
      "loss": 3.1385,
      "step": 82387
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.484858751296997,
      "learning_rate": 0.0004297919757670337,
      "loss": 3.1926,
      "step": 82388
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8195661306381226,
      "learning_rate": 0.0004297882878399086,
      "loss": 3.0741,
      "step": 82389
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7495126724243164,
      "learning_rate": 0.0004297845998886535,
      "loss": 3.0103,
      "step": 82390
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6750601530075073,
      "learning_rate": 0.00042978091191326896,
      "loss": 2.9717,
      "step": 82391
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8468424081802368,
      "learning_rate": 0.00042977722391375565,
      "loss": 2.7889,
      "step": 82392
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4459476470947266,
      "learning_rate": 0.00042977353589011434,
      "loss": 3.2593,
      "step": 82393
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0112409591674805,
      "learning_rate": 0.00042976984784234563,
      "loss": 2.8484,
      "step": 82394
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8096935749053955,
      "learning_rate": 0.0004297661597704503,
      "loss": 2.8671,
      "step": 82395
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0580520629882812,
      "learning_rate": 0.00042976247167442904,
      "loss": 3.1173,
      "step": 82396
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9322454929351807,
      "learning_rate": 0.0004297587835542824,
      "loss": 2.96,
      "step": 82397
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.042048692703247,
      "learning_rate": 0.00042975509541001125,
      "loss": 3.09,
      "step": 82398
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.813143491744995,
      "learning_rate": 0.00042975140724161616,
      "loss": 2.9972,
      "step": 82399
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.090571641921997,
      "learning_rate": 0.0004297477190490977,
      "loss": 3.1948,
      "step": 82400
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9192862510681152,
      "learning_rate": 0.0004297440308324568,
      "loss": 2.9252,
      "step": 82401
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4645013809204102,
      "learning_rate": 0.00042974034259169407,
      "loss": 3.216,
      "step": 82402
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.4483816623687744,
      "learning_rate": 0.0004297366543268101,
      "loss": 2.9319,
      "step": 82403
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.306727647781372,
      "learning_rate": 0.00042973296603780563,
      "loss": 3.1026,
      "step": 82404
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9030613899230957,
      "learning_rate": 0.00042972927772468137,
      "loss": 2.7521,
      "step": 82405
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9511752128601074,
      "learning_rate": 0.000429725589387438,
      "loss": 2.8988,
      "step": 82406
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.036929130554199,
      "learning_rate": 0.00042972190102607617,
      "loss": 2.8308,
      "step": 82407
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2341442108154297,
      "learning_rate": 0.00042971821264059664,
      "loss": 2.8638,
      "step": 82408
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6063660383224487,
      "learning_rate": 0.00042971452423099996,
      "loss": 3.2097,
      "step": 82409
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.056008815765381,
      "learning_rate": 0.000429710835797287,
      "loss": 2.9403,
      "step": 82410
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.467015504837036,
      "learning_rate": 0.00042970714733945827,
      "loss": 2.9336,
      "step": 82411
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7935289144515991,
      "learning_rate": 0.0004297034588575145,
      "loss": 2.7428,
      "step": 82412
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8469533920288086,
      "learning_rate": 0.00042969977035145653,
      "loss": 3.1765,
      "step": 82413
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9911609888076782,
      "learning_rate": 0.00042969608182128486,
      "loss": 3.0291,
      "step": 82414
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.4366016387939453,
      "learning_rate": 0.00042969239326700026,
      "loss": 2.8055,
      "step": 82415
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4790449142456055,
      "learning_rate": 0.0004296887046886033,
      "loss": 2.8092,
      "step": 82416
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4473203420639038,
      "learning_rate": 0.000429685016086095,
      "loss": 3.1569,
      "step": 82417
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5367918014526367,
      "learning_rate": 0.00042968132745947556,
      "loss": 2.9576,
      "step": 82418
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8344112634658813,
      "learning_rate": 0.00042967763880874597,
      "loss": 2.9858,
      "step": 82419
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5680065155029297,
      "learning_rate": 0.0004296739501339069,
      "loss": 3.0465,
      "step": 82420
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2657275199890137,
      "learning_rate": 0.000429670261434959,
      "loss": 2.9828,
      "step": 82421
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3301494121551514,
      "learning_rate": 0.00042966657271190296,
      "loss": 2.9496,
      "step": 82422
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6850844621658325,
      "learning_rate": 0.00042966288396473933,
      "loss": 3.0917,
      "step": 82423
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4303827285766602,
      "learning_rate": 0.0004296591951934691,
      "loss": 3.1814,
      "step": 82424
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5572900772094727,
      "learning_rate": 0.0004296555063980927,
      "loss": 3.2215,
      "step": 82425
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0982611179351807,
      "learning_rate": 0.0004296518175786109,
      "loss": 2.9933,
      "step": 82426
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7581915855407715,
      "learning_rate": 0.00042964812873502435,
      "loss": 2.8605,
      "step": 82427
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.532354474067688,
      "learning_rate": 0.0004296444398673339,
      "loss": 2.9149,
      "step": 82428
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5466749668121338,
      "learning_rate": 0.0004296407509755399,
      "loss": 3.0893,
      "step": 82429
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.093055486679077,
      "learning_rate": 0.00042963706205964336,
      "loss": 2.8685,
      "step": 82430
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8303509950637817,
      "learning_rate": 0.00042963337311964473,
      "loss": 2.8707,
      "step": 82431
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5586998462677,
      "learning_rate": 0.00042962968415554494,
      "loss": 2.8435,
      "step": 82432
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5740314722061157,
      "learning_rate": 0.0004296259951673445,
      "loss": 3.029,
      "step": 82433
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.527428150177002,
      "learning_rate": 0.0004296223061550441,
      "loss": 2.9009,
      "step": 82434
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7600592374801636,
      "learning_rate": 0.00042961861711864454,
      "loss": 3.2286,
      "step": 82435
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1495654582977295,
      "learning_rate": 0.0004296149280581464,
      "loss": 3.0012,
      "step": 82436
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8777824640274048,
      "learning_rate": 0.00042961123897355036,
      "loss": 2.9638,
      "step": 82437
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1625962257385254,
      "learning_rate": 0.00042960754986485724,
      "loss": 3.075,
      "step": 82438
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4891595840454102,
      "learning_rate": 0.0004296038607320675,
      "loss": 3.2764,
      "step": 82439
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6333359479904175,
      "learning_rate": 0.000429600171575182,
      "loss": 3.0606,
      "step": 82440
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7434481382369995,
      "learning_rate": 0.00042959648239420143,
      "loss": 2.95,
      "step": 82441
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6664773225784302,
      "learning_rate": 0.0004295927931891265,
      "loss": 3.1708,
      "step": 82442
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8109914064407349,
      "learning_rate": 0.00042958910395995764,
      "loss": 3.0247,
      "step": 82443
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.759695053100586,
      "learning_rate": 0.0004295854147066958,
      "loss": 3.035,
      "step": 82444
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0194954872131348,
      "learning_rate": 0.0004295817254293416,
      "loss": 2.979,
      "step": 82445
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.351991653442383,
      "learning_rate": 0.0004295780361278957,
      "loss": 3.1812,
      "step": 82446
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3911793231964111,
      "learning_rate": 0.0004295743468023588,
      "loss": 3.0448,
      "step": 82447
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9258191585540771,
      "learning_rate": 0.00042957065745273163,
      "loss": 3.1266,
      "step": 82448
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.0187954902648926,
      "learning_rate": 0.00042956696807901483,
      "loss": 2.8262,
      "step": 82449
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.899578094482422,
      "learning_rate": 0.00042956327868120896,
      "loss": 3.0416,
      "step": 82450
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.241389751434326,
      "learning_rate": 0.00042955958925931494,
      "loss": 2.7176,
      "step": 82451
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.985486388206482,
      "learning_rate": 0.0004295558998133333,
      "loss": 3.1092,
      "step": 82452
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.030726194381714,
      "learning_rate": 0.00042955221034326476,
      "loss": 3.1533,
      "step": 82453
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7000757455825806,
      "learning_rate": 0.0004295485208491101,
      "loss": 3.0529,
      "step": 82454
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.7522432804107666,
      "learning_rate": 0.00042954483133086983,
      "loss": 2.876,
      "step": 82455
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5532604455947876,
      "learning_rate": 0.0004295411417885448,
      "loss": 2.845,
      "step": 82456
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8463313579559326,
      "learning_rate": 0.0004295374522221356,
      "loss": 2.9052,
      "step": 82457
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.051959991455078,
      "learning_rate": 0.00042953376263164296,
      "loss": 2.6447,
      "step": 82458
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1327998638153076,
      "learning_rate": 0.00042953007301706745,
      "loss": 2.8792,
      "step": 82459
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0665385723114014,
      "learning_rate": 0.00042952638337841,
      "loss": 2.9648,
      "step": 82460
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9665275812149048,
      "learning_rate": 0.00042952269371567106,
      "loss": 2.9249,
      "step": 82461
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9048510789871216,
      "learning_rate": 0.0004295190040288515,
      "loss": 3.002,
      "step": 82462
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.808606505393982,
      "learning_rate": 0.00042951531431795176,
      "loss": 2.9927,
      "step": 82463
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.113490104675293,
      "learning_rate": 0.0004295116245829728,
      "loss": 3.0578,
      "step": 82464
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7616838216781616,
      "learning_rate": 0.0004295079348239151,
      "loss": 3.2158,
      "step": 82465
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8053113222122192,
      "learning_rate": 0.00042950424504077954,
      "loss": 2.8714,
      "step": 82466
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1003267765045166,
      "learning_rate": 0.0004295005552335666,
      "loss": 2.9665,
      "step": 82467
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7849595546722412,
      "learning_rate": 0.0004294968654022771,
      "loss": 3.0098,
      "step": 82468
    },
    {
      "epoch": 1.07,
      "grad_norm": 4.855752944946289,
      "learning_rate": 0.0004294931755469117,
      "loss": 3.0811,
      "step": 82469
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.3881022930145264,
      "learning_rate": 0.00042948948566747105,
      "loss": 3.0159,
      "step": 82470
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.052461624145508,
      "learning_rate": 0.0004294857957639558,
      "loss": 2.938,
      "step": 82471
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4517356157302856,
      "learning_rate": 0.0004294821058363668,
      "loss": 2.9916,
      "step": 82472
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8412433862686157,
      "learning_rate": 0.0004294784158847046,
      "loss": 2.8597,
      "step": 82473
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.491816520690918,
      "learning_rate": 0.0004294747259089699,
      "loss": 3.3434,
      "step": 82474
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.865493059158325,
      "learning_rate": 0.00042947103590916346,
      "loss": 2.9547,
      "step": 82475
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.658553957939148,
      "learning_rate": 0.0004294673458852859,
      "loss": 2.9312,
      "step": 82476
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.603363275527954,
      "learning_rate": 0.0004294636558373378,
      "loss": 2.8064,
      "step": 82477
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.685285210609436,
      "learning_rate": 0.00042945996576532013,
      "loss": 3.1524,
      "step": 82478
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6039931774139404,
      "learning_rate": 0.0004294562756692333,
      "loss": 2.894,
      "step": 82479
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.128493309020996,
      "learning_rate": 0.00042945258554907806,
      "loss": 2.9295,
      "step": 82480
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5539026260375977,
      "learning_rate": 0.00042944889540485527,
      "loss": 3.1558,
      "step": 82481
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.846494197845459,
      "learning_rate": 0.00042944520523656546,
      "loss": 2.9643,
      "step": 82482
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6188613176345825,
      "learning_rate": 0.00042944151504420924,
      "loss": 3.1854,
      "step": 82483
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6283783912658691,
      "learning_rate": 0.0004294378248277875,
      "loss": 3.0498,
      "step": 82484
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6645618677139282,
      "learning_rate": 0.00042943413458730085,
      "loss": 2.9499,
      "step": 82485
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5906840562820435,
      "learning_rate": 0.00042943044432274983,
      "loss": 2.9434,
      "step": 82486
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5494457483291626,
      "learning_rate": 0.0004294267540341354,
      "loss": 2.8986,
      "step": 82487
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.782409429550171,
      "learning_rate": 0.00042942306372145797,
      "loss": 3.1547,
      "step": 82488
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9459308385849,
      "learning_rate": 0.0004294193733847184,
      "loss": 3.174,
      "step": 82489
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6741950511932373,
      "learning_rate": 0.0004294156830239173,
      "loss": 3.0845,
      "step": 82490
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4171764850616455,
      "learning_rate": 0.0004294119926390554,
      "loss": 2.8688,
      "step": 82491
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1686818599700928,
      "learning_rate": 0.00042940830223013333,
      "loss": 2.9211,
      "step": 82492
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7807706594467163,
      "learning_rate": 0.0004294046117971519,
      "loss": 3.2639,
      "step": 82493
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.625641942024231,
      "learning_rate": 0.00042940092134011165,
      "loss": 2.9721,
      "step": 82494
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9051737785339355,
      "learning_rate": 0.0004293972308590133,
      "loss": 2.9774,
      "step": 82495
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5340592861175537,
      "learning_rate": 0.0004293935403538576,
      "loss": 2.8193,
      "step": 82496
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9330183267593384,
      "learning_rate": 0.0004293898498246452,
      "loss": 3.1115,
      "step": 82497
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.844042420387268,
      "learning_rate": 0.00042938615927137677,
      "loss": 2.8674,
      "step": 82498
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6624382734298706,
      "learning_rate": 0.00042938246869405304,
      "loss": 2.9234,
      "step": 82499
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.014533281326294,
      "learning_rate": 0.00042937877809267466,
      "loss": 2.9292,
      "step": 82500
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6430187225341797,
      "learning_rate": 0.00042937508746724223,
      "loss": 3.1284,
      "step": 82501
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.5522642135620117,
      "learning_rate": 0.0004293713968177567,
      "loss": 2.9561,
      "step": 82502
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.626859426498413,
      "learning_rate": 0.00042936770614421845,
      "loss": 3.1876,
      "step": 82503
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.3804479837417603,
      "learning_rate": 0.0004293640154466283,
      "loss": 2.7743,
      "step": 82504
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5935068130493164,
      "learning_rate": 0.000429360324724987,
      "loss": 2.9951,
      "step": 82505
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7456271648406982,
      "learning_rate": 0.00042935663397929516,
      "loss": 3.0175,
      "step": 82506
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.032700300216675,
      "learning_rate": 0.00042935294320955346,
      "loss": 3.0519,
      "step": 82507
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2189104557037354,
      "learning_rate": 0.0004293492524157626,
      "loss": 2.9899,
      "step": 82508
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5658681392669678,
      "learning_rate": 0.0004293455615979233,
      "loss": 3.0026,
      "step": 82509
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7936941385269165,
      "learning_rate": 0.0004293418707560362,
      "loss": 3.1965,
      "step": 82510
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.472367525100708,
      "learning_rate": 0.00042933817989010203,
      "loss": 3.0778,
      "step": 82511
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8961139917373657,
      "learning_rate": 0.0004293344890001214,
      "loss": 2.8031,
      "step": 82512
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9295392036437988,
      "learning_rate": 0.0004293307980860951,
      "loss": 3.1063,
      "step": 82513
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6477165222167969,
      "learning_rate": 0.00042932710714802376,
      "loss": 3.1963,
      "step": 82514
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5325381755828857,
      "learning_rate": 0.000429323416185908,
      "loss": 3.2786,
      "step": 82515
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8431297540664673,
      "learning_rate": 0.00042931972519974867,
      "loss": 2.9715,
      "step": 82516
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7394131422042847,
      "learning_rate": 0.00042931603418954633,
      "loss": 3.1019,
      "step": 82517
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.818877100944519,
      "learning_rate": 0.0004293123431553017,
      "loss": 2.8647,
      "step": 82518
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8645570278167725,
      "learning_rate": 0.00042930865209701543,
      "loss": 2.947,
      "step": 82519
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.6684999465942383,
      "learning_rate": 0.0004293049610146883,
      "loss": 3.1425,
      "step": 82520
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5536142587661743,
      "learning_rate": 0.0004293012699083209,
      "loss": 2.8695,
      "step": 82521
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9495481252670288,
      "learning_rate": 0.000429297578777914,
      "loss": 2.8962,
      "step": 82522
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3592302799224854,
      "learning_rate": 0.00042929388762346813,
      "loss": 3.1393,
      "step": 82523
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.776855230331421,
      "learning_rate": 0.00042929019644498417,
      "loss": 3.1653,
      "step": 82524
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.094008445739746,
      "learning_rate": 0.0004292865052424627,
      "loss": 2.8616,
      "step": 82525
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5190727710723877,
      "learning_rate": 0.0004292828140159044,
      "loss": 2.8999,
      "step": 82526
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5026355981826782,
      "learning_rate": 0.00042927912276531004,
      "loss": 3.0365,
      "step": 82527
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9841620922088623,
      "learning_rate": 0.0004292754314906802,
      "loss": 2.9986,
      "step": 82528
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.634680151939392,
      "learning_rate": 0.00042927174019201565,
      "loss": 2.5622,
      "step": 82529
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7516388893127441,
      "learning_rate": 0.0004292680488693171,
      "loss": 2.8854,
      "step": 82530
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.5067594051361084,
      "learning_rate": 0.00042926435752258505,
      "loss": 2.9663,
      "step": 82531
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6655852794647217,
      "learning_rate": 0.00042926066615182033,
      "loss": 2.9998,
      "step": 82532
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.0993196964263916,
      "learning_rate": 0.0004292569747570238,
      "loss": 2.9914,
      "step": 82533
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.706870436668396,
      "learning_rate": 0.0004292532833381958,
      "loss": 3.1994,
      "step": 82534
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.672990560531616,
      "learning_rate": 0.0004292495918953372,
      "loss": 2.9598,
      "step": 82535
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.671739339828491,
      "learning_rate": 0.00042924590042844863,
      "loss": 2.9173,
      "step": 82536
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8405466079711914,
      "learning_rate": 0.0004292422089375309,
      "loss": 2.9839,
      "step": 82537
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9751238822937012,
      "learning_rate": 0.00042923851742258446,
      "loss": 3.1443,
      "step": 82538
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.2573468685150146,
      "learning_rate": 0.00042923482588361027,
      "loss": 2.8624,
      "step": 82539
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6365184783935547,
      "learning_rate": 0.0004292311343206088,
      "loss": 2.928,
      "step": 82540
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6202880144119263,
      "learning_rate": 0.0004292274427335809,
      "loss": 3.0152,
      "step": 82541
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9614307880401611,
      "learning_rate": 0.00042922375112252714,
      "loss": 2.8836,
      "step": 82542
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9730315208435059,
      "learning_rate": 0.00042922005948744824,
      "loss": 2.9504,
      "step": 82543
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.9099756479263306,
      "learning_rate": 0.0004292163678283449,
      "loss": 2.8566,
      "step": 82544
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8792781829833984,
      "learning_rate": 0.00042921267614521783,
      "loss": 2.7672,
      "step": 82545
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.021652936935425,
      "learning_rate": 0.0004292089844380676,
      "loss": 3.0499,
      "step": 82546
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.076772928237915,
      "learning_rate": 0.000429205292706895,
      "loss": 2.9842,
      "step": 82547
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.573228359222412,
      "learning_rate": 0.0004292016009517008,
      "loss": 3.2589,
      "step": 82548
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6420950889587402,
      "learning_rate": 0.0004291979091724855,
      "loss": 3.0784,
      "step": 82549
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7471790313720703,
      "learning_rate": 0.0004291942173692498,
      "loss": 3.1259,
      "step": 82550
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.3883492946624756,
      "learning_rate": 0.00042919052554199466,
      "loss": 2.9694,
      "step": 82551
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.6712377071380615,
      "learning_rate": 0.0004291868336907204,
      "loss": 2.9589,
      "step": 82552
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7375661134719849,
      "learning_rate": 0.00042918314181542786,
      "loss": 3.1786,
      "step": 82553
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.8087410926818848,
      "learning_rate": 0.00042917944991611784,
      "loss": 3.0377,
      "step": 82554
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8394442796707153,
      "learning_rate": 0.00042917575799279083,
      "loss": 3.0229,
      "step": 82555
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.4781062602996826,
      "learning_rate": 0.00042917206604544767,
      "loss": 3.0186,
      "step": 82556
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1122758388519287,
      "learning_rate": 0.00042916837407408893,
      "loss": 3.1699,
      "step": 82557
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.033090353012085,
      "learning_rate": 0.0004291646820787154,
      "loss": 2.8819,
      "step": 82558
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.7950310707092285,
      "learning_rate": 0.00042916099005932764,
      "loss": 2.9167,
      "step": 82559
    },
    {
      "epoch": 1.07,
      "grad_norm": 1.8525642156600952,
      "learning_rate": 0.00042915729801592655,
      "loss": 3.031,
      "step": 82560
    },
    {
      "epoch": 1.07,
      "grad_norm": 2.1290855407714844,
      "learning_rate": 0.0004291536059485126,
      "loss": 3.0109,
      "step": 82561
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5101333856582642,
      "learning_rate": 0.0004291499138570865,
      "loss": 3.266,
      "step": 82562
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6915662288665771,
      "learning_rate": 0.00042914622174164906,
      "loss": 2.9451,
      "step": 82563
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5424373149871826,
      "learning_rate": 0.0004291425296022009,
      "loss": 2.9506,
      "step": 82564
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.913896083831787,
      "learning_rate": 0.00042913883743874267,
      "loss": 2.9682,
      "step": 82565
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.211061477661133,
      "learning_rate": 0.0004291351452512752,
      "loss": 2.7103,
      "step": 82566
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0915465354919434,
      "learning_rate": 0.00042913145303979895,
      "loss": 2.9939,
      "step": 82567
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.818700909614563,
      "learning_rate": 0.00042912776080431474,
      "loss": 3.0826,
      "step": 82568
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9544930458068848,
      "learning_rate": 0.0004291240685448233,
      "loss": 2.9069,
      "step": 82569
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.1290669441223145,
      "learning_rate": 0.0004291203762613252,
      "loss": 2.9802,
      "step": 82570
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6780868768692017,
      "learning_rate": 0.0004291166839538212,
      "loss": 2.835,
      "step": 82571
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1282150745391846,
      "learning_rate": 0.00042911299162231203,
      "loss": 3.1945,
      "step": 82572
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.488438844680786,
      "learning_rate": 0.0004291092992667982,
      "loss": 3.0317,
      "step": 82573
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3960962295532227,
      "learning_rate": 0.0004291056068872806,
      "loss": 2.8836,
      "step": 82574
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.27700138092041,
      "learning_rate": 0.0004291019144837598,
      "loss": 3.0564,
      "step": 82575
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7866394519805908,
      "learning_rate": 0.00042909822205623656,
      "loss": 2.9505,
      "step": 82576
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9388031959533691,
      "learning_rate": 0.0004290945296047115,
      "loss": 3.2666,
      "step": 82577
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.7902770042419434,
      "learning_rate": 0.0004290908371291852,
      "loss": 2.8171,
      "step": 82578
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.816084146499634,
      "learning_rate": 0.0004290871446296587,
      "loss": 3.0122,
      "step": 82579
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7160395383834839,
      "learning_rate": 0.00042908345210613234,
      "loss": 2.939,
      "step": 82580
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1356594562530518,
      "learning_rate": 0.00042907975955860695,
      "loss": 3.1895,
      "step": 82581
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.608295440673828,
      "learning_rate": 0.0004290760669870833,
      "loss": 2.9877,
      "step": 82582
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5716463327407837,
      "learning_rate": 0.00042907237439156177,
      "loss": 3.3338,
      "step": 82583
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8180738687515259,
      "learning_rate": 0.00042906868177204334,
      "loss": 3.082,
      "step": 82584
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.835935115814209,
      "learning_rate": 0.00042906498912852865,
      "loss": 2.8446,
      "step": 82585
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4966049194335938,
      "learning_rate": 0.0004290612964610183,
      "loss": 3.1605,
      "step": 82586
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6293487548828125,
      "learning_rate": 0.00042905760376951306,
      "loss": 2.9678,
      "step": 82587
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8015412092208862,
      "learning_rate": 0.0004290539110540135,
      "loss": 2.994,
      "step": 82588
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5766602754592896,
      "learning_rate": 0.0004290502183145204,
      "loss": 2.9253,
      "step": 82589
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8134993314743042,
      "learning_rate": 0.0004290465255510345,
      "loss": 3.0739,
      "step": 82590
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4570344686508179,
      "learning_rate": 0.0004290428327635564,
      "loss": 3.1938,
      "step": 82591
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.468477725982666,
      "learning_rate": 0.00042903913995208674,
      "loss": 2.903,
      "step": 82592
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6137912273406982,
      "learning_rate": 0.00042903544711662625,
      "loss": 3.2492,
      "step": 82593
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3925306797027588,
      "learning_rate": 0.0004290317542571757,
      "loss": 2.8926,
      "step": 82594
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7264463901519775,
      "learning_rate": 0.00042902806137373576,
      "loss": 3.2477,
      "step": 82595
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4319987297058105,
      "learning_rate": 0.00042902436846630696,
      "loss": 3.0034,
      "step": 82596
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9205100536346436,
      "learning_rate": 0.0004290206755348902,
      "loss": 2.8351,
      "step": 82597
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8030900955200195,
      "learning_rate": 0.0004290169825794859,
      "loss": 2.7449,
      "step": 82598
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.595245361328125,
      "learning_rate": 0.000429013289600095,
      "loss": 3.0048,
      "step": 82599
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7389588356018066,
      "learning_rate": 0.0004290095965967181,
      "loss": 3.0388,
      "step": 82600
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8391995429992676,
      "learning_rate": 0.00042900590356935594,
      "loss": 3.025,
      "step": 82601
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6020383834838867,
      "learning_rate": 0.00042900221051800905,
      "loss": 2.8409,
      "step": 82602
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0743424892425537,
      "learning_rate": 0.0004289985174426782,
      "loss": 3.0374,
      "step": 82603
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6021744012832642,
      "learning_rate": 0.00042899482434336417,
      "loss": 2.9965,
      "step": 82604
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6381124258041382,
      "learning_rate": 0.00042899113122006757,
      "loss": 3.1146,
      "step": 82605
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7396271228790283,
      "learning_rate": 0.000428987438072789,
      "loss": 2.8246,
      "step": 82606
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.261526584625244,
      "learning_rate": 0.0004289837449015293,
      "loss": 3.0333,
      "step": 82607
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6301459074020386,
      "learning_rate": 0.0004289800517062891,
      "loss": 3.064,
      "step": 82608
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0573883056640625,
      "learning_rate": 0.000428976358487069,
      "loss": 3.1283,
      "step": 82609
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7006635665893555,
      "learning_rate": 0.0004289726652438698,
      "loss": 3.0624,
      "step": 82610
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.7487728595733643,
      "learning_rate": 0.0004289689719766921,
      "loss": 3.1207,
      "step": 82611
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0104305744171143,
      "learning_rate": 0.0004289652786855367,
      "loss": 2.9929,
      "step": 82612
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8587942123413086,
      "learning_rate": 0.0004289615853704042,
      "loss": 3.0723,
      "step": 82613
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8727561235427856,
      "learning_rate": 0.0004289578920312953,
      "loss": 3.0186,
      "step": 82614
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.522718667984009,
      "learning_rate": 0.0004289541986682108,
      "loss": 3.091,
      "step": 82615
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.868399977684021,
      "learning_rate": 0.0004289505052811511,
      "loss": 3.0489,
      "step": 82616
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.789971351623535,
      "learning_rate": 0.0004289468118701172,
      "loss": 2.8221,
      "step": 82617
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.863705635070801,
      "learning_rate": 0.00042894311843510964,
      "loss": 2.9165,
      "step": 82618
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1139066219329834,
      "learning_rate": 0.00042893942497612903,
      "loss": 2.9765,
      "step": 82619
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.082674980163574,
      "learning_rate": 0.00042893573149317617,
      "loss": 2.8583,
      "step": 82620
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.241427421569824,
      "learning_rate": 0.0004289320379862518,
      "loss": 3.0688,
      "step": 82621
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0899155139923096,
      "learning_rate": 0.00042892834445535646,
      "loss": 3.1715,
      "step": 82622
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6887621879577637,
      "learning_rate": 0.0004289246509004909,
      "loss": 2.7645,
      "step": 82623
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5651328563690186,
      "learning_rate": 0.0004289209573216559,
      "loss": 3.1641,
      "step": 82624
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.193077802658081,
      "learning_rate": 0.000428917263718852,
      "loss": 2.9926,
      "step": 82625
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.020069122314453,
      "learning_rate": 0.00042891357009207994,
      "loss": 2.9357,
      "step": 82626
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6561462879180908,
      "learning_rate": 0.00042890987644134045,
      "loss": 2.9771,
      "step": 82627
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.669681429862976,
      "learning_rate": 0.00042890618276663413,
      "loss": 3.1022,
      "step": 82628
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8040015697479248,
      "learning_rate": 0.00042890248906796174,
      "loss": 3.2129,
      "step": 82629
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2045977115631104,
      "learning_rate": 0.000428898795345324,
      "loss": 3.2148,
      "step": 82630
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6339646577835083,
      "learning_rate": 0.0004288951015987215,
      "loss": 3.2377,
      "step": 82631
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6791666746139526,
      "learning_rate": 0.00042889140782815497,
      "loss": 2.94,
      "step": 82632
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.745837926864624,
      "learning_rate": 0.0004288877140336251,
      "loss": 2.8594,
      "step": 82633
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4890543222427368,
      "learning_rate": 0.0004288840202151325,
      "loss": 3.0885,
      "step": 82634
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6161251068115234,
      "learning_rate": 0.000428880326372678,
      "loss": 2.8507,
      "step": 82635
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8031779527664185,
      "learning_rate": 0.0004288766325062622,
      "loss": 3.2222,
      "step": 82636
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.366065263748169,
      "learning_rate": 0.00042887293861588576,
      "loss": 2.8743,
      "step": 82637
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0146591663360596,
      "learning_rate": 0.00042886924470154946,
      "loss": 2.9205,
      "step": 82638
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6678982973098755,
      "learning_rate": 0.0004288655507632539,
      "loss": 2.769,
      "step": 82639
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6237468719482422,
      "learning_rate": 0.00042886185680099983,
      "loss": 2.8396,
      "step": 82640
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5597845315933228,
      "learning_rate": 0.0004288581628147879,
      "loss": 3.1268,
      "step": 82641
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6662627458572388,
      "learning_rate": 0.00042885446880461877,
      "loss": 3.1999,
      "step": 82642
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6373331546783447,
      "learning_rate": 0.00042885077477049325,
      "loss": 3.0115,
      "step": 82643
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.819685935974121,
      "learning_rate": 0.0004288470807124119,
      "loss": 3.1197,
      "step": 82644
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.455086350440979,
      "learning_rate": 0.0004288433866303754,
      "loss": 3.1472,
      "step": 82645
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8166236877441406,
      "learning_rate": 0.00042883969252438455,
      "loss": 3.0703,
      "step": 82646
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.557587742805481,
      "learning_rate": 0.0004288359983944399,
      "loss": 2.95,
      "step": 82647
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7750054597854614,
      "learning_rate": 0.0004288323042405422,
      "loss": 3.0362,
      "step": 82648
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.762880802154541,
      "learning_rate": 0.0004288286100626922,
      "loss": 2.964,
      "step": 82649
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3988778591156006,
      "learning_rate": 0.0004288249158608905,
      "loss": 3.1288,
      "step": 82650
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1013166904449463,
      "learning_rate": 0.0004288212216351379,
      "loss": 3.0013,
      "step": 82651
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4577826261520386,
      "learning_rate": 0.0004288175273854349,
      "loss": 2.9099,
      "step": 82652
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.486456871032715,
      "learning_rate": 0.00042881383311178236,
      "loss": 2.9638,
      "step": 82653
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.8464245796203613,
      "learning_rate": 0.0004288101388141808,
      "loss": 3.0454,
      "step": 82654
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.092710256576538,
      "learning_rate": 0.0004288064444926311,
      "loss": 2.9939,
      "step": 82655
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5363904237747192,
      "learning_rate": 0.0004288027501471338,
      "loss": 2.7774,
      "step": 82656
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8333046436309814,
      "learning_rate": 0.00042879905577768965,
      "loss": 2.8101,
      "step": 82657
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2649641036987305,
      "learning_rate": 0.0004287953613842994,
      "loss": 3.3502,
      "step": 82658
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1195778846740723,
      "learning_rate": 0.0004287916669669635,
      "loss": 3.1186,
      "step": 82659
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6073894500732422,
      "learning_rate": 0.00042878797252568284,
      "loss": 2.8631,
      "step": 82660
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.365056037902832,
      "learning_rate": 0.0004287842780604582,
      "loss": 2.8488,
      "step": 82661
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7070554494857788,
      "learning_rate": 0.00042878058357129,
      "loss": 2.8418,
      "step": 82662
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8143601417541504,
      "learning_rate": 0.0004287768890581791,
      "loss": 3.0876,
      "step": 82663
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.86330246925354,
      "learning_rate": 0.0004287731945211261,
      "loss": 3.2953,
      "step": 82664
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.442305088043213,
      "learning_rate": 0.0004287694999601318,
      "loss": 2.843,
      "step": 82665
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.09077525138855,
      "learning_rate": 0.0004287658053751968,
      "loss": 2.7821,
      "step": 82666
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8373677730560303,
      "learning_rate": 0.00042876211076632185,
      "loss": 3.0791,
      "step": 82667
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.861317753791809,
      "learning_rate": 0.00042875841613350754,
      "loss": 2.9906,
      "step": 82668
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.15165638923645,
      "learning_rate": 0.0004287547214767546,
      "loss": 3.0633,
      "step": 82669
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1234095096588135,
      "learning_rate": 0.00042875102679606374,
      "loss": 3.2111,
      "step": 82670
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.4334444999694824,
      "learning_rate": 0.0004287473320914356,
      "loss": 2.869,
      "step": 82671
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8988583087921143,
      "learning_rate": 0.00042874363736287096,
      "loss": 2.9416,
      "step": 82672
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9854426383972168,
      "learning_rate": 0.00042873994261037043,
      "loss": 2.9878,
      "step": 82673
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2489500045776367,
      "learning_rate": 0.0004287362478339347,
      "loss": 2.9692,
      "step": 82674
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5818939208984375,
      "learning_rate": 0.00042873255303356445,
      "loss": 3.0795,
      "step": 82675
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.635575771331787,
      "learning_rate": 0.00042872885820926046,
      "loss": 3.0966,
      "step": 82676
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7643396854400635,
      "learning_rate": 0.0004287251633610233,
      "loss": 2.9024,
      "step": 82677
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.730997920036316,
      "learning_rate": 0.0004287214684888537,
      "loss": 3.0612,
      "step": 82678
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.704027533531189,
      "learning_rate": 0.0004287177735927524,
      "loss": 3.1115,
      "step": 82679
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5722007751464844,
      "learning_rate": 0.00042871407867271994,
      "loss": 2.7429,
      "step": 82680
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7318135499954224,
      "learning_rate": 0.00042871038372875716,
      "loss": 3.0433,
      "step": 82681
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.656022548675537,
      "learning_rate": 0.0004287066887608647,
      "loss": 3.2514,
      "step": 82682
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5829604864120483,
      "learning_rate": 0.0004287029937690432,
      "loss": 3.1103,
      "step": 82683
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8443660736083984,
      "learning_rate": 0.0004286992987532934,
      "loss": 3.2631,
      "step": 82684
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6930148601531982,
      "learning_rate": 0.0004286956037136159,
      "loss": 3.0757,
      "step": 82685
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1324026584625244,
      "learning_rate": 0.0004286919086500116,
      "loss": 2.927,
      "step": 82686
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2631235122680664,
      "learning_rate": 0.000428688213562481,
      "loss": 2.8861,
      "step": 82687
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6364858150482178,
      "learning_rate": 0.00042868451845102476,
      "loss": 2.971,
      "step": 82688
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1264827251434326,
      "learning_rate": 0.0004286808233156437,
      "loss": 2.9914,
      "step": 82689
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.5610857009887695,
      "learning_rate": 0.00042867712815633844,
      "loss": 3.2021,
      "step": 82690
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0325589179992676,
      "learning_rate": 0.00042867343297310964,
      "loss": 3.1288,
      "step": 82691
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.926348090171814,
      "learning_rate": 0.00042866973776595804,
      "loss": 3.1522,
      "step": 82692
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.122023105621338,
      "learning_rate": 0.0004286660425348844,
      "loss": 2.7287,
      "step": 82693
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6443233489990234,
      "learning_rate": 0.00042866234727988915,
      "loss": 3.1255,
      "step": 82694
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5742230415344238,
      "learning_rate": 0.00042865865200097323,
      "loss": 2.8358,
      "step": 82695
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3838497400283813,
      "learning_rate": 0.0004286549566981372,
      "loss": 3.1101,
      "step": 82696
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.582159399986267,
      "learning_rate": 0.00042865126137138184,
      "loss": 3.0003,
      "step": 82697
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.804592490196228,
      "learning_rate": 0.0004286475660207077,
      "loss": 3.1101,
      "step": 82698
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.140031337738037,
      "learning_rate": 0.0004286438706461157,
      "loss": 3.162,
      "step": 82699
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6316554546356201,
      "learning_rate": 0.0004286401752476063,
      "loss": 3.2706,
      "step": 82700
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9495453834533691,
      "learning_rate": 0.00042863647982518017,
      "loss": 3.0659,
      "step": 82701
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.944283127784729,
      "learning_rate": 0.00042863278437883817,
      "loss": 3.321,
      "step": 82702
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6669116020202637,
      "learning_rate": 0.00042862908890858094,
      "loss": 3.0229,
      "step": 82703
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4795857667922974,
      "learning_rate": 0.0004286253934144091,
      "loss": 2.7872,
      "step": 82704
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9632923603057861,
      "learning_rate": 0.0004286216978963233,
      "loss": 3.0442,
      "step": 82705
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5350703001022339,
      "learning_rate": 0.00042861800235432444,
      "loss": 3.3539,
      "step": 82706
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9013051986694336,
      "learning_rate": 0.00042861430678841294,
      "loss": 3.1165,
      "step": 82707
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.722975254058838,
      "learning_rate": 0.0004286106111985897,
      "loss": 2.9003,
      "step": 82708
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7274973392486572,
      "learning_rate": 0.0004286069155848552,
      "loss": 2.975,
      "step": 82709
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5992636680603027,
      "learning_rate": 0.0004286032199472104,
      "loss": 2.849,
      "step": 82710
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.071211099624634,
      "learning_rate": 0.00042859952428565574,
      "loss": 2.9268,
      "step": 82711
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8579444885253906,
      "learning_rate": 0.000428595828600192,
      "loss": 2.837,
      "step": 82712
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1665313243865967,
      "learning_rate": 0.00042859213289082,
      "loss": 3.0165,
      "step": 82713
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.40351939201355,
      "learning_rate": 0.0004285884371575401,
      "loss": 3.0866,
      "step": 82714
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.531356930732727,
      "learning_rate": 0.00042858474140035325,
      "loss": 2.9693,
      "step": 82715
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.8268654346466064,
      "learning_rate": 0.0004285810456192601,
      "loss": 3.1182,
      "step": 82716
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6174671649932861,
      "learning_rate": 0.00042857734981426135,
      "loss": 2.7453,
      "step": 82717
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5408949851989746,
      "learning_rate": 0.0004285736539853575,
      "loss": 3.0234,
      "step": 82718
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7268245220184326,
      "learning_rate": 0.00042856995813254955,
      "loss": 2.9149,
      "step": 82719
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9279000759124756,
      "learning_rate": 0.0004285662622558379,
      "loss": 3.0684,
      "step": 82720
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.71978759765625,
      "learning_rate": 0.0004285625663552234,
      "loss": 2.7612,
      "step": 82721
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8652726411819458,
      "learning_rate": 0.0004285588704307067,
      "loss": 2.8347,
      "step": 82722
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0144712924957275,
      "learning_rate": 0.00042855517448228843,
      "loss": 3.1791,
      "step": 82723
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.7788186073303223,
      "learning_rate": 0.0004285514785099694,
      "loss": 2.862,
      "step": 82724
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7334184646606445,
      "learning_rate": 0.0004285477825137502,
      "loss": 3.0835,
      "step": 82725
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3486385345458984,
      "learning_rate": 0.0004285440864936315,
      "loss": 3.0106,
      "step": 82726
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.702027440071106,
      "learning_rate": 0.00042854039044961403,
      "loss": 3.1898,
      "step": 82727
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.833561897277832,
      "learning_rate": 0.0004285366943816985,
      "loss": 2.921,
      "step": 82728
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5892894268035889,
      "learning_rate": 0.00042853299828988557,
      "loss": 2.8861,
      "step": 82729
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5745373964309692,
      "learning_rate": 0.00042852930217417596,
      "loss": 3.013,
      "step": 82730
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5515645742416382,
      "learning_rate": 0.0004285256060345703,
      "loss": 3.3838,
      "step": 82731
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1057379245758057,
      "learning_rate": 0.0004285219098710693,
      "loss": 2.8751,
      "step": 82732
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.078287363052368,
      "learning_rate": 0.0004285182136836736,
      "loss": 3.0272,
      "step": 82733
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8965578079223633,
      "learning_rate": 0.000428514517472384,
      "loss": 2.9728,
      "step": 82734
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6515114307403564,
      "learning_rate": 0.00042851082123720113,
      "loss": 2.8176,
      "step": 82735
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.623246431350708,
      "learning_rate": 0.0004285071249781256,
      "loss": 2.9722,
      "step": 82736
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9719359874725342,
      "learning_rate": 0.00042850342869515826,
      "loss": 2.8869,
      "step": 82737
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8045629262924194,
      "learning_rate": 0.0004284997323882997,
      "loss": 2.9541,
      "step": 82738
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9634156227111816,
      "learning_rate": 0.0004284960360575506,
      "loss": 3.0795,
      "step": 82739
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.392692804336548,
      "learning_rate": 0.0004284923397029117,
      "loss": 3.03,
      "step": 82740
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0252110958099365,
      "learning_rate": 0.0004284886433243835,
      "loss": 2.9921,
      "step": 82741
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4956445693969727,
      "learning_rate": 0.0004284849469219669,
      "loss": 2.9571,
      "step": 82742
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8529369831085205,
      "learning_rate": 0.0004284812504956626,
      "loss": 2.8534,
      "step": 82743
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.968497395515442,
      "learning_rate": 0.00042847755404547115,
      "loss": 3.0448,
      "step": 82744
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6094372272491455,
      "learning_rate": 0.0004284738575713933,
      "loss": 3.1561,
      "step": 82745
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6397470235824585,
      "learning_rate": 0.0004284701610734298,
      "loss": 2.9475,
      "step": 82746
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.971266508102417,
      "learning_rate": 0.0004284664645515812,
      "loss": 2.9342,
      "step": 82747
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.313425064086914,
      "learning_rate": 0.00042846276800584827,
      "loss": 3.3562,
      "step": 82748
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6640726327896118,
      "learning_rate": 0.0004284590714362317,
      "loss": 2.8626,
      "step": 82749
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.003176689147949,
      "learning_rate": 0.00042845537484273216,
      "loss": 2.8133,
      "step": 82750
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.359848976135254,
      "learning_rate": 0.00042845167822535033,
      "loss": 2.9417,
      "step": 82751
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6657681465148926,
      "learning_rate": 0.00042844798158408695,
      "loss": 3.1337,
      "step": 82752
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5099138021469116,
      "learning_rate": 0.00042844428491894265,
      "loss": 2.8979,
      "step": 82753
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.318709135055542,
      "learning_rate": 0.00042844058822991805,
      "loss": 2.9422,
      "step": 82754
    },
    {
      "epoch": 1.08,
      "grad_norm": 5.520881175994873,
      "learning_rate": 0.00042843689151701405,
      "loss": 3.0442,
      "step": 82755
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.1128647327423096,
      "learning_rate": 0.00042843319478023115,
      "loss": 2.9844,
      "step": 82756
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1068532466888428,
      "learning_rate": 0.00042842949801957006,
      "loss": 3.0338,
      "step": 82757
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8596487045288086,
      "learning_rate": 0.0004284258012350316,
      "loss": 2.9062,
      "step": 82758
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.9148826599121094,
      "learning_rate": 0.00042842210442661625,
      "loss": 2.8684,
      "step": 82759
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7515904903411865,
      "learning_rate": 0.00042841840759432485,
      "loss": 3.0193,
      "step": 82760
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9592546224594116,
      "learning_rate": 0.000428414710738158,
      "loss": 2.9232,
      "step": 82761
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3140687942504883,
      "learning_rate": 0.00042841101385811655,
      "loss": 2.9377,
      "step": 82762
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7830686569213867,
      "learning_rate": 0.00042840731695420096,
      "loss": 3.1156,
      "step": 82763
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8049956560134888,
      "learning_rate": 0.00042840362002641205,
      "loss": 2.9841,
      "step": 82764
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.107553005218506,
      "learning_rate": 0.0004283999230747505,
      "loss": 2.982,
      "step": 82765
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2809839248657227,
      "learning_rate": 0.00042839622609921693,
      "loss": 3.1401,
      "step": 82766
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7122607231140137,
      "learning_rate": 0.0004283925290998122,
      "loss": 3.2003,
      "step": 82767
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.092068672180176,
      "learning_rate": 0.0004283888320765367,
      "loss": 2.8985,
      "step": 82768
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.225757122039795,
      "learning_rate": 0.00042838513502939135,
      "loss": 3.0991,
      "step": 82769
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0949950218200684,
      "learning_rate": 0.0004283814379583769,
      "loss": 3.2785,
      "step": 82770
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7288979291915894,
      "learning_rate": 0.00042837774086349384,
      "loss": 3.0406,
      "step": 82771
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.4451746940612793,
      "learning_rate": 0.00042837404374474294,
      "loss": 3.0749,
      "step": 82772
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.300359010696411,
      "learning_rate": 0.0004283703466021248,
      "loss": 3.0636,
      "step": 82773
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5552486181259155,
      "learning_rate": 0.0004283666494356403,
      "loss": 3.1675,
      "step": 82774
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8400713205337524,
      "learning_rate": 0.00042836295224529,
      "loss": 3.2503,
      "step": 82775
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.033482074737549,
      "learning_rate": 0.00042835925503107456,
      "loss": 2.8925,
      "step": 82776
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8378733396530151,
      "learning_rate": 0.00042835555779299463,
      "loss": 2.7327,
      "step": 82777
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6440863609313965,
      "learning_rate": 0.0004283518605310512,
      "loss": 3.098,
      "step": 82778
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5613279342651367,
      "learning_rate": 0.0004283481632452446,
      "loss": 2.9906,
      "step": 82779
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6741316318511963,
      "learning_rate": 0.00042834446593557565,
      "loss": 3.2689,
      "step": 82780
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8096777200698853,
      "learning_rate": 0.0004283407686020451,
      "loss": 3.0691,
      "step": 82781
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8668735027313232,
      "learning_rate": 0.0004283370712446535,
      "loss": 3.1445,
      "step": 82782
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7577601671218872,
      "learning_rate": 0.0004283333738634016,
      "loss": 2.9556,
      "step": 82783
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6349726915359497,
      "learning_rate": 0.0004283296764582902,
      "loss": 3.125,
      "step": 82784
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7510212659835815,
      "learning_rate": 0.00042832597902931987,
      "loss": 3.1042,
      "step": 82785
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0118985176086426,
      "learning_rate": 0.0004283222815764913,
      "loss": 2.9612,
      "step": 82786
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0260426998138428,
      "learning_rate": 0.0004283185840998051,
      "loss": 2.8726,
      "step": 82787
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6892778873443604,
      "learning_rate": 0.0004283148865992623,
      "loss": 3.0182,
      "step": 82788
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7988622188568115,
      "learning_rate": 0.00042831118907486315,
      "loss": 2.9408,
      "step": 82789
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7101117372512817,
      "learning_rate": 0.0004283074915266085,
      "loss": 3.0922,
      "step": 82790
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7513350248336792,
      "learning_rate": 0.00042830379395449915,
      "loss": 3.1143,
      "step": 82791
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.622944712638855,
      "learning_rate": 0.0004283000963585357,
      "loss": 2.8466,
      "step": 82792
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6042054891586304,
      "learning_rate": 0.0004282963987387188,
      "loss": 3.0512,
      "step": 82793
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6675628423690796,
      "learning_rate": 0.0004282927010950492,
      "loss": 3.2429,
      "step": 82794
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6850099563598633,
      "learning_rate": 0.0004282890034275276,
      "loss": 3.2267,
      "step": 82795
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6412111520767212,
      "learning_rate": 0.0004282853057361546,
      "loss": 3.1422,
      "step": 82796
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9755345582962036,
      "learning_rate": 0.0004282816080209309,
      "loss": 2.8559,
      "step": 82797
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.71853768825531,
      "learning_rate": 0.00042827791028185734,
      "loss": 3.0119,
      "step": 82798
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8631211519241333,
      "learning_rate": 0.00042827421251893444,
      "loss": 3.0785,
      "step": 82799
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.734059453010559,
      "learning_rate": 0.00042827051473216294,
      "loss": 2.9803,
      "step": 82800
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.923988699913025,
      "learning_rate": 0.00042826681692154355,
      "loss": 3.1119,
      "step": 82801
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.669295310974121,
      "learning_rate": 0.0004282631190870769,
      "loss": 3.1101,
      "step": 82802
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.779934048652649,
      "learning_rate": 0.00042825942122876374,
      "loss": 2.8474,
      "step": 82803
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7957229614257812,
      "learning_rate": 0.0004282557233466048,
      "loss": 2.9355,
      "step": 82804
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6911746263504028,
      "learning_rate": 0.0004282520254406006,
      "loss": 3.1101,
      "step": 82805
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6821168661117554,
      "learning_rate": 0.0004282483275107519,
      "loss": 3.0291,
      "step": 82806
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6312302350997925,
      "learning_rate": 0.0004282446295570595,
      "loss": 3.0733,
      "step": 82807
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7860996723175049,
      "learning_rate": 0.000428240931579524,
      "loss": 2.9373,
      "step": 82808
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8774402141571045,
      "learning_rate": 0.000428237233578146,
      "loss": 3.1291,
      "step": 82809
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6332645416259766,
      "learning_rate": 0.0004282335355529265,
      "loss": 2.9472,
      "step": 82810
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8484103679656982,
      "learning_rate": 0.0004282298375038657,
      "loss": 2.8389,
      "step": 82811
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6747334003448486,
      "learning_rate": 0.00042822613943096463,
      "loss": 3.0004,
      "step": 82812
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.198810338973999,
      "learning_rate": 0.000428222441334224,
      "loss": 3.176,
      "step": 82813
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5874831676483154,
      "learning_rate": 0.0004282187432136443,
      "loss": 3.2014,
      "step": 82814
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7628694772720337,
      "learning_rate": 0.00042821504506922626,
      "loss": 3.2593,
      "step": 82815
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7850903272628784,
      "learning_rate": 0.0004282113469009708,
      "loss": 3.0639,
      "step": 82816
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8770017623901367,
      "learning_rate": 0.00042820764870887834,
      "loss": 3.0281,
      "step": 82817
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6694092750549316,
      "learning_rate": 0.0004282039504929496,
      "loss": 3.3154,
      "step": 82818
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7370998859405518,
      "learning_rate": 0.00042820025225318546,
      "loss": 3.1678,
      "step": 82819
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9551244974136353,
      "learning_rate": 0.00042819655398958635,
      "loss": 2.9833,
      "step": 82820
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9194809198379517,
      "learning_rate": 0.0004281928557021531,
      "loss": 3.4749,
      "step": 82821
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0594780445098877,
      "learning_rate": 0.0004281891573908865,
      "loss": 3.1034,
      "step": 82822
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.852301001548767,
      "learning_rate": 0.00042818545905578696,
      "loss": 2.9707,
      "step": 82823
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6016627550125122,
      "learning_rate": 0.00042818176069685536,
      "loss": 3.0108,
      "step": 82824
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7525014877319336,
      "learning_rate": 0.0004281780623140924,
      "loss": 3.1085,
      "step": 82825
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9543421268463135,
      "learning_rate": 0.00042817436390749867,
      "loss": 3.0517,
      "step": 82826
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6869404315948486,
      "learning_rate": 0.00042817066547707494,
      "loss": 2.7871,
      "step": 82827
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7302510738372803,
      "learning_rate": 0.0004281669670228219,
      "loss": 2.9683,
      "step": 82828
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7180205583572388,
      "learning_rate": 0.0004281632685447401,
      "loss": 3.1827,
      "step": 82829
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.875494360923767,
      "learning_rate": 0.0004281595700428304,
      "loss": 3.0346,
      "step": 82830
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6068373918533325,
      "learning_rate": 0.0004281558715170934,
      "loss": 2.7434,
      "step": 82831
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.073794364929199,
      "learning_rate": 0.00042815217296752984,
      "loss": 2.779,
      "step": 82832
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.332780122756958,
      "learning_rate": 0.0004281484743941403,
      "loss": 2.8405,
      "step": 82833
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7514903545379639,
      "learning_rate": 0.00042814477579692566,
      "loss": 3.1822,
      "step": 82834
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2500078678131104,
      "learning_rate": 0.00042814107717588633,
      "loss": 2.7417,
      "step": 82835
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.398632526397705,
      "learning_rate": 0.0004281373785310232,
      "loss": 3.02,
      "step": 82836
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2223031520843506,
      "learning_rate": 0.00042813367986233703,
      "loss": 3.2289,
      "step": 82837
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8135262727737427,
      "learning_rate": 0.00042812998116982824,
      "loss": 3.1482,
      "step": 82838
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1802053451538086,
      "learning_rate": 0.0004281262824534977,
      "loss": 3.1068,
      "step": 82839
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9079076051712036,
      "learning_rate": 0.00042812258371334607,
      "loss": 3.0109,
      "step": 82840
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6077364683151245,
      "learning_rate": 0.00042811888494937415,
      "loss": 3.0405,
      "step": 82841
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8160141706466675,
      "learning_rate": 0.00042811518616158237,
      "loss": 2.9134,
      "step": 82842
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.81244957447052,
      "learning_rate": 0.0004281114873499716,
      "loss": 2.9824,
      "step": 82843
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7957628965377808,
      "learning_rate": 0.0004281077885145425,
      "loss": 3.087,
      "step": 82844
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6989248991012573,
      "learning_rate": 0.0004281040896552957,
      "loss": 2.7686,
      "step": 82845
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.567559838294983,
      "learning_rate": 0.00042810039077223195,
      "loss": 3.135,
      "step": 82846
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3639729022979736,
      "learning_rate": 0.00042809669186535205,
      "loss": 3.0674,
      "step": 82847
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5773231983184814,
      "learning_rate": 0.00042809299293465635,
      "loss": 3.2863,
      "step": 82848
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.463944673538208,
      "learning_rate": 0.0004280892939801459,
      "loss": 3.093,
      "step": 82849
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4715453386306763,
      "learning_rate": 0.0004280855950018212,
      "loss": 3.0575,
      "step": 82850
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.766281008720398,
      "learning_rate": 0.0004280818959996829,
      "loss": 2.9771,
      "step": 82851
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8382320404052734,
      "learning_rate": 0.00042807819697373174,
      "loss": 3.1144,
      "step": 82852
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5609838962554932,
      "learning_rate": 0.0004280744979239685,
      "loss": 3.0883,
      "step": 82853
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.4879868030548096,
      "learning_rate": 0.00042807079885039383,
      "loss": 2.945,
      "step": 82854
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3848049640655518,
      "learning_rate": 0.0004280670997530083,
      "loss": 3.2067,
      "step": 82855
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.632651925086975,
      "learning_rate": 0.00042806340063181273,
      "loss": 2.8684,
      "step": 82856
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6417617797851562,
      "learning_rate": 0.00042805970148680766,
      "loss": 2.8639,
      "step": 82857
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.464021921157837,
      "learning_rate": 0.000428056002317994,
      "loss": 3.1502,
      "step": 82858
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4547830820083618,
      "learning_rate": 0.00042805230312537224,
      "loss": 3.2294,
      "step": 82859
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6344057321548462,
      "learning_rate": 0.00042804860390894314,
      "loss": 2.9545,
      "step": 82860
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.0086758136749268,
      "learning_rate": 0.00042804490466870744,
      "loss": 2.9673,
      "step": 82861
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4743119478225708,
      "learning_rate": 0.0004280412054046658,
      "loss": 3.026,
      "step": 82862
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1279947757720947,
      "learning_rate": 0.0004280375061168188,
      "loss": 2.7921,
      "step": 82863
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8488376140594482,
      "learning_rate": 0.0004280338068051672,
      "loss": 3.0472,
      "step": 82864
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7545732259750366,
      "learning_rate": 0.0004280301074697117,
      "loss": 2.7937,
      "step": 82865
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8084253072738647,
      "learning_rate": 0.00042802640811045306,
      "loss": 3.1446,
      "step": 82866
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.946676254272461,
      "learning_rate": 0.00042802270872739184,
      "loss": 2.7639,
      "step": 82867
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5633504390716553,
      "learning_rate": 0.0004280190093205288,
      "loss": 2.7521,
      "step": 82868
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.986997365951538,
      "learning_rate": 0.00042801530988986465,
      "loss": 2.859,
      "step": 82869
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8252966403961182,
      "learning_rate": 0.00042801161043539993,
      "loss": 3.1006,
      "step": 82870
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3610799312591553,
      "learning_rate": 0.0004280079109571355,
      "loss": 3.083,
      "step": 82871
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3488423824310303,
      "learning_rate": 0.000428004211455072,
      "loss": 2.9234,
      "step": 82872
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.574488878250122,
      "learning_rate": 0.00042800051192921006,
      "loss": 3.1085,
      "step": 82873
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4498622417449951,
      "learning_rate": 0.00042799681237955044,
      "loss": 3.1311,
      "step": 82874
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.037792682647705,
      "learning_rate": 0.0004279931128060937,
      "loss": 3.1465,
      "step": 82875
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.5157101154327393,
      "learning_rate": 0.00042798941320884077,
      "loss": 3.0675,
      "step": 82876
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.317326545715332,
      "learning_rate": 0.00042798571358779224,
      "loss": 3.1435,
      "step": 82877
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.768912672996521,
      "learning_rate": 0.00042798201394294856,
      "loss": 3.2655,
      "step": 82878
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5529344081878662,
      "learning_rate": 0.0004279783142743107,
      "loss": 3.1084,
      "step": 82879
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.1881349086761475,
      "learning_rate": 0.0004279746145818793,
      "loss": 3.0669,
      "step": 82880
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.701611876487732,
      "learning_rate": 0.00042797091486565487,
      "loss": 3.1932,
      "step": 82881
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7344682216644287,
      "learning_rate": 0.00042796721512563835,
      "loss": 3.0959,
      "step": 82882
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1632328033447266,
      "learning_rate": 0.0004279635153618303,
      "loss": 2.904,
      "step": 82883
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.7047903537750244,
      "learning_rate": 0.00042795981557423136,
      "loss": 3.1953,
      "step": 82884
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9533298015594482,
      "learning_rate": 0.0004279561157628423,
      "loss": 3.1389,
      "step": 82885
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.345470428466797,
      "learning_rate": 0.0004279524159276638,
      "loss": 3.2069,
      "step": 82886
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.739941120147705,
      "learning_rate": 0.0004279487160686965,
      "loss": 2.8651,
      "step": 82887
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6225223541259766,
      "learning_rate": 0.00042794501618594115,
      "loss": 2.9494,
      "step": 82888
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.73505437374115,
      "learning_rate": 0.0004279413162793984,
      "loss": 3.1137,
      "step": 82889
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2402448654174805,
      "learning_rate": 0.0004279376163490689,
      "loss": 3.1995,
      "step": 82890
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6387825012207031,
      "learning_rate": 0.0004279339163949534,
      "loss": 2.9865,
      "step": 82891
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4224488735198975,
      "learning_rate": 0.0004279302164170526,
      "loss": 3.0142,
      "step": 82892
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3288776874542236,
      "learning_rate": 0.0004279265164153672,
      "loss": 2.9687,
      "step": 82893
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6699533462524414,
      "learning_rate": 0.00042792281638989776,
      "loss": 2.9408,
      "step": 82894
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8634239435195923,
      "learning_rate": 0.0004279191163406451,
      "loss": 2.8533,
      "step": 82895
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7056071758270264,
      "learning_rate": 0.0004279154162676098,
      "loss": 3.1254,
      "step": 82896
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.639438271522522,
      "learning_rate": 0.00042791171617079265,
      "loss": 3.0665,
      "step": 82897
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6992387771606445,
      "learning_rate": 0.00042790801605019433,
      "loss": 3.2578,
      "step": 82898
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7923086881637573,
      "learning_rate": 0.00042790431590581545,
      "loss": 2.982,
      "step": 82899
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.767989158630371,
      "learning_rate": 0.0004279006157376568,
      "loss": 2.9204,
      "step": 82900
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.071013927459717,
      "learning_rate": 0.00042789691554571903,
      "loss": 3.261,
      "step": 82901
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.929955005645752,
      "learning_rate": 0.00042789321533000273,
      "loss": 2.7808,
      "step": 82902
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5206822156906128,
      "learning_rate": 0.00042788951509050864,
      "loss": 3.1162,
      "step": 82903
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.649694800376892,
      "learning_rate": 0.00042788581482723756,
      "loss": 3.1437,
      "step": 82904
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.405313730239868,
      "learning_rate": 0.00042788211454019003,
      "loss": 3.0747,
      "step": 82905
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8042088747024536,
      "learning_rate": 0.0004278784142293668,
      "loss": 3.0313,
      "step": 82906
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8255643844604492,
      "learning_rate": 0.0004278747138947686,
      "loss": 3.0529,
      "step": 82907
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7842525243759155,
      "learning_rate": 0.0004278710135363961,
      "loss": 3.053,
      "step": 82908
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7783901691436768,
      "learning_rate": 0.0004278673131542499,
      "loss": 2.885,
      "step": 82909
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5533326864242554,
      "learning_rate": 0.00042786361274833076,
      "loss": 3.033,
      "step": 82910
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5217475891113281,
      "learning_rate": 0.0004278599123186394,
      "loss": 2.8076,
      "step": 82911
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.84012770652771,
      "learning_rate": 0.00042785621186517645,
      "loss": 3.2061,
      "step": 82912
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.163062572479248,
      "learning_rate": 0.0004278525113879426,
      "loss": 2.7656,
      "step": 82913
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6287448406219482,
      "learning_rate": 0.00042784881088693864,
      "loss": 3.0471,
      "step": 82914
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.614581346511841,
      "learning_rate": 0.00042784511036216504,
      "loss": 3.0458,
      "step": 82915
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.953369140625,
      "learning_rate": 0.0004278414098136227,
      "loss": 2.8993,
      "step": 82916
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2810897827148438,
      "learning_rate": 0.00042783770924131225,
      "loss": 2.9301,
      "step": 82917
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9738417863845825,
      "learning_rate": 0.0004278340086452343,
      "loss": 2.8642,
      "step": 82918
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.536102294921875,
      "learning_rate": 0.0004278303080253896,
      "loss": 2.8403,
      "step": 82919
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0829029083251953,
      "learning_rate": 0.0004278266073817789,
      "loss": 3.1106,
      "step": 82920
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6511359214782715,
      "learning_rate": 0.0004278229067144028,
      "loss": 2.9806,
      "step": 82921
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4787545204162598,
      "learning_rate": 0.0004278192060232619,
      "loss": 3.2578,
      "step": 82922
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.8670992851257324,
      "learning_rate": 0.0004278155053083571,
      "loss": 2.8284,
      "step": 82923
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9717881679534912,
      "learning_rate": 0.000427811804569689,
      "loss": 3.0912,
      "step": 82924
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.5770421028137207,
      "learning_rate": 0.0004278081038072582,
      "loss": 3.1064,
      "step": 82925
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.33463978767395,
      "learning_rate": 0.00042780440302106547,
      "loss": 3.0131,
      "step": 82926
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.3962061405181885,
      "learning_rate": 0.0004278007022111115,
      "loss": 2.867,
      "step": 82927
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3264222145080566,
      "learning_rate": 0.000427797001377397,
      "loss": 3.1634,
      "step": 82928
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5259889364242554,
      "learning_rate": 0.00042779330051992263,
      "loss": 3.0302,
      "step": 82929
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5279291868209839,
      "learning_rate": 0.00042778959963868905,
      "loss": 3.1143,
      "step": 82930
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8288923501968384,
      "learning_rate": 0.00042778589873369697,
      "loss": 3.0849,
      "step": 82931
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9559335708618164,
      "learning_rate": 0.0004277821978049471,
      "loss": 3.0717,
      "step": 82932
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5136404037475586,
      "learning_rate": 0.0004277784968524401,
      "loss": 3.2566,
      "step": 82933
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9085038900375366,
      "learning_rate": 0.0004277747958761766,
      "loss": 2.7481,
      "step": 82934
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2265872955322266,
      "learning_rate": 0.0004277710948761574,
      "loss": 3.039,
      "step": 82935
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.060124158859253,
      "learning_rate": 0.0004277673938523832,
      "loss": 3.1089,
      "step": 82936
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0508718490600586,
      "learning_rate": 0.0004277636928048545,
      "loss": 3.0305,
      "step": 82937
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.9703686237335205,
      "learning_rate": 0.00042775999173357227,
      "loss": 2.7372,
      "step": 82938
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6753073930740356,
      "learning_rate": 0.00042775629063853695,
      "loss": 3.1523,
      "step": 82939
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7118661403656006,
      "learning_rate": 0.0004277525895197493,
      "loss": 2.9709,
      "step": 82940
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.1404266357421875,
      "learning_rate": 0.0004277488883772101,
      "loss": 2.766,
      "step": 82941
    },
    {
      "epoch": 1.08,
      "grad_norm": 5.15276575088501,
      "learning_rate": 0.00042774518721092,
      "loss": 2.7763,
      "step": 82942
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.030447244644165,
      "learning_rate": 0.00042774148602087956,
      "loss": 3.273,
      "step": 82943
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5660831928253174,
      "learning_rate": 0.0004277377848070896,
      "loss": 2.9188,
      "step": 82944
    },
    {
      "epoch": 1.08,
      "grad_norm": 4.049629211425781,
      "learning_rate": 0.00042773408356955084,
      "loss": 3.0197,
      "step": 82945
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.753906488418579,
      "learning_rate": 0.0004277303823082638,
      "loss": 3.0666,
      "step": 82946
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.4233551025390625,
      "learning_rate": 0.0004277266810232294,
      "loss": 3.028,
      "step": 82947
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1796226501464844,
      "learning_rate": 0.0004277229797144481,
      "loss": 3.2335,
      "step": 82948
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.468109130859375,
      "learning_rate": 0.00042771927838192067,
      "loss": 3.0985,
      "step": 82949
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.3126308917999268,
      "learning_rate": 0.0004277155770256479,
      "loss": 3.0652,
      "step": 82950
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.0330703258514404,
      "learning_rate": 0.0004277118756456303,
      "loss": 2.7754,
      "step": 82951
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.835574984550476,
      "learning_rate": 0.00042770817424186866,
      "loss": 3.2174,
      "step": 82952
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5571082830429077,
      "learning_rate": 0.0004277044728143637,
      "loss": 3.4766,
      "step": 82953
    },
    {
      "epoch": 1.08,
      "grad_norm": 4.305810451507568,
      "learning_rate": 0.0004277007713631161,
      "loss": 2.9882,
      "step": 82954
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.5552570819854736,
      "learning_rate": 0.0004276970698881265,
      "loss": 3.0809,
      "step": 82955
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3981740474700928,
      "learning_rate": 0.00042769336838939547,
      "loss": 3.0034,
      "step": 82956
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4336810111999512,
      "learning_rate": 0.000427689666866924,
      "loss": 3.0838,
      "step": 82957
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6754276752471924,
      "learning_rate": 0.0004276859653207126,
      "loss": 3.0943,
      "step": 82958
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.8895328044891357,
      "learning_rate": 0.00042768226375076193,
      "loss": 2.8822,
      "step": 82959
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9998770952224731,
      "learning_rate": 0.00042767856215707265,
      "loss": 2.9835,
      "step": 82960
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6303393840789795,
      "learning_rate": 0.0004276748605396457,
      "loss": 3.0481,
      "step": 82961
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4557757377624512,
      "learning_rate": 0.0004276711588984814,
      "loss": 3.2072,
      "step": 82962
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.547603130340576,
      "learning_rate": 0.00042766745723358065,
      "loss": 2.8047,
      "step": 82963
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.821810483932495,
      "learning_rate": 0.0004276637555449441,
      "loss": 3.0089,
      "step": 82964
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9924111366271973,
      "learning_rate": 0.0004276600538325726,
      "loss": 2.726,
      "step": 82965
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6667836904525757,
      "learning_rate": 0.0004276563520964665,
      "loss": 2.9979,
      "step": 82966
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.9172723293304443,
      "learning_rate": 0.0004276526503366267,
      "loss": 3.0106,
      "step": 82967
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.754796266555786,
      "learning_rate": 0.00042764894855305396,
      "loss": 3.3456,
      "step": 82968
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2408275604248047,
      "learning_rate": 0.0004276452467457488,
      "loss": 2.8774,
      "step": 82969
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6315582990646362,
      "learning_rate": 0.0004276415449147121,
      "loss": 2.8109,
      "step": 82970
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.5979371070861816,
      "learning_rate": 0.00042763784305994426,
      "loss": 3.2092,
      "step": 82971
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.369084119796753,
      "learning_rate": 0.00042763414118144623,
      "loss": 3.0389,
      "step": 82972
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8308579921722412,
      "learning_rate": 0.00042763043927921855,
      "loss": 2.7108,
      "step": 82973
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.507797122001648,
      "learning_rate": 0.00042762673735326196,
      "loss": 2.8773,
      "step": 82974
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.839989423751831,
      "learning_rate": 0.00042762303540357723,
      "loss": 2.7748,
      "step": 82975
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.7292356491088867,
      "learning_rate": 0.0004276193334301649,
      "loss": 2.8136,
      "step": 82976
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.961358666419983,
      "learning_rate": 0.0004276156314330257,
      "loss": 2.8855,
      "step": 82977
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9290305376052856,
      "learning_rate": 0.00042761192941216046,
      "loss": 3.133,
      "step": 82978
    },
    {
      "epoch": 1.08,
      "grad_norm": 4.375062942504883,
      "learning_rate": 0.00042760822736756965,
      "loss": 2.8823,
      "step": 82979
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3515660762786865,
      "learning_rate": 0.0004276045252992542,
      "loss": 2.946,
      "step": 82980
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7570525407791138,
      "learning_rate": 0.00042760082320721457,
      "loss": 3.0957,
      "step": 82981
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6048710346221924,
      "learning_rate": 0.0004275971210914515,
      "loss": 3.0014,
      "step": 82982
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.4994091987609863,
      "learning_rate": 0.0004275934189519657,
      "loss": 3.0661,
      "step": 82983
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.167609691619873,
      "learning_rate": 0.00042758971678875793,
      "loss": 3.0815,
      "step": 82984
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6085772514343262,
      "learning_rate": 0.00042758601460182884,
      "loss": 3.0377,
      "step": 82985
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8386331796646118,
      "learning_rate": 0.00042758231239117903,
      "loss": 3.0522,
      "step": 82986
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.695209264755249,
      "learning_rate": 0.0004275786101568094,
      "loss": 3.031,
      "step": 82987
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6773478984832764,
      "learning_rate": 0.00042757490789872035,
      "loss": 2.8789,
      "step": 82988
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.381502151489258,
      "learning_rate": 0.00042757120561691273,
      "loss": 3.0206,
      "step": 82989
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1171905994415283,
      "learning_rate": 0.0004275675033113873,
      "loss": 2.6947,
      "step": 82990
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.158061981201172,
      "learning_rate": 0.0004275638009821446,
      "loss": 2.9935,
      "step": 82991
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7989847660064697,
      "learning_rate": 0.0004275600986291854,
      "loss": 3.28,
      "step": 82992
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6508593559265137,
      "learning_rate": 0.00042755639625251035,
      "loss": 3.1593,
      "step": 82993
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.994258165359497,
      "learning_rate": 0.0004275526938521201,
      "loss": 2.9977,
      "step": 82994
    },
    {
      "epoch": 1.08,
      "grad_norm": 4.344305515289307,
      "learning_rate": 0.0004275489914280155,
      "loss": 2.8188,
      "step": 82995
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9164551496505737,
      "learning_rate": 0.0004275452889801972,
      "loss": 3.1844,
      "step": 82996
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.96152925491333,
      "learning_rate": 0.00042754158650866567,
      "loss": 3.0276,
      "step": 82997
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.7249815464019775,
      "learning_rate": 0.00042753788401342184,
      "loss": 3.2886,
      "step": 82998
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7196298837661743,
      "learning_rate": 0.00042753418149446624,
      "loss": 3.1256,
      "step": 82999
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6031299829483032,
      "learning_rate": 0.00042753047895179974,
      "loss": 3.1738,
      "step": 83000
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.626745581626892,
      "learning_rate": 0.0004275267763854228,
      "loss": 3.2321,
      "step": 83001
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.199697971343994,
      "learning_rate": 0.0004275230737953363,
      "loss": 3.188,
      "step": 83002
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9854856729507446,
      "learning_rate": 0.0004275193711815408,
      "loss": 2.8661,
      "step": 83003
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8087215423583984,
      "learning_rate": 0.0004275156685440371,
      "loss": 3.0041,
      "step": 83004
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7360608577728271,
      "learning_rate": 0.0004275119658828258,
      "loss": 2.9805,
      "step": 83005
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8755016326904297,
      "learning_rate": 0.0004275082631979076,
      "loss": 2.9788,
      "step": 83006
    },
    {
      "epoch": 1.08,
      "grad_norm": 5.2977471351623535,
      "learning_rate": 0.0004275045604892832,
      "loss": 2.8274,
      "step": 83007
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8512492179870605,
      "learning_rate": 0.0004275008577569534,
      "loss": 3.0427,
      "step": 83008
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1799662113189697,
      "learning_rate": 0.00042749715500091867,
      "loss": 3.1979,
      "step": 83009
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0260932445526123,
      "learning_rate": 0.0004274934522211798,
      "loss": 2.9194,
      "step": 83010
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.8336410522460938,
      "learning_rate": 0.0004274897494177376,
      "loss": 2.9802,
      "step": 83011
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1769304275512695,
      "learning_rate": 0.00042748604659059253,
      "loss": 2.9056,
      "step": 83012
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6591447591781616,
      "learning_rate": 0.00042748234373974546,
      "loss": 2.6166,
      "step": 83013
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.652673602104187,
      "learning_rate": 0.000427478640865197,
      "loss": 2.9469,
      "step": 83014
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.45884108543396,
      "learning_rate": 0.0004274749379669478,
      "loss": 2.9166,
      "step": 83015
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5266722440719604,
      "learning_rate": 0.0004274712350449987,
      "loss": 3.0086,
      "step": 83016
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1052424907684326,
      "learning_rate": 0.00042746753209935026,
      "loss": 3.1159,
      "step": 83017
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7808092832565308,
      "learning_rate": 0.00042746382913000326,
      "loss": 3.0302,
      "step": 83018
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8606082201004028,
      "learning_rate": 0.0004274601261369582,
      "loss": 2.8411,
      "step": 83019
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8136451244354248,
      "learning_rate": 0.000427456423120216,
      "loss": 3.307,
      "step": 83020
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6879303455352783,
      "learning_rate": 0.0004274527200797772,
      "loss": 2.8349,
      "step": 83021
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4662033319473267,
      "learning_rate": 0.00042744901701564247,
      "loss": 3.011,
      "step": 83022
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7680128812789917,
      "learning_rate": 0.0004274453139278127,
      "loss": 3.0041,
      "step": 83023
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7734051942825317,
      "learning_rate": 0.0004274416108162883,
      "loss": 3.0302,
      "step": 83024
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9111912250518799,
      "learning_rate": 0.0004274379076810702,
      "loss": 3.1195,
      "step": 83025
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6822553873062134,
      "learning_rate": 0.000427434204522159,
      "loss": 2.8521,
      "step": 83026
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.079505205154419,
      "learning_rate": 0.00042743050133955534,
      "loss": 3.0614,
      "step": 83027
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5520559549331665,
      "learning_rate": 0.0004274267981332599,
      "loss": 3.0573,
      "step": 83028
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6430689096450806,
      "learning_rate": 0.0004274230949032735,
      "loss": 2.9708,
      "step": 83029
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0877420902252197,
      "learning_rate": 0.00042741939164959666,
      "loss": 3.064,
      "step": 83030
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5270886421203613,
      "learning_rate": 0.00042741568837223015,
      "loss": 3.1554,
      "step": 83031
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1955699920654297,
      "learning_rate": 0.00042741198507117474,
      "loss": 3.2449,
      "step": 83032
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7146822214126587,
      "learning_rate": 0.000427408281746431,
      "loss": 2.8407,
      "step": 83033
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4442884922027588,
      "learning_rate": 0.00042740457839799957,
      "loss": 3.1638,
      "step": 83034
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5891497135162354,
      "learning_rate": 0.0004274008750258813,
      "loss": 3.1372,
      "step": 83035
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6874566078186035,
      "learning_rate": 0.00042739717163007684,
      "loss": 2.9125,
      "step": 83036
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6824758052825928,
      "learning_rate": 0.00042739346821058676,
      "loss": 3.0647,
      "step": 83037
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.670509934425354,
      "learning_rate": 0.00042738976476741186,
      "loss": 3.0949,
      "step": 83038
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.218552827835083,
      "learning_rate": 0.0004273860613005529,
      "loss": 2.8997,
      "step": 83039
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7060832977294922,
      "learning_rate": 0.00042738235781001027,
      "loss": 3.0195,
      "step": 83040
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5998260974884033,
      "learning_rate": 0.000427378654295785,
      "loss": 3.2268,
      "step": 83041
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7084327936172485,
      "learning_rate": 0.00042737495075787765,
      "loss": 3.4139,
      "step": 83042
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7922402620315552,
      "learning_rate": 0.0004273712471962888,
      "loss": 2.7564,
      "step": 83043
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.174365520477295,
      "learning_rate": 0.0004273675436110192,
      "loss": 2.9747,
      "step": 83044
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8832206726074219,
      "learning_rate": 0.00042736384000206963,
      "loss": 2.8985,
      "step": 83045
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.597805380821228,
      "learning_rate": 0.00042736013636944075,
      "loss": 3.0999,
      "step": 83046
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7840065956115723,
      "learning_rate": 0.0004273564327131332,
      "loss": 2.9354,
      "step": 83047
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6168935298919678,
      "learning_rate": 0.00042735272903314764,
      "loss": 2.882,
      "step": 83048
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5470948219299316,
      "learning_rate": 0.0004273490253294848,
      "loss": 3.0449,
      "step": 83049
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1918134689331055,
      "learning_rate": 0.00042734532160214545,
      "loss": 3.1916,
      "step": 83050
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8973348140716553,
      "learning_rate": 0.0004273416178511302,
      "loss": 3.1328,
      "step": 83051
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.802525520324707,
      "learning_rate": 0.00042733791407643967,
      "loss": 3.0111,
      "step": 83052
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7081117630004883,
      "learning_rate": 0.0004273342102780747,
      "loss": 2.8621,
      "step": 83053
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0230302810668945,
      "learning_rate": 0.00042733050645603576,
      "loss": 3.0204,
      "step": 83054
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.769646406173706,
      "learning_rate": 0.00042732680261032375,
      "loss": 2.8813,
      "step": 83055
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6272592544555664,
      "learning_rate": 0.0004273230987409393,
      "loss": 3.0905,
      "step": 83056
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6998021602630615,
      "learning_rate": 0.00042731939484788313,
      "loss": 2.9718,
      "step": 83057
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9848490953445435,
      "learning_rate": 0.00042731569093115577,
      "loss": 2.7866,
      "step": 83058
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7048264741897583,
      "learning_rate": 0.00042731198699075805,
      "loss": 2.8971,
      "step": 83059
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7197837829589844,
      "learning_rate": 0.00042730828302669073,
      "loss": 2.8872,
      "step": 83060
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.4638893604278564,
      "learning_rate": 0.00042730457903895436,
      "loss": 2.8902,
      "step": 83061
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.233799695968628,
      "learning_rate": 0.00042730087502754953,
      "loss": 3.0562,
      "step": 83062
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6094281673431396,
      "learning_rate": 0.0004272971709924772,
      "loss": 3.0941,
      "step": 83063
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.307844877243042,
      "learning_rate": 0.0004272934669337379,
      "loss": 3.1374,
      "step": 83064
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.0909433364868164,
      "learning_rate": 0.0004272897628513323,
      "loss": 3.0276,
      "step": 83065
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.920993447303772,
      "learning_rate": 0.00042728605874526123,
      "loss": 2.8822,
      "step": 83066
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6814653873443604,
      "learning_rate": 0.00042728235461552517,
      "loss": 2.9297,
      "step": 83067
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6022193431854248,
      "learning_rate": 0.00042727865046212493,
      "loss": 3.1349,
      "step": 83068
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.196133852005005,
      "learning_rate": 0.00042727494628506127,
      "loss": 2.9098,
      "step": 83069
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5578298568725586,
      "learning_rate": 0.00042727124208433474,
      "loss": 3.0185,
      "step": 83070
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4913837909698486,
      "learning_rate": 0.00042726753785994604,
      "loss": 3.1273,
      "step": 83071
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5915824174880981,
      "learning_rate": 0.000427263833611896,
      "loss": 2.9882,
      "step": 83072
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9782596826553345,
      "learning_rate": 0.0004272601293401851,
      "loss": 2.7713,
      "step": 83073
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7535449266433716,
      "learning_rate": 0.00042725642504481417,
      "loss": 2.8456,
      "step": 83074
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6602815389633179,
      "learning_rate": 0.00042725272072578394,
      "loss": 3.0393,
      "step": 83075
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.935834527015686,
      "learning_rate": 0.00042724901638309496,
      "loss": 2.9007,
      "step": 83076
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8759796619415283,
      "learning_rate": 0.00042724531201674803,
      "loss": 2.9196,
      "step": 83077
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.376232624053955,
      "learning_rate": 0.0004272416076267438,
      "loss": 3.0354,
      "step": 83078
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3023667335510254,
      "learning_rate": 0.00042723790321308285,
      "loss": 3.1076,
      "step": 83079
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6354386806488037,
      "learning_rate": 0.000427234198775766,
      "loss": 3.1704,
      "step": 83080
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.731707215309143,
      "learning_rate": 0.000427230494314794,
      "loss": 3.0312,
      "step": 83081
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5766189098358154,
      "learning_rate": 0.00042722678983016737,
      "loss": 3.0561,
      "step": 83082
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7513819932937622,
      "learning_rate": 0.000427223085321887,
      "loss": 3.0205,
      "step": 83083
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8314054012298584,
      "learning_rate": 0.0004272193807899534,
      "loss": 3.2118,
      "step": 83084
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0303030014038086,
      "learning_rate": 0.0004272156762343672,
      "loss": 3.0765,
      "step": 83085
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9413999319076538,
      "learning_rate": 0.0004272119716551293,
      "loss": 3.0332,
      "step": 83086
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.584464192390442,
      "learning_rate": 0.00042720826705224034,
      "loss": 3.2783,
      "step": 83087
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6061142683029175,
      "learning_rate": 0.00042720456242570083,
      "loss": 3.175,
      "step": 83088
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.400897741317749,
      "learning_rate": 0.0004272008577755117,
      "loss": 3.1462,
      "step": 83089
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.8408796787261963,
      "learning_rate": 0.0004271971531016735,
      "loss": 2.8231,
      "step": 83090
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5836421251296997,
      "learning_rate": 0.00042719344840418696,
      "loss": 3.0724,
      "step": 83091
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0203914642333984,
      "learning_rate": 0.00042718974368305266,
      "loss": 2.7504,
      "step": 83092
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.614501953125,
      "learning_rate": 0.0004271860389382715,
      "loss": 2.6698,
      "step": 83093
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6570258140563965,
      "learning_rate": 0.000427182334169844,
      "loss": 3.045,
      "step": 83094
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8248261213302612,
      "learning_rate": 0.000427178629377771,
      "loss": 2.8208,
      "step": 83095
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9363768100738525,
      "learning_rate": 0.000427174924562053,
      "loss": 3.0838,
      "step": 83096
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.20796275138855,
      "learning_rate": 0.0004271712197226908,
      "loss": 3.0656,
      "step": 83097
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6283518075942993,
      "learning_rate": 0.000427167514859685,
      "loss": 2.8206,
      "step": 83098
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7930253744125366,
      "learning_rate": 0.0004271638099730365,
      "loss": 2.8268,
      "step": 83099
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7525897026062012,
      "learning_rate": 0.00042716010506274577,
      "loss": 3.0953,
      "step": 83100
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6589133739471436,
      "learning_rate": 0.00042715640012881353,
      "loss": 2.9749,
      "step": 83101
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6820964813232422,
      "learning_rate": 0.0004271526951712406,
      "loss": 3.1331,
      "step": 83102
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6119264364242554,
      "learning_rate": 0.0004271489901900276,
      "loss": 2.9555,
      "step": 83103
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4224278926849365,
      "learning_rate": 0.00042714528518517504,
      "loss": 2.9733,
      "step": 83104
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7565126419067383,
      "learning_rate": 0.000427141580156684,
      "loss": 2.9186,
      "step": 83105
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6963913440704346,
      "learning_rate": 0.00042713787510455475,
      "loss": 3.087,
      "step": 83106
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7581279277801514,
      "learning_rate": 0.0004271341700287882,
      "loss": 3.2265,
      "step": 83107
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.795524001121521,
      "learning_rate": 0.0004271304649293851,
      "loss": 3.0323,
      "step": 83108
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5547698736190796,
      "learning_rate": 0.000427126759806346,
      "loss": 3.4829,
      "step": 83109
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.7551679611206055,
      "learning_rate": 0.0004271230546596716,
      "loss": 3.0412,
      "step": 83110
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8528249263763428,
      "learning_rate": 0.00042711934948936264,
      "loss": 2.836,
      "step": 83111
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8374977111816406,
      "learning_rate": 0.00042711564429541985,
      "loss": 2.7181,
      "step": 83112
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2184431552886963,
      "learning_rate": 0.0004271119390778438,
      "loss": 3.2051,
      "step": 83113
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.4015438556671143,
      "learning_rate": 0.00042710823383663527,
      "loss": 2.8675,
      "step": 83114
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7156447172164917,
      "learning_rate": 0.00042710452857179493,
      "loss": 3.2339,
      "step": 83115
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.907251238822937,
      "learning_rate": 0.0004271008232833234,
      "loss": 3.0477,
      "step": 83116
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6632018089294434,
      "learning_rate": 0.0004270971179712215,
      "loss": 3.0911,
      "step": 83117
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6281236410140991,
      "learning_rate": 0.0004270934126354898,
      "loss": 3.0078,
      "step": 83118
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3615354299545288,
      "learning_rate": 0.00042708970727612907,
      "loss": 3.1036,
      "step": 83119
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9179019927978516,
      "learning_rate": 0.0004270860018931399,
      "loss": 2.6961,
      "step": 83120
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6911453008651733,
      "learning_rate": 0.0004270822964865232,
      "loss": 3.0457,
      "step": 83121
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4504636526107788,
      "learning_rate": 0.00042707859105627934,
      "loss": 3.0395,
      "step": 83122
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6138333082199097,
      "learning_rate": 0.00042707488560240917,
      "loss": 2.9401,
      "step": 83123
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6347897052764893,
      "learning_rate": 0.00042707118012491347,
      "loss": 3.2674,
      "step": 83124
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5763838291168213,
      "learning_rate": 0.0004270674746237928,
      "loss": 2.9437,
      "step": 83125
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7268372774124146,
      "learning_rate": 0.0004270637690990479,
      "loss": 3.154,
      "step": 83126
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.779842495918274,
      "learning_rate": 0.00042706006355067947,
      "loss": 3.2791,
      "step": 83127
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.615985631942749,
      "learning_rate": 0.0004270563579786881,
      "loss": 3.1435,
      "step": 83128
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.740350604057312,
      "learning_rate": 0.0004270526523830745,
      "loss": 3.0552,
      "step": 83129
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.945493221282959,
      "learning_rate": 0.00042704894676383955,
      "loss": 3.1776,
      "step": 83130
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8330016136169434,
      "learning_rate": 0.00042704524112098383,
      "loss": 2.9006,
      "step": 83131
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6083084344863892,
      "learning_rate": 0.0004270415354545079,
      "loss": 3.1213,
      "step": 83132
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4942033290863037,
      "learning_rate": 0.0004270378297644126,
      "loss": 3.0081,
      "step": 83133
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5331394672393799,
      "learning_rate": 0.00042703412405069855,
      "loss": 3.0675,
      "step": 83134
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5853358507156372,
      "learning_rate": 0.00042703041831336643,
      "loss": 3.007,
      "step": 83135
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1805381774902344,
      "learning_rate": 0.00042702671255241703,
      "loss": 2.982,
      "step": 83136
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5516823530197144,
      "learning_rate": 0.0004270230067678509,
      "loss": 3.0027,
      "step": 83137
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5406384468078613,
      "learning_rate": 0.0004270193009596688,
      "loss": 2.8583,
      "step": 83138
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9189645051956177,
      "learning_rate": 0.00042701559512787145,
      "loss": 2.7516,
      "step": 83139
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9259817600250244,
      "learning_rate": 0.0004270118892724594,
      "loss": 2.8621,
      "step": 83140
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9005297422409058,
      "learning_rate": 0.00042700818339343357,
      "loss": 3.0538,
      "step": 83141
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.225023031234741,
      "learning_rate": 0.0004270044774907945,
      "loss": 3.0371,
      "step": 83142
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.404719591140747,
      "learning_rate": 0.0004270007715645429,
      "loss": 2.7878,
      "step": 83143
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.839917778968811,
      "learning_rate": 0.0004269970656146794,
      "loss": 3.1692,
      "step": 83144
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0961694717407227,
      "learning_rate": 0.00042699335964120486,
      "loss": 3.0796,
      "step": 83145
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.9498450756073,
      "learning_rate": 0.0004269896536441198,
      "loss": 2.8408,
      "step": 83146
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5910096168518066,
      "learning_rate": 0.0004269859476234249,
      "loss": 2.8517,
      "step": 83147
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8032724857330322,
      "learning_rate": 0.000426982241579121,
      "loss": 2.9334,
      "step": 83148
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.00146222114563,
      "learning_rate": 0.0004269785355112087,
      "loss": 3.1198,
      "step": 83149
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.1069414615631104,
      "learning_rate": 0.0004269748294196886,
      "loss": 3.0143,
      "step": 83150
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.234658718109131,
      "learning_rate": 0.0004269711233045616,
      "loss": 2.8834,
      "step": 83151
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2128567695617676,
      "learning_rate": 0.0004269674171658282,
      "loss": 3.0184,
      "step": 83152
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9585750102996826,
      "learning_rate": 0.00042696371100348914,
      "loss": 3.1294,
      "step": 83153
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.067436695098877,
      "learning_rate": 0.00042696000481754517,
      "loss": 3.0746,
      "step": 83154
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5614347457885742,
      "learning_rate": 0.00042695629860799693,
      "loss": 2.821,
      "step": 83155
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7647793292999268,
      "learning_rate": 0.00042695259237484517,
      "loss": 2.9018,
      "step": 83156
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6332790851593018,
      "learning_rate": 0.00042694888611809054,
      "loss": 2.9228,
      "step": 83157
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9168823957443237,
      "learning_rate": 0.0004269451798377336,
      "loss": 2.9779,
      "step": 83158
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9527945518493652,
      "learning_rate": 0.00042694147353377514,
      "loss": 3.0188,
      "step": 83159
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7028840780258179,
      "learning_rate": 0.00042693776720621605,
      "loss": 3.0506,
      "step": 83160
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.394930124282837,
      "learning_rate": 0.00042693406085505663,
      "loss": 3.0311,
      "step": 83161
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.515476942062378,
      "learning_rate": 0.0004269303544802979,
      "loss": 3.0493,
      "step": 83162
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7797003984451294,
      "learning_rate": 0.0004269266480819404,
      "loss": 2.806,
      "step": 83163
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8341598510742188,
      "learning_rate": 0.00042692294165998484,
      "loss": 2.9164,
      "step": 83164
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.998207688331604,
      "learning_rate": 0.00042691923521443186,
      "loss": 3.1584,
      "step": 83165
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7605031728744507,
      "learning_rate": 0.0004269155287452823,
      "loss": 2.9384,
      "step": 83166
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8202705383300781,
      "learning_rate": 0.00042691182225253665,
      "loss": 2.854,
      "step": 83167
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.606350064277649,
      "learning_rate": 0.00042690811573619573,
      "loss": 2.7969,
      "step": 83168
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.706669807434082,
      "learning_rate": 0.0004269044091962602,
      "loss": 2.9165,
      "step": 83169
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.655208945274353,
      "learning_rate": 0.0004269007026327307,
      "loss": 2.8605,
      "step": 83170
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9947375059127808,
      "learning_rate": 0.00042689699604560793,
      "loss": 3.0104,
      "step": 83171
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.446560025215149,
      "learning_rate": 0.0004268932894348928,
      "loss": 3.2626,
      "step": 83172
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.113542079925537,
      "learning_rate": 0.00042688958280058564,
      "loss": 3.0374,
      "step": 83173
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6613274812698364,
      "learning_rate": 0.0004268858761426873,
      "loss": 3.1003,
      "step": 83174
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.801546335220337,
      "learning_rate": 0.0004268821694611986,
      "loss": 2.8885,
      "step": 83175
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8476091623306274,
      "learning_rate": 0.00042687846275612005,
      "loss": 3.0117,
      "step": 83176
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4432194232940674,
      "learning_rate": 0.00042687475602745244,
      "loss": 2.8471,
      "step": 83177
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8874281644821167,
      "learning_rate": 0.0004268710492751964,
      "loss": 2.8816,
      "step": 83178
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.670128583908081,
      "learning_rate": 0.00042686734249935265,
      "loss": 3.0486,
      "step": 83179
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1097075939178467,
      "learning_rate": 0.0004268636356999218,
      "loss": 3.1372,
      "step": 83180
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7331303358078003,
      "learning_rate": 0.00042685992887690457,
      "loss": 2.974,
      "step": 83181
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8834527730941772,
      "learning_rate": 0.00042685622203030184,
      "loss": 3.1939,
      "step": 83182
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8590004444122314,
      "learning_rate": 0.00042685251516011403,
      "loss": 2.8755,
      "step": 83183
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.454944133758545,
      "learning_rate": 0.00042684880826634195,
      "loss": 2.8132,
      "step": 83184
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7965278625488281,
      "learning_rate": 0.00042684510134898636,
      "loss": 3.2411,
      "step": 83185
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2308759689331055,
      "learning_rate": 0.0004268413944080478,
      "loss": 2.9693,
      "step": 83186
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.5036730766296387,
      "learning_rate": 0.000426837687443527,
      "loss": 2.9188,
      "step": 83187
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6832786798477173,
      "learning_rate": 0.0004268339804554248,
      "loss": 2.8917,
      "step": 83188
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.373807430267334,
      "learning_rate": 0.0004268302734437416,
      "loss": 2.9092,
      "step": 83189
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5482547283172607,
      "learning_rate": 0.0004268265664084784,
      "loss": 2.9542,
      "step": 83190
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.433020830154419,
      "learning_rate": 0.0004268228593496357,
      "loss": 3.0478,
      "step": 83191
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1055872440338135,
      "learning_rate": 0.0004268191522672143,
      "loss": 2.8854,
      "step": 83192
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5207430124282837,
      "learning_rate": 0.00042681544516121473,
      "loss": 3.132,
      "step": 83193
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9525065422058105,
      "learning_rate": 0.0004268117380316378,
      "loss": 2.9007,
      "step": 83194
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9763156175613403,
      "learning_rate": 0.00042680803087848416,
      "loss": 3.0006,
      "step": 83195
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8635237216949463,
      "learning_rate": 0.00042680432370175456,
      "loss": 2.8796,
      "step": 83196
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6816025972366333,
      "learning_rate": 0.00042680061650144963,
      "loss": 3.0736,
      "step": 83197
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6385116577148438,
      "learning_rate": 0.00042679690927757,
      "loss": 2.9577,
      "step": 83198
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5721997022628784,
      "learning_rate": 0.0004267932020301165,
      "loss": 2.9915,
      "step": 83199
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1053719520568848,
      "learning_rate": 0.0004267894947590897,
      "loss": 2.8265,
      "step": 83200
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6178532838821411,
      "learning_rate": 0.0004267857874644904,
      "loss": 2.9063,
      "step": 83201
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8774656057357788,
      "learning_rate": 0.0004267820801463193,
      "loss": 3.1375,
      "step": 83202
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.097659111022949,
      "learning_rate": 0.0004267783728045769,
      "loss": 2.8342,
      "step": 83203
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.642330527305603,
      "learning_rate": 0.00042677466543926407,
      "loss": 3.1963,
      "step": 83204
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.983300805091858,
      "learning_rate": 0.00042677095805038136,
      "loss": 2.9023,
      "step": 83205
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.993130683898926,
      "learning_rate": 0.00042676725063792956,
      "loss": 2.8427,
      "step": 83206
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7927380800247192,
      "learning_rate": 0.0004267635432019093,
      "loss": 3.2071,
      "step": 83207
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5649335384368896,
      "learning_rate": 0.0004267598357423214,
      "loss": 3.084,
      "step": 83208
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0535895824432373,
      "learning_rate": 0.00042675612825916645,
      "loss": 2.8906,
      "step": 83209
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.485853910446167,
      "learning_rate": 0.0004267524207524451,
      "loss": 2.9583,
      "step": 83210
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5946629047393799,
      "learning_rate": 0.00042674871322215805,
      "loss": 3.14,
      "step": 83211
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8776155710220337,
      "learning_rate": 0.0004267450056683061,
      "loss": 3.161,
      "step": 83212
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3617041110992432,
      "learning_rate": 0.0004267412980908898,
      "loss": 2.7452,
      "step": 83213
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.834482192993164,
      "learning_rate": 0.0004267375904899099,
      "loss": 3.0926,
      "step": 83214
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6961301565170288,
      "learning_rate": 0.0004267338828653672,
      "loss": 3.1009,
      "step": 83215
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6571662425994873,
      "learning_rate": 0.0004267301752172621,
      "loss": 3.2345,
      "step": 83216
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5026785135269165,
      "learning_rate": 0.00042672646754559555,
      "loss": 2.9118,
      "step": 83217
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4499189853668213,
      "learning_rate": 0.00042672275985036816,
      "loss": 2.941,
      "step": 83218
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5201612710952759,
      "learning_rate": 0.0004267190521315806,
      "loss": 2.8797,
      "step": 83219
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.691172480583191,
      "learning_rate": 0.00042671534438923364,
      "loss": 2.838,
      "step": 83220
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.836024522781372,
      "learning_rate": 0.00042671163662332787,
      "loss": 2.9925,
      "step": 83221
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.472111225128174,
      "learning_rate": 0.00042670792883386395,
      "loss": 2.9987,
      "step": 83222
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6614254713058472,
      "learning_rate": 0.0004267042210208427,
      "loss": 3.0934,
      "step": 83223
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8342580795288086,
      "learning_rate": 0.00042670051318426474,
      "loss": 3.2095,
      "step": 83224
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9004154205322266,
      "learning_rate": 0.00042669680532413074,
      "loss": 2.989,
      "step": 83225
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6833699941635132,
      "learning_rate": 0.0004266930974404415,
      "loss": 3.1822,
      "step": 83226
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.113772392272949,
      "learning_rate": 0.0004266893895331975,
      "loss": 3.043,
      "step": 83227
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3047118186950684,
      "learning_rate": 0.00042668568160239963,
      "loss": 2.9893,
      "step": 83228
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.674843668937683,
      "learning_rate": 0.00042668197364804846,
      "loss": 2.9833,
      "step": 83229
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.287919759750366,
      "learning_rate": 0.00042667826567014473,
      "loss": 2.8273,
      "step": 83230
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.239032506942749,
      "learning_rate": 0.0004266745576686891,
      "loss": 3.1465,
      "step": 83231
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.4480841159820557,
      "learning_rate": 0.00042667084964368227,
      "loss": 3.1207,
      "step": 83232
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7736763954162598,
      "learning_rate": 0.000426667141595125,
      "loss": 2.812,
      "step": 83233
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.8035995960235596,
      "learning_rate": 0.0004266634335230179,
      "loss": 2.8968,
      "step": 83234
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5653836727142334,
      "learning_rate": 0.00042665972542736165,
      "loss": 2.8991,
      "step": 83235
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.522236704826355,
      "learning_rate": 0.000426656017308157,
      "loss": 3.1502,
      "step": 83236
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5901979207992554,
      "learning_rate": 0.00042665230916540456,
      "loss": 2.7944,
      "step": 83237
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.3323557376861572,
      "learning_rate": 0.0004266486009991051,
      "loss": 3.0867,
      "step": 83238
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7446479797363281,
      "learning_rate": 0.0004266448928092593,
      "loss": 3.0915,
      "step": 83239
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2424559593200684,
      "learning_rate": 0.00042664118459586774,
      "loss": 2.9914,
      "step": 83240
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5753324031829834,
      "learning_rate": 0.0004266374763589312,
      "loss": 3.2132,
      "step": 83241
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6631304025650024,
      "learning_rate": 0.0004266337680984504,
      "loss": 3.1892,
      "step": 83242
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5837041139602661,
      "learning_rate": 0.00042663005981442604,
      "loss": 3.0076,
      "step": 83243
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5380640029907227,
      "learning_rate": 0.0004266263515068587,
      "loss": 2.8762,
      "step": 83244
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5681180953979492,
      "learning_rate": 0.00042662264317574913,
      "loss": 3.0967,
      "step": 83245
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9305511713027954,
      "learning_rate": 0.0004266189348210981,
      "loss": 2.9225,
      "step": 83246
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2891170978546143,
      "learning_rate": 0.00042661522644290613,
      "loss": 2.7459,
      "step": 83247
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1447317600250244,
      "learning_rate": 0.00042661151804117406,
      "loss": 2.8313,
      "step": 83248
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6956020593643188,
      "learning_rate": 0.0004266078096159025,
      "loss": 2.8949,
      "step": 83249
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5454102754592896,
      "learning_rate": 0.0004266041011670921,
      "loss": 3.0344,
      "step": 83250
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.751690149307251,
      "learning_rate": 0.00042660039269474365,
      "loss": 3.0898,
      "step": 83251
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7796550989151,
      "learning_rate": 0.00042659668419885784,
      "loss": 2.8421,
      "step": 83252
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8390473127365112,
      "learning_rate": 0.00042659297567943525,
      "loss": 3.2249,
      "step": 83253
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6155426502227783,
      "learning_rate": 0.0004265892671364766,
      "loss": 2.9548,
      "step": 83254
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8787485361099243,
      "learning_rate": 0.0004265855585699828,
      "loss": 2.8082,
      "step": 83255
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.1731925010681152,
      "learning_rate": 0.0004265818499799542,
      "loss": 3.0229,
      "step": 83256
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7117434740066528,
      "learning_rate": 0.0004265781413663917,
      "loss": 2.7988,
      "step": 83257
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6376047134399414,
      "learning_rate": 0.0004265744327292959,
      "loss": 3.0798,
      "step": 83258
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.043895721435547,
      "learning_rate": 0.0004265707240686675,
      "loss": 2.8854,
      "step": 83259
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.044863700866699,
      "learning_rate": 0.0004265670153845072,
      "loss": 3.0447,
      "step": 83260
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.400083303451538,
      "learning_rate": 0.00042656330667681586,
      "loss": 3.2909,
      "step": 83261
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.163825273513794,
      "learning_rate": 0.00042655959794559383,
      "loss": 2.9827,
      "step": 83262
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.7089428901672363,
      "learning_rate": 0.00042655588919084206,
      "loss": 2.8587,
      "step": 83263
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7523317337036133,
      "learning_rate": 0.0004265521804125612,
      "loss": 2.8147,
      "step": 83264
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.6443824768066406,
      "learning_rate": 0.00042654847161075187,
      "loss": 2.8456,
      "step": 83265
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0493695735931396,
      "learning_rate": 0.00042654476278541475,
      "loss": 3.1507,
      "step": 83266
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.0321829319000244,
      "learning_rate": 0.0004265410539365506,
      "loss": 2.8507,
      "step": 83267
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8558439016342163,
      "learning_rate": 0.00042653734506416,
      "loss": 3.05,
      "step": 83268
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.140152931213379,
      "learning_rate": 0.00042653363616824383,
      "loss": 2.9792,
      "step": 83269
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.551243305206299,
      "learning_rate": 0.0004265299272488027,
      "loss": 3.1707,
      "step": 83270
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.748663306236267,
      "learning_rate": 0.0004265262183058372,
      "loss": 2.7186,
      "step": 83271
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0645570755004883,
      "learning_rate": 0.0004265225093393481,
      "loss": 2.8339,
      "step": 83272
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.5645267963409424,
      "learning_rate": 0.0004265188003493361,
      "loss": 3.0438,
      "step": 83273
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.7999398708343506,
      "learning_rate": 0.00042651509133580175,
      "loss": 3.1382,
      "step": 83274
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.874619722366333,
      "learning_rate": 0.00042651138229874596,
      "loss": 3.1836,
      "step": 83275
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7780930995941162,
      "learning_rate": 0.00042650767323816933,
      "loss": 3.0336,
      "step": 83276
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.2989087104797363,
      "learning_rate": 0.0004265039641540725,
      "loss": 3.0122,
      "step": 83277
    },
    {
      "epoch": 1.08,
      "grad_norm": 4.003824234008789,
      "learning_rate": 0.0004265002550464562,
      "loss": 3.3123,
      "step": 83278
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.011096239089966,
      "learning_rate": 0.0004264965459153211,
      "loss": 3.2353,
      "step": 83279
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5822726488113403,
      "learning_rate": 0.0004264928367606679,
      "loss": 3.0221,
      "step": 83280
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.201237201690674,
      "learning_rate": 0.0004264891275824973,
      "loss": 3.2779,
      "step": 83281
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.5790650844573975,
      "learning_rate": 0.00042648541838081,
      "loss": 3.3195,
      "step": 83282
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8476452827453613,
      "learning_rate": 0.00042648170915560667,
      "loss": 2.9837,
      "step": 83283
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8292343616485596,
      "learning_rate": 0.00042647799990688795,
      "loss": 2.8992,
      "step": 83284
    },
    {
      "epoch": 1.08,
      "grad_norm": 4.454975128173828,
      "learning_rate": 0.0004264742906346547,
      "loss": 2.7861,
      "step": 83285
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5690078735351562,
      "learning_rate": 0.00042647058133890743,
      "loss": 2.95,
      "step": 83286
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6102468967437744,
      "learning_rate": 0.0004264668720196469,
      "loss": 3.0321,
      "step": 83287
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.0821421146392822,
      "learning_rate": 0.00042646316267687373,
      "loss": 2.938,
      "step": 83288
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8116480112075806,
      "learning_rate": 0.00042645945331058874,
      "loss": 2.9283,
      "step": 83289
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.840588092803955,
      "learning_rate": 0.00042645574392079254,
      "loss": 2.7053,
      "step": 83290
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.818265676498413,
      "learning_rate": 0.00042645203450748586,
      "loss": 2.7695,
      "step": 83291
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5905938148498535,
      "learning_rate": 0.0004264483250706693,
      "loss": 3.1573,
      "step": 83292
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6325258016586304,
      "learning_rate": 0.00042644461561034357,
      "loss": 2.9574,
      "step": 83293
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7356669902801514,
      "learning_rate": 0.00042644090612650954,
      "loss": 3.0545,
      "step": 83294
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.809281587600708,
      "learning_rate": 0.0004264371966191676,
      "loss": 3.1225,
      "step": 83295
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4212578535079956,
      "learning_rate": 0.0004264334870883187,
      "loss": 2.8176,
      "step": 83296
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4984647035598755,
      "learning_rate": 0.0004264297775339634,
      "loss": 3.0616,
      "step": 83297
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8097096681594849,
      "learning_rate": 0.00042642606795610243,
      "loss": 3.0913,
      "step": 83298
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6859748363494873,
      "learning_rate": 0.00042642235835473643,
      "loss": 2.9682,
      "step": 83299
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6341158151626587,
      "learning_rate": 0.00042641864872986624,
      "loss": 2.9149,
      "step": 83300
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8082070350646973,
      "learning_rate": 0.00042641493908149227,
      "loss": 2.9519,
      "step": 83301
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7233027219772339,
      "learning_rate": 0.00042641122940961546,
      "loss": 3.0484,
      "step": 83302
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8856332302093506,
      "learning_rate": 0.0004264075197142364,
      "loss": 3.2043,
      "step": 83303
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7760010957717896,
      "learning_rate": 0.00042640380999535584,
      "loss": 3.0585,
      "step": 83304
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.217226982116699,
      "learning_rate": 0.00042640010025297444,
      "loss": 3.0836,
      "step": 83305
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8597838878631592,
      "learning_rate": 0.0004263963904870928,
      "loss": 3.2073,
      "step": 83306
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.775376558303833,
      "learning_rate": 0.00042639268069771173,
      "loss": 2.7817,
      "step": 83307
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7478011846542358,
      "learning_rate": 0.0004263889708848318,
      "loss": 3.0047,
      "step": 83308
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4948656558990479,
      "learning_rate": 0.00042638526104845384,
      "loss": 3.1381,
      "step": 83309
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5518436431884766,
      "learning_rate": 0.0004263815511885785,
      "loss": 3.133,
      "step": 83310
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1096456050872803,
      "learning_rate": 0.00042637784130520646,
      "loss": 2.847,
      "step": 83311
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.669571042060852,
      "learning_rate": 0.0004263741313983383,
      "loss": 2.9143,
      "step": 83312
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7056852579116821,
      "learning_rate": 0.0004263704214679748,
      "loss": 3.1212,
      "step": 83313
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6273139715194702,
      "learning_rate": 0.00042636671151411674,
      "loss": 3.0909,
      "step": 83314
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7208077907562256,
      "learning_rate": 0.0004263630015367647,
      "loss": 2.955,
      "step": 83315
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7724281549453735,
      "learning_rate": 0.0004263592915359194,
      "loss": 2.9425,
      "step": 83316
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.601006269454956,
      "learning_rate": 0.0004263555815115814,
      "loss": 2.8081,
      "step": 83317
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.3857786655426025,
      "learning_rate": 0.0004263518714637517,
      "loss": 3.2109,
      "step": 83318
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5397759675979614,
      "learning_rate": 0.00042634816139243073,
      "loss": 3.0335,
      "step": 83319
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.7269574403762817,
      "learning_rate": 0.0004263444512976192,
      "loss": 3.1946,
      "step": 83320
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.6690869331359863,
      "learning_rate": 0.0004263407411793179,
      "loss": 3.2184,
      "step": 83321
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.5588669776916504,
      "learning_rate": 0.0004263370310375275,
      "loss": 2.9934,
      "step": 83322
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.021807909011841,
      "learning_rate": 0.00042633332087224857,
      "loss": 2.9741,
      "step": 83323
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.4743491411209106,
      "learning_rate": 0.00042632961068348195,
      "loss": 3.1676,
      "step": 83324
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.9012036323547363,
      "learning_rate": 0.0004263259004712283,
      "loss": 2.8501,
      "step": 83325
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.8606725931167603,
      "learning_rate": 0.0004263221902354882,
      "loss": 2.9912,
      "step": 83326
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.1983861923217773,
      "learning_rate": 0.0004263184799762625,
      "loss": 3.099,
      "step": 83327
    },
    {
      "epoch": 1.08,
      "grad_norm": 1.572084665298462,
      "learning_rate": 0.00042631476969355174,
      "loss": 2.8586,
      "step": 83328
    },
    {
      "epoch": 1.08,
      "grad_norm": 2.135258674621582,
      "learning_rate": 0.00042631105938735676,
      "loss": 2.8747,
      "step": 83329
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4810768365859985,
      "learning_rate": 0.00042630734905767806,
      "loss": 2.9394,
      "step": 83330
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0875041484832764,
      "learning_rate": 0.0004263036387045165,
      "loss": 3.0971,
      "step": 83331
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5779050588607788,
      "learning_rate": 0.00042629992832787274,
      "loss": 2.7752,
      "step": 83332
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.846947431564331,
      "learning_rate": 0.0004262962179277474,
      "loss": 2.9301,
      "step": 83333
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1059083938598633,
      "learning_rate": 0.0004262925075041413,
      "loss": 3.0256,
      "step": 83334
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8736916780471802,
      "learning_rate": 0.00042628879705705494,
      "loss": 3.0624,
      "step": 83335
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6521862745285034,
      "learning_rate": 0.0004262850865864891,
      "loss": 3.0911,
      "step": 83336
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4710769653320312,
      "learning_rate": 0.00042628137609244455,
      "loss": 3.2247,
      "step": 83337
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6194649934768677,
      "learning_rate": 0.00042627766557492186,
      "loss": 3.0078,
      "step": 83338
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9176501035690308,
      "learning_rate": 0.00042627395503392175,
      "loss": 2.8397,
      "step": 83339
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.329035997390747,
      "learning_rate": 0.000426270244469445,
      "loss": 2.8636,
      "step": 83340
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8684242963790894,
      "learning_rate": 0.00042626653388149214,
      "loss": 2.9059,
      "step": 83341
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8635263442993164,
      "learning_rate": 0.000426262823270064,
      "loss": 2.9957,
      "step": 83342
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0671379566192627,
      "learning_rate": 0.00042625911263516125,
      "loss": 2.7258,
      "step": 83343
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8395735025405884,
      "learning_rate": 0.0004262554019767845,
      "loss": 3.0819,
      "step": 83344
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.665931224822998,
      "learning_rate": 0.00042625169129493443,
      "loss": 3.1116,
      "step": 83345
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0043203830718994,
      "learning_rate": 0.0004262479805896119,
      "loss": 3.0663,
      "step": 83346
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9749863147735596,
      "learning_rate": 0.00042624426986081744,
      "loss": 2.8502,
      "step": 83347
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8457392454147339,
      "learning_rate": 0.00042624055910855176,
      "loss": 3.1044,
      "step": 83348
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2414937019348145,
      "learning_rate": 0.0004262368483328156,
      "loss": 2.9187,
      "step": 83349
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7072899341583252,
      "learning_rate": 0.00042623313753360965,
      "loss": 2.9953,
      "step": 83350
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6297041177749634,
      "learning_rate": 0.0004262294267109345,
      "loss": 2.7912,
      "step": 83351
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9799968004226685,
      "learning_rate": 0.000426225715864791,
      "loss": 2.9935,
      "step": 83352
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5733561515808105,
      "learning_rate": 0.00042622200499517975,
      "loss": 2.6867,
      "step": 83353
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1696975231170654,
      "learning_rate": 0.00042621829410210136,
      "loss": 2.6832,
      "step": 83354
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4662294387817383,
      "learning_rate": 0.00042621458318555677,
      "loss": 3.3467,
      "step": 83355
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.575277328491211,
      "learning_rate": 0.00042621087224554635,
      "loss": 2.9133,
      "step": 83356
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1523921489715576,
      "learning_rate": 0.000426207161282071,
      "loss": 2.8088,
      "step": 83357
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7859033346176147,
      "learning_rate": 0.00042620345029513137,
      "loss": 2.9022,
      "step": 83358
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6012436151504517,
      "learning_rate": 0.00042619973928472815,
      "loss": 2.9572,
      "step": 83359
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8346798419952393,
      "learning_rate": 0.00042619602825086193,
      "loss": 2.9808,
      "step": 83360
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.686978816986084,
      "learning_rate": 0.00042619231719353363,
      "loss": 2.7875,
      "step": 83361
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9619590044021606,
      "learning_rate": 0.0004261886061127437,
      "loss": 3.0338,
      "step": 83362
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4951919317245483,
      "learning_rate": 0.0004261848950084929,
      "loss": 3.307,
      "step": 83363
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.800671100616455,
      "learning_rate": 0.00042618118388078205,
      "loss": 2.9326,
      "step": 83364
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1915793418884277,
      "learning_rate": 0.0004261774727296117,
      "loss": 2.856,
      "step": 83365
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.197902202606201,
      "learning_rate": 0.0004261737615549825,
      "loss": 2.933,
      "step": 83366
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.012906312942505,
      "learning_rate": 0.0004261700503568953,
      "loss": 2.927,
      "step": 83367
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.63898766040802,
      "learning_rate": 0.0004261663391353507,
      "loss": 2.9292,
      "step": 83368
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.487991213798523,
      "learning_rate": 0.0004261626278903493,
      "loss": 3.3268,
      "step": 83369
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0147030353546143,
      "learning_rate": 0.00042615891662189204,
      "loss": 3.2454,
      "step": 83370
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8509215116500854,
      "learning_rate": 0.0004261552053299793,
      "loss": 3.1491,
      "step": 83371
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8701012134552002,
      "learning_rate": 0.00042615149401461203,
      "loss": 3.1776,
      "step": 83372
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.760176420211792,
      "learning_rate": 0.0004261477826757908,
      "loss": 2.9547,
      "step": 83373
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4442923069000244,
      "learning_rate": 0.0004261440713135162,
      "loss": 3.0446,
      "step": 83374
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.095977544784546,
      "learning_rate": 0.00042614035992778916,
      "loss": 2.7745,
      "step": 83375
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7230114936828613,
      "learning_rate": 0.00042613664851861026,
      "loss": 2.693,
      "step": 83376
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1321587562561035,
      "learning_rate": 0.0004261329370859802,
      "loss": 3.1592,
      "step": 83377
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6464182138442993,
      "learning_rate": 0.00042612922562989956,
      "loss": 2.9111,
      "step": 83378
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6180862188339233,
      "learning_rate": 0.00042612551415036905,
      "loss": 2.9521,
      "step": 83379
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7749583721160889,
      "learning_rate": 0.00042612180264738953,
      "loss": 2.9971,
      "step": 83380
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9408944845199585,
      "learning_rate": 0.00042611809112096165,
      "loss": 2.7748,
      "step": 83381
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5748286247253418,
      "learning_rate": 0.00042611437957108594,
      "loss": 3.0051,
      "step": 83382
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.618391990661621,
      "learning_rate": 0.0004261106679977632,
      "loss": 2.9564,
      "step": 83383
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9768441915512085,
      "learning_rate": 0.0004261069564009941,
      "loss": 3.0962,
      "step": 83384
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4955006837844849,
      "learning_rate": 0.00042610324478077936,
      "loss": 3.2187,
      "step": 83385
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4783507585525513,
      "learning_rate": 0.0004260995331371196,
      "loss": 3.1169,
      "step": 83386
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8625479936599731,
      "learning_rate": 0.0004260958214700156,
      "loss": 2.8334,
      "step": 83387
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9030524492263794,
      "learning_rate": 0.000426092109779468,
      "loss": 3.1377,
      "step": 83388
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5400323867797852,
      "learning_rate": 0.0004260883980654775,
      "loss": 2.8562,
      "step": 83389
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9884037971496582,
      "learning_rate": 0.00042608468632804477,
      "loss": 2.9757,
      "step": 83390
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.793094515800476,
      "learning_rate": 0.0004260809745671705,
      "loss": 3.0603,
      "step": 83391
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9783735275268555,
      "learning_rate": 0.00042607726278285545,
      "loss": 3.1653,
      "step": 83392
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6526598930358887,
      "learning_rate": 0.00042607355097510025,
      "loss": 3.0107,
      "step": 83393
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9559499025344849,
      "learning_rate": 0.00042606983914390557,
      "loss": 3.1635,
      "step": 83394
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4542734622955322,
      "learning_rate": 0.0004260661272892722,
      "loss": 2.9278,
      "step": 83395
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.070359945297241,
      "learning_rate": 0.0004260624154112006,
      "loss": 3.159,
      "step": 83396
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6276724338531494,
      "learning_rate": 0.0004260587035096917,
      "loss": 3.1798,
      "step": 83397
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.735978126525879,
      "learning_rate": 0.0004260549915847461,
      "loss": 3.0964,
      "step": 83398
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5879770517349243,
      "learning_rate": 0.00042605127963636456,
      "loss": 2.9492,
      "step": 83399
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7370641231536865,
      "learning_rate": 0.0004260475676645476,
      "loss": 3.0432,
      "step": 83400
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.127089023590088,
      "learning_rate": 0.00042604385566929613,
      "loss": 3.0234,
      "step": 83401
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.758306622505188,
      "learning_rate": 0.0004260401436506106,
      "loss": 2.991,
      "step": 83402
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.682049036026001,
      "learning_rate": 0.000426036431608492,
      "loss": 2.7076,
      "step": 83403
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8893648386001587,
      "learning_rate": 0.0004260327195429407,
      "loss": 3.0265,
      "step": 83404
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5743606090545654,
      "learning_rate": 0.00042602900745395756,
      "loss": 2.7929,
      "step": 83405
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.002882957458496,
      "learning_rate": 0.00042602529534154334,
      "loss": 3.2404,
      "step": 83406
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2767486572265625,
      "learning_rate": 0.00042602158320569853,
      "loss": 3.027,
      "step": 83407
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8053845167160034,
      "learning_rate": 0.00042601787104642395,
      "loss": 3.1308,
      "step": 83408
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.086038112640381,
      "learning_rate": 0.00042601415886372036,
      "loss": 2.7962,
      "step": 83409
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6178783178329468,
      "learning_rate": 0.00042601044665758824,
      "loss": 3.0697,
      "step": 83410
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.904748558998108,
      "learning_rate": 0.0004260067344280285,
      "loss": 3.1157,
      "step": 83411
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6874741315841675,
      "learning_rate": 0.00042600302217504164,
      "loss": 2.9499,
      "step": 83412
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0212221145629883,
      "learning_rate": 0.00042599930989862855,
      "loss": 2.8577,
      "step": 83413
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1116549968719482,
      "learning_rate": 0.0004259955975987897,
      "loss": 2.9824,
      "step": 83414
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.697685956954956,
      "learning_rate": 0.000425991885275526,
      "loss": 3.1721,
      "step": 83415
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6092379093170166,
      "learning_rate": 0.0004259881729288379,
      "loss": 2.9221,
      "step": 83416
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7773921489715576,
      "learning_rate": 0.0004259844605587263,
      "loss": 3.1939,
      "step": 83417
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0550589561462402,
      "learning_rate": 0.00042598074816519176,
      "loss": 2.9636,
      "step": 83418
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4609856605529785,
      "learning_rate": 0.0004259770357482351,
      "loss": 3.0197,
      "step": 83419
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4461339712142944,
      "learning_rate": 0.0004259733233078569,
      "loss": 3.1764,
      "step": 83420
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5463974475860596,
      "learning_rate": 0.0004259696108440579,
      "loss": 3.1803,
      "step": 83421
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8477011919021606,
      "learning_rate": 0.00042596589835683867,
      "loss": 2.8807,
      "step": 83422
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7885485887527466,
      "learning_rate": 0.00042596218584620014,
      "loss": 3.0136,
      "step": 83423
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9558610916137695,
      "learning_rate": 0.0004259584733121427,
      "loss": 2.8888,
      "step": 83424
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.367765188217163,
      "learning_rate": 0.00042595476075466736,
      "loss": 3.051,
      "step": 83425
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3682501316070557,
      "learning_rate": 0.0004259510481737746,
      "loss": 3.1856,
      "step": 83426
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.909009575843811,
      "learning_rate": 0.00042594733556946515,
      "loss": 2.8363,
      "step": 83427
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1672682762145996,
      "learning_rate": 0.0004259436229417398,
      "loss": 2.9408,
      "step": 83428
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6715415716171265,
      "learning_rate": 0.000425939910290599,
      "loss": 2.8693,
      "step": 83429
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7669923305511475,
      "learning_rate": 0.00042593619761604356,
      "loss": 2.9806,
      "step": 83430
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9654786586761475,
      "learning_rate": 0.0004259324849180744,
      "loss": 2.9709,
      "step": 83431
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7669166326522827,
      "learning_rate": 0.0004259287721966919,
      "loss": 2.8326,
      "step": 83432
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6754423379898071,
      "learning_rate": 0.00042592505945189687,
      "loss": 2.893,
      "step": 83433
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6364340782165527,
      "learning_rate": 0.00042592134668369006,
      "loss": 3.0301,
      "step": 83434
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6289970874786377,
      "learning_rate": 0.00042591763389207197,
      "loss": 2.8685,
      "step": 83435
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4207112789154053,
      "learning_rate": 0.00042591392107704345,
      "loss": 3.4022,
      "step": 83436
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.49076509475708,
      "learning_rate": 0.00042591020823860533,
      "loss": 3.1385,
      "step": 83437
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.651524305343628,
      "learning_rate": 0.00042590649537675794,
      "loss": 2.8889,
      "step": 83438
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6270835399627686,
      "learning_rate": 0.0004259027824915022,
      "loss": 3.2264,
      "step": 83439
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8417538404464722,
      "learning_rate": 0.00042589906958283877,
      "loss": 2.9449,
      "step": 83440
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.935226559638977,
      "learning_rate": 0.0004258953566507683,
      "loss": 3.1737,
      "step": 83441
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.295959234237671,
      "learning_rate": 0.0004258916436952915,
      "loss": 2.7644,
      "step": 83442
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6175484657287598,
      "learning_rate": 0.00042588793071640906,
      "loss": 3.1817,
      "step": 83443
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6562047004699707,
      "learning_rate": 0.00042588421771412176,
      "loss": 2.8941,
      "step": 83444
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.7209346294403076,
      "learning_rate": 0.00042588050468843016,
      "loss": 3.1415,
      "step": 83445
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.142179489135742,
      "learning_rate": 0.0004258767916393349,
      "loss": 3.1804,
      "step": 83446
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6822158098220825,
      "learning_rate": 0.00042587307856683696,
      "loss": 3.0347,
      "step": 83447
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.675727367401123,
      "learning_rate": 0.0004258693654709367,
      "loss": 2.8782,
      "step": 83448
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8066309690475464,
      "learning_rate": 0.000425865652351635,
      "loss": 3.2025,
      "step": 83449
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.335702896118164,
      "learning_rate": 0.0004258619392089325,
      "loss": 3.1629,
      "step": 83450
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5166668891906738,
      "learning_rate": 0.00042585822604282984,
      "loss": 2.9786,
      "step": 83451
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.172248601913452,
      "learning_rate": 0.00042585451285332776,
      "loss": 2.9005,
      "step": 83452
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.2287697792053223,
      "learning_rate": 0.0004258507996404271,
      "loss": 2.9041,
      "step": 83453
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7351397275924683,
      "learning_rate": 0.00042584708640412824,
      "loss": 2.8789,
      "step": 83454
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.711745023727417,
      "learning_rate": 0.00042584337314443204,
      "loss": 2.9036,
      "step": 83455
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0643885135650635,
      "learning_rate": 0.00042583965986133925,
      "loss": 2.9253,
      "step": 83456
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.0529985427856445,
      "learning_rate": 0.0004258359465548505,
      "loss": 2.8541,
      "step": 83457
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.787432312965393,
      "learning_rate": 0.00042583223322496637,
      "loss": 3.1496,
      "step": 83458
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3737142086029053,
      "learning_rate": 0.0004258285198716878,
      "loss": 2.9786,
      "step": 83459
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.778322458267212,
      "learning_rate": 0.00042582480649501524,
      "loss": 2.906,
      "step": 83460
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.123521089553833,
      "learning_rate": 0.00042582109309494945,
      "loss": 2.8336,
      "step": 83461
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6817564964294434,
      "learning_rate": 0.0004258173796714912,
      "loss": 2.9185,
      "step": 83462
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2447783946990967,
      "learning_rate": 0.0004258136662246411,
      "loss": 2.9714,
      "step": 83463
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2512454986572266,
      "learning_rate": 0.0004258099527543998,
      "loss": 3.0106,
      "step": 83464
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.791339635848999,
      "learning_rate": 0.0004258062392607682,
      "loss": 3.0563,
      "step": 83465
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.004079580307007,
      "learning_rate": 0.0004258025257437468,
      "loss": 3.1949,
      "step": 83466
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.454089403152466,
      "learning_rate": 0.0004257988122033362,
      "loss": 2.9282,
      "step": 83467
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.959799289703369,
      "learning_rate": 0.00042579509863953743,
      "loss": 2.918,
      "step": 83468
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4437791109085083,
      "learning_rate": 0.00042579138505235086,
      "loss": 2.8851,
      "step": 83469
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.7725021839141846,
      "learning_rate": 0.00042578767144177724,
      "loss": 3.2266,
      "step": 83470
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3352952003479004,
      "learning_rate": 0.00042578395780781743,
      "loss": 3.2381,
      "step": 83471
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4913122653961182,
      "learning_rate": 0.0004257802441504719,
      "loss": 3.2154,
      "step": 83472
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.596498727798462,
      "learning_rate": 0.00042577653046974154,
      "loss": 3.1231,
      "step": 83473
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7186906337738037,
      "learning_rate": 0.00042577281676562696,
      "loss": 3.1031,
      "step": 83474
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.614219069480896,
      "learning_rate": 0.00042576910303812886,
      "loss": 3.1168,
      "step": 83475
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8277230262756348,
      "learning_rate": 0.00042576538928724777,
      "loss": 2.946,
      "step": 83476
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7078297138214111,
      "learning_rate": 0.00042576167551298467,
      "loss": 3.0814,
      "step": 83477
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4598760604858398,
      "learning_rate": 0.00042575796171534,
      "loss": 3.0542,
      "step": 83478
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8244746923446655,
      "learning_rate": 0.0004257542478943146,
      "loss": 3.1919,
      "step": 83479
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.983607292175293,
      "learning_rate": 0.00042575053404990913,
      "loss": 3.0049,
      "step": 83480
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.485839605331421,
      "learning_rate": 0.0004257468201821242,
      "loss": 3.2224,
      "step": 83481
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7591317892074585,
      "learning_rate": 0.00042574310629096054,
      "loss": 2.9787,
      "step": 83482
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7361385822296143,
      "learning_rate": 0.0004257393923764189,
      "loss": 3.2449,
      "step": 83483
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7455967664718628,
      "learning_rate": 0.0004257356784385,
      "loss": 3.2271,
      "step": 83484
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4305180311203003,
      "learning_rate": 0.0004257319644772044,
      "loss": 2.975,
      "step": 83485
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.076127767562866,
      "learning_rate": 0.00042572825049253286,
      "loss": 3.1604,
      "step": 83486
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.5494167804718018,
      "learning_rate": 0.0004257245364844861,
      "loss": 3.0368,
      "step": 83487
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7537957429885864,
      "learning_rate": 0.0004257208224530647,
      "loss": 2.8195,
      "step": 83488
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3562257289886475,
      "learning_rate": 0.0004257171083982695,
      "loss": 3.0862,
      "step": 83489
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7533634901046753,
      "learning_rate": 0.000425713394320101,
      "loss": 3.2551,
      "step": 83490
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1151959896087646,
      "learning_rate": 0.00042570968021856016,
      "loss": 2.961,
      "step": 83491
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8737735748291016,
      "learning_rate": 0.0004257059660936475,
      "loss": 2.7983,
      "step": 83492
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7353568077087402,
      "learning_rate": 0.00042570225194536363,
      "loss": 3.1329,
      "step": 83493
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.731017827987671,
      "learning_rate": 0.00042569853777370937,
      "loss": 2.7535,
      "step": 83494
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8264975547790527,
      "learning_rate": 0.00042569482357868545,
      "loss": 2.8582,
      "step": 83495
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.483886957168579,
      "learning_rate": 0.00042569110936029243,
      "loss": 3.1549,
      "step": 83496
    },
    {
      "epoch": 1.09,
      "grad_norm": 4.5533576011657715,
      "learning_rate": 0.00042568739511853105,
      "loss": 3.0194,
      "step": 83497
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.022970199584961,
      "learning_rate": 0.00042568368085340203,
      "loss": 3.0559,
      "step": 83498
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.863999366760254,
      "learning_rate": 0.000425679966564906,
      "loss": 3.0858,
      "step": 83499
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2759761810302734,
      "learning_rate": 0.00042567625225304375,
      "loss": 3.0534,
      "step": 83500
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.540304660797119,
      "learning_rate": 0.00042567253791781596,
      "loss": 3.0465,
      "step": 83501
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.818244218826294,
      "learning_rate": 0.0004256688235592232,
      "loss": 2.7662,
      "step": 83502
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.595000147819519,
      "learning_rate": 0.00042566510917726617,
      "loss": 3.1254,
      "step": 83503
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.647595167160034,
      "learning_rate": 0.0004256613947719458,
      "loss": 2.94,
      "step": 83504
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4994051456451416,
      "learning_rate": 0.00042565768034326243,
      "loss": 3.263,
      "step": 83505
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4545209407806396,
      "learning_rate": 0.000425653965891217,
      "loss": 3.1304,
      "step": 83506
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6295000314712524,
      "learning_rate": 0.00042565025141581007,
      "loss": 2.8953,
      "step": 83507
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.405827283859253,
      "learning_rate": 0.0004256465369170425,
      "loss": 2.9679,
      "step": 83508
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9456706047058105,
      "learning_rate": 0.00042564282239491476,
      "loss": 3.0524,
      "step": 83509
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4450215101242065,
      "learning_rate": 0.0004256391078494277,
      "loss": 2.8573,
      "step": 83510
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7278188467025757,
      "learning_rate": 0.000425635393280582,
      "loss": 3.2528,
      "step": 83511
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6824870109558105,
      "learning_rate": 0.0004256316786883782,
      "loss": 3.0051,
      "step": 83512
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7077159881591797,
      "learning_rate": 0.0004256279640728172,
      "loss": 2.977,
      "step": 83513
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.568924903869629,
      "learning_rate": 0.0004256242494338995,
      "loss": 3.1625,
      "step": 83514
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4784072637557983,
      "learning_rate": 0.00042562053477162594,
      "loss": 2.8451,
      "step": 83515
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6813501119613647,
      "learning_rate": 0.00042561682008599713,
      "loss": 3.1039,
      "step": 83516
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8267836570739746,
      "learning_rate": 0.00042561310537701384,
      "loss": 3.0015,
      "step": 83517
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.813457727432251,
      "learning_rate": 0.0004256093906446766,
      "loss": 3.0432,
      "step": 83518
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.634418249130249,
      "learning_rate": 0.0004256056758889863,
      "loss": 2.9152,
      "step": 83519
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8384509086608887,
      "learning_rate": 0.0004256019611099435,
      "loss": 3.1882,
      "step": 83520
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.670454740524292,
      "learning_rate": 0.0004255982463075489,
      "loss": 3.0036,
      "step": 83521
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.859381914138794,
      "learning_rate": 0.00042559453148180323,
      "loss": 2.6408,
      "step": 83522
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.61666738986969,
      "learning_rate": 0.00042559081663270725,
      "loss": 3.0694,
      "step": 83523
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.982425570487976,
      "learning_rate": 0.00042558710176026146,
      "loss": 2.8654,
      "step": 83524
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9057385921478271,
      "learning_rate": 0.0004255833868644666,
      "loss": 2.9631,
      "step": 83525
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5432624816894531,
      "learning_rate": 0.0004255796719453236,
      "loss": 3.1307,
      "step": 83526
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7740956544876099,
      "learning_rate": 0.0004255759570028328,
      "loss": 3.0316,
      "step": 83527
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4526441097259521,
      "learning_rate": 0.00042557224203699514,
      "loss": 2.7581,
      "step": 83528
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5958482027053833,
      "learning_rate": 0.00042556852704781124,
      "loss": 3.2122,
      "step": 83529
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4690158367156982,
      "learning_rate": 0.00042556481203528176,
      "loss": 2.9031,
      "step": 83530
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4934988021850586,
      "learning_rate": 0.0004255610969994074,
      "loss": 3.1051,
      "step": 83531
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9685946702957153,
      "learning_rate": 0.0004255573819401889,
      "loss": 3.0631,
      "step": 83532
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5301164388656616,
      "learning_rate": 0.00042555366685762685,
      "loss": 3.199,
      "step": 83533
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8820903301239014,
      "learning_rate": 0.00042554995175172203,
      "loss": 3.156,
      "step": 83534
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.860998511314392,
      "learning_rate": 0.0004255462366224751,
      "loss": 3.0727,
      "step": 83535
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6104737520217896,
      "learning_rate": 0.0004255425214698868,
      "loss": 3.2332,
      "step": 83536
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9423117637634277,
      "learning_rate": 0.0004255388062939577,
      "loss": 2.8072,
      "step": 83537
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3601810932159424,
      "learning_rate": 0.0004255350910946887,
      "loss": 3.0211,
      "step": 83538
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5537481307983398,
      "learning_rate": 0.0004255313758720802,
      "loss": 3.2831,
      "step": 83539
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.058821439743042,
      "learning_rate": 0.0004255276606261331,
      "loss": 3.0234,
      "step": 83540
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.989139199256897,
      "learning_rate": 0.00042552394535684806,
      "loss": 3.2906,
      "step": 83541
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5530672073364258,
      "learning_rate": 0.00042552023006422573,
      "loss": 3.1104,
      "step": 83542
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8779107332229614,
      "learning_rate": 0.0004255165147482668,
      "loss": 3.0973,
      "step": 83543
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7496473789215088,
      "learning_rate": 0.0004255127994089721,
      "loss": 2.9277,
      "step": 83544
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.922692894935608,
      "learning_rate": 0.00042550908404634206,
      "loss": 2.9527,
      "step": 83545
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8794169425964355,
      "learning_rate": 0.0004255053686603776,
      "loss": 2.981,
      "step": 83546
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.264427661895752,
      "learning_rate": 0.0004255016532510793,
      "loss": 3.1606,
      "step": 83547
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7340279817581177,
      "learning_rate": 0.0004254979378184478,
      "loss": 3.0628,
      "step": 83548
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.723915696144104,
      "learning_rate": 0.0004254942223624839,
      "loss": 3.2451,
      "step": 83549
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9061448574066162,
      "learning_rate": 0.00042549050688318836,
      "loss": 3.05,
      "step": 83550
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8186008930206299,
      "learning_rate": 0.00042548679138056165,
      "loss": 3.0064,
      "step": 83551
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5415360927581787,
      "learning_rate": 0.0004254830758546046,
      "loss": 3.0219,
      "step": 83552
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.8133530616760254,
      "learning_rate": 0.0004254793603053179,
      "loss": 3.0281,
      "step": 83553
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0909106731414795,
      "learning_rate": 0.0004254756447327022,
      "loss": 3.1239,
      "step": 83554
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2032313346862793,
      "learning_rate": 0.0004254719291367582,
      "loss": 2.9415,
      "step": 83555
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7870053052902222,
      "learning_rate": 0.0004254682135174867,
      "loss": 2.8338,
      "step": 83556
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9705694913864136,
      "learning_rate": 0.00042546449787488814,
      "loss": 3.0382,
      "step": 83557
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0868046283721924,
      "learning_rate": 0.00042546078220896346,
      "loss": 3.0069,
      "step": 83558
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5730103254318237,
      "learning_rate": 0.00042545706651971326,
      "loss": 3.0644,
      "step": 83559
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4267438650131226,
      "learning_rate": 0.0004254533508071382,
      "loss": 3.0068,
      "step": 83560
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7323282957077026,
      "learning_rate": 0.00042544963507123887,
      "loss": 3.0892,
      "step": 83561
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8662480115890503,
      "learning_rate": 0.00042544591931201626,
      "loss": 3.1015,
      "step": 83562
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9260720014572144,
      "learning_rate": 0.0004254422035294708,
      "loss": 2.9458,
      "step": 83563
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.846848487854004,
      "learning_rate": 0.00042543848772360325,
      "loss": 3.1102,
      "step": 83564
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1941041946411133,
      "learning_rate": 0.0004254347718944144,
      "loss": 3.0518,
      "step": 83565
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.148193359375,
      "learning_rate": 0.00042543105604190477,
      "loss": 3.05,
      "step": 83566
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7630826234817505,
      "learning_rate": 0.00042542734016607524,
      "loss": 3.035,
      "step": 83567
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.711573839187622,
      "learning_rate": 0.0004254236242669263,
      "loss": 2.9255,
      "step": 83568
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4263978004455566,
      "learning_rate": 0.0004254199083444588,
      "loss": 3.1831,
      "step": 83569
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5502477884292603,
      "learning_rate": 0.0004254161923986733,
      "loss": 2.9033,
      "step": 83570
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9855577945709229,
      "learning_rate": 0.0004254124764295706,
      "loss": 3.0672,
      "step": 83571
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8634284734725952,
      "learning_rate": 0.00042540876043715146,
      "loss": 2.8306,
      "step": 83572
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.5993943214416504,
      "learning_rate": 0.0004254050444214163,
      "loss": 3.0909,
      "step": 83573
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6959562301635742,
      "learning_rate": 0.00042540132838236606,
      "loss": 3.2446,
      "step": 83574
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8374584913253784,
      "learning_rate": 0.0004253976123200013,
      "loss": 3.0926,
      "step": 83575
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.103379726409912,
      "learning_rate": 0.00042539389623432285,
      "loss": 3.077,
      "step": 83576
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.15266752243042,
      "learning_rate": 0.0004253901801253312,
      "loss": 2.9111,
      "step": 83577
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.703542709350586,
      "learning_rate": 0.0004253864639930272,
      "loss": 3.0442,
      "step": 83578
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8324930667877197,
      "learning_rate": 0.0004253827478374115,
      "loss": 2.8252,
      "step": 83579
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.63795804977417,
      "learning_rate": 0.0004253790316584848,
      "loss": 2.8187,
      "step": 83580
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3018205165863037,
      "learning_rate": 0.0004253753154562477,
      "loss": 3.0252,
      "step": 83581
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4123475551605225,
      "learning_rate": 0.00042537159923070094,
      "loss": 2.9065,
      "step": 83582
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.72860586643219,
      "learning_rate": 0.00042536788298184536,
      "loss": 3.0373,
      "step": 83583
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5563675165176392,
      "learning_rate": 0.00042536416670968145,
      "loss": 3.0061,
      "step": 83584
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.7338640689849854,
      "learning_rate": 0.00042536045041420995,
      "loss": 3.4119,
      "step": 83585
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.7361037731170654,
      "learning_rate": 0.0004253567340954316,
      "loss": 3.0847,
      "step": 83586
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8656455278396606,
      "learning_rate": 0.0004253530177533471,
      "loss": 2.6766,
      "step": 83587
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5570476055145264,
      "learning_rate": 0.00042534930138795707,
      "loss": 2.8499,
      "step": 83588
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.839348793029785,
      "learning_rate": 0.00042534558499926224,
      "loss": 3.094,
      "step": 83589
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.957164764404297,
      "learning_rate": 0.00042534186858726334,
      "loss": 3.1513,
      "step": 83590
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.435978889465332,
      "learning_rate": 0.00042533815215196094,
      "loss": 3.0024,
      "step": 83591
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5667381286621094,
      "learning_rate": 0.0004253344356933559,
      "loss": 2.9591,
      "step": 83592
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.9216291904449463,
      "learning_rate": 0.00042533071921144883,
      "loss": 2.8691,
      "step": 83593
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.0311594009399414,
      "learning_rate": 0.0004253270027062403,
      "loss": 3.21,
      "step": 83594
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6785454750061035,
      "learning_rate": 0.00042532328617773116,
      "loss": 3.0474,
      "step": 83595
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.706851601600647,
      "learning_rate": 0.0004253195696259222,
      "loss": 3.0822,
      "step": 83596
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.117520809173584,
      "learning_rate": 0.00042531585305081375,
      "loss": 2.8503,
      "step": 83597
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.964327812194824,
      "learning_rate": 0.0004253121364524068,
      "loss": 3.0675,
      "step": 83598
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.7851407527923584,
      "learning_rate": 0.000425308419830702,
      "loss": 2.9818,
      "step": 83599
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4680055379867554,
      "learning_rate": 0.0004253047031857,
      "loss": 2.9711,
      "step": 83600
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2914981842041016,
      "learning_rate": 0.0004253009865174014,
      "loss": 2.7319,
      "step": 83601
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7014272212982178,
      "learning_rate": 0.0004252972698258071,
      "loss": 3.0328,
      "step": 83602
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.5400846004486084,
      "learning_rate": 0.00042529355311091756,
      "loss": 2.8691,
      "step": 83603
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.219022750854492,
      "learning_rate": 0.0004252898363727337,
      "loss": 3.1169,
      "step": 83604
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.642992377281189,
      "learning_rate": 0.00042528611961125605,
      "loss": 3.0351,
      "step": 83605
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0795300006866455,
      "learning_rate": 0.00042528240282648525,
      "loss": 3.0368,
      "step": 83606
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7779631614685059,
      "learning_rate": 0.00042527868601842215,
      "loss": 2.7804,
      "step": 83607
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6870793104171753,
      "learning_rate": 0.00042527496918706747,
      "loss": 2.8655,
      "step": 83608
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5145893096923828,
      "learning_rate": 0.00042527125233242174,
      "loss": 3.1675,
      "step": 83609
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6687061786651611,
      "learning_rate": 0.0004252675354544856,
      "loss": 2.8789,
      "step": 83610
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.225026845932007,
      "learning_rate": 0.00042526381855326,
      "loss": 3.1322,
      "step": 83611
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.473737359046936,
      "learning_rate": 0.0004252601016287455,
      "loss": 3.0714,
      "step": 83612
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5782922506332397,
      "learning_rate": 0.0004252563846809427,
      "loss": 3.1342,
      "step": 83613
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6828711032867432,
      "learning_rate": 0.00042525266770985246,
      "loss": 3.0661,
      "step": 83614
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3062546253204346,
      "learning_rate": 0.00042524895071547534,
      "loss": 2.9364,
      "step": 83615
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.498741865158081,
      "learning_rate": 0.0004252452336978121,
      "loss": 2.9458,
      "step": 83616
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.855069637298584,
      "learning_rate": 0.00042524151665686337,
      "loss": 3.094,
      "step": 83617
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7718836069107056,
      "learning_rate": 0.0004252377995926299,
      "loss": 2.976,
      "step": 83618
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5970664024353027,
      "learning_rate": 0.0004252340825051123,
      "loss": 2.8825,
      "step": 83619
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.592207431793213,
      "learning_rate": 0.00042523036539431145,
      "loss": 3.0368,
      "step": 83620
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8081690073013306,
      "learning_rate": 0.00042522664826022784,
      "loss": 2.9097,
      "step": 83621
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5895886421203613,
      "learning_rate": 0.0004252229311028622,
      "loss": 3.0072,
      "step": 83622
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5862655639648438,
      "learning_rate": 0.0004252192139222153,
      "loss": 2.8845,
      "step": 83623
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.258425235748291,
      "learning_rate": 0.0004252154967182877,
      "loss": 3.0958,
      "step": 83624
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1981000900268555,
      "learning_rate": 0.00042521177949108024,
      "loss": 3.22,
      "step": 83625
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6663856506347656,
      "learning_rate": 0.00042520806224059354,
      "loss": 2.7086,
      "step": 83626
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4420692920684814,
      "learning_rate": 0.00042520434496682837,
      "loss": 3.1817,
      "step": 83627
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9987976551055908,
      "learning_rate": 0.00042520062766978527,
      "loss": 2.946,
      "step": 83628
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8400672674179077,
      "learning_rate": 0.000425196910349465,
      "loss": 2.9498,
      "step": 83629
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.434852123260498,
      "learning_rate": 0.00042519319300586827,
      "loss": 3.0375,
      "step": 83630
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2739267349243164,
      "learning_rate": 0.00042518947563899577,
      "loss": 2.9159,
      "step": 83631
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.33064341545105,
      "learning_rate": 0.0004251857582488482,
      "loss": 3.1028,
      "step": 83632
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4839171171188354,
      "learning_rate": 0.0004251820408354262,
      "loss": 2.8116,
      "step": 83633
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2461369037628174,
      "learning_rate": 0.00042517832339873055,
      "loss": 3.1428,
      "step": 83634
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.208761215209961,
      "learning_rate": 0.0004251746059387619,
      "loss": 2.945,
      "step": 83635
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6753747463226318,
      "learning_rate": 0.00042517088845552085,
      "loss": 3.0,
      "step": 83636
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.143310308456421,
      "learning_rate": 0.00042516717094900815,
      "loss": 3.2067,
      "step": 83637
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.522902011871338,
      "learning_rate": 0.00042516345341922463,
      "loss": 2.8807,
      "step": 83638
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9287970066070557,
      "learning_rate": 0.00042515973586617077,
      "loss": 2.7934,
      "step": 83639
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5404608249664307,
      "learning_rate": 0.0004251560182898474,
      "loss": 3.2266,
      "step": 83640
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2289516925811768,
      "learning_rate": 0.00042515230069025507,
      "loss": 3.3558,
      "step": 83641
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.312929153442383,
      "learning_rate": 0.00042514858306739474,
      "loss": 3.067,
      "step": 83642
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6405153274536133,
      "learning_rate": 0.0004251448654212668,
      "loss": 3.0124,
      "step": 83643
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.776166319847107,
      "learning_rate": 0.00042514114775187207,
      "loss": 2.7769,
      "step": 83644
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.487189769744873,
      "learning_rate": 0.00042513743005921124,
      "loss": 2.921,
      "step": 83645
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.629501223564148,
      "learning_rate": 0.000425133712343285,
      "loss": 2.6895,
      "step": 83646
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7331398725509644,
      "learning_rate": 0.0004251299946040941,
      "loss": 2.9953,
      "step": 83647
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5144010782241821,
      "learning_rate": 0.00042512627684163914,
      "loss": 3.1803,
      "step": 83648
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7733358144760132,
      "learning_rate": 0.0004251225590559208,
      "loss": 3.1488,
      "step": 83649
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.125901937484741,
      "learning_rate": 0.0004251188412469398,
      "loss": 2.9776,
      "step": 83650
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7384244203567505,
      "learning_rate": 0.0004251151234146969,
      "loss": 3.0327,
      "step": 83651
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.748670220375061,
      "learning_rate": 0.0004251114055591927,
      "loss": 3.0042,
      "step": 83652
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.416627883911133,
      "learning_rate": 0.00042510768768042804,
      "loss": 3.1824,
      "step": 83653
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7909224033355713,
      "learning_rate": 0.00042510396977840344,
      "loss": 2.9651,
      "step": 83654
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5300438404083252,
      "learning_rate": 0.0004251002518531196,
      "loss": 3.3348,
      "step": 83655
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6295043230056763,
      "learning_rate": 0.0004250965339045772,
      "loss": 3.0546,
      "step": 83656
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.526630401611328,
      "learning_rate": 0.0004250928159327772,
      "loss": 3.1693,
      "step": 83657
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6558128595352173,
      "learning_rate": 0.0004250890979377199,
      "loss": 3.0265,
      "step": 83658
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.715808629989624,
      "learning_rate": 0.0004250853799194063,
      "loss": 3.0062,
      "step": 83659
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8351950645446777,
      "learning_rate": 0.00042508166187783694,
      "loss": 2.9781,
      "step": 83660
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5470631122589111,
      "learning_rate": 0.00042507794381301244,
      "loss": 3.1227,
      "step": 83661
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7646455764770508,
      "learning_rate": 0.0004250742257249337,
      "loss": 2.9896,
      "step": 83662
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7932493686676025,
      "learning_rate": 0.0004250705076136012,
      "loss": 2.9094,
      "step": 83663
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.089421033859253,
      "learning_rate": 0.00042506678947901584,
      "loss": 2.9347,
      "step": 83664
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7335563898086548,
      "learning_rate": 0.00042506307132117807,
      "loss": 3.1085,
      "step": 83665
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.135436773300171,
      "learning_rate": 0.0004250593531400888,
      "loss": 2.9178,
      "step": 83666
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2491841316223145,
      "learning_rate": 0.0004250556349357486,
      "loss": 3.008,
      "step": 83667
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0917322635650635,
      "learning_rate": 0.0004250519167081583,
      "loss": 2.9985,
      "step": 83668
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.671985387802124,
      "learning_rate": 0.0004250481984573184,
      "loss": 3.0788,
      "step": 83669
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7072911262512207,
      "learning_rate": 0.0004250444801832297,
      "loss": 3.0408,
      "step": 83670
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0451080799102783,
      "learning_rate": 0.0004250407618858928,
      "loss": 3.2585,
      "step": 83671
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8157786130905151,
      "learning_rate": 0.0004250370435653086,
      "loss": 2.6888,
      "step": 83672
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6276379823684692,
      "learning_rate": 0.0004250333252214775,
      "loss": 3.001,
      "step": 83673
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8062652349472046,
      "learning_rate": 0.00042502960685440044,
      "loss": 3.1652,
      "step": 83674
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4250329732894897,
      "learning_rate": 0.00042502588846407807,
      "loss": 3.1019,
      "step": 83675
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9280765056610107,
      "learning_rate": 0.00042502217005051094,
      "loss": 2.9954,
      "step": 83676
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5341155529022217,
      "learning_rate": 0.0004250184516136998,
      "loss": 3.1771,
      "step": 83677
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4988240003585815,
      "learning_rate": 0.00042501473315364547,
      "loss": 2.7832,
      "step": 83678
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.662986397743225,
      "learning_rate": 0.0004250110146703485,
      "loss": 2.8609,
      "step": 83679
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.031080484390259,
      "learning_rate": 0.0004250072961638096,
      "loss": 3.0878,
      "step": 83680
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7089680433273315,
      "learning_rate": 0.0004250035776340295,
      "loss": 3.0713,
      "step": 83681
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1121087074279785,
      "learning_rate": 0.0004249998590810088,
      "loss": 2.9607,
      "step": 83682
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6434028148651123,
      "learning_rate": 0.0004249961405047483,
      "loss": 2.9436,
      "step": 83683
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9792896509170532,
      "learning_rate": 0.0004249924219052488,
      "loss": 3.0111,
      "step": 83684
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.57958722114563,
      "learning_rate": 0.00042498870328251074,
      "loss": 2.7151,
      "step": 83685
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6788281202316284,
      "learning_rate": 0.0004249849846365349,
      "loss": 3.5397,
      "step": 83686
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8555251359939575,
      "learning_rate": 0.000424981265967322,
      "loss": 3.1838,
      "step": 83687
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9527305364608765,
      "learning_rate": 0.00042497754727487277,
      "loss": 3.1898,
      "step": 83688
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9217487573623657,
      "learning_rate": 0.0004249738285591878,
      "loss": 3.0936,
      "step": 83689
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.517786741256714,
      "learning_rate": 0.00042497010982026794,
      "loss": 2.9,
      "step": 83690
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6662843227386475,
      "learning_rate": 0.00042496639105811366,
      "loss": 2.9646,
      "step": 83691
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5965067148208618,
      "learning_rate": 0.00042496267227272574,
      "loss": 2.9904,
      "step": 83692
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4880093336105347,
      "learning_rate": 0.00042495895346410505,
      "loss": 2.979,
      "step": 83693
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5993666648864746,
      "learning_rate": 0.0004249552346322521,
      "loss": 2.9284,
      "step": 83694
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9652594327926636,
      "learning_rate": 0.0004249515157771674,
      "loss": 2.8258,
      "step": 83695
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.593872308731079,
      "learning_rate": 0.00042494779689885215,
      "loss": 3.2252,
      "step": 83696
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8172134160995483,
      "learning_rate": 0.00042494407799730655,
      "loss": 2.9392,
      "step": 83697
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8721835613250732,
      "learning_rate": 0.0004249403590725316,
      "loss": 3.1628,
      "step": 83698
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.013929843902588,
      "learning_rate": 0.0004249366401245278,
      "loss": 3.0534,
      "step": 83699
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5469669103622437,
      "learning_rate": 0.00042493292115329594,
      "loss": 2.8139,
      "step": 83700
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6272897720336914,
      "learning_rate": 0.0004249292021588367,
      "loss": 3.3942,
      "step": 83701
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1122961044311523,
      "learning_rate": 0.00042492548314115077,
      "loss": 2.9458,
      "step": 83702
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5879132747650146,
      "learning_rate": 0.0004249217641002389,
      "loss": 3.0439,
      "step": 83703
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.867182970046997,
      "learning_rate": 0.00042491804503610157,
      "loss": 2.9278,
      "step": 83704
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5958744287490845,
      "learning_rate": 0.00042491432594873976,
      "loss": 3.0943,
      "step": 83705
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.595266580581665,
      "learning_rate": 0.0004249106068381539,
      "loss": 3.2094,
      "step": 83706
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5788021087646484,
      "learning_rate": 0.0004249068877043448,
      "loss": 2.8868,
      "step": 83707
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.79690682888031,
      "learning_rate": 0.0004249031685473133,
      "loss": 3.0375,
      "step": 83708
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5655336380004883,
      "learning_rate": 0.0004248994493670598,
      "loss": 3.0946,
      "step": 83709
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5507159233093262,
      "learning_rate": 0.00042489573016358514,
      "loss": 2.9538,
      "step": 83710
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8554985523223877,
      "learning_rate": 0.0004248920109368901,
      "loss": 3.0235,
      "step": 83711
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7607702016830444,
      "learning_rate": 0.00042488829168697525,
      "loss": 2.704,
      "step": 83712
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6015183925628662,
      "learning_rate": 0.0004248845724138412,
      "loss": 2.7745,
      "step": 83713
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7249462604522705,
      "learning_rate": 0.00042488085311748885,
      "loss": 3.112,
      "step": 83714
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9522596597671509,
      "learning_rate": 0.00042487713379791886,
      "loss": 3.1878,
      "step": 83715
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5622423887252808,
      "learning_rate": 0.0004248734144551317,
      "loss": 3.066,
      "step": 83716
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.532822608947754,
      "learning_rate": 0.0004248696950891283,
      "loss": 3.1192,
      "step": 83717
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5299453735351562,
      "learning_rate": 0.0004248659756999093,
      "loss": 3.085,
      "step": 83718
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6282285451889038,
      "learning_rate": 0.0004248622562874753,
      "loss": 3.0039,
      "step": 83719
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.676875352859497,
      "learning_rate": 0.00042485853685182706,
      "loss": 2.9063,
      "step": 83720
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.921797752380371,
      "learning_rate": 0.0004248548173929653,
      "loss": 2.9504,
      "step": 83721
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5721091032028198,
      "learning_rate": 0.0004248510979108906,
      "loss": 3.0037,
      "step": 83722
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8011447191238403,
      "learning_rate": 0.0004248473784056037,
      "loss": 3.0098,
      "step": 83723
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.092170476913452,
      "learning_rate": 0.00042484365887710543,
      "loss": 3.1551,
      "step": 83724
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2743659019470215,
      "learning_rate": 0.0004248399393253963,
      "loss": 3.0329,
      "step": 83725
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2064361572265625,
      "learning_rate": 0.00042483621975047703,
      "loss": 2.988,
      "step": 83726
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.505293130874634,
      "learning_rate": 0.0004248325001523485,
      "loss": 3.0622,
      "step": 83727
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5318350791931152,
      "learning_rate": 0.00042482878053101117,
      "loss": 2.9993,
      "step": 83728
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1318359375,
      "learning_rate": 0.0004248250608864657,
      "loss": 3.1932,
      "step": 83729
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.927025318145752,
      "learning_rate": 0.00042482134121871304,
      "loss": 3.0067,
      "step": 83730
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4945957660675049,
      "learning_rate": 0.0004248176215277537,
      "loss": 2.8826,
      "step": 83731
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9792015552520752,
      "learning_rate": 0.0004248139018135884,
      "loss": 3.1437,
      "step": 83732
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7534666061401367,
      "learning_rate": 0.00042481018207621784,
      "loss": 2.8821,
      "step": 83733
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6421517133712769,
      "learning_rate": 0.0004248064623156427,
      "loss": 3.1249,
      "step": 83734
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.5043082237243652,
      "learning_rate": 0.0004248027425318637,
      "loss": 3.0034,
      "step": 83735
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7524995803833008,
      "learning_rate": 0.0004247990227248815,
      "loss": 2.9209,
      "step": 83736
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8356263637542725,
      "learning_rate": 0.00042479530289469674,
      "loss": 3.2422,
      "step": 83737
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7331770658493042,
      "learning_rate": 0.00042479158304131027,
      "loss": 3.2103,
      "step": 83738
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8828126192092896,
      "learning_rate": 0.0004247878631647227,
      "loss": 2.9242,
      "step": 83739
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8256558179855347,
      "learning_rate": 0.0004247841432649347,
      "loss": 3.0523,
      "step": 83740
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3571462631225586,
      "learning_rate": 0.00042478042334194693,
      "loss": 2.9029,
      "step": 83741
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.697686791419983,
      "learning_rate": 0.0004247767033957602,
      "loss": 2.7539,
      "step": 83742
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5120820999145508,
      "learning_rate": 0.000424772983426375,
      "loss": 3.1439,
      "step": 83743
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.728865146636963,
      "learning_rate": 0.00042476926343379217,
      "loss": 2.8977,
      "step": 83744
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7788469791412354,
      "learning_rate": 0.0004247655434180125,
      "loss": 2.9529,
      "step": 83745
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7373650074005127,
      "learning_rate": 0.00042476182337903646,
      "loss": 3.2101,
      "step": 83746
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7287991046905518,
      "learning_rate": 0.00042475810331686484,
      "loss": 2.8311,
      "step": 83747
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1000874042510986,
      "learning_rate": 0.0004247543832314983,
      "loss": 3.161,
      "step": 83748
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.7784059047698975,
      "learning_rate": 0.0004247506631229377,
      "loss": 3.0904,
      "step": 83749
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1193504333496094,
      "learning_rate": 0.0004247469429911835,
      "loss": 2.8803,
      "step": 83750
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5443984270095825,
      "learning_rate": 0.0004247432228362365,
      "loss": 2.7753,
      "step": 83751
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7304069995880127,
      "learning_rate": 0.0004247395026580974,
      "loss": 3.0961,
      "step": 83752
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.986738681793213,
      "learning_rate": 0.0004247357824567669,
      "loss": 3.087,
      "step": 83753
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.380361557006836,
      "learning_rate": 0.0004247320622322456,
      "loss": 3.2307,
      "step": 83754
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5044621229171753,
      "learning_rate": 0.0004247283419845342,
      "loss": 3.0161,
      "step": 83755
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.172323703765869,
      "learning_rate": 0.0004247246217136336,
      "loss": 3.0162,
      "step": 83756
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9082846641540527,
      "learning_rate": 0.00042472090141954427,
      "loss": 3.187,
      "step": 83757
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.506650686264038,
      "learning_rate": 0.00042471718110226694,
      "loss": 2.915,
      "step": 83758
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7273422479629517,
      "learning_rate": 0.00042471346076180237,
      "loss": 3.0021,
      "step": 83759
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.2956080436706543,
      "learning_rate": 0.0004247097403981512,
      "loss": 3.2414,
      "step": 83760
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.3795669078826904,
      "learning_rate": 0.0004247060200113141,
      "loss": 2.9203,
      "step": 83761
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7258307933807373,
      "learning_rate": 0.00042470229960129186,
      "loss": 3.2482,
      "step": 83762
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3425042629241943,
      "learning_rate": 0.0004246985791680851,
      "loss": 2.905,
      "step": 83763
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.302006483078003,
      "learning_rate": 0.00042469485871169445,
      "loss": 3.0435,
      "step": 83764
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4995310306549072,
      "learning_rate": 0.0004246911382321207,
      "loss": 3.0065,
      "step": 83765
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.306021213531494,
      "learning_rate": 0.00042468741772936464,
      "loss": 3.0754,
      "step": 83766
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.835978627204895,
      "learning_rate": 0.0004246836972034267,
      "loss": 3.2076,
      "step": 83767
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.575333833694458,
      "learning_rate": 0.0004246799766543077,
      "loss": 2.9006,
      "step": 83768
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0447182655334473,
      "learning_rate": 0.00042467625608200843,
      "loss": 3.0357,
      "step": 83769
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9916597604751587,
      "learning_rate": 0.00042467253548652943,
      "loss": 3.0793,
      "step": 83770
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.3361668586730957,
      "learning_rate": 0.00042466881486787143,
      "loss": 2.9358,
      "step": 83771
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.954067349433899,
      "learning_rate": 0.00042466509422603525,
      "loss": 2.7579,
      "step": 83772
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8484598398208618,
      "learning_rate": 0.00042466137356102137,
      "loss": 3.0678,
      "step": 83773
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.393235445022583,
      "learning_rate": 0.0004246576528728306,
      "loss": 2.9666,
      "step": 83774
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3600566387176514,
      "learning_rate": 0.0004246539321614636,
      "loss": 3.0407,
      "step": 83775
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7616338729858398,
      "learning_rate": 0.0004246502114269212,
      "loss": 3.0562,
      "step": 83776
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.309654951095581,
      "learning_rate": 0.0004246464906692039,
      "loss": 3.0684,
      "step": 83777
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5826040506362915,
      "learning_rate": 0.0004246427698883124,
      "loss": 2.8684,
      "step": 83778
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.827736258506775,
      "learning_rate": 0.00042463904908424756,
      "loss": 2.9599,
      "step": 83779
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.192809581756592,
      "learning_rate": 0.0004246353282570099,
      "loss": 3.1447,
      "step": 83780
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.8294434547424316,
      "learning_rate": 0.0004246316074066002,
      "loss": 2.9389,
      "step": 83781
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4458723068237305,
      "learning_rate": 0.00042462788653301915,
      "loss": 3.2249,
      "step": 83782
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.571115493774414,
      "learning_rate": 0.00042462416563626736,
      "loss": 3.0516,
      "step": 83783
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.282823324203491,
      "learning_rate": 0.00042462044471634563,
      "loss": 2.7686,
      "step": 83784
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7415562868118286,
      "learning_rate": 0.00042461672377325466,
      "loss": 2.9004,
      "step": 83785
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.746264696121216,
      "learning_rate": 0.00042461300280699504,
      "loss": 3.0813,
      "step": 83786
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6118123531341553,
      "learning_rate": 0.00042460928181756744,
      "loss": 3.0756,
      "step": 83787
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7708572149276733,
      "learning_rate": 0.0004246055608049727,
      "loss": 3.0495,
      "step": 83788
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5575497150421143,
      "learning_rate": 0.00042460183976921145,
      "loss": 2.9334,
      "step": 83789
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.039811372756958,
      "learning_rate": 0.0004245981187102844,
      "loss": 2.8087,
      "step": 83790
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.37876033782959,
      "learning_rate": 0.00042459439762819207,
      "loss": 3.1992,
      "step": 83791
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5906902551651,
      "learning_rate": 0.00042459067652293535,
      "loss": 3.102,
      "step": 83792
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6564819812774658,
      "learning_rate": 0.00042458695539451493,
      "loss": 3.0439,
      "step": 83793
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5493147373199463,
      "learning_rate": 0.00042458323424293145,
      "loss": 3.2355,
      "step": 83794
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.368340253829956,
      "learning_rate": 0.0004245795130681855,
      "loss": 2.9121,
      "step": 83795
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8862247467041016,
      "learning_rate": 0.0004245757918702779,
      "loss": 3.2203,
      "step": 83796
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.541771650314331,
      "learning_rate": 0.0004245720706492093,
      "loss": 3.1107,
      "step": 83797
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.322305679321289,
      "learning_rate": 0.00042456834940498043,
      "loss": 2.9721,
      "step": 83798
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.982053518295288,
      "learning_rate": 0.00042456462813759196,
      "loss": 2.9888,
      "step": 83799
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9033139944076538,
      "learning_rate": 0.0004245609068470446,
      "loss": 3.3122,
      "step": 83800
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.322246551513672,
      "learning_rate": 0.0004245571855333389,
      "loss": 3.1696,
      "step": 83801
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6665705442428589,
      "learning_rate": 0.0004245534641964758,
      "loss": 3.0293,
      "step": 83802
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.470023274421692,
      "learning_rate": 0.00042454974283645584,
      "loss": 2.8884,
      "step": 83803
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6581170558929443,
      "learning_rate": 0.0004245460214532796,
      "loss": 2.934,
      "step": 83804
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.51088547706604,
      "learning_rate": 0.0004245423000469481,
      "loss": 2.9079,
      "step": 83805
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9011417627334595,
      "learning_rate": 0.0004245385786174617,
      "loss": 3.0903,
      "step": 83806
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.945454478263855,
      "learning_rate": 0.00042453485716482123,
      "loss": 3.0197,
      "step": 83807
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.8942670822143555,
      "learning_rate": 0.0004245311356890274,
      "loss": 2.9555,
      "step": 83808
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7630343437194824,
      "learning_rate": 0.0004245274141900809,
      "loss": 2.9497,
      "step": 83809
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7257188558578491,
      "learning_rate": 0.00042452369266798243,
      "loss": 3.1802,
      "step": 83810
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.2757518291473389,
      "learning_rate": 0.00042451997112273257,
      "loss": 2.9871,
      "step": 83811
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5131853818893433,
      "learning_rate": 0.0004245162495543322,
      "loss": 2.8613,
      "step": 83812
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1640167236328125,
      "learning_rate": 0.00042451252796278183,
      "loss": 2.7672,
      "step": 83813
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4155267477035522,
      "learning_rate": 0.00042450880634808225,
      "loss": 3.0311,
      "step": 83814
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4199000597000122,
      "learning_rate": 0.00042450508471023415,
      "loss": 3.0726,
      "step": 83815
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4538474082946777,
      "learning_rate": 0.0004245013630492383,
      "loss": 2.9151,
      "step": 83816
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.137014865875244,
      "learning_rate": 0.0004244976413650951,
      "loss": 2.9606,
      "step": 83817
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5984898805618286,
      "learning_rate": 0.00042449391965780555,
      "loss": 3.2533,
      "step": 83818
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.427717685699463,
      "learning_rate": 0.0004244901979273702,
      "loss": 3.0678,
      "step": 83819
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.2128829956054688,
      "learning_rate": 0.00042448647617378976,
      "loss": 2.8736,
      "step": 83820
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6248549222946167,
      "learning_rate": 0.000424482754397065,
      "loss": 3.0636,
      "step": 83821
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.680056095123291,
      "learning_rate": 0.0004244790325971965,
      "loss": 2.9712,
      "step": 83822
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1693859100341797,
      "learning_rate": 0.000424475310774185,
      "loss": 3.1046,
      "step": 83823
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7616490125656128,
      "learning_rate": 0.0004244715889280312,
      "loss": 2.926,
      "step": 83824
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.593798875808716,
      "learning_rate": 0.0004244678670587358,
      "loss": 2.9276,
      "step": 83825
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4590022563934326,
      "learning_rate": 0.0004244641451662994,
      "loss": 3.0137,
      "step": 83826
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8939753770828247,
      "learning_rate": 0.00042446042325072287,
      "loss": 3.0804,
      "step": 83827
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9698946475982666,
      "learning_rate": 0.0004244567013120067,
      "loss": 3.015,
      "step": 83828
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9191157817840576,
      "learning_rate": 0.0004244529793501517,
      "loss": 2.9959,
      "step": 83829
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.707600474357605,
      "learning_rate": 0.00042444925736515856,
      "loss": 2.9005,
      "step": 83830
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5428332090377808,
      "learning_rate": 0.00042444553535702793,
      "loss": 3.0487,
      "step": 83831
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.977732539176941,
      "learning_rate": 0.0004244418133257606,
      "loss": 2.9962,
      "step": 83832
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.556107521057129,
      "learning_rate": 0.0004244380912713571,
      "loss": 2.9643,
      "step": 83833
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4382292032241821,
      "learning_rate": 0.00042443436919381827,
      "loss": 2.9544,
      "step": 83834
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.599606990814209,
      "learning_rate": 0.0004244306470931447,
      "loss": 3.1059,
      "step": 83835
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5465147495269775,
      "learning_rate": 0.00042442692496933717,
      "loss": 2.9152,
      "step": 83836
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.024536609649658,
      "learning_rate": 0.0004244232028223963,
      "loss": 2.9509,
      "step": 83837
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7974367141723633,
      "learning_rate": 0.0004244194806523228,
      "loss": 3.2053,
      "step": 83838
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7834124565124512,
      "learning_rate": 0.0004244157584591173,
      "loss": 3.0806,
      "step": 83839
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5801804065704346,
      "learning_rate": 0.0004244120362427807,
      "loss": 2.7967,
      "step": 83840
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6225107908248901,
      "learning_rate": 0.0004244083140033135,
      "loss": 2.822,
      "step": 83841
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.744094729423523,
      "learning_rate": 0.00042440459174071646,
      "loss": 3.0304,
      "step": 83842
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.627354621887207,
      "learning_rate": 0.00042440086945499017,
      "loss": 2.9348,
      "step": 83843
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6212308406829834,
      "learning_rate": 0.0004243971471461355,
      "loss": 3.0453,
      "step": 83844
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.655216932296753,
      "learning_rate": 0.00042439342481415305,
      "loss": 3.1839,
      "step": 83845
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9431862831115723,
      "learning_rate": 0.0004243897024590435,
      "loss": 2.9597,
      "step": 83846
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.556889295578003,
      "learning_rate": 0.0004243859800808075,
      "loss": 3.0847,
      "step": 83847
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.302818536758423,
      "learning_rate": 0.0004243822576794459,
      "loss": 3.3225,
      "step": 83848
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.193108558654785,
      "learning_rate": 0.0004243785352549593,
      "loss": 3.1516,
      "step": 83849
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7612309455871582,
      "learning_rate": 0.0004243748128073482,
      "loss": 2.9665,
      "step": 83850
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9031542539596558,
      "learning_rate": 0.00042437109033661367,
      "loss": 3.0049,
      "step": 83851
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6145111322402954,
      "learning_rate": 0.0004243673678427561,
      "loss": 3.0023,
      "step": 83852
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.439355731010437,
      "learning_rate": 0.0004243636453257763,
      "loss": 3.3179,
      "step": 83853
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8338544368743896,
      "learning_rate": 0.000424359922785675,
      "loss": 2.9603,
      "step": 83854
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9245314598083496,
      "learning_rate": 0.00042435620022245285,
      "loss": 3.2491,
      "step": 83855
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4809701442718506,
      "learning_rate": 0.0004243524776361105,
      "loss": 2.8885,
      "step": 83856
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7092041969299316,
      "learning_rate": 0.0004243487550266486,
      "loss": 3.0018,
      "step": 83857
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.409085512161255,
      "learning_rate": 0.0004243450323940681,
      "loss": 3.1182,
      "step": 83858
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4383140802383423,
      "learning_rate": 0.00042434130973836935,
      "loss": 2.9488,
      "step": 83859
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.774917483329773,
      "learning_rate": 0.0004243375870595532,
      "loss": 3.0868,
      "step": 83860
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.005558729171753,
      "learning_rate": 0.0004243338643576205,
      "loss": 2.9664,
      "step": 83861
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.014624834060669,
      "learning_rate": 0.00042433014163257166,
      "loss": 2.7059,
      "step": 83862
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.721855878829956,
      "learning_rate": 0.0004243264188844075,
      "loss": 2.8867,
      "step": 83863
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4256635904312134,
      "learning_rate": 0.0004243226961131288,
      "loss": 2.8706,
      "step": 83864
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8794771432876587,
      "learning_rate": 0.0004243189733187361,
      "loss": 3.0563,
      "step": 83865
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.475078821182251,
      "learning_rate": 0.00042431525050123014,
      "loss": 3.0956,
      "step": 83866
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.37189519405365,
      "learning_rate": 0.0004243115276606117,
      "loss": 3.2027,
      "step": 83867
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5173606872558594,
      "learning_rate": 0.00042430780479688133,
      "loss": 3.0647,
      "step": 83868
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8228031396865845,
      "learning_rate": 0.00042430408191003986,
      "loss": 2.6793,
      "step": 83869
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8504338264465332,
      "learning_rate": 0.00042430035900008786,
      "loss": 2.818,
      "step": 83870
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5265650749206543,
      "learning_rate": 0.0004242966360670261,
      "loss": 2.992,
      "step": 83871
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9723659753799438,
      "learning_rate": 0.0004242929131108552,
      "loss": 2.8056,
      "step": 83872
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6387068033218384,
      "learning_rate": 0.000424289190131576,
      "loss": 2.857,
      "step": 83873
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7178891897201538,
      "learning_rate": 0.00042428546712918895,
      "loss": 3.2736,
      "step": 83874
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.3503810167312622,
      "learning_rate": 0.000424281744103695,
      "loss": 3.0342,
      "step": 83875
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7426166534423828,
      "learning_rate": 0.0004242780210550947,
      "loss": 3.0372,
      "step": 83876
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7990583181381226,
      "learning_rate": 0.0004242742979833888,
      "loss": 3.0,
      "step": 83877
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.93961763381958,
      "learning_rate": 0.00042427057488857785,
      "loss": 3.1229,
      "step": 83878
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.677472710609436,
      "learning_rate": 0.00042426685177066287,
      "loss": 2.9348,
      "step": 83879
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.700518250465393,
      "learning_rate": 0.0004242631286296442,
      "loss": 2.9668,
      "step": 83880
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6237245798110962,
      "learning_rate": 0.0004242594054655226,
      "loss": 3.1755,
      "step": 83881
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6948366165161133,
      "learning_rate": 0.0004242556822782989,
      "loss": 2.9437,
      "step": 83882
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8587315082550049,
      "learning_rate": 0.0004242519590679737,
      "loss": 3.1152,
      "step": 83883
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7238006591796875,
      "learning_rate": 0.00042424823583454775,
      "loss": 2.9321,
      "step": 83884
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5752872228622437,
      "learning_rate": 0.00042424451257802175,
      "loss": 3.1566,
      "step": 83885
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.7267229557037354,
      "learning_rate": 0.0004242407892983962,
      "loss": 3.0249,
      "step": 83886
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9954782724380493,
      "learning_rate": 0.0004242370659956721,
      "loss": 3.003,
      "step": 83887
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6324018239974976,
      "learning_rate": 0.0004242333426698499,
      "loss": 2.8665,
      "step": 83888
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8043427467346191,
      "learning_rate": 0.00042422961932093046,
      "loss": 2.9131,
      "step": 83889
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.713392734527588,
      "learning_rate": 0.0004242258959489143,
      "loss": 3.0255,
      "step": 83890
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5710490942001343,
      "learning_rate": 0.0004242221725538023,
      "loss": 3.2168,
      "step": 83891
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6213701963424683,
      "learning_rate": 0.0004242184491355949,
      "loss": 3.1961,
      "step": 83892
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7847706079483032,
      "learning_rate": 0.00042421472569429303,
      "loss": 3.019,
      "step": 83893
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7313859462738037,
      "learning_rate": 0.00042421100222989733,
      "loss": 2.8944,
      "step": 83894
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.507672667503357,
      "learning_rate": 0.0004242072787424084,
      "loss": 2.9212,
      "step": 83895
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5854887962341309,
      "learning_rate": 0.000424203555231827,
      "loss": 2.9757,
      "step": 83896
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5328452587127686,
      "learning_rate": 0.00042419983169815386,
      "loss": 2.9158,
      "step": 83897
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5951077938079834,
      "learning_rate": 0.00042419610814138956,
      "loss": 2.9059,
      "step": 83898
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6818925142288208,
      "learning_rate": 0.0004241923845615349,
      "loss": 3.0109,
      "step": 83899
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.797197937965393,
      "learning_rate": 0.0004241886609585906,
      "loss": 2.9673,
      "step": 83900
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7510007619857788,
      "learning_rate": 0.0004241849373325572,
      "loss": 3.0352,
      "step": 83901
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.791077733039856,
      "learning_rate": 0.0004241812136834354,
      "loss": 3.182,
      "step": 83902
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.732917070388794,
      "learning_rate": 0.0004241774900112261,
      "loss": 2.954,
      "step": 83903
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.009540557861328,
      "learning_rate": 0.0004241737663159299,
      "loss": 2.8728,
      "step": 83904
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6219817399978638,
      "learning_rate": 0.0004241700425975473,
      "loss": 3.1034,
      "step": 83905
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.434963583946228,
      "learning_rate": 0.00042416631885607925,
      "loss": 3.0625,
      "step": 83906
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8571540117263794,
      "learning_rate": 0.0004241625950915263,
      "loss": 2.9976,
      "step": 83907
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7843096256256104,
      "learning_rate": 0.0004241588713038892,
      "loss": 2.9988,
      "step": 83908
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5865685939788818,
      "learning_rate": 0.0004241551474931685,
      "loss": 2.757,
      "step": 83909
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8661794662475586,
      "learning_rate": 0.0004241514236593652,
      "loss": 3.2107,
      "step": 83910
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.4452812671661377,
      "learning_rate": 0.0004241476998024797,
      "loss": 2.9812,
      "step": 83911
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6714322566986084,
      "learning_rate": 0.0004241439759225128,
      "loss": 2.9258,
      "step": 83912
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5803536176681519,
      "learning_rate": 0.0004241402520194653,
      "loss": 2.9972,
      "step": 83913
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3407225608825684,
      "learning_rate": 0.00042413652809333765,
      "loss": 2.8991,
      "step": 83914
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9881161451339722,
      "learning_rate": 0.0004241328041441307,
      "loss": 2.7934,
      "step": 83915
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8762400150299072,
      "learning_rate": 0.0004241290801718452,
      "loss": 2.886,
      "step": 83916
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1978728771209717,
      "learning_rate": 0.00042412535617648167,
      "loss": 3.0281,
      "step": 83917
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5688247680664062,
      "learning_rate": 0.00042412163215804093,
      "loss": 3.1342,
      "step": 83918
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.647900104522705,
      "learning_rate": 0.0004241179081165237,
      "loss": 3.1806,
      "step": 83919
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.881489634513855,
      "learning_rate": 0.00042411418405193045,
      "loss": 3.0502,
      "step": 83920
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.804889440536499,
      "learning_rate": 0.00042411045996426216,
      "loss": 2.9636,
      "step": 83921
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.299964666366577,
      "learning_rate": 0.0004241067358535194,
      "loss": 3.0011,
      "step": 83922
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.681310772895813,
      "learning_rate": 0.0004241030117197028,
      "loss": 2.6773,
      "step": 83923
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8458995819091797,
      "learning_rate": 0.0004240992875628131,
      "loss": 3.2223,
      "step": 83924
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.295854091644287,
      "learning_rate": 0.00042409556338285104,
      "loss": 2.9881,
      "step": 83925
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.561844825744629,
      "learning_rate": 0.00042409183917981725,
      "loss": 3.1012,
      "step": 83926
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6929959058761597,
      "learning_rate": 0.00042408811495371244,
      "loss": 2.7459,
      "step": 83927
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.344355583190918,
      "learning_rate": 0.00042408439070453733,
      "loss": 2.8101,
      "step": 83928
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7874956130981445,
      "learning_rate": 0.00042408066643229256,
      "loss": 3.1519,
      "step": 83929
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.836594581604004,
      "learning_rate": 0.00042407694213697885,
      "loss": 3.0301,
      "step": 83930
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6262112855911255,
      "learning_rate": 0.0004240732178185969,
      "loss": 2.9636,
      "step": 83931
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7811245918273926,
      "learning_rate": 0.00042406949347714745,
      "loss": 2.8744,
      "step": 83932
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6838475465774536,
      "learning_rate": 0.00042406576911263106,
      "loss": 2.9589,
      "step": 83933
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6608037948608398,
      "learning_rate": 0.0004240620447250485,
      "loss": 3.1441,
      "step": 83934
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0079245567321777,
      "learning_rate": 0.0004240583203144005,
      "loss": 3.1581,
      "step": 83935
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7627217769622803,
      "learning_rate": 0.0004240545958806877,
      "loss": 3.1708,
      "step": 83936
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.198066234588623,
      "learning_rate": 0.0004240508714239109,
      "loss": 2.9872,
      "step": 83937
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1259236335754395,
      "learning_rate": 0.00042404714694407056,
      "loss": 2.769,
      "step": 83938
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8510651588439941,
      "learning_rate": 0.0004240434224411676,
      "loss": 3.0089,
      "step": 83939
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.071591377258301,
      "learning_rate": 0.0004240396979152026,
      "loss": 2.9694,
      "step": 83940
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.1611576080322266,
      "learning_rate": 0.0004240359733661763,
      "loss": 2.886,
      "step": 83941
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.8068270683288574,
      "learning_rate": 0.0004240322487940893,
      "loss": 2.9572,
      "step": 83942
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7414189577102661,
      "learning_rate": 0.00042402852419894246,
      "loss": 3.0395,
      "step": 83943
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.164639949798584,
      "learning_rate": 0.0004240247995807363,
      "loss": 2.806,
      "step": 83944
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.079909324645996,
      "learning_rate": 0.0004240210749394717,
      "loss": 2.9839,
      "step": 83945
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4901790618896484,
      "learning_rate": 0.00042401735027514917,
      "loss": 2.9628,
      "step": 83946
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1558022499084473,
      "learning_rate": 0.00042401362558776946,
      "loss": 3.2813,
      "step": 83947
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.844994068145752,
      "learning_rate": 0.0004240099008773332,
      "loss": 2.9182,
      "step": 83948
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6347168684005737,
      "learning_rate": 0.0004240061761438413,
      "loss": 2.8991,
      "step": 83949
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6859705448150635,
      "learning_rate": 0.0004240024513872942,
      "loss": 2.8909,
      "step": 83950
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.429040789604187,
      "learning_rate": 0.0004239987266076928,
      "loss": 2.9647,
      "step": 83951
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3827524185180664,
      "learning_rate": 0.0004239950018050377,
      "loss": 2.9924,
      "step": 83952
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6926345825195312,
      "learning_rate": 0.00042399127697932953,
      "loss": 3.1694,
      "step": 83953
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8533855676651,
      "learning_rate": 0.000423987552130569,
      "loss": 2.963,
      "step": 83954
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5742132663726807,
      "learning_rate": 0.000423983827258757,
      "loss": 2.8207,
      "step": 83955
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6536815166473389,
      "learning_rate": 0.0004239801023638939,
      "loss": 3.3256,
      "step": 83956
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9685724973678589,
      "learning_rate": 0.0004239763774459806,
      "loss": 3.1505,
      "step": 83957
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7126260995864868,
      "learning_rate": 0.00042397265250501784,
      "loss": 2.9866,
      "step": 83958
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.499637246131897,
      "learning_rate": 0.0004239689275410062,
      "loss": 2.8922,
      "step": 83959
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.006103038787842,
      "learning_rate": 0.00042396520255394627,
      "loss": 3.0602,
      "step": 83960
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7011507749557495,
      "learning_rate": 0.000423961477543839,
      "loss": 3.0674,
      "step": 83961
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1190154552459717,
      "learning_rate": 0.0004239577525106848,
      "loss": 3.0486,
      "step": 83962
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0190162658691406,
      "learning_rate": 0.0004239540274544847,
      "loss": 3.2181,
      "step": 83963
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8058394193649292,
      "learning_rate": 0.00042395030237523914,
      "loss": 2.9451,
      "step": 83964
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3252668380737305,
      "learning_rate": 0.00042394657727294887,
      "loss": 3.0851,
      "step": 83965
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.0099611282348633,
      "learning_rate": 0.00042394285214761453,
      "loss": 2.7168,
      "step": 83966
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6380388736724854,
      "learning_rate": 0.00042393912699923705,
      "loss": 3.0162,
      "step": 83967
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9862316846847534,
      "learning_rate": 0.00042393540182781675,
      "loss": 3.1921,
      "step": 83968
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6604167222976685,
      "learning_rate": 0.0004239316766333546,
      "loss": 3.1072,
      "step": 83969
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.9643280506134033,
      "learning_rate": 0.00042392795141585126,
      "loss": 3.1091,
      "step": 83970
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.919618606567383,
      "learning_rate": 0.00042392422617530727,
      "loss": 2.7492,
      "step": 83971
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7595851421356201,
      "learning_rate": 0.0004239205009117235,
      "loss": 3.0852,
      "step": 83972
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.4657297134399414,
      "learning_rate": 0.00042391677562510054,
      "loss": 3.1833,
      "step": 83973
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.103975534439087,
      "learning_rate": 0.00042391305031543914,
      "loss": 3.0115,
      "step": 83974
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8111252784729004,
      "learning_rate": 0.00042390932498273987,
      "loss": 2.9879,
      "step": 83975
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7009952068328857,
      "learning_rate": 0.00042390559962700357,
      "loss": 3.0234,
      "step": 83976
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.461012601852417,
      "learning_rate": 0.000423901874248231,
      "loss": 2.9252,
      "step": 83977
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.203690528869629,
      "learning_rate": 0.0004238981488464226,
      "loss": 3.0382,
      "step": 83978
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9257757663726807,
      "learning_rate": 0.0004238944234215792,
      "loss": 3.0306,
      "step": 83979
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6930361986160278,
      "learning_rate": 0.00042389069797370163,
      "loss": 2.8085,
      "step": 83980
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.685971975326538,
      "learning_rate": 0.00042388697250279027,
      "loss": 3.1408,
      "step": 83981
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3875722885131836,
      "learning_rate": 0.00042388324700884606,
      "loss": 3.0917,
      "step": 83982
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.831883192062378,
      "learning_rate": 0.0004238795214918696,
      "loss": 3.1862,
      "step": 83983
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.456331729888916,
      "learning_rate": 0.00042387579595186154,
      "loss": 2.9318,
      "step": 83984
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8783133029937744,
      "learning_rate": 0.0004238720703888227,
      "loss": 2.8362,
      "step": 83985
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.526007890701294,
      "learning_rate": 0.0004238683448027538,
      "loss": 3.0149,
      "step": 83986
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.060710906982422,
      "learning_rate": 0.0004238646191936552,
      "loss": 3.086,
      "step": 83987
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7425792217254639,
      "learning_rate": 0.000423860893561528,
      "loss": 2.9109,
      "step": 83988
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0713651180267334,
      "learning_rate": 0.0004238571679063727,
      "loss": 3.1601,
      "step": 83989
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.79684317111969,
      "learning_rate": 0.00042385344222819,
      "loss": 3.2707,
      "step": 83990
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.773333191871643,
      "learning_rate": 0.0004238497165269806,
      "loss": 3.1671,
      "step": 83991
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6733781099319458,
      "learning_rate": 0.00042384599080274525,
      "loss": 2.965,
      "step": 83992
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6303726434707642,
      "learning_rate": 0.00042384226505548446,
      "loss": 3.1746,
      "step": 83993
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8627954721450806,
      "learning_rate": 0.0004238385392851992,
      "loss": 3.0405,
      "step": 83994
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.589587688446045,
      "learning_rate": 0.00042383481349189,
      "loss": 3.1491,
      "step": 83995
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.299901247024536,
      "learning_rate": 0.00042383108767555746,
      "loss": 3.0154,
      "step": 83996
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7099746465682983,
      "learning_rate": 0.00042382736183620243,
      "loss": 3.0897,
      "step": 83997
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8620572090148926,
      "learning_rate": 0.00042382363597382567,
      "loss": 3.0501,
      "step": 83998
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6338814496994019,
      "learning_rate": 0.0004238199100884276,
      "loss": 2.9,
      "step": 83999
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6360379457473755,
      "learning_rate": 0.0004238161841800091,
      "loss": 3.1105,
      "step": 84000
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.526561975479126,
      "learning_rate": 0.00042381245824857095,
      "loss": 2.9448,
      "step": 84001
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4399547576904297,
      "learning_rate": 0.00042380873229411366,
      "loss": 3.1961,
      "step": 84002
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9053504467010498,
      "learning_rate": 0.00042380500631663794,
      "loss": 2.9574,
      "step": 84003
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.3860429525375366,
      "learning_rate": 0.0004238012803161446,
      "loss": 3.1608,
      "step": 84004
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5832319259643555,
      "learning_rate": 0.0004237975542926342,
      "loss": 3.1392,
      "step": 84005
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4911550283432007,
      "learning_rate": 0.00042379382824610755,
      "loss": 2.8656,
      "step": 84006
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.546523094177246,
      "learning_rate": 0.00042379010217656534,
      "loss": 2.912,
      "step": 84007
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.402759313583374,
      "learning_rate": 0.0004237863760840081,
      "loss": 3.0374,
      "step": 84008
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9432241916656494,
      "learning_rate": 0.00042378264996843665,
      "loss": 2.7355,
      "step": 84009
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4668771028518677,
      "learning_rate": 0.00042377892382985174,
      "loss": 3.0843,
      "step": 84010
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9699822664260864,
      "learning_rate": 0.00042377519766825393,
      "loss": 2.9502,
      "step": 84011
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.244584798812866,
      "learning_rate": 0.000423771471483644,
      "loss": 2.989,
      "step": 84012
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7786856889724731,
      "learning_rate": 0.00042376774527602267,
      "loss": 3.0905,
      "step": 84013
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8017064332962036,
      "learning_rate": 0.00042376401904539046,
      "loss": 3.2305,
      "step": 84014
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8819624185562134,
      "learning_rate": 0.0004237602927917482,
      "loss": 2.6804,
      "step": 84015
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6488900184631348,
      "learning_rate": 0.0004237565665150967,
      "loss": 2.6921,
      "step": 84016
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.086970329284668,
      "learning_rate": 0.0004237528402154364,
      "loss": 2.8036,
      "step": 84017
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5747474431991577,
      "learning_rate": 0.0004237491138927681,
      "loss": 3.0848,
      "step": 84018
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.843335509300232,
      "learning_rate": 0.00042374538754709256,
      "loss": 3.127,
      "step": 84019
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7260321378707886,
      "learning_rate": 0.0004237416611784104,
      "loss": 3.1209,
      "step": 84020
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8965200185775757,
      "learning_rate": 0.0004237379347867224,
      "loss": 2.9059,
      "step": 84021
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9428150653839111,
      "learning_rate": 0.0004237342083720291,
      "loss": 3.0211,
      "step": 84022
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7222334146499634,
      "learning_rate": 0.00042373048193433124,
      "loss": 2.7652,
      "step": 84023
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.221245527267456,
      "learning_rate": 0.00042372675547362965,
      "loss": 2.9368,
      "step": 84024
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6727837324142456,
      "learning_rate": 0.00042372302898992486,
      "loss": 3.2498,
      "step": 84025
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8519827127456665,
      "learning_rate": 0.0004237193024832176,
      "loss": 2.921,
      "step": 84026
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9939007759094238,
      "learning_rate": 0.0004237155759535086,
      "loss": 3.0729,
      "step": 84027
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.590938687324524,
      "learning_rate": 0.0004237118494007986,
      "loss": 2.9613,
      "step": 84028
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9199658632278442,
      "learning_rate": 0.0004237081228250882,
      "loss": 3.092,
      "step": 84029
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.694559097290039,
      "learning_rate": 0.00042370439622637805,
      "loss": 2.9913,
      "step": 84030
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7066142559051514,
      "learning_rate": 0.000423700669604669,
      "loss": 2.9525,
      "step": 84031
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6030583381652832,
      "learning_rate": 0.0004236969429599617,
      "loss": 3.3159,
      "step": 84032
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1348655223846436,
      "learning_rate": 0.00042369321629225667,
      "loss": 2.9793,
      "step": 84033
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5689420700073242,
      "learning_rate": 0.00042368948960155487,
      "loss": 3.1743,
      "step": 84034
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.9037954807281494,
      "learning_rate": 0.00042368576288785676,
      "loss": 2.9931,
      "step": 84035
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7284154891967773,
      "learning_rate": 0.0004236820361511632,
      "loss": 2.9619,
      "step": 84036
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.627493143081665,
      "learning_rate": 0.0004236783093914748,
      "loss": 3.026,
      "step": 84037
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1243906021118164,
      "learning_rate": 0.00042367458260879223,
      "loss": 3.1379,
      "step": 84038
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.774226427078247,
      "learning_rate": 0.0004236708558031162,
      "loss": 3.2685,
      "step": 84039
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4662675857543945,
      "learning_rate": 0.00042366712897444757,
      "loss": 3.0644,
      "step": 84040
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5777043104171753,
      "learning_rate": 0.00042366340212278674,
      "loss": 3.1532,
      "step": 84041
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6190332174301147,
      "learning_rate": 0.0004236596752481346,
      "loss": 3.064,
      "step": 84042
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.818895936012268,
      "learning_rate": 0.00042365594835049177,
      "loss": 3.312,
      "step": 84043
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.719455599784851,
      "learning_rate": 0.000423652221429859,
      "loss": 3.0813,
      "step": 84044
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4830933809280396,
      "learning_rate": 0.0004236484944862369,
      "loss": 2.9518,
      "step": 84045
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7561488151550293,
      "learning_rate": 0.00042364476751962624,
      "loss": 2.9094,
      "step": 84046
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8928700685501099,
      "learning_rate": 0.00042364104053002773,
      "loss": 2.9951,
      "step": 84047
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8351155519485474,
      "learning_rate": 0.0004236373135174419,
      "loss": 3.2119,
      "step": 84048
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4856200218200684,
      "learning_rate": 0.0004236335864818696,
      "loss": 3.1643,
      "step": 84049
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1243386268615723,
      "learning_rate": 0.00042362985942331156,
      "loss": 2.8739,
      "step": 84050
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3355867862701416,
      "learning_rate": 0.0004236261323417684,
      "loss": 3.0043,
      "step": 84051
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6795172691345215,
      "learning_rate": 0.0004236224052372407,
      "loss": 3.0707,
      "step": 84052
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.7354373931884766,
      "learning_rate": 0.00042361867810972935,
      "loss": 3.2868,
      "step": 84053
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5742295980453491,
      "learning_rate": 0.0004236149509592349,
      "loss": 3.1398,
      "step": 84054
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8616523742675781,
      "learning_rate": 0.00042361122378575816,
      "loss": 2.991,
      "step": 84055
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1833534240722656,
      "learning_rate": 0.00042360749658929974,
      "loss": 2.9765,
      "step": 84056
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5629584789276123,
      "learning_rate": 0.0004236037693698603,
      "loss": 2.8358,
      "step": 84057
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0206408500671387,
      "learning_rate": 0.0004236000421274406,
      "loss": 2.9308,
      "step": 84058
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.1292459964752197,
      "learning_rate": 0.00042359631486204143,
      "loss": 2.8722,
      "step": 84059
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.5152130126953125,
      "learning_rate": 0.0004235925875736632,
      "loss": 3.0248,
      "step": 84060
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.212693214416504,
      "learning_rate": 0.00042358886026230684,
      "loss": 3.194,
      "step": 84061
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.718809723854065,
      "learning_rate": 0.0004235851329279731,
      "loss": 3.0047,
      "step": 84062
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.790268659591675,
      "learning_rate": 0.0004235814055706624,
      "loss": 3.1857,
      "step": 84063
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9182956218719482,
      "learning_rate": 0.00042357767819037556,
      "loss": 2.9675,
      "step": 84064
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7489937543869019,
      "learning_rate": 0.0004235739507871135,
      "loss": 2.8061,
      "step": 84065
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7989301681518555,
      "learning_rate": 0.00042357022336087646,
      "loss": 3.0892,
      "step": 84066
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0690078735351562,
      "learning_rate": 0.0004235664959116655,
      "loss": 3.1375,
      "step": 84067
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4416862726211548,
      "learning_rate": 0.0004235627684394812,
      "loss": 2.8178,
      "step": 84068
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5705561637878418,
      "learning_rate": 0.00042355904094432427,
      "loss": 3.034,
      "step": 84069
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.6255230903625488,
      "learning_rate": 0.0004235553134261953,
      "loss": 3.0713,
      "step": 84070
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.471362590789795,
      "learning_rate": 0.0004235515858850952,
      "loss": 3.0188,
      "step": 84071
    },
    {
      "epoch": 1.09,
      "grad_norm": 3.6886610984802246,
      "learning_rate": 0.0004235478583210245,
      "loss": 2.9422,
      "step": 84072
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5073513984680176,
      "learning_rate": 0.00042354413073398373,
      "loss": 3.2208,
      "step": 84073
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.157346248626709,
      "learning_rate": 0.000423540403123974,
      "loss": 3.0051,
      "step": 84074
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.6912014484405518,
      "learning_rate": 0.0004235366754909957,
      "loss": 3.0615,
      "step": 84075
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.5405880212783813,
      "learning_rate": 0.00042353294783504955,
      "loss": 3.0144,
      "step": 84076
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.1583034992218018,
      "learning_rate": 0.0004235292201561364,
      "loss": 3.2211,
      "step": 84077
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.246717929840088,
      "learning_rate": 0.0004235254924542567,
      "loss": 3.0265,
      "step": 84078
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9008548259735107,
      "learning_rate": 0.00042352176472941136,
      "loss": 3.0886,
      "step": 84079
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8861675262451172,
      "learning_rate": 0.000423518036981601,
      "loss": 3.1061,
      "step": 84080
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.49432373046875,
      "learning_rate": 0.00042351430921082627,
      "loss": 3.0355,
      "step": 84081
    },
    {
      "epoch": 1.09,
      "grad_norm": 4.033771514892578,
      "learning_rate": 0.0004235105814170879,
      "loss": 3.1743,
      "step": 84082
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.080843210220337,
      "learning_rate": 0.00042350685360038664,
      "loss": 3.185,
      "step": 84083
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.7376902103424072,
      "learning_rate": 0.00042350312576072306,
      "loss": 3.06,
      "step": 84084
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.035127639770508,
      "learning_rate": 0.00042349939789809786,
      "loss": 3.0172,
      "step": 84085
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.3099238872528076,
      "learning_rate": 0.0004234956700125119,
      "loss": 2.93,
      "step": 84086
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.534099817276001,
      "learning_rate": 0.0004234919421039658,
      "loss": 3.0576,
      "step": 84087
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4935338497161865,
      "learning_rate": 0.0004234882141724601,
      "loss": 2.9494,
      "step": 84088
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.571906089782715,
      "learning_rate": 0.00042348448621799573,
      "loss": 2.9316,
      "step": 84089
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.0188348293304443,
      "learning_rate": 0.00042348075824057314,
      "loss": 3.15,
      "step": 84090
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8836236000061035,
      "learning_rate": 0.00042347703024019323,
      "loss": 2.7412,
      "step": 84091
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.4989112615585327,
      "learning_rate": 0.0004234733022168566,
      "loss": 3.0471,
      "step": 84092
    },
    {
      "epoch": 1.09,
      "grad_norm": 2.365950345993042,
      "learning_rate": 0.0004234695741705639,
      "loss": 2.8677,
      "step": 84093
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.61842942237854,
      "learning_rate": 0.00042346584610131584,
      "loss": 2.9057,
      "step": 84094
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.9342819452285767,
      "learning_rate": 0.00042346211800911327,
      "loss": 2.8793,
      "step": 84095
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8160802125930786,
      "learning_rate": 0.00042345838989395673,
      "loss": 3.2376,
      "step": 84096
    },
    {
      "epoch": 1.09,
      "grad_norm": 1.8198641538619995,
      "learning_rate": 0.0004234546617558469,
      "loss": 2.9024,
      "step": 84097
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.175175905227661,
      "learning_rate": 0.00042345093359478456,
      "loss": 3.0719,
      "step": 84098
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7370411157608032,
      "learning_rate": 0.00042344720541077034,
      "loss": 3.1273,
      "step": 84099
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.222182035446167,
      "learning_rate": 0.0004234434772038049,
      "loss": 2.8902,
      "step": 84100
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7762784957885742,
      "learning_rate": 0.00042343974897388905,
      "loss": 3.1296,
      "step": 84101
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8384966850280762,
      "learning_rate": 0.00042343602072102344,
      "loss": 3.0203,
      "step": 84102
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7101393938064575,
      "learning_rate": 0.0004234322924452087,
      "loss": 3.0069,
      "step": 84103
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.189629316329956,
      "learning_rate": 0.0004234285641464456,
      "loss": 3.0908,
      "step": 84104
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8537979125976562,
      "learning_rate": 0.0004234248358247348,
      "loss": 3.025,
      "step": 84105
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.813141345977783,
      "learning_rate": 0.0004234211074800769,
      "loss": 2.824,
      "step": 84106
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.676365613937378,
      "learning_rate": 0.0004234173791124728,
      "loss": 3.0094,
      "step": 84107
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7346432209014893,
      "learning_rate": 0.000423413650721923,
      "loss": 2.8155,
      "step": 84108
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.180467367172241,
      "learning_rate": 0.0004234099223084284,
      "loss": 3.0286,
      "step": 84109
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7409217357635498,
      "learning_rate": 0.0004234061938719894,
      "loss": 3.0611,
      "step": 84110
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.842284917831421,
      "learning_rate": 0.00042340246541260697,
      "loss": 2.9894,
      "step": 84111
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0205719470977783,
      "learning_rate": 0.0004233987369302816,
      "loss": 2.9781,
      "step": 84112
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9397332668304443,
      "learning_rate": 0.00042339500842501423,
      "loss": 2.9659,
      "step": 84113
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.446016788482666,
      "learning_rate": 0.00042339127989680524,
      "loss": 2.9701,
      "step": 84114
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8797415494918823,
      "learning_rate": 0.00042338755134565555,
      "loss": 2.9839,
      "step": 84115
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7842963933944702,
      "learning_rate": 0.00042338382277156577,
      "loss": 2.8955,
      "step": 84116
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7169474363327026,
      "learning_rate": 0.00042338009417453666,
      "loss": 2.9082,
      "step": 84117
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6405242681503296,
      "learning_rate": 0.0004233763655545688,
      "loss": 2.8488,
      "step": 84118
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8475993871688843,
      "learning_rate": 0.00042337263691166293,
      "loss": 3.049,
      "step": 84119
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.764930009841919,
      "learning_rate": 0.00042336890824581984,
      "loss": 2.9523,
      "step": 84120
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8762766122817993,
      "learning_rate": 0.0004233651795570401,
      "loss": 2.9957,
      "step": 84121
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.721925973892212,
      "learning_rate": 0.00042336145084532437,
      "loss": 3.0678,
      "step": 84122
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5613279342651367,
      "learning_rate": 0.0004233577221106736,
      "loss": 2.9493,
      "step": 84123
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6750550270080566,
      "learning_rate": 0.0004233539933530881,
      "loss": 2.9262,
      "step": 84124
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.98715341091156,
      "learning_rate": 0.0004233502645725689,
      "loss": 3.2096,
      "step": 84125
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9521712064743042,
      "learning_rate": 0.0004233465357691165,
      "loss": 3.1573,
      "step": 84126
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6384660005569458,
      "learning_rate": 0.00042334280694273166,
      "loss": 3.1561,
      "step": 84127
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7286584377288818,
      "learning_rate": 0.00042333907809341504,
      "loss": 2.9887,
      "step": 84128
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6187429428100586,
      "learning_rate": 0.00042333534922116744,
      "loss": 2.7755,
      "step": 84129
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9166646003723145,
      "learning_rate": 0.0004233316203259894,
      "loss": 2.8988,
      "step": 84130
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.3506927490234375,
      "learning_rate": 0.0004233278914078817,
      "loss": 2.9161,
      "step": 84131
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5723637342453003,
      "learning_rate": 0.00042332416246684503,
      "loss": 3.095,
      "step": 84132
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8679348230361938,
      "learning_rate": 0.00042332043350288006,
      "loss": 2.9537,
      "step": 84133
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.931420922279358,
      "learning_rate": 0.0004233167045159874,
      "loss": 2.8313,
      "step": 84134
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.829498529434204,
      "learning_rate": 0.00042331297550616805,
      "loss": 2.9973,
      "step": 84135
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8125731945037842,
      "learning_rate": 0.0004233092464734223,
      "loss": 3.0119,
      "step": 84136
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5159214735031128,
      "learning_rate": 0.0004233055174177511,
      "loss": 3.0171,
      "step": 84137
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8180828094482422,
      "learning_rate": 0.00042330178833915514,
      "loss": 3.1222,
      "step": 84138
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1882259845733643,
      "learning_rate": 0.0004232980592376349,
      "loss": 2.8499,
      "step": 84139
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.554905652999878,
      "learning_rate": 0.0004232943301131913,
      "loss": 2.9326,
      "step": 84140
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6801689863204956,
      "learning_rate": 0.000423290600965825,
      "loss": 2.964,
      "step": 84141
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7662426233291626,
      "learning_rate": 0.00042328687179553657,
      "loss": 3.3029,
      "step": 84142
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9161840677261353,
      "learning_rate": 0.00042328314260232686,
      "loss": 3.0626,
      "step": 84143
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.161334991455078,
      "learning_rate": 0.0004232794133861965,
      "loss": 3.0132,
      "step": 84144
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4914475679397583,
      "learning_rate": 0.0004232756841471461,
      "loss": 3.0523,
      "step": 84145
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.8531363010406494,
      "learning_rate": 0.00042327195488517643,
      "loss": 2.9082,
      "step": 84146
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.529956102371216,
      "learning_rate": 0.0004232682256002882,
      "loss": 2.818,
      "step": 84147
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5621874332427979,
      "learning_rate": 0.0004232644962924821,
      "loss": 2.7913,
      "step": 84148
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6675512790679932,
      "learning_rate": 0.00042326076696175876,
      "loss": 2.9658,
      "step": 84149
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.285816192626953,
      "learning_rate": 0.000423257037608119,
      "loss": 2.9905,
      "step": 84150
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7758400440216064,
      "learning_rate": 0.0004232533082315633,
      "loss": 2.9367,
      "step": 84151
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.066380500793457,
      "learning_rate": 0.00042324957883209255,
      "loss": 3.205,
      "step": 84152
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.2097513675689697,
      "learning_rate": 0.0004232458494097074,
      "loss": 2.9712,
      "step": 84153
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2028400897979736,
      "learning_rate": 0.00042324211996440847,
      "loss": 3.0205,
      "step": 84154
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1655640602111816,
      "learning_rate": 0.00042323839049619657,
      "loss": 2.9559,
      "step": 84155
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8582961559295654,
      "learning_rate": 0.00042323466100507227,
      "loss": 3.1522,
      "step": 84156
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6037561893463135,
      "learning_rate": 0.00042323093149103635,
      "loss": 3.0639,
      "step": 84157
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1269125938415527,
      "learning_rate": 0.0004232272019540894,
      "loss": 2.9967,
      "step": 84158
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6952497959136963,
      "learning_rate": 0.00042322347239423226,
      "loss": 3.2683,
      "step": 84159
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.9319186210632324,
      "learning_rate": 0.00042321974281146556,
      "loss": 2.8994,
      "step": 84160
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8908462524414062,
      "learning_rate": 0.00042321601320578994,
      "loss": 3.102,
      "step": 84161
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5770206451416016,
      "learning_rate": 0.00042321228357720616,
      "loss": 2.8152,
      "step": 84162
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1875524520874023,
      "learning_rate": 0.0004232085539257149,
      "loss": 2.9651,
      "step": 84163
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.725998640060425,
      "learning_rate": 0.0004232048242513168,
      "loss": 3.0924,
      "step": 84164
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.86211097240448,
      "learning_rate": 0.00042320109455401273,
      "loss": 3.2838,
      "step": 84165
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1811888217926025,
      "learning_rate": 0.00042319736483380314,
      "loss": 2.6217,
      "step": 84166
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8458783626556396,
      "learning_rate": 0.0004231936350906888,
      "loss": 2.9893,
      "step": 84167
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.658948540687561,
      "learning_rate": 0.00042318990532467057,
      "loss": 3.0043,
      "step": 84168
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6235519647598267,
      "learning_rate": 0.0004231861755357489,
      "loss": 2.8584,
      "step": 84169
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.9200360774993896,
      "learning_rate": 0.0004231824457239246,
      "loss": 3.0876,
      "step": 84170
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.402550458908081,
      "learning_rate": 0.00042317871588919836,
      "loss": 3.0477,
      "step": 84171
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8692094087600708,
      "learning_rate": 0.00042317498603157086,
      "loss": 3.0363,
      "step": 84172
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.011229991912842,
      "learning_rate": 0.00042317125615104283,
      "loss": 2.9272,
      "step": 84173
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.8039445877075195,
      "learning_rate": 0.000423167526247615,
      "loss": 3.0991,
      "step": 84174
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4896790981292725,
      "learning_rate": 0.00042316379632128794,
      "loss": 2.8832,
      "step": 84175
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.413539409637451,
      "learning_rate": 0.00042316006637206237,
      "loss": 3.2842,
      "step": 84176
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5117270946502686,
      "learning_rate": 0.00042315633639993905,
      "loss": 3.0458,
      "step": 84177
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.274982452392578,
      "learning_rate": 0.0004231526064049187,
      "loss": 3.042,
      "step": 84178
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2447168827056885,
      "learning_rate": 0.0004231488763870019,
      "loss": 2.8148,
      "step": 84179
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8988474607467651,
      "learning_rate": 0.00042314514634618933,
      "loss": 2.6705,
      "step": 84180
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0401668548583984,
      "learning_rate": 0.0004231414162824819,
      "loss": 3.0577,
      "step": 84181
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8857190608978271,
      "learning_rate": 0.0004231376861958801,
      "loss": 3.1795,
      "step": 84182
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.3807849884033203,
      "learning_rate": 0.0004231339560863846,
      "loss": 2.9034,
      "step": 84183
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7579503059387207,
      "learning_rate": 0.0004231302259539963,
      "loss": 2.9645,
      "step": 84184
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.3765478134155273,
      "learning_rate": 0.00042312649579871574,
      "loss": 3.022,
      "step": 84185
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2742209434509277,
      "learning_rate": 0.0004231227656205435,
      "loss": 3.3058,
      "step": 84186
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7261006832122803,
      "learning_rate": 0.0004231190354194806,
      "loss": 2.9149,
      "step": 84187
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.08243465423584,
      "learning_rate": 0.00042311530519552744,
      "loss": 2.8652,
      "step": 84188
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9153414964675903,
      "learning_rate": 0.0004231115749486848,
      "loss": 2.989,
      "step": 84189
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.176105499267578,
      "learning_rate": 0.0004231078446789535,
      "loss": 2.6928,
      "step": 84190
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5562903881073,
      "learning_rate": 0.000423104114386334,
      "loss": 3.0602,
      "step": 84191
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.221151828765869,
      "learning_rate": 0.0004231003840708272,
      "loss": 2.8423,
      "step": 84192
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7569139003753662,
      "learning_rate": 0.00042309665373243367,
      "loss": 3.1747,
      "step": 84193
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1130497455596924,
      "learning_rate": 0.0004230929233711543,
      "loss": 2.796,
      "step": 84194
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7352522611618042,
      "learning_rate": 0.0004230891929869894,
      "loss": 3.084,
      "step": 84195
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6984775066375732,
      "learning_rate": 0.00042308546257994003,
      "loss": 3.2791,
      "step": 84196
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8257149457931519,
      "learning_rate": 0.00042308173215000676,
      "loss": 3.0171,
      "step": 84197
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7434842586517334,
      "learning_rate": 0.00042307800169719026,
      "loss": 3.0421,
      "step": 84198
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9487229585647583,
      "learning_rate": 0.00042307427122149124,
      "loss": 3.15,
      "step": 84199
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7804744243621826,
      "learning_rate": 0.0004230705407229103,
      "loss": 3.0217,
      "step": 84200
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6249078512191772,
      "learning_rate": 0.00042306681020144835,
      "loss": 2.9375,
      "step": 84201
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5098997354507446,
      "learning_rate": 0.0004230630796571059,
      "loss": 3.0399,
      "step": 84202
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6431655883789062,
      "learning_rate": 0.00042305934908988367,
      "loss": 2.7266,
      "step": 84203
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.795686960220337,
      "learning_rate": 0.0004230556184997824,
      "loss": 3.0422,
      "step": 84204
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4101642370224,
      "learning_rate": 0.00042305188788680284,
      "loss": 3.0811,
      "step": 84205
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.709484577178955,
      "learning_rate": 0.00042304815725094553,
      "loss": 2.9558,
      "step": 84206
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6126930713653564,
      "learning_rate": 0.0004230444265922112,
      "loss": 2.9052,
      "step": 84207
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6453629732131958,
      "learning_rate": 0.00042304069591060075,
      "loss": 3.0052,
      "step": 84208
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9469523429870605,
      "learning_rate": 0.00042303696520611453,
      "loss": 3.0336,
      "step": 84209
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4672117233276367,
      "learning_rate": 0.0004230332344787535,
      "loss": 3.0528,
      "step": 84210
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6811103820800781,
      "learning_rate": 0.00042302950372851827,
      "loss": 2.9792,
      "step": 84211
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.124575138092041,
      "learning_rate": 0.0004230257729554096,
      "loss": 3.0496,
      "step": 84212
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.018677234649658,
      "learning_rate": 0.00042302204215942797,
      "loss": 3.0419,
      "step": 84213
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9160879850387573,
      "learning_rate": 0.00042301831134057436,
      "loss": 3.1461,
      "step": 84214
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4599367380142212,
      "learning_rate": 0.0004230145804988493,
      "loss": 3.15,
      "step": 84215
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.8575868606567383,
      "learning_rate": 0.00042301084963425345,
      "loss": 3.1528,
      "step": 84216
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.920899748802185,
      "learning_rate": 0.0004230071187467876,
      "loss": 3.1566,
      "step": 84217
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1525158882141113,
      "learning_rate": 0.0004230033878364524,
      "loss": 2.8319,
      "step": 84218
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8178025484085083,
      "learning_rate": 0.00042299965690324845,
      "loss": 2.9889,
      "step": 84219
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6919901371002197,
      "learning_rate": 0.0004229959259471767,
      "loss": 3.2062,
      "step": 84220
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.742712378501892,
      "learning_rate": 0.00042299219496823765,
      "loss": 3.3216,
      "step": 84221
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9607927799224854,
      "learning_rate": 0.00042298846396643196,
      "loss": 3.0103,
      "step": 84222
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9245717525482178,
      "learning_rate": 0.00042298473294176045,
      "loss": 2.9739,
      "step": 84223
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7525659799575806,
      "learning_rate": 0.0004229810018942237,
      "loss": 2.7225,
      "step": 84224
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2112534046173096,
      "learning_rate": 0.0004229772708238225,
      "loss": 3.1071,
      "step": 84225
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.6167633533477783,
      "learning_rate": 0.00042297353973055755,
      "loss": 3.2126,
      "step": 84226
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8589495420455933,
      "learning_rate": 0.0004229698086144294,
      "loss": 2.8351,
      "step": 84227
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6128493547439575,
      "learning_rate": 0.00042296607747543887,
      "loss": 2.9306,
      "step": 84228
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.283322334289551,
      "learning_rate": 0.0004229623463135867,
      "loss": 2.8522,
      "step": 84229
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5329053401947021,
      "learning_rate": 0.00042295861512887343,
      "loss": 2.9992,
      "step": 84230
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7755481004714966,
      "learning_rate": 0.0004229548839212998,
      "loss": 2.821,
      "step": 84231
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7353007793426514,
      "learning_rate": 0.00042295115269086665,
      "loss": 3.0878,
      "step": 84232
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.486600875854492,
      "learning_rate": 0.0004229474214375745,
      "loss": 3.145,
      "step": 84233
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5940849781036377,
      "learning_rate": 0.00042294369016142406,
      "loss": 3.0457,
      "step": 84234
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6068472862243652,
      "learning_rate": 0.0004229399588624161,
      "loss": 3.0567,
      "step": 84235
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6439024209976196,
      "learning_rate": 0.00042293622754055135,
      "loss": 3.139,
      "step": 84236
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.0055739879608154,
      "learning_rate": 0.0004229324961958303,
      "loss": 3.1733,
      "step": 84237
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.055314302444458,
      "learning_rate": 0.00042292876482825394,
      "loss": 2.8615,
      "step": 84238
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.400383949279785,
      "learning_rate": 0.0004229250334378227,
      "loss": 2.8215,
      "step": 84239
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.34084415435791,
      "learning_rate": 0.0004229213020245374,
      "loss": 3.0654,
      "step": 84240
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.051177501678467,
      "learning_rate": 0.00042291757058839866,
      "loss": 2.9721,
      "step": 84241
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.327582836151123,
      "learning_rate": 0.00042291383912940726,
      "loss": 3.2841,
      "step": 84242
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2206857204437256,
      "learning_rate": 0.00042291010764756385,
      "loss": 3.1785,
      "step": 84243
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.196370601654053,
      "learning_rate": 0.00042290637614286915,
      "loss": 2.7962,
      "step": 84244
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.6138453483581543,
      "learning_rate": 0.00042290264461532386,
      "loss": 3.1428,
      "step": 84245
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.398655891418457,
      "learning_rate": 0.0004228989130649286,
      "loss": 3.0372,
      "step": 84246
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8572341203689575,
      "learning_rate": 0.00042289518149168416,
      "loss": 3.01,
      "step": 84247
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.867708683013916,
      "learning_rate": 0.00042289144989559116,
      "loss": 3.039,
      "step": 84248
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.616306781768799,
      "learning_rate": 0.0004228877182766503,
      "loss": 3.0242,
      "step": 84249
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.0394015312194824,
      "learning_rate": 0.0004228839866348622,
      "loss": 3.4317,
      "step": 84250
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6424131393432617,
      "learning_rate": 0.00042288025497022786,
      "loss": 3.1079,
      "step": 84251
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6817288398742676,
      "learning_rate": 0.0004228765232827477,
      "loss": 2.8637,
      "step": 84252
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0861737728118896,
      "learning_rate": 0.0004228727915724224,
      "loss": 3.1467,
      "step": 84253
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.204826593399048,
      "learning_rate": 0.0004228690598392528,
      "loss": 3.1814,
      "step": 84254
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0397541522979736,
      "learning_rate": 0.0004228653280832394,
      "loss": 2.9475,
      "step": 84255
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7060163021087646,
      "learning_rate": 0.0004228615963043831,
      "loss": 3.1855,
      "step": 84256
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9294195175170898,
      "learning_rate": 0.00042285786450268456,
      "loss": 3.0806,
      "step": 84257
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.10831356048584,
      "learning_rate": 0.0004228541326781443,
      "loss": 2.9092,
      "step": 84258
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.1737496852874756,
      "learning_rate": 0.0004228504008307632,
      "loss": 2.9676,
      "step": 84259
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.687498927116394,
      "learning_rate": 0.00042284666896054196,
      "loss": 2.8842,
      "step": 84260
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.438093900680542,
      "learning_rate": 0.0004228429370674811,
      "loss": 2.9807,
      "step": 84261
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7644253969192505,
      "learning_rate": 0.0004228392051515815,
      "loss": 2.8323,
      "step": 84262
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.213409662246704,
      "learning_rate": 0.0004228354732128438,
      "loss": 3.1184,
      "step": 84263
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6687657833099365,
      "learning_rate": 0.0004228317412512686,
      "loss": 3.2789,
      "step": 84264
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9167720079421997,
      "learning_rate": 0.00042282800926685663,
      "loss": 3.1529,
      "step": 84265
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6237242221832275,
      "learning_rate": 0.00042282427725960864,
      "loss": 2.7479,
      "step": 84266
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6780223846435547,
      "learning_rate": 0.0004228205452295253,
      "loss": 2.9382,
      "step": 84267
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.163353204727173,
      "learning_rate": 0.00042281681317660733,
      "loss": 2.735,
      "step": 84268
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5820388793945312,
      "learning_rate": 0.00042281308110085543,
      "loss": 2.8335,
      "step": 84269
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.0685231685638428,
      "learning_rate": 0.0004228093490022702,
      "loss": 2.9349,
      "step": 84270
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4876322746276855,
      "learning_rate": 0.00042280561688085234,
      "loss": 3.2026,
      "step": 84271
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6956442594528198,
      "learning_rate": 0.00042280188473660266,
      "loss": 2.8849,
      "step": 84272
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9340426921844482,
      "learning_rate": 0.00042279815256952185,
      "loss": 2.9432,
      "step": 84273
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5706868171691895,
      "learning_rate": 0.00042279442037961045,
      "loss": 3.1171,
      "step": 84274
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0333006381988525,
      "learning_rate": 0.00042279068816686934,
      "loss": 3.0641,
      "step": 84275
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8241183757781982,
      "learning_rate": 0.000422786955931299,
      "loss": 2.7935,
      "step": 84276
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9737827777862549,
      "learning_rate": 0.00042278322367290033,
      "loss": 2.9374,
      "step": 84277
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1741747856140137,
      "learning_rate": 0.00042277949139167396,
      "loss": 2.9635,
      "step": 84278
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.442091464996338,
      "learning_rate": 0.00042277575908762047,
      "loss": 3.0204,
      "step": 84279
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6661940813064575,
      "learning_rate": 0.00042277202676074073,
      "loss": 3.1261,
      "step": 84280
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8950749635696411,
      "learning_rate": 0.0004227682944110353,
      "loss": 2.9923,
      "step": 84281
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5049315690994263,
      "learning_rate": 0.000422764562038505,
      "loss": 3.1352,
      "step": 84282
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.136950969696045,
      "learning_rate": 0.0004227608296431504,
      "loss": 3.0051,
      "step": 84283
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.785237193107605,
      "learning_rate": 0.0004227570972249722,
      "loss": 3.1486,
      "step": 84284
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8545029163360596,
      "learning_rate": 0.0004227533647839713,
      "loss": 3.1202,
      "step": 84285
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7469611167907715,
      "learning_rate": 0.00042274963232014805,
      "loss": 2.8338,
      "step": 84286
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.766173005104065,
      "learning_rate": 0.0004227458998335034,
      "loss": 2.9367,
      "step": 84287
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7104822397232056,
      "learning_rate": 0.000422742167324038,
      "loss": 3.2268,
      "step": 84288
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.058396816253662,
      "learning_rate": 0.0004227384347917525,
      "loss": 2.8725,
      "step": 84289
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.209366798400879,
      "learning_rate": 0.00042273470223664754,
      "loss": 2.6337,
      "step": 84290
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1315231323242188,
      "learning_rate": 0.00042273096965872393,
      "loss": 2.6739,
      "step": 84291
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.476013422012329,
      "learning_rate": 0.0004227272370579823,
      "loss": 2.8777,
      "step": 84292
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.272005558013916,
      "learning_rate": 0.0004227235044344235,
      "loss": 2.8507,
      "step": 84293
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5778900384902954,
      "learning_rate": 0.0004227197717880479,
      "loss": 3.1447,
      "step": 84294
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2579617500305176,
      "learning_rate": 0.00042271603911885644,
      "loss": 3.0816,
      "step": 84295
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8872283697128296,
      "learning_rate": 0.0004227123064268498,
      "loss": 3.1645,
      "step": 84296
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6264334917068481,
      "learning_rate": 0.00042270857371202856,
      "loss": 2.8391,
      "step": 84297
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4309109449386597,
      "learning_rate": 0.00042270484097439346,
      "loss": 3.1264,
      "step": 84298
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.506661057472229,
      "learning_rate": 0.00042270110821394534,
      "loss": 3.053,
      "step": 84299
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.457241415977478,
      "learning_rate": 0.0004226973754306846,
      "loss": 3.0999,
      "step": 84300
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5720540285110474,
      "learning_rate": 0.0004226936426246122,
      "loss": 2.8152,
      "step": 84301
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.519715666770935,
      "learning_rate": 0.0004226899097957288,
      "loss": 2.9321,
      "step": 84302
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0979502201080322,
      "learning_rate": 0.0004226861769440349,
      "loss": 3.0016,
      "step": 84303
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.44211745262146,
      "learning_rate": 0.0004226824440695313,
      "loss": 3.0728,
      "step": 84304
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9998971223831177,
      "learning_rate": 0.0004226787111722189,
      "loss": 3.1576,
      "step": 84305
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7960472106933594,
      "learning_rate": 0.0004226749782520981,
      "loss": 3.1223,
      "step": 84306
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6341428756713867,
      "learning_rate": 0.00042267124530916963,
      "loss": 3.0659,
      "step": 84307
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7021639347076416,
      "learning_rate": 0.0004226675123434344,
      "loss": 2.8664,
      "step": 84308
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9743441343307495,
      "learning_rate": 0.0004226637793548929,
      "loss": 3.0733,
      "step": 84309
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8445886373519897,
      "learning_rate": 0.00042266004634354586,
      "loss": 3.0525,
      "step": 84310
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.789881944656372,
      "learning_rate": 0.00042265631330939404,
      "loss": 3.1994,
      "step": 84311
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7143687009811401,
      "learning_rate": 0.0004226525802524381,
      "loss": 3.077,
      "step": 84312
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7126381397247314,
      "learning_rate": 0.0004226488471726787,
      "loss": 3.0511,
      "step": 84313
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6307536363601685,
      "learning_rate": 0.00042264511407011656,
      "loss": 3.0185,
      "step": 84314
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5822490453720093,
      "learning_rate": 0.00042264138094475244,
      "loss": 2.9315,
      "step": 84315
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.677711009979248,
      "learning_rate": 0.00042263764779658695,
      "loss": 2.8026,
      "step": 84316
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8507753610610962,
      "learning_rate": 0.0004226339146256208,
      "loss": 2.9873,
      "step": 84317
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7208565473556519,
      "learning_rate": 0.00042263018143185474,
      "loss": 3.075,
      "step": 84318
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.457707166671753,
      "learning_rate": 0.0004226264482152893,
      "loss": 3.0684,
      "step": 84319
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.059760332107544,
      "learning_rate": 0.0004226227149759253,
      "loss": 3.0478,
      "step": 84320
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8885080814361572,
      "learning_rate": 0.00042261898171376354,
      "loss": 3.2506,
      "step": 84321
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.286025047302246,
      "learning_rate": 0.0004226152484288045,
      "loss": 2.9265,
      "step": 84322
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.850721001625061,
      "learning_rate": 0.000422611515121049,
      "loss": 2.9867,
      "step": 84323
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6084630489349365,
      "learning_rate": 0.0004226077817904977,
      "loss": 3.0709,
      "step": 84324
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9785529375076294,
      "learning_rate": 0.0004226040484371513,
      "loss": 2.9368,
      "step": 84325
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.849715232849121,
      "learning_rate": 0.00042260031506101046,
      "loss": 2.8382,
      "step": 84326
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5314768552780151,
      "learning_rate": 0.000422596581662076,
      "loss": 3.1948,
      "step": 84327
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1028990745544434,
      "learning_rate": 0.0004225928482403484,
      "loss": 3.2448,
      "step": 84328
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.8746836185455322,
      "learning_rate": 0.00042258911479582857,
      "loss": 3.1355,
      "step": 84329
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7938783168792725,
      "learning_rate": 0.0004225853813285171,
      "loss": 2.9025,
      "step": 84330
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5839099884033203,
      "learning_rate": 0.00042258164783841464,
      "loss": 3.0615,
      "step": 84331
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1705098152160645,
      "learning_rate": 0.0004225779143255219,
      "loss": 2.8821,
      "step": 84332
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.185020685195923,
      "learning_rate": 0.0004225741807898398,
      "loss": 2.8707,
      "step": 84333
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.341390609741211,
      "learning_rate": 0.0004225704472313687,
      "loss": 2.9003,
      "step": 84334
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1435866355895996,
      "learning_rate": 0.0004225667136501094,
      "loss": 3.2072,
      "step": 84335
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9492411613464355,
      "learning_rate": 0.0004225629800460628,
      "loss": 3.2018,
      "step": 84336
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5868825912475586,
      "learning_rate": 0.0004225592464192293,
      "loss": 2.932,
      "step": 84337
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.033189296722412,
      "learning_rate": 0.00042255551276960973,
      "loss": 3.117,
      "step": 84338
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0516743659973145,
      "learning_rate": 0.0004225517790972048,
      "loss": 2.8318,
      "step": 84339
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.1551225185394287,
      "learning_rate": 0.0004225480454020152,
      "loss": 2.9986,
      "step": 84340
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2497317790985107,
      "learning_rate": 0.00042254431168404154,
      "loss": 3.1117,
      "step": 84341
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4322260618209839,
      "learning_rate": 0.0004225405779432846,
      "loss": 2.9174,
      "step": 84342
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6387029886245728,
      "learning_rate": 0.0004225368441797451,
      "loss": 3.0557,
      "step": 84343
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8528375625610352,
      "learning_rate": 0.0004225331103934236,
      "loss": 3.1393,
      "step": 84344
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8978888988494873,
      "learning_rate": 0.00042252937658432107,
      "loss": 2.8288,
      "step": 84345
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.859478235244751,
      "learning_rate": 0.00042252564275243785,
      "loss": 3.0082,
      "step": 84346
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6779505014419556,
      "learning_rate": 0.00042252190889777483,
      "loss": 3.1158,
      "step": 84347
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.483210563659668,
      "learning_rate": 0.0004225181750203327,
      "loss": 2.8549,
      "step": 84348
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.015230178833008,
      "learning_rate": 0.00042251444112011204,
      "loss": 3.2206,
      "step": 84349
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5659823417663574,
      "learning_rate": 0.0004225107071971137,
      "loss": 2.7867,
      "step": 84350
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.5408997535705566,
      "learning_rate": 0.0004225069732513384,
      "loss": 2.9184,
      "step": 84351
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.1797709465026855,
      "learning_rate": 0.0004225032392827866,
      "loss": 2.9974,
      "step": 84352
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5612519979476929,
      "learning_rate": 0.0004224995052914592,
      "loss": 3.0414,
      "step": 84353
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.703620195388794,
      "learning_rate": 0.00042249577127735687,
      "loss": 2.674,
      "step": 84354
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.471292495727539,
      "learning_rate": 0.0004224920372404802,
      "loss": 3.047,
      "step": 84355
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.500889301300049,
      "learning_rate": 0.0004224883031808299,
      "loss": 3.1435,
      "step": 84356
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5865464210510254,
      "learning_rate": 0.00042248456909840684,
      "loss": 3.0362,
      "step": 84357
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7166467905044556,
      "learning_rate": 0.0004224808349932115,
      "loss": 3.0399,
      "step": 84358
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.390688419342041,
      "learning_rate": 0.0004224771008652447,
      "loss": 3.1592,
      "step": 84359
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0790224075317383,
      "learning_rate": 0.0004224733667145071,
      "loss": 3.111,
      "step": 84360
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6379104852676392,
      "learning_rate": 0.0004224696325409993,
      "loss": 2.9996,
      "step": 84361
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9166375398635864,
      "learning_rate": 0.0004224658983447222,
      "loss": 2.8826,
      "step": 84362
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7799761295318604,
      "learning_rate": 0.00042246216412567634,
      "loss": 2.8567,
      "step": 84363
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.648695468902588,
      "learning_rate": 0.0004224584298838624,
      "loss": 3.2242,
      "step": 84364
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6175593137741089,
      "learning_rate": 0.00042245469561928115,
      "loss": 2.951,
      "step": 84365
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.3129303455352783,
      "learning_rate": 0.0004224509613319333,
      "loss": 2.9271,
      "step": 84366
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5826727151870728,
      "learning_rate": 0.0004224472270218195,
      "loss": 3.1892,
      "step": 84367
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5260266065597534,
      "learning_rate": 0.0004224434926889404,
      "loss": 3.101,
      "step": 84368
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7526955604553223,
      "learning_rate": 0.00042243975833329675,
      "loss": 2.93,
      "step": 84369
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.654803514480591,
      "learning_rate": 0.00042243602395488933,
      "loss": 3.425,
      "step": 84370
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.709912657737732,
      "learning_rate": 0.0004224322895537186,
      "loss": 3.0735,
      "step": 84371
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8339169025421143,
      "learning_rate": 0.0004224285551297855,
      "loss": 2.8895,
      "step": 84372
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2192459106445312,
      "learning_rate": 0.0004224248206830905,
      "loss": 3.1273,
      "step": 84373
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0451042652130127,
      "learning_rate": 0.00042242108621363457,
      "loss": 3.0809,
      "step": 84374
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0921974182128906,
      "learning_rate": 0.0004224173517214182,
      "loss": 2.898,
      "step": 84375
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4930665493011475,
      "learning_rate": 0.0004224136172064421,
      "loss": 2.7361,
      "step": 84376
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8195475339889526,
      "learning_rate": 0.00042240988266870706,
      "loss": 3.2365,
      "step": 84377
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4132519960403442,
      "learning_rate": 0.0004224061481082136,
      "loss": 3.0037,
      "step": 84378
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8059524297714233,
      "learning_rate": 0.00042240241352496264,
      "loss": 3.2283,
      "step": 84379
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9026693105697632,
      "learning_rate": 0.00042239867891895463,
      "loss": 2.7438,
      "step": 84380
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6172959804534912,
      "learning_rate": 0.00042239494429019056,
      "loss": 3.0747,
      "step": 84381
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.718018651008606,
      "learning_rate": 0.00042239120963867083,
      "loss": 2.9749,
      "step": 84382
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6107097864151,
      "learning_rate": 0.00042238747496439635,
      "loss": 3.1691,
      "step": 84383
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7192891836166382,
      "learning_rate": 0.0004223837402673676,
      "loss": 3.128,
      "step": 84384
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.460437297821045,
      "learning_rate": 0.00042238000554758553,
      "loss": 2.9253,
      "step": 84385
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6309024095535278,
      "learning_rate": 0.0004223762708050507,
      "loss": 2.8811,
      "step": 84386
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9661351442337036,
      "learning_rate": 0.00042237253603976375,
      "loss": 3.0911,
      "step": 84387
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.6209888458251953,
      "learning_rate": 0.0004223688012517255,
      "loss": 2.9367,
      "step": 84388
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.600087285041809,
      "learning_rate": 0.00042236506644093653,
      "loss": 3.0326,
      "step": 84389
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4350701570510864,
      "learning_rate": 0.00042236133160739763,
      "loss": 3.1562,
      "step": 84390
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6887080669403076,
      "learning_rate": 0.0004223575967511094,
      "loss": 3.0706,
      "step": 84391
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6313986778259277,
      "learning_rate": 0.0004223538618720726,
      "loss": 2.9065,
      "step": 84392
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.005178928375244,
      "learning_rate": 0.0004223501269702879,
      "loss": 3.0214,
      "step": 84393
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8570812940597534,
      "learning_rate": 0.00042234639204575603,
      "loss": 3.0439,
      "step": 84394
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.802726149559021,
      "learning_rate": 0.00042234265709847766,
      "loss": 2.9989,
      "step": 84395
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.8532052040100098,
      "learning_rate": 0.00042233892212845337,
      "loss": 2.8085,
      "step": 84396
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0922091007232666,
      "learning_rate": 0.00042233518713568415,
      "loss": 2.9073,
      "step": 84397
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6380009651184082,
      "learning_rate": 0.00042233145212017037,
      "loss": 3.1189,
      "step": 84398
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5337039232254028,
      "learning_rate": 0.0004223277170819129,
      "loss": 2.9173,
      "step": 84399
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4547030925750732,
      "learning_rate": 0.0004223239820209124,
      "loss": 2.845,
      "step": 84400
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5676908493041992,
      "learning_rate": 0.0004223202469371696,
      "loss": 2.9927,
      "step": 84401
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5833779573440552,
      "learning_rate": 0.0004223165118306851,
      "loss": 3.0214,
      "step": 84402
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.690484881401062,
      "learning_rate": 0.0004223127767014597,
      "loss": 3.1327,
      "step": 84403
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.145920753479004,
      "learning_rate": 0.00042230904154949404,
      "loss": 3.259,
      "step": 84404
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4701157808303833,
      "learning_rate": 0.0004223053063747888,
      "loss": 3.0074,
      "step": 84405
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.8302206993103027,
      "learning_rate": 0.0004223015711773447,
      "loss": 2.988,
      "step": 84406
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.9828941822052,
      "learning_rate": 0.0004222978359571624,
      "loss": 3.064,
      "step": 84407
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.59307599067688,
      "learning_rate": 0.00042229410071424264,
      "loss": 2.9921,
      "step": 84408
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9119856357574463,
      "learning_rate": 0.0004222903654485862,
      "loss": 3.2262,
      "step": 84409
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5966219902038574,
      "learning_rate": 0.0004222866301601936,
      "loss": 3.0326,
      "step": 84410
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.5695059299468994,
      "learning_rate": 0.00042228289484906546,
      "loss": 3.1197,
      "step": 84411
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6205517053604126,
      "learning_rate": 0.00042227915951520287,
      "loss": 2.9535,
      "step": 84412
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8731149435043335,
      "learning_rate": 0.0004222754241586061,
      "loss": 3.1078,
      "step": 84413
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8454424142837524,
      "learning_rate": 0.000422271688779276,
      "loss": 3.1421,
      "step": 84414
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.760822057723999,
      "learning_rate": 0.0004222679533772134,
      "loss": 2.9557,
      "step": 84415
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7950828075408936,
      "learning_rate": 0.0004222642179524189,
      "loss": 3.1034,
      "step": 84416
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2986762523651123,
      "learning_rate": 0.00042226048250489307,
      "loss": 3.0336,
      "step": 84417
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0141589641571045,
      "learning_rate": 0.0004222567470346368,
      "loss": 3.057,
      "step": 84418
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.795997142791748,
      "learning_rate": 0.0004222530115416507,
      "loss": 3.0269,
      "step": 84419
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5841046571731567,
      "learning_rate": 0.0004222492760259353,
      "loss": 2.8184,
      "step": 84420
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.9002461433410645,
      "learning_rate": 0.00042224554048749164,
      "loss": 2.8627,
      "step": 84421
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.228335380554199,
      "learning_rate": 0.0004222418049263201,
      "loss": 3.1263,
      "step": 84422
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.059642791748047,
      "learning_rate": 0.0004222380693424215,
      "loss": 3.0027,
      "step": 84423
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.025343179702759,
      "learning_rate": 0.00042223433373579666,
      "loss": 2.9901,
      "step": 84424
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7579848766326904,
      "learning_rate": 0.00042223059810644604,
      "loss": 3.2459,
      "step": 84425
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5019304752349854,
      "learning_rate": 0.00042222686245437045,
      "loss": 3.0143,
      "step": 84426
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2036993503570557,
      "learning_rate": 0.0004222231267795707,
      "loss": 2.9489,
      "step": 84427
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.499610424041748,
      "learning_rate": 0.0004222193910820472,
      "loss": 2.7947,
      "step": 84428
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.896684169769287,
      "learning_rate": 0.0004222156553618009,
      "loss": 3.0405,
      "step": 84429
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.869439721107483,
      "learning_rate": 0.0004222119196188324,
      "loss": 3.1313,
      "step": 84430
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.658060908317566,
      "learning_rate": 0.0004222081838531423,
      "loss": 2.8791,
      "step": 84431
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.510757327079773,
      "learning_rate": 0.0004222044480647315,
      "loss": 3.2126,
      "step": 84432
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.176024913787842,
      "learning_rate": 0.00042220071225360057,
      "loss": 3.0163,
      "step": 84433
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9870628118515015,
      "learning_rate": 0.0004221969764197502,
      "loss": 3.0128,
      "step": 84434
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1209676265716553,
      "learning_rate": 0.0004221932405631811,
      "loss": 2.9349,
      "step": 84435
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6477590799331665,
      "learning_rate": 0.000422189504683894,
      "loss": 3.1801,
      "step": 84436
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7795661687850952,
      "learning_rate": 0.00042218576878188955,
      "loss": 2.9703,
      "step": 84437
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.827418565750122,
      "learning_rate": 0.00042218203285716843,
      "loss": 3.0104,
      "step": 84438
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8931386470794678,
      "learning_rate": 0.00042217829690973144,
      "loss": 3.2335,
      "step": 84439
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.34443736076355,
      "learning_rate": 0.0004221745609395791,
      "loss": 3.0627,
      "step": 84440
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6230394840240479,
      "learning_rate": 0.00042217082494671226,
      "loss": 2.9784,
      "step": 84441
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7316325902938843,
      "learning_rate": 0.00042216708893113154,
      "loss": 3.144,
      "step": 84442
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.478665351867676,
      "learning_rate": 0.00042216335289283765,
      "loss": 3.0869,
      "step": 84443
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4482123851776123,
      "learning_rate": 0.0004221596168318313,
      "loss": 3.0633,
      "step": 84444
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1125731468200684,
      "learning_rate": 0.0004221558807481132,
      "loss": 3.0086,
      "step": 84445
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7808152437210083,
      "learning_rate": 0.00042215214464168396,
      "loss": 3.142,
      "step": 84446
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1496951580047607,
      "learning_rate": 0.00042214840851254433,
      "loss": 3.1917,
      "step": 84447
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.24084210395813,
      "learning_rate": 0.00042214467236069495,
      "loss": 3.0508,
      "step": 84448
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.542895793914795,
      "learning_rate": 0.0004221409361861367,
      "loss": 2.9871,
      "step": 84449
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2754273414611816,
      "learning_rate": 0.00042213719998887014,
      "loss": 2.8225,
      "step": 84450
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.743858814239502,
      "learning_rate": 0.0004221334637688959,
      "loss": 2.7306,
      "step": 84451
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7977327108383179,
      "learning_rate": 0.00042212972752621477,
      "loss": 3.0839,
      "step": 84452
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9083513021469116,
      "learning_rate": 0.0004221259912608274,
      "loss": 2.9193,
      "step": 84453
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.919770359992981,
      "learning_rate": 0.00042212225497273453,
      "loss": 3.1899,
      "step": 84454
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5864429473876953,
      "learning_rate": 0.00042211851866193683,
      "loss": 3.0459,
      "step": 84455
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1415021419525146,
      "learning_rate": 0.00042211478232843496,
      "loss": 2.7445,
      "step": 84456
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8908568620681763,
      "learning_rate": 0.0004221110459722297,
      "loss": 2.9115,
      "step": 84457
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.186809778213501,
      "learning_rate": 0.00042210730959332164,
      "loss": 3.063,
      "step": 84458
    },
    {
      "epoch": 1.1,
      "grad_norm": 5.085407733917236,
      "learning_rate": 0.00042210357319171155,
      "loss": 2.9984,
      "step": 84459
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4565653800964355,
      "learning_rate": 0.00042209983676740005,
      "loss": 2.8256,
      "step": 84460
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.023020029067993,
      "learning_rate": 0.00042209610032038796,
      "loss": 3.2292,
      "step": 84461
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.597166061401367,
      "learning_rate": 0.0004220923638506758,
      "loss": 2.8727,
      "step": 84462
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.145951747894287,
      "learning_rate": 0.00042208862735826445,
      "loss": 2.7506,
      "step": 84463
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5937341451644897,
      "learning_rate": 0.0004220848908431545,
      "loss": 3.1937,
      "step": 84464
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2085800170898438,
      "learning_rate": 0.0004220811543053466,
      "loss": 3.0865,
      "step": 84465
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9763336181640625,
      "learning_rate": 0.0004220774177448417,
      "loss": 3.093,
      "step": 84466
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6437867879867554,
      "learning_rate": 0.0004220736811616401,
      "loss": 3.0218,
      "step": 84467
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6623879671096802,
      "learning_rate": 0.00042206994455574277,
      "loss": 3.1703,
      "step": 84468
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.792108416557312,
      "learning_rate": 0.0004220662079271504,
      "loss": 3.1452,
      "step": 84469
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5448267459869385,
      "learning_rate": 0.0004220624712758635,
      "loss": 2.8432,
      "step": 84470
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6363921165466309,
      "learning_rate": 0.00042205873460188296,
      "loss": 2.8693,
      "step": 84471
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8671671152114868,
      "learning_rate": 0.00042205499790520936,
      "loss": 3.0433,
      "step": 84472
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7213448286056519,
      "learning_rate": 0.00042205126118584346,
      "loss": 2.9404,
      "step": 84473
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2714922428131104,
      "learning_rate": 0.00042204752444378586,
      "loss": 2.8753,
      "step": 84474
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.624232292175293,
      "learning_rate": 0.0004220437876790374,
      "loss": 2.941,
      "step": 84475
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8580139875411987,
      "learning_rate": 0.0004220400508915987,
      "loss": 2.9145,
      "step": 84476
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6217803955078125,
      "learning_rate": 0.0004220363140814704,
      "loss": 3.0073,
      "step": 84477
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.523745059967041,
      "learning_rate": 0.00042203257724865326,
      "loss": 2.9292,
      "step": 84478
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4814730882644653,
      "learning_rate": 0.00042202884039314805,
      "loss": 2.7724,
      "step": 84479
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6697115898132324,
      "learning_rate": 0.0004220251035149552,
      "loss": 3.0953,
      "step": 84480
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.836380124092102,
      "learning_rate": 0.0004220213666140757,
      "loss": 3.117,
      "step": 84481
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.515373706817627,
      "learning_rate": 0.00042201762969051014,
      "loss": 3.0746,
      "step": 84482
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5618129968643188,
      "learning_rate": 0.0004220138927442591,
      "loss": 2.8828,
      "step": 84483
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9188683032989502,
      "learning_rate": 0.0004220101557753235,
      "loss": 3.167,
      "step": 84484
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6418112516403198,
      "learning_rate": 0.0004220064187837039,
      "loss": 3.1808,
      "step": 84485
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4643440246582031,
      "learning_rate": 0.00042200268176940085,
      "loss": 2.932,
      "step": 84486
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7104765176773071,
      "learning_rate": 0.00042199894473241527,
      "loss": 3.0292,
      "step": 84487
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8514333963394165,
      "learning_rate": 0.00042199520767274786,
      "loss": 2.9077,
      "step": 84488
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6533066034317017,
      "learning_rate": 0.00042199147059039915,
      "loss": 2.7852,
      "step": 84489
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.5699539184570312,
      "learning_rate": 0.00042198773348536996,
      "loss": 3.0342,
      "step": 84490
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.398061990737915,
      "learning_rate": 0.000421983996357661,
      "loss": 2.938,
      "step": 84491
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2342422008514404,
      "learning_rate": 0.00042198025920727286,
      "loss": 3.034,
      "step": 84492
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.822950839996338,
      "learning_rate": 0.00042197652203420624,
      "loss": 3.046,
      "step": 84493
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6208959817886353,
      "learning_rate": 0.00042197278483846197,
      "loss": 3.2121,
      "step": 84494
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6273322105407715,
      "learning_rate": 0.0004219690476200406,
      "loss": 3.0648,
      "step": 84495
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6586450338363647,
      "learning_rate": 0.0004219653103789428,
      "loss": 2.9549,
      "step": 84496
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5014773607254028,
      "learning_rate": 0.0004219615731151696,
      "loss": 3.0644,
      "step": 84497
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4182482957839966,
      "learning_rate": 0.0004219578358287212,
      "loss": 2.9419,
      "step": 84498
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7063360214233398,
      "learning_rate": 0.0004219540985195986,
      "loss": 3.0574,
      "step": 84499
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0905168056488037,
      "learning_rate": 0.0004219503611878025,
      "loss": 3.1433,
      "step": 84500
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.857324242591858,
      "learning_rate": 0.00042194662383333344,
      "loss": 2.9999,
      "step": 84501
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.700680136680603,
      "learning_rate": 0.00042194288645619227,
      "loss": 2.8157,
      "step": 84502
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.152297019958496,
      "learning_rate": 0.0004219391490563796,
      "loss": 2.9301,
      "step": 84503
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.251204490661621,
      "learning_rate": 0.00042193541163389615,
      "loss": 2.9448,
      "step": 84504
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6947938203811646,
      "learning_rate": 0.0004219316741887425,
      "loss": 3.0599,
      "step": 84505
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.13128924369812,
      "learning_rate": 0.0004219279367209196,
      "loss": 3.0514,
      "step": 84506
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7859954833984375,
      "learning_rate": 0.0004219241992304279,
      "loss": 2.7842,
      "step": 84507
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7198365926742554,
      "learning_rate": 0.00042192046171726815,
      "loss": 2.9813,
      "step": 84508
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5163156986236572,
      "learning_rate": 0.0004219167241814412,
      "loss": 3.0063,
      "step": 84509
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.903756856918335,
      "learning_rate": 0.00042191298662294755,
      "loss": 2.7419,
      "step": 84510
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.776647686958313,
      "learning_rate": 0.00042190924904178805,
      "loss": 2.8987,
      "step": 84511
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2287473678588867,
      "learning_rate": 0.0004219055114379632,
      "loss": 2.7918,
      "step": 84512
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5633018016815186,
      "learning_rate": 0.00042190177381147396,
      "loss": 2.9445,
      "step": 84513
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5327943563461304,
      "learning_rate": 0.0004218980361623208,
      "loss": 2.9193,
      "step": 84514
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.97515606880188,
      "learning_rate": 0.00042189429849050447,
      "loss": 2.9443,
      "step": 84515
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2867767810821533,
      "learning_rate": 0.00042189056079602573,
      "loss": 2.8197,
      "step": 84516
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8480384349822998,
      "learning_rate": 0.0004218868230788852,
      "loss": 3.1885,
      "step": 84517
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9693878889083862,
      "learning_rate": 0.00042188308533908363,
      "loss": 3.1165,
      "step": 84518
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6964662075042725,
      "learning_rate": 0.00042187934757662173,
      "loss": 2.9241,
      "step": 84519
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8129528760910034,
      "learning_rate": 0.0004218756097915001,
      "loss": 3.1371,
      "step": 84520
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9055156707763672,
      "learning_rate": 0.0004218718719837195,
      "loss": 3.1777,
      "step": 84521
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6240493059158325,
      "learning_rate": 0.00042186813415328065,
      "loss": 3.0591,
      "step": 84522
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5410454273223877,
      "learning_rate": 0.0004218643963001842,
      "loss": 2.8861,
      "step": 84523
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6304421424865723,
      "learning_rate": 0.00042186065842443084,
      "loss": 2.9515,
      "step": 84524
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8193804025650024,
      "learning_rate": 0.00042185692052602137,
      "loss": 3.0377,
      "step": 84525
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6578751802444458,
      "learning_rate": 0.0004218531826049563,
      "loss": 3.1082,
      "step": 84526
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8332980871200562,
      "learning_rate": 0.0004218494446612364,
      "loss": 3.4261,
      "step": 84527
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9983749389648438,
      "learning_rate": 0.0004218457066948625,
      "loss": 3.0703,
      "step": 84528
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.201690673828125,
      "learning_rate": 0.0004218419687058351,
      "loss": 2.8994,
      "step": 84529
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6337411403656006,
      "learning_rate": 0.000421838230694155,
      "loss": 2.9734,
      "step": 84530
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.681598424911499,
      "learning_rate": 0.0004218344926598229,
      "loss": 3.1176,
      "step": 84531
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0993263721466064,
      "learning_rate": 0.0004218307546028394,
      "loss": 2.9777,
      "step": 84532
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.333674430847168,
      "learning_rate": 0.00042182701652320533,
      "loss": 2.9201,
      "step": 84533
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.455118417739868,
      "learning_rate": 0.00042182327842092134,
      "loss": 2.7657,
      "step": 84534
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1455109119415283,
      "learning_rate": 0.000421819540295988,
      "loss": 2.8588,
      "step": 84535
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.526645541191101,
      "learning_rate": 0.0004218158021484062,
      "loss": 3.0371,
      "step": 84536
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.621378779411316,
      "learning_rate": 0.0004218120639781765,
      "loss": 3.0785,
      "step": 84537
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7025039196014404,
      "learning_rate": 0.0004218083257852996,
      "loss": 2.9149,
      "step": 84538
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.542850375175476,
      "learning_rate": 0.0004218045875697763,
      "loss": 3.287,
      "step": 84539
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0079498291015625,
      "learning_rate": 0.0004218008493316073,
      "loss": 2.9171,
      "step": 84540
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.577017068862915,
      "learning_rate": 0.0004217971110707931,
      "loss": 3.2332,
      "step": 84541
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6493247747421265,
      "learning_rate": 0.0004217933727873345,
      "loss": 3.0905,
      "step": 84542
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.696502447128296,
      "learning_rate": 0.0004217896344812323,
      "loss": 3.0254,
      "step": 84543
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.6175992488861084,
      "learning_rate": 0.00042178589615248706,
      "loss": 2.8062,
      "step": 84544
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7797220945358276,
      "learning_rate": 0.0004217821578010995,
      "loss": 2.8044,
      "step": 84545
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.014805555343628,
      "learning_rate": 0.0004217784194270703,
      "loss": 3.1037,
      "step": 84546
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.0918796062469482,
      "learning_rate": 0.0004217746810304003,
      "loss": 3.1502,
      "step": 84547
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7084498405456543,
      "learning_rate": 0.00042177094261109006,
      "loss": 2.9947,
      "step": 84548
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8798491954803467,
      "learning_rate": 0.00042176720416914034,
      "loss": 3.2779,
      "step": 84549
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.050065517425537,
      "learning_rate": 0.00042176346570455177,
      "loss": 3.0331,
      "step": 84550
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4354947805404663,
      "learning_rate": 0.00042175972721732505,
      "loss": 2.9543,
      "step": 84551
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7720911502838135,
      "learning_rate": 0.0004217559887074608,
      "loss": 3.0838,
      "step": 84552
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6765251159667969,
      "learning_rate": 0.0004217522501749599,
      "loss": 3.0596,
      "step": 84553
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2815496921539307,
      "learning_rate": 0.00042174851161982306,
      "loss": 2.8527,
      "step": 84554
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7010990381240845,
      "learning_rate": 0.0004217447730420507,
      "loss": 3.0271,
      "step": 84555
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5743563175201416,
      "learning_rate": 0.0004217410344416438,
      "loss": 3.1141,
      "step": 84556
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5520378351211548,
      "learning_rate": 0.00042173729581860295,
      "loss": 3.3151,
      "step": 84557
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9425294399261475,
      "learning_rate": 0.0004217335571729288,
      "loss": 3.2205,
      "step": 84558
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.727081298828125,
      "learning_rate": 0.00042172981850462207,
      "loss": 3.2709,
      "step": 84559
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.967033863067627,
      "learning_rate": 0.00042172607981368353,
      "loss": 2.8626,
      "step": 84560
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7153830528259277,
      "learning_rate": 0.0004217223411001138,
      "loss": 2.9387,
      "step": 84561
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6823713779449463,
      "learning_rate": 0.00042171860236391354,
      "loss": 3.0275,
      "step": 84562
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.3595919609069824,
      "learning_rate": 0.0004217148636050835,
      "loss": 3.4107,
      "step": 84563
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.732578158378601,
      "learning_rate": 0.00042171112482362443,
      "loss": 2.9194,
      "step": 84564
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.717828392982483,
      "learning_rate": 0.0004217073860195369,
      "loss": 2.9031,
      "step": 84565
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1307156085968018,
      "learning_rate": 0.00042170364719282163,
      "loss": 3.2021,
      "step": 84566
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6645116806030273,
      "learning_rate": 0.0004216999083434795,
      "loss": 2.9322,
      "step": 84567
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1211228370666504,
      "learning_rate": 0.00042169616947151093,
      "loss": 3.1002,
      "step": 84568
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.536952257156372,
      "learning_rate": 0.00042169243057691683,
      "loss": 3.0152,
      "step": 84569
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8685519695281982,
      "learning_rate": 0.0004216886916596978,
      "loss": 3.1039,
      "step": 84570
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.617702603340149,
      "learning_rate": 0.0004216849527198545,
      "loss": 2.9951,
      "step": 84571
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1397643089294434,
      "learning_rate": 0.0004216812137573877,
      "loss": 2.9672,
      "step": 84572
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9484307765960693,
      "learning_rate": 0.000421677474772298,
      "loss": 3.0997,
      "step": 84573
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.732251763343811,
      "learning_rate": 0.0004216737357645862,
      "loss": 2.914,
      "step": 84574
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9793442487716675,
      "learning_rate": 0.00042166999673425297,
      "loss": 2.8087,
      "step": 84575
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.960693597793579,
      "learning_rate": 0.000421666257681299,
      "loss": 2.884,
      "step": 84576
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5503509044647217,
      "learning_rate": 0.000421662518605725,
      "loss": 3.1028,
      "step": 84577
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5182684659957886,
      "learning_rate": 0.0004216587795075316,
      "loss": 3.109,
      "step": 84578
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5568029880523682,
      "learning_rate": 0.00042165504038671947,
      "loss": 3.0684,
      "step": 84579
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0959901809692383,
      "learning_rate": 0.00042165130124328953,
      "loss": 2.9849,
      "step": 84580
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6020526885986328,
      "learning_rate": 0.0004216475620772422,
      "loss": 2.8441,
      "step": 84581
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1964235305786133,
      "learning_rate": 0.0004216438228885783,
      "loss": 2.8545,
      "step": 84582
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8196567296981812,
      "learning_rate": 0.0004216400836772986,
      "loss": 2.7677,
      "step": 84583
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9304652214050293,
      "learning_rate": 0.0004216363444434037,
      "loss": 3.0522,
      "step": 84584
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5988879203796387,
      "learning_rate": 0.00042163260518689416,
      "loss": 3.063,
      "step": 84585
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7004542350769043,
      "learning_rate": 0.00042162886590777095,
      "loss": 2.6501,
      "step": 84586
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7171918153762817,
      "learning_rate": 0.00042162512660603465,
      "loss": 3.0975,
      "step": 84587
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8718825578689575,
      "learning_rate": 0.00042162138728168584,
      "loss": 3.0604,
      "step": 84588
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.374575614929199,
      "learning_rate": 0.00042161764793472546,
      "loss": 2.9977,
      "step": 84589
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.466703414916992,
      "learning_rate": 0.000421613908565154,
      "loss": 2.9308,
      "step": 84590
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6929221153259277,
      "learning_rate": 0.0004216101691729721,
      "loss": 2.7254,
      "step": 84591
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.922616958618164,
      "learning_rate": 0.0004216064297581808,
      "loss": 2.8244,
      "step": 84592
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0097897052764893,
      "learning_rate": 0.0004216026903207804,
      "loss": 3.152,
      "step": 84593
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.006533622741699,
      "learning_rate": 0.0004215989508607718,
      "loss": 2.7662,
      "step": 84594
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1350059509277344,
      "learning_rate": 0.0004215952113781557,
      "loss": 2.9561,
      "step": 84595
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.019408702850342,
      "learning_rate": 0.0004215914718729327,
      "loss": 2.8654,
      "step": 84596
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7993872165679932,
      "learning_rate": 0.0004215877323451036,
      "loss": 2.9145,
      "step": 84597
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.986240029335022,
      "learning_rate": 0.00042158399279466907,
      "loss": 3.2368,
      "step": 84598
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7119386196136475,
      "learning_rate": 0.0004215802532216297,
      "loss": 2.9219,
      "step": 84599
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.4425430297851562,
      "learning_rate": 0.00042157651362598626,
      "loss": 2.9325,
      "step": 84600
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8759098052978516,
      "learning_rate": 0.00042157277400773957,
      "loss": 2.9,
      "step": 84601
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.6338300704956055,
      "learning_rate": 0.0004215690343668901,
      "loss": 3.1619,
      "step": 84602
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8018290996551514,
      "learning_rate": 0.0004215652947034387,
      "loss": 3.2614,
      "step": 84603
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.629291296005249,
      "learning_rate": 0.00042156155501738593,
      "loss": 2.9911,
      "step": 84604
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0531275272369385,
      "learning_rate": 0.00042155781530873267,
      "loss": 3.0213,
      "step": 84605
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.979851722717285,
      "learning_rate": 0.0004215540755774795,
      "loss": 2.9745,
      "step": 84606
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8172545433044434,
      "learning_rate": 0.0004215503358236271,
      "loss": 3.1434,
      "step": 84607
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8541215658187866,
      "learning_rate": 0.0004215465960471762,
      "loss": 3.0655,
      "step": 84608
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.608513355255127,
      "learning_rate": 0.00042154285624812753,
      "loss": 2.9702,
      "step": 84609
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4425148963928223,
      "learning_rate": 0.00042153911642648175,
      "loss": 3.0668,
      "step": 84610
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.296099901199341,
      "learning_rate": 0.0004215353765822395,
      "loss": 3.0251,
      "step": 84611
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0222244262695312,
      "learning_rate": 0.0004215316367154016,
      "loss": 3.0564,
      "step": 84612
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7589789628982544,
      "learning_rate": 0.0004215278968259686,
      "loss": 3.0618,
      "step": 84613
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.631976842880249,
      "learning_rate": 0.0004215241569139413,
      "loss": 3.1156,
      "step": 84614
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.677005648612976,
      "learning_rate": 0.00042152041697932037,
      "loss": 2.9433,
      "step": 84615
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4520885944366455,
      "learning_rate": 0.00042151667702210654,
      "loss": 2.877,
      "step": 84616
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4448466300964355,
      "learning_rate": 0.0004215129370423004,
      "loss": 2.7032,
      "step": 84617
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4441930055618286,
      "learning_rate": 0.00042150919703990276,
      "loss": 2.9556,
      "step": 84618
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.864372968673706,
      "learning_rate": 0.00042150545701491427,
      "loss": 2.885,
      "step": 84619
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6802372932434082,
      "learning_rate": 0.00042150171696733556,
      "loss": 2.8493,
      "step": 84620
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5664821863174438,
      "learning_rate": 0.00042149797689716744,
      "loss": 2.7901,
      "step": 84621
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6605459451675415,
      "learning_rate": 0.00042149423680441057,
      "loss": 2.9513,
      "step": 84622
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0289931297302246,
      "learning_rate": 0.0004214904966890656,
      "loss": 3.0011,
      "step": 84623
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.055027484893799,
      "learning_rate": 0.00042148675655113315,
      "loss": 2.7753,
      "step": 84624
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6707183122634888,
      "learning_rate": 0.0004214830163906142,
      "loss": 2.9506,
      "step": 84625
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8039195537567139,
      "learning_rate": 0.00042147927620750917,
      "loss": 3.0167,
      "step": 84626
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.3428292274475098,
      "learning_rate": 0.0004214755360018188,
      "loss": 2.9465,
      "step": 84627
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2067711353302,
      "learning_rate": 0.000421471795773544,
      "loss": 2.7717,
      "step": 84628
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6137361526489258,
      "learning_rate": 0.0004214680555226851,
      "loss": 2.9816,
      "step": 84629
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8259875774383545,
      "learning_rate": 0.00042146431524924313,
      "loss": 2.7501,
      "step": 84630
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.533393383026123,
      "learning_rate": 0.0004214605749532187,
      "loss": 2.7568,
      "step": 84631
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.8841605186462402,
      "learning_rate": 0.0004214568346346123,
      "loss": 3.0111,
      "step": 84632
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.256216049194336,
      "learning_rate": 0.0004214530942934248,
      "loss": 3.2104,
      "step": 84633
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7520394325256348,
      "learning_rate": 0.00042144935392965697,
      "loss": 3.052,
      "step": 84634
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4381635189056396,
      "learning_rate": 0.00042144561354330937,
      "loss": 3.0807,
      "step": 84635
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.159569025039673,
      "learning_rate": 0.0004214418731343827,
      "loss": 3.0478,
      "step": 84636
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4801454544067383,
      "learning_rate": 0.00042143813270287777,
      "loss": 3.0534,
      "step": 84637
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5372505187988281,
      "learning_rate": 0.0004214343922487951,
      "loss": 2.6565,
      "step": 84638
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.894877314567566,
      "learning_rate": 0.00042143065177213565,
      "loss": 3.1056,
      "step": 84639
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6953898668289185,
      "learning_rate": 0.0004214269112728998,
      "loss": 3.0723,
      "step": 84640
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7399084568023682,
      "learning_rate": 0.0004214231707510884,
      "loss": 2.9372,
      "step": 84641
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.69599449634552,
      "learning_rate": 0.0004214194302067022,
      "loss": 2.7574,
      "step": 84642
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6193971633911133,
      "learning_rate": 0.00042141568963974187,
      "loss": 3.0603,
      "step": 84643
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.006748676300049,
      "learning_rate": 0.000421411949050208,
      "loss": 2.8891,
      "step": 84644
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9113433361053467,
      "learning_rate": 0.0004214082084381013,
      "loss": 3.12,
      "step": 84645
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5772387981414795,
      "learning_rate": 0.0004214044678034227,
      "loss": 3.0117,
      "step": 84646
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6597083806991577,
      "learning_rate": 0.00042140072714617254,
      "loss": 2.8679,
      "step": 84647
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.55583119392395,
      "learning_rate": 0.00042139698646635174,
      "loss": 3.0591,
      "step": 84648
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5934793949127197,
      "learning_rate": 0.00042139324576396104,
      "loss": 2.9519,
      "step": 84649
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.799629807472229,
      "learning_rate": 0.000421389505039001,
      "loss": 3.2271,
      "step": 84650
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.847238302230835,
      "learning_rate": 0.0004213857642914723,
      "loss": 2.8531,
      "step": 84651
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2704179286956787,
      "learning_rate": 0.0004213820235213757,
      "loss": 2.8821,
      "step": 84652
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.04734206199646,
      "learning_rate": 0.00042137828272871195,
      "loss": 3.2355,
      "step": 84653
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.169438600540161,
      "learning_rate": 0.0004213745419134817,
      "loss": 3.2156,
      "step": 84654
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1814804077148438,
      "learning_rate": 0.00042137080107568556,
      "loss": 3.2032,
      "step": 84655
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.3854987621307373,
      "learning_rate": 0.00042136706021532434,
      "loss": 3.095,
      "step": 84656
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.636381983757019,
      "learning_rate": 0.00042136331933239864,
      "loss": 3.0021,
      "step": 84657
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.726328730583191,
      "learning_rate": 0.0004213595784269093,
      "loss": 2.8934,
      "step": 84658
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9114587306976318,
      "learning_rate": 0.0004213558374988569,
      "loss": 3.1184,
      "step": 84659
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.604227304458618,
      "learning_rate": 0.0004213520965482421,
      "loss": 2.9986,
      "step": 84660
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9323151111602783,
      "learning_rate": 0.0004213483555750657,
      "loss": 2.8908,
      "step": 84661
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9130899906158447,
      "learning_rate": 0.0004213446145793284,
      "loss": 2.8584,
      "step": 84662
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8816497325897217,
      "learning_rate": 0.0004213408735610308,
      "loss": 3.1672,
      "step": 84663
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4586384296417236,
      "learning_rate": 0.0004213371325201735,
      "loss": 2.877,
      "step": 84664
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.559550166130066,
      "learning_rate": 0.00042133339145675755,
      "loss": 2.903,
      "step": 84665
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4430568218231201,
      "learning_rate": 0.00042132965037078334,
      "loss": 3.1048,
      "step": 84666
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9716471433639526,
      "learning_rate": 0.0004213259092622516,
      "loss": 2.9433,
      "step": 84667
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7036287784576416,
      "learning_rate": 0.0004213221681311632,
      "loss": 3.1816,
      "step": 84668
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6829110383987427,
      "learning_rate": 0.00042131842697751867,
      "loss": 3.0743,
      "step": 84669
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6614656448364258,
      "learning_rate": 0.00042131468580131867,
      "loss": 2.9794,
      "step": 84670
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7293494939804077,
      "learning_rate": 0.0004213109446025642,
      "loss": 2.9394,
      "step": 84671
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.155106782913208,
      "learning_rate": 0.0004213072033812555,
      "loss": 3.1097,
      "step": 84672
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7186405658721924,
      "learning_rate": 0.0004213034621373936,
      "loss": 3.0031,
      "step": 84673
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6898725032806396,
      "learning_rate": 0.0004212997208709791,
      "loss": 2.762,
      "step": 84674
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5253463983535767,
      "learning_rate": 0.00042129597958201275,
      "loss": 2.9904,
      "step": 84675
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8220194578170776,
      "learning_rate": 0.00042129223827049505,
      "loss": 2.9874,
      "step": 84676
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7125353813171387,
      "learning_rate": 0.0004212884969364269,
      "loss": 2.9513,
      "step": 84677
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4046618938446045,
      "learning_rate": 0.00042128475557980897,
      "loss": 2.9393,
      "step": 84678
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9898830652236938,
      "learning_rate": 0.0004212810142006419,
      "loss": 2.967,
      "step": 84679
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9530951976776123,
      "learning_rate": 0.00042127727279892636,
      "loss": 3.0064,
      "step": 84680
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7896584272384644,
      "learning_rate": 0.0004212735313746631,
      "loss": 2.885,
      "step": 84681
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5432019233703613,
      "learning_rate": 0.0004212697899278528,
      "loss": 3.0371,
      "step": 84682
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0689923763275146,
      "learning_rate": 0.00042126604845849617,
      "loss": 2.5682,
      "step": 84683
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5329593420028687,
      "learning_rate": 0.0004212623069665939,
      "loss": 3.2236,
      "step": 84684
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7376935482025146,
      "learning_rate": 0.0004212585654521466,
      "loss": 3.2033,
      "step": 84685
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.180528163909912,
      "learning_rate": 0.0004212548239151552,
      "loss": 2.9367,
      "step": 84686
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.679174780845642,
      "learning_rate": 0.0004212510823556201,
      "loss": 3.2032,
      "step": 84687
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7918511629104614,
      "learning_rate": 0.00042124734077354217,
      "loss": 3.2005,
      "step": 84688
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7989329099655151,
      "learning_rate": 0.00042124359916892207,
      "loss": 3.1345,
      "step": 84689
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7187385559082031,
      "learning_rate": 0.00042123985754176054,
      "loss": 3.1308,
      "step": 84690
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9267280101776123,
      "learning_rate": 0.0004212361158920582,
      "loss": 3.0721,
      "step": 84691
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8386270999908447,
      "learning_rate": 0.00042123237421981584,
      "loss": 2.9036,
      "step": 84692
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.249499559402466,
      "learning_rate": 0.0004212286325250339,
      "loss": 2.8285,
      "step": 84693
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6050012111663818,
      "learning_rate": 0.00042122489080771345,
      "loss": 2.7457,
      "step": 84694
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.504920482635498,
      "learning_rate": 0.000421221149067855,
      "loss": 3.2105,
      "step": 84695
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7772892713546753,
      "learning_rate": 0.00042121740730545915,
      "loss": 2.9933,
      "step": 84696
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8677650690078735,
      "learning_rate": 0.0004212136655205268,
      "loss": 2.9718,
      "step": 84697
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6917890310287476,
      "learning_rate": 0.0004212099237130585,
      "loss": 3.0004,
      "step": 84698
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4633617401123047,
      "learning_rate": 0.000421206181883055,
      "loss": 3.0814,
      "step": 84699
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.306973695755005,
      "learning_rate": 0.00042120244003051693,
      "loss": 3.1582,
      "step": 84700
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.455737590789795,
      "learning_rate": 0.0004211986981554451,
      "loss": 2.992,
      "step": 84701
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.155521869659424,
      "learning_rate": 0.00042119495625784006,
      "loss": 3.0893,
      "step": 84702
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.3087553977966309,
      "learning_rate": 0.00042119121433770266,
      "loss": 2.8258,
      "step": 84703
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5520678758621216,
      "learning_rate": 0.0004211874723950335,
      "loss": 2.8035,
      "step": 84704
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.281125783920288,
      "learning_rate": 0.0004211837304298333,
      "loss": 3.0198,
      "step": 84705
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4529012441635132,
      "learning_rate": 0.0004211799884421028,
      "loss": 3.0404,
      "step": 84706
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.199326992034912,
      "learning_rate": 0.00042117624643184265,
      "loss": 2.753,
      "step": 84707
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0164406299591064,
      "learning_rate": 0.0004211725043990535,
      "loss": 3.1133,
      "step": 84708
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.0570859909057617,
      "learning_rate": 0.0004211687623437361,
      "loss": 3.1824,
      "step": 84709
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8850321769714355,
      "learning_rate": 0.00042116502026589116,
      "loss": 2.9623,
      "step": 84710
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.195388078689575,
      "learning_rate": 0.00042116127816551935,
      "loss": 2.944,
      "step": 84711
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.653947114944458,
      "learning_rate": 0.00042115753604262134,
      "loss": 2.935,
      "step": 84712
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.550675868988037,
      "learning_rate": 0.0004211537938971979,
      "loss": 2.9448,
      "step": 84713
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.711913824081421,
      "learning_rate": 0.0004211500517292497,
      "loss": 3.0447,
      "step": 84714
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4066367149353027,
      "learning_rate": 0.0004211463095387774,
      "loss": 3.1433,
      "step": 84715
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7952126264572144,
      "learning_rate": 0.0004211425673257816,
      "loss": 3.1147,
      "step": 84716
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5590394735336304,
      "learning_rate": 0.0004211388250902633,
      "loss": 3.1385,
      "step": 84717
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7343506813049316,
      "learning_rate": 0.00042113508283222285,
      "loss": 3.1421,
      "step": 84718
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6696048974990845,
      "learning_rate": 0.00042113134055166124,
      "loss": 3.1578,
      "step": 84719
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.6832449436187744,
      "learning_rate": 0.0004211275982485789,
      "loss": 2.7428,
      "step": 84720
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.04457426071167,
      "learning_rate": 0.00042112385592297673,
      "loss": 2.7781,
      "step": 84721
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6737288236618042,
      "learning_rate": 0.00042112011357485534,
      "loss": 3.078,
      "step": 84722
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0325329303741455,
      "learning_rate": 0.0004211163712042154,
      "loss": 2.8042,
      "step": 84723
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0246317386627197,
      "learning_rate": 0.0004211126288110577,
      "loss": 3.1541,
      "step": 84724
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7919288873672485,
      "learning_rate": 0.00042110888639538283,
      "loss": 2.799,
      "step": 84725
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5068316459655762,
      "learning_rate": 0.0004211051439571915,
      "loss": 3.0103,
      "step": 84726
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.9929683208465576,
      "learning_rate": 0.00042110140149648456,
      "loss": 3.0682,
      "step": 84727
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7171154022216797,
      "learning_rate": 0.0004210976590132624,
      "loss": 3.0737,
      "step": 84728
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.899832844734192,
      "learning_rate": 0.00042109391650752606,
      "loss": 2.9989,
      "step": 84729
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8150523900985718,
      "learning_rate": 0.0004210901739792761,
      "loss": 2.9892,
      "step": 84730
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.4640896320343018,
      "learning_rate": 0.000421086431428513,
      "loss": 3.1131,
      "step": 84731
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4149186611175537,
      "learning_rate": 0.00042108268885523775,
      "loss": 2.7878,
      "step": 84732
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.530968427658081,
      "learning_rate": 0.0004210789462594509,
      "loss": 3.246,
      "step": 84733
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.831279993057251,
      "learning_rate": 0.00042107520364115334,
      "loss": 2.9849,
      "step": 84734
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6155674457550049,
      "learning_rate": 0.00042107146100034545,
      "loss": 2.9297,
      "step": 84735
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.460763692855835,
      "learning_rate": 0.0004210677183370281,
      "loss": 3.2643,
      "step": 84736
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.508228063583374,
      "learning_rate": 0.0004210639756512021,
      "loss": 2.9975,
      "step": 84737
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9984346628189087,
      "learning_rate": 0.00042106023294286797,
      "loss": 2.9675,
      "step": 84738
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8334157466888428,
      "learning_rate": 0.00042105649021202636,
      "loss": 2.9297,
      "step": 84739
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6913036108016968,
      "learning_rate": 0.00042105274745867817,
      "loss": 2.9359,
      "step": 84740
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9442400932312012,
      "learning_rate": 0.000421049004682824,
      "loss": 3.1996,
      "step": 84741
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.717527151107788,
      "learning_rate": 0.0004210452618844645,
      "loss": 3.1717,
      "step": 84742
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0661723613739014,
      "learning_rate": 0.00042104151906360035,
      "loss": 3.0256,
      "step": 84743
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7566965818405151,
      "learning_rate": 0.0004210377762202324,
      "loss": 3.3603,
      "step": 84744
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6514602899551392,
      "learning_rate": 0.00042103403335436114,
      "loss": 2.7977,
      "step": 84745
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.699749231338501,
      "learning_rate": 0.00042103029046598744,
      "loss": 3.0065,
      "step": 84746
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2389976978302,
      "learning_rate": 0.0004210265475551119,
      "loss": 2.8815,
      "step": 84747
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6908013820648193,
      "learning_rate": 0.0004210228046217352,
      "loss": 3.1321,
      "step": 84748
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.871829867362976,
      "learning_rate": 0.0004210190616658581,
      "loss": 3.0086,
      "step": 84749
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8631699085235596,
      "learning_rate": 0.0004210153186874813,
      "loss": 3.0212,
      "step": 84750
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7047233581542969,
      "learning_rate": 0.0004210115756866055,
      "loss": 3.0339,
      "step": 84751
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.780113697052002,
      "learning_rate": 0.0004210078326632312,
      "loss": 3.2614,
      "step": 84752
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4748982191085815,
      "learning_rate": 0.00042100408961735943,
      "loss": 3.285,
      "step": 84753
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.636315941810608,
      "learning_rate": 0.00042100034654899066,
      "loss": 2.7983,
      "step": 84754
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5867385864257812,
      "learning_rate": 0.0004209966034581257,
      "loss": 2.9824,
      "step": 84755
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4186818599700928,
      "learning_rate": 0.0004209928603447651,
      "loss": 2.9502,
      "step": 84756
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.4174787998199463,
      "learning_rate": 0.00042098911720890966,
      "loss": 2.9203,
      "step": 84757
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.135652542114258,
      "learning_rate": 0.0004209853740505601,
      "loss": 2.9243,
      "step": 84758
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.763774037361145,
      "learning_rate": 0.0004209816308697171,
      "loss": 3.2192,
      "step": 84759
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.013314962387085,
      "learning_rate": 0.0004209778876663812,
      "loss": 3.0814,
      "step": 84760
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0004682540893555,
      "learning_rate": 0.00042097414444055333,
      "loss": 2.9819,
      "step": 84761
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7516378164291382,
      "learning_rate": 0.00042097040119223406,
      "loss": 3.0417,
      "step": 84762
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8838821649551392,
      "learning_rate": 0.00042096665792142415,
      "loss": 2.906,
      "step": 84763
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.4536008834838867,
      "learning_rate": 0.00042096291462812415,
      "loss": 3.2176,
      "step": 84764
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.7200639247894287,
      "learning_rate": 0.0004209591713123349,
      "loss": 3.2037,
      "step": 84765
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.647227168083191,
      "learning_rate": 0.0004209554279740571,
      "loss": 3.333,
      "step": 84766
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7941277027130127,
      "learning_rate": 0.00042095168461329143,
      "loss": 3.0618,
      "step": 84767
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.732879161834717,
      "learning_rate": 0.00042094794123003854,
      "loss": 3.0535,
      "step": 84768
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.515385866165161,
      "learning_rate": 0.00042094419782429907,
      "loss": 3.1153,
      "step": 84769
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.83322274684906,
      "learning_rate": 0.0004209404543960738,
      "loss": 2.9322,
      "step": 84770
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.798496127128601,
      "learning_rate": 0.0004209367109453635,
      "loss": 2.8893,
      "step": 84771
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8902015686035156,
      "learning_rate": 0.0004209329674721688,
      "loss": 3.0448,
      "step": 84772
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.930715560913086,
      "learning_rate": 0.00042092922397649025,
      "loss": 2.9712,
      "step": 84773
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.516495943069458,
      "learning_rate": 0.0004209254804583288,
      "loss": 2.8782,
      "step": 84774
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9690266847610474,
      "learning_rate": 0.0004209217369176849,
      "loss": 2.9152,
      "step": 84775
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.605085849761963,
      "learning_rate": 0.0004209179933545594,
      "loss": 2.9856,
      "step": 84776
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.5396029949188232,
      "learning_rate": 0.00042091424976895296,
      "loss": 2.9974,
      "step": 84777
    },
    {
      "epoch": 1.1,
      "grad_norm": 5.004879951477051,
      "learning_rate": 0.0004209105061608664,
      "loss": 3.0253,
      "step": 84778
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.6935360431671143,
      "learning_rate": 0.00042090676253030016,
      "loss": 2.8784,
      "step": 84779
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5419645309448242,
      "learning_rate": 0.0004209030188772551,
      "loss": 3.0615,
      "step": 84780
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.297861099243164,
      "learning_rate": 0.0004208992752017319,
      "loss": 3.2911,
      "step": 84781
    },
    {
      "epoch": 1.1,
      "grad_norm": 5.752241611480713,
      "learning_rate": 0.00042089553150373123,
      "loss": 3.0517,
      "step": 84782
    },
    {
      "epoch": 1.1,
      "grad_norm": 4.746092319488525,
      "learning_rate": 0.0004208917877832538,
      "loss": 3.269,
      "step": 84783
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.258943557739258,
      "learning_rate": 0.0004208880440403004,
      "loss": 3.0277,
      "step": 84784
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.230452299118042,
      "learning_rate": 0.0004208843002748715,
      "loss": 2.9818,
      "step": 84785
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.8824806213378906,
      "learning_rate": 0.00042088055648696794,
      "loss": 2.934,
      "step": 84786
    },
    {
      "epoch": 1.1,
      "grad_norm": 5.105323791503906,
      "learning_rate": 0.0004208768126765905,
      "loss": 2.8989,
      "step": 84787
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.524545431137085,
      "learning_rate": 0.0004208730688437397,
      "loss": 3.1811,
      "step": 84788
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.668846845626831,
      "learning_rate": 0.00042086932498841637,
      "loss": 2.9573,
      "step": 84789
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0795865058898926,
      "learning_rate": 0.0004208655811106211,
      "loss": 3.2307,
      "step": 84790
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.1817591190338135,
      "learning_rate": 0.0004208618372103546,
      "loss": 3.0637,
      "step": 84791
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.80044686794281,
      "learning_rate": 0.0004208580932876177,
      "loss": 2.9036,
      "step": 84792
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5878381729125977,
      "learning_rate": 0.0004208543493424109,
      "loss": 3.3159,
      "step": 84793
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6154859066009521,
      "learning_rate": 0.00042085060537473505,
      "loss": 3.1459,
      "step": 84794
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.3753087520599365,
      "learning_rate": 0.0004208468613845907,
      "loss": 2.7752,
      "step": 84795
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.4607478380203247,
      "learning_rate": 0.00042084311737197884,
      "loss": 3.155,
      "step": 84796
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8325016498565674,
      "learning_rate": 0.0004208393733368998,
      "loss": 3.1614,
      "step": 84797
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7266926765441895,
      "learning_rate": 0.00042083562927935453,
      "loss": 2.9796,
      "step": 84798
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.3251590728759766,
      "learning_rate": 0.0004208318851993436,
      "loss": 2.9843,
      "step": 84799
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6011018753051758,
      "learning_rate": 0.0004208281410968677,
      "loss": 3.2836,
      "step": 84800
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2859342098236084,
      "learning_rate": 0.0004208243969719276,
      "loss": 3.1005,
      "step": 84801
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9037379026412964,
      "learning_rate": 0.000420820652824524,
      "loss": 3.0199,
      "step": 84802
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5440863370895386,
      "learning_rate": 0.00042081690865465745,
      "loss": 3.2619,
      "step": 84803
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5904741287231445,
      "learning_rate": 0.00042081316446232887,
      "loss": 2.9899,
      "step": 84804
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9465618133544922,
      "learning_rate": 0.0004208094202475388,
      "loss": 2.9803,
      "step": 84805
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.579142451286316,
      "learning_rate": 0.000420805676010288,
      "loss": 2.8441,
      "step": 84806
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6855742931365967,
      "learning_rate": 0.0004208019317505771,
      "loss": 3.0114,
      "step": 84807
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.3746933937072754,
      "learning_rate": 0.0004207981874684069,
      "loss": 2.8576,
      "step": 84808
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2167742252349854,
      "learning_rate": 0.000420794443163778,
      "loss": 2.9589,
      "step": 84809
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6465532779693604,
      "learning_rate": 0.0004207906988366911,
      "loss": 3.0975,
      "step": 84810
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8015477657318115,
      "learning_rate": 0.000420786954487147,
      "loss": 2.898,
      "step": 84811
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7823704481124878,
      "learning_rate": 0.0004207832101151463,
      "loss": 2.9443,
      "step": 84812
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5951095819473267,
      "learning_rate": 0.00042077946572068975,
      "loss": 2.9899,
      "step": 84813
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.598246455192566,
      "learning_rate": 0.0004207757213037779,
      "loss": 2.9928,
      "step": 84814
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.2292392253875732,
      "learning_rate": 0.0004207719768644117,
      "loss": 2.8032,
      "step": 84815
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.669831395149231,
      "learning_rate": 0.0004207682324025916,
      "loss": 3.2145,
      "step": 84816
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.027181386947632,
      "learning_rate": 0.00042076448791831844,
      "loss": 2.9484,
      "step": 84817
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7771551609039307,
      "learning_rate": 0.0004207607434115929,
      "loss": 3.0178,
      "step": 84818
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6680976152420044,
      "learning_rate": 0.0004207569988824157,
      "loss": 2.9906,
      "step": 84819
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7093356847763062,
      "learning_rate": 0.00042075325433078744,
      "loss": 2.9342,
      "step": 84820
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6975047588348389,
      "learning_rate": 0.0004207495097567089,
      "loss": 3.0828,
      "step": 84821
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5003807544708252,
      "learning_rate": 0.0004207457651601807,
      "loss": 2.9811,
      "step": 84822
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5711328983306885,
      "learning_rate": 0.0004207420205412036,
      "loss": 2.9034,
      "step": 84823
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.186976671218872,
      "learning_rate": 0.0004207382758997783,
      "loss": 2.8218,
      "step": 84824
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.79703950881958,
      "learning_rate": 0.0004207345312359055,
      "loss": 2.9537,
      "step": 84825
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7334223985671997,
      "learning_rate": 0.0004207307865495858,
      "loss": 2.8816,
      "step": 84826
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7213674783706665,
      "learning_rate": 0.00042072704184082006,
      "loss": 3.0721,
      "step": 84827
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7669886350631714,
      "learning_rate": 0.0004207232971096088,
      "loss": 2.9836,
      "step": 84828
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.9484777450561523,
      "learning_rate": 0.0004207195523559528,
      "loss": 2.8472,
      "step": 84829
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8453950881958008,
      "learning_rate": 0.0004207158075798528,
      "loss": 3.0634,
      "step": 84830
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8191238641738892,
      "learning_rate": 0.0004207120627813094,
      "loss": 2.8154,
      "step": 84831
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.6449482440948486,
      "learning_rate": 0.00042070831796032343,
      "loss": 3.1683,
      "step": 84832
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.464230537414551,
      "learning_rate": 0.0004207045731168955,
      "loss": 2.85,
      "step": 84833
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.69533109664917,
      "learning_rate": 0.00042070082825102623,
      "loss": 3.0756,
      "step": 84834
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.487062692642212,
      "learning_rate": 0.0004206970833627165,
      "loss": 3.0522,
      "step": 84835
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.0396502017974854,
      "learning_rate": 0.0004206933384519668,
      "loss": 3.1111,
      "step": 84836
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.712214708328247,
      "learning_rate": 0.000420689593518778,
      "loss": 2.9967,
      "step": 84837
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.205383539199829,
      "learning_rate": 0.0004206858485631507,
      "loss": 3.0099,
      "step": 84838
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.819411516189575,
      "learning_rate": 0.0004206821035850856,
      "loss": 3.1212,
      "step": 84839
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.1766717433929443,
      "learning_rate": 0.0004206783585845835,
      "loss": 2.9561,
      "step": 84840
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5145925283432007,
      "learning_rate": 0.00042067461356164496,
      "loss": 2.8472,
      "step": 84841
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6099021434783936,
      "learning_rate": 0.0004206708685162707,
      "loss": 3.0313,
      "step": 84842
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9097185134887695,
      "learning_rate": 0.00042066712344846147,
      "loss": 2.8305,
      "step": 84843
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0706329345703125,
      "learning_rate": 0.00042066337835821796,
      "loss": 3.0362,
      "step": 84844
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.988753080368042,
      "learning_rate": 0.00042065963324554084,
      "loss": 3.2888,
      "step": 84845
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5790088176727295,
      "learning_rate": 0.0004206558881104308,
      "loss": 2.9575,
      "step": 84846
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1733808517456055,
      "learning_rate": 0.0004206521429528886,
      "loss": 3.0821,
      "step": 84847
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.516329050064087,
      "learning_rate": 0.00042064839777291495,
      "loss": 3.0564,
      "step": 84848
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.538051724433899,
      "learning_rate": 0.00042064465257051034,
      "loss": 2.9644,
      "step": 84849
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.6252384185791016,
      "learning_rate": 0.00042064090734567566,
      "loss": 3.4877,
      "step": 84850
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0084943771362305,
      "learning_rate": 0.0004206371620984116,
      "loss": 3.0527,
      "step": 84851
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1456546783447266,
      "learning_rate": 0.00042063341682871874,
      "loss": 2.9914,
      "step": 84852
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5421658754348755,
      "learning_rate": 0.00042062967153659787,
      "loss": 3.1643,
      "step": 84853
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.8942852020263672,
      "learning_rate": 0.00042062592622204973,
      "loss": 2.7873,
      "step": 84854
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.1415960788726807,
      "learning_rate": 0.00042062218088507493,
      "loss": 3.0175,
      "step": 84855
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.0531532764434814,
      "learning_rate": 0.0004206184355256741,
      "loss": 2.7227,
      "step": 84856
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5794678926467896,
      "learning_rate": 0.00042061469014384814,
      "loss": 3.0883,
      "step": 84857
    },
    {
      "epoch": 1.1,
      "grad_norm": 2.9080326557159424,
      "learning_rate": 0.00042061094473959756,
      "loss": 2.9528,
      "step": 84858
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.9788321256637573,
      "learning_rate": 0.00042060719931292313,
      "loss": 3.182,
      "step": 84859
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7336359024047852,
      "learning_rate": 0.00042060345386382567,
      "loss": 2.9384,
      "step": 84860
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.446534276008606,
      "learning_rate": 0.0004205997083923056,
      "loss": 2.8586,
      "step": 84861
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.555406093597412,
      "learning_rate": 0.0004205959628983638,
      "loss": 2.9988,
      "step": 84862
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.5645266771316528,
      "learning_rate": 0.00042059221738200103,
      "loss": 3.2721,
      "step": 84863
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.7761852741241455,
      "learning_rate": 0.0004205884718432178,
      "loss": 3.2192,
      "step": 84864
    },
    {
      "epoch": 1.1,
      "grad_norm": 1.638896107673645,
      "learning_rate": 0.0004205847262820149,
      "loss": 3.0256,
      "step": 84865
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6896494626998901,
      "learning_rate": 0.0004205809806983931,
      "loss": 2.9277,
      "step": 84866
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6794660091400146,
      "learning_rate": 0.0004205772350923529,
      "loss": 3.1122,
      "step": 84867
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5822151899337769,
      "learning_rate": 0.0004205734894638952,
      "loss": 3.0314,
      "step": 84868
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.006409168243408,
      "learning_rate": 0.0004205697438130206,
      "loss": 3.0176,
      "step": 84869
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.822084665298462,
      "learning_rate": 0.0004205659981397298,
      "loss": 3.031,
      "step": 84870
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6783192157745361,
      "learning_rate": 0.00042056225244402345,
      "loss": 2.9264,
      "step": 84871
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.679190754890442,
      "learning_rate": 0.0004205585067259024,
      "loss": 2.9906,
      "step": 84872
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8617510795593262,
      "learning_rate": 0.0004205547609853672,
      "loss": 2.898,
      "step": 84873
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6160849332809448,
      "learning_rate": 0.00042055101522241856,
      "loss": 3.1297,
      "step": 84874
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3476710319519043,
      "learning_rate": 0.00042054726943705726,
      "loss": 3.1528,
      "step": 84875
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6467210054397583,
      "learning_rate": 0.000420543523629284,
      "loss": 2.8175,
      "step": 84876
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5195086002349854,
      "learning_rate": 0.00042053977779909926,
      "loss": 2.987,
      "step": 84877
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0206568241119385,
      "learning_rate": 0.00042053603194650404,
      "loss": 2.9316,
      "step": 84878
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.9149203300476074,
      "learning_rate": 0.00042053228607149884,
      "loss": 2.9986,
      "step": 84879
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.811566948890686,
      "learning_rate": 0.0004205285401740844,
      "loss": 3.073,
      "step": 84880
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1074929237365723,
      "learning_rate": 0.00042052479425426146,
      "loss": 2.932,
      "step": 84881
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.931676983833313,
      "learning_rate": 0.0004205210483120307,
      "loss": 3.2342,
      "step": 84882
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6673307418823242,
      "learning_rate": 0.0004205173023473928,
      "loss": 2.9119,
      "step": 84883
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5549976825714111,
      "learning_rate": 0.00042051355636034843,
      "loss": 3.0605,
      "step": 84884
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.734387993812561,
      "learning_rate": 0.0004205098103508983,
      "loss": 2.9811,
      "step": 84885
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6574926376342773,
      "learning_rate": 0.00042050606431904316,
      "loss": 3.2705,
      "step": 84886
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.256744861602783,
      "learning_rate": 0.0004205023182647837,
      "loss": 2.9557,
      "step": 84887
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8399264812469482,
      "learning_rate": 0.0004204985721881205,
      "loss": 3.0849,
      "step": 84888
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8188117742538452,
      "learning_rate": 0.00042049482608905436,
      "loss": 2.9424,
      "step": 84889
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0579652786254883,
      "learning_rate": 0.00042049107996758604,
      "loss": 2.8506,
      "step": 84890
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.027196168899536,
      "learning_rate": 0.00042048733382371605,
      "loss": 2.8855,
      "step": 84891
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9586248397827148,
      "learning_rate": 0.0004204835876574452,
      "loss": 3.0143,
      "step": 84892
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.887418270111084,
      "learning_rate": 0.0004204798414687743,
      "loss": 3.0474,
      "step": 84893
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7781004905700684,
      "learning_rate": 0.0004204760952577038,
      "loss": 2.9751,
      "step": 84894
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.012014150619507,
      "learning_rate": 0.00042047234902423456,
      "loss": 3.1254,
      "step": 84895
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7094593048095703,
      "learning_rate": 0.00042046860276836724,
      "loss": 3.0401,
      "step": 84896
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1584808826446533,
      "learning_rate": 0.0004204648564901025,
      "loss": 2.7241,
      "step": 84897
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5849334001541138,
      "learning_rate": 0.0004204611101894411,
      "loss": 2.9996,
      "step": 84898
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0098581314086914,
      "learning_rate": 0.00042045736386638364,
      "loss": 3.1449,
      "step": 84899
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6566128730773926,
      "learning_rate": 0.000420453617520931,
      "loss": 3.3248,
      "step": 84900
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.893964409828186,
      "learning_rate": 0.00042044987115308363,
      "loss": 2.9942,
      "step": 84901
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5427500009536743,
      "learning_rate": 0.0004204461247628424,
      "loss": 2.9185,
      "step": 84902
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.647618055343628,
      "learning_rate": 0.00042044237835020797,
      "loss": 2.8931,
      "step": 84903
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.735640048980713,
      "learning_rate": 0.00042043863191518107,
      "loss": 2.9473,
      "step": 84904
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8974465131759644,
      "learning_rate": 0.0004204348854577623,
      "loss": 2.8229,
      "step": 84905
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.194366693496704,
      "learning_rate": 0.00042043113897795244,
      "loss": 3.0422,
      "step": 84906
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7608815431594849,
      "learning_rate": 0.00042042739247575217,
      "loss": 3.1266,
      "step": 84907
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.033639669418335,
      "learning_rate": 0.00042042364595116214,
      "loss": 2.8728,
      "step": 84908
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6146233081817627,
      "learning_rate": 0.0004204198994041831,
      "loss": 3.0023,
      "step": 84909
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.573821783065796,
      "learning_rate": 0.0004204161528348157,
      "loss": 3.2298,
      "step": 84910
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.087796211242676,
      "learning_rate": 0.0004204124062430608,
      "loss": 2.9316,
      "step": 84911
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6245578527450562,
      "learning_rate": 0.00042040865962891875,
      "loss": 3.1336,
      "step": 84912
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6120033264160156,
      "learning_rate": 0.00042040491299239055,
      "loss": 2.9031,
      "step": 84913
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6308350563049316,
      "learning_rate": 0.0004204011663334769,
      "loss": 2.8509,
      "step": 84914
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8072962760925293,
      "learning_rate": 0.0004203974196521783,
      "loss": 3.077,
      "step": 84915
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8328100442886353,
      "learning_rate": 0.0004203936729484955,
      "loss": 2.986,
      "step": 84916
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7787586450576782,
      "learning_rate": 0.0004203899262224293,
      "loss": 3.0089,
      "step": 84917
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.680964469909668,
      "learning_rate": 0.0004203861794739804,
      "loss": 3.0482,
      "step": 84918
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7366278171539307,
      "learning_rate": 0.00042038243270314937,
      "loss": 3.1894,
      "step": 84919
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3793692588806152,
      "learning_rate": 0.00042037868590993693,
      "loss": 3.258,
      "step": 84920
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7844469547271729,
      "learning_rate": 0.00042037493909434395,
      "loss": 3.1569,
      "step": 84921
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.019017457962036,
      "learning_rate": 0.00042037119225637093,
      "loss": 3.0325,
      "step": 84922
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7528940439224243,
      "learning_rate": 0.00042036744539601856,
      "loss": 2.8838,
      "step": 84923
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9090722799301147,
      "learning_rate": 0.0004203636985132877,
      "loss": 2.8218,
      "step": 84924
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8570798635482788,
      "learning_rate": 0.00042035995160817895,
      "loss": 3.0383,
      "step": 84925
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.3822486400604248,
      "learning_rate": 0.00042035620468069295,
      "loss": 3.0627,
      "step": 84926
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4989521503448486,
      "learning_rate": 0.00042035245773083054,
      "loss": 2.7799,
      "step": 84927
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.358166217803955,
      "learning_rate": 0.00042034871075859236,
      "loss": 3.1218,
      "step": 84928
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6226613521575928,
      "learning_rate": 0.00042034496376397895,
      "loss": 2.9918,
      "step": 84929
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7443006038665771,
      "learning_rate": 0.00042034121674699125,
      "loss": 3.409,
      "step": 84930
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4456363916397095,
      "learning_rate": 0.0004203374697076298,
      "loss": 2.9583,
      "step": 84931
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.866449236869812,
      "learning_rate": 0.00042033372264589533,
      "loss": 2.9769,
      "step": 84932
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.62019681930542,
      "learning_rate": 0.00042032997556178857,
      "loss": 3.0764,
      "step": 84933
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6780391931533813,
      "learning_rate": 0.0004203262284553102,
      "loss": 2.9994,
      "step": 84934
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7554703950881958,
      "learning_rate": 0.0004203224813264609,
      "loss": 2.8573,
      "step": 84935
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9345669746398926,
      "learning_rate": 0.0004203187341752414,
      "loss": 3.2144,
      "step": 84936
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7945157289505005,
      "learning_rate": 0.0004203149870016524,
      "loss": 3.0549,
      "step": 84937
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.52909255027771,
      "learning_rate": 0.0004203112398056945,
      "loss": 3.003,
      "step": 84938
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8743438720703125,
      "learning_rate": 0.0004203074925873686,
      "loss": 2.9806,
      "step": 84939
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5614393949508667,
      "learning_rate": 0.0004203037453466751,
      "loss": 3.0142,
      "step": 84940
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6108509302139282,
      "learning_rate": 0.0004202999980836149,
      "loss": 3.186,
      "step": 84941
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5330649614334106,
      "learning_rate": 0.00042029625079818876,
      "loss": 3.1569,
      "step": 84942
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6808515787124634,
      "learning_rate": 0.0004202925034903972,
      "loss": 2.9604,
      "step": 84943
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.854373574256897,
      "learning_rate": 0.000420288756160241,
      "loss": 3.1633,
      "step": 84944
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.711495280265808,
      "learning_rate": 0.0004202850088077209,
      "loss": 3.1236,
      "step": 84945
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6247698068618774,
      "learning_rate": 0.0004202812614328374,
      "loss": 2.9166,
      "step": 84946
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4712727069854736,
      "learning_rate": 0.0004202775140355914,
      "loss": 2.9031,
      "step": 84947
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6675171852111816,
      "learning_rate": 0.00042027376661598367,
      "loss": 3.0422,
      "step": 84948
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4523673057556152,
      "learning_rate": 0.00042027001917401474,
      "loss": 2.9773,
      "step": 84949
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.049916982650757,
      "learning_rate": 0.0004202662717096852,
      "loss": 2.9256,
      "step": 84950
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8964515924453735,
      "learning_rate": 0.0004202625242229961,
      "loss": 3.0178,
      "step": 84951
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.190917730331421,
      "learning_rate": 0.00042025877671394773,
      "loss": 2.8985,
      "step": 84952
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.809611201286316,
      "learning_rate": 0.00042025502918254106,
      "loss": 2.8734,
      "step": 84953
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7161328792572021,
      "learning_rate": 0.00042025128162877674,
      "loss": 3.003,
      "step": 84954
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.7363266944885254,
      "learning_rate": 0.0004202475340526554,
      "loss": 2.9873,
      "step": 84955
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.156801462173462,
      "learning_rate": 0.00042024378645417776,
      "loss": 2.8699,
      "step": 84956
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.837520122528076,
      "learning_rate": 0.0004202400388333446,
      "loss": 2.9109,
      "step": 84957
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0648462772369385,
      "learning_rate": 0.00042023629119015656,
      "loss": 2.9765,
      "step": 84958
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6496336460113525,
      "learning_rate": 0.0004202325435246142,
      "loss": 3.2181,
      "step": 84959
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5959227085113525,
      "learning_rate": 0.00042022879583671844,
      "loss": 2.6536,
      "step": 84960
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5830514430999756,
      "learning_rate": 0.0004202250481264698,
      "loss": 3.1044,
      "step": 84961
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6021133661270142,
      "learning_rate": 0.0004202213003938691,
      "loss": 3.0728,
      "step": 84962
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.123844623565674,
      "learning_rate": 0.00042021755263891707,
      "loss": 2.8343,
      "step": 84963
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.076970338821411,
      "learning_rate": 0.00042021380486161416,
      "loss": 3.0346,
      "step": 84964
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9380334615707397,
      "learning_rate": 0.00042021005706196135,
      "loss": 3.0164,
      "step": 84965
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5859501361846924,
      "learning_rate": 0.00042020630923995917,
      "loss": 2.9676,
      "step": 84966
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.728227972984314,
      "learning_rate": 0.0004202025613956084,
      "loss": 2.9996,
      "step": 84967
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.657039999961853,
      "learning_rate": 0.00042019881352890965,
      "loss": 3.0253,
      "step": 84968
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4258921146392822,
      "learning_rate": 0.0004201950656398638,
      "loss": 2.8434,
      "step": 84969
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9534543752670288,
      "learning_rate": 0.00042019131772847134,
      "loss": 3.0813,
      "step": 84970
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.0127201080322266,
      "learning_rate": 0.00042018756979473297,
      "loss": 3.2156,
      "step": 84971
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.621012568473816,
      "learning_rate": 0.0004201838218386496,
      "loss": 3.0877,
      "step": 84972
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7344062328338623,
      "learning_rate": 0.00042018007386022174,
      "loss": 2.837,
      "step": 84973
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.664070963859558,
      "learning_rate": 0.00042017632585945007,
      "loss": 3.2242,
      "step": 84974
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6405823230743408,
      "learning_rate": 0.0004201725778363354,
      "loss": 3.0263,
      "step": 84975
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6387245655059814,
      "learning_rate": 0.0004201688297908784,
      "loss": 3.1318,
      "step": 84976
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.768977403640747,
      "learning_rate": 0.0004201650817230797,
      "loss": 3.0023,
      "step": 84977
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0073447227478027,
      "learning_rate": 0.00042016133363294007,
      "loss": 2.9905,
      "step": 84978
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0942511558532715,
      "learning_rate": 0.00042015758552046024,
      "loss": 2.9851,
      "step": 84979
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4904946088790894,
      "learning_rate": 0.0004201538373856407,
      "loss": 2.9764,
      "step": 84980
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7313274145126343,
      "learning_rate": 0.0004201500892284824,
      "loss": 3.2974,
      "step": 84981
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9513270854949951,
      "learning_rate": 0.00042014634104898597,
      "loss": 3.2717,
      "step": 84982
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7084460258483887,
      "learning_rate": 0.000420142592847152,
      "loss": 3.0846,
      "step": 84983
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0208396911621094,
      "learning_rate": 0.0004201388446229812,
      "loss": 2.7553,
      "step": 84984
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.552147150039673,
      "learning_rate": 0.00042013509637647443,
      "loss": 3.0785,
      "step": 84985
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8262139558792114,
      "learning_rate": 0.0004201313481076322,
      "loss": 3.2377,
      "step": 84986
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5025798082351685,
      "learning_rate": 0.00042012759981645525,
      "loss": 3.0044,
      "step": 84987
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.733131170272827,
      "learning_rate": 0.00042012385150294444,
      "loss": 2.9558,
      "step": 84988
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2211711406707764,
      "learning_rate": 0.00042012010316710025,
      "loss": 3.0218,
      "step": 84989
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.765811562538147,
      "learning_rate": 0.00042011635480892345,
      "loss": 3.1584,
      "step": 84990
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4817172288894653,
      "learning_rate": 0.0004201126064284148,
      "loss": 3.0152,
      "step": 84991
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5822334289550781,
      "learning_rate": 0.00042010885802557494,
      "loss": 3.1313,
      "step": 84992
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1204304695129395,
      "learning_rate": 0.0004201051096004046,
      "loss": 2.9513,
      "step": 84993
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4976636171340942,
      "learning_rate": 0.00042010136115290444,
      "loss": 2.9902,
      "step": 84994
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7185359001159668,
      "learning_rate": 0.0004200976126830752,
      "loss": 2.8044,
      "step": 84995
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.0597171783447266,
      "learning_rate": 0.00042009386419091747,
      "loss": 2.9339,
      "step": 84996
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9082356691360474,
      "learning_rate": 0.000420090115676432,
      "loss": 3.1056,
      "step": 84997
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7353957891464233,
      "learning_rate": 0.0004200863671396196,
      "loss": 2.992,
      "step": 84998
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.023653745651245,
      "learning_rate": 0.0004200826185804809,
      "loss": 2.9912,
      "step": 84999
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6418581008911133,
      "learning_rate": 0.00042007886999901646,
      "loss": 3.065,
      "step": 85000
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8766400814056396,
      "learning_rate": 0.00042007512139522716,
      "loss": 2.9788,
      "step": 85001
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.03590989112854,
      "learning_rate": 0.0004200713727691136,
      "loss": 3.0515,
      "step": 85002
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3098998069763184,
      "learning_rate": 0.00042006762412067657,
      "loss": 3.1489,
      "step": 85003
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8100985288619995,
      "learning_rate": 0.00042006387544991664,
      "loss": 3.1268,
      "step": 85004
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.607811450958252,
      "learning_rate": 0.00042006012675683456,
      "loss": 2.8639,
      "step": 85005
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.277508020401001,
      "learning_rate": 0.00042005637804143114,
      "loss": 3.1093,
      "step": 85006
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1334609985351562,
      "learning_rate": 0.0004200526293037069,
      "loss": 3.0264,
      "step": 85007
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.807802438735962,
      "learning_rate": 0.0004200488805436625,
      "loss": 3.0662,
      "step": 85008
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3771724700927734,
      "learning_rate": 0.0004200451317612989,
      "loss": 2.9567,
      "step": 85009
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8281078338623047,
      "learning_rate": 0.0004200413829566166,
      "loss": 3.0059,
      "step": 85010
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.1529202461242676,
      "learning_rate": 0.00042003763412961633,
      "loss": 2.9446,
      "step": 85011
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4927659034729004,
      "learning_rate": 0.0004200338852802988,
      "loss": 3.2992,
      "step": 85012
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2098166942596436,
      "learning_rate": 0.00042003013640866475,
      "loss": 3.0606,
      "step": 85013
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.9625911712646484,
      "learning_rate": 0.00042002638751471475,
      "loss": 2.8955,
      "step": 85014
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5099995136260986,
      "learning_rate": 0.00042002263859844966,
      "loss": 3.0485,
      "step": 85015
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1017940044403076,
      "learning_rate": 0.0004200188896598701,
      "loss": 2.8951,
      "step": 85016
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2769830226898193,
      "learning_rate": 0.00042001514069897665,
      "loss": 3.1271,
      "step": 85017
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8927459716796875,
      "learning_rate": 0.00042001139171577025,
      "loss": 3.0428,
      "step": 85018
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7853295803070068,
      "learning_rate": 0.0004200076427102514,
      "loss": 3.0484,
      "step": 85019
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.0919573307037354,
      "learning_rate": 0.00042000389368242083,
      "loss": 2.838,
      "step": 85020
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7391523122787476,
      "learning_rate": 0.00042000014463227937,
      "loss": 2.6981,
      "step": 85021
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7096247673034668,
      "learning_rate": 0.0004199963955598275,
      "loss": 3.0346,
      "step": 85022
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1907131671905518,
      "learning_rate": 0.00041999264646506616,
      "loss": 2.8848,
      "step": 85023
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0059821605682373,
      "learning_rate": 0.0004199888973479959,
      "loss": 2.8213,
      "step": 85024
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.648898720741272,
      "learning_rate": 0.00041998514820861736,
      "loss": 2.9216,
      "step": 85025
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5638782978057861,
      "learning_rate": 0.0004199813990469313,
      "loss": 2.9349,
      "step": 85026
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6869083642959595,
      "learning_rate": 0.00041997764986293856,
      "loss": 3.1888,
      "step": 85027
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.728850245475769,
      "learning_rate": 0.0004199739006566396,
      "loss": 3.0116,
      "step": 85028
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6004217863082886,
      "learning_rate": 0.00041997015142803524,
      "loss": 2.9562,
      "step": 85029
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1179022789001465,
      "learning_rate": 0.00041996640217712626,
      "loss": 2.9365,
      "step": 85030
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9912786483764648,
      "learning_rate": 0.00041996265290391313,
      "loss": 3.1663,
      "step": 85031
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6742171049118042,
      "learning_rate": 0.0004199589036083967,
      "loss": 2.9998,
      "step": 85032
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.3791749477386475,
      "learning_rate": 0.0004199551542905778,
      "loss": 3.0879,
      "step": 85033
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9814565181732178,
      "learning_rate": 0.0004199514049504568,
      "loss": 3.1219,
      "step": 85034
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5048425197601318,
      "learning_rate": 0.00041994765558803465,
      "loss": 2.9972,
      "step": 85035
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.533111572265625,
      "learning_rate": 0.000419943906203312,
      "loss": 3.1801,
      "step": 85036
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5725092887878418,
      "learning_rate": 0.0004199401567962894,
      "loss": 3.119,
      "step": 85037
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6399462223052979,
      "learning_rate": 0.00041993640736696767,
      "loss": 2.8753,
      "step": 85038
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.256911277770996,
      "learning_rate": 0.00041993265791534756,
      "loss": 2.8441,
      "step": 85039
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6297341585159302,
      "learning_rate": 0.0004199289084414297,
      "loss": 2.9799,
      "step": 85040
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.983621597290039,
      "learning_rate": 0.00041992515894521477,
      "loss": 2.8116,
      "step": 85041
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.751410961151123,
      "learning_rate": 0.0004199214094267035,
      "loss": 3.0543,
      "step": 85042
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6806914806365967,
      "learning_rate": 0.0004199176598858966,
      "loss": 2.8825,
      "step": 85043
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5798218250274658,
      "learning_rate": 0.0004199139103227947,
      "loss": 3.079,
      "step": 85044
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6556994915008545,
      "learning_rate": 0.00041991016073739855,
      "loss": 3.2095,
      "step": 85045
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5279988050460815,
      "learning_rate": 0.00041990641112970884,
      "loss": 3.1372,
      "step": 85046
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8925548791885376,
      "learning_rate": 0.0004199026614997263,
      "loss": 3.2427,
      "step": 85047
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.231407880783081,
      "learning_rate": 0.00041989891184745153,
      "loss": 3.2596,
      "step": 85048
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4036037921905518,
      "learning_rate": 0.00041989516217288535,
      "loss": 3.1878,
      "step": 85049
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9155957698822021,
      "learning_rate": 0.00041989141247602836,
      "loss": 3.2249,
      "step": 85050
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6067966222763062,
      "learning_rate": 0.0004198876627568813,
      "loss": 3.0583,
      "step": 85051
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.618471145629883,
      "learning_rate": 0.00041988391301544495,
      "loss": 2.9783,
      "step": 85052
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4781213998794556,
      "learning_rate": 0.0004198801632517198,
      "loss": 3.0099,
      "step": 85053
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.092968463897705,
      "learning_rate": 0.0004198764134657067,
      "loss": 2.6652,
      "step": 85054
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.512428045272827,
      "learning_rate": 0.0004198726636574063,
      "loss": 3.1874,
      "step": 85055
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8075588941574097,
      "learning_rate": 0.0004198689138268193,
      "loss": 3.1132,
      "step": 85056
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.5319478511810303,
      "learning_rate": 0.0004198651639739464,
      "loss": 3.1645,
      "step": 85057
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8456555604934692,
      "learning_rate": 0.00041986141409878835,
      "loss": 3.0697,
      "step": 85058
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.151864528656006,
      "learning_rate": 0.00041985766420134577,
      "loss": 3.294,
      "step": 85059
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4513276815414429,
      "learning_rate": 0.00041985391428161936,
      "loss": 3.1609,
      "step": 85060
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0905838012695312,
      "learning_rate": 0.00041985016433960994,
      "loss": 2.8327,
      "step": 85061
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9661610126495361,
      "learning_rate": 0.00041984641437531805,
      "loss": 3.2561,
      "step": 85062
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.620851993560791,
      "learning_rate": 0.0004198426643887444,
      "loss": 2.9428,
      "step": 85063
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.375155210494995,
      "learning_rate": 0.00041983891437988985,
      "loss": 2.977,
      "step": 85064
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4321749210357666,
      "learning_rate": 0.00041983516434875495,
      "loss": 3.0391,
      "step": 85065
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9477386474609375,
      "learning_rate": 0.00041983141429534034,
      "loss": 2.998,
      "step": 85066
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.904680848121643,
      "learning_rate": 0.0004198276642196469,
      "loss": 3.0785,
      "step": 85067
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4939403533935547,
      "learning_rate": 0.0004198239141216752,
      "loss": 3.0945,
      "step": 85068
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7253190279006958,
      "learning_rate": 0.000419820164001426,
      "loss": 3.0432,
      "step": 85069
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.2847776412963867,
      "learning_rate": 0.00041981641385889994,
      "loss": 2.6585,
      "step": 85070
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.578921914100647,
      "learning_rate": 0.0004198126636940977,
      "loss": 2.7807,
      "step": 85071
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.844239592552185,
      "learning_rate": 0.0004198089135070201,
      "loss": 2.9453,
      "step": 85072
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4350637197494507,
      "learning_rate": 0.0004198051632976678,
      "loss": 3.0796,
      "step": 85073
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6145561933517456,
      "learning_rate": 0.00041980141306604136,
      "loss": 2.9793,
      "step": 85074
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7438838481903076,
      "learning_rate": 0.0004197976628121416,
      "loss": 2.8584,
      "step": 85075
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2232115268707275,
      "learning_rate": 0.0004197939125359692,
      "loss": 3.2274,
      "step": 85076
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6646281480789185,
      "learning_rate": 0.0004197901622375249,
      "loss": 3.0548,
      "step": 85077
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0593771934509277,
      "learning_rate": 0.0004197864119168093,
      "loss": 3.0076,
      "step": 85078
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6397939920425415,
      "learning_rate": 0.0004197826615738231,
      "loss": 3.0053,
      "step": 85079
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.060467004776001,
      "learning_rate": 0.00041977891120856714,
      "loss": 2.8261,
      "step": 85080
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.339231491088867,
      "learning_rate": 0.0004197751608210419,
      "loss": 2.9775,
      "step": 85081
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7421364784240723,
      "learning_rate": 0.0004197714104112484,
      "loss": 3.101,
      "step": 85082
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4284939765930176,
      "learning_rate": 0.0004197676599791869,
      "loss": 2.811,
      "step": 85083
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.506253480911255,
      "learning_rate": 0.0004197639095248585,
      "loss": 2.8506,
      "step": 85084
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3231494426727295,
      "learning_rate": 0.0004197601590482636,
      "loss": 3.1048,
      "step": 85085
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8575351238250732,
      "learning_rate": 0.0004197564085494031,
      "loss": 3.2441,
      "step": 85086
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8592809438705444,
      "learning_rate": 0.00041975265802827766,
      "loss": 2.9999,
      "step": 85087
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.196349859237671,
      "learning_rate": 0.00041974890748488786,
      "loss": 3.0084,
      "step": 85088
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6253007650375366,
      "learning_rate": 0.00041974515691923446,
      "loss": 3.0485,
      "step": 85089
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6669715642929077,
      "learning_rate": 0.0004197414063313182,
      "loss": 2.8419,
      "step": 85090
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6204214096069336,
      "learning_rate": 0.00041973765572113987,
      "loss": 2.9075,
      "step": 85091
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4710769653320312,
      "learning_rate": 0.0004197339050886999,
      "loss": 3.2022,
      "step": 85092
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4072209596633911,
      "learning_rate": 0.0004197301544339992,
      "loss": 2.9728,
      "step": 85093
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5476858615875244,
      "learning_rate": 0.0004197264037570385,
      "loss": 2.94,
      "step": 85094
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.286369800567627,
      "learning_rate": 0.0004197226530578182,
      "loss": 3.1762,
      "step": 85095
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.716684341430664,
      "learning_rate": 0.00041971890233633935,
      "loss": 2.9158,
      "step": 85096
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9565061330795288,
      "learning_rate": 0.0004197151515926024,
      "loss": 3.2331,
      "step": 85097
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.782392144203186,
      "learning_rate": 0.0004197114008266082,
      "loss": 2.8588,
      "step": 85098
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.742586374282837,
      "learning_rate": 0.00041970765003835735,
      "loss": 3.1913,
      "step": 85099
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.548446774482727,
      "learning_rate": 0.0004197038992278507,
      "loss": 3.0205,
      "step": 85100
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.763127326965332,
      "learning_rate": 0.0004197001483950887,
      "loss": 3.2433,
      "step": 85101
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7682576179504395,
      "learning_rate": 0.0004196963975400722,
      "loss": 2.9789,
      "step": 85102
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6658656597137451,
      "learning_rate": 0.000419692646662802,
      "loss": 2.9068,
      "step": 85103
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8261330127716064,
      "learning_rate": 0.0004196888957632786,
      "loss": 2.8451,
      "step": 85104
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.3410165309906006,
      "learning_rate": 0.0004196851448415027,
      "loss": 2.9339,
      "step": 85105
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4589884281158447,
      "learning_rate": 0.0004196813938974752,
      "loss": 3.1291,
      "step": 85106
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.742401123046875,
      "learning_rate": 0.0004196776429311966,
      "loss": 2.8511,
      "step": 85107
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7923929691314697,
      "learning_rate": 0.00041967389194266764,
      "loss": 3.0392,
      "step": 85108
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5865583419799805,
      "learning_rate": 0.00041967014093188907,
      "loss": 2.9271,
      "step": 85109
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4559078216552734,
      "learning_rate": 0.0004196663898988616,
      "loss": 3.0213,
      "step": 85110
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6499874591827393,
      "learning_rate": 0.0004196626388435859,
      "loss": 3.1383,
      "step": 85111
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.678663969039917,
      "learning_rate": 0.00041965888776606254,
      "loss": 3.1184,
      "step": 85112
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7897502183914185,
      "learning_rate": 0.0004196551366662925,
      "loss": 2.8503,
      "step": 85113
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.775590419769287,
      "learning_rate": 0.00041965138554427614,
      "loss": 2.9386,
      "step": 85114
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5157455205917358,
      "learning_rate": 0.0004196476344000144,
      "loss": 2.9961,
      "step": 85115
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5747723579406738,
      "learning_rate": 0.000419643883233508,
      "loss": 3.1399,
      "step": 85116
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9652761220932007,
      "learning_rate": 0.00041964013204475744,
      "loss": 2.9198,
      "step": 85117
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7907917499542236,
      "learning_rate": 0.00041963638083376354,
      "loss": 2.9065,
      "step": 85118
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6188547611236572,
      "learning_rate": 0.000419632629600527,
      "loss": 3.1537,
      "step": 85119
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.878522276878357,
      "learning_rate": 0.00041962887834504846,
      "loss": 3.1762,
      "step": 85120
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.459526300430298,
      "learning_rate": 0.0004196251270673287,
      "loss": 3.2871,
      "step": 85121
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9713798761367798,
      "learning_rate": 0.00041962137576736835,
      "loss": 2.7433,
      "step": 85122
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.621320962905884,
      "learning_rate": 0.00041961762444516805,
      "loss": 3.2323,
      "step": 85123
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.067641258239746,
      "learning_rate": 0.0004196138731007287,
      "loss": 3.0952,
      "step": 85124
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9870176315307617,
      "learning_rate": 0.0004196101217340509,
      "loss": 3.431,
      "step": 85125
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7933475971221924,
      "learning_rate": 0.0004196063703451352,
      "loss": 3.0317,
      "step": 85126
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6136845350265503,
      "learning_rate": 0.0004196026189339824,
      "loss": 3.0432,
      "step": 85127
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6069225072860718,
      "learning_rate": 0.0004195988675005933,
      "loss": 2.8914,
      "step": 85128
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.708052396774292,
      "learning_rate": 0.00041959511604496854,
      "loss": 2.7374,
      "step": 85129
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.060225009918213,
      "learning_rate": 0.0004195913645671087,
      "loss": 3.1258,
      "step": 85130
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.815330147743225,
      "learning_rate": 0.00041958761306701465,
      "loss": 3.0912,
      "step": 85131
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6442621946334839,
      "learning_rate": 0.00041958386154468695,
      "loss": 2.9763,
      "step": 85132
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5988316535949707,
      "learning_rate": 0.0004195801100001263,
      "loss": 3.0349,
      "step": 85133
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5911332368850708,
      "learning_rate": 0.0004195763584333336,
      "loss": 2.9742,
      "step": 85134
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5760488510131836,
      "learning_rate": 0.0004195726068443093,
      "loss": 2.9652,
      "step": 85135
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5527470111846924,
      "learning_rate": 0.0004195688552330541,
      "loss": 3.0759,
      "step": 85136
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5420045852661133,
      "learning_rate": 0.000419565103599569,
      "loss": 3.0155,
      "step": 85137
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5382190942764282,
      "learning_rate": 0.00041956135194385437,
      "loss": 2.9657,
      "step": 85138
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8165088891983032,
      "learning_rate": 0.000419557600265911,
      "loss": 2.7337,
      "step": 85139
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0500271320343018,
      "learning_rate": 0.00041955384856573973,
      "loss": 2.6724,
      "step": 85140
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4597256183624268,
      "learning_rate": 0.00041955009684334105,
      "loss": 2.8698,
      "step": 85141
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7346947193145752,
      "learning_rate": 0.00041954634509871574,
      "loss": 2.952,
      "step": 85142
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.794963002204895,
      "learning_rate": 0.00041954259333186456,
      "loss": 3.2749,
      "step": 85143
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.3684353828430176,
      "learning_rate": 0.00041953884154278815,
      "loss": 3.1139,
      "step": 85144
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.512535333633423,
      "learning_rate": 0.0004195350897314872,
      "loss": 2.7431,
      "step": 85145
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3110103607177734,
      "learning_rate": 0.0004195313378979625,
      "loss": 2.8352,
      "step": 85146
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.50801420211792,
      "learning_rate": 0.00041952758604221454,
      "loss": 3.0587,
      "step": 85147
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.013917922973633,
      "learning_rate": 0.0004195238341642442,
      "loss": 2.826,
      "step": 85148
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9963364601135254,
      "learning_rate": 0.0004195200822640522,
      "loss": 3.0231,
      "step": 85149
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6809797286987305,
      "learning_rate": 0.000419516330341639,
      "loss": 3.1723,
      "step": 85150
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.321319103240967,
      "learning_rate": 0.0004195125783970055,
      "loss": 3.1001,
      "step": 85151
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.107651710510254,
      "learning_rate": 0.00041950882643015247,
      "loss": 3.164,
      "step": 85152
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1197941303253174,
      "learning_rate": 0.0004195050744410804,
      "loss": 3.1231,
      "step": 85153
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9385782480239868,
      "learning_rate": 0.0004195013224297901,
      "loss": 3.0542,
      "step": 85154
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6047859191894531,
      "learning_rate": 0.00041949757039628225,
      "loss": 2.9207,
      "step": 85155
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5882903337478638,
      "learning_rate": 0.00041949381834055754,
      "loss": 2.7806,
      "step": 85156
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1211555004119873,
      "learning_rate": 0.00041949006626261666,
      "loss": 3.2421,
      "step": 85157
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.128466844558716,
      "learning_rate": 0.00041948631416246037,
      "loss": 3.2108,
      "step": 85158
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6870065927505493,
      "learning_rate": 0.00041948256204008927,
      "loss": 3.0009,
      "step": 85159
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7578191757202148,
      "learning_rate": 0.0004194788098955041,
      "loss": 2.8863,
      "step": 85160
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.794512152671814,
      "learning_rate": 0.00041947505772870565,
      "loss": 2.9108,
      "step": 85161
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.303265333175659,
      "learning_rate": 0.00041947130553969444,
      "loss": 3.1947,
      "step": 85162
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4709795713424683,
      "learning_rate": 0.00041946755332847123,
      "loss": 3.1271,
      "step": 85163
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7163012027740479,
      "learning_rate": 0.0004194638010950368,
      "loss": 3.0648,
      "step": 85164
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8527919054031372,
      "learning_rate": 0.00041946004883939187,
      "loss": 3.0498,
      "step": 85165
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6953132152557373,
      "learning_rate": 0.00041945629656153696,
      "loss": 2.9691,
      "step": 85166
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7196661233901978,
      "learning_rate": 0.0004194525442614729,
      "loss": 2.8719,
      "step": 85167
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6702558994293213,
      "learning_rate": 0.00041944879193920036,
      "loss": 2.8587,
      "step": 85168
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7894717454910278,
      "learning_rate": 0.00041944503959472,
      "loss": 3.0183,
      "step": 85169
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.3937386274337769,
      "learning_rate": 0.0004194412872280326,
      "loss": 2.9912,
      "step": 85170
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5177873373031616,
      "learning_rate": 0.00041943753483913873,
      "loss": 3.1342,
      "step": 85171
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3451991081237793,
      "learning_rate": 0.00041943378242803933,
      "loss": 3.0825,
      "step": 85172
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7008318901062012,
      "learning_rate": 0.00041943002999473476,
      "loss": 3.042,
      "step": 85173
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8589658737182617,
      "learning_rate": 0.00041942627753922595,
      "loss": 3.0725,
      "step": 85174
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7594558000564575,
      "learning_rate": 0.0004194225250615136,
      "loss": 3.0964,
      "step": 85175
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4627718925476074,
      "learning_rate": 0.00041941877256159826,
      "loss": 3.0712,
      "step": 85176
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6368393898010254,
      "learning_rate": 0.00041941502003948084,
      "loss": 2.7275,
      "step": 85177
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5365427732467651,
      "learning_rate": 0.0004194112674951618,
      "loss": 3.0954,
      "step": 85178
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.582491397857666,
      "learning_rate": 0.00041940751492864205,
      "loss": 3.1123,
      "step": 85179
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4769349098205566,
      "learning_rate": 0.0004194037623399221,
      "loss": 2.8535,
      "step": 85180
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6397836208343506,
      "learning_rate": 0.00041940000972900276,
      "loss": 3.1063,
      "step": 85181
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.062692642211914,
      "learning_rate": 0.0004193962570958846,
      "loss": 3.0957,
      "step": 85182
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9143366813659668,
      "learning_rate": 0.0004193925044405686,
      "loss": 3.1152,
      "step": 85183
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9341609477996826,
      "learning_rate": 0.0004193887517630552,
      "loss": 3.2112,
      "step": 85184
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.331880807876587,
      "learning_rate": 0.00041938499906334515,
      "loss": 2.9512,
      "step": 85185
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3224148750305176,
      "learning_rate": 0.00041938124634143925,
      "loss": 2.8192,
      "step": 85186
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6619865894317627,
      "learning_rate": 0.0004193774935973381,
      "loss": 2.9244,
      "step": 85187
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6505073308944702,
      "learning_rate": 0.0004193737408310424,
      "loss": 3.3522,
      "step": 85188
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1079277992248535,
      "learning_rate": 0.0004193699880425529,
      "loss": 2.9853,
      "step": 85189
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7653062343597412,
      "learning_rate": 0.00041936623523187023,
      "loss": 3.1284,
      "step": 85190
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6587499380111694,
      "learning_rate": 0.0004193624823989951,
      "loss": 2.6721,
      "step": 85191
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.1910088062286377,
      "learning_rate": 0.00041935872954392835,
      "loss": 3.0773,
      "step": 85192
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.440969228744507,
      "learning_rate": 0.00041935497666667043,
      "loss": 2.9245,
      "step": 85193
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8105931282043457,
      "learning_rate": 0.0004193512237672222,
      "loss": 3.0826,
      "step": 85194
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.68099308013916,
      "learning_rate": 0.0004193474708455844,
      "loss": 2.8109,
      "step": 85195
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5473573207855225,
      "learning_rate": 0.00041934371790175754,
      "loss": 3.0982,
      "step": 85196
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7029533386230469,
      "learning_rate": 0.0004193399649357425,
      "loss": 2.9303,
      "step": 85197
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7969974279403687,
      "learning_rate": 0.0004193362119475399,
      "loss": 2.6449,
      "step": 85198
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8230350017547607,
      "learning_rate": 0.00041933245893715045,
      "loss": 3.13,
      "step": 85199
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9162063598632812,
      "learning_rate": 0.00041932870590457476,
      "loss": 3.1409,
      "step": 85200
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7867776155471802,
      "learning_rate": 0.00041932495284981375,
      "loss": 2.9304,
      "step": 85201
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5404988527297974,
      "learning_rate": 0.00041932119977286785,
      "loss": 2.9951,
      "step": 85202
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.0424203872680664,
      "learning_rate": 0.0004193174466737379,
      "loss": 2.9799,
      "step": 85203
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8656998872756958,
      "learning_rate": 0.00041931369355242473,
      "loss": 3.1071,
      "step": 85204
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.684005618095398,
      "learning_rate": 0.0004193099404089287,
      "loss": 2.9908,
      "step": 85205
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0946083068847656,
      "learning_rate": 0.00041930618724325084,
      "loss": 2.9609,
      "step": 85206
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0696585178375244,
      "learning_rate": 0.00041930243405539165,
      "loss": 3.1668,
      "step": 85207
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.550408124923706,
      "learning_rate": 0.0004192986808453519,
      "loss": 3.1585,
      "step": 85208
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.849037528038025,
      "learning_rate": 0.00041929492761313226,
      "loss": 2.8948,
      "step": 85209
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.4010186195373535,
      "learning_rate": 0.0004192911743587335,
      "loss": 2.7857,
      "step": 85210
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.173379898071289,
      "learning_rate": 0.00041928742108215614,
      "loss": 3.1916,
      "step": 85211
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9547793865203857,
      "learning_rate": 0.00041928366778340106,
      "loss": 2.783,
      "step": 85212
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8670603036880493,
      "learning_rate": 0.00041927991446246896,
      "loss": 3.0629,
      "step": 85213
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6850907802581787,
      "learning_rate": 0.00041927616111936037,
      "loss": 3.0181,
      "step": 85214
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5767937898635864,
      "learning_rate": 0.00041927240775407616,
      "loss": 3.136,
      "step": 85215
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5738962888717651,
      "learning_rate": 0.000419268654366617,
      "loss": 2.9242,
      "step": 85216
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.141158103942871,
      "learning_rate": 0.0004192649009569834,
      "loss": 2.8803,
      "step": 85217
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7378077507019043,
      "learning_rate": 0.00041926114752517625,
      "loss": 2.8676,
      "step": 85218
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.499299168586731,
      "learning_rate": 0.0004192573940711963,
      "loss": 2.7514,
      "step": 85219
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7834738492965698,
      "learning_rate": 0.00041925364059504404,
      "loss": 3.0484,
      "step": 85220
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7312308549880981,
      "learning_rate": 0.0004192498870967203,
      "loss": 2.7595,
      "step": 85221
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.596778154373169,
      "learning_rate": 0.0004192461335762258,
      "loss": 2.8971,
      "step": 85222
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5789673328399658,
      "learning_rate": 0.0004192423800335611,
      "loss": 3.0865,
      "step": 85223
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7572121620178223,
      "learning_rate": 0.00041923862646872705,
      "loss": 3.0748,
      "step": 85224
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6941317319869995,
      "learning_rate": 0.00041923487288172436,
      "loss": 3.2482,
      "step": 85225
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2634644508361816,
      "learning_rate": 0.00041923111927255353,
      "loss": 2.9473,
      "step": 85226
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.503663182258606,
      "learning_rate": 0.00041922736564121543,
      "loss": 3.0171,
      "step": 85227
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.727337121963501,
      "learning_rate": 0.0004192236119877108,
      "loss": 3.0672,
      "step": 85228
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.89095938205719,
      "learning_rate": 0.0004192198583120401,
      "loss": 3.212,
      "step": 85229
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6991013288497925,
      "learning_rate": 0.0004192161046142042,
      "loss": 2.8358,
      "step": 85230
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.4684336185455322,
      "learning_rate": 0.0004192123508942039,
      "loss": 3.1386,
      "step": 85231
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5965237617492676,
      "learning_rate": 0.00041920859715203975,
      "loss": 2.8901,
      "step": 85232
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9912865161895752,
      "learning_rate": 0.0004192048433877124,
      "loss": 3.1211,
      "step": 85233
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5812476873397827,
      "learning_rate": 0.00041920108960122267,
      "loss": 3.0307,
      "step": 85234
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.233851909637451,
      "learning_rate": 0.00041919733579257116,
      "loss": 2.7953,
      "step": 85235
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7492386102676392,
      "learning_rate": 0.0004191935819617586,
      "loss": 3.1482,
      "step": 85236
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.758276343345642,
      "learning_rate": 0.00041918982810878574,
      "loss": 2.9845,
      "step": 85237
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6333919763565063,
      "learning_rate": 0.0004191860742336533,
      "loss": 2.9194,
      "step": 85238
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7664357423782349,
      "learning_rate": 0.0004191823203363618,
      "loss": 3.0734,
      "step": 85239
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8046760559082031,
      "learning_rate": 0.0004191785664169122,
      "loss": 3.0014,
      "step": 85240
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.77644944190979,
      "learning_rate": 0.0004191748124753049,
      "loss": 3.0318,
      "step": 85241
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7510201930999756,
      "learning_rate": 0.0004191710585115408,
      "loss": 2.9511,
      "step": 85242
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.542944312095642,
      "learning_rate": 0.0004191673045256206,
      "loss": 2.8284,
      "step": 85243
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7224091291427612,
      "learning_rate": 0.0004191635505175449,
      "loss": 3.2916,
      "step": 85244
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7777467966079712,
      "learning_rate": 0.00041915979648731453,
      "loss": 3.1988,
      "step": 85245
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0365374088287354,
      "learning_rate": 0.00041915604243493,
      "loss": 3.08,
      "step": 85246
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.05169415473938,
      "learning_rate": 0.0004191522883603922,
      "loss": 3.0934,
      "step": 85247
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8810228109359741,
      "learning_rate": 0.00041914853426370163,
      "loss": 2.9828,
      "step": 85248
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.33986496925354,
      "learning_rate": 0.0004191447801448592,
      "loss": 2.991,
      "step": 85249
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7787505388259888,
      "learning_rate": 0.0004191410260038655,
      "loss": 2.9336,
      "step": 85250
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6882282495498657,
      "learning_rate": 0.00041913727184072114,
      "loss": 2.8496,
      "step": 85251
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7651902437210083,
      "learning_rate": 0.00041913351765542696,
      "loss": 2.8954,
      "step": 85252
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8520113229751587,
      "learning_rate": 0.0004191297634479837,
      "loss": 3.0483,
      "step": 85253
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4299941062927246,
      "learning_rate": 0.00041912600921839186,
      "loss": 3.1627,
      "step": 85254
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8102375268936157,
      "learning_rate": 0.0004191222549666523,
      "loss": 3.0255,
      "step": 85255
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3890419006347656,
      "learning_rate": 0.0004191185006927657,
      "loss": 2.9793,
      "step": 85256
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.504387378692627,
      "learning_rate": 0.0004191147463967326,
      "loss": 3.0449,
      "step": 85257
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6465086936950684,
      "learning_rate": 0.0004191109920785539,
      "loss": 3.1631,
      "step": 85258
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1738102436065674,
      "learning_rate": 0.0004191072377382302,
      "loss": 3.0838,
      "step": 85259
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1583075523376465,
      "learning_rate": 0.00041910348337576226,
      "loss": 3.0277,
      "step": 85260
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7977365255355835,
      "learning_rate": 0.0004190997289911507,
      "loss": 2.8864,
      "step": 85261
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.13195538520813,
      "learning_rate": 0.0004190959745843962,
      "loss": 2.9343,
      "step": 85262
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.702279806137085,
      "learning_rate": 0.00041909222015549955,
      "loss": 3.0113,
      "step": 85263
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.765966773033142,
      "learning_rate": 0.0004190884657044614,
      "loss": 3.1769,
      "step": 85264
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.650523066520691,
      "learning_rate": 0.0004190847112312825,
      "loss": 3.0126,
      "step": 85265
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.960755467414856,
      "learning_rate": 0.0004190809567359634,
      "loss": 3.0411,
      "step": 85266
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6022446155548096,
      "learning_rate": 0.0004190772022185051,
      "loss": 2.8747,
      "step": 85267
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.7012417316436768,
      "learning_rate": 0.00041907344767890795,
      "loss": 2.9051,
      "step": 85268
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6136621236801147,
      "learning_rate": 0.0004190696931171728,
      "loss": 3.213,
      "step": 85269
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9386775493621826,
      "learning_rate": 0.0004190659385333003,
      "loss": 3.0302,
      "step": 85270
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7384371757507324,
      "learning_rate": 0.00041906218392729136,
      "loss": 3.0232,
      "step": 85271
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7436256408691406,
      "learning_rate": 0.0004190584292991464,
      "loss": 2.9913,
      "step": 85272
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6373472213745117,
      "learning_rate": 0.00041905467464886626,
      "loss": 3.0916,
      "step": 85273
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.579896092414856,
      "learning_rate": 0.00041905091997645165,
      "loss": 2.8995,
      "step": 85274
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9935200214385986,
      "learning_rate": 0.0004190471652819031,
      "loss": 3.154,
      "step": 85275
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5938018560409546,
      "learning_rate": 0.00041904341056522154,
      "loss": 2.8958,
      "step": 85276
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4756397008895874,
      "learning_rate": 0.0004190396558264076,
      "loss": 2.9012,
      "step": 85277
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.625905156135559,
      "learning_rate": 0.00041903590106546183,
      "loss": 3.3025,
      "step": 85278
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6220226287841797,
      "learning_rate": 0.0004190321462823851,
      "loss": 3.0274,
      "step": 85279
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6502734422683716,
      "learning_rate": 0.00041902839147717804,
      "loss": 3.1651,
      "step": 85280
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.62477707862854,
      "learning_rate": 0.0004190246366498413,
      "loss": 3.2621,
      "step": 85281
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6240137815475464,
      "learning_rate": 0.0004190208818003757,
      "loss": 3.036,
      "step": 85282
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.0293502807617188,
      "learning_rate": 0.00041901712692878193,
      "loss": 3.0735,
      "step": 85283
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6472249031066895,
      "learning_rate": 0.0004190133720350605,
      "loss": 3.1894,
      "step": 85284
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0883002281188965,
      "learning_rate": 0.00041900961711921226,
      "loss": 2.8708,
      "step": 85285
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.983587384223938,
      "learning_rate": 0.00041900586218123793,
      "loss": 2.9374,
      "step": 85286
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6323001384735107,
      "learning_rate": 0.0004190021072211382,
      "loss": 3.1602,
      "step": 85287
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.06496262550354,
      "learning_rate": 0.00041899835223891367,
      "loss": 3.0432,
      "step": 85288
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.415875792503357,
      "learning_rate": 0.00041899459723456514,
      "loss": 3.2207,
      "step": 85289
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9884392023086548,
      "learning_rate": 0.0004189908422080932,
      "loss": 3.0911,
      "step": 85290
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8589248657226562,
      "learning_rate": 0.00041898708715949864,
      "loss": 3.0344,
      "step": 85291
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7710838317871094,
      "learning_rate": 0.0004189833320887822,
      "loss": 2.9428,
      "step": 85292
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8047677278518677,
      "learning_rate": 0.0004189795769959444,
      "loss": 2.752,
      "step": 85293
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8508145809173584,
      "learning_rate": 0.00041897582188098615,
      "loss": 2.9845,
      "step": 85294
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4530915021896362,
      "learning_rate": 0.000418972066743908,
      "loss": 3.1206,
      "step": 85295
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6397311687469482,
      "learning_rate": 0.00041896831158471076,
      "loss": 2.9312,
      "step": 85296
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4327821731567383,
      "learning_rate": 0.00041896455640339495,
      "loss": 3.1896,
      "step": 85297
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1215672492980957,
      "learning_rate": 0.0004189608011999615,
      "loss": 2.9136,
      "step": 85298
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5441126823425293,
      "learning_rate": 0.00041895704597441093,
      "loss": 2.81,
      "step": 85299
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6640079021453857,
      "learning_rate": 0.000418953290726744,
      "loss": 3.2592,
      "step": 85300
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.444331169128418,
      "learning_rate": 0.00041894953545696143,
      "loss": 2.9869,
      "step": 85301
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8606858253479004,
      "learning_rate": 0.00041894578016506384,
      "loss": 2.7966,
      "step": 85302
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4860005378723145,
      "learning_rate": 0.0004189420248510521,
      "loss": 3.0928,
      "step": 85303
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4740092754364014,
      "learning_rate": 0.0004189382695149267,
      "loss": 2.8923,
      "step": 85304
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7271528244018555,
      "learning_rate": 0.00041893451415668845,
      "loss": 2.9458,
      "step": 85305
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5871317386627197,
      "learning_rate": 0.000418930758776338,
      "loss": 2.7115,
      "step": 85306
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6416308879852295,
      "learning_rate": 0.00041892700337387614,
      "loss": 2.9534,
      "step": 85307
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.775870680809021,
      "learning_rate": 0.0004189232479493034,
      "loss": 2.9903,
      "step": 85308
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8173341751098633,
      "learning_rate": 0.00041891949250262074,
      "loss": 2.8728,
      "step": 85309
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6503164768218994,
      "learning_rate": 0.0004189157370338285,
      "loss": 3.1621,
      "step": 85310
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8013228178024292,
      "learning_rate": 0.0004189119815429278,
      "loss": 3.0346,
      "step": 85311
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8542873859405518,
      "learning_rate": 0.000418908226029919,
      "loss": 3.165,
      "step": 85312
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5721855163574219,
      "learning_rate": 0.00041890447049480295,
      "loss": 2.9744,
      "step": 85313
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7278320789337158,
      "learning_rate": 0.00041890071493758027,
      "loss": 3.0398,
      "step": 85314
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.671403169631958,
      "learning_rate": 0.00041889695935825173,
      "loss": 3.0102,
      "step": 85315
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9634947776794434,
      "learning_rate": 0.00041889320375681794,
      "loss": 3.1084,
      "step": 85316
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0549850463867188,
      "learning_rate": 0.0004188894481332798,
      "loss": 2.9975,
      "step": 85317
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5146450996398926,
      "learning_rate": 0.00041888569248763773,
      "loss": 3.0035,
      "step": 85318
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.959113597869873,
      "learning_rate": 0.0004188819368198927,
      "loss": 3.0059,
      "step": 85319
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7389720678329468,
      "learning_rate": 0.0004188781811300451,
      "loss": 2.7571,
      "step": 85320
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.150484800338745,
      "learning_rate": 0.00041887442541809595,
      "loss": 3.1113,
      "step": 85321
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6926630735397339,
      "learning_rate": 0.00041887066968404575,
      "loss": 3.057,
      "step": 85322
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7589800357818604,
      "learning_rate": 0.0004188669139278953,
      "loss": 2.9323,
      "step": 85323
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7473162412643433,
      "learning_rate": 0.00041886315814964513,
      "loss": 2.9934,
      "step": 85324
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.767408013343811,
      "learning_rate": 0.0004188594023492961,
      "loss": 3.1513,
      "step": 85325
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8367960453033447,
      "learning_rate": 0.00041885564652684897,
      "loss": 3.0645,
      "step": 85326
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6970243453979492,
      "learning_rate": 0.0004188518906823042,
      "loss": 3.1376,
      "step": 85327
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8796192407608032,
      "learning_rate": 0.00041884813481566263,
      "loss": 2.738,
      "step": 85328
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8783657550811768,
      "learning_rate": 0.00041884437892692497,
      "loss": 3.0901,
      "step": 85329
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0377516746520996,
      "learning_rate": 0.00041884062301609197,
      "loss": 2.9897,
      "step": 85330
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2461650371551514,
      "learning_rate": 0.0004188368670831641,
      "loss": 2.9119,
      "step": 85331
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.507510781288147,
      "learning_rate": 0.00041883311112814235,
      "loss": 3.035,
      "step": 85332
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.149568796157837,
      "learning_rate": 0.00041882935515102725,
      "loss": 3.1174,
      "step": 85333
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8783098459243774,
      "learning_rate": 0.0004188255991518195,
      "loss": 3.0327,
      "step": 85334
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.645113468170166,
      "learning_rate": 0.0004188218431305199,
      "loss": 3.0441,
      "step": 85335
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.393423318862915,
      "learning_rate": 0.000418818087087129,
      "loss": 2.7183,
      "step": 85336
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7709113359451294,
      "learning_rate": 0.0004188143310216476,
      "loss": 3.0443,
      "step": 85337
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7062565088272095,
      "learning_rate": 0.0004188105749340764,
      "loss": 3.3303,
      "step": 85338
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0024728775024414,
      "learning_rate": 0.00041880681882441596,
      "loss": 3.2163,
      "step": 85339
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.702065348625183,
      "learning_rate": 0.00041880306269266723,
      "loss": 3.1061,
      "step": 85340
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5725370645523071,
      "learning_rate": 0.0004187993065388307,
      "loss": 3.1233,
      "step": 85341
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1228158473968506,
      "learning_rate": 0.0004187955503629072,
      "loss": 3.2505,
      "step": 85342
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9852039813995361,
      "learning_rate": 0.00041879179416489726,
      "loss": 3.0405,
      "step": 85343
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0298473834991455,
      "learning_rate": 0.00041878803794480167,
      "loss": 2.9057,
      "step": 85344
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.436089038848877,
      "learning_rate": 0.00041878428170262125,
      "loss": 2.9316,
      "step": 85345
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2990331649780273,
      "learning_rate": 0.00041878052543835656,
      "loss": 3.3303,
      "step": 85346
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5840377807617188,
      "learning_rate": 0.00041877676915200824,
      "loss": 3.2063,
      "step": 85347
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.065247058868408,
      "learning_rate": 0.0004187730128435772,
      "loss": 2.9636,
      "step": 85348
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5248212814331055,
      "learning_rate": 0.000418769256513064,
      "loss": 2.9595,
      "step": 85349
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.146674871444702,
      "learning_rate": 0.0004187655001604693,
      "loss": 3.0736,
      "step": 85350
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.437620162963867,
      "learning_rate": 0.00041876174378579383,
      "loss": 2.9308,
      "step": 85351
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.341635227203369,
      "learning_rate": 0.0004187579873890384,
      "loss": 3.1579,
      "step": 85352
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.942915201187134,
      "learning_rate": 0.00041875423097020354,
      "loss": 3.0316,
      "step": 85353
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0594711303710938,
      "learning_rate": 0.00041875047452929,
      "loss": 3.1431,
      "step": 85354
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9504715204238892,
      "learning_rate": 0.00041874671806629854,
      "loss": 3.3804,
      "step": 85355
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.559084177017212,
      "learning_rate": 0.00041874296158122995,
      "loss": 3.0323,
      "step": 85356
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2749178409576416,
      "learning_rate": 0.00041873920507408467,
      "loss": 2.9996,
      "step": 85357
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8979573249816895,
      "learning_rate": 0.0004187354485448635,
      "loss": 3.0625,
      "step": 85358
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8211034536361694,
      "learning_rate": 0.00041873169199356725,
      "loss": 2.9104,
      "step": 85359
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.510813593864441,
      "learning_rate": 0.00041872793542019647,
      "loss": 3.1422,
      "step": 85360
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6797168254852295,
      "learning_rate": 0.000418724178824752,
      "loss": 3.1907,
      "step": 85361
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1109540462493896,
      "learning_rate": 0.00041872042220723443,
      "loss": 2.9828,
      "step": 85362
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5297439098358154,
      "learning_rate": 0.00041871666556764453,
      "loss": 3.1158,
      "step": 85363
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.601712942123413,
      "learning_rate": 0.0004187129089059829,
      "loss": 2.995,
      "step": 85364
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4399890899658203,
      "learning_rate": 0.00041870915222225034,
      "loss": 2.7469,
      "step": 85365
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.627496361732483,
      "learning_rate": 0.0004187053955164475,
      "loss": 2.9484,
      "step": 85366
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7464312314987183,
      "learning_rate": 0.00041870163878857504,
      "loss": 2.8007,
      "step": 85367
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7328461408615112,
      "learning_rate": 0.0004186978820386338,
      "loss": 3.0605,
      "step": 85368
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6593999862670898,
      "learning_rate": 0.00041869412526662433,
      "loss": 2.9177,
      "step": 85369
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8660285472869873,
      "learning_rate": 0.0004186903684725473,
      "loss": 2.9474,
      "step": 85370
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5991915464401245,
      "learning_rate": 0.0004186866116564036,
      "loss": 2.9296,
      "step": 85371
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4684728384017944,
      "learning_rate": 0.0004186828548181938,
      "loss": 2.9969,
      "step": 85372
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8137761354446411,
      "learning_rate": 0.0004186790979579186,
      "loss": 2.9921,
      "step": 85373
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8830519914627075,
      "learning_rate": 0.0004186753410755788,
      "loss": 3.2826,
      "step": 85374
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6310572624206543,
      "learning_rate": 0.00041867158417117483,
      "loss": 2.9901,
      "step": 85375
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9011530876159668,
      "learning_rate": 0.00041866782724470764,
      "loss": 2.9372,
      "step": 85376
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.860287070274353,
      "learning_rate": 0.0004186640702961779,
      "loss": 2.9393,
      "step": 85377
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.732694149017334,
      "learning_rate": 0.00041866031332558635,
      "loss": 3.0019,
      "step": 85378
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.14190411567688,
      "learning_rate": 0.0004186565563329335,
      "loss": 3.1327,
      "step": 85379
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.993039846420288,
      "learning_rate": 0.00041865279931822016,
      "loss": 3.1226,
      "step": 85380
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6660583019256592,
      "learning_rate": 0.0004186490422814471,
      "loss": 2.9592,
      "step": 85381
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9592807292938232,
      "learning_rate": 0.00041864528522261483,
      "loss": 3.0406,
      "step": 85382
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.9137635231018066,
      "learning_rate": 0.0004186415281417242,
      "loss": 2.9831,
      "step": 85383
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.3871870040893555,
      "learning_rate": 0.000418637771038776,
      "loss": 3.0624,
      "step": 85384
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7330793142318726,
      "learning_rate": 0.00041863401391377056,
      "loss": 2.9732,
      "step": 85385
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1995623111724854,
      "learning_rate": 0.000418630256766709,
      "loss": 2.8136,
      "step": 85386
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1260218620300293,
      "learning_rate": 0.0004186264995975918,
      "loss": 2.9073,
      "step": 85387
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1283931732177734,
      "learning_rate": 0.00041862274240641966,
      "loss": 3.0562,
      "step": 85388
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7299550771713257,
      "learning_rate": 0.0004186189851931933,
      "loss": 2.9452,
      "step": 85389
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6513580083847046,
      "learning_rate": 0.00041861522795791356,
      "loss": 3.0284,
      "step": 85390
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.04732084274292,
      "learning_rate": 0.00041861147070058083,
      "loss": 2.8372,
      "step": 85391
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1029999256134033,
      "learning_rate": 0.0004186077134211961,
      "loss": 3.0328,
      "step": 85392
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.059685230255127,
      "learning_rate": 0.00041860395611975995,
      "loss": 3.0054,
      "step": 85393
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5267231464385986,
      "learning_rate": 0.000418600198796273,
      "loss": 2.8558,
      "step": 85394
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.085268974304199,
      "learning_rate": 0.0004185964414507361,
      "loss": 2.8955,
      "step": 85395
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.924468994140625,
      "learning_rate": 0.0004185926840831499,
      "loss": 2.9362,
      "step": 85396
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8437867164611816,
      "learning_rate": 0.00041858892669351507,
      "loss": 3.2973,
      "step": 85397
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.323349714279175,
      "learning_rate": 0.0004185851692818323,
      "loss": 2.9577,
      "step": 85398
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7155625820159912,
      "learning_rate": 0.0004185814118481024,
      "loss": 2.9884,
      "step": 85399
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.015286445617676,
      "learning_rate": 0.00041857765439232586,
      "loss": 3.0567,
      "step": 85400
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9527990818023682,
      "learning_rate": 0.0004185738969145035,
      "loss": 3.1499,
      "step": 85401
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5221590995788574,
      "learning_rate": 0.0004185701394146361,
      "loss": 3.1655,
      "step": 85402
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1197633743286133,
      "learning_rate": 0.0004185663818927242,
      "loss": 2.7887,
      "step": 85403
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6794825792312622,
      "learning_rate": 0.0004185626243487685,
      "loss": 3.2973,
      "step": 85404
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1219723224639893,
      "learning_rate": 0.00041855886678276994,
      "loss": 3.0855,
      "step": 85405
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6820225715637207,
      "learning_rate": 0.0004185551091947289,
      "loss": 3.0389,
      "step": 85406
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5916223526000977,
      "learning_rate": 0.00041855135158464635,
      "loss": 3.1865,
      "step": 85407
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.012660026550293,
      "learning_rate": 0.0004185475939525228,
      "loss": 2.9457,
      "step": 85408
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.674902319908142,
      "learning_rate": 0.0004185438362983591,
      "loss": 2.655,
      "step": 85409
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7841849327087402,
      "learning_rate": 0.0004185400786221557,
      "loss": 2.964,
      "step": 85410
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4757452011108398,
      "learning_rate": 0.00041853632092391364,
      "loss": 3.1009,
      "step": 85411
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.651085615158081,
      "learning_rate": 0.0004185325632036333,
      "loss": 2.9644,
      "step": 85412
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.61121666431427,
      "learning_rate": 0.0004185288054613155,
      "loss": 2.8875,
      "step": 85413
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.4795308113098145,
      "learning_rate": 0.0004185250476969611,
      "loss": 2.8246,
      "step": 85414
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.002810478210449,
      "learning_rate": 0.0004185212899105706,
      "loss": 2.8773,
      "step": 85415
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7644081115722656,
      "learning_rate": 0.0004185175321021447,
      "loss": 3.0199,
      "step": 85416
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3319385051727295,
      "learning_rate": 0.0004185137742716843,
      "loss": 3.1419,
      "step": 85417
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8973199129104614,
      "learning_rate": 0.00041851001641918977,
      "loss": 3.0268,
      "step": 85418
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8520792722702026,
      "learning_rate": 0.0004185062585446621,
      "loss": 2.5782,
      "step": 85419
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9862464666366577,
      "learning_rate": 0.00041850250064810187,
      "loss": 3.0404,
      "step": 85420
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8987364768981934,
      "learning_rate": 0.00041849874272950973,
      "loss": 2.9968,
      "step": 85421
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8530348539352417,
      "learning_rate": 0.00041849498478888645,
      "loss": 3.1066,
      "step": 85422
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7169078588485718,
      "learning_rate": 0.00041849122682623277,
      "loss": 3.093,
      "step": 85423
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.499997615814209,
      "learning_rate": 0.00041848746884154934,
      "loss": 3.0652,
      "step": 85424
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1512866020202637,
      "learning_rate": 0.0004184837108348367,
      "loss": 3.1239,
      "step": 85425
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.066272735595703,
      "learning_rate": 0.00041847995280609593,
      "loss": 3.0735,
      "step": 85426
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.747588038444519,
      "learning_rate": 0.00041847619475532735,
      "loss": 2.997,
      "step": 85427
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.923793911933899,
      "learning_rate": 0.0004184724366825318,
      "loss": 2.833,
      "step": 85428
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6504565477371216,
      "learning_rate": 0.00041846867858771013,
      "loss": 2.8624,
      "step": 85429
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.657881736755371,
      "learning_rate": 0.00041846492047086274,
      "loss": 3.1766,
      "step": 85430
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8465888500213623,
      "learning_rate": 0.0004184611623319906,
      "loss": 2.9719,
      "step": 85431
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7192373275756836,
      "learning_rate": 0.00041845740417109417,
      "loss": 2.9913,
      "step": 85432
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5455684661865234,
      "learning_rate": 0.00041845364598817436,
      "loss": 3.0828,
      "step": 85433
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5708037614822388,
      "learning_rate": 0.00041844988778323174,
      "loss": 2.938,
      "step": 85434
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.996943473815918,
      "learning_rate": 0.0004184461295562671,
      "loss": 3.2813,
      "step": 85435
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.729977011680603,
      "learning_rate": 0.000418442371307281,
      "loss": 2.9529,
      "step": 85436
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.952858328819275,
      "learning_rate": 0.0004184386130362744,
      "loss": 2.9643,
      "step": 85437
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5617144107818604,
      "learning_rate": 0.00041843485474324767,
      "loss": 2.9397,
      "step": 85438
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6812587976455688,
      "learning_rate": 0.00041843109642820167,
      "loss": 3.1069,
      "step": 85439
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.959318995475769,
      "learning_rate": 0.0004184273380911372,
      "loss": 2.7923,
      "step": 85440
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.075108528137207,
      "learning_rate": 0.00041842357973205475,
      "loss": 3.0723,
      "step": 85441
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6555536985397339,
      "learning_rate": 0.0004184198213509551,
      "loss": 2.8616,
      "step": 85442
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6800506114959717,
      "learning_rate": 0.000418416062947839,
      "loss": 3.0718,
      "step": 85443
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.073230743408203,
      "learning_rate": 0.00041841230452270724,
      "loss": 2.9739,
      "step": 85444
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7348753213882446,
      "learning_rate": 0.00041840854607556025,
      "loss": 2.8187,
      "step": 85445
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8174551725387573,
      "learning_rate": 0.0004184047876063989,
      "loss": 3.077,
      "step": 85446
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6426600217819214,
      "learning_rate": 0.0004184010291152239,
      "loss": 3.0494,
      "step": 85447
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4951494932174683,
      "learning_rate": 0.00041839727060203594,
      "loss": 3.0803,
      "step": 85448
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1293842792510986,
      "learning_rate": 0.0004183935120668357,
      "loss": 3.1714,
      "step": 85449
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5186058282852173,
      "learning_rate": 0.00041838975350962375,
      "loss": 3.2643,
      "step": 85450
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7237097024917603,
      "learning_rate": 0.000418385994930401,
      "loss": 2.7655,
      "step": 85451
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5191766023635864,
      "learning_rate": 0.0004183822363291681,
      "loss": 2.9242,
      "step": 85452
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8339442014694214,
      "learning_rate": 0.0004183784777059256,
      "loss": 2.8582,
      "step": 85453
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.642313003540039,
      "learning_rate": 0.0004183747190606744,
      "loss": 3.0122,
      "step": 85454
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4918692111968994,
      "learning_rate": 0.00041837096039341507,
      "loss": 3.001,
      "step": 85455
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4549881219863892,
      "learning_rate": 0.00041836720170414834,
      "loss": 2.9604,
      "step": 85456
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8510372638702393,
      "learning_rate": 0.0004183634429928749,
      "loss": 3.4427,
      "step": 85457
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4957921504974365,
      "learning_rate": 0.0004183596842595955,
      "loss": 2.9831,
      "step": 85458
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4396497011184692,
      "learning_rate": 0.00041835592550431075,
      "loss": 2.8522,
      "step": 85459
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6677979230880737,
      "learning_rate": 0.00041835216672702144,
      "loss": 3.1834,
      "step": 85460
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.212162494659424,
      "learning_rate": 0.00041834840792772824,
      "loss": 3.0903,
      "step": 85461
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.921595811843872,
      "learning_rate": 0.0004183446491064318,
      "loss": 2.9747,
      "step": 85462
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.030174493789673,
      "learning_rate": 0.0004183408902631329,
      "loss": 2.9361,
      "step": 85463
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.675825834274292,
      "learning_rate": 0.00041833713139783214,
      "loss": 3.3123,
      "step": 85464
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.908809781074524,
      "learning_rate": 0.0004183333725105303,
      "loss": 3.2322,
      "step": 85465
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5522905588150024,
      "learning_rate": 0.0004183296136012281,
      "loss": 2.8751,
      "step": 85466
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7180700302124023,
      "learning_rate": 0.0004183258546699261,
      "loss": 3.1545,
      "step": 85467
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6474742889404297,
      "learning_rate": 0.0004183220957166251,
      "loss": 2.8839,
      "step": 85468
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6099368333816528,
      "learning_rate": 0.00041831833674132584,
      "loss": 3.0032,
      "step": 85469
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9906196594238281,
      "learning_rate": 0.000418314577744029,
      "loss": 2.9805,
      "step": 85470
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4019179344177246,
      "learning_rate": 0.0004183108187247351,
      "loss": 2.9313,
      "step": 85471
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.854576826095581,
      "learning_rate": 0.0004183070596834452,
      "loss": 3.0843,
      "step": 85472
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7544572353363037,
      "learning_rate": 0.0004183033006201596,
      "loss": 3.0524,
      "step": 85473
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0874133110046387,
      "learning_rate": 0.0004182995415348792,
      "loss": 3.3812,
      "step": 85474
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.107297420501709,
      "learning_rate": 0.0004182957824276048,
      "loss": 3.0349,
      "step": 85475
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6893043518066406,
      "learning_rate": 0.00041829202329833685,
      "loss": 3.3454,
      "step": 85476
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.061889886856079,
      "learning_rate": 0.00041828826414707623,
      "loss": 3.0924,
      "step": 85477
    },
    {
      "epoch": 1.11,
      "grad_norm": 4.375938415527344,
      "learning_rate": 0.00041828450497382366,
      "loss": 3.1162,
      "step": 85478
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.731552004814148,
      "learning_rate": 0.00041828074577857967,
      "loss": 3.0148,
      "step": 85479
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.953019618988037,
      "learning_rate": 0.00041827698656134513,
      "loss": 2.7661,
      "step": 85480
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.379241704940796,
      "learning_rate": 0.0004182732273221206,
      "loss": 3.079,
      "step": 85481
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.901063919067383,
      "learning_rate": 0.0004182694680609069,
      "loss": 2.9113,
      "step": 85482
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7920573949813843,
      "learning_rate": 0.0004182657087777046,
      "loss": 3.0863,
      "step": 85483
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.621571660041809,
      "learning_rate": 0.0004182619494725146,
      "loss": 2.9537,
      "step": 85484
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.016082763671875,
      "learning_rate": 0.0004182581901453373,
      "loss": 3.1985,
      "step": 85485
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4142940044403076,
      "learning_rate": 0.0004182544307961737,
      "loss": 2.6847,
      "step": 85486
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9882164001464844,
      "learning_rate": 0.00041825067142502436,
      "loss": 2.9246,
      "step": 85487
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6731300354003906,
      "learning_rate": 0.0004182469120318899,
      "loss": 2.9771,
      "step": 85488
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.993651270866394,
      "learning_rate": 0.00041824315261677116,
      "loss": 3.0577,
      "step": 85489
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.4571890830993652,
      "learning_rate": 0.00041823939317966883,
      "loss": 2.8994,
      "step": 85490
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.298524856567383,
      "learning_rate": 0.00041823563372058353,
      "loss": 3.0479,
      "step": 85491
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9280924797058105,
      "learning_rate": 0.000418231874239516,
      "loss": 3.1381,
      "step": 85492
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5324623584747314,
      "learning_rate": 0.0004182281147364669,
      "loss": 3.097,
      "step": 85493
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.056699514389038,
      "learning_rate": 0.00041822435521143695,
      "loss": 2.9739,
      "step": 85494
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6575050354003906,
      "learning_rate": 0.0004182205956644269,
      "loss": 2.9739,
      "step": 85495
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5258252620697021,
      "learning_rate": 0.00041821683609543747,
      "loss": 3.3066,
      "step": 85496
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9248816967010498,
      "learning_rate": 0.0004182130765044692,
      "loss": 3.0994,
      "step": 85497
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2611870765686035,
      "learning_rate": 0.00041820931689152296,
      "loss": 3.1638,
      "step": 85498
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.908652663230896,
      "learning_rate": 0.0004182055572565993,
      "loss": 2.8755,
      "step": 85499
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5463827848434448,
      "learning_rate": 0.000418201797599699,
      "loss": 3.0571,
      "step": 85500
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6290403604507446,
      "learning_rate": 0.0004181980379208228,
      "loss": 2.9239,
      "step": 85501
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3032901287078857,
      "learning_rate": 0.0004181942782199714,
      "loss": 2.7507,
      "step": 85502
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7207894325256348,
      "learning_rate": 0.00041819051849714537,
      "loss": 2.9088,
      "step": 85503
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8530350923538208,
      "learning_rate": 0.00041818675875234554,
      "loss": 3.209,
      "step": 85504
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.1420392990112305,
      "learning_rate": 0.00041818299898557256,
      "loss": 2.8271,
      "step": 85505
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5473450422286987,
      "learning_rate": 0.0004181792391968271,
      "loss": 3.0328,
      "step": 85506
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9549325704574585,
      "learning_rate": 0.00041817547938610987,
      "loss": 3.069,
      "step": 85507
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.024172306060791,
      "learning_rate": 0.00041817171955342157,
      "loss": 2.8863,
      "step": 85508
    },
    {
      "epoch": 1.11,
      "grad_norm": 4.029181480407715,
      "learning_rate": 0.000418167959698763,
      "loss": 3.0245,
      "step": 85509
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7185767889022827,
      "learning_rate": 0.00041816419982213464,
      "loss": 2.9149,
      "step": 85510
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.917696475982666,
      "learning_rate": 0.0004181604399235375,
      "loss": 3.1122,
      "step": 85511
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.389596462249756,
      "learning_rate": 0.00041815668000297205,
      "loss": 3.0091,
      "step": 85512
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.652317762374878,
      "learning_rate": 0.000418152920060439,
      "loss": 2.9725,
      "step": 85513
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.2332394123077393,
      "learning_rate": 0.00041814916009593904,
      "loss": 3.2268,
      "step": 85514
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5254665613174438,
      "learning_rate": 0.00041814540010947303,
      "loss": 2.945,
      "step": 85515
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4809162616729736,
      "learning_rate": 0.00041814164010104153,
      "loss": 3.0361,
      "step": 85516
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4204785823822021,
      "learning_rate": 0.0004181378800706452,
      "loss": 3.0463,
      "step": 85517
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8026968240737915,
      "learning_rate": 0.00041813412001828493,
      "loss": 2.8365,
      "step": 85518
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1832079887390137,
      "learning_rate": 0.00041813035994396126,
      "loss": 3.1417,
      "step": 85519
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6532127857208252,
      "learning_rate": 0.00041812659984767484,
      "loss": 3.1996,
      "step": 85520
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7938076257705688,
      "learning_rate": 0.00041812283972942656,
      "loss": 3.1612,
      "step": 85521
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.093452215194702,
      "learning_rate": 0.00041811907958921694,
      "loss": 2.9338,
      "step": 85522
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6417317390441895,
      "learning_rate": 0.0004181153194270468,
      "loss": 2.916,
      "step": 85523
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3246872425079346,
      "learning_rate": 0.0004181115592429167,
      "loss": 2.9234,
      "step": 85524
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9857944250106812,
      "learning_rate": 0.00041810779903682756,
      "loss": 3.035,
      "step": 85525
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8363945484161377,
      "learning_rate": 0.00041810403880877985,
      "loss": 3.0285,
      "step": 85526
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7283393144607544,
      "learning_rate": 0.0004181002785587744,
      "loss": 2.925,
      "step": 85527
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7087901830673218,
      "learning_rate": 0.00041809651828681187,
      "loss": 3.1482,
      "step": 85528
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.080841302871704,
      "learning_rate": 0.00041809275799289307,
      "loss": 2.6415,
      "step": 85529
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1512107849121094,
      "learning_rate": 0.00041808899767701847,
      "loss": 3.0071,
      "step": 85530
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6689012050628662,
      "learning_rate": 0.00041808523733918894,
      "loss": 3.1005,
      "step": 85531
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4353001117706299,
      "learning_rate": 0.0004180814769794052,
      "loss": 3.08,
      "step": 85532
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7310078144073486,
      "learning_rate": 0.0004180777165976678,
      "loss": 3.0305,
      "step": 85533
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0721242427825928,
      "learning_rate": 0.0004180739561939775,
      "loss": 3.119,
      "step": 85534
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6439846754074097,
      "learning_rate": 0.00041807019576833507,
      "loss": 3.0102,
      "step": 85535
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.900594711303711,
      "learning_rate": 0.00041806643532074124,
      "loss": 3.0889,
      "step": 85536
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.832484483718872,
      "learning_rate": 0.0004180626748511965,
      "loss": 3.0115,
      "step": 85537
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.795777440071106,
      "learning_rate": 0.0004180589143597017,
      "loss": 3.1226,
      "step": 85538
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4134751558303833,
      "learning_rate": 0.0004180551538462576,
      "loss": 3.002,
      "step": 85539
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6980477571487427,
      "learning_rate": 0.0004180513933108648,
      "loss": 3.156,
      "step": 85540
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.591957926750183,
      "learning_rate": 0.0004180476327535239,
      "loss": 3.0078,
      "step": 85541
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.283566951751709,
      "learning_rate": 0.0004180438721742359,
      "loss": 2.8049,
      "step": 85542
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.632900357246399,
      "learning_rate": 0.00041804011157300117,
      "loss": 3.0049,
      "step": 85543
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5457372665405273,
      "learning_rate": 0.0004180363509498206,
      "loss": 3.1597,
      "step": 85544
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6603878736495972,
      "learning_rate": 0.00041803259030469487,
      "loss": 3.3935,
      "step": 85545
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9548919200897217,
      "learning_rate": 0.0004180288296376247,
      "loss": 2.9679,
      "step": 85546
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7248013019561768,
      "learning_rate": 0.0004180250689486106,
      "loss": 2.8959,
      "step": 85547
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.535948395729065,
      "learning_rate": 0.0004180213082376536,
      "loss": 2.8352,
      "step": 85548
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.5132522583007812,
      "learning_rate": 0.00041801754750475406,
      "loss": 2.9137,
      "step": 85549
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7579387426376343,
      "learning_rate": 0.0004180137867499128,
      "loss": 3.1803,
      "step": 85550
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6595877408981323,
      "learning_rate": 0.0004180100259731307,
      "loss": 2.7589,
      "step": 85551
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.031193971633911,
      "learning_rate": 0.0004180062651744082,
      "loss": 3.045,
      "step": 85552
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.3409197330474854,
      "learning_rate": 0.00041800250435374616,
      "loss": 3.0736,
      "step": 85553
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.6640303134918213,
      "learning_rate": 0.0004179987435111452,
      "loss": 2.9486,
      "step": 85554
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8831707239151,
      "learning_rate": 0.00041799498264660605,
      "loss": 3.1324,
      "step": 85555
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.410598039627075,
      "learning_rate": 0.00041799122176012935,
      "loss": 3.0972,
      "step": 85556
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3330464363098145,
      "learning_rate": 0.00041798746085171593,
      "loss": 3.1965,
      "step": 85557
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6402286291122437,
      "learning_rate": 0.0004179836999213664,
      "loss": 2.8681,
      "step": 85558
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.705103874206543,
      "learning_rate": 0.00041797993896908143,
      "loss": 3.1783,
      "step": 85559
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5994981527328491,
      "learning_rate": 0.0004179761779948619,
      "loss": 3.0121,
      "step": 85560
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.783943772315979,
      "learning_rate": 0.0004179724169987082,
      "loss": 2.9053,
      "step": 85561
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8187659978866577,
      "learning_rate": 0.00041796865598062123,
      "loss": 2.9554,
      "step": 85562
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4101907014846802,
      "learning_rate": 0.0004179648949406018,
      "loss": 3.1211,
      "step": 85563
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.3538676500320435,
      "learning_rate": 0.00041796113387865036,
      "loss": 3.018,
      "step": 85564
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.974216341972351,
      "learning_rate": 0.0004179573727947677,
      "loss": 3.1734,
      "step": 85565
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5304698944091797,
      "learning_rate": 0.00041795361168895463,
      "loss": 3.0336,
      "step": 85566
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9158591032028198,
      "learning_rate": 0.00041794985056121167,
      "loss": 3.1613,
      "step": 85567
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.4754459857940674,
      "learning_rate": 0.0004179460894115396,
      "loss": 2.9786,
      "step": 85568
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6657099723815918,
      "learning_rate": 0.0004179423282399392,
      "loss": 2.8867,
      "step": 85569
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.078915596008301,
      "learning_rate": 0.00041793856704641104,
      "loss": 3.1101,
      "step": 85570
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0185277462005615,
      "learning_rate": 0.0004179348058309558,
      "loss": 2.754,
      "step": 85571
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9354016780853271,
      "learning_rate": 0.0004179310445935744,
      "loss": 3.2046,
      "step": 85572
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.05256724357605,
      "learning_rate": 0.0004179272833342673,
      "loss": 2.9277,
      "step": 85573
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6153308153152466,
      "learning_rate": 0.0004179235220530353,
      "loss": 3.0429,
      "step": 85574
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.989762783050537,
      "learning_rate": 0.0004179197607498792,
      "loss": 3.0553,
      "step": 85575
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.258859157562256,
      "learning_rate": 0.00041791599942479947,
      "loss": 3.093,
      "step": 85576
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.526906967163086,
      "learning_rate": 0.00041791223807779694,
      "loss": 3.0782,
      "step": 85577
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5545393228530884,
      "learning_rate": 0.00041790847670887234,
      "loss": 2.9758,
      "step": 85578
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.9674489498138428,
      "learning_rate": 0.00041790471531802636,
      "loss": 2.8059,
      "step": 85579
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.236898422241211,
      "learning_rate": 0.0004179009539052596,
      "loss": 2.8133,
      "step": 85580
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6753941774368286,
      "learning_rate": 0.0004178971924705728,
      "loss": 3.1694,
      "step": 85581
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5249959230422974,
      "learning_rate": 0.0004178934310139668,
      "loss": 3.014,
      "step": 85582
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1770713329315186,
      "learning_rate": 0.0004178896695354421,
      "loss": 2.8688,
      "step": 85583
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8976376056671143,
      "learning_rate": 0.00041788590803499946,
      "loss": 2.8583,
      "step": 85584
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7274279594421387,
      "learning_rate": 0.00041788214651263965,
      "loss": 3.1614,
      "step": 85585
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.079688787460327,
      "learning_rate": 0.00041787838496836334,
      "loss": 2.9133,
      "step": 85586
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9959591627120972,
      "learning_rate": 0.00041787462340217124,
      "loss": 2.9057,
      "step": 85587
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9494950771331787,
      "learning_rate": 0.00041787086181406394,
      "loss": 3.0967,
      "step": 85588
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.673358678817749,
      "learning_rate": 0.00041786710020404226,
      "loss": 2.9002,
      "step": 85589
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.873334288597107,
      "learning_rate": 0.00041786333857210685,
      "loss": 3.2805,
      "step": 85590
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.315852165222168,
      "learning_rate": 0.00041785957691825847,
      "loss": 2.9711,
      "step": 85591
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.153988838195801,
      "learning_rate": 0.0004178558152424977,
      "loss": 2.9351,
      "step": 85592
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8626461029052734,
      "learning_rate": 0.00041785205354482534,
      "loss": 3.0189,
      "step": 85593
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.592048406600952,
      "learning_rate": 0.00041784829182524215,
      "loss": 3.1702,
      "step": 85594
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1651742458343506,
      "learning_rate": 0.00041784453008374855,
      "loss": 2.8583,
      "step": 85595
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7966548204421997,
      "learning_rate": 0.0004178407683203455,
      "loss": 2.9296,
      "step": 85596
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3282039165496826,
      "learning_rate": 0.00041783700653503375,
      "loss": 2.8977,
      "step": 85597
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.0403811931610107,
      "learning_rate": 0.00041783324472781375,
      "loss": 2.6478,
      "step": 85598
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5986453294754028,
      "learning_rate": 0.00041782948289868636,
      "loss": 2.924,
      "step": 85599
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5753017663955688,
      "learning_rate": 0.0004178257210476523,
      "loss": 3.2245,
      "step": 85600
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.972177505493164,
      "learning_rate": 0.0004178219591747121,
      "loss": 2.9773,
      "step": 85601
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6180278062820435,
      "learning_rate": 0.0004178181972798666,
      "loss": 3.0942,
      "step": 85602
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7650487422943115,
      "learning_rate": 0.0004178144353631165,
      "loss": 3.1695,
      "step": 85603
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.1849732398986816,
      "learning_rate": 0.00041781067342446253,
      "loss": 3.0484,
      "step": 85604
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.3640522956848145,
      "learning_rate": 0.0004178069114639052,
      "loss": 3.1245,
      "step": 85605
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8425718545913696,
      "learning_rate": 0.00041780314948144546,
      "loss": 3.009,
      "step": 85606
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7381644248962402,
      "learning_rate": 0.00041779938747708384,
      "loss": 3.1651,
      "step": 85607
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5883684158325195,
      "learning_rate": 0.00041779562545082107,
      "loss": 3.0031,
      "step": 85608
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8093003034591675,
      "learning_rate": 0.0004177918634026579,
      "loss": 2.9262,
      "step": 85609
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6727626323699951,
      "learning_rate": 0.00041778810133259503,
      "loss": 2.928,
      "step": 85610
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6915929317474365,
      "learning_rate": 0.00041778433924063307,
      "loss": 3.1066,
      "step": 85611
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6646010875701904,
      "learning_rate": 0.0004177805771267728,
      "loss": 3.1659,
      "step": 85612
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.474793791770935,
      "learning_rate": 0.0004177768149910149,
      "loss": 3.1414,
      "step": 85613
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6294113397598267,
      "learning_rate": 0.0004177730528333601,
      "loss": 3.1448,
      "step": 85614
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.366018533706665,
      "learning_rate": 0.000417769290653809,
      "loss": 2.8511,
      "step": 85615
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8439115285873413,
      "learning_rate": 0.00041776552845236234,
      "loss": 3.2632,
      "step": 85616
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.8349506855010986,
      "learning_rate": 0.00041776176622902093,
      "loss": 3.0814,
      "step": 85617
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7366604804992676,
      "learning_rate": 0.0004177580039837853,
      "loss": 2.9175,
      "step": 85618
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.749138593673706,
      "learning_rate": 0.00041775424171665635,
      "loss": 3.0192,
      "step": 85619
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.578250527381897,
      "learning_rate": 0.0004177504794276346,
      "loss": 3.0919,
      "step": 85620
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6999804973602295,
      "learning_rate": 0.0004177467171167208,
      "loss": 2.9573,
      "step": 85621
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5503917932510376,
      "learning_rate": 0.00041774295478391575,
      "loss": 2.9622,
      "step": 85622
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.765744924545288,
      "learning_rate": 0.00041773919242921994,
      "loss": 3.0,
      "step": 85623
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.6695375442504883,
      "learning_rate": 0.00041773543005263426,
      "loss": 2.9997,
      "step": 85624
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7849884033203125,
      "learning_rate": 0.0004177316676541593,
      "loss": 2.9537,
      "step": 85625
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7989870309829712,
      "learning_rate": 0.0004177279052337958,
      "loss": 3.0202,
      "step": 85626
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.5688536167144775,
      "learning_rate": 0.00041772414279154455,
      "loss": 3.1346,
      "step": 85627
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.9201414585113525,
      "learning_rate": 0.0004177203803274061,
      "loss": 3.1304,
      "step": 85628
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.6706342697143555,
      "learning_rate": 0.00041771661784138113,
      "loss": 3.082,
      "step": 85629
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.7191643714904785,
      "learning_rate": 0.0004177128553334706,
      "loss": 3.0758,
      "step": 85630
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.066950559616089,
      "learning_rate": 0.00041770909280367484,
      "loss": 2.912,
      "step": 85631
    },
    {
      "epoch": 1.11,
      "grad_norm": 1.8327221870422363,
      "learning_rate": 0.00041770533025199485,
      "loss": 2.8208,
      "step": 85632
    },
    {
      "epoch": 1.11,
      "grad_norm": 2.337191581726074,
      "learning_rate": 0.00041770156767843126,
      "loss": 3.1068,
      "step": 85633
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.805002212524414,
      "learning_rate": 0.00041769780508298457,
      "loss": 2.9488,
      "step": 85634
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8990435600280762,
      "learning_rate": 0.00041769404246565575,
      "loss": 2.8452,
      "step": 85635
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8527876138687134,
      "learning_rate": 0.0004176902798264454,
      "loss": 2.9761,
      "step": 85636
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7132220268249512,
      "learning_rate": 0.0004176865171653541,
      "loss": 3.0008,
      "step": 85637
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9226055145263672,
      "learning_rate": 0.0004176827544823827,
      "loss": 2.7729,
      "step": 85638
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.411774158477783,
      "learning_rate": 0.00041767899177753195,
      "loss": 2.9702,
      "step": 85639
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6725293397903442,
      "learning_rate": 0.00041767522905080236,
      "loss": 2.9161,
      "step": 85640
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.792541265487671,
      "learning_rate": 0.0004176714663021947,
      "loss": 2.818,
      "step": 85641
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.536344528198242,
      "learning_rate": 0.0004176677035317098,
      "loss": 3.1685,
      "step": 85642
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7633781433105469,
      "learning_rate": 0.00041766394073934816,
      "loss": 2.7054,
      "step": 85643
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9159305095672607,
      "learning_rate": 0.00041766017792511055,
      "loss": 2.9323,
      "step": 85644
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.170990228652954,
      "learning_rate": 0.00041765641508899776,
      "loss": 3.2615,
      "step": 85645
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7196978330612183,
      "learning_rate": 0.00041765265223101043,
      "loss": 2.9798,
      "step": 85646
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.91250479221344,
      "learning_rate": 0.00041764888935114927,
      "loss": 3.1591,
      "step": 85647
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.088773727416992,
      "learning_rate": 0.0004176451264494148,
      "loss": 2.8246,
      "step": 85648
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8500782251358032,
      "learning_rate": 0.00041764136352580804,
      "loss": 2.8752,
      "step": 85649
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5561878681182861,
      "learning_rate": 0.0004176376005803295,
      "loss": 3.1048,
      "step": 85650
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7361078262329102,
      "learning_rate": 0.00041763383761297983,
      "loss": 3.1596,
      "step": 85651
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.839310646057129,
      "learning_rate": 0.00041763007462375997,
      "loss": 2.9752,
      "step": 85652
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7662403583526611,
      "learning_rate": 0.0004176263116126703,
      "loss": 3.0167,
      "step": 85653
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8240795135498047,
      "learning_rate": 0.00041762254857971175,
      "loss": 3.1435,
      "step": 85654
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8421789407730103,
      "learning_rate": 0.00041761878552488496,
      "loss": 3.1004,
      "step": 85655
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7453094720840454,
      "learning_rate": 0.0004176150224481906,
      "loss": 3.0055,
      "step": 85656
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9538874626159668,
      "learning_rate": 0.00041761125934962936,
      "loss": 2.9834,
      "step": 85657
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.616206169128418,
      "learning_rate": 0.00041760749622920195,
      "loss": 3.0089,
      "step": 85658
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7144958972930908,
      "learning_rate": 0.0004176037330869091,
      "loss": 3.0672,
      "step": 85659
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.828487753868103,
      "learning_rate": 0.0004175999699227516,
      "loss": 3.2379,
      "step": 85660
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5883917808532715,
      "learning_rate": 0.00041759620673672995,
      "loss": 3.1536,
      "step": 85661
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8720622062683105,
      "learning_rate": 0.000417592443528845,
      "loss": 3.1397,
      "step": 85662
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8551623821258545,
      "learning_rate": 0.0004175886802990973,
      "loss": 2.8895,
      "step": 85663
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4729491472244263,
      "learning_rate": 0.0004175849170474878,
      "loss": 3.1983,
      "step": 85664
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8576242923736572,
      "learning_rate": 0.0004175811537740169,
      "loss": 2.9804,
      "step": 85665
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8277894258499146,
      "learning_rate": 0.00041757739047868544,
      "loss": 3.1458,
      "step": 85666
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9035942554473877,
      "learning_rate": 0.00041757362716149425,
      "loss": 3.2677,
      "step": 85667
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.101222038269043,
      "learning_rate": 0.00041756986382244376,
      "loss": 2.9443,
      "step": 85668
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7210484743118286,
      "learning_rate": 0.0004175661004615349,
      "loss": 2.9285,
      "step": 85669
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.621747612953186,
      "learning_rate": 0.00041756233707876834,
      "loss": 2.8286,
      "step": 85670
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.587420701980591,
      "learning_rate": 0.0004175585736741446,
      "loss": 3.04,
      "step": 85671
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3512589931488037,
      "learning_rate": 0.00041755481024766453,
      "loss": 2.7997,
      "step": 85672
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5199363231658936,
      "learning_rate": 0.00041755104679932884,
      "loss": 3.1245,
      "step": 85673
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8409924507141113,
      "learning_rate": 0.00041754728332913814,
      "loss": 3.0461,
      "step": 85674
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.354205846786499,
      "learning_rate": 0.0004175435198370932,
      "loss": 2.8958,
      "step": 85675
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.017728328704834,
      "learning_rate": 0.00041753975632319475,
      "loss": 3.2063,
      "step": 85676
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5586694478988647,
      "learning_rate": 0.00041753599278744337,
      "loss": 2.9933,
      "step": 85677
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4356223344802856,
      "learning_rate": 0.0004175322292298399,
      "loss": 3.2654,
      "step": 85678
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7040892839431763,
      "learning_rate": 0.0004175284656503849,
      "loss": 3.0688,
      "step": 85679
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7322691679000854,
      "learning_rate": 0.0004175247020490792,
      "loss": 2.9647,
      "step": 85680
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6158785820007324,
      "learning_rate": 0.0004175209384259234,
      "loss": 2.9924,
      "step": 85681
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.585002064704895,
      "learning_rate": 0.00041751717478091826,
      "loss": 2.9479,
      "step": 85682
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4767024517059326,
      "learning_rate": 0.00041751341111406445,
      "loss": 2.8652,
      "step": 85683
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.044616460800171,
      "learning_rate": 0.00041750964742536266,
      "loss": 3.0006,
      "step": 85684
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3344132900238037,
      "learning_rate": 0.0004175058837148137,
      "loss": 3.0493,
      "step": 85685
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5197341442108154,
      "learning_rate": 0.0004175021199824181,
      "loss": 3.0843,
      "step": 85686
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.688363790512085,
      "learning_rate": 0.00041749835622817655,
      "loss": 3.1508,
      "step": 85687
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9303737878799438,
      "learning_rate": 0.00041749459245209,
      "loss": 3.0345,
      "step": 85688
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9211695194244385,
      "learning_rate": 0.0004174908286541589,
      "loss": 2.9537,
      "step": 85689
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.904036283493042,
      "learning_rate": 0.00041748706483438403,
      "loss": 2.7242,
      "step": 85690
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7158530950546265,
      "learning_rate": 0.00041748330099276615,
      "loss": 3.0929,
      "step": 85691
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.152838945388794,
      "learning_rate": 0.0004174795371293059,
      "loss": 2.7879,
      "step": 85692
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.2918174266815186,
      "learning_rate": 0.0004174757732440039,
      "loss": 3.1323,
      "step": 85693
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.651104688644409,
      "learning_rate": 0.00041747200933686104,
      "loss": 3.1466,
      "step": 85694
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.608303427696228,
      "learning_rate": 0.0004174682454078779,
      "loss": 3.0938,
      "step": 85695
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6576459407806396,
      "learning_rate": 0.00041746448145705515,
      "loss": 3.1201,
      "step": 85696
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.917188048362732,
      "learning_rate": 0.0004174607174843935,
      "loss": 3.1772,
      "step": 85697
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8886076211929321,
      "learning_rate": 0.00041745695348989383,
      "loss": 3.0061,
      "step": 85698
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.728524088859558,
      "learning_rate": 0.0004174531894735566,
      "loss": 3.3276,
      "step": 85699
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.913400888442993,
      "learning_rate": 0.0004174494254353826,
      "loss": 3.2061,
      "step": 85700
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.650075674057007,
      "learning_rate": 0.0004174456613753726,
      "loss": 3.1446,
      "step": 85701
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8948756456375122,
      "learning_rate": 0.0004174418972935272,
      "loss": 2.9282,
      "step": 85702
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8475072383880615,
      "learning_rate": 0.000417438133189847,
      "loss": 2.8551,
      "step": 85703
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7079633474349976,
      "learning_rate": 0.000417434369064333,
      "loss": 3.0468,
      "step": 85704
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3887991905212402,
      "learning_rate": 0.00041743060491698574,
      "loss": 2.8671,
      "step": 85705
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.018195867538452,
      "learning_rate": 0.0004174268407478058,
      "loss": 2.8744,
      "step": 85706
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4699366092681885,
      "learning_rate": 0.000417423076556794,
      "loss": 3.1473,
      "step": 85707
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6844782829284668,
      "learning_rate": 0.00041741931234395114,
      "loss": 3.1032,
      "step": 85708
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9804339408874512,
      "learning_rate": 0.0004174155481092778,
      "loss": 2.9328,
      "step": 85709
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.518324851989746,
      "learning_rate": 0.0004174117838527746,
      "loss": 3.1678,
      "step": 85710
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.853567123413086,
      "learning_rate": 0.00041740801957444235,
      "loss": 2.9881,
      "step": 85711
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.742514967918396,
      "learning_rate": 0.0004174042552742818,
      "loss": 2.8959,
      "step": 85712
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8512216806411743,
      "learning_rate": 0.0004174004909522936,
      "loss": 3.186,
      "step": 85713
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7381877899169922,
      "learning_rate": 0.0004173967266084783,
      "loss": 3.1231,
      "step": 85714
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4712637662887573,
      "learning_rate": 0.00041739296224283683,
      "loss": 3.0429,
      "step": 85715
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4573912620544434,
      "learning_rate": 0.00041738919785536984,
      "loss": 3.002,
      "step": 85716
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9056369066238403,
      "learning_rate": 0.0004173854334460779,
      "loss": 3.0751,
      "step": 85717
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7772670984268188,
      "learning_rate": 0.0004173816690149618,
      "loss": 2.8284,
      "step": 85718
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2625651359558105,
      "learning_rate": 0.0004173779045620223,
      "loss": 3.0746,
      "step": 85719
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7090054750442505,
      "learning_rate": 0.00041737414008725997,
      "loss": 3.177,
      "step": 85720
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7491053342819214,
      "learning_rate": 0.0004173703755906755,
      "loss": 2.7717,
      "step": 85721
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1158559322357178,
      "learning_rate": 0.00041736661107226984,
      "loss": 2.915,
      "step": 85722
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7804120779037476,
      "learning_rate": 0.00041736284653204336,
      "loss": 2.8261,
      "step": 85723
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6982625722885132,
      "learning_rate": 0.00041735908196999694,
      "loss": 2.7789,
      "step": 85724
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1456708908081055,
      "learning_rate": 0.0004173553173861313,
      "loss": 2.7753,
      "step": 85725
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7282564640045166,
      "learning_rate": 0.00041735155278044706,
      "loss": 2.9749,
      "step": 85726
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.847276210784912,
      "learning_rate": 0.00041734778815294497,
      "loss": 2.8312,
      "step": 85727
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.367131233215332,
      "learning_rate": 0.0004173440235036257,
      "loss": 2.9403,
      "step": 85728
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8754961490631104,
      "learning_rate": 0.00041734025883248993,
      "loss": 2.8636,
      "step": 85729
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.937644362449646,
      "learning_rate": 0.0004173364941395384,
      "loss": 2.8943,
      "step": 85730
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.416913032531738,
      "learning_rate": 0.0004173327294247719,
      "loss": 3.0536,
      "step": 85731
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8343583345413208,
      "learning_rate": 0.00041732896468819094,
      "loss": 2.9601,
      "step": 85732
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5596799850463867,
      "learning_rate": 0.0004173251999297963,
      "loss": 2.9698,
      "step": 85733
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7137467861175537,
      "learning_rate": 0.00041732143514958874,
      "loss": 2.8122,
      "step": 85734
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7854019403457642,
      "learning_rate": 0.00041731767034756885,
      "loss": 3.1914,
      "step": 85735
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.700156331062317,
      "learning_rate": 0.0004173139055237374,
      "loss": 2.878,
      "step": 85736
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4197542667388916,
      "learning_rate": 0.0004173101406780952,
      "loss": 3.1213,
      "step": 85737
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3801424503326416,
      "learning_rate": 0.00041730637581064274,
      "loss": 3.0903,
      "step": 85738
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8772339820861816,
      "learning_rate": 0.0004173026109213808,
      "loss": 2.7551,
      "step": 85739
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.467535376548767,
      "learning_rate": 0.0004172988460103101,
      "loss": 3.1673,
      "step": 85740
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4714529514312744,
      "learning_rate": 0.0004172950810774313,
      "loss": 2.9951,
      "step": 85741
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4731937646865845,
      "learning_rate": 0.00041729131612274514,
      "loss": 3.2745,
      "step": 85742
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.735008955001831,
      "learning_rate": 0.0004172875511462524,
      "loss": 2.9972,
      "step": 85743
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8719162940979004,
      "learning_rate": 0.0004172837861479536,
      "loss": 2.8072,
      "step": 85744
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9298717975616455,
      "learning_rate": 0.0004172800211278495,
      "loss": 3.1583,
      "step": 85745
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7315698862075806,
      "learning_rate": 0.0004172762560859409,
      "loss": 2.6628,
      "step": 85746
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.653116226196289,
      "learning_rate": 0.0004172724910222284,
      "loss": 3.0667,
      "step": 85747
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.13875412940979,
      "learning_rate": 0.00041726872593671275,
      "loss": 2.9999,
      "step": 85748
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9352853298187256,
      "learning_rate": 0.00041726496082939465,
      "loss": 2.8668,
      "step": 85749
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.914665699005127,
      "learning_rate": 0.00041726119570027477,
      "loss": 2.7496,
      "step": 85750
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9765838384628296,
      "learning_rate": 0.00041725743054935374,
      "loss": 3.05,
      "step": 85751
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4794584512710571,
      "learning_rate": 0.00041725366537663245,
      "loss": 2.9148,
      "step": 85752
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4702048301696777,
      "learning_rate": 0.00041724990018211143,
      "loss": 3.1024,
      "step": 85753
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6705586910247803,
      "learning_rate": 0.00041724613496579144,
      "loss": 3.2051,
      "step": 85754
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.172827959060669,
      "learning_rate": 0.00041724236972767324,
      "loss": 2.9813,
      "step": 85755
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.490072250366211,
      "learning_rate": 0.00041723860446775743,
      "loss": 2.808,
      "step": 85756
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8212647438049316,
      "learning_rate": 0.0004172348391860447,
      "loss": 2.9387,
      "step": 85757
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.57430362701416,
      "learning_rate": 0.0004172310738825359,
      "loss": 2.9586,
      "step": 85758
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0321154594421387,
      "learning_rate": 0.0004172273085572315,
      "loss": 3.0597,
      "step": 85759
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.997922420501709,
      "learning_rate": 0.0004172235432101324,
      "loss": 2.5915,
      "step": 85760
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6210968494415283,
      "learning_rate": 0.00041721977784123926,
      "loss": 2.9114,
      "step": 85761
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.496029853820801,
      "learning_rate": 0.0004172160124505527,
      "loss": 3.0347,
      "step": 85762
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.961345911026001,
      "learning_rate": 0.0004172122470380735,
      "loss": 3.1327,
      "step": 85763
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1061627864837646,
      "learning_rate": 0.0004172084816038023,
      "loss": 2.9728,
      "step": 85764
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.587090253829956,
      "learning_rate": 0.0004172047161477399,
      "loss": 3.0116,
      "step": 85765
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.3157267570495605,
      "learning_rate": 0.00041720095066988684,
      "loss": 2.8567,
      "step": 85766
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5394724607467651,
      "learning_rate": 0.00041719718517024406,
      "loss": 3.1973,
      "step": 85767
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.903072714805603,
      "learning_rate": 0.00041719341964881196,
      "loss": 2.98,
      "step": 85768
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.709245204925537,
      "learning_rate": 0.00041718965410559146,
      "loss": 2.8287,
      "step": 85769
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8841898441314697,
      "learning_rate": 0.0004171858885405832,
      "loss": 3.246,
      "step": 85770
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8067278861999512,
      "learning_rate": 0.0004171821229537878,
      "loss": 3.0237,
      "step": 85771
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1056485176086426,
      "learning_rate": 0.00041717835734520603,
      "loss": 2.9853,
      "step": 85772
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1271839141845703,
      "learning_rate": 0.0004171745917148387,
      "loss": 2.9411,
      "step": 85773
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6939963102340698,
      "learning_rate": 0.0004171708260626863,
      "loss": 2.926,
      "step": 85774
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7109012603759766,
      "learning_rate": 0.0004171670603887497,
      "loss": 3.0505,
      "step": 85775
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.641964316368103,
      "learning_rate": 0.00041716329469302944,
      "loss": 3.1433,
      "step": 85776
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6518443822860718,
      "learning_rate": 0.0004171595289755264,
      "loss": 3.1839,
      "step": 85777
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4067327976226807,
      "learning_rate": 0.00041715576323624124,
      "loss": 3.0983,
      "step": 85778
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7059980630874634,
      "learning_rate": 0.00041715199747517444,
      "loss": 3.1802,
      "step": 85779
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4218437671661377,
      "learning_rate": 0.00041714823169232705,
      "loss": 2.8705,
      "step": 85780
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8202977180480957,
      "learning_rate": 0.0004171444658876995,
      "loss": 3.0952,
      "step": 85781
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.832381248474121,
      "learning_rate": 0.0004171407000612925,
      "loss": 2.5759,
      "step": 85782
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.401874303817749,
      "learning_rate": 0.000417136934213107,
      "loss": 2.9273,
      "step": 85783
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7058123350143433,
      "learning_rate": 0.0004171331683431435,
      "loss": 2.8982,
      "step": 85784
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5271294116973877,
      "learning_rate": 0.00041712940245140263,
      "loss": 3.0376,
      "step": 85785
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1304659843444824,
      "learning_rate": 0.0004171256365378853,
      "loss": 3.0775,
      "step": 85786
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.197334051132202,
      "learning_rate": 0.00041712187060259203,
      "loss": 3.0465,
      "step": 85787
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.2932205200195312,
      "learning_rate": 0.0004171181046455236,
      "loss": 2.7341,
      "step": 85788
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7526274919509888,
      "learning_rate": 0.0004171143386666807,
      "loss": 2.965,
      "step": 85789
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6499994993209839,
      "learning_rate": 0.00041711057266606414,
      "loss": 2.8584,
      "step": 85790
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6891083717346191,
      "learning_rate": 0.0004171068066436744,
      "loss": 3.1987,
      "step": 85791
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5893688201904297,
      "learning_rate": 0.0004171030405995123,
      "loss": 2.9607,
      "step": 85792
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.228085517883301,
      "learning_rate": 0.00041709927453357864,
      "loss": 3.2817,
      "step": 85793
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4437236785888672,
      "learning_rate": 0.00041709550844587387,
      "loss": 3.076,
      "step": 85794
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6757575273513794,
      "learning_rate": 0.00041709174233639897,
      "loss": 2.9686,
      "step": 85795
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7634022235870361,
      "learning_rate": 0.0004170879762051543,
      "loss": 2.8726,
      "step": 85796
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5676897764205933,
      "learning_rate": 0.000417084210052141,
      "loss": 3.2368,
      "step": 85797
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7490739822387695,
      "learning_rate": 0.0004170804438773594,
      "loss": 3.0642,
      "step": 85798
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.199458599090576,
      "learning_rate": 0.00041707667768081034,
      "loss": 3.0147,
      "step": 85799
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6499922275543213,
      "learning_rate": 0.00041707291146249456,
      "loss": 2.9348,
      "step": 85800
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6768051385879517,
      "learning_rate": 0.00041706914522241276,
      "loss": 3.0764,
      "step": 85801
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1653568744659424,
      "learning_rate": 0.0004170653789605655,
      "loss": 3.1462,
      "step": 85802
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5703580379486084,
      "learning_rate": 0.0004170616126769536,
      "loss": 3.0383,
      "step": 85803
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.948372483253479,
      "learning_rate": 0.0004170578463715778,
      "loss": 2.9255,
      "step": 85804
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.551705002784729,
      "learning_rate": 0.0004170540800444386,
      "loss": 3.1506,
      "step": 85805
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9760141372680664,
      "learning_rate": 0.00041705031369553697,
      "loss": 3.048,
      "step": 85806
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7435294389724731,
      "learning_rate": 0.0004170465473248735,
      "loss": 3.0316,
      "step": 85807
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5898686647415161,
      "learning_rate": 0.00041704278093244876,
      "loss": 2.948,
      "step": 85808
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6895328760147095,
      "learning_rate": 0.00041703901451826356,
      "loss": 3.0764,
      "step": 85809
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5947322845458984,
      "learning_rate": 0.0004170352480823187,
      "loss": 2.8853,
      "step": 85810
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7359368801116943,
      "learning_rate": 0.00041703148162461465,
      "loss": 3.2301,
      "step": 85811
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6959445476531982,
      "learning_rate": 0.00041702771514515234,
      "loss": 3.1593,
      "step": 85812
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.077043056488037,
      "learning_rate": 0.0004170239486439324,
      "loss": 2.8195,
      "step": 85813
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5905122756958008,
      "learning_rate": 0.0004170201821209554,
      "loss": 3.018,
      "step": 85814
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7978030443191528,
      "learning_rate": 0.0004170164155762221,
      "loss": 3.0085,
      "step": 85815
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5698115825653076,
      "learning_rate": 0.00041701264900973335,
      "loss": 2.9327,
      "step": 85816
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.593775749206543,
      "learning_rate": 0.0004170088824214897,
      "loss": 3.1431,
      "step": 85817
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6089786291122437,
      "learning_rate": 0.0004170051158114919,
      "loss": 3.1142,
      "step": 85818
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5830801725387573,
      "learning_rate": 0.00041700134917974064,
      "loss": 2.9999,
      "step": 85819
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4716575145721436,
      "learning_rate": 0.00041699758252623656,
      "loss": 2.8697,
      "step": 85820
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8477696180343628,
      "learning_rate": 0.00041699381585098047,
      "loss": 2.8971,
      "step": 85821
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.3452882766723633,
      "learning_rate": 0.0004169900491539731,
      "loss": 2.9961,
      "step": 85822
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6717878580093384,
      "learning_rate": 0.00041698628243521497,
      "loss": 2.8955,
      "step": 85823
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6492666006088257,
      "learning_rate": 0.00041698251569470687,
      "loss": 3.0596,
      "step": 85824
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.954401969909668,
      "learning_rate": 0.00041697874893244956,
      "loss": 2.9354,
      "step": 85825
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7363464832305908,
      "learning_rate": 0.00041697498214844365,
      "loss": 3.1553,
      "step": 85826
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8697447776794434,
      "learning_rate": 0.0004169712153426899,
      "loss": 2.8527,
      "step": 85827
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5387009382247925,
      "learning_rate": 0.00041696744851518904,
      "loss": 2.8357,
      "step": 85828
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6381359100341797,
      "learning_rate": 0.0004169636816659417,
      "loss": 2.8927,
      "step": 85829
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5729719400405884,
      "learning_rate": 0.0004169599147949486,
      "loss": 3.0128,
      "step": 85830
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9041091203689575,
      "learning_rate": 0.00041695614790221045,
      "loss": 2.9604,
      "step": 85831
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7520731687545776,
      "learning_rate": 0.0004169523809877279,
      "loss": 2.9506,
      "step": 85832
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.569488286972046,
      "learning_rate": 0.0004169486140515017,
      "loss": 3.0948,
      "step": 85833
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.654981255531311,
      "learning_rate": 0.0004169448470935326,
      "loss": 2.9994,
      "step": 85834
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6839065551757812,
      "learning_rate": 0.0004169410801138213,
      "loss": 3.0862,
      "step": 85835
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6854045391082764,
      "learning_rate": 0.00041693731311236826,
      "loss": 2.958,
      "step": 85836
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1741316318511963,
      "learning_rate": 0.0004169335460891745,
      "loss": 3.1849,
      "step": 85837
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5885438919067383,
      "learning_rate": 0.0004169297790442406,
      "loss": 2.9368,
      "step": 85838
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.792267918586731,
      "learning_rate": 0.00041692601197756713,
      "loss": 2.9214,
      "step": 85839
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1501286029815674,
      "learning_rate": 0.00041692224488915503,
      "loss": 2.9752,
      "step": 85840
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.337114095687866,
      "learning_rate": 0.00041691847777900484,
      "loss": 3.021,
      "step": 85841
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5035405158996582,
      "learning_rate": 0.00041691471064711727,
      "loss": 3.1069,
      "step": 85842
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8191194534301758,
      "learning_rate": 0.000416910943493493,
      "loss": 3.1957,
      "step": 85843
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.435962438583374,
      "learning_rate": 0.0004169071763181329,
      "loss": 2.9777,
      "step": 85844
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.020941734313965,
      "learning_rate": 0.0004169034091210375,
      "loss": 2.9896,
      "step": 85845
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.554070234298706,
      "learning_rate": 0.00041689964190220753,
      "loss": 3.1211,
      "step": 85846
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8826406002044678,
      "learning_rate": 0.00041689587466164376,
      "loss": 3.036,
      "step": 85847
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5858858823776245,
      "learning_rate": 0.0004168921073993467,
      "loss": 3.0427,
      "step": 85848
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7304835319519043,
      "learning_rate": 0.00041688834011531734,
      "loss": 3.1417,
      "step": 85849
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6733474731445312,
      "learning_rate": 0.0004168845728095562,
      "loss": 2.781,
      "step": 85850
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5993642807006836,
      "learning_rate": 0.000416880805482064,
      "loss": 3.0652,
      "step": 85851
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6847282648086548,
      "learning_rate": 0.0004168770381328414,
      "loss": 3.0095,
      "step": 85852
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8050285577774048,
      "learning_rate": 0.00041687327076188926,
      "loss": 3.1891,
      "step": 85853
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9919754266738892,
      "learning_rate": 0.0004168695033692081,
      "loss": 3.2635,
      "step": 85854
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.532573938369751,
      "learning_rate": 0.0004168657359547986,
      "loss": 3.1977,
      "step": 85855
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.691659927368164,
      "learning_rate": 0.00041686196851866173,
      "loss": 3.0452,
      "step": 85856
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7565828561782837,
      "learning_rate": 0.00041685820106079793,
      "loss": 3.1152,
      "step": 85857
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9323965311050415,
      "learning_rate": 0.00041685443358120797,
      "loss": 3.0071,
      "step": 85858
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5807009935379028,
      "learning_rate": 0.00041685066607989265,
      "loss": 3.1123,
      "step": 85859
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.372253179550171,
      "learning_rate": 0.00041684689855685253,
      "loss": 3.1319,
      "step": 85860
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7158976793289185,
      "learning_rate": 0.00041684313101208835,
      "loss": 3.1155,
      "step": 85861
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6793290376663208,
      "learning_rate": 0.00041683936344560083,
      "loss": 3.0336,
      "step": 85862
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5802377462387085,
      "learning_rate": 0.00041683559585739067,
      "loss": 3.0211,
      "step": 85863
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8360886573791504,
      "learning_rate": 0.00041683182824745857,
      "loss": 3.0684,
      "step": 85864
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9826856851577759,
      "learning_rate": 0.00041682806061580524,
      "loss": 3.0243,
      "step": 85865
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7075787782669067,
      "learning_rate": 0.0004168242929624314,
      "loss": 2.8846,
      "step": 85866
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8993675708770752,
      "learning_rate": 0.00041682052528733764,
      "loss": 2.875,
      "step": 85867
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5054941177368164,
      "learning_rate": 0.00041681675759052485,
      "loss": 2.993,
      "step": 85868
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6205416917800903,
      "learning_rate": 0.0004168129898719935,
      "loss": 2.6181,
      "step": 85869
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8022398948669434,
      "learning_rate": 0.0004168092221317444,
      "loss": 2.9195,
      "step": 85870
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8434536457061768,
      "learning_rate": 0.00041680545436977837,
      "loss": 3.1497,
      "step": 85871
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6982775926589966,
      "learning_rate": 0.00041680168658609593,
      "loss": 2.996,
      "step": 85872
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.670917272567749,
      "learning_rate": 0.00041679791878069785,
      "loss": 3.2558,
      "step": 85873
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.602160930633545,
      "learning_rate": 0.0004167941509535849,
      "loss": 3.1456,
      "step": 85874
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9250247478485107,
      "learning_rate": 0.00041679038310475766,
      "loss": 2.8471,
      "step": 85875
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9464021921157837,
      "learning_rate": 0.00041678661523421686,
      "loss": 3.0176,
      "step": 85876
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.189507246017456,
      "learning_rate": 0.00041678284734196323,
      "loss": 2.797,
      "step": 85877
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9986059665679932,
      "learning_rate": 0.00041677907942799753,
      "loss": 3.1507,
      "step": 85878
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.089998483657837,
      "learning_rate": 0.0004167753114923203,
      "loss": 3.0036,
      "step": 85879
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8655980825424194,
      "learning_rate": 0.0004167715435349324,
      "loss": 2.699,
      "step": 85880
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0388453006744385,
      "learning_rate": 0.0004167677755558344,
      "loss": 3.3171,
      "step": 85881
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7219314575195312,
      "learning_rate": 0.0004167640075550272,
      "loss": 2.9994,
      "step": 85882
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.009396553039551,
      "learning_rate": 0.00041676023953251124,
      "loss": 3.1465,
      "step": 85883
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.709425210952759,
      "learning_rate": 0.00041675647148828744,
      "loss": 2.9527,
      "step": 85884
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8174166679382324,
      "learning_rate": 0.00041675270342235633,
      "loss": 2.9451,
      "step": 85885
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0579984188079834,
      "learning_rate": 0.00041674893533471874,
      "loss": 3.2644,
      "step": 85886
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6630852222442627,
      "learning_rate": 0.00041674516722537525,
      "loss": 2.9795,
      "step": 85887
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.21018123626709,
      "learning_rate": 0.0004167413990943267,
      "loss": 2.8642,
      "step": 85888
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.2828493118286133,
      "learning_rate": 0.0004167376309415737,
      "loss": 2.8785,
      "step": 85889
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.707524061203003,
      "learning_rate": 0.000416733862767117,
      "loss": 3.0807,
      "step": 85890
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8882486820220947,
      "learning_rate": 0.0004167300945709572,
      "loss": 2.9588,
      "step": 85891
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.536489248275757,
      "learning_rate": 0.0004167263263530952,
      "loss": 3.0283,
      "step": 85892
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.584924340248108,
      "learning_rate": 0.0004167225581135314,
      "loss": 2.8066,
      "step": 85893
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4320244789123535,
      "learning_rate": 0.00041671878985226677,
      "loss": 3.1908,
      "step": 85894
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.300969123840332,
      "learning_rate": 0.00041671502156930197,
      "loss": 3.0374,
      "step": 85895
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9757295846939087,
      "learning_rate": 0.0004167112532646375,
      "loss": 2.9931,
      "step": 85896
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8246253728866577,
      "learning_rate": 0.00041670748493827426,
      "loss": 3.2062,
      "step": 85897
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6239463090896606,
      "learning_rate": 0.00041670371659021293,
      "loss": 3.0947,
      "step": 85898
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.013545036315918,
      "learning_rate": 0.00041669994822045417,
      "loss": 2.8776,
      "step": 85899
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5882734060287476,
      "learning_rate": 0.0004166961798289987,
      "loss": 2.9051,
      "step": 85900
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5627131462097168,
      "learning_rate": 0.0004166924114158472,
      "loss": 3.118,
      "step": 85901
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6696950197219849,
      "learning_rate": 0.0004166886429810004,
      "loss": 3.1034,
      "step": 85902
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.768426537513733,
      "learning_rate": 0.00041668487452445894,
      "loss": 3.0373,
      "step": 85903
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.990383267402649,
      "learning_rate": 0.0004166811060462236,
      "loss": 2.7594,
      "step": 85904
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9255982637405396,
      "learning_rate": 0.0004166773375462949,
      "loss": 2.997,
      "step": 85905
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7287487983703613,
      "learning_rate": 0.0004166735690246738,
      "loss": 2.9084,
      "step": 85906
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.868221640586853,
      "learning_rate": 0.0004166698004813609,
      "loss": 3.1909,
      "step": 85907
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7495301961898804,
      "learning_rate": 0.0004166660319163568,
      "loss": 2.6356,
      "step": 85908
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4512847661972046,
      "learning_rate": 0.0004166622633296623,
      "loss": 3.1609,
      "step": 85909
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7765626907348633,
      "learning_rate": 0.0004166584947212781,
      "loss": 3.0925,
      "step": 85910
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1001815795898438,
      "learning_rate": 0.000416654726091205,
      "loss": 2.9281,
      "step": 85911
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4795671701431274,
      "learning_rate": 0.00041665095743944345,
      "loss": 2.985,
      "step": 85912
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.933217763900757,
      "learning_rate": 0.00041664718876599423,
      "loss": 3.0873,
      "step": 85913
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.673635959625244,
      "learning_rate": 0.00041664342007085827,
      "loss": 3.0323,
      "step": 85914
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7598265409469604,
      "learning_rate": 0.000416639651354036,
      "loss": 3.1378,
      "step": 85915
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9534655809402466,
      "learning_rate": 0.0004166358826155282,
      "loss": 2.785,
      "step": 85916
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.002505302429199,
      "learning_rate": 0.0004166321138553357,
      "loss": 2.8786,
      "step": 85917
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3505771160125732,
      "learning_rate": 0.00041662834507345895,
      "loss": 3.0794,
      "step": 85918
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8542085886001587,
      "learning_rate": 0.0004166245762698988,
      "loss": 2.9919,
      "step": 85919
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.683163046836853,
      "learning_rate": 0.00041662080744465604,
      "loss": 3.0944,
      "step": 85920
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3186545372009277,
      "learning_rate": 0.00041661703859773117,
      "loss": 3.0922,
      "step": 85921
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7739022970199585,
      "learning_rate": 0.000416613269729125,
      "loss": 2.9836,
      "step": 85922
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0394234657287598,
      "learning_rate": 0.0004166095008388383,
      "loss": 2.9769,
      "step": 85923
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.165242910385132,
      "learning_rate": 0.00041660573192687166,
      "loss": 3.0456,
      "step": 85924
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8154633045196533,
      "learning_rate": 0.00041660196299322575,
      "loss": 2.8325,
      "step": 85925
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8480212688446045,
      "learning_rate": 0.0004165981940379014,
      "loss": 3.0152,
      "step": 85926
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.773313045501709,
      "learning_rate": 0.0004165944250608993,
      "loss": 3.0866,
      "step": 85927
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1000659465789795,
      "learning_rate": 0.00041659065606221995,
      "loss": 3.0668,
      "step": 85928
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7453380823135376,
      "learning_rate": 0.0004165868870418643,
      "loss": 3.1899,
      "step": 85929
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.59144926071167,
      "learning_rate": 0.0004165831179998329,
      "loss": 2.9949,
      "step": 85930
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8988436460494995,
      "learning_rate": 0.0004165793489361265,
      "loss": 2.8236,
      "step": 85931
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.143192768096924,
      "learning_rate": 0.00041657557985074584,
      "loss": 2.8311,
      "step": 85932
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2257211208343506,
      "learning_rate": 0.00041657181074369155,
      "loss": 2.9891,
      "step": 85933
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5903912782669067,
      "learning_rate": 0.0004165680416149644,
      "loss": 2.9979,
      "step": 85934
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2599592208862305,
      "learning_rate": 0.000416564272464565,
      "loss": 3.0383,
      "step": 85935
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9652832746505737,
      "learning_rate": 0.0004165605032924941,
      "loss": 3.2169,
      "step": 85936
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.661959171295166,
      "learning_rate": 0.00041655673409875244,
      "loss": 2.8907,
      "step": 85937
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.247783660888672,
      "learning_rate": 0.0004165529648833407,
      "loss": 2.947,
      "step": 85938
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.075429677963257,
      "learning_rate": 0.00041654919564625945,
      "loss": 2.882,
      "step": 85939
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9812535047531128,
      "learning_rate": 0.0004165454263875096,
      "loss": 3.0592,
      "step": 85940
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7329998016357422,
      "learning_rate": 0.00041654165710709176,
      "loss": 2.8778,
      "step": 85941
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5086146593093872,
      "learning_rate": 0.00041653788780500657,
      "loss": 3.1713,
      "step": 85942
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5038466453552246,
      "learning_rate": 0.00041653411848125484,
      "loss": 3.0093,
      "step": 85943
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.872031569480896,
      "learning_rate": 0.0004165303491358372,
      "loss": 2.7388,
      "step": 85944
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7085391283035278,
      "learning_rate": 0.0004165265797687543,
      "loss": 3.147,
      "step": 85945
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.2686870098114014,
      "learning_rate": 0.000416522810380007,
      "loss": 3.1303,
      "step": 85946
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.894200325012207,
      "learning_rate": 0.0004165190409695959,
      "loss": 3.0354,
      "step": 85947
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2709691524505615,
      "learning_rate": 0.0004165152715375217,
      "loss": 3.0446,
      "step": 85948
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7591999769210815,
      "learning_rate": 0.0004165115020837851,
      "loss": 3.0259,
      "step": 85949
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.3046302795410156,
      "learning_rate": 0.0004165077326083868,
      "loss": 3.0493,
      "step": 85950
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7677433490753174,
      "learning_rate": 0.0004165039631113275,
      "loss": 3.1064,
      "step": 85951
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.35886812210083,
      "learning_rate": 0.0004165001935926079,
      "loss": 3.0325,
      "step": 85952
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7346010208129883,
      "learning_rate": 0.0004164964240522288,
      "loss": 2.8937,
      "step": 85953
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9418833255767822,
      "learning_rate": 0.0004164926544901908,
      "loss": 2.9955,
      "step": 85954
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.442006826400757,
      "learning_rate": 0.0004164888849064946,
      "loss": 3.2533,
      "step": 85955
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.702677845954895,
      "learning_rate": 0.00041648511530114094,
      "loss": 2.8422,
      "step": 85956
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6358562707901,
      "learning_rate": 0.00041648134567413045,
      "loss": 2.9466,
      "step": 85957
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6295177936553955,
      "learning_rate": 0.0004164775760254638,
      "loss": 3.0524,
      "step": 85958
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5103224515914917,
      "learning_rate": 0.00041647380635514194,
      "loss": 3.3178,
      "step": 85959
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0389251708984375,
      "learning_rate": 0.0004164700366631653,
      "loss": 2.9423,
      "step": 85960
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9811431169509888,
      "learning_rate": 0.00041646626694953474,
      "loss": 2.9954,
      "step": 85961
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8661986589431763,
      "learning_rate": 0.00041646249721425077,
      "loss": 2.9231,
      "step": 85962
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.128403902053833,
      "learning_rate": 0.0004164587274573144,
      "loss": 2.8998,
      "step": 85963
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4884066581726074,
      "learning_rate": 0.00041645495767872606,
      "loss": 3.1005,
      "step": 85964
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0013914108276367,
      "learning_rate": 0.00041645118787848656,
      "loss": 3.1649,
      "step": 85965
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8763772249221802,
      "learning_rate": 0.00041644741805659663,
      "loss": 2.8292,
      "step": 85966
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2330639362335205,
      "learning_rate": 0.00041644364821305686,
      "loss": 2.9592,
      "step": 85967
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6993601322174072,
      "learning_rate": 0.000416439878347868,
      "loss": 3.0164,
      "step": 85968
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5299196243286133,
      "learning_rate": 0.0004164361084610308,
      "loss": 3.066,
      "step": 85969
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.593614935874939,
      "learning_rate": 0.000416432338552546,
      "loss": 3.061,
      "step": 85970
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.123687267303467,
      "learning_rate": 0.0004164285686224141,
      "loss": 2.8433,
      "step": 85971
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7550550699234009,
      "learning_rate": 0.00041642479867063606,
      "loss": 3.0283,
      "step": 85972
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6667201519012451,
      "learning_rate": 0.00041642102869721235,
      "loss": 2.9256,
      "step": 85973
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.506200075149536,
      "learning_rate": 0.0004164172587021439,
      "loss": 2.8504,
      "step": 85974
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.609957218170166,
      "learning_rate": 0.0004164134886854311,
      "loss": 2.8596,
      "step": 85975
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4993656873703003,
      "learning_rate": 0.00041640971864707497,
      "loss": 2.8677,
      "step": 85976
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6995577812194824,
      "learning_rate": 0.00041640594858707603,
      "loss": 2.9285,
      "step": 85977
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.98411226272583,
      "learning_rate": 0.00041640217850543506,
      "loss": 2.9178,
      "step": 85978
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3107717037200928,
      "learning_rate": 0.0004163984084021527,
      "loss": 2.9732,
      "step": 85979
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.084460735321045,
      "learning_rate": 0.0004163946382772296,
      "loss": 2.8158,
      "step": 85980
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7141438722610474,
      "learning_rate": 0.0004163908681306667,
      "loss": 2.8722,
      "step": 85981
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8031160831451416,
      "learning_rate": 0.00041638709796246447,
      "loss": 2.97,
      "step": 85982
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5825281143188477,
      "learning_rate": 0.00041638332777262365,
      "loss": 2.9251,
      "step": 85983
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6976417303085327,
      "learning_rate": 0.0004163795575611451,
      "loss": 2.9602,
      "step": 85984
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.070554256439209,
      "learning_rate": 0.0004163757873280292,
      "loss": 2.9459,
      "step": 85985
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.671526312828064,
      "learning_rate": 0.00041637201707327696,
      "loss": 3.1682,
      "step": 85986
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7875709533691406,
      "learning_rate": 0.000416368246796889,
      "loss": 3.0595,
      "step": 85987
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7590868473052979,
      "learning_rate": 0.0004163644764988659,
      "loss": 3.0285,
      "step": 85988
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5669595003128052,
      "learning_rate": 0.00041636070617920843,
      "loss": 2.853,
      "step": 85989
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7265877723693848,
      "learning_rate": 0.0004163569358379174,
      "loss": 2.9586,
      "step": 85990
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.730628252029419,
      "learning_rate": 0.00041635316547499336,
      "loss": 3.248,
      "step": 85991
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6801915168762207,
      "learning_rate": 0.00041634939509043703,
      "loss": 2.9787,
      "step": 85992
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.129141092300415,
      "learning_rate": 0.0004163456246842493,
      "loss": 2.9979,
      "step": 85993
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4278626441955566,
      "learning_rate": 0.0004163418542564306,
      "loss": 3.0422,
      "step": 85994
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8616613149642944,
      "learning_rate": 0.00041633808380698174,
      "loss": 3.0903,
      "step": 85995
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0162127017974854,
      "learning_rate": 0.00041633431333590353,
      "loss": 2.9413,
      "step": 85996
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6204156875610352,
      "learning_rate": 0.0004163305428431965,
      "loss": 2.8992,
      "step": 85997
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.699679970741272,
      "learning_rate": 0.00041632677232886147,
      "loss": 3.2601,
      "step": 85998
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7960381507873535,
      "learning_rate": 0.00041632300179289907,
      "loss": 2.6648,
      "step": 85999
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.711048126220703,
      "learning_rate": 0.00041631923123531004,
      "loss": 3.2453,
      "step": 86000
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.616358757019043,
      "learning_rate": 0.00041631546065609503,
      "loss": 2.9497,
      "step": 86001
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2658514976501465,
      "learning_rate": 0.00041631169005525485,
      "loss": 3.0042,
      "step": 86002
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1540441513061523,
      "learning_rate": 0.0004163079194327901,
      "loss": 2.9266,
      "step": 86003
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8550584316253662,
      "learning_rate": 0.00041630414878870146,
      "loss": 3.0098,
      "step": 86004
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5324385166168213,
      "learning_rate": 0.0004163003781229898,
      "loss": 2.756,
      "step": 86005
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4429974555969238,
      "learning_rate": 0.00041629660743565557,
      "loss": 3.0603,
      "step": 86006
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.3149466514587402,
      "learning_rate": 0.00041629283672669966,
      "loss": 3.1255,
      "step": 86007
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.840477228164673,
      "learning_rate": 0.00041628906599612276,
      "loss": 2.8259,
      "step": 86008
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5594151020050049,
      "learning_rate": 0.00041628529524392546,
      "loss": 3.0037,
      "step": 86009
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8106125593185425,
      "learning_rate": 0.00041628152447010857,
      "loss": 2.9614,
      "step": 86010
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1023616790771484,
      "learning_rate": 0.0004162777536746728,
      "loss": 3.203,
      "step": 86011
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.805716872215271,
      "learning_rate": 0.00041627398285761874,
      "loss": 3.0545,
      "step": 86012
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1105480194091797,
      "learning_rate": 0.0004162702120189471,
      "loss": 2.9144,
      "step": 86013
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6913106441497803,
      "learning_rate": 0.00041626644115865875,
      "loss": 2.8917,
      "step": 86014
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.937500476837158,
      "learning_rate": 0.0004162626702767542,
      "loss": 3.006,
      "step": 86015
    },
    {
      "epoch": 1.12,
      "grad_norm": 6.704499244689941,
      "learning_rate": 0.00041625889937323417,
      "loss": 2.9704,
      "step": 86016
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8307793140411377,
      "learning_rate": 0.0004162551284480995,
      "loss": 2.9968,
      "step": 86017
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.3601207733154297,
      "learning_rate": 0.0004162513575013508,
      "loss": 3.1371,
      "step": 86018
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3298988342285156,
      "learning_rate": 0.0004162475865329887,
      "loss": 2.7037,
      "step": 86019
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5492358207702637,
      "learning_rate": 0.00041624381554301415,
      "loss": 3.2106,
      "step": 86020
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5070691108703613,
      "learning_rate": 0.00041624004453142755,
      "loss": 3.1169,
      "step": 86021
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.004686117172241,
      "learning_rate": 0.0004162362734982297,
      "loss": 2.9579,
      "step": 86022
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.529261589050293,
      "learning_rate": 0.00041623250244342144,
      "loss": 3.092,
      "step": 86023
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9502673149108887,
      "learning_rate": 0.0004162287313670033,
      "loss": 3.0094,
      "step": 86024
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2884232997894287,
      "learning_rate": 0.0004162249602689761,
      "loss": 3.1274,
      "step": 86025
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.681765079498291,
      "learning_rate": 0.00041622118914934044,
      "loss": 3.0537,
      "step": 86026
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7096788883209229,
      "learning_rate": 0.0004162174180080971,
      "loss": 2.9388,
      "step": 86027
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.040548324584961,
      "learning_rate": 0.00041621364684524666,
      "loss": 3.1417,
      "step": 86028
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.028083562850952,
      "learning_rate": 0.00041620987566079004,
      "loss": 2.9588,
      "step": 86029
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7117637395858765,
      "learning_rate": 0.00041620610445472766,
      "loss": 2.9106,
      "step": 86030
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8201621770858765,
      "learning_rate": 0.00041620233322706043,
      "loss": 3.1037,
      "step": 86031
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9986293315887451,
      "learning_rate": 0.00041619856197778907,
      "loss": 3.0028,
      "step": 86032
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0976853370666504,
      "learning_rate": 0.0004161947907069141,
      "loss": 3.038,
      "step": 86033
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.626617670059204,
      "learning_rate": 0.0004161910194144364,
      "loss": 2.8732,
      "step": 86034
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.262108564376831,
      "learning_rate": 0.0004161872481003566,
      "loss": 3.2229,
      "step": 86035
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8655850887298584,
      "learning_rate": 0.0004161834767646753,
      "loss": 3.1298,
      "step": 86036
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.791093111038208,
      "learning_rate": 0.0004161797054073934,
      "loss": 2.9876,
      "step": 86037
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.551008462905884,
      "learning_rate": 0.0004161759340285115,
      "loss": 3.0419,
      "step": 86038
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2122230529785156,
      "learning_rate": 0.00041617216262803026,
      "loss": 2.8921,
      "step": 86039
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5559149980545044,
      "learning_rate": 0.00041616839120595036,
      "loss": 2.8775,
      "step": 86040
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7106311321258545,
      "learning_rate": 0.0004161646197622727,
      "loss": 3.3213,
      "step": 86041
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.047262191772461,
      "learning_rate": 0.0004161608482969978,
      "loss": 2.9487,
      "step": 86042
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.170440912246704,
      "learning_rate": 0.00041615707681012635,
      "loss": 3.0465,
      "step": 86043
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6771049499511719,
      "learning_rate": 0.00041615330530165906,
      "loss": 2.9917,
      "step": 86044
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.1159958839416504,
      "learning_rate": 0.0004161495337715968,
      "loss": 3.0734,
      "step": 86045
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.434408664703369,
      "learning_rate": 0.0004161457622199401,
      "loss": 2.9482,
      "step": 86046
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.988457202911377,
      "learning_rate": 0.0004161419906466897,
      "loss": 2.9423,
      "step": 86047
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.010118007659912,
      "learning_rate": 0.0004161382190518464,
      "loss": 3.2831,
      "step": 86048
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.490718364715576,
      "learning_rate": 0.0004161344474354107,
      "loss": 3.0291,
      "step": 86049
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7173264026641846,
      "learning_rate": 0.0004161306757973834,
      "loss": 2.8756,
      "step": 86050
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.3781216144561768,
      "learning_rate": 0.0004161269041377653,
      "loss": 2.9635,
      "step": 86051
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5463601350784302,
      "learning_rate": 0.000416123132456557,
      "loss": 2.7909,
      "step": 86052
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.19279408454895,
      "learning_rate": 0.00041611936075375923,
      "loss": 2.9068,
      "step": 86053
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.029229640960693,
      "learning_rate": 0.0004161155890293726,
      "loss": 2.9608,
      "step": 86054
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4377572536468506,
      "learning_rate": 0.000416111817283398,
      "loss": 2.9776,
      "step": 86055
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.701521158218384,
      "learning_rate": 0.00041610804551583594,
      "loss": 2.9041,
      "step": 86056
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8491756916046143,
      "learning_rate": 0.0004161042737266872,
      "loss": 3.0545,
      "step": 86057
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0243706703186035,
      "learning_rate": 0.00041610050191595257,
      "loss": 3.0107,
      "step": 86058
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6152697801589966,
      "learning_rate": 0.00041609673008363257,
      "loss": 2.8879,
      "step": 86059
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6223241090774536,
      "learning_rate": 0.000416092958229728,
      "loss": 3.0548,
      "step": 86060
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.099604845046997,
      "learning_rate": 0.0004160891863542396,
      "loss": 3.061,
      "step": 86061
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.452092409133911,
      "learning_rate": 0.00041608541445716803,
      "loss": 3.178,
      "step": 86062
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6134814023971558,
      "learning_rate": 0.000416081642538514,
      "loss": 2.7427,
      "step": 86063
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5114827156066895,
      "learning_rate": 0.00041607787059827814,
      "loss": 3.0298,
      "step": 86064
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.301353693008423,
      "learning_rate": 0.0004160740986364613,
      "loss": 2.9085,
      "step": 86065
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.012430429458618,
      "learning_rate": 0.000416070326653064,
      "loss": 2.9825,
      "step": 86066
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7286932468414307,
      "learning_rate": 0.00041606655464808705,
      "loss": 3.0379,
      "step": 86067
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.376248836517334,
      "learning_rate": 0.00041606278262153114,
      "loss": 3.1938,
      "step": 86068
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.931701898574829,
      "learning_rate": 0.00041605901057339707,
      "loss": 3.0213,
      "step": 86069
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.9407365322113037,
      "learning_rate": 0.00041605523850368536,
      "loss": 2.7264,
      "step": 86070
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.060159683227539,
      "learning_rate": 0.00041605146641239673,
      "loss": 3.2089,
      "step": 86071
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0267696380615234,
      "learning_rate": 0.0004160476942995321,
      "loss": 3.234,
      "step": 86072
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.6567819118499756,
      "learning_rate": 0.0004160439221650919,
      "loss": 3.1506,
      "step": 86073
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.205268621444702,
      "learning_rate": 0.00041604015000907695,
      "loss": 2.7057,
      "step": 86074
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6806936264038086,
      "learning_rate": 0.000416036377831488,
      "loss": 2.9741,
      "step": 86075
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.080864191055298,
      "learning_rate": 0.0004160326056323256,
      "loss": 3.0537,
      "step": 86076
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.399444580078125,
      "learning_rate": 0.00041602883341159064,
      "loss": 3.1161,
      "step": 86077
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5817437171936035,
      "learning_rate": 0.0004160250611692837,
      "loss": 3.0023,
      "step": 86078
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.4254655838012695,
      "learning_rate": 0.0004160212889054055,
      "loss": 3.2569,
      "step": 86079
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.515195608139038,
      "learning_rate": 0.00041601751661995675,
      "loss": 2.983,
      "step": 86080
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.73978590965271,
      "learning_rate": 0.00041601374431293823,
      "loss": 3.005,
      "step": 86081
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.82962167263031,
      "learning_rate": 0.00041600997198435044,
      "loss": 3.4342,
      "step": 86082
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8492457866668701,
      "learning_rate": 0.0004160061996341943,
      "loss": 2.9945,
      "step": 86083
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5795782804489136,
      "learning_rate": 0.0004160024272624704,
      "loss": 3.0625,
      "step": 86084
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5890604257583618,
      "learning_rate": 0.0004159986548691794,
      "loss": 3.2536,
      "step": 86085
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6584115028381348,
      "learning_rate": 0.00041599488245432206,
      "loss": 2.9791,
      "step": 86086
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.0459957122802734,
      "learning_rate": 0.0004159911100178992,
      "loss": 3.0544,
      "step": 86087
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.397684931755066,
      "learning_rate": 0.00041598733755991127,
      "loss": 2.7414,
      "step": 86088
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2661547660827637,
      "learning_rate": 0.00041598356508035916,
      "loss": 3.1208,
      "step": 86089
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.2902472019195557,
      "learning_rate": 0.0004159797925792435,
      "loss": 2.8743,
      "step": 86090
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.292999505996704,
      "learning_rate": 0.00041597602005656505,
      "loss": 2.8499,
      "step": 86091
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8960134983062744,
      "learning_rate": 0.0004159722475123244,
      "loss": 3.0053,
      "step": 86092
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.65047550201416,
      "learning_rate": 0.0004159684749465224,
      "loss": 2.9955,
      "step": 86093
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.608031272888184,
      "learning_rate": 0.00041596470235915966,
      "loss": 3.4883,
      "step": 86094
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.274308204650879,
      "learning_rate": 0.00041596092975023686,
      "loss": 2.9328,
      "step": 86095
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.700015664100647,
      "learning_rate": 0.00041595715711975476,
      "loss": 2.6856,
      "step": 86096
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.090024471282959,
      "learning_rate": 0.000415953384467714,
      "loss": 2.9313,
      "step": 86097
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.6310055255889893,
      "learning_rate": 0.0004159496117941153,
      "loss": 3.0731,
      "step": 86098
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.141327381134033,
      "learning_rate": 0.00041594583909895945,
      "loss": 3.1518,
      "step": 86099
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9673352241516113,
      "learning_rate": 0.000415942066382247,
      "loss": 3.1645,
      "step": 86100
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5956897735595703,
      "learning_rate": 0.0004159382936439788,
      "loss": 2.8978,
      "step": 86101
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.383274555206299,
      "learning_rate": 0.0004159345208841555,
      "loss": 3.0909,
      "step": 86102
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2883527278900146,
      "learning_rate": 0.0004159307481027777,
      "loss": 2.851,
      "step": 86103
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5996675491333008,
      "learning_rate": 0.00041592697529984626,
      "loss": 2.9228,
      "step": 86104
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.565796971321106,
      "learning_rate": 0.0004159232024753618,
      "loss": 3.2241,
      "step": 86105
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.233321189880371,
      "learning_rate": 0.00041591942962932495,
      "loss": 3.277,
      "step": 86106
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.5963926315307617,
      "learning_rate": 0.00041591565676173657,
      "loss": 3.0205,
      "step": 86107
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.5889923572540283,
      "learning_rate": 0.0004159118838725972,
      "loss": 3.1664,
      "step": 86108
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2190895080566406,
      "learning_rate": 0.00041590811096190773,
      "loss": 2.9645,
      "step": 86109
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.730360746383667,
      "learning_rate": 0.0004159043380296687,
      "loss": 2.8239,
      "step": 86110
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.590909719467163,
      "learning_rate": 0.0004159005650758808,
      "loss": 2.9975,
      "step": 86111
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.206886053085327,
      "learning_rate": 0.0004158967921005449,
      "loss": 3.0253,
      "step": 86112
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.590920329093933,
      "learning_rate": 0.00041589301910366155,
      "loss": 3.1077,
      "step": 86113
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.88586688041687,
      "learning_rate": 0.0004158892460852315,
      "loss": 2.9437,
      "step": 86114
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.843700408935547,
      "learning_rate": 0.00041588547304525553,
      "loss": 3.2139,
      "step": 86115
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.9661295413970947,
      "learning_rate": 0.00041588169998373417,
      "loss": 3.0469,
      "step": 86116
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8066861629486084,
      "learning_rate": 0.0004158779269006682,
      "loss": 2.935,
      "step": 86117
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7906070947647095,
      "learning_rate": 0.00041587415379605835,
      "loss": 3.16,
      "step": 86118
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.6541600227355957,
      "learning_rate": 0.0004158703806699054,
      "loss": 3.003,
      "step": 86119
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.9682164192199707,
      "learning_rate": 0.0004158666075222098,
      "loss": 2.9547,
      "step": 86120
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.58185076713562,
      "learning_rate": 0.0004158628343529726,
      "loss": 3.0375,
      "step": 86121
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.286899447441101,
      "learning_rate": 0.0004158590611621942,
      "loss": 3.1204,
      "step": 86122
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.0740725994110107,
      "learning_rate": 0.0004158552879498754,
      "loss": 2.8731,
      "step": 86123
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.368896961212158,
      "learning_rate": 0.000415851514716017,
      "loss": 3.0324,
      "step": 86124
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4513206481933594,
      "learning_rate": 0.0004158477414606195,
      "loss": 2.9725,
      "step": 86125
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0494651794433594,
      "learning_rate": 0.0004158439681836838,
      "loss": 2.9366,
      "step": 86126
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5798792839050293,
      "learning_rate": 0.0004158401948852105,
      "loss": 3.027,
      "step": 86127
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.931010365486145,
      "learning_rate": 0.00041583642156520033,
      "loss": 2.8872,
      "step": 86128
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.739976406097412,
      "learning_rate": 0.00041583264822365394,
      "loss": 2.9766,
      "step": 86129
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.621446132659912,
      "learning_rate": 0.00041582887486057215,
      "loss": 3.1422,
      "step": 86130
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6172735691070557,
      "learning_rate": 0.00041582510147595555,
      "loss": 2.9165,
      "step": 86131
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1040937900543213,
      "learning_rate": 0.0004158213280698048,
      "loss": 3.0434,
      "step": 86132
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.165919542312622,
      "learning_rate": 0.0004158175546421208,
      "loss": 3.0175,
      "step": 86133
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5955568552017212,
      "learning_rate": 0.00041581378119290405,
      "loss": 2.9225,
      "step": 86134
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8104822635650635,
      "learning_rate": 0.0004158100077221554,
      "loss": 3.0397,
      "step": 86135
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8934770822525024,
      "learning_rate": 0.00041580623422987547,
      "loss": 3.0958,
      "step": 86136
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.092358112335205,
      "learning_rate": 0.0004158024607160649,
      "loss": 3.0467,
      "step": 86137
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5661276578903198,
      "learning_rate": 0.00041579868718072453,
      "loss": 3.1941,
      "step": 86138
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.999592900276184,
      "learning_rate": 0.000415794913623855,
      "loss": 3.1469,
      "step": 86139
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.337141513824463,
      "learning_rate": 0.000415791140045457,
      "loss": 3.1008,
      "step": 86140
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8160128593444824,
      "learning_rate": 0.0004157873664455312,
      "loss": 3.0568,
      "step": 86141
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8159351348876953,
      "learning_rate": 0.0004157835928240784,
      "loss": 3.1114,
      "step": 86142
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4381024837493896,
      "learning_rate": 0.0004157798191810992,
      "loss": 2.9647,
      "step": 86143
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.670412540435791,
      "learning_rate": 0.0004157760455165943,
      "loss": 3.0714,
      "step": 86144
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9930694103240967,
      "learning_rate": 0.00041577227183056453,
      "loss": 3.2193,
      "step": 86145
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8562425374984741,
      "learning_rate": 0.00041576849812301057,
      "loss": 3.0808,
      "step": 86146
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.279024600982666,
      "learning_rate": 0.0004157647243939329,
      "loss": 2.9775,
      "step": 86147
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.599514126777649,
      "learning_rate": 0.00041576095064333254,
      "loss": 2.9128,
      "step": 86148
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2780556678771973,
      "learning_rate": 0.00041575717687120995,
      "loss": 3.0426,
      "step": 86149
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.580926537513733,
      "learning_rate": 0.00041575340307756594,
      "loss": 2.9664,
      "step": 86150
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7702159881591797,
      "learning_rate": 0.00041574962926240117,
      "loss": 2.9771,
      "step": 86151
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1498684883117676,
      "learning_rate": 0.0004157458554257163,
      "loss": 3.1009,
      "step": 86152
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.058436632156372,
      "learning_rate": 0.00041574208156751216,
      "loss": 3.0177,
      "step": 86153
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7414439916610718,
      "learning_rate": 0.00041573830768778944,
      "loss": 3.2397,
      "step": 86154
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8156161308288574,
      "learning_rate": 0.00041573453378654867,
      "loss": 2.9612,
      "step": 86155
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.96376371383667,
      "learning_rate": 0.0004157307598637907,
      "loss": 2.9994,
      "step": 86156
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9363970756530762,
      "learning_rate": 0.00041572698591951627,
      "loss": 3.1015,
      "step": 86157
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7160718441009521,
      "learning_rate": 0.00041572321195372595,
      "loss": 2.9706,
      "step": 86158
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.391988754272461,
      "learning_rate": 0.00041571943796642045,
      "loss": 2.9839,
      "step": 86159
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7379199266433716,
      "learning_rate": 0.0004157156639576006,
      "loss": 3.1694,
      "step": 86160
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6065118312835693,
      "learning_rate": 0.000415711889927267,
      "loss": 2.9552,
      "step": 86161
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5752289295196533,
      "learning_rate": 0.0004157081158754204,
      "loss": 2.8465,
      "step": 86162
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4982268810272217,
      "learning_rate": 0.0004157043418020615,
      "loss": 3.127,
      "step": 86163
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6124407052993774,
      "learning_rate": 0.00041570056770719085,
      "loss": 3.0094,
      "step": 86164
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9777294397354126,
      "learning_rate": 0.00041569679359080934,
      "loss": 3.1138,
      "step": 86165
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4823501110076904,
      "learning_rate": 0.0004156930194529177,
      "loss": 3.0731,
      "step": 86166
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7175172567367554,
      "learning_rate": 0.0004156892452935164,
      "loss": 3.2437,
      "step": 86167
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5550358295440674,
      "learning_rate": 0.0004156854711126064,
      "loss": 2.9692,
      "step": 86168
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.255988121032715,
      "learning_rate": 0.00041568169691018824,
      "loss": 3.1008,
      "step": 86169
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.909877061843872,
      "learning_rate": 0.0004156779226862627,
      "loss": 3.0228,
      "step": 86170
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1589272022247314,
      "learning_rate": 0.0004156741484408304,
      "loss": 2.9474,
      "step": 86171
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.508398175239563,
      "learning_rate": 0.0004156703741738922,
      "loss": 3.0789,
      "step": 86172
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6321284770965576,
      "learning_rate": 0.0004156665998854486,
      "loss": 3.1,
      "step": 86173
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7985937595367432,
      "learning_rate": 0.0004156628255755004,
      "loss": 3.0693,
      "step": 86174
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6021134853363037,
      "learning_rate": 0.00041565905124404836,
      "loss": 2.9939,
      "step": 86175
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5057564973831177,
      "learning_rate": 0.00041565527689109297,
      "loss": 2.898,
      "step": 86176
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.843550443649292,
      "learning_rate": 0.0004156515025166352,
      "loss": 2.9279,
      "step": 86177
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5069730281829834,
      "learning_rate": 0.00041564772812067557,
      "loss": 3.2533,
      "step": 86178
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.417320489883423,
      "learning_rate": 0.00041564395370321497,
      "loss": 3.0524,
      "step": 86179
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5667595863342285,
      "learning_rate": 0.0004156401792642539,
      "loss": 3.0829,
      "step": 86180
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5615513324737549,
      "learning_rate": 0.000415636404803793,
      "loss": 2.9996,
      "step": 86181
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.923410177230835,
      "learning_rate": 0.00041563263032183334,
      "loss": 3.1858,
      "step": 86182
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.763141393661499,
      "learning_rate": 0.00041562885581837523,
      "loss": 2.9238,
      "step": 86183
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.76101553440094,
      "learning_rate": 0.00041562508129341957,
      "loss": 3.0065,
      "step": 86184
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.704574704170227,
      "learning_rate": 0.0004156213067469671,
      "loss": 2.928,
      "step": 86185
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7791924476623535,
      "learning_rate": 0.00041561753217901837,
      "loss": 3.0966,
      "step": 86186
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6495643854141235,
      "learning_rate": 0.0004156137575895742,
      "loss": 2.9673,
      "step": 86187
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6974667310714722,
      "learning_rate": 0.00041560998297863523,
      "loss": 3.3693,
      "step": 86188
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6786766052246094,
      "learning_rate": 0.0004156062083462022,
      "loss": 3.1248,
      "step": 86189
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7262266874313354,
      "learning_rate": 0.0004156024336922757,
      "loss": 3.0123,
      "step": 86190
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.551943302154541,
      "learning_rate": 0.0004155986590168567,
      "loss": 2.9361,
      "step": 86191
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8332254886627197,
      "learning_rate": 0.00041559488431994565,
      "loss": 3.1802,
      "step": 86192
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6415996551513672,
      "learning_rate": 0.00041559110960154327,
      "loss": 2.9355,
      "step": 86193
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.121711015701294,
      "learning_rate": 0.00041558733486165043,
      "loss": 2.9889,
      "step": 86194
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.98098886013031,
      "learning_rate": 0.0004155835601002676,
      "loss": 2.858,
      "step": 86195
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7723205089569092,
      "learning_rate": 0.00041557978531739565,
      "loss": 3.0909,
      "step": 86196
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4609123468399048,
      "learning_rate": 0.00041557601051303523,
      "loss": 3.0208,
      "step": 86197
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.3747620582580566,
      "learning_rate": 0.00041557223568718706,
      "loss": 3.0089,
      "step": 86198
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7801045179367065,
      "learning_rate": 0.00041556846083985185,
      "loss": 3.1538,
      "step": 86199
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9961215257644653,
      "learning_rate": 0.0004155646859710303,
      "loss": 3.0653,
      "step": 86200
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2535171508789062,
      "learning_rate": 0.000415560911080723,
      "loss": 3.0365,
      "step": 86201
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7670773267745972,
      "learning_rate": 0.0004155571361689308,
      "loss": 2.8778,
      "step": 86202
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.401256799697876,
      "learning_rate": 0.00041555336123565444,
      "loss": 2.993,
      "step": 86203
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8966624736785889,
      "learning_rate": 0.0004155495862808944,
      "loss": 3.193,
      "step": 86204
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.831255316734314,
      "learning_rate": 0.0004155458113046515,
      "loss": 2.972,
      "step": 86205
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.164142370223999,
      "learning_rate": 0.00041554203630692657,
      "loss": 3.1769,
      "step": 86206
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6832520961761475,
      "learning_rate": 0.00041553826128772005,
      "loss": 2.8572,
      "step": 86207
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4267337322235107,
      "learning_rate": 0.0004155344862470329,
      "loss": 2.9634,
      "step": 86208
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7040138244628906,
      "learning_rate": 0.0004155307111848657,
      "loss": 3.1362,
      "step": 86209
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2319631576538086,
      "learning_rate": 0.0004155269361012191,
      "loss": 3.0554,
      "step": 86210
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.9545249938964844,
      "learning_rate": 0.00041552316099609384,
      "loss": 3.0527,
      "step": 86211
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.459677219390869,
      "learning_rate": 0.0004155193858694908,
      "loss": 3.0045,
      "step": 86212
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.018376350402832,
      "learning_rate": 0.0004155156107214104,
      "loss": 3.1985,
      "step": 86213
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.111863136291504,
      "learning_rate": 0.00041551183555185343,
      "loss": 2.9632,
      "step": 86214
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.073823928833008,
      "learning_rate": 0.00041550806036082076,
      "loss": 3.0839,
      "step": 86215
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6957226991653442,
      "learning_rate": 0.00041550428514831286,
      "loss": 2.9777,
      "step": 86216
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6933040618896484,
      "learning_rate": 0.0004155005099143305,
      "loss": 2.9717,
      "step": 86217
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6533617973327637,
      "learning_rate": 0.00041549673465887454,
      "loss": 2.9461,
      "step": 86218
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2757697105407715,
      "learning_rate": 0.00041549295938194547,
      "loss": 2.8775,
      "step": 86219
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6001290082931519,
      "learning_rate": 0.0004154891840835441,
      "loss": 3.0476,
      "step": 86220
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.761746644973755,
      "learning_rate": 0.00041548540876367113,
      "loss": 3.2455,
      "step": 86221
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.823596477508545,
      "learning_rate": 0.0004154816334223272,
      "loss": 3.0722,
      "step": 86222
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.1950225830078125,
      "learning_rate": 0.00041547785805951307,
      "loss": 2.8952,
      "step": 86223
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.154716730117798,
      "learning_rate": 0.00041547408267522944,
      "loss": 2.9892,
      "step": 86224
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7738659381866455,
      "learning_rate": 0.00041547030726947704,
      "loss": 2.946,
      "step": 86225
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.111609935760498,
      "learning_rate": 0.00041546653184225646,
      "loss": 3.2463,
      "step": 86226
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.474704384803772,
      "learning_rate": 0.00041546275639356856,
      "loss": 3.0223,
      "step": 86227
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5750744342803955,
      "learning_rate": 0.00041545898092341384,
      "loss": 3.082,
      "step": 86228
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7005841732025146,
      "learning_rate": 0.00041545520543179317,
      "loss": 3.2214,
      "step": 86229
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.051917552947998,
      "learning_rate": 0.00041545142991870714,
      "loss": 3.0788,
      "step": 86230
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5731332302093506,
      "learning_rate": 0.0004154476543841566,
      "loss": 2.9328,
      "step": 86231
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8314073085784912,
      "learning_rate": 0.00041544387882814214,
      "loss": 2.7928,
      "step": 86232
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7197284698486328,
      "learning_rate": 0.00041544010325066447,
      "loss": 3.1819,
      "step": 86233
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4760801792144775,
      "learning_rate": 0.0004154363276517243,
      "loss": 2.983,
      "step": 86234
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.944547176361084,
      "learning_rate": 0.0004154325520313224,
      "loss": 2.9789,
      "step": 86235
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2004575729370117,
      "learning_rate": 0.0004154287763894593,
      "loss": 3.1121,
      "step": 86236
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0381569862365723,
      "learning_rate": 0.0004154250007261358,
      "loss": 3.1641,
      "step": 86237
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8041048049926758,
      "learning_rate": 0.00041542122504135276,
      "loss": 2.9028,
      "step": 86238
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.34920334815979,
      "learning_rate": 0.00041541744933511063,
      "loss": 2.8547,
      "step": 86239
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.124995231628418,
      "learning_rate": 0.00041541367360741025,
      "loss": 2.9858,
      "step": 86240
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6277790069580078,
      "learning_rate": 0.00041540989785825226,
      "loss": 2.9196,
      "step": 86241
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6156156063079834,
      "learning_rate": 0.0004154061220876374,
      "loss": 2.8586,
      "step": 86242
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8019654750823975,
      "learning_rate": 0.00041540234629556643,
      "loss": 2.949,
      "step": 86243
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6546809673309326,
      "learning_rate": 0.0004153985704820399,
      "loss": 3.1726,
      "step": 86244
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8434501886367798,
      "learning_rate": 0.0004153947946470586,
      "loss": 3.0549,
      "step": 86245
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.92449951171875,
      "learning_rate": 0.00041539101879062333,
      "loss": 3.0936,
      "step": 86246
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6596994400024414,
      "learning_rate": 0.00041538724291273455,
      "loss": 3.0053,
      "step": 86247
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.803088903427124,
      "learning_rate": 0.00041538346701339314,
      "loss": 2.919,
      "step": 86248
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.618192434310913,
      "learning_rate": 0.0004153796910925999,
      "loss": 3.1076,
      "step": 86249
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2711973190307617,
      "learning_rate": 0.00041537591515035524,
      "loss": 2.8353,
      "step": 86250
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1171157360076904,
      "learning_rate": 0.00041537213918666003,
      "loss": 3.0272,
      "step": 86251
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5575790405273438,
      "learning_rate": 0.0004153683632015151,
      "loss": 2.9309,
      "step": 86252
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7568068504333496,
      "learning_rate": 0.0004153645871949209,
      "loss": 2.9925,
      "step": 86253
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.2993030548095703,
      "learning_rate": 0.0004153608111668782,
      "loss": 3.078,
      "step": 86254
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8518176078796387,
      "learning_rate": 0.0004153570351173879,
      "loss": 2.9317,
      "step": 86255
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4685139656066895,
      "learning_rate": 0.00041535325904645045,
      "loss": 2.9074,
      "step": 86256
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.842638611793518,
      "learning_rate": 0.0004153494829540666,
      "loss": 3.2887,
      "step": 86257
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8819456100463867,
      "learning_rate": 0.00041534570684023724,
      "loss": 3.2091,
      "step": 86258
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5082193613052368,
      "learning_rate": 0.0004153419307049628,
      "loss": 3.1094,
      "step": 86259
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.502044677734375,
      "learning_rate": 0.00041533815454824417,
      "loss": 2.8595,
      "step": 86260
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0108542442321777,
      "learning_rate": 0.00041533437837008207,
      "loss": 2.8338,
      "step": 86261
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.731154203414917,
      "learning_rate": 0.00041533060217047705,
      "loss": 2.979,
      "step": 86262
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6557234525680542,
      "learning_rate": 0.0004153268259494299,
      "loss": 2.8282,
      "step": 86263
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.3846015930175781,
      "learning_rate": 0.00041532304970694136,
      "loss": 2.9576,
      "step": 86264
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.742963433265686,
      "learning_rate": 0.0004153192734430121,
      "loss": 2.8987,
      "step": 86265
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7593556642532349,
      "learning_rate": 0.0004153154971576427,
      "loss": 3.0355,
      "step": 86266
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5594068765640259,
      "learning_rate": 0.00041531172085083407,
      "loss": 2.9033,
      "step": 86267
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5740646123886108,
      "learning_rate": 0.0004153079445225868,
      "loss": 3.1491,
      "step": 86268
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.827380657196045,
      "learning_rate": 0.00041530416817290157,
      "loss": 3.1498,
      "step": 86269
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8410552740097046,
      "learning_rate": 0.0004153003918017792,
      "loss": 3.1365,
      "step": 86270
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4995256662368774,
      "learning_rate": 0.0004152966154092202,
      "loss": 3.224,
      "step": 86271
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3806533813476562,
      "learning_rate": 0.00041529283899522545,
      "loss": 3.037,
      "step": 86272
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.199385643005371,
      "learning_rate": 0.0004152890625597956,
      "loss": 3.2739,
      "step": 86273
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7215015888214111,
      "learning_rate": 0.0004152852861029313,
      "loss": 3.2509,
      "step": 86274
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.166020154953003,
      "learning_rate": 0.0004152815096246333,
      "loss": 3.1283,
      "step": 86275
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5276446342468262,
      "learning_rate": 0.0004152777331249023,
      "loss": 3.1552,
      "step": 86276
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5551053285598755,
      "learning_rate": 0.00041527395660373895,
      "loss": 2.973,
      "step": 86277
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6953204870224,
      "learning_rate": 0.00041527018006114403,
      "loss": 3.0372,
      "step": 86278
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5209522247314453,
      "learning_rate": 0.0004152664034971183,
      "loss": 2.8677,
      "step": 86279
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.697416067123413,
      "learning_rate": 0.00041526262691166224,
      "loss": 3.1569,
      "step": 86280
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4664816856384277,
      "learning_rate": 0.0004152588503047767,
      "loss": 3.0688,
      "step": 86281
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4242889881134033,
      "learning_rate": 0.0004152550736764624,
      "loss": 3.0811,
      "step": 86282
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4023616313934326,
      "learning_rate": 0.00041525129702672,
      "loss": 2.9266,
      "step": 86283
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.869314193725586,
      "learning_rate": 0.00041524752035555014,
      "loss": 2.7698,
      "step": 86284
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0747499465942383,
      "learning_rate": 0.0004152437436629537,
      "loss": 2.9946,
      "step": 86285
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7890982627868652,
      "learning_rate": 0.00041523996694893127,
      "loss": 3.0325,
      "step": 86286
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.273638963699341,
      "learning_rate": 0.0004152361902134834,
      "loss": 3.1395,
      "step": 86287
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4531105756759644,
      "learning_rate": 0.0004152324134566111,
      "loss": 2.8605,
      "step": 86288
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0860683917999268,
      "learning_rate": 0.00041522863667831485,
      "loss": 3.0166,
      "step": 86289
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.825501799583435,
      "learning_rate": 0.00041522485987859547,
      "loss": 3.0223,
      "step": 86290
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.01285457611084,
      "learning_rate": 0.00041522108305745356,
      "loss": 3.0748,
      "step": 86291
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7117363214492798,
      "learning_rate": 0.0004152173062148899,
      "loss": 3.2004,
      "step": 86292
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.921640396118164,
      "learning_rate": 0.0004152135293509052,
      "loss": 3.2427,
      "step": 86293
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.159146308898926,
      "learning_rate": 0.0004152097524655001,
      "loss": 2.9264,
      "step": 86294
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.758682370185852,
      "learning_rate": 0.0004152059755586753,
      "loss": 3.0644,
      "step": 86295
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7918622493743896,
      "learning_rate": 0.0004152021986304316,
      "loss": 3.1166,
      "step": 86296
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.074038028717041,
      "learning_rate": 0.0004151984216807696,
      "loss": 2.8043,
      "step": 86297
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8124133348464966,
      "learning_rate": 0.00041519464470969006,
      "loss": 2.8593,
      "step": 86298
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6591976881027222,
      "learning_rate": 0.00041519086771719363,
      "loss": 2.9442,
      "step": 86299
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8479456901550293,
      "learning_rate": 0.00041518709070328106,
      "loss": 2.8748,
      "step": 86300
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6806498765945435,
      "learning_rate": 0.000415183313667953,
      "loss": 2.9818,
      "step": 86301
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9046244621276855,
      "learning_rate": 0.00041517953661121017,
      "loss": 3.2425,
      "step": 86302
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7551689147949219,
      "learning_rate": 0.0004151757595330534,
      "loss": 3.237,
      "step": 86303
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.465521812438965,
      "learning_rate": 0.00041517198243348324,
      "loss": 3.0078,
      "step": 86304
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9618151187896729,
      "learning_rate": 0.0004151682053125004,
      "loss": 2.8716,
      "step": 86305
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9099963903427124,
      "learning_rate": 0.0004151644281701057,
      "loss": 2.9905,
      "step": 86306
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7747511863708496,
      "learning_rate": 0.0004151606510062997,
      "loss": 3.213,
      "step": 86307
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.459531307220459,
      "learning_rate": 0.00041515687382108306,
      "loss": 2.9833,
      "step": 86308
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0415637493133545,
      "learning_rate": 0.00041515309661445675,
      "loss": 3.2098,
      "step": 86309
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7040815353393555,
      "learning_rate": 0.0004151493193864212,
      "loss": 3.0876,
      "step": 86310
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.77886164188385,
      "learning_rate": 0.0004151455421369772,
      "loss": 3.0138,
      "step": 86311
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.902830719947815,
      "learning_rate": 0.0004151417648661255,
      "loss": 2.9558,
      "step": 86312
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.596655249595642,
      "learning_rate": 0.00041513798757386686,
      "loss": 2.9549,
      "step": 86313
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.632692813873291,
      "learning_rate": 0.00041513421026020183,
      "loss": 3.0344,
      "step": 86314
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.432314157485962,
      "learning_rate": 0.0004151304329251312,
      "loss": 2.9574,
      "step": 86315
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.973408818244934,
      "learning_rate": 0.0004151266555686556,
      "loss": 3.0696,
      "step": 86316
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.80666983127594,
      "learning_rate": 0.00041512287819077577,
      "loss": 3.2153,
      "step": 86317
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7602989673614502,
      "learning_rate": 0.0004151191007914925,
      "loss": 3.1433,
      "step": 86318
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4188780784606934,
      "learning_rate": 0.0004151153233708064,
      "loss": 2.8273,
      "step": 86319
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6923702955245972,
      "learning_rate": 0.0004151115459287181,
      "loss": 3.15,
      "step": 86320
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.8714327812194824,
      "learning_rate": 0.0004151077684652284,
      "loss": 3.1405,
      "step": 86321
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.740273118019104,
      "learning_rate": 0.0004151039909803381,
      "loss": 3.2488,
      "step": 86322
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9642040729522705,
      "learning_rate": 0.0004151002134740478,
      "loss": 2.9284,
      "step": 86323
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1751129627227783,
      "learning_rate": 0.0004150964359463581,
      "loss": 2.966,
      "step": 86324
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0788094997406006,
      "learning_rate": 0.0004150926583972698,
      "loss": 2.9946,
      "step": 86325
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5797208547592163,
      "learning_rate": 0.0004150888808267836,
      "loss": 3.218,
      "step": 86326
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.213229179382324,
      "learning_rate": 0.0004150851032349003,
      "loss": 2.924,
      "step": 86327
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.463597297668457,
      "learning_rate": 0.00041508132562162047,
      "loss": 3.0398,
      "step": 86328
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6323987245559692,
      "learning_rate": 0.0004150775479869448,
      "loss": 3.2285,
      "step": 86329
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.556770086288452,
      "learning_rate": 0.00041507377033087414,
      "loss": 3.1165,
      "step": 86330
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.554206609725952,
      "learning_rate": 0.000415069992653409,
      "loss": 2.999,
      "step": 86331
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.689371109008789,
      "learning_rate": 0.00041506621495455016,
      "loss": 2.9798,
      "step": 86332
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5124258995056152,
      "learning_rate": 0.0004150624372342984,
      "loss": 3.0538,
      "step": 86333
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.4914472103118896,
      "learning_rate": 0.00041505865949265433,
      "loss": 2.9888,
      "step": 86334
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.559266209602356,
      "learning_rate": 0.00041505488172961867,
      "loss": 3.3141,
      "step": 86335
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7633767127990723,
      "learning_rate": 0.0004150511039451922,
      "loss": 3.0758,
      "step": 86336
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9104024171829224,
      "learning_rate": 0.0004150473261393756,
      "loss": 3.023,
      "step": 86337
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5854068994522095,
      "learning_rate": 0.00041504354831216935,
      "loss": 3.1519,
      "step": 86338
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.639561653137207,
      "learning_rate": 0.00041503977046357447,
      "loss": 3.208,
      "step": 86339
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.4205381870269775,
      "learning_rate": 0.0004150359925935915,
      "loss": 2.8625,
      "step": 86340
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6114544868469238,
      "learning_rate": 0.0004150322147022212,
      "loss": 3.1724,
      "step": 86341
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.521503210067749,
      "learning_rate": 0.00041502843678946415,
      "loss": 3.1891,
      "step": 86342
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6619117259979248,
      "learning_rate": 0.0004150246588553212,
      "loss": 3.0639,
      "step": 86343
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6571075916290283,
      "learning_rate": 0.000415020880899793,
      "loss": 3.1928,
      "step": 86344
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.6422369480133057,
      "learning_rate": 0.0004150171029228801,
      "loss": 2.9328,
      "step": 86345
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.0316784381866455,
      "learning_rate": 0.0004150133249245836,
      "loss": 3.0287,
      "step": 86346
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7880403995513916,
      "learning_rate": 0.00041500954690490383,
      "loss": 2.8589,
      "step": 86347
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.209057569503784,
      "learning_rate": 0.0004150057688638416,
      "loss": 2.9943,
      "step": 86348
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7700749635696411,
      "learning_rate": 0.0004150019908013977,
      "loss": 2.8873,
      "step": 86349
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9131898880004883,
      "learning_rate": 0.0004149982127175727,
      "loss": 2.9096,
      "step": 86350
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.063766956329346,
      "learning_rate": 0.0004149944346123673,
      "loss": 2.9897,
      "step": 86351
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.0092291831970215,
      "learning_rate": 0.00041499065648578236,
      "loss": 2.9291,
      "step": 86352
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.331476926803589,
      "learning_rate": 0.0004149868783378185,
      "loss": 2.9652,
      "step": 86353
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1612465381622314,
      "learning_rate": 0.0004149831001684763,
      "loss": 2.6805,
      "step": 86354
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.7410616874694824,
      "learning_rate": 0.0004149793219777567,
      "loss": 2.9252,
      "step": 86355
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.999985694885254,
      "learning_rate": 0.00041497554376566015,
      "loss": 3.0139,
      "step": 86356
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.253075122833252,
      "learning_rate": 0.00041497176553218754,
      "loss": 2.959,
      "step": 86357
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8498458862304688,
      "learning_rate": 0.00041496798727733955,
      "loss": 2.9661,
      "step": 86358
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.525773048400879,
      "learning_rate": 0.00041496420900111674,
      "loss": 2.9075,
      "step": 86359
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.6087443828582764,
      "learning_rate": 0.00041496043070351997,
      "loss": 3.2075,
      "step": 86360
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.96200704574585,
      "learning_rate": 0.00041495665238454994,
      "loss": 2.9699,
      "step": 86361
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9203898906707764,
      "learning_rate": 0.0004149528740442072,
      "loss": 3.0023,
      "step": 86362
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7798491716384888,
      "learning_rate": 0.00041494909568249254,
      "loss": 2.8562,
      "step": 86363
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9264005422592163,
      "learning_rate": 0.0004149453172994068,
      "loss": 3.2197,
      "step": 86364
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.701802730560303,
      "learning_rate": 0.0004149415388949505,
      "loss": 3.1799,
      "step": 86365
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.0575199127197266,
      "learning_rate": 0.0004149377604691243,
      "loss": 2.989,
      "step": 86366
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7354685068130493,
      "learning_rate": 0.00041493398202192914,
      "loss": 2.9248,
      "step": 86367
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.564805269241333,
      "learning_rate": 0.0004149302035533655,
      "loss": 3.3446,
      "step": 86368
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.2399699687957764,
      "learning_rate": 0.0004149264250634342,
      "loss": 3.1437,
      "step": 86369
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.2731494903564453,
      "learning_rate": 0.00041492264655213596,
      "loss": 3.026,
      "step": 86370
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9036149978637695,
      "learning_rate": 0.0004149188680194713,
      "loss": 3.1228,
      "step": 86371
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1656858921051025,
      "learning_rate": 0.0004149150894654411,
      "loss": 3.2885,
      "step": 86372
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.878286838531494,
      "learning_rate": 0.0004149113108900461,
      "loss": 3.0493,
      "step": 86373
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.5339162349700928,
      "learning_rate": 0.0004149075322932868,
      "loss": 3.0689,
      "step": 86374
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.7982414960861206,
      "learning_rate": 0.00041490375367516405,
      "loss": 2.9844,
      "step": 86375
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.1163806915283203,
      "learning_rate": 0.00041489997503567865,
      "loss": 2.8223,
      "step": 86376
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9659676551818848,
      "learning_rate": 0.000414896196374831,
      "loss": 2.8598,
      "step": 86377
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.843600869178772,
      "learning_rate": 0.00041489241769262207,
      "loss": 3.0301,
      "step": 86378
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.713841199874878,
      "learning_rate": 0.0004148886389890524,
      "loss": 2.9833,
      "step": 86379
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.281855583190918,
      "learning_rate": 0.00041488486026412287,
      "loss": 3.0244,
      "step": 86380
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.631008267402649,
      "learning_rate": 0.000414881081517834,
      "loss": 3.0452,
      "step": 86381
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8109166622161865,
      "learning_rate": 0.0004148773027501866,
      "loss": 3.2364,
      "step": 86382
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.5612964630126953,
      "learning_rate": 0.00041487352396118136,
      "loss": 2.8664,
      "step": 86383
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.7300162315368652,
      "learning_rate": 0.0004148697451508189,
      "loss": 2.6503,
      "step": 86384
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.457059860229492,
      "learning_rate": 0.00041486596631910003,
      "loss": 2.9967,
      "step": 86385
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8938112258911133,
      "learning_rate": 0.0004148621874660254,
      "loss": 2.8215,
      "step": 86386
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.618539810180664,
      "learning_rate": 0.0004148584085915957,
      "loss": 2.7913,
      "step": 86387
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.8019917011260986,
      "learning_rate": 0.0004148546296958117,
      "loss": 2.8114,
      "step": 86388
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.1462361812591553,
      "learning_rate": 0.0004148508507786741,
      "loss": 2.9089,
      "step": 86389
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.9346550703048706,
      "learning_rate": 0.0004148470718401834,
      "loss": 3.1202,
      "step": 86390
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.9140143394470215,
      "learning_rate": 0.00041484329288034056,
      "loss": 3.1208,
      "step": 86391
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.929023265838623,
      "learning_rate": 0.0004148395138991462,
      "loss": 3.0177,
      "step": 86392
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.9378974437713623,
      "learning_rate": 0.000414835734896601,
      "loss": 2.904,
      "step": 86393
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8753446340560913,
      "learning_rate": 0.0004148319558727056,
      "loss": 2.6997,
      "step": 86394
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.268564462661743,
      "learning_rate": 0.00041482817682746083,
      "loss": 2.9471,
      "step": 86395
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.6320767402648926,
      "learning_rate": 0.00041482439776086737,
      "loss": 2.9422,
      "step": 86396
    },
    {
      "epoch": 1.12,
      "grad_norm": 4.300754547119141,
      "learning_rate": 0.0004148206186729258,
      "loss": 3.1376,
      "step": 86397
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.4524142742156982,
      "learning_rate": 0.000414816839563637,
      "loss": 2.8855,
      "step": 86398
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.8262964487075806,
      "learning_rate": 0.0004148130604330015,
      "loss": 2.9557,
      "step": 86399
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.3431334495544434,
      "learning_rate": 0.0004148092812810201,
      "loss": 2.6553,
      "step": 86400
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.418860673904419,
      "learning_rate": 0.00041480550210769355,
      "loss": 2.8417,
      "step": 86401
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7666094303131104,
      "learning_rate": 0.00041480172291302243,
      "loss": 3.1042,
      "step": 86402
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5965696573257446,
      "learning_rate": 0.0004147979436970076,
      "loss": 3.2103,
      "step": 86403
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.05985689163208,
      "learning_rate": 0.0004147941644596496,
      "loss": 3.0111,
      "step": 86404
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4476571083068848,
      "learning_rate": 0.0004147903852009491,
      "loss": 2.8812,
      "step": 86405
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.265120029449463,
      "learning_rate": 0.000414786605920907,
      "loss": 2.9394,
      "step": 86406
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.452284336090088,
      "learning_rate": 0.0004147828266195239,
      "loss": 3.2139,
      "step": 86407
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.65556001663208,
      "learning_rate": 0.0004147790472968004,
      "loss": 3.0149,
      "step": 86408
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9160501956939697,
      "learning_rate": 0.00041477526795273746,
      "loss": 3.0212,
      "step": 86409
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.220921516418457,
      "learning_rate": 0.00041477148858733555,
      "loss": 2.9075,
      "step": 86410
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9895304441452026,
      "learning_rate": 0.00041476770920059557,
      "loss": 3.1111,
      "step": 86411
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2785825729370117,
      "learning_rate": 0.000414763929792518,
      "loss": 2.9794,
      "step": 86412
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5319006443023682,
      "learning_rate": 0.0004147601503631036,
      "loss": 2.9551,
      "step": 86413
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7234768867492676,
      "learning_rate": 0.0004147563709123532,
      "loss": 3.072,
      "step": 86414
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.767024040222168,
      "learning_rate": 0.0004147525914402674,
      "loss": 3.0127,
      "step": 86415
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7743996381759644,
      "learning_rate": 0.00041474881194684694,
      "loss": 3.1964,
      "step": 86416
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.640479564666748,
      "learning_rate": 0.0004147450324320925,
      "loss": 3.0,
      "step": 86417
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3136610984802246,
      "learning_rate": 0.0004147412528960048,
      "loss": 3.0095,
      "step": 86418
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.216850757598877,
      "learning_rate": 0.00041473747333858457,
      "loss": 3.1424,
      "step": 86419
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1665432453155518,
      "learning_rate": 0.0004147336937598324,
      "loss": 3.0694,
      "step": 86420
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9252378940582275,
      "learning_rate": 0.00041472991415974906,
      "loss": 3.2017,
      "step": 86421
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.13248872756958,
      "learning_rate": 0.00041472613453833536,
      "loss": 2.8817,
      "step": 86422
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.079925298690796,
      "learning_rate": 0.00041472235489559183,
      "loss": 2.9206,
      "step": 86423
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.000586986541748,
      "learning_rate": 0.00041471857523151933,
      "loss": 3.0262,
      "step": 86424
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4780055284500122,
      "learning_rate": 0.0004147147955461184,
      "loss": 3.2891,
      "step": 86425
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6155385971069336,
      "learning_rate": 0.00041471101583938987,
      "loss": 3.1073,
      "step": 86426
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5112742185592651,
      "learning_rate": 0.00041470723611133437,
      "loss": 2.9681,
      "step": 86427
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5518879890441895,
      "learning_rate": 0.0004147034563619527,
      "loss": 2.9278,
      "step": 86428
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8704180717468262,
      "learning_rate": 0.0004146996765912454,
      "loss": 2.9352,
      "step": 86429
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.127293348312378,
      "learning_rate": 0.00041469589679921326,
      "loss": 3.1139,
      "step": 86430
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5360186100006104,
      "learning_rate": 0.00041469211698585707,
      "loss": 3.1016,
      "step": 86431
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5878255367279053,
      "learning_rate": 0.00041468833715117744,
      "loss": 3.1135,
      "step": 86432
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4110374450683594,
      "learning_rate": 0.000414684557295175,
      "loss": 2.9561,
      "step": 86433
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8913825750350952,
      "learning_rate": 0.00041468077741785064,
      "loss": 3.1927,
      "step": 86434
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4499244689941406,
      "learning_rate": 0.0004146769975192049,
      "loss": 2.7676,
      "step": 86435
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.833032250404358,
      "learning_rate": 0.0004146732175992385,
      "loss": 2.7753,
      "step": 86436
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.923268437385559,
      "learning_rate": 0.00041466943765795225,
      "loss": 3.0558,
      "step": 86437
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6361985206604004,
      "learning_rate": 0.00041466565769534674,
      "loss": 3.0779,
      "step": 86438
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7136986255645752,
      "learning_rate": 0.0004146618777114227,
      "loss": 2.9983,
      "step": 86439
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5208613872528076,
      "learning_rate": 0.00041465809770618097,
      "loss": 2.7477,
      "step": 86440
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5224560499191284,
      "learning_rate": 0.0004146543176796221,
      "loss": 2.9435,
      "step": 86441
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5660840272903442,
      "learning_rate": 0.00041465053763174676,
      "loss": 2.9292,
      "step": 86442
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7431678771972656,
      "learning_rate": 0.0004146467575625557,
      "loss": 2.7903,
      "step": 86443
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7160128355026245,
      "learning_rate": 0.00041464297747204976,
      "loss": 3.1352,
      "step": 86444
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.429452657699585,
      "learning_rate": 0.00041463919736022946,
      "loss": 3.1291,
      "step": 86445
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.899533987045288,
      "learning_rate": 0.0004146354172270955,
      "loss": 2.7521,
      "step": 86446
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.845731496810913,
      "learning_rate": 0.00041463163707264886,
      "loss": 3.0577,
      "step": 86447
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5082110166549683,
      "learning_rate": 0.0004146278568968898,
      "loss": 3.151,
      "step": 86448
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4505600929260254,
      "learning_rate": 0.0004146240766998194,
      "loss": 2.96,
      "step": 86449
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.076826810836792,
      "learning_rate": 0.0004146202964814382,
      "loss": 2.7854,
      "step": 86450
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.028043746948242,
      "learning_rate": 0.00041461651624174686,
      "loss": 2.9563,
      "step": 86451
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7827668190002441,
      "learning_rate": 0.00041461273598074615,
      "loss": 2.7975,
      "step": 86452
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1431941986083984,
      "learning_rate": 0.00041460895569843685,
      "loss": 2.7795,
      "step": 86453
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6653952598571777,
      "learning_rate": 0.00041460517539481956,
      "loss": 3.028,
      "step": 86454
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.061034917831421,
      "learning_rate": 0.00041460139506989494,
      "loss": 2.8398,
      "step": 86455
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0412843227386475,
      "learning_rate": 0.0004145976147236638,
      "loss": 3.046,
      "step": 86456
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.048452615737915,
      "learning_rate": 0.0004145938343561268,
      "loss": 3.039,
      "step": 86457
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.858350992202759,
      "learning_rate": 0.00041459005396728463,
      "loss": 2.9329,
      "step": 86458
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9440526962280273,
      "learning_rate": 0.00041458627355713814,
      "loss": 3.0452,
      "step": 86459
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.843764066696167,
      "learning_rate": 0.00041458249312568767,
      "loss": 2.8233,
      "step": 86460
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4968605041503906,
      "learning_rate": 0.00041457871267293426,
      "loss": 3.3354,
      "step": 86461
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8786214590072632,
      "learning_rate": 0.0004145749321988786,
      "loss": 2.8576,
      "step": 86462
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4471808671951294,
      "learning_rate": 0.00041457115170352114,
      "loss": 2.8667,
      "step": 86463
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7917876243591309,
      "learning_rate": 0.0004145673711868628,
      "loss": 3.0367,
      "step": 86464
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.895006775856018,
      "learning_rate": 0.00041456359064890425,
      "loss": 3.0253,
      "step": 86465
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.71687650680542,
      "learning_rate": 0.00041455981008964613,
      "loss": 2.8949,
      "step": 86466
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7375843524932861,
      "learning_rate": 0.00041455602950908913,
      "loss": 2.9198,
      "step": 86467
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2521493434906006,
      "learning_rate": 0.0004145522489072341,
      "loss": 2.9987,
      "step": 86468
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.414655089378357,
      "learning_rate": 0.00041454846828408164,
      "loss": 3.1425,
      "step": 86469
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0785977840423584,
      "learning_rate": 0.0004145446876396324,
      "loss": 3.003,
      "step": 86470
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.726880669593811,
      "learning_rate": 0.0004145409069738872,
      "loss": 3.0362,
      "step": 86471
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7740893363952637,
      "learning_rate": 0.0004145371262868466,
      "loss": 2.8754,
      "step": 86472
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.136178731918335,
      "learning_rate": 0.0004145333455785114,
      "loss": 2.936,
      "step": 86473
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.5160462856292725,
      "learning_rate": 0.0004145295648488824,
      "loss": 2.7833,
      "step": 86474
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6963436603546143,
      "learning_rate": 0.00041452578409796007,
      "loss": 2.8989,
      "step": 86475
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8012179136276245,
      "learning_rate": 0.0004145220033257453,
      "loss": 3.0447,
      "step": 86476
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8523237705230713,
      "learning_rate": 0.0004145182225322388,
      "loss": 2.8736,
      "step": 86477
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7375171184539795,
      "learning_rate": 0.00041451444171744105,
      "loss": 2.9532,
      "step": 86478
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9294941425323486,
      "learning_rate": 0.00041451066088135294,
      "loss": 2.9565,
      "step": 86479
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.816836953163147,
      "learning_rate": 0.0004145068800239752,
      "loss": 2.8945,
      "step": 86480
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6488031148910522,
      "learning_rate": 0.0004145030991453084,
      "loss": 2.9958,
      "step": 86481
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4313855171203613,
      "learning_rate": 0.00041449931824535333,
      "loss": 2.9092,
      "step": 86482
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5764769315719604,
      "learning_rate": 0.00041449553732411074,
      "loss": 3.0925,
      "step": 86483
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.444514513015747,
      "learning_rate": 0.0004144917563815812,
      "loss": 2.8216,
      "step": 86484
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.926339030265808,
      "learning_rate": 0.00041448797541776544,
      "loss": 3.2026,
      "step": 86485
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.449169397354126,
      "learning_rate": 0.00041448419443266426,
      "loss": 3.0127,
      "step": 86486
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5306575298309326,
      "learning_rate": 0.00041448041342627835,
      "loss": 3.2549,
      "step": 86487
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8868603706359863,
      "learning_rate": 0.00041447663239860825,
      "loss": 2.882,
      "step": 86488
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5280872583389282,
      "learning_rate": 0.00041447285134965493,
      "loss": 3.1389,
      "step": 86489
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4907537698745728,
      "learning_rate": 0.0004144690702794189,
      "loss": 3.0852,
      "step": 86490
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8058478832244873,
      "learning_rate": 0.0004144652891879009,
      "loss": 3.1834,
      "step": 86491
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.710875391960144,
      "learning_rate": 0.0004144615080751016,
      "loss": 3.0001,
      "step": 86492
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6109161376953125,
      "learning_rate": 0.0004144577269410218,
      "loss": 3.1631,
      "step": 86493
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7239679098129272,
      "learning_rate": 0.00041445394578566204,
      "loss": 3.3132,
      "step": 86494
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8155488967895508,
      "learning_rate": 0.0004144501646090232,
      "loss": 3.2184,
      "step": 86495
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5408791303634644,
      "learning_rate": 0.000414446383411106,
      "loss": 3.1338,
      "step": 86496
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5864185094833374,
      "learning_rate": 0.000414442602191911,
      "loss": 2.9055,
      "step": 86497
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.805833339691162,
      "learning_rate": 0.0004144388209514388,
      "loss": 2.889,
      "step": 86498
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0490167140960693,
      "learning_rate": 0.00041443503968969044,
      "loss": 2.7578,
      "step": 86499
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8559497594833374,
      "learning_rate": 0.0004144312584066665,
      "loss": 2.8022,
      "step": 86500
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6977049112319946,
      "learning_rate": 0.00041442747710236747,
      "loss": 3.034,
      "step": 86501
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6583752632141113,
      "learning_rate": 0.0004144236957767943,
      "loss": 2.9451,
      "step": 86502
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4970982074737549,
      "learning_rate": 0.00041441991442994763,
      "loss": 2.9966,
      "step": 86503
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7656580209732056,
      "learning_rate": 0.0004144161330618281,
      "loss": 3.1417,
      "step": 86504
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8632599115371704,
      "learning_rate": 0.0004144123516724364,
      "loss": 2.7391,
      "step": 86505
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.003633499145508,
      "learning_rate": 0.0004144085702617733,
      "loss": 3.0907,
      "step": 86506
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0970664024353027,
      "learning_rate": 0.00041440478882983955,
      "loss": 3.107,
      "step": 86507
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0701849460601807,
      "learning_rate": 0.0004144010073766357,
      "loss": 3.0295,
      "step": 86508
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1071364879608154,
      "learning_rate": 0.00041439722590216264,
      "loss": 2.9131,
      "step": 86509
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3431663513183594,
      "learning_rate": 0.000414393444406421,
      "loss": 2.8678,
      "step": 86510
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.13710618019104,
      "learning_rate": 0.0004143896628894113,
      "loss": 3.1758,
      "step": 86511
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.0220422744750977,
      "learning_rate": 0.0004143858813511345,
      "loss": 3.1475,
      "step": 86512
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6447808742523193,
      "learning_rate": 0.0004143820997915912,
      "loss": 2.8995,
      "step": 86513
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7096924781799316,
      "learning_rate": 0.00041437831821078213,
      "loss": 3.3232,
      "step": 86514
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.559241771697998,
      "learning_rate": 0.000414374536608708,
      "loss": 3.0706,
      "step": 86515
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1507318019866943,
      "learning_rate": 0.00041437075498536937,
      "loss": 3.236,
      "step": 86516
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7585378885269165,
      "learning_rate": 0.0004143669733407672,
      "loss": 3.1882,
      "step": 86517
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.2631795406341553,
      "learning_rate": 0.0004143631916749019,
      "loss": 2.9629,
      "step": 86518
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.379572629928589,
      "learning_rate": 0.0004143594099877744,
      "loss": 3.132,
      "step": 86519
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.2978193759918213,
      "learning_rate": 0.00041435562827938537,
      "loss": 2.8453,
      "step": 86520
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8244361877441406,
      "learning_rate": 0.0004143518465497354,
      "loss": 2.9569,
      "step": 86521
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9474067687988281,
      "learning_rate": 0.0004143480647988253,
      "loss": 2.9019,
      "step": 86522
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2428138256073,
      "learning_rate": 0.00041434428302665574,
      "loss": 3.0038,
      "step": 86523
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0595784187316895,
      "learning_rate": 0.0004143405012332274,
      "loss": 2.9476,
      "step": 86524
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7441602945327759,
      "learning_rate": 0.00041433671941854103,
      "loss": 3.0179,
      "step": 86525
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1596202850341797,
      "learning_rate": 0.00041433293758259734,
      "loss": 2.977,
      "step": 86526
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6234517097473145,
      "learning_rate": 0.00041432915572539694,
      "loss": 3.0262,
      "step": 86527
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8902710676193237,
      "learning_rate": 0.00041432537384694055,
      "loss": 3.0166,
      "step": 86528
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9346505403518677,
      "learning_rate": 0.000414321591947229,
      "loss": 2.8643,
      "step": 86529
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0168163776397705,
      "learning_rate": 0.0004143178100262629,
      "loss": 3.1294,
      "step": 86530
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.626713752746582,
      "learning_rate": 0.00041431402808404284,
      "loss": 3.2749,
      "step": 86531
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6201918125152588,
      "learning_rate": 0.00041431024612056976,
      "loss": 3.1605,
      "step": 86532
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.108067512512207,
      "learning_rate": 0.0004143064641358442,
      "loss": 3.2158,
      "step": 86533
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5045671463012695,
      "learning_rate": 0.000414302682129867,
      "loss": 2.9838,
      "step": 86534
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7364460229873657,
      "learning_rate": 0.0004142989001026387,
      "loss": 2.9755,
      "step": 86535
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5626164674758911,
      "learning_rate": 0.00041429511805416005,
      "loss": 2.8848,
      "step": 86536
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6469372510910034,
      "learning_rate": 0.0004142913359844318,
      "loss": 2.8722,
      "step": 86537
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7227283716201782,
      "learning_rate": 0.0004142875538934547,
      "loss": 2.9041,
      "step": 86538
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8109427690505981,
      "learning_rate": 0.0004142837717812293,
      "loss": 3.1288,
      "step": 86539
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8782871961593628,
      "learning_rate": 0.0004142799896477564,
      "loss": 2.7515,
      "step": 86540
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8710660934448242,
      "learning_rate": 0.00041427620749303674,
      "loss": 3.1083,
      "step": 86541
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.818055272102356,
      "learning_rate": 0.00041427242531707093,
      "loss": 2.9518,
      "step": 86542
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.910274624824524,
      "learning_rate": 0.00041426864311985975,
      "loss": 2.8688,
      "step": 86543
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4801424741744995,
      "learning_rate": 0.0004142648609014039,
      "loss": 2.9937,
      "step": 86544
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.595492959022522,
      "learning_rate": 0.000414261078661704,
      "loss": 2.7986,
      "step": 86545
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8079832792282104,
      "learning_rate": 0.00041425729640076083,
      "loss": 3.0579,
      "step": 86546
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7654814720153809,
      "learning_rate": 0.0004142535141185751,
      "loss": 2.9583,
      "step": 86547
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7946559190750122,
      "learning_rate": 0.00041424973181514743,
      "loss": 2.8663,
      "step": 86548
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0331311225891113,
      "learning_rate": 0.0004142459494904786,
      "loss": 2.7785,
      "step": 86549
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9966765642166138,
      "learning_rate": 0.0004142421671445693,
      "loss": 2.9688,
      "step": 86550
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4638490676879883,
      "learning_rate": 0.0004142383847774202,
      "loss": 3.08,
      "step": 86551
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.654005765914917,
      "learning_rate": 0.0004142346023890321,
      "loss": 2.7951,
      "step": 86552
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1978583335876465,
      "learning_rate": 0.0004142308199794055,
      "loss": 3.1447,
      "step": 86553
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1193525791168213,
      "learning_rate": 0.00041422703754854135,
      "loss": 3.3346,
      "step": 86554
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7612398862838745,
      "learning_rate": 0.00041422325509644014,
      "loss": 3.0084,
      "step": 86555
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7779486179351807,
      "learning_rate": 0.00041421947262310283,
      "loss": 3.0451,
      "step": 86556
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0991785526275635,
      "learning_rate": 0.0004142156901285298,
      "loss": 3.1551,
      "step": 86557
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.5727250576019287,
      "learning_rate": 0.000414211907612722,
      "loss": 2.8756,
      "step": 86558
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.60956871509552,
      "learning_rate": 0.00041420812507568007,
      "loss": 2.6453,
      "step": 86559
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.07314133644104,
      "learning_rate": 0.0004142043425174046,
      "loss": 3.0187,
      "step": 86560
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8832460641860962,
      "learning_rate": 0.00041420055993789644,
      "loss": 2.9787,
      "step": 86561
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.0721280574798584,
      "learning_rate": 0.00041419677733715626,
      "loss": 2.8936,
      "step": 86562
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.029623031616211,
      "learning_rate": 0.00041419299471518475,
      "loss": 2.8803,
      "step": 86563
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5464520454406738,
      "learning_rate": 0.00041418921207198256,
      "loss": 3.1427,
      "step": 86564
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0398240089416504,
      "learning_rate": 0.00041418542940755055,
      "loss": 3.0188,
      "step": 86565
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.316094398498535,
      "learning_rate": 0.00041418164672188916,
      "loss": 2.904,
      "step": 86566
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6600635051727295,
      "learning_rate": 0.0004141778640149993,
      "loss": 2.9631,
      "step": 86567
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.398507833480835,
      "learning_rate": 0.00041417408128688164,
      "loss": 2.8652,
      "step": 86568
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2376670837402344,
      "learning_rate": 0.00041417029853753687,
      "loss": 2.8877,
      "step": 86569
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4981794357299805,
      "learning_rate": 0.00041416651576696564,
      "loss": 2.957,
      "step": 86570
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0641541481018066,
      "learning_rate": 0.00041416273297516877,
      "loss": 3.0529,
      "step": 86571
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6167360544204712,
      "learning_rate": 0.00041415895016214685,
      "loss": 3.0431,
      "step": 86572
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6274373531341553,
      "learning_rate": 0.0004141551673279006,
      "loss": 3.0978,
      "step": 86573
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6278650760650635,
      "learning_rate": 0.0004141513844724308,
      "loss": 3.2228,
      "step": 86574
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6632041931152344,
      "learning_rate": 0.00041414760159573803,
      "loss": 2.963,
      "step": 86575
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.788552165031433,
      "learning_rate": 0.00041414381869782314,
      "loss": 2.7803,
      "step": 86576
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1509499549865723,
      "learning_rate": 0.0004141400357786867,
      "loss": 2.7108,
      "step": 86577
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.666482925415039,
      "learning_rate": 0.00041413625283832954,
      "loss": 3.1429,
      "step": 86578
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2248640060424805,
      "learning_rate": 0.0004141324698767523,
      "loss": 3.1726,
      "step": 86579
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6972790956497192,
      "learning_rate": 0.0004141286868939556,
      "loss": 2.6951,
      "step": 86580
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8373030424118042,
      "learning_rate": 0.0004141249038899403,
      "loss": 3.0472,
      "step": 86581
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7551746368408203,
      "learning_rate": 0.00041412112086470696,
      "loss": 2.8433,
      "step": 86582
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.9443655014038086,
      "learning_rate": 0.0004141173378182564,
      "loss": 2.7981,
      "step": 86583
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.364969253540039,
      "learning_rate": 0.00041411355475058935,
      "loss": 2.99,
      "step": 86584
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1703827381134033,
      "learning_rate": 0.00041410977166170623,
      "loss": 2.8549,
      "step": 86585
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.641577124595642,
      "learning_rate": 0.00041410598855160815,
      "loss": 3.167,
      "step": 86586
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1153974533081055,
      "learning_rate": 0.0004141022054202954,
      "loss": 3.1599,
      "step": 86587
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.5649170875549316,
      "learning_rate": 0.00041409842226776915,
      "loss": 3.1094,
      "step": 86588
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5724939107894897,
      "learning_rate": 0.00041409463909402965,
      "loss": 3.156,
      "step": 86589
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6668120622634888,
      "learning_rate": 0.0004140908558990779,
      "loss": 2.8803,
      "step": 86590
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8668115139007568,
      "learning_rate": 0.0004140870726829145,
      "loss": 2.8738,
      "step": 86591
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7465969324111938,
      "learning_rate": 0.00041408328944554017,
      "loss": 2.7468,
      "step": 86592
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.770615816116333,
      "learning_rate": 0.00041407950618695545,
      "loss": 2.9606,
      "step": 86593
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8503053188323975,
      "learning_rate": 0.00041407572290716135,
      "loss": 2.772,
      "step": 86594
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.3262438774108887,
      "learning_rate": 0.0004140719396061584,
      "loss": 2.9999,
      "step": 86595
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3343539237976074,
      "learning_rate": 0.00041406815628394734,
      "loss": 3.089,
      "step": 86596
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9643946886062622,
      "learning_rate": 0.0004140643729405288,
      "loss": 3.0715,
      "step": 86597
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.78159499168396,
      "learning_rate": 0.00041406058957590356,
      "loss": 2.887,
      "step": 86598
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.9378862380981445,
      "learning_rate": 0.0004140568061900723,
      "loss": 2.9575,
      "step": 86599
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.476630687713623,
      "learning_rate": 0.0004140530227830357,
      "loss": 3.0039,
      "step": 86600
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5119911432266235,
      "learning_rate": 0.00041404923935479455,
      "loss": 3.1344,
      "step": 86601
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4318103790283203,
      "learning_rate": 0.0004140454559053496,
      "loss": 2.9271,
      "step": 86602
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4997721910476685,
      "learning_rate": 0.00041404167243470127,
      "loss": 3.0567,
      "step": 86603
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.551529884338379,
      "learning_rate": 0.00041403788894285045,
      "loss": 3.0455,
      "step": 86604
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.3993018865585327,
      "learning_rate": 0.0004140341054297979,
      "loss": 3.04,
      "step": 86605
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1611006259918213,
      "learning_rate": 0.0004140303218955442,
      "loss": 2.9673,
      "step": 86606
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4817066192626953,
      "learning_rate": 0.0004140265383400901,
      "loss": 3.0791,
      "step": 86607
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5947984457015991,
      "learning_rate": 0.00041402275476343643,
      "loss": 3.1804,
      "step": 86608
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7212387323379517,
      "learning_rate": 0.00041401897116558365,
      "loss": 2.9024,
      "step": 86609
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1178135871887207,
      "learning_rate": 0.00041401518754653267,
      "loss": 2.8465,
      "step": 86610
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1192431449890137,
      "learning_rate": 0.00041401140390628417,
      "loss": 3.0509,
      "step": 86611
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8709334135055542,
      "learning_rate": 0.00041400762024483865,
      "loss": 3.0178,
      "step": 86612
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4814881086349487,
      "learning_rate": 0.000414003836562197,
      "loss": 2.9657,
      "step": 86613
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7050538063049316,
      "learning_rate": 0.00041400005285836,
      "loss": 2.8802,
      "step": 86614
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8171584606170654,
      "learning_rate": 0.00041399626913332806,
      "loss": 3.0316,
      "step": 86615
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4616116285324097,
      "learning_rate": 0.00041399248538710216,
      "loss": 3.1912,
      "step": 86616
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.676731824874878,
      "learning_rate": 0.00041398870161968293,
      "loss": 2.9811,
      "step": 86617
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.218911647796631,
      "learning_rate": 0.000413984917831071,
      "loss": 2.9795,
      "step": 86618
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.737475037574768,
      "learning_rate": 0.0004139811340212672,
      "loss": 2.8665,
      "step": 86619
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5997016429901123,
      "learning_rate": 0.00041397735019027207,
      "loss": 3.1329,
      "step": 86620
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7512781620025635,
      "learning_rate": 0.00041397356633808644,
      "loss": 2.8143,
      "step": 86621
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1652300357818604,
      "learning_rate": 0.0004139697824647109,
      "loss": 2.9458,
      "step": 86622
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0243422985076904,
      "learning_rate": 0.00041396599857014636,
      "loss": 2.9104,
      "step": 86623
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5294395685195923,
      "learning_rate": 0.00041396221465439325,
      "loss": 3.1364,
      "step": 86624
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2096035480499268,
      "learning_rate": 0.00041395843071745245,
      "loss": 2.9922,
      "step": 86625
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7959120273590088,
      "learning_rate": 0.0004139546467593247,
      "loss": 3.1017,
      "step": 86626
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.577860713005066,
      "learning_rate": 0.0004139508627800106,
      "loss": 2.9003,
      "step": 86627
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.15519642829895,
      "learning_rate": 0.0004139470787795108,
      "loss": 3.008,
      "step": 86628
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.751689076423645,
      "learning_rate": 0.0004139432947578262,
      "loss": 3.2389,
      "step": 86629
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5104608535766602,
      "learning_rate": 0.0004139395107149573,
      "loss": 2.974,
      "step": 86630
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8301329612731934,
      "learning_rate": 0.00041393572665090496,
      "loss": 3.1457,
      "step": 86631
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8364241123199463,
      "learning_rate": 0.0004139319425656698,
      "loss": 2.9621,
      "step": 86632
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5076215267181396,
      "learning_rate": 0.0004139281584592525,
      "loss": 3.1927,
      "step": 86633
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6246954202651978,
      "learning_rate": 0.0004139243743316539,
      "loss": 2.8625,
      "step": 86634
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.602960467338562,
      "learning_rate": 0.0004139205901828746,
      "loss": 2.8574,
      "step": 86635
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.052304983139038,
      "learning_rate": 0.00041391680601291517,
      "loss": 2.9852,
      "step": 86636
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7493131160736084,
      "learning_rate": 0.00041391302182177656,
      "loss": 3.0205,
      "step": 86637
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5575268268585205,
      "learning_rate": 0.00041390923760945946,
      "loss": 3.1117,
      "step": 86638
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7986191511154175,
      "learning_rate": 0.00041390545337596427,
      "loss": 3.1386,
      "step": 86639
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.248641014099121,
      "learning_rate": 0.00041390166912129206,
      "loss": 2.8701,
      "step": 86640
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9195785522460938,
      "learning_rate": 0.0004138978848454434,
      "loss": 3.0846,
      "step": 86641
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.143505096435547,
      "learning_rate": 0.0004138941005484188,
      "loss": 2.9011,
      "step": 86642
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.955925703048706,
      "learning_rate": 0.00041389031623021924,
      "loss": 3.1243,
      "step": 86643
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7779326438903809,
      "learning_rate": 0.00041388653189084536,
      "loss": 2.806,
      "step": 86644
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1337270736694336,
      "learning_rate": 0.00041388274753029777,
      "loss": 3.008,
      "step": 86645
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.0275044441223145,
      "learning_rate": 0.00041387896314857723,
      "loss": 3.0463,
      "step": 86646
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5847914218902588,
      "learning_rate": 0.00041387517874568445,
      "loss": 2.9707,
      "step": 86647
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8827418088912964,
      "learning_rate": 0.0004138713943216202,
      "loss": 3.0339,
      "step": 86648
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9101049900054932,
      "learning_rate": 0.00041386760987638496,
      "loss": 2.5966,
      "step": 86649
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8823908567428589,
      "learning_rate": 0.00041386382540997967,
      "loss": 2.9627,
      "step": 86650
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.251565456390381,
      "learning_rate": 0.000413860040922405,
      "loss": 3.0179,
      "step": 86651
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6709398031234741,
      "learning_rate": 0.0004138562564136615,
      "loss": 2.9186,
      "step": 86652
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.167778491973877,
      "learning_rate": 0.00041385247188375,
      "loss": 2.8723,
      "step": 86653
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0449891090393066,
      "learning_rate": 0.0004138486873326713,
      "loss": 2.8643,
      "step": 86654
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6868122816085815,
      "learning_rate": 0.0004138449027604258,
      "loss": 3.0308,
      "step": 86655
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6323461532592773,
      "learning_rate": 0.0004138411181670144,
      "loss": 3.0335,
      "step": 86656
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8898704051971436,
      "learning_rate": 0.00041383733355243786,
      "loss": 3.1321,
      "step": 86657
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8951054811477661,
      "learning_rate": 0.0004138335489166968,
      "loss": 3.0003,
      "step": 86658
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9410752058029175,
      "learning_rate": 0.00041382976425979184,
      "loss": 3.0519,
      "step": 86659
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.425222158432007,
      "learning_rate": 0.000413825979581724,
      "loss": 2.9848,
      "step": 86660
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5555611848831177,
      "learning_rate": 0.00041382219488249355,
      "loss": 2.9381,
      "step": 86661
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6917930841445923,
      "learning_rate": 0.00041381841016210143,
      "loss": 3.1423,
      "step": 86662
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9215478897094727,
      "learning_rate": 0.00041381462542054845,
      "loss": 2.8765,
      "step": 86663
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6020429134368896,
      "learning_rate": 0.0004138108406578351,
      "loss": 2.9435,
      "step": 86664
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9950487613677979,
      "learning_rate": 0.0004138070558739621,
      "loss": 2.6812,
      "step": 86665
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7725470066070557,
      "learning_rate": 0.00041380327106893035,
      "loss": 3.051,
      "step": 86666
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7667436599731445,
      "learning_rate": 0.00041379948624274037,
      "loss": 3.0266,
      "step": 86667
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5558089017868042,
      "learning_rate": 0.0004137957013953929,
      "loss": 3.0074,
      "step": 86668
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5489639043807983,
      "learning_rate": 0.00041379191652688867,
      "loss": 2.9686,
      "step": 86669
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9527511596679688,
      "learning_rate": 0.0004137881316372283,
      "loss": 2.8447,
      "step": 86670
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.025036334991455,
      "learning_rate": 0.0004137843467264126,
      "loss": 2.9853,
      "step": 86671
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8138788938522339,
      "learning_rate": 0.0004137805617944424,
      "loss": 2.8129,
      "step": 86672
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.136704444885254,
      "learning_rate": 0.00041377677684131814,
      "loss": 2.8935,
      "step": 86673
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.643202781677246,
      "learning_rate": 0.00041377299186704056,
      "loss": 2.8068,
      "step": 86674
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5151787996292114,
      "learning_rate": 0.0004137692068716105,
      "loss": 3.083,
      "step": 86675
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.778942584991455,
      "learning_rate": 0.00041376542185502863,
      "loss": 3.2559,
      "step": 86676
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.78600811958313,
      "learning_rate": 0.0004137616368172955,
      "loss": 2.753,
      "step": 86677
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.145413875579834,
      "learning_rate": 0.000413757851758412,
      "loss": 3.1408,
      "step": 86678
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8765926361083984,
      "learning_rate": 0.0004137540666783788,
      "loss": 2.9102,
      "step": 86679
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.430037260055542,
      "learning_rate": 0.00041375028157719656,
      "loss": 2.9714,
      "step": 86680
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.5417096614837646,
      "learning_rate": 0.00041374649645486603,
      "loss": 3.1054,
      "step": 86681
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.224677562713623,
      "learning_rate": 0.00041374271131138776,
      "loss": 2.866,
      "step": 86682
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6662324666976929,
      "learning_rate": 0.0004137389261467627,
      "loss": 2.7502,
      "step": 86683
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.823603868484497,
      "learning_rate": 0.00041373514096099134,
      "loss": 2.8511,
      "step": 86684
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6356775760650635,
      "learning_rate": 0.0004137313557540745,
      "loss": 3.0647,
      "step": 86685
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6444072723388672,
      "learning_rate": 0.00041372757052601284,
      "loss": 2.7886,
      "step": 86686
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7021121978759766,
      "learning_rate": 0.0004137237852768072,
      "loss": 2.9218,
      "step": 86687
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7537823915481567,
      "learning_rate": 0.000413720000006458,
      "loss": 3.0042,
      "step": 86688
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9313637018203735,
      "learning_rate": 0.00041371621471496614,
      "loss": 3.3251,
      "step": 86689
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5066269636154175,
      "learning_rate": 0.0004137124294023323,
      "loss": 3.1363,
      "step": 86690
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7277579307556152,
      "learning_rate": 0.0004137086440685572,
      "loss": 3.0711,
      "step": 86691
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.280843734741211,
      "learning_rate": 0.0004137048587136415,
      "loss": 3.0725,
      "step": 86692
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7895983457565308,
      "learning_rate": 0.000413701073337586,
      "loss": 3.3504,
      "step": 86693
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5152077674865723,
      "learning_rate": 0.0004136972879403912,
      "loss": 3.1402,
      "step": 86694
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0177838802337646,
      "learning_rate": 0.00041369350252205795,
      "loss": 2.8671,
      "step": 86695
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5215636491775513,
      "learning_rate": 0.000413689717082587,
      "loss": 2.8322,
      "step": 86696
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1468639373779297,
      "learning_rate": 0.00041368593162197895,
      "loss": 3.0416,
      "step": 86697
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4265341758728027,
      "learning_rate": 0.0004136821461402345,
      "loss": 3.0215,
      "step": 86698
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7287330627441406,
      "learning_rate": 0.00041367836063735446,
      "loss": 3.1973,
      "step": 86699
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7925087213516235,
      "learning_rate": 0.00041367457511333944,
      "loss": 3.0645,
      "step": 86700
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.959435224533081,
      "learning_rate": 0.0004136707895681901,
      "loss": 3.1641,
      "step": 86701
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.080073118209839,
      "learning_rate": 0.00041366700400190736,
      "loss": 3.0981,
      "step": 86702
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.115053176879883,
      "learning_rate": 0.00041366321841449167,
      "loss": 3.1697,
      "step": 86703
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.983925461769104,
      "learning_rate": 0.00041365943280594384,
      "loss": 3.1211,
      "step": 86704
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4245617389678955,
      "learning_rate": 0.00041365564717626464,
      "loss": 3.0488,
      "step": 86705
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9700901508331299,
      "learning_rate": 0.00041365186152545467,
      "loss": 2.7738,
      "step": 86706
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.611224889755249,
      "learning_rate": 0.0004136480758535147,
      "loss": 3.0328,
      "step": 86707
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.8133513927459717,
      "learning_rate": 0.0004136442901604454,
      "loss": 3.1469,
      "step": 86708
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0132837295532227,
      "learning_rate": 0.0004136405044462474,
      "loss": 2.9914,
      "step": 86709
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0556602478027344,
      "learning_rate": 0.0004136367187109216,
      "loss": 3.0862,
      "step": 86710
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9215677976608276,
      "learning_rate": 0.00041363293295446855,
      "loss": 2.9154,
      "step": 86711
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6184349060058594,
      "learning_rate": 0.000413629147176889,
      "loss": 3.0304,
      "step": 86712
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.317305326461792,
      "learning_rate": 0.0004136253613781836,
      "loss": 3.0572,
      "step": 86713
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7906646728515625,
      "learning_rate": 0.00041362157555835307,
      "loss": 3.0411,
      "step": 86714
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8910106420516968,
      "learning_rate": 0.0004136177897173983,
      "loss": 3.0169,
      "step": 86715
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.999548077583313,
      "learning_rate": 0.00041361400385531974,
      "loss": 3.2259,
      "step": 86716
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.1693572998046875,
      "learning_rate": 0.00041361021797211813,
      "loss": 2.9556,
      "step": 86717
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.240713357925415,
      "learning_rate": 0.00041360643206779437,
      "loss": 3.194,
      "step": 86718
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7350622415542603,
      "learning_rate": 0.00041360264614234894,
      "loss": 3.0301,
      "step": 86719
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7847923040390015,
      "learning_rate": 0.00041359886019578265,
      "loss": 2.9052,
      "step": 86720
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.192856550216675,
      "learning_rate": 0.0004135950742280962,
      "loss": 3.0395,
      "step": 86721
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.246500253677368,
      "learning_rate": 0.0004135912882392903,
      "loss": 3.055,
      "step": 86722
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6405365467071533,
      "learning_rate": 0.0004135875022293656,
      "loss": 2.9363,
      "step": 86723
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.4909677505493164,
      "learning_rate": 0.00041358371619832284,
      "loss": 3.1434,
      "step": 86724
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.877272605895996,
      "learning_rate": 0.00041357993014616275,
      "loss": 3.0301,
      "step": 86725
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.2937707901000977,
      "learning_rate": 0.00041357614407288597,
      "loss": 2.8372,
      "step": 86726
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5945879220962524,
      "learning_rate": 0.00041357235797849333,
      "loss": 3.0361,
      "step": 86727
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8078703880310059,
      "learning_rate": 0.00041356857186298525,
      "loss": 3.2009,
      "step": 86728
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.809190034866333,
      "learning_rate": 0.00041356478572636277,
      "loss": 2.8218,
      "step": 86729
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6845345497131348,
      "learning_rate": 0.00041356099956862654,
      "loss": 2.9678,
      "step": 86730
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.715710997581482,
      "learning_rate": 0.000413557213389777,
      "loss": 2.8711,
      "step": 86731
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.5710482597351074,
      "learning_rate": 0.00041355342718981503,
      "loss": 2.8329,
      "step": 86732
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.9542627334594727,
      "learning_rate": 0.00041354964096874155,
      "loss": 2.9813,
      "step": 86733
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.571898937225342,
      "learning_rate": 0.00041354585472655686,
      "loss": 3.0897,
      "step": 86734
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1927990913391113,
      "learning_rate": 0.00041354206846326184,
      "loss": 2.6963,
      "step": 86735
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.371281147003174,
      "learning_rate": 0.0004135382821788573,
      "loss": 2.9735,
      "step": 86736
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.539012908935547,
      "learning_rate": 0.0004135344958733438,
      "loss": 2.9418,
      "step": 86737
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6115541458129883,
      "learning_rate": 0.0004135307095467221,
      "loss": 3.0926,
      "step": 86738
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.237215757369995,
      "learning_rate": 0.00041352692319899294,
      "loss": 3.0144,
      "step": 86739
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5745526552200317,
      "learning_rate": 0.00041352313683015696,
      "loss": 2.9969,
      "step": 86740
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.222415447235107,
      "learning_rate": 0.0004135193504402148,
      "loss": 2.9848,
      "step": 86741
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.51381254196167,
      "learning_rate": 0.0004135155640291674,
      "loss": 3.2537,
      "step": 86742
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.715918779373169,
      "learning_rate": 0.00041351177759701524,
      "loss": 2.9488,
      "step": 86743
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.127183675765991,
      "learning_rate": 0.0004135079911437591,
      "loss": 2.709,
      "step": 86744
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1765589714050293,
      "learning_rate": 0.00041350420466939973,
      "loss": 3.0157,
      "step": 86745
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2134995460510254,
      "learning_rate": 0.00041350041817393767,
      "loss": 2.9498,
      "step": 86746
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7616064548492432,
      "learning_rate": 0.00041349663165737377,
      "loss": 3.1641,
      "step": 86747
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9871013164520264,
      "learning_rate": 0.0004134928451197088,
      "loss": 3.0653,
      "step": 86748
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.39793062210083,
      "learning_rate": 0.00041348905856094327,
      "loss": 2.8263,
      "step": 86749
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.860511541366577,
      "learning_rate": 0.000413485271981078,
      "loss": 3.0262,
      "step": 86750
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8616762161254883,
      "learning_rate": 0.00041348148538011375,
      "loss": 3.0934,
      "step": 86751
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.142636775970459,
      "learning_rate": 0.0004134776987580511,
      "loss": 2.8152,
      "step": 86752
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5266631841659546,
      "learning_rate": 0.00041347391211489075,
      "loss": 3.0857,
      "step": 86753
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.917997121810913,
      "learning_rate": 0.00041347012545063353,
      "loss": 3.0606,
      "step": 86754
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9368979930877686,
      "learning_rate": 0.00041346633876528007,
      "loss": 3.1089,
      "step": 86755
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.67915940284729,
      "learning_rate": 0.00041346255205883096,
      "loss": 3.0688,
      "step": 86756
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6847777366638184,
      "learning_rate": 0.0004134587653312872,
      "loss": 2.8429,
      "step": 86757
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.754157543182373,
      "learning_rate": 0.0004134549785826492,
      "loss": 2.813,
      "step": 86758
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7540005445480347,
      "learning_rate": 0.00041345119181291777,
      "loss": 3.0044,
      "step": 86759
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3430533409118652,
      "learning_rate": 0.0004134474050220936,
      "loss": 2.989,
      "step": 86760
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5797256231307983,
      "learning_rate": 0.0004134436182101776,
      "loss": 3.2014,
      "step": 86761
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.498797059059143,
      "learning_rate": 0.0004134398313771701,
      "loss": 3.0882,
      "step": 86762
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.50294828414917,
      "learning_rate": 0.000413436044523072,
      "loss": 3.319,
      "step": 86763
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9355335235595703,
      "learning_rate": 0.0004134322576478842,
      "loss": 3.2834,
      "step": 86764
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.747322678565979,
      "learning_rate": 0.00041342847075160694,
      "loss": 3.0699,
      "step": 86765
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4429283142089844,
      "learning_rate": 0.0004134246838342413,
      "loss": 2.8509,
      "step": 86766
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6034120321273804,
      "learning_rate": 0.0004134208968957879,
      "loss": 2.9956,
      "step": 86767
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5989524126052856,
      "learning_rate": 0.0004134171099362474,
      "loss": 2.7556,
      "step": 86768
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.525612711906433,
      "learning_rate": 0.0004134133229556205,
      "loss": 2.9958,
      "step": 86769
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6611518859863281,
      "learning_rate": 0.0004134095359539078,
      "loss": 2.9615,
      "step": 86770
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.756722331047058,
      "learning_rate": 0.0004134057489311103,
      "loss": 3.0206,
      "step": 86771
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5124260187149048,
      "learning_rate": 0.00041340196188722854,
      "loss": 2.9971,
      "step": 86772
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6937536001205444,
      "learning_rate": 0.0004133981748222631,
      "loss": 3.0248,
      "step": 86773
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9576492309570312,
      "learning_rate": 0.0004133943877362149,
      "loss": 3.1137,
      "step": 86774
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6827999353408813,
      "learning_rate": 0.0004133906006290845,
      "loss": 2.8976,
      "step": 86775
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.858521819114685,
      "learning_rate": 0.00041338681350087264,
      "loss": 2.9255,
      "step": 86776
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7078728675842285,
      "learning_rate": 0.00041338302635158,
      "loss": 2.8885,
      "step": 86777
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4561916589736938,
      "learning_rate": 0.00041337923918120735,
      "loss": 3.3709,
      "step": 86778
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7496453523635864,
      "learning_rate": 0.0004133754519897554,
      "loss": 2.915,
      "step": 86779
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6792844533920288,
      "learning_rate": 0.00041337166477722475,
      "loss": 2.9293,
      "step": 86780
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4258880615234375,
      "learning_rate": 0.0004133678775436162,
      "loss": 3.2028,
      "step": 86781
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0489675998687744,
      "learning_rate": 0.0004133640902889304,
      "loss": 2.8066,
      "step": 86782
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9057610034942627,
      "learning_rate": 0.0004133603030131681,
      "loss": 3.0106,
      "step": 86783
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8316982984542847,
      "learning_rate": 0.00041335651571632997,
      "loss": 2.7969,
      "step": 86784
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.491365432739258,
      "learning_rate": 0.00041335272839841675,
      "loss": 2.9789,
      "step": 86785
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8687974214553833,
      "learning_rate": 0.0004133489410594291,
      "loss": 3.1129,
      "step": 86786
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1789798736572266,
      "learning_rate": 0.00041334515369936765,
      "loss": 3.273,
      "step": 86787
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8596826791763306,
      "learning_rate": 0.00041334136631823335,
      "loss": 2.8755,
      "step": 86788
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.114941120147705,
      "learning_rate": 0.0004133375789160267,
      "loss": 2.942,
      "step": 86789
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.835732936859131,
      "learning_rate": 0.00041333379149274834,
      "loss": 2.9987,
      "step": 86790
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1330676078796387,
      "learning_rate": 0.0004133300040483993,
      "loss": 3.0206,
      "step": 86791
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5713776350021362,
      "learning_rate": 0.00041332621658297995,
      "loss": 2.7559,
      "step": 86792
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.509653329849243,
      "learning_rate": 0.00041332242909649107,
      "loss": 2.8688,
      "step": 86793
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.513760566711426,
      "learning_rate": 0.0004133186415889335,
      "loss": 2.7391,
      "step": 86794
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.5992841720581055,
      "learning_rate": 0.0004133148540603078,
      "loss": 2.8139,
      "step": 86795
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.126741647720337,
      "learning_rate": 0.00041331106651061475,
      "loss": 2.943,
      "step": 86796
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.315093517303467,
      "learning_rate": 0.00041330727893985507,
      "loss": 2.9228,
      "step": 86797
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.525620937347412,
      "learning_rate": 0.0004133034913480294,
      "loss": 3.2214,
      "step": 86798
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1484720706939697,
      "learning_rate": 0.0004132997037351384,
      "loss": 3.1989,
      "step": 86799
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.5103230476379395,
      "learning_rate": 0.00041329591610118297,
      "loss": 3.124,
      "step": 86800
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.2871851921081543,
      "learning_rate": 0.00041329212844616364,
      "loss": 3.0382,
      "step": 86801
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.573803663253784,
      "learning_rate": 0.00041328834077008114,
      "loss": 3.1511,
      "step": 86802
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.19293475151062,
      "learning_rate": 0.0004132845530729362,
      "loss": 3.119,
      "step": 86803
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4342434406280518,
      "learning_rate": 0.00041328076535472954,
      "loss": 3.0407,
      "step": 86804
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6979552507400513,
      "learning_rate": 0.00041327697761546186,
      "loss": 3.0933,
      "step": 86805
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6763463020324707,
      "learning_rate": 0.00041327318985513386,
      "loss": 3.0729,
      "step": 86806
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0077693462371826,
      "learning_rate": 0.0004132694020737462,
      "loss": 3.087,
      "step": 86807
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8327035903930664,
      "learning_rate": 0.0004132656142712996,
      "loss": 3.1214,
      "step": 86808
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2304389476776123,
      "learning_rate": 0.0004132618264477949,
      "loss": 3.0611,
      "step": 86809
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.919480323791504,
      "learning_rate": 0.00041325803860323254,
      "loss": 3.0091,
      "step": 86810
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.50464928150177,
      "learning_rate": 0.00041325425073761345,
      "loss": 2.9319,
      "step": 86811
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.2029831409454346,
      "learning_rate": 0.0004132504628509383,
      "loss": 3.0216,
      "step": 86812
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.2839274406433105,
      "learning_rate": 0.00041324667494320764,
      "loss": 2.9613,
      "step": 86813
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.663954019546509,
      "learning_rate": 0.00041324288701442233,
      "loss": 3.2674,
      "step": 86814
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8335824012756348,
      "learning_rate": 0.00041323909906458307,
      "loss": 2.8754,
      "step": 86815
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.084639072418213,
      "learning_rate": 0.00041323531109369055,
      "loss": 2.9575,
      "step": 86816
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.8618500232696533,
      "learning_rate": 0.00041323152310174537,
      "loss": 3.1585,
      "step": 86817
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6931726932525635,
      "learning_rate": 0.00041322773508874834,
      "loss": 2.9979,
      "step": 86818
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6114801168441772,
      "learning_rate": 0.0004132239470547001,
      "loss": 3.1185,
      "step": 86819
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.310495376586914,
      "learning_rate": 0.00041322015899960136,
      "loss": 2.8231,
      "step": 86820
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4025161266326904,
      "learning_rate": 0.000413216370923453,
      "loss": 3.3392,
      "step": 86821
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9361767768859863,
      "learning_rate": 0.0004132125828262555,
      "loss": 3.1231,
      "step": 86822
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1452887058258057,
      "learning_rate": 0.0004132087947080096,
      "loss": 3.1331,
      "step": 86823
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.135756731033325,
      "learning_rate": 0.00041320500656871615,
      "loss": 2.9593,
      "step": 86824
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5323896408081055,
      "learning_rate": 0.00041320121840837567,
      "loss": 2.9913,
      "step": 86825
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6814689636230469,
      "learning_rate": 0.000413197430226989,
      "loss": 2.9925,
      "step": 86826
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.944707155227661,
      "learning_rate": 0.00041319364202455674,
      "loss": 2.9637,
      "step": 86827
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7485426664352417,
      "learning_rate": 0.00041318985380107965,
      "loss": 3.0444,
      "step": 86828
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.347871780395508,
      "learning_rate": 0.00041318606555655846,
      "loss": 2.9612,
      "step": 86829
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5479878187179565,
      "learning_rate": 0.0004131822772909939,
      "loss": 2.9729,
      "step": 86830
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5817464590072632,
      "learning_rate": 0.00041317848900438646,
      "loss": 2.9641,
      "step": 86831
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7042323350906372,
      "learning_rate": 0.0004131747006967371,
      "loss": 3.0249,
      "step": 86832
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.35644268989563,
      "learning_rate": 0.0004131709123680465,
      "loss": 3.0917,
      "step": 86833
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8090585470199585,
      "learning_rate": 0.00041316712401831513,
      "loss": 2.9665,
      "step": 86834
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.613632082939148,
      "learning_rate": 0.0004131633356475439,
      "loss": 3.1899,
      "step": 86835
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.978700637817383,
      "learning_rate": 0.00041315954725573353,
      "loss": 2.944,
      "step": 86836
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6179183721542358,
      "learning_rate": 0.00041315575884288465,
      "loss": 3.074,
      "step": 86837
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.5821919441223145,
      "learning_rate": 0.0004131519704089979,
      "loss": 2.9178,
      "step": 86838
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7761006355285645,
      "learning_rate": 0.0004131481819540742,
      "loss": 3.0544,
      "step": 86839
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7481279373168945,
      "learning_rate": 0.0004131443934781141,
      "loss": 2.9665,
      "step": 86840
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7966057062149048,
      "learning_rate": 0.00041314060498111825,
      "loss": 3.058,
      "step": 86841
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.706358551979065,
      "learning_rate": 0.0004131368164630874,
      "loss": 2.8588,
      "step": 86842
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6586973667144775,
      "learning_rate": 0.00041313302792402235,
      "loss": 2.6943,
      "step": 86843
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2626633644104004,
      "learning_rate": 0.0004131292393639237,
      "loss": 2.9433,
      "step": 86844
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.475163221359253,
      "learning_rate": 0.00041312545078279214,
      "loss": 2.8543,
      "step": 86845
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6470214128494263,
      "learning_rate": 0.00041312166218062854,
      "loss": 2.9629,
      "step": 86846
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6481596231460571,
      "learning_rate": 0.0004131178735574334,
      "loss": 2.9684,
      "step": 86847
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6265722513198853,
      "learning_rate": 0.0004131140849132075,
      "loss": 3.3134,
      "step": 86848
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.631242275238037,
      "learning_rate": 0.0004131102962479517,
      "loss": 3.0689,
      "step": 86849
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7837787866592407,
      "learning_rate": 0.0004131065075616664,
      "loss": 3.1852,
      "step": 86850
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9698199033737183,
      "learning_rate": 0.0004131027188543525,
      "loss": 2.9951,
      "step": 86851
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6817727088928223,
      "learning_rate": 0.0004130989301260107,
      "loss": 2.9878,
      "step": 86852
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3086342811584473,
      "learning_rate": 0.0004130951413766417,
      "loss": 2.92,
      "step": 86853
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5061619281768799,
      "learning_rate": 0.0004130913526062461,
      "loss": 3.1702,
      "step": 86854
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7267026901245117,
      "learning_rate": 0.0004130875638148247,
      "loss": 3.155,
      "step": 86855
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.592949628829956,
      "learning_rate": 0.0004130837750023783,
      "loss": 2.8668,
      "step": 86856
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8546391725540161,
      "learning_rate": 0.00041307998616890734,
      "loss": 2.9967,
      "step": 86857
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6175941228866577,
      "learning_rate": 0.00041307619731441276,
      "loss": 3.119,
      "step": 86858
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4577456712722778,
      "learning_rate": 0.0004130724084388952,
      "loss": 2.8872,
      "step": 86859
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7685672044754028,
      "learning_rate": 0.0004130686195423553,
      "loss": 2.9643,
      "step": 86860
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9312657117843628,
      "learning_rate": 0.0004130648306247938,
      "loss": 2.9897,
      "step": 86861
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6217973232269287,
      "learning_rate": 0.0004130610416862114,
      "loss": 2.9949,
      "step": 86862
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.177992582321167,
      "learning_rate": 0.00041305725272660883,
      "loss": 2.856,
      "step": 86863
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8076651096343994,
      "learning_rate": 0.0004130534637459868,
      "loss": 2.8911,
      "step": 86864
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.824562430381775,
      "learning_rate": 0.00041304967474434603,
      "loss": 3.2363,
      "step": 86865
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.952746033668518,
      "learning_rate": 0.00041304588572168714,
      "loss": 3.1288,
      "step": 86866
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8421220779418945,
      "learning_rate": 0.00041304209667801094,
      "loss": 2.959,
      "step": 86867
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1992640495300293,
      "learning_rate": 0.000413038307613318,
      "loss": 3.0216,
      "step": 86868
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8604146242141724,
      "learning_rate": 0.0004130345185276091,
      "loss": 3.1229,
      "step": 86869
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.130018949508667,
      "learning_rate": 0.0004130307294208851,
      "loss": 3.0452,
      "step": 86870
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.147355318069458,
      "learning_rate": 0.00041302694029314645,
      "loss": 3.1735,
      "step": 86871
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.404945135116577,
      "learning_rate": 0.0004130231511443939,
      "loss": 3.1261,
      "step": 86872
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.067371129989624,
      "learning_rate": 0.0004130193619746283,
      "loss": 3.0528,
      "step": 86873
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5201530456542969,
      "learning_rate": 0.0004130155727838502,
      "loss": 3.1214,
      "step": 86874
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3117752075195312,
      "learning_rate": 0.0004130117835720604,
      "loss": 3.1972,
      "step": 86875
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.6613264083862305,
      "learning_rate": 0.0004130079943392597,
      "loss": 3.0639,
      "step": 86876
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.891492247581482,
      "learning_rate": 0.00041300420508544853,
      "loss": 2.9187,
      "step": 86877
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3360559940338135,
      "learning_rate": 0.00041300041581062775,
      "loss": 2.9073,
      "step": 86878
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4367456436157227,
      "learning_rate": 0.0004129966265147982,
      "loss": 2.9046,
      "step": 86879
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7066125869750977,
      "learning_rate": 0.00041299283719796026,
      "loss": 2.987,
      "step": 86880
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7767118215560913,
      "learning_rate": 0.0004129890478601149,
      "loss": 3.021,
      "step": 86881
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6374421119689941,
      "learning_rate": 0.0004129852585012628,
      "loss": 2.9463,
      "step": 86882
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2487311363220215,
      "learning_rate": 0.0004129814691214045,
      "loss": 2.8863,
      "step": 86883
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7305893898010254,
      "learning_rate": 0.0004129776797205409,
      "loss": 3.0598,
      "step": 86884
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.583671808242798,
      "learning_rate": 0.0004129738902986726,
      "loss": 2.8532,
      "step": 86885
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7890511751174927,
      "learning_rate": 0.00041297010085580025,
      "loss": 2.851,
      "step": 86886
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.0631320476531982,
      "learning_rate": 0.0004129663113919247,
      "loss": 3.1833,
      "step": 86887
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.688560962677002,
      "learning_rate": 0.00041296252190704656,
      "loss": 3.098,
      "step": 86888
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1808993816375732,
      "learning_rate": 0.00041295873240116657,
      "loss": 3.0305,
      "step": 86889
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.056105613708496,
      "learning_rate": 0.0004129549428742853,
      "loss": 3.0018,
      "step": 86890
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.176374435424805,
      "learning_rate": 0.00041295115332640376,
      "loss": 3.0182,
      "step": 86891
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.408379077911377,
      "learning_rate": 0.00041294736375752236,
      "loss": 2.9603,
      "step": 86892
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.392700672149658,
      "learning_rate": 0.0004129435741676419,
      "loss": 2.9811,
      "step": 86893
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7089647054672241,
      "learning_rate": 0.0004129397845567632,
      "loss": 3.035,
      "step": 86894
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8160042762756348,
      "learning_rate": 0.00041293599492488674,
      "loss": 3.1245,
      "step": 86895
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.866626739501953,
      "learning_rate": 0.0004129322052720134,
      "loss": 3.0277,
      "step": 86896
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8417526483535767,
      "learning_rate": 0.00041292841559814386,
      "loss": 3.0967,
      "step": 86897
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6911325454711914,
      "learning_rate": 0.00041292462590327876,
      "loss": 2.8693,
      "step": 86898
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7886000871658325,
      "learning_rate": 0.00041292083618741884,
      "loss": 3.0475,
      "step": 86899
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.9565749168395996,
      "learning_rate": 0.0004129170464505649,
      "loss": 3.082,
      "step": 86900
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.565524101257324,
      "learning_rate": 0.0004129132566927174,
      "loss": 2.8402,
      "step": 86901
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5910465717315674,
      "learning_rate": 0.0004129094669138773,
      "loss": 2.9362,
      "step": 86902
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0167694091796875,
      "learning_rate": 0.0004129056771140452,
      "loss": 2.948,
      "step": 86903
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4347915649414062,
      "learning_rate": 0.00041290188729322167,
      "loss": 3.0844,
      "step": 86904
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4816431999206543,
      "learning_rate": 0.00041289809745140765,
      "loss": 2.8189,
      "step": 86905
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8287787437438965,
      "learning_rate": 0.00041289430758860375,
      "loss": 3.0155,
      "step": 86906
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.852642774581909,
      "learning_rate": 0.0004128905177048106,
      "loss": 3.0504,
      "step": 86907
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7633891105651855,
      "learning_rate": 0.000412886727800029,
      "loss": 3.0838,
      "step": 86908
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0870490074157715,
      "learning_rate": 0.00041288293787425975,
      "loss": 3.1072,
      "step": 86909
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9067246913909912,
      "learning_rate": 0.0004128791479275033,
      "loss": 2.9869,
      "step": 86910
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.5872902870178223,
      "learning_rate": 0.00041287535795976046,
      "loss": 2.9344,
      "step": 86911
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1928791999816895,
      "learning_rate": 0.000412871567971032,
      "loss": 2.845,
      "step": 86912
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4064722061157227,
      "learning_rate": 0.00041286777796131866,
      "loss": 2.9122,
      "step": 86913
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.108761787414551,
      "learning_rate": 0.000412863987930621,
      "loss": 2.9129,
      "step": 86914
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.213869571685791,
      "learning_rate": 0.00041286019787893977,
      "loss": 2.9054,
      "step": 86915
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.885704755783081,
      "learning_rate": 0.0004128564078062758,
      "loss": 2.8556,
      "step": 86916
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.412611246109009,
      "learning_rate": 0.0004128526177126296,
      "loss": 3.0615,
      "step": 86917
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.9426186084747314,
      "learning_rate": 0.00041284882759800196,
      "loss": 3.2389,
      "step": 86918
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.0544486045837402,
      "learning_rate": 0.00041284503746239366,
      "loss": 3.0455,
      "step": 86919
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6684430837631226,
      "learning_rate": 0.00041284124730580523,
      "loss": 3.1284,
      "step": 86920
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3203413486480713,
      "learning_rate": 0.00041283745712823754,
      "loss": 2.8165,
      "step": 86921
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.1386213302612305,
      "learning_rate": 0.00041283366692969136,
      "loss": 3.0012,
      "step": 86922
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.6206743717193604,
      "learning_rate": 0.0004128298767101671,
      "loss": 2.9894,
      "step": 86923
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1661503314971924,
      "learning_rate": 0.00041282608646966566,
      "loss": 3.0516,
      "step": 86924
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7969104051589966,
      "learning_rate": 0.00041282229620818783,
      "loss": 3.1614,
      "step": 86925
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.623929023742676,
      "learning_rate": 0.00041281850592573416,
      "loss": 3.0866,
      "step": 86926
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3464252948760986,
      "learning_rate": 0.0004128147156223053,
      "loss": 2.8881,
      "step": 86927
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.266899585723877,
      "learning_rate": 0.00041281092529790216,
      "loss": 2.9898,
      "step": 86928
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7916615009307861,
      "learning_rate": 0.00041280713495252534,
      "loss": 3.0786,
      "step": 86929
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8750213384628296,
      "learning_rate": 0.00041280334458617546,
      "loss": 3.0627,
      "step": 86930
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8126943111419678,
      "learning_rate": 0.0004127995541988534,
      "loss": 3.0132,
      "step": 86931
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7026422023773193,
      "learning_rate": 0.00041279576379055975,
      "loss": 2.8814,
      "step": 86932
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6587579250335693,
      "learning_rate": 0.0004127919733612952,
      "loss": 2.9908,
      "step": 86933
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2542402744293213,
      "learning_rate": 0.00041278818291106057,
      "loss": 2.872,
      "step": 86934
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2227730751037598,
      "learning_rate": 0.00041278439243985635,
      "loss": 3.1998,
      "step": 86935
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.618051290512085,
      "learning_rate": 0.0004127806019476835,
      "loss": 2.9598,
      "step": 86936
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9569655656814575,
      "learning_rate": 0.0004127768114345426,
      "loss": 2.7495,
      "step": 86937
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6542973518371582,
      "learning_rate": 0.00041277302090043434,
      "loss": 2.8332,
      "step": 86938
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6662440299987793,
      "learning_rate": 0.0004127692303453594,
      "loss": 2.7988,
      "step": 86939
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.2534067630767822,
      "learning_rate": 0.0004127654397693185,
      "loss": 3.0674,
      "step": 86940
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.8169515132904053,
      "learning_rate": 0.0004127616491723125,
      "loss": 2.7875,
      "step": 86941
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7634546756744385,
      "learning_rate": 0.0004127578585543419,
      "loss": 3.1818,
      "step": 86942
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.690568208694458,
      "learning_rate": 0.0004127540679154076,
      "loss": 2.9598,
      "step": 86943
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.352156639099121,
      "learning_rate": 0.00041275027725551003,
      "loss": 2.8966,
      "step": 86944
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.272315740585327,
      "learning_rate": 0.0004127464865746502,
      "loss": 3.0721,
      "step": 86945
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.984341025352478,
      "learning_rate": 0.00041274269587282856,
      "loss": 3.1315,
      "step": 86946
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.745373487472534,
      "learning_rate": 0.00041273890515004596,
      "loss": 3.1255,
      "step": 86947
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7925736904144287,
      "learning_rate": 0.00041273511440630313,
      "loss": 3.0501,
      "step": 86948
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1772704124450684,
      "learning_rate": 0.0004127313236416006,
      "loss": 3.0245,
      "step": 86949
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7724499702453613,
      "learning_rate": 0.0004127275328559392,
      "loss": 3.1913,
      "step": 86950
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.9748756885528564,
      "learning_rate": 0.0004127237420493197,
      "loss": 2.9632,
      "step": 86951
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.725115418434143,
      "learning_rate": 0.0004127199512217428,
      "loss": 2.8013,
      "step": 86952
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1952357292175293,
      "learning_rate": 0.00041271616037320895,
      "loss": 2.8936,
      "step": 86953
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.626645803451538,
      "learning_rate": 0.0004127123695037191,
      "loss": 2.9137,
      "step": 86954
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.144258737564087,
      "learning_rate": 0.00041270857861327396,
      "loss": 2.8245,
      "step": 86955
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.156306505203247,
      "learning_rate": 0.0004127047877018741,
      "loss": 3.0443,
      "step": 86956
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6221798658370972,
      "learning_rate": 0.00041270099676952033,
      "loss": 3.038,
      "step": 86957
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9747962951660156,
      "learning_rate": 0.0004126972058162133,
      "loss": 2.9585,
      "step": 86958
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0681190490722656,
      "learning_rate": 0.00041269341484195376,
      "loss": 3.089,
      "step": 86959
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4753776788711548,
      "learning_rate": 0.0004126896238467423,
      "loss": 3.1267,
      "step": 86960
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6632747650146484,
      "learning_rate": 0.0004126858328305799,
      "loss": 2.9425,
      "step": 86961
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6055283546447754,
      "learning_rate": 0.0004126820417934669,
      "loss": 2.9422,
      "step": 86962
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5317567586898804,
      "learning_rate": 0.00041267825073540426,
      "loss": 3.1452,
      "step": 86963
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6654765605926514,
      "learning_rate": 0.00041267445965639257,
      "loss": 3.0842,
      "step": 86964
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.283226490020752,
      "learning_rate": 0.00041267066855643256,
      "loss": 3.0053,
      "step": 86965
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6687350273132324,
      "learning_rate": 0.00041266687743552496,
      "loss": 3.0294,
      "step": 86966
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5795772075653076,
      "learning_rate": 0.0004126630862936705,
      "loss": 3.1958,
      "step": 86967
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.3993699550628662,
      "learning_rate": 0.00041265929513086984,
      "loss": 2.9088,
      "step": 86968
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.9569921493530273,
      "learning_rate": 0.0004126555039471236,
      "loss": 3.125,
      "step": 86969
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.041597366333008,
      "learning_rate": 0.0004126517127424327,
      "loss": 3.0515,
      "step": 86970
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.831250548362732,
      "learning_rate": 0.0004126479215167976,
      "loss": 3.0986,
      "step": 86971
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.614011287689209,
      "learning_rate": 0.0004126441302702192,
      "loss": 2.9211,
      "step": 86972
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.162087917327881,
      "learning_rate": 0.0004126403390026981,
      "loss": 3.317,
      "step": 86973
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0162699222564697,
      "learning_rate": 0.0004126365477142351,
      "loss": 2.7592,
      "step": 86974
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9910892248153687,
      "learning_rate": 0.0004126327564048307,
      "loss": 2.8639,
      "step": 86975
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9718049764633179,
      "learning_rate": 0.00041262896507448594,
      "loss": 3.2303,
      "step": 86976
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6376361846923828,
      "learning_rate": 0.00041262517372320115,
      "loss": 2.8823,
      "step": 86977
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8169559240341187,
      "learning_rate": 0.00041262138235097733,
      "loss": 3.0763,
      "step": 86978
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.654375433921814,
      "learning_rate": 0.00041261759095781496,
      "loss": 3.0343,
      "step": 86979
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6801741123199463,
      "learning_rate": 0.0004126137995437149,
      "loss": 2.8359,
      "step": 86980
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9131851196289062,
      "learning_rate": 0.0004126100081086779,
      "loss": 2.7783,
      "step": 86981
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9578006267547607,
      "learning_rate": 0.00041260621665270444,
      "loss": 2.7793,
      "step": 86982
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.727597951889038,
      "learning_rate": 0.0004126024251757955,
      "loss": 3.1932,
      "step": 86983
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.661025285720825,
      "learning_rate": 0.0004125986336779515,
      "loss": 2.9671,
      "step": 86984
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.857287883758545,
      "learning_rate": 0.00041259484215917336,
      "loss": 2.9493,
      "step": 86985
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5175827741622925,
      "learning_rate": 0.0004125910506194617,
      "loss": 2.8975,
      "step": 86986
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0555803775787354,
      "learning_rate": 0.00041258725905881727,
      "loss": 3.1098,
      "step": 86987
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5283818244934082,
      "learning_rate": 0.0004125834674772407,
      "loss": 3.0168,
      "step": 86988
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5131114721298218,
      "learning_rate": 0.0004125796758747328,
      "loss": 2.8872,
      "step": 86989
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5763221979141235,
      "learning_rate": 0.00041257588425129413,
      "loss": 3.0382,
      "step": 86990
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8506954908370972,
      "learning_rate": 0.0004125720926069255,
      "loss": 3.0138,
      "step": 86991
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9843559265136719,
      "learning_rate": 0.0004125683009416276,
      "loss": 2.9836,
      "step": 86992
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8710542917251587,
      "learning_rate": 0.00041256450925540117,
      "loss": 3.143,
      "step": 86993
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.431136131286621,
      "learning_rate": 0.00041256071754824686,
      "loss": 2.8253,
      "step": 86994
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.610610008239746,
      "learning_rate": 0.0004125569258201654,
      "loss": 3.1001,
      "step": 86995
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6087872982025146,
      "learning_rate": 0.0004125531340711574,
      "loss": 3.06,
      "step": 86996
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.3353890180587769,
      "learning_rate": 0.00041254934230122367,
      "loss": 3.1154,
      "step": 86997
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1402769088745117,
      "learning_rate": 0.00041254555051036494,
      "loss": 3.034,
      "step": 86998
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8854776620864868,
      "learning_rate": 0.00041254175869858187,
      "loss": 3.196,
      "step": 86999
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.036219835281372,
      "learning_rate": 0.0004125379668658751,
      "loss": 2.8335,
      "step": 87000
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.878670573234558,
      "learning_rate": 0.00041253417501224546,
      "loss": 2.949,
      "step": 87001
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.481796145439148,
      "learning_rate": 0.0004125303831376936,
      "loss": 2.9586,
      "step": 87002
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4256577491760254,
      "learning_rate": 0.0004125265912422202,
      "loss": 2.9534,
      "step": 87003
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.7438602447509766,
      "learning_rate": 0.000412522799325826,
      "loss": 2.8264,
      "step": 87004
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1906914710998535,
      "learning_rate": 0.00041251900738851165,
      "loss": 3.0081,
      "step": 87005
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6457045078277588,
      "learning_rate": 0.0004125152154302778,
      "loss": 3.0367,
      "step": 87006
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.133622884750366,
      "learning_rate": 0.00041251142345112546,
      "loss": 2.8381,
      "step": 87007
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5657222270965576,
      "learning_rate": 0.00041250763145105496,
      "loss": 3.0071,
      "step": 87008
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9356149435043335,
      "learning_rate": 0.0004125038394300672,
      "loss": 2.8775,
      "step": 87009
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6525700092315674,
      "learning_rate": 0.00041250004738816294,
      "loss": 3.2528,
      "step": 87010
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6954714059829712,
      "learning_rate": 0.00041249625532534267,
      "loss": 2.7785,
      "step": 87011
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5609002113342285,
      "learning_rate": 0.0004124924632416073,
      "loss": 2.7319,
      "step": 87012
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.621287226676941,
      "learning_rate": 0.0004124886711369575,
      "loss": 2.9933,
      "step": 87013
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8817024230957031,
      "learning_rate": 0.0004124848790113938,
      "loss": 2.7894,
      "step": 87014
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6186553239822388,
      "learning_rate": 0.0004124810868649171,
      "loss": 3.0434,
      "step": 87015
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.835374355316162,
      "learning_rate": 0.0004124772946975281,
      "loss": 3.2905,
      "step": 87016
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.102987289428711,
      "learning_rate": 0.00041247350250922743,
      "loss": 2.9257,
      "step": 87017
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9354884624481201,
      "learning_rate": 0.00041246971030001574,
      "loss": 3.2074,
      "step": 87018
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5970442295074463,
      "learning_rate": 0.0004124659180698939,
      "loss": 3.1144,
      "step": 87019
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.832757830619812,
      "learning_rate": 0.00041246212581886243,
      "loss": 3.0572,
      "step": 87020
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.748374104499817,
      "learning_rate": 0.00041245833354692217,
      "loss": 3.0716,
      "step": 87021
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6308478116989136,
      "learning_rate": 0.0004124545412540738,
      "loss": 3.0578,
      "step": 87022
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8369152545928955,
      "learning_rate": 0.000412450748940318,
      "loss": 3.0055,
      "step": 87023
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7791762351989746,
      "learning_rate": 0.0004124469566056555,
      "loss": 3.1044,
      "step": 87024
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8692288398742676,
      "learning_rate": 0.0004124431642500869,
      "loss": 2.8627,
      "step": 87025
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.01533842086792,
      "learning_rate": 0.00041243937187361313,
      "loss": 3.0985,
      "step": 87026
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7331324815750122,
      "learning_rate": 0.0004124355794762347,
      "loss": 3.0785,
      "step": 87027
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4247466325759888,
      "learning_rate": 0.00041243178705795234,
      "loss": 3.1782,
      "step": 87028
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7954694032669067,
      "learning_rate": 0.00041242799461876684,
      "loss": 3.1865,
      "step": 87029
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.373915672302246,
      "learning_rate": 0.00041242420215867873,
      "loss": 2.9566,
      "step": 87030
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4386050701141357,
      "learning_rate": 0.00041242040967768895,
      "loss": 2.8734,
      "step": 87031
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6086045503616333,
      "learning_rate": 0.000412416617175798,
      "loss": 2.8418,
      "step": 87032
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9554673433303833,
      "learning_rate": 0.00041241282465300687,
      "loss": 2.8624,
      "step": 87033
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.29931640625,
      "learning_rate": 0.0004124090321093159,
      "loss": 2.8751,
      "step": 87034
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.842700719833374,
      "learning_rate": 0.000412405239544726,
      "loss": 3.1921,
      "step": 87035
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5802252292633057,
      "learning_rate": 0.0004124014469592379,
      "loss": 2.9707,
      "step": 87036
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4128068685531616,
      "learning_rate": 0.00041239765435285223,
      "loss": 2.9863,
      "step": 87037
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.149837017059326,
      "learning_rate": 0.0004123938617255697,
      "loss": 3.1454,
      "step": 87038
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6188457012176514,
      "learning_rate": 0.000412390069077391,
      "loss": 3.1627,
      "step": 87039
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7990636825561523,
      "learning_rate": 0.0004123862764083169,
      "loss": 2.7847,
      "step": 87040
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8160547018051147,
      "learning_rate": 0.0004123824837183481,
      "loss": 2.793,
      "step": 87041
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6348562240600586,
      "learning_rate": 0.0004123786910074852,
      "loss": 3.2158,
      "step": 87042
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1167283058166504,
      "learning_rate": 0.00041237489827572905,
      "loss": 2.9369,
      "step": 87043
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8142298460006714,
      "learning_rate": 0.00041237110552308026,
      "loss": 3.1781,
      "step": 87044
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.621350646018982,
      "learning_rate": 0.00041236731274953955,
      "loss": 2.9217,
      "step": 87045
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8946181535720825,
      "learning_rate": 0.0004123635199551076,
      "loss": 2.9377,
      "step": 87046
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9911803007125854,
      "learning_rate": 0.0004123597271397853,
      "loss": 3.2124,
      "step": 87047
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.118915319442749,
      "learning_rate": 0.00041235593430357303,
      "loss": 2.9935,
      "step": 87048
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8458460569381714,
      "learning_rate": 0.0004123521414464717,
      "loss": 2.9947,
      "step": 87049
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2025094032287598,
      "learning_rate": 0.0004123483485684821,
      "loss": 3.1037,
      "step": 87050
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.964791178703308,
      "learning_rate": 0.0004123445556696047,
      "loss": 2.8755,
      "step": 87051
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7499443292617798,
      "learning_rate": 0.0004123407627498404,
      "loss": 3.1593,
      "step": 87052
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8002547025680542,
      "learning_rate": 0.00041233696980918984,
      "loss": 3.1257,
      "step": 87053
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.613843560218811,
      "learning_rate": 0.0004123331768476537,
      "loss": 3.121,
      "step": 87054
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7540563344955444,
      "learning_rate": 0.00041232938386523264,
      "loss": 2.6859,
      "step": 87055
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8057483434677124,
      "learning_rate": 0.00041232559086192753,
      "loss": 3.0155,
      "step": 87056
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8756715059280396,
      "learning_rate": 0.00041232179783773885,
      "loss": 2.8546,
      "step": 87057
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5158685445785522,
      "learning_rate": 0.0004123180047926675,
      "loss": 3.1328,
      "step": 87058
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7533026933670044,
      "learning_rate": 0.0004123142117267141,
      "loss": 3.0679,
      "step": 87059
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.3029563426971436,
      "learning_rate": 0.0004123104186398794,
      "loss": 2.8704,
      "step": 87060
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.953366994857788,
      "learning_rate": 0.000412306625532164,
      "loss": 2.8565,
      "step": 87061
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.925734519958496,
      "learning_rate": 0.0004123028324035688,
      "loss": 2.8161,
      "step": 87062
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9035663604736328,
      "learning_rate": 0.0004122990392540943,
      "loss": 2.8341,
      "step": 87063
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4703078269958496,
      "learning_rate": 0.0004122952460837413,
      "loss": 3.0654,
      "step": 87064
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8975979089736938,
      "learning_rate": 0.0004122914528925105,
      "loss": 3.0601,
      "step": 87065
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9261170625686646,
      "learning_rate": 0.00041228765968040254,
      "loss": 3.1103,
      "step": 87066
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6505171060562134,
      "learning_rate": 0.0004122838664474182,
      "loss": 3.3108,
      "step": 87067
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8359445333480835,
      "learning_rate": 0.00041228007319355826,
      "loss": 2.7037,
      "step": 87068
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4354349374771118,
      "learning_rate": 0.0004122762799188233,
      "loss": 3.0638,
      "step": 87069
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8699332475662231,
      "learning_rate": 0.000412272486623214,
      "loss": 2.7399,
      "step": 87070
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8311424255371094,
      "learning_rate": 0.0004122686933067312,
      "loss": 2.8769,
      "step": 87071
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6478569507598877,
      "learning_rate": 0.0004122648999693754,
      "loss": 3.1214,
      "step": 87072
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8961471319198608,
      "learning_rate": 0.00041226110661114757,
      "loss": 3.1037,
      "step": 87073
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8490266799926758,
      "learning_rate": 0.0004122573132320483,
      "loss": 3.0012,
      "step": 87074
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.848911166191101,
      "learning_rate": 0.0004122535198320782,
      "loss": 2.7335,
      "step": 87075
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1339621543884277,
      "learning_rate": 0.0004122497264112381,
      "loss": 2.9806,
      "step": 87076
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.050335168838501,
      "learning_rate": 0.0004122459329695286,
      "loss": 2.9364,
      "step": 87077
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6740782260894775,
      "learning_rate": 0.00041224213950695053,
      "loss": 2.8912,
      "step": 87078
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8044835329055786,
      "learning_rate": 0.00041223834602350444,
      "loss": 3.0452,
      "step": 87079
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.737316608428955,
      "learning_rate": 0.00041223455251919124,
      "loss": 3.2119,
      "step": 87080
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.563007116317749,
      "learning_rate": 0.00041223075899401144,
      "loss": 3.0903,
      "step": 87081
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.253551483154297,
      "learning_rate": 0.00041222696544796573,
      "loss": 2.8694,
      "step": 87082
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2125487327575684,
      "learning_rate": 0.00041222317188105515,
      "loss": 2.915,
      "step": 87083
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8391612768173218,
      "learning_rate": 0.00041221937829327997,
      "loss": 2.9452,
      "step": 87084
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5178966522216797,
      "learning_rate": 0.00041221558468464104,
      "loss": 2.9761,
      "step": 87085
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7471739053726196,
      "learning_rate": 0.0004122117910551393,
      "loss": 3.0453,
      "step": 87086
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.283642292022705,
      "learning_rate": 0.0004122079974047752,
      "loss": 3.0667,
      "step": 87087
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9564098119735718,
      "learning_rate": 0.0004122042037335495,
      "loss": 3.115,
      "step": 87088
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7234220504760742,
      "learning_rate": 0.0004122004100414629,
      "loss": 3.2132,
      "step": 87089
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.1344892978668213,
      "learning_rate": 0.0004121966163285162,
      "loss": 2.9042,
      "step": 87090
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.639115333557129,
      "learning_rate": 0.00041219282259471,
      "loss": 3.11,
      "step": 87091
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5978580713272095,
      "learning_rate": 0.000412189028840045,
      "loss": 3.1877,
      "step": 87092
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6780595779418945,
      "learning_rate": 0.00041218523506452196,
      "loss": 3.1318,
      "step": 87093
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9585719108581543,
      "learning_rate": 0.0004121814412681415,
      "loss": 2.8222,
      "step": 87094
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.289454221725464,
      "learning_rate": 0.00041217764745090453,
      "loss": 3.1667,
      "step": 87095
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.778084635734558,
      "learning_rate": 0.0004121738536128115,
      "loss": 3.1597,
      "step": 87096
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6014903783798218,
      "learning_rate": 0.00041217005975386327,
      "loss": 3.1024,
      "step": 87097
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9103891849517822,
      "learning_rate": 0.00041216626587406056,
      "loss": 3.0983,
      "step": 87098
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0118250846862793,
      "learning_rate": 0.000412162471973404,
      "loss": 2.9285,
      "step": 87099
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.46697199344635,
      "learning_rate": 0.00041215867805189423,
      "loss": 3.0119,
      "step": 87100
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6184418201446533,
      "learning_rate": 0.0004121548841095322,
      "loss": 2.9352,
      "step": 87101
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5865386724472046,
      "learning_rate": 0.00041215109014631834,
      "loss": 2.9278,
      "step": 87102
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5603604316711426,
      "learning_rate": 0.0004121472961622535,
      "loss": 3.1462,
      "step": 87103
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6202974319458008,
      "learning_rate": 0.0004121435021573384,
      "loss": 2.8681,
      "step": 87104
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9097758531570435,
      "learning_rate": 0.0004121397081315736,
      "loss": 2.9532,
      "step": 87105
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7525455951690674,
      "learning_rate": 0.00041213591408496,
      "loss": 2.8793,
      "step": 87106
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5861793756484985,
      "learning_rate": 0.0004121321200174983,
      "loss": 3.0836,
      "step": 87107
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9523707628250122,
      "learning_rate": 0.000412128325929189,
      "loss": 3.0636,
      "step": 87108
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.371872901916504,
      "learning_rate": 0.000412124531820033,
      "loss": 2.8064,
      "step": 87109
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5260850191116333,
      "learning_rate": 0.0004121207376900309,
      "loss": 3.0979,
      "step": 87110
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6006659269332886,
      "learning_rate": 0.0004121169435391834,
      "loss": 3.019,
      "step": 87111
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6729713678359985,
      "learning_rate": 0.0004121131493674913,
      "loss": 3.0325,
      "step": 87112
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5497417449951172,
      "learning_rate": 0.0004121093551749551,
      "loss": 2.7861,
      "step": 87113
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.0017545223236084,
      "learning_rate": 0.0004121055609615758,
      "loss": 3.2674,
      "step": 87114
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.969527244567871,
      "learning_rate": 0.000412101766727354,
      "loss": 2.9917,
      "step": 87115
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8294273614883423,
      "learning_rate": 0.0004120979724722903,
      "loss": 3.1688,
      "step": 87116
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6291959285736084,
      "learning_rate": 0.0004120941781963855,
      "loss": 2.7383,
      "step": 87117
    },
    {
      "epoch": 1.13,
      "grad_norm": 4.141024589538574,
      "learning_rate": 0.00041209038389964015,
      "loss": 3.0761,
      "step": 87118
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.25512433052063,
      "learning_rate": 0.00041208658958205525,
      "loss": 3.0072,
      "step": 87119
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.743096351623535,
      "learning_rate": 0.00041208279524363123,
      "loss": 3.0558,
      "step": 87120
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.202662944793701,
      "learning_rate": 0.000412079000884369,
      "loss": 2.9339,
      "step": 87121
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.533264636993408,
      "learning_rate": 0.00041207520650426906,
      "loss": 2.8346,
      "step": 87122
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.9974782466888428,
      "learning_rate": 0.0004120714121033322,
      "loss": 2.9648,
      "step": 87123
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9103827476501465,
      "learning_rate": 0.00041206761768155923,
      "loss": 2.9829,
      "step": 87124
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.048238754272461,
      "learning_rate": 0.00041206382323895075,
      "loss": 3.3024,
      "step": 87125
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.44569730758667,
      "learning_rate": 0.00041206002877550754,
      "loss": 3.0742,
      "step": 87126
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.4052743911743164,
      "learning_rate": 0.0004120562342912302,
      "loss": 2.9814,
      "step": 87127
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7004013061523438,
      "learning_rate": 0.00041205243978611957,
      "loss": 2.8171,
      "step": 87128
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.095163583755493,
      "learning_rate": 0.00041204864526017617,
      "loss": 2.9079,
      "step": 87129
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.151895523071289,
      "learning_rate": 0.00041204485071340085,
      "loss": 2.8025,
      "step": 87130
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5499526262283325,
      "learning_rate": 0.0004120410561457943,
      "loss": 2.8106,
      "step": 87131
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5399868488311768,
      "learning_rate": 0.00041203726155735723,
      "loss": 2.9814,
      "step": 87132
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.624819040298462,
      "learning_rate": 0.00041203346694809024,
      "loss": 3.361,
      "step": 87133
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.044218063354492,
      "learning_rate": 0.00041202967231799415,
      "loss": 3.2019,
      "step": 87134
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9776054620742798,
      "learning_rate": 0.00041202587766706966,
      "loss": 2.8658,
      "step": 87135
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.720089316368103,
      "learning_rate": 0.0004120220829953174,
      "loss": 3.2459,
      "step": 87136
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8043098449707031,
      "learning_rate": 0.00041201828830273807,
      "loss": 2.9647,
      "step": 87137
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9122309684753418,
      "learning_rate": 0.0004120144935893326,
      "loss": 2.9439,
      "step": 87138
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.153719902038574,
      "learning_rate": 0.0004120106988551013,
      "loss": 2.87,
      "step": 87139
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8940829038619995,
      "learning_rate": 0.00041200690410004523,
      "loss": 3.1905,
      "step": 87140
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6998385190963745,
      "learning_rate": 0.00041200310932416493,
      "loss": 3.3024,
      "step": 87141
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6528507471084595,
      "learning_rate": 0.0004119993145274612,
      "loss": 3.1729,
      "step": 87142
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.288698673248291,
      "learning_rate": 0.0004119955197099346,
      "loss": 2.9274,
      "step": 87143
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6597508192062378,
      "learning_rate": 0.000411991724871586,
      "loss": 3.0378,
      "step": 87144
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.6091768741607666,
      "learning_rate": 0.0004119879300124159,
      "loss": 3.1808,
      "step": 87145
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6596403121948242,
      "learning_rate": 0.0004119841351324252,
      "loss": 2.9344,
      "step": 87146
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6696336269378662,
      "learning_rate": 0.00041198034023161463,
      "loss": 2.8184,
      "step": 87147
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.0619919300079346,
      "learning_rate": 0.0004119765453099847,
      "loss": 3.1135,
      "step": 87148
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1301259994506836,
      "learning_rate": 0.0004119727503675362,
      "loss": 3.1072,
      "step": 87149
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4965916872024536,
      "learning_rate": 0.0004119689554042699,
      "loss": 3.1872,
      "step": 87150
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4784064292907715,
      "learning_rate": 0.00041196516042018644,
      "loss": 3.0461,
      "step": 87151
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.954296588897705,
      "learning_rate": 0.00041196136541528654,
      "loss": 2.9337,
      "step": 87152
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9095882177352905,
      "learning_rate": 0.00041195757038957093,
      "loss": 3.2804,
      "step": 87153
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7689265012741089,
      "learning_rate": 0.00041195377534304026,
      "loss": 3.0789,
      "step": 87154
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.826591968536377,
      "learning_rate": 0.00041194998027569525,
      "loss": 3.2435,
      "step": 87155
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.421882390975952,
      "learning_rate": 0.0004119461851875367,
      "loss": 2.9675,
      "step": 87156
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.4725761413574219,
      "learning_rate": 0.0004119423900785652,
      "loss": 2.9072,
      "step": 87157
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.871458649635315,
      "learning_rate": 0.00041193859494878147,
      "loss": 2.8571,
      "step": 87158
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.6613519191741943,
      "learning_rate": 0.0004119347997981863,
      "loss": 3.1231,
      "step": 87159
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.372074842453003,
      "learning_rate": 0.0004119310046267803,
      "loss": 3.1503,
      "step": 87160
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.7761187553405762,
      "learning_rate": 0.0004119272094345642,
      "loss": 2.9796,
      "step": 87161
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.057722568511963,
      "learning_rate": 0.00041192341422153876,
      "loss": 2.9364,
      "step": 87162
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.5297160148620605,
      "learning_rate": 0.0004119196189877046,
      "loss": 3.1595,
      "step": 87163
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.0488903522491455,
      "learning_rate": 0.0004119158237330625,
      "loss": 3.1103,
      "step": 87164
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.986954689025879,
      "learning_rate": 0.0004119120284576132,
      "loss": 2.8053,
      "step": 87165
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.2064051628112793,
      "learning_rate": 0.00041190823316135725,
      "loss": 3.0782,
      "step": 87166
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8236470222473145,
      "learning_rate": 0.0004119044378442954,
      "loss": 3.1202,
      "step": 87167
    },
    {
      "epoch": 1.13,
      "grad_norm": 2.9133284091949463,
      "learning_rate": 0.00041190064250642847,
      "loss": 3.0107,
      "step": 87168
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.9542632102966309,
      "learning_rate": 0.00041189684714775714,
      "loss": 3.1185,
      "step": 87169
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5996878147125244,
      "learning_rate": 0.000411893051768282,
      "loss": 3.0907,
      "step": 87170
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6085776090621948,
      "learning_rate": 0.0004118892563680039,
      "loss": 3.2624,
      "step": 87171
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.002870798110962,
      "learning_rate": 0.00041188546094692344,
      "loss": 2.9837,
      "step": 87172
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7039514780044556,
      "learning_rate": 0.00041188166550504133,
      "loss": 3.1581,
      "step": 87173
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.764703392982483,
      "learning_rate": 0.00041187787004235836,
      "loss": 3.0172,
      "step": 87174
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9028240442276,
      "learning_rate": 0.0004118740745588752,
      "loss": 2.8447,
      "step": 87175
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1475014686584473,
      "learning_rate": 0.0004118702790545925,
      "loss": 3.1713,
      "step": 87176
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.722772479057312,
      "learning_rate": 0.0004118664835295109,
      "loss": 3.16,
      "step": 87177
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7148663997650146,
      "learning_rate": 0.00041186268798363136,
      "loss": 2.9894,
      "step": 87178
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3022775650024414,
      "learning_rate": 0.0004118588924169544,
      "loss": 3.0992,
      "step": 87179
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6466445922851562,
      "learning_rate": 0.0004118550968294807,
      "loss": 3.0476,
      "step": 87180
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.643984794616699,
      "learning_rate": 0.0004118513012212111,
      "loss": 2.8306,
      "step": 87181
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1891884803771973,
      "learning_rate": 0.0004118475055921462,
      "loss": 2.8741,
      "step": 87182
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8810867071151733,
      "learning_rate": 0.00041184370994228666,
      "loss": 3.0332,
      "step": 87183
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1038620471954346,
      "learning_rate": 0.00041183991427163344,
      "loss": 3.0415,
      "step": 87184
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0967509746551514,
      "learning_rate": 0.00041183611858018696,
      "loss": 2.9096,
      "step": 87185
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9041390419006348,
      "learning_rate": 0.000411832322867948,
      "loss": 3.465,
      "step": 87186
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.540748119354248,
      "learning_rate": 0.0004118285271349174,
      "loss": 3.101,
      "step": 87187
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5104398727416992,
      "learning_rate": 0.0004118247313810957,
      "loss": 2.7938,
      "step": 87188
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.066143274307251,
      "learning_rate": 0.00041182093560648364,
      "loss": 2.8137,
      "step": 87189
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5563682317733765,
      "learning_rate": 0.00041181713981108207,
      "loss": 3.1515,
      "step": 87190
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8586019277572632,
      "learning_rate": 0.0004118133439948914,
      "loss": 3.0127,
      "step": 87191
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5843865871429443,
      "learning_rate": 0.00041180954815791263,
      "loss": 3.1747,
      "step": 87192
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6421386003494263,
      "learning_rate": 0.00041180575230014645,
      "loss": 3.1767,
      "step": 87193
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6150010824203491,
      "learning_rate": 0.0004118019564215933,
      "loss": 2.8921,
      "step": 87194
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.998232126235962,
      "learning_rate": 0.0004117981605222542,
      "loss": 2.9326,
      "step": 87195
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.256751537322998,
      "learning_rate": 0.0004117943646021296,
      "loss": 3.0754,
      "step": 87196
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.027120351791382,
      "learning_rate": 0.00041179056866122037,
      "loss": 3.2157,
      "step": 87197
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.970616102218628,
      "learning_rate": 0.0004117867726995271,
      "loss": 3.2101,
      "step": 87198
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7382426261901855,
      "learning_rate": 0.0004117829767170507,
      "loss": 2.9697,
      "step": 87199
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.400339126586914,
      "learning_rate": 0.0004117791807137916,
      "loss": 3.1169,
      "step": 87200
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.0317177772521973,
      "learning_rate": 0.00041177538468975067,
      "loss": 3.1355,
      "step": 87201
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8919031620025635,
      "learning_rate": 0.00041177158864492866,
      "loss": 2.9593,
      "step": 87202
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1054275035858154,
      "learning_rate": 0.0004117677925793261,
      "loss": 2.6372,
      "step": 87203
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3192546367645264,
      "learning_rate": 0.0004117639964929438,
      "loss": 2.7569,
      "step": 87204
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1654975414276123,
      "learning_rate": 0.00041176020038578257,
      "loss": 3.0735,
      "step": 87205
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5886893272399902,
      "learning_rate": 0.00041175640425784296,
      "loss": 2.9732,
      "step": 87206
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7487603425979614,
      "learning_rate": 0.0004117526081091257,
      "loss": 2.9092,
      "step": 87207
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5706355571746826,
      "learning_rate": 0.0004117488119396315,
      "loss": 3.2614,
      "step": 87208
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5481314659118652,
      "learning_rate": 0.00041174501574936116,
      "loss": 2.8751,
      "step": 87209
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0999693870544434,
      "learning_rate": 0.00041174121953831523,
      "loss": 3.0779,
      "step": 87210
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3071556091308594,
      "learning_rate": 0.00041173742330649453,
      "loss": 2.962,
      "step": 87211
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6196871995925903,
      "learning_rate": 0.00041173362705389975,
      "loss": 3.2067,
      "step": 87212
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0740113258361816,
      "learning_rate": 0.00041172983078053156,
      "loss": 3.0908,
      "step": 87213
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.547102451324463,
      "learning_rate": 0.0004117260344863907,
      "loss": 2.8233,
      "step": 87214
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.613958716392517,
      "learning_rate": 0.0004117222381714778,
      "loss": 3.0506,
      "step": 87215
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4980844259262085,
      "learning_rate": 0.00041171844183579375,
      "loss": 3.0172,
      "step": 87216
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.6130545139312744,
      "learning_rate": 0.00041171464547933907,
      "loss": 2.9473,
      "step": 87217
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9020071029663086,
      "learning_rate": 0.0004117108491021145,
      "loss": 3.1368,
      "step": 87218
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.188668727874756,
      "learning_rate": 0.0004117070527041208,
      "loss": 3.2252,
      "step": 87219
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.760809302330017,
      "learning_rate": 0.0004117032562853587,
      "loss": 2.9724,
      "step": 87220
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9383667707443237,
      "learning_rate": 0.0004116994598458288,
      "loss": 2.9691,
      "step": 87221
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6254054307937622,
      "learning_rate": 0.0004116956633855318,
      "loss": 2.9799,
      "step": 87222
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8511593341827393,
      "learning_rate": 0.0004116918669044686,
      "loss": 3.0178,
      "step": 87223
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8307089805603027,
      "learning_rate": 0.00041168807040263967,
      "loss": 2.9705,
      "step": 87224
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7288284301757812,
      "learning_rate": 0.00041168427388004586,
      "loss": 3.0712,
      "step": 87225
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7548068761825562,
      "learning_rate": 0.00041168047733668785,
      "loss": 3.044,
      "step": 87226
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.005746603012085,
      "learning_rate": 0.0004116766807725663,
      "loss": 3.2273,
      "step": 87227
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9406527280807495,
      "learning_rate": 0.0004116728841876819,
      "loss": 3.0205,
      "step": 87228
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.100496530532837,
      "learning_rate": 0.0004116690875820355,
      "loss": 3.0126,
      "step": 87229
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.498415946960449,
      "learning_rate": 0.00041166529095562767,
      "loss": 3.1205,
      "step": 87230
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.147552490234375,
      "learning_rate": 0.00041166149430845914,
      "loss": 2.9324,
      "step": 87231
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.865378975868225,
      "learning_rate": 0.0004116576976405307,
      "loss": 2.8549,
      "step": 87232
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2913172245025635,
      "learning_rate": 0.00041165390095184297,
      "loss": 3.1125,
      "step": 87233
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1067357063293457,
      "learning_rate": 0.00041165010424239657,
      "loss": 3.1511,
      "step": 87234
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6903231143951416,
      "learning_rate": 0.00041164630751219246,
      "loss": 2.8719,
      "step": 87235
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.980955719947815,
      "learning_rate": 0.00041164251076123104,
      "loss": 3.1595,
      "step": 87236
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.082568883895874,
      "learning_rate": 0.0004116387139895132,
      "loss": 2.75,
      "step": 87237
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.236585855484009,
      "learning_rate": 0.00041163491719703975,
      "loss": 3.3307,
      "step": 87238
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7742570638656616,
      "learning_rate": 0.00041163112038381113,
      "loss": 3.1488,
      "step": 87239
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5997512340545654,
      "learning_rate": 0.0004116273235498282,
      "loss": 3.16,
      "step": 87240
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9297795295715332,
      "learning_rate": 0.0004116235266950917,
      "loss": 3.0179,
      "step": 87241
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.098782777786255,
      "learning_rate": 0.0004116197298196022,
      "loss": 3.2335,
      "step": 87242
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5866502523422241,
      "learning_rate": 0.0004116159329233605,
      "loss": 3.2879,
      "step": 87243
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5862878561019897,
      "learning_rate": 0.0004116121360063673,
      "loss": 2.8558,
      "step": 87244
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.72234046459198,
      "learning_rate": 0.0004116083390686234,
      "loss": 2.9175,
      "step": 87245
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8963533639907837,
      "learning_rate": 0.0004116045421101293,
      "loss": 2.9331,
      "step": 87246
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.439631700515747,
      "learning_rate": 0.0004116007451308858,
      "loss": 2.7323,
      "step": 87247
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.718692660331726,
      "learning_rate": 0.00041159694813089364,
      "loss": 3.0001,
      "step": 87248
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.249621868133545,
      "learning_rate": 0.0004115931511101535,
      "loss": 3.0764,
      "step": 87249
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.979189395904541,
      "learning_rate": 0.00041158935406866606,
      "loss": 3.1162,
      "step": 87250
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9549486637115479,
      "learning_rate": 0.00041158555700643215,
      "loss": 2.7517,
      "step": 87251
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9026057720184326,
      "learning_rate": 0.00041158175992345224,
      "loss": 3.0654,
      "step": 87252
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.144686698913574,
      "learning_rate": 0.0004115779628197272,
      "loss": 2.9184,
      "step": 87253
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.503446340560913,
      "learning_rate": 0.0004115741656952578,
      "loss": 2.7447,
      "step": 87254
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.867762565612793,
      "learning_rate": 0.00041157036855004464,
      "loss": 2.8772,
      "step": 87255
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.946088790893555,
      "learning_rate": 0.00041156657138408836,
      "loss": 3.0118,
      "step": 87256
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.2379324436187744,
      "learning_rate": 0.00041156277419738987,
      "loss": 2.8849,
      "step": 87257
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.823415756225586,
      "learning_rate": 0.0004115589769899497,
      "loss": 3.2245,
      "step": 87258
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.305333375930786,
      "learning_rate": 0.0004115551797617685,
      "loss": 2.8592,
      "step": 87259
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3435182571411133,
      "learning_rate": 0.0004115513825128472,
      "loss": 3.1836,
      "step": 87260
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.0504536628723145,
      "learning_rate": 0.0004115475852431864,
      "loss": 3.0314,
      "step": 87261
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.273311138153076,
      "learning_rate": 0.00041154378795278673,
      "loss": 3.148,
      "step": 87262
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0365991592407227,
      "learning_rate": 0.00041153999064164905,
      "loss": 2.8358,
      "step": 87263
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7622369527816772,
      "learning_rate": 0.0004115361933097739,
      "loss": 2.9473,
      "step": 87264
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.228059768676758,
      "learning_rate": 0.00041153239595716207,
      "loss": 2.7405,
      "step": 87265
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7670743465423584,
      "learning_rate": 0.0004115285985838144,
      "loss": 2.8073,
      "step": 87266
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.502480983734131,
      "learning_rate": 0.00041152480118973134,
      "loss": 3.1615,
      "step": 87267
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7355037927627563,
      "learning_rate": 0.0004115210037749137,
      "loss": 2.9214,
      "step": 87268
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.8481054306030273,
      "learning_rate": 0.0004115172063393622,
      "loss": 2.8264,
      "step": 87269
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7280304431915283,
      "learning_rate": 0.00041151340888307753,
      "loss": 2.9578,
      "step": 87270
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.12410044670105,
      "learning_rate": 0.00041150961140606044,
      "loss": 2.8079,
      "step": 87271
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6778955459594727,
      "learning_rate": 0.00041150581390831173,
      "loss": 3.0858,
      "step": 87272
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7274531126022339,
      "learning_rate": 0.0004115020163898318,
      "loss": 3.244,
      "step": 87273
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9143390655517578,
      "learning_rate": 0.0004114982188506216,
      "loss": 3.1574,
      "step": 87274
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.985131025314331,
      "learning_rate": 0.00041149442129068185,
      "loss": 2.9647,
      "step": 87275
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.555401086807251,
      "learning_rate": 0.0004114906237100131,
      "loss": 3.0599,
      "step": 87276
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2574737071990967,
      "learning_rate": 0.00041148682610861616,
      "loss": 3.0008,
      "step": 87277
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0599417686462402,
      "learning_rate": 0.0004114830284864918,
      "loss": 2.9749,
      "step": 87278
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1908111572265625,
      "learning_rate": 0.00041147923084364056,
      "loss": 2.9787,
      "step": 87279
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7986962795257568,
      "learning_rate": 0.0004114754331800631,
      "loss": 3.1148,
      "step": 87280
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1011226177215576,
      "learning_rate": 0.0004114716354957605,
      "loss": 3.1989,
      "step": 87281
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2632100582122803,
      "learning_rate": 0.00041146783779073305,
      "loss": 2.8335,
      "step": 87282
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5736662149429321,
      "learning_rate": 0.00041146404006498166,
      "loss": 3.1911,
      "step": 87283
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5839979648590088,
      "learning_rate": 0.00041146024231850705,
      "loss": 3.0096,
      "step": 87284
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2481133937835693,
      "learning_rate": 0.0004114564445513098,
      "loss": 3.1223,
      "step": 87285
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8252360820770264,
      "learning_rate": 0.00041145264676339076,
      "loss": 2.9228,
      "step": 87286
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4983612298965454,
      "learning_rate": 0.00041144884895475054,
      "loss": 3.0997,
      "step": 87287
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.042233467102051,
      "learning_rate": 0.0004114450511253899,
      "loss": 3.107,
      "step": 87288
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5489616394042969,
      "learning_rate": 0.0004114412532753095,
      "loss": 2.9567,
      "step": 87289
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6301023960113525,
      "learning_rate": 0.00041143745540451,
      "loss": 2.9858,
      "step": 87290
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9967364072799683,
      "learning_rate": 0.0004114336575129923,
      "loss": 2.9634,
      "step": 87291
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9276782274246216,
      "learning_rate": 0.00041142985960075683,
      "loss": 3.1608,
      "step": 87292
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4746123552322388,
      "learning_rate": 0.0004114260616678046,
      "loss": 2.9951,
      "step": 87293
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4666754007339478,
      "learning_rate": 0.0004114222637141361,
      "loss": 3.1598,
      "step": 87294
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.155625343322754,
      "learning_rate": 0.0004114184657397521,
      "loss": 2.9751,
      "step": 87295
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.437312364578247,
      "learning_rate": 0.0004114146677446533,
      "loss": 2.754,
      "step": 87296
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5308289527893066,
      "learning_rate": 0.00041141086972884035,
      "loss": 3.0622,
      "step": 87297
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4536722898483276,
      "learning_rate": 0.00041140707169231417,
      "loss": 2.846,
      "step": 87298
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5854285955429077,
      "learning_rate": 0.0004114032736350752,
      "loss": 3.1437,
      "step": 87299
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.1521592140197754,
      "learning_rate": 0.00041139947555712423,
      "loss": 3.0334,
      "step": 87300
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.9619531631469727,
      "learning_rate": 0.00041139567745846205,
      "loss": 3.136,
      "step": 87301
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9368993043899536,
      "learning_rate": 0.00041139187933908933,
      "loss": 3.1159,
      "step": 87302
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.8344614505767822,
      "learning_rate": 0.00041138808119900666,
      "loss": 3.2159,
      "step": 87303
    },
    {
      "epoch": 1.14,
      "grad_norm": 5.1447038650512695,
      "learning_rate": 0.0004113842830382149,
      "loss": 3.1063,
      "step": 87304
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.233076572418213,
      "learning_rate": 0.0004113804848567147,
      "loss": 3.1656,
      "step": 87305
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.074251651763916,
      "learning_rate": 0.0004113766866545068,
      "loss": 2.9909,
      "step": 87306
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.793418526649475,
      "learning_rate": 0.0004113728884315919,
      "loss": 2.9874,
      "step": 87307
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.578411340713501,
      "learning_rate": 0.0004113690901879706,
      "loss": 2.9074,
      "step": 87308
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.481788158416748,
      "learning_rate": 0.0004113652919236437,
      "loss": 2.6713,
      "step": 87309
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4430792331695557,
      "learning_rate": 0.0004113614936386119,
      "loss": 3.0718,
      "step": 87310
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.403789758682251,
      "learning_rate": 0.00041135769533287586,
      "loss": 3.006,
      "step": 87311
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8092268705368042,
      "learning_rate": 0.00041135389700643635,
      "loss": 2.9381,
      "step": 87312
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5526777505874634,
      "learning_rate": 0.0004113500986592941,
      "loss": 3.0606,
      "step": 87313
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4328335523605347,
      "learning_rate": 0.0004113463002914497,
      "loss": 2.975,
      "step": 87314
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9171262979507446,
      "learning_rate": 0.000411342501902904,
      "loss": 2.8402,
      "step": 87315
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5922194719314575,
      "learning_rate": 0.00041133870349365756,
      "loss": 3.0741,
      "step": 87316
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5381429195404053,
      "learning_rate": 0.0004113349050637111,
      "loss": 2.9922,
      "step": 87317
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6245909929275513,
      "learning_rate": 0.00041133110661306546,
      "loss": 3.0983,
      "step": 87318
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6585357189178467,
      "learning_rate": 0.0004113273081417213,
      "loss": 3.1194,
      "step": 87319
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9031566381454468,
      "learning_rate": 0.0004113235096496792,
      "loss": 2.9236,
      "step": 87320
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7435227632522583,
      "learning_rate": 0.00041131971113694,
      "loss": 3.0662,
      "step": 87321
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8934296369552612,
      "learning_rate": 0.00041131591260350436,
      "loss": 2.8363,
      "step": 87322
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9065446853637695,
      "learning_rate": 0.00041131211404937297,
      "loss": 3.1452,
      "step": 87323
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7472587823867798,
      "learning_rate": 0.00041130831547454665,
      "loss": 3.0307,
      "step": 87324
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6180206537246704,
      "learning_rate": 0.0004113045168790259,
      "loss": 3.1654,
      "step": 87325
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.68588387966156,
      "learning_rate": 0.0004113007182628116,
      "loss": 3.0529,
      "step": 87326
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.711120367050171,
      "learning_rate": 0.0004112969196259044,
      "loss": 2.895,
      "step": 87327
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9261163473129272,
      "learning_rate": 0.00041129312096830495,
      "loss": 2.79,
      "step": 87328
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.151032447814941,
      "learning_rate": 0.000411289322290014,
      "loss": 3.0547,
      "step": 87329
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.8990213871002197,
      "learning_rate": 0.0004112855235910324,
      "loss": 3.0424,
      "step": 87330
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5312697887420654,
      "learning_rate": 0.0004112817248713606,
      "loss": 2.8739,
      "step": 87331
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9217066764831543,
      "learning_rate": 0.00041127792613099946,
      "loss": 3.1901,
      "step": 87332
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.358374834060669,
      "learning_rate": 0.00041127412736994965,
      "loss": 3.1568,
      "step": 87333
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5182387828826904,
      "learning_rate": 0.00041127032858821185,
      "loss": 2.9717,
      "step": 87334
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8852465152740479,
      "learning_rate": 0.0004112665297857868,
      "loss": 3.2874,
      "step": 87335
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4929083585739136,
      "learning_rate": 0.00041126273096267523,
      "loss": 3.095,
      "step": 87336
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6988071203231812,
      "learning_rate": 0.00041125893211887776,
      "loss": 3.0506,
      "step": 87337
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.587443470954895,
      "learning_rate": 0.0004112551332543953,
      "loss": 3.006,
      "step": 87338
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2668392658233643,
      "learning_rate": 0.0004112513343692283,
      "loss": 2.898,
      "step": 87339
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.868438959121704,
      "learning_rate": 0.0004112475354633775,
      "loss": 3.1769,
      "step": 87340
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.595585823059082,
      "learning_rate": 0.00041124373653684377,
      "loss": 3.1425,
      "step": 87341
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6347941160202026,
      "learning_rate": 0.00041123993758962783,
      "loss": 2.8973,
      "step": 87342
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6260110139846802,
      "learning_rate": 0.0004112361386217302,
      "loss": 3.114,
      "step": 87343
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2081375122070312,
      "learning_rate": 0.0004112323396331516,
      "loss": 2.7262,
      "step": 87344
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.986800193786621,
      "learning_rate": 0.0004112285406238929,
      "loss": 2.6736,
      "step": 87345
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7963674068450928,
      "learning_rate": 0.00041122474159395467,
      "loss": 2.8507,
      "step": 87346
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0000133514404297,
      "learning_rate": 0.0004112209425433376,
      "loss": 2.786,
      "step": 87347
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7560253143310547,
      "learning_rate": 0.0004112171434720426,
      "loss": 3.0962,
      "step": 87348
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5940423011779785,
      "learning_rate": 0.0004112133443800702,
      "loss": 3.2374,
      "step": 87349
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5943151712417603,
      "learning_rate": 0.00041120954526742106,
      "loss": 2.8897,
      "step": 87350
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8957034349441528,
      "learning_rate": 0.00041120574613409607,
      "loss": 3.1414,
      "step": 87351
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4423317909240723,
      "learning_rate": 0.0004112019469800957,
      "loss": 3.0056,
      "step": 87352
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.771844744682312,
      "learning_rate": 0.0004111981478054209,
      "loss": 3.2327,
      "step": 87353
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5793585777282715,
      "learning_rate": 0.0004111943486100722,
      "loss": 3.2003,
      "step": 87354
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7537003755569458,
      "learning_rate": 0.0004111905493940504,
      "loss": 3.0303,
      "step": 87355
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.684880256652832,
      "learning_rate": 0.00041118675015735617,
      "loss": 2.8331,
      "step": 87356
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7018201351165771,
      "learning_rate": 0.00041118295089999027,
      "loss": 3.0916,
      "step": 87357
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7638328075408936,
      "learning_rate": 0.0004111791516219533,
      "loss": 2.9437,
      "step": 87358
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7219048738479614,
      "learning_rate": 0.00041117535232324605,
      "loss": 3.0123,
      "step": 87359
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8633612394332886,
      "learning_rate": 0.00041117155300386926,
      "loss": 2.808,
      "step": 87360
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7317084074020386,
      "learning_rate": 0.0004111677536638235,
      "loss": 2.867,
      "step": 87361
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.284491539001465,
      "learning_rate": 0.00041116395430310955,
      "loss": 3.2002,
      "step": 87362
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8187052011489868,
      "learning_rate": 0.0004111601549217282,
      "loss": 2.7462,
      "step": 87363
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.588598370552063,
      "learning_rate": 0.00041115635551967997,
      "loss": 2.9618,
      "step": 87364
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5183426141738892,
      "learning_rate": 0.00041115255609696575,
      "loss": 2.9102,
      "step": 87365
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.401074171066284,
      "learning_rate": 0.0004111487566535862,
      "loss": 3.0526,
      "step": 87366
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6362385749816895,
      "learning_rate": 0.0004111449571895419,
      "loss": 2.9874,
      "step": 87367
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6173666715621948,
      "learning_rate": 0.0004111411577048337,
      "loss": 3.056,
      "step": 87368
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9135560989379883,
      "learning_rate": 0.00041113735819946234,
      "loss": 3.0223,
      "step": 87369
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7663127183914185,
      "learning_rate": 0.00041113355867342837,
      "loss": 3.0324,
      "step": 87370
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8340965509414673,
      "learning_rate": 0.0004111297591267326,
      "loss": 2.9561,
      "step": 87371
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6338717937469482,
      "learning_rate": 0.00041112595955937573,
      "loss": 3.0098,
      "step": 87372
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5896161794662476,
      "learning_rate": 0.00041112215997135836,
      "loss": 2.933,
      "step": 87373
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3021063804626465,
      "learning_rate": 0.00041111836036268124,
      "loss": 3.1343,
      "step": 87374
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.728251576423645,
      "learning_rate": 0.0004111145607333453,
      "loss": 3.1321,
      "step": 87375
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.936279535293579,
      "learning_rate": 0.0004111107610833509,
      "loss": 2.7183,
      "step": 87376
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.408933639526367,
      "learning_rate": 0.000411106961412699,
      "loss": 3.1302,
      "step": 87377
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7405328750610352,
      "learning_rate": 0.0004111031617213902,
      "loss": 3.2044,
      "step": 87378
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.660534143447876,
      "learning_rate": 0.00041109936200942526,
      "loss": 3.2134,
      "step": 87379
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5017287731170654,
      "learning_rate": 0.0004110955622768048,
      "loss": 3.4151,
      "step": 87380
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.536374092102051,
      "learning_rate": 0.0004110917625235296,
      "loss": 3.1077,
      "step": 87381
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.186924457550049,
      "learning_rate": 0.0004110879627496004,
      "loss": 3.1147,
      "step": 87382
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.601424217224121,
      "learning_rate": 0.00041108416295501773,
      "loss": 3.1828,
      "step": 87383
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.750704050064087,
      "learning_rate": 0.0004110803631397825,
      "loss": 2.7838,
      "step": 87384
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7607312202453613,
      "learning_rate": 0.0004110765633038953,
      "loss": 3.2432,
      "step": 87385
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.733483076095581,
      "learning_rate": 0.00041107276344735694,
      "loss": 3.13,
      "step": 87386
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.722805142402649,
      "learning_rate": 0.00041106896357016794,
      "loss": 3.079,
      "step": 87387
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.651396632194519,
      "learning_rate": 0.00041106516367232917,
      "loss": 2.9571,
      "step": 87388
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.733107566833496,
      "learning_rate": 0.0004110613637538413,
      "loss": 3.1155,
      "step": 87389
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.987584114074707,
      "learning_rate": 0.0004110575638147051,
      "loss": 2.6869,
      "step": 87390
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.54927659034729,
      "learning_rate": 0.00041105376385492114,
      "loss": 3.0839,
      "step": 87391
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1423866748809814,
      "learning_rate": 0.0004110499638744901,
      "loss": 3.2489,
      "step": 87392
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8597869873046875,
      "learning_rate": 0.0004110461638734129,
      "loss": 3.0438,
      "step": 87393
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0740044116973877,
      "learning_rate": 0.0004110423638516901,
      "loss": 2.9648,
      "step": 87394
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2091257572174072,
      "learning_rate": 0.00041103856380932243,
      "loss": 3.1815,
      "step": 87395
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.22993540763855,
      "learning_rate": 0.00041103476374631056,
      "loss": 2.8802,
      "step": 87396
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4724384546279907,
      "learning_rate": 0.00041103096366265526,
      "loss": 3.0378,
      "step": 87397
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9397996664047241,
      "learning_rate": 0.0004110271635583572,
      "loss": 3.1132,
      "step": 87398
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9686800241470337,
      "learning_rate": 0.00041102336343341704,
      "loss": 3.1032,
      "step": 87399
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0090572834014893,
      "learning_rate": 0.00041101956328783564,
      "loss": 2.9239,
      "step": 87400
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0613315105438232,
      "learning_rate": 0.0004110157631216135,
      "loss": 2.9294,
      "step": 87401
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5674091577529907,
      "learning_rate": 0.0004110119629347515,
      "loss": 3.1963,
      "step": 87402
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.843209981918335,
      "learning_rate": 0.00041100816272725033,
      "loss": 2.8297,
      "step": 87403
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.2004175186157227,
      "learning_rate": 0.00041100436249911056,
      "loss": 2.7,
      "step": 87404
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.0697648525238037,
      "learning_rate": 0.00041100056225033295,
      "loss": 2.9159,
      "step": 87405
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.87980318069458,
      "learning_rate": 0.0004109967619809183,
      "loss": 3.1295,
      "step": 87406
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.511974811553955,
      "learning_rate": 0.0004109929616908673,
      "loss": 3.274,
      "step": 87407
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3368475437164307,
      "learning_rate": 0.00041098916138018055,
      "loss": 3.2263,
      "step": 87408
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.097813129425049,
      "learning_rate": 0.0004109853610488589,
      "loss": 3.0219,
      "step": 87409
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9398630857467651,
      "learning_rate": 0.0004109815606969029,
      "loss": 3.107,
      "step": 87410
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6216044425964355,
      "learning_rate": 0.00041097776032431336,
      "loss": 2.9779,
      "step": 87411
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7017356157302856,
      "learning_rate": 0.00041097395993109097,
      "loss": 3.0897,
      "step": 87412
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5019625425338745,
      "learning_rate": 0.0004109701595172364,
      "loss": 3.2798,
      "step": 87413
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7408479452133179,
      "learning_rate": 0.00041096635908275037,
      "loss": 3.0585,
      "step": 87414
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3435137271881104,
      "learning_rate": 0.00041096255862763364,
      "loss": 3.0773,
      "step": 87415
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.4096107482910156,
      "learning_rate": 0.00041095875815188687,
      "loss": 3.0695,
      "step": 87416
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.192495584487915,
      "learning_rate": 0.0004109549576555107,
      "loss": 2.7942,
      "step": 87417
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7245643138885498,
      "learning_rate": 0.00041095115713850604,
      "loss": 2.9378,
      "step": 87418
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.8457565307617188,
      "learning_rate": 0.00041094735660087334,
      "loss": 2.9811,
      "step": 87419
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6443160772323608,
      "learning_rate": 0.00041094355604261347,
      "loss": 3.1852,
      "step": 87420
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9051402807235718,
      "learning_rate": 0.0004109397554637272,
      "loss": 3.2273,
      "step": 87421
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9179322719573975,
      "learning_rate": 0.000410935954864215,
      "loss": 3.1407,
      "step": 87422
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0417063236236572,
      "learning_rate": 0.0004109321542440777,
      "loss": 3.2259,
      "step": 87423
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8187791109085083,
      "learning_rate": 0.00041092835360331614,
      "loss": 2.9705,
      "step": 87424
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.114071846008301,
      "learning_rate": 0.0004109245529419308,
      "loss": 2.9679,
      "step": 87425
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7270723581314087,
      "learning_rate": 0.00041092075225992254,
      "loss": 3.1213,
      "step": 87426
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1736626625061035,
      "learning_rate": 0.00041091695155729205,
      "loss": 2.9779,
      "step": 87427
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7590633630752563,
      "learning_rate": 0.00041091315083404,
      "loss": 3.072,
      "step": 87428
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9292099475860596,
      "learning_rate": 0.00041090935009016697,
      "loss": 3.2459,
      "step": 87429
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.682100534439087,
      "learning_rate": 0.00041090554932567396,
      "loss": 3.092,
      "step": 87430
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0494818687438965,
      "learning_rate": 0.00041090174854056143,
      "loss": 2.8997,
      "step": 87431
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7455946207046509,
      "learning_rate": 0.0004108979477348302,
      "loss": 2.9216,
      "step": 87432
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6384642124176025,
      "learning_rate": 0.00041089414690848096,
      "loss": 2.9779,
      "step": 87433
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2742950916290283,
      "learning_rate": 0.00041089034606151437,
      "loss": 2.9864,
      "step": 87434
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.4148149490356445,
      "learning_rate": 0.0004108865451939312,
      "loss": 2.855,
      "step": 87435
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7270660400390625,
      "learning_rate": 0.0004108827443057321,
      "loss": 3.017,
      "step": 87436
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9088469743728638,
      "learning_rate": 0.00041087894339691787,
      "loss": 3.1057,
      "step": 87437
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3860387802124023,
      "learning_rate": 0.00041087514246748903,
      "loss": 3.0685,
      "step": 87438
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9546760320663452,
      "learning_rate": 0.00041087134151744653,
      "loss": 3.113,
      "step": 87439
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8096743822097778,
      "learning_rate": 0.0004108675405467909,
      "loss": 3.1265,
      "step": 87440
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.748349666595459,
      "learning_rate": 0.0004108637395555229,
      "loss": 3.0428,
      "step": 87441
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2074501514434814,
      "learning_rate": 0.00041085993854364327,
      "loss": 3.0585,
      "step": 87442
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9017000198364258,
      "learning_rate": 0.00041085613751115266,
      "loss": 2.7826,
      "step": 87443
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9329302310943604,
      "learning_rate": 0.00041085233645805176,
      "loss": 2.9135,
      "step": 87444
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8649928569793701,
      "learning_rate": 0.0004108485353843414,
      "loss": 3.2011,
      "step": 87445
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6643556356430054,
      "learning_rate": 0.0004108447342900222,
      "loss": 3.3115,
      "step": 87446
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.554412841796875,
      "learning_rate": 0.0004108409331750948,
      "loss": 3.1964,
      "step": 87447
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7836012840270996,
      "learning_rate": 0.00041083713203956,
      "loss": 3.0274,
      "step": 87448
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.004568338394165,
      "learning_rate": 0.0004108333308834185,
      "loss": 3.0335,
      "step": 87449
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6221860647201538,
      "learning_rate": 0.000410829529706671,
      "loss": 3.0856,
      "step": 87450
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0352401733398438,
      "learning_rate": 0.00041082572850931816,
      "loss": 3.0207,
      "step": 87451
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5361111164093018,
      "learning_rate": 0.0004108219272913608,
      "loss": 2.9163,
      "step": 87452
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8643264770507812,
      "learning_rate": 0.0004108181260527995,
      "loss": 2.8779,
      "step": 87453
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2625505924224854,
      "learning_rate": 0.00041081432479363504,
      "loss": 2.7867,
      "step": 87454
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8336492776870728,
      "learning_rate": 0.00041081052351386806,
      "loss": 2.8287,
      "step": 87455
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.76725697517395,
      "learning_rate": 0.00041080672221349934,
      "loss": 3.0049,
      "step": 87456
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9832227230072021,
      "learning_rate": 0.0004108029208925296,
      "loss": 3.183,
      "step": 87457
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0233538150787354,
      "learning_rate": 0.00041079911955095946,
      "loss": 3.0967,
      "step": 87458
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.721831798553467,
      "learning_rate": 0.0004107953181887897,
      "loss": 2.9514,
      "step": 87459
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5954713821411133,
      "learning_rate": 0.0004107915168060209,
      "loss": 2.8349,
      "step": 87460
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9861977100372314,
      "learning_rate": 0.00041078771540265397,
      "loss": 3.1513,
      "step": 87461
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8611645698547363,
      "learning_rate": 0.0004107839139786895,
      "loss": 3.2891,
      "step": 87462
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.9389195442199707,
      "learning_rate": 0.0004107801125341282,
      "loss": 2.9385,
      "step": 87463
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6601823568344116,
      "learning_rate": 0.00041077631106897083,
      "loss": 2.9495,
      "step": 87464
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5993901491165161,
      "learning_rate": 0.00041077250958321797,
      "loss": 3.1767,
      "step": 87465
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.642431616783142,
      "learning_rate": 0.0004107687080768704,
      "loss": 3.0224,
      "step": 87466
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7863103151321411,
      "learning_rate": 0.0004107649065499289,
      "loss": 2.8919,
      "step": 87467
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7272189855575562,
      "learning_rate": 0.00041076110500239414,
      "loss": 2.8607,
      "step": 87468
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9814753532409668,
      "learning_rate": 0.00041075730343426674,
      "loss": 2.9344,
      "step": 87469
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9428184032440186,
      "learning_rate": 0.0004107535018455474,
      "loss": 2.9107,
      "step": 87470
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8167287111282349,
      "learning_rate": 0.00041074970023623707,
      "loss": 3.0008,
      "step": 87471
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.6617021560668945,
      "learning_rate": 0.0004107458986063361,
      "loss": 3.0104,
      "step": 87472
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.75062894821167,
      "learning_rate": 0.00041074209695584544,
      "loss": 3.1473,
      "step": 87473
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.559211254119873,
      "learning_rate": 0.00041073829528476583,
      "loss": 2.9929,
      "step": 87474
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6870869398117065,
      "learning_rate": 0.00041073449359309777,
      "loss": 2.9436,
      "step": 87475
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.147003173828125,
      "learning_rate": 0.0004107306918808421,
      "loss": 3.1483,
      "step": 87476
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.81600821018219,
      "learning_rate": 0.0004107268901479995,
      "loss": 3.0346,
      "step": 87477
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6355654001235962,
      "learning_rate": 0.00041072308839457073,
      "loss": 2.7692,
      "step": 87478
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.553750991821289,
      "learning_rate": 0.00041071928662055637,
      "loss": 3.0044,
      "step": 87479
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.8284196853637695,
      "learning_rate": 0.00041071548482595724,
      "loss": 3.0907,
      "step": 87480
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5698423385620117,
      "learning_rate": 0.00041071168301077404,
      "loss": 2.9532,
      "step": 87481
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7525407075881958,
      "learning_rate": 0.00041070788117500743,
      "loss": 2.9224,
      "step": 87482
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9241715669631958,
      "learning_rate": 0.0004107040793186581,
      "loss": 2.9996,
      "step": 87483
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.097764015197754,
      "learning_rate": 0.00041070027744172683,
      "loss": 2.9404,
      "step": 87484
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.723771572113037,
      "learning_rate": 0.00041069647554421436,
      "loss": 2.8624,
      "step": 87485
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.866342544555664,
      "learning_rate": 0.0004106926736261212,
      "loss": 3.1334,
      "step": 87486
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.3302626609802246,
      "learning_rate": 0.0004106888716874482,
      "loss": 2.871,
      "step": 87487
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7046624422073364,
      "learning_rate": 0.00041068506972819613,
      "loss": 2.9928,
      "step": 87488
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9472606182098389,
      "learning_rate": 0.00041068126774836556,
      "loss": 3.1881,
      "step": 87489
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.469451427459717,
      "learning_rate": 0.00041067746574795726,
      "loss": 2.8202,
      "step": 87490
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.933510661125183,
      "learning_rate": 0.00041067366372697195,
      "loss": 3.0311,
      "step": 87491
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.6049513816833496,
      "learning_rate": 0.0004106698616854103,
      "loss": 2.9245,
      "step": 87492
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.551716923713684,
      "learning_rate": 0.00041066605962327297,
      "loss": 2.8848,
      "step": 87493
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2930970191955566,
      "learning_rate": 0.0004106622575405609,
      "loss": 2.8802,
      "step": 87494
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.430211305618286,
      "learning_rate": 0.0004106584554372744,
      "loss": 3.1262,
      "step": 87495
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1121230125427246,
      "learning_rate": 0.0004106546533134146,
      "loss": 3.0749,
      "step": 87496
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.849895715713501,
      "learning_rate": 0.00041065085116898196,
      "loss": 2.9292,
      "step": 87497
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.226412057876587,
      "learning_rate": 0.00041064704900397713,
      "loss": 2.8282,
      "step": 87498
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7899887561798096,
      "learning_rate": 0.00041064324681840104,
      "loss": 3.047,
      "step": 87499
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.689577579498291,
      "learning_rate": 0.00041063944461225426,
      "loss": 3.0074,
      "step": 87500
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8475209474563599,
      "learning_rate": 0.0004106356423855375,
      "loss": 2.9447,
      "step": 87501
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.812374472618103,
      "learning_rate": 0.00041063184013825145,
      "loss": 2.6776,
      "step": 87502
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5202007293701172,
      "learning_rate": 0.000410628037870397,
      "loss": 3.1744,
      "step": 87503
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5590832233428955,
      "learning_rate": 0.0004106242355819745,
      "loss": 2.9365,
      "step": 87504
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6902300119400024,
      "learning_rate": 0.000410620433272985,
      "loss": 2.7933,
      "step": 87505
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7767428159713745,
      "learning_rate": 0.0004106166309434291,
      "loss": 3.0928,
      "step": 87506
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6137316226959229,
      "learning_rate": 0.00041061282859330744,
      "loss": 3.2989,
      "step": 87507
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6633858680725098,
      "learning_rate": 0.0004106090262226207,
      "loss": 3.1771,
      "step": 87508
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6207195520401,
      "learning_rate": 0.00041060522383136976,
      "loss": 3.1631,
      "step": 87509
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.772491216659546,
      "learning_rate": 0.00041060142141955514,
      "loss": 3.5005,
      "step": 87510
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8915934562683105,
      "learning_rate": 0.00041059761898717766,
      "loss": 2.9828,
      "step": 87511
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5599802732467651,
      "learning_rate": 0.00041059381653423796,
      "loss": 3.179,
      "step": 87512
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0042288303375244,
      "learning_rate": 0.0004105900140607368,
      "loss": 3.1114,
      "step": 87513
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.006514549255371,
      "learning_rate": 0.00041058621156667493,
      "loss": 3.1855,
      "step": 87514
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.777689814567566,
      "learning_rate": 0.0004105824090520529,
      "loss": 3.1267,
      "step": 87515
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6659566164016724,
      "learning_rate": 0.0004105786065168716,
      "loss": 2.8898,
      "step": 87516
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9570964574813843,
      "learning_rate": 0.0004105748039611316,
      "loss": 2.9003,
      "step": 87517
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9108422994613647,
      "learning_rate": 0.00041057100138483364,
      "loss": 3.0664,
      "step": 87518
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.76897394657135,
      "learning_rate": 0.0004105671987879785,
      "loss": 2.9445,
      "step": 87519
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.851371169090271,
      "learning_rate": 0.0004105633961705668,
      "loss": 3.0533,
      "step": 87520
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9979652166366577,
      "learning_rate": 0.0004105595935325992,
      "loss": 2.8172,
      "step": 87521
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6918286085128784,
      "learning_rate": 0.0004105557908740766,
      "loss": 3.0822,
      "step": 87522
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7946704626083374,
      "learning_rate": 0.0004105519881949996,
      "loss": 3.0738,
      "step": 87523
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.908479928970337,
      "learning_rate": 0.00041054818549536885,
      "loss": 3.1674,
      "step": 87524
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9288625717163086,
      "learning_rate": 0.00041054438277518513,
      "loss": 2.9217,
      "step": 87525
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0675036907196045,
      "learning_rate": 0.00041054058003444906,
      "loss": 2.9992,
      "step": 87526
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7324244976043701,
      "learning_rate": 0.00041053677727316144,
      "loss": 3.1129,
      "step": 87527
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0890848636627197,
      "learning_rate": 0.00041053297449132305,
      "loss": 3.1646,
      "step": 87528
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5551841259002686,
      "learning_rate": 0.00041052917168893436,
      "loss": 2.995,
      "step": 87529
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.8634018898010254,
      "learning_rate": 0.00041052536886599625,
      "loss": 2.9908,
      "step": 87530
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.158602714538574,
      "learning_rate": 0.00041052156602250936,
      "loss": 3.0493,
      "step": 87531
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1761786937713623,
      "learning_rate": 0.00041051776315847446,
      "loss": 3.0911,
      "step": 87532
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.56270170211792,
      "learning_rate": 0.0004105139602738922,
      "loss": 2.9151,
      "step": 87533
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2971556186676025,
      "learning_rate": 0.0004105101573687634,
      "loss": 3.0989,
      "step": 87534
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.533503532409668,
      "learning_rate": 0.0004105063544430886,
      "loss": 2.7115,
      "step": 87535
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5550025701522827,
      "learning_rate": 0.0004105025514968685,
      "loss": 3.041,
      "step": 87536
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.368781805038452,
      "learning_rate": 0.0004104987485301041,
      "loss": 3.0485,
      "step": 87537
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7122594118118286,
      "learning_rate": 0.00041049494554279566,
      "loss": 2.9648,
      "step": 87538
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9994122982025146,
      "learning_rate": 0.0004104911425349442,
      "loss": 2.9391,
      "step": 87539
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.520693063735962,
      "learning_rate": 0.00041048733950655044,
      "loss": 2.719,
      "step": 87540
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.386325716972351,
      "learning_rate": 0.00041048353645761494,
      "loss": 3.2332,
      "step": 87541
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7803833484649658,
      "learning_rate": 0.00041047973338813847,
      "loss": 3.0606,
      "step": 87542
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7559205293655396,
      "learning_rate": 0.0004104759302981218,
      "loss": 3.3153,
      "step": 87543
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.677846908569336,
      "learning_rate": 0.00041047212718756543,
      "loss": 3.1284,
      "step": 87544
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6258655786514282,
      "learning_rate": 0.0004104683240564703,
      "loss": 2.9785,
      "step": 87545
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4693509340286255,
      "learning_rate": 0.00041046452090483704,
      "loss": 3.0534,
      "step": 87546
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5292699337005615,
      "learning_rate": 0.0004104607177326663,
      "loss": 2.996,
      "step": 87547
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5819286108016968,
      "learning_rate": 0.00041045691453995875,
      "loss": 3.2526,
      "step": 87548
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.434830665588379,
      "learning_rate": 0.00041045311132671534,
      "loss": 3.286,
      "step": 87549
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.299318313598633,
      "learning_rate": 0.0004104493080929365,
      "loss": 3.171,
      "step": 87550
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9018914699554443,
      "learning_rate": 0.000410445504838623,
      "loss": 3.0365,
      "step": 87551
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5593841075897217,
      "learning_rate": 0.0004104417015637757,
      "loss": 2.9877,
      "step": 87552
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4181727170944214,
      "learning_rate": 0.00041043789826839515,
      "loss": 3.0196,
      "step": 87553
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.3733959197998047,
      "learning_rate": 0.0004104340949524822,
      "loss": 3.1804,
      "step": 87554
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2351765632629395,
      "learning_rate": 0.0004104302916160374,
      "loss": 3.1257,
      "step": 87555
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7411558628082275,
      "learning_rate": 0.00041042648825906147,
      "loss": 3.0355,
      "step": 87556
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.196507453918457,
      "learning_rate": 0.0004104226848815552,
      "loss": 3.0124,
      "step": 87557
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0235869884490967,
      "learning_rate": 0.0004104188814835193,
      "loss": 2.9556,
      "step": 87558
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0295658111572266,
      "learning_rate": 0.0004104150780649545,
      "loss": 3.1352,
      "step": 87559
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7902450561523438,
      "learning_rate": 0.00041041127462586134,
      "loss": 3.3627,
      "step": 87560
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.43121337890625,
      "learning_rate": 0.0004104074711662407,
      "loss": 2.8969,
      "step": 87561
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.049377202987671,
      "learning_rate": 0.0004104036676860932,
      "loss": 3.0884,
      "step": 87562
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.641169548034668,
      "learning_rate": 0.00041039986418541955,
      "loss": 2.9666,
      "step": 87563
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.178650379180908,
      "learning_rate": 0.00041039606066422055,
      "loss": 2.913,
      "step": 87564
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.628914475440979,
      "learning_rate": 0.0004103922571224968,
      "loss": 3.1964,
      "step": 87565
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.621854543685913,
      "learning_rate": 0.00041038845356024907,
      "loss": 2.8479,
      "step": 87566
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0458474159240723,
      "learning_rate": 0.000410384649977478,
      "loss": 2.959,
      "step": 87567
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7449496984481812,
      "learning_rate": 0.0004103808463741844,
      "loss": 3.1057,
      "step": 87568
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6639494895935059,
      "learning_rate": 0.0004103770427503688,
      "loss": 3.0423,
      "step": 87569
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6491624116897583,
      "learning_rate": 0.0004103732391060322,
      "loss": 2.9943,
      "step": 87570
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8555058240890503,
      "learning_rate": 0.000410369435441175,
      "loss": 3.1499,
      "step": 87571
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.789228916168213,
      "learning_rate": 0.00041036563175579806,
      "loss": 3.2272,
      "step": 87572
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9203392267227173,
      "learning_rate": 0.0004103618280499021,
      "loss": 3.0914,
      "step": 87573
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5737104415893555,
      "learning_rate": 0.0004103580243234878,
      "loss": 3.088,
      "step": 87574
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7022502422332764,
      "learning_rate": 0.00041035422057655586,
      "loss": 2.7803,
      "step": 87575
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.623512864112854,
      "learning_rate": 0.00041035041680910695,
      "loss": 2.9931,
      "step": 87576
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.56051766872406,
      "learning_rate": 0.00041034661302114184,
      "loss": 2.7504,
      "step": 87577
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8448522090911865,
      "learning_rate": 0.00041034280921266114,
      "loss": 3.2337,
      "step": 87578
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7977365255355835,
      "learning_rate": 0.0004103390053836657,
      "loss": 2.9189,
      "step": 87579
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7276732921600342,
      "learning_rate": 0.0004103352015341562,
      "loss": 3.0771,
      "step": 87580
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9505671262741089,
      "learning_rate": 0.00041033139766413323,
      "loss": 3.257,
      "step": 87581
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.0042381286621094,
      "learning_rate": 0.00041032759377359755,
      "loss": 3.0266,
      "step": 87582
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0656371116638184,
      "learning_rate": 0.00041032378986254997,
      "loss": 3.0541,
      "step": 87583
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2896528244018555,
      "learning_rate": 0.00041031998593099106,
      "loss": 2.8306,
      "step": 87584
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6095342636108398,
      "learning_rate": 0.0004103161819789216,
      "loss": 3.2199,
      "step": 87585
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9022562503814697,
      "learning_rate": 0.0004103123780063423,
      "loss": 3.023,
      "step": 87586
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6294294595718384,
      "learning_rate": 0.0004103085740132538,
      "loss": 2.8348,
      "step": 87587
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.633674383163452,
      "learning_rate": 0.00041030476999965687,
      "loss": 2.8669,
      "step": 87588
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6412426233291626,
      "learning_rate": 0.00041030096596555223,
      "loss": 3.0522,
      "step": 87589
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7741310596466064,
      "learning_rate": 0.0004102971619109405,
      "loss": 2.8669,
      "step": 87590
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0619773864746094,
      "learning_rate": 0.0004102933578358225,
      "loss": 2.9462,
      "step": 87591
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.3502813577651978,
      "learning_rate": 0.0004102895537401989,
      "loss": 3.0778,
      "step": 87592
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.6586263179779053,
      "learning_rate": 0.0004102857496240703,
      "loss": 3.0747,
      "step": 87593
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9822381734848022,
      "learning_rate": 0.00041028194548743755,
      "loss": 3.1777,
      "step": 87594
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6115649938583374,
      "learning_rate": 0.00041027814133030133,
      "loss": 3.4274,
      "step": 87595
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1007089614868164,
      "learning_rate": 0.0004102743371526623,
      "loss": 2.633,
      "step": 87596
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.156348705291748,
      "learning_rate": 0.0004102705329545212,
      "loss": 2.9832,
      "step": 87597
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4920313358306885,
      "learning_rate": 0.00041026672873587867,
      "loss": 2.7863,
      "step": 87598
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7110462188720703,
      "learning_rate": 0.0004102629244967355,
      "loss": 3.0224,
      "step": 87599
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.339085102081299,
      "learning_rate": 0.0004102591202370924,
      "loss": 2.8736,
      "step": 87600
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.781036138534546,
      "learning_rate": 0.0004102553159569501,
      "loss": 2.8459,
      "step": 87601
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6858272552490234,
      "learning_rate": 0.00041025151165630916,
      "loss": 3.0902,
      "step": 87602
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6380128860473633,
      "learning_rate": 0.00041024770733517045,
      "loss": 3.0339,
      "step": 87603
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8228579759597778,
      "learning_rate": 0.00041024390299353457,
      "loss": 3.0496,
      "step": 87604
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7918719053268433,
      "learning_rate": 0.0004102400986314023,
      "loss": 3.1566,
      "step": 87605
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1844255924224854,
      "learning_rate": 0.0004102362942487742,
      "loss": 3.019,
      "step": 87606
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7339575290679932,
      "learning_rate": 0.0004102324898456512,
      "loss": 2.9357,
      "step": 87607
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.046170473098755,
      "learning_rate": 0.00041022868542203386,
      "loss": 3.0162,
      "step": 87608
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.1299548149108887,
      "learning_rate": 0.000410224880977923,
      "loss": 3.0032,
      "step": 87609
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.937464714050293,
      "learning_rate": 0.0004102210765133192,
      "loss": 2.8845,
      "step": 87610
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8487712144851685,
      "learning_rate": 0.00041021727202822324,
      "loss": 3.0865,
      "step": 87611
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.65533447265625,
      "learning_rate": 0.0004102134675226357,
      "loss": 2.756,
      "step": 87612
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6333054304122925,
      "learning_rate": 0.00041020966299655753,
      "loss": 2.7878,
      "step": 87613
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7898871898651123,
      "learning_rate": 0.00041020585844998923,
      "loss": 3.022,
      "step": 87614
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7993358373641968,
      "learning_rate": 0.0004102020538829316,
      "loss": 3.0248,
      "step": 87615
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.327169179916382,
      "learning_rate": 0.0004101982492953854,
      "loss": 3.1099,
      "step": 87616
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.613595724105835,
      "learning_rate": 0.0004101944446873512,
      "loss": 2.9137,
      "step": 87617
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.202817916870117,
      "learning_rate": 0.00041019064005882967,
      "loss": 2.8749,
      "step": 87618
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.791423797607422,
      "learning_rate": 0.0004101868354098218,
      "loss": 3.1028,
      "step": 87619
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7739206552505493,
      "learning_rate": 0.00041018303074032803,
      "loss": 2.8482,
      "step": 87620
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.041733741760254,
      "learning_rate": 0.00041017922605034917,
      "loss": 2.9461,
      "step": 87621
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5894486904144287,
      "learning_rate": 0.000410175421339886,
      "loss": 3.1367,
      "step": 87622
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.085209369659424,
      "learning_rate": 0.000410171616608939,
      "loss": 3.2058,
      "step": 87623
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.925197720527649,
      "learning_rate": 0.000410167811857509,
      "loss": 2.8232,
      "step": 87624
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.055950880050659,
      "learning_rate": 0.00041016400708559684,
      "loss": 2.8543,
      "step": 87625
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.294508218765259,
      "learning_rate": 0.0004101602022932031,
      "loss": 3.0509,
      "step": 87626
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.103895664215088,
      "learning_rate": 0.0004101563974803284,
      "loss": 3.1189,
      "step": 87627
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.609595775604248,
      "learning_rate": 0.00041015259264697365,
      "loss": 3.0043,
      "step": 87628
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8412469625473022,
      "learning_rate": 0.0004101487877931394,
      "loss": 2.9341,
      "step": 87629
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.794236183166504,
      "learning_rate": 0.0004101449829188264,
      "loss": 2.9422,
      "step": 87630
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5953805446624756,
      "learning_rate": 0.0004101411780240354,
      "loss": 3.0898,
      "step": 87631
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6920181512832642,
      "learning_rate": 0.000410137373108767,
      "loss": 3.3299,
      "step": 87632
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.8567845821380615,
      "learning_rate": 0.00041013356817302206,
      "loss": 2.8706,
      "step": 87633
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.4536452293395996,
      "learning_rate": 0.00041012976321680124,
      "loss": 2.8265,
      "step": 87634
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6225601434707642,
      "learning_rate": 0.00041012595824010517,
      "loss": 3.3682,
      "step": 87635
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.462955355644226,
      "learning_rate": 0.0004101221532429346,
      "loss": 2.9309,
      "step": 87636
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8179521560668945,
      "learning_rate": 0.0004101183482252903,
      "loss": 2.9687,
      "step": 87637
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9532502889633179,
      "learning_rate": 0.0004101145431871728,
      "loss": 3.1157,
      "step": 87638
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.666445016860962,
      "learning_rate": 0.00041011073812858304,
      "loss": 3.1969,
      "step": 87639
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.535951852798462,
      "learning_rate": 0.00041010693304952163,
      "loss": 3.1163,
      "step": 87640
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.126864433288574,
      "learning_rate": 0.0004101031279499892,
      "loss": 2.9102,
      "step": 87641
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9292501211166382,
      "learning_rate": 0.0004100993228299865,
      "loss": 2.9768,
      "step": 87642
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.589505672454834,
      "learning_rate": 0.00041009551768951427,
      "loss": 2.9081,
      "step": 87643
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8745213747024536,
      "learning_rate": 0.00041009171252857327,
      "loss": 2.9481,
      "step": 87644
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4408296346664429,
      "learning_rate": 0.0004100879073471641,
      "loss": 3.0326,
      "step": 87645
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8997688293457031,
      "learning_rate": 0.0004100841021452874,
      "loss": 2.8391,
      "step": 87646
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5927637815475464,
      "learning_rate": 0.0004100802969229442,
      "loss": 2.8845,
      "step": 87647
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.645330786705017,
      "learning_rate": 0.00041007649168013486,
      "loss": 2.9231,
      "step": 87648
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9547643661499023,
      "learning_rate": 0.0004100726864168602,
      "loss": 2.9966,
      "step": 87649
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.027662754058838,
      "learning_rate": 0.00041006888113312103,
      "loss": 3.0229,
      "step": 87650
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7397959232330322,
      "learning_rate": 0.00041006507582891794,
      "loss": 3.0484,
      "step": 87651
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.4739136695861816,
      "learning_rate": 0.0004100612705042517,
      "loss": 2.8626,
      "step": 87652
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.627983331680298,
      "learning_rate": 0.000410057465159123,
      "loss": 3.1886,
      "step": 87653
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6734038591384888,
      "learning_rate": 0.00041005365979353254,
      "loss": 3.0264,
      "step": 87654
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6820290088653564,
      "learning_rate": 0.0004100498544074809,
      "loss": 3.056,
      "step": 87655
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7067201137542725,
      "learning_rate": 0.00041004604900096904,
      "loss": 3.1516,
      "step": 87656
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.6939899921417236,
      "learning_rate": 0.0004100422435739975,
      "loss": 2.9156,
      "step": 87657
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.580411672592163,
      "learning_rate": 0.00041003843812656714,
      "loss": 2.9962,
      "step": 87658
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.938484787940979,
      "learning_rate": 0.00041003463265867847,
      "loss": 2.9912,
      "step": 87659
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.2803778648376465,
      "learning_rate": 0.0004100308271703322,
      "loss": 2.9363,
      "step": 87660
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.4990739822387695,
      "learning_rate": 0.00041002702166152916,
      "loss": 3.2779,
      "step": 87661
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2350635528564453,
      "learning_rate": 0.00041002321613227015,
      "loss": 3.0484,
      "step": 87662
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5495283603668213,
      "learning_rate": 0.00041001941058255566,
      "loss": 2.9236,
      "step": 87663
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7326574325561523,
      "learning_rate": 0.0004100156050123864,
      "loss": 3.2046,
      "step": 87664
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8717501163482666,
      "learning_rate": 0.00041001179942176327,
      "loss": 2.9588,
      "step": 87665
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7102265357971191,
      "learning_rate": 0.0004100079938106869,
      "loss": 3.1506,
      "step": 87666
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0058581829071045,
      "learning_rate": 0.00041000418817915786,
      "loss": 2.9631,
      "step": 87667
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5885396003723145,
      "learning_rate": 0.0004100003825271771,
      "loss": 3.1598,
      "step": 87668
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7738643884658813,
      "learning_rate": 0.00040999657685474505,
      "loss": 3.1279,
      "step": 87669
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8359284400939941,
      "learning_rate": 0.00040999277116186264,
      "loss": 3.009,
      "step": 87670
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.014698028564453,
      "learning_rate": 0.0004099889654485305,
      "loss": 2.9344,
      "step": 87671
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9205403327941895,
      "learning_rate": 0.0004099851597147493,
      "loss": 2.8358,
      "step": 87672
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.632265329360962,
      "learning_rate": 0.0004099813539605198,
      "loss": 2.7994,
      "step": 87673
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.6729538440704346,
      "learning_rate": 0.00040997754818584265,
      "loss": 2.9861,
      "step": 87674
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.086277484893799,
      "learning_rate": 0.00040997374239071867,
      "loss": 3.0368,
      "step": 87675
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3641929626464844,
      "learning_rate": 0.00040996993657514843,
      "loss": 3.1497,
      "step": 87676
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.2361912727355957,
      "learning_rate": 0.0004099661307391328,
      "loss": 2.9801,
      "step": 87677
    },
    {
      "epoch": 1.14,
      "grad_norm": 4.191972255706787,
      "learning_rate": 0.0004099623248826723,
      "loss": 2.9678,
      "step": 87678
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1686851978302,
      "learning_rate": 0.0004099585190057677,
      "loss": 2.9599,
      "step": 87679
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9502166509628296,
      "learning_rate": 0.00040995471310841983,
      "loss": 2.84,
      "step": 87680
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.4140465259552,
      "learning_rate": 0.0004099509071906293,
      "loss": 3.3537,
      "step": 87681
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.917178153991699,
      "learning_rate": 0.00040994710125239674,
      "loss": 3.1582,
      "step": 87682
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.3702805042266846,
      "learning_rate": 0.00040994329529372303,
      "loss": 3.0082,
      "step": 87683
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9836927652359009,
      "learning_rate": 0.0004099394893146087,
      "loss": 2.9317,
      "step": 87684
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7256553173065186,
      "learning_rate": 0.00040993568331505455,
      "loss": 3.1479,
      "step": 87685
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.1571311950683594,
      "learning_rate": 0.0004099318772950614,
      "loss": 2.7876,
      "step": 87686
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5445423126220703,
      "learning_rate": 0.0004099280712546297,
      "loss": 2.9732,
      "step": 87687
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5196458101272583,
      "learning_rate": 0.0004099242651937603,
      "loss": 3.238,
      "step": 87688
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4371684789657593,
      "learning_rate": 0.00040992045911245403,
      "loss": 3.1829,
      "step": 87689
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.550447702407837,
      "learning_rate": 0.00040991665301071134,
      "loss": 2.9912,
      "step": 87690
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7549430131912231,
      "learning_rate": 0.0004099128468885331,
      "loss": 3.1263,
      "step": 87691
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.868159294128418,
      "learning_rate": 0.00040990904074592006,
      "loss": 2.731,
      "step": 87692
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6644461154937744,
      "learning_rate": 0.00040990523458287283,
      "loss": 3.1702,
      "step": 87693
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.521357774734497,
      "learning_rate": 0.0004099014283993921,
      "loss": 2.7761,
      "step": 87694
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.169623374938965,
      "learning_rate": 0.0004098976221954786,
      "loss": 2.7907,
      "step": 87695
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6168431043624878,
      "learning_rate": 0.00040989381597113315,
      "loss": 3.2683,
      "step": 87696
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9296108484268188,
      "learning_rate": 0.0004098900097263562,
      "loss": 3.2391,
      "step": 87697
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.728684425354004,
      "learning_rate": 0.00040988620346114885,
      "loss": 2.8716,
      "step": 87698
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7637503147125244,
      "learning_rate": 0.0004098823971755114,
      "loss": 2.8607,
      "step": 87699
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6368038654327393,
      "learning_rate": 0.0004098785908694448,
      "loss": 3.124,
      "step": 87700
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.841590404510498,
      "learning_rate": 0.00040987478454294977,
      "loss": 2.8283,
      "step": 87701
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.640645146369934,
      "learning_rate": 0.00040987097819602684,
      "loss": 3.161,
      "step": 87702
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7256734371185303,
      "learning_rate": 0.0004098671718286768,
      "loss": 3.0333,
      "step": 87703
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8066984415054321,
      "learning_rate": 0.00040986336544090047,
      "loss": 3.2324,
      "step": 87704
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4476392269134521,
      "learning_rate": 0.00040985955903269844,
      "loss": 3.3852,
      "step": 87705
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.4022116661071777,
      "learning_rate": 0.00040985575260407144,
      "loss": 2.8656,
      "step": 87706
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6044633388519287,
      "learning_rate": 0.0004098519461550202,
      "loss": 2.9622,
      "step": 87707
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8515903949737549,
      "learning_rate": 0.0004098481396855453,
      "loss": 3.0212,
      "step": 87708
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7771745920181274,
      "learning_rate": 0.00040984433319564764,
      "loss": 2.9466,
      "step": 87709
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.91896915435791,
      "learning_rate": 0.00040984052668532786,
      "loss": 2.871,
      "step": 87710
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0157835483551025,
      "learning_rate": 0.00040983672015458664,
      "loss": 3.0636,
      "step": 87711
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0827932357788086,
      "learning_rate": 0.00040983291360342474,
      "loss": 2.9673,
      "step": 87712
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.9435036182403564,
      "learning_rate": 0.0004098291070318427,
      "loss": 3.0886,
      "step": 87713
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6344367265701294,
      "learning_rate": 0.0004098253004398415,
      "loss": 3.1106,
      "step": 87714
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6763286590576172,
      "learning_rate": 0.0004098214938274216,
      "loss": 3.0313,
      "step": 87715
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.726252794265747,
      "learning_rate": 0.0004098176871945839,
      "loss": 3.0316,
      "step": 87716
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.02740216255188,
      "learning_rate": 0.000409813880541329,
      "loss": 3.0358,
      "step": 87717
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.753354549407959,
      "learning_rate": 0.0004098100738676576,
      "loss": 3.0456,
      "step": 87718
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.021411418914795,
      "learning_rate": 0.00040980626717357035,
      "loss": 3.1036,
      "step": 87719
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.080240249633789,
      "learning_rate": 0.0004098024604590682,
      "loss": 2.8209,
      "step": 87720
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7319546937942505,
      "learning_rate": 0.0004097986537241516,
      "loss": 3.0158,
      "step": 87721
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3405611515045166,
      "learning_rate": 0.00040979484696882136,
      "loss": 3.0793,
      "step": 87722
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7433421611785889,
      "learning_rate": 0.00040979104019307826,
      "loss": 3.0301,
      "step": 87723
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6964136362075806,
      "learning_rate": 0.0004097872333969229,
      "loss": 2.8225,
      "step": 87724
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.4949917793273926,
      "learning_rate": 0.000409783426580356,
      "loss": 3.051,
      "step": 87725
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7150543928146362,
      "learning_rate": 0.00040977961974337834,
      "loss": 3.3529,
      "step": 87726
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8686344623565674,
      "learning_rate": 0.0004097758128859905,
      "loss": 2.9532,
      "step": 87727
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.195880651473999,
      "learning_rate": 0.0004097720060081932,
      "loss": 3.1001,
      "step": 87728
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5275784730911255,
      "learning_rate": 0.0004097681991099874,
      "loss": 3.0665,
      "step": 87729
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.700273871421814,
      "learning_rate": 0.0004097643921913735,
      "loss": 3.0137,
      "step": 87730
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6865235567092896,
      "learning_rate": 0.00040976058525235234,
      "loss": 3.043,
      "step": 87731
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6109201908111572,
      "learning_rate": 0.0004097567782929247,
      "loss": 3.0388,
      "step": 87732
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7978394031524658,
      "learning_rate": 0.0004097529713130911,
      "loss": 3.0085,
      "step": 87733
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0328683853149414,
      "learning_rate": 0.00040974916431285233,
      "loss": 3.1049,
      "step": 87734
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.675904393196106,
      "learning_rate": 0.00040974535729220926,
      "loss": 3.0181,
      "step": 87735
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6274206638336182,
      "learning_rate": 0.0004097415502511623,
      "loss": 3.1638,
      "step": 87736
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.641798973083496,
      "learning_rate": 0.00040973774318971243,
      "loss": 2.7934,
      "step": 87737
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5541348457336426,
      "learning_rate": 0.00040973393610786013,
      "loss": 3.0995,
      "step": 87738
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5136407613754272,
      "learning_rate": 0.00040973012900560635,
      "loss": 3.0007,
      "step": 87739
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7934331893920898,
      "learning_rate": 0.0004097263218829515,
      "loss": 3.0105,
      "step": 87740
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.235715389251709,
      "learning_rate": 0.0004097225147398966,
      "loss": 3.1653,
      "step": 87741
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6534860134124756,
      "learning_rate": 0.00040971870757644215,
      "loss": 3.106,
      "step": 87742
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7732547521591187,
      "learning_rate": 0.000409714900392589,
      "loss": 2.8773,
      "step": 87743
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6434597969055176,
      "learning_rate": 0.0004097110931883377,
      "loss": 3.1181,
      "step": 87744
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9192242622375488,
      "learning_rate": 0.000409707285963689,
      "loss": 3.2341,
      "step": 87745
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8251314163208008,
      "learning_rate": 0.0004097034787186438,
      "loss": 3.0072,
      "step": 87746
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9737714529037476,
      "learning_rate": 0.0004096996714532025,
      "loss": 3.044,
      "step": 87747
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8284187316894531,
      "learning_rate": 0.00040969586416736604,
      "loss": 2.8828,
      "step": 87748
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7284469604492188,
      "learning_rate": 0.00040969205686113496,
      "loss": 3.0612,
      "step": 87749
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7608081102371216,
      "learning_rate": 0.0004096882495345102,
      "loss": 2.9592,
      "step": 87750
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4478243589401245,
      "learning_rate": 0.0004096844421874922,
      "loss": 2.9849,
      "step": 87751
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8173959255218506,
      "learning_rate": 0.00040968063482008186,
      "loss": 2.8665,
      "step": 87752
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.627205729484558,
      "learning_rate": 0.0004096768274322798,
      "loss": 2.9094,
      "step": 87753
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.707598328590393,
      "learning_rate": 0.00040967302002408676,
      "loss": 3.0408,
      "step": 87754
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.655652403831482,
      "learning_rate": 0.0004096692125955034,
      "loss": 2.9376,
      "step": 87755
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8802213668823242,
      "learning_rate": 0.0004096654051465305,
      "loss": 3.0825,
      "step": 87756
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0851075649261475,
      "learning_rate": 0.0004096615976771687,
      "loss": 3.217,
      "step": 87757
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.684562087059021,
      "learning_rate": 0.0004096577901874187,
      "loss": 2.9309,
      "step": 87758
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.560410499572754,
      "learning_rate": 0.0004096539826772814,
      "loss": 3.1154,
      "step": 87759
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.005448818206787,
      "learning_rate": 0.0004096501751467572,
      "loss": 3.1462,
      "step": 87760
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9193345308303833,
      "learning_rate": 0.000409646367595847,
      "loss": 2.73,
      "step": 87761
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.204253911972046,
      "learning_rate": 0.0004096425600245515,
      "loss": 3.0397,
      "step": 87762
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8442858457565308,
      "learning_rate": 0.00040963875243287136,
      "loss": 2.9629,
      "step": 87763
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5397049188613892,
      "learning_rate": 0.0004096349448208073,
      "loss": 2.9944,
      "step": 87764
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6615169048309326,
      "learning_rate": 0.0004096311371883601,
      "loss": 2.7789,
      "step": 87765
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7813243865966797,
      "learning_rate": 0.00040962732953553035,
      "loss": 3.143,
      "step": 87766
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9333245754241943,
      "learning_rate": 0.00040962352186231873,
      "loss": 2.982,
      "step": 87767
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8043261766433716,
      "learning_rate": 0.0004096197141687262,
      "loss": 3.0286,
      "step": 87768
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7235114574432373,
      "learning_rate": 0.0004096159064547531,
      "loss": 2.8898,
      "step": 87769
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8873014450073242,
      "learning_rate": 0.00040961209872040046,
      "loss": 3.1062,
      "step": 87770
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6250447034835815,
      "learning_rate": 0.0004096082909656689,
      "loss": 2.8753,
      "step": 87771
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.699519157409668,
      "learning_rate": 0.000409604483190559,
      "loss": 2.9834,
      "step": 87772
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9131418466567993,
      "learning_rate": 0.00040960067539507156,
      "loss": 3.3127,
      "step": 87773
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0397520065307617,
      "learning_rate": 0.00040959686757920734,
      "loss": 2.9143,
      "step": 87774
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.9238667488098145,
      "learning_rate": 0.00040959305974296696,
      "loss": 2.7443,
      "step": 87775
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6440626382827759,
      "learning_rate": 0.0004095892518863511,
      "loss": 2.918,
      "step": 87776
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.749671220779419,
      "learning_rate": 0.00040958544400936065,
      "loss": 3.0528,
      "step": 87777
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5721726417541504,
      "learning_rate": 0.00040958163611199615,
      "loss": 2.899,
      "step": 87778
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8214309215545654,
      "learning_rate": 0.0004095778281942583,
      "loss": 2.9619,
      "step": 87779
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5665862560272217,
      "learning_rate": 0.00040957402025614795,
      "loss": 2.9569,
      "step": 87780
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7732412815093994,
      "learning_rate": 0.00040957021229766566,
      "loss": 3.14,
      "step": 87781
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.825455665588379,
      "learning_rate": 0.00040956640431881225,
      "loss": 3.2757,
      "step": 87782
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.566004991531372,
      "learning_rate": 0.00040956259631958825,
      "loss": 3.0386,
      "step": 87783
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5506975650787354,
      "learning_rate": 0.0004095587882999947,
      "loss": 2.9946,
      "step": 87784
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.031317949295044,
      "learning_rate": 0.0004095549802600319,
      "loss": 2.9463,
      "step": 87785
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8216826915740967,
      "learning_rate": 0.00040955117219970087,
      "loss": 2.8187,
      "step": 87786
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7336852550506592,
      "learning_rate": 0.00040954736411900227,
      "loss": 3.0021,
      "step": 87787
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7326035499572754,
      "learning_rate": 0.00040954355601793667,
      "loss": 3.0121,
      "step": 87788
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.163724422454834,
      "learning_rate": 0.0004095397478965048,
      "loss": 3.0133,
      "step": 87789
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7105263471603394,
      "learning_rate": 0.0004095359397547075,
      "loss": 3.0727,
      "step": 87790
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8100357055664062,
      "learning_rate": 0.00040953213159254545,
      "loss": 2.9824,
      "step": 87791
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.507608413696289,
      "learning_rate": 0.0004095283234100192,
      "loss": 3.1644,
      "step": 87792
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0042872428894043,
      "learning_rate": 0.0004095245152071296,
      "loss": 2.9082,
      "step": 87793
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6256910562515259,
      "learning_rate": 0.00040952070698387733,
      "loss": 3.2923,
      "step": 87794
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.1164448261260986,
      "learning_rate": 0.00040951689874026305,
      "loss": 2.9157,
      "step": 87795
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.541198253631592,
      "learning_rate": 0.0004095130904762876,
      "loss": 3.2684,
      "step": 87796
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8121050596237183,
      "learning_rate": 0.0004095092821919516,
      "loss": 3.1412,
      "step": 87797
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7378623485565186,
      "learning_rate": 0.0004095054738872557,
      "loss": 2.9693,
      "step": 87798
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.3592636585235596,
      "learning_rate": 0.0004095016655622007,
      "loss": 3.3152,
      "step": 87799
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.4442169666290283,
      "learning_rate": 0.00040949785721678723,
      "loss": 2.9162,
      "step": 87800
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2240753173828125,
      "learning_rate": 0.000409494048851016,
      "loss": 3.2536,
      "step": 87801
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1397337913513184,
      "learning_rate": 0.00040949024046488795,
      "loss": 3.0732,
      "step": 87802
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0396862030029297,
      "learning_rate": 0.0004094864320584034,
      "loss": 3.0209,
      "step": 87803
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.569899797439575,
      "learning_rate": 0.00040948262363156335,
      "loss": 3.1284,
      "step": 87804
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5566182136535645,
      "learning_rate": 0.0004094788151843684,
      "loss": 2.7074,
      "step": 87805
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8857558965682983,
      "learning_rate": 0.00040947500671681923,
      "loss": 2.8658,
      "step": 87806
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2650787830352783,
      "learning_rate": 0.00040947119822891664,
      "loss": 2.6951,
      "step": 87807
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.968919038772583,
      "learning_rate": 0.0004094673897206613,
      "loss": 3.1764,
      "step": 87808
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5890233516693115,
      "learning_rate": 0.0004094635811920539,
      "loss": 2.8524,
      "step": 87809
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7161351442337036,
      "learning_rate": 0.00040945977264309505,
      "loss": 3.2133,
      "step": 87810
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.442481279373169,
      "learning_rate": 0.00040945596407378573,
      "loss": 2.9931,
      "step": 87811
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9575706720352173,
      "learning_rate": 0.00040945215548412637,
      "loss": 3.2054,
      "step": 87812
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5450270175933838,
      "learning_rate": 0.0004094483468741178,
      "loss": 2.9085,
      "step": 87813
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9954415559768677,
      "learning_rate": 0.0004094445382437608,
      "loss": 2.8969,
      "step": 87814
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.617797613143921,
      "learning_rate": 0.00040944072959305585,
      "loss": 3.1062,
      "step": 87815
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0870654582977295,
      "learning_rate": 0.00040943692092200386,
      "loss": 3.0245,
      "step": 87816
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9704334735870361,
      "learning_rate": 0.0004094331122306055,
      "loss": 3.17,
      "step": 87817
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7559425830841064,
      "learning_rate": 0.00040942930351886147,
      "loss": 2.8195,
      "step": 87818
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6876758337020874,
      "learning_rate": 0.0004094254947867724,
      "loss": 3.0546,
      "step": 87819
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5688822269439697,
      "learning_rate": 0.0004094216860343392,
      "loss": 3.1431,
      "step": 87820
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.541719913482666,
      "learning_rate": 0.00040941787726156236,
      "loss": 3.1311,
      "step": 87821
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6782317161560059,
      "learning_rate": 0.00040941406846844265,
      "loss": 3.0629,
      "step": 87822
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7798607349395752,
      "learning_rate": 0.00040941025965498076,
      "loss": 2.9903,
      "step": 87823
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6753325462341309,
      "learning_rate": 0.00040940645082117755,
      "loss": 3.0519,
      "step": 87824
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.03228497505188,
      "learning_rate": 0.0004094026419670336,
      "loss": 3.0525,
      "step": 87825
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6652568578720093,
      "learning_rate": 0.0004093988330925495,
      "loss": 2.8735,
      "step": 87826
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.080312967300415,
      "learning_rate": 0.00040939502419772624,
      "loss": 2.9901,
      "step": 87827
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5760154724121094,
      "learning_rate": 0.0004093912152825643,
      "loss": 3.0219,
      "step": 87828
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7473132610321045,
      "learning_rate": 0.0004093874063470645,
      "loss": 2.9697,
      "step": 87829
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.715786099433899,
      "learning_rate": 0.0004093835973912275,
      "loss": 2.9937,
      "step": 87830
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8815653324127197,
      "learning_rate": 0.0004093797884150541,
      "loss": 2.7294,
      "step": 87831
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6045970916748047,
      "learning_rate": 0.0004093759794185448,
      "loss": 2.8386,
      "step": 87832
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4307780265808105,
      "learning_rate": 0.00040937217040170053,
      "loss": 3.0412,
      "step": 87833
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.750566005706787,
      "learning_rate": 0.00040936836136452187,
      "loss": 3.0347,
      "step": 87834
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.734068751335144,
      "learning_rate": 0.0004093645523070096,
      "loss": 3.0459,
      "step": 87835
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6416703462600708,
      "learning_rate": 0.0004093607432291644,
      "loss": 2.9175,
      "step": 87836
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6271322965621948,
      "learning_rate": 0.0004093569341309869,
      "loss": 3.0208,
      "step": 87837
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8132284879684448,
      "learning_rate": 0.00040935312501247796,
      "loss": 2.973,
      "step": 87838
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.842630386352539,
      "learning_rate": 0.0004093493158736382,
      "loss": 2.8111,
      "step": 87839
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2856595516204834,
      "learning_rate": 0.0004093455067144683,
      "loss": 3.0233,
      "step": 87840
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9816068410873413,
      "learning_rate": 0.0004093416975349691,
      "loss": 3.0468,
      "step": 87841
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.558485746383667,
      "learning_rate": 0.0004093378883351411,
      "loss": 3.1017,
      "step": 87842
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7886790037155151,
      "learning_rate": 0.00040933407911498515,
      "loss": 2.8222,
      "step": 87843
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.69062077999115,
      "learning_rate": 0.000409330269874502,
      "loss": 2.88,
      "step": 87844
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9915179014205933,
      "learning_rate": 0.0004093264606136922,
      "loss": 2.8616,
      "step": 87845
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5485416650772095,
      "learning_rate": 0.0004093226513325566,
      "loss": 2.9966,
      "step": 87846
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.533233404159546,
      "learning_rate": 0.0004093188420310958,
      "loss": 2.8313,
      "step": 87847
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4431943893432617,
      "learning_rate": 0.0004093150327093106,
      "loss": 2.9962,
      "step": 87848
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1170120239257812,
      "learning_rate": 0.0004093112233672017,
      "loss": 3.0582,
      "step": 87849
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.602229356765747,
      "learning_rate": 0.0004093074140047698,
      "loss": 3.0556,
      "step": 87850
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.4260647296905518,
      "learning_rate": 0.0004093036046220155,
      "loss": 3.005,
      "step": 87851
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.2664496898651123,
      "learning_rate": 0.0004092997952189397,
      "loss": 2.9343,
      "step": 87852
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.4168429374694824,
      "learning_rate": 0.00040929598579554286,
      "loss": 3.1657,
      "step": 87853
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.4887237548828125,
      "learning_rate": 0.000409292176351826,
      "loss": 2.8973,
      "step": 87854
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.5187430381774902,
      "learning_rate": 0.0004092883668877895,
      "loss": 3.0702,
      "step": 87855
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1407222747802734,
      "learning_rate": 0.00040928455740343433,
      "loss": 2.8908,
      "step": 87856
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.327767848968506,
      "learning_rate": 0.0004092807478987611,
      "loss": 3.0121,
      "step": 87857
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.865730047225952,
      "learning_rate": 0.00040927693837377054,
      "loss": 2.8992,
      "step": 87858
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.342867612838745,
      "learning_rate": 0.00040927312882846323,
      "loss": 3.0297,
      "step": 87859
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1935322284698486,
      "learning_rate": 0.0004092693192628401,
      "loss": 2.704,
      "step": 87860
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3651347160339355,
      "learning_rate": 0.0004092655096769016,
      "loss": 2.8982,
      "step": 87861
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.853937864303589,
      "learning_rate": 0.0004092617000706487,
      "loss": 3.0645,
      "step": 87862
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.545001745223999,
      "learning_rate": 0.00040925789044408197,
      "loss": 3.1554,
      "step": 87863
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.590588092803955,
      "learning_rate": 0.00040925408079720216,
      "loss": 2.9508,
      "step": 87864
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9469133615493774,
      "learning_rate": 0.00040925027113000983,
      "loss": 3.2563,
      "step": 87865
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.267646551132202,
      "learning_rate": 0.00040924646144250594,
      "loss": 3.0656,
      "step": 87866
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9709495306015015,
      "learning_rate": 0.000409242651734691,
      "loss": 3.1767,
      "step": 87867
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5788038969039917,
      "learning_rate": 0.0004092388420065658,
      "loss": 2.9074,
      "step": 87868
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.599308967590332,
      "learning_rate": 0.0004092350322581311,
      "loss": 2.5256,
      "step": 87869
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1141958236694336,
      "learning_rate": 0.00040923122248938745,
      "loss": 2.9085,
      "step": 87870
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0897715091705322,
      "learning_rate": 0.0004092274127003357,
      "loss": 3.1064,
      "step": 87871
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7376463413238525,
      "learning_rate": 0.00040922360289097657,
      "loss": 3.1462,
      "step": 87872
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.026148796081543,
      "learning_rate": 0.00040921979306131063,
      "loss": 2.7554,
      "step": 87873
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.751244306564331,
      "learning_rate": 0.0004092159832113386,
      "loss": 2.9104,
      "step": 87874
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.3432724475860596,
      "learning_rate": 0.00040921217334106145,
      "loss": 3.2058,
      "step": 87875
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5752995014190674,
      "learning_rate": 0.00040920836345047955,
      "loss": 3.0844,
      "step": 87876
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.373096227645874,
      "learning_rate": 0.0004092045535395938,
      "loss": 3.0364,
      "step": 87877
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7783015966415405,
      "learning_rate": 0.0004092007436084049,
      "loss": 2.8875,
      "step": 87878
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8450534343719482,
      "learning_rate": 0.0004091969336569134,
      "loss": 3.1082,
      "step": 87879
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.098984479904175,
      "learning_rate": 0.0004091931236851202,
      "loss": 3.1716,
      "step": 87880
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.359196424484253,
      "learning_rate": 0.000409189313693026,
      "loss": 2.8925,
      "step": 87881
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.039823293685913,
      "learning_rate": 0.0004091855036806314,
      "loss": 3.2619,
      "step": 87882
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8205738067626953,
      "learning_rate": 0.0004091816936479371,
      "loss": 2.9056,
      "step": 87883
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7447783946990967,
      "learning_rate": 0.00040917788359494395,
      "loss": 2.9618,
      "step": 87884
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.5495214462280273,
      "learning_rate": 0.0004091740735216525,
      "loss": 2.9611,
      "step": 87885
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.6581525802612305,
      "learning_rate": 0.0004091702634280636,
      "loss": 2.7575,
      "step": 87886
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.596679925918579,
      "learning_rate": 0.00040916645331417784,
      "loss": 3.0831,
      "step": 87887
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.173732042312622,
      "learning_rate": 0.00040916264317999594,
      "loss": 3.0063,
      "step": 87888
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6253892183303833,
      "learning_rate": 0.0004091588330255187,
      "loss": 3.1713,
      "step": 87889
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7843815088272095,
      "learning_rate": 0.0004091550228507468,
      "loss": 3.0797,
      "step": 87890
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1147446632385254,
      "learning_rate": 0.0004091512126556808,
      "loss": 2.7068,
      "step": 87891
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7015470266342163,
      "learning_rate": 0.0004091474024403216,
      "loss": 3.0563,
      "step": 87892
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5511338710784912,
      "learning_rate": 0.0004091435922046699,
      "loss": 2.8411,
      "step": 87893
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.055471897125244,
      "learning_rate": 0.00040913978194872624,
      "loss": 3.2187,
      "step": 87894
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.701127290725708,
      "learning_rate": 0.0004091359716724915,
      "loss": 2.7559,
      "step": 87895
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8553152084350586,
      "learning_rate": 0.00040913216137596634,
      "loss": 2.8208,
      "step": 87896
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8398587703704834,
      "learning_rate": 0.0004091283510591514,
      "loss": 3.3419,
      "step": 87897
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7407708168029785,
      "learning_rate": 0.00040912454072204745,
      "loss": 3.1183,
      "step": 87898
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7294365167617798,
      "learning_rate": 0.0004091207303646552,
      "loss": 2.9737,
      "step": 87899
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4590543508529663,
      "learning_rate": 0.00040911691998697533,
      "loss": 3.1562,
      "step": 87900
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.1667535305023193,
      "learning_rate": 0.00040911310958900856,
      "loss": 3.1168,
      "step": 87901
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9295142889022827,
      "learning_rate": 0.0004091092991707556,
      "loss": 3.0198,
      "step": 87902
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.7351644039154053,
      "learning_rate": 0.0004091054887322172,
      "loss": 3.233,
      "step": 87903
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6572928428649902,
      "learning_rate": 0.000409101678273394,
      "loss": 2.9564,
      "step": 87904
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.025115966796875,
      "learning_rate": 0.0004090978677942868,
      "loss": 3.1889,
      "step": 87905
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0972161293029785,
      "learning_rate": 0.0004090940572948962,
      "loss": 2.911,
      "step": 87906
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.138659954071045,
      "learning_rate": 0.00040909024677522295,
      "loss": 3.2533,
      "step": 87907
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9348115921020508,
      "learning_rate": 0.00040908643623526776,
      "loss": 3.1579,
      "step": 87908
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.871131420135498,
      "learning_rate": 0.00040908262567503133,
      "loss": 2.8234,
      "step": 87909
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.334549903869629,
      "learning_rate": 0.00040907881509451435,
      "loss": 2.9965,
      "step": 87910
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.56162428855896,
      "learning_rate": 0.00040907500449371764,
      "loss": 2.8233,
      "step": 87911
    },
    {
      "epoch": 1.14,
      "grad_norm": 3.208272933959961,
      "learning_rate": 0.00040907119387264185,
      "loss": 3.0251,
      "step": 87912
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.720963954925537,
      "learning_rate": 0.00040906738323128757,
      "loss": 3.178,
      "step": 87913
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5943248271942139,
      "learning_rate": 0.0004090635725696556,
      "loss": 2.9174,
      "step": 87914
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.635776162147522,
      "learning_rate": 0.0004090597618877467,
      "loss": 3.121,
      "step": 87915
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9922012090682983,
      "learning_rate": 0.00040905595118556154,
      "loss": 3.0142,
      "step": 87916
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.0755369663238525,
      "learning_rate": 0.00040905214046310087,
      "loss": 3.1143,
      "step": 87917
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8141175508499146,
      "learning_rate": 0.00040904832972036523,
      "loss": 3.0186,
      "step": 87918
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8529443740844727,
      "learning_rate": 0.0004090445189573555,
      "loss": 3.2627,
      "step": 87919
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9064539670944214,
      "learning_rate": 0.0004090407081740724,
      "loss": 3.212,
      "step": 87920
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7392449378967285,
      "learning_rate": 0.0004090368973705165,
      "loss": 2.9344,
      "step": 87921
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.019680976867676,
      "learning_rate": 0.0004090330865466885,
      "loss": 2.9065,
      "step": 87922
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8273385763168335,
      "learning_rate": 0.0004090292757025894,
      "loss": 3.2705,
      "step": 87923
    },
    {
      "epoch": 1.14,
      "grad_norm": 2.464954137802124,
      "learning_rate": 0.0004090254648382195,
      "loss": 3.0507,
      "step": 87924
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.8889400959014893,
      "learning_rate": 0.0004090216539535798,
      "loss": 3.0141,
      "step": 87925
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6998612880706787,
      "learning_rate": 0.00040901784304867093,
      "loss": 3.1647,
      "step": 87926
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5793474912643433,
      "learning_rate": 0.00040901403212349356,
      "loss": 3.0548,
      "step": 87927
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6721553802490234,
      "learning_rate": 0.00040901022117804844,
      "loss": 3.0135,
      "step": 87928
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7353929281234741,
      "learning_rate": 0.0004090064102123362,
      "loss": 2.9899,
      "step": 87929
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.4883989095687866,
      "learning_rate": 0.00040900259922635777,
      "loss": 3.1951,
      "step": 87930
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.5878825187683105,
      "learning_rate": 0.0004089987882201135,
      "loss": 2.8301,
      "step": 87931
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.467039942741394,
      "learning_rate": 0.0004089949771936044,
      "loss": 3.133,
      "step": 87932
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.762162208557129,
      "learning_rate": 0.00040899116614683114,
      "loss": 3.0666,
      "step": 87933
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.7796907424926758,
      "learning_rate": 0.0004089873550797942,
      "loss": 3.0566,
      "step": 87934
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.94270658493042,
      "learning_rate": 0.00040898354399249463,
      "loss": 3.0015,
      "step": 87935
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.6989707946777344,
      "learning_rate": 0.0004089797328849329,
      "loss": 2.9848,
      "step": 87936
    },
    {
      "epoch": 1.14,
      "grad_norm": 1.9730757474899292,
      "learning_rate": 0.0004089759217571098,
      "loss": 3.2238,
      "step": 87937
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7654122114181519,
      "learning_rate": 0.0004089721106090259,
      "loss": 2.7197,
      "step": 87938
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.528321385383606,
      "learning_rate": 0.0004089682994406821,
      "loss": 2.9313,
      "step": 87939
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6120191812515259,
      "learning_rate": 0.000408964488252079,
      "loss": 2.9187,
      "step": 87940
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.630465030670166,
      "learning_rate": 0.00040896067704321743,
      "loss": 3.0062,
      "step": 87941
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0458030700683594,
      "learning_rate": 0.000408956865814098,
      "loss": 3.0875,
      "step": 87942
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.199026346206665,
      "learning_rate": 0.0004089530545647214,
      "loss": 2.8503,
      "step": 87943
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.794555902481079,
      "learning_rate": 0.00040894924329508837,
      "loss": 2.7904,
      "step": 87944
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.732607126235962,
      "learning_rate": 0.0004089454320051996,
      "loss": 3.1845,
      "step": 87945
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5696338415145874,
      "learning_rate": 0.00040894162069505586,
      "loss": 3.0695,
      "step": 87946
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7981796264648438,
      "learning_rate": 0.00040893780936465776,
      "loss": 3.0472,
      "step": 87947
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5816117525100708,
      "learning_rate": 0.0004089339980140061,
      "loss": 3.2163,
      "step": 87948
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0533759593963623,
      "learning_rate": 0.0004089301866431016,
      "loss": 2.8551,
      "step": 87949
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.723657250404358,
      "learning_rate": 0.00040892637525194485,
      "loss": 3.0527,
      "step": 87950
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.586198329925537,
      "learning_rate": 0.0004089225638405367,
      "loss": 2.9566,
      "step": 87951
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3651788234710693,
      "learning_rate": 0.0004089187524088777,
      "loss": 3.0048,
      "step": 87952
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6771018505096436,
      "learning_rate": 0.00040891494095696867,
      "loss": 2.9927,
      "step": 87953
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9494162797927856,
      "learning_rate": 0.0004089111294848104,
      "loss": 2.861,
      "step": 87954
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8497309684753418,
      "learning_rate": 0.00040890731799240337,
      "loss": 3.1402,
      "step": 87955
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8065017461776733,
      "learning_rate": 0.00040890350647974845,
      "loss": 2.8573,
      "step": 87956
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0246529579162598,
      "learning_rate": 0.0004088996949468464,
      "loss": 3.0632,
      "step": 87957
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.711194396018982,
      "learning_rate": 0.0004088958833936977,
      "loss": 2.8386,
      "step": 87958
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8780219554901123,
      "learning_rate": 0.0004088920718203033,
      "loss": 2.9827,
      "step": 87959
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6154980659484863,
      "learning_rate": 0.0004088882602266638,
      "loss": 3.048,
      "step": 87960
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6692551374435425,
      "learning_rate": 0.00040888444861277983,
      "loss": 3.0617,
      "step": 87961
    },
    {
      "epoch": 1.15,
      "grad_norm": 4.914713382720947,
      "learning_rate": 0.00040888063697865225,
      "loss": 3.2603,
      "step": 87962
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6125115156173706,
      "learning_rate": 0.0004088768253242817,
      "loss": 2.8437,
      "step": 87963
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7975242137908936,
      "learning_rate": 0.00040887301364966893,
      "loss": 3.139,
      "step": 87964
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8188291788101196,
      "learning_rate": 0.0004088692019548145,
      "loss": 3.0606,
      "step": 87965
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6515469551086426,
      "learning_rate": 0.00040886539023971935,
      "loss": 3.0282,
      "step": 87966
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5796195268630981,
      "learning_rate": 0.00040886157850438405,
      "loss": 2.9792,
      "step": 87967
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7862012386322021,
      "learning_rate": 0.00040885776674880924,
      "loss": 2.9871,
      "step": 87968
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8149237632751465,
      "learning_rate": 0.0004088539549729958,
      "loss": 3.0521,
      "step": 87969
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.906104326248169,
      "learning_rate": 0.00040885014317694436,
      "loss": 3.1406,
      "step": 87970
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7946462631225586,
      "learning_rate": 0.0004088463313606556,
      "loss": 3.1279,
      "step": 87971
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5517525672912598,
      "learning_rate": 0.00040884251952413023,
      "loss": 3.0193,
      "step": 87972
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5959742069244385,
      "learning_rate": 0.000408838707667369,
      "loss": 2.9776,
      "step": 87973
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4203766584396362,
      "learning_rate": 0.0004088348957903726,
      "loss": 2.6594,
      "step": 87974
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6790809631347656,
      "learning_rate": 0.00040883108389314174,
      "loss": 2.8827,
      "step": 87975
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8664425611495972,
      "learning_rate": 0.0004088272719756771,
      "loss": 2.9848,
      "step": 87976
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8337244987487793,
      "learning_rate": 0.00040882346003797944,
      "loss": 3.0831,
      "step": 87977
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.71005380153656,
      "learning_rate": 0.00040881964808004947,
      "loss": 2.9886,
      "step": 87978
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.399723768234253,
      "learning_rate": 0.0004088158361018878,
      "loss": 2.8316,
      "step": 87979
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4356106519699097,
      "learning_rate": 0.00040881202410349523,
      "loss": 3.1774,
      "step": 87980
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.965372085571289,
      "learning_rate": 0.0004088082120848725,
      "loss": 3.0085,
      "step": 87981
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.4273464679718018,
      "learning_rate": 0.0004088044000460203,
      "loss": 3.1053,
      "step": 87982
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.019225835800171,
      "learning_rate": 0.00040880058798693926,
      "loss": 3.0155,
      "step": 87983
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6417629718780518,
      "learning_rate": 0.0004087967759076301,
      "loss": 2.8366,
      "step": 87984
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1041312217712402,
      "learning_rate": 0.00040879296380809366,
      "loss": 2.9092,
      "step": 87985
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5992894172668457,
      "learning_rate": 0.00040878915168833047,
      "loss": 2.8979,
      "step": 87986
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7154027223587036,
      "learning_rate": 0.0004087853395483413,
      "loss": 2.8788,
      "step": 87987
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.56080961227417,
      "learning_rate": 0.000408781527388127,
      "loss": 2.7243,
      "step": 87988
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.584733009338379,
      "learning_rate": 0.00040877771520768803,
      "loss": 3.0787,
      "step": 87989
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4564015865325928,
      "learning_rate": 0.0004087739030070253,
      "loss": 2.9474,
      "step": 87990
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7906757593154907,
      "learning_rate": 0.00040877009078613943,
      "loss": 3.2995,
      "step": 87991
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7746094465255737,
      "learning_rate": 0.0004087662785450312,
      "loss": 2.9313,
      "step": 87992
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.046323299407959,
      "learning_rate": 0.00040876246628370117,
      "loss": 3.2118,
      "step": 87993
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3867268562316895,
      "learning_rate": 0.0004087586540021503,
      "loss": 3.1406,
      "step": 87994
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7708048820495605,
      "learning_rate": 0.00040875484170037893,
      "loss": 3.1972,
      "step": 87995
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.005422592163086,
      "learning_rate": 0.0004087510293783881,
      "loss": 3.1105,
      "step": 87996
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.656827688217163,
      "learning_rate": 0.00040874721703617847,
      "loss": 3.1889,
      "step": 87997
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.877135992050171,
      "learning_rate": 0.0004087434046737505,
      "loss": 2.9415,
      "step": 87998
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0304057598114014,
      "learning_rate": 0.0004087395922911052,
      "loss": 2.9132,
      "step": 87999
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7914808988571167,
      "learning_rate": 0.0004087357798882432,
      "loss": 3.016,
      "step": 88000
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7819892168045044,
      "learning_rate": 0.0004087319674651651,
      "loss": 2.8722,
      "step": 88001
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9012120962142944,
      "learning_rate": 0.00040872815502187166,
      "loss": 2.9024,
      "step": 88002
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8020976781845093,
      "learning_rate": 0.0004087243425583635,
      "loss": 2.9442,
      "step": 88003
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0225565433502197,
      "learning_rate": 0.00040872053007464163,
      "loss": 2.9791,
      "step": 88004
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.849106788635254,
      "learning_rate": 0.00040871671757070646,
      "loss": 3.1036,
      "step": 88005
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.972200870513916,
      "learning_rate": 0.00040871290504655884,
      "loss": 3.0248,
      "step": 88006
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5869485139846802,
      "learning_rate": 0.00040870909250219947,
      "loss": 3.0266,
      "step": 88007
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7867052555084229,
      "learning_rate": 0.00040870527993762894,
      "loss": 3.2058,
      "step": 88008
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7185317277908325,
      "learning_rate": 0.0004087014673528481,
      "loss": 2.9135,
      "step": 88009
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.616617202758789,
      "learning_rate": 0.00040869765474785757,
      "loss": 3.0889,
      "step": 88010
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7512272596359253,
      "learning_rate": 0.00040869384212265813,
      "loss": 3.1072,
      "step": 88011
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6101113557815552,
      "learning_rate": 0.0004086900294772504,
      "loss": 3.0453,
      "step": 88012
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4969727993011475,
      "learning_rate": 0.0004086862168116352,
      "loss": 3.2532,
      "step": 88013
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7934494018554688,
      "learning_rate": 0.00040868240412581316,
      "loss": 3.0998,
      "step": 88014
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6091095209121704,
      "learning_rate": 0.00040867859141978505,
      "loss": 3.162,
      "step": 88015
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4744014739990234,
      "learning_rate": 0.00040867477869355146,
      "loss": 3.1132,
      "step": 88016
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6042901277542114,
      "learning_rate": 0.00040867096594711323,
      "loss": 2.9496,
      "step": 88017
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9009853601455688,
      "learning_rate": 0.000408667153180471,
      "loss": 3.258,
      "step": 88018
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5496571063995361,
      "learning_rate": 0.0004086633403936255,
      "loss": 2.9276,
      "step": 88019
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.816615104675293,
      "learning_rate": 0.0004086595275865774,
      "loss": 3.0025,
      "step": 88020
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.787956714630127,
      "learning_rate": 0.0004086557147593275,
      "loss": 2.9307,
      "step": 88021
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5788276195526123,
      "learning_rate": 0.00040865190191187646,
      "loss": 3.1945,
      "step": 88022
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9066462516784668,
      "learning_rate": 0.0004086480890442249,
      "loss": 3.1933,
      "step": 88023
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5666136741638184,
      "learning_rate": 0.0004086442761563737,
      "loss": 2.9254,
      "step": 88024
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0543622970581055,
      "learning_rate": 0.0004086404632483234,
      "loss": 2.9821,
      "step": 88025
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.607253074645996,
      "learning_rate": 0.00040863665032007485,
      "loss": 3.3536,
      "step": 88026
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.841394305229187,
      "learning_rate": 0.00040863283737162866,
      "loss": 2.9113,
      "step": 88027
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7492783069610596,
      "learning_rate": 0.0004086290244029855,
      "loss": 2.7964,
      "step": 88028
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5554370880126953,
      "learning_rate": 0.0004086252114141463,
      "loss": 3.0209,
      "step": 88029
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9438896179199219,
      "learning_rate": 0.00040862139840511154,
      "loss": 3.1038,
      "step": 88030
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9108482599258423,
      "learning_rate": 0.00040861758537588205,
      "loss": 2.9779,
      "step": 88031
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6832478046417236,
      "learning_rate": 0.0004086137723264585,
      "loss": 2.805,
      "step": 88032
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8074579238891602,
      "learning_rate": 0.0004086099592568416,
      "loss": 3.1089,
      "step": 88033
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4870145320892334,
      "learning_rate": 0.00040860614616703203,
      "loss": 2.7198,
      "step": 88034
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7415070533752441,
      "learning_rate": 0.0004086023330570305,
      "loss": 2.8801,
      "step": 88035
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1932718753814697,
      "learning_rate": 0.00040859851992683784,
      "loss": 3.0566,
      "step": 88036
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5626507997512817,
      "learning_rate": 0.0004085947067764546,
      "loss": 3.0281,
      "step": 88037
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4526336193084717,
      "learning_rate": 0.0004085908936058815,
      "loss": 3.0709,
      "step": 88038
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7380090951919556,
      "learning_rate": 0.0004085870804151194,
      "loss": 3.1376,
      "step": 88039
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8653169870376587,
      "learning_rate": 0.00040858326720416885,
      "loss": 2.9771,
      "step": 88040
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8022924661636353,
      "learning_rate": 0.00040857945397303067,
      "loss": 2.839,
      "step": 88041
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.3093771934509277,
      "learning_rate": 0.0004085756407217055,
      "loss": 3.1584,
      "step": 88042
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7451460361480713,
      "learning_rate": 0.0004085718274501941,
      "loss": 3.0358,
      "step": 88043
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5937477350234985,
      "learning_rate": 0.00040856801415849707,
      "loss": 3.0867,
      "step": 88044
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6426994800567627,
      "learning_rate": 0.0004085642008466152,
      "loss": 3.2194,
      "step": 88045
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9692800045013428,
      "learning_rate": 0.0004085603875145493,
      "loss": 3.0315,
      "step": 88046
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.236477851867676,
      "learning_rate": 0.0004085565741622999,
      "loss": 2.9425,
      "step": 88047
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7838091850280762,
      "learning_rate": 0.0004085527607898678,
      "loss": 3.0022,
      "step": 88048
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7470494508743286,
      "learning_rate": 0.0004085489473972537,
      "loss": 2.9985,
      "step": 88049
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7939708232879639,
      "learning_rate": 0.0004085451339844583,
      "loss": 3.1027,
      "step": 88050
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7390565872192383,
      "learning_rate": 0.0004085413205514823,
      "loss": 3.1785,
      "step": 88051
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6954829692840576,
      "learning_rate": 0.00040853750709832643,
      "loss": 2.9359,
      "step": 88052
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0247151851654053,
      "learning_rate": 0.0004085336936249913,
      "loss": 2.9325,
      "step": 88053
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.84994637966156,
      "learning_rate": 0.0004085298801314778,
      "loss": 3.0084,
      "step": 88054
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8338347673416138,
      "learning_rate": 0.0004085260666177866,
      "loss": 3.1911,
      "step": 88055
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.802065134048462,
      "learning_rate": 0.0004085222530839182,
      "loss": 2.9783,
      "step": 88056
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6317654848098755,
      "learning_rate": 0.0004085184395298736,
      "loss": 3.2722,
      "step": 88057
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7325217723846436,
      "learning_rate": 0.0004085146259556534,
      "loss": 3.0998,
      "step": 88058
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8426275253295898,
      "learning_rate": 0.0004085108123612582,
      "loss": 3.0233,
      "step": 88059
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.176658868789673,
      "learning_rate": 0.00040850699874668875,
      "loss": 2.9041,
      "step": 88060
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7461711168289185,
      "learning_rate": 0.00040850318511194596,
      "loss": 2.8627,
      "step": 88061
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4992001056671143,
      "learning_rate": 0.0004084993714570302,
      "loss": 2.7493,
      "step": 88062
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7144761085510254,
      "learning_rate": 0.0004084955577819424,
      "loss": 3.2051,
      "step": 88063
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8893098831176758,
      "learning_rate": 0.0004084917440866833,
      "loss": 2.8514,
      "step": 88064
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6619821786880493,
      "learning_rate": 0.0004084879303712535,
      "loss": 2.9226,
      "step": 88065
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6211469173431396,
      "learning_rate": 0.0004084841166356537,
      "loss": 3.0774,
      "step": 88066
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8749961853027344,
      "learning_rate": 0.0004084803028798847,
      "loss": 3.0875,
      "step": 88067
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.249293804168701,
      "learning_rate": 0.0004084764891039472,
      "loss": 2.8286,
      "step": 88068
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.829325556755066,
      "learning_rate": 0.0004084726753078418,
      "loss": 3.1793,
      "step": 88069
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4335477352142334,
      "learning_rate": 0.0004084688614915693,
      "loss": 3.0234,
      "step": 88070
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3321611881256104,
      "learning_rate": 0.00040846504765513044,
      "loss": 2.9059,
      "step": 88071
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7651028633117676,
      "learning_rate": 0.00040846123379852583,
      "loss": 2.7068,
      "step": 88072
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7275761365890503,
      "learning_rate": 0.0004084574199217563,
      "loss": 3.0939,
      "step": 88073
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8533005714416504,
      "learning_rate": 0.0004084536060248224,
      "loss": 2.9019,
      "step": 88074
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4086191654205322,
      "learning_rate": 0.00040844979210772497,
      "loss": 2.8565,
      "step": 88075
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9742810726165771,
      "learning_rate": 0.00040844597817046467,
      "loss": 2.7814,
      "step": 88076
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6355727910995483,
      "learning_rate": 0.0004084421642130422,
      "loss": 3.0304,
      "step": 88077
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7781126499176025,
      "learning_rate": 0.0004084383502354582,
      "loss": 3.0132,
      "step": 88078
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0346579551696777,
      "learning_rate": 0.0004084345362377137,
      "loss": 3.3248,
      "step": 88079
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8351715803146362,
      "learning_rate": 0.00040843072221980896,
      "loss": 3.119,
      "step": 88080
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.531459331512451,
      "learning_rate": 0.000408426908181745,
      "loss": 3.0312,
      "step": 88081
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8455889225006104,
      "learning_rate": 0.0004084230941235225,
      "loss": 2.7837,
      "step": 88082
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6586239337921143,
      "learning_rate": 0.00040841928004514195,
      "loss": 3.1113,
      "step": 88083
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.064251661300659,
      "learning_rate": 0.0004084154659466042,
      "loss": 3.0891,
      "step": 88084
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.19608211517334,
      "learning_rate": 0.00040841165182791013,
      "loss": 2.9008,
      "step": 88085
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.017394542694092,
      "learning_rate": 0.00040840783768906016,
      "loss": 2.9645,
      "step": 88086
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5696868896484375,
      "learning_rate": 0.00040840402353005514,
      "loss": 3.3104,
      "step": 88087
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3853554725646973,
      "learning_rate": 0.0004084002093508958,
      "loss": 3.1177,
      "step": 88088
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.737498164176941,
      "learning_rate": 0.0004083963951515828,
      "loss": 3.1535,
      "step": 88089
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9148852825164795,
      "learning_rate": 0.00040839258093211685,
      "loss": 2.9656,
      "step": 88090
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.861932635307312,
      "learning_rate": 0.00040838876669249864,
      "loss": 3.3039,
      "step": 88091
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.539774775505066,
      "learning_rate": 0.000408384952432729,
      "loss": 2.9461,
      "step": 88092
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6814603805541992,
      "learning_rate": 0.00040838113815280856,
      "loss": 3.2497,
      "step": 88093
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.608597993850708,
      "learning_rate": 0.0004083773238527379,
      "loss": 3.0428,
      "step": 88094
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.383310317993164,
      "learning_rate": 0.00040837350953251786,
      "loss": 3.0397,
      "step": 88095
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3176610469818115,
      "learning_rate": 0.00040836969519214924,
      "loss": 2.9667,
      "step": 88096
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4944422245025635,
      "learning_rate": 0.0004083658808316326,
      "loss": 3.02,
      "step": 88097
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.1958136558532715,
      "learning_rate": 0.00040836206645096876,
      "loss": 2.9546,
      "step": 88098
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9950788021087646,
      "learning_rate": 0.00040835825205015836,
      "loss": 2.8248,
      "step": 88099
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.926306962966919,
      "learning_rate": 0.000408354437629202,
      "loss": 3.0,
      "step": 88100
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1528608798980713,
      "learning_rate": 0.00040835062318810055,
      "loss": 3.0375,
      "step": 88101
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6488858461380005,
      "learning_rate": 0.00040834680872685466,
      "loss": 3.168,
      "step": 88102
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.470547914505005,
      "learning_rate": 0.0004083429942454652,
      "loss": 3.0327,
      "step": 88103
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.3036224842071533,
      "learning_rate": 0.00040833917974393254,
      "loss": 2.8779,
      "step": 88104
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8171905279159546,
      "learning_rate": 0.0004083353652222576,
      "loss": 3.114,
      "step": 88105
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7884012460708618,
      "learning_rate": 0.0004083315506804412,
      "loss": 2.9622,
      "step": 88106
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.280439853668213,
      "learning_rate": 0.0004083277361184838,
      "loss": 2.9491,
      "step": 88107
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.364503264427185,
      "learning_rate": 0.0004083239215363863,
      "loss": 3.1391,
      "step": 88108
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5589061975479126,
      "learning_rate": 0.0004083201069341494,
      "loss": 2.8607,
      "step": 88109
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.45152747631073,
      "learning_rate": 0.0004083162923117737,
      "loss": 3.0232,
      "step": 88110
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.781211495399475,
      "learning_rate": 0.00040831247766925986,
      "loss": 2.9794,
      "step": 88111
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1144068241119385,
      "learning_rate": 0.0004083086630066088,
      "loss": 3.1248,
      "step": 88112
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8441998958587646,
      "learning_rate": 0.00040830484832382103,
      "loss": 3.1182,
      "step": 88113
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.49276864528656,
      "learning_rate": 0.00040830103362089737,
      "loss": 3.0773,
      "step": 88114
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.474418878555298,
      "learning_rate": 0.00040829721889783845,
      "loss": 3.0524,
      "step": 88115
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.290419340133667,
      "learning_rate": 0.0004082934041546452,
      "loss": 3.0622,
      "step": 88116
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9445950984954834,
      "learning_rate": 0.00040828958939131797,
      "loss": 2.9947,
      "step": 88117
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.32332706451416,
      "learning_rate": 0.00040828577460785777,
      "loss": 2.9707,
      "step": 88118
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.343257427215576,
      "learning_rate": 0.00040828195980426523,
      "loss": 2.8733,
      "step": 88119
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6714115142822266,
      "learning_rate": 0.0004082781449805409,
      "loss": 3.1969,
      "step": 88120
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5636732578277588,
      "learning_rate": 0.0004082743301366857,
      "loss": 3.0078,
      "step": 88121
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.121528148651123,
      "learning_rate": 0.00040827051527270034,
      "loss": 2.9784,
      "step": 88122
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.944395899772644,
      "learning_rate": 0.00040826670038858536,
      "loss": 2.867,
      "step": 88123
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5126653909683228,
      "learning_rate": 0.0004082628854843415,
      "loss": 3.1266,
      "step": 88124
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7121676206588745,
      "learning_rate": 0.0004082590705599696,
      "loss": 3.1258,
      "step": 88125
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8487998247146606,
      "learning_rate": 0.0004082552556154703,
      "loss": 2.9326,
      "step": 88126
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.277228832244873,
      "learning_rate": 0.00040825144065084426,
      "loss": 2.846,
      "step": 88127
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4383108615875244,
      "learning_rate": 0.0004082476256660923,
      "loss": 2.7247,
      "step": 88128
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.664555549621582,
      "learning_rate": 0.000408243810661215,
      "loss": 3.1081,
      "step": 88129
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.5521774291992188,
      "learning_rate": 0.00040823999563621314,
      "loss": 2.9455,
      "step": 88130
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9081608057022095,
      "learning_rate": 0.00040823618059108747,
      "loss": 3.1218,
      "step": 88131
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0507187843322754,
      "learning_rate": 0.0004082323655258386,
      "loss": 3.0247,
      "step": 88132
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.6972038745880127,
      "learning_rate": 0.0004082285504404673,
      "loss": 3.0205,
      "step": 88133
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9561434984207153,
      "learning_rate": 0.0004082247353349743,
      "loss": 2.8659,
      "step": 88134
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.012502431869507,
      "learning_rate": 0.00040822092020936025,
      "loss": 2.907,
      "step": 88135
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.711949110031128,
      "learning_rate": 0.0004082171050636259,
      "loss": 3.0683,
      "step": 88136
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6657037734985352,
      "learning_rate": 0.00040821328989777196,
      "loss": 3.0166,
      "step": 88137
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7650120258331299,
      "learning_rate": 0.0004082094747117991,
      "loss": 3.0776,
      "step": 88138
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0505964756011963,
      "learning_rate": 0.00040820565950570805,
      "loss": 3.0608,
      "step": 88139
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6844885349273682,
      "learning_rate": 0.0004082018442794996,
      "loss": 3.0525,
      "step": 88140
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.538092017173767,
      "learning_rate": 0.00040819802903317427,
      "loss": 2.9636,
      "step": 88141
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6154354810714722,
      "learning_rate": 0.00040819421376673295,
      "loss": 3.1729,
      "step": 88142
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.665611743927002,
      "learning_rate": 0.00040819039848017627,
      "loss": 2.9861,
      "step": 88143
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.680924892425537,
      "learning_rate": 0.000408186583173505,
      "loss": 2.9653,
      "step": 88144
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7993123531341553,
      "learning_rate": 0.00040818276784671974,
      "loss": 2.9735,
      "step": 88145
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2305850982666016,
      "learning_rate": 0.0004081789524998213,
      "loss": 2.8908,
      "step": 88146
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2479705810546875,
      "learning_rate": 0.0004081751371328103,
      "loss": 2.8103,
      "step": 88147
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.3561257123947144,
      "learning_rate": 0.0004081713217456875,
      "loss": 2.9578,
      "step": 88148
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2021028995513916,
      "learning_rate": 0.00040816750633845367,
      "loss": 2.9909,
      "step": 88149
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5672283172607422,
      "learning_rate": 0.0004081636909111094,
      "loss": 3.2165,
      "step": 88150
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6407644748687744,
      "learning_rate": 0.00040815987546365546,
      "loss": 2.9244,
      "step": 88151
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4500752687454224,
      "learning_rate": 0.00040815605999609263,
      "loss": 3.2045,
      "step": 88152
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.081099033355713,
      "learning_rate": 0.0004081522445084215,
      "loss": 2.8135,
      "step": 88153
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.882008671760559,
      "learning_rate": 0.00040814842900064284,
      "loss": 3.2006,
      "step": 88154
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.524212121963501,
      "learning_rate": 0.00040814461347275734,
      "loss": 2.9427,
      "step": 88155
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2640857696533203,
      "learning_rate": 0.0004081407979247657,
      "loss": 2.8037,
      "step": 88156
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8360710144042969,
      "learning_rate": 0.00040813698235666864,
      "loss": 3.1784,
      "step": 88157
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.855538010597229,
      "learning_rate": 0.0004081331667684669,
      "loss": 2.8848,
      "step": 88158
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3547492027282715,
      "learning_rate": 0.00040812935116016114,
      "loss": 2.908,
      "step": 88159
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.748172640800476,
      "learning_rate": 0.0004081255355317521,
      "loss": 3.0266,
      "step": 88160
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3762285709381104,
      "learning_rate": 0.00040812171988324053,
      "loss": 2.9701,
      "step": 88161
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.800427198410034,
      "learning_rate": 0.000408117904214627,
      "loss": 2.9849,
      "step": 88162
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.676751971244812,
      "learning_rate": 0.00040811408852591235,
      "loss": 3.1576,
      "step": 88163
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7663044929504395,
      "learning_rate": 0.0004081102728170973,
      "loss": 2.8972,
      "step": 88164
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.720033884048462,
      "learning_rate": 0.00040810645708818245,
      "loss": 3.212,
      "step": 88165
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4835114479064941,
      "learning_rate": 0.0004081026413391685,
      "loss": 2.883,
      "step": 88166
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6242361068725586,
      "learning_rate": 0.00040809882557005635,
      "loss": 3.1435,
      "step": 88167
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7409131526947021,
      "learning_rate": 0.00040809500978084657,
      "loss": 3.0279,
      "step": 88168
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6965726613998413,
      "learning_rate": 0.0004080911939715399,
      "loss": 3.0077,
      "step": 88169
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6188548803329468,
      "learning_rate": 0.000408087378142137,
      "loss": 2.8411,
      "step": 88170
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6069872379302979,
      "learning_rate": 0.0004080835622926386,
      "loss": 2.9766,
      "step": 88171
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.823714017868042,
      "learning_rate": 0.00040807974642304544,
      "loss": 2.9499,
      "step": 88172
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7152068614959717,
      "learning_rate": 0.00040807593053335825,
      "loss": 2.9461,
      "step": 88173
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5423671007156372,
      "learning_rate": 0.0004080721146235776,
      "loss": 3.0474,
      "step": 88174
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6734575033187866,
      "learning_rate": 0.0004080682986937044,
      "loss": 2.9487,
      "step": 88175
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4207745790481567,
      "learning_rate": 0.0004080644827437392,
      "loss": 3.2173,
      "step": 88176
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9397448301315308,
      "learning_rate": 0.00040806066677368284,
      "loss": 2.9651,
      "step": 88177
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.593346118927002,
      "learning_rate": 0.0004080568507835359,
      "loss": 3.1133,
      "step": 88178
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6767194271087646,
      "learning_rate": 0.00040805303477329917,
      "loss": 2.8567,
      "step": 88179
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.631901502609253,
      "learning_rate": 0.00040804921874297345,
      "loss": 2.8113,
      "step": 88180
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6553871631622314,
      "learning_rate": 0.00040804540269255926,
      "loss": 2.9577,
      "step": 88181
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5333001613616943,
      "learning_rate": 0.00040804158662205735,
      "loss": 2.9349,
      "step": 88182
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5993661880493164,
      "learning_rate": 0.0004080377705314685,
      "loss": 3.0515,
      "step": 88183
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8356225490570068,
      "learning_rate": 0.0004080339544207934,
      "loss": 2.9489,
      "step": 88184
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.523564100265503,
      "learning_rate": 0.0004080301382900327,
      "loss": 3.0583,
      "step": 88185
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8198984861373901,
      "learning_rate": 0.00040802632213918715,
      "loss": 2.94,
      "step": 88186
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5588237047195435,
      "learning_rate": 0.0004080225059682575,
      "loss": 2.9332,
      "step": 88187
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8606892824172974,
      "learning_rate": 0.0004080186897772445,
      "loss": 2.8671,
      "step": 88188
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5575884580612183,
      "learning_rate": 0.0004080148735661487,
      "loss": 3.1557,
      "step": 88189
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8547395467758179,
      "learning_rate": 0.0004080110573349709,
      "loss": 2.8013,
      "step": 88190
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7302461862564087,
      "learning_rate": 0.0004080072410837119,
      "loss": 3.0737,
      "step": 88191
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6845811605453491,
      "learning_rate": 0.0004080034248123722,
      "loss": 2.9706,
      "step": 88192
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7948757410049438,
      "learning_rate": 0.0004079996085209527,
      "loss": 2.8055,
      "step": 88193
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.568806529045105,
      "learning_rate": 0.00040799579220945393,
      "loss": 3.1793,
      "step": 88194
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7541611194610596,
      "learning_rate": 0.00040799197587787684,
      "loss": 3.032,
      "step": 88195
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6091651916503906,
      "learning_rate": 0.00040798815952622196,
      "loss": 3.0258,
      "step": 88196
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8442248106002808,
      "learning_rate": 0.00040798434315449,
      "loss": 2.8939,
      "step": 88197
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.821975827217102,
      "learning_rate": 0.0004079805267626817,
      "loss": 3.0688,
      "step": 88198
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6444615125656128,
      "learning_rate": 0.0004079767103507978,
      "loss": 2.8211,
      "step": 88199
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6485657691955566,
      "learning_rate": 0.000407972893918839,
      "loss": 3.0545,
      "step": 88200
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7365634441375732,
      "learning_rate": 0.000407969077466806,
      "loss": 2.8827,
      "step": 88201
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6584887504577637,
      "learning_rate": 0.00040796526099469953,
      "loss": 3.0704,
      "step": 88202
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7250728607177734,
      "learning_rate": 0.0004079614445025203,
      "loss": 2.9706,
      "step": 88203
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8536577224731445,
      "learning_rate": 0.00040795762799026897,
      "loss": 2.9347,
      "step": 88204
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0439021587371826,
      "learning_rate": 0.00040795381145794626,
      "loss": 3.063,
      "step": 88205
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7470743656158447,
      "learning_rate": 0.00040794999490555286,
      "loss": 3.1251,
      "step": 88206
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6858292818069458,
      "learning_rate": 0.0004079461783330897,
      "loss": 2.8542,
      "step": 88207
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.784950852394104,
      "learning_rate": 0.0004079423617405571,
      "loss": 3.0014,
      "step": 88208
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.560573697090149,
      "learning_rate": 0.000407938545127956,
      "loss": 2.9699,
      "step": 88209
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.000960111618042,
      "learning_rate": 0.00040793472849528725,
      "loss": 3.1572,
      "step": 88210
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.703198194503784,
      "learning_rate": 0.00040793091184255126,
      "loss": 2.7013,
      "step": 88211
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7521653175354004,
      "learning_rate": 0.00040792709516974886,
      "loss": 2.8827,
      "step": 88212
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4680328369140625,
      "learning_rate": 0.0004079232784768809,
      "loss": 3.148,
      "step": 88213
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.890965223312378,
      "learning_rate": 0.00040791946176394785,
      "loss": 2.8367,
      "step": 88214
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.414743423461914,
      "learning_rate": 0.00040791564503095054,
      "loss": 2.9839,
      "step": 88215
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.036240577697754,
      "learning_rate": 0.0004079118282778898,
      "loss": 2.9659,
      "step": 88216
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.8200888633728027,
      "learning_rate": 0.00040790801150476606,
      "loss": 2.9822,
      "step": 88217
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.270103693008423,
      "learning_rate": 0.0004079041947115803,
      "loss": 2.6758,
      "step": 88218
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.016237258911133,
      "learning_rate": 0.0004079003778983331,
      "loss": 3.0388,
      "step": 88219
    },
    {
      "epoch": 1.15,
      "grad_norm": 6.150376796722412,
      "learning_rate": 0.0004078965610650251,
      "loss": 2.979,
      "step": 88220
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.812852144241333,
      "learning_rate": 0.0004078927442116571,
      "loss": 2.9109,
      "step": 88221
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8359339237213135,
      "learning_rate": 0.0004078889273382299,
      "loss": 2.892,
      "step": 88222
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1552484035491943,
      "learning_rate": 0.000407885110444744,
      "loss": 3.1068,
      "step": 88223
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.1916868686676025,
      "learning_rate": 0.0004078812935312003,
      "loss": 3.1558,
      "step": 88224
    },
    {
      "epoch": 1.15,
      "grad_norm": 4.106796741485596,
      "learning_rate": 0.00040787747659759945,
      "loss": 2.764,
      "step": 88225
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4047765731811523,
      "learning_rate": 0.000407873659643942,
      "loss": 2.8847,
      "step": 88226
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9180231094360352,
      "learning_rate": 0.00040786984267022895,
      "loss": 3.0509,
      "step": 88227
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6265039443969727,
      "learning_rate": 0.0004078660256764608,
      "loss": 2.9524,
      "step": 88228
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7476162910461426,
      "learning_rate": 0.0004078622086626383,
      "loss": 2.8026,
      "step": 88229
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.6753573417663574,
      "learning_rate": 0.0004078583916287622,
      "loss": 3.0594,
      "step": 88230
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9089832305908203,
      "learning_rate": 0.0004078545745748332,
      "loss": 3.1702,
      "step": 88231
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7101943492889404,
      "learning_rate": 0.000407850757500852,
      "loss": 2.8692,
      "step": 88232
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9086604118347168,
      "learning_rate": 0.00040784694040681927,
      "loss": 3.0824,
      "step": 88233
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.747119665145874,
      "learning_rate": 0.00040784312329273585,
      "loss": 3.1951,
      "step": 88234
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8307499885559082,
      "learning_rate": 0.0004078393061586023,
      "loss": 3.2916,
      "step": 88235
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.504526972770691,
      "learning_rate": 0.0004078354890044193,
      "loss": 3.1276,
      "step": 88236
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9281361103057861,
      "learning_rate": 0.0004078316718301878,
      "loss": 2.9643,
      "step": 88237
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0731873512268066,
      "learning_rate": 0.00040782785463590825,
      "loss": 3.3586,
      "step": 88238
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8645503520965576,
      "learning_rate": 0.00040782403742158145,
      "loss": 2.9078,
      "step": 88239
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3330533504486084,
      "learning_rate": 0.0004078202201872083,
      "loss": 2.7359,
      "step": 88240
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.465303659439087,
      "learning_rate": 0.0004078164029327892,
      "loss": 3.0218,
      "step": 88241
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6393803358078003,
      "learning_rate": 0.0004078125856583249,
      "loss": 3.1846,
      "step": 88242
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.534491777420044,
      "learning_rate": 0.00040780876836381643,
      "loss": 2.9684,
      "step": 88243
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6508865356445312,
      "learning_rate": 0.00040780495104926414,
      "loss": 2.9484,
      "step": 88244
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1474642753601074,
      "learning_rate": 0.00040780113371466885,
      "loss": 3.0036,
      "step": 88245
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8075758218765259,
      "learning_rate": 0.0004077973163600313,
      "loss": 2.998,
      "step": 88246
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7941533327102661,
      "learning_rate": 0.00040779349898535227,
      "loss": 2.9211,
      "step": 88247
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4222753047943115,
      "learning_rate": 0.00040778968159063233,
      "loss": 3.1772,
      "step": 88248
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6570875644683838,
      "learning_rate": 0.00040778586417587233,
      "loss": 3.0182,
      "step": 88249
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.659228801727295,
      "learning_rate": 0.00040778204674107285,
      "loss": 3.1113,
      "step": 88250
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.6582672595977783,
      "learning_rate": 0.0004077782292862346,
      "loss": 2.965,
      "step": 88251
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.666750431060791,
      "learning_rate": 0.0004077744118113584,
      "loss": 3.0729,
      "step": 88252
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4688299894332886,
      "learning_rate": 0.00040777059431644497,
      "loss": 2.8474,
      "step": 88253
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6961206197738647,
      "learning_rate": 0.00040776677680149487,
      "loss": 3.1794,
      "step": 88254
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.859290361404419,
      "learning_rate": 0.0004077629592665089,
      "loss": 3.0188,
      "step": 88255
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9265161752700806,
      "learning_rate": 0.00040775914171148776,
      "loss": 3.193,
      "step": 88256
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1437437534332275,
      "learning_rate": 0.00040775532413643215,
      "loss": 3.1196,
      "step": 88257
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6959954500198364,
      "learning_rate": 0.00040775150654134276,
      "loss": 3.2779,
      "step": 88258
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.378403663635254,
      "learning_rate": 0.00040774768892622046,
      "loss": 3.1562,
      "step": 88259
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.259061574935913,
      "learning_rate": 0.00040774387129106573,
      "loss": 3.3599,
      "step": 88260
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.533432126045227,
      "learning_rate": 0.00040774005363587945,
      "loss": 3.2046,
      "step": 88261
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9339478015899658,
      "learning_rate": 0.0004077362359606622,
      "loss": 3.0955,
      "step": 88262
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5999789237976074,
      "learning_rate": 0.0004077324182654147,
      "loss": 2.6877,
      "step": 88263
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.072866916656494,
      "learning_rate": 0.0004077286005501378,
      "loss": 2.7559,
      "step": 88264
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8795291185379028,
      "learning_rate": 0.00040772478281483215,
      "loss": 2.6992,
      "step": 88265
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7934871912002563,
      "learning_rate": 0.00040772096505949833,
      "loss": 2.9359,
      "step": 88266
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7006844282150269,
      "learning_rate": 0.0004077171472841372,
      "loss": 2.7982,
      "step": 88267
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6992237567901611,
      "learning_rate": 0.0004077133294887493,
      "loss": 2.9013,
      "step": 88268
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.752943515777588,
      "learning_rate": 0.00040770951167333567,
      "loss": 3.0383,
      "step": 88269
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6170358657836914,
      "learning_rate": 0.00040770569383789666,
      "loss": 2.8696,
      "step": 88270
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0342133045196533,
      "learning_rate": 0.00040770187598243313,
      "loss": 3.3027,
      "step": 88271
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.096195697784424,
      "learning_rate": 0.0004076980581069459,
      "loss": 3.0314,
      "step": 88272
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8625047206878662,
      "learning_rate": 0.0004076942402114355,
      "loss": 3.1472,
      "step": 88273
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7053860425949097,
      "learning_rate": 0.00040769042229590267,
      "loss": 3.0451,
      "step": 88274
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.636226177215576,
      "learning_rate": 0.00040768660436034816,
      "loss": 2.7916,
      "step": 88275
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6675103902816772,
      "learning_rate": 0.0004076827864047728,
      "loss": 2.9444,
      "step": 88276
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7690067291259766,
      "learning_rate": 0.00040767896842917704,
      "loss": 3.0413,
      "step": 88277
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2823195457458496,
      "learning_rate": 0.0004076751504335618,
      "loss": 2.9359,
      "step": 88278
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6949084997177124,
      "learning_rate": 0.0004076713324179276,
      "loss": 2.8198,
      "step": 88279
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7543212175369263,
      "learning_rate": 0.00040766751438227545,
      "loss": 2.9318,
      "step": 88280
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6854690313339233,
      "learning_rate": 0.00040766369632660575,
      "loss": 3.1113,
      "step": 88281
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.649777889251709,
      "learning_rate": 0.0004076598782509194,
      "loss": 3.0476,
      "step": 88282
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8480980396270752,
      "learning_rate": 0.000407656060155217,
      "loss": 2.7303,
      "step": 88283
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6915873289108276,
      "learning_rate": 0.00040765224203949933,
      "loss": 3.168,
      "step": 88284
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1261096000671387,
      "learning_rate": 0.0004076484239037671,
      "loss": 3.256,
      "step": 88285
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4717867374420166,
      "learning_rate": 0.000407644605748021,
      "loss": 2.8717,
      "step": 88286
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5734155178070068,
      "learning_rate": 0.0004076407875722617,
      "loss": 2.912,
      "step": 88287
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8532103300094604,
      "learning_rate": 0.00040763696937648995,
      "loss": 3.2606,
      "step": 88288
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6698126792907715,
      "learning_rate": 0.0004076331511607065,
      "loss": 3.2451,
      "step": 88289
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.513617992401123,
      "learning_rate": 0.00040762933292491193,
      "loss": 3.1948,
      "step": 88290
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.761147379875183,
      "learning_rate": 0.0004076255146691071,
      "loss": 3.1094,
      "step": 88291
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6826977729797363,
      "learning_rate": 0.00040762169639329263,
      "loss": 3.0851,
      "step": 88292
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.70722234249115,
      "learning_rate": 0.00040761787809746923,
      "loss": 2.9713,
      "step": 88293
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8637200593948364,
      "learning_rate": 0.00040761405978163764,
      "loss": 2.9851,
      "step": 88294
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.886749267578125,
      "learning_rate": 0.00040761024144579867,
      "loss": 2.9509,
      "step": 88295
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.710681676864624,
      "learning_rate": 0.0004076064230899528,
      "loss": 2.8625,
      "step": 88296
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7334576845169067,
      "learning_rate": 0.0004076026047141009,
      "loss": 3.0596,
      "step": 88297
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6974891424179077,
      "learning_rate": 0.00040759878631824365,
      "loss": 3.0982,
      "step": 88298
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6880146265029907,
      "learning_rate": 0.0004075949679023818,
      "loss": 3.0795,
      "step": 88299
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.478132724761963,
      "learning_rate": 0.0004075911494665159,
      "loss": 3.0227,
      "step": 88300
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6458595991134644,
      "learning_rate": 0.0004075873310106469,
      "loss": 3.1068,
      "step": 88301
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.376847267150879,
      "learning_rate": 0.0004075835125347753,
      "loss": 2.8777,
      "step": 88302
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5556308031082153,
      "learning_rate": 0.0004075796940389019,
      "loss": 2.8271,
      "step": 88303
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9549598693847656,
      "learning_rate": 0.0004075758755230275,
      "loss": 2.8413,
      "step": 88304
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7255247831344604,
      "learning_rate": 0.00040757205698715266,
      "loss": 2.8507,
      "step": 88305
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9397326707839966,
      "learning_rate": 0.00040756823843127806,
      "loss": 2.812,
      "step": 88306
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.5066585540771484,
      "learning_rate": 0.0004075644198554046,
      "loss": 3.0101,
      "step": 88307
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.644266963005066,
      "learning_rate": 0.0004075606012595328,
      "loss": 3.1414,
      "step": 88308
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.719169020652771,
      "learning_rate": 0.00040755678264366347,
      "loss": 3.1772,
      "step": 88309
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.616826295852661,
      "learning_rate": 0.00040755296400779735,
      "loss": 2.772,
      "step": 88310
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7721812725067139,
      "learning_rate": 0.00040754914535193513,
      "loss": 2.9866,
      "step": 88311
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5689618587493896,
      "learning_rate": 0.0004075453266760774,
      "loss": 2.8754,
      "step": 88312
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0091357231140137,
      "learning_rate": 0.0004075415079802251,
      "loss": 3.1288,
      "step": 88313
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.775881290435791,
      "learning_rate": 0.00040753768926437867,
      "loss": 3.0624,
      "step": 88314
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6129217147827148,
      "learning_rate": 0.000407533870528539,
      "loss": 3.0727,
      "step": 88315
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4703309535980225,
      "learning_rate": 0.0004075300517727067,
      "loss": 3.0605,
      "step": 88316
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.076209783554077,
      "learning_rate": 0.00040752623299688265,
      "loss": 2.8441,
      "step": 88317
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.908904790878296,
      "learning_rate": 0.0004075224142010674,
      "loss": 2.9084,
      "step": 88318
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6229513883590698,
      "learning_rate": 0.00040751859538526165,
      "loss": 3.136,
      "step": 88319
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.6466007232666016,
      "learning_rate": 0.00040751477654946627,
      "loss": 2.9187,
      "step": 88320
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8499419689178467,
      "learning_rate": 0.0004075109576936817,
      "loss": 3.2231,
      "step": 88321
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8458155393600464,
      "learning_rate": 0.0004075071388179089,
      "loss": 2.8583,
      "step": 88322
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8052836656570435,
      "learning_rate": 0.0004075033199221486,
      "loss": 2.9746,
      "step": 88323
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.180293560028076,
      "learning_rate": 0.00040749950100640127,
      "loss": 3.0827,
      "step": 88324
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7289153337478638,
      "learning_rate": 0.00040749568207066776,
      "loss": 2.7854,
      "step": 88325
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.9256181716918945,
      "learning_rate": 0.0004074918631149488,
      "loss": 2.9413,
      "step": 88326
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.116908550262451,
      "learning_rate": 0.0004074880441392451,
      "loss": 3.0112,
      "step": 88327
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.067450523376465,
      "learning_rate": 0.0004074842251435573,
      "loss": 2.8494,
      "step": 88328
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4555466175079346,
      "learning_rate": 0.0004074804061278862,
      "loss": 2.9977,
      "step": 88329
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.158595561981201,
      "learning_rate": 0.0004074765870922324,
      "loss": 2.926,
      "step": 88330
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5051114559173584,
      "learning_rate": 0.0004074727680365967,
      "loss": 2.8384,
      "step": 88331
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5459647178649902,
      "learning_rate": 0.00040746894896097986,
      "loss": 2.9905,
      "step": 88332
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.179569959640503,
      "learning_rate": 0.0004074651298653824,
      "loss": 3.2035,
      "step": 88333
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1557068824768066,
      "learning_rate": 0.00040746131074980514,
      "loss": 2.7269,
      "step": 88334
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7892423868179321,
      "learning_rate": 0.0004074574916142489,
      "loss": 3.1996,
      "step": 88335
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9019927978515625,
      "learning_rate": 0.00040745367245871426,
      "loss": 2.7069,
      "step": 88336
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.019090414047241,
      "learning_rate": 0.0004074498532832019,
      "loss": 3.0036,
      "step": 88337
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7275456190109253,
      "learning_rate": 0.00040744603408771263,
      "loss": 2.8121,
      "step": 88338
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4854354858398438,
      "learning_rate": 0.00040744221487224705,
      "loss": 2.9095,
      "step": 88339
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.616420269012451,
      "learning_rate": 0.00040743839563680596,
      "loss": 2.9884,
      "step": 88340
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6460938453674316,
      "learning_rate": 0.0004074345763813901,
      "loss": 3.1652,
      "step": 88341
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7243341207504272,
      "learning_rate": 0.0004074307571060001,
      "loss": 3.1506,
      "step": 88342
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.388371229171753,
      "learning_rate": 0.00040742693781063667,
      "loss": 2.8932,
      "step": 88343
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5850919485092163,
      "learning_rate": 0.0004074231184953005,
      "loss": 2.6538,
      "step": 88344
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9918231964111328,
      "learning_rate": 0.0004074192991599924,
      "loss": 2.9775,
      "step": 88345
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.054807186126709,
      "learning_rate": 0.000407415479804713,
      "loss": 2.777,
      "step": 88346
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.401320219039917,
      "learning_rate": 0.0004074116604294631,
      "loss": 3.3039,
      "step": 88347
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8452309370040894,
      "learning_rate": 0.00040740784103424326,
      "loss": 3.0851,
      "step": 88348
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.363248586654663,
      "learning_rate": 0.0004074040216190543,
      "loss": 3.1836,
      "step": 88349
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.116905450820923,
      "learning_rate": 0.00040740020218389694,
      "loss": 3.0737,
      "step": 88350
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6614147424697876,
      "learning_rate": 0.0004073963827287718,
      "loss": 3.297,
      "step": 88351
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7255278825759888,
      "learning_rate": 0.0004073925632536797,
      "loss": 2.9369,
      "step": 88352
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8073761463165283,
      "learning_rate": 0.0004073887437586213,
      "loss": 3.2209,
      "step": 88353
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.276951313018799,
      "learning_rate": 0.0004073849242435973,
      "loss": 3.0388,
      "step": 88354
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.599036455154419,
      "learning_rate": 0.0004073811047086084,
      "loss": 3.1621,
      "step": 88355
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9003015756607056,
      "learning_rate": 0.00040737728515365525,
      "loss": 3.0373,
      "step": 88356
    },
    {
      "epoch": 1.15,
      "grad_norm": 5.155853271484375,
      "learning_rate": 0.0004073734655787388,
      "loss": 2.7364,
      "step": 88357
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5809930562973022,
      "learning_rate": 0.0004073696459838595,
      "loss": 2.87,
      "step": 88358
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.760043740272522,
      "learning_rate": 0.0004073658263690181,
      "loss": 2.8566,
      "step": 88359
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5700280666351318,
      "learning_rate": 0.0004073620067342154,
      "loss": 3.0761,
      "step": 88360
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.797209620475769,
      "learning_rate": 0.00040735818707945216,
      "loss": 3.0872,
      "step": 88361
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7976542711257935,
      "learning_rate": 0.00040735436740472894,
      "loss": 2.8995,
      "step": 88362
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.869937777519226,
      "learning_rate": 0.00040735054771004643,
      "loss": 3.0995,
      "step": 88363
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7311893701553345,
      "learning_rate": 0.0004073467279954056,
      "loss": 2.9987,
      "step": 88364
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9067795276641846,
      "learning_rate": 0.0004073429082608069,
      "loss": 3.052,
      "step": 88365
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6943219900131226,
      "learning_rate": 0.0004073390885062511,
      "loss": 3.1485,
      "step": 88366
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1246209144592285,
      "learning_rate": 0.00040733526873173894,
      "loss": 2.7583,
      "step": 88367
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.558555006980896,
      "learning_rate": 0.00040733144893727124,
      "loss": 2.9671,
      "step": 88368
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5473076105117798,
      "learning_rate": 0.0004073276291228485,
      "loss": 2.9929,
      "step": 88369
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.023123025894165,
      "learning_rate": 0.00040732380928847154,
      "loss": 3.1095,
      "step": 88370
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4817508459091187,
      "learning_rate": 0.00040731998943414107,
      "loss": 3.0284,
      "step": 88371
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6770559549331665,
      "learning_rate": 0.0004073161695598578,
      "loss": 3.0149,
      "step": 88372
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6371660232543945,
      "learning_rate": 0.00040731234966562236,
      "loss": 3.2046,
      "step": 88373
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6691588163375854,
      "learning_rate": 0.00040730852975143564,
      "loss": 2.9624,
      "step": 88374
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.426555633544922,
      "learning_rate": 0.0004073047098172982,
      "loss": 2.9071,
      "step": 88375
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7463908195495605,
      "learning_rate": 0.0004073008898632107,
      "loss": 2.8955,
      "step": 88376
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8784903287887573,
      "learning_rate": 0.000407297069889174,
      "loss": 2.8759,
      "step": 88377
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5448189973831177,
      "learning_rate": 0.0004072932498951888,
      "loss": 3.0141,
      "step": 88378
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7544922828674316,
      "learning_rate": 0.00040728942988125567,
      "loss": 2.9107,
      "step": 88379
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.469602346420288,
      "learning_rate": 0.0004072856098473754,
      "loss": 2.9225,
      "step": 88380
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8101856708526611,
      "learning_rate": 0.0004072817897935488,
      "loss": 3.0363,
      "step": 88381
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9276790618896484,
      "learning_rate": 0.0004072779697197765,
      "loss": 2.8052,
      "step": 88382
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5308864116668701,
      "learning_rate": 0.0004072741496260591,
      "loss": 2.9124,
      "step": 88383
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5954346656799316,
      "learning_rate": 0.0004072703295123975,
      "loss": 3.0977,
      "step": 88384
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8201160430908203,
      "learning_rate": 0.0004072665093787923,
      "loss": 2.917,
      "step": 88385
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6067582368850708,
      "learning_rate": 0.0004072626892252442,
      "loss": 3.0537,
      "step": 88386
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7761613130569458,
      "learning_rate": 0.000407258869051754,
      "loss": 3.0054,
      "step": 88387
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5249983072280884,
      "learning_rate": 0.0004072550488583223,
      "loss": 2.7242,
      "step": 88388
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6037217378616333,
      "learning_rate": 0.00040725122864494984,
      "loss": 3.0427,
      "step": 88389
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9542350769042969,
      "learning_rate": 0.00040724740841163737,
      "loss": 2.9767,
      "step": 88390
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7013026475906372,
      "learning_rate": 0.0004072435881583856,
      "loss": 2.8573,
      "step": 88391
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7785558700561523,
      "learning_rate": 0.00040723976788519526,
      "loss": 2.9577,
      "step": 88392
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.427404761314392,
      "learning_rate": 0.000407235947592067,
      "loss": 3.0176,
      "step": 88393
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6007401943206787,
      "learning_rate": 0.0004072321272790015,
      "loss": 3.1058,
      "step": 88394
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5188076496124268,
      "learning_rate": 0.0004072283069459995,
      "loss": 3.1512,
      "step": 88395
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1933233737945557,
      "learning_rate": 0.0004072244865930619,
      "loss": 3.3449,
      "step": 88396
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0654923915863037,
      "learning_rate": 0.0004072206662201891,
      "loss": 3.0204,
      "step": 88397
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.027409553527832,
      "learning_rate": 0.000407216845827382,
      "loss": 2.9826,
      "step": 88398
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.869508981704712,
      "learning_rate": 0.0004072130254146412,
      "loss": 3.1746,
      "step": 88399
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.6867332458496094,
      "learning_rate": 0.00040720920498196755,
      "loss": 3.3533,
      "step": 88400
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7711381912231445,
      "learning_rate": 0.00040720538452936164,
      "loss": 3.0117,
      "step": 88401
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8673080205917358,
      "learning_rate": 0.00040720156405682435,
      "loss": 3.2252,
      "step": 88402
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3594346046447754,
      "learning_rate": 0.00040719774356435607,
      "loss": 2.9186,
      "step": 88403
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5678904056549072,
      "learning_rate": 0.00040719392305195776,
      "loss": 3.2152,
      "step": 88404
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4486662149429321,
      "learning_rate": 0.0004071901025196302,
      "loss": 3.1041,
      "step": 88405
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6849509477615356,
      "learning_rate": 0.0004071862819673739,
      "loss": 2.8887,
      "step": 88406
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5483543872833252,
      "learning_rate": 0.0004071824613951896,
      "loss": 3.0524,
      "step": 88407
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.656808853149414,
      "learning_rate": 0.00040717864080307815,
      "loss": 3.2095,
      "step": 88408
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4794756174087524,
      "learning_rate": 0.00040717482019104014,
      "loss": 3.0368,
      "step": 88409
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2408506870269775,
      "learning_rate": 0.0004071709995590762,
      "loss": 2.9367,
      "step": 88410
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6808034181594849,
      "learning_rate": 0.00040716717890718724,
      "loss": 3.0866,
      "step": 88411
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6013615131378174,
      "learning_rate": 0.0004071633582353739,
      "loss": 3.0556,
      "step": 88412
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8699321746826172,
      "learning_rate": 0.0004071595375436368,
      "loss": 2.9722,
      "step": 88413
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8158867359161377,
      "learning_rate": 0.00040715571683197683,
      "loss": 2.9472,
      "step": 88414
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0373308658599854,
      "learning_rate": 0.00040715189610039454,
      "loss": 3.048,
      "step": 88415
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9034970998764038,
      "learning_rate": 0.0004071480753488906,
      "loss": 3.06,
      "step": 88416
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0269601345062256,
      "learning_rate": 0.0004071442545774659,
      "loss": 3.0348,
      "step": 88417
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0377233028411865,
      "learning_rate": 0.000407140433786121,
      "loss": 2.7575,
      "step": 88418
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7469269037246704,
      "learning_rate": 0.00040713661297485674,
      "loss": 3.0707,
      "step": 88419
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.552302360534668,
      "learning_rate": 0.0004071327921436738,
      "loss": 2.8797,
      "step": 88420
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7614045143127441,
      "learning_rate": 0.0004071289712925727,
      "loss": 3.2266,
      "step": 88421
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3779518604278564,
      "learning_rate": 0.0004071251504215544,
      "loss": 3.1752,
      "step": 88422
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0415778160095215,
      "learning_rate": 0.0004071213295306195,
      "loss": 3.003,
      "step": 88423
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6512305736541748,
      "learning_rate": 0.0004071175086197687,
      "loss": 2.7795,
      "step": 88424
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.6416354179382324,
      "learning_rate": 0.00040711368768900277,
      "loss": 3.0034,
      "step": 88425
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9085592031478882,
      "learning_rate": 0.0004071098667383224,
      "loss": 2.9845,
      "step": 88426
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.9353199005126953,
      "learning_rate": 0.0004071060457677282,
      "loss": 2.9783,
      "step": 88427
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9882115125656128,
      "learning_rate": 0.000407102224777221,
      "loss": 2.9151,
      "step": 88428
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7567824125289917,
      "learning_rate": 0.00040709840376680157,
      "loss": 2.8808,
      "step": 88429
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7371894121170044,
      "learning_rate": 0.0004070945827364704,
      "loss": 3.0377,
      "step": 88430
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3168461322784424,
      "learning_rate": 0.00040709076168622834,
      "loss": 3.0106,
      "step": 88431
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8230534791946411,
      "learning_rate": 0.00040708694061607616,
      "loss": 3.0549,
      "step": 88432
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5932832956314087,
      "learning_rate": 0.00040708311952601444,
      "loss": 3.0119,
      "step": 88433
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.285080671310425,
      "learning_rate": 0.0004070792984160439,
      "loss": 2.9489,
      "step": 88434
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0505118370056152,
      "learning_rate": 0.00040707547728616547,
      "loss": 3.2788,
      "step": 88435
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.75501549243927,
      "learning_rate": 0.00040707165613637956,
      "loss": 3.1853,
      "step": 88436
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7347464561462402,
      "learning_rate": 0.00040706783496668693,
      "loss": 2.9108,
      "step": 88437
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4303994178771973,
      "learning_rate": 0.0004070640137770885,
      "loss": 3.0915,
      "step": 88438
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.388526439666748,
      "learning_rate": 0.00040706019256758487,
      "loss": 2.9976,
      "step": 88439
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.102769136428833,
      "learning_rate": 0.0004070563713381766,
      "loss": 2.9109,
      "step": 88440
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.29465651512146,
      "learning_rate": 0.0004070525500888646,
      "loss": 2.9046,
      "step": 88441
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2767558097839355,
      "learning_rate": 0.0004070487288196495,
      "loss": 3.0504,
      "step": 88442
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6632298231124878,
      "learning_rate": 0.0004070449075305321,
      "loss": 3.0364,
      "step": 88443
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7313458919525146,
      "learning_rate": 0.0004070410862215129,
      "loss": 2.7597,
      "step": 88444
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0743980407714844,
      "learning_rate": 0.0004070372648925928,
      "loss": 3.0038,
      "step": 88445
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7187869548797607,
      "learning_rate": 0.0004070334435437724,
      "loss": 3.1285,
      "step": 88446
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5834206342697144,
      "learning_rate": 0.0004070296221750526,
      "loss": 3.1401,
      "step": 88447
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.618517518043518,
      "learning_rate": 0.00040702580078643384,
      "loss": 2.9206,
      "step": 88448
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.523617148399353,
      "learning_rate": 0.000407021979377917,
      "loss": 2.9195,
      "step": 88449
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7836406230926514,
      "learning_rate": 0.00040701815794950275,
      "loss": 3.0434,
      "step": 88450
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8602025508880615,
      "learning_rate": 0.00040701433650119185,
      "loss": 2.7245,
      "step": 88451
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.137014150619507,
      "learning_rate": 0.000407010515032985,
      "loss": 2.9296,
      "step": 88452
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3028676509857178,
      "learning_rate": 0.0004070066935448827,
      "loss": 2.8723,
      "step": 88453
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.266005277633667,
      "learning_rate": 0.00040700287203688596,
      "loss": 3.0001,
      "step": 88454
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.579774022102356,
      "learning_rate": 0.0004069990505089953,
      "loss": 2.9794,
      "step": 88455
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1025302410125732,
      "learning_rate": 0.0004069952289612116,
      "loss": 2.8926,
      "step": 88456
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.348578691482544,
      "learning_rate": 0.0004069914073935354,
      "loss": 3.1754,
      "step": 88457
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8822009563446045,
      "learning_rate": 0.00040698758580596747,
      "loss": 2.733,
      "step": 88458
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.607048511505127,
      "learning_rate": 0.00040698376419850854,
      "loss": 2.8995,
      "step": 88459
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8825509548187256,
      "learning_rate": 0.00040697994257115944,
      "loss": 3.0342,
      "step": 88460
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8611376285552979,
      "learning_rate": 0.00040697612092392054,
      "loss": 2.9499,
      "step": 88461
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7168198823928833,
      "learning_rate": 0.00040697229925679287,
      "loss": 2.7132,
      "step": 88462
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.1098568439483643,
      "learning_rate": 0.00040696847756977703,
      "loss": 2.8876,
      "step": 88463
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8552799224853516,
      "learning_rate": 0.00040696465586287367,
      "loss": 2.9823,
      "step": 88464
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.456691026687622,
      "learning_rate": 0.0004069608341360836,
      "loss": 2.8555,
      "step": 88465
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.044477939605713,
      "learning_rate": 0.00040695701238940753,
      "loss": 3.145,
      "step": 88466
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1763052940368652,
      "learning_rate": 0.0004069531906228461,
      "loss": 2.9358,
      "step": 88467
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8442407846450806,
      "learning_rate": 0.0004069493688364,
      "loss": 3.0115,
      "step": 88468
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6831530332565308,
      "learning_rate": 0.00040694554703007014,
      "loss": 2.9737,
      "step": 88469
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9531797170639038,
      "learning_rate": 0.00040694172520385695,
      "loss": 2.8534,
      "step": 88470
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9162894487380981,
      "learning_rate": 0.00040693790335776127,
      "loss": 3.0665,
      "step": 88471
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.729984998703003,
      "learning_rate": 0.0004069340814917839,
      "loss": 2.8386,
      "step": 88472
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.852586030960083,
      "learning_rate": 0.0004069302596059254,
      "loss": 3.2754,
      "step": 88473
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.411085844039917,
      "learning_rate": 0.0004069264377001866,
      "loss": 2.9206,
      "step": 88474
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.67818284034729,
      "learning_rate": 0.00040692261577456815,
      "loss": 2.8358,
      "step": 88475
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8184579610824585,
      "learning_rate": 0.0004069187938290707,
      "loss": 2.9554,
      "step": 88476
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.609002947807312,
      "learning_rate": 0.00040691497186369507,
      "loss": 2.8236,
      "step": 88477
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6664211750030518,
      "learning_rate": 0.0004069111498784419,
      "loss": 3.1272,
      "step": 88478
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7003591060638428,
      "learning_rate": 0.00040690732787331197,
      "loss": 2.9241,
      "step": 88479
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.418994665145874,
      "learning_rate": 0.0004069035058483059,
      "loss": 2.9316,
      "step": 88480
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8244010210037231,
      "learning_rate": 0.0004068996838034246,
      "loss": 3.2124,
      "step": 88481
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4983227252960205,
      "learning_rate": 0.00040689586173866843,
      "loss": 3.12,
      "step": 88482
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7615288496017456,
      "learning_rate": 0.00040689203965403836,
      "loss": 2.9871,
      "step": 88483
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.347137212753296,
      "learning_rate": 0.00040688821754953513,
      "loss": 2.9426,
      "step": 88484
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9768768548965454,
      "learning_rate": 0.0004068843954251593,
      "loss": 3.5605,
      "step": 88485
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1943199634552,
      "learning_rate": 0.00040688057328091165,
      "loss": 2.9786,
      "step": 88486
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4414063692092896,
      "learning_rate": 0.0004068767511167929,
      "loss": 2.9034,
      "step": 88487
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9685916900634766,
      "learning_rate": 0.00040687292893280374,
      "loss": 2.9992,
      "step": 88488
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7894829511642456,
      "learning_rate": 0.0004068691067289449,
      "loss": 2.8229,
      "step": 88489
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7531601190567017,
      "learning_rate": 0.000406865284505217,
      "loss": 2.9782,
      "step": 88490
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4256296157836914,
      "learning_rate": 0.0004068614622616209,
      "loss": 3.1538,
      "step": 88491
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.803357720375061,
      "learning_rate": 0.00040685763999815717,
      "loss": 3.0168,
      "step": 88492
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.42701256275177,
      "learning_rate": 0.00040685381771482667,
      "loss": 3.0304,
      "step": 88493
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.70712411403656,
      "learning_rate": 0.00040684999541162995,
      "loss": 3.097,
      "step": 88494
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0946547985076904,
      "learning_rate": 0.0004068461730885678,
      "loss": 3.1738,
      "step": 88495
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7656649351119995,
      "learning_rate": 0.00040684235074564106,
      "loss": 3.1659,
      "step": 88496
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.807676911354065,
      "learning_rate": 0.0004068385283828502,
      "loss": 3.1338,
      "step": 88497
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6597535610198975,
      "learning_rate": 0.00040683470600019604,
      "loss": 3.034,
      "step": 88498
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6157721281051636,
      "learning_rate": 0.00040683088359767935,
      "loss": 3.1194,
      "step": 88499
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9687888622283936,
      "learning_rate": 0.00040682706117530073,
      "loss": 3.0005,
      "step": 88500
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8796663284301758,
      "learning_rate": 0.00040682323873306094,
      "loss": 2.7127,
      "step": 88501
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0475165843963623,
      "learning_rate": 0.0004068194162709607,
      "loss": 2.9006,
      "step": 88502
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.78346848487854,
      "learning_rate": 0.0004068155937890007,
      "loss": 2.9999,
      "step": 88503
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.851061224937439,
      "learning_rate": 0.0004068117712871817,
      "loss": 2.8906,
      "step": 88504
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2619755268096924,
      "learning_rate": 0.0004068079487655044,
      "loss": 2.9908,
      "step": 88505
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5477533340454102,
      "learning_rate": 0.0004068041262239694,
      "loss": 3.1898,
      "step": 88506
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.917923927307129,
      "learning_rate": 0.0004068003036625775,
      "loss": 3.0828,
      "step": 88507
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8629809617996216,
      "learning_rate": 0.0004067964810813295,
      "loss": 3.2531,
      "step": 88508
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.723882794380188,
      "learning_rate": 0.00040679265848022597,
      "loss": 3.2221,
      "step": 88509
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5029906034469604,
      "learning_rate": 0.00040678883585926765,
      "loss": 2.8802,
      "step": 88510
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7097336053848267,
      "learning_rate": 0.00040678501321845536,
      "loss": 3.0173,
      "step": 88511
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.058732032775879,
      "learning_rate": 0.0004067811905577896,
      "loss": 2.9749,
      "step": 88512
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2327120304107666,
      "learning_rate": 0.0004067773678772712,
      "loss": 2.7997,
      "step": 88513
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.184429407119751,
      "learning_rate": 0.000406773545176901,
      "loss": 2.7893,
      "step": 88514
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.649752378463745,
      "learning_rate": 0.0004067697224566795,
      "loss": 2.8358,
      "step": 88515
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7064052820205688,
      "learning_rate": 0.00040676589971660744,
      "loss": 2.9889,
      "step": 88516
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6774723529815674,
      "learning_rate": 0.0004067620769566856,
      "loss": 2.8499,
      "step": 88517
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6534878015518188,
      "learning_rate": 0.0004067582541769148,
      "loss": 3.1432,
      "step": 88518
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6143909692764282,
      "learning_rate": 0.0004067544313772955,
      "loss": 2.8983,
      "step": 88519
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8303165435791016,
      "learning_rate": 0.0004067506085578286,
      "loss": 3.0661,
      "step": 88520
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1331372261047363,
      "learning_rate": 0.00040674678571851476,
      "loss": 3.0879,
      "step": 88521
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7441898584365845,
      "learning_rate": 0.0004067429628593546,
      "loss": 2.876,
      "step": 88522
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7850629091262817,
      "learning_rate": 0.00040673913998034896,
      "loss": 3.1714,
      "step": 88523
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7796647548675537,
      "learning_rate": 0.00040673531708149856,
      "loss": 2.8315,
      "step": 88524
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1621460914611816,
      "learning_rate": 0.0004067314941628039,
      "loss": 2.8649,
      "step": 88525
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6720517873764038,
      "learning_rate": 0.0004067276712242659,
      "loss": 2.8716,
      "step": 88526
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.360175132751465,
      "learning_rate": 0.00040672384826588527,
      "loss": 3.183,
      "step": 88527
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8437647819519043,
      "learning_rate": 0.00040672002528766265,
      "loss": 3.1092,
      "step": 88528
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6320730447769165,
      "learning_rate": 0.0004067162022895987,
      "loss": 3.0231,
      "step": 88529
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7530100345611572,
      "learning_rate": 0.0004067123792716943,
      "loss": 2.849,
      "step": 88530
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.063955307006836,
      "learning_rate": 0.00040670855623394994,
      "loss": 2.8614,
      "step": 88531
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6758464574813843,
      "learning_rate": 0.00040670473317636644,
      "loss": 2.9859,
      "step": 88532
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.946580410003662,
      "learning_rate": 0.0004067009100989445,
      "loss": 2.9771,
      "step": 88533
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9348161220550537,
      "learning_rate": 0.00040669708700168496,
      "loss": 3.1947,
      "step": 88534
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9485769271850586,
      "learning_rate": 0.0004066932638845884,
      "loss": 3.1586,
      "step": 88535
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.98361337184906,
      "learning_rate": 0.0004066894407476555,
      "loss": 2.8806,
      "step": 88536
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.704139232635498,
      "learning_rate": 0.00040668561759088707,
      "loss": 3.192,
      "step": 88537
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.035952568054199,
      "learning_rate": 0.0004066817944142837,
      "loss": 3.0902,
      "step": 88538
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7527180910110474,
      "learning_rate": 0.0004066779712178462,
      "loss": 2.9601,
      "step": 88539
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1478703022003174,
      "learning_rate": 0.00040667414800157523,
      "loss": 2.87,
      "step": 88540
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5280284881591797,
      "learning_rate": 0.0004066703247654716,
      "loss": 3.0208,
      "step": 88541
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.660963535308838,
      "learning_rate": 0.00040666650150953586,
      "loss": 3.0481,
      "step": 88542
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6219669580459595,
      "learning_rate": 0.00040666267823376886,
      "loss": 3.023,
      "step": 88543
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5864392518997192,
      "learning_rate": 0.00040665885493817126,
      "loss": 3.0764,
      "step": 88544
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.89946448802948,
      "learning_rate": 0.0004066550316227437,
      "loss": 3.041,
      "step": 88545
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8302044868469238,
      "learning_rate": 0.000406651208287487,
      "loss": 2.9954,
      "step": 88546
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5823420286178589,
      "learning_rate": 0.0004066473849324018,
      "loss": 2.884,
      "step": 88547
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.135887861251831,
      "learning_rate": 0.00040664356155748887,
      "loss": 3.178,
      "step": 88548
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9256235361099243,
      "learning_rate": 0.00040663973816274884,
      "loss": 3.05,
      "step": 88549
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7112329006195068,
      "learning_rate": 0.0004066359147481825,
      "loss": 3.0219,
      "step": 88550
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9102718830108643,
      "learning_rate": 0.0004066320913137906,
      "loss": 2.9982,
      "step": 88551
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5952556133270264,
      "learning_rate": 0.00040662826785957374,
      "loss": 2.8567,
      "step": 88552
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6100082397460938,
      "learning_rate": 0.0004066244443855326,
      "loss": 2.8436,
      "step": 88553
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6506340503692627,
      "learning_rate": 0.000406620620891668,
      "loss": 2.9497,
      "step": 88554
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.744722843170166,
      "learning_rate": 0.00040661679737798064,
      "loss": 3.1591,
      "step": 88555
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7213726043701172,
      "learning_rate": 0.0004066129738444712,
      "loss": 3.0234,
      "step": 88556
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.84115469455719,
      "learning_rate": 0.0004066091502911404,
      "loss": 3.3028,
      "step": 88557
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.2327234745025635,
      "learning_rate": 0.0004066053267179889,
      "loss": 2.8513,
      "step": 88558
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.486845016479492,
      "learning_rate": 0.00040660150312501754,
      "loss": 3.0798,
      "step": 88559
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.800487995147705,
      "learning_rate": 0.00040659767951222695,
      "loss": 3.1453,
      "step": 88560
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.28159499168396,
      "learning_rate": 0.00040659385587961774,
      "loss": 2.852,
      "step": 88561
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6847257614135742,
      "learning_rate": 0.0004065900322271907,
      "loss": 2.8758,
      "step": 88562
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9816373586654663,
      "learning_rate": 0.0004065862085549467,
      "loss": 3.1467,
      "step": 88563
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7640068531036377,
      "learning_rate": 0.00040658238486288625,
      "loss": 3.1565,
      "step": 88564
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.864648699760437,
      "learning_rate": 0.00040657856115101015,
      "loss": 3.0404,
      "step": 88565
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7503715753555298,
      "learning_rate": 0.00040657473741931906,
      "loss": 3.153,
      "step": 88566
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7724930047988892,
      "learning_rate": 0.0004065709136678137,
      "loss": 3.1575,
      "step": 88567
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.3456380367279053,
      "learning_rate": 0.0004065670898964948,
      "loss": 2.9952,
      "step": 88568
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7061318159103394,
      "learning_rate": 0.00040656326610536304,
      "loss": 2.952,
      "step": 88569
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8735746145248413,
      "learning_rate": 0.00040655944229441916,
      "loss": 2.9669,
      "step": 88570
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.137613296508789,
      "learning_rate": 0.00040655561846366387,
      "loss": 2.9794,
      "step": 88571
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6834927797317505,
      "learning_rate": 0.00040655179461309793,
      "loss": 2.9928,
      "step": 88572
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.192131280899048,
      "learning_rate": 0.00040654797074272193,
      "loss": 3.0496,
      "step": 88573
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2152352333068848,
      "learning_rate": 0.0004065441468525367,
      "loss": 3.0397,
      "step": 88574
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9644368886947632,
      "learning_rate": 0.0004065403229425429,
      "loss": 2.8559,
      "step": 88575
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0035243034362793,
      "learning_rate": 0.00040653649901274127,
      "loss": 2.9774,
      "step": 88576
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.253183364868164,
      "learning_rate": 0.00040653267506313243,
      "loss": 2.8223,
      "step": 88577
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.967300534248352,
      "learning_rate": 0.0004065288510937172,
      "loss": 3.068,
      "step": 88578
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1410977840423584,
      "learning_rate": 0.00040652502710449625,
      "loss": 3.1872,
      "step": 88579
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9148589372634888,
      "learning_rate": 0.00040652120309547026,
      "loss": 2.8893,
      "step": 88580
    },
    {
      "epoch": 1.15,
      "grad_norm": 4.178175926208496,
      "learning_rate": 0.00040651737906663996,
      "loss": 2.825,
      "step": 88581
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.246077060699463,
      "learning_rate": 0.0004065135550180061,
      "loss": 3.02,
      "step": 88582
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5191501379013062,
      "learning_rate": 0.00040650973094956937,
      "loss": 3.1633,
      "step": 88583
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6069345474243164,
      "learning_rate": 0.0004065059068613304,
      "loss": 3.1801,
      "step": 88584
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8209664821624756,
      "learning_rate": 0.00040650208275329005,
      "loss": 3.2094,
      "step": 88585
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.137315034866333,
      "learning_rate": 0.0004064982586254489,
      "loss": 2.893,
      "step": 88586
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2715725898742676,
      "learning_rate": 0.0004064944344778077,
      "loss": 2.9849,
      "step": 88587
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.063821315765381,
      "learning_rate": 0.00040649061031036726,
      "loss": 3.1416,
      "step": 88588
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.311473846435547,
      "learning_rate": 0.00040648678612312814,
      "loss": 3.1039,
      "step": 88589
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5560537576675415,
      "learning_rate": 0.0004064829619160911,
      "loss": 3.1667,
      "step": 88590
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4646923542022705,
      "learning_rate": 0.0004064791376892569,
      "loss": 3.0293,
      "step": 88591
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.288771152496338,
      "learning_rate": 0.0004064753134426262,
      "loss": 3.047,
      "step": 88592
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3299741744995117,
      "learning_rate": 0.0004064714891761997,
      "loss": 2.9554,
      "step": 88593
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.874779462814331,
      "learning_rate": 0.0004064676648899783,
      "loss": 3.0356,
      "step": 88594
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1362898349761963,
      "learning_rate": 0.00040646384058396244,
      "loss": 2.9634,
      "step": 88595
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2675530910491943,
      "learning_rate": 0.00040646001625815285,
      "loss": 2.9339,
      "step": 88596
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1170148849487305,
      "learning_rate": 0.00040645619191255044,
      "loss": 3.0013,
      "step": 88597
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8730710744857788,
      "learning_rate": 0.0004064523675471558,
      "loss": 3.0095,
      "step": 88598
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.840983510017395,
      "learning_rate": 0.0004064485431619696,
      "loss": 2.982,
      "step": 88599
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1613945960998535,
      "learning_rate": 0.0004064447187569927,
      "loss": 3.337,
      "step": 88600
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.491899013519287,
      "learning_rate": 0.0004064408943322257,
      "loss": 3.1996,
      "step": 88601
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4919153451919556,
      "learning_rate": 0.00040643706988766927,
      "loss": 3.0198,
      "step": 88602
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4332046508789062,
      "learning_rate": 0.00040643324542332423,
      "loss": 2.7124,
      "step": 88603
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9489257335662842,
      "learning_rate": 0.00040642942093919125,
      "loss": 2.9958,
      "step": 88604
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5417332649230957,
      "learning_rate": 0.000406425596435271,
      "loss": 2.6418,
      "step": 88605
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7146031856536865,
      "learning_rate": 0.00040642177191156425,
      "loss": 2.7597,
      "step": 88606
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.030836820602417,
      "learning_rate": 0.0004064179473680716,
      "loss": 3.1463,
      "step": 88607
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6430115699768066,
      "learning_rate": 0.00040641412280479386,
      "loss": 2.8926,
      "step": 88608
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2245900630950928,
      "learning_rate": 0.0004064102982217319,
      "loss": 3.2659,
      "step": 88609
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4948559999465942,
      "learning_rate": 0.0004064064736188861,
      "loss": 2.8113,
      "step": 88610
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9042924642562866,
      "learning_rate": 0.00040640264899625733,
      "loss": 3.3191,
      "step": 88611
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9174113273620605,
      "learning_rate": 0.00040639882435384634,
      "loss": 3.0348,
      "step": 88612
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6354237794876099,
      "learning_rate": 0.0004063949996916538,
      "loss": 3.1017,
      "step": 88613
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3908345699310303,
      "learning_rate": 0.00040639117500968034,
      "loss": 2.9661,
      "step": 88614
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6522698402404785,
      "learning_rate": 0.0004063873503079269,
      "loss": 2.9002,
      "step": 88615
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4544333219528198,
      "learning_rate": 0.0004063835255863939,
      "loss": 2.8389,
      "step": 88616
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.132136106491089,
      "learning_rate": 0.0004063797008450823,
      "loss": 3.1262,
      "step": 88617
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1899759769439697,
      "learning_rate": 0.0004063758760839926,
      "loss": 2.8802,
      "step": 88618
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6969804763793945,
      "learning_rate": 0.00040637205130312576,
      "loss": 3.163,
      "step": 88619
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0740349292755127,
      "learning_rate": 0.0004063682265024822,
      "loss": 2.7233,
      "step": 88620
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.519205331802368,
      "learning_rate": 0.0004063644016820628,
      "loss": 2.9958,
      "step": 88621
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7903414964675903,
      "learning_rate": 0.00040636057684186836,
      "loss": 3.2033,
      "step": 88622
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2492525577545166,
      "learning_rate": 0.00040635675198189935,
      "loss": 2.6625,
      "step": 88623
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.4338159561157227,
      "learning_rate": 0.0004063529271021567,
      "loss": 3.112,
      "step": 88624
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7470299005508423,
      "learning_rate": 0.00040634910220264107,
      "loss": 2.9872,
      "step": 88625
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.6586077213287354,
      "learning_rate": 0.00040634527728335303,
      "loss": 2.9494,
      "step": 88626
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4929215908050537,
      "learning_rate": 0.0004063414523442935,
      "loss": 3.0555,
      "step": 88627
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.847723960876465,
      "learning_rate": 0.0004063376273854629,
      "loss": 2.9662,
      "step": 88628
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.643674612045288,
      "learning_rate": 0.00040633380240686235,
      "loss": 3.0187,
      "step": 88629
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.7140345573425293,
      "learning_rate": 0.0004063299774084922,
      "loss": 3.2709,
      "step": 88630
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.2589263916015625,
      "learning_rate": 0.00040632615239035336,
      "loss": 2.9387,
      "step": 88631
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.4197793006896973,
      "learning_rate": 0.0004063223273524464,
      "loss": 3.1749,
      "step": 88632
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.4342474937438965,
      "learning_rate": 0.00040631850229477226,
      "loss": 3.012,
      "step": 88633
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.209216833114624,
      "learning_rate": 0.0004063146772173313,
      "loss": 3.0944,
      "step": 88634
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6073099374771118,
      "learning_rate": 0.00040631085212012463,
      "loss": 2.9114,
      "step": 88635
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.5574090480804443,
      "learning_rate": 0.00040630702700315276,
      "loss": 3.0662,
      "step": 88636
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1852457523345947,
      "learning_rate": 0.00040630320186641627,
      "loss": 2.9305,
      "step": 88637
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.187838315963745,
      "learning_rate": 0.0004062993767099161,
      "loss": 3.1805,
      "step": 88638
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7976237535476685,
      "learning_rate": 0.0004062955515336528,
      "loss": 2.9543,
      "step": 88639
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.851984977722168,
      "learning_rate": 0.00040629172633762723,
      "loss": 2.9844,
      "step": 88640
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1457927227020264,
      "learning_rate": 0.00040628790112184,
      "loss": 2.9645,
      "step": 88641
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.546648383140564,
      "learning_rate": 0.00040628407588629185,
      "loss": 3.1299,
      "step": 88642
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6738260984420776,
      "learning_rate": 0.00040628025063098344,
      "loss": 3.0404,
      "step": 88643
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5949856042861938,
      "learning_rate": 0.00040627642535591554,
      "loss": 3.1566,
      "step": 88644
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7579917907714844,
      "learning_rate": 0.0004062726000610889,
      "loss": 3.1877,
      "step": 88645
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5707920789718628,
      "learning_rate": 0.00040626877474650414,
      "loss": 3.2342,
      "step": 88646
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2053394317626953,
      "learning_rate": 0.000406264949412162,
      "loss": 3.0654,
      "step": 88647
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7077891826629639,
      "learning_rate": 0.00040626112405806313,
      "loss": 2.9996,
      "step": 88648
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7395379543304443,
      "learning_rate": 0.0004062572986842085,
      "loss": 3.1462,
      "step": 88649
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6457068920135498,
      "learning_rate": 0.0004062534732905985,
      "loss": 3.1938,
      "step": 88650
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8770142793655396,
      "learning_rate": 0.0004062496478772339,
      "loss": 3.0946,
      "step": 88651
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4656010866165161,
      "learning_rate": 0.00040624582244411564,
      "loss": 3.1005,
      "step": 88652
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9880516529083252,
      "learning_rate": 0.00040624199699124423,
      "loss": 3.313,
      "step": 88653
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7927300930023193,
      "learning_rate": 0.00040623817151862033,
      "loss": 3.0198,
      "step": 88654
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5887259244918823,
      "learning_rate": 0.0004062343460262449,
      "loss": 3.0696,
      "step": 88655
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0624992847442627,
      "learning_rate": 0.0004062305205141184,
      "loss": 3.0791,
      "step": 88656
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.822060465812683,
      "learning_rate": 0.00040622669498224166,
      "loss": 3.3086,
      "step": 88657
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4774972200393677,
      "learning_rate": 0.00040622286943061537,
      "loss": 3.1307,
      "step": 88658
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5401872396469116,
      "learning_rate": 0.0004062190438592403,
      "loss": 3.0199,
      "step": 88659
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.32612943649292,
      "learning_rate": 0.000406215218268117,
      "loss": 3.1316,
      "step": 88660
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9654912948608398,
      "learning_rate": 0.00040621139265724636,
      "loss": 3.092,
      "step": 88661
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0945827960968018,
      "learning_rate": 0.000406207567026629,
      "loss": 3.0346,
      "step": 88662
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2722768783569336,
      "learning_rate": 0.00040620374137626566,
      "loss": 3.181,
      "step": 88663
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7913414239883423,
      "learning_rate": 0.000406199915706157,
      "loss": 3.097,
      "step": 88664
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0530686378479004,
      "learning_rate": 0.0004061960900163038,
      "loss": 2.8156,
      "step": 88665
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0318310260772705,
      "learning_rate": 0.0004061922643067067,
      "loss": 2.8333,
      "step": 88666
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4944343566894531,
      "learning_rate": 0.0004061884385773666,
      "loss": 3.1787,
      "step": 88667
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9565032720565796,
      "learning_rate": 0.0004061846128282839,
      "loss": 3.1431,
      "step": 88668
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4259617328643799,
      "learning_rate": 0.0004061807870594596,
      "loss": 3.1426,
      "step": 88669
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.2609565258026123,
      "learning_rate": 0.0004061769612708943,
      "loss": 2.9692,
      "step": 88670
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.020681858062744,
      "learning_rate": 0.00040617313546258854,
      "loss": 3.1083,
      "step": 88671
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5922908782958984,
      "learning_rate": 0.00040616930963454327,
      "loss": 3.0293,
      "step": 88672
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.678194761276245,
      "learning_rate": 0.00040616548378675913,
      "loss": 2.9184,
      "step": 88673
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.691286325454712,
      "learning_rate": 0.0004061616579192368,
      "loss": 3.1883,
      "step": 88674
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8618963956832886,
      "learning_rate": 0.00040615783203197704,
      "loss": 2.9415,
      "step": 88675
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.3713324069976807,
      "learning_rate": 0.00040615400612498056,
      "loss": 2.9319,
      "step": 88676
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.628950357437134,
      "learning_rate": 0.000406150180198248,
      "loss": 2.9147,
      "step": 88677
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9257991313934326,
      "learning_rate": 0.00040614635425178006,
      "loss": 3.0688,
      "step": 88678
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8496816158294678,
      "learning_rate": 0.0004061425282855777,
      "loss": 3.2259,
      "step": 88679
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.479357957839966,
      "learning_rate": 0.00040613870229964125,
      "loss": 3.0285,
      "step": 88680
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.9538832902908325,
      "learning_rate": 0.0004061348762939717,
      "loss": 2.9735,
      "step": 88681
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4523109197616577,
      "learning_rate": 0.00040613105026856964,
      "loss": 2.9657,
      "step": 88682
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7538650035858154,
      "learning_rate": 0.0004061272242234359,
      "loss": 2.9397,
      "step": 88683
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6156883239746094,
      "learning_rate": 0.00040612339815857093,
      "loss": 3.1211,
      "step": 88684
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.6214404106140137,
      "learning_rate": 0.0004061195720739758,
      "loss": 3.0703,
      "step": 88685
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.4006277322769165,
      "learning_rate": 0.00040611574596965093,
      "loss": 2.9884,
      "step": 88686
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.6559083461761475,
      "learning_rate": 0.00040611191984559717,
      "loss": 3.0579,
      "step": 88687
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5810941457748413,
      "learning_rate": 0.0004061080937018152,
      "loss": 3.3354,
      "step": 88688
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.5511894226074219,
      "learning_rate": 0.00040610426753830576,
      "loss": 2.9137,
      "step": 88689
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.5270307064056396,
      "learning_rate": 0.0004061004413550694,
      "loss": 3.0408,
      "step": 88690
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.932281732559204,
      "learning_rate": 0.00040609661515210715,
      "loss": 2.9896,
      "step": 88691
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7228981256484985,
      "learning_rate": 0.0004060927889294195,
      "loss": 3.3125,
      "step": 88692
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.1293981075286865,
      "learning_rate": 0.0004060889626870071,
      "loss": 3.1893,
      "step": 88693
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8938618898391724,
      "learning_rate": 0.0004060851364248709,
      "loss": 2.9939,
      "step": 88694
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.7317479848861694,
      "learning_rate": 0.00040608131014301135,
      "loss": 3.1069,
      "step": 88695
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.3926632404327393,
      "learning_rate": 0.0004060774838414293,
      "loss": 3.2192,
      "step": 88696
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.8951687812805176,
      "learning_rate": 0.0004060736575201255,
      "loss": 2.9881,
      "step": 88697
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.218268871307373,
      "learning_rate": 0.00040606983117910054,
      "loss": 2.8969,
      "step": 88698
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.8233574628829956,
      "learning_rate": 0.00040606600481835525,
      "loss": 3.2455,
      "step": 88699
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7539615631103516,
      "learning_rate": 0.0004060621784378902,
      "loss": 2.914,
      "step": 88700
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.562060832977295,
      "learning_rate": 0.0004060583520377063,
      "loss": 2.9797,
      "step": 88701
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.7608742713928223,
      "learning_rate": 0.00040605452561780404,
      "loss": 3.0168,
      "step": 88702
    },
    {
      "epoch": 1.15,
      "grad_norm": 1.988006830215454,
      "learning_rate": 0.00040605069917818435,
      "loss": 2.8731,
      "step": 88703
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.31912899017334,
      "learning_rate": 0.0004060468727188478,
      "loss": 2.9569,
      "step": 88704
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.0510292053222656,
      "learning_rate": 0.00040604304623979506,
      "loss": 2.9577,
      "step": 88705
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4603826999664307,
      "learning_rate": 0.00040603921974102704,
      "loss": 2.887,
      "step": 88706
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.574101209640503,
      "learning_rate": 0.0004060353932225442,
      "loss": 3.0199,
      "step": 88707
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.346270799636841,
      "learning_rate": 0.00040603156668434745,
      "loss": 2.9143,
      "step": 88708
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.484668016433716,
      "learning_rate": 0.0004060277401264374,
      "loss": 3.0123,
      "step": 88709
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9238935708999634,
      "learning_rate": 0.00040602391354881487,
      "loss": 2.9912,
      "step": 88710
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.738840937614441,
      "learning_rate": 0.0004060200869514804,
      "loss": 2.7291,
      "step": 88711
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.695706844329834,
      "learning_rate": 0.0004060162603344348,
      "loss": 3.0031,
      "step": 88712
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.9140431880950928,
      "learning_rate": 0.0004060124336976788,
      "loss": 3.0966,
      "step": 88713
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.9773309230804443,
      "learning_rate": 0.00040600860704121316,
      "loss": 2.9958,
      "step": 88714
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.716916561126709,
      "learning_rate": 0.00040600478036503844,
      "loss": 2.9608,
      "step": 88715
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3006768226623535,
      "learning_rate": 0.00040600095366915537,
      "loss": 2.7527,
      "step": 88716
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3380539417266846,
      "learning_rate": 0.00040599712695356486,
      "loss": 3.1258,
      "step": 88717
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.152486801147461,
      "learning_rate": 0.0004059933002182674,
      "loss": 3.0793,
      "step": 88718
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9979597330093384,
      "learning_rate": 0.0004059894734632638,
      "loss": 3.0133,
      "step": 88719
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5206520557403564,
      "learning_rate": 0.00040598564668855474,
      "loss": 2.9323,
      "step": 88720
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.113614082336426,
      "learning_rate": 0.000405981819894141,
      "loss": 3.0128,
      "step": 88721
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0247626304626465,
      "learning_rate": 0.00040597799308002324,
      "loss": 3.1275,
      "step": 88722
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.087446928024292,
      "learning_rate": 0.0004059741662462021,
      "loss": 2.8287,
      "step": 88723
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.395277261734009,
      "learning_rate": 0.00040597033939267837,
      "loss": 3.0647,
      "step": 88724
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.7705748081207275,
      "learning_rate": 0.0004059665125194528,
      "loss": 3.0184,
      "step": 88725
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.908346176147461,
      "learning_rate": 0.00040596268562652604,
      "loss": 3.0718,
      "step": 88726
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8246291875839233,
      "learning_rate": 0.0004059588587138988,
      "loss": 2.9364,
      "step": 88727
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3304808139801025,
      "learning_rate": 0.0004059550317815719,
      "loss": 3.1534,
      "step": 88728
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5581008195877075,
      "learning_rate": 0.00040595120482954584,
      "loss": 3.0855,
      "step": 88729
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.793529748916626,
      "learning_rate": 0.00040594737785782146,
      "loss": 3.0728,
      "step": 88730
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5723700523376465,
      "learning_rate": 0.00040594355086639956,
      "loss": 2.8933,
      "step": 88731
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9024335145950317,
      "learning_rate": 0.00040593972385528064,
      "loss": 2.9935,
      "step": 88732
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5179203748703003,
      "learning_rate": 0.00040593589682446556,
      "loss": 3.1689,
      "step": 88733
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.190098285675049,
      "learning_rate": 0.0004059320697739551,
      "loss": 2.9324,
      "step": 88734
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5237278938293457,
      "learning_rate": 0.0004059282427037497,
      "loss": 2.9675,
      "step": 88735
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6151009798049927,
      "learning_rate": 0.0004059244156138503,
      "loss": 3.2067,
      "step": 88736
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.9700767993927,
      "learning_rate": 0.00040592058850425763,
      "loss": 3.0107,
      "step": 88737
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0125603675842285,
      "learning_rate": 0.0004059167613749723,
      "loss": 3.1049,
      "step": 88738
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0932064056396484,
      "learning_rate": 0.00040591293422599493,
      "loss": 2.835,
      "step": 88739
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5901215076446533,
      "learning_rate": 0.00040590910705732647,
      "loss": 2.9073,
      "step": 88740
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.181349277496338,
      "learning_rate": 0.00040590527986896744,
      "loss": 2.9245,
      "step": 88741
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5763736963272095,
      "learning_rate": 0.00040590145266091864,
      "loss": 3.002,
      "step": 88742
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7226277589797974,
      "learning_rate": 0.0004058976254331808,
      "loss": 3.1143,
      "step": 88743
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6603217124938965,
      "learning_rate": 0.0004058937981857545,
      "loss": 3.1104,
      "step": 88744
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5719149112701416,
      "learning_rate": 0.0004058899709186406,
      "loss": 2.9493,
      "step": 88745
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.818480134010315,
      "learning_rate": 0.0004058861436318398,
      "loss": 3.187,
      "step": 88746
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.909748911857605,
      "learning_rate": 0.0004058823163253527,
      "loss": 3.0189,
      "step": 88747
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1049270629882812,
      "learning_rate": 0.0004058784889991801,
      "loss": 3.0495,
      "step": 88748
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.755074381828308,
      "learning_rate": 0.0004058746616533228,
      "loss": 2.9543,
      "step": 88749
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6795281171798706,
      "learning_rate": 0.00040587083428778124,
      "loss": 2.9705,
      "step": 88750
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.186520576477051,
      "learning_rate": 0.00040586700690255635,
      "loss": 3.0569,
      "step": 88751
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.494960069656372,
      "learning_rate": 0.0004058631794976488,
      "loss": 3.3001,
      "step": 88752
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9174360036849976,
      "learning_rate": 0.00040585935207305924,
      "loss": 3.0725,
      "step": 88753
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7463477849960327,
      "learning_rate": 0.00040585552462878843,
      "loss": 3.1656,
      "step": 88754
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0259623527526855,
      "learning_rate": 0.0004058516971648372,
      "loss": 3.073,
      "step": 88755
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.614416480064392,
      "learning_rate": 0.000405847869681206,
      "loss": 3.0277,
      "step": 88756
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5797548294067383,
      "learning_rate": 0.0004058440421778957,
      "loss": 2.9415,
      "step": 88757
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5174821615219116,
      "learning_rate": 0.0004058402146549071,
      "loss": 3.0358,
      "step": 88758
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9768586158752441,
      "learning_rate": 0.00040583638711224067,
      "loss": 3.2464,
      "step": 88759
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8766623735427856,
      "learning_rate": 0.0004058325595498973,
      "loss": 2.9356,
      "step": 88760
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6299248933792114,
      "learning_rate": 0.00040582873196787775,
      "loss": 3.0716,
      "step": 88761
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6451810598373413,
      "learning_rate": 0.0004058249043661825,
      "loss": 3.4046,
      "step": 88762
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9877724647521973,
      "learning_rate": 0.0004058210767448125,
      "loss": 2.9934,
      "step": 88763
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7777910232543945,
      "learning_rate": 0.00040581724910376837,
      "loss": 2.8735,
      "step": 88764
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6308120489120483,
      "learning_rate": 0.0004058134214430508,
      "loss": 3.2924,
      "step": 88765
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8356003761291504,
      "learning_rate": 0.0004058095937626605,
      "loss": 2.9439,
      "step": 88766
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4714598655700684,
      "learning_rate": 0.0004058057660625982,
      "loss": 2.9581,
      "step": 88767
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.283336639404297,
      "learning_rate": 0.0004058019383428646,
      "loss": 2.9908,
      "step": 88768
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9157153367996216,
      "learning_rate": 0.00040579811060346044,
      "loss": 2.7793,
      "step": 88769
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.968644142150879,
      "learning_rate": 0.00040579428284438643,
      "loss": 2.951,
      "step": 88770
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8154497146606445,
      "learning_rate": 0.0004057904550656432,
      "loss": 2.9264,
      "step": 88771
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7653075456619263,
      "learning_rate": 0.0004057866272672316,
      "loss": 3.287,
      "step": 88772
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8898987770080566,
      "learning_rate": 0.00040578279944915226,
      "loss": 3.1179,
      "step": 88773
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9672231674194336,
      "learning_rate": 0.0004057789716114059,
      "loss": 3.1666,
      "step": 88774
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6461421251296997,
      "learning_rate": 0.0004057751437539931,
      "loss": 3.173,
      "step": 88775
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.579590916633606,
      "learning_rate": 0.0004057713158769149,
      "loss": 3.0523,
      "step": 88776
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6233956813812256,
      "learning_rate": 0.0004057674879801717,
      "loss": 2.7672,
      "step": 88777
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7594386339187622,
      "learning_rate": 0.0004057636600637644,
      "loss": 3.088,
      "step": 88778
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.278425455093384,
      "learning_rate": 0.0004057598321276936,
      "loss": 2.9629,
      "step": 88779
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.038498878479004,
      "learning_rate": 0.00040575600417196004,
      "loss": 2.8261,
      "step": 88780
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8391753435134888,
      "learning_rate": 0.00040575217619656445,
      "loss": 2.8522,
      "step": 88781
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7978276014328003,
      "learning_rate": 0.0004057483482015075,
      "loss": 2.8898,
      "step": 88782
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.653334140777588,
      "learning_rate": 0.00040574452018679006,
      "loss": 2.7726,
      "step": 88783
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0826897621154785,
      "learning_rate": 0.00040574069215241256,
      "loss": 3.0229,
      "step": 88784
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0624780654907227,
      "learning_rate": 0.00040573686409837593,
      "loss": 3.1034,
      "step": 88785
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8103421926498413,
      "learning_rate": 0.0004057330360246809,
      "loss": 3.1163,
      "step": 88786
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9044572114944458,
      "learning_rate": 0.000405729207931328,
      "loss": 2.824,
      "step": 88787
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5930535793304443,
      "learning_rate": 0.0004057253798183181,
      "loss": 3.2243,
      "step": 88788
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0624780654907227,
      "learning_rate": 0.0004057215516856518,
      "loss": 2.9875,
      "step": 88789
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7542846202850342,
      "learning_rate": 0.0004057177235333299,
      "loss": 3.0518,
      "step": 88790
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0070478916168213,
      "learning_rate": 0.0004057138953613531,
      "loss": 3.1432,
      "step": 88791
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0017504692077637,
      "learning_rate": 0.0004057100671697221,
      "loss": 3.0885,
      "step": 88792
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.214292526245117,
      "learning_rate": 0.00040570623895843755,
      "loss": 2.9013,
      "step": 88793
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7715281248092651,
      "learning_rate": 0.0004057024107275002,
      "loss": 2.8727,
      "step": 88794
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7310986518859863,
      "learning_rate": 0.00040569858247691085,
      "loss": 3.0221,
      "step": 88795
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6497546434402466,
      "learning_rate": 0.0004056947542066701,
      "loss": 3.0584,
      "step": 88796
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6518375873565674,
      "learning_rate": 0.0004056909259167787,
      "loss": 2.7408,
      "step": 88797
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7253131866455078,
      "learning_rate": 0.00040568709760723746,
      "loss": 3.0992,
      "step": 88798
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6577237844467163,
      "learning_rate": 0.00040568326927804685,
      "loss": 3.0053,
      "step": 88799
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5370898246765137,
      "learning_rate": 0.00040567944092920774,
      "loss": 3.1541,
      "step": 88800
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7255113124847412,
      "learning_rate": 0.0004056756125607208,
      "loss": 3.0821,
      "step": 88801
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8679420948028564,
      "learning_rate": 0.0004056717841725869,
      "loss": 3.2236,
      "step": 88802
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.806911826133728,
      "learning_rate": 0.0004056679557648066,
      "loss": 2.9104,
      "step": 88803
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9078054428100586,
      "learning_rate": 0.0004056641273373806,
      "loss": 2.9974,
      "step": 88804
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8825870752334595,
      "learning_rate": 0.00040566029889030955,
      "loss": 2.9492,
      "step": 88805
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7434494495391846,
      "learning_rate": 0.00040565647042359435,
      "loss": 2.802,
      "step": 88806
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.369266986846924,
      "learning_rate": 0.0004056526419372356,
      "loss": 3.0439,
      "step": 88807
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6245139837265015,
      "learning_rate": 0.0004056488134312341,
      "loss": 3.0686,
      "step": 88808
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5379358530044556,
      "learning_rate": 0.00040564498490559044,
      "loss": 3.0992,
      "step": 88809
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6101701259613037,
      "learning_rate": 0.0004056411563603053,
      "loss": 2.7969,
      "step": 88810
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0554535388946533,
      "learning_rate": 0.0004056373277953795,
      "loss": 3.2505,
      "step": 88811
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5531545877456665,
      "learning_rate": 0.00040563349921081377,
      "loss": 3.3407,
      "step": 88812
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.78981351852417,
      "learning_rate": 0.00040562967060660883,
      "loss": 2.9347,
      "step": 88813
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.0242691040039062,
      "learning_rate": 0.00040562584198276525,
      "loss": 2.9959,
      "step": 88814
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.084787130355835,
      "learning_rate": 0.0004056220133392839,
      "loss": 2.9749,
      "step": 88815
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.800602674484253,
      "learning_rate": 0.0004056181846761654,
      "loss": 2.968,
      "step": 88816
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0911872386932373,
      "learning_rate": 0.0004056143559934105,
      "loss": 3.0622,
      "step": 88817
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8730578422546387,
      "learning_rate": 0.00040561052729101986,
      "loss": 3.087,
      "step": 88818
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.237874746322632,
      "learning_rate": 0.0004056066985689943,
      "loss": 3.0124,
      "step": 88819
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.231846809387207,
      "learning_rate": 0.00040560286982733437,
      "loss": 2.6486,
      "step": 88820
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.203831911087036,
      "learning_rate": 0.0004055990410660409,
      "loss": 2.9612,
      "step": 88821
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.634086847305298,
      "learning_rate": 0.0004055952122851146,
      "loss": 2.8769,
      "step": 88822
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.012070894241333,
      "learning_rate": 0.00040559138348455614,
      "loss": 3.0821,
      "step": 88823
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.708524227142334,
      "learning_rate": 0.0004055875546643662,
      "loss": 3.092,
      "step": 88824
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7366554737091064,
      "learning_rate": 0.0004055837258245457,
      "loss": 2.9711,
      "step": 88825
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.478076934814453,
      "learning_rate": 0.00040557989696509507,
      "loss": 3.0033,
      "step": 88826
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9496548175811768,
      "learning_rate": 0.0004055760680860151,
      "loss": 3.0871,
      "step": 88827
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4094271659851074,
      "learning_rate": 0.0004055722391873066,
      "loss": 2.8831,
      "step": 88828
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7832963466644287,
      "learning_rate": 0.00040556841026897025,
      "loss": 2.7348,
      "step": 88829
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5643951892852783,
      "learning_rate": 0.00040556458133100674,
      "loss": 2.8635,
      "step": 88830
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7970192432403564,
      "learning_rate": 0.0004055607523734168,
      "loss": 3.0842,
      "step": 88831
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6635857820510864,
      "learning_rate": 0.0004055569233962011,
      "loss": 3.2141,
      "step": 88832
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.268329381942749,
      "learning_rate": 0.0004055530943993603,
      "loss": 2.666,
      "step": 88833
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6150778532028198,
      "learning_rate": 0.0004055492653828953,
      "loss": 2.9573,
      "step": 88834
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.125277280807495,
      "learning_rate": 0.00040554543634680666,
      "loss": 2.9374,
      "step": 88835
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.379948616027832,
      "learning_rate": 0.0004055416072910951,
      "loss": 2.7082,
      "step": 88836
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0276286602020264,
      "learning_rate": 0.0004055377782157614,
      "loss": 3.1359,
      "step": 88837
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5421783924102783,
      "learning_rate": 0.0004055339491208062,
      "loss": 2.9159,
      "step": 88838
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8907179832458496,
      "learning_rate": 0.00040553012000623024,
      "loss": 2.9825,
      "step": 88839
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5224900245666504,
      "learning_rate": 0.0004055262908720343,
      "loss": 2.9698,
      "step": 88840
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5720316171646118,
      "learning_rate": 0.000405522461718219,
      "loss": 3.021,
      "step": 88841
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9527002573013306,
      "learning_rate": 0.000405518632544785,
      "loss": 2.9656,
      "step": 88842
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.93974769115448,
      "learning_rate": 0.0004055148033517333,
      "loss": 3.2057,
      "step": 88843
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8066649436950684,
      "learning_rate": 0.0004055109741390642,
      "loss": 3.2267,
      "step": 88844
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5694098472595215,
      "learning_rate": 0.0004055071449067787,
      "loss": 3.1981,
      "step": 88845
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.726319670677185,
      "learning_rate": 0.0004055033156548774,
      "loss": 3.2688,
      "step": 88846
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.238827705383301,
      "learning_rate": 0.00040549948638336113,
      "loss": 2.8669,
      "step": 88847
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4128010272979736,
      "learning_rate": 0.0004054956570922304,
      "loss": 3.2059,
      "step": 88848
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.685756802558899,
      "learning_rate": 0.0004054918277814861,
      "loss": 2.8954,
      "step": 88849
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.747861623764038,
      "learning_rate": 0.0004054879984511289,
      "loss": 2.9513,
      "step": 88850
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.116903066635132,
      "learning_rate": 0.00040548416910115946,
      "loss": 3.1516,
      "step": 88851
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4723879098892212,
      "learning_rate": 0.0004054803397315785,
      "loss": 3.095,
      "step": 88852
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8217769861221313,
      "learning_rate": 0.0004054765103423868,
      "loss": 2.9013,
      "step": 88853
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.731359601020813,
      "learning_rate": 0.000405472680933585,
      "loss": 3.0575,
      "step": 88854
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6507763862609863,
      "learning_rate": 0.00040546885150517375,
      "loss": 3.2908,
      "step": 88855
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.485567569732666,
      "learning_rate": 0.000405465022057154,
      "loss": 3.1367,
      "step": 88856
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5663717985153198,
      "learning_rate": 0.0004054611925895263,
      "loss": 3.0207,
      "step": 88857
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6458325386047363,
      "learning_rate": 0.0004054573631022912,
      "loss": 3.0454,
      "step": 88858
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9502365589141846,
      "learning_rate": 0.0004054535335954498,
      "loss": 2.9743,
      "step": 88859
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6026109457015991,
      "learning_rate": 0.00040544970406900243,
      "loss": 3.202,
      "step": 88860
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.239976167678833,
      "learning_rate": 0.00040544587452295003,
      "loss": 2.9355,
      "step": 88861
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5221803188323975,
      "learning_rate": 0.0004054420449572933,
      "loss": 3.1517,
      "step": 88862
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8881770372390747,
      "learning_rate": 0.00040543821537203286,
      "loss": 3.1799,
      "step": 88863
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6517126560211182,
      "learning_rate": 0.00040543438576716944,
      "loss": 3.1037,
      "step": 88864
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5799925327301025,
      "learning_rate": 0.0004054305561427039,
      "loss": 2.8374,
      "step": 88865
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7179988622665405,
      "learning_rate": 0.0004054267264986367,
      "loss": 2.9766,
      "step": 88866
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8850582838058472,
      "learning_rate": 0.0004054228968349687,
      "loss": 3.129,
      "step": 88867
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.636685848236084,
      "learning_rate": 0.00040541906715170067,
      "loss": 3.0611,
      "step": 88868
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8963148593902588,
      "learning_rate": 0.00040541523744883317,
      "loss": 3.2114,
      "step": 88869
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7674624919891357,
      "learning_rate": 0.00040541140772636694,
      "loss": 2.9258,
      "step": 88870
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7561877965927124,
      "learning_rate": 0.00040540757798430293,
      "loss": 2.7567,
      "step": 88871
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9277313947677612,
      "learning_rate": 0.0004054037482226415,
      "loss": 2.9243,
      "step": 88872
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1803829669952393,
      "learning_rate": 0.0004053999184413836,
      "loss": 2.8937,
      "step": 88873
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7544255256652832,
      "learning_rate": 0.0004053960886405298,
      "loss": 2.9617,
      "step": 88874
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.076606273651123,
      "learning_rate": 0.00040539225882008084,
      "loss": 2.7008,
      "step": 88875
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3835182189941406,
      "learning_rate": 0.00040538842898003754,
      "loss": 2.9825,
      "step": 88876
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.594226121902466,
      "learning_rate": 0.0004053845991204006,
      "loss": 2.8285,
      "step": 88877
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8240511417388916,
      "learning_rate": 0.0004053807692411706,
      "loss": 2.9663,
      "step": 88878
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1841213703155518,
      "learning_rate": 0.0004053769393423483,
      "loss": 2.9026,
      "step": 88879
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.642120361328125,
      "learning_rate": 0.00040537310942393454,
      "loss": 3.0012,
      "step": 88880
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.020339012145996,
      "learning_rate": 0.0004053692794859299,
      "loss": 2.8374,
      "step": 88881
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8469789028167725,
      "learning_rate": 0.000405365449528335,
      "loss": 2.9786,
      "step": 88882
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.270477533340454,
      "learning_rate": 0.0004053616195511508,
      "loss": 2.8577,
      "step": 88883
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7875099182128906,
      "learning_rate": 0.0004053577895543778,
      "loss": 3.2787,
      "step": 88884
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5285844802856445,
      "learning_rate": 0.00040535395953801687,
      "loss": 2.8032,
      "step": 88885
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.095255136489868,
      "learning_rate": 0.00040535012950206864,
      "loss": 2.9771,
      "step": 88886
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5162445306777954,
      "learning_rate": 0.0004053462994465338,
      "loss": 2.9717,
      "step": 88887
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.319830894470215,
      "learning_rate": 0.0004053424693714131,
      "loss": 2.7402,
      "step": 88888
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1188061237335205,
      "learning_rate": 0.00040533863927670724,
      "loss": 2.9847,
      "step": 88889
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6773518323898315,
      "learning_rate": 0.000405334809162417,
      "loss": 3.0186,
      "step": 88890
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.984058380126953,
      "learning_rate": 0.000405330979028543,
      "loss": 3.3339,
      "step": 88891
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9588786363601685,
      "learning_rate": 0.0004053271488750859,
      "loss": 2.8916,
      "step": 88892
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7378630638122559,
      "learning_rate": 0.0004053233187020466,
      "loss": 3.0719,
      "step": 88893
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9379826784133911,
      "learning_rate": 0.0004053194885094257,
      "loss": 2.7694,
      "step": 88894
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.253843307495117,
      "learning_rate": 0.00040531565829722387,
      "loss": 3.0408,
      "step": 88895
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6593122482299805,
      "learning_rate": 0.00040531182806544184,
      "loss": 3.1079,
      "step": 88896
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2587263584136963,
      "learning_rate": 0.0004053079978140804,
      "loss": 2.9891,
      "step": 88897
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6191390752792358,
      "learning_rate": 0.00040530416754314027,
      "loss": 2.7525,
      "step": 88898
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.539311408996582,
      "learning_rate": 0.00040530033725262203,
      "loss": 3.1592,
      "step": 88899
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6580052375793457,
      "learning_rate": 0.0004052965069425265,
      "loss": 3.2394,
      "step": 88900
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4914175271987915,
      "learning_rate": 0.00040529267661285436,
      "loss": 3.0966,
      "step": 88901
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7226040363311768,
      "learning_rate": 0.00040528884626360635,
      "loss": 3.2359,
      "step": 88902
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.222064256668091,
      "learning_rate": 0.00040528501589478307,
      "loss": 2.9472,
      "step": 88903
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.734148621559143,
      "learning_rate": 0.00040528118550638544,
      "loss": 3.0432,
      "step": 88904
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.606207251548767,
      "learning_rate": 0.00040527735509841395,
      "loss": 3.1984,
      "step": 88905
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5925893783569336,
      "learning_rate": 0.00040527352467086946,
      "loss": 3.0972,
      "step": 88906
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8610203266143799,
      "learning_rate": 0.00040526969422375263,
      "loss": 2.8981,
      "step": 88907
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6983197927474976,
      "learning_rate": 0.0004052658637570641,
      "loss": 3.2901,
      "step": 88908
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7193251848220825,
      "learning_rate": 0.00040526203327080477,
      "loss": 3.1075,
      "step": 88909
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2499823570251465,
      "learning_rate": 0.0004052582027649753,
      "loss": 3.1484,
      "step": 88910
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.951883554458618,
      "learning_rate": 0.00040525437223957615,
      "loss": 2.755,
      "step": 88911
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.092865467071533,
      "learning_rate": 0.00040525054169460834,
      "loss": 2.9713,
      "step": 88912
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5839999914169312,
      "learning_rate": 0.0004052467111300724,
      "loss": 3.0487,
      "step": 88913
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8435909748077393,
      "learning_rate": 0.0004052428805459692,
      "loss": 3.0806,
      "step": 88914
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.544877767562866,
      "learning_rate": 0.0004052390499422993,
      "loss": 2.9894,
      "step": 88915
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.689896821975708,
      "learning_rate": 0.0004052352193190635,
      "loss": 2.7458,
      "step": 88916
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1133854389190674,
      "learning_rate": 0.00040523138867626254,
      "loss": 2.9791,
      "step": 88917
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9799201488494873,
      "learning_rate": 0.00040522755801389695,
      "loss": 3.1501,
      "step": 88918
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.1191258430480957,
      "learning_rate": 0.0004052237273319676,
      "loss": 2.9933,
      "step": 88919
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.6456971168518066,
      "learning_rate": 0.0004052198966304753,
      "loss": 2.802,
      "step": 88920
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.134425401687622,
      "learning_rate": 0.0004052160659094205,
      "loss": 2.8652,
      "step": 88921
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6083955764770508,
      "learning_rate": 0.0004052122351688041,
      "loss": 2.9817,
      "step": 88922
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8969199657440186,
      "learning_rate": 0.0004052084044086268,
      "loss": 2.91,
      "step": 88923
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.3306005001068115,
      "learning_rate": 0.00040520457362888917,
      "loss": 2.8583,
      "step": 88924
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.204293727874756,
      "learning_rate": 0.0004052007428295921,
      "loss": 3.3593,
      "step": 88925
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5084985494613647,
      "learning_rate": 0.0004051969120107362,
      "loss": 3.0189,
      "step": 88926
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6404807567596436,
      "learning_rate": 0.00040519308117232226,
      "loss": 3.0382,
      "step": 88927
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.835787773132324,
      "learning_rate": 0.00040518925031435087,
      "loss": 2.7364,
      "step": 88928
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8948348760604858,
      "learning_rate": 0.00040518541943682286,
      "loss": 3.1698,
      "step": 88929
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0368435382843018,
      "learning_rate": 0.00040518158853973887,
      "loss": 3.153,
      "step": 88930
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5823256969451904,
      "learning_rate": 0.0004051777576230996,
      "loss": 2.8489,
      "step": 88931
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2104973793029785,
      "learning_rate": 0.00040517392668690586,
      "loss": 3.1549,
      "step": 88932
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8264909982681274,
      "learning_rate": 0.00040517009573115824,
      "loss": 3.0148,
      "step": 88933
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8019896745681763,
      "learning_rate": 0.0004051662647558576,
      "loss": 2.8785,
      "step": 88934
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9437175989151,
      "learning_rate": 0.0004051624337610045,
      "loss": 3.1597,
      "step": 88935
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6124751567840576,
      "learning_rate": 0.00040515860274659973,
      "loss": 2.9987,
      "step": 88936
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6998242139816284,
      "learning_rate": 0.000405154771712644,
      "loss": 2.8301,
      "step": 88937
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7350499629974365,
      "learning_rate": 0.00040515094065913805,
      "loss": 2.9762,
      "step": 88938
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.063281297683716,
      "learning_rate": 0.0004051471095860825,
      "loss": 3.1076,
      "step": 88939
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9042329788208008,
      "learning_rate": 0.0004051432784934781,
      "loss": 2.9043,
      "step": 88940
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8135485649108887,
      "learning_rate": 0.00040513944738132567,
      "loss": 3.2548,
      "step": 88941
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5738698244094849,
      "learning_rate": 0.00040513561624962575,
      "loss": 2.9088,
      "step": 88942
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9921356439590454,
      "learning_rate": 0.00040513178509837914,
      "loss": 2.956,
      "step": 88943
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.215451240539551,
      "learning_rate": 0.00040512795392758663,
      "loss": 3.2868,
      "step": 88944
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.836873173713684,
      "learning_rate": 0.0004051241227372488,
      "loss": 2.9409,
      "step": 88945
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8753273487091064,
      "learning_rate": 0.00040512029152736635,
      "loss": 2.9247,
      "step": 88946
    },
    {
      "epoch": 1.16,
      "grad_norm": 4.1138410568237305,
      "learning_rate": 0.00040511646029794013,
      "loss": 2.9581,
      "step": 88947
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1055941581726074,
      "learning_rate": 0.0004051126290489707,
      "loss": 3.0461,
      "step": 88948
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9804328680038452,
      "learning_rate": 0.00040510879778045893,
      "loss": 2.9672,
      "step": 88949
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.815044641494751,
      "learning_rate": 0.00040510496649240547,
      "loss": 3.0202,
      "step": 88950
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.9718594551086426,
      "learning_rate": 0.00040510113518481093,
      "loss": 2.9889,
      "step": 88951
    },
    {
      "epoch": 1.16,
      "grad_norm": 4.205275535583496,
      "learning_rate": 0.00040509730385767604,
      "loss": 2.8104,
      "step": 88952
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.083930015563965,
      "learning_rate": 0.0004050934725110017,
      "loss": 2.9428,
      "step": 88953
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4312620162963867,
      "learning_rate": 0.0004050896411447885,
      "loss": 3.1359,
      "step": 88954
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.421905517578125,
      "learning_rate": 0.00040508580975903713,
      "loss": 2.9559,
      "step": 88955
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3274600505828857,
      "learning_rate": 0.00040508197835374834,
      "loss": 3.0741,
      "step": 88956
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3819315433502197,
      "learning_rate": 0.0004050781469289228,
      "loss": 2.9093,
      "step": 88957
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7963937520980835,
      "learning_rate": 0.0004050743154845612,
      "loss": 3.1687,
      "step": 88958
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.320070505142212,
      "learning_rate": 0.0004050704840206644,
      "loss": 2.8563,
      "step": 88959
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9282593727111816,
      "learning_rate": 0.00040506665253723295,
      "loss": 3.1444,
      "step": 88960
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.2055435180664062,
      "learning_rate": 0.0004050628210342676,
      "loss": 3.0677,
      "step": 88961
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9139481782913208,
      "learning_rate": 0.00040505898951176915,
      "loss": 3.0317,
      "step": 88962
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9648505449295044,
      "learning_rate": 0.00040505515796973823,
      "loss": 2.8703,
      "step": 88963
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.898106336593628,
      "learning_rate": 0.00040505132640817563,
      "loss": 2.7809,
      "step": 88964
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6457475423812866,
      "learning_rate": 0.00040504749482708194,
      "loss": 3.1174,
      "step": 88965
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.854024052619934,
      "learning_rate": 0.00040504366322645797,
      "loss": 2.8259,
      "step": 88966
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7555277347564697,
      "learning_rate": 0.0004050398316063043,
      "loss": 2.94,
      "step": 88967
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4223600625991821,
      "learning_rate": 0.00040503599996662186,
      "loss": 2.6632,
      "step": 88968
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7646209001541138,
      "learning_rate": 0.0004050321683074112,
      "loss": 2.9252,
      "step": 88969
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.157031297683716,
      "learning_rate": 0.0004050283366286731,
      "loss": 3.2195,
      "step": 88970
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8752785921096802,
      "learning_rate": 0.0004050245049304083,
      "loss": 2.8232,
      "step": 88971
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.668046236038208,
      "learning_rate": 0.0004050206732126174,
      "loss": 3.234,
      "step": 88972
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.623177409172058,
      "learning_rate": 0.0004050168414753011,
      "loss": 3.0372,
      "step": 88973
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9163933992385864,
      "learning_rate": 0.00040501300971846027,
      "loss": 2.9158,
      "step": 88974
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7436600923538208,
      "learning_rate": 0.00040500917794209553,
      "loss": 3.0261,
      "step": 88975
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8375569581985474,
      "learning_rate": 0.00040500534614620757,
      "loss": 2.9596,
      "step": 88976
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5294984579086304,
      "learning_rate": 0.00040500151433079713,
      "loss": 2.9752,
      "step": 88977
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.475334644317627,
      "learning_rate": 0.00040499768249586493,
      "loss": 3.2408,
      "step": 88978
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9999146461486816,
      "learning_rate": 0.0004049938506414118,
      "loss": 3.0869,
      "step": 88979
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.017127275466919,
      "learning_rate": 0.0004049900187674382,
      "loss": 2.9405,
      "step": 88980
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1502363681793213,
      "learning_rate": 0.000404986186873945,
      "loss": 2.9636,
      "step": 88981
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.038224458694458,
      "learning_rate": 0.0004049823549609329,
      "loss": 3.1304,
      "step": 88982
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5215474367141724,
      "learning_rate": 0.00040497852302840255,
      "loss": 2.887,
      "step": 88983
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8925693035125732,
      "learning_rate": 0.0004049746910763548,
      "loss": 3.0069,
      "step": 88984
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7718505859375,
      "learning_rate": 0.0004049708591047902,
      "loss": 3.1032,
      "step": 88985
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5541484355926514,
      "learning_rate": 0.0004049670271137096,
      "loss": 2.9164,
      "step": 88986
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7013529539108276,
      "learning_rate": 0.0004049631951031136,
      "loss": 2.8754,
      "step": 88987
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6479321718215942,
      "learning_rate": 0.00040495936307300296,
      "loss": 3.1059,
      "step": 88988
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7152634859085083,
      "learning_rate": 0.00040495553102337846,
      "loss": 2.9502,
      "step": 88989
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7209633588790894,
      "learning_rate": 0.0004049516989542407,
      "loss": 3.0364,
      "step": 88990
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6891322135925293,
      "learning_rate": 0.00040494786686559033,
      "loss": 3.316,
      "step": 88991
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6890747547149658,
      "learning_rate": 0.0004049440347574283,
      "loss": 3.1042,
      "step": 88992
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6445666551589966,
      "learning_rate": 0.0004049402026297552,
      "loss": 3.1149,
      "step": 88993
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9265501499176025,
      "learning_rate": 0.00040493637048257165,
      "loss": 3.0956,
      "step": 88994
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9334399700164795,
      "learning_rate": 0.00040493253831587846,
      "loss": 3.0575,
      "step": 88995
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5372772216796875,
      "learning_rate": 0.00040492870612967647,
      "loss": 3.1125,
      "step": 88996
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.087707996368408,
      "learning_rate": 0.00040492487392396615,
      "loss": 2.8374,
      "step": 88997
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7022888660430908,
      "learning_rate": 0.0004049210416987483,
      "loss": 3.1232,
      "step": 88998
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1921021938323975,
      "learning_rate": 0.0004049172094540237,
      "loss": 3.0017,
      "step": 88999
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2410478591918945,
      "learning_rate": 0.000404913377189793,
      "loss": 3.0284,
      "step": 89000
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6302436590194702,
      "learning_rate": 0.0004049095449060568,
      "loss": 3.17,
      "step": 89001
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5889816284179688,
      "learning_rate": 0.0004049057126028161,
      "loss": 2.9209,
      "step": 89002
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8850017786026,
      "learning_rate": 0.0004049018802800714,
      "loss": 3.0372,
      "step": 89003
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.908517837524414,
      "learning_rate": 0.00040489804793782336,
      "loss": 2.9661,
      "step": 89004
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5774444341659546,
      "learning_rate": 0.00040489421557607293,
      "loss": 3.2725,
      "step": 89005
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8299341201782227,
      "learning_rate": 0.00040489038319482064,
      "loss": 2.9703,
      "step": 89006
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9363601207733154,
      "learning_rate": 0.0004048865507940672,
      "loss": 3.123,
      "step": 89007
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7863264083862305,
      "learning_rate": 0.00040488271837381347,
      "loss": 3.046,
      "step": 89008
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7639342546463013,
      "learning_rate": 0.00040487888593405995,
      "loss": 3.1243,
      "step": 89009
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1314034461975098,
      "learning_rate": 0.0004048750534748075,
      "loss": 3.0659,
      "step": 89010
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3193135261535645,
      "learning_rate": 0.00040487122099605686,
      "loss": 3.0776,
      "step": 89011
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.702114224433899,
      "learning_rate": 0.00040486738849780863,
      "loss": 2.8881,
      "step": 89012
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3048479557037354,
      "learning_rate": 0.00040486355598006357,
      "loss": 2.934,
      "step": 89013
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.624458074569702,
      "learning_rate": 0.00040485972344282245,
      "loss": 2.9031,
      "step": 89014
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.219226837158203,
      "learning_rate": 0.0004048558908860858,
      "loss": 2.9352,
      "step": 89015
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8599967956542969,
      "learning_rate": 0.0004048520583098545,
      "loss": 2.8668,
      "step": 89016
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7108285427093506,
      "learning_rate": 0.0004048482257141293,
      "loss": 2.7848,
      "step": 89017
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9095853567123413,
      "learning_rate": 0.0004048443930989108,
      "loss": 3.0622,
      "step": 89018
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.543786883354187,
      "learning_rate": 0.00040484056046419974,
      "loss": 2.8043,
      "step": 89019
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5174264907836914,
      "learning_rate": 0.0004048367278099969,
      "loss": 3.0891,
      "step": 89020
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.10133695602417,
      "learning_rate": 0.0004048328951363029,
      "loss": 2.8586,
      "step": 89021
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.586590051651001,
      "learning_rate": 0.00040482906244311843,
      "loss": 3.1573,
      "step": 89022
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4551634788513184,
      "learning_rate": 0.0004048252297304443,
      "loss": 3.138,
      "step": 89023
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8590927124023438,
      "learning_rate": 0.0004048213969982812,
      "loss": 2.9676,
      "step": 89024
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7846567630767822,
      "learning_rate": 0.0004048175642466298,
      "loss": 3.0275,
      "step": 89025
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6030867099761963,
      "learning_rate": 0.0004048137314754908,
      "loss": 2.9123,
      "step": 89026
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5691981315612793,
      "learning_rate": 0.000404809898684865,
      "loss": 3.0284,
      "step": 89027
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3480849266052246,
      "learning_rate": 0.00040480606587475304,
      "loss": 3.0181,
      "step": 89028
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.08301043510437,
      "learning_rate": 0.0004048022330451557,
      "loss": 2.8853,
      "step": 89029
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5326160192489624,
      "learning_rate": 0.0004047984001960736,
      "loss": 2.6319,
      "step": 89030
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.673187255859375,
      "learning_rate": 0.0004047945673275075,
      "loss": 3.0774,
      "step": 89031
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7275807857513428,
      "learning_rate": 0.0004047907344394581,
      "loss": 3.0023,
      "step": 89032
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7272188663482666,
      "learning_rate": 0.0004047869015319262,
      "loss": 2.9537,
      "step": 89033
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8276889324188232,
      "learning_rate": 0.0004047830686049123,
      "loss": 2.9024,
      "step": 89034
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5543099641799927,
      "learning_rate": 0.0004047792356584174,
      "loss": 2.9216,
      "step": 89035
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8146767616271973,
      "learning_rate": 0.00040477540269244196,
      "loss": 2.9479,
      "step": 89036
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.166675567626953,
      "learning_rate": 0.00040477156970698683,
      "loss": 3.083,
      "step": 89037
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8396897315979004,
      "learning_rate": 0.00040476773670205276,
      "loss": 2.7437,
      "step": 89038
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2098779678344727,
      "learning_rate": 0.00040476390367764025,
      "loss": 2.9064,
      "step": 89039
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.9768929481506348,
      "learning_rate": 0.0004047600706337502,
      "loss": 3.0416,
      "step": 89040
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9202415943145752,
      "learning_rate": 0.0004047562375703834,
      "loss": 3.0034,
      "step": 89041
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.75532066822052,
      "learning_rate": 0.0004047524044875402,
      "loss": 3.1073,
      "step": 89042
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6471362113952637,
      "learning_rate": 0.00040474857138522177,
      "loss": 3.0634,
      "step": 89043
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.057535171508789,
      "learning_rate": 0.0004047447382634285,
      "loss": 3.0923,
      "step": 89044
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9196857213974,
      "learning_rate": 0.00040474090512216125,
      "loss": 2.7675,
      "step": 89045
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5820887088775635,
      "learning_rate": 0.0004047370719614206,
      "loss": 3.1041,
      "step": 89046
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6733345985412598,
      "learning_rate": 0.0004047332387812074,
      "loss": 3.1307,
      "step": 89047
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.151839017868042,
      "learning_rate": 0.0004047294055815224,
      "loss": 3.0588,
      "step": 89048
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7705557346343994,
      "learning_rate": 0.0004047255723623661,
      "loss": 3.0127,
      "step": 89049
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7772016525268555,
      "learning_rate": 0.00040472173912373935,
      "loss": 2.9966,
      "step": 89050
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6911264657974243,
      "learning_rate": 0.000404717905865643,
      "loss": 3.1097,
      "step": 89051
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9219233989715576,
      "learning_rate": 0.0004047140725880774,
      "loss": 2.7643,
      "step": 89052
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.626593828201294,
      "learning_rate": 0.0004047102392910436,
      "loss": 2.8863,
      "step": 89053
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6734933853149414,
      "learning_rate": 0.0004047064059745422,
      "loss": 3.2719,
      "step": 89054
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7139546871185303,
      "learning_rate": 0.0004047025726385739,
      "loss": 2.966,
      "step": 89055
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.762786865234375,
      "learning_rate": 0.00040469873928313937,
      "loss": 3.08,
      "step": 89056
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.745579481124878,
      "learning_rate": 0.0004046949059082394,
      "loss": 3.113,
      "step": 89057
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5447726249694824,
      "learning_rate": 0.00040469107251387467,
      "loss": 3.037,
      "step": 89058
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6972545385360718,
      "learning_rate": 0.00040468723910004584,
      "loss": 2.9183,
      "step": 89059
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9177321195602417,
      "learning_rate": 0.00040468340566675374,
      "loss": 3.1831,
      "step": 89060
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6397594213485718,
      "learning_rate": 0.000404679572213999,
      "loss": 3.1731,
      "step": 89061
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.01137638092041,
      "learning_rate": 0.00040467573874178233,
      "loss": 2.6542,
      "step": 89062
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0485341548919678,
      "learning_rate": 0.0004046719052501046,
      "loss": 3.0291,
      "step": 89063
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8455405235290527,
      "learning_rate": 0.0004046680717389662,
      "loss": 3.0586,
      "step": 89064
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.796763300895691,
      "learning_rate": 0.00040466423820836807,
      "loss": 3.172,
      "step": 89065
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.079413414001465,
      "learning_rate": 0.00040466040465831087,
      "loss": 3.0225,
      "step": 89066
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7994024753570557,
      "learning_rate": 0.00040465657108879543,
      "loss": 3.006,
      "step": 89067
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7911553382873535,
      "learning_rate": 0.00040465273749982225,
      "loss": 3.1454,
      "step": 89068
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5534844398498535,
      "learning_rate": 0.0004046489038913922,
      "loss": 3.0181,
      "step": 89069
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.262803554534912,
      "learning_rate": 0.00040464507026350603,
      "loss": 3.1365,
      "step": 89070
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9329057931900024,
      "learning_rate": 0.00040464123661616423,
      "loss": 3.0956,
      "step": 89071
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9621597528457642,
      "learning_rate": 0.0004046374029493677,
      "loss": 2.9975,
      "step": 89072
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4751793146133423,
      "learning_rate": 0.000404633569263117,
      "loss": 3.1576,
      "step": 89073
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6702404022216797,
      "learning_rate": 0.00040462973555741315,
      "loss": 2.8742,
      "step": 89074
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.757902979850769,
      "learning_rate": 0.00040462590183225653,
      "loss": 3.0596,
      "step": 89075
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.240283489227295,
      "learning_rate": 0.000404622068087648,
      "loss": 2.8331,
      "step": 89076
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7055096626281738,
      "learning_rate": 0.00040461823432358824,
      "loss": 3.072,
      "step": 89077
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.019429922103882,
      "learning_rate": 0.000404614400540078,
      "loss": 2.8783,
      "step": 89078
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.690303087234497,
      "learning_rate": 0.0004046105667371179,
      "loss": 3.0615,
      "step": 89079
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6355113983154297,
      "learning_rate": 0.0004046067329147088,
      "loss": 3.0076,
      "step": 89080
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6700365543365479,
      "learning_rate": 0.0004046028990728513,
      "loss": 3.0044,
      "step": 89081
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7372208833694458,
      "learning_rate": 0.0004045990652115462,
      "loss": 2.9815,
      "step": 89082
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.194350004196167,
      "learning_rate": 0.00040459523133079404,
      "loss": 2.9728,
      "step": 89083
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.647220492362976,
      "learning_rate": 0.0004045913974305958,
      "loss": 3.1546,
      "step": 89084
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7456568479537964,
      "learning_rate": 0.00040458756351095197,
      "loss": 3.1115,
      "step": 89085
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7391916513442993,
      "learning_rate": 0.0004045837295718633,
      "loss": 2.9717,
      "step": 89086
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0398964881896973,
      "learning_rate": 0.0004045798956133306,
      "loss": 2.9819,
      "step": 89087
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4970721006393433,
      "learning_rate": 0.0004045760616353545,
      "loss": 2.8623,
      "step": 89088
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.732942819595337,
      "learning_rate": 0.00040457222763793575,
      "loss": 3.0458,
      "step": 89089
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7362414598464966,
      "learning_rate": 0.0004045683936210751,
      "loss": 3.2478,
      "step": 89090
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5717989206314087,
      "learning_rate": 0.0004045645595847731,
      "loss": 3.1302,
      "step": 89091
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.82637357711792,
      "learning_rate": 0.0004045607255290306,
      "loss": 3.0972,
      "step": 89092
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6234155893325806,
      "learning_rate": 0.00040455689145384833,
      "loss": 2.9253,
      "step": 89093
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6329054832458496,
      "learning_rate": 0.00040455305735922695,
      "loss": 3.0153,
      "step": 89094
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0540266036987305,
      "learning_rate": 0.0004045492232451672,
      "loss": 3.0194,
      "step": 89095
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.43940007686615,
      "learning_rate": 0.0004045453891116698,
      "loss": 2.9956,
      "step": 89096
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5445343255996704,
      "learning_rate": 0.00040454155495873534,
      "loss": 2.9295,
      "step": 89097
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.78127920627594,
      "learning_rate": 0.0004045377207863647,
      "loss": 3.1323,
      "step": 89098
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8223357200622559,
      "learning_rate": 0.0004045338865945585,
      "loss": 3.0729,
      "step": 89099
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4564381837844849,
      "learning_rate": 0.00040453005238331747,
      "loss": 3.1545,
      "step": 89100
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.3852144479751587,
      "learning_rate": 0.0004045262181526423,
      "loss": 2.9951,
      "step": 89101
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6304266452789307,
      "learning_rate": 0.00040452238390253383,
      "loss": 2.7306,
      "step": 89102
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.778685212135315,
      "learning_rate": 0.00040451854963299263,
      "loss": 2.8531,
      "step": 89103
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7198448181152344,
      "learning_rate": 0.00040451471534401945,
      "loss": 2.8998,
      "step": 89104
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9235332012176514,
      "learning_rate": 0.00040451088103561503,
      "loss": 2.8499,
      "step": 89105
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.057474374771118,
      "learning_rate": 0.00040450704670778,
      "loss": 3.057,
      "step": 89106
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6810215711593628,
      "learning_rate": 0.00040450321236051525,
      "loss": 2.9545,
      "step": 89107
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1562681198120117,
      "learning_rate": 0.0004044993779938213,
      "loss": 3.0559,
      "step": 89108
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8330987691879272,
      "learning_rate": 0.00040449554360769894,
      "loss": 3.1561,
      "step": 89109
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7851848602294922,
      "learning_rate": 0.00040449170920214887,
      "loss": 3.1445,
      "step": 89110
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9833437204360962,
      "learning_rate": 0.0004044878747771719,
      "loss": 2.9103,
      "step": 89111
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.684287190437317,
      "learning_rate": 0.0004044840403327686,
      "loss": 3.0375,
      "step": 89112
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.950244426727295,
      "learning_rate": 0.00040448020586893976,
      "loss": 2.9969,
      "step": 89113
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6560016870498657,
      "learning_rate": 0.000404476371385686,
      "loss": 2.8673,
      "step": 89114
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9070308208465576,
      "learning_rate": 0.00040447253688300833,
      "loss": 2.9125,
      "step": 89115
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.99399733543396,
      "learning_rate": 0.000404468702360907,
      "loss": 3.1194,
      "step": 89116
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.04901123046875,
      "learning_rate": 0.0004044648678193831,
      "loss": 2.9685,
      "step": 89117
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9960030317306519,
      "learning_rate": 0.0004044610332584372,
      "loss": 2.791,
      "step": 89118
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6881840229034424,
      "learning_rate": 0.00040445719867807,
      "loss": 2.8293,
      "step": 89119
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5561065673828125,
      "learning_rate": 0.0004044533640782822,
      "loss": 3.1234,
      "step": 89120
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.313526153564453,
      "learning_rate": 0.00040444952945907465,
      "loss": 2.9599,
      "step": 89121
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.453115940093994,
      "learning_rate": 0.0004044456948204479,
      "loss": 3.1969,
      "step": 89122
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5561476945877075,
      "learning_rate": 0.00040444186016240263,
      "loss": 3.1193,
      "step": 89123
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.3630993366241455,
      "learning_rate": 0.0004044380254849398,
      "loss": 3.2091,
      "step": 89124
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8280137777328491,
      "learning_rate": 0.0004044341907880599,
      "loss": 3.0437,
      "step": 89125
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.321537971496582,
      "learning_rate": 0.00040443035607176366,
      "loss": 3.1641,
      "step": 89126
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3714065551757812,
      "learning_rate": 0.00040442652133605196,
      "loss": 2.8528,
      "step": 89127
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5917009115219116,
      "learning_rate": 0.0004044226865809253,
      "loss": 3.1088,
      "step": 89128
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.213352680206299,
      "learning_rate": 0.0004044188518063845,
      "loss": 2.7068,
      "step": 89129
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1325907707214355,
      "learning_rate": 0.00040441501701243035,
      "loss": 3.2641,
      "step": 89130
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8027713298797607,
      "learning_rate": 0.00040441118219906343,
      "loss": 3.2759,
      "step": 89131
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7507286071777344,
      "learning_rate": 0.00040440734736628446,
      "loss": 2.8668,
      "step": 89132
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.997283935546875,
      "learning_rate": 0.0004044035125140942,
      "loss": 3.1016,
      "step": 89133
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.047067165374756,
      "learning_rate": 0.00040439967764249344,
      "loss": 2.9808,
      "step": 89134
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8715139627456665,
      "learning_rate": 0.0004043958427514827,
      "loss": 2.8893,
      "step": 89135
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.170274019241333,
      "learning_rate": 0.00040439200784106287,
      "loss": 2.9894,
      "step": 89136
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7447357177734375,
      "learning_rate": 0.0004043881729112346,
      "loss": 3.0903,
      "step": 89137
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.553121566772461,
      "learning_rate": 0.00040438433796199854,
      "loss": 3.0784,
      "step": 89138
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.732387900352478,
      "learning_rate": 0.00040438050299335555,
      "loss": 3.1967,
      "step": 89139
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.975090980529785,
      "learning_rate": 0.00040437666800530615,
      "loss": 3.1001,
      "step": 89140
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2148799896240234,
      "learning_rate": 0.00040437283299785126,
      "loss": 3.0992,
      "step": 89141
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6705492734909058,
      "learning_rate": 0.00040436899797099144,
      "loss": 3.0163,
      "step": 89142
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.860978364944458,
      "learning_rate": 0.00040436516292472743,
      "loss": 2.8926,
      "step": 89143
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4395060539245605,
      "learning_rate": 0.00040436132785906,
      "loss": 2.9525,
      "step": 89144
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6796088218688965,
      "learning_rate": 0.00040435749277398984,
      "loss": 3.0315,
      "step": 89145
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.537050485610962,
      "learning_rate": 0.00040435365766951757,
      "loss": 2.8831,
      "step": 89146
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0977797508239746,
      "learning_rate": 0.00040434982254564404,
      "loss": 3.0651,
      "step": 89147
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9952147006988525,
      "learning_rate": 0.00040434598740236996,
      "loss": 2.9285,
      "step": 89148
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8878650665283203,
      "learning_rate": 0.0004043421522396959,
      "loss": 3.1138,
      "step": 89149
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2200334072113037,
      "learning_rate": 0.00040433831705762274,
      "loss": 3.008,
      "step": 89150
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5542640686035156,
      "learning_rate": 0.0004043344818561511,
      "loss": 2.7957,
      "step": 89151
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8832037448883057,
      "learning_rate": 0.0004043306466352817,
      "loss": 3.3943,
      "step": 89152
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.776406168937683,
      "learning_rate": 0.00040432681139501524,
      "loss": 2.9981,
      "step": 89153
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3802506923675537,
      "learning_rate": 0.0004043229761353525,
      "loss": 2.9108,
      "step": 89154
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9209644794464111,
      "learning_rate": 0.0004043191408562942,
      "loss": 2.9559,
      "step": 89155
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1677157878875732,
      "learning_rate": 0.0004043153055578408,
      "loss": 3.1575,
      "step": 89156
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.357673168182373,
      "learning_rate": 0.00040431147023999335,
      "loss": 3.1139,
      "step": 89157
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.936799168586731,
      "learning_rate": 0.0004043076349027524,
      "loss": 3.121,
      "step": 89158
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4889798164367676,
      "learning_rate": 0.0004043037995461187,
      "loss": 3.1037,
      "step": 89159
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4668512344360352,
      "learning_rate": 0.00040429996417009305,
      "loss": 3.1331,
      "step": 89160
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6979484558105469,
      "learning_rate": 0.0004042961287746759,
      "loss": 3.1567,
      "step": 89161
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8473842144012451,
      "learning_rate": 0.0004042922933598683,
      "loss": 3.3743,
      "step": 89162
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.77642822265625,
      "learning_rate": 0.0004042884579256707,
      "loss": 3.0252,
      "step": 89163
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.719796061515808,
      "learning_rate": 0.0004042846224720839,
      "loss": 2.9993,
      "step": 89164
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8202450275421143,
      "learning_rate": 0.0004042807869991086,
      "loss": 2.8564,
      "step": 89165
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8466192483901978,
      "learning_rate": 0.00040427695150674564,
      "loss": 3.1562,
      "step": 89166
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9005109071731567,
      "learning_rate": 0.0004042731159949955,
      "loss": 3.111,
      "step": 89167
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0943734645843506,
      "learning_rate": 0.0004042692804638591,
      "loss": 2.9651,
      "step": 89168
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5925260782241821,
      "learning_rate": 0.0004042654449133371,
      "loss": 2.9887,
      "step": 89169
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7677911520004272,
      "learning_rate": 0.00040426160934343004,
      "loss": 2.9065,
      "step": 89170
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4417519569396973,
      "learning_rate": 0.00040425777375413893,
      "loss": 3.1796,
      "step": 89171
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.524491548538208,
      "learning_rate": 0.0004042539381454643,
      "loss": 2.9356,
      "step": 89172
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.598710536956787,
      "learning_rate": 0.0004042501025174068,
      "loss": 3.1261,
      "step": 89173
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.098660945892334,
      "learning_rate": 0.0004042462668699673,
      "loss": 3.0358,
      "step": 89174
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2857701778411865,
      "learning_rate": 0.00040424243120314656,
      "loss": 3.0463,
      "step": 89175
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7729159593582153,
      "learning_rate": 0.000404238595516945,
      "loss": 2.9318,
      "step": 89176
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8380554914474487,
      "learning_rate": 0.00040423475981136357,
      "loss": 2.992,
      "step": 89177
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4339497089385986,
      "learning_rate": 0.0004042309240864031,
      "loss": 3.0475,
      "step": 89178
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7452123165130615,
      "learning_rate": 0.0004042270883420639,
      "loss": 3.0242,
      "step": 89179
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.746442198753357,
      "learning_rate": 0.00040422325257834707,
      "loss": 3.0922,
      "step": 89180
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7955448627471924,
      "learning_rate": 0.00040421941679525303,
      "loss": 3.0717,
      "step": 89181
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5283551216125488,
      "learning_rate": 0.0004042155809927828,
      "loss": 3.2427,
      "step": 89182
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5279901027679443,
      "learning_rate": 0.00040421174517093686,
      "loss": 3.0183,
      "step": 89183
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6613669395446777,
      "learning_rate": 0.0004042079093297159,
      "loss": 2.9356,
      "step": 89184
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.161109209060669,
      "learning_rate": 0.0004042040734691209,
      "loss": 2.7294,
      "step": 89185
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5836519002914429,
      "learning_rate": 0.00040420023758915225,
      "loss": 3.0145,
      "step": 89186
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7865254878997803,
      "learning_rate": 0.0004041964016898108,
      "loss": 3.1869,
      "step": 89187
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7781766653060913,
      "learning_rate": 0.00040419256577109734,
      "loss": 2.9526,
      "step": 89188
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2616443634033203,
      "learning_rate": 0.0004041887298330125,
      "loss": 3.038,
      "step": 89189
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8249958753585815,
      "learning_rate": 0.000404184893875557,
      "loss": 2.9771,
      "step": 89190
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1361536979675293,
      "learning_rate": 0.00040418105789873164,
      "loss": 2.9377,
      "step": 89191
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8828843832015991,
      "learning_rate": 0.00040417722190253696,
      "loss": 3.04,
      "step": 89192
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8756005764007568,
      "learning_rate": 0.00040417338588697376,
      "loss": 2.9398,
      "step": 89193
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1987271308898926,
      "learning_rate": 0.0004041695498520428,
      "loss": 3.0306,
      "step": 89194
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.3723255395889282,
      "learning_rate": 0.00040416571379774477,
      "loss": 2.7766,
      "step": 89195
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6538820266723633,
      "learning_rate": 0.0004041618777240803,
      "loss": 3.0831,
      "step": 89196
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6674212217330933,
      "learning_rate": 0.0004041580416310503,
      "loss": 2.8037,
      "step": 89197
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8675849437713623,
      "learning_rate": 0.0004041542055186552,
      "loss": 3.0521,
      "step": 89198
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8587726354599,
      "learning_rate": 0.0004041503693868959,
      "loss": 3.1701,
      "step": 89199
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6750825643539429,
      "learning_rate": 0.0004041465332357731,
      "loss": 2.9728,
      "step": 89200
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0281577110290527,
      "learning_rate": 0.00040414269706528757,
      "loss": 3.1537,
      "step": 89201
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9498324394226074,
      "learning_rate": 0.00040413886087543985,
      "loss": 3.0246,
      "step": 89202
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7634695768356323,
      "learning_rate": 0.00040413502466623083,
      "loss": 3.0831,
      "step": 89203
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4943749904632568,
      "learning_rate": 0.0004041311884376611,
      "loss": 2.9288,
      "step": 89204
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.518012762069702,
      "learning_rate": 0.0004041273521897314,
      "loss": 2.8662,
      "step": 89205
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5125612020492554,
      "learning_rate": 0.00040412351592244246,
      "loss": 2.9523,
      "step": 89206
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6407438516616821,
      "learning_rate": 0.00040411967963579505,
      "loss": 3.2216,
      "step": 89207
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2447102069854736,
      "learning_rate": 0.0004041158433297897,
      "loss": 2.9992,
      "step": 89208
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6627230644226074,
      "learning_rate": 0.0004041120070044274,
      "loss": 3.1587,
      "step": 89209
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6923788785934448,
      "learning_rate": 0.0004041081706597086,
      "loss": 3.2749,
      "step": 89210
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.679409384727478,
      "learning_rate": 0.0004041043342956342,
      "loss": 2.8201,
      "step": 89211
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9889733791351318,
      "learning_rate": 0.0004041004979122048,
      "loss": 2.8266,
      "step": 89212
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4528348445892334,
      "learning_rate": 0.0004040966615094212,
      "loss": 3.1399,
      "step": 89213
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7068432569503784,
      "learning_rate": 0.000404092825087284,
      "loss": 2.9198,
      "step": 89214
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5803366899490356,
      "learning_rate": 0.00040408898864579407,
      "loss": 2.9122,
      "step": 89215
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4489336013793945,
      "learning_rate": 0.00040408515218495197,
      "loss": 2.8319,
      "step": 89216
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.891482949256897,
      "learning_rate": 0.00040408131570475843,
      "loss": 2.8966,
      "step": 89217
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4698529243469238,
      "learning_rate": 0.0004040774792052143,
      "loss": 3.1082,
      "step": 89218
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9457039833068848,
      "learning_rate": 0.00040407364268632014,
      "loss": 3.1422,
      "step": 89219
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9710613489151,
      "learning_rate": 0.0004040698061480767,
      "loss": 2.9044,
      "step": 89220
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7720985412597656,
      "learning_rate": 0.00040406596959048483,
      "loss": 2.989,
      "step": 89221
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8997246026992798,
      "learning_rate": 0.000404062133013545,
      "loss": 3.0804,
      "step": 89222
    },
    {
      "epoch": 1.16,
      "grad_norm": 4.733034610748291,
      "learning_rate": 0.00040405829641725814,
      "loss": 2.9937,
      "step": 89223
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7279281616210938,
      "learning_rate": 0.0004040544598016249,
      "loss": 2.8561,
      "step": 89224
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.880280613899231,
      "learning_rate": 0.0004040506231666459,
      "loss": 2.9899,
      "step": 89225
    },
    {
      "epoch": 1.16,
      "grad_norm": 4.5825653076171875,
      "learning_rate": 0.00040404678651232193,
      "loss": 2.9789,
      "step": 89226
    },
    {
      "epoch": 1.16,
      "grad_norm": 4.107424736022949,
      "learning_rate": 0.00040404294983865377,
      "loss": 3.049,
      "step": 89227
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.992120385169983,
      "learning_rate": 0.000404039113145642,
      "loss": 3.0863,
      "step": 89228
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.676328182220459,
      "learning_rate": 0.0004040352764332874,
      "loss": 3.204,
      "step": 89229
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.857886791229248,
      "learning_rate": 0.0004040314397015907,
      "loss": 3.0394,
      "step": 89230
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5280511379241943,
      "learning_rate": 0.00040402760295055255,
      "loss": 3.0733,
      "step": 89231
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2857511043548584,
      "learning_rate": 0.0004040237661801737,
      "loss": 2.982,
      "step": 89232
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0557260513305664,
      "learning_rate": 0.00040401992939045493,
      "loss": 2.9982,
      "step": 89233
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3030409812927246,
      "learning_rate": 0.0004040160925813969,
      "loss": 3.2513,
      "step": 89234
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5021090507507324,
      "learning_rate": 0.00040401225575300025,
      "loss": 2.8588,
      "step": 89235
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6763688325881958,
      "learning_rate": 0.0004040084189052658,
      "loss": 3.2083,
      "step": 89236
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.597971200942993,
      "learning_rate": 0.0004040045820381942,
      "loss": 3.1229,
      "step": 89237
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6444569826126099,
      "learning_rate": 0.0004040007451517862,
      "loss": 3.0699,
      "step": 89238
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0789473056793213,
      "learning_rate": 0.00040399690824604245,
      "loss": 3.0641,
      "step": 89239
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5395005941390991,
      "learning_rate": 0.0004039930713209638,
      "loss": 2.9478,
      "step": 89240
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1286120414733887,
      "learning_rate": 0.00040398923437655077,
      "loss": 2.8342,
      "step": 89241
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8313727378845215,
      "learning_rate": 0.00040398539741280426,
      "loss": 3.2163,
      "step": 89242
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8585269451141357,
      "learning_rate": 0.00040398156042972485,
      "loss": 2.9557,
      "step": 89243
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9485732316970825,
      "learning_rate": 0.00040397772342731336,
      "loss": 3.0338,
      "step": 89244
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1175343990325928,
      "learning_rate": 0.0004039738864055704,
      "loss": 3.282,
      "step": 89245
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.033188819885254,
      "learning_rate": 0.0004039700493644967,
      "loss": 3.0718,
      "step": 89246
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.728161334991455,
      "learning_rate": 0.0004039662123040931,
      "loss": 2.9649,
      "step": 89247
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.085461378097534,
      "learning_rate": 0.00040396237522436016,
      "loss": 3.1973,
      "step": 89248
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7510839700698853,
      "learning_rate": 0.00040395853812529866,
      "loss": 2.8053,
      "step": 89249
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8625081777572632,
      "learning_rate": 0.0004039547010069093,
      "loss": 3.0353,
      "step": 89250
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6090925931930542,
      "learning_rate": 0.0004039508638691929,
      "loss": 2.7841,
      "step": 89251
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.383608102798462,
      "learning_rate": 0.0004039470267121499,
      "loss": 3.1728,
      "step": 89252
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7882356643676758,
      "learning_rate": 0.00040394318953578125,
      "loss": 3.032,
      "step": 89253
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7547392845153809,
      "learning_rate": 0.0004039393523400877,
      "loss": 2.9115,
      "step": 89254
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.2764629125595093,
      "learning_rate": 0.00040393551512506973,
      "loss": 3.1843,
      "step": 89255
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7702397108078003,
      "learning_rate": 0.00040393167789072826,
      "loss": 2.997,
      "step": 89256
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6027264595031738,
      "learning_rate": 0.00040392784063706386,
      "loss": 3.2375,
      "step": 89257
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.270437240600586,
      "learning_rate": 0.0004039240033640774,
      "loss": 3.1319,
      "step": 89258
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7203643321990967,
      "learning_rate": 0.00040392016607176945,
      "loss": 2.7602,
      "step": 89259
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4291818141937256,
      "learning_rate": 0.0004039163287601408,
      "loss": 3.0513,
      "step": 89260
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7344509363174438,
      "learning_rate": 0.0004039124914291922,
      "loss": 2.8574,
      "step": 89261
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9279638528823853,
      "learning_rate": 0.0004039086540789242,
      "loss": 3.0296,
      "step": 89262
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.275571823120117,
      "learning_rate": 0.00040390481670933765,
      "loss": 3.1255,
      "step": 89263
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9690073728561401,
      "learning_rate": 0.00040390097932043324,
      "loss": 2.9564,
      "step": 89264
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6768238544464111,
      "learning_rate": 0.00040389714191221165,
      "loss": 3.0409,
      "step": 89265
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9555937051773071,
      "learning_rate": 0.0004038933044846737,
      "loss": 3.0937,
      "step": 89266
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.414605140686035,
      "learning_rate": 0.00040388946703782,
      "loss": 3.2249,
      "step": 89267
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9584910869598389,
      "learning_rate": 0.0004038856295716512,
      "loss": 2.995,
      "step": 89268
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7607228755950928,
      "learning_rate": 0.00040388179208616817,
      "loss": 2.9272,
      "step": 89269
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.768028736114502,
      "learning_rate": 0.00040387795458137165,
      "loss": 2.9074,
      "step": 89270
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6032112836837769,
      "learning_rate": 0.0004038741170572621,
      "loss": 3.1659,
      "step": 89271
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6915925741195679,
      "learning_rate": 0.00040387027951384037,
      "loss": 2.9797,
      "step": 89272
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5626648664474487,
      "learning_rate": 0.00040386644195110734,
      "loss": 3.0028,
      "step": 89273
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7455449104309082,
      "learning_rate": 0.00040386260436906353,
      "loss": 2.9987,
      "step": 89274
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.532508134841919,
      "learning_rate": 0.00040385876676770965,
      "loss": 3.0453,
      "step": 89275
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9378042221069336,
      "learning_rate": 0.0004038549291470465,
      "loss": 3.0547,
      "step": 89276
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7141783237457275,
      "learning_rate": 0.00040385109150707475,
      "loss": 2.8491,
      "step": 89277
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6949634552001953,
      "learning_rate": 0.0004038472538477951,
      "loss": 2.955,
      "step": 89278
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7356548309326172,
      "learning_rate": 0.0004038434161692084,
      "loss": 3.0073,
      "step": 89279
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8875154256820679,
      "learning_rate": 0.0004038395784713151,
      "loss": 3.1391,
      "step": 89280
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4249802827835083,
      "learning_rate": 0.00040383574075411615,
      "loss": 2.953,
      "step": 89281
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.025973081588745,
      "learning_rate": 0.00040383190301761213,
      "loss": 2.8794,
      "step": 89282
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6601533889770508,
      "learning_rate": 0.0004038280652618038,
      "loss": 3.1461,
      "step": 89283
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7004286050796509,
      "learning_rate": 0.0004038242274866919,
      "loss": 3.1034,
      "step": 89284
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9107956886291504,
      "learning_rate": 0.0004038203896922771,
      "loss": 2.9058,
      "step": 89285
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9737862348556519,
      "learning_rate": 0.00040381655187856014,
      "loss": 2.8883,
      "step": 89286
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7073942422866821,
      "learning_rate": 0.0004038127140455417,
      "loss": 3.1312,
      "step": 89287
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7479616403579712,
      "learning_rate": 0.0004038088761932226,
      "loss": 3.0628,
      "step": 89288
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3332698345184326,
      "learning_rate": 0.0004038050383216034,
      "loss": 3.1503,
      "step": 89289
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1926143169403076,
      "learning_rate": 0.00040380120043068487,
      "loss": 2.9972,
      "step": 89290
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6455458402633667,
      "learning_rate": 0.00040379736252046776,
      "loss": 2.7881,
      "step": 89291
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3450536727905273,
      "learning_rate": 0.00040379352459095276,
      "loss": 2.8176,
      "step": 89292
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3712286949157715,
      "learning_rate": 0.00040378968664214055,
      "loss": 2.9907,
      "step": 89293
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0181477069854736,
      "learning_rate": 0.00040378584867403197,
      "loss": 3.155,
      "step": 89294
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5477573871612549,
      "learning_rate": 0.00040378201068662757,
      "loss": 3.1031,
      "step": 89295
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7748569250106812,
      "learning_rate": 0.0004037781726799281,
      "loss": 2.8372,
      "step": 89296
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6150978803634644,
      "learning_rate": 0.00040377433465393437,
      "loss": 2.8638,
      "step": 89297
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7790281772613525,
      "learning_rate": 0.00040377049660864707,
      "loss": 3.0676,
      "step": 89298
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.529547095298767,
      "learning_rate": 0.0004037666585440668,
      "loss": 3.0556,
      "step": 89299
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.772129535675049,
      "learning_rate": 0.0004037628204601944,
      "loss": 3.117,
      "step": 89300
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7243080139160156,
      "learning_rate": 0.0004037589823570305,
      "loss": 2.9076,
      "step": 89301
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.767430305480957,
      "learning_rate": 0.00040375514423457583,
      "loss": 3.0429,
      "step": 89302
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9290683269500732,
      "learning_rate": 0.00040375130609283114,
      "loss": 2.9851,
      "step": 89303
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6050050258636475,
      "learning_rate": 0.00040374746793179713,
      "loss": 2.7318,
      "step": 89304
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8644205331802368,
      "learning_rate": 0.0004037436297514745,
      "loss": 2.8843,
      "step": 89305
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6996572017669678,
      "learning_rate": 0.00040373979155186406,
      "loss": 3.0365,
      "step": 89306
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.97052001953125,
      "learning_rate": 0.0004037359533329663,
      "loss": 3.3373,
      "step": 89307
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.354245185852051,
      "learning_rate": 0.00040373211509478203,
      "loss": 3.1005,
      "step": 89308
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6093767881393433,
      "learning_rate": 0.0004037282768373122,
      "loss": 3.053,
      "step": 89309
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.988374948501587,
      "learning_rate": 0.00040372443856055717,
      "loss": 2.9584,
      "step": 89310
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.764796018600464,
      "learning_rate": 0.0004037206002645178,
      "loss": 2.8475,
      "step": 89311
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0912063121795654,
      "learning_rate": 0.00040371676194919496,
      "loss": 3.1114,
      "step": 89312
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7940356731414795,
      "learning_rate": 0.0004037129236145891,
      "loss": 3.1068,
      "step": 89313
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3176913261413574,
      "learning_rate": 0.0004037090852607011,
      "loss": 2.8781,
      "step": 89314
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.519575357437134,
      "learning_rate": 0.0004037052468875315,
      "loss": 3.0618,
      "step": 89315
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0288195610046387,
      "learning_rate": 0.0004037014084950813,
      "loss": 2.9174,
      "step": 89316
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.171806812286377,
      "learning_rate": 0.000403697570083351,
      "loss": 3.1749,
      "step": 89317
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.352720022201538,
      "learning_rate": 0.00040369373165234136,
      "loss": 3.0362,
      "step": 89318
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0685298442840576,
      "learning_rate": 0.00040368989320205306,
      "loss": 3.0394,
      "step": 89319
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4699885845184326,
      "learning_rate": 0.0004036860547324869,
      "loss": 3.1154,
      "step": 89320
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8472681045532227,
      "learning_rate": 0.0004036822162436435,
      "loss": 3.1321,
      "step": 89321
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.491612672805786,
      "learning_rate": 0.00040367837773552367,
      "loss": 2.9483,
      "step": 89322
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.3676679134368896,
      "learning_rate": 0.000403674539208128,
      "loss": 2.79,
      "step": 89323
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6430996656417847,
      "learning_rate": 0.0004036707006614573,
      "loss": 3.0039,
      "step": 89324
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1975326538085938,
      "learning_rate": 0.00040366686209551234,
      "loss": 3.1973,
      "step": 89325
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8457943201065063,
      "learning_rate": 0.00040366302351029366,
      "loss": 2.9759,
      "step": 89326
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.714207649230957,
      "learning_rate": 0.00040365918490580205,
      "loss": 2.9917,
      "step": 89327
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8976750373840332,
      "learning_rate": 0.00040365534628203836,
      "loss": 3.1796,
      "step": 89328
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7118653059005737,
      "learning_rate": 0.0004036515076390031,
      "loss": 3.1159,
      "step": 89329
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6532199382781982,
      "learning_rate": 0.0004036476689766971,
      "loss": 3.1026,
      "step": 89330
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.186056137084961,
      "learning_rate": 0.00040364383029512093,
      "loss": 3.0747,
      "step": 89331
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.733181357383728,
      "learning_rate": 0.0004036399915942756,
      "loss": 2.9678,
      "step": 89332
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1192617416381836,
      "learning_rate": 0.0004036361528741615,
      "loss": 3.1322,
      "step": 89333
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.326756238937378,
      "learning_rate": 0.00040363231413477956,
      "loss": 2.9436,
      "step": 89334
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.619564414024353,
      "learning_rate": 0.0004036284753761304,
      "loss": 3.0615,
      "step": 89335
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6023043394088745,
      "learning_rate": 0.00040362463659821473,
      "loss": 2.9168,
      "step": 89336
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3940906524658203,
      "learning_rate": 0.00040362079780103323,
      "loss": 3.1927,
      "step": 89337
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9934171438217163,
      "learning_rate": 0.0004036169589845868,
      "loss": 3.1252,
      "step": 89338
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.3312699794769287,
      "learning_rate": 0.00040361312014887597,
      "loss": 2.8976,
      "step": 89339
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9828847646713257,
      "learning_rate": 0.00040360928129390145,
      "loss": 3.0025,
      "step": 89340
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.694866418838501,
      "learning_rate": 0.000403605442419664,
      "loss": 2.9999,
      "step": 89341
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.159271478652954,
      "learning_rate": 0.00040360160352616444,
      "loss": 3.2192,
      "step": 89342
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.150069236755371,
      "learning_rate": 0.00040359776461340336,
      "loss": 3.1082,
      "step": 89343
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1802899837493896,
      "learning_rate": 0.0004035939256813814,
      "loss": 3.284,
      "step": 89344
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7791060209274292,
      "learning_rate": 0.00040359008673009945,
      "loss": 3.1705,
      "step": 89345
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5252525806427002,
      "learning_rate": 0.0004035862477595582,
      "loss": 2.8874,
      "step": 89346
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5753830671310425,
      "learning_rate": 0.00040358240876975826,
      "loss": 3.2323,
      "step": 89347
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.36255145072937,
      "learning_rate": 0.00040357856976070035,
      "loss": 3.1752,
      "step": 89348
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.809316635131836,
      "learning_rate": 0.0004035747307323853,
      "loss": 2.8656,
      "step": 89349
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6593950986862183,
      "learning_rate": 0.00040357089168481373,
      "loss": 2.8772,
      "step": 89350
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6113492250442505,
      "learning_rate": 0.0004035670526179864,
      "loss": 2.9057,
      "step": 89351
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8773795366287231,
      "learning_rate": 0.000403563213531904,
      "loss": 2.9052,
      "step": 89352
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7191964387893677,
      "learning_rate": 0.0004035593744265672,
      "loss": 3.0361,
      "step": 89353
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9140334129333496,
      "learning_rate": 0.00040355553530197673,
      "loss": 3.1027,
      "step": 89354
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7664505243301392,
      "learning_rate": 0.00040355169615813335,
      "loss": 2.8765,
      "step": 89355
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4961766004562378,
      "learning_rate": 0.0004035478569950378,
      "loss": 2.8784,
      "step": 89356
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7155449390411377,
      "learning_rate": 0.0004035440178126907,
      "loss": 3.2992,
      "step": 89357
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3524062633514404,
      "learning_rate": 0.00040354017861109285,
      "loss": 3.1998,
      "step": 89358
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3136091232299805,
      "learning_rate": 0.000403536339390245,
      "loss": 2.8557,
      "step": 89359
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9816075563430786,
      "learning_rate": 0.00040353250015014765,
      "loss": 2.9322,
      "step": 89360
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.8786282539367676,
      "learning_rate": 0.0004035286608908018,
      "loss": 3.0347,
      "step": 89361
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.9301023483276367,
      "learning_rate": 0.00040352482161220784,
      "loss": 2.763,
      "step": 89362
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1080729961395264,
      "learning_rate": 0.0004035209823143667,
      "loss": 2.839,
      "step": 89363
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7328696250915527,
      "learning_rate": 0.0004035171429972792,
      "loss": 3.0196,
      "step": 89364
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7365974187850952,
      "learning_rate": 0.00040351330366094576,
      "loss": 3.0019,
      "step": 89365
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.079648971557617,
      "learning_rate": 0.0004035094643053673,
      "loss": 3.0797,
      "step": 89366
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5720653533935547,
      "learning_rate": 0.0004035056249305445,
      "loss": 2.9504,
      "step": 89367
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.61120343208313,
      "learning_rate": 0.00040350178553647805,
      "loss": 2.9655,
      "step": 89368
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5777387619018555,
      "learning_rate": 0.00040349794612316864,
      "loss": 2.9403,
      "step": 89369
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0713119506835938,
      "learning_rate": 0.000403494106690617,
      "loss": 2.8773,
      "step": 89370
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7289072275161743,
      "learning_rate": 0.00040349026723882384,
      "loss": 3.0134,
      "step": 89371
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2233500480651855,
      "learning_rate": 0.0004034864277677899,
      "loss": 3.1068,
      "step": 89372
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9270488023757935,
      "learning_rate": 0.0004034825882775159,
      "loss": 2.5778,
      "step": 89373
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.158658742904663,
      "learning_rate": 0.0004034787487680025,
      "loss": 3.0998,
      "step": 89374
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.434576392173767,
      "learning_rate": 0.0004034749092392504,
      "loss": 3.0138,
      "step": 89375
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.751399040222168,
      "learning_rate": 0.00040347106969126055,
      "loss": 3.1729,
      "step": 89376
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.3970355987548828,
      "learning_rate": 0.0004034672301240333,
      "loss": 3.0603,
      "step": 89377
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7122079133987427,
      "learning_rate": 0.0004034633905375695,
      "loss": 2.8326,
      "step": 89378
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5926434993743896,
      "learning_rate": 0.00040345955093187,
      "loss": 2.8871,
      "step": 89379
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4389874935150146,
      "learning_rate": 0.0004034557113069355,
      "loss": 3.0954,
      "step": 89380
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6644177436828613,
      "learning_rate": 0.00040345187166276647,
      "loss": 2.9103,
      "step": 89381
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.640370488166809,
      "learning_rate": 0.0004034480319993638,
      "loss": 3.1314,
      "step": 89382
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.707807183265686,
      "learning_rate": 0.00040344419231672833,
      "loss": 2.932,
      "step": 89383
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.940018653869629,
      "learning_rate": 0.0004034403526148605,
      "loss": 2.9474,
      "step": 89384
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8009264469146729,
      "learning_rate": 0.0004034365128937612,
      "loss": 2.9565,
      "step": 89385
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6716716289520264,
      "learning_rate": 0.00040343267315343117,
      "loss": 2.8525,
      "step": 89386
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.605452537536621,
      "learning_rate": 0.00040342883339387095,
      "loss": 2.6508,
      "step": 89387
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4670488834381104,
      "learning_rate": 0.0004034249936150813,
      "loss": 2.8617,
      "step": 89388
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.712467074394226,
      "learning_rate": 0.0004034211538170632,
      "loss": 3.0945,
      "step": 89389
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0979843139648438,
      "learning_rate": 0.000403417313999817,
      "loss": 2.7968,
      "step": 89390
    },
    {
      "epoch": 1.16,
      "grad_norm": 4.217292785644531,
      "learning_rate": 0.00040341347416334355,
      "loss": 2.8776,
      "step": 89391
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.521352529525757,
      "learning_rate": 0.0004034096343076437,
      "loss": 2.8662,
      "step": 89392
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9686795473098755,
      "learning_rate": 0.00040340579443271803,
      "loss": 3.151,
      "step": 89393
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0813612937927246,
      "learning_rate": 0.0004034019545385672,
      "loss": 3.1642,
      "step": 89394
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6776323318481445,
      "learning_rate": 0.000403398114625192,
      "loss": 3.0623,
      "step": 89395
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4014897346496582,
      "learning_rate": 0.0004033942746925932,
      "loss": 2.9723,
      "step": 89396
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8158009052276611,
      "learning_rate": 0.0004033904347407714,
      "loss": 3.0784,
      "step": 89397
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5459675788879395,
      "learning_rate": 0.00040338659476972744,
      "loss": 3.1774,
      "step": 89398
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.004936933517456,
      "learning_rate": 0.00040338275477946184,
      "loss": 2.9534,
      "step": 89399
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7676448822021484,
      "learning_rate": 0.00040337891476997545,
      "loss": 2.847,
      "step": 89400
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6403701305389404,
      "learning_rate": 0.00040337507474126903,
      "loss": 3.049,
      "step": 89401
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5975117683410645,
      "learning_rate": 0.0004033712346933433,
      "loss": 3.2746,
      "step": 89402
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.1614551544189453,
      "learning_rate": 0.00040336739462619876,
      "loss": 3.2026,
      "step": 89403
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1824657917022705,
      "learning_rate": 0.00040336355453983637,
      "loss": 2.9994,
      "step": 89404
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8179045915603638,
      "learning_rate": 0.00040335971443425665,
      "loss": 2.9466,
      "step": 89405
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9284849166870117,
      "learning_rate": 0.00040335587430946043,
      "loss": 3.205,
      "step": 89406
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.123332977294922,
      "learning_rate": 0.0004033520341654485,
      "loss": 2.8971,
      "step": 89407
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.214982748031616,
      "learning_rate": 0.0004033481940022214,
      "loss": 2.9272,
      "step": 89408
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8969577550888062,
      "learning_rate": 0.00040334435381977993,
      "loss": 2.7677,
      "step": 89409
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.2161617279052734,
      "learning_rate": 0.0004033405136181248,
      "loss": 3.093,
      "step": 89410
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.053340435028076,
      "learning_rate": 0.0004033366733972567,
      "loss": 2.777,
      "step": 89411
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8763982057571411,
      "learning_rate": 0.0004033328331571764,
      "loss": 2.9879,
      "step": 89412
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0085771083831787,
      "learning_rate": 0.0004033289928978845,
      "loss": 2.9478,
      "step": 89413
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7223432064056396,
      "learning_rate": 0.00040332515261938187,
      "loss": 2.838,
      "step": 89414
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9857527017593384,
      "learning_rate": 0.0004033213123216691,
      "loss": 3.1271,
      "step": 89415
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5377960205078125,
      "learning_rate": 0.00040331747200474696,
      "loss": 3.1767,
      "step": 89416
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3391520977020264,
      "learning_rate": 0.0004033136316686162,
      "loss": 2.9475,
      "step": 89417
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8554970026016235,
      "learning_rate": 0.0004033097913132774,
      "loss": 2.824,
      "step": 89418
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.646858811378479,
      "learning_rate": 0.00040330595093873137,
      "loss": 3.1568,
      "step": 89419
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.031804084777832,
      "learning_rate": 0.0004033021105449789,
      "loss": 3.0574,
      "step": 89420
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3821725845336914,
      "learning_rate": 0.0004032982701320205,
      "loss": 2.8638,
      "step": 89421
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9929325580596924,
      "learning_rate": 0.00040329442969985703,
      "loss": 2.9669,
      "step": 89422
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5802251100540161,
      "learning_rate": 0.0004032905892484892,
      "loss": 2.9739,
      "step": 89423
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.481365442276001,
      "learning_rate": 0.00040328674877791775,
      "loss": 2.9938,
      "step": 89424
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.479048728942871,
      "learning_rate": 0.0004032829082881433,
      "loss": 3.4008,
      "step": 89425
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7439689636230469,
      "learning_rate": 0.0004032790677791666,
      "loss": 3.3776,
      "step": 89426
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9336051940917969,
      "learning_rate": 0.00040327522725098846,
      "loss": 3.0246,
      "step": 89427
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9138939380645752,
      "learning_rate": 0.00040327138670360937,
      "loss": 2.8378,
      "step": 89428
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9125052690505981,
      "learning_rate": 0.00040326754613703025,
      "loss": 3.2595,
      "step": 89429
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6895097494125366,
      "learning_rate": 0.00040326370555125165,
      "loss": 2.9554,
      "step": 89430
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.191995859146118,
      "learning_rate": 0.00040325986494627454,
      "loss": 2.9802,
      "step": 89431
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.890013337135315,
      "learning_rate": 0.00040325602432209935,
      "loss": 3.2452,
      "step": 89432
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.3199872970581055,
      "learning_rate": 0.0004032521836787269,
      "loss": 2.7873,
      "step": 89433
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9226036071777344,
      "learning_rate": 0.00040324834301615805,
      "loss": 2.8161,
      "step": 89434
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7556370496749878,
      "learning_rate": 0.0004032445023343933,
      "loss": 2.9244,
      "step": 89435
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.046830892562866,
      "learning_rate": 0.00040324066163343345,
      "loss": 3.0134,
      "step": 89436
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6222858428955078,
      "learning_rate": 0.00040323682091327926,
      "loss": 3.0196,
      "step": 89437
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7002956867218018,
      "learning_rate": 0.00040323298017393134,
      "loss": 3.0854,
      "step": 89438
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.3529059886932373,
      "learning_rate": 0.00040322913941539043,
      "loss": 2.991,
      "step": 89439
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.1098239421844482,
      "learning_rate": 0.00040322529863765736,
      "loss": 3.0502,
      "step": 89440
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6496561765670776,
      "learning_rate": 0.0004032214578407327,
      "loss": 3.0087,
      "step": 89441
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0242786407470703,
      "learning_rate": 0.00040321761702461726,
      "loss": 3.0082,
      "step": 89442
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.6221179962158203,
      "learning_rate": 0.0004032137761893117,
      "loss": 3.0144,
      "step": 89443
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5324907302856445,
      "learning_rate": 0.00040320993533481673,
      "loss": 2.9679,
      "step": 89444
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4388128519058228,
      "learning_rate": 0.00040320609446113306,
      "loss": 2.7757,
      "step": 89445
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7820109128952026,
      "learning_rate": 0.00040320225356826146,
      "loss": 3.211,
      "step": 89446
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.9413238763809204,
      "learning_rate": 0.00040319841265620273,
      "loss": 3.1213,
      "step": 89447
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.337326765060425,
      "learning_rate": 0.00040319457172495726,
      "loss": 2.7524,
      "step": 89448
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.4355101585388184,
      "learning_rate": 0.00040319073077452606,
      "loss": 2.799,
      "step": 89449
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5424890518188477,
      "learning_rate": 0.00040318688980490986,
      "loss": 2.9035,
      "step": 89450
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.045541524887085,
      "learning_rate": 0.0004031830488161091,
      "loss": 2.9788,
      "step": 89451
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5107505321502686,
      "learning_rate": 0.00040317920780812475,
      "loss": 3.0211,
      "step": 89452
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.531924843788147,
      "learning_rate": 0.0004031753667809575,
      "loss": 3.0682,
      "step": 89453
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.3994097709655762,
      "learning_rate": 0.00040317152573460794,
      "loss": 3.2001,
      "step": 89454
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.755341649055481,
      "learning_rate": 0.00040316768466907676,
      "loss": 3.079,
      "step": 89455
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.059324026107788,
      "learning_rate": 0.0004031638435843649,
      "loss": 2.9652,
      "step": 89456
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7647809982299805,
      "learning_rate": 0.0004031600024804728,
      "loss": 2.9891,
      "step": 89457
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6790207624435425,
      "learning_rate": 0.0004031561613574014,
      "loss": 3.199,
      "step": 89458
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.8930531740188599,
      "learning_rate": 0.0004031523202151513,
      "loss": 3.0704,
      "step": 89459
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.6804077625274658,
      "learning_rate": 0.00040314847905372327,
      "loss": 3.3588,
      "step": 89460
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7044557332992554,
      "learning_rate": 0.00040314463787311796,
      "loss": 3.1264,
      "step": 89461
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.5337486267089844,
      "learning_rate": 0.00040314079667333615,
      "loss": 2.909,
      "step": 89462
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.4730502367019653,
      "learning_rate": 0.00040313695545437844,
      "loss": 2.9424,
      "step": 89463
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.04455304145813,
      "learning_rate": 0.0004031331142162457,
      "loss": 3.0277,
      "step": 89464
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.0062410831451416,
      "learning_rate": 0.0004031292729589385,
      "loss": 3.1171,
      "step": 89465
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.068621873855591,
      "learning_rate": 0.0004031254316824577,
      "loss": 3.2812,
      "step": 89466
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.545820951461792,
      "learning_rate": 0.00040312159038680384,
      "loss": 2.9847,
      "step": 89467
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.5731985569000244,
      "learning_rate": 0.0004031177490719778,
      "loss": 2.8734,
      "step": 89468
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.634247064590454,
      "learning_rate": 0.0004031139077379802,
      "loss": 3.0845,
      "step": 89469
    },
    {
      "epoch": 1.16,
      "grad_norm": 2.7079854011535645,
      "learning_rate": 0.0004031100663848118,
      "loss": 3.0833,
      "step": 89470
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.031663417816162,
      "learning_rate": 0.0004031062250124733,
      "loss": 3.2828,
      "step": 89471
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7791471481323242,
      "learning_rate": 0.00040310238362096536,
      "loss": 3.1142,
      "step": 89472
    },
    {
      "epoch": 1.16,
      "grad_norm": 1.7165813446044922,
      "learning_rate": 0.0004030985422102887,
      "loss": 3.0348,
      "step": 89473
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8595930337905884,
      "learning_rate": 0.00040309470078044425,
      "loss": 2.9486,
      "step": 89474
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.6011810302734375,
      "learning_rate": 0.00040309085933143234,
      "loss": 3.113,
      "step": 89475
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3348724842071533,
      "learning_rate": 0.000403087017863254,
      "loss": 3.1229,
      "step": 89476
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7300748825073242,
      "learning_rate": 0.00040308317637590985,
      "loss": 3.0409,
      "step": 89477
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5693397521972656,
      "learning_rate": 0.00040307933486940053,
      "loss": 2.901,
      "step": 89478
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.558732509613037,
      "learning_rate": 0.00040307549334372685,
      "loss": 2.953,
      "step": 89479
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.933546781539917,
      "learning_rate": 0.00040307165179888956,
      "loss": 3.0144,
      "step": 89480
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9166977405548096,
      "learning_rate": 0.00040306781023488917,
      "loss": 3.0605,
      "step": 89481
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.9745583534240723,
      "learning_rate": 0.0004030639686517266,
      "loss": 2.9156,
      "step": 89482
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4864284992218018,
      "learning_rate": 0.0004030601270494025,
      "loss": 3.1237,
      "step": 89483
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2062289714813232,
      "learning_rate": 0.00040305628542791763,
      "loss": 3.0784,
      "step": 89484
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0812292098999023,
      "learning_rate": 0.00040305244378727254,
      "loss": 3.0144,
      "step": 89485
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5732455253601074,
      "learning_rate": 0.00040304860212746815,
      "loss": 2.9801,
      "step": 89486
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3229503631591797,
      "learning_rate": 0.000403044760448505,
      "loss": 2.9609,
      "step": 89487
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2318291664123535,
      "learning_rate": 0.0004030409187503839,
      "loss": 2.8071,
      "step": 89488
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5570147037506104,
      "learning_rate": 0.0004030370770331056,
      "loss": 2.9077,
      "step": 89489
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.749669075012207,
      "learning_rate": 0.0004030332352966707,
      "loss": 3.0409,
      "step": 89490
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7717337608337402,
      "learning_rate": 0.00040302939354108,
      "loss": 3.0639,
      "step": 89491
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.066835641860962,
      "learning_rate": 0.00040302555176633417,
      "loss": 2.9597,
      "step": 89492
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9821598529815674,
      "learning_rate": 0.00040302170997243397,
      "loss": 3.3685,
      "step": 89493
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.140625476837158,
      "learning_rate": 0.0004030178681593801,
      "loss": 2.9891,
      "step": 89494
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.224060535430908,
      "learning_rate": 0.0004030140263271733,
      "loss": 3.1691,
      "step": 89495
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.590317964553833,
      "learning_rate": 0.00040301018447581416,
      "loss": 2.8902,
      "step": 89496
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.672112226486206,
      "learning_rate": 0.0004030063426053035,
      "loss": 2.9322,
      "step": 89497
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7429864406585693,
      "learning_rate": 0.0004030025007156421,
      "loss": 3.0547,
      "step": 89498
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.694007158279419,
      "learning_rate": 0.0004029986588068305,
      "loss": 2.9056,
      "step": 89499
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.494064450263977,
      "learning_rate": 0.0004029948168788695,
      "loss": 2.7529,
      "step": 89500
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2558445930480957,
      "learning_rate": 0.00040299097493175993,
      "loss": 2.7331,
      "step": 89501
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.931662917137146,
      "learning_rate": 0.0004029871329655023,
      "loss": 3.1284,
      "step": 89502
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.606584310531616,
      "learning_rate": 0.0004029832909800974,
      "loss": 2.8289,
      "step": 89503
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.420271873474121,
      "learning_rate": 0.000402979448975546,
      "loss": 2.906,
      "step": 89504
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.861824870109558,
      "learning_rate": 0.00040297560695184883,
      "loss": 3.1798,
      "step": 89505
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0331308841705322,
      "learning_rate": 0.00040297176490900643,
      "loss": 3.0051,
      "step": 89506
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.678956389427185,
      "learning_rate": 0.00040296792284701973,
      "loss": 3.1838,
      "step": 89507
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.709892988204956,
      "learning_rate": 0.0004029640807658894,
      "loss": 2.8447,
      "step": 89508
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6168594360351562,
      "learning_rate": 0.000402960238665616,
      "loss": 2.9597,
      "step": 89509
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8095749616622925,
      "learning_rate": 0.00040295639654620033,
      "loss": 2.9474,
      "step": 89510
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.401565432548523,
      "learning_rate": 0.0004029525544076431,
      "loss": 3.1876,
      "step": 89511
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.3946590423583984,
      "learning_rate": 0.00040294871224994514,
      "loss": 2.7837,
      "step": 89512
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.566062569618225,
      "learning_rate": 0.00040294487007310706,
      "loss": 2.8379,
      "step": 89513
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2128212451934814,
      "learning_rate": 0.00040294102787712956,
      "loss": 3.2073,
      "step": 89514
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.75399649143219,
      "learning_rate": 0.00040293718566201345,
      "loss": 3.0158,
      "step": 89515
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6211386919021606,
      "learning_rate": 0.0004029333434277593,
      "loss": 3.1049,
      "step": 89516
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5159744024276733,
      "learning_rate": 0.0004029295011743678,
      "loss": 3.1344,
      "step": 89517
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.898366093635559,
      "learning_rate": 0.0004029256589018399,
      "loss": 3.1963,
      "step": 89518
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0497705936431885,
      "learning_rate": 0.00040292181661017617,
      "loss": 3.3101,
      "step": 89519
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.558849811553955,
      "learning_rate": 0.0004029179742993773,
      "loss": 3.0506,
      "step": 89520
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6882001161575317,
      "learning_rate": 0.00040291413196944404,
      "loss": 2.799,
      "step": 89521
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4470772743225098,
      "learning_rate": 0.0004029102896203771,
      "loss": 3.1631,
      "step": 89522
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1381571292877197,
      "learning_rate": 0.0004029064472521772,
      "loss": 2.6923,
      "step": 89523
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6640584468841553,
      "learning_rate": 0.00040290260486484507,
      "loss": 2.7514,
      "step": 89524
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2540476322174072,
      "learning_rate": 0.0004028987624583814,
      "loss": 3.2028,
      "step": 89525
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8206422328948975,
      "learning_rate": 0.0004028949200327869,
      "loss": 3.0615,
      "step": 89526
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5539817810058594,
      "learning_rate": 0.0004028910775880623,
      "loss": 3.0898,
      "step": 89527
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.044754981994629,
      "learning_rate": 0.00040288723512420824,
      "loss": 2.9868,
      "step": 89528
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.782175302505493,
      "learning_rate": 0.00040288339264122556,
      "loss": 2.9342,
      "step": 89529
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.9014647006988525,
      "learning_rate": 0.0004028795501391149,
      "loss": 2.8878,
      "step": 89530
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3320674896240234,
      "learning_rate": 0.000402875707617877,
      "loss": 3.2054,
      "step": 89531
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6199434995651245,
      "learning_rate": 0.00040287186507751257,
      "loss": 2.9552,
      "step": 89532
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5778467655181885,
      "learning_rate": 0.00040286802251802235,
      "loss": 2.9936,
      "step": 89533
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8585535287857056,
      "learning_rate": 0.00040286417993940697,
      "loss": 2.873,
      "step": 89534
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8046107292175293,
      "learning_rate": 0.00040286033734166724,
      "loss": 3.2133,
      "step": 89535
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6293058395385742,
      "learning_rate": 0.0004028564947248037,
      "loss": 2.9633,
      "step": 89536
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.288620948791504,
      "learning_rate": 0.00040285265208881734,
      "loss": 3.2871,
      "step": 89537
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7459754943847656,
      "learning_rate": 0.00040284880943370873,
      "loss": 2.8274,
      "step": 89538
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.730492115020752,
      "learning_rate": 0.00040284496675947854,
      "loss": 3.1673,
      "step": 89539
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5403845310211182,
      "learning_rate": 0.00040284112406612746,
      "loss": 2.9588,
      "step": 89540
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7704627513885498,
      "learning_rate": 0.00040283728135365643,
      "loss": 3.0061,
      "step": 89541
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.328097343444824,
      "learning_rate": 0.0004028334386220659,
      "loss": 2.9905,
      "step": 89542
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6430068016052246,
      "learning_rate": 0.0004028295958713567,
      "loss": 3.1895,
      "step": 89543
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8875185251235962,
      "learning_rate": 0.00040282575310152954,
      "loss": 3.0202,
      "step": 89544
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.954277992248535,
      "learning_rate": 0.00040282191031258517,
      "loss": 2.8645,
      "step": 89545
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.978265643119812,
      "learning_rate": 0.0004028180675045242,
      "loss": 3.2246,
      "step": 89546
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.818911075592041,
      "learning_rate": 0.0004028142246773475,
      "loss": 3.1697,
      "step": 89547
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3524515628814697,
      "learning_rate": 0.0004028103818310556,
      "loss": 2.8025,
      "step": 89548
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9691025018692017,
      "learning_rate": 0.00040280653896564936,
      "loss": 2.9629,
      "step": 89549
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7782654762268066,
      "learning_rate": 0.00040280269608112954,
      "loss": 2.969,
      "step": 89550
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.851311206817627,
      "learning_rate": 0.0004027988531774966,
      "loss": 3.0109,
      "step": 89551
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6992340087890625,
      "learning_rate": 0.00040279501025475143,
      "loss": 2.8284,
      "step": 89552
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5113059282302856,
      "learning_rate": 0.00040279116731289477,
      "loss": 3.1854,
      "step": 89553
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5681442022323608,
      "learning_rate": 0.0004027873243519273,
      "loss": 3.2381,
      "step": 89554
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9614027738571167,
      "learning_rate": 0.0004027834813718497,
      "loss": 2.998,
      "step": 89555
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0446877479553223,
      "learning_rate": 0.00040277963837266276,
      "loss": 2.872,
      "step": 89556
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4821988344192505,
      "learning_rate": 0.00040277579535436713,
      "loss": 3.2005,
      "step": 89557
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6320658922195435,
      "learning_rate": 0.00040277195231696347,
      "loss": 3.0646,
      "step": 89558
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5837109088897705,
      "learning_rate": 0.0004027681092604527,
      "loss": 3.0134,
      "step": 89559
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4944911003112793,
      "learning_rate": 0.00040276426618483525,
      "loss": 2.9562,
      "step": 89560
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7047804594039917,
      "learning_rate": 0.00040276042309011204,
      "loss": 3.0339,
      "step": 89561
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.772706985473633,
      "learning_rate": 0.00040275657997628373,
      "loss": 2.6612,
      "step": 89562
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.3168443441390991,
      "learning_rate": 0.0004027527368433511,
      "loss": 3.0506,
      "step": 89563
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1196179389953613,
      "learning_rate": 0.0004027488936913147,
      "loss": 2.8734,
      "step": 89564
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.003819227218628,
      "learning_rate": 0.00040274505052017537,
      "loss": 2.7402,
      "step": 89565
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6474254131317139,
      "learning_rate": 0.0004027412073299338,
      "loss": 3.0604,
      "step": 89566
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7379776239395142,
      "learning_rate": 0.0004027373641205907,
      "loss": 2.9577,
      "step": 89567
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9912641048431396,
      "learning_rate": 0.0004027335208921468,
      "loss": 3.0148,
      "step": 89568
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5849974155426025,
      "learning_rate": 0.00040272967764460276,
      "loss": 3.2195,
      "step": 89569
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.906114101409912,
      "learning_rate": 0.00040272583437795935,
      "loss": 2.9851,
      "step": 89570
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.878838062286377,
      "learning_rate": 0.00040272199109221734,
      "loss": 3.1714,
      "step": 89571
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6707935333251953,
      "learning_rate": 0.0004027181477873772,
      "loss": 2.8317,
      "step": 89572
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1691031455993652,
      "learning_rate": 0.00040271430446344,
      "loss": 3.2379,
      "step": 89573
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.058074474334717,
      "learning_rate": 0.00040271046112040615,
      "loss": 2.9132,
      "step": 89574
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7421238422393799,
      "learning_rate": 0.00040270661775827657,
      "loss": 2.5895,
      "step": 89575
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9000264406204224,
      "learning_rate": 0.0004027027743770518,
      "loss": 2.9319,
      "step": 89576
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4836640357971191,
      "learning_rate": 0.0004026989309767328,
      "loss": 3.2816,
      "step": 89577
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5808368921279907,
      "learning_rate": 0.00040269508755732,
      "loss": 3.1105,
      "step": 89578
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.150787115097046,
      "learning_rate": 0.0004026912441188143,
      "loss": 3.0093,
      "step": 89579
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8136078119277954,
      "learning_rate": 0.0004026874006612162,
      "loss": 3.018,
      "step": 89580
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.35819673538208,
      "learning_rate": 0.0004026835571845268,
      "loss": 2.7546,
      "step": 89581
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.544975996017456,
      "learning_rate": 0.00040267971368874647,
      "loss": 3.0907,
      "step": 89582
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.141385316848755,
      "learning_rate": 0.0004026758701738761,
      "loss": 2.9962,
      "step": 89583
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5150947570800781,
      "learning_rate": 0.0004026720266399163,
      "loss": 3.0837,
      "step": 89584
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8564071655273438,
      "learning_rate": 0.00040266818308686784,
      "loss": 2.6501,
      "step": 89585
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.555715799331665,
      "learning_rate": 0.0004026643395147314,
      "loss": 2.859,
      "step": 89586
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.722298502922058,
      "learning_rate": 0.0004026604959235078,
      "loss": 3.0047,
      "step": 89587
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9328137636184692,
      "learning_rate": 0.0004026566523131977,
      "loss": 3.1075,
      "step": 89588
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7263247966766357,
      "learning_rate": 0.00040265280868380165,
      "loss": 3.058,
      "step": 89589
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2820191383361816,
      "learning_rate": 0.0004026489650353206,
      "loss": 2.8936,
      "step": 89590
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9187511205673218,
      "learning_rate": 0.0004026451213677552,
      "loss": 3.155,
      "step": 89591
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7865437269210815,
      "learning_rate": 0.00040264127768110605,
      "loss": 3.0595,
      "step": 89592
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8348863124847412,
      "learning_rate": 0.000402637433975374,
      "loss": 3.0057,
      "step": 89593
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5353795289993286,
      "learning_rate": 0.0004026335902505597,
      "loss": 3.0358,
      "step": 89594
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.17702054977417,
      "learning_rate": 0.0004026297465066639,
      "loss": 3.3098,
      "step": 89595
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7152928113937378,
      "learning_rate": 0.00040262590274368725,
      "loss": 2.9695,
      "step": 89596
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5151615142822266,
      "learning_rate": 0.0004026220589616305,
      "loss": 2.6697,
      "step": 89597
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4086028337478638,
      "learning_rate": 0.0004026182151604944,
      "loss": 2.8218,
      "step": 89598
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.743892788887024,
      "learning_rate": 0.00040261437134027963,
      "loss": 2.9068,
      "step": 89599
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6333281993865967,
      "learning_rate": 0.0004026105275009869,
      "loss": 3.0939,
      "step": 89600
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.079868793487549,
      "learning_rate": 0.0004026066836426169,
      "loss": 3.2702,
      "step": 89601
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7197308540344238,
      "learning_rate": 0.0004026028397651705,
      "loss": 3.0337,
      "step": 89602
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.672301769256592,
      "learning_rate": 0.0004025989958686482,
      "loss": 3.2133,
      "step": 89603
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.945778727531433,
      "learning_rate": 0.00040259515195305084,
      "loss": 2.8814,
      "step": 89604
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4820388555526733,
      "learning_rate": 0.0004025913080183791,
      "loss": 2.9564,
      "step": 89605
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4721275568008423,
      "learning_rate": 0.0004025874640646337,
      "loss": 2.8879,
      "step": 89606
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.997288465499878,
      "learning_rate": 0.0004025836200918154,
      "loss": 3.1288,
      "step": 89607
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5117931365966797,
      "learning_rate": 0.0004025797760999248,
      "loss": 3.0645,
      "step": 89608
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9447731971740723,
      "learning_rate": 0.0004025759320889626,
      "loss": 2.7069,
      "step": 89609
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7081798315048218,
      "learning_rate": 0.00040257208805892974,
      "loss": 2.9397,
      "step": 89610
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7596718072891235,
      "learning_rate": 0.0004025682440098268,
      "loss": 3.0458,
      "step": 89611
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.015138864517212,
      "learning_rate": 0.0004025643999416544,
      "loss": 3.1255,
      "step": 89612
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6229543685913086,
      "learning_rate": 0.00040256055585441344,
      "loss": 3.1311,
      "step": 89613
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5542421340942383,
      "learning_rate": 0.0004025567117481045,
      "loss": 3.0014,
      "step": 89614
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6893043518066406,
      "learning_rate": 0.00040255286762272825,
      "loss": 3.0225,
      "step": 89615
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.6137945652008057,
      "learning_rate": 0.0004025490234782856,
      "loss": 2.9042,
      "step": 89616
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6925244331359863,
      "learning_rate": 0.00040254517931477703,
      "loss": 2.8836,
      "step": 89617
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7445207834243774,
      "learning_rate": 0.0004025413351322035,
      "loss": 3.3639,
      "step": 89618
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4680511951446533,
      "learning_rate": 0.0004025374909305655,
      "loss": 2.9963,
      "step": 89619
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6691545248031616,
      "learning_rate": 0.00040253364670986394,
      "loss": 3.2694,
      "step": 89620
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6486371755599976,
      "learning_rate": 0.00040252980247009937,
      "loss": 2.9724,
      "step": 89621
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7204381227493286,
      "learning_rate": 0.00040252595821127263,
      "loss": 2.9219,
      "step": 89622
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5033780336380005,
      "learning_rate": 0.0004025221139333843,
      "loss": 3.2728,
      "step": 89623
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7335206270217896,
      "learning_rate": 0.00040251826963643523,
      "loss": 2.9156,
      "step": 89624
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0107340812683105,
      "learning_rate": 0.00040251442532042604,
      "loss": 3.0566,
      "step": 89625
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8515044450759888,
      "learning_rate": 0.00040251058098535755,
      "loss": 3.2947,
      "step": 89626
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.800612449645996,
      "learning_rate": 0.00040250673663123036,
      "loss": 2.9473,
      "step": 89627
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8250449895858765,
      "learning_rate": 0.0004025028922580452,
      "loss": 3.0735,
      "step": 89628
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5084272623062134,
      "learning_rate": 0.0004024990478658029,
      "loss": 3.1429,
      "step": 89629
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5262335538864136,
      "learning_rate": 0.00040249520345450406,
      "loss": 2.9658,
      "step": 89630
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6318939924240112,
      "learning_rate": 0.00040249135902414943,
      "loss": 2.9947,
      "step": 89631
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0137555599212646,
      "learning_rate": 0.00040248751457473974,
      "loss": 3.1016,
      "step": 89632
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6522338390350342,
      "learning_rate": 0.0004024836701062756,
      "loss": 2.8726,
      "step": 89633
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.736396551132202,
      "learning_rate": 0.0004024798256187579,
      "loss": 2.7862,
      "step": 89634
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.273423433303833,
      "learning_rate": 0.00040247598111218727,
      "loss": 3.0255,
      "step": 89635
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8207228183746338,
      "learning_rate": 0.00040247213658656434,
      "loss": 3.0471,
      "step": 89636
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8815728425979614,
      "learning_rate": 0.0004024682920418899,
      "loss": 2.8656,
      "step": 89637
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.049124002456665,
      "learning_rate": 0.0004024644474781648,
      "loss": 2.9584,
      "step": 89638
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5000749826431274,
      "learning_rate": 0.0004024606028953895,
      "loss": 2.7872,
      "step": 89639
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.918829083442688,
      "learning_rate": 0.00040245675829356486,
      "loss": 3.2544,
      "step": 89640
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9580519199371338,
      "learning_rate": 0.0004024529136726917,
      "loss": 3.1986,
      "step": 89641
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.989224910736084,
      "learning_rate": 0.0004024490690327704,
      "loss": 2.9645,
      "step": 89642
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5522397756576538,
      "learning_rate": 0.00040244522437380206,
      "loss": 2.9569,
      "step": 89643
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5901931524276733,
      "learning_rate": 0.00040244137969578715,
      "loss": 3.1047,
      "step": 89644
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6724309921264648,
      "learning_rate": 0.00040243753499872643,
      "loss": 3.0208,
      "step": 89645
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.327427387237549,
      "learning_rate": 0.0004024336902826206,
      "loss": 2.9313,
      "step": 89646
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7281875610351562,
      "learning_rate": 0.0004024298455474705,
      "loss": 3.1243,
      "step": 89647
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0600969791412354,
      "learning_rate": 0.00040242600079327685,
      "loss": 3.0091,
      "step": 89648
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.513221025466919,
      "learning_rate": 0.0004024221560200401,
      "loss": 3.265,
      "step": 89649
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.037095546722412,
      "learning_rate": 0.00040241831122776115,
      "loss": 3.0026,
      "step": 89650
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.818411946296692,
      "learning_rate": 0.00040241446641644076,
      "loss": 3.0604,
      "step": 89651
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5736253261566162,
      "learning_rate": 0.0004024106215860796,
      "loss": 2.9812,
      "step": 89652
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.012994289398193,
      "learning_rate": 0.00040240677673667835,
      "loss": 2.8774,
      "step": 89653
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5972936153411865,
      "learning_rate": 0.0004024029318682378,
      "loss": 2.9308,
      "step": 89654
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.736502766609192,
      "learning_rate": 0.0004023990869807585,
      "loss": 2.8042,
      "step": 89655
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2680044174194336,
      "learning_rate": 0.0004023952420742413,
      "loss": 3.1095,
      "step": 89656
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.3477537631988525,
      "learning_rate": 0.000402391397148687,
      "loss": 2.8985,
      "step": 89657
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6911126375198364,
      "learning_rate": 0.0004023875522040961,
      "loss": 3.3122,
      "step": 89658
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8752384185791016,
      "learning_rate": 0.00040238370724046945,
      "loss": 3.066,
      "step": 89659
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6888328790664673,
      "learning_rate": 0.00040237986225780776,
      "loss": 3.0378,
      "step": 89660
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6139062643051147,
      "learning_rate": 0.00040237601725611166,
      "loss": 2.9514,
      "step": 89661
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.705295443534851,
      "learning_rate": 0.000402372172235382,
      "loss": 2.937,
      "step": 89662
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9960579872131348,
      "learning_rate": 0.0004023683271956194,
      "loss": 2.9321,
      "step": 89663
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8379883766174316,
      "learning_rate": 0.00040236448213682455,
      "loss": 3.0356,
      "step": 89664
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5709240436553955,
      "learning_rate": 0.00040236063705899826,
      "loss": 2.9207,
      "step": 89665
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7697559595108032,
      "learning_rate": 0.0004023567919621412,
      "loss": 2.7405,
      "step": 89666
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7838947772979736,
      "learning_rate": 0.00040235294684625404,
      "loss": 3.1248,
      "step": 89667
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3517873287200928,
      "learning_rate": 0.0004023491017113376,
      "loss": 2.8692,
      "step": 89668
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7678591012954712,
      "learning_rate": 0.00040234525655739243,
      "loss": 3.0735,
      "step": 89669
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.820574402809143,
      "learning_rate": 0.00040234141138441944,
      "loss": 3.0399,
      "step": 89670
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0105457305908203,
      "learning_rate": 0.00040233756619241913,
      "loss": 3.0599,
      "step": 89671
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.043438673019409,
      "learning_rate": 0.0004023337209813925,
      "loss": 2.8528,
      "step": 89672
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7210423946380615,
      "learning_rate": 0.00040232987575134,
      "loss": 2.8562,
      "step": 89673
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.815032720565796,
      "learning_rate": 0.00040232603050226244,
      "loss": 3.1914,
      "step": 89674
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8526190519332886,
      "learning_rate": 0.00040232218523416057,
      "loss": 3.1203,
      "step": 89675
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0886013507843018,
      "learning_rate": 0.0004023183399470351,
      "loss": 2.8476,
      "step": 89676
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4869214296340942,
      "learning_rate": 0.00040231449464088665,
      "loss": 2.8499,
      "step": 89677
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6162115335464478,
      "learning_rate": 0.00040231064931571607,
      "loss": 3.1917,
      "step": 89678
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0318775177001953,
      "learning_rate": 0.00040230680397152396,
      "loss": 3.0468,
      "step": 89679
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8736821413040161,
      "learning_rate": 0.00040230295860831106,
      "loss": 2.7043,
      "step": 89680
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6504889726638794,
      "learning_rate": 0.0004022991132260782,
      "loss": 3.0658,
      "step": 89681
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.791088342666626,
      "learning_rate": 0.0004022952678248259,
      "loss": 2.7748,
      "step": 89682
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2273237705230713,
      "learning_rate": 0.000402291422404555,
      "loss": 3.0925,
      "step": 89683
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.8846442699432373,
      "learning_rate": 0.00040228757696526623,
      "loss": 3.1857,
      "step": 89684
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4637985229492188,
      "learning_rate": 0.00040228373150696034,
      "loss": 3.085,
      "step": 89685
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7937207221984863,
      "learning_rate": 0.0004022798860296378,
      "loss": 2.7942,
      "step": 89686
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3719611167907715,
      "learning_rate": 0.00040227604053329956,
      "loss": 2.6527,
      "step": 89687
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.512023448944092,
      "learning_rate": 0.0004022721950179464,
      "loss": 2.8725,
      "step": 89688
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6837869882583618,
      "learning_rate": 0.0004022683494835788,
      "loss": 2.9493,
      "step": 89689
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.152750253677368,
      "learning_rate": 0.00040226450393019753,
      "loss": 2.9162,
      "step": 89690
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9143898487091064,
      "learning_rate": 0.00040226065835780343,
      "loss": 2.9725,
      "step": 89691
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.167191743850708,
      "learning_rate": 0.0004022568127663972,
      "loss": 3.0083,
      "step": 89692
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4064810276031494,
      "learning_rate": 0.0004022529671559794,
      "loss": 3.0459,
      "step": 89693
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.627135157585144,
      "learning_rate": 0.0004022491215265509,
      "loss": 3.0094,
      "step": 89694
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6366125345230103,
      "learning_rate": 0.00040224527587811226,
      "loss": 3.1675,
      "step": 89695
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.427772283554077,
      "learning_rate": 0.00040224143021066447,
      "loss": 2.9684,
      "step": 89696
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.26094913482666,
      "learning_rate": 0.00040223758452420796,
      "loss": 2.7508,
      "step": 89697
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5188844203948975,
      "learning_rate": 0.0004022337388187435,
      "loss": 3.2643,
      "step": 89698
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.24406099319458,
      "learning_rate": 0.00040222989309427197,
      "loss": 3.0335,
      "step": 89699
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8942482471466064,
      "learning_rate": 0.0004022260473507939,
      "loss": 3.1108,
      "step": 89700
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3680660724639893,
      "learning_rate": 0.0004022222015883101,
      "loss": 2.8899,
      "step": 89701
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.2668137550354004,
      "learning_rate": 0.0004022183558068214,
      "loss": 2.8541,
      "step": 89702
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2511074542999268,
      "learning_rate": 0.00040221451000632815,
      "loss": 2.9578,
      "step": 89703
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8616502285003662,
      "learning_rate": 0.00040221066418683136,
      "loss": 2.8866,
      "step": 89704
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6458301544189453,
      "learning_rate": 0.00040220681834833176,
      "loss": 3.0095,
      "step": 89705
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.1078059673309326,
      "learning_rate": 0.0004022029724908299,
      "loss": 3.1951,
      "step": 89706
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.0436694622039795,
      "learning_rate": 0.00040219912661432656,
      "loss": 2.7951,
      "step": 89707
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5653860569000244,
      "learning_rate": 0.0004021952807188226,
      "loss": 2.8172,
      "step": 89708
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.5871641635894775,
      "learning_rate": 0.0004021914348043185,
      "loss": 3.0348,
      "step": 89709
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.094374179840088,
      "learning_rate": 0.00040218758887081515,
      "loss": 3.013,
      "step": 89710
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.13206148147583,
      "learning_rate": 0.0004021837429183132,
      "loss": 3.0583,
      "step": 89711
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.3560656309127808,
      "learning_rate": 0.0004021798969468133,
      "loss": 2.9154,
      "step": 89712
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.61423659324646,
      "learning_rate": 0.00040217605095631617,
      "loss": 2.9483,
      "step": 89713
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4665714502334595,
      "learning_rate": 0.0004021722049468227,
      "loss": 2.9695,
      "step": 89714
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.04524302482605,
      "learning_rate": 0.0004021683589183335,
      "loss": 3.1684,
      "step": 89715
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5286883115768433,
      "learning_rate": 0.0004021645128708492,
      "loss": 2.9389,
      "step": 89716
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8435724973678589,
      "learning_rate": 0.00040216066680437055,
      "loss": 2.8119,
      "step": 89717
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.757088303565979,
      "learning_rate": 0.00040215682071889847,
      "loss": 3.2953,
      "step": 89718
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8802870512008667,
      "learning_rate": 0.00040215297461443334,
      "loss": 2.7675,
      "step": 89719
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6421741247177124,
      "learning_rate": 0.0004021491284909761,
      "loss": 3.1087,
      "step": 89720
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6118019819259644,
      "learning_rate": 0.00040214528234852744,
      "loss": 2.7036,
      "step": 89721
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6344215869903564,
      "learning_rate": 0.00040214143618708803,
      "loss": 2.9251,
      "step": 89722
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.045036554336548,
      "learning_rate": 0.0004021375900066586,
      "loss": 3.074,
      "step": 89723
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.251986265182495,
      "learning_rate": 0.00040213374380723986,
      "loss": 3.0869,
      "step": 89724
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1112523078918457,
      "learning_rate": 0.00040212989758883256,
      "loss": 2.8311,
      "step": 89725
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8864805698394775,
      "learning_rate": 0.0004021260513514373,
      "loss": 3.2015,
      "step": 89726
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4840935468673706,
      "learning_rate": 0.000402122205095055,
      "loss": 2.9574,
      "step": 89727
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4719210863113403,
      "learning_rate": 0.0004021183588196861,
      "loss": 3.0456,
      "step": 89728
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0302600860595703,
      "learning_rate": 0.0004021145125253316,
      "loss": 3.1747,
      "step": 89729
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.923801302909851,
      "learning_rate": 0.0004021106662119921,
      "loss": 2.869,
      "step": 89730
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.504473328590393,
      "learning_rate": 0.0004021068198796682,
      "loss": 3.139,
      "step": 89731
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.220568895339966,
      "learning_rate": 0.00040210297352836073,
      "loss": 2.9629,
      "step": 89732
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1640286445617676,
      "learning_rate": 0.0004020991271580704,
      "loss": 3.0181,
      "step": 89733
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.01023268699646,
      "learning_rate": 0.00040209528076879796,
      "loss": 2.9796,
      "step": 89734
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.8555703163146973,
      "learning_rate": 0.00040209143436054404,
      "loss": 2.8988,
      "step": 89735
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4433658123016357,
      "learning_rate": 0.0004020875879333094,
      "loss": 3.0574,
      "step": 89736
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2138185501098633,
      "learning_rate": 0.0004020837414870947,
      "loss": 2.8296,
      "step": 89737
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9412755966186523,
      "learning_rate": 0.0004020798950219008,
      "loss": 2.8994,
      "step": 89738
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.8710761070251465,
      "learning_rate": 0.0004020760485377283,
      "loss": 2.927,
      "step": 89739
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5696123838424683,
      "learning_rate": 0.00040207220203457793,
      "loss": 3.2326,
      "step": 89740
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4755377769470215,
      "learning_rate": 0.00040206835551245037,
      "loss": 3.0924,
      "step": 89741
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6818127632141113,
      "learning_rate": 0.00040206450897134645,
      "loss": 2.9931,
      "step": 89742
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7969739437103271,
      "learning_rate": 0.00040206066241126677,
      "loss": 2.6894,
      "step": 89743
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.721456527709961,
      "learning_rate": 0.00040205681583221204,
      "loss": 3.0635,
      "step": 89744
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6013379096984863,
      "learning_rate": 0.0004020529692341831,
      "loss": 3.3109,
      "step": 89745
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7501002550125122,
      "learning_rate": 0.00040204912261718054,
      "loss": 3.2432,
      "step": 89746
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8737236261367798,
      "learning_rate": 0.0004020452759812051,
      "loss": 2.9597,
      "step": 89747
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.548314094543457,
      "learning_rate": 0.00040204142932625755,
      "loss": 3.0707,
      "step": 89748
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.051013469696045,
      "learning_rate": 0.00040203758265233854,
      "loss": 2.8792,
      "step": 89749
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9525645971298218,
      "learning_rate": 0.00040203373595944886,
      "loss": 2.874,
      "step": 89750
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5487576723098755,
      "learning_rate": 0.00040202988924758923,
      "loss": 3.1458,
      "step": 89751
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7613900899887085,
      "learning_rate": 0.00040202604251676023,
      "loss": 2.9539,
      "step": 89752
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.617614507675171,
      "learning_rate": 0.0004020221957669626,
      "loss": 3.1586,
      "step": 89753
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7277861833572388,
      "learning_rate": 0.00040201834899819734,
      "loss": 3.1348,
      "step": 89754
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.848373532295227,
      "learning_rate": 0.00040201450221046475,
      "loss": 3.0372,
      "step": 89755
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6479779481887817,
      "learning_rate": 0.00040201065540376577,
      "loss": 2.9024,
      "step": 89756
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7888740301132202,
      "learning_rate": 0.0004020068085781012,
      "loss": 2.9294,
      "step": 89757
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4991095066070557,
      "learning_rate": 0.00040200296173347155,
      "loss": 3.0943,
      "step": 89758
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7585017681121826,
      "learning_rate": 0.00040199911486987755,
      "loss": 2.7824,
      "step": 89759
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8057067394256592,
      "learning_rate": 0.0004019952679873201,
      "loss": 2.855,
      "step": 89760
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9842536449432373,
      "learning_rate": 0.0004019914210857998,
      "loss": 2.8373,
      "step": 89761
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2894465923309326,
      "learning_rate": 0.00040198757416531726,
      "loss": 2.9381,
      "step": 89762
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2427494525909424,
      "learning_rate": 0.00040198372722587345,
      "loss": 2.9002,
      "step": 89763
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8405264616012573,
      "learning_rate": 0.00040197988026746884,
      "loss": 2.8973,
      "step": 89764
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6627013683319092,
      "learning_rate": 0.0004019760332901043,
      "loss": 2.9416,
      "step": 89765
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.948766827583313,
      "learning_rate": 0.0004019721862937805,
      "loss": 3.1128,
      "step": 89766
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.0070643424987793,
      "learning_rate": 0.0004019683392784981,
      "loss": 2.974,
      "step": 89767
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.416424512863159,
      "learning_rate": 0.0004019644922442578,
      "loss": 2.939,
      "step": 89768
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.00390625,
      "learning_rate": 0.0004019606451910605,
      "loss": 2.7959,
      "step": 89769
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6703028678894043,
      "learning_rate": 0.0004019567981189067,
      "loss": 2.8533,
      "step": 89770
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2432405948638916,
      "learning_rate": 0.00040195295102779733,
      "loss": 3.1092,
      "step": 89771
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8115147352218628,
      "learning_rate": 0.0004019491039177328,
      "loss": 2.831,
      "step": 89772
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.322465419769287,
      "learning_rate": 0.00040194525678871417,
      "loss": 3.124,
      "step": 89773
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.54504132270813,
      "learning_rate": 0.0004019414096407419,
      "loss": 3.1622,
      "step": 89774
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.726857304573059,
      "learning_rate": 0.0004019375624738168,
      "loss": 2.8798,
      "step": 89775
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6752820014953613,
      "learning_rate": 0.00040193371528793965,
      "loss": 2.7924,
      "step": 89776
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9764801263809204,
      "learning_rate": 0.0004019298680831111,
      "loss": 3.1447,
      "step": 89777
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7405284643173218,
      "learning_rate": 0.0004019260208593318,
      "loss": 3.1823,
      "step": 89778
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1188855171203613,
      "learning_rate": 0.0004019221736166025,
      "loss": 3.0403,
      "step": 89779
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5999759435653687,
      "learning_rate": 0.00040191832635492407,
      "loss": 3.0155,
      "step": 89780
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5333054065704346,
      "learning_rate": 0.000401914479074297,
      "loss": 2.9756,
      "step": 89781
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.846648931503296,
      "learning_rate": 0.0004019106317747221,
      "loss": 2.9397,
      "step": 89782
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.201368808746338,
      "learning_rate": 0.0004019067844562001,
      "loss": 3.1639,
      "step": 89783
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9062711000442505,
      "learning_rate": 0.0004019029371187318,
      "loss": 3.1241,
      "step": 89784
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7129602432250977,
      "learning_rate": 0.0004018990897623177,
      "loss": 3.1145,
      "step": 89785
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.580973744392395,
      "learning_rate": 0.0004018952423869587,
      "loss": 3.1834,
      "step": 89786
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7964532375335693,
      "learning_rate": 0.0004018913949926555,
      "loss": 3.1397,
      "step": 89787
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.440927267074585,
      "learning_rate": 0.0004018875475794086,
      "loss": 2.9861,
      "step": 89788
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6277834177017212,
      "learning_rate": 0.00040188370014721897,
      "loss": 2.9222,
      "step": 89789
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6922922134399414,
      "learning_rate": 0.00040187985269608725,
      "loss": 2.9867,
      "step": 89790
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7610929012298584,
      "learning_rate": 0.0004018760052260142,
      "loss": 3.0797,
      "step": 89791
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.573397636413574,
      "learning_rate": 0.00040187215773700033,
      "loss": 3.0066,
      "step": 89792
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.030728816986084,
      "learning_rate": 0.00040186831022904655,
      "loss": 2.9036,
      "step": 89793
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7815645933151245,
      "learning_rate": 0.0004018644627021537,
      "loss": 3.0246,
      "step": 89794
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9968265295028687,
      "learning_rate": 0.0004018606151563221,
      "loss": 3.0694,
      "step": 89795
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.127696990966797,
      "learning_rate": 0.0004018567675915528,
      "loss": 3.0017,
      "step": 89796
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7033207416534424,
      "learning_rate": 0.00040185292000784645,
      "loss": 2.924,
      "step": 89797
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8697911500930786,
      "learning_rate": 0.00040184907240520365,
      "loss": 3.184,
      "step": 89798
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.865326166152954,
      "learning_rate": 0.00040184522478362516,
      "loss": 2.7967,
      "step": 89799
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8943809270858765,
      "learning_rate": 0.0004018413771431118,
      "loss": 3.2707,
      "step": 89800
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1597940921783447,
      "learning_rate": 0.0004018375294836641,
      "loss": 3.1557,
      "step": 89801
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7827281951904297,
      "learning_rate": 0.0004018336818052829,
      "loss": 3.0749,
      "step": 89802
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8017761707305908,
      "learning_rate": 0.000401829834107969,
      "loss": 2.8111,
      "step": 89803
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1823673248291016,
      "learning_rate": 0.00040182598639172294,
      "loss": 2.7831,
      "step": 89804
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5639193058013916,
      "learning_rate": 0.0004018221386565455,
      "loss": 3.0113,
      "step": 89805
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.1282098293304443,
      "learning_rate": 0.0004018182909024374,
      "loss": 3.114,
      "step": 89806
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9618946313858032,
      "learning_rate": 0.00040181444312939934,
      "loss": 3.2807,
      "step": 89807
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.157743453979492,
      "learning_rate": 0.0004018105953374321,
      "loss": 2.9919,
      "step": 89808
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.684859037399292,
      "learning_rate": 0.0004018067475265364,
      "loss": 2.8172,
      "step": 89809
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.267908811569214,
      "learning_rate": 0.00040180289969671284,
      "loss": 3.1516,
      "step": 89810
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8969697952270508,
      "learning_rate": 0.0004017990518479621,
      "loss": 3.13,
      "step": 89811
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.80682373046875,
      "learning_rate": 0.0004017952039802852,
      "loss": 3.1554,
      "step": 89812
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.348301649093628,
      "learning_rate": 0.0004017913560936825,
      "loss": 2.9827,
      "step": 89813
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.049013376235962,
      "learning_rate": 0.0004017875081881549,
      "loss": 2.8259,
      "step": 89814
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.928786039352417,
      "learning_rate": 0.0004017836602637031,
      "loss": 3.218,
      "step": 89815
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.036870241165161,
      "learning_rate": 0.00040177981232032776,
      "loss": 3.0528,
      "step": 89816
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8668212890625,
      "learning_rate": 0.0004017759643580296,
      "loss": 3.1122,
      "step": 89817
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0775673389434814,
      "learning_rate": 0.0004017721163768095,
      "loss": 3.1043,
      "step": 89818
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8680987358093262,
      "learning_rate": 0.00040176826837666794,
      "loss": 2.8884,
      "step": 89819
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6048349142074585,
      "learning_rate": 0.00040176442035760575,
      "loss": 3.0143,
      "step": 89820
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3537192344665527,
      "learning_rate": 0.00040176057231962363,
      "loss": 3.0961,
      "step": 89821
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6063446998596191,
      "learning_rate": 0.0004017567242627223,
      "loss": 2.9778,
      "step": 89822
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4862867593765259,
      "learning_rate": 0.0004017528761869025,
      "loss": 3.0516,
      "step": 89823
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6624727249145508,
      "learning_rate": 0.00040174902809216495,
      "loss": 3.1747,
      "step": 89824
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8358891010284424,
      "learning_rate": 0.0004017451799785103,
      "loss": 2.9756,
      "step": 89825
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.737903356552124,
      "learning_rate": 0.00040174133184593924,
      "loss": 2.939,
      "step": 89826
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6188535690307617,
      "learning_rate": 0.0004017374836944527,
      "loss": 3.0673,
      "step": 89827
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7998217344284058,
      "learning_rate": 0.00040173363552405107,
      "loss": 3.2497,
      "step": 89828
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.639351725578308,
      "learning_rate": 0.0004017297873347353,
      "loss": 2.8679,
      "step": 89829
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7252436876296997,
      "learning_rate": 0.0004017259391265061,
      "loss": 3.2767,
      "step": 89830
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2350566387176514,
      "learning_rate": 0.00040172209089936407,
      "loss": 2.7176,
      "step": 89831
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.750587821006775,
      "learning_rate": 0.0004017182426533099,
      "loss": 2.9534,
      "step": 89832
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.00417423248291,
      "learning_rate": 0.0004017143943883446,
      "loss": 3.0293,
      "step": 89833
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.825610637664795,
      "learning_rate": 0.00040171054610446845,
      "loss": 2.7307,
      "step": 89834
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.862585425376892,
      "learning_rate": 0.0004017066978016825,
      "loss": 2.9476,
      "step": 89835
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6504294872283936,
      "learning_rate": 0.00040170284947998737,
      "loss": 3.0051,
      "step": 89836
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7594726085662842,
      "learning_rate": 0.0004016990011393837,
      "loss": 2.9982,
      "step": 89837
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4901199340820312,
      "learning_rate": 0.00040169515277987234,
      "loss": 3.0301,
      "step": 89838
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.093808174133301,
      "learning_rate": 0.00040169130440145394,
      "loss": 2.8756,
      "step": 89839
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1404247283935547,
      "learning_rate": 0.0004016874560041291,
      "loss": 3.263,
      "step": 89840
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5924434661865234,
      "learning_rate": 0.0004016836075878988,
      "loss": 3.2076,
      "step": 89841
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7277960777282715,
      "learning_rate": 0.0004016797591527635,
      "loss": 3.0894,
      "step": 89842
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.083855390548706,
      "learning_rate": 0.00040167591069872394,
      "loss": 3.3065,
      "step": 89843
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7005932331085205,
      "learning_rate": 0.00040167206222578095,
      "loss": 2.8384,
      "step": 89844
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.542550563812256,
      "learning_rate": 0.0004016682137339354,
      "loss": 2.8318,
      "step": 89845
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8021578788757324,
      "learning_rate": 0.00040166436522318756,
      "loss": 3.0206,
      "step": 89846
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8675200939178467,
      "learning_rate": 0.00040166051669353846,
      "loss": 2.9058,
      "step": 89847
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8766411542892456,
      "learning_rate": 0.00040165666814498874,
      "loss": 2.8897,
      "step": 89848
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2396905422210693,
      "learning_rate": 0.0004016528195775392,
      "loss": 2.9136,
      "step": 89849
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1040170192718506,
      "learning_rate": 0.00040164897099119045,
      "loss": 3.0944,
      "step": 89850
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.581484317779541,
      "learning_rate": 0.00040164512238594324,
      "loss": 2.7924,
      "step": 89851
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.432720899581909,
      "learning_rate": 0.0004016412737617983,
      "loss": 3.0347,
      "step": 89852
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6222258806228638,
      "learning_rate": 0.00040163742511875627,
      "loss": 3.2102,
      "step": 89853
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7146190404891968,
      "learning_rate": 0.0004016335764568179,
      "loss": 3.0667,
      "step": 89854
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7026506662368774,
      "learning_rate": 0.00040162972777598404,
      "loss": 2.9586,
      "step": 89855
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2083616256713867,
      "learning_rate": 0.0004016258790762553,
      "loss": 3.1761,
      "step": 89856
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.1237125396728516,
      "learning_rate": 0.00040162203035763223,
      "loss": 3.0602,
      "step": 89857
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6072865724563599,
      "learning_rate": 0.0004016181816201159,
      "loss": 2.9234,
      "step": 89858
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.727150559425354,
      "learning_rate": 0.0004016143328637067,
      "loss": 3.092,
      "step": 89859
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5467299222946167,
      "learning_rate": 0.00040161048408840554,
      "loss": 3.1469,
      "step": 89860
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.59215247631073,
      "learning_rate": 0.00040160663529421306,
      "loss": 3.0237,
      "step": 89861
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8750131130218506,
      "learning_rate": 0.00040160278648112993,
      "loss": 3.012,
      "step": 89862
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.603545069694519,
      "learning_rate": 0.000401598937649157,
      "loss": 2.9966,
      "step": 89863
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6925642490386963,
      "learning_rate": 0.00040159508879829487,
      "loss": 3.0294,
      "step": 89864
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2470715045928955,
      "learning_rate": 0.00040159123992854434,
      "loss": 2.9631,
      "step": 89865
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5722230672836304,
      "learning_rate": 0.000401587391039906,
      "loss": 2.7998,
      "step": 89866
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.68356454372406,
      "learning_rate": 0.0004015835421323807,
      "loss": 2.9553,
      "step": 89867
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7703214883804321,
      "learning_rate": 0.00040157969320596913,
      "loss": 3.0747,
      "step": 89868
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0085320472717285,
      "learning_rate": 0.000401575844260672,
      "loss": 3.0078,
      "step": 89869
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.412703514099121,
      "learning_rate": 0.00040157199529648985,
      "loss": 2.8507,
      "step": 89870
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0409018993377686,
      "learning_rate": 0.00040156814631342366,
      "loss": 2.9345,
      "step": 89871
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9679406881332397,
      "learning_rate": 0.00040156429731147403,
      "loss": 2.9445,
      "step": 89872
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6275038719177246,
      "learning_rate": 0.00040156044829064167,
      "loss": 2.9897,
      "step": 89873
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6702467203140259,
      "learning_rate": 0.00040155659925092727,
      "loss": 3.0708,
      "step": 89874
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.946762204170227,
      "learning_rate": 0.00040155275019233166,
      "loss": 2.9873,
      "step": 89875
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9014830589294434,
      "learning_rate": 0.0004015489011148554,
      "loss": 3.0361,
      "step": 89876
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5746270418167114,
      "learning_rate": 0.0004015450520184993,
      "loss": 2.8679,
      "step": 89877
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4720964431762695,
      "learning_rate": 0.0004015412029032641,
      "loss": 2.9707,
      "step": 89878
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7284523248672485,
      "learning_rate": 0.00040153735376915046,
      "loss": 2.9745,
      "step": 89879
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.734135627746582,
      "learning_rate": 0.0004015335046161591,
      "loss": 2.8568,
      "step": 89880
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.98003888130188,
      "learning_rate": 0.0004015296554442907,
      "loss": 3.0787,
      "step": 89881
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0411455631256104,
      "learning_rate": 0.00040152580625354615,
      "loss": 2.7372,
      "step": 89882
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2597291469573975,
      "learning_rate": 0.00040152195704392594,
      "loss": 2.8264,
      "step": 89883
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.9388864040374756,
      "learning_rate": 0.0004015181078154308,
      "loss": 3.0885,
      "step": 89884
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.234529495239258,
      "learning_rate": 0.0004015142585680617,
      "loss": 3.213,
      "step": 89885
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.6826043128967285,
      "learning_rate": 0.0004015104093018191,
      "loss": 3.1912,
      "step": 89886
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2785327434539795,
      "learning_rate": 0.00040150656001670373,
      "loss": 3.0422,
      "step": 89887
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.9515321254730225,
      "learning_rate": 0.0004015027107127165,
      "loss": 2.9695,
      "step": 89888
    },
    {
      "epoch": 1.17,
      "grad_norm": 5.193489074707031,
      "learning_rate": 0.00040149886138985796,
      "loss": 3.0757,
      "step": 89889
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.7742743492126465,
      "learning_rate": 0.00040149501204812886,
      "loss": 3.07,
      "step": 89890
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.8749985694885254,
      "learning_rate": 0.0004014911626875299,
      "loss": 3.0091,
      "step": 89891
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0196993350982666,
      "learning_rate": 0.00040148731330806185,
      "loss": 3.178,
      "step": 89892
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7211852073669434,
      "learning_rate": 0.00040148346390972536,
      "loss": 2.925,
      "step": 89893
    },
    {
      "epoch": 1.17,
      "grad_norm": 5.9139485359191895,
      "learning_rate": 0.00040147961449252126,
      "loss": 2.9902,
      "step": 89894
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.4985907077789307,
      "learning_rate": 0.0004014757650564501,
      "loss": 3.1137,
      "step": 89895
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0313472747802734,
      "learning_rate": 0.00040147191560151266,
      "loss": 3.0942,
      "step": 89896
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8317514657974243,
      "learning_rate": 0.00040146806612770974,
      "loss": 2.8537,
      "step": 89897
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6224045753479004,
      "learning_rate": 0.0004014642166350419,
      "loss": 3.1644,
      "step": 89898
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.0770583152771,
      "learning_rate": 0.00040146036712351005,
      "loss": 3.0214,
      "step": 89899
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.3466150760650635,
      "learning_rate": 0.0004014565175931148,
      "loss": 2.9449,
      "step": 89900
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6388936042785645,
      "learning_rate": 0.0004014526680438568,
      "loss": 2.9352,
      "step": 89901
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5640445947647095,
      "learning_rate": 0.00040144881847573683,
      "loss": 3.3061,
      "step": 89902
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.64847993850708,
      "learning_rate": 0.0004014449688887557,
      "loss": 3.1805,
      "step": 89903
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.514213800430298,
      "learning_rate": 0.00040144111928291394,
      "loss": 2.9313,
      "step": 89904
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.138845443725586,
      "learning_rate": 0.0004014372696582124,
      "loss": 2.9706,
      "step": 89905
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.567447543144226,
      "learning_rate": 0.0004014334200146518,
      "loss": 3.1126,
      "step": 89906
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4761830568313599,
      "learning_rate": 0.0004014295703522327,
      "loss": 2.9957,
      "step": 89907
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3554821014404297,
      "learning_rate": 0.000401425720670956,
      "loss": 3.0892,
      "step": 89908
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8012030124664307,
      "learning_rate": 0.0004014218709708224,
      "loss": 3.4779,
      "step": 89909
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.485607385635376,
      "learning_rate": 0.00040141802125183244,
      "loss": 2.9639,
      "step": 89910
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5612696409225464,
      "learning_rate": 0.00040141417151398704,
      "loss": 2.8162,
      "step": 89911
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.855441927909851,
      "learning_rate": 0.0004014103217572869,
      "loss": 3.0385,
      "step": 89912
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.265885353088379,
      "learning_rate": 0.00040140647198173255,
      "loss": 3.2584,
      "step": 89913
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.472848653793335,
      "learning_rate": 0.00040140262218732475,
      "loss": 2.8472,
      "step": 89914
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.0370798110961914,
      "learning_rate": 0.0004013987723740644,
      "loss": 2.8725,
      "step": 89915
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9615288972854614,
      "learning_rate": 0.00040139492254195213,
      "loss": 3.288,
      "step": 89916
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6428658962249756,
      "learning_rate": 0.00040139107269098856,
      "loss": 2.9291,
      "step": 89917
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.222806453704834,
      "learning_rate": 0.0004013872228211745,
      "loss": 2.9069,
      "step": 89918
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.344897747039795,
      "learning_rate": 0.00040138337293251067,
      "loss": 3.257,
      "step": 89919
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7170449495315552,
      "learning_rate": 0.0004013795230249977,
      "loss": 3.0426,
      "step": 89920
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7637962102890015,
      "learning_rate": 0.0004013756730986364,
      "loss": 3.3234,
      "step": 89921
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.947183609008789,
      "learning_rate": 0.0004013718231534275,
      "loss": 2.9811,
      "step": 89922
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.038557291030884,
      "learning_rate": 0.0004013679731893716,
      "loss": 2.7802,
      "step": 89923
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.772993326187134,
      "learning_rate": 0.00040136412320646947,
      "loss": 3.0795,
      "step": 89924
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0971786975860596,
      "learning_rate": 0.0004013602732047219,
      "loss": 2.7684,
      "step": 89925
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.3978545665740967,
      "learning_rate": 0.00040135642318412946,
      "loss": 2.7087,
      "step": 89926
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.578428030014038,
      "learning_rate": 0.000401352573144693,
      "loss": 2.7646,
      "step": 89927
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.565952181816101,
      "learning_rate": 0.00040134872308641317,
      "loss": 3.0755,
      "step": 89928
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3591837882995605,
      "learning_rate": 0.00040134487300929066,
      "loss": 3.1761,
      "step": 89929
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8868807554244995,
      "learning_rate": 0.00040134102291332623,
      "loss": 2.8802,
      "step": 89930
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6908241510391235,
      "learning_rate": 0.00040133717279852065,
      "loss": 3.0604,
      "step": 89931
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9945553541183472,
      "learning_rate": 0.00040133332266487456,
      "loss": 3.1415,
      "step": 89932
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5332088470458984,
      "learning_rate": 0.00040132947251238866,
      "loss": 2.9335,
      "step": 89933
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.277137517929077,
      "learning_rate": 0.0004013256223410638,
      "loss": 2.7982,
      "step": 89934
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.02982497215271,
      "learning_rate": 0.0004013217721509005,
      "loss": 3.0884,
      "step": 89935
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7959665060043335,
      "learning_rate": 0.00040131792194189955,
      "loss": 3.1212,
      "step": 89936
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.524875521659851,
      "learning_rate": 0.00040131407171406177,
      "loss": 2.9405,
      "step": 89937
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7050319910049438,
      "learning_rate": 0.0004013102214673877,
      "loss": 2.9646,
      "step": 89938
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.257913112640381,
      "learning_rate": 0.0004013063712018782,
      "loss": 3.0564,
      "step": 89939
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1215388774871826,
      "learning_rate": 0.0004013025209175339,
      "loss": 2.7433,
      "step": 89940
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3893840312957764,
      "learning_rate": 0.0004012986706143556,
      "loss": 3.1887,
      "step": 89941
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.958664059638977,
      "learning_rate": 0.00040129482029234394,
      "loss": 3.1088,
      "step": 89942
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6194941997528076,
      "learning_rate": 0.0004012909699514997,
      "loss": 3.0068,
      "step": 89943
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7016000747680664,
      "learning_rate": 0.0004012871195918235,
      "loss": 3.0177,
      "step": 89944
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9601439237594604,
      "learning_rate": 0.0004012832692133161,
      "loss": 3.0152,
      "step": 89945
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.072326183319092,
      "learning_rate": 0.0004012794188159783,
      "loss": 3.0976,
      "step": 89946
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.036146879196167,
      "learning_rate": 0.00040127556839981065,
      "loss": 2.7786,
      "step": 89947
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.811299204826355,
      "learning_rate": 0.000401271717964814,
      "loss": 2.9835,
      "step": 89948
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.600221633911133,
      "learning_rate": 0.000401267867510989,
      "loss": 3.0224,
      "step": 89949
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.372330665588379,
      "learning_rate": 0.0004012640170383365,
      "loss": 3.0438,
      "step": 89950
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.125056266784668,
      "learning_rate": 0.00040126016654685705,
      "loss": 2.9708,
      "step": 89951
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5949958562850952,
      "learning_rate": 0.0004012563160365514,
      "loss": 3.2388,
      "step": 89952
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.001511812210083,
      "learning_rate": 0.0004012524655074203,
      "loss": 3.0933,
      "step": 89953
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6240158081054688,
      "learning_rate": 0.00040124861495946447,
      "loss": 3.1474,
      "step": 89954
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8829166889190674,
      "learning_rate": 0.0004012447643926846,
      "loss": 3.2087,
      "step": 89955
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7680500745773315,
      "learning_rate": 0.00040124091380708136,
      "loss": 3.1255,
      "step": 89956
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6163842678070068,
      "learning_rate": 0.00040123706320265565,
      "loss": 2.6746,
      "step": 89957
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9053529500961304,
      "learning_rate": 0.0004012332125794079,
      "loss": 3.1018,
      "step": 89958
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.253133535385132,
      "learning_rate": 0.0004012293619373391,
      "loss": 2.9345,
      "step": 89959
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3562426567077637,
      "learning_rate": 0.0004012255112764499,
      "loss": 3.1272,
      "step": 89960
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.135256052017212,
      "learning_rate": 0.0004012216605967408,
      "loss": 3.1239,
      "step": 89961
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.757126808166504,
      "learning_rate": 0.0004012178098982128,
      "loss": 3.1282,
      "step": 89962
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3057754039764404,
      "learning_rate": 0.00040121395918086644,
      "loss": 3.2147,
      "step": 89963
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1992621421813965,
      "learning_rate": 0.00040121010844470253,
      "loss": 3.0686,
      "step": 89964
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.565650224685669,
      "learning_rate": 0.00040120625768972175,
      "loss": 2.9366,
      "step": 89965
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.896911382675171,
      "learning_rate": 0.00040120240691592476,
      "loss": 3.0582,
      "step": 89966
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.558762788772583,
      "learning_rate": 0.00040119855612331246,
      "loss": 3.0279,
      "step": 89967
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6075944900512695,
      "learning_rate": 0.00040119470531188535,
      "loss": 2.9547,
      "step": 89968
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6675474643707275,
      "learning_rate": 0.0004011908544816442,
      "loss": 3.0108,
      "step": 89969
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0892674922943115,
      "learning_rate": 0.00040118700363258984,
      "loss": 3.0669,
      "step": 89970
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8674949407577515,
      "learning_rate": 0.00040118315276472285,
      "loss": 3.0234,
      "step": 89971
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.484633445739746,
      "learning_rate": 0.00040117930187804404,
      "loss": 2.9942,
      "step": 89972
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4495152235031128,
      "learning_rate": 0.0004011754509725541,
      "loss": 2.8725,
      "step": 89973
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7496657371520996,
      "learning_rate": 0.00040117160004825373,
      "loss": 3.1619,
      "step": 89974
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5763227939605713,
      "learning_rate": 0.0004011677491051436,
      "loss": 3.0295,
      "step": 89975
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.606837511062622,
      "learning_rate": 0.0004011638981432245,
      "loss": 3.0323,
      "step": 89976
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.227665662765503,
      "learning_rate": 0.0004011600471624971,
      "loss": 2.8722,
      "step": 89977
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8804125785827637,
      "learning_rate": 0.00040115619616296213,
      "loss": 3.176,
      "step": 89978
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.836166262626648,
      "learning_rate": 0.0004011523451446204,
      "loss": 3.1684,
      "step": 89979
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8283329010009766,
      "learning_rate": 0.0004011484941074725,
      "loss": 3.0265,
      "step": 89980
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6702029705047607,
      "learning_rate": 0.0004011446430515191,
      "loss": 2.8235,
      "step": 89981
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0614559650421143,
      "learning_rate": 0.00040114079197676105,
      "loss": 3.0234,
      "step": 89982
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0547964572906494,
      "learning_rate": 0.00040113694088319906,
      "loss": 3.1611,
      "step": 89983
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9875952005386353,
      "learning_rate": 0.0004011330897708338,
      "loss": 2.9711,
      "step": 89984
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0605661869049072,
      "learning_rate": 0.00040112923863966593,
      "loss": 2.7951,
      "step": 89985
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9682819843292236,
      "learning_rate": 0.0004011253874896963,
      "loss": 3.1383,
      "step": 89986
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.8740127086639404,
      "learning_rate": 0.0004011215363209255,
      "loss": 2.8696,
      "step": 89987
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6952006816864014,
      "learning_rate": 0.00040111768513335434,
      "loss": 2.9859,
      "step": 89988
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.839163899421692,
      "learning_rate": 0.00040111383392698344,
      "loss": 2.9454,
      "step": 89989
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.944307565689087,
      "learning_rate": 0.0004011099827018136,
      "loss": 2.7778,
      "step": 89990
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5316709280014038,
      "learning_rate": 0.00040110613145784545,
      "loss": 2.8934,
      "step": 89991
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5484960079193115,
      "learning_rate": 0.0004011022801950799,
      "loss": 2.9912,
      "step": 89992
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.734989881515503,
      "learning_rate": 0.00040109842891351746,
      "loss": 3.0724,
      "step": 89993
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7841447591781616,
      "learning_rate": 0.0004010945776131588,
      "loss": 2.9014,
      "step": 89994
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.645114541053772,
      "learning_rate": 0.0004010907262940049,
      "loss": 2.9937,
      "step": 89995
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.510136604309082,
      "learning_rate": 0.00040108687495605627,
      "loss": 3.1046,
      "step": 89996
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6315010786056519,
      "learning_rate": 0.0004010830235993137,
      "loss": 2.9138,
      "step": 89997
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8651363849639893,
      "learning_rate": 0.0004010791722237779,
      "loss": 3.0502,
      "step": 89998
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.732694387435913,
      "learning_rate": 0.0004010753208294495,
      "loss": 3.0538,
      "step": 89999
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5849051475524902,
      "learning_rate": 0.0004010714694163294,
      "loss": 2.936,
      "step": 90000
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5502700805664062,
      "learning_rate": 0.0004010676179844181,
      "loss": 3.2122,
      "step": 90001
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6340961456298828,
      "learning_rate": 0.0004010637665337165,
      "loss": 2.8613,
      "step": 90002
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.493329405784607,
      "learning_rate": 0.00040105991506422513,
      "loss": 3.0889,
      "step": 90003
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6553051471710205,
      "learning_rate": 0.0004010560635759449,
      "loss": 2.7678,
      "step": 90004
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7120246887207031,
      "learning_rate": 0.00040105221206887645,
      "loss": 2.8814,
      "step": 90005
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.08260178565979,
      "learning_rate": 0.00040104836054302045,
      "loss": 2.9838,
      "step": 90006
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.681305170059204,
      "learning_rate": 0.0004010445089983777,
      "loss": 3.1485,
      "step": 90007
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.773990273475647,
      "learning_rate": 0.0004010406574349488,
      "loss": 2.9273,
      "step": 90008
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.624771237373352,
      "learning_rate": 0.00040103680585273455,
      "loss": 2.8725,
      "step": 90009
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.785345196723938,
      "learning_rate": 0.00040103295425173574,
      "loss": 3.0689,
      "step": 90010
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7899625301361084,
      "learning_rate": 0.0004010291026319529,
      "loss": 2.9727,
      "step": 90011
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.165712833404541,
      "learning_rate": 0.00040102525099338683,
      "loss": 3.2029,
      "step": 90012
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8782769441604614,
      "learning_rate": 0.00040102139933603835,
      "loss": 3.0963,
      "step": 90013
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.939126968383789,
      "learning_rate": 0.000401017547659908,
      "loss": 3.1495,
      "step": 90014
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.186206102371216,
      "learning_rate": 0.0004010136959649966,
      "loss": 3.0222,
      "step": 90015
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6674892902374268,
      "learning_rate": 0.0004010098442513049,
      "loss": 2.7536,
      "step": 90016
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2830750942230225,
      "learning_rate": 0.0004010059925188335,
      "loss": 3.1599,
      "step": 90017
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7227599620819092,
      "learning_rate": 0.00040100214076758316,
      "loss": 3.1782,
      "step": 90018
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5735275745391846,
      "learning_rate": 0.0004009982889975547,
      "loss": 2.8581,
      "step": 90019
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8146508932113647,
      "learning_rate": 0.00040099443720874875,
      "loss": 2.8654,
      "step": 90020
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.714966058731079,
      "learning_rate": 0.0004009905854011659,
      "loss": 3.1182,
      "step": 90021
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5542726516723633,
      "learning_rate": 0.00040098673357480717,
      "loss": 3.193,
      "step": 90022
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.015678882598877,
      "learning_rate": 0.000400982881729673,
      "loss": 2.8731,
      "step": 90023
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5790379047393799,
      "learning_rate": 0.00040097902986576414,
      "loss": 3.0798,
      "step": 90024
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6948442459106445,
      "learning_rate": 0.00040097517798308154,
      "loss": 3.0085,
      "step": 90025
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6728320121765137,
      "learning_rate": 0.0004009713260816256,
      "loss": 3.1626,
      "step": 90026
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4879887104034424,
      "learning_rate": 0.0004009674741613973,
      "loss": 3.0422,
      "step": 90027
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6386889219284058,
      "learning_rate": 0.0004009636222223972,
      "loss": 3.251,
      "step": 90028
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6775425672531128,
      "learning_rate": 0.000400959770264626,
      "loss": 2.9666,
      "step": 90029
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.54734206199646,
      "learning_rate": 0.0004009559182880845,
      "loss": 3.1172,
      "step": 90030
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9365073442459106,
      "learning_rate": 0.00040095206629277345,
      "loss": 3.0793,
      "step": 90031
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8124473094940186,
      "learning_rate": 0.0004009482142786934,
      "loss": 3.0369,
      "step": 90032
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4790626764297485,
      "learning_rate": 0.0004009443622458453,
      "loss": 2.8336,
      "step": 90033
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.118448495864868,
      "learning_rate": 0.00040094051019422966,
      "loss": 3.1792,
      "step": 90034
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.03359317779541,
      "learning_rate": 0.0004009366581238472,
      "loss": 3.085,
      "step": 90035
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5812913179397583,
      "learning_rate": 0.00040093280603469884,
      "loss": 3.0173,
      "step": 90036
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.37648606300354,
      "learning_rate": 0.00040092895392678505,
      "loss": 3.0308,
      "step": 90037
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6539031267166138,
      "learning_rate": 0.0004009251018001068,
      "loss": 3.1019,
      "step": 90038
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7840241193771362,
      "learning_rate": 0.0004009212496546646,
      "loss": 3.1079,
      "step": 90039
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9787672758102417,
      "learning_rate": 0.00040091739749045915,
      "loss": 2.9393,
      "step": 90040
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7619813680648804,
      "learning_rate": 0.0004009135453074914,
      "loss": 2.9857,
      "step": 90041
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1367828845977783,
      "learning_rate": 0.0004009096931057618,
      "loss": 3.0465,
      "step": 90042
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5472506284713745,
      "learning_rate": 0.0004009058408852712,
      "loss": 3.0965,
      "step": 90043
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.03438663482666,
      "learning_rate": 0.00040090198864602036,
      "loss": 3.1964,
      "step": 90044
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.225996732711792,
      "learning_rate": 0.00040089813638801,
      "loss": 2.7012,
      "step": 90045
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7002649307250977,
      "learning_rate": 0.0004008942841112406,
      "loss": 2.9927,
      "step": 90046
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8709678649902344,
      "learning_rate": 0.0004008904318157131,
      "loss": 2.6721,
      "step": 90047
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.419963836669922,
      "learning_rate": 0.0004008865795014282,
      "loss": 2.9317,
      "step": 90048
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7947158813476562,
      "learning_rate": 0.0004008827271683866,
      "loss": 3.0255,
      "step": 90049
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6094410419464111,
      "learning_rate": 0.0004008788748165889,
      "loss": 3.0532,
      "step": 90050
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5744587182998657,
      "learning_rate": 0.000400875022446036,
      "loss": 2.8404,
      "step": 90051
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.828096866607666,
      "learning_rate": 0.00040087117005672854,
      "loss": 2.899,
      "step": 90052
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.341423511505127,
      "learning_rate": 0.0004008673176486672,
      "loss": 3.1433,
      "step": 90053
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.835602283477783,
      "learning_rate": 0.0004008634652218527,
      "loss": 3.1497,
      "step": 90054
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6450649499893188,
      "learning_rate": 0.00040085961277628576,
      "loss": 2.9128,
      "step": 90055
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2692108154296875,
      "learning_rate": 0.0004008557603119672,
      "loss": 3.1027,
      "step": 90056
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.492548942565918,
      "learning_rate": 0.0004008519078288976,
      "loss": 2.8262,
      "step": 90057
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.307818651199341,
      "learning_rate": 0.0004008480553270777,
      "loss": 3.1782,
      "step": 90058
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.765432596206665,
      "learning_rate": 0.0004008442028065083,
      "loss": 3.0587,
      "step": 90059
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2741096019744873,
      "learning_rate": 0.00040084035026719005,
      "loss": 2.9327,
      "step": 90060
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9355040788650513,
      "learning_rate": 0.0004008364977091236,
      "loss": 3.262,
      "step": 90061
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0837457180023193,
      "learning_rate": 0.0004008326451323098,
      "loss": 3.09,
      "step": 90062
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8930447101593018,
      "learning_rate": 0.0004008287925367493,
      "loss": 2.8563,
      "step": 90063
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7519547939300537,
      "learning_rate": 0.0004008249399224428,
      "loss": 2.9419,
      "step": 90064
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3694872856140137,
      "learning_rate": 0.0004008210872893911,
      "loss": 2.8395,
      "step": 90065
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4383389949798584,
      "learning_rate": 0.00040081723463759487,
      "loss": 2.9797,
      "step": 90066
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4704952239990234,
      "learning_rate": 0.00040081338196705475,
      "loss": 3.049,
      "step": 90067
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.8395111560821533,
      "learning_rate": 0.0004008095292777716,
      "loss": 2.9898,
      "step": 90068
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9615020751953125,
      "learning_rate": 0.00040080567656974593,
      "loss": 3.211,
      "step": 90069
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8141661882400513,
      "learning_rate": 0.00040080182384297865,
      "loss": 2.9954,
      "step": 90070
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4000306129455566,
      "learning_rate": 0.0004007979710974704,
      "loss": 2.6898,
      "step": 90071
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.193284034729004,
      "learning_rate": 0.0004007941183332219,
      "loss": 3.1003,
      "step": 90072
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0619189739227295,
      "learning_rate": 0.00040079026555023386,
      "loss": 2.9854,
      "step": 90073
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1647818088531494,
      "learning_rate": 0.00040078641274850707,
      "loss": 3.0447,
      "step": 90074
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8101780414581299,
      "learning_rate": 0.0004007825599280421,
      "loss": 3.0988,
      "step": 90075
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.4040560722351074,
      "learning_rate": 0.0004007787070888398,
      "loss": 3.002,
      "step": 90076
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.4043524265289307,
      "learning_rate": 0.0004007748542309008,
      "loss": 3.178,
      "step": 90077
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.305277109146118,
      "learning_rate": 0.00040077100135422594,
      "loss": 2.8355,
      "step": 90078
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3606784343719482,
      "learning_rate": 0.0004007671484588157,
      "loss": 3.2975,
      "step": 90079
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7724549770355225,
      "learning_rate": 0.0004007632955446711,
      "loss": 3.2775,
      "step": 90080
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.9033687114715576,
      "learning_rate": 0.0004007594426117926,
      "loss": 2.8406,
      "step": 90081
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.8099887371063232,
      "learning_rate": 0.00040075558966018107,
      "loss": 3.0521,
      "step": 90082
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8574496507644653,
      "learning_rate": 0.0004007517366898372,
      "loss": 3.0758,
      "step": 90083
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.308439254760742,
      "learning_rate": 0.0004007478837007616,
      "loss": 2.9145,
      "step": 90084
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1045284271240234,
      "learning_rate": 0.00040074403069295504,
      "loss": 3.2406,
      "step": 90085
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5759042501449585,
      "learning_rate": 0.00040074017766641843,
      "loss": 3.0996,
      "step": 90086
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6720377206802368,
      "learning_rate": 0.0004007363246211521,
      "loss": 3.0385,
      "step": 90087
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7330466508865356,
      "learning_rate": 0.00040073247155715714,
      "loss": 3.0338,
      "step": 90088
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6079845428466797,
      "learning_rate": 0.0004007286184744341,
      "loss": 2.9577,
      "step": 90089
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7087031602859497,
      "learning_rate": 0.00040072476537298367,
      "loss": 2.9598,
      "step": 90090
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4750794172286987,
      "learning_rate": 0.0004007209122528066,
      "loss": 2.9816,
      "step": 90091
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.195739984512329,
      "learning_rate": 0.0004007170591139037,
      "loss": 2.852,
      "step": 90092
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7158052921295166,
      "learning_rate": 0.00040071320595627544,
      "loss": 3.0658,
      "step": 90093
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.802164912223816,
      "learning_rate": 0.0004007093527799228,
      "loss": 2.9503,
      "step": 90094
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4147906303405762,
      "learning_rate": 0.00040070549958484637,
      "loss": 3.2453,
      "step": 90095
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.483047604560852,
      "learning_rate": 0.0004007016463710469,
      "loss": 2.9658,
      "step": 90096
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8701564073562622,
      "learning_rate": 0.00040069779313852506,
      "loss": 2.9091,
      "step": 90097
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6942538022994995,
      "learning_rate": 0.00040069393988728164,
      "loss": 3.245,
      "step": 90098
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6855652332305908,
      "learning_rate": 0.00040069008661731726,
      "loss": 3.1739,
      "step": 90099
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5795929431915283,
      "learning_rate": 0.0004006862333286327,
      "loss": 3.1816,
      "step": 90100
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.992607593536377,
      "learning_rate": 0.00040068238002122876,
      "loss": 2.8891,
      "step": 90101
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5398738384246826,
      "learning_rate": 0.000400678526695106,
      "loss": 3.2641,
      "step": 90102
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6027119159698486,
      "learning_rate": 0.0004006746733502652,
      "loss": 3.1355,
      "step": 90103
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7259204387664795,
      "learning_rate": 0.00040067081998670706,
      "loss": 2.8681,
      "step": 90104
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.340531587600708,
      "learning_rate": 0.00040066696660443233,
      "loss": 2.9514,
      "step": 90105
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9038629531860352,
      "learning_rate": 0.0004006631132034416,
      "loss": 3.1499,
      "step": 90106
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.052858829498291,
      "learning_rate": 0.0004006592597837359,
      "loss": 3.002,
      "step": 90107
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.766648769378662,
      "learning_rate": 0.0004006554063453157,
      "loss": 3.103,
      "step": 90108
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.7190520763397217,
      "learning_rate": 0.00040065155288818166,
      "loss": 2.811,
      "step": 90109
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3907158374786377,
      "learning_rate": 0.0004006476994123347,
      "loss": 2.9571,
      "step": 90110
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.219416379928589,
      "learning_rate": 0.0004006438459177754,
      "loss": 3.0707,
      "step": 90111
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.51138436794281,
      "learning_rate": 0.00040063999240450444,
      "loss": 3.0222,
      "step": 90112
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.832753300666809,
      "learning_rate": 0.0004006361388725226,
      "loss": 3.0634,
      "step": 90113
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5774972438812256,
      "learning_rate": 0.0004006322853218307,
      "loss": 2.8765,
      "step": 90114
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5767768621444702,
      "learning_rate": 0.0004006284317524293,
      "loss": 3.0868,
      "step": 90115
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.436995267868042,
      "learning_rate": 0.0004006245781643192,
      "loss": 3.0756,
      "step": 90116
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.7924976348876953,
      "learning_rate": 0.00040062072455750115,
      "loss": 3.2621,
      "step": 90117
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5338125228881836,
      "learning_rate": 0.00040061687093197564,
      "loss": 3.1099,
      "step": 90118
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.812860131263733,
      "learning_rate": 0.0004006130172877437,
      "loss": 3.1637,
      "step": 90119
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.911511778831482,
      "learning_rate": 0.00040060916362480583,
      "loss": 3.058,
      "step": 90120
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.033376693725586,
      "learning_rate": 0.00040060530994316283,
      "loss": 2.8055,
      "step": 90121
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6993337869644165,
      "learning_rate": 0.0004006014562428155,
      "loss": 2.9232,
      "step": 90122
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5766302347183228,
      "learning_rate": 0.0004005976025237643,
      "loss": 2.8331,
      "step": 90123
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9913506507873535,
      "learning_rate": 0.0004005937487860102,
      "loss": 3.2505,
      "step": 90124
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.190049409866333,
      "learning_rate": 0.0004005898950295538,
      "loss": 2.9195,
      "step": 90125
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.853519082069397,
      "learning_rate": 0.00040058604125439587,
      "loss": 3.1356,
      "step": 90126
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.320061445236206,
      "learning_rate": 0.0004005821874605371,
      "loss": 3.2206,
      "step": 90127
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6336034536361694,
      "learning_rate": 0.00040057833364797813,
      "loss": 2.8416,
      "step": 90128
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6205427646636963,
      "learning_rate": 0.0004005744798167198,
      "loss": 2.8551,
      "step": 90129
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.387146472930908,
      "learning_rate": 0.00040057062596676285,
      "loss": 2.9237,
      "step": 90130
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9767553806304932,
      "learning_rate": 0.00040056677209810777,
      "loss": 3.1685,
      "step": 90131
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.61810302734375,
      "learning_rate": 0.0004005629182107555,
      "loss": 2.9226,
      "step": 90132
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6795560121536255,
      "learning_rate": 0.0004005590643047067,
      "loss": 2.9268,
      "step": 90133
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7092993259429932,
      "learning_rate": 0.000400555210379962,
      "loss": 3.142,
      "step": 90134
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.925894856452942,
      "learning_rate": 0.0004005513564365223,
      "loss": 2.9757,
      "step": 90135
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6515518426895142,
      "learning_rate": 0.0004005475024743881,
      "loss": 2.9637,
      "step": 90136
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5403344631195068,
      "learning_rate": 0.00040054364849356034,
      "loss": 2.8126,
      "step": 90137
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7333024740219116,
      "learning_rate": 0.00040053979449403956,
      "loss": 2.8949,
      "step": 90138
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7193360328674316,
      "learning_rate": 0.0004005359404758265,
      "loss": 3.2499,
      "step": 90139
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5521527528762817,
      "learning_rate": 0.00040053208643892193,
      "loss": 3.1404,
      "step": 90140
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1439101696014404,
      "learning_rate": 0.0004005282323833266,
      "loss": 3.0324,
      "step": 90141
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7154170274734497,
      "learning_rate": 0.0004005243783090411,
      "loss": 3.1235,
      "step": 90142
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.137880563735962,
      "learning_rate": 0.0004005205242160662,
      "loss": 3.0377,
      "step": 90143
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.52582585811615,
      "learning_rate": 0.0004005166701044027,
      "loss": 2.8892,
      "step": 90144
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7127268314361572,
      "learning_rate": 0.00040051281597405124,
      "loss": 2.9131,
      "step": 90145
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6766619682312012,
      "learning_rate": 0.0004005089618250126,
      "loss": 3.0103,
      "step": 90146
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7712754011154175,
      "learning_rate": 0.00040050510765728733,
      "loss": 3.0134,
      "step": 90147
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6986674070358276,
      "learning_rate": 0.0004005012534708764,
      "loss": 3.2597,
      "step": 90148
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7393920421600342,
      "learning_rate": 0.00040049739926578025,
      "loss": 2.7927,
      "step": 90149
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7589927911758423,
      "learning_rate": 0.00040049354504199984,
      "loss": 2.8745,
      "step": 90150
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.835656762123108,
      "learning_rate": 0.0004004896907995357,
      "loss": 2.7465,
      "step": 90151
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8060240745544434,
      "learning_rate": 0.0004004858365383887,
      "loss": 3.077,
      "step": 90152
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8711858987808228,
      "learning_rate": 0.00040048198225855954,
      "loss": 2.982,
      "step": 90153
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6658284664154053,
      "learning_rate": 0.00040047812796004876,
      "loss": 3.1444,
      "step": 90154
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8837268352508545,
      "learning_rate": 0.00040047427364285724,
      "loss": 3.0001,
      "step": 90155
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0127830505371094,
      "learning_rate": 0.00040047041930698566,
      "loss": 2.9807,
      "step": 90156
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.236043930053711,
      "learning_rate": 0.00040046656495243475,
      "loss": 3.1572,
      "step": 90157
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7639203071594238,
      "learning_rate": 0.0004004627105792052,
      "loss": 3.0016,
      "step": 90158
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.712111234664917,
      "learning_rate": 0.00040045885618729773,
      "loss": 2.8208,
      "step": 90159
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.5561790466308594,
      "learning_rate": 0.00040045500177671303,
      "loss": 2.8835,
      "step": 90160
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.829492211341858,
      "learning_rate": 0.00040045114734745186,
      "loss": 3.1067,
      "step": 90161
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8533695936203003,
      "learning_rate": 0.00040044729289951504,
      "loss": 3.0398,
      "step": 90162
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.642667055130005,
      "learning_rate": 0.00040044343843290305,
      "loss": 3.0927,
      "step": 90163
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.492361307144165,
      "learning_rate": 0.00040043958394761676,
      "loss": 2.8156,
      "step": 90164
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.066662073135376,
      "learning_rate": 0.0004004357294436569,
      "loss": 2.8885,
      "step": 90165
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.889452576637268,
      "learning_rate": 0.00040043187492102406,
      "loss": 3.0236,
      "step": 90166
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6207740306854248,
      "learning_rate": 0.000400428020379719,
      "loss": 2.9045,
      "step": 90167
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.575187087059021,
      "learning_rate": 0.00040042416581974257,
      "loss": 3.0698,
      "step": 90168
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2937839031219482,
      "learning_rate": 0.0004004203112410954,
      "loss": 2.9995,
      "step": 90169
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9962369203567505,
      "learning_rate": 0.0004004164566437782,
      "loss": 2.7938,
      "step": 90170
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4056165218353271,
      "learning_rate": 0.00040041260202779166,
      "loss": 3.0075,
      "step": 90171
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0145153999328613,
      "learning_rate": 0.00040040874739313647,
      "loss": 2.9383,
      "step": 90172
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6967661380767822,
      "learning_rate": 0.0004004048927398135,
      "loss": 3.0555,
      "step": 90173
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6560684442520142,
      "learning_rate": 0.00040040103806782324,
      "loss": 3.0509,
      "step": 90174
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.698407769203186,
      "learning_rate": 0.0004003971833771667,
      "loss": 2.9611,
      "step": 90175
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4872373342514038,
      "learning_rate": 0.00040039332866784427,
      "loss": 2.8393,
      "step": 90176
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6352514028549194,
      "learning_rate": 0.00040038947393985694,
      "loss": 2.8148,
      "step": 90177
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7636648416519165,
      "learning_rate": 0.00040038561919320524,
      "loss": 3.0392,
      "step": 90178
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.3498129844665527,
      "learning_rate": 0.0004003817644278899,
      "loss": 3.0657,
      "step": 90179
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6262272596359253,
      "learning_rate": 0.00040037790964391173,
      "loss": 2.9408,
      "step": 90180
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6135796308517456,
      "learning_rate": 0.0004003740548412716,
      "loss": 3.0078,
      "step": 90181
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7124671936035156,
      "learning_rate": 0.0004003702000199698,
      "loss": 3.2259,
      "step": 90182
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.765510320663452,
      "learning_rate": 0.0004003663451800074,
      "loss": 2.6989,
      "step": 90183
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.503297805786133,
      "learning_rate": 0.00040036249032138503,
      "loss": 2.7779,
      "step": 90184
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9284015893936157,
      "learning_rate": 0.0004003586354441033,
      "loss": 2.9652,
      "step": 90185
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8126578330993652,
      "learning_rate": 0.00040035478054816295,
      "loss": 3.0489,
      "step": 90186
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.050875425338745,
      "learning_rate": 0.0004003509256335649,
      "loss": 3.1595,
      "step": 90187
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1446664333343506,
      "learning_rate": 0.0004003470707003096,
      "loss": 2.8591,
      "step": 90188
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.0914509296417236,
      "learning_rate": 0.00040034321574839793,
      "loss": 3.1018,
      "step": 90189
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.233663558959961,
      "learning_rate": 0.00040033936077783067,
      "loss": 2.7418,
      "step": 90190
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.6644058227539062,
      "learning_rate": 0.00040033550578860824,
      "loss": 3.1691,
      "step": 90191
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8331938982009888,
      "learning_rate": 0.0004003316507807317,
      "loss": 3.0482,
      "step": 90192
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.5440096855163574,
      "learning_rate": 0.00040032779575420154,
      "loss": 2.8393,
      "step": 90193
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2751412391662598,
      "learning_rate": 0.00040032394070901856,
      "loss": 2.9482,
      "step": 90194
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2318923473358154,
      "learning_rate": 0.00040032008564518345,
      "loss": 2.9158,
      "step": 90195
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.629648447036743,
      "learning_rate": 0.00040031623056269703,
      "loss": 3.2697,
      "step": 90196
    },
    {
      "epoch": 1.17,
      "grad_norm": 3.511394739151001,
      "learning_rate": 0.0004003123754615598,
      "loss": 3.1375,
      "step": 90197
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.805619716644287,
      "learning_rate": 0.00040030852034177264,
      "loss": 3.0183,
      "step": 90198
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6058639287948608,
      "learning_rate": 0.0004003046652033363,
      "loss": 3.0147,
      "step": 90199
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.1383800506591797,
      "learning_rate": 0.0004003008100462513,
      "loss": 2.9482,
      "step": 90200
    },
    {
      "epoch": 1.17,
      "grad_norm": 4.31391716003418,
      "learning_rate": 0.0004002969548705186,
      "loss": 2.8825,
      "step": 90201
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.071040153503418,
      "learning_rate": 0.0004002930996761388,
      "loss": 3.095,
      "step": 90202
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.22314715385437,
      "learning_rate": 0.0004002892444631126,
      "loss": 3.3083,
      "step": 90203
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.154151201248169,
      "learning_rate": 0.0004002853892314407,
      "loss": 2.9041,
      "step": 90204
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9143606424331665,
      "learning_rate": 0.0004002815339811239,
      "loss": 2.9538,
      "step": 90205
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4646779298782349,
      "learning_rate": 0.00040027767871216285,
      "loss": 3.1669,
      "step": 90206
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2911643981933594,
      "learning_rate": 0.00040027382342455823,
      "loss": 2.9457,
      "step": 90207
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8050096035003662,
      "learning_rate": 0.0004002699681183109,
      "loss": 3.0422,
      "step": 90208
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9359631538391113,
      "learning_rate": 0.00040026611279342145,
      "loss": 2.9086,
      "step": 90209
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8619940280914307,
      "learning_rate": 0.0004002622574498906,
      "loss": 2.9864,
      "step": 90210
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8030130863189697,
      "learning_rate": 0.0004002584020877192,
      "loss": 3.0169,
      "step": 90211
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7332897186279297,
      "learning_rate": 0.00040025454670690783,
      "loss": 3.0681,
      "step": 90212
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.015292167663574,
      "learning_rate": 0.0004002506913074572,
      "loss": 3.0249,
      "step": 90213
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.901865005493164,
      "learning_rate": 0.00040024683588936813,
      "loss": 2.9623,
      "step": 90214
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7537480592727661,
      "learning_rate": 0.00040024298045264126,
      "loss": 3.1208,
      "step": 90215
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.9496315717697144,
      "learning_rate": 0.00040023912499727733,
      "loss": 3.3952,
      "step": 90216
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.578060269355774,
      "learning_rate": 0.00040023526952327705,
      "loss": 2.9928,
      "step": 90217
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.833065152168274,
      "learning_rate": 0.0004002314140306412,
      "loss": 2.9956,
      "step": 90218
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.8679040670394897,
      "learning_rate": 0.00040022755851937036,
      "loss": 2.7608,
      "step": 90219
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.119053840637207,
      "learning_rate": 0.0004002237029894653,
      "loss": 2.6585,
      "step": 90220
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.706431269645691,
      "learning_rate": 0.00040021984744092684,
      "loss": 3.1259,
      "step": 90221
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7781587839126587,
      "learning_rate": 0.0004002159918737556,
      "loss": 3.0352,
      "step": 90222
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6274998188018799,
      "learning_rate": 0.00040021213628795233,
      "loss": 2.9956,
      "step": 90223
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5507302284240723,
      "learning_rate": 0.00040020828068351765,
      "loss": 2.8562,
      "step": 90224
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6851402521133423,
      "learning_rate": 0.00040020442506045247,
      "loss": 3.0546,
      "step": 90225
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.157188892364502,
      "learning_rate": 0.00040020056941875734,
      "loss": 2.9182,
      "step": 90226
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.953194499015808,
      "learning_rate": 0.00040019671375843296,
      "loss": 3.2606,
      "step": 90227
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.492274284362793,
      "learning_rate": 0.0004001928580794802,
      "loss": 2.962,
      "step": 90228
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5746667385101318,
      "learning_rate": 0.0004001890023818997,
      "loss": 3.1895,
      "step": 90229
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4636890888214111,
      "learning_rate": 0.00040018514666569213,
      "loss": 2.9733,
      "step": 90230
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4300217628479004,
      "learning_rate": 0.00040018129093085834,
      "loss": 2.8443,
      "step": 90231
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.6613233089447021,
      "learning_rate": 0.0004001774351773989,
      "loss": 3.0666,
      "step": 90232
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5157288312911987,
      "learning_rate": 0.0004001735794053146,
      "loss": 3.2509,
      "step": 90233
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2244527339935303,
      "learning_rate": 0.0004001697236146061,
      "loss": 2.7158,
      "step": 90234
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7296861410140991,
      "learning_rate": 0.0004001658678052742,
      "loss": 3.1795,
      "step": 90235
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7953648567199707,
      "learning_rate": 0.00040016201197731955,
      "loss": 2.9056,
      "step": 90236
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.839963436126709,
      "learning_rate": 0.00040015815613074284,
      "loss": 3.1219,
      "step": 90237
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.4540584087371826,
      "learning_rate": 0.00040015430026554496,
      "loss": 2.9638,
      "step": 90238
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.5772854089736938,
      "learning_rate": 0.0004001504443817264,
      "loss": 3.0757,
      "step": 90239
    },
    {
      "epoch": 1.17,
      "grad_norm": 1.7373361587524414,
      "learning_rate": 0.000400146588479288,
      "loss": 2.8922,
      "step": 90240
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.2000985145568848,
      "learning_rate": 0.0004001427325582305,
      "loss": 3.0577,
      "step": 90241
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.467954158782959,
      "learning_rate": 0.00040013887661855457,
      "loss": 2.8489,
      "step": 90242
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.343616485595703,
      "learning_rate": 0.0004001350206602609,
      "loss": 2.9511,
      "step": 90243
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.878445029258728,
      "learning_rate": 0.00040013116468335037,
      "loss": 3.4049,
      "step": 90244
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5717895030975342,
      "learning_rate": 0.0004001273086878234,
      "loss": 3.0104,
      "step": 90245
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.045038938522339,
      "learning_rate": 0.0004001234526736809,
      "loss": 2.8172,
      "step": 90246
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.8341240882873535,
      "learning_rate": 0.00040011959664092357,
      "loss": 3.0142,
      "step": 90247
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.992634892463684,
      "learning_rate": 0.0004001157405895522,
      "loss": 2.9144,
      "step": 90248
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5719023942947388,
      "learning_rate": 0.0004001118845195673,
      "loss": 3.2966,
      "step": 90249
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3388164043426514,
      "learning_rate": 0.00040010802843096975,
      "loss": 3.1727,
      "step": 90250
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5945408344268799,
      "learning_rate": 0.0004001041723237603,
      "loss": 3.1763,
      "step": 90251
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8689322471618652,
      "learning_rate": 0.00040010031619793954,
      "loss": 3.0133,
      "step": 90252
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5280145406723022,
      "learning_rate": 0.00040009646005350827,
      "loss": 3.1526,
      "step": 90253
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8002490997314453,
      "learning_rate": 0.0004000926038904672,
      "loss": 2.9881,
      "step": 90254
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.6646673679351807,
      "learning_rate": 0.0004000887477088169,
      "loss": 2.9419,
      "step": 90255
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.475659728050232,
      "learning_rate": 0.0004000848915085583,
      "loss": 3.0199,
      "step": 90256
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7582274675369263,
      "learning_rate": 0.00040008103528969206,
      "loss": 3.0046,
      "step": 90257
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.773500919342041,
      "learning_rate": 0.00040007717905221884,
      "loss": 2.9982,
      "step": 90258
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.973285675048828,
      "learning_rate": 0.0004000733227961393,
      "loss": 2.9537,
      "step": 90259
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8133671283721924,
      "learning_rate": 0.0004000694665214544,
      "loss": 3.1311,
      "step": 90260
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1582813262939453,
      "learning_rate": 0.00040006561022816456,
      "loss": 3.1311,
      "step": 90261
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.840171813964844,
      "learning_rate": 0.00040006175391627067,
      "loss": 2.8599,
      "step": 90262
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.40720796585083,
      "learning_rate": 0.0004000578975857735,
      "loss": 3.1882,
      "step": 90263
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7274249792099,
      "learning_rate": 0.0004000540412366736,
      "loss": 2.8811,
      "step": 90264
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.5276966094970703,
      "learning_rate": 0.0004000501848689717,
      "loss": 3.0235,
      "step": 90265
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.59126353263855,
      "learning_rate": 0.00040004632848266873,
      "loss": 3.0715,
      "step": 90266
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.462415933609009,
      "learning_rate": 0.00040004247207776515,
      "loss": 3.0983,
      "step": 90267
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.16056752204895,
      "learning_rate": 0.00040003861565426175,
      "loss": 2.9067,
      "step": 90268
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.888912320137024,
      "learning_rate": 0.00040003475921215935,
      "loss": 3.0527,
      "step": 90269
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.013998031616211,
      "learning_rate": 0.00040003090275145866,
      "loss": 3.2196,
      "step": 90270
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.4030587673187256,
      "learning_rate": 0.0004000270462721602,
      "loss": 3.0552,
      "step": 90271
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8172667026519775,
      "learning_rate": 0.000400023189774265,
      "loss": 2.9973,
      "step": 90272
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8233492374420166,
      "learning_rate": 0.0004000193332577734,
      "loss": 3.073,
      "step": 90273
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.040759325027466,
      "learning_rate": 0.00040001547672268643,
      "loss": 3.1626,
      "step": 90274
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8508045673370361,
      "learning_rate": 0.0004000116201690048,
      "loss": 2.998,
      "step": 90275
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.834075689315796,
      "learning_rate": 0.00040000776359672895,
      "loss": 3.1873,
      "step": 90276
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.190751314163208,
      "learning_rate": 0.0004000039070058598,
      "loss": 3.2106,
      "step": 90277
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.874258279800415,
      "learning_rate": 0.0004000000503963981,
      "loss": 3.1643,
      "step": 90278
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6902835369110107,
      "learning_rate": 0.0003999961937683444,
      "loss": 2.9548,
      "step": 90279
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4047096967697144,
      "learning_rate": 0.0003999923371216996,
      "loss": 3.0451,
      "step": 90280
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5071156024932861,
      "learning_rate": 0.0003999884804564643,
      "loss": 2.9004,
      "step": 90281
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4999860525131226,
      "learning_rate": 0.0003999846237726393,
      "loss": 3.0309,
      "step": 90282
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4226417541503906,
      "learning_rate": 0.00039998076707022524,
      "loss": 3.1565,
      "step": 90283
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5395528078079224,
      "learning_rate": 0.00039997691034922295,
      "loss": 3.0524,
      "step": 90284
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2210488319396973,
      "learning_rate": 0.000399973053609633,
      "loss": 3.0778,
      "step": 90285
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7530437707901,
      "learning_rate": 0.00039996919685145605,
      "loss": 3.0017,
      "step": 90286
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6121021509170532,
      "learning_rate": 0.00039996534007469313,
      "loss": 2.9506,
      "step": 90287
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2565178871154785,
      "learning_rate": 0.0003999614832793446,
      "loss": 3.0895,
      "step": 90288
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7396560907363892,
      "learning_rate": 0.0003999576264654115,
      "loss": 2.6617,
      "step": 90289
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5318163633346558,
      "learning_rate": 0.0003999537696328943,
      "loss": 3.0671,
      "step": 90290
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.217500686645508,
      "learning_rate": 0.0003999499127817938,
      "loss": 2.8902,
      "step": 90291
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4678330421447754,
      "learning_rate": 0.00039994605591211075,
      "loss": 2.9258,
      "step": 90292
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8335392475128174,
      "learning_rate": 0.0003999421990238459,
      "loss": 3.0724,
      "step": 90293
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6887025833129883,
      "learning_rate": 0.0003999383421169998,
      "loss": 2.8763,
      "step": 90294
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6727806329727173,
      "learning_rate": 0.0003999344851915733,
      "loss": 2.9889,
      "step": 90295
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8011622428894043,
      "learning_rate": 0.0003999306282475672,
      "loss": 2.7796,
      "step": 90296
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8672319650650024,
      "learning_rate": 0.000399926771284982,
      "loss": 3.2092,
      "step": 90297
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5457406044006348,
      "learning_rate": 0.00039992291430381857,
      "loss": 3.0585,
      "step": 90298
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.539817452430725,
      "learning_rate": 0.00039991905730407765,
      "loss": 2.752,
      "step": 90299
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7205923795700073,
      "learning_rate": 0.0003999152002857597,
      "loss": 2.8481,
      "step": 90300
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.180908679962158,
      "learning_rate": 0.0003999113432488658,
      "loss": 3.0843,
      "step": 90301
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0567121505737305,
      "learning_rate": 0.00039990748619339643,
      "loss": 3.1045,
      "step": 90302
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5316691398620605,
      "learning_rate": 0.0003999036291193524,
      "loss": 3.0418,
      "step": 90303
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5484050512313843,
      "learning_rate": 0.00039989977202673426,
      "loss": 2.8591,
      "step": 90304
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2529778480529785,
      "learning_rate": 0.00039989591491554306,
      "loss": 3.0172,
      "step": 90305
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1108736991882324,
      "learning_rate": 0.00039989205778577925,
      "loss": 2.9003,
      "step": 90306
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7368344068527222,
      "learning_rate": 0.0003998882006374436,
      "loss": 3.1296,
      "step": 90307
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0762720108032227,
      "learning_rate": 0.0003998843434705369,
      "loss": 3.0122,
      "step": 90308
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.689773440361023,
      "learning_rate": 0.0003998804862850597,
      "loss": 2.8492,
      "step": 90309
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.776732087135315,
      "learning_rate": 0.00039987662908101294,
      "loss": 3.0401,
      "step": 90310
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5241920948028564,
      "learning_rate": 0.00039987277185839725,
      "loss": 2.8843,
      "step": 90311
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0891857147216797,
      "learning_rate": 0.00039986891461721324,
      "loss": 2.8402,
      "step": 90312
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8169560432434082,
      "learning_rate": 0.0003998650573574617,
      "loss": 2.9714,
      "step": 90313
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6317293643951416,
      "learning_rate": 0.00039986120007914346,
      "loss": 2.8119,
      "step": 90314
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.267648935317993,
      "learning_rate": 0.0003998573427822591,
      "loss": 3.1142,
      "step": 90315
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9648387432098389,
      "learning_rate": 0.0003998534854668093,
      "loss": 2.8639,
      "step": 90316
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7975332736968994,
      "learning_rate": 0.000399849628132795,
      "loss": 3.2636,
      "step": 90317
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9052587747573853,
      "learning_rate": 0.00039984577078021663,
      "loss": 3.1326,
      "step": 90318
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2422876358032227,
      "learning_rate": 0.00039984191340907516,
      "loss": 2.8435,
      "step": 90319
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3651676177978516,
      "learning_rate": 0.00039983805601937107,
      "loss": 3.3431,
      "step": 90320
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.294494390487671,
      "learning_rate": 0.00039983419861110527,
      "loss": 2.8884,
      "step": 90321
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8849774599075317,
      "learning_rate": 0.0003998303411842785,
      "loss": 3.1104,
      "step": 90322
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.335047721862793,
      "learning_rate": 0.0003998264837388912,
      "loss": 3.1466,
      "step": 90323
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9490970373153687,
      "learning_rate": 0.00039982262627494443,
      "loss": 2.8849,
      "step": 90324
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.1438605785369873,
      "learning_rate": 0.00039981876879243865,
      "loss": 2.8399,
      "step": 90325
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0161492824554443,
      "learning_rate": 0.0003998149112913747,
      "loss": 2.9998,
      "step": 90326
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5850789546966553,
      "learning_rate": 0.0003998110537717533,
      "loss": 3.1036,
      "step": 90327
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.408766984939575,
      "learning_rate": 0.00039980719623357514,
      "loss": 2.9761,
      "step": 90328
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6844130754470825,
      "learning_rate": 0.00039980333867684087,
      "loss": 3.0327,
      "step": 90329
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7163763046264648,
      "learning_rate": 0.00039979948110155145,
      "loss": 2.9992,
      "step": 90330
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.68406343460083,
      "learning_rate": 0.0003997956235077072,
      "loss": 2.987,
      "step": 90331
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9373764991760254,
      "learning_rate": 0.0003997917658953091,
      "loss": 3.0924,
      "step": 90332
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6675788164138794,
      "learning_rate": 0.000399787908264358,
      "loss": 2.9248,
      "step": 90333
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.85443115234375,
      "learning_rate": 0.0003997840506148543,
      "loss": 3.131,
      "step": 90334
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5562045574188232,
      "learning_rate": 0.0003997801929467989,
      "loss": 3.0951,
      "step": 90335
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8701233863830566,
      "learning_rate": 0.0003997763352601925,
      "loss": 3.0399,
      "step": 90336
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4692184925079346,
      "learning_rate": 0.0003997724775550358,
      "loss": 3.025,
      "step": 90337
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8516052961349487,
      "learning_rate": 0.00039976861983132944,
      "loss": 3.0886,
      "step": 90338
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.238722562789917,
      "learning_rate": 0.0003997647620890743,
      "loss": 2.7406,
      "step": 90339
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0034472942352295,
      "learning_rate": 0.0003997609043282709,
      "loss": 2.8464,
      "step": 90340
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.585758924484253,
      "learning_rate": 0.00039975704654892014,
      "loss": 3.1388,
      "step": 90341
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6474205255508423,
      "learning_rate": 0.00039975318875102266,
      "loss": 3.0652,
      "step": 90342
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.089721918106079,
      "learning_rate": 0.0003997493309345792,
      "loss": 3.0062,
      "step": 90343
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.526355504989624,
      "learning_rate": 0.0003997454730995904,
      "loss": 3.0922,
      "step": 90344
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.017971992492676,
      "learning_rate": 0.00039974161524605707,
      "loss": 2.9458,
      "step": 90345
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.237511396408081,
      "learning_rate": 0.00039973775737397983,
      "loss": 2.8975,
      "step": 90346
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.655710220336914,
      "learning_rate": 0.00039973389948335953,
      "loss": 2.8944,
      "step": 90347
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9252958297729492,
      "learning_rate": 0.00039973004157419677,
      "loss": 3.0837,
      "step": 90348
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.819883346557617,
      "learning_rate": 0.00039972618364649237,
      "loss": 2.887,
      "step": 90349
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5129082202911377,
      "learning_rate": 0.0003997223257002469,
      "loss": 3.0398,
      "step": 90350
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5475388765335083,
      "learning_rate": 0.0003997184677354613,
      "loss": 3.0965,
      "step": 90351
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6605610847473145,
      "learning_rate": 0.00039971460975213606,
      "loss": 2.9929,
      "step": 90352
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.019219160079956,
      "learning_rate": 0.00039971075175027196,
      "loss": 3.1389,
      "step": 90353
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6797438859939575,
      "learning_rate": 0.0003997068937298698,
      "loss": 3.1306,
      "step": 90354
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0986831188201904,
      "learning_rate": 0.00039970303569093025,
      "loss": 3.1767,
      "step": 90355
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9313631057739258,
      "learning_rate": 0.000399699177633454,
      "loss": 2.7456,
      "step": 90356
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.893223762512207,
      "learning_rate": 0.00039969531955744186,
      "loss": 2.8186,
      "step": 90357
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.661904215812683,
      "learning_rate": 0.0003996914614628944,
      "loss": 3.067,
      "step": 90358
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7381099462509155,
      "learning_rate": 0.0003996876033498124,
      "loss": 3.0317,
      "step": 90359
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0928702354431152,
      "learning_rate": 0.0003996837452181967,
      "loss": 3.0244,
      "step": 90360
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9603219032287598,
      "learning_rate": 0.0003996798870680479,
      "loss": 3.0641,
      "step": 90361
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6487456560134888,
      "learning_rate": 0.0003996760288993666,
      "loss": 2.9061,
      "step": 90362
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.929335355758667,
      "learning_rate": 0.00039967217071215373,
      "loss": 2.9477,
      "step": 90363
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0996668338775635,
      "learning_rate": 0.0003996683125064099,
      "loss": 2.9564,
      "step": 90364
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5330548286437988,
      "learning_rate": 0.0003996644542821358,
      "loss": 3.084,
      "step": 90365
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6275005340576172,
      "learning_rate": 0.00039966059603933234,
      "loss": 2.7691,
      "step": 90366
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8093163967132568,
      "learning_rate": 0.00039965673777799996,
      "loss": 3.2383,
      "step": 90367
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.847790241241455,
      "learning_rate": 0.00039965287949813953,
      "loss": 2.9114,
      "step": 90368
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0234735012054443,
      "learning_rate": 0.0003996490211997518,
      "loss": 3.1924,
      "step": 90369
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0121700763702393,
      "learning_rate": 0.00039964516288283744,
      "loss": 3.0703,
      "step": 90370
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.940474510192871,
      "learning_rate": 0.00039964130454739707,
      "loss": 3.0589,
      "step": 90371
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7213661670684814,
      "learning_rate": 0.00039963744619343164,
      "loss": 2.9369,
      "step": 90372
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.9020626544952393,
      "learning_rate": 0.00039963358782094165,
      "loss": 2.9295,
      "step": 90373
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.338080883026123,
      "learning_rate": 0.0003996297294299279,
      "loss": 3.0201,
      "step": 90374
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.50030517578125,
      "learning_rate": 0.0003996258710203911,
      "loss": 3.0291,
      "step": 90375
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7395048141479492,
      "learning_rate": 0.000399622012592332,
      "loss": 2.9378,
      "step": 90376
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.797306537628174,
      "learning_rate": 0.0003996181541457512,
      "loss": 3.057,
      "step": 90377
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.392944097518921,
      "learning_rate": 0.00039961429568064963,
      "loss": 3.0156,
      "step": 90378
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.420215129852295,
      "learning_rate": 0.00039961043719702777,
      "loss": 2.8968,
      "step": 90379
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6994916200637817,
      "learning_rate": 0.0003996065786948865,
      "loss": 3.0578,
      "step": 90380
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.691620111465454,
      "learning_rate": 0.00039960272017422645,
      "loss": 3.2545,
      "step": 90381
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.362584352493286,
      "learning_rate": 0.0003995988616350484,
      "loss": 3.1091,
      "step": 90382
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.808432698249817,
      "learning_rate": 0.0003995950030773531,
      "loss": 2.8311,
      "step": 90383
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.572119951248169,
      "learning_rate": 0.0003995911445011411,
      "loss": 2.988,
      "step": 90384
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.060880184173584,
      "learning_rate": 0.0003995872859064133,
      "loss": 2.9493,
      "step": 90385
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.758107304573059,
      "learning_rate": 0.00039958342729317037,
      "loss": 2.9522,
      "step": 90386
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6834008693695068,
      "learning_rate": 0.00039957956866141294,
      "loss": 3.1791,
      "step": 90387
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.74453604221344,
      "learning_rate": 0.00039957571001114184,
      "loss": 2.972,
      "step": 90388
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.142350196838379,
      "learning_rate": 0.00039957185134235765,
      "loss": 2.77,
      "step": 90389
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8883882761001587,
      "learning_rate": 0.00039956799265506126,
      "loss": 3.0021,
      "step": 90390
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.096423387527466,
      "learning_rate": 0.0003995641339492533,
      "loss": 2.759,
      "step": 90391
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3517847061157227,
      "learning_rate": 0.00039956027522493445,
      "loss": 3.0028,
      "step": 90392
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1624231338500977,
      "learning_rate": 0.00039955641648210544,
      "loss": 3.2657,
      "step": 90393
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4644310474395752,
      "learning_rate": 0.00039955255772076704,
      "loss": 2.8344,
      "step": 90394
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.524112582206726,
      "learning_rate": 0.0003995486989409199,
      "loss": 3.1701,
      "step": 90395
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.124729871749878,
      "learning_rate": 0.0003995448401425649,
      "loss": 3.1313,
      "step": 90396
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7446309328079224,
      "learning_rate": 0.0003995409813257025,
      "loss": 2.9531,
      "step": 90397
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9189869165420532,
      "learning_rate": 0.0003995371224903336,
      "loss": 2.8883,
      "step": 90398
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7191951274871826,
      "learning_rate": 0.00039953326363645895,
      "loss": 2.9684,
      "step": 90399
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6790337562561035,
      "learning_rate": 0.00039952940476407905,
      "loss": 3.049,
      "step": 90400
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7054024934768677,
      "learning_rate": 0.0003995255458731948,
      "loss": 3.1946,
      "step": 90401
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5748789310455322,
      "learning_rate": 0.000399521686963807,
      "loss": 2.9612,
      "step": 90402
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.002387285232544,
      "learning_rate": 0.0003995178280359161,
      "loss": 3.0334,
      "step": 90403
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7240054607391357,
      "learning_rate": 0.0003995139690895229,
      "loss": 2.9363,
      "step": 90404
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4594497680664062,
      "learning_rate": 0.00039951011012462833,
      "loss": 3.0649,
      "step": 90405
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9312502145767212,
      "learning_rate": 0.00039950625114123286,
      "loss": 2.9163,
      "step": 90406
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6868482828140259,
      "learning_rate": 0.00039950239213933737,
      "loss": 2.9974,
      "step": 90407
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0564727783203125,
      "learning_rate": 0.00039949853311894236,
      "loss": 3.2529,
      "step": 90408
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7904425859451294,
      "learning_rate": 0.00039949467408004885,
      "loss": 2.9598,
      "step": 90409
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.232398748397827,
      "learning_rate": 0.00039949081502265734,
      "loss": 3.014,
      "step": 90410
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.501816987991333,
      "learning_rate": 0.00039948695594676863,
      "loss": 2.8393,
      "step": 90411
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.518324375152588,
      "learning_rate": 0.0003994830968523835,
      "loss": 2.9751,
      "step": 90412
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.33496356010437,
      "learning_rate": 0.0003994792377395024,
      "loss": 3.0413,
      "step": 90413
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.110020399093628,
      "learning_rate": 0.00039947537860812634,
      "loss": 2.89,
      "step": 90414
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9859721660614014,
      "learning_rate": 0.00039947151945825596,
      "loss": 3.3222,
      "step": 90415
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7727876901626587,
      "learning_rate": 0.00039946766028989186,
      "loss": 2.8725,
      "step": 90416
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2454030513763428,
      "learning_rate": 0.00039946380110303484,
      "loss": 3.1231,
      "step": 90417
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0592973232269287,
      "learning_rate": 0.0003994599418976858,
      "loss": 3.1565,
      "step": 90418
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.832953929901123,
      "learning_rate": 0.00039945608267384506,
      "loss": 2.9453,
      "step": 90419
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.80411696434021,
      "learning_rate": 0.00039945222343151364,
      "loss": 2.8489,
      "step": 90420
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4648979902267456,
      "learning_rate": 0.0003994483641706922,
      "loss": 3.1203,
      "step": 90421
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.703545093536377,
      "learning_rate": 0.0003994445048913814,
      "loss": 2.9956,
      "step": 90422
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.95090913772583,
      "learning_rate": 0.000399440645593582,
      "loss": 3.1481,
      "step": 90423
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8220243453979492,
      "learning_rate": 0.0003994367862772947,
      "loss": 2.9544,
      "step": 90424
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.173224925994873,
      "learning_rate": 0.0003994329269425202,
      "loss": 3.0959,
      "step": 90425
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.737567901611328,
      "learning_rate": 0.00039942906758925925,
      "loss": 2.8534,
      "step": 90426
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6290178298950195,
      "learning_rate": 0.0003994252082175127,
      "loss": 3.0583,
      "step": 90427
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6676311492919922,
      "learning_rate": 0.0003994213488272809,
      "loss": 3.0231,
      "step": 90428
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.735182762145996,
      "learning_rate": 0.0003994174894185649,
      "loss": 2.9675,
      "step": 90429
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.9251606464385986,
      "learning_rate": 0.0003994136299913654,
      "loss": 2.949,
      "step": 90430
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.026533365249634,
      "learning_rate": 0.0003994097705456829,
      "loss": 3.1922,
      "step": 90431
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.844326376914978,
      "learning_rate": 0.00039940591108151826,
      "loss": 2.9316,
      "step": 90432
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2803406715393066,
      "learning_rate": 0.00039940205159887224,
      "loss": 3.0588,
      "step": 90433
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.96359384059906,
      "learning_rate": 0.0003993981920977455,
      "loss": 2.9873,
      "step": 90434
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8936994075775146,
      "learning_rate": 0.0003993943325781387,
      "loss": 2.871,
      "step": 90435
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.77016282081604,
      "learning_rate": 0.0003993904730400527,
      "loss": 3.1396,
      "step": 90436
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.8090622425079346,
      "learning_rate": 0.00039938661348348813,
      "loss": 2.9936,
      "step": 90437
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.725529670715332,
      "learning_rate": 0.00039938275390844565,
      "loss": 3.0497,
      "step": 90438
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.3629767894744873,
      "learning_rate": 0.000399378894314926,
      "loss": 2.9889,
      "step": 90439
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.140571117401123,
      "learning_rate": 0.00039937503470293003,
      "loss": 3.0418,
      "step": 90440
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.123502731323242,
      "learning_rate": 0.00039937117507245836,
      "loss": 2.8846,
      "step": 90441
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.3159632682800293,
      "learning_rate": 0.0003993673154235117,
      "loss": 3.2506,
      "step": 90442
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.9031596183776855,
      "learning_rate": 0.00039936345575609076,
      "loss": 3.078,
      "step": 90443
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.072033643722534,
      "learning_rate": 0.00039935959607019625,
      "loss": 3.2873,
      "step": 90444
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.915652871131897,
      "learning_rate": 0.00039935573636582904,
      "loss": 2.8075,
      "step": 90445
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.4272992610931396,
      "learning_rate": 0.00039935187664298953,
      "loss": 2.959,
      "step": 90446
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.237110137939453,
      "learning_rate": 0.0003993480169016788,
      "loss": 2.6952,
      "step": 90447
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5687015056610107,
      "learning_rate": 0.00039934415714189725,
      "loss": 3.0602,
      "step": 90448
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6653274297714233,
      "learning_rate": 0.0003993402973636459,
      "loss": 2.9898,
      "step": 90449
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.331282138824463,
      "learning_rate": 0.0003993364375669252,
      "loss": 3.0855,
      "step": 90450
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.844699859619141,
      "learning_rate": 0.000399332577751736,
      "loss": 3.0306,
      "step": 90451
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.102766275405884,
      "learning_rate": 0.0003993287179180791,
      "loss": 3.0396,
      "step": 90452
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9900625944137573,
      "learning_rate": 0.000399324858065955,
      "loss": 2.886,
      "step": 90453
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7713536024093628,
      "learning_rate": 0.0003993209981953646,
      "loss": 3.0018,
      "step": 90454
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.6671464443206787,
      "learning_rate": 0.0003993171383063085,
      "loss": 3.0963,
      "step": 90455
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.4715070724487305,
      "learning_rate": 0.0003993132783987875,
      "loss": 2.9981,
      "step": 90456
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7831013202667236,
      "learning_rate": 0.0003993094184728023,
      "loss": 2.8821,
      "step": 90457
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0112905502319336,
      "learning_rate": 0.00039930555852835364,
      "loss": 3.1848,
      "step": 90458
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.9321372509002686,
      "learning_rate": 0.0003993016985654421,
      "loss": 2.884,
      "step": 90459
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.006626605987549,
      "learning_rate": 0.0003992978385840685,
      "loss": 2.8986,
      "step": 90460
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.814577341079712,
      "learning_rate": 0.00039929397858423367,
      "loss": 2.912,
      "step": 90461
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.016911268234253,
      "learning_rate": 0.00039929011856593816,
      "loss": 2.9957,
      "step": 90462
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4713116884231567,
      "learning_rate": 0.0003992862585291827,
      "loss": 3.0429,
      "step": 90463
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2522308826446533,
      "learning_rate": 0.0003992823984739681,
      "loss": 2.9714,
      "step": 90464
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.836840033531189,
      "learning_rate": 0.00039927853840029503,
      "loss": 3.057,
      "step": 90465
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.756797432899475,
      "learning_rate": 0.00039927467830816417,
      "loss": 3.1333,
      "step": 90466
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6544160842895508,
      "learning_rate": 0.00039927081819757633,
      "loss": 2.8904,
      "step": 90467
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7153115272521973,
      "learning_rate": 0.0003992669580685321,
      "loss": 2.9776,
      "step": 90468
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8336119651794434,
      "learning_rate": 0.00039926309792103225,
      "loss": 2.6649,
      "step": 90469
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5218842029571533,
      "learning_rate": 0.0003992592377550776,
      "loss": 3.1905,
      "step": 90470
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4995522499084473,
      "learning_rate": 0.0003992553775706688,
      "loss": 3.2529,
      "step": 90471
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6740138530731201,
      "learning_rate": 0.0003992515173678064,
      "loss": 3.2044,
      "step": 90472
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.663304090499878,
      "learning_rate": 0.0003992476571464915,
      "loss": 3.0922,
      "step": 90473
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9728671312332153,
      "learning_rate": 0.0003992437969067244,
      "loss": 3.1226,
      "step": 90474
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.614838719367981,
      "learning_rate": 0.00039923993664850606,
      "loss": 2.8697,
      "step": 90475
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7076406478881836,
      "learning_rate": 0.0003992360763718372,
      "loss": 3.0204,
      "step": 90476
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.3146095275878906,
      "learning_rate": 0.00039923221607671844,
      "loss": 3.1013,
      "step": 90477
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4953765869140625,
      "learning_rate": 0.00039922835576315045,
      "loss": 3.0972,
      "step": 90478
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6007280349731445,
      "learning_rate": 0.00039922449543113416,
      "loss": 3.0705,
      "step": 90479
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2272565364837646,
      "learning_rate": 0.00039922063508067006,
      "loss": 2.876,
      "step": 90480
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.301389694213867,
      "learning_rate": 0.000399216774711759,
      "loss": 3.2251,
      "step": 90481
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.270899772644043,
      "learning_rate": 0.0003992129143244017,
      "loss": 3.1537,
      "step": 90482
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.254915475845337,
      "learning_rate": 0.0003992090539185989,
      "loss": 2.9146,
      "step": 90483
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5358963012695312,
      "learning_rate": 0.0003992051934943512,
      "loss": 3.0667,
      "step": 90484
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5033419132232666,
      "learning_rate": 0.00039920133305165935,
      "loss": 3.0613,
      "step": 90485
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4343416690826416,
      "learning_rate": 0.0003991974725905241,
      "loss": 3.0279,
      "step": 90486
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5435419082641602,
      "learning_rate": 0.00039919361211094624,
      "loss": 3.1826,
      "step": 90487
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6277555227279663,
      "learning_rate": 0.0003991897516129264,
      "loss": 2.9299,
      "step": 90488
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.611090660095215,
      "learning_rate": 0.0003991858910964652,
      "loss": 2.8401,
      "step": 90489
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.2338860034942627,
      "learning_rate": 0.0003991820305615637,
      "loss": 3.1653,
      "step": 90490
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.614170789718628,
      "learning_rate": 0.0003991781700082222,
      "loss": 2.8987,
      "step": 90491
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5126821994781494,
      "learning_rate": 0.0003991743094364416,
      "loss": 3.2168,
      "step": 90492
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5589351654052734,
      "learning_rate": 0.0003991704488462227,
      "loss": 3.0806,
      "step": 90493
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.252714157104492,
      "learning_rate": 0.00039916658823756614,
      "loss": 2.9966,
      "step": 90494
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7944591045379639,
      "learning_rate": 0.0003991627276104726,
      "loss": 2.81,
      "step": 90495
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.158571720123291,
      "learning_rate": 0.00039915886696494286,
      "loss": 3.226,
      "step": 90496
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.8684897422790527,
      "learning_rate": 0.0003991550063009776,
      "loss": 3.1776,
      "step": 90497
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.115527868270874,
      "learning_rate": 0.00039915114561857757,
      "loss": 2.8975,
      "step": 90498
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8173023462295532,
      "learning_rate": 0.0003991472849177434,
      "loss": 2.7339,
      "step": 90499
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1536200046539307,
      "learning_rate": 0.00039914342419847607,
      "loss": 3.2822,
      "step": 90500
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4729530811309814,
      "learning_rate": 0.00039913956346077594,
      "loss": 2.9958,
      "step": 90501
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7226006984710693,
      "learning_rate": 0.0003991357027046439,
      "loss": 3.139,
      "step": 90502
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9042630195617676,
      "learning_rate": 0.00039913184193008077,
      "loss": 2.9648,
      "step": 90503
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7019786834716797,
      "learning_rate": 0.00039912798113708704,
      "loss": 3.0502,
      "step": 90504
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7329171895980835,
      "learning_rate": 0.00039912412032566355,
      "loss": 2.8806,
      "step": 90505
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5281672477722168,
      "learning_rate": 0.00039912025949581106,
      "loss": 3.0107,
      "step": 90506
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5374120473861694,
      "learning_rate": 0.0003991163986475303,
      "loss": 3.0187,
      "step": 90507
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0939390659332275,
      "learning_rate": 0.0003991125377808218,
      "loss": 3.1161,
      "step": 90508
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6674001216888428,
      "learning_rate": 0.00039910867689568654,
      "loss": 2.9459,
      "step": 90509
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.005634307861328,
      "learning_rate": 0.000399104815992125,
      "loss": 3.1054,
      "step": 90510
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4618104696273804,
      "learning_rate": 0.00039910095507013804,
      "loss": 3.0388,
      "step": 90511
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4967552423477173,
      "learning_rate": 0.00039909709412972637,
      "loss": 2.7771,
      "step": 90512
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9171550273895264,
      "learning_rate": 0.00039909323317089063,
      "loss": 2.9311,
      "step": 90513
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8997794389724731,
      "learning_rate": 0.00039908937219363164,
      "loss": 2.9302,
      "step": 90514
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9505537748336792,
      "learning_rate": 0.00039908551119795,
      "loss": 3.1469,
      "step": 90515
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6522200107574463,
      "learning_rate": 0.0003990816501838466,
      "loss": 2.833,
      "step": 90516
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.760628342628479,
      "learning_rate": 0.000399077789151322,
      "loss": 3.0487,
      "step": 90517
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5267184972763062,
      "learning_rate": 0.0003990739281003769,
      "loss": 3.0745,
      "step": 90518
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.946295976638794,
      "learning_rate": 0.0003990700670310122,
      "loss": 2.9986,
      "step": 90519
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.990391492843628,
      "learning_rate": 0.0003990662059432284,
      "loss": 3.0118,
      "step": 90520
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.741196870803833,
      "learning_rate": 0.00039906234483702633,
      "loss": 3.1558,
      "step": 90521
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5739004611968994,
      "learning_rate": 0.00039905848371240676,
      "loss": 2.9433,
      "step": 90522
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8631397485733032,
      "learning_rate": 0.00039905462256937033,
      "loss": 2.998,
      "step": 90523
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6703169345855713,
      "learning_rate": 0.00039905076140791775,
      "loss": 2.7511,
      "step": 90524
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7877167463302612,
      "learning_rate": 0.00039904690022804983,
      "loss": 2.9416,
      "step": 90525
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3609704971313477,
      "learning_rate": 0.0003990430390297672,
      "loss": 2.9747,
      "step": 90526
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0396132469177246,
      "learning_rate": 0.0003990391778130705,
      "loss": 2.8446,
      "step": 90527
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.369041919708252,
      "learning_rate": 0.00039903531657796074,
      "loss": 2.9113,
      "step": 90528
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0985989570617676,
      "learning_rate": 0.00039903145532443825,
      "loss": 2.9118,
      "step": 90529
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0590498447418213,
      "learning_rate": 0.000399027594052504,
      "loss": 2.8961,
      "step": 90530
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0774552822113037,
      "learning_rate": 0.00039902373276215876,
      "loss": 2.9403,
      "step": 90531
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9499708414077759,
      "learning_rate": 0.000399019871453403,
      "loss": 3.0242,
      "step": 90532
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.607738494873047,
      "learning_rate": 0.0003990160101262376,
      "loss": 3.1052,
      "step": 90533
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6314815282821655,
      "learning_rate": 0.0003990121487806633,
      "loss": 3.1984,
      "step": 90534
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1985456943511963,
      "learning_rate": 0.00039900828741668075,
      "loss": 2.8739,
      "step": 90535
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.224846601486206,
      "learning_rate": 0.00039900442603429075,
      "loss": 2.828,
      "step": 90536
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.1183550357818604,
      "learning_rate": 0.0003990005646334939,
      "loss": 2.7949,
      "step": 90537
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.59683358669281,
      "learning_rate": 0.000398996703214291,
      "loss": 2.8498,
      "step": 90538
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.892526149749756,
      "learning_rate": 0.0003989928417766827,
      "loss": 2.9076,
      "step": 90539
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.214380741119385,
      "learning_rate": 0.0003989889803206698,
      "loss": 2.793,
      "step": 90540
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.742712140083313,
      "learning_rate": 0.0003989851188462529,
      "loss": 2.7856,
      "step": 90541
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.539389729499817,
      "learning_rate": 0.0003989812573534329,
      "loss": 2.878,
      "step": 90542
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.008070230484009,
      "learning_rate": 0.0003989773958422104,
      "loss": 3.0841,
      "step": 90543
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.606231451034546,
      "learning_rate": 0.0003989735343125861,
      "loss": 2.975,
      "step": 90544
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0598695278167725,
      "learning_rate": 0.0003989696727645608,
      "loss": 2.527,
      "step": 90545
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.6772329807281494,
      "learning_rate": 0.0003989658111981352,
      "loss": 2.9821,
      "step": 90546
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2593908309936523,
      "learning_rate": 0.00039896194961330984,
      "loss": 3.0366,
      "step": 90547
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0598907470703125,
      "learning_rate": 0.00039895808801008564,
      "loss": 2.9696,
      "step": 90548
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.22011399269104,
      "learning_rate": 0.00039895422638846334,
      "loss": 2.747,
      "step": 90549
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7325164079666138,
      "learning_rate": 0.0003989503647484435,
      "loss": 3.2833,
      "step": 90550
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8307812213897705,
      "learning_rate": 0.00039894650309002695,
      "loss": 3.0954,
      "step": 90551
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7727116346359253,
      "learning_rate": 0.00039894264141321443,
      "loss": 2.9476,
      "step": 90552
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9976309537887573,
      "learning_rate": 0.0003989387797180065,
      "loss": 2.9365,
      "step": 90553
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8731518983840942,
      "learning_rate": 0.00039893491800440403,
      "loss": 3.0062,
      "step": 90554
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.419785499572754,
      "learning_rate": 0.00039893105627240773,
      "loss": 3.102,
      "step": 90555
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.457223653793335,
      "learning_rate": 0.00039892719452201827,
      "loss": 3.0439,
      "step": 90556
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7019600868225098,
      "learning_rate": 0.0003989233327532363,
      "loss": 2.9533,
      "step": 90557
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.129314422607422,
      "learning_rate": 0.0003989194709660627,
      "loss": 3.0635,
      "step": 90558
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6861257553100586,
      "learning_rate": 0.00039891560916049815,
      "loss": 3.2175,
      "step": 90559
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5908106565475464,
      "learning_rate": 0.0003989117473365432,
      "loss": 2.8269,
      "step": 90560
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9063048362731934,
      "learning_rate": 0.00039890788549419874,
      "loss": 3.163,
      "step": 90561
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6126148700714111,
      "learning_rate": 0.00039890402363346547,
      "loss": 3.0397,
      "step": 90562
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.79412841796875,
      "learning_rate": 0.00039890016175434396,
      "loss": 2.9335,
      "step": 90563
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8546090126037598,
      "learning_rate": 0.0003988962998568352,
      "loss": 3.0088,
      "step": 90564
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0079739093780518,
      "learning_rate": 0.00039889243794093967,
      "loss": 2.8114,
      "step": 90565
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3926641941070557,
      "learning_rate": 0.00039888857600665814,
      "loss": 3.1159,
      "step": 90566
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8396188020706177,
      "learning_rate": 0.0003988847140539914,
      "loss": 3.332,
      "step": 90567
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6329900026321411,
      "learning_rate": 0.00039888085208294007,
      "loss": 3.2096,
      "step": 90568
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1357996463775635,
      "learning_rate": 0.00039887699009350495,
      "loss": 2.7777,
      "step": 90569
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.658291220664978,
      "learning_rate": 0.00039887312808568675,
      "loss": 3.0765,
      "step": 90570
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4255242347717285,
      "learning_rate": 0.00039886926605948623,
      "loss": 2.7968,
      "step": 90571
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6978460550308228,
      "learning_rate": 0.000398865404014904,
      "loss": 2.9957,
      "step": 90572
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.085526943206787,
      "learning_rate": 0.0003988615419519407,
      "loss": 2.7769,
      "step": 90573
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7051759958267212,
      "learning_rate": 0.0003988576798705973,
      "loss": 2.6591,
      "step": 90574
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1909942626953125,
      "learning_rate": 0.00039885381777087447,
      "loss": 3.047,
      "step": 90575
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.872727632522583,
      "learning_rate": 0.00039884995565277274,
      "loss": 2.9465,
      "step": 90576
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8693640232086182,
      "learning_rate": 0.00039884609351629286,
      "loss": 3.0015,
      "step": 90577
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6944621801376343,
      "learning_rate": 0.00039884223136143576,
      "loss": 2.8377,
      "step": 90578
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8293722867965698,
      "learning_rate": 0.0003988383691882019,
      "loss": 3.1129,
      "step": 90579
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.680721402168274,
      "learning_rate": 0.00039883450699659226,
      "loss": 3.0168,
      "step": 90580
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8969898223876953,
      "learning_rate": 0.00039883064478660733,
      "loss": 2.891,
      "step": 90581
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2104828357696533,
      "learning_rate": 0.000398826782558248,
      "loss": 2.862,
      "step": 90582
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.685666561126709,
      "learning_rate": 0.00039882292031151483,
      "loss": 3.1721,
      "step": 90583
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5155171155929565,
      "learning_rate": 0.0003988190580464086,
      "loss": 2.9996,
      "step": 90584
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6436008214950562,
      "learning_rate": 0.00039881519576293,
      "loss": 3.1361,
      "step": 90585
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.825323224067688,
      "learning_rate": 0.00039881133346108,
      "loss": 2.923,
      "step": 90586
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.991721510887146,
      "learning_rate": 0.0003988074711408589,
      "loss": 3.0363,
      "step": 90587
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6946754455566406,
      "learning_rate": 0.0003988036088022677,
      "loss": 3.1973,
      "step": 90588
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7789299488067627,
      "learning_rate": 0.0003987997464453071,
      "loss": 3.2933,
      "step": 90589
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5653437376022339,
      "learning_rate": 0.00039879588406997767,
      "loss": 3.1687,
      "step": 90590
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.8725838661193848,
      "learning_rate": 0.00039879202167628026,
      "loss": 3.0029,
      "step": 90591
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4704675674438477,
      "learning_rate": 0.0003987881592642156,
      "loss": 2.8788,
      "step": 90592
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6428415775299072,
      "learning_rate": 0.00039878429683378423,
      "loss": 3.0396,
      "step": 90593
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9228249788284302,
      "learning_rate": 0.0003987804343849871,
      "loss": 2.8118,
      "step": 90594
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8719079494476318,
      "learning_rate": 0.00039877657191782483,
      "loss": 2.8933,
      "step": 90595
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5304781198501587,
      "learning_rate": 0.00039877270943229805,
      "loss": 3.2448,
      "step": 90596
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6773117780685425,
      "learning_rate": 0.0003987688469284076,
      "loss": 3.0859,
      "step": 90597
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.951390027999878,
      "learning_rate": 0.00039876498440615416,
      "loss": 3.0866,
      "step": 90598
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4836336374282837,
      "learning_rate": 0.0003987611218655384,
      "loss": 2.8589,
      "step": 90599
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9738101959228516,
      "learning_rate": 0.00039875725930656116,
      "loss": 3.0224,
      "step": 90600
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5429069995880127,
      "learning_rate": 0.00039875339672922305,
      "loss": 2.8085,
      "step": 90601
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5861008167266846,
      "learning_rate": 0.0003987495341335248,
      "loss": 3.1034,
      "step": 90602
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5285345315933228,
      "learning_rate": 0.00039874567151946714,
      "loss": 2.9096,
      "step": 90603
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7449520826339722,
      "learning_rate": 0.0003987418088870509,
      "loss": 2.7419,
      "step": 90604
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8555493354797363,
      "learning_rate": 0.00039873794623627654,
      "loss": 2.8975,
      "step": 90605
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5766777992248535,
      "learning_rate": 0.00039873408356714505,
      "loss": 3.1434,
      "step": 90606
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9218882322311401,
      "learning_rate": 0.000398730220879657,
      "loss": 3.0409,
      "step": 90607
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.696518898010254,
      "learning_rate": 0.00039872635817381305,
      "loss": 2.92,
      "step": 90608
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7571055889129639,
      "learning_rate": 0.0003987224954496141,
      "loss": 2.7445,
      "step": 90609
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8439525365829468,
      "learning_rate": 0.0003987186327070608,
      "loss": 2.7878,
      "step": 90610
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0111324787139893,
      "learning_rate": 0.00039871476994615375,
      "loss": 2.9491,
      "step": 90611
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0671496391296387,
      "learning_rate": 0.00039871090716689383,
      "loss": 2.7854,
      "step": 90612
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.090193033218384,
      "learning_rate": 0.00039870704436928173,
      "loss": 2.9664,
      "step": 90613
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6841826438903809,
      "learning_rate": 0.00039870318155331804,
      "loss": 3.0822,
      "step": 90614
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9416037797927856,
      "learning_rate": 0.0003986993187190035,
      "loss": 3.1245,
      "step": 90615
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.979246973991394,
      "learning_rate": 0.00039869545586633905,
      "loss": 2.7345,
      "step": 90616
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8691314458847046,
      "learning_rate": 0.0003986915929953252,
      "loss": 2.9894,
      "step": 90617
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4811654090881348,
      "learning_rate": 0.00039868773010596267,
      "loss": 3.0502,
      "step": 90618
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6398131847381592,
      "learning_rate": 0.0003986838671982523,
      "loss": 3.3412,
      "step": 90619
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4300432205200195,
      "learning_rate": 0.0003986800042721947,
      "loss": 2.8303,
      "step": 90620
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7892616987228394,
      "learning_rate": 0.00039867614132779055,
      "loss": 2.8911,
      "step": 90621
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5245991945266724,
      "learning_rate": 0.0003986722783650408,
      "loss": 2.9082,
      "step": 90622
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.885959506034851,
      "learning_rate": 0.00039866841538394585,
      "loss": 2.7445,
      "step": 90623
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.815677285194397,
      "learning_rate": 0.00039866455238450664,
      "loss": 2.9066,
      "step": 90624
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.354858636856079,
      "learning_rate": 0.0003986606893667239,
      "loss": 2.9616,
      "step": 90625
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.077120542526245,
      "learning_rate": 0.0003986568263305982,
      "loss": 2.7297,
      "step": 90626
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.809769630432129,
      "learning_rate": 0.00039865296327613035,
      "loss": 3.2358,
      "step": 90627
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6358054876327515,
      "learning_rate": 0.00039864910020332106,
      "loss": 2.9202,
      "step": 90628
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.138005256652832,
      "learning_rate": 0.00039864523711217103,
      "loss": 3.0604,
      "step": 90629
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9258935451507568,
      "learning_rate": 0.00039864137400268096,
      "loss": 3.0841,
      "step": 90630
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1271448135375977,
      "learning_rate": 0.00039863751087485167,
      "loss": 3.1566,
      "step": 90631
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6653040647506714,
      "learning_rate": 0.0003986336477286838,
      "loss": 3.0356,
      "step": 90632
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6810585260391235,
      "learning_rate": 0.00039862978456417796,
      "loss": 3.2702,
      "step": 90633
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8765466213226318,
      "learning_rate": 0.00039862592138133516,
      "loss": 2.8782,
      "step": 90634
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7370606660842896,
      "learning_rate": 0.0003986220581801558,
      "loss": 3.0239,
      "step": 90635
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7956161499023438,
      "learning_rate": 0.00039861819496064073,
      "loss": 2.9662,
      "step": 90636
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6733757257461548,
      "learning_rate": 0.00039861433172279077,
      "loss": 3.0849,
      "step": 90637
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6847798824310303,
      "learning_rate": 0.00039861046846660647,
      "loss": 2.9712,
      "step": 90638
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6114678382873535,
      "learning_rate": 0.00039860660519208863,
      "loss": 2.9882,
      "step": 90639
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4584486484527588,
      "learning_rate": 0.000398602741899238,
      "loss": 3.0057,
      "step": 90640
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.554761528968811,
      "learning_rate": 0.00039859887858805524,
      "loss": 2.9498,
      "step": 90641
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5876587629318237,
      "learning_rate": 0.0003985950152585411,
      "loss": 3.0371,
      "step": 90642
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9440096616744995,
      "learning_rate": 0.00039859115191069627,
      "loss": 3.115,
      "step": 90643
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3553097248077393,
      "learning_rate": 0.0003985872885445215,
      "loss": 3.2059,
      "step": 90644
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8056483268737793,
      "learning_rate": 0.0003985834251600174,
      "loss": 3.0184,
      "step": 90645
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8240596055984497,
      "learning_rate": 0.0003985795617571849,
      "loss": 3.3238,
      "step": 90646
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3432164192199707,
      "learning_rate": 0.0003985756983360246,
      "loss": 2.8785,
      "step": 90647
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.123116970062256,
      "learning_rate": 0.0003985718348965371,
      "loss": 3.1333,
      "step": 90648
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6136502027511597,
      "learning_rate": 0.00039856797143872336,
      "loss": 3.1864,
      "step": 90649
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8308489322662354,
      "learning_rate": 0.00039856410796258393,
      "loss": 3.1465,
      "step": 90650
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8829253911972046,
      "learning_rate": 0.00039856024446811953,
      "loss": 2.9208,
      "step": 90651
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.855146050453186,
      "learning_rate": 0.00039855638095533097,
      "loss": 3.0044,
      "step": 90652
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.97061026096344,
      "learning_rate": 0.0003985525174242189,
      "loss": 2.9281,
      "step": 90653
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.873354196548462,
      "learning_rate": 0.00039854865387478405,
      "loss": 3.0691,
      "step": 90654
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.059875726699829,
      "learning_rate": 0.00039854479030702715,
      "loss": 3.022,
      "step": 90655
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.550478219985962,
      "learning_rate": 0.00039854092672094896,
      "loss": 3.1516,
      "step": 90656
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3965563774108887,
      "learning_rate": 0.0003985370631165501,
      "loss": 3.0059,
      "step": 90657
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.129251003265381,
      "learning_rate": 0.0003985331994938313,
      "loss": 2.8268,
      "step": 90658
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5955225229263306,
      "learning_rate": 0.0003985293358527934,
      "loss": 3.0964,
      "step": 90659
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4993616342544556,
      "learning_rate": 0.00039852547219343696,
      "loss": 3.0385,
      "step": 90660
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9521644115447998,
      "learning_rate": 0.00039852160851576283,
      "loss": 3.16,
      "step": 90661
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9838956594467163,
      "learning_rate": 0.0003985177448197716,
      "loss": 2.9934,
      "step": 90662
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7481074333190918,
      "learning_rate": 0.00039851388110546417,
      "loss": 3.5396,
      "step": 90663
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7899577617645264,
      "learning_rate": 0.00039851001737284115,
      "loss": 3.4236,
      "step": 90664
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7566739320755005,
      "learning_rate": 0.00039850615362190314,
      "loss": 3.0769,
      "step": 90665
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.735776424407959,
      "learning_rate": 0.00039850228985265106,
      "loss": 2.899,
      "step": 90666
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7108097076416016,
      "learning_rate": 0.00039849842606508556,
      "loss": 2.9106,
      "step": 90667
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7625540494918823,
      "learning_rate": 0.00039849456225920734,
      "loss": 3.0997,
      "step": 90668
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.084685802459717,
      "learning_rate": 0.000398490698435017,
      "loss": 3.0057,
      "step": 90669
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8866649866104126,
      "learning_rate": 0.0003984868345925156,
      "loss": 3.0337,
      "step": 90670
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5401452779769897,
      "learning_rate": 0.0003984829707317035,
      "loss": 3.0468,
      "step": 90671
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5938537120819092,
      "learning_rate": 0.0003984791068525815,
      "loss": 2.9711,
      "step": 90672
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.828709363937378,
      "learning_rate": 0.00039847524295515043,
      "loss": 2.923,
      "step": 90673
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.633056879043579,
      "learning_rate": 0.00039847137903941104,
      "loss": 2.8582,
      "step": 90674
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.787362813949585,
      "learning_rate": 0.00039846751510536386,
      "loss": 2.9829,
      "step": 90675
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.612366795539856,
      "learning_rate": 0.0003984636511530097,
      "loss": 2.9003,
      "step": 90676
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.375368118286133,
      "learning_rate": 0.00039845978718234935,
      "loss": 2.7939,
      "step": 90677
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.3752237558364868,
      "learning_rate": 0.0003984559231933835,
      "loss": 3.016,
      "step": 90678
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.671471118927002,
      "learning_rate": 0.00039845205918611276,
      "loss": 3.0017,
      "step": 90679
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7291676998138428,
      "learning_rate": 0.000398448195160538,
      "loss": 3.0375,
      "step": 90680
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0019893646240234,
      "learning_rate": 0.0003984443311166598,
      "loss": 2.7589,
      "step": 90681
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7369003295898438,
      "learning_rate": 0.0003984404670544789,
      "loss": 3.182,
      "step": 90682
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6160489320755005,
      "learning_rate": 0.0003984366029739961,
      "loss": 3.0617,
      "step": 90683
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9855473041534424,
      "learning_rate": 0.0003984327388752121,
      "loss": 3.0112,
      "step": 90684
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6725035905838013,
      "learning_rate": 0.0003984288747581276,
      "loss": 3.0174,
      "step": 90685
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8546851873397827,
      "learning_rate": 0.0003984250106227433,
      "loss": 2.827,
      "step": 90686
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3583595752716064,
      "learning_rate": 0.00039842114646905985,
      "loss": 3.2695,
      "step": 90687
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.907571792602539,
      "learning_rate": 0.00039841728229707816,
      "loss": 3.0702,
      "step": 90688
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8647091388702393,
      "learning_rate": 0.0003984134181067989,
      "loss": 3.1512,
      "step": 90689
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.067664384841919,
      "learning_rate": 0.0003984095538982226,
      "loss": 3.0286,
      "step": 90690
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8433048725128174,
      "learning_rate": 0.0003984056896713501,
      "loss": 2.7214,
      "step": 90691
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2425200939178467,
      "learning_rate": 0.0003984018254261822,
      "loss": 3.0633,
      "step": 90692
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0717291831970215,
      "learning_rate": 0.0003983979611627195,
      "loss": 2.9315,
      "step": 90693
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.857591152191162,
      "learning_rate": 0.0003983940968809627,
      "loss": 3.1027,
      "step": 90694
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2167165279388428,
      "learning_rate": 0.0003983902325809128,
      "loss": 2.8026,
      "step": 90695
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.4059853553771973,
      "learning_rate": 0.00039838636826257005,
      "loss": 3.203,
      "step": 90696
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5760302543640137,
      "learning_rate": 0.0003983825039259355,
      "loss": 2.8375,
      "step": 90697
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.168450117111206,
      "learning_rate": 0.0003983786395710099,
      "loss": 3.1251,
      "step": 90698
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2281229496002197,
      "learning_rate": 0.0003983747751977937,
      "loss": 3.1606,
      "step": 90699
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.516607642173767,
      "learning_rate": 0.00039837091080628786,
      "loss": 2.8096,
      "step": 90700
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9997406005859375,
      "learning_rate": 0.00039836704639649304,
      "loss": 3.0281,
      "step": 90701
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.498248815536499,
      "learning_rate": 0.00039836318196840984,
      "loss": 3.0605,
      "step": 90702
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5432571172714233,
      "learning_rate": 0.00039835931752203903,
      "loss": 2.9312,
      "step": 90703
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.471369981765747,
      "learning_rate": 0.0003983554530573815,
      "loss": 2.8776,
      "step": 90704
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.609702706336975,
      "learning_rate": 0.00039835158857443774,
      "loss": 3.0404,
      "step": 90705
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7287421226501465,
      "learning_rate": 0.0003983477240732086,
      "loss": 2.9799,
      "step": 90706
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0507171154022217,
      "learning_rate": 0.00039834385955369477,
      "loss": 3.1183,
      "step": 90707
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4623639583587646,
      "learning_rate": 0.00039833999501589693,
      "loss": 2.9215,
      "step": 90708
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.70499587059021,
      "learning_rate": 0.0003983361304598158,
      "loss": 2.9108,
      "step": 90709
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.23020339012146,
      "learning_rate": 0.0003983322658854522,
      "loss": 2.7917,
      "step": 90710
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.5351643562316895,
      "learning_rate": 0.00039832840129280675,
      "loss": 2.9383,
      "step": 90711
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.838871955871582,
      "learning_rate": 0.0003983245366818802,
      "loss": 3.1472,
      "step": 90712
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5259701013565063,
      "learning_rate": 0.0003983206720526733,
      "loss": 3.0246,
      "step": 90713
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7542147636413574,
      "learning_rate": 0.0003983168074051866,
      "loss": 3.0463,
      "step": 90714
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2126989364624023,
      "learning_rate": 0.0003983129427394211,
      "loss": 2.9518,
      "step": 90715
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.366116523742676,
      "learning_rate": 0.0003983090780553772,
      "loss": 2.9621,
      "step": 90716
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8055423498153687,
      "learning_rate": 0.000398305213353056,
      "loss": 2.7747,
      "step": 90717
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.470726251602173,
      "learning_rate": 0.00039830134863245786,
      "loss": 2.9404,
      "step": 90718
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.281294107437134,
      "learning_rate": 0.00039829748389358366,
      "loss": 2.9752,
      "step": 90719
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.812922477722168,
      "learning_rate": 0.00039829361913643415,
      "loss": 3.2345,
      "step": 90720
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9973570108413696,
      "learning_rate": 0.00039828975436100993,
      "loss": 2.8871,
      "step": 90721
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1067800521850586,
      "learning_rate": 0.00039828588956731175,
      "loss": 3.1238,
      "step": 90722
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.925783157348633,
      "learning_rate": 0.00039828202475534054,
      "loss": 3.0674,
      "step": 90723
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5696226358413696,
      "learning_rate": 0.0003982781599250968,
      "loss": 2.9311,
      "step": 90724
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4456418752670288,
      "learning_rate": 0.00039827429507658117,
      "loss": 3.1225,
      "step": 90725
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1811530590057373,
      "learning_rate": 0.0003982704302097946,
      "loss": 3.0382,
      "step": 90726
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.656090497970581,
      "learning_rate": 0.00039826656532473764,
      "loss": 2.8096,
      "step": 90727
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9705740213394165,
      "learning_rate": 0.00039826270042141106,
      "loss": 2.9585,
      "step": 90728
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8458620309829712,
      "learning_rate": 0.00039825883549981573,
      "loss": 2.9868,
      "step": 90729
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.387807846069336,
      "learning_rate": 0.0003982549705599521,
      "loss": 3.1281,
      "step": 90730
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.3707773685455322,
      "learning_rate": 0.00039825110560182104,
      "loss": 3.2494,
      "step": 90731
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5894083976745605,
      "learning_rate": 0.0003982472406254233,
      "loss": 3.0322,
      "step": 90732
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7558977603912354,
      "learning_rate": 0.00039824337563075946,
      "loss": 3.0189,
      "step": 90733
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0039727687835693,
      "learning_rate": 0.00039823951061783035,
      "loss": 2.8893,
      "step": 90734
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5089181661605835,
      "learning_rate": 0.0003982356455866367,
      "loss": 2.8907,
      "step": 90735
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8357435464859009,
      "learning_rate": 0.0003982317805371792,
      "loss": 3.0855,
      "step": 90736
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2775375843048096,
      "learning_rate": 0.0003982279154694584,
      "loss": 2.9715,
      "step": 90737
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7939125299453735,
      "learning_rate": 0.00039822405038347536,
      "loss": 2.9273,
      "step": 90738
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4984941482543945,
      "learning_rate": 0.00039822018527923056,
      "loss": 2.9363,
      "step": 90739
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4931966066360474,
      "learning_rate": 0.0003982163201567247,
      "loss": 3.0599,
      "step": 90740
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9690428972244263,
      "learning_rate": 0.00039821245501595867,
      "loss": 2.8589,
      "step": 90741
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2178218364715576,
      "learning_rate": 0.00039820858985693304,
      "loss": 3.0307,
      "step": 90742
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.132972240447998,
      "learning_rate": 0.0003982047246796486,
      "loss": 2.9082,
      "step": 90743
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.750005841255188,
      "learning_rate": 0.0003982008594841061,
      "loss": 3.0249,
      "step": 90744
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5936022996902466,
      "learning_rate": 0.0003981969942703061,
      "loss": 3.0831,
      "step": 90745
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6654363870620728,
      "learning_rate": 0.00039819312903824946,
      "loss": 3.0818,
      "step": 90746
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0126044750213623,
      "learning_rate": 0.0003981892637879368,
      "loss": 3.1685,
      "step": 90747
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6422319412231445,
      "learning_rate": 0.0003981853985193691,
      "loss": 2.9605,
      "step": 90748
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.183696985244751,
      "learning_rate": 0.0003981815332325467,
      "loss": 2.9904,
      "step": 90749
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9786810874938965,
      "learning_rate": 0.00039817766792747054,
      "loss": 3.0548,
      "step": 90750
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6646453142166138,
      "learning_rate": 0.00039817380260414133,
      "loss": 3.037,
      "step": 90751
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.27492094039917,
      "learning_rate": 0.00039816993726255975,
      "loss": 2.7688,
      "step": 90752
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.434995651245117,
      "learning_rate": 0.0003981660719027265,
      "loss": 2.8745,
      "step": 90753
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.054833173751831,
      "learning_rate": 0.0003981622065246423,
      "loss": 3.0448,
      "step": 90754
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.898568034172058,
      "learning_rate": 0.000398158341128308,
      "loss": 2.9686,
      "step": 90755
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.688336730003357,
      "learning_rate": 0.0003981544757137241,
      "loss": 3.0084,
      "step": 90756
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.642150640487671,
      "learning_rate": 0.0003981506102808915,
      "loss": 3.2066,
      "step": 90757
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.563777208328247,
      "learning_rate": 0.0003981467448298107,
      "loss": 3.1849,
      "step": 90758
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.750299096107483,
      "learning_rate": 0.0003981428793604828,
      "loss": 3.3225,
      "step": 90759
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9390413761138916,
      "learning_rate": 0.0003981390138729082,
      "loss": 2.9978,
      "step": 90760
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.48470938205719,
      "learning_rate": 0.00039813514836708764,
      "loss": 3.2994,
      "step": 90761
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7335422039031982,
      "learning_rate": 0.00039813128284302203,
      "loss": 3.0887,
      "step": 90762
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7315239906311035,
      "learning_rate": 0.00039812741730071185,
      "loss": 2.9174,
      "step": 90763
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.440894365310669,
      "learning_rate": 0.0003981235517401579,
      "loss": 3.3863,
      "step": 90764
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5026051998138428,
      "learning_rate": 0.0003981196861613611,
      "loss": 3.3145,
      "step": 90765
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.060487747192383,
      "learning_rate": 0.00039811582056432183,
      "loss": 2.8906,
      "step": 90766
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.744735836982727,
      "learning_rate": 0.000398111954949041,
      "loss": 3.093,
      "step": 90767
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4516551494598389,
      "learning_rate": 0.0003981080893155194,
      "loss": 2.9382,
      "step": 90768
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.462651014328003,
      "learning_rate": 0.0003981042236637576,
      "loss": 2.7762,
      "step": 90769
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.764496922492981,
      "learning_rate": 0.0003981003579937563,
      "loss": 2.9883,
      "step": 90770
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1588363647460938,
      "learning_rate": 0.0003980964923055164,
      "loss": 2.9533,
      "step": 90771
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8594328165054321,
      "learning_rate": 0.0003980926265990385,
      "loss": 3.0259,
      "step": 90772
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9448765516281128,
      "learning_rate": 0.0003980887608743233,
      "loss": 3.0864,
      "step": 90773
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5813069343566895,
      "learning_rate": 0.0003980848951313715,
      "loss": 3.082,
      "step": 90774
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.781876802444458,
      "learning_rate": 0.00039808102937018396,
      "loss": 2.829,
      "step": 90775
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4348535537719727,
      "learning_rate": 0.0003980771635907612,
      "loss": 2.9551,
      "step": 90776
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8358800411224365,
      "learning_rate": 0.0003980732977931042,
      "loss": 2.9443,
      "step": 90777
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7200055122375488,
      "learning_rate": 0.00039806943197721336,
      "loss": 3.0225,
      "step": 90778
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8404083251953125,
      "learning_rate": 0.0003980655661430896,
      "loss": 2.8886,
      "step": 90779
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6429941654205322,
      "learning_rate": 0.0003980617002907336,
      "loss": 3.0993,
      "step": 90780
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.194361686706543,
      "learning_rate": 0.0003980578344201462,
      "loss": 3.1591,
      "step": 90781
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7938158512115479,
      "learning_rate": 0.00039805396853132783,
      "loss": 2.8058,
      "step": 90782
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7978661060333252,
      "learning_rate": 0.0003980501026242795,
      "loss": 3.0285,
      "step": 90783
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6789108514785767,
      "learning_rate": 0.00039804623669900175,
      "loss": 2.9929,
      "step": 90784
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8328511714935303,
      "learning_rate": 0.00039804237075549535,
      "loss": 2.9553,
      "step": 90785
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5798386335372925,
      "learning_rate": 0.000398038504793761,
      "loss": 3.0634,
      "step": 90786
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7458659410476685,
      "learning_rate": 0.0003980346388137995,
      "loss": 3.0864,
      "step": 90787
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7408995628356934,
      "learning_rate": 0.0003980307728156115,
      "loss": 2.9931,
      "step": 90788
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8729069232940674,
      "learning_rate": 0.0003980269067991977,
      "loss": 2.9217,
      "step": 90789
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7779419422149658,
      "learning_rate": 0.0003980230407645589,
      "loss": 2.8855,
      "step": 90790
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.937848687171936,
      "learning_rate": 0.0003980191747116957,
      "loss": 2.8793,
      "step": 90791
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.504770040512085,
      "learning_rate": 0.0003980153086406089,
      "loss": 3.0329,
      "step": 90792
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.136237382888794,
      "learning_rate": 0.00039801144255129924,
      "loss": 2.9624,
      "step": 90793
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7815015316009521,
      "learning_rate": 0.0003980075764437674,
      "loss": 3.1286,
      "step": 90794
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4587459564208984,
      "learning_rate": 0.000398003710318014,
      "loss": 2.9417,
      "step": 90795
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.324674367904663,
      "learning_rate": 0.00039799984417404,
      "loss": 3.1053,
      "step": 90796
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.16687273979187,
      "learning_rate": 0.0003979959780118459,
      "loss": 2.9395,
      "step": 90797
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.878517508506775,
      "learning_rate": 0.00039799211183143247,
      "loss": 3.2795,
      "step": 90798
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.157841920852661,
      "learning_rate": 0.0003979882456328006,
      "loss": 3.1608,
      "step": 90799
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.747853398323059,
      "learning_rate": 0.00039798437941595074,
      "loss": 3.0613,
      "step": 90800
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4632678031921387,
      "learning_rate": 0.00039798051318088373,
      "loss": 3.0968,
      "step": 90801
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8419378995895386,
      "learning_rate": 0.0003979766469276004,
      "loss": 3.253,
      "step": 90802
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.300180673599243,
      "learning_rate": 0.0003979727806561012,
      "loss": 3.1775,
      "step": 90803
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.019444227218628,
      "learning_rate": 0.0003979689143663871,
      "loss": 3.0138,
      "step": 90804
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.3001906871795654,
      "learning_rate": 0.0003979650480584588,
      "loss": 2.8205,
      "step": 90805
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.010531425476074,
      "learning_rate": 0.0003979611817323168,
      "loss": 3.045,
      "step": 90806
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6958757638931274,
      "learning_rate": 0.0003979573153879621,
      "loss": 2.9326,
      "step": 90807
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7395944595336914,
      "learning_rate": 0.00039795344902539525,
      "loss": 2.9787,
      "step": 90808
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.826340913772583,
      "learning_rate": 0.000397949582644617,
      "loss": 3.0588,
      "step": 90809
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9748870134353638,
      "learning_rate": 0.000397945716245628,
      "loss": 3.1215,
      "step": 90810
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.733402729034424,
      "learning_rate": 0.00039794184982842916,
      "loss": 2.9807,
      "step": 90811
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.978906512260437,
      "learning_rate": 0.00039793798339302094,
      "loss": 3.1657,
      "step": 90812
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.415611982345581,
      "learning_rate": 0.0003979341169394043,
      "loss": 3.0756,
      "step": 90813
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.82624351978302,
      "learning_rate": 0.0003979302504675799,
      "loss": 3.1257,
      "step": 90814
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6878684759140015,
      "learning_rate": 0.0003979263839775483,
      "loss": 3.0484,
      "step": 90815
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.89528226852417,
      "learning_rate": 0.0003979225174693104,
      "loss": 3.0428,
      "step": 90816
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1176178455352783,
      "learning_rate": 0.0003979186509428669,
      "loss": 2.9221,
      "step": 90817
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1228861808776855,
      "learning_rate": 0.0003979147843982184,
      "loss": 2.9526,
      "step": 90818
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.243374824523926,
      "learning_rate": 0.00039791091783536567,
      "loss": 3.2226,
      "step": 90819
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7257473468780518,
      "learning_rate": 0.0003979070512543096,
      "loss": 2.8918,
      "step": 90820
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.178410053253174,
      "learning_rate": 0.0003979031846550506,
      "loss": 3.4116,
      "step": 90821
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7010303735733032,
      "learning_rate": 0.00039789931803758954,
      "loss": 3.1566,
      "step": 90822
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7259509563446045,
      "learning_rate": 0.00039789545140192734,
      "loss": 3.1846,
      "step": 90823
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.283494234085083,
      "learning_rate": 0.00039789158474806436,
      "loss": 2.9015,
      "step": 90824
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.555513381958008,
      "learning_rate": 0.0003978877180760015,
      "loss": 3.0749,
      "step": 90825
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.748862385749817,
      "learning_rate": 0.00039788385138573955,
      "loss": 3.0139,
      "step": 90826
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.156107187271118,
      "learning_rate": 0.0003978799846772791,
      "loss": 3.2176,
      "step": 90827
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.921351432800293,
      "learning_rate": 0.00039787611795062087,
      "loss": 2.9179,
      "step": 90828
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.2363126277923584,
      "learning_rate": 0.00039787225120576573,
      "loss": 3.0307,
      "step": 90829
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.823823928833008,
      "learning_rate": 0.00039786838444271416,
      "loss": 2.9368,
      "step": 90830
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.8825488090515137,
      "learning_rate": 0.0003978645176614671,
      "loss": 3.0121,
      "step": 90831
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.446504592895508,
      "learning_rate": 0.0003978606508620252,
      "loss": 2.9852,
      "step": 90832
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5467586517333984,
      "learning_rate": 0.00039785678404438907,
      "loss": 2.8496,
      "step": 90833
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0207269191741943,
      "learning_rate": 0.00039785291720855956,
      "loss": 2.9942,
      "step": 90834
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9418288469314575,
      "learning_rate": 0.00039784905035453725,
      "loss": 2.8616,
      "step": 90835
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2111735343933105,
      "learning_rate": 0.0003978451834823231,
      "loss": 3.02,
      "step": 90836
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3137779235839844,
      "learning_rate": 0.00039784131659191763,
      "loss": 3.0156,
      "step": 90837
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6116890907287598,
      "learning_rate": 0.0003978374496833216,
      "loss": 2.795,
      "step": 90838
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.3503201007843018,
      "learning_rate": 0.0003978335827565357,
      "loss": 2.9532,
      "step": 90839
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2635374069213867,
      "learning_rate": 0.0003978297158115608,
      "loss": 2.8516,
      "step": 90840
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7751288414001465,
      "learning_rate": 0.0003978258488483975,
      "loss": 2.9413,
      "step": 90841
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2766010761260986,
      "learning_rate": 0.0003978219818670464,
      "loss": 2.9322,
      "step": 90842
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.547885775566101,
      "learning_rate": 0.0003978181148675085,
      "loss": 3.0125,
      "step": 90843
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.6235604286193848,
      "learning_rate": 0.00039781424784978423,
      "loss": 3.0052,
      "step": 90844
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9787178039550781,
      "learning_rate": 0.0003978103808138745,
      "loss": 3.1634,
      "step": 90845
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8957972526550293,
      "learning_rate": 0.00039780651375977995,
      "loss": 3.0469,
      "step": 90846
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9726663827896118,
      "learning_rate": 0.0003978026466875014,
      "loss": 3.0245,
      "step": 90847
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7579745054244995,
      "learning_rate": 0.0003977987795970394,
      "loss": 2.9103,
      "step": 90848
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7637178897857666,
      "learning_rate": 0.0003977949124883948,
      "loss": 3.0237,
      "step": 90849
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6376367807388306,
      "learning_rate": 0.0003977910453615684,
      "loss": 3.2224,
      "step": 90850
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1440038681030273,
      "learning_rate": 0.00039778717821656063,
      "loss": 2.8607,
      "step": 90851
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4467623233795166,
      "learning_rate": 0.0003977833110533724,
      "loss": 3.1174,
      "step": 90852
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.594019889831543,
      "learning_rate": 0.0003977794438720044,
      "loss": 3.0715,
      "step": 90853
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.631988286972046,
      "learning_rate": 0.00039777557667245746,
      "loss": 3.0146,
      "step": 90854
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5693901777267456,
      "learning_rate": 0.0003977717094547321,
      "loss": 3.25,
      "step": 90855
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6060153245925903,
      "learning_rate": 0.0003977678422188291,
      "loss": 2.9911,
      "step": 90856
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7639528512954712,
      "learning_rate": 0.0003977639749647493,
      "loss": 3.0513,
      "step": 90857
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8268340826034546,
      "learning_rate": 0.0003977601076924933,
      "loss": 3.0875,
      "step": 90858
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9815983772277832,
      "learning_rate": 0.0003977562404020619,
      "loss": 2.9381,
      "step": 90859
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8252521753311157,
      "learning_rate": 0.00039775237309345573,
      "loss": 2.9647,
      "step": 90860
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7319138050079346,
      "learning_rate": 0.0003977485057666755,
      "loss": 2.8872,
      "step": 90861
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.3435804843902588,
      "learning_rate": 0.000397744638421722,
      "loss": 3.0236,
      "step": 90862
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9079687595367432,
      "learning_rate": 0.00039774077105859604,
      "loss": 3.0279,
      "step": 90863
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9866149425506592,
      "learning_rate": 0.0003977369036772982,
      "loss": 2.9603,
      "step": 90864
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.791602373123169,
      "learning_rate": 0.0003977330362778291,
      "loss": 3.2853,
      "step": 90865
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8243283033370972,
      "learning_rate": 0.0003977291688601896,
      "loss": 2.9889,
      "step": 90866
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6108015775680542,
      "learning_rate": 0.0003977253014243805,
      "loss": 2.834,
      "step": 90867
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7743358612060547,
      "learning_rate": 0.0003977214339704023,
      "loss": 2.9822,
      "step": 90868
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5260449647903442,
      "learning_rate": 0.00039771756649825603,
      "loss": 2.7009,
      "step": 90869
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7639638185501099,
      "learning_rate": 0.00039771369900794203,
      "loss": 2.9068,
      "step": 90870
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2014319896698,
      "learning_rate": 0.0003977098314994613,
      "loss": 3.1028,
      "step": 90871
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3416097164154053,
      "learning_rate": 0.0003977059639728144,
      "loss": 3.138,
      "step": 90872
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0496764183044434,
      "learning_rate": 0.0003977020964280022,
      "loss": 2.8328,
      "step": 90873
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0054140090942383,
      "learning_rate": 0.0003976982288650252,
      "loss": 2.7759,
      "step": 90874
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.279384136199951,
      "learning_rate": 0.0003976943612838845,
      "loss": 2.8887,
      "step": 90875
    },
    {
      "epoch": 1.18,
      "grad_norm": 4.936408519744873,
      "learning_rate": 0.0003976904936845803,
      "loss": 2.9565,
      "step": 90876
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.98076331615448,
      "learning_rate": 0.0003976866260671138,
      "loss": 2.8918,
      "step": 90877
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5057789087295532,
      "learning_rate": 0.0003976827584314854,
      "loss": 2.8945,
      "step": 90878
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4201204776763916,
      "learning_rate": 0.000397678890777696,
      "loss": 3.0048,
      "step": 90879
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.976958751678467,
      "learning_rate": 0.0003976750231057462,
      "loss": 2.8862,
      "step": 90880
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8706344366073608,
      "learning_rate": 0.00039767115541563684,
      "loss": 3.105,
      "step": 90881
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6033986806869507,
      "learning_rate": 0.0003976672877073685,
      "loss": 3.0375,
      "step": 90882
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0306320190429688,
      "learning_rate": 0.00039766341998094193,
      "loss": 2.9457,
      "step": 90883
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6076443195343018,
      "learning_rate": 0.00039765955223635796,
      "loss": 3.0964,
      "step": 90884
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6149135828018188,
      "learning_rate": 0.00039765568447361723,
      "loss": 2.9961,
      "step": 90885
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8699733018875122,
      "learning_rate": 0.0003976518166927204,
      "loss": 3.2005,
      "step": 90886
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.865370750427246,
      "learning_rate": 0.00039764794889366833,
      "loss": 2.9892,
      "step": 90887
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0576069355010986,
      "learning_rate": 0.00039764408107646163,
      "loss": 3.0164,
      "step": 90888
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8613488674163818,
      "learning_rate": 0.00039764021324110104,
      "loss": 3.0088,
      "step": 90889
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8359605073928833,
      "learning_rate": 0.0003976363453875874,
      "loss": 3.1264,
      "step": 90890
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8317428827285767,
      "learning_rate": 0.00039763247751592115,
      "loss": 2.8729,
      "step": 90891
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6675498485565186,
      "learning_rate": 0.00039762860962610326,
      "loss": 3.0526,
      "step": 90892
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3217885494232178,
      "learning_rate": 0.0003976247417181344,
      "loss": 2.7845,
      "step": 90893
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.773531436920166,
      "learning_rate": 0.0003976208737920152,
      "loss": 2.9301,
      "step": 90894
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8617596626281738,
      "learning_rate": 0.00039761700584774645,
      "loss": 2.8281,
      "step": 90895
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.442898988723755,
      "learning_rate": 0.0003976131378853289,
      "loss": 2.7823,
      "step": 90896
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2411272525787354,
      "learning_rate": 0.0003976092699047632,
      "loss": 3.1074,
      "step": 90897
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.551343321800232,
      "learning_rate": 0.0003976054019060501,
      "loss": 2.9985,
      "step": 90898
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.86478590965271,
      "learning_rate": 0.0003976015338891903,
      "loss": 2.8493,
      "step": 90899
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.186183214187622,
      "learning_rate": 0.00039759766585418453,
      "loss": 3.0315,
      "step": 90900
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.3678479194641113,
      "learning_rate": 0.0003975937978010335,
      "loss": 3.0548,
      "step": 90901
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.673081874847412,
      "learning_rate": 0.000397589929729738,
      "loss": 3.0994,
      "step": 90902
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6422348022460938,
      "learning_rate": 0.00039758606164029856,
      "loss": 3.2091,
      "step": 90903
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7138193845748901,
      "learning_rate": 0.00039758219353271617,
      "loss": 3.12,
      "step": 90904
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6723874807357788,
      "learning_rate": 0.00039757832540699133,
      "loss": 3.1037,
      "step": 90905
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7447530031204224,
      "learning_rate": 0.00039757445726312486,
      "loss": 2.9653,
      "step": 90906
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6276754140853882,
      "learning_rate": 0.0003975705891011174,
      "loss": 3.0782,
      "step": 90907
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5137670040130615,
      "learning_rate": 0.0003975667209209699,
      "loss": 3.1633,
      "step": 90908
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5528614521026611,
      "learning_rate": 0.00039756285272268273,
      "loss": 2.9023,
      "step": 90909
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.822788119316101,
      "learning_rate": 0.00039755898450625685,
      "loss": 3.1888,
      "step": 90910
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8020612001419067,
      "learning_rate": 0.0003975551162716929,
      "loss": 3.0224,
      "step": 90911
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.134490966796875,
      "learning_rate": 0.00039755124801899163,
      "loss": 3.1852,
      "step": 90912
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.72833251953125,
      "learning_rate": 0.0003975473797481537,
      "loss": 3.0503,
      "step": 90913
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1608223915100098,
      "learning_rate": 0.00039754351145917984,
      "loss": 2.9939,
      "step": 90914
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4332826137542725,
      "learning_rate": 0.00039753964315207096,
      "loss": 2.99,
      "step": 90915
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5236537456512451,
      "learning_rate": 0.0003975357748268275,
      "loss": 2.869,
      "step": 90916
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7894798517227173,
      "learning_rate": 0.00039753190648345027,
      "loss": 3.1665,
      "step": 90917
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8141194581985474,
      "learning_rate": 0.0003975280381219401,
      "loss": 3.077,
      "step": 90918
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1047863960266113,
      "learning_rate": 0.0003975241697422976,
      "loss": 2.7206,
      "step": 90919
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7019288539886475,
      "learning_rate": 0.0003975203013445235,
      "loss": 3.0985,
      "step": 90920
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6806594133377075,
      "learning_rate": 0.00039751643292861857,
      "loss": 3.1174,
      "step": 90921
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7385282516479492,
      "learning_rate": 0.00039751256449458344,
      "loss": 3.03,
      "step": 90922
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7515754699707031,
      "learning_rate": 0.00039750869604241883,
      "loss": 3.1659,
      "step": 90923
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7642500400543213,
      "learning_rate": 0.00039750482757212565,
      "loss": 2.991,
      "step": 90924
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6023975610733032,
      "learning_rate": 0.00039750095908370445,
      "loss": 3.1668,
      "step": 90925
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7413736581802368,
      "learning_rate": 0.00039749709057715593,
      "loss": 2.9949,
      "step": 90926
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.612167239189148,
      "learning_rate": 0.00039749322205248085,
      "loss": 2.8524,
      "step": 90927
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9048149585723877,
      "learning_rate": 0.00039748935350968,
      "loss": 2.6472,
      "step": 90928
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8702127933502197,
      "learning_rate": 0.000397485484948754,
      "loss": 3.1462,
      "step": 90929
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.409540057182312,
      "learning_rate": 0.00039748161636970363,
      "loss": 2.8395,
      "step": 90930
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8874831199645996,
      "learning_rate": 0.00039747774777252964,
      "loss": 3.0488,
      "step": 90931
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8815473318099976,
      "learning_rate": 0.0003974738791572326,
      "loss": 2.7993,
      "step": 90932
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.979170322418213,
      "learning_rate": 0.0003974700105238134,
      "loss": 2.9523,
      "step": 90933
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6888688802719116,
      "learning_rate": 0.0003974661418722725,
      "loss": 3.0791,
      "step": 90934
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8774089813232422,
      "learning_rate": 0.00039746227320261104,
      "loss": 2.9333,
      "step": 90935
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7390445470809937,
      "learning_rate": 0.00039745840451482935,
      "loss": 3.1162,
      "step": 90936
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8678514957427979,
      "learning_rate": 0.0003974545358089284,
      "loss": 3.208,
      "step": 90937
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.817858099937439,
      "learning_rate": 0.0003974506670849087,
      "loss": 2.8626,
      "step": 90938
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0272912979125977,
      "learning_rate": 0.0003974467983427713,
      "loss": 2.9444,
      "step": 90939
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1911861896514893,
      "learning_rate": 0.00039744292958251656,
      "loss": 3.132,
      "step": 90940
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0944063663482666,
      "learning_rate": 0.0003974390608041453,
      "loss": 2.9638,
      "step": 90941
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.2347140312194824,
      "learning_rate": 0.0003974351920076584,
      "loss": 2.8194,
      "step": 90942
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0299158096313477,
      "learning_rate": 0.00039743132319305626,
      "loss": 3.0363,
      "step": 90943
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6984997987747192,
      "learning_rate": 0.00039742745436033994,
      "loss": 3.1278,
      "step": 90944
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.6543266773223877,
      "learning_rate": 0.00039742358550951006,
      "loss": 3.0455,
      "step": 90945
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.003218412399292,
      "learning_rate": 0.0003974197166405672,
      "loss": 2.9181,
      "step": 90946
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5910499095916748,
      "learning_rate": 0.00039741584775351217,
      "loss": 3.0377,
      "step": 90947
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5259344577789307,
      "learning_rate": 0.0003974119788483458,
      "loss": 3.114,
      "step": 90948
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9137917757034302,
      "learning_rate": 0.0003974081099250686,
      "loss": 2.823,
      "step": 90949
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.733734369277954,
      "learning_rate": 0.0003974042409836815,
      "loss": 3.1017,
      "step": 90950
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.087726354598999,
      "learning_rate": 0.000397400372024185,
      "loss": 3.1491,
      "step": 90951
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.478205680847168,
      "learning_rate": 0.00039739650304658,
      "loss": 3.0296,
      "step": 90952
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5950875282287598,
      "learning_rate": 0.00039739263405086714,
      "loss": 3.4161,
      "step": 90953
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0132312774658203,
      "learning_rate": 0.00039738876503704715,
      "loss": 3.1416,
      "step": 90954
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.211707830429077,
      "learning_rate": 0.0003973848960051207,
      "loss": 3.0519,
      "step": 90955
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.254528760910034,
      "learning_rate": 0.0003973810269550886,
      "loss": 2.9839,
      "step": 90956
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9161118268966675,
      "learning_rate": 0.00039737715788695153,
      "loss": 2.9642,
      "step": 90957
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.166729211807251,
      "learning_rate": 0.0003973732888007102,
      "loss": 3.0998,
      "step": 90958
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.801074743270874,
      "learning_rate": 0.00039736941969636534,
      "loss": 3.013,
      "step": 90959
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.226667881011963,
      "learning_rate": 0.00039736555057391773,
      "loss": 3.0507,
      "step": 90960
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.644733190536499,
      "learning_rate": 0.0003973616814333679,
      "loss": 3.0259,
      "step": 90961
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.348702907562256,
      "learning_rate": 0.00039735781227471676,
      "loss": 3.0848,
      "step": 90962
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.7732951641082764,
      "learning_rate": 0.00039735394309796504,
      "loss": 2.9071,
      "step": 90963
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.570926547050476,
      "learning_rate": 0.00039735007390311327,
      "loss": 3.2462,
      "step": 90964
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1944212913513184,
      "learning_rate": 0.0003973462046901623,
      "loss": 2.744,
      "step": 90965
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.1508629322052,
      "learning_rate": 0.00039734233545911287,
      "loss": 3.0011,
      "step": 90966
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.597915768623352,
      "learning_rate": 0.00039733846620996566,
      "loss": 3.1538,
      "step": 90967
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4909210205078125,
      "learning_rate": 0.0003973345969427214,
      "loss": 3.0099,
      "step": 90968
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5743502378463745,
      "learning_rate": 0.0003973307276573808,
      "loss": 2.8882,
      "step": 90969
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8614754676818848,
      "learning_rate": 0.00039732685835394455,
      "loss": 3.1654,
      "step": 90970
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8275576829910278,
      "learning_rate": 0.00039732298903241334,
      "loss": 2.9682,
      "step": 90971
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.014310121536255,
      "learning_rate": 0.0003973191196927881,
      "loss": 2.9843,
      "step": 90972
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8476204872131348,
      "learning_rate": 0.00039731525033506927,
      "loss": 2.9323,
      "step": 90973
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9298272132873535,
      "learning_rate": 0.0003973113809592577,
      "loss": 3.0371,
      "step": 90974
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.571365237236023,
      "learning_rate": 0.0003973075115653542,
      "loss": 3.0862,
      "step": 90975
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.6693663597106934,
      "learning_rate": 0.00039730364215335935,
      "loss": 3.0678,
      "step": 90976
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.962545156478882,
      "learning_rate": 0.0003972997727232739,
      "loss": 2.9122,
      "step": 90977
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8270074129104614,
      "learning_rate": 0.00039729590327509874,
      "loss": 2.8889,
      "step": 90978
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.032299518585205,
      "learning_rate": 0.00039729203380883425,
      "loss": 2.9881,
      "step": 90979
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.4443838596343994,
      "learning_rate": 0.0003972881643244813,
      "loss": 2.927,
      "step": 90980
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.6035850048065186,
      "learning_rate": 0.0003972842948220407,
      "loss": 3.0113,
      "step": 90981
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.737964153289795,
      "learning_rate": 0.00039728042530151326,
      "loss": 2.7585,
      "step": 90982
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2274131774902344,
      "learning_rate": 0.00039727655576289936,
      "loss": 3.0161,
      "step": 90983
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.4668641090393066,
      "learning_rate": 0.0003972726862062,
      "loss": 2.8952,
      "step": 90984
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5136213302612305,
      "learning_rate": 0.00039726881663141586,
      "loss": 2.9226,
      "step": 90985
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.227078676223755,
      "learning_rate": 0.0003972649470385475,
      "loss": 2.9948,
      "step": 90986
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7539159059524536,
      "learning_rate": 0.0003972610774275958,
      "loss": 2.9726,
      "step": 90987
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.638971209526062,
      "learning_rate": 0.00039725720779856143,
      "loss": 3.0134,
      "step": 90988
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.2352607250213623,
      "learning_rate": 0.00039725333815144513,
      "loss": 2.9085,
      "step": 90989
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7297917604446411,
      "learning_rate": 0.0003972494684862475,
      "loss": 2.8798,
      "step": 90990
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5070772171020508,
      "learning_rate": 0.00039724559880296957,
      "loss": 2.9995,
      "step": 90991
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.5392818450927734,
      "learning_rate": 0.00039724172910161166,
      "loss": 3.0426,
      "step": 90992
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.0196409225463867,
      "learning_rate": 0.0003972378593821746,
      "loss": 3.0346,
      "step": 90993
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9971082210540771,
      "learning_rate": 0.00039723398964465944,
      "loss": 2.9489,
      "step": 90994
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8078302145004272,
      "learning_rate": 0.00039723011988906655,
      "loss": 3.0491,
      "step": 90995
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.606806993484497,
      "learning_rate": 0.00039722625011539664,
      "loss": 3.2638,
      "step": 90996
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.729546070098877,
      "learning_rate": 0.00039722238032365064,
      "loss": 3.1084,
      "step": 90997
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.556444764137268,
      "learning_rate": 0.0003972185105138291,
      "loss": 3.0978,
      "step": 90998
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.069645881652832,
      "learning_rate": 0.00039721464068593286,
      "loss": 3.0157,
      "step": 90999
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.123931884765625,
      "learning_rate": 0.00039721077083996266,
      "loss": 2.9556,
      "step": 91000
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.9301677942276,
      "learning_rate": 0.00039720690097591897,
      "loss": 2.9428,
      "step": 91001
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.5973000526428223,
      "learning_rate": 0.0003972030310938027,
      "loss": 3.0972,
      "step": 91002
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.8028335571289062,
      "learning_rate": 0.0003971991611936147,
      "loss": 2.939,
      "step": 91003
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.8701213598251343,
      "learning_rate": 0.00039719529127535543,
      "loss": 3.1236,
      "step": 91004
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.050013780593872,
      "learning_rate": 0.0003971914213390257,
      "loss": 3.0647,
      "step": 91005
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.045036792755127,
      "learning_rate": 0.00039718755138462634,
      "loss": 3.0735,
      "step": 91006
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.73266339302063,
      "learning_rate": 0.0003971836814121579,
      "loss": 3.2639,
      "step": 91007
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7553198337554932,
      "learning_rate": 0.0003971798114216212,
      "loss": 2.9697,
      "step": 91008
    },
    {
      "epoch": 1.18,
      "grad_norm": 1.7719926834106445,
      "learning_rate": 0.00039717594141301703,
      "loss": 2.9028,
      "step": 91009
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.935752272605896,
      "learning_rate": 0.0003971720713863459,
      "loss": 3.005,
      "step": 91010
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.912372350692749,
      "learning_rate": 0.0003971682013416087,
      "loss": 3.1905,
      "step": 91011
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6224294900894165,
      "learning_rate": 0.00039716433127880607,
      "loss": 3.0053,
      "step": 91012
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8315564393997192,
      "learning_rate": 0.00039716046119793875,
      "loss": 3.2293,
      "step": 91013
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7103008031845093,
      "learning_rate": 0.0003971565910990075,
      "loss": 2.987,
      "step": 91014
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3468940258026123,
      "learning_rate": 0.000397152720982013,
      "loss": 2.9458,
      "step": 91015
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1465697288513184,
      "learning_rate": 0.00039714885084695593,
      "loss": 3.0417,
      "step": 91016
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.1714425086975098,
      "learning_rate": 0.0003971449806938371,
      "loss": 2.9786,
      "step": 91017
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8581781387329102,
      "learning_rate": 0.00039714111052265716,
      "loss": 2.9737,
      "step": 91018
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.724491834640503,
      "learning_rate": 0.0003971372403334168,
      "loss": 2.9399,
      "step": 91019
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6262447834014893,
      "learning_rate": 0.00039713337012611696,
      "loss": 2.9953,
      "step": 91020
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.599305510520935,
      "learning_rate": 0.000397129499900758,
      "loss": 3.0594,
      "step": 91021
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8058955669403076,
      "learning_rate": 0.000397125629657341,
      "loss": 3.1663,
      "step": 91022
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8197132349014282,
      "learning_rate": 0.00039712175939586646,
      "loss": 3.1647,
      "step": 91023
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1599881649017334,
      "learning_rate": 0.00039711788911633514,
      "loss": 2.9126,
      "step": 91024
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.595860242843628,
      "learning_rate": 0.00039711401881874773,
      "loss": 3.1056,
      "step": 91025
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.369936227798462,
      "learning_rate": 0.000397110148503105,
      "loss": 2.87,
      "step": 91026
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5997060537338257,
      "learning_rate": 0.00039710627816940774,
      "loss": 2.9246,
      "step": 91027
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6306726932525635,
      "learning_rate": 0.0003971024078176565,
      "loss": 3.1592,
      "step": 91028
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.405200481414795,
      "learning_rate": 0.0003970985374478521,
      "loss": 2.9678,
      "step": 91029
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0200324058532715,
      "learning_rate": 0.00039709466705999526,
      "loss": 2.8855,
      "step": 91030
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2013535499572754,
      "learning_rate": 0.0003970907966540867,
      "loss": 2.8985,
      "step": 91031
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5433242321014404,
      "learning_rate": 0.0003970869262301271,
      "loss": 2.9637,
      "step": 91032
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0673346519470215,
      "learning_rate": 0.0003970830557881173,
      "loss": 3.0483,
      "step": 91033
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7277262210845947,
      "learning_rate": 0.0003970791853280578,
      "loss": 3.3022,
      "step": 91034
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9313387870788574,
      "learning_rate": 0.00039707531484994954,
      "loss": 2.9244,
      "step": 91035
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6372429132461548,
      "learning_rate": 0.00039707144435379316,
      "loss": 3.2352,
      "step": 91036
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8634634017944336,
      "learning_rate": 0.0003970675738395893,
      "loss": 2.8764,
      "step": 91037
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3579139709472656,
      "learning_rate": 0.0003970637033073387,
      "loss": 3.2801,
      "step": 91038
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.963965892791748,
      "learning_rate": 0.0003970598327570423,
      "loss": 2.984,
      "step": 91039
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.917344808578491,
      "learning_rate": 0.00039705596218870046,
      "loss": 3.0263,
      "step": 91040
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.761296033859253,
      "learning_rate": 0.00039705209160231423,
      "loss": 3.1963,
      "step": 91041
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5288145542144775,
      "learning_rate": 0.0003970482209978841,
      "loss": 2.9522,
      "step": 91042
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1484386920928955,
      "learning_rate": 0.0003970443503754109,
      "loss": 2.9799,
      "step": 91043
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.005192756652832,
      "learning_rate": 0.0003970404797348953,
      "loss": 2.7129,
      "step": 91044
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.538620948791504,
      "learning_rate": 0.00039703660907633806,
      "loss": 2.9673,
      "step": 91045
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.029223918914795,
      "learning_rate": 0.0003970327383997399,
      "loss": 3.068,
      "step": 91046
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.875004768371582,
      "learning_rate": 0.0003970288677051015,
      "loss": 3.2285,
      "step": 91047
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7682853937149048,
      "learning_rate": 0.00039702499699242357,
      "loss": 3.0707,
      "step": 91048
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9470536708831787,
      "learning_rate": 0.0003970211262617069,
      "loss": 2.748,
      "step": 91049
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7930231094360352,
      "learning_rate": 0.00039701725551295216,
      "loss": 2.7835,
      "step": 91050
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8815358877182007,
      "learning_rate": 0.0003970133847461601,
      "loss": 3.2216,
      "step": 91051
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.499040126800537,
      "learning_rate": 0.00039700951396133143,
      "loss": 2.8917,
      "step": 91052
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9914993047714233,
      "learning_rate": 0.00039700564315846683,
      "loss": 3.231,
      "step": 91053
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6481552124023438,
      "learning_rate": 0.00039700177233756707,
      "loss": 2.9863,
      "step": 91054
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6671496629714966,
      "learning_rate": 0.00039699790149863286,
      "loss": 2.8491,
      "step": 91055
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6724144220352173,
      "learning_rate": 0.00039699403064166484,
      "loss": 2.8086,
      "step": 91056
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.574155569076538,
      "learning_rate": 0.0003969901597666638,
      "loss": 2.758,
      "step": 91057
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6648657321929932,
      "learning_rate": 0.0003969862888736306,
      "loss": 2.8619,
      "step": 91058
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.534010648727417,
      "learning_rate": 0.0003969824179625657,
      "loss": 3.0478,
      "step": 91059
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6201516389846802,
      "learning_rate": 0.00039697854703346993,
      "loss": 2.9809,
      "step": 91060
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.310431718826294,
      "learning_rate": 0.00039697467608634405,
      "loss": 2.8414,
      "step": 91061
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6618692874908447,
      "learning_rate": 0.0003969708051211888,
      "loss": 3.195,
      "step": 91062
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5609264373779297,
      "learning_rate": 0.00039696693413800475,
      "loss": 3.0196,
      "step": 91063
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5537543296813965,
      "learning_rate": 0.00039696306313679285,
      "loss": 3.4048,
      "step": 91064
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6087138652801514,
      "learning_rate": 0.00039695919211755353,
      "loss": 2.8047,
      "step": 91065
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.535865068435669,
      "learning_rate": 0.00039695532108028773,
      "loss": 2.816,
      "step": 91066
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1988778114318848,
      "learning_rate": 0.0003969514500249961,
      "loss": 2.8861,
      "step": 91067
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7636486291885376,
      "learning_rate": 0.0003969475789516794,
      "loss": 3.3507,
      "step": 91068
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.36959171295166,
      "learning_rate": 0.00039694370786033824,
      "loss": 2.7303,
      "step": 91069
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.820680022239685,
      "learning_rate": 0.00039693983675097354,
      "loss": 2.9903,
      "step": 91070
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9275792837142944,
      "learning_rate": 0.0003969359656235858,
      "loss": 3.0777,
      "step": 91071
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.880220651626587,
      "learning_rate": 0.0003969320944781758,
      "loss": 3.2191,
      "step": 91072
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3279941082000732,
      "learning_rate": 0.00039692822331474436,
      "loss": 3.0143,
      "step": 91073
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5877081155776978,
      "learning_rate": 0.0003969243521332921,
      "loss": 2.8643,
      "step": 91074
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.901294231414795,
      "learning_rate": 0.0003969204809338198,
      "loss": 3.0372,
      "step": 91075
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9848694801330566,
      "learning_rate": 0.00039691660971632813,
      "loss": 3.0481,
      "step": 91076
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.32824969291687,
      "learning_rate": 0.00039691273848081786,
      "loss": 3.0171,
      "step": 91077
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5458260774612427,
      "learning_rate": 0.00039690886722728967,
      "loss": 3.0563,
      "step": 91078
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6855833530426025,
      "learning_rate": 0.0003969049959557443,
      "loss": 2.89,
      "step": 91079
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.707595944404602,
      "learning_rate": 0.00039690112466618245,
      "loss": 3.0443,
      "step": 91080
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6741578578948975,
      "learning_rate": 0.0003968972533586048,
      "loss": 3.0967,
      "step": 91081
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9064801931381226,
      "learning_rate": 0.00039689338203301226,
      "loss": 2.82,
      "step": 91082
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5119436979293823,
      "learning_rate": 0.0003968895106894053,
      "loss": 3.0535,
      "step": 91083
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1815009117126465,
      "learning_rate": 0.0003968856393277848,
      "loss": 2.5645,
      "step": 91084
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8593393564224243,
      "learning_rate": 0.00039688176794815144,
      "loss": 3.279,
      "step": 91085
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8563206195831299,
      "learning_rate": 0.0003968778965505059,
      "loss": 2.9628,
      "step": 91086
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.682610273361206,
      "learning_rate": 0.0003968740251348489,
      "loss": 3.055,
      "step": 91087
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8128352165222168,
      "learning_rate": 0.00039687015370118124,
      "loss": 3.3507,
      "step": 91088
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7176480293273926,
      "learning_rate": 0.00039686628224950356,
      "loss": 3.0195,
      "step": 91089
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.640415906906128,
      "learning_rate": 0.00039686241077981657,
      "loss": 3.2211,
      "step": 91090
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8316727876663208,
      "learning_rate": 0.00039685853929212115,
      "loss": 2.9939,
      "step": 91091
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6292210817337036,
      "learning_rate": 0.00039685466778641783,
      "loss": 2.9728,
      "step": 91092
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.325582981109619,
      "learning_rate": 0.0003968507962627074,
      "loss": 3.1642,
      "step": 91093
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7647459506988525,
      "learning_rate": 0.00039684692472099055,
      "loss": 2.8929,
      "step": 91094
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7122349739074707,
      "learning_rate": 0.0003968430531612681,
      "loss": 3.2208,
      "step": 91095
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8026297092437744,
      "learning_rate": 0.00039683918158354064,
      "loss": 2.9252,
      "step": 91096
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8404103517532349,
      "learning_rate": 0.00039683530998780897,
      "loss": 3.2032,
      "step": 91097
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5629080533981323,
      "learning_rate": 0.00039683143837407374,
      "loss": 3.0797,
      "step": 91098
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7297688722610474,
      "learning_rate": 0.0003968275667423357,
      "loss": 2.9344,
      "step": 91099
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0294981002807617,
      "learning_rate": 0.0003968236950925957,
      "loss": 3.2459,
      "step": 91100
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5867549180984497,
      "learning_rate": 0.0003968198234248543,
      "loss": 3.1627,
      "step": 91101
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.699158191680908,
      "learning_rate": 0.00039681595173911223,
      "loss": 3.0386,
      "step": 91102
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.9245824813842773,
      "learning_rate": 0.0003968120800353703,
      "loss": 2.9886,
      "step": 91103
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1199703216552734,
      "learning_rate": 0.0003968082083136292,
      "loss": 3.0078,
      "step": 91104
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6754881143569946,
      "learning_rate": 0.00039680433657388953,
      "loss": 3.2086,
      "step": 91105
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.158334493637085,
      "learning_rate": 0.0003968004648161522,
      "loss": 2.9864,
      "step": 91106
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7720330953598022,
      "learning_rate": 0.00039679659304041776,
      "loss": 3.2352,
      "step": 91107
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5655183792114258,
      "learning_rate": 0.0003967927212466871,
      "loss": 2.9835,
      "step": 91108
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1115665435791016,
      "learning_rate": 0.0003967888494349607,
      "loss": 2.9983,
      "step": 91109
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8897712230682373,
      "learning_rate": 0.00039678497760523956,
      "loss": 2.9419,
      "step": 91110
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9293079376220703,
      "learning_rate": 0.0003967811057575242,
      "loss": 3.036,
      "step": 91111
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0692341327667236,
      "learning_rate": 0.00039677723389181543,
      "loss": 2.6967,
      "step": 91112
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9146406650543213,
      "learning_rate": 0.0003967733620081139,
      "loss": 3.3584,
      "step": 91113
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.137784719467163,
      "learning_rate": 0.0003967694901064204,
      "loss": 3.0835,
      "step": 91114
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6461483240127563,
      "learning_rate": 0.0003967656181867357,
      "loss": 2.9257,
      "step": 91115
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.386988401412964,
      "learning_rate": 0.0003967617462490604,
      "loss": 2.9626,
      "step": 91116
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6625059843063354,
      "learning_rate": 0.00039675787429339525,
      "loss": 3.0219,
      "step": 91117
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8208417892456055,
      "learning_rate": 0.00039675400231974095,
      "loss": 3.0076,
      "step": 91118
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6418894529342651,
      "learning_rate": 0.00039675013032809827,
      "loss": 3.0502,
      "step": 91119
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8845340013504028,
      "learning_rate": 0.000396746258318468,
      "loss": 2.8605,
      "step": 91120
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.575011968612671,
      "learning_rate": 0.0003967423862908506,
      "loss": 3.1935,
      "step": 91121
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4073041677474976,
      "learning_rate": 0.00039673851424524713,
      "loss": 3.2179,
      "step": 91122
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.379098653793335,
      "learning_rate": 0.00039673464218165804,
      "loss": 3.0288,
      "step": 91123
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9213746786117554,
      "learning_rate": 0.0003967307701000842,
      "loss": 3.0974,
      "step": 91124
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6821171045303345,
      "learning_rate": 0.00039672689800052634,
      "loss": 3.2497,
      "step": 91125
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.188347578048706,
      "learning_rate": 0.0003967230258829851,
      "loss": 2.8865,
      "step": 91126
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7227016687393188,
      "learning_rate": 0.0003967191537474611,
      "loss": 2.9074,
      "step": 91127
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6361067295074463,
      "learning_rate": 0.0003967152815939553,
      "loss": 3.1553,
      "step": 91128
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6921112537384033,
      "learning_rate": 0.0003967114094224683,
      "loss": 3.0252,
      "step": 91129
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.596457600593567,
      "learning_rate": 0.00039670753723300076,
      "loss": 3.0267,
      "step": 91130
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.505913257598877,
      "learning_rate": 0.00039670366502555355,
      "loss": 3.0161,
      "step": 91131
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5790116786956787,
      "learning_rate": 0.00039669979280012723,
      "loss": 3.1029,
      "step": 91132
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7577005624771118,
      "learning_rate": 0.00039669592055672267,
      "loss": 2.9992,
      "step": 91133
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6397719383239746,
      "learning_rate": 0.00039669204829534047,
      "loss": 2.9448,
      "step": 91134
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5262476205825806,
      "learning_rate": 0.0003966881760159813,
      "loss": 3.0283,
      "step": 91135
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.386540412902832,
      "learning_rate": 0.0003966843037186461,
      "loss": 3.1854,
      "step": 91136
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5544809103012085,
      "learning_rate": 0.0003966804314033354,
      "loss": 2.8212,
      "step": 91137
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6441617012023926,
      "learning_rate": 0.00039667655907005007,
      "loss": 2.8738,
      "step": 91138
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7049418687820435,
      "learning_rate": 0.0003966726867187906,
      "loss": 2.9965,
      "step": 91139
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6070005893707275,
      "learning_rate": 0.00039666881434955803,
      "loss": 2.8727,
      "step": 91140
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1236350536346436,
      "learning_rate": 0.0003966649419623528,
      "loss": 3.0254,
      "step": 91141
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.07206654548645,
      "learning_rate": 0.0003966610695571757,
      "loss": 2.8153,
      "step": 91142
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7706319093704224,
      "learning_rate": 0.00039665719713402755,
      "loss": 2.9322,
      "step": 91143
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8545600175857544,
      "learning_rate": 0.00039665332469290896,
      "loss": 3.0413,
      "step": 91144
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5120781660079956,
      "learning_rate": 0.0003966494522338207,
      "loss": 3.0279,
      "step": 91145
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.672804594039917,
      "learning_rate": 0.0003966455797567635,
      "loss": 2.8022,
      "step": 91146
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.639844298362732,
      "learning_rate": 0.00039664170726173804,
      "loss": 3.0204,
      "step": 91147
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5729033946990967,
      "learning_rate": 0.00039663783474874504,
      "loss": 2.8712,
      "step": 91148
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.721941351890564,
      "learning_rate": 0.00039663396221778534,
      "loss": 3.1326,
      "step": 91149
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1959660053253174,
      "learning_rate": 0.00039663008966885944,
      "loss": 3.0136,
      "step": 91150
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.52194344997406,
      "learning_rate": 0.0003966262171019682,
      "loss": 3.0799,
      "step": 91151
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9268702268600464,
      "learning_rate": 0.00039662234451711244,
      "loss": 3.016,
      "step": 91152
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9100803136825562,
      "learning_rate": 0.0003966184719142927,
      "loss": 3.1473,
      "step": 91153
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.787261962890625,
      "learning_rate": 0.00039661459929350967,
      "loss": 2.8694,
      "step": 91154
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.604494571685791,
      "learning_rate": 0.0003966107266547643,
      "loss": 2.8067,
      "step": 91155
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6000300645828247,
      "learning_rate": 0.0003966068539980571,
      "loss": 3.1688,
      "step": 91156
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.788597583770752,
      "learning_rate": 0.00039660298132338884,
      "loss": 2.763,
      "step": 91157
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8940298557281494,
      "learning_rate": 0.00039659910863076034,
      "loss": 3.1177,
      "step": 91158
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.371826648712158,
      "learning_rate": 0.00039659523592017214,
      "loss": 3.0137,
      "step": 91159
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9684134721755981,
      "learning_rate": 0.0003965913631916251,
      "loss": 3.0463,
      "step": 91160
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5366963148117065,
      "learning_rate": 0.00039658749044512,
      "loss": 3.2107,
      "step": 91161
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7887893915176392,
      "learning_rate": 0.0003965836176806574,
      "loss": 3.2776,
      "step": 91162
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9144575595855713,
      "learning_rate": 0.000396579744898238,
      "loss": 3.0,
      "step": 91163
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5405852794647217,
      "learning_rate": 0.0003965758720978627,
      "loss": 3.0583,
      "step": 91164
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.079678773880005,
      "learning_rate": 0.00039657199927953214,
      "loss": 2.8797,
      "step": 91165
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7917193174362183,
      "learning_rate": 0.0003965681264432469,
      "loss": 3.1983,
      "step": 91166
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9155282974243164,
      "learning_rate": 0.00039656425358900794,
      "loss": 3.2407,
      "step": 91167
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7051559686660767,
      "learning_rate": 0.0003965603807168158,
      "loss": 3.2906,
      "step": 91168
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5561782121658325,
      "learning_rate": 0.00039655650782667123,
      "loss": 2.922,
      "step": 91169
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5542287826538086,
      "learning_rate": 0.0003965526349185751,
      "loss": 2.9368,
      "step": 91170
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7967506647109985,
      "learning_rate": 0.0003965487619925279,
      "loss": 3.0414,
      "step": 91171
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5693415403366089,
      "learning_rate": 0.00039654488904853057,
      "loss": 2.9807,
      "step": 91172
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5797895193099976,
      "learning_rate": 0.00039654101608658365,
      "loss": 3.0805,
      "step": 91173
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.90570068359375,
      "learning_rate": 0.00039653714310668793,
      "loss": 3.1198,
      "step": 91174
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6946837902069092,
      "learning_rate": 0.0003965332701088441,
      "loss": 2.8358,
      "step": 91175
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6690863370895386,
      "learning_rate": 0.000396529397093053,
      "loss": 3.0243,
      "step": 91176
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5638948678970337,
      "learning_rate": 0.00039652552405931526,
      "loss": 2.7699,
      "step": 91177
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7234272956848145,
      "learning_rate": 0.00039652165100763153,
      "loss": 2.8333,
      "step": 91178
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.526822328567505,
      "learning_rate": 0.00039651777793800263,
      "loss": 2.8351,
      "step": 91179
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.063084125518799,
      "learning_rate": 0.00039651390485042926,
      "loss": 2.9687,
      "step": 91180
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4299139976501465,
      "learning_rate": 0.0003965100317449121,
      "loss": 3.1631,
      "step": 91181
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8381937742233276,
      "learning_rate": 0.00039650615862145194,
      "loss": 3.0271,
      "step": 91182
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9942340850830078,
      "learning_rate": 0.00039650228548004945,
      "loss": 3.1138,
      "step": 91183
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9596400260925293,
      "learning_rate": 0.0003964984123207054,
      "loss": 3.0314,
      "step": 91184
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0215680599212646,
      "learning_rate": 0.0003964945391434204,
      "loss": 3.2312,
      "step": 91185
    },
    {
      "epoch": 1.19,
      "grad_norm": 5.401443958282471,
      "learning_rate": 0.0003964906659481954,
      "loss": 2.7861,
      "step": 91186
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.419495105743408,
      "learning_rate": 0.0003964867927350308,
      "loss": 3.0273,
      "step": 91187
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7407158613204956,
      "learning_rate": 0.00039648291950392756,
      "loss": 3.2595,
      "step": 91188
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6445448398590088,
      "learning_rate": 0.0003964790462548863,
      "loss": 3.0471,
      "step": 91189
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.399892568588257,
      "learning_rate": 0.00039647517298790777,
      "loss": 3.0618,
      "step": 91190
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4711005687713623,
      "learning_rate": 0.00039647129970299264,
      "loss": 3.1749,
      "step": 91191
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7684447765350342,
      "learning_rate": 0.0003964674264001417,
      "loss": 2.9965,
      "step": 91192
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5382118225097656,
      "learning_rate": 0.00039646355307935575,
      "loss": 3.0706,
      "step": 91193
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.141465187072754,
      "learning_rate": 0.00039645967974063525,
      "loss": 2.8838,
      "step": 91194
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8754546642303467,
      "learning_rate": 0.00039645580638398117,
      "loss": 2.8507,
      "step": 91195
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4349958896636963,
      "learning_rate": 0.00039645193300939406,
      "loss": 2.9222,
      "step": 91196
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1780805587768555,
      "learning_rate": 0.00039644805961687484,
      "loss": 2.9867,
      "step": 91197
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.169877767562866,
      "learning_rate": 0.0003964441862064239,
      "loss": 3.1617,
      "step": 91198
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6085214614868164,
      "learning_rate": 0.0003964403127780423,
      "loss": 3.1818,
      "step": 91199
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.947478771209717,
      "learning_rate": 0.00039643643933173073,
      "loss": 3.051,
      "step": 91200
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.744906425476074,
      "learning_rate": 0.00039643256586748964,
      "loss": 3.1174,
      "step": 91201
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2243659496307373,
      "learning_rate": 0.0003964286923853199,
      "loss": 3.2401,
      "step": 91202
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.064661979675293,
      "learning_rate": 0.0003964248188852224,
      "loss": 3.1143,
      "step": 91203
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.783815622329712,
      "learning_rate": 0.0003964209453671976,
      "loss": 2.9487,
      "step": 91204
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4919846057891846,
      "learning_rate": 0.0003964170718312463,
      "loss": 3.2177,
      "step": 91205
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0148680210113525,
      "learning_rate": 0.00039641319827736927,
      "loss": 2.9485,
      "step": 91206
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7025550603866577,
      "learning_rate": 0.0003964093247055673,
      "loss": 2.9338,
      "step": 91207
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.424509286880493,
      "learning_rate": 0.00039640545111584093,
      "loss": 3.1977,
      "step": 91208
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.961329698562622,
      "learning_rate": 0.00039640157750819096,
      "loss": 3.0088,
      "step": 91209
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8587077856063843,
      "learning_rate": 0.0003963977038826182,
      "loss": 3.1189,
      "step": 91210
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6537925004959106,
      "learning_rate": 0.00039639383023912315,
      "loss": 3.2008,
      "step": 91211
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7697794437408447,
      "learning_rate": 0.0003963899565777067,
      "loss": 3.0401,
      "step": 91212
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6152517795562744,
      "learning_rate": 0.0003963860828983697,
      "loss": 2.8827,
      "step": 91213
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7159662246704102,
      "learning_rate": 0.0003963822092011125,
      "loss": 3.165,
      "step": 91214
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6858055591583252,
      "learning_rate": 0.0003963783354859361,
      "loss": 2.9927,
      "step": 91215
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.983726978302002,
      "learning_rate": 0.00039637446175284127,
      "loss": 2.8453,
      "step": 91216
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5192179679870605,
      "learning_rate": 0.00039637058800182844,
      "loss": 3.1386,
      "step": 91217
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9946532249450684,
      "learning_rate": 0.00039636671423289857,
      "loss": 2.9898,
      "step": 91218
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0905940532684326,
      "learning_rate": 0.0003963628404460523,
      "loss": 2.8653,
      "step": 91219
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.464823007583618,
      "learning_rate": 0.0003963589666412903,
      "loss": 2.8888,
      "step": 91220
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.64752995967865,
      "learning_rate": 0.00039635509281861345,
      "loss": 2.918,
      "step": 91221
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.750891089439392,
      "learning_rate": 0.0003963512189780223,
      "loss": 3.083,
      "step": 91222
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.53660249710083,
      "learning_rate": 0.00039634734511951767,
      "loss": 2.9663,
      "step": 91223
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5698190927505493,
      "learning_rate": 0.0003963434712431003,
      "loss": 2.976,
      "step": 91224
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.035442352294922,
      "learning_rate": 0.00039633959734877087,
      "loss": 2.7215,
      "step": 91225
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7680559158325195,
      "learning_rate": 0.00039633572343653,
      "loss": 3.156,
      "step": 91226
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1236016750335693,
      "learning_rate": 0.0003963318495063785,
      "loss": 2.934,
      "step": 91227
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9758784770965576,
      "learning_rate": 0.0003963279755583171,
      "loss": 3.0216,
      "step": 91228
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.882758140563965,
      "learning_rate": 0.00039632410159234655,
      "loss": 2.8902,
      "step": 91229
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.083699941635132,
      "learning_rate": 0.0003963202276084675,
      "loss": 2.9546,
      "step": 91230
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5731761455535889,
      "learning_rate": 0.00039631635360668074,
      "loss": 2.9574,
      "step": 91231
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6460349559783936,
      "learning_rate": 0.0003963124795869869,
      "loss": 3.0781,
      "step": 91232
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.104388952255249,
      "learning_rate": 0.0003963086055493868,
      "loss": 2.9805,
      "step": 91233
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7303138971328735,
      "learning_rate": 0.0003963047314938811,
      "loss": 3.2303,
      "step": 91234
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9949612617492676,
      "learning_rate": 0.0003963008574204705,
      "loss": 3.1461,
      "step": 91235
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6960208415985107,
      "learning_rate": 0.00039629698332915576,
      "loss": 3.2371,
      "step": 91236
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9044362306594849,
      "learning_rate": 0.0003962931092199376,
      "loss": 2.8821,
      "step": 91237
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9322566986083984,
      "learning_rate": 0.00039628923509281677,
      "loss": 3.2268,
      "step": 91238
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8551595211029053,
      "learning_rate": 0.0003962853609477939,
      "loss": 3.1668,
      "step": 91239
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8497196435928345,
      "learning_rate": 0.00039628148678486986,
      "loss": 3.0176,
      "step": 91240
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.981752634048462,
      "learning_rate": 0.0003962776126040452,
      "loss": 3.085,
      "step": 91241
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.036320686340332,
      "learning_rate": 0.00039627373840532066,
      "loss": 3.3424,
      "step": 91242
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.4624252319335938,
      "learning_rate": 0.0003962698641886971,
      "loss": 3.0963,
      "step": 91243
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5810612440109253,
      "learning_rate": 0.0003962659899541751,
      "loss": 2.8177,
      "step": 91244
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3723065853118896,
      "learning_rate": 0.0003962621157017554,
      "loss": 3.017,
      "step": 91245
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3039729595184326,
      "learning_rate": 0.00039625824143143894,
      "loss": 2.9217,
      "step": 91246
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9560256004333496,
      "learning_rate": 0.00039625436714322605,
      "loss": 2.8825,
      "step": 91247
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2335638999938965,
      "learning_rate": 0.00039625049283711775,
      "loss": 2.7696,
      "step": 91248
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.123814105987549,
      "learning_rate": 0.00039624661851311464,
      "loss": 2.71,
      "step": 91249
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1029727458953857,
      "learning_rate": 0.0003962427441712175,
      "loss": 2.6829,
      "step": 91250
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.424694061279297,
      "learning_rate": 0.000396238869811427,
      "loss": 2.8551,
      "step": 91251
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.4681057929992676,
      "learning_rate": 0.0003962349954337438,
      "loss": 3.0981,
      "step": 91252
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2915008068084717,
      "learning_rate": 0.00039623112103816884,
      "loss": 2.9648,
      "step": 91253
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.890153408050537,
      "learning_rate": 0.0003962272466247027,
      "loss": 3.1528,
      "step": 91254
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9177855253219604,
      "learning_rate": 0.00039622337219334593,
      "loss": 3.1524,
      "step": 91255
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5286829471588135,
      "learning_rate": 0.0003962194977440996,
      "loss": 3.0224,
      "step": 91256
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1658589839935303,
      "learning_rate": 0.00039621562327696417,
      "loss": 3.0015,
      "step": 91257
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6152315139770508,
      "learning_rate": 0.00039621174879194044,
      "loss": 3.0945,
      "step": 91258
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5868351459503174,
      "learning_rate": 0.0003962078742890291,
      "loss": 2.8809,
      "step": 91259
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6106958389282227,
      "learning_rate": 0.0003962039997682309,
      "loss": 3.1252,
      "step": 91260
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0098588466644287,
      "learning_rate": 0.0003962001252295466,
      "loss": 3.0564,
      "step": 91261
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9107565879821777,
      "learning_rate": 0.0003961962506729769,
      "loss": 3.1827,
      "step": 91262
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.7800564765930176,
      "learning_rate": 0.00039619237609852246,
      "loss": 2.8773,
      "step": 91263
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.056962251663208,
      "learning_rate": 0.000396188501506184,
      "loss": 3.13,
      "step": 91264
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4670135974884033,
      "learning_rate": 0.0003961846268959624,
      "loss": 2.8761,
      "step": 91265
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.924119472503662,
      "learning_rate": 0.0003961807522678582,
      "loss": 3.1619,
      "step": 91266
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7503024339675903,
      "learning_rate": 0.00039617687762187213,
      "loss": 2.9321,
      "step": 91267
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.599449872970581,
      "learning_rate": 0.0003961730029580051,
      "loss": 2.889,
      "step": 91268
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.724219799041748,
      "learning_rate": 0.0003961691282762576,
      "loss": 2.9396,
      "step": 91269
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5206443071365356,
      "learning_rate": 0.0003961652535766304,
      "loss": 3.1093,
      "step": 91270
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5711694955825806,
      "learning_rate": 0.0003961613788591244,
      "loss": 2.9991,
      "step": 91271
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5242035388946533,
      "learning_rate": 0.00039615750412374005,
      "loss": 3.231,
      "step": 91272
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.08054518699646,
      "learning_rate": 0.0003961536293704782,
      "loss": 3.0361,
      "step": 91273
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0130560398101807,
      "learning_rate": 0.00039614975459933966,
      "loss": 3.0872,
      "step": 91274
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4956883192062378,
      "learning_rate": 0.00039614587981032505,
      "loss": 3.1043,
      "step": 91275
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8400837182998657,
      "learning_rate": 0.00039614200500343505,
      "loss": 3.0829,
      "step": 91276
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1140005588531494,
      "learning_rate": 0.00039613813017867056,
      "loss": 2.8096,
      "step": 91277
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.00154972076416,
      "learning_rate": 0.000396134255336032,
      "loss": 2.937,
      "step": 91278
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5636792182922363,
      "learning_rate": 0.0003961303804755204,
      "loss": 2.9916,
      "step": 91279
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6141986846923828,
      "learning_rate": 0.0003961265055971362,
      "loss": 2.9033,
      "step": 91280
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4367003440856934,
      "learning_rate": 0.0003961226307008805,
      "loss": 2.8485,
      "step": 91281
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.239974021911621,
      "learning_rate": 0.0003961187557867536,
      "loss": 2.8649,
      "step": 91282
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0183281898498535,
      "learning_rate": 0.00039611488085475647,
      "loss": 2.7908,
      "step": 91283
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.445451021194458,
      "learning_rate": 0.00039611100590488974,
      "loss": 2.948,
      "step": 91284
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7924458980560303,
      "learning_rate": 0.0003961071309371543,
      "loss": 2.9841,
      "step": 91285
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8601456880569458,
      "learning_rate": 0.00039610325595155056,
      "loss": 3.2302,
      "step": 91286
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.979994535446167,
      "learning_rate": 0.00039609938094807947,
      "loss": 3.0289,
      "step": 91287
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5500266551971436,
      "learning_rate": 0.0003960955059267417,
      "loss": 2.7833,
      "step": 91288
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.581802487373352,
      "learning_rate": 0.0003960916308875379,
      "loss": 3.0599,
      "step": 91289
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.700285792350769,
      "learning_rate": 0.00039608775583046895,
      "loss": 2.92,
      "step": 91290
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2081451416015625,
      "learning_rate": 0.0003960838807555354,
      "loss": 2.9208,
      "step": 91291
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7766190767288208,
      "learning_rate": 0.0003960800056627381,
      "loss": 3.1292,
      "step": 91292
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6204999685287476,
      "learning_rate": 0.0003960761305520777,
      "loss": 3.1716,
      "step": 91293
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.066924810409546,
      "learning_rate": 0.0003960722554235549,
      "loss": 3.1379,
      "step": 91294
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7127654552459717,
      "learning_rate": 0.0003960683802771705,
      "loss": 3.0258,
      "step": 91295
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.740911602973938,
      "learning_rate": 0.0003960645051129252,
      "loss": 2.9736,
      "step": 91296
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0076792240142822,
      "learning_rate": 0.00039606062993081953,
      "loss": 3.1062,
      "step": 91297
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.515501618385315,
      "learning_rate": 0.00039605675473085457,
      "loss": 3.0607,
      "step": 91298
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.396873950958252,
      "learning_rate": 0.00039605287951303075,
      "loss": 3.0098,
      "step": 91299
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6540840864181519,
      "learning_rate": 0.00039604900427734886,
      "loss": 3.1795,
      "step": 91300
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.197169303894043,
      "learning_rate": 0.00039604512902380974,
      "loss": 2.8982,
      "step": 91301
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.763491153717041,
      "learning_rate": 0.0003960412537524139,
      "loss": 2.8558,
      "step": 91302
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8328138589859009,
      "learning_rate": 0.0003960373784631623,
      "loss": 3.074,
      "step": 91303
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8695006370544434,
      "learning_rate": 0.0003960335031560555,
      "loss": 2.9215,
      "step": 91304
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5527286529541016,
      "learning_rate": 0.00039602962783109424,
      "loss": 2.9885,
      "step": 91305
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.678243398666382,
      "learning_rate": 0.00039602575248827925,
      "loss": 3.1217,
      "step": 91306
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8770956993103027,
      "learning_rate": 0.00039602187712761125,
      "loss": 3.0565,
      "step": 91307
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5114670991897583,
      "learning_rate": 0.00039601800174909104,
      "loss": 2.804,
      "step": 91308
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5646370649337769,
      "learning_rate": 0.0003960141263527192,
      "loss": 2.7742,
      "step": 91309
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9165042638778687,
      "learning_rate": 0.00039601025093849657,
      "loss": 3.0175,
      "step": 91310
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5031685829162598,
      "learning_rate": 0.00039600637550642377,
      "loss": 2.9983,
      "step": 91311
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8455476760864258,
      "learning_rate": 0.0003960025000565016,
      "loss": 3.0112,
      "step": 91312
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7693887948989868,
      "learning_rate": 0.0003959986245887308,
      "loss": 3.3407,
      "step": 91313
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7461333274841309,
      "learning_rate": 0.00039599474910311194,
      "loss": 2.8884,
      "step": 91314
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0604918003082275,
      "learning_rate": 0.0003959908735996459,
      "loss": 3.0416,
      "step": 91315
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.097700595855713,
      "learning_rate": 0.0003959869980783333,
      "loss": 3.0415,
      "step": 91316
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.551015853881836,
      "learning_rate": 0.000395983122539175,
      "loss": 3.1022,
      "step": 91317
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0587267875671387,
      "learning_rate": 0.00039597924698217156,
      "loss": 3.2228,
      "step": 91318
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2041075229644775,
      "learning_rate": 0.0003959753714073238,
      "loss": 3.2045,
      "step": 91319
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.2234084606170654,
      "learning_rate": 0.0003959714958146324,
      "loss": 2.7064,
      "step": 91320
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5267475843429565,
      "learning_rate": 0.00039596762020409804,
      "loss": 2.804,
      "step": 91321
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.928139328956604,
      "learning_rate": 0.00039596374457572154,
      "loss": 3.2158,
      "step": 91322
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9711148738861084,
      "learning_rate": 0.0003959598689295036,
      "loss": 3.1608,
      "step": 91323
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.028622627258301,
      "learning_rate": 0.00039595599326544485,
      "loss": 3.0133,
      "step": 91324
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.053112268447876,
      "learning_rate": 0.000395952117583546,
      "loss": 2.946,
      "step": 91325
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1999032497406006,
      "learning_rate": 0.00039594824188380804,
      "loss": 2.9892,
      "step": 91326
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.034656047821045,
      "learning_rate": 0.0003959443661662313,
      "loss": 3.0012,
      "step": 91327
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8024744987487793,
      "learning_rate": 0.00039594049043081673,
      "loss": 3.083,
      "step": 91328
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8948365449905396,
      "learning_rate": 0.0003959366146775651,
      "loss": 2.9047,
      "step": 91329
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.1867032051086426,
      "learning_rate": 0.00039593273890647696,
      "loss": 2.8762,
      "step": 91330
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.843968152999878,
      "learning_rate": 0.00039592886311755316,
      "loss": 3.098,
      "step": 91331
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6183457374572754,
      "learning_rate": 0.00039592498731079434,
      "loss": 2.9316,
      "step": 91332
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5562503337860107,
      "learning_rate": 0.00039592111148620127,
      "loss": 3.0306,
      "step": 91333
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.9276845455169678,
      "learning_rate": 0.00039591723564377465,
      "loss": 2.9141,
      "step": 91334
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.4378483295440674,
      "learning_rate": 0.0003959133597835153,
      "loss": 2.8255,
      "step": 91335
    },
    {
      "epoch": 1.19,
      "grad_norm": 4.039472579956055,
      "learning_rate": 0.00039590948390542366,
      "loss": 2.9313,
      "step": 91336
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9442856311798096,
      "learning_rate": 0.0003959056080095007,
      "loss": 3.0035,
      "step": 91337
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7647262811660767,
      "learning_rate": 0.00039590173209574713,
      "loss": 2.8913,
      "step": 91338
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.013840913772583,
      "learning_rate": 0.0003958978561641636,
      "loss": 3.1177,
      "step": 91339
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.359360456466675,
      "learning_rate": 0.00039589398021475087,
      "loss": 2.9741,
      "step": 91340
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9047760963439941,
      "learning_rate": 0.00039589010424750964,
      "loss": 2.8907,
      "step": 91341
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5506372451782227,
      "learning_rate": 0.0003958862282624406,
      "loss": 3.0999,
      "step": 91342
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.677251100540161,
      "learning_rate": 0.0003958823522595444,
      "loss": 2.834,
      "step": 91343
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9679853916168213,
      "learning_rate": 0.00039587847623882204,
      "loss": 2.9182,
      "step": 91344
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4948874711990356,
      "learning_rate": 0.000395874600200274,
      "loss": 3.0026,
      "step": 91345
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7422454357147217,
      "learning_rate": 0.000395870724143901,
      "loss": 2.9808,
      "step": 91346
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7828435897827148,
      "learning_rate": 0.0003958668480697039,
      "loss": 3.0602,
      "step": 91347
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.662039875984192,
      "learning_rate": 0.00039586297197768326,
      "loss": 3.0123,
      "step": 91348
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8851187229156494,
      "learning_rate": 0.00039585909586784,
      "loss": 2.8409,
      "step": 91349
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.544347047805786,
      "learning_rate": 0.0003958552197401747,
      "loss": 2.993,
      "step": 91350
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.814773678779602,
      "learning_rate": 0.00039585134359468804,
      "loss": 2.9807,
      "step": 91351
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.182302236557007,
      "learning_rate": 0.00039584746743138077,
      "loss": 3.0972,
      "step": 91352
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.285161256790161,
      "learning_rate": 0.0003958435912502538,
      "loss": 3.0674,
      "step": 91353
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.7417662143707275,
      "learning_rate": 0.00039583971505130754,
      "loss": 2.9384,
      "step": 91354
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.3596391677856445,
      "learning_rate": 0.00039583583883454293,
      "loss": 3.1196,
      "step": 91355
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8699733018875122,
      "learning_rate": 0.0003958319625999607,
      "loss": 3.0522,
      "step": 91356
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.010132074356079,
      "learning_rate": 0.00039582808634756135,
      "loss": 3.0497,
      "step": 91357
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.2881617546081543,
      "learning_rate": 0.0003958242100773459,
      "loss": 3.2123,
      "step": 91358
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9010308980941772,
      "learning_rate": 0.00039582033378931483,
      "loss": 3.1254,
      "step": 91359
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8591605424880981,
      "learning_rate": 0.000395816457483469,
      "loss": 2.8781,
      "step": 91360
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8335614204406738,
      "learning_rate": 0.00039581258115980897,
      "loss": 2.7963,
      "step": 91361
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8480013608932495,
      "learning_rate": 0.0003958087048183357,
      "loss": 2.8883,
      "step": 91362
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.891696810722351,
      "learning_rate": 0.0003958048284590497,
      "loss": 2.9076,
      "step": 91363
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6590948104858398,
      "learning_rate": 0.0003958009520819518,
      "loss": 3.0476,
      "step": 91364
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.832124948501587,
      "learning_rate": 0.00039579707568704277,
      "loss": 2.9599,
      "step": 91365
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8508739471435547,
      "learning_rate": 0.0003957931992743232,
      "loss": 3.074,
      "step": 91366
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.141727924346924,
      "learning_rate": 0.0003957893228437938,
      "loss": 3.0171,
      "step": 91367
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5372127294540405,
      "learning_rate": 0.00039578544639545544,
      "loss": 2.9576,
      "step": 91368
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5983951091766357,
      "learning_rate": 0.00039578156992930877,
      "loss": 2.9937,
      "step": 91369
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6767759323120117,
      "learning_rate": 0.00039577769344535446,
      "loss": 3.2173,
      "step": 91370
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8244068622589111,
      "learning_rate": 0.0003957738169435932,
      "loss": 2.9169,
      "step": 91371
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.17214298248291,
      "learning_rate": 0.00039576994042402584,
      "loss": 2.8745,
      "step": 91372
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3738067150115967,
      "learning_rate": 0.00039576606388665316,
      "loss": 2.9875,
      "step": 91373
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.144897699356079,
      "learning_rate": 0.0003957621873314756,
      "loss": 2.817,
      "step": 91374
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6452324390411377,
      "learning_rate": 0.0003957583107584941,
      "loss": 3.0026,
      "step": 91375
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0881197452545166,
      "learning_rate": 0.0003957544341677093,
      "loss": 3.223,
      "step": 91376
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3278610706329346,
      "learning_rate": 0.00039575055755912197,
      "loss": 2.7276,
      "step": 91377
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1584954261779785,
      "learning_rate": 0.00039574668093273277,
      "loss": 3.1749,
      "step": 91378
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8670029640197754,
      "learning_rate": 0.0003957428042885425,
      "loss": 2.9234,
      "step": 91379
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7430249452590942,
      "learning_rate": 0.0003957389276265518,
      "loss": 3.0761,
      "step": 91380
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7486666440963745,
      "learning_rate": 0.00039573505094676144,
      "loss": 2.9279,
      "step": 91381
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9037275314331055,
      "learning_rate": 0.00039573117424917213,
      "loss": 2.9427,
      "step": 91382
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6342272758483887,
      "learning_rate": 0.0003957272975337845,
      "loss": 3.3416,
      "step": 91383
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6876602172851562,
      "learning_rate": 0.00039572342080059956,
      "loss": 2.9901,
      "step": 91384
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9452905654907227,
      "learning_rate": 0.0003957195440496177,
      "loss": 2.9804,
      "step": 91385
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8250415325164795,
      "learning_rate": 0.0003957156672808397,
      "loss": 2.7131,
      "step": 91386
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0199413299560547,
      "learning_rate": 0.0003957117904942665,
      "loss": 3.0,
      "step": 91387
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8865642547607422,
      "learning_rate": 0.00039570791368989856,
      "loss": 2.7686,
      "step": 91388
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.487030506134033,
      "learning_rate": 0.00039570403686773676,
      "loss": 3.0565,
      "step": 91389
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.9315757751464844,
      "learning_rate": 0.0003957001600277818,
      "loss": 3.0228,
      "step": 91390
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.037714958190918,
      "learning_rate": 0.00039569628317003436,
      "loss": 3.0303,
      "step": 91391
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9642630815505981,
      "learning_rate": 0.0003956924062944951,
      "loss": 2.7776,
      "step": 91392
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8712307214736938,
      "learning_rate": 0.0003956885294011649,
      "loss": 2.8169,
      "step": 91393
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8915427923202515,
      "learning_rate": 0.00039568465249004435,
      "loss": 2.979,
      "step": 91394
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.1174697875976562,
      "learning_rate": 0.0003956807755611342,
      "loss": 2.7249,
      "step": 91395
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9581724405288696,
      "learning_rate": 0.00039567689861443533,
      "loss": 2.9453,
      "step": 91396
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5874464511871338,
      "learning_rate": 0.00039567302164994814,
      "loss": 3.0471,
      "step": 91397
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5928643941879272,
      "learning_rate": 0.0003956691446676736,
      "loss": 3.054,
      "step": 91398
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5996983051300049,
      "learning_rate": 0.00039566526766761237,
      "loss": 3.154,
      "step": 91399
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5788801908493042,
      "learning_rate": 0.00039566139064976515,
      "loss": 2.8086,
      "step": 91400
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7648957967758179,
      "learning_rate": 0.00039565751361413267,
      "loss": 2.895,
      "step": 91401
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.289788007736206,
      "learning_rate": 0.0003956536365607157,
      "loss": 2.9269,
      "step": 91402
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6558411121368408,
      "learning_rate": 0.0003956497594895149,
      "loss": 3.0583,
      "step": 91403
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.7737843990325928,
      "learning_rate": 0.0003956458824005309,
      "loss": 2.9474,
      "step": 91404
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1227874755859375,
      "learning_rate": 0.00039564200529376464,
      "loss": 2.9528,
      "step": 91405
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0493242740631104,
      "learning_rate": 0.00039563812816921664,
      "loss": 2.954,
      "step": 91406
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.179149627685547,
      "learning_rate": 0.0003956342510268878,
      "loss": 3.1454,
      "step": 91407
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1147050857543945,
      "learning_rate": 0.0003956303738667787,
      "loss": 3.0822,
      "step": 91408
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8510427474975586,
      "learning_rate": 0.0003956264966888901,
      "loss": 2.9823,
      "step": 91409
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3567402362823486,
      "learning_rate": 0.0003956226194932227,
      "loss": 2.9212,
      "step": 91410
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.068805694580078,
      "learning_rate": 0.00039561874227977734,
      "loss": 2.6523,
      "step": 91411
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.429117202758789,
      "learning_rate": 0.00039561486504855457,
      "loss": 2.7958,
      "step": 91412
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5909820795059204,
      "learning_rate": 0.0003956109877995552,
      "loss": 2.8733,
      "step": 91413
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.3311607837677,
      "learning_rate": 0.00039560711053278,
      "loss": 3.1731,
      "step": 91414
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4222726821899414,
      "learning_rate": 0.00039560323324822956,
      "loss": 3.0903,
      "step": 91415
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5523220300674438,
      "learning_rate": 0.0003955993559459047,
      "loss": 3.3185,
      "step": 91416
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5747684240341187,
      "learning_rate": 0.0003955954786258062,
      "loss": 2.9463,
      "step": 91417
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.028184652328491,
      "learning_rate": 0.00039559160128793463,
      "loss": 2.826,
      "step": 91418
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8503507375717163,
      "learning_rate": 0.0003955877239322907,
      "loss": 2.9338,
      "step": 91419
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.717279314994812,
      "learning_rate": 0.0003955838465588753,
      "loss": 3.1119,
      "step": 91420
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7359390258789062,
      "learning_rate": 0.00039557996916768904,
      "loss": 2.8459,
      "step": 91421
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5858441591262817,
      "learning_rate": 0.00039557609175873263,
      "loss": 2.7955,
      "step": 91422
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3930461406707764,
      "learning_rate": 0.0003955722143320069,
      "loss": 2.9538,
      "step": 91423
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.969970464706421,
      "learning_rate": 0.0003955683368875124,
      "loss": 3.0828,
      "step": 91424
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.836140751838684,
      "learning_rate": 0.00039556445942524996,
      "loss": 3.1927,
      "step": 91425
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6242973804473877,
      "learning_rate": 0.00039556058194522035,
      "loss": 2.88,
      "step": 91426
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.2888576984405518,
      "learning_rate": 0.0003955567044474242,
      "loss": 2.9939,
      "step": 91427
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9267265796661377,
      "learning_rate": 0.00039555282693186215,
      "loss": 2.9986,
      "step": 91428
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5448007583618164,
      "learning_rate": 0.0003955489493985351,
      "loss": 2.9983,
      "step": 91429
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7679718732833862,
      "learning_rate": 0.0003955450718474437,
      "loss": 2.782,
      "step": 91430
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3137030601501465,
      "learning_rate": 0.00039554119427858867,
      "loss": 3.0086,
      "step": 91431
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.7500247955322266,
      "learning_rate": 0.0003955373166919708,
      "loss": 2.9287,
      "step": 91432
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5523711442947388,
      "learning_rate": 0.0003955334390875906,
      "loss": 2.9023,
      "step": 91433
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7623428106307983,
      "learning_rate": 0.00039552956146544904,
      "loss": 2.9922,
      "step": 91434
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8426339626312256,
      "learning_rate": 0.00039552568382554673,
      "loss": 3.1447,
      "step": 91435
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4923055171966553,
      "learning_rate": 0.0003955218061678843,
      "loss": 2.9575,
      "step": 91436
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5058608055114746,
      "learning_rate": 0.0003955179284924626,
      "loss": 3.051,
      "step": 91437
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.558039903640747,
      "learning_rate": 0.0003955140507992824,
      "loss": 3.2471,
      "step": 91438
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0157852172851562,
      "learning_rate": 0.00039551017308834423,
      "loss": 2.9337,
      "step": 91439
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7272974252700806,
      "learning_rate": 0.00039550629535964896,
      "loss": 3.0806,
      "step": 91440
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.517498254776001,
      "learning_rate": 0.0003955024176131973,
      "loss": 2.9929,
      "step": 91441
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.546430230140686,
      "learning_rate": 0.0003954985398489899,
      "loss": 3.1809,
      "step": 91442
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5607532262802124,
      "learning_rate": 0.00039549466206702743,
      "loss": 3.0969,
      "step": 91443
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9987683296203613,
      "learning_rate": 0.0003954907842673109,
      "loss": 3.2748,
      "step": 91444
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7432806491851807,
      "learning_rate": 0.0003954869064498406,
      "loss": 2.7135,
      "step": 91445
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.090404510498047,
      "learning_rate": 0.0003954830286146176,
      "loss": 2.7729,
      "step": 91446
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8922126293182373,
      "learning_rate": 0.00039547915076164255,
      "loss": 2.9729,
      "step": 91447
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6642992496490479,
      "learning_rate": 0.0003954752728909161,
      "loss": 3.0503,
      "step": 91448
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.678325891494751,
      "learning_rate": 0.00039547139500243885,
      "loss": 3.092,
      "step": 91449
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3132431507110596,
      "learning_rate": 0.0003954675170962118,
      "loss": 3.1138,
      "step": 91450
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.887552261352539,
      "learning_rate": 0.0003954636391722355,
      "loss": 2.9792,
      "step": 91451
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6478773355484009,
      "learning_rate": 0.0003954597612305107,
      "loss": 3.0003,
      "step": 91452
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8912309408187866,
      "learning_rate": 0.0003954558832710382,
      "loss": 2.8495,
      "step": 91453
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.685781717300415,
      "learning_rate": 0.00039545200529381863,
      "loss": 3.1889,
      "step": 91454
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5385346412658691,
      "learning_rate": 0.0003954481272988527,
      "loss": 3.2449,
      "step": 91455
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8244346380233765,
      "learning_rate": 0.00039544424928614116,
      "loss": 3.0207,
      "step": 91456
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.082026720046997,
      "learning_rate": 0.00039544037125568477,
      "loss": 3.1401,
      "step": 91457
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.761497139930725,
      "learning_rate": 0.0003954364932074841,
      "loss": 2.8408,
      "step": 91458
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6138173341751099,
      "learning_rate": 0.0003954326151415401,
      "loss": 3.2548,
      "step": 91459
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2684943675994873,
      "learning_rate": 0.0003954287370578533,
      "loss": 3.0616,
      "step": 91460
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6412982940673828,
      "learning_rate": 0.0003954248589564246,
      "loss": 2.8629,
      "step": 91461
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4645336866378784,
      "learning_rate": 0.0003954209808372545,
      "loss": 2.9786,
      "step": 91462
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1461470127105713,
      "learning_rate": 0.0003954171027003439,
      "loss": 3.019,
      "step": 91463
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.1844875812530518,
      "learning_rate": 0.00039541322454569346,
      "loss": 3.1528,
      "step": 91464
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0971598625183105,
      "learning_rate": 0.0003954093463733039,
      "loss": 3.1653,
      "step": 91465
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2565979957580566,
      "learning_rate": 0.0003954054681831759,
      "loss": 3.0753,
      "step": 91466
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.922020435333252,
      "learning_rate": 0.0003954015899753103,
      "loss": 2.8981,
      "step": 91467
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.702424168586731,
      "learning_rate": 0.00039539771174970764,
      "loss": 2.9596,
      "step": 91468
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6523205041885376,
      "learning_rate": 0.00039539383350636886,
      "loss": 3.0366,
      "step": 91469
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0974831581115723,
      "learning_rate": 0.0003953899552452945,
      "loss": 3.2195,
      "step": 91470
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9417392015457153,
      "learning_rate": 0.00039538607696648533,
      "loss": 2.9776,
      "step": 91471
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7089695930480957,
      "learning_rate": 0.00039538219866994215,
      "loss": 3.0207,
      "step": 91472
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9126465320587158,
      "learning_rate": 0.00039537832035566555,
      "loss": 2.9755,
      "step": 91473
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.41660475730896,
      "learning_rate": 0.0003953744420236564,
      "loss": 3.059,
      "step": 91474
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5410754680633545,
      "learning_rate": 0.00039537056367391525,
      "loss": 3.1864,
      "step": 91475
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5825904607772827,
      "learning_rate": 0.00039536668530644296,
      "loss": 2.9907,
      "step": 91476
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.6464858055114746,
      "learning_rate": 0.0003953628069212402,
      "loss": 3.2896,
      "step": 91477
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4933738708496094,
      "learning_rate": 0.00039535892851830766,
      "loss": 2.9403,
      "step": 91478
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.271322011947632,
      "learning_rate": 0.0003953550500976462,
      "loss": 2.615,
      "step": 91479
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7273495197296143,
      "learning_rate": 0.0003953511716592563,
      "loss": 2.8755,
      "step": 91480
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1277568340301514,
      "learning_rate": 0.00039534729320313895,
      "loss": 2.865,
      "step": 91481
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6213809251785278,
      "learning_rate": 0.0003953434147292947,
      "loss": 3.1791,
      "step": 91482
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6707032918930054,
      "learning_rate": 0.00039533953623772423,
      "loss": 2.9953,
      "step": 91483
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6193740367889404,
      "learning_rate": 0.00039533565772842844,
      "loss": 2.9568,
      "step": 91484
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7015585899353027,
      "learning_rate": 0.00039533177920140784,
      "loss": 3.0495,
      "step": 91485
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.710806965827942,
      "learning_rate": 0.0003953279006566633,
      "loss": 2.9974,
      "step": 91486
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7463372945785522,
      "learning_rate": 0.00039532402209419557,
      "loss": 3.1017,
      "step": 91487
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.265378475189209,
      "learning_rate": 0.00039532014351400526,
      "loss": 2.8575,
      "step": 91488
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9611529111862183,
      "learning_rate": 0.00039531626491609307,
      "loss": 2.8298,
      "step": 91489
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7569905519485474,
      "learning_rate": 0.00039531238630046,
      "loss": 3.029,
      "step": 91490
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.136012315750122,
      "learning_rate": 0.00039530850766710635,
      "loss": 2.9698,
      "step": 91491
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2247159481048584,
      "learning_rate": 0.0003953046290160331,
      "loss": 2.9917,
      "step": 91492
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1029961109161377,
      "learning_rate": 0.00039530075034724095,
      "loss": 3.0099,
      "step": 91493
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7419673204421997,
      "learning_rate": 0.0003952968716607306,
      "loss": 3.0591,
      "step": 91494
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.291480302810669,
      "learning_rate": 0.0003952929929565027,
      "loss": 3.1853,
      "step": 91495
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.196591854095459,
      "learning_rate": 0.0003952891142345581,
      "loss": 2.8256,
      "step": 91496
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9928206205368042,
      "learning_rate": 0.0003952852354948974,
      "loss": 2.9394,
      "step": 91497
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9420818090438843,
      "learning_rate": 0.0003952813567375214,
      "loss": 3.1347,
      "step": 91498
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.143676996231079,
      "learning_rate": 0.0003952774779624308,
      "loss": 3.028,
      "step": 91499
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0579185485839844,
      "learning_rate": 0.0003952735991696263,
      "loss": 3.0227,
      "step": 91500
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4517014026641846,
      "learning_rate": 0.0003952697203591087,
      "loss": 3.1158,
      "step": 91501
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.12637996673584,
      "learning_rate": 0.0003952658415308786,
      "loss": 2.9574,
      "step": 91502
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6404136419296265,
      "learning_rate": 0.00039526196268493686,
      "loss": 3.0217,
      "step": 91503
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.642148017883301,
      "learning_rate": 0.000395258083821284,
      "loss": 2.972,
      "step": 91504
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9082223176956177,
      "learning_rate": 0.000395254204939921,
      "loss": 3.2961,
      "step": 91505
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0864477157592773,
      "learning_rate": 0.00039525032604084833,
      "loss": 2.9933,
      "step": 91506
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0742154121398926,
      "learning_rate": 0.00039524644712406686,
      "loss": 2.9508,
      "step": 91507
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5466535091400146,
      "learning_rate": 0.00039524256818957733,
      "loss": 2.9293,
      "step": 91508
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8884670734405518,
      "learning_rate": 0.00039523868923738034,
      "loss": 3.0673,
      "step": 91509
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2253828048706055,
      "learning_rate": 0.00039523481026747665,
      "loss": 2.958,
      "step": 91510
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8794599771499634,
      "learning_rate": 0.0003952309312798672,
      "loss": 3.2045,
      "step": 91511
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6227325201034546,
      "learning_rate": 0.0003952270522745523,
      "loss": 3.0735,
      "step": 91512
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.724792957305908,
      "learning_rate": 0.000395223173251533,
      "loss": 3.0736,
      "step": 91513
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1044983863830566,
      "learning_rate": 0.00039521929421080983,
      "loss": 3.1668,
      "step": 91514
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5314146280288696,
      "learning_rate": 0.0003952154151523837,
      "loss": 3.1232,
      "step": 91515
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2433676719665527,
      "learning_rate": 0.0003952115360762552,
      "loss": 2.893,
      "step": 91516
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0762109756469727,
      "learning_rate": 0.00039520765698242507,
      "loss": 3.1134,
      "step": 91517
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6700286865234375,
      "learning_rate": 0.00039520377787089406,
      "loss": 3.1078,
      "step": 91518
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8136249780654907,
      "learning_rate": 0.00039519989874166285,
      "loss": 2.9444,
      "step": 91519
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7573102712631226,
      "learning_rate": 0.00039519601959473214,
      "loss": 2.9489,
      "step": 91520
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6248353719711304,
      "learning_rate": 0.00039519214043010276,
      "loss": 3.0975,
      "step": 91521
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.68772292137146,
      "learning_rate": 0.00039518826124777535,
      "loss": 2.9523,
      "step": 91522
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7597987651824951,
      "learning_rate": 0.0003951843820477506,
      "loss": 3.0338,
      "step": 91523
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7129089832305908,
      "learning_rate": 0.00039518050283002937,
      "loss": 3.0706,
      "step": 91524
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9579485654830933,
      "learning_rate": 0.0003951766235946122,
      "loss": 3.1535,
      "step": 91525
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8364224433898926,
      "learning_rate": 0.00039517274434149984,
      "loss": 2.7683,
      "step": 91526
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.7021663188934326,
      "learning_rate": 0.0003951688650706933,
      "loss": 2.9273,
      "step": 91527
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9177453517913818,
      "learning_rate": 0.00039516498578219284,
      "loss": 2.9793,
      "step": 91528
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6493194103240967,
      "learning_rate": 0.0003951611064759995,
      "loss": 2.9494,
      "step": 91529
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.84228253364563,
      "learning_rate": 0.000395157227152114,
      "loss": 2.8137,
      "step": 91530
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6151663064956665,
      "learning_rate": 0.00039515334781053686,
      "loss": 2.8755,
      "step": 91531
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0529122352600098,
      "learning_rate": 0.0003951494684512689,
      "loss": 2.9896,
      "step": 91532
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.678854465484619,
      "learning_rate": 0.000395145589074311,
      "loss": 2.9119,
      "step": 91533
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.538956880569458,
      "learning_rate": 0.00039514170967966356,
      "loss": 3.0318,
      "step": 91534
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.921476125717163,
      "learning_rate": 0.00039513783026732754,
      "loss": 2.9353,
      "step": 91535
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.610781192779541,
      "learning_rate": 0.0003951339508373037,
      "loss": 2.817,
      "step": 91536
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7710604667663574,
      "learning_rate": 0.0003951300713895926,
      "loss": 3.1382,
      "step": 91537
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.3863164186477661,
      "learning_rate": 0.00039512619192419495,
      "loss": 2.7109,
      "step": 91538
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9615672826766968,
      "learning_rate": 0.0003951223124411117,
      "loss": 2.781,
      "step": 91539
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.776880145072937,
      "learning_rate": 0.00039511843294034326,
      "loss": 2.7653,
      "step": 91540
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8047921657562256,
      "learning_rate": 0.0003951145534218906,
      "loss": 2.9723,
      "step": 91541
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1345720291137695,
      "learning_rate": 0.0003951106738857543,
      "loss": 2.7327,
      "step": 91542
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.039425849914551,
      "learning_rate": 0.00039510679433193513,
      "loss": 2.9267,
      "step": 91543
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5887519121170044,
      "learning_rate": 0.00039510291476043384,
      "loss": 3.1155,
      "step": 91544
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3269662857055664,
      "learning_rate": 0.00039509903517125105,
      "loss": 2.976,
      "step": 91545
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4977011680603027,
      "learning_rate": 0.0003950951555643877,
      "loss": 2.9262,
      "step": 91546
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8057231903076172,
      "learning_rate": 0.00039509127593984425,
      "loss": 2.9702,
      "step": 91547
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6770267486572266,
      "learning_rate": 0.0003950873962976216,
      "loss": 2.9247,
      "step": 91548
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7901674509048462,
      "learning_rate": 0.00039508351663772036,
      "loss": 3.0567,
      "step": 91549
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4619709253311157,
      "learning_rate": 0.0003950796369601413,
      "loss": 3.078,
      "step": 91550
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.974456787109375,
      "learning_rate": 0.0003950757572648852,
      "loss": 2.9973,
      "step": 91551
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.244333028793335,
      "learning_rate": 0.0003950718775519526,
      "loss": 3.3283,
      "step": 91552
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5135600566864014,
      "learning_rate": 0.00039506799782134446,
      "loss": 3.196,
      "step": 91553
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0624582767486572,
      "learning_rate": 0.00039506411807306135,
      "loss": 2.8868,
      "step": 91554
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.791319489479065,
      "learning_rate": 0.000395060238307104,
      "loss": 3.1452,
      "step": 91555
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.0421385765075684,
      "learning_rate": 0.0003950563585234732,
      "loss": 3.0038,
      "step": 91556
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.190044641494751,
      "learning_rate": 0.0003950524787221696,
      "loss": 3.2067,
      "step": 91557
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6729689836502075,
      "learning_rate": 0.00039504859890319397,
      "loss": 2.7718,
      "step": 91558
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.3175909519195557,
      "learning_rate": 0.000395044719066547,
      "loss": 2.7735,
      "step": 91559
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.086477279663086,
      "learning_rate": 0.0003950408392122295,
      "loss": 2.9662,
      "step": 91560
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6301746368408203,
      "learning_rate": 0.000395036959340242,
      "loss": 3.2052,
      "step": 91561
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6119811534881592,
      "learning_rate": 0.0003950330794505853,
      "loss": 2.9611,
      "step": 91562
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.149214029312134,
      "learning_rate": 0.00039502919954326025,
      "loss": 3.1653,
      "step": 91563
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.6263034343719482,
      "learning_rate": 0.00039502531961826753,
      "loss": 3.1385,
      "step": 91564
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6382595300674438,
      "learning_rate": 0.0003950214396756077,
      "loss": 3.1667,
      "step": 91565
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6516159772872925,
      "learning_rate": 0.00039501755971528167,
      "loss": 3.0193,
      "step": 91566
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.972520112991333,
      "learning_rate": 0.00039501367973729005,
      "loss": 2.9629,
      "step": 91567
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.731573462486267,
      "learning_rate": 0.0003950097997416335,
      "loss": 3.14,
      "step": 91568
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4082669019699097,
      "learning_rate": 0.00039500591972831296,
      "loss": 2.9971,
      "step": 91569
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7925236225128174,
      "learning_rate": 0.000395002039697329,
      "loss": 2.8633,
      "step": 91570
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9604203701019287,
      "learning_rate": 0.00039499815964868227,
      "loss": 2.9931,
      "step": 91571
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7863205671310425,
      "learning_rate": 0.00039499427958237377,
      "loss": 2.8582,
      "step": 91572
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6452895402908325,
      "learning_rate": 0.00039499039949840393,
      "loss": 3.0183,
      "step": 91573
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4638888835906982,
      "learning_rate": 0.0003949865193967736,
      "loss": 3.0917,
      "step": 91574
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8378371000289917,
      "learning_rate": 0.00039498263927748354,
      "loss": 2.9231,
      "step": 91575
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.388528347015381,
      "learning_rate": 0.00039497875914053433,
      "loss": 3.2675,
      "step": 91576
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.245635986328125,
      "learning_rate": 0.0003949748789859267,
      "loss": 2.833,
      "step": 91577
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.472062587738037,
      "learning_rate": 0.00039497099881366163,
      "loss": 3.0953,
      "step": 91578
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.505864143371582,
      "learning_rate": 0.00039496711862373955,
      "loss": 2.9277,
      "step": 91579
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7423505783081055,
      "learning_rate": 0.0003949632384161613,
      "loss": 2.93,
      "step": 91580
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.890009641647339,
      "learning_rate": 0.0003949593581909276,
      "loss": 3.0193,
      "step": 91581
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.2521231174468994,
      "learning_rate": 0.0003949554779480392,
      "loss": 2.9354,
      "step": 91582
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.657243013381958,
      "learning_rate": 0.0003949515976874967,
      "loss": 3.1184,
      "step": 91583
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.890502691268921,
      "learning_rate": 0.000394947717409301,
      "loss": 2.9152,
      "step": 91584
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8775955438613892,
      "learning_rate": 0.0003949438371134527,
      "loss": 3.0427,
      "step": 91585
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9920506477355957,
      "learning_rate": 0.0003949399567999525,
      "loss": 3.1566,
      "step": 91586
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6795743703842163,
      "learning_rate": 0.00039493607646880115,
      "loss": 3.2285,
      "step": 91587
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7069554328918457,
      "learning_rate": 0.00039493219611999955,
      "loss": 3.1042,
      "step": 91588
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8660919666290283,
      "learning_rate": 0.00039492831575354816,
      "loss": 2.7626,
      "step": 91589
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4423701763153076,
      "learning_rate": 0.00039492443536944774,
      "loss": 2.8923,
      "step": 91590
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.038527250289917,
      "learning_rate": 0.0003949205549676992,
      "loss": 2.7063,
      "step": 91591
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.181588888168335,
      "learning_rate": 0.00039491667454830305,
      "loss": 2.6896,
      "step": 91592
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8042219877243042,
      "learning_rate": 0.00039491279411126007,
      "loss": 3.1218,
      "step": 91593
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0026469230651855,
      "learning_rate": 0.0003949089136565711,
      "loss": 2.8257,
      "step": 91594
    },
    {
      "epoch": 1.19,
      "grad_norm": 4.25732946395874,
      "learning_rate": 0.00039490503318423675,
      "loss": 3.0778,
      "step": 91595
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9003572463989258,
      "learning_rate": 0.0003949011526942577,
      "loss": 2.9644,
      "step": 91596
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0981125831604004,
      "learning_rate": 0.00039489727218663484,
      "loss": 3.0499,
      "step": 91597
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.767937183380127,
      "learning_rate": 0.00039489339166136865,
      "loss": 3.0595,
      "step": 91598
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6442561149597168,
      "learning_rate": 0.00039488951111846005,
      "loss": 3.2414,
      "step": 91599
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8316439390182495,
      "learning_rate": 0.00039488563055790985,
      "loss": 2.7945,
      "step": 91600
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7362028360366821,
      "learning_rate": 0.0003948817499797184,
      "loss": 3.0872,
      "step": 91601
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0280725955963135,
      "learning_rate": 0.0003948778693838867,
      "loss": 3.0012,
      "step": 91602
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9212595224380493,
      "learning_rate": 0.00039487398877041555,
      "loss": 3.0045,
      "step": 91603
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8811882734298706,
      "learning_rate": 0.00039487010813930536,
      "loss": 3.1035,
      "step": 91604
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.098958969116211,
      "learning_rate": 0.00039486622749055706,
      "loss": 3.1027,
      "step": 91605
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.018777370452881,
      "learning_rate": 0.00039486234682417143,
      "loss": 2.8203,
      "step": 91606
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9114978313446045,
      "learning_rate": 0.000394858466140149,
      "loss": 2.9232,
      "step": 91607
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.123908042907715,
      "learning_rate": 0.00039485458543849064,
      "loss": 3.1536,
      "step": 91608
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9806609153747559,
      "learning_rate": 0.000394850704719197,
      "loss": 2.8364,
      "step": 91609
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9845651388168335,
      "learning_rate": 0.00039484682398226883,
      "loss": 3.0188,
      "step": 91610
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8541593551635742,
      "learning_rate": 0.0003948429432277068,
      "loss": 2.9128,
      "step": 91611
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.109927177429199,
      "learning_rate": 0.0003948390624555118,
      "loss": 2.995,
      "step": 91612
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9070628881454468,
      "learning_rate": 0.0003948351816656843,
      "loss": 2.966,
      "step": 91613
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.9254093170166016,
      "learning_rate": 0.00039483130085822516,
      "loss": 3.0147,
      "step": 91614
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.167905330657959,
      "learning_rate": 0.0003948274200331352,
      "loss": 3.1068,
      "step": 91615
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9002325534820557,
      "learning_rate": 0.00039482353919041496,
      "loss": 3.1801,
      "step": 91616
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6817371845245361,
      "learning_rate": 0.00039481965833006524,
      "loss": 2.8467,
      "step": 91617
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.085625410079956,
      "learning_rate": 0.0003948157774520868,
      "loss": 2.8691,
      "step": 91618
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6932790279388428,
      "learning_rate": 0.0003948118965564802,
      "loss": 3.0696,
      "step": 91619
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6007119417190552,
      "learning_rate": 0.0003948080156432463,
      "loss": 2.8092,
      "step": 91620
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.591575264930725,
      "learning_rate": 0.000394804134712386,
      "loss": 2.8721,
      "step": 91621
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.985837697982788,
      "learning_rate": 0.0003948002537638996,
      "loss": 2.9232,
      "step": 91622
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6355055570602417,
      "learning_rate": 0.00039479637279778813,
      "loss": 3.0,
      "step": 91623
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9504656791687012,
      "learning_rate": 0.0003947924918140522,
      "loss": 3.1659,
      "step": 91624
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9801101684570312,
      "learning_rate": 0.00039478861081269256,
      "loss": 2.7571,
      "step": 91625
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.733759880065918,
      "learning_rate": 0.00039478472979370997,
      "loss": 2.9617,
      "step": 91626
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9555495977401733,
      "learning_rate": 0.00039478084875710503,
      "loss": 2.915,
      "step": 91627
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7917286157608032,
      "learning_rate": 0.0003947769677028786,
      "loss": 2.8541,
      "step": 91628
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5519254207611084,
      "learning_rate": 0.0003947730866310313,
      "loss": 3.0511,
      "step": 91629
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.702335000038147,
      "learning_rate": 0.000394769205541564,
      "loss": 2.8148,
      "step": 91630
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2751269340515137,
      "learning_rate": 0.0003947653244344772,
      "loss": 2.9037,
      "step": 91631
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6724257469177246,
      "learning_rate": 0.00039476144330977174,
      "loss": 2.8412,
      "step": 91632
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5847011804580688,
      "learning_rate": 0.00039475756216744836,
      "loss": 2.7389,
      "step": 91633
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.0660436153411865,
      "learning_rate": 0.00039475368100750784,
      "loss": 3.0529,
      "step": 91634
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8453636169433594,
      "learning_rate": 0.0003947497998299507,
      "loss": 3.3037,
      "step": 91635
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8543258905410767,
      "learning_rate": 0.0003947459186347779,
      "loss": 3.1138,
      "step": 91636
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.0326321125030518,
      "learning_rate": 0.0003947420374219899,
      "loss": 2.9312,
      "step": 91637
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1941113471984863,
      "learning_rate": 0.0003947381561915877,
      "loss": 2.9264,
      "step": 91638
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7187820672988892,
      "learning_rate": 0.0003947342749435718,
      "loss": 3.0724,
      "step": 91639
    },
    {
      "epoch": 1.19,
      "grad_norm": 3.727687358856201,
      "learning_rate": 0.00039473039367794304,
      "loss": 2.9873,
      "step": 91640
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.059778928756714,
      "learning_rate": 0.00039472651239470216,
      "loss": 3.077,
      "step": 91641
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9761089086532593,
      "learning_rate": 0.0003947226310938498,
      "loss": 3.0262,
      "step": 91642
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5811489820480347,
      "learning_rate": 0.0003947187497753867,
      "loss": 2.9397,
      "step": 91643
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7539905309677124,
      "learning_rate": 0.0003947148684393136,
      "loss": 2.9787,
      "step": 91644
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5376073122024536,
      "learning_rate": 0.0003947109870856312,
      "loss": 2.9431,
      "step": 91645
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5626018047332764,
      "learning_rate": 0.00039470710571434033,
      "loss": 3.1515,
      "step": 91646
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.263291358947754,
      "learning_rate": 0.0003947032243254415,
      "loss": 3.1067,
      "step": 91647
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8414803743362427,
      "learning_rate": 0.00039469934291893563,
      "loss": 3.1814,
      "step": 91648
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6621320247650146,
      "learning_rate": 0.00039469546149482334,
      "loss": 3.1706,
      "step": 91649
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6461037397384644,
      "learning_rate": 0.00039469158005310534,
      "loss": 3.186,
      "step": 91650
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5017331838607788,
      "learning_rate": 0.0003946876985937824,
      "loss": 2.9705,
      "step": 91651
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7867755889892578,
      "learning_rate": 0.00039468381711685535,
      "loss": 3.2354,
      "step": 91652
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6067705154418945,
      "learning_rate": 0.00039467993562232467,
      "loss": 3.2734,
      "step": 91653
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0335216522216797,
      "learning_rate": 0.0003946760541101912,
      "loss": 2.9034,
      "step": 91654
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.759459376335144,
      "learning_rate": 0.0003946721725804557,
      "loss": 3.0397,
      "step": 91655
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6950961351394653,
      "learning_rate": 0.00039466829103311887,
      "loss": 3.1495,
      "step": 91656
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0718576908111572,
      "learning_rate": 0.0003946644094681814,
      "loss": 2.8694,
      "step": 91657
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6574643850326538,
      "learning_rate": 0.00039466052788564407,
      "loss": 2.9897,
      "step": 91658
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.527442216873169,
      "learning_rate": 0.0003946566462855075,
      "loss": 2.9855,
      "step": 91659
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7771397829055786,
      "learning_rate": 0.00039465276466777246,
      "loss": 2.8805,
      "step": 91660
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9810469150543213,
      "learning_rate": 0.00039464888303243975,
      "loss": 2.9331,
      "step": 91661
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2954227924346924,
      "learning_rate": 0.00039464500137951,
      "loss": 2.7927,
      "step": 91662
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.7866251468658447,
      "learning_rate": 0.00039464111970898393,
      "loss": 3.0526,
      "step": 91663
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4656952619552612,
      "learning_rate": 0.00039463723802086233,
      "loss": 2.8667,
      "step": 91664
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.512285590171814,
      "learning_rate": 0.0003946333563151458,
      "loss": 3.165,
      "step": 91665
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.7076897621154785,
      "learning_rate": 0.00039462947459183525,
      "loss": 3.0597,
      "step": 91666
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.84416663646698,
      "learning_rate": 0.0003946255928509312,
      "loss": 3.3316,
      "step": 91667
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9694679975509644,
      "learning_rate": 0.0003946217110924346,
      "loss": 3.0021,
      "step": 91668
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.720316767692566,
      "learning_rate": 0.0003946178293163459,
      "loss": 3.1235,
      "step": 91669
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5801655054092407,
      "learning_rate": 0.0003946139475226661,
      "loss": 3.0263,
      "step": 91670
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0810325145721436,
      "learning_rate": 0.0003946100657113957,
      "loss": 2.9957,
      "step": 91671
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.632279396057129,
      "learning_rate": 0.00039460618388253543,
      "loss": 3.1007,
      "step": 91672
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.664747714996338,
      "learning_rate": 0.00039460230203608617,
      "loss": 3.0692,
      "step": 91673
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7745643854141235,
      "learning_rate": 0.00039459842017204854,
      "loss": 3.1188,
      "step": 91674
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.637459397315979,
      "learning_rate": 0.0003945945382904232,
      "loss": 2.8439,
      "step": 91675
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8695529699325562,
      "learning_rate": 0.00039459065639121113,
      "loss": 2.9242,
      "step": 91676
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6441832780838013,
      "learning_rate": 0.0003945867744744127,
      "loss": 2.9793,
      "step": 91677
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7079840898513794,
      "learning_rate": 0.00039458289254002885,
      "loss": 2.9657,
      "step": 91678
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.774538278579712,
      "learning_rate": 0.00039457901058806037,
      "loss": 2.9335,
      "step": 91679
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7229586839675903,
      "learning_rate": 0.0003945751286185077,
      "loss": 3.1136,
      "step": 91680
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0818068981170654,
      "learning_rate": 0.0003945712466313718,
      "loss": 2.9327,
      "step": 91681
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8758618831634521,
      "learning_rate": 0.0003945673646266533,
      "loss": 3.0963,
      "step": 91682
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6229742765426636,
      "learning_rate": 0.000394563482604353,
      "loss": 2.8879,
      "step": 91683
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7894361019134521,
      "learning_rate": 0.0003945596005644715,
      "loss": 2.8221,
      "step": 91684
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.203232765197754,
      "learning_rate": 0.0003945557185070097,
      "loss": 2.9276,
      "step": 91685
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9464741945266724,
      "learning_rate": 0.00039455183643196805,
      "loss": 3.0437,
      "step": 91686
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.872655987739563,
      "learning_rate": 0.0003945479543393475,
      "loss": 2.7892,
      "step": 91687
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2770419120788574,
      "learning_rate": 0.0003945440722291487,
      "loss": 3.2042,
      "step": 91688
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1902506351470947,
      "learning_rate": 0.00039454019010137233,
      "loss": 2.7244,
      "step": 91689
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.68367338180542,
      "learning_rate": 0.0003945363079560192,
      "loss": 2.8879,
      "step": 91690
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.918439269065857,
      "learning_rate": 0.00039453242579309003,
      "loss": 2.931,
      "step": 91691
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.462506890296936,
      "learning_rate": 0.0003945285436125854,
      "loss": 2.9517,
      "step": 91692
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6549519300460815,
      "learning_rate": 0.0003945246614145062,
      "loss": 3.0973,
      "step": 91693
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6018494367599487,
      "learning_rate": 0.00039452077919885305,
      "loss": 3.0349,
      "step": 91694
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.357588052749634,
      "learning_rate": 0.0003945168969656267,
      "loss": 2.966,
      "step": 91695
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8443540334701538,
      "learning_rate": 0.00039451301471482786,
      "loss": 3.0215,
      "step": 91696
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5541778802871704,
      "learning_rate": 0.00039450913244645737,
      "loss": 3.1986,
      "step": 91697
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4720137119293213,
      "learning_rate": 0.00039450525016051575,
      "loss": 3.132,
      "step": 91698
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7762385606765747,
      "learning_rate": 0.0003945013678570038,
      "loss": 2.9692,
      "step": 91699
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8337374925613403,
      "learning_rate": 0.00039449748553592234,
      "loss": 3.2131,
      "step": 91700
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8012710809707642,
      "learning_rate": 0.00039449360319727196,
      "loss": 3.2673,
      "step": 91701
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4392402172088623,
      "learning_rate": 0.0003944897208410534,
      "loss": 3.1798,
      "step": 91702
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.301933526992798,
      "learning_rate": 0.00039448583846726754,
      "loss": 3.2055,
      "step": 91703
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.088412046432495,
      "learning_rate": 0.00039448195607591486,
      "loss": 2.9167,
      "step": 91704
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.450897693634033,
      "learning_rate": 0.00039447807366699625,
      "loss": 3.1524,
      "step": 91705
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4271609783172607,
      "learning_rate": 0.00039447419124051243,
      "loss": 3.1427,
      "step": 91706
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1997735500335693,
      "learning_rate": 0.000394470308796464,
      "loss": 3.0838,
      "step": 91707
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1245412826538086,
      "learning_rate": 0.00039446642633485175,
      "loss": 2.9968,
      "step": 91708
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.072641611099243,
      "learning_rate": 0.00039446254385567647,
      "loss": 2.9146,
      "step": 91709
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7451659440994263,
      "learning_rate": 0.00039445866135893873,
      "loss": 3.1617,
      "step": 91710
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5870689153671265,
      "learning_rate": 0.0003944547788446394,
      "loss": 3.1803,
      "step": 91711
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5451525449752808,
      "learning_rate": 0.0003944508963127792,
      "loss": 2.8256,
      "step": 91712
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9026789665222168,
      "learning_rate": 0.0003944470137633587,
      "loss": 3.0323,
      "step": 91713
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.866370677947998,
      "learning_rate": 0.0003944431311963788,
      "loss": 3.0806,
      "step": 91714
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5712970495224,
      "learning_rate": 0.00039443924861184005,
      "loss": 3.1246,
      "step": 91715
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9851771593093872,
      "learning_rate": 0.00039443536600974335,
      "loss": 2.9961,
      "step": 91716
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6572489738464355,
      "learning_rate": 0.00039443148339008923,
      "loss": 2.7121,
      "step": 91717
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7529606819152832,
      "learning_rate": 0.0003944276007528786,
      "loss": 2.8552,
      "step": 91718
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.812246561050415,
      "learning_rate": 0.0003944237180981121,
      "loss": 2.7892,
      "step": 91719
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.4701380729675293,
      "learning_rate": 0.0003944198354257904,
      "loss": 3.1291,
      "step": 91720
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7393791675567627,
      "learning_rate": 0.00039441595273591426,
      "loss": 3.0907,
      "step": 91721
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8468730449676514,
      "learning_rate": 0.0003944120700284845,
      "loss": 2.9375,
      "step": 91722
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8758339881896973,
      "learning_rate": 0.00039440818730350166,
      "loss": 2.9682,
      "step": 91723
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.5550200939178467,
      "learning_rate": 0.00039440430456096656,
      "loss": 3.1286,
      "step": 91724
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7907438278198242,
      "learning_rate": 0.00039440042180087997,
      "loss": 2.9251,
      "step": 91725
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1079883575439453,
      "learning_rate": 0.0003943965390232426,
      "loss": 3.1017,
      "step": 91726
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8248738050460815,
      "learning_rate": 0.000394392656228055,
      "loss": 3.0473,
      "step": 91727
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6944226026535034,
      "learning_rate": 0.0003943887734153181,
      "loss": 2.9352,
      "step": 91728
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5556944608688354,
      "learning_rate": 0.0003943848905850325,
      "loss": 2.8981,
      "step": 91729
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1893022060394287,
      "learning_rate": 0.00039438100773719903,
      "loss": 3.1136,
      "step": 91730
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.997385025024414,
      "learning_rate": 0.00039437712487181835,
      "loss": 2.933,
      "step": 91731
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.619284152984619,
      "learning_rate": 0.0003943732419888911,
      "loss": 2.8329,
      "step": 91732
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.638967514038086,
      "learning_rate": 0.0003943693590884182,
      "loss": 3.2059,
      "step": 91733
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.687688946723938,
      "learning_rate": 0.0003943654761704002,
      "loss": 3.1963,
      "step": 91734
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.006028413772583,
      "learning_rate": 0.0003943615932348378,
      "loss": 3.0424,
      "step": 91735
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.936522364616394,
      "learning_rate": 0.0003943577102817319,
      "loss": 3.0117,
      "step": 91736
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.2403814792633057,
      "learning_rate": 0.00039435382731108315,
      "loss": 2.8245,
      "step": 91737
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.4535326957702637,
      "learning_rate": 0.00039434994432289213,
      "loss": 2.9501,
      "step": 91738
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.124666929244995,
      "learning_rate": 0.0003943460613171597,
      "loss": 2.8386,
      "step": 91739
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.509178876876831,
      "learning_rate": 0.00039434217829388665,
      "loss": 2.9155,
      "step": 91740
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.1370179653167725,
      "learning_rate": 0.00039433829525307355,
      "loss": 3.0716,
      "step": 91741
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.645738124847412,
      "learning_rate": 0.0003943344121947211,
      "loss": 3.1426,
      "step": 91742
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.602832317352295,
      "learning_rate": 0.00039433052911883027,
      "loss": 2.9437,
      "step": 91743
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.143218517303467,
      "learning_rate": 0.00039432664602540143,
      "loss": 2.8791,
      "step": 91744
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5949969291687012,
      "learning_rate": 0.00039432276291443563,
      "loss": 3.3119,
      "step": 91745
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6655855178833008,
      "learning_rate": 0.0003943188797859334,
      "loss": 2.9885,
      "step": 91746
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.0130679607391357,
      "learning_rate": 0.0003943149966398955,
      "loss": 3.0032,
      "step": 91747
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.233060836791992,
      "learning_rate": 0.0003943111134763226,
      "loss": 3.0421,
      "step": 91748
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5493438243865967,
      "learning_rate": 0.00039430723029521556,
      "loss": 2.817,
      "step": 91749
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.866197347640991,
      "learning_rate": 0.000394303347096575,
      "loss": 3.1193,
      "step": 91750
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5318437814712524,
      "learning_rate": 0.0003942994638804017,
      "loss": 3.0934,
      "step": 91751
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9239583015441895,
      "learning_rate": 0.0003942955806466964,
      "loss": 3.0224,
      "step": 91752
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.804246425628662,
      "learning_rate": 0.00039429169739545964,
      "loss": 2.9845,
      "step": 91753
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6465203762054443,
      "learning_rate": 0.00039428781412669233,
      "loss": 2.939,
      "step": 91754
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7441281080245972,
      "learning_rate": 0.00039428393084039515,
      "loss": 3.0773,
      "step": 91755
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7273542881011963,
      "learning_rate": 0.0003942800475365688,
      "loss": 3.0511,
      "step": 91756
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7803077697753906,
      "learning_rate": 0.00039427616421521395,
      "loss": 3.2358,
      "step": 91757
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6113812923431396,
      "learning_rate": 0.0003942722808763315,
      "loss": 3.1376,
      "step": 91758
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3218302726745605,
      "learning_rate": 0.000394268397519922,
      "loss": 3.0287,
      "step": 91759
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5356056690216064,
      "learning_rate": 0.00039426451414598617,
      "loss": 2.9622,
      "step": 91760
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8881170749664307,
      "learning_rate": 0.0003942606307545249,
      "loss": 3.0618,
      "step": 91761
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.8331849575042725,
      "learning_rate": 0.00039425674734553875,
      "loss": 3.0219,
      "step": 91762
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.101747751235962,
      "learning_rate": 0.00039425286391902843,
      "loss": 3.1138,
      "step": 91763
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6867321729660034,
      "learning_rate": 0.00039424898047499485,
      "loss": 3.0277,
      "step": 91764
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6040502786636353,
      "learning_rate": 0.0003942450970134385,
      "loss": 2.8867,
      "step": 91765
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9591622352600098,
      "learning_rate": 0.00039424121353436015,
      "loss": 2.9985,
      "step": 91766
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.841146230697632,
      "learning_rate": 0.00039423733003776077,
      "loss": 2.874,
      "step": 91767
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.9744092226028442,
      "learning_rate": 0.0003942334465236407,
      "loss": 2.98,
      "step": 91768
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.730345368385315,
      "learning_rate": 0.0003942295629920009,
      "loss": 3.158,
      "step": 91769
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.3933863639831543,
      "learning_rate": 0.00039422567944284216,
      "loss": 3.3252,
      "step": 91770
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.7251267433166504,
      "learning_rate": 0.000394221795876165,
      "loss": 2.8603,
      "step": 91771
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.5371254682540894,
      "learning_rate": 0.0003942179122919702,
      "loss": 3.1501,
      "step": 91772
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6839690208435059,
      "learning_rate": 0.00039421402869025866,
      "loss": 2.8642,
      "step": 91773
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.6753969192504883,
      "learning_rate": 0.00039421014507103074,
      "loss": 3.0216,
      "step": 91774
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.192060708999634,
      "learning_rate": 0.0003942062614342875,
      "loss": 2.802,
      "step": 91775
    },
    {
      "epoch": 1.19,
      "grad_norm": 2.8155829906463623,
      "learning_rate": 0.0003942023777800295,
      "loss": 3.049,
      "step": 91776
    },
    {
      "epoch": 1.19,
      "grad_norm": 1.7152388095855713,
      "learning_rate": 0.0003941984941082575,
      "loss": 3.1246,
      "step": 91777
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.735763669013977,
      "learning_rate": 0.00039419461041897226,
      "loss": 3.2286,
      "step": 91778
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.4326958656311035,
      "learning_rate": 0.0003941907267121745,
      "loss": 2.8577,
      "step": 91779
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1000874042510986,
      "learning_rate": 0.0003941868429878648,
      "loss": 3.055,
      "step": 91780
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.795628309249878,
      "learning_rate": 0.00039418295924604406,
      "loss": 2.9678,
      "step": 91781
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9781962633132935,
      "learning_rate": 0.00039417907548671284,
      "loss": 2.8932,
      "step": 91782
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.004460096359253,
      "learning_rate": 0.00039417519170987207,
      "loss": 2.9219,
      "step": 91783
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.306882381439209,
      "learning_rate": 0.00039417130791552236,
      "loss": 3.0015,
      "step": 91784
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0107738971710205,
      "learning_rate": 0.0003941674241036643,
      "loss": 2.9072,
      "step": 91785
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.286860227584839,
      "learning_rate": 0.0003941635402742988,
      "loss": 3.0103,
      "step": 91786
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.658186912536621,
      "learning_rate": 0.0003941596564274265,
      "loss": 2.8691,
      "step": 91787
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7435435056686401,
      "learning_rate": 0.00039415577256304824,
      "loss": 3.206,
      "step": 91788
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.671701431274414,
      "learning_rate": 0.0003941518886811646,
      "loss": 2.7038,
      "step": 91789
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6460692882537842,
      "learning_rate": 0.0003941480047817763,
      "loss": 3.206,
      "step": 91790
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5675923824310303,
      "learning_rate": 0.0003941441208648841,
      "loss": 3.1881,
      "step": 91791
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3886804580688477,
      "learning_rate": 0.0003941402369304888,
      "loss": 2.7556,
      "step": 91792
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.027397871017456,
      "learning_rate": 0.00039413635297859097,
      "loss": 3.0699,
      "step": 91793
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.426391839981079,
      "learning_rate": 0.0003941324690091915,
      "loss": 2.8826,
      "step": 91794
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.0713329315185547,
      "learning_rate": 0.00039412858502229096,
      "loss": 3.0223,
      "step": 91795
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6123287677764893,
      "learning_rate": 0.00039412470101789015,
      "loss": 3.0922,
      "step": 91796
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8675616979599,
      "learning_rate": 0.00039412081699598977,
      "loss": 3.0924,
      "step": 91797
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.8156509399414062,
      "learning_rate": 0.0003941169329565907,
      "loss": 2.9057,
      "step": 91798
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6270354986190796,
      "learning_rate": 0.00039411304889969335,
      "loss": 2.8386,
      "step": 91799
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5455381870269775,
      "learning_rate": 0.00039410916482529866,
      "loss": 3.2247,
      "step": 91800
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4002524614334106,
      "learning_rate": 0.00039410528073340734,
      "loss": 2.9703,
      "step": 91801
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9760059118270874,
      "learning_rate": 0.00039410139662402,
      "loss": 3.0763,
      "step": 91802
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1436195373535156,
      "learning_rate": 0.00039409751249713744,
      "loss": 3.4076,
      "step": 91803
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3349831104278564,
      "learning_rate": 0.00039409362835276043,
      "loss": 3.0434,
      "step": 91804
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6573898792266846,
      "learning_rate": 0.00039408974419088955,
      "loss": 3.0461,
      "step": 91805
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.919575810432434,
      "learning_rate": 0.00039408586001152566,
      "loss": 3.2262,
      "step": 91806
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.215456008911133,
      "learning_rate": 0.0003940819758146695,
      "loss": 2.8515,
      "step": 91807
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.054349899291992,
      "learning_rate": 0.00039407809160032164,
      "loss": 2.9916,
      "step": 91808
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2865335941314697,
      "learning_rate": 0.00039407420736848285,
      "loss": 3.1123,
      "step": 91809
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.895762324333191,
      "learning_rate": 0.00039407032311915394,
      "loss": 2.7853,
      "step": 91810
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.706874132156372,
      "learning_rate": 0.00039406643885233565,
      "loss": 3.097,
      "step": 91811
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8495007753372192,
      "learning_rate": 0.0003940625545680286,
      "loss": 2.7424,
      "step": 91812
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.656394124031067,
      "learning_rate": 0.00039405867026623344,
      "loss": 3.0406,
      "step": 91813
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.9168307781219482,
      "learning_rate": 0.00039405478594695115,
      "loss": 2.8856,
      "step": 91814
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.637945294380188,
      "learning_rate": 0.0003940509016101822,
      "loss": 2.9967,
      "step": 91815
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8693536520004272,
      "learning_rate": 0.0003940470172559274,
      "loss": 2.8704,
      "step": 91816
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7691569328308105,
      "learning_rate": 0.00039404313288418755,
      "loss": 2.7665,
      "step": 91817
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.537747859954834,
      "learning_rate": 0.00039403924849496327,
      "loss": 2.9603,
      "step": 91818
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8202838897705078,
      "learning_rate": 0.0003940353640882553,
      "loss": 3.146,
      "step": 91819
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.614341974258423,
      "learning_rate": 0.0003940314796640644,
      "loss": 3.1152,
      "step": 91820
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.084456443786621,
      "learning_rate": 0.00039402759522239137,
      "loss": 2.7819,
      "step": 91821
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.33385968208313,
      "learning_rate": 0.0003940237107632367,
      "loss": 2.7813,
      "step": 91822
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8426834344863892,
      "learning_rate": 0.00039401982628660127,
      "loss": 2.9202,
      "step": 91823
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3979334831237793,
      "learning_rate": 0.00039401594179248574,
      "loss": 3.1539,
      "step": 91824
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.050105333328247,
      "learning_rate": 0.00039401205728089103,
      "loss": 2.9314,
      "step": 91825
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.633158564567566,
      "learning_rate": 0.00039400817275181757,
      "loss": 2.9741,
      "step": 91826
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9736486673355103,
      "learning_rate": 0.00039400428820526623,
      "loss": 2.9946,
      "step": 91827
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.054642915725708,
      "learning_rate": 0.00039400040364123777,
      "loss": 2.9705,
      "step": 91828
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8957732915878296,
      "learning_rate": 0.00039399651905973283,
      "loss": 3.1122,
      "step": 91829
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.016782760620117,
      "learning_rate": 0.00039399263446075213,
      "loss": 3.0185,
      "step": 91830
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.341890811920166,
      "learning_rate": 0.00039398874984429653,
      "loss": 3.0942,
      "step": 91831
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8074169158935547,
      "learning_rate": 0.00039398486521036657,
      "loss": 3.1155,
      "step": 91832
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.798374652862549,
      "learning_rate": 0.000393980980558963,
      "loss": 2.9101,
      "step": 91833
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.234982490539551,
      "learning_rate": 0.0003939770958900867,
      "loss": 2.9549,
      "step": 91834
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0284924507141113,
      "learning_rate": 0.00039397321120373825,
      "loss": 3.0017,
      "step": 91835
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1919877529144287,
      "learning_rate": 0.0003939693264999184,
      "loss": 3.0559,
      "step": 91836
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3517913818359375,
      "learning_rate": 0.00039396544177862787,
      "loss": 3.0786,
      "step": 91837
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.878530502319336,
      "learning_rate": 0.0003939615570398674,
      "loss": 3.0561,
      "step": 91838
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6713862419128418,
      "learning_rate": 0.0003939576722836377,
      "loss": 3.2833,
      "step": 91839
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.5423836708068848,
      "learning_rate": 0.0003939537875099395,
      "loss": 3.2378,
      "step": 91840
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.865352153778076,
      "learning_rate": 0.0003939499027187735,
      "loss": 3.099,
      "step": 91841
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5866901874542236,
      "learning_rate": 0.00039394601791014033,
      "loss": 2.8618,
      "step": 91842
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.049544095993042,
      "learning_rate": 0.000393942133084041,
      "loss": 2.8499,
      "step": 91843
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.975452184677124,
      "learning_rate": 0.00039393824824047606,
      "loss": 2.8699,
      "step": 91844
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6617038249969482,
      "learning_rate": 0.0003939343633794461,
      "loss": 2.9153,
      "step": 91845
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2758119106292725,
      "learning_rate": 0.000393930478500952,
      "loss": 2.8892,
      "step": 91846
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.996840000152588,
      "learning_rate": 0.0003939265936049945,
      "loss": 3.1107,
      "step": 91847
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5660128593444824,
      "learning_rate": 0.00039392270869157423,
      "loss": 2.9213,
      "step": 91848
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8433020114898682,
      "learning_rate": 0.0003939188237606919,
      "loss": 3.2196,
      "step": 91849
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.735681414604187,
      "learning_rate": 0.0003939149388123485,
      "loss": 3.1,
      "step": 91850
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5079092979431152,
      "learning_rate": 0.0003939110538465444,
      "loss": 3.0863,
      "step": 91851
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2296295166015625,
      "learning_rate": 0.00039390716886328033,
      "loss": 3.078,
      "step": 91852
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6715707778930664,
      "learning_rate": 0.00039390328386255735,
      "loss": 3.1785,
      "step": 91853
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6441559791564941,
      "learning_rate": 0.00039389939884437595,
      "loss": 3.0732,
      "step": 91854
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.164109945297241,
      "learning_rate": 0.00039389551380873683,
      "loss": 2.9022,
      "step": 91855
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.849966526031494,
      "learning_rate": 0.00039389162875564076,
      "loss": 3.0628,
      "step": 91856
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6851218938827515,
      "learning_rate": 0.00039388774368508844,
      "loss": 3.1496,
      "step": 91857
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9823850393295288,
      "learning_rate": 0.0003938838585970807,
      "loss": 3.3244,
      "step": 91858
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7724146842956543,
      "learning_rate": 0.00039387997349161813,
      "loss": 3.0648,
      "step": 91859
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.60335373878479,
      "learning_rate": 0.0003938760883687015,
      "loss": 3.1683,
      "step": 91860
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7389235496520996,
      "learning_rate": 0.0003938722032283315,
      "loss": 2.964,
      "step": 91861
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.10153865814209,
      "learning_rate": 0.0003938683180705089,
      "loss": 2.9453,
      "step": 91862
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.53277587890625,
      "learning_rate": 0.0003938644328952344,
      "loss": 3.1847,
      "step": 91863
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6425138711929321,
      "learning_rate": 0.0003938605477025088,
      "loss": 3.1697,
      "step": 91864
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1632678508758545,
      "learning_rate": 0.00039385666249233274,
      "loss": 2.9549,
      "step": 91865
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.560614824295044,
      "learning_rate": 0.0003938527772647069,
      "loss": 3.1114,
      "step": 91866
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5841935873031616,
      "learning_rate": 0.000393848892019632,
      "loss": 2.9175,
      "step": 91867
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.929360866546631,
      "learning_rate": 0.000393845006757109,
      "loss": 3.043,
      "step": 91868
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6758226156234741,
      "learning_rate": 0.0003938411214771383,
      "loss": 2.8126,
      "step": 91869
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.48928165435791,
      "learning_rate": 0.00039383723617972076,
      "loss": 3.0308,
      "step": 91870
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7052581310272217,
      "learning_rate": 0.0003938333508648572,
      "loss": 3.3277,
      "step": 91871
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8860265016555786,
      "learning_rate": 0.0003938294655325482,
      "loss": 3.0832,
      "step": 91872
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.086061716079712,
      "learning_rate": 0.00039382558018279454,
      "loss": 3.3191,
      "step": 91873
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9632970094680786,
      "learning_rate": 0.00039382169481559697,
      "loss": 2.8765,
      "step": 91874
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7930306196212769,
      "learning_rate": 0.00039381780943095605,
      "loss": 3.0072,
      "step": 91875
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4644111394882202,
      "learning_rate": 0.00039381392402887275,
      "loss": 2.9051,
      "step": 91876
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1165196895599365,
      "learning_rate": 0.0003938100386093476,
      "loss": 3.0875,
      "step": 91877
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7015113830566406,
      "learning_rate": 0.0003938061531723814,
      "loss": 3.2988,
      "step": 91878
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6143475770950317,
      "learning_rate": 0.0003938022677179749,
      "loss": 2.839,
      "step": 91879
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.256319761276245,
      "learning_rate": 0.00039379838224612885,
      "loss": 2.9986,
      "step": 91880
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5006470680236816,
      "learning_rate": 0.00039379449675684377,
      "loss": 3.1893,
      "step": 91881
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0080227851867676,
      "learning_rate": 0.0003937906112501206,
      "loss": 3.0371,
      "step": 91882
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6675689220428467,
      "learning_rate": 0.00039378672572596,
      "loss": 2.985,
      "step": 91883
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.262467861175537,
      "learning_rate": 0.00039378284018436264,
      "loss": 2.9495,
      "step": 91884
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.071765661239624,
      "learning_rate": 0.00039377895462532926,
      "loss": 2.9273,
      "step": 91885
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7729243040084839,
      "learning_rate": 0.00039377506904886067,
      "loss": 2.8603,
      "step": 91886
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4839720726013184,
      "learning_rate": 0.00039377118345495745,
      "loss": 2.8836,
      "step": 91887
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0759551525115967,
      "learning_rate": 0.0003937672978436204,
      "loss": 2.989,
      "step": 91888
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.6062045097351074,
      "learning_rate": 0.0003937634122148503,
      "loss": 2.8693,
      "step": 91889
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5379606485366821,
      "learning_rate": 0.00039375952656864777,
      "loss": 2.8165,
      "step": 91890
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1532785892486572,
      "learning_rate": 0.0003937556409050136,
      "loss": 2.8972,
      "step": 91891
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.176758050918579,
      "learning_rate": 0.00039375175522394856,
      "loss": 3.0521,
      "step": 91892
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.692606210708618,
      "learning_rate": 0.00039374786952545317,
      "loss": 3.2791,
      "step": 91893
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.803428053855896,
      "learning_rate": 0.0003937439838095283,
      "loss": 2.8957,
      "step": 91894
    },
    {
      "epoch": 1.2,
      "grad_norm": 4.8387956619262695,
      "learning_rate": 0.0003937400980761747,
      "loss": 2.7361,
      "step": 91895
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6734163761138916,
      "learning_rate": 0.000393736212325393,
      "loss": 2.8697,
      "step": 91896
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.654702067375183,
      "learning_rate": 0.000393732326557184,
      "loss": 3.1954,
      "step": 91897
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.552160620689392,
      "learning_rate": 0.00039372844077154836,
      "loss": 2.8074,
      "step": 91898
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.882662296295166,
      "learning_rate": 0.0003937245549684869,
      "loss": 2.8922,
      "step": 91899
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2429757118225098,
      "learning_rate": 0.00039372066914800027,
      "loss": 2.8483,
      "step": 91900
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0951409339904785,
      "learning_rate": 0.00039371678331008906,
      "loss": 3.0744,
      "step": 91901
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.039402723312378,
      "learning_rate": 0.0003937128974547543,
      "loss": 2.8723,
      "step": 91902
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6455485820770264,
      "learning_rate": 0.00039370901158199644,
      "loss": 2.7941,
      "step": 91903
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7674930095672607,
      "learning_rate": 0.0003937051256918164,
      "loss": 3.1772,
      "step": 91904
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.317654848098755,
      "learning_rate": 0.00039370123978421466,
      "loss": 3.0089,
      "step": 91905
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7247132062911987,
      "learning_rate": 0.00039369735385919225,
      "loss": 3.1324,
      "step": 91906
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.828842043876648,
      "learning_rate": 0.0003936934679167496,
      "loss": 3.0153,
      "step": 91907
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5305830240249634,
      "learning_rate": 0.00039368958195688765,
      "loss": 3.1778,
      "step": 91908
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4069517850875854,
      "learning_rate": 0.000393685695979607,
      "loss": 3.0558,
      "step": 91909
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5852230787277222,
      "learning_rate": 0.00039368180998490846,
      "loss": 3.1523,
      "step": 91910
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5181448459625244,
      "learning_rate": 0.0003936779239727927,
      "loss": 2.967,
      "step": 91911
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1711809635162354,
      "learning_rate": 0.0003936740379432604,
      "loss": 2.87,
      "step": 91912
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6860010623931885,
      "learning_rate": 0.00039367015189631236,
      "loss": 3.0356,
      "step": 91913
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7968695163726807,
      "learning_rate": 0.0003936662658319492,
      "loss": 2.9172,
      "step": 91914
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7856192588806152,
      "learning_rate": 0.00039366237975017176,
      "loss": 3.0826,
      "step": 91915
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5266444683074951,
      "learning_rate": 0.00039365849365098073,
      "loss": 3.2248,
      "step": 91916
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.357128620147705,
      "learning_rate": 0.0003936546075343769,
      "loss": 3.0808,
      "step": 91917
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9297738075256348,
      "learning_rate": 0.0003936507214003608,
      "loss": 3.1706,
      "step": 91918
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.245542287826538,
      "learning_rate": 0.0003936468352489332,
      "loss": 3.0904,
      "step": 91919
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7508513927459717,
      "learning_rate": 0.00039364294908009504,
      "loss": 3.0778,
      "step": 91920
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.064417839050293,
      "learning_rate": 0.00039363906289384686,
      "loss": 3.0105,
      "step": 91921
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6207762956619263,
      "learning_rate": 0.0003936351766901893,
      "loss": 3.4051,
      "step": 91922
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.0542523860931396,
      "learning_rate": 0.0003936312904691233,
      "loss": 2.915,
      "step": 91923
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6754884719848633,
      "learning_rate": 0.00039362740423064946,
      "loss": 3.1417,
      "step": 91924
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.01790452003479,
      "learning_rate": 0.0003936235179747685,
      "loss": 3.0912,
      "step": 91925
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8957455158233643,
      "learning_rate": 0.00039361963170148116,
      "loss": 3.0666,
      "step": 91926
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.0167953968048096,
      "learning_rate": 0.00039361574541078815,
      "loss": 2.9803,
      "step": 91927
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3724119663238525,
      "learning_rate": 0.0003936118591026902,
      "loss": 3.072,
      "step": 91928
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8075928688049316,
      "learning_rate": 0.0003936079727771881,
      "loss": 3.0151,
      "step": 91929
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.717930316925049,
      "learning_rate": 0.00039360408643428246,
      "loss": 3.2202,
      "step": 91930
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0736122131347656,
      "learning_rate": 0.000393600200073974,
      "loss": 3.0165,
      "step": 91931
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5209579467773438,
      "learning_rate": 0.00039359631369626364,
      "loss": 3.1897,
      "step": 91932
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5486702919006348,
      "learning_rate": 0.0003935924273011519,
      "loss": 2.9277,
      "step": 91933
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.91126549243927,
      "learning_rate": 0.00039358854088863943,
      "loss": 2.8982,
      "step": 91934
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6474478244781494,
      "learning_rate": 0.0003935846544587273,
      "loss": 3.1199,
      "step": 91935
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7128045558929443,
      "learning_rate": 0.00039358076801141584,
      "loss": 2.8941,
      "step": 91936
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6196951866149902,
      "learning_rate": 0.000393576881546706,
      "loss": 3.1502,
      "step": 91937
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.020878553390503,
      "learning_rate": 0.0003935729950645985,
      "loss": 3.2207,
      "step": 91938
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7871180772781372,
      "learning_rate": 0.00039356910856509395,
      "loss": 3.0196,
      "step": 91939
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8780202865600586,
      "learning_rate": 0.00039356522204819316,
      "loss": 3.124,
      "step": 91940
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9511524438858032,
      "learning_rate": 0.00039356133551389684,
      "loss": 2.9899,
      "step": 91941
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.64354407787323,
      "learning_rate": 0.00039355744896220574,
      "loss": 3.1224,
      "step": 91942
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.706568717956543,
      "learning_rate": 0.00039355356239312045,
      "loss": 2.9324,
      "step": 91943
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6553797721862793,
      "learning_rate": 0.0003935496758066419,
      "loss": 3.1805,
      "step": 91944
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.468329668045044,
      "learning_rate": 0.0003935457892027706,
      "loss": 3.2768,
      "step": 91945
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.3289791345596313,
      "learning_rate": 0.0003935419025815074,
      "loss": 2.868,
      "step": 91946
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1414718627929688,
      "learning_rate": 0.0003935380159428531,
      "loss": 3.2499,
      "step": 91947
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4670706987380981,
      "learning_rate": 0.0003935341292868082,
      "loss": 2.9803,
      "step": 91948
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7138255834579468,
      "learning_rate": 0.0003935302426133735,
      "loss": 3.2441,
      "step": 91949
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8671144247055054,
      "learning_rate": 0.00039352635592254984,
      "loss": 2.9319,
      "step": 91950
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.232792854309082,
      "learning_rate": 0.00039352246921433784,
      "loss": 3.1028,
      "step": 91951
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.883528232574463,
      "learning_rate": 0.00039351858248873827,
      "loss": 3.0056,
      "step": 91952
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.245591640472412,
      "learning_rate": 0.00039351469574575184,
      "loss": 2.7917,
      "step": 91953
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9381599426269531,
      "learning_rate": 0.0003935108089853792,
      "loss": 2.8136,
      "step": 91954
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1835391521453857,
      "learning_rate": 0.00039350692220762115,
      "loss": 3.2371,
      "step": 91955
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.623276710510254,
      "learning_rate": 0.0003935030354124785,
      "loss": 3.0765,
      "step": 91956
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.083843469619751,
      "learning_rate": 0.0003934991485999518,
      "loss": 3.0558,
      "step": 91957
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.1050262451171875,
      "learning_rate": 0.0003934952617700418,
      "loss": 2.8723,
      "step": 91958
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.341779947280884,
      "learning_rate": 0.00039349137492274936,
      "loss": 3.0842,
      "step": 91959
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6122252941131592,
      "learning_rate": 0.000393487488058075,
      "loss": 3.0737,
      "step": 91960
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.092432975769043,
      "learning_rate": 0.0003934836011760196,
      "loss": 2.8181,
      "step": 91961
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.0371510982513428,
      "learning_rate": 0.00039347971427658386,
      "loss": 2.9115,
      "step": 91962
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6618300676345825,
      "learning_rate": 0.00039347582735976843,
      "loss": 2.9627,
      "step": 91963
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9596227407455444,
      "learning_rate": 0.00039347194042557406,
      "loss": 3.0474,
      "step": 91964
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8179049491882324,
      "learning_rate": 0.0003934680534740016,
      "loss": 3.0415,
      "step": 91965
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7754950523376465,
      "learning_rate": 0.00039346416650505166,
      "loss": 3.2321,
      "step": 91966
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.603428363800049,
      "learning_rate": 0.00039346027951872483,
      "loss": 2.9589,
      "step": 91967
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6390817165374756,
      "learning_rate": 0.00039345639251502215,
      "loss": 3.0356,
      "step": 91968
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7734153270721436,
      "learning_rate": 0.000393452505493944,
      "loss": 3.0362,
      "step": 91969
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6704961061477661,
      "learning_rate": 0.00039344861845549126,
      "loss": 2.9744,
      "step": 91970
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8305144309997559,
      "learning_rate": 0.0003934447313996648,
      "loss": 2.9829,
      "step": 91971
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.506446361541748,
      "learning_rate": 0.0003934408443264651,
      "loss": 3.0445,
      "step": 91972
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6399506330490112,
      "learning_rate": 0.000393436957235893,
      "loss": 3.2404,
      "step": 91973
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.704901933670044,
      "learning_rate": 0.0003934330701279493,
      "loss": 3.0814,
      "step": 91974
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7942882776260376,
      "learning_rate": 0.00039342918300263447,
      "loss": 2.8736,
      "step": 91975
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5750627517700195,
      "learning_rate": 0.0003934252958599494,
      "loss": 2.8415,
      "step": 91976
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.852944254875183,
      "learning_rate": 0.0003934214086998949,
      "loss": 2.7034,
      "step": 91977
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7529487609863281,
      "learning_rate": 0.0003934175215224716,
      "loss": 2.8304,
      "step": 91978
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.325779438018799,
      "learning_rate": 0.0003934136343276801,
      "loss": 3.1016,
      "step": 91979
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7087548971176147,
      "learning_rate": 0.00039340974711552147,
      "loss": 2.8894,
      "step": 91980
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9319442510604858,
      "learning_rate": 0.000393405859885996,
      "loss": 2.9813,
      "step": 91981
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8137295246124268,
      "learning_rate": 0.00039340197263910466,
      "loss": 2.9473,
      "step": 91982
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6835218667984009,
      "learning_rate": 0.00039339808537484813,
      "loss": 3.157,
      "step": 91983
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6398745775222778,
      "learning_rate": 0.0003933941980932272,
      "loss": 3.2036,
      "step": 91984
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5848350524902344,
      "learning_rate": 0.0003933903107942424,
      "loss": 3.0011,
      "step": 91985
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7217142581939697,
      "learning_rate": 0.00039338642347789464,
      "loss": 2.9057,
      "step": 91986
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5260491371154785,
      "learning_rate": 0.00039338253614418467,
      "loss": 2.9103,
      "step": 91987
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8418203592300415,
      "learning_rate": 0.00039337864879311304,
      "loss": 3.0325,
      "step": 91988
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7731070518493652,
      "learning_rate": 0.0003933747614246805,
      "loss": 2.9684,
      "step": 91989
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4828211069107056,
      "learning_rate": 0.00039337087403888785,
      "loss": 3.175,
      "step": 91990
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.314919948577881,
      "learning_rate": 0.00039336698663573587,
      "loss": 2.7878,
      "step": 91991
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.465884208679199,
      "learning_rate": 0.0003933630992152252,
      "loss": 2.9923,
      "step": 91992
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.74543297290802,
      "learning_rate": 0.00039335921177735653,
      "loss": 3.0808,
      "step": 91993
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7026169300079346,
      "learning_rate": 0.00039335532432213064,
      "loss": 2.8584,
      "step": 91994
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.590906023979187,
      "learning_rate": 0.00039335143684954824,
      "loss": 3.0821,
      "step": 91995
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7864004373550415,
      "learning_rate": 0.00039334754935961,
      "loss": 2.9731,
      "step": 91996
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.533586025238037,
      "learning_rate": 0.00039334366185231665,
      "loss": 2.9987,
      "step": 91997
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8758244514465332,
      "learning_rate": 0.00039333977432766913,
      "loss": 2.8882,
      "step": 91998
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.42987322807312,
      "learning_rate": 0.0003933358867856678,
      "loss": 3.057,
      "step": 91999
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6362823247909546,
      "learning_rate": 0.00039333199922631364,
      "loss": 2.954,
      "step": 92000
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9868884086608887,
      "learning_rate": 0.0003933281116496073,
      "loss": 3.076,
      "step": 92001
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.969933271408081,
      "learning_rate": 0.0003933242240555496,
      "loss": 2.9258,
      "step": 92002
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8317666053771973,
      "learning_rate": 0.00039332033644414095,
      "loss": 2.932,
      "step": 92003
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0569796562194824,
      "learning_rate": 0.0003933164488153824,
      "loss": 2.7536,
      "step": 92004
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.778141736984253,
      "learning_rate": 0.0003933125611692746,
      "loss": 2.9023,
      "step": 92005
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.483008623123169,
      "learning_rate": 0.0003933086735058182,
      "loss": 2.9716,
      "step": 92006
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0867080688476562,
      "learning_rate": 0.0003933047858250139,
      "loss": 3.1044,
      "step": 92007
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8793785572052002,
      "learning_rate": 0.00039330089812686256,
      "loss": 2.9226,
      "step": 92008
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.295900821685791,
      "learning_rate": 0.0003932970104113648,
      "loss": 3.0574,
      "step": 92009
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.2542810440063477,
      "learning_rate": 0.00039329312267852133,
      "loss": 3.023,
      "step": 92010
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.742234468460083,
      "learning_rate": 0.000393289234928333,
      "loss": 3.0625,
      "step": 92011
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8667405843734741,
      "learning_rate": 0.0003932853471608004,
      "loss": 3.1153,
      "step": 92012
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9077728986740112,
      "learning_rate": 0.00039328145937592417,
      "loss": 3.0338,
      "step": 92013
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7946940660476685,
      "learning_rate": 0.0003932775715737053,
      "loss": 2.5658,
      "step": 92014
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6158416271209717,
      "learning_rate": 0.00039327368375414437,
      "loss": 3.0656,
      "step": 92015
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3427672386169434,
      "learning_rate": 0.000393269795917242,
      "loss": 3.171,
      "step": 92016
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2346456050872803,
      "learning_rate": 0.0003932659080629991,
      "loss": 2.8949,
      "step": 92017
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9773081541061401,
      "learning_rate": 0.00039326202019141626,
      "loss": 3.0686,
      "step": 92018
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6869227886199951,
      "learning_rate": 0.0003932581323024942,
      "loss": 3.2234,
      "step": 92019
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.394374370574951,
      "learning_rate": 0.00039325424439623383,
      "loss": 2.848,
      "step": 92020
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9257357120513916,
      "learning_rate": 0.0003932503564726357,
      "loss": 3.0481,
      "step": 92021
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8275046348571777,
      "learning_rate": 0.0003932464685317005,
      "loss": 2.8808,
      "step": 92022
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6761889457702637,
      "learning_rate": 0.00039324258057342904,
      "loss": 2.7117,
      "step": 92023
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6494078636169434,
      "learning_rate": 0.00039323869259782205,
      "loss": 3.0397,
      "step": 92024
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.9100427627563477,
      "learning_rate": 0.00039323480460488024,
      "loss": 2.7737,
      "step": 92025
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.987949013710022,
      "learning_rate": 0.0003932309165946043,
      "loss": 3.1267,
      "step": 92026
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8496917486190796,
      "learning_rate": 0.0003932270285669949,
      "loss": 3.2044,
      "step": 92027
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8573721647262573,
      "learning_rate": 0.0003932231405220529,
      "loss": 2.8271,
      "step": 92028
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0391266345977783,
      "learning_rate": 0.00039321925245977904,
      "loss": 3.0453,
      "step": 92029
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9369096755981445,
      "learning_rate": 0.0003932153643801738,
      "loss": 3.1577,
      "step": 92030
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.887621521949768,
      "learning_rate": 0.0003932114762832382,
      "loss": 3.0669,
      "step": 92031
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9994897842407227,
      "learning_rate": 0.0003932075881689728,
      "loss": 3.1334,
      "step": 92032
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.913621425628662,
      "learning_rate": 0.00039320370003737833,
      "loss": 2.9927,
      "step": 92033
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.820608139038086,
      "learning_rate": 0.0003931998118884555,
      "loss": 3.2703,
      "step": 92034
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8064454793930054,
      "learning_rate": 0.0003931959237222051,
      "loss": 3.1794,
      "step": 92035
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6111441850662231,
      "learning_rate": 0.0003931920355386278,
      "loss": 3.1025,
      "step": 92036
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8949775695800781,
      "learning_rate": 0.00039318814733772437,
      "loss": 2.6939,
      "step": 92037
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.219116449356079,
      "learning_rate": 0.00039318425911949554,
      "loss": 3.0108,
      "step": 92038
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.343737840652466,
      "learning_rate": 0.0003931803708839419,
      "loss": 3.0976,
      "step": 92039
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6573200225830078,
      "learning_rate": 0.0003931764826310643,
      "loss": 3.1387,
      "step": 92040
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5173308849334717,
      "learning_rate": 0.0003931725943608635,
      "loss": 2.8447,
      "step": 92041
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8302757740020752,
      "learning_rate": 0.0003931687060733401,
      "loss": 3.1768,
      "step": 92042
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3264737129211426,
      "learning_rate": 0.00039316481776849484,
      "loss": 3.1425,
      "step": 92043
    },
    {
      "epoch": 1.2,
      "grad_norm": 4.082342147827148,
      "learning_rate": 0.00039316092944632853,
      "loss": 2.9976,
      "step": 92044
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.9365384578704834,
      "learning_rate": 0.0003931570411068419,
      "loss": 3.0994,
      "step": 92045
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5766232013702393,
      "learning_rate": 0.0003931531527500355,
      "loss": 3.0958,
      "step": 92046
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.2274537086486816,
      "learning_rate": 0.00039314926437591024,
      "loss": 2.8628,
      "step": 92047
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.313983201980591,
      "learning_rate": 0.0003931453759844668,
      "loss": 3.1565,
      "step": 92048
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7228260040283203,
      "learning_rate": 0.0003931414875757058,
      "loss": 2.8557,
      "step": 92049
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.863724946975708,
      "learning_rate": 0.0003931375991496281,
      "loss": 2.9659,
      "step": 92050
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.212646007537842,
      "learning_rate": 0.0003931337107062343,
      "loss": 3.0005,
      "step": 92051
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0387089252471924,
      "learning_rate": 0.00039312982224552523,
      "loss": 3.1054,
      "step": 92052
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8029979467391968,
      "learning_rate": 0.0003931259337675015,
      "loss": 3.139,
      "step": 92053
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.138655424118042,
      "learning_rate": 0.0003931220452721641,
      "loss": 2.8273,
      "step": 92054
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.189667224884033,
      "learning_rate": 0.00039311815675951333,
      "loss": 3.1731,
      "step": 92055
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0156943798065186,
      "learning_rate": 0.00039311426822955024,
      "loss": 3.0406,
      "step": 92056
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5711777210235596,
      "learning_rate": 0.00039311037968227543,
      "loss": 3.1119,
      "step": 92057
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9872311353683472,
      "learning_rate": 0.00039310649111768965,
      "loss": 2.7148,
      "step": 92058
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8438351154327393,
      "learning_rate": 0.00039310260253579366,
      "loss": 3.0647,
      "step": 92059
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8406792879104614,
      "learning_rate": 0.00039309871393658805,
      "loss": 2.7389,
      "step": 92060
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.855358600616455,
      "learning_rate": 0.0003930948253200737,
      "loss": 3.1109,
      "step": 92061
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6649984121322632,
      "learning_rate": 0.0003930909366862512,
      "loss": 2.8177,
      "step": 92062
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.844443678855896,
      "learning_rate": 0.0003930870480351214,
      "loss": 3.0321,
      "step": 92063
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5831629037857056,
      "learning_rate": 0.0003930831593666849,
      "loss": 3.2168,
      "step": 92064
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.087897777557373,
      "learning_rate": 0.00039307927068094257,
      "loss": 2.802,
      "step": 92065
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6472750902175903,
      "learning_rate": 0.000393075381977895,
      "loss": 2.7693,
      "step": 92066
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8512086868286133,
      "learning_rate": 0.00039307149325754293,
      "loss": 3.0046,
      "step": 92067
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7311855554580688,
      "learning_rate": 0.0003930676045198871,
      "loss": 2.8691,
      "step": 92068
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8320670127868652,
      "learning_rate": 0.00039306371576492837,
      "loss": 2.9375,
      "step": 92069
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5111734867095947,
      "learning_rate": 0.00039305982699266724,
      "loss": 3.0278,
      "step": 92070
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.872008204460144,
      "learning_rate": 0.00039305593820310446,
      "loss": 3.1419,
      "step": 92071
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7282249927520752,
      "learning_rate": 0.000393052049396241,
      "loss": 3.0267,
      "step": 92072
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1954126358032227,
      "learning_rate": 0.0003930481605720773,
      "loss": 2.8912,
      "step": 92073
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7988756895065308,
      "learning_rate": 0.00039304427173061414,
      "loss": 3.1045,
      "step": 92074
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2169089317321777,
      "learning_rate": 0.0003930403828718525,
      "loss": 2.9089,
      "step": 92075
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.722447633743286,
      "learning_rate": 0.0003930364939957927,
      "loss": 2.9495,
      "step": 92076
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3015530109405518,
      "learning_rate": 0.0003930326051024357,
      "loss": 2.87,
      "step": 92077
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5802587270736694,
      "learning_rate": 0.00039302871619178217,
      "loss": 2.8395,
      "step": 92078
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5372146368026733,
      "learning_rate": 0.0003930248272638329,
      "loss": 3.0694,
      "step": 92079
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0582685470581055,
      "learning_rate": 0.0003930209383185885,
      "loss": 2.8879,
      "step": 92080
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.635850667953491,
      "learning_rate": 0.00039301704935604975,
      "loss": 3.0537,
      "step": 92081
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.857908010482788,
      "learning_rate": 0.00039301316037621746,
      "loss": 2.9987,
      "step": 92082
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0594587326049805,
      "learning_rate": 0.00039300927137909226,
      "loss": 3.1285,
      "step": 92083
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3422694206237793,
      "learning_rate": 0.00039300538236467474,
      "loss": 2.893,
      "step": 92084
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4160850048065186,
      "learning_rate": 0.00039300149333296587,
      "loss": 3.0377,
      "step": 92085
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.010313034057617,
      "learning_rate": 0.0003929976042839663,
      "loss": 3.0569,
      "step": 92086
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9012142419815063,
      "learning_rate": 0.0003929937152176767,
      "loss": 2.8145,
      "step": 92087
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8058714866638184,
      "learning_rate": 0.0003929898261340977,
      "loss": 2.6655,
      "step": 92088
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.667455792427063,
      "learning_rate": 0.00039298593703323024,
      "loss": 2.888,
      "step": 92089
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.525125503540039,
      "learning_rate": 0.00039298204791507496,
      "loss": 3.0534,
      "step": 92090
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.299299955368042,
      "learning_rate": 0.00039297815877963255,
      "loss": 3.0631,
      "step": 92091
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.678838849067688,
      "learning_rate": 0.0003929742696269037,
      "loss": 3.0701,
      "step": 92092
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.643867015838623,
      "learning_rate": 0.0003929703804568892,
      "loss": 3.2972,
      "step": 92093
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.243621826171875,
      "learning_rate": 0.00039296649126958975,
      "loss": 3.1277,
      "step": 92094
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6088411808013916,
      "learning_rate": 0.00039296260206500604,
      "loss": 2.8211,
      "step": 92095
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5492377281188965,
      "learning_rate": 0.00039295871284313896,
      "loss": 2.9497,
      "step": 92096
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.633095622062683,
      "learning_rate": 0.00039295482360398893,
      "loss": 3.2271,
      "step": 92097
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6040971279144287,
      "learning_rate": 0.00039295093434755694,
      "loss": 2.9684,
      "step": 92098
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1309616565704346,
      "learning_rate": 0.00039294704507384363,
      "loss": 2.9106,
      "step": 92099
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8730989694595337,
      "learning_rate": 0.0003929431557828497,
      "loss": 2.9457,
      "step": 92100
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2722432613372803,
      "learning_rate": 0.0003929392664745758,
      "loss": 2.9196,
      "step": 92101
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6300495862960815,
      "learning_rate": 0.00039293537714902286,
      "loss": 3.079,
      "step": 92102
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1868443489074707,
      "learning_rate": 0.0003929314878061914,
      "loss": 2.961,
      "step": 92103
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.8587090969085693,
      "learning_rate": 0.0003929275984460822,
      "loss": 2.9439,
      "step": 92104
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7842072248458862,
      "learning_rate": 0.00039292370906869606,
      "loss": 2.9337,
      "step": 92105
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8475685119628906,
      "learning_rate": 0.00039291981967403365,
      "loss": 3.0834,
      "step": 92106
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5731801986694336,
      "learning_rate": 0.00039291593026209563,
      "loss": 3.0354,
      "step": 92107
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.278006076812744,
      "learning_rate": 0.0003929120408328829,
      "loss": 2.7261,
      "step": 92108
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6229950189590454,
      "learning_rate": 0.0003929081513863959,
      "loss": 3.0828,
      "step": 92109
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.160304546356201,
      "learning_rate": 0.0003929042619226356,
      "loss": 2.9185,
      "step": 92110
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1482932567596436,
      "learning_rate": 0.00039290037244160274,
      "loss": 2.9893,
      "step": 92111
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8872997760772705,
      "learning_rate": 0.0003928964829432979,
      "loss": 3.0361,
      "step": 92112
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.563373327255249,
      "learning_rate": 0.0003928925934277217,
      "loss": 3.005,
      "step": 92113
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.50032639503479,
      "learning_rate": 0.00039288870389487526,
      "loss": 3.2025,
      "step": 92114
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8424220085144043,
      "learning_rate": 0.00039288481434475886,
      "loss": 2.75,
      "step": 92115
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7640210390090942,
      "learning_rate": 0.00039288092477737346,
      "loss": 3.2058,
      "step": 92116
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.374476671218872,
      "learning_rate": 0.0003928770351927198,
      "loss": 3.0619,
      "step": 92117
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8774856328964233,
      "learning_rate": 0.0003928731455907985,
      "loss": 3.0582,
      "step": 92118
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9033966064453125,
      "learning_rate": 0.00039286925597161034,
      "loss": 2.9239,
      "step": 92119
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.186086416244507,
      "learning_rate": 0.00039286536633515595,
      "loss": 3.1909,
      "step": 92120
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0406534671783447,
      "learning_rate": 0.00039286147668143634,
      "loss": 2.9415,
      "step": 92121
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4781217575073242,
      "learning_rate": 0.0003928575870104519,
      "loss": 2.7839,
      "step": 92122
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.501692295074463,
      "learning_rate": 0.0003928536973222035,
      "loss": 3.1734,
      "step": 92123
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9071968793869019,
      "learning_rate": 0.00039284980761669186,
      "loss": 2.8548,
      "step": 92124
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6746888160705566,
      "learning_rate": 0.00039284591789391757,
      "loss": 2.8424,
      "step": 92125
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.899970531463623,
      "learning_rate": 0.0003928420281538816,
      "loss": 2.9325,
      "step": 92126
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6818861961364746,
      "learning_rate": 0.00039283813839658457,
      "loss": 2.8693,
      "step": 92127
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8787362575531006,
      "learning_rate": 0.00039283424862202705,
      "loss": 3.0188,
      "step": 92128
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.183527708053589,
      "learning_rate": 0.00039283035883020995,
      "loss": 3.1264,
      "step": 92129
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7969392538070679,
      "learning_rate": 0.000392826469021134,
      "loss": 3.1395,
      "step": 92130
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.087104320526123,
      "learning_rate": 0.00039282257919479973,
      "loss": 2.9992,
      "step": 92131
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.011711359024048,
      "learning_rate": 0.0003928186893512081,
      "loss": 3.1017,
      "step": 92132
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9085710048675537,
      "learning_rate": 0.0003928147994903597,
      "loss": 2.723,
      "step": 92133
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.861655831336975,
      "learning_rate": 0.00039281090961225526,
      "loss": 2.9401,
      "step": 92134
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.245528221130371,
      "learning_rate": 0.0003928070197168955,
      "loss": 2.9596,
      "step": 92135
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.978449821472168,
      "learning_rate": 0.0003928031298042812,
      "loss": 2.9902,
      "step": 92136
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.641208529472351,
      "learning_rate": 0.000392799239874413,
      "loss": 3.0123,
      "step": 92137
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2882192134857178,
      "learning_rate": 0.0003927953499272917,
      "loss": 2.9304,
      "step": 92138
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3775908946990967,
      "learning_rate": 0.00039279145996291804,
      "loss": 3.0479,
      "step": 92139
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8556153774261475,
      "learning_rate": 0.0003927875699812926,
      "loss": 2.8921,
      "step": 92140
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7474567890167236,
      "learning_rate": 0.00039278367998241625,
      "loss": 3.1105,
      "step": 92141
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7344152927398682,
      "learning_rate": 0.0003927797899662897,
      "loss": 3.2512,
      "step": 92142
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0659101009368896,
      "learning_rate": 0.0003927758999329136,
      "loss": 2.649,
      "step": 92143
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.065131425857544,
      "learning_rate": 0.00039277200988228867,
      "loss": 3.0144,
      "step": 92144
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.947462797164917,
      "learning_rate": 0.00039276811981441576,
      "loss": 3.0536,
      "step": 92145
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7421619892120361,
      "learning_rate": 0.0003927642297292954,
      "loss": 3.0724,
      "step": 92146
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8616478443145752,
      "learning_rate": 0.00039276033962692844,
      "loss": 2.9601,
      "step": 92147
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.631776213645935,
      "learning_rate": 0.00039275644950731567,
      "loss": 2.8835,
      "step": 92148
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.633204221725464,
      "learning_rate": 0.0003927525593704576,
      "loss": 2.8285,
      "step": 92149
    },
    {
      "epoch": 1.2,
      "grad_norm": 4.201893329620361,
      "learning_rate": 0.0003927486692163552,
      "loss": 3.0095,
      "step": 92150
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0854170322418213,
      "learning_rate": 0.00039274477904500905,
      "loss": 2.8632,
      "step": 92151
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.593277931213379,
      "learning_rate": 0.0003927408888564199,
      "loss": 3.0924,
      "step": 92152
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.201782464981079,
      "learning_rate": 0.00039273699865058836,
      "loss": 3.0242,
      "step": 92153
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9655948877334595,
      "learning_rate": 0.0003927331084275154,
      "loss": 2.9468,
      "step": 92154
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6837072372436523,
      "learning_rate": 0.0003927292181872015,
      "loss": 3.0742,
      "step": 92155
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9093801975250244,
      "learning_rate": 0.0003927253279296475,
      "loss": 3.0954,
      "step": 92156
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.338369846343994,
      "learning_rate": 0.00039272143765485415,
      "loss": 3.1493,
      "step": 92157
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0980756282806396,
      "learning_rate": 0.0003927175473628221,
      "loss": 3.2411,
      "step": 92158
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.433429479598999,
      "learning_rate": 0.00039271365705355213,
      "loss": 3.1082,
      "step": 92159
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8294655084609985,
      "learning_rate": 0.000392709766727045,
      "loss": 2.7939,
      "step": 92160
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6735072135925293,
      "learning_rate": 0.0003927058763833013,
      "loss": 3.1021,
      "step": 92161
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9006863832473755,
      "learning_rate": 0.0003927019860223218,
      "loss": 3.2347,
      "step": 92162
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5628931522369385,
      "learning_rate": 0.0003926980956441073,
      "loss": 2.961,
      "step": 92163
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.808937668800354,
      "learning_rate": 0.00039269420524865853,
      "loss": 2.7998,
      "step": 92164
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5858607292175293,
      "learning_rate": 0.00039269031483597604,
      "loss": 3.0862,
      "step": 92165
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8676518201828003,
      "learning_rate": 0.0003926864244060607,
      "loss": 3.1543,
      "step": 92166
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9335840940475464,
      "learning_rate": 0.00039268253395891326,
      "loss": 3.2498,
      "step": 92167
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7246958017349243,
      "learning_rate": 0.00039267864349453433,
      "loss": 3.2646,
      "step": 92168
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5974019765853882,
      "learning_rate": 0.0003926747530129247,
      "loss": 3.1068,
      "step": 92169
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.789245843887329,
      "learning_rate": 0.00039267086251408513,
      "loss": 3.1014,
      "step": 92170
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8296655416488647,
      "learning_rate": 0.00039266697199801627,
      "loss": 3.0715,
      "step": 92171
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.841111183166504,
      "learning_rate": 0.0003926630814647189,
      "loss": 2.989,
      "step": 92172
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.542325496673584,
      "learning_rate": 0.00039265919091419364,
      "loss": 2.957,
      "step": 92173
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4319005012512207,
      "learning_rate": 0.0003926553003464413,
      "loss": 3.097,
      "step": 92174
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5423154830932617,
      "learning_rate": 0.00039265140976146265,
      "loss": 3.2578,
      "step": 92175
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7397379875183105,
      "learning_rate": 0.0003926475191592583,
      "loss": 3.1795,
      "step": 92176
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6046361923217773,
      "learning_rate": 0.00039264362853982904,
      "loss": 3.214,
      "step": 92177
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.807525634765625,
      "learning_rate": 0.00039263973790317565,
      "loss": 2.8339,
      "step": 92178
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.856459379196167,
      "learning_rate": 0.0003926358472492987,
      "loss": 2.6983,
      "step": 92179
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9663374423980713,
      "learning_rate": 0.00039263195657819903,
      "loss": 3.2002,
      "step": 92180
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7626286745071411,
      "learning_rate": 0.0003926280658898774,
      "loss": 2.9856,
      "step": 92181
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4978442192077637,
      "learning_rate": 0.00039262417518433427,
      "loss": 3.1726,
      "step": 92182
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.039855718612671,
      "learning_rate": 0.0003926202844615707,
      "loss": 2.9847,
      "step": 92183
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.848061442375183,
      "learning_rate": 0.0003926163937215872,
      "loss": 3.1186,
      "step": 92184
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.678512692451477,
      "learning_rate": 0.00039261250296438463,
      "loss": 3.3538,
      "step": 92185
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6783007383346558,
      "learning_rate": 0.00039260861218996367,
      "loss": 2.9987,
      "step": 92186
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5623530149459839,
      "learning_rate": 0.000392604721398325,
      "loss": 2.7001,
      "step": 92187
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5589869022369385,
      "learning_rate": 0.0003926008305894694,
      "loss": 3.0355,
      "step": 92188
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5405409336090088,
      "learning_rate": 0.0003925969397633974,
      "loss": 3.1276,
      "step": 92189
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7221300601959229,
      "learning_rate": 0.00039259304892011,
      "loss": 3.1833,
      "step": 92190
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5720913410186768,
      "learning_rate": 0.0003925891580596078,
      "loss": 3.333,
      "step": 92191
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7653993368148804,
      "learning_rate": 0.00039258526718189156,
      "loss": 2.9649,
      "step": 92192
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.814247965812683,
      "learning_rate": 0.0003925813762869619,
      "loss": 2.8629,
      "step": 92193
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.571621894836426,
      "learning_rate": 0.0003925774853748197,
      "loss": 2.8328,
      "step": 92194
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.021775722503662,
      "learning_rate": 0.0003925735944454655,
      "loss": 3.0078,
      "step": 92195
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5314677953720093,
      "learning_rate": 0.0003925697034989001,
      "loss": 3.066,
      "step": 92196
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.735192894935608,
      "learning_rate": 0.0003925658125351243,
      "loss": 3.2664,
      "step": 92197
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.880310535430908,
      "learning_rate": 0.00039256192155413876,
      "loss": 2.9012,
      "step": 92198
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7358314990997314,
      "learning_rate": 0.00039255803055594427,
      "loss": 3.1487,
      "step": 92199
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6969660520553589,
      "learning_rate": 0.00039255413954054154,
      "loss": 2.9089,
      "step": 92200
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.272953510284424,
      "learning_rate": 0.00039255024850793106,
      "loss": 3.0371,
      "step": 92201
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.774071455001831,
      "learning_rate": 0.00039254635745811385,
      "loss": 3.1741,
      "step": 92202
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4518325328826904,
      "learning_rate": 0.0003925424663910905,
      "loss": 2.806,
      "step": 92203
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5473005771636963,
      "learning_rate": 0.0003925385753068618,
      "loss": 3.1257,
      "step": 92204
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5199520587921143,
      "learning_rate": 0.00039253468420542836,
      "loss": 2.9883,
      "step": 92205
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0506668090820312,
      "learning_rate": 0.00039253079308679105,
      "loss": 3.1146,
      "step": 92206
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8129332065582275,
      "learning_rate": 0.00039252690195095053,
      "loss": 3.1151,
      "step": 92207
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.3294320106506348,
      "learning_rate": 0.0003925230107979074,
      "loss": 2.9547,
      "step": 92208
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4003543853759766,
      "learning_rate": 0.0003925191196276626,
      "loss": 3.0803,
      "step": 92209
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3529961109161377,
      "learning_rate": 0.00039251522844021674,
      "loss": 2.7258,
      "step": 92210
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.200028896331787,
      "learning_rate": 0.00039251133723557053,
      "loss": 2.968,
      "step": 92211
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0109140872955322,
      "learning_rate": 0.0003925074460137247,
      "loss": 3.0837,
      "step": 92212
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8548086881637573,
      "learning_rate": 0.00039250355477467996,
      "loss": 2.9303,
      "step": 92213
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8746417760849,
      "learning_rate": 0.00039249966351843717,
      "loss": 3.1702,
      "step": 92214
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0442798137664795,
      "learning_rate": 0.0003924957722449969,
      "loss": 2.965,
      "step": 92215
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1714751720428467,
      "learning_rate": 0.0003924918809543599,
      "loss": 3.033,
      "step": 92216
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6951035261154175,
      "learning_rate": 0.0003924879896465269,
      "loss": 2.9653,
      "step": 92217
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5013145208358765,
      "learning_rate": 0.0003924840983214987,
      "loss": 3.0918,
      "step": 92218
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6203900575637817,
      "learning_rate": 0.0003924802069792759,
      "loss": 2.9185,
      "step": 92219
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6086686849594116,
      "learning_rate": 0.00039247631561985924,
      "loss": 2.8526,
      "step": 92220
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.618053913116455,
      "learning_rate": 0.00039247242424324957,
      "loss": 2.852,
      "step": 92221
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8216177225112915,
      "learning_rate": 0.00039246853284944755,
      "loss": 3.1704,
      "step": 92222
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7288669347763062,
      "learning_rate": 0.0003924646414384538,
      "loss": 3.1196,
      "step": 92223
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8754602670669556,
      "learning_rate": 0.0003924607500102692,
      "loss": 2.7569,
      "step": 92224
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.554488182067871,
      "learning_rate": 0.0003924568585648944,
      "loss": 2.8051,
      "step": 92225
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.272958755493164,
      "learning_rate": 0.0003924529671023301,
      "loss": 3.0688,
      "step": 92226
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.04945707321167,
      "learning_rate": 0.00039244907562257706,
      "loss": 2.9062,
      "step": 92227
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6597670316696167,
      "learning_rate": 0.00039244518412563596,
      "loss": 2.7725,
      "step": 92228
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2347702980041504,
      "learning_rate": 0.00039244129261150755,
      "loss": 2.9375,
      "step": 92229
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4735914468765259,
      "learning_rate": 0.00039243740108019264,
      "loss": 3.2516,
      "step": 92230
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3486289978027344,
      "learning_rate": 0.00039243350953169177,
      "loss": 3.3144,
      "step": 92231
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6957272291183472,
      "learning_rate": 0.00039242961796600575,
      "loss": 3.0112,
      "step": 92232
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.710086464881897,
      "learning_rate": 0.00039242572638313546,
      "loss": 3.0199,
      "step": 92233
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8113068342208862,
      "learning_rate": 0.00039242183478308137,
      "loss": 2.7153,
      "step": 92234
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5887877941131592,
      "learning_rate": 0.0003924179431658443,
      "loss": 3.0835,
      "step": 92235
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.55741286277771,
      "learning_rate": 0.00039241405153142507,
      "loss": 2.9758,
      "step": 92236
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5876013040542603,
      "learning_rate": 0.00039241015987982427,
      "loss": 3.2002,
      "step": 92237
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6176363229751587,
      "learning_rate": 0.00039240626821104266,
      "loss": 3.0028,
      "step": 92238
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9388353824615479,
      "learning_rate": 0.0003924023765250811,
      "loss": 3.0786,
      "step": 92239
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5786728858947754,
      "learning_rate": 0.0003923984848219401,
      "loss": 3.1788,
      "step": 92240
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7196202278137207,
      "learning_rate": 0.0003923945931016205,
      "loss": 2.7693,
      "step": 92241
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4704471826553345,
      "learning_rate": 0.000392390701364123,
      "loss": 3.093,
      "step": 92242
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.862277865409851,
      "learning_rate": 0.0003923868096094482,
      "loss": 3.0805,
      "step": 92243
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.989429235458374,
      "learning_rate": 0.00039238291783759714,
      "loss": 2.871,
      "step": 92244
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.313589096069336,
      "learning_rate": 0.0003923790260485703,
      "loss": 3.1991,
      "step": 92245
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8297749757766724,
      "learning_rate": 0.00039237513424236845,
      "loss": 2.9225,
      "step": 92246
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.557269811630249,
      "learning_rate": 0.0003923712424189922,
      "loss": 3.1586,
      "step": 92247
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8228758573532104,
      "learning_rate": 0.0003923673505784425,
      "loss": 2.8747,
      "step": 92248
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4224016666412354,
      "learning_rate": 0.00039236345872072,
      "loss": 2.9746,
      "step": 92249
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.889719843864441,
      "learning_rate": 0.0003923595668458253,
      "loss": 3.0851,
      "step": 92250
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.75264573097229,
      "learning_rate": 0.0003923556749537593,
      "loss": 3.0577,
      "step": 92251
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.818023443222046,
      "learning_rate": 0.0003923517830445226,
      "loss": 3.1414,
      "step": 92252
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.839328646659851,
      "learning_rate": 0.0003923478911181159,
      "loss": 2.9821,
      "step": 92253
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8401693105697632,
      "learning_rate": 0.00039234399917454006,
      "loss": 3.1126,
      "step": 92254
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5386803150177002,
      "learning_rate": 0.0003923401072137957,
      "loss": 3.0893,
      "step": 92255
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6621285676956177,
      "learning_rate": 0.0003923362152358836,
      "loss": 3.0692,
      "step": 92256
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8262451887130737,
      "learning_rate": 0.0003923323232408044,
      "loss": 3.3872,
      "step": 92257
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5290701389312744,
      "learning_rate": 0.0003923284312285589,
      "loss": 2.7293,
      "step": 92258
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.658808469772339,
      "learning_rate": 0.0003923245391991479,
      "loss": 2.5746,
      "step": 92259
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5229034423828125,
      "learning_rate": 0.00039232064715257186,
      "loss": 2.7031,
      "step": 92260
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.831439733505249,
      "learning_rate": 0.00039231675508883177,
      "loss": 2.8604,
      "step": 92261
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2618398666381836,
      "learning_rate": 0.0003923128630079282,
      "loss": 2.9736,
      "step": 92262
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.035505533218384,
      "learning_rate": 0.00039230897090986196,
      "loss": 2.8261,
      "step": 92263
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.2101943492889404,
      "learning_rate": 0.0003923050787946337,
      "loss": 2.9768,
      "step": 92264
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1416373252868652,
      "learning_rate": 0.00039230118666224427,
      "loss": 3.0264,
      "step": 92265
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8378762006759644,
      "learning_rate": 0.0003922972945126943,
      "loss": 2.9915,
      "step": 92266
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.764739990234375,
      "learning_rate": 0.00039229340234598443,
      "loss": 2.8141,
      "step": 92267
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6638338565826416,
      "learning_rate": 0.0003922895101621155,
      "loss": 2.9697,
      "step": 92268
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1820175647735596,
      "learning_rate": 0.0003922856179610882,
      "loss": 3.1514,
      "step": 92269
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6981189250946045,
      "learning_rate": 0.0003922817257429033,
      "loss": 3.0171,
      "step": 92270
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2643187046051025,
      "learning_rate": 0.0003922778335075615,
      "loss": 3.2071,
      "step": 92271
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0612831115722656,
      "learning_rate": 0.00039227394125506353,
      "loss": 3.1581,
      "step": 92272
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.681973695755005,
      "learning_rate": 0.00039227004898541,
      "loss": 3.2094,
      "step": 92273
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.698178768157959,
      "learning_rate": 0.0003922661566986018,
      "loss": 3.0617,
      "step": 92274
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.8965904712677,
      "learning_rate": 0.00039226226439463953,
      "loss": 3.2079,
      "step": 92275
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7256290912628174,
      "learning_rate": 0.00039225837207352405,
      "loss": 3.0591,
      "step": 92276
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9112257957458496,
      "learning_rate": 0.0003922544797352559,
      "loss": 3.1508,
      "step": 92277
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.5432887077331543,
      "learning_rate": 0.0003922505873798359,
      "loss": 3.2304,
      "step": 92278
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7402222156524658,
      "learning_rate": 0.00039224669500726486,
      "loss": 2.9859,
      "step": 92279
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.637170433998108,
      "learning_rate": 0.0003922428026175433,
      "loss": 2.8966,
      "step": 92280
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7781745195388794,
      "learning_rate": 0.00039223891021067213,
      "loss": 3.0384,
      "step": 92281
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6868412494659424,
      "learning_rate": 0.00039223501778665204,
      "loss": 3.0735,
      "step": 92282
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8092865943908691,
      "learning_rate": 0.00039223112534548366,
      "loss": 2.951,
      "step": 92283
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.136838912963867,
      "learning_rate": 0.0003922272328871678,
      "loss": 3.4312,
      "step": 92284
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4328809976577759,
      "learning_rate": 0.0003922233404117052,
      "loss": 2.9193,
      "step": 92285
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0378715991973877,
      "learning_rate": 0.0003922194479190965,
      "loss": 2.6252,
      "step": 92286
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7856454849243164,
      "learning_rate": 0.00039221555540934244,
      "loss": 2.9857,
      "step": 92287
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0013463497161865,
      "learning_rate": 0.0003922116628824438,
      "loss": 2.9482,
      "step": 92288
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.293907642364502,
      "learning_rate": 0.00039220777033840124,
      "loss": 2.9459,
      "step": 92289
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6989529132843018,
      "learning_rate": 0.00039220387777721554,
      "loss": 2.9973,
      "step": 92290
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6929240226745605,
      "learning_rate": 0.0003921999851988874,
      "loss": 3.1193,
      "step": 92291
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4406163692474365,
      "learning_rate": 0.0003921960926034175,
      "loss": 3.0994,
      "step": 92292
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7451772689819336,
      "learning_rate": 0.0003921921999908066,
      "loss": 3.26,
      "step": 92293
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6327850818634033,
      "learning_rate": 0.0003921883073610555,
      "loss": 3.0917,
      "step": 92294
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4369430541992188,
      "learning_rate": 0.0003921844147141648,
      "loss": 3.3048,
      "step": 92295
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.044343948364258,
      "learning_rate": 0.00039218052205013533,
      "loss": 3.1324,
      "step": 92296
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0330162048339844,
      "learning_rate": 0.0003921766293689678,
      "loss": 3.0943,
      "step": 92297
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2813720703125,
      "learning_rate": 0.00039217273667066277,
      "loss": 3.1691,
      "step": 92298
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.8378329277038574,
      "learning_rate": 0.0003921688439552211,
      "loss": 2.8175,
      "step": 92299
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5651252269744873,
      "learning_rate": 0.00039216495122264364,
      "loss": 2.8739,
      "step": 92300
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8133556842803955,
      "learning_rate": 0.00039216105847293084,
      "loss": 2.8918,
      "step": 92301
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9633314609527588,
      "learning_rate": 0.0003921571657060836,
      "loss": 3.0196,
      "step": 92302
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9351112842559814,
      "learning_rate": 0.0003921532729221026,
      "loss": 3.1146,
      "step": 92303
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9636491537094116,
      "learning_rate": 0.00039214938012098856,
      "loss": 3.0209,
      "step": 92304
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.734504222869873,
      "learning_rate": 0.0003921454873027422,
      "loss": 2.8401,
      "step": 92305
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.140535593032837,
      "learning_rate": 0.00039214159446736437,
      "loss": 2.7644,
      "step": 92306
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5885095596313477,
      "learning_rate": 0.0003921377016148555,
      "loss": 3.0824,
      "step": 92307
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8785583972930908,
      "learning_rate": 0.00039213380874521654,
      "loss": 3.0936,
      "step": 92308
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.764626383781433,
      "learning_rate": 0.0003921299158584483,
      "loss": 3.016,
      "step": 92309
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8496768474578857,
      "learning_rate": 0.0003921260229545512,
      "loss": 2.7499,
      "step": 92310
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8062257766723633,
      "learning_rate": 0.00039212213003352617,
      "loss": 3.1058,
      "step": 92311
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.003641128540039,
      "learning_rate": 0.000392118237095374,
      "loss": 2.8535,
      "step": 92312
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.8917295932769775,
      "learning_rate": 0.00039211434414009527,
      "loss": 3.3181,
      "step": 92313
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2752814292907715,
      "learning_rate": 0.0003921104511676907,
      "loss": 2.8691,
      "step": 92314
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.342114210128784,
      "learning_rate": 0.000392106558178161,
      "loss": 2.894,
      "step": 92315
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7537667751312256,
      "learning_rate": 0.0003921026651715071,
      "loss": 3.0019,
      "step": 92316
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.636183500289917,
      "learning_rate": 0.00039209877214772945,
      "loss": 3.0866,
      "step": 92317
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5490612983703613,
      "learning_rate": 0.00039209487910682895,
      "loss": 2.9165,
      "step": 92318
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7937445640563965,
      "learning_rate": 0.00039209098604880635,
      "loss": 3.0013,
      "step": 92319
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.690518617630005,
      "learning_rate": 0.0003920870929736622,
      "loss": 2.9341,
      "step": 92320
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.345065951347351,
      "learning_rate": 0.0003920831998813973,
      "loss": 3.1093,
      "step": 92321
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6530612707138062,
      "learning_rate": 0.00039207930677201245,
      "loss": 3.1075,
      "step": 92322
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.5554933547973633,
      "learning_rate": 0.0003920754136455083,
      "loss": 3.0546,
      "step": 92323
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.062933921813965,
      "learning_rate": 0.0003920715205018856,
      "loss": 3.1717,
      "step": 92324
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2841200828552246,
      "learning_rate": 0.0003920676273411451,
      "loss": 2.8964,
      "step": 92325
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.088979482650757,
      "learning_rate": 0.00039206373416328746,
      "loss": 3.0697,
      "step": 92326
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7737737894058228,
      "learning_rate": 0.0003920598409683134,
      "loss": 2.9363,
      "step": 92327
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7779439687728882,
      "learning_rate": 0.00039205594775622375,
      "loss": 3.0819,
      "step": 92328
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0918006896972656,
      "learning_rate": 0.00039205205452701907,
      "loss": 2.9061,
      "step": 92329
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7493304014205933,
      "learning_rate": 0.00039204816128070026,
      "loss": 3.0128,
      "step": 92330
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7123401165008545,
      "learning_rate": 0.00039204426801726797,
      "loss": 3.0714,
      "step": 92331
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7768704891204834,
      "learning_rate": 0.0003920403747367228,
      "loss": 3.1071,
      "step": 92332
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6535881757736206,
      "learning_rate": 0.00039203648143906565,
      "loss": 2.9485,
      "step": 92333
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6177221536636353,
      "learning_rate": 0.0003920325881242972,
      "loss": 3.1144,
      "step": 92334
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9267258644104004,
      "learning_rate": 0.00039202869479241815,
      "loss": 3.0878,
      "step": 92335
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7110161781311035,
      "learning_rate": 0.0003920248014434292,
      "loss": 2.8037,
      "step": 92336
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.693547010421753,
      "learning_rate": 0.00039202090807733113,
      "loss": 3.1653,
      "step": 92337
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8635015487670898,
      "learning_rate": 0.00039201701469412464,
      "loss": 3.0878,
      "step": 92338
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7531808614730835,
      "learning_rate": 0.0003920131212938104,
      "loss": 2.8916,
      "step": 92339
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4678257703781128,
      "learning_rate": 0.00039200922787638933,
      "loss": 3.0023,
      "step": 92340
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9466004371643066,
      "learning_rate": 0.00039200533444186186,
      "loss": 2.8585,
      "step": 92341
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.288895845413208,
      "learning_rate": 0.0003920014409902288,
      "loss": 2.9269,
      "step": 92342
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1837031841278076,
      "learning_rate": 0.0003919975475214911,
      "loss": 2.915,
      "step": 92343
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.702256441116333,
      "learning_rate": 0.0003919936540356493,
      "loss": 3.0546,
      "step": 92344
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7869293689727783,
      "learning_rate": 0.000391989760532704,
      "loss": 3.0546,
      "step": 92345
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6756671667099,
      "learning_rate": 0.0003919858670126562,
      "loss": 3.1795,
      "step": 92346
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9376355409622192,
      "learning_rate": 0.0003919819734755065,
      "loss": 3.2925,
      "step": 92347
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.711953639984131,
      "learning_rate": 0.00039197807992125556,
      "loss": 3.1529,
      "step": 92348
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8122755289077759,
      "learning_rate": 0.00039197418634990416,
      "loss": 2.9834,
      "step": 92349
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.463787078857422,
      "learning_rate": 0.000391970292761453,
      "loss": 2.8505,
      "step": 92350
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9173160791397095,
      "learning_rate": 0.00039196639915590293,
      "loss": 3.2397,
      "step": 92351
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.761918306350708,
      "learning_rate": 0.00039196250553325446,
      "loss": 2.8567,
      "step": 92352
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9775192737579346,
      "learning_rate": 0.0003919586118935085,
      "loss": 3.227,
      "step": 92353
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7173724174499512,
      "learning_rate": 0.0003919547182366656,
      "loss": 3.0053,
      "step": 92354
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.3663705587387085,
      "learning_rate": 0.00039195082456272665,
      "loss": 3.1767,
      "step": 92355
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8685402870178223,
      "learning_rate": 0.00039194693087169234,
      "loss": 2.9626,
      "step": 92356
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.372154712677002,
      "learning_rate": 0.00039194303716356325,
      "loss": 3.1906,
      "step": 92357
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.881469488143921,
      "learning_rate": 0.00039193914343834036,
      "loss": 2.8778,
      "step": 92358
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9978997707366943,
      "learning_rate": 0.0003919352496960241,
      "loss": 2.8589,
      "step": 92359
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6180499792099,
      "learning_rate": 0.0003919313559366154,
      "loss": 2.841,
      "step": 92360
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.3456391096115112,
      "learning_rate": 0.000391927462160115,
      "loss": 3.0787,
      "step": 92361
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2428605556488037,
      "learning_rate": 0.0003919235683665235,
      "loss": 2.9848,
      "step": 92362
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7972259521484375,
      "learning_rate": 0.0003919196745558416,
      "loss": 2.8451,
      "step": 92363
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.963179349899292,
      "learning_rate": 0.00039191578072807025,
      "loss": 2.8773,
      "step": 92364
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0436182022094727,
      "learning_rate": 0.00039191188688320993,
      "loss": 2.9559,
      "step": 92365
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.632148027420044,
      "learning_rate": 0.00039190799302126134,
      "loss": 2.9827,
      "step": 92366
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0342483520507812,
      "learning_rate": 0.00039190409914222556,
      "loss": 3.2289,
      "step": 92367
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0939934253692627,
      "learning_rate": 0.00039190020524610287,
      "loss": 2.8658,
      "step": 92368
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.627186894416809,
      "learning_rate": 0.00039189631133289434,
      "loss": 3.177,
      "step": 92369
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.590160608291626,
      "learning_rate": 0.0003918924174026005,
      "loss": 2.9155,
      "step": 92370
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4850828647613525,
      "learning_rate": 0.0003918885234552221,
      "loss": 2.863,
      "step": 92371
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9359570741653442,
      "learning_rate": 0.00039188462949075987,
      "loss": 3.0882,
      "step": 92372
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8270790576934814,
      "learning_rate": 0.0003918807355092146,
      "loss": 3.0678,
      "step": 92373
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9870123863220215,
      "learning_rate": 0.00039187684151058695,
      "loss": 3.0609,
      "step": 92374
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0297536849975586,
      "learning_rate": 0.0003918729474948777,
      "loss": 3.1831,
      "step": 92375
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7112174034118652,
      "learning_rate": 0.00039186905346208744,
      "loss": 2.8905,
      "step": 92376
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4873062372207642,
      "learning_rate": 0.000391865159412217,
      "loss": 3.173,
      "step": 92377
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6431632041931152,
      "learning_rate": 0.0003918612653452672,
      "loss": 3.056,
      "step": 92378
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3956897258758545,
      "learning_rate": 0.0003918573712612386,
      "loss": 3.007,
      "step": 92379
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8871630430221558,
      "learning_rate": 0.000391853477160132,
      "loss": 3.0939,
      "step": 92380
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.567844271659851,
      "learning_rate": 0.00039184958304194805,
      "loss": 2.8372,
      "step": 92381
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2556369304656982,
      "learning_rate": 0.0003918456889066876,
      "loss": 3.1601,
      "step": 92382
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7728538513183594,
      "learning_rate": 0.00039184179475435123,
      "loss": 3.0132,
      "step": 92383
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8229928016662598,
      "learning_rate": 0.00039183790058493976,
      "loss": 2.9192,
      "step": 92384
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.778996467590332,
      "learning_rate": 0.00039183400639845385,
      "loss": 2.9266,
      "step": 92385
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.216754913330078,
      "learning_rate": 0.00039183011219489435,
      "loss": 3.1547,
      "step": 92386
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.0859591960906982,
      "learning_rate": 0.00039182621797426186,
      "loss": 3.0572,
      "step": 92387
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.955552577972412,
      "learning_rate": 0.00039182232373655715,
      "loss": 2.8705,
      "step": 92388
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1787526607513428,
      "learning_rate": 0.00039181842948178097,
      "loss": 2.8986,
      "step": 92389
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3334786891937256,
      "learning_rate": 0.0003918145352099339,
      "loss": 2.9812,
      "step": 92390
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7584835290908813,
      "learning_rate": 0.0003918106409210168,
      "loss": 3.0198,
      "step": 92391
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5081840753555298,
      "learning_rate": 0.00039180674661503057,
      "loss": 3.0021,
      "step": 92392
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.680243492126465,
      "learning_rate": 0.0003918028522919755,
      "loss": 3.195,
      "step": 92393
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1291794776916504,
      "learning_rate": 0.0003917989579518526,
      "loss": 2.882,
      "step": 92394
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6652315855026245,
      "learning_rate": 0.0003917950635946627,
      "loss": 2.8916,
      "step": 92395
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9425615072250366,
      "learning_rate": 0.00039179116922040614,
      "loss": 3.1105,
      "step": 92396
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3468878269195557,
      "learning_rate": 0.0003917872748290839,
      "loss": 2.947,
      "step": 92397
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6851222515106201,
      "learning_rate": 0.00039178338042069683,
      "loss": 3.0865,
      "step": 92398
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6718605756759644,
      "learning_rate": 0.0003917794859952454,
      "loss": 2.9893,
      "step": 92399
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7119916677474976,
      "learning_rate": 0.00039177559155273043,
      "loss": 2.9926,
      "step": 92400
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.9251444339752197,
      "learning_rate": 0.00039177169709315264,
      "loss": 3.1599,
      "step": 92401
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2563207149505615,
      "learning_rate": 0.00039176780261651275,
      "loss": 2.7998,
      "step": 92402
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4364540576934814,
      "learning_rate": 0.0003917639081228115,
      "loss": 2.8611,
      "step": 92403
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.739412307739258,
      "learning_rate": 0.0003917600136120497,
      "loss": 2.8123,
      "step": 92404
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7085801362991333,
      "learning_rate": 0.0003917561190842278,
      "loss": 2.6294,
      "step": 92405
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.698577880859375,
      "learning_rate": 0.00039175222453934685,
      "loss": 2.9341,
      "step": 92406
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.709044337272644,
      "learning_rate": 0.00039174832997740746,
      "loss": 2.9925,
      "step": 92407
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9612295627593994,
      "learning_rate": 0.00039174443539841016,
      "loss": 3.0506,
      "step": 92408
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1312882900238037,
      "learning_rate": 0.00039174054080235593,
      "loss": 3.0824,
      "step": 92409
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.950305700302124,
      "learning_rate": 0.0003917366461892454,
      "loss": 2.9281,
      "step": 92410
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.0519747734069824,
      "learning_rate": 0.0003917327515590793,
      "loss": 2.8979,
      "step": 92411
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9097973108291626,
      "learning_rate": 0.0003917288569118583,
      "loss": 2.9199,
      "step": 92412
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.837208867073059,
      "learning_rate": 0.0003917249622475833,
      "loss": 2.822,
      "step": 92413
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.6864802837371826,
      "learning_rate": 0.00039172106756625477,
      "loss": 2.7797,
      "step": 92414
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7080724239349365,
      "learning_rate": 0.0003917171728678736,
      "loss": 2.6581,
      "step": 92415
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6004180908203125,
      "learning_rate": 0.0003917132781524405,
      "loss": 2.6643,
      "step": 92416
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0558929443359375,
      "learning_rate": 0.00039170938341995617,
      "loss": 2.9416,
      "step": 92417
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9537596702575684,
      "learning_rate": 0.0003917054886704213,
      "loss": 3.1373,
      "step": 92418
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7360678911209106,
      "learning_rate": 0.0003917015939038367,
      "loss": 2.9308,
      "step": 92419
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6596243381500244,
      "learning_rate": 0.00039169769912020296,
      "loss": 3.046,
      "step": 92420
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5659003257751465,
      "learning_rate": 0.00039169380431952096,
      "loss": 2.8419,
      "step": 92421
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7899140119552612,
      "learning_rate": 0.00039168990950179134,
      "loss": 2.8618,
      "step": 92422
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5538558959960938,
      "learning_rate": 0.0003916860146670148,
      "loss": 3.2938,
      "step": 92423
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8824691772460938,
      "learning_rate": 0.00039168211981519213,
      "loss": 3.2145,
      "step": 92424
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.3584280014038086,
      "learning_rate": 0.000391678224946324,
      "loss": 3.136,
      "step": 92425
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8837555646896362,
      "learning_rate": 0.00039167433006041114,
      "loss": 3.0729,
      "step": 92426
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8573122024536133,
      "learning_rate": 0.00039167043515745434,
      "loss": 3.0999,
      "step": 92427
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.658968448638916,
      "learning_rate": 0.00039166654023745426,
      "loss": 3.1093,
      "step": 92428
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7186390161514282,
      "learning_rate": 0.00039166264530041166,
      "loss": 3.0935,
      "step": 92429
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.238684892654419,
      "learning_rate": 0.0003916587503463272,
      "loss": 3.1044,
      "step": 92430
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7685068845748901,
      "learning_rate": 0.0003916548553752017,
      "loss": 3.2688,
      "step": 92431
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2766592502593994,
      "learning_rate": 0.00039165096038703585,
      "loss": 2.8398,
      "step": 92432
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7326716184616089,
      "learning_rate": 0.0003916470653818303,
      "loss": 3.0367,
      "step": 92433
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8188693523406982,
      "learning_rate": 0.00039164317035958576,
      "loss": 2.8366,
      "step": 92434
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.693152904510498,
      "learning_rate": 0.00039163927532030306,
      "loss": 2.888,
      "step": 92435
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5260393619537354,
      "learning_rate": 0.000391635380263983,
      "loss": 3.0065,
      "step": 92436
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.492563486099243,
      "learning_rate": 0.0003916314851906261,
      "loss": 2.8642,
      "step": 92437
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5463368892669678,
      "learning_rate": 0.0003916275901002332,
      "loss": 3.2111,
      "step": 92438
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6925225257873535,
      "learning_rate": 0.0003916236949928051,
      "loss": 2.9952,
      "step": 92439
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6719151735305786,
      "learning_rate": 0.00039161979986834225,
      "loss": 2.8212,
      "step": 92440
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.49272620677948,
      "learning_rate": 0.00039161590472684563,
      "loss": 3.1772,
      "step": 92441
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5290237665176392,
      "learning_rate": 0.0003916120095683158,
      "loss": 2.8535,
      "step": 92442
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7395962476730347,
      "learning_rate": 0.00039160811439275373,
      "loss": 3.0373,
      "step": 92443
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.135732412338257,
      "learning_rate": 0.0003916042192001599,
      "loss": 2.9156,
      "step": 92444
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5835970640182495,
      "learning_rate": 0.00039160032399053506,
      "loss": 3.2075,
      "step": 92445
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.166470766067505,
      "learning_rate": 0.0003915964287638801,
      "loss": 3.0129,
      "step": 92446
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9110604524612427,
      "learning_rate": 0.0003915925335201956,
      "loss": 3.1114,
      "step": 92447
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.952000617980957,
      "learning_rate": 0.00039158863825948227,
      "loss": 3.1043,
      "step": 92448
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5619145631790161,
      "learning_rate": 0.0003915847429817409,
      "loss": 3.1217,
      "step": 92449
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.012368679046631,
      "learning_rate": 0.0003915808476869723,
      "loss": 3.0322,
      "step": 92450
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.429530143737793,
      "learning_rate": 0.00039157695237517696,
      "loss": 2.9649,
      "step": 92451
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6451750993728638,
      "learning_rate": 0.00039157305704635574,
      "loss": 2.985,
      "step": 92452
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4416040182113647,
      "learning_rate": 0.00039156916170050946,
      "loss": 3.1219,
      "step": 92453
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6025078296661377,
      "learning_rate": 0.00039156526633763865,
      "loss": 2.8765,
      "step": 92454
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7132452726364136,
      "learning_rate": 0.00039156137095774414,
      "loss": 3.0893,
      "step": 92455
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1110754013061523,
      "learning_rate": 0.0003915574755608268,
      "loss": 3.1893,
      "step": 92456
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.162567377090454,
      "learning_rate": 0.000391553580146887,
      "loss": 3.0877,
      "step": 92457
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6194504499435425,
      "learning_rate": 0.0003915496847159257,
      "loss": 2.9708,
      "step": 92458
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4390926361083984,
      "learning_rate": 0.0003915457892679436,
      "loss": 2.9479,
      "step": 92459
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.379723310470581,
      "learning_rate": 0.00039154189380294146,
      "loss": 3.0715,
      "step": 92460
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.803562045097351,
      "learning_rate": 0.0003915379983209199,
      "loss": 2.9018,
      "step": 92461
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6643478870391846,
      "learning_rate": 0.0003915341028218798,
      "loss": 3.0106,
      "step": 92462
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6815301179885864,
      "learning_rate": 0.00039153020730582167,
      "loss": 3.2634,
      "step": 92463
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7837870121002197,
      "learning_rate": 0.00039152631177274635,
      "loss": 3.1198,
      "step": 92464
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6896661520004272,
      "learning_rate": 0.00039152241622265465,
      "loss": 2.7874,
      "step": 92465
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.306643009185791,
      "learning_rate": 0.0003915185206555471,
      "loss": 2.9785,
      "step": 92466
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7349430322647095,
      "learning_rate": 0.0003915146250714245,
      "loss": 2.7855,
      "step": 92467
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.508143186569214,
      "learning_rate": 0.00039151072947028775,
      "loss": 2.9835,
      "step": 92468
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.630757212638855,
      "learning_rate": 0.0003915068338521373,
      "loss": 2.9456,
      "step": 92469
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8796777725219727,
      "learning_rate": 0.00039150293821697406,
      "loss": 3.0302,
      "step": 92470
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.825227975845337,
      "learning_rate": 0.00039149904256479876,
      "loss": 2.9831,
      "step": 92471
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5440672636032104,
      "learning_rate": 0.00039149514689561194,
      "loss": 2.9749,
      "step": 92472
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.383319139480591,
      "learning_rate": 0.0003914912512094145,
      "loss": 3.0497,
      "step": 92473
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.220247268676758,
      "learning_rate": 0.00039148735550620714,
      "loss": 2.99,
      "step": 92474
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.922656536102295,
      "learning_rate": 0.0003914834597859905,
      "loss": 2.8013,
      "step": 92475
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.9060497283935547,
      "learning_rate": 0.0003914795640487653,
      "loss": 3.1209,
      "step": 92476
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.219892740249634,
      "learning_rate": 0.0003914756682945325,
      "loss": 2.9532,
      "step": 92477
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9327452182769775,
      "learning_rate": 0.00039147177252329247,
      "loss": 3.0424,
      "step": 92478
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7118151187896729,
      "learning_rate": 0.0003914678767350462,
      "loss": 2.8029,
      "step": 92479
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.005171537399292,
      "learning_rate": 0.00039146398092979436,
      "loss": 3.0754,
      "step": 92480
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.173049211502075,
      "learning_rate": 0.00039146008510753755,
      "loss": 3.163,
      "step": 92481
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.558495044708252,
      "learning_rate": 0.00039145618926827666,
      "loss": 3.1107,
      "step": 92482
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5816524028778076,
      "learning_rate": 0.00039145229341201233,
      "loss": 3.0548,
      "step": 92483
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.800007700920105,
      "learning_rate": 0.0003914483975387452,
      "loss": 2.9939,
      "step": 92484
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8758007287979126,
      "learning_rate": 0.00039144450164847614,
      "loss": 2.9306,
      "step": 92485
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.434822916984558,
      "learning_rate": 0.0003914406057412059,
      "loss": 3.1711,
      "step": 92486
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.751509189605713,
      "learning_rate": 0.00039143670981693504,
      "loss": 3.177,
      "step": 92487
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.186140298843384,
      "learning_rate": 0.0003914328138756644,
      "loss": 3.0938,
      "step": 92488
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5046451091766357,
      "learning_rate": 0.0003914289179173947,
      "loss": 2.8266,
      "step": 92489
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6973427534103394,
      "learning_rate": 0.0003914250219421266,
      "loss": 3.1917,
      "step": 92490
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7549560070037842,
      "learning_rate": 0.0003914211259498608,
      "loss": 3.1173,
      "step": 92491
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.899200201034546,
      "learning_rate": 0.00039141722994059816,
      "loss": 2.7249,
      "step": 92492
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.210930824279785,
      "learning_rate": 0.00039141333391433934,
      "loss": 2.7516,
      "step": 92493
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.604926347732544,
      "learning_rate": 0.000391409437871085,
      "loss": 2.9907,
      "step": 92494
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7245150804519653,
      "learning_rate": 0.000391405541810836,
      "loss": 2.8751,
      "step": 92495
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.4390618801116943,
      "learning_rate": 0.00039140164573359294,
      "loss": 3.1514,
      "step": 92496
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.7459604740142822,
      "learning_rate": 0.0003913977496393566,
      "loss": 3.0952,
      "step": 92497
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7229139804840088,
      "learning_rate": 0.0003913938535281277,
      "loss": 2.9738,
      "step": 92498
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.279468297958374,
      "learning_rate": 0.00039138995739990693,
      "loss": 3.079,
      "step": 92499
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7440756559371948,
      "learning_rate": 0.00039138606125469497,
      "loss": 2.8766,
      "step": 92500
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.412346601486206,
      "learning_rate": 0.0003913821650924928,
      "loss": 2.8296,
      "step": 92501
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.72084641456604,
      "learning_rate": 0.00039137826891330087,
      "loss": 3.0659,
      "step": 92502
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2282233238220215,
      "learning_rate": 0.00039137437271711997,
      "loss": 2.8682,
      "step": 92503
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.392389178276062,
      "learning_rate": 0.00039137047650395087,
      "loss": 2.7846,
      "step": 92504
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4753175973892212,
      "learning_rate": 0.0003913665802737942,
      "loss": 2.9053,
      "step": 92505
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.2877564430236816,
      "learning_rate": 0.00039136268402665083,
      "loss": 2.8717,
      "step": 92506
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7689417600631714,
      "learning_rate": 0.0003913587877625215,
      "loss": 2.8939,
      "step": 92507
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9054633378982544,
      "learning_rate": 0.00039135489148140666,
      "loss": 2.8703,
      "step": 92508
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.563143253326416,
      "learning_rate": 0.00039135099518330734,
      "loss": 2.9363,
      "step": 92509
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.1667797565460205,
      "learning_rate": 0.00039134709886822416,
      "loss": 2.8591,
      "step": 92510
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4995962381362915,
      "learning_rate": 0.00039134320253615776,
      "loss": 3.1002,
      "step": 92511
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.20503830909729,
      "learning_rate": 0.00039133930618710897,
      "loss": 3.0051,
      "step": 92512
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.8096845149993896,
      "learning_rate": 0.00039133540982107853,
      "loss": 2.8891,
      "step": 92513
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.271742820739746,
      "learning_rate": 0.00039133151343806704,
      "loss": 3.1107,
      "step": 92514
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8095567226409912,
      "learning_rate": 0.0003913276170380753,
      "loss": 3.0575,
      "step": 92515
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.3078572750091553,
      "learning_rate": 0.000391323720621104,
      "loss": 3.3344,
      "step": 92516
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.917571544647217,
      "learning_rate": 0.000391319824187154,
      "loss": 3.017,
      "step": 92517
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.889787435531616,
      "learning_rate": 0.00039131592773622583,
      "loss": 3.0429,
      "step": 92518
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0655453205108643,
      "learning_rate": 0.00039131203126832034,
      "loss": 2.7278,
      "step": 92519
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.9997243881225586,
      "learning_rate": 0.00039130813478343825,
      "loss": 3.0747,
      "step": 92520
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.8173415660858154,
      "learning_rate": 0.0003913042382815802,
      "loss": 3.017,
      "step": 92521
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.618177890777588,
      "learning_rate": 0.0003913003417627469,
      "loss": 2.993,
      "step": 92522
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.7429451942443848,
      "learning_rate": 0.0003912964452269393,
      "loss": 2.8815,
      "step": 92523
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8125360012054443,
      "learning_rate": 0.000391292548674158,
      "loss": 3.0598,
      "step": 92524
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.203916311264038,
      "learning_rate": 0.0003912886521044035,
      "loss": 2.7924,
      "step": 92525
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.728860855102539,
      "learning_rate": 0.00039128475551767684,
      "loss": 2.9075,
      "step": 92526
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.9519340991973877,
      "learning_rate": 0.0003912808589139785,
      "loss": 2.9993,
      "step": 92527
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.4565987586975098,
      "learning_rate": 0.0003912769622933095,
      "loss": 2.7219,
      "step": 92528
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.678239345550537,
      "learning_rate": 0.0003912730656556703,
      "loss": 2.7989,
      "step": 92529
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.277676582336426,
      "learning_rate": 0.00039126916900106165,
      "loss": 3.1114,
      "step": 92530
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.064927101135254,
      "learning_rate": 0.0003912652723294845,
      "loss": 2.8239,
      "step": 92531
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.0915799140930176,
      "learning_rate": 0.00039126137564093926,
      "loss": 2.9799,
      "step": 92532
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.623777985572815,
      "learning_rate": 0.00039125747893542685,
      "loss": 3.1257,
      "step": 92533
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.6689791679382324,
      "learning_rate": 0.000391253582212948,
      "loss": 3.035,
      "step": 92534
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5871942043304443,
      "learning_rate": 0.0003912496854735034,
      "loss": 3.2697,
      "step": 92535
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.4659899473190308,
      "learning_rate": 0.0003912457887170937,
      "loss": 3.1277,
      "step": 92536
    },
    {
      "epoch": 1.2,
      "grad_norm": 3.2171790599823,
      "learning_rate": 0.0003912418919437197,
      "loss": 3.033,
      "step": 92537
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5429047346115112,
      "learning_rate": 0.0003912379951533821,
      "loss": 3.1578,
      "step": 92538
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8822963237762451,
      "learning_rate": 0.0003912340983460817,
      "loss": 3.2881,
      "step": 92539
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.139000654220581,
      "learning_rate": 0.0003912302015218191,
      "loss": 3.0868,
      "step": 92540
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.8199769258499146,
      "learning_rate": 0.0003912263046805951,
      "loss": 2.8612,
      "step": 92541
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5960725545883179,
      "learning_rate": 0.0003912224078224104,
      "loss": 2.9265,
      "step": 92542
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5907249450683594,
      "learning_rate": 0.00039121851094726565,
      "loss": 3.1231,
      "step": 92543
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.58462393283844,
      "learning_rate": 0.0003912146140551618,
      "loss": 3.0357,
      "step": 92544
    },
    {
      "epoch": 1.2,
      "grad_norm": 1.5770715475082397,
      "learning_rate": 0.00039121071714609936,
      "loss": 3.1516,
      "step": 92545
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5711015462875366,
      "learning_rate": 0.00039120682022007915,
      "loss": 3.1777,
      "step": 92546
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9738179445266724,
      "learning_rate": 0.0003912029232771019,
      "loss": 3.009,
      "step": 92547
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6072416305541992,
      "learning_rate": 0.00039119902631716823,
      "loss": 2.9949,
      "step": 92548
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7128655910491943,
      "learning_rate": 0.0003911951293402789,
      "loss": 3.063,
      "step": 92549
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.624819040298462,
      "learning_rate": 0.00039119123234643483,
      "loss": 3.0322,
      "step": 92550
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8670012950897217,
      "learning_rate": 0.00039118733533563646,
      "loss": 3.1051,
      "step": 92551
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.001354694366455,
      "learning_rate": 0.0003911834383078847,
      "loss": 3.0525,
      "step": 92552
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6692525148391724,
      "learning_rate": 0.00039117954126318023,
      "loss": 3.1072,
      "step": 92553
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7917932271957397,
      "learning_rate": 0.00039117564420152373,
      "loss": 3.0366,
      "step": 92554
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7253634929656982,
      "learning_rate": 0.00039117174712291594,
      "loss": 3.111,
      "step": 92555
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9431061744689941,
      "learning_rate": 0.0003911678500273577,
      "loss": 3.1008,
      "step": 92556
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7760910987854004,
      "learning_rate": 0.0003911639529148495,
      "loss": 3.1692,
      "step": 92557
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4335458278656006,
      "learning_rate": 0.00039116005578539224,
      "loss": 2.9975,
      "step": 92558
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8167204856872559,
      "learning_rate": 0.0003911561586389867,
      "loss": 3.1272,
      "step": 92559
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9774501323699951,
      "learning_rate": 0.00039115226147563343,
      "loss": 3.1645,
      "step": 92560
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9031591415405273,
      "learning_rate": 0.00039114836429533323,
      "loss": 3.2244,
      "step": 92561
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9288321733474731,
      "learning_rate": 0.00039114446709808683,
      "loss": 3.23,
      "step": 92562
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7795547246932983,
      "learning_rate": 0.0003911405698838949,
      "loss": 2.9628,
      "step": 92563
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9367992877960205,
      "learning_rate": 0.0003911366726527583,
      "loss": 3.1552,
      "step": 92564
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9101661443710327,
      "learning_rate": 0.00039113277540467773,
      "loss": 3.1468,
      "step": 92565
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6613795757293701,
      "learning_rate": 0.0003911288781396538,
      "loss": 3.014,
      "step": 92566
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.071509599685669,
      "learning_rate": 0.00039112498085768714,
      "loss": 2.7893,
      "step": 92567
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0171356201171875,
      "learning_rate": 0.00039112108355877884,
      "loss": 2.9486,
      "step": 92568
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7944828271865845,
      "learning_rate": 0.0003911171862429293,
      "loss": 2.8068,
      "step": 92569
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.810288190841675,
      "learning_rate": 0.00039111328891013936,
      "loss": 2.9277,
      "step": 92570
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0166380405426025,
      "learning_rate": 0.0003911093915604098,
      "loss": 3.0828,
      "step": 92571
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0816664695739746,
      "learning_rate": 0.0003911054941937412,
      "loss": 2.9864,
      "step": 92572
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3827083110809326,
      "learning_rate": 0.00039110159681013444,
      "loss": 3.0307,
      "step": 92573
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2039599418640137,
      "learning_rate": 0.00039109769940959017,
      "loss": 2.7549,
      "step": 92574
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9565335512161255,
      "learning_rate": 0.00039109380199210913,
      "loss": 3.0006,
      "step": 92575
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.547828197479248,
      "learning_rate": 0.00039108990455769194,
      "loss": 3.0438,
      "step": 92576
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.649728536605835,
      "learning_rate": 0.00039108600710633946,
      "loss": 3.1211,
      "step": 92577
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5725606679916382,
      "learning_rate": 0.00039108210963805244,
      "loss": 2.9634,
      "step": 92578
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0121517181396484,
      "learning_rate": 0.0003910782121528315,
      "loss": 2.9468,
      "step": 92579
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.668951988220215,
      "learning_rate": 0.0003910743146506774,
      "loss": 2.929,
      "step": 92580
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.198028087615967,
      "learning_rate": 0.0003910704171315908,
      "loss": 2.909,
      "step": 92581
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5237483978271484,
      "learning_rate": 0.00039106651959557254,
      "loss": 2.8941,
      "step": 92582
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5770976543426514,
      "learning_rate": 0.00039106262204262327,
      "loss": 2.9829,
      "step": 92583
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0170743465423584,
      "learning_rate": 0.00039105872447274385,
      "loss": 2.9492,
      "step": 92584
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8941768407821655,
      "learning_rate": 0.0003910548268859347,
      "loss": 3.1067,
      "step": 92585
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7589614391326904,
      "learning_rate": 0.00039105092928219687,
      "loss": 3.236,
      "step": 92586
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8855230808258057,
      "learning_rate": 0.000391047031661531,
      "loss": 2.9446,
      "step": 92587
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.586073398590088,
      "learning_rate": 0.0003910431340239376,
      "loss": 3.2172,
      "step": 92588
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.4914028644561768,
      "learning_rate": 0.00039103923636941767,
      "loss": 2.9897,
      "step": 92589
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5897808074951172,
      "learning_rate": 0.0003910353386979719,
      "loss": 3.0096,
      "step": 92590
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7521066665649414,
      "learning_rate": 0.0003910314410096008,
      "loss": 2.8278,
      "step": 92591
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0783486366271973,
      "learning_rate": 0.0003910275433043053,
      "loss": 2.9166,
      "step": 92592
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6770750284194946,
      "learning_rate": 0.00039102364558208616,
      "loss": 2.8882,
      "step": 92593
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8874075412750244,
      "learning_rate": 0.0003910197478429438,
      "loss": 2.6911,
      "step": 92594
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6455639600753784,
      "learning_rate": 0.00039101585008687934,
      "loss": 2.9536,
      "step": 92595
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2484259605407715,
      "learning_rate": 0.00039101195231389324,
      "loss": 2.8365,
      "step": 92596
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.417210817337036,
      "learning_rate": 0.00039100805452398627,
      "loss": 2.8274,
      "step": 92597
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6819007396697998,
      "learning_rate": 0.0003910041567171592,
      "loss": 2.9422,
      "step": 92598
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9580872058868408,
      "learning_rate": 0.0003910002588934128,
      "loss": 2.9363,
      "step": 92599
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7534838914871216,
      "learning_rate": 0.0003909963610527476,
      "loss": 3.1408,
      "step": 92600
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7418837547302246,
      "learning_rate": 0.00039099246319516457,
      "loss": 2.8518,
      "step": 92601
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.064586877822876,
      "learning_rate": 0.00039098856532066435,
      "loss": 2.8644,
      "step": 92602
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6815743446350098,
      "learning_rate": 0.0003909846674292476,
      "loss": 2.7995,
      "step": 92603
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7016397714614868,
      "learning_rate": 0.000390980769520915,
      "loss": 2.9593,
      "step": 92604
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0295894145965576,
      "learning_rate": 0.0003909768715956674,
      "loss": 3.2692,
      "step": 92605
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9218816757202148,
      "learning_rate": 0.0003909729736535055,
      "loss": 3.0507,
      "step": 92606
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5847591161727905,
      "learning_rate": 0.00039096907569443007,
      "loss": 3.1103,
      "step": 92607
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7748379707336426,
      "learning_rate": 0.00039096517771844164,
      "loss": 3.0846,
      "step": 92608
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.676032543182373,
      "learning_rate": 0.00039096127972554123,
      "loss": 2.8192,
      "step": 92609
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4067163467407227,
      "learning_rate": 0.0003909573817157292,
      "loss": 3.0706,
      "step": 92610
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5773937702178955,
      "learning_rate": 0.0003909534836890066,
      "loss": 3.2256,
      "step": 92611
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.282076835632324,
      "learning_rate": 0.0003909495856453741,
      "loss": 2.9547,
      "step": 92612
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5766154527664185,
      "learning_rate": 0.00039094568758483224,
      "loss": 3.0872,
      "step": 92613
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6035668849945068,
      "learning_rate": 0.0003909417895073819,
      "loss": 2.9901,
      "step": 92614
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8632718324661255,
      "learning_rate": 0.0003909378914130237,
      "loss": 2.9772,
      "step": 92615
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5549131631851196,
      "learning_rate": 0.00039093399330175855,
      "loss": 2.9099,
      "step": 92616
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4958627223968506,
      "learning_rate": 0.00039093009517358695,
      "loss": 3.0756,
      "step": 92617
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1314468383789062,
      "learning_rate": 0.00039092619702850976,
      "loss": 3.1753,
      "step": 92618
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.744621992111206,
      "learning_rate": 0.0003909222988665276,
      "loss": 2.9654,
      "step": 92619
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8796147108078003,
      "learning_rate": 0.00039091840068764146,
      "loss": 2.8925,
      "step": 92620
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8160479068756104,
      "learning_rate": 0.00039091450249185175,
      "loss": 3.0419,
      "step": 92621
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8417141437530518,
      "learning_rate": 0.00039091060427915927,
      "loss": 3.0372,
      "step": 92622
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8501758575439453,
      "learning_rate": 0.00039090670604956495,
      "loss": 2.9718,
      "step": 92623
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2062015533447266,
      "learning_rate": 0.0003909028078030692,
      "loss": 3.054,
      "step": 92624
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7347471714019775,
      "learning_rate": 0.000390898909539673,
      "loss": 3.1321,
      "step": 92625
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.205568790435791,
      "learning_rate": 0.00039089501125937694,
      "loss": 2.8727,
      "step": 92626
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.6709234714508057,
      "learning_rate": 0.00039089111296218175,
      "loss": 2.7174,
      "step": 92627
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.2113397121429443,
      "learning_rate": 0.00039088721464808823,
      "loss": 2.9219,
      "step": 92628
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5923352241516113,
      "learning_rate": 0.0003908833163170971,
      "loss": 2.9989,
      "step": 92629
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1781606674194336,
      "learning_rate": 0.0003908794179692089,
      "loss": 2.9624,
      "step": 92630
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9865840673446655,
      "learning_rate": 0.0003908755196044246,
      "loss": 3.2623,
      "step": 92631
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.260532855987549,
      "learning_rate": 0.0003908716212227449,
      "loss": 3.0225,
      "step": 92632
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.741342067718506,
      "learning_rate": 0.0003908677228241704,
      "loss": 2.9339,
      "step": 92633
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8482381105422974,
      "learning_rate": 0.0003908638244087018,
      "loss": 2.7151,
      "step": 92634
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9339381456375122,
      "learning_rate": 0.00039085992597633995,
      "loss": 3.1052,
      "step": 92635
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.795628786087036,
      "learning_rate": 0.0003908560275270855,
      "loss": 3.0614,
      "step": 92636
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.167325496673584,
      "learning_rate": 0.00039085212906093925,
      "loss": 2.7339,
      "step": 92637
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7484897375106812,
      "learning_rate": 0.00039084823057790183,
      "loss": 2.9043,
      "step": 92638
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.746993899345398,
      "learning_rate": 0.000390844332077974,
      "loss": 3.1112,
      "step": 92639
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8509799242019653,
      "learning_rate": 0.00039084043356115653,
      "loss": 3.0889,
      "step": 92640
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7311713695526123,
      "learning_rate": 0.0003908365350274501,
      "loss": 3.0707,
      "step": 92641
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5988141298294067,
      "learning_rate": 0.00039083263647685546,
      "loss": 3.0588,
      "step": 92642
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.816929578781128,
      "learning_rate": 0.0003908287379093733,
      "loss": 3.1637,
      "step": 92643
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6017560958862305,
      "learning_rate": 0.0003908248393250044,
      "loss": 3.0111,
      "step": 92644
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6654688119888306,
      "learning_rate": 0.0003908209407237494,
      "loss": 2.8894,
      "step": 92645
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8309085369110107,
      "learning_rate": 0.00039081704210560905,
      "loss": 3.2496,
      "step": 92646
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.673150658607483,
      "learning_rate": 0.0003908131434705842,
      "loss": 2.9396,
      "step": 92647
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7523373365402222,
      "learning_rate": 0.00039080924481867536,
      "loss": 3.148,
      "step": 92648
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6897809505462646,
      "learning_rate": 0.0003908053461498834,
      "loss": 2.9725,
      "step": 92649
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7900309562683105,
      "learning_rate": 0.000390801447464209,
      "loss": 3.1085,
      "step": 92650
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6327250003814697,
      "learning_rate": 0.000390797548761653,
      "loss": 3.343,
      "step": 92651
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6894227266311646,
      "learning_rate": 0.0003907936500422158,
      "loss": 2.8579,
      "step": 92652
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.065983533859253,
      "learning_rate": 0.00039078975130589857,
      "loss": 3.0632,
      "step": 92653
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8820383548736572,
      "learning_rate": 0.00039078585255270177,
      "loss": 3.0049,
      "step": 92654
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.903533697128296,
      "learning_rate": 0.00039078195378262605,
      "loss": 2.8657,
      "step": 92655
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.649397373199463,
      "learning_rate": 0.00039077805499567234,
      "loss": 2.9488,
      "step": 92656
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6672598123550415,
      "learning_rate": 0.0003907741561918413,
      "loss": 3.0588,
      "step": 92657
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6619210243225098,
      "learning_rate": 0.00039077025737113355,
      "loss": 3.1192,
      "step": 92658
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.068955659866333,
      "learning_rate": 0.00039076635853354993,
      "loss": 2.7037,
      "step": 92659
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7302095890045166,
      "learning_rate": 0.0003907624596790912,
      "loss": 3.1031,
      "step": 92660
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.789442539215088,
      "learning_rate": 0.0003907585608077579,
      "loss": 3.065,
      "step": 92661
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6927622556686401,
      "learning_rate": 0.00039075466191955096,
      "loss": 2.9835,
      "step": 92662
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8306174278259277,
      "learning_rate": 0.000390750763014471,
      "loss": 3.0882,
      "step": 92663
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1763627529144287,
      "learning_rate": 0.00039074686409251876,
      "loss": 2.7793,
      "step": 92664
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.0891244411468506,
      "learning_rate": 0.0003907429651536949,
      "loss": 2.7404,
      "step": 92665
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.066384792327881,
      "learning_rate": 0.0003907390661980003,
      "loss": 3.0084,
      "step": 92666
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1493442058563232,
      "learning_rate": 0.0003907351672254355,
      "loss": 3.0689,
      "step": 92667
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3916709423065186,
      "learning_rate": 0.0003907312682360014,
      "loss": 2.9364,
      "step": 92668
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.847963809967041,
      "learning_rate": 0.00039072736922969863,
      "loss": 3.2941,
      "step": 92669
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.060476779937744,
      "learning_rate": 0.0003907234702065279,
      "loss": 3.1197,
      "step": 92670
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3986663818359375,
      "learning_rate": 0.00039071957116648996,
      "loss": 3.0307,
      "step": 92671
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.409978866577148,
      "learning_rate": 0.00039071567210958557,
      "loss": 3.1405,
      "step": 92672
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.284169912338257,
      "learning_rate": 0.0003907117730358154,
      "loss": 3.1797,
      "step": 92673
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.254706382751465,
      "learning_rate": 0.00039070787394518023,
      "loss": 2.988,
      "step": 92674
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6891623735427856,
      "learning_rate": 0.00039070397483768074,
      "loss": 2.7365,
      "step": 92675
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3031554222106934,
      "learning_rate": 0.0003907000757133176,
      "loss": 3.0172,
      "step": 92676
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5365010499954224,
      "learning_rate": 0.0003906961765720917,
      "loss": 3.1753,
      "step": 92677
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9320985078811646,
      "learning_rate": 0.0003906922774140037,
      "loss": 2.9973,
      "step": 92678
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4098458290100098,
      "learning_rate": 0.0003906883782390542,
      "loss": 3.0436,
      "step": 92679
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4557764530181885,
      "learning_rate": 0.000390684479047244,
      "loss": 2.9317,
      "step": 92680
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.457235813140869,
      "learning_rate": 0.00039068057983857393,
      "loss": 2.8746,
      "step": 92681
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5952602624893188,
      "learning_rate": 0.0003906766806130446,
      "loss": 3.0385,
      "step": 92682
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.50303053855896,
      "learning_rate": 0.00039067278137065677,
      "loss": 3.1536,
      "step": 92683
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1116881370544434,
      "learning_rate": 0.00039066888211141114,
      "loss": 3.0063,
      "step": 92684
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.683276891708374,
      "learning_rate": 0.00039066498283530846,
      "loss": 2.9426,
      "step": 92685
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.919321060180664,
      "learning_rate": 0.0003906610835423494,
      "loss": 2.8449,
      "step": 92686
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.343013048171997,
      "learning_rate": 0.00039065718423253486,
      "loss": 2.7908,
      "step": 92687
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.800640344619751,
      "learning_rate": 0.0003906532849058653,
      "loss": 2.9889,
      "step": 92688
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5951389074325562,
      "learning_rate": 0.0003906493855623416,
      "loss": 3.235,
      "step": 92689
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5267261266708374,
      "learning_rate": 0.00039064548620196455,
      "loss": 2.866,
      "step": 92690
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7711129188537598,
      "learning_rate": 0.0003906415868247348,
      "loss": 2.8382,
      "step": 92691
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6727471351623535,
      "learning_rate": 0.000390637687430653,
      "loss": 2.832,
      "step": 92692
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6603217124938965,
      "learning_rate": 0.00039063378801971994,
      "loss": 3.2793,
      "step": 92693
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.9858641624450684,
      "learning_rate": 0.00039062988859193634,
      "loss": 2.9809,
      "step": 92694
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5450754165649414,
      "learning_rate": 0.00039062598914730293,
      "loss": 3.1471,
      "step": 92695
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7970380783081055,
      "learning_rate": 0.0003906220896858205,
      "loss": 2.9365,
      "step": 92696
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9362813234329224,
      "learning_rate": 0.0003906181902074897,
      "loss": 3.0074,
      "step": 92697
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.656571865081787,
      "learning_rate": 0.0003906142907123112,
      "loss": 2.8086,
      "step": 92698
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7325817346572876,
      "learning_rate": 0.00039061039120028587,
      "loss": 3.2786,
      "step": 92699
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.863993525505066,
      "learning_rate": 0.00039060649167141424,
      "loss": 3.1831,
      "step": 92700
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.306436061859131,
      "learning_rate": 0.0003906025921256973,
      "loss": 2.7079,
      "step": 92701
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2450971603393555,
      "learning_rate": 0.0003905986925631356,
      "loss": 2.8578,
      "step": 92702
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0192627906799316,
      "learning_rate": 0.0003905947929837298,
      "loss": 3.0921,
      "step": 92703
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7864717245101929,
      "learning_rate": 0.0003905908933874808,
      "loss": 2.9445,
      "step": 92704
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9990334510803223,
      "learning_rate": 0.0003905869937743893,
      "loss": 3.0317,
      "step": 92705
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.717218279838562,
      "learning_rate": 0.00039058309414445586,
      "loss": 3.0403,
      "step": 92706
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.921818494796753,
      "learning_rate": 0.0003905791944976813,
      "loss": 2.9311,
      "step": 92707
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.17972993850708,
      "learning_rate": 0.00039057529483406644,
      "loss": 2.9462,
      "step": 92708
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.004897356033325,
      "learning_rate": 0.0003905713951536119,
      "loss": 2.7623,
      "step": 92709
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3013696670532227,
      "learning_rate": 0.0003905674954563184,
      "loss": 3.2839,
      "step": 92710
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6779465675354004,
      "learning_rate": 0.0003905635957421867,
      "loss": 2.9344,
      "step": 92711
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.188279628753662,
      "learning_rate": 0.0003905596960112176,
      "loss": 2.9585,
      "step": 92712
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7130438089370728,
      "learning_rate": 0.00039055579626341155,
      "loss": 3.0512,
      "step": 92713
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.3067564964294434,
      "learning_rate": 0.0003905518964987697,
      "loss": 2.8587,
      "step": 92714
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5278208255767822,
      "learning_rate": 0.0003905479967172924,
      "loss": 3.2263,
      "step": 92715
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8024057149887085,
      "learning_rate": 0.00039054409691898054,
      "loss": 3.0464,
      "step": 92716
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1859588623046875,
      "learning_rate": 0.00039054019710383476,
      "loss": 3.1382,
      "step": 92717
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.748889207839966,
      "learning_rate": 0.00039053629727185603,
      "loss": 3.1292,
      "step": 92718
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.869025707244873,
      "learning_rate": 0.00039053239742304474,
      "loss": 3.0995,
      "step": 92719
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5591176748275757,
      "learning_rate": 0.0003905284975574018,
      "loss": 3.142,
      "step": 92720
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8198904991149902,
      "learning_rate": 0.00039052459767492794,
      "loss": 2.9603,
      "step": 92721
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.935599684715271,
      "learning_rate": 0.0003905206977756238,
      "loss": 2.8337,
      "step": 92722
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.974345326423645,
      "learning_rate": 0.00039051679785949016,
      "loss": 2.9791,
      "step": 92723
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.551953077316284,
      "learning_rate": 0.0003905128979265278,
      "loss": 2.8349,
      "step": 92724
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6897475719451904,
      "learning_rate": 0.0003905089979767373,
      "loss": 2.9431,
      "step": 92725
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.570533275604248,
      "learning_rate": 0.0003905050980101195,
      "loss": 2.9412,
      "step": 92726
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0239956378936768,
      "learning_rate": 0.0003905011980266752,
      "loss": 2.8078,
      "step": 92727
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9325650930404663,
      "learning_rate": 0.00039049729802640484,
      "loss": 3.1144,
      "step": 92728
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6873060464859009,
      "learning_rate": 0.0003904933980093094,
      "loss": 3.0397,
      "step": 92729
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.165562152862549,
      "learning_rate": 0.0003904894979753896,
      "loss": 2.9227,
      "step": 92730
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.071822166442871,
      "learning_rate": 0.000390485597924646,
      "loss": 2.9819,
      "step": 92731
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1044938564300537,
      "learning_rate": 0.00039048169785707945,
      "loss": 2.943,
      "step": 92732
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7602359056472778,
      "learning_rate": 0.0003904777977726907,
      "loss": 2.9342,
      "step": 92733
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.163470506668091,
      "learning_rate": 0.0003904738976714803,
      "loss": 2.8173,
      "step": 92734
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.78061580657959,
      "learning_rate": 0.0003904699975534491,
      "loss": 3.0692,
      "step": 92735
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8807001113891602,
      "learning_rate": 0.00039046609741859794,
      "loss": 3.0111,
      "step": 92736
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.719085931777954,
      "learning_rate": 0.00039046219726692735,
      "loss": 2.9139,
      "step": 92737
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.318009376525879,
      "learning_rate": 0.0003904582970984381,
      "loss": 2.917,
      "step": 92738
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5456243753433228,
      "learning_rate": 0.00039045439691313104,
      "loss": 3.1067,
      "step": 92739
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.035569906234741,
      "learning_rate": 0.0003904504967110067,
      "loss": 3.1004,
      "step": 92740
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.025221824645996,
      "learning_rate": 0.000390446596492066,
      "loss": 2.7883,
      "step": 92741
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5951398611068726,
      "learning_rate": 0.00039044269625630956,
      "loss": 3.1519,
      "step": 92742
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5190510749816895,
      "learning_rate": 0.000390438796003738,
      "loss": 3.1053,
      "step": 92743
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7875357866287231,
      "learning_rate": 0.00039043489573435225,
      "loss": 3.1297,
      "step": 92744
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8820486068725586,
      "learning_rate": 0.000390430995448153,
      "loss": 2.6287,
      "step": 92745
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5446040630340576,
      "learning_rate": 0.00039042709514514084,
      "loss": 3.2146,
      "step": 92746
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.07576060295105,
      "learning_rate": 0.00039042319482531656,
      "loss": 3.113,
      "step": 92747
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0083627700805664,
      "learning_rate": 0.000390419294488681,
      "loss": 2.9427,
      "step": 92748
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5427470207214355,
      "learning_rate": 0.00039041539413523467,
      "loss": 3.141,
      "step": 92749
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.090198516845703,
      "learning_rate": 0.00039041149376497846,
      "loss": 3.0194,
      "step": 92750
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.3955525159835815,
      "learning_rate": 0.00039040759337791305,
      "loss": 2.6801,
      "step": 92751
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.0152244567871094,
      "learning_rate": 0.0003904036929740391,
      "loss": 3.1203,
      "step": 92752
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8764461278915405,
      "learning_rate": 0.00039039979255335746,
      "loss": 3.107,
      "step": 92753
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9093167781829834,
      "learning_rate": 0.0003903958921158689,
      "loss": 3.0689,
      "step": 92754
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9538642168045044,
      "learning_rate": 0.00039039199166157384,
      "loss": 2.9396,
      "step": 92755
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5814430713653564,
      "learning_rate": 0.0003903880911904733,
      "loss": 3.0734,
      "step": 92756
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6363402605056763,
      "learning_rate": 0.0003903841907025679,
      "loss": 3.2452,
      "step": 92757
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9925940036773682,
      "learning_rate": 0.0003903802901978583,
      "loss": 3.1281,
      "step": 92758
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.738036036491394,
      "learning_rate": 0.00039037638967634545,
      "loss": 3.0664,
      "step": 92759
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6702330112457275,
      "learning_rate": 0.0003903724891380298,
      "loss": 2.9714,
      "step": 92760
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.995314121246338,
      "learning_rate": 0.00039036858858291227,
      "loss": 3.1154,
      "step": 92761
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7135610580444336,
      "learning_rate": 0.0003903646880109935,
      "loss": 2.9626,
      "step": 92762
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3779971599578857,
      "learning_rate": 0.00039036078742227416,
      "loss": 2.9881,
      "step": 92763
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6523387432098389,
      "learning_rate": 0.00039035688681675514,
      "loss": 3.0874,
      "step": 92764
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8670761585235596,
      "learning_rate": 0.000390352986194437,
      "loss": 3.1921,
      "step": 92765
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2710869312286377,
      "learning_rate": 0.00039034908555532054,
      "loss": 2.9837,
      "step": 92766
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.634561777114868,
      "learning_rate": 0.0003903451848994065,
      "loss": 2.9125,
      "step": 92767
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7887487411499023,
      "learning_rate": 0.0003903412842266956,
      "loss": 3.2354,
      "step": 92768
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9354588985443115,
      "learning_rate": 0.00039033738353718854,
      "loss": 3.0756,
      "step": 92769
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.9657914638519287,
      "learning_rate": 0.00039033348283088604,
      "loss": 2.939,
      "step": 92770
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0308828353881836,
      "learning_rate": 0.00039032958210778886,
      "loss": 2.9705,
      "step": 92771
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7728042602539062,
      "learning_rate": 0.00039032568136789766,
      "loss": 2.8796,
      "step": 92772
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9783843755722046,
      "learning_rate": 0.00039032178061121326,
      "loss": 3.0287,
      "step": 92773
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.957181453704834,
      "learning_rate": 0.0003903178798377363,
      "loss": 2.9866,
      "step": 92774
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.0179853439331055,
      "learning_rate": 0.0003903139790474676,
      "loss": 2.9246,
      "step": 92775
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5912476778030396,
      "learning_rate": 0.00039031007824040775,
      "loss": 3.086,
      "step": 92776
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3275411128997803,
      "learning_rate": 0.0003903061774165576,
      "loss": 2.9849,
      "step": 92777
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.4550018310546875,
      "learning_rate": 0.00039030227657591785,
      "loss": 2.7787,
      "step": 92778
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.3214633464813232,
      "learning_rate": 0.0003902983757184891,
      "loss": 2.9414,
      "step": 92779
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1799614429473877,
      "learning_rate": 0.00039029447484427226,
      "loss": 2.9984,
      "step": 92780
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6481034755706787,
      "learning_rate": 0.00039029057395326795,
      "loss": 2.9569,
      "step": 92781
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.680659294128418,
      "learning_rate": 0.000390286673045477,
      "loss": 2.9823,
      "step": 92782
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.216681480407715,
      "learning_rate": 0.0003902827721208999,
      "loss": 3.0438,
      "step": 92783
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1704773902893066,
      "learning_rate": 0.00039027887117953767,
      "loss": 3.1873,
      "step": 92784
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1382017135620117,
      "learning_rate": 0.00039027497022139083,
      "loss": 3.0909,
      "step": 92785
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8712849617004395,
      "learning_rate": 0.0003902710692464601,
      "loss": 3.1843,
      "step": 92786
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3152055740356445,
      "learning_rate": 0.0003902671682547463,
      "loss": 3.1676,
      "step": 92787
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0191729068756104,
      "learning_rate": 0.0003902632672462502,
      "loss": 3.099,
      "step": 92788
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.502781629562378,
      "learning_rate": 0.0003902593662209725,
      "loss": 3.1319,
      "step": 92789
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.5850741863250732,
      "learning_rate": 0.0003902554651789138,
      "loss": 3.0084,
      "step": 92790
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8187499046325684,
      "learning_rate": 0.00039025156412007486,
      "loss": 2.8744,
      "step": 92791
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7038955688476562,
      "learning_rate": 0.0003902476630444566,
      "loss": 3.0338,
      "step": 92792
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0344746112823486,
      "learning_rate": 0.00039024376195205947,
      "loss": 3.1505,
      "step": 92793
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7582895755767822,
      "learning_rate": 0.0003902398608428843,
      "loss": 2.992,
      "step": 92794
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.894819736480713,
      "learning_rate": 0.0003902359597169319,
      "loss": 2.9052,
      "step": 92795
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0055785179138184,
      "learning_rate": 0.0003902320585742029,
      "loss": 3.1152,
      "step": 92796
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.046445608139038,
      "learning_rate": 0.0003902281574146981,
      "loss": 2.9708,
      "step": 92797
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5470502376556396,
      "learning_rate": 0.0003902242562384181,
      "loss": 2.8998,
      "step": 92798
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.734227180480957,
      "learning_rate": 0.0003902203550453638,
      "loss": 2.8776,
      "step": 92799
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.169039726257324,
      "learning_rate": 0.0003902164538355358,
      "loss": 2.9891,
      "step": 92800
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.650449275970459,
      "learning_rate": 0.00039021255260893487,
      "loss": 3.1542,
      "step": 92801
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8522372245788574,
      "learning_rate": 0.0003902086513655617,
      "loss": 3.057,
      "step": 92802
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8961610794067383,
      "learning_rate": 0.0003902047501054171,
      "loss": 3.0198,
      "step": 92803
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4970264434814453,
      "learning_rate": 0.0003902008488285016,
      "loss": 2.6735,
      "step": 92804
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5375398397445679,
      "learning_rate": 0.0003901969475348162,
      "loss": 3.165,
      "step": 92805
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.05316162109375,
      "learning_rate": 0.00039019304622436147,
      "loss": 3.196,
      "step": 92806
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8917056322097778,
      "learning_rate": 0.00039018914489713806,
      "loss": 3.0161,
      "step": 92807
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9573947191238403,
      "learning_rate": 0.00039018524355314676,
      "loss": 3.331,
      "step": 92808
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0885446071624756,
      "learning_rate": 0.00039018134219238844,
      "loss": 2.9316,
      "step": 92809
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.591822862625122,
      "learning_rate": 0.0003901774408148637,
      "loss": 2.9245,
      "step": 92810
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3967483043670654,
      "learning_rate": 0.0003901735394205732,
      "loss": 2.9581,
      "step": 92811
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5475691556930542,
      "learning_rate": 0.00039016963800951783,
      "loss": 3.0508,
      "step": 92812
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6531791687011719,
      "learning_rate": 0.0003901657365816981,
      "loss": 3.1457,
      "step": 92813
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.409494400024414,
      "learning_rate": 0.000390161835137115,
      "loss": 3.3054,
      "step": 92814
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.034320831298828,
      "learning_rate": 0.000390157933675769,
      "loss": 2.9757,
      "step": 92815
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5043188333511353,
      "learning_rate": 0.0003901540321976609,
      "loss": 2.9395,
      "step": 92816
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6612536907196045,
      "learning_rate": 0.00039015013070279155,
      "loss": 3.1005,
      "step": 92817
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1349308490753174,
      "learning_rate": 0.00039014622919116167,
      "loss": 2.9786,
      "step": 92818
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7719348669052124,
      "learning_rate": 0.00039014232766277176,
      "loss": 2.9291,
      "step": 92819
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.215620279312134,
      "learning_rate": 0.00039013842611762274,
      "loss": 3.0783,
      "step": 92820
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.047783613204956,
      "learning_rate": 0.0003901345245557153,
      "loss": 2.9967,
      "step": 92821
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.009937047958374,
      "learning_rate": 0.00039013062297705013,
      "loss": 2.9526,
      "step": 92822
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7955752611160278,
      "learning_rate": 0.00039012672138162796,
      "loss": 2.9773,
      "step": 92823
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6469520330429077,
      "learning_rate": 0.00039012281976944955,
      "loss": 3.054,
      "step": 92824
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5548245906829834,
      "learning_rate": 0.0003901189181405156,
      "loss": 3.0944,
      "step": 92825
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8882023096084595,
      "learning_rate": 0.00039011501649482684,
      "loss": 3.0807,
      "step": 92826
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6574429273605347,
      "learning_rate": 0.00039011111483238406,
      "loss": 3.3121,
      "step": 92827
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9946235418319702,
      "learning_rate": 0.00039010721315318786,
      "loss": 3.0232,
      "step": 92828
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5685821771621704,
      "learning_rate": 0.000390103311457239,
      "loss": 3.0041,
      "step": 92829
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7183982133865356,
      "learning_rate": 0.0003900994097445383,
      "loss": 3.1927,
      "step": 92830
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2005059719085693,
      "learning_rate": 0.0003900955080150864,
      "loss": 3.385,
      "step": 92831
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9126560688018799,
      "learning_rate": 0.000390091606268884,
      "loss": 3.0413,
      "step": 92832
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0672404766082764,
      "learning_rate": 0.00039008770450593195,
      "loss": 2.9855,
      "step": 92833
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4174230098724365,
      "learning_rate": 0.0003900838027262308,
      "loss": 2.9789,
      "step": 92834
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5609532594680786,
      "learning_rate": 0.0003900799009297814,
      "loss": 2.9505,
      "step": 92835
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3740580081939697,
      "learning_rate": 0.00039007599911658453,
      "loss": 2.9408,
      "step": 92836
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.174529790878296,
      "learning_rate": 0.00039007209728664075,
      "loss": 3.1723,
      "step": 92837
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6374815702438354,
      "learning_rate": 0.0003900681954399508,
      "loss": 3.0894,
      "step": 92838
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7783839702606201,
      "learning_rate": 0.0003900642935765157,
      "loss": 3.0025,
      "step": 92839
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7262647151947021,
      "learning_rate": 0.00039006039169633576,
      "loss": 2.9971,
      "step": 92840
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7459224462509155,
      "learning_rate": 0.0003900564897994119,
      "loss": 2.7128,
      "step": 92841
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.143950939178467,
      "learning_rate": 0.0003900525878857449,
      "loss": 3.1267,
      "step": 92842
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1130805015563965,
      "learning_rate": 0.00039004868595533544,
      "loss": 3.1096,
      "step": 92843
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5250409841537476,
      "learning_rate": 0.0003900447840081842,
      "loss": 3.0306,
      "step": 92844
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.567286252975464,
      "learning_rate": 0.0003900408820442919,
      "loss": 3.0349,
      "step": 92845
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6909323930740356,
      "learning_rate": 0.0003900369800636593,
      "loss": 3.2144,
      "step": 92846
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6515480279922485,
      "learning_rate": 0.0003900330780662872,
      "loss": 2.9027,
      "step": 92847
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.849066138267517,
      "learning_rate": 0.0003900291760521762,
      "loss": 3.008,
      "step": 92848
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.666596531867981,
      "learning_rate": 0.000390025274021327,
      "loss": 3.0197,
      "step": 92849
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6269887685775757,
      "learning_rate": 0.00039002137197374054,
      "loss": 3.2424,
      "step": 92850
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5589616298675537,
      "learning_rate": 0.00039001746990941725,
      "loss": 3.0252,
      "step": 92851
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3734476566314697,
      "learning_rate": 0.0003900135678283582,
      "loss": 3.0629,
      "step": 92852
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5606296062469482,
      "learning_rate": 0.00039000966573056386,
      "loss": 2.8803,
      "step": 92853
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1908326148986816,
      "learning_rate": 0.00039000576361603495,
      "loss": 2.9175,
      "step": 92854
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.624887466430664,
      "learning_rate": 0.00039000186148477236,
      "loss": 3.158,
      "step": 92855
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.428767681121826,
      "learning_rate": 0.0003899979593367767,
      "loss": 2.8881,
      "step": 92856
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.529753565788269,
      "learning_rate": 0.0003899940571720486,
      "loss": 3.2384,
      "step": 92857
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7401089668273926,
      "learning_rate": 0.00038999015499058915,
      "loss": 2.9742,
      "step": 92858
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5297250747680664,
      "learning_rate": 0.00038998625279239866,
      "loss": 2.9941,
      "step": 92859
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0844931602478027,
      "learning_rate": 0.00038998235057747805,
      "loss": 3.1025,
      "step": 92860
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0037729740142822,
      "learning_rate": 0.0003899784483458281,
      "loss": 3.1639,
      "step": 92861
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5773155689239502,
      "learning_rate": 0.00038997454609744934,
      "loss": 3.0014,
      "step": 92862
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.828426480293274,
      "learning_rate": 0.00038997064383234265,
      "loss": 2.9829,
      "step": 92863
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0777199268341064,
      "learning_rate": 0.00038996674155050877,
      "loss": 3.1433,
      "step": 92864
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.486639976501465,
      "learning_rate": 0.0003899628392519483,
      "loss": 3.0477,
      "step": 92865
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.477052927017212,
      "learning_rate": 0.0003899589369366621,
      "loss": 3.2233,
      "step": 92866
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.289386510848999,
      "learning_rate": 0.00038995503460465084,
      "loss": 2.9885,
      "step": 92867
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.060914993286133,
      "learning_rate": 0.0003899511322559152,
      "loss": 3.0246,
      "step": 92868
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2618534564971924,
      "learning_rate": 0.000389947229890456,
      "loss": 2.8904,
      "step": 92869
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.159172773361206,
      "learning_rate": 0.0003899433275082739,
      "loss": 3.177,
      "step": 92870
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.553519368171692,
      "learning_rate": 0.00038993942510936954,
      "loss": 2.9713,
      "step": 92871
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.577449321746826,
      "learning_rate": 0.00038993552269374386,
      "loss": 3.3074,
      "step": 92872
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.030546188354492,
      "learning_rate": 0.0003899316202613974,
      "loss": 2.9229,
      "step": 92873
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0458600521087646,
      "learning_rate": 0.000389927717812331,
      "loss": 2.9981,
      "step": 92874
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1227571964263916,
      "learning_rate": 0.0003899238153465453,
      "loss": 3.0422,
      "step": 92875
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.261981964111328,
      "learning_rate": 0.00038991991286404105,
      "loss": 3.0628,
      "step": 92876
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.328913927078247,
      "learning_rate": 0.00038991601036481905,
      "loss": 2.888,
      "step": 92877
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9208804368972778,
      "learning_rate": 0.00038991210784887996,
      "loss": 2.9881,
      "step": 92878
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0950677394866943,
      "learning_rate": 0.0003899082053162245,
      "loss": 2.7355,
      "step": 92879
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2285449504852295,
      "learning_rate": 0.00038990430276685336,
      "loss": 3.0382,
      "step": 92880
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.956697702407837,
      "learning_rate": 0.00038990040020076737,
      "loss": 2.8514,
      "step": 92881
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9257960319519043,
      "learning_rate": 0.0003898964976179672,
      "loss": 3.1264,
      "step": 92882
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8492228984832764,
      "learning_rate": 0.00038989259501845354,
      "loss": 3.0652,
      "step": 92883
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8720518350601196,
      "learning_rate": 0.0003898886924022272,
      "loss": 3.1873,
      "step": 92884
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8825697898864746,
      "learning_rate": 0.00038988478976928877,
      "loss": 2.6604,
      "step": 92885
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.631705403327942,
      "learning_rate": 0.00038988088711963914,
      "loss": 3.1123,
      "step": 92886
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8950896263122559,
      "learning_rate": 0.00038987698445327887,
      "loss": 3.133,
      "step": 92887
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7105259895324707,
      "learning_rate": 0.0003898730817702089,
      "loss": 3.0292,
      "step": 92888
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.493548631668091,
      "learning_rate": 0.00038986917907042975,
      "loss": 2.9171,
      "step": 92889
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0172383785247803,
      "learning_rate": 0.0003898652763539422,
      "loss": 3.0645,
      "step": 92890
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.573931336402893,
      "learning_rate": 0.00038986137362074705,
      "loss": 3.0356,
      "step": 92891
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6402335166931152,
      "learning_rate": 0.00038985747087084494,
      "loss": 2.927,
      "step": 92892
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.488325834274292,
      "learning_rate": 0.0003898535681042366,
      "loss": 3.0874,
      "step": 92893
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2218570709228516,
      "learning_rate": 0.00038984966532092287,
      "loss": 2.9509,
      "step": 92894
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9249813556671143,
      "learning_rate": 0.0003898457625209044,
      "loss": 3.0778,
      "step": 92895
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2197139263153076,
      "learning_rate": 0.0003898418597041818,
      "loss": 2.9968,
      "step": 92896
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3608179092407227,
      "learning_rate": 0.000389837956870756,
      "loss": 3.0571,
      "step": 92897
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7948991060256958,
      "learning_rate": 0.0003898340540206276,
      "loss": 2.8895,
      "step": 92898
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1564300060272217,
      "learning_rate": 0.00038983015115379735,
      "loss": 3.0353,
      "step": 92899
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5728914737701416,
      "learning_rate": 0.00038982624827026596,
      "loss": 3.1384,
      "step": 92900
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9392145872116089,
      "learning_rate": 0.0003898223453700342,
      "loss": 2.9662,
      "step": 92901
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.214111804962158,
      "learning_rate": 0.00038981844245310275,
      "loss": 2.8443,
      "step": 92902
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8525735139846802,
      "learning_rate": 0.0003898145395194724,
      "loss": 3.0621,
      "step": 92903
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8182364702224731,
      "learning_rate": 0.00038981063656914373,
      "loss": 2.9175,
      "step": 92904
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6058822870254517,
      "learning_rate": 0.0003898067336021176,
      "loss": 3.0312,
      "step": 92905
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4768885374069214,
      "learning_rate": 0.00038980283061839483,
      "loss": 2.911,
      "step": 92906
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.4439125061035156,
      "learning_rate": 0.0003897989276179759,
      "loss": 2.9181,
      "step": 92907
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4052846431732178,
      "learning_rate": 0.0003897950246008617,
      "loss": 2.8214,
      "step": 92908
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7148813009262085,
      "learning_rate": 0.0003897911215670529,
      "loss": 3.0275,
      "step": 92909
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.040614366531372,
      "learning_rate": 0.0003897872185165502,
      "loss": 3.0259,
      "step": 92910
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7980642318725586,
      "learning_rate": 0.00038978331544935435,
      "loss": 2.9652,
      "step": 92911
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6118426322937012,
      "learning_rate": 0.0003897794123654662,
      "loss": 3.1076,
      "step": 92912
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.624894857406616,
      "learning_rate": 0.00038977550926488626,
      "loss": 3.1408,
      "step": 92913
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6207696199417114,
      "learning_rate": 0.0003897716061476153,
      "loss": 2.968,
      "step": 92914
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9815491437911987,
      "learning_rate": 0.0003897677030136543,
      "loss": 2.9016,
      "step": 92915
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9277417659759521,
      "learning_rate": 0.00038976379986300363,
      "loss": 2.9321,
      "step": 92916
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2499306201934814,
      "learning_rate": 0.00038975989669566417,
      "loss": 2.8888,
      "step": 92917
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7938246726989746,
      "learning_rate": 0.0003897559935116367,
      "loss": 2.8999,
      "step": 92918
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7199660539627075,
      "learning_rate": 0.000389752090310922,
      "loss": 3.1656,
      "step": 92919
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.153683662414551,
      "learning_rate": 0.00038974818709352056,
      "loss": 2.8291,
      "step": 92920
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.845351457595825,
      "learning_rate": 0.0003897442838594332,
      "loss": 3.0961,
      "step": 92921
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8429821729660034,
      "learning_rate": 0.0003897403806086608,
      "loss": 2.9086,
      "step": 92922
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5751852989196777,
      "learning_rate": 0.00038973647734120395,
      "loss": 2.8829,
      "step": 92923
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.076153516769409,
      "learning_rate": 0.00038973257405706333,
      "loss": 2.7416,
      "step": 92924
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9797686338424683,
      "learning_rate": 0.00038972867075623977,
      "loss": 3.1338,
      "step": 92925
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9289307594299316,
      "learning_rate": 0.0003897247674387339,
      "loss": 3.0162,
      "step": 92926
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.880606770515442,
      "learning_rate": 0.00038972086410454657,
      "loss": 3.284,
      "step": 92927
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4908928871154785,
      "learning_rate": 0.00038971696075367846,
      "loss": 2.816,
      "step": 92928
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6837167739868164,
      "learning_rate": 0.00038971305738613027,
      "loss": 3.1154,
      "step": 92929
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7977285385131836,
      "learning_rate": 0.0003897091540019026,
      "loss": 3.0797,
      "step": 92930
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0799038410186768,
      "learning_rate": 0.00038970525060099645,
      "loss": 2.7915,
      "step": 92931
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.854995608329773,
      "learning_rate": 0.0003897013471834124,
      "loss": 2.8091,
      "step": 92932
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.083828926086426,
      "learning_rate": 0.0003896974437491511,
      "loss": 3.0919,
      "step": 92933
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.644026756286621,
      "learning_rate": 0.0003896935402982134,
      "loss": 3.101,
      "step": 92934
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5057196617126465,
      "learning_rate": 0.00038968963683059995,
      "loss": 3.1139,
      "step": 92935
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.267200231552124,
      "learning_rate": 0.00038968573334631154,
      "loss": 3.291,
      "step": 92936
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7972440719604492,
      "learning_rate": 0.0003896818298453488,
      "loss": 2.929,
      "step": 92937
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.524739980697632,
      "learning_rate": 0.0003896779263277125,
      "loss": 3.1531,
      "step": 92938
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.034780979156494,
      "learning_rate": 0.00038967402279340346,
      "loss": 3.1464,
      "step": 92939
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.114388942718506,
      "learning_rate": 0.00038967011924242236,
      "loss": 3.0036,
      "step": 92940
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4508700370788574,
      "learning_rate": 0.00038966621567476976,
      "loss": 2.9931,
      "step": 92941
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7196048498153687,
      "learning_rate": 0.00038966231209044657,
      "loss": 3.1926,
      "step": 92942
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4790873527526855,
      "learning_rate": 0.00038965840848945355,
      "loss": 3.051,
      "step": 92943
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.171621084213257,
      "learning_rate": 0.0003896545048717912,
      "loss": 2.9063,
      "step": 92944
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.480419158935547,
      "learning_rate": 0.00038965060123746044,
      "loss": 2.9432,
      "step": 92945
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1289138793945312,
      "learning_rate": 0.000389646697586462,
      "loss": 2.8511,
      "step": 92946
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6397126913070679,
      "learning_rate": 0.0003896427939187965,
      "loss": 3.0898,
      "step": 92947
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9338129758834839,
      "learning_rate": 0.0003896388902344647,
      "loss": 2.8124,
      "step": 92948
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9107329845428467,
      "learning_rate": 0.00038963498653346736,
      "loss": 2.885,
      "step": 92949
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6870903968811035,
      "learning_rate": 0.0003896310828158052,
      "loss": 2.9029,
      "step": 92950
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5242054462432861,
      "learning_rate": 0.00038962717908147884,
      "loss": 2.8581,
      "step": 92951
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.8518903255462646,
      "learning_rate": 0.00038962327533048917,
      "loss": 2.7914,
      "step": 92952
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9468514919281006,
      "learning_rate": 0.0003896193715628368,
      "loss": 3.1184,
      "step": 92953
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8331360816955566,
      "learning_rate": 0.00038961546777852257,
      "loss": 3.0067,
      "step": 92954
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8631057739257812,
      "learning_rate": 0.0003896115639775471,
      "loss": 3.0726,
      "step": 92955
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8324910402297974,
      "learning_rate": 0.00038960766015991115,
      "loss": 3.0321,
      "step": 92956
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.774281620979309,
      "learning_rate": 0.00038960375632561545,
      "loss": 3.0296,
      "step": 92957
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.066470146179199,
      "learning_rate": 0.00038959985247466065,
      "loss": 3.113,
      "step": 92958
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5636653900146484,
      "learning_rate": 0.00038959594860704763,
      "loss": 2.937,
      "step": 92959
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8413007259368896,
      "learning_rate": 0.000389592044722777,
      "loss": 3.0849,
      "step": 92960
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2388110160827637,
      "learning_rate": 0.0003895881408218495,
      "loss": 2.859,
      "step": 92961
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.081786870956421,
      "learning_rate": 0.0003895842369042659,
      "loss": 3.0381,
      "step": 92962
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.622780203819275,
      "learning_rate": 0.0003895803329700269,
      "loss": 3.0653,
      "step": 92963
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.095665454864502,
      "learning_rate": 0.0003895764290191332,
      "loss": 2.9583,
      "step": 92964
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.454327344894409,
      "learning_rate": 0.00038957252505158557,
      "loss": 2.9538,
      "step": 92965
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6949231624603271,
      "learning_rate": 0.0003895686210673847,
      "loss": 2.8942,
      "step": 92966
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.789712905883789,
      "learning_rate": 0.00038956471706653133,
      "loss": 3.0002,
      "step": 92967
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8388549089431763,
      "learning_rate": 0.0003895608130490262,
      "loss": 2.8385,
      "step": 92968
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.958924293518066,
      "learning_rate": 0.0003895569090148701,
      "loss": 3.146,
      "step": 92969
    },
    {
      "epoch": 1.21,
      "grad_norm": 5.801758766174316,
      "learning_rate": 0.0003895530049640636,
      "loss": 3.108,
      "step": 92970
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.3264801502227783,
      "learning_rate": 0.00038954910089660747,
      "loss": 3.0433,
      "step": 92971
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.996132254600525,
      "learning_rate": 0.0003895451968125025,
      "loss": 3.0556,
      "step": 92972
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4336297512054443,
      "learning_rate": 0.00038954129271174945,
      "loss": 3.0615,
      "step": 92973
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.9363772869110107,
      "learning_rate": 0.0003895373885943489,
      "loss": 2.9345,
      "step": 92974
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.605011463165283,
      "learning_rate": 0.00038953348446030175,
      "loss": 3.4058,
      "step": 92975
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9412833452224731,
      "learning_rate": 0.0003895295803096086,
      "loss": 3.1664,
      "step": 92976
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9370524883270264,
      "learning_rate": 0.00038952567614227015,
      "loss": 3.1757,
      "step": 92977
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.417447090148926,
      "learning_rate": 0.0003895217719582872,
      "loss": 3.0498,
      "step": 92978
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.2058520317077637,
      "learning_rate": 0.0003895178677576605,
      "loss": 2.9493,
      "step": 92979
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.5485198497772217,
      "learning_rate": 0.0003895139635403907,
      "loss": 2.6409,
      "step": 92980
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.56398606300354,
      "learning_rate": 0.00038951005930647865,
      "loss": 3.0524,
      "step": 92981
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.397385358810425,
      "learning_rate": 0.0003895061550559249,
      "loss": 3.1443,
      "step": 92982
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.957577705383301,
      "learning_rate": 0.00038950225078873034,
      "loss": 2.96,
      "step": 92983
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.842519998550415,
      "learning_rate": 0.0003894983465048956,
      "loss": 2.9583,
      "step": 92984
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6917815208435059,
      "learning_rate": 0.0003894944422044214,
      "loss": 3.049,
      "step": 92985
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.3364949226379395,
      "learning_rate": 0.0003894905378873085,
      "loss": 2.9223,
      "step": 92986
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7720701694488525,
      "learning_rate": 0.00038948663355355763,
      "loss": 2.9817,
      "step": 92987
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.202620506286621,
      "learning_rate": 0.0003894827292031695,
      "loss": 3.1872,
      "step": 92988
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4600303173065186,
      "learning_rate": 0.0003894788248361449,
      "loss": 3.0183,
      "step": 92989
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5552213191986084,
      "learning_rate": 0.0003894749204524844,
      "loss": 2.7498,
      "step": 92990
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.769895076751709,
      "learning_rate": 0.00038947101605218883,
      "loss": 2.8936,
      "step": 92991
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.547069549560547,
      "learning_rate": 0.000389467111635259,
      "loss": 3.0821,
      "step": 92992
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.304439067840576,
      "learning_rate": 0.00038946320720169547,
      "loss": 3.0111,
      "step": 92993
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2277028560638428,
      "learning_rate": 0.00038945930275149904,
      "loss": 2.6851,
      "step": 92994
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8822396993637085,
      "learning_rate": 0.0003894553982846705,
      "loss": 2.826,
      "step": 92995
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8760693073272705,
      "learning_rate": 0.0003894514938012105,
      "loss": 2.9298,
      "step": 92996
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4090256690979004,
      "learning_rate": 0.00038944758930111963,
      "loss": 3.1,
      "step": 92997
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.800105333328247,
      "learning_rate": 0.000389443684784399,
      "loss": 2.8407,
      "step": 92998
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7350056171417236,
      "learning_rate": 0.00038943978025104893,
      "loss": 2.8276,
      "step": 92999
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.9411797523498535,
      "learning_rate": 0.0003894358757010704,
      "loss": 2.9914,
      "step": 93000
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.523045301437378,
      "learning_rate": 0.000389431971134464,
      "loss": 3.1704,
      "step": 93001
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.402103900909424,
      "learning_rate": 0.00038942806655123055,
      "loss": 3.1584,
      "step": 93002
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7241536378860474,
      "learning_rate": 0.0003894241619513707,
      "loss": 2.7493,
      "step": 93003
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.985473394393921,
      "learning_rate": 0.00038942025733488525,
      "loss": 3.2937,
      "step": 93004
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.6235640048980713,
      "learning_rate": 0.00038941635270177486,
      "loss": 3.0039,
      "step": 93005
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7209147214889526,
      "learning_rate": 0.00038941244805204024,
      "loss": 2.8752,
      "step": 93006
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.536579966545105,
      "learning_rate": 0.00038940854338568226,
      "loss": 3.1913,
      "step": 93007
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.496314287185669,
      "learning_rate": 0.00038940463870270145,
      "loss": 3.0222,
      "step": 93008
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1279749870300293,
      "learning_rate": 0.0003894007340030987,
      "loss": 2.6953,
      "step": 93009
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9689218997955322,
      "learning_rate": 0.00038939682928687465,
      "loss": 3.076,
      "step": 93010
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8398011922836304,
      "learning_rate": 0.00038939292455403,
      "loss": 2.8443,
      "step": 93011
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.38556170463562,
      "learning_rate": 0.00038938901980456556,
      "loss": 2.9312,
      "step": 93012
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6968703269958496,
      "learning_rate": 0.000389385115038482,
      "loss": 3.185,
      "step": 93013
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.100264549255371,
      "learning_rate": 0.00038938121025578,
      "loss": 3.0657,
      "step": 93014
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.945655345916748,
      "learning_rate": 0.0003893773054564604,
      "loss": 2.9996,
      "step": 93015
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.079284191131592,
      "learning_rate": 0.00038937340064052394,
      "loss": 2.9954,
      "step": 93016
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4560673236846924,
      "learning_rate": 0.00038936949580797126,
      "loss": 2.7587,
      "step": 93017
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4301838874816895,
      "learning_rate": 0.00038936559095880297,
      "loss": 2.9992,
      "step": 93018
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8451883792877197,
      "learning_rate": 0.00038936168609302014,
      "loss": 2.8156,
      "step": 93019
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.131007671356201,
      "learning_rate": 0.0003893577812106231,
      "loss": 3.1811,
      "step": 93020
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.017021894454956,
      "learning_rate": 0.0003893538763116128,
      "loss": 2.9459,
      "step": 93021
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7072174549102783,
      "learning_rate": 0.00038934997139598995,
      "loss": 3.1387,
      "step": 93022
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.721413254737854,
      "learning_rate": 0.00038934606646375523,
      "loss": 2.9841,
      "step": 93023
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.037921905517578,
      "learning_rate": 0.00038934216151490945,
      "loss": 3.0062,
      "step": 93024
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.1125423908233643,
      "learning_rate": 0.00038933825654945326,
      "loss": 2.8129,
      "step": 93025
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0718400478363037,
      "learning_rate": 0.00038933435156738737,
      "loss": 2.9641,
      "step": 93026
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5510218143463135,
      "learning_rate": 0.0003893304465687125,
      "loss": 3.0773,
      "step": 93027
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.050781011581421,
      "learning_rate": 0.00038932654155342955,
      "loss": 2.9267,
      "step": 93028
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.693771481513977,
      "learning_rate": 0.00038932263652153903,
      "loss": 3.1433,
      "step": 93029
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.607844591140747,
      "learning_rate": 0.0003893187314730417,
      "loss": 3.1986,
      "step": 93030
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6753430366516113,
      "learning_rate": 0.0003893148264079384,
      "loss": 3.1335,
      "step": 93031
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.228026866912842,
      "learning_rate": 0.00038931092132622974,
      "loss": 2.6417,
      "step": 93032
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8440635204315186,
      "learning_rate": 0.0003893070162279165,
      "loss": 2.9366,
      "step": 93033
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6092629432678223,
      "learning_rate": 0.00038930311111299947,
      "loss": 2.9879,
      "step": 93034
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6939958333969116,
      "learning_rate": 0.0003892992059814792,
      "loss": 3.1785,
      "step": 93035
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0143399238586426,
      "learning_rate": 0.0003892953008333566,
      "loss": 3.0803,
      "step": 93036
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.3499990701675415,
      "learning_rate": 0.00038929139566863224,
      "loss": 2.8554,
      "step": 93037
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9010844230651855,
      "learning_rate": 0.000389287490487307,
      "loss": 2.8882,
      "step": 93038
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5558547973632812,
      "learning_rate": 0.0003892835852893815,
      "loss": 3.0599,
      "step": 93039
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4594935178756714,
      "learning_rate": 0.00038927968007485653,
      "loss": 3.172,
      "step": 93040
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5155006647109985,
      "learning_rate": 0.00038927577484373276,
      "loss": 3.0363,
      "step": 93041
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8763154745101929,
      "learning_rate": 0.0003892718695960109,
      "loss": 3.115,
      "step": 93042
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.661668062210083,
      "learning_rate": 0.00038926796433169175,
      "loss": 2.8655,
      "step": 93043
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4775621891021729,
      "learning_rate": 0.00038926405905077594,
      "loss": 2.79,
      "step": 93044
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0362586975097656,
      "learning_rate": 0.0003892601537532643,
      "loss": 3.0564,
      "step": 93045
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7224692106246948,
      "learning_rate": 0.00038925624843915763,
      "loss": 3.3142,
      "step": 93046
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.859344720840454,
      "learning_rate": 0.0003892523431084564,
      "loss": 3.1637,
      "step": 93047
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9608131647109985,
      "learning_rate": 0.00038924843776116144,
      "loss": 3.0804,
      "step": 93048
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8224581480026245,
      "learning_rate": 0.0003892445323972736,
      "loss": 2.8951,
      "step": 93049
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.396815776824951,
      "learning_rate": 0.00038924062701679353,
      "loss": 3.1194,
      "step": 93050
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7717963457107544,
      "learning_rate": 0.0003892367216197219,
      "loss": 2.9862,
      "step": 93051
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5152212381362915,
      "learning_rate": 0.00038923281620605943,
      "loss": 3.1743,
      "step": 93052
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.736714243888855,
      "learning_rate": 0.00038922891077580697,
      "loss": 3.1731,
      "step": 93053
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.037435531616211,
      "learning_rate": 0.0003892250053289652,
      "loss": 2.8742,
      "step": 93054
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2601234912872314,
      "learning_rate": 0.0003892210998655347,
      "loss": 3.1766,
      "step": 93055
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3363146781921387,
      "learning_rate": 0.00038921719438551633,
      "loss": 2.9216,
      "step": 93056
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9120523929595947,
      "learning_rate": 0.00038921328888891096,
      "loss": 3.0687,
      "step": 93057
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5722427368164062,
      "learning_rate": 0.000389209383375719,
      "loss": 3.0082,
      "step": 93058
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5043606758117676,
      "learning_rate": 0.00038920547784594136,
      "loss": 2.8285,
      "step": 93059
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.6280853748321533,
      "learning_rate": 0.00038920157229957876,
      "loss": 3.1003,
      "step": 93060
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3305869102478027,
      "learning_rate": 0.00038919766673663187,
      "loss": 3.098,
      "step": 93061
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7517054080963135,
      "learning_rate": 0.0003891937611571015,
      "loss": 2.7437,
      "step": 93062
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5251400470733643,
      "learning_rate": 0.00038918985556098834,
      "loss": 3.0989,
      "step": 93063
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8587783575057983,
      "learning_rate": 0.000389185949948293,
      "loss": 3.1556,
      "step": 93064
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.923325777053833,
      "learning_rate": 0.0003891820443190164,
      "loss": 2.844,
      "step": 93065
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.103572368621826,
      "learning_rate": 0.00038917813867315917,
      "loss": 2.9398,
      "step": 93066
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.686565637588501,
      "learning_rate": 0.000389174233010722,
      "loss": 2.9575,
      "step": 93067
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9370472431182861,
      "learning_rate": 0.0003891703273317057,
      "loss": 3.1492,
      "step": 93068
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7618316411972046,
      "learning_rate": 0.00038916642163611093,
      "loss": 3.0261,
      "step": 93069
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8565802574157715,
      "learning_rate": 0.00038916251592393836,
      "loss": 2.8754,
      "step": 93070
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9575730562210083,
      "learning_rate": 0.000389158610195189,
      "loss": 3.1805,
      "step": 93071
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6718240976333618,
      "learning_rate": 0.0003891547044498632,
      "loss": 3.044,
      "step": 93072
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5543383359909058,
      "learning_rate": 0.0003891507986879619,
      "loss": 2.872,
      "step": 93073
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6869845390319824,
      "learning_rate": 0.00038914689290948587,
      "loss": 2.8624,
      "step": 93074
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.73366379737854,
      "learning_rate": 0.00038914298711443564,
      "loss": 2.9328,
      "step": 93075
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7932380437850952,
      "learning_rate": 0.000389139081302812,
      "loss": 2.9939,
      "step": 93076
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3303568363189697,
      "learning_rate": 0.0003891351754746159,
      "loss": 3.0477,
      "step": 93077
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.759330153465271,
      "learning_rate": 0.0003891312696298477,
      "loss": 2.8661,
      "step": 93078
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5681538581848145,
      "learning_rate": 0.00038912736376850844,
      "loss": 3.3161,
      "step": 93079
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1670310497283936,
      "learning_rate": 0.00038912345789059866,
      "loss": 2.9398,
      "step": 93080
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.477504253387451,
      "learning_rate": 0.00038911955199611917,
      "loss": 2.5971,
      "step": 93081
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6556624174118042,
      "learning_rate": 0.00038911564608507063,
      "loss": 2.969,
      "step": 93082
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.063657522201538,
      "learning_rate": 0.0003891117401574539,
      "loss": 2.8956,
      "step": 93083
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.316964864730835,
      "learning_rate": 0.00038910783421326956,
      "loss": 3.2084,
      "step": 93084
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.664297103881836,
      "learning_rate": 0.0003891039282525183,
      "loss": 2.8678,
      "step": 93085
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8684619665145874,
      "learning_rate": 0.0003891000222752012,
      "loss": 3.0478,
      "step": 93086
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0050222873687744,
      "learning_rate": 0.00038909611628131847,
      "loss": 3.2522,
      "step": 93087
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6630635261535645,
      "learning_rate": 0.0003890922102708712,
      "loss": 2.9572,
      "step": 93088
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9348974227905273,
      "learning_rate": 0.00038908830424386003,
      "loss": 2.8987,
      "step": 93089
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.675082802772522,
      "learning_rate": 0.00038908439820028556,
      "loss": 3.0,
      "step": 93090
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.435818910598755,
      "learning_rate": 0.00038908049214014866,
      "loss": 3.0627,
      "step": 93091
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7448365688323975,
      "learning_rate": 0.0003890765860634501,
      "loss": 3.0445,
      "step": 93092
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.757100224494934,
      "learning_rate": 0.00038907267997019036,
      "loss": 3.3087,
      "step": 93093
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.895896077156067,
      "learning_rate": 0.00038906877386037043,
      "loss": 2.8186,
      "step": 93094
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7519999742507935,
      "learning_rate": 0.00038906486773399094,
      "loss": 3.0433,
      "step": 93095
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7394804954528809,
      "learning_rate": 0.00038906096159105265,
      "loss": 2.8607,
      "step": 93096
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9719886779785156,
      "learning_rate": 0.00038905705543155615,
      "loss": 2.6676,
      "step": 93097
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8509644269943237,
      "learning_rate": 0.0003890531492555023,
      "loss": 2.9126,
      "step": 93098
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.521914482116699,
      "learning_rate": 0.00038904924306289184,
      "loss": 2.8599,
      "step": 93099
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9149187803268433,
      "learning_rate": 0.0003890453368537254,
      "loss": 2.942,
      "step": 93100
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9363303184509277,
      "learning_rate": 0.00038904143062800376,
      "loss": 2.7254,
      "step": 93101
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.246366500854492,
      "learning_rate": 0.00038903752438572756,
      "loss": 3.0194,
      "step": 93102
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.182372808456421,
      "learning_rate": 0.00038903361812689765,
      "loss": 3.0867,
      "step": 93103
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9635812044143677,
      "learning_rate": 0.0003890297118515148,
      "loss": 2.9066,
      "step": 93104
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6522260904312134,
      "learning_rate": 0.0003890258055595796,
      "loss": 3.0269,
      "step": 93105
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8792803287506104,
      "learning_rate": 0.00038902189925109267,
      "loss": 2.941,
      "step": 93106
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.564944744110107,
      "learning_rate": 0.0003890179929260551,
      "loss": 2.9921,
      "step": 93107
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.986983060836792,
      "learning_rate": 0.0003890140865844673,
      "loss": 3.3271,
      "step": 93108
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1817643642425537,
      "learning_rate": 0.0003890101802263301,
      "loss": 2.921,
      "step": 93109
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.9871628284454346,
      "learning_rate": 0.0003890062738516443,
      "loss": 3.0553,
      "step": 93110
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.70493745803833,
      "learning_rate": 0.00038900236746041046,
      "loss": 3.0779,
      "step": 93111
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6359630823135376,
      "learning_rate": 0.00038899846105262944,
      "loss": 3.0772,
      "step": 93112
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9372599124908447,
      "learning_rate": 0.0003889945546283019,
      "loss": 3.1901,
      "step": 93113
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.243819236755371,
      "learning_rate": 0.0003889906481874286,
      "loss": 2.815,
      "step": 93114
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5308483839035034,
      "learning_rate": 0.0003889867417300103,
      "loss": 3.0325,
      "step": 93115
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7137044668197632,
      "learning_rate": 0.0003889828352560477,
      "loss": 3.1154,
      "step": 93116
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.089165210723877,
      "learning_rate": 0.0003889789287655415,
      "loss": 3.0069,
      "step": 93117
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.335552930831909,
      "learning_rate": 0.00038897502225849237,
      "loss": 3.1242,
      "step": 93118
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6212128400802612,
      "learning_rate": 0.0003889711157349011,
      "loss": 2.9733,
      "step": 93119
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8387447595596313,
      "learning_rate": 0.00038896720919476854,
      "loss": 3.0345,
      "step": 93120
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5967199802398682,
      "learning_rate": 0.0003889633026380952,
      "loss": 2.8138,
      "step": 93121
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6397309303283691,
      "learning_rate": 0.00038895939606488193,
      "loss": 2.8583,
      "step": 93122
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5534095764160156,
      "learning_rate": 0.00038895548947512945,
      "loss": 3.0571,
      "step": 93123
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.74941086769104,
      "learning_rate": 0.0003889515828688384,
      "loss": 2.7682,
      "step": 93124
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6079310178756714,
      "learning_rate": 0.0003889476762460096,
      "loss": 2.843,
      "step": 93125
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.996522307395935,
      "learning_rate": 0.0003889437696066438,
      "loss": 3.107,
      "step": 93126
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.711269497871399,
      "learning_rate": 0.0003889398629507416,
      "loss": 3.2565,
      "step": 93127
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5259976387023926,
      "learning_rate": 0.0003889359562783038,
      "loss": 2.6022,
      "step": 93128
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0585434436798096,
      "learning_rate": 0.0003889320495893312,
      "loss": 3.1953,
      "step": 93129
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9024529457092285,
      "learning_rate": 0.0003889281428838244,
      "loss": 3.1622,
      "step": 93130
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5879014730453491,
      "learning_rate": 0.0003889242361617842,
      "loss": 3.0182,
      "step": 93131
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5054831504821777,
      "learning_rate": 0.0003889203294232113,
      "loss": 2.7911,
      "step": 93132
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.671518564224243,
      "learning_rate": 0.00038891642266810646,
      "loss": 3.1867,
      "step": 93133
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8597747087478638,
      "learning_rate": 0.00038891251589647034,
      "loss": 2.9812,
      "step": 93134
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.591613531112671,
      "learning_rate": 0.00038890860910830376,
      "loss": 2.9437,
      "step": 93135
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.183181047439575,
      "learning_rate": 0.0003889047023036073,
      "loss": 2.9079,
      "step": 93136
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.668927550315857,
      "learning_rate": 0.0003889007954823818,
      "loss": 2.8518,
      "step": 93137
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.501556396484375,
      "learning_rate": 0.000388896888644628,
      "loss": 2.8546,
      "step": 93138
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3799943923950195,
      "learning_rate": 0.0003888929817903466,
      "loss": 2.8792,
      "step": 93139
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.546473264694214,
      "learning_rate": 0.00038888907491953825,
      "loss": 2.666,
      "step": 93140
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6126803159713745,
      "learning_rate": 0.00038888516803220377,
      "loss": 2.8598,
      "step": 93141
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4200279712677,
      "learning_rate": 0.00038888126112834393,
      "loss": 3.1028,
      "step": 93142
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9765160083770752,
      "learning_rate": 0.00038887735420795933,
      "loss": 3.0322,
      "step": 93143
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.047759532928467,
      "learning_rate": 0.00038887344727105074,
      "loss": 3.3218,
      "step": 93144
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9330438375473022,
      "learning_rate": 0.0003888695403176189,
      "loss": 2.7735,
      "step": 93145
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4170310497283936,
      "learning_rate": 0.0003888656333476645,
      "loss": 2.9509,
      "step": 93146
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9832022190093994,
      "learning_rate": 0.0003888617263611884,
      "loss": 2.9982,
      "step": 93147
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6855170726776123,
      "learning_rate": 0.0003888578193581911,
      "loss": 3.0653,
      "step": 93148
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2086822986602783,
      "learning_rate": 0.0003888539123386736,
      "loss": 2.8743,
      "step": 93149
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.683253049850464,
      "learning_rate": 0.0003888500053026364,
      "loss": 3.0407,
      "step": 93150
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8659615516662598,
      "learning_rate": 0.0003888460982500803,
      "loss": 3.0461,
      "step": 93151
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.591047763824463,
      "learning_rate": 0.000388842191181006,
      "loss": 3.0711,
      "step": 93152
    },
    {
      "epoch": 1.21,
      "grad_norm": 5.829474925994873,
      "learning_rate": 0.0003888382840954144,
      "loss": 2.9675,
      "step": 93153
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.884193181991577,
      "learning_rate": 0.000388834376993306,
      "loss": 3.0234,
      "step": 93154
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.067326307296753,
      "learning_rate": 0.00038883046987468154,
      "loss": 2.9357,
      "step": 93155
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8219729661941528,
      "learning_rate": 0.00038882656273954197,
      "loss": 3.1123,
      "step": 93156
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4849534034729004,
      "learning_rate": 0.0003888226555878878,
      "loss": 3.1704,
      "step": 93157
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.221480369567871,
      "learning_rate": 0.0003888187484197197,
      "loss": 3.0368,
      "step": 93158
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.564337730407715,
      "learning_rate": 0.0003888148412350387,
      "loss": 2.9088,
      "step": 93159
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2141923904418945,
      "learning_rate": 0.00038881093403384524,
      "loss": 2.9655,
      "step": 93160
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6618844270706177,
      "learning_rate": 0.0003888070268161402,
      "loss": 3.0069,
      "step": 93161
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5994679927825928,
      "learning_rate": 0.00038880311958192423,
      "loss": 2.9791,
      "step": 93162
    },
    {
      "epoch": 1.21,
      "grad_norm": 5.537657260894775,
      "learning_rate": 0.00038879921233119804,
      "loss": 3.0917,
      "step": 93163
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.445582389831543,
      "learning_rate": 0.00038879530506396253,
      "loss": 3.126,
      "step": 93164
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.223931312561035,
      "learning_rate": 0.0003887913977802182,
      "loss": 2.8762,
      "step": 93165
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.753422260284424,
      "learning_rate": 0.00038878749047996586,
      "loss": 2.9274,
      "step": 93166
    },
    {
      "epoch": 1.21,
      "grad_norm": 5.099542617797852,
      "learning_rate": 0.0003887835831632063,
      "loss": 2.927,
      "step": 93167
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.973867893218994,
      "learning_rate": 0.0003887796758299402,
      "loss": 2.9758,
      "step": 93168
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4974334239959717,
      "learning_rate": 0.00038877576848016823,
      "loss": 3.1405,
      "step": 93169
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7490588426589966,
      "learning_rate": 0.0003887718611138912,
      "loss": 3.2097,
      "step": 93170
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.1131491661071777,
      "learning_rate": 0.0003887679537311098,
      "loss": 3.1688,
      "step": 93171
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.6026594638824463,
      "learning_rate": 0.00038876404633182476,
      "loss": 3.0344,
      "step": 93172
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8728089332580566,
      "learning_rate": 0.0003887601389160368,
      "loss": 3.1099,
      "step": 93173
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.973252534866333,
      "learning_rate": 0.00038875623148374675,
      "loss": 3.1309,
      "step": 93174
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6373331546783447,
      "learning_rate": 0.00038875232403495514,
      "loss": 2.9745,
      "step": 93175
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.749639630317688,
      "learning_rate": 0.00038874841656966273,
      "loss": 2.9618,
      "step": 93176
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8155308961868286,
      "learning_rate": 0.0003887445090878705,
      "loss": 3.1181,
      "step": 93177
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4667696952819824,
      "learning_rate": 0.0003887406015895789,
      "loss": 3.1589,
      "step": 93178
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.827660322189331,
      "learning_rate": 0.0003887366940747887,
      "loss": 2.9675,
      "step": 93179
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6124370098114014,
      "learning_rate": 0.0003887327865435007,
      "loss": 2.9512,
      "step": 93180
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.432723045349121,
      "learning_rate": 0.0003887288789957156,
      "loss": 3.1377,
      "step": 93181
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5357915163040161,
      "learning_rate": 0.0003887249714314341,
      "loss": 3.2257,
      "step": 93182
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.615075707435608,
      "learning_rate": 0.00038872106385065693,
      "loss": 2.8651,
      "step": 93183
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.0379831790924072,
      "learning_rate": 0.00038871715625338497,
      "loss": 3.2393,
      "step": 93184
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1743953227996826,
      "learning_rate": 0.00038871324863961877,
      "loss": 2.8965,
      "step": 93185
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.27048921585083,
      "learning_rate": 0.0003887093410093591,
      "loss": 2.8049,
      "step": 93186
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5340113639831543,
      "learning_rate": 0.0003887054333626067,
      "loss": 2.9875,
      "step": 93187
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9457131624221802,
      "learning_rate": 0.00038870152569936217,
      "loss": 3.0873,
      "step": 93188
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.139922618865967,
      "learning_rate": 0.00038869761801962644,
      "loss": 2.9634,
      "step": 93189
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8886222839355469,
      "learning_rate": 0.00038869371032340016,
      "loss": 3.1129,
      "step": 93190
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8377188444137573,
      "learning_rate": 0.00038868980261068403,
      "loss": 2.9562,
      "step": 93191
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.047485589981079,
      "learning_rate": 0.00038868589488147876,
      "loss": 2.9689,
      "step": 93192
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.4145543575286865,
      "learning_rate": 0.00038868198713578516,
      "loss": 3.1259,
      "step": 93193
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4901262521743774,
      "learning_rate": 0.0003886780793736039,
      "loss": 3.0929,
      "step": 93194
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.05588698387146,
      "learning_rate": 0.0003886741715949357,
      "loss": 3.0269,
      "step": 93195
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9982798099517822,
      "learning_rate": 0.00038867026379978126,
      "loss": 2.9709,
      "step": 93196
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.123037099838257,
      "learning_rate": 0.0003886663559881414,
      "loss": 2.8113,
      "step": 93197
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7573028802871704,
      "learning_rate": 0.00038866244816001676,
      "loss": 3.1425,
      "step": 93198
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8969805240631104,
      "learning_rate": 0.00038865854031540814,
      "loss": 3.047,
      "step": 93199
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8532299995422363,
      "learning_rate": 0.00038865463245431613,
      "loss": 3.1115,
      "step": 93200
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.773196816444397,
      "learning_rate": 0.0003886507245767416,
      "loss": 2.901,
      "step": 93201
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5154566764831543,
      "learning_rate": 0.0003886468166826853,
      "loss": 3.167,
      "step": 93202
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.632050633430481,
      "learning_rate": 0.00038864290877214784,
      "loss": 2.8752,
      "step": 93203
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.462712049484253,
      "learning_rate": 0.0003886390008451299,
      "loss": 3.0991,
      "step": 93204
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6767252683639526,
      "learning_rate": 0.0003886350929016324,
      "loss": 2.9297,
      "step": 93205
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5114223957061768,
      "learning_rate": 0.0003886311849416559,
      "loss": 3.1703,
      "step": 93206
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.4887210130691528,
      "learning_rate": 0.0003886272769652012,
      "loss": 2.7405,
      "step": 93207
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8885043859481812,
      "learning_rate": 0.0003886233689722691,
      "loss": 3.0878,
      "step": 93208
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6519155502319336,
      "learning_rate": 0.00038861946096286015,
      "loss": 2.9033,
      "step": 93209
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8219496011734009,
      "learning_rate": 0.00038861555293697516,
      "loss": 2.9139,
      "step": 93210
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8217568397521973,
      "learning_rate": 0.0003886116448946149,
      "loss": 2.8986,
      "step": 93211
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6950502395629883,
      "learning_rate": 0.0003886077368357801,
      "loss": 2.6365,
      "step": 93212
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5906728506088257,
      "learning_rate": 0.0003886038287604714,
      "loss": 3.0798,
      "step": 93213
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.002823829650879,
      "learning_rate": 0.0003885999206686896,
      "loss": 2.5996,
      "step": 93214
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7276320457458496,
      "learning_rate": 0.0003885960125604354,
      "loss": 3.2027,
      "step": 93215
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.325260877609253,
      "learning_rate": 0.0003885921044357095,
      "loss": 3.281,
      "step": 93216
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8707540035247803,
      "learning_rate": 0.0003885881962945127,
      "loss": 2.9868,
      "step": 93217
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.103102922439575,
      "learning_rate": 0.0003885842881368457,
      "loss": 2.8288,
      "step": 93218
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.618060827255249,
      "learning_rate": 0.0003885803799627091,
      "loss": 2.9801,
      "step": 93219
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.058032274246216,
      "learning_rate": 0.0003885764717721038,
      "loss": 2.939,
      "step": 93220
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.768584132194519,
      "learning_rate": 0.00038857256356503053,
      "loss": 3.1059,
      "step": 93221
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8726387023925781,
      "learning_rate": 0.0003885686553414898,
      "loss": 3.1551,
      "step": 93222
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.555391550064087,
      "learning_rate": 0.00038856474710148266,
      "loss": 3.1309,
      "step": 93223
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7897295951843262,
      "learning_rate": 0.00038856083884500955,
      "loss": 3.0921,
      "step": 93224
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9796818494796753,
      "learning_rate": 0.0003885569305720713,
      "loss": 2.9867,
      "step": 93225
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6148775815963745,
      "learning_rate": 0.0003885530222826687,
      "loss": 3.2633,
      "step": 93226
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7901418209075928,
      "learning_rate": 0.0003885491139768024,
      "loss": 3.079,
      "step": 93227
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3340063095092773,
      "learning_rate": 0.00038854520565447307,
      "loss": 2.938,
      "step": 93228
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.6030232906341553,
      "learning_rate": 0.00038854129731568163,
      "loss": 3.0391,
      "step": 93229
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6826841831207275,
      "learning_rate": 0.00038853738896042864,
      "loss": 3.0879,
      "step": 93230
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.618783950805664,
      "learning_rate": 0.00038853348058871486,
      "loss": 3.1881,
      "step": 93231
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.7674338817596436,
      "learning_rate": 0.0003885295722005411,
      "loss": 2.9921,
      "step": 93232
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.805039167404175,
      "learning_rate": 0.00038852566379590797,
      "loss": 3.0263,
      "step": 93233
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.716841459274292,
      "learning_rate": 0.00038852175537481626,
      "loss": 2.9828,
      "step": 93234
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7532554864883423,
      "learning_rate": 0.00038851784693726664,
      "loss": 2.8982,
      "step": 93235
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2169036865234375,
      "learning_rate": 0.0003885139384832599,
      "loss": 3.0928,
      "step": 93236
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.583733081817627,
      "learning_rate": 0.00038851003001279683,
      "loss": 2.953,
      "step": 93237
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.04803729057312,
      "learning_rate": 0.000388506121525878,
      "loss": 2.9894,
      "step": 93238
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.210700273513794,
      "learning_rate": 0.00038850221302250425,
      "loss": 3.1433,
      "step": 93239
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3039140701293945,
      "learning_rate": 0.0003884983045026762,
      "loss": 2.8711,
      "step": 93240
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9456188678741455,
      "learning_rate": 0.0003884943959663948,
      "loss": 3.0251,
      "step": 93241
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.19814133644104,
      "learning_rate": 0.00038849048741366045,
      "loss": 2.845,
      "step": 93242
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.587522268295288,
      "learning_rate": 0.0003884865788444741,
      "loss": 2.9662,
      "step": 93243
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.4010021686553955,
      "learning_rate": 0.00038848267025883644,
      "loss": 2.9262,
      "step": 93244
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.3802924156188965,
      "learning_rate": 0.00038847876165674813,
      "loss": 2.7844,
      "step": 93245
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6436585187911987,
      "learning_rate": 0.00038847485303821004,
      "loss": 2.9582,
      "step": 93246
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6866297721862793,
      "learning_rate": 0.00038847094440322275,
      "loss": 3.061,
      "step": 93247
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.4016289710998535,
      "learning_rate": 0.00038846703575178703,
      "loss": 3.1257,
      "step": 93248
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.38108229637146,
      "learning_rate": 0.00038846312708390365,
      "loss": 3.1751,
      "step": 93249
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.714783787727356,
      "learning_rate": 0.0003884592183995733,
      "loss": 3.0485,
      "step": 93250
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.744729995727539,
      "learning_rate": 0.0003884553096987967,
      "loss": 2.9808,
      "step": 93251
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0458016395568848,
      "learning_rate": 0.00038845140098157466,
      "loss": 2.9764,
      "step": 93252
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.6006340980529785,
      "learning_rate": 0.0003884474922479077,
      "loss": 3.078,
      "step": 93253
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.458561658859253,
      "learning_rate": 0.00038844358349779677,
      "loss": 3.0223,
      "step": 93254
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.323652744293213,
      "learning_rate": 0.0003884396747312425,
      "loss": 3.0925,
      "step": 93255
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9904290437698364,
      "learning_rate": 0.0003884357659482456,
      "loss": 2.8538,
      "step": 93256
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.614931344985962,
      "learning_rate": 0.0003884318571488069,
      "loss": 2.8365,
      "step": 93257
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8519898653030396,
      "learning_rate": 0.000388427948332927,
      "loss": 3.072,
      "step": 93258
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.852424144744873,
      "learning_rate": 0.00038842403950060657,
      "loss": 2.9515,
      "step": 93259
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8948090076446533,
      "learning_rate": 0.00038842013065184665,
      "loss": 3.0564,
      "step": 93260
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.567164421081543,
      "learning_rate": 0.0003884162217866476,
      "loss": 3.1855,
      "step": 93261
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.309680700302124,
      "learning_rate": 0.00038841231290501026,
      "loss": 3.0834,
      "step": 93262
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0669326782226562,
      "learning_rate": 0.00038840840400693557,
      "loss": 2.9233,
      "step": 93263
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.803873896598816,
      "learning_rate": 0.000388404495092424,
      "loss": 2.966,
      "step": 93264
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8092169761657715,
      "learning_rate": 0.0003884005861614763,
      "loss": 2.8847,
      "step": 93265
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6899924278259277,
      "learning_rate": 0.0003883966772140934,
      "loss": 2.9543,
      "step": 93266
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7575829029083252,
      "learning_rate": 0.00038839276825027583,
      "loss": 2.9657,
      "step": 93267
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.9026260375976562,
      "learning_rate": 0.00038838885927002436,
      "loss": 2.9524,
      "step": 93268
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6393674612045288,
      "learning_rate": 0.00038838495027333975,
      "loss": 3.0151,
      "step": 93269
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5648081302642822,
      "learning_rate": 0.00038838104126022265,
      "loss": 2.7576,
      "step": 93270
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.6914327144622803,
      "learning_rate": 0.00038837713223067397,
      "loss": 2.8611,
      "step": 93271
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.040562391281128,
      "learning_rate": 0.0003883732231846942,
      "loss": 3.2027,
      "step": 93272
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5456092357635498,
      "learning_rate": 0.0003883693141222843,
      "loss": 3.0389,
      "step": 93273
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.700987696647644,
      "learning_rate": 0.0003883654050434447,
      "loss": 2.9963,
      "step": 93274
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.775715708732605,
      "learning_rate": 0.0003883614959481765,
      "loss": 3.0093,
      "step": 93275
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6513758897781372,
      "learning_rate": 0.0003883575868364801,
      "loss": 2.9775,
      "step": 93276
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6785757541656494,
      "learning_rate": 0.00038835367770835637,
      "loss": 2.8223,
      "step": 93277
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.444424033164978,
      "learning_rate": 0.0003883497685638061,
      "loss": 3.0005,
      "step": 93278
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.145237922668457,
      "learning_rate": 0.00038834585940282983,
      "loss": 3.2989,
      "step": 93279
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.648835301399231,
      "learning_rate": 0.00038834195022542845,
      "loss": 3.0162,
      "step": 93280
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6656701564788818,
      "learning_rate": 0.00038833804103160266,
      "loss": 2.9039,
      "step": 93281
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.659867525100708,
      "learning_rate": 0.0003883341318213531,
      "loss": 2.9275,
      "step": 93282
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.2271552085876465,
      "learning_rate": 0.0003883302225946806,
      "loss": 2.9026,
      "step": 93283
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5296111106872559,
      "learning_rate": 0.00038832631335158587,
      "loss": 3.1641,
      "step": 93284
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.448427677154541,
      "learning_rate": 0.0003883224040920695,
      "loss": 3.1009,
      "step": 93285
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.085801839828491,
      "learning_rate": 0.0003883184948161324,
      "loss": 3.042,
      "step": 93286
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6533786058425903,
      "learning_rate": 0.00038831458552377525,
      "loss": 3.065,
      "step": 93287
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8650014400482178,
      "learning_rate": 0.00038831067621499876,
      "loss": 3.188,
      "step": 93288
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1101601123809814,
      "learning_rate": 0.0003883067668898036,
      "loss": 3.0686,
      "step": 93289
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.815909743309021,
      "learning_rate": 0.0003883028575481906,
      "loss": 2.8721,
      "step": 93290
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0417304039001465,
      "learning_rate": 0.00038829894819016037,
      "loss": 2.9334,
      "step": 93291
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.716457724571228,
      "learning_rate": 0.0003882950388157137,
      "loss": 3.0564,
      "step": 93292
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.011525869369507,
      "learning_rate": 0.0003882911294248514,
      "loss": 2.9983,
      "step": 93293
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.5707695484161377,
      "learning_rate": 0.00038828722001757406,
      "loss": 3.1088,
      "step": 93294
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7880092859268188,
      "learning_rate": 0.0003882833105938824,
      "loss": 2.9343,
      "step": 93295
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5714951753616333,
      "learning_rate": 0.0003882794011537773,
      "loss": 3.2794,
      "step": 93296
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1439831256866455,
      "learning_rate": 0.0003882754916972593,
      "loss": 3.009,
      "step": 93297
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.623428225517273,
      "learning_rate": 0.0003882715822243293,
      "loss": 2.9797,
      "step": 93298
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.7394862174987793,
      "learning_rate": 0.0003882676727349879,
      "loss": 2.887,
      "step": 93299
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0571579933166504,
      "learning_rate": 0.00038826376322923596,
      "loss": 2.8334,
      "step": 93300
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5087072849273682,
      "learning_rate": 0.00038825985370707396,
      "loss": 3.1164,
      "step": 93301
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.803691029548645,
      "learning_rate": 0.00038825594416850293,
      "loss": 3.2492,
      "step": 93302
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.328550100326538,
      "learning_rate": 0.0003882520346135234,
      "loss": 2.6675,
      "step": 93303
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.1115920543670654,
      "learning_rate": 0.00038824812504213615,
      "loss": 3.0211,
      "step": 93304
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.5126359462738037,
      "learning_rate": 0.0003882442154543419,
      "loss": 3.028,
      "step": 93305
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.8756043910980225,
      "learning_rate": 0.00038824030585014137,
      "loss": 3.0249,
      "step": 93306
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.288344144821167,
      "learning_rate": 0.0003882363962295353,
      "loss": 3.0547,
      "step": 93307
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.081892728805542,
      "learning_rate": 0.00038823248659252454,
      "loss": 3.0723,
      "step": 93308
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.067084550857544,
      "learning_rate": 0.00038822857693910956,
      "loss": 2.9751,
      "step": 93309
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.8967221975326538,
      "learning_rate": 0.0003882246672692912,
      "loss": 3.0206,
      "step": 93310
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0347673892974854,
      "learning_rate": 0.0003882207575830704,
      "loss": 3.0145,
      "step": 93311
    },
    {
      "epoch": 1.21,
      "grad_norm": 1.6387590169906616,
      "learning_rate": 0.00038821684788044753,
      "loss": 3.2187,
      "step": 93312
    },
    {
      "epoch": 1.21,
      "grad_norm": 2.0053188800811768,
      "learning_rate": 0.00038821293816142347,
      "loss": 3.1821,
      "step": 93313
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.110874652862549,
      "learning_rate": 0.000388209028425999,
      "loss": 2.8041,
      "step": 93314
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9704951047897339,
      "learning_rate": 0.00038820511867417484,
      "loss": 2.8654,
      "step": 93315
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.276420831680298,
      "learning_rate": 0.00038820120890595164,
      "loss": 3.0783,
      "step": 93316
    },
    {
      "epoch": 1.22,
      "grad_norm": 5.626454830169678,
      "learning_rate": 0.0003881972991213302,
      "loss": 2.9767,
      "step": 93317
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6180145740509033,
      "learning_rate": 0.00038819338932031123,
      "loss": 2.9175,
      "step": 93318
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.880951166152954,
      "learning_rate": 0.00038818947950289544,
      "loss": 3.0425,
      "step": 93319
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.89486026763916,
      "learning_rate": 0.00038818556966908354,
      "loss": 2.8342,
      "step": 93320
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.320473670959473,
      "learning_rate": 0.0003881816598188762,
      "loss": 2.8598,
      "step": 93321
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6921257972717285,
      "learning_rate": 0.0003881777499522744,
      "loss": 3.1296,
      "step": 93322
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.543360710144043,
      "learning_rate": 0.00038817384006927853,
      "loss": 3.3197,
      "step": 93323
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4550111293792725,
      "learning_rate": 0.0003881699301698896,
      "loss": 3.0285,
      "step": 93324
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5394699573516846,
      "learning_rate": 0.0003881660202541081,
      "loss": 2.6987,
      "step": 93325
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0084800720214844,
      "learning_rate": 0.0003881621103219349,
      "loss": 3.0911,
      "step": 93326
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9023315906524658,
      "learning_rate": 0.0003881582003733707,
      "loss": 3.1749,
      "step": 93327
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0026538372039795,
      "learning_rate": 0.0003881542904084163,
      "loss": 2.9833,
      "step": 93328
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.016442060470581,
      "learning_rate": 0.00038815038042707236,
      "loss": 3.1324,
      "step": 93329
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.386066436767578,
      "learning_rate": 0.0003881464704293395,
      "loss": 3.1884,
      "step": 93330
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6313869953155518,
      "learning_rate": 0.00038814256041521864,
      "loss": 2.999,
      "step": 93331
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8277896642684937,
      "learning_rate": 0.00038813865038471034,
      "loss": 3.0288,
      "step": 93332
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9773523807525635,
      "learning_rate": 0.00038813474033781546,
      "loss": 3.0773,
      "step": 93333
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0386767387390137,
      "learning_rate": 0.00038813083027453466,
      "loss": 2.9561,
      "step": 93334
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5655313730239868,
      "learning_rate": 0.0003881269201948686,
      "loss": 3.0847,
      "step": 93335
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8258906602859497,
      "learning_rate": 0.0003881230100988182,
      "loss": 3.2699,
      "step": 93336
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8228741884231567,
      "learning_rate": 0.00038811909998638395,
      "loss": 2.9039,
      "step": 93337
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6267638206481934,
      "learning_rate": 0.00038811518985756675,
      "loss": 3.0828,
      "step": 93338
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.635930061340332,
      "learning_rate": 0.0003881112797123673,
      "loss": 3.1057,
      "step": 93339
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1475613117218018,
      "learning_rate": 0.0003881073695507863,
      "loss": 3.1641,
      "step": 93340
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.641761064529419,
      "learning_rate": 0.00038810345937282444,
      "loss": 3.0705,
      "step": 93341
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7664724588394165,
      "learning_rate": 0.0003880995491784825,
      "loss": 3.142,
      "step": 93342
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6824253797531128,
      "learning_rate": 0.00038809563896776117,
      "loss": 3.1802,
      "step": 93343
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.548279881477356,
      "learning_rate": 0.0003880917287406612,
      "loss": 3.0635,
      "step": 93344
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.868288278579712,
      "learning_rate": 0.0003880878184971834,
      "loss": 3.1041,
      "step": 93345
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.617738127708435,
      "learning_rate": 0.0003880839082373283,
      "loss": 2.9345,
      "step": 93346
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.915132999420166,
      "learning_rate": 0.0003880799979610968,
      "loss": 2.904,
      "step": 93347
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.843396782875061,
      "learning_rate": 0.00038807608766848955,
      "loss": 2.6339,
      "step": 93348
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5524979829788208,
      "learning_rate": 0.0003880721773595073,
      "loss": 2.8313,
      "step": 93349
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6841546297073364,
      "learning_rate": 0.00038806826703415074,
      "loss": 3.1553,
      "step": 93350
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1210107803344727,
      "learning_rate": 0.0003880643566924207,
      "loss": 2.8613,
      "step": 93351
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8702715635299683,
      "learning_rate": 0.0003880604463343178,
      "loss": 3.2476,
      "step": 93352
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8496044874191284,
      "learning_rate": 0.00038805653595984276,
      "loss": 2.9507,
      "step": 93353
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2862069606781006,
      "learning_rate": 0.0003880526255689964,
      "loss": 3.0106,
      "step": 93354
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.929944396018982,
      "learning_rate": 0.0003880487151617793,
      "loss": 3.054,
      "step": 93355
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1276779174804688,
      "learning_rate": 0.00038804480473819236,
      "loss": 2.9776,
      "step": 93356
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5887188911437988,
      "learning_rate": 0.00038804089429823625,
      "loss": 3.228,
      "step": 93357
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.212773084640503,
      "learning_rate": 0.00038803698384191166,
      "loss": 3.0946,
      "step": 93358
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.53617262840271,
      "learning_rate": 0.00038803307336921926,
      "loss": 2.9971,
      "step": 93359
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.829715609550476,
      "learning_rate": 0.00038802916288015996,
      "loss": 3.1361,
      "step": 93360
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6951844692230225,
      "learning_rate": 0.0003880252523747343,
      "loss": 2.7349,
      "step": 93361
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6006529331207275,
      "learning_rate": 0.0003880213418529431,
      "loss": 2.6998,
      "step": 93362
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4749319553375244,
      "learning_rate": 0.00038801743131478705,
      "loss": 2.9862,
      "step": 93363
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8491073846817017,
      "learning_rate": 0.0003880135207602669,
      "loss": 2.8115,
      "step": 93364
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9971944093704224,
      "learning_rate": 0.00038800961018938343,
      "loss": 2.9481,
      "step": 93365
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6273901462554932,
      "learning_rate": 0.0003880056996021373,
      "loss": 2.9279,
      "step": 93366
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9366562366485596,
      "learning_rate": 0.00038800178899852923,
      "loss": 3.1439,
      "step": 93367
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.030531167984009,
      "learning_rate": 0.00038799787837855994,
      "loss": 3.1619,
      "step": 93368
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.885876178741455,
      "learning_rate": 0.00038799396774223026,
      "loss": 3.2587,
      "step": 93369
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0380876064300537,
      "learning_rate": 0.0003879900570895408,
      "loss": 3.0524,
      "step": 93370
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8527686595916748,
      "learning_rate": 0.0003879861464204923,
      "loss": 2.8693,
      "step": 93371
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9563319683074951,
      "learning_rate": 0.0003879822357350855,
      "loss": 2.9945,
      "step": 93372
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0486960411071777,
      "learning_rate": 0.0003879783250333212,
      "loss": 2.9366,
      "step": 93373
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.047511577606201,
      "learning_rate": 0.00038797441431520006,
      "loss": 3.3457,
      "step": 93374
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5741713047027588,
      "learning_rate": 0.0003879705035807228,
      "loss": 3.2274,
      "step": 93375
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.818443775177002,
      "learning_rate": 0.00038796659282989016,
      "loss": 2.9345,
      "step": 93376
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3250856399536133,
      "learning_rate": 0.00038796268206270286,
      "loss": 3.23,
      "step": 93377
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.572178602218628,
      "learning_rate": 0.0003879587712791617,
      "loss": 2.9718,
      "step": 93378
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.602869987487793,
      "learning_rate": 0.00038795486047926725,
      "loss": 3.1125,
      "step": 93379
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4865243434906006,
      "learning_rate": 0.00038795094966302034,
      "loss": 3.0148,
      "step": 93380
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8802112340927124,
      "learning_rate": 0.0003879470388304217,
      "loss": 3.3332,
      "step": 93381
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8158007860183716,
      "learning_rate": 0.0003879431279814721,
      "loss": 2.9043,
      "step": 93382
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4923617839813232,
      "learning_rate": 0.00038793921711617217,
      "loss": 3.0359,
      "step": 93383
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4657270908355713,
      "learning_rate": 0.00038793530623452266,
      "loss": 3.0078,
      "step": 93384
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7048946619033813,
      "learning_rate": 0.0003879313953365244,
      "loss": 3.0026,
      "step": 93385
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8995091915130615,
      "learning_rate": 0.000387927484422178,
      "loss": 2.8531,
      "step": 93386
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4568657875061035,
      "learning_rate": 0.00038792357349148415,
      "loss": 2.8824,
      "step": 93387
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.0695083141326904,
      "learning_rate": 0.0003879196625444437,
      "loss": 3.0959,
      "step": 93388
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1499524116516113,
      "learning_rate": 0.0003879157515810574,
      "loss": 2.8689,
      "step": 93389
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.907993197441101,
      "learning_rate": 0.00038791184060132575,
      "loss": 3.032,
      "step": 93390
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.50341796875,
      "learning_rate": 0.00038790792960524976,
      "loss": 3.2479,
      "step": 93391
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5074975490570068,
      "learning_rate": 0.00038790401859282997,
      "loss": 3.3429,
      "step": 93392
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6217914819717407,
      "learning_rate": 0.00038790010756406713,
      "loss": 3.0182,
      "step": 93393
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5716404914855957,
      "learning_rate": 0.0003878961965189621,
      "loss": 2.8956,
      "step": 93394
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7442125082015991,
      "learning_rate": 0.0003878922854575154,
      "loss": 3.2748,
      "step": 93395
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6157560348510742,
      "learning_rate": 0.00038788837437972787,
      "loss": 3.0917,
      "step": 93396
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8756815195083618,
      "learning_rate": 0.00038788446328560033,
      "loss": 3.0598,
      "step": 93397
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6995924711227417,
      "learning_rate": 0.00038788055217513337,
      "loss": 2.8468,
      "step": 93398
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6054407358169556,
      "learning_rate": 0.0003878766410483277,
      "loss": 3.0389,
      "step": 93399
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.333272695541382,
      "learning_rate": 0.00038787272990518413,
      "loss": 3.0256,
      "step": 93400
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.1305713653564453,
      "learning_rate": 0.0003878688187457034,
      "loss": 3.1144,
      "step": 93401
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7338680028915405,
      "learning_rate": 0.0003878649075698861,
      "loss": 3.1704,
      "step": 93402
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.365922451019287,
      "learning_rate": 0.0003878609963777332,
      "loss": 2.9529,
      "step": 93403
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.460354804992676,
      "learning_rate": 0.00038785708516924516,
      "loss": 2.7967,
      "step": 93404
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.640112280845642,
      "learning_rate": 0.0003878531739444229,
      "loss": 3.0953,
      "step": 93405
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.395843982696533,
      "learning_rate": 0.000387849262703267,
      "loss": 2.958,
      "step": 93406
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.397773027420044,
      "learning_rate": 0.0003878453514457784,
      "loss": 3.0159,
      "step": 93407
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6813900470733643,
      "learning_rate": 0.0003878414401719576,
      "loss": 3.0438,
      "step": 93408
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4504863023757935,
      "learning_rate": 0.00038783752888180536,
      "loss": 3.1508,
      "step": 93409
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.523916482925415,
      "learning_rate": 0.0003878336175753226,
      "loss": 3.0868,
      "step": 93410
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.489757537841797,
      "learning_rate": 0.00038782970625250983,
      "loss": 2.8447,
      "step": 93411
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.834175705909729,
      "learning_rate": 0.00038782579491336783,
      "loss": 2.7757,
      "step": 93412
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7372485399246216,
      "learning_rate": 0.0003878218835578974,
      "loss": 2.8446,
      "step": 93413
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.4738450050354,
      "learning_rate": 0.00038781797218609925,
      "loss": 2.9443,
      "step": 93414
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0056023597717285,
      "learning_rate": 0.000387814060797974,
      "loss": 2.9979,
      "step": 93415
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7090431451797485,
      "learning_rate": 0.0003878101493935225,
      "loss": 3.0488,
      "step": 93416
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6856552362442017,
      "learning_rate": 0.00038780623797274544,
      "loss": 3.2614,
      "step": 93417
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.988419532775879,
      "learning_rate": 0.0003878023265356436,
      "loss": 3.2231,
      "step": 93418
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5130889415740967,
      "learning_rate": 0.0003877984150822176,
      "loss": 3.0759,
      "step": 93419
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.762453556060791,
      "learning_rate": 0.0003877945036124682,
      "loss": 2.9119,
      "step": 93420
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.849448800086975,
      "learning_rate": 0.0003877905921263962,
      "loss": 2.984,
      "step": 93421
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5523213148117065,
      "learning_rate": 0.00038778668062400224,
      "loss": 2.8914,
      "step": 93422
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.093594551086426,
      "learning_rate": 0.00038778276910528703,
      "loss": 3.0265,
      "step": 93423
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4880962371826172,
      "learning_rate": 0.00038777885757025144,
      "loss": 2.9984,
      "step": 93424
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5982717275619507,
      "learning_rate": 0.000387774946018896,
      "loss": 3.158,
      "step": 93425
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.44389009475708,
      "learning_rate": 0.0003877710344512216,
      "loss": 2.9391,
      "step": 93426
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9607608318328857,
      "learning_rate": 0.00038776712286722894,
      "loss": 3.305,
      "step": 93427
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7678149938583374,
      "learning_rate": 0.0003877632112669187,
      "loss": 2.9667,
      "step": 93428
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.383892059326172,
      "learning_rate": 0.00038775929965029156,
      "loss": 2.814,
      "step": 93429
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0577292442321777,
      "learning_rate": 0.0003877553880173484,
      "loss": 2.7758,
      "step": 93430
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4437270164489746,
      "learning_rate": 0.00038775147636808976,
      "loss": 2.9833,
      "step": 93431
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2559263706207275,
      "learning_rate": 0.0003877475647025165,
      "loss": 3.1569,
      "step": 93432
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4660770893096924,
      "learning_rate": 0.00038774365302062934,
      "loss": 3.1653,
      "step": 93433
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8250402212142944,
      "learning_rate": 0.000387739741322429,
      "loss": 3.1855,
      "step": 93434
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.672853946685791,
      "learning_rate": 0.0003877358296079161,
      "loss": 3.1388,
      "step": 93435
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.053769826889038,
      "learning_rate": 0.00038773191787709154,
      "loss": 3.0133,
      "step": 93436
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.261630058288574,
      "learning_rate": 0.0003877280061299559,
      "loss": 2.8815,
      "step": 93437
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4839147329330444,
      "learning_rate": 0.00038772409436651,
      "loss": 2.9934,
      "step": 93438
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4465469121932983,
      "learning_rate": 0.00038772018258675456,
      "loss": 2.9958,
      "step": 93439
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8565293550491333,
      "learning_rate": 0.0003877162707906902,
      "loss": 3.0247,
      "step": 93440
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7533535957336426,
      "learning_rate": 0.0003877123589783178,
      "loss": 2.7424,
      "step": 93441
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1359949111938477,
      "learning_rate": 0.000387708447149638,
      "loss": 2.8579,
      "step": 93442
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5787209272384644,
      "learning_rate": 0.0003877045353046515,
      "loss": 2.9755,
      "step": 93443
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7866497039794922,
      "learning_rate": 0.0003877006234433591,
      "loss": 3.0948,
      "step": 93444
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.5333080291748047,
      "learning_rate": 0.00038769671156576154,
      "loss": 3.0105,
      "step": 93445
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4766674041748047,
      "learning_rate": 0.00038769279967185947,
      "loss": 2.6602,
      "step": 93446
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.034536838531494,
      "learning_rate": 0.0003876888877616536,
      "loss": 3.0323,
      "step": 93447
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.99686861038208,
      "learning_rate": 0.0003876849758351448,
      "loss": 2.8769,
      "step": 93448
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2867987155914307,
      "learning_rate": 0.0003876810638923337,
      "loss": 2.8196,
      "step": 93449
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6451514959335327,
      "learning_rate": 0.00038767715193322095,
      "loss": 2.8057,
      "step": 93450
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.3256622552871704,
      "learning_rate": 0.0003876732399578074,
      "loss": 3.1386,
      "step": 93451
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.487006425857544,
      "learning_rate": 0.00038766932796609384,
      "loss": 3.2213,
      "step": 93452
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8917438983917236,
      "learning_rate": 0.0003876654159580808,
      "loss": 3.0105,
      "step": 93453
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6724672317504883,
      "learning_rate": 0.0003876615039337691,
      "loss": 3.1445,
      "step": 93454
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6978129148483276,
      "learning_rate": 0.00038765759189315956,
      "loss": 2.9369,
      "step": 93455
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4920859336853027,
      "learning_rate": 0.0003876536798362527,
      "loss": 3.2145,
      "step": 93456
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0675153732299805,
      "learning_rate": 0.00038764976776304937,
      "loss": 3.0096,
      "step": 93457
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.935185194015503,
      "learning_rate": 0.0003876458556735504,
      "loss": 2.854,
      "step": 93458
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7425448894500732,
      "learning_rate": 0.00038764194356775636,
      "loss": 2.8897,
      "step": 93459
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9954806566238403,
      "learning_rate": 0.00038763803144566795,
      "loss": 3.0297,
      "step": 93460
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.9057207107543945,
      "learning_rate": 0.0003876341193072861,
      "loss": 2.9394,
      "step": 93461
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5693058967590332,
      "learning_rate": 0.0003876302071526114,
      "loss": 2.9443,
      "step": 93462
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.832277774810791,
      "learning_rate": 0.00038762629498164454,
      "loss": 3.1663,
      "step": 93463
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4378018379211426,
      "learning_rate": 0.0003876223827943863,
      "loss": 2.9016,
      "step": 93464
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7079288959503174,
      "learning_rate": 0.0003876184705908374,
      "loss": 3.1115,
      "step": 93465
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6218804121017456,
      "learning_rate": 0.0003876145583709986,
      "loss": 2.9354,
      "step": 93466
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.136435031890869,
      "learning_rate": 0.00038761064613487063,
      "loss": 2.8922,
      "step": 93467
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8848934173583984,
      "learning_rate": 0.0003876067338824541,
      "loss": 2.9954,
      "step": 93468
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.064584493637085,
      "learning_rate": 0.0003876028216137499,
      "loss": 2.9853,
      "step": 93469
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.589475154876709,
      "learning_rate": 0.0003875989093287587,
      "loss": 2.8503,
      "step": 93470
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5902581214904785,
      "learning_rate": 0.00038759499702748114,
      "loss": 3.189,
      "step": 93471
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.802536964416504,
      "learning_rate": 0.000387591084709918,
      "loss": 2.8849,
      "step": 93472
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5456713438034058,
      "learning_rate": 0.00038758717237607007,
      "loss": 2.8782,
      "step": 93473
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6054155826568604,
      "learning_rate": 0.000387583260025938,
      "loss": 3.0196,
      "step": 93474
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6310193538665771,
      "learning_rate": 0.0003875793476595225,
      "loss": 2.7943,
      "step": 93475
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7931493520736694,
      "learning_rate": 0.0003875754352768245,
      "loss": 2.8098,
      "step": 93476
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7919381856918335,
      "learning_rate": 0.00038757152287784445,
      "loss": 3.0383,
      "step": 93477
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7570096254348755,
      "learning_rate": 0.0003875676104625832,
      "loss": 2.9347,
      "step": 93478
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7361204624176025,
      "learning_rate": 0.00038756369803104155,
      "loss": 2.9841,
      "step": 93479
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.701218843460083,
      "learning_rate": 0.0003875597855832201,
      "loss": 2.9281,
      "step": 93480
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6439129114151,
      "learning_rate": 0.0003875558731191197,
      "loss": 3.0376,
      "step": 93481
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.067913770675659,
      "learning_rate": 0.000387551960638741,
      "loss": 2.9943,
      "step": 93482
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7990273237228394,
      "learning_rate": 0.0003875480481420847,
      "loss": 3.0244,
      "step": 93483
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9314261674880981,
      "learning_rate": 0.0003875441356291515,
      "loss": 2.8954,
      "step": 93484
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7689250707626343,
      "learning_rate": 0.0003875402230999423,
      "loss": 3.1637,
      "step": 93485
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.853674054145813,
      "learning_rate": 0.0003875363105544577,
      "loss": 2.955,
      "step": 93486
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.247358560562134,
      "learning_rate": 0.00038753239799269837,
      "loss": 2.7773,
      "step": 93487
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.643991708755493,
      "learning_rate": 0.0003875284854146652,
      "loss": 2.7843,
      "step": 93488
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6876959800720215,
      "learning_rate": 0.00038752457282035884,
      "loss": 3.0556,
      "step": 93489
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6470897197723389,
      "learning_rate": 0.00038752066020977994,
      "loss": 2.9473,
      "step": 93490
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7038793563842773,
      "learning_rate": 0.0003875167475829294,
      "loss": 3.1861,
      "step": 93491
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5465461015701294,
      "learning_rate": 0.00038751283493980773,
      "loss": 2.8557,
      "step": 93492
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7700403928756714,
      "learning_rate": 0.00038750892228041583,
      "loss": 2.996,
      "step": 93493
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8124488592147827,
      "learning_rate": 0.00038750500960475434,
      "loss": 3.2826,
      "step": 93494
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9717367887496948,
      "learning_rate": 0.00038750109691282407,
      "loss": 2.9051,
      "step": 93495
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1708059310913086,
      "learning_rate": 0.00038749718420462567,
      "loss": 2.8213,
      "step": 93496
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0317161083221436,
      "learning_rate": 0.00038749327148015984,
      "loss": 3.0943,
      "step": 93497
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7632527351379395,
      "learning_rate": 0.0003874893587394274,
      "loss": 2.8395,
      "step": 93498
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.942240834236145,
      "learning_rate": 0.00038748544598242907,
      "loss": 3.1435,
      "step": 93499
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4536051750183105,
      "learning_rate": 0.00038748153320916547,
      "loss": 3.062,
      "step": 93500
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.474498748779297,
      "learning_rate": 0.00038747762041963744,
      "loss": 3.047,
      "step": 93501
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7079482078552246,
      "learning_rate": 0.0003874737076138457,
      "loss": 2.9415,
      "step": 93502
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.7805566787719727,
      "learning_rate": 0.0003874697947917909,
      "loss": 3.0677,
      "step": 93503
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.079754590988159,
      "learning_rate": 0.0003874658819534738,
      "loss": 3.104,
      "step": 93504
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.906278610229492,
      "learning_rate": 0.0003874619690988952,
      "loss": 3.0281,
      "step": 93505
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7663577795028687,
      "learning_rate": 0.00038745805622805577,
      "loss": 3.0916,
      "step": 93506
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6505768299102783,
      "learning_rate": 0.0003874541433409562,
      "loss": 2.9421,
      "step": 93507
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.7669291496276855,
      "learning_rate": 0.00038745023043759724,
      "loss": 2.9393,
      "step": 93508
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9024147987365723,
      "learning_rate": 0.00038744631751797974,
      "loss": 2.9912,
      "step": 93509
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6009739637374878,
      "learning_rate": 0.00038744240458210414,
      "loss": 2.9957,
      "step": 93510
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3412697315216064,
      "learning_rate": 0.0003874384916299715,
      "loss": 3.0249,
      "step": 93511
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6427010297775269,
      "learning_rate": 0.00038743457866158233,
      "loss": 3.0997,
      "step": 93512
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5413665771484375,
      "learning_rate": 0.0003874306656769374,
      "loss": 2.9747,
      "step": 93513
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8108795881271362,
      "learning_rate": 0.00038742675267603745,
      "loss": 2.9509,
      "step": 93514
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.3951910734176636,
      "learning_rate": 0.0003874228396588834,
      "loss": 3.2487,
      "step": 93515
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9473515748977661,
      "learning_rate": 0.0003874189266254756,
      "loss": 2.9804,
      "step": 93516
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7749652862548828,
      "learning_rate": 0.00038741501357581495,
      "loss": 3.0404,
      "step": 93517
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2990219593048096,
      "learning_rate": 0.00038741110050990226,
      "loss": 2.8559,
      "step": 93518
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0957181453704834,
      "learning_rate": 0.00038740718742773825,
      "loss": 3.134,
      "step": 93519
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5633633136749268,
      "learning_rate": 0.00038740327432932356,
      "loss": 2.8893,
      "step": 93520
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5614120960235596,
      "learning_rate": 0.0003873993612146589,
      "loss": 2.9653,
      "step": 93521
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4170852899551392,
      "learning_rate": 0.00038739544808374514,
      "loss": 2.932,
      "step": 93522
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9108695983886719,
      "learning_rate": 0.00038739153493658283,
      "loss": 2.8611,
      "step": 93523
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9035874605178833,
      "learning_rate": 0.0003873876217731728,
      "loss": 2.8441,
      "step": 93524
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7007993459701538,
      "learning_rate": 0.00038738370859351583,
      "loss": 2.9721,
      "step": 93525
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.2514307498931885,
      "learning_rate": 0.0003873797953976125,
      "loss": 3.0269,
      "step": 93526
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1292593479156494,
      "learning_rate": 0.00038737588218546365,
      "loss": 3.0618,
      "step": 93527
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7468756437301636,
      "learning_rate": 0.00038737196895707,
      "loss": 3.131,
      "step": 93528
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.178664207458496,
      "learning_rate": 0.0003873680557124322,
      "loss": 2.8383,
      "step": 93529
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.233203649520874,
      "learning_rate": 0.00038736414245155106,
      "loss": 3.2497,
      "step": 93530
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8214033842086792,
      "learning_rate": 0.0003873602291744273,
      "loss": 3.2713,
      "step": 93531
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7028040885925293,
      "learning_rate": 0.00038735631588106154,
      "loss": 3.151,
      "step": 93532
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.8369317054748535,
      "learning_rate": 0.0003873524025714547,
      "loss": 2.8047,
      "step": 93533
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.043431043624878,
      "learning_rate": 0.00038734848924560733,
      "loss": 3.125,
      "step": 93534
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0949642658233643,
      "learning_rate": 0.00038734457590352026,
      "loss": 3.2518,
      "step": 93535
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.476616144180298,
      "learning_rate": 0.0003873406625451941,
      "loss": 2.8252,
      "step": 93536
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.449570655822754,
      "learning_rate": 0.0003873367491706298,
      "loss": 3.1418,
      "step": 93537
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.108510732650757,
      "learning_rate": 0.00038733283577982783,
      "loss": 3.0559,
      "step": 93538
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5705230236053467,
      "learning_rate": 0.0003873289223727891,
      "loss": 2.8706,
      "step": 93539
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4805915355682373,
      "learning_rate": 0.0003873250089495143,
      "loss": 2.9915,
      "step": 93540
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.008763074874878,
      "learning_rate": 0.000387321095510004,
      "loss": 2.8339,
      "step": 93541
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8181040287017822,
      "learning_rate": 0.0003873171820542592,
      "loss": 2.9129,
      "step": 93542
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.777445673942566,
      "learning_rate": 0.00038731326858228046,
      "loss": 2.9911,
      "step": 93543
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5576149225234985,
      "learning_rate": 0.0003873093550940685,
      "loss": 3.0675,
      "step": 93544
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.013882637023926,
      "learning_rate": 0.00038730544158962403,
      "loss": 3.0999,
      "step": 93545
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4413485527038574,
      "learning_rate": 0.000387301528068948,
      "loss": 3.0274,
      "step": 93546
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5746413469314575,
      "learning_rate": 0.0003872976145320408,
      "loss": 2.7101,
      "step": 93547
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6409308910369873,
      "learning_rate": 0.0003872937009789034,
      "loss": 3.1415,
      "step": 93548
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.649742364883423,
      "learning_rate": 0.0003872897874095365,
      "loss": 3.0606,
      "step": 93549
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9988168478012085,
      "learning_rate": 0.00038728587382394063,
      "loss": 2.8923,
      "step": 93550
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1152737140655518,
      "learning_rate": 0.0003872819602221168,
      "loss": 3.1897,
      "step": 93551
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4434032440185547,
      "learning_rate": 0.0003872780466040656,
      "loss": 3.0842,
      "step": 93552
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.145688772201538,
      "learning_rate": 0.00038727413296978775,
      "loss": 2.9378,
      "step": 93553
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.339674234390259,
      "learning_rate": 0.0003872702193192839,
      "loss": 3.0349,
      "step": 93554
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9297566413879395,
      "learning_rate": 0.000387266305652555,
      "loss": 3.0513,
      "step": 93555
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6873193979263306,
      "learning_rate": 0.0003872623919696016,
      "loss": 3.0871,
      "step": 93556
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.891748309135437,
      "learning_rate": 0.0003872584782704244,
      "loss": 2.9448,
      "step": 93557
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8406473398208618,
      "learning_rate": 0.0003872545645550244,
      "loss": 2.9985,
      "step": 93558
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.3910157680511475,
      "learning_rate": 0.0003872506508234019,
      "loss": 2.9604,
      "step": 93559
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7409584522247314,
      "learning_rate": 0.0003872467370755579,
      "loss": 3.2043,
      "step": 93560
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4450364112854004,
      "learning_rate": 0.0003872428233114932,
      "loss": 2.7865,
      "step": 93561
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5997321605682373,
      "learning_rate": 0.00038723890953120836,
      "loss": 3.0807,
      "step": 93562
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4872617721557617,
      "learning_rate": 0.00038723499573470414,
      "loss": 3.1324,
      "step": 93563
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1219680309295654,
      "learning_rate": 0.00038723108192198133,
      "loss": 2.9306,
      "step": 93564
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7622076272964478,
      "learning_rate": 0.00038722716809304054,
      "loss": 2.9779,
      "step": 93565
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7807241678237915,
      "learning_rate": 0.0003872232542478826,
      "loss": 2.8292,
      "step": 93566
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.409558653831482,
      "learning_rate": 0.00038721934038650827,
      "loss": 3.1255,
      "step": 93567
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.632690668106079,
      "learning_rate": 0.0003872154265089182,
      "loss": 3.1021,
      "step": 93568
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8806970119476318,
      "learning_rate": 0.00038721151261511305,
      "loss": 3.1061,
      "step": 93569
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8737393617630005,
      "learning_rate": 0.0003872075987050938,
      "loss": 3.0624,
      "step": 93570
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9047638177871704,
      "learning_rate": 0.0003872036847788608,
      "loss": 3.0744,
      "step": 93571
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.538697600364685,
      "learning_rate": 0.0003871997708364151,
      "loss": 3.1481,
      "step": 93572
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6620172262191772,
      "learning_rate": 0.00038719585687775734,
      "loss": 2.8855,
      "step": 93573
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7471400499343872,
      "learning_rate": 0.0003871919429028882,
      "loss": 2.9785,
      "step": 93574
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4669550657272339,
      "learning_rate": 0.00038718802891180845,
      "loss": 2.9698,
      "step": 93575
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6928977966308594,
      "learning_rate": 0.00038718411490451873,
      "loss": 3.0048,
      "step": 93576
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2886650562286377,
      "learning_rate": 0.0003871802008810199,
      "loss": 2.9442,
      "step": 93577
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.283379077911377,
      "learning_rate": 0.00038717628684131256,
      "loss": 2.9393,
      "step": 93578
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.337498188018799,
      "learning_rate": 0.0003871723727853975,
      "loss": 3.0961,
      "step": 93579
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6638684272766113,
      "learning_rate": 0.00038716845871327555,
      "loss": 2.8971,
      "step": 93580
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.628910779953003,
      "learning_rate": 0.0003871645446249473,
      "loss": 2.9985,
      "step": 93581
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0678277015686035,
      "learning_rate": 0.0003871606305204135,
      "loss": 2.9947,
      "step": 93582
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.043602228164673,
      "learning_rate": 0.0003871567163996749,
      "loss": 3.1809,
      "step": 93583
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9152640104293823,
      "learning_rate": 0.0003871528022627322,
      "loss": 2.688,
      "step": 93584
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.782818078994751,
      "learning_rate": 0.00038714888810958617,
      "loss": 3.0302,
      "step": 93585
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.710120439529419,
      "learning_rate": 0.0003871449739402375,
      "loss": 2.9695,
      "step": 93586
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6455398797988892,
      "learning_rate": 0.000387141059754687,
      "loss": 3.0744,
      "step": 93587
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6214721202850342,
      "learning_rate": 0.00038713714555293523,
      "loss": 3.1841,
      "step": 93588
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7248802185058594,
      "learning_rate": 0.0003871332313349831,
      "loss": 3.1897,
      "step": 93589
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.575365424156189,
      "learning_rate": 0.0003871293171008312,
      "loss": 3.0863,
      "step": 93590
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.691829800605774,
      "learning_rate": 0.0003871254028504804,
      "loss": 2.917,
      "step": 93591
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.00469970703125,
      "learning_rate": 0.00038712148858393127,
      "loss": 3.1413,
      "step": 93592
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9430122375488281,
      "learning_rate": 0.00038711757430118454,
      "loss": 2.8862,
      "step": 93593
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9313374757766724,
      "learning_rate": 0.0003871136600022412,
      "loss": 2.8461,
      "step": 93594
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.859299898147583,
      "learning_rate": 0.00038710974568710165,
      "loss": 3.1521,
      "step": 93595
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.8814499378204346,
      "learning_rate": 0.0003871058313557668,
      "loss": 2.9746,
      "step": 93596
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.673716425895691,
      "learning_rate": 0.00038710191700823724,
      "loss": 3.2346,
      "step": 93597
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.103027105331421,
      "learning_rate": 0.0003870980026445139,
      "loss": 2.9421,
      "step": 93598
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7986570596694946,
      "learning_rate": 0.00038709408826459734,
      "loss": 2.9607,
      "step": 93599
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.112112045288086,
      "learning_rate": 0.0003870901738684884,
      "loss": 2.961,
      "step": 93600
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9094157218933105,
      "learning_rate": 0.00038708625945618774,
      "loss": 3.2125,
      "step": 93601
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1820926666259766,
      "learning_rate": 0.00038708234502769605,
      "loss": 3.1803,
      "step": 93602
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.993980884552002,
      "learning_rate": 0.00038707843058301413,
      "loss": 2.9443,
      "step": 93603
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6579307317733765,
      "learning_rate": 0.00038707451612214275,
      "loss": 3.1043,
      "step": 93604
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4948316812515259,
      "learning_rate": 0.00038707060164508254,
      "loss": 3.071,
      "step": 93605
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1326563358306885,
      "learning_rate": 0.00038706668715183417,
      "loss": 3.1641,
      "step": 93606
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.161520004272461,
      "learning_rate": 0.0003870627726423986,
      "loss": 2.9864,
      "step": 93607
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.903735399246216,
      "learning_rate": 0.00038705885811677633,
      "loss": 3.1447,
      "step": 93608
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8314955234527588,
      "learning_rate": 0.00038705494357496817,
      "loss": 3.1802,
      "step": 93609
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6452600955963135,
      "learning_rate": 0.00038705102901697493,
      "loss": 2.9759,
      "step": 93610
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.608095169067383,
      "learning_rate": 0.0003870471144427972,
      "loss": 2.8306,
      "step": 93611
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.09725022315979,
      "learning_rate": 0.0003870431998524357,
      "loss": 2.8405,
      "step": 93612
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.12896466255188,
      "learning_rate": 0.0003870392852458914,
      "loss": 3.0244,
      "step": 93613
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9699349403381348,
      "learning_rate": 0.0003870353706231647,
      "loss": 3.0548,
      "step": 93614
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5763814449310303,
      "learning_rate": 0.0003870314559842565,
      "loss": 2.8928,
      "step": 93615
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.019876718521118,
      "learning_rate": 0.00038702754132916766,
      "loss": 2.9544,
      "step": 93616
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9162458181381226,
      "learning_rate": 0.00038702362665789854,
      "loss": 3.1509,
      "step": 93617
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.225623846054077,
      "learning_rate": 0.0003870197119704502,
      "loss": 2.9135,
      "step": 93618
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.8382408618927,
      "learning_rate": 0.0003870157972668233,
      "loss": 3.1246,
      "step": 93619
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0973260402679443,
      "learning_rate": 0.00038701188254701847,
      "loss": 2.8428,
      "step": 93620
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.186206340789795,
      "learning_rate": 0.0003870079678110364,
      "loss": 3.0297,
      "step": 93621
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.467240333557129,
      "learning_rate": 0.0003870040530588781,
      "loss": 3.0539,
      "step": 93622
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4021365642547607,
      "learning_rate": 0.0003870001382905439,
      "loss": 2.9561,
      "step": 93623
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7042946815490723,
      "learning_rate": 0.0003869962235060348,
      "loss": 3.1241,
      "step": 93624
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9655718803405762,
      "learning_rate": 0.0003869923087053516,
      "loss": 3.058,
      "step": 93625
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.276655435562134,
      "learning_rate": 0.00038698839388849474,
      "loss": 2.9452,
      "step": 93626
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.1461105346679688,
      "learning_rate": 0.0003869844790554651,
      "loss": 2.9105,
      "step": 93627
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8729251623153687,
      "learning_rate": 0.0003869805642062634,
      "loss": 2.9884,
      "step": 93628
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7547425031661987,
      "learning_rate": 0.0003869766493408905,
      "loss": 2.9901,
      "step": 93629
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.041118860244751,
      "learning_rate": 0.0003869727344593468,
      "loss": 3.197,
      "step": 93630
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.051710844039917,
      "learning_rate": 0.0003869688195616334,
      "loss": 2.9455,
      "step": 93631
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.909117341041565,
      "learning_rate": 0.0003869649046477507,
      "loss": 2.9938,
      "step": 93632
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.621917486190796,
      "learning_rate": 0.0003869609897176997,
      "loss": 2.8267,
      "step": 93633
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0457170009613037,
      "learning_rate": 0.00038695707477148106,
      "loss": 2.9235,
      "step": 93634
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5721642971038818,
      "learning_rate": 0.0003869531598090953,
      "loss": 3.0179,
      "step": 93635
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1065666675567627,
      "learning_rate": 0.00038694924483054336,
      "loss": 3.1978,
      "step": 93636
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9738233089447021,
      "learning_rate": 0.000386945329835826,
      "loss": 2.9911,
      "step": 93637
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4568228721618652,
      "learning_rate": 0.0003869414148249438,
      "loss": 2.8569,
      "step": 93638
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7454806566238403,
      "learning_rate": 0.0003869374997978975,
      "loss": 3.1013,
      "step": 93639
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8739492893218994,
      "learning_rate": 0.0003869335847546879,
      "loss": 3.0961,
      "step": 93640
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4574190378189087,
      "learning_rate": 0.0003869296696953158,
      "loss": 2.9385,
      "step": 93641
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4640460014343262,
      "learning_rate": 0.00038692575461978174,
      "loss": 2.7416,
      "step": 93642
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7405519485473633,
      "learning_rate": 0.00038692183952808657,
      "loss": 3.0375,
      "step": 93643
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8834550380706787,
      "learning_rate": 0.000386917924420231,
      "loss": 3.1302,
      "step": 93644
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8393558263778687,
      "learning_rate": 0.0003869140092962157,
      "loss": 3.1762,
      "step": 93645
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.134836196899414,
      "learning_rate": 0.0003869100941560415,
      "loss": 3.0318,
      "step": 93646
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.83077871799469,
      "learning_rate": 0.000386906178999709,
      "loss": 3.3003,
      "step": 93647
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7826627492904663,
      "learning_rate": 0.0003869022638272191,
      "loss": 2.9853,
      "step": 93648
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6244337558746338,
      "learning_rate": 0.0003868983486385724,
      "loss": 2.8924,
      "step": 93649
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6811912059783936,
      "learning_rate": 0.0003868944334337696,
      "loss": 3.2699,
      "step": 93650
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.834897518157959,
      "learning_rate": 0.0003868905182128115,
      "loss": 2.9018,
      "step": 93651
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.90714693069458,
      "learning_rate": 0.0003868866029756988,
      "loss": 3.0701,
      "step": 93652
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6724865436553955,
      "learning_rate": 0.00038688268772243235,
      "loss": 2.9253,
      "step": 93653
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7381377220153809,
      "learning_rate": 0.00038687877245301264,
      "loss": 3.098,
      "step": 93654
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.706288456916809,
      "learning_rate": 0.0003868748571674406,
      "loss": 2.9667,
      "step": 93655
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7344017028808594,
      "learning_rate": 0.00038687094186571686,
      "loss": 3.0969,
      "step": 93656
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.744254231452942,
      "learning_rate": 0.00038686702654784215,
      "loss": 3.1161,
      "step": 93657
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5568766593933105,
      "learning_rate": 0.0003868631112138172,
      "loss": 3.1856,
      "step": 93658
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0151305198669434,
      "learning_rate": 0.0003868591958636429,
      "loss": 3.0159,
      "step": 93659
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.13411808013916,
      "learning_rate": 0.00038685528049731973,
      "loss": 2.9255,
      "step": 93660
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7619763612747192,
      "learning_rate": 0.00038685136511484847,
      "loss": 3.0596,
      "step": 93661
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1086580753326416,
      "learning_rate": 0.00038684744971623,
      "loss": 3.1666,
      "step": 93662
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6509703397750854,
      "learning_rate": 0.0003868435343014649,
      "loss": 2.8598,
      "step": 93663
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.849111557006836,
      "learning_rate": 0.0003868396188705539,
      "loss": 2.702,
      "step": 93664
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7343213558197021,
      "learning_rate": 0.0003868357034234979,
      "loss": 2.875,
      "step": 93665
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.711204171180725,
      "learning_rate": 0.00038683178796029746,
      "loss": 3.15,
      "step": 93666
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8143181800842285,
      "learning_rate": 0.0003868278724809533,
      "loss": 3.1263,
      "step": 93667
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.026646852493286,
      "learning_rate": 0.0003868239569854663,
      "loss": 2.9691,
      "step": 93668
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7919145822525024,
      "learning_rate": 0.00038682004147383696,
      "loss": 2.9292,
      "step": 93669
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8442717790603638,
      "learning_rate": 0.0003868161259460662,
      "loss": 3.223,
      "step": 93670
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.120166540145874,
      "learning_rate": 0.0003868122104021546,
      "loss": 3.1812,
      "step": 93671
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8222455978393555,
      "learning_rate": 0.00038680829484210314,
      "loss": 3.0998,
      "step": 93672
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.9725594520568848,
      "learning_rate": 0.0003868043792659122,
      "loss": 2.9424,
      "step": 93673
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.821298360824585,
      "learning_rate": 0.0003868004636735828,
      "loss": 2.9512,
      "step": 93674
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.629268169403076,
      "learning_rate": 0.0003867965480651156,
      "loss": 3.1572,
      "step": 93675
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9102298021316528,
      "learning_rate": 0.00038679263244051115,
      "loss": 3.1553,
      "step": 93676
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.750710368156433,
      "learning_rate": 0.0003867887167997704,
      "loss": 3.2806,
      "step": 93677
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4410440921783447,
      "learning_rate": 0.00038678480114289393,
      "loss": 3.0429,
      "step": 93678
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.21390700340271,
      "learning_rate": 0.0003867808854698826,
      "loss": 2.8959,
      "step": 93679
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.792627215385437,
      "learning_rate": 0.000386776969780737,
      "loss": 2.9497,
      "step": 93680
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.971994400024414,
      "learning_rate": 0.00038677305407545796,
      "loss": 2.8284,
      "step": 93681
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.465057134628296,
      "learning_rate": 0.0003867691383540461,
      "loss": 3.0315,
      "step": 93682
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.253674268722534,
      "learning_rate": 0.00038676522261650234,
      "loss": 3.2884,
      "step": 93683
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1298553943634033,
      "learning_rate": 0.0003867613068628272,
      "loss": 2.8498,
      "step": 93684
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4879074096679688,
      "learning_rate": 0.00038675739109302146,
      "loss": 2.9996,
      "step": 93685
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.9260082244873047,
      "learning_rate": 0.00038675347530708605,
      "loss": 3.103,
      "step": 93686
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9441239833831787,
      "learning_rate": 0.0003867495595050214,
      "loss": 2.8159,
      "step": 93687
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1713736057281494,
      "learning_rate": 0.0003867456436868284,
      "loss": 3.0406,
      "step": 93688
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.427420139312744,
      "learning_rate": 0.0003867417278525077,
      "loss": 3.0838,
      "step": 93689
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.1492743492126465,
      "learning_rate": 0.00038673781200206016,
      "loss": 2.9996,
      "step": 93690
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.951582431793213,
      "learning_rate": 0.0003867338961354864,
      "loss": 2.8147,
      "step": 93691
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5737310647964478,
      "learning_rate": 0.00038672998025278713,
      "loss": 3.097,
      "step": 93692
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.585476875305176,
      "learning_rate": 0.00038672606435396313,
      "loss": 3.1968,
      "step": 93693
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.646387100219727,
      "learning_rate": 0.0003867221484390151,
      "loss": 2.9145,
      "step": 93694
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.7965829372406006,
      "learning_rate": 0.00038671823250794387,
      "loss": 3.0118,
      "step": 93695
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.739924192428589,
      "learning_rate": 0.00038671431656075,
      "loss": 3.0309,
      "step": 93696
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7693984508514404,
      "learning_rate": 0.00038671040059743436,
      "loss": 2.9421,
      "step": 93697
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.234930992126465,
      "learning_rate": 0.00038670648461799765,
      "loss": 3.1583,
      "step": 93698
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.181296348571777,
      "learning_rate": 0.00038670256862244047,
      "loss": 3.0121,
      "step": 93699
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.69490647315979,
      "learning_rate": 0.0003866986526107637,
      "loss": 2.9317,
      "step": 93700
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.524657964706421,
      "learning_rate": 0.000386694736582968,
      "loss": 2.9533,
      "step": 93701
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4061763286590576,
      "learning_rate": 0.0003866908205390541,
      "loss": 2.6881,
      "step": 93702
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.719910144805908,
      "learning_rate": 0.00038668690447902273,
      "loss": 2.8525,
      "step": 93703
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.046041488647461,
      "learning_rate": 0.0003866829884028747,
      "loss": 3.0999,
      "step": 93704
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5736522674560547,
      "learning_rate": 0.0003866790723106106,
      "loss": 3.2879,
      "step": 93705
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.842190146446228,
      "learning_rate": 0.00038667515620223117,
      "loss": 3.0716,
      "step": 93706
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.021533727645874,
      "learning_rate": 0.00038667124007773736,
      "loss": 2.9541,
      "step": 93707
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.3187177181243896,
      "learning_rate": 0.0003866673239371296,
      "loss": 3.0834,
      "step": 93708
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5321600437164307,
      "learning_rate": 0.00038666340778040876,
      "loss": 3.0044,
      "step": 93709
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.908370018005371,
      "learning_rate": 0.0003866594916075756,
      "loss": 3.1376,
      "step": 93710
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6955732107162476,
      "learning_rate": 0.0003866555754186308,
      "loss": 3.1156,
      "step": 93711
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.2504448890686035,
      "learning_rate": 0.000386651659213575,
      "loss": 3.1304,
      "step": 93712
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.901768684387207,
      "learning_rate": 0.0003866477429924092,
      "loss": 3.1553,
      "step": 93713
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7850666046142578,
      "learning_rate": 0.0003866438267551338,
      "loss": 2.8631,
      "step": 93714
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9099878072738647,
      "learning_rate": 0.0003866399105017497,
      "loss": 2.872,
      "step": 93715
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.8170621395111084,
      "learning_rate": 0.00038663599423225764,
      "loss": 2.9147,
      "step": 93716
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.8741326332092285,
      "learning_rate": 0.0003866320779466583,
      "loss": 3.1129,
      "step": 93717
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0652687549591064,
      "learning_rate": 0.00038662816164495244,
      "loss": 2.9446,
      "step": 93718
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2716381549835205,
      "learning_rate": 0.0003866242453271407,
      "loss": 3.0245,
      "step": 93719
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.1407971382141113,
      "learning_rate": 0.000386620328993224,
      "loss": 2.905,
      "step": 93720
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.581458568572998,
      "learning_rate": 0.00038661641264320286,
      "loss": 2.87,
      "step": 93721
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5972707271575928,
      "learning_rate": 0.00038661249627707807,
      "loss": 3.3836,
      "step": 93722
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8921127319335938,
      "learning_rate": 0.0003866085798948505,
      "loss": 3.0767,
      "step": 93723
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.665996551513672,
      "learning_rate": 0.00038660466349652067,
      "loss": 3.2756,
      "step": 93724
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.186645746231079,
      "learning_rate": 0.00038660074708208935,
      "loss": 3.1848,
      "step": 93725
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6056801080703735,
      "learning_rate": 0.0003865968306515574,
      "loss": 3.2886,
      "step": 93726
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.590601682662964,
      "learning_rate": 0.00038659291420492547,
      "loss": 2.8655,
      "step": 93727
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.818629741668701,
      "learning_rate": 0.0003865889977421942,
      "loss": 2.8762,
      "step": 93728
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.042954206466675,
      "learning_rate": 0.00038658508126336447,
      "loss": 3.1382,
      "step": 93729
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5708028078079224,
      "learning_rate": 0.00038658116476843695,
      "loss": 2.8598,
      "step": 93730
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9490629434585571,
      "learning_rate": 0.0003865772482574123,
      "loss": 3.0535,
      "step": 93731
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.182098865509033,
      "learning_rate": 0.0003865733317302913,
      "loss": 3.0677,
      "step": 93732
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.585308790206909,
      "learning_rate": 0.0003865694151870748,
      "loss": 2.7725,
      "step": 93733
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7841602563858032,
      "learning_rate": 0.0003865654986277633,
      "loss": 2.9925,
      "step": 93734
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.5091238021850586,
      "learning_rate": 0.0003865615820523577,
      "loss": 3.1028,
      "step": 93735
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.997692108154297,
      "learning_rate": 0.0003865576654608586,
      "loss": 3.1445,
      "step": 93736
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.747415542602539,
      "learning_rate": 0.00038655374885326687,
      "loss": 2.9033,
      "step": 93737
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.820724368095398,
      "learning_rate": 0.00038654983222958313,
      "loss": 3.0099,
      "step": 93738
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.105830669403076,
      "learning_rate": 0.0003865459155898081,
      "loss": 3.1008,
      "step": 93739
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4474916458129883,
      "learning_rate": 0.0003865419989339426,
      "loss": 2.8599,
      "step": 93740
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9492770433425903,
      "learning_rate": 0.0003865380822619874,
      "loss": 2.9317,
      "step": 93741
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.009385585784912,
      "learning_rate": 0.00038653416557394297,
      "loss": 3.0177,
      "step": 93742
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.704879641532898,
      "learning_rate": 0.00038653024886981023,
      "loss": 3.026,
      "step": 93743
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8338090181350708,
      "learning_rate": 0.00038652633214958994,
      "loss": 3.2108,
      "step": 93744
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.822378158569336,
      "learning_rate": 0.00038652241541328275,
      "loss": 2.9635,
      "step": 93745
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.646894931793213,
      "learning_rate": 0.0003865184986608894,
      "loss": 3.1465,
      "step": 93746
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.746166706085205,
      "learning_rate": 0.0003865145818924107,
      "loss": 2.8212,
      "step": 93747
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7151786088943481,
      "learning_rate": 0.0003865106651078472,
      "loss": 2.9414,
      "step": 93748
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5442296266555786,
      "learning_rate": 0.0003865067483071998,
      "loss": 3.0151,
      "step": 93749
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3069303035736084,
      "learning_rate": 0.0003865028314904692,
      "loss": 3.0131,
      "step": 93750
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.102587938308716,
      "learning_rate": 0.000386498914657656,
      "loss": 2.8319,
      "step": 93751
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.309159517288208,
      "learning_rate": 0.0003864949978087611,
      "loss": 2.8666,
      "step": 93752
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1960337162017822,
      "learning_rate": 0.00038649108094378513,
      "loss": 3.0679,
      "step": 93753
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5274213552474976,
      "learning_rate": 0.00038648716406272876,
      "loss": 2.8636,
      "step": 93754
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7171285152435303,
      "learning_rate": 0.00038648324716559286,
      "loss": 2.7636,
      "step": 93755
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.433157205581665,
      "learning_rate": 0.0003864793302523781,
      "loss": 3.0541,
      "step": 93756
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.745640516281128,
      "learning_rate": 0.0003864754133230852,
      "loss": 3.1543,
      "step": 93757
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0151047706604004,
      "learning_rate": 0.00038647149637771475,
      "loss": 2.9259,
      "step": 93758
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.674159288406372,
      "learning_rate": 0.0003864675794162677,
      "loss": 2.9837,
      "step": 93759
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.589792013168335,
      "learning_rate": 0.00038646366243874484,
      "loss": 2.8366,
      "step": 93760
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7580453157424927,
      "learning_rate": 0.00038645974544514655,
      "loss": 3.028,
      "step": 93761
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5186127424240112,
      "learning_rate": 0.0003864558284354739,
      "loss": 2.8157,
      "step": 93762
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8641563653945923,
      "learning_rate": 0.0003864519114097275,
      "loss": 2.8539,
      "step": 93763
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8210047483444214,
      "learning_rate": 0.0003864479943679079,
      "loss": 3.1141,
      "step": 93764
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4345858097076416,
      "learning_rate": 0.00038644407731001603,
      "loss": 3.2902,
      "step": 93765
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.933645248413086,
      "learning_rate": 0.0003864401602360526,
      "loss": 2.8632,
      "step": 93766
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8501064777374268,
      "learning_rate": 0.0003864362431460184,
      "loss": 3.1436,
      "step": 93767
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1415560245513916,
      "learning_rate": 0.000386432326039914,
      "loss": 2.7986,
      "step": 93768
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7838481664657593,
      "learning_rate": 0.0003864284089177402,
      "loss": 3.1929,
      "step": 93769
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8865047693252563,
      "learning_rate": 0.00038642449177949767,
      "loss": 2.7515,
      "step": 93770
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.033367156982422,
      "learning_rate": 0.00038642057462518733,
      "loss": 2.8887,
      "step": 93771
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5025931596755981,
      "learning_rate": 0.00038641665745480966,
      "loss": 3.0502,
      "step": 93772
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.7462987899780273,
      "learning_rate": 0.0003864127402683655,
      "loss": 2.9644,
      "step": 93773
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.898543119430542,
      "learning_rate": 0.0003864088230658557,
      "loss": 3.2143,
      "step": 93774
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.873970866203308,
      "learning_rate": 0.0003864049058472807,
      "loss": 3.1127,
      "step": 93775
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8966619968414307,
      "learning_rate": 0.0003864009886126415,
      "loss": 3.1272,
      "step": 93776
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9897902011871338,
      "learning_rate": 0.0003863970713619388,
      "loss": 2.6929,
      "step": 93777
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.015650987625122,
      "learning_rate": 0.0003863931540951731,
      "loss": 2.8926,
      "step": 93778
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7132952213287354,
      "learning_rate": 0.0003863892368123453,
      "loss": 3.0347,
      "step": 93779
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5760974884033203,
      "learning_rate": 0.0003863853195134562,
      "loss": 2.7996,
      "step": 93780
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8835489749908447,
      "learning_rate": 0.0003863814021985064,
      "loss": 2.8853,
      "step": 93781
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.921958088874817,
      "learning_rate": 0.00038637748486749666,
      "loss": 2.9661,
      "step": 93782
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8381779193878174,
      "learning_rate": 0.0003863735675204277,
      "loss": 2.9194,
      "step": 93783
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5171293020248413,
      "learning_rate": 0.0003863696501573003,
      "loss": 2.9639,
      "step": 93784
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8286303281784058,
      "learning_rate": 0.00038636573277811516,
      "loss": 2.8767,
      "step": 93785
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8091281652450562,
      "learning_rate": 0.00038636181538287287,
      "loss": 2.8139,
      "step": 93786
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5133519172668457,
      "learning_rate": 0.00038635789797157447,
      "loss": 2.8623,
      "step": 93787
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.612346887588501,
      "learning_rate": 0.0003863539805442204,
      "loss": 3.0873,
      "step": 93788
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7189821004867554,
      "learning_rate": 0.0003863500631008115,
      "loss": 2.9434,
      "step": 93789
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3931281566619873,
      "learning_rate": 0.0003863461456413485,
      "loss": 3.0539,
      "step": 93790
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2986137866973877,
      "learning_rate": 0.00038634222816583217,
      "loss": 3.4251,
      "step": 93791
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4934006929397583,
      "learning_rate": 0.00038633831067426313,
      "loss": 3.1911,
      "step": 93792
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.782618761062622,
      "learning_rate": 0.00038633439316664226,
      "loss": 2.6895,
      "step": 93793
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8522017002105713,
      "learning_rate": 0.0003863304756429701,
      "loss": 2.9625,
      "step": 93794
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8791204690933228,
      "learning_rate": 0.0003863265581032475,
      "loss": 3.1033,
      "step": 93795
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6550098657608032,
      "learning_rate": 0.0003863226405474752,
      "loss": 3.1122,
      "step": 93796
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9042409658432007,
      "learning_rate": 0.00038631872297565387,
      "loss": 3.0136,
      "step": 93797
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0984175205230713,
      "learning_rate": 0.0003863148053877842,
      "loss": 3.2908,
      "step": 93798
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8934849500656128,
      "learning_rate": 0.0003863108877838671,
      "loss": 2.9135,
      "step": 93799
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7249969244003296,
      "learning_rate": 0.00038630697016390304,
      "loss": 2.9998,
      "step": 93800
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9462355375289917,
      "learning_rate": 0.00038630305252789296,
      "loss": 3.0762,
      "step": 93801
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.721584439277649,
      "learning_rate": 0.00038629913487583755,
      "loss": 2.9632,
      "step": 93802
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.119631052017212,
      "learning_rate": 0.0003862952172077374,
      "loss": 2.9658,
      "step": 93803
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.78415048122406,
      "learning_rate": 0.0003862912995235934,
      "loss": 3.0309,
      "step": 93804
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6524471044540405,
      "learning_rate": 0.00038628738182340625,
      "loss": 2.9594,
      "step": 93805
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7528181076049805,
      "learning_rate": 0.00038628346410717664,
      "loss": 2.8336,
      "step": 93806
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.158508539199829,
      "learning_rate": 0.00038627954637490523,
      "loss": 2.96,
      "step": 93807
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7567309141159058,
      "learning_rate": 0.00038627562862659294,
      "loss": 2.8852,
      "step": 93808
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.702476978302002,
      "learning_rate": 0.00038627171086224026,
      "loss": 3.068,
      "step": 93809
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3965795040130615,
      "learning_rate": 0.00038626779308184806,
      "loss": 3.018,
      "step": 93810
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7305899858474731,
      "learning_rate": 0.00038626387528541714,
      "loss": 3.3499,
      "step": 93811
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.716729760169983,
      "learning_rate": 0.00038625995747294806,
      "loss": 2.9951,
      "step": 93812
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.856462001800537,
      "learning_rate": 0.0003862560396444416,
      "loss": 3.2977,
      "step": 93813
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8707576990127563,
      "learning_rate": 0.00038625212179989863,
      "loss": 3.0294,
      "step": 93814
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.809495449066162,
      "learning_rate": 0.0003862482039393197,
      "loss": 2.9329,
      "step": 93815
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9959081411361694,
      "learning_rate": 0.00038624428606270553,
      "loss": 3.1185,
      "step": 93816
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0949742794036865,
      "learning_rate": 0.00038624036817005704,
      "loss": 3.0585,
      "step": 93817
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.428524971008301,
      "learning_rate": 0.00038623645026137477,
      "loss": 3.1079,
      "step": 93818
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.328356981277466,
      "learning_rate": 0.0003862325323366595,
      "loss": 2.8272,
      "step": 93819
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7945435047149658,
      "learning_rate": 0.000386228614395912,
      "loss": 3.1203,
      "step": 93820
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3396201133728027,
      "learning_rate": 0.000386224696439133,
      "loss": 3.0851,
      "step": 93821
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6611301898956299,
      "learning_rate": 0.00038622077846632314,
      "loss": 2.9454,
      "step": 93822
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5121577978134155,
      "learning_rate": 0.0003862168604774833,
      "loss": 3.1364,
      "step": 93823
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.669243574142456,
      "learning_rate": 0.00038621294247261404,
      "loss": 2.8396,
      "step": 93824
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.0988309383392334,
      "learning_rate": 0.0003862090244517162,
      "loss": 2.9021,
      "step": 93825
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8693301677703857,
      "learning_rate": 0.0003862051064147905,
      "loss": 3.34,
      "step": 93826
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.227869749069214,
      "learning_rate": 0.00038620118836183755,
      "loss": 3.0842,
      "step": 93827
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.755063533782959,
      "learning_rate": 0.0003861972702928582,
      "loss": 2.9738,
      "step": 93828
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.421645402908325,
      "learning_rate": 0.0003861933522078532,
      "loss": 3.0653,
      "step": 93829
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.0616579055786133,
      "learning_rate": 0.00038618943410682324,
      "loss": 2.9634,
      "step": 93830
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8135278224945068,
      "learning_rate": 0.00038618551598976897,
      "loss": 2.8707,
      "step": 93831
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6864649057388306,
      "learning_rate": 0.00038618159785669127,
      "loss": 3.1457,
      "step": 93832
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9344751834869385,
      "learning_rate": 0.00038617767970759075,
      "loss": 3.04,
      "step": 93833
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.757935047149658,
      "learning_rate": 0.0003861737615424681,
      "loss": 3.0886,
      "step": 93834
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2288808822631836,
      "learning_rate": 0.00038616984336132425,
      "loss": 2.7582,
      "step": 93835
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4020311832427979,
      "learning_rate": 0.00038616592516415964,
      "loss": 2.8667,
      "step": 93836
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7424644231796265,
      "learning_rate": 0.0003861620069509753,
      "loss": 2.8216,
      "step": 93837
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.2602052688598633,
      "learning_rate": 0.0003861580887217718,
      "loss": 2.8956,
      "step": 93838
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2301294803619385,
      "learning_rate": 0.00038615417047654975,
      "loss": 3.1395,
      "step": 93839
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1343495845794678,
      "learning_rate": 0.0003861502522153101,
      "loss": 2.9329,
      "step": 93840
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.9937498569488525,
      "learning_rate": 0.0003861463339380536,
      "loss": 2.9927,
      "step": 93841
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.6608259677886963,
      "learning_rate": 0.00038614241564478076,
      "loss": 3.0217,
      "step": 93842
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0565741062164307,
      "learning_rate": 0.0003861384973354924,
      "loss": 3.159,
      "step": 93843
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8190921545028687,
      "learning_rate": 0.0003861345790101893,
      "loss": 2.9218,
      "step": 93844
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.9344606399536133,
      "learning_rate": 0.00038613066066887214,
      "loss": 2.978,
      "step": 93845
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6354105472564697,
      "learning_rate": 0.0003861267423115417,
      "loss": 3.0708,
      "step": 93846
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4718308448791504,
      "learning_rate": 0.0003861228239381986,
      "loss": 2.923,
      "step": 93847
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.540065884590149,
      "learning_rate": 0.0003861189055488438,
      "loss": 3.0126,
      "step": 93848
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.7085824012756348,
      "learning_rate": 0.00038611498714347765,
      "loss": 3.0606,
      "step": 93849
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.312490701675415,
      "learning_rate": 0.0003861110687221012,
      "loss": 2.9442,
      "step": 93850
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1925048828125,
      "learning_rate": 0.00038610715028471513,
      "loss": 2.8781,
      "step": 93851
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5303378105163574,
      "learning_rate": 0.0003861032318313201,
      "loss": 2.7095,
      "step": 93852
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9140279293060303,
      "learning_rate": 0.0003860993133619168,
      "loss": 3.126,
      "step": 93853
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6559820175170898,
      "learning_rate": 0.000386095394876506,
      "loss": 3.0983,
      "step": 93854
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7723443508148193,
      "learning_rate": 0.00038609147637508855,
      "loss": 3.0608,
      "step": 93855
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.781916618347168,
      "learning_rate": 0.000386087557857665,
      "loss": 2.8483,
      "step": 93856
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.532776355743408,
      "learning_rate": 0.00038608363932423614,
      "loss": 2.7817,
      "step": 93857
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6232715845108032,
      "learning_rate": 0.00038607972077480264,
      "loss": 3.1821,
      "step": 93858
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8033883571624756,
      "learning_rate": 0.00038607580220936544,
      "loss": 3.1902,
      "step": 93859
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5829739570617676,
      "learning_rate": 0.00038607188362792505,
      "loss": 2.9936,
      "step": 93860
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.967737078666687,
      "learning_rate": 0.00038606796503048226,
      "loss": 2.9142,
      "step": 93861
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7112152576446533,
      "learning_rate": 0.0003860640464170378,
      "loss": 2.9473,
      "step": 93862
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1104280948638916,
      "learning_rate": 0.00038606012778759244,
      "loss": 2.9084,
      "step": 93863
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0944037437438965,
      "learning_rate": 0.00038605620914214687,
      "loss": 2.9497,
      "step": 93864
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.333634376525879,
      "learning_rate": 0.0003860522904807018,
      "loss": 2.9769,
      "step": 93865
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.10701847076416,
      "learning_rate": 0.00038604837180325805,
      "loss": 2.9275,
      "step": 93866
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2777106761932373,
      "learning_rate": 0.00038604445310981626,
      "loss": 2.9833,
      "step": 93867
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8129371404647827,
      "learning_rate": 0.0003860405344003772,
      "loss": 2.914,
      "step": 93868
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.285738468170166,
      "learning_rate": 0.00038603661567494156,
      "loss": 2.8694,
      "step": 93869
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6070187091827393,
      "learning_rate": 0.00038603269693351,
      "loss": 3.0436,
      "step": 93870
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.163057804107666,
      "learning_rate": 0.0003860287781760834,
      "loss": 3.1384,
      "step": 93871
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.922652244567871,
      "learning_rate": 0.00038602485940266253,
      "loss": 3.3072,
      "step": 93872
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.253917694091797,
      "learning_rate": 0.00038602094061324793,
      "loss": 3.1837,
      "step": 93873
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8418992757797241,
      "learning_rate": 0.0003860170218078404,
      "loss": 3.0413,
      "step": 93874
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.040151357650757,
      "learning_rate": 0.00038601310298644066,
      "loss": 2.9555,
      "step": 93875
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0304410457611084,
      "learning_rate": 0.0003860091841490495,
      "loss": 3.0615,
      "step": 93876
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.9430060386657715,
      "learning_rate": 0.0003860052652956676,
      "loss": 2.9148,
      "step": 93877
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.540313243865967,
      "learning_rate": 0.00038600134642629576,
      "loss": 2.9337,
      "step": 93878
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9331402778625488,
      "learning_rate": 0.0003859974275409346,
      "loss": 3.0759,
      "step": 93879
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.560753583908081,
      "learning_rate": 0.0003859935086395848,
      "loss": 2.6957,
      "step": 93880
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3393852710723877,
      "learning_rate": 0.0003859895897222473,
      "loss": 2.7488,
      "step": 93881
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4804697036743164,
      "learning_rate": 0.00038598567078892266,
      "loss": 2.6954,
      "step": 93882
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6757819652557373,
      "learning_rate": 0.00038598175183961165,
      "loss": 2.9619,
      "step": 93883
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.391148328781128,
      "learning_rate": 0.0003859778328743151,
      "loss": 3.071,
      "step": 93884
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.9938805103302,
      "learning_rate": 0.0003859739138930336,
      "loss": 3.1281,
      "step": 93885
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6913851499557495,
      "learning_rate": 0.0003859699948957679,
      "loss": 2.9987,
      "step": 93886
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1927504539489746,
      "learning_rate": 0.0003859660758825188,
      "loss": 2.9856,
      "step": 93887
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7429959774017334,
      "learning_rate": 0.0003859621568532869,
      "loss": 3.359,
      "step": 93888
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.3268866539001465,
      "learning_rate": 0.00038595823780807303,
      "loss": 2.9695,
      "step": 93889
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2039835453033447,
      "learning_rate": 0.000385954318746878,
      "loss": 2.7955,
      "step": 93890
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8439751863479614,
      "learning_rate": 0.00038595039966970237,
      "loss": 3.1838,
      "step": 93891
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5070905685424805,
      "learning_rate": 0.0003859464805765469,
      "loss": 2.9804,
      "step": 93892
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.996887445449829,
      "learning_rate": 0.00038594256146741245,
      "loss": 2.9475,
      "step": 93893
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.765088438987732,
      "learning_rate": 0.0003859386423422996,
      "loss": 2.898,
      "step": 93894
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.0377485752105713,
      "learning_rate": 0.0003859347232012091,
      "loss": 3.0259,
      "step": 93895
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.801535725593567,
      "learning_rate": 0.00038593080404414185,
      "loss": 3.227,
      "step": 93896
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8745418787002563,
      "learning_rate": 0.00038592688487109825,
      "loss": 2.9787,
      "step": 93897
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4155499935150146,
      "learning_rate": 0.00038592296568207935,
      "loss": 3.0105,
      "step": 93898
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.889518141746521,
      "learning_rate": 0.0003859190464770857,
      "loss": 2.691,
      "step": 93899
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2385330200195312,
      "learning_rate": 0.0003859151272561181,
      "loss": 3.1155,
      "step": 93900
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.3296284675598145,
      "learning_rate": 0.00038591120801917725,
      "loss": 2.745,
      "step": 93901
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9305285215377808,
      "learning_rate": 0.0003859072887662639,
      "loss": 2.6905,
      "step": 93902
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.032078266143799,
      "learning_rate": 0.00038590336949737865,
      "loss": 3.0437,
      "step": 93903
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.790897011756897,
      "learning_rate": 0.00038589945021252247,
      "loss": 2.9741,
      "step": 93904
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.6416451930999756,
      "learning_rate": 0.000385895530911696,
      "loss": 3.0887,
      "step": 93905
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.500157594680786,
      "learning_rate": 0.00038589161159489986,
      "loss": 2.9984,
      "step": 93906
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6232786178588867,
      "learning_rate": 0.0003858876922621348,
      "loss": 3.0331,
      "step": 93907
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5576303005218506,
      "learning_rate": 0.0003858837729134017,
      "loss": 2.9333,
      "step": 93908
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.492658853530884,
      "learning_rate": 0.00038587985354870106,
      "loss": 3.0656,
      "step": 93909
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8445065021514893,
      "learning_rate": 0.00038587593416803377,
      "loss": 2.7304,
      "step": 93910
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3041579723358154,
      "learning_rate": 0.0003858720147714006,
      "loss": 2.9581,
      "step": 93911
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.668262004852295,
      "learning_rate": 0.00038586809535880217,
      "loss": 3.0543,
      "step": 93912
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.836883306503296,
      "learning_rate": 0.00038586417593023916,
      "loss": 2.8603,
      "step": 93913
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.678025722503662,
      "learning_rate": 0.0003858602564857125,
      "loss": 2.9688,
      "step": 93914
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8007785081863403,
      "learning_rate": 0.00038585633702522274,
      "loss": 3.1142,
      "step": 93915
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.004694700241089,
      "learning_rate": 0.0003858524175487707,
      "loss": 2.9116,
      "step": 93916
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7325162887573242,
      "learning_rate": 0.000385848498056357,
      "loss": 3.0628,
      "step": 93917
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.567943572998047,
      "learning_rate": 0.0003858445785479825,
      "loss": 2.9653,
      "step": 93918
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.6737735271453857,
      "learning_rate": 0.0003858406590236478,
      "loss": 3.2536,
      "step": 93919
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9823296070098877,
      "learning_rate": 0.00038583673948335375,
      "loss": 3.0884,
      "step": 93920
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7763996124267578,
      "learning_rate": 0.00038583281992710106,
      "loss": 3.0551,
      "step": 93921
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.7692556381225586,
      "learning_rate": 0.0003858289003548904,
      "loss": 2.9617,
      "step": 93922
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4101266860961914,
      "learning_rate": 0.00038582498076672255,
      "loss": 3.1725,
      "step": 93923
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.34706711769104,
      "learning_rate": 0.0003858210611625982,
      "loss": 2.855,
      "step": 93924
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8981612920761108,
      "learning_rate": 0.00038581714154251815,
      "loss": 3.0301,
      "step": 93925
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8403685092926025,
      "learning_rate": 0.000385813221906483,
      "loss": 3.0412,
      "step": 93926
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.141847848892212,
      "learning_rate": 0.0003858093022544936,
      "loss": 2.9452,
      "step": 93927
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0026159286499023,
      "learning_rate": 0.00038580538258655057,
      "loss": 3.1657,
      "step": 93928
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2106008529663086,
      "learning_rate": 0.0003858014629026547,
      "loss": 3.1696,
      "step": 93929
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8139773607254028,
      "learning_rate": 0.00038579754320280685,
      "loss": 2.9716,
      "step": 93930
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6559295654296875,
      "learning_rate": 0.0003857936234870074,
      "loss": 3.1908,
      "step": 93931
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.697075605392456,
      "learning_rate": 0.00038578970375525745,
      "loss": 2.9867,
      "step": 93932
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8378608226776123,
      "learning_rate": 0.00038578578400755753,
      "loss": 3.1601,
      "step": 93933
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7897239923477173,
      "learning_rate": 0.0003857818642439084,
      "loss": 2.7224,
      "step": 93934
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6929376125335693,
      "learning_rate": 0.00038577794446431087,
      "loss": 2.9491,
      "step": 93935
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.587280035018921,
      "learning_rate": 0.0003857740246687656,
      "loss": 2.8474,
      "step": 93936
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7687461376190186,
      "learning_rate": 0.00038577010485727327,
      "loss": 2.9043,
      "step": 93937
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6594871282577515,
      "learning_rate": 0.0003857661850298346,
      "loss": 2.979,
      "step": 93938
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.640896797180176,
      "learning_rate": 0.00038576226518645043,
      "loss": 2.7542,
      "step": 93939
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.129648208618164,
      "learning_rate": 0.00038575834532712154,
      "loss": 3.0731,
      "step": 93940
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2535829544067383,
      "learning_rate": 0.0003857544254518484,
      "loss": 3.1016,
      "step": 93941
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.729896903038025,
      "learning_rate": 0.00038575050556063196,
      "loss": 2.9095,
      "step": 93942
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9063233137130737,
      "learning_rate": 0.0003857465856534728,
      "loss": 3.1411,
      "step": 93943
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7866196632385254,
      "learning_rate": 0.0003857426657303719,
      "loss": 2.9902,
      "step": 93944
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7640702724456787,
      "learning_rate": 0.00038573874579132973,
      "loss": 2.8857,
      "step": 93945
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.577549457550049,
      "learning_rate": 0.00038573482583634705,
      "loss": 3.1413,
      "step": 93946
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0176870822906494,
      "learning_rate": 0.0003857309058654247,
      "loss": 2.9208,
      "step": 93947
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.613074779510498,
      "learning_rate": 0.0003857269858785634,
      "loss": 2.9503,
      "step": 93948
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8567471504211426,
      "learning_rate": 0.0003857230658757638,
      "loss": 2.7631,
      "step": 93949
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.071606159210205,
      "learning_rate": 0.00038571914585702657,
      "loss": 3.1627,
      "step": 93950
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.784999966621399,
      "learning_rate": 0.0003857152258223527,
      "loss": 3.0282,
      "step": 93951
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.489545226097107,
      "learning_rate": 0.0003857113057717427,
      "loss": 3.2225,
      "step": 93952
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.266969680786133,
      "learning_rate": 0.00038570738570519727,
      "loss": 3.0647,
      "step": 93953
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7226405143737793,
      "learning_rate": 0.00038570346562271735,
      "loss": 3.2887,
      "step": 93954
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.698277235031128,
      "learning_rate": 0.0003856995455243034,
      "loss": 3.115,
      "step": 93955
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1487810611724854,
      "learning_rate": 0.0003856956254099563,
      "loss": 2.964,
      "step": 93956
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4174511432647705,
      "learning_rate": 0.00038569170527967685,
      "loss": 2.9457,
      "step": 93957
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7514574527740479,
      "learning_rate": 0.0003856877851334657,
      "loss": 3.0581,
      "step": 93958
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.3765767812728882,
      "learning_rate": 0.0003856838649713235,
      "loss": 2.9233,
      "step": 93959
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.31265926361084,
      "learning_rate": 0.00038567994479325106,
      "loss": 3.1928,
      "step": 93960
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.9522764682769775,
      "learning_rate": 0.0003856760245992491,
      "loss": 3.06,
      "step": 93961
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8674229383468628,
      "learning_rate": 0.0003856721043893184,
      "loss": 2.7333,
      "step": 93962
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.796976327896118,
      "learning_rate": 0.00038566818416345964,
      "loss": 3.082,
      "step": 93963
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4357972145080566,
      "learning_rate": 0.00038566426392167355,
      "loss": 2.7873,
      "step": 93964
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.87445068359375,
      "learning_rate": 0.00038566034366396075,
      "loss": 3.0215,
      "step": 93965
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5279016494750977,
      "learning_rate": 0.0003856564233903222,
      "loss": 3.0442,
      "step": 93966
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4868113994598389,
      "learning_rate": 0.0003856525031007585,
      "loss": 2.9897,
      "step": 93967
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5522494316101074,
      "learning_rate": 0.0003856485827952703,
      "loss": 3.3148,
      "step": 93968
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.3502917289733887,
      "learning_rate": 0.00038564466247385847,
      "loss": 3.0401,
      "step": 93969
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0351474285125732,
      "learning_rate": 0.00038564074213652365,
      "loss": 2.9295,
      "step": 93970
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.341884136199951,
      "learning_rate": 0.0003856368217832666,
      "loss": 2.8528,
      "step": 93971
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.993039131164551,
      "learning_rate": 0.00038563290141408813,
      "loss": 3.1647,
      "step": 93972
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5055465698242188,
      "learning_rate": 0.0003856289810289888,
      "loss": 3.1393,
      "step": 93973
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.072817802429199,
      "learning_rate": 0.00038562506062796934,
      "loss": 3.0346,
      "step": 93974
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.576811671257019,
      "learning_rate": 0.0003856211402110308,
      "loss": 2.89,
      "step": 93975
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5298840999603271,
      "learning_rate": 0.0003856172197781735,
      "loss": 3.2138,
      "step": 93976
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.0181081295013428,
      "learning_rate": 0.0003856132993293983,
      "loss": 3.298,
      "step": 93977
    },
    {
      "epoch": 1.22,
      "grad_norm": 4.234689235687256,
      "learning_rate": 0.00038560937886470613,
      "loss": 3.1953,
      "step": 93978
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.032498836517334,
      "learning_rate": 0.00038560545838409746,
      "loss": 2.9037,
      "step": 93979
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5642120838165283,
      "learning_rate": 0.00038560153788757314,
      "loss": 2.7768,
      "step": 93980
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9952915906906128,
      "learning_rate": 0.00038559761737513393,
      "loss": 3.0988,
      "step": 93981
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8171250820159912,
      "learning_rate": 0.0003855936968467804,
      "loss": 3.1133,
      "step": 93982
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9612212181091309,
      "learning_rate": 0.00038558977630251347,
      "loss": 3.0958,
      "step": 93983
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.3661619424819946,
      "learning_rate": 0.0003855858557423337,
      "loss": 2.9512,
      "step": 93984
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.292052984237671,
      "learning_rate": 0.00038558193516624205,
      "loss": 3.3365,
      "step": 93985
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6513078212738037,
      "learning_rate": 0.000385578014574239,
      "loss": 2.9892,
      "step": 93986
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3219680786132812,
      "learning_rate": 0.00038557409396632535,
      "loss": 2.9246,
      "step": 93987
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9326767921447754,
      "learning_rate": 0.00038557017334250193,
      "loss": 3.158,
      "step": 93988
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0444834232330322,
      "learning_rate": 0.0003855662527027694,
      "loss": 3.0877,
      "step": 93989
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.493436098098755,
      "learning_rate": 0.0003855623320471285,
      "loss": 3.1449,
      "step": 93990
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7920019626617432,
      "learning_rate": 0.0003855584113755799,
      "loss": 3.0677,
      "step": 93991
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7195570468902588,
      "learning_rate": 0.0003855544906881244,
      "loss": 3.1541,
      "step": 93992
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9699969291687012,
      "learning_rate": 0.00038555056998476264,
      "loss": 3.1258,
      "step": 93993
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.194603204727173,
      "learning_rate": 0.00038554664926549555,
      "loss": 3.0434,
      "step": 93994
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1722183227539062,
      "learning_rate": 0.00038554272853032364,
      "loss": 2.8933,
      "step": 93995
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5960692167282104,
      "learning_rate": 0.0003855388077792477,
      "loss": 3.1072,
      "step": 93996
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8470414876937866,
      "learning_rate": 0.0003855348870122685,
      "loss": 2.9914,
      "step": 93997
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7276560068130493,
      "learning_rate": 0.0003855309662293868,
      "loss": 3.026,
      "step": 93998
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.686206817626953,
      "learning_rate": 0.0003855270454306032,
      "loss": 2.9671,
      "step": 93999
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.877803921699524,
      "learning_rate": 0.0003855231246159185,
      "loss": 3.1533,
      "step": 94000
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.5464224815368652,
      "learning_rate": 0.0003855192037853335,
      "loss": 2.9884,
      "step": 94001
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.7342207431793213,
      "learning_rate": 0.00038551528293884885,
      "loss": 2.8363,
      "step": 94002
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7029739618301392,
      "learning_rate": 0.0003855113620764653,
      "loss": 3.1887,
      "step": 94003
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7672909498214722,
      "learning_rate": 0.00038550744119818356,
      "loss": 3.1098,
      "step": 94004
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5964288711547852,
      "learning_rate": 0.0003855035203040044,
      "loss": 3.0201,
      "step": 94005
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6046987771987915,
      "learning_rate": 0.0003854995993939285,
      "loss": 2.9289,
      "step": 94006
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4655094146728516,
      "learning_rate": 0.0003854956784679566,
      "loss": 3.3,
      "step": 94007
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4575691223144531,
      "learning_rate": 0.0003854917575260894,
      "loss": 2.8929,
      "step": 94008
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6645994186401367,
      "learning_rate": 0.0003854878365683278,
      "loss": 3.2665,
      "step": 94009
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.349689245223999,
      "learning_rate": 0.0003854839155946723,
      "loss": 2.7154,
      "step": 94010
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4219589233398438,
      "learning_rate": 0.0003854799946051237,
      "loss": 2.9933,
      "step": 94011
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.596793293952942,
      "learning_rate": 0.0003854760735996829,
      "loss": 2.8145,
      "step": 94012
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6616201400756836,
      "learning_rate": 0.00038547215257835033,
      "loss": 3.1505,
      "step": 94013
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.704587697982788,
      "learning_rate": 0.00038546823154112685,
      "loss": 3.0042,
      "step": 94014
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7260390520095825,
      "learning_rate": 0.00038546431048801336,
      "loss": 3.1067,
      "step": 94015
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1026601791381836,
      "learning_rate": 0.0003854603894190104,
      "loss": 2.8207,
      "step": 94016
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.757514238357544,
      "learning_rate": 0.00038545646833411867,
      "loss": 3.1049,
      "step": 94017
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.671650767326355,
      "learning_rate": 0.000385452547233339,
      "loss": 2.9509,
      "step": 94018
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.4713737964630127,
      "learning_rate": 0.0003854486261166721,
      "loss": 2.8698,
      "step": 94019
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.4093682765960693,
      "learning_rate": 0.00038544470498411864,
      "loss": 2.9073,
      "step": 94020
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.364359140396118,
      "learning_rate": 0.0003854407838356795,
      "loss": 3.0817,
      "step": 94021
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8443373441696167,
      "learning_rate": 0.00038543686267135526,
      "loss": 3.1016,
      "step": 94022
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1019132137298584,
      "learning_rate": 0.0003854329414911466,
      "loss": 2.957,
      "step": 94023
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7340810298919678,
      "learning_rate": 0.00038542902029505444,
      "loss": 2.7751,
      "step": 94024
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.647070288658142,
      "learning_rate": 0.00038542509908307947,
      "loss": 2.8692,
      "step": 94025
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.964661955833435,
      "learning_rate": 0.00038542117785522224,
      "loss": 3.1393,
      "step": 94026
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7094142436981201,
      "learning_rate": 0.0003854172566114837,
      "loss": 3.0621,
      "step": 94027
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5603188276290894,
      "learning_rate": 0.00038541333535186437,
      "loss": 2.8928,
      "step": 94028
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.801406979560852,
      "learning_rate": 0.0003854094140763651,
      "loss": 2.9601,
      "step": 94029
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9381104707717896,
      "learning_rate": 0.00038540549278498666,
      "loss": 2.9495,
      "step": 94030
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7021994590759277,
      "learning_rate": 0.00038540157147772967,
      "loss": 3.0315,
      "step": 94031
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0152339935302734,
      "learning_rate": 0.0003853976501545951,
      "loss": 3.0759,
      "step": 94032
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8845233917236328,
      "learning_rate": 0.00038539372881558334,
      "loss": 3.2932,
      "step": 94033
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.700356125831604,
      "learning_rate": 0.00038538980746069524,
      "loss": 3.0685,
      "step": 94034
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6996632814407349,
      "learning_rate": 0.0003853858860899316,
      "loss": 2.803,
      "step": 94035
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7572075128555298,
      "learning_rate": 0.0003853819647032931,
      "loss": 3.2086,
      "step": 94036
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9897279739379883,
      "learning_rate": 0.00038537804330078057,
      "loss": 2.7658,
      "step": 94037
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5182019472122192,
      "learning_rate": 0.00038537412188239454,
      "loss": 3.0501,
      "step": 94038
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7574152946472168,
      "learning_rate": 0.00038537020044813596,
      "loss": 3.1057,
      "step": 94039
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6711031198501587,
      "learning_rate": 0.0003853662789980054,
      "loss": 2.9998,
      "step": 94040
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5519309043884277,
      "learning_rate": 0.00038536235753200367,
      "loss": 2.9338,
      "step": 94041
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8052713871002197,
      "learning_rate": 0.0003853584360501314,
      "loss": 3.3185,
      "step": 94042
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9109375476837158,
      "learning_rate": 0.00038535451455238933,
      "loss": 2.961,
      "step": 94043
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.7809134721755981,
      "learning_rate": 0.00038535059303877837,
      "loss": 2.8736,
      "step": 94044
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5241055488586426,
      "learning_rate": 0.0003853466715092991,
      "loss": 2.8851,
      "step": 94045
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5460231304168701,
      "learning_rate": 0.00038534274996395227,
      "loss": 3.2618,
      "step": 94046
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.3651764392852783,
      "learning_rate": 0.00038533882840273854,
      "loss": 3.043,
      "step": 94047
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6373426914215088,
      "learning_rate": 0.0003853349068256588,
      "loss": 3.0735,
      "step": 94048
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.551941156387329,
      "learning_rate": 0.00038533098523271364,
      "loss": 3.1564,
      "step": 94049
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8011647462844849,
      "learning_rate": 0.0003853270636239038,
      "loss": 3.3589,
      "step": 94050
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.805688500404358,
      "learning_rate": 0.0003853231419992301,
      "loss": 2.9795,
      "step": 94051
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.026353597640991,
      "learning_rate": 0.0003853192203586932,
      "loss": 3.0079,
      "step": 94052
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.794999599456787,
      "learning_rate": 0.0003853152987022939,
      "loss": 3.0201,
      "step": 94053
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.8722071647644043,
      "learning_rate": 0.00038531137703003284,
      "loss": 3.0519,
      "step": 94054
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9652212858200073,
      "learning_rate": 0.0003853074553419108,
      "loss": 3.0811,
      "step": 94055
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6344107389450073,
      "learning_rate": 0.0003853035336379285,
      "loss": 3.1346,
      "step": 94056
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.1070828437805176,
      "learning_rate": 0.00038529961191808665,
      "loss": 3.1606,
      "step": 94057
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.322567939758301,
      "learning_rate": 0.000385295690182386,
      "loss": 2.9286,
      "step": 94058
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6651291847229004,
      "learning_rate": 0.0003852917684308272,
      "loss": 2.8806,
      "step": 94059
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9214115142822266,
      "learning_rate": 0.00038528784666341104,
      "loss": 2.9787,
      "step": 94060
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0796666145324707,
      "learning_rate": 0.0003852839248801384,
      "loss": 3.1496,
      "step": 94061
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6930516958236694,
      "learning_rate": 0.00038528000308100973,
      "loss": 2.9546,
      "step": 94062
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.9624145030975342,
      "learning_rate": 0.00038527608126602596,
      "loss": 3.0948,
      "step": 94063
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.6373745203018188,
      "learning_rate": 0.00038527215943518785,
      "loss": 2.9396,
      "step": 94064
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.699759602546692,
      "learning_rate": 0.0003852682375884959,
      "loss": 2.8203,
      "step": 94065
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.709812045097351,
      "learning_rate": 0.000385264315725951,
      "loss": 3.1088,
      "step": 94066
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0162594318389893,
      "learning_rate": 0.0003852603938475539,
      "loss": 3.0989,
      "step": 94067
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.5898594856262207,
      "learning_rate": 0.00038525647195330527,
      "loss": 2.993,
      "step": 94068
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.706822156906128,
      "learning_rate": 0.00038525255004320574,
      "loss": 3.1005,
      "step": 94069
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.530178189277649,
      "learning_rate": 0.0003852486281172564,
      "loss": 3.0163,
      "step": 94070
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0818212032318115,
      "learning_rate": 0.0003852447061754575,
      "loss": 2.7099,
      "step": 94071
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.015860080718994,
      "learning_rate": 0.00038524078421781007,
      "loss": 3.0563,
      "step": 94072
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.083221197128296,
      "learning_rate": 0.0003852368622443148,
      "loss": 3.1374,
      "step": 94073
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.0245933532714844,
      "learning_rate": 0.0003852329402549724,
      "loss": 2.9296,
      "step": 94074
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.2097952365875244,
      "learning_rate": 0.00038522901824978353,
      "loss": 2.7954,
      "step": 94075
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.0995733737945557,
      "learning_rate": 0.00038522509622874903,
      "loss": 2.8391,
      "step": 94076
    },
    {
      "epoch": 1.22,
      "grad_norm": 5.247263431549072,
      "learning_rate": 0.0003852211741918695,
      "loss": 2.9535,
      "step": 94077
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.907371997833252,
      "learning_rate": 0.0003852172521391458,
      "loss": 2.8314,
      "step": 94078
    },
    {
      "epoch": 1.22,
      "grad_norm": 1.678324580192566,
      "learning_rate": 0.0003852133300705786,
      "loss": 2.9207,
      "step": 94079
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.7867703437805176,
      "learning_rate": 0.00038520940798616863,
      "loss": 2.8825,
      "step": 94080
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.6995723247528076,
      "learning_rate": 0.00038520548588591663,
      "loss": 3.1122,
      "step": 94081
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1204898357391357,
      "learning_rate": 0.00038520156376982336,
      "loss": 3.1451,
      "step": 94082
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5397298336029053,
      "learning_rate": 0.00038519764163788946,
      "loss": 3.0267,
      "step": 94083
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5382798910140991,
      "learning_rate": 0.00038519371949011564,
      "loss": 3.0908,
      "step": 94084
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.297874927520752,
      "learning_rate": 0.0003851897973265028,
      "loss": 3.0551,
      "step": 94085
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.5376179218292236,
      "learning_rate": 0.0003851858751470516,
      "loss": 3.179,
      "step": 94086
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.085932970046997,
      "learning_rate": 0.0003851819529517626,
      "loss": 2.8825,
      "step": 94087
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.916993498802185,
      "learning_rate": 0.0003851780307406368,
      "loss": 3.0919,
      "step": 94088
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.7150697708129883,
      "learning_rate": 0.0003851741085136747,
      "loss": 2.9933,
      "step": 94089
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8213090896606445,
      "learning_rate": 0.00038517018627087714,
      "loss": 3.0622,
      "step": 94090
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7794896364212036,
      "learning_rate": 0.00038516626401224495,
      "loss": 3.1334,
      "step": 94091
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.067829132080078,
      "learning_rate": 0.0003851623417377787,
      "loss": 2.9432,
      "step": 94092
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1675426959991455,
      "learning_rate": 0.00038515841944747906,
      "loss": 3.1079,
      "step": 94093
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.571744441986084,
      "learning_rate": 0.000385154497141347,
      "loss": 3.1397,
      "step": 94094
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4567909240722656,
      "learning_rate": 0.00038515057481938297,
      "loss": 3.0155,
      "step": 94095
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.442106008529663,
      "learning_rate": 0.0003851466524815879,
      "loss": 3.0614,
      "step": 94096
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.827360987663269,
      "learning_rate": 0.0003851427301279625,
      "loss": 3.1402,
      "step": 94097
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.2044076919555664,
      "learning_rate": 0.00038513880775850736,
      "loss": 2.8153,
      "step": 94098
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.914985418319702,
      "learning_rate": 0.0003851348853732234,
      "loss": 3.114,
      "step": 94099
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9515202045440674,
      "learning_rate": 0.00038513096297211125,
      "loss": 3.0562,
      "step": 94100
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4516618251800537,
      "learning_rate": 0.0003851270405551716,
      "loss": 2.8864,
      "step": 94101
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7153037786483765,
      "learning_rate": 0.00038512311812240527,
      "loss": 2.9994,
      "step": 94102
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2975516319274902,
      "learning_rate": 0.00038511919567381294,
      "loss": 2.9822,
      "step": 94103
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.626535177230835,
      "learning_rate": 0.0003851152732093953,
      "loss": 3.201,
      "step": 94104
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4769299030303955,
      "learning_rate": 0.0003851113507291532,
      "loss": 3.2034,
      "step": 94105
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5414007902145386,
      "learning_rate": 0.00038510742823308727,
      "loss": 2.941,
      "step": 94106
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8223758935928345,
      "learning_rate": 0.0003851035057211982,
      "loss": 3.0011,
      "step": 94107
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0376107692718506,
      "learning_rate": 0.0003850995831934868,
      "loss": 2.9773,
      "step": 94108
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8647732734680176,
      "learning_rate": 0.0003850956606499539,
      "loss": 2.8124,
      "step": 94109
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.538867712020874,
      "learning_rate": 0.0003850917380906,
      "loss": 3.0574,
      "step": 94110
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.626685380935669,
      "learning_rate": 0.00038508781551542596,
      "loss": 3.1038,
      "step": 94111
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1145451068878174,
      "learning_rate": 0.0003850838929244324,
      "loss": 3.0153,
      "step": 94112
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.89520263671875,
      "learning_rate": 0.0003850799703176203,
      "loss": 3.0061,
      "step": 94113
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.57322359085083,
      "learning_rate": 0.00038507604769499014,
      "loss": 3.2367,
      "step": 94114
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.120957612991333,
      "learning_rate": 0.00038507212505654273,
      "loss": 2.9768,
      "step": 94115
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6432609558105469,
      "learning_rate": 0.00038506820240227877,
      "loss": 3.1121,
      "step": 94116
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9210542440414429,
      "learning_rate": 0.00038506427973219917,
      "loss": 2.8245,
      "step": 94117
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.2719221115112305,
      "learning_rate": 0.00038506035704630436,
      "loss": 2.9834,
      "step": 94118
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7778080701828003,
      "learning_rate": 0.0003850564343445953,
      "loss": 2.9084,
      "step": 94119
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7248975038528442,
      "learning_rate": 0.00038505251162707267,
      "loss": 3.0688,
      "step": 94120
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.385570526123047,
      "learning_rate": 0.0003850485888937371,
      "loss": 3.3454,
      "step": 94121
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1444597244262695,
      "learning_rate": 0.00038504466614458944,
      "loss": 2.7784,
      "step": 94122
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0101051330566406,
      "learning_rate": 0.0003850407433796303,
      "loss": 3.1096,
      "step": 94123
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7164031267166138,
      "learning_rate": 0.0003850368205988606,
      "loss": 2.9521,
      "step": 94124
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5965049266815186,
      "learning_rate": 0.00038503289780228087,
      "loss": 2.8814,
      "step": 94125
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7446413040161133,
      "learning_rate": 0.0003850289749898919,
      "loss": 2.9424,
      "step": 94126
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.939788818359375,
      "learning_rate": 0.0003850250521616945,
      "loss": 2.6766,
      "step": 94127
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8072559833526611,
      "learning_rate": 0.00038502112931768935,
      "loss": 3.0515,
      "step": 94128
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.978995442390442,
      "learning_rate": 0.0003850172064578771,
      "loss": 3.1441,
      "step": 94129
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9387024641036987,
      "learning_rate": 0.00038501328358225855,
      "loss": 2.9449,
      "step": 94130
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.817020297050476,
      "learning_rate": 0.0003850093606908345,
      "loss": 2.8077,
      "step": 94131
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8411997556686401,
      "learning_rate": 0.00038500543778360554,
      "loss": 2.8654,
      "step": 94132
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.646358847618103,
      "learning_rate": 0.00038500151486057247,
      "loss": 3.1604,
      "step": 94133
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.387434720993042,
      "learning_rate": 0.00038499759192173606,
      "loss": 2.9572,
      "step": 94134
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.569766640663147,
      "learning_rate": 0.0003849936689670969,
      "loss": 2.8759,
      "step": 94135
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.281419515609741,
      "learning_rate": 0.0003849897459966559,
      "loss": 3.1298,
      "step": 94136
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.792201042175293,
      "learning_rate": 0.00038498582301041374,
      "loss": 3.2655,
      "step": 94137
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5258784294128418,
      "learning_rate": 0.00038498190000837095,
      "loss": 2.8718,
      "step": 94138
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1075949668884277,
      "learning_rate": 0.0003849779769905285,
      "loss": 2.8358,
      "step": 94139
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9577957391738892,
      "learning_rate": 0.00038497405395688714,
      "loss": 3.2946,
      "step": 94140
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6592481136322021,
      "learning_rate": 0.0003849701309074474,
      "loss": 3.0439,
      "step": 94141
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7767406702041626,
      "learning_rate": 0.00038496620784221006,
      "loss": 3.3382,
      "step": 94142
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8775120973587036,
      "learning_rate": 0.00038496228476117604,
      "loss": 3.062,
      "step": 94143
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.674182653427124,
      "learning_rate": 0.00038495836166434585,
      "loss": 3.1141,
      "step": 94144
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.235989570617676,
      "learning_rate": 0.0003849544385517203,
      "loss": 2.8391,
      "step": 94145
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.472597599029541,
      "learning_rate": 0.0003849505154233002,
      "loss": 3.2306,
      "step": 94146
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.7636172771453857,
      "learning_rate": 0.000384946592279086,
      "loss": 2.867,
      "step": 94147
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9019768238067627,
      "learning_rate": 0.00038494266911907876,
      "loss": 3.086,
      "step": 94148
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.354703187942505,
      "learning_rate": 0.0003849387459432791,
      "loss": 3.1353,
      "step": 94149
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5462430715560913,
      "learning_rate": 0.0003849348227516876,
      "loss": 2.8698,
      "step": 94150
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.106193780899048,
      "learning_rate": 0.00038493089954430516,
      "loss": 3.0525,
      "step": 94151
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6942713260650635,
      "learning_rate": 0.00038492697632113256,
      "loss": 3.0847,
      "step": 94152
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2045483589172363,
      "learning_rate": 0.00038492305308217035,
      "loss": 3.0701,
      "step": 94153
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9454092979431152,
      "learning_rate": 0.00038491912982741933,
      "loss": 3.271,
      "step": 94154
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.501787543296814,
      "learning_rate": 0.0003849152065568803,
      "loss": 3.3024,
      "step": 94155
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1229262351989746,
      "learning_rate": 0.0003849112832705539,
      "loss": 2.9463,
      "step": 94156
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.553838014602661,
      "learning_rate": 0.00038490735996844083,
      "loss": 2.9124,
      "step": 94157
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0994372367858887,
      "learning_rate": 0.000384903436650542,
      "loss": 3.0027,
      "step": 94158
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7002356052398682,
      "learning_rate": 0.0003848995133168579,
      "loss": 2.9001,
      "step": 94159
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.796548843383789,
      "learning_rate": 0.0003848955899673894,
      "loss": 2.8141,
      "step": 94160
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.784725546836853,
      "learning_rate": 0.0003848916666021373,
      "loss": 3.0761,
      "step": 94161
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8281779289245605,
      "learning_rate": 0.0003848877432211021,
      "loss": 3.0943,
      "step": 94162
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.159486770629883,
      "learning_rate": 0.0003848838198242848,
      "loss": 3.269,
      "step": 94163
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.3629825115203857,
      "learning_rate": 0.00038487989641168595,
      "loss": 2.968,
      "step": 94164
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7115755081176758,
      "learning_rate": 0.0003848759729833063,
      "loss": 2.8911,
      "step": 94165
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8033925294876099,
      "learning_rate": 0.0003848720495391465,
      "loss": 3.1406,
      "step": 94166
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2014927864074707,
      "learning_rate": 0.0003848681260792076,
      "loss": 2.7433,
      "step": 94167
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.966499924659729,
      "learning_rate": 0.00038486420260349004,
      "loss": 2.7837,
      "step": 94168
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7568669319152832,
      "learning_rate": 0.00038486027911199453,
      "loss": 3.132,
      "step": 94169
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.540832281112671,
      "learning_rate": 0.000384856355604722,
      "loss": 3.2178,
      "step": 94170
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.115565299987793,
      "learning_rate": 0.000384852432081673,
      "loss": 3.0017,
      "step": 94171
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.136329412460327,
      "learning_rate": 0.0003848485085428484,
      "loss": 3.1387,
      "step": 94172
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0961339473724365,
      "learning_rate": 0.00038484458498824883,
      "loss": 2.9113,
      "step": 94173
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6905858516693115,
      "learning_rate": 0.000384840661417875,
      "loss": 3.295,
      "step": 94174
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.5580315589904785,
      "learning_rate": 0.0003848367378317277,
      "loss": 2.9358,
      "step": 94175
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6914329528808594,
      "learning_rate": 0.00038483281422980775,
      "loss": 3.209,
      "step": 94176
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.677335500717163,
      "learning_rate": 0.00038482889061211567,
      "loss": 2.9415,
      "step": 94177
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9252119064331055,
      "learning_rate": 0.00038482496697865234,
      "loss": 2.7056,
      "step": 94178
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.068877935409546,
      "learning_rate": 0.0003848210433294185,
      "loss": 3.2932,
      "step": 94179
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6361678838729858,
      "learning_rate": 0.0003848171196644147,
      "loss": 3.1289,
      "step": 94180
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9158134460449219,
      "learning_rate": 0.0003848131959836419,
      "loss": 2.9522,
      "step": 94181
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.7602479457855225,
      "learning_rate": 0.0003848092722871007,
      "loss": 2.8445,
      "step": 94182
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.10371732711792,
      "learning_rate": 0.0003848053485747918,
      "loss": 2.9361,
      "step": 94183
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8638702630996704,
      "learning_rate": 0.000384801424846716,
      "loss": 3.1209,
      "step": 94184
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1303794384002686,
      "learning_rate": 0.0003847975011028741,
      "loss": 3.0519,
      "step": 94185
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.0584769248962402,
      "learning_rate": 0.0003847935773432667,
      "loss": 2.829,
      "step": 94186
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1460115909576416,
      "learning_rate": 0.00038478965356789454,
      "loss": 2.7585,
      "step": 94187
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.663256287574768,
      "learning_rate": 0.00038478572977675835,
      "loss": 3.1655,
      "step": 94188
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1087958812713623,
      "learning_rate": 0.000384781805969859,
      "loss": 2.9659,
      "step": 94189
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.768872857093811,
      "learning_rate": 0.0003847778821471971,
      "loss": 2.9608,
      "step": 94190
    },
    {
      "epoch": 1.23,
      "grad_norm": 4.001230716705322,
      "learning_rate": 0.0003847739583087733,
      "loss": 2.9795,
      "step": 94191
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.120250701904297,
      "learning_rate": 0.00038477003445458853,
      "loss": 3.0202,
      "step": 94192
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4509822130203247,
      "learning_rate": 0.0003847661105846433,
      "loss": 3.2362,
      "step": 94193
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.9402825832366943,
      "learning_rate": 0.00038476218669893845,
      "loss": 3.054,
      "step": 94194
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1758334636688232,
      "learning_rate": 0.0003847582627974749,
      "loss": 2.867,
      "step": 94195
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.220283269882202,
      "learning_rate": 0.00038475433888025297,
      "loss": 3.3061,
      "step": 94196
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.287252426147461,
      "learning_rate": 0.00038475041494727365,
      "loss": 3.0375,
      "step": 94197
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.008364677429199,
      "learning_rate": 0.0003847464909985377,
      "loss": 2.83,
      "step": 94198
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0533270835876465,
      "learning_rate": 0.00038474256703404573,
      "loss": 2.9191,
      "step": 94199
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.985390543937683,
      "learning_rate": 0.00038473864305379855,
      "loss": 3.2378,
      "step": 94200
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.220935106277466,
      "learning_rate": 0.0003847347190577969,
      "loss": 2.9838,
      "step": 94201
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0518791675567627,
      "learning_rate": 0.0003847307950460414,
      "loss": 3.2184,
      "step": 94202
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1954269409179688,
      "learning_rate": 0.00038472687101853275,
      "loss": 3.0047,
      "step": 94203
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7289366722106934,
      "learning_rate": 0.0003847229469752719,
      "loss": 2.9829,
      "step": 94204
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8595387935638428,
      "learning_rate": 0.0003847190229162595,
      "loss": 3.0318,
      "step": 94205
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.036921977996826,
      "learning_rate": 0.0003847150988414961,
      "loss": 3.08,
      "step": 94206
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.968765139579773,
      "learning_rate": 0.00038471117475098266,
      "loss": 3.0074,
      "step": 94207
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7359185218811035,
      "learning_rate": 0.0003847072506447198,
      "loss": 3.1613,
      "step": 94208
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2276229858398438,
      "learning_rate": 0.0003847033265227082,
      "loss": 3.1236,
      "step": 94209
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.812030553817749,
      "learning_rate": 0.0003846994023849487,
      "loss": 2.8883,
      "step": 94210
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.6843695640563965,
      "learning_rate": 0.00038469547823144197,
      "loss": 3.0587,
      "step": 94211
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8537156581878662,
      "learning_rate": 0.0003846915540621888,
      "loss": 2.8737,
      "step": 94212
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5195437669754028,
      "learning_rate": 0.00038468762987718983,
      "loss": 3.0683,
      "step": 94213
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.4594640731811523,
      "learning_rate": 0.0003846837056764458,
      "loss": 2.8341,
      "step": 94214
    },
    {
      "epoch": 1.23,
      "grad_norm": 4.958850383758545,
      "learning_rate": 0.00038467978145995753,
      "loss": 3.007,
      "step": 94215
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.2839341163635254,
      "learning_rate": 0.00038467585722772567,
      "loss": 2.6901,
      "step": 94216
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6232260465621948,
      "learning_rate": 0.00038467193297975097,
      "loss": 3.0287,
      "step": 94217
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.425342321395874,
      "learning_rate": 0.0003846680087160341,
      "loss": 3.0614,
      "step": 94218
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.9408538341522217,
      "learning_rate": 0.000384664084436576,
      "loss": 3.1738,
      "step": 94219
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8357083797454834,
      "learning_rate": 0.00038466016014137704,
      "loss": 2.9764,
      "step": 94220
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.379795551300049,
      "learning_rate": 0.00038465623583043833,
      "loss": 3.039,
      "step": 94221
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.12371563911438,
      "learning_rate": 0.00038465231150376037,
      "loss": 2.5177,
      "step": 94222
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.7991318702697754,
      "learning_rate": 0.00038464838716134397,
      "loss": 3.0501,
      "step": 94223
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8800582885742188,
      "learning_rate": 0.00038464446280318977,
      "loss": 3.0167,
      "step": 94224
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.75260591506958,
      "learning_rate": 0.00038464053842929865,
      "loss": 3.1589,
      "step": 94225
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6352615356445312,
      "learning_rate": 0.0003846366140396712,
      "loss": 3.1905,
      "step": 94226
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.957297444343567,
      "learning_rate": 0.0003846326896343082,
      "loss": 3.0336,
      "step": 94227
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8346078395843506,
      "learning_rate": 0.00038462876521321043,
      "loss": 2.6088,
      "step": 94228
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6335928440093994,
      "learning_rate": 0.0003846248407763785,
      "loss": 2.7934,
      "step": 94229
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.8996777534484863,
      "learning_rate": 0.0003846209163238133,
      "loss": 2.8227,
      "step": 94230
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9255268573760986,
      "learning_rate": 0.0003846169918555155,
      "loss": 3.0349,
      "step": 94231
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8325868844985962,
      "learning_rate": 0.00038461306737148574,
      "loss": 3.2476,
      "step": 94232
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6753389835357666,
      "learning_rate": 0.00038460914287172476,
      "loss": 3.1513,
      "step": 94233
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4514261484146118,
      "learning_rate": 0.0003846052183562334,
      "loss": 2.9685,
      "step": 94234
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3029353618621826,
      "learning_rate": 0.0003846012938250124,
      "loss": 3.0844,
      "step": 94235
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9599690437316895,
      "learning_rate": 0.00038459736927806226,
      "loss": 3.1726,
      "step": 94236
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.563098192214966,
      "learning_rate": 0.00038459344471538406,
      "loss": 2.9661,
      "step": 94237
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6114606857299805,
      "learning_rate": 0.0003845895201369782,
      "loss": 2.6171,
      "step": 94238
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.960292100906372,
      "learning_rate": 0.00038458559554284557,
      "loss": 2.9798,
      "step": 94239
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.793798804283142,
      "learning_rate": 0.000384581670932987,
      "loss": 3.1558,
      "step": 94240
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.6514017581939697,
      "learning_rate": 0.00038457774630740296,
      "loss": 3.0071,
      "step": 94241
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.4736168384552,
      "learning_rate": 0.00038457382166609435,
      "loss": 2.8202,
      "step": 94242
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.738542079925537,
      "learning_rate": 0.000384569897009062,
      "loss": 3.2869,
      "step": 94243
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.715295672416687,
      "learning_rate": 0.0003845659723363064,
      "loss": 3.2289,
      "step": 94244
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.939786672592163,
      "learning_rate": 0.00038456204764782836,
      "loss": 3.2245,
      "step": 94245
    },
    {
      "epoch": 1.23,
      "grad_norm": 4.538852214813232,
      "learning_rate": 0.0003845581229436287,
      "loss": 3.0167,
      "step": 94246
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.9948301315307617,
      "learning_rate": 0.00038455419822370807,
      "loss": 3.0355,
      "step": 94247
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.9770874977111816,
      "learning_rate": 0.00038455027348806715,
      "loss": 3.0965,
      "step": 94248
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.088089942932129,
      "learning_rate": 0.00038454634873670687,
      "loss": 2.8966,
      "step": 94249
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2497239112854004,
      "learning_rate": 0.00038454242396962767,
      "loss": 2.8846,
      "step": 94250
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3751957416534424,
      "learning_rate": 0.0003845384991868305,
      "loss": 2.9234,
      "step": 94251
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1231539249420166,
      "learning_rate": 0.00038453457438831606,
      "loss": 2.9819,
      "step": 94252
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7659958600997925,
      "learning_rate": 0.0003845306495740851,
      "loss": 2.8543,
      "step": 94253
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.161689043045044,
      "learning_rate": 0.0003845267247441382,
      "loss": 3.1237,
      "step": 94254
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.884827733039856,
      "learning_rate": 0.00038452279989847617,
      "loss": 3.0876,
      "step": 94255
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8542537689208984,
      "learning_rate": 0.0003845188750370999,
      "loss": 3.066,
      "step": 94256
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7983583211898804,
      "learning_rate": 0.00038451495016000983,
      "loss": 3.2109,
      "step": 94257
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9630941152572632,
      "learning_rate": 0.0003845110252672069,
      "loss": 3.0849,
      "step": 94258
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1050915718078613,
      "learning_rate": 0.0003845071003586918,
      "loss": 2.9717,
      "step": 94259
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5430989265441895,
      "learning_rate": 0.00038450317543446515,
      "loss": 2.8329,
      "step": 94260
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4619730710983276,
      "learning_rate": 0.00038449925049452785,
      "loss": 3.1793,
      "step": 94261
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7008205652236938,
      "learning_rate": 0.0003844953255388805,
      "loss": 2.9959,
      "step": 94262
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.032193660736084,
      "learning_rate": 0.00038449140056752385,
      "loss": 3.0098,
      "step": 94263
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8844928741455078,
      "learning_rate": 0.00038448747558045873,
      "loss": 2.951,
      "step": 94264
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.449728488922119,
      "learning_rate": 0.0003844835505776858,
      "loss": 2.9754,
      "step": 94265
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7587088346481323,
      "learning_rate": 0.0003844796255592057,
      "loss": 3.0204,
      "step": 94266
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9262423515319824,
      "learning_rate": 0.00038447570052501924,
      "loss": 2.7998,
      "step": 94267
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8076283931732178,
      "learning_rate": 0.00038447177547512725,
      "loss": 2.5007,
      "step": 94268
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8152775764465332,
      "learning_rate": 0.0003844678504095302,
      "loss": 2.9735,
      "step": 94269
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8105698823928833,
      "learning_rate": 0.0003844639253282291,
      "loss": 3.2308,
      "step": 94270
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6719461679458618,
      "learning_rate": 0.00038446000023122464,
      "loss": 3.2592,
      "step": 94271
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1115310192108154,
      "learning_rate": 0.0003844560751185173,
      "loss": 2.9591,
      "step": 94272
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.142101526260376,
      "learning_rate": 0.0003844521499901081,
      "loss": 3.0774,
      "step": 94273
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7610257863998413,
      "learning_rate": 0.0003844482248459976,
      "loss": 3.0988,
      "step": 94274
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3428828716278076,
      "learning_rate": 0.0003844442996861866,
      "loss": 3.1915,
      "step": 94275
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.228595018386841,
      "learning_rate": 0.0003844403745106757,
      "loss": 3.1781,
      "step": 94276
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6745108366012573,
      "learning_rate": 0.00038443644931946596,
      "loss": 3.0727,
      "step": 94277
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.885240077972412,
      "learning_rate": 0.0003844325241125577,
      "loss": 3.2486,
      "step": 94278
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8226702213287354,
      "learning_rate": 0.0003844285988899519,
      "loss": 3.0443,
      "step": 94279
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.218129873275757,
      "learning_rate": 0.0003844246736516493,
      "loss": 2.7995,
      "step": 94280
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8001896142959595,
      "learning_rate": 0.0003844207483976505,
      "loss": 3.0946,
      "step": 94281
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6492881774902344,
      "learning_rate": 0.0003844168231279562,
      "loss": 3.143,
      "step": 94282
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.885415554046631,
      "learning_rate": 0.00038441289784256743,
      "loss": 3.1008,
      "step": 94283
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9063587188720703,
      "learning_rate": 0.0003844089725414845,
      "loss": 3.128,
      "step": 94284
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5262542963027954,
      "learning_rate": 0.00038440504722470845,
      "loss": 2.8557,
      "step": 94285
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9073766469955444,
      "learning_rate": 0.0003844011218922399,
      "loss": 3.2006,
      "step": 94286
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5412731170654297,
      "learning_rate": 0.0003843971965440796,
      "loss": 3.0886,
      "step": 94287
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5948240756988525,
      "learning_rate": 0.00038439327118022825,
      "loss": 2.9222,
      "step": 94288
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.747169852256775,
      "learning_rate": 0.0003843893458006866,
      "loss": 3.087,
      "step": 94289
    },
    {
      "epoch": 1.23,
      "grad_norm": 4.1029372215271,
      "learning_rate": 0.0003843854204054554,
      "loss": 2.9439,
      "step": 94290
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.062206506729126,
      "learning_rate": 0.0003843814949945353,
      "loss": 3.0005,
      "step": 94291
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0556395053863525,
      "learning_rate": 0.0003843775695679271,
      "loss": 3.1383,
      "step": 94292
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3564515113830566,
      "learning_rate": 0.0003843736441256316,
      "loss": 2.9501,
      "step": 94293
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1255972385406494,
      "learning_rate": 0.00038436971866764934,
      "loss": 3.2257,
      "step": 94294
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.095991849899292,
      "learning_rate": 0.0003843657931939812,
      "loss": 2.9922,
      "step": 94295
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.910161018371582,
      "learning_rate": 0.0003843618677046278,
      "loss": 2.9551,
      "step": 94296
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.166105031967163,
      "learning_rate": 0.00038435794219959004,
      "loss": 2.9791,
      "step": 94297
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.649477243423462,
      "learning_rate": 0.0003843540166788685,
      "loss": 3.0461,
      "step": 94298
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1603872776031494,
      "learning_rate": 0.00038435009114246396,
      "loss": 2.9509,
      "step": 94299
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.026888608932495,
      "learning_rate": 0.00038434616559037717,
      "loss": 2.8604,
      "step": 94300
    },
    {
      "epoch": 1.23,
      "grad_norm": 5.486758232116699,
      "learning_rate": 0.0003843422400226088,
      "loss": 3.2084,
      "step": 94301
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1899783611297607,
      "learning_rate": 0.0003843383144391596,
      "loss": 2.892,
      "step": 94302
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.028172492980957,
      "learning_rate": 0.0003843343888400304,
      "loss": 3.2667,
      "step": 94303
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7546930313110352,
      "learning_rate": 0.0003843304632252218,
      "loss": 3.024,
      "step": 94304
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.496795415878296,
      "learning_rate": 0.00038432653759473456,
      "loss": 2.9778,
      "step": 94305
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.3943090438842773,
      "learning_rate": 0.00038432261194856944,
      "loss": 3.2121,
      "step": 94306
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3904707431793213,
      "learning_rate": 0.0003843186862867272,
      "loss": 2.8526,
      "step": 94307
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0682482719421387,
      "learning_rate": 0.0003843147606092084,
      "loss": 2.701,
      "step": 94308
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0756800174713135,
      "learning_rate": 0.000384310834916014,
      "loss": 2.9237,
      "step": 94309
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.0393853187561035,
      "learning_rate": 0.0003843069092071446,
      "loss": 3.1049,
      "step": 94310
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.651517152786255,
      "learning_rate": 0.0003843029834826009,
      "loss": 2.8967,
      "step": 94311
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5483349561691284,
      "learning_rate": 0.00038429905774238374,
      "loss": 2.8716,
      "step": 94312
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.206364154815674,
      "learning_rate": 0.00038429513198649383,
      "loss": 3.0918,
      "step": 94313
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8299552202224731,
      "learning_rate": 0.0003842912062149317,
      "loss": 3.0,
      "step": 94314
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.515323519706726,
      "learning_rate": 0.00038428728042769845,
      "loss": 2.9587,
      "step": 94315
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0085129737854004,
      "learning_rate": 0.0003842833546247945,
      "loss": 2.8706,
      "step": 94316
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.503330945968628,
      "learning_rate": 0.0003842794288062207,
      "loss": 2.8707,
      "step": 94317
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.978219509124756,
      "learning_rate": 0.00038427550297197776,
      "loss": 2.7966,
      "step": 94318
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9927527904510498,
      "learning_rate": 0.00038427157712206633,
      "loss": 3.1375,
      "step": 94319
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.210191488265991,
      "learning_rate": 0.0003842676512564874,
      "loss": 3.1909,
      "step": 94320
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.904760479927063,
      "learning_rate": 0.00038426372537524146,
      "loss": 3.195,
      "step": 94321
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3318076133728027,
      "learning_rate": 0.0003842597994783292,
      "loss": 2.9138,
      "step": 94322
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3151776790618896,
      "learning_rate": 0.0003842558735657516,
      "loss": 3.0888,
      "step": 94323
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4656115770339966,
      "learning_rate": 0.00038425194763750916,
      "loss": 3.1554,
      "step": 94324
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7078206539154053,
      "learning_rate": 0.00038424802169360265,
      "loss": 2.901,
      "step": 94325
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.950681447982788,
      "learning_rate": 0.00038424409573403295,
      "loss": 3.0564,
      "step": 94326
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5818672180175781,
      "learning_rate": 0.00038424016975880065,
      "loss": 3.1203,
      "step": 94327
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.937560796737671,
      "learning_rate": 0.0003842362437679064,
      "loss": 3.0006,
      "step": 94328
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.710825800895691,
      "learning_rate": 0.00038423231776135117,
      "loss": 3.0373,
      "step": 94329
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5044175386428833,
      "learning_rate": 0.0003842283917391355,
      "loss": 2.8816,
      "step": 94330
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0114638805389404,
      "learning_rate": 0.0003842244657012602,
      "loss": 3.1117,
      "step": 94331
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5717668533325195,
      "learning_rate": 0.00038422053964772603,
      "loss": 2.8774,
      "step": 94332
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7223750352859497,
      "learning_rate": 0.00038421661357853357,
      "loss": 3.0299,
      "step": 94333
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.04417085647583,
      "learning_rate": 0.00038421268749368377,
      "loss": 3.0498,
      "step": 94334
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7541635036468506,
      "learning_rate": 0.00038420876139317716,
      "loss": 2.9333,
      "step": 94335
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8376840353012085,
      "learning_rate": 0.0003842048352770146,
      "loss": 2.9851,
      "step": 94336
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.429565906524658,
      "learning_rate": 0.00038420090914519675,
      "loss": 3.0582,
      "step": 94337
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.46208119392395,
      "learning_rate": 0.00038419698299772435,
      "loss": 3.0318,
      "step": 94338
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8105721473693848,
      "learning_rate": 0.00038419305683459813,
      "loss": 3.0308,
      "step": 94339
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7865201234817505,
      "learning_rate": 0.00038418913065581885,
      "loss": 3.154,
      "step": 94340
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1185951232910156,
      "learning_rate": 0.0003841852044613872,
      "loss": 3.2433,
      "step": 94341
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.345214605331421,
      "learning_rate": 0.000384181278251304,
      "loss": 2.8343,
      "step": 94342
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.3021671772003174,
      "learning_rate": 0.0003841773520255698,
      "loss": 2.9177,
      "step": 94343
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.180079936981201,
      "learning_rate": 0.00038417342578418555,
      "loss": 3.0317,
      "step": 94344
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.105329990386963,
      "learning_rate": 0.00038416949952715186,
      "loss": 3.1221,
      "step": 94345
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1593687534332275,
      "learning_rate": 0.0003841655732544694,
      "loss": 2.8135,
      "step": 94346
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.9783756732940674,
      "learning_rate": 0.000384161646966139,
      "loss": 2.9528,
      "step": 94347
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8535497188568115,
      "learning_rate": 0.0003841577206621614,
      "loss": 3.2314,
      "step": 94348
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9715207815170288,
      "learning_rate": 0.00038415379434253726,
      "loss": 2.9648,
      "step": 94349
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4020001888275146,
      "learning_rate": 0.0003841498680072673,
      "loss": 2.8836,
      "step": 94350
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.958327293395996,
      "learning_rate": 0.00038414594165635236,
      "loss": 3.1123,
      "step": 94351
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8411250114440918,
      "learning_rate": 0.00038414201528979305,
      "loss": 3.0688,
      "step": 94352
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6759963035583496,
      "learning_rate": 0.00038413808890759023,
      "loss": 3.2736,
      "step": 94353
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.649415969848633,
      "learning_rate": 0.00038413416250974444,
      "loss": 3.0407,
      "step": 94354
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1523921489715576,
      "learning_rate": 0.00038413023609625664,
      "loss": 2.904,
      "step": 94355
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.206664562225342,
      "learning_rate": 0.00038412630966712743,
      "loss": 2.9208,
      "step": 94356
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.76044762134552,
      "learning_rate": 0.0003841223832223574,
      "loss": 3.127,
      "step": 94357
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.020716905593872,
      "learning_rate": 0.00038411845676194757,
      "loss": 2.9334,
      "step": 94358
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7613575458526611,
      "learning_rate": 0.0003841145302858985,
      "loss": 3.1939,
      "step": 94359
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7503138780593872,
      "learning_rate": 0.0003841106037942109,
      "loss": 3.0285,
      "step": 94360
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.517770767211914,
      "learning_rate": 0.0003841066772868856,
      "loss": 2.87,
      "step": 94361
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.31065034866333,
      "learning_rate": 0.00038410275076392335,
      "loss": 3.217,
      "step": 94362
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9139471054077148,
      "learning_rate": 0.0003840988242253247,
      "loss": 2.985,
      "step": 94363
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5819531679153442,
      "learning_rate": 0.00038409489767109054,
      "loss": 3.1434,
      "step": 94364
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6518738269805908,
      "learning_rate": 0.00038409097110122153,
      "loss": 3.053,
      "step": 94365
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5941944122314453,
      "learning_rate": 0.0003840870445157184,
      "loss": 3.3306,
      "step": 94366
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7813533544540405,
      "learning_rate": 0.000384083117914582,
      "loss": 3.1025,
      "step": 94367
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.5762674808502197,
      "learning_rate": 0.0003840791912978129,
      "loss": 2.8837,
      "step": 94368
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8015992641448975,
      "learning_rate": 0.00038407526466541187,
      "loss": 2.902,
      "step": 94369
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6683173179626465,
      "learning_rate": 0.0003840713380173796,
      "loss": 2.8839,
      "step": 94370
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.223695993423462,
      "learning_rate": 0.000384067411353717,
      "loss": 2.9954,
      "step": 94371
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7834975719451904,
      "learning_rate": 0.00038406348467442464,
      "loss": 3.423,
      "step": 94372
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7638511657714844,
      "learning_rate": 0.0003840595579795033,
      "loss": 3.0799,
      "step": 94373
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4922958612442017,
      "learning_rate": 0.00038405563126895373,
      "loss": 3.1821,
      "step": 94374
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4964878559112549,
      "learning_rate": 0.0003840517045427765,
      "loss": 2.9528,
      "step": 94375
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0106868743896484,
      "learning_rate": 0.0003840477778009726,
      "loss": 2.9068,
      "step": 94376
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.71287202835083,
      "learning_rate": 0.0003840438510435426,
      "loss": 2.8431,
      "step": 94377
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9166592359542847,
      "learning_rate": 0.00038403992427048727,
      "loss": 2.8492,
      "step": 94378
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1353774070739746,
      "learning_rate": 0.0003840359974818073,
      "loss": 2.753,
      "step": 94379
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3987958431243896,
      "learning_rate": 0.0003840320706775034,
      "loss": 2.8246,
      "step": 94380
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0630271434783936,
      "learning_rate": 0.0003840281438575765,
      "loss": 3.1632,
      "step": 94381
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9542491436004639,
      "learning_rate": 0.0003840242170220271,
      "loss": 2.9746,
      "step": 94382
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.777940034866333,
      "learning_rate": 0.000384020290170856,
      "loss": 3.1142,
      "step": 94383
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.238489866256714,
      "learning_rate": 0.0003840163633040639,
      "loss": 3.0707,
      "step": 94384
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.658461093902588,
      "learning_rate": 0.0003840124364216517,
      "loss": 3.1093,
      "step": 94385
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7510355710983276,
      "learning_rate": 0.00038400850952361994,
      "loss": 2.8689,
      "step": 94386
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.591838836669922,
      "learning_rate": 0.00038400458260996935,
      "loss": 2.8311,
      "step": 94387
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8817832469940186,
      "learning_rate": 0.0003840006556807007,
      "loss": 2.9956,
      "step": 94388
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9212229251861572,
      "learning_rate": 0.0003839967287358149,
      "loss": 3.1168,
      "step": 94389
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8021010160446167,
      "learning_rate": 0.0003839928017753124,
      "loss": 3.3041,
      "step": 94390
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9247668981552124,
      "learning_rate": 0.0003839888747991941,
      "loss": 3.0152,
      "step": 94391
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.951175570487976,
      "learning_rate": 0.00038398494780746075,
      "loss": 3.0492,
      "step": 94392
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6909799575805664,
      "learning_rate": 0.00038398102080011286,
      "loss": 3.1028,
      "step": 94393
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5266529321670532,
      "learning_rate": 0.0003839770937771514,
      "loss": 3.1762,
      "step": 94394
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.200244188308716,
      "learning_rate": 0.00038397316673857696,
      "loss": 2.9271,
      "step": 94395
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.738635540008545,
      "learning_rate": 0.00038396923968439045,
      "loss": 3.1059,
      "step": 94396
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6931402683258057,
      "learning_rate": 0.00038396531261459236,
      "loss": 3.0011,
      "step": 94397
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.555492877960205,
      "learning_rate": 0.00038396138552918355,
      "loss": 2.9845,
      "step": 94398
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.4276342391967773,
      "learning_rate": 0.00038395745842816477,
      "loss": 2.7749,
      "step": 94399
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8924378156661987,
      "learning_rate": 0.0003839535313115367,
      "loss": 3.0502,
      "step": 94400
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6683152914047241,
      "learning_rate": 0.00038394960417930003,
      "loss": 2.9894,
      "step": 94401
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0408875942230225,
      "learning_rate": 0.00038394567703145565,
      "loss": 3.0252,
      "step": 94402
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.0611300468444824,
      "learning_rate": 0.00038394174986800406,
      "loss": 2.9996,
      "step": 94403
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.604190468788147,
      "learning_rate": 0.0003839378226889462,
      "loss": 3.1187,
      "step": 94404
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7322436571121216,
      "learning_rate": 0.0003839338954942827,
      "loss": 2.9137,
      "step": 94405
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6675233840942383,
      "learning_rate": 0.00038392996828401425,
      "loss": 2.886,
      "step": 94406
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8279564380645752,
      "learning_rate": 0.00038392604105814173,
      "loss": 2.9048,
      "step": 94407
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.134335517883301,
      "learning_rate": 0.00038392211381666576,
      "loss": 2.6956,
      "step": 94408
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3245604038238525,
      "learning_rate": 0.000383918186559587,
      "loss": 2.9379,
      "step": 94409
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4311319589614868,
      "learning_rate": 0.00038391425928690636,
      "loss": 3.1677,
      "step": 94410
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6976649761199951,
      "learning_rate": 0.00038391033199862445,
      "loss": 3.0327,
      "step": 94411
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.539771556854248,
      "learning_rate": 0.00038390640469474196,
      "loss": 2.7264,
      "step": 94412
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.974099636077881,
      "learning_rate": 0.0003839024773752597,
      "loss": 3.0734,
      "step": 94413
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.106125831604004,
      "learning_rate": 0.00038389855004017853,
      "loss": 2.9653,
      "step": 94414
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9730560779571533,
      "learning_rate": 0.0003838946226894989,
      "loss": 3.1245,
      "step": 94415
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.5743587017059326,
      "learning_rate": 0.00038389069532322165,
      "loss": 2.7772,
      "step": 94416
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.5610430240631104,
      "learning_rate": 0.00038388676794134767,
      "loss": 3.1892,
      "step": 94417
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.936102032661438,
      "learning_rate": 0.00038388284054387744,
      "loss": 3.059,
      "step": 94418
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9991711378097534,
      "learning_rate": 0.0003838789131308118,
      "loss": 2.9824,
      "step": 94419
    },
    {
      "epoch": 1.23,
      "grad_norm": 4.152706146240234,
      "learning_rate": 0.0003838749857021516,
      "loss": 2.9013,
      "step": 94420
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.0340237617492676,
      "learning_rate": 0.0003838710582578973,
      "loss": 3.2882,
      "step": 94421
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2910854816436768,
      "learning_rate": 0.0003838671307980499,
      "loss": 2.9214,
      "step": 94422
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7992992401123047,
      "learning_rate": 0.00038386320332261,
      "loss": 3.1183,
      "step": 94423
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.016209840774536,
      "learning_rate": 0.0003838592758315783,
      "loss": 3.1455,
      "step": 94424
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.336562156677246,
      "learning_rate": 0.00038385534832495563,
      "loss": 3.1576,
      "step": 94425
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.6494033336639404,
      "learning_rate": 0.00038385142080274263,
      "loss": 2.9597,
      "step": 94426
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6894570589065552,
      "learning_rate": 0.0003838474932649401,
      "loss": 3.0076,
      "step": 94427
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9904190301895142,
      "learning_rate": 0.00038384356571154876,
      "loss": 2.9563,
      "step": 94428
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.679598331451416,
      "learning_rate": 0.0003838396381425693,
      "loss": 3.0713,
      "step": 94429
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7743079662322998,
      "learning_rate": 0.0003838357105580025,
      "loss": 2.9053,
      "step": 94430
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8669482469558716,
      "learning_rate": 0.00038383178295784895,
      "loss": 3.0724,
      "step": 94431
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.12888240814209,
      "learning_rate": 0.0003838278553421096,
      "loss": 3.0281,
      "step": 94432
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.375042200088501,
      "learning_rate": 0.0003838239277107849,
      "loss": 2.9998,
      "step": 94433
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.479696750640869,
      "learning_rate": 0.0003838200000638759,
      "loss": 3.0872,
      "step": 94434
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1722605228424072,
      "learning_rate": 0.0003838160724013832,
      "loss": 3.1876,
      "step": 94435
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.778955101966858,
      "learning_rate": 0.0003838121447233075,
      "loss": 2.8617,
      "step": 94436
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7915756702423096,
      "learning_rate": 0.00038380821702964943,
      "loss": 2.9128,
      "step": 94437
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6718353033065796,
      "learning_rate": 0.00038380428932040993,
      "loss": 3.0445,
      "step": 94438
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1021041870117188,
      "learning_rate": 0.0003838003615955896,
      "loss": 3.2753,
      "step": 94439
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.6035385131835938,
      "learning_rate": 0.00038379643385518913,
      "loss": 2.9484,
      "step": 94440
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7800087928771973,
      "learning_rate": 0.0003837925060992095,
      "loss": 3.1419,
      "step": 94441
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5906165838241577,
      "learning_rate": 0.0003837885783276511,
      "loss": 2.8685,
      "step": 94442
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6485446691513062,
      "learning_rate": 0.0003837846505405149,
      "loss": 2.8042,
      "step": 94443
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3538928031921387,
      "learning_rate": 0.00038378072273780153,
      "loss": 2.8878,
      "step": 94444
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.319032669067383,
      "learning_rate": 0.00038377679491951173,
      "loss": 3.2511,
      "step": 94445
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2397875785827637,
      "learning_rate": 0.00038377286708564623,
      "loss": 3.021,
      "step": 94446
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.6945059299468994,
      "learning_rate": 0.0003837689392362059,
      "loss": 3.1118,
      "step": 94447
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7281254529953003,
      "learning_rate": 0.00038376501137119115,
      "loss": 3.1057,
      "step": 94448
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9799959659576416,
      "learning_rate": 0.000383761083490603,
      "loss": 2.9553,
      "step": 94449
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9284484386444092,
      "learning_rate": 0.00038375715559444204,
      "loss": 3.1635,
      "step": 94450
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6840667724609375,
      "learning_rate": 0.0003837532276827091,
      "loss": 2.9741,
      "step": 94451
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6720384359359741,
      "learning_rate": 0.0003837492997554048,
      "loss": 2.939,
      "step": 94452
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7742375135421753,
      "learning_rate": 0.00038374537181253,
      "loss": 3.1565,
      "step": 94453
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.717781662940979,
      "learning_rate": 0.0003837414438540853,
      "loss": 2.8697,
      "step": 94454
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8573322296142578,
      "learning_rate": 0.0003837375158800715,
      "loss": 2.9083,
      "step": 94455
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.804177165031433,
      "learning_rate": 0.0003837335878904893,
      "loss": 3.0714,
      "step": 94456
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6935948133468628,
      "learning_rate": 0.0003837296598853395,
      "loss": 2.9058,
      "step": 94457
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.476450204849243,
      "learning_rate": 0.00038372573186462274,
      "loss": 3.0896,
      "step": 94458
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8753283023834229,
      "learning_rate": 0.00038372180382833973,
      "loss": 2.9978,
      "step": 94459
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7530919313430786,
      "learning_rate": 0.00038371787577649137,
      "loss": 3.1888,
      "step": 94460
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7770808935165405,
      "learning_rate": 0.0003837139477090782,
      "loss": 3.2808,
      "step": 94461
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8968390226364136,
      "learning_rate": 0.00038371001962610103,
      "loss": 3.1512,
      "step": 94462
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.08652663230896,
      "learning_rate": 0.0003837060915275606,
      "loss": 2.7636,
      "step": 94463
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7055262327194214,
      "learning_rate": 0.0003837021634134576,
      "loss": 3.0302,
      "step": 94464
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8121438026428223,
      "learning_rate": 0.0003836982352837928,
      "loss": 3.0198,
      "step": 94465
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6473947763442993,
      "learning_rate": 0.00038369430713856693,
      "loss": 3.095,
      "step": 94466
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8647476434707642,
      "learning_rate": 0.00038369037897778075,
      "loss": 2.9711,
      "step": 94467
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.748175024986267,
      "learning_rate": 0.0003836864508014348,
      "loss": 2.7651,
      "step": 94468
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1683099269866943,
      "learning_rate": 0.00038368252260953013,
      "loss": 2.881,
      "step": 94469
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2118723392486572,
      "learning_rate": 0.0003836785944020672,
      "loss": 2.7741,
      "step": 94470
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.722638726234436,
      "learning_rate": 0.0003836746661790469,
      "loss": 2.8928,
      "step": 94471
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9127848148345947,
      "learning_rate": 0.00038367073794046987,
      "loss": 3.254,
      "step": 94472
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.450646162033081,
      "learning_rate": 0.0003836668096863369,
      "loss": 3.1053,
      "step": 94473
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5508995056152344,
      "learning_rate": 0.0003836628814166487,
      "loss": 2.9232,
      "step": 94474
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.73686945438385,
      "learning_rate": 0.0003836589531314059,
      "loss": 2.918,
      "step": 94475
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.783678650856018,
      "learning_rate": 0.00038365502483060936,
      "loss": 3.0628,
      "step": 94476
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9010844230651855,
      "learning_rate": 0.0003836510965142598,
      "loss": 3.2959,
      "step": 94477
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8333401679992676,
      "learning_rate": 0.0003836471681823579,
      "loss": 2.8762,
      "step": 94478
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7301971912384033,
      "learning_rate": 0.00038364323983490446,
      "loss": 2.6394,
      "step": 94479
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4891598224639893,
      "learning_rate": 0.0003836393114719001,
      "loss": 3.0161,
      "step": 94480
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6835873126983643,
      "learning_rate": 0.00038363538309334563,
      "loss": 2.778,
      "step": 94481
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4353142976760864,
      "learning_rate": 0.0003836314546992418,
      "loss": 2.91,
      "step": 94482
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5235118865966797,
      "learning_rate": 0.0003836275262895892,
      "loss": 2.9847,
      "step": 94483
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6838351488113403,
      "learning_rate": 0.00038362359786438884,
      "loss": 2.9581,
      "step": 94484
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7915152311325073,
      "learning_rate": 0.0003836196694236411,
      "loss": 2.9013,
      "step": 94485
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4901094436645508,
      "learning_rate": 0.000383615740967347,
      "loss": 3.1892,
      "step": 94486
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6425542831420898,
      "learning_rate": 0.0003836118124955072,
      "loss": 3.1397,
      "step": 94487
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.602800965309143,
      "learning_rate": 0.00038360788400812224,
      "loss": 3.0146,
      "step": 94488
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.913069725036621,
      "learning_rate": 0.0003836039555051931,
      "loss": 2.9091,
      "step": 94489
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7953178882598877,
      "learning_rate": 0.00038360002698672037,
      "loss": 2.9487,
      "step": 94490
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2905938625335693,
      "learning_rate": 0.0003835960984527048,
      "loss": 3.2263,
      "step": 94491
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6858474016189575,
      "learning_rate": 0.00038359216990314714,
      "loss": 3.0102,
      "step": 94492
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9508472681045532,
      "learning_rate": 0.00038358824133804815,
      "loss": 2.8686,
      "step": 94493
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.0579214096069336,
      "learning_rate": 0.0003835843127574085,
      "loss": 3.2492,
      "step": 94494
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0286436080932617,
      "learning_rate": 0.00038358038416122893,
      "loss": 2.9606,
      "step": 94495
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.211651086807251,
      "learning_rate": 0.0003835764555495102,
      "loss": 3.011,
      "step": 94496
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7390285730361938,
      "learning_rate": 0.00038357252692225306,
      "loss": 3.2746,
      "step": 94497
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.922801971435547,
      "learning_rate": 0.0003835685982794582,
      "loss": 2.7999,
      "step": 94498
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.2442269325256348,
      "learning_rate": 0.00038356466962112637,
      "loss": 3.1279,
      "step": 94499
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.292703866958618,
      "learning_rate": 0.0003835607409472582,
      "loss": 3.1664,
      "step": 94500
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9737541675567627,
      "learning_rate": 0.00038355681225785463,
      "loss": 2.9414,
      "step": 94501
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6655032634735107,
      "learning_rate": 0.00038355288355291627,
      "loss": 3.1189,
      "step": 94502
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.2467401027679443,
      "learning_rate": 0.0003835489548324438,
      "loss": 2.8961,
      "step": 94503
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.6347789764404297,
      "learning_rate": 0.00038354502609643795,
      "loss": 2.9419,
      "step": 94504
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7745647430419922,
      "learning_rate": 0.00038354109734489966,
      "loss": 2.8602,
      "step": 94505
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.537172555923462,
      "learning_rate": 0.00038353716857782936,
      "loss": 2.8941,
      "step": 94506
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5355045795440674,
      "learning_rate": 0.0003835332397952279,
      "loss": 3.1677,
      "step": 94507
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5511645078659058,
      "learning_rate": 0.00038352931099709615,
      "loss": 3.1843,
      "step": 94508
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8483444452285767,
      "learning_rate": 0.0003835253821834347,
      "loss": 3.2112,
      "step": 94509
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5478031635284424,
      "learning_rate": 0.00038352145335424423,
      "loss": 2.9365,
      "step": 94510
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2187132835388184,
      "learning_rate": 0.0003835175245095256,
      "loss": 3.0381,
      "step": 94511
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2473344802856445,
      "learning_rate": 0.00038351359564927956,
      "loss": 3.0016,
      "step": 94512
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7046983242034912,
      "learning_rate": 0.0003835096667735066,
      "loss": 3.1136,
      "step": 94513
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5996174812316895,
      "learning_rate": 0.0003835057378822078,
      "loss": 3.4174,
      "step": 94514
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.449946880340576,
      "learning_rate": 0.0003835018089753836,
      "loss": 3.1734,
      "step": 94515
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.121492624282837,
      "learning_rate": 0.0003834978800530348,
      "loss": 2.9241,
      "step": 94516
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.009239912033081,
      "learning_rate": 0.0003834939511151622,
      "loss": 2.8845,
      "step": 94517
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.936593770980835,
      "learning_rate": 0.00038349002216176657,
      "loss": 3.1602,
      "step": 94518
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9213645458221436,
      "learning_rate": 0.0003834860931928485,
      "loss": 2.9899,
      "step": 94519
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9437720775604248,
      "learning_rate": 0.0003834821642084088,
      "loss": 2.8629,
      "step": 94520
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7554222345352173,
      "learning_rate": 0.0003834782352084482,
      "loss": 2.9855,
      "step": 94521
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.739199161529541,
      "learning_rate": 0.0003834743061929674,
      "loss": 3.0261,
      "step": 94522
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6049057245254517,
      "learning_rate": 0.00038347037716196714,
      "loss": 2.9438,
      "step": 94523
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9751592874526978,
      "learning_rate": 0.00038346644811544824,
      "loss": 3.1013,
      "step": 94524
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7372671365737915,
      "learning_rate": 0.0003834625190534113,
      "loss": 3.037,
      "step": 94525
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.694322943687439,
      "learning_rate": 0.00038345858997585705,
      "loss": 3.1822,
      "step": 94526
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5376429557800293,
      "learning_rate": 0.00038345466088278635,
      "loss": 3.1193,
      "step": 94527
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7335871458053589,
      "learning_rate": 0.0003834507317741998,
      "loss": 3.038,
      "step": 94528
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1219868659973145,
      "learning_rate": 0.0003834468026500982,
      "loss": 2.9458,
      "step": 94529
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6452075242996216,
      "learning_rate": 0.0003834428735104823,
      "loss": 3.2255,
      "step": 94530
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0072531700134277,
      "learning_rate": 0.0003834389443553528,
      "loss": 2.9156,
      "step": 94531
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.043809413909912,
      "learning_rate": 0.00038343501518471037,
      "loss": 2.8589,
      "step": 94532
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9940768480300903,
      "learning_rate": 0.0003834310859985559,
      "loss": 2.9581,
      "step": 94533
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4029242992401123,
      "learning_rate": 0.0003834271567968899,
      "loss": 3.1071,
      "step": 94534
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6147404909133911,
      "learning_rate": 0.0003834232275797132,
      "loss": 3.0292,
      "step": 94535
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7976806163787842,
      "learning_rate": 0.00038341929834702663,
      "loss": 3.0289,
      "step": 94536
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.731261968612671,
      "learning_rate": 0.0003834153690988308,
      "loss": 2.9388,
      "step": 94537
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6163008213043213,
      "learning_rate": 0.0003834114398351265,
      "loss": 3.1924,
      "step": 94538
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8778358697891235,
      "learning_rate": 0.00038340751055591444,
      "loss": 2.9817,
      "step": 94539
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5289808511734009,
      "learning_rate": 0.0003834035812611953,
      "loss": 2.9912,
      "step": 94540
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.72170090675354,
      "learning_rate": 0.00038339965195096986,
      "loss": 2.9461,
      "step": 94541
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1296944618225098,
      "learning_rate": 0.0003833957226252389,
      "loss": 3.0939,
      "step": 94542
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1215453147888184,
      "learning_rate": 0.0003833917932840031,
      "loss": 2.9249,
      "step": 94543
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6977401971817017,
      "learning_rate": 0.00038338786392726323,
      "loss": 3.0553,
      "step": 94544
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2837905883789062,
      "learning_rate": 0.0003833839345550199,
      "loss": 2.7377,
      "step": 94545
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1391117572784424,
      "learning_rate": 0.0003833800051672739,
      "loss": 3.0383,
      "step": 94546
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.900841236114502,
      "learning_rate": 0.00038337607576402607,
      "loss": 3.1547,
      "step": 94547
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7486172914505005,
      "learning_rate": 0.00038337214634527703,
      "loss": 2.9546,
      "step": 94548
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.3339195251464844,
      "learning_rate": 0.0003833682169110276,
      "loss": 2.9462,
      "step": 94549
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8375682830810547,
      "learning_rate": 0.0003833642874612783,
      "loss": 3.1362,
      "step": 94550
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9146490097045898,
      "learning_rate": 0.00038336035799603013,
      "loss": 2.8758,
      "step": 94551
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7776200771331787,
      "learning_rate": 0.0003833564285152836,
      "loss": 3.3452,
      "step": 94552
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.686537265777588,
      "learning_rate": 0.0003833524990190396,
      "loss": 3.0835,
      "step": 94553
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.211665391921997,
      "learning_rate": 0.00038334856950729873,
      "loss": 2.9778,
      "step": 94554
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.88996422290802,
      "learning_rate": 0.0003833446399800618,
      "loss": 3.3569,
      "step": 94555
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5151591300964355,
      "learning_rate": 0.0003833407104373296,
      "loss": 3.1837,
      "step": 94556
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8640702962875366,
      "learning_rate": 0.00038333678087910266,
      "loss": 3.0222,
      "step": 94557
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8835060596466064,
      "learning_rate": 0.000383332851305382,
      "loss": 3.1085,
      "step": 94558
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.490109443664551,
      "learning_rate": 0.00038332892171616807,
      "loss": 2.9636,
      "step": 94559
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.013245105743408,
      "learning_rate": 0.00038332499211146177,
      "loss": 2.9811,
      "step": 94560
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8349665403366089,
      "learning_rate": 0.00038332106249126374,
      "loss": 3.0125,
      "step": 94561
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.386676788330078,
      "learning_rate": 0.0003833171328555749,
      "loss": 3.1309,
      "step": 94562
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5801552534103394,
      "learning_rate": 0.0003833132032043957,
      "loss": 2.8901,
      "step": 94563
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7380852699279785,
      "learning_rate": 0.000383309273537727,
      "loss": 2.8246,
      "step": 94564
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6761375665664673,
      "learning_rate": 0.0003833053438555696,
      "loss": 3.0994,
      "step": 94565
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0397000312805176,
      "learning_rate": 0.00038330141415792406,
      "loss": 2.892,
      "step": 94566
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.110293388366699,
      "learning_rate": 0.00038329748444479125,
      "loss": 2.9794,
      "step": 94567
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0801079273223877,
      "learning_rate": 0.00038329355471617186,
      "loss": 3.0345,
      "step": 94568
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.029223918914795,
      "learning_rate": 0.00038328962497206676,
      "loss": 3.0393,
      "step": 94569
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8295118808746338,
      "learning_rate": 0.00038328569521247643,
      "loss": 3.1187,
      "step": 94570
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1884989738464355,
      "learning_rate": 0.00038328176543740163,
      "loss": 2.7481,
      "step": 94571
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.422687530517578,
      "learning_rate": 0.00038327783564684334,
      "loss": 3.0168,
      "step": 94572
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8108454942703247,
      "learning_rate": 0.00038327390584080205,
      "loss": 2.8921,
      "step": 94573
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7596826553344727,
      "learning_rate": 0.0003832699760192786,
      "loss": 3.1578,
      "step": 94574
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5523234605789185,
      "learning_rate": 0.0003832660461822737,
      "loss": 3.0547,
      "step": 94575
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5771352052688599,
      "learning_rate": 0.0003832621163297881,
      "loss": 2.9642,
      "step": 94576
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9049267768859863,
      "learning_rate": 0.00038325818646182236,
      "loss": 2.7102,
      "step": 94577
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.537691593170166,
      "learning_rate": 0.0003832542565783775,
      "loss": 2.7771,
      "step": 94578
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.750380277633667,
      "learning_rate": 0.000383250326679454,
      "loss": 3.0722,
      "step": 94579
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1989665031433105,
      "learning_rate": 0.00038324639676505273,
      "loss": 2.8868,
      "step": 94580
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.097362756729126,
      "learning_rate": 0.0003832424668351744,
      "loss": 3.0284,
      "step": 94581
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.900313377380371,
      "learning_rate": 0.0003832385368898197,
      "loss": 3.3417,
      "step": 94582
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4562013149261475,
      "learning_rate": 0.0003832346069289894,
      "loss": 3.0367,
      "step": 94583
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6411741971969604,
      "learning_rate": 0.00038323067695268415,
      "loss": 2.9654,
      "step": 94584
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.9346237182617188,
      "learning_rate": 0.0003832267469609049,
      "loss": 2.5827,
      "step": 94585
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.7408595085144043,
      "learning_rate": 0.0003832228169536521,
      "loss": 3.3883,
      "step": 94586
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.772665500640869,
      "learning_rate": 0.0003832188869309266,
      "loss": 3.1421,
      "step": 94587
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1475934982299805,
      "learning_rate": 0.00038321495689272927,
      "loss": 2.9798,
      "step": 94588
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.803766965866089,
      "learning_rate": 0.0003832110268390606,
      "loss": 2.9327,
      "step": 94589
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.776484966278076,
      "learning_rate": 0.0003832070967699214,
      "loss": 2.9914,
      "step": 94590
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8766162395477295,
      "learning_rate": 0.0003832031666853125,
      "loss": 3.0116,
      "step": 94591
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7312983274459839,
      "learning_rate": 0.0003831992365852345,
      "loss": 2.7579,
      "step": 94592
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1591246128082275,
      "learning_rate": 0.00038319530646968824,
      "loss": 3.0146,
      "step": 94593
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1868135929107666,
      "learning_rate": 0.00038319137633867443,
      "loss": 2.9178,
      "step": 94594
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8609365224838257,
      "learning_rate": 0.00038318744619219376,
      "loss": 3.0295,
      "step": 94595
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8906452655792236,
      "learning_rate": 0.0003831835160302469,
      "loss": 3.1815,
      "step": 94596
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.487152099609375,
      "learning_rate": 0.00038317958585283476,
      "loss": 3.0351,
      "step": 94597
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8083100318908691,
      "learning_rate": 0.0003831756556599579,
      "loss": 3.1359,
      "step": 94598
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0937232971191406,
      "learning_rate": 0.0003831717254516171,
      "loss": 2.9468,
      "step": 94599
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8996846675872803,
      "learning_rate": 0.00038316779522781314,
      "loss": 2.9438,
      "step": 94600
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2652828693389893,
      "learning_rate": 0.00038316386498854667,
      "loss": 2.7737,
      "step": 94601
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7087430953979492,
      "learning_rate": 0.0003831599347338185,
      "loss": 2.9432,
      "step": 94602
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.940778136253357,
      "learning_rate": 0.0003831560044636294,
      "loss": 3.0606,
      "step": 94603
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1378276348114014,
      "learning_rate": 0.0003831520741779799,
      "loss": 3.3779,
      "step": 94604
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5408709049224854,
      "learning_rate": 0.00038314814387687096,
      "loss": 2.8968,
      "step": 94605
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.553126573562622,
      "learning_rate": 0.0003831442135603032,
      "loss": 3.0688,
      "step": 94606
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.1227052211761475,
      "learning_rate": 0.0003831402832282773,
      "loss": 2.9824,
      "step": 94607
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.188755989074707,
      "learning_rate": 0.00038313635288079404,
      "loss": 3.0293,
      "step": 94608
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1335883140563965,
      "learning_rate": 0.00038313242251785426,
      "loss": 3.1392,
      "step": 94609
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.895667552947998,
      "learning_rate": 0.0003831284921394586,
      "loss": 2.9868,
      "step": 94610
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0633866786956787,
      "learning_rate": 0.00038312456174560764,
      "loss": 3.0206,
      "step": 94611
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6694376468658447,
      "learning_rate": 0.0003831206313363024,
      "loss": 3.0468,
      "step": 94612
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.7868101596832275,
      "learning_rate": 0.0003831167009115434,
      "loss": 3.1227,
      "step": 94613
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.803027391433716,
      "learning_rate": 0.00038311277047133133,
      "loss": 2.9477,
      "step": 94614
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.968279242515564,
      "learning_rate": 0.0003831088400156672,
      "loss": 2.7579,
      "step": 94615
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2473037242889404,
      "learning_rate": 0.0003831049095445515,
      "loss": 3.0015,
      "step": 94616
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1251909732818604,
      "learning_rate": 0.000383100979057985,
      "loss": 3.2774,
      "step": 94617
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.345723867416382,
      "learning_rate": 0.00038309704855596854,
      "loss": 3.091,
      "step": 94618
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8010258674621582,
      "learning_rate": 0.00038309311803850273,
      "loss": 2.8399,
      "step": 94619
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.900861144065857,
      "learning_rate": 0.00038308918750558834,
      "loss": 3.097,
      "step": 94620
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3337063789367676,
      "learning_rate": 0.00038308525695722614,
      "loss": 2.8462,
      "step": 94621
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4753692150115967,
      "learning_rate": 0.00038308132639341676,
      "loss": 3.0912,
      "step": 94622
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4102389812469482,
      "learning_rate": 0.00038307739581416097,
      "loss": 3.1027,
      "step": 94623
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.692472219467163,
      "learning_rate": 0.00038307346521945963,
      "loss": 3.1726,
      "step": 94624
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7760127782821655,
      "learning_rate": 0.0003830695346093133,
      "loss": 3.0602,
      "step": 94625
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8162012100219727,
      "learning_rate": 0.00038306560398372277,
      "loss": 2.9892,
      "step": 94626
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.151020050048828,
      "learning_rate": 0.0003830616733426888,
      "loss": 2.8144,
      "step": 94627
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.150043487548828,
      "learning_rate": 0.0003830577426862121,
      "loss": 3.0955,
      "step": 94628
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9023605585098267,
      "learning_rate": 0.0003830538120142933,
      "loss": 2.9862,
      "step": 94629
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9099032878875732,
      "learning_rate": 0.00038304988132693336,
      "loss": 3.0227,
      "step": 94630
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6814454793930054,
      "learning_rate": 0.0003830459506241328,
      "loss": 3.2593,
      "step": 94631
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.588486671447754,
      "learning_rate": 0.0003830420199058924,
      "loss": 3.1866,
      "step": 94632
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9908947944641113,
      "learning_rate": 0.00038303808917221306,
      "loss": 2.8158,
      "step": 94633
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4298551082611084,
      "learning_rate": 0.00038303415842309526,
      "loss": 3.1277,
      "step": 94634
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.784268856048584,
      "learning_rate": 0.00038303022765853983,
      "loss": 3.1831,
      "step": 94635
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.623515248298645,
      "learning_rate": 0.0003830262968785477,
      "loss": 2.984,
      "step": 94636
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7002109289169312,
      "learning_rate": 0.0003830223660831192,
      "loss": 2.8444,
      "step": 94637
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.5787465572357178,
      "learning_rate": 0.0003830184352722553,
      "loss": 3.1884,
      "step": 94638
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5197380781173706,
      "learning_rate": 0.0003830145044459568,
      "loss": 2.9952,
      "step": 94639
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9896458387374878,
      "learning_rate": 0.00038301057360422427,
      "loss": 3.1332,
      "step": 94640
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6646705865859985,
      "learning_rate": 0.0003830066427470585,
      "loss": 2.9284,
      "step": 94641
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.076603412628174,
      "learning_rate": 0.0003830027118744602,
      "loss": 3.045,
      "step": 94642
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.651008129119873,
      "learning_rate": 0.00038299878098643027,
      "loss": 3.1422,
      "step": 94643
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6745918989181519,
      "learning_rate": 0.0003829948500829692,
      "loss": 3.0385,
      "step": 94644
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6421259641647339,
      "learning_rate": 0.0003829909191640778,
      "loss": 2.848,
      "step": 94645
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7514554262161255,
      "learning_rate": 0.0003829869882297569,
      "loss": 3.0908,
      "step": 94646
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1215660572052,
      "learning_rate": 0.00038298305728000706,
      "loss": 2.8676,
      "step": 94647
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.755657434463501,
      "learning_rate": 0.00038297912631482913,
      "loss": 2.9425,
      "step": 94648
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9555953741073608,
      "learning_rate": 0.00038297519533422386,
      "loss": 2.9827,
      "step": 94649
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.721234917640686,
      "learning_rate": 0.0003829712643381919,
      "loss": 2.9048,
      "step": 94650
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8601877689361572,
      "learning_rate": 0.000382967333326734,
      "loss": 2.852,
      "step": 94651
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4817689657211304,
      "learning_rate": 0.00038296340229985093,
      "loss": 3.1442,
      "step": 94652
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1959726810455322,
      "learning_rate": 0.0003829594712575434,
      "loss": 3.0346,
      "step": 94653
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.542597770690918,
      "learning_rate": 0.00038295554019981216,
      "loss": 2.9363,
      "step": 94654
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5460511445999146,
      "learning_rate": 0.0003829516091266579,
      "loss": 2.9404,
      "step": 94655
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.720099687576294,
      "learning_rate": 0.00038294767803808127,
      "loss": 2.9203,
      "step": 94656
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9589251279830933,
      "learning_rate": 0.0003829437469340833,
      "loss": 3.1505,
      "step": 94657
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9624452590942383,
      "learning_rate": 0.00038293981581466436,
      "loss": 3.1977,
      "step": 94658
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7692933082580566,
      "learning_rate": 0.0003829358846798254,
      "loss": 2.9471,
      "step": 94659
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.940513253211975,
      "learning_rate": 0.0003829319535295671,
      "loss": 3.1105,
      "step": 94660
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6758506298065186,
      "learning_rate": 0.00038292802236389014,
      "loss": 2.95,
      "step": 94661
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6352630853652954,
      "learning_rate": 0.00038292409118279535,
      "loss": 3.3088,
      "step": 94662
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4299187660217285,
      "learning_rate": 0.00038292015998628336,
      "loss": 2.9254,
      "step": 94663
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2210443019866943,
      "learning_rate": 0.00038291622877435506,
      "loss": 3.0275,
      "step": 94664
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3599653244018555,
      "learning_rate": 0.0003829122975470109,
      "loss": 3.1086,
      "step": 94665
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.145573377609253,
      "learning_rate": 0.0003829083663042519,
      "loss": 2.84,
      "step": 94666
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8744282722473145,
      "learning_rate": 0.0003829044350460787,
      "loss": 3.0613,
      "step": 94667
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0135862827301025,
      "learning_rate": 0.00038290050377249186,
      "loss": 2.8746,
      "step": 94668
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.175654411315918,
      "learning_rate": 0.00038289657248349233,
      "loss": 3.0801,
      "step": 94669
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.651117205619812,
      "learning_rate": 0.0003828926411790808,
      "loss": 2.8398,
      "step": 94670
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2341911792755127,
      "learning_rate": 0.00038288870985925787,
      "loss": 2.9213,
      "step": 94671
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.283876657485962,
      "learning_rate": 0.00038288477852402444,
      "loss": 3.1943,
      "step": 94672
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4762320518493652,
      "learning_rate": 0.00038288084717338114,
      "loss": 2.7595,
      "step": 94673
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7400020360946655,
      "learning_rate": 0.0003828769158073287,
      "loss": 3.0641,
      "step": 94674
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7116620540618896,
      "learning_rate": 0.00038287298442586795,
      "loss": 3.1055,
      "step": 94675
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.8362293243408203,
      "learning_rate": 0.0003828690530289996,
      "loss": 3.2666,
      "step": 94676
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6352174282073975,
      "learning_rate": 0.0003828651216167241,
      "loss": 2.8142,
      "step": 94677
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0736870765686035,
      "learning_rate": 0.00038286119018904254,
      "loss": 3.0322,
      "step": 94678
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.422023892402649,
      "learning_rate": 0.00038285725874595555,
      "loss": 3.051,
      "step": 94679
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.703987717628479,
      "learning_rate": 0.0003828533272874638,
      "loss": 3.0421,
      "step": 94680
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.0685832500457764,
      "learning_rate": 0.000382849395813568,
      "loss": 2.8408,
      "step": 94681
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4267170429229736,
      "learning_rate": 0.000382845464324269,
      "loss": 2.8419,
      "step": 94682
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7786535024642944,
      "learning_rate": 0.0003828415328195675,
      "loss": 2.9388,
      "step": 94683
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9286576509475708,
      "learning_rate": 0.0003828376012994641,
      "loss": 3.2136,
      "step": 94684
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.21580171585083,
      "learning_rate": 0.00038283366976395975,
      "loss": 3.1969,
      "step": 94685
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.472686290740967,
      "learning_rate": 0.0003828297382130549,
      "loss": 2.7793,
      "step": 94686
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7233572006225586,
      "learning_rate": 0.00038282580664675053,
      "loss": 3.1038,
      "step": 94687
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7375332117080688,
      "learning_rate": 0.0003828218750650473,
      "loss": 2.8182,
      "step": 94688
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.933376431465149,
      "learning_rate": 0.00038281794346794594,
      "loss": 2.7258,
      "step": 94689
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2758266925811768,
      "learning_rate": 0.000382814011855447,
      "loss": 2.8814,
      "step": 94690
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7664775848388672,
      "learning_rate": 0.0003828100802275516,
      "loss": 2.9709,
      "step": 94691
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0617871284484863,
      "learning_rate": 0.00038280614858426004,
      "loss": 3.0062,
      "step": 94692
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9715851545333862,
      "learning_rate": 0.0003828022169255734,
      "loss": 3.0336,
      "step": 94693
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7143936157226562,
      "learning_rate": 0.0003827982852514922,
      "loss": 2.8102,
      "step": 94694
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1776535511016846,
      "learning_rate": 0.0003827943535620172,
      "loss": 2.7494,
      "step": 94695
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7376569509506226,
      "learning_rate": 0.0003827904218571492,
      "loss": 3.0866,
      "step": 94696
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9328233003616333,
      "learning_rate": 0.00038278649013688895,
      "loss": 3.0875,
      "step": 94697
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8261066675186157,
      "learning_rate": 0.00038278255840123706,
      "loss": 3.0212,
      "step": 94698
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6732850074768066,
      "learning_rate": 0.0003827786266501943,
      "loss": 3.1462,
      "step": 94699
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7873221635818481,
      "learning_rate": 0.00038277469488376154,
      "loss": 2.8651,
      "step": 94700
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0528905391693115,
      "learning_rate": 0.00038277076310193933,
      "loss": 3.0142,
      "step": 94701
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.074174404144287,
      "learning_rate": 0.0003827668313047285,
      "loss": 3.0923,
      "step": 94702
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.865443229675293,
      "learning_rate": 0.0003827628994921298,
      "loss": 2.9052,
      "step": 94703
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.131845474243164,
      "learning_rate": 0.00038275896766414377,
      "loss": 3.1105,
      "step": 94704
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5501923561096191,
      "learning_rate": 0.00038275503582077135,
      "loss": 2.9562,
      "step": 94705
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8181499242782593,
      "learning_rate": 0.00038275110396201323,
      "loss": 2.9739,
      "step": 94706
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7021327018737793,
      "learning_rate": 0.00038274717208787017,
      "loss": 3.0158,
      "step": 94707
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6445448398590088,
      "learning_rate": 0.0003827432401983428,
      "loss": 2.8563,
      "step": 94708
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.009518623352051,
      "learning_rate": 0.0003827393082934319,
      "loss": 2.9197,
      "step": 94709
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.87196946144104,
      "learning_rate": 0.00038273537637313815,
      "loss": 3.2462,
      "step": 94710
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6057151556015015,
      "learning_rate": 0.00038273144443746234,
      "loss": 2.9612,
      "step": 94711
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3151345252990723,
      "learning_rate": 0.0003827275124864053,
      "loss": 3.1095,
      "step": 94712
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4632641077041626,
      "learning_rate": 0.00038272358051996756,
      "loss": 2.9387,
      "step": 94713
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6827346086502075,
      "learning_rate": 0.00038271964853814995,
      "loss": 3.0821,
      "step": 94714
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.073899269104004,
      "learning_rate": 0.00038271571654095325,
      "loss": 2.868,
      "step": 94715
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.781738758087158,
      "learning_rate": 0.0003827117845283781,
      "loss": 3.2359,
      "step": 94716
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8941500186920166,
      "learning_rate": 0.0003827078525004252,
      "loss": 2.9187,
      "step": 94717
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9370296001434326,
      "learning_rate": 0.0003827039204570954,
      "loss": 2.9895,
      "step": 94718
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.7793068885803223,
      "learning_rate": 0.00038269998839838946,
      "loss": 3.2248,
      "step": 94719
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.354083299636841,
      "learning_rate": 0.000382696056324308,
      "loss": 3.0066,
      "step": 94720
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7256348133087158,
      "learning_rate": 0.0003826921242348517,
      "loss": 3.3304,
      "step": 94721
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.63773250579834,
      "learning_rate": 0.0003826881921300215,
      "loss": 3.1063,
      "step": 94722
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.565148115158081,
      "learning_rate": 0.00038268426000981787,
      "loss": 3.0026,
      "step": 94723
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.926121473312378,
      "learning_rate": 0.0003826803278742417,
      "loss": 2.863,
      "step": 94724
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4413692951202393,
      "learning_rate": 0.00038267639572329374,
      "loss": 3.1519,
      "step": 94725
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.087369441986084,
      "learning_rate": 0.0003826724635569747,
      "loss": 3.1176,
      "step": 94726
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8911523818969727,
      "learning_rate": 0.0003826685313752852,
      "loss": 2.9347,
      "step": 94727
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8279553651809692,
      "learning_rate": 0.0003826645991782262,
      "loss": 2.9816,
      "step": 94728
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7521235942840576,
      "learning_rate": 0.0003826606669657982,
      "loss": 3.1222,
      "step": 94729
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6291098594665527,
      "learning_rate": 0.000382656734738002,
      "loss": 3.0028,
      "step": 94730
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5788286924362183,
      "learning_rate": 0.00038265280249483836,
      "loss": 2.8396,
      "step": 94731
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.840512990951538,
      "learning_rate": 0.00038264887023630806,
      "loss": 2.8362,
      "step": 94732
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.378406047821045,
      "learning_rate": 0.0003826449379624117,
      "loss": 3.0459,
      "step": 94733
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.048691749572754,
      "learning_rate": 0.0003826410056731501,
      "loss": 3.0901,
      "step": 94734
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8402957916259766,
      "learning_rate": 0.0003826370733685241,
      "loss": 3.1038,
      "step": 94735
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.121641159057617,
      "learning_rate": 0.00038263314104853413,
      "loss": 3.1708,
      "step": 94736
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2260732650756836,
      "learning_rate": 0.0003826292087131812,
      "loss": 3.0677,
      "step": 94737
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4579029083251953,
      "learning_rate": 0.00038262527636246594,
      "loss": 3.0056,
      "step": 94738
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4899053573608398,
      "learning_rate": 0.000382621343996389,
      "loss": 2.8769,
      "step": 94739
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.198878765106201,
      "learning_rate": 0.00038261741161495124,
      "loss": 2.9691,
      "step": 94740
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.705934524536133,
      "learning_rate": 0.00038261347921815335,
      "loss": 3.01,
      "step": 94741
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7772630453109741,
      "learning_rate": 0.00038260954680599616,
      "loss": 3.1108,
      "step": 94742
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.061532497406006,
      "learning_rate": 0.0003826056143784801,
      "loss": 2.719,
      "step": 94743
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.062598705291748,
      "learning_rate": 0.0003826016819356062,
      "loss": 3.0366,
      "step": 94744
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0957119464874268,
      "learning_rate": 0.00038259774947737516,
      "loss": 2.7826,
      "step": 94745
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7420848608016968,
      "learning_rate": 0.0003825938170037875,
      "loss": 3.1467,
      "step": 94746
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.776943564414978,
      "learning_rate": 0.00038258988451484415,
      "loss": 2.9353,
      "step": 94747
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1838157176971436,
      "learning_rate": 0.00038258595201054576,
      "loss": 2.953,
      "step": 94748
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9082380533218384,
      "learning_rate": 0.0003825820194908932,
      "loss": 3.1124,
      "step": 94749
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9472233057022095,
      "learning_rate": 0.00038257808695588694,
      "loss": 2.9356,
      "step": 94750
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7095763683319092,
      "learning_rate": 0.00038257415440552786,
      "loss": 3.1248,
      "step": 94751
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1643967628479004,
      "learning_rate": 0.0003825702218398168,
      "loss": 3.0658,
      "step": 94752
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2701797485351562,
      "learning_rate": 0.00038256628925875425,
      "loss": 2.9744,
      "step": 94753
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8427345752716064,
      "learning_rate": 0.00038256235666234113,
      "loss": 3.0535,
      "step": 94754
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9037196636199951,
      "learning_rate": 0.00038255842405057815,
      "loss": 2.8475,
      "step": 94755
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.971030831336975,
      "learning_rate": 0.00038255449142346595,
      "loss": 2.9267,
      "step": 94756
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.047153949737549,
      "learning_rate": 0.00038255055878100525,
      "loss": 2.7647,
      "step": 94757
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6771857738494873,
      "learning_rate": 0.000382546626123197,
      "loss": 3.0459,
      "step": 94758
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6006088256835938,
      "learning_rate": 0.0003825426934500417,
      "loss": 3.1354,
      "step": 94759
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5757296085357666,
      "learning_rate": 0.0003825387607615401,
      "loss": 2.7856,
      "step": 94760
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1296567916870117,
      "learning_rate": 0.000382534828057693,
      "loss": 2.8247,
      "step": 94761
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.677140712738037,
      "learning_rate": 0.0003825308953385012,
      "loss": 2.8399,
      "step": 94762
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.3046746253967285,
      "learning_rate": 0.00038252696260396524,
      "loss": 2.8666,
      "step": 94763
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6118981838226318,
      "learning_rate": 0.0003825230298540861,
      "loss": 3.2028,
      "step": 94764
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5167086124420166,
      "learning_rate": 0.00038251909708886424,
      "loss": 3.1025,
      "step": 94765
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5393544435501099,
      "learning_rate": 0.0003825151643083005,
      "loss": 3.0712,
      "step": 94766
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.649390697479248,
      "learning_rate": 0.0003825112315123958,
      "loss": 3.1899,
      "step": 94767
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.962087869644165,
      "learning_rate": 0.00038250729870115054,
      "loss": 3.126,
      "step": 94768
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6423529386520386,
      "learning_rate": 0.0003825033658745657,
      "loss": 3.2922,
      "step": 94769
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6444934606552124,
      "learning_rate": 0.00038249943303264193,
      "loss": 2.8583,
      "step": 94770
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9811458587646484,
      "learning_rate": 0.0003824955001753799,
      "loss": 3.1355,
      "step": 94771
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0668182373046875,
      "learning_rate": 0.00038249156730278043,
      "loss": 3.1628,
      "step": 94772
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4865809679031372,
      "learning_rate": 0.00038248763441484427,
      "loss": 3.2606,
      "step": 94773
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7469714879989624,
      "learning_rate": 0.000382483701511572,
      "loss": 3.0253,
      "step": 94774
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.183584690093994,
      "learning_rate": 0.0003824797685929645,
      "loss": 2.99,
      "step": 94775
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7487075328826904,
      "learning_rate": 0.00038247583565902257,
      "loss": 2.9131,
      "step": 94776
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2080657482147217,
      "learning_rate": 0.00038247190270974664,
      "loss": 3.1406,
      "step": 94777
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6195368766784668,
      "learning_rate": 0.0003824679697451377,
      "loss": 2.876,
      "step": 94778
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6485893726348877,
      "learning_rate": 0.00038246403676519647,
      "loss": 3.1519,
      "step": 94779
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4532849788665771,
      "learning_rate": 0.00038246010376992353,
      "loss": 3.1038,
      "step": 94780
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4858057498931885,
      "learning_rate": 0.00038245617075931965,
      "loss": 2.9371,
      "step": 94781
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.064423084259033,
      "learning_rate": 0.0003824522377333858,
      "loss": 2.7376,
      "step": 94782
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7783524990081787,
      "learning_rate": 0.0003824483046921224,
      "loss": 2.9965,
      "step": 94783
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6703494787216187,
      "learning_rate": 0.00038244437163553025,
      "loss": 3.0868,
      "step": 94784
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.240236759185791,
      "learning_rate": 0.0003824404385636102,
      "loss": 3.0019,
      "step": 94785
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.342648983001709,
      "learning_rate": 0.00038243650547636296,
      "loss": 2.9583,
      "step": 94786
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.784074068069458,
      "learning_rate": 0.0003824325723737892,
      "loss": 2.964,
      "step": 94787
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0916435718536377,
      "learning_rate": 0.00038242863925588963,
      "loss": 3.3536,
      "step": 94788
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.992363691329956,
      "learning_rate": 0.00038242470612266503,
      "loss": 2.8634,
      "step": 94789
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.9229280948638916,
      "learning_rate": 0.00038242077297411615,
      "loss": 2.9618,
      "step": 94790
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.786707878112793,
      "learning_rate": 0.00038241683981024365,
      "loss": 3.2198,
      "step": 94791
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.1582376956939697,
      "learning_rate": 0.00038241290663104834,
      "loss": 3.0463,
      "step": 94792
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.685762882232666,
      "learning_rate": 0.00038240897343653086,
      "loss": 2.961,
      "step": 94793
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.4611732959747314,
      "learning_rate": 0.00038240504022669204,
      "loss": 2.9459,
      "step": 94794
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.066438913345337,
      "learning_rate": 0.00038240110700153257,
      "loss": 2.9371,
      "step": 94795
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.88463294506073,
      "learning_rate": 0.0003823971737610532,
      "loss": 2.7813,
      "step": 94796
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0780889987945557,
      "learning_rate": 0.00038239324050525457,
      "loss": 2.8636,
      "step": 94797
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.044997453689575,
      "learning_rate": 0.0003823893072341376,
      "loss": 3.1267,
      "step": 94798
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.4505890607833862,
      "learning_rate": 0.0003823853739477028,
      "loss": 3.4507,
      "step": 94799
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.635772705078125,
      "learning_rate": 0.00038238144064595104,
      "loss": 3.0643,
      "step": 94800
    },
    {
      "epoch": 1.23,
      "grad_norm": 3.215130567550659,
      "learning_rate": 0.000382377507328883,
      "loss": 2.8468,
      "step": 94801
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.7376279830932617,
      "learning_rate": 0.00038237357399649945,
      "loss": 2.8789,
      "step": 94802
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8579970598220825,
      "learning_rate": 0.000382369640648801,
      "loss": 3.0533,
      "step": 94803
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7064868211746216,
      "learning_rate": 0.0003823657072857886,
      "loss": 3.1024,
      "step": 94804
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.238697052001953,
      "learning_rate": 0.0003823617739074628,
      "loss": 2.8947,
      "step": 94805
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0128276348114014,
      "learning_rate": 0.0003823578405138244,
      "loss": 3.1226,
      "step": 94806
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9725546836853027,
      "learning_rate": 0.00038235390710487417,
      "loss": 2.9265,
      "step": 94807
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8916131258010864,
      "learning_rate": 0.00038234997368061273,
      "loss": 2.8576,
      "step": 94808
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2436673641204834,
      "learning_rate": 0.0003823460402410409,
      "loss": 2.904,
      "step": 94809
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.088346004486084,
      "learning_rate": 0.00038234210678615943,
      "loss": 3.1326,
      "step": 94810
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.623467206954956,
      "learning_rate": 0.00038233817331596897,
      "loss": 3.1665,
      "step": 94811
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6500649452209473,
      "learning_rate": 0.0003823342398304703,
      "loss": 3.1715,
      "step": 94812
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8094732761383057,
      "learning_rate": 0.0003823303063296641,
      "loss": 3.1694,
      "step": 94813
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6610214710235596,
      "learning_rate": 0.0003823263728135512,
      "loss": 3.2306,
      "step": 94814
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7953224182128906,
      "learning_rate": 0.0003823224392821322,
      "loss": 3.0834,
      "step": 94815
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8979243040084839,
      "learning_rate": 0.00038231850573540796,
      "loss": 3.22,
      "step": 94816
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.925945520401001,
      "learning_rate": 0.0003823145721733791,
      "loss": 2.6047,
      "step": 94817
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.058967351913452,
      "learning_rate": 0.00038231063859604645,
      "loss": 3.149,
      "step": 94818
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8529568910598755,
      "learning_rate": 0.0003823067050034108,
      "loss": 2.8822,
      "step": 94819
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9353173971176147,
      "learning_rate": 0.0003823027713954726,
      "loss": 3.0813,
      "step": 94820
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8464587926864624,
      "learning_rate": 0.00038229883777223287,
      "loss": 2.9103,
      "step": 94821
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7016339302062988,
      "learning_rate": 0.00038229490413369216,
      "loss": 2.9553,
      "step": 94822
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8277978897094727,
      "learning_rate": 0.0003822909704798513,
      "loss": 2.9557,
      "step": 94823
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9328279495239258,
      "learning_rate": 0.00038228703681071104,
      "loss": 3.1025,
      "step": 94824
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.982069730758667,
      "learning_rate": 0.00038228310312627203,
      "loss": 2.8107,
      "step": 94825
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9781677722930908,
      "learning_rate": 0.0003822791694265351,
      "loss": 2.898,
      "step": 94826
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7062867879867554,
      "learning_rate": 0.0003822752357115008,
      "loss": 2.8696,
      "step": 94827
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.086622476577759,
      "learning_rate": 0.00038227130198117005,
      "loss": 2.9695,
      "step": 94828
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0228328704833984,
      "learning_rate": 0.00038226736823554345,
      "loss": 3.0058,
      "step": 94829
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.044400691986084,
      "learning_rate": 0.00038226343447462195,
      "loss": 3.193,
      "step": 94830
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0611627101898193,
      "learning_rate": 0.00038225950069840594,
      "loss": 2.8898,
      "step": 94831
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.8524998426437378,
      "learning_rate": 0.00038225556690689645,
      "loss": 2.6528,
      "step": 94832
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.699996829032898,
      "learning_rate": 0.00038225163310009406,
      "loss": 3.0752,
      "step": 94833
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.0120849609375,
      "learning_rate": 0.0003822476992779996,
      "loss": 2.8988,
      "step": 94834
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6731866598129272,
      "learning_rate": 0.0003822437654406136,
      "loss": 3.0506,
      "step": 94835
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7107632160186768,
      "learning_rate": 0.000382239831587937,
      "loss": 2.9801,
      "step": 94836
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.9518094062805176,
      "learning_rate": 0.00038223589771997055,
      "loss": 2.954,
      "step": 94837
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.2913856506347656,
      "learning_rate": 0.00038223196383671483,
      "loss": 2.7457,
      "step": 94838
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.637630581855774,
      "learning_rate": 0.0003822280299381706,
      "loss": 2.8064,
      "step": 94839
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6131011247634888,
      "learning_rate": 0.0003822240960243387,
      "loss": 3.1234,
      "step": 94840
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6796903610229492,
      "learning_rate": 0.00038222016209521974,
      "loss": 2.802,
      "step": 94841
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7100187540054321,
      "learning_rate": 0.00038221622815081443,
      "loss": 2.9337,
      "step": 94842
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7365914583206177,
      "learning_rate": 0.00038221229419112377,
      "loss": 2.9758,
      "step": 94843
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.6321630477905273,
      "learning_rate": 0.0003822083602161481,
      "loss": 2.8697,
      "step": 94844
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5758663415908813,
      "learning_rate": 0.00038220442622588843,
      "loss": 3.0591,
      "step": 94845
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.7942116260528564,
      "learning_rate": 0.0003822004922203454,
      "loss": 2.9545,
      "step": 94846
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.5555800199508667,
      "learning_rate": 0.00038219655819951977,
      "loss": 3.1559,
      "step": 94847
    },
    {
      "epoch": 1.23,
      "grad_norm": 1.897980809211731,
      "learning_rate": 0.00038219262416341215,
      "loss": 3.2352,
      "step": 94848
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.078848361968994,
      "learning_rate": 0.0003821886901120235,
      "loss": 2.9046,
      "step": 94849
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3602194786071777,
      "learning_rate": 0.00038218475604535435,
      "loss": 2.9985,
      "step": 94850
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.289057970046997,
      "learning_rate": 0.00038218082196340546,
      "loss": 3.2279,
      "step": 94851
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.169842004776001,
      "learning_rate": 0.0003821768878661777,
      "loss": 3.2228,
      "step": 94852
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1757233142852783,
      "learning_rate": 0.00038217295375367165,
      "loss": 2.9181,
      "step": 94853
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6829919815063477,
      "learning_rate": 0.00038216901962588807,
      "loss": 3.1079,
      "step": 94854
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.157928228378296,
      "learning_rate": 0.00038216508548282775,
      "loss": 3.1056,
      "step": 94855
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7211151123046875,
      "learning_rate": 0.00038216115132449146,
      "loss": 2.7025,
      "step": 94856
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.014200210571289,
      "learning_rate": 0.0003821572171508798,
      "loss": 2.9028,
      "step": 94857
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.742509603500366,
      "learning_rate": 0.0003821532829619935,
      "loss": 2.5975,
      "step": 94858
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.682835578918457,
      "learning_rate": 0.00038214934875783346,
      "loss": 3.2162,
      "step": 94859
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9461910724639893,
      "learning_rate": 0.0003821454145384002,
      "loss": 2.9253,
      "step": 94860
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6006052494049072,
      "learning_rate": 0.0003821414803036946,
      "loss": 3.0536,
      "step": 94861
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9019615650177002,
      "learning_rate": 0.0003821375460537174,
      "loss": 3.0158,
      "step": 94862
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7033644914627075,
      "learning_rate": 0.0003821336117884692,
      "loss": 3.0804,
      "step": 94863
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.29829740524292,
      "learning_rate": 0.0003821296775079508,
      "loss": 2.9719,
      "step": 94864
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9742976427078247,
      "learning_rate": 0.00038212574321216307,
      "loss": 3.0742,
      "step": 94865
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6941852569580078,
      "learning_rate": 0.00038212180890110655,
      "loss": 3.0124,
      "step": 94866
    },
    {
      "epoch": 1.24,
      "grad_norm": 4.083834171295166,
      "learning_rate": 0.000382117874574782,
      "loss": 2.8964,
      "step": 94867
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.050804615020752,
      "learning_rate": 0.00038211394023319025,
      "loss": 3.0131,
      "step": 94868
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7106859683990479,
      "learning_rate": 0.0003821100058763319,
      "loss": 3.1084,
      "step": 94869
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7066195011138916,
      "learning_rate": 0.0003821060715042078,
      "loss": 2.9242,
      "step": 94870
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2087833881378174,
      "learning_rate": 0.00038210213711681865,
      "loss": 2.8048,
      "step": 94871
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1040842533111572,
      "learning_rate": 0.0003820982027141651,
      "loss": 2.8247,
      "step": 94872
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7475107908248901,
      "learning_rate": 0.000382094268296248,
      "loss": 3.1838,
      "step": 94873
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6999894380569458,
      "learning_rate": 0.00038209033386306797,
      "loss": 3.2533,
      "step": 94874
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9883686304092407,
      "learning_rate": 0.0003820863994146258,
      "loss": 3.2738,
      "step": 94875
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.467134952545166,
      "learning_rate": 0.0003820824649509222,
      "loss": 2.916,
      "step": 94876
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3141889572143555,
      "learning_rate": 0.000382078530471958,
      "loss": 2.9526,
      "step": 94877
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8284095525741577,
      "learning_rate": 0.0003820745959777338,
      "loss": 2.9661,
      "step": 94878
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7823312282562256,
      "learning_rate": 0.0003820706614682504,
      "loss": 3.1057,
      "step": 94879
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.713189721107483,
      "learning_rate": 0.00038206672694350856,
      "loss": 3.2994,
      "step": 94880
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2054944038391113,
      "learning_rate": 0.0003820627924035089,
      "loss": 2.8442,
      "step": 94881
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.937961459159851,
      "learning_rate": 0.0003820588578482522,
      "loss": 2.8203,
      "step": 94882
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5208103656768799,
      "learning_rate": 0.00038205492327773925,
      "loss": 3.2677,
      "step": 94883
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9171558618545532,
      "learning_rate": 0.00038205098869197073,
      "loss": 3.0213,
      "step": 94884
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8881407976150513,
      "learning_rate": 0.0003820470540909474,
      "loss": 2.9101,
      "step": 94885
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.956192970275879,
      "learning_rate": 0.00038204311947466995,
      "loss": 3.0144,
      "step": 94886
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7917566299438477,
      "learning_rate": 0.0003820391848431392,
      "loss": 2.8021,
      "step": 94887
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1886653900146484,
      "learning_rate": 0.00038203525019635574,
      "loss": 2.8583,
      "step": 94888
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9901418685913086,
      "learning_rate": 0.0003820313155343204,
      "loss": 2.9986,
      "step": 94889
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.685006022453308,
      "learning_rate": 0.0003820273808570339,
      "loss": 3.0706,
      "step": 94890
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5307161808013916,
      "learning_rate": 0.0003820234461644969,
      "loss": 2.9557,
      "step": 94891
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7023062705993652,
      "learning_rate": 0.0003820195114567103,
      "loss": 2.9793,
      "step": 94892
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3780813217163086,
      "learning_rate": 0.00038201557673367463,
      "loss": 2.9637,
      "step": 94893
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.851479172706604,
      "learning_rate": 0.0003820116419953908,
      "loss": 3.0621,
      "step": 94894
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.687481164932251,
      "learning_rate": 0.0003820077072418594,
      "loss": 2.8368,
      "step": 94895
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4035401344299316,
      "learning_rate": 0.0003820037724730812,
      "loss": 2.821,
      "step": 94896
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.112534999847412,
      "learning_rate": 0.00038199983768905695,
      "loss": 3.0642,
      "step": 94897
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7393343448638916,
      "learning_rate": 0.0003819959028897875,
      "loss": 3.0216,
      "step": 94898
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1044371128082275,
      "learning_rate": 0.00038199196807527336,
      "loss": 3.1933,
      "step": 94899
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.785322666168213,
      "learning_rate": 0.0003819880332455154,
      "loss": 3.0674,
      "step": 94900
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.8980798721313477,
      "learning_rate": 0.0003819840984005143,
      "loss": 2.9803,
      "step": 94901
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8462308645248413,
      "learning_rate": 0.0003819801635402708,
      "loss": 3.0982,
      "step": 94902
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7997990846633911,
      "learning_rate": 0.00038197622866478564,
      "loss": 2.9853,
      "step": 94903
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.8889577388763428,
      "learning_rate": 0.0003819722937740596,
      "loss": 3.0636,
      "step": 94904
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.4235336780548096,
      "learning_rate": 0.0003819683588680933,
      "loss": 2.9256,
      "step": 94905
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.048279285430908,
      "learning_rate": 0.00038196442394688757,
      "loss": 3.1117,
      "step": 94906
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9280314445495605,
      "learning_rate": 0.0003819604890104431,
      "loss": 2.9997,
      "step": 94907
    },
    {
      "epoch": 1.24,
      "grad_norm": 4.237581253051758,
      "learning_rate": 0.00038195655405876065,
      "loss": 3.0589,
      "step": 94908
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.155322790145874,
      "learning_rate": 0.00038195261909184085,
      "loss": 3.2625,
      "step": 94909
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8035922050476074,
      "learning_rate": 0.00038194868410968455,
      "loss": 3.1427,
      "step": 94910
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.403639316558838,
      "learning_rate": 0.0003819447491122925,
      "loss": 2.9468,
      "step": 94911
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.161844491958618,
      "learning_rate": 0.00038194081409966533,
      "loss": 2.865,
      "step": 94912
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.2682220935821533,
      "learning_rate": 0.0003819368790718038,
      "loss": 3.0824,
      "step": 94913
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0123472213745117,
      "learning_rate": 0.0003819329440287087,
      "loss": 3.1567,
      "step": 94914
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7051130533218384,
      "learning_rate": 0.0003819290089703807,
      "loss": 2.8753,
      "step": 94915
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.935671091079712,
      "learning_rate": 0.00038192507389682055,
      "loss": 2.9337,
      "step": 94916
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.5428659915924072,
      "learning_rate": 0.00038192113880802895,
      "loss": 2.8055,
      "step": 94917
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5616624355316162,
      "learning_rate": 0.0003819172037040067,
      "loss": 3.1801,
      "step": 94918
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6370651721954346,
      "learning_rate": 0.00038191326858475454,
      "loss": 3.0572,
      "step": 94919
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0954275131225586,
      "learning_rate": 0.0003819093334502731,
      "loss": 2.8344,
      "step": 94920
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3758771419525146,
      "learning_rate": 0.0003819053983005631,
      "loss": 3.0654,
      "step": 94921
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.456022024154663,
      "learning_rate": 0.0003819014631356255,
      "loss": 2.9208,
      "step": 94922
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6170549392700195,
      "learning_rate": 0.00038189752795546073,
      "loss": 3.0871,
      "step": 94923
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.203585624694824,
      "learning_rate": 0.00038189359276006966,
      "loss": 3.063,
      "step": 94924
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.762309193611145,
      "learning_rate": 0.0003818896575494531,
      "loss": 2.8836,
      "step": 94925
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.220332384109497,
      "learning_rate": 0.0003818857223236118,
      "loss": 2.828,
      "step": 94926
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7508858442306519,
      "learning_rate": 0.0003818817870825462,
      "loss": 3.0075,
      "step": 94927
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6361145973205566,
      "learning_rate": 0.0003818778518262573,
      "loss": 2.9816,
      "step": 94928
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.097167491912842,
      "learning_rate": 0.0003818739165547458,
      "loss": 3.2069,
      "step": 94929
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.239738702774048,
      "learning_rate": 0.0003818699812680124,
      "loss": 3.5484,
      "step": 94930
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6011006832122803,
      "learning_rate": 0.0003818660459660578,
      "loss": 2.791,
      "step": 94931
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.5017197132110596,
      "learning_rate": 0.00038186211064888275,
      "loss": 3.1797,
      "step": 94932
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.042346954345703,
      "learning_rate": 0.000381858175316488,
      "loss": 2.9274,
      "step": 94933
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2209832668304443,
      "learning_rate": 0.0003818542399688742,
      "loss": 3.0912,
      "step": 94934
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5168919563293457,
      "learning_rate": 0.00038185030460604236,
      "loss": 3.0835,
      "step": 94935
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.319164514541626,
      "learning_rate": 0.0003818463692279928,
      "loss": 2.911,
      "step": 94936
    },
    {
      "epoch": 1.24,
      "grad_norm": 4.280228137969971,
      "learning_rate": 0.0003818424338347265,
      "loss": 2.9312,
      "step": 94937
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.945148468017578,
      "learning_rate": 0.0003818384984262442,
      "loss": 2.9821,
      "step": 94938
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.956309199333191,
      "learning_rate": 0.00038183456300254655,
      "loss": 2.987,
      "step": 94939
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.4278006553649902,
      "learning_rate": 0.0003818306275636342,
      "loss": 3.1333,
      "step": 94940
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7745509147644043,
      "learning_rate": 0.00038182669210950815,
      "loss": 3.1575,
      "step": 94941
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.536532402038574,
      "learning_rate": 0.00038182275664016895,
      "loss": 3.247,
      "step": 94942
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2196881771087646,
      "learning_rate": 0.0003818188211556173,
      "loss": 2.9818,
      "step": 94943
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.830944538116455,
      "learning_rate": 0.000381814885655854,
      "loss": 3.2178,
      "step": 94944
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6179986000061035,
      "learning_rate": 0.00038181095014087977,
      "loss": 2.9973,
      "step": 94945
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.043592691421509,
      "learning_rate": 0.00038180701461069537,
      "loss": 2.9684,
      "step": 94946
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8198570013046265,
      "learning_rate": 0.00038180307906530146,
      "loss": 3.3437,
      "step": 94947
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.203662872314453,
      "learning_rate": 0.00038179914350469884,
      "loss": 3.0448,
      "step": 94948
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.671410083770752,
      "learning_rate": 0.00038179520792888823,
      "loss": 3.1387,
      "step": 94949
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.63510000705719,
      "learning_rate": 0.00038179127233787033,
      "loss": 3.1442,
      "step": 94950
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2912285327911377,
      "learning_rate": 0.00038178733673164584,
      "loss": 2.7319,
      "step": 94951
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5561805963516235,
      "learning_rate": 0.00038178340111021564,
      "loss": 3.1773,
      "step": 94952
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7310676574707031,
      "learning_rate": 0.0003817794654735803,
      "loss": 3.0789,
      "step": 94953
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.028757095336914,
      "learning_rate": 0.0003817755298217406,
      "loss": 2.9951,
      "step": 94954
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6509734392166138,
      "learning_rate": 0.00038177159415469727,
      "loss": 2.976,
      "step": 94955
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1209301948547363,
      "learning_rate": 0.0003817676584724512,
      "loss": 2.755,
      "step": 94956
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7184373140335083,
      "learning_rate": 0.0003817637227750028,
      "loss": 2.7154,
      "step": 94957
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0650861263275146,
      "learning_rate": 0.000381759787062353,
      "loss": 3.0932,
      "step": 94958
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7033864259719849,
      "learning_rate": 0.00038175585133450263,
      "loss": 2.8674,
      "step": 94959
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6020921468734741,
      "learning_rate": 0.0003817519155914522,
      "loss": 3.2281,
      "step": 94960
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9219516515731812,
      "learning_rate": 0.0003817479798332026,
      "loss": 3.1437,
      "step": 94961
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2485225200653076,
      "learning_rate": 0.0003817440440597545,
      "loss": 2.9827,
      "step": 94962
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.51166832447052,
      "learning_rate": 0.00038174010827110865,
      "loss": 2.9016,
      "step": 94963
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0731425285339355,
      "learning_rate": 0.00038173617246726565,
      "loss": 2.9725,
      "step": 94964
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6402384042739868,
      "learning_rate": 0.0003817322366482266,
      "loss": 3.227,
      "step": 94965
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8343671560287476,
      "learning_rate": 0.0003817283008139918,
      "loss": 2.996,
      "step": 94966
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6597089767456055,
      "learning_rate": 0.00038172436496456216,
      "loss": 3.0641,
      "step": 94967
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4552173614501953,
      "learning_rate": 0.00038172042909993847,
      "loss": 3.078,
      "step": 94968
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.188880443572998,
      "learning_rate": 0.0003817164932201214,
      "loss": 2.8943,
      "step": 94969
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.511604905128479,
      "learning_rate": 0.00038171255732511166,
      "loss": 3.0244,
      "step": 94970
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8966243267059326,
      "learning_rate": 0.00038170862141491017,
      "loss": 3.142,
      "step": 94971
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4518439769744873,
      "learning_rate": 0.00038170468548951727,
      "loss": 3.0214,
      "step": 94972
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.810340166091919,
      "learning_rate": 0.000381700749548934,
      "loss": 3.1028,
      "step": 94973
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7501237392425537,
      "learning_rate": 0.0003816968135931611,
      "loss": 2.9125,
      "step": 94974
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7835179567337036,
      "learning_rate": 0.00038169287762219913,
      "loss": 3.1977,
      "step": 94975
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5576401948928833,
      "learning_rate": 0.000381688941636049,
      "loss": 2.9662,
      "step": 94976
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6382384300231934,
      "learning_rate": 0.0003816850056347114,
      "loss": 3.0683,
      "step": 94977
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.75984525680542,
      "learning_rate": 0.00038168106961818685,
      "loss": 3.0217,
      "step": 94978
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.064462184906006,
      "learning_rate": 0.00038167713358647627,
      "loss": 3.0303,
      "step": 94979
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4775437116622925,
      "learning_rate": 0.0003816731975395805,
      "loss": 3.1526,
      "step": 94980
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.705242395401001,
      "learning_rate": 0.0003816692614775001,
      "loss": 2.8927,
      "step": 94981
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2407777309417725,
      "learning_rate": 0.0003816653254002358,
      "loss": 2.7271,
      "step": 94982
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.89385986328125,
      "learning_rate": 0.00038166138930778836,
      "loss": 3.1402,
      "step": 94983
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9881428480148315,
      "learning_rate": 0.00038165745320015853,
      "loss": 2.915,
      "step": 94984
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.696599245071411,
      "learning_rate": 0.00038165351707734706,
      "loss": 2.7948,
      "step": 94985
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6850613355636597,
      "learning_rate": 0.0003816495809393546,
      "loss": 2.6799,
      "step": 94986
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7451450824737549,
      "learning_rate": 0.00038164564478618203,
      "loss": 2.9747,
      "step": 94987
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8327604532241821,
      "learning_rate": 0.00038164170861782994,
      "loss": 2.9427,
      "step": 94988
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6025726795196533,
      "learning_rate": 0.00038163777243429913,
      "loss": 2.9298,
      "step": 94989
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6688796281814575,
      "learning_rate": 0.00038163383623559036,
      "loss": 3.1307,
      "step": 94990
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7823354005813599,
      "learning_rate": 0.0003816299000217043,
      "loss": 2.9335,
      "step": 94991
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.02970290184021,
      "learning_rate": 0.00038162596379264163,
      "loss": 2.9911,
      "step": 94992
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.118088483810425,
      "learning_rate": 0.00038162202754840326,
      "loss": 3.0739,
      "step": 94993
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.292849063873291,
      "learning_rate": 0.00038161809128898973,
      "loss": 3.2284,
      "step": 94994
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8371487855911255,
      "learning_rate": 0.0003816141550144018,
      "loss": 2.9356,
      "step": 94995
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8828085660934448,
      "learning_rate": 0.00038161021872464045,
      "loss": 2.7645,
      "step": 94996
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.106149196624756,
      "learning_rate": 0.0003816062824197061,
      "loss": 2.8885,
      "step": 94997
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.007737398147583,
      "learning_rate": 0.0003816023460995996,
      "loss": 2.9949,
      "step": 94998
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.783057689666748,
      "learning_rate": 0.0003815984097643217,
      "loss": 2.9115,
      "step": 94999
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0547633171081543,
      "learning_rate": 0.0003815944734138731,
      "loss": 3.3162,
      "step": 95000
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2068240642547607,
      "learning_rate": 0.00038159053704825457,
      "loss": 2.8043,
      "step": 95001
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.570194721221924,
      "learning_rate": 0.0003815866006674668,
      "loss": 3.056,
      "step": 95002
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7137489318847656,
      "learning_rate": 0.00038158266427151057,
      "loss": 2.8519,
      "step": 95003
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.489680528640747,
      "learning_rate": 0.00038157872786038653,
      "loss": 2.8438,
      "step": 95004
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3401942253112793,
      "learning_rate": 0.0003815747914340955,
      "loss": 2.9008,
      "step": 95005
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.461822748184204,
      "learning_rate": 0.0003815708549926381,
      "loss": 3.189,
      "step": 95006
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.031414747238159,
      "learning_rate": 0.00038156691853601533,
      "loss": 2.9411,
      "step": 95007
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.933485507965088,
      "learning_rate": 0.0003815629820642276,
      "loss": 3.0032,
      "step": 95008
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2147228717803955,
      "learning_rate": 0.00038155904557727574,
      "loss": 3.1931,
      "step": 95009
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7058899402618408,
      "learning_rate": 0.0003815551090751606,
      "loss": 2.9588,
      "step": 95010
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7537190914154053,
      "learning_rate": 0.00038155117255788274,
      "loss": 3.0199,
      "step": 95011
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5593020915985107,
      "learning_rate": 0.000381547236025443,
      "loss": 3.0359,
      "step": 95012
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9091801643371582,
      "learning_rate": 0.00038154329947784213,
      "loss": 3.0888,
      "step": 95013
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7844129800796509,
      "learning_rate": 0.0003815393629150808,
      "loss": 3.0393,
      "step": 95014
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7812519073486328,
      "learning_rate": 0.0003815354263371598,
      "loss": 3.2291,
      "step": 95015
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.901444911956787,
      "learning_rate": 0.00038153148974407977,
      "loss": 3.1742,
      "step": 95016
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.973482608795166,
      "learning_rate": 0.00038152755313584154,
      "loss": 3.0481,
      "step": 95017
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5238397121429443,
      "learning_rate": 0.00038152361651244574,
      "loss": 3.0179,
      "step": 95018
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5928682088851929,
      "learning_rate": 0.0003815196798738932,
      "loss": 2.8365,
      "step": 95019
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7678122520446777,
      "learning_rate": 0.00038151574322018464,
      "loss": 3.1708,
      "step": 95020
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.606980800628662,
      "learning_rate": 0.0003815118065513207,
      "loss": 3.0122,
      "step": 95021
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.9331493377685547,
      "learning_rate": 0.0003815078698673022,
      "loss": 3.0731,
      "step": 95022
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9240447282791138,
      "learning_rate": 0.0003815039331681299,
      "loss": 2.9403,
      "step": 95023
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7549101114273071,
      "learning_rate": 0.00038149999645380444,
      "loss": 2.9437,
      "step": 95024
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7201141119003296,
      "learning_rate": 0.00038149605972432655,
      "loss": 3.0078,
      "step": 95025
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9206233024597168,
      "learning_rate": 0.0003814921229796971,
      "loss": 3.1227,
      "step": 95026
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6744771003723145,
      "learning_rate": 0.00038148818621991663,
      "loss": 2.9896,
      "step": 95027
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5378066301345825,
      "learning_rate": 0.000381484249444986,
      "loss": 3.3362,
      "step": 95028
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9570574760437012,
      "learning_rate": 0.000381480312654906,
      "loss": 3.1628,
      "step": 95029
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.053588628768921,
      "learning_rate": 0.0003814763758496771,
      "loss": 3.1795,
      "step": 95030
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5994588136672974,
      "learning_rate": 0.0003814724390293004,
      "loss": 2.9603,
      "step": 95031
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5504411458969116,
      "learning_rate": 0.00038146850219377635,
      "loss": 3.0877,
      "step": 95032
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6307512521743774,
      "learning_rate": 0.0003814645653431057,
      "loss": 3.2045,
      "step": 95033
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.428072452545166,
      "learning_rate": 0.0003814606284772893,
      "loss": 2.8017,
      "step": 95034
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7665718793869019,
      "learning_rate": 0.00038145669159632786,
      "loss": 3.0092,
      "step": 95035
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7513351440429688,
      "learning_rate": 0.00038145275470022203,
      "loss": 2.9611,
      "step": 95036
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4476406574249268,
      "learning_rate": 0.0003814488177889726,
      "loss": 2.9378,
      "step": 95037
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.442532777786255,
      "learning_rate": 0.00038144488086258035,
      "loss": 2.935,
      "step": 95038
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2916080951690674,
      "learning_rate": 0.0003814409439210459,
      "loss": 3.0131,
      "step": 95039
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.745701789855957,
      "learning_rate": 0.00038143700696437004,
      "loss": 3.0613,
      "step": 95040
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8012648820877075,
      "learning_rate": 0.0003814330699925536,
      "loss": 3.2342,
      "step": 95041
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.183825731277466,
      "learning_rate": 0.0003814291330055971,
      "loss": 3.0555,
      "step": 95042
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.077227830886841,
      "learning_rate": 0.00038142519600350137,
      "loss": 3.0283,
      "step": 95043
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7982856035232544,
      "learning_rate": 0.0003814212589862673,
      "loss": 3.1073,
      "step": 95044
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8926503658294678,
      "learning_rate": 0.0003814173219538954,
      "loss": 3.0347,
      "step": 95045
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9052194356918335,
      "learning_rate": 0.0003814133849063865,
      "loss": 3.0616,
      "step": 95046
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7518460750579834,
      "learning_rate": 0.0003814094478437413,
      "loss": 2.998,
      "step": 95047
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8554179668426514,
      "learning_rate": 0.00038140551076596053,
      "loss": 2.8633,
      "step": 95048
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6221641302108765,
      "learning_rate": 0.00038140157367304496,
      "loss": 2.9749,
      "step": 95049
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5683422088623047,
      "learning_rate": 0.0003813976365649953,
      "loss": 3.0116,
      "step": 95050
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7026865482330322,
      "learning_rate": 0.0003813936994418123,
      "loss": 3.1329,
      "step": 95051
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.113621950149536,
      "learning_rate": 0.00038138976230349663,
      "loss": 3.2389,
      "step": 95052
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.163954019546509,
      "learning_rate": 0.0003813858251500491,
      "loss": 2.8846,
      "step": 95053
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6529133319854736,
      "learning_rate": 0.00038138188798147044,
      "loss": 2.6756,
      "step": 95054
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8393361568450928,
      "learning_rate": 0.00038137795079776123,
      "loss": 3.2965,
      "step": 95055
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5257792472839355,
      "learning_rate": 0.0003813740135989224,
      "loss": 3.0291,
      "step": 95056
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6563119888305664,
      "learning_rate": 0.0003813700763849547,
      "loss": 3.2482,
      "step": 95057
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.61312735080719,
      "learning_rate": 0.0003813661391558586,
      "loss": 2.9022,
      "step": 95058
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7416247129440308,
      "learning_rate": 0.0003813622019116351,
      "loss": 2.9451,
      "step": 95059
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6850125789642334,
      "learning_rate": 0.00038135826465228483,
      "loss": 2.8545,
      "step": 95060
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7869610786437988,
      "learning_rate": 0.0003813543273778085,
      "loss": 3.0447,
      "step": 95061
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4303501844406128,
      "learning_rate": 0.0003813503900882068,
      "loss": 3.0223,
      "step": 95062
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6946545839309692,
      "learning_rate": 0.0003813464527834807,
      "loss": 3.0222,
      "step": 95063
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.9755778312683105,
      "learning_rate": 0.0003813425154636306,
      "loss": 3.0442,
      "step": 95064
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7997150421142578,
      "learning_rate": 0.0003813385781286574,
      "loss": 2.9507,
      "step": 95065
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3315396308898926,
      "learning_rate": 0.00038133464077856193,
      "loss": 3.0767,
      "step": 95066
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.690866470336914,
      "learning_rate": 0.0003813307034133447,
      "loss": 2.8574,
      "step": 95067
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9352258443832397,
      "learning_rate": 0.0003813267660330066,
      "loss": 3.219,
      "step": 95068
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.61622154712677,
      "learning_rate": 0.0003813228286375484,
      "loss": 2.9776,
      "step": 95069
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6820664405822754,
      "learning_rate": 0.0003813188912269706,
      "loss": 3.0526,
      "step": 95070
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8148186206817627,
      "learning_rate": 0.00038131495380127413,
      "loss": 2.8004,
      "step": 95071
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.72588312625885,
      "learning_rate": 0.0003813110163604598,
      "loss": 3.0388,
      "step": 95072
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.473079204559326,
      "learning_rate": 0.0003813070789045281,
      "loss": 2.9912,
      "step": 95073
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3070075511932373,
      "learning_rate": 0.00038130314143347983,
      "loss": 3.366,
      "step": 95074
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.540339469909668,
      "learning_rate": 0.0003812992039473159,
      "loss": 2.8376,
      "step": 95075
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.565905809402466,
      "learning_rate": 0.00038129526644603687,
      "loss": 3.0463,
      "step": 95076
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5361943244934082,
      "learning_rate": 0.00038129132892964347,
      "loss": 2.9735,
      "step": 95077
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5628618001937866,
      "learning_rate": 0.00038128739139813657,
      "loss": 2.958,
      "step": 95078
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8857072591781616,
      "learning_rate": 0.00038128345385151665,
      "loss": 3.1017,
      "step": 95079
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1571507453918457,
      "learning_rate": 0.00038127951628978473,
      "loss": 3.2165,
      "step": 95080
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6088279485702515,
      "learning_rate": 0.00038127557871294143,
      "loss": 2.8729,
      "step": 95081
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.296212673187256,
      "learning_rate": 0.0003812716411209874,
      "loss": 3.0448,
      "step": 95082
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.691310167312622,
      "learning_rate": 0.00038126770351392335,
      "loss": 3.1006,
      "step": 95083
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4924473762512207,
      "learning_rate": 0.0003812637658917503,
      "loss": 3.2476,
      "step": 95084
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4689596891403198,
      "learning_rate": 0.0003812598282544687,
      "loss": 2.7844,
      "step": 95085
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8579777479171753,
      "learning_rate": 0.00038125589060207933,
      "loss": 3.1641,
      "step": 95086
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7651612758636475,
      "learning_rate": 0.00038125195293458295,
      "loss": 3.1731,
      "step": 95087
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6379729509353638,
      "learning_rate": 0.00038124801525198034,
      "loss": 2.8616,
      "step": 95088
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8646504878997803,
      "learning_rate": 0.0003812440775542722,
      "loss": 2.9264,
      "step": 95089
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.615469217300415,
      "learning_rate": 0.00038124013984145913,
      "loss": 2.8411,
      "step": 95090
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6138925552368164,
      "learning_rate": 0.0003812362021135422,
      "loss": 2.7953,
      "step": 95091
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7346681356430054,
      "learning_rate": 0.00038123226437052174,
      "loss": 3.0904,
      "step": 95092
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9459348917007446,
      "learning_rate": 0.00038122832661239875,
      "loss": 2.954,
      "step": 95093
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0853466987609863,
      "learning_rate": 0.00038122438883917385,
      "loss": 2.645,
      "step": 95094
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9394330978393555,
      "learning_rate": 0.0003812204510508478,
      "loss": 2.983,
      "step": 95095
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.366218328475952,
      "learning_rate": 0.00038121651324742143,
      "loss": 2.9314,
      "step": 95096
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6356598138809204,
      "learning_rate": 0.0003812125754288952,
      "loss": 3.1,
      "step": 95097
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0040090084075928,
      "learning_rate": 0.00038120863759527016,
      "loss": 2.9391,
      "step": 95098
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.0591542720794678,
      "learning_rate": 0.0003812046997465469,
      "loss": 2.9127,
      "step": 95099
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.929654836654663,
      "learning_rate": 0.00038120076188272607,
      "loss": 2.6627,
      "step": 95100
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.605655550956726,
      "learning_rate": 0.0003811968240038086,
      "loss": 2.998,
      "step": 95101
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.226642370223999,
      "learning_rate": 0.00038119288610979506,
      "loss": 3.0173,
      "step": 95102
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9785668849945068,
      "learning_rate": 0.0003811889482006862,
      "loss": 2.7698,
      "step": 95103
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7174627780914307,
      "learning_rate": 0.0003811850102764827,
      "loss": 3.0452,
      "step": 95104
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6018348932266235,
      "learning_rate": 0.0003811810723371855,
      "loss": 3.1778,
      "step": 95105
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2886250019073486,
      "learning_rate": 0.0003811771343827952,
      "loss": 2.9082,
      "step": 95106
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7339415550231934,
      "learning_rate": 0.00038117319641331243,
      "loss": 3.1166,
      "step": 95107
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0421955585479736,
      "learning_rate": 0.0003811692584287382,
      "loss": 2.958,
      "step": 95108
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.994363307952881,
      "learning_rate": 0.000381165320429073,
      "loss": 3.0331,
      "step": 95109
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1999197006225586,
      "learning_rate": 0.0003811613824143176,
      "loss": 2.8888,
      "step": 95110
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7799636125564575,
      "learning_rate": 0.0003811574443844728,
      "loss": 3.0889,
      "step": 95111
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.0085225105285645,
      "learning_rate": 0.00038115350633953927,
      "loss": 2.9947,
      "step": 95112
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.439143180847168,
      "learning_rate": 0.00038114956827951774,
      "loss": 3.2767,
      "step": 95113
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.9353020191192627,
      "learning_rate": 0.00038114563020440905,
      "loss": 3.2722,
      "step": 95114
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6076502799987793,
      "learning_rate": 0.00038114169211421386,
      "loss": 2.9078,
      "step": 95115
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9663623571395874,
      "learning_rate": 0.00038113775400893285,
      "loss": 2.9135,
      "step": 95116
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.8997573852539062,
      "learning_rate": 0.0003811338158885668,
      "loss": 3.13,
      "step": 95117
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.8008601665496826,
      "learning_rate": 0.00038112987775311646,
      "loss": 2.9346,
      "step": 95118
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2454965114593506,
      "learning_rate": 0.00038112593960258254,
      "loss": 3.2192,
      "step": 95119
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3359339237213135,
      "learning_rate": 0.00038112200143696574,
      "loss": 3.0379,
      "step": 95120
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2895045280456543,
      "learning_rate": 0.00038111806325626693,
      "loss": 3.1045,
      "step": 95121
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3403918743133545,
      "learning_rate": 0.0003811141250604866,
      "loss": 3.1722,
      "step": 95122
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9748048782348633,
      "learning_rate": 0.0003811101868496257,
      "loss": 2.9778,
      "step": 95123
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.893680214881897,
      "learning_rate": 0.000381106248623685,
      "loss": 3.1242,
      "step": 95124
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9696041345596313,
      "learning_rate": 0.00038110231038266496,
      "loss": 2.925,
      "step": 95125
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.773252487182617,
      "learning_rate": 0.00038109837212656645,
      "loss": 3.2537,
      "step": 95126
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5614203214645386,
      "learning_rate": 0.0003810944338553903,
      "loss": 3.014,
      "step": 95127
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8357200622558594,
      "learning_rate": 0.00038109049556913716,
      "loss": 2.9016,
      "step": 95128
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6146647930145264,
      "learning_rate": 0.0003810865572678078,
      "loss": 2.9969,
      "step": 95129
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7301816940307617,
      "learning_rate": 0.00038108261895140286,
      "loss": 3.0768,
      "step": 95130
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5410139560699463,
      "learning_rate": 0.0003810786806199231,
      "loss": 2.9755,
      "step": 95131
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7667301893234253,
      "learning_rate": 0.0003810747422733693,
      "loss": 2.5707,
      "step": 95132
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6394799947738647,
      "learning_rate": 0.00038107080391174225,
      "loss": 2.9212,
      "step": 95133
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.284017324447632,
      "learning_rate": 0.00038106686553504256,
      "loss": 2.7604,
      "step": 95134
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5687180757522583,
      "learning_rate": 0.00038106292714327095,
      "loss": 2.9894,
      "step": 95135
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0234248638153076,
      "learning_rate": 0.0003810589887364283,
      "loss": 2.8784,
      "step": 95136
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7443888187408447,
      "learning_rate": 0.00038105505031451524,
      "loss": 2.8366,
      "step": 95137
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9101864099502563,
      "learning_rate": 0.0003810511118775325,
      "loss": 2.9027,
      "step": 95138
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6639811992645264,
      "learning_rate": 0.00038104717342548087,
      "loss": 3.0176,
      "step": 95139
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2967636585235596,
      "learning_rate": 0.00038104323495836094,
      "loss": 2.988,
      "step": 95140
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8187894821166992,
      "learning_rate": 0.0003810392964761736,
      "loss": 3.1678,
      "step": 95141
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5822914838790894,
      "learning_rate": 0.00038103535797891957,
      "loss": 3.1267,
      "step": 95142
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6248962879180908,
      "learning_rate": 0.0003810314194665995,
      "loss": 2.9946,
      "step": 95143
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.747490406036377,
      "learning_rate": 0.00038102748093921405,
      "loss": 3.1515,
      "step": 95144
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.222113847732544,
      "learning_rate": 0.0003810235423967643,
      "loss": 3.0446,
      "step": 95145
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6401368379592896,
      "learning_rate": 0.00038101960383925054,
      "loss": 3.1644,
      "step": 95146
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2476305961608887,
      "learning_rate": 0.00038101566526667374,
      "loss": 3.0677,
      "step": 95147
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7044976949691772,
      "learning_rate": 0.0003810117266790347,
      "loss": 3.31,
      "step": 95148
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.385693073272705,
      "learning_rate": 0.00038100778807633395,
      "loss": 2.6925,
      "step": 95149
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7145744562149048,
      "learning_rate": 0.0003810038494585723,
      "loss": 2.9055,
      "step": 95150
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0265932083129883,
      "learning_rate": 0.0003809999108257507,
      "loss": 3.0086,
      "step": 95151
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.164827346801758,
      "learning_rate": 0.0003809959721778695,
      "loss": 3.1151,
      "step": 95152
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5380165576934814,
      "learning_rate": 0.00038099203351492963,
      "loss": 3.0102,
      "step": 95153
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5847747325897217,
      "learning_rate": 0.0003809880948369319,
      "loss": 2.9517,
      "step": 95154
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3262624740600586,
      "learning_rate": 0.0003809841561438769,
      "loss": 3.0377,
      "step": 95155
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.572385549545288,
      "learning_rate": 0.00038098021743576536,
      "loss": 3.0451,
      "step": 95156
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2929329872131348,
      "learning_rate": 0.00038097627871259813,
      "loss": 3.3139,
      "step": 95157
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6994715929031372,
      "learning_rate": 0.0003809723399743759,
      "loss": 3.1119,
      "step": 95158
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4366543292999268,
      "learning_rate": 0.0003809684012210994,
      "loss": 3.0781,
      "step": 95159
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4153671264648438,
      "learning_rate": 0.0003809644624527693,
      "loss": 2.963,
      "step": 95160
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9997552633285522,
      "learning_rate": 0.00038096052366938635,
      "loss": 2.994,
      "step": 95161
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6914820671081543,
      "learning_rate": 0.00038095658487095137,
      "loss": 3.2801,
      "step": 95162
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8444184064865112,
      "learning_rate": 0.00038095264605746506,
      "loss": 3.277,
      "step": 95163
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8979278802871704,
      "learning_rate": 0.0003809487072289281,
      "loss": 3.1444,
      "step": 95164
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4671372175216675,
      "learning_rate": 0.00038094476838534117,
      "loss": 2.9302,
      "step": 95165
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1557564735412598,
      "learning_rate": 0.00038094082952670516,
      "loss": 2.8643,
      "step": 95166
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9519590139389038,
      "learning_rate": 0.0003809368906530207,
      "loss": 2.9076,
      "step": 95167
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6335710287094116,
      "learning_rate": 0.00038093295176428853,
      "loss": 3.0987,
      "step": 95168
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.670617938041687,
      "learning_rate": 0.00038092901286050944,
      "loss": 2.9381,
      "step": 95169
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.104170322418213,
      "learning_rate": 0.00038092507394168405,
      "loss": 3.1397,
      "step": 95170
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7046217918395996,
      "learning_rate": 0.0003809211350078132,
      "loss": 2.8937,
      "step": 95171
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8345593214035034,
      "learning_rate": 0.0003809171960588976,
      "loss": 3.1256,
      "step": 95172
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0251927375793457,
      "learning_rate": 0.0003809132570949379,
      "loss": 3.0383,
      "step": 95173
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6087632179260254,
      "learning_rate": 0.000380909318115935,
      "loss": 3.1088,
      "step": 95174
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2242040634155273,
      "learning_rate": 0.00038090537912188944,
      "loss": 3.1775,
      "step": 95175
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.27340030670166,
      "learning_rate": 0.00038090144011280206,
      "loss": 2.6974,
      "step": 95176
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.63825523853302,
      "learning_rate": 0.0003808975010886736,
      "loss": 3.1262,
      "step": 95177
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.086977243423462,
      "learning_rate": 0.00038089356204950474,
      "loss": 2.9938,
      "step": 95178
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4284952878952026,
      "learning_rate": 0.00038088962299529633,
      "loss": 3.0756,
      "step": 95179
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0714244842529297,
      "learning_rate": 0.0003808856839260489,
      "loss": 3.2539,
      "step": 95180
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.873223900794983,
      "learning_rate": 0.0003808817448417633,
      "loss": 3.0837,
      "step": 95181
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.344912528991699,
      "learning_rate": 0.00038087780574244033,
      "loss": 2.9269,
      "step": 95182
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6271040439605713,
      "learning_rate": 0.00038087386662808066,
      "loss": 2.8625,
      "step": 95183
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9406071901321411,
      "learning_rate": 0.0003808699274986849,
      "loss": 3.0253,
      "step": 95184
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.720251202583313,
      "learning_rate": 0.00038086598835425394,
      "loss": 3.0374,
      "step": 95185
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.911045789718628,
      "learning_rate": 0.00038086204919478853,
      "loss": 3.0382,
      "step": 95186
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7468613386154175,
      "learning_rate": 0.00038085811002028924,
      "loss": 2.9159,
      "step": 95187
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.225958824157715,
      "learning_rate": 0.00038085417083075695,
      "loss": 3.1382,
      "step": 95188
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8271420001983643,
      "learning_rate": 0.00038085023162619236,
      "loss": 2.6418,
      "step": 95189
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3863754272460938,
      "learning_rate": 0.00038084629240659623,
      "loss": 3.0631,
      "step": 95190
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1171650886535645,
      "learning_rate": 0.00038084235317196915,
      "loss": 3.0207,
      "step": 95191
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.3275082111358643,
      "learning_rate": 0.000380838413922312,
      "loss": 2.9429,
      "step": 95192
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7206279039382935,
      "learning_rate": 0.0003808344746576254,
      "loss": 3.0722,
      "step": 95193
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.099574565887451,
      "learning_rate": 0.00038083053537791027,
      "loss": 3.0712,
      "step": 95194
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.332808256149292,
      "learning_rate": 0.0003808265960831671,
      "loss": 3.1072,
      "step": 95195
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1181252002716064,
      "learning_rate": 0.0003808226567733968,
      "loss": 2.851,
      "step": 95196
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8699480295181274,
      "learning_rate": 0.00038081871744860004,
      "loss": 3.042,
      "step": 95197
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7621359825134277,
      "learning_rate": 0.00038081477810877754,
      "loss": 2.8775,
      "step": 95198
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8298097848892212,
      "learning_rate": 0.00038081083875393006,
      "loss": 3.0322,
      "step": 95199
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.116476058959961,
      "learning_rate": 0.0003808068993840583,
      "loss": 3.1101,
      "step": 95200
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.017967939376831,
      "learning_rate": 0.000380802959999163,
      "loss": 3.0683,
      "step": 95201
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1140353679656982,
      "learning_rate": 0.000380799020599245,
      "loss": 3.069,
      "step": 95202
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7381069660186768,
      "learning_rate": 0.00038079508118430485,
      "loss": 2.9991,
      "step": 95203
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8659824132919312,
      "learning_rate": 0.0003807911417543434,
      "loss": 3.2707,
      "step": 95204
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.445828437805176,
      "learning_rate": 0.0003807872023093613,
      "loss": 2.8652,
      "step": 95205
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.845651626586914,
      "learning_rate": 0.0003807832628493594,
      "loss": 2.8291,
      "step": 95206
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7630107402801514,
      "learning_rate": 0.00038077932337433836,
      "loss": 3.1174,
      "step": 95207
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9934561252593994,
      "learning_rate": 0.00038077538388429883,
      "loss": 2.8448,
      "step": 95208
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7942193746566772,
      "learning_rate": 0.00038077144437924175,
      "loss": 3.1721,
      "step": 95209
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7238364219665527,
      "learning_rate": 0.00038076750485916766,
      "loss": 2.9062,
      "step": 95210
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.041043281555176,
      "learning_rate": 0.00038076356532407737,
      "loss": 3.0355,
      "step": 95211
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.661380410194397,
      "learning_rate": 0.0003807596257739717,
      "loss": 3.034,
      "step": 95212
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7705186605453491,
      "learning_rate": 0.00038075568620885117,
      "loss": 2.8533,
      "step": 95213
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.663705587387085,
      "learning_rate": 0.00038075174662871667,
      "loss": 2.924,
      "step": 95214
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7219902276992798,
      "learning_rate": 0.00038074780703356895,
      "loss": 2.8745,
      "step": 95215
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1508452892303467,
      "learning_rate": 0.0003807438674234086,
      "loss": 3.282,
      "step": 95216
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.003004789352417,
      "learning_rate": 0.0003807399277982365,
      "loss": 2.9734,
      "step": 95217
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9792702198028564,
      "learning_rate": 0.00038073598815805337,
      "loss": 3.0567,
      "step": 95218
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9109960794448853,
      "learning_rate": 0.0003807320485028598,
      "loss": 3.1854,
      "step": 95219
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4990551471710205,
      "learning_rate": 0.0003807281088326566,
      "loss": 3.093,
      "step": 95220
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.038832664489746,
      "learning_rate": 0.0003807241691474446,
      "loss": 2.8796,
      "step": 95221
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8994362354278564,
      "learning_rate": 0.00038072022944722443,
      "loss": 3.0429,
      "step": 95222
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.9583749771118164,
      "learning_rate": 0.0003807162897319968,
      "loss": 2.8683,
      "step": 95223
    },
    {
      "epoch": 1.24,
      "grad_norm": 6.010451793670654,
      "learning_rate": 0.0003807123500017626,
      "loss": 2.9438,
      "step": 95224
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7158079147338867,
      "learning_rate": 0.0003807084102565224,
      "loss": 2.8877,
      "step": 95225
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.928749680519104,
      "learning_rate": 0.0003807044704962769,
      "loss": 3.2383,
      "step": 95226
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.59853196144104,
      "learning_rate": 0.00038070053072102706,
      "loss": 2.9909,
      "step": 95227
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2230827808380127,
      "learning_rate": 0.00038069659093077336,
      "loss": 3.2244,
      "step": 95228
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.3327901363372803,
      "learning_rate": 0.00038069265112551664,
      "loss": 2.9741,
      "step": 95229
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.573835611343384,
      "learning_rate": 0.0003806887113052577,
      "loss": 3.0323,
      "step": 95230
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.273797035217285,
      "learning_rate": 0.0003806847714699972,
      "loss": 3.1446,
      "step": 95231
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.4175326824188232,
      "learning_rate": 0.0003806808316197358,
      "loss": 2.9338,
      "step": 95232
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.629647731781006,
      "learning_rate": 0.0003806768917544744,
      "loss": 2.9324,
      "step": 95233
    },
    {
      "epoch": 1.24,
      "grad_norm": 4.336344242095947,
      "learning_rate": 0.0003806729518742136,
      "loss": 2.601,
      "step": 95234
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.899953842163086,
      "learning_rate": 0.00038066901197895416,
      "loss": 3.0234,
      "step": 95235
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0405261516571045,
      "learning_rate": 0.00038066507206869685,
      "loss": 3.0066,
      "step": 95236
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5236756801605225,
      "learning_rate": 0.0003806611321434423,
      "loss": 3.0507,
      "step": 95237
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.141859769821167,
      "learning_rate": 0.0003806571922031914,
      "loss": 2.8718,
      "step": 95238
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5690281391143799,
      "learning_rate": 0.0003806532522479449,
      "loss": 2.9003,
      "step": 95239
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8497514724731445,
      "learning_rate": 0.0003806493122777033,
      "loss": 3.1831,
      "step": 95240
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.74250328540802,
      "learning_rate": 0.0003806453722924675,
      "loss": 2.9788,
      "step": 95241
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6645801067352295,
      "learning_rate": 0.0003806414322922383,
      "loss": 3.0173,
      "step": 95242
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8399213552474976,
      "learning_rate": 0.0003806374922770162,
      "loss": 2.8909,
      "step": 95243
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.478497266769409,
      "learning_rate": 0.0003806335522468021,
      "loss": 2.8612,
      "step": 95244
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.708236575126648,
      "learning_rate": 0.00038062961220159677,
      "loss": 3.0529,
      "step": 95245
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.046760320663452,
      "learning_rate": 0.0003806256721414008,
      "loss": 2.8514,
      "step": 95246
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7032619714736938,
      "learning_rate": 0.000380621732066215,
      "loss": 2.9179,
      "step": 95247
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0982983112335205,
      "learning_rate": 0.0003806177919760401,
      "loss": 2.9337,
      "step": 95248
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7276356220245361,
      "learning_rate": 0.00038061385187087687,
      "loss": 3.287,
      "step": 95249
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7026500701904297,
      "learning_rate": 0.00038060991175072593,
      "loss": 3.2105,
      "step": 95250
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4076082706451416,
      "learning_rate": 0.0003806059716155881,
      "loss": 3.0142,
      "step": 95251
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4631458520889282,
      "learning_rate": 0.0003806020314654642,
      "loss": 2.9431,
      "step": 95252
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5468522310256958,
      "learning_rate": 0.00038059809130035476,
      "loss": 3.0812,
      "step": 95253
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0312368869781494,
      "learning_rate": 0.0003805941511202606,
      "loss": 3.1806,
      "step": 95254
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7143369913101196,
      "learning_rate": 0.0003805902109251826,
      "loss": 3.0072,
      "step": 95255
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.686179518699646,
      "learning_rate": 0.00038058627071512125,
      "loss": 3.1035,
      "step": 95256
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4811334609985352,
      "learning_rate": 0.0003805823304900774,
      "loss": 3.0964,
      "step": 95257
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1356964111328125,
      "learning_rate": 0.0003805783902500518,
      "loss": 3.1179,
      "step": 95258
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8823350667953491,
      "learning_rate": 0.00038057444999504514,
      "loss": 2.9717,
      "step": 95259
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.542863368988037,
      "learning_rate": 0.0003805705097250581,
      "loss": 2.9903,
      "step": 95260
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5141583681106567,
      "learning_rate": 0.0003805665694400917,
      "loss": 2.8436,
      "step": 95261
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6258598566055298,
      "learning_rate": 0.0003805626291401462,
      "loss": 2.8765,
      "step": 95262
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7304655313491821,
      "learning_rate": 0.00038055868882522274,
      "loss": 3.022,
      "step": 95263
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6548408269882202,
      "learning_rate": 0.00038055474849532184,
      "loss": 3.1272,
      "step": 95264
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.019465208053589,
      "learning_rate": 0.0003805508081504443,
      "loss": 2.8159,
      "step": 95265
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7583495378494263,
      "learning_rate": 0.00038054686779059085,
      "loss": 3.0081,
      "step": 95266
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7613003253936768,
      "learning_rate": 0.0003805429274157622,
      "loss": 2.8444,
      "step": 95267
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6780180931091309,
      "learning_rate": 0.0003805389870259591,
      "loss": 2.8196,
      "step": 95268
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6520581245422363,
      "learning_rate": 0.0003805350466211823,
      "loss": 3.0836,
      "step": 95269
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.413872480392456,
      "learning_rate": 0.0003805311062014325,
      "loss": 2.9344,
      "step": 95270
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6713244915008545,
      "learning_rate": 0.0003805271657667105,
      "loss": 2.9266,
      "step": 95271
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7362595796585083,
      "learning_rate": 0.0003805232253170169,
      "loss": 3.1004,
      "step": 95272
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3619041442871094,
      "learning_rate": 0.00038051928485235254,
      "loss": 2.9177,
      "step": 95273
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8143281936645508,
      "learning_rate": 0.0003805153443727181,
      "loss": 3.2946,
      "step": 95274
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7649513483047485,
      "learning_rate": 0.0003805114038781144,
      "loss": 2.8581,
      "step": 95275
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8262196779251099,
      "learning_rate": 0.0003805074633685421,
      "loss": 2.8974,
      "step": 95276
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7150651216506958,
      "learning_rate": 0.0003805035228440019,
      "loss": 2.7779,
      "step": 95277
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1224544048309326,
      "learning_rate": 0.00038049958230449457,
      "loss": 3.1173,
      "step": 95278
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.281548500061035,
      "learning_rate": 0.0003804956417500209,
      "loss": 2.9354,
      "step": 95279
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.3940365314483643,
      "learning_rate": 0.0003804917011805816,
      "loss": 3.0723,
      "step": 95280
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2673420906066895,
      "learning_rate": 0.00038048776059617723,
      "loss": 2.9727,
      "step": 95281
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.8605751991271973,
      "learning_rate": 0.0003804838199968088,
      "loss": 3.1669,
      "step": 95282
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5670666694641113,
      "learning_rate": 0.00038047987938247686,
      "loss": 3.0412,
      "step": 95283
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.301098108291626,
      "learning_rate": 0.0003804759387531822,
      "loss": 2.9826,
      "step": 95284
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7620880603790283,
      "learning_rate": 0.0003804719981089256,
      "loss": 2.8832,
      "step": 95285
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8653513193130493,
      "learning_rate": 0.00038046805744970766,
      "loss": 3.0503,
      "step": 95286
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2512917518615723,
      "learning_rate": 0.0003804641167755292,
      "loss": 2.9341,
      "step": 95287
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7973111867904663,
      "learning_rate": 0.00038046017608639096,
      "loss": 3.0785,
      "step": 95288
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.685890793800354,
      "learning_rate": 0.00038045623538229367,
      "loss": 3.17,
      "step": 95289
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5290226936340332,
      "learning_rate": 0.00038045229466323796,
      "loss": 3.0748,
      "step": 95290
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6473032236099243,
      "learning_rate": 0.00038044835392922477,
      "loss": 2.8092,
      "step": 95291
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.508736491203308,
      "learning_rate": 0.0003804444131802546,
      "loss": 2.79,
      "step": 95292
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.551167607307434,
      "learning_rate": 0.00038044047241632834,
      "loss": 3.1393,
      "step": 95293
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.026059865951538,
      "learning_rate": 0.00038043653163744674,
      "loss": 3.0304,
      "step": 95294
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2330753803253174,
      "learning_rate": 0.0003804325908436104,
      "loss": 2.9348,
      "step": 95295
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9216653108596802,
      "learning_rate": 0.0003804286500348201,
      "loss": 3.135,
      "step": 95296
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0679728984832764,
      "learning_rate": 0.0003804247092110767,
      "loss": 2.9689,
      "step": 95297
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9773750305175781,
      "learning_rate": 0.00038042076837238077,
      "loss": 2.898,
      "step": 95298
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7956130504608154,
      "learning_rate": 0.0003804168275187331,
      "loss": 3.4192,
      "step": 95299
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.740110993385315,
      "learning_rate": 0.00038041288665013445,
      "loss": 3.1743,
      "step": 95300
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6996433734893799,
      "learning_rate": 0.0003804089457665855,
      "loss": 2.6716,
      "step": 95301
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5787540674209595,
      "learning_rate": 0.00038040500486808695,
      "loss": 3.2264,
      "step": 95302
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.72517991065979,
      "learning_rate": 0.0003804010639546397,
      "loss": 3.2048,
      "step": 95303
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5635746717453003,
      "learning_rate": 0.0003803971230262443,
      "loss": 3.055,
      "step": 95304
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.986267328262329,
      "learning_rate": 0.0003803931820829016,
      "loss": 2.9646,
      "step": 95305
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6594446897506714,
      "learning_rate": 0.00038038924112461227,
      "loss": 2.9868,
      "step": 95306
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7349148988723755,
      "learning_rate": 0.000380385300151377,
      "loss": 2.9515,
      "step": 95307
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.5342183113098145,
      "learning_rate": 0.00038038135916319664,
      "loss": 3.1133,
      "step": 95308
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9752581119537354,
      "learning_rate": 0.00038037741816007194,
      "loss": 3.0741,
      "step": 95309
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6907395124435425,
      "learning_rate": 0.0003803734771420035,
      "loss": 2.8423,
      "step": 95310
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9307676553726196,
      "learning_rate": 0.00038036953610899203,
      "loss": 3.101,
      "step": 95311
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.289127826690674,
      "learning_rate": 0.0003803655950610385,
      "loss": 3.1068,
      "step": 95312
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9248604774475098,
      "learning_rate": 0.0003803616539981434,
      "loss": 2.8691,
      "step": 95313
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2354331016540527,
      "learning_rate": 0.00038035771292030754,
      "loss": 3.0418,
      "step": 95314
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9271352291107178,
      "learning_rate": 0.00038035377182753173,
      "loss": 2.854,
      "step": 95315
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8554140329360962,
      "learning_rate": 0.0003803498307198166,
      "loss": 3.0594,
      "step": 95316
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.73100745677948,
      "learning_rate": 0.0003803458895971629,
      "loss": 3.0029,
      "step": 95317
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6472702026367188,
      "learning_rate": 0.0003803419484595713,
      "loss": 3.1123,
      "step": 95318
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5351096391677856,
      "learning_rate": 0.0003803380073070428,
      "loss": 2.9815,
      "step": 95319
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1237101554870605,
      "learning_rate": 0.00038033406613957783,
      "loss": 3.168,
      "step": 95320
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.12675404548645,
      "learning_rate": 0.00038033012495717727,
      "loss": 2.9226,
      "step": 95321
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5885413885116577,
      "learning_rate": 0.0003803261837598418,
      "loss": 2.6116,
      "step": 95322
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6599117517471313,
      "learning_rate": 0.00038032224254757225,
      "loss": 3.059,
      "step": 95323
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8235187530517578,
      "learning_rate": 0.00038031830132036925,
      "loss": 2.7099,
      "step": 95324
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.944627285003662,
      "learning_rate": 0.0003803143600782335,
      "loss": 3.0238,
      "step": 95325
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6612203121185303,
      "learning_rate": 0.00038031041882116587,
      "loss": 3.0467,
      "step": 95326
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5404503345489502,
      "learning_rate": 0.00038030647754916695,
      "loss": 3.1703,
      "step": 95327
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5177401304244995,
      "learning_rate": 0.00038030253626223764,
      "loss": 3.031,
      "step": 95328
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8662680387496948,
      "learning_rate": 0.0003802985949603785,
      "loss": 2.8216,
      "step": 95329
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8902796506881714,
      "learning_rate": 0.0003802946536435903,
      "loss": 2.9834,
      "step": 95330
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7793933153152466,
      "learning_rate": 0.00038029071231187385,
      "loss": 2.9578,
      "step": 95331
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1418802738189697,
      "learning_rate": 0.0003802867709652299,
      "loss": 2.8817,
      "step": 95332
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2634220123291016,
      "learning_rate": 0.00038028282960365906,
      "loss": 3.1246,
      "step": 95333
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6061475276947021,
      "learning_rate": 0.00038027888822716213,
      "loss": 2.8975,
      "step": 95334
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6468300819396973,
      "learning_rate": 0.0003802749468357398,
      "loss": 2.9951,
      "step": 95335
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1292269229888916,
      "learning_rate": 0.00038027100542939283,
      "loss": 2.8411,
      "step": 95336
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7898588180541992,
      "learning_rate": 0.0003802670640081221,
      "loss": 3.0369,
      "step": 95337
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.417271137237549,
      "learning_rate": 0.0003802631225719281,
      "loss": 3.2218,
      "step": 95338
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1250007152557373,
      "learning_rate": 0.00038025918112081174,
      "loss": 3.0677,
      "step": 95339
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2983808517456055,
      "learning_rate": 0.00038025523965477364,
      "loss": 2.9917,
      "step": 95340
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.942962646484375,
      "learning_rate": 0.0003802512981738146,
      "loss": 3.047,
      "step": 95341
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7461053133010864,
      "learning_rate": 0.00038024735667793524,
      "loss": 2.8106,
      "step": 95342
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.815850853919983,
      "learning_rate": 0.00038024341516713645,
      "loss": 2.9612,
      "step": 95343
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2716267108917236,
      "learning_rate": 0.00038023947364141894,
      "loss": 3.0801,
      "step": 95344
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0560057163238525,
      "learning_rate": 0.00038023553210078334,
      "loss": 2.9433,
      "step": 95345
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7365208864212036,
      "learning_rate": 0.00038023159054523047,
      "loss": 2.9643,
      "step": 95346
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.567793846130371,
      "learning_rate": 0.000380227648974761,
      "loss": 3.1632,
      "step": 95347
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.747048020362854,
      "learning_rate": 0.0003802237073893757,
      "loss": 3.0369,
      "step": 95348
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1202235221862793,
      "learning_rate": 0.00038021976578907527,
      "loss": 2.9352,
      "step": 95349
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1087756156921387,
      "learning_rate": 0.0003802158241738605,
      "loss": 2.9962,
      "step": 95350
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7380015850067139,
      "learning_rate": 0.0003802118825437321,
      "loss": 3.1439,
      "step": 95351
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6429643630981445,
      "learning_rate": 0.0003802079408986908,
      "loss": 3.0648,
      "step": 95352
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6859633922576904,
      "learning_rate": 0.0003802039992387373,
      "loss": 3.0244,
      "step": 95353
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.347330331802368,
      "learning_rate": 0.00038020005756387237,
      "loss": 3.1026,
      "step": 95354
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.001140832901001,
      "learning_rate": 0.00038019611587409674,
      "loss": 2.9472,
      "step": 95355
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.624211072921753,
      "learning_rate": 0.0003801921741694112,
      "loss": 3.1557,
      "step": 95356
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6030175685882568,
      "learning_rate": 0.0003801882324498164,
      "loss": 2.9604,
      "step": 95357
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.376008987426758,
      "learning_rate": 0.00038018429071531303,
      "loss": 3.0904,
      "step": 95358
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.825482964515686,
      "learning_rate": 0.0003801803489659019,
      "loss": 3.1308,
      "step": 95359
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6894620656967163,
      "learning_rate": 0.00038017640720158384,
      "loss": 2.8468,
      "step": 95360
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.723489761352539,
      "learning_rate": 0.0003801724654223593,
      "loss": 2.8685,
      "step": 95361
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1707046031951904,
      "learning_rate": 0.0003801685236282293,
      "loss": 3.0079,
      "step": 95362
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8627395629882812,
      "learning_rate": 0.00038016458181919444,
      "loss": 3.216,
      "step": 95363
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7750903367996216,
      "learning_rate": 0.00038016063999525546,
      "loss": 2.9446,
      "step": 95364
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6267883777618408,
      "learning_rate": 0.00038015669815641314,
      "loss": 2.9943,
      "step": 95365
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7731629610061646,
      "learning_rate": 0.00038015275630266804,
      "loss": 3.0426,
      "step": 95366
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8110475540161133,
      "learning_rate": 0.0003801488144340212,
      "loss": 2.9088,
      "step": 95367
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.3686823844909668,
      "learning_rate": 0.0003801448725504731,
      "loss": 3.0835,
      "step": 95368
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8463311195373535,
      "learning_rate": 0.0003801409306520246,
      "loss": 3.1772,
      "step": 95369
    },
    {
      "epoch": 1.24,
      "grad_norm": 4.079805374145508,
      "learning_rate": 0.0003801369887386763,
      "loss": 2.6858,
      "step": 95370
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.22944974899292,
      "learning_rate": 0.00038013304681042907,
      "loss": 3.0779,
      "step": 95371
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8155994415283203,
      "learning_rate": 0.00038012910486728363,
      "loss": 2.8825,
      "step": 95372
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5630104541778564,
      "learning_rate": 0.00038012516290924066,
      "loss": 2.938,
      "step": 95373
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5160539150238037,
      "learning_rate": 0.0003801212209363009,
      "loss": 3.0875,
      "step": 95374
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6457786560058594,
      "learning_rate": 0.00038011727894846504,
      "loss": 3.0967,
      "step": 95375
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6700196266174316,
      "learning_rate": 0.000380113336945734,
      "loss": 2.9259,
      "step": 95376
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6921265125274658,
      "learning_rate": 0.0003801093949281083,
      "loss": 2.7682,
      "step": 95377
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9787046909332275,
      "learning_rate": 0.00038010545289558865,
      "loss": 3.1088,
      "step": 95378
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.600825071334839,
      "learning_rate": 0.00038010151084817603,
      "loss": 3.2536,
      "step": 95379
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5190112590789795,
      "learning_rate": 0.00038009756878587097,
      "loss": 2.9874,
      "step": 95380
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5854767560958862,
      "learning_rate": 0.00038009362670867424,
      "loss": 3.1714,
      "step": 95381
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9112441539764404,
      "learning_rate": 0.0003800896846165867,
      "loss": 2.976,
      "step": 95382
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5212010145187378,
      "learning_rate": 0.0003800857425096088,
      "loss": 3.0702,
      "step": 95383
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9223177433013916,
      "learning_rate": 0.00038008180038774165,
      "loss": 3.2336,
      "step": 95384
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8171061277389526,
      "learning_rate": 0.0003800778582509856,
      "loss": 3.25,
      "step": 95385
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.75272536277771,
      "learning_rate": 0.00038007391609934165,
      "loss": 2.9871,
      "step": 95386
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5052330493927,
      "learning_rate": 0.00038006997393281045,
      "loss": 3.0533,
      "step": 95387
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.084449291229248,
      "learning_rate": 0.0003800660317513927,
      "loss": 3.1177,
      "step": 95388
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.608918309211731,
      "learning_rate": 0.0003800620895550892,
      "loss": 3.1015,
      "step": 95389
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.718929648399353,
      "learning_rate": 0.0003800581473439006,
      "loss": 3.0478,
      "step": 95390
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9581797122955322,
      "learning_rate": 0.0003800542051178277,
      "loss": 3.2265,
      "step": 95391
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.0400843620300293,
      "learning_rate": 0.0003800502628768713,
      "loss": 3.1709,
      "step": 95392
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0636754035949707,
      "learning_rate": 0.000380046320621032,
      "loss": 3.0721,
      "step": 95393
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1921210289001465,
      "learning_rate": 0.00038004237835031053,
      "loss": 3.0082,
      "step": 95394
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.848677396774292,
      "learning_rate": 0.0003800384360647077,
      "loss": 3.1541,
      "step": 95395
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.988188624382019,
      "learning_rate": 0.00038003449376422413,
      "loss": 2.786,
      "step": 95396
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7826553583145142,
      "learning_rate": 0.0003800305514488607,
      "loss": 3.1797,
      "step": 95397
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2940900325775146,
      "learning_rate": 0.0003800266091186181,
      "loss": 3.1952,
      "step": 95398
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.370234727859497,
      "learning_rate": 0.000380022666773497,
      "loss": 2.9287,
      "step": 95399
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6673932075500488,
      "learning_rate": 0.0003800187244134982,
      "loss": 2.8864,
      "step": 95400
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7696374654769897,
      "learning_rate": 0.0003800147820386224,
      "loss": 2.8495,
      "step": 95401
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.40636944770813,
      "learning_rate": 0.00038001083964887047,
      "loss": 3.2839,
      "step": 95402
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.23650860786438,
      "learning_rate": 0.00038000689724424283,
      "loss": 2.9406,
      "step": 95403
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7170143127441406,
      "learning_rate": 0.0003800029548247405,
      "loss": 2.9752,
      "step": 95404
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.402190923690796,
      "learning_rate": 0.000379999012390364,
      "loss": 3.0575,
      "step": 95405
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3887205123901367,
      "learning_rate": 0.00037999506994111426,
      "loss": 3.1071,
      "step": 95406
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0273876190185547,
      "learning_rate": 0.000379991127476992,
      "loss": 2.9525,
      "step": 95407
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0086331367492676,
      "learning_rate": 0.0003799871849979977,
      "loss": 3.1706,
      "step": 95408
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.980589509010315,
      "learning_rate": 0.0003799832425041324,
      "loss": 2.9658,
      "step": 95409
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1566197872161865,
      "learning_rate": 0.00037997929999539676,
      "loss": 3.066,
      "step": 95410
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9805001020431519,
      "learning_rate": 0.00037997535747179127,
      "loss": 3.061,
      "step": 95411
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2953338623046875,
      "learning_rate": 0.000379971414933317,
      "loss": 2.8286,
      "step": 95412
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5655025243759155,
      "learning_rate": 0.00037996747237997454,
      "loss": 3.0173,
      "step": 95413
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.6103553771972656,
      "learning_rate": 0.0003799635298117645,
      "loss": 3.0567,
      "step": 95414
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7869808673858643,
      "learning_rate": 0.00037995958722868774,
      "loss": 2.9553,
      "step": 95415
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0544962882995605,
      "learning_rate": 0.0003799556446307451,
      "loss": 2.8937,
      "step": 95416
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6989026069641113,
      "learning_rate": 0.0003799517020179371,
      "loss": 2.8971,
      "step": 95417
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6509283781051636,
      "learning_rate": 0.00037994775939026465,
      "loss": 3.1084,
      "step": 95418
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6706863641738892,
      "learning_rate": 0.00037994381674772836,
      "loss": 3.0216,
      "step": 95419
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.424999475479126,
      "learning_rate": 0.000379939874090329,
      "loss": 3.0461,
      "step": 95420
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6240483522415161,
      "learning_rate": 0.00037993593141806727,
      "loss": 2.7584,
      "step": 95421
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6764025688171387,
      "learning_rate": 0.0003799319887309441,
      "loss": 2.916,
      "step": 95422
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8666930198669434,
      "learning_rate": 0.00037992804602895986,
      "loss": 2.9447,
      "step": 95423
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.482403516769409,
      "learning_rate": 0.0003799241033121156,
      "loss": 2.8631,
      "step": 95424
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1619372367858887,
      "learning_rate": 0.00037992016058041193,
      "loss": 2.8231,
      "step": 95425
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6195855140686035,
      "learning_rate": 0.00037991621783384954,
      "loss": 2.795,
      "step": 95426
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.2885756492614746,
      "learning_rate": 0.0003799122750724293,
      "loss": 2.9898,
      "step": 95427
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7024431228637695,
      "learning_rate": 0.0003799083322961518,
      "loss": 3.0842,
      "step": 95428
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1122679710388184,
      "learning_rate": 0.00037990438950501784,
      "loss": 2.9599,
      "step": 95429
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.6174979209899902,
      "learning_rate": 0.0003799004466990281,
      "loss": 3.2305,
      "step": 95430
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1422572135925293,
      "learning_rate": 0.0003798965038781835,
      "loss": 3.1793,
      "step": 95431
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8427560329437256,
      "learning_rate": 0.0003798925610424845,
      "loss": 2.9517,
      "step": 95432
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.1876394748687744,
      "learning_rate": 0.000379888618191932,
      "loss": 2.9717,
      "step": 95433
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0845465660095215,
      "learning_rate": 0.0003798846753265268,
      "loss": 3.1257,
      "step": 95434
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.205563545227051,
      "learning_rate": 0.0003798807324462694,
      "loss": 3.3066,
      "step": 95435
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2839832305908203,
      "learning_rate": 0.00037987678955116064,
      "loss": 3.0945,
      "step": 95436
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9830065965652466,
      "learning_rate": 0.0003798728466412014,
      "loss": 2.8877,
      "step": 95437
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.8392701148986816,
      "learning_rate": 0.0003798689037163922,
      "loss": 3.0766,
      "step": 95438
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.5818817615509033,
      "learning_rate": 0.0003798649607767339,
      "loss": 3.0782,
      "step": 95439
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6527726650238037,
      "learning_rate": 0.00037986101782222715,
      "loss": 2.933,
      "step": 95440
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6304303407669067,
      "learning_rate": 0.00037985707485287275,
      "loss": 2.9131,
      "step": 95441
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.933143377304077,
      "learning_rate": 0.0003798531318686715,
      "loss": 2.8096,
      "step": 95442
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5168793201446533,
      "learning_rate": 0.0003798491888696239,
      "loss": 3.3525,
      "step": 95443
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2452964782714844,
      "learning_rate": 0.000379845245855731,
      "loss": 3.0222,
      "step": 95444
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9322749376296997,
      "learning_rate": 0.0003798413028269932,
      "loss": 2.9497,
      "step": 95445
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.931165099143982,
      "learning_rate": 0.0003798373597834114,
      "loss": 3.0389,
      "step": 95446
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.5142972469329834,
      "learning_rate": 0.00037983341672498643,
      "loss": 3.0056,
      "step": 95447
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.709790587425232,
      "learning_rate": 0.00037982947365171896,
      "loss": 2.9056,
      "step": 95448
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7335578203201294,
      "learning_rate": 0.0003798255305636095,
      "loss": 2.8663,
      "step": 95449
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.647371768951416,
      "learning_rate": 0.0003798215874606591,
      "loss": 3.114,
      "step": 95450
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.081549882888794,
      "learning_rate": 0.0003798176443428683,
      "loss": 2.9631,
      "step": 95451
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9197337627410889,
      "learning_rate": 0.00037981370121023794,
      "loss": 2.7875,
      "step": 95452
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7653125524520874,
      "learning_rate": 0.0003798097580627687,
      "loss": 2.872,
      "step": 95453
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7425601482391357,
      "learning_rate": 0.0003798058149004612,
      "loss": 3.0293,
      "step": 95454
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.873433232307434,
      "learning_rate": 0.0003798018717233165,
      "loss": 2.9693,
      "step": 95455
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4600104093551636,
      "learning_rate": 0.000379797928531335,
      "loss": 3.162,
      "step": 95456
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6840190887451172,
      "learning_rate": 0.0003797939853245176,
      "loss": 3.1386,
      "step": 95457
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.810316562652588,
      "learning_rate": 0.00037979004210286494,
      "loss": 3.2365,
      "step": 95458
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.0883028507232666,
      "learning_rate": 0.0003797860988663779,
      "loss": 3.1523,
      "step": 95459
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0666651725769043,
      "learning_rate": 0.00037978215561505703,
      "loss": 2.9935,
      "step": 95460
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.869429349899292,
      "learning_rate": 0.00037977821234890316,
      "loss": 3.1033,
      "step": 95461
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6434531211853027,
      "learning_rate": 0.0003797742690679171,
      "loss": 2.8984,
      "step": 95462
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.213366746902466,
      "learning_rate": 0.0003797703257720994,
      "loss": 2.9606,
      "step": 95463
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7835804224014282,
      "learning_rate": 0.00037976638246145093,
      "loss": 2.9489,
      "step": 95464
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5511772632598877,
      "learning_rate": 0.0003797624391359724,
      "loss": 3.0532,
      "step": 95465
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6095550060272217,
      "learning_rate": 0.0003797584957956645,
      "loss": 2.968,
      "step": 95466
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0945374965667725,
      "learning_rate": 0.00037975455244052794,
      "loss": 2.9089,
      "step": 95467
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.152383327484131,
      "learning_rate": 0.0003797506090705636,
      "loss": 3.0505,
      "step": 95468
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.911685824394226,
      "learning_rate": 0.00037974666568577206,
      "loss": 2.9136,
      "step": 95469
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4825764894485474,
      "learning_rate": 0.0003797427222861541,
      "loss": 2.9676,
      "step": 95470
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.552616596221924,
      "learning_rate": 0.0003797387788717106,
      "loss": 3.0336,
      "step": 95471
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.541337251663208,
      "learning_rate": 0.00037973483544244196,
      "loss": 3.1517,
      "step": 95472
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.721273422241211,
      "learning_rate": 0.00037973089199834915,
      "loss": 2.9579,
      "step": 95473
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.156569719314575,
      "learning_rate": 0.00037972694853943297,
      "loss": 3.1882,
      "step": 95474
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.885887384414673,
      "learning_rate": 0.000379723005065694,
      "loss": 3.1282,
      "step": 95475
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.604419469833374,
      "learning_rate": 0.000379719061577133,
      "loss": 2.9945,
      "step": 95476
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.1460232734680176,
      "learning_rate": 0.00037971511807375073,
      "loss": 3.0349,
      "step": 95477
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5310653448104858,
      "learning_rate": 0.0003797111745555479,
      "loss": 2.7676,
      "step": 95478
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7656587362289429,
      "learning_rate": 0.0003797072310225252,
      "loss": 2.8652,
      "step": 95479
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.710252523422241,
      "learning_rate": 0.00037970328747468355,
      "loss": 3.0713,
      "step": 95480
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7053775787353516,
      "learning_rate": 0.0003796993439120234,
      "loss": 2.8462,
      "step": 95481
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7171002626419067,
      "learning_rate": 0.0003796954003345457,
      "loss": 3.0437,
      "step": 95482
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7558096647262573,
      "learning_rate": 0.00037969145674225126,
      "loss": 2.7853,
      "step": 95483
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.8315560817718506,
      "learning_rate": 0.0003796875131351405,
      "loss": 3.0356,
      "step": 95484
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.003922462463379,
      "learning_rate": 0.0003796835695132144,
      "loss": 3.1143,
      "step": 95485
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5725789070129395,
      "learning_rate": 0.00037967962587647365,
      "loss": 2.7048,
      "step": 95486
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3143441677093506,
      "learning_rate": 0.0003796756822249189,
      "loss": 3.0183,
      "step": 95487
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.29175066947937,
      "learning_rate": 0.0003796717385585509,
      "loss": 3.1329,
      "step": 95488
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.4468886852264404,
      "learning_rate": 0.0003796677948773705,
      "loss": 3.2894,
      "step": 95489
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1418511867523193,
      "learning_rate": 0.00037966385118137834,
      "loss": 3.1884,
      "step": 95490
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.639765739440918,
      "learning_rate": 0.00037965990747057505,
      "loss": 2.8987,
      "step": 95491
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.648949384689331,
      "learning_rate": 0.00037965596374496167,
      "loss": 2.779,
      "step": 95492
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1988985538482666,
      "learning_rate": 0.0003796520200045386,
      "loss": 2.8787,
      "step": 95493
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8570884466171265,
      "learning_rate": 0.0003796480762493068,
      "loss": 2.8359,
      "step": 95494
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7075296640396118,
      "learning_rate": 0.0003796441324792669,
      "loss": 3.1469,
      "step": 95495
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8561452627182007,
      "learning_rate": 0.0003796401886944196,
      "loss": 2.9633,
      "step": 95496
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5991076231002808,
      "learning_rate": 0.0003796362448947657,
      "loss": 3.0427,
      "step": 95497
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6860830783843994,
      "learning_rate": 0.000379632301080306,
      "loss": 2.9675,
      "step": 95498
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0545084476470947,
      "learning_rate": 0.00037962835725104105,
      "loss": 2.9621,
      "step": 95499
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6978087425231934,
      "learning_rate": 0.00037962441340697175,
      "loss": 2.7603,
      "step": 95500
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.708768367767334,
      "learning_rate": 0.0003796204695480988,
      "loss": 2.8686,
      "step": 95501
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.468566417694092,
      "learning_rate": 0.0003796165256744228,
      "loss": 2.9678,
      "step": 95502
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9970885515213013,
      "learning_rate": 0.0003796125817859446,
      "loss": 3.1446,
      "step": 95503
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.338773727416992,
      "learning_rate": 0.000379608637882665,
      "loss": 2.9126,
      "step": 95504
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7902297973632812,
      "learning_rate": 0.0003796046939645846,
      "loss": 3.0064,
      "step": 95505
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7170581817626953,
      "learning_rate": 0.00037960075003170413,
      "loss": 2.8522,
      "step": 95506
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8219398260116577,
      "learning_rate": 0.0003795968060840245,
      "loss": 3.0819,
      "step": 95507
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.39947247505188,
      "learning_rate": 0.00037959286212154624,
      "loss": 3.2306,
      "step": 95508
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7769464254379272,
      "learning_rate": 0.0003795889181442701,
      "loss": 3.1541,
      "step": 95509
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.755834937095642,
      "learning_rate": 0.0003795849741521971,
      "loss": 3.0832,
      "step": 95510
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6839874982833862,
      "learning_rate": 0.00037958103014532757,
      "loss": 2.9791,
      "step": 95511
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5952059030532837,
      "learning_rate": 0.00037957708612366243,
      "loss": 2.7648,
      "step": 95512
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6256463527679443,
      "learning_rate": 0.0003795731420872025,
      "loss": 3.1021,
      "step": 95513
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6739181280136108,
      "learning_rate": 0.0003795691980359484,
      "loss": 2.8786,
      "step": 95514
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.837842345237732,
      "learning_rate": 0.0003795652539699007,
      "loss": 3.3373,
      "step": 95515
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.760299801826477,
      "learning_rate": 0.00037956130988906053,
      "loss": 2.8716,
      "step": 95516
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8055256605148315,
      "learning_rate": 0.0003795573657934283,
      "loss": 3.0349,
      "step": 95517
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.6608455181121826,
      "learning_rate": 0.00037955342168300495,
      "loss": 3.0633,
      "step": 95518
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8616604804992676,
      "learning_rate": 0.000379549477557791,
      "loss": 3.0655,
      "step": 95519
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.121788740158081,
      "learning_rate": 0.00037954553341778744,
      "loss": 3.1903,
      "step": 95520
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9600163698196411,
      "learning_rate": 0.0003795415892629948,
      "loss": 2.7407,
      "step": 95521
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7675939798355103,
      "learning_rate": 0.00037953764509341385,
      "loss": 2.777,
      "step": 95522
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.786802887916565,
      "learning_rate": 0.0003795337009090454,
      "loss": 3.2972,
      "step": 95523
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6451119184494019,
      "learning_rate": 0.0003795297567098901,
      "loss": 3.0472,
      "step": 95524
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.017399311065674,
      "learning_rate": 0.0003795258124959487,
      "loss": 2.9627,
      "step": 95525
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7565808296203613,
      "learning_rate": 0.00037952186826722197,
      "loss": 2.948,
      "step": 95526
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9199246168136597,
      "learning_rate": 0.0003795179240237106,
      "loss": 2.9355,
      "step": 95527
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.977447271347046,
      "learning_rate": 0.00037951397976541533,
      "loss": 2.8614,
      "step": 95528
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9777401685714722,
      "learning_rate": 0.000379510035492337,
      "loss": 3.2462,
      "step": 95529
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.595984935760498,
      "learning_rate": 0.0003795060912044762,
      "loss": 3.1802,
      "step": 95530
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8422154188156128,
      "learning_rate": 0.00037950214690183373,
      "loss": 2.8017,
      "step": 95531
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0486562252044678,
      "learning_rate": 0.0003794982025844102,
      "loss": 2.7875,
      "step": 95532
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9491915702819824,
      "learning_rate": 0.00037949425825220664,
      "loss": 2.9121,
      "step": 95533
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7476205825805664,
      "learning_rate": 0.0003794903139052235,
      "loss": 2.971,
      "step": 95534
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6508444547653198,
      "learning_rate": 0.0003794863695434616,
      "loss": 2.9982,
      "step": 95535
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8138680458068848,
      "learning_rate": 0.00037948242516692174,
      "loss": 2.7186,
      "step": 95536
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8660236597061157,
      "learning_rate": 0.00037947848077560454,
      "loss": 2.9787,
      "step": 95537
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9466136693954468,
      "learning_rate": 0.0003794745363695108,
      "loss": 3.0341,
      "step": 95538
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8047776222229004,
      "learning_rate": 0.0003794705919486412,
      "loss": 3.1038,
      "step": 95539
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9105526208877563,
      "learning_rate": 0.0003794666475129967,
      "loss": 2.9284,
      "step": 95540
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7563366889953613,
      "learning_rate": 0.0003794627030625776,
      "loss": 3.0871,
      "step": 95541
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.720892071723938,
      "learning_rate": 0.00037945875859738505,
      "loss": 3.1195,
      "step": 95542
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1567559242248535,
      "learning_rate": 0.00037945481411741957,
      "loss": 2.9215,
      "step": 95543
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.975621223449707,
      "learning_rate": 0.00037945086962268195,
      "loss": 2.8917,
      "step": 95544
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7742574214935303,
      "learning_rate": 0.00037944692511317294,
      "loss": 3.1209,
      "step": 95545
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6595377922058105,
      "learning_rate": 0.0003794429805888932,
      "loss": 2.962,
      "step": 95546
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.642750859260559,
      "learning_rate": 0.00037943903604984353,
      "loss": 2.7013,
      "step": 95547
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3277347087860107,
      "learning_rate": 0.0003794350914960246,
      "loss": 2.8854,
      "step": 95548
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0697295665740967,
      "learning_rate": 0.00037943114692743724,
      "loss": 2.9067,
      "step": 95549
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.805518388748169,
      "learning_rate": 0.0003794272023440821,
      "loss": 2.9819,
      "step": 95550
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.048750877380371,
      "learning_rate": 0.0003794232577459599,
      "loss": 2.8588,
      "step": 95551
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9863693714141846,
      "learning_rate": 0.0003794193131330715,
      "loss": 3.2932,
      "step": 95552
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1669139862060547,
      "learning_rate": 0.00037941536850541756,
      "loss": 3.1383,
      "step": 95553
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7557291984558105,
      "learning_rate": 0.0003794114238629988,
      "loss": 2.7979,
      "step": 95554
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.119882583618164,
      "learning_rate": 0.0003794074792058159,
      "loss": 2.7901,
      "step": 95555
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.9565820693969727,
      "learning_rate": 0.00037940353453386975,
      "loss": 3.2185,
      "step": 95556
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8263418674468994,
      "learning_rate": 0.00037939958984716085,
      "loss": 3.0115,
      "step": 95557
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.5341076850891113,
      "learning_rate": 0.0003793956451456901,
      "loss": 2.8137,
      "step": 95558
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.554894208908081,
      "learning_rate": 0.0003793917004294583,
      "loss": 3.0524,
      "step": 95559
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3459267616271973,
      "learning_rate": 0.000379387755698466,
      "loss": 2.9518,
      "step": 95560
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8215419054031372,
      "learning_rate": 0.00037938381095271395,
      "loss": 2.8437,
      "step": 95561
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.955627918243408,
      "learning_rate": 0.0003793798661922031,
      "loss": 3.0345,
      "step": 95562
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.784308910369873,
      "learning_rate": 0.00037937592141693396,
      "loss": 3.0949,
      "step": 95563
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8900957107543945,
      "learning_rate": 0.0003793719766269073,
      "loss": 3.0305,
      "step": 95564
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5931000709533691,
      "learning_rate": 0.00037936803182212403,
      "loss": 2.9522,
      "step": 95565
    },
    {
      "epoch": 1.24,
      "grad_norm": 4.123286724090576,
      "learning_rate": 0.0003793640870025846,
      "loss": 3.1815,
      "step": 95566
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.063380002975464,
      "learning_rate": 0.00037936014216828997,
      "loss": 2.9921,
      "step": 95567
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4675705432891846,
      "learning_rate": 0.0003793561973192407,
      "loss": 2.8286,
      "step": 95568
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7177255153656006,
      "learning_rate": 0.0003793522524554377,
      "loss": 3.0081,
      "step": 95569
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.700613021850586,
      "learning_rate": 0.00037934830757688155,
      "loss": 2.706,
      "step": 95570
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7508349418640137,
      "learning_rate": 0.00037934436268357316,
      "loss": 3.1695,
      "step": 95571
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1196961402893066,
      "learning_rate": 0.00037934041777551306,
      "loss": 3.1683,
      "step": 95572
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.0421841144561768,
      "learning_rate": 0.0003793364728527021,
      "loss": 3.0626,
      "step": 95573
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.049715042114258,
      "learning_rate": 0.00037933252791514104,
      "loss": 2.6913,
      "step": 95574
    },
    {
      "epoch": 1.24,
      "grad_norm": 4.631803512573242,
      "learning_rate": 0.0003793285829628305,
      "loss": 2.9547,
      "step": 95575
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.0172536373138428,
      "learning_rate": 0.00037932463799577134,
      "loss": 2.9255,
      "step": 95576
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.7839738130569458,
      "learning_rate": 0.00037932069301396424,
      "loss": 2.8997,
      "step": 95577
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.2998578548431396,
      "learning_rate": 0.0003793167480174099,
      "loss": 3.0313,
      "step": 95578
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.9682724475860596,
      "learning_rate": 0.00037931280300610896,
      "loss": 3.0184,
      "step": 95579
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.2687594890594482,
      "learning_rate": 0.0003793088579800624,
      "loss": 3.0345,
      "step": 95580
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.3904495239257812,
      "learning_rate": 0.00037930491293927085,
      "loss": 2.9295,
      "step": 95581
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.937567949295044,
      "learning_rate": 0.0003793009678837349,
      "loss": 2.9882,
      "step": 95582
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.9798784255981445,
      "learning_rate": 0.0003792970228134555,
      "loss": 2.7727,
      "step": 95583
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.1951000690460205,
      "learning_rate": 0.0003792930777284333,
      "loss": 2.9059,
      "step": 95584
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.5146992206573486,
      "learning_rate": 0.00037928913262866895,
      "loss": 3.2502,
      "step": 95585
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6263612508773804,
      "learning_rate": 0.00037928518751416325,
      "loss": 3.0742,
      "step": 95586
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0583877563476562,
      "learning_rate": 0.000379281242384917,
      "loss": 3.0154,
      "step": 95587
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.7183170318603516,
      "learning_rate": 0.0003792772972409309,
      "loss": 3.0919,
      "step": 95588
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.038276433944702,
      "learning_rate": 0.0003792733520822055,
      "loss": 2.8375,
      "step": 95589
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.717280626296997,
      "learning_rate": 0.00037926940690874184,
      "loss": 2.7808,
      "step": 95590
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.8658108711242676,
      "learning_rate": 0.0003792654617205405,
      "loss": 3.1274,
      "step": 95591
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.032284736633301,
      "learning_rate": 0.00037926151651760206,
      "loss": 3.2114,
      "step": 95592
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.884947657585144,
      "learning_rate": 0.0003792575712999276,
      "loss": 2.9391,
      "step": 95593
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9842604398727417,
      "learning_rate": 0.00037925362606751757,
      "loss": 2.8752,
      "step": 95594
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.577932357788086,
      "learning_rate": 0.0003792496808203727,
      "loss": 2.9126,
      "step": 95595
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.873313546180725,
      "learning_rate": 0.000379245735558494,
      "loss": 3.0829,
      "step": 95596
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.0540785789489746,
      "learning_rate": 0.0003792417902818819,
      "loss": 2.9241,
      "step": 95597
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9493217468261719,
      "learning_rate": 0.00037923784499053727,
      "loss": 3.0173,
      "step": 95598
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.524502754211426,
      "learning_rate": 0.0003792338996844609,
      "loss": 2.8935,
      "step": 95599
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8357995748519897,
      "learning_rate": 0.0003792299543636534,
      "loss": 3.2158,
      "step": 95600
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5675002336502075,
      "learning_rate": 0.0003792260090281155,
      "loss": 2.9396,
      "step": 95601
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6958776712417603,
      "learning_rate": 0.0003792220636778481,
      "loss": 3.0711,
      "step": 95602
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.371659517288208,
      "learning_rate": 0.0003792181183128518,
      "loss": 3.1616,
      "step": 95603
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.740921974182129,
      "learning_rate": 0.0003792141729331273,
      "loss": 2.9943,
      "step": 95604
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5501811504364014,
      "learning_rate": 0.00037921022753867545,
      "loss": 2.8958,
      "step": 95605
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.3167262077331543,
      "learning_rate": 0.00037920628212949684,
      "loss": 2.9325,
      "step": 95606
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.774078845977783,
      "learning_rate": 0.0003792023367055923,
      "loss": 3.2014,
      "step": 95607
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.189910650253296,
      "learning_rate": 0.00037919839126696265,
      "loss": 2.9984,
      "step": 95608
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.5417836904525757,
      "learning_rate": 0.00037919444581360843,
      "loss": 2.7574,
      "step": 95609
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.4412283897399902,
      "learning_rate": 0.00037919050034553053,
      "loss": 2.9034,
      "step": 95610
    },
    {
      "epoch": 1.24,
      "grad_norm": 3.0219297409057617,
      "learning_rate": 0.0003791865548627296,
      "loss": 3.1615,
      "step": 95611
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.6839603185653687,
      "learning_rate": 0.00037918260936520634,
      "loss": 3.1149,
      "step": 95612
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.8610637187957764,
      "learning_rate": 0.00037917866385296157,
      "loss": 3.01,
      "step": 95613
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.748643398284912,
      "learning_rate": 0.0003791747183259961,
      "loss": 3.1572,
      "step": 95614
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.7399044036865234,
      "learning_rate": 0.0003791707727843104,
      "loss": 2.9227,
      "step": 95615
    },
    {
      "epoch": 1.24,
      "grad_norm": 1.9789990186691284,
      "learning_rate": 0.0003791668272279054,
      "loss": 2.7141,
      "step": 95616
    },
    {
      "epoch": 1.24,
      "grad_norm": 2.1331608295440674,
      "learning_rate": 0.0003791628816567819,
      "loss": 2.706,
      "step": 95617
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3736255168914795,
      "learning_rate": 0.00037915893607094037,
      "loss": 3.1881,
      "step": 95618
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0092668533325195,
      "learning_rate": 0.0003791549904703818,
      "loss": 2.8476,
      "step": 95619
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9751368761062622,
      "learning_rate": 0.00037915104485510676,
      "loss": 3.0834,
      "step": 95620
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9040496349334717,
      "learning_rate": 0.00037914709922511606,
      "loss": 3.3378,
      "step": 95621
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2646193504333496,
      "learning_rate": 0.00037914315358041046,
      "loss": 3.0962,
      "step": 95622
    },
    {
      "epoch": 1.25,
      "grad_norm": 5.0919599533081055,
      "learning_rate": 0.0003791392079209906,
      "loss": 2.9964,
      "step": 95623
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2233967781066895,
      "learning_rate": 0.0003791352622468573,
      "loss": 2.9536,
      "step": 95624
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.9703142642974854,
      "learning_rate": 0.00037913131655801125,
      "loss": 3.0085,
      "step": 95625
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7883445024490356,
      "learning_rate": 0.00037912737085445326,
      "loss": 3.0927,
      "step": 95626
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8008227348327637,
      "learning_rate": 0.00037912342513618383,
      "loss": 2.9953,
      "step": 95627
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2383062839508057,
      "learning_rate": 0.00037911947940320406,
      "loss": 3.0358,
      "step": 95628
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.416412830352783,
      "learning_rate": 0.0003791155336555144,
      "loss": 3.1873,
      "step": 95629
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2351691722869873,
      "learning_rate": 0.00037911158789311565,
      "loss": 2.7941,
      "step": 95630
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.588632583618164,
      "learning_rate": 0.00037910764211600853,
      "loss": 3.0772,
      "step": 95631
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0112593173980713,
      "learning_rate": 0.0003791036963241939,
      "loss": 3.306,
      "step": 95632
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.3607726097106934,
      "learning_rate": 0.0003790997505176723,
      "loss": 3.0459,
      "step": 95633
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3915035724639893,
      "learning_rate": 0.00037909580469644465,
      "loss": 2.9089,
      "step": 95634
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7056705951690674,
      "learning_rate": 0.00037909185886051165,
      "loss": 3.0044,
      "step": 95635
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2093441486358643,
      "learning_rate": 0.0003790879130098738,
      "loss": 2.9969,
      "step": 95636
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9134974479675293,
      "learning_rate": 0.0003790839671445321,
      "loss": 2.9333,
      "step": 95637
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.68950617313385,
      "learning_rate": 0.0003790800212644873,
      "loss": 3.1113,
      "step": 95638
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.628198266029358,
      "learning_rate": 0.00037907607536973986,
      "loss": 2.664,
      "step": 95639
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7407219409942627,
      "learning_rate": 0.00037907212946029073,
      "loss": 3.093,
      "step": 95640
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3336715698242188,
      "learning_rate": 0.00037906818353614075,
      "loss": 3.1096,
      "step": 95641
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9481130838394165,
      "learning_rate": 0.00037906423759729034,
      "loss": 2.8519,
      "step": 95642
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.667960286140442,
      "learning_rate": 0.00037906029164374043,
      "loss": 3.0285,
      "step": 95643
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.774917721748352,
      "learning_rate": 0.0003790563456754917,
      "loss": 2.7727,
      "step": 95644
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5537670850753784,
      "learning_rate": 0.000379052399692545,
      "loss": 3.0716,
      "step": 95645
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8692646026611328,
      "learning_rate": 0.0003790484536949008,
      "loss": 3.0776,
      "step": 95646
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6007039546966553,
      "learning_rate": 0.0003790445076825602,
      "loss": 2.9459,
      "step": 95647
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0825655460357666,
      "learning_rate": 0.0003790405616555236,
      "loss": 3.3322,
      "step": 95648
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9801653623580933,
      "learning_rate": 0.0003790366156137919,
      "loss": 2.7759,
      "step": 95649
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9128154516220093,
      "learning_rate": 0.00037903266955736584,
      "loss": 2.938,
      "step": 95650
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5864962339401245,
      "learning_rate": 0.00037902872348624607,
      "loss": 3.0404,
      "step": 95651
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7474381923675537,
      "learning_rate": 0.0003790247774004333,
      "loss": 2.9709,
      "step": 95652
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6733776330947876,
      "learning_rate": 0.0003790208312999284,
      "loss": 3.3125,
      "step": 95653
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.947283387184143,
      "learning_rate": 0.00037901688518473213,
      "loss": 2.8756,
      "step": 95654
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.986402153968811,
      "learning_rate": 0.00037901293905484503,
      "loss": 3.0101,
      "step": 95655
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.195652484893799,
      "learning_rate": 0.0003790089929102679,
      "loss": 3.2722,
      "step": 95656
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6478561162948608,
      "learning_rate": 0.00037900504675100164,
      "loss": 2.9373,
      "step": 95657
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7095392942428589,
      "learning_rate": 0.0003790011005770467,
      "loss": 2.8292,
      "step": 95658
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.257857322692871,
      "learning_rate": 0.00037899715438840403,
      "loss": 3.1529,
      "step": 95659
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.115717649459839,
      "learning_rate": 0.0003789932081850743,
      "loss": 3.1527,
      "step": 95660
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0383172035217285,
      "learning_rate": 0.0003789892619670582,
      "loss": 3.1185,
      "step": 95661
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8190828561782837,
      "learning_rate": 0.00037898531573435655,
      "loss": 3.1491,
      "step": 95662
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5876153707504272,
      "learning_rate": 0.00037898136948697007,
      "loss": 2.9256,
      "step": 95663
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5407166481018066,
      "learning_rate": 0.0003789774232248994,
      "loss": 2.8679,
      "step": 95664
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.140686273574829,
      "learning_rate": 0.00037897347694814537,
      "loss": 2.8986,
      "step": 95665
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.662623643875122,
      "learning_rate": 0.0003789695306567087,
      "loss": 2.9802,
      "step": 95666
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7303059101104736,
      "learning_rate": 0.0003789655843505901,
      "loss": 3.1543,
      "step": 95667
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9769928455352783,
      "learning_rate": 0.0003789616380297903,
      "loss": 2.9234,
      "step": 95668
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5395433902740479,
      "learning_rate": 0.00037895769169431,
      "loss": 2.9553,
      "step": 95669
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7584846019744873,
      "learning_rate": 0.00037895374534414997,
      "loss": 2.9972,
      "step": 95670
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7222346067428589,
      "learning_rate": 0.000378949798979311,
      "loss": 2.9984,
      "step": 95671
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.048758029937744,
      "learning_rate": 0.0003789458525997937,
      "loss": 2.9606,
      "step": 95672
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.539534568786621,
      "learning_rate": 0.00037894190620559894,
      "loss": 3.1194,
      "step": 95673
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9401872158050537,
      "learning_rate": 0.0003789379597967273,
      "loss": 2.9447,
      "step": 95674
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6283373832702637,
      "learning_rate": 0.0003789340133731798,
      "loss": 3.2579,
      "step": 95675
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8071885108947754,
      "learning_rate": 0.0003789300669349568,
      "loss": 2.8584,
      "step": 95676
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.094512462615967,
      "learning_rate": 0.00037892612048205925,
      "loss": 2.9469,
      "step": 95677
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8960847854614258,
      "learning_rate": 0.0003789221740144879,
      "loss": 3.0073,
      "step": 95678
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3211605548858643,
      "learning_rate": 0.0003789182275322434,
      "loss": 2.7123,
      "step": 95679
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1947734355926514,
      "learning_rate": 0.00037891428103532646,
      "loss": 2.9133,
      "step": 95680
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4948309659957886,
      "learning_rate": 0.0003789103345237379,
      "loss": 3.009,
      "step": 95681
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1020777225494385,
      "learning_rate": 0.0003789063879974785,
      "loss": 2.9604,
      "step": 95682
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.0658822059631348,
      "learning_rate": 0.00037890244145654875,
      "loss": 2.6887,
      "step": 95683
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.547248125076294,
      "learning_rate": 0.00037889849490094967,
      "loss": 2.8504,
      "step": 95684
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6216191053390503,
      "learning_rate": 0.0003788945483306818,
      "loss": 2.9088,
      "step": 95685
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.98784601688385,
      "learning_rate": 0.00037889060174574595,
      "loss": 3.0003,
      "step": 95686
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7113007307052612,
      "learning_rate": 0.0003788866551461429,
      "loss": 2.8895,
      "step": 95687
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9467486143112183,
      "learning_rate": 0.00037888270853187335,
      "loss": 3.0291,
      "step": 95688
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0082688331604004,
      "learning_rate": 0.00037887876190293796,
      "loss": 3.0737,
      "step": 95689
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.215442419052124,
      "learning_rate": 0.00037887481525933754,
      "loss": 3.2064,
      "step": 95690
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.634690761566162,
      "learning_rate": 0.0003788708686010728,
      "loss": 2.9741,
      "step": 95691
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.971122145652771,
      "learning_rate": 0.00037886692192814445,
      "loss": 3.0225,
      "step": 95692
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.579497814178467,
      "learning_rate": 0.00037886297524055334,
      "loss": 2.9759,
      "step": 95693
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8395978212356567,
      "learning_rate": 0.0003788590285383,
      "loss": 3.1521,
      "step": 95694
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.030622720718384,
      "learning_rate": 0.0003788550818213853,
      "loss": 2.9889,
      "step": 95695
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8806036710739136,
      "learning_rate": 0.00037885113508981003,
      "loss": 2.8778,
      "step": 95696
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5106967687606812,
      "learning_rate": 0.0003788471883435747,
      "loss": 3.0623,
      "step": 95697
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5788239240646362,
      "learning_rate": 0.00037884324158268037,
      "loss": 3.0911,
      "step": 95698
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.922457218170166,
      "learning_rate": 0.0003788392948071275,
      "loss": 3.1515,
      "step": 95699
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6946706771850586,
      "learning_rate": 0.00037883534801691693,
      "loss": 3.0314,
      "step": 95700
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5846905708312988,
      "learning_rate": 0.00037883140121204936,
      "loss": 3.1172,
      "step": 95701
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.106351852416992,
      "learning_rate": 0.00037882745439252554,
      "loss": 2.9952,
      "step": 95702
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9192181825637817,
      "learning_rate": 0.00037882350755834624,
      "loss": 2.9666,
      "step": 95703
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7351486682891846,
      "learning_rate": 0.0003788195607095121,
      "loss": 3.1738,
      "step": 95704
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7735556364059448,
      "learning_rate": 0.000378815613846024,
      "loss": 2.8122,
      "step": 95705
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6574887037277222,
      "learning_rate": 0.00037881166696788256,
      "loss": 2.833,
      "step": 95706
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.592104434967041,
      "learning_rate": 0.00037880772007508847,
      "loss": 2.8967,
      "step": 95707
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3442912101745605,
      "learning_rate": 0.00037880377316764263,
      "loss": 3.3973,
      "step": 95708
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7106722593307495,
      "learning_rate": 0.00037879982624554565,
      "loss": 3.1663,
      "step": 95709
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3854575157165527,
      "learning_rate": 0.00037879587930879827,
      "loss": 3.1206,
      "step": 95710
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6845974922180176,
      "learning_rate": 0.0003787919323574013,
      "loss": 3.1129,
      "step": 95711
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6954834461212158,
      "learning_rate": 0.0003787879853913554,
      "loss": 3.1518,
      "step": 95712
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6034338474273682,
      "learning_rate": 0.00037878403841066127,
      "loss": 3.1157,
      "step": 95713
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4700818061828613,
      "learning_rate": 0.0003787800914153198,
      "loss": 2.9636,
      "step": 95714
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4521584510803223,
      "learning_rate": 0.00037877614440533154,
      "loss": 3.1348,
      "step": 95715
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7851641178131104,
      "learning_rate": 0.00037877219738069744,
      "loss": 3.0983,
      "step": 95716
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2511682510375977,
      "learning_rate": 0.00037876825034141793,
      "loss": 2.8145,
      "step": 95717
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1789093017578125,
      "learning_rate": 0.00037876430328749404,
      "loss": 2.8045,
      "step": 95718
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7238268852233887,
      "learning_rate": 0.00037876035621892633,
      "loss": 2.9416,
      "step": 95719
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6585843563079834,
      "learning_rate": 0.00037875640913571554,
      "loss": 3.0354,
      "step": 95720
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9840507507324219,
      "learning_rate": 0.00037875246203786255,
      "loss": 3.0969,
      "step": 95721
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9951671361923218,
      "learning_rate": 0.0003787485149253679,
      "loss": 2.7261,
      "step": 95722
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3404064178466797,
      "learning_rate": 0.0003787445677982324,
      "loss": 3.0708,
      "step": 95723
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.642176628112793,
      "learning_rate": 0.00037874062065645686,
      "loss": 2.9255,
      "step": 95724
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5787012577056885,
      "learning_rate": 0.00037873667350004194,
      "loss": 2.8244,
      "step": 95725
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6583518981933594,
      "learning_rate": 0.0003787327263289884,
      "loss": 3.1954,
      "step": 95726
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.865727186203003,
      "learning_rate": 0.00037872877914329697,
      "loss": 2.8181,
      "step": 95727
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.042346954345703,
      "learning_rate": 0.0003787248319429683,
      "loss": 3.0416,
      "step": 95728
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.9860680103302,
      "learning_rate": 0.00037872088472800324,
      "loss": 2.9862,
      "step": 95729
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.057936429977417,
      "learning_rate": 0.00037871693749840256,
      "loss": 2.7086,
      "step": 95730
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.529548168182373,
      "learning_rate": 0.0003787129902541668,
      "loss": 2.8406,
      "step": 95731
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7935600280761719,
      "learning_rate": 0.0003787090429952968,
      "loss": 2.731,
      "step": 95732
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.142643928527832,
      "learning_rate": 0.0003787050957217934,
      "loss": 2.9909,
      "step": 95733
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5915029048919678,
      "learning_rate": 0.0003787011484336572,
      "loss": 3.1352,
      "step": 95734
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8920910358428955,
      "learning_rate": 0.00037869720113088896,
      "loss": 3.1212,
      "step": 95735
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8157531023025513,
      "learning_rate": 0.0003786932538134895,
      "loss": 2.7487,
      "step": 95736
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2024295330047607,
      "learning_rate": 0.0003786893064814594,
      "loss": 2.9846,
      "step": 95737
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.007829189300537,
      "learning_rate": 0.00037868535913479945,
      "loss": 3.0491,
      "step": 95738
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.839470624923706,
      "learning_rate": 0.00037868141177351043,
      "loss": 3.1163,
      "step": 95739
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8226211071014404,
      "learning_rate": 0.00037867746439759303,
      "loss": 3.165,
      "step": 95740
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5882353782653809,
      "learning_rate": 0.000378673517007048,
      "loss": 3.1928,
      "step": 95741
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.879955530166626,
      "learning_rate": 0.0003786695696018762,
      "loss": 2.7124,
      "step": 95742
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9810762405395508,
      "learning_rate": 0.00037866562218207814,
      "loss": 2.894,
      "step": 95743
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7896010875701904,
      "learning_rate": 0.00037866167474765464,
      "loss": 3.0654,
      "step": 95744
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.752463698387146,
      "learning_rate": 0.0003786577272986065,
      "loss": 3.14,
      "step": 95745
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5464656352996826,
      "learning_rate": 0.0003786537798349343,
      "loss": 2.9538,
      "step": 95746
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5444239377975464,
      "learning_rate": 0.00037864983235663896,
      "loss": 3.2311,
      "step": 95747
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.531847596168518,
      "learning_rate": 0.00037864588486372124,
      "loss": 2.9616,
      "step": 95748
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2749438285827637,
      "learning_rate": 0.0003786419373561816,
      "loss": 2.849,
      "step": 95749
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5356336832046509,
      "learning_rate": 0.000378637989834021,
      "loss": 2.8537,
      "step": 95750
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5582324266433716,
      "learning_rate": 0.0003786340422972402,
      "loss": 3.0416,
      "step": 95751
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6081761121749878,
      "learning_rate": 0.00037863009474583967,
      "loss": 2.8995,
      "step": 95752
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.3671088218688965,
      "learning_rate": 0.00037862614717982043,
      "loss": 2.9437,
      "step": 95753
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0739591121673584,
      "learning_rate": 0.0003786221995991831,
      "loss": 2.9524,
      "step": 95754
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8170617818832397,
      "learning_rate": 0.00037861825200392836,
      "loss": 3.0734,
      "step": 95755
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7203806638717651,
      "learning_rate": 0.00037861430439405704,
      "loss": 2.911,
      "step": 95756
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.039874315261841,
      "learning_rate": 0.00037861035676956987,
      "loss": 2.9416,
      "step": 95757
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7857187986373901,
      "learning_rate": 0.0003786064091304675,
      "loss": 3.0849,
      "step": 95758
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0559067726135254,
      "learning_rate": 0.0003786024614767507,
      "loss": 2.9974,
      "step": 95759
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5613131523132324,
      "learning_rate": 0.0003785985138084203,
      "loss": 3.2202,
      "step": 95760
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2267959117889404,
      "learning_rate": 0.0003785945661254769,
      "loss": 2.8621,
      "step": 95761
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.08368182182312,
      "learning_rate": 0.0003785906184279212,
      "loss": 2.9672,
      "step": 95762
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9727954864501953,
      "learning_rate": 0.00037858667071575424,
      "loss": 3.3453,
      "step": 95763
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.88007652759552,
      "learning_rate": 0.0003785827229889764,
      "loss": 3.0761,
      "step": 95764
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7582402229309082,
      "learning_rate": 0.0003785787752475885,
      "loss": 2.9584,
      "step": 95765
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9543381929397583,
      "learning_rate": 0.00037857482749159143,
      "loss": 3.0145,
      "step": 95766
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9270590543746948,
      "learning_rate": 0.0003785708797209858,
      "loss": 2.9256,
      "step": 95767
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.102757692337036,
      "learning_rate": 0.00037856693193577225,
      "loss": 2.8391,
      "step": 95768
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9523119926452637,
      "learning_rate": 0.0003785629841359518,
      "loss": 2.7719,
      "step": 95769
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.153010606765747,
      "learning_rate": 0.0003785590363215248,
      "loss": 2.9705,
      "step": 95770
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7918654680252075,
      "learning_rate": 0.00037855508849249234,
      "loss": 3.0424,
      "step": 95771
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8729169368743896,
      "learning_rate": 0.000378551140648855,
      "loss": 3.0903,
      "step": 95772
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9318442344665527,
      "learning_rate": 0.0003785471927906134,
      "loss": 2.9878,
      "step": 95773
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1716434955596924,
      "learning_rate": 0.00037854324491776853,
      "loss": 2.8987,
      "step": 95774
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5703041553497314,
      "learning_rate": 0.00037853929703032097,
      "loss": 2.9788,
      "step": 95775
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5370850563049316,
      "learning_rate": 0.0003785353491282714,
      "loss": 3.0551,
      "step": 95776
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8886572122573853,
      "learning_rate": 0.00037853140121162066,
      "loss": 3.0095,
      "step": 95777
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4521210193634033,
      "learning_rate": 0.00037852745328036954,
      "loss": 3.044,
      "step": 95778
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.761391043663025,
      "learning_rate": 0.00037852350533451855,
      "loss": 3.0766,
      "step": 95779
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.72953200340271,
      "learning_rate": 0.0003785195573740686,
      "loss": 3.2147,
      "step": 95780
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4255074262619019,
      "learning_rate": 0.0003785156093990204,
      "loss": 2.9961,
      "step": 95781
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.727905511856079,
      "learning_rate": 0.00037851166140937464,
      "loss": 2.954,
      "step": 95782
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8052200078964233,
      "learning_rate": 0.00037850771340513205,
      "loss": 3.1417,
      "step": 95783
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8069459199905396,
      "learning_rate": 0.00037850376538629355,
      "loss": 3.0437,
      "step": 95784
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.930117130279541,
      "learning_rate": 0.00037849981735285957,
      "loss": 2.8541,
      "step": 95785
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9344695806503296,
      "learning_rate": 0.000378495869304831,
      "loss": 2.696,
      "step": 95786
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4482409954071045,
      "learning_rate": 0.00037849192124220855,
      "loss": 3.3533,
      "step": 95787
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6759510040283203,
      "learning_rate": 0.00037848797316499314,
      "loss": 3.1214,
      "step": 95788
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7642210721969604,
      "learning_rate": 0.0003784840250731851,
      "loss": 3.1407,
      "step": 95789
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0987143516540527,
      "learning_rate": 0.0003784800769667856,
      "loss": 2.9349,
      "step": 95790
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6012402772903442,
      "learning_rate": 0.0003784761288457951,
      "loss": 3.3427,
      "step": 95791
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.558663010597229,
      "learning_rate": 0.00037847218071021435,
      "loss": 3.1456,
      "step": 95792
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.963307499885559,
      "learning_rate": 0.0003784682325600442,
      "loss": 2.9644,
      "step": 95793
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7957175970077515,
      "learning_rate": 0.00037846428439528533,
      "loss": 2.7887,
      "step": 95794
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7504020929336548,
      "learning_rate": 0.0003784603362159384,
      "loss": 2.9125,
      "step": 95795
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9214065074920654,
      "learning_rate": 0.0003784563880220042,
      "loss": 2.8927,
      "step": 95796
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7374911308288574,
      "learning_rate": 0.00037845243981348363,
      "loss": 3.0309,
      "step": 95797
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7799830436706543,
      "learning_rate": 0.0003784484915903771,
      "loss": 3.1664,
      "step": 95798
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.771127700805664,
      "learning_rate": 0.00037844454335268557,
      "loss": 2.9836,
      "step": 95799
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.126438856124878,
      "learning_rate": 0.0003784405951004097,
      "loss": 2.9915,
      "step": 95800
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6501330137252808,
      "learning_rate": 0.0003784366468335503,
      "loss": 3.1698,
      "step": 95801
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6345514059066772,
      "learning_rate": 0.0003784326985521081,
      "loss": 3.1513,
      "step": 95802
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9356364011764526,
      "learning_rate": 0.0003784287502560837,
      "loss": 3.1795,
      "step": 95803
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9183144569396973,
      "learning_rate": 0.0003784248019454779,
      "loss": 2.8951,
      "step": 95804
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.758410096168518,
      "learning_rate": 0.0003784208536202915,
      "loss": 2.9823,
      "step": 95805
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6960878372192383,
      "learning_rate": 0.0003784169052805251,
      "loss": 3.1506,
      "step": 95806
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0263309478759766,
      "learning_rate": 0.0003784129569261796,
      "loss": 3.0896,
      "step": 95807
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1659774780273438,
      "learning_rate": 0.0003784090085572556,
      "loss": 3.0752,
      "step": 95808
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9842050075531006,
      "learning_rate": 0.00037840506017375395,
      "loss": 2.9815,
      "step": 95809
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.677156686782837,
      "learning_rate": 0.00037840111177567525,
      "loss": 2.8687,
      "step": 95810
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4330549240112305,
      "learning_rate": 0.00037839716336302026,
      "loss": 2.8604,
      "step": 95811
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9783728122711182,
      "learning_rate": 0.0003783932149357899,
      "loss": 2.8495,
      "step": 95812
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.786922574043274,
      "learning_rate": 0.0003783892664939847,
      "loss": 2.9276,
      "step": 95813
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.1727445125579834,
      "learning_rate": 0.00037838531803760533,
      "loss": 2.8944,
      "step": 95814
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.285508155822754,
      "learning_rate": 0.0003783813695666529,
      "loss": 2.9053,
      "step": 95815
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.766647219657898,
      "learning_rate": 0.00037837742108112765,
      "loss": 3.2416,
      "step": 95816
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7925219535827637,
      "learning_rate": 0.00037837347258103067,
      "loss": 2.9784,
      "step": 95817
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6695600748062134,
      "learning_rate": 0.0003783695240663626,
      "loss": 2.9636,
      "step": 95818
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.6978373527526855,
      "learning_rate": 0.0003783655755371241,
      "loss": 3.0715,
      "step": 95819
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.9756271839141846,
      "learning_rate": 0.000378361626993316,
      "loss": 2.9341,
      "step": 95820
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7374227046966553,
      "learning_rate": 0.0003783576784349391,
      "loss": 3.0919,
      "step": 95821
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9934806823730469,
      "learning_rate": 0.00037835372986199387,
      "loss": 2.828,
      "step": 95822
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5770070552825928,
      "learning_rate": 0.00037834978127448125,
      "loss": 3.0359,
      "step": 95823
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7226088047027588,
      "learning_rate": 0.00037834583267240193,
      "loss": 3.0082,
      "step": 95824
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4322094917297363,
      "learning_rate": 0.0003783418840557566,
      "loss": 2.8519,
      "step": 95825
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6486364603042603,
      "learning_rate": 0.00037833793542454606,
      "loss": 3.0172,
      "step": 95826
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4222207069396973,
      "learning_rate": 0.00037833398677877107,
      "loss": 2.6579,
      "step": 95827
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5046381950378418,
      "learning_rate": 0.00037833003811843226,
      "loss": 3.136,
      "step": 95828
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6601567268371582,
      "learning_rate": 0.00037832608944353045,
      "loss": 2.9655,
      "step": 95829
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6046096086502075,
      "learning_rate": 0.00037832214075406633,
      "loss": 3.308,
      "step": 95830
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.4442663192749023,
      "learning_rate": 0.0003783181920500407,
      "loss": 3.2251,
      "step": 95831
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.447951078414917,
      "learning_rate": 0.0003783142433314541,
      "loss": 3.0196,
      "step": 95832
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.912959337234497,
      "learning_rate": 0.0003783102945983075,
      "loss": 3.1721,
      "step": 95833
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.01239275932312,
      "learning_rate": 0.00037830634585060154,
      "loss": 3.1135,
      "step": 95834
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.088036060333252,
      "learning_rate": 0.0003783023970883369,
      "loss": 2.8391,
      "step": 95835
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6616251468658447,
      "learning_rate": 0.00037829844831151443,
      "loss": 3.163,
      "step": 95836
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6821662187576294,
      "learning_rate": 0.0003782944995201348,
      "loss": 2.9973,
      "step": 95837
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3284637928009033,
      "learning_rate": 0.0003782905507141986,
      "loss": 2.9339,
      "step": 95838
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.582453966140747,
      "learning_rate": 0.00037828660189370694,
      "loss": 2.8888,
      "step": 95839
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9031586647033691,
      "learning_rate": 0.0003782826530586602,
      "loss": 2.9738,
      "step": 95840
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8377742767333984,
      "learning_rate": 0.0003782787042090592,
      "loss": 3.1515,
      "step": 95841
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.574124813079834,
      "learning_rate": 0.00037827475534490474,
      "loss": 2.8826,
      "step": 95842
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7616454362869263,
      "learning_rate": 0.0003782708064661976,
      "loss": 3.1156,
      "step": 95843
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9471811056137085,
      "learning_rate": 0.0003782668575729383,
      "loss": 2.7123,
      "step": 95844
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1392099857330322,
      "learning_rate": 0.0003782629086651278,
      "loss": 2.9655,
      "step": 95845
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6260879039764404,
      "learning_rate": 0.00037825895974276673,
      "loss": 3.0929,
      "step": 95846
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.82479727268219,
      "learning_rate": 0.00037825501080585586,
      "loss": 2.9586,
      "step": 95847
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.367023468017578,
      "learning_rate": 0.000378251061854396,
      "loss": 3.2689,
      "step": 95848
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5952658653259277,
      "learning_rate": 0.0003782471128883876,
      "loss": 3.1652,
      "step": 95849
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.741100549697876,
      "learning_rate": 0.00037824316390783165,
      "loss": 3.1594,
      "step": 95850
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.9932708740234375,
      "learning_rate": 0.0003782392149127289,
      "loss": 2.8514,
      "step": 95851
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9312386512756348,
      "learning_rate": 0.0003782352659030799,
      "loss": 2.8901,
      "step": 95852
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5085018873214722,
      "learning_rate": 0.0003782313168788855,
      "loss": 2.9834,
      "step": 95853
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.9402172565460205,
      "learning_rate": 0.00037822736784014644,
      "loss": 3.1601,
      "step": 95854
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.919779896736145,
      "learning_rate": 0.0003782234187868635,
      "loss": 3.1086,
      "step": 95855
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.485532283782959,
      "learning_rate": 0.00037821946971903727,
      "loss": 2.8699,
      "step": 95856
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.924951195716858,
      "learning_rate": 0.00037821552063666865,
      "loss": 2.9493,
      "step": 95857
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5125210285186768,
      "learning_rate": 0.00037821157153975827,
      "loss": 2.7908,
      "step": 95858
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5923725366592407,
      "learning_rate": 0.00037820762242830685,
      "loss": 2.9576,
      "step": 95859
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.4933125972747803,
      "learning_rate": 0.0003782036733023151,
      "loss": 2.9411,
      "step": 95860
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.52805233001709,
      "learning_rate": 0.0003781997241617839,
      "loss": 3.1112,
      "step": 95861
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8033807277679443,
      "learning_rate": 0.00037819577500671387,
      "loss": 2.8868,
      "step": 95862
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.248006582260132,
      "learning_rate": 0.0003781918258371057,
      "loss": 3.0245,
      "step": 95863
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.5725479125976562,
      "learning_rate": 0.0003781878766529603,
      "loss": 2.8198,
      "step": 95864
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1593542098999023,
      "learning_rate": 0.00037818392745427823,
      "loss": 2.9666,
      "step": 95865
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.815005898475647,
      "learning_rate": 0.0003781799782410603,
      "loss": 3.1283,
      "step": 95866
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.871487617492676,
      "learning_rate": 0.0003781760290133072,
      "loss": 2.9298,
      "step": 95867
    },
    {
      "epoch": 1.25,
      "grad_norm": 4.0620198249816895,
      "learning_rate": 0.00037817207977101973,
      "loss": 3.2003,
      "step": 95868
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.655792236328125,
      "learning_rate": 0.00037816813051419864,
      "loss": 2.8454,
      "step": 95869
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.929276704788208,
      "learning_rate": 0.00037816418124284463,
      "loss": 2.8875,
      "step": 95870
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2415285110473633,
      "learning_rate": 0.0003781602319569584,
      "loss": 3.313,
      "step": 95871
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.1752724647521973,
      "learning_rate": 0.00037815628265654063,
      "loss": 3.007,
      "step": 95872
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9394385814666748,
      "learning_rate": 0.0003781523333415922,
      "loss": 3.1349,
      "step": 95873
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7305299043655396,
      "learning_rate": 0.0003781483840121138,
      "loss": 2.9797,
      "step": 95874
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.82249116897583,
      "learning_rate": 0.0003781444346681061,
      "loss": 2.9446,
      "step": 95875
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7682392597198486,
      "learning_rate": 0.0003781404853095699,
      "loss": 3.0125,
      "step": 95876
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.506697177886963,
      "learning_rate": 0.0003781365359365059,
      "loss": 3.1358,
      "step": 95877
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.66145920753479,
      "learning_rate": 0.0003781325865489148,
      "loss": 2.9225,
      "step": 95878
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.317784070968628,
      "learning_rate": 0.0003781286371467975,
      "loss": 3.0664,
      "step": 95879
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.0461339950561523,
      "learning_rate": 0.0003781246877301545,
      "loss": 3.1381,
      "step": 95880
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5571162700653076,
      "learning_rate": 0.00037812073829898664,
      "loss": 2.788,
      "step": 95881
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8742644786834717,
      "learning_rate": 0.0003781167888532947,
      "loss": 2.9763,
      "step": 95882
    },
    {
      "epoch": 1.25,
      "grad_norm": 4.424690246582031,
      "learning_rate": 0.00037811283939307935,
      "loss": 2.8205,
      "step": 95883
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.355956554412842,
      "learning_rate": 0.0003781088899183414,
      "loss": 3.0168,
      "step": 95884
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7986749410629272,
      "learning_rate": 0.00037810494042908143,
      "loss": 3.0718,
      "step": 95885
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8468166589736938,
      "learning_rate": 0.0003781009909253004,
      "loss": 2.9553,
      "step": 95886
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1176586151123047,
      "learning_rate": 0.00037809704140699885,
      "loss": 2.8213,
      "step": 95887
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.687481164932251,
      "learning_rate": 0.00037809309187417756,
      "loss": 2.9221,
      "step": 95888
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.522481918334961,
      "learning_rate": 0.00037808914232683735,
      "loss": 2.8487,
      "step": 95889
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.2845358848571777,
      "learning_rate": 0.0003780851927649789,
      "loss": 3.2382,
      "step": 95890
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.38667368888855,
      "learning_rate": 0.0003780812431886029,
      "loss": 3.1571,
      "step": 95891
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.06084942817688,
      "learning_rate": 0.0003780772935977101,
      "loss": 3.1238,
      "step": 95892
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7868242263793945,
      "learning_rate": 0.0003780733439923013,
      "loss": 2.9025,
      "step": 95893
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6759307384490967,
      "learning_rate": 0.0003780693943723772,
      "loss": 3.0954,
      "step": 95894
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.029181957244873,
      "learning_rate": 0.00037806544473793845,
      "loss": 3.0703,
      "step": 95895
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6268212795257568,
      "learning_rate": 0.000378061495088986,
      "loss": 2.8027,
      "step": 95896
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7655686140060425,
      "learning_rate": 0.00037805754542552034,
      "loss": 3.2065,
      "step": 95897
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1499974727630615,
      "learning_rate": 0.00037805359574754233,
      "loss": 3.068,
      "step": 95898
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1519930362701416,
      "learning_rate": 0.0003780496460550527,
      "loss": 3.1029,
      "step": 95899
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8531864881515503,
      "learning_rate": 0.00037804569634805217,
      "loss": 3.1117,
      "step": 95900
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5330263376235962,
      "learning_rate": 0.00037804174662654143,
      "loss": 2.9573,
      "step": 95901
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.421581268310547,
      "learning_rate": 0.00037803779689052125,
      "loss": 3.0253,
      "step": 95902
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1426751613616943,
      "learning_rate": 0.0003780338471399925,
      "loss": 3.0738,
      "step": 95903
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5986288785934448,
      "learning_rate": 0.00037802989737495564,
      "loss": 3.0359,
      "step": 95904
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4891935586929321,
      "learning_rate": 0.0003780259475954115,
      "loss": 3.1365,
      "step": 95905
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7465287446975708,
      "learning_rate": 0.0003780219978013611,
      "loss": 3.1557,
      "step": 95906
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.97401762008667,
      "learning_rate": 0.00037801804799280476,
      "loss": 3.1891,
      "step": 95907
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9720954895019531,
      "learning_rate": 0.0003780140981697434,
      "loss": 2.9038,
      "step": 95908
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5483133792877197,
      "learning_rate": 0.00037801014833217783,
      "loss": 3.1619,
      "step": 95909
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9241808652877808,
      "learning_rate": 0.0003780061984801086,
      "loss": 3.1285,
      "step": 95910
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6069483757019043,
      "learning_rate": 0.0003780022486135366,
      "loss": 3.1714,
      "step": 95911
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8698958158493042,
      "learning_rate": 0.00037799829873246254,
      "loss": 3.1512,
      "step": 95912
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.921254277229309,
      "learning_rate": 0.0003779943488368871,
      "loss": 3.1043,
      "step": 95913
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5476797819137573,
      "learning_rate": 0.000377990398926811,
      "loss": 2.8627,
      "step": 95914
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8994947671890259,
      "learning_rate": 0.00037798644900223505,
      "loss": 2.8257,
      "step": 95915
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9869170188903809,
      "learning_rate": 0.0003779824990631599,
      "loss": 3.0047,
      "step": 95916
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.616506576538086,
      "learning_rate": 0.00037797854910958637,
      "loss": 3.0696,
      "step": 95917
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5423084497451782,
      "learning_rate": 0.0003779745991415152,
      "loss": 2.9787,
      "step": 95918
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.946824550628662,
      "learning_rate": 0.000377970649158947,
      "loss": 3.0043,
      "step": 95919
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.097888231277466,
      "learning_rate": 0.0003779666991618826,
      "loss": 3.0199,
      "step": 95920
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.720110535621643,
      "learning_rate": 0.00037796274915032275,
      "loss": 3.0282,
      "step": 95921
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1505324840545654,
      "learning_rate": 0.0003779587991242682,
      "loss": 2.8905,
      "step": 95922
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0585596561431885,
      "learning_rate": 0.0003779548490837195,
      "loss": 3.1025,
      "step": 95923
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7529106140136719,
      "learning_rate": 0.0003779508990286776,
      "loss": 3.1698,
      "step": 95924
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.101611614227295,
      "learning_rate": 0.0003779469489591432,
      "loss": 2.7632,
      "step": 95925
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9587833881378174,
      "learning_rate": 0.00037794299887511693,
      "loss": 3.0542,
      "step": 95926
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8570131063461304,
      "learning_rate": 0.0003779390487765996,
      "loss": 3.0604,
      "step": 95927
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8406822681427002,
      "learning_rate": 0.000377935098663592,
      "loss": 3.1077,
      "step": 95928
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8629590272903442,
      "learning_rate": 0.0003779311485360947,
      "loss": 3.0431,
      "step": 95929
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5889791250228882,
      "learning_rate": 0.0003779271983941085,
      "loss": 2.9015,
      "step": 95930
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.942557454109192,
      "learning_rate": 0.00037792324823763423,
      "loss": 3.2468,
      "step": 95931
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8539360761642456,
      "learning_rate": 0.0003779192980666726,
      "loss": 3.1128,
      "step": 95932
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8547524213790894,
      "learning_rate": 0.00037791534788122425,
      "loss": 2.9217,
      "step": 95933
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.15380859375,
      "learning_rate": 0.00037791139768128995,
      "loss": 2.8057,
      "step": 95934
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8996905088424683,
      "learning_rate": 0.00037790744746687044,
      "loss": 3.0541,
      "step": 95935
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.355315923690796,
      "learning_rate": 0.0003779034972379665,
      "loss": 3.143,
      "step": 95936
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.752852439880371,
      "learning_rate": 0.00037789954699457887,
      "loss": 2.9812,
      "step": 95937
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9675557613372803,
      "learning_rate": 0.0003778955967367081,
      "loss": 2.9991,
      "step": 95938
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0549440383911133,
      "learning_rate": 0.0003778916464643552,
      "loss": 3.1419,
      "step": 95939
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9205048084259033,
      "learning_rate": 0.0003778876961775208,
      "loss": 2.9898,
      "step": 95940
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.77329421043396,
      "learning_rate": 0.00037788374587620544,
      "loss": 2.9806,
      "step": 95941
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7422857284545898,
      "learning_rate": 0.0003778797955604101,
      "loss": 3.098,
      "step": 95942
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0256221294403076,
      "learning_rate": 0.0003778758452301355,
      "loss": 3.0967,
      "step": 95943
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8270366191864014,
      "learning_rate": 0.0003778718948853823,
      "loss": 2.8819,
      "step": 95944
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7019810676574707,
      "learning_rate": 0.0003778679445261512,
      "loss": 2.7367,
      "step": 95945
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.497106909751892,
      "learning_rate": 0.00037786399415244297,
      "loss": 2.8948,
      "step": 95946
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9941729307174683,
      "learning_rate": 0.0003778600437642584,
      "loss": 3.2577,
      "step": 95947
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.911644697189331,
      "learning_rate": 0.00037785609336159817,
      "loss": 3.109,
      "step": 95948
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.653201937675476,
      "learning_rate": 0.000377852142944463,
      "loss": 3.1852,
      "step": 95949
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7694731950759888,
      "learning_rate": 0.0003778481925128536,
      "loss": 3.0372,
      "step": 95950
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.630857229232788,
      "learning_rate": 0.0003778442420667708,
      "loss": 2.8724,
      "step": 95951
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6750315427780151,
      "learning_rate": 0.00037784029160621536,
      "loss": 2.911,
      "step": 95952
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7484904527664185,
      "learning_rate": 0.00037783634113118785,
      "loss": 2.8576,
      "step": 95953
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8397140502929688,
      "learning_rate": 0.00037783239064168906,
      "loss": 2.9092,
      "step": 95954
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8513933420181274,
      "learning_rate": 0.00037782844013771986,
      "loss": 2.8791,
      "step": 95955
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.701158881187439,
      "learning_rate": 0.00037782448961928085,
      "loss": 3.0758,
      "step": 95956
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.310962200164795,
      "learning_rate": 0.0003778205390863728,
      "loss": 3.1058,
      "step": 95957
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1805994510650635,
      "learning_rate": 0.00037781658853899646,
      "loss": 3.1788,
      "step": 95958
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.075707197189331,
      "learning_rate": 0.00037781263797715244,
      "loss": 2.9861,
      "step": 95959
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.48476243019104,
      "learning_rate": 0.0003778086874008417,
      "loss": 2.8308,
      "step": 95960
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0807278156280518,
      "learning_rate": 0.00037780473681006487,
      "loss": 3.0994,
      "step": 95961
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.783902645111084,
      "learning_rate": 0.00037780078620482257,
      "loss": 3.0277,
      "step": 95962
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7470524311065674,
      "learning_rate": 0.00037779683558511565,
      "loss": 3.2189,
      "step": 95963
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.762966513633728,
      "learning_rate": 0.00037779288495094494,
      "loss": 3.1296,
      "step": 95964
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4621833562850952,
      "learning_rate": 0.0003777889343023109,
      "loss": 2.9852,
      "step": 95965
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.620041847229004,
      "learning_rate": 0.0003777849836392145,
      "loss": 3.3035,
      "step": 95966
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9544601440429688,
      "learning_rate": 0.00037778103296165646,
      "loss": 2.8855,
      "step": 95967
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7717128992080688,
      "learning_rate": 0.00037777708226963743,
      "loss": 3.0362,
      "step": 95968
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.062601089477539,
      "learning_rate": 0.00037777313156315813,
      "loss": 3.1251,
      "step": 95969
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.611701488494873,
      "learning_rate": 0.00037776918084221937,
      "loss": 3.0195,
      "step": 95970
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8199981451034546,
      "learning_rate": 0.00037776523010682185,
      "loss": 3.2032,
      "step": 95971
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6060255765914917,
      "learning_rate": 0.0003777612793569663,
      "loss": 3.131,
      "step": 95972
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5250368118286133,
      "learning_rate": 0.0003777573285926534,
      "loss": 2.8403,
      "step": 95973
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.203660249710083,
      "learning_rate": 0.00037775337781388406,
      "loss": 2.8929,
      "step": 95974
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2540693283081055,
      "learning_rate": 0.00037774942702065883,
      "loss": 2.9977,
      "step": 95975
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7257301807403564,
      "learning_rate": 0.00037774547621297847,
      "loss": 3.0149,
      "step": 95976
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2109498977661133,
      "learning_rate": 0.00037774152539084385,
      "loss": 2.8844,
      "step": 95977
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5882487297058105,
      "learning_rate": 0.00037773757455425563,
      "loss": 2.7651,
      "step": 95978
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4879804849624634,
      "learning_rate": 0.00037773362370321445,
      "loss": 2.8939,
      "step": 95979
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.182896137237549,
      "learning_rate": 0.00037772967283772113,
      "loss": 2.9936,
      "step": 95980
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0080535411834717,
      "learning_rate": 0.00037772572195777647,
      "loss": 2.8796,
      "step": 95981
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2640273571014404,
      "learning_rate": 0.00037772177106338103,
      "loss": 2.8726,
      "step": 95982
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8365596532821655,
      "learning_rate": 0.00037771782015453566,
      "loss": 3.1181,
      "step": 95983
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6120045185089111,
      "learning_rate": 0.00037771386923124114,
      "loss": 2.7874,
      "step": 95984
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.810707449913025,
      "learning_rate": 0.00037770991829349815,
      "loss": 3.0639,
      "step": 95985
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6455833911895752,
      "learning_rate": 0.00037770596734130736,
      "loss": 2.9778,
      "step": 95986
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0784385204315186,
      "learning_rate": 0.0003777020163746696,
      "loss": 2.9243,
      "step": 95987
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6091156005859375,
      "learning_rate": 0.00037769806539358555,
      "loss": 2.9784,
      "step": 95988
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.903714895248413,
      "learning_rate": 0.000377694114398056,
      "loss": 2.7912,
      "step": 95989
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5468307733535767,
      "learning_rate": 0.0003776901633880816,
      "loss": 3.1564,
      "step": 95990
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6321735382080078,
      "learning_rate": 0.0003776862123636631,
      "loss": 3.004,
      "step": 95991
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.878096103668213,
      "learning_rate": 0.0003776822613248013,
      "loss": 3.2163,
      "step": 95992
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9769020080566406,
      "learning_rate": 0.00037767831027149697,
      "loss": 2.9372,
      "step": 95993
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8670097589492798,
      "learning_rate": 0.0003776743592037507,
      "loss": 2.8703,
      "step": 95994
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8541744947433472,
      "learning_rate": 0.0003776704081215633,
      "loss": 3.2361,
      "step": 95995
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.604562997817993,
      "learning_rate": 0.0003776664570249356,
      "loss": 3.1456,
      "step": 95996
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.350778341293335,
      "learning_rate": 0.0003776625059138681,
      "loss": 2.8709,
      "step": 95997
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6708717346191406,
      "learning_rate": 0.00037765855478836185,
      "loss": 3.1265,
      "step": 95998
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5804705619812012,
      "learning_rate": 0.0003776546036484173,
      "loss": 3.0297,
      "step": 95999
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1231014728546143,
      "learning_rate": 0.0003776506524940352,
      "loss": 3.0965,
      "step": 96000
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0475382804870605,
      "learning_rate": 0.0003776467013252165,
      "loss": 2.9914,
      "step": 96001
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9097965955734253,
      "learning_rate": 0.0003776427501419618,
      "loss": 3.0014,
      "step": 96002
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.353721857070923,
      "learning_rate": 0.0003776387989442718,
      "loss": 3.0816,
      "step": 96003
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.890100359916687,
      "learning_rate": 0.0003776348477321474,
      "loss": 3.0712,
      "step": 96004
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2865986824035645,
      "learning_rate": 0.0003776308965055891,
      "loss": 3.028,
      "step": 96005
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0704565048217773,
      "learning_rate": 0.00037762694526459776,
      "loss": 2.8505,
      "step": 96006
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5795918703079224,
      "learning_rate": 0.0003776229940091742,
      "loss": 2.9954,
      "step": 96007
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7158751487731934,
      "learning_rate": 0.00037761904273931896,
      "loss": 3.0518,
      "step": 96008
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7236706018447876,
      "learning_rate": 0.0003776150914550329,
      "loss": 2.9573,
      "step": 96009
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1237294673919678,
      "learning_rate": 0.0003776111401563167,
      "loss": 3.0181,
      "step": 96010
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1442885398864746,
      "learning_rate": 0.00037760718884317115,
      "loss": 2.9992,
      "step": 96011
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5301930904388428,
      "learning_rate": 0.00037760323751559694,
      "loss": 2.7552,
      "step": 96012
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6314457654953003,
      "learning_rate": 0.0003775992861735949,
      "loss": 3.0871,
      "step": 96013
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0130019187927246,
      "learning_rate": 0.00037759533481716563,
      "loss": 3.2902,
      "step": 96014
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0909862518310547,
      "learning_rate": 0.00037759138344630995,
      "loss": 3.0267,
      "step": 96015
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3961715698242188,
      "learning_rate": 0.00037758743206102853,
      "loss": 2.8388,
      "step": 96016
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5044631958007812,
      "learning_rate": 0.0003775834806613222,
      "loss": 3.055,
      "step": 96017
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9203282594680786,
      "learning_rate": 0.00037757952924719155,
      "loss": 3.043,
      "step": 96018
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8967081308364868,
      "learning_rate": 0.0003775755778186375,
      "loss": 2.9455,
      "step": 96019
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.455545425415039,
      "learning_rate": 0.00037757162637566064,
      "loss": 3.0182,
      "step": 96020
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.726161241531372,
      "learning_rate": 0.00037756767491826167,
      "loss": 2.9135,
      "step": 96021
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0101046562194824,
      "learning_rate": 0.0003775637234464415,
      "loss": 3.0629,
      "step": 96022
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1493730545043945,
      "learning_rate": 0.00037755977196020075,
      "loss": 2.8585,
      "step": 96023
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4473867416381836,
      "learning_rate": 0.00037755582045954016,
      "loss": 2.877,
      "step": 96024
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.913002610206604,
      "learning_rate": 0.00037755186894446055,
      "loss": 3.0306,
      "step": 96025
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4888787269592285,
      "learning_rate": 0.0003775479174149625,
      "loss": 2.9933,
      "step": 96026
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8218631744384766,
      "learning_rate": 0.00037754396587104687,
      "loss": 2.8176,
      "step": 96027
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6703758239746094,
      "learning_rate": 0.0003775400143127143,
      "loss": 3.2463,
      "step": 96028
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6451343297958374,
      "learning_rate": 0.0003775360627399656,
      "loss": 3.2158,
      "step": 96029
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8484463691711426,
      "learning_rate": 0.0003775321111528015,
      "loss": 3.0202,
      "step": 96030
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5697523355484009,
      "learning_rate": 0.0003775281595512228,
      "loss": 3.1663,
      "step": 96031
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0669400691986084,
      "learning_rate": 0.00037752420793523003,
      "loss": 2.7505,
      "step": 96032
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.476372241973877,
      "learning_rate": 0.00037752025630482405,
      "loss": 3.0313,
      "step": 96033
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6158344745635986,
      "learning_rate": 0.00037751630466000566,
      "loss": 3.0355,
      "step": 96034
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1469409465789795,
      "learning_rate": 0.00037751235300077546,
      "loss": 2.9173,
      "step": 96035
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4146745204925537,
      "learning_rate": 0.00037750840132713426,
      "loss": 3.0506,
      "step": 96036
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7308757305145264,
      "learning_rate": 0.0003775044496390829,
      "loss": 2.9135,
      "step": 96037
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8483506441116333,
      "learning_rate": 0.0003775004979366218,
      "loss": 3.0758,
      "step": 96038
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6912139654159546,
      "learning_rate": 0.00037749654621975205,
      "loss": 3.0158,
      "step": 96039
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8663973808288574,
      "learning_rate": 0.0003774925944884742,
      "loss": 2.9861,
      "step": 96040
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.711670994758606,
      "learning_rate": 0.00037748864274278894,
      "loss": 3.2467,
      "step": 96041
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.538815975189209,
      "learning_rate": 0.0003774846909826971,
      "loss": 3.0112,
      "step": 96042
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.6126232147216797,
      "learning_rate": 0.0003774807392081995,
      "loss": 3.1852,
      "step": 96043
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7374026775360107,
      "learning_rate": 0.0003774767874192966,
      "loss": 2.9433,
      "step": 96044
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.061645030975342,
      "learning_rate": 0.00037747283561598944,
      "loss": 2.8211,
      "step": 96045
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7752747535705566,
      "learning_rate": 0.0003774688837982786,
      "loss": 3.0159,
      "step": 96046
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.609342098236084,
      "learning_rate": 0.00037746493196616477,
      "loss": 2.8473,
      "step": 96047
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.584817409515381,
      "learning_rate": 0.0003774609801196487,
      "loss": 2.9864,
      "step": 96048
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.8199424743652344,
      "learning_rate": 0.0003774570282587314,
      "loss": 2.8812,
      "step": 96049
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9666907787322998,
      "learning_rate": 0.0003774530763834132,
      "loss": 2.8957,
      "step": 96050
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7240383625030518,
      "learning_rate": 0.000377449124493695,
      "loss": 2.9915,
      "step": 96051
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.837882399559021,
      "learning_rate": 0.0003774451725895776,
      "loss": 2.9634,
      "step": 96052
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.761867880821228,
      "learning_rate": 0.00037744122067106167,
      "loss": 3.1325,
      "step": 96053
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5831915140151978,
      "learning_rate": 0.00037743726873814794,
      "loss": 2.8648,
      "step": 96054
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5710108280181885,
      "learning_rate": 0.00037743331679083715,
      "loss": 2.6926,
      "step": 96055
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4376013278961182,
      "learning_rate": 0.00037742936482913005,
      "loss": 3.0153,
      "step": 96056
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6455144882202148,
      "learning_rate": 0.00037742541285302734,
      "loss": 2.8886,
      "step": 96057
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8367611169815063,
      "learning_rate": 0.00037742146086252985,
      "loss": 2.8948,
      "step": 96058
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6203562021255493,
      "learning_rate": 0.0003774175088576383,
      "loss": 2.8157,
      "step": 96059
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1283435821533203,
      "learning_rate": 0.00037741355683835326,
      "loss": 3.0206,
      "step": 96060
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1345174312591553,
      "learning_rate": 0.00037740960480467557,
      "loss": 3.0872,
      "step": 96061
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6496827602386475,
      "learning_rate": 0.00037740565275660613,
      "loss": 2.8939,
      "step": 96062
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9525432586669922,
      "learning_rate": 0.00037740170069414537,
      "loss": 2.7539,
      "step": 96063
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6080033779144287,
      "learning_rate": 0.00037739774861729415,
      "loss": 3.1734,
      "step": 96064
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6915788650512695,
      "learning_rate": 0.0003773937965260533,
      "loss": 3.0792,
      "step": 96065
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6377426385879517,
      "learning_rate": 0.00037738984442042356,
      "loss": 3.0373,
      "step": 96066
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7108360528945923,
      "learning_rate": 0.00037738589230040543,
      "loss": 3.0171,
      "step": 96067
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6580976247787476,
      "learning_rate": 0.0003773819401659999,
      "loss": 2.7917,
      "step": 96068
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8664352893829346,
      "learning_rate": 0.0003773779880172076,
      "loss": 3.158,
      "step": 96069
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6613779067993164,
      "learning_rate": 0.0003773740358540292,
      "loss": 2.939,
      "step": 96070
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2151191234588623,
      "learning_rate": 0.0003773700836764655,
      "loss": 3.0401,
      "step": 96071
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8484102487564087,
      "learning_rate": 0.00037736613148451736,
      "loss": 3.0932,
      "step": 96072
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8530350923538208,
      "learning_rate": 0.00037736217927818535,
      "loss": 2.8323,
      "step": 96073
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.1332273483276367,
      "learning_rate": 0.00037735822705747027,
      "loss": 2.9792,
      "step": 96074
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8339918851852417,
      "learning_rate": 0.00037735427482237275,
      "loss": 2.9254,
      "step": 96075
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8385674953460693,
      "learning_rate": 0.0003773503225728936,
      "loss": 3.0961,
      "step": 96076
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.546473264694214,
      "learning_rate": 0.00037734637030903374,
      "loss": 2.8971,
      "step": 96077
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.746279001235962,
      "learning_rate": 0.0003773424180307936,
      "loss": 2.9953,
      "step": 96078
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9324859380722046,
      "learning_rate": 0.000377338465738174,
      "loss": 2.868,
      "step": 96079
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7877309322357178,
      "learning_rate": 0.0003773345134311758,
      "loss": 3.1855,
      "step": 96080
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.8025755882263184,
      "learning_rate": 0.0003773305611097996,
      "loss": 3.273,
      "step": 96081
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.2987606525421143,
      "learning_rate": 0.0003773266087740462,
      "loss": 3.017,
      "step": 96082
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8117892742156982,
      "learning_rate": 0.0003773226564239164,
      "loss": 3.05,
      "step": 96083
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.656099557876587,
      "learning_rate": 0.0003773187040594108,
      "loss": 3.0327,
      "step": 96084
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.292785882949829,
      "learning_rate": 0.00037731475168053017,
      "loss": 3.3065,
      "step": 96085
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.031791925430298,
      "learning_rate": 0.00037731079928727537,
      "loss": 2.8491,
      "step": 96086
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8326694965362549,
      "learning_rate": 0.0003773068468796469,
      "loss": 2.8481,
      "step": 96087
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4404075145721436,
      "learning_rate": 0.0003773028944576457,
      "loss": 2.6671,
      "step": 96088
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2314178943634033,
      "learning_rate": 0.00037729894202127243,
      "loss": 3.0803,
      "step": 96089
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7937829494476318,
      "learning_rate": 0.0003772949895705278,
      "loss": 3.0112,
      "step": 96090
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4677376747131348,
      "learning_rate": 0.0003772910371054125,
      "loss": 2.9906,
      "step": 96091
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6198762655258179,
      "learning_rate": 0.0003772870846259275,
      "loss": 3.4585,
      "step": 96092
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.67186439037323,
      "learning_rate": 0.0003772831321320733,
      "loss": 2.8377,
      "step": 96093
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2930727005004883,
      "learning_rate": 0.0003772791796238507,
      "loss": 2.8533,
      "step": 96094
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1861214637756348,
      "learning_rate": 0.0003772752271012605,
      "loss": 2.818,
      "step": 96095
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3393917083740234,
      "learning_rate": 0.0003772712745643033,
      "loss": 3.0619,
      "step": 96096
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.41544508934021,
      "learning_rate": 0.0003772673220129799,
      "loss": 2.687,
      "step": 96097
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0076353549957275,
      "learning_rate": 0.00037726336944729114,
      "loss": 2.8694,
      "step": 96098
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.06396484375,
      "learning_rate": 0.0003772594168672376,
      "loss": 2.8843,
      "step": 96099
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9266088008880615,
      "learning_rate": 0.00037725546427282,
      "loss": 2.9899,
      "step": 96100
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9608991146087646,
      "learning_rate": 0.0003772515116640393,
      "loss": 3.1725,
      "step": 96101
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7270320653915405,
      "learning_rate": 0.00037724755904089603,
      "loss": 3.0948,
      "step": 96102
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2477517127990723,
      "learning_rate": 0.0003772436064033909,
      "loss": 3.0416,
      "step": 96103
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1789181232452393,
      "learning_rate": 0.00037723965375152484,
      "loss": 2.8655,
      "step": 96104
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9063249826431274,
      "learning_rate": 0.00037723570108529845,
      "loss": 3.1275,
      "step": 96105
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9711357355117798,
      "learning_rate": 0.0003772317484047124,
      "loss": 2.9938,
      "step": 96106
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.690064549446106,
      "learning_rate": 0.00037722779570976755,
      "loss": 3.0818,
      "step": 96107
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3711767196655273,
      "learning_rate": 0.0003772238430004646,
      "loss": 3.0306,
      "step": 96108
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9242736101150513,
      "learning_rate": 0.00037721989027680435,
      "loss": 3.0689,
      "step": 96109
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.111283302307129,
      "learning_rate": 0.0003772159375387874,
      "loss": 2.9421,
      "step": 96110
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1208527088165283,
      "learning_rate": 0.00037721198478641454,
      "loss": 3.006,
      "step": 96111
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6905068159103394,
      "learning_rate": 0.00037720803201968657,
      "loss": 2.9759,
      "step": 96112
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7132415771484375,
      "learning_rate": 0.0003772040792386041,
      "loss": 2.9189,
      "step": 96113
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.999535083770752,
      "learning_rate": 0.000377200126443168,
      "loss": 2.8602,
      "step": 96114
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.172006845474243,
      "learning_rate": 0.00037719617363337883,
      "loss": 3.0107,
      "step": 96115
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7656009197235107,
      "learning_rate": 0.0003771922208092376,
      "loss": 3.0302,
      "step": 96116
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.344740629196167,
      "learning_rate": 0.0003771882679707447,
      "loss": 3.079,
      "step": 96117
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0136399269104004,
      "learning_rate": 0.00037718431511790115,
      "loss": 2.8542,
      "step": 96118
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.871093988418579,
      "learning_rate": 0.0003771803622507076,
      "loss": 3.3928,
      "step": 96119
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1346046924591064,
      "learning_rate": 0.00037717640936916466,
      "loss": 2.8045,
      "step": 96120
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0225961208343506,
      "learning_rate": 0.00037717245647327324,
      "loss": 2.9489,
      "step": 96121
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6690667867660522,
      "learning_rate": 0.000377168503563034,
      "loss": 3.0195,
      "step": 96122
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0166211128234863,
      "learning_rate": 0.00037716455063844774,
      "loss": 2.9842,
      "step": 96123
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0904157161712646,
      "learning_rate": 0.000377160597699515,
      "loss": 2.8143,
      "step": 96124
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2029662132263184,
      "learning_rate": 0.0003771566447462367,
      "loss": 3.0566,
      "step": 96125
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1207549571990967,
      "learning_rate": 0.0003771526917786136,
      "loss": 2.8326,
      "step": 96126
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.169293165206909,
      "learning_rate": 0.0003771487387966463,
      "loss": 2.6637,
      "step": 96127
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4796392917633057,
      "learning_rate": 0.00037714478580033555,
      "loss": 2.9588,
      "step": 96128
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8996162414550781,
      "learning_rate": 0.0003771408327896822,
      "loss": 3.0599,
      "step": 96129
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9941668510437012,
      "learning_rate": 0.00037713687976468686,
      "loss": 2.8876,
      "step": 96130
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.158477783203125,
      "learning_rate": 0.00037713292672535034,
      "loss": 3.1522,
      "step": 96131
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.363507032394409,
      "learning_rate": 0.0003771289736716734,
      "loss": 2.8829,
      "step": 96132
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5940316915512085,
      "learning_rate": 0.00037712502060365666,
      "loss": 3.1796,
      "step": 96133
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.402256488800049,
      "learning_rate": 0.0003771210675213009,
      "loss": 3.0093,
      "step": 96134
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.6493418216705322,
      "learning_rate": 0.00037711711442460694,
      "loss": 3.0461,
      "step": 96135
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7557454109191895,
      "learning_rate": 0.00037711316131357545,
      "loss": 2.9715,
      "step": 96136
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5809376239776611,
      "learning_rate": 0.0003771092081882071,
      "loss": 3.203,
      "step": 96137
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.900829792022705,
      "learning_rate": 0.0003771052550485028,
      "loss": 3.1058,
      "step": 96138
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3752968311309814,
      "learning_rate": 0.0003771013018944631,
      "loss": 3.136,
      "step": 96139
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7415738105773926,
      "learning_rate": 0.00037709734872608887,
      "loss": 2.84,
      "step": 96140
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.484288454055786,
      "learning_rate": 0.0003770933955433807,
      "loss": 3.1582,
      "step": 96141
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.233051300048828,
      "learning_rate": 0.0003770894423463395,
      "loss": 2.9163,
      "step": 96142
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.583894729614258,
      "learning_rate": 0.0003770854891349659,
      "loss": 3.1342,
      "step": 96143
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.6346731185913086,
      "learning_rate": 0.00037708153590926063,
      "loss": 3.0675,
      "step": 96144
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5324517488479614,
      "learning_rate": 0.00037707758266922447,
      "loss": 2.8642,
      "step": 96145
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.579130172729492,
      "learning_rate": 0.0003770736294148581,
      "loss": 3.2719,
      "step": 96146
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5451362133026123,
      "learning_rate": 0.00037706967614616234,
      "loss": 3.0989,
      "step": 96147
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3168678283691406,
      "learning_rate": 0.0003770657228631378,
      "loss": 3.0524,
      "step": 96148
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5336577892303467,
      "learning_rate": 0.0003770617695657853,
      "loss": 3.038,
      "step": 96149
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.3838818073272705,
      "learning_rate": 0.0003770578162541056,
      "loss": 2.9479,
      "step": 96150
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8416112661361694,
      "learning_rate": 0.00037705386292809933,
      "loss": 3.0426,
      "step": 96151
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.599962592124939,
      "learning_rate": 0.00037704990958776737,
      "loss": 2.9898,
      "step": 96152
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0787453651428223,
      "learning_rate": 0.00037704595623311033,
      "loss": 2.961,
      "step": 96153
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.12106990814209,
      "learning_rate": 0.0003770420028641291,
      "loss": 3.0057,
      "step": 96154
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9101698398590088,
      "learning_rate": 0.0003770380494808242,
      "loss": 3.0091,
      "step": 96155
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7110093832015991,
      "learning_rate": 0.00037703409608319646,
      "loss": 2.8091,
      "step": 96156
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6102180480957031,
      "learning_rate": 0.0003770301426712466,
      "loss": 3.218,
      "step": 96157
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.580028772354126,
      "learning_rate": 0.00037702618924497546,
      "loss": 3.1243,
      "step": 96158
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.989417314529419,
      "learning_rate": 0.0003770222358043837,
      "loss": 3.0344,
      "step": 96159
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6048296689987183,
      "learning_rate": 0.000377018282349472,
      "loss": 2.9578,
      "step": 96160
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0978894233703613,
      "learning_rate": 0.0003770143288802412,
      "loss": 2.738,
      "step": 96161
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.079749822616577,
      "learning_rate": 0.0003770103753966919,
      "loss": 3.0727,
      "step": 96162
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9819711446762085,
      "learning_rate": 0.000377006421898825,
      "loss": 3.2396,
      "step": 96163
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8636928796768188,
      "learning_rate": 0.0003770024683866412,
      "loss": 2.7484,
      "step": 96164
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.619692087173462,
      "learning_rate": 0.00037699851486014115,
      "loss": 2.9839,
      "step": 96165
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5805304050445557,
      "learning_rate": 0.0003769945613193255,
      "loss": 2.8655,
      "step": 96166
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.78486967086792,
      "learning_rate": 0.0003769906077641952,
      "loss": 3.0439,
      "step": 96167
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.1736793518066406,
      "learning_rate": 0.00037698665419475094,
      "loss": 3.2659,
      "step": 96168
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8627746105194092,
      "learning_rate": 0.00037698270061099336,
      "loss": 3.0386,
      "step": 96169
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.091676712036133,
      "learning_rate": 0.00037697874701292323,
      "loss": 3.1739,
      "step": 96170
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9670416116714478,
      "learning_rate": 0.00037697479340054136,
      "loss": 3.0491,
      "step": 96171
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9035749435424805,
      "learning_rate": 0.00037697083977384835,
      "loss": 2.7352,
      "step": 96172
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.405128002166748,
      "learning_rate": 0.000376966886132845,
      "loss": 3.0114,
      "step": 96173
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6519447565078735,
      "learning_rate": 0.00037696293247753215,
      "loss": 3.0429,
      "step": 96174
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.326061248779297,
      "learning_rate": 0.0003769589788079103,
      "loss": 2.9376,
      "step": 96175
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.6231822967529297,
      "learning_rate": 0.0003769550251239804,
      "loss": 3.1155,
      "step": 96176
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8524528741836548,
      "learning_rate": 0.00037695107142574313,
      "loss": 3.0557,
      "step": 96177
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.92537522315979,
      "learning_rate": 0.0003769471177131992,
      "loss": 3.0422,
      "step": 96178
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0611445903778076,
      "learning_rate": 0.00037694316398634927,
      "loss": 2.8615,
      "step": 96179
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7996702194213867,
      "learning_rate": 0.00037693921024519426,
      "loss": 2.9174,
      "step": 96180
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8552567958831787,
      "learning_rate": 0.0003769352564897348,
      "loss": 2.7465,
      "step": 96181
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5938928127288818,
      "learning_rate": 0.00037693130271997154,
      "loss": 2.9458,
      "step": 96182
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3330025672912598,
      "learning_rate": 0.0003769273489359053,
      "loss": 2.8807,
      "step": 96183
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6599547863006592,
      "learning_rate": 0.0003769233951375369,
      "loss": 2.8473,
      "step": 96184
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7827397584915161,
      "learning_rate": 0.00037691944132486687,
      "loss": 2.9025,
      "step": 96185
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9632689952850342,
      "learning_rate": 0.00037691548749789613,
      "loss": 2.8976,
      "step": 96186
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8867230415344238,
      "learning_rate": 0.0003769115336566254,
      "loss": 2.7742,
      "step": 96187
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.551950216293335,
      "learning_rate": 0.00037690757980105526,
      "loss": 3.0357,
      "step": 96188
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8189724683761597,
      "learning_rate": 0.0003769036259311866,
      "loss": 3.0858,
      "step": 96189
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9411851167678833,
      "learning_rate": 0.0003768996720470201,
      "loss": 3.0275,
      "step": 96190
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.620134949684143,
      "learning_rate": 0.0003768957181485565,
      "loss": 2.9543,
      "step": 96191
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2023990154266357,
      "learning_rate": 0.00037689176423579647,
      "loss": 2.9097,
      "step": 96192
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.518919825553894,
      "learning_rate": 0.00037688781030874096,
      "loss": 3.1877,
      "step": 96193
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6896029710769653,
      "learning_rate": 0.00037688385636739047,
      "loss": 3.0323,
      "step": 96194
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7484502792358398,
      "learning_rate": 0.0003768799024117458,
      "loss": 3.1491,
      "step": 96195
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7855703830718994,
      "learning_rate": 0.00037687594844180777,
      "loss": 2.9845,
      "step": 96196
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7262425422668457,
      "learning_rate": 0.00037687199445757697,
      "loss": 3.0599,
      "step": 96197
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.079669237136841,
      "learning_rate": 0.0003768680404590543,
      "loss": 2.8999,
      "step": 96198
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5129618644714355,
      "learning_rate": 0.00037686408644624033,
      "loss": 2.8777,
      "step": 96199
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6234983205795288,
      "learning_rate": 0.0003768601324191359,
      "loss": 2.8565,
      "step": 96200
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6439539194107056,
      "learning_rate": 0.00037685617837774173,
      "loss": 3.3035,
      "step": 96201
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6905403137207031,
      "learning_rate": 0.00037685222432205854,
      "loss": 2.9307,
      "step": 96202
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5733693838119507,
      "learning_rate": 0.00037684827025208703,
      "loss": 2.7549,
      "step": 96203
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5502233505249023,
      "learning_rate": 0.00037684431616782807,
      "loss": 3.0016,
      "step": 96204
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7119338512420654,
      "learning_rate": 0.0003768403620692823,
      "loss": 3.1678,
      "step": 96205
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9975755214691162,
      "learning_rate": 0.0003768364079564504,
      "loss": 3.0335,
      "step": 96206
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.525841474533081,
      "learning_rate": 0.00037683245382933307,
      "loss": 3.2489,
      "step": 96207
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.2929415702819824,
      "learning_rate": 0.00037682849968793134,
      "loss": 2.9047,
      "step": 96208
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1826367378234863,
      "learning_rate": 0.00037682454553224557,
      "loss": 3.1745,
      "step": 96209
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.156189203262329,
      "learning_rate": 0.0003768205913622768,
      "loss": 3.0499,
      "step": 96210
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1037447452545166,
      "learning_rate": 0.00037681663717802554,
      "loss": 3.0798,
      "step": 96211
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0933611392974854,
      "learning_rate": 0.0003768126829794927,
      "loss": 2.9084,
      "step": 96212
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7222247123718262,
      "learning_rate": 0.0003768087287666788,
      "loss": 3.1336,
      "step": 96213
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.083509922027588,
      "learning_rate": 0.0003768047745395848,
      "loss": 2.8764,
      "step": 96214
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.321092367172241,
      "learning_rate": 0.0003768008202982113,
      "loss": 3.1043,
      "step": 96215
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8172796964645386,
      "learning_rate": 0.00037679686604255916,
      "loss": 2.9609,
      "step": 96216
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7867923974990845,
      "learning_rate": 0.000376792911772629,
      "loss": 2.5934,
      "step": 96217
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.9683594703674316,
      "learning_rate": 0.00037678895748842153,
      "loss": 2.859,
      "step": 96218
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0499820709228516,
      "learning_rate": 0.0003767850031899375,
      "loss": 3.0577,
      "step": 96219
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5969741344451904,
      "learning_rate": 0.00037678104887717783,
      "loss": 3.0129,
      "step": 96220
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.1889519691467285,
      "learning_rate": 0.00037677709455014307,
      "loss": 2.9154,
      "step": 96221
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.8281219005584717,
      "learning_rate": 0.0003767731402088339,
      "loss": 2.7852,
      "step": 96222
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7648988962173462,
      "learning_rate": 0.0003767691858532513,
      "loss": 3.1097,
      "step": 96223
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7817903757095337,
      "learning_rate": 0.0003767652314833958,
      "loss": 2.9405,
      "step": 96224
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.370023488998413,
      "learning_rate": 0.0003767612770992681,
      "loss": 3.0909,
      "step": 96225
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.957152009010315,
      "learning_rate": 0.00037675732270086925,
      "loss": 2.7413,
      "step": 96226
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6209523677825928,
      "learning_rate": 0.00037675336828819954,
      "loss": 2.9566,
      "step": 96227
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7078258991241455,
      "learning_rate": 0.00037674941386126005,
      "loss": 2.9035,
      "step": 96228
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1940829753875732,
      "learning_rate": 0.0003767454594200514,
      "loss": 3.2897,
      "step": 96229
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4351916313171387,
      "learning_rate": 0.00037674150496457427,
      "loss": 3.1401,
      "step": 96230
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6431236267089844,
      "learning_rate": 0.0003767375504948294,
      "loss": 3.0402,
      "step": 96231
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0401241779327393,
      "learning_rate": 0.0003767335960108177,
      "loss": 2.9629,
      "step": 96232
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9215340614318848,
      "learning_rate": 0.0003767296415125397,
      "loss": 3.1164,
      "step": 96233
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6975610256195068,
      "learning_rate": 0.0003767256869999962,
      "loss": 3.0555,
      "step": 96234
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.528499722480774,
      "learning_rate": 0.000376721732473188,
      "loss": 3.0754,
      "step": 96235
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1247620582580566,
      "learning_rate": 0.00037671777793211574,
      "loss": 2.8975,
      "step": 96236
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6669907569885254,
      "learning_rate": 0.00037671382337678015,
      "loss": 2.9235,
      "step": 96237
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9236738681793213,
      "learning_rate": 0.00037670986880718204,
      "loss": 2.6812,
      "step": 96238
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8569811582565308,
      "learning_rate": 0.0003767059142233222,
      "loss": 3.0476,
      "step": 96239
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7494688034057617,
      "learning_rate": 0.0003767019596252012,
      "loss": 3.2032,
      "step": 96240
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.45735239982605,
      "learning_rate": 0.00037669800501281993,
      "loss": 3.0448,
      "step": 96241
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.920807957649231,
      "learning_rate": 0.000376694050386179,
      "loss": 3.0511,
      "step": 96242
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4841293096542358,
      "learning_rate": 0.0003766900957452792,
      "loss": 2.8473,
      "step": 96243
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6959878206253052,
      "learning_rate": 0.0003766861410901213,
      "loss": 2.9222,
      "step": 96244
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6975289583206177,
      "learning_rate": 0.00037668218642070584,
      "loss": 2.8766,
      "step": 96245
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.681607961654663,
      "learning_rate": 0.00037667823173703394,
      "loss": 2.712,
      "step": 96246
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6935468912124634,
      "learning_rate": 0.000376674277039106,
      "loss": 3.1379,
      "step": 96247
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.668940782546997,
      "learning_rate": 0.00037667032232692287,
      "loss": 2.9833,
      "step": 96248
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0218100547790527,
      "learning_rate": 0.0003766663676004852,
      "loss": 3.1044,
      "step": 96249
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0152170658111572,
      "learning_rate": 0.00037666241285979403,
      "loss": 3.0635,
      "step": 96250
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6347030401229858,
      "learning_rate": 0.00037665845810484967,
      "loss": 2.8858,
      "step": 96251
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1937315464019775,
      "learning_rate": 0.0003766545033356531,
      "loss": 3.3274,
      "step": 96252
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8849579095840454,
      "learning_rate": 0.00037665054855220504,
      "loss": 2.8565,
      "step": 96253
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0332889556884766,
      "learning_rate": 0.0003766465937545062,
      "loss": 2.9884,
      "step": 96254
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.6072535514831543,
      "learning_rate": 0.0003766426389425573,
      "loss": 2.9858,
      "step": 96255
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8986515998840332,
      "learning_rate": 0.000376638684116359,
      "loss": 2.8542,
      "step": 96256
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9849801063537598,
      "learning_rate": 0.00037663472927591227,
      "loss": 3.2106,
      "step": 96257
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.82241690158844,
      "learning_rate": 0.0003766307744212176,
      "loss": 2.8989,
      "step": 96258
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.035189628601074,
      "learning_rate": 0.0003766268195522759,
      "loss": 3.0519,
      "step": 96259
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.6880064010620117,
      "learning_rate": 0.0003766228646690878,
      "loss": 3.0226,
      "step": 96260
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.4665277004241943,
      "learning_rate": 0.000376618909771654,
      "loss": 3.2842,
      "step": 96261
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6482279300689697,
      "learning_rate": 0.0003766149548599753,
      "loss": 3.0436,
      "step": 96262
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.213893175125122,
      "learning_rate": 0.00037661099993405254,
      "loss": 3.1872,
      "step": 96263
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.164299726486206,
      "learning_rate": 0.00037660704499388627,
      "loss": 3.0223,
      "step": 96264
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.2138943672180176,
      "learning_rate": 0.00037660309003947727,
      "loss": 2.9472,
      "step": 96265
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.601940631866455,
      "learning_rate": 0.00037659913507082646,
      "loss": 2.8535,
      "step": 96266
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.214463710784912,
      "learning_rate": 0.0003765951800879343,
      "loss": 2.8901,
      "step": 96267
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1326417922973633,
      "learning_rate": 0.0003765912250908016,
      "loss": 3.0338,
      "step": 96268
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5360679626464844,
      "learning_rate": 0.0003765872700794293,
      "loss": 3.0006,
      "step": 96269
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0233592987060547,
      "learning_rate": 0.00037658331505381794,
      "loss": 3.018,
      "step": 96270
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.976201057434082,
      "learning_rate": 0.00037657936001396817,
      "loss": 2.9846,
      "step": 96271
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.155569314956665,
      "learning_rate": 0.000376575404959881,
      "loss": 3.06,
      "step": 96272
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.645272970199585,
      "learning_rate": 0.0003765714498915569,
      "loss": 3.0061,
      "step": 96273
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6666629314422607,
      "learning_rate": 0.0003765674948089968,
      "loss": 3.0865,
      "step": 96274
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4518754482269287,
      "learning_rate": 0.00037656353971220137,
      "loss": 2.8595,
      "step": 96275
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0329771041870117,
      "learning_rate": 0.00037655958460117123,
      "loss": 3.184,
      "step": 96276
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6453567743301392,
      "learning_rate": 0.0003765556294759073,
      "loss": 2.8913,
      "step": 96277
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0453786849975586,
      "learning_rate": 0.00037655167433641026,
      "loss": 2.9457,
      "step": 96278
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6274299621582031,
      "learning_rate": 0.0003765477191826808,
      "loss": 2.9554,
      "step": 96279
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.731742262840271,
      "learning_rate": 0.00037654376401471956,
      "loss": 2.7853,
      "step": 96280
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.755639672279358,
      "learning_rate": 0.00037653980883252755,
      "loss": 3.079,
      "step": 96281
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.656369686126709,
      "learning_rate": 0.0003765358536361053,
      "loss": 3.2379,
      "step": 96282
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7149097919464111,
      "learning_rate": 0.0003765318984254535,
      "loss": 3.0612,
      "step": 96283
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8108185529708862,
      "learning_rate": 0.0003765279432005731,
      "loss": 2.9915,
      "step": 96284
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8654253482818604,
      "learning_rate": 0.00037652398796146467,
      "loss": 2.9317,
      "step": 96285
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6243547201156616,
      "learning_rate": 0.00037652003270812894,
      "loss": 3.0459,
      "step": 96286
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.691593050956726,
      "learning_rate": 0.00037651607744056673,
      "loss": 2.9695,
      "step": 96287
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9595730304718018,
      "learning_rate": 0.0003765121221587787,
      "loss": 3.0039,
      "step": 96288
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6328496932983398,
      "learning_rate": 0.00037650816686276564,
      "loss": 2.7022,
      "step": 96289
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.675572395324707,
      "learning_rate": 0.0003765042115525283,
      "loss": 3.1634,
      "step": 96290
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7501838207244873,
      "learning_rate": 0.00037650025622806735,
      "loss": 3.0177,
      "step": 96291
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1624414920806885,
      "learning_rate": 0.0003764963008893836,
      "loss": 2.8396,
      "step": 96292
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.882177710533142,
      "learning_rate": 0.0003764923455364777,
      "loss": 3.2054,
      "step": 96293
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.84132719039917,
      "learning_rate": 0.0003764883901693504,
      "loss": 2.9878,
      "step": 96294
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.721381425857544,
      "learning_rate": 0.0003764844347880025,
      "loss": 2.9496,
      "step": 96295
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.660008192062378,
      "learning_rate": 0.00037648047939243477,
      "loss": 3.0025,
      "step": 96296
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3222789764404297,
      "learning_rate": 0.0003764765239826478,
      "loss": 3.2144,
      "step": 96297
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8384376764297485,
      "learning_rate": 0.00037647256855864237,
      "loss": 3.2018,
      "step": 96298
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.051377058029175,
      "learning_rate": 0.00037646861312041937,
      "loss": 3.2984,
      "step": 96299
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0032432079315186,
      "learning_rate": 0.00037646465766797923,
      "loss": 2.9413,
      "step": 96300
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5156385898590088,
      "learning_rate": 0.00037646070220132303,
      "loss": 3.2413,
      "step": 96301
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5966272354125977,
      "learning_rate": 0.00037645674672045126,
      "loss": 3.0906,
      "step": 96302
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.973257303237915,
      "learning_rate": 0.0003764527912253648,
      "loss": 2.8216,
      "step": 96303
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7449396848678589,
      "learning_rate": 0.0003764488357160642,
      "loss": 2.9406,
      "step": 96304
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7086483240127563,
      "learning_rate": 0.0003764448801925505,
      "loss": 2.9171,
      "step": 96305
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6907652616500854,
      "learning_rate": 0.00037644092465482414,
      "loss": 2.9776,
      "step": 96306
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0479812622070312,
      "learning_rate": 0.0003764369691028859,
      "loss": 3.0614,
      "step": 96307
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9295250177383423,
      "learning_rate": 0.0003764330135367367,
      "loss": 2.8005,
      "step": 96308
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.605247139930725,
      "learning_rate": 0.0003764290579563771,
      "loss": 3.0531,
      "step": 96309
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5576577186584473,
      "learning_rate": 0.00037642510236180794,
      "loss": 2.7459,
      "step": 96310
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.864850401878357,
      "learning_rate": 0.00037642114675302997,
      "loss": 2.9618,
      "step": 96311
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.896727204322815,
      "learning_rate": 0.00037641719113004374,
      "loss": 2.931,
      "step": 96312
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9586390256881714,
      "learning_rate": 0.00037641323549285013,
      "loss": 2.9848,
      "step": 96313
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7396351099014282,
      "learning_rate": 0.0003764092798414499,
      "loss": 3.0821,
      "step": 96314
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.59831702709198,
      "learning_rate": 0.0003764053241758437,
      "loss": 3.0986,
      "step": 96315
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6869752407073975,
      "learning_rate": 0.0003764013684960323,
      "loss": 3.1096,
      "step": 96316
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0912697315216064,
      "learning_rate": 0.0003763974128020166,
      "loss": 2.7995,
      "step": 96317
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.258049726486206,
      "learning_rate": 0.000376393457093797,
      "loss": 3.1165,
      "step": 96318
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0613348484039307,
      "learning_rate": 0.0003763895013713744,
      "loss": 2.7906,
      "step": 96319
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8017877340316772,
      "learning_rate": 0.00037638554563474964,
      "loss": 3.0215,
      "step": 96320
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7223403453826904,
      "learning_rate": 0.0003763815898839234,
      "loss": 3.1795,
      "step": 96321
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5499520301818848,
      "learning_rate": 0.00037637763411889623,
      "loss": 3.0204,
      "step": 96322
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7622395753860474,
      "learning_rate": 0.00037637367833966915,
      "loss": 3.0639,
      "step": 96323
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.991861343383789,
      "learning_rate": 0.00037636972254624277,
      "loss": 3.0112,
      "step": 96324
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.7758593559265137,
      "learning_rate": 0.0003763657667386178,
      "loss": 3.2,
      "step": 96325
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.738808035850525,
      "learning_rate": 0.0003763618109167949,
      "loss": 3.0882,
      "step": 96326
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5789378881454468,
      "learning_rate": 0.000376357855080775,
      "loss": 2.996,
      "step": 96327
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1717588901519775,
      "learning_rate": 0.0003763538992305587,
      "loss": 3.0468,
      "step": 96328
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0454297065734863,
      "learning_rate": 0.00037634994336614675,
      "loss": 3.12,
      "step": 96329
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.8541970252990723,
      "learning_rate": 0.00037634598748753986,
      "loss": 2.7714,
      "step": 96330
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0507028102874756,
      "learning_rate": 0.0003763420315947389,
      "loss": 2.8033,
      "step": 96331
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8202660083770752,
      "learning_rate": 0.00037633807568774447,
      "loss": 3.005,
      "step": 96332
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.8825430870056152,
      "learning_rate": 0.0003763341197665574,
      "loss": 3.0963,
      "step": 96333
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.82099986076355,
      "learning_rate": 0.00037633016383117836,
      "loss": 3.1167,
      "step": 96334
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7733076810836792,
      "learning_rate": 0.00037632620788160805,
      "loss": 2.8856,
      "step": 96335
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.456287145614624,
      "learning_rate": 0.00037632225191784727,
      "loss": 3.1452,
      "step": 96336
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.469198703765869,
      "learning_rate": 0.0003763182959398967,
      "loss": 3.0468,
      "step": 96337
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3389506340026855,
      "learning_rate": 0.00037631433994775727,
      "loss": 3.1283,
      "step": 96338
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.665525197982788,
      "learning_rate": 0.00037631038394142946,
      "loss": 3.0427,
      "step": 96339
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9679298400878906,
      "learning_rate": 0.0003763064279209141,
      "loss": 3.0949,
      "step": 96340
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.290842294692993,
      "learning_rate": 0.0003763024718862119,
      "loss": 3.1368,
      "step": 96341
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.720720052719116,
      "learning_rate": 0.0003762985158373237,
      "loss": 3.0068,
      "step": 96342
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7131530046463013,
      "learning_rate": 0.0003762945597742502,
      "loss": 2.9712,
      "step": 96343
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.632185220718384,
      "learning_rate": 0.000376290603696992,
      "loss": 3.2339,
      "step": 96344
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.2453579902648926,
      "learning_rate": 0.00037628664760555006,
      "loss": 3.0988,
      "step": 96345
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.375368118286133,
      "learning_rate": 0.0003762826914999249,
      "loss": 2.9994,
      "step": 96346
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.1977925300598145,
      "learning_rate": 0.00037627873538011733,
      "loss": 3.0882,
      "step": 96347
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7817039489746094,
      "learning_rate": 0.0003762747792461282,
      "loss": 3.1675,
      "step": 96348
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.532381296157837,
      "learning_rate": 0.00037627082309795806,
      "loss": 3.0178,
      "step": 96349
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8890089988708496,
      "learning_rate": 0.00037626686693560777,
      "loss": 3.1788,
      "step": 96350
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7504781484603882,
      "learning_rate": 0.00037626291075907806,
      "loss": 3.1602,
      "step": 96351
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3848249912261963,
      "learning_rate": 0.00037625895456836964,
      "loss": 2.9741,
      "step": 96352
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.381154775619507,
      "learning_rate": 0.00037625499836348316,
      "loss": 3.0125,
      "step": 96353
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.091116428375244,
      "learning_rate": 0.00037625104214441955,
      "loss": 3.1659,
      "step": 96354
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8676846027374268,
      "learning_rate": 0.00037624708591117934,
      "loss": 3.0312,
      "step": 96355
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.3515970706939697,
      "learning_rate": 0.0003762431296637633,
      "loss": 2.8482,
      "step": 96356
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8712247610092163,
      "learning_rate": 0.0003762391734021724,
      "loss": 2.8727,
      "step": 96357
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.781671404838562,
      "learning_rate": 0.00037623521712640707,
      "loss": 3.116,
      "step": 96358
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.117461919784546,
      "learning_rate": 0.0003762312608364682,
      "loss": 3.0696,
      "step": 96359
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6432145833969116,
      "learning_rate": 0.00037622730453235655,
      "loss": 2.9657,
      "step": 96360
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.606633186340332,
      "learning_rate": 0.0003762233482140727,
      "loss": 2.9225,
      "step": 96361
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8246182203292847,
      "learning_rate": 0.0003762193918816175,
      "loss": 3.0759,
      "step": 96362
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7186535596847534,
      "learning_rate": 0.0003762154355349918,
      "loss": 3.0425,
      "step": 96363
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.904666781425476,
      "learning_rate": 0.0003762114791741961,
      "loss": 2.9888,
      "step": 96364
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6630090475082397,
      "learning_rate": 0.00037620752279923125,
      "loss": 3.093,
      "step": 96365
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.778836727142334,
      "learning_rate": 0.00037620356641009805,
      "loss": 2.7724,
      "step": 96366
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6861469745635986,
      "learning_rate": 0.00037619961000679716,
      "loss": 3.0272,
      "step": 96367
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9651755094528198,
      "learning_rate": 0.00037619565358932926,
      "loss": 2.912,
      "step": 96368
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6583725214004517,
      "learning_rate": 0.0003761916971576953,
      "loss": 2.9919,
      "step": 96369
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8855019807815552,
      "learning_rate": 0.0003761877407118957,
      "loss": 3.151,
      "step": 96370
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.642372965812683,
      "learning_rate": 0.0003761837842519314,
      "loss": 3.0716,
      "step": 96371
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7921525239944458,
      "learning_rate": 0.00037617982777780315,
      "loss": 3.016,
      "step": 96372
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.5069448947906494,
      "learning_rate": 0.00037617587128951154,
      "loss": 3.1772,
      "step": 96373
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.6337649822235107,
      "learning_rate": 0.0003761719147870575,
      "loss": 2.8452,
      "step": 96374
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.6806905269622803,
      "learning_rate": 0.00037616795827044163,
      "loss": 2.7462,
      "step": 96375
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.97304105758667,
      "learning_rate": 0.0003761640017396647,
      "loss": 2.9555,
      "step": 96376
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.144927978515625,
      "learning_rate": 0.0003761600451947274,
      "loss": 3.0294,
      "step": 96377
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.0916128158569336,
      "learning_rate": 0.00037615608863563057,
      "loss": 2.9224,
      "step": 96378
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.258826971054077,
      "learning_rate": 0.0003761521320623748,
      "loss": 2.8633,
      "step": 96379
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.4153358936309814,
      "learning_rate": 0.000376148175474961,
      "loss": 3.3348,
      "step": 96380
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.9871443510055542,
      "learning_rate": 0.0003761442188733898,
      "loss": 3.0378,
      "step": 96381
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.288365364074707,
      "learning_rate": 0.0003761402622576619,
      "loss": 3.0175,
      "step": 96382
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8802711963653564,
      "learning_rate": 0.0003761363056277781,
      "loss": 3.2087,
      "step": 96383
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.7169498205184937,
      "learning_rate": 0.00037613234898373926,
      "loss": 2.9815,
      "step": 96384
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.5296930074691772,
      "learning_rate": 0.0003761283923255458,
      "loss": 3.1538,
      "step": 96385
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.084214448928833,
      "learning_rate": 0.0003761244356531987,
      "loss": 3.0295,
      "step": 96386
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.334827423095703,
      "learning_rate": 0.00037612047896669863,
      "loss": 3.2453,
      "step": 96387
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.722383737564087,
      "learning_rate": 0.00037611652226604634,
      "loss": 2.9312,
      "step": 96388
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.185415744781494,
      "learning_rate": 0.0003761125655512425,
      "loss": 2.9011,
      "step": 96389
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.707603693008423,
      "learning_rate": 0.00037610860882228794,
      "loss": 3.1394,
      "step": 96390
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3777999877929688,
      "learning_rate": 0.0003761046520791834,
      "loss": 3.2203,
      "step": 96391
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9360154867172241,
      "learning_rate": 0.00037610069532192946,
      "loss": 2.6586,
      "step": 96392
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7082874774932861,
      "learning_rate": 0.000376096738550527,
      "loss": 2.9962,
      "step": 96393
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7590795755386353,
      "learning_rate": 0.0003760927817649768,
      "loss": 2.9195,
      "step": 96394
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.6204769611358643,
      "learning_rate": 0.0003760888249652795,
      "loss": 2.9412,
      "step": 96395
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.871708631515503,
      "learning_rate": 0.0003760848681514357,
      "loss": 2.7853,
      "step": 96396
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.638901472091675,
      "learning_rate": 0.0003760809113234465,
      "loss": 3.0249,
      "step": 96397
    },
    {
      "epoch": 1.26,
      "grad_norm": 4.384078502655029,
      "learning_rate": 0.0003760769544813123,
      "loss": 2.7116,
      "step": 96398
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.766823172569275,
      "learning_rate": 0.00037607299762503393,
      "loss": 2.8058,
      "step": 96399
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6193819046020508,
      "learning_rate": 0.0003760690407546123,
      "loss": 2.8917,
      "step": 96400
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2558224201202393,
      "learning_rate": 0.00037606508387004787,
      "loss": 3.0213,
      "step": 96401
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0567362308502197,
      "learning_rate": 0.0003760611269713415,
      "loss": 2.8929,
      "step": 96402
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6095703840255737,
      "learning_rate": 0.000376057170058494,
      "loss": 3.3182,
      "step": 96403
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.046586751937866,
      "learning_rate": 0.000376053213131506,
      "loss": 3.0425,
      "step": 96404
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7253155708312988,
      "learning_rate": 0.0003760492561903782,
      "loss": 3.0083,
      "step": 96405
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9608076810836792,
      "learning_rate": 0.0003760452992351115,
      "loss": 2.8985,
      "step": 96406
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.577201008796692,
      "learning_rate": 0.0003760413422657066,
      "loss": 2.7991,
      "step": 96407
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1379311084747314,
      "learning_rate": 0.000376037385282164,
      "loss": 3.0817,
      "step": 96408
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.254507064819336,
      "learning_rate": 0.0003760334282844848,
      "loss": 2.913,
      "step": 96409
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7692328691482544,
      "learning_rate": 0.0003760294712726695,
      "loss": 3.0415,
      "step": 96410
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3507118225097656,
      "learning_rate": 0.00037602551424671877,
      "loss": 2.8751,
      "step": 96411
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6598061323165894,
      "learning_rate": 0.0003760215572066336,
      "loss": 2.8824,
      "step": 96412
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0780231952667236,
      "learning_rate": 0.00037601760015241456,
      "loss": 2.919,
      "step": 96413
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5449914932250977,
      "learning_rate": 0.00037601364308406237,
      "loss": 3.0255,
      "step": 96414
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.001739740371704,
      "learning_rate": 0.0003760096860015779,
      "loss": 2.9798,
      "step": 96415
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.240295886993408,
      "learning_rate": 0.0003760057289049616,
      "loss": 3.087,
      "step": 96416
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.9972732067108154,
      "learning_rate": 0.0003760017717942145,
      "loss": 3.0051,
      "step": 96417
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.368666887283325,
      "learning_rate": 0.0003759978146693373,
      "loss": 2.9184,
      "step": 96418
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6396288871765137,
      "learning_rate": 0.00037599385753033063,
      "loss": 3.0063,
      "step": 96419
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.587347388267517,
      "learning_rate": 0.00037598990037719524,
      "loss": 2.8125,
      "step": 96420
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9106017351150513,
      "learning_rate": 0.00037598594320993194,
      "loss": 2.8324,
      "step": 96421
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.734719157218933,
      "learning_rate": 0.00037598198602854137,
      "loss": 3.0205,
      "step": 96422
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.882460355758667,
      "learning_rate": 0.00037597802883302436,
      "loss": 2.9531,
      "step": 96423
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3310606479644775,
      "learning_rate": 0.0003759740716233816,
      "loss": 2.8848,
      "step": 96424
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7070558071136475,
      "learning_rate": 0.0003759701143996138,
      "loss": 2.7115,
      "step": 96425
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.3851770162582397,
      "learning_rate": 0.0003759661571617217,
      "loss": 3.0476,
      "step": 96426
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.150949239730835,
      "learning_rate": 0.0003759621999097061,
      "loss": 3.092,
      "step": 96427
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0062432289123535,
      "learning_rate": 0.0003759582426435677,
      "loss": 2.9773,
      "step": 96428
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7425884008407593,
      "learning_rate": 0.00037595428536330716,
      "loss": 2.7525,
      "step": 96429
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.834071159362793,
      "learning_rate": 0.00037595032806892544,
      "loss": 2.9562,
      "step": 96430
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9113950729370117,
      "learning_rate": 0.00037594637076042295,
      "loss": 2.6263,
      "step": 96431
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6094691753387451,
      "learning_rate": 0.0003759424134378006,
      "loss": 2.9785,
      "step": 96432
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8965815305709839,
      "learning_rate": 0.00037593845610105924,
      "loss": 2.9271,
      "step": 96433
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.323517322540283,
      "learning_rate": 0.0003759344987501994,
      "loss": 3.1064,
      "step": 96434
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.967352032661438,
      "learning_rate": 0.00037593054138522193,
      "loss": 2.7751,
      "step": 96435
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1000025272369385,
      "learning_rate": 0.00037592658400612764,
      "loss": 3.2541,
      "step": 96436
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1399903297424316,
      "learning_rate": 0.00037592262661291704,
      "loss": 3.0699,
      "step": 96437
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.024486541748047,
      "learning_rate": 0.00037591866920559094,
      "loss": 3.0181,
      "step": 96438
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5623066425323486,
      "learning_rate": 0.00037591471178415026,
      "loss": 2.9028,
      "step": 96439
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8590891361236572,
      "learning_rate": 0.00037591075434859555,
      "loss": 3.0316,
      "step": 96440
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0463757514953613,
      "learning_rate": 0.00037590679689892756,
      "loss": 2.956,
      "step": 96441
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.669057846069336,
      "learning_rate": 0.00037590283943514716,
      "loss": 3.0061,
      "step": 96442
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4915056228637695,
      "learning_rate": 0.0003758988819572549,
      "loss": 2.9884,
      "step": 96443
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.736591100692749,
      "learning_rate": 0.00037589492446525163,
      "loss": 3.048,
      "step": 96444
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9724980592727661,
      "learning_rate": 0.0003758909669591381,
      "loss": 2.9682,
      "step": 96445
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5632771253585815,
      "learning_rate": 0.0003758870094389149,
      "loss": 2.9114,
      "step": 96446
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6148457527160645,
      "learning_rate": 0.00037588305190458293,
      "loss": 2.8399,
      "step": 96447
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.9738831520080566,
      "learning_rate": 0.000375879094356143,
      "loss": 3.2657,
      "step": 96448
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.824068784713745,
      "learning_rate": 0.00037587513679359556,
      "loss": 3.0206,
      "step": 96449
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.454359531402588,
      "learning_rate": 0.0003758711792169415,
      "loss": 2.8167,
      "step": 96450
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.999861478805542,
      "learning_rate": 0.0003758672216261816,
      "loss": 3.028,
      "step": 96451
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9839533567428589,
      "learning_rate": 0.00037586326402131657,
      "loss": 3.2154,
      "step": 96452
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8678827285766602,
      "learning_rate": 0.0003758593064023471,
      "loss": 3.0922,
      "step": 96453
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2478795051574707,
      "learning_rate": 0.0003758553487692739,
      "loss": 3.0624,
      "step": 96454
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.057128429412842,
      "learning_rate": 0.00037585139112209783,
      "loss": 3.2763,
      "step": 96455
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0405242443084717,
      "learning_rate": 0.00037584743346081955,
      "loss": 3.0127,
      "step": 96456
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1321566104888916,
      "learning_rate": 0.00037584347578543975,
      "loss": 3.0652,
      "step": 96457
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5203981399536133,
      "learning_rate": 0.00037583951809595925,
      "loss": 3.1521,
      "step": 96458
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5529799461364746,
      "learning_rate": 0.00037583556039237875,
      "loss": 3.0483,
      "step": 96459
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6727664470672607,
      "learning_rate": 0.000375831602674699,
      "loss": 2.8616,
      "step": 96460
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5857912302017212,
      "learning_rate": 0.0003758276449429207,
      "loss": 3.253,
      "step": 96461
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.654902458190918,
      "learning_rate": 0.0003758236871970446,
      "loss": 3.0578,
      "step": 96462
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9449764490127563,
      "learning_rate": 0.00037581972943707156,
      "loss": 3.1043,
      "step": 96463
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5816978216171265,
      "learning_rate": 0.00037581577166300215,
      "loss": 2.8789,
      "step": 96464
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7936429977416992,
      "learning_rate": 0.0003758118138748371,
      "loss": 3.2375,
      "step": 96465
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5184943675994873,
      "learning_rate": 0.00037580785607257717,
      "loss": 2.9624,
      "step": 96466
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9045346975326538,
      "learning_rate": 0.00037580389825622317,
      "loss": 3.1473,
      "step": 96467
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6856083869934082,
      "learning_rate": 0.0003757999404257758,
      "loss": 3.0292,
      "step": 96468
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8696919679641724,
      "learning_rate": 0.00037579598258123583,
      "loss": 3.2746,
      "step": 96469
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0370585918426514,
      "learning_rate": 0.0003757920247226039,
      "loss": 2.8372,
      "step": 96470
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8355261087417603,
      "learning_rate": 0.00037578806684988083,
      "loss": 3.1047,
      "step": 96471
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8420944213867188,
      "learning_rate": 0.0003757841089630673,
      "loss": 2.657,
      "step": 96472
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2217090129852295,
      "learning_rate": 0.0003757801510621641,
      "loss": 3.0715,
      "step": 96473
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.003323793411255,
      "learning_rate": 0.000375776193147172,
      "loss": 2.9022,
      "step": 96474
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.845525860786438,
      "learning_rate": 0.00037577223521809155,
      "loss": 3.0771,
      "step": 96475
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.051064968109131,
      "learning_rate": 0.0003757682772749237,
      "loss": 3.0975,
      "step": 96476
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3796486854553223,
      "learning_rate": 0.00037576431931766904,
      "loss": 2.8745,
      "step": 96477
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.74708890914917,
      "learning_rate": 0.00037576036134632834,
      "loss": 2.7914,
      "step": 96478
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5550272464752197,
      "learning_rate": 0.00037575640336090255,
      "loss": 2.871,
      "step": 96479
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9991323947906494,
      "learning_rate": 0.000375752445361392,
      "loss": 3.1995,
      "step": 96480
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9129689931869507,
      "learning_rate": 0.00037574848734779766,
      "loss": 2.9251,
      "step": 96481
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8501054048538208,
      "learning_rate": 0.00037574452932012035,
      "loss": 3.2586,
      "step": 96482
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1673812866210938,
      "learning_rate": 0.0003757405712783606,
      "loss": 2.9475,
      "step": 96483
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6493302583694458,
      "learning_rate": 0.00037573661322251936,
      "loss": 2.9501,
      "step": 96484
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.120366096496582,
      "learning_rate": 0.0003757326551525972,
      "loss": 3.1108,
      "step": 96485
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2518045902252197,
      "learning_rate": 0.00037572869706859494,
      "loss": 3.269,
      "step": 96486
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0511889457702637,
      "learning_rate": 0.0003757247389705132,
      "loss": 3.0533,
      "step": 96487
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.096022367477417,
      "learning_rate": 0.0003757207808583529,
      "loss": 3.0669,
      "step": 96488
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.340468645095825,
      "learning_rate": 0.00037571682273211454,
      "loss": 3.0813,
      "step": 96489
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9857122898101807,
      "learning_rate": 0.00037571286459179914,
      "loss": 3.1883,
      "step": 96490
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.9656219482421875,
      "learning_rate": 0.00037570890643740727,
      "loss": 3.0124,
      "step": 96491
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.378143548965454,
      "learning_rate": 0.00037570494826893964,
      "loss": 2.8296,
      "step": 96492
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8852940797805786,
      "learning_rate": 0.000375700990086397,
      "loss": 3.0168,
      "step": 96493
    },
    {
      "epoch": 1.26,
      "grad_norm": 4.403999328613281,
      "learning_rate": 0.00037569703188978013,
      "loss": 3.0104,
      "step": 96494
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5467259883880615,
      "learning_rate": 0.0003756930736790898,
      "loss": 3.1209,
      "step": 96495
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6479542255401611,
      "learning_rate": 0.00037568911545432667,
      "loss": 3.043,
      "step": 96496
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.862746238708496,
      "learning_rate": 0.00037568515721549155,
      "loss": 3.1315,
      "step": 96497
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4226508140563965,
      "learning_rate": 0.0003756811989625851,
      "loss": 2.8051,
      "step": 96498
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6587302684783936,
      "learning_rate": 0.00037567724069560807,
      "loss": 2.9118,
      "step": 96499
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5764176845550537,
      "learning_rate": 0.0003756732824145612,
      "loss": 3.1748,
      "step": 96500
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.008399724960327,
      "learning_rate": 0.00037566932411944523,
      "loss": 3.1825,
      "step": 96501
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6019219160079956,
      "learning_rate": 0.00037566536581026095,
      "loss": 3.2727,
      "step": 96502
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8909943103790283,
      "learning_rate": 0.00037566140748700904,
      "loss": 2.9386,
      "step": 96503
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.593176245689392,
      "learning_rate": 0.00037565744914969026,
      "loss": 3.0407,
      "step": 96504
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.718939185142517,
      "learning_rate": 0.0003756534907983053,
      "loss": 3.0452,
      "step": 96505
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5393803119659424,
      "learning_rate": 0.0003756495324328549,
      "loss": 2.9141,
      "step": 96506
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.438408851623535,
      "learning_rate": 0.00037564557405333994,
      "loss": 2.8658,
      "step": 96507
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7904434204101562,
      "learning_rate": 0.00037564161565976093,
      "loss": 3.1719,
      "step": 96508
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6338404417037964,
      "learning_rate": 0.00037563765725211873,
      "loss": 3.0017,
      "step": 96509
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6956987380981445,
      "learning_rate": 0.00037563369883041406,
      "loss": 2.8494,
      "step": 96510
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0093772411346436,
      "learning_rate": 0.0003756297403946477,
      "loss": 3.0649,
      "step": 96511
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7438377141952515,
      "learning_rate": 0.0003756257819448203,
      "loss": 2.9929,
      "step": 96512
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6019201278686523,
      "learning_rate": 0.00037562182348093267,
      "loss": 3.0587,
      "step": 96513
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5750281810760498,
      "learning_rate": 0.00037561786500298555,
      "loss": 3.159,
      "step": 96514
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6125621795654297,
      "learning_rate": 0.0003756139065109796,
      "loss": 3.061,
      "step": 96515
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.594180941581726,
      "learning_rate": 0.0003756099480049155,
      "loss": 2.9315,
      "step": 96516
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7617555856704712,
      "learning_rate": 0.00037560598948479417,
      "loss": 2.9206,
      "step": 96517
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1163361072540283,
      "learning_rate": 0.00037560203095061636,
      "loss": 2.9731,
      "step": 96518
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7437043190002441,
      "learning_rate": 0.0003755980724023826,
      "loss": 2.7274,
      "step": 96519
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0270838737487793,
      "learning_rate": 0.0003755941138400937,
      "loss": 2.9247,
      "step": 96520
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9257198572158813,
      "learning_rate": 0.0003755901552637505,
      "loss": 2.933,
      "step": 96521
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.800979733467102,
      "learning_rate": 0.0003755861966733536,
      "loss": 3.0562,
      "step": 96522
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6109886169433594,
      "learning_rate": 0.00037558223806890385,
      "loss": 3.1607,
      "step": 96523
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4359456300735474,
      "learning_rate": 0.0003755782794504019,
      "loss": 3.0055,
      "step": 96524
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6836429834365845,
      "learning_rate": 0.00037557432081784857,
      "loss": 3.0857,
      "step": 96525
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7551718950271606,
      "learning_rate": 0.0003755703621712445,
      "loss": 2.9,
      "step": 96526
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.689749002456665,
      "learning_rate": 0.0003755664035105905,
      "loss": 3.0934,
      "step": 96527
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4752479791641235,
      "learning_rate": 0.00037556244483588733,
      "loss": 3.0727,
      "step": 96528
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0753560066223145,
      "learning_rate": 0.0003755584861471356,
      "loss": 2.8272,
      "step": 96529
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6578164100646973,
      "learning_rate": 0.0003755545274443361,
      "loss": 2.9703,
      "step": 96530
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2122530937194824,
      "learning_rate": 0.0003755505687274897,
      "loss": 3.0965,
      "step": 96531
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5057053565979004,
      "learning_rate": 0.0003755466099965969,
      "loss": 3.061,
      "step": 96532
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.4386420249938965,
      "learning_rate": 0.0003755426512516586,
      "loss": 3.0707,
      "step": 96533
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7234946489334106,
      "learning_rate": 0.0003755386924926755,
      "loss": 2.9765,
      "step": 96534
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4658539295196533,
      "learning_rate": 0.00037553473371964837,
      "loss": 3.0919,
      "step": 96535
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2141339778900146,
      "learning_rate": 0.0003755307749325778,
      "loss": 3.0298,
      "step": 96536
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9813172817230225,
      "learning_rate": 0.00037552681613146476,
      "loss": 2.8958,
      "step": 96537
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6852513551712036,
      "learning_rate": 0.0003755228573163098,
      "loss": 2.9289,
      "step": 96538
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.290045976638794,
      "learning_rate": 0.0003755188984871137,
      "loss": 2.89,
      "step": 96539
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.325019359588623,
      "learning_rate": 0.00037551493964387727,
      "loss": 2.8748,
      "step": 96540
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.657618761062622,
      "learning_rate": 0.0003755109807866011,
      "loss": 2.873,
      "step": 96541
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.618438720703125,
      "learning_rate": 0.00037550702191528607,
      "loss": 3.1359,
      "step": 96542
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8131678104400635,
      "learning_rate": 0.00037550306302993285,
      "loss": 3.272,
      "step": 96543
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.59544038772583,
      "learning_rate": 0.00037549910413054225,
      "loss": 2.8998,
      "step": 96544
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8479151725769043,
      "learning_rate": 0.0003754951452171148,
      "loss": 3.2311,
      "step": 96545
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.574560523033142,
      "learning_rate": 0.0003754911862896515,
      "loss": 3.0039,
      "step": 96546
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6350691318511963,
      "learning_rate": 0.0003754872273481529,
      "loss": 3.1192,
      "step": 96547
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7198598384857178,
      "learning_rate": 0.0003754832683926198,
      "loss": 3.1243,
      "step": 96548
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.655272364616394,
      "learning_rate": 0.00037547930942305295,
      "loss": 2.9145,
      "step": 96549
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4490325450897217,
      "learning_rate": 0.00037547535043945307,
      "loss": 3.0814,
      "step": 96550
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.679917335510254,
      "learning_rate": 0.0003754713914418209,
      "loss": 3.1102,
      "step": 96551
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5308109521865845,
      "learning_rate": 0.00037546743243015726,
      "loss": 2.8898,
      "step": 96552
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.758732795715332,
      "learning_rate": 0.00037546347340446264,
      "loss": 2.9449,
      "step": 96553
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5192205905914307,
      "learning_rate": 0.00037545951436473806,
      "loss": 3.0293,
      "step": 96554
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5289788246154785,
      "learning_rate": 0.00037545555531098406,
      "loss": 3.0265,
      "step": 96555
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.282829523086548,
      "learning_rate": 0.00037545159624320147,
      "loss": 2.8118,
      "step": 96556
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8843579292297363,
      "learning_rate": 0.0003754476371613909,
      "loss": 3.1252,
      "step": 96557
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9105950593948364,
      "learning_rate": 0.0003754436780655534,
      "loss": 3.0906,
      "step": 96558
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.91139817237854,
      "learning_rate": 0.00037543971895568934,
      "loss": 3.0034,
      "step": 96559
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2975997924804688,
      "learning_rate": 0.0003754357598317997,
      "loss": 2.9778,
      "step": 96560
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6373084783554077,
      "learning_rate": 0.00037543180069388513,
      "loss": 2.9342,
      "step": 96561
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.955211877822876,
      "learning_rate": 0.00037542784154194627,
      "loss": 3.284,
      "step": 96562
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.094658851623535,
      "learning_rate": 0.000375423882375984,
      "loss": 2.908,
      "step": 96563
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.125885486602783,
      "learning_rate": 0.0003754199231959991,
      "loss": 3.1862,
      "step": 96564
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2376177310943604,
      "learning_rate": 0.00037541596400199204,
      "loss": 2.9463,
      "step": 96565
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7949844598770142,
      "learning_rate": 0.0003754120047939638,
      "loss": 2.9425,
      "step": 96566
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8774980306625366,
      "learning_rate": 0.00037540804557191515,
      "loss": 2.8861,
      "step": 96567
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8599175214767456,
      "learning_rate": 0.00037540408633584654,
      "loss": 2.9598,
      "step": 96568
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.177267074584961,
      "learning_rate": 0.00037540012708575903,
      "loss": 3.0295,
      "step": 96569
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7619305849075317,
      "learning_rate": 0.00037539616782165323,
      "loss": 3.0711,
      "step": 96570
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.201972007751465,
      "learning_rate": 0.0003753922085435297,
      "loss": 2.9538,
      "step": 96571
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7358620166778564,
      "learning_rate": 0.00037538824925138943,
      "loss": 3.1564,
      "step": 96572
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.60694420337677,
      "learning_rate": 0.0003753842899452331,
      "loss": 2.9647,
      "step": 96573
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1721866130828857,
      "learning_rate": 0.0003753803306250613,
      "loss": 3.2138,
      "step": 96574
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.749297022819519,
      "learning_rate": 0.00037537637129087496,
      "loss": 3.1294,
      "step": 96575
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.238140106201172,
      "learning_rate": 0.00037537241194267475,
      "loss": 2.9772,
      "step": 96576
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8450740575790405,
      "learning_rate": 0.00037536845258046134,
      "loss": 2.9685,
      "step": 96577
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.04964017868042,
      "learning_rate": 0.0003753644932042355,
      "loss": 3.0104,
      "step": 96578
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6203689575195312,
      "learning_rate": 0.000375360533813998,
      "loss": 3.305,
      "step": 96579
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5925945043563843,
      "learning_rate": 0.00037535657440974956,
      "loss": 3.1795,
      "step": 96580
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6956567764282227,
      "learning_rate": 0.00037535261499149086,
      "loss": 2.9183,
      "step": 96581
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0302228927612305,
      "learning_rate": 0.00037534865555922275,
      "loss": 2.9435,
      "step": 96582
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7811030149459839,
      "learning_rate": 0.0003753446961129458,
      "loss": 3.0435,
      "step": 96583
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8923125267028809,
      "learning_rate": 0.00037534073665266096,
      "loss": 3.0325,
      "step": 96584
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.917136788368225,
      "learning_rate": 0.0003753367771783689,
      "loss": 3.0843,
      "step": 96585
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4638028144836426,
      "learning_rate": 0.0003753328176900702,
      "loss": 2.9799,
      "step": 96586
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4142847061157227,
      "learning_rate": 0.0003753288581877657,
      "loss": 2.9701,
      "step": 96587
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.793636679649353,
      "learning_rate": 0.00037532489867145617,
      "loss": 2.9466,
      "step": 96588
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.872222661972046,
      "learning_rate": 0.0003753209391411424,
      "loss": 3.1386,
      "step": 96589
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8869951963424683,
      "learning_rate": 0.0003753169795968249,
      "loss": 2.9983,
      "step": 96590
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6539121866226196,
      "learning_rate": 0.0003753130200385046,
      "loss": 2.9244,
      "step": 96591
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.143960475921631,
      "learning_rate": 0.0003753090604661823,
      "loss": 3.0809,
      "step": 96592
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8768858909606934,
      "learning_rate": 0.0003753051008798585,
      "loss": 3.1108,
      "step": 96593
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.022599697113037,
      "learning_rate": 0.00037530114127953406,
      "loss": 3.005,
      "step": 96594
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.6536669731140137,
      "learning_rate": 0.00037529718166520986,
      "loss": 3.0705,
      "step": 96595
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7350401878356934,
      "learning_rate": 0.00037529322203688634,
      "loss": 3.0273,
      "step": 96596
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.060072183609009,
      "learning_rate": 0.0003752892623945644,
      "loss": 3.1245,
      "step": 96597
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.85409414768219,
      "learning_rate": 0.0003752853027382448,
      "loss": 3.2477,
      "step": 96598
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.953141450881958,
      "learning_rate": 0.0003752813430679283,
      "loss": 3.0713,
      "step": 96599
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.069612979888916,
      "learning_rate": 0.0003752773833836155,
      "loss": 2.7108,
      "step": 96600
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4966301918029785,
      "learning_rate": 0.0003752734236853072,
      "loss": 3.0196,
      "step": 96601
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7800259590148926,
      "learning_rate": 0.00037526946397300413,
      "loss": 3.3743,
      "step": 96602
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.2378547191619873,
      "learning_rate": 0.00037526550424670717,
      "loss": 3.0141,
      "step": 96603
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7850645780563354,
      "learning_rate": 0.00037526154450641684,
      "loss": 3.1834,
      "step": 96604
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6979215145111084,
      "learning_rate": 0.00037525758475213396,
      "loss": 3.145,
      "step": 96605
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7024133205413818,
      "learning_rate": 0.00037525362498385935,
      "loss": 2.9353,
      "step": 96606
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.682291030883789,
      "learning_rate": 0.0003752496652015936,
      "loss": 2.9607,
      "step": 96607
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7852580547332764,
      "learning_rate": 0.00037524570540533757,
      "loss": 3.0096,
      "step": 96608
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6298772096633911,
      "learning_rate": 0.00037524174559509186,
      "loss": 3.196,
      "step": 96609
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.099815607070923,
      "learning_rate": 0.00037523778577085734,
      "loss": 2.8646,
      "step": 96610
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.467176914215088,
      "learning_rate": 0.0003752338259326347,
      "loss": 2.7769,
      "step": 96611
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7400426864624023,
      "learning_rate": 0.0003752298660804247,
      "loss": 2.9589,
      "step": 96612
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.313091993331909,
      "learning_rate": 0.00037522590621422803,
      "loss": 2.9214,
      "step": 96613
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4283998012542725,
      "learning_rate": 0.0003752219463340455,
      "loss": 3.1112,
      "step": 96614
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9973056316375732,
      "learning_rate": 0.00037521798643987765,
      "loss": 3.1062,
      "step": 96615
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.085397481918335,
      "learning_rate": 0.00037521402653172545,
      "loss": 3.095,
      "step": 96616
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7619109153747559,
      "learning_rate": 0.00037521006660958954,
      "loss": 2.8574,
      "step": 96617
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7867354154586792,
      "learning_rate": 0.00037520610667347057,
      "loss": 3.2101,
      "step": 96618
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7110986709594727,
      "learning_rate": 0.00037520214672336956,
      "loss": 3.1355,
      "step": 96619
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.90071439743042,
      "learning_rate": 0.0003751981867592869,
      "loss": 2.7868,
      "step": 96620
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9279358386993408,
      "learning_rate": 0.00037519422678122344,
      "loss": 2.7371,
      "step": 96621
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8044939041137695,
      "learning_rate": 0.00037519026678918007,
      "loss": 3.0801,
      "step": 96622
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.727493405342102,
      "learning_rate": 0.0003751863067831573,
      "loss": 2.8339,
      "step": 96623
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8812683820724487,
      "learning_rate": 0.000375182346763156,
      "loss": 3.1115,
      "step": 96624
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9515669345855713,
      "learning_rate": 0.000375178386729177,
      "loss": 2.7792,
      "step": 96625
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8914273977279663,
      "learning_rate": 0.0003751744266812208,
      "loss": 2.823,
      "step": 96626
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3695411682128906,
      "learning_rate": 0.00037517046661928827,
      "loss": 2.6634,
      "step": 96627
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0308241844177246,
      "learning_rate": 0.00037516650654338023,
      "loss": 2.915,
      "step": 96628
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.02018666267395,
      "learning_rate": 0.0003751625464534972,
      "loss": 3.222,
      "step": 96629
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.656480312347412,
      "learning_rate": 0.0003751585863496401,
      "loss": 3.072,
      "step": 96630
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.963770866394043,
      "learning_rate": 0.00037515462623180957,
      "loss": 3.0856,
      "step": 96631
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.2454781532287598,
      "learning_rate": 0.00037515066610000635,
      "loss": 2.8626,
      "step": 96632
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0093953609466553,
      "learning_rate": 0.00037514670595423125,
      "loss": 3.153,
      "step": 96633
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7926188707351685,
      "learning_rate": 0.000375142745794485,
      "loss": 2.7917,
      "step": 96634
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.503143548965454,
      "learning_rate": 0.00037513878562076827,
      "loss": 3.0385,
      "step": 96635
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2097365856170654,
      "learning_rate": 0.00037513482543308175,
      "loss": 2.9012,
      "step": 96636
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0006442070007324,
      "learning_rate": 0.00037513086523142627,
      "loss": 2.8205,
      "step": 96637
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8868080377578735,
      "learning_rate": 0.0003751269050158026,
      "loss": 2.7719,
      "step": 96638
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2801480293273926,
      "learning_rate": 0.0003751229447862114,
      "loss": 2.7943,
      "step": 96639
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3873708248138428,
      "learning_rate": 0.0003751189845426533,
      "loss": 3.1506,
      "step": 96640
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.003891706466675,
      "learning_rate": 0.0003751150242851293,
      "loss": 3.0527,
      "step": 96641
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.5971221923828125,
      "learning_rate": 0.00037511106401364,
      "loss": 3.0591,
      "step": 96642
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7695837020874023,
      "learning_rate": 0.0003751071037281861,
      "loss": 3.231,
      "step": 96643
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7836970090866089,
      "learning_rate": 0.0003751031434287684,
      "loss": 3.0401,
      "step": 96644
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.649965524673462,
      "learning_rate": 0.00037509918311538754,
      "loss": 2.8709,
      "step": 96645
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1358561515808105,
      "learning_rate": 0.0003750952227880444,
      "loss": 2.8751,
      "step": 96646
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.752056837081909,
      "learning_rate": 0.00037509126244673956,
      "loss": 2.9678,
      "step": 96647
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.028491497039795,
      "learning_rate": 0.00037508730209147393,
      "loss": 2.8883,
      "step": 96648
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6566587686538696,
      "learning_rate": 0.00037508334172224817,
      "loss": 3.1378,
      "step": 96649
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.554281234741211,
      "learning_rate": 0.00037507938133906287,
      "loss": 3.0429,
      "step": 96650
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7432212829589844,
      "learning_rate": 0.00037507542094191896,
      "loss": 2.9575,
      "step": 96651
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0671327114105225,
      "learning_rate": 0.00037507146053081714,
      "loss": 3.1022,
      "step": 96652
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6954892873764038,
      "learning_rate": 0.00037506750010575805,
      "loss": 3.2811,
      "step": 96653
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7616535425186157,
      "learning_rate": 0.00037506353966674263,
      "loss": 3.0564,
      "step": 96654
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2388226985931396,
      "learning_rate": 0.0003750595792137713,
      "loss": 2.7249,
      "step": 96655
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.572853446006775,
      "learning_rate": 0.00037505561874684516,
      "loss": 2.9054,
      "step": 96656
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9758692979812622,
      "learning_rate": 0.00037505165826596463,
      "loss": 2.8803,
      "step": 96657
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0354061126708984,
      "learning_rate": 0.00037504769777113063,
      "loss": 3.0432,
      "step": 96658
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.103442907333374,
      "learning_rate": 0.0003750437372623438,
      "loss": 3.0276,
      "step": 96659
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.96383535861969,
      "learning_rate": 0.000375039776739605,
      "loss": 2.9156,
      "step": 96660
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7260167598724365,
      "learning_rate": 0.0003750358162029148,
      "loss": 2.9636,
      "step": 96661
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2346243858337402,
      "learning_rate": 0.00037503185565227414,
      "loss": 2.9995,
      "step": 96662
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0807552337646484,
      "learning_rate": 0.0003750278950876836,
      "loss": 3.0327,
      "step": 96663
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.877428650856018,
      "learning_rate": 0.00037502393450914383,
      "loss": 2.9708,
      "step": 96664
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9045913219451904,
      "learning_rate": 0.00037501997391665585,
      "loss": 2.7543,
      "step": 96665
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.904334306716919,
      "learning_rate": 0.0003750160133102202,
      "loss": 3.0193,
      "step": 96666
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.364394426345825,
      "learning_rate": 0.0003750120526898376,
      "loss": 3.1592,
      "step": 96667
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6476877927780151,
      "learning_rate": 0.00037500809205550897,
      "loss": 3.108,
      "step": 96668
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8560502529144287,
      "learning_rate": 0.00037500413140723483,
      "loss": 3.1594,
      "step": 96669
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.547131061553955,
      "learning_rate": 0.00037500017074501595,
      "loss": 3.0994,
      "step": 96670
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9261420965194702,
      "learning_rate": 0.00037499621006885326,
      "loss": 3.1129,
      "step": 96671
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0622379779815674,
      "learning_rate": 0.0003749922493787473,
      "loss": 3.1942,
      "step": 96672
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1825833320617676,
      "learning_rate": 0.0003749882886746988,
      "loss": 2.9445,
      "step": 96673
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8333865404129028,
      "learning_rate": 0.00037498432795670864,
      "loss": 2.8259,
      "step": 96674
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4864439964294434,
      "learning_rate": 0.0003749803672247775,
      "loss": 2.9191,
      "step": 96675
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.941642165184021,
      "learning_rate": 0.00037497640647890594,
      "loss": 3.1463,
      "step": 96676
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.612211227416992,
      "learning_rate": 0.00037497244571909504,
      "loss": 2.9197,
      "step": 96677
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7433058023452759,
      "learning_rate": 0.0003749684849453452,
      "loss": 2.7762,
      "step": 96678
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8549468517303467,
      "learning_rate": 0.0003749645241576574,
      "loss": 2.9849,
      "step": 96679
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.010645866394043,
      "learning_rate": 0.00037496056335603226,
      "loss": 3.2303,
      "step": 96680
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0966854095458984,
      "learning_rate": 0.00037495660254047055,
      "loss": 3.081,
      "step": 96681
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0891261100769043,
      "learning_rate": 0.00037495264171097294,
      "loss": 2.8217,
      "step": 96682
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9267358779907227,
      "learning_rate": 0.00037494868086754025,
      "loss": 3.0328,
      "step": 96683
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.020148277282715,
      "learning_rate": 0.0003749447200101732,
      "loss": 2.9314,
      "step": 96684
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7964818477630615,
      "learning_rate": 0.00037494075913887246,
      "loss": 2.9828,
      "step": 96685
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7508773803710938,
      "learning_rate": 0.0003749367982536389,
      "loss": 2.9292,
      "step": 96686
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.690073847770691,
      "learning_rate": 0.00037493283735447316,
      "loss": 2.945,
      "step": 96687
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.5320963859558105,
      "learning_rate": 0.0003749288764413759,
      "loss": 3.0628,
      "step": 96688
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8838269710540771,
      "learning_rate": 0.000374924915514348,
      "loss": 2.9405,
      "step": 96689
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8907880783081055,
      "learning_rate": 0.0003749209545733901,
      "loss": 3.0126,
      "step": 96690
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.399332284927368,
      "learning_rate": 0.0003749169936185031,
      "loss": 3.2348,
      "step": 96691
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9419649839401245,
      "learning_rate": 0.00037491303264968747,
      "loss": 3.2675,
      "step": 96692
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8996363878250122,
      "learning_rate": 0.0003749090716669442,
      "loss": 3.0307,
      "step": 96693
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.177557945251465,
      "learning_rate": 0.00037490511067027386,
      "loss": 3.3074,
      "step": 96694
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9367461204528809,
      "learning_rate": 0.0003749011496596773,
      "loss": 2.8371,
      "step": 96695
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6481478214263916,
      "learning_rate": 0.0003748971886351552,
      "loss": 3.2071,
      "step": 96696
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9665849208831787,
      "learning_rate": 0.0003748932275967082,
      "loss": 2.9936,
      "step": 96697
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6302675008773804,
      "learning_rate": 0.0003748892665443373,
      "loss": 2.8109,
      "step": 96698
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9800150394439697,
      "learning_rate": 0.00037488530547804297,
      "loss": 3.0268,
      "step": 96699
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.58681058883667,
      "learning_rate": 0.00037488134439782604,
      "loss": 2.7865,
      "step": 96700
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9971734285354614,
      "learning_rate": 0.00037487738330368726,
      "loss": 3.0328,
      "step": 96701
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1553866863250732,
      "learning_rate": 0.0003748734221956274,
      "loss": 2.9857,
      "step": 96702
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7358653545379639,
      "learning_rate": 0.0003748694610736471,
      "loss": 2.968,
      "step": 96703
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.665534496307373,
      "learning_rate": 0.00037486549993774723,
      "loss": 2.8781,
      "step": 96704
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7529642581939697,
      "learning_rate": 0.0003748615387879284,
      "loss": 3.2484,
      "step": 96705
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.287508249282837,
      "learning_rate": 0.00037485757762419135,
      "loss": 3.1062,
      "step": 96706
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6205508708953857,
      "learning_rate": 0.000374853616446537,
      "loss": 3.1596,
      "step": 96707
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7825217247009277,
      "learning_rate": 0.00037484965525496586,
      "loss": 3.0868,
      "step": 96708
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.9173781871795654,
      "learning_rate": 0.0003748456940494787,
      "loss": 2.8876,
      "step": 96709
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.1263628005981445,
      "learning_rate": 0.00037484173283007646,
      "loss": 3.1858,
      "step": 96710
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7675055265426636,
      "learning_rate": 0.0003748377715967597,
      "loss": 3.1553,
      "step": 96711
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6772408485412598,
      "learning_rate": 0.000374833810349529,
      "loss": 3.0681,
      "step": 96712
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5735981464385986,
      "learning_rate": 0.0003748298490883855,
      "loss": 2.8063,
      "step": 96713
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.10735821723938,
      "learning_rate": 0.0003748258878133296,
      "loss": 2.7087,
      "step": 96714
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8571666479110718,
      "learning_rate": 0.0003748219265243622,
      "loss": 2.9358,
      "step": 96715
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9311046600341797,
      "learning_rate": 0.00037481796522148396,
      "loss": 3.1298,
      "step": 96716
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7559105157852173,
      "learning_rate": 0.0003748140039046957,
      "loss": 2.8634,
      "step": 96717
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6950960159301758,
      "learning_rate": 0.00037481004257399804,
      "loss": 2.8102,
      "step": 96718
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7229351997375488,
      "learning_rate": 0.0003748060812293918,
      "loss": 3.1965,
      "step": 96719
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.705364465713501,
      "learning_rate": 0.00037480211987087774,
      "loss": 2.8631,
      "step": 96720
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7199132442474365,
      "learning_rate": 0.00037479815849845646,
      "loss": 2.8109,
      "step": 96721
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9398783445358276,
      "learning_rate": 0.0003747941971121289,
      "loss": 3.1101,
      "step": 96722
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.134516954421997,
      "learning_rate": 0.00037479023571189565,
      "loss": 2.7005,
      "step": 96723
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8029295206069946,
      "learning_rate": 0.00037478627429775747,
      "loss": 2.6871,
      "step": 96724
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6658843755722046,
      "learning_rate": 0.0003747823128697151,
      "loss": 3.142,
      "step": 96725
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4823119640350342,
      "learning_rate": 0.00037477835142776935,
      "loss": 3.2073,
      "step": 96726
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5927493572235107,
      "learning_rate": 0.00037477438997192083,
      "loss": 3.1077,
      "step": 96727
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9490879774093628,
      "learning_rate": 0.00037477042850217034,
      "loss": 3.0164,
      "step": 96728
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.655120611190796,
      "learning_rate": 0.00037476646701851865,
      "loss": 2.9961,
      "step": 96729
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6627466678619385,
      "learning_rate": 0.00037476250552096645,
      "loss": 2.7057,
      "step": 96730
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3620479106903076,
      "learning_rate": 0.0003747585440095144,
      "loss": 2.929,
      "step": 96731
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5154659748077393,
      "learning_rate": 0.0003747545824841635,
      "loss": 3.0455,
      "step": 96732
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.845806360244751,
      "learning_rate": 0.0003747506209449142,
      "loss": 3.041,
      "step": 96733
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.740666389465332,
      "learning_rate": 0.00037474665939176734,
      "loss": 2.8238,
      "step": 96734
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.598274827003479,
      "learning_rate": 0.00037474269782472374,
      "loss": 3.1732,
      "step": 96735
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5202126502990723,
      "learning_rate": 0.000374738736243784,
      "loss": 3.0401,
      "step": 96736
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5365983247756958,
      "learning_rate": 0.0003747347746489489,
      "loss": 2.7033,
      "step": 96737
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.62696373462677,
      "learning_rate": 0.0003747308130402193,
      "loss": 2.9269,
      "step": 96738
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.290329694747925,
      "learning_rate": 0.0003747268514175957,
      "loss": 2.9689,
      "step": 96739
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8728944063186646,
      "learning_rate": 0.000374722889781079,
      "loss": 2.8352,
      "step": 96740
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2230753898620605,
      "learning_rate": 0.00037471892813067,
      "loss": 2.8884,
      "step": 96741
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.356651782989502,
      "learning_rate": 0.00037471496646636923,
      "loss": 2.9212,
      "step": 96742
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.978585958480835,
      "learning_rate": 0.0003747110047881775,
      "loss": 3.1467,
      "step": 96743
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.909637928009033,
      "learning_rate": 0.00037470704309609576,
      "loss": 2.761,
      "step": 96744
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2603814601898193,
      "learning_rate": 0.00037470308139012445,
      "loss": 2.9976,
      "step": 96745
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8705202341079712,
      "learning_rate": 0.0003746991196702644,
      "loss": 3.2441,
      "step": 96746
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6881669759750366,
      "learning_rate": 0.0003746951579365165,
      "loss": 3.0415,
      "step": 96747
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9413533210754395,
      "learning_rate": 0.0003746911961888813,
      "loss": 2.8433,
      "step": 96748
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7258028984069824,
      "learning_rate": 0.0003746872344273595,
      "loss": 2.9175,
      "step": 96749
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9938548803329468,
      "learning_rate": 0.0003746832726519521,
      "loss": 2.9421,
      "step": 96750
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1743576526641846,
      "learning_rate": 0.00037467931086265956,
      "loss": 3.1254,
      "step": 96751
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0664570331573486,
      "learning_rate": 0.0003746753490594827,
      "loss": 3.0133,
      "step": 96752
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9505811929702759,
      "learning_rate": 0.0003746713872424224,
      "loss": 2.7812,
      "step": 96753
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6702206134796143,
      "learning_rate": 0.0003746674254114792,
      "loss": 3.1331,
      "step": 96754
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8360406160354614,
      "learning_rate": 0.0003746634635666539,
      "loss": 2.9874,
      "step": 96755
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.13992977142334,
      "learning_rate": 0.00037465950170794733,
      "loss": 3.1327,
      "step": 96756
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8221584558486938,
      "learning_rate": 0.00037465553983536006,
      "loss": 2.9783,
      "step": 96757
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1017074584960938,
      "learning_rate": 0.00037465157794889294,
      "loss": 3.0415,
      "step": 96758
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7524079084396362,
      "learning_rate": 0.00037464761604854674,
      "loss": 2.8941,
      "step": 96759
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9233922958374023,
      "learning_rate": 0.0003746436541343221,
      "loss": 3.101,
      "step": 96760
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1510303020477295,
      "learning_rate": 0.00037463969220621977,
      "loss": 3.1797,
      "step": 96761
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5586795806884766,
      "learning_rate": 0.0003746357302642406,
      "loss": 2.9286,
      "step": 96762
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.039330005645752,
      "learning_rate": 0.0003746317683083851,
      "loss": 3.0614,
      "step": 96763
    },
    {
      "epoch": 1.26,
      "grad_norm": 4.068951606750488,
      "learning_rate": 0.00037462780633865426,
      "loss": 3.3698,
      "step": 96764
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.273533344268799,
      "learning_rate": 0.00037462384435504864,
      "loss": 2.8817,
      "step": 96765
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1764676570892334,
      "learning_rate": 0.0003746198823575691,
      "loss": 2.8476,
      "step": 96766
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.670074224472046,
      "learning_rate": 0.0003746159203462162,
      "loss": 2.8733,
      "step": 96767
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.6151888370513916,
      "learning_rate": 0.00037461195832099093,
      "loss": 3.0493,
      "step": 96768
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.097038507461548,
      "learning_rate": 0.0003746079962818938,
      "loss": 2.9661,
      "step": 96769
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5028042793273926,
      "learning_rate": 0.0003746040342289257,
      "loss": 2.9747,
      "step": 96770
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.225308418273926,
      "learning_rate": 0.0003746000721620872,
      "loss": 3.1518,
      "step": 96771
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.221425771713257,
      "learning_rate": 0.0003745961100813792,
      "loss": 2.7904,
      "step": 96772
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8201234340667725,
      "learning_rate": 0.00037459214798680237,
      "loss": 3.1941,
      "step": 96773
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8949121236801147,
      "learning_rate": 0.00037458818587835744,
      "loss": 2.983,
      "step": 96774
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.321906328201294,
      "learning_rate": 0.0003745842237560452,
      "loss": 3.2166,
      "step": 96775
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.6528985500335693,
      "learning_rate": 0.0003745802616198663,
      "loss": 2.7753,
      "step": 96776
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.898683786392212,
      "learning_rate": 0.0003745762994698216,
      "loss": 3.0374,
      "step": 96777
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6219894886016846,
      "learning_rate": 0.00037457233730591165,
      "loss": 3.1111,
      "step": 96778
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0481348037719727,
      "learning_rate": 0.0003745683751281374,
      "loss": 2.9684,
      "step": 96779
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.3533942699432373,
      "learning_rate": 0.00037456441293649937,
      "loss": 2.7461,
      "step": 96780
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7336235046386719,
      "learning_rate": 0.0003745604507309985,
      "loss": 3.003,
      "step": 96781
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.768153190612793,
      "learning_rate": 0.00037455648851163534,
      "loss": 2.8399,
      "step": 96782
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.893811583518982,
      "learning_rate": 0.0003745525262784108,
      "loss": 2.8248,
      "step": 96783
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.9997315406799316,
      "learning_rate": 0.0003745485640313255,
      "loss": 2.8883,
      "step": 96784
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0920283794403076,
      "learning_rate": 0.00037454460177038025,
      "loss": 3.0868,
      "step": 96785
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7430708408355713,
      "learning_rate": 0.0003745406394955757,
      "loss": 3.223,
      "step": 96786
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.8729758262634277,
      "learning_rate": 0.00037453667720691275,
      "loss": 3.0428,
      "step": 96787
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.632302165031433,
      "learning_rate": 0.00037453271490439194,
      "loss": 2.9989,
      "step": 96788
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7099366188049316,
      "learning_rate": 0.00037452875258801405,
      "loss": 2.871,
      "step": 96789
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8306399583816528,
      "learning_rate": 0.00037452479025777994,
      "loss": 2.9479,
      "step": 96790
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8471922874450684,
      "learning_rate": 0.0003745208279136902,
      "loss": 2.9878,
      "step": 96791
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6240023374557495,
      "learning_rate": 0.00037451686555574565,
      "loss": 2.97,
      "step": 96792
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8372811079025269,
      "learning_rate": 0.0003745129031839471,
      "loss": 3.0137,
      "step": 96793
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6183662414550781,
      "learning_rate": 0.0003745089407982951,
      "loss": 2.8978,
      "step": 96794
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9072608947753906,
      "learning_rate": 0.00037450497839879043,
      "loss": 2.986,
      "step": 96795
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7781049013137817,
      "learning_rate": 0.000374501015985434,
      "loss": 3.0602,
      "step": 96796
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5740526914596558,
      "learning_rate": 0.0003744970535582264,
      "loss": 3.0437,
      "step": 96797
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9300501346588135,
      "learning_rate": 0.00037449309111716833,
      "loss": 3.0755,
      "step": 96798
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6738513708114624,
      "learning_rate": 0.0003744891286622607,
      "loss": 3.1047,
      "step": 96799
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.04272198677063,
      "learning_rate": 0.00037448516619350403,
      "loss": 2.9689,
      "step": 96800
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5992741584777832,
      "learning_rate": 0.00037448120371089914,
      "loss": 2.8885,
      "step": 96801
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7854795455932617,
      "learning_rate": 0.00037447724121444687,
      "loss": 3.2112,
      "step": 96802
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.852160930633545,
      "learning_rate": 0.0003744732787041478,
      "loss": 3.1811,
      "step": 96803
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5560673475265503,
      "learning_rate": 0.0003744693161800028,
      "loss": 3.0261,
      "step": 96804
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6357043981552124,
      "learning_rate": 0.0003744653536420125,
      "loss": 2.9489,
      "step": 96805
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.637727975845337,
      "learning_rate": 0.00037446139109017774,
      "loss": 3.0074,
      "step": 96806
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6542142629623413,
      "learning_rate": 0.00037445742852449917,
      "loss": 3.1362,
      "step": 96807
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9102997779846191,
      "learning_rate": 0.00037445346594497766,
      "loss": 2.9284,
      "step": 96808
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6133882999420166,
      "learning_rate": 0.00037444950335161365,
      "loss": 2.8028,
      "step": 96809
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8686175346374512,
      "learning_rate": 0.0003744455407444082,
      "loss": 3.1283,
      "step": 96810
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.695085048675537,
      "learning_rate": 0.00037444157812336194,
      "loss": 2.9186,
      "step": 96811
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6826422214508057,
      "learning_rate": 0.00037443761548847554,
      "loss": 3.3377,
      "step": 96812
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1754910945892334,
      "learning_rate": 0.00037443365283974973,
      "loss": 2.8922,
      "step": 96813
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8448803424835205,
      "learning_rate": 0.0003744296901771854,
      "loss": 3.1462,
      "step": 96814
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5694012641906738,
      "learning_rate": 0.00037442572750078315,
      "loss": 3.2453,
      "step": 96815
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8077435493469238,
      "learning_rate": 0.00037442176481054367,
      "loss": 3.0449,
      "step": 96816
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.418112277984619,
      "learning_rate": 0.00037441780210646793,
      "loss": 3.0722,
      "step": 96817
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7815752029418945,
      "learning_rate": 0.00037441383938855636,
      "loss": 3.2256,
      "step": 96818
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.411360263824463,
      "learning_rate": 0.0003744098766568099,
      "loss": 3.1196,
      "step": 96819
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8149383068084717,
      "learning_rate": 0.00037440591391122937,
      "loss": 2.8706,
      "step": 96820
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5711581707000732,
      "learning_rate": 0.0003744019511518152,
      "loss": 2.9302,
      "step": 96821
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2706527709960938,
      "learning_rate": 0.00037439798837856833,
      "loss": 2.9077,
      "step": 96822
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7491021156311035,
      "learning_rate": 0.0003743940255914896,
      "loss": 3.2852,
      "step": 96823
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.984402894973755,
      "learning_rate": 0.0003743900627905795,
      "loss": 3.0446,
      "step": 96824
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5620369911193848,
      "learning_rate": 0.0003743860999758389,
      "loss": 3.0866,
      "step": 96825
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2145142555236816,
      "learning_rate": 0.00037438213714726857,
      "loss": 2.8355,
      "step": 96826
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7385562658309937,
      "learning_rate": 0.0003743781743048692,
      "loss": 3.1606,
      "step": 96827
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8754013776779175,
      "learning_rate": 0.0003743742114486414,
      "loss": 3.2007,
      "step": 96828
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1764707565307617,
      "learning_rate": 0.00037437024857858616,
      "loss": 2.9452,
      "step": 96829
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1850531101226807,
      "learning_rate": 0.00037436628569470406,
      "loss": 2.8189,
      "step": 96830
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4643495082855225,
      "learning_rate": 0.0003743623227969958,
      "loss": 3.0884,
      "step": 96831
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0477635860443115,
      "learning_rate": 0.00037435835988546223,
      "loss": 2.9873,
      "step": 96832
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6968046426773071,
      "learning_rate": 0.00037435439696010403,
      "loss": 3.121,
      "step": 96833
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4045803546905518,
      "learning_rate": 0.0003743504340209219,
      "loss": 2.8559,
      "step": 96834
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7554179430007935,
      "learning_rate": 0.0003743464710679167,
      "loss": 3.0072,
      "step": 96835
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7552223205566406,
      "learning_rate": 0.00037434250810108904,
      "loss": 3.1779,
      "step": 96836
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.103034734725952,
      "learning_rate": 0.0003743385451204398,
      "loss": 2.7851,
      "step": 96837
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5774325132369995,
      "learning_rate": 0.0003743345821259695,
      "loss": 2.9464,
      "step": 96838
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9436671733856201,
      "learning_rate": 0.00037433061911767903,
      "loss": 3.2126,
      "step": 96839
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7990992069244385,
      "learning_rate": 0.00037432665609556906,
      "loss": 3.0463,
      "step": 96840
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6503134965896606,
      "learning_rate": 0.00037432269305964045,
      "loss": 3.3308,
      "step": 96841
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7253012657165527,
      "learning_rate": 0.00037431873000989374,
      "loss": 2.8578,
      "step": 96842
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5741608142852783,
      "learning_rate": 0.00037431476694632984,
      "loss": 3.158,
      "step": 96843
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9822341203689575,
      "learning_rate": 0.00037431080386894953,
      "loss": 2.9005,
      "step": 96844
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.716996669769287,
      "learning_rate": 0.0003743068407777532,
      "loss": 2.9487,
      "step": 96845
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0550179481506348,
      "learning_rate": 0.000374302877672742,
      "loss": 3.097,
      "step": 96846
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9056847095489502,
      "learning_rate": 0.0003742989145539165,
      "loss": 2.5891,
      "step": 96847
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9026267528533936,
      "learning_rate": 0.0003742949514212774,
      "loss": 3.0069,
      "step": 96848
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0003373622894287,
      "learning_rate": 0.00037429098827482536,
      "loss": 2.8946,
      "step": 96849
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8370327949523926,
      "learning_rate": 0.0003742870251145613,
      "loss": 2.8362,
      "step": 96850
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6852631568908691,
      "learning_rate": 0.00037428306194048594,
      "loss": 3.021,
      "step": 96851
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7069979906082153,
      "learning_rate": 0.0003742790987525999,
      "loss": 3.0831,
      "step": 96852
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3065831661224365,
      "learning_rate": 0.0003742751355509039,
      "loss": 3.2702,
      "step": 96853
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8007878065109253,
      "learning_rate": 0.00037427117233539885,
      "loss": 2.9566,
      "step": 96854
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8921042680740356,
      "learning_rate": 0.0003742672091060853,
      "loss": 3.1413,
      "step": 96855
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7007603645324707,
      "learning_rate": 0.0003742632458629642,
      "loss": 3.0658,
      "step": 96856
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9616931676864624,
      "learning_rate": 0.00037425928260603617,
      "loss": 2.8552,
      "step": 96857
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2119290828704834,
      "learning_rate": 0.00037425531933530184,
      "loss": 3.0371,
      "step": 96858
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1007776260375977,
      "learning_rate": 0.000374251356050762,
      "loss": 2.9582,
      "step": 96859
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7578537464141846,
      "learning_rate": 0.0003742473927524175,
      "loss": 2.9934,
      "step": 96860
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6164721250534058,
      "learning_rate": 0.000374243429440269,
      "loss": 2.9532,
      "step": 96861
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6876235008239746,
      "learning_rate": 0.00037423946611431723,
      "loss": 3.4128,
      "step": 96862
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1935818195343018,
      "learning_rate": 0.000374235502774563,
      "loss": 3.0343,
      "step": 96863
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.8464863300323486,
      "learning_rate": 0.000374231539421007,
      "loss": 2.8089,
      "step": 96864
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9702110290527344,
      "learning_rate": 0.00037422757605364986,
      "loss": 2.9879,
      "step": 96865
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.736194372177124,
      "learning_rate": 0.00037422361267249246,
      "loss": 2.6965,
      "step": 96866
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8000634908676147,
      "learning_rate": 0.00037421964927753545,
      "loss": 2.9519,
      "step": 96867
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.068363666534424,
      "learning_rate": 0.00037421568586877975,
      "loss": 2.8081,
      "step": 96868
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8247116804122925,
      "learning_rate": 0.0003742117224462258,
      "loss": 3.0572,
      "step": 96869
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.656336545944214,
      "learning_rate": 0.0003742077590098745,
      "loss": 3.1794,
      "step": 96870
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.440770149230957,
      "learning_rate": 0.00037420379555972664,
      "loss": 2.8808,
      "step": 96871
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8417091369628906,
      "learning_rate": 0.0003741998320957829,
      "loss": 2.8003,
      "step": 96872
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.076209306716919,
      "learning_rate": 0.00037419586861804394,
      "loss": 2.8268,
      "step": 96873
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.652383327484131,
      "learning_rate": 0.0003741919051265106,
      "loss": 2.879,
      "step": 96874
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.046623468399048,
      "learning_rate": 0.0003741879416211836,
      "loss": 2.8711,
      "step": 96875
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.658158302307129,
      "learning_rate": 0.0003741839781020637,
      "loss": 2.9154,
      "step": 96876
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.159146308898926,
      "learning_rate": 0.0003741800145691515,
      "loss": 3.2234,
      "step": 96877
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0167644023895264,
      "learning_rate": 0.0003741760510224479,
      "loss": 2.7841,
      "step": 96878
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.881649136543274,
      "learning_rate": 0.00037417208746195357,
      "loss": 2.8405,
      "step": 96879
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0716195106506348,
      "learning_rate": 0.0003741681238876692,
      "loss": 2.9823,
      "step": 96880
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0370185375213623,
      "learning_rate": 0.00037416416029959566,
      "loss": 3.0324,
      "step": 96881
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.956819772720337,
      "learning_rate": 0.0003741601966977336,
      "loss": 3.068,
      "step": 96882
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.046191930770874,
      "learning_rate": 0.0003741562330820837,
      "loss": 3.0586,
      "step": 96883
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.3907294273376465,
      "learning_rate": 0.0003741522694526467,
      "loss": 2.8804,
      "step": 96884
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7901363372802734,
      "learning_rate": 0.0003741483058094235,
      "loss": 2.9489,
      "step": 96885
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8134386539459229,
      "learning_rate": 0.00037414434215241466,
      "loss": 3.2515,
      "step": 96886
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4500842094421387,
      "learning_rate": 0.0003741403784816211,
      "loss": 2.755,
      "step": 96887
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1681997776031494,
      "learning_rate": 0.00037413641479704335,
      "loss": 3.0443,
      "step": 96888
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5899786949157715,
      "learning_rate": 0.00037413245109868223,
      "loss": 2.7618,
      "step": 96889
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4013400077819824,
      "learning_rate": 0.00037412848738653856,
      "loss": 2.8585,
      "step": 96890
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.861302375793457,
      "learning_rate": 0.0003741245236606129,
      "loss": 2.8063,
      "step": 96891
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5698511600494385,
      "learning_rate": 0.00037412055992090617,
      "loss": 2.8941,
      "step": 96892
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.034106731414795,
      "learning_rate": 0.00037411659616741905,
      "loss": 3.3376,
      "step": 96893
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3329861164093018,
      "learning_rate": 0.00037411263240015217,
      "loss": 2.9079,
      "step": 96894
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.15920352935791,
      "learning_rate": 0.0003741086686191064,
      "loss": 2.8347,
      "step": 96895
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.808832049369812,
      "learning_rate": 0.0003741047048242825,
      "loss": 2.9967,
      "step": 96896
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8838765621185303,
      "learning_rate": 0.00037410074101568104,
      "loss": 3.1322,
      "step": 96897
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6802903413772583,
      "learning_rate": 0.0003740967771933028,
      "loss": 3.0082,
      "step": 96898
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.316645622253418,
      "learning_rate": 0.0003740928133571487,
      "loss": 3.2375,
      "step": 96899
    },
    {
      "epoch": 1.26,
      "grad_norm": 4.013403415679932,
      "learning_rate": 0.0003740888495072193,
      "loss": 3.1684,
      "step": 96900
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0050532817840576,
      "learning_rate": 0.00037408488564351534,
      "loss": 3.0202,
      "step": 96901
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7799535989761353,
      "learning_rate": 0.0003740809217660377,
      "loss": 2.8719,
      "step": 96902
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.8697903156280518,
      "learning_rate": 0.000374076957874787,
      "loss": 3.1882,
      "step": 96903
    },
    {
      "epoch": 1.26,
      "grad_norm": 4.036396026611328,
      "learning_rate": 0.0003740729939697639,
      "loss": 3.1011,
      "step": 96904
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0305063724517822,
      "learning_rate": 0.0003740690300509693,
      "loss": 2.7989,
      "step": 96905
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7931493520736694,
      "learning_rate": 0.00037406506611840386,
      "loss": 2.998,
      "step": 96906
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9785212278366089,
      "learning_rate": 0.00037406110217206835,
      "loss": 2.9424,
      "step": 96907
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.709867000579834,
      "learning_rate": 0.0003740571382119635,
      "loss": 3.0474,
      "step": 96908
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.626544952392578,
      "learning_rate": 0.0003740531742380899,
      "loss": 3.0439,
      "step": 96909
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7972482442855835,
      "learning_rate": 0.0003740492102504485,
      "loss": 3.0364,
      "step": 96910
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7234838008880615,
      "learning_rate": 0.00037404524624903997,
      "loss": 3.2198,
      "step": 96911
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7643721103668213,
      "learning_rate": 0.000374041282233865,
      "loss": 2.9787,
      "step": 96912
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1956937313079834,
      "learning_rate": 0.0003740373182049244,
      "loss": 3.0361,
      "step": 96913
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1475071907043457,
      "learning_rate": 0.00037403335416221894,
      "loss": 2.9758,
      "step": 96914
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7607839107513428,
      "learning_rate": 0.0003740293901057491,
      "loss": 2.9593,
      "step": 96915
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.3729188442230225,
      "learning_rate": 0.0003740254260355159,
      "loss": 3.2252,
      "step": 96916
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.656886339187622,
      "learning_rate": 0.00037402146195152,
      "loss": 3.0527,
      "step": 96917
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.825669050216675,
      "learning_rate": 0.000374017497853762,
      "loss": 2.8417,
      "step": 96918
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3187344074249268,
      "learning_rate": 0.0003740135337422428,
      "loss": 3.2647,
      "step": 96919
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.769866943359375,
      "learning_rate": 0.00037400956961696315,
      "loss": 3.0639,
      "step": 96920
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.977430820465088,
      "learning_rate": 0.0003740056054779237,
      "loss": 3.1712,
      "step": 96921
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8840975761413574,
      "learning_rate": 0.00037400164132512517,
      "loss": 3.2274,
      "step": 96922
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9644043445587158,
      "learning_rate": 0.00037399767715856836,
      "loss": 2.9648,
      "step": 96923
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9057495594024658,
      "learning_rate": 0.000373993712978254,
      "loss": 2.9322,
      "step": 96924
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.448164224624634,
      "learning_rate": 0.0003739897487841828,
      "loss": 2.7978,
      "step": 96925
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.3430373668670654,
      "learning_rate": 0.00037398578457635545,
      "loss": 2.9293,
      "step": 96926
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.368447780609131,
      "learning_rate": 0.0003739818203547729,
      "loss": 2.8577,
      "step": 96927
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.360701322555542,
      "learning_rate": 0.0003739778561194356,
      "loss": 2.9616,
      "step": 96928
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1956071853637695,
      "learning_rate": 0.0003739738918703444,
      "loss": 2.7825,
      "step": 96929
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.81028151512146,
      "learning_rate": 0.00037396992760750024,
      "loss": 3.1409,
      "step": 96930
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9321225881576538,
      "learning_rate": 0.0003739659633309035,
      "loss": 2.9775,
      "step": 96931
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.067936658859253,
      "learning_rate": 0.0003739619990405551,
      "loss": 2.9606,
      "step": 96932
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9066756963729858,
      "learning_rate": 0.00037395803473645587,
      "loss": 3.0104,
      "step": 96933
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.283541202545166,
      "learning_rate": 0.00037395407041860635,
      "loss": 3.0949,
      "step": 96934
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.697472095489502,
      "learning_rate": 0.0003739501060870074,
      "loss": 2.9293,
      "step": 96935
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8652222156524658,
      "learning_rate": 0.00037394614174165975,
      "loss": 2.925,
      "step": 96936
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.91167950630188,
      "learning_rate": 0.0003739421773825641,
      "loss": 2.8636,
      "step": 96937
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1255204677581787,
      "learning_rate": 0.00037393821300972117,
      "loss": 2.7566,
      "step": 96938
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3549938201904297,
      "learning_rate": 0.0003739342486231319,
      "loss": 2.6794,
      "step": 96939
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.41782546043396,
      "learning_rate": 0.00037393028422279663,
      "loss": 3.3319,
      "step": 96940
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9063314199447632,
      "learning_rate": 0.00037392631980871636,
      "loss": 3.0123,
      "step": 96941
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7500088214874268,
      "learning_rate": 0.0003739223553808919,
      "loss": 2.7316,
      "step": 96942
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.58040189743042,
      "learning_rate": 0.0003739183909393239,
      "loss": 2.8367,
      "step": 96943
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6371859312057495,
      "learning_rate": 0.00037391442648401297,
      "loss": 2.714,
      "step": 96944
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.426147937774658,
      "learning_rate": 0.00037391046201495996,
      "loss": 2.8368,
      "step": 96945
    },
    {
      "epoch": 1.26,
      "grad_norm": 5.344878673553467,
      "learning_rate": 0.0003739064975321656,
      "loss": 2.8055,
      "step": 96946
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.496929407119751,
      "learning_rate": 0.0003739025330356307,
      "loss": 3.0745,
      "step": 96947
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.273320436477661,
      "learning_rate": 0.0003738985685253558,
      "loss": 2.8296,
      "step": 96948
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.351794958114624,
      "learning_rate": 0.00037389460400134194,
      "loss": 2.8765,
      "step": 96949
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9223994016647339,
      "learning_rate": 0.0003738906394635895,
      "loss": 2.8386,
      "step": 96950
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7470664978027344,
      "learning_rate": 0.0003738866749120995,
      "loss": 2.7321,
      "step": 96951
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.883307933807373,
      "learning_rate": 0.00037388271034687255,
      "loss": 2.8451,
      "step": 96952
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.910121440887451,
      "learning_rate": 0.00037387874576790936,
      "loss": 3.0408,
      "step": 96953
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3802878856658936,
      "learning_rate": 0.0003738747811752108,
      "loss": 2.8248,
      "step": 96954
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.722327947616577,
      "learning_rate": 0.00037387081656877744,
      "loss": 2.9914,
      "step": 96955
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3800814151763916,
      "learning_rate": 0.00037386685194861017,
      "loss": 2.9055,
      "step": 96956
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.264069080352783,
      "learning_rate": 0.0003738628873147096,
      "loss": 3.0408,
      "step": 96957
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.217711925506592,
      "learning_rate": 0.00037385892266707656,
      "loss": 3.2824,
      "step": 96958
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.3493363857269287,
      "learning_rate": 0.00037385495800571183,
      "loss": 3.1239,
      "step": 96959
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6516294479370117,
      "learning_rate": 0.0003738509933306159,
      "loss": 3.1391,
      "step": 96960
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6232616901397705,
      "learning_rate": 0.0003738470286417898,
      "loss": 2.8082,
      "step": 96961
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7906951904296875,
      "learning_rate": 0.00037384306393923407,
      "loss": 3.0693,
      "step": 96962
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5663833618164062,
      "learning_rate": 0.0003738390992229496,
      "loss": 2.8841,
      "step": 96963
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.222564935684204,
      "learning_rate": 0.00037383513449293696,
      "loss": 3.0088,
      "step": 96964
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7972584962844849,
      "learning_rate": 0.00037383116974919705,
      "loss": 3.02,
      "step": 96965
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4991722106933594,
      "learning_rate": 0.00037382720499173054,
      "loss": 3.1564,
      "step": 96966
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.8042104244232178,
      "learning_rate": 0.00037382324022053814,
      "loss": 2.7952,
      "step": 96967
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.079925298690796,
      "learning_rate": 0.00037381927543562055,
      "loss": 3.2226,
      "step": 96968
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.129774808883667,
      "learning_rate": 0.0003738153106369786,
      "loss": 3.0928,
      "step": 96969
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3146779537200928,
      "learning_rate": 0.00037381134582461297,
      "loss": 3.1618,
      "step": 96970
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6725871562957764,
      "learning_rate": 0.0003738073809985245,
      "loss": 2.894,
      "step": 96971
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7969168424606323,
      "learning_rate": 0.0003738034161587138,
      "loss": 2.9993,
      "step": 96972
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.696179747581482,
      "learning_rate": 0.0003737994513051816,
      "loss": 2.9266,
      "step": 96973
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6760491132736206,
      "learning_rate": 0.0003737954864379287,
      "loss": 2.988,
      "step": 96974
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7490850687026978,
      "learning_rate": 0.00037379152155695593,
      "loss": 2.7177,
      "step": 96975
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6044694185256958,
      "learning_rate": 0.00037378755666226375,
      "loss": 3.1223,
      "step": 96976
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9281690120697021,
      "learning_rate": 0.00037378359175385315,
      "loss": 2.9384,
      "step": 96977
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9155575037002563,
      "learning_rate": 0.0003737796268317249,
      "loss": 3.1425,
      "step": 96978
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.555424690246582,
      "learning_rate": 0.0003737756618958795,
      "loss": 3.1107,
      "step": 96979
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0314271450042725,
      "learning_rate": 0.00037377169694631783,
      "loss": 2.97,
      "step": 96980
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.04498291015625,
      "learning_rate": 0.00037376773198304064,
      "loss": 3.1188,
      "step": 96981
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0869014263153076,
      "learning_rate": 0.0003737637670060486,
      "loss": 2.9257,
      "step": 96982
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.017820119857788,
      "learning_rate": 0.0003737598020153425,
      "loss": 3.0811,
      "step": 96983
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.688085913658142,
      "learning_rate": 0.0003737558370109231,
      "loss": 3.0241,
      "step": 96984
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1472554206848145,
      "learning_rate": 0.00037375187199279104,
      "loss": 3.0217,
      "step": 96985
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.040323257446289,
      "learning_rate": 0.0003737479069609471,
      "loss": 2.8409,
      "step": 96986
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.102017402648926,
      "learning_rate": 0.000373743941915392,
      "loss": 2.8747,
      "step": 96987
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4380271434783936,
      "learning_rate": 0.00037373997685612664,
      "loss": 2.7175,
      "step": 96988
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.050192356109619,
      "learning_rate": 0.00037373601178315155,
      "loss": 2.7002,
      "step": 96989
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.6678833961486816,
      "learning_rate": 0.00037373204669646754,
      "loss": 2.9182,
      "step": 96990
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.067164659500122,
      "learning_rate": 0.00037372808159607537,
      "loss": 2.8133,
      "step": 96991
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.395176649093628,
      "learning_rate": 0.00037372411648197573,
      "loss": 3.1162,
      "step": 96992
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.591171979904175,
      "learning_rate": 0.00037372015135416945,
      "loss": 2.7039,
      "step": 96993
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.709467887878418,
      "learning_rate": 0.0003737161862126572,
      "loss": 3.0336,
      "step": 96994
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9226906299591064,
      "learning_rate": 0.0003737122210574396,
      "loss": 2.9332,
      "step": 96995
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.820791006088257,
      "learning_rate": 0.0003737082558885176,
      "loss": 3.0671,
      "step": 96996
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.2512197494506836,
      "learning_rate": 0.00037370429070589185,
      "loss": 2.8868,
      "step": 96997
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8142210245132446,
      "learning_rate": 0.000373700325509563,
      "loss": 3.0384,
      "step": 96998
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.329195022583008,
      "learning_rate": 0.000373696360299532,
      "loss": 2.8675,
      "step": 96999
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1912965774536133,
      "learning_rate": 0.0003736923950757994,
      "loss": 2.9279,
      "step": 97000
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9013346433639526,
      "learning_rate": 0.0003736884298383659,
      "loss": 3.0329,
      "step": 97001
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0101258754730225,
      "learning_rate": 0.00037368446458723245,
      "loss": 2.8483,
      "step": 97002
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.8307223320007324,
      "learning_rate": 0.00037368049932239966,
      "loss": 2.9051,
      "step": 97003
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.308701992034912,
      "learning_rate": 0.0003736765340438682,
      "loss": 3.1549,
      "step": 97004
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9043563604354858,
      "learning_rate": 0.0003736725687516389,
      "loss": 2.8718,
      "step": 97005
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9115979671478271,
      "learning_rate": 0.0003736686034457125,
      "loss": 3.0779,
      "step": 97006
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.853900909423828,
      "learning_rate": 0.00037366463812608974,
      "loss": 3.0434,
      "step": 97007
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7995351552963257,
      "learning_rate": 0.00037366067279277125,
      "loss": 2.9901,
      "step": 97008
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3575339317321777,
      "learning_rate": 0.000373656707445758,
      "loss": 2.8124,
      "step": 97009
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8190083503723145,
      "learning_rate": 0.0003736527420850504,
      "loss": 3.0298,
      "step": 97010
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8026114702224731,
      "learning_rate": 0.00037364877671064944,
      "loss": 3.077,
      "step": 97011
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.810631513595581,
      "learning_rate": 0.00037364481132255587,
      "loss": 2.7594,
      "step": 97012
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7743330001831055,
      "learning_rate": 0.00037364084592077023,
      "loss": 2.8313,
      "step": 97013
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.6944472789764404,
      "learning_rate": 0.00037363688050529335,
      "loss": 2.9143,
      "step": 97014
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5701282024383545,
      "learning_rate": 0.00037363291507612614,
      "loss": 2.7054,
      "step": 97015
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.859421968460083,
      "learning_rate": 0.000373628949633269,
      "loss": 2.8557,
      "step": 97016
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6918615102767944,
      "learning_rate": 0.00037362498417672295,
      "loss": 2.8952,
      "step": 97017
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.159067392349243,
      "learning_rate": 0.0003736210187064886,
      "loss": 2.7876,
      "step": 97018
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.484179973602295,
      "learning_rate": 0.00037361705322256673,
      "loss": 3.1581,
      "step": 97019
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9143224954605103,
      "learning_rate": 0.00037361308772495796,
      "loss": 3.3089,
      "step": 97020
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7194032669067383,
      "learning_rate": 0.0003736091222136633,
      "loss": 3.0089,
      "step": 97021
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6800014972686768,
      "learning_rate": 0.0003736051566886832,
      "loss": 3.1231,
      "step": 97022
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6821411848068237,
      "learning_rate": 0.0003736011911500185,
      "loss": 3.1801,
      "step": 97023
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.259192943572998,
      "learning_rate": 0.00037359722559767003,
      "loss": 3.0635,
      "step": 97024
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3423092365264893,
      "learning_rate": 0.00037359326003163834,
      "loss": 2.9673,
      "step": 97025
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7328541278839111,
      "learning_rate": 0.00037358929445192434,
      "loss": 2.9529,
      "step": 97026
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5086710453033447,
      "learning_rate": 0.00037358532885852874,
      "loss": 2.8446,
      "step": 97027
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8351088762283325,
      "learning_rate": 0.0003735813632514522,
      "loss": 2.9191,
      "step": 97028
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7001326084136963,
      "learning_rate": 0.00037357739763069545,
      "loss": 2.9896,
      "step": 97029
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0435056686401367,
      "learning_rate": 0.0003735734319962594,
      "loss": 2.9458,
      "step": 97030
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7571831941604614,
      "learning_rate": 0.0003735694663481445,
      "loss": 2.9378,
      "step": 97031
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.380185127258301,
      "learning_rate": 0.0003735655006863517,
      "loss": 3.0877,
      "step": 97032
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.915864109992981,
      "learning_rate": 0.00037356153501088174,
      "loss": 3.0135,
      "step": 97033
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.212697982788086,
      "learning_rate": 0.00037355756932173526,
      "loss": 2.7995,
      "step": 97034
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6416852474212646,
      "learning_rate": 0.0003735536036189131,
      "loss": 3.1076,
      "step": 97035
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.898000717163086,
      "learning_rate": 0.0003735496379024158,
      "loss": 2.9217,
      "step": 97036
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8470216989517212,
      "learning_rate": 0.0003735456721722444,
      "loss": 3.111,
      "step": 97037
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5716450214385986,
      "learning_rate": 0.0003735417064283993,
      "loss": 2.6898,
      "step": 97038
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8527944087982178,
      "learning_rate": 0.00037353774067088146,
      "loss": 2.5917,
      "step": 97039
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9538229703903198,
      "learning_rate": 0.00037353377489969167,
      "loss": 2.9963,
      "step": 97040
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.348841428756714,
      "learning_rate": 0.0003735298091148305,
      "loss": 2.9623,
      "step": 97041
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7864460945129395,
      "learning_rate": 0.00037352584331629867,
      "loss": 3.0487,
      "step": 97042
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.880256175994873,
      "learning_rate": 0.00037352187750409714,
      "loss": 3.0495,
      "step": 97043
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8278082609176636,
      "learning_rate": 0.0003735179116782264,
      "loss": 3.0666,
      "step": 97044
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1984410285949707,
      "learning_rate": 0.00037351394583868733,
      "loss": 2.9738,
      "step": 97045
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8335041999816895,
      "learning_rate": 0.00037350997998548063,
      "loss": 3.1472,
      "step": 97046
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6881991624832153,
      "learning_rate": 0.000373506014118607,
      "loss": 2.765,
      "step": 97047
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.9465126991271973,
      "learning_rate": 0.0003735020482380673,
      "loss": 2.9401,
      "step": 97048
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6557388305664062,
      "learning_rate": 0.00037349808234386214,
      "loss": 2.9322,
      "step": 97049
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9758901596069336,
      "learning_rate": 0.0003734941164359923,
      "loss": 2.8527,
      "step": 97050
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.448915958404541,
      "learning_rate": 0.0003734901505144585,
      "loss": 2.993,
      "step": 97051
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0238420963287354,
      "learning_rate": 0.0003734861845792615,
      "loss": 3.2099,
      "step": 97052
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7070900201797485,
      "learning_rate": 0.000373482218630402,
      "loss": 3.1408,
      "step": 97053
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.619938850402832,
      "learning_rate": 0.0003734782526678808,
      "loss": 3.0349,
      "step": 97054
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8449028730392456,
      "learning_rate": 0.0003734742866916987,
      "loss": 2.9886,
      "step": 97055
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5442924499511719,
      "learning_rate": 0.0003734703207018562,
      "loss": 2.8934,
      "step": 97056
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2767181396484375,
      "learning_rate": 0.0003734663546983542,
      "loss": 3.0557,
      "step": 97057
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7539141178131104,
      "learning_rate": 0.00037346238868119354,
      "loss": 3.1018,
      "step": 97058
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9901270866394043,
      "learning_rate": 0.0003734584226503747,
      "loss": 3.1275,
      "step": 97059
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5205001831054688,
      "learning_rate": 0.0003734544566058985,
      "loss": 3.1399,
      "step": 97060
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8435373306274414,
      "learning_rate": 0.0003734504905477659,
      "loss": 2.9577,
      "step": 97061
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4765819311141968,
      "learning_rate": 0.00037344652447597745,
      "loss": 2.8975,
      "step": 97062
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.652409553527832,
      "learning_rate": 0.00037344255839053377,
      "loss": 2.6821,
      "step": 97063
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9715362787246704,
      "learning_rate": 0.00037343859229143587,
      "loss": 2.9691,
      "step": 97064
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0958287715911865,
      "learning_rate": 0.0003734346261786843,
      "loss": 2.8431,
      "step": 97065
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.014789581298828,
      "learning_rate": 0.00037343066005227977,
      "loss": 3.0604,
      "step": 97066
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4145429134368896,
      "learning_rate": 0.0003734266939122232,
      "loss": 3.0454,
      "step": 97067
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4006927013397217,
      "learning_rate": 0.0003734227277585152,
      "loss": 3.0512,
      "step": 97068
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.872053861618042,
      "learning_rate": 0.0003734187615911565,
      "loss": 3.0464,
      "step": 97069
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7690006494522095,
      "learning_rate": 0.00037341479541014793,
      "loss": 3.1007,
      "step": 97070
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5429694652557373,
      "learning_rate": 0.0003734108292154901,
      "loss": 3.1232,
      "step": 97071
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4022324085235596,
      "learning_rate": 0.0003734068630071838,
      "loss": 2.6661,
      "step": 97072
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9526468515396118,
      "learning_rate": 0.00037340289678522984,
      "loss": 2.998,
      "step": 97073
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8994194269180298,
      "learning_rate": 0.0003733989305496288,
      "loss": 3.0186,
      "step": 97074
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8255770206451416,
      "learning_rate": 0.0003733949643003816,
      "loss": 3.1299,
      "step": 97075
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8308696746826172,
      "learning_rate": 0.0003733909980374889,
      "loss": 3.0169,
      "step": 97076
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.751602292060852,
      "learning_rate": 0.0003733870317609513,
      "loss": 3.0664,
      "step": 97077
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6458708047866821,
      "learning_rate": 0.0003733830654707698,
      "loss": 2.9451,
      "step": 97078
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.844334840774536,
      "learning_rate": 0.000373379099166945,
      "loss": 3.0792,
      "step": 97079
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.936455011367798,
      "learning_rate": 0.0003733751328494775,
      "loss": 3.1173,
      "step": 97080
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9962084293365479,
      "learning_rate": 0.0003733711665183683,
      "loss": 2.9521,
      "step": 97081
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8627885580062866,
      "learning_rate": 0.00037336720017361807,
      "loss": 3.076,
      "step": 97082
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.920031189918518,
      "learning_rate": 0.0003733632338152274,
      "loss": 3.1768,
      "step": 97083
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6638964414596558,
      "learning_rate": 0.0003733592674431971,
      "loss": 3.2578,
      "step": 97084
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.702216625213623,
      "learning_rate": 0.00037335530105752805,
      "loss": 3.0175,
      "step": 97085
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.456449508666992,
      "learning_rate": 0.0003733513346582207,
      "loss": 3.048,
      "step": 97086
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.7184040546417236,
      "learning_rate": 0.000373347368245276,
      "loss": 3.1365,
      "step": 97087
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.755670428276062,
      "learning_rate": 0.00037334340181869474,
      "loss": 3.0704,
      "step": 97088
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.856862783432007,
      "learning_rate": 0.0003733394353784775,
      "loss": 3.0168,
      "step": 97089
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.333804130554199,
      "learning_rate": 0.00037333546892462496,
      "loss": 2.8468,
      "step": 97090
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7146965265274048,
      "learning_rate": 0.0003733315024571382,
      "loss": 3.195,
      "step": 97091
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.979062795639038,
      "learning_rate": 0.00037332753597601754,
      "loss": 2.9995,
      "step": 97092
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.6180808544158936,
      "learning_rate": 0.0003733235694812639,
      "loss": 2.9577,
      "step": 97093
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.094489574432373,
      "learning_rate": 0.00037331960297287817,
      "loss": 3.0868,
      "step": 97094
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5837466716766357,
      "learning_rate": 0.00037331563645086086,
      "loss": 3.1494,
      "step": 97095
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.119563102722168,
      "learning_rate": 0.0003733116699152128,
      "loss": 3.0627,
      "step": 97096
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8409016132354736,
      "learning_rate": 0.00037330770336593475,
      "loss": 2.8168,
      "step": 97097
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7656056880950928,
      "learning_rate": 0.00037330373680302735,
      "loss": 3.0713,
      "step": 97098
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4201713800430298,
      "learning_rate": 0.0003732997702264914,
      "loss": 3.1074,
      "step": 97099
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5257632732391357,
      "learning_rate": 0.00037329580363632773,
      "loss": 3.0927,
      "step": 97100
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.2486612796783447,
      "learning_rate": 0.0003732918370325369,
      "loss": 3.149,
      "step": 97101
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.008193254470825,
      "learning_rate": 0.00037328787041511973,
      "loss": 2.8623,
      "step": 97102
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0175435543060303,
      "learning_rate": 0.000373283903784077,
      "loss": 2.916,
      "step": 97103
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0491466522216797,
      "learning_rate": 0.00037327993713940944,
      "loss": 2.9092,
      "step": 97104
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.614151954650879,
      "learning_rate": 0.0003732759704811177,
      "loss": 2.9957,
      "step": 97105
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.057260274887085,
      "learning_rate": 0.0003732720038092026,
      "loss": 2.923,
      "step": 97106
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5928938388824463,
      "learning_rate": 0.0003732680371236648,
      "loss": 3.3357,
      "step": 97107
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1710915565490723,
      "learning_rate": 0.0003732640704245051,
      "loss": 2.6378,
      "step": 97108
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.534163475036621,
      "learning_rate": 0.0003732601037117243,
      "loss": 3.0944,
      "step": 97109
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6688926219940186,
      "learning_rate": 0.000373256136985323,
      "loss": 2.7333,
      "step": 97110
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.4655704498291016,
      "learning_rate": 0.00037325217024530204,
      "loss": 3.1207,
      "step": 97111
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8771766424179077,
      "learning_rate": 0.0003732482034916621,
      "loss": 3.2402,
      "step": 97112
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3098642826080322,
      "learning_rate": 0.0003732442367244039,
      "loss": 3.0252,
      "step": 97113
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.9285215139389038,
      "learning_rate": 0.00037324026994352825,
      "loss": 3.0225,
      "step": 97114
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3275694847106934,
      "learning_rate": 0.00037323630314903586,
      "loss": 2.9374,
      "step": 97115
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.166581630706787,
      "learning_rate": 0.0003732323363409275,
      "loss": 3.0574,
      "step": 97116
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.0336625576019287,
      "learning_rate": 0.0003732283695192038,
      "loss": 3.015,
      "step": 97117
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4116277694702148,
      "learning_rate": 0.0003732244026838656,
      "loss": 2.9276,
      "step": 97118
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.06909441947937,
      "learning_rate": 0.0003732204358349135,
      "loss": 2.9729,
      "step": 97119
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.129732847213745,
      "learning_rate": 0.0003732164689723484,
      "loss": 3.087,
      "step": 97120
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1905341148376465,
      "learning_rate": 0.000373212502096171,
      "loss": 2.9728,
      "step": 97121
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8002510070800781,
      "learning_rate": 0.0003732085352063821,
      "loss": 3.1233,
      "step": 97122
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0001699924468994,
      "learning_rate": 0.0003732045683029822,
      "loss": 3.0686,
      "step": 97123
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.557668447494507,
      "learning_rate": 0.00037320060138597223,
      "loss": 3.1813,
      "step": 97124
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6189392805099487,
      "learning_rate": 0.0003731966344553529,
      "loss": 3.0014,
      "step": 97125
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.592526435852051,
      "learning_rate": 0.00037319266751112493,
      "loss": 2.9334,
      "step": 97126
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.1891276836395264,
      "learning_rate": 0.0003731887005532891,
      "loss": 3.1061,
      "step": 97127
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8995205163955688,
      "learning_rate": 0.0003731847335818461,
      "loss": 2.9246,
      "step": 97128
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5054436922073364,
      "learning_rate": 0.00037318076659679665,
      "loss": 3.0453,
      "step": 97129
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6330299377441406,
      "learning_rate": 0.0003731767995981415,
      "loss": 2.8844,
      "step": 97130
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.552830696105957,
      "learning_rate": 0.00037317283258588143,
      "loss": 2.8626,
      "step": 97131
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.3881702423095703,
      "learning_rate": 0.00037316886556001716,
      "loss": 3.1616,
      "step": 97132
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6673814058303833,
      "learning_rate": 0.00037316489852054935,
      "loss": 2.8305,
      "step": 97133
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.007986307144165,
      "learning_rate": 0.00037316093146747887,
      "loss": 3.1457,
      "step": 97134
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8972300291061401,
      "learning_rate": 0.00037315696440080637,
      "loss": 2.7705,
      "step": 97135
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6651573181152344,
      "learning_rate": 0.00037315299732053266,
      "loss": 3.2639,
      "step": 97136
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.4884586334228516,
      "learning_rate": 0.00037314903022665834,
      "loss": 2.8112,
      "step": 97137
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6328625679016113,
      "learning_rate": 0.00037314506311918427,
      "loss": 3.1144,
      "step": 97138
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7478724718093872,
      "learning_rate": 0.0003731410959981111,
      "loss": 2.8726,
      "step": 97139
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2287919521331787,
      "learning_rate": 0.0003731371288634397,
      "loss": 3.188,
      "step": 97140
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.222454309463501,
      "learning_rate": 0.0003731331617151707,
      "loss": 3.0118,
      "step": 97141
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6135430335998535,
      "learning_rate": 0.0003731291945533049,
      "loss": 2.9078,
      "step": 97142
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0683133602142334,
      "learning_rate": 0.00037312522737784304,
      "loss": 3.2027,
      "step": 97143
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.716501235961914,
      "learning_rate": 0.0003731212601887857,
      "loss": 2.9129,
      "step": 97144
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.5398091077804565,
      "learning_rate": 0.0003731172929861338,
      "loss": 3.0328,
      "step": 97145
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6845289468765259,
      "learning_rate": 0.000373113325769888,
      "loss": 2.9566,
      "step": 97146
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.6282477378845215,
      "learning_rate": 0.00037310935854004904,
      "loss": 2.9209,
      "step": 97147
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7206535339355469,
      "learning_rate": 0.00037310539129661765,
      "loss": 2.9316,
      "step": 97148
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.2355849742889404,
      "learning_rate": 0.0003731014240395947,
      "loss": 2.9078,
      "step": 97149
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.493098020553589,
      "learning_rate": 0.0003730974567689807,
      "loss": 2.906,
      "step": 97150
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.7966450452804565,
      "learning_rate": 0.0003730934894847765,
      "loss": 3.3375,
      "step": 97151
    },
    {
      "epoch": 1.26,
      "grad_norm": 1.8121423721313477,
      "learning_rate": 0.00037308952218698295,
      "loss": 3.0278,
      "step": 97152
    },
    {
      "epoch": 1.26,
      "grad_norm": 2.5949180126190186,
      "learning_rate": 0.0003730855548756006,
      "loss": 2.967,
      "step": 97153
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8255867958068848,
      "learning_rate": 0.00037308158755063026,
      "loss": 3.013,
      "step": 97154
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0213167667388916,
      "learning_rate": 0.0003730776202120727,
      "loss": 2.9411,
      "step": 97155
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8718409538269043,
      "learning_rate": 0.00037307365285992863,
      "loss": 2.8912,
      "step": 97156
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.678403377532959,
      "learning_rate": 0.0003730696854941988,
      "loss": 3.1794,
      "step": 97157
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8402459621429443,
      "learning_rate": 0.0003730657181148839,
      "loss": 3.2227,
      "step": 97158
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6811158657073975,
      "learning_rate": 0.0003730617507219847,
      "loss": 3.1984,
      "step": 97159
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.294637441635132,
      "learning_rate": 0.00037305778331550197,
      "loss": 2.9394,
      "step": 97160
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7230339050292969,
      "learning_rate": 0.0003730538158954364,
      "loss": 3.0824,
      "step": 97161
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2286295890808105,
      "learning_rate": 0.0003730498484617888,
      "loss": 2.9798,
      "step": 97162
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.026942014694214,
      "learning_rate": 0.00037304588101455977,
      "loss": 3.0424,
      "step": 97163
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9662340879440308,
      "learning_rate": 0.0003730419135537502,
      "loss": 2.9165,
      "step": 97164
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0033140182495117,
      "learning_rate": 0.00037303794607936074,
      "loss": 2.9238,
      "step": 97165
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.186941623687744,
      "learning_rate": 0.0003730339785913921,
      "loss": 2.907,
      "step": 97166
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8972511291503906,
      "learning_rate": 0.0003730300110898452,
      "loss": 3.0641,
      "step": 97167
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7037994861602783,
      "learning_rate": 0.0003730260435747205,
      "loss": 3.0808,
      "step": 97168
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8638105392456055,
      "learning_rate": 0.00037302207604601885,
      "loss": 2.8049,
      "step": 97169
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.329132318496704,
      "learning_rate": 0.0003730181085037412,
      "loss": 2.7459,
      "step": 97170
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3778510093688965,
      "learning_rate": 0.000373014140947888,
      "loss": 2.9746,
      "step": 97171
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.4183573722839355,
      "learning_rate": 0.00037301017337846003,
      "loss": 3.1093,
      "step": 97172
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.743285894393921,
      "learning_rate": 0.00037300620579545815,
      "loss": 2.7444,
      "step": 97173
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9491596221923828,
      "learning_rate": 0.0003730022381988831,
      "loss": 3.0117,
      "step": 97174
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7538673877716064,
      "learning_rate": 0.0003729982705887355,
      "loss": 3.0435,
      "step": 97175
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.2418148517608643,
      "learning_rate": 0.00037299430296501617,
      "loss": 3.0901,
      "step": 97176
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5128254890441895,
      "learning_rate": 0.00037299033532772573,
      "loss": 3.0514,
      "step": 97177
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9912312030792236,
      "learning_rate": 0.00037298636767686504,
      "loss": 3.0865,
      "step": 97178
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5465894937515259,
      "learning_rate": 0.0003729824000124349,
      "loss": 3.0837,
      "step": 97179
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9742847681045532,
      "learning_rate": 0.0003729784323344359,
      "loss": 3.2596,
      "step": 97180
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5958802700042725,
      "learning_rate": 0.0003729744646428687,
      "loss": 2.9665,
      "step": 97181
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6648670434951782,
      "learning_rate": 0.0003729704969377344,
      "loss": 3.0362,
      "step": 97182
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.649224281311035,
      "learning_rate": 0.00037296652921903336,
      "loss": 2.9071,
      "step": 97183
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5836539268493652,
      "learning_rate": 0.0003729625614867665,
      "loss": 3.1856,
      "step": 97184
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8001351356506348,
      "learning_rate": 0.0003729585937409346,
      "loss": 3.1287,
      "step": 97185
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6390529870986938,
      "learning_rate": 0.00037295462598153825,
      "loss": 2.7074,
      "step": 97186
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6974409818649292,
      "learning_rate": 0.0003729506582085782,
      "loss": 3.0457,
      "step": 97187
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.994757890701294,
      "learning_rate": 0.0003729466904220553,
      "loss": 2.8691,
      "step": 97188
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.480644702911377,
      "learning_rate": 0.00037294272262197026,
      "loss": 2.7367,
      "step": 97189
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6978950500488281,
      "learning_rate": 0.00037293875480832377,
      "loss": 3.0099,
      "step": 97190
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.6100351810455322,
      "learning_rate": 0.00037293478698111656,
      "loss": 2.9553,
      "step": 97191
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8365366458892822,
      "learning_rate": 0.0003729308191403494,
      "loss": 2.9775,
      "step": 97192
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.686455249786377,
      "learning_rate": 0.00037292685128602305,
      "loss": 2.9029,
      "step": 97193
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.862281084060669,
      "learning_rate": 0.0003729228834181382,
      "loss": 3.1307,
      "step": 97194
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.048260450363159,
      "learning_rate": 0.0003729189155366957,
      "loss": 3.031,
      "step": 97195
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.878546953201294,
      "learning_rate": 0.0003729149476416961,
      "loss": 3.2318,
      "step": 97196
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.103391408920288,
      "learning_rate": 0.0003729109797331402,
      "loss": 3.1455,
      "step": 97197
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6341415643692017,
      "learning_rate": 0.0003729070118110289,
      "loss": 3.0507,
      "step": 97198
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.709883451461792,
      "learning_rate": 0.0003729030438753627,
      "loss": 2.871,
      "step": 97199
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.052422285079956,
      "learning_rate": 0.0003728990759261425,
      "loss": 3.0583,
      "step": 97200
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5369960069656372,
      "learning_rate": 0.000372895107963369,
      "loss": 3.1962,
      "step": 97201
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6274384260177612,
      "learning_rate": 0.0003728911399870429,
      "loss": 3.193,
      "step": 97202
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8103435039520264,
      "learning_rate": 0.0003728871719971649,
      "loss": 2.9607,
      "step": 97203
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7551994323730469,
      "learning_rate": 0.0003728832039937359,
      "loss": 3.0339,
      "step": 97204
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1135201454162598,
      "learning_rate": 0.0003728792359767565,
      "loss": 3.0837,
      "step": 97205
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.05013108253479,
      "learning_rate": 0.00037287526794622743,
      "loss": 2.9621,
      "step": 97206
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7671970129013062,
      "learning_rate": 0.0003728712999021496,
      "loss": 2.8109,
      "step": 97207
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1161699295043945,
      "learning_rate": 0.00037286733184452346,
      "loss": 2.9148,
      "step": 97208
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.273387908935547,
      "learning_rate": 0.00037286336377335,
      "loss": 3.0537,
      "step": 97209
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0133469104766846,
      "learning_rate": 0.0003728593956886299,
      "loss": 3.0453,
      "step": 97210
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3556551933288574,
      "learning_rate": 0.00037285542759036373,
      "loss": 2.6531,
      "step": 97211
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.624815821647644,
      "learning_rate": 0.0003728514594785524,
      "loss": 3.0474,
      "step": 97212
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5261958837509155,
      "learning_rate": 0.0003728474913531967,
      "loss": 3.2768,
      "step": 97213
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.243807792663574,
      "learning_rate": 0.0003728435232142972,
      "loss": 2.9714,
      "step": 97214
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.560131311416626,
      "learning_rate": 0.00037283955506185474,
      "loss": 3.1669,
      "step": 97215
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.932908535003662,
      "learning_rate": 0.00037283558689587003,
      "loss": 2.9408,
      "step": 97216
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7105083465576172,
      "learning_rate": 0.00037283161871634385,
      "loss": 3.0467,
      "step": 97217
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.780822515487671,
      "learning_rate": 0.00037282765052327685,
      "loss": 2.8114,
      "step": 97218
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.768359661102295,
      "learning_rate": 0.0003728236823166698,
      "loss": 2.8846,
      "step": 97219
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0927841663360596,
      "learning_rate": 0.00037281971409652343,
      "loss": 2.9441,
      "step": 97220
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0540716648101807,
      "learning_rate": 0.0003728157458628386,
      "loss": 2.9669,
      "step": 97221
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9929745197296143,
      "learning_rate": 0.00037281177761561585,
      "loss": 2.9221,
      "step": 97222
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7721654176712036,
      "learning_rate": 0.00037280780935485604,
      "loss": 2.6852,
      "step": 97223
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.153737783432007,
      "learning_rate": 0.00037280384108055996,
      "loss": 3.0515,
      "step": 97224
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9445348978042603,
      "learning_rate": 0.00037279987279272817,
      "loss": 2.6454,
      "step": 97225
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6297197341918945,
      "learning_rate": 0.0003727959044913615,
      "loss": 3.004,
      "step": 97226
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0633203983306885,
      "learning_rate": 0.00037279193617646076,
      "loss": 3.2611,
      "step": 97227
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.789466142654419,
      "learning_rate": 0.0003727879678480266,
      "loss": 2.8133,
      "step": 97228
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8867307901382446,
      "learning_rate": 0.0003727839995060598,
      "loss": 2.8837,
      "step": 97229
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.420759677886963,
      "learning_rate": 0.0003727800311505611,
      "loss": 3.0486,
      "step": 97230
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2689075469970703,
      "learning_rate": 0.00037277606278153123,
      "loss": 3.1273,
      "step": 97231
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9214723110198975,
      "learning_rate": 0.00037277209439897085,
      "loss": 2.9911,
      "step": 97232
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.81144118309021,
      "learning_rate": 0.0003727681260028808,
      "loss": 2.8948,
      "step": 97233
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.208641290664673,
      "learning_rate": 0.00037276415759326183,
      "loss": 3.0397,
      "step": 97234
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4981776475906372,
      "learning_rate": 0.0003727601891701145,
      "loss": 2.8655,
      "step": 97235
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.015455722808838,
      "learning_rate": 0.0003727562207334398,
      "loss": 2.7189,
      "step": 97236
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9172283411026,
      "learning_rate": 0.0003727522522832383,
      "loss": 3.1759,
      "step": 97237
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6810386180877686,
      "learning_rate": 0.00037274828381951076,
      "loss": 2.9897,
      "step": 97238
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7348283529281616,
      "learning_rate": 0.00037274431534225794,
      "loss": 2.8792,
      "step": 97239
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3266546726226807,
      "learning_rate": 0.0003727403468514807,
      "loss": 3.2018,
      "step": 97240
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.657458782196045,
      "learning_rate": 0.0003727363783471795,
      "loss": 3.0859,
      "step": 97241
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.755389451980591,
      "learning_rate": 0.00037273240982935526,
      "loss": 3.0561,
      "step": 97242
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9234306812286377,
      "learning_rate": 0.0003727284412980088,
      "loss": 3.126,
      "step": 97243
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7647879123687744,
      "learning_rate": 0.0003727244727531407,
      "loss": 3.1224,
      "step": 97244
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.636817455291748,
      "learning_rate": 0.00037272050419475165,
      "loss": 2.9521,
      "step": 97245
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6272944211959839,
      "learning_rate": 0.0003727165356228426,
      "loss": 3.0742,
      "step": 97246
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.891491174697876,
      "learning_rate": 0.0003727125670374141,
      "loss": 2.8472,
      "step": 97247
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5737464427947998,
      "learning_rate": 0.000372708598438467,
      "loss": 3.4361,
      "step": 97248
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2018966674804688,
      "learning_rate": 0.000372704629826002,
      "loss": 2.6515,
      "step": 97249
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6648755073547363,
      "learning_rate": 0.0003727006612000198,
      "loss": 2.9281,
      "step": 97250
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.894533395767212,
      "learning_rate": 0.00037269669256052124,
      "loss": 3.4485,
      "step": 97251
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9386886358261108,
      "learning_rate": 0.00037269272390750695,
      "loss": 3.072,
      "step": 97252
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7057974338531494,
      "learning_rate": 0.00037268875524097776,
      "loss": 3.0194,
      "step": 97253
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6491392850875854,
      "learning_rate": 0.00037268478656093425,
      "loss": 3.0906,
      "step": 97254
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.849755883216858,
      "learning_rate": 0.0003726808178673773,
      "loss": 3.2091,
      "step": 97255
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8566142320632935,
      "learning_rate": 0.0003726768491603077,
      "loss": 2.9725,
      "step": 97256
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5752627849578857,
      "learning_rate": 0.000372672880439726,
      "loss": 3.2608,
      "step": 97257
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8044204711914062,
      "learning_rate": 0.0003726689117056331,
      "loss": 3.2714,
      "step": 97258
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5405348539352417,
      "learning_rate": 0.00037266494295802973,
      "loss": 3.0296,
      "step": 97259
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0549941062927246,
      "learning_rate": 0.00037266097419691647,
      "loss": 2.98,
      "step": 97260
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9577913284301758,
      "learning_rate": 0.0003726570054222942,
      "loss": 3.0142,
      "step": 97261
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8531240224838257,
      "learning_rate": 0.00037265303663416366,
      "loss": 3.0719,
      "step": 97262
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1595473289489746,
      "learning_rate": 0.0003726490678325255,
      "loss": 2.8481,
      "step": 97263
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.3989031314849854,
      "learning_rate": 0.0003726450990173805,
      "loss": 3.1009,
      "step": 97264
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7384096384048462,
      "learning_rate": 0.0003726411301887295,
      "loss": 2.8489,
      "step": 97265
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9930517673492432,
      "learning_rate": 0.00037263716134657303,
      "loss": 2.8239,
      "step": 97266
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7453715801239014,
      "learning_rate": 0.0003726331924909119,
      "loss": 3.2106,
      "step": 97267
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.162605047225952,
      "learning_rate": 0.00037262922362174707,
      "loss": 3.0117,
      "step": 97268
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.0098683834075928,
      "learning_rate": 0.00037262525473907897,
      "loss": 2.8155,
      "step": 97269
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.392629861831665,
      "learning_rate": 0.00037262128584290843,
      "loss": 2.8052,
      "step": 97270
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.794682502746582,
      "learning_rate": 0.00037261731693323633,
      "loss": 2.9325,
      "step": 97271
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.9264097213745117,
      "learning_rate": 0.0003726133480100632,
      "loss": 2.9385,
      "step": 97272
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.859110116958618,
      "learning_rate": 0.00037260937907338996,
      "loss": 2.7959,
      "step": 97273
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.257742404937744,
      "learning_rate": 0.00037260541012321726,
      "loss": 2.7808,
      "step": 97274
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7670174837112427,
      "learning_rate": 0.0003726014411595458,
      "loss": 3.1697,
      "step": 97275
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9987766742706299,
      "learning_rate": 0.0003725974721823763,
      "loss": 3.1231,
      "step": 97276
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3386828899383545,
      "learning_rate": 0.00037259350319170976,
      "loss": 2.8958,
      "step": 97277
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.247563362121582,
      "learning_rate": 0.00037258953418754657,
      "loss": 2.8175,
      "step": 97278
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1288766860961914,
      "learning_rate": 0.0003725855651698876,
      "loss": 3.2243,
      "step": 97279
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.813246250152588,
      "learning_rate": 0.0003725815961387337,
      "loss": 3.0045,
      "step": 97280
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4870362281799316,
      "learning_rate": 0.00037257762709408545,
      "loss": 2.8591,
      "step": 97281
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.892556071281433,
      "learning_rate": 0.00037257365803594367,
      "loss": 3.2501,
      "step": 97282
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6140557527542114,
      "learning_rate": 0.0003725696889643091,
      "loss": 3.0258,
      "step": 97283
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9294219017028809,
      "learning_rate": 0.0003725657198791824,
      "loss": 2.7393,
      "step": 97284
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8282451629638672,
      "learning_rate": 0.0003725617507805644,
      "loss": 3.2183,
      "step": 97285
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.431004524230957,
      "learning_rate": 0.0003725577816684558,
      "loss": 2.7616,
      "step": 97286
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0534040927886963,
      "learning_rate": 0.00037255381254285724,
      "loss": 2.9487,
      "step": 97287
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.142910957336426,
      "learning_rate": 0.00037254984340376963,
      "loss": 2.7531,
      "step": 97288
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.145015001296997,
      "learning_rate": 0.0003725458742511937,
      "loss": 2.9254,
      "step": 97289
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2138302326202393,
      "learning_rate": 0.00037254190508513,
      "loss": 2.6675,
      "step": 97290
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7761187553405762,
      "learning_rate": 0.0003725379359055795,
      "loss": 3.0363,
      "step": 97291
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.133984327316284,
      "learning_rate": 0.0003725339667125428,
      "loss": 2.8567,
      "step": 97292
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.487401247024536,
      "learning_rate": 0.0003725299975060206,
      "loss": 2.8602,
      "step": 97293
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.599484443664551,
      "learning_rate": 0.00037252602828601375,
      "loss": 3.0043,
      "step": 97294
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9543513059616089,
      "learning_rate": 0.000372522059052523,
      "loss": 3.1532,
      "step": 97295
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.275294542312622,
      "learning_rate": 0.00037251808980554897,
      "loss": 3.2018,
      "step": 97296
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.0598456859588623,
      "learning_rate": 0.00037251412054509247,
      "loss": 2.9404,
      "step": 97297
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1992082595825195,
      "learning_rate": 0.0003725101512711542,
      "loss": 2.9983,
      "step": 97298
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.410672664642334,
      "learning_rate": 0.000372506181983735,
      "loss": 3.0954,
      "step": 97299
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.36603045463562,
      "learning_rate": 0.0003725022126828354,
      "loss": 3.0227,
      "step": 97300
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.579850673675537,
      "learning_rate": 0.00037249824336845627,
      "loss": 2.9508,
      "step": 97301
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6961193084716797,
      "learning_rate": 0.00037249427404059855,
      "loss": 2.9027,
      "step": 97302
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1468029022216797,
      "learning_rate": 0.00037249030469926257,
      "loss": 3.1365,
      "step": 97303
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1523189544677734,
      "learning_rate": 0.00037248633534444936,
      "loss": 3.1349,
      "step": 97304
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.4117016792297363,
      "learning_rate": 0.00037248236597615963,
      "loss": 3.2262,
      "step": 97305
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5205079317092896,
      "learning_rate": 0.00037247839659439397,
      "loss": 3.1279,
      "step": 97306
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.9813272953033447,
      "learning_rate": 0.0003724744271991532,
      "loss": 2.8787,
      "step": 97307
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8504369258880615,
      "learning_rate": 0.00037247045779043806,
      "loss": 2.6772,
      "step": 97308
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2613584995269775,
      "learning_rate": 0.0003724664883682494,
      "loss": 2.8046,
      "step": 97309
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6652061939239502,
      "learning_rate": 0.00037246251893258776,
      "loss": 3.0751,
      "step": 97310
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.78863525390625,
      "learning_rate": 0.00037245854948345395,
      "loss": 3.2208,
      "step": 97311
    },
    {
      "epoch": 1.27,
      "grad_norm": 4.550833225250244,
      "learning_rate": 0.00037245458002084877,
      "loss": 2.9409,
      "step": 97312
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.6670100688934326,
      "learning_rate": 0.00037245061054477297,
      "loss": 3.1412,
      "step": 97313
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.183034896850586,
      "learning_rate": 0.0003724466410552272,
      "loss": 2.8625,
      "step": 97314
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7037038803100586,
      "learning_rate": 0.0003724426715522122,
      "loss": 3.1312,
      "step": 97315
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.9865269660949707,
      "learning_rate": 0.0003724387020357288,
      "loss": 3.1453,
      "step": 97316
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.293034315109253,
      "learning_rate": 0.0003724347325057776,
      "loss": 3.1352,
      "step": 97317
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.089179277420044,
      "learning_rate": 0.0003724307629623594,
      "loss": 2.8913,
      "step": 97318
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8626530170440674,
      "learning_rate": 0.0003724267934054751,
      "loss": 3.0246,
      "step": 97319
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7872018814086914,
      "learning_rate": 0.00037242282383512514,
      "loss": 2.7994,
      "step": 97320
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.9786376953125,
      "learning_rate": 0.0003724188542513105,
      "loss": 3.0541,
      "step": 97321
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.915712356567383,
      "learning_rate": 0.0003724148846540317,
      "loss": 3.059,
      "step": 97322
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.865186333656311,
      "learning_rate": 0.0003724109150432897,
      "loss": 3.1273,
      "step": 97323
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.689138650894165,
      "learning_rate": 0.0003724069454190852,
      "loss": 3.0432,
      "step": 97324
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.371218204498291,
      "learning_rate": 0.0003724029757814188,
      "loss": 2.672,
      "step": 97325
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.113079071044922,
      "learning_rate": 0.0003723990061302914,
      "loss": 2.7698,
      "step": 97326
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7223933935165405,
      "learning_rate": 0.00037239503646570353,
      "loss": 2.9155,
      "step": 97327
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.25989031791687,
      "learning_rate": 0.00037239106678765615,
      "loss": 2.9752,
      "step": 97328
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8490318059921265,
      "learning_rate": 0.00037238709709614987,
      "loss": 2.9177,
      "step": 97329
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3143210411071777,
      "learning_rate": 0.0003723831273911855,
      "loss": 3.1512,
      "step": 97330
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5127639770507812,
      "learning_rate": 0.0003723791576727637,
      "loss": 3.0474,
      "step": 97331
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0031373500823975,
      "learning_rate": 0.0003723751879408853,
      "loss": 3.1039,
      "step": 97332
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.6266651153564453,
      "learning_rate": 0.00037237121819555096,
      "loss": 3.0876,
      "step": 97333
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.9342753887176514,
      "learning_rate": 0.0003723672484367614,
      "loss": 2.7162,
      "step": 97334
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6329903602600098,
      "learning_rate": 0.00037236327866451747,
      "loss": 3.0621,
      "step": 97335
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2554900646209717,
      "learning_rate": 0.00037235930887881977,
      "loss": 2.9166,
      "step": 97336
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7784583568573,
      "learning_rate": 0.0003723553390796692,
      "loss": 2.9452,
      "step": 97337
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9256646633148193,
      "learning_rate": 0.00037235136926706635,
      "loss": 3.1023,
      "step": 97338
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7358953952789307,
      "learning_rate": 0.000372347399441012,
      "loss": 3.0771,
      "step": 97339
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.219757556915283,
      "learning_rate": 0.0003723434296015069,
      "loss": 2.9175,
      "step": 97340
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6245172023773193,
      "learning_rate": 0.0003723394597485519,
      "loss": 3.1307,
      "step": 97341
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1205005645751953,
      "learning_rate": 0.0003723354898821475,
      "loss": 3.1159,
      "step": 97342
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.23759126663208,
      "learning_rate": 0.0003723315200022946,
      "loss": 2.8573,
      "step": 97343
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4968717098236084,
      "learning_rate": 0.00037232755010899394,
      "loss": 3.0181,
      "step": 97344
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.224811553955078,
      "learning_rate": 0.00037232358020224614,
      "loss": 2.9591,
      "step": 97345
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.361680746078491,
      "learning_rate": 0.0003723196102820521,
      "loss": 2.9583,
      "step": 97346
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8522082567214966,
      "learning_rate": 0.00037231564034841244,
      "loss": 3.01,
      "step": 97347
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8905178308486938,
      "learning_rate": 0.000372311670401328,
      "loss": 3.0792,
      "step": 97348
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3044207096099854,
      "learning_rate": 0.0003723077004407993,
      "loss": 3.0274,
      "step": 97349
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.193204402923584,
      "learning_rate": 0.0003723037304668274,
      "loss": 2.9416,
      "step": 97350
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0454771518707275,
      "learning_rate": 0.0003722997604794128,
      "loss": 3.2213,
      "step": 97351
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6223907470703125,
      "learning_rate": 0.00037229579047855634,
      "loss": 3.0417,
      "step": 97352
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2067036628723145,
      "learning_rate": 0.0003722918204642587,
      "loss": 3.2048,
      "step": 97353
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7409696578979492,
      "learning_rate": 0.0003722878504365207,
      "loss": 3.0852,
      "step": 97354
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8208842277526855,
      "learning_rate": 0.000372283880395343,
      "loss": 2.7256,
      "step": 97355
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.012995719909668,
      "learning_rate": 0.00037227991034072635,
      "loss": 3.0053,
      "step": 97356
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.663598656654358,
      "learning_rate": 0.0003722759402726715,
      "loss": 3.2489,
      "step": 97357
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.030907154083252,
      "learning_rate": 0.00037227197019117916,
      "loss": 3.0317,
      "step": 97358
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6864854097366333,
      "learning_rate": 0.00037226800009625014,
      "loss": 3.334,
      "step": 97359
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.858187198638916,
      "learning_rate": 0.0003722640299878851,
      "loss": 3.1472,
      "step": 97360
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2071166038513184,
      "learning_rate": 0.0003722600598660848,
      "loss": 3.0041,
      "step": 97361
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6201171875,
      "learning_rate": 0.00037225608973085004,
      "loss": 2.7674,
      "step": 97362
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.170315742492676,
      "learning_rate": 0.0003722521195821815,
      "loss": 2.9869,
      "step": 97363
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.182502508163452,
      "learning_rate": 0.0003722481494200799,
      "loss": 3.0647,
      "step": 97364
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6243619918823242,
      "learning_rate": 0.00037224417924454605,
      "loss": 3.1173,
      "step": 97365
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8199769258499146,
      "learning_rate": 0.0003722402090555806,
      "loss": 2.9968,
      "step": 97366
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.470165967941284,
      "learning_rate": 0.00037223623885318427,
      "loss": 2.9388,
      "step": 97367
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5045583248138428,
      "learning_rate": 0.00037223226863735803,
      "loss": 2.9544,
      "step": 97368
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.23222017288208,
      "learning_rate": 0.00037222829840810227,
      "loss": 2.8649,
      "step": 97369
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9977257251739502,
      "learning_rate": 0.000372224328165418,
      "loss": 3.3213,
      "step": 97370
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.117483377456665,
      "learning_rate": 0.0003722203579093059,
      "loss": 2.9762,
      "step": 97371
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5523358583450317,
      "learning_rate": 0.0003722163876397666,
      "loss": 2.9704,
      "step": 97372
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.043726682662964,
      "learning_rate": 0.00037221241735680094,
      "loss": 3.0611,
      "step": 97373
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6803154945373535,
      "learning_rate": 0.0003722084470604097,
      "loss": 2.8438,
      "step": 97374
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8572986125946045,
      "learning_rate": 0.00037220447675059343,
      "loss": 2.7627,
      "step": 97375
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6788175106048584,
      "learning_rate": 0.000372200506427353,
      "loss": 2.8607,
      "step": 97376
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4925851821899414,
      "learning_rate": 0.00037219653609068913,
      "loss": 3.1097,
      "step": 97377
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5844712257385254,
      "learning_rate": 0.00037219256574060256,
      "loss": 2.958,
      "step": 97378
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1118621826171875,
      "learning_rate": 0.0003721885953770941,
      "loss": 2.9416,
      "step": 97379
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7299431562423706,
      "learning_rate": 0.0003721846250001644,
      "loss": 3.0217,
      "step": 97380
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0422143936157227,
      "learning_rate": 0.00037218065460981417,
      "loss": 2.7696,
      "step": 97381
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9538997411727905,
      "learning_rate": 0.0003721766842060442,
      "loss": 3.1144,
      "step": 97382
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0557594299316406,
      "learning_rate": 0.0003721727137888553,
      "loss": 2.8517,
      "step": 97383
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7904412746429443,
      "learning_rate": 0.00037216874335824804,
      "loss": 3.1004,
      "step": 97384
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0664408206939697,
      "learning_rate": 0.00037216477291422326,
      "loss": 3.0864,
      "step": 97385
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0951974391937256,
      "learning_rate": 0.0003721608024567817,
      "loss": 3.1419,
      "step": 97386
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9649720191955566,
      "learning_rate": 0.0003721568319859241,
      "loss": 2.8385,
      "step": 97387
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8937947750091553,
      "learning_rate": 0.00037215286150165116,
      "loss": 3.0578,
      "step": 97388
    },
    {
      "epoch": 1.27,
      "grad_norm": 4.03113317489624,
      "learning_rate": 0.00037214889100396363,
      "loss": 2.8051,
      "step": 97389
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3121039867401123,
      "learning_rate": 0.0003721449204928623,
      "loss": 2.9076,
      "step": 97390
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7325259447097778,
      "learning_rate": 0.0003721409499683479,
      "loss": 2.992,
      "step": 97391
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9687660932540894,
      "learning_rate": 0.000372136979430421,
      "loss": 2.9975,
      "step": 97392
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8572341203689575,
      "learning_rate": 0.0003721330088790825,
      "loss": 3.1645,
      "step": 97393
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6652039289474487,
      "learning_rate": 0.0003721290383143333,
      "loss": 2.8619,
      "step": 97394
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9045408964157104,
      "learning_rate": 0.00037212506773617377,
      "loss": 2.9918,
      "step": 97395
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7818019390106201,
      "learning_rate": 0.00037212109714460486,
      "loss": 3.0034,
      "step": 97396
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5823841094970703,
      "learning_rate": 0.0003721171265396273,
      "loss": 3.0368,
      "step": 97397
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7777342796325684,
      "learning_rate": 0.00037211315592124177,
      "loss": 3.0836,
      "step": 97398
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.040229320526123,
      "learning_rate": 0.00037210918528944906,
      "loss": 2.8352,
      "step": 97399
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.777496099472046,
      "learning_rate": 0.00037210521464424987,
      "loss": 2.6949,
      "step": 97400
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6460654735565186,
      "learning_rate": 0.00037210124398564506,
      "loss": 3.02,
      "step": 97401
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6188998222351074,
      "learning_rate": 0.0003720972733136352,
      "loss": 3.1711,
      "step": 97402
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0210766792297363,
      "learning_rate": 0.0003720933026282211,
      "loss": 3.0521,
      "step": 97403
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6989991664886475,
      "learning_rate": 0.0003720893319294035,
      "loss": 3.005,
      "step": 97404
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.02272367477417,
      "learning_rate": 0.0003720853612171831,
      "loss": 2.9976,
      "step": 97405
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9075239896774292,
      "learning_rate": 0.00037208139049156065,
      "loss": 3.1978,
      "step": 97406
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1805944442749023,
      "learning_rate": 0.00037207741975253694,
      "loss": 3.1242,
      "step": 97407
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8109489679336548,
      "learning_rate": 0.00037207344900011275,
      "loss": 2.8719,
      "step": 97408
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.378220319747925,
      "learning_rate": 0.00037206947823428864,
      "loss": 3.0177,
      "step": 97409
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.114821672439575,
      "learning_rate": 0.00037206550745506547,
      "loss": 3.0781,
      "step": 97410
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.9374377727508545,
      "learning_rate": 0.00037206153666244406,
      "loss": 2.8487,
      "step": 97411
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.639360785484314,
      "learning_rate": 0.00037205756585642494,
      "loss": 3.1626,
      "step": 97412
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.380077838897705,
      "learning_rate": 0.000372053595037009,
      "loss": 2.7059,
      "step": 97413
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2508530616760254,
      "learning_rate": 0.00037204962420419696,
      "loss": 3.1218,
      "step": 97414
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7260135412216187,
      "learning_rate": 0.0003720456533579895,
      "loss": 2.8615,
      "step": 97415
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8270875215530396,
      "learning_rate": 0.0003720416824983874,
      "loss": 2.9947,
      "step": 97416
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.948585033416748,
      "learning_rate": 0.0003720377116253914,
      "loss": 3.1552,
      "step": 97417
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.627565860748291,
      "learning_rate": 0.0003720337407390023,
      "loss": 2.8435,
      "step": 97418
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0880918502807617,
      "learning_rate": 0.0003720297698392206,
      "loss": 3.0734,
      "step": 97419
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7793169021606445,
      "learning_rate": 0.00037202579892604734,
      "loss": 3.1528,
      "step": 97420
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.993626594543457,
      "learning_rate": 0.0003720218279994831,
      "loss": 3.1749,
      "step": 97421
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.850874900817871,
      "learning_rate": 0.00037201785705952853,
      "loss": 3.0535,
      "step": 97422
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.090989351272583,
      "learning_rate": 0.00037201388610618466,
      "loss": 3.1239,
      "step": 97423
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.791860580444336,
      "learning_rate": 0.0003720099151394519,
      "loss": 2.8324,
      "step": 97424
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1011433601379395,
      "learning_rate": 0.00037200594415933124,
      "loss": 3.0611,
      "step": 97425
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7025318145751953,
      "learning_rate": 0.0003720019731658233,
      "loss": 2.8366,
      "step": 97426
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2865054607391357,
      "learning_rate": 0.0003719980021589288,
      "loss": 2.9508,
      "step": 97427
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3268203735351562,
      "learning_rate": 0.0003719940311386485,
      "loss": 2.718,
      "step": 97428
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6755906343460083,
      "learning_rate": 0.0003719900601049832,
      "loss": 3.0384,
      "step": 97429
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7163243293762207,
      "learning_rate": 0.0003719860890579336,
      "loss": 2.903,
      "step": 97430
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.114109754562378,
      "learning_rate": 0.00037198211799750037,
      "loss": 2.9691,
      "step": 97431
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0607237815856934,
      "learning_rate": 0.0003719781469236843,
      "loss": 3.0645,
      "step": 97432
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7676507234573364,
      "learning_rate": 0.00037197417583648617,
      "loss": 3.0398,
      "step": 97433
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2147843837738037,
      "learning_rate": 0.00037197020473590667,
      "loss": 3.2398,
      "step": 97434
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5716334581375122,
      "learning_rate": 0.0003719662336219466,
      "loss": 3.0099,
      "step": 97435
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4926562309265137,
      "learning_rate": 0.00037196226249460655,
      "loss": 2.8919,
      "step": 97436
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.700052261352539,
      "learning_rate": 0.00037195829135388745,
      "loss": 3.2274,
      "step": 97437
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8500276803970337,
      "learning_rate": 0.0003719543201997899,
      "loss": 2.8181,
      "step": 97438
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.346418619155884,
      "learning_rate": 0.0003719503490323147,
      "loss": 2.895,
      "step": 97439
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9241670370101929,
      "learning_rate": 0.00037194637785146253,
      "loss": 3.0103,
      "step": 97440
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.848733901977539,
      "learning_rate": 0.0003719424066572342,
      "loss": 2.8403,
      "step": 97441
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.403275489807129,
      "learning_rate": 0.00037193843544963044,
      "loss": 2.7384,
      "step": 97442
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8472809791564941,
      "learning_rate": 0.0003719344642286519,
      "loss": 2.9765,
      "step": 97443
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8929457664489746,
      "learning_rate": 0.0003719304929942995,
      "loss": 2.7883,
      "step": 97444
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5556896924972534,
      "learning_rate": 0.00037192652174657375,
      "loss": 2.8503,
      "step": 97445
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9853782653808594,
      "learning_rate": 0.0003719225504854755,
      "loss": 3.0359,
      "step": 97446
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8844951391220093,
      "learning_rate": 0.0003719185792110056,
      "loss": 3.4624,
      "step": 97447
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6623417139053345,
      "learning_rate": 0.0003719146079231646,
      "loss": 2.7214,
      "step": 97448
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6532431840896606,
      "learning_rate": 0.0003719106366219533,
      "loss": 3.1257,
      "step": 97449
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1452019214630127,
      "learning_rate": 0.00037190666530737253,
      "loss": 2.8743,
      "step": 97450
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9119665622711182,
      "learning_rate": 0.00037190269397942294,
      "loss": 3.0301,
      "step": 97451
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.915640950202942,
      "learning_rate": 0.0003718987226381052,
      "loss": 2.9906,
      "step": 97452
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1613941192626953,
      "learning_rate": 0.00037189475128342013,
      "loss": 3.1068,
      "step": 97453
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9391670227050781,
      "learning_rate": 0.0003718907799153686,
      "loss": 2.8051,
      "step": 97454
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9991655349731445,
      "learning_rate": 0.0003718868085339511,
      "loss": 2.9013,
      "step": 97455
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.81512451171875,
      "learning_rate": 0.0003718828371391685,
      "loss": 2.9224,
      "step": 97456
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.681593894958496,
      "learning_rate": 0.00037187886573102156,
      "loss": 3.1952,
      "step": 97457
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6573846340179443,
      "learning_rate": 0.000371874894309511,
      "loss": 2.916,
      "step": 97458
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.825469493865967,
      "learning_rate": 0.0003718709228746375,
      "loss": 3.1295,
      "step": 97459
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8068246841430664,
      "learning_rate": 0.0003718669514264019,
      "loss": 2.9489,
      "step": 97460
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7538374662399292,
      "learning_rate": 0.00037186297996480476,
      "loss": 3.1042,
      "step": 97461
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3781402111053467,
      "learning_rate": 0.000371859008489847,
      "loss": 2.9472,
      "step": 97462
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.707894802093506,
      "learning_rate": 0.0003718550370015293,
      "loss": 3.0891,
      "step": 97463
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.7399747371673584,
      "learning_rate": 0.00037185106549985237,
      "loss": 3.0883,
      "step": 97464
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5381039381027222,
      "learning_rate": 0.000371847093984817,
      "loss": 3.0826,
      "step": 97465
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8281420469284058,
      "learning_rate": 0.0003718431224564239,
      "loss": 3.012,
      "step": 97466
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5461480617523193,
      "learning_rate": 0.0003718391509146738,
      "loss": 3.2164,
      "step": 97467
    },
    {
      "epoch": 1.27,
      "grad_norm": 5.445077419281006,
      "learning_rate": 0.0003718351793595674,
      "loss": 2.8688,
      "step": 97468
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0811522006988525,
      "learning_rate": 0.0003718312077911056,
      "loss": 3.1387,
      "step": 97469
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.590854287147522,
      "learning_rate": 0.00037182723620928887,
      "loss": 3.1291,
      "step": 97470
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3210020065307617,
      "learning_rate": 0.00037182326461411824,
      "loss": 3.3312,
      "step": 97471
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.221327066421509,
      "learning_rate": 0.00037181929300559425,
      "loss": 3.1037,
      "step": 97472
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9289188385009766,
      "learning_rate": 0.00037181532138371766,
      "loss": 2.8899,
      "step": 97473
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.19687557220459,
      "learning_rate": 0.0003718113497484893,
      "loss": 2.9784,
      "step": 97474
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.843390941619873,
      "learning_rate": 0.0003718073780999099,
      "loss": 3.1547,
      "step": 97475
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4864953756332397,
      "learning_rate": 0.00037180340643798004,
      "loss": 3.0769,
      "step": 97476
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6187397241592407,
      "learning_rate": 0.00037179943476270064,
      "loss": 2.9452,
      "step": 97477
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.05206298828125,
      "learning_rate": 0.0003717954630740724,
      "loss": 2.9635,
      "step": 97478
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.4187216758728027,
      "learning_rate": 0.00037179149137209596,
      "loss": 3.1869,
      "step": 97479
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.881212830543518,
      "learning_rate": 0.00037178751965677204,
      "loss": 3.1657,
      "step": 97480
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5870492458343506,
      "learning_rate": 0.0003717835479281016,
      "loss": 3.106,
      "step": 97481
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0805859565734863,
      "learning_rate": 0.0003717795761860852,
      "loss": 2.8921,
      "step": 97482
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7698394060134888,
      "learning_rate": 0.00037177560443072365,
      "loss": 2.9708,
      "step": 97483
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3595023155212402,
      "learning_rate": 0.0003717716326620176,
      "loss": 2.909,
      "step": 97484
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1846184730529785,
      "learning_rate": 0.000371767660879968,
      "loss": 2.909,
      "step": 97485
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6540381908416748,
      "learning_rate": 0.00037176368908457526,
      "loss": 2.803,
      "step": 97486
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6721746921539307,
      "learning_rate": 0.0003717597172758404,
      "loss": 2.6096,
      "step": 97487
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.221687078475952,
      "learning_rate": 0.00037175574545376396,
      "loss": 2.9713,
      "step": 97488
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.87949800491333,
      "learning_rate": 0.00037175177361834687,
      "loss": 3.0002,
      "step": 97489
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7786744832992554,
      "learning_rate": 0.0003717478017695897,
      "loss": 3.3548,
      "step": 97490
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.924572229385376,
      "learning_rate": 0.00037174382990749327,
      "loss": 2.78,
      "step": 97491
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7624868154525757,
      "learning_rate": 0.0003717398580320583,
      "loss": 3.2426,
      "step": 97492
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6301519870758057,
      "learning_rate": 0.00037173588614328564,
      "loss": 2.9279,
      "step": 97493
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.4387402534484863,
      "learning_rate": 0.00037173191424117586,
      "loss": 2.9514,
      "step": 97494
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5665457248687744,
      "learning_rate": 0.0003717279423257297,
      "loss": 2.9485,
      "step": 97495
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9772586822509766,
      "learning_rate": 0.0003717239703969481,
      "loss": 3.1279,
      "step": 97496
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.666675090789795,
      "learning_rate": 0.0003717199984548315,
      "loss": 3.15,
      "step": 97497
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.033756732940674,
      "learning_rate": 0.0003717160264993809,
      "loss": 3.2386,
      "step": 97498
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8221757411956787,
      "learning_rate": 0.0003717120545305969,
      "loss": 3.0809,
      "step": 97499
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9976723194122314,
      "learning_rate": 0.0003717080825484803,
      "loss": 2.9609,
      "step": 97500
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.40822696685791,
      "learning_rate": 0.0003717041105530317,
      "loss": 3.3323,
      "step": 97501
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6640180349349976,
      "learning_rate": 0.0003717001385442522,
      "loss": 2.8413,
      "step": 97502
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7792338132858276,
      "learning_rate": 0.00037169616652214213,
      "loss": 3.0238,
      "step": 97503
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.709364891052246,
      "learning_rate": 0.00037169219448670237,
      "loss": 3.0844,
      "step": 97504
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6524027585983276,
      "learning_rate": 0.00037168822243793375,
      "loss": 3.0141,
      "step": 97505
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6025272607803345,
      "learning_rate": 0.00037168425037583687,
      "loss": 2.8048,
      "step": 97506
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5981793403625488,
      "learning_rate": 0.0003716802783004125,
      "loss": 2.9416,
      "step": 97507
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8000961542129517,
      "learning_rate": 0.00037167630621166153,
      "loss": 2.8622,
      "step": 97508
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5886305570602417,
      "learning_rate": 0.00037167233410958454,
      "loss": 2.9524,
      "step": 97509
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.019423246383667,
      "learning_rate": 0.00037166836199418226,
      "loss": 2.8432,
      "step": 97510
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8871577978134155,
      "learning_rate": 0.0003716643898654555,
      "loss": 3.011,
      "step": 97511
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9200267791748047,
      "learning_rate": 0.000371660417723405,
      "loss": 2.8651,
      "step": 97512
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.076178550720215,
      "learning_rate": 0.0003716564455680315,
      "loss": 2.6869,
      "step": 97513
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7011210918426514,
      "learning_rate": 0.0003716524733993357,
      "loss": 3.0343,
      "step": 97514
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5330653190612793,
      "learning_rate": 0.00037164850121731833,
      "loss": 3.0251,
      "step": 97515
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5705132484436035,
      "learning_rate": 0.0003716445290219801,
      "loss": 2.8635,
      "step": 97516
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1079955101013184,
      "learning_rate": 0.0003716405568133219,
      "loss": 2.9471,
      "step": 97517
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5887980461120605,
      "learning_rate": 0.0003716365845913443,
      "loss": 2.9467,
      "step": 97518
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6830496788024902,
      "learning_rate": 0.00037163261235604813,
      "loss": 3.0503,
      "step": 97519
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.068470001220703,
      "learning_rate": 0.0003716286401074341,
      "loss": 3.1113,
      "step": 97520
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7553203105926514,
      "learning_rate": 0.000371624667845503,
      "loss": 2.9843,
      "step": 97521
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6947237253189087,
      "learning_rate": 0.00037162069557025544,
      "loss": 3.1333,
      "step": 97522
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4884032011032104,
      "learning_rate": 0.0003716167232816923,
      "loss": 2.9541,
      "step": 97523
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.4752888679504395,
      "learning_rate": 0.0003716127509798143,
      "loss": 2.8166,
      "step": 97524
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7360349893569946,
      "learning_rate": 0.00037160877866462204,
      "loss": 2.9927,
      "step": 97525
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.510215997695923,
      "learning_rate": 0.00037160480633611635,
      "loss": 2.9433,
      "step": 97526
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7067756652832031,
      "learning_rate": 0.0003716008339942981,
      "loss": 3.1867,
      "step": 97527
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.555757761001587,
      "learning_rate": 0.0003715968616391678,
      "loss": 3.0276,
      "step": 97528
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.151492118835449,
      "learning_rate": 0.0003715928892707263,
      "loss": 2.9219,
      "step": 97529
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5937222242355347,
      "learning_rate": 0.00037158891688897437,
      "loss": 2.9434,
      "step": 97530
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.800490140914917,
      "learning_rate": 0.00037158494449391266,
      "loss": 2.8995,
      "step": 97531
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7400126457214355,
      "learning_rate": 0.000371580972085542,
      "loss": 2.9468,
      "step": 97532
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6097803115844727,
      "learning_rate": 0.00037157699966386306,
      "loss": 3.0478,
      "step": 97533
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4839272499084473,
      "learning_rate": 0.00037157302722887664,
      "loss": 2.9077,
      "step": 97534
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5925536155700684,
      "learning_rate": 0.0003715690547805834,
      "loss": 2.9539,
      "step": 97535
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8258181810379028,
      "learning_rate": 0.0003715650823189841,
      "loss": 3.1506,
      "step": 97536
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6467864513397217,
      "learning_rate": 0.0003715611098440796,
      "loss": 3.0757,
      "step": 97537
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.917876958847046,
      "learning_rate": 0.00037155713735587045,
      "loss": 2.8913,
      "step": 97538
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0850613117218018,
      "learning_rate": 0.00037155316485435754,
      "loss": 2.8058,
      "step": 97539
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.946738600730896,
      "learning_rate": 0.00037154919233954153,
      "loss": 2.7747,
      "step": 97540
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6771483421325684,
      "learning_rate": 0.0003715452198114231,
      "loss": 2.8519,
      "step": 97541
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.900719404220581,
      "learning_rate": 0.00037154124727000316,
      "loss": 2.9958,
      "step": 97542
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.893515110015869,
      "learning_rate": 0.00037153727471528233,
      "loss": 3.1251,
      "step": 97543
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.39699125289917,
      "learning_rate": 0.0003715333021472613,
      "loss": 2.8104,
      "step": 97544
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9462848901748657,
      "learning_rate": 0.00037152932956594094,
      "loss": 3.1272,
      "step": 97545
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.816661834716797,
      "learning_rate": 0.00037152535697132196,
      "loss": 2.9891,
      "step": 97546
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.2644670009613037,
      "learning_rate": 0.0003715213843634049,
      "loss": 3.0497,
      "step": 97547
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3677453994750977,
      "learning_rate": 0.0003715174117421909,
      "loss": 2.7914,
      "step": 97548
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.828998327255249,
      "learning_rate": 0.0003715134391076803,
      "loss": 2.9443,
      "step": 97549
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1798784732818604,
      "learning_rate": 0.00037150946645987405,
      "loss": 3.1517,
      "step": 97550
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.666503667831421,
      "learning_rate": 0.0003715054937987729,
      "loss": 3.0585,
      "step": 97551
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9797476530075073,
      "learning_rate": 0.0003715015211243775,
      "loss": 2.791,
      "step": 97552
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7487043142318726,
      "learning_rate": 0.0003714975484366885,
      "loss": 2.857,
      "step": 97553
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8932971954345703,
      "learning_rate": 0.00037149357573570686,
      "loss": 3.1354,
      "step": 97554
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0242509841918945,
      "learning_rate": 0.0003714896030214332,
      "loss": 3.1561,
      "step": 97555
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8621108531951904,
      "learning_rate": 0.00037148563029386827,
      "loss": 2.9434,
      "step": 97556
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.311753273010254,
      "learning_rate": 0.00037148165755301286,
      "loss": 2.8769,
      "step": 97557
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8727394342422485,
      "learning_rate": 0.0003714776847988676,
      "loss": 3.0114,
      "step": 97558
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9189975261688232,
      "learning_rate": 0.00037147371203143326,
      "loss": 3.3758,
      "step": 97559
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.706146240234375,
      "learning_rate": 0.00037146973925071075,
      "loss": 3.1196,
      "step": 97560
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.692901849746704,
      "learning_rate": 0.0003714657664567005,
      "loss": 3.1288,
      "step": 97561
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1395821571350098,
      "learning_rate": 0.0003714617936494035,
      "loss": 2.5174,
      "step": 97562
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.763706922531128,
      "learning_rate": 0.00037145782082882045,
      "loss": 2.8792,
      "step": 97563
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6167033910751343,
      "learning_rate": 0.0003714538479949519,
      "loss": 3.145,
      "step": 97564
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7615976333618164,
      "learning_rate": 0.00037144987514779883,
      "loss": 3.168,
      "step": 97565
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9050877094268799,
      "learning_rate": 0.00037144590228736183,
      "loss": 2.9371,
      "step": 97566
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4837803840637207,
      "learning_rate": 0.0003714419294136417,
      "loss": 3.0796,
      "step": 97567
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3969528675079346,
      "learning_rate": 0.0003714379565266392,
      "loss": 2.9143,
      "step": 97568
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8304619789123535,
      "learning_rate": 0.000371433983626355,
      "loss": 3.0128,
      "step": 97569
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.823888897895813,
      "learning_rate": 0.00037143001071278993,
      "loss": 3.1106,
      "step": 97570
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5637272596359253,
      "learning_rate": 0.0003714260377859446,
      "loss": 2.9943,
      "step": 97571
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.841415286064148,
      "learning_rate": 0.0003714220648458199,
      "loss": 2.9143,
      "step": 97572
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7000732421875,
      "learning_rate": 0.0003714180918924164,
      "loss": 2.8496,
      "step": 97573
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.837005615234375,
      "learning_rate": 0.00037141411892573507,
      "loss": 3.1985,
      "step": 97574
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7060551643371582,
      "learning_rate": 0.0003714101459457764,
      "loss": 3.3539,
      "step": 97575
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6719180345535278,
      "learning_rate": 0.00037140617295254125,
      "loss": 2.9364,
      "step": 97576
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.055032730102539,
      "learning_rate": 0.00037140219994603033,
      "loss": 3.0919,
      "step": 97577
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6483726501464844,
      "learning_rate": 0.0003713982269262444,
      "loss": 2.9466,
      "step": 97578
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9354579448699951,
      "learning_rate": 0.0003713942538931842,
      "loss": 2.8015,
      "step": 97579
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5084012746810913,
      "learning_rate": 0.0003713902808468504,
      "loss": 3.1254,
      "step": 97580
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5266867876052856,
      "learning_rate": 0.0003713863077872439,
      "loss": 3.0745,
      "step": 97581
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4804351329803467,
      "learning_rate": 0.0003713823347143653,
      "loss": 2.829,
      "step": 97582
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0257301330566406,
      "learning_rate": 0.00037137836162821534,
      "loss": 2.8006,
      "step": 97583
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3269691467285156,
      "learning_rate": 0.00037137438852879486,
      "loss": 3.2586,
      "step": 97584
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6543337106704712,
      "learning_rate": 0.0003713704154161045,
      "loss": 3.0142,
      "step": 97585
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.776404857635498,
      "learning_rate": 0.00037136644229014503,
      "loss": 3.0466,
      "step": 97586
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.496213674545288,
      "learning_rate": 0.00037136246915091714,
      "loss": 2.8539,
      "step": 97587
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8856942653656006,
      "learning_rate": 0.00037135849599842177,
      "loss": 2.9387,
      "step": 97588
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.706028938293457,
      "learning_rate": 0.00037135452283265934,
      "loss": 2.6902,
      "step": 97589
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9891843795776367,
      "learning_rate": 0.0003713505496536308,
      "loss": 3.1842,
      "step": 97590
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8102558851242065,
      "learning_rate": 0.00037134657646133694,
      "loss": 3.1447,
      "step": 97591
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6186697483062744,
      "learning_rate": 0.0003713426032557784,
      "loss": 3.0198,
      "step": 97592
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9835624694824219,
      "learning_rate": 0.00037133863003695574,
      "loss": 3.0978,
      "step": 97593
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8089691400527954,
      "learning_rate": 0.0003713346568048701,
      "loss": 2.8938,
      "step": 97594
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9696980714797974,
      "learning_rate": 0.0003713306835595219,
      "loss": 2.9981,
      "step": 97595
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.127603530883789,
      "learning_rate": 0.00037132671030091195,
      "loss": 2.7601,
      "step": 97596
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.812382459640503,
      "learning_rate": 0.00037132273702904114,
      "loss": 3.1715,
      "step": 97597
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1252341270446777,
      "learning_rate": 0.00037131876374390995,
      "loss": 3.1875,
      "step": 97598
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8704380989074707,
      "learning_rate": 0.0003713147904455193,
      "loss": 3.1065,
      "step": 97599
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5645192861557007,
      "learning_rate": 0.0003713108171338699,
      "loss": 3.0156,
      "step": 97600
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8357585668563843,
      "learning_rate": 0.0003713068438089625,
      "loss": 2.7998,
      "step": 97601
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.992521286010742,
      "learning_rate": 0.00037130287047079777,
      "loss": 3.2619,
      "step": 97602
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.025369882583618,
      "learning_rate": 0.00037129889711937656,
      "loss": 2.9854,
      "step": 97603
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8350560665130615,
      "learning_rate": 0.0003712949237546994,
      "loss": 2.7557,
      "step": 97604
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.0358405113220215,
      "learning_rate": 0.0003712909503767673,
      "loss": 2.8912,
      "step": 97605
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5505354404449463,
      "learning_rate": 0.0003712869769855808,
      "loss": 2.8455,
      "step": 97606
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.836812138557434,
      "learning_rate": 0.00037128300358114073,
      "loss": 3.1765,
      "step": 97607
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9026254415512085,
      "learning_rate": 0.0003712790301634477,
      "loss": 3.2149,
      "step": 97608
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5992069244384766,
      "learning_rate": 0.0003712750567325027,
      "loss": 3.2077,
      "step": 97609
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.728827714920044,
      "learning_rate": 0.0003712710832883063,
      "loss": 2.9403,
      "step": 97610
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8653236627578735,
      "learning_rate": 0.0003712671098308592,
      "loss": 3.0727,
      "step": 97611
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9656305313110352,
      "learning_rate": 0.00037126313636016224,
      "loss": 3.2148,
      "step": 97612
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.743408441543579,
      "learning_rate": 0.00037125916287621607,
      "loss": 3.1147,
      "step": 97613
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.837060809135437,
      "learning_rate": 0.00037125518937902155,
      "loss": 2.9176,
      "step": 97614
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9486196041107178,
      "learning_rate": 0.0003712512158685793,
      "loss": 2.7907,
      "step": 97615
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5907458066940308,
      "learning_rate": 0.0003712472423448901,
      "loss": 2.9504,
      "step": 97616
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7212361097335815,
      "learning_rate": 0.0003712432688079547,
      "loss": 3.001,
      "step": 97617
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4311141967773438,
      "learning_rate": 0.0003712392952577739,
      "loss": 2.9736,
      "step": 97618
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.4642205238342285,
      "learning_rate": 0.0003712353216943482,
      "loss": 2.885,
      "step": 97619
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.111298084259033,
      "learning_rate": 0.0003712313481176787,
      "loss": 2.7983,
      "step": 97620
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.761179804801941,
      "learning_rate": 0.0003712273745277658,
      "loss": 3.1098,
      "step": 97621
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8585634231567383,
      "learning_rate": 0.00037122340092461047,
      "loss": 2.9304,
      "step": 97622
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.024718999862671,
      "learning_rate": 0.00037121942730821333,
      "loss": 2.9835,
      "step": 97623
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.69210946559906,
      "learning_rate": 0.00037121545367857517,
      "loss": 2.9152,
      "step": 97624
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.544127345085144,
      "learning_rate": 0.0003712114800356967,
      "loss": 2.9099,
      "step": 97625
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.026198148727417,
      "learning_rate": 0.0003712075063795787,
      "loss": 3.0957,
      "step": 97626
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.064375877380371,
      "learning_rate": 0.00037120353271022187,
      "loss": 3.1541,
      "step": 97627
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8989558219909668,
      "learning_rate": 0.000371199559027627,
      "loss": 2.8586,
      "step": 97628
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8281958103179932,
      "learning_rate": 0.00037119558533179464,
      "loss": 2.9165,
      "step": 97629
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2258594036102295,
      "learning_rate": 0.00037119161162272587,
      "loss": 2.9366,
      "step": 97630
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9606871604919434,
      "learning_rate": 0.0003711876379004211,
      "loss": 2.9699,
      "step": 97631
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.138638973236084,
      "learning_rate": 0.0003711836641648812,
      "loss": 2.7738,
      "step": 97632
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9802693128585815,
      "learning_rate": 0.00037117969041610706,
      "loss": 2.9139,
      "step": 97633
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.809516429901123,
      "learning_rate": 0.00037117571665409914,
      "loss": 2.7246,
      "step": 97634
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0894477367401123,
      "learning_rate": 0.0003711717428788583,
      "loss": 2.9563,
      "step": 97635
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2320427894592285,
      "learning_rate": 0.0003711677690903854,
      "loss": 2.7081,
      "step": 97636
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6734601259231567,
      "learning_rate": 0.000371163795288681,
      "loss": 3.0295,
      "step": 97637
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8018556833267212,
      "learning_rate": 0.0003711598214737458,
      "loss": 2.9208,
      "step": 97638
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8471795320510864,
      "learning_rate": 0.0003711558476455808,
      "loss": 2.7782,
      "step": 97639
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.423495054244995,
      "learning_rate": 0.0003711518738041865,
      "loss": 3.0742,
      "step": 97640
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.83097243309021,
      "learning_rate": 0.00037114789994956377,
      "loss": 2.9657,
      "step": 97641
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.441364049911499,
      "learning_rate": 0.00037114392608171337,
      "loss": 2.9969,
      "step": 97642
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.3824105262756348,
      "learning_rate": 0.00037113995220063587,
      "loss": 3.0289,
      "step": 97643
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3115668296813965,
      "learning_rate": 0.0003711359783063321,
      "loss": 3.1515,
      "step": 97644
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7456282377243042,
      "learning_rate": 0.0003711320043988029,
      "loss": 2.8785,
      "step": 97645
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0054900646209717,
      "learning_rate": 0.0003711280304780489,
      "loss": 2.9233,
      "step": 97646
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.810518741607666,
      "learning_rate": 0.0003711240565440707,
      "loss": 2.8838,
      "step": 97647
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5506982803344727,
      "learning_rate": 0.0003711200825968694,
      "loss": 3.1762,
      "step": 97648
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.999636173248291,
      "learning_rate": 0.0003711161086364454,
      "loss": 2.937,
      "step": 97649
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9411671161651611,
      "learning_rate": 0.0003711121346627997,
      "loss": 2.9028,
      "step": 97650
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9626609086990356,
      "learning_rate": 0.00037110816067593283,
      "loss": 3.0491,
      "step": 97651
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5584933757781982,
      "learning_rate": 0.00037110418667584555,
      "loss": 3.1974,
      "step": 97652
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7814059257507324,
      "learning_rate": 0.0003711002126625387,
      "loss": 3.1416,
      "step": 97653
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6630147695541382,
      "learning_rate": 0.000371096238636013,
      "loss": 3.1687,
      "step": 97654
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8255739212036133,
      "learning_rate": 0.00037109226459626916,
      "loss": 3.1016,
      "step": 97655
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7174876928329468,
      "learning_rate": 0.00037108829054330794,
      "loss": 2.8974,
      "step": 97656
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9137611389160156,
      "learning_rate": 0.0003710843164771301,
      "loss": 3.0941,
      "step": 97657
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5062254667282104,
      "learning_rate": 0.0003710803423977363,
      "loss": 3.0036,
      "step": 97658
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5443599224090576,
      "learning_rate": 0.0003710763683051273,
      "loss": 3.1962,
      "step": 97659
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3343393802642822,
      "learning_rate": 0.00037107239419930386,
      "loss": 2.9744,
      "step": 97660
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.187279462814331,
      "learning_rate": 0.00037106842008026675,
      "loss": 3.0439,
      "step": 97661
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1554102897644043,
      "learning_rate": 0.0003710644459480167,
      "loss": 3.0142,
      "step": 97662
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7766884565353394,
      "learning_rate": 0.00037106047180255437,
      "loss": 3.2532,
      "step": 97663
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.9659993648529053,
      "learning_rate": 0.0003710564976438806,
      "loss": 3.0228,
      "step": 97664
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.760453462600708,
      "learning_rate": 0.000371052523471996,
      "loss": 2.7523,
      "step": 97665
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8097233772277832,
      "learning_rate": 0.00037104854928690157,
      "loss": 3.1417,
      "step": 97666
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.77055823802948,
      "learning_rate": 0.0003710445750885977,
      "loss": 3.3136,
      "step": 97667
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2886829376220703,
      "learning_rate": 0.00037104060087708545,
      "loss": 3.0278,
      "step": 97668
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7044979333877563,
      "learning_rate": 0.0003710366266523653,
      "loss": 2.8983,
      "step": 97669
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7288002967834473,
      "learning_rate": 0.00037103265241443816,
      "loss": 2.714,
      "step": 97670
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2024526596069336,
      "learning_rate": 0.0003710286781633046,
      "loss": 2.8149,
      "step": 97671
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.8290927410125732,
      "learning_rate": 0.0003710247038989656,
      "loss": 3.0814,
      "step": 97672
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.70133376121521,
      "learning_rate": 0.0003710207296214218,
      "loss": 3.0747,
      "step": 97673
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1365420818328857,
      "learning_rate": 0.00037101675533067377,
      "loss": 3.0827,
      "step": 97674
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2805685997009277,
      "learning_rate": 0.00037101278102672245,
      "loss": 3.0161,
      "step": 97675
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6657147407531738,
      "learning_rate": 0.00037100880670956854,
      "loss": 3.1405,
      "step": 97676
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.735378623008728,
      "learning_rate": 0.0003710048323792127,
      "loss": 3.068,
      "step": 97677
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8754454851150513,
      "learning_rate": 0.0003710008580356557,
      "loss": 3.0772,
      "step": 97678
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6782832145690918,
      "learning_rate": 0.00037099688367889835,
      "loss": 2.7909,
      "step": 97679
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.421431541442871,
      "learning_rate": 0.0003709929093089413,
      "loss": 2.9598,
      "step": 97680
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.078795909881592,
      "learning_rate": 0.0003709889349257854,
      "loss": 3.0924,
      "step": 97681
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7662404775619507,
      "learning_rate": 0.0003709849605294313,
      "loss": 3.0606,
      "step": 97682
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.683858633041382,
      "learning_rate": 0.0003709809861198797,
      "loss": 3.2682,
      "step": 97683
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.33152437210083,
      "learning_rate": 0.00037097701169713136,
      "loss": 3.146,
      "step": 97684
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.9800541400909424,
      "learning_rate": 0.0003709730372611872,
      "loss": 2.9771,
      "step": 97685
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.2022206783294678,
      "learning_rate": 0.0003709690628120477,
      "loss": 2.6376,
      "step": 97686
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.865027904510498,
      "learning_rate": 0.00037096508834971373,
      "loss": 2.9933,
      "step": 97687
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5283217430114746,
      "learning_rate": 0.0003709611138741861,
      "loss": 2.9034,
      "step": 97688
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1766679286956787,
      "learning_rate": 0.00037095713938546535,
      "loss": 3.0323,
      "step": 97689
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.442434072494507,
      "learning_rate": 0.0003709531648835523,
      "loss": 3.2712,
      "step": 97690
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8437573909759521,
      "learning_rate": 0.00037094919036844787,
      "loss": 3.0355,
      "step": 97691
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8021787405014038,
      "learning_rate": 0.0003709452158401525,
      "loss": 3.0124,
      "step": 97692
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3982598781585693,
      "learning_rate": 0.00037094124129866715,
      "loss": 3.1685,
      "step": 97693
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.888751745223999,
      "learning_rate": 0.0003709372667439925,
      "loss": 3.1808,
      "step": 97694
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8511502742767334,
      "learning_rate": 0.0003709332921761292,
      "loss": 3.3315,
      "step": 97695
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.920774221420288,
      "learning_rate": 0.00037092931759507804,
      "loss": 2.9829,
      "step": 97696
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.6050333976745605,
      "learning_rate": 0.0003709253430008399,
      "loss": 2.9036,
      "step": 97697
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.617448091506958,
      "learning_rate": 0.00037092136839341535,
      "loss": 3.1812,
      "step": 97698
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5425447225570679,
      "learning_rate": 0.0003709173937728052,
      "loss": 3.0304,
      "step": 97699
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.015446901321411,
      "learning_rate": 0.0003709134191390101,
      "loss": 2.9159,
      "step": 97700
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1036083698272705,
      "learning_rate": 0.0003709094444920309,
      "loss": 2.8971,
      "step": 97701
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5491145849227905,
      "learning_rate": 0.0003709054698318683,
      "loss": 2.9491,
      "step": 97702
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.453920364379883,
      "learning_rate": 0.000370901495158523,
      "loss": 2.8544,
      "step": 97703
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.467406749725342,
      "learning_rate": 0.0003708975204719958,
      "loss": 2.7452,
      "step": 97704
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8837225437164307,
      "learning_rate": 0.0003708935457722874,
      "loss": 3.1434,
      "step": 97705
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9136030673980713,
      "learning_rate": 0.00037088957105939856,
      "loss": 3.1578,
      "step": 97706
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0718958377838135,
      "learning_rate": 0.00037088559633333004,
      "loss": 2.9199,
      "step": 97707
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4546715021133423,
      "learning_rate": 0.0003708816215940825,
      "loss": 3.0758,
      "step": 97708
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8436907529830933,
      "learning_rate": 0.00037087764684165677,
      "loss": 2.8926,
      "step": 97709
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1093804836273193,
      "learning_rate": 0.0003708736720760535,
      "loss": 2.9324,
      "step": 97710
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.106480360031128,
      "learning_rate": 0.00037086969729727354,
      "loss": 2.9751,
      "step": 97711
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1051056385040283,
      "learning_rate": 0.00037086572250531757,
      "loss": 3.2673,
      "step": 97712
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.883948802947998,
      "learning_rate": 0.00037086174770018627,
      "loss": 3.0361,
      "step": 97713
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1783251762390137,
      "learning_rate": 0.00037085777288188045,
      "loss": 2.841,
      "step": 97714
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9661962985992432,
      "learning_rate": 0.0003708537980504008,
      "loss": 2.9062,
      "step": 97715
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.228466272354126,
      "learning_rate": 0.0003708498232057481,
      "loss": 3.134,
      "step": 97716
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6194920539855957,
      "learning_rate": 0.0003708458483479231,
      "loss": 2.916,
      "step": 97717
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.6405346393585205,
      "learning_rate": 0.00037084187347692654,
      "loss": 2.9132,
      "step": 97718
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8523876667022705,
      "learning_rate": 0.0003708378985927591,
      "loss": 3.0502,
      "step": 97719
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9213659763336182,
      "learning_rate": 0.0003708339236954216,
      "loss": 3.0384,
      "step": 97720
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9936065673828125,
      "learning_rate": 0.0003708299487849147,
      "loss": 3.1161,
      "step": 97721
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9681055545806885,
      "learning_rate": 0.00037082597386123924,
      "loss": 3.0141,
      "step": 97722
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8241889476776123,
      "learning_rate": 0.00037082199892439586,
      "loss": 3.0451,
      "step": 97723
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9711887836456299,
      "learning_rate": 0.0003708180239743852,
      "loss": 3.3284,
      "step": 97724
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8964853286743164,
      "learning_rate": 0.00037081404901120836,
      "loss": 3.182,
      "step": 97725
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8289629220962524,
      "learning_rate": 0.00037081007403486576,
      "loss": 3.0558,
      "step": 97726
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9829158782958984,
      "learning_rate": 0.0003708060990453581,
      "loss": 2.8495,
      "step": 97727
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7808589935302734,
      "learning_rate": 0.00037080212404268643,
      "loss": 3.2281,
      "step": 97728
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7664742469787598,
      "learning_rate": 0.0003707981490268512,
      "loss": 2.867,
      "step": 97729
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.348393201828003,
      "learning_rate": 0.0003707941739978533,
      "loss": 2.844,
      "step": 97730
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0723562240600586,
      "learning_rate": 0.00037079019895569347,
      "loss": 2.9552,
      "step": 97731
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.636375665664673,
      "learning_rate": 0.0003707862239003724,
      "loss": 3.0961,
      "step": 97732
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8512264490127563,
      "learning_rate": 0.00037078224883189075,
      "loss": 2.8823,
      "step": 97733
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.165529489517212,
      "learning_rate": 0.0003707782737502494,
      "loss": 3.2185,
      "step": 97734
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.618282675743103,
      "learning_rate": 0.00037077429865544904,
      "loss": 3.0787,
      "step": 97735
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.008193016052246,
      "learning_rate": 0.00037077032354749037,
      "loss": 2.7046,
      "step": 97736
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.345534086227417,
      "learning_rate": 0.00037076634842637425,
      "loss": 3.0359,
      "step": 97737
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8689097166061401,
      "learning_rate": 0.0003707623732921011,
      "loss": 2.861,
      "step": 97738
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.200377941131592,
      "learning_rate": 0.00037075839814467213,
      "loss": 3.2115,
      "step": 97739
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.137962579727173,
      "learning_rate": 0.00037075442298408775,
      "loss": 3.0827,
      "step": 97740
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.074686288833618,
      "learning_rate": 0.0003707504478103488,
      "loss": 2.9302,
      "step": 97741
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.4416258335113525,
      "learning_rate": 0.000370746472623456,
      "loss": 3.0855,
      "step": 97742
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.730550765991211,
      "learning_rate": 0.0003707424974234101,
      "loss": 2.8811,
      "step": 97743
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.498714566230774,
      "learning_rate": 0.00037073852221021183,
      "loss": 3.1078,
      "step": 97744
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.76292085647583,
      "learning_rate": 0.00037073454698386186,
      "loss": 2.9759,
      "step": 97745
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7026208639144897,
      "learning_rate": 0.0003707305717443611,
      "loss": 3.1973,
      "step": 97746
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8391015529632568,
      "learning_rate": 0.0003707265964917102,
      "loss": 2.9208,
      "step": 97747
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8698843717575073,
      "learning_rate": 0.00037072262122590983,
      "loss": 2.775,
      "step": 97748
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.194216012954712,
      "learning_rate": 0.00037071864594696077,
      "loss": 3.1049,
      "step": 97749
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6460046768188477,
      "learning_rate": 0.0003707146706548639,
      "loss": 3.1874,
      "step": 97750
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.887284278869629,
      "learning_rate": 0.00037071069534961974,
      "loss": 2.8981,
      "step": 97751
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.595373511314392,
      "learning_rate": 0.00037070672003122914,
      "loss": 2.8251,
      "step": 97752
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.764392375946045,
      "learning_rate": 0.00037070274469969274,
      "loss": 3.019,
      "step": 97753
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0186238288879395,
      "learning_rate": 0.00037069876935501154,
      "loss": 3.0056,
      "step": 97754
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7355645895004272,
      "learning_rate": 0.000370694793997186,
      "loss": 3.0201,
      "step": 97755
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6258418560028076,
      "learning_rate": 0.000370690818626217,
      "loss": 2.8526,
      "step": 97756
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.008819818496704,
      "learning_rate": 0.00037068684324210516,
      "loss": 2.7244,
      "step": 97757
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8209643363952637,
      "learning_rate": 0.00037068286784485143,
      "loss": 3.1704,
      "step": 97758
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4996113777160645,
      "learning_rate": 0.00037067889243445635,
      "loss": 3.1369,
      "step": 97759
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.0140573978424072,
      "learning_rate": 0.00037067491701092074,
      "loss": 2.9363,
      "step": 97760
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6194169521331787,
      "learning_rate": 0.00037067094157424536,
      "loss": 2.9788,
      "step": 97761
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2018964290618896,
      "learning_rate": 0.00037066696612443086,
      "loss": 3.152,
      "step": 97762
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7517327070236206,
      "learning_rate": 0.00037066299066147804,
      "loss": 3.1518,
      "step": 97763
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6325185298919678,
      "learning_rate": 0.00037065901518538774,
      "loss": 3.0581,
      "step": 97764
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8169922828674316,
      "learning_rate": 0.00037065503969616043,
      "loss": 3.0114,
      "step": 97765
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0963315963745117,
      "learning_rate": 0.00037065106419379714,
      "loss": 2.9065,
      "step": 97766
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.4679412841796875,
      "learning_rate": 0.0003706470886782985,
      "loss": 2.9355,
      "step": 97767
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.29543137550354,
      "learning_rate": 0.00037064311314966513,
      "loss": 3.0315,
      "step": 97768
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8974796533584595,
      "learning_rate": 0.00037063913760789787,
      "loss": 2.8703,
      "step": 97769
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8479397296905518,
      "learning_rate": 0.00037063516205299756,
      "loss": 2.9599,
      "step": 97770
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8479360342025757,
      "learning_rate": 0.0003706311864849648,
      "loss": 2.9564,
      "step": 97771
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7544413805007935,
      "learning_rate": 0.00037062721090380034,
      "loss": 2.7626,
      "step": 97772
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9776954650878906,
      "learning_rate": 0.00037062323530950495,
      "loss": 2.9855,
      "step": 97773
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.745583176612854,
      "learning_rate": 0.0003706192597020794,
      "loss": 3.1599,
      "step": 97774
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9576923847198486,
      "learning_rate": 0.00037061528408152434,
      "loss": 3.0087,
      "step": 97775
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7657393217086792,
      "learning_rate": 0.00037061130844784064,
      "loss": 2.9699,
      "step": 97776
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7183923721313477,
      "learning_rate": 0.000370607332801029,
      "loss": 3.0108,
      "step": 97777
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.875244379043579,
      "learning_rate": 0.00037060335714109,
      "loss": 2.8189,
      "step": 97778
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3632972240448,
      "learning_rate": 0.00037059938146802453,
      "loss": 2.9471,
      "step": 97779
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5536181926727295,
      "learning_rate": 0.00037059540578183336,
      "loss": 2.7964,
      "step": 97780
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.47878098487854,
      "learning_rate": 0.0003705914300825171,
      "loss": 3.2157,
      "step": 97781
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7621989250183105,
      "learning_rate": 0.0003705874543700767,
      "loss": 3.0495,
      "step": 97782
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2458696365356445,
      "learning_rate": 0.00037058347864451266,
      "loss": 3.1067,
      "step": 97783
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.723708152770996,
      "learning_rate": 0.00037057950290582577,
      "loss": 3.0574,
      "step": 97784
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8525567054748535,
      "learning_rate": 0.0003705755271540169,
      "loss": 3.0866,
      "step": 97785
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8480093479156494,
      "learning_rate": 0.00037057155138908667,
      "loss": 2.8625,
      "step": 97786
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.094200611114502,
      "learning_rate": 0.0003705675756110358,
      "loss": 3.0273,
      "step": 97787
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4697784185409546,
      "learning_rate": 0.00037056359981986513,
      "loss": 2.7915,
      "step": 97788
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1844499111175537,
      "learning_rate": 0.0003705596240155754,
      "loss": 2.9002,
      "step": 97789
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.921613335609436,
      "learning_rate": 0.0003705556481981673,
      "loss": 3.0294,
      "step": 97790
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.331907272338867,
      "learning_rate": 0.0003705516723676415,
      "loss": 2.9947,
      "step": 97791
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1422128677368164,
      "learning_rate": 0.0003705476965239989,
      "loss": 2.8894,
      "step": 97792
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.9411699771881104,
      "learning_rate": 0.0003705437206672401,
      "loss": 3.0854,
      "step": 97793
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.5313642024993896,
      "learning_rate": 0.0003705397447973659,
      "loss": 3.3362,
      "step": 97794
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6437872648239136,
      "learning_rate": 0.00037053576891437706,
      "loss": 2.8194,
      "step": 97795
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7816963195800781,
      "learning_rate": 0.00037053179301827426,
      "loss": 3.1903,
      "step": 97796
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.7295002937316895,
      "learning_rate": 0.00037052781710905826,
      "loss": 3.0574,
      "step": 97797
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0565617084503174,
      "learning_rate": 0.0003705238411867299,
      "loss": 3.2482,
      "step": 97798
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.736033320426941,
      "learning_rate": 0.00037051986525128965,
      "loss": 3.0243,
      "step": 97799
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9320849180221558,
      "learning_rate": 0.0003705158893027385,
      "loss": 3.0288,
      "step": 97800
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.726579189300537,
      "learning_rate": 0.0003705119133410772,
      "loss": 3.0663,
      "step": 97801
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3777174949645996,
      "learning_rate": 0.00037050793736630633,
      "loss": 3.0095,
      "step": 97802
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0715832710266113,
      "learning_rate": 0.0003705039613784267,
      "loss": 3.1301,
      "step": 97803
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0163276195526123,
      "learning_rate": 0.0003704999853774391,
      "loss": 2.9076,
      "step": 97804
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.571741819381714,
      "learning_rate": 0.0003704960093633442,
      "loss": 2.9164,
      "step": 97805
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0278704166412354,
      "learning_rate": 0.0003704920333361427,
      "loss": 2.9009,
      "step": 97806
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9685713052749634,
      "learning_rate": 0.00037048805729583554,
      "loss": 2.9572,
      "step": 97807
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5773589611053467,
      "learning_rate": 0.0003704840812424232,
      "loss": 3.0528,
      "step": 97808
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3490982055664062,
      "learning_rate": 0.0003704801051759065,
      "loss": 2.5335,
      "step": 97809
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.48948073387146,
      "learning_rate": 0.00037047612909628635,
      "loss": 2.8212,
      "step": 97810
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.3487677574157715,
      "learning_rate": 0.0003704721530035633,
      "loss": 3.2505,
      "step": 97811
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.393001079559326,
      "learning_rate": 0.0003704681768977381,
      "loss": 3.1194,
      "step": 97812
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0170035362243652,
      "learning_rate": 0.0003704642007788117,
      "loss": 2.9479,
      "step": 97813
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0380547046661377,
      "learning_rate": 0.00037046022464678444,
      "loss": 2.8538,
      "step": 97814
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7572537660598755,
      "learning_rate": 0.0003704562485016574,
      "loss": 2.8423,
      "step": 97815
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.650103211402893,
      "learning_rate": 0.0003704522723434313,
      "loss": 3.0721,
      "step": 97816
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0347914695739746,
      "learning_rate": 0.0003704482961721067,
      "loss": 3.1616,
      "step": 97817
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1650969982147217,
      "learning_rate": 0.0003704443199876844,
      "loss": 3.0144,
      "step": 97818
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0190956592559814,
      "learning_rate": 0.00037044034379016526,
      "loss": 2.9968,
      "step": 97819
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.25835919380188,
      "learning_rate": 0.00037043636757954984,
      "loss": 3.011,
      "step": 97820
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.292658567428589,
      "learning_rate": 0.00037043239135583903,
      "loss": 2.835,
      "step": 97821
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.029343366622925,
      "learning_rate": 0.00037042841511903356,
      "loss": 2.8842,
      "step": 97822
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.657689094543457,
      "learning_rate": 0.000370424438869134,
      "loss": 3.0283,
      "step": 97823
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.840325117111206,
      "learning_rate": 0.00037042046260614127,
      "loss": 2.9743,
      "step": 97824
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.870944023132324,
      "learning_rate": 0.00037041648633005607,
      "loss": 3.1839,
      "step": 97825
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.616571307182312,
      "learning_rate": 0.00037041251004087903,
      "loss": 3.1214,
      "step": 97826
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7756403684616089,
      "learning_rate": 0.00037040853373861106,
      "loss": 2.8814,
      "step": 97827
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8965542316436768,
      "learning_rate": 0.0003704045574232528,
      "loss": 2.8853,
      "step": 97828
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.538821220397949,
      "learning_rate": 0.00037040058109480493,
      "loss": 2.8686,
      "step": 97829
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7361706495285034,
      "learning_rate": 0.0003703966047532683,
      "loss": 3.319,
      "step": 97830
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6950109004974365,
      "learning_rate": 0.00037039262839864365,
      "loss": 2.9226,
      "step": 97831
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.566076397895813,
      "learning_rate": 0.0003703886520309317,
      "loss": 3.0876,
      "step": 97832
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8125758171081543,
      "learning_rate": 0.00037038467565013314,
      "loss": 2.7478,
      "step": 97833
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6205554008483887,
      "learning_rate": 0.00037038069925624873,
      "loss": 3.1537,
      "step": 97834
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7364389896392822,
      "learning_rate": 0.0003703767228492792,
      "loss": 3.1289,
      "step": 97835
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6379989385604858,
      "learning_rate": 0.00037037274642922534,
      "loss": 2.9046,
      "step": 97836
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6502740383148193,
      "learning_rate": 0.0003703687699960878,
      "loss": 3.109,
      "step": 97837
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6298555135726929,
      "learning_rate": 0.00037036479354986745,
      "loss": 3.0799,
      "step": 97838
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1089494228363037,
      "learning_rate": 0.00037036081709056494,
      "loss": 3.0102,
      "step": 97839
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.01349139213562,
      "learning_rate": 0.00037035684061818104,
      "loss": 2.9386,
      "step": 97840
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7479885816574097,
      "learning_rate": 0.0003703528641327164,
      "loss": 2.9594,
      "step": 97841
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1163971424102783,
      "learning_rate": 0.00037034888763417195,
      "loss": 2.9497,
      "step": 97842
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.1015076637268066,
      "learning_rate": 0.0003703449111225483,
      "loss": 3.0684,
      "step": 97843
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6725678443908691,
      "learning_rate": 0.00037034093459784604,
      "loss": 3.0103,
      "step": 97844
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9098656177520752,
      "learning_rate": 0.0003703369580600662,
      "loss": 3.1927,
      "step": 97845
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.776133418083191,
      "learning_rate": 0.0003703329815092095,
      "loss": 2.9591,
      "step": 97846
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.732932686805725,
      "learning_rate": 0.0003703290049452764,
      "loss": 3.1848,
      "step": 97847
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8926628828048706,
      "learning_rate": 0.0003703250283682679,
      "loss": 3.1548,
      "step": 97848
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8205583095550537,
      "learning_rate": 0.00037032105177818456,
      "loss": 3.2076,
      "step": 97849
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7579522132873535,
      "learning_rate": 0.0003703170751750273,
      "loss": 2.966,
      "step": 97850
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5716397762298584,
      "learning_rate": 0.0003703130985587967,
      "loss": 2.8972,
      "step": 97851
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5584094524383545,
      "learning_rate": 0.00037030912192949356,
      "loss": 2.8777,
      "step": 97852
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7931078672409058,
      "learning_rate": 0.00037030514528711873,
      "loss": 3.0477,
      "step": 97853
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.810941696166992,
      "learning_rate": 0.0003703011686316727,
      "loss": 3.0162,
      "step": 97854
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6081037521362305,
      "learning_rate": 0.0003702971919631564,
      "loss": 3.0942,
      "step": 97855
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.019878387451172,
      "learning_rate": 0.0003702932152815707,
      "loss": 3.0236,
      "step": 97856
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.271899461746216,
      "learning_rate": 0.00037028923858691595,
      "loss": 2.9457,
      "step": 97857
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6024812459945679,
      "learning_rate": 0.00037028526187919317,
      "loss": 3.0667,
      "step": 97858
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8778575658798218,
      "learning_rate": 0.0003702812851584031,
      "loss": 3.0628,
      "step": 97859
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8700350522994995,
      "learning_rate": 0.0003702773084245463,
      "loss": 3.0529,
      "step": 97860
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8634312152862549,
      "learning_rate": 0.0003702733316776237,
      "loss": 2.8781,
      "step": 97861
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0449888706207275,
      "learning_rate": 0.00037026935491763597,
      "loss": 3.0226,
      "step": 97862
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.294198513031006,
      "learning_rate": 0.0003702653781445838,
      "loss": 2.9882,
      "step": 97863
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.731823444366455,
      "learning_rate": 0.000370261401358468,
      "loss": 2.8747,
      "step": 97864
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.6606197357177734,
      "learning_rate": 0.0003702574245592893,
      "loss": 2.9529,
      "step": 97865
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9901622533798218,
      "learning_rate": 0.0003702534477470483,
      "loss": 2.7827,
      "step": 97866
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.912407636642456,
      "learning_rate": 0.00037024947092174595,
      "loss": 2.9111,
      "step": 97867
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8769954442977905,
      "learning_rate": 0.0003702454940833829,
      "loss": 2.9022,
      "step": 97868
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9396262168884277,
      "learning_rate": 0.00037024151723195984,
      "loss": 2.9778,
      "step": 97869
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.721724510192871,
      "learning_rate": 0.0003702375403674776,
      "loss": 3.078,
      "step": 97870
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.709584951400757,
      "learning_rate": 0.00037023356348993687,
      "loss": 2.8305,
      "step": 97871
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9144446849822998,
      "learning_rate": 0.0003702295865993384,
      "loss": 2.8975,
      "step": 97872
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.3639442920684814,
      "learning_rate": 0.00037022560969568284,
      "loss": 2.9553,
      "step": 97873
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.1141021251678467,
      "learning_rate": 0.0003702216327789712,
      "loss": 3.052,
      "step": 97874
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.623119592666626,
      "learning_rate": 0.0003702176558492038,
      "loss": 2.9427,
      "step": 97875
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9269875288009644,
      "learning_rate": 0.0003702136789063818,
      "loss": 3.0356,
      "step": 97876
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8432151079177856,
      "learning_rate": 0.0003702097019505057,
      "loss": 3.0154,
      "step": 97877
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8310428857803345,
      "learning_rate": 0.00037020572498157626,
      "loss": 2.8931,
      "step": 97878
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.326540946960449,
      "learning_rate": 0.00037020174799959425,
      "loss": 2.8635,
      "step": 97879
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9371310472488403,
      "learning_rate": 0.0003701977710045604,
      "loss": 3.0568,
      "step": 97880
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4918208122253418,
      "learning_rate": 0.0003701937939964755,
      "loss": 3.1536,
      "step": 97881
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6846436262130737,
      "learning_rate": 0.0003701898169753402,
      "loss": 3.098,
      "step": 97882
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6465644836425781,
      "learning_rate": 0.00037018583994115535,
      "loss": 3.0893,
      "step": 97883
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9238507747650146,
      "learning_rate": 0.0003701818628939216,
      "loss": 2.8919,
      "step": 97884
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7338504791259766,
      "learning_rate": 0.00037017788583363964,
      "loss": 2.7068,
      "step": 97885
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7388637065887451,
      "learning_rate": 0.00037017390876031045,
      "loss": 3.1342,
      "step": 97886
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8786731958389282,
      "learning_rate": 0.0003701699316739345,
      "loss": 3.0266,
      "step": 97887
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7305476665496826,
      "learning_rate": 0.00037016595457451263,
      "loss": 2.8903,
      "step": 97888
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.2641632556915283,
      "learning_rate": 0.0003701619774620456,
      "loss": 2.8255,
      "step": 97889
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7352104187011719,
      "learning_rate": 0.0003701580003365342,
      "loss": 2.8059,
      "step": 97890
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.0238802433013916,
      "learning_rate": 0.000370154023197979,
      "loss": 2.7691,
      "step": 97891
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5711538791656494,
      "learning_rate": 0.00037015004604638087,
      "loss": 2.9596,
      "step": 97892
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7816002368927002,
      "learning_rate": 0.0003701460688817405,
      "loss": 2.9409,
      "step": 97893
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.293741464614868,
      "learning_rate": 0.00037014209170405874,
      "loss": 2.9357,
      "step": 97894
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5626821517944336,
      "learning_rate": 0.00037013811451333626,
      "loss": 2.7406,
      "step": 97895
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9871934652328491,
      "learning_rate": 0.00037013413730957364,
      "loss": 2.925,
      "step": 97896
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.155217170715332,
      "learning_rate": 0.0003701301600927718,
      "loss": 2.9898,
      "step": 97897
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5877411365509033,
      "learning_rate": 0.00037012618286293153,
      "loss": 3.0652,
      "step": 97898
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.777469277381897,
      "learning_rate": 0.0003701222056200534,
      "loss": 3.0971,
      "step": 97899
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.78590989112854,
      "learning_rate": 0.0003701182283641382,
      "loss": 3.0486,
      "step": 97900
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.7491482496261597,
      "learning_rate": 0.0003701142510951868,
      "loss": 2.9416,
      "step": 97901
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8967087268829346,
      "learning_rate": 0.00037011027381319975,
      "loss": 2.9754,
      "step": 97902
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.9742950201034546,
      "learning_rate": 0.0003701062965181779,
      "loss": 3.0263,
      "step": 97903
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.067111015319824,
      "learning_rate": 0.00037010231921012207,
      "loss": 2.8802,
      "step": 97904
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.185633420944214,
      "learning_rate": 0.00037009834188903273,
      "loss": 3.034,
      "step": 97905
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6392065286636353,
      "learning_rate": 0.00037009436455491085,
      "loss": 3.0644,
      "step": 97906
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5824713706970215,
      "learning_rate": 0.0003700903872077572,
      "loss": 3.0228,
      "step": 97907
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.383110761642456,
      "learning_rate": 0.0003700864098475722,
      "loss": 2.872,
      "step": 97908
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6560509204864502,
      "learning_rate": 0.000370082432474357,
      "loss": 2.759,
      "step": 97909
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8804658651351929,
      "learning_rate": 0.0003700784550881121,
      "loss": 2.8448,
      "step": 97910
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.208946704864502,
      "learning_rate": 0.00037007447768883827,
      "loss": 3.0682,
      "step": 97911
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.4452714920043945,
      "learning_rate": 0.0003700705002765363,
      "loss": 2.9476,
      "step": 97912
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.689572811126709,
      "learning_rate": 0.00037006652285120694,
      "loss": 2.9755,
      "step": 97913
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.5407299995422363,
      "learning_rate": 0.0003700625454128508,
      "loss": 2.9038,
      "step": 97914
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.6018487215042114,
      "learning_rate": 0.00037005856796146875,
      "loss": 3.0452,
      "step": 97915
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.8929758071899414,
      "learning_rate": 0.00037005459049706154,
      "loss": 2.8827,
      "step": 97916
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.900614857673645,
      "learning_rate": 0.00037005061301962976,
      "loss": 2.8544,
      "step": 97917
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.285923957824707,
      "learning_rate": 0.00037004663552917433,
      "loss": 2.9274,
      "step": 97918
    },
    {
      "epoch": 1.27,
      "grad_norm": 1.5597927570343018,
      "learning_rate": 0.00037004265802569584,
      "loss": 3.0033,
      "step": 97919
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.287334442138672,
      "learning_rate": 0.0003700386805091952,
      "loss": 3.0369,
      "step": 97920
    },
    {
      "epoch": 1.27,
      "grad_norm": 2.206394910812378,
      "learning_rate": 0.00037003470297967294,
      "loss": 2.9019,
      "step": 97921
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.271977424621582,
      "learning_rate": 0.00037003072543712994,
      "loss": 2.9245,
      "step": 97922
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9453927278518677,
      "learning_rate": 0.000370026747881567,
      "loss": 3.131,
      "step": 97923
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2727622985839844,
      "learning_rate": 0.00037002277031298464,
      "loss": 2.9865,
      "step": 97924
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2264578342437744,
      "learning_rate": 0.0003700187927313838,
      "loss": 2.6773,
      "step": 97925
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.835228681564331,
      "learning_rate": 0.00037001481513676505,
      "loss": 2.8213,
      "step": 97926
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7368226051330566,
      "learning_rate": 0.00037001083752912937,
      "loss": 2.8952,
      "step": 97927
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.575709342956543,
      "learning_rate": 0.00037000685990847725,
      "loss": 2.8889,
      "step": 97928
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6349608898162842,
      "learning_rate": 0.0003700028822748095,
      "loss": 3.1215,
      "step": 97929
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0981857776641846,
      "learning_rate": 0.000369998904628127,
      "loss": 3.0138,
      "step": 97930
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4296858310699463,
      "learning_rate": 0.0003699949269684303,
      "loss": 3.032,
      "step": 97931
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.6699438095092773,
      "learning_rate": 0.0003699909492957202,
      "loss": 3.0192,
      "step": 97932
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7452514171600342,
      "learning_rate": 0.0003699869716099975,
      "loss": 3.0937,
      "step": 97933
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.197425127029419,
      "learning_rate": 0.000369982993911263,
      "loss": 3.0404,
      "step": 97934
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0909013748168945,
      "learning_rate": 0.00036997901619951725,
      "loss": 3.1889,
      "step": 97935
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8304803371429443,
      "learning_rate": 0.00036997503847476106,
      "loss": 2.8874,
      "step": 97936
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0809736251831055,
      "learning_rate": 0.0003699710607369952,
      "loss": 3.128,
      "step": 97937
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.9605767726898193,
      "learning_rate": 0.00036996708298622045,
      "loss": 3.0877,
      "step": 97938
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9450427293777466,
      "learning_rate": 0.00036996310522243743,
      "loss": 3.433,
      "step": 97939
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.559324026107788,
      "learning_rate": 0.00036995912744564697,
      "loss": 2.9442,
      "step": 97940
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2646467685699463,
      "learning_rate": 0.0003699551496558499,
      "loss": 2.8409,
      "step": 97941
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6214967966079712,
      "learning_rate": 0.00036995117185304664,
      "loss": 2.8499,
      "step": 97942
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8423601388931274,
      "learning_rate": 0.00036994719403723824,
      "loss": 2.9312,
      "step": 97943
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5845566987991333,
      "learning_rate": 0.00036994321620842537,
      "loss": 3.0164,
      "step": 97944
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.789497971534729,
      "learning_rate": 0.0003699392383666087,
      "loss": 2.9706,
      "step": 97945
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.221351146697998,
      "learning_rate": 0.000369935260511789,
      "loss": 2.7089,
      "step": 97946
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8301031589508057,
      "learning_rate": 0.0003699312826439671,
      "loss": 3.0892,
      "step": 97947
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.280888795852661,
      "learning_rate": 0.0003699273047631436,
      "loss": 3.0164,
      "step": 97948
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8321253061294556,
      "learning_rate": 0.0003699233268693192,
      "loss": 3.024,
      "step": 97949
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6162346601486206,
      "learning_rate": 0.0003699193489624948,
      "loss": 3.0686,
      "step": 97950
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0757699012756348,
      "learning_rate": 0.0003699153710426711,
      "loss": 3.0467,
      "step": 97951
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9894124269485474,
      "learning_rate": 0.00036991139310984877,
      "loss": 3.1043,
      "step": 97952
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9858647584915161,
      "learning_rate": 0.00036990741516402864,
      "loss": 2.9889,
      "step": 97953
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.404909610748291,
      "learning_rate": 0.00036990343720521136,
      "loss": 2.602,
      "step": 97954
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.358125686645508,
      "learning_rate": 0.0003698994592333977,
      "loss": 3.0376,
      "step": 97955
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.948375940322876,
      "learning_rate": 0.00036989548124858853,
      "loss": 3.2971,
      "step": 97956
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.590908169746399,
      "learning_rate": 0.0003698915032507843,
      "loss": 3.1581,
      "step": 97957
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4726898670196533,
      "learning_rate": 0.000369887525239986,
      "loss": 2.9822,
      "step": 97958
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7409294843673706,
      "learning_rate": 0.0003698835472161944,
      "loss": 3.0773,
      "step": 97959
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8647472858428955,
      "learning_rate": 0.00036987956917940997,
      "loss": 3.1338,
      "step": 97960
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.3721730709075928,
      "learning_rate": 0.0003698755911296336,
      "loss": 2.9854,
      "step": 97961
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5016531944274902,
      "learning_rate": 0.0003698716130668661,
      "loss": 3.1078,
      "step": 97962
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9153834581375122,
      "learning_rate": 0.0003698676349911082,
      "loss": 2.9076,
      "step": 97963
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5808037519454956,
      "learning_rate": 0.00036986365690236053,
      "loss": 2.9989,
      "step": 97964
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.850599765777588,
      "learning_rate": 0.0003698596788006239,
      "loss": 3.0288,
      "step": 97965
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6885000467300415,
      "learning_rate": 0.000369855700685899,
      "loss": 3.1502,
      "step": 97966
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.300107717514038,
      "learning_rate": 0.0003698517225581866,
      "loss": 2.8161,
      "step": 97967
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.923067331314087,
      "learning_rate": 0.0003698477444174876,
      "loss": 2.9548,
      "step": 97968
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6908621788024902,
      "learning_rate": 0.0003698437662638024,
      "loss": 3.0638,
      "step": 97969
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1810314655303955,
      "learning_rate": 0.00036983978809713194,
      "loss": 2.9224,
      "step": 97970
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.839360237121582,
      "learning_rate": 0.00036983580991747705,
      "loss": 3.0103,
      "step": 97971
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.398160457611084,
      "learning_rate": 0.00036983183172483835,
      "loss": 3.2446,
      "step": 97972
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.859429121017456,
      "learning_rate": 0.0003698278535192165,
      "loss": 2.9469,
      "step": 97973
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7260048389434814,
      "learning_rate": 0.0003698238753006124,
      "loss": 2.7267,
      "step": 97974
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.703445553779602,
      "learning_rate": 0.0003698198970690267,
      "loss": 2.9755,
      "step": 97975
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.9402270317077637,
      "learning_rate": 0.0003698159188244602,
      "loss": 3.1793,
      "step": 97976
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.2316277027130127,
      "learning_rate": 0.00036981194056691363,
      "loss": 3.0002,
      "step": 97977
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6247278451919556,
      "learning_rate": 0.00036980796229638764,
      "loss": 3.0435,
      "step": 97978
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9349133968353271,
      "learning_rate": 0.000369803984012883,
      "loss": 2.9529,
      "step": 97979
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0599756240844727,
      "learning_rate": 0.00036980000571640054,
      "loss": 3.0096,
      "step": 97980
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4802303314208984,
      "learning_rate": 0.00036979602740694096,
      "loss": 3.0623,
      "step": 97981
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9971911907196045,
      "learning_rate": 0.00036979204908450493,
      "loss": 3.2587,
      "step": 97982
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7399858236312866,
      "learning_rate": 0.0003697880707490933,
      "loss": 3.1175,
      "step": 97983
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2148563861846924,
      "learning_rate": 0.00036978409240070667,
      "loss": 3.1675,
      "step": 97984
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.189615249633789,
      "learning_rate": 0.0003697801140393459,
      "loss": 3.0284,
      "step": 97985
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.830449342727661,
      "learning_rate": 0.00036977613566501177,
      "loss": 2.7373,
      "step": 97986
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9006394147872925,
      "learning_rate": 0.00036977215727770484,
      "loss": 2.9692,
      "step": 97987
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7313408851623535,
      "learning_rate": 0.00036976817887742587,
      "loss": 2.8694,
      "step": 97988
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9790586233139038,
      "learning_rate": 0.00036976420046417577,
      "loss": 2.7933,
      "step": 97989
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7414085865020752,
      "learning_rate": 0.00036976022203795525,
      "loss": 2.891,
      "step": 97990
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3142521381378174,
      "learning_rate": 0.00036975624359876486,
      "loss": 3.1035,
      "step": 97991
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6501338481903076,
      "learning_rate": 0.00036975226514660557,
      "loss": 3.0441,
      "step": 97992
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.58964204788208,
      "learning_rate": 0.000369748286681478,
      "loss": 3.0847,
      "step": 97993
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0052530765533447,
      "learning_rate": 0.00036974430820338293,
      "loss": 3.0313,
      "step": 97994
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6298273801803589,
      "learning_rate": 0.000369740329712321,
      "loss": 2.9839,
      "step": 97995
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9189320802688599,
      "learning_rate": 0.0003697363512082931,
      "loss": 3.0465,
      "step": 97996
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.4134467840194702,
      "learning_rate": 0.00036973237269129986,
      "loss": 3.1375,
      "step": 97997
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.864719033241272,
      "learning_rate": 0.000369728394161342,
      "loss": 2.975,
      "step": 97998
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0002405643463135,
      "learning_rate": 0.00036972441561842047,
      "loss": 3.0194,
      "step": 97999
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0644760131835938,
      "learning_rate": 0.0003697204370625357,
      "loss": 3.0409,
      "step": 98000
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8097151517868042,
      "learning_rate": 0.0003697164584936886,
      "loss": 3.166,
      "step": 98001
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8992424011230469,
      "learning_rate": 0.00036971247991188,
      "loss": 2.6935,
      "step": 98002
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.586847186088562,
      "learning_rate": 0.0003697085013171105,
      "loss": 2.84,
      "step": 98003
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9001706838607788,
      "learning_rate": 0.0003697045227093808,
      "loss": 3.3262,
      "step": 98004
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8477963209152222,
      "learning_rate": 0.0003697005440886918,
      "loss": 3.0665,
      "step": 98005
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.799254894256592,
      "learning_rate": 0.0003696965654550441,
      "loss": 3.1414,
      "step": 98006
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5797595977783203,
      "learning_rate": 0.00036969258680843845,
      "loss": 2.9044,
      "step": 98007
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.876345157623291,
      "learning_rate": 0.00036968860814887573,
      "loss": 3.0413,
      "step": 98008
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.825945258140564,
      "learning_rate": 0.00036968462947635657,
      "loss": 2.9275,
      "step": 98009
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.820904016494751,
      "learning_rate": 0.0003696806507908817,
      "loss": 2.8624,
      "step": 98010
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3471004962921143,
      "learning_rate": 0.00036967667209245183,
      "loss": 3.0736,
      "step": 98011
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0213119983673096,
      "learning_rate": 0.0003696726933810678,
      "loss": 3.2006,
      "step": 98012
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9959847927093506,
      "learning_rate": 0.00036966871465673025,
      "loss": 2.8955,
      "step": 98013
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.771404981613159,
      "learning_rate": 0.00036966473591944004,
      "loss": 3.0673,
      "step": 98014
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7613993883132935,
      "learning_rate": 0.00036966075716919785,
      "loss": 2.9272,
      "step": 98015
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7940138578414917,
      "learning_rate": 0.00036965677840600437,
      "loss": 3.3081,
      "step": 98016
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8255274295806885,
      "learning_rate": 0.00036965279962986036,
      "loss": 3.3327,
      "step": 98017
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0178074836730957,
      "learning_rate": 0.00036964882084076654,
      "loss": 3.0574,
      "step": 98018
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6335647106170654,
      "learning_rate": 0.0003696448420387239,
      "loss": 2.9474,
      "step": 98019
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0058298110961914,
      "learning_rate": 0.0003696408632237328,
      "loss": 3.0241,
      "step": 98020
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.066287040710449,
      "learning_rate": 0.00036963688439579413,
      "loss": 3.1323,
      "step": 98021
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2568132877349854,
      "learning_rate": 0.0003696329055549087,
      "loss": 2.9882,
      "step": 98022
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.742910623550415,
      "learning_rate": 0.00036962892670107714,
      "loss": 2.9599,
      "step": 98023
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.133512258529663,
      "learning_rate": 0.0003696249478343003,
      "loss": 2.7912,
      "step": 98024
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.63992977142334,
      "learning_rate": 0.0003696209689545789,
      "loss": 2.9402,
      "step": 98025
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7160300016403198,
      "learning_rate": 0.00036961699006191366,
      "loss": 2.9693,
      "step": 98026
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7782210111618042,
      "learning_rate": 0.0003696130111563052,
      "loss": 3.0725,
      "step": 98027
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.635043978691101,
      "learning_rate": 0.0003696090322377544,
      "loss": 2.8311,
      "step": 98028
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6276893615722656,
      "learning_rate": 0.0003696050533062621,
      "loss": 2.9658,
      "step": 98029
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5691180229187012,
      "learning_rate": 0.0003696010743618287,
      "loss": 2.7838,
      "step": 98030
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.593740463256836,
      "learning_rate": 0.00036959709540445527,
      "loss": 2.9835,
      "step": 98031
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7123597860336304,
      "learning_rate": 0.0003695931164341424,
      "loss": 2.9004,
      "step": 98032
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0829813480377197,
      "learning_rate": 0.00036958913745089087,
      "loss": 3.0335,
      "step": 98033
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.665656566619873,
      "learning_rate": 0.0003695851584547014,
      "loss": 2.7169,
      "step": 98034
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0572736263275146,
      "learning_rate": 0.0003695811794455748,
      "loss": 3.0559,
      "step": 98035
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.802050232887268,
      "learning_rate": 0.00036957720042351167,
      "loss": 3.0686,
      "step": 98036
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8162693977355957,
      "learning_rate": 0.00036957322138851277,
      "loss": 2.8893,
      "step": 98037
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.842120885848999,
      "learning_rate": 0.000369569242340579,
      "loss": 3.0978,
      "step": 98038
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8313785791397095,
      "learning_rate": 0.00036956526327971094,
      "loss": 3.3457,
      "step": 98039
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7239466905593872,
      "learning_rate": 0.00036956128420590937,
      "loss": 2.9926,
      "step": 98040
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8195898532867432,
      "learning_rate": 0.00036955730511917516,
      "loss": 3.1026,
      "step": 98041
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9557363986968994,
      "learning_rate": 0.00036955332601950887,
      "loss": 3.1054,
      "step": 98042
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.802146315574646,
      "learning_rate": 0.00036954934690691123,
      "loss": 3.0858,
      "step": 98043
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5968375205993652,
      "learning_rate": 0.00036954536778138313,
      "loss": 2.7456,
      "step": 98044
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8292449712753296,
      "learning_rate": 0.00036954138864292517,
      "loss": 2.972,
      "step": 98045
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4164607524871826,
      "learning_rate": 0.00036953740949153814,
      "loss": 2.9754,
      "step": 98046
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.723076581954956,
      "learning_rate": 0.0003695334303272229,
      "loss": 2.8465,
      "step": 98047
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.038278102874756,
      "learning_rate": 0.00036952945114998,
      "loss": 3.163,
      "step": 98048
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3904082775115967,
      "learning_rate": 0.0003695254719598103,
      "loss": 2.8601,
      "step": 98049
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5524221658706665,
      "learning_rate": 0.00036952149275671454,
      "loss": 3.0398,
      "step": 98050
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7034754753112793,
      "learning_rate": 0.00036951751354069335,
      "loss": 2.9911,
      "step": 98051
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7099721431732178,
      "learning_rate": 0.0003695135343117475,
      "loss": 3.2832,
      "step": 98052
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1504781246185303,
      "learning_rate": 0.0003695095550698779,
      "loss": 3.0583,
      "step": 98053
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7374427318572998,
      "learning_rate": 0.0003695055758150851,
      "loss": 2.8932,
      "step": 98054
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7583154439926147,
      "learning_rate": 0.0003695015965473699,
      "loss": 2.6586,
      "step": 98055
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.712491273880005,
      "learning_rate": 0.000369497617266733,
      "loss": 3.032,
      "step": 98056
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7632346153259277,
      "learning_rate": 0.0003694936379731753,
      "loss": 3.3137,
      "step": 98057
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8689181804656982,
      "learning_rate": 0.0003694896586666973,
      "loss": 3.2203,
      "step": 98058
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.945936441421509,
      "learning_rate": 0.0003694856793472999,
      "loss": 2.8429,
      "step": 98059
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.594728469848633,
      "learning_rate": 0.0003694817000149838,
      "loss": 3.3359,
      "step": 98060
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.030240058898926,
      "learning_rate": 0.0003694777206697498,
      "loss": 3.033,
      "step": 98061
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0100109577178955,
      "learning_rate": 0.0003694737413115984,
      "loss": 3.2684,
      "step": 98062
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9016485214233398,
      "learning_rate": 0.0003694697619405308,
      "loss": 2.9529,
      "step": 98063
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.9497954845428467,
      "learning_rate": 0.00036946578255654726,
      "loss": 2.8828,
      "step": 98064
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.309697389602661,
      "learning_rate": 0.0003694618031596487,
      "loss": 2.8368,
      "step": 98065
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6222269535064697,
      "learning_rate": 0.000369457823749836,
      "loss": 2.9837,
      "step": 98066
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2972171306610107,
      "learning_rate": 0.0003694538443271097,
      "loss": 3.0393,
      "step": 98067
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.447378158569336,
      "learning_rate": 0.0003694498648914706,
      "loss": 2.6196,
      "step": 98068
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7526661157608032,
      "learning_rate": 0.0003694458854429196,
      "loss": 2.7217,
      "step": 98069
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.710133671760559,
      "learning_rate": 0.0003694419059814572,
      "loss": 3.0367,
      "step": 98070
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.872169256210327,
      "learning_rate": 0.00036943792650708413,
      "loss": 3.2053,
      "step": 98071
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.027553081512451,
      "learning_rate": 0.00036943394701980146,
      "loss": 2.852,
      "step": 98072
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.716781497001648,
      "learning_rate": 0.0003694299675196095,
      "loss": 3.0117,
      "step": 98073
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5493457317352295,
      "learning_rate": 0.00036942598800650934,
      "loss": 3.0744,
      "step": 98074
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6742042303085327,
      "learning_rate": 0.00036942200848050157,
      "loss": 2.9919,
      "step": 98075
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6472439765930176,
      "learning_rate": 0.0003694180289415869,
      "loss": 2.9487,
      "step": 98076
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.636521339416504,
      "learning_rate": 0.0003694140493897661,
      "loss": 3.1649,
      "step": 98077
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7004224061965942,
      "learning_rate": 0.00036941006982503993,
      "loss": 3.1869,
      "step": 98078
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8521782159805298,
      "learning_rate": 0.0003694060902474091,
      "loss": 2.9363,
      "step": 98079
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9543001651763916,
      "learning_rate": 0.0003694021106568744,
      "loss": 3.1947,
      "step": 98080
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.100062370300293,
      "learning_rate": 0.00036939813105343655,
      "loss": 3.3023,
      "step": 98081
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.920612096786499,
      "learning_rate": 0.0003693941514370962,
      "loss": 2.955,
      "step": 98082
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8617581129074097,
      "learning_rate": 0.00036939017180785417,
      "loss": 2.7199,
      "step": 98083
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.783798098564148,
      "learning_rate": 0.0003693861921657113,
      "loss": 3.0908,
      "step": 98084
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2681868076324463,
      "learning_rate": 0.0003693822125106682,
      "loss": 2.894,
      "step": 98085
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.151296377182007,
      "learning_rate": 0.00036937823284272553,
      "loss": 3.0184,
      "step": 98086
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8483104705810547,
      "learning_rate": 0.00036937425316188426,
      "loss": 3.0325,
      "step": 98087
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5327068567276,
      "learning_rate": 0.000369370273468145,
      "loss": 3.1101,
      "step": 98088
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0695366859436035,
      "learning_rate": 0.00036936629376150843,
      "loss": 3.2896,
      "step": 98089
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.732100486755371,
      "learning_rate": 0.00036936231404197545,
      "loss": 2.9933,
      "step": 98090
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.624014139175415,
      "learning_rate": 0.0003693583343095466,
      "loss": 3.1874,
      "step": 98091
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8938562870025635,
      "learning_rate": 0.0003693543545642227,
      "loss": 2.8519,
      "step": 98092
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1924548149108887,
      "learning_rate": 0.0003693503748060047,
      "loss": 2.9143,
      "step": 98093
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.3696484565734863,
      "learning_rate": 0.000369346395034893,
      "loss": 3.025,
      "step": 98094
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.174039363861084,
      "learning_rate": 0.00036934241525088846,
      "loss": 2.9674,
      "step": 98095
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7755497694015503,
      "learning_rate": 0.000369338435453992,
      "loss": 3.114,
      "step": 98096
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0780391693115234,
      "learning_rate": 0.00036933445564420414,
      "loss": 2.9337,
      "step": 98097
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.742602825164795,
      "learning_rate": 0.0003693304758215257,
      "loss": 2.8863,
      "step": 98098
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7990796566009521,
      "learning_rate": 0.0003693264959859574,
      "loss": 3.1011,
      "step": 98099
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.018669605255127,
      "learning_rate": 0.00036932251613750003,
      "loss": 2.9355,
      "step": 98100
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.362276554107666,
      "learning_rate": 0.0003693185362761543,
      "loss": 2.9347,
      "step": 98101
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.861987829208374,
      "learning_rate": 0.0003693145564019209,
      "loss": 2.8769,
      "step": 98102
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.658342957496643,
      "learning_rate": 0.0003693105765148007,
      "loss": 3.0334,
      "step": 98103
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.078235387802124,
      "learning_rate": 0.00036930659661479426,
      "loss": 3.1625,
      "step": 98104
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7930737733840942,
      "learning_rate": 0.00036930261670190244,
      "loss": 3.0751,
      "step": 98105
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.906722903251648,
      "learning_rate": 0.00036929863677612597,
      "loss": 2.7745,
      "step": 98106
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4351820945739746,
      "learning_rate": 0.0003692946568374656,
      "loss": 3.3128,
      "step": 98107
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0704774856567383,
      "learning_rate": 0.00036929067688592197,
      "loss": 3.079,
      "step": 98108
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7097913026809692,
      "learning_rate": 0.00036928669692149596,
      "loss": 3.2271,
      "step": 98109
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8460276126861572,
      "learning_rate": 0.0003692827169441882,
      "loss": 3.0013,
      "step": 98110
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7213454246520996,
      "learning_rate": 0.0003692787369539995,
      "loss": 2.8257,
      "step": 98111
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9482109546661377,
      "learning_rate": 0.00036927475695093065,
      "loss": 3.0449,
      "step": 98112
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7056671380996704,
      "learning_rate": 0.00036927077693498216,
      "loss": 3.2216,
      "step": 98113
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3105504512786865,
      "learning_rate": 0.00036926679690615504,
      "loss": 3.0241,
      "step": 98114
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.886518120765686,
      "learning_rate": 0.0003692628168644499,
      "loss": 2.9867,
      "step": 98115
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.714475393295288,
      "learning_rate": 0.00036925883680986746,
      "loss": 3.0295,
      "step": 98116
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7645496129989624,
      "learning_rate": 0.00036925485674240853,
      "loss": 3.1538,
      "step": 98117
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.677145481109619,
      "learning_rate": 0.0003692508766620738,
      "loss": 2.9847,
      "step": 98118
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9347268342971802,
      "learning_rate": 0.000369246896568864,
      "loss": 3.0593,
      "step": 98119
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8364008665084839,
      "learning_rate": 0.0003692429164627799,
      "loss": 2.9655,
      "step": 98120
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.717594861984253,
      "learning_rate": 0.0003692389363438223,
      "loss": 3.1766,
      "step": 98121
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.257434368133545,
      "learning_rate": 0.0003692349562119918,
      "loss": 2.7718,
      "step": 98122
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7039495706558228,
      "learning_rate": 0.0003692309760672893,
      "loss": 3.1636,
      "step": 98123
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3964667320251465,
      "learning_rate": 0.0003692269959097153,
      "loss": 3.0077,
      "step": 98124
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.119504690170288,
      "learning_rate": 0.00036922301573927085,
      "loss": 2.9501,
      "step": 98125
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8332512378692627,
      "learning_rate": 0.00036921903555595646,
      "loss": 2.883,
      "step": 98126
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7287379503250122,
      "learning_rate": 0.000369215055359773,
      "loss": 2.8881,
      "step": 98127
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.091050386428833,
      "learning_rate": 0.00036921107515072114,
      "loss": 3.0549,
      "step": 98128
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.922229051589966,
      "learning_rate": 0.00036920709492880156,
      "loss": 2.8991,
      "step": 98129
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5152573585510254,
      "learning_rate": 0.00036920311469401516,
      "loss": 2.9977,
      "step": 98130
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5932238101959229,
      "learning_rate": 0.00036919913444636253,
      "loss": 2.7865,
      "step": 98131
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6578052043914795,
      "learning_rate": 0.0003691951541858445,
      "loss": 2.7968,
      "step": 98132
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.997718095779419,
      "learning_rate": 0.00036919117391246185,
      "loss": 2.9814,
      "step": 98133
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8069260120391846,
      "learning_rate": 0.0003691871936262152,
      "loss": 3.1368,
      "step": 98134
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2506284713745117,
      "learning_rate": 0.00036918321332710525,
      "loss": 3.043,
      "step": 98135
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5189234018325806,
      "learning_rate": 0.000369179233015133,
      "loss": 2.926,
      "step": 98136
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9804095029830933,
      "learning_rate": 0.00036917525269029893,
      "loss": 2.9576,
      "step": 98137
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.348515272140503,
      "learning_rate": 0.0003691712723526039,
      "loss": 3.0707,
      "step": 98138
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6149179935455322,
      "learning_rate": 0.0003691672920020486,
      "loss": 3.3298,
      "step": 98139
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.3884782791137695,
      "learning_rate": 0.0003691633116386338,
      "loss": 3.156,
      "step": 98140
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.180760145187378,
      "learning_rate": 0.0003691593312623603,
      "loss": 2.944,
      "step": 98141
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9661672115325928,
      "learning_rate": 0.0003691553508732287,
      "loss": 2.9125,
      "step": 98142
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7665034532546997,
      "learning_rate": 0.00036915137047123986,
      "loss": 2.9868,
      "step": 98143
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6349400281906128,
      "learning_rate": 0.00036914739005639436,
      "loss": 3.1579,
      "step": 98144
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7404289245605469,
      "learning_rate": 0.0003691434096286932,
      "loss": 2.9183,
      "step": 98145
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0748519897460938,
      "learning_rate": 0.00036913942918813695,
      "loss": 2.8118,
      "step": 98146
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8260637521743774,
      "learning_rate": 0.0003691354487347263,
      "loss": 3.0035,
      "step": 98147
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8539111614227295,
      "learning_rate": 0.0003691314682684622,
      "loss": 3.2979,
      "step": 98148
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8775347471237183,
      "learning_rate": 0.0003691274877893451,
      "loss": 3.3194,
      "step": 98149
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8210773468017578,
      "learning_rate": 0.00036912350729737594,
      "loss": 2.9694,
      "step": 98150
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6991313695907593,
      "learning_rate": 0.00036911952679255554,
      "loss": 3.2287,
      "step": 98151
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5649824142456055,
      "learning_rate": 0.00036911554627488437,
      "loss": 2.9261,
      "step": 98152
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9725652933120728,
      "learning_rate": 0.0003691115657443633,
      "loss": 2.9505,
      "step": 98153
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7644829750061035,
      "learning_rate": 0.0003691075852009932,
      "loss": 3.0457,
      "step": 98154
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.008211374282837,
      "learning_rate": 0.00036910360464477464,
      "loss": 2.893,
      "step": 98155
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3534693717956543,
      "learning_rate": 0.0003690996240757084,
      "loss": 3.0779,
      "step": 98156
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.404979705810547,
      "learning_rate": 0.0003690956434937953,
      "loss": 2.8824,
      "step": 98157
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9049862623214722,
      "learning_rate": 0.00036909166289903595,
      "loss": 2.8262,
      "step": 98158
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.181636095046997,
      "learning_rate": 0.00036908768229143113,
      "loss": 2.9371,
      "step": 98159
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7812962532043457,
      "learning_rate": 0.0003690837016709817,
      "loss": 3.2368,
      "step": 98160
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7187930345535278,
      "learning_rate": 0.00036907972103768824,
      "loss": 2.9822,
      "step": 98161
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.19551682472229,
      "learning_rate": 0.0003690757403915515,
      "loss": 3.168,
      "step": 98162
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.401996374130249,
      "learning_rate": 0.00036907175973257243,
      "loss": 3.0664,
      "step": 98163
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6858208179473877,
      "learning_rate": 0.0003690677790607515,
      "loss": 2.8837,
      "step": 98164
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.4892030954360962,
      "learning_rate": 0.0003690637983760896,
      "loss": 3.0455,
      "step": 98165
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8426913022994995,
      "learning_rate": 0.00036905981767858747,
      "loss": 3.1193,
      "step": 98166
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.989555835723877,
      "learning_rate": 0.00036905583696824576,
      "loss": 3.3391,
      "step": 98167
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6265678405761719,
      "learning_rate": 0.00036905185624506526,
      "loss": 3.0385,
      "step": 98168
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6627442836761475,
      "learning_rate": 0.0003690478755090468,
      "loss": 3.1932,
      "step": 98169
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6105082035064697,
      "learning_rate": 0.0003690438947601909,
      "loss": 2.7525,
      "step": 98170
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9323558807373047,
      "learning_rate": 0.0003690399139984985,
      "loss": 3.1021,
      "step": 98171
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8384426832199097,
      "learning_rate": 0.00036903593322397036,
      "loss": 2.9965,
      "step": 98172
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6014655828475952,
      "learning_rate": 0.0003690319524366071,
      "loss": 3.1865,
      "step": 98173
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1334657669067383,
      "learning_rate": 0.0003690279716364094,
      "loss": 3.175,
      "step": 98174
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.681879997253418,
      "learning_rate": 0.0003690239908233782,
      "loss": 3.057,
      "step": 98175
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0384368896484375,
      "learning_rate": 0.0003690200099975141,
      "loss": 3.014,
      "step": 98176
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1590030193328857,
      "learning_rate": 0.00036901602915881784,
      "loss": 3.1144,
      "step": 98177
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.796320915222168,
      "learning_rate": 0.0003690120483072903,
      "loss": 2.7925,
      "step": 98178
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.730508804321289,
      "learning_rate": 0.00036900806744293203,
      "loss": 3.0874,
      "step": 98179
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.646550416946411,
      "learning_rate": 0.0003690040865657438,
      "loss": 3.1334,
      "step": 98180
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.800887107849121,
      "learning_rate": 0.0003690001056757266,
      "loss": 2.9947,
      "step": 98181
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.651100516319275,
      "learning_rate": 0.00036899612477288085,
      "loss": 3.0323,
      "step": 98182
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7386138439178467,
      "learning_rate": 0.0003689921438572074,
      "loss": 3.1121,
      "step": 98183
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7393114566802979,
      "learning_rate": 0.0003689881629287071,
      "loss": 3.0078,
      "step": 98184
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6610493659973145,
      "learning_rate": 0.0003689841819873805,
      "loss": 3.1916,
      "step": 98185
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6823668479919434,
      "learning_rate": 0.0003689802010332284,
      "loss": 2.8392,
      "step": 98186
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1963143348693848,
      "learning_rate": 0.0003689762200662517,
      "loss": 3.0365,
      "step": 98187
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.687612771987915,
      "learning_rate": 0.000368972239086451,
      "loss": 3.1698,
      "step": 98188
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0859532356262207,
      "learning_rate": 0.00036896825809382697,
      "loss": 3.1085,
      "step": 98189
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0270471572875977,
      "learning_rate": 0.00036896427708838056,
      "loss": 2.7767,
      "step": 98190
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0231213569641113,
      "learning_rate": 0.00036896029607011227,
      "loss": 2.9215,
      "step": 98191
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6884263753890991,
      "learning_rate": 0.00036895631503902306,
      "loss": 3.1427,
      "step": 98192
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7700037956237793,
      "learning_rate": 0.00036895233399511355,
      "loss": 3.1114,
      "step": 98193
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7145500183105469,
      "learning_rate": 0.0003689483529383845,
      "loss": 3.1734,
      "step": 98194
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4770209789276123,
      "learning_rate": 0.0003689443718688367,
      "loss": 2.9569,
      "step": 98195
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.050457954406738,
      "learning_rate": 0.0003689403907864707,
      "loss": 2.9731,
      "step": 98196
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7317919731140137,
      "learning_rate": 0.0003689364096912874,
      "loss": 2.7699,
      "step": 98197
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6614372730255127,
      "learning_rate": 0.00036893242858328755,
      "loss": 2.9519,
      "step": 98198
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.077957630157471,
      "learning_rate": 0.0003689284474624719,
      "loss": 2.6118,
      "step": 98199
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0267083644866943,
      "learning_rate": 0.0003689244663288411,
      "loss": 2.7974,
      "step": 98200
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8505724668502808,
      "learning_rate": 0.00036892048518239595,
      "loss": 3.0207,
      "step": 98201
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.8378047943115234,
      "learning_rate": 0.00036891650402313725,
      "loss": 2.8695,
      "step": 98202
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.3008220195770264,
      "learning_rate": 0.0003689125228510656,
      "loss": 3.0629,
      "step": 98203
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3700664043426514,
      "learning_rate": 0.0003689085416661819,
      "loss": 3.0829,
      "step": 98204
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9822248220443726,
      "learning_rate": 0.0003689045604684867,
      "loss": 3.0122,
      "step": 98205
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1342697143554688,
      "learning_rate": 0.0003689005792579809,
      "loss": 3.1164,
      "step": 98206
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.166529655456543,
      "learning_rate": 0.00036889659803466515,
      "loss": 3.2022,
      "step": 98207
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3117918968200684,
      "learning_rate": 0.00036889261679854025,
      "loss": 2.9001,
      "step": 98208
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.98368501663208,
      "learning_rate": 0.0003688886355496069,
      "loss": 3.1782,
      "step": 98209
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7831401824951172,
      "learning_rate": 0.00036888465428786584,
      "loss": 3.2401,
      "step": 98210
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.814737319946289,
      "learning_rate": 0.0003688806730133178,
      "loss": 3.0516,
      "step": 98211
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9132518768310547,
      "learning_rate": 0.0003688766917259636,
      "loss": 3.0898,
      "step": 98212
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7380270957946777,
      "learning_rate": 0.00036887271042580387,
      "loss": 3.239,
      "step": 98213
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.688865303993225,
      "learning_rate": 0.0003688687291128394,
      "loss": 2.6931,
      "step": 98214
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7289469242095947,
      "learning_rate": 0.000368864747787071,
      "loss": 3.1727,
      "step": 98215
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.138664960861206,
      "learning_rate": 0.00036886076644849933,
      "loss": 2.7544,
      "step": 98216
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.667781114578247,
      "learning_rate": 0.00036885678509712504,
      "loss": 3.2093,
      "step": 98217
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7218056917190552,
      "learning_rate": 0.00036885280373294914,
      "loss": 3.0915,
      "step": 98218
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9521496295928955,
      "learning_rate": 0.00036884882235597207,
      "loss": 2.9758,
      "step": 98219
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3143699169158936,
      "learning_rate": 0.00036884484096619475,
      "loss": 3.1282,
      "step": 98220
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6106595993041992,
      "learning_rate": 0.00036884085956361793,
      "loss": 2.8705,
      "step": 98221
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9191668033599854,
      "learning_rate": 0.0003688368781482422,
      "loss": 2.832,
      "step": 98222
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.907851219177246,
      "learning_rate": 0.0003688328967200684,
      "loss": 2.9468,
      "step": 98223
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.871563196182251,
      "learning_rate": 0.00036882891527909733,
      "loss": 3.0418,
      "step": 98224
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7550042867660522,
      "learning_rate": 0.0003688249338253296,
      "loss": 3.0067,
      "step": 98225
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8024988174438477,
      "learning_rate": 0.0003688209523587661,
      "loss": 2.928,
      "step": 98226
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2067465782165527,
      "learning_rate": 0.0003688169708794074,
      "loss": 2.9877,
      "step": 98227
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.950715184211731,
      "learning_rate": 0.0003688129893872544,
      "loss": 3.0438,
      "step": 98228
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8733925819396973,
      "learning_rate": 0.0003688090078823077,
      "loss": 3.1774,
      "step": 98229
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7507264614105225,
      "learning_rate": 0.0003688050263645681,
      "loss": 2.9196,
      "step": 98230
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8408496379852295,
      "learning_rate": 0.0003688010448340364,
      "loss": 2.9843,
      "step": 98231
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6329199075698853,
      "learning_rate": 0.0003687970632907133,
      "loss": 2.9077,
      "step": 98232
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5593029260635376,
      "learning_rate": 0.0003687930817345996,
      "loss": 2.8765,
      "step": 98233
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9692208766937256,
      "learning_rate": 0.0003687891001656958,
      "loss": 3.0873,
      "step": 98234
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.269177198410034,
      "learning_rate": 0.00036878511858400286,
      "loss": 2.9567,
      "step": 98235
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8546581268310547,
      "learning_rate": 0.00036878113698952144,
      "loss": 3.2247,
      "step": 98236
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.240354537963867,
      "learning_rate": 0.00036877715538225235,
      "loss": 2.8854,
      "step": 98237
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.641003370285034,
      "learning_rate": 0.0003687731737621963,
      "loss": 3.0101,
      "step": 98238
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0041205883026123,
      "learning_rate": 0.000368769192129354,
      "loss": 3.105,
      "step": 98239
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.104769706726074,
      "learning_rate": 0.0003687652104837262,
      "loss": 2.7795,
      "step": 98240
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0539820194244385,
      "learning_rate": 0.0003687612288253137,
      "loss": 2.904,
      "step": 98241
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.6733808517456055,
      "learning_rate": 0.00036875724715411715,
      "loss": 2.8458,
      "step": 98242
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8107833862304688,
      "learning_rate": 0.00036875326547013734,
      "loss": 2.6519,
      "step": 98243
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.061366319656372,
      "learning_rate": 0.000368749283773375,
      "loss": 3.1385,
      "step": 98244
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.6417057514190674,
      "learning_rate": 0.0003687453020638309,
      "loss": 2.964,
      "step": 98245
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.044677495956421,
      "learning_rate": 0.00036874132034150567,
      "loss": 2.8479,
      "step": 98246
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4247238636016846,
      "learning_rate": 0.00036873733860640014,
      "loss": 2.9063,
      "step": 98247
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.178964138031006,
      "learning_rate": 0.00036873335685851517,
      "loss": 2.8341,
      "step": 98248
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4452877044677734,
      "learning_rate": 0.00036872937509785126,
      "loss": 3.2418,
      "step": 98249
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.1666529178619385,
      "learning_rate": 0.0003687253933244092,
      "loss": 3.0168,
      "step": 98250
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.804212808609009,
      "learning_rate": 0.00036872141153818994,
      "loss": 3.0137,
      "step": 98251
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5067474842071533,
      "learning_rate": 0.000368717429739194,
      "loss": 3.3571,
      "step": 98252
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9497685432434082,
      "learning_rate": 0.00036871344792742216,
      "loss": 3.1069,
      "step": 98253
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.634937047958374,
      "learning_rate": 0.0003687094661028752,
      "loss": 3.182,
      "step": 98254
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.460378408432007,
      "learning_rate": 0.00036870548426555397,
      "loss": 3.0533,
      "step": 98255
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1531596183776855,
      "learning_rate": 0.000368701502415459,
      "loss": 2.9296,
      "step": 98256
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.981701374053955,
      "learning_rate": 0.0003686975205525911,
      "loss": 2.8981,
      "step": 98257
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.9266891479492188,
      "learning_rate": 0.00036869353867695104,
      "loss": 2.982,
      "step": 98258
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.62250018119812,
      "learning_rate": 0.0003686895567885396,
      "loss": 3.3088,
      "step": 98259
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6617470979690552,
      "learning_rate": 0.0003686855748873574,
      "loss": 3.2051,
      "step": 98260
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3316924571990967,
      "learning_rate": 0.0003686815929734054,
      "loss": 3.1927,
      "step": 98261
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.974363327026367,
      "learning_rate": 0.00036867761104668404,
      "loss": 3.0978,
      "step": 98262
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7797988653182983,
      "learning_rate": 0.0003686736291071943,
      "loss": 3.1837,
      "step": 98263
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5569618940353394,
      "learning_rate": 0.0003686696471549368,
      "loss": 3.1213,
      "step": 98264
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.380216360092163,
      "learning_rate": 0.0003686656651899124,
      "loss": 2.9271,
      "step": 98265
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.193302869796753,
      "learning_rate": 0.00036866168321212164,
      "loss": 3.2699,
      "step": 98266
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8971816301345825,
      "learning_rate": 0.00036865770122156545,
      "loss": 2.932,
      "step": 98267
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0187413692474365,
      "learning_rate": 0.00036865371921824446,
      "loss": 2.8251,
      "step": 98268
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.040065050125122,
      "learning_rate": 0.0003686497372021594,
      "loss": 2.8789,
      "step": 98269
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7767175436019897,
      "learning_rate": 0.00036864575517331116,
      "loss": 3.017,
      "step": 98270
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9793736934661865,
      "learning_rate": 0.0003686417731317003,
      "loss": 2.9154,
      "step": 98271
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.615915298461914,
      "learning_rate": 0.00036863779107732767,
      "loss": 2.8626,
      "step": 98272
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.046494722366333,
      "learning_rate": 0.00036863380901019406,
      "loss": 3.1599,
      "step": 98273
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.192080497741699,
      "learning_rate": 0.00036862982693030004,
      "loss": 3.1272,
      "step": 98274
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.700162887573242,
      "learning_rate": 0.00036862584483764643,
      "loss": 3.0486,
      "step": 98275
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.811042070388794,
      "learning_rate": 0.0003686218627322341,
      "loss": 3.1388,
      "step": 98276
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9952614307403564,
      "learning_rate": 0.0003686178806140635,
      "loss": 2.9297,
      "step": 98277
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.1091411113739014,
      "learning_rate": 0.0003686138984831356,
      "loss": 2.9857,
      "step": 98278
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0494043827056885,
      "learning_rate": 0.00036860991633945104,
      "loss": 2.9233,
      "step": 98279
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.258755683898926,
      "learning_rate": 0.0003686059341830107,
      "loss": 2.9082,
      "step": 98280
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5816080570220947,
      "learning_rate": 0.0003686019520138152,
      "loss": 3.0029,
      "step": 98281
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.351195812225342,
      "learning_rate": 0.0003685979698318653,
      "loss": 3.0497,
      "step": 98282
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.92221736907959,
      "learning_rate": 0.0003685939876371617,
      "loss": 3.2076,
      "step": 98283
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5972256660461426,
      "learning_rate": 0.0003685900054297052,
      "loss": 3.0494,
      "step": 98284
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5358611345291138,
      "learning_rate": 0.0003685860232094965,
      "loss": 3.0343,
      "step": 98285
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.488060235977173,
      "learning_rate": 0.0003685820409765364,
      "loss": 3.1009,
      "step": 98286
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5294920206069946,
      "learning_rate": 0.00036857805873082564,
      "loss": 2.9776,
      "step": 98287
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5821489095687866,
      "learning_rate": 0.0003685740764723649,
      "loss": 2.8992,
      "step": 98288
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.838966727256775,
      "learning_rate": 0.0003685700942011549,
      "loss": 2.992,
      "step": 98289
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7522602081298828,
      "learning_rate": 0.00036856611191719645,
      "loss": 2.895,
      "step": 98290
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.518379807472229,
      "learning_rate": 0.0003685621296204903,
      "loss": 2.9564,
      "step": 98291
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.950568437576294,
      "learning_rate": 0.0003685581473110371,
      "loss": 3.0425,
      "step": 98292
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7520911693572998,
      "learning_rate": 0.00036855416498883764,
      "loss": 2.9482,
      "step": 98293
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.507412075996399,
      "learning_rate": 0.00036855018265389276,
      "loss": 3.1295,
      "step": 98294
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1099395751953125,
      "learning_rate": 0.000368546200306203,
      "loss": 2.86,
      "step": 98295
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1075286865234375,
      "learning_rate": 0.0003685422179457692,
      "loss": 2.9965,
      "step": 98296
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.452648401260376,
      "learning_rate": 0.0003685382355725922,
      "loss": 2.9735,
      "step": 98297
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6887140274047852,
      "learning_rate": 0.0003685342531866726,
      "loss": 2.8518,
      "step": 98298
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.702630043029785,
      "learning_rate": 0.00036853027078801115,
      "loss": 3.0221,
      "step": 98299
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.631800889968872,
      "learning_rate": 0.00036852628837660877,
      "loss": 2.9866,
      "step": 98300
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7158443927764893,
      "learning_rate": 0.0003685223059524659,
      "loss": 3.0692,
      "step": 98301
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2853591442108154,
      "learning_rate": 0.00036851832351558354,
      "loss": 3.172,
      "step": 98302
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8497724533081055,
      "learning_rate": 0.0003685143410659623,
      "loss": 3.2188,
      "step": 98303
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8837203979492188,
      "learning_rate": 0.000368510358603603,
      "loss": 2.9085,
      "step": 98304
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.691519260406494,
      "learning_rate": 0.0003685063761285062,
      "loss": 2.9746,
      "step": 98305
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.627333164215088,
      "learning_rate": 0.0003685023936406729,
      "loss": 3.0004,
      "step": 98306
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.3276290893554688,
      "learning_rate": 0.00036849841114010366,
      "loss": 2.9413,
      "step": 98307
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0357325077056885,
      "learning_rate": 0.00036849442862679923,
      "loss": 3.1224,
      "step": 98308
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1666314601898193,
      "learning_rate": 0.00036849044610076046,
      "loss": 2.9943,
      "step": 98309
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.9335289001464844,
      "learning_rate": 0.000368486463561988,
      "loss": 2.8669,
      "step": 98310
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2692623138427734,
      "learning_rate": 0.00036848248101048253,
      "loss": 2.9432,
      "step": 98311
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.288475513458252,
      "learning_rate": 0.00036847849844624504,
      "loss": 2.7722,
      "step": 98312
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.431203603744507,
      "learning_rate": 0.000368474515869276,
      "loss": 2.8606,
      "step": 98313
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8515229225158691,
      "learning_rate": 0.00036847053327957623,
      "loss": 3.0762,
      "step": 98314
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0145857334136963,
      "learning_rate": 0.0003684665506771466,
      "loss": 2.8106,
      "step": 98315
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.747432231903076,
      "learning_rate": 0.0003684625680619877,
      "loss": 3.1351,
      "step": 98316
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.516416549682617,
      "learning_rate": 0.0003684585854341003,
      "loss": 3.1421,
      "step": 98317
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5886216163635254,
      "learning_rate": 0.00036845460279348515,
      "loss": 3.1281,
      "step": 98318
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7938836812973022,
      "learning_rate": 0.000368450620140143,
      "loss": 3.147,
      "step": 98319
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4248995780944824,
      "learning_rate": 0.0003684466374740746,
      "loss": 2.6898,
      "step": 98320
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.880341649055481,
      "learning_rate": 0.0003684426547952806,
      "loss": 3.3936,
      "step": 98321
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.421158790588379,
      "learning_rate": 0.000368438672103762,
      "loss": 2.9636,
      "step": 98322
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.670041799545288,
      "learning_rate": 0.0003684346893995192,
      "loss": 3.2878,
      "step": 98323
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4031176567077637,
      "learning_rate": 0.00036843070668255313,
      "loss": 3.0104,
      "step": 98324
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3907172679901123,
      "learning_rate": 0.00036842672395286455,
      "loss": 2.8206,
      "step": 98325
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.057194471359253,
      "learning_rate": 0.0003684227412104541,
      "loss": 2.8561,
      "step": 98326
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.049760580062866,
      "learning_rate": 0.00036841875845532255,
      "loss": 2.9935,
      "step": 98327
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1327974796295166,
      "learning_rate": 0.00036841477568747076,
      "loss": 3.053,
      "step": 98328
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7615114450454712,
      "learning_rate": 0.0003684107929068993,
      "loss": 2.9134,
      "step": 98329
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6372125148773193,
      "learning_rate": 0.000368406810113609,
      "loss": 3.0013,
      "step": 98330
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0514931678771973,
      "learning_rate": 0.0003684028273076006,
      "loss": 3.1372,
      "step": 98331
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.726608753204346,
      "learning_rate": 0.0003683988444888748,
      "loss": 3.1012,
      "step": 98332
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.2343027591705322,
      "learning_rate": 0.00036839486165743233,
      "loss": 2.9944,
      "step": 98333
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.566441059112549,
      "learning_rate": 0.00036839087881327407,
      "loss": 3.0112,
      "step": 98334
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8976002931594849,
      "learning_rate": 0.00036838689595640055,
      "loss": 3.015,
      "step": 98335
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0251336097717285,
      "learning_rate": 0.0003683829130868126,
      "loss": 3.1637,
      "step": 98336
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.3847544193267822,
      "learning_rate": 0.0003683789302045111,
      "loss": 3.0037,
      "step": 98337
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6752023696899414,
      "learning_rate": 0.00036837494730949656,
      "loss": 3.0196,
      "step": 98338
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5523523092269897,
      "learning_rate": 0.0003683709644017698,
      "loss": 2.9406,
      "step": 98339
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.07831072807312,
      "learning_rate": 0.00036836698148133177,
      "loss": 2.8238,
      "step": 98340
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.274644613265991,
      "learning_rate": 0.00036836299854818284,
      "loss": 2.9926,
      "step": 98341
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7257777452468872,
      "learning_rate": 0.00036835901560232397,
      "loss": 3.097,
      "step": 98342
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6872990131378174,
      "learning_rate": 0.000368355032643756,
      "loss": 3.2047,
      "step": 98343
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.475443124771118,
      "learning_rate": 0.00036835104967247934,
      "loss": 2.9205,
      "step": 98344
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9208409786224365,
      "learning_rate": 0.000368347066688495,
      "loss": 2.9352,
      "step": 98345
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6427361965179443,
      "learning_rate": 0.00036834308369180374,
      "loss": 3.1381,
      "step": 98346
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.716010332107544,
      "learning_rate": 0.0003683391006824062,
      "loss": 3.269,
      "step": 98347
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.628188133239746,
      "learning_rate": 0.00036833511766030296,
      "loss": 2.9601,
      "step": 98348
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.646230697631836,
      "learning_rate": 0.0003683311346254951,
      "loss": 2.9465,
      "step": 98349
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9418513774871826,
      "learning_rate": 0.0003683271515779832,
      "loss": 2.8947,
      "step": 98350
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.597672939300537,
      "learning_rate": 0.00036832316851776787,
      "loss": 2.9897,
      "step": 98351
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.030780553817749,
      "learning_rate": 0.0003683191854448501,
      "loss": 2.8568,
      "step": 98352
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2290494441986084,
      "learning_rate": 0.0003683152023592304,
      "loss": 2.9212,
      "step": 98353
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0990140438079834,
      "learning_rate": 0.0003683112192609097,
      "loss": 2.834,
      "step": 98354
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6684554815292358,
      "learning_rate": 0.0003683072361498886,
      "loss": 2.9112,
      "step": 98355
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4992544651031494,
      "learning_rate": 0.0003683032530261679,
      "loss": 2.76,
      "step": 98356
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.699971079826355,
      "learning_rate": 0.0003682992698897483,
      "loss": 2.9562,
      "step": 98357
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6618280410766602,
      "learning_rate": 0.00036829528674063073,
      "loss": 2.9379,
      "step": 98358
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2847845554351807,
      "learning_rate": 0.0003682913035788156,
      "loss": 2.8003,
      "step": 98359
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.444406270980835,
      "learning_rate": 0.0003682873204043039,
      "loss": 2.8712,
      "step": 98360
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5568714141845703,
      "learning_rate": 0.0003682833372170963,
      "loss": 3.0242,
      "step": 98361
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.428589344024658,
      "learning_rate": 0.00036827935401719355,
      "loss": 2.8942,
      "step": 98362
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.876787543296814,
      "learning_rate": 0.0003682753708045963,
      "loss": 2.8614,
      "step": 98363
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8875250816345215,
      "learning_rate": 0.00036827138757930544,
      "loss": 2.9219,
      "step": 98364
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6990063190460205,
      "learning_rate": 0.00036826740434132163,
      "loss": 3.1544,
      "step": 98365
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.946185827255249,
      "learning_rate": 0.0003682634210906456,
      "loss": 2.8748,
      "step": 98366
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6379084587097168,
      "learning_rate": 0.00036825943782727813,
      "loss": 2.7243,
      "step": 98367
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9515204429626465,
      "learning_rate": 0.00036825545455122,
      "loss": 3.1639,
      "step": 98368
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.751394510269165,
      "learning_rate": 0.00036825147126247186,
      "loss": 3.0934,
      "step": 98369
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7557005882263184,
      "learning_rate": 0.0003682474879610344,
      "loss": 3.0466,
      "step": 98370
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.066988945007324,
      "learning_rate": 0.00036824350464690844,
      "loss": 3.0666,
      "step": 98371
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6946847438812256,
      "learning_rate": 0.00036823952132009486,
      "loss": 2.9186,
      "step": 98372
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8289244174957275,
      "learning_rate": 0.0003682355379805942,
      "loss": 3.0247,
      "step": 98373
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.812557578086853,
      "learning_rate": 0.0003682315546284073,
      "loss": 3.0492,
      "step": 98374
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0800063610076904,
      "learning_rate": 0.00036822757126353485,
      "loss": 2.9585,
      "step": 98375
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.047825813293457,
      "learning_rate": 0.0003682235878859775,
      "loss": 2.9494,
      "step": 98376
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2966222763061523,
      "learning_rate": 0.00036821960449573616,
      "loss": 2.8789,
      "step": 98377
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.54726505279541,
      "learning_rate": 0.00036821562109281154,
      "loss": 3.0859,
      "step": 98378
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7690815925598145,
      "learning_rate": 0.00036821163767720435,
      "loss": 2.9139,
      "step": 98379
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.907396078109741,
      "learning_rate": 0.00036820765424891536,
      "loss": 2.8567,
      "step": 98380
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.654413938522339,
      "learning_rate": 0.0003682036708079452,
      "loss": 3.2109,
      "step": 98381
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7262176275253296,
      "learning_rate": 0.00036819968735429473,
      "loss": 3.2015,
      "step": 98382
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7541500329971313,
      "learning_rate": 0.0003681957038879646,
      "loss": 3.0219,
      "step": 98383
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6814041137695312,
      "learning_rate": 0.00036819172040895566,
      "loss": 3.073,
      "step": 98384
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6403484344482422,
      "learning_rate": 0.00036818773691726865,
      "loss": 2.8407,
      "step": 98385
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.369534969329834,
      "learning_rate": 0.0003681837534129041,
      "loss": 2.8831,
      "step": 98386
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6791881322860718,
      "learning_rate": 0.000368179769895863,
      "loss": 3.1185,
      "step": 98387
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0428497791290283,
      "learning_rate": 0.00036817578636614596,
      "loss": 2.869,
      "step": 98388
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9395676851272583,
      "learning_rate": 0.0003681718028237538,
      "loss": 2.8975,
      "step": 98389
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0487515926361084,
      "learning_rate": 0.0003681678192686872,
      "loss": 3.1756,
      "step": 98390
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5328973531723022,
      "learning_rate": 0.00036816383570094683,
      "loss": 2.7277,
      "step": 98391
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4185962677001953,
      "learning_rate": 0.00036815985212053365,
      "loss": 3.3582,
      "step": 98392
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9964238405227661,
      "learning_rate": 0.0003681558685274482,
      "loss": 2.7547,
      "step": 98393
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6447769403457642,
      "learning_rate": 0.00036815188492169126,
      "loss": 2.9063,
      "step": 98394
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6801128387451172,
      "learning_rate": 0.0003681479013032637,
      "loss": 2.996,
      "step": 98395
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.380157470703125,
      "learning_rate": 0.00036814391767216604,
      "loss": 2.9939,
      "step": 98396
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5445048809051514,
      "learning_rate": 0.0003681399340283992,
      "loss": 2.9126,
      "step": 98397
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0097854137420654,
      "learning_rate": 0.00036813595037196386,
      "loss": 3.1765,
      "step": 98398
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.63919997215271,
      "learning_rate": 0.0003681319667028608,
      "loss": 2.9202,
      "step": 98399
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2508914470672607,
      "learning_rate": 0.00036812798302109063,
      "loss": 3.1471,
      "step": 98400
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.223848819732666,
      "learning_rate": 0.00036812399932665424,
      "loss": 2.9518,
      "step": 98401
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8132715225219727,
      "learning_rate": 0.00036812001561955225,
      "loss": 3.1831,
      "step": 98402
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.62038254737854,
      "learning_rate": 0.00036811603189978553,
      "loss": 2.9774,
      "step": 98403
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5674463510513306,
      "learning_rate": 0.0003681120481673548,
      "loss": 3.095,
      "step": 98404
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.223296880722046,
      "learning_rate": 0.0003681080644222606,
      "loss": 3.1344,
      "step": 98405
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.759000778198242,
      "learning_rate": 0.00036810408066450395,
      "loss": 2.882,
      "step": 98406
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3721654415130615,
      "learning_rate": 0.0003681000968940855,
      "loss": 2.8251,
      "step": 98407
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.684394359588623,
      "learning_rate": 0.0003680961131110058,
      "loss": 2.8547,
      "step": 98408
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5874996185302734,
      "learning_rate": 0.00036809212931526584,
      "loss": 3.0533,
      "step": 98409
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7416160106658936,
      "learning_rate": 0.0003680881455068663,
      "loss": 2.9734,
      "step": 98410
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.6555728912353516,
      "learning_rate": 0.00036808416168580783,
      "loss": 3.3078,
      "step": 98411
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.700849175453186,
      "learning_rate": 0.00036808017785209125,
      "loss": 3.0385,
      "step": 98412
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.801697015762329,
      "learning_rate": 0.00036807619400571726,
      "loss": 3.0115,
      "step": 98413
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6501753330230713,
      "learning_rate": 0.00036807221014668667,
      "loss": 2.9708,
      "step": 98414
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1009361743927,
      "learning_rate": 0.00036806822627500013,
      "loss": 2.8605,
      "step": 98415
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9635562896728516,
      "learning_rate": 0.0003680642423906584,
      "loss": 3.077,
      "step": 98416
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.085458993911743,
      "learning_rate": 0.00036806025849366226,
      "loss": 2.8869,
      "step": 98417
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6762408018112183,
      "learning_rate": 0.0003680562745840125,
      "loss": 3.0455,
      "step": 98418
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0208206176757812,
      "learning_rate": 0.00036805229066170977,
      "loss": 3.0462,
      "step": 98419
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7706689834594727,
      "learning_rate": 0.0003680483067267548,
      "loss": 2.8838,
      "step": 98420
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9143588542938232,
      "learning_rate": 0.0003680443227791483,
      "loss": 3.1817,
      "step": 98421
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.65707266330719,
      "learning_rate": 0.0003680403388188912,
      "loss": 2.9074,
      "step": 98422
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8279869556427,
      "learning_rate": 0.00036803635484598406,
      "loss": 3.07,
      "step": 98423
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.928666114807129,
      "learning_rate": 0.00036803237086042767,
      "loss": 3.1968,
      "step": 98424
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6904844045639038,
      "learning_rate": 0.0003680283868622228,
      "loss": 2.9137,
      "step": 98425
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.035524606704712,
      "learning_rate": 0.00036802440285137015,
      "loss": 2.9736,
      "step": 98426
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.027597188949585,
      "learning_rate": 0.00036802041882787043,
      "loss": 2.8182,
      "step": 98427
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.410054922103882,
      "learning_rate": 0.0003680164347917246,
      "loss": 2.8984,
      "step": 98428
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.228299379348755,
      "learning_rate": 0.0003680124507429331,
      "loss": 3.3751,
      "step": 98429
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.171335220336914,
      "learning_rate": 0.0003680084666814968,
      "loss": 2.9902,
      "step": 98430
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8661048412322998,
      "learning_rate": 0.00036800448260741644,
      "loss": 3.0709,
      "step": 98431
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6356303691864014,
      "learning_rate": 0.0003680004985206928,
      "loss": 3.1153,
      "step": 98432
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.297269344329834,
      "learning_rate": 0.0003679965144213265,
      "loss": 2.9261,
      "step": 98433
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.976646065711975,
      "learning_rate": 0.0003679925303093185,
      "loss": 3.0453,
      "step": 98434
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5921106338500977,
      "learning_rate": 0.00036798854618466935,
      "loss": 2.9619,
      "step": 98435
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.196730375289917,
      "learning_rate": 0.0003679845620473798,
      "loss": 3.2882,
      "step": 98436
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.867310047149658,
      "learning_rate": 0.0003679805778974507,
      "loss": 2.8727,
      "step": 98437
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.034616470336914,
      "learning_rate": 0.0003679765937348827,
      "loss": 3.094,
      "step": 98438
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.804901123046875,
      "learning_rate": 0.00036797260955967654,
      "loss": 2.8451,
      "step": 98439
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.2901313304901123,
      "learning_rate": 0.000367968625371833,
      "loss": 2.809,
      "step": 98440
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0077896118164062,
      "learning_rate": 0.00036796464117135286,
      "loss": 3.0015,
      "step": 98441
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.112941265106201,
      "learning_rate": 0.00036796065695823675,
      "loss": 2.9982,
      "step": 98442
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9072571992874146,
      "learning_rate": 0.00036795667273248556,
      "loss": 2.9562,
      "step": 98443
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.199181079864502,
      "learning_rate": 0.0003679526884940999,
      "loss": 3.0358,
      "step": 98444
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6275720596313477,
      "learning_rate": 0.00036794870424308046,
      "loss": 2.8862,
      "step": 98445
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0079429149627686,
      "learning_rate": 0.00036794471997942817,
      "loss": 2.9751,
      "step": 98446
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.764475703239441,
      "learning_rate": 0.0003679407357031437,
      "loss": 2.8433,
      "step": 98447
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7638413906097412,
      "learning_rate": 0.00036793675141422763,
      "loss": 2.8911,
      "step": 98448
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6680827140808105,
      "learning_rate": 0.000367932767112681,
      "loss": 3.1353,
      "step": 98449
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.4419461488723755,
      "learning_rate": 0.0003679287827985043,
      "loss": 3.2813,
      "step": 98450
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7878973484039307,
      "learning_rate": 0.0003679247984716983,
      "loss": 2.8183,
      "step": 98451
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5203291177749634,
      "learning_rate": 0.0003679208141322638,
      "loss": 3.0613,
      "step": 98452
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4622018337249756,
      "learning_rate": 0.00036791682978020167,
      "loss": 2.9988,
      "step": 98453
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7582138776779175,
      "learning_rate": 0.0003679128454155124,
      "loss": 3.0498,
      "step": 98454
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5702382326126099,
      "learning_rate": 0.00036790886103819687,
      "loss": 3.0607,
      "step": 98455
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.765020728111267,
      "learning_rate": 0.00036790487664825584,
      "loss": 3.1669,
      "step": 98456
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.06703782081604,
      "learning_rate": 0.0003679008922456901,
      "loss": 2.913,
      "step": 98457
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8746535778045654,
      "learning_rate": 0.00036789690783050016,
      "loss": 3.1483,
      "step": 98458
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6014420986175537,
      "learning_rate": 0.00036789292340268687,
      "loss": 3.1954,
      "step": 98459
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7567131519317627,
      "learning_rate": 0.00036788893896225113,
      "loss": 3.0251,
      "step": 98460
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7367829084396362,
      "learning_rate": 0.00036788495450919354,
      "loss": 3.1852,
      "step": 98461
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6714653968811035,
      "learning_rate": 0.0003678809700435147,
      "loss": 3.1046,
      "step": 98462
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.563983678817749,
      "learning_rate": 0.00036787698556521565,
      "loss": 2.9458,
      "step": 98463
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.812646508216858,
      "learning_rate": 0.000367873001074297,
      "loss": 3.4997,
      "step": 98464
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6376227140426636,
      "learning_rate": 0.00036786901657075946,
      "loss": 2.7547,
      "step": 98465
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.524283766746521,
      "learning_rate": 0.00036786503205460375,
      "loss": 2.8264,
      "step": 98466
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9081745147705078,
      "learning_rate": 0.0003678610475258307,
      "loss": 2.9529,
      "step": 98467
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.659051537513733,
      "learning_rate": 0.00036785706298444093,
      "loss": 3.1733,
      "step": 98468
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8855245113372803,
      "learning_rate": 0.00036785307843043523,
      "loss": 2.9247,
      "step": 98469
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8592668771743774,
      "learning_rate": 0.00036784909386381446,
      "loss": 3.0099,
      "step": 98470
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3050894737243652,
      "learning_rate": 0.00036784510928457927,
      "loss": 3.2488,
      "step": 98471
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.612325668334961,
      "learning_rate": 0.0003678411246927303,
      "loss": 2.9197,
      "step": 98472
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6213568449020386,
      "learning_rate": 0.00036783714008826844,
      "loss": 2.7157,
      "step": 98473
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7281872034072876,
      "learning_rate": 0.00036783315547119443,
      "loss": 3.1485,
      "step": 98474
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8434088230133057,
      "learning_rate": 0.0003678291708415088,
      "loss": 2.7848,
      "step": 98475
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7579447031021118,
      "learning_rate": 0.0003678251861992126,
      "loss": 2.7922,
      "step": 98476
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6279884576797485,
      "learning_rate": 0.00036782120154430636,
      "loss": 3.0175,
      "step": 98477
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.063599109649658,
      "learning_rate": 0.0003678172168767909,
      "loss": 3.1047,
      "step": 98478
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.534590244293213,
      "learning_rate": 0.0003678132321966669,
      "loss": 3.0433,
      "step": 98479
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.185965061187744,
      "learning_rate": 0.0003678092475039352,
      "loss": 3.0761,
      "step": 98480
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9099427461624146,
      "learning_rate": 0.0003678052627985963,
      "loss": 3.1083,
      "step": 98481
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.994262933731079,
      "learning_rate": 0.00036780127808065134,
      "loss": 2.9339,
      "step": 98482
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.9608919620513916,
      "learning_rate": 0.0003677972933501008,
      "loss": 3.0398,
      "step": 98483
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.80593740940094,
      "learning_rate": 0.0003677933086069454,
      "loss": 3.1997,
      "step": 98484
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5243959426879883,
      "learning_rate": 0.000367789323851186,
      "loss": 2.8927,
      "step": 98485
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6033449172973633,
      "learning_rate": 0.0003677853390828232,
      "loss": 2.8661,
      "step": 98486
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8839714527130127,
      "learning_rate": 0.0003677813543018579,
      "loss": 3.0921,
      "step": 98487
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6954656839370728,
      "learning_rate": 0.00036777736950829073,
      "loss": 3.076,
      "step": 98488
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4022834300994873,
      "learning_rate": 0.0003677733847021225,
      "loss": 3.0538,
      "step": 98489
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0138485431671143,
      "learning_rate": 0.0003677693998833539,
      "loss": 2.9524,
      "step": 98490
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5771913528442383,
      "learning_rate": 0.00036776541505198566,
      "loss": 3.0171,
      "step": 98491
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0711331367492676,
      "learning_rate": 0.00036776143020801866,
      "loss": 2.7413,
      "step": 98492
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7554855346679688,
      "learning_rate": 0.0003677574453514534,
      "loss": 3.0342,
      "step": 98493
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9861761331558228,
      "learning_rate": 0.00036775346048229077,
      "loss": 3.0834,
      "step": 98494
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.666548728942871,
      "learning_rate": 0.00036774947560053157,
      "loss": 2.8213,
      "step": 98495
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3411035537719727,
      "learning_rate": 0.00036774549070617643,
      "loss": 3.0167,
      "step": 98496
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3315787315368652,
      "learning_rate": 0.00036774150579922606,
      "loss": 3.1879,
      "step": 98497
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8091223239898682,
      "learning_rate": 0.0003677375208796814,
      "loss": 2.8735,
      "step": 98498
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6787865161895752,
      "learning_rate": 0.00036773353594754294,
      "loss": 2.717,
      "step": 98499
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7572765350341797,
      "learning_rate": 0.00036772955100281154,
      "loss": 2.8985,
      "step": 98500
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5724492073059082,
      "learning_rate": 0.000367725566045488,
      "loss": 3.1969,
      "step": 98501
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.067800521850586,
      "learning_rate": 0.00036772158107557297,
      "loss": 3.0122,
      "step": 98502
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.826433777809143,
      "learning_rate": 0.0003677175960930672,
      "loss": 3.0736,
      "step": 98503
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8877569437026978,
      "learning_rate": 0.0003677136110979715,
      "loss": 3.0187,
      "step": 98504
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6609694957733154,
      "learning_rate": 0.0003677096260902866,
      "loss": 2.932,
      "step": 98505
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7558568716049194,
      "learning_rate": 0.00036770564107001306,
      "loss": 3.0408,
      "step": 98506
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7740892171859741,
      "learning_rate": 0.0003677016560371519,
      "loss": 3.4347,
      "step": 98507
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0477283000946045,
      "learning_rate": 0.0003676976709917036,
      "loss": 3.1877,
      "step": 98508
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7302930355072021,
      "learning_rate": 0.0003676936859336691,
      "loss": 3.0761,
      "step": 98509
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7969653606414795,
      "learning_rate": 0.0003676897008630491,
      "loss": 3.0571,
      "step": 98510
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.014726400375366,
      "learning_rate": 0.0003676857157798442,
      "loss": 3.1768,
      "step": 98511
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6476234197616577,
      "learning_rate": 0.0003676817306840553,
      "loss": 3.0207,
      "step": 98512
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1145057678222656,
      "learning_rate": 0.0003676777455756831,
      "loss": 2.8492,
      "step": 98513
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.533021926879883,
      "learning_rate": 0.0003676737604547284,
      "loss": 3.0591,
      "step": 98514
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9151549339294434,
      "learning_rate": 0.00036766977532119174,
      "loss": 2.8665,
      "step": 98515
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7389957904815674,
      "learning_rate": 0.0003676657901750741,
      "loss": 3.1864,
      "step": 98516
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1668131351470947,
      "learning_rate": 0.000367661805016376,
      "loss": 3.2173,
      "step": 98517
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.140303611755371,
      "learning_rate": 0.00036765781984509835,
      "loss": 2.8581,
      "step": 98518
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1399075984954834,
      "learning_rate": 0.00036765383466124194,
      "loss": 2.9612,
      "step": 98519
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0807626247406006,
      "learning_rate": 0.00036764984946480726,
      "loss": 2.9945,
      "step": 98520
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7895829677581787,
      "learning_rate": 0.00036764586425579523,
      "loss": 3.2342,
      "step": 98521
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7998343706130981,
      "learning_rate": 0.0003676418790342065,
      "loss": 2.8567,
      "step": 98522
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.1290769577026367,
      "learning_rate": 0.000367637893800042,
      "loss": 2.939,
      "step": 98523
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1209816932678223,
      "learning_rate": 0.0003676339085533023,
      "loss": 2.8926,
      "step": 98524
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.128389596939087,
      "learning_rate": 0.0003676299232939881,
      "loss": 2.6511,
      "step": 98525
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.585087537765503,
      "learning_rate": 0.00036762593802210027,
      "loss": 3.0379,
      "step": 98526
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7807327508926392,
      "learning_rate": 0.0003676219527376395,
      "loss": 2.9115,
      "step": 98527
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0488553047180176,
      "learning_rate": 0.0003676179674406065,
      "loss": 2.8468,
      "step": 98528
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3018364906311035,
      "learning_rate": 0.0003676139821310021,
      "loss": 2.8165,
      "step": 98529
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0304458141326904,
      "learning_rate": 0.00036760999680882697,
      "loss": 2.9949,
      "step": 98530
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9256882667541504,
      "learning_rate": 0.00036760601147408186,
      "loss": 3.0049,
      "step": 98531
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5215331315994263,
      "learning_rate": 0.00036760202612676755,
      "loss": 3.0753,
      "step": 98532
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.733823537826538,
      "learning_rate": 0.00036759804076688473,
      "loss": 2.9807,
      "step": 98533
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.7187564373016357,
      "learning_rate": 0.0003675940553944341,
      "loss": 2.9466,
      "step": 98534
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1482670307159424,
      "learning_rate": 0.0003675900700094166,
      "loss": 2.929,
      "step": 98535
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.216900110244751,
      "learning_rate": 0.0003675860846118326,
      "loss": 3.3095,
      "step": 98536
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8762069940567017,
      "learning_rate": 0.00036758209920168325,
      "loss": 3.2081,
      "step": 98537
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8088085651397705,
      "learning_rate": 0.00036757811377896907,
      "loss": 3.0386,
      "step": 98538
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.667012095451355,
      "learning_rate": 0.00036757412834369083,
      "loss": 3.0475,
      "step": 98539
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.534184217453003,
      "learning_rate": 0.0003675701428958493,
      "loss": 3.3484,
      "step": 98540
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6030510663986206,
      "learning_rate": 0.00036756615743544527,
      "loss": 2.7863,
      "step": 98541
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.168145179748535,
      "learning_rate": 0.00036756217196247924,
      "loss": 2.8199,
      "step": 98542
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1157937049865723,
      "learning_rate": 0.00036755818647695224,
      "loss": 3.0505,
      "step": 98543
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.134812593460083,
      "learning_rate": 0.0003675542009788649,
      "loss": 3.0166,
      "step": 98544
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4254214763641357,
      "learning_rate": 0.000367550215468218,
      "loss": 2.827,
      "step": 98545
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8473923206329346,
      "learning_rate": 0.00036754622994501215,
      "loss": 3.0653,
      "step": 98546
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6008832454681396,
      "learning_rate": 0.00036754224440924816,
      "loss": 3.0477,
      "step": 98547
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0210790634155273,
      "learning_rate": 0.00036753825886092693,
      "loss": 2.9168,
      "step": 98548
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.275339126586914,
      "learning_rate": 0.00036753427330004897,
      "loss": 2.8893,
      "step": 98549
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8633254766464233,
      "learning_rate": 0.0003675302877266151,
      "loss": 3.168,
      "step": 98550
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.833817958831787,
      "learning_rate": 0.00036752630214062607,
      "loss": 2.7643,
      "step": 98551
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7210270166397095,
      "learning_rate": 0.0003675223165420827,
      "loss": 2.867,
      "step": 98552
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0095489025115967,
      "learning_rate": 0.0003675183309309856,
      "loss": 2.9602,
      "step": 98553
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8904621601104736,
      "learning_rate": 0.00036751434530733556,
      "loss": 2.6732,
      "step": 98554
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6182196140289307,
      "learning_rate": 0.00036751035967113335,
      "loss": 3.1686,
      "step": 98555
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.32910418510437,
      "learning_rate": 0.0003675063740223797,
      "loss": 3.0877,
      "step": 98556
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7878820896148682,
      "learning_rate": 0.0003675023883610753,
      "loss": 2.9871,
      "step": 98557
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7843396663665771,
      "learning_rate": 0.00036749840268722093,
      "loss": 3.1152,
      "step": 98558
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6073909997940063,
      "learning_rate": 0.00036749441700081745,
      "loss": 2.9197,
      "step": 98559
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.786850929260254,
      "learning_rate": 0.00036749043130186536,
      "loss": 2.9387,
      "step": 98560
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9236880540847778,
      "learning_rate": 0.00036748644559036553,
      "loss": 2.9815,
      "step": 98561
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.103074789047241,
      "learning_rate": 0.0003674824598663188,
      "loss": 3.2292,
      "step": 98562
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7536381483078003,
      "learning_rate": 0.0003674784741297257,
      "loss": 3.0893,
      "step": 98563
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8182491064071655,
      "learning_rate": 0.00036747448838058707,
      "loss": 2.7781,
      "step": 98564
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.387956142425537,
      "learning_rate": 0.00036747050261890375,
      "loss": 3.0815,
      "step": 98565
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1237945556640625,
      "learning_rate": 0.0003674665168446763,
      "loss": 3.0235,
      "step": 98566
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.107206344604492,
      "learning_rate": 0.0003674625310579056,
      "loss": 3.0014,
      "step": 98567
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.905800223350525,
      "learning_rate": 0.0003674585452585924,
      "loss": 3.0287,
      "step": 98568
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0377790927886963,
      "learning_rate": 0.0003674545594467373,
      "loss": 3.0909,
      "step": 98569
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.592492699623108,
      "learning_rate": 0.0003674505736223411,
      "loss": 3.0649,
      "step": 98570
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6268969774246216,
      "learning_rate": 0.00036744658778540466,
      "loss": 2.8241,
      "step": 98571
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6990607976913452,
      "learning_rate": 0.00036744260193592856,
      "loss": 2.9843,
      "step": 98572
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8961827754974365,
      "learning_rate": 0.0003674386160739136,
      "loss": 2.8146,
      "step": 98573
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8519644737243652,
      "learning_rate": 0.0003674346301993606,
      "loss": 3.2226,
      "step": 98574
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9039424657821655,
      "learning_rate": 0.0003674306443122701,
      "loss": 3.0969,
      "step": 98575
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7742345333099365,
      "learning_rate": 0.00036742665841264305,
      "loss": 2.8559,
      "step": 98576
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.678942084312439,
      "learning_rate": 0.00036742267250048017,
      "loss": 2.9634,
      "step": 98577
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.483873963356018,
      "learning_rate": 0.00036741868657578207,
      "loss": 3.2407,
      "step": 98578
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.52004873752594,
      "learning_rate": 0.00036741470063854955,
      "loss": 2.8689,
      "step": 98579
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9238864183425903,
      "learning_rate": 0.00036741071468878345,
      "loss": 3.0926,
      "step": 98580
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5379942655563354,
      "learning_rate": 0.00036740672872648435,
      "loss": 2.9981,
      "step": 98581
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6670715808868408,
      "learning_rate": 0.00036740274275165306,
      "loss": 2.9546,
      "step": 98582
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.897129774093628,
      "learning_rate": 0.00036739875676429035,
      "loss": 2.9366,
      "step": 98583
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1535134315490723,
      "learning_rate": 0.0003673947707643969,
      "loss": 3.072,
      "step": 98584
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7776120901107788,
      "learning_rate": 0.0003673907847519735,
      "loss": 3.0312,
      "step": 98585
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.623163938522339,
      "learning_rate": 0.00036738679872702103,
      "loss": 3.2435,
      "step": 98586
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7357187271118164,
      "learning_rate": 0.00036738281268953993,
      "loss": 3.0767,
      "step": 98587
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.9355056285858154,
      "learning_rate": 0.0003673788266395311,
      "loss": 2.9393,
      "step": 98588
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.130091667175293,
      "learning_rate": 0.00036737484057699533,
      "loss": 3.1069,
      "step": 98589
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9929189682006836,
      "learning_rate": 0.0003673708545019333,
      "loss": 3.2568,
      "step": 98590
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0342977046966553,
      "learning_rate": 0.0003673668684143457,
      "loss": 2.924,
      "step": 98591
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.631829857826233,
      "learning_rate": 0.0003673628823142333,
      "loss": 2.8534,
      "step": 98592
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.907143473625183,
      "learning_rate": 0.000367358896201597,
      "loss": 2.8858,
      "step": 98593
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4096007347106934,
      "learning_rate": 0.0003673549100764373,
      "loss": 3.0137,
      "step": 98594
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7746796607971191,
      "learning_rate": 0.0003673509239387551,
      "loss": 3.2161,
      "step": 98595
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.528493046760559,
      "learning_rate": 0.00036734693778855114,
      "loss": 3.1725,
      "step": 98596
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7826768159866333,
      "learning_rate": 0.00036734295162582604,
      "loss": 2.9939,
      "step": 98597
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4864537715911865,
      "learning_rate": 0.0003673389654505805,
      "loss": 3.0132,
      "step": 98598
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8683226108551025,
      "learning_rate": 0.0003673349792628156,
      "loss": 3.185,
      "step": 98599
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8060094118118286,
      "learning_rate": 0.00036733099306253175,
      "loss": 2.8441,
      "step": 98600
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.006618022918701,
      "learning_rate": 0.0003673270068497298,
      "loss": 3.1182,
      "step": 98601
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.039560556411743,
      "learning_rate": 0.00036732302062441057,
      "loss": 2.884,
      "step": 98602
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7438592910766602,
      "learning_rate": 0.0003673190343865746,
      "loss": 2.9534,
      "step": 98603
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8059546947479248,
      "learning_rate": 0.00036731504813622277,
      "loss": 2.9309,
      "step": 98604
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6560522317886353,
      "learning_rate": 0.0003673110618733559,
      "loss": 2.9569,
      "step": 98605
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0795199871063232,
      "learning_rate": 0.0003673070755979745,
      "loss": 2.8963,
      "step": 98606
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7898719310760498,
      "learning_rate": 0.0003673030893100795,
      "loss": 2.9181,
      "step": 98607
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7676222324371338,
      "learning_rate": 0.0003672991030096716,
      "loss": 3.1734,
      "step": 98608
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0976498126983643,
      "learning_rate": 0.0003672951166967515,
      "loss": 3.0939,
      "step": 98609
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9472030401229858,
      "learning_rate": 0.00036729113037131997,
      "loss": 2.9974,
      "step": 98610
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.104424476623535,
      "learning_rate": 0.00036728714403337783,
      "loss": 2.9014,
      "step": 98611
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3361380100250244,
      "learning_rate": 0.0003672831576829256,
      "loss": 2.937,
      "step": 98612
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.6442418098449707,
      "learning_rate": 0.0003672791713199642,
      "loss": 2.8281,
      "step": 98613
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7647029161453247,
      "learning_rate": 0.00036727518494449453,
      "loss": 2.8016,
      "step": 98614
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5874743461608887,
      "learning_rate": 0.00036727119855651695,
      "loss": 3.1787,
      "step": 98615
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0633034706115723,
      "learning_rate": 0.00036726721215603233,
      "loss": 3.0344,
      "step": 98616
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8294771909713745,
      "learning_rate": 0.00036726322574304155,
      "loss": 3.055,
      "step": 98617
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6473513841629028,
      "learning_rate": 0.00036725923931754526,
      "loss": 3.048,
      "step": 98618
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.4140803813934326,
      "learning_rate": 0.00036725525287954416,
      "loss": 3.2084,
      "step": 98619
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.840187430381775,
      "learning_rate": 0.0003672512664290391,
      "loss": 3.0633,
      "step": 98620
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0337724685668945,
      "learning_rate": 0.00036724727996603073,
      "loss": 2.8058,
      "step": 98621
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9601198434829712,
      "learning_rate": 0.00036724329349051987,
      "loss": 3.108,
      "step": 98622
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5473324060440063,
      "learning_rate": 0.0003672393070025072,
      "loss": 2.9681,
      "step": 98623
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0749619007110596,
      "learning_rate": 0.0003672353205019934,
      "loss": 3.0609,
      "step": 98624
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0607340335845947,
      "learning_rate": 0.00036723133398897936,
      "loss": 3.0603,
      "step": 98625
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7005301713943481,
      "learning_rate": 0.00036722734746346574,
      "loss": 3.0834,
      "step": 98626
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2630107402801514,
      "learning_rate": 0.0003672233609254532,
      "loss": 3.0109,
      "step": 98627
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.5976243019104004,
      "learning_rate": 0.0003672193743749427,
      "loss": 3.075,
      "step": 98628
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6121121644973755,
      "learning_rate": 0.0003672153878119348,
      "loss": 3.2761,
      "step": 98629
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8917758464813232,
      "learning_rate": 0.00036721140123643025,
      "loss": 3.1713,
      "step": 98630
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0657906532287598,
      "learning_rate": 0.0003672074146484299,
      "loss": 2.8426,
      "step": 98631
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.979332208633423,
      "learning_rate": 0.00036720342804793426,
      "loss": 3.0113,
      "step": 98632
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7478519678115845,
      "learning_rate": 0.00036719944143494444,
      "loss": 3.1203,
      "step": 98633
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8301807641983032,
      "learning_rate": 0.0003671954548094609,
      "loss": 2.8055,
      "step": 98634
    },
    {
      "epoch": 1.28,
      "grad_norm": 4.358795642852783,
      "learning_rate": 0.00036719146817148444,
      "loss": 2.9545,
      "step": 98635
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6294407844543457,
      "learning_rate": 0.00036718748152101576,
      "loss": 3.1652,
      "step": 98636
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.691405177116394,
      "learning_rate": 0.0003671834948580558,
      "loss": 2.766,
      "step": 98637
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.651609182357788,
      "learning_rate": 0.000367179508182605,
      "loss": 3.1211,
      "step": 98638
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.248164415359497,
      "learning_rate": 0.00036717552149466437,
      "loss": 3.0119,
      "step": 98639
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7914271354675293,
      "learning_rate": 0.0003671715347942345,
      "loss": 2.9751,
      "step": 98640
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9320268630981445,
      "learning_rate": 0.00036716754808131623,
      "loss": 3.0174,
      "step": 98641
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.4766132831573486,
      "learning_rate": 0.00036716356135591016,
      "loss": 2.9445,
      "step": 98642
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0341436862945557,
      "learning_rate": 0.0003671595746180171,
      "loss": 2.9893,
      "step": 98643
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.819854736328125,
      "learning_rate": 0.00036715558786763796,
      "loss": 2.8794,
      "step": 98644
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6186107397079468,
      "learning_rate": 0.0003671516011047732,
      "loss": 2.9689,
      "step": 98645
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.6057753562927246,
      "learning_rate": 0.00036714761432942377,
      "loss": 2.8691,
      "step": 98646
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.420121908187866,
      "learning_rate": 0.00036714362754159026,
      "loss": 3.0757,
      "step": 98647
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.222442150115967,
      "learning_rate": 0.0003671396407412735,
      "loss": 2.8859,
      "step": 98648
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3538265228271484,
      "learning_rate": 0.00036713565392847424,
      "loss": 2.9909,
      "step": 98649
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6704446077346802,
      "learning_rate": 0.0003671316671031932,
      "loss": 3.0509,
      "step": 98650
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5591237545013428,
      "learning_rate": 0.0003671276802654311,
      "loss": 3.0711,
      "step": 98651
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.887237787246704,
      "learning_rate": 0.0003671236934151887,
      "loss": 2.9209,
      "step": 98652
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8371002674102783,
      "learning_rate": 0.0003671197065524667,
      "loss": 2.7594,
      "step": 98653
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7574502229690552,
      "learning_rate": 0.0003671157196772659,
      "loss": 2.9681,
      "step": 98654
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9152729511260986,
      "learning_rate": 0.000367111732789587,
      "loss": 2.8613,
      "step": 98655
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5792509317398071,
      "learning_rate": 0.00036710774588943077,
      "loss": 2.944,
      "step": 98656
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8961994647979736,
      "learning_rate": 0.000367103758976798,
      "loss": 2.9738,
      "step": 98657
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.232126235961914,
      "learning_rate": 0.00036709977205168936,
      "loss": 3.1453,
      "step": 98658
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.82361900806427,
      "learning_rate": 0.0003670957851141055,
      "loss": 3.1227,
      "step": 98659
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5289818048477173,
      "learning_rate": 0.00036709179816404744,
      "loss": 3.0836,
      "step": 98660
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8059742450714111,
      "learning_rate": 0.0003670878112015157,
      "loss": 3.0697,
      "step": 98661
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.282647132873535,
      "learning_rate": 0.00036708382422651097,
      "loss": 2.7535,
      "step": 98662
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.59330153465271,
      "learning_rate": 0.0003670798372390342,
      "loss": 3.0057,
      "step": 98663
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8711092472076416,
      "learning_rate": 0.00036707585023908597,
      "loss": 2.7965,
      "step": 98664
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.7152695655822754,
      "learning_rate": 0.0003670718632266671,
      "loss": 3.0487,
      "step": 98665
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.2534847259521484,
      "learning_rate": 0.0003670678762017783,
      "loss": 2.9163,
      "step": 98666
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9090274572372437,
      "learning_rate": 0.0003670638891644202,
      "loss": 2.994,
      "step": 98667
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.910895586013794,
      "learning_rate": 0.00036705990211459373,
      "loss": 2.9029,
      "step": 98668
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.410611152648926,
      "learning_rate": 0.0003670559150522997,
      "loss": 2.9328,
      "step": 98669
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8606139421463013,
      "learning_rate": 0.00036705192797753853,
      "loss": 3.0531,
      "step": 98670
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.3181705474853516,
      "learning_rate": 0.0003670479408903112,
      "loss": 2.9471,
      "step": 98671
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6549922227859497,
      "learning_rate": 0.00036704395379061843,
      "loss": 3.0463,
      "step": 98672
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.7237753868103027,
      "learning_rate": 0.00036703996667846085,
      "loss": 2.9967,
      "step": 98673
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1374456882476807,
      "learning_rate": 0.00036703597955383934,
      "loss": 2.8983,
      "step": 98674
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0156073570251465,
      "learning_rate": 0.00036703199241675455,
      "loss": 2.8045,
      "step": 98675
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.8676040172576904,
      "learning_rate": 0.0003670280052672072,
      "loss": 3.0105,
      "step": 98676
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.0465950965881348,
      "learning_rate": 0.00036702401810519814,
      "loss": 2.9562,
      "step": 98677
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.031165599822998,
      "learning_rate": 0.0003670200309307281,
      "loss": 2.9432,
      "step": 98678
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6644028425216675,
      "learning_rate": 0.00036701604374379766,
      "loss": 2.8918,
      "step": 98679
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.5544644594192505,
      "learning_rate": 0.00036701205654440767,
      "loss": 3.0441,
      "step": 98680
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9579049348831177,
      "learning_rate": 0.000367008069332559,
      "loss": 3.0623,
      "step": 98681
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9896315336227417,
      "learning_rate": 0.00036700408210825215,
      "loss": 3.0689,
      "step": 98682
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9917985200881958,
      "learning_rate": 0.00036700009487148803,
      "loss": 2.6457,
      "step": 98683
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.02390193939209,
      "learning_rate": 0.00036699610762226735,
      "loss": 3.1821,
      "step": 98684
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1152219772338867,
      "learning_rate": 0.00036699212036059076,
      "loss": 3.0564,
      "step": 98685
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.6396456956863403,
      "learning_rate": 0.00036698813308645907,
      "loss": 2.9853,
      "step": 98686
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.48984956741333,
      "learning_rate": 0.00036698414579987315,
      "loss": 3.201,
      "step": 98687
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1733343601226807,
      "learning_rate": 0.00036698015850083343,
      "loss": 3.1319,
      "step": 98688
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.9190685749053955,
      "learning_rate": 0.00036697617118934095,
      "loss": 2.9582,
      "step": 98689
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.779879570007324,
      "learning_rate": 0.0003669721838653963,
      "loss": 3.0667,
      "step": 98690
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5565972328186035,
      "learning_rate": 0.00036696819652900023,
      "loss": 3.1383,
      "step": 98691
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.757040023803711,
      "learning_rate": 0.00036696420918015356,
      "loss": 3.2682,
      "step": 98692
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.267271041870117,
      "learning_rate": 0.000366960221818857,
      "loss": 2.947,
      "step": 98693
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3845810890197754,
      "learning_rate": 0.0003669562344451112,
      "loss": 3.0281,
      "step": 98694
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9097148180007935,
      "learning_rate": 0.000366952247058917,
      "loss": 3.2976,
      "step": 98695
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.039027452468872,
      "learning_rate": 0.00036694825966027513,
      "loss": 3.0436,
      "step": 98696
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0572433471679688,
      "learning_rate": 0.00036694427224918626,
      "loss": 2.9509,
      "step": 98697
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9386131763458252,
      "learning_rate": 0.00036694028482565124,
      "loss": 3.0487,
      "step": 98698
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8220480680465698,
      "learning_rate": 0.0003669362973896708,
      "loss": 2.9724,
      "step": 98699
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.573690414428711,
      "learning_rate": 0.00036693230994124556,
      "loss": 2.925,
      "step": 98700
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.752745270729065,
      "learning_rate": 0.0003669283224803763,
      "loss": 3.0241,
      "step": 98701
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.895132303237915,
      "learning_rate": 0.0003669243350070639,
      "loss": 2.9831,
      "step": 98702
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5237107276916504,
      "learning_rate": 0.000366920347521309,
      "loss": 2.7728,
      "step": 98703
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.760276198387146,
      "learning_rate": 0.0003669163600231123,
      "loss": 3.0536,
      "step": 98704
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.751945734024048,
      "learning_rate": 0.0003669123725124746,
      "loss": 2.9067,
      "step": 98705
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2167141437530518,
      "learning_rate": 0.0003669083849893966,
      "loss": 3.1594,
      "step": 98706
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.696116328239441,
      "learning_rate": 0.0003669043974538791,
      "loss": 2.9811,
      "step": 98707
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7757302522659302,
      "learning_rate": 0.00036690040990592287,
      "loss": 3.0102,
      "step": 98708
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.77471125125885,
      "learning_rate": 0.00036689642234552847,
      "loss": 2.8449,
      "step": 98709
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8210487365722656,
      "learning_rate": 0.0003668924347726968,
      "loss": 3.2807,
      "step": 98710
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.104416847229004,
      "learning_rate": 0.0003668884471874286,
      "loss": 3.0633,
      "step": 98711
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.869728684425354,
      "learning_rate": 0.00036688445958972456,
      "loss": 2.9313,
      "step": 98712
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2592527866363525,
      "learning_rate": 0.0003668804719795854,
      "loss": 3.1045,
      "step": 98713
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8092546463012695,
      "learning_rate": 0.000366876484357012,
      "loss": 2.9584,
      "step": 98714
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4299685955047607,
      "learning_rate": 0.00036687249672200486,
      "loss": 2.773,
      "step": 98715
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5347172021865845,
      "learning_rate": 0.0003668685090745649,
      "loss": 3.0313,
      "step": 98716
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.456900715827942,
      "learning_rate": 0.00036686452141469284,
      "loss": 3.2151,
      "step": 98717
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9699435234069824,
      "learning_rate": 0.00036686053374238947,
      "loss": 3.0507,
      "step": 98718
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7728265523910522,
      "learning_rate": 0.0003668565460576554,
      "loss": 2.9396,
      "step": 98719
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6969046592712402,
      "learning_rate": 0.0003668525583604914,
      "loss": 3.0743,
      "step": 98720
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.597697138786316,
      "learning_rate": 0.0003668485706508983,
      "loss": 2.953,
      "step": 98721
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8223785161972046,
      "learning_rate": 0.0003668445829288768,
      "loss": 2.8935,
      "step": 98722
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9256234169006348,
      "learning_rate": 0.0003668405951944276,
      "loss": 3.049,
      "step": 98723
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.870848298072815,
      "learning_rate": 0.00036683660744755146,
      "loss": 3.1095,
      "step": 98724
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.608525276184082,
      "learning_rate": 0.0003668326196882492,
      "loss": 2.7926,
      "step": 98725
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0603392124176025,
      "learning_rate": 0.0003668286319165214,
      "loss": 3.1903,
      "step": 98726
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8062231540679932,
      "learning_rate": 0.00036682464413236894,
      "loss": 3.0873,
      "step": 98727
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0642600059509277,
      "learning_rate": 0.00036682065633579253,
      "loss": 3.0547,
      "step": 98728
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8684022426605225,
      "learning_rate": 0.0003668166685267929,
      "loss": 3.0367,
      "step": 98729
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1035776138305664,
      "learning_rate": 0.00036681268070537076,
      "loss": 2.8408,
      "step": 98730
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.948786973953247,
      "learning_rate": 0.00036680869287152685,
      "loss": 2.9104,
      "step": 98731
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2523791790008545,
      "learning_rate": 0.00036680470502526206,
      "loss": 2.9551,
      "step": 98732
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8013983964920044,
      "learning_rate": 0.0003668007171665769,
      "loss": 3.0375,
      "step": 98733
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8883057832717896,
      "learning_rate": 0.00036679672929547234,
      "loss": 3.246,
      "step": 98734
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.109819173812866,
      "learning_rate": 0.00036679274141194893,
      "loss": 2.9698,
      "step": 98735
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9539415836334229,
      "learning_rate": 0.0003667887535160075,
      "loss": 3.0316,
      "step": 98736
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7021989822387695,
      "learning_rate": 0.0003667847656076488,
      "loss": 2.7807,
      "step": 98737
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7719049453735352,
      "learning_rate": 0.0003667807776868735,
      "loss": 3.1261,
      "step": 98738
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9621466398239136,
      "learning_rate": 0.00036677678975368245,
      "loss": 2.98,
      "step": 98739
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8602808713912964,
      "learning_rate": 0.0003667728018080763,
      "loss": 3.0123,
      "step": 98740
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6599119901657104,
      "learning_rate": 0.0003667688138500558,
      "loss": 2.9578,
      "step": 98741
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6230605840682983,
      "learning_rate": 0.0003667648258796218,
      "loss": 3.097,
      "step": 98742
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6420321464538574,
      "learning_rate": 0.0003667608378967749,
      "loss": 3.1083,
      "step": 98743
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5931615829467773,
      "learning_rate": 0.0003667568499015159,
      "loss": 3.1534,
      "step": 98744
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5631033182144165,
      "learning_rate": 0.0003667528618938456,
      "loss": 3.2064,
      "step": 98745
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5511183738708496,
      "learning_rate": 0.0003667488738737646,
      "loss": 3.0395,
      "step": 98746
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.920072317123413,
      "learning_rate": 0.0003667448858412737,
      "loss": 3.1689,
      "step": 98747
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7011867761611938,
      "learning_rate": 0.0003667408977963738,
      "loss": 3.2429,
      "step": 98748
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1678075790405273,
      "learning_rate": 0.0003667369097390654,
      "loss": 3.1635,
      "step": 98749
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6337376832962036,
      "learning_rate": 0.0003667329216693494,
      "loss": 2.9664,
      "step": 98750
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.827000617980957,
      "learning_rate": 0.00036672893358722644,
      "loss": 2.8492,
      "step": 98751
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.215708017349243,
      "learning_rate": 0.0003667249454926974,
      "loss": 2.8252,
      "step": 98752
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.404569149017334,
      "learning_rate": 0.0003667209573857628,
      "loss": 2.8305,
      "step": 98753
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9473004341125488,
      "learning_rate": 0.00036671696926642364,
      "loss": 3.1769,
      "step": 98754
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0093088150024414,
      "learning_rate": 0.00036671298113468044,
      "loss": 3.0369,
      "step": 98755
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1231329441070557,
      "learning_rate": 0.00036670899299053413,
      "loss": 3.0559,
      "step": 98756
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4722938537597656,
      "learning_rate": 0.0003667050048339853,
      "loss": 2.9721,
      "step": 98757
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.2182724475860596,
      "learning_rate": 0.0003667010166650348,
      "loss": 2.9615,
      "step": 98758
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0337610244750977,
      "learning_rate": 0.00036669702848368327,
      "loss": 3.161,
      "step": 98759
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.079695224761963,
      "learning_rate": 0.00036669304028993157,
      "loss": 3.1943,
      "step": 98760
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7018918991088867,
      "learning_rate": 0.0003666890520837803,
      "loss": 3.0467,
      "step": 98761
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.934195637702942,
      "learning_rate": 0.0003666850638652303,
      "loss": 2.8465,
      "step": 98762
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9529131650924683,
      "learning_rate": 0.00036668107563428225,
      "loss": 3.1744,
      "step": 98763
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.9033305644989014,
      "learning_rate": 0.000366677087390937,
      "loss": 2.9069,
      "step": 98764
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.382378101348877,
      "learning_rate": 0.00036667309913519515,
      "loss": 3.0756,
      "step": 98765
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7048089504241943,
      "learning_rate": 0.00036666911086705764,
      "loss": 3.103,
      "step": 98766
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.644693374633789,
      "learning_rate": 0.00036666512258652493,
      "loss": 2.9143,
      "step": 98767
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3768632411956787,
      "learning_rate": 0.00036666113429359795,
      "loss": 3.1785,
      "step": 98768
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3594472408294678,
      "learning_rate": 0.00036665714598827745,
      "loss": 3.0192,
      "step": 98769
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.0084946155548096,
      "learning_rate": 0.0003666531576705641,
      "loss": 2.9992,
      "step": 98770
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1165428161621094,
      "learning_rate": 0.00036664916934045874,
      "loss": 2.8628,
      "step": 98771
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6278553009033203,
      "learning_rate": 0.00036664518099796203,
      "loss": 3.3077,
      "step": 98772
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.8223612308502197,
      "learning_rate": 0.00036664119264307463,
      "loss": 3.089,
      "step": 98773
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.103253126144409,
      "learning_rate": 0.0003666372042757974,
      "loss": 2.8709,
      "step": 98774
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.739062786102295,
      "learning_rate": 0.00036663321589613116,
      "loss": 3.1136,
      "step": 98775
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1610329151153564,
      "learning_rate": 0.0003666292275040764,
      "loss": 3.1569,
      "step": 98776
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.033672332763672,
      "learning_rate": 0.00036662523909963406,
      "loss": 3.0268,
      "step": 98777
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1248834133148193,
      "learning_rate": 0.0003666212506828049,
      "loss": 2.981,
      "step": 98778
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.483494997024536,
      "learning_rate": 0.0003666172622535895,
      "loss": 3.0858,
      "step": 98779
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9879189729690552,
      "learning_rate": 0.0003666132738119887,
      "loss": 2.9773,
      "step": 98780
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.183767795562744,
      "learning_rate": 0.00036660928535800334,
      "loss": 2.9506,
      "step": 98781
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7535399198532104,
      "learning_rate": 0.00036660529689163394,
      "loss": 3.074,
      "step": 98782
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0000243186950684,
      "learning_rate": 0.0003666013084128814,
      "loss": 3.1454,
      "step": 98783
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1321029663085938,
      "learning_rate": 0.00036659731992174646,
      "loss": 2.7553,
      "step": 98784
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0904576778411865,
      "learning_rate": 0.0003665933314182298,
      "loss": 3.1438,
      "step": 98785
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.191545248031616,
      "learning_rate": 0.0003665893429023321,
      "loss": 3.4934,
      "step": 98786
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6602766513824463,
      "learning_rate": 0.0003665853543740543,
      "loss": 3.1901,
      "step": 98787
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5231962203979492,
      "learning_rate": 0.00036658136583339693,
      "loss": 2.8137,
      "step": 98788
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7574691772460938,
      "learning_rate": 0.0003665773772803609,
      "loss": 2.8926,
      "step": 98789
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.739659309387207,
      "learning_rate": 0.0003665733887149468,
      "loss": 2.938,
      "step": 98790
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7099608182907104,
      "learning_rate": 0.0003665694001371556,
      "loss": 3.1485,
      "step": 98791
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9371038675308228,
      "learning_rate": 0.0003665654115469877,
      "loss": 2.9647,
      "step": 98792
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2902612686157227,
      "learning_rate": 0.0003665614229444441,
      "loss": 2.9693,
      "step": 98793
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4405758380889893,
      "learning_rate": 0.00036655743432952557,
      "loss": 2.9387,
      "step": 98794
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.727895975112915,
      "learning_rate": 0.00036655344570223265,
      "loss": 2.9415,
      "step": 98795
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0148890018463135,
      "learning_rate": 0.0003665494570625662,
      "loss": 3.0784,
      "step": 98796
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6373741626739502,
      "learning_rate": 0.000366545468410527,
      "loss": 3.0746,
      "step": 98797
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5855859518051147,
      "learning_rate": 0.00036654147974611573,
      "loss": 3.3256,
      "step": 98798
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1860427856445312,
      "learning_rate": 0.00036653749106933315,
      "loss": 3.0757,
      "step": 98799
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5830751657485962,
      "learning_rate": 0.00036653350238018,
      "loss": 2.7673,
      "step": 98800
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0839619636535645,
      "learning_rate": 0.000366529513678657,
      "loss": 2.959,
      "step": 98801
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.751232147216797,
      "learning_rate": 0.0003665255249647649,
      "loss": 2.9021,
      "step": 98802
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8120959997177124,
      "learning_rate": 0.0003665215362385044,
      "loss": 2.9347,
      "step": 98803
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7184523344039917,
      "learning_rate": 0.00036651754749987634,
      "loss": 2.9675,
      "step": 98804
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7667258977890015,
      "learning_rate": 0.00036651355874888137,
      "loss": 2.9302,
      "step": 98805
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7661936283111572,
      "learning_rate": 0.00036650956998552037,
      "loss": 3.0415,
      "step": 98806
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9368455410003662,
      "learning_rate": 0.0003665055812097939,
      "loss": 2.9943,
      "step": 98807
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7112482786178589,
      "learning_rate": 0.0003665015924217028,
      "loss": 3.1426,
      "step": 98808
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.9257731437683105,
      "learning_rate": 0.0003664976036212478,
      "loss": 2.9744,
      "step": 98809
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.225465774536133,
      "learning_rate": 0.0003664936148084297,
      "loss": 2.9375,
      "step": 98810
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7748973369598389,
      "learning_rate": 0.00036648962598324913,
      "loss": 2.8764,
      "step": 98811
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7436445951461792,
      "learning_rate": 0.00036648563714570685,
      "loss": 3.3855,
      "step": 98812
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.523125171661377,
      "learning_rate": 0.0003664816482958037,
      "loss": 3.0002,
      "step": 98813
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.528808355331421,
      "learning_rate": 0.0003664776594335403,
      "loss": 3.0413,
      "step": 98814
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.557726263999939,
      "learning_rate": 0.0003664736705589175,
      "loss": 2.9396,
      "step": 98815
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8535405397415161,
      "learning_rate": 0.00036646968167193593,
      "loss": 2.9308,
      "step": 98816
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7656577825546265,
      "learning_rate": 0.0003664656927725965,
      "loss": 3.1834,
      "step": 98817
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.795487880706787,
      "learning_rate": 0.00036646170386089974,
      "loss": 3.0752,
      "step": 98818
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.036545753479004,
      "learning_rate": 0.0003664577149368465,
      "loss": 3.2458,
      "step": 98819
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8524202108383179,
      "learning_rate": 0.00036645372600043754,
      "loss": 3.0745,
      "step": 98820
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8805232048034668,
      "learning_rate": 0.0003664497370516736,
      "loss": 3.0725,
      "step": 98821
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9992493391036987,
      "learning_rate": 0.0003664457480905554,
      "loss": 2.9441,
      "step": 98822
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.128675699234009,
      "learning_rate": 0.00036644175911708366,
      "loss": 2.9188,
      "step": 98823
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4818830490112305,
      "learning_rate": 0.00036643777013125917,
      "loss": 2.7918,
      "step": 98824
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5219404697418213,
      "learning_rate": 0.00036643378113308263,
      "loss": 2.9938,
      "step": 98825
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.754862666130066,
      "learning_rate": 0.0003664297921225547,
      "loss": 3.2005,
      "step": 98826
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.520012378692627,
      "learning_rate": 0.00036642580309967636,
      "loss": 2.9146,
      "step": 98827
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1532466411590576,
      "learning_rate": 0.0003664218140644482,
      "loss": 3.0477,
      "step": 98828
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.853294849395752,
      "learning_rate": 0.00036641782501687086,
      "loss": 2.9432,
      "step": 98829
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1740529537200928,
      "learning_rate": 0.0003664138359569453,
      "loss": 2.9503,
      "step": 98830
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.293468475341797,
      "learning_rate": 0.0003664098468846721,
      "loss": 3.005,
      "step": 98831
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.965126395225525,
      "learning_rate": 0.00036640585780005206,
      "loss": 2.8658,
      "step": 98832
    },
    {
      "epoch": 1.29,
      "grad_norm": 5.037775039672852,
      "learning_rate": 0.0003664018687030859,
      "loss": 2.9933,
      "step": 98833
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8447707891464233,
      "learning_rate": 0.00036639787959377447,
      "loss": 2.7848,
      "step": 98834
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9161320924758911,
      "learning_rate": 0.0003663938904721183,
      "loss": 3.0483,
      "step": 98835
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1310267448425293,
      "learning_rate": 0.0003663899013381183,
      "loss": 2.9638,
      "step": 98836
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0733559131622314,
      "learning_rate": 0.0003663859121917752,
      "loss": 3.1478,
      "step": 98837
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7359431982040405,
      "learning_rate": 0.0003663819230330897,
      "loss": 2.8871,
      "step": 98838
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2427282333374023,
      "learning_rate": 0.00036637793386206253,
      "loss": 3.1687,
      "step": 98839
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0832066535949707,
      "learning_rate": 0.0003663739446786944,
      "loss": 3.1025,
      "step": 98840
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.029620409011841,
      "learning_rate": 0.0003663699554829862,
      "loss": 3.0477,
      "step": 98841
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.781102180480957,
      "learning_rate": 0.0003663659662749385,
      "loss": 3.0686,
      "step": 98842
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7744569778442383,
      "learning_rate": 0.0003663619770545521,
      "loss": 3.1532,
      "step": 98843
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.143798828125,
      "learning_rate": 0.00036635798782182774,
      "loss": 3.0656,
      "step": 98844
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5557359457015991,
      "learning_rate": 0.0003663539985767663,
      "loss": 3.1407,
      "step": 98845
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7166216373443604,
      "learning_rate": 0.00036635000931936834,
      "loss": 3.1247,
      "step": 98846
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3086471557617188,
      "learning_rate": 0.00036634602004963457,
      "loss": 2.8656,
      "step": 98847
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.759727716445923,
      "learning_rate": 0.000366342030767566,
      "loss": 3.0371,
      "step": 98848
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2210280895233154,
      "learning_rate": 0.0003663380414731631,
      "loss": 3.0581,
      "step": 98849
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.655584335327148,
      "learning_rate": 0.0003663340521664266,
      "loss": 3.0302,
      "step": 98850
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7213878631591797,
      "learning_rate": 0.0003663300628473575,
      "loss": 3.1189,
      "step": 98851
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8797470331192017,
      "learning_rate": 0.00036632607351595633,
      "loss": 2.8925,
      "step": 98852
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.66922926902771,
      "learning_rate": 0.00036632208417222386,
      "loss": 3.1073,
      "step": 98853
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5476149320602417,
      "learning_rate": 0.0003663180948161609,
      "loss": 3.0522,
      "step": 98854
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1454238891601562,
      "learning_rate": 0.0003663141054477682,
      "loss": 2.8162,
      "step": 98855
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7275073528289795,
      "learning_rate": 0.0003663101160670464,
      "loss": 2.954,
      "step": 98856
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.964733600616455,
      "learning_rate": 0.00036630612667399627,
      "loss": 2.9672,
      "step": 98857
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5273547172546387,
      "learning_rate": 0.00036630213726861866,
      "loss": 3.0072,
      "step": 98858
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7464863061904907,
      "learning_rate": 0.00036629814785091416,
      "loss": 3.1121,
      "step": 98859
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7808432579040527,
      "learning_rate": 0.0003662941584208836,
      "loss": 3.0404,
      "step": 98860
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.136875629425049,
      "learning_rate": 0.00036629016897852775,
      "loss": 3.0553,
      "step": 98861
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2706918716430664,
      "learning_rate": 0.00036628617952384724,
      "loss": 3.1647,
      "step": 98862
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2801883220672607,
      "learning_rate": 0.0003662821900568429,
      "loss": 2.8129,
      "step": 98863
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9512227773666382,
      "learning_rate": 0.0003662782005775155,
      "loss": 2.9732,
      "step": 98864
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.5014843940734863,
      "learning_rate": 0.0003662742110858657,
      "loss": 3.0812,
      "step": 98865
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7138596773147583,
      "learning_rate": 0.00036627022158189425,
      "loss": 3.1396,
      "step": 98866
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0775387287139893,
      "learning_rate": 0.0003662662320656019,
      "loss": 3.0235,
      "step": 98867
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9338352680206299,
      "learning_rate": 0.0003662622425369894,
      "loss": 3.0735,
      "step": 98868
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.065925359725952,
      "learning_rate": 0.00036625825299605754,
      "loss": 3.1264,
      "step": 98869
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.412839412689209,
      "learning_rate": 0.00036625426344280706,
      "loss": 2.9231,
      "step": 98870
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.049426555633545,
      "learning_rate": 0.00036625027387723864,
      "loss": 2.9645,
      "step": 98871
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6454464197158813,
      "learning_rate": 0.00036624628429935297,
      "loss": 2.916,
      "step": 98872
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6712477207183838,
      "learning_rate": 0.000366242294709151,
      "loss": 3.0519,
      "step": 98873
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7644745111465454,
      "learning_rate": 0.00036623830510663317,
      "loss": 3.0913,
      "step": 98874
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7292765378952026,
      "learning_rate": 0.00036623431549180044,
      "loss": 3.0906,
      "step": 98875
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.867159366607666,
      "learning_rate": 0.0003662303258646536,
      "loss": 2.7371,
      "step": 98876
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6704950332641602,
      "learning_rate": 0.0003662263362251932,
      "loss": 3.1449,
      "step": 98877
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9491490125656128,
      "learning_rate": 0.00036622234657342006,
      "loss": 3.0566,
      "step": 98878
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2667388916015625,
      "learning_rate": 0.000366218356909335,
      "loss": 3.1686,
      "step": 98879
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.084864854812622,
      "learning_rate": 0.0003662143672329387,
      "loss": 2.7987,
      "step": 98880
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.124081611633301,
      "learning_rate": 0.00036621037754423183,
      "loss": 3.0924,
      "step": 98881
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.078752040863037,
      "learning_rate": 0.0003662063878432153,
      "loss": 2.8944,
      "step": 98882
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8834155797958374,
      "learning_rate": 0.0003662023981298896,
      "loss": 3.0376,
      "step": 98883
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7487069368362427,
      "learning_rate": 0.00036619840840425576,
      "loss": 3.0577,
      "step": 98884
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.640455722808838,
      "learning_rate": 0.00036619441866631437,
      "loss": 3.4487,
      "step": 98885
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6961671113967896,
      "learning_rate": 0.00036619042891606615,
      "loss": 2.9812,
      "step": 98886
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0134479999542236,
      "learning_rate": 0.00036618643915351185,
      "loss": 2.6303,
      "step": 98887
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5239840745925903,
      "learning_rate": 0.0003661824493786523,
      "loss": 2.8921,
      "step": 98888
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5404597520828247,
      "learning_rate": 0.0003661784595914882,
      "loss": 2.941,
      "step": 98889
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9452192783355713,
      "learning_rate": 0.0003661744697920202,
      "loss": 3.1296,
      "step": 98890
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.9016668796539307,
      "learning_rate": 0.0003661704799802492,
      "loss": 2.8883,
      "step": 98891
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.59624183177948,
      "learning_rate": 0.0003661664901561758,
      "loss": 2.9573,
      "step": 98892
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.831634521484375,
      "learning_rate": 0.0003661625003198008,
      "loss": 3.0007,
      "step": 98893
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7269636392593384,
      "learning_rate": 0.00036615851047112495,
      "loss": 3.0005,
      "step": 98894
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.767439365386963,
      "learning_rate": 0.000366154520610149,
      "loss": 2.9057,
      "step": 98895
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6407910585403442,
      "learning_rate": 0.00036615053073687365,
      "loss": 2.8887,
      "step": 98896
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7383617162704468,
      "learning_rate": 0.00036614654085129965,
      "loss": 3.2314,
      "step": 98897
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8502293825149536,
      "learning_rate": 0.00036614255095342786,
      "loss": 2.9684,
      "step": 98898
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5331645011901855,
      "learning_rate": 0.00036613856104325884,
      "loss": 3.1586,
      "step": 98899
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.220353603363037,
      "learning_rate": 0.00036613457112079345,
      "loss": 3.1547,
      "step": 98900
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.628899335861206,
      "learning_rate": 0.0003661305811860324,
      "loss": 2.9764,
      "step": 98901
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.936967611312866,
      "learning_rate": 0.0003661265912389764,
      "loss": 2.7944,
      "step": 98902
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.155660629272461,
      "learning_rate": 0.00036612260127962616,
      "loss": 2.8399,
      "step": 98903
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7640104293823242,
      "learning_rate": 0.0003661186113079825,
      "loss": 2.9605,
      "step": 98904
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2127792835235596,
      "learning_rate": 0.00036611462132404626,
      "loss": 2.9603,
      "step": 98905
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7872575521469116,
      "learning_rate": 0.000366110631327818,
      "loss": 3.1157,
      "step": 98906
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9052767753601074,
      "learning_rate": 0.00036610664131929844,
      "loss": 2.9095,
      "step": 98907
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0174338817596436,
      "learning_rate": 0.00036610265129848843,
      "loss": 2.9929,
      "step": 98908
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8744014501571655,
      "learning_rate": 0.00036609866126538883,
      "loss": 3.1445,
      "step": 98909
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8606255054473877,
      "learning_rate": 0.00036609467122000007,
      "loss": 3.0583,
      "step": 98910
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.847821831703186,
      "learning_rate": 0.0003660906811623232,
      "loss": 3.0901,
      "step": 98911
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7439101934432983,
      "learning_rate": 0.0003660866910923588,
      "loss": 2.7969,
      "step": 98912
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.710524559020996,
      "learning_rate": 0.0003660827010101076,
      "loss": 3.0858,
      "step": 98913
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5983470678329468,
      "learning_rate": 0.0003660787109155704,
      "loss": 2.9911,
      "step": 98914
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.592789888381958,
      "learning_rate": 0.00036607472080874785,
      "loss": 2.9432,
      "step": 98915
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8283565044403076,
      "learning_rate": 0.0003660707306896408,
      "loss": 3.1632,
      "step": 98916
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.569152593612671,
      "learning_rate": 0.00036606674055825,
      "loss": 2.7587,
      "step": 98917
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.870361566543579,
      "learning_rate": 0.00036606275041457613,
      "loss": 2.9644,
      "step": 98918
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7267658710479736,
      "learning_rate": 0.00036605876025861994,
      "loss": 2.9875,
      "step": 98919
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4056739807128906,
      "learning_rate": 0.00036605477009038213,
      "loss": 3.0372,
      "step": 98920
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8779979944229126,
      "learning_rate": 0.0003660507799098635,
      "loss": 2.975,
      "step": 98921
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.178684711456299,
      "learning_rate": 0.0003660467897170649,
      "loss": 3.1573,
      "step": 98922
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7990202903747559,
      "learning_rate": 0.0003660427995119868,
      "loss": 3.1766,
      "step": 98923
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.770418643951416,
      "learning_rate": 0.0003660388092946302,
      "loss": 2.9824,
      "step": 98924
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8173328638076782,
      "learning_rate": 0.00036603481906499573,
      "loss": 3.0398,
      "step": 98925
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6975562572479248,
      "learning_rate": 0.0003660308288230841,
      "loss": 3.207,
      "step": 98926
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8726825714111328,
      "learning_rate": 0.00036602683856889605,
      "loss": 2.9359,
      "step": 98927
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.721649408340454,
      "learning_rate": 0.00036602284830243245,
      "loss": 2.9697,
      "step": 98928
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7971582412719727,
      "learning_rate": 0.000366018858023694,
      "loss": 2.871,
      "step": 98929
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.056950569152832,
      "learning_rate": 0.0003660148677326813,
      "loss": 2.941,
      "step": 98930
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.870751976966858,
      "learning_rate": 0.00036601087742939525,
      "loss": 3.0307,
      "step": 98931
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.859761118888855,
      "learning_rate": 0.00036600688711383643,
      "loss": 3.007,
      "step": 98932
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.844774842262268,
      "learning_rate": 0.00036600289678600574,
      "loss": 2.6969,
      "step": 98933
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8290945291519165,
      "learning_rate": 0.00036599890644590393,
      "loss": 2.924,
      "step": 98934
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6694892644882202,
      "learning_rate": 0.0003659949160935316,
      "loss": 2.8907,
      "step": 98935
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6777825355529785,
      "learning_rate": 0.00036599092572888966,
      "loss": 2.9903,
      "step": 98936
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.988065242767334,
      "learning_rate": 0.00036598693535197865,
      "loss": 2.95,
      "step": 98937
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5401021242141724,
      "learning_rate": 0.0003659829449627995,
      "loss": 3.2732,
      "step": 98938
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0511062145233154,
      "learning_rate": 0.0003659789545613528,
      "loss": 2.9204,
      "step": 98939
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4465410709381104,
      "learning_rate": 0.00036597496414763947,
      "loss": 3.102,
      "step": 98940
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9510174989700317,
      "learning_rate": 0.0003659709737216601,
      "loss": 2.6161,
      "step": 98941
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.200587749481201,
      "learning_rate": 0.0003659669832834154,
      "loss": 3.1936,
      "step": 98942
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9974511861801147,
      "learning_rate": 0.00036596299283290633,
      "loss": 2.9419,
      "step": 98943
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.803464889526367,
      "learning_rate": 0.00036595900237013345,
      "loss": 3.0798,
      "step": 98944
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9736748933792114,
      "learning_rate": 0.00036595501189509743,
      "loss": 3.0836,
      "step": 98945
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9140887260437012,
      "learning_rate": 0.0003659510214077993,
      "loss": 2.9456,
      "step": 98946
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5446456670761108,
      "learning_rate": 0.0003659470309082395,
      "loss": 2.9433,
      "step": 98947
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7358510494232178,
      "learning_rate": 0.000365943040396419,
      "loss": 2.8899,
      "step": 98948
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.649494647979736,
      "learning_rate": 0.00036593904987233843,
      "loss": 2.8411,
      "step": 98949
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2028627395629883,
      "learning_rate": 0.00036593505933599853,
      "loss": 3.0608,
      "step": 98950
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9783883094787598,
      "learning_rate": 0.00036593106878740004,
      "loss": 3.065,
      "step": 98951
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2072460651397705,
      "learning_rate": 0.00036592707822654373,
      "loss": 3.1041,
      "step": 98952
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.166206121444702,
      "learning_rate": 0.0003659230876534303,
      "loss": 2.7456,
      "step": 98953
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8633610010147095,
      "learning_rate": 0.00036591909706806054,
      "loss": 2.8257,
      "step": 98954
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1406452655792236,
      "learning_rate": 0.0003659151064704352,
      "loss": 2.9062,
      "step": 98955
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9945690631866455,
      "learning_rate": 0.000365911115860555,
      "loss": 3.0955,
      "step": 98956
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.224046468734741,
      "learning_rate": 0.00036590712523842066,
      "loss": 2.999,
      "step": 98957
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.835196614265442,
      "learning_rate": 0.0003659031346040329,
      "loss": 2.8724,
      "step": 98958
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9276764392852783,
      "learning_rate": 0.0003658991439573926,
      "loss": 3.1201,
      "step": 98959
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0561211109161377,
      "learning_rate": 0.0003658951532985003,
      "loss": 3.1174,
      "step": 98960
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5493906736373901,
      "learning_rate": 0.00036589116262735697,
      "loss": 3.3235,
      "step": 98961
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3545072078704834,
      "learning_rate": 0.0003658871719439631,
      "loss": 2.9552,
      "step": 98962
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7472831010818481,
      "learning_rate": 0.00036588318124831964,
      "loss": 3.1451,
      "step": 98963
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.697641134262085,
      "learning_rate": 0.0003658791905404273,
      "loss": 2.9404,
      "step": 98964
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0064613819122314,
      "learning_rate": 0.00036587519982028666,
      "loss": 3.0169,
      "step": 98965
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6046473979949951,
      "learning_rate": 0.00036587120908789856,
      "loss": 3.2284,
      "step": 98966
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6263905763626099,
      "learning_rate": 0.0003658672183432639,
      "loss": 3.2481,
      "step": 98967
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8984687328338623,
      "learning_rate": 0.00036586322758638313,
      "loss": 2.9869,
      "step": 98968
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8660389184951782,
      "learning_rate": 0.0003658592368172572,
      "loss": 2.7874,
      "step": 98969
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.634758472442627,
      "learning_rate": 0.00036585524603588687,
      "loss": 3.0703,
      "step": 98970
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5200014114379883,
      "learning_rate": 0.0003658512552422728,
      "loss": 3.1718,
      "step": 98971
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6546711921691895,
      "learning_rate": 0.00036584726443641556,
      "loss": 2.8553,
      "step": 98972
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5299266576766968,
      "learning_rate": 0.0003658432736183163,
      "loss": 3.0687,
      "step": 98973
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.083136558532715,
      "learning_rate": 0.0003658392827879754,
      "loss": 3.1868,
      "step": 98974
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.477581262588501,
      "learning_rate": 0.00036583529194539376,
      "loss": 2.8395,
      "step": 98975
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.616470456123352,
      "learning_rate": 0.0003658313010905721,
      "loss": 2.9504,
      "step": 98976
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2350449562072754,
      "learning_rate": 0.00036582731022351115,
      "loss": 2.8889,
      "step": 98977
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7977337837219238,
      "learning_rate": 0.00036582331934421165,
      "loss": 2.7735,
      "step": 98978
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.831749677658081,
      "learning_rate": 0.00036581932845267443,
      "loss": 3.0504,
      "step": 98979
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7008436918258667,
      "learning_rate": 0.00036581533754890004,
      "loss": 2.888,
      "step": 98980
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.4728689193725586,
      "learning_rate": 0.00036581134663288945,
      "loss": 2.8133,
      "step": 98981
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8354105949401855,
      "learning_rate": 0.00036580735570464315,
      "loss": 3.0999,
      "step": 98982
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.129760503768921,
      "learning_rate": 0.0003658033647641621,
      "loss": 2.9274,
      "step": 98983
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8480767011642456,
      "learning_rate": 0.00036579937381144706,
      "loss": 2.9848,
      "step": 98984
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.018994092941284,
      "learning_rate": 0.0003657953828464985,
      "loss": 3.1478,
      "step": 98985
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9052672386169434,
      "learning_rate": 0.00036579139186931745,
      "loss": 2.8367,
      "step": 98986
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3975670337677,
      "learning_rate": 0.0003657874008799045,
      "loss": 2.9141,
      "step": 98987
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.03359317779541,
      "learning_rate": 0.0003657834098782604,
      "loss": 3.061,
      "step": 98988
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0130455493927,
      "learning_rate": 0.00036577941886438593,
      "loss": 2.7376,
      "step": 98989
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2674920558929443,
      "learning_rate": 0.0003657754278382819,
      "loss": 3.2721,
      "step": 98990
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.8346195220947266,
      "learning_rate": 0.00036577143679994894,
      "loss": 3.0046,
      "step": 98991
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2403206825256348,
      "learning_rate": 0.0003657674457493878,
      "loss": 2.9134,
      "step": 98992
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7093616724014282,
      "learning_rate": 0.00036576345468659926,
      "loss": 2.586,
      "step": 98993
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.590740919113159,
      "learning_rate": 0.000365759463611584,
      "loss": 3.1013,
      "step": 98994
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7473831176757812,
      "learning_rate": 0.0003657554725243429,
      "loss": 3.0124,
      "step": 98995
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6127026081085205,
      "learning_rate": 0.00036575148142487657,
      "loss": 3.0672,
      "step": 98996
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.889430046081543,
      "learning_rate": 0.00036574749031318587,
      "loss": 2.8101,
      "step": 98997
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.729320526123047,
      "learning_rate": 0.0003657434991892714,
      "loss": 2.6126,
      "step": 98998
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.698125958442688,
      "learning_rate": 0.0003657395080531339,
      "loss": 3.2281,
      "step": 98999
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6911381483078003,
      "learning_rate": 0.00036573551690477436,
      "loss": 2.8831,
      "step": 99000
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.63411545753479,
      "learning_rate": 0.00036573152574419324,
      "loss": 2.7743,
      "step": 99001
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.812809705734253,
      "learning_rate": 0.0003657275345713914,
      "loss": 2.8749,
      "step": 99002
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6963107585906982,
      "learning_rate": 0.0003657235433863696,
      "loss": 3.0789,
      "step": 99003
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6017736196517944,
      "learning_rate": 0.00036571955218912854,
      "loss": 2.7581,
      "step": 99004
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.820549249649048,
      "learning_rate": 0.00036571556097966893,
      "loss": 3.0374,
      "step": 99005
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9064879417419434,
      "learning_rate": 0.0003657115697579916,
      "loss": 3.023,
      "step": 99006
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7547885179519653,
      "learning_rate": 0.0003657075785240973,
      "loss": 3.0808,
      "step": 99007
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6276204586029053,
      "learning_rate": 0.00036570358727798665,
      "loss": 3.1085,
      "step": 99008
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2728023529052734,
      "learning_rate": 0.0003656995960196604,
      "loss": 2.9445,
      "step": 99009
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8071545362472534,
      "learning_rate": 0.00036569560474911946,
      "loss": 3.0227,
      "step": 99010
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.2517471313476562,
      "learning_rate": 0.0003656916134663644,
      "loss": 2.8311,
      "step": 99011
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.592384934425354,
      "learning_rate": 0.0003656876221713961,
      "loss": 2.8954,
      "step": 99012
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.2732369899749756,
      "learning_rate": 0.0003656836308642153,
      "loss": 3.0359,
      "step": 99013
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.268765449523926,
      "learning_rate": 0.0003656796395448225,
      "loss": 3.0927,
      "step": 99014
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.995452880859375,
      "learning_rate": 0.00036567564821321865,
      "loss": 2.9122,
      "step": 99015
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7066930532455444,
      "learning_rate": 0.00036567165686940455,
      "loss": 3.017,
      "step": 99016
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.476416826248169,
      "learning_rate": 0.0003656676655133808,
      "loss": 3.0235,
      "step": 99017
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.563486337661743,
      "learning_rate": 0.00036566367414514814,
      "loss": 3.0182,
      "step": 99018
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6342560052871704,
      "learning_rate": 0.0003656596827647075,
      "loss": 2.9293,
      "step": 99019
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7198777198791504,
      "learning_rate": 0.00036565569137205937,
      "loss": 3.2286,
      "step": 99020
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.312413215637207,
      "learning_rate": 0.0003656516999672046,
      "loss": 3.0318,
      "step": 99021
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0569002628326416,
      "learning_rate": 0.000365647708550144,
      "loss": 2.602,
      "step": 99022
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.658707857131958,
      "learning_rate": 0.00036564371712087816,
      "loss": 2.8847,
      "step": 99023
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8891295194625854,
      "learning_rate": 0.00036563972567940796,
      "loss": 3.1255,
      "step": 99024
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7823067903518677,
      "learning_rate": 0.0003656357342257342,
      "loss": 3.0418,
      "step": 99025
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8274999856948853,
      "learning_rate": 0.00036563174275985736,
      "loss": 3.0054,
      "step": 99026
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8929624557495117,
      "learning_rate": 0.0003656277512817784,
      "loss": 3.2458,
      "step": 99027
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0785794258117676,
      "learning_rate": 0.0003656237597914981,
      "loss": 2.8406,
      "step": 99028
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.860616683959961,
      "learning_rate": 0.000365619768289017,
      "loss": 2.9784,
      "step": 99029
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.088778495788574,
      "learning_rate": 0.00036561577677433595,
      "loss": 2.8298,
      "step": 99030
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8317264318466187,
      "learning_rate": 0.0003656117852474557,
      "loss": 3.1188,
      "step": 99031
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6861140727996826,
      "learning_rate": 0.000365607793708377,
      "loss": 2.7745,
      "step": 99032
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7567557096481323,
      "learning_rate": 0.0003656038021571005,
      "loss": 2.8564,
      "step": 99033
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.455033302307129,
      "learning_rate": 0.00036559981059362714,
      "loss": 3.0749,
      "step": 99034
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.663534164428711,
      "learning_rate": 0.00036559581901795743,
      "loss": 2.9451,
      "step": 99035
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8478238582611084,
      "learning_rate": 0.00036559182743009223,
      "loss": 2.9623,
      "step": 99036
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6164380311965942,
      "learning_rate": 0.00036558783583003236,
      "loss": 3.0177,
      "step": 99037
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.551129937171936,
      "learning_rate": 0.00036558384421777837,
      "loss": 3.1021,
      "step": 99038
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6966118812561035,
      "learning_rate": 0.00036557985259333116,
      "loss": 2.8399,
      "step": 99039
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.857377767562866,
      "learning_rate": 0.00036557586095669145,
      "loss": 2.8091,
      "step": 99040
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2121191024780273,
      "learning_rate": 0.00036557186930785983,
      "loss": 3.1807,
      "step": 99041
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9553735256195068,
      "learning_rate": 0.0003655678776468373,
      "loss": 3.0914,
      "step": 99042
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1170341968536377,
      "learning_rate": 0.00036556388597362444,
      "loss": 2.9464,
      "step": 99043
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8120293617248535,
      "learning_rate": 0.0003655598942882219,
      "loss": 2.9837,
      "step": 99044
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6909515857696533,
      "learning_rate": 0.0003655559025906306,
      "loss": 3.1795,
      "step": 99045
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5569053888320923,
      "learning_rate": 0.0003655519108808513,
      "loss": 2.9021,
      "step": 99046
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6487269401550293,
      "learning_rate": 0.0003655479191588845,
      "loss": 3.2984,
      "step": 99047
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6631369590759277,
      "learning_rate": 0.0003655439274247312,
      "loss": 3.0974,
      "step": 99048
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.182051420211792,
      "learning_rate": 0.0003655399356783921,
      "loss": 3.133,
      "step": 99049
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6654596328735352,
      "learning_rate": 0.00036553594391986784,
      "loss": 2.9845,
      "step": 99050
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8809289932250977,
      "learning_rate": 0.0003655319521491592,
      "loss": 2.8237,
      "step": 99051
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8269102573394775,
      "learning_rate": 0.00036552796036626696,
      "loss": 3.2095,
      "step": 99052
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.4531199932098389,
      "learning_rate": 0.0003655239685711918,
      "loss": 2.7302,
      "step": 99053
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6568975448608398,
      "learning_rate": 0.00036551997676393446,
      "loss": 3.0463,
      "step": 99054
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8045905828475952,
      "learning_rate": 0.0003655159849444958,
      "loss": 3.2323,
      "step": 99055
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0371885299682617,
      "learning_rate": 0.0003655119931128765,
      "loss": 2.9462,
      "step": 99056
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4470133781433105,
      "learning_rate": 0.0003655080012690772,
      "loss": 2.7274,
      "step": 99057
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6187705993652344,
      "learning_rate": 0.0003655040094130987,
      "loss": 3.0612,
      "step": 99058
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8387130498886108,
      "learning_rate": 0.0003655000175449419,
      "loss": 3.132,
      "step": 99059
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.163466215133667,
      "learning_rate": 0.00036549602566460736,
      "loss": 2.8315,
      "step": 99060
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.651132583618164,
      "learning_rate": 0.00036549203377209583,
      "loss": 3.1663,
      "step": 99061
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8832786083221436,
      "learning_rate": 0.00036548804186740816,
      "loss": 3.2585,
      "step": 99062
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5525844097137451,
      "learning_rate": 0.00036548404995054496,
      "loss": 2.8678,
      "step": 99063
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4043633937835693,
      "learning_rate": 0.00036548005802150704,
      "loss": 2.9994,
      "step": 99064
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9665724039077759,
      "learning_rate": 0.0003654760660802952,
      "loss": 3.043,
      "step": 99065
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0021965503692627,
      "learning_rate": 0.0003654720741269101,
      "loss": 2.972,
      "step": 99066
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.710232138633728,
      "learning_rate": 0.00036546808216135246,
      "loss": 2.6781,
      "step": 99067
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.662429928779602,
      "learning_rate": 0.0003654640901836231,
      "loss": 3.0809,
      "step": 99068
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.709519624710083,
      "learning_rate": 0.00036546009819372275,
      "loss": 3.0299,
      "step": 99069
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5379207134246826,
      "learning_rate": 0.00036545610619165205,
      "loss": 3.1108,
      "step": 99070
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9985755681991577,
      "learning_rate": 0.00036545211417741193,
      "loss": 3.0684,
      "step": 99071
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9291187524795532,
      "learning_rate": 0.000365448122151003,
      "loss": 3.1145,
      "step": 99072
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0018856525421143,
      "learning_rate": 0.000365444130112426,
      "loss": 2.9682,
      "step": 99073
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5514941215515137,
      "learning_rate": 0.0003654401380616818,
      "loss": 2.8137,
      "step": 99074
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9682533740997314,
      "learning_rate": 0.00036543614599877087,
      "loss": 3.1957,
      "step": 99075
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0363922119140625,
      "learning_rate": 0.0003654321539236942,
      "loss": 3.3131,
      "step": 99076
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.608174443244934,
      "learning_rate": 0.00036542816183645244,
      "loss": 3.0489,
      "step": 99077
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8992856740951538,
      "learning_rate": 0.00036542416973704646,
      "loss": 3.3351,
      "step": 99078
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0983898639678955,
      "learning_rate": 0.0003654201776254768,
      "loss": 2.8426,
      "step": 99079
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.507479667663574,
      "learning_rate": 0.0003654161855017443,
      "loss": 3.0717,
      "step": 99080
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5671062469482422,
      "learning_rate": 0.00036541219336584966,
      "loss": 3.0361,
      "step": 99081
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.8269522190093994,
      "learning_rate": 0.0003654082012177938,
      "loss": 2.9292,
      "step": 99082
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.93391752243042,
      "learning_rate": 0.0003654042090575772,
      "loss": 2.8112,
      "step": 99083
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8209153413772583,
      "learning_rate": 0.0003654002168852007,
      "loss": 2.9665,
      "step": 99084
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8050570487976074,
      "learning_rate": 0.0003653962247006651,
      "loss": 3.0132,
      "step": 99085
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.770429015159607,
      "learning_rate": 0.00036539223250397113,
      "loss": 3.1442,
      "step": 99086
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.5092976093292236,
      "learning_rate": 0.0003653882402951195,
      "loss": 3.1435,
      "step": 99087
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5730433464050293,
      "learning_rate": 0.0003653842480741109,
      "loss": 2.8261,
      "step": 99088
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.01870059967041,
      "learning_rate": 0.00036538025584094625,
      "loss": 3.1006,
      "step": 99089
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.257777214050293,
      "learning_rate": 0.00036537626359562606,
      "loss": 3.1322,
      "step": 99090
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.0711848735809326,
      "learning_rate": 0.00036537227133815126,
      "loss": 2.8764,
      "step": 99091
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.189779281616211,
      "learning_rate": 0.00036536827906852255,
      "loss": 3.2316,
      "step": 99092
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7777070999145508,
      "learning_rate": 0.0003653642867867406,
      "loss": 3.1357,
      "step": 99093
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9486217498779297,
      "learning_rate": 0.0003653602944928062,
      "loss": 3.0058,
      "step": 99094
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.288714647293091,
      "learning_rate": 0.0003653563021867201,
      "loss": 3.0618,
      "step": 99095
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.212465763092041,
      "learning_rate": 0.00036535230986848297,
      "loss": 2.7712,
      "step": 99096
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9416855573654175,
      "learning_rate": 0.00036534831753809563,
      "loss": 2.8761,
      "step": 99097
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3535094261169434,
      "learning_rate": 0.0003653443251955589,
      "loss": 3.1502,
      "step": 99098
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.106907844543457,
      "learning_rate": 0.0003653403328408733,
      "loss": 2.9859,
      "step": 99099
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.775111436843872,
      "learning_rate": 0.00036533634047403976,
      "loss": 2.8811,
      "step": 99100
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6971826553344727,
      "learning_rate": 0.00036533234809505897,
      "loss": 2.9472,
      "step": 99101
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5633161067962646,
      "learning_rate": 0.00036532835570393164,
      "loss": 3.1268,
      "step": 99102
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1048548221588135,
      "learning_rate": 0.00036532436330065853,
      "loss": 3.042,
      "step": 99103
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9860602617263794,
      "learning_rate": 0.00036532037088524047,
      "loss": 2.8236,
      "step": 99104
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0741007328033447,
      "learning_rate": 0.000365316378457678,
      "loss": 3.091,
      "step": 99105
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.166203498840332,
      "learning_rate": 0.00036531238601797205,
      "loss": 2.9593,
      "step": 99106
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.662062168121338,
      "learning_rate": 0.00036530839356612333,
      "loss": 3.0284,
      "step": 99107
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.048546075820923,
      "learning_rate": 0.00036530440110213246,
      "loss": 2.8908,
      "step": 99108
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0941720008850098,
      "learning_rate": 0.00036530040862600027,
      "loss": 3.1485,
      "step": 99109
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4659855365753174,
      "learning_rate": 0.00036529641613772756,
      "loss": 3.0864,
      "step": 99110
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.273348569869995,
      "learning_rate": 0.00036529242363731503,
      "loss": 3.0546,
      "step": 99111
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8575739860534668,
      "learning_rate": 0.0003652884311247633,
      "loss": 3.0139,
      "step": 99112
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7037029266357422,
      "learning_rate": 0.0003652844386000733,
      "loss": 3.2975,
      "step": 99113
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.439389228820801,
      "learning_rate": 0.0003652804460632457,
      "loss": 2.8292,
      "step": 99114
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0924603939056396,
      "learning_rate": 0.0003652764535142812,
      "loss": 2.9538,
      "step": 99115
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7131325006484985,
      "learning_rate": 0.0003652724609531806,
      "loss": 2.8657,
      "step": 99116
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9247249364852905,
      "learning_rate": 0.0003652684683799446,
      "loss": 2.9281,
      "step": 99117
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.460744857788086,
      "learning_rate": 0.00036526447579457395,
      "loss": 2.9008,
      "step": 99118
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8238816261291504,
      "learning_rate": 0.00036526048319706944,
      "loss": 2.8468,
      "step": 99119
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7614344358444214,
      "learning_rate": 0.0003652564905874317,
      "loss": 2.9607,
      "step": 99120
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5538737773895264,
      "learning_rate": 0.00036525249796566154,
      "loss": 3.1044,
      "step": 99121
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6807551383972168,
      "learning_rate": 0.0003652485053317598,
      "loss": 3.205,
      "step": 99122
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3758482933044434,
      "learning_rate": 0.0003652445126857271,
      "loss": 2.9325,
      "step": 99123
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7325104475021362,
      "learning_rate": 0.00036524052002756416,
      "loss": 2.8935,
      "step": 99124
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8898289203643799,
      "learning_rate": 0.00036523652735727185,
      "loss": 3.1251,
      "step": 99125
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.122126817703247,
      "learning_rate": 0.00036523253467485084,
      "loss": 2.9454,
      "step": 99126
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7331440448760986,
      "learning_rate": 0.0003652285419803019,
      "loss": 2.9406,
      "step": 99127
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.195580244064331,
      "learning_rate": 0.0003652245492736256,
      "loss": 2.9805,
      "step": 99128
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.300506830215454,
      "learning_rate": 0.00036522055655482293,
      "loss": 3.1533,
      "step": 99129
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.198782205581665,
      "learning_rate": 0.00036521656382389455,
      "loss": 2.9646,
      "step": 99130
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7086615562438965,
      "learning_rate": 0.0003652125710808411,
      "loss": 3.0746,
      "step": 99131
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.680797815322876,
      "learning_rate": 0.00036520857832566346,
      "loss": 3.0339,
      "step": 99132
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6266909837722778,
      "learning_rate": 0.00036520458555836226,
      "loss": 2.6045,
      "step": 99133
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9382272958755493,
      "learning_rate": 0.00036520059277893833,
      "loss": 3.0451,
      "step": 99134
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7042078971862793,
      "learning_rate": 0.0003651965999873924,
      "loss": 2.915,
      "step": 99135
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7827223539352417,
      "learning_rate": 0.00036519260718372514,
      "loss": 3.1007,
      "step": 99136
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8605163097381592,
      "learning_rate": 0.00036518861436793734,
      "loss": 2.8085,
      "step": 99137
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7413278818130493,
      "learning_rate": 0.00036518462154002984,
      "loss": 2.9138,
      "step": 99138
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5551636219024658,
      "learning_rate": 0.00036518062870000324,
      "loss": 2.7816,
      "step": 99139
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.631252646446228,
      "learning_rate": 0.00036517663584785835,
      "loss": 2.9337,
      "step": 99140
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.9910008907318115,
      "learning_rate": 0.0003651726429835958,
      "loss": 2.8572,
      "step": 99141
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7650953531265259,
      "learning_rate": 0.0003651686501072165,
      "loss": 3.394,
      "step": 99142
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9445964097976685,
      "learning_rate": 0.00036516465721872114,
      "loss": 2.976,
      "step": 99143
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1506264209747314,
      "learning_rate": 0.00036516066431811046,
      "loss": 2.8221,
      "step": 99144
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7030361890792847,
      "learning_rate": 0.00036515667140538506,
      "loss": 3.0169,
      "step": 99145
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2394490242004395,
      "learning_rate": 0.0003651526784805459,
      "loss": 3.005,
      "step": 99146
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2570652961730957,
      "learning_rate": 0.0003651486855435936,
      "loss": 2.8471,
      "step": 99147
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7294323444366455,
      "learning_rate": 0.0003651446925945289,
      "loss": 3.1502,
      "step": 99148
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.624954104423523,
      "learning_rate": 0.00036514069963335257,
      "loss": 3.0039,
      "step": 99149
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.930283784866333,
      "learning_rate": 0.00036513670666006543,
      "loss": 3.037,
      "step": 99150
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0691707134246826,
      "learning_rate": 0.0003651327136746681,
      "loss": 3.0592,
      "step": 99151
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3913867473602295,
      "learning_rate": 0.00036512872067716133,
      "loss": 3.1113,
      "step": 99152
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.033714771270752,
      "learning_rate": 0.000365124727667546,
      "loss": 3.0437,
      "step": 99153
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.719724655151367,
      "learning_rate": 0.0003651207346458227,
      "loss": 2.9098,
      "step": 99154
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6406641006469727,
      "learning_rate": 0.00036511674161199216,
      "loss": 3.151,
      "step": 99155
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0184218883514404,
      "learning_rate": 0.00036511274856605534,
      "loss": 3.0116,
      "step": 99156
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8787492513656616,
      "learning_rate": 0.0003651087555080127,
      "loss": 2.9444,
      "step": 99157
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.5162482261657715,
      "learning_rate": 0.0003651047624378651,
      "loss": 2.8363,
      "step": 99158
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.3382744789123535,
      "learning_rate": 0.00036510076935561343,
      "loss": 3.0352,
      "step": 99159
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.752713441848755,
      "learning_rate": 0.00036509677626125816,
      "loss": 3.0805,
      "step": 99160
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3024086952209473,
      "learning_rate": 0.0003650927831548003,
      "loss": 3.1507,
      "step": 99161
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.2567007541656494,
      "learning_rate": 0.0003650887900362403,
      "loss": 3.0965,
      "step": 99162
    },
    {
      "epoch": 1.29,
      "grad_norm": 5.069687843322754,
      "learning_rate": 0.00036508479690557923,
      "loss": 3.1114,
      "step": 99163
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7509098052978516,
      "learning_rate": 0.0003650808037628176,
      "loss": 2.8611,
      "step": 99164
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.4871885776519775,
      "learning_rate": 0.0003650768106079562,
      "loss": 3.0937,
      "step": 99165
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8418405055999756,
      "learning_rate": 0.00036507281744099584,
      "loss": 2.9269,
      "step": 99166
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.1600968837738037,
      "learning_rate": 0.00036506882426193716,
      "loss": 3.0317,
      "step": 99167
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.003352642059326,
      "learning_rate": 0.00036506483107078095,
      "loss": 3.0232,
      "step": 99168
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.031381130218506,
      "learning_rate": 0.00036506083786752797,
      "loss": 2.6738,
      "step": 99169
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5387015342712402,
      "learning_rate": 0.000365056844652179,
      "loss": 3.1268,
      "step": 99170
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.6732566356658936,
      "learning_rate": 0.0003650528514247347,
      "loss": 2.9106,
      "step": 99171
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.595944881439209,
      "learning_rate": 0.00036504885818519585,
      "loss": 2.9043,
      "step": 99172
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8762215375900269,
      "learning_rate": 0.0003650448649335631,
      "loss": 3.1514,
      "step": 99173
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.7622694969177246,
      "learning_rate": 0.00036504087166983745,
      "loss": 2.9521,
      "step": 99174
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.6981124877929688,
      "learning_rate": 0.00036503687839401937,
      "loss": 3.0189,
      "step": 99175
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.196453094482422,
      "learning_rate": 0.00036503288510610977,
      "loss": 3.1632,
      "step": 99176
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8623325824737549,
      "learning_rate": 0.00036502889180610925,
      "loss": 2.8091,
      "step": 99177
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.340423107147217,
      "learning_rate": 0.0003650248984940187,
      "loss": 2.984,
      "step": 99178
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.324716567993164,
      "learning_rate": 0.00036502090516983874,
      "loss": 3.2857,
      "step": 99179
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.7992587089538574,
      "learning_rate": 0.0003650169118335703,
      "loss": 3.0862,
      "step": 99180
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3058993816375732,
      "learning_rate": 0.00036501291848521384,
      "loss": 2.7888,
      "step": 99181
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6796326637268066,
      "learning_rate": 0.00036500892512477025,
      "loss": 2.7116,
      "step": 99182
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.4833369255065918,
      "learning_rate": 0.0003650049317522404,
      "loss": 2.7183,
      "step": 99183
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6234633922576904,
      "learning_rate": 0.0003650009383676248,
      "loss": 2.7683,
      "step": 99184
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8524131774902344,
      "learning_rate": 0.0003649969449709243,
      "loss": 3.0201,
      "step": 99185
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.768894076347351,
      "learning_rate": 0.0003649929515621397,
      "loss": 3.0133,
      "step": 99186
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9580211639404297,
      "learning_rate": 0.00036498895814127163,
      "loss": 3.0664,
      "step": 99187
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.684249997138977,
      "learning_rate": 0.0003649849647083209,
      "loss": 3.0078,
      "step": 99188
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0465896129608154,
      "learning_rate": 0.0003649809712632882,
      "loss": 3.2755,
      "step": 99189
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6312386989593506,
      "learning_rate": 0.0003649769778061744,
      "loss": 2.9804,
      "step": 99190
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7666168212890625,
      "learning_rate": 0.00036497298433698006,
      "loss": 3.038,
      "step": 99191
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8697700500488281,
      "learning_rate": 0.0003649689908557061,
      "loss": 3.2178,
      "step": 99192
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8048970699310303,
      "learning_rate": 0.00036496499736235314,
      "loss": 3.1468,
      "step": 99193
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.4520514011383057,
      "learning_rate": 0.000364961003856922,
      "loss": 3.105,
      "step": 99194
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9261665344238281,
      "learning_rate": 0.0003649570103394133,
      "loss": 3.122,
      "step": 99195
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8246610164642334,
      "learning_rate": 0.00036495301680982794,
      "loss": 2.896,
      "step": 99196
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8246939182281494,
      "learning_rate": 0.0003649490232681665,
      "loss": 2.8742,
      "step": 99197
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7269200086593628,
      "learning_rate": 0.0003649450297144299,
      "loss": 2.9697,
      "step": 99198
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8350809812545776,
      "learning_rate": 0.0003649410361486188,
      "loss": 2.8927,
      "step": 99199
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2866134643554688,
      "learning_rate": 0.00036493704257073385,
      "loss": 3.1174,
      "step": 99200
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8385469913482666,
      "learning_rate": 0.00036493304898077593,
      "loss": 2.8326,
      "step": 99201
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.763226270675659,
      "learning_rate": 0.0003649290553787458,
      "loss": 2.7091,
      "step": 99202
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6285927295684814,
      "learning_rate": 0.00036492506176464407,
      "loss": 2.864,
      "step": 99203
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7161424160003662,
      "learning_rate": 0.00036492106813847147,
      "loss": 3.0645,
      "step": 99204
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.141176462173462,
      "learning_rate": 0.00036491707450022893,
      "loss": 3.1114,
      "step": 99205
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.589631199836731,
      "learning_rate": 0.000364913080849917,
      "loss": 2.9489,
      "step": 99206
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6102603673934937,
      "learning_rate": 0.00036490908718753655,
      "loss": 2.9512,
      "step": 99207
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5556328296661377,
      "learning_rate": 0.0003649050935130882,
      "loss": 2.853,
      "step": 99208
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.622131586074829,
      "learning_rate": 0.0003649010998265729,
      "loss": 3.025,
      "step": 99209
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6697746515274048,
      "learning_rate": 0.0003648971061279912,
      "loss": 2.9983,
      "step": 99210
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.027252435684204,
      "learning_rate": 0.00036489311241734386,
      "loss": 3.0093,
      "step": 99211
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7990977764129639,
      "learning_rate": 0.00036488911869463173,
      "loss": 3.0645,
      "step": 99212
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8450783491134644,
      "learning_rate": 0.00036488512495985547,
      "loss": 3.0001,
      "step": 99213
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9328155517578125,
      "learning_rate": 0.0003648811312130158,
      "loss": 2.9026,
      "step": 99214
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9042085409164429,
      "learning_rate": 0.00036487713745411357,
      "loss": 2.8691,
      "step": 99215
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.636250615119934,
      "learning_rate": 0.00036487314368314935,
      "loss": 3.2773,
      "step": 99216
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6528328657150269,
      "learning_rate": 0.00036486914990012414,
      "loss": 3.095,
      "step": 99217
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9796502590179443,
      "learning_rate": 0.0003648651561050384,
      "loss": 2.8029,
      "step": 99218
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8933216333389282,
      "learning_rate": 0.000364861162297893,
      "loss": 3.0595,
      "step": 99219
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7386298179626465,
      "learning_rate": 0.0003648571684786888,
      "loss": 2.9802,
      "step": 99220
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7415874004364014,
      "learning_rate": 0.00036485317464742635,
      "loss": 3.1244,
      "step": 99221
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8091450929641724,
      "learning_rate": 0.00036484918080410643,
      "loss": 3.1804,
      "step": 99222
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.4952707290649414,
      "learning_rate": 0.00036484518694872997,
      "loss": 3.0673,
      "step": 99223
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9703477621078491,
      "learning_rate": 0.0003648411930812974,
      "loss": 2.749,
      "step": 99224
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8290677070617676,
      "learning_rate": 0.00036483719920180974,
      "loss": 2.8052,
      "step": 99225
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.345179796218872,
      "learning_rate": 0.00036483320531026755,
      "loss": 3.0562,
      "step": 99226
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.242783546447754,
      "learning_rate": 0.00036482921140667174,
      "loss": 3.0425,
      "step": 99227
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4460959434509277,
      "learning_rate": 0.0003648252174910228,
      "loss": 3.0419,
      "step": 99228
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.286376476287842,
      "learning_rate": 0.00036482122356332175,
      "loss": 3.0186,
      "step": 99229
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8632762432098389,
      "learning_rate": 0.00036481722962356916,
      "loss": 2.802,
      "step": 99230
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.390265941619873,
      "learning_rate": 0.0003648132356717658,
      "loss": 3.2267,
      "step": 99231
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.351984977722168,
      "learning_rate": 0.0003648092417079126,
      "loss": 2.9402,
      "step": 99232
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2717857360839844,
      "learning_rate": 0.00036480524773201,
      "loss": 2.935,
      "step": 99233
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.801031470298767,
      "learning_rate": 0.00036480125374405885,
      "loss": 2.9598,
      "step": 99234
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1459336280822754,
      "learning_rate": 0.00036479725974406,
      "loss": 2.8961,
      "step": 99235
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3767056465148926,
      "learning_rate": 0.00036479326573201406,
      "loss": 3.0332,
      "step": 99236
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5731689929962158,
      "learning_rate": 0.00036478927170792186,
      "loss": 2.6467,
      "step": 99237
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1783835887908936,
      "learning_rate": 0.0003647852776717842,
      "loss": 3.0929,
      "step": 99238
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8687775135040283,
      "learning_rate": 0.00036478128362360155,
      "loss": 2.9929,
      "step": 99239
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8292492628097534,
      "learning_rate": 0.0003647772895633749,
      "loss": 2.93,
      "step": 99240
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.822561502456665,
      "learning_rate": 0.00036477329549110504,
      "loss": 3.2846,
      "step": 99241
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0012619495391846,
      "learning_rate": 0.00036476930140679246,
      "loss": 2.9103,
      "step": 99242
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0464999675750732,
      "learning_rate": 0.0003647653073104381,
      "loss": 2.9873,
      "step": 99243
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6820337772369385,
      "learning_rate": 0.00036476131320204266,
      "loss": 2.8982,
      "step": 99244
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5888768434524536,
      "learning_rate": 0.0003647573190816069,
      "loss": 2.8053,
      "step": 99245
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.308380365371704,
      "learning_rate": 0.00036475332494913145,
      "loss": 3.0386,
      "step": 99246
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7954002618789673,
      "learning_rate": 0.0003647493308046172,
      "loss": 3.19,
      "step": 99247
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.752472996711731,
      "learning_rate": 0.0003647453366480648,
      "loss": 3.1505,
      "step": 99248
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6544618606567383,
      "learning_rate": 0.00036474134247947503,
      "loss": 3.1211,
      "step": 99249
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.4954731464385986,
      "learning_rate": 0.0003647373482988486,
      "loss": 3.1482,
      "step": 99250
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8484289646148682,
      "learning_rate": 0.0003647333541061863,
      "loss": 2.7875,
      "step": 99251
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.870969533920288,
      "learning_rate": 0.0003647293599014888,
      "loss": 3.3282,
      "step": 99252
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6229088306427,
      "learning_rate": 0.00036472536568475693,
      "loss": 2.8774,
      "step": 99253
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5671920776367188,
      "learning_rate": 0.0003647213714559913,
      "loss": 2.9831,
      "step": 99254
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.035820960998535,
      "learning_rate": 0.00036471737721519297,
      "loss": 2.9594,
      "step": 99255
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.8934900760650635,
      "learning_rate": 0.00036471338296236225,
      "loss": 3.1623,
      "step": 99256
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6335803270339966,
      "learning_rate": 0.00036470938869750014,
      "loss": 3.0595,
      "step": 99257
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.025078535079956,
      "learning_rate": 0.00036470539442060745,
      "loss": 2.9833,
      "step": 99258
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.294377326965332,
      "learning_rate": 0.0003647014001316847,
      "loss": 3.0421,
      "step": 99259
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.2830488681793213,
      "learning_rate": 0.0003646974058307327,
      "loss": 2.7795,
      "step": 99260
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6975395679473877,
      "learning_rate": 0.00036469341151775225,
      "loss": 2.8675,
      "step": 99261
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9859977960586548,
      "learning_rate": 0.0003646894171927442,
      "loss": 3.1042,
      "step": 99262
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.503260850906372,
      "learning_rate": 0.00036468542285570906,
      "loss": 3.0495,
      "step": 99263
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5321097373962402,
      "learning_rate": 0.0003646814285066476,
      "loss": 2.9296,
      "step": 99264
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2971010208129883,
      "learning_rate": 0.0003646774341455608,
      "loss": 2.9363,
      "step": 99265
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.157742500305176,
      "learning_rate": 0.0003646734397724492,
      "loss": 3.2355,
      "step": 99266
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.4683172702789307,
      "learning_rate": 0.00036466944538731354,
      "loss": 2.7881,
      "step": 99267
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9920969009399414,
      "learning_rate": 0.0003646654509901546,
      "loss": 3.0707,
      "step": 99268
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.906825304031372,
      "learning_rate": 0.00036466145658097324,
      "loss": 2.8485,
      "step": 99269
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.649006366729736,
      "learning_rate": 0.00036465746215976993,
      "loss": 2.9718,
      "step": 99270
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.076291799545288,
      "learning_rate": 0.0003646534677265457,
      "loss": 2.7358,
      "step": 99271
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6004480123519897,
      "learning_rate": 0.0003646494732813012,
      "loss": 3.0557,
      "step": 99272
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.913649559020996,
      "learning_rate": 0.00036464547882403704,
      "loss": 2.7075,
      "step": 99273
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.328049659729004,
      "learning_rate": 0.0003646414843547541,
      "loss": 2.8996,
      "step": 99274
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1030149459838867,
      "learning_rate": 0.00036463748987345307,
      "loss": 2.6535,
      "step": 99275
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6445631980895996,
      "learning_rate": 0.0003646334953801347,
      "loss": 3.0907,
      "step": 99276
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.426021099090576,
      "learning_rate": 0.0003646295008747998,
      "loss": 3.0888,
      "step": 99277
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.125297784805298,
      "learning_rate": 0.00036462550635744907,
      "loss": 3.1225,
      "step": 99278
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9122979640960693,
      "learning_rate": 0.00036462151182808315,
      "loss": 2.8437,
      "step": 99279
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7157195806503296,
      "learning_rate": 0.0003646175172867029,
      "loss": 3.0804,
      "step": 99280
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.2030184268951416,
      "learning_rate": 0.0003646135227333091,
      "loss": 2.8601,
      "step": 99281
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1276986598968506,
      "learning_rate": 0.0003646095281679024,
      "loss": 3.0771,
      "step": 99282
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7869518995285034,
      "learning_rate": 0.00036460553359048353,
      "loss": 3.1016,
      "step": 99283
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.825925350189209,
      "learning_rate": 0.0003646015390010533,
      "loss": 2.7614,
      "step": 99284
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.0854146480560303,
      "learning_rate": 0.0003645975443996124,
      "loss": 2.8438,
      "step": 99285
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6786264181137085,
      "learning_rate": 0.0003645935497861616,
      "loss": 3.043,
      "step": 99286
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.939427137374878,
      "learning_rate": 0.0003645895551607017,
      "loss": 3.0062,
      "step": 99287
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9570457935333252,
      "learning_rate": 0.0003645855605232333,
      "loss": 2.997,
      "step": 99288
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1630725860595703,
      "learning_rate": 0.00036458156587375725,
      "loss": 3.0027,
      "step": 99289
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9652998447418213,
      "learning_rate": 0.0003645775712122743,
      "loss": 3.1919,
      "step": 99290
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.81634521484375,
      "learning_rate": 0.00036457357653878517,
      "loss": 3.0023,
      "step": 99291
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.738256573677063,
      "learning_rate": 0.0003645695818532905,
      "loss": 2.8639,
      "step": 99292
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3364737033843994,
      "learning_rate": 0.0003645655871557913,
      "loss": 3.0183,
      "step": 99293
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9886914491653442,
      "learning_rate": 0.00036456159244628795,
      "loss": 2.8584,
      "step": 99294
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2095324993133545,
      "learning_rate": 0.00036455759772478145,
      "loss": 2.9544,
      "step": 99295
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.854238510131836,
      "learning_rate": 0.0003645536029912725,
      "loss": 2.9128,
      "step": 99296
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.5522944927215576,
      "learning_rate": 0.0003645496082457618,
      "loss": 2.9964,
      "step": 99297
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.460136890411377,
      "learning_rate": 0.0003645456134882501,
      "loss": 3.0561,
      "step": 99298
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6835386753082275,
      "learning_rate": 0.00036454161871873826,
      "loss": 3.017,
      "step": 99299
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.7778029441833496,
      "learning_rate": 0.0003645376239372267,
      "loss": 2.7727,
      "step": 99300
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5789926052093506,
      "learning_rate": 0.0003645336291437165,
      "loss": 2.9085,
      "step": 99301
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6962395906448364,
      "learning_rate": 0.00036452963433820836,
      "loss": 3.0316,
      "step": 99302
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7368865013122559,
      "learning_rate": 0.0003645256395207028,
      "loss": 2.9148,
      "step": 99303
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3201425075531006,
      "learning_rate": 0.00036452164469120074,
      "loss": 2.7696,
      "step": 99304
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8313324451446533,
      "learning_rate": 0.00036451764984970293,
      "loss": 2.9056,
      "step": 99305
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0605971813201904,
      "learning_rate": 0.00036451365499621006,
      "loss": 3.0465,
      "step": 99306
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.987752914428711,
      "learning_rate": 0.0003645096601307229,
      "loss": 3.2159,
      "step": 99307
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5315035581588745,
      "learning_rate": 0.0003645056652532422,
      "loss": 3.0361,
      "step": 99308
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.285712957382202,
      "learning_rate": 0.00036450167036376855,
      "loss": 2.939,
      "step": 99309
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4480230808258057,
      "learning_rate": 0.00036449767546230293,
      "loss": 2.9105,
      "step": 99310
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.686267375946045,
      "learning_rate": 0.000364493680548846,
      "loss": 2.8779,
      "step": 99311
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1152632236480713,
      "learning_rate": 0.0003644896856233984,
      "loss": 3.0563,
      "step": 99312
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3485023975372314,
      "learning_rate": 0.0003644856906859609,
      "loss": 2.8304,
      "step": 99313
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.310107469558716,
      "learning_rate": 0.00036448169573653446,
      "loss": 2.9235,
      "step": 99314
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6535305976867676,
      "learning_rate": 0.0003644777007751195,
      "loss": 3.2006,
      "step": 99315
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9502407312393188,
      "learning_rate": 0.000364473705801717,
      "loss": 2.8713,
      "step": 99316
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5467066764831543,
      "learning_rate": 0.00036446971081632764,
      "loss": 2.9724,
      "step": 99317
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5661691427230835,
      "learning_rate": 0.0003644657158189521,
      "loss": 2.8126,
      "step": 99318
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3355484008789062,
      "learning_rate": 0.00036446172080959117,
      "loss": 3.0608,
      "step": 99319
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7035799026489258,
      "learning_rate": 0.00036445772578824564,
      "loss": 2.8948,
      "step": 99320
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6157536506652832,
      "learning_rate": 0.00036445373075491604,
      "loss": 3.03,
      "step": 99321
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.5105535984039307,
      "learning_rate": 0.0003644497357096034,
      "loss": 2.9303,
      "step": 99322
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6197797060012817,
      "learning_rate": 0.00036444574065230834,
      "loss": 2.8002,
      "step": 99323
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8250389099121094,
      "learning_rate": 0.00036444174558303164,
      "loss": 2.9314,
      "step": 99324
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7106273174285889,
      "learning_rate": 0.0003644377505017739,
      "loss": 2.9434,
      "step": 99325
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3317601680755615,
      "learning_rate": 0.000364433755408536,
      "loss": 3.015,
      "step": 99326
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2933244705200195,
      "learning_rate": 0.0003644297603033187,
      "loss": 2.8335,
      "step": 99327
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4079504013061523,
      "learning_rate": 0.0003644257651861226,
      "loss": 2.8982,
      "step": 99328
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7461719512939453,
      "learning_rate": 0.0003644217700569486,
      "loss": 2.79,
      "step": 99329
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5239609479904175,
      "learning_rate": 0.00036441777491579746,
      "loss": 2.9455,
      "step": 99330
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3777389526367188,
      "learning_rate": 0.0003644137797626697,
      "loss": 3.0184,
      "step": 99331
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9599989652633667,
      "learning_rate": 0.0003644097845975662,
      "loss": 3.0282,
      "step": 99332
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7253127098083496,
      "learning_rate": 0.0003644057894204878,
      "loss": 2.8455,
      "step": 99333
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.771823525428772,
      "learning_rate": 0.0003644017942314351,
      "loss": 2.9142,
      "step": 99334
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.233914613723755,
      "learning_rate": 0.00036439779903040887,
      "loss": 3.0179,
      "step": 99335
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.010974645614624,
      "learning_rate": 0.00036439380381740995,
      "loss": 3.0622,
      "step": 99336
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.938239336013794,
      "learning_rate": 0.00036438980859243895,
      "loss": 2.9621,
      "step": 99337
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.515280246734619,
      "learning_rate": 0.00036438581335549667,
      "loss": 2.8497,
      "step": 99338
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9638354778289795,
      "learning_rate": 0.0003643818181065839,
      "loss": 3.0608,
      "step": 99339
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.184569835662842,
      "learning_rate": 0.00036437782284570123,
      "loss": 2.8393,
      "step": 99340
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.31192684173584,
      "learning_rate": 0.0003643738275728495,
      "loss": 2.7531,
      "step": 99341
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6334166526794434,
      "learning_rate": 0.0003643698322880296,
      "loss": 3.1205,
      "step": 99342
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0549733638763428,
      "learning_rate": 0.000364365836991242,
      "loss": 3.0437,
      "step": 99343
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5161676406860352,
      "learning_rate": 0.00036436184168248765,
      "loss": 2.9799,
      "step": 99344
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.3836579322814941,
      "learning_rate": 0.0003643578463617672,
      "loss": 3.0417,
      "step": 99345
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6023095846176147,
      "learning_rate": 0.0003643538510290815,
      "loss": 3.0013,
      "step": 99346
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5813231468200684,
      "learning_rate": 0.00036434985568443106,
      "loss": 3.2136,
      "step": 99347
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.72067391872406,
      "learning_rate": 0.0003643458603278168,
      "loss": 2.9012,
      "step": 99348
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6762360334396362,
      "learning_rate": 0.00036434186495923945,
      "loss": 3.0164,
      "step": 99349
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6856536865234375,
      "learning_rate": 0.0003643378695786998,
      "loss": 2.9666,
      "step": 99350
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6384788751602173,
      "learning_rate": 0.0003643338741861984,
      "loss": 2.9083,
      "step": 99351
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9708898067474365,
      "learning_rate": 0.00036432987878173617,
      "loss": 2.9988,
      "step": 99352
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5711908340454102,
      "learning_rate": 0.0003643258833653138,
      "loss": 3.2968,
      "step": 99353
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6829159259796143,
      "learning_rate": 0.0003643218879369321,
      "loss": 2.9906,
      "step": 99354
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.83017897605896,
      "learning_rate": 0.0003643178924965917,
      "loss": 3.0479,
      "step": 99355
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0073764324188232,
      "learning_rate": 0.00036431389704429335,
      "loss": 3.0056,
      "step": 99356
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2734031677246094,
      "learning_rate": 0.0003643099015800379,
      "loss": 2.9734,
      "step": 99357
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.0842196941375732,
      "learning_rate": 0.00036430590610382597,
      "loss": 3.2227,
      "step": 99358
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7496800422668457,
      "learning_rate": 0.0003643019106156584,
      "loss": 3.0343,
      "step": 99359
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.54072904586792,
      "learning_rate": 0.0003642979151155359,
      "loss": 2.9976,
      "step": 99360
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5907211303710938,
      "learning_rate": 0.00036429391960345915,
      "loss": 2.9201,
      "step": 99361
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7495213747024536,
      "learning_rate": 0.000364289924079429,
      "loss": 3.0432,
      "step": 99362
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2402381896972656,
      "learning_rate": 0.0003642859285434461,
      "loss": 2.7182,
      "step": 99363
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6540607213974,
      "learning_rate": 0.0003642819329955112,
      "loss": 3.0314,
      "step": 99364
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.225405216217041,
      "learning_rate": 0.00036427793743562515,
      "loss": 3.0787,
      "step": 99365
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.100541591644287,
      "learning_rate": 0.0003642739418637886,
      "loss": 2.9822,
      "step": 99366
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.91267728805542,
      "learning_rate": 0.0003642699462800023,
      "loss": 2.9601,
      "step": 99367
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8946551084518433,
      "learning_rate": 0.00036426595068426694,
      "loss": 2.9224,
      "step": 99368
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.4103240966796875,
      "learning_rate": 0.00036426195507658347,
      "loss": 3.08,
      "step": 99369
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.9509716033935547,
      "learning_rate": 0.0003642579594569524,
      "loss": 3.0806,
      "step": 99370
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.3300015926361084,
      "learning_rate": 0.00036425396382537453,
      "loss": 3.3402,
      "step": 99371
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1013803482055664,
      "learning_rate": 0.0003642499681818507,
      "loss": 3.0653,
      "step": 99372
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6434638500213623,
      "learning_rate": 0.00036424597252638155,
      "loss": 2.9273,
      "step": 99373
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6680735349655151,
      "learning_rate": 0.00036424197685896786,
      "loss": 3.1023,
      "step": 99374
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7037626504898071,
      "learning_rate": 0.00036423798117961044,
      "loss": 3.011,
      "step": 99375
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5763654708862305,
      "learning_rate": 0.00036423398548830993,
      "loss": 2.9357,
      "step": 99376
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6505517959594727,
      "learning_rate": 0.00036422998978506704,
      "loss": 3.017,
      "step": 99377
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8122062683105469,
      "learning_rate": 0.00036422599406988275,
      "loss": 3.1709,
      "step": 99378
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8667049407958984,
      "learning_rate": 0.0003642219983427575,
      "loss": 3.2103,
      "step": 99379
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.095043659210205,
      "learning_rate": 0.00036421800260369214,
      "loss": 3.0378,
      "step": 99380
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.4193341732025146,
      "learning_rate": 0.00036421400685268755,
      "loss": 3.3291,
      "step": 99381
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6002806425094604,
      "learning_rate": 0.00036421001108974426,
      "loss": 3.1511,
      "step": 99382
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3703713417053223,
      "learning_rate": 0.00036420601531486315,
      "loss": 2.7772,
      "step": 99383
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6905062198638916,
      "learning_rate": 0.00036420201952804503,
      "loss": 3.2441,
      "step": 99384
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7935060262680054,
      "learning_rate": 0.00036419802372929044,
      "loss": 2.9807,
      "step": 99385
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.185645818710327,
      "learning_rate": 0.00036419402791860013,
      "loss": 2.8549,
      "step": 99386
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.913826823234558,
      "learning_rate": 0.0003641900320959751,
      "loss": 2.9244,
      "step": 99387
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.2794010639190674,
      "learning_rate": 0.00036418603626141587,
      "loss": 2.9483,
      "step": 99388
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.806666612625122,
      "learning_rate": 0.00036418204041492326,
      "loss": 3.1613,
      "step": 99389
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.023378849029541,
      "learning_rate": 0.00036417804455649797,
      "loss": 3.149,
      "step": 99390
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.007395029067993,
      "learning_rate": 0.0003641740486861408,
      "loss": 3.3022,
      "step": 99391
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7360270023345947,
      "learning_rate": 0.0003641700528038525,
      "loss": 3.1418,
      "step": 99392
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.904903769493103,
      "learning_rate": 0.00036416605690963366,
      "loss": 3.0009,
      "step": 99393
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7159398794174194,
      "learning_rate": 0.0003641620610034853,
      "loss": 3.0395,
      "step": 99394
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9766638278961182,
      "learning_rate": 0.0003641580650854079,
      "loss": 3.0129,
      "step": 99395
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2914297580718994,
      "learning_rate": 0.00036415406915540227,
      "loss": 3.0992,
      "step": 99396
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8316009044647217,
      "learning_rate": 0.0003641500732134693,
      "loss": 3.1353,
      "step": 99397
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.731376051902771,
      "learning_rate": 0.0003641460772596095,
      "loss": 2.6683,
      "step": 99398
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9577726125717163,
      "learning_rate": 0.0003641420812938238,
      "loss": 2.9564,
      "step": 99399
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.012500524520874,
      "learning_rate": 0.0003641380853161129,
      "loss": 3.0838,
      "step": 99400
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9957542419433594,
      "learning_rate": 0.0003641340893264775,
      "loss": 3.2726,
      "step": 99401
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.990185022354126,
      "learning_rate": 0.00036413009332491827,
      "loss": 2.9003,
      "step": 99402
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.203158140182495,
      "learning_rate": 0.0003641260973114362,
      "loss": 2.8511,
      "step": 99403
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.529444694519043,
      "learning_rate": 0.00036412210128603173,
      "loss": 3.25,
      "step": 99404
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.9962573051452637,
      "learning_rate": 0.00036411810524870583,
      "loss": 3.1573,
      "step": 99405
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.02199125289917,
      "learning_rate": 0.0003641141091994592,
      "loss": 3.0126,
      "step": 99406
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8065845966339111,
      "learning_rate": 0.0003641101131382925,
      "loss": 2.7713,
      "step": 99407
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.181361198425293,
      "learning_rate": 0.0003641061170652065,
      "loss": 2.9332,
      "step": 99408
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.4428884983062744,
      "learning_rate": 0.00036410212098020203,
      "loss": 2.9344,
      "step": 99409
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.6478018760681152,
      "learning_rate": 0.0003640981248832797,
      "loss": 3.0007,
      "step": 99410
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2817630767822266,
      "learning_rate": 0.0003640941287744403,
      "loss": 2.836,
      "step": 99411
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.8041903972625732,
      "learning_rate": 0.00036409013265368466,
      "loss": 2.9187,
      "step": 99412
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.979670763015747,
      "learning_rate": 0.0003640861365210134,
      "loss": 3.0482,
      "step": 99413
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9338867664337158,
      "learning_rate": 0.00036408214037642733,
      "loss": 2.8936,
      "step": 99414
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.902284860610962,
      "learning_rate": 0.0003640781442199272,
      "loss": 3.0176,
      "step": 99415
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.8590898513793945,
      "learning_rate": 0.0003640741480515138,
      "loss": 3.1362,
      "step": 99416
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7571018934249878,
      "learning_rate": 0.00036407015187118764,
      "loss": 2.9152,
      "step": 99417
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.8067150115966797,
      "learning_rate": 0.0003640661556789498,
      "loss": 3.0091,
      "step": 99418
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.838685989379883,
      "learning_rate": 0.00036406215947480073,
      "loss": 3.0692,
      "step": 99419
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8294501304626465,
      "learning_rate": 0.00036405816325874133,
      "loss": 3.0973,
      "step": 99420
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1348888874053955,
      "learning_rate": 0.00036405416703077235,
      "loss": 2.8853,
      "step": 99421
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.3635945320129395,
      "learning_rate": 0.0003640501707908945,
      "loss": 3.1467,
      "step": 99422
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.790901780128479,
      "learning_rate": 0.0003640461745391084,
      "loss": 2.7365,
      "step": 99423
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6766672134399414,
      "learning_rate": 0.000364042178275415,
      "loss": 3.1714,
      "step": 99424
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.6361594200134277,
      "learning_rate": 0.0003640381819998149,
      "loss": 2.8774,
      "step": 99425
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.023265838623047,
      "learning_rate": 0.0003640341857123089,
      "loss": 3.1383,
      "step": 99426
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.753081202507019,
      "learning_rate": 0.0003640301894128978,
      "loss": 2.9365,
      "step": 99427
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.542110800743103,
      "learning_rate": 0.0003640261931015823,
      "loss": 2.721,
      "step": 99428
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.515784978866577,
      "learning_rate": 0.000364022196778363,
      "loss": 2.8794,
      "step": 99429
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.872511148452759,
      "learning_rate": 0.00036401820044324086,
      "loss": 3.0727,
      "step": 99430
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1661832332611084,
      "learning_rate": 0.00036401420409621645,
      "loss": 3.0839,
      "step": 99431
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.253974199295044,
      "learning_rate": 0.0003640102077372907,
      "loss": 3.0885,
      "step": 99432
    },
    {
      "epoch": 1.29,
      "grad_norm": 4.428091526031494,
      "learning_rate": 0.0003640062113664641,
      "loss": 2.5665,
      "step": 99433
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2322521209716797,
      "learning_rate": 0.0003640022149837376,
      "loss": 3.0691,
      "step": 99434
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9621531963348389,
      "learning_rate": 0.00036399821858911196,
      "loss": 3.1407,
      "step": 99435
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7081693410873413,
      "learning_rate": 0.00036399422218258773,
      "loss": 3.0935,
      "step": 99436
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.781742811203003,
      "learning_rate": 0.0003639902257641658,
      "loss": 2.8753,
      "step": 99437
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.1093554496765137,
      "learning_rate": 0.00036398622933384695,
      "loss": 2.9221,
      "step": 99438
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.617962598800659,
      "learning_rate": 0.00036398223289163176,
      "loss": 2.9738,
      "step": 99439
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.981329083442688,
      "learning_rate": 0.00036397823643752114,
      "loss": 3.344,
      "step": 99440
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.162079095840454,
      "learning_rate": 0.0003639742399715157,
      "loss": 3.0925,
      "step": 99441
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.7942854166030884,
      "learning_rate": 0.00036397024349361625,
      "loss": 3.2091,
      "step": 99442
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.8642029762268066,
      "learning_rate": 0.00036396624700382355,
      "loss": 2.8808,
      "step": 99443
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6191051006317139,
      "learning_rate": 0.00036396225050213825,
      "loss": 3.0189,
      "step": 99444
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2985057830810547,
      "learning_rate": 0.0003639582539885613,
      "loss": 2.7616,
      "step": 99445
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.06386137008667,
      "learning_rate": 0.0003639542574630932,
      "loss": 2.8293,
      "step": 99446
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5654916763305664,
      "learning_rate": 0.0003639502609257347,
      "loss": 3.1849,
      "step": 99447
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.6672935485839844,
      "learning_rate": 0.0003639462643764869,
      "loss": 3.1461,
      "step": 99448
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9358279705047607,
      "learning_rate": 0.0003639422678153501,
      "loss": 2.9856,
      "step": 99449
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.9610588550567627,
      "learning_rate": 0.0003639382712423252,
      "loss": 2.9822,
      "step": 99450
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.223665714263916,
      "learning_rate": 0.0003639342746574131,
      "loss": 2.9124,
      "step": 99451
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8245389461517334,
      "learning_rate": 0.0003639302780606143,
      "loss": 3.4249,
      "step": 99452
    },
    {
      "epoch": 1.29,
      "grad_norm": 2.2769763469696045,
      "learning_rate": 0.00036392628145192967,
      "loss": 2.9053,
      "step": 99453
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.061608076095581,
      "learning_rate": 0.00036392228483136,
      "loss": 2.9586,
      "step": 99454
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.5878791809082031,
      "learning_rate": 0.0003639182881989059,
      "loss": 3.1201,
      "step": 99455
    },
    {
      "epoch": 1.29,
      "grad_norm": 3.0285208225250244,
      "learning_rate": 0.00036391429155456826,
      "loss": 2.8393,
      "step": 99456
    },
    {
      "epoch": 1.29,
      "grad_norm": 1.8220279216766357,
      "learning_rate": 0.00036391029489834766,
      "loss": 2.9928,
      "step": 99457
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7704224586486816,
      "learning_rate": 0.000363906298230245,
      "loss": 3.1969,
      "step": 99458
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.555237054824829,
      "learning_rate": 0.0003639023015502609,
      "loss": 2.98,
      "step": 99459
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.202059745788574,
      "learning_rate": 0.00036389830485839615,
      "loss": 2.8174,
      "step": 99460
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.38771390914917,
      "learning_rate": 0.0003638943081546516,
      "loss": 3.0068,
      "step": 99461
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.030501365661621,
      "learning_rate": 0.00036389031143902785,
      "loss": 2.7203,
      "step": 99462
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9001878499984741,
      "learning_rate": 0.0003638863147115256,
      "loss": 3.135,
      "step": 99463
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.3562939167022705,
      "learning_rate": 0.00036388231797214573,
      "loss": 3.0175,
      "step": 99464
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8513014316558838,
      "learning_rate": 0.00036387832122088896,
      "loss": 3.0999,
      "step": 99465
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8268767595291138,
      "learning_rate": 0.00036387432445775594,
      "loss": 3.0941,
      "step": 99466
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8842337131500244,
      "learning_rate": 0.00036387032768274755,
      "loss": 3.1393,
      "step": 99467
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.330047369003296,
      "learning_rate": 0.00036386633089586444,
      "loss": 2.9381,
      "step": 99468
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.857450008392334,
      "learning_rate": 0.0003638623340971073,
      "loss": 3.1423,
      "step": 99469
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.853869080543518,
      "learning_rate": 0.00036385833728647707,
      "loss": 2.8839,
      "step": 99470
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4930098056793213,
      "learning_rate": 0.0003638543404639743,
      "loss": 3.211,
      "step": 99471
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.269298791885376,
      "learning_rate": 0.0003638503436295998,
      "loss": 2.8173,
      "step": 99472
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.73867928981781,
      "learning_rate": 0.00036384634678335434,
      "loss": 3.0969,
      "step": 99473
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1243934631347656,
      "learning_rate": 0.00036384234992523865,
      "loss": 3.0817,
      "step": 99474
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.162309169769287,
      "learning_rate": 0.00036383835305525344,
      "loss": 3.3269,
      "step": 99475
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.870208740234375,
      "learning_rate": 0.0003638343561733995,
      "loss": 3.1181,
      "step": 99476
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6841309070587158,
      "learning_rate": 0.00036383035927967745,
      "loss": 3.3825,
      "step": 99477
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9280457496643066,
      "learning_rate": 0.00036382636237408824,
      "loss": 3.2209,
      "step": 99478
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0576868057250977,
      "learning_rate": 0.0003638223654566325,
      "loss": 2.9546,
      "step": 99479
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8808115720748901,
      "learning_rate": 0.0003638183685273109,
      "loss": 3.0723,
      "step": 99480
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0087902545928955,
      "learning_rate": 0.0003638143715861243,
      "loss": 3.0065,
      "step": 99481
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1071574687957764,
      "learning_rate": 0.0003638103746330734,
      "loss": 2.9206,
      "step": 99482
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.437087297439575,
      "learning_rate": 0.000363806377668159,
      "loss": 2.8872,
      "step": 99483
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.069810628890991,
      "learning_rate": 0.0003638023806913817,
      "loss": 3.0838,
      "step": 99484
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0113532543182373,
      "learning_rate": 0.00036379838370274246,
      "loss": 2.9464,
      "step": 99485
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2066216468811035,
      "learning_rate": 0.0003637943867022418,
      "loss": 3.0089,
      "step": 99486
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.127472162246704,
      "learning_rate": 0.00036379038968988053,
      "loss": 3.1344,
      "step": 99487
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0239341259002686,
      "learning_rate": 0.0003637863926656595,
      "loss": 2.85,
      "step": 99488
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.638833999633789,
      "learning_rate": 0.00036378239562957933,
      "loss": 2.8928,
      "step": 99489
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.910049557685852,
      "learning_rate": 0.00036377839858164077,
      "loss": 2.9196,
      "step": 99490
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.070728063583374,
      "learning_rate": 0.0003637744015218447,
      "loss": 2.9735,
      "step": 99491
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.794029951095581,
      "learning_rate": 0.0003637704044501917,
      "loss": 2.8745,
      "step": 99492
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8097654581069946,
      "learning_rate": 0.00036376640736668255,
      "loss": 3.2238,
      "step": 99493
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7748527526855469,
      "learning_rate": 0.0003637624102713181,
      "loss": 2.827,
      "step": 99494
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9105104207992554,
      "learning_rate": 0.000363758413164099,
      "loss": 3.0944,
      "step": 99495
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.0389490127563477,
      "learning_rate": 0.000363754416045026,
      "loss": 3.0079,
      "step": 99496
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.516516923904419,
      "learning_rate": 0.0003637504189140999,
      "loss": 3.0354,
      "step": 99497
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.7644824981689453,
      "learning_rate": 0.00036374642177132123,
      "loss": 2.7996,
      "step": 99498
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.6377601623535156,
      "learning_rate": 0.00036374242461669105,
      "loss": 3.0779,
      "step": 99499
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.222263813018799,
      "learning_rate": 0.0003637384274502099,
      "loss": 3.215,
      "step": 99500
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.921846866607666,
      "learning_rate": 0.00036373443027187856,
      "loss": 2.9378,
      "step": 99501
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7266687154769897,
      "learning_rate": 0.00036373043308169777,
      "loss": 3.0347,
      "step": 99502
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.189927339553833,
      "learning_rate": 0.0003637264358796684,
      "loss": 2.8568,
      "step": 99503
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9121425151824951,
      "learning_rate": 0.000363722438665791,
      "loss": 3.0346,
      "step": 99504
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.97845458984375,
      "learning_rate": 0.0003637184414400664,
      "loss": 3.1291,
      "step": 99505
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6906572580337524,
      "learning_rate": 0.00036371444420249527,
      "loss": 2.9367,
      "step": 99506
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6397724151611328,
      "learning_rate": 0.0003637104469530785,
      "loss": 2.9134,
      "step": 99507
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7680331468582153,
      "learning_rate": 0.00036370644969181676,
      "loss": 3.0335,
      "step": 99508
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6237759590148926,
      "learning_rate": 0.00036370245241871077,
      "loss": 3.2308,
      "step": 99509
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7055716514587402,
      "learning_rate": 0.00036369845513376127,
      "loss": 2.7606,
      "step": 99510
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6859544515609741,
      "learning_rate": 0.00036369445783696904,
      "loss": 2.7893,
      "step": 99511
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9682918787002563,
      "learning_rate": 0.0003636904605283349,
      "loss": 3.0343,
      "step": 99512
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.463653802871704,
      "learning_rate": 0.0003636864632078594,
      "loss": 3.1345,
      "step": 99513
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6185568571090698,
      "learning_rate": 0.0003636824658755434,
      "loss": 3.0245,
      "step": 99514
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.746755838394165,
      "learning_rate": 0.00036367846853138755,
      "loss": 3.0461,
      "step": 99515
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9222320318222046,
      "learning_rate": 0.00036367447117539284,
      "loss": 2.8121,
      "step": 99516
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4783830642700195,
      "learning_rate": 0.0003636704738075597,
      "loss": 3.2289,
      "step": 99517
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6783015727996826,
      "learning_rate": 0.00036366647642788907,
      "loss": 2.9535,
      "step": 99518
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.5350801944732666,
      "learning_rate": 0.00036366247903638164,
      "loss": 2.9237,
      "step": 99519
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.023498296737671,
      "learning_rate": 0.0003636584816330382,
      "loss": 2.906,
      "step": 99520
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7350685596466064,
      "learning_rate": 0.00036365448421785937,
      "loss": 2.9604,
      "step": 99521
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1853249073028564,
      "learning_rate": 0.000363650486790846,
      "loss": 3.1026,
      "step": 99522
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5971181392669678,
      "learning_rate": 0.00036364648935199883,
      "loss": 2.7802,
      "step": 99523
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.147564649581909,
      "learning_rate": 0.0003636424919013185,
      "loss": 3.1476,
      "step": 99524
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.811225175857544,
      "learning_rate": 0.00036363849443880593,
      "loss": 3.1235,
      "step": 99525
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.013612985610962,
      "learning_rate": 0.00036363449696446166,
      "loss": 3.159,
      "step": 99526
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.416103720664978,
      "learning_rate": 0.0003636304994782866,
      "loss": 2.9614,
      "step": 99527
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7551652193069458,
      "learning_rate": 0.00036362650198028143,
      "loss": 2.9405,
      "step": 99528
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.325038433074951,
      "learning_rate": 0.00036362250447044683,
      "loss": 3.017,
      "step": 99529
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9344924688339233,
      "learning_rate": 0.0003636185069487837,
      "loss": 3.1886,
      "step": 99530
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8558001518249512,
      "learning_rate": 0.0003636145094152926,
      "loss": 2.9984,
      "step": 99531
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8323638439178467,
      "learning_rate": 0.0003636105118699744,
      "loss": 3.0367,
      "step": 99532
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.432915449142456,
      "learning_rate": 0.0003636065143128298,
      "loss": 3.0133,
      "step": 99533
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6646292209625244,
      "learning_rate": 0.0003636025167438596,
      "loss": 3.0029,
      "step": 99534
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8499807119369507,
      "learning_rate": 0.0003635985191630644,
      "loss": 2.8589,
      "step": 99535
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.5116145610809326,
      "learning_rate": 0.0003635945215704451,
      "loss": 2.88,
      "step": 99536
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8251219987869263,
      "learning_rate": 0.00036359052396600243,
      "loss": 3.1359,
      "step": 99537
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.9260969161987305,
      "learning_rate": 0.000363586526349737,
      "loss": 2.8055,
      "step": 99538
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3019797801971436,
      "learning_rate": 0.00036358252872164957,
      "loss": 3.2809,
      "step": 99539
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1744048595428467,
      "learning_rate": 0.0003635785310817411,
      "loss": 3.1765,
      "step": 99540
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.5686259269714355,
      "learning_rate": 0.0003635745334300121,
      "loss": 2.9525,
      "step": 99541
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.33085298538208,
      "learning_rate": 0.00036357053576646333,
      "loss": 3.0317,
      "step": 99542
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.902161717414856,
      "learning_rate": 0.0003635665380910957,
      "loss": 2.9057,
      "step": 99543
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8740333318710327,
      "learning_rate": 0.0003635625404039099,
      "loss": 2.7601,
      "step": 99544
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2900142669677734,
      "learning_rate": 0.0003635585427049065,
      "loss": 2.9756,
      "step": 99545
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.947443723678589,
      "learning_rate": 0.00036355454499408646,
      "loss": 2.831,
      "step": 99546
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6189614534378052,
      "learning_rate": 0.0003635505472714503,
      "loss": 3.0624,
      "step": 99547
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9625781774520874,
      "learning_rate": 0.000363546549536999,
      "loss": 3.1308,
      "step": 99548
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6090593338012695,
      "learning_rate": 0.00036354255179073323,
      "loss": 2.9855,
      "step": 99549
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8215023279190063,
      "learning_rate": 0.0003635385540326536,
      "loss": 3.0246,
      "step": 99550
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8870925903320312,
      "learning_rate": 0.000363534556262761,
      "loss": 3.0122,
      "step": 99551
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.53222918510437,
      "learning_rate": 0.00036353055848105614,
      "loss": 3.1068,
      "step": 99552
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9173349142074585,
      "learning_rate": 0.0003635265606875397,
      "loss": 3.021,
      "step": 99553
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5637022256851196,
      "learning_rate": 0.0003635225628822125,
      "loss": 3.0285,
      "step": 99554
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9055235385894775,
      "learning_rate": 0.0003635185650650753,
      "loss": 2.9517,
      "step": 99555
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0103206634521484,
      "learning_rate": 0.00036351456723612873,
      "loss": 2.9607,
      "step": 99556
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.888933777809143,
      "learning_rate": 0.0003635105693953736,
      "loss": 2.8606,
      "step": 99557
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.033770799636841,
      "learning_rate": 0.00036350657154281073,
      "loss": 2.8386,
      "step": 99558
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.531445264816284,
      "learning_rate": 0.0003635025736784408,
      "loss": 2.9983,
      "step": 99559
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6288881301879883,
      "learning_rate": 0.0003634985758022644,
      "loss": 3.1303,
      "step": 99560
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.127923011779785,
      "learning_rate": 0.0003634945779142826,
      "loss": 2.9262,
      "step": 99561
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.231795310974121,
      "learning_rate": 0.00036349058001449586,
      "loss": 2.8841,
      "step": 99562
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7667838335037231,
      "learning_rate": 0.00036348658210290504,
      "loss": 2.9716,
      "step": 99563
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.851669430732727,
      "learning_rate": 0.0003634825841795108,
      "loss": 3.0524,
      "step": 99564
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6864529848098755,
      "learning_rate": 0.00036347858624431403,
      "loss": 2.9213,
      "step": 99565
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6474770307540894,
      "learning_rate": 0.0003634745882973154,
      "loss": 3.0775,
      "step": 99566
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8721115589141846,
      "learning_rate": 0.00036347059033851566,
      "loss": 3.2118,
      "step": 99567
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0977702140808105,
      "learning_rate": 0.00036346659236791547,
      "loss": 2.904,
      "step": 99568
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7393007278442383,
      "learning_rate": 0.0003634625943855156,
      "loss": 2.8955,
      "step": 99569
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.270681142807007,
      "learning_rate": 0.00036345859639131696,
      "loss": 3.1759,
      "step": 99570
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4320433139801025,
      "learning_rate": 0.0003634545983853201,
      "loss": 3.1165,
      "step": 99571
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4362807273864746,
      "learning_rate": 0.0003634506003675258,
      "loss": 2.8816,
      "step": 99572
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8329877853393555,
      "learning_rate": 0.00036344660233793497,
      "loss": 3.0207,
      "step": 99573
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.28452467918396,
      "learning_rate": 0.00036344260429654803,
      "loss": 3.0303,
      "step": 99574
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.102783441543579,
      "learning_rate": 0.000363438606243366,
      "loss": 3.2717,
      "step": 99575
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8266156911849976,
      "learning_rate": 0.00036343460817838964,
      "loss": 2.9223,
      "step": 99576
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8843849897384644,
      "learning_rate": 0.0003634306101016195,
      "loss": 3.0959,
      "step": 99577
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.184192419052124,
      "learning_rate": 0.0003634266120130563,
      "loss": 2.934,
      "step": 99578
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8535540103912354,
      "learning_rate": 0.0003634226139127011,
      "loss": 2.9545,
      "step": 99579
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8548353910446167,
      "learning_rate": 0.00036341861580055433,
      "loss": 2.9589,
      "step": 99580
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8969193696975708,
      "learning_rate": 0.0003634146176766168,
      "loss": 2.8267,
      "step": 99581
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9324642419815063,
      "learning_rate": 0.00036341061954088945,
      "loss": 2.839,
      "step": 99582
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8351187705993652,
      "learning_rate": 0.0003634066213933727,
      "loss": 3.0249,
      "step": 99583
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6443372964859009,
      "learning_rate": 0.00036340262323406757,
      "loss": 2.8382,
      "step": 99584
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0757596492767334,
      "learning_rate": 0.0003633986250629747,
      "loss": 2.9854,
      "step": 99585
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5934255123138428,
      "learning_rate": 0.0003633946268800947,
      "loss": 3.0287,
      "step": 99586
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8710633516311646,
      "learning_rate": 0.0003633906286854285,
      "loss": 3.0099,
      "step": 99587
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5291616916656494,
      "learning_rate": 0.00036338663047897685,
      "loss": 2.9245,
      "step": 99588
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.653322696685791,
      "learning_rate": 0.00036338263226074044,
      "loss": 3.0705,
      "step": 99589
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.06203293800354,
      "learning_rate": 0.0003633786340307199,
      "loss": 2.8041,
      "step": 99590
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.157318592071533,
      "learning_rate": 0.0003633746357889161,
      "loss": 3.1534,
      "step": 99591
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.990857720375061,
      "learning_rate": 0.0003633706375353298,
      "loss": 2.8966,
      "step": 99592
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6784971952438354,
      "learning_rate": 0.00036336663926996173,
      "loss": 3.1475,
      "step": 99593
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8884601593017578,
      "learning_rate": 0.0003633626409928125,
      "loss": 2.9756,
      "step": 99594
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9299923181533813,
      "learning_rate": 0.0003633586427038831,
      "loss": 2.9468,
      "step": 99595
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.116722822189331,
      "learning_rate": 0.00036335464440317403,
      "loss": 3.0443,
      "step": 99596
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.082794666290283,
      "learning_rate": 0.00036335064609068613,
      "loss": 2.9723,
      "step": 99597
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.232677936553955,
      "learning_rate": 0.00036334664776642027,
      "loss": 2.9922,
      "step": 99598
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.362588882446289,
      "learning_rate": 0.00036334264943037694,
      "loss": 2.9049,
      "step": 99599
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.318742513656616,
      "learning_rate": 0.00036333865108255706,
      "loss": 2.9245,
      "step": 99600
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8846997022628784,
      "learning_rate": 0.00036333465272296133,
      "loss": 2.8867,
      "step": 99601
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.331850290298462,
      "learning_rate": 0.0003633306543515905,
      "loss": 2.789,
      "step": 99602
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6073975563049316,
      "learning_rate": 0.0003633266559684453,
      "loss": 2.9222,
      "step": 99603
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0653326511383057,
      "learning_rate": 0.0003633226575735265,
      "loss": 3.1178,
      "step": 99604
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6928551197052,
      "learning_rate": 0.0003633186591668348,
      "loss": 2.9255,
      "step": 99605
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8547837734222412,
      "learning_rate": 0.0003633146607483709,
      "loss": 3.107,
      "step": 99606
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9036643505096436,
      "learning_rate": 0.00036331066231813565,
      "loss": 3.0528,
      "step": 99607
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7476732730865479,
      "learning_rate": 0.00036330666387612983,
      "loss": 2.9539,
      "step": 99608
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1030590534210205,
      "learning_rate": 0.00036330266542235407,
      "loss": 2.8612,
      "step": 99609
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.45661997795105,
      "learning_rate": 0.0003632986669568091,
      "loss": 3.1228,
      "step": 99610
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8599958419799805,
      "learning_rate": 0.0003632946684794958,
      "loss": 2.9041,
      "step": 99611
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5957266092300415,
      "learning_rate": 0.00036329066999041476,
      "loss": 3.0915,
      "step": 99612
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7775284051895142,
      "learning_rate": 0.0003632866714895668,
      "loss": 3.006,
      "step": 99613
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.701734185218811,
      "learning_rate": 0.00036328267297695263,
      "loss": 2.8941,
      "step": 99614
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7047427892684937,
      "learning_rate": 0.00036327867445257303,
      "loss": 3.3495,
      "step": 99615
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.549481153488159,
      "learning_rate": 0.0003632746759164288,
      "loss": 2.921,
      "step": 99616
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8248255252838135,
      "learning_rate": 0.00036327067736852044,
      "loss": 3.4218,
      "step": 99617
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8468800783157349,
      "learning_rate": 0.00036326667880884903,
      "loss": 3.1971,
      "step": 99618
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.624443531036377,
      "learning_rate": 0.00036326268023741516,
      "loss": 3.0441,
      "step": 99619
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0746445655822754,
      "learning_rate": 0.00036325868165421944,
      "loss": 2.9876,
      "step": 99620
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0709228515625,
      "learning_rate": 0.00036325468305926277,
      "loss": 3.1258,
      "step": 99621
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.864116668701172,
      "learning_rate": 0.000363250684452546,
      "loss": 2.7031,
      "step": 99622
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4357268810272217,
      "learning_rate": 0.00036324668583406956,
      "loss": 3.1708,
      "step": 99623
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.4516184329986572,
      "learning_rate": 0.0003632426872038344,
      "loss": 2.7131,
      "step": 99624
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.7460885047912598,
      "learning_rate": 0.00036323868856184133,
      "loss": 2.8342,
      "step": 99625
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7754651308059692,
      "learning_rate": 0.00036323468990809093,
      "loss": 2.9904,
      "step": 99626
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.946100115776062,
      "learning_rate": 0.000363230691242584,
      "loss": 3.0881,
      "step": 99627
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3609611988067627,
      "learning_rate": 0.00036322669256532136,
      "loss": 3.1449,
      "step": 99628
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0912625789642334,
      "learning_rate": 0.00036322269387630354,
      "loss": 3.069,
      "step": 99629
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.9296209812164307,
      "learning_rate": 0.0003632186951755315,
      "loss": 2.9216,
      "step": 99630
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.410997152328491,
      "learning_rate": 0.00036321469646300597,
      "loss": 3.0664,
      "step": 99631
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.699826955795288,
      "learning_rate": 0.0003632106977387276,
      "loss": 3.3218,
      "step": 99632
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.019289493560791,
      "learning_rate": 0.0003632066990026971,
      "loss": 3.1946,
      "step": 99633
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.90426504611969,
      "learning_rate": 0.0003632027002549154,
      "loss": 3.1563,
      "step": 99634
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.618884563446045,
      "learning_rate": 0.00036319870149538306,
      "loss": 3.0327,
      "step": 99635
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4753339290618896,
      "learning_rate": 0.0003631947027241008,
      "loss": 3.1145,
      "step": 99636
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7123075723648071,
      "learning_rate": 0.00036319070394106966,
      "loss": 3.1829,
      "step": 99637
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.006460428237915,
      "learning_rate": 0.00036318670514629,
      "loss": 2.7381,
      "step": 99638
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7936952114105225,
      "learning_rate": 0.00036318270633976275,
      "loss": 3.2027,
      "step": 99639
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.351877212524414,
      "learning_rate": 0.0003631787075214888,
      "loss": 3.0667,
      "step": 99640
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7861870527267456,
      "learning_rate": 0.00036317470869146854,
      "loss": 2.9657,
      "step": 99641
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6356126070022583,
      "learning_rate": 0.000363170709849703,
      "loss": 2.9824,
      "step": 99642
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6810672283172607,
      "learning_rate": 0.00036316671099619285,
      "loss": 3.0499,
      "step": 99643
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0430104732513428,
      "learning_rate": 0.0003631627121309388,
      "loss": 2.9243,
      "step": 99644
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.326165199279785,
      "learning_rate": 0.00036315871325394157,
      "loss": 3.078,
      "step": 99645
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.294548511505127,
      "learning_rate": 0.000363154714365202,
      "loss": 2.7498,
      "step": 99646
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.986395001411438,
      "learning_rate": 0.00036315071546472074,
      "loss": 2.9388,
      "step": 99647
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6863263845443726,
      "learning_rate": 0.0003631467165524986,
      "loss": 3.1455,
      "step": 99648
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.656981945037842,
      "learning_rate": 0.0003631427176285363,
      "loss": 3.3946,
      "step": 99649
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7478491067886353,
      "learning_rate": 0.0003631387186928345,
      "loss": 3.1204,
      "step": 99650
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7239660024642944,
      "learning_rate": 0.00036313471974539407,
      "loss": 2.8409,
      "step": 99651
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7065205574035645,
      "learning_rate": 0.00036313072078621574,
      "loss": 3.0273,
      "step": 99652
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7922420501708984,
      "learning_rate": 0.0003631267218153002,
      "loss": 3.0233,
      "step": 99653
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.021789312362671,
      "learning_rate": 0.0003631227228326482,
      "loss": 2.8211,
      "step": 99654
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7756463289260864,
      "learning_rate": 0.0003631187238382604,
      "loss": 2.8222,
      "step": 99655
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6578599214553833,
      "learning_rate": 0.00036311472483213777,
      "loss": 3.2147,
      "step": 99656
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0026891231536865,
      "learning_rate": 0.00036311072581428084,
      "loss": 2.8536,
      "step": 99657
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0153703689575195,
      "learning_rate": 0.00036310672678469046,
      "loss": 2.8226,
      "step": 99658
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0097620487213135,
      "learning_rate": 0.0003631027277433674,
      "loss": 2.8071,
      "step": 99659
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.5047404766082764,
      "learning_rate": 0.0003630987286903123,
      "loss": 2.6889,
      "step": 99660
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.206861972808838,
      "learning_rate": 0.00036309472962552596,
      "loss": 3.1904,
      "step": 99661
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.583603620529175,
      "learning_rate": 0.0003630907305490092,
      "loss": 2.9762,
      "step": 99662
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7344186305999756,
      "learning_rate": 0.00036308673146076255,
      "loss": 2.8451,
      "step": 99663
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.9188930988311768,
      "learning_rate": 0.0003630827323607869,
      "loss": 3.002,
      "step": 99664
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.278172492980957,
      "learning_rate": 0.00036307873324908306,
      "loss": 3.0035,
      "step": 99665
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0404140949249268,
      "learning_rate": 0.00036307473412565167,
      "loss": 2.7821,
      "step": 99666
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.79260516166687,
      "learning_rate": 0.00036307073499049344,
      "loss": 2.9788,
      "step": 99667
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.584937572479248,
      "learning_rate": 0.00036306673584360924,
      "loss": 2.804,
      "step": 99668
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.6782877445220947,
      "learning_rate": 0.0003630627366849997,
      "loss": 2.9432,
      "step": 99669
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6743900775909424,
      "learning_rate": 0.0003630587375146656,
      "loss": 3.0918,
      "step": 99670
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6662120819091797,
      "learning_rate": 0.00036305473833260775,
      "loss": 2.7557,
      "step": 99671
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7157062292099,
      "learning_rate": 0.0003630507391388268,
      "loss": 3.1576,
      "step": 99672
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7171999216079712,
      "learning_rate": 0.0003630467399333235,
      "loss": 2.9595,
      "step": 99673
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.899018406867981,
      "learning_rate": 0.0003630427407160987,
      "loss": 3.0477,
      "step": 99674
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3987233638763428,
      "learning_rate": 0.00036303874148715297,
      "loss": 2.8919,
      "step": 99675
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.204775333404541,
      "learning_rate": 0.0003630347422464872,
      "loss": 2.6043,
      "step": 99676
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5242539644241333,
      "learning_rate": 0.0003630307429941021,
      "loss": 3.2181,
      "step": 99677
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.679194450378418,
      "learning_rate": 0.00036302674372999834,
      "loss": 3.0161,
      "step": 99678
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7413851022720337,
      "learning_rate": 0.0003630227444541767,
      "loss": 3.047,
      "step": 99679
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6281956434249878,
      "learning_rate": 0.0003630187451666381,
      "loss": 3.0201,
      "step": 99680
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5701252222061157,
      "learning_rate": 0.00036301474586738293,
      "loss": 3.2162,
      "step": 99681
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6401710510253906,
      "learning_rate": 0.00036301074655641216,
      "loss": 3.1546,
      "step": 99682
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6099541187286377,
      "learning_rate": 0.00036300674723372664,
      "loss": 3.0634,
      "step": 99683
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2685341835021973,
      "learning_rate": 0.00036300274789932685,
      "loss": 3.0051,
      "step": 99684
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0816497802734375,
      "learning_rate": 0.0003629987485532137,
      "loss": 2.6851,
      "step": 99685
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8046846389770508,
      "learning_rate": 0.0003629947491953879,
      "loss": 2.9572,
      "step": 99686
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.7012405395507812,
      "learning_rate": 0.00036299074982585017,
      "loss": 2.7242,
      "step": 99687
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.980972170829773,
      "learning_rate": 0.00036298675044460125,
      "loss": 2.9804,
      "step": 99688
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3455395698547363,
      "learning_rate": 0.00036298275105164197,
      "loss": 3.0319,
      "step": 99689
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.4806978702545166,
      "learning_rate": 0.00036297875164697293,
      "loss": 3.1548,
      "step": 99690
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6179128885269165,
      "learning_rate": 0.000362974752230595,
      "loss": 3.0916,
      "step": 99691
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.605731248855591,
      "learning_rate": 0.0003629707528025089,
      "loss": 3.1853,
      "step": 99692
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.348299026489258,
      "learning_rate": 0.0003629667533627153,
      "loss": 2.704,
      "step": 99693
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7413188219070435,
      "learning_rate": 0.00036296275391121496,
      "loss": 3.1599,
      "step": 99694
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.844318151473999,
      "learning_rate": 0.0003629587544480087,
      "loss": 2.8794,
      "step": 99695
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.845928192138672,
      "learning_rate": 0.0003629547549730973,
      "loss": 3.2369,
      "step": 99696
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.850648045539856,
      "learning_rate": 0.0003629507554864813,
      "loss": 2.9886,
      "step": 99697
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6036635637283325,
      "learning_rate": 0.0003629467559881616,
      "loss": 2.802,
      "step": 99698
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.421967029571533,
      "learning_rate": 0.0003629427564781389,
      "loss": 2.9637,
      "step": 99699
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8420453071594238,
      "learning_rate": 0.00036293875695641404,
      "loss": 2.8691,
      "step": 99700
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2754952907562256,
      "learning_rate": 0.0003629347574229876,
      "loss": 2.8009,
      "step": 99701
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7231487035751343,
      "learning_rate": 0.0003629307578778604,
      "loss": 3.0535,
      "step": 99702
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6950204372406006,
      "learning_rate": 0.00036292675832103315,
      "loss": 2.9809,
      "step": 99703
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.1573710441589355,
      "learning_rate": 0.0003629227587525067,
      "loss": 2.9469,
      "step": 99704
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.753799319267273,
      "learning_rate": 0.0003629187591722817,
      "loss": 2.949,
      "step": 99705
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7689085006713867,
      "learning_rate": 0.0003629147595803589,
      "loss": 3.0196,
      "step": 99706
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.153430223464966,
      "learning_rate": 0.000362910759976739,
      "loss": 3.2976,
      "step": 99707
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4387879371643066,
      "learning_rate": 0.0003629067603614229,
      "loss": 2.8984,
      "step": 99708
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.0191731452941895,
      "learning_rate": 0.00036290276073441124,
      "loss": 2.8248,
      "step": 99709
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.685922145843506,
      "learning_rate": 0.00036289876109570475,
      "loss": 3.0519,
      "step": 99710
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.65448796749115,
      "learning_rate": 0.0003628947614453042,
      "loss": 2.9048,
      "step": 99711
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7171269655227661,
      "learning_rate": 0.0003628907617832103,
      "loss": 3.014,
      "step": 99712
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.191560745239258,
      "learning_rate": 0.00036288676210942384,
      "loss": 3.0803,
      "step": 99713
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.604275941848755,
      "learning_rate": 0.00036288276242394556,
      "loss": 3.228,
      "step": 99714
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6029212474822998,
      "learning_rate": 0.00036287876272677607,
      "loss": 3.1591,
      "step": 99715
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9751522541046143,
      "learning_rate": 0.00036287476301791644,
      "loss": 2.7241,
      "step": 99716
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4652163982391357,
      "learning_rate": 0.0003628707632973671,
      "loss": 3.1061,
      "step": 99717
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3646295070648193,
      "learning_rate": 0.00036286676356512887,
      "loss": 3.0928,
      "step": 99718
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2233433723449707,
      "learning_rate": 0.00036286276382120256,
      "loss": 2.9807,
      "step": 99719
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8966901302337646,
      "learning_rate": 0.0003628587640655888,
      "loss": 3.1644,
      "step": 99720
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8904249668121338,
      "learning_rate": 0.00036285476429828847,
      "loss": 2.9623,
      "step": 99721
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2916786670684814,
      "learning_rate": 0.00036285076451930226,
      "loss": 3.3035,
      "step": 99722
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.554199457168579,
      "learning_rate": 0.0003628467647286309,
      "loss": 3.1384,
      "step": 99723
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8275192975997925,
      "learning_rate": 0.0003628427649262751,
      "loss": 3.1929,
      "step": 99724
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8287419080734253,
      "learning_rate": 0.00036283876511223565,
      "loss": 2.8773,
      "step": 99725
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.148972988128662,
      "learning_rate": 0.00036283476528651344,
      "loss": 2.9284,
      "step": 99726
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.707460880279541,
      "learning_rate": 0.00036283076544910885,
      "loss": 3.0129,
      "step": 99727
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0236997604370117,
      "learning_rate": 0.00036282676560002295,
      "loss": 3.1152,
      "step": 99728
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.749617099761963,
      "learning_rate": 0.0003628227657392564,
      "loss": 2.8633,
      "step": 99729
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8265374898910522,
      "learning_rate": 0.0003628187658668098,
      "loss": 3.0252,
      "step": 99730
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7571015357971191,
      "learning_rate": 0.000362814765982684,
      "loss": 2.9696,
      "step": 99731
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7622305154800415,
      "learning_rate": 0.0003628107660868799,
      "loss": 3.1324,
      "step": 99732
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5759261846542358,
      "learning_rate": 0.000362806766179398,
      "loss": 3.0075,
      "step": 99733
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6815918684005737,
      "learning_rate": 0.00036280276626023914,
      "loss": 3.1895,
      "step": 99734
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7142844200134277,
      "learning_rate": 0.000362798766329404,
      "loss": 3.1331,
      "step": 99735
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0019149780273438,
      "learning_rate": 0.0003627947663868935,
      "loss": 3.2102,
      "step": 99736
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9843744039535522,
      "learning_rate": 0.00036279076643270814,
      "loss": 3.0645,
      "step": 99737
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8531582355499268,
      "learning_rate": 0.00036278676646684895,
      "loss": 2.9207,
      "step": 99738
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0755879878997803,
      "learning_rate": 0.00036278276648931643,
      "loss": 2.8536,
      "step": 99739
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7972434759140015,
      "learning_rate": 0.00036277876650011134,
      "loss": 2.8166,
      "step": 99740
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7279472351074219,
      "learning_rate": 0.00036277476649923464,
      "loss": 3.2956,
      "step": 99741
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9774224758148193,
      "learning_rate": 0.00036277076648668684,
      "loss": 3.1655,
      "step": 99742
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8536254167556763,
      "learning_rate": 0.00036276676646246873,
      "loss": 3.1661,
      "step": 99743
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3478965759277344,
      "learning_rate": 0.0003627627664265812,
      "loss": 3.032,
      "step": 99744
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7036505937576294,
      "learning_rate": 0.0003627587663790247,
      "loss": 2.8198,
      "step": 99745
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7438405752182007,
      "learning_rate": 0.0003627547663198004,
      "loss": 2.9815,
      "step": 99746
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7102599143981934,
      "learning_rate": 0.0003627507662489086,
      "loss": 2.7958,
      "step": 99747
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.525234341621399,
      "learning_rate": 0.0003627467661663504,
      "loss": 2.899,
      "step": 99748
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6698298454284668,
      "learning_rate": 0.00036274276607212624,
      "loss": 2.931,
      "step": 99749
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8467108011245728,
      "learning_rate": 0.0003627387659662372,
      "loss": 2.8311,
      "step": 99750
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.024343729019165,
      "learning_rate": 0.00036273476584868376,
      "loss": 3.059,
      "step": 99751
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.258697748184204,
      "learning_rate": 0.0003627307657194667,
      "loss": 3.1191,
      "step": 99752
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.4838247299194336,
      "learning_rate": 0.0003627267655785868,
      "loss": 2.8788,
      "step": 99753
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.939982295036316,
      "learning_rate": 0.00036272276542604487,
      "loss": 2.8993,
      "step": 99754
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6498106718063354,
      "learning_rate": 0.0003627187652618415,
      "loss": 2.8659,
      "step": 99755
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8639094829559326,
      "learning_rate": 0.00036271476508597763,
      "loss": 3.1553,
      "step": 99756
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.84177565574646,
      "learning_rate": 0.0003627107648984539,
      "loss": 3.0544,
      "step": 99757
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9153242111206055,
      "learning_rate": 0.000362706764699271,
      "loss": 2.7333,
      "step": 99758
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7805674076080322,
      "learning_rate": 0.00036270276448842977,
      "loss": 2.8908,
      "step": 99759
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.574974775314331,
      "learning_rate": 0.0003626987642659309,
      "loss": 3.0374,
      "step": 99760
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3609485626220703,
      "learning_rate": 0.0003626947640317751,
      "loss": 2.8964,
      "step": 99761
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.730739712715149,
      "learning_rate": 0.0003626907637859633,
      "loss": 2.9268,
      "step": 99762
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2507011890411377,
      "learning_rate": 0.00036268676352849594,
      "loss": 3.1987,
      "step": 99763
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1376898288726807,
      "learning_rate": 0.00036268276325937397,
      "loss": 3.3677,
      "step": 99764
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3313519954681396,
      "learning_rate": 0.0003626787629785982,
      "loss": 2.9934,
      "step": 99765
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9449875354766846,
      "learning_rate": 0.0003626747626861691,
      "loss": 3.1321,
      "step": 99766
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7346512079238892,
      "learning_rate": 0.00036267076238208763,
      "loss": 2.858,
      "step": 99767
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9838227033615112,
      "learning_rate": 0.0003626667620663545,
      "loss": 3.1358,
      "step": 99768
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6957374811172485,
      "learning_rate": 0.00036266276173897044,
      "loss": 2.7742,
      "step": 99769
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9937796592712402,
      "learning_rate": 0.00036265876139993623,
      "loss": 3.152,
      "step": 99770
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6773056983947754,
      "learning_rate": 0.00036265476104925255,
      "loss": 2.8947,
      "step": 99771
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.314002513885498,
      "learning_rate": 0.00036265076068692014,
      "loss": 2.9745,
      "step": 99772
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8188365697860718,
      "learning_rate": 0.0003626467603129398,
      "loss": 3.1507,
      "step": 99773
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7891910076141357,
      "learning_rate": 0.0003626427599273122,
      "loss": 2.8921,
      "step": 99774
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.936603307723999,
      "learning_rate": 0.0003626387595300382,
      "loss": 2.9442,
      "step": 99775
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.750712513923645,
      "learning_rate": 0.00036263475912111837,
      "loss": 2.9804,
      "step": 99776
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1344332695007324,
      "learning_rate": 0.0003626307587005536,
      "loss": 2.8879,
      "step": 99777
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3493611812591553,
      "learning_rate": 0.0003626267582683446,
      "loss": 3.0044,
      "step": 99778
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9442955255508423,
      "learning_rate": 0.0003626227578244921,
      "loss": 3.1829,
      "step": 99779
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0774598121643066,
      "learning_rate": 0.0003626187573689968,
      "loss": 3.1219,
      "step": 99780
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.739133358001709,
      "learning_rate": 0.0003626147569018597,
      "loss": 2.906,
      "step": 99781
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.7496447563171387,
      "learning_rate": 0.0003626107564230811,
      "loss": 2.7708,
      "step": 99782
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9795193672180176,
      "learning_rate": 0.000362606755932662,
      "loss": 3.151,
      "step": 99783
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.499863862991333,
      "learning_rate": 0.0003626027554306033,
      "loss": 3.0704,
      "step": 99784
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0942649841308594,
      "learning_rate": 0.00036259875491690536,
      "loss": 3.2343,
      "step": 99785
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.956094741821289,
      "learning_rate": 0.0003625947543915692,
      "loss": 3.0668,
      "step": 99786
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.65827476978302,
      "learning_rate": 0.00036259075385459554,
      "loss": 2.8889,
      "step": 99787
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7944047451019287,
      "learning_rate": 0.0003625867533059851,
      "loss": 2.9767,
      "step": 99788
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.40282940864563,
      "learning_rate": 0.0003625827527457385,
      "loss": 3.0603,
      "step": 99789
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4055659770965576,
      "learning_rate": 0.00036257875217385663,
      "loss": 3.0234,
      "step": 99790
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6580719947814941,
      "learning_rate": 0.0003625747515903402,
      "loss": 3.0893,
      "step": 99791
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4464805126190186,
      "learning_rate": 0.00036257075099518996,
      "loss": 3.139,
      "step": 99792
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7507569789886475,
      "learning_rate": 0.0003625667503884066,
      "loss": 2.8181,
      "step": 99793
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9095993041992188,
      "learning_rate": 0.00036256274976999086,
      "loss": 3.0339,
      "step": 99794
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.56934654712677,
      "learning_rate": 0.0003625587491399436,
      "loss": 2.7968,
      "step": 99795
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.3797361850738525,
      "learning_rate": 0.00036255474849826544,
      "loss": 3.0437,
      "step": 99796
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7468278408050537,
      "learning_rate": 0.00036255074784495724,
      "loss": 3.0941,
      "step": 99797
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7969930171966553,
      "learning_rate": 0.00036254674718001956,
      "loss": 3.0593,
      "step": 99798
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9586782455444336,
      "learning_rate": 0.0003625427465034534,
      "loss": 2.9536,
      "step": 99799
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.202791452407837,
      "learning_rate": 0.0003625387458152593,
      "loss": 2.7055,
      "step": 99800
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2382965087890625,
      "learning_rate": 0.00036253474511543796,
      "loss": 3.1163,
      "step": 99801
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8799606561660767,
      "learning_rate": 0.0003625307444039904,
      "loss": 2.8568,
      "step": 99802
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.825249433517456,
      "learning_rate": 0.0003625267436809171,
      "loss": 3.211,
      "step": 99803
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4618990421295166,
      "learning_rate": 0.0003625227429462189,
      "loss": 3.1579,
      "step": 99804
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7049531936645508,
      "learning_rate": 0.0003625187421998966,
      "loss": 3.0944,
      "step": 99805
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8204338550567627,
      "learning_rate": 0.0003625147414419509,
      "loss": 3.2556,
      "step": 99806
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2666280269622803,
      "learning_rate": 0.00036251074067238245,
      "loss": 2.9297,
      "step": 99807
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9444940090179443,
      "learning_rate": 0.0003625067398911921,
      "loss": 3.1468,
      "step": 99808
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.548248529434204,
      "learning_rate": 0.00036250273909838054,
      "loss": 2.9476,
      "step": 99809
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7461498975753784,
      "learning_rate": 0.0003624987382939486,
      "loss": 3.2071,
      "step": 99810
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.417860984802246,
      "learning_rate": 0.00036249473747789694,
      "loss": 3.1518,
      "step": 99811
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8509191274642944,
      "learning_rate": 0.0003624907366502263,
      "loss": 3.1641,
      "step": 99812
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.881115198135376,
      "learning_rate": 0.00036248673581093743,
      "loss": 3.0836,
      "step": 99813
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6365041732788086,
      "learning_rate": 0.0003624827349600312,
      "loss": 3.1202,
      "step": 99814
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9807817935943604,
      "learning_rate": 0.00036247873409750816,
      "loss": 3.1205,
      "step": 99815
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0279815196990967,
      "learning_rate": 0.00036247473322336917,
      "loss": 3.3001,
      "step": 99816
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9123504161834717,
      "learning_rate": 0.000362470732337615,
      "loss": 2.8413,
      "step": 99817
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8274023532867432,
      "learning_rate": 0.0003624667314402463,
      "loss": 2.9226,
      "step": 99818
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8153088092803955,
      "learning_rate": 0.00036246273053126376,
      "loss": 3.0038,
      "step": 99819
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2842421531677246,
      "learning_rate": 0.0003624587296106684,
      "loss": 3.1371,
      "step": 99820
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9542090892791748,
      "learning_rate": 0.0003624547286784607,
      "loss": 3.2285,
      "step": 99821
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7507964372634888,
      "learning_rate": 0.00036245072773464143,
      "loss": 2.8837,
      "step": 99822
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0261311531066895,
      "learning_rate": 0.00036244672677921146,
      "loss": 3.3486,
      "step": 99823
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5388586521148682,
      "learning_rate": 0.00036244272581217147,
      "loss": 2.9437,
      "step": 99824
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2786037921905518,
      "learning_rate": 0.0003624387248335221,
      "loss": 2.7869,
      "step": 99825
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7199777364730835,
      "learning_rate": 0.0003624347238432644,
      "loss": 2.8654,
      "step": 99826
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.911751627922058,
      "learning_rate": 0.0003624307228413987,
      "loss": 2.8886,
      "step": 99827
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5624094009399414,
      "learning_rate": 0.00036242672182792605,
      "loss": 3.1223,
      "step": 99828
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0537455081939697,
      "learning_rate": 0.00036242272080284704,
      "loss": 2.8547,
      "step": 99829
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2252721786499023,
      "learning_rate": 0.00036241871976616245,
      "loss": 3.1096,
      "step": 99830
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9135997295379639,
      "learning_rate": 0.0003624147187178731,
      "loss": 2.7595,
      "step": 99831
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8950172662734985,
      "learning_rate": 0.0003624107176579797,
      "loss": 3.0246,
      "step": 99832
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7091375589370728,
      "learning_rate": 0.00036240671658648293,
      "loss": 3.0264,
      "step": 99833
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.253016471862793,
      "learning_rate": 0.0003624027155033836,
      "loss": 2.9315,
      "step": 99834
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7719639539718628,
      "learning_rate": 0.00036239871440868243,
      "loss": 2.6787,
      "step": 99835
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9254493713378906,
      "learning_rate": 0.0003623947133023801,
      "loss": 3.1782,
      "step": 99836
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.438141345977783,
      "learning_rate": 0.00036239071218447744,
      "loss": 2.9966,
      "step": 99837
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.7664413452148438,
      "learning_rate": 0.00036238671105497523,
      "loss": 3.049,
      "step": 99838
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8106850385665894,
      "learning_rate": 0.00036238270991387407,
      "loss": 2.7409,
      "step": 99839
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2464866638183594,
      "learning_rate": 0.00036237870876117476,
      "loss": 3.0748,
      "step": 99840
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8111000061035156,
      "learning_rate": 0.0003623747075968782,
      "loss": 3.2434,
      "step": 99841
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.353785753250122,
      "learning_rate": 0.00036237070642098485,
      "loss": 2.8362,
      "step": 99842
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8271775245666504,
      "learning_rate": 0.0003623667052334957,
      "loss": 2.8969,
      "step": 99843
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.099640369415283,
      "learning_rate": 0.00036236270403441147,
      "loss": 2.934,
      "step": 99844
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8891947269439697,
      "learning_rate": 0.0003623587028237327,
      "loss": 2.9552,
      "step": 99845
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6041877269744873,
      "learning_rate": 0.0003623547016014603,
      "loss": 3.0987,
      "step": 99846
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1880099773406982,
      "learning_rate": 0.0003623507003675951,
      "loss": 3.0964,
      "step": 99847
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.798670768737793,
      "learning_rate": 0.00036234669912213756,
      "loss": 2.9944,
      "step": 99848
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7650516033172607,
      "learning_rate": 0.00036234269786508867,
      "loss": 3.0769,
      "step": 99849
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6526808738708496,
      "learning_rate": 0.0003623386965964492,
      "loss": 2.937,
      "step": 99850
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6026133298873901,
      "learning_rate": 0.00036233469531621966,
      "loss": 3.1264,
      "step": 99851
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.458700180053711,
      "learning_rate": 0.0003623306940244009,
      "loss": 2.8256,
      "step": 99852
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7890347242355347,
      "learning_rate": 0.0003623266927209938,
      "loss": 3.0093,
      "step": 99853
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1679461002349854,
      "learning_rate": 0.0003623226914059989,
      "loss": 2.8187,
      "step": 99854
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9704687595367432,
      "learning_rate": 0.00036231869007941704,
      "loss": 2.7826,
      "step": 99855
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9193984270095825,
      "learning_rate": 0.00036231468874124896,
      "loss": 3.0339,
      "step": 99856
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8343228101730347,
      "learning_rate": 0.00036231068739149547,
      "loss": 2.9854,
      "step": 99857
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7057960033416748,
      "learning_rate": 0.0003623066860301572,
      "loss": 2.8294,
      "step": 99858
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9130942821502686,
      "learning_rate": 0.00036230268465723493,
      "loss": 3.2511,
      "step": 99859
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.851224422454834,
      "learning_rate": 0.00036229868327272946,
      "loss": 3.1174,
      "step": 99860
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7308608293533325,
      "learning_rate": 0.0003622946818766414,
      "loss": 2.8365,
      "step": 99861
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8098384141921997,
      "learning_rate": 0.00036229068046897167,
      "loss": 3.0445,
      "step": 99862
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8883836269378662,
      "learning_rate": 0.00036228667904972093,
      "loss": 2.9346,
      "step": 99863
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3138065338134766,
      "learning_rate": 0.00036228267761888983,
      "loss": 3.0726,
      "step": 99864
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6533033847808838,
      "learning_rate": 0.0003622786761764793,
      "loss": 3.5393,
      "step": 99865
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8139747381210327,
      "learning_rate": 0.00036227467472249,
      "loss": 2.8395,
      "step": 99866
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9010025262832642,
      "learning_rate": 0.0003622706732569226,
      "loss": 3.3053,
      "step": 99867
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9021666049957275,
      "learning_rate": 0.0003622666717797779,
      "loss": 3.0264,
      "step": 99868
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5863802433013916,
      "learning_rate": 0.00036226267029105673,
      "loss": 3.219,
      "step": 99869
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8712193965911865,
      "learning_rate": 0.00036225866879075966,
      "loss": 3.3622,
      "step": 99870
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6969006061553955,
      "learning_rate": 0.00036225466727888753,
      "loss": 2.8767,
      "step": 99871
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0580759048461914,
      "learning_rate": 0.00036225066575544106,
      "loss": 3.0232,
      "step": 99872
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9304214715957642,
      "learning_rate": 0.00036224666422042117,
      "loss": 2.9093,
      "step": 99873
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1908087730407715,
      "learning_rate": 0.00036224266267382833,
      "loss": 2.9435,
      "step": 99874
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.281447410583496,
      "learning_rate": 0.0003622386611156634,
      "loss": 2.9558,
      "step": 99875
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8656947612762451,
      "learning_rate": 0.0003622346595459272,
      "loss": 3.0141,
      "step": 99876
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1285674571990967,
      "learning_rate": 0.0003622306579646203,
      "loss": 2.9195,
      "step": 99877
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.66886568069458,
      "learning_rate": 0.0003622266563717436,
      "loss": 3.1196,
      "step": 99878
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.671400547027588,
      "learning_rate": 0.00036222265476729777,
      "loss": 3.0332,
      "step": 99879
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3910062313079834,
      "learning_rate": 0.00036221865315128364,
      "loss": 2.9036,
      "step": 99880
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1438307762145996,
      "learning_rate": 0.00036221465152370177,
      "loss": 3.065,
      "step": 99881
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9563710689544678,
      "learning_rate": 0.00036221064988455314,
      "loss": 3.0498,
      "step": 99882
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.4117227792739868,
      "learning_rate": 0.00036220664823383836,
      "loss": 3.0293,
      "step": 99883
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8403548002243042,
      "learning_rate": 0.0003622026465715582,
      "loss": 2.8791,
      "step": 99884
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.877586841583252,
      "learning_rate": 0.00036219864489771324,
      "loss": 2.9856,
      "step": 99885
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1648967266082764,
      "learning_rate": 0.00036219464321230447,
      "loss": 2.807,
      "step": 99886
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7310082912445068,
      "learning_rate": 0.00036219064151533264,
      "loss": 2.8701,
      "step": 99887
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2660279273986816,
      "learning_rate": 0.00036218663980679827,
      "loss": 3.0192,
      "step": 99888
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6533530950546265,
      "learning_rate": 0.00036218263808670235,
      "loss": 3.1012,
      "step": 99889
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.908384084701538,
      "learning_rate": 0.00036217863635504543,
      "loss": 3.0031,
      "step": 99890
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.134528636932373,
      "learning_rate": 0.0003621746346118283,
      "loss": 3.052,
      "step": 99891
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5817608833312988,
      "learning_rate": 0.00036217063285705174,
      "loss": 3.1414,
      "step": 99892
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6814258098602295,
      "learning_rate": 0.00036216663109071655,
      "loss": 3.0972,
      "step": 99893
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5763241052627563,
      "learning_rate": 0.0003621626293128234,
      "loss": 3.1338,
      "step": 99894
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.194232225418091,
      "learning_rate": 0.00036215862752337296,
      "loss": 3.1658,
      "step": 99895
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.553384780883789,
      "learning_rate": 0.0003621546257223661,
      "loss": 2.9543,
      "step": 99896
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6712480783462524,
      "learning_rate": 0.00036215062390980353,
      "loss": 2.7841,
      "step": 99897
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1478638648986816,
      "learning_rate": 0.000362146622085686,
      "loss": 3.2003,
      "step": 99898
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8433144092559814,
      "learning_rate": 0.0003621426202500143,
      "loss": 2.8162,
      "step": 99899
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9071924686431885,
      "learning_rate": 0.000362138618402789,
      "loss": 2.848,
      "step": 99900
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5691776275634766,
      "learning_rate": 0.000362134616544011,
      "loss": 2.8587,
      "step": 99901
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2729032039642334,
      "learning_rate": 0.000362130614673681,
      "loss": 3.0533,
      "step": 99902
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8034158945083618,
      "learning_rate": 0.0003621266127917997,
      "loss": 3.1434,
      "step": 99903
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8633877038955688,
      "learning_rate": 0.00036212261089836796,
      "loss": 2.9014,
      "step": 99904
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.332498788833618,
      "learning_rate": 0.0003621186089933865,
      "loss": 3.0692,
      "step": 99905
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.5118818283081055,
      "learning_rate": 0.00036211460707685593,
      "loss": 3.044,
      "step": 99906
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8827502727508545,
      "learning_rate": 0.0003621106051487771,
      "loss": 3.123,
      "step": 99907
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1914799213409424,
      "learning_rate": 0.0003621066032091507,
      "loss": 2.8081,
      "step": 99908
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.593848705291748,
      "learning_rate": 0.0003621026012579776,
      "loss": 2.9459,
      "step": 99909
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.980497121810913,
      "learning_rate": 0.00036209859929525837,
      "loss": 2.9251,
      "step": 99910
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0283143520355225,
      "learning_rate": 0.00036209459732099386,
      "loss": 2.9658,
      "step": 99911
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.095499277114868,
      "learning_rate": 0.0003620905953351848,
      "loss": 3.1889,
      "step": 99912
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.463200330734253,
      "learning_rate": 0.00036208659333783195,
      "loss": 2.7377,
      "step": 99913
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3514671325683594,
      "learning_rate": 0.000362082591328936,
      "loss": 3.1195,
      "step": 99914
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0282304286956787,
      "learning_rate": 0.0003620785893084978,
      "loss": 3.2009,
      "step": 99915
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.083631992340088,
      "learning_rate": 0.0003620745872765179,
      "loss": 2.9432,
      "step": 99916
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.559998512268066,
      "learning_rate": 0.00036207058523299724,
      "loss": 3.0838,
      "step": 99917
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8587982654571533,
      "learning_rate": 0.00036206658317793647,
      "loss": 3.0979,
      "step": 99918
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5999315977096558,
      "learning_rate": 0.0003620625811113363,
      "loss": 2.6877,
      "step": 99919
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9227970838546753,
      "learning_rate": 0.00036205857903319765,
      "loss": 2.8728,
      "step": 99920
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.215357780456543,
      "learning_rate": 0.00036205457694352095,
      "loss": 3.1873,
      "step": 99921
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1908621788024902,
      "learning_rate": 0.00036205057484230726,
      "loss": 3.1266,
      "step": 99922
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7631171941757202,
      "learning_rate": 0.0003620465727295572,
      "loss": 3.0817,
      "step": 99923
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.753861665725708,
      "learning_rate": 0.0003620425706052715,
      "loss": 2.8977,
      "step": 99924
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.515616774559021,
      "learning_rate": 0.0003620385684694509,
      "loss": 3.1346,
      "step": 99925
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8700684309005737,
      "learning_rate": 0.0003620345663220961,
      "loss": 3.3002,
      "step": 99926
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7379368543624878,
      "learning_rate": 0.000362030564163208,
      "loss": 2.7799,
      "step": 99927
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6532822847366333,
      "learning_rate": 0.0003620265619927872,
      "loss": 3.0789,
      "step": 99928
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9288501739501953,
      "learning_rate": 0.0003620225598108345,
      "loss": 2.8034,
      "step": 99929
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7315324544906616,
      "learning_rate": 0.0003620185576173507,
      "loss": 3.1254,
      "step": 99930
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6957460641860962,
      "learning_rate": 0.00036201455541233637,
      "loss": 2.9929,
      "step": 99931
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4173314571380615,
      "learning_rate": 0.00036201055319579235,
      "loss": 3.0119,
      "step": 99932
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.00968861579895,
      "learning_rate": 0.00036200655096771953,
      "loss": 2.8366,
      "step": 99933
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9213306903839111,
      "learning_rate": 0.00036200254872811844,
      "loss": 2.8449,
      "step": 99934
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.403822660446167,
      "learning_rate": 0.0003619985464769899,
      "loss": 3.0111,
      "step": 99935
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7313154935836792,
      "learning_rate": 0.0003619945442143347,
      "loss": 2.9829,
      "step": 99936
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2379672527313232,
      "learning_rate": 0.0003619905419401535,
      "loss": 3.0222,
      "step": 99937
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0808751583099365,
      "learning_rate": 0.0003619865396544471,
      "loss": 2.7594,
      "step": 99938
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.902016282081604,
      "learning_rate": 0.0003619825373572163,
      "loss": 3.0636,
      "step": 99939
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8106470108032227,
      "learning_rate": 0.00036197853504846164,
      "loss": 3.0734,
      "step": 99940
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.621498465538025,
      "learning_rate": 0.00036197453272818414,
      "loss": 3.1277,
      "step": 99941
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.713439464569092,
      "learning_rate": 0.00036197053039638434,
      "loss": 2.8696,
      "step": 99942
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6235857009887695,
      "learning_rate": 0.00036196652805306305,
      "loss": 2.8659,
      "step": 99943
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4619765281677246,
      "learning_rate": 0.00036196252569822097,
      "loss": 2.7803,
      "step": 99944
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.5105936527252197,
      "learning_rate": 0.000361958523331859,
      "loss": 2.739,
      "step": 99945
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7465224266052246,
      "learning_rate": 0.00036195452095397765,
      "loss": 2.7927,
      "step": 99946
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.167144775390625,
      "learning_rate": 0.0003619505185645778,
      "loss": 2.8329,
      "step": 99947
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.104462146759033,
      "learning_rate": 0.00036194651616366024,
      "loss": 3.1883,
      "step": 99948
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5832756757736206,
      "learning_rate": 0.00036194251375122567,
      "loss": 2.7836,
      "step": 99949
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6680537462234497,
      "learning_rate": 0.0003619385113272747,
      "loss": 3.174,
      "step": 99950
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8091237545013428,
      "learning_rate": 0.0003619345088918083,
      "loss": 2.9146,
      "step": 99951
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8224619626998901,
      "learning_rate": 0.0003619305064448271,
      "loss": 3.0032,
      "step": 99952
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8175503015518188,
      "learning_rate": 0.00036192650398633177,
      "loss": 3.0179,
      "step": 99953
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9776498079299927,
      "learning_rate": 0.00036192250151632324,
      "loss": 2.975,
      "step": 99954
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.373476266860962,
      "learning_rate": 0.00036191849903480206,
      "loss": 3.1673,
      "step": 99955
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9766719341278076,
      "learning_rate": 0.0003619144965417691,
      "loss": 3.1011,
      "step": 99956
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9509539604187012,
      "learning_rate": 0.0003619104940372251,
      "loss": 3.0376,
      "step": 99957
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0928614139556885,
      "learning_rate": 0.00036190649152117066,
      "loss": 3.0613,
      "step": 99958
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.763089895248413,
      "learning_rate": 0.0003619024889936068,
      "loss": 3.0584,
      "step": 99959
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8629553318023682,
      "learning_rate": 0.0003618984864545339,
      "loss": 3.0057,
      "step": 99960
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6593321561813354,
      "learning_rate": 0.00036189448390395307,
      "loss": 3.0269,
      "step": 99961
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0398404598236084,
      "learning_rate": 0.00036189048134186477,
      "loss": 3.3731,
      "step": 99962
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.774430751800537,
      "learning_rate": 0.00036188647876826994,
      "loss": 2.8771,
      "step": 99963
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.071159601211548,
      "learning_rate": 0.00036188247618316923,
      "loss": 3.0549,
      "step": 99964
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7513556480407715,
      "learning_rate": 0.00036187847358656346,
      "loss": 3.074,
      "step": 99965
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.27974009513855,
      "learning_rate": 0.0003618744709784532,
      "loss": 3.1706,
      "step": 99966
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.7575695514678955,
      "learning_rate": 0.0003618704683588393,
      "loss": 2.9864,
      "step": 99967
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2092690467834473,
      "learning_rate": 0.00036186646572772263,
      "loss": 3.1657,
      "step": 99968
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9886113405227661,
      "learning_rate": 0.0003618624630851037,
      "loss": 2.8746,
      "step": 99969
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6510744094848633,
      "learning_rate": 0.00036185846043098344,
      "loss": 3.2275,
      "step": 99970
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9838614463806152,
      "learning_rate": 0.00036185445776536245,
      "loss": 2.7306,
      "step": 99971
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9125412702560425,
      "learning_rate": 0.00036185045508824165,
      "loss": 2.9488,
      "step": 99972
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6563665866851807,
      "learning_rate": 0.00036184645239962165,
      "loss": 2.9472,
      "step": 99973
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8753557205200195,
      "learning_rate": 0.0003618424496995032,
      "loss": 3.095,
      "step": 99974
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.9378857612609863,
      "learning_rate": 0.0003618384469878871,
      "loss": 2.9417,
      "step": 99975
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9744212627410889,
      "learning_rate": 0.00036183444426477406,
      "loss": 3.3884,
      "step": 99976
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4501872062683105,
      "learning_rate": 0.0003618304415301648,
      "loss": 3.2191,
      "step": 99977
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8766024112701416,
      "learning_rate": 0.0003618264387840601,
      "loss": 2.9696,
      "step": 99978
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1454319953918457,
      "learning_rate": 0.0003618224360264608,
      "loss": 3.0825,
      "step": 99979
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8195000886917114,
      "learning_rate": 0.0003618184332573674,
      "loss": 2.9796,
      "step": 99980
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8131722211837769,
      "learning_rate": 0.00036181443047678087,
      "loss": 3.266,
      "step": 99981
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.5360755920410156,
      "learning_rate": 0.0003618104276847018,
      "loss": 2.7647,
      "step": 99982
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.943065881729126,
      "learning_rate": 0.0003618064248811311,
      "loss": 2.8633,
      "step": 99983
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8351227045059204,
      "learning_rate": 0.0003618024220660693,
      "loss": 3.025,
      "step": 99984
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7785415649414062,
      "learning_rate": 0.0003617984192395174,
      "loss": 2.9517,
      "step": 99985
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8033599853515625,
      "learning_rate": 0.0003617944164014759,
      "loss": 3.1564,
      "step": 99986
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.9121246337890625,
      "learning_rate": 0.0003617904135519457,
      "loss": 2.7723,
      "step": 99987
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.4990894794464111,
      "learning_rate": 0.00036178641069092746,
      "loss": 3.1165,
      "step": 99988
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9107418060302734,
      "learning_rate": 0.000361782407818422,
      "loss": 2.8223,
      "step": 99989
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7610329389572144,
      "learning_rate": 0.00036177840493443,
      "loss": 3.0579,
      "step": 99990
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.4565232992172241,
      "learning_rate": 0.00036177440203895224,
      "loss": 2.9683,
      "step": 99991
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5622529983520508,
      "learning_rate": 0.0003617703991319894,
      "loss": 3.1614,
      "step": 99992
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.021697998046875,
      "learning_rate": 0.00036176639621354235,
      "loss": 2.8202,
      "step": 99993
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.632437825202942,
      "learning_rate": 0.0003617623932836118,
      "loss": 3.0094,
      "step": 99994
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.4887174367904663,
      "learning_rate": 0.0003617583903421984,
      "loss": 3.0678,
      "step": 99995
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.932230234146118,
      "learning_rate": 0.00036175438738930287,
      "loss": 3.0766,
      "step": 99996
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0196950435638428,
      "learning_rate": 0.0003617503844249262,
      "loss": 2.9167,
      "step": 99997
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6114670038223267,
      "learning_rate": 0.0003617463814490688,
      "loss": 3.0009,
      "step": 99998
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3348186016082764,
      "learning_rate": 0.00036174237846173165,
      "loss": 2.8737,
      "step": 99999
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0409367084503174,
      "learning_rate": 0.00036173837546291544,
      "loss": 2.8965,
      "step": 100000
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7471823692321777,
      "learning_rate": 0.00036173437245262094,
      "loss": 3.4798,
      "step": 100001
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9159725904464722,
      "learning_rate": 0.00036173036943084875,
      "loss": 2.8521,
      "step": 100002
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.089966058731079,
      "learning_rate": 0.0003617263663975998,
      "loss": 3.0822,
      "step": 100003
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0386483669281006,
      "learning_rate": 0.0003617223633528747,
      "loss": 2.9154,
      "step": 100004
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5753406286239624,
      "learning_rate": 0.0003617183602966743,
      "loss": 3.1792,
      "step": 100005
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8090122938156128,
      "learning_rate": 0.0003617143572289993,
      "loss": 2.6113,
      "step": 100006
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7862024307250977,
      "learning_rate": 0.0003617103541498504,
      "loss": 2.8082,
      "step": 100007
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9231195449829102,
      "learning_rate": 0.00036170635105922834,
      "loss": 3.0764,
      "step": 100008
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7357655763626099,
      "learning_rate": 0.000361702347957134,
      "loss": 2.9407,
      "step": 100009
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.080859661102295,
      "learning_rate": 0.0003616983448435679,
      "loss": 3.1665,
      "step": 100010
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.868734359741211,
      "learning_rate": 0.000361694341718531,
      "loss": 2.824,
      "step": 100011
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.782726526260376,
      "learning_rate": 0.00036169033858202403,
      "loss": 2.9344,
      "step": 100012
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6906824111938477,
      "learning_rate": 0.00036168633543404757,
      "loss": 3.1351,
      "step": 100013
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8221806287765503,
      "learning_rate": 0.0003616823322746024,
      "loss": 2.9415,
      "step": 100014
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.583582878112793,
      "learning_rate": 0.00036167832910368944,
      "loss": 2.8516,
      "step": 100015
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7340837717056274,
      "learning_rate": 0.0003616743259213093,
      "loss": 2.5921,
      "step": 100016
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8849120140075684,
      "learning_rate": 0.00036167032272746267,
      "loss": 2.8907,
      "step": 100017
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7132667303085327,
      "learning_rate": 0.0003616663195221504,
      "loss": 3.0864,
      "step": 100018
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6408803462982178,
      "learning_rate": 0.00036166231630537323,
      "loss": 3.0573,
      "step": 100019
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.880632758140564,
      "learning_rate": 0.00036165831307713176,
      "loss": 3.0764,
      "step": 100020
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9963396787643433,
      "learning_rate": 0.00036165430983742697,
      "loss": 2.8414,
      "step": 100021
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6148316860198975,
      "learning_rate": 0.0003616503065862594,
      "loss": 3.0383,
      "step": 100022
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6276627779006958,
      "learning_rate": 0.0003616463033236299,
      "loss": 2.9921,
      "step": 100023
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.048001289367676,
      "learning_rate": 0.0003616423000495392,
      "loss": 3.1259,
      "step": 100024
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.945373773574829,
      "learning_rate": 0.000361638296763988,
      "loss": 3.2065,
      "step": 100025
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7292131185531616,
      "learning_rate": 0.00036163429346697707,
      "loss": 3.0773,
      "step": 100026
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6886162757873535,
      "learning_rate": 0.00036163029015850724,
      "loss": 3.2557,
      "step": 100027
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.506298065185547,
      "learning_rate": 0.00036162628683857913,
      "loss": 3.028,
      "step": 100028
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.226017951965332,
      "learning_rate": 0.0003616222835071935,
      "loss": 3.2556,
      "step": 100029
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8486217260360718,
      "learning_rate": 0.0003616182801643512,
      "loss": 2.9596,
      "step": 100030
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.6259686946868896,
      "learning_rate": 0.0003616142768100528,
      "loss": 2.7652,
      "step": 100031
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.435493230819702,
      "learning_rate": 0.00036161027344429916,
      "loss": 2.8908,
      "step": 100032
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9454610347747803,
      "learning_rate": 0.00036160627006709113,
      "loss": 3.2247,
      "step": 100033
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8396296501159668,
      "learning_rate": 0.0003616022666784292,
      "loss": 3.1208,
      "step": 100034
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9202630519866943,
      "learning_rate": 0.0003615982632783143,
      "loss": 3.0402,
      "step": 100035
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.998173713684082,
      "learning_rate": 0.0003615942598667471,
      "loss": 2.9533,
      "step": 100036
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9382820129394531,
      "learning_rate": 0.00036159025644372835,
      "loss": 3.0183,
      "step": 100037
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4406046867370605,
      "learning_rate": 0.00036158625300925884,
      "loss": 3.0495,
      "step": 100038
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0158846378326416,
      "learning_rate": 0.00036158224956333934,
      "loss": 3.2635,
      "step": 100039
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.773967981338501,
      "learning_rate": 0.00036157824610597044,
      "loss": 2.9049,
      "step": 100040
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7093945741653442,
      "learning_rate": 0.00036157424263715297,
      "loss": 3.319,
      "step": 100041
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0246105194091797,
      "learning_rate": 0.0003615702391568878,
      "loss": 3.1784,
      "step": 100042
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6870999336242676,
      "learning_rate": 0.0003615662356651755,
      "loss": 2.9695,
      "step": 100043
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.021686553955078,
      "learning_rate": 0.00036156223216201683,
      "loss": 3.0346,
      "step": 100044
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.843518614768982,
      "learning_rate": 0.00036155822864741263,
      "loss": 2.9294,
      "step": 100045
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6279096603393555,
      "learning_rate": 0.00036155422512136354,
      "loss": 2.9217,
      "step": 100046
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6034384965896606,
      "learning_rate": 0.00036155022158387037,
      "loss": 3.1402,
      "step": 100047
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.184008836746216,
      "learning_rate": 0.0003615462180349339,
      "loss": 2.9985,
      "step": 100048
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.75180184841156,
      "learning_rate": 0.0003615422144745548,
      "loss": 2.9645,
      "step": 100049
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0539000034332275,
      "learning_rate": 0.00036153821090273387,
      "loss": 3.2379,
      "step": 100050
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6490941047668457,
      "learning_rate": 0.00036153420731947176,
      "loss": 3.2366,
      "step": 100051
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.855714201927185,
      "learning_rate": 0.00036153020372476925,
      "loss": 3.0743,
      "step": 100052
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.7072153091430664,
      "learning_rate": 0.0003615262001186272,
      "loss": 2.889,
      "step": 100053
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5879356861114502,
      "learning_rate": 0.00036152219650104625,
      "loss": 3.0605,
      "step": 100054
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.733802318572998,
      "learning_rate": 0.0003615181928720271,
      "loss": 3.0631,
      "step": 100055
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.662320613861084,
      "learning_rate": 0.0003615141892315707,
      "loss": 3.194,
      "step": 100056
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4956154823303223,
      "learning_rate": 0.00036151018557967755,
      "loss": 3.1327,
      "step": 100057
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.282536506652832,
      "learning_rate": 0.00036150618191634846,
      "loss": 3.0934,
      "step": 100058
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4809348583221436,
      "learning_rate": 0.0003615021782415843,
      "loss": 2.9955,
      "step": 100059
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1423559188842773,
      "learning_rate": 0.00036149817455538574,
      "loss": 3.101,
      "step": 100060
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.091896057128906,
      "learning_rate": 0.0003614941708577534,
      "loss": 3.1165,
      "step": 100061
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.0752251148223877,
      "learning_rate": 0.00036149016714868813,
      "loss": 2.8616,
      "step": 100062
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8701391220092773,
      "learning_rate": 0.00036148616342819074,
      "loss": 3.1092,
      "step": 100063
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0844876766204834,
      "learning_rate": 0.00036148215969626196,
      "loss": 2.8861,
      "step": 100064
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.145202159881592,
      "learning_rate": 0.00036147815595290243,
      "loss": 2.9508,
      "step": 100065
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.224940061569214,
      "learning_rate": 0.0003614741521981129,
      "loss": 2.9489,
      "step": 100066
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2987959384918213,
      "learning_rate": 0.0003614701484318943,
      "loss": 2.9701,
      "step": 100067
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2109591960906982,
      "learning_rate": 0.00036146614465424714,
      "loss": 2.7439,
      "step": 100068
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.085139036178589,
      "learning_rate": 0.0003614621408651723,
      "loss": 3.0034,
      "step": 100069
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.5011725425720215,
      "learning_rate": 0.0003614581370646705,
      "loss": 3.2388,
      "step": 100070
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.46197247505188,
      "learning_rate": 0.0003614541332527424,
      "loss": 3.0114,
      "step": 100071
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2011966705322266,
      "learning_rate": 0.0003614501294293888,
      "loss": 3.079,
      "step": 100072
    },
    {
      "epoch": 1.3,
      "grad_norm": 3.260798215866089,
      "learning_rate": 0.00036144612559461063,
      "loss": 3.1378,
      "step": 100073
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.961092472076416,
      "learning_rate": 0.0003614421217484083,
      "loss": 2.8629,
      "step": 100074
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8943432569503784,
      "learning_rate": 0.0003614381178907827,
      "loss": 3.1597,
      "step": 100075
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0496063232421875,
      "learning_rate": 0.00036143411402173476,
      "loss": 3.1068,
      "step": 100076
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8065128326416016,
      "learning_rate": 0.00036143011014126496,
      "loss": 2.7973,
      "step": 100077
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.044168949127197,
      "learning_rate": 0.00036142610624937413,
      "loss": 3.1978,
      "step": 100078
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6552200317382812,
      "learning_rate": 0.0003614221023460631,
      "loss": 3.1207,
      "step": 100079
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7224196195602417,
      "learning_rate": 0.0003614180984313324,
      "loss": 3.0436,
      "step": 100080
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.250810384750366,
      "learning_rate": 0.00036141409450518304,
      "loss": 3.0256,
      "step": 100081
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.016850709915161,
      "learning_rate": 0.00036141009056761563,
      "loss": 2.9934,
      "step": 100082
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8487436771392822,
      "learning_rate": 0.00036140608661863093,
      "loss": 3.3639,
      "step": 100083
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2074501514434814,
      "learning_rate": 0.00036140208265822963,
      "loss": 2.7843,
      "step": 100084
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.839707851409912,
      "learning_rate": 0.00036139807868641255,
      "loss": 3.03,
      "step": 100085
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9323369264602661,
      "learning_rate": 0.0003613940747031804,
      "loss": 3.1674,
      "step": 100086
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.396475076675415,
      "learning_rate": 0.00036139007070853387,
      "loss": 3.0332,
      "step": 100087
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.8618743419647217,
      "learning_rate": 0.00036138606670247384,
      "loss": 2.7723,
      "step": 100088
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6167607307434082,
      "learning_rate": 0.000361382062685001,
      "loss": 2.821,
      "step": 100089
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0938663482666016,
      "learning_rate": 0.000361378058656116,
      "loss": 2.8836,
      "step": 100090
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.066499948501587,
      "learning_rate": 0.00036137405461581976,
      "loss": 3.019,
      "step": 100091
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9819988012313843,
      "learning_rate": 0.0003613700505641128,
      "loss": 2.9871,
      "step": 100092
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6427057981491089,
      "learning_rate": 0.0003613660465009961,
      "loss": 2.9841,
      "step": 100093
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6072171926498413,
      "learning_rate": 0.00036136204242647027,
      "loss": 2.727,
      "step": 100094
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8750758171081543,
      "learning_rate": 0.0003613580383405361,
      "loss": 2.7196,
      "step": 100095
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6749423742294312,
      "learning_rate": 0.00036135403424319425,
      "loss": 3.0005,
      "step": 100096
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8544269800186157,
      "learning_rate": 0.0003613500301344456,
      "loss": 3.0795,
      "step": 100097
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9385820627212524,
      "learning_rate": 0.0003613460260142908,
      "loss": 3.0663,
      "step": 100098
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8395298719406128,
      "learning_rate": 0.0003613420218827305,
      "loss": 3.0956,
      "step": 100099
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.877556562423706,
      "learning_rate": 0.0003613380177397657,
      "loss": 3.1412,
      "step": 100100
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7389442920684814,
      "learning_rate": 0.0003613340135853969,
      "loss": 2.8528,
      "step": 100101
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0065197944641113,
      "learning_rate": 0.000361330009419625,
      "loss": 3.1903,
      "step": 100102
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0220205783843994,
      "learning_rate": 0.00036132600524245076,
      "loss": 2.9221,
      "step": 100103
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6956332921981812,
      "learning_rate": 0.00036132200105387484,
      "loss": 3.0278,
      "step": 100104
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.711922287940979,
      "learning_rate": 0.00036131799685389787,
      "loss": 3.1187,
      "step": 100105
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5996921062469482,
      "learning_rate": 0.0003613139926425209,
      "loss": 2.8247,
      "step": 100106
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9456428289413452,
      "learning_rate": 0.0003613099884197444,
      "loss": 3.0542,
      "step": 100107
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8316587209701538,
      "learning_rate": 0.00036130598418556917,
      "loss": 3.0888,
      "step": 100108
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.442436695098877,
      "learning_rate": 0.0003613019799399961,
      "loss": 2.8495,
      "step": 100109
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6478756666183472,
      "learning_rate": 0.0003612979756830258,
      "loss": 2.9129,
      "step": 100110
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7132315635681152,
      "learning_rate": 0.000361293971414659,
      "loss": 2.9886,
      "step": 100111
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6863048076629639,
      "learning_rate": 0.0003612899671348966,
      "loss": 2.8964,
      "step": 100112
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.615452766418457,
      "learning_rate": 0.0003612859628437392,
      "loss": 2.7909,
      "step": 100113
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9730428457260132,
      "learning_rate": 0.00036128195854118753,
      "loss": 3.0408,
      "step": 100114
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6816887855529785,
      "learning_rate": 0.00036127795422724243,
      "loss": 2.7287,
      "step": 100115
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.269415855407715,
      "learning_rate": 0.0003612739499019046,
      "loss": 2.9303,
      "step": 100116
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.541428565979004,
      "learning_rate": 0.00036126994556517477,
      "loss": 3.061,
      "step": 100117
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.631898045539856,
      "learning_rate": 0.00036126594121705367,
      "loss": 3.1336,
      "step": 100118
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0103588104248047,
      "learning_rate": 0.0003612619368575421,
      "loss": 3.1133,
      "step": 100119
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.046341896057129,
      "learning_rate": 0.00036125793248664083,
      "loss": 2.9964,
      "step": 100120
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8922561407089233,
      "learning_rate": 0.00036125392810435055,
      "loss": 2.6992,
      "step": 100121
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1564812660217285,
      "learning_rate": 0.000361249923710672,
      "loss": 3.0061,
      "step": 100122
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.523977756500244,
      "learning_rate": 0.0003612459193056058,
      "loss": 2.9088,
      "step": 100123
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.467438220977783,
      "learning_rate": 0.00036124191488915293,
      "loss": 2.936,
      "step": 100124
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.028095245361328,
      "learning_rate": 0.00036123791046131407,
      "loss": 2.7344,
      "step": 100125
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.108569383621216,
      "learning_rate": 0.00036123390602208985,
      "loss": 2.9385,
      "step": 100126
    },
    {
      "epoch": 1.3,
      "grad_norm": 5.508223533630371,
      "learning_rate": 0.0003612299015714812,
      "loss": 3.0041,
      "step": 100127
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7154616117477417,
      "learning_rate": 0.0003612258971094887,
      "loss": 2.9353,
      "step": 100128
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4892830848693848,
      "learning_rate": 0.00036122189263611314,
      "loss": 3.0546,
      "step": 100129
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7534172534942627,
      "learning_rate": 0.0003612178881513552,
      "loss": 3.0764,
      "step": 100130
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7913732528686523,
      "learning_rate": 0.0003612138836552158,
      "loss": 3.047,
      "step": 100131
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.907751441001892,
      "learning_rate": 0.0003612098791476956,
      "loss": 2.9766,
      "step": 100132
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6904737949371338,
      "learning_rate": 0.00036120587462879525,
      "loss": 3.0786,
      "step": 100133
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5314127206802368,
      "learning_rate": 0.00036120187009851566,
      "loss": 2.8603,
      "step": 100134
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0609652996063232,
      "learning_rate": 0.00036119786555685737,
      "loss": 3.3647,
      "step": 100135
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.679892897605896,
      "learning_rate": 0.0003611938610038213,
      "loss": 3.1927,
      "step": 100136
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.969201922416687,
      "learning_rate": 0.0003611898564394082,
      "loss": 2.7893,
      "step": 100137
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8219809532165527,
      "learning_rate": 0.00036118585186361863,
      "loss": 3.008,
      "step": 100138
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7856982946395874,
      "learning_rate": 0.00036118184727645356,
      "loss": 2.859,
      "step": 100139
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9294310808181763,
      "learning_rate": 0.0003611778426779135,
      "loss": 2.8624,
      "step": 100140
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5156137943267822,
      "learning_rate": 0.0003611738380679995,
      "loss": 3.0167,
      "step": 100141
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.97490656375885,
      "learning_rate": 0.000361169833446712,
      "loss": 2.9114,
      "step": 100142
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4644460678100586,
      "learning_rate": 0.00036116582881405194,
      "loss": 3.2764,
      "step": 100143
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1422557830810547,
      "learning_rate": 0.0003611618241700199,
      "loss": 3.0612,
      "step": 100144
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6556230783462524,
      "learning_rate": 0.00036115781951461684,
      "loss": 2.9691,
      "step": 100145
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3769872188568115,
      "learning_rate": 0.0003611538148478433,
      "loss": 2.769,
      "step": 100146
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.174421548843384,
      "learning_rate": 0.00036114981016970016,
      "loss": 2.8882,
      "step": 100147
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9736361503601074,
      "learning_rate": 0.00036114580548018814,
      "loss": 3.1671,
      "step": 100148
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.75909423828125,
      "learning_rate": 0.0003611418007793079,
      "loss": 3.1042,
      "step": 100149
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.040207386016846,
      "learning_rate": 0.0003611377960670603,
      "loss": 3.0478,
      "step": 100150
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9370787143707275,
      "learning_rate": 0.00036113379134344593,
      "loss": 3.3856,
      "step": 100151
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8733304738998413,
      "learning_rate": 0.0003611297866084658,
      "loss": 3.3813,
      "step": 100152
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.4368181228637695,
      "learning_rate": 0.00036112578186212036,
      "loss": 3.0993,
      "step": 100153
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8420546054840088,
      "learning_rate": 0.00036112177710441044,
      "loss": 2.8806,
      "step": 100154
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.811202049255371,
      "learning_rate": 0.000361117772335337,
      "loss": 3.0642,
      "step": 100155
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8227193355560303,
      "learning_rate": 0.0003611137675549005,
      "loss": 3.0856,
      "step": 100156
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.899613618850708,
      "learning_rate": 0.00036110976276310184,
      "loss": 3.0821,
      "step": 100157
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7705092430114746,
      "learning_rate": 0.0003611057579599417,
      "loss": 2.8864,
      "step": 100158
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6459211111068726,
      "learning_rate": 0.0003611017531454208,
      "loss": 3.3107,
      "step": 100159
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5899282693862915,
      "learning_rate": 0.00036109774831953996,
      "loss": 2.8635,
      "step": 100160
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7149323225021362,
      "learning_rate": 0.00036109374348229997,
      "loss": 2.9754,
      "step": 100161
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.918395757675171,
      "learning_rate": 0.00036108973863370144,
      "loss": 2.9937,
      "step": 100162
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6759891510009766,
      "learning_rate": 0.0003610857337737452,
      "loss": 2.9334,
      "step": 100163
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6710281372070312,
      "learning_rate": 0.0003610817289024319,
      "loss": 2.9667,
      "step": 100164
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.025452136993408,
      "learning_rate": 0.00036107772401976245,
      "loss": 2.9047,
      "step": 100165
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.293788194656372,
      "learning_rate": 0.00036107371912573737,
      "loss": 3.109,
      "step": 100166
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7304298877716064,
      "learning_rate": 0.0003610697142203577,
      "loss": 2.758,
      "step": 100167
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.3547539710998535,
      "learning_rate": 0.0003610657093036239,
      "loss": 3.1074,
      "step": 100168
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.60774564743042,
      "learning_rate": 0.0003610617043755368,
      "loss": 2.959,
      "step": 100169
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.010739803314209,
      "learning_rate": 0.0003610576994360973,
      "loss": 2.7749,
      "step": 100170
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8766980171203613,
      "learning_rate": 0.0003610536944853059,
      "loss": 2.7112,
      "step": 100171
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5359885692596436,
      "learning_rate": 0.0003610496895231635,
      "loss": 2.8474,
      "step": 100172
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6615772247314453,
      "learning_rate": 0.0003610456845496709,
      "loss": 2.9607,
      "step": 100173
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9669524431228638,
      "learning_rate": 0.00036104167956482866,
      "loss": 3.1086,
      "step": 100174
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7573760747909546,
      "learning_rate": 0.00036103767456863764,
      "loss": 2.922,
      "step": 100175
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7889350652694702,
      "learning_rate": 0.00036103366956109865,
      "loss": 2.9191,
      "step": 100176
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9323327541351318,
      "learning_rate": 0.00036102966454221224,
      "loss": 3.0285,
      "step": 100177
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6503643989562988,
      "learning_rate": 0.0003610256595119792,
      "loss": 2.9143,
      "step": 100178
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9350874423980713,
      "learning_rate": 0.0003610216544704005,
      "loss": 3.0078,
      "step": 100179
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8944370746612549,
      "learning_rate": 0.0003610176494174766,
      "loss": 2.9052,
      "step": 100180
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.745949149131775,
      "learning_rate": 0.00036101364435320847,
      "loss": 2.9022,
      "step": 100181
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.629981279373169,
      "learning_rate": 0.0003610096392775967,
      "loss": 2.9282,
      "step": 100182
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7517075538635254,
      "learning_rate": 0.0003610056341906421,
      "loss": 2.7673,
      "step": 100183
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5632293224334717,
      "learning_rate": 0.00036100162909234534,
      "loss": 2.7858,
      "step": 100184
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9376449584960938,
      "learning_rate": 0.00036099762398270735,
      "loss": 2.8337,
      "step": 100185
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7826358079910278,
      "learning_rate": 0.00036099361886172867,
      "loss": 3.1764,
      "step": 100186
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7797203063964844,
      "learning_rate": 0.00036098961372941005,
      "loss": 3.2303,
      "step": 100187
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.90933096408844,
      "learning_rate": 0.00036098560858575246,
      "loss": 3.1866,
      "step": 100188
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9098817110061646,
      "learning_rate": 0.0003609816034307564,
      "loss": 2.8693,
      "step": 100189
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7621630430221558,
      "learning_rate": 0.0003609775982644227,
      "loss": 2.9765,
      "step": 100190
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.697479009628296,
      "learning_rate": 0.00036097359308675213,
      "loss": 2.8801,
      "step": 100191
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7097667455673218,
      "learning_rate": 0.00036096958789774545,
      "loss": 2.8761,
      "step": 100192
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.6696293354034424,
      "learning_rate": 0.00036096558269740333,
      "loss": 3.0049,
      "step": 100193
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.355071783065796,
      "learning_rate": 0.00036096157748572664,
      "loss": 2.905,
      "step": 100194
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.765066385269165,
      "learning_rate": 0.00036095757226271597,
      "loss": 3.0378,
      "step": 100195
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0896341800689697,
      "learning_rate": 0.00036095356702837213,
      "loss": 3.0583,
      "step": 100196
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.594198226928711,
      "learning_rate": 0.0003609495617826959,
      "loss": 3.0633,
      "step": 100197
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7505533695220947,
      "learning_rate": 0.000360945556525688,
      "loss": 2.7067,
      "step": 100198
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7457575798034668,
      "learning_rate": 0.00036094155125734916,
      "loss": 3.0835,
      "step": 100199
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8019980192184448,
      "learning_rate": 0.0003609375459776801,
      "loss": 3.0003,
      "step": 100200
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.0721254348754883,
      "learning_rate": 0.0003609335406866817,
      "loss": 2.7102,
      "step": 100201
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.4034535884857178,
      "learning_rate": 0.00036092953538435454,
      "loss": 2.9715,
      "step": 100202
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.135432243347168,
      "learning_rate": 0.0003609255300706994,
      "loss": 2.9646,
      "step": 100203
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.212559461593628,
      "learning_rate": 0.00036092152474571714,
      "loss": 2.6347,
      "step": 100204
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.683883786201477,
      "learning_rate": 0.0003609175194094083,
      "loss": 2.9576,
      "step": 100205
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.745728850364685,
      "learning_rate": 0.0003609135140617738,
      "loss": 2.9546,
      "step": 100206
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6706868410110474,
      "learning_rate": 0.0003609095087028144,
      "loss": 2.829,
      "step": 100207
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.643635869026184,
      "learning_rate": 0.00036090550333253067,
      "loss": 3.0115,
      "step": 100208
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.075409412384033,
      "learning_rate": 0.0003609014979509235,
      "loss": 3.1955,
      "step": 100209
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.1092562675476074,
      "learning_rate": 0.00036089749255799363,
      "loss": 3.0287,
      "step": 100210
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.068251371383667,
      "learning_rate": 0.0003608934871537417,
      "loss": 2.9476,
      "step": 100211
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7712894678115845,
      "learning_rate": 0.0003608894817381685,
      "loss": 3.0149,
      "step": 100212
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.5487706661224365,
      "learning_rate": 0.0003608854763112749,
      "loss": 2.977,
      "step": 100213
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.407498359680176,
      "learning_rate": 0.0003608814708730614,
      "loss": 3.0379,
      "step": 100214
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6864479780197144,
      "learning_rate": 0.00036087746542352893,
      "loss": 3.2125,
      "step": 100215
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8460147380828857,
      "learning_rate": 0.0003608734599626783,
      "loss": 3.0772,
      "step": 100216
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.697129726409912,
      "learning_rate": 0.0003608694544905101,
      "loss": 2.9903,
      "step": 100217
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7306804656982422,
      "learning_rate": 0.00036086544900702506,
      "loss": 3.2938,
      "step": 100218
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6412146091461182,
      "learning_rate": 0.000360861443512224,
      "loss": 2.9824,
      "step": 100219
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6111860275268555,
      "learning_rate": 0.0003608574380061077,
      "loss": 3.3679,
      "step": 100220
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.6240769624710083,
      "learning_rate": 0.00036085343248867677,
      "loss": 2.8309,
      "step": 100221
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.8484588861465454,
      "learning_rate": 0.00036084942695993215,
      "loss": 3.0635,
      "step": 100222
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.2573421001434326,
      "learning_rate": 0.00036084542141987436,
      "loss": 2.9366,
      "step": 100223
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.7135995626449585,
      "learning_rate": 0.0003608414158685043,
      "loss": 3.1467,
      "step": 100224
    },
    {
      "epoch": 1.3,
      "grad_norm": 1.9306883811950684,
      "learning_rate": 0.0003608374103058226,
      "loss": 3.1219,
      "step": 100225
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5085699558258057,
      "learning_rate": 0.0003608334047318302,
      "loss": 2.8016,
      "step": 100226
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9297112226486206,
      "learning_rate": 0.0003608293991465277,
      "loss": 3.2025,
      "step": 100227
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9065430164337158,
      "learning_rate": 0.0003608253935499158,
      "loss": 3.1006,
      "step": 100228
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8661549091339111,
      "learning_rate": 0.0003608213879419954,
      "loss": 3.104,
      "step": 100229
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.5471363067626953,
      "learning_rate": 0.00036081738232276707,
      "loss": 2.9385,
      "step": 100230
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.986960530281067,
      "learning_rate": 0.0003608133766922317,
      "loss": 2.8486,
      "step": 100231
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9021127223968506,
      "learning_rate": 0.0003608093710503899,
      "loss": 2.9613,
      "step": 100232
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.358563184738159,
      "learning_rate": 0.00036080536539724264,
      "loss": 3.1826,
      "step": 100233
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.7699286937713623,
      "learning_rate": 0.0003608013597327904,
      "loss": 3.0561,
      "step": 100234
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4503002166748047,
      "learning_rate": 0.000360797354057034,
      "loss": 3.1204,
      "step": 100235
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8211359977722168,
      "learning_rate": 0.00036079334836997435,
      "loss": 2.9948,
      "step": 100236
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.679086923599243,
      "learning_rate": 0.00036078934267161193,
      "loss": 2.8716,
      "step": 100237
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0452003479003906,
      "learning_rate": 0.00036078533696194773,
      "loss": 3.1897,
      "step": 100238
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6002410650253296,
      "learning_rate": 0.0003607813312409823,
      "loss": 3.1324,
      "step": 100239
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.166562795639038,
      "learning_rate": 0.00036077732550871656,
      "loss": 2.9467,
      "step": 100240
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6908067464828491,
      "learning_rate": 0.0003607733197651511,
      "loss": 2.8256,
      "step": 100241
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.778845191001892,
      "learning_rate": 0.0003607693140102868,
      "loss": 2.6293,
      "step": 100242
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8029555082321167,
      "learning_rate": 0.0003607653082441243,
      "loss": 2.9606,
      "step": 100243
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.976491928100586,
      "learning_rate": 0.00036076130246666437,
      "loss": 2.9726,
      "step": 100244
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3184032440185547,
      "learning_rate": 0.00036075729667790773,
      "loss": 2.8309,
      "step": 100245
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8295854330062866,
      "learning_rate": 0.0003607532908778553,
      "loss": 2.9662,
      "step": 100246
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0649538040161133,
      "learning_rate": 0.0003607492850665076,
      "loss": 3.005,
      "step": 100247
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.092231273651123,
      "learning_rate": 0.0003607452792438655,
      "loss": 2.8259,
      "step": 100248
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5657975673675537,
      "learning_rate": 0.00036074127340992966,
      "loss": 3.047,
      "step": 100249
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0498788356781006,
      "learning_rate": 0.00036073726756470084,
      "loss": 2.9194,
      "step": 100250
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4104197025299072,
      "learning_rate": 0.0003607332617081799,
      "loss": 2.9766,
      "step": 100251
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.7395026683807373,
      "learning_rate": 0.0003607292558403674,
      "loss": 2.6886,
      "step": 100252
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7211531400680542,
      "learning_rate": 0.00036072524996126425,
      "loss": 2.8543,
      "step": 100253
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.719428539276123,
      "learning_rate": 0.00036072124407087114,
      "loss": 2.965,
      "step": 100254
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9738330841064453,
      "learning_rate": 0.0003607172381691888,
      "loss": 2.866,
      "step": 100255
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.736680030822754,
      "learning_rate": 0.000360713232256218,
      "loss": 3.1945,
      "step": 100256
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.298891305923462,
      "learning_rate": 0.0003607092263319594,
      "loss": 2.9062,
      "step": 100257
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.59185791015625,
      "learning_rate": 0.00036070522039641375,
      "loss": 2.9619,
      "step": 100258
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7437902688980103,
      "learning_rate": 0.00036070121444958206,
      "loss": 2.9009,
      "step": 100259
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5548663139343262,
      "learning_rate": 0.0003606972084914647,
      "loss": 2.7603,
      "step": 100260
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9885982275009155,
      "learning_rate": 0.00036069320252206263,
      "loss": 2.8884,
      "step": 100261
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6218535900115967,
      "learning_rate": 0.0003606891965413766,
      "loss": 2.8311,
      "step": 100262
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6689084768295288,
      "learning_rate": 0.0003606851905494072,
      "loss": 2.9574,
      "step": 100263
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8454076051712036,
      "learning_rate": 0.00036068118454615533,
      "loss": 3.0944,
      "step": 100264
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.807908535003662,
      "learning_rate": 0.00036067717853162167,
      "loss": 3.0652,
      "step": 100265
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5993508100509644,
      "learning_rate": 0.000360673172505807,
      "loss": 3.151,
      "step": 100266
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6655563116073608,
      "learning_rate": 0.000360669166468712,
      "loss": 2.9805,
      "step": 100267
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.184177875518799,
      "learning_rate": 0.0003606651604203375,
      "loss": 2.969,
      "step": 100268
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.953687310218811,
      "learning_rate": 0.0003606611543606842,
      "loss": 2.9044,
      "step": 100269
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0712664127349854,
      "learning_rate": 0.00036065714828975283,
      "loss": 2.9442,
      "step": 100270
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6169370412826538,
      "learning_rate": 0.00036065314220754425,
      "loss": 2.9131,
      "step": 100271
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.177813768386841,
      "learning_rate": 0.000360649136114059,
      "loss": 3.0227,
      "step": 100272
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.833401083946228,
      "learning_rate": 0.0003606451300092979,
      "loss": 2.7821,
      "step": 100273
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.03165864944458,
      "learning_rate": 0.0003606411238932618,
      "loss": 2.9432,
      "step": 100274
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8807268142700195,
      "learning_rate": 0.0003606371177659513,
      "loss": 3.2205,
      "step": 100275
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.685319423675537,
      "learning_rate": 0.00036063311162736725,
      "loss": 3.1252,
      "step": 100276
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.564105272293091,
      "learning_rate": 0.0003606291054775104,
      "loss": 2.9151,
      "step": 100277
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8170455694198608,
      "learning_rate": 0.00036062509931638145,
      "loss": 3.2204,
      "step": 100278
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8037039041519165,
      "learning_rate": 0.00036062109314398107,
      "loss": 2.9956,
      "step": 100279
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.447690010070801,
      "learning_rate": 0.00036061708696031013,
      "loss": 3.1954,
      "step": 100280
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7356486320495605,
      "learning_rate": 0.00036061308076536935,
      "loss": 3.0879,
      "step": 100281
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5592809915542603,
      "learning_rate": 0.00036060907455915937,
      "loss": 3.0562,
      "step": 100282
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.515892505645752,
      "learning_rate": 0.00036060506834168117,
      "loss": 3.0354,
      "step": 100283
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5763078927993774,
      "learning_rate": 0.0003606010621129353,
      "loss": 3.1353,
      "step": 100284
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3617639541625977,
      "learning_rate": 0.00036059705587292244,
      "loss": 2.9065,
      "step": 100285
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0702145099639893,
      "learning_rate": 0.0003605930496216436,
      "loss": 3.0233,
      "step": 100286
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3071634769439697,
      "learning_rate": 0.00036058904335909927,
      "loss": 3.3096,
      "step": 100287
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.7121477127075195,
      "learning_rate": 0.0003605850370852902,
      "loss": 3.1003,
      "step": 100288
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.115412950515747,
      "learning_rate": 0.0003605810308002174,
      "loss": 2.801,
      "step": 100289
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7692627906799316,
      "learning_rate": 0.0003605770245038814,
      "loss": 2.9791,
      "step": 100290
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5927326679229736,
      "learning_rate": 0.0003605730181962829,
      "loss": 2.9785,
      "step": 100291
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.681340217590332,
      "learning_rate": 0.00036056901187742286,
      "loss": 2.8461,
      "step": 100292
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.087660789489746,
      "learning_rate": 0.0003605650055473018,
      "loss": 3.0461,
      "step": 100293
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0557620525360107,
      "learning_rate": 0.0003605609992059206,
      "loss": 2.9806,
      "step": 100294
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7630980014801025,
      "learning_rate": 0.00036055699285328,
      "loss": 3.1656,
      "step": 100295
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8259406089782715,
      "learning_rate": 0.0003605529864893806,
      "loss": 3.0489,
      "step": 100296
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.764710545539856,
      "learning_rate": 0.0003605489801142233,
      "loss": 3.0197,
      "step": 100297
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8703718185424805,
      "learning_rate": 0.0003605449737278089,
      "loss": 2.9438,
      "step": 100298
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6942967176437378,
      "learning_rate": 0.00036054096733013793,
      "loss": 3.0259,
      "step": 100299
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.979580283164978,
      "learning_rate": 0.0003605369609212113,
      "loss": 2.9213,
      "step": 100300
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8937791585922241,
      "learning_rate": 0.0003605329545010298,
      "loss": 2.9972,
      "step": 100301
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9522154331207275,
      "learning_rate": 0.00036052894806959396,
      "loss": 2.8942,
      "step": 100302
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.7477385997772217,
      "learning_rate": 0.00036052494162690463,
      "loss": 2.9866,
      "step": 100303
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.191636085510254,
      "learning_rate": 0.0003605209351729627,
      "loss": 3.2531,
      "step": 100304
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8307514190673828,
      "learning_rate": 0.00036051692870776864,
      "loss": 3.2943,
      "step": 100305
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8101924657821655,
      "learning_rate": 0.00036051292223132343,
      "loss": 3.0671,
      "step": 100306
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8528385162353516,
      "learning_rate": 0.0003605089157436277,
      "loss": 3.0652,
      "step": 100307
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.06491756439209,
      "learning_rate": 0.0003605049092446822,
      "loss": 3.1478,
      "step": 100308
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9532238245010376,
      "learning_rate": 0.0003605009027344877,
      "loss": 2.9749,
      "step": 100309
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5793051719665527,
      "learning_rate": 0.000360496896213045,
      "loss": 3.0895,
      "step": 100310
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.868180513381958,
      "learning_rate": 0.0003604928896803547,
      "loss": 3.18,
      "step": 100311
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3413281440734863,
      "learning_rate": 0.0003604888831364177,
      "loss": 3.1553,
      "step": 100312
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8482682704925537,
      "learning_rate": 0.0003604848765812346,
      "loss": 2.8299,
      "step": 100313
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6866928339004517,
      "learning_rate": 0.0003604808700148063,
      "loss": 3.2856,
      "step": 100314
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2886836528778076,
      "learning_rate": 0.0003604768634371334,
      "loss": 2.764,
      "step": 100315
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7116467952728271,
      "learning_rate": 0.0003604728568482167,
      "loss": 3.0565,
      "step": 100316
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7794334888458252,
      "learning_rate": 0.000360468850248057,
      "loss": 3.0242,
      "step": 100317
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2273755073547363,
      "learning_rate": 0.000360464843636655,
      "loss": 2.8203,
      "step": 100318
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.244469404220581,
      "learning_rate": 0.0003604608370140114,
      "loss": 2.9689,
      "step": 100319
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2983741760253906,
      "learning_rate": 0.000360456830380127,
      "loss": 2.8443,
      "step": 100320
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5913530588150024,
      "learning_rate": 0.00036045282373500266,
      "loss": 2.888,
      "step": 100321
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.28939151763916,
      "learning_rate": 0.00036044881707863883,
      "loss": 3.0291,
      "step": 100322
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9242143630981445,
      "learning_rate": 0.0003604448104110365,
      "loss": 3.0399,
      "step": 100323
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8306033611297607,
      "learning_rate": 0.0003604408037321963,
      "loss": 3.0744,
      "step": 100324
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.717432975769043,
      "learning_rate": 0.00036043679704211905,
      "loss": 2.8864,
      "step": 100325
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9352707862854004,
      "learning_rate": 0.00036043279034080544,
      "loss": 2.8606,
      "step": 100326
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9300596714019775,
      "learning_rate": 0.00036042878362825624,
      "loss": 2.9809,
      "step": 100327
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1428654193878174,
      "learning_rate": 0.0003604247769044722,
      "loss": 3.0982,
      "step": 100328
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5834569931030273,
      "learning_rate": 0.00036042077016945406,
      "loss": 3.0655,
      "step": 100329
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.85236394405365,
      "learning_rate": 0.00036041676342320254,
      "loss": 2.87,
      "step": 100330
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.247760534286499,
      "learning_rate": 0.0003604127566657184,
      "loss": 3.0743,
      "step": 100331
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6527281999588013,
      "learning_rate": 0.0003604087498970024,
      "loss": 2.9549,
      "step": 100332
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.619175672531128,
      "learning_rate": 0.00036040474311705524,
      "loss": 3.0283,
      "step": 100333
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.593482255935669,
      "learning_rate": 0.0003604007363258777,
      "loss": 2.8179,
      "step": 100334
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5135847330093384,
      "learning_rate": 0.00036039672952347063,
      "loss": 3.3153,
      "step": 100335
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9151942729949951,
      "learning_rate": 0.0003603927227098346,
      "loss": 2.8062,
      "step": 100336
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1337783336639404,
      "learning_rate": 0.00036038871588497034,
      "loss": 2.9277,
      "step": 100337
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8679991960525513,
      "learning_rate": 0.0003603847090488788,
      "loss": 3.0058,
      "step": 100338
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8383114337921143,
      "learning_rate": 0.00036038070220156056,
      "loss": 2.982,
      "step": 100339
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7144559621810913,
      "learning_rate": 0.0003603766953430163,
      "loss": 3.037,
      "step": 100340
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.7271108627319336,
      "learning_rate": 0.0003603726884732471,
      "loss": 2.7405,
      "step": 100341
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.208913803100586,
      "learning_rate": 0.00036036868159225325,
      "loss": 2.8466,
      "step": 100342
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3050708770751953,
      "learning_rate": 0.00036036467470003587,
      "loss": 2.8173,
      "step": 100343
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9259768724441528,
      "learning_rate": 0.0003603606677965955,
      "loss": 3.0637,
      "step": 100344
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9667391777038574,
      "learning_rate": 0.00036035666088193294,
      "loss": 3.1327,
      "step": 100345
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.714787006378174,
      "learning_rate": 0.00036035265395604896,
      "loss": 2.997,
      "step": 100346
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.09841251373291,
      "learning_rate": 0.0003603486470189443,
      "loss": 2.7995,
      "step": 100347
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9215595722198486,
      "learning_rate": 0.0003603446400706196,
      "loss": 2.9469,
      "step": 100348
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.112164258956909,
      "learning_rate": 0.00036034063311107573,
      "loss": 2.7669,
      "step": 100349
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.016596794128418,
      "learning_rate": 0.0003603366261403135,
      "loss": 2.6878,
      "step": 100350
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9659295082092285,
      "learning_rate": 0.0003603326191583335,
      "loss": 2.9942,
      "step": 100351
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8843313455581665,
      "learning_rate": 0.00036032861216513644,
      "loss": 2.8489,
      "step": 100352
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6721025705337524,
      "learning_rate": 0.0003603246051607232,
      "loss": 3.0296,
      "step": 100353
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5881285667419434,
      "learning_rate": 0.0003603205981450945,
      "loss": 3.1872,
      "step": 100354
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0357511043548584,
      "learning_rate": 0.00036031659111825104,
      "loss": 2.817,
      "step": 100355
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7065192461013794,
      "learning_rate": 0.0003603125840801936,
      "loss": 3.2515,
      "step": 100356
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7548545598983765,
      "learning_rate": 0.00036030857703092293,
      "loss": 2.8421,
      "step": 100357
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.410369396209717,
      "learning_rate": 0.00036030456997043975,
      "loss": 2.6516,
      "step": 100358
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4123992919921875,
      "learning_rate": 0.0003603005628987448,
      "loss": 2.8201,
      "step": 100359
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.680006265640259,
      "learning_rate": 0.0003602965558158388,
      "loss": 2.785,
      "step": 100360
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5286308526992798,
      "learning_rate": 0.00036029254872172253,
      "loss": 3.3206,
      "step": 100361
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4161876440048218,
      "learning_rate": 0.00036028854161639686,
      "loss": 2.9942,
      "step": 100362
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5217902660369873,
      "learning_rate": 0.00036028453449986233,
      "loss": 2.9415,
      "step": 100363
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4687299728393555,
      "learning_rate": 0.0003602805273721197,
      "loss": 2.8761,
      "step": 100364
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1158721446990967,
      "learning_rate": 0.0003602765202331699,
      "loss": 3.117,
      "step": 100365
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7925357818603516,
      "learning_rate": 0.0003602725130830135,
      "loss": 3.1817,
      "step": 100366
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.6171510219573975,
      "learning_rate": 0.00036026850592165127,
      "loss": 2.8613,
      "step": 100367
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6276737451553345,
      "learning_rate": 0.00036026449874908403,
      "loss": 2.8989,
      "step": 100368
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.334705352783203,
      "learning_rate": 0.00036026049156531243,
      "loss": 2.9457,
      "step": 100369
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9891921281814575,
      "learning_rate": 0.00036025648437033734,
      "loss": 3.0834,
      "step": 100370
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8641059398651123,
      "learning_rate": 0.00036025247716415945,
      "loss": 2.8691,
      "step": 100371
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4465153217315674,
      "learning_rate": 0.00036024846994677943,
      "loss": 3.0448,
      "step": 100372
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7130157947540283,
      "learning_rate": 0.00036024446271819803,
      "loss": 2.9705,
      "step": 100373
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7705649137496948,
      "learning_rate": 0.00036024045547841617,
      "loss": 2.9807,
      "step": 100374
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0005931854248047,
      "learning_rate": 0.0003602364482274344,
      "loss": 3.1372,
      "step": 100375
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4964238405227661,
      "learning_rate": 0.0003602324409652535,
      "loss": 3.1369,
      "step": 100376
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7203004360198975,
      "learning_rate": 0.0003602284336918744,
      "loss": 3.052,
      "step": 100377
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8387632369995117,
      "learning_rate": 0.00036022442640729757,
      "loss": 3.2227,
      "step": 100378
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1131091117858887,
      "learning_rate": 0.0003602204191115239,
      "loss": 2.9419,
      "step": 100379
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.360062837600708,
      "learning_rate": 0.00036021641180455417,
      "loss": 3.1207,
      "step": 100380
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.293463945388794,
      "learning_rate": 0.000360212404486389,
      "loss": 2.8555,
      "step": 100381
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6245616674423218,
      "learning_rate": 0.0003602083971570292,
      "loss": 3.1542,
      "step": 100382
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7119468450546265,
      "learning_rate": 0.0003602043898164756,
      "loss": 2.8987,
      "step": 100383
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9282617568969727,
      "learning_rate": 0.0003602003824647288,
      "loss": 2.8454,
      "step": 100384
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.247974395751953,
      "learning_rate": 0.0003601963751017896,
      "loss": 3.058,
      "step": 100385
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.656517267227173,
      "learning_rate": 0.0003601923677276589,
      "loss": 2.8618,
      "step": 100386
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.341017484664917,
      "learning_rate": 0.00036018836034233713,
      "loss": 3.0997,
      "step": 100387
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2930538654327393,
      "learning_rate": 0.0003601843529458253,
      "loss": 3.1743,
      "step": 100388
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.435258388519287,
      "learning_rate": 0.000360180345538124,
      "loss": 3.1513,
      "step": 100389
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0329861640930176,
      "learning_rate": 0.00036017633811923414,
      "loss": 3.0884,
      "step": 100390
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.863079309463501,
      "learning_rate": 0.00036017233068915625,
      "loss": 2.9287,
      "step": 100391
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4360060691833496,
      "learning_rate": 0.00036016832324789125,
      "loss": 3.1278,
      "step": 100392
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.798100233078003,
      "learning_rate": 0.0003601643157954398,
      "loss": 3.0276,
      "step": 100393
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.7747697830200195,
      "learning_rate": 0.0003601603083318027,
      "loss": 2.9338,
      "step": 100394
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8824529647827148,
      "learning_rate": 0.00036015630085698064,
      "loss": 2.8411,
      "step": 100395
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6822508573532104,
      "learning_rate": 0.0003601522933709744,
      "loss": 3.0064,
      "step": 100396
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.90620756149292,
      "learning_rate": 0.0003601482858737847,
      "loss": 2.8508,
      "step": 100397
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.3670878410339355,
      "learning_rate": 0.0003601442783654123,
      "loss": 2.958,
      "step": 100398
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3610482215881348,
      "learning_rate": 0.000360140270845858,
      "loss": 3.1412,
      "step": 100399
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7771766185760498,
      "learning_rate": 0.0003601362633151224,
      "loss": 2.9961,
      "step": 100400
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9458398818969727,
      "learning_rate": 0.00036013225577320637,
      "loss": 3.0023,
      "step": 100401
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.02211332321167,
      "learning_rate": 0.0003601282482201107,
      "loss": 3.0227,
      "step": 100402
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6661604642868042,
      "learning_rate": 0.0003601242406558359,
      "loss": 3.2506,
      "step": 100403
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5606716871261597,
      "learning_rate": 0.00036012023308038286,
      "loss": 3.053,
      "step": 100404
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.666595697402954,
      "learning_rate": 0.0003601162254937525,
      "loss": 2.8602,
      "step": 100405
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.3991637229919434,
      "learning_rate": 0.0003601122178959453,
      "loss": 2.7124,
      "step": 100406
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1591386795043945,
      "learning_rate": 0.0003601082102869621,
      "loss": 3.055,
      "step": 100407
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8991332054138184,
      "learning_rate": 0.0003601042026668036,
      "loss": 2.9598,
      "step": 100408
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.0642762184143066,
      "learning_rate": 0.0003601001950354707,
      "loss": 2.8874,
      "step": 100409
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8432880640029907,
      "learning_rate": 0.000360096187392964,
      "loss": 3.0793,
      "step": 100410
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0607752799987793,
      "learning_rate": 0.0003600921797392843,
      "loss": 3.0789,
      "step": 100411
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6663457155227661,
      "learning_rate": 0.00036008817207443223,
      "loss": 2.775,
      "step": 100412
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7987773418426514,
      "learning_rate": 0.0003600841643984088,
      "loss": 2.9697,
      "step": 100413
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1281018257141113,
      "learning_rate": 0.00036008015671121446,
      "loss": 2.9309,
      "step": 100414
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.31707501411438,
      "learning_rate": 0.0003600761490128501,
      "loss": 3.0493,
      "step": 100415
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8299728631973267,
      "learning_rate": 0.00036007214130331645,
      "loss": 3.1544,
      "step": 100416
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.571775436401367,
      "learning_rate": 0.0003600681335826144,
      "loss": 3.0106,
      "step": 100417
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4571824073791504,
      "learning_rate": 0.0003600641258507444,
      "loss": 3.2018,
      "step": 100418
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.366946220397949,
      "learning_rate": 0.00036006011810770736,
      "loss": 2.934,
      "step": 100419
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6741536855697632,
      "learning_rate": 0.00036005611035350404,
      "loss": 3.1617,
      "step": 100420
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4947900772094727,
      "learning_rate": 0.00036005210258813517,
      "loss": 2.8018,
      "step": 100421
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8624465465545654,
      "learning_rate": 0.00036004809481160146,
      "loss": 2.9844,
      "step": 100422
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8297749757766724,
      "learning_rate": 0.00036004408702390366,
      "loss": 2.9127,
      "step": 100423
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.571434736251831,
      "learning_rate": 0.00036004007922504254,
      "loss": 3.1513,
      "step": 100424
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8749381303787231,
      "learning_rate": 0.0003600360714150188,
      "loss": 3.0447,
      "step": 100425
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8810231685638428,
      "learning_rate": 0.0003600320635938333,
      "loss": 3.085,
      "step": 100426
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.728523850440979,
      "learning_rate": 0.0003600280557614867,
      "loss": 2.8378,
      "step": 100427
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1821765899658203,
      "learning_rate": 0.0003600240479179796,
      "loss": 3.0455,
      "step": 100428
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.873394250869751,
      "learning_rate": 0.00036002004006331314,
      "loss": 3.1608,
      "step": 100429
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0426430702209473,
      "learning_rate": 0.0003600160321974877,
      "loss": 3.218,
      "step": 100430
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9994478225708008,
      "learning_rate": 0.0003600120243205041,
      "loss": 3.0053,
      "step": 100431
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6949564218521118,
      "learning_rate": 0.0003600080164323632,
      "loss": 2.8589,
      "step": 100432
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.442145586013794,
      "learning_rate": 0.0003600040085330656,
      "loss": 3.0127,
      "step": 100433
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8935341835021973,
      "learning_rate": 0.0003600000006226122,
      "loss": 3.357,
      "step": 100434
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6914488077163696,
      "learning_rate": 0.00035999599270100365,
      "loss": 2.9341,
      "step": 100435
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3518712520599365,
      "learning_rate": 0.00035999198476824074,
      "loss": 2.9177,
      "step": 100436
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2178030014038086,
      "learning_rate": 0.0003599879768243241,
      "loss": 3.0934,
      "step": 100437
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1282052993774414,
      "learning_rate": 0.00035998396886925465,
      "loss": 2.8358,
      "step": 100438
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4881210327148438,
      "learning_rate": 0.00035997996090303305,
      "loss": 2.9055,
      "step": 100439
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7573018074035645,
      "learning_rate": 0.00035997595292566,
      "loss": 2.906,
      "step": 100440
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.388923168182373,
      "learning_rate": 0.00035997194493713624,
      "loss": 2.9475,
      "step": 100441
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0285801887512207,
      "learning_rate": 0.0003599679369374626,
      "loss": 3.0674,
      "step": 100442
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.937310814857483,
      "learning_rate": 0.0003599639289266398,
      "loss": 2.9483,
      "step": 100443
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6745744943618774,
      "learning_rate": 0.0003599599209046687,
      "loss": 2.9658,
      "step": 100444
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.406250476837158,
      "learning_rate": 0.0003599559128715497,
      "loss": 2.7795,
      "step": 100445
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.05246901512146,
      "learning_rate": 0.00035995190482728386,
      "loss": 2.8315,
      "step": 100446
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1657614707946777,
      "learning_rate": 0.0003599478967718719,
      "loss": 3.0648,
      "step": 100447
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.840256929397583,
      "learning_rate": 0.0003599438887053144,
      "loss": 3.1183,
      "step": 100448
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.7471978664398193,
      "learning_rate": 0.0003599398806276122,
      "loss": 2.8638,
      "step": 100449
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.57570219039917,
      "learning_rate": 0.0003599358725387661,
      "loss": 3.0709,
      "step": 100450
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3774807453155518,
      "learning_rate": 0.0003599318644387767,
      "loss": 2.9479,
      "step": 100451
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.501102924346924,
      "learning_rate": 0.00035992785632764493,
      "loss": 3.082,
      "step": 100452
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.0682668685913086,
      "learning_rate": 0.00035992384820537145,
      "loss": 2.7586,
      "step": 100453
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.4925637245178223,
      "learning_rate": 0.00035991984007195687,
      "loss": 3.1038,
      "step": 100454
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7860870361328125,
      "learning_rate": 0.00035991583192740217,
      "loss": 2.8536,
      "step": 100455
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.650681734085083,
      "learning_rate": 0.00035991182377170794,
      "loss": 2.8301,
      "step": 100456
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2269279956817627,
      "learning_rate": 0.00035990781560487505,
      "loss": 3.0446,
      "step": 100457
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8307147026062012,
      "learning_rate": 0.00035990380742690405,
      "loss": 3.0502,
      "step": 100458
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9833569526672363,
      "learning_rate": 0.00035989979923779585,
      "loss": 3.0321,
      "step": 100459
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.629462480545044,
      "learning_rate": 0.0003598957910375512,
      "loss": 2.9119,
      "step": 100460
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4036383628845215,
      "learning_rate": 0.00035989178282617074,
      "loss": 3.0467,
      "step": 100461
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7866743803024292,
      "learning_rate": 0.00035988777460365523,
      "loss": 3.0504,
      "step": 100462
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.493894338607788,
      "learning_rate": 0.0003598837663700055,
      "loss": 2.8807,
      "step": 100463
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6360960006713867,
      "learning_rate": 0.00035987975812522223,
      "loss": 2.9379,
      "step": 100464
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.240039348602295,
      "learning_rate": 0.0003598757498693062,
      "loss": 3.0617,
      "step": 100465
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.939310908317566,
      "learning_rate": 0.00035987174160225817,
      "loss": 3.1014,
      "step": 100466
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8076863288879395,
      "learning_rate": 0.0003598677333240788,
      "loss": 2.8848,
      "step": 100467
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7731434106826782,
      "learning_rate": 0.00035986372503476886,
      "loss": 2.9985,
      "step": 100468
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4389638900756836,
      "learning_rate": 0.00035985971673432925,
      "loss": 2.7018,
      "step": 100469
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1134021282196045,
      "learning_rate": 0.00035985570842276047,
      "loss": 2.8563,
      "step": 100470
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6799064874649048,
      "learning_rate": 0.0003598517001000634,
      "loss": 3.0012,
      "step": 100471
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7535274028778076,
      "learning_rate": 0.00035984769176623885,
      "loss": 2.9105,
      "step": 100472
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5342367887496948,
      "learning_rate": 0.0003598436834212874,
      "loss": 2.998,
      "step": 100473
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0658323764801025,
      "learning_rate": 0.0003598396750652098,
      "loss": 3.1103,
      "step": 100474
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1638712882995605,
      "learning_rate": 0.00035983566669800705,
      "loss": 2.9355,
      "step": 100475
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5479108095169067,
      "learning_rate": 0.00035983165831967966,
      "loss": 3.0707,
      "step": 100476
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1850178241729736,
      "learning_rate": 0.0003598276499302284,
      "loss": 3.0718,
      "step": 100477
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6583201885223389,
      "learning_rate": 0.0003598236415296541,
      "loss": 2.94,
      "step": 100478
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.577828288078308,
      "learning_rate": 0.0003598196331179574,
      "loss": 2.9218,
      "step": 100479
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6691644191741943,
      "learning_rate": 0.0003598156246951391,
      "loss": 3.0944,
      "step": 100480
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5280938148498535,
      "learning_rate": 0.00035981161626120003,
      "loss": 2.6697,
      "step": 100481
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3107426166534424,
      "learning_rate": 0.00035980760781614075,
      "loss": 3.0731,
      "step": 100482
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0191490650177,
      "learning_rate": 0.00035980359935996217,
      "loss": 3.0207,
      "step": 100483
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4809234142303467,
      "learning_rate": 0.00035979959089266504,
      "loss": 2.9093,
      "step": 100484
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.645519971847534,
      "learning_rate": 0.0003597955824142499,
      "loss": 2.951,
      "step": 100485
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4855120182037354,
      "learning_rate": 0.0003597915739247177,
      "loss": 3.266,
      "step": 100486
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.3195924758911133,
      "learning_rate": 0.00035978756542406914,
      "loss": 2.9463,
      "step": 100487
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8935706615447998,
      "learning_rate": 0.00035978355691230487,
      "loss": 2.9666,
      "step": 100488
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.66129469871521,
      "learning_rate": 0.00035977954838942573,
      "loss": 3.2467,
      "step": 100489
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3494722843170166,
      "learning_rate": 0.0003597755398554325,
      "loss": 3.1355,
      "step": 100490
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9919447898864746,
      "learning_rate": 0.0003597715313103258,
      "loss": 2.9257,
      "step": 100491
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9748245477676392,
      "learning_rate": 0.00035976752275410643,
      "loss": 2.9975,
      "step": 100492
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.426720380783081,
      "learning_rate": 0.0003597635141867752,
      "loss": 2.948,
      "step": 100493
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8447588682174683,
      "learning_rate": 0.00035975950560833284,
      "loss": 2.9745,
      "step": 100494
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.965577483177185,
      "learning_rate": 0.00035975549701878,
      "loss": 3.047,
      "step": 100495
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1555919647216797,
      "learning_rate": 0.0003597514884181175,
      "loss": 3.0535,
      "step": 100496
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5988703966140747,
      "learning_rate": 0.000359747479806346,
      "loss": 3.2589,
      "step": 100497
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8605217933654785,
      "learning_rate": 0.00035974347118346647,
      "loss": 3.0125,
      "step": 100498
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.793047308921814,
      "learning_rate": 0.0003597394625494794,
      "loss": 2.9915,
      "step": 100499
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8051860332489014,
      "learning_rate": 0.0003597354539043856,
      "loss": 3.0578,
      "step": 100500
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.707553505897522,
      "learning_rate": 0.000359731445248186,
      "loss": 3.019,
      "step": 100501
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.21842360496521,
      "learning_rate": 0.000359727436580881,
      "loss": 3.2285,
      "step": 100502
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.483606219291687,
      "learning_rate": 0.00035972342790247165,
      "loss": 2.9274,
      "step": 100503
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.410146713256836,
      "learning_rate": 0.0003597194192129586,
      "loss": 2.74,
      "step": 100504
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1417713165283203,
      "learning_rate": 0.00035971541051234263,
      "loss": 3.0422,
      "step": 100505
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6905103921890259,
      "learning_rate": 0.0003597114018006243,
      "loss": 2.9444,
      "step": 100506
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7576500177383423,
      "learning_rate": 0.00035970739307780454,
      "loss": 2.93,
      "step": 100507
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8915414810180664,
      "learning_rate": 0.00035970338434388415,
      "loss": 2.8706,
      "step": 100508
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7343089580535889,
      "learning_rate": 0.00035969937559886364,
      "loss": 2.8581,
      "step": 100509
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9095534086227417,
      "learning_rate": 0.00035969536684274394,
      "loss": 3.046,
      "step": 100510
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.945427417755127,
      "learning_rate": 0.00035969135807552576,
      "loss": 2.9176,
      "step": 100511
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.194300413131714,
      "learning_rate": 0.00035968734929720984,
      "loss": 2.8609,
      "step": 100512
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.985431432723999,
      "learning_rate": 0.00035968334050779684,
      "loss": 2.8935,
      "step": 100513
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.7744646072387695,
      "learning_rate": 0.0003596793317072877,
      "loss": 3.0308,
      "step": 100514
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0194101333618164,
      "learning_rate": 0.00035967532289568296,
      "loss": 2.8599,
      "step": 100515
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3496670722961426,
      "learning_rate": 0.0003596713140729835,
      "loss": 2.9207,
      "step": 100516
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4013466835021973,
      "learning_rate": 0.00035966730523919003,
      "loss": 2.7617,
      "step": 100517
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.043558120727539,
      "learning_rate": 0.0003596632963943032,
      "loss": 2.9552,
      "step": 100518
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9746381044387817,
      "learning_rate": 0.00035965928753832385,
      "loss": 2.9438,
      "step": 100519
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.2193360328674316,
      "learning_rate": 0.00035965527867125275,
      "loss": 3.0863,
      "step": 100520
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.579155683517456,
      "learning_rate": 0.0003596512697930906,
      "loss": 2.8066,
      "step": 100521
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1433558464050293,
      "learning_rate": 0.00035964726090383813,
      "loss": 3.1176,
      "step": 100522
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.531834363937378,
      "learning_rate": 0.0003596432520034961,
      "loss": 2.7476,
      "step": 100523
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9653464555740356,
      "learning_rate": 0.00035963924309206533,
      "loss": 3.0988,
      "step": 100524
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2161526679992676,
      "learning_rate": 0.00035963523416954646,
      "loss": 2.9396,
      "step": 100525
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.855699896812439,
      "learning_rate": 0.0003596312252359403,
      "loss": 2.7704,
      "step": 100526
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7007949352264404,
      "learning_rate": 0.00035962721629124756,
      "loss": 3.0472,
      "step": 100527
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.931128740310669,
      "learning_rate": 0.00035962320733546894,
      "loss": 2.9667,
      "step": 100528
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.704945683479309,
      "learning_rate": 0.0003596191983686053,
      "loss": 3.0698,
      "step": 100529
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.684735894203186,
      "learning_rate": 0.0003596151893906573,
      "loss": 3.1446,
      "step": 100530
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8826855421066284,
      "learning_rate": 0.00035961118040162574,
      "loss": 3.085,
      "step": 100531
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9915952682495117,
      "learning_rate": 0.00035960717140151127,
      "loss": 3.1595,
      "step": 100532
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8693575859069824,
      "learning_rate": 0.0003596031623903148,
      "loss": 3.1419,
      "step": 100533
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5717992782592773,
      "learning_rate": 0.0003595991533680369,
      "loss": 3.2416,
      "step": 100534
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1272077560424805,
      "learning_rate": 0.00035959514433467837,
      "loss": 3.2638,
      "step": 100535
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5942585468292236,
      "learning_rate": 0.0003595911352902401,
      "loss": 2.9958,
      "step": 100536
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.884694218635559,
      "learning_rate": 0.00035958712623472257,
      "loss": 2.9795,
      "step": 100537
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9131640195846558,
      "learning_rate": 0.0003595831171681268,
      "loss": 2.8715,
      "step": 100538
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1075470447540283,
      "learning_rate": 0.0003595791080904533,
      "loss": 2.9358,
      "step": 100539
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3958311080932617,
      "learning_rate": 0.000359575099001703,
      "loss": 3.1019,
      "step": 100540
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1014328002929688,
      "learning_rate": 0.00035957108990187646,
      "loss": 2.9955,
      "step": 100541
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.85058856010437,
      "learning_rate": 0.00035956708079097464,
      "loss": 2.9646,
      "step": 100542
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.5158209800720215,
      "learning_rate": 0.0003595630716689981,
      "loss": 3.1009,
      "step": 100543
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.707905888557434,
      "learning_rate": 0.0003595590625359477,
      "loss": 3.0713,
      "step": 100544
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2996737957000732,
      "learning_rate": 0.0003595550533918242,
      "loss": 2.7085,
      "step": 100545
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.625307083129883,
      "learning_rate": 0.0003595510442366282,
      "loss": 3.0856,
      "step": 100546
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8983781337738037,
      "learning_rate": 0.0003595470350703605,
      "loss": 3.1856,
      "step": 100547
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8608874082565308,
      "learning_rate": 0.00035954302589302203,
      "loss": 3.0004,
      "step": 100548
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.5211832523345947,
      "learning_rate": 0.0003595390167046132,
      "loss": 2.7792,
      "step": 100549
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.505664110183716,
      "learning_rate": 0.0003595350075051351,
      "loss": 2.7945,
      "step": 100550
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.172703742980957,
      "learning_rate": 0.00035953099829458827,
      "loss": 2.9594,
      "step": 100551
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7009590864181519,
      "learning_rate": 0.0003595269890729735,
      "loss": 3.066,
      "step": 100552
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1852266788482666,
      "learning_rate": 0.0003595229798402915,
      "loss": 3.052,
      "step": 100553
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0730485916137695,
      "learning_rate": 0.00035951897059654316,
      "loss": 2.9722,
      "step": 100554
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5648765563964844,
      "learning_rate": 0.000359514961341729,
      "loss": 3.032,
      "step": 100555
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9167163372039795,
      "learning_rate": 0.0003595109520758499,
      "loss": 2.8401,
      "step": 100556
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8584672212600708,
      "learning_rate": 0.00035950694279890665,
      "loss": 2.7302,
      "step": 100557
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8252640962600708,
      "learning_rate": 0.0003595029335108999,
      "loss": 3.2814,
      "step": 100558
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.6456494331359863,
      "learning_rate": 0.00035949892421183047,
      "loss": 2.9753,
      "step": 100559
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7217299938201904,
      "learning_rate": 0.000359494914901699,
      "loss": 2.9743,
      "step": 100560
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9239307641983032,
      "learning_rate": 0.0003594909055805064,
      "loss": 2.9056,
      "step": 100561
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7777800559997559,
      "learning_rate": 0.0003594868962482532,
      "loss": 3.0595,
      "step": 100562
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8307667970657349,
      "learning_rate": 0.0003594828869049404,
      "loss": 3.0658,
      "step": 100563
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3593742847442627,
      "learning_rate": 0.0003594788775505685,
      "loss": 3.0293,
      "step": 100564
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8078889846801758,
      "learning_rate": 0.00035947486818513834,
      "loss": 3.1398,
      "step": 100565
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.88413667678833,
      "learning_rate": 0.0003594708588086508,
      "loss": 3.0689,
      "step": 100566
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7761855125427246,
      "learning_rate": 0.0003594668494211064,
      "loss": 3.1188,
      "step": 100567
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8392153978347778,
      "learning_rate": 0.000359462840022506,
      "loss": 2.9069,
      "step": 100568
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.817772626876831,
      "learning_rate": 0.0003594588306128504,
      "loss": 2.7679,
      "step": 100569
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.877442479133606,
      "learning_rate": 0.0003594548211921402,
      "loss": 3.0259,
      "step": 100570
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8766162395477295,
      "learning_rate": 0.0003594508117603763,
      "loss": 3.0614,
      "step": 100571
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6646579504013062,
      "learning_rate": 0.0003594468023175593,
      "loss": 2.9572,
      "step": 100572
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9056005477905273,
      "learning_rate": 0.0003594427928636901,
      "loss": 3.0202,
      "step": 100573
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0499160289764404,
      "learning_rate": 0.00035943878339876926,
      "loss": 3.1683,
      "step": 100574
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.58780038356781,
      "learning_rate": 0.00035943477392279776,
      "loss": 3.3243,
      "step": 100575
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9502716064453125,
      "learning_rate": 0.0003594307644357761,
      "loss": 3.0552,
      "step": 100576
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9108797311782837,
      "learning_rate": 0.00035942675493770516,
      "loss": 2.9493,
      "step": 100577
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0860445499420166,
      "learning_rate": 0.00035942274542858564,
      "loss": 3.0271,
      "step": 100578
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0719001293182373,
      "learning_rate": 0.0003594187359084184,
      "loss": 2.9282,
      "step": 100579
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8151805400848389,
      "learning_rate": 0.00035941472637720406,
      "loss": 2.9388,
      "step": 100580
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7664905786514282,
      "learning_rate": 0.00035941071683494335,
      "loss": 2.9611,
      "step": 100581
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.51277756690979,
      "learning_rate": 0.0003594067072816371,
      "loss": 3.136,
      "step": 100582
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6725355386734009,
      "learning_rate": 0.00035940269771728606,
      "loss": 2.8989,
      "step": 100583
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9382119178771973,
      "learning_rate": 0.0003593986881418909,
      "loss": 2.947,
      "step": 100584
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.208951473236084,
      "learning_rate": 0.00035939467855545235,
      "loss": 2.7964,
      "step": 100585
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1344141960144043,
      "learning_rate": 0.00035939066895797133,
      "loss": 2.9369,
      "step": 100586
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7988210916519165,
      "learning_rate": 0.00035938665934944836,
      "loss": 2.8955,
      "step": 100587
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.03828763961792,
      "learning_rate": 0.00035938264972988426,
      "loss": 3.0335,
      "step": 100588
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9904814958572388,
      "learning_rate": 0.0003593786400992799,
      "loss": 2.982,
      "step": 100589
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8646963834762573,
      "learning_rate": 0.0003593746304576359,
      "loss": 3.198,
      "step": 100590
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.897851586341858,
      "learning_rate": 0.000359370620804953,
      "loss": 3.2362,
      "step": 100591
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7721425294876099,
      "learning_rate": 0.00035936661114123204,
      "loss": 2.9335,
      "step": 100592
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7485148906707764,
      "learning_rate": 0.00035936260146647375,
      "loss": 2.8439,
      "step": 100593
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.899644136428833,
      "learning_rate": 0.00035935859178067866,
      "loss": 2.9482,
      "step": 100594
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8643032312393188,
      "learning_rate": 0.00035935458208384785,
      "loss": 3.0298,
      "step": 100595
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.9133269786834717,
      "learning_rate": 0.00035935057237598177,
      "loss": 3.2259,
      "step": 100596
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.530815839767456,
      "learning_rate": 0.00035934656265708143,
      "loss": 3.2088,
      "step": 100597
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1054654121398926,
      "learning_rate": 0.0003593425529271474,
      "loss": 3.2345,
      "step": 100598
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.726414442062378,
      "learning_rate": 0.0003593385431861804,
      "loss": 3.0707,
      "step": 100599
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4966137409210205,
      "learning_rate": 0.0003593345334341814,
      "loss": 2.9941,
      "step": 100600
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.912023663520813,
      "learning_rate": 0.0003593305236711508,
      "loss": 2.9905,
      "step": 100601
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.738831877708435,
      "learning_rate": 0.00035932651389708963,
      "loss": 2.8909,
      "step": 100602
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8639494180679321,
      "learning_rate": 0.0003593225041119986,
      "loss": 2.9762,
      "step": 100603
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4757027626037598,
      "learning_rate": 0.0003593184943158783,
      "loss": 2.9322,
      "step": 100604
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9185357093811035,
      "learning_rate": 0.0003593144845087296,
      "loss": 2.7157,
      "step": 100605
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0463552474975586,
      "learning_rate": 0.00035931047469055325,
      "loss": 2.8116,
      "step": 100606
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6099241971969604,
      "learning_rate": 0.00035930646486134987,
      "loss": 2.9247,
      "step": 100607
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.157604217529297,
      "learning_rate": 0.0003593024550211204,
      "loss": 2.9244,
      "step": 100608
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.8506977558135986,
      "learning_rate": 0.00035929844516986546,
      "loss": 2.9649,
      "step": 100609
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.955916404724121,
      "learning_rate": 0.0003592944353075858,
      "loss": 2.9521,
      "step": 100610
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9539551734924316,
      "learning_rate": 0.00035929042543428216,
      "loss": 2.8288,
      "step": 100611
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0134806632995605,
      "learning_rate": 0.0003592864155499554,
      "loss": 2.994,
      "step": 100612
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7905418872833252,
      "learning_rate": 0.0003592824056546061,
      "loss": 2.7916,
      "step": 100613
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8168628215789795,
      "learning_rate": 0.00035927839574823506,
      "loss": 3.1203,
      "step": 100614
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1804423332214355,
      "learning_rate": 0.00035927438583084316,
      "loss": 3.1627,
      "step": 100615
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.178907632827759,
      "learning_rate": 0.00035927037590243095,
      "loss": 2.8799,
      "step": 100616
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.190812110900879,
      "learning_rate": 0.00035926636596299923,
      "loss": 2.9593,
      "step": 100617
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6010515689849854,
      "learning_rate": 0.00035926235601254887,
      "loss": 3.0467,
      "step": 100618
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7825038433074951,
      "learning_rate": 0.0003592583460510804,
      "loss": 3.0167,
      "step": 100619
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.899205207824707,
      "learning_rate": 0.00035925433607859474,
      "loss": 2.9223,
      "step": 100620
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6159440279006958,
      "learning_rate": 0.0003592503260950927,
      "loss": 3.1272,
      "step": 100621
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6429574489593506,
      "learning_rate": 0.0003592463161005747,
      "loss": 3.2162,
      "step": 100622
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.02778959274292,
      "learning_rate": 0.00035924230609504185,
      "loss": 2.8653,
      "step": 100623
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5221529006958008,
      "learning_rate": 0.0003592382960784947,
      "loss": 3.1438,
      "step": 100624
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8826912641525269,
      "learning_rate": 0.0003592342860509339,
      "loss": 3.0164,
      "step": 100625
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4016292095184326,
      "learning_rate": 0.00035923027601236047,
      "loss": 2.8167,
      "step": 100626
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5633636713027954,
      "learning_rate": 0.000359226265962775,
      "loss": 3.1697,
      "step": 100627
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.326326608657837,
      "learning_rate": 0.0003592222559021782,
      "loss": 3.1431,
      "step": 100628
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1867482662200928,
      "learning_rate": 0.0003592182458305709,
      "loss": 2.9394,
      "step": 100629
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7905523777008057,
      "learning_rate": 0.0003592142357479538,
      "loss": 2.8695,
      "step": 100630
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.44363534450531,
      "learning_rate": 0.0003592102256543277,
      "loss": 3.2653,
      "step": 100631
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7820720672607422,
      "learning_rate": 0.00035920621554969326,
      "loss": 3.0262,
      "step": 100632
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5142624378204346,
      "learning_rate": 0.0003592022054340513,
      "loss": 2.8851,
      "step": 100633
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8480653762817383,
      "learning_rate": 0.0003591981953074025,
      "loss": 3.0384,
      "step": 100634
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8997254371643066,
      "learning_rate": 0.0003591941851697476,
      "loss": 3.0508,
      "step": 100635
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5883880853652954,
      "learning_rate": 0.0003591901750210875,
      "loss": 3.2495,
      "step": 100636
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.141791582107544,
      "learning_rate": 0.0003591861648614227,
      "loss": 2.8432,
      "step": 100637
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9200319051742554,
      "learning_rate": 0.00035918215469075407,
      "loss": 2.9268,
      "step": 100638
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9368655681610107,
      "learning_rate": 0.00035917814450908247,
      "loss": 2.9267,
      "step": 100639
    },
    {
      "epoch": 1.31,
      "grad_norm": 4.3487372398376465,
      "learning_rate": 0.00035917413431640845,
      "loss": 2.8557,
      "step": 100640
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9022712707519531,
      "learning_rate": 0.00035917012411273286,
      "loss": 2.8691,
      "step": 100641
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.041069984436035,
      "learning_rate": 0.00035916611389805647,
      "loss": 2.7536,
      "step": 100642
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3373067378997803,
      "learning_rate": 0.0003591621036723799,
      "loss": 2.9339,
      "step": 100643
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8363970518112183,
      "learning_rate": 0.0003591580934357041,
      "loss": 3.02,
      "step": 100644
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9833835363388062,
      "learning_rate": 0.0003591540831880296,
      "loss": 3.1107,
      "step": 100645
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.040435791015625,
      "learning_rate": 0.00035915007292935724,
      "loss": 3.1694,
      "step": 100646
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8815630674362183,
      "learning_rate": 0.0003591460626596878,
      "loss": 3.059,
      "step": 100647
    },
    {
      "epoch": 1.31,
      "grad_norm": 4.113550186157227,
      "learning_rate": 0.00035914205237902203,
      "loss": 3.0507,
      "step": 100648
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2667555809020996,
      "learning_rate": 0.00035913804208736056,
      "loss": 2.8452,
      "step": 100649
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.6363413333892822,
      "learning_rate": 0.0003591340317847042,
      "loss": 2.9734,
      "step": 100650
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.008760452270508,
      "learning_rate": 0.00035913002147105376,
      "loss": 3.1472,
      "step": 100651
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1633896827697754,
      "learning_rate": 0.00035912601114640994,
      "loss": 3.0277,
      "step": 100652
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4234293699264526,
      "learning_rate": 0.00035912200081077345,
      "loss": 3.0131,
      "step": 100653
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8864554166793823,
      "learning_rate": 0.00035911799046414505,
      "loss": 2.7711,
      "step": 100654
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3266282081604004,
      "learning_rate": 0.00035911398010652555,
      "loss": 3.0896,
      "step": 100655
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.126246690750122,
      "learning_rate": 0.00035910996973791554,
      "loss": 2.909,
      "step": 100656
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7995645999908447,
      "learning_rate": 0.00035910595935831596,
      "loss": 2.9364,
      "step": 100657
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.048276901245117,
      "learning_rate": 0.00035910194896772744,
      "loss": 3.0877,
      "step": 100658
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6719108819961548,
      "learning_rate": 0.00035909793856615075,
      "loss": 3.0536,
      "step": 100659
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4876058101654053,
      "learning_rate": 0.00035909392815358665,
      "loss": 2.7983,
      "step": 100660
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.132509469985962,
      "learning_rate": 0.00035908991773003594,
      "loss": 3.2553,
      "step": 100661
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.619678020477295,
      "learning_rate": 0.00035908590729549924,
      "loss": 3.1784,
      "step": 100662
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1358742713928223,
      "learning_rate": 0.0003590818968499773,
      "loss": 2.909,
      "step": 100663
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0614664554595947,
      "learning_rate": 0.000359077886393471,
      "loss": 2.9562,
      "step": 100664
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.863115906715393,
      "learning_rate": 0.00035907387592598095,
      "loss": 2.9526,
      "step": 100665
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8330121040344238,
      "learning_rate": 0.000359069865447508,
      "loss": 3.1734,
      "step": 100666
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9945528507232666,
      "learning_rate": 0.00035906585495805286,
      "loss": 2.8718,
      "step": 100667
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4523564577102661,
      "learning_rate": 0.0003590618444576162,
      "loss": 3.1976,
      "step": 100668
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3295257091522217,
      "learning_rate": 0.0003590578339461988,
      "loss": 3.1117,
      "step": 100669
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7850216627120972,
      "learning_rate": 0.0003590538234238015,
      "loss": 2.8062,
      "step": 100670
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.50340735912323,
      "learning_rate": 0.000359049812890425,
      "loss": 2.8573,
      "step": 100671
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1585581302642822,
      "learning_rate": 0.00035904580234606996,
      "loss": 3.2445,
      "step": 100672
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5623859167099,
      "learning_rate": 0.0003590417917907372,
      "loss": 3.0414,
      "step": 100673
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6206629276275635,
      "learning_rate": 0.0003590377812244275,
      "loss": 3.1264,
      "step": 100674
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9356259107589722,
      "learning_rate": 0.0003590337706471415,
      "loss": 3.1411,
      "step": 100675
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9361255168914795,
      "learning_rate": 0.0003590297600588801,
      "loss": 2.9882,
      "step": 100676
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9871784448623657,
      "learning_rate": 0.0003590257494596438,
      "loss": 3.1391,
      "step": 100677
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7339675426483154,
      "learning_rate": 0.0003590217388494337,
      "loss": 2.9099,
      "step": 100678
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8419148921966553,
      "learning_rate": 0.0003590177282282502,
      "loss": 3.068,
      "step": 100679
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5638427734375,
      "learning_rate": 0.00035901371759609424,
      "loss": 2.9703,
      "step": 100680
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8172638416290283,
      "learning_rate": 0.0003590097069529666,
      "loss": 3.2281,
      "step": 100681
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3512320518493652,
      "learning_rate": 0.0003590056962988678,
      "loss": 2.8882,
      "step": 100682
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7746385335922241,
      "learning_rate": 0.0003590016856337987,
      "loss": 2.754,
      "step": 100683
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.783475637435913,
      "learning_rate": 0.0003589976749577602,
      "loss": 2.9227,
      "step": 100684
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.578513264656067,
      "learning_rate": 0.0003589936642707529,
      "loss": 2.9859,
      "step": 100685
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.110391855239868,
      "learning_rate": 0.0003589896535727775,
      "loss": 3.0151,
      "step": 100686
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8436236381530762,
      "learning_rate": 0.00035898564286383484,
      "loss": 2.8708,
      "step": 100687
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6194602251052856,
      "learning_rate": 0.00035898163214392566,
      "loss": 2.9137,
      "step": 100688
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8263493776321411,
      "learning_rate": 0.00035897762141305064,
      "loss": 2.9505,
      "step": 100689
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7933149337768555,
      "learning_rate": 0.0003589736106712106,
      "loss": 3.0813,
      "step": 100690
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5918922424316406,
      "learning_rate": 0.0003589695999184063,
      "loss": 3.0831,
      "step": 100691
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5619531869888306,
      "learning_rate": 0.0003589655891546383,
      "loss": 2.8155,
      "step": 100692
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.4955179691314697,
      "learning_rate": 0.0003589615783799076,
      "loss": 2.8472,
      "step": 100693
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.290069341659546,
      "learning_rate": 0.0003589575675942148,
      "loss": 2.9679,
      "step": 100694
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6713169813156128,
      "learning_rate": 0.00035895355679756066,
      "loss": 2.8384,
      "step": 100695
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.6509509086608887,
      "learning_rate": 0.0003589495459899459,
      "loss": 3.1285,
      "step": 100696
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2576208114624023,
      "learning_rate": 0.0003589455351713714,
      "loss": 3.0501,
      "step": 100697
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3494367599487305,
      "learning_rate": 0.00035894152434183773,
      "loss": 3.1747,
      "step": 100698
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1458704471588135,
      "learning_rate": 0.00035893751350134576,
      "loss": 3.2623,
      "step": 100699
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9115921258926392,
      "learning_rate": 0.0003589335026498963,
      "loss": 3.0016,
      "step": 100700
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.456521511077881,
      "learning_rate": 0.0003589294917874898,
      "loss": 2.8068,
      "step": 100701
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.370352268218994,
      "learning_rate": 0.00035892548091412727,
      "loss": 3.0954,
      "step": 100702
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.415402889251709,
      "learning_rate": 0.0003589214700298094,
      "loss": 2.7905,
      "step": 100703
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.766639232635498,
      "learning_rate": 0.0003589174591345369,
      "loss": 2.6271,
      "step": 100704
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8153918981552124,
      "learning_rate": 0.0003589134482283105,
      "loss": 3.0207,
      "step": 100705
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9710417985916138,
      "learning_rate": 0.00035890943731113113,
      "loss": 2.9331,
      "step": 100706
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8307372331619263,
      "learning_rate": 0.00035890542638299924,
      "loss": 2.7562,
      "step": 100707
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6003193855285645,
      "learning_rate": 0.00035890141544391566,
      "loss": 3.2507,
      "step": 100708
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5891008377075195,
      "learning_rate": 0.0003588974044938813,
      "loss": 3.0111,
      "step": 100709
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7397739887237549,
      "learning_rate": 0.00035889339353289675,
      "loss": 2.938,
      "step": 100710
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0065629482269287,
      "learning_rate": 0.0003588893825609628,
      "loss": 2.8448,
      "step": 100711
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.9392123222351074,
      "learning_rate": 0.00035888537157808035,
      "loss": 2.9504,
      "step": 100712
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6421841382980347,
      "learning_rate": 0.00035888136058424987,
      "loss": 2.868,
      "step": 100713
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8837279081344604,
      "learning_rate": 0.0003588773495794722,
      "loss": 2.986,
      "step": 100714
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7800877094268799,
      "learning_rate": 0.0003588733385637483,
      "loss": 3.1444,
      "step": 100715
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.191495656967163,
      "learning_rate": 0.0003588693275370785,
      "loss": 3.028,
      "step": 100716
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5777056217193604,
      "learning_rate": 0.0003588653164994639,
      "loss": 3.142,
      "step": 100717
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.201791286468506,
      "learning_rate": 0.0003588613054509051,
      "loss": 3.2599,
      "step": 100718
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8496812582015991,
      "learning_rate": 0.00035885729439140293,
      "loss": 3.1065,
      "step": 100719
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7934579849243164,
      "learning_rate": 0.000358853283320958,
      "loss": 2.881,
      "step": 100720
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.62060546875,
      "learning_rate": 0.0003588492722395712,
      "loss": 3.0232,
      "step": 100721
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1440248489379883,
      "learning_rate": 0.00035884526114724315,
      "loss": 3.1066,
      "step": 100722
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9180958271026611,
      "learning_rate": 0.00035884125004397466,
      "loss": 3.0124,
      "step": 100723
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8722646236419678,
      "learning_rate": 0.0003588372389297665,
      "loss": 3.3772,
      "step": 100724
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8709290027618408,
      "learning_rate": 0.00035883322780461943,
      "loss": 2.8262,
      "step": 100725
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.609701156616211,
      "learning_rate": 0.00035882921666853404,
      "loss": 3.0421,
      "step": 100726
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8748116493225098,
      "learning_rate": 0.0003588252055215113,
      "loss": 3.2233,
      "step": 100727
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.636480450630188,
      "learning_rate": 0.00035882119436355184,
      "loss": 3.1823,
      "step": 100728
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6520928144454956,
      "learning_rate": 0.0003588171831946563,
      "loss": 3.0083,
      "step": 100729
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8166393041610718,
      "learning_rate": 0.00035881317201482554,
      "loss": 3.0726,
      "step": 100730
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.862988829612732,
      "learning_rate": 0.0003588091608240604,
      "loss": 2.9333,
      "step": 100731
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.131071090698242,
      "learning_rate": 0.0003588051496223615,
      "loss": 2.9707,
      "step": 100732
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9698959589004517,
      "learning_rate": 0.0003588011384097296,
      "loss": 3.1169,
      "step": 100733
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.025221109390259,
      "learning_rate": 0.00035879712718616546,
      "loss": 3.0035,
      "step": 100734
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.9963111877441406,
      "learning_rate": 0.00035879311595166986,
      "loss": 3.0942,
      "step": 100735
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.99181067943573,
      "learning_rate": 0.00035878910470624344,
      "loss": 2.963,
      "step": 100736
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.967855930328369,
      "learning_rate": 0.00035878509344988707,
      "loss": 3.2142,
      "step": 100737
    },
    {
      "epoch": 1.31,
      "grad_norm": 4.392354965209961,
      "learning_rate": 0.00035878108218260135,
      "loss": 3.0596,
      "step": 100738
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9860388040542603,
      "learning_rate": 0.0003587770709043872,
      "loss": 3.0578,
      "step": 100739
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.546482563018799,
      "learning_rate": 0.0003587730596152453,
      "loss": 3.0213,
      "step": 100740
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.292526960372925,
      "learning_rate": 0.00035876904831517634,
      "loss": 2.9684,
      "step": 100741
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.507418632507324,
      "learning_rate": 0.0003587650370041811,
      "loss": 3.0066,
      "step": 100742
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.561922073364258,
      "learning_rate": 0.00035876102568226033,
      "loss": 2.9872,
      "step": 100743
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7589638233184814,
      "learning_rate": 0.0003587570143494148,
      "loss": 3.0881,
      "step": 100744
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4814058542251587,
      "learning_rate": 0.00035875300300564517,
      "loss": 2.9349,
      "step": 100745
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.470520496368408,
      "learning_rate": 0.00035874899165095237,
      "loss": 3.1594,
      "step": 100746
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.5948612689971924,
      "learning_rate": 0.00035874498028533693,
      "loss": 2.8045,
      "step": 100747
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1406097412109375,
      "learning_rate": 0.0003587409689087997,
      "loss": 2.8508,
      "step": 100748
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.061567544937134,
      "learning_rate": 0.0003587369575213415,
      "loss": 3.021,
      "step": 100749
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.36411714553833,
      "learning_rate": 0.00035873294612296286,
      "loss": 2.8853,
      "step": 100750
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.838315725326538,
      "learning_rate": 0.0003587289347136647,
      "loss": 3.0503,
      "step": 100751
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6235462427139282,
      "learning_rate": 0.00035872492329344776,
      "loss": 2.9606,
      "step": 100752
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8670403957366943,
      "learning_rate": 0.00035872091186231274,
      "loss": 3.0185,
      "step": 100753
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.703784942626953,
      "learning_rate": 0.00035871690042026035,
      "loss": 3.0241,
      "step": 100754
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3157591819763184,
      "learning_rate": 0.0003587128889672914,
      "loss": 2.6544,
      "step": 100755
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.531780242919922,
      "learning_rate": 0.00035870887750340664,
      "loss": 2.6953,
      "step": 100756
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7450238466262817,
      "learning_rate": 0.00035870486602860674,
      "loss": 2.8782,
      "step": 100757
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.0365660190582275,
      "learning_rate": 0.00035870085454289255,
      "loss": 2.7821,
      "step": 100758
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4345247745513916,
      "learning_rate": 0.0003586968430462648,
      "loss": 3.0444,
      "step": 100759
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2924015522003174,
      "learning_rate": 0.00035869283153872414,
      "loss": 3.1491,
      "step": 100760
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2238054275512695,
      "learning_rate": 0.0003586888200202713,
      "loss": 3.0475,
      "step": 100761
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.030694007873535,
      "learning_rate": 0.00035868480849090715,
      "loss": 3.1892,
      "step": 100762
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.522216796875,
      "learning_rate": 0.00035868079695063244,
      "loss": 2.9574,
      "step": 100763
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9000566005706787,
      "learning_rate": 0.0003586767853994479,
      "loss": 2.972,
      "step": 100764
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5882582664489746,
      "learning_rate": 0.0003586727738373541,
      "loss": 3.0177,
      "step": 100765
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4817991256713867,
      "learning_rate": 0.00035866876226435204,
      "loss": 3.132,
      "step": 100766
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.035194158554077,
      "learning_rate": 0.00035866475068044236,
      "loss": 3.0732,
      "step": 100767
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.6179120540618896,
      "learning_rate": 0.00035866073908562567,
      "loss": 2.7737,
      "step": 100768
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6911011934280396,
      "learning_rate": 0.0003586567274799029,
      "loss": 2.9106,
      "step": 100769
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2975752353668213,
      "learning_rate": 0.0003586527158632748,
      "loss": 2.9191,
      "step": 100770
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7951031923294067,
      "learning_rate": 0.00035864870423574197,
      "loss": 2.9085,
      "step": 100771
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3553051948547363,
      "learning_rate": 0.0003586446925973052,
      "loss": 3.0404,
      "step": 100772
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6164591312408447,
      "learning_rate": 0.00035864068094796546,
      "loss": 2.987,
      "step": 100773
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7096084356307983,
      "learning_rate": 0.0003586366692877232,
      "loss": 3.1417,
      "step": 100774
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.206063985824585,
      "learning_rate": 0.00035863265761657925,
      "loss": 2.9874,
      "step": 100775
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7553480863571167,
      "learning_rate": 0.0003586286459345344,
      "loss": 2.9429,
      "step": 100776
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4935311079025269,
      "learning_rate": 0.00035862463424158936,
      "loss": 2.8791,
      "step": 100777
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5049655437469482,
      "learning_rate": 0.0003586206225377449,
      "loss": 3.0197,
      "step": 100778
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9417431354522705,
      "learning_rate": 0.00035861661082300185,
      "loss": 2.7379,
      "step": 100779
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.744060754776001,
      "learning_rate": 0.00035861259909736077,
      "loss": 3.0905,
      "step": 100780
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8817434310913086,
      "learning_rate": 0.00035860858736082253,
      "loss": 3.3841,
      "step": 100781
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5426867008209229,
      "learning_rate": 0.0003586045756133879,
      "loss": 3.1716,
      "step": 100782
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1345744132995605,
      "learning_rate": 0.00035860056385505747,
      "loss": 2.7856,
      "step": 100783
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5126858949661255,
      "learning_rate": 0.0003585965520858321,
      "loss": 3.1784,
      "step": 100784
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.962282180786133,
      "learning_rate": 0.00035859254030571263,
      "loss": 2.879,
      "step": 100785
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5655267238616943,
      "learning_rate": 0.00035858852851469963,
      "loss": 2.863,
      "step": 100786
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1390020847320557,
      "learning_rate": 0.00035858451671279387,
      "loss": 3.0839,
      "step": 100787
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8071751594543457,
      "learning_rate": 0.00035858050489999627,
      "loss": 2.9833,
      "step": 100788
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3098390102386475,
      "learning_rate": 0.0003585764930763073,
      "loss": 3.2006,
      "step": 100789
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.189993381500244,
      "learning_rate": 0.0003585724812417279,
      "loss": 2.9755,
      "step": 100790
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8505805730819702,
      "learning_rate": 0.0003585684693962588,
      "loss": 2.9118,
      "step": 100791
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.940651774406433,
      "learning_rate": 0.0003585644575399008,
      "loss": 2.9398,
      "step": 100792
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6581562757492065,
      "learning_rate": 0.00035856044567265444,
      "loss": 2.999,
      "step": 100793
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.763582706451416,
      "learning_rate": 0.0003585564337945206,
      "loss": 2.9143,
      "step": 100794
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.59030020236969,
      "learning_rate": 0.00035855242190550006,
      "loss": 3.0989,
      "step": 100795
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.301100254058838,
      "learning_rate": 0.0003585484100055934,
      "loss": 3.1327,
      "step": 100796
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.694885015487671,
      "learning_rate": 0.0003585443980948016,
      "loss": 3.0748,
      "step": 100797
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0364856719970703,
      "learning_rate": 0.00035854038617312534,
      "loss": 2.8326,
      "step": 100798
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.050150156021118,
      "learning_rate": 0.00035853637424056525,
      "loss": 2.9662,
      "step": 100799
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8409111499786377,
      "learning_rate": 0.0003585323622971221,
      "loss": 3.1399,
      "step": 100800
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2919633388519287,
      "learning_rate": 0.00035852835034279676,
      "loss": 3.182,
      "step": 100801
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.193140983581543,
      "learning_rate": 0.0003585243383775898,
      "loss": 3.1149,
      "step": 100802
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4028592109680176,
      "learning_rate": 0.00035852032640150216,
      "loss": 2.9732,
      "step": 100803
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6728936433792114,
      "learning_rate": 0.0003585163144145344,
      "loss": 3.0065,
      "step": 100804
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.828931212425232,
      "learning_rate": 0.0003585123024166874,
      "loss": 2.8304,
      "step": 100805
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.3992035388946533,
      "learning_rate": 0.00035850829040796184,
      "loss": 3.0679,
      "step": 100806
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.198328733444214,
      "learning_rate": 0.0003585042783883586,
      "loss": 2.8532,
      "step": 100807
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8067631721496582,
      "learning_rate": 0.0003585002663578781,
      "loss": 2.7359,
      "step": 100808
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.145033836364746,
      "learning_rate": 0.0003584962543165214,
      "loss": 2.9374,
      "step": 100809
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5770083665847778,
      "learning_rate": 0.0003584922422642892,
      "loss": 2.6308,
      "step": 100810
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9074625968933105,
      "learning_rate": 0.0003584882302011821,
      "loss": 2.8768,
      "step": 100811
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.186213254928589,
      "learning_rate": 0.000358484218127201,
      "loss": 2.9311,
      "step": 100812
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0054500102996826,
      "learning_rate": 0.0003584802060423466,
      "loss": 2.9901,
      "step": 100813
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.9975616931915283,
      "learning_rate": 0.00035847619394661955,
      "loss": 3.0971,
      "step": 100814
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.02109956741333,
      "learning_rate": 0.00035847218184002067,
      "loss": 2.7935,
      "step": 100815
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.835878610610962,
      "learning_rate": 0.0003584681697225507,
      "loss": 2.8404,
      "step": 100816
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2822558879852295,
      "learning_rate": 0.00035846415759421043,
      "loss": 2.837,
      "step": 100817
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7966872453689575,
      "learning_rate": 0.0003584601454550006,
      "loss": 3.1288,
      "step": 100818
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.422675132751465,
      "learning_rate": 0.00035845613330492184,
      "loss": 2.8659,
      "step": 100819
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.5563013553619385,
      "learning_rate": 0.00035845212114397505,
      "loss": 3.0376,
      "step": 100820
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.004124641418457,
      "learning_rate": 0.00035844810897216087,
      "loss": 3.182,
      "step": 100821
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9130635261535645,
      "learning_rate": 0.0003584440967894802,
      "loss": 2.96,
      "step": 100822
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.6646456718444824,
      "learning_rate": 0.0003584400845959335,
      "loss": 2.9262,
      "step": 100823
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9827364683151245,
      "learning_rate": 0.00035843607239152177,
      "loss": 2.9145,
      "step": 100824
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.506669044494629,
      "learning_rate": 0.00035843206017624566,
      "loss": 3.2278,
      "step": 100825
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0996499061584473,
      "learning_rate": 0.00035842804795010595,
      "loss": 2.7743,
      "step": 100826
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.080861806869507,
      "learning_rate": 0.0003584240357131033,
      "loss": 3.1343,
      "step": 100827
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6239503622055054,
      "learning_rate": 0.0003584200234652386,
      "loss": 3.1049,
      "step": 100828
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8802200555801392,
      "learning_rate": 0.0003584160112065124,
      "loss": 3.2653,
      "step": 100829
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.047119379043579,
      "learning_rate": 0.00035841199893692563,
      "loss": 2.9564,
      "step": 100830
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.999306321144104,
      "learning_rate": 0.00035840798665647905,
      "loss": 3.0811,
      "step": 100831
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1928887367248535,
      "learning_rate": 0.0003584039743651732,
      "loss": 3.0838,
      "step": 100832
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5480414628982544,
      "learning_rate": 0.000358399962063009,
      "loss": 3.2978,
      "step": 100833
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8019890785217285,
      "learning_rate": 0.00035839594974998717,
      "loss": 2.8477,
      "step": 100834
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.284902811050415,
      "learning_rate": 0.00035839193742610837,
      "loss": 3.1433,
      "step": 100835
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6297920942306519,
      "learning_rate": 0.0003583879250913734,
      "loss": 3.1043,
      "step": 100836
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9101983308792114,
      "learning_rate": 0.00035838391274578313,
      "loss": 2.9603,
      "step": 100837
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9629542827606201,
      "learning_rate": 0.0003583799003893381,
      "loss": 2.9149,
      "step": 100838
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8875625133514404,
      "learning_rate": 0.00035837588802203906,
      "loss": 2.7783,
      "step": 100839
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1588480472564697,
      "learning_rate": 0.000358371875643887,
      "loss": 2.8595,
      "step": 100840
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.649387001991272,
      "learning_rate": 0.0003583678632548824,
      "loss": 3.0501,
      "step": 100841
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.65402090549469,
      "learning_rate": 0.00035836385085502614,
      "loss": 2.8793,
      "step": 100842
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.473212718963623,
      "learning_rate": 0.0003583598384443189,
      "loss": 2.916,
      "step": 100843
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8886481523513794,
      "learning_rate": 0.00035835582602276156,
      "loss": 2.9912,
      "step": 100844
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6392648220062256,
      "learning_rate": 0.00035835181359035467,
      "loss": 3.0901,
      "step": 100845
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.888129234313965,
      "learning_rate": 0.0003583478011470992,
      "loss": 3.001,
      "step": 100846
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7293838262557983,
      "learning_rate": 0.0003583437886929957,
      "loss": 3.1972,
      "step": 100847
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9159538745880127,
      "learning_rate": 0.00035833977622804495,
      "loss": 3.1645,
      "step": 100848
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.06005859375,
      "learning_rate": 0.00035833576375224774,
      "loss": 3.0933,
      "step": 100849
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8431296348571777,
      "learning_rate": 0.0003583317512656049,
      "loss": 2.9931,
      "step": 100850
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5700352191925049,
      "learning_rate": 0.000358327738768117,
      "loss": 2.9705,
      "step": 100851
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.722512125968933,
      "learning_rate": 0.00035832372625978486,
      "loss": 2.8503,
      "step": 100852
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1962757110595703,
      "learning_rate": 0.00035831971374060927,
      "loss": 2.9163,
      "step": 100853
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.142296075820923,
      "learning_rate": 0.00035831570121059106,
      "loss": 2.9566,
      "step": 100854
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5975062847137451,
      "learning_rate": 0.0003583116886697307,
      "loss": 3.0187,
      "step": 100855
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7317365407943726,
      "learning_rate": 0.00035830767611802917,
      "loss": 2.9534,
      "step": 100856
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6858083009719849,
      "learning_rate": 0.00035830366355548706,
      "loss": 2.8696,
      "step": 100857
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.304596424102783,
      "learning_rate": 0.00035829965098210526,
      "loss": 3.0756,
      "step": 100858
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4923174381256104,
      "learning_rate": 0.0003582956383978845,
      "loss": 3.1687,
      "step": 100859
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.068147897720337,
      "learning_rate": 0.00035829162580282536,
      "loss": 2.9066,
      "step": 100860
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4611400365829468,
      "learning_rate": 0.00035828761319692883,
      "loss": 2.865,
      "step": 100861
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0164783000946045,
      "learning_rate": 0.0003582836005801955,
      "loss": 2.921,
      "step": 100862
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8402674198150635,
      "learning_rate": 0.0003582795879526261,
      "loss": 2.9856,
      "step": 100863
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.809623122215271,
      "learning_rate": 0.0003582755753142214,
      "loss": 2.9555,
      "step": 100864
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0180165767669678,
      "learning_rate": 0.0003582715626649824,
      "loss": 2.9479,
      "step": 100865
    },
    {
      "epoch": 1.31,
      "grad_norm": 5.730138301849365,
      "learning_rate": 0.00035826755000490935,
      "loss": 2.9549,
      "step": 100866
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9266993999481201,
      "learning_rate": 0.00035826353733400335,
      "loss": 2.8389,
      "step": 100867
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.091853618621826,
      "learning_rate": 0.0003582595246522651,
      "loss": 2.8366,
      "step": 100868
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5495731830596924,
      "learning_rate": 0.0003582555119596952,
      "loss": 2.9479,
      "step": 100869
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7187923192977905,
      "learning_rate": 0.0003582514992562946,
      "loss": 2.8644,
      "step": 100870
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9900784492492676,
      "learning_rate": 0.000358247486542064,
      "loss": 3.0173,
      "step": 100871
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6531946659088135,
      "learning_rate": 0.0003582434738170039,
      "loss": 3.2014,
      "step": 100872
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8529554605484009,
      "learning_rate": 0.00035823946108111534,
      "loss": 3.0471,
      "step": 100873
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7092636823654175,
      "learning_rate": 0.00035823544833439905,
      "loss": 2.7525,
      "step": 100874
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6737009286880493,
      "learning_rate": 0.0003582314355768556,
      "loss": 2.867,
      "step": 100875
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.592152714729309,
      "learning_rate": 0.00035822742280848584,
      "loss": 2.8342,
      "step": 100876
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8236497640609741,
      "learning_rate": 0.00035822341002929054,
      "loss": 2.99,
      "step": 100877
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0601978302001953,
      "learning_rate": 0.00035821939723927035,
      "loss": 2.9964,
      "step": 100878
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0862503051757812,
      "learning_rate": 0.0003582153844384261,
      "loss": 3.1106,
      "step": 100879
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6715775728225708,
      "learning_rate": 0.00035821137162675854,
      "loss": 3.0178,
      "step": 100880
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5530318021774292,
      "learning_rate": 0.0003582073588042683,
      "loss": 2.9901,
      "step": 100881
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.826416015625,
      "learning_rate": 0.0003582033459709563,
      "loss": 3.0324,
      "step": 100882
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8604642152786255,
      "learning_rate": 0.0003581993331268232,
      "loss": 2.8885,
      "step": 100883
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.393894910812378,
      "learning_rate": 0.0003581953202718697,
      "loss": 2.9806,
      "step": 100884
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.6990721225738525,
      "learning_rate": 0.0003581913074060966,
      "loss": 2.8869,
      "step": 100885
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4156365394592285,
      "learning_rate": 0.00035818729452950467,
      "loss": 3.0934,
      "step": 100886
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6715786457061768,
      "learning_rate": 0.00035818328164209456,
      "loss": 3.056,
      "step": 100887
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.758007287979126,
      "learning_rate": 0.0003581792687438671,
      "loss": 2.7538,
      "step": 100888
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5136351585388184,
      "learning_rate": 0.0003581752558348231,
      "loss": 3.0997,
      "step": 100889
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1850836277008057,
      "learning_rate": 0.0003581712429149631,
      "loss": 3.0025,
      "step": 100890
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.8990516662597656,
      "learning_rate": 0.000358167229984288,
      "loss": 2.88,
      "step": 100891
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.759352684020996,
      "learning_rate": 0.00035816321704279866,
      "loss": 3.076,
      "step": 100892
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8368247747421265,
      "learning_rate": 0.00035815920409049546,
      "loss": 2.8892,
      "step": 100893
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9648882150650024,
      "learning_rate": 0.00035815519112737947,
      "loss": 2.8734,
      "step": 100894
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.596306324005127,
      "learning_rate": 0.0003581511781534514,
      "loss": 2.8527,
      "step": 100895
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0220041275024414,
      "learning_rate": 0.0003581471651687118,
      "loss": 3.0139,
      "step": 100896
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8423962593078613,
      "learning_rate": 0.0003581431521731616,
      "loss": 2.8588,
      "step": 100897
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6745344400405884,
      "learning_rate": 0.0003581391391668015,
      "loss": 3.2653,
      "step": 100898
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.320604085922241,
      "learning_rate": 0.0003581351261496322,
      "loss": 3.131,
      "step": 100899
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.4135992527008057,
      "learning_rate": 0.0003581311131216545,
      "loss": 2.7987,
      "step": 100900
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.068622589111328,
      "learning_rate": 0.0003581271000828692,
      "loss": 2.8183,
      "step": 100901
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.338951826095581,
      "learning_rate": 0.0003581230870332769,
      "loss": 2.957,
      "step": 100902
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8838834762573242,
      "learning_rate": 0.0003581190739728784,
      "loss": 3.0249,
      "step": 100903
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.534124493598938,
      "learning_rate": 0.00035811506090167454,
      "loss": 2.8229,
      "step": 100904
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.645174503326416,
      "learning_rate": 0.0003581110478196659,
      "loss": 3.2201,
      "step": 100905
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5387425422668457,
      "learning_rate": 0.0003581070347268534,
      "loss": 3.0112,
      "step": 100906
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7084757089614868,
      "learning_rate": 0.00035810302162323773,
      "loss": 2.9157,
      "step": 100907
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9881459474563599,
      "learning_rate": 0.0003580990085088195,
      "loss": 2.9314,
      "step": 100908
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8951680660247803,
      "learning_rate": 0.0003580949953835996,
      "loss": 3.1262,
      "step": 100909
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7018837928771973,
      "learning_rate": 0.00035809098224757883,
      "loss": 3.5517,
      "step": 100910
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0289907455444336,
      "learning_rate": 0.0003580869691007578,
      "loss": 2.8797,
      "step": 100911
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.7267184257507324,
      "learning_rate": 0.0003580829559431372,
      "loss": 3.058,
      "step": 100912
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.0321786403656006,
      "learning_rate": 0.00035807894277471806,
      "loss": 2.7967,
      "step": 100913
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2567555904388428,
      "learning_rate": 0.00035807492959550084,
      "loss": 3.3516,
      "step": 100914
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.540245294570923,
      "learning_rate": 0.00035807091640548637,
      "loss": 2.9977,
      "step": 100915
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.601447582244873,
      "learning_rate": 0.0003580669032046756,
      "loss": 3.2081,
      "step": 100916
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9661132097244263,
      "learning_rate": 0.0003580628899930689,
      "loss": 3.3629,
      "step": 100917
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6049312353134155,
      "learning_rate": 0.00035805887677066726,
      "loss": 3.0613,
      "step": 100918
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.256722927093506,
      "learning_rate": 0.00035805486353747147,
      "loss": 3.1103,
      "step": 100919
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.981060266494751,
      "learning_rate": 0.00035805085029348206,
      "loss": 3.071,
      "step": 100920
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9525879621505737,
      "learning_rate": 0.00035804683703869996,
      "loss": 3.1336,
      "step": 100921
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.5275254249572754,
      "learning_rate": 0.0003580428237731259,
      "loss": 3.0902,
      "step": 100922
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.859981060028076,
      "learning_rate": 0.00035803881049676045,
      "loss": 3.094,
      "step": 100923
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6276060342788696,
      "learning_rate": 0.0003580347972096046,
      "loss": 2.9905,
      "step": 100924
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6849861145019531,
      "learning_rate": 0.00035803078391165896,
      "loss": 2.8955,
      "step": 100925
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.719983458518982,
      "learning_rate": 0.00035802677060292436,
      "loss": 3.1721,
      "step": 100926
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.811511516571045,
      "learning_rate": 0.00035802275728340135,
      "loss": 2.8872,
      "step": 100927
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0168960094451904,
      "learning_rate": 0.0003580187439530909,
      "loss": 2.95,
      "step": 100928
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6432559490203857,
      "learning_rate": 0.0003580147306119937,
      "loss": 2.7041,
      "step": 100929
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8027675151824951,
      "learning_rate": 0.00035801071726011043,
      "loss": 3.0643,
      "step": 100930
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.724877953529358,
      "learning_rate": 0.00035800670389744187,
      "loss": 2.8928,
      "step": 100931
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9323060512542725,
      "learning_rate": 0.0003580026905239888,
      "loss": 3.1644,
      "step": 100932
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7281043529510498,
      "learning_rate": 0.0003579986771397519,
      "loss": 3.0801,
      "step": 100933
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6224983930587769,
      "learning_rate": 0.0003579946637447319,
      "loss": 3.0022,
      "step": 100934
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.812824010848999,
      "learning_rate": 0.00035799065033892977,
      "loss": 3.2074,
      "step": 100935
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5981172323226929,
      "learning_rate": 0.00035798663692234594,
      "loss": 3.0157,
      "step": 100936
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6238162517547607,
      "learning_rate": 0.0003579826234949813,
      "loss": 2.9989,
      "step": 100937
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9878870248794556,
      "learning_rate": 0.0003579786100568366,
      "loss": 3.1696,
      "step": 100938
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.075199842453003,
      "learning_rate": 0.0003579745966079126,
      "loss": 2.9782,
      "step": 100939
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5493241548538208,
      "learning_rate": 0.00035797058314821005,
      "loss": 3.1785,
      "step": 100940
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9519540071487427,
      "learning_rate": 0.00035796656967772967,
      "loss": 2.9496,
      "step": 100941
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6710975170135498,
      "learning_rate": 0.00035796255619647215,
      "loss": 3.1795,
      "step": 100942
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.4355300664901733,
      "learning_rate": 0.0003579585427044384,
      "loss": 2.9082,
      "step": 100943
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.67264986038208,
      "learning_rate": 0.000357954529201629,
      "loss": 3.0075,
      "step": 100944
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5775846242904663,
      "learning_rate": 0.00035795051568804473,
      "loss": 2.9185,
      "step": 100945
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.3651697635650635,
      "learning_rate": 0.00035794650216368644,
      "loss": 2.4685,
      "step": 100946
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.27055287361145,
      "learning_rate": 0.00035794248862855476,
      "loss": 3.2928,
      "step": 100947
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5993645191192627,
      "learning_rate": 0.00035793847508265044,
      "loss": 3.1515,
      "step": 100948
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7995692491531372,
      "learning_rate": 0.0003579344615259743,
      "loss": 2.941,
      "step": 100949
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6806597709655762,
      "learning_rate": 0.0003579304479585271,
      "loss": 3.088,
      "step": 100950
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0200114250183105,
      "learning_rate": 0.0003579264343803095,
      "loss": 2.9341,
      "step": 100951
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.621014952659607,
      "learning_rate": 0.00035792242079132224,
      "loss": 3.2126,
      "step": 100952
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5904186964035034,
      "learning_rate": 0.00035791840719156615,
      "loss": 2.9704,
      "step": 100953
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7141392230987549,
      "learning_rate": 0.000357914393581042,
      "loss": 3.0459,
      "step": 100954
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.06408953666687,
      "learning_rate": 0.0003579103799597503,
      "loss": 2.8521,
      "step": 100955
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7683391571044922,
      "learning_rate": 0.00035790636632769215,
      "loss": 2.9955,
      "step": 100956
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6039565801620483,
      "learning_rate": 0.00035790235268486797,
      "loss": 3.0528,
      "step": 100957
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.651577115058899,
      "learning_rate": 0.0003578983390312787,
      "loss": 3.2826,
      "step": 100958
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.816884994506836,
      "learning_rate": 0.00035789432536692507,
      "loss": 3.0128,
      "step": 100959
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8755435943603516,
      "learning_rate": 0.0003578903116918078,
      "loss": 3.1072,
      "step": 100960
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.704577922821045,
      "learning_rate": 0.0003578862980059276,
      "loss": 3.0854,
      "step": 100961
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5268492698669434,
      "learning_rate": 0.0003578822843092853,
      "loss": 3.1244,
      "step": 100962
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6395411491394043,
      "learning_rate": 0.00035787827060188146,
      "loss": 3.0974,
      "step": 100963
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8263334035873413,
      "learning_rate": 0.00035787425688371704,
      "loss": 3.055,
      "step": 100964
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0774898529052734,
      "learning_rate": 0.0003578702431547927,
      "loss": 2.9013,
      "step": 100965
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.790155291557312,
      "learning_rate": 0.00035786622941510923,
      "loss": 2.6733,
      "step": 100966
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.73447322845459,
      "learning_rate": 0.0003578622156646672,
      "loss": 3.1484,
      "step": 100967
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.999395728111267,
      "learning_rate": 0.00035785820190346765,
      "loss": 3.0298,
      "step": 100968
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.672168254852295,
      "learning_rate": 0.0003578541881315111,
      "loss": 2.8764,
      "step": 100969
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.729637861251831,
      "learning_rate": 0.0003578501743487983,
      "loss": 3.0374,
      "step": 100970
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1161999702453613,
      "learning_rate": 0.00035784616055533014,
      "loss": 3.1443,
      "step": 100971
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.0820415019989014,
      "learning_rate": 0.0003578421467511073,
      "loss": 3.0482,
      "step": 100972
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7898097038269043,
      "learning_rate": 0.0003578381329361304,
      "loss": 3.0729,
      "step": 100973
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8733887672424316,
      "learning_rate": 0.00035783411911040043,
      "loss": 3.0143,
      "step": 100974
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9892358779907227,
      "learning_rate": 0.00035783010527391795,
      "loss": 3.012,
      "step": 100975
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.744303584098816,
      "learning_rate": 0.00035782609142668377,
      "loss": 2.9893,
      "step": 100976
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8566319942474365,
      "learning_rate": 0.0003578220775686987,
      "loss": 3.0787,
      "step": 100977
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.659261703491211,
      "learning_rate": 0.0003578180636999633,
      "loss": 3.0799,
      "step": 100978
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6511293649673462,
      "learning_rate": 0.00035781404982047844,
      "loss": 2.7505,
      "step": 100979
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8089154958724976,
      "learning_rate": 0.0003578100359302449,
      "loss": 2.873,
      "step": 100980
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.1464736461639404,
      "learning_rate": 0.0003578060220292634,
      "loss": 2.8166,
      "step": 100981
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2488434314727783,
      "learning_rate": 0.0003578020081175346,
      "loss": 2.9303,
      "step": 100982
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.9144723415374756,
      "learning_rate": 0.00035779799419505937,
      "loss": 3.1266,
      "step": 100983
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.180474042892456,
      "learning_rate": 0.0003577939802618384,
      "loss": 2.9665,
      "step": 100984
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.8342036008834839,
      "learning_rate": 0.00035778996631787247,
      "loss": 3.0635,
      "step": 100985
    },
    {
      "epoch": 1.31,
      "grad_norm": 6.298354148864746,
      "learning_rate": 0.00035778595236316224,
      "loss": 2.9214,
      "step": 100986
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.192370653152466,
      "learning_rate": 0.0003577819383977085,
      "loss": 2.7546,
      "step": 100987
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6537715196609497,
      "learning_rate": 0.000357777924421512,
      "loss": 3.1929,
      "step": 100988
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5663621425628662,
      "learning_rate": 0.00035777391043457347,
      "loss": 3.1376,
      "step": 100989
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.7936468124389648,
      "learning_rate": 0.00035776989643689387,
      "loss": 3.1428,
      "step": 100990
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.6504098176956177,
      "learning_rate": 0.0003577658824284735,
      "loss": 2.8524,
      "step": 100991
    },
    {
      "epoch": 1.31,
      "grad_norm": 1.5067731142044067,
      "learning_rate": 0.00035776186840931354,
      "loss": 3.0597,
      "step": 100992
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.2956290245056152,
      "learning_rate": 0.00035775785437941454,
      "loss": 2.8597,
      "step": 100993
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4140706062316895,
      "learning_rate": 0.0003577538403387772,
      "loss": 3.0074,
      "step": 100994
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6450902223587036,
      "learning_rate": 0.0003577498262874023,
      "loss": 3.0653,
      "step": 100995
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4292961359024048,
      "learning_rate": 0.00035774581222529074,
      "loss": 2.8379,
      "step": 100996
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8492724895477295,
      "learning_rate": 0.000357741798152443,
      "loss": 2.9625,
      "step": 100997
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1723296642303467,
      "learning_rate": 0.00035773778406886003,
      "loss": 3.0022,
      "step": 100998
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9862536191940308,
      "learning_rate": 0.0003577337699745426,
      "loss": 3.1872,
      "step": 100999
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6525852680206299,
      "learning_rate": 0.0003577297558694913,
      "loss": 3.0407,
      "step": 101000
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.349177598953247,
      "learning_rate": 0.0003577257417537069,
      "loss": 2.88,
      "step": 101001
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7700862884521484,
      "learning_rate": 0.0003577217276271903,
      "loss": 3.0088,
      "step": 101002
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9226067066192627,
      "learning_rate": 0.00035771771348994206,
      "loss": 3.2722,
      "step": 101003
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.1063756942749023,
      "learning_rate": 0.00035771369934196304,
      "loss": 2.9832,
      "step": 101004
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.348172426223755,
      "learning_rate": 0.00035770968518325394,
      "loss": 3.0079,
      "step": 101005
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.0545904636383057,
      "learning_rate": 0.0003577056710138156,
      "loss": 2.9743,
      "step": 101006
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.729091763496399,
      "learning_rate": 0.00035770165683364854,
      "loss": 2.828,
      "step": 101007
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0631067752838135,
      "learning_rate": 0.0003576976426427537,
      "loss": 3.0357,
      "step": 101008
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3365471363067627,
      "learning_rate": 0.00035769362844113184,
      "loss": 2.7775,
      "step": 101009
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6433619260787964,
      "learning_rate": 0.00035768961422878357,
      "loss": 3.281,
      "step": 101010
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9542094469070435,
      "learning_rate": 0.00035768560000570983,
      "loss": 3.2123,
      "step": 101011
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8893141746520996,
      "learning_rate": 0.0003576815857719111,
      "loss": 3.0467,
      "step": 101012
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5162158012390137,
      "learning_rate": 0.00035767757152738833,
      "loss": 3.0343,
      "step": 101013
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6890918016433716,
      "learning_rate": 0.0003576735572721423,
      "loss": 3.1756,
      "step": 101014
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8820463418960571,
      "learning_rate": 0.0003576695430061736,
      "loss": 2.8854,
      "step": 101015
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8493949174880981,
      "learning_rate": 0.00035766552872948295,
      "loss": 3.3038,
      "step": 101016
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.077674150466919,
      "learning_rate": 0.00035766151444207136,
      "loss": 3.0631,
      "step": 101017
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0435006618499756,
      "learning_rate": 0.00035765750014393933,
      "loss": 3.1748,
      "step": 101018
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.5676581859588623,
      "learning_rate": 0.00035765348583508764,
      "loss": 3.0799,
      "step": 101019
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.813130259513855,
      "learning_rate": 0.00035764947151551715,
      "loss": 3.029,
      "step": 101020
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5244181156158447,
      "learning_rate": 0.00035764545718522845,
      "loss": 3.0241,
      "step": 101021
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.29960560798645,
      "learning_rate": 0.00035764144284422246,
      "loss": 3.1512,
      "step": 101022
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1167097091674805,
      "learning_rate": 0.00035763742849249973,
      "loss": 2.9271,
      "step": 101023
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.61150324344635,
      "learning_rate": 0.0003576334141300613,
      "loss": 2.9782,
      "step": 101024
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6873518228530884,
      "learning_rate": 0.0003576293997569075,
      "loss": 2.8862,
      "step": 101025
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8683502674102783,
      "learning_rate": 0.00035762538537303945,
      "loss": 3.1647,
      "step": 101026
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8799448013305664,
      "learning_rate": 0.0003576213709784578,
      "loss": 3.0188,
      "step": 101027
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.769176721572876,
      "learning_rate": 0.0003576173565731631,
      "loss": 3.2277,
      "step": 101028
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4532902240753174,
      "learning_rate": 0.0003576133421571563,
      "loss": 2.9072,
      "step": 101029
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.966750144958496,
      "learning_rate": 0.00035760932773043805,
      "loss": 3.2799,
      "step": 101030
    },
    {
      "epoch": 1.32,
      "grad_norm": 5.014206409454346,
      "learning_rate": 0.0003576053132930093,
      "loss": 2.838,
      "step": 101031
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.9676339626312256,
      "learning_rate": 0.0003576012988448705,
      "loss": 3.2195,
      "step": 101032
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.447352409362793,
      "learning_rate": 0.0003575972843860225,
      "loss": 2.8773,
      "step": 101033
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7767446041107178,
      "learning_rate": 0.00035759326991646615,
      "loss": 2.8856,
      "step": 101034
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4909307956695557,
      "learning_rate": 0.00035758925543620215,
      "loss": 2.8848,
      "step": 101035
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7174781560897827,
      "learning_rate": 0.00035758524094523116,
      "loss": 2.9514,
      "step": 101036
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6873478889465332,
      "learning_rate": 0.000357581226443554,
      "loss": 3.0504,
      "step": 101037
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.512233018875122,
      "learning_rate": 0.0003575772119311715,
      "loss": 2.7951,
      "step": 101038
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1922712326049805,
      "learning_rate": 0.0003575731974080842,
      "loss": 3.0619,
      "step": 101039
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1810250282287598,
      "learning_rate": 0.00035756918287429293,
      "loss": 2.9642,
      "step": 101040
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.86216402053833,
      "learning_rate": 0.00035756516832979857,
      "loss": 2.8794,
      "step": 101041
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0565152168273926,
      "learning_rate": 0.00035756115377460167,
      "loss": 2.9343,
      "step": 101042
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9847699403762817,
      "learning_rate": 0.0003575571392087031,
      "loss": 2.8744,
      "step": 101043
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.571087121963501,
      "learning_rate": 0.0003575531246321036,
      "loss": 3.1055,
      "step": 101044
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.849768877029419,
      "learning_rate": 0.0003575491100448038,
      "loss": 3.0039,
      "step": 101045
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9191007614135742,
      "learning_rate": 0.00035754509544680456,
      "loss": 2.9632,
      "step": 101046
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7689646482467651,
      "learning_rate": 0.00035754108083810673,
      "loss": 2.9247,
      "step": 101047
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4386706352233887,
      "learning_rate": 0.0003575370662187108,
      "loss": 2.8528,
      "step": 101048
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1164910793304443,
      "learning_rate": 0.00035753305158861765,
      "loss": 3.1245,
      "step": 101049
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8133912086486816,
      "learning_rate": 0.0003575290369478281,
      "loss": 2.9315,
      "step": 101050
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7697679996490479,
      "learning_rate": 0.0003575250222963427,
      "loss": 3.0931,
      "step": 101051
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8966233730316162,
      "learning_rate": 0.0003575210076341624,
      "loss": 3.0375,
      "step": 101052
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7947949171066284,
      "learning_rate": 0.00035751699296128784,
      "loss": 3.0318,
      "step": 101053
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5484213829040527,
      "learning_rate": 0.0003575129782777197,
      "loss": 2.643,
      "step": 101054
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7602736949920654,
      "learning_rate": 0.00035750896358345893,
      "loss": 2.8816,
      "step": 101055
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4363590478897095,
      "learning_rate": 0.00035750494887850607,
      "loss": 2.9572,
      "step": 101056
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7693511247634888,
      "learning_rate": 0.00035750093416286204,
      "loss": 2.9775,
      "step": 101057
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1319122314453125,
      "learning_rate": 0.00035749691943652746,
      "loss": 2.8213,
      "step": 101058
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.026872158050537,
      "learning_rate": 0.0003574929046995031,
      "loss": 2.9223,
      "step": 101059
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0040974617004395,
      "learning_rate": 0.0003574888899517898,
      "loss": 3.1608,
      "step": 101060
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.654322385787964,
      "learning_rate": 0.0003574848751933882,
      "loss": 3.0511,
      "step": 101061
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1390178203582764,
      "learning_rate": 0.000357480860424299,
      "loss": 2.8541,
      "step": 101062
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.030414342880249,
      "learning_rate": 0.00035747684564452307,
      "loss": 2.9556,
      "step": 101063
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3920445442199707,
      "learning_rate": 0.00035747283085406107,
      "loss": 2.8969,
      "step": 101064
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9494720697402954,
      "learning_rate": 0.0003574688160529138,
      "loss": 3.279,
      "step": 101065
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3040688037872314,
      "learning_rate": 0.0003574648012410821,
      "loss": 3.2292,
      "step": 101066
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.790136456489563,
      "learning_rate": 0.0003574607864185665,
      "loss": 3.0831,
      "step": 101067
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.018721580505371,
      "learning_rate": 0.0003574567715853679,
      "loss": 2.9696,
      "step": 101068
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1547136306762695,
      "learning_rate": 0.000357452756741487,
      "loss": 2.9956,
      "step": 101069
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.009148597717285,
      "learning_rate": 0.0003574487418869245,
      "loss": 2.8828,
      "step": 101070
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0061967372894287,
      "learning_rate": 0.0003574447270216812,
      "loss": 2.7859,
      "step": 101071
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6187193393707275,
      "learning_rate": 0.0003574407121457579,
      "loss": 3.2634,
      "step": 101072
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.5162160396575928,
      "learning_rate": 0.00035743669725915524,
      "loss": 3.1232,
      "step": 101073
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.373810291290283,
      "learning_rate": 0.000357432682361874,
      "loss": 3.0579,
      "step": 101074
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3652122020721436,
      "learning_rate": 0.000357428667453915,
      "loss": 2.7419,
      "step": 101075
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.7247657775878906,
      "learning_rate": 0.00035742465253527886,
      "loss": 2.9644,
      "step": 101076
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.529531478881836,
      "learning_rate": 0.00035742063760596645,
      "loss": 2.9336,
      "step": 101077
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6554131507873535,
      "learning_rate": 0.00035741662266597845,
      "loss": 2.8583,
      "step": 101078
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0356435775756836,
      "learning_rate": 0.0003574126077153156,
      "loss": 2.864,
      "step": 101079
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6817235946655273,
      "learning_rate": 0.00035740859275397864,
      "loss": 2.7302,
      "step": 101080
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.637878894805908,
      "learning_rate": 0.0003574045777819684,
      "loss": 2.9513,
      "step": 101081
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1036789417266846,
      "learning_rate": 0.0003574005627992855,
      "loss": 2.8902,
      "step": 101082
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9496599435806274,
      "learning_rate": 0.0003573965478059308,
      "loss": 3.0634,
      "step": 101083
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7426104545593262,
      "learning_rate": 0.00035739253280190497,
      "loss": 2.9572,
      "step": 101084
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.5876684188842773,
      "learning_rate": 0.0003573885177872088,
      "loss": 3.1156,
      "step": 101085
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.4884588718414307,
      "learning_rate": 0.00035738450276184304,
      "loss": 3.2013,
      "step": 101086
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.90310537815094,
      "learning_rate": 0.0003573804877258084,
      "loss": 2.8313,
      "step": 101087
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1605496406555176,
      "learning_rate": 0.0003573764726791056,
      "loss": 3.1012,
      "step": 101088
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2211201190948486,
      "learning_rate": 0.00035737245762173543,
      "loss": 3.1483,
      "step": 101089
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6665966510772705,
      "learning_rate": 0.0003573684425536987,
      "loss": 2.9295,
      "step": 101090
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4197701215744019,
      "learning_rate": 0.0003573644274749961,
      "loss": 2.8086,
      "step": 101091
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7704416513442993,
      "learning_rate": 0.0003573604123856283,
      "loss": 3.0984,
      "step": 101092
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.75732421875,
      "learning_rate": 0.00035735639728559627,
      "loss": 3.0282,
      "step": 101093
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.806356906890869,
      "learning_rate": 0.0003573523821749004,
      "loss": 3.1404,
      "step": 101094
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9845677614212036,
      "learning_rate": 0.0003573483670535417,
      "loss": 2.9148,
      "step": 101095
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.021195888519287,
      "learning_rate": 0.00035734435192152095,
      "loss": 2.9462,
      "step": 101096
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7138737440109253,
      "learning_rate": 0.00035734033677883875,
      "loss": 3.1589,
      "step": 101097
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7857693433761597,
      "learning_rate": 0.00035733632162549586,
      "loss": 3.0278,
      "step": 101098
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9794611930847168,
      "learning_rate": 0.0003573323064614931,
      "loss": 3.1655,
      "step": 101099
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0021896362304688,
      "learning_rate": 0.00035732829128683115,
      "loss": 2.8548,
      "step": 101100
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.594104528427124,
      "learning_rate": 0.0003573242761015108,
      "loss": 2.8615,
      "step": 101101
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8040999174118042,
      "learning_rate": 0.0003573202609055328,
      "loss": 2.8429,
      "step": 101102
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.186107635498047,
      "learning_rate": 0.0003573162456988979,
      "loss": 3.1753,
      "step": 101103
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.294994831085205,
      "learning_rate": 0.0003573122304816068,
      "loss": 3.0012,
      "step": 101104
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7299531698226929,
      "learning_rate": 0.00035730821525366036,
      "loss": 2.6728,
      "step": 101105
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.3953540325164795,
      "learning_rate": 0.0003573042000150591,
      "loss": 2.9499,
      "step": 101106
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4856053590774536,
      "learning_rate": 0.0003573001847658039,
      "loss": 2.9063,
      "step": 101107
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2162346839904785,
      "learning_rate": 0.0003572961695058957,
      "loss": 2.9107,
      "step": 101108
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9340401887893677,
      "learning_rate": 0.0003572921542353349,
      "loss": 3.0566,
      "step": 101109
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.471578598022461,
      "learning_rate": 0.00035728813895412244,
      "loss": 2.8401,
      "step": 101110
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.778611183166504,
      "learning_rate": 0.00035728412366225907,
      "loss": 2.8388,
      "step": 101111
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.56719172000885,
      "learning_rate": 0.0003572801083597455,
      "loss": 2.9707,
      "step": 101112
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.806990385055542,
      "learning_rate": 0.0003572760930465824,
      "loss": 3.0443,
      "step": 101113
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.198272228240967,
      "learning_rate": 0.0003572720777227706,
      "loss": 2.8203,
      "step": 101114
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7297619581222534,
      "learning_rate": 0.0003572680623883109,
      "loss": 2.9943,
      "step": 101115
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6727725267410278,
      "learning_rate": 0.000357264047043204,
      "loss": 3.321,
      "step": 101116
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.367183208465576,
      "learning_rate": 0.0003572600316874506,
      "loss": 3.14,
      "step": 101117
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.192488670349121,
      "learning_rate": 0.00035725601632105147,
      "loss": 2.9418,
      "step": 101118
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.967788815498352,
      "learning_rate": 0.0003572520009440074,
      "loss": 3.0084,
      "step": 101119
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.895254373550415,
      "learning_rate": 0.00035724798555631905,
      "loss": 2.9907,
      "step": 101120
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9143131971359253,
      "learning_rate": 0.00035724397015798724,
      "loss": 3.2967,
      "step": 101121
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7914836406707764,
      "learning_rate": 0.0003572399547490127,
      "loss": 2.9478,
      "step": 101122
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5753082036972046,
      "learning_rate": 0.0003572359393293962,
      "loss": 2.9863,
      "step": 101123
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4189488887786865,
      "learning_rate": 0.0003572319238991384,
      "loss": 2.9076,
      "step": 101124
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8542076349258423,
      "learning_rate": 0.00035722790845824017,
      "loss": 3.2045,
      "step": 101125
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8455570936203003,
      "learning_rate": 0.00035722389300670216,
      "loss": 2.9795,
      "step": 101126
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5914925336837769,
      "learning_rate": 0.0003572198775445251,
      "loss": 3.0702,
      "step": 101127
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6276671886444092,
      "learning_rate": 0.00035721586207170984,
      "loss": 3.0126,
      "step": 101128
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.96769642829895,
      "learning_rate": 0.00035721184658825704,
      "loss": 2.897,
      "step": 101129
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.391723394393921,
      "learning_rate": 0.0003572078310941676,
      "loss": 3.0851,
      "step": 101130
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7299867868423462,
      "learning_rate": 0.000357203815589442,
      "loss": 3.0133,
      "step": 101131
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.054595947265625,
      "learning_rate": 0.0003571998000740812,
      "loss": 3.2606,
      "step": 101132
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.797914505004883,
      "learning_rate": 0.0003571957845480859,
      "loss": 2.9424,
      "step": 101133
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7879360914230347,
      "learning_rate": 0.0003571917690114567,
      "loss": 2.6954,
      "step": 101134
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.040945291519165,
      "learning_rate": 0.00035718775346419454,
      "loss": 2.6476,
      "step": 101135
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6914103031158447,
      "learning_rate": 0.00035718373790630015,
      "loss": 3.0115,
      "step": 101136
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5682151317596436,
      "learning_rate": 0.0003571797223377742,
      "loss": 3.0585,
      "step": 101137
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8397908210754395,
      "learning_rate": 0.0003571757067586174,
      "loss": 3.0588,
      "step": 101138
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.0874757766723633,
      "learning_rate": 0.00035717169116883065,
      "loss": 2.6512,
      "step": 101139
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9929298162460327,
      "learning_rate": 0.0003571676755684145,
      "loss": 2.7884,
      "step": 101140
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8293832540512085,
      "learning_rate": 0.00035716365995736986,
      "loss": 2.8696,
      "step": 101141
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0809807777404785,
      "learning_rate": 0.0003571596443356974,
      "loss": 2.9518,
      "step": 101142
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6586294174194336,
      "learning_rate": 0.0003571556287033979,
      "loss": 2.8883,
      "step": 101143
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8574483394622803,
      "learning_rate": 0.00035715161306047207,
      "loss": 2.8237,
      "step": 101144
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9380826950073242,
      "learning_rate": 0.00035714759740692074,
      "loss": 2.9207,
      "step": 101145
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2012693881988525,
      "learning_rate": 0.00035714358174274445,
      "loss": 2.9964,
      "step": 101146
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6775342226028442,
      "learning_rate": 0.00035713956606794417,
      "loss": 2.9509,
      "step": 101147
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8663281202316284,
      "learning_rate": 0.00035713555038252066,
      "loss": 3.2128,
      "step": 101148
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6932040452957153,
      "learning_rate": 0.00035713153468647446,
      "loss": 3.0002,
      "step": 101149
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8384730815887451,
      "learning_rate": 0.00035712751897980645,
      "loss": 3.0039,
      "step": 101150
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7597384452819824,
      "learning_rate": 0.0003571235032625174,
      "loss": 3.1629,
      "step": 101151
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.440389633178711,
      "learning_rate": 0.00035711948753460793,
      "loss": 3.2224,
      "step": 101152
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9619371891021729,
      "learning_rate": 0.0003571154717960789,
      "loss": 2.9426,
      "step": 101153
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.871218204498291,
      "learning_rate": 0.0003571114560469311,
      "loss": 3.0614,
      "step": 101154
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.5320327281951904,
      "learning_rate": 0.00035710744028716513,
      "loss": 3.0656,
      "step": 101155
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.240170955657959,
      "learning_rate": 0.0003571034245167818,
      "loss": 3.1658,
      "step": 101156
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2419188022613525,
      "learning_rate": 0.000357099408735782,
      "loss": 2.9094,
      "step": 101157
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1546812057495117,
      "learning_rate": 0.0003570953929441661,
      "loss": 2.8759,
      "step": 101158
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0028417110443115,
      "learning_rate": 0.0003570913771419352,
      "loss": 2.7602,
      "step": 101159
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8065475225448608,
      "learning_rate": 0.00035708736132909004,
      "loss": 3.0806,
      "step": 101160
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9354168176651,
      "learning_rate": 0.0003570833455056311,
      "loss": 3.1165,
      "step": 101161
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.327826976776123,
      "learning_rate": 0.00035707932967155937,
      "loss": 2.8853,
      "step": 101162
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.998900055885315,
      "learning_rate": 0.0003570753138268756,
      "loss": 2.8694,
      "step": 101163
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9027400016784668,
      "learning_rate": 0.0003570712979715803,
      "loss": 2.8431,
      "step": 101164
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9615564346313477,
      "learning_rate": 0.0003570672821056744,
      "loss": 3.1312,
      "step": 101165
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9161466360092163,
      "learning_rate": 0.0003570632662291587,
      "loss": 3.173,
      "step": 101166
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.353494644165039,
      "learning_rate": 0.0003570592503420338,
      "loss": 3.1396,
      "step": 101167
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7951600551605225,
      "learning_rate": 0.0003570552344443005,
      "loss": 2.8487,
      "step": 101168
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8120735883712769,
      "learning_rate": 0.0003570512185359596,
      "loss": 2.8255,
      "step": 101169
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8972129821777344,
      "learning_rate": 0.00035704720261701175,
      "loss": 2.9184,
      "step": 101170
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6314778327941895,
      "learning_rate": 0.00035704318668745773,
      "loss": 3.0455,
      "step": 101171
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.074450731277466,
      "learning_rate": 0.00035703917074729843,
      "loss": 3.0374,
      "step": 101172
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5633573532104492,
      "learning_rate": 0.0003570351547965344,
      "loss": 2.9109,
      "step": 101173
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.941236734390259,
      "learning_rate": 0.0003570311388351664,
      "loss": 3.0789,
      "step": 101174
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9938551187515259,
      "learning_rate": 0.0003570271228631953,
      "loss": 2.9887,
      "step": 101175
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8513669967651367,
      "learning_rate": 0.00035702310688062174,
      "loss": 2.9315,
      "step": 101176
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6477423906326294,
      "learning_rate": 0.0003570190908874465,
      "loss": 3.0127,
      "step": 101177
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7843321561813354,
      "learning_rate": 0.00035701507488367036,
      "loss": 2.9745,
      "step": 101178
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7451685667037964,
      "learning_rate": 0.00035701105886929406,
      "loss": 3.0596,
      "step": 101179
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2915358543395996,
      "learning_rate": 0.00035700704284431827,
      "loss": 2.861,
      "step": 101180
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8920618295669556,
      "learning_rate": 0.0003570030268087439,
      "loss": 2.9781,
      "step": 101181
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6997851133346558,
      "learning_rate": 0.00035699901076257154,
      "loss": 3.0048,
      "step": 101182
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.276449203491211,
      "learning_rate": 0.0003569949947058019,
      "loss": 2.9714,
      "step": 101183
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9845234155654907,
      "learning_rate": 0.000356990978638436,
      "loss": 3.3109,
      "step": 101184
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.170628547668457,
      "learning_rate": 0.00035698696256047425,
      "loss": 2.994,
      "step": 101185
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8631831407546997,
      "learning_rate": 0.00035698294647191754,
      "loss": 3.0279,
      "step": 101186
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4167964458465576,
      "learning_rate": 0.00035697893037276677,
      "loss": 2.9768,
      "step": 101187
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.039868116378784,
      "learning_rate": 0.0003569749142630224,
      "loss": 2.69,
      "step": 101188
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6388704776763916,
      "learning_rate": 0.0003569708981426853,
      "loss": 3.2903,
      "step": 101189
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.703879952430725,
      "learning_rate": 0.0003569668820117563,
      "loss": 2.8044,
      "step": 101190
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.254091262817383,
      "learning_rate": 0.00035696286587023614,
      "loss": 2.7402,
      "step": 101191
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9862077236175537,
      "learning_rate": 0.0003569588497181254,
      "loss": 2.8417,
      "step": 101192
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8250998258590698,
      "learning_rate": 0.000356954833555425,
      "loss": 3.0063,
      "step": 101193
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.152115821838379,
      "learning_rate": 0.0003569508173821356,
      "loss": 2.9694,
      "step": 101194
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9618325233459473,
      "learning_rate": 0.000356946801198258,
      "loss": 2.907,
      "step": 101195
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3498315811157227,
      "learning_rate": 0.0003569427850037929,
      "loss": 3.0177,
      "step": 101196
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7743183374404907,
      "learning_rate": 0.00035693876879874104,
      "loss": 2.9808,
      "step": 101197
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9251277446746826,
      "learning_rate": 0.00035693475258310314,
      "loss": 3.0733,
      "step": 101198
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7767589092254639,
      "learning_rate": 0.00035693073635688,
      "loss": 2.7441,
      "step": 101199
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.107025146484375,
      "learning_rate": 0.0003569267201200725,
      "loss": 2.97,
      "step": 101200
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.910252332687378,
      "learning_rate": 0.0003569227038726812,
      "loss": 2.9803,
      "step": 101201
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7386059761047363,
      "learning_rate": 0.0003569186876147068,
      "loss": 3.2061,
      "step": 101202
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.919395089149475,
      "learning_rate": 0.00035691467134615017,
      "loss": 3.1219,
      "step": 101203
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0100021362304688,
      "learning_rate": 0.00035691065506701213,
      "loss": 3.0512,
      "step": 101204
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8013966083526611,
      "learning_rate": 0.0003569066387772932,
      "loss": 3.0041,
      "step": 101205
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6370110511779785,
      "learning_rate": 0.00035690262247699424,
      "loss": 2.8373,
      "step": 101206
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9970613718032837,
      "learning_rate": 0.0003568986061661161,
      "loss": 2.9345,
      "step": 101207
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.180638313293457,
      "learning_rate": 0.00035689458984465946,
      "loss": 3.1707,
      "step": 101208
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9498087167739868,
      "learning_rate": 0.00035689057351262493,
      "loss": 3.0378,
      "step": 101209
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3934600353240967,
      "learning_rate": 0.0003568865571700134,
      "loss": 2.9407,
      "step": 101210
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7915977239608765,
      "learning_rate": 0.0003568825408168257,
      "loss": 3.1389,
      "step": 101211
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.378370523452759,
      "learning_rate": 0.00035687852445306234,
      "loss": 2.933,
      "step": 101212
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.123380661010742,
      "learning_rate": 0.00035687450807872414,
      "loss": 3.2124,
      "step": 101213
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2657899856567383,
      "learning_rate": 0.000356870491693812,
      "loss": 3.0464,
      "step": 101214
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5933839082717896,
      "learning_rate": 0.0003568664752983266,
      "loss": 3.0107,
      "step": 101215
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.0588443279266357,
      "learning_rate": 0.0003568624588922686,
      "loss": 3.0732,
      "step": 101216
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.627870559692383,
      "learning_rate": 0.0003568584424756387,
      "loss": 2.9429,
      "step": 101217
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.316617965698242,
      "learning_rate": 0.0003568544260484379,
      "loss": 3.0364,
      "step": 101218
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.07951021194458,
      "learning_rate": 0.0003568504096106667,
      "loss": 3.2377,
      "step": 101219
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2114222049713135,
      "learning_rate": 0.0003568463931623259,
      "loss": 3.2204,
      "step": 101220
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8279368877410889,
      "learning_rate": 0.00035684237670341635,
      "loss": 2.9567,
      "step": 101221
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.517452597618103,
      "learning_rate": 0.0003568383602339387,
      "loss": 3.1965,
      "step": 101222
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.077282190322876,
      "learning_rate": 0.0003568343437538937,
      "loss": 3.0608,
      "step": 101223
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.578505039215088,
      "learning_rate": 0.00035683032726328227,
      "loss": 3.0323,
      "step": 101224
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.75621497631073,
      "learning_rate": 0.0003568263107621049,
      "loss": 2.8372,
      "step": 101225
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8687492609024048,
      "learning_rate": 0.0003568222942503623,
      "loss": 2.9557,
      "step": 101226
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0697379112243652,
      "learning_rate": 0.00035681827772805557,
      "loss": 2.909,
      "step": 101227
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9098613262176514,
      "learning_rate": 0.0003568142611951852,
      "loss": 3.2233,
      "step": 101228
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.157383441925049,
      "learning_rate": 0.000356810244651752,
      "loss": 2.918,
      "step": 101229
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1061246395111084,
      "learning_rate": 0.0003568062280977567,
      "loss": 2.8361,
      "step": 101230
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3672211170196533,
      "learning_rate": 0.0003568022115332,
      "loss": 2.8191,
      "step": 101231
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8100368976593018,
      "learning_rate": 0.0003567981949580827,
      "loss": 3.0753,
      "step": 101232
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.86533784866333,
      "learning_rate": 0.00035679417837240556,
      "loss": 3.1596,
      "step": 101233
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.340115547180176,
      "learning_rate": 0.0003567901617761694,
      "loss": 2.9677,
      "step": 101234
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.364384889602661,
      "learning_rate": 0.0003567861451693747,
      "loss": 2.9346,
      "step": 101235
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7099945545196533,
      "learning_rate": 0.00035678212855202253,
      "loss": 3.0345,
      "step": 101236
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0507426261901855,
      "learning_rate": 0.0003567781119241134,
      "loss": 3.1598,
      "step": 101237
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.3248767852783203,
      "learning_rate": 0.00035677409528564816,
      "loss": 2.8044,
      "step": 101238
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.656746745109558,
      "learning_rate": 0.00035677007863662766,
      "loss": 3.0873,
      "step": 101239
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6815590858459473,
      "learning_rate": 0.0003567660619770524,
      "loss": 3.2187,
      "step": 101240
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.102667808532715,
      "learning_rate": 0.00035676204530692325,
      "loss": 2.6212,
      "step": 101241
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.771876096725464,
      "learning_rate": 0.00035675802862624107,
      "loss": 3.1338,
      "step": 101242
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8936270475387573,
      "learning_rate": 0.00035675401193500646,
      "loss": 3.0037,
      "step": 101243
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1899573802948,
      "learning_rate": 0.0003567499952332201,
      "loss": 3.0322,
      "step": 101244
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2881107330322266,
      "learning_rate": 0.00035674597852088303,
      "loss": 3.1271,
      "step": 101245
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.025871753692627,
      "learning_rate": 0.0003567419617979957,
      "loss": 2.9797,
      "step": 101246
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0017731189727783,
      "learning_rate": 0.000356737945064559,
      "loss": 3.0587,
      "step": 101247
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8141562938690186,
      "learning_rate": 0.0003567339283205736,
      "loss": 3.1017,
      "step": 101248
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6579718589782715,
      "learning_rate": 0.00035672991156604034,
      "loss": 3.0555,
      "step": 101249
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7171651124954224,
      "learning_rate": 0.0003567258948009599,
      "loss": 2.8854,
      "step": 101250
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.040778160095215,
      "learning_rate": 0.0003567218780253331,
      "loss": 3.3349,
      "step": 101251
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5791491270065308,
      "learning_rate": 0.00035671786123916055,
      "loss": 2.9949,
      "step": 101252
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5572737455368042,
      "learning_rate": 0.00035671384444244304,
      "loss": 3.0855,
      "step": 101253
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.084897756576538,
      "learning_rate": 0.0003567098276351815,
      "loss": 2.5724,
      "step": 101254
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.13979172706604,
      "learning_rate": 0.0003567058108173765,
      "loss": 3.0098,
      "step": 101255
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4884700775146484,
      "learning_rate": 0.00035670179398902867,
      "loss": 2.8834,
      "step": 101256
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1350903511047363,
      "learning_rate": 0.000356697777150139,
      "loss": 2.9411,
      "step": 101257
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5899484157562256,
      "learning_rate": 0.0003566937603007082,
      "loss": 3.0335,
      "step": 101258
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6913154125213623,
      "learning_rate": 0.0003566897434407369,
      "loss": 3.0936,
      "step": 101259
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7684311866760254,
      "learning_rate": 0.0003566857265702259,
      "loss": 3.0673,
      "step": 101260
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.12473726272583,
      "learning_rate": 0.00035668170968917605,
      "loss": 2.948,
      "step": 101261
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.963399887084961,
      "learning_rate": 0.00035667769279758787,
      "loss": 2.9347,
      "step": 101262
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.131167411804199,
      "learning_rate": 0.00035667367589546227,
      "loss": 2.9259,
      "step": 101263
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.82904314994812,
      "learning_rate": 0.0003566696589828,
      "loss": 3.2828,
      "step": 101264
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7449177503585815,
      "learning_rate": 0.00035666564205960176,
      "loss": 3.0118,
      "step": 101265
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8664894104003906,
      "learning_rate": 0.00035666162512586826,
      "loss": 3.1411,
      "step": 101266
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8883686065673828,
      "learning_rate": 0.0003566576081816004,
      "loss": 3.2872,
      "step": 101267
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.685260534286499,
      "learning_rate": 0.0003566535912267987,
      "loss": 2.9545,
      "step": 101268
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.596024990081787,
      "learning_rate": 0.0003566495742614641,
      "loss": 3.1155,
      "step": 101269
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.735848307609558,
      "learning_rate": 0.0003566455572855973,
      "loss": 3.1127,
      "step": 101270
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.070918560028076,
      "learning_rate": 0.000356641540299199,
      "loss": 2.9528,
      "step": 101271
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0947701930999756,
      "learning_rate": 0.0003566375233022699,
      "loss": 3.0554,
      "step": 101272
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8872172832489014,
      "learning_rate": 0.0003566335062948109,
      "loss": 2.7072,
      "step": 101273
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4629697799682617,
      "learning_rate": 0.00035662948927682263,
      "loss": 2.7312,
      "step": 101274
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7976086139678955,
      "learning_rate": 0.00035662547224830584,
      "loss": 2.8584,
      "step": 101275
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6332134008407593,
      "learning_rate": 0.00035662145520926134,
      "loss": 3.1041,
      "step": 101276
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7655754089355469,
      "learning_rate": 0.00035661743815968984,
      "loss": 2.92,
      "step": 101277
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3325870037078857,
      "learning_rate": 0.0003566134210995921,
      "loss": 3.0831,
      "step": 101278
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0143539905548096,
      "learning_rate": 0.0003566094040289689,
      "loss": 3.0337,
      "step": 101279
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5315877199172974,
      "learning_rate": 0.0003566053869478208,
      "loss": 2.9047,
      "step": 101280
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5884578227996826,
      "learning_rate": 0.00035660136985614876,
      "loss": 2.9728,
      "step": 101281
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1711387634277344,
      "learning_rate": 0.00035659735275395354,
      "loss": 2.968,
      "step": 101282
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.501112937927246,
      "learning_rate": 0.00035659333564123566,
      "loss": 2.8882,
      "step": 101283
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.666907787322998,
      "learning_rate": 0.0003565893185179961,
      "loss": 3.1375,
      "step": 101284
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9149960279464722,
      "learning_rate": 0.0003565853013842356,
      "loss": 2.9144,
      "step": 101285
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.794757604598999,
      "learning_rate": 0.0003565812842399546,
      "loss": 3.0131,
      "step": 101286
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7280632257461548,
      "learning_rate": 0.00035657726708515424,
      "loss": 3.0218,
      "step": 101287
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8412882089614868,
      "learning_rate": 0.00035657324991983505,
      "loss": 3.0386,
      "step": 101288
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8490206003189087,
      "learning_rate": 0.00035656923274399784,
      "loss": 2.9457,
      "step": 101289
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6588351726531982,
      "learning_rate": 0.0003565652155576434,
      "loss": 3.0998,
      "step": 101290
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6840957403182983,
      "learning_rate": 0.0003565611983607723,
      "loss": 2.8616,
      "step": 101291
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1672139167785645,
      "learning_rate": 0.0003565571811533855,
      "loss": 2.6576,
      "step": 101292
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9084316492080688,
      "learning_rate": 0.00035655316393548364,
      "loss": 2.9385,
      "step": 101293
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8372037410736084,
      "learning_rate": 0.00035654914670706734,
      "loss": 3.1158,
      "step": 101294
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7127894163131714,
      "learning_rate": 0.0003565451294681376,
      "loss": 2.8555,
      "step": 101295
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6638420820236206,
      "learning_rate": 0.0003565411122186951,
      "loss": 2.8974,
      "step": 101296
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7570464611053467,
      "learning_rate": 0.00035653709495874045,
      "loss": 3.0623,
      "step": 101297
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.812310814857483,
      "learning_rate": 0.0003565330776882745,
      "loss": 2.8475,
      "step": 101298
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0148937702178955,
      "learning_rate": 0.0003565290604072981,
      "loss": 2.9272,
      "step": 101299
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.856601357460022,
      "learning_rate": 0.00035652504311581176,
      "loss": 3.1541,
      "step": 101300
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.126487970352173,
      "learning_rate": 0.00035652102581381636,
      "loss": 3.0478,
      "step": 101301
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8448907136917114,
      "learning_rate": 0.00035651700850131265,
      "loss": 3.0971,
      "step": 101302
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7206634283065796,
      "learning_rate": 0.0003565129911783014,
      "loss": 3.1616,
      "step": 101303
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.32926607131958,
      "learning_rate": 0.00035650897384478325,
      "loss": 2.974,
      "step": 101304
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.930844783782959,
      "learning_rate": 0.000356504956500759,
      "loss": 2.9728,
      "step": 101305
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.278557062149048,
      "learning_rate": 0.0003565009391462295,
      "loss": 2.8333,
      "step": 101306
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7417672872543335,
      "learning_rate": 0.00035649692178119536,
      "loss": 2.9528,
      "step": 101307
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.442298650741577,
      "learning_rate": 0.0003564929044056574,
      "loss": 2.9802,
      "step": 101308
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2466306686401367,
      "learning_rate": 0.00035648888701961636,
      "loss": 2.9305,
      "step": 101309
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.839860439300537,
      "learning_rate": 0.00035648486962307294,
      "loss": 2.8642,
      "step": 101310
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0842604637145996,
      "learning_rate": 0.00035648085221602784,
      "loss": 3.1606,
      "step": 101311
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6104042530059814,
      "learning_rate": 0.000356476834798482,
      "loss": 3.0093,
      "step": 101312
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.643048048019409,
      "learning_rate": 0.000356472817370436,
      "loss": 3.1246,
      "step": 101313
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8672285079956055,
      "learning_rate": 0.0003564687999318906,
      "loss": 3.3711,
      "step": 101314
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0346806049346924,
      "learning_rate": 0.0003564647824828467,
      "loss": 2.8287,
      "step": 101315
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8778893947601318,
      "learning_rate": 0.00035646076502330486,
      "loss": 3.0679,
      "step": 101316
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8019776344299316,
      "learning_rate": 0.0003564567475532659,
      "loss": 2.9672,
      "step": 101317
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0352818965911865,
      "learning_rate": 0.00035645273007273056,
      "loss": 2.9649,
      "step": 101318
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.045363187789917,
      "learning_rate": 0.0003564487125816996,
      "loss": 2.9226,
      "step": 101319
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.711814522743225,
      "learning_rate": 0.00035644469508017373,
      "loss": 2.9979,
      "step": 101320
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9886771440505981,
      "learning_rate": 0.00035644067756815373,
      "loss": 3.1417,
      "step": 101321
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5728096961975098,
      "learning_rate": 0.0003564366600456404,
      "loss": 3.0309,
      "step": 101322
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.714128851890564,
      "learning_rate": 0.0003564326425126343,
      "loss": 2.9039,
      "step": 101323
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6279382705688477,
      "learning_rate": 0.00035642862496913644,
      "loss": 2.7582,
      "step": 101324
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7306830883026123,
      "learning_rate": 0.0003564246074151474,
      "loss": 2.903,
      "step": 101325
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.833153009414673,
      "learning_rate": 0.0003564205898506679,
      "loss": 3.2074,
      "step": 101326
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5613014698028564,
      "learning_rate": 0.0003564165722756988,
      "loss": 3.0856,
      "step": 101327
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9739795923233032,
      "learning_rate": 0.00035641255469024086,
      "loss": 3.0675,
      "step": 101328
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8713103532791138,
      "learning_rate": 0.00035640853709429466,
      "loss": 2.9639,
      "step": 101329
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5581790208816528,
      "learning_rate": 0.00035640451948786104,
      "loss": 3.0893,
      "step": 101330
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.30515456199646,
      "learning_rate": 0.0003564005018709409,
      "loss": 2.9067,
      "step": 101331
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7774475812911987,
      "learning_rate": 0.0003563964842435347,
      "loss": 3.0767,
      "step": 101332
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1518378257751465,
      "learning_rate": 0.00035639246660564336,
      "loss": 3.268,
      "step": 101333
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.008107900619507,
      "learning_rate": 0.00035638844895726765,
      "loss": 3.4961,
      "step": 101334
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.089447259902954,
      "learning_rate": 0.0003563844312984082,
      "loss": 3.0291,
      "step": 101335
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7943145036697388,
      "learning_rate": 0.00035638041362906584,
      "loss": 2.8684,
      "step": 101336
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6642833948135376,
      "learning_rate": 0.00035637639594924127,
      "loss": 2.9623,
      "step": 101337
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7110403776168823,
      "learning_rate": 0.0003563723782589353,
      "loss": 3.2421,
      "step": 101338
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.800267457962036,
      "learning_rate": 0.00035636836055814863,
      "loss": 3.188,
      "step": 101339
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7973110675811768,
      "learning_rate": 0.000356364342846882,
      "loss": 2.9075,
      "step": 101340
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.234022855758667,
      "learning_rate": 0.0003563603251251362,
      "loss": 2.7474,
      "step": 101341
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4655983448028564,
      "learning_rate": 0.0003563563073929119,
      "loss": 2.8962,
      "step": 101342
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1847915649414062,
      "learning_rate": 0.00035635228965021,
      "loss": 2.6109,
      "step": 101343
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.793607473373413,
      "learning_rate": 0.000356348271897031,
      "loss": 3.0625,
      "step": 101344
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.787893056869507,
      "learning_rate": 0.00035634425413337594,
      "loss": 3.0014,
      "step": 101345
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.993644714355469,
      "learning_rate": 0.0003563402363592454,
      "loss": 2.8961,
      "step": 101346
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.174633502960205,
      "learning_rate": 0.00035633621857464006,
      "loss": 3.0642,
      "step": 101347
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.7098135948181152,
      "learning_rate": 0.00035633220077956084,
      "loss": 3.0349,
      "step": 101348
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1750848293304443,
      "learning_rate": 0.00035632818297400833,
      "loss": 2.9705,
      "step": 101349
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4826961755752563,
      "learning_rate": 0.00035632416515798336,
      "loss": 2.8822,
      "step": 101350
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9015746116638184,
      "learning_rate": 0.0003563201473314866,
      "loss": 3.0264,
      "step": 101351
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7715529203414917,
      "learning_rate": 0.000356316129494519,
      "loss": 3.2047,
      "step": 101352
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9767918586730957,
      "learning_rate": 0.00035631211164708113,
      "loss": 3.1852,
      "step": 101353
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.9374258518218994,
      "learning_rate": 0.00035630809378917375,
      "loss": 2.9687,
      "step": 101354
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.52929425239563,
      "learning_rate": 0.0003563040759207977,
      "loss": 2.9386,
      "step": 101355
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.698650360107422,
      "learning_rate": 0.00035630005804195355,
      "loss": 2.9574,
      "step": 101356
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.755195379257202,
      "learning_rate": 0.0003562960401526422,
      "loss": 3.2104,
      "step": 101357
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0586941242218018,
      "learning_rate": 0.0003562920222528644,
      "loss": 3.1277,
      "step": 101358
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6738417148590088,
      "learning_rate": 0.0003562880043426208,
      "loss": 2.9864,
      "step": 101359
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.456926703453064,
      "learning_rate": 0.0003562839864219122,
      "loss": 2.6619,
      "step": 101360
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.641661524772644,
      "learning_rate": 0.00035627996849073936,
      "loss": 3.0366,
      "step": 101361
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7856872081756592,
      "learning_rate": 0.00035627595054910305,
      "loss": 3.025,
      "step": 101362
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.901788592338562,
      "learning_rate": 0.00035627193259700387,
      "loss": 2.9684,
      "step": 101363
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7331020832061768,
      "learning_rate": 0.0003562679146344428,
      "loss": 3.0129,
      "step": 101364
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8056344985961914,
      "learning_rate": 0.00035626389666142035,
      "loss": 2.9941,
      "step": 101365
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.562537431716919,
      "learning_rate": 0.00035625987867793737,
      "loss": 2.9265,
      "step": 101366
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7405991554260254,
      "learning_rate": 0.00035625586068399476,
      "loss": 2.9896,
      "step": 101367
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2671990394592285,
      "learning_rate": 0.000356251842679593,
      "loss": 2.6494,
      "step": 101368
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8274176120758057,
      "learning_rate": 0.000356247824664733,
      "loss": 2.9996,
      "step": 101369
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7993625402450562,
      "learning_rate": 0.00035624380663941554,
      "loss": 2.9721,
      "step": 101370
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6485806703567505,
      "learning_rate": 0.00035623978860364116,
      "loss": 3.181,
      "step": 101371
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6373376846313477,
      "learning_rate": 0.00035623577055741084,
      "loss": 3.0067,
      "step": 101372
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6931095123291016,
      "learning_rate": 0.0003562317525007252,
      "loss": 2.9682,
      "step": 101373
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6492711305618286,
      "learning_rate": 0.00035622773443358494,
      "loss": 3.0689,
      "step": 101374
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.7246627807617188,
      "learning_rate": 0.000356223716355991,
      "loss": 2.9251,
      "step": 101375
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.010704278945923,
      "learning_rate": 0.0003562196982679439,
      "loss": 2.9042,
      "step": 101376
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6202481985092163,
      "learning_rate": 0.00035621568016944464,
      "loss": 2.9938,
      "step": 101377
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.9586446285247803,
      "learning_rate": 0.00035621166206049366,
      "loss": 3.0768,
      "step": 101378
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.009112596511841,
      "learning_rate": 0.00035620764394109195,
      "loss": 3.1749,
      "step": 101379
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.270596981048584,
      "learning_rate": 0.0003562036258112402,
      "loss": 2.8766,
      "step": 101380
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8831465244293213,
      "learning_rate": 0.00035619960767093914,
      "loss": 3.0345,
      "step": 101381
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7044222354888916,
      "learning_rate": 0.00035619558952018946,
      "loss": 3.1613,
      "step": 101382
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.5746049880981445,
      "learning_rate": 0.00035619157135899197,
      "loss": 2.9354,
      "step": 101383
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.5520894527435303,
      "learning_rate": 0.0003561875531873475,
      "loss": 2.9253,
      "step": 101384
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1422030925750732,
      "learning_rate": 0.00035618353500525656,
      "loss": 3.1089,
      "step": 101385
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7382522821426392,
      "learning_rate": 0.00035617951681272004,
      "loss": 2.8986,
      "step": 101386
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1785833835601807,
      "learning_rate": 0.00035617549860973875,
      "loss": 2.6005,
      "step": 101387
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.014190435409546,
      "learning_rate": 0.00035617148039631346,
      "loss": 3.0273,
      "step": 101388
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.510059356689453,
      "learning_rate": 0.00035616746217244474,
      "loss": 2.9801,
      "step": 101389
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6132866144180298,
      "learning_rate": 0.0003561634439381334,
      "loss": 2.8938,
      "step": 101390
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.6212239265441895,
      "learning_rate": 0.00035615942569338033,
      "loss": 3.0918,
      "step": 101391
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.2440433502197266,
      "learning_rate": 0.000356155407438186,
      "loss": 2.7369,
      "step": 101392
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.72873592376709,
      "learning_rate": 0.0003561513891725514,
      "loss": 3.0621,
      "step": 101393
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9889475107192993,
      "learning_rate": 0.00035614737089647723,
      "loss": 3.1994,
      "step": 101394
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.7553210258483887,
      "learning_rate": 0.0003561433526099642,
      "loss": 3.0958,
      "step": 101395
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3266658782958984,
      "learning_rate": 0.000356139334313013,
      "loss": 3.2823,
      "step": 101396
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9696577787399292,
      "learning_rate": 0.0003561353160056245,
      "loss": 2.7794,
      "step": 101397
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.651490330696106,
      "learning_rate": 0.0003561312976877993,
      "loss": 2.9208,
      "step": 101398
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9727554321289062,
      "learning_rate": 0.00035612727935953835,
      "loss": 3.0966,
      "step": 101399
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.7881019115448,
      "learning_rate": 0.0003561232610208422,
      "loss": 3.1709,
      "step": 101400
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9907288551330566,
      "learning_rate": 0.00035611924267171173,
      "loss": 3.2091,
      "step": 101401
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7998210191726685,
      "learning_rate": 0.0003561152243121476,
      "loss": 2.7906,
      "step": 101402
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.26832914352417,
      "learning_rate": 0.00035611120594215053,
      "loss": 2.934,
      "step": 101403
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.061903953552246,
      "learning_rate": 0.00035610718756172145,
      "loss": 2.9271,
      "step": 101404
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7182276248931885,
      "learning_rate": 0.00035610316917086094,
      "loss": 3.1124,
      "step": 101405
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.243206262588501,
      "learning_rate": 0.00035609915076956976,
      "loss": 2.9963,
      "step": 101406
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0758965015411377,
      "learning_rate": 0.0003560951323578487,
      "loss": 2.8534,
      "step": 101407
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.784002661705017,
      "learning_rate": 0.00035609111393569844,
      "loss": 3.162,
      "step": 101408
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9334213733673096,
      "learning_rate": 0.0003560870955031199,
      "loss": 2.7434,
      "step": 101409
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0234012603759766,
      "learning_rate": 0.0003560830770601137,
      "loss": 2.7612,
      "step": 101410
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7196009159088135,
      "learning_rate": 0.00035607905860668047,
      "loss": 3.0492,
      "step": 101411
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8252661228179932,
      "learning_rate": 0.0003560750401428212,
      "loss": 3.1824,
      "step": 101412
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7931522130966187,
      "learning_rate": 0.00035607102166853655,
      "loss": 2.9428,
      "step": 101413
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8686264753341675,
      "learning_rate": 0.0003560670031838271,
      "loss": 3.2836,
      "step": 101414
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.667502760887146,
      "learning_rate": 0.00035606298468869384,
      "loss": 3.1623,
      "step": 101415
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.125239849090576,
      "learning_rate": 0.00035605896618313743,
      "loss": 3.0364,
      "step": 101416
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9445486068725586,
      "learning_rate": 0.0003560549476671585,
      "loss": 3.1777,
      "step": 101417
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0241057872772217,
      "learning_rate": 0.00035605092914075794,
      "loss": 3.213,
      "step": 101418
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7125531435012817,
      "learning_rate": 0.0003560469106039365,
      "loss": 3.1745,
      "step": 101419
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.205573797225952,
      "learning_rate": 0.00035604289205669483,
      "loss": 2.9754,
      "step": 101420
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.137594699859619,
      "learning_rate": 0.0003560388734990338,
      "loss": 2.8182,
      "step": 101421
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2248759269714355,
      "learning_rate": 0.0003560348549309541,
      "loss": 3.2714,
      "step": 101422
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.240354537963867,
      "learning_rate": 0.0003560308363524563,
      "loss": 3.055,
      "step": 101423
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8963954448699951,
      "learning_rate": 0.0003560268177635414,
      "loss": 3.1695,
      "step": 101424
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.852012276649475,
      "learning_rate": 0.00035602279916421007,
      "loss": 2.932,
      "step": 101425
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7809700965881348,
      "learning_rate": 0.00035601878055446307,
      "loss": 3.1258,
      "step": 101426
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6570115089416504,
      "learning_rate": 0.000356014761934301,
      "loss": 2.9484,
      "step": 101427
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8807893991470337,
      "learning_rate": 0.0003560107433037249,
      "loss": 3.1724,
      "step": 101428
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3545849323272705,
      "learning_rate": 0.0003560067246627353,
      "loss": 2.8881,
      "step": 101429
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0019145011901855,
      "learning_rate": 0.00035600270601133285,
      "loss": 2.7989,
      "step": 101430
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.787086844444275,
      "learning_rate": 0.0003559986873495186,
      "loss": 3.0504,
      "step": 101431
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.078056812286377,
      "learning_rate": 0.000355994668677293,
      "loss": 3.1612,
      "step": 101432
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8217241764068604,
      "learning_rate": 0.00035599064999465704,
      "loss": 2.8893,
      "step": 101433
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0983364582061768,
      "learning_rate": 0.00035598663130161133,
      "loss": 3.0206,
      "step": 101434
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3111014366149902,
      "learning_rate": 0.0003559826125981567,
      "loss": 2.8724,
      "step": 101435
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7268764972686768,
      "learning_rate": 0.0003559785938842938,
      "loss": 2.866,
      "step": 101436
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.681133985519409,
      "learning_rate": 0.00035597457516002345,
      "loss": 2.6671,
      "step": 101437
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.482229709625244,
      "learning_rate": 0.00035597055642534633,
      "loss": 2.8179,
      "step": 101438
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9024913311004639,
      "learning_rate": 0.0003559665376802632,
      "loss": 3.1067,
      "step": 101439
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5322670936584473,
      "learning_rate": 0.0003559625189247749,
      "loss": 3.1359,
      "step": 101440
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.121250867843628,
      "learning_rate": 0.0003559585001588821,
      "loss": 3.2217,
      "step": 101441
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.2018768787384033,
      "learning_rate": 0.0003559544813825855,
      "loss": 3.1336,
      "step": 101442
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3012454509735107,
      "learning_rate": 0.000355950462595886,
      "loss": 3.0231,
      "step": 101443
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.36620831489563,
      "learning_rate": 0.0003559464437987842,
      "loss": 2.9977,
      "step": 101444
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.8237268924713135,
      "learning_rate": 0.00035594242499128087,
      "loss": 3.3151,
      "step": 101445
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6641058921813965,
      "learning_rate": 0.00035593840617337687,
      "loss": 3.1004,
      "step": 101446
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0485191345214844,
      "learning_rate": 0.00035593438734507276,
      "loss": 2.9493,
      "step": 101447
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.21869158744812,
      "learning_rate": 0.0003559303685063694,
      "loss": 2.9622,
      "step": 101448
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.3131091594696045,
      "learning_rate": 0.00035592634965726767,
      "loss": 2.8681,
      "step": 101449
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.238294839859009,
      "learning_rate": 0.0003559223307977681,
      "loss": 3.0603,
      "step": 101450
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7483991384506226,
      "learning_rate": 0.00035591831192787143,
      "loss": 2.8502,
      "step": 101451
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8683288097381592,
      "learning_rate": 0.00035591429304757864,
      "loss": 3.1125,
      "step": 101452
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3205361366271973,
      "learning_rate": 0.0003559102741568902,
      "loss": 2.8572,
      "step": 101453
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.516490936279297,
      "learning_rate": 0.000355906255255807,
      "loss": 2.8039,
      "step": 101454
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.787243366241455,
      "learning_rate": 0.0003559022363443298,
      "loss": 2.9315,
      "step": 101455
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.414140462875366,
      "learning_rate": 0.0003558982174224593,
      "loss": 2.959,
      "step": 101456
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.220071792602539,
      "learning_rate": 0.00035589419849019633,
      "loss": 2.9445,
      "step": 101457
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.004103660583496,
      "learning_rate": 0.0003558901795475414,
      "loss": 2.9495,
      "step": 101458
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7371764183044434,
      "learning_rate": 0.0003558861605944957,
      "loss": 2.8317,
      "step": 101459
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.659350037574768,
      "learning_rate": 0.0003558821416310595,
      "loss": 3.0096,
      "step": 101460
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4901926517486572,
      "learning_rate": 0.00035587812265723377,
      "loss": 3.1754,
      "step": 101461
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6121249198913574,
      "learning_rate": 0.00035587410367301934,
      "loss": 3.2071,
      "step": 101462
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2291088104248047,
      "learning_rate": 0.00035587008467841683,
      "loss": 3.0561,
      "step": 101463
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.819750428199768,
      "learning_rate": 0.00035586606567342697,
      "loss": 2.8293,
      "step": 101464
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.376710891723633,
      "learning_rate": 0.00035586204665805066,
      "loss": 3.1307,
      "step": 101465
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.924485683441162,
      "learning_rate": 0.0003558580276322884,
      "loss": 3.1123,
      "step": 101466
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7997105121612549,
      "learning_rate": 0.0003558540085961411,
      "loss": 3.1335,
      "step": 101467
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3012351989746094,
      "learning_rate": 0.0003558499895496095,
      "loss": 2.8258,
      "step": 101468
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.1198129653930664,
      "learning_rate": 0.00035584597049269443,
      "loss": 2.8845,
      "step": 101469
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.11673641204834,
      "learning_rate": 0.00035584195142539644,
      "loss": 3.2536,
      "step": 101470
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6373854875564575,
      "learning_rate": 0.0003558379323477164,
      "loss": 3.1013,
      "step": 101471
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0029704570770264,
      "learning_rate": 0.0003558339132596551,
      "loss": 2.9146,
      "step": 101472
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.001959800720215,
      "learning_rate": 0.00035582989416121317,
      "loss": 2.8794,
      "step": 101473
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.75253164768219,
      "learning_rate": 0.0003558258750523913,
      "loss": 3.0443,
      "step": 101474
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5685195922851562,
      "learning_rate": 0.00035582185593319046,
      "loss": 3.1062,
      "step": 101475
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8436126708984375,
      "learning_rate": 0.00035581783680361135,
      "loss": 2.8233,
      "step": 101476
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0176455974578857,
      "learning_rate": 0.00035581381766365447,
      "loss": 3.153,
      "step": 101477
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.284454584121704,
      "learning_rate": 0.0003558097985133209,
      "loss": 3.0535,
      "step": 101478
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7078180313110352,
      "learning_rate": 0.0003558057793526111,
      "loss": 3.2156,
      "step": 101479
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8100106716156006,
      "learning_rate": 0.00035580176018152606,
      "loss": 2.8968,
      "step": 101480
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.153102159500122,
      "learning_rate": 0.00035579774100006646,
      "loss": 2.8101,
      "step": 101481
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9891289472579956,
      "learning_rate": 0.0003557937218082329,
      "loss": 2.9678,
      "step": 101482
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8652266263961792,
      "learning_rate": 0.00035578970260602633,
      "loss": 3.1086,
      "step": 101483
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.835734248161316,
      "learning_rate": 0.0003557856833934473,
      "loss": 2.906,
      "step": 101484
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1201727390289307,
      "learning_rate": 0.0003557816641704967,
      "loss": 2.7945,
      "step": 101485
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5615644454956055,
      "learning_rate": 0.00035577764493717527,
      "loss": 3.0313,
      "step": 101486
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.380734920501709,
      "learning_rate": 0.00035577362569348366,
      "loss": 2.9123,
      "step": 101487
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.069875478744507,
      "learning_rate": 0.0003557696064394227,
      "loss": 3.2144,
      "step": 101488
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0642967224121094,
      "learning_rate": 0.0003557655871749932,
      "loss": 2.9292,
      "step": 101489
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.8468258380889893,
      "learning_rate": 0.0003557615679001957,
      "loss": 2.9563,
      "step": 101490
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8785697221755981,
      "learning_rate": 0.0003557575486150311,
      "loss": 3.0579,
      "step": 101491
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8123416900634766,
      "learning_rate": 0.0003557535293195002,
      "loss": 2.8366,
      "step": 101492
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.75962495803833,
      "learning_rate": 0.00035574951001360365,
      "loss": 2.8581,
      "step": 101493
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.724866509437561,
      "learning_rate": 0.00035574549069734216,
      "loss": 2.7955,
      "step": 101494
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.830744743347168,
      "learning_rate": 0.00035574147137071657,
      "loss": 2.7749,
      "step": 101495
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0023255348205566,
      "learning_rate": 0.0003557374520337275,
      "loss": 3.0213,
      "step": 101496
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8340240716934204,
      "learning_rate": 0.00035573343268637583,
      "loss": 3.1272,
      "step": 101497
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9541828632354736,
      "learning_rate": 0.00035572941332866236,
      "loss": 2.8629,
      "step": 101498
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.730907917022705,
      "learning_rate": 0.0003557253939605876,
      "loss": 2.8707,
      "step": 101499
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8155066967010498,
      "learning_rate": 0.00035572137458215255,
      "loss": 2.7566,
      "step": 101500
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8645778894424438,
      "learning_rate": 0.0003557173551933578,
      "loss": 3.1938,
      "step": 101501
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.060950994491577,
      "learning_rate": 0.00035571333579420414,
      "loss": 3.0122,
      "step": 101502
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.0081822872161865,
      "learning_rate": 0.00035570931638469227,
      "loss": 2.7696,
      "step": 101503
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7881078720092773,
      "learning_rate": 0.0003557052969648231,
      "loss": 3.098,
      "step": 101504
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6926181316375732,
      "learning_rate": 0.0003557012775345972,
      "loss": 3.1089,
      "step": 101505
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.490631103515625,
      "learning_rate": 0.0003556972580940154,
      "loss": 2.8929,
      "step": 101506
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6280584335327148,
      "learning_rate": 0.0003556932386430784,
      "loss": 3.0383,
      "step": 101507
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7654664516448975,
      "learning_rate": 0.00035568921918178694,
      "loss": 2.9426,
      "step": 101508
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.878625750541687,
      "learning_rate": 0.0003556851997101418,
      "loss": 3.0818,
      "step": 101509
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.078967571258545,
      "learning_rate": 0.0003556811802281438,
      "loss": 3.0387,
      "step": 101510
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6806819438934326,
      "learning_rate": 0.00035567716073579364,
      "loss": 3.0639,
      "step": 101511
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9574894905090332,
      "learning_rate": 0.00035567314123309193,
      "loss": 2.7542,
      "step": 101512
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.689193844795227,
      "learning_rate": 0.00035566912172003967,
      "loss": 3.3926,
      "step": 101513
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.760129690170288,
      "learning_rate": 0.0003556651021966374,
      "loss": 2.9502,
      "step": 101514
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.142165422439575,
      "learning_rate": 0.00035566108266288593,
      "loss": 3.2044,
      "step": 101515
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0226826667785645,
      "learning_rate": 0.0003556570631187861,
      "loss": 2.7499,
      "step": 101516
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3712263107299805,
      "learning_rate": 0.00035565304356433843,
      "loss": 3.0299,
      "step": 101517
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.104836940765381,
      "learning_rate": 0.00035564902399954387,
      "loss": 3.1094,
      "step": 101518
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8577302694320679,
      "learning_rate": 0.0003556450044244031,
      "loss": 3.1133,
      "step": 101519
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9337425231933594,
      "learning_rate": 0.0003556409848389169,
      "loss": 2.7752,
      "step": 101520
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3223836421966553,
      "learning_rate": 0.0003556369652430859,
      "loss": 3.0071,
      "step": 101521
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6460671424865723,
      "learning_rate": 0.0003556329456369111,
      "loss": 2.6422,
      "step": 101522
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6343677043914795,
      "learning_rate": 0.000355628926020393,
      "loss": 3.1369,
      "step": 101523
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.491931438446045,
      "learning_rate": 0.00035562490639353235,
      "loss": 3.0632,
      "step": 101524
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6599069833755493,
      "learning_rate": 0.00035562088675633005,
      "loss": 3.0119,
      "step": 101525
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6801061630249023,
      "learning_rate": 0.00035561686710878685,
      "loss": 3.0927,
      "step": 101526
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8854663372039795,
      "learning_rate": 0.0003556128474509033,
      "loss": 3.2141,
      "step": 101527
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1608188152313232,
      "learning_rate": 0.0003556088277826804,
      "loss": 2.847,
      "step": 101528
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.242631673812866,
      "learning_rate": 0.0003556048081041187,
      "loss": 2.9418,
      "step": 101529
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5689826011657715,
      "learning_rate": 0.00035560078841521893,
      "loss": 3.2433,
      "step": 101530
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5803378820419312,
      "learning_rate": 0.000355596768715982,
      "loss": 2.9994,
      "step": 101531
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5794832706451416,
      "learning_rate": 0.00035559274900640866,
      "loss": 3.0573,
      "step": 101532
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0795400142669678,
      "learning_rate": 0.00035558872928649946,
      "loss": 3.276,
      "step": 101533
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9334971904754639,
      "learning_rate": 0.0003555847095562554,
      "loss": 3.0742,
      "step": 101534
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.95478355884552,
      "learning_rate": 0.00035558068981567694,
      "loss": 3.1325,
      "step": 101535
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5910027027130127,
      "learning_rate": 0.0003555766700647651,
      "loss": 2.9697,
      "step": 101536
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.452165126800537,
      "learning_rate": 0.0003555726503035204,
      "loss": 3.0082,
      "step": 101537
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2987823486328125,
      "learning_rate": 0.00035556863053194383,
      "loss": 3.0519,
      "step": 101538
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2399985790252686,
      "learning_rate": 0.0003555646107500359,
      "loss": 3.1021,
      "step": 101539
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7277536392211914,
      "learning_rate": 0.0003555605909577975,
      "loss": 3.0063,
      "step": 101540
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.88823664188385,
      "learning_rate": 0.0003555565711552293,
      "loss": 3.0415,
      "step": 101541
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.163648843765259,
      "learning_rate": 0.0003555525513423321,
      "loss": 2.9305,
      "step": 101542
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8844451904296875,
      "learning_rate": 0.00035554853151910663,
      "loss": 2.7684,
      "step": 101543
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7645496129989624,
      "learning_rate": 0.0003555445116855537,
      "loss": 3.1795,
      "step": 101544
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3931760787963867,
      "learning_rate": 0.0003555404918416739,
      "loss": 3.1645,
      "step": 101545
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7487369775772095,
      "learning_rate": 0.00035553647198746814,
      "loss": 2.9597,
      "step": 101546
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1331350803375244,
      "learning_rate": 0.0003555324521229371,
      "loss": 2.7181,
      "step": 101547
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2197015285491943,
      "learning_rate": 0.0003555284322480815,
      "loss": 2.9338,
      "step": 101548
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7991009950637817,
      "learning_rate": 0.0003555244123629021,
      "loss": 3.1906,
      "step": 101549
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7573628425598145,
      "learning_rate": 0.00035552039246739974,
      "loss": 2.8454,
      "step": 101550
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9847608804702759,
      "learning_rate": 0.000355516372561575,
      "loss": 3.2305,
      "step": 101551
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8471527099609375,
      "learning_rate": 0.0003555123526454288,
      "loss": 3.0954,
      "step": 101552
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7100329399108887,
      "learning_rate": 0.00035550833271896174,
      "loss": 3.094,
      "step": 101553
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.389756202697754,
      "learning_rate": 0.00035550431278217465,
      "loss": 2.9275,
      "step": 101554
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5954878330230713,
      "learning_rate": 0.00035550029283506823,
      "loss": 3.1628,
      "step": 101555
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7965766191482544,
      "learning_rate": 0.0003554962728776433,
      "loss": 3.2169,
      "step": 101556
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5442982912063599,
      "learning_rate": 0.0003554922529099006,
      "loss": 3.0522,
      "step": 101557
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4570186138153076,
      "learning_rate": 0.0003554882329318408,
      "loss": 2.6122,
      "step": 101558
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6268184185028076,
      "learning_rate": 0.00035548421294346466,
      "loss": 2.8698,
      "step": 101559
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1601390838623047,
      "learning_rate": 0.00035548019294477304,
      "loss": 3.0266,
      "step": 101560
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.858236312866211,
      "learning_rate": 0.00035547617293576653,
      "loss": 3.0964,
      "step": 101561
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7640299797058105,
      "learning_rate": 0.00035547215291644596,
      "loss": 2.9938,
      "step": 101562
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1428022384643555,
      "learning_rate": 0.00035546813288681213,
      "loss": 3.0747,
      "step": 101563
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.709348201751709,
      "learning_rate": 0.0003554641128468657,
      "loss": 3.3778,
      "step": 101564
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.268367290496826,
      "learning_rate": 0.0003554600927966074,
      "loss": 2.9142,
      "step": 101565
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0168328285217285,
      "learning_rate": 0.00035545607273603804,
      "loss": 3.1196,
      "step": 101566
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.88867449760437,
      "learning_rate": 0.0003554520526651583,
      "loss": 2.917,
      "step": 101567
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9493457078933716,
      "learning_rate": 0.0003554480325839691,
      "loss": 2.9583,
      "step": 101568
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7915526628494263,
      "learning_rate": 0.000355444012492471,
      "loss": 2.8118,
      "step": 101569
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.151181936264038,
      "learning_rate": 0.00035543999239066485,
      "loss": 2.9802,
      "step": 101570
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9653877019882202,
      "learning_rate": 0.0003554359722785513,
      "loss": 2.8109,
      "step": 101571
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8591570854187012,
      "learning_rate": 0.0003554319521561312,
      "loss": 3.149,
      "step": 101572
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.232551336288452,
      "learning_rate": 0.00035542793202340513,
      "loss": 3.0888,
      "step": 101573
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1805102825164795,
      "learning_rate": 0.00035542391188037416,
      "loss": 2.9299,
      "step": 101574
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9032806158065796,
      "learning_rate": 0.0003554198917270387,
      "loss": 3.0585,
      "step": 101575
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.626222014427185,
      "learning_rate": 0.00035541587156339967,
      "loss": 2.7888,
      "step": 101576
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8543788194656372,
      "learning_rate": 0.0003554118513894579,
      "loss": 3.3252,
      "step": 101577
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5891189575195312,
      "learning_rate": 0.00035540783120521384,
      "loss": 3.1051,
      "step": 101578
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.940415620803833,
      "learning_rate": 0.0003554038110106685,
      "loss": 3.2276,
      "step": 101579
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8812847137451172,
      "learning_rate": 0.0003553997908058225,
      "loss": 2.9182,
      "step": 101580
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1251628398895264,
      "learning_rate": 0.0003553957705906767,
      "loss": 3.2201,
      "step": 101581
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0998895168304443,
      "learning_rate": 0.00035539175036523174,
      "loss": 2.8535,
      "step": 101582
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9099938869476318,
      "learning_rate": 0.0003553877301294884,
      "loss": 3.0114,
      "step": 101583
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1350719928741455,
      "learning_rate": 0.0003553837098834475,
      "loss": 2.915,
      "step": 101584
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.266751289367676,
      "learning_rate": 0.00035537968962710966,
      "loss": 3.1171,
      "step": 101585
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.015162467956543,
      "learning_rate": 0.00035537566936047574,
      "loss": 3.193,
      "step": 101586
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9191936254501343,
      "learning_rate": 0.0003553716490835464,
      "loss": 3.0272,
      "step": 101587
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.4220528602600098,
      "learning_rate": 0.0003553676287963224,
      "loss": 3.0843,
      "step": 101588
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6902694702148438,
      "learning_rate": 0.00035536360849880455,
      "loss": 2.8944,
      "step": 101589
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.920650601387024,
      "learning_rate": 0.00035535958819099355,
      "loss": 2.9376,
      "step": 101590
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.892974615097046,
      "learning_rate": 0.00035535556787289017,
      "loss": 3.1846,
      "step": 101591
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3089241981506348,
      "learning_rate": 0.0003553515475444951,
      "loss": 2.8717,
      "step": 101592
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.770902156829834,
      "learning_rate": 0.00035534752720580917,
      "loss": 2.9301,
      "step": 101593
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.640669584274292,
      "learning_rate": 0.00035534350685683313,
      "loss": 3.005,
      "step": 101594
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1990673542022705,
      "learning_rate": 0.00035533948649756763,
      "loss": 2.9282,
      "step": 101595
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.92571759223938,
      "learning_rate": 0.0003553354661280135,
      "loss": 2.9669,
      "step": 101596
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.751397967338562,
      "learning_rate": 0.00035533144574817146,
      "loss": 2.9095,
      "step": 101597
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.718340516090393,
      "learning_rate": 0.00035532742535804225,
      "loss": 2.7658,
      "step": 101598
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.804894208908081,
      "learning_rate": 0.0003553234049576267,
      "loss": 3.2187,
      "step": 101599
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.769233226776123,
      "learning_rate": 0.0003553193845469254,
      "loss": 2.6805,
      "step": 101600
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8470629453659058,
      "learning_rate": 0.0003553153641259391,
      "loss": 3.0478,
      "step": 101601
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7483092546463013,
      "learning_rate": 0.0003553113436946688,
      "loss": 3.1252,
      "step": 101602
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9698659181594849,
      "learning_rate": 0.000355307323253115,
      "loss": 2.9381,
      "step": 101603
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6038089990615845,
      "learning_rate": 0.0003553033028012785,
      "loss": 2.9842,
      "step": 101604
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.733734369277954,
      "learning_rate": 0.00035529928233916014,
      "loss": 3.1829,
      "step": 101605
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7971903085708618,
      "learning_rate": 0.00035529526186676053,
      "loss": 2.9423,
      "step": 101606
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7847269773483276,
      "learning_rate": 0.00035529124138408046,
      "loss": 3.0818,
      "step": 101607
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6953314542770386,
      "learning_rate": 0.00035528722089112085,
      "loss": 3.1766,
      "step": 101608
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6694526672363281,
      "learning_rate": 0.0003552832003878822,
      "loss": 3.1168,
      "step": 101609
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.14750599861145,
      "learning_rate": 0.00035527917987436536,
      "loss": 2.7911,
      "step": 101610
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7362340688705444,
      "learning_rate": 0.0003552751593505711,
      "loss": 2.7376,
      "step": 101611
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9378420114517212,
      "learning_rate": 0.0003552711388165001,
      "loss": 3.1276,
      "step": 101612
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5743823051452637,
      "learning_rate": 0.0003552671182721532,
      "loss": 2.9204,
      "step": 101613
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.780953288078308,
      "learning_rate": 0.0003552630977175311,
      "loss": 2.9675,
      "step": 101614
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.885033130645752,
      "learning_rate": 0.00035525907715263456,
      "loss": 2.8606,
      "step": 101615
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8882060050964355,
      "learning_rate": 0.00035525505657746424,
      "loss": 3.1236,
      "step": 101616
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8170944452285767,
      "learning_rate": 0.00035525103599202105,
      "loss": 3.1106,
      "step": 101617
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.397061824798584,
      "learning_rate": 0.00035524701539630565,
      "loss": 3.041,
      "step": 101618
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.68015456199646,
      "learning_rate": 0.00035524299479031867,
      "loss": 2.8661,
      "step": 101619
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8630777597427368,
      "learning_rate": 0.0003552389741740611,
      "loss": 3.0752,
      "step": 101620
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6997979879379272,
      "learning_rate": 0.00035523495354753353,
      "loss": 3.0132,
      "step": 101621
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.7701830863952637,
      "learning_rate": 0.00035523093291073667,
      "loss": 2.8417,
      "step": 101622
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.169206142425537,
      "learning_rate": 0.0003552269122636714,
      "loss": 3.0551,
      "step": 101623
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8805210590362549,
      "learning_rate": 0.0003552228916063384,
      "loss": 2.9398,
      "step": 101624
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7114064693450928,
      "learning_rate": 0.0003552188709387384,
      "loss": 2.8957,
      "step": 101625
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0478482246398926,
      "learning_rate": 0.0003552148502608722,
      "loss": 3.2965,
      "step": 101626
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.928723931312561,
      "learning_rate": 0.00035521082957274053,
      "loss": 3.0526,
      "step": 101627
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9886618852615356,
      "learning_rate": 0.000355206808874344,
      "loss": 2.9877,
      "step": 101628
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.576690435409546,
      "learning_rate": 0.0003552027881656837,
      "loss": 2.9642,
      "step": 101629
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.935082197189331,
      "learning_rate": 0.00035519876744676,
      "loss": 2.894,
      "step": 101630
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6179497241973877,
      "learning_rate": 0.0003551947467175738,
      "loss": 2.6668,
      "step": 101631
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.662890911102295,
      "learning_rate": 0.00035519072597812594,
      "loss": 3.0701,
      "step": 101632
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7265394926071167,
      "learning_rate": 0.00035518670522841707,
      "loss": 3.2741,
      "step": 101633
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4634385108947754,
      "learning_rate": 0.0003551826844684479,
      "loss": 2.8837,
      "step": 101634
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.2591161727905273,
      "learning_rate": 0.00035517866369821926,
      "loss": 3.1769,
      "step": 101635
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.700709104537964,
      "learning_rate": 0.00035517464291773184,
      "loss": 2.996,
      "step": 101636
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6543304920196533,
      "learning_rate": 0.0003551706221269864,
      "loss": 2.9804,
      "step": 101637
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7708475589752197,
      "learning_rate": 0.0003551666013259838,
      "loss": 3.0488,
      "step": 101638
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.8082058429718018,
      "learning_rate": 0.0003551625805147246,
      "loss": 2.8519,
      "step": 101639
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.7962191104888916,
      "learning_rate": 0.0003551585596932096,
      "loss": 2.8363,
      "step": 101640
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8254656791687012,
      "learning_rate": 0.00035515453886143957,
      "loss": 3.0566,
      "step": 101641
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1955907344818115,
      "learning_rate": 0.0003551505180194154,
      "loss": 3.1439,
      "step": 101642
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.147237300872803,
      "learning_rate": 0.0003551464971671377,
      "loss": 3.0393,
      "step": 101643
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.941500663757324,
      "learning_rate": 0.00035514247630460705,
      "loss": 3.1352,
      "step": 101644
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.14721417427063,
      "learning_rate": 0.00035513845543182454,
      "loss": 3.0517,
      "step": 101645
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8367724418640137,
      "learning_rate": 0.0003551344345487907,
      "loss": 2.983,
      "step": 101646
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.221527576446533,
      "learning_rate": 0.0003551304136555063,
      "loss": 3.2525,
      "step": 101647
    },
    {
      "epoch": 1.32,
      "grad_norm": 5.805804252624512,
      "learning_rate": 0.00035512639275197215,
      "loss": 2.9714,
      "step": 101648
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.7469139099121094,
      "learning_rate": 0.00035512237183818896,
      "loss": 3.1861,
      "step": 101649
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.589949607849121,
      "learning_rate": 0.00035511835091415746,
      "loss": 3.0403,
      "step": 101650
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7670032978057861,
      "learning_rate": 0.0003551143299798784,
      "loss": 3.2346,
      "step": 101651
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.162637948989868,
      "learning_rate": 0.0003551103090353526,
      "loss": 3.0271,
      "step": 101652
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.54689359664917,
      "learning_rate": 0.0003551062880805807,
      "loss": 2.8797,
      "step": 101653
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.980717182159424,
      "learning_rate": 0.0003551022671155636,
      "loss": 2.8208,
      "step": 101654
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.8933823108673096,
      "learning_rate": 0.0003550982461403018,
      "loss": 3.1811,
      "step": 101655
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6925759315490723,
      "learning_rate": 0.0003550942251547963,
      "loss": 2.8687,
      "step": 101656
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7937344312667847,
      "learning_rate": 0.00035509020415904776,
      "loss": 2.9982,
      "step": 101657
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.3105156421661377,
      "learning_rate": 0.00035508618315305686,
      "loss": 2.9513,
      "step": 101658
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.235228061676025,
      "learning_rate": 0.00035508216213682437,
      "loss": 3.0246,
      "step": 101659
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.169787883758545,
      "learning_rate": 0.00035507814111035115,
      "loss": 2.7135,
      "step": 101660
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2996790409088135,
      "learning_rate": 0.0003550741200736378,
      "loss": 3.2178,
      "step": 101661
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8119044303894043,
      "learning_rate": 0.00035507009902668515,
      "loss": 3.0209,
      "step": 101662
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.080930233001709,
      "learning_rate": 0.00035506607796949394,
      "loss": 2.977,
      "step": 101663
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.045029878616333,
      "learning_rate": 0.0003550620569020649,
      "loss": 3.0959,
      "step": 101664
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6578203439712524,
      "learning_rate": 0.00035505803582439873,
      "loss": 2.9976,
      "step": 101665
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.24094820022583,
      "learning_rate": 0.00035505401473649636,
      "loss": 3.1467,
      "step": 101666
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6181933879852295,
      "learning_rate": 0.0003550499936383583,
      "loss": 3.1095,
      "step": 101667
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.057199716567993,
      "learning_rate": 0.00035504597252998543,
      "loss": 2.8984,
      "step": 101668
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7085902690887451,
      "learning_rate": 0.0003550419514113785,
      "loss": 3.0617,
      "step": 101669
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.689993381500244,
      "learning_rate": 0.00035503793028253823,
      "loss": 3.109,
      "step": 101670
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.162067413330078,
      "learning_rate": 0.0003550339091434653,
      "loss": 2.8578,
      "step": 101671
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.398777723312378,
      "learning_rate": 0.00035502988799416066,
      "loss": 3.1478,
      "step": 101672
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6025019884109497,
      "learning_rate": 0.00035502586683462483,
      "loss": 2.9762,
      "step": 101673
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8721812963485718,
      "learning_rate": 0.00035502184566485863,
      "loss": 2.994,
      "step": 101674
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.8728275299072266,
      "learning_rate": 0.00035501782448486293,
      "loss": 3.1447,
      "step": 101675
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1090993881225586,
      "learning_rate": 0.00035501380329463827,
      "loss": 2.8436,
      "step": 101676
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8742609024047852,
      "learning_rate": 0.00035500978209418557,
      "loss": 3.1284,
      "step": 101677
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.1972739696502686,
      "learning_rate": 0.00035500576088350554,
      "loss": 2.8791,
      "step": 101678
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5802438259124756,
      "learning_rate": 0.00035500173966259883,
      "loss": 3.2026,
      "step": 101679
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1192123889923096,
      "learning_rate": 0.0003549977184314663,
      "loss": 2.8834,
      "step": 101680
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4853086471557617,
      "learning_rate": 0.00035499369719010866,
      "loss": 3.0727,
      "step": 101681
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.7403507232666016,
      "learning_rate": 0.00035498967593852656,
      "loss": 2.9794,
      "step": 101682
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.328435182571411,
      "learning_rate": 0.000354985654676721,
      "loss": 2.9977,
      "step": 101683
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.092897891998291,
      "learning_rate": 0.0003549816334046925,
      "loss": 2.9502,
      "step": 101684
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.777939796447754,
      "learning_rate": 0.00035497761212244185,
      "loss": 3.0234,
      "step": 101685
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1503384113311768,
      "learning_rate": 0.0003549735908299698,
      "loss": 3.2402,
      "step": 101686
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.1834206581115723,
      "learning_rate": 0.0003549695695272772,
      "loss": 2.9898,
      "step": 101687
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7425527572631836,
      "learning_rate": 0.00035496554821436464,
      "loss": 2.9399,
      "step": 101688
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5729013681411743,
      "learning_rate": 0.00035496152689123295,
      "loss": 3.0433,
      "step": 101689
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4079790115356445,
      "learning_rate": 0.000354957505557883,
      "loss": 3.0434,
      "step": 101690
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.496035575866699,
      "learning_rate": 0.0003549534842143153,
      "loss": 3.074,
      "step": 101691
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.692034363746643,
      "learning_rate": 0.0003549494628605307,
      "loss": 3.3244,
      "step": 101692
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7746925354003906,
      "learning_rate": 0.0003549454414965301,
      "loss": 2.9183,
      "step": 101693
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.818516731262207,
      "learning_rate": 0.00035494142012231395,
      "loss": 3.083,
      "step": 101694
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6132311820983887,
      "learning_rate": 0.00035493739873788315,
      "loss": 3.1191,
      "step": 101695
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6333876848220825,
      "learning_rate": 0.0003549333773432385,
      "loss": 3.1987,
      "step": 101696
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9441391229629517,
      "learning_rate": 0.00035492935593838074,
      "loss": 3.0036,
      "step": 101697
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8316923379898071,
      "learning_rate": 0.00035492533452331044,
      "loss": 2.9757,
      "step": 101698
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.023634195327759,
      "learning_rate": 0.0003549213130980287,
      "loss": 3.1214,
      "step": 101699
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6010468006134033,
      "learning_rate": 0.00035491729166253587,
      "loss": 2.9569,
      "step": 101700
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.4411895275115967,
      "learning_rate": 0.0003549132702168329,
      "loss": 3.2363,
      "step": 101701
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.046342611312866,
      "learning_rate": 0.00035490924876092065,
      "loss": 2.9038,
      "step": 101702
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7748688459396362,
      "learning_rate": 0.0003549052272947996,
      "loss": 2.7059,
      "step": 101703
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.884893536567688,
      "learning_rate": 0.00035490120581847063,
      "loss": 2.9205,
      "step": 101704
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.73112952709198,
      "learning_rate": 0.0003548971843319346,
      "loss": 3.1306,
      "step": 101705
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7756260633468628,
      "learning_rate": 0.00035489316283519206,
      "loss": 2.9592,
      "step": 101706
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.1693663597106934,
      "learning_rate": 0.0003548891413282438,
      "loss": 2.8861,
      "step": 101707
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0760498046875,
      "learning_rate": 0.0003548851198110907,
      "loss": 2.9702,
      "step": 101708
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.8517377376556396,
      "learning_rate": 0.0003548810982837334,
      "loss": 3.0723,
      "step": 101709
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8379653692245483,
      "learning_rate": 0.00035487707674617265,
      "loss": 3.0998,
      "step": 101710
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7872282266616821,
      "learning_rate": 0.00035487305519840934,
      "loss": 3.0295,
      "step": 101711
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8166778087615967,
      "learning_rate": 0.0003548690336404439,
      "loss": 3.2245,
      "step": 101712
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.071884870529175,
      "learning_rate": 0.0003548650120722773,
      "loss": 2.9432,
      "step": 101713
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.117079734802246,
      "learning_rate": 0.0003548609904939104,
      "loss": 2.974,
      "step": 101714
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6591789722442627,
      "learning_rate": 0.0003548569689053437,
      "loss": 2.9474,
      "step": 101715
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7321057319641113,
      "learning_rate": 0.000354852947306578,
      "loss": 3.0047,
      "step": 101716
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6523005962371826,
      "learning_rate": 0.00035484892569761427,
      "loss": 2.9149,
      "step": 101717
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3464794158935547,
      "learning_rate": 0.000354844904078453,
      "loss": 2.9342,
      "step": 101718
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.115089178085327,
      "learning_rate": 0.0003548408824490949,
      "loss": 3.0345,
      "step": 101719
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2972066402435303,
      "learning_rate": 0.0003548368608095411,
      "loss": 3.1463,
      "step": 101720
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.422508478164673,
      "learning_rate": 0.0003548328391597919,
      "loss": 3.1151,
      "step": 101721
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.0156443119049072,
      "learning_rate": 0.00035482881749984826,
      "loss": 3.0045,
      "step": 101722
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7189514636993408,
      "learning_rate": 0.00035482479582971095,
      "loss": 2.974,
      "step": 101723
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2845804691314697,
      "learning_rate": 0.00035482077414938066,
      "loss": 2.9373,
      "step": 101724
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2316489219665527,
      "learning_rate": 0.0003548167524588582,
      "loss": 2.9955,
      "step": 101725
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.4004180431365967,
      "learning_rate": 0.0003548127307581442,
      "loss": 2.9884,
      "step": 101726
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5868210792541504,
      "learning_rate": 0.00035480870904723956,
      "loss": 3.02,
      "step": 101727
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6219382286071777,
      "learning_rate": 0.0003548046873261449,
      "loss": 2.8688,
      "step": 101728
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.319032669067383,
      "learning_rate": 0.00035480066559486097,
      "loss": 2.9603,
      "step": 101729
    },
    {
      "epoch": 1.32,
      "grad_norm": 5.0122456550598145,
      "learning_rate": 0.0003547966438533886,
      "loss": 3.3315,
      "step": 101730
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.702747344970703,
      "learning_rate": 0.0003547926221017285,
      "loss": 3.0371,
      "step": 101731
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.1020760536193848,
      "learning_rate": 0.0003547886003398814,
      "loss": 2.9606,
      "step": 101732
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6693273782730103,
      "learning_rate": 0.0003547845785678481,
      "loss": 3.0405,
      "step": 101733
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.5853919982910156,
      "learning_rate": 0.00035478055678562934,
      "loss": 2.9871,
      "step": 101734
    },
    {
      "epoch": 1.32,
      "grad_norm": 4.175921440124512,
      "learning_rate": 0.0003547765349932258,
      "loss": 2.9305,
      "step": 101735
    },
    {
      "epoch": 1.32,
      "grad_norm": 6.3121490478515625,
      "learning_rate": 0.0003547725131906382,
      "loss": 3.1088,
      "step": 101736
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.6286771297454834,
      "learning_rate": 0.0003547684913778674,
      "loss": 2.9327,
      "step": 101737
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7524497509002686,
      "learning_rate": 0.0003547644695549142,
      "loss": 3.1305,
      "step": 101738
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.3683512210845947,
      "learning_rate": 0.0003547604477217791,
      "loss": 3.1019,
      "step": 101739
    },
    {
      "epoch": 1.32,
      "grad_norm": 5.672605991363525,
      "learning_rate": 0.00035475642587846305,
      "loss": 2.9194,
      "step": 101740
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.8084864616394043,
      "learning_rate": 0.0003547524040249669,
      "loss": 3.0418,
      "step": 101741
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.44796085357666,
      "learning_rate": 0.00035474838216129106,
      "loss": 2.9843,
      "step": 101742
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6798194646835327,
      "learning_rate": 0.00035474436028743654,
      "loss": 2.8645,
      "step": 101743
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9314111471176147,
      "learning_rate": 0.000354740338403404,
      "loss": 2.8963,
      "step": 101744
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.829035520553589,
      "learning_rate": 0.00035473631650919415,
      "loss": 3.2536,
      "step": 101745
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.1785781383514404,
      "learning_rate": 0.00035473229460480785,
      "loss": 2.8578,
      "step": 101746
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.4520586729049683,
      "learning_rate": 0.0003547282726902457,
      "loss": 2.8368,
      "step": 101747
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9334166049957275,
      "learning_rate": 0.0003547242507655087,
      "loss": 3.1613,
      "step": 101748
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9451848268508911,
      "learning_rate": 0.00035472022883059726,
      "loss": 3.2236,
      "step": 101749
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.226250410079956,
      "learning_rate": 0.0003547162068855123,
      "loss": 2.8831,
      "step": 101750
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.290558099746704,
      "learning_rate": 0.0003547121849302547,
      "loss": 2.889,
      "step": 101751
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.9126145839691162,
      "learning_rate": 0.000354708162964825,
      "loss": 3.2194,
      "step": 101752
    },
    {
      "epoch": 1.32,
      "grad_norm": 3.718686580657959,
      "learning_rate": 0.00035470414098922394,
      "loss": 2.8175,
      "step": 101753
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7692241668701172,
      "learning_rate": 0.0003547001190034525,
      "loss": 3.058,
      "step": 101754
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6496374607086182,
      "learning_rate": 0.0003546960970075111,
      "loss": 2.9302,
      "step": 101755
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.5013662576675415,
      "learning_rate": 0.0003546920750014008,
      "loss": 3.0848,
      "step": 101756
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.8779611587524414,
      "learning_rate": 0.0003546880529851221,
      "loss": 3.0028,
      "step": 101757
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.7990968227386475,
      "learning_rate": 0.00035468403095867597,
      "loss": 3.2969,
      "step": 101758
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.515474557876587,
      "learning_rate": 0.000354680008922063,
      "loss": 2.6424,
      "step": 101759
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6981749534606934,
      "learning_rate": 0.00035467598687528406,
      "loss": 2.9627,
      "step": 101760
    },
    {
      "epoch": 1.32,
      "grad_norm": 1.6441699266433716,
      "learning_rate": 0.00035467196481833973,
      "loss": 3.245,
      "step": 101761
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4805891513824463,
      "learning_rate": 0.0003546679427512308,
      "loss": 2.9579,
      "step": 101762
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1238865852355957,
      "learning_rate": 0.00035466392067395826,
      "loss": 2.9929,
      "step": 101763
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.320971965789795,
      "learning_rate": 0.00035465989858652247,
      "loss": 3.0931,
      "step": 101764
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.325533866882324,
      "learning_rate": 0.0003546558764889244,
      "loss": 3.0579,
      "step": 101765
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.747436761856079,
      "learning_rate": 0.00035465185438116493,
      "loss": 2.8767,
      "step": 101766
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9948581457138062,
      "learning_rate": 0.00035464783226324447,
      "loss": 2.9548,
      "step": 101767
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8766274452209473,
      "learning_rate": 0.000354643810135164,
      "loss": 2.7295,
      "step": 101768
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6631872653961182,
      "learning_rate": 0.00035463978799692424,
      "loss": 3.2442,
      "step": 101769
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0283071994781494,
      "learning_rate": 0.00035463576584852583,
      "loss": 2.8735,
      "step": 101770
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9344011545181274,
      "learning_rate": 0.0003546317436899697,
      "loss": 3.2423,
      "step": 101771
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8036439418792725,
      "learning_rate": 0.00035462772152125647,
      "loss": 3.1359,
      "step": 101772
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5980595350265503,
      "learning_rate": 0.0003546236993423869,
      "loss": 3.2484,
      "step": 101773
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4047629833221436,
      "learning_rate": 0.00035461967715336173,
      "loss": 3.1928,
      "step": 101774
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9393786191940308,
      "learning_rate": 0.0003546156549541819,
      "loss": 2.871,
      "step": 101775
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9323947429656982,
      "learning_rate": 0.00035461163274484774,
      "loss": 3.0613,
      "step": 101776
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.244394063949585,
      "learning_rate": 0.0003546076105253604,
      "loss": 2.8382,
      "step": 101777
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4884397983551025,
      "learning_rate": 0.00035460358829572045,
      "loss": 2.9471,
      "step": 101778
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6790730953216553,
      "learning_rate": 0.0003545995660559286,
      "loss": 2.9123,
      "step": 101779
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2406468391418457,
      "learning_rate": 0.0003545955438059857,
      "loss": 2.8848,
      "step": 101780
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7101621627807617,
      "learning_rate": 0.0003545915215458925,
      "loss": 2.9938,
      "step": 101781
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7785966396331787,
      "learning_rate": 0.00035458749927564964,
      "loss": 3.2051,
      "step": 101782
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9468302726745605,
      "learning_rate": 0.0003545834769952579,
      "loss": 3.0495,
      "step": 101783
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2997806072235107,
      "learning_rate": 0.0003545794547047182,
      "loss": 3.0228,
      "step": 101784
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0886154174804688,
      "learning_rate": 0.0003545754324040311,
      "loss": 2.8368,
      "step": 101785
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.085454225540161,
      "learning_rate": 0.0003545714100931973,
      "loss": 3.0224,
      "step": 101786
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.432311534881592,
      "learning_rate": 0.0003545673877722178,
      "loss": 3.01,
      "step": 101787
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.27593731880188,
      "learning_rate": 0.0003545633654410931,
      "loss": 2.7765,
      "step": 101788
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.738802433013916,
      "learning_rate": 0.00035455934309982395,
      "loss": 3.0271,
      "step": 101789
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9669556617736816,
      "learning_rate": 0.0003545553207484113,
      "loss": 2.9221,
      "step": 101790
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.896517753601074,
      "learning_rate": 0.00035455129838685585,
      "loss": 2.8097,
      "step": 101791
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.972042202949524,
      "learning_rate": 0.0003545472760151582,
      "loss": 2.8039,
      "step": 101792
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.040560483932495,
      "learning_rate": 0.0003545432536333191,
      "loss": 3.2444,
      "step": 101793
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1797738075256348,
      "learning_rate": 0.00035453923124133957,
      "loss": 2.8555,
      "step": 101794
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8706761598587036,
      "learning_rate": 0.00035453520883922007,
      "loss": 2.9623,
      "step": 101795
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.112840175628662,
      "learning_rate": 0.0003545311864269614,
      "loss": 3.0886,
      "step": 101796
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2104694843292236,
      "learning_rate": 0.0003545271640045644,
      "loss": 2.9561,
      "step": 101797
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9726617336273193,
      "learning_rate": 0.00035452314157202974,
      "loss": 2.8357,
      "step": 101798
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.815525770187378,
      "learning_rate": 0.0003545191191293582,
      "loss": 2.9556,
      "step": 101799
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.1764848232269287,
      "learning_rate": 0.0003545150966765506,
      "loss": 2.7631,
      "step": 101800
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6049963235855103,
      "learning_rate": 0.00035451107421360754,
      "loss": 2.9632,
      "step": 101801
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3139030933380127,
      "learning_rate": 0.0003545070517405299,
      "loss": 2.7889,
      "step": 101802
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9344373941421509,
      "learning_rate": 0.00035450302925731835,
      "loss": 3.0139,
      "step": 101803
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8010687828063965,
      "learning_rate": 0.0003544990067639736,
      "loss": 3.4118,
      "step": 101804
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1242356300354004,
      "learning_rate": 0.00035449498426049647,
      "loss": 2.9221,
      "step": 101805
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.995363473892212,
      "learning_rate": 0.00035449096174688775,
      "loss": 2.7635,
      "step": 101806
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7103477716445923,
      "learning_rate": 0.00035448693922314814,
      "loss": 3.1962,
      "step": 101807
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5358470678329468,
      "learning_rate": 0.00035448291668927834,
      "loss": 3.4255,
      "step": 101808
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.227951526641846,
      "learning_rate": 0.00035447889414527917,
      "loss": 3.0353,
      "step": 101809
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.584368944168091,
      "learning_rate": 0.00035447487159115127,
      "loss": 3.0448,
      "step": 101810
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8115016222000122,
      "learning_rate": 0.0003544708490268955,
      "loss": 3.2931,
      "step": 101811
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0878448486328125,
      "learning_rate": 0.00035446682645251266,
      "loss": 2.7839,
      "step": 101812
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.1020262241363525,
      "learning_rate": 0.0003544628038680033,
      "loss": 3.106,
      "step": 101813
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.721082091331482,
      "learning_rate": 0.0003544587812733682,
      "loss": 3.0491,
      "step": 101814
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.742530345916748,
      "learning_rate": 0.0003544547586686084,
      "loss": 2.9295,
      "step": 101815
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.811466097831726,
      "learning_rate": 0.0003544507360537243,
      "loss": 3.0666,
      "step": 101816
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6434295177459717,
      "learning_rate": 0.0003544467134287168,
      "loss": 2.9895,
      "step": 101817
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.500380516052246,
      "learning_rate": 0.00035444269079358667,
      "loss": 2.9928,
      "step": 101818
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.918955683708191,
      "learning_rate": 0.00035443866814833456,
      "loss": 3.0026,
      "step": 101819
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.532615303993225,
      "learning_rate": 0.00035443464549296126,
      "loss": 3.0799,
      "step": 101820
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6421089172363281,
      "learning_rate": 0.0003544306228274675,
      "loss": 2.9814,
      "step": 101821
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7112072706222534,
      "learning_rate": 0.00035442660015185424,
      "loss": 3.149,
      "step": 101822
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3562510013580322,
      "learning_rate": 0.0003544225774661218,
      "loss": 2.9283,
      "step": 101823
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7306416034698486,
      "learning_rate": 0.00035441855477027134,
      "loss": 2.6751,
      "step": 101824
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8643895387649536,
      "learning_rate": 0.0003544145320643035,
      "loss": 3.1129,
      "step": 101825
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.1199445724487305,
      "learning_rate": 0.00035441050934821887,
      "loss": 3.1341,
      "step": 101826
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5788166522979736,
      "learning_rate": 0.0003544064866220182,
      "loss": 2.9508,
      "step": 101827
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7301723957061768,
      "learning_rate": 0.0003544024638857025,
      "loss": 2.9984,
      "step": 101828
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6839531660079956,
      "learning_rate": 0.0003543984411392723,
      "loss": 3.1238,
      "step": 101829
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2418692111968994,
      "learning_rate": 0.0003543944183827284,
      "loss": 2.8355,
      "step": 101830
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8241015672683716,
      "learning_rate": 0.00035439039561607153,
      "loss": 2.8457,
      "step": 101831
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1985950469970703,
      "learning_rate": 0.0003543863728393025,
      "loss": 3.067,
      "step": 101832
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.886128306388855,
      "learning_rate": 0.00035438235005242204,
      "loss": 2.9922,
      "step": 101833
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0875208377838135,
      "learning_rate": 0.0003543783272554308,
      "loss": 2.781,
      "step": 101834
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5306401252746582,
      "learning_rate": 0.00035437430444832963,
      "loss": 2.9597,
      "step": 101835
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.933847427368164,
      "learning_rate": 0.00035437028163111926,
      "loss": 2.7585,
      "step": 101836
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.798633337020874,
      "learning_rate": 0.0003543662588038005,
      "loss": 2.863,
      "step": 101837
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.705054759979248,
      "learning_rate": 0.0003543622359663739,
      "loss": 2.9902,
      "step": 101838
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7229169607162476,
      "learning_rate": 0.00035435821311884043,
      "loss": 3.1873,
      "step": 101839
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9697113037109375,
      "learning_rate": 0.00035435419026120065,
      "loss": 2.7618,
      "step": 101840
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8769944906234741,
      "learning_rate": 0.0003543501673934554,
      "loss": 2.9781,
      "step": 101841
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6577457189559937,
      "learning_rate": 0.00035434614451560555,
      "loss": 2.9373,
      "step": 101842
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.692736268043518,
      "learning_rate": 0.0003543421216276517,
      "loss": 2.9693,
      "step": 101843
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5376503467559814,
      "learning_rate": 0.00035433809872959454,
      "loss": 2.8899,
      "step": 101844
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7063891887664795,
      "learning_rate": 0.00035433407582143495,
      "loss": 2.9867,
      "step": 101845
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6341969966888428,
      "learning_rate": 0.0003543300529031736,
      "loss": 3.2687,
      "step": 101846
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.686665415763855,
      "learning_rate": 0.00035432602997481123,
      "loss": 2.9925,
      "step": 101847
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.271151542663574,
      "learning_rate": 0.0003543220070363488,
      "loss": 2.8932,
      "step": 101848
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9076954126358032,
      "learning_rate": 0.00035431798408778667,
      "loss": 2.8272,
      "step": 101849
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.20412540435791,
      "learning_rate": 0.00035431396112912587,
      "loss": 2.9626,
      "step": 101850
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.705244779586792,
      "learning_rate": 0.0003543099381603672,
      "loss": 3.1039,
      "step": 101851
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.884732723236084,
      "learning_rate": 0.0003543059151815112,
      "loss": 2.9975,
      "step": 101852
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.225574493408203,
      "learning_rate": 0.0003543018921925586,
      "loss": 2.9302,
      "step": 101853
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.345400333404541,
      "learning_rate": 0.00035429786919351046,
      "loss": 2.8508,
      "step": 101854
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.255131244659424,
      "learning_rate": 0.0003542938461843672,
      "loss": 2.9311,
      "step": 101855
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4013450145721436,
      "learning_rate": 0.00035428982316512963,
      "loss": 3.0855,
      "step": 101856
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8080650568008423,
      "learning_rate": 0.00035428580013579863,
      "loss": 2.8752,
      "step": 101857
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.622454285621643,
      "learning_rate": 0.0003542817770963749,
      "loss": 2.7965,
      "step": 101858
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7456109523773193,
      "learning_rate": 0.0003542777540468592,
      "loss": 3.0582,
      "step": 101859
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9734134674072266,
      "learning_rate": 0.0003542737309872521,
      "loss": 2.7979,
      "step": 101860
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7597116231918335,
      "learning_rate": 0.00035426970791755465,
      "loss": 3.109,
      "step": 101861
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6286821365356445,
      "learning_rate": 0.0003542656848377673,
      "loss": 3.0212,
      "step": 101862
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.662369728088379,
      "learning_rate": 0.000354261661747891,
      "loss": 3.1567,
      "step": 101863
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.248795747756958,
      "learning_rate": 0.00035425763864792645,
      "loss": 2.9704,
      "step": 101864
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.4973132610321045,
      "learning_rate": 0.00035425361553787437,
      "loss": 3.1163,
      "step": 101865
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9248138666152954,
      "learning_rate": 0.0003542495924177355,
      "loss": 2.8203,
      "step": 101866
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6179531812667847,
      "learning_rate": 0.0003542455692875106,
      "loss": 2.8673,
      "step": 101867
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.057753801345825,
      "learning_rate": 0.0003542415461472004,
      "loss": 2.926,
      "step": 101868
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6137077808380127,
      "learning_rate": 0.0003542375229968057,
      "loss": 3.1816,
      "step": 101869
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9363957643508911,
      "learning_rate": 0.0003542334998363273,
      "loss": 3.0037,
      "step": 101870
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4348154067993164,
      "learning_rate": 0.00035422947666576574,
      "loss": 3.0585,
      "step": 101871
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.950148582458496,
      "learning_rate": 0.00035422545348512196,
      "loss": 2.8893,
      "step": 101872
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9363882541656494,
      "learning_rate": 0.0003542214302943966,
      "loss": 3.139,
      "step": 101873
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.082724094390869,
      "learning_rate": 0.00035421740709359054,
      "loss": 3.007,
      "step": 101874
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.948650598526001,
      "learning_rate": 0.00035421338388270436,
      "loss": 3.1418,
      "step": 101875
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.407531499862671,
      "learning_rate": 0.00035420936066173894,
      "loss": 3.276,
      "step": 101876
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.942964792251587,
      "learning_rate": 0.00035420533743069495,
      "loss": 2.8606,
      "step": 101877
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.434335947036743,
      "learning_rate": 0.0003542013141895732,
      "loss": 3.1656,
      "step": 101878
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4285879135131836,
      "learning_rate": 0.00035419729093837435,
      "loss": 2.8393,
      "step": 101879
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9808489084243774,
      "learning_rate": 0.0003541932676770992,
      "loss": 3.0044,
      "step": 101880
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8142527341842651,
      "learning_rate": 0.0003541892444057485,
      "loss": 3.1413,
      "step": 101881
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9858100414276123,
      "learning_rate": 0.0003541852211243231,
      "loss": 3.0652,
      "step": 101882
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.445878505706787,
      "learning_rate": 0.00035418119783282354,
      "loss": 2.9204,
      "step": 101883
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7511143684387207,
      "learning_rate": 0.0003541771745312506,
      "loss": 3.0066,
      "step": 101884
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.405224323272705,
      "learning_rate": 0.0003541731512196053,
      "loss": 3.1189,
      "step": 101885
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.760254383087158,
      "learning_rate": 0.00035416912789788807,
      "loss": 3.1008,
      "step": 101886
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5473586320877075,
      "learning_rate": 0.00035416510456609974,
      "loss": 3.171,
      "step": 101887
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5850706100463867,
      "learning_rate": 0.0003541610812242412,
      "loss": 2.8871,
      "step": 101888
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.121917486190796,
      "learning_rate": 0.0003541570578723131,
      "loss": 3.162,
      "step": 101889
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.307537078857422,
      "learning_rate": 0.000354153034510316,
      "loss": 3.1265,
      "step": 101890
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.683418869972229,
      "learning_rate": 0.0003541490111382511,
      "loss": 2.8855,
      "step": 101891
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5687487125396729,
      "learning_rate": 0.0003541449877561187,
      "loss": 2.9471,
      "step": 101892
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.170966625213623,
      "learning_rate": 0.00035414096436391963,
      "loss": 3.026,
      "step": 101893
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.197650909423828,
      "learning_rate": 0.0003541369409616549,
      "loss": 3.0082,
      "step": 101894
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.601952314376831,
      "learning_rate": 0.00035413291754932504,
      "loss": 2.8558,
      "step": 101895
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6421972513198853,
      "learning_rate": 0.0003541288941269308,
      "loss": 2.7779,
      "step": 101896
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.102839708328247,
      "learning_rate": 0.0003541248706944731,
      "loss": 2.9603,
      "step": 101897
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8235933780670166,
      "learning_rate": 0.0003541208472519525,
      "loss": 2.9444,
      "step": 101898
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8189400434494019,
      "learning_rate": 0.0003541168237993698,
      "loss": 3.0405,
      "step": 101899
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0086028575897217,
      "learning_rate": 0.0003541128003367258,
      "loss": 3.1938,
      "step": 101900
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6634379625320435,
      "learning_rate": 0.0003541087768640211,
      "loss": 3.1394,
      "step": 101901
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.322467565536499,
      "learning_rate": 0.00035410475338125663,
      "loss": 3.1573,
      "step": 101902
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7337418794631958,
      "learning_rate": 0.0003541007298884331,
      "loss": 3.1612,
      "step": 101903
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7728925943374634,
      "learning_rate": 0.00035409670638555116,
      "loss": 2.9852,
      "step": 101904
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6573859453201294,
      "learning_rate": 0.00035409268287261167,
      "loss": 3.2361,
      "step": 101905
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.102046012878418,
      "learning_rate": 0.00035408865934961534,
      "loss": 2.9449,
      "step": 101906
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.845915675163269,
      "learning_rate": 0.0003540846358165629,
      "loss": 3.0841,
      "step": 101907
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6097171306610107,
      "learning_rate": 0.000354080612273455,
      "loss": 3.2325,
      "step": 101908
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9269704818725586,
      "learning_rate": 0.00035407658872029256,
      "loss": 2.82,
      "step": 101909
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8146467208862305,
      "learning_rate": 0.00035407256515707634,
      "loss": 2.8365,
      "step": 101910
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9409188032150269,
      "learning_rate": 0.0003540685415838069,
      "loss": 2.9336,
      "step": 101911
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4500622749328613,
      "learning_rate": 0.00035406451800048513,
      "loss": 2.7277,
      "step": 101912
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2398271560668945,
      "learning_rate": 0.0003540604944071118,
      "loss": 2.9764,
      "step": 101913
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9867693185806274,
      "learning_rate": 0.00035405647080368754,
      "loss": 2.7758,
      "step": 101914
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6984206438064575,
      "learning_rate": 0.00035405244719021324,
      "loss": 2.7923,
      "step": 101915
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.890654444694519,
      "learning_rate": 0.00035404842356668946,
      "loss": 3.1339,
      "step": 101916
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0113372802734375,
      "learning_rate": 0.0003540443999331172,
      "loss": 3.108,
      "step": 101917
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8239699602127075,
      "learning_rate": 0.0003540403762894969,
      "loss": 3.1563,
      "step": 101918
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7867413759231567,
      "learning_rate": 0.0003540363526358295,
      "loss": 2.8012,
      "step": 101919
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8605570793151855,
      "learning_rate": 0.00035403232897211577,
      "loss": 2.7422,
      "step": 101920
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.205282688140869,
      "learning_rate": 0.0003540283052983565,
      "loss": 3.0285,
      "step": 101921
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.8206028938293457,
      "learning_rate": 0.00035402428161455225,
      "loss": 3.1841,
      "step": 101922
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7101609706878662,
      "learning_rate": 0.00035402025792070387,
      "loss": 3.3069,
      "step": 101923
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8691736459732056,
      "learning_rate": 0.0003540162342168121,
      "loss": 2.8158,
      "step": 101924
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.264197587966919,
      "learning_rate": 0.0003540122105028778,
      "loss": 3.0958,
      "step": 101925
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4690210819244385,
      "learning_rate": 0.00035400818677890143,
      "loss": 2.8597,
      "step": 101926
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.045682668685913,
      "learning_rate": 0.00035400416304488396,
      "loss": 2.9836,
      "step": 101927
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.855868935585022,
      "learning_rate": 0.00035400013930082623,
      "loss": 3.045,
      "step": 101928
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.655990719795227,
      "learning_rate": 0.0003539961155467287,
      "loss": 2.9253,
      "step": 101929
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9769980907440186,
      "learning_rate": 0.00035399209178259234,
      "loss": 3.202,
      "step": 101930
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7449114322662354,
      "learning_rate": 0.0003539880680084179,
      "loss": 3.1754,
      "step": 101931
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1901583671569824,
      "learning_rate": 0.00035398404422420595,
      "loss": 2.8189,
      "step": 101932
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2147419452667236,
      "learning_rate": 0.00035398002042995736,
      "loss": 2.956,
      "step": 101933
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8410457372665405,
      "learning_rate": 0.0003539759966256729,
      "loss": 3.3499,
      "step": 101934
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2067434787750244,
      "learning_rate": 0.0003539719728113533,
      "loss": 3.1521,
      "step": 101935
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.705820083618164,
      "learning_rate": 0.00035396794898699923,
      "loss": 2.8412,
      "step": 101936
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.4088664054870605,
      "learning_rate": 0.00035396392515261164,
      "loss": 2.7964,
      "step": 101937
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.884443998336792,
      "learning_rate": 0.00035395990130819094,
      "loss": 3.0351,
      "step": 101938
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2110424041748047,
      "learning_rate": 0.0003539558774537382,
      "loss": 2.8135,
      "step": 101939
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4582595825195312,
      "learning_rate": 0.00035395185358925405,
      "loss": 3.2264,
      "step": 101940
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0752906799316406,
      "learning_rate": 0.0003539478297147392,
      "loss": 2.8684,
      "step": 101941
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9607019424438477,
      "learning_rate": 0.00035394380583019436,
      "loss": 3.235,
      "step": 101942
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.267934560775757,
      "learning_rate": 0.0003539397819356205,
      "loss": 3.061,
      "step": 101943
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.645277976989746,
      "learning_rate": 0.0003539357580310181,
      "loss": 3.0857,
      "step": 101944
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8359841108322144,
      "learning_rate": 0.00035393173411638805,
      "loss": 3.1132,
      "step": 101945
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.787736415863037,
      "learning_rate": 0.0003539277101917311,
      "loss": 2.9548,
      "step": 101946
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.246598720550537,
      "learning_rate": 0.0003539236862570479,
      "loss": 3.0627,
      "step": 101947
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.1507256031036377,
      "learning_rate": 0.0003539196623123393,
      "loss": 3.0815,
      "step": 101948
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7739336490631104,
      "learning_rate": 0.0003539156383576061,
      "loss": 3.1108,
      "step": 101949
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9660717248916626,
      "learning_rate": 0.0003539116143928489,
      "loss": 3.081,
      "step": 101950
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9960675239562988,
      "learning_rate": 0.0003539075904180684,
      "loss": 2.8657,
      "step": 101951
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9520494937896729,
      "learning_rate": 0.00035390356643326566,
      "loss": 2.969,
      "step": 101952
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7565745115280151,
      "learning_rate": 0.00035389954243844114,
      "loss": 3.1368,
      "step": 101953
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.756765365600586,
      "learning_rate": 0.00035389551843359565,
      "loss": 3.0584,
      "step": 101954
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9273054599761963,
      "learning_rate": 0.0003538914944187301,
      "loss": 2.8785,
      "step": 101955
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7225035429000854,
      "learning_rate": 0.00035388747039384496,
      "loss": 2.962,
      "step": 101956
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8035868406295776,
      "learning_rate": 0.0003538834463589411,
      "loss": 2.8826,
      "step": 101957
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1549861431121826,
      "learning_rate": 0.00035387942231401945,
      "loss": 2.9573,
      "step": 101958
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.40786075592041,
      "learning_rate": 0.00035387539825908047,
      "loss": 2.9573,
      "step": 101959
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.843042016029358,
      "learning_rate": 0.00035387137419412497,
      "loss": 3.1145,
      "step": 101960
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.007800340652466,
      "learning_rate": 0.00035386735011915394,
      "loss": 2.8336,
      "step": 101961
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7447657585144043,
      "learning_rate": 0.00035386332603416787,
      "loss": 3.0674,
      "step": 101962
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.185307741165161,
      "learning_rate": 0.00035385930193916756,
      "loss": 3.1579,
      "step": 101963
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7247589826583862,
      "learning_rate": 0.0003538552778341539,
      "loss": 3.1435,
      "step": 101964
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7177566289901733,
      "learning_rate": 0.00035385125371912744,
      "loss": 2.8983,
      "step": 101965
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6272573471069336,
      "learning_rate": 0.00035384722959408904,
      "loss": 2.9367,
      "step": 101966
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.137131214141846,
      "learning_rate": 0.00035384320545903944,
      "loss": 2.7435,
      "step": 101967
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4416239261627197,
      "learning_rate": 0.00035383918131397935,
      "loss": 3.3169,
      "step": 101968
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9005321264266968,
      "learning_rate": 0.00035383515715890946,
      "loss": 3.1182,
      "step": 101969
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.944240927696228,
      "learning_rate": 0.0003538311329938307,
      "loss": 3.3454,
      "step": 101970
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.08825945854187,
      "learning_rate": 0.0003538271088187437,
      "loss": 3.1031,
      "step": 101971
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.153468608856201,
      "learning_rate": 0.0003538230846336491,
      "loss": 2.7364,
      "step": 101972
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0258541107177734,
      "learning_rate": 0.000353819060438548,
      "loss": 2.9125,
      "step": 101973
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4505560398101807,
      "learning_rate": 0.0003538150362334408,
      "loss": 3.0908,
      "step": 101974
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.012800455093384,
      "learning_rate": 0.0003538110120183283,
      "loss": 2.9226,
      "step": 101975
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.8624186515808105,
      "learning_rate": 0.00035380698779321145,
      "loss": 3.1056,
      "step": 101976
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9780486822128296,
      "learning_rate": 0.00035380296355809086,
      "loss": 3.064,
      "step": 101977
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8888434171676636,
      "learning_rate": 0.00035379893931296715,
      "loss": 2.9398,
      "step": 101978
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8989472389221191,
      "learning_rate": 0.00035379491505784134,
      "loss": 3.0911,
      "step": 101979
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8733571767807007,
      "learning_rate": 0.000353790890792714,
      "loss": 3.0739,
      "step": 101980
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7219552993774414,
      "learning_rate": 0.00035378686651758585,
      "loss": 3.1554,
      "step": 101981
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8968008756637573,
      "learning_rate": 0.0003537828422324578,
      "loss": 2.9018,
      "step": 101982
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.105062961578369,
      "learning_rate": 0.0003537788179373304,
      "loss": 3.1665,
      "step": 101983
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8096925020217896,
      "learning_rate": 0.00035377479363220456,
      "loss": 3.0951,
      "step": 101984
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7596184015274048,
      "learning_rate": 0.000353770769317081,
      "loss": 3.06,
      "step": 101985
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9770888090133667,
      "learning_rate": 0.0003537667449919604,
      "loss": 3.3256,
      "step": 101986
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.432799816131592,
      "learning_rate": 0.0003537627206568436,
      "loss": 2.7771,
      "step": 101987
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5774288177490234,
      "learning_rate": 0.0003537586963117312,
      "loss": 3.0302,
      "step": 101988
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5659372806549072,
      "learning_rate": 0.00035375467195662415,
      "loss": 2.8077,
      "step": 101989
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3450887203216553,
      "learning_rate": 0.000353750647591523,
      "loss": 2.8115,
      "step": 101990
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3524112701416016,
      "learning_rate": 0.0003537466232164286,
      "loss": 3.2695,
      "step": 101991
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3422930240631104,
      "learning_rate": 0.00035374259883134176,
      "loss": 3.2328,
      "step": 101992
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7884373664855957,
      "learning_rate": 0.00035373857443626313,
      "loss": 3.1279,
      "step": 101993
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.274470090866089,
      "learning_rate": 0.00035373455003119347,
      "loss": 2.8261,
      "step": 101994
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9121606349945068,
      "learning_rate": 0.00035373052561613353,
      "loss": 2.9812,
      "step": 101995
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.055657148361206,
      "learning_rate": 0.00035372650119108403,
      "loss": 2.9569,
      "step": 101996
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.153365135192871,
      "learning_rate": 0.00035372247675604583,
      "loss": 2.9825,
      "step": 101997
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7259734869003296,
      "learning_rate": 0.0003537184523110197,
      "loss": 3.0132,
      "step": 101998
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.955966591835022,
      "learning_rate": 0.0003537144278560061,
      "loss": 2.9366,
      "step": 101999
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8810111284255981,
      "learning_rate": 0.00035371040339100606,
      "loss": 2.6868,
      "step": 102000
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5979830026626587,
      "learning_rate": 0.00035370637891602027,
      "loss": 3.0252,
      "step": 102001
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7169718742370605,
      "learning_rate": 0.00035370235443104946,
      "loss": 2.9064,
      "step": 102002
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5307345390319824,
      "learning_rate": 0.0003536983299360943,
      "loss": 3.0147,
      "step": 102003
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8615734577178955,
      "learning_rate": 0.00035369430543115557,
      "loss": 3.1623,
      "step": 102004
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6825196743011475,
      "learning_rate": 0.0003536902809162341,
      "loss": 2.9174,
      "step": 102005
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6340699195861816,
      "learning_rate": 0.00035368625639133067,
      "loss": 2.9111,
      "step": 102006
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9584629535675049,
      "learning_rate": 0.0003536822318564459,
      "loss": 2.9827,
      "step": 102007
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7821359634399414,
      "learning_rate": 0.00035367820731158057,
      "loss": 2.8978,
      "step": 102008
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6964770555496216,
      "learning_rate": 0.0003536741827567355,
      "loss": 2.954,
      "step": 102009
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9412851333618164,
      "learning_rate": 0.0003536701581919113,
      "loss": 3.0095,
      "step": 102010
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8826258182525635,
      "learning_rate": 0.00035366613361710884,
      "loss": 2.7995,
      "step": 102011
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0502092838287354,
      "learning_rate": 0.0003536621090323288,
      "loss": 2.8349,
      "step": 102012
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8089845180511475,
      "learning_rate": 0.00035365808443757214,
      "loss": 3.2749,
      "step": 102013
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8459501266479492,
      "learning_rate": 0.0003536540598328392,
      "loss": 2.8094,
      "step": 102014
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8206918239593506,
      "learning_rate": 0.00035365003521813104,
      "loss": 2.8569,
      "step": 102015
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7953660488128662,
      "learning_rate": 0.0003536460105934484,
      "loss": 3.2122,
      "step": 102016
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7093777656555176,
      "learning_rate": 0.00035364198595879185,
      "loss": 2.9995,
      "step": 102017
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9790352582931519,
      "learning_rate": 0.0003536379613141623,
      "loss": 3.0718,
      "step": 102018
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.5031468868255615,
      "learning_rate": 0.0003536339366595605,
      "loss": 3.1036,
      "step": 102019
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1732537746429443,
      "learning_rate": 0.00035362991199498707,
      "loss": 3.0456,
      "step": 102020
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.857318639755249,
      "learning_rate": 0.0003536258873204428,
      "loss": 2.8529,
      "step": 102021
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2088186740875244,
      "learning_rate": 0.0003536218626359285,
      "loss": 2.8015,
      "step": 102022
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.8046274185180664,
      "learning_rate": 0.0003536178379414449,
      "loss": 2.8685,
      "step": 102023
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6618608236312866,
      "learning_rate": 0.00035361381323699263,
      "loss": 3.0303,
      "step": 102024
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6994621753692627,
      "learning_rate": 0.0003536097885225727,
      "loss": 3.1634,
      "step": 102025
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7482751607894897,
      "learning_rate": 0.0003536057637981856,
      "loss": 3.0552,
      "step": 102026
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.416280746459961,
      "learning_rate": 0.00035360173906383215,
      "loss": 2.9552,
      "step": 102027
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6274402141571045,
      "learning_rate": 0.0003535977143195132,
      "loss": 3.0713,
      "step": 102028
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6926202774047852,
      "learning_rate": 0.0003535936895652294,
      "loss": 3.1915,
      "step": 102029
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0508365631103516,
      "learning_rate": 0.00035358966480098143,
      "loss": 3.0953,
      "step": 102030
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.522308111190796,
      "learning_rate": 0.00035358564002677025,
      "loss": 2.9604,
      "step": 102031
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0593221187591553,
      "learning_rate": 0.00035358161524259645,
      "loss": 2.8989,
      "step": 102032
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6980974674224854,
      "learning_rate": 0.00035357759044846084,
      "loss": 2.9962,
      "step": 102033
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6892545223236084,
      "learning_rate": 0.0003535735656443642,
      "loss": 3.1878,
      "step": 102034
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2311391830444336,
      "learning_rate": 0.00035356954083030705,
      "loss": 3.1442,
      "step": 102035
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.13388991355896,
      "learning_rate": 0.0003535655160062904,
      "loss": 3.0078,
      "step": 102036
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0802018642425537,
      "learning_rate": 0.00035356149117231496,
      "loss": 2.9901,
      "step": 102037
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0660035610198975,
      "learning_rate": 0.00035355746632838135,
      "loss": 2.8508,
      "step": 102038
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8626083135604858,
      "learning_rate": 0.00035355344147449037,
      "loss": 3.119,
      "step": 102039
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.095303773880005,
      "learning_rate": 0.0003535494166106429,
      "loss": 2.879,
      "step": 102040
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6521875858306885,
      "learning_rate": 0.00035354539173683956,
      "loss": 3.084,
      "step": 102041
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9567532539367676,
      "learning_rate": 0.00035354136685308103,
      "loss": 3.1008,
      "step": 102042
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.934799075126648,
      "learning_rate": 0.0003535373419593683,
      "loss": 3.1023,
      "step": 102043
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.58780837059021,
      "learning_rate": 0.00035353331705570185,
      "loss": 3.0014,
      "step": 102044
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.065030574798584,
      "learning_rate": 0.0003535292921420825,
      "loss": 2.9409,
      "step": 102045
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.547666549682617,
      "learning_rate": 0.0003535252672185112,
      "loss": 3.1292,
      "step": 102046
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7478464841842651,
      "learning_rate": 0.0003535212422849885,
      "loss": 3.1039,
      "step": 102047
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7371714115142822,
      "learning_rate": 0.0003535172173415151,
      "loss": 3.0626,
      "step": 102048
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4289162158966064,
      "learning_rate": 0.00035351319238809196,
      "loss": 2.95,
      "step": 102049
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8779022693634033,
      "learning_rate": 0.00035350916742471955,
      "loss": 2.9097,
      "step": 102050
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.998175024986267,
      "learning_rate": 0.00035350514245139893,
      "loss": 3.0485,
      "step": 102051
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.07934308052063,
      "learning_rate": 0.00035350111746813064,
      "loss": 3.1862,
      "step": 102052
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.537883758544922,
      "learning_rate": 0.00035349709247491545,
      "loss": 2.8457,
      "step": 102053
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1014111042022705,
      "learning_rate": 0.0003534930674717541,
      "loss": 2.9456,
      "step": 102054
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.813416838645935,
      "learning_rate": 0.0003534890424586475,
      "loss": 2.893,
      "step": 102055
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7326339483261108,
      "learning_rate": 0.0003534850174355962,
      "loss": 2.9847,
      "step": 102056
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6408811807632446,
      "learning_rate": 0.00035348099240260105,
      "loss": 3.0506,
      "step": 102057
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0456976890563965,
      "learning_rate": 0.0003534769673596628,
      "loss": 3.0511,
      "step": 102058
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0046911239624023,
      "learning_rate": 0.0003534729423067821,
      "loss": 2.9536,
      "step": 102059
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1660139560699463,
      "learning_rate": 0.00035346891724395983,
      "loss": 2.7999,
      "step": 102060
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.348043203353882,
      "learning_rate": 0.0003534648921711966,
      "loss": 2.8646,
      "step": 102061
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9878621101379395,
      "learning_rate": 0.00035346086708849334,
      "loss": 3.0847,
      "step": 102062
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7734637260437012,
      "learning_rate": 0.0003534568419958507,
      "loss": 2.981,
      "step": 102063
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.2002763748168945,
      "learning_rate": 0.0003534528168932693,
      "loss": 3.0102,
      "step": 102064
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9015750885009766,
      "learning_rate": 0.00035344879178075014,
      "loss": 3.1172,
      "step": 102065
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7488065958023071,
      "learning_rate": 0.0003534447666582938,
      "loss": 3.0353,
      "step": 102066
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9569637775421143,
      "learning_rate": 0.000353440741525901,
      "loss": 2.943,
      "step": 102067
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8077791929244995,
      "learning_rate": 0.00035343671638357274,
      "loss": 2.7903,
      "step": 102068
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.4924360513687134,
      "learning_rate": 0.00035343269123130943,
      "loss": 3.2104,
      "step": 102069
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7985535860061646,
      "learning_rate": 0.00035342866606911195,
      "loss": 3.0961,
      "step": 102070
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3516082763671875,
      "learning_rate": 0.0003534246408969812,
      "loss": 3.3662,
      "step": 102071
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5002713203430176,
      "learning_rate": 0.00035342061571491765,
      "loss": 3.0849,
      "step": 102072
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1297523975372314,
      "learning_rate": 0.0003534165905229223,
      "loss": 2.8536,
      "step": 102073
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.746466875076294,
      "learning_rate": 0.00035341256532099585,
      "loss": 3.112,
      "step": 102074
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.919978380203247,
      "learning_rate": 0.0003534085401091389,
      "loss": 3.0171,
      "step": 102075
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6297374963760376,
      "learning_rate": 0.00035340451488735224,
      "loss": 2.8418,
      "step": 102076
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5706170797348022,
      "learning_rate": 0.0003534004896556368,
      "loss": 3.0409,
      "step": 102077
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0861401557922363,
      "learning_rate": 0.00035339646441399317,
      "loss": 3.1346,
      "step": 102078
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5686061382293701,
      "learning_rate": 0.00035339243916242204,
      "loss": 3.0147,
      "step": 102079
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3438665866851807,
      "learning_rate": 0.0003533884139009244,
      "loss": 2.7924,
      "step": 102080
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6722137928009033,
      "learning_rate": 0.00035338438862950075,
      "loss": 3.109,
      "step": 102081
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5054872035980225,
      "learning_rate": 0.0003533803633481519,
      "loss": 2.9757,
      "step": 102082
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6627596616744995,
      "learning_rate": 0.00035337633805687874,
      "loss": 2.969,
      "step": 102083
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.123744249343872,
      "learning_rate": 0.00035337231275568183,
      "loss": 3.2281,
      "step": 102084
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.718764066696167,
      "learning_rate": 0.00035336828744456203,
      "loss": 2.8058,
      "step": 102085
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.265014886856079,
      "learning_rate": 0.00035336426212352,
      "loss": 3.053,
      "step": 102086
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8080370426177979,
      "learning_rate": 0.00035336023679255664,
      "loss": 3.1968,
      "step": 102087
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9030799865722656,
      "learning_rate": 0.00035335621145167256,
      "loss": 2.8146,
      "step": 102088
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6939283609390259,
      "learning_rate": 0.00035335218610086847,
      "loss": 2.8189,
      "step": 102089
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1235554218292236,
      "learning_rate": 0.0003533481607401454,
      "loss": 3.2035,
      "step": 102090
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1535115242004395,
      "learning_rate": 0.0003533441353695037,
      "loss": 3.2046,
      "step": 102091
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.0346524715423584,
      "learning_rate": 0.00035334010998894444,
      "loss": 2.95,
      "step": 102092
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.361377239227295,
      "learning_rate": 0.00035333608459846814,
      "loss": 2.601,
      "step": 102093
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.668197751045227,
      "learning_rate": 0.0003533320591980758,
      "loss": 2.9748,
      "step": 102094
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.7015223503112793,
      "learning_rate": 0.0003533280337877679,
      "loss": 2.7441,
      "step": 102095
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7975006103515625,
      "learning_rate": 0.0003533240083675454,
      "loss": 3.3044,
      "step": 102096
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6818548440933228,
      "learning_rate": 0.00035331998293740887,
      "loss": 3.0005,
      "step": 102097
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.777643084526062,
      "learning_rate": 0.0003533159574973593,
      "loss": 3.1028,
      "step": 102098
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0311434268951416,
      "learning_rate": 0.0003533119320473971,
      "loss": 3.0789,
      "step": 102099
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8367654085159302,
      "learning_rate": 0.00035330790658752334,
      "loss": 3.1164,
      "step": 102100
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5001732110977173,
      "learning_rate": 0.0003533038811177386,
      "loss": 3.1656,
      "step": 102101
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7530118227005005,
      "learning_rate": 0.00035329985563804355,
      "loss": 2.9534,
      "step": 102102
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6013514995574951,
      "learning_rate": 0.0003532958301484392,
      "loss": 3.0154,
      "step": 102103
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0584652423858643,
      "learning_rate": 0.00035329180464892615,
      "loss": 2.8983,
      "step": 102104
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.008009910583496,
      "learning_rate": 0.00035328777913950506,
      "loss": 3.1021,
      "step": 102105
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1709368228912354,
      "learning_rate": 0.00035328375362017676,
      "loss": 2.6933,
      "step": 102106
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8825719356536865,
      "learning_rate": 0.0003532797280909421,
      "loss": 3.1854,
      "step": 102107
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6488683223724365,
      "learning_rate": 0.0003532757025518017,
      "loss": 3.1169,
      "step": 102108
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1654512882232666,
      "learning_rate": 0.0003532716770027563,
      "loss": 3.147,
      "step": 102109
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.4526869058609009,
      "learning_rate": 0.0003532676514438067,
      "loss": 3.0391,
      "step": 102110
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.121048450469971,
      "learning_rate": 0.0003532636258749536,
      "loss": 3.0789,
      "step": 102111
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6830899715423584,
      "learning_rate": 0.00035325960029619787,
      "loss": 2.9584,
      "step": 102112
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5921316146850586,
      "learning_rate": 0.00035325557470754015,
      "loss": 3.054,
      "step": 102113
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3120179176330566,
      "learning_rate": 0.00035325154910898117,
      "loss": 3.2196,
      "step": 102114
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5630806684494019,
      "learning_rate": 0.00035324752350052174,
      "loss": 3.0505,
      "step": 102115
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.764222264289856,
      "learning_rate": 0.0003532434978821627,
      "loss": 3.1869,
      "step": 102116
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.770166277885437,
      "learning_rate": 0.00035323947225390453,
      "loss": 2.9897,
      "step": 102117
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.752578616142273,
      "learning_rate": 0.0003532354466157482,
      "loss": 2.9628,
      "step": 102118
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7585629224777222,
      "learning_rate": 0.0003532314209676945,
      "loss": 3.0289,
      "step": 102119
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6900153160095215,
      "learning_rate": 0.0003532273953097439,
      "loss": 3.1777,
      "step": 102120
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.4205334186553955,
      "learning_rate": 0.00035322336964189733,
      "loss": 3.0693,
      "step": 102121
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.454845666885376,
      "learning_rate": 0.00035321934396415565,
      "loss": 3.2772,
      "step": 102122
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5821144580841064,
      "learning_rate": 0.0003532153182765195,
      "loss": 2.8362,
      "step": 102123
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.282914400100708,
      "learning_rate": 0.0003532112925789895,
      "loss": 3.0096,
      "step": 102124
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.108215808868408,
      "learning_rate": 0.00035320726687156653,
      "loss": 2.8921,
      "step": 102125
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6271131038665771,
      "learning_rate": 0.0003532032411542514,
      "loss": 3.0234,
      "step": 102126
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.327791213989258,
      "learning_rate": 0.0003531992154270447,
      "loss": 2.6701,
      "step": 102127
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.943856954574585,
      "learning_rate": 0.0003531951896899473,
      "loss": 2.9951,
      "step": 102128
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.599024534225464,
      "learning_rate": 0.00035319116394296,
      "loss": 2.7922,
      "step": 102129
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7004956007003784,
      "learning_rate": 0.0003531871381860833,
      "loss": 2.988,
      "step": 102130
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.627387523651123,
      "learning_rate": 0.0003531831124193182,
      "loss": 3.0565,
      "step": 102131
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.867521286010742,
      "learning_rate": 0.0003531790866426654,
      "loss": 2.762,
      "step": 102132
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.74820077419281,
      "learning_rate": 0.0003531750608561255,
      "loss": 2.9461,
      "step": 102133
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7704778909683228,
      "learning_rate": 0.0003531710350596994,
      "loss": 3.0829,
      "step": 102134
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9553773403167725,
      "learning_rate": 0.0003531670092533878,
      "loss": 3.3261,
      "step": 102135
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1911842823028564,
      "learning_rate": 0.0003531629834371914,
      "loss": 2.973,
      "step": 102136
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5996891260147095,
      "learning_rate": 0.000353158957611111,
      "loss": 3.0748,
      "step": 102137
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8733787536621094,
      "learning_rate": 0.0003531549317751475,
      "loss": 3.1349,
      "step": 102138
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9393415451049805,
      "learning_rate": 0.00035315090592930135,
      "loss": 2.9152,
      "step": 102139
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9863190650939941,
      "learning_rate": 0.00035314688007357346,
      "loss": 3.2298,
      "step": 102140
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9562894105911255,
      "learning_rate": 0.00035314285420796464,
      "loss": 3.004,
      "step": 102141
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5741108655929565,
      "learning_rate": 0.00035313882833247544,
      "loss": 2.8195,
      "step": 102142
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8096466064453125,
      "learning_rate": 0.00035313480244710677,
      "loss": 2.9538,
      "step": 102143
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8291572332382202,
      "learning_rate": 0.0003531307765518594,
      "loss": 3.0633,
      "step": 102144
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7442742586135864,
      "learning_rate": 0.000353126750646734,
      "loss": 3.049,
      "step": 102145
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0568156242370605,
      "learning_rate": 0.00035312272473173126,
      "loss": 2.9267,
      "step": 102146
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5426238775253296,
      "learning_rate": 0.0003531186988068521,
      "loss": 3.0662,
      "step": 102147
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.781316876411438,
      "learning_rate": 0.00035311467287209704,
      "loss": 2.9672,
      "step": 102148
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7156696319580078,
      "learning_rate": 0.00035311064692746707,
      "loss": 3.0113,
      "step": 102149
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9177676439285278,
      "learning_rate": 0.00035310662097296283,
      "loss": 3.2703,
      "step": 102150
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.260035276412964,
      "learning_rate": 0.000353102595008585,
      "loss": 3.0319,
      "step": 102151
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7738780975341797,
      "learning_rate": 0.0003530985690343344,
      "loss": 2.9829,
      "step": 102152
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.048443078994751,
      "learning_rate": 0.00035309454305021187,
      "loss": 3.0597,
      "step": 102153
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.621645450592041,
      "learning_rate": 0.00035309051705621796,
      "loss": 3.0512,
      "step": 102154
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8120578527450562,
      "learning_rate": 0.0003530864910523535,
      "loss": 2.987,
      "step": 102155
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6925565004348755,
      "learning_rate": 0.00035308246503861943,
      "loss": 3.126,
      "step": 102156
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.297750949859619,
      "learning_rate": 0.0003530784390150162,
      "loss": 2.8187,
      "step": 102157
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8468502759933472,
      "learning_rate": 0.0003530744129815446,
      "loss": 2.8557,
      "step": 102158
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7275331020355225,
      "learning_rate": 0.0003530703869382057,
      "loss": 2.6717,
      "step": 102159
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7993525266647339,
      "learning_rate": 0.00035306636088499986,
      "loss": 3.1764,
      "step": 102160
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9431090354919434,
      "learning_rate": 0.0003530623348219279,
      "loss": 3.0125,
      "step": 102161
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.290058135986328,
      "learning_rate": 0.0003530583087489908,
      "loss": 3.0055,
      "step": 102162
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7023723125457764,
      "learning_rate": 0.0003530542826661891,
      "loss": 2.9436,
      "step": 102163
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.213716745376587,
      "learning_rate": 0.0003530502565735236,
      "loss": 3.1836,
      "step": 102164
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.454655408859253,
      "learning_rate": 0.00035304623047099513,
      "loss": 2.9558,
      "step": 102165
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.451690196990967,
      "learning_rate": 0.0003530422043586043,
      "loss": 3.0229,
      "step": 102166
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7346515655517578,
      "learning_rate": 0.0003530381782363519,
      "loss": 3.0157,
      "step": 102167
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.043672561645508,
      "learning_rate": 0.00035303415210423877,
      "loss": 2.967,
      "step": 102168
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.784285545349121,
      "learning_rate": 0.0003530301259622655,
      "loss": 2.8709,
      "step": 102169
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9240728616714478,
      "learning_rate": 0.000353026099810433,
      "loss": 2.8777,
      "step": 102170
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8638380765914917,
      "learning_rate": 0.000353022073648742,
      "loss": 3.0945,
      "step": 102171
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8489274978637695,
      "learning_rate": 0.00035301804747719305,
      "loss": 3.0732,
      "step": 102172
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.1691372394561768,
      "learning_rate": 0.0003530140212957871,
      "loss": 2.9466,
      "step": 102173
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.4822081327438354,
      "learning_rate": 0.0003530099951045248,
      "loss": 2.7644,
      "step": 102174
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7108287811279297,
      "learning_rate": 0.0003530059689034071,
      "loss": 2.8868,
      "step": 102175
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.98153817653656,
      "learning_rate": 0.00035300194269243445,
      "loss": 3.1211,
      "step": 102176
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.3806018829345703,
      "learning_rate": 0.0003529979164716078,
      "loss": 2.8947,
      "step": 102177
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7216036319732666,
      "learning_rate": 0.0003529938902409277,
      "loss": 3.1454,
      "step": 102178
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.545648217201233,
      "learning_rate": 0.0003529898640003952,
      "loss": 3.0063,
      "step": 102179
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6414248943328857,
      "learning_rate": 0.00035298583775001085,
      "loss": 2.9975,
      "step": 102180
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7774088382720947,
      "learning_rate": 0.0003529818114897754,
      "loss": 3.193,
      "step": 102181
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7866767644882202,
      "learning_rate": 0.0003529777852196897,
      "loss": 2.8333,
      "step": 102182
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7596515417099,
      "learning_rate": 0.00035297375893975433,
      "loss": 3.0039,
      "step": 102183
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.340733766555786,
      "learning_rate": 0.0003529697326499702,
      "loss": 3.0499,
      "step": 102184
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.1621315479278564,
      "learning_rate": 0.0003529657063503379,
      "loss": 3.0065,
      "step": 102185
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.760068416595459,
      "learning_rate": 0.0003529616800408584,
      "loss": 3.1005,
      "step": 102186
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.05694842338562,
      "learning_rate": 0.0003529576537215323,
      "loss": 2.95,
      "step": 102187
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.4836606979370117,
      "learning_rate": 0.0003529536273923604,
      "loss": 2.8164,
      "step": 102188
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4472789764404297,
      "learning_rate": 0.00035294960105334335,
      "loss": 2.7604,
      "step": 102189
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9450578689575195,
      "learning_rate": 0.000352945574704482,
      "loss": 2.9113,
      "step": 102190
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.082331418991089,
      "learning_rate": 0.00035294154834577705,
      "loss": 3.1167,
      "step": 102191
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.7784271240234375,
      "learning_rate": 0.00035293752197722923,
      "loss": 3.1357,
      "step": 102192
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.7558531761169434,
      "learning_rate": 0.00035293349559883946,
      "loss": 3.0722,
      "step": 102193
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8177669048309326,
      "learning_rate": 0.00035292946921060823,
      "loss": 3.1245,
      "step": 102194
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5902154445648193,
      "learning_rate": 0.0003529254428125364,
      "loss": 2.8303,
      "step": 102195
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4655463695526123,
      "learning_rate": 0.0003529214164046249,
      "loss": 2.8974,
      "step": 102196
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.202659845352173,
      "learning_rate": 0.00035291738998687413,
      "loss": 2.9721,
      "step": 102197
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8026022911071777,
      "learning_rate": 0.00035291336355928505,
      "loss": 3.0582,
      "step": 102198
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8134515285491943,
      "learning_rate": 0.0003529093371218584,
      "loss": 3.2724,
      "step": 102199
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.860622763633728,
      "learning_rate": 0.000352905310674595,
      "loss": 2.9579,
      "step": 102200
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8233343362808228,
      "learning_rate": 0.0003529012842174953,
      "loss": 3.0899,
      "step": 102201
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.6451046466827393,
      "learning_rate": 0.0003528972577505605,
      "loss": 3.0146,
      "step": 102202
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0300650596618652,
      "learning_rate": 0.0003528932312737909,
      "loss": 2.9937,
      "step": 102203
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.696587324142456,
      "learning_rate": 0.00035288920478718754,
      "loss": 3.0556,
      "step": 102204
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1975858211517334,
      "learning_rate": 0.0003528851782907511,
      "loss": 2.844,
      "step": 102205
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.119166851043701,
      "learning_rate": 0.0003528811517844822,
      "loss": 3.0837,
      "step": 102206
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.135891914367676,
      "learning_rate": 0.0003528771252683818,
      "loss": 2.9711,
      "step": 102207
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5120339393615723,
      "learning_rate": 0.0003528730987424505,
      "loss": 3.1931,
      "step": 102208
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9247044324874878,
      "learning_rate": 0.0003528690722066891,
      "loss": 3.2282,
      "step": 102209
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7827116250991821,
      "learning_rate": 0.00035286504566109833,
      "loss": 2.9753,
      "step": 102210
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0820703506469727,
      "learning_rate": 0.000352861019105679,
      "loss": 3.1492,
      "step": 102211
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1337573528289795,
      "learning_rate": 0.00035285699254043176,
      "loss": 3.1023,
      "step": 102212
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.49896240234375,
      "learning_rate": 0.0003528529659653574,
      "loss": 2.7691,
      "step": 102213
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3532040119171143,
      "learning_rate": 0.0003528489393804567,
      "loss": 3.0677,
      "step": 102214
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.138331413269043,
      "learning_rate": 0.00035284491278573035,
      "loss": 2.9055,
      "step": 102215
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9497531652450562,
      "learning_rate": 0.0003528408861811792,
      "loss": 2.8823,
      "step": 102216
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.098630428314209,
      "learning_rate": 0.00035283685956680395,
      "loss": 3.1711,
      "step": 102217
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.1151089668273926,
      "learning_rate": 0.00035283283294260516,
      "loss": 2.784,
      "step": 102218
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.089592933654785,
      "learning_rate": 0.0003528288063085839,
      "loss": 3.3208,
      "step": 102219
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9845166206359863,
      "learning_rate": 0.00035282477966474076,
      "loss": 2.8958,
      "step": 102220
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.930689811706543,
      "learning_rate": 0.00035282075301107645,
      "loss": 3.197,
      "step": 102221
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6217676401138306,
      "learning_rate": 0.0003528167263475918,
      "loss": 3.1327,
      "step": 102222
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5790159702301025,
      "learning_rate": 0.0003528126996742875,
      "loss": 3.2269,
      "step": 102223
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.966064453125,
      "learning_rate": 0.00035280867299116434,
      "loss": 3.1231,
      "step": 102224
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8856425285339355,
      "learning_rate": 0.00035280464629822297,
      "loss": 3.0414,
      "step": 102225
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1192729473114014,
      "learning_rate": 0.00035280061959546436,
      "loss": 2.8219,
      "step": 102226
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.490659713745117,
      "learning_rate": 0.00035279659288288905,
      "loss": 3.0364,
      "step": 102227
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5457236766815186,
      "learning_rate": 0.00035279256616049776,
      "loss": 3.0037,
      "step": 102228
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9992207288742065,
      "learning_rate": 0.0003527885394282915,
      "loss": 2.8662,
      "step": 102229
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0401105880737305,
      "learning_rate": 0.00035278451268627075,
      "loss": 2.9683,
      "step": 102230
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.130398988723755,
      "learning_rate": 0.0003527804859344364,
      "loss": 2.9423,
      "step": 102231
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.644501209259033,
      "learning_rate": 0.00035277645917278916,
      "loss": 3.0971,
      "step": 102232
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.911986231803894,
      "learning_rate": 0.00035277243240132975,
      "loss": 3.0345,
      "step": 102233
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.847086787223816,
      "learning_rate": 0.000352768405620059,
      "loss": 3.2157,
      "step": 102234
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3620481491088867,
      "learning_rate": 0.0003527643788289776,
      "loss": 2.8666,
      "step": 102235
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.909747838973999,
      "learning_rate": 0.0003527603520280863,
      "loss": 3.059,
      "step": 102236
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3511390686035156,
      "learning_rate": 0.0003527563252173858,
      "loss": 2.9485,
      "step": 102237
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3799684047698975,
      "learning_rate": 0.00035275229839687705,
      "loss": 3.2288,
      "step": 102238
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8064849376678467,
      "learning_rate": 0.00035274827156656053,
      "loss": 3.141,
      "step": 102239
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9856432676315308,
      "learning_rate": 0.00035274424472643703,
      "loss": 2.9518,
      "step": 102240
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8998908996582031,
      "learning_rate": 0.0003527402178765075,
      "loss": 2.8465,
      "step": 102241
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.997767925262451,
      "learning_rate": 0.0003527361910167726,
      "loss": 3.0416,
      "step": 102242
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.96723473072052,
      "learning_rate": 0.000352732164147233,
      "loss": 2.5967,
      "step": 102243
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.773579478263855,
      "learning_rate": 0.00035272813726788945,
      "loss": 2.7763,
      "step": 102244
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6143138408660889,
      "learning_rate": 0.0003527241103787428,
      "loss": 3.1678,
      "step": 102245
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.079683303833008,
      "learning_rate": 0.0003527200834797937,
      "loss": 3.3107,
      "step": 102246
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9918560981750488,
      "learning_rate": 0.000352716056571043,
      "loss": 3.0738,
      "step": 102247
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.978451132774353,
      "learning_rate": 0.0003527120296524914,
      "loss": 2.8187,
      "step": 102248
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.4461822509765625,
      "learning_rate": 0.0003527080027241395,
      "loss": 3.0163,
      "step": 102249
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2824318408966064,
      "learning_rate": 0.0003527039757859883,
      "loss": 3.3411,
      "step": 102250
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7692973613739014,
      "learning_rate": 0.0003526999488380384,
      "loss": 3.1101,
      "step": 102251
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.4945106506347656,
      "learning_rate": 0.0003526959218802906,
      "loss": 3.1812,
      "step": 102252
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8310949802398682,
      "learning_rate": 0.00035269189491274566,
      "loss": 2.9298,
      "step": 102253
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5776573419570923,
      "learning_rate": 0.0003526878679354043,
      "loss": 3.0099,
      "step": 102254
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7885957956314087,
      "learning_rate": 0.0003526838409482672,
      "loss": 2.7893,
      "step": 102255
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.375077486038208,
      "learning_rate": 0.00035267981395133526,
      "loss": 2.7445,
      "step": 102256
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.761112093925476,
      "learning_rate": 0.00035267578694460913,
      "loss": 2.7857,
      "step": 102257
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7722419500350952,
      "learning_rate": 0.00035267175992808957,
      "loss": 2.8133,
      "step": 102258
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5615856647491455,
      "learning_rate": 0.0003526677329017773,
      "loss": 2.9663,
      "step": 102259
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.615579128265381,
      "learning_rate": 0.00035266370586567317,
      "loss": 2.8385,
      "step": 102260
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.803353190422058,
      "learning_rate": 0.0003526596788197778,
      "loss": 3.0179,
      "step": 102261
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.704420804977417,
      "learning_rate": 0.00035265565176409203,
      "loss": 2.9728,
      "step": 102262
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5093932151794434,
      "learning_rate": 0.0003526516246986166,
      "loss": 3.0366,
      "step": 102263
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3510468006134033,
      "learning_rate": 0.0003526475976233522,
      "loss": 2.8334,
      "step": 102264
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.994724988937378,
      "learning_rate": 0.0003526435705382996,
      "loss": 3.1034,
      "step": 102265
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7644410133361816,
      "learning_rate": 0.0003526395434434596,
      "loss": 2.8918,
      "step": 102266
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.300915241241455,
      "learning_rate": 0.000352635516338833,
      "loss": 2.6761,
      "step": 102267
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8378998041152954,
      "learning_rate": 0.00035263148922442026,
      "loss": 3.071,
      "step": 102268
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.647838830947876,
      "learning_rate": 0.0003526274621002225,
      "loss": 3.0097,
      "step": 102269
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.169126510620117,
      "learning_rate": 0.00035262343496624026,
      "loss": 2.7919,
      "step": 102270
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9211783409118652,
      "learning_rate": 0.00035261940782247434,
      "loss": 2.9199,
      "step": 102271
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7696889638900757,
      "learning_rate": 0.00035261538066892544,
      "loss": 3.2108,
      "step": 102272
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0314502716064453,
      "learning_rate": 0.0003526113535055944,
      "loss": 2.8995,
      "step": 102273
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7842313051223755,
      "learning_rate": 0.0003526073263324819,
      "loss": 3.0233,
      "step": 102274
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3074734210968018,
      "learning_rate": 0.0003526032991495886,
      "loss": 3.0263,
      "step": 102275
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9425960779190063,
      "learning_rate": 0.0003525992719569155,
      "loss": 3.0972,
      "step": 102276
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.091144323348999,
      "learning_rate": 0.0003525952447544631,
      "loss": 2.855,
      "step": 102277
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.856141209602356,
      "learning_rate": 0.00035259121754223236,
      "loss": 3.0378,
      "step": 102278
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7480021715164185,
      "learning_rate": 0.0003525871903202238,
      "loss": 3.0988,
      "step": 102279
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.953089952468872,
      "learning_rate": 0.0003525831630884383,
      "loss": 3.1459,
      "step": 102280
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6239572763442993,
      "learning_rate": 0.0003525791358468767,
      "loss": 2.8995,
      "step": 102281
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9167983531951904,
      "learning_rate": 0.0003525751085955396,
      "loss": 3.0307,
      "step": 102282
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7618805170059204,
      "learning_rate": 0.0003525710813344278,
      "loss": 2.9018,
      "step": 102283
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5521299839019775,
      "learning_rate": 0.0003525670540635421,
      "loss": 3.4037,
      "step": 102284
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0524489879608154,
      "learning_rate": 0.00035256302678288306,
      "loss": 2.963,
      "step": 102285
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.313001871109009,
      "learning_rate": 0.00035255899949245163,
      "loss": 2.8348,
      "step": 102286
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9843213558197021,
      "learning_rate": 0.0003525549721922485,
      "loss": 3.0432,
      "step": 102287
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3935201168060303,
      "learning_rate": 0.0003525509448822744,
      "loss": 3.1013,
      "step": 102288
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.936930775642395,
      "learning_rate": 0.00035254691756253005,
      "loss": 2.9149,
      "step": 102289
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8446234464645386,
      "learning_rate": 0.0003525428902330163,
      "loss": 3.0465,
      "step": 102290
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5879878997802734,
      "learning_rate": 0.00035253886289373374,
      "loss": 2.8239,
      "step": 102291
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0389113426208496,
      "learning_rate": 0.0003525348355446833,
      "loss": 3.0152,
      "step": 102292
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9588394165039062,
      "learning_rate": 0.00035253080818586566,
      "loss": 2.9234,
      "step": 102293
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5953030586242676,
      "learning_rate": 0.00035252678081728147,
      "loss": 2.9802,
      "step": 102294
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9321672916412354,
      "learning_rate": 0.00035252275343893155,
      "loss": 3.1197,
      "step": 102295
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1537516117095947,
      "learning_rate": 0.00035251872605081676,
      "loss": 2.8246,
      "step": 102296
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9572447538375854,
      "learning_rate": 0.0003525146986529377,
      "loss": 2.9139,
      "step": 102297
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9586364030838013,
      "learning_rate": 0.00035251067124529513,
      "loss": 3.1273,
      "step": 102298
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7415121793746948,
      "learning_rate": 0.0003525066438278899,
      "loss": 3.1604,
      "step": 102299
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6522650718688965,
      "learning_rate": 0.00035250261640072265,
      "loss": 3.0696,
      "step": 102300
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6827510595321655,
      "learning_rate": 0.00035249858896379416,
      "loss": 2.8684,
      "step": 102301
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.172464609146118,
      "learning_rate": 0.00035249456151710524,
      "loss": 2.9299,
      "step": 102302
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.719722032546997,
      "learning_rate": 0.00035249053406065655,
      "loss": 2.8494,
      "step": 102303
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.725890874862671,
      "learning_rate": 0.0003524865065944489,
      "loss": 2.9384,
      "step": 102304
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.5361344814300537,
      "learning_rate": 0.000352482479118483,
      "loss": 2.9213,
      "step": 102305
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2525546550750732,
      "learning_rate": 0.00035247845163275966,
      "loss": 3.4331,
      "step": 102306
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8613930940628052,
      "learning_rate": 0.0003524744241372795,
      "loss": 2.9911,
      "step": 102307
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.466547966003418,
      "learning_rate": 0.00035247039663204347,
      "loss": 2.9607,
      "step": 102308
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0700507164001465,
      "learning_rate": 0.0003524663691170521,
      "loss": 2.979,
      "step": 102309
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9880377054214478,
      "learning_rate": 0.00035246234159230625,
      "loss": 3.3844,
      "step": 102310
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.720935344696045,
      "learning_rate": 0.0003524583140578067,
      "loss": 2.9727,
      "step": 102311
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5565685033798218,
      "learning_rate": 0.0003524542865135542,
      "loss": 3.1414,
      "step": 102312
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.910591721534729,
      "learning_rate": 0.0003524502589595494,
      "loss": 3.095,
      "step": 102313
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0383689403533936,
      "learning_rate": 0.00035244623139579317,
      "loss": 3.0065,
      "step": 102314
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7175434827804565,
      "learning_rate": 0.00035244220382228605,
      "loss": 3.0707,
      "step": 102315
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.230642557144165,
      "learning_rate": 0.000352438176239029,
      "loss": 3.0018,
      "step": 102316
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9371048212051392,
      "learning_rate": 0.0003524341486460228,
      "loss": 2.7047,
      "step": 102317
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8495548963546753,
      "learning_rate": 0.000352430121043268,
      "loss": 3.0798,
      "step": 102318
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.7617359161376953,
      "learning_rate": 0.0003524260934307655,
      "loss": 3.1857,
      "step": 102319
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7797260284423828,
      "learning_rate": 0.000352422065808516,
      "loss": 2.9385,
      "step": 102320
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8625068664550781,
      "learning_rate": 0.00035241803817652017,
      "loss": 3.0189,
      "step": 102321
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.007502794265747,
      "learning_rate": 0.00035241401053477895,
      "loss": 3.1669,
      "step": 102322
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.453498601913452,
      "learning_rate": 0.00035240998288329296,
      "loss": 3.0916,
      "step": 102323
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.292412042617798,
      "learning_rate": 0.0003524059552220629,
      "loss": 3.0283,
      "step": 102324
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.487636089324951,
      "learning_rate": 0.00035240192755108955,
      "loss": 3.0697,
      "step": 102325
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4829084873199463,
      "learning_rate": 0.0003523978998703737,
      "loss": 2.7663,
      "step": 102326
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.634787082672119,
      "learning_rate": 0.00035239387217991623,
      "loss": 2.9963,
      "step": 102327
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.085676431655884,
      "learning_rate": 0.0003523898444797176,
      "loss": 2.7429,
      "step": 102328
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.39560604095459,
      "learning_rate": 0.0003523858167697788,
      "loss": 2.7929,
      "step": 102329
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.252732276916504,
      "learning_rate": 0.0003523817890501005,
      "loss": 3.1158,
      "step": 102330
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7606384754180908,
      "learning_rate": 0.0003523777613206833,
      "loss": 3.0815,
      "step": 102331
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5528321266174316,
      "learning_rate": 0.0003523737335815282,
      "loss": 2.7996,
      "step": 102332
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.732131004333496,
      "learning_rate": 0.00035236970583263594,
      "loss": 2.8591,
      "step": 102333
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3618695735931396,
      "learning_rate": 0.000352365678074007,
      "loss": 3.0745,
      "step": 102334
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0688893795013428,
      "learning_rate": 0.00035236165030564227,
      "loss": 3.0555,
      "step": 102335
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0496268272399902,
      "learning_rate": 0.0003523576225275426,
      "loss": 2.9258,
      "step": 102336
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4817628860473633,
      "learning_rate": 0.0003523535947397087,
      "loss": 3.1837,
      "step": 102337
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3450047969818115,
      "learning_rate": 0.00035234956694214114,
      "loss": 2.7815,
      "step": 102338
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7747300863265991,
      "learning_rate": 0.00035234553913484094,
      "loss": 3.1795,
      "step": 102339
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9446215629577637,
      "learning_rate": 0.0003523415113178086,
      "loss": 2.8104,
      "step": 102340
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.116793155670166,
      "learning_rate": 0.0003523374834910451,
      "loss": 2.9224,
      "step": 102341
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.857565999031067,
      "learning_rate": 0.00035233345565455107,
      "loss": 2.9671,
      "step": 102342
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8397234678268433,
      "learning_rate": 0.00035232942780832714,
      "loss": 2.8839,
      "step": 102343
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.999048948287964,
      "learning_rate": 0.00035232539995237424,
      "loss": 2.839,
      "step": 102344
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9716719388961792,
      "learning_rate": 0.0003523213720866931,
      "loss": 2.9285,
      "step": 102345
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6450340747833252,
      "learning_rate": 0.00035231734421128447,
      "loss": 2.7548,
      "step": 102346
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3802359104156494,
      "learning_rate": 0.00035231331632614896,
      "loss": 3.1122,
      "step": 102347
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.703315496444702,
      "learning_rate": 0.0003523092884312875,
      "loss": 2.9838,
      "step": 102348
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2054970264434814,
      "learning_rate": 0.0003523052605267007,
      "loss": 2.8721,
      "step": 102349
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.832271933555603,
      "learning_rate": 0.0003523012326123894,
      "loss": 2.8792,
      "step": 102350
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9495649337768555,
      "learning_rate": 0.00035229720468835427,
      "loss": 3.0435,
      "step": 102351
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.3537704944610596,
      "learning_rate": 0.00035229317675459614,
      "loss": 3.0534,
      "step": 102352
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.370544672012329,
      "learning_rate": 0.00035228914881111565,
      "loss": 2.9568,
      "step": 102353
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7795395851135254,
      "learning_rate": 0.00035228512085791366,
      "loss": 3.1458,
      "step": 102354
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4035797119140625,
      "learning_rate": 0.00035228109289499093,
      "loss": 3.1813,
      "step": 102355
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.7229416370391846,
      "learning_rate": 0.00035227706492234806,
      "loss": 3.1388,
      "step": 102356
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.378228187561035,
      "learning_rate": 0.00035227303693998596,
      "loss": 3.3125,
      "step": 102357
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8761826753616333,
      "learning_rate": 0.0003522690089479053,
      "loss": 2.9942,
      "step": 102358
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.097681760787964,
      "learning_rate": 0.00035226498094610687,
      "loss": 2.8072,
      "step": 102359
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9979256391525269,
      "learning_rate": 0.0003522609529345914,
      "loss": 2.989,
      "step": 102360
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7253462076187134,
      "learning_rate": 0.00035225692491335956,
      "loss": 2.8824,
      "step": 102361
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3501663208007812,
      "learning_rate": 0.00035225289688241225,
      "loss": 2.7686,
      "step": 102362
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.686323642730713,
      "learning_rate": 0.00035224886884175006,
      "loss": 2.8082,
      "step": 102363
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.891854166984558,
      "learning_rate": 0.0003522448407913739,
      "loss": 2.9619,
      "step": 102364
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9678449630737305,
      "learning_rate": 0.0003522408127312844,
      "loss": 2.8864,
      "step": 102365
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.030543565750122,
      "learning_rate": 0.00035223678466148235,
      "loss": 2.9928,
      "step": 102366
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.7938992977142334,
      "learning_rate": 0.0003522327565819685,
      "loss": 3.0212,
      "step": 102367
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6660139560699463,
      "learning_rate": 0.0003522287284927436,
      "loss": 2.9796,
      "step": 102368
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2078821659088135,
      "learning_rate": 0.0003522247003938084,
      "loss": 2.9563,
      "step": 102369
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.560833692550659,
      "learning_rate": 0.0003522206722851636,
      "loss": 2.975,
      "step": 102370
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8679670095443726,
      "learning_rate": 0.00035221664416681004,
      "loss": 3.2031,
      "step": 102371
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.155579090118408,
      "learning_rate": 0.00035221261603874846,
      "loss": 2.8401,
      "step": 102372
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4180777072906494,
      "learning_rate": 0.0003522085879009795,
      "loss": 3.0326,
      "step": 102373
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2381439208984375,
      "learning_rate": 0.0003522045597535039,
      "loss": 2.8277,
      "step": 102374
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.049837112426758,
      "learning_rate": 0.0003522005315963226,
      "loss": 3.177,
      "step": 102375
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3186662197113037,
      "learning_rate": 0.00035219650342943625,
      "loss": 3.2322,
      "step": 102376
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0277981758117676,
      "learning_rate": 0.0003521924752528455,
      "loss": 3.0606,
      "step": 102377
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0936992168426514,
      "learning_rate": 0.00035218844706655134,
      "loss": 3.0953,
      "step": 102378
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.2417995929718018,
      "learning_rate": 0.0003521844188705542,
      "loss": 2.9609,
      "step": 102379
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9689042568206787,
      "learning_rate": 0.000352180390664855,
      "loss": 3.0745,
      "step": 102380
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8699514865875244,
      "learning_rate": 0.0003521763624494546,
      "loss": 3.097,
      "step": 102381
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9449094533920288,
      "learning_rate": 0.0003521723342243536,
      "loss": 2.9349,
      "step": 102382
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1858015060424805,
      "learning_rate": 0.00035216830598955263,
      "loss": 2.9576,
      "step": 102383
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.737194538116455,
      "learning_rate": 0.00035216427774505273,
      "loss": 3.0878,
      "step": 102384
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9912958145141602,
      "learning_rate": 0.0003521602494908545,
      "loss": 2.8397,
      "step": 102385
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9074610471725464,
      "learning_rate": 0.00035215622122695863,
      "loss": 2.8775,
      "step": 102386
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2812366485595703,
      "learning_rate": 0.000352152192953366,
      "loss": 3.0917,
      "step": 102387
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8354253768920898,
      "learning_rate": 0.00035214816467007726,
      "loss": 2.95,
      "step": 102388
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7062519788742065,
      "learning_rate": 0.00035214413637709314,
      "loss": 3.3874,
      "step": 102389
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4253365993499756,
      "learning_rate": 0.00035214010807441457,
      "loss": 3.1963,
      "step": 102390
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7173033952713013,
      "learning_rate": 0.00035213607976204204,
      "loss": 2.9827,
      "step": 102391
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8205416202545166,
      "learning_rate": 0.0003521320514399765,
      "loss": 2.8645,
      "step": 102392
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9544612169265747,
      "learning_rate": 0.00035212802310821857,
      "loss": 3.1153,
      "step": 102393
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7201367616653442,
      "learning_rate": 0.00035212399476676915,
      "loss": 2.7846,
      "step": 102394
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5407483577728271,
      "learning_rate": 0.0003521199664156288,
      "loss": 3.1718,
      "step": 102395
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3042235374450684,
      "learning_rate": 0.00035211593805479835,
      "loss": 2.8583,
      "step": 102396
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4436841011047363,
      "learning_rate": 0.00035211190968427865,
      "loss": 3.1916,
      "step": 102397
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7882989645004272,
      "learning_rate": 0.00035210788130407035,
      "loss": 2.9834,
      "step": 102398
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6260526180267334,
      "learning_rate": 0.0003521038529141741,
      "loss": 3.1708,
      "step": 102399
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.064789295196533,
      "learning_rate": 0.00035209982451459097,
      "loss": 2.9231,
      "step": 102400
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7974404096603394,
      "learning_rate": 0.00035209579610532134,
      "loss": 3.0766,
      "step": 102401
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3012447357177734,
      "learning_rate": 0.0003520917676863661,
      "loss": 2.9598,
      "step": 102402
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.947543144226074,
      "learning_rate": 0.00035208773925772617,
      "loss": 2.9075,
      "step": 102403
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7727290391921997,
      "learning_rate": 0.000352083710819402,
      "loss": 2.8985,
      "step": 102404
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.8237295150756836,
      "learning_rate": 0.00035207968237139457,
      "loss": 2.9313,
      "step": 102405
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.273442506790161,
      "learning_rate": 0.0003520756539137045,
      "loss": 2.8106,
      "step": 102406
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2209930419921875,
      "learning_rate": 0.00035207162544633255,
      "loss": 2.9839,
      "step": 102407
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6616872549057007,
      "learning_rate": 0.0003520675969692795,
      "loss": 3.07,
      "step": 102408
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.149555206298828,
      "learning_rate": 0.0003520635684825462,
      "loss": 2.9329,
      "step": 102409
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.841356873512268,
      "learning_rate": 0.0003520595399861333,
      "loss": 2.8802,
      "step": 102410
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9362679719924927,
      "learning_rate": 0.00035205551148004147,
      "loss": 3.0237,
      "step": 102411
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.650267481803894,
      "learning_rate": 0.00035205148296427164,
      "loss": 2.7773,
      "step": 102412
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9916455745697021,
      "learning_rate": 0.0003520474544388243,
      "loss": 2.9613,
      "step": 102413
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6274298429489136,
      "learning_rate": 0.0003520434259037004,
      "loss": 3.0215,
      "step": 102414
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5839444398880005,
      "learning_rate": 0.00035203939735890073,
      "loss": 3.0288,
      "step": 102415
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.832319498062134,
      "learning_rate": 0.00035203536880442585,
      "loss": 2.7237,
      "step": 102416
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.611283779144287,
      "learning_rate": 0.00035203134024027664,
      "loss": 2.9027,
      "step": 102417
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6786826848983765,
      "learning_rate": 0.0003520273116664539,
      "loss": 2.8069,
      "step": 102418
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.537360668182373,
      "learning_rate": 0.00035202328308295825,
      "loss": 2.859,
      "step": 102419
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5156760215759277,
      "learning_rate": 0.0003520192544897905,
      "loss": 2.9477,
      "step": 102420
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8381078243255615,
      "learning_rate": 0.0003520152258869514,
      "loss": 2.8895,
      "step": 102421
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9872303009033203,
      "learning_rate": 0.00035201119727444163,
      "loss": 2.8669,
      "step": 102422
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.825951337814331,
      "learning_rate": 0.000352007168652262,
      "loss": 3.0826,
      "step": 102423
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9259973764419556,
      "learning_rate": 0.0003520031400204134,
      "loss": 2.8729,
      "step": 102424
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7305642366409302,
      "learning_rate": 0.00035199911137889626,
      "loss": 2.9865,
      "step": 102425
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5456266403198242,
      "learning_rate": 0.00035199508272771153,
      "loss": 2.9911,
      "step": 102426
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.189534902572632,
      "learning_rate": 0.00035199105406686005,
      "loss": 3.0142,
      "step": 102427
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.276960849761963,
      "learning_rate": 0.0003519870253963423,
      "loss": 3.207,
      "step": 102428
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9733893871307373,
      "learning_rate": 0.00035198299671615924,
      "loss": 2.9531,
      "step": 102429
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5239076614379883,
      "learning_rate": 0.0003519789680263116,
      "loss": 3.2539,
      "step": 102430
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.606279969215393,
      "learning_rate": 0.00035197493932680006,
      "loss": 2.951,
      "step": 102431
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7026911973953247,
      "learning_rate": 0.00035197091061762537,
      "loss": 2.8715,
      "step": 102432
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.831129550933838,
      "learning_rate": 0.0003519668818987884,
      "loss": 2.7984,
      "step": 102433
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5806233882904053,
      "learning_rate": 0.0003519628531702897,
      "loss": 2.9717,
      "step": 102434
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9056291580200195,
      "learning_rate": 0.00035195882443213013,
      "loss": 2.9366,
      "step": 102435
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8507308959960938,
      "learning_rate": 0.0003519547956843105,
      "loss": 3.117,
      "step": 102436
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2997381687164307,
      "learning_rate": 0.00035195076692683147,
      "loss": 3.1183,
      "step": 102437
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.582318067550659,
      "learning_rate": 0.0003519467381596938,
      "loss": 3.1266,
      "step": 102438
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.032744884490967,
      "learning_rate": 0.00035194270938289823,
      "loss": 3.2663,
      "step": 102439
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9061195850372314,
      "learning_rate": 0.00035193868059644557,
      "loss": 3.1347,
      "step": 102440
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.8941726684570312,
      "learning_rate": 0.0003519346518003365,
      "loss": 3.1251,
      "step": 102441
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.858823299407959,
      "learning_rate": 0.00035193062299457176,
      "loss": 3.0573,
      "step": 102442
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.783151388168335,
      "learning_rate": 0.00035192659417915224,
      "loss": 3.0799,
      "step": 102443
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.913706660270691,
      "learning_rate": 0.00035192256535407856,
      "loss": 2.8214,
      "step": 102444
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.14227557182312,
      "learning_rate": 0.00035191853651935135,
      "loss": 3.0403,
      "step": 102445
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0550453662872314,
      "learning_rate": 0.00035191450767497166,
      "loss": 2.9088,
      "step": 102446
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6747026443481445,
      "learning_rate": 0.0003519104788209401,
      "loss": 3.175,
      "step": 102447
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9133847951889038,
      "learning_rate": 0.00035190644995725735,
      "loss": 2.843,
      "step": 102448
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.783124566078186,
      "learning_rate": 0.00035190242108392415,
      "loss": 3.0964,
      "step": 102449
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.51497483253479,
      "learning_rate": 0.0003518983922009414,
      "loss": 3.1201,
      "step": 102450
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.834490180015564,
      "learning_rate": 0.0003518943633083097,
      "loss": 3.0565,
      "step": 102451
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0683627128601074,
      "learning_rate": 0.0003518903344060298,
      "loss": 2.8542,
      "step": 102452
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4246857166290283,
      "learning_rate": 0.00035188630549410266,
      "loss": 2.9776,
      "step": 102453
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.193387508392334,
      "learning_rate": 0.0003518822765725288,
      "loss": 2.9156,
      "step": 102454
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.540963053703308,
      "learning_rate": 0.0003518782476413091,
      "loss": 3.0514,
      "step": 102455
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6146528720855713,
      "learning_rate": 0.00035187421870044416,
      "loss": 3.1789,
      "step": 102456
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6647837162017822,
      "learning_rate": 0.0003518701897499349,
      "loss": 2.9864,
      "step": 102457
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.982469916343689,
      "learning_rate": 0.000351866160789782,
      "loss": 2.678,
      "step": 102458
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.6916379928588867,
      "learning_rate": 0.0003518621318199861,
      "loss": 2.8338,
      "step": 102459
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.769858479499817,
      "learning_rate": 0.0003518581028405481,
      "loss": 2.9858,
      "step": 102460
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5891199111938477,
      "learning_rate": 0.00035185407385146875,
      "loss": 2.7082,
      "step": 102461
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7070082426071167,
      "learning_rate": 0.0003518500448527487,
      "loss": 2.9227,
      "step": 102462
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9108271598815918,
      "learning_rate": 0.00035184601584438874,
      "loss": 2.8959,
      "step": 102463
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8720884323120117,
      "learning_rate": 0.0003518419868263897,
      "loss": 3.0636,
      "step": 102464
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.67365562915802,
      "learning_rate": 0.00035183795779875223,
      "loss": 2.8582,
      "step": 102465
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5233333110809326,
      "learning_rate": 0.000351833928761477,
      "loss": 3.0223,
      "step": 102466
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1289122104644775,
      "learning_rate": 0.000351829899714565,
      "loss": 2.6917,
      "step": 102467
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.550937294960022,
      "learning_rate": 0.00035182587065801677,
      "loss": 3.0495,
      "step": 102468
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.30342698097229,
      "learning_rate": 0.00035182184159183316,
      "loss": 3.0183,
      "step": 102469
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4930436611175537,
      "learning_rate": 0.0003518178125160149,
      "loss": 3.1107,
      "step": 102470
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5958356857299805,
      "learning_rate": 0.0003518137834305627,
      "loss": 2.9095,
      "step": 102471
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5843212604522705,
      "learning_rate": 0.00035180975433547735,
      "loss": 3.032,
      "step": 102472
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.446410894393921,
      "learning_rate": 0.0003518057252307596,
      "loss": 3.1055,
      "step": 102473
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9223053455352783,
      "learning_rate": 0.0003518016961164102,
      "loss": 2.949,
      "step": 102474
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.960644245147705,
      "learning_rate": 0.0003517976669924298,
      "loss": 3.0274,
      "step": 102475
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.955152153968811,
      "learning_rate": 0.0003517936378588194,
      "loss": 2.9744,
      "step": 102476
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.610959529876709,
      "learning_rate": 0.0003517896087155794,
      "loss": 3.094,
      "step": 102477
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.3570148944854736,
      "learning_rate": 0.00035178557956271083,
      "loss": 3.0306,
      "step": 102478
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9376238584518433,
      "learning_rate": 0.00035178155040021435,
      "loss": 2.8379,
      "step": 102479
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7681132555007935,
      "learning_rate": 0.00035177752122809064,
      "loss": 3.2082,
      "step": 102480
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.765474796295166,
      "learning_rate": 0.00035177349204634057,
      "loss": 3.0914,
      "step": 102481
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.164066791534424,
      "learning_rate": 0.0003517694628549649,
      "loss": 2.846,
      "step": 102482
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9256271123886108,
      "learning_rate": 0.00035176543365396417,
      "loss": 3.0929,
      "step": 102483
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7928216457366943,
      "learning_rate": 0.00035176140444333936,
      "loss": 3.0419,
      "step": 102484
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8811829090118408,
      "learning_rate": 0.0003517573752230911,
      "loss": 3.0257,
      "step": 102485
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7416571378707886,
      "learning_rate": 0.00035175334599322014,
      "loss": 2.9531,
      "step": 102486
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0274057388305664,
      "learning_rate": 0.00035174931675372725,
      "loss": 2.9607,
      "step": 102487
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.8359471559524536,
      "learning_rate": 0.00035174528750461326,
      "loss": 3.1464,
      "step": 102488
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.793192148208618,
      "learning_rate": 0.0003517412582458788,
      "loss": 3.0791,
      "step": 102489
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5751060247421265,
      "learning_rate": 0.00035173722897752463,
      "loss": 3.0269,
      "step": 102490
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9544042348861694,
      "learning_rate": 0.00035173319969955156,
      "loss": 2.5752,
      "step": 102491
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.322894334793091,
      "learning_rate": 0.0003517291704119603,
      "loss": 2.8983,
      "step": 102492
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.6467885971069336,
      "learning_rate": 0.00035172514111475166,
      "loss": 3.0271,
      "step": 102493
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.3182499408721924,
      "learning_rate": 0.00035172111180792635,
      "loss": 2.9781,
      "step": 102494
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9425225257873535,
      "learning_rate": 0.00035171708249148503,
      "loss": 2.9994,
      "step": 102495
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.973980188369751,
      "learning_rate": 0.0003517130531654286,
      "loss": 2.8704,
      "step": 102496
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.7173547744750977,
      "learning_rate": 0.0003517090238297577,
      "loss": 2.9558,
      "step": 102497
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.2774534225463867,
      "learning_rate": 0.00035170499448447306,
      "loss": 3.113,
      "step": 102498
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1699070930480957,
      "learning_rate": 0.0003517009651295755,
      "loss": 2.9278,
      "step": 102499
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.723933219909668,
      "learning_rate": 0.0003516969357650659,
      "loss": 2.9803,
      "step": 102500
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.0772252082824707,
      "learning_rate": 0.00035169290639094474,
      "loss": 3.0249,
      "step": 102501
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.34053635597229,
      "learning_rate": 0.00035168887700721285,
      "loss": 3.1348,
      "step": 102502
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.640798807144165,
      "learning_rate": 0.00035168484761387113,
      "loss": 3.0499,
      "step": 102503
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.2956740856170654,
      "learning_rate": 0.0003516808182109202,
      "loss": 2.9836,
      "step": 102504
    },
    {
      "epoch": 1.33,
      "grad_norm": 5.80900764465332,
      "learning_rate": 0.00035167678879836076,
      "loss": 2.9471,
      "step": 102505
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.042099475860596,
      "learning_rate": 0.00035167275937619374,
      "loss": 2.8729,
      "step": 102506
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.0502216815948486,
      "learning_rate": 0.0003516687299444197,
      "loss": 2.9082,
      "step": 102507
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.7982218265533447,
      "learning_rate": 0.00035166470050303943,
      "loss": 3.159,
      "step": 102508
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7101341485977173,
      "learning_rate": 0.00035166067105205377,
      "loss": 3.088,
      "step": 102509
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.793001174926758,
      "learning_rate": 0.0003516566415914635,
      "loss": 2.8964,
      "step": 102510
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.0270047187805176,
      "learning_rate": 0.0003516526121212691,
      "loss": 3.1144,
      "step": 102511
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.937840461730957,
      "learning_rate": 0.00035164858264147167,
      "loss": 3.0045,
      "step": 102512
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.4562106132507324,
      "learning_rate": 0.0003516445531520717,
      "loss": 3.0132,
      "step": 102513
    },
    {
      "epoch": 1.33,
      "grad_norm": 4.193847179412842,
      "learning_rate": 0.00035164052365307003,
      "loss": 2.992,
      "step": 102514
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.0953407287597656,
      "learning_rate": 0.00035163649414446754,
      "loss": 2.9908,
      "step": 102515
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.8392837047576904,
      "learning_rate": 0.0003516324646262647,
      "loss": 3.0398,
      "step": 102516
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1532671451568604,
      "learning_rate": 0.00035162843509846246,
      "loss": 3.1882,
      "step": 102517
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.5649735927581787,
      "learning_rate": 0.00035162440556106155,
      "loss": 2.7945,
      "step": 102518
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.4580893516540527,
      "learning_rate": 0.00035162037601406263,
      "loss": 2.9913,
      "step": 102519
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.193627119064331,
      "learning_rate": 0.0003516163464574666,
      "loss": 2.8153,
      "step": 102520
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.114936590194702,
      "learning_rate": 0.000351612316891274,
      "loss": 2.888,
      "step": 102521
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.5453659296035767,
      "learning_rate": 0.00035160828731548575,
      "loss": 3.0215,
      "step": 102522
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.9787988662719727,
      "learning_rate": 0.00035160425773010255,
      "loss": 2.914,
      "step": 102523
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.1106293201446533,
      "learning_rate": 0.00035160022813512514,
      "loss": 2.8486,
      "step": 102524
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.7055749893188477,
      "learning_rate": 0.00035159619853055433,
      "loss": 2.8788,
      "step": 102525
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.721590995788574,
      "learning_rate": 0.0003515921689163907,
      "loss": 3.1309,
      "step": 102526
    },
    {
      "epoch": 1.33,
      "grad_norm": 2.145430088043213,
      "learning_rate": 0.00035158813929263516,
      "loss": 2.9973,
      "step": 102527
    },
    {
      "epoch": 1.33,
      "grad_norm": 3.2636728286743164,
      "learning_rate": 0.0003515841096592885,
      "loss": 3.1456,
      "step": 102528
    },
    {
      "epoch": 1.33,
      "grad_norm": 1.9057456254959106,
      "learning_rate": 0.00035158008001635123,
      "loss": 3.2337,
      "step": 102529
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.22133731842041,
      "learning_rate": 0.0003515760503638243,
      "loss": 3.0063,
      "step": 102530
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5187160968780518,
      "learning_rate": 0.00035157202070170844,
      "loss": 2.7565,
      "step": 102531
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9502114057540894,
      "learning_rate": 0.0003515679910300044,
      "loss": 3.0337,
      "step": 102532
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.820770502090454,
      "learning_rate": 0.0003515639613487128,
      "loss": 2.8929,
      "step": 102533
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8404964208602905,
      "learning_rate": 0.0003515599316578346,
      "loss": 2.9242,
      "step": 102534
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.02968168258667,
      "learning_rate": 0.00035155590195737037,
      "loss": 3.02,
      "step": 102535
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9947853088378906,
      "learning_rate": 0.00035155187224732094,
      "loss": 2.9839,
      "step": 102536
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.35090970993042,
      "learning_rate": 0.000351547842527687,
      "loss": 2.9113,
      "step": 102537
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7451403141021729,
      "learning_rate": 0.0003515438127984694,
      "loss": 3.029,
      "step": 102538
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.073759078979492,
      "learning_rate": 0.0003515397830596688,
      "loss": 3.2227,
      "step": 102539
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5934319496154785,
      "learning_rate": 0.00035153575331128595,
      "loss": 2.9704,
      "step": 102540
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6467183828353882,
      "learning_rate": 0.0003515317235533217,
      "loss": 3.0167,
      "step": 102541
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6867034435272217,
      "learning_rate": 0.00035152769378577663,
      "loss": 3.1207,
      "step": 102542
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0671346187591553,
      "learning_rate": 0.0003515236640086517,
      "loss": 3.2861,
      "step": 102543
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.755487322807312,
      "learning_rate": 0.00035151963422194754,
      "loss": 2.8462,
      "step": 102544
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.098832368850708,
      "learning_rate": 0.00035151560442566484,
      "loss": 2.9208,
      "step": 102545
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5267374515533447,
      "learning_rate": 0.00035151157461980453,
      "loss": 2.7477,
      "step": 102546
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0505292415618896,
      "learning_rate": 0.0003515075448043671,
      "loss": 2.9468,
      "step": 102547
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8569482564926147,
      "learning_rate": 0.00035150351497935346,
      "loss": 3.0575,
      "step": 102548
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1998531818389893,
      "learning_rate": 0.0003514994851447645,
      "loss": 2.9246,
      "step": 102549
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8638463020324707,
      "learning_rate": 0.00035149545530060067,
      "loss": 2.9342,
      "step": 102550
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8435654640197754,
      "learning_rate": 0.0003514914254468629,
      "loss": 3.2039,
      "step": 102551
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.3382606506347656,
      "learning_rate": 0.0003514873955835519,
      "loss": 3.0776,
      "step": 102552
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9977704286575317,
      "learning_rate": 0.00035148336571066844,
      "loss": 3.1833,
      "step": 102553
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2172436714172363,
      "learning_rate": 0.00035147933582821325,
      "loss": 2.9951,
      "step": 102554
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.013960361480713,
      "learning_rate": 0.0003514753059361871,
      "loss": 2.9797,
      "step": 102555
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0185205936431885,
      "learning_rate": 0.0003514712760345907,
      "loss": 3.0112,
      "step": 102556
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0917842388153076,
      "learning_rate": 0.0003514672461234248,
      "loss": 2.9592,
      "step": 102557
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7132190465927124,
      "learning_rate": 0.0003514632162026902,
      "loss": 2.9123,
      "step": 102558
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9521667957305908,
      "learning_rate": 0.0003514591862723875,
      "loss": 3.2024,
      "step": 102559
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9113211631774902,
      "learning_rate": 0.00035145515633251765,
      "loss": 3.0453,
      "step": 102560
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.665313959121704,
      "learning_rate": 0.0003514511263830814,
      "loss": 2.967,
      "step": 102561
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2428576946258545,
      "learning_rate": 0.00035144709642407934,
      "loss": 3.0307,
      "step": 102562
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7333390712738037,
      "learning_rate": 0.0003514430664555122,
      "loss": 3.0948,
      "step": 102563
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7449321746826172,
      "learning_rate": 0.000351439036477381,
      "loss": 2.9377,
      "step": 102564
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6370224952697754,
      "learning_rate": 0.0003514350064896862,
      "loss": 3.02,
      "step": 102565
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.9196088314056396,
      "learning_rate": 0.0003514309764924287,
      "loss": 2.9237,
      "step": 102566
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.886220097541809,
      "learning_rate": 0.00035142694648560924,
      "loss": 3.0183,
      "step": 102567
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.0974831581115723,
      "learning_rate": 0.0003514229164692284,
      "loss": 2.8627,
      "step": 102568
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7169334888458252,
      "learning_rate": 0.0003514188864432872,
      "loss": 3.0131,
      "step": 102569
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.289565563201904,
      "learning_rate": 0.0003514148564077863,
      "loss": 2.6933,
      "step": 102570
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7570894956588745,
      "learning_rate": 0.00035141082636272627,
      "loss": 3.0999,
      "step": 102571
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.808388113975525,
      "learning_rate": 0.00035140679630810807,
      "loss": 2.8451,
      "step": 102572
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.23502254486084,
      "learning_rate": 0.00035140276624393235,
      "loss": 2.9855,
      "step": 102573
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6801382303237915,
      "learning_rate": 0.0003513987361701999,
      "loss": 3.2047,
      "step": 102574
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7283505201339722,
      "learning_rate": 0.0003513947060869115,
      "loss": 2.9993,
      "step": 102575
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.053473472595215,
      "learning_rate": 0.0003513906759940678,
      "loss": 2.8108,
      "step": 102576
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8903069496154785,
      "learning_rate": 0.00035138664589166955,
      "loss": 3.0508,
      "step": 102577
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8324726819992065,
      "learning_rate": 0.0003513826157797176,
      "loss": 2.8086,
      "step": 102578
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.915480613708496,
      "learning_rate": 0.00035137858565821274,
      "loss": 2.732,
      "step": 102579
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.778486967086792,
      "learning_rate": 0.00035137455552715553,
      "loss": 2.8829,
      "step": 102580
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5841161012649536,
      "learning_rate": 0.00035137052538654687,
      "loss": 2.8548,
      "step": 102581
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8076541423797607,
      "learning_rate": 0.00035136649523638747,
      "loss": 2.9972,
      "step": 102582
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8782124519348145,
      "learning_rate": 0.00035136246507667804,
      "loss": 2.5258,
      "step": 102583
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8704978227615356,
      "learning_rate": 0.0003513584349074193,
      "loss": 3.1077,
      "step": 102584
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3173446655273438,
      "learning_rate": 0.0003513544047286122,
      "loss": 3.0502,
      "step": 102585
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8278765678405762,
      "learning_rate": 0.0003513503745402572,
      "loss": 3.2036,
      "step": 102586
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0876901149749756,
      "learning_rate": 0.00035134634434235524,
      "loss": 2.7979,
      "step": 102587
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.465804100036621,
      "learning_rate": 0.0003513423141349071,
      "loss": 3.0844,
      "step": 102588
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.8505167961120605,
      "learning_rate": 0.00035133828391791335,
      "loss": 2.9342,
      "step": 102589
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8849432468414307,
      "learning_rate": 0.00035133425369137484,
      "loss": 2.9662,
      "step": 102590
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.801039218902588,
      "learning_rate": 0.00035133022345529237,
      "loss": 3.052,
      "step": 102591
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.71345853805542,
      "learning_rate": 0.0003513261932096667,
      "loss": 3.1563,
      "step": 102592
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.16960072517395,
      "learning_rate": 0.0003513221629544984,
      "loss": 3.0093,
      "step": 102593
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.385158061981201,
      "learning_rate": 0.0003513181326897884,
      "loss": 3.1885,
      "step": 102594
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6697322130203247,
      "learning_rate": 0.00035131410241553745,
      "loss": 3.1499,
      "step": 102595
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.420679807662964,
      "learning_rate": 0.0003513100721317462,
      "loss": 2.8541,
      "step": 102596
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.569243907928467,
      "learning_rate": 0.00035130604183841534,
      "loss": 2.9706,
      "step": 102597
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9962046146392822,
      "learning_rate": 0.0003513020115355459,
      "loss": 3.127,
      "step": 102598
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4340929985046387,
      "learning_rate": 0.0003512979812231383,
      "loss": 2.7876,
      "step": 102599
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.2058351039886475,
      "learning_rate": 0.00035129395090119353,
      "loss": 3.1452,
      "step": 102600
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.85404896736145,
      "learning_rate": 0.0003512899205697122,
      "loss": 2.902,
      "step": 102601
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.619343638420105,
      "learning_rate": 0.00035128589022869513,
      "loss": 3.1684,
      "step": 102602
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4490420818328857,
      "learning_rate": 0.00035128185987814303,
      "loss": 3.0522,
      "step": 102603
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2085800170898438,
      "learning_rate": 0.00035127782951805667,
      "loss": 3.0346,
      "step": 102604
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7001934051513672,
      "learning_rate": 0.00035127379914843675,
      "loss": 2.8707,
      "step": 102605
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8464394807815552,
      "learning_rate": 0.0003512697687692841,
      "loss": 2.9179,
      "step": 102606
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9761359691619873,
      "learning_rate": 0.0003512657383805994,
      "loss": 2.9825,
      "step": 102607
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.735048770904541,
      "learning_rate": 0.0003512617079823835,
      "loss": 3.0636,
      "step": 102608
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8245606422424316,
      "learning_rate": 0.000351257677574637,
      "loss": 2.9014,
      "step": 102609
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9144436120986938,
      "learning_rate": 0.00035125364715736084,
      "loss": 3.2712,
      "step": 102610
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9170053005218506,
      "learning_rate": 0.00035124961673055555,
      "loss": 2.6323,
      "step": 102611
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5847680568695068,
      "learning_rate": 0.00035124558629422197,
      "loss": 2.9258,
      "step": 102612
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8483662605285645,
      "learning_rate": 0.000351241555848361,
      "loss": 3.0614,
      "step": 102613
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.982642650604248,
      "learning_rate": 0.00035123752539297317,
      "loss": 2.9027,
      "step": 102614
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8155978918075562,
      "learning_rate": 0.00035123349492805937,
      "loss": 3.1042,
      "step": 102615
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6512733697891235,
      "learning_rate": 0.0003512294644536203,
      "loss": 2.8696,
      "step": 102616
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4760825634002686,
      "learning_rate": 0.00035122543396965664,
      "loss": 2.949,
      "step": 102617
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7546324729919434,
      "learning_rate": 0.0003512214034761692,
      "loss": 2.633,
      "step": 102618
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5359601974487305,
      "learning_rate": 0.00035121737297315873,
      "loss": 2.9118,
      "step": 102619
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8112893104553223,
      "learning_rate": 0.00035121334246062606,
      "loss": 3.0037,
      "step": 102620
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8686875104904175,
      "learning_rate": 0.00035120931193857177,
      "loss": 2.7663,
      "step": 102621
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0951030254364014,
      "learning_rate": 0.0003512052814069967,
      "loss": 3.1661,
      "step": 102622
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8268330097198486,
      "learning_rate": 0.0003512012508659017,
      "loss": 2.9833,
      "step": 102623
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9721317291259766,
      "learning_rate": 0.00035119722031528737,
      "loss": 3.0301,
      "step": 102624
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8951361179351807,
      "learning_rate": 0.00035119318975515454,
      "loss": 2.9377,
      "step": 102625
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.4866551160812378,
      "learning_rate": 0.0003511891591855039,
      "loss": 3.0592,
      "step": 102626
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0859429836273193,
      "learning_rate": 0.00035118512860633625,
      "loss": 3.1086,
      "step": 102627
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6153897047042847,
      "learning_rate": 0.00035118109801765226,
      "loss": 3.1289,
      "step": 102628
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.00272274017334,
      "learning_rate": 0.0003511770674194528,
      "loss": 3.2901,
      "step": 102629
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7484145164489746,
      "learning_rate": 0.0003511730368117385,
      "loss": 2.9621,
      "step": 102630
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7034754753112793,
      "learning_rate": 0.0003511690061945102,
      "loss": 2.6748,
      "step": 102631
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1613352298736572,
      "learning_rate": 0.00035116497556776866,
      "loss": 3.0995,
      "step": 102632
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4903578758239746,
      "learning_rate": 0.0003511609449315145,
      "loss": 3.0097,
      "step": 102633
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.756259799003601,
      "learning_rate": 0.00035115691428574866,
      "loss": 2.9104,
      "step": 102634
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.617222309112549,
      "learning_rate": 0.0003511528836304717,
      "loss": 3.001,
      "step": 102635
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7743767499923706,
      "learning_rate": 0.0003511488529656845,
      "loss": 3.2998,
      "step": 102636
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5299735069274902,
      "learning_rate": 0.0003511448222913877,
      "loss": 3.1301,
      "step": 102637
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5928407907485962,
      "learning_rate": 0.00035114079160758217,
      "loss": 2.9078,
      "step": 102638
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0901644229888916,
      "learning_rate": 0.00035113676091426855,
      "loss": 3.4178,
      "step": 102639
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6266498565673828,
      "learning_rate": 0.0003511327302114477,
      "loss": 3.1266,
      "step": 102640
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.686948776245117,
      "learning_rate": 0.0003511286994991203,
      "loss": 3.0131,
      "step": 102641
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7566497325897217,
      "learning_rate": 0.00035112466877728704,
      "loss": 2.9913,
      "step": 102642
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6858292818069458,
      "learning_rate": 0.0003511206380459489,
      "loss": 2.9121,
      "step": 102643
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.962334394454956,
      "learning_rate": 0.00035111660730510633,
      "loss": 2.9881,
      "step": 102644
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9623336791992188,
      "learning_rate": 0.0003511125765547602,
      "loss": 3.1743,
      "step": 102645
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6803549528121948,
      "learning_rate": 0.00035110854579491137,
      "loss": 3.0692,
      "step": 102646
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8456437587738037,
      "learning_rate": 0.0003511045150255604,
      "loss": 3.1174,
      "step": 102647
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.903794527053833,
      "learning_rate": 0.0003511004842467082,
      "loss": 2.988,
      "step": 102648
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9579875469207764,
      "learning_rate": 0.0003510964534583555,
      "loss": 3.0019,
      "step": 102649
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7819933891296387,
      "learning_rate": 0.00035109242266050296,
      "loss": 3.2651,
      "step": 102650
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7603299617767334,
      "learning_rate": 0.0003510883918531513,
      "loss": 3.1156,
      "step": 102651
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.081636667251587,
      "learning_rate": 0.0003510843610363015,
      "loss": 3.0179,
      "step": 102652
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6074072122573853,
      "learning_rate": 0.000351080330209954,
      "loss": 3.061,
      "step": 102653
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6662310361862183,
      "learning_rate": 0.00035107629937410973,
      "loss": 2.8854,
      "step": 102654
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9169622659683228,
      "learning_rate": 0.0003510722685287695,
      "loss": 3.012,
      "step": 102655
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6877946853637695,
      "learning_rate": 0.00035106823767393387,
      "loss": 3.2152,
      "step": 102656
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7306203842163086,
      "learning_rate": 0.00035106420680960373,
      "loss": 2.9138,
      "step": 102657
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6557148694992065,
      "learning_rate": 0.00035106017593577977,
      "loss": 3.1226,
      "step": 102658
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6919199228286743,
      "learning_rate": 0.0003510561450524628,
      "loss": 3.1187,
      "step": 102659
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.070800542831421,
      "learning_rate": 0.00035105211415965354,
      "loss": 2.9715,
      "step": 102660
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.985106348991394,
      "learning_rate": 0.0003510480832573527,
      "loss": 2.9295,
      "step": 102661
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8865605592727661,
      "learning_rate": 0.00035104405234556105,
      "loss": 3.0576,
      "step": 102662
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8571290969848633,
      "learning_rate": 0.00035104002142427934,
      "loss": 2.9437,
      "step": 102663
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8221238851547241,
      "learning_rate": 0.0003510359904935083,
      "loss": 3.0766,
      "step": 102664
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.279186248779297,
      "learning_rate": 0.0003510319595532489,
      "loss": 2.8149,
      "step": 102665
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.766843557357788,
      "learning_rate": 0.0003510279286035015,
      "loss": 3.1307,
      "step": 102666
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0496320724487305,
      "learning_rate": 0.000351023897644267,
      "loss": 3.0651,
      "step": 102667
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.761599063873291,
      "learning_rate": 0.00035101986667554643,
      "loss": 3.1205,
      "step": 102668
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4364824295043945,
      "learning_rate": 0.0003510158356973401,
      "loss": 2.9827,
      "step": 102669
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.115432024002075,
      "learning_rate": 0.000351011804709649,
      "loss": 3.0785,
      "step": 102670
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8954824209213257,
      "learning_rate": 0.0003510077737124739,
      "loss": 3.1642,
      "step": 102671
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.970874547958374,
      "learning_rate": 0.00035100374270581543,
      "loss": 3.0862,
      "step": 102672
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7519402503967285,
      "learning_rate": 0.0003509997116896744,
      "loss": 2.7288,
      "step": 102673
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9251885414123535,
      "learning_rate": 0.00035099568066405165,
      "loss": 3.3036,
      "step": 102674
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9427855014801025,
      "learning_rate": 0.00035099164962894774,
      "loss": 2.9675,
      "step": 102675
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.352628707885742,
      "learning_rate": 0.0003509876185843635,
      "loss": 2.8247,
      "step": 102676
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7702548503875732,
      "learning_rate": 0.0003509835875302998,
      "loss": 2.98,
      "step": 102677
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0560336112976074,
      "learning_rate": 0.0003509795564667573,
      "loss": 3.2422,
      "step": 102678
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8540202379226685,
      "learning_rate": 0.0003509755253937366,
      "loss": 3.0963,
      "step": 102679
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.425440549850464,
      "learning_rate": 0.00035097149431123875,
      "loss": 3.0334,
      "step": 102680
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9503870010375977,
      "learning_rate": 0.00035096746321926426,
      "loss": 2.9922,
      "step": 102681
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.182927131652832,
      "learning_rate": 0.0003509634321178139,
      "loss": 3.227,
      "step": 102682
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1905627250671387,
      "learning_rate": 0.0003509594010068886,
      "loss": 3.1344,
      "step": 102683
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.054572820663452,
      "learning_rate": 0.0003509553698864889,
      "loss": 2.9444,
      "step": 102684
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8068244457244873,
      "learning_rate": 0.0003509513387566156,
      "loss": 2.9626,
      "step": 102685
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8485968112945557,
      "learning_rate": 0.0003509473076172696,
      "loss": 2.8822,
      "step": 102686
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8090789318084717,
      "learning_rate": 0.0003509432764684514,
      "loss": 3.0951,
      "step": 102687
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8483879566192627,
      "learning_rate": 0.000350939245310162,
      "loss": 2.9774,
      "step": 102688
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6143391132354736,
      "learning_rate": 0.000350935214142402,
      "loss": 2.9627,
      "step": 102689
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.886913776397705,
      "learning_rate": 0.00035093118296517217,
      "loss": 3.0798,
      "step": 102690
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.685821771621704,
      "learning_rate": 0.00035092715177847326,
      "loss": 2.7864,
      "step": 102691
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4112656116485596,
      "learning_rate": 0.0003509231205823061,
      "loss": 2.7546,
      "step": 102692
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9400478601455688,
      "learning_rate": 0.00035091908937667126,
      "loss": 3.1845,
      "step": 102693
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.53959584236145,
      "learning_rate": 0.0003509150581615696,
      "loss": 3.1189,
      "step": 102694
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6387436389923096,
      "learning_rate": 0.000350911026937002,
      "loss": 2.9625,
      "step": 102695
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.877415657043457,
      "learning_rate": 0.00035090699570296903,
      "loss": 3.3129,
      "step": 102696
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.0598459243774414,
      "learning_rate": 0.0003509029644594714,
      "loss": 2.986,
      "step": 102697
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.148987293243408,
      "learning_rate": 0.00035089893320651,
      "loss": 3.0412,
      "step": 102698
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8133208751678467,
      "learning_rate": 0.00035089490194408553,
      "loss": 3.1356,
      "step": 102699
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.863122582435608,
      "learning_rate": 0.00035089087067219875,
      "loss": 2.9013,
      "step": 102700
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.752063512802124,
      "learning_rate": 0.0003508868393908504,
      "loss": 3.1133,
      "step": 102701
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.907025694847107,
      "learning_rate": 0.0003508828081000412,
      "loss": 3.0324,
      "step": 102702
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8954545259475708,
      "learning_rate": 0.0003508787767997719,
      "loss": 3.1706,
      "step": 102703
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6992887258529663,
      "learning_rate": 0.00035087474549004333,
      "loss": 3.019,
      "step": 102704
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6420458555221558,
      "learning_rate": 0.0003508707141708562,
      "loss": 3.1285,
      "step": 102705
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.215191125869751,
      "learning_rate": 0.0003508666828422112,
      "loss": 2.8696,
      "step": 102706
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6061196327209473,
      "learning_rate": 0.0003508626515041091,
      "loss": 3.0459,
      "step": 102707
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9436620473861694,
      "learning_rate": 0.0003508586201565508,
      "loss": 2.7052,
      "step": 102708
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9578464031219482,
      "learning_rate": 0.0003508545887995368,
      "loss": 3.0298,
      "step": 102709
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9561309814453125,
      "learning_rate": 0.000350850557433068,
      "loss": 3.1286,
      "step": 102710
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8746157884597778,
      "learning_rate": 0.00035084652605714504,
      "loss": 3.1331,
      "step": 102711
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.641265869140625,
      "learning_rate": 0.0003508424946717689,
      "loss": 3.1713,
      "step": 102712
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0735766887664795,
      "learning_rate": 0.0003508384632769401,
      "loss": 2.9861,
      "step": 102713
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.599818468093872,
      "learning_rate": 0.0003508344318726595,
      "loss": 2.9088,
      "step": 102714
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.083224058151245,
      "learning_rate": 0.00035083040045892785,
      "loss": 2.9686,
      "step": 102715
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8599505424499512,
      "learning_rate": 0.00035082636903574575,
      "loss": 2.9558,
      "step": 102716
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9627726078033447,
      "learning_rate": 0.0003508223376031142,
      "loss": 2.8995,
      "step": 102717
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9471337795257568,
      "learning_rate": 0.0003508183061610337,
      "loss": 3.0125,
      "step": 102718
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6670265197753906,
      "learning_rate": 0.0003508142747095052,
      "loss": 2.8373,
      "step": 102719
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0822668075561523,
      "learning_rate": 0.00035081024324852935,
      "loss": 2.8227,
      "step": 102720
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1688754558563232,
      "learning_rate": 0.00035080621177810693,
      "loss": 3.0491,
      "step": 102721
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.622531771659851,
      "learning_rate": 0.0003508021802982387,
      "loss": 2.9413,
      "step": 102722
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7338509559631348,
      "learning_rate": 0.0003507981488089253,
      "loss": 3.0408,
      "step": 102723
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0279853343963623,
      "learning_rate": 0.0003507941173101676,
      "loss": 2.9854,
      "step": 102724
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6659365892410278,
      "learning_rate": 0.0003507900858019663,
      "loss": 3.1287,
      "step": 102725
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7508735656738281,
      "learning_rate": 0.0003507860542843223,
      "loss": 3.0681,
      "step": 102726
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5582574605941772,
      "learning_rate": 0.00035078202275723604,
      "loss": 3.028,
      "step": 102727
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.62557053565979,
      "learning_rate": 0.00035077799122070846,
      "loss": 3.024,
      "step": 102728
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2641477584838867,
      "learning_rate": 0.0003507739596747404,
      "loss": 2.9725,
      "step": 102729
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.213395595550537,
      "learning_rate": 0.00035076992811933243,
      "loss": 2.7528,
      "step": 102730
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9528594017028809,
      "learning_rate": 0.00035076589655448534,
      "loss": 3.0288,
      "step": 102731
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.234022855758667,
      "learning_rate": 0.0003507618649802,
      "loss": 3.1096,
      "step": 102732
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8933535814285278,
      "learning_rate": 0.000350757833396477,
      "loss": 2.7871,
      "step": 102733
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6970975399017334,
      "learning_rate": 0.00035075380180331714,
      "loss": 2.8151,
      "step": 102734
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6592519283294678,
      "learning_rate": 0.0003507497702007213,
      "loss": 3.0518,
      "step": 102735
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7636168003082275,
      "learning_rate": 0.00035074573858869006,
      "loss": 3.0241,
      "step": 102736
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.117574691772461,
      "learning_rate": 0.0003507417069672242,
      "loss": 2.9942,
      "step": 102737
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9377890825271606,
      "learning_rate": 0.00035073767533632457,
      "loss": 3.04,
      "step": 102738
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9539287090301514,
      "learning_rate": 0.0003507336436959918,
      "loss": 3.0799,
      "step": 102739
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.359666347503662,
      "learning_rate": 0.00035072961204622667,
      "loss": 2.9616,
      "step": 102740
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.4344030618667603,
      "learning_rate": 0.00035072558038702997,
      "loss": 2.8805,
      "step": 102741
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1538727283477783,
      "learning_rate": 0.00035072154871840244,
      "loss": 3.0582,
      "step": 102742
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.1899170875549316,
      "learning_rate": 0.00035071751704034474,
      "loss": 2.9193,
      "step": 102743
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7053254842758179,
      "learning_rate": 0.00035071348535285784,
      "loss": 2.9199,
      "step": 102744
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0329055786132812,
      "learning_rate": 0.00035070945365594223,
      "loss": 3.0168,
      "step": 102745
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0566186904907227,
      "learning_rate": 0.00035070542194959883,
      "loss": 2.9761,
      "step": 102746
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9709272384643555,
      "learning_rate": 0.0003507013902338283,
      "loss": 2.877,
      "step": 102747
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7702182531356812,
      "learning_rate": 0.0003506973585086315,
      "loss": 3.3247,
      "step": 102748
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.813957929611206,
      "learning_rate": 0.000350693326774009,
      "loss": 3.0059,
      "step": 102749
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1363301277160645,
      "learning_rate": 0.0003506892950299617,
      "loss": 3.0877,
      "step": 102750
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.272465944290161,
      "learning_rate": 0.0003506852632764903,
      "loss": 2.8162,
      "step": 102751
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8813780546188354,
      "learning_rate": 0.00035068123151359555,
      "loss": 2.9685,
      "step": 102752
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7919371128082275,
      "learning_rate": 0.00035067719974127824,
      "loss": 2.8579,
      "step": 102753
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8128303289413452,
      "learning_rate": 0.00035067316795953904,
      "loss": 3.0992,
      "step": 102754
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.077454090118408,
      "learning_rate": 0.0003506691361683787,
      "loss": 2.8336,
      "step": 102755
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6851613521575928,
      "learning_rate": 0.00035066510436779815,
      "loss": 2.9181,
      "step": 102756
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8672937154769897,
      "learning_rate": 0.0003506610725577979,
      "loss": 3.029,
      "step": 102757
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4355709552764893,
      "learning_rate": 0.00035065704073837875,
      "loss": 2.8473,
      "step": 102758
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6233084201812744,
      "learning_rate": 0.0003506530089095416,
      "loss": 2.9376,
      "step": 102759
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8082530498504639,
      "learning_rate": 0.00035064897707128706,
      "loss": 3.0343,
      "step": 102760
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9856983423233032,
      "learning_rate": 0.0003506449452236159,
      "loss": 3.0364,
      "step": 102761
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.061194658279419,
      "learning_rate": 0.00035064091336652894,
      "loss": 2.8184,
      "step": 102762
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8724178075790405,
      "learning_rate": 0.0003506368815000268,
      "loss": 3.162,
      "step": 102763
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.758225917816162,
      "learning_rate": 0.00035063284962411035,
      "loss": 2.855,
      "step": 102764
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.726487159729004,
      "learning_rate": 0.00035062881773878035,
      "loss": 3.0996,
      "step": 102765
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9316221475601196,
      "learning_rate": 0.0003506247858440375,
      "loss": 2.7981,
      "step": 102766
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8771618604660034,
      "learning_rate": 0.0003506207539398824,
      "loss": 3.3507,
      "step": 102767
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.084214687347412,
      "learning_rate": 0.0003506167220263161,
      "loss": 3.1519,
      "step": 102768
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5900516510009766,
      "learning_rate": 0.00035061269010333915,
      "loss": 3.1492,
      "step": 102769
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5155189037323,
      "learning_rate": 0.00035060865817095234,
      "loss": 3.0139,
      "step": 102770
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8435766696929932,
      "learning_rate": 0.00035060462622915644,
      "loss": 3.1769,
      "step": 102771
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2391815185546875,
      "learning_rate": 0.0003506005942779521,
      "loss": 3.1185,
      "step": 102772
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.2487633228302,
      "learning_rate": 0.0003505965623173402,
      "loss": 2.8264,
      "step": 102773
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.770817518234253,
      "learning_rate": 0.00035059253034732155,
      "loss": 3.1269,
      "step": 102774
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3918874263763428,
      "learning_rate": 0.0003505884983678967,
      "loss": 3.002,
      "step": 102775
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.876849889755249,
      "learning_rate": 0.00035058446637906647,
      "loss": 2.8103,
      "step": 102776
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.13771390914917,
      "learning_rate": 0.00035058043438083167,
      "loss": 3.1307,
      "step": 102777
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2333528995513916,
      "learning_rate": 0.000350576402373193,
      "loss": 2.96,
      "step": 102778
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.907792568206787,
      "learning_rate": 0.00035057237035615124,
      "loss": 3.1859,
      "step": 102779
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.237659454345703,
      "learning_rate": 0.0003505683383297071,
      "loss": 2.9465,
      "step": 102780
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6339858770370483,
      "learning_rate": 0.00035056430629386135,
      "loss": 3.0289,
      "step": 102781
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.087451696395874,
      "learning_rate": 0.00035056027424861473,
      "loss": 2.8359,
      "step": 102782
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0727875232696533,
      "learning_rate": 0.00035055624219396803,
      "loss": 2.9281,
      "step": 102783
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5483617782592773,
      "learning_rate": 0.000350552210129922,
      "loss": 3.0658,
      "step": 102784
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6370127201080322,
      "learning_rate": 0.0003505481780564773,
      "loss": 2.8302,
      "step": 102785
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.923129677772522,
      "learning_rate": 0.0003505441459736348,
      "loss": 2.9782,
      "step": 102786
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.776824712753296,
      "learning_rate": 0.00035054011388139513,
      "loss": 2.9313,
      "step": 102787
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.789586305618286,
      "learning_rate": 0.00035053608177975905,
      "loss": 2.8181,
      "step": 102788
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6373156309127808,
      "learning_rate": 0.0003505320496687275,
      "loss": 2.9532,
      "step": 102789
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6523288488388062,
      "learning_rate": 0.000350528017548301,
      "loss": 2.8902,
      "step": 102790
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2343831062316895,
      "learning_rate": 0.00035052398541848037,
      "loss": 3.0946,
      "step": 102791
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.57238507270813,
      "learning_rate": 0.0003505199532792664,
      "loss": 3.0207,
      "step": 102792
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7253133058547974,
      "learning_rate": 0.00035051592113065984,
      "loss": 2.9282,
      "step": 102793
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8426742553710938,
      "learning_rate": 0.0003505118889726614,
      "loss": 2.9385,
      "step": 102794
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5843918323516846,
      "learning_rate": 0.00035050785680527187,
      "loss": 2.7673,
      "step": 102795
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.666541337966919,
      "learning_rate": 0.00035050382462849196,
      "loss": 2.9802,
      "step": 102796
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7468626499176025,
      "learning_rate": 0.0003504997924423224,
      "loss": 2.7047,
      "step": 102797
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8485593795776367,
      "learning_rate": 0.00035049576024676407,
      "loss": 3.1436,
      "step": 102798
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.402024745941162,
      "learning_rate": 0.0003504917280418175,
      "loss": 2.687,
      "step": 102799
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0339512825012207,
      "learning_rate": 0.0003504876958274837,
      "loss": 2.9934,
      "step": 102800
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.169506788253784,
      "learning_rate": 0.00035048366360376315,
      "loss": 3.214,
      "step": 102801
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6728036403656006,
      "learning_rate": 0.0003504796313706568,
      "loss": 3.0069,
      "step": 102802
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6059134006500244,
      "learning_rate": 0.0003504755991281653,
      "loss": 2.9991,
      "step": 102803
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7848318815231323,
      "learning_rate": 0.00035047156687628955,
      "loss": 2.8372,
      "step": 102804
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5101888179779053,
      "learning_rate": 0.00035046753461503,
      "loss": 3.223,
      "step": 102805
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.6372766494750977,
      "learning_rate": 0.00035046350234438774,
      "loss": 3.1184,
      "step": 102806
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6950159072875977,
      "learning_rate": 0.0003504594700643633,
      "loss": 2.92,
      "step": 102807
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.117405891418457,
      "learning_rate": 0.00035045543777495746,
      "loss": 3.2535,
      "step": 102808
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.552309513092041,
      "learning_rate": 0.000350451405476171,
      "loss": 2.9594,
      "step": 102809
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4965903759002686,
      "learning_rate": 0.0003504473731680047,
      "loss": 2.8141,
      "step": 102810
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7296679019927979,
      "learning_rate": 0.0003504433408504593,
      "loss": 3.0882,
      "step": 102811
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.2731332778930664,
      "learning_rate": 0.0003504393085235355,
      "loss": 3.0769,
      "step": 102812
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.780187606811523,
      "learning_rate": 0.0003504352761872341,
      "loss": 2.9199,
      "step": 102813
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.104205369949341,
      "learning_rate": 0.0003504312438415559,
      "loss": 2.9781,
      "step": 102814
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6983933448791504,
      "learning_rate": 0.00035042721148650143,
      "loss": 3.0339,
      "step": 102815
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7117208242416382,
      "learning_rate": 0.00035042317912207166,
      "loss": 3.1798,
      "step": 102816
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.888833999633789,
      "learning_rate": 0.0003504191467482673,
      "loss": 2.7679,
      "step": 102817
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.666188716888428,
      "learning_rate": 0.0003504151143650891,
      "loss": 2.9741,
      "step": 102818
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7426104545593262,
      "learning_rate": 0.00035041108197253766,
      "loss": 2.864,
      "step": 102819
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.249149799346924,
      "learning_rate": 0.000350407049570614,
      "loss": 3.0102,
      "step": 102820
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.6670939922332764,
      "learning_rate": 0.0003504030171593186,
      "loss": 2.885,
      "step": 102821
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.423643112182617,
      "learning_rate": 0.00035039898473865236,
      "loss": 3.0949,
      "step": 102822
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.752732276916504,
      "learning_rate": 0.000350394952308616,
      "loss": 2.9262,
      "step": 102823
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8323322534561157,
      "learning_rate": 0.0003503909198692102,
      "loss": 3.1503,
      "step": 102824
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.5157084465026855,
      "learning_rate": 0.00035038688742043586,
      "loss": 2.955,
      "step": 102825
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.128953695297241,
      "learning_rate": 0.00035038285496229366,
      "loss": 2.9043,
      "step": 102826
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0066237449645996,
      "learning_rate": 0.0003503788224947843,
      "loss": 3.1872,
      "step": 102827
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9431023597717285,
      "learning_rate": 0.00035037479001790854,
      "loss": 2.9106,
      "step": 102828
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5259041786193848,
      "learning_rate": 0.0003503707575316672,
      "loss": 2.916,
      "step": 102829
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.929236888885498,
      "learning_rate": 0.000350366725036061,
      "loss": 3.1596,
      "step": 102830
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6898194551467896,
      "learning_rate": 0.00035036269253109064,
      "loss": 2.9605,
      "step": 102831
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.850553035736084,
      "learning_rate": 0.0003503586600167569,
      "loss": 2.8607,
      "step": 102832
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.338914155960083,
      "learning_rate": 0.00035035462749306054,
      "loss": 2.8382,
      "step": 102833
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.8364317417144775,
      "learning_rate": 0.0003503505949600023,
      "loss": 3.2491,
      "step": 102834
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.8509507179260254,
      "learning_rate": 0.00035034656241758296,
      "loss": 2.9953,
      "step": 102835
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5269131660461426,
      "learning_rate": 0.0003503425298658032,
      "loss": 3.1374,
      "step": 102836
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4061760902404785,
      "learning_rate": 0.00035033849730466387,
      "loss": 2.8114,
      "step": 102837
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.86539888381958,
      "learning_rate": 0.00035033446473416564,
      "loss": 3.2398,
      "step": 102838
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.798387885093689,
      "learning_rate": 0.0003503304321543093,
      "loss": 2.9502,
      "step": 102839
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.4712703227996826,
      "learning_rate": 0.00035032639956509545,
      "loss": 2.8812,
      "step": 102840
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2739932537078857,
      "learning_rate": 0.0003503223669665252,
      "loss": 2.9568,
      "step": 102841
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6687898635864258,
      "learning_rate": 0.0003503183343585989,
      "loss": 2.9634,
      "step": 102842
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.491092562675476,
      "learning_rate": 0.00035031430174131754,
      "loss": 3.1409,
      "step": 102843
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.003519296646118,
      "learning_rate": 0.0003503102691146818,
      "loss": 2.9036,
      "step": 102844
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.031637191772461,
      "learning_rate": 0.0003503062364786924,
      "loss": 2.7949,
      "step": 102845
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1955313682556152,
      "learning_rate": 0.0003503022038333501,
      "loss": 3.0176,
      "step": 102846
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0972394943237305,
      "learning_rate": 0.0003502981711786558,
      "loss": 3.074,
      "step": 102847
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.888385057449341,
      "learning_rate": 0.00035029413851460996,
      "loss": 2.9062,
      "step": 102848
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.337096691131592,
      "learning_rate": 0.00035029010584121357,
      "loss": 3.2931,
      "step": 102849
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2546138763427734,
      "learning_rate": 0.00035028607315846735,
      "loss": 2.713,
      "step": 102850
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.039623975753784,
      "learning_rate": 0.0003502820404663719,
      "loss": 2.9487,
      "step": 102851
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4684395790100098,
      "learning_rate": 0.00035027800776492806,
      "loss": 3.0556,
      "step": 102852
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.253432273864746,
      "learning_rate": 0.0003502739750541367,
      "loss": 3.0795,
      "step": 102853
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.010937452316284,
      "learning_rate": 0.0003502699423339984,
      "loss": 3.1802,
      "step": 102854
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9491239786148071,
      "learning_rate": 0.0003502659096045139,
      "loss": 3.0056,
      "step": 102855
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2149899005889893,
      "learning_rate": 0.00035026187686568416,
      "loss": 3.0736,
      "step": 102856
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.371432065963745,
      "learning_rate": 0.0003502578441175097,
      "loss": 3.1197,
      "step": 102857
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.374464988708496,
      "learning_rate": 0.0003502538113599913,
      "loss": 3.0642,
      "step": 102858
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8787543773651123,
      "learning_rate": 0.00035024977859312987,
      "loss": 3.2918,
      "step": 102859
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.102975368499756,
      "learning_rate": 0.000350245745816926,
      "loss": 3.0887,
      "step": 102860
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.4845821857452393,
      "learning_rate": 0.00035024171303138057,
      "loss": 2.9047,
      "step": 102861
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7901064157485962,
      "learning_rate": 0.00035023768023649413,
      "loss": 3.0107,
      "step": 102862
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8690165281295776,
      "learning_rate": 0.0003502336474322677,
      "loss": 3.0729,
      "step": 102863
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7713373899459839,
      "learning_rate": 0.0003502296146187018,
      "loss": 2.8057,
      "step": 102864
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9064539670944214,
      "learning_rate": 0.00035022558179579726,
      "loss": 3.0122,
      "step": 102865
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.23246431350708,
      "learning_rate": 0.0003502215489635549,
      "loss": 3.0641,
      "step": 102866
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6865894794464111,
      "learning_rate": 0.0003502175161219754,
      "loss": 3.0463,
      "step": 102867
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7075819969177246,
      "learning_rate": 0.0003502134832710595,
      "loss": 2.9798,
      "step": 102868
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.182053565979004,
      "learning_rate": 0.00035020945041080797,
      "loss": 2.9085,
      "step": 102869
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6215767860412598,
      "learning_rate": 0.0003502054175412215,
      "loss": 3.0615,
      "step": 102870
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9307153224945068,
      "learning_rate": 0.00035020138466230094,
      "loss": 2.9,
      "step": 102871
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6318777799606323,
      "learning_rate": 0.000350197351774047,
      "loss": 3.4262,
      "step": 102872
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2860267162323,
      "learning_rate": 0.00035019331887646045,
      "loss": 2.8227,
      "step": 102873
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7966803312301636,
      "learning_rate": 0.00035018928596954193,
      "loss": 2.8802,
      "step": 102874
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5451560020446777,
      "learning_rate": 0.0003501852530532924,
      "loss": 2.9781,
      "step": 102875
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.934102177619934,
      "learning_rate": 0.00035018122012771236,
      "loss": 2.9248,
      "step": 102876
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.896343469619751,
      "learning_rate": 0.00035017718719280267,
      "loss": 2.8235,
      "step": 102877
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.729140043258667,
      "learning_rate": 0.00035017315424856424,
      "loss": 3.076,
      "step": 102878
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6974817514419556,
      "learning_rate": 0.00035016912129499754,
      "loss": 3.1177,
      "step": 102879
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.994862675666809,
      "learning_rate": 0.0003501650883321035,
      "loss": 3.0918,
      "step": 102880
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4047024250030518,
      "learning_rate": 0.00035016105535988284,
      "loss": 3.1497,
      "step": 102881
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7529290914535522,
      "learning_rate": 0.00035015702237833625,
      "loss": 3.0307,
      "step": 102882
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8055609464645386,
      "learning_rate": 0.0003501529893874646,
      "loss": 3.0184,
      "step": 102883
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8532406091690063,
      "learning_rate": 0.0003501489563872685,
      "loss": 2.9884,
      "step": 102884
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.598976492881775,
      "learning_rate": 0.0003501449233777488,
      "loss": 3.0549,
      "step": 102885
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.58933424949646,
      "learning_rate": 0.00035014089035890616,
      "loss": 3.0326,
      "step": 102886
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8789347410202026,
      "learning_rate": 0.00035013685733074136,
      "loss": 3.0773,
      "step": 102887
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.092200517654419,
      "learning_rate": 0.0003501328242932553,
      "loss": 3.0388,
      "step": 102888
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5716201066970825,
      "learning_rate": 0.00035012879124644847,
      "loss": 3.0596,
      "step": 102889
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9794583320617676,
      "learning_rate": 0.0003501247581903218,
      "loss": 2.7954,
      "step": 102890
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0240166187286377,
      "learning_rate": 0.000350120725124876,
      "loss": 3.1867,
      "step": 102891
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3616743087768555,
      "learning_rate": 0.00035011669205011186,
      "loss": 2.8679,
      "step": 102892
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2831263542175293,
      "learning_rate": 0.00035011265896603,
      "loss": 3.1432,
      "step": 102893
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.899856686592102,
      "learning_rate": 0.00035010862587263124,
      "loss": 3.3113,
      "step": 102894
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5776655673980713,
      "learning_rate": 0.0003501045927699164,
      "loss": 2.9948,
      "step": 102895
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9642488956451416,
      "learning_rate": 0.00035010055965788617,
      "loss": 2.9954,
      "step": 102896
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.759088158607483,
      "learning_rate": 0.00035009652653654125,
      "loss": 3.0323,
      "step": 102897
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6596990823745728,
      "learning_rate": 0.00035009249340588243,
      "loss": 3.183,
      "step": 102898
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7180463075637817,
      "learning_rate": 0.00035008846026591054,
      "loss": 2.9195,
      "step": 102899
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9551032781600952,
      "learning_rate": 0.00035008442711662626,
      "loss": 2.9181,
      "step": 102900
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.093264102935791,
      "learning_rate": 0.00035008039395803027,
      "loss": 3.0911,
      "step": 102901
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.303156614303589,
      "learning_rate": 0.00035007636079012347,
      "loss": 3.0417,
      "step": 102902
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.536571502685547,
      "learning_rate": 0.00035007232761290647,
      "loss": 2.9456,
      "step": 102903
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5335233211517334,
      "learning_rate": 0.00035006829442638007,
      "loss": 2.9052,
      "step": 102904
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1798055171966553,
      "learning_rate": 0.00035006426123054514,
      "loss": 3.1998,
      "step": 102905
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7264642715454102,
      "learning_rate": 0.00035006022802540224,
      "loss": 3.0713,
      "step": 102906
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.416733980178833,
      "learning_rate": 0.0003500561948109521,
      "loss": 2.8686,
      "step": 102907
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.976377248764038,
      "learning_rate": 0.0003500521615871958,
      "loss": 3.0585,
      "step": 102908
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.1244935989379883,
      "learning_rate": 0.00035004812835413363,
      "loss": 2.9187,
      "step": 102909
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0843236446380615,
      "learning_rate": 0.0003500440951117666,
      "loss": 2.9541,
      "step": 102910
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6260483264923096,
      "learning_rate": 0.0003500400618600956,
      "loss": 3.0623,
      "step": 102911
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.825716495513916,
      "learning_rate": 0.00035003602859912103,
      "loss": 3.0808,
      "step": 102912
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0992040634155273,
      "learning_rate": 0.00035003199532884386,
      "loss": 2.7573,
      "step": 102913
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1830263137817383,
      "learning_rate": 0.0003500279620492649,
      "loss": 3.037,
      "step": 102914
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6674778461456299,
      "learning_rate": 0.00035002392876038466,
      "loss": 3.0116,
      "step": 102915
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.8116953372955322,
      "learning_rate": 0.0003500198954622041,
      "loss": 3.0592,
      "step": 102916
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0176033973693848,
      "learning_rate": 0.0003500158621547239,
      "loss": 2.9894,
      "step": 102917
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.719818115234375,
      "learning_rate": 0.0003500118288379448,
      "loss": 2.8356,
      "step": 102918
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.691228866577148,
      "learning_rate": 0.0003500077955118675,
      "loss": 2.7625,
      "step": 102919
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6392624378204346,
      "learning_rate": 0.0003500037621764929,
      "loss": 3.0855,
      "step": 102920
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8555880784988403,
      "learning_rate": 0.0003499997288318216,
      "loss": 2.772,
      "step": 102921
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0767695903778076,
      "learning_rate": 0.0003499956954778544,
      "loss": 3.1568,
      "step": 102922
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.3598008155822754,
      "learning_rate": 0.00034999166211459207,
      "loss": 2.9484,
      "step": 102923
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.597489356994629,
      "learning_rate": 0.00034998762874203533,
      "loss": 2.8702,
      "step": 102924
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0000224113464355,
      "learning_rate": 0.000349983595360185,
      "loss": 2.8643,
      "step": 102925
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.478881597518921,
      "learning_rate": 0.0003499795619690417,
      "loss": 3.2676,
      "step": 102926
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.383024215698242,
      "learning_rate": 0.0003499755285686064,
      "loss": 3.052,
      "step": 102927
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7711453437805176,
      "learning_rate": 0.0003499714951588795,
      "loss": 2.9861,
      "step": 102928
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.283712863922119,
      "learning_rate": 0.0003499674617398621,
      "loss": 3.2581,
      "step": 102929
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7144801616668701,
      "learning_rate": 0.0003499634283115548,
      "loss": 3.0979,
      "step": 102930
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.365518808364868,
      "learning_rate": 0.00034995939487395824,
      "loss": 2.9929,
      "step": 102931
    },
    {
      "epoch": 1.34,
      "grad_norm": 5.032533645629883,
      "learning_rate": 0.0003499553614270734,
      "loss": 2.9732,
      "step": 102932
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5969657897949219,
      "learning_rate": 0.0003499513279709009,
      "loss": 3.3016,
      "step": 102933
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.855060338973999,
      "learning_rate": 0.0003499472945054415,
      "loss": 2.8114,
      "step": 102934
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6454063653945923,
      "learning_rate": 0.0003499432610306959,
      "loss": 3.0506,
      "step": 102935
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1669962406158447,
      "learning_rate": 0.00034993922754666495,
      "loss": 3.1408,
      "step": 102936
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.67185115814209,
      "learning_rate": 0.00034993519405334935,
      "loss": 2.9308,
      "step": 102937
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9077386856079102,
      "learning_rate": 0.0003499311605507498,
      "loss": 2.9514,
      "step": 102938
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8937816619873047,
      "learning_rate": 0.00034992712703886726,
      "loss": 2.8319,
      "step": 102939
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4663150310516357,
      "learning_rate": 0.0003499230935177022,
      "loss": 2.9083,
      "step": 102940
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7794398069381714,
      "learning_rate": 0.0003499190599872555,
      "loss": 2.9304,
      "step": 102941
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.488451361656189,
      "learning_rate": 0.000349915026447528,
      "loss": 2.8729,
      "step": 102942
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8839149475097656,
      "learning_rate": 0.0003499109928985202,
      "loss": 2.7771,
      "step": 102943
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.197821855545044,
      "learning_rate": 0.0003499069593402331,
      "loss": 2.9912,
      "step": 102944
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6961461305618286,
      "learning_rate": 0.00034990292577266743,
      "loss": 2.9048,
      "step": 102945
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8719195127487183,
      "learning_rate": 0.0003498988921958237,
      "loss": 3.1409,
      "step": 102946
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.908862590789795,
      "learning_rate": 0.0003498948586097029,
      "loss": 2.8667,
      "step": 102947
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7568199634552002,
      "learning_rate": 0.00034989082501430573,
      "loss": 3.2149,
      "step": 102948
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9411207437515259,
      "learning_rate": 0.00034988679140963286,
      "loss": 3.0776,
      "step": 102949
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1022486686706543,
      "learning_rate": 0.00034988275779568507,
      "loss": 2.8975,
      "step": 102950
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7624523639678955,
      "learning_rate": 0.00034987872417246323,
      "loss": 2.9475,
      "step": 102951
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7431161403656006,
      "learning_rate": 0.000349874690539968,
      "loss": 3.2796,
      "step": 102952
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.638522982597351,
      "learning_rate": 0.00034987065689819996,
      "loss": 3.1579,
      "step": 102953
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.8047735691070557,
      "learning_rate": 0.0003498666232471602,
      "loss": 3.0392,
      "step": 102954
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2401742935180664,
      "learning_rate": 0.0003498625895868492,
      "loss": 3.2608,
      "step": 102955
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6733866930007935,
      "learning_rate": 0.0003498585559172678,
      "loss": 3.1299,
      "step": 102956
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6119824647903442,
      "learning_rate": 0.0003498545222384169,
      "loss": 3.0926,
      "step": 102957
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5663132667541504,
      "learning_rate": 0.0003498504885502969,
      "loss": 3.3162,
      "step": 102958
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.748123288154602,
      "learning_rate": 0.0003498464548529089,
      "loss": 3.1178,
      "step": 102959
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7463366985321045,
      "learning_rate": 0.00034984242114625345,
      "loss": 3.0121,
      "step": 102960
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8121209144592285,
      "learning_rate": 0.0003498383874303313,
      "loss": 3.2423,
      "step": 102961
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0070669651031494,
      "learning_rate": 0.0003498343537051433,
      "loss": 2.924,
      "step": 102962
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8325847387313843,
      "learning_rate": 0.00034983031997069016,
      "loss": 3.0965,
      "step": 102963
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7432806491851807,
      "learning_rate": 0.0003498262862269726,
      "loss": 3.0596,
      "step": 102964
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.155850887298584,
      "learning_rate": 0.0003498222524739914,
      "loss": 2.7682,
      "step": 102965
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.647375226020813,
      "learning_rate": 0.0003498182187117473,
      "loss": 3.0459,
      "step": 102966
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7985053062438965,
      "learning_rate": 0.00034981418494024107,
      "loss": 3.1144,
      "step": 102967
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7575334310531616,
      "learning_rate": 0.00034981015115947343,
      "loss": 2.6531,
      "step": 102968
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3511037826538086,
      "learning_rate": 0.0003498061173694452,
      "loss": 3.113,
      "step": 102969
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6944340467453003,
      "learning_rate": 0.000349802083570157,
      "loss": 3.0352,
      "step": 102970
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.811949372291565,
      "learning_rate": 0.0003497980497616096,
      "loss": 2.8896,
      "step": 102971
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.543426990509033,
      "learning_rate": 0.0003497940159438039,
      "loss": 2.8113,
      "step": 102972
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.166882038116455,
      "learning_rate": 0.00034978998211674056,
      "loss": 3.0707,
      "step": 102973
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0038692951202393,
      "learning_rate": 0.0003497859482804203,
      "loss": 3.0232,
      "step": 102974
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.139399528503418,
      "learning_rate": 0.0003497819144348438,
      "loss": 3.1738,
      "step": 102975
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1750986576080322,
      "learning_rate": 0.00034977788058001197,
      "loss": 2.905,
      "step": 102976
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6760495901107788,
      "learning_rate": 0.0003497738467159256,
      "loss": 2.8407,
      "step": 102977
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9992551803588867,
      "learning_rate": 0.00034976981284258517,
      "loss": 2.8969,
      "step": 102978
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0736896991729736,
      "learning_rate": 0.0003497657789599917,
      "loss": 3.2826,
      "step": 102979
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2846639156341553,
      "learning_rate": 0.0003497617450681458,
      "loss": 2.7144,
      "step": 102980
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5089221000671387,
      "learning_rate": 0.00034975771116704823,
      "loss": 3.0378,
      "step": 102981
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.591115713119507,
      "learning_rate": 0.00034975367725669973,
      "loss": 2.9896,
      "step": 102982
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.464364528656006,
      "learning_rate": 0.00034974964333710115,
      "loss": 2.8242,
      "step": 102983
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0269298553466797,
      "learning_rate": 0.0003497456094082532,
      "loss": 3.0622,
      "step": 102984
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.314948558807373,
      "learning_rate": 0.00034974157547015654,
      "loss": 2.6496,
      "step": 102985
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0987801551818848,
      "learning_rate": 0.000349737541522812,
      "loss": 2.8133,
      "step": 102986
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6952896118164062,
      "learning_rate": 0.0003497335075662203,
      "loss": 3.0367,
      "step": 102987
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9023792743682861,
      "learning_rate": 0.0003497294736003822,
      "loss": 3.2331,
      "step": 102988
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8167694807052612,
      "learning_rate": 0.0003497254396252984,
      "loss": 3.1002,
      "step": 102989
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8640791177749634,
      "learning_rate": 0.00034972140564096986,
      "loss": 2.7904,
      "step": 102990
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5641356706619263,
      "learning_rate": 0.00034971737164739703,
      "loss": 3.1631,
      "step": 102991
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.804855227470398,
      "learning_rate": 0.0003497133376445809,
      "loss": 3.0752,
      "step": 102992
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7031971216201782,
      "learning_rate": 0.00034970930363252203,
      "loss": 3.1043,
      "step": 102993
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9390270709991455,
      "learning_rate": 0.00034970526961122133,
      "loss": 3.0472,
      "step": 102994
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6933444738388062,
      "learning_rate": 0.0003497012355806795,
      "loss": 2.7763,
      "step": 102995
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9954811334609985,
      "learning_rate": 0.0003496972015408972,
      "loss": 2.884,
      "step": 102996
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8638545274734497,
      "learning_rate": 0.0003496931674918753,
      "loss": 2.9505,
      "step": 102997
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.842726469039917,
      "learning_rate": 0.00034968913343361445,
      "loss": 3.0297,
      "step": 102998
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7083567380905151,
      "learning_rate": 0.00034968509936611546,
      "loss": 2.7142,
      "step": 102999
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.718616008758545,
      "learning_rate": 0.0003496810652893791,
      "loss": 2.9143,
      "step": 103000
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1081645488739014,
      "learning_rate": 0.0003496770312034061,
      "loss": 3.0722,
      "step": 103001
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8035132884979248,
      "learning_rate": 0.0003496729971081972,
      "loss": 2.9043,
      "step": 103002
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7625294923782349,
      "learning_rate": 0.00034966896300375316,
      "loss": 3.1404,
      "step": 103003
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.3301334381103516,
      "learning_rate": 0.00034966492889007474,
      "loss": 3.1212,
      "step": 103004
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0994622707366943,
      "learning_rate": 0.0003496608947671626,
      "loss": 2.8926,
      "step": 103005
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.4290735721588135,
      "learning_rate": 0.00034965686063501757,
      "loss": 3.0414,
      "step": 103006
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6136574745178223,
      "learning_rate": 0.00034965282649364045,
      "loss": 3.0914,
      "step": 103007
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.026989698410034,
      "learning_rate": 0.0003496487923430319,
      "loss": 3.0529,
      "step": 103008
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7891772985458374,
      "learning_rate": 0.00034964475818319274,
      "loss": 2.9429,
      "step": 103009
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.125823497772217,
      "learning_rate": 0.00034964072401412363,
      "loss": 3.0109,
      "step": 103010
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5306718349456787,
      "learning_rate": 0.00034963668983582536,
      "loss": 2.7518,
      "step": 103011
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0607993602752686,
      "learning_rate": 0.0003496326556482988,
      "loss": 2.897,
      "step": 103012
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7270963191986084,
      "learning_rate": 0.00034962862145154446,
      "loss": 2.8266,
      "step": 103013
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0636472702026367,
      "learning_rate": 0.00034962458724556325,
      "loss": 2.9735,
      "step": 103014
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6987311840057373,
      "learning_rate": 0.000349620553030356,
      "loss": 2.8441,
      "step": 103015
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9788888692855835,
      "learning_rate": 0.0003496165188059232,
      "loss": 2.9197,
      "step": 103016
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7606650590896606,
      "learning_rate": 0.00034961248457226584,
      "loss": 2.9743,
      "step": 103017
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9968593120574951,
      "learning_rate": 0.0003496084503293846,
      "loss": 2.8605,
      "step": 103018
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.599545478820801,
      "learning_rate": 0.00034960441607728016,
      "loss": 2.9997,
      "step": 103019
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8544007539749146,
      "learning_rate": 0.00034960038181595335,
      "loss": 3.0207,
      "step": 103020
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.16964054107666,
      "learning_rate": 0.0003495963475454049,
      "loss": 2.8714,
      "step": 103021
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.831409215927124,
      "learning_rate": 0.00034959231326563557,
      "loss": 2.9759,
      "step": 103022
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0039193630218506,
      "learning_rate": 0.000349588278976646,
      "loss": 2.741,
      "step": 103023
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.681928038597107,
      "learning_rate": 0.0003495842446784372,
      "loss": 2.8746,
      "step": 103024
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6999377012252808,
      "learning_rate": 0.00034958021037100956,
      "loss": 2.6964,
      "step": 103025
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9916460514068604,
      "learning_rate": 0.0003495761760543641,
      "loss": 3.0723,
      "step": 103026
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6576476097106934,
      "learning_rate": 0.0003495721417285016,
      "loss": 3.0512,
      "step": 103027
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.986223816871643,
      "learning_rate": 0.0003495681073934226,
      "loss": 3.0218,
      "step": 103028
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.793045997619629,
      "learning_rate": 0.0003495640730491279,
      "loss": 3.3787,
      "step": 103029
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1277506351470947,
      "learning_rate": 0.00034956003869561844,
      "loss": 3.0469,
      "step": 103030
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2645699977874756,
      "learning_rate": 0.0003495560043328947,
      "loss": 3.0548,
      "step": 103031
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.602693796157837,
      "learning_rate": 0.0003495519699609576,
      "loss": 3.0927,
      "step": 103032
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.952814817428589,
      "learning_rate": 0.0003495479355798079,
      "loss": 3.2194,
      "step": 103033
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4843525886535645,
      "learning_rate": 0.00034954390118944627,
      "loss": 3.126,
      "step": 103034
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2243576049804688,
      "learning_rate": 0.0003495398667898735,
      "loss": 2.89,
      "step": 103035
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2886219024658203,
      "learning_rate": 0.00034953583238109037,
      "loss": 2.8731,
      "step": 103036
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9840466976165771,
      "learning_rate": 0.0003495317979630975,
      "loss": 3.1406,
      "step": 103037
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.9751174449920654,
      "learning_rate": 0.0003495277635358958,
      "loss": 2.971,
      "step": 103038
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9589015245437622,
      "learning_rate": 0.000349523729099486,
      "loss": 3.0967,
      "step": 103039
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8561969995498657,
      "learning_rate": 0.00034951969465386873,
      "loss": 3.0746,
      "step": 103040
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8852267265319824,
      "learning_rate": 0.00034951566019904485,
      "loss": 3.2088,
      "step": 103041
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.888392210006714,
      "learning_rate": 0.00034951162573501507,
      "loss": 2.867,
      "step": 103042
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5105791091918945,
      "learning_rate": 0.0003495075912617801,
      "loss": 3.0163,
      "step": 103043
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6882084608078003,
      "learning_rate": 0.00034950355677934076,
      "loss": 2.8841,
      "step": 103044
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3531413078308105,
      "learning_rate": 0.00034949952228769787,
      "loss": 2.8999,
      "step": 103045
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.488283634185791,
      "learning_rate": 0.00034949548778685194,
      "loss": 2.7918,
      "step": 103046
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.9194414615631104,
      "learning_rate": 0.0003494914532768039,
      "loss": 2.8776,
      "step": 103047
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2538483142852783,
      "learning_rate": 0.0003494874187575545,
      "loss": 3.1299,
      "step": 103048
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1276650428771973,
      "learning_rate": 0.00034948338422910447,
      "loss": 3.0474,
      "step": 103049
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.479421615600586,
      "learning_rate": 0.00034947934969145443,
      "loss": 3.0593,
      "step": 103050
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8839294910430908,
      "learning_rate": 0.0003494753151446054,
      "loss": 2.9487,
      "step": 103051
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.701296329498291,
      "learning_rate": 0.00034947128058855794,
      "loss": 2.8921,
      "step": 103052
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0331332683563232,
      "learning_rate": 0.00034946724602331273,
      "loss": 2.6788,
      "step": 103053
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8888635635375977,
      "learning_rate": 0.0003494632114488707,
      "loss": 3.157,
      "step": 103054
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.33686900138855,
      "learning_rate": 0.00034945917686523257,
      "loss": 2.8857,
      "step": 103055
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.913445234298706,
      "learning_rate": 0.0003494551422723989,
      "loss": 3.2233,
      "step": 103056
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.086771249771118,
      "learning_rate": 0.0003494511076703707,
      "loss": 2.9713,
      "step": 103057
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6269288063049316,
      "learning_rate": 0.00034944707305914865,
      "loss": 2.9803,
      "step": 103058
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.963899850845337,
      "learning_rate": 0.0003494430384387333,
      "loss": 3.0991,
      "step": 103059
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7487026453018188,
      "learning_rate": 0.00034943900380912565,
      "loss": 2.7633,
      "step": 103060
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2690913677215576,
      "learning_rate": 0.0003494349691703265,
      "loss": 3.1045,
      "step": 103061
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9849470853805542,
      "learning_rate": 0.0003494309345223362,
      "loss": 2.9462,
      "step": 103062
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5421643257141113,
      "learning_rate": 0.00034942689986515587,
      "loss": 2.9981,
      "step": 103063
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9564919471740723,
      "learning_rate": 0.00034942286519878615,
      "loss": 3.2067,
      "step": 103064
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.113262176513672,
      "learning_rate": 0.00034941883052322777,
      "loss": 3.112,
      "step": 103065
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.795316219329834,
      "learning_rate": 0.0003494147958384815,
      "loss": 3.1963,
      "step": 103066
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0754990577697754,
      "learning_rate": 0.00034941076114454814,
      "loss": 2.8772,
      "step": 103067
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4743661880493164,
      "learning_rate": 0.00034940672644142825,
      "loss": 2.9187,
      "step": 103068
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8230669498443604,
      "learning_rate": 0.00034940269172912283,
      "loss": 2.9624,
      "step": 103069
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6498100757598877,
      "learning_rate": 0.00034939865700763255,
      "loss": 2.9457,
      "step": 103070
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6865787506103516,
      "learning_rate": 0.000349394622276958,
      "loss": 2.987,
      "step": 103071
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7472691535949707,
      "learning_rate": 0.0003493905875371002,
      "loss": 2.9888,
      "step": 103072
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8556386232376099,
      "learning_rate": 0.0003493865527880596,
      "loss": 3.1767,
      "step": 103073
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9033982753753662,
      "learning_rate": 0.00034938251802983726,
      "loss": 3.0326,
      "step": 103074
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7091844081878662,
      "learning_rate": 0.00034937848326243367,
      "loss": 3.1165,
      "step": 103075
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.72293221950531,
      "learning_rate": 0.00034937444848584976,
      "loss": 3.0625,
      "step": 103076
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1602108478546143,
      "learning_rate": 0.0003493704137000861,
      "loss": 2.8241,
      "step": 103077
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8563601970672607,
      "learning_rate": 0.00034936637890514363,
      "loss": 3.1858,
      "step": 103078
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8227030038833618,
      "learning_rate": 0.0003493623441010231,
      "loss": 3.1481,
      "step": 103079
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7673395872116089,
      "learning_rate": 0.000349358309287725,
      "loss": 2.855,
      "step": 103080
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8510624170303345,
      "learning_rate": 0.00034935427446525035,
      "loss": 2.8711,
      "step": 103081
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9467905759811401,
      "learning_rate": 0.0003493502396335999,
      "loss": 3.0659,
      "step": 103082
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1968369483947754,
      "learning_rate": 0.00034934620479277415,
      "loss": 3.0786,
      "step": 103083
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.916308045387268,
      "learning_rate": 0.0003493421699427741,
      "loss": 2.9711,
      "step": 103084
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1880948543548584,
      "learning_rate": 0.00034933813508360043,
      "loss": 3.0949,
      "step": 103085
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8445556163787842,
      "learning_rate": 0.00034933410021525377,
      "loss": 2.9617,
      "step": 103086
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7124805450439453,
      "learning_rate": 0.000349330065337735,
      "loss": 3.1287,
      "step": 103087
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0354628562927246,
      "learning_rate": 0.0003493260304510449,
      "loss": 2.9908,
      "step": 103088
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5894912481307983,
      "learning_rate": 0.0003493219955551842,
      "loss": 2.9318,
      "step": 103089
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0272440910339355,
      "learning_rate": 0.0003493179606501535,
      "loss": 3.0861,
      "step": 103090
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8970595598220825,
      "learning_rate": 0.00034931392573595374,
      "loss": 3.0082,
      "step": 103091
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5301060676574707,
      "learning_rate": 0.0003493098908125855,
      "loss": 3.1014,
      "step": 103092
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5402308702468872,
      "learning_rate": 0.0003493058558800496,
      "loss": 3.0934,
      "step": 103093
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1506989002227783,
      "learning_rate": 0.0003493018209383469,
      "loss": 2.9174,
      "step": 103094
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.4939332008361816,
      "learning_rate": 0.000349297785987478,
      "loss": 2.9712,
      "step": 103095
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.596482515335083,
      "learning_rate": 0.00034929375102744377,
      "loss": 3.0599,
      "step": 103096
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9819049835205078,
      "learning_rate": 0.0003492897160582449,
      "loss": 2.8723,
      "step": 103097
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9984592199325562,
      "learning_rate": 0.0003492856810798821,
      "loss": 3.0938,
      "step": 103098
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.679258942604065,
      "learning_rate": 0.00034928164609235616,
      "loss": 2.9167,
      "step": 103099
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6069748401641846,
      "learning_rate": 0.00034927761109566793,
      "loss": 3.0012,
      "step": 103100
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8571230173110962,
      "learning_rate": 0.00034927357608981793,
      "loss": 2.9308,
      "step": 103101
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7103711366653442,
      "learning_rate": 0.0003492695410748071,
      "loss": 2.8267,
      "step": 103102
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3011183738708496,
      "learning_rate": 0.00034926550605063615,
      "loss": 3.145,
      "step": 103103
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8957271575927734,
      "learning_rate": 0.00034926147101730585,
      "loss": 3.1918,
      "step": 103104
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7749433517456055,
      "learning_rate": 0.00034925743597481675,
      "loss": 2.8515,
      "step": 103105
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8863723278045654,
      "learning_rate": 0.00034925340092316993,
      "loss": 2.9165,
      "step": 103106
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.959559679031372,
      "learning_rate": 0.00034924936586236595,
      "loss": 3.136,
      "step": 103107
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9644404649734497,
      "learning_rate": 0.0003492453307924055,
      "loss": 2.9952,
      "step": 103108
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7352181673049927,
      "learning_rate": 0.0003492412957132895,
      "loss": 2.8329,
      "step": 103109
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9878828525543213,
      "learning_rate": 0.00034923726062501855,
      "loss": 3.0135,
      "step": 103110
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9756227731704712,
      "learning_rate": 0.0003492332255275934,
      "loss": 3.152,
      "step": 103111
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2256672382354736,
      "learning_rate": 0.000349229190421015,
      "loss": 3.0842,
      "step": 103112
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0940706729888916,
      "learning_rate": 0.0003492251553052839,
      "loss": 3.0193,
      "step": 103113
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.763916254043579,
      "learning_rate": 0.000349221120180401,
      "loss": 3.1918,
      "step": 103114
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7685420513153076,
      "learning_rate": 0.0003492170850463669,
      "loss": 2.7615,
      "step": 103115
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8240758180618286,
      "learning_rate": 0.00034921304990318235,
      "loss": 3.2302,
      "step": 103116
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7859930992126465,
      "learning_rate": 0.00034920901475084823,
      "loss": 3.1307,
      "step": 103117
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5536015033721924,
      "learning_rate": 0.0003492049795893653,
      "loss": 3.0468,
      "step": 103118
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.825764536857605,
      "learning_rate": 0.0003492009444187341,
      "loss": 3.0807,
      "step": 103119
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6213123798370361,
      "learning_rate": 0.0003491969092389555,
      "loss": 2.6812,
      "step": 103120
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7891294956207275,
      "learning_rate": 0.00034919287405003045,
      "loss": 2.6969,
      "step": 103121
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6773478984832764,
      "learning_rate": 0.00034918883885195936,
      "loss": 3.0105,
      "step": 103122
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7478387355804443,
      "learning_rate": 0.00034918480364474314,
      "loss": 3.2964,
      "step": 103123
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7300077676773071,
      "learning_rate": 0.0003491807684283826,
      "loss": 2.8655,
      "step": 103124
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.610236644744873,
      "learning_rate": 0.0003491767332028784,
      "loss": 2.8802,
      "step": 103125
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7339649200439453,
      "learning_rate": 0.0003491726979682313,
      "loss": 3.0668,
      "step": 103126
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1605544090270996,
      "learning_rate": 0.00034916866272444207,
      "loss": 2.8269,
      "step": 103127
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7879923582077026,
      "learning_rate": 0.00034916462747151155,
      "loss": 2.9428,
      "step": 103128
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.180108070373535,
      "learning_rate": 0.00034916059220944023,
      "loss": 2.8261,
      "step": 103129
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.9611997604370117,
      "learning_rate": 0.00034915655693822914,
      "loss": 3.1128,
      "step": 103130
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6070326566696167,
      "learning_rate": 0.00034915252165787893,
      "loss": 2.9546,
      "step": 103131
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.498853921890259,
      "learning_rate": 0.0003491484863683903,
      "loss": 2.9249,
      "step": 103132
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.179391860961914,
      "learning_rate": 0.00034914445106976397,
      "loss": 2.9872,
      "step": 103133
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7022411823272705,
      "learning_rate": 0.0003491404157620009,
      "loss": 2.9847,
      "step": 103134
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3004417419433594,
      "learning_rate": 0.00034913638044510164,
      "loss": 2.9891,
      "step": 103135
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.06380558013916,
      "learning_rate": 0.00034913234511906694,
      "loss": 3.073,
      "step": 103136
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7144763469696045,
      "learning_rate": 0.0003491283097838977,
      "loss": 3.2239,
      "step": 103137
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.908339023590088,
      "learning_rate": 0.00034912427443959456,
      "loss": 2.9253,
      "step": 103138
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8802454471588135,
      "learning_rate": 0.0003491202390861582,
      "loss": 2.833,
      "step": 103139
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2336337566375732,
      "learning_rate": 0.0003491162037235896,
      "loss": 3.1186,
      "step": 103140
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.049973249435425,
      "learning_rate": 0.0003491121683518893,
      "loss": 3.047,
      "step": 103141
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3133201599121094,
      "learning_rate": 0.0003491081329710581,
      "loss": 3.0187,
      "step": 103142
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.002739429473877,
      "learning_rate": 0.0003491040975810968,
      "loss": 2.8848,
      "step": 103143
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2052159309387207,
      "learning_rate": 0.0003491000621820061,
      "loss": 3.0943,
      "step": 103144
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7824044227600098,
      "learning_rate": 0.00034909602677378677,
      "loss": 3.1677,
      "step": 103145
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7864869832992554,
      "learning_rate": 0.00034909199135643964,
      "loss": 2.907,
      "step": 103146
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2785661220550537,
      "learning_rate": 0.00034908795592996527,
      "loss": 3.018,
      "step": 103147
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7910665273666382,
      "learning_rate": 0.0003490839204943646,
      "loss": 2.9209,
      "step": 103148
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.330212354660034,
      "learning_rate": 0.0003490798850496382,
      "loss": 2.7455,
      "step": 103149
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9431874752044678,
      "learning_rate": 0.0003490758495957871,
      "loss": 2.959,
      "step": 103150
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6994082927703857,
      "learning_rate": 0.00034907181413281174,
      "loss": 3.0644,
      "step": 103151
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9748340845108032,
      "learning_rate": 0.000349067778660713,
      "loss": 3.3,
      "step": 103152
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.504544973373413,
      "learning_rate": 0.00034906374317949173,
      "loss": 3.016,
      "step": 103153
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.3259551525115967,
      "learning_rate": 0.0003490597076891484,
      "loss": 3.0556,
      "step": 103154
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.332031488418579,
      "learning_rate": 0.0003490556721896841,
      "loss": 3.0375,
      "step": 103155
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9567240476608276,
      "learning_rate": 0.0003490516366810994,
      "loss": 3.0746,
      "step": 103156
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.030120372772217,
      "learning_rate": 0.00034904760116339506,
      "loss": 2.7831,
      "step": 103157
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.058471202850342,
      "learning_rate": 0.0003490435656365719,
      "loss": 3.0063,
      "step": 103158
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.425196647644043,
      "learning_rate": 0.0003490395301006305,
      "loss": 2.8948,
      "step": 103159
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6074442863464355,
      "learning_rate": 0.0003490354945555718,
      "loss": 3.0319,
      "step": 103160
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.627193808555603,
      "learning_rate": 0.0003490314590013965,
      "loss": 2.9592,
      "step": 103161
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.714179754257202,
      "learning_rate": 0.00034902742343810524,
      "loss": 3.3174,
      "step": 103162
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4116830825805664,
      "learning_rate": 0.00034902338786569893,
      "loss": 3.1204,
      "step": 103163
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6133439540863037,
      "learning_rate": 0.00034901935228417823,
      "loss": 2.904,
      "step": 103164
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5508086681365967,
      "learning_rate": 0.00034901531669354394,
      "loss": 2.9921,
      "step": 103165
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.146050453186035,
      "learning_rate": 0.00034901128109379666,
      "loss": 2.9492,
      "step": 103166
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6693077087402344,
      "learning_rate": 0.0003490072454849374,
      "loss": 3.0578,
      "step": 103167
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6688356399536133,
      "learning_rate": 0.0003490032098669667,
      "loss": 2.9644,
      "step": 103168
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9401177167892456,
      "learning_rate": 0.00034899917423988533,
      "loss": 3.0332,
      "step": 103169
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1441733837127686,
      "learning_rate": 0.0003489951386036943,
      "loss": 3.198,
      "step": 103170
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.107389450073242,
      "learning_rate": 0.0003489911029583939,
      "loss": 2.9795,
      "step": 103171
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.3172786235809326,
      "learning_rate": 0.0003489870673039852,
      "loss": 2.895,
      "step": 103172
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2261159420013428,
      "learning_rate": 0.000348983031640469,
      "loss": 2.9576,
      "step": 103173
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8826637268066406,
      "learning_rate": 0.00034897899596784574,
      "loss": 3.0252,
      "step": 103174
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6944079399108887,
      "learning_rate": 0.0003489749602861164,
      "loss": 2.9624,
      "step": 103175
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9779847860336304,
      "learning_rate": 0.00034897092459528184,
      "loss": 2.9535,
      "step": 103176
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9740487337112427,
      "learning_rate": 0.00034896688889534257,
      "loss": 2.9817,
      "step": 103177
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7014319896697998,
      "learning_rate": 0.0003489628531862994,
      "loss": 3.0588,
      "step": 103178
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6953680515289307,
      "learning_rate": 0.00034895881746815313,
      "loss": 2.8674,
      "step": 103179
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9500608444213867,
      "learning_rate": 0.0003489547817409045,
      "loss": 3.0285,
      "step": 103180
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5016770362854004,
      "learning_rate": 0.00034895074600455424,
      "loss": 2.9254,
      "step": 103181
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.062258005142212,
      "learning_rate": 0.00034894671025910315,
      "loss": 2.7102,
      "step": 103182
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9217902421951294,
      "learning_rate": 0.00034894267450455186,
      "loss": 2.8905,
      "step": 103183
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8504700660705566,
      "learning_rate": 0.0003489386387409013,
      "loss": 3.2282,
      "step": 103184
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.305344581604004,
      "learning_rate": 0.0003489346029681521,
      "loss": 2.6608,
      "step": 103185
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.4040422439575195,
      "learning_rate": 0.000348930567186305,
      "loss": 3.0317,
      "step": 103186
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7980997562408447,
      "learning_rate": 0.0003489265313953607,
      "loss": 2.837,
      "step": 103187
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8437116146087646,
      "learning_rate": 0.00034892249559532025,
      "loss": 3.1446,
      "step": 103188
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.400329113006592,
      "learning_rate": 0.00034891845978618404,
      "loss": 3.2676,
      "step": 103189
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7297172546386719,
      "learning_rate": 0.00034891442396795295,
      "loss": 2.839,
      "step": 103190
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8541086912155151,
      "learning_rate": 0.00034891038814062773,
      "loss": 2.9551,
      "step": 103191
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9810683727264404,
      "learning_rate": 0.0003489063523042093,
      "loss": 2.7457,
      "step": 103192
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.1191346645355225,
      "learning_rate": 0.0003489023164586981,
      "loss": 2.8341,
      "step": 103193
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.973357081413269,
      "learning_rate": 0.00034889828060409513,
      "loss": 2.8606,
      "step": 103194
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.182819128036499,
      "learning_rate": 0.000348894244740401,
      "loss": 3.0139,
      "step": 103195
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.536466598510742,
      "learning_rate": 0.00034889020886761654,
      "loss": 3.2386,
      "step": 103196
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5328290462493896,
      "learning_rate": 0.0003488861729857424,
      "loss": 3.164,
      "step": 103197
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.673433780670166,
      "learning_rate": 0.00034888213709477955,
      "loss": 3.0295,
      "step": 103198
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8469054698944092,
      "learning_rate": 0.00034887810119472844,
      "loss": 2.7602,
      "step": 103199
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2715420722961426,
      "learning_rate": 0.0003488740652855899,
      "loss": 3.2072,
      "step": 103200
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7065517902374268,
      "learning_rate": 0.00034887002936736497,
      "loss": 3.2133,
      "step": 103201
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3897273540496826,
      "learning_rate": 0.00034886599344005403,
      "loss": 3.1268,
      "step": 103202
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6021920442581177,
      "learning_rate": 0.000348861957503658,
      "loss": 3.0474,
      "step": 103203
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.70125150680542,
      "learning_rate": 0.00034885792155817764,
      "loss": 2.9236,
      "step": 103204
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2236480712890625,
      "learning_rate": 0.00034885388560361365,
      "loss": 3.0715,
      "step": 103205
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6901172399520874,
      "learning_rate": 0.0003488498496399668,
      "loss": 3.0445,
      "step": 103206
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.187252998352051,
      "learning_rate": 0.0003488458136672379,
      "loss": 2.8401,
      "step": 103207
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7550926208496094,
      "learning_rate": 0.0003488417776854276,
      "loss": 3.031,
      "step": 103208
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2729876041412354,
      "learning_rate": 0.00034883774169453663,
      "loss": 3.311,
      "step": 103209
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.939202070236206,
      "learning_rate": 0.0003488337056945659,
      "loss": 3.0169,
      "step": 103210
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9632060527801514,
      "learning_rate": 0.00034882966968551596,
      "loss": 3.2549,
      "step": 103211
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9367139339447021,
      "learning_rate": 0.0003488256336673877,
      "loss": 3.0912,
      "step": 103212
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.178246021270752,
      "learning_rate": 0.00034882159764018194,
      "loss": 2.7588,
      "step": 103213
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.388418436050415,
      "learning_rate": 0.00034881756160389915,
      "loss": 2.9914,
      "step": 103214
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.353275775909424,
      "learning_rate": 0.0003488135255585404,
      "loss": 3.1737,
      "step": 103215
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.7452239990234375,
      "learning_rate": 0.00034880948950410625,
      "loss": 3.1667,
      "step": 103216
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2391674518585205,
      "learning_rate": 0.00034880545344059745,
      "loss": 3.1585,
      "step": 103217
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.239254951477051,
      "learning_rate": 0.0003488014173680148,
      "loss": 3.097,
      "step": 103218
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.3049190044403076,
      "learning_rate": 0.0003487973812863591,
      "loss": 2.9887,
      "step": 103219
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6415984630584717,
      "learning_rate": 0.00034879334519563105,
      "loss": 2.8495,
      "step": 103220
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.288109302520752,
      "learning_rate": 0.0003487893090958313,
      "loss": 3.0701,
      "step": 103221
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.4441452026367188,
      "learning_rate": 0.0003487852729869608,
      "loss": 2.9493,
      "step": 103222
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.6570444107055664,
      "learning_rate": 0.0003487812368690201,
      "loss": 2.9765,
      "step": 103223
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.115778684616089,
      "learning_rate": 0.00034877720074201013,
      "loss": 2.762,
      "step": 103224
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1091978549957275,
      "learning_rate": 0.00034877316460593156,
      "loss": 3.1824,
      "step": 103225
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.710864543914795,
      "learning_rate": 0.00034876912846078505,
      "loss": 2.9041,
      "step": 103226
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.5886693000793457,
      "learning_rate": 0.00034876509230657147,
      "loss": 3.0266,
      "step": 103227
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7424455881118774,
      "learning_rate": 0.0003487610561432916,
      "loss": 3.0626,
      "step": 103228
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.4739067554473877,
      "learning_rate": 0.00034875701997094606,
      "loss": 2.9978,
      "step": 103229
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.847902774810791,
      "learning_rate": 0.0003487529837895357,
      "loss": 2.8925,
      "step": 103230
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6350836753845215,
      "learning_rate": 0.00034874894759906125,
      "loss": 3.0269,
      "step": 103231
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9929348230361938,
      "learning_rate": 0.00034874491139952346,
      "loss": 3.1394,
      "step": 103232
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.0990407466888428,
      "learning_rate": 0.000348740875190923,
      "loss": 3.2533,
      "step": 103233
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9387028217315674,
      "learning_rate": 0.0003487368389732608,
      "loss": 2.8246,
      "step": 103234
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7026574611663818,
      "learning_rate": 0.00034873280274653746,
      "loss": 2.7467,
      "step": 103235
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.106614828109741,
      "learning_rate": 0.0003487287665107537,
      "loss": 2.7685,
      "step": 103236
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7843915224075317,
      "learning_rate": 0.00034872473026591037,
      "loss": 2.8525,
      "step": 103237
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7723960876464844,
      "learning_rate": 0.00034872069401200824,
      "loss": 2.8458,
      "step": 103238
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.928662657737732,
      "learning_rate": 0.000348716657749048,
      "loss": 2.8954,
      "step": 103239
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.037816286087036,
      "learning_rate": 0.00034871262147703037,
      "loss": 2.9298,
      "step": 103240
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9419351816177368,
      "learning_rate": 0.0003487085851959563,
      "loss": 3.0871,
      "step": 103241
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7200168371200562,
      "learning_rate": 0.0003487045489058262,
      "loss": 2.8333,
      "step": 103242
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1040749549865723,
      "learning_rate": 0.00034870051260664107,
      "loss": 2.9371,
      "step": 103243
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8092660903930664,
      "learning_rate": 0.00034869647629840155,
      "loss": 3.0002,
      "step": 103244
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.019930839538574,
      "learning_rate": 0.00034869243998110857,
      "loss": 3.1165,
      "step": 103245
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.4715352058410645,
      "learning_rate": 0.0003486884036547626,
      "loss": 2.9657,
      "step": 103246
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.140685796737671,
      "learning_rate": 0.00034868436731936463,
      "loss": 2.9216,
      "step": 103247
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7809057235717773,
      "learning_rate": 0.0003486803309749152,
      "loss": 2.6397,
      "step": 103248
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7965244054794312,
      "learning_rate": 0.0003486762946214153,
      "loss": 2.8673,
      "step": 103249
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7395620346069336,
      "learning_rate": 0.00034867225825886553,
      "loss": 3.0952,
      "step": 103250
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.59729266166687,
      "learning_rate": 0.0003486682218872667,
      "loss": 3.0463,
      "step": 103251
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9504644870758057,
      "learning_rate": 0.0003486641855066195,
      "loss": 3.133,
      "step": 103252
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.153921604156494,
      "learning_rate": 0.0003486601491169247,
      "loss": 2.9872,
      "step": 103253
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2520594596862793,
      "learning_rate": 0.00034865611271818296,
      "loss": 2.9641,
      "step": 103254
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8924387693405151,
      "learning_rate": 0.0003486520763103953,
      "loss": 3.093,
      "step": 103255
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.5970818996429443,
      "learning_rate": 0.00034864803989356215,
      "loss": 2.8221,
      "step": 103256
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.976245641708374,
      "learning_rate": 0.00034864400346768453,
      "loss": 3.0734,
      "step": 103257
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.698612928390503,
      "learning_rate": 0.000348639967032763,
      "loss": 3.1343,
      "step": 103258
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7200722694396973,
      "learning_rate": 0.0003486359305887985,
      "loss": 2.9393,
      "step": 103259
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9849960803985596,
      "learning_rate": 0.00034863189413579147,
      "loss": 2.832,
      "step": 103260
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.921143054962158,
      "learning_rate": 0.00034862785767374296,
      "loss": 3.1524,
      "step": 103261
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6390634775161743,
      "learning_rate": 0.00034862382120265373,
      "loss": 2.9367,
      "step": 103262
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.8820935487747192,
      "learning_rate": 0.00034861978472252426,
      "loss": 3.0772,
      "step": 103263
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.698377013206482,
      "learning_rate": 0.00034861574823335546,
      "loss": 3.1123,
      "step": 103264
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7314459085464478,
      "learning_rate": 0.0003486117117351481,
      "loss": 3.0857,
      "step": 103265
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.1872658729553223,
      "learning_rate": 0.0003486076752279029,
      "loss": 3.0342,
      "step": 103266
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9116307497024536,
      "learning_rate": 0.0003486036387116206,
      "loss": 3.1263,
      "step": 103267
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.1300954818725586,
      "learning_rate": 0.000348599602186302,
      "loss": 2.9362,
      "step": 103268
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.557563304901123,
      "learning_rate": 0.00034859556565194785,
      "loss": 2.895,
      "step": 103269
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6184371709823608,
      "learning_rate": 0.00034859152910855876,
      "loss": 3.0814,
      "step": 103270
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.75294828414917,
      "learning_rate": 0.0003485874925561357,
      "loss": 2.8515,
      "step": 103271
    },
    {
      "epoch": 1.34,
      "grad_norm": 4.776372909545898,
      "learning_rate": 0.00034858345599467926,
      "loss": 2.9898,
      "step": 103272
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.8057188987731934,
      "learning_rate": 0.00034857941942419016,
      "loss": 3.0239,
      "step": 103273
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.9341976642608643,
      "learning_rate": 0.00034857538284466943,
      "loss": 3.0584,
      "step": 103274
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7834564447402954,
      "learning_rate": 0.00034857134625611747,
      "loss": 3.1379,
      "step": 103275
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.487248659133911,
      "learning_rate": 0.0003485673096585351,
      "loss": 2.7821,
      "step": 103276
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.245553493499756,
      "learning_rate": 0.0003485632730519234,
      "loss": 3.0535,
      "step": 103277
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.8985071182250977,
      "learning_rate": 0.00034855923643628263,
      "loss": 2.7975,
      "step": 103278
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.913741111755371,
      "learning_rate": 0.0003485551998116139,
      "loss": 3.1657,
      "step": 103279
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.026888132095337,
      "learning_rate": 0.00034855116317791785,
      "loss": 2.8037,
      "step": 103280
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.2743825912475586,
      "learning_rate": 0.0003485471265351952,
      "loss": 2.8773,
      "step": 103281
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.3549277782440186,
      "learning_rate": 0.00034854308988344665,
      "loss": 3.0616,
      "step": 103282
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6239750385284424,
      "learning_rate": 0.00034853905322267315,
      "loss": 2.8892,
      "step": 103283
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7731996774673462,
      "learning_rate": 0.00034853501655287525,
      "loss": 2.8066,
      "step": 103284
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6966676712036133,
      "learning_rate": 0.0003485309798740538,
      "loss": 2.9525,
      "step": 103285
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.3463451862335205,
      "learning_rate": 0.0003485269431862095,
      "loss": 3.0719,
      "step": 103286
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.404729127883911,
      "learning_rate": 0.00034852290648934315,
      "loss": 2.7924,
      "step": 103287
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6505651473999023,
      "learning_rate": 0.0003485188697834554,
      "loss": 3.0342,
      "step": 103288
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6680824756622314,
      "learning_rate": 0.0003485148330685472,
      "loss": 2.9252,
      "step": 103289
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.856217384338379,
      "learning_rate": 0.00034851079634461905,
      "loss": 3.118,
      "step": 103290
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7040618658065796,
      "learning_rate": 0.00034850675961167196,
      "loss": 2.9567,
      "step": 103291
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.6879346370697021,
      "learning_rate": 0.00034850272286970647,
      "loss": 3.0609,
      "step": 103292
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.005958318710327,
      "learning_rate": 0.0003484986861187234,
      "loss": 2.7859,
      "step": 103293
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.871385931968689,
      "learning_rate": 0.0003484946493587235,
      "loss": 3.0174,
      "step": 103294
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.694917917251587,
      "learning_rate": 0.00034849061258970766,
      "loss": 3.0654,
      "step": 103295
    },
    {
      "epoch": 1.34,
      "grad_norm": 1.7632527351379395,
      "learning_rate": 0.00034848657581167635,
      "loss": 3.1366,
      "step": 103296
    },
    {
      "epoch": 1.34,
      "grad_norm": 2.071852445602417,
      "learning_rate": 0.0003484825390246305,
      "loss": 3.0454,
      "step": 103297
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7516599893569946,
      "learning_rate": 0.0003484785022285709,
      "loss": 2.7253,
      "step": 103298
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.352445125579834,
      "learning_rate": 0.0003484744654234981,
      "loss": 3.1491,
      "step": 103299
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7344906330108643,
      "learning_rate": 0.0003484704286094131,
      "loss": 2.784,
      "step": 103300
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0653305053710938,
      "learning_rate": 0.0003484663917863165,
      "loss": 2.8425,
      "step": 103301
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1507203578948975,
      "learning_rate": 0.00034846235495420905,
      "loss": 2.6923,
      "step": 103302
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2968287467956543,
      "learning_rate": 0.00034845831811309154,
      "loss": 2.8994,
      "step": 103303
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1159515380859375,
      "learning_rate": 0.0003484542812629648,
      "loss": 2.9315,
      "step": 103304
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.78081214427948,
      "learning_rate": 0.0003484502444038294,
      "loss": 2.8629,
      "step": 103305
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1723735332489014,
      "learning_rate": 0.00034844620753568615,
      "loss": 2.9701,
      "step": 103306
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.083594799041748,
      "learning_rate": 0.00034844217065853594,
      "loss": 3.038,
      "step": 103307
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6364303827285767,
      "learning_rate": 0.0003484381337723794,
      "loss": 3.0062,
      "step": 103308
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8851187229156494,
      "learning_rate": 0.00034843409687721725,
      "loss": 3.1908,
      "step": 103309
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9775307178497314,
      "learning_rate": 0.00034843005997305034,
      "loss": 2.7442,
      "step": 103310
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6870090961456299,
      "learning_rate": 0.00034842602305987935,
      "loss": 2.9859,
      "step": 103311
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7383593320846558,
      "learning_rate": 0.000348421986137705,
      "loss": 3.0032,
      "step": 103312
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5455482006073,
      "learning_rate": 0.0003484179492065281,
      "loss": 3.1704,
      "step": 103313
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6799615621566772,
      "learning_rate": 0.00034841391226634943,
      "loss": 3.0918,
      "step": 103314
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.942570686340332,
      "learning_rate": 0.0003484098753171697,
      "loss": 2.9942,
      "step": 103315
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.211984395980835,
      "learning_rate": 0.0003484058383589897,
      "loss": 2.6726,
      "step": 103316
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6421144008636475,
      "learning_rate": 0.00034840180139181004,
      "loss": 3.0179,
      "step": 103317
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.103335380554199,
      "learning_rate": 0.00034839776441563156,
      "loss": 3.0663,
      "step": 103318
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8923521041870117,
      "learning_rate": 0.00034839372743045513,
      "loss": 2.7623,
      "step": 103319
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.035288095474243,
      "learning_rate": 0.00034838969043628134,
      "loss": 3.1646,
      "step": 103320
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8727850914001465,
      "learning_rate": 0.00034838565343311094,
      "loss": 2.8122,
      "step": 103321
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.133544921875,
      "learning_rate": 0.00034838161642094487,
      "loss": 2.9836,
      "step": 103322
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8962645530700684,
      "learning_rate": 0.00034837757939978366,
      "loss": 3.2099,
      "step": 103323
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7344608306884766,
      "learning_rate": 0.00034837354236962807,
      "loss": 2.9706,
      "step": 103324
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.301286220550537,
      "learning_rate": 0.00034836950533047897,
      "loss": 2.9025,
      "step": 103325
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5391443967819214,
      "learning_rate": 0.00034836546828233717,
      "loss": 3.1232,
      "step": 103326
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9481117725372314,
      "learning_rate": 0.0003483614312252032,
      "loss": 2.7192,
      "step": 103327
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.955320954322815,
      "learning_rate": 0.000348357394159078,
      "loss": 3.0887,
      "step": 103328
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.643782377243042,
      "learning_rate": 0.0003483533570839622,
      "loss": 2.7715,
      "step": 103329
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.758887767791748,
      "learning_rate": 0.00034834931999985666,
      "loss": 3.1999,
      "step": 103330
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5992268323898315,
      "learning_rate": 0.00034834528290676194,
      "loss": 2.6299,
      "step": 103331
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7809805870056152,
      "learning_rate": 0.00034834124580467897,
      "loss": 3.1016,
      "step": 103332
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.224392890930176,
      "learning_rate": 0.00034833720869360856,
      "loss": 2.865,
      "step": 103333
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9886906147003174,
      "learning_rate": 0.00034833317157355125,
      "loss": 3.1495,
      "step": 103334
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.028456687927246,
      "learning_rate": 0.0003483291344445079,
      "loss": 3.3478,
      "step": 103335
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8380091190338135,
      "learning_rate": 0.0003483250973064792,
      "loss": 3.1369,
      "step": 103336
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7621551752090454,
      "learning_rate": 0.00034832106015946613,
      "loss": 3.022,
      "step": 103337
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.406083345413208,
      "learning_rate": 0.0003483170230034691,
      "loss": 3.157,
      "step": 103338
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.963090181350708,
      "learning_rate": 0.00034831298583848906,
      "loss": 3.0264,
      "step": 103339
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.283437967300415,
      "learning_rate": 0.0003483089486645267,
      "loss": 2.8883,
      "step": 103340
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.1165213584899902,
      "learning_rate": 0.0003483049114815829,
      "loss": 2.8793,
      "step": 103341
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1856203079223633,
      "learning_rate": 0.00034830087428965826,
      "loss": 2.7877,
      "step": 103342
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7360564470291138,
      "learning_rate": 0.00034829683708875346,
      "loss": 3.0311,
      "step": 103343
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7874974012374878,
      "learning_rate": 0.0003482927998788695,
      "loss": 3.1665,
      "step": 103344
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.462639808654785,
      "learning_rate": 0.00034828876266000696,
      "loss": 2.7651,
      "step": 103345
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.592665195465088,
      "learning_rate": 0.0003482847254321666,
      "loss": 3.1031,
      "step": 103346
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0104212760925293,
      "learning_rate": 0.0003482806881953493,
      "loss": 2.7847,
      "step": 103347
    },
    {
      "epoch": 1.35,
      "grad_norm": 5.900144577026367,
      "learning_rate": 0.0003482766509495556,
      "loss": 2.9939,
      "step": 103348
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.5227973461151123,
      "learning_rate": 0.0003482726136947864,
      "loss": 2.9591,
      "step": 103349
    },
    {
      "epoch": 1.35,
      "grad_norm": 5.0103912353515625,
      "learning_rate": 0.0003482685764310424,
      "loss": 2.9328,
      "step": 103350
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9732575416564941,
      "learning_rate": 0.0003482645391583243,
      "loss": 3.2391,
      "step": 103351
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7098420858383179,
      "learning_rate": 0.00034826050187663295,
      "loss": 2.8992,
      "step": 103352
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.70931077003479,
      "learning_rate": 0.00034825646458596916,
      "loss": 3.1193,
      "step": 103353
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.073908567428589,
      "learning_rate": 0.0003482524272863335,
      "loss": 2.9451,
      "step": 103354
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.258326530456543,
      "learning_rate": 0.0003482483899777268,
      "loss": 3.1484,
      "step": 103355
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.574444055557251,
      "learning_rate": 0.0003482443526601499,
      "loss": 3.2137,
      "step": 103356
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3884739875793457,
      "learning_rate": 0.00034824031533360334,
      "loss": 3.0697,
      "step": 103357
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.6756181716918945,
      "learning_rate": 0.00034823627799808796,
      "loss": 2.8714,
      "step": 103358
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9067583084106445,
      "learning_rate": 0.0003482322406536047,
      "loss": 3.2022,
      "step": 103359
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6635750532150269,
      "learning_rate": 0.00034822820330015407,
      "loss": 3.1453,
      "step": 103360
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7190442085266113,
      "learning_rate": 0.0003482241659377369,
      "loss": 2.965,
      "step": 103361
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3650705814361572,
      "learning_rate": 0.000348220128566354,
      "loss": 3.0564,
      "step": 103362
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.548814058303833,
      "learning_rate": 0.00034821609118600605,
      "loss": 2.8742,
      "step": 103363
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0328452587127686,
      "learning_rate": 0.00034821205379669375,
      "loss": 3.0784,
      "step": 103364
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.969214916229248,
      "learning_rate": 0.000348208016398418,
      "loss": 3.1085,
      "step": 103365
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.335995674133301,
      "learning_rate": 0.00034820397899117946,
      "loss": 2.8309,
      "step": 103366
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.444023370742798,
      "learning_rate": 0.00034819994157497884,
      "loss": 3.0804,
      "step": 103367
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7544097900390625,
      "learning_rate": 0.00034819590414981706,
      "loss": 3.0982,
      "step": 103368
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8694984912872314,
      "learning_rate": 0.00034819186671569463,
      "loss": 3.0523,
      "step": 103369
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.654180884361267,
      "learning_rate": 0.0003481878292726124,
      "loss": 3.0705,
      "step": 103370
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8204702138900757,
      "learning_rate": 0.00034818379182057125,
      "loss": 3.0075,
      "step": 103371
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7579829692840576,
      "learning_rate": 0.00034817975435957177,
      "loss": 2.7115,
      "step": 103372
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0401864051818848,
      "learning_rate": 0.0003481757168896148,
      "loss": 2.938,
      "step": 103373
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9400428533554077,
      "learning_rate": 0.000348171679410701,
      "loss": 2.862,
      "step": 103374
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5819180011749268,
      "learning_rate": 0.0003481676419228312,
      "loss": 3.1488,
      "step": 103375
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1120643615722656,
      "learning_rate": 0.00034816360442600607,
      "loss": 3.0693,
      "step": 103376
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8853726387023926,
      "learning_rate": 0.00034815956692022654,
      "loss": 2.8835,
      "step": 103377
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6319791078567505,
      "learning_rate": 0.00034815552940549313,
      "loss": 2.9726,
      "step": 103378
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0892763137817383,
      "learning_rate": 0.00034815149188180676,
      "loss": 3.2374,
      "step": 103379
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7198978662490845,
      "learning_rate": 0.00034814745434916814,
      "loss": 2.8875,
      "step": 103380
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6964783668518066,
      "learning_rate": 0.00034814341680757796,
      "loss": 3.1433,
      "step": 103381
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9984230995178223,
      "learning_rate": 0.000348139379257037,
      "loss": 3.1648,
      "step": 103382
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0703165531158447,
      "learning_rate": 0.00034813534169754603,
      "loss": 2.8717,
      "step": 103383
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9080809354782104,
      "learning_rate": 0.00034813130412910575,
      "loss": 2.998,
      "step": 103384
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.599088191986084,
      "learning_rate": 0.00034812726655171695,
      "loss": 3.1174,
      "step": 103385
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9271007776260376,
      "learning_rate": 0.0003481232289653805,
      "loss": 2.82,
      "step": 103386
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1554698944091797,
      "learning_rate": 0.0003481191913700969,
      "loss": 3.0083,
      "step": 103387
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3538568019866943,
      "learning_rate": 0.0003481151537658671,
      "loss": 2.8952,
      "step": 103388
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8574656248092651,
      "learning_rate": 0.0003481111161526918,
      "loss": 3.008,
      "step": 103389
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.942385196685791,
      "learning_rate": 0.0003481070785305717,
      "loss": 2.6864,
      "step": 103390
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6882569789886475,
      "learning_rate": 0.0003481030408995075,
      "loss": 2.8862,
      "step": 103391
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8933184146881104,
      "learning_rate": 0.0003480990032595002,
      "loss": 3.2785,
      "step": 103392
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6885476112365723,
      "learning_rate": 0.00034809496561055036,
      "loss": 3.076,
      "step": 103393
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3569107055664062,
      "learning_rate": 0.0003480909279526586,
      "loss": 3.0809,
      "step": 103394
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.180703639984131,
      "learning_rate": 0.000348086890285826,
      "loss": 2.989,
      "step": 103395
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9510815143585205,
      "learning_rate": 0.00034808285261005314,
      "loss": 3.0336,
      "step": 103396
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8266265392303467,
      "learning_rate": 0.0003480788149253407,
      "loss": 2.9587,
      "step": 103397
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.125089645385742,
      "learning_rate": 0.0003480747772316895,
      "loss": 2.7271,
      "step": 103398
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3962533473968506,
      "learning_rate": 0.0003480707395291003,
      "loss": 2.9767,
      "step": 103399
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7942930459976196,
      "learning_rate": 0.00034806670181757385,
      "loss": 3.1482,
      "step": 103400
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.000408172607422,
      "learning_rate": 0.00034806266409711086,
      "loss": 2.8563,
      "step": 103401
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7193810939788818,
      "learning_rate": 0.00034805862636771225,
      "loss": 2.7599,
      "step": 103402
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2881386280059814,
      "learning_rate": 0.00034805458862937846,
      "loss": 3.0844,
      "step": 103403
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.109182357788086,
      "learning_rate": 0.00034805055088211047,
      "loss": 2.8216,
      "step": 103404
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.805105447769165,
      "learning_rate": 0.000348046513125909,
      "loss": 3.0475,
      "step": 103405
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.251019239425659,
      "learning_rate": 0.0003480424753607748,
      "loss": 3.2125,
      "step": 103406
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6119009256362915,
      "learning_rate": 0.0003480384375867085,
      "loss": 3.2035,
      "step": 103407
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.714464783668518,
      "learning_rate": 0.00034803439980371113,
      "loss": 3.0551,
      "step": 103408
    },
    {
      "epoch": 1.35,
      "grad_norm": 5.041045665740967,
      "learning_rate": 0.0003480303620117831,
      "loss": 3.0757,
      "step": 103409
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2622601985931396,
      "learning_rate": 0.0003480263242109253,
      "loss": 3.0377,
      "step": 103410
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0111629962921143,
      "learning_rate": 0.0003480222864011386,
      "loss": 3.0295,
      "step": 103411
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8504149913787842,
      "learning_rate": 0.0003480182485824236,
      "loss": 3.0664,
      "step": 103412
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8783302307128906,
      "learning_rate": 0.0003480142107547811,
      "loss": 2.8766,
      "step": 103413
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8062777519226074,
      "learning_rate": 0.0003480101729182119,
      "loss": 2.9931,
      "step": 103414
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9678478240966797,
      "learning_rate": 0.00034800613507271667,
      "loss": 3.1738,
      "step": 103415
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.137019157409668,
      "learning_rate": 0.00034800209721829616,
      "loss": 2.8364,
      "step": 103416
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.480186700820923,
      "learning_rate": 0.00034799805935495115,
      "loss": 2.9453,
      "step": 103417
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9871182441711426,
      "learning_rate": 0.00034799402148268246,
      "loss": 2.6343,
      "step": 103418
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.786745548248291,
      "learning_rate": 0.0003479899836014907,
      "loss": 2.9816,
      "step": 103419
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8150590658187866,
      "learning_rate": 0.0003479859457113767,
      "loss": 3.0657,
      "step": 103420
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1168649196624756,
      "learning_rate": 0.0003479819078123413,
      "loss": 2.9471,
      "step": 103421
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6116617918014526,
      "learning_rate": 0.00034797786990438514,
      "loss": 3.1959,
      "step": 103422
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.326808452606201,
      "learning_rate": 0.0003479738319875089,
      "loss": 2.9672,
      "step": 103423
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1649444103240967,
      "learning_rate": 0.00034796979406171347,
      "loss": 2.9616,
      "step": 103424
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1725733280181885,
      "learning_rate": 0.0003479657561269996,
      "loss": 3.1723,
      "step": 103425
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7954293489456177,
      "learning_rate": 0.00034796171818336796,
      "loss": 2.9028,
      "step": 103426
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.60415518283844,
      "learning_rate": 0.0003479576802308193,
      "loss": 3.0138,
      "step": 103427
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.094496488571167,
      "learning_rate": 0.0003479536422693543,
      "loss": 3.1622,
      "step": 103428
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3613359928131104,
      "learning_rate": 0.000347949604298974,
      "loss": 3.0108,
      "step": 103429
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8177186250686646,
      "learning_rate": 0.00034794556631967887,
      "loss": 3.1402,
      "step": 103430
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7558618783950806,
      "learning_rate": 0.0003479415283314697,
      "loss": 3.0849,
      "step": 103431
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.786136507987976,
      "learning_rate": 0.00034793749033434744,
      "loss": 2.9524,
      "step": 103432
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.1277477741241455,
      "learning_rate": 0.00034793345232831264,
      "loss": 2.7298,
      "step": 103433
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7948803901672363,
      "learning_rate": 0.0003479294143133661,
      "loss": 3.0465,
      "step": 103434
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9265228509902954,
      "learning_rate": 0.0003479253762895086,
      "loss": 2.9727,
      "step": 103435
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1906397342681885,
      "learning_rate": 0.00034792133825674076,
      "loss": 3.1892,
      "step": 103436
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6834522485733032,
      "learning_rate": 0.0003479173002150635,
      "loss": 3.3087,
      "step": 103437
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6971665620803833,
      "learning_rate": 0.00034791326216447756,
      "loss": 3.266,
      "step": 103438
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.77726149559021,
      "learning_rate": 0.00034790922410498357,
      "loss": 2.7163,
      "step": 103439
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1624293327331543,
      "learning_rate": 0.0003479051860365824,
      "loss": 3.0723,
      "step": 103440
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.709931492805481,
      "learning_rate": 0.0003479011479592748,
      "loss": 2.8056,
      "step": 103441
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6343681812286377,
      "learning_rate": 0.0003478971098730614,
      "loss": 3.0261,
      "step": 103442
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8661812543869019,
      "learning_rate": 0.000347893071777943,
      "loss": 3.0748,
      "step": 103443
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8488315343856812,
      "learning_rate": 0.00034788903367392047,
      "loss": 3.1537,
      "step": 103444
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8060286045074463,
      "learning_rate": 0.00034788499556099434,
      "loss": 2.9349,
      "step": 103445
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.633806824684143,
      "learning_rate": 0.00034788095743916556,
      "loss": 3.0773,
      "step": 103446
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2492754459381104,
      "learning_rate": 0.0003478769193084348,
      "loss": 2.8007,
      "step": 103447
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.863158106803894,
      "learning_rate": 0.00034787288116880285,
      "loss": 2.7999,
      "step": 103448
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6396143436431885,
      "learning_rate": 0.00034786884302027033,
      "loss": 3.0906,
      "step": 103449
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1041440963745117,
      "learning_rate": 0.0003478648048628382,
      "loss": 2.9034,
      "step": 103450
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.689028024673462,
      "learning_rate": 0.000347860766696507,
      "loss": 3.0278,
      "step": 103451
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9042333364486694,
      "learning_rate": 0.00034785672852127766,
      "loss": 3.1766,
      "step": 103452
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0127711296081543,
      "learning_rate": 0.00034785269033715084,
      "loss": 2.7661,
      "step": 103453
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9257291555404663,
      "learning_rate": 0.00034784865214412727,
      "loss": 2.83,
      "step": 103454
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2028629779815674,
      "learning_rate": 0.00034784461394220766,
      "loss": 2.997,
      "step": 103455
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0205631256103516,
      "learning_rate": 0.000347840575731393,
      "loss": 2.8269,
      "step": 103456
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5158100128173828,
      "learning_rate": 0.0003478365375116838,
      "loss": 3.1335,
      "step": 103457
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7437361478805542,
      "learning_rate": 0.00034783249928308077,
      "loss": 3.016,
      "step": 103458
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9362913370132446,
      "learning_rate": 0.00034782846104558485,
      "loss": 3.1666,
      "step": 103459
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.6048238277435303,
      "learning_rate": 0.00034782442279919685,
      "loss": 3.022,
      "step": 103460
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3340649604797363,
      "learning_rate": 0.0003478203845439172,
      "loss": 3.1148,
      "step": 103461
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9097753763198853,
      "learning_rate": 0.00034781634627974693,
      "loss": 2.8246,
      "step": 103462
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.9999022483825684,
      "learning_rate": 0.00034781230800668675,
      "loss": 3.2124,
      "step": 103463
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7964346408843994,
      "learning_rate": 0.00034780826972473724,
      "loss": 3.0494,
      "step": 103464
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8909765481948853,
      "learning_rate": 0.0003478042314338993,
      "loss": 2.9794,
      "step": 103465
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.751194715499878,
      "learning_rate": 0.00034780019313417374,
      "loss": 3.0866,
      "step": 103466
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.72717547416687,
      "learning_rate": 0.00034779615482556116,
      "loss": 3.3111,
      "step": 103467
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8038675785064697,
      "learning_rate": 0.0003477921165080623,
      "loss": 2.9476,
      "step": 103468
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.882611870765686,
      "learning_rate": 0.00034778807818167817,
      "loss": 3.1945,
      "step": 103469
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.838187336921692,
      "learning_rate": 0.0003477840398464092,
      "loss": 3.1338,
      "step": 103470
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9678962230682373,
      "learning_rate": 0.0003477800015022562,
      "loss": 3.0754,
      "step": 103471
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8547449111938477,
      "learning_rate": 0.0003477759631492202,
      "loss": 3.2139,
      "step": 103472
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7468125820159912,
      "learning_rate": 0.00034777192478730156,
      "loss": 2.9621,
      "step": 103473
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0467324256896973,
      "learning_rate": 0.0003477678864165013,
      "loss": 2.9191,
      "step": 103474
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6260250806808472,
      "learning_rate": 0.00034776384803682014,
      "loss": 2.8808,
      "step": 103475
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8257642984390259,
      "learning_rate": 0.00034775980964825865,
      "loss": 2.9708,
      "step": 103476
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7697172164916992,
      "learning_rate": 0.0003477557712508178,
      "loss": 3.1392,
      "step": 103477
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0726897716522217,
      "learning_rate": 0.0003477517328444983,
      "loss": 2.8757,
      "step": 103478
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9241924285888672,
      "learning_rate": 0.00034774769442930077,
      "loss": 2.9053,
      "step": 103479
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8942075967788696,
      "learning_rate": 0.000347743656005226,
      "loss": 2.9167,
      "step": 103480
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9891482591629028,
      "learning_rate": 0.0003477396175722749,
      "loss": 2.9398,
      "step": 103481
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.945143699645996,
      "learning_rate": 0.00034773557913044803,
      "loss": 3.0211,
      "step": 103482
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9689595699310303,
      "learning_rate": 0.00034773154067974614,
      "loss": 3.2208,
      "step": 103483
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6554036140441895,
      "learning_rate": 0.0003477275022201702,
      "loss": 2.9342,
      "step": 103484
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5205206871032715,
      "learning_rate": 0.00034772346375172076,
      "loss": 2.9324,
      "step": 103485
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7522063255310059,
      "learning_rate": 0.0003477194252743986,
      "loss": 3.1625,
      "step": 103486
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9554781913757324,
      "learning_rate": 0.0003477153867882046,
      "loss": 3.3352,
      "step": 103487
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9406750202178955,
      "learning_rate": 0.0003477113482931393,
      "loss": 3.171,
      "step": 103488
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7802629470825195,
      "learning_rate": 0.00034770730978920356,
      "loss": 3.0618,
      "step": 103489
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5310604572296143,
      "learning_rate": 0.0003477032712763982,
      "loss": 3.1165,
      "step": 103490
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6636236906051636,
      "learning_rate": 0.0003476992327547239,
      "loss": 3.0156,
      "step": 103491
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6788971424102783,
      "learning_rate": 0.00034769519422418136,
      "loss": 3.0127,
      "step": 103492
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.5319738388061523,
      "learning_rate": 0.0003476911556847714,
      "loss": 2.7277,
      "step": 103493
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.688516616821289,
      "learning_rate": 0.00034768711713649474,
      "loss": 2.702,
      "step": 103494
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0956473350524902,
      "learning_rate": 0.00034768307857935217,
      "loss": 2.8023,
      "step": 103495
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7846745252609253,
      "learning_rate": 0.0003476790400133444,
      "loss": 2.9737,
      "step": 103496
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7654463052749634,
      "learning_rate": 0.0003476750014384722,
      "loss": 2.7612,
      "step": 103497
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.408731460571289,
      "learning_rate": 0.00034767096285473633,
      "loss": 2.8326,
      "step": 103498
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.93520987033844,
      "learning_rate": 0.00034766692426213753,
      "loss": 2.8632,
      "step": 103499
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.835960626602173,
      "learning_rate": 0.0003476628856606765,
      "loss": 2.9372,
      "step": 103500
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8113789558410645,
      "learning_rate": 0.0003476588470503541,
      "loss": 2.7854,
      "step": 103501
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1183021068573,
      "learning_rate": 0.00034765480843117094,
      "loss": 2.9036,
      "step": 103502
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8953620195388794,
      "learning_rate": 0.00034765076980312795,
      "loss": 3.15,
      "step": 103503
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9920687675476074,
      "learning_rate": 0.0003476467311662257,
      "loss": 3.0339,
      "step": 103504
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.1403157711029053,
      "learning_rate": 0.00034764269252046513,
      "loss": 2.9025,
      "step": 103505
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.467008113861084,
      "learning_rate": 0.0003476386538658468,
      "loss": 2.9776,
      "step": 103506
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.897933840751648,
      "learning_rate": 0.0003476346152023716,
      "loss": 3.1203,
      "step": 103507
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.4879143238067627,
      "learning_rate": 0.0003476305765300401,
      "loss": 2.8174,
      "step": 103508
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.815028667449951,
      "learning_rate": 0.00034762653784885326,
      "loss": 2.9269,
      "step": 103509
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.823120594024658,
      "learning_rate": 0.0003476224991588118,
      "loss": 2.9777,
      "step": 103510
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6831157207489014,
      "learning_rate": 0.00034761846045991635,
      "loss": 3.0037,
      "step": 103511
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.410102605819702,
      "learning_rate": 0.00034761442175216775,
      "loss": 2.8181,
      "step": 103512
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.4584643840789795,
      "learning_rate": 0.00034761038303556674,
      "loss": 2.9274,
      "step": 103513
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.228963613510132,
      "learning_rate": 0.00034760634431011404,
      "loss": 3.0558,
      "step": 103514
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8403342962265015,
      "learning_rate": 0.00034760230557581045,
      "loss": 2.9075,
      "step": 103515
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.142551422119141,
      "learning_rate": 0.0003475982668326566,
      "loss": 2.7928,
      "step": 103516
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.1990928649902344,
      "learning_rate": 0.0003475942280806535,
      "loss": 2.8578,
      "step": 103517
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8147066831588745,
      "learning_rate": 0.00034759018931980164,
      "loss": 3.4333,
      "step": 103518
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.776261329650879,
      "learning_rate": 0.00034758615055010183,
      "loss": 2.9686,
      "step": 103519
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0849082469940186,
      "learning_rate": 0.00034758211177155494,
      "loss": 2.7094,
      "step": 103520
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9618548154830933,
      "learning_rate": 0.00034757807298416155,
      "loss": 2.9438,
      "step": 103521
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.80483877658844,
      "learning_rate": 0.00034757403418792253,
      "loss": 3.0075,
      "step": 103522
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6371841430664062,
      "learning_rate": 0.0003475699953828387,
      "loss": 3.1537,
      "step": 103523
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7461068630218506,
      "learning_rate": 0.0003475659565689106,
      "loss": 3.0723,
      "step": 103524
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9611507654190063,
      "learning_rate": 0.0003475619177461391,
      "loss": 3.1011,
      "step": 103525
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0305869579315186,
      "learning_rate": 0.0003475578789145249,
      "loss": 3.1937,
      "step": 103526
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5189982652664185,
      "learning_rate": 0.00034755384007406886,
      "loss": 2.804,
      "step": 103527
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.393582344055176,
      "learning_rate": 0.00034754980122477164,
      "loss": 2.9787,
      "step": 103528
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6416130065917969,
      "learning_rate": 0.00034754576236663406,
      "loss": 2.6721,
      "step": 103529
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6798447370529175,
      "learning_rate": 0.00034754172349965676,
      "loss": 2.9628,
      "step": 103530
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5302702188491821,
      "learning_rate": 0.0003475376846238406,
      "loss": 3.1967,
      "step": 103531
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5991899967193604,
      "learning_rate": 0.00034753364573918627,
      "loss": 2.8198,
      "step": 103532
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5771468877792358,
      "learning_rate": 0.00034752960684569453,
      "loss": 2.9175,
      "step": 103533
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8272777795791626,
      "learning_rate": 0.00034752556794336617,
      "loss": 2.9444,
      "step": 103534
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2040910720825195,
      "learning_rate": 0.00034752152903220183,
      "loss": 2.9977,
      "step": 103535
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6767417192459106,
      "learning_rate": 0.0003475174901122025,
      "loss": 2.9667,
      "step": 103536
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.945796012878418,
      "learning_rate": 0.00034751345118336864,
      "loss": 2.8764,
      "step": 103537
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.802659273147583,
      "learning_rate": 0.0003475094122457012,
      "loss": 2.7701,
      "step": 103538
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.941556453704834,
      "learning_rate": 0.0003475053732992008,
      "loss": 2.7266,
      "step": 103539
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9862405061721802,
      "learning_rate": 0.00034750133434386824,
      "loss": 2.8205,
      "step": 103540
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9293841123580933,
      "learning_rate": 0.00034749729537970436,
      "loss": 3.1837,
      "step": 103541
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.3646647930145264,
      "learning_rate": 0.0003474932564067098,
      "loss": 2.9842,
      "step": 103542
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5217807292938232,
      "learning_rate": 0.00034748921742488537,
      "loss": 2.8879,
      "step": 103543
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3150386810302734,
      "learning_rate": 0.00034748517843423177,
      "loss": 2.8518,
      "step": 103544
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.229780435562134,
      "learning_rate": 0.00034748113943474984,
      "loss": 2.8087,
      "step": 103545
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8568713665008545,
      "learning_rate": 0.0003474771004264401,
      "loss": 2.8429,
      "step": 103546
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.443864583969116,
      "learning_rate": 0.0003474730614093036,
      "loss": 3.1518,
      "step": 103547
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6666103601455688,
      "learning_rate": 0.00034746902238334105,
      "loss": 2.8329,
      "step": 103548
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7461035251617432,
      "learning_rate": 0.00034746498334855294,
      "loss": 2.7384,
      "step": 103549
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7996866703033447,
      "learning_rate": 0.00034746094430494024,
      "loss": 2.9293,
      "step": 103550
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.403764247894287,
      "learning_rate": 0.0003474569052525037,
      "loss": 3.0677,
      "step": 103551
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9177370071411133,
      "learning_rate": 0.000347452866191244,
      "loss": 3.0002,
      "step": 103552
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6329481601715088,
      "learning_rate": 0.00034744882712116196,
      "loss": 3.0972,
      "step": 103553
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8758201599121094,
      "learning_rate": 0.00034744478804225827,
      "loss": 3.221,
      "step": 103554
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5160751342773438,
      "learning_rate": 0.00034744074895453367,
      "loss": 3.057,
      "step": 103555
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.0475757122039795,
      "learning_rate": 0.0003474367098579889,
      "loss": 3.0273,
      "step": 103556
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7375242710113525,
      "learning_rate": 0.0003474326707526249,
      "loss": 3.0077,
      "step": 103557
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5817203521728516,
      "learning_rate": 0.0003474286316384421,
      "loss": 3.0487,
      "step": 103558
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7781214714050293,
      "learning_rate": 0.0003474245925154415,
      "loss": 3.1027,
      "step": 103559
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.071585178375244,
      "learning_rate": 0.0003474205533836238,
      "loss": 2.9023,
      "step": 103560
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.127782106399536,
      "learning_rate": 0.00034741651424298963,
      "loss": 2.9592,
      "step": 103561
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.188720703125,
      "learning_rate": 0.00034741247509353994,
      "loss": 2.9877,
      "step": 103562
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.2061047554016113,
      "learning_rate": 0.00034740843593527535,
      "loss": 2.9293,
      "step": 103563
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.220374822616577,
      "learning_rate": 0.0003474043967681966,
      "loss": 2.9868,
      "step": 103564
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.179658889770508,
      "learning_rate": 0.00034740035759230444,
      "loss": 2.7697,
      "step": 103565
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.665055990219116,
      "learning_rate": 0.0003473963184075998,
      "loss": 3.2566,
      "step": 103566
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7632715702056885,
      "learning_rate": 0.00034739227921408314,
      "loss": 2.9526,
      "step": 103567
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9894248247146606,
      "learning_rate": 0.0003473882400117554,
      "loss": 3.0558,
      "step": 103568
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7680048942565918,
      "learning_rate": 0.00034738420080061735,
      "loss": 2.8485,
      "step": 103569
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.069730520248413,
      "learning_rate": 0.00034738016158066963,
      "loss": 3.045,
      "step": 103570
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9314498901367188,
      "learning_rate": 0.00034737612235191304,
      "loss": 2.8663,
      "step": 103571
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1300158500671387,
      "learning_rate": 0.00034737208311434845,
      "loss": 2.9116,
      "step": 103572
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.6243736743927,
      "learning_rate": 0.0003473680438679764,
      "loss": 3.0958,
      "step": 103573
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6131278276443481,
      "learning_rate": 0.00034736400461279774,
      "loss": 3.0259,
      "step": 103574
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9519551992416382,
      "learning_rate": 0.00034735996534881325,
      "loss": 2.9992,
      "step": 103575
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.779076337814331,
      "learning_rate": 0.0003473559260760235,
      "loss": 3.0608,
      "step": 103576
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5776619911193848,
      "learning_rate": 0.00034735188679442953,
      "loss": 2.8911,
      "step": 103577
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7620899677276611,
      "learning_rate": 0.0003473478475040319,
      "loss": 3.0235,
      "step": 103578
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.048600196838379,
      "learning_rate": 0.0003473438082048315,
      "loss": 3.0262,
      "step": 103579
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.005280017852783,
      "learning_rate": 0.00034733976889682886,
      "loss": 2.7514,
      "step": 103580
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.756478190422058,
      "learning_rate": 0.00034733572958002497,
      "loss": 3.0074,
      "step": 103581
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6149036884307861,
      "learning_rate": 0.00034733169025442045,
      "loss": 2.9038,
      "step": 103582
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9119160175323486,
      "learning_rate": 0.000347327650920016,
      "loss": 3.0002,
      "step": 103583
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.857762098312378,
      "learning_rate": 0.00034732361157681254,
      "loss": 3.2582,
      "step": 103584
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7071669101715088,
      "learning_rate": 0.00034731957222481073,
      "loss": 2.9007,
      "step": 103585
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9522050619125366,
      "learning_rate": 0.0003473155328640112,
      "loss": 3.1761,
      "step": 103586
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.159154176712036,
      "learning_rate": 0.00034731149349441503,
      "loss": 2.8412,
      "step": 103587
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8089865446090698,
      "learning_rate": 0.0003473074541160226,
      "loss": 3.0893,
      "step": 103588
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.893255591392517,
      "learning_rate": 0.00034730341472883486,
      "loss": 2.8683,
      "step": 103589
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7789150476455688,
      "learning_rate": 0.0003472993753328525,
      "loss": 2.8886,
      "step": 103590
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7706736326217651,
      "learning_rate": 0.0003472953359280763,
      "loss": 3.3284,
      "step": 103591
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7877295017242432,
      "learning_rate": 0.00034729129651450696,
      "loss": 3.0741,
      "step": 103592
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8241504430770874,
      "learning_rate": 0.0003472872570921453,
      "loss": 2.7495,
      "step": 103593
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5236921310424805,
      "learning_rate": 0.0003472832176609921,
      "loss": 2.9691,
      "step": 103594
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8187816143035889,
      "learning_rate": 0.0003472791782210481,
      "loss": 3.1736,
      "step": 103595
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.773605227470398,
      "learning_rate": 0.0003472751387723139,
      "loss": 3.0853,
      "step": 103596
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.191049337387085,
      "learning_rate": 0.00034727109931479035,
      "loss": 3.1201,
      "step": 103597
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3433101177215576,
      "learning_rate": 0.0003472670598484783,
      "loss": 2.7325,
      "step": 103598
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7146762609481812,
      "learning_rate": 0.0003472630203733784,
      "loss": 3.0453,
      "step": 103599
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.271604061126709,
      "learning_rate": 0.0003472589808894913,
      "loss": 3.0377,
      "step": 103600
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.764064073562622,
      "learning_rate": 0.00034725494139681794,
      "loss": 2.8376,
      "step": 103601
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6163207292556763,
      "learning_rate": 0.0003472509018953591,
      "loss": 2.8475,
      "step": 103602
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7468363046646118,
      "learning_rate": 0.00034724686238511525,
      "loss": 3.0551,
      "step": 103603
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.637045979499817,
      "learning_rate": 0.0003472428228660874,
      "loss": 3.038,
      "step": 103604
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.043870687484741,
      "learning_rate": 0.0003472387833382763,
      "loss": 2.9853,
      "step": 103605
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9155189990997314,
      "learning_rate": 0.00034723474380168244,
      "loss": 2.8199,
      "step": 103606
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1109206676483154,
      "learning_rate": 0.00034723070425630683,
      "loss": 2.9786,
      "step": 103607
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.009874105453491,
      "learning_rate": 0.0003472266647021501,
      "loss": 2.8591,
      "step": 103608
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6239025592803955,
      "learning_rate": 0.0003472226251392132,
      "loss": 3.0169,
      "step": 103609
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7124791145324707,
      "learning_rate": 0.00034721858556749656,
      "loss": 3.0215,
      "step": 103610
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8308863639831543,
      "learning_rate": 0.00034721454598700115,
      "loss": 2.8956,
      "step": 103611
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3555374145507812,
      "learning_rate": 0.0003472105063977277,
      "loss": 2.9938,
      "step": 103612
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9567936658859253,
      "learning_rate": 0.00034720646679967684,
      "loss": 2.9226,
      "step": 103613
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9451631307601929,
      "learning_rate": 0.00034720242719284947,
      "loss": 2.867,
      "step": 103614
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7985529899597168,
      "learning_rate": 0.00034719838757724624,
      "loss": 3.0605,
      "step": 103615
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9420777559280396,
      "learning_rate": 0.00034719434795286796,
      "loss": 3.1148,
      "step": 103616
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7947943210601807,
      "learning_rate": 0.0003471903083197154,
      "loss": 3.0376,
      "step": 103617
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.783750295639038,
      "learning_rate": 0.00034718626867778925,
      "loss": 2.929,
      "step": 103618
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9582972526550293,
      "learning_rate": 0.00034718222902709023,
      "loss": 3.1459,
      "step": 103619
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.00186824798584,
      "learning_rate": 0.00034717818936761915,
      "loss": 3.182,
      "step": 103620
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.701845407485962,
      "learning_rate": 0.0003471741496993768,
      "loss": 2.8962,
      "step": 103621
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7852718830108643,
      "learning_rate": 0.00034717011002236383,
      "loss": 3.0106,
      "step": 103622
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6037613153457642,
      "learning_rate": 0.0003471660703365811,
      "loss": 3.1514,
      "step": 103623
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.190542697906494,
      "learning_rate": 0.00034716203064202937,
      "loss": 3.3672,
      "step": 103624
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6291213035583496,
      "learning_rate": 0.0003471579909387092,
      "loss": 2.8489,
      "step": 103625
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.100313663482666,
      "learning_rate": 0.0003471539512266215,
      "loss": 2.8928,
      "step": 103626
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9979281425476074,
      "learning_rate": 0.000347149911505767,
      "loss": 2.967,
      "step": 103627
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8505101203918457,
      "learning_rate": 0.0003471458717761465,
      "loss": 2.9663,
      "step": 103628
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.022998332977295,
      "learning_rate": 0.0003471418320377606,
      "loss": 3.0208,
      "step": 103629
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5618722438812256,
      "learning_rate": 0.0003471377922906102,
      "loss": 2.9373,
      "step": 103630
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7889316082000732,
      "learning_rate": 0.000347133752534696,
      "loss": 3.047,
      "step": 103631
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0327553749084473,
      "learning_rate": 0.0003471297127700187,
      "loss": 2.8419,
      "step": 103632
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.6386070251464844,
      "learning_rate": 0.00034712567299657914,
      "loss": 3.0453,
      "step": 103633
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7751375436782837,
      "learning_rate": 0.000347121633214378,
      "loss": 3.0593,
      "step": 103634
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.3772310018539429,
      "learning_rate": 0.000347117593423416,
      "loss": 2.7,
      "step": 103635
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9190341234207153,
      "learning_rate": 0.0003471135536236941,
      "loss": 3.0118,
      "step": 103636
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0079615116119385,
      "learning_rate": 0.00034710951381521283,
      "loss": 2.8774,
      "step": 103637
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7506308555603027,
      "learning_rate": 0.00034710547399797293,
      "loss": 3.1111,
      "step": 103638
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6335926055908203,
      "learning_rate": 0.00034710143417197533,
      "loss": 3.0011,
      "step": 103639
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6412200927734375,
      "learning_rate": 0.0003470973943372207,
      "loss": 3.0098,
      "step": 103640
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.661189317703247,
      "learning_rate": 0.0003470933544937097,
      "loss": 3.0416,
      "step": 103641
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.278201103210449,
      "learning_rate": 0.0003470893146414432,
      "loss": 3.2465,
      "step": 103642
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9758952856063843,
      "learning_rate": 0.0003470852747804219,
      "loss": 3.0254,
      "step": 103643
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.265367031097412,
      "learning_rate": 0.0003470812349106465,
      "loss": 2.9696,
      "step": 103644
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6788002252578735,
      "learning_rate": 0.00034707719503211795,
      "loss": 3.0721,
      "step": 103645
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8789162635803223,
      "learning_rate": 0.0003470731551448368,
      "loss": 2.8334,
      "step": 103646
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1976542472839355,
      "learning_rate": 0.00034706911524880375,
      "loss": 3.0171,
      "step": 103647
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5568467378616333,
      "learning_rate": 0.00034706507534401984,
      "loss": 3.1792,
      "step": 103648
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8935576677322388,
      "learning_rate": 0.00034706103543048555,
      "loss": 3.0974,
      "step": 103649
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8755685091018677,
      "learning_rate": 0.0003470569955082017,
      "loss": 3.0333,
      "step": 103650
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9119874238967896,
      "learning_rate": 0.00034705295557716913,
      "loss": 2.8331,
      "step": 103651
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8817964792251587,
      "learning_rate": 0.0003470489156373885,
      "loss": 3.0408,
      "step": 103652
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6888008117675781,
      "learning_rate": 0.00034704487568886053,
      "loss": 2.928,
      "step": 103653
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0223734378814697,
      "learning_rate": 0.0003470408357315861,
      "loss": 2.7058,
      "step": 103654
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4452829360961914,
      "learning_rate": 0.00034703679576556586,
      "loss": 2.9493,
      "step": 103655
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8652608394622803,
      "learning_rate": 0.0003470327557908006,
      "loss": 3.1347,
      "step": 103656
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.224912643432617,
      "learning_rate": 0.0003470287158072911,
      "loss": 3.0967,
      "step": 103657
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5709532499313354,
      "learning_rate": 0.000347024675815038,
      "loss": 3.2148,
      "step": 103658
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.237499237060547,
      "learning_rate": 0.0003470206358140422,
      "loss": 2.9243,
      "step": 103659
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.821397304534912,
      "learning_rate": 0.00034701659580430434,
      "loss": 2.9426,
      "step": 103660
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7986968755722046,
      "learning_rate": 0.00034701255578582524,
      "loss": 3.0766,
      "step": 103661
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.515092372894287,
      "learning_rate": 0.0003470085157586055,
      "loss": 2.8846,
      "step": 103662
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2174346446990967,
      "learning_rate": 0.0003470044757226461,
      "loss": 3.1865,
      "step": 103663
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8994200229644775,
      "learning_rate": 0.0003470004356779477,
      "loss": 2.9903,
      "step": 103664
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8060731887817383,
      "learning_rate": 0.000346996395624511,
      "loss": 3.0314,
      "step": 103665
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4542057514190674,
      "learning_rate": 0.0003469923555623367,
      "loss": 2.8156,
      "step": 103666
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6957263946533203,
      "learning_rate": 0.00034698831549142574,
      "loss": 3.0182,
      "step": 103667
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0418663024902344,
      "learning_rate": 0.0003469842754117787,
      "loss": 2.9795,
      "step": 103668
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.900437831878662,
      "learning_rate": 0.0003469802353233965,
      "loss": 2.9861,
      "step": 103669
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3419978618621826,
      "learning_rate": 0.00034697619522627973,
      "loss": 3.1706,
      "step": 103670
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.925983190536499,
      "learning_rate": 0.00034697215512042917,
      "loss": 2.9359,
      "step": 103671
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1390700340270996,
      "learning_rate": 0.0003469681150058456,
      "loss": 2.8461,
      "step": 103672
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.599838137626648,
      "learning_rate": 0.0003469640748825298,
      "loss": 2.6749,
      "step": 103673
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8740721940994263,
      "learning_rate": 0.0003469600347504825,
      "loss": 2.8211,
      "step": 103674
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0001516342163086,
      "learning_rate": 0.00034695599460970436,
      "loss": 2.8551,
      "step": 103675
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.238323450088501,
      "learning_rate": 0.00034695195446019634,
      "loss": 3.1513,
      "step": 103676
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9661002159118652,
      "learning_rate": 0.00034694791430195897,
      "loss": 3.0323,
      "step": 103677
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.844928741455078,
      "learning_rate": 0.0003469438741349931,
      "loss": 2.9205,
      "step": 103678
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8222432136535645,
      "learning_rate": 0.00034693983395929956,
      "loss": 2.8558,
      "step": 103679
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4449310302734375,
      "learning_rate": 0.00034693579377487893,
      "loss": 2.7066,
      "step": 103680
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.818216323852539,
      "learning_rate": 0.0003469317535817321,
      "loss": 2.965,
      "step": 103681
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.290440320968628,
      "learning_rate": 0.0003469277133798597,
      "loss": 3.0604,
      "step": 103682
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6540824174880981,
      "learning_rate": 0.0003469236731692627,
      "loss": 2.8191,
      "step": 103683
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0355474948883057,
      "learning_rate": 0.0003469196329499416,
      "loss": 2.949,
      "step": 103684
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.130373239517212,
      "learning_rate": 0.0003469155927218972,
      "loss": 3.0688,
      "step": 103685
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.673182487487793,
      "learning_rate": 0.0003469115524851304,
      "loss": 2.7692,
      "step": 103686
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8295109272003174,
      "learning_rate": 0.0003469075122396418,
      "loss": 2.963,
      "step": 103687
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5742111206054688,
      "learning_rate": 0.0003469034719854322,
      "loss": 2.8491,
      "step": 103688
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7727504968643188,
      "learning_rate": 0.00034689943172250243,
      "loss": 3.1889,
      "step": 103689
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6003501415252686,
      "learning_rate": 0.0003468953914508532,
      "loss": 3.1833,
      "step": 103690
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8365966081619263,
      "learning_rate": 0.0003468913511704851,
      "loss": 2.8717,
      "step": 103691
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6953133344650269,
      "learning_rate": 0.00034688731088139906,
      "loss": 3.0657,
      "step": 103692
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.756324052810669,
      "learning_rate": 0.0003468832705835958,
      "loss": 2.7893,
      "step": 103693
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.992504358291626,
      "learning_rate": 0.0003468792302770761,
      "loss": 3.2994,
      "step": 103694
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1755664348602295,
      "learning_rate": 0.00034687518996184065,
      "loss": 3.0996,
      "step": 103695
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7594088315963745,
      "learning_rate": 0.00034687114963789014,
      "loss": 2.7512,
      "step": 103696
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.695552110671997,
      "learning_rate": 0.00034686710930522546,
      "loss": 2.9914,
      "step": 103697
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0378689765930176,
      "learning_rate": 0.00034686306896384726,
      "loss": 2.9834,
      "step": 103698
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8512581586837769,
      "learning_rate": 0.00034685902861375636,
      "loss": 3.0363,
      "step": 103699
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7278395891189575,
      "learning_rate": 0.0003468549882549536,
      "loss": 3.1521,
      "step": 103700
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.150435447692871,
      "learning_rate": 0.0003468509478874394,
      "loss": 2.7934,
      "step": 103701
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5966756343841553,
      "learning_rate": 0.0003468469075112148,
      "loss": 2.8187,
      "step": 103702
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2083401679992676,
      "learning_rate": 0.00034684286712628054,
      "loss": 2.9473,
      "step": 103703
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.113110303878784,
      "learning_rate": 0.0003468388267326373,
      "loss": 3.0016,
      "step": 103704
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9520750045776367,
      "learning_rate": 0.00034683478633028574,
      "loss": 3.1048,
      "step": 103705
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.614562749862671,
      "learning_rate": 0.0003468307459192268,
      "loss": 2.9697,
      "step": 103706
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.349935293197632,
      "learning_rate": 0.0003468267054994611,
      "loss": 2.6766,
      "step": 103707
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9905732870101929,
      "learning_rate": 0.00034682266507098943,
      "loss": 3.0648,
      "step": 103708
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5976163148880005,
      "learning_rate": 0.0003468186246338126,
      "loss": 2.9088,
      "step": 103709
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.3050551414489746,
      "learning_rate": 0.0003468145841879312,
      "loss": 3.0222,
      "step": 103710
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.786674737930298,
      "learning_rate": 0.0003468105437333462,
      "loss": 2.8951,
      "step": 103711
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2450435161590576,
      "learning_rate": 0.00034680650327005813,
      "loss": 2.885,
      "step": 103712
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.6778085231781006,
      "learning_rate": 0.0003468024627980679,
      "loss": 2.8735,
      "step": 103713
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.583601236343384,
      "learning_rate": 0.0003467984223173762,
      "loss": 3.1656,
      "step": 103714
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.3386306762695312,
      "learning_rate": 0.0003467943818279839,
      "loss": 3.0004,
      "step": 103715
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8489331007003784,
      "learning_rate": 0.0003467903413298915,
      "loss": 2.9678,
      "step": 103716
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2791545391082764,
      "learning_rate": 0.0003467863008230999,
      "loss": 3.0819,
      "step": 103717
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.1501548290252686,
      "learning_rate": 0.0003467822603076099,
      "loss": 3.0164,
      "step": 103718
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.697580337524414,
      "learning_rate": 0.00034677821978342215,
      "loss": 2.9727,
      "step": 103719
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.624104380607605,
      "learning_rate": 0.0003467741792505375,
      "loss": 2.8777,
      "step": 103720
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.570683240890503,
      "learning_rate": 0.0003467701387089566,
      "loss": 2.9201,
      "step": 103721
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9069708585739136,
      "learning_rate": 0.0003467660981586802,
      "loss": 3.0507,
      "step": 103722
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.244386672973633,
      "learning_rate": 0.00034676205759970915,
      "loss": 3.1605,
      "step": 103723
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8697726726531982,
      "learning_rate": 0.00034675801703204425,
      "loss": 3.0957,
      "step": 103724
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.275203227996826,
      "learning_rate": 0.000346753976455686,
      "loss": 3.0577,
      "step": 103725
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.719581127166748,
      "learning_rate": 0.0003467499358706353,
      "loss": 3.1361,
      "step": 103726
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7086021900177,
      "learning_rate": 0.000346745895276893,
      "loss": 3.0503,
      "step": 103727
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6841546297073364,
      "learning_rate": 0.00034674185467445975,
      "loss": 3.044,
      "step": 103728
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.933856725692749,
      "learning_rate": 0.0003467378140633363,
      "loss": 3.0129,
      "step": 103729
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9989092350006104,
      "learning_rate": 0.0003467337734435233,
      "loss": 2.9395,
      "step": 103730
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.384096384048462,
      "learning_rate": 0.0003467297328150218,
      "loss": 2.9057,
      "step": 103731
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9794267416000366,
      "learning_rate": 0.00034672569217783223,
      "loss": 2.842,
      "step": 103732
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0095534324645996,
      "learning_rate": 0.0003467216515319555,
      "loss": 2.9348,
      "step": 103733
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.318962335586548,
      "learning_rate": 0.0003467176108773923,
      "loss": 3.0122,
      "step": 103734
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.715706706047058,
      "learning_rate": 0.0003467135702141435,
      "loss": 2.9738,
      "step": 103735
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6621726751327515,
      "learning_rate": 0.0003467095295422097,
      "loss": 3.359,
      "step": 103736
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.660780429840088,
      "learning_rate": 0.0003467054888615918,
      "loss": 3.0284,
      "step": 103737
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.3594744205474854,
      "learning_rate": 0.0003467014481722904,
      "loss": 2.9258,
      "step": 103738
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8725452423095703,
      "learning_rate": 0.0003466974074743062,
      "loss": 2.8106,
      "step": 103739
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2555086612701416,
      "learning_rate": 0.00034669336676764026,
      "loss": 3.1463,
      "step": 103740
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8985587358474731,
      "learning_rate": 0.00034668932605229303,
      "loss": 3.0225,
      "step": 103741
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.531864643096924,
      "learning_rate": 0.0003466852853282654,
      "loss": 2.8872,
      "step": 103742
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9383244514465332,
      "learning_rate": 0.00034668124459555816,
      "loss": 3.09,
      "step": 103743
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4003446102142334,
      "learning_rate": 0.000346677203854172,
      "loss": 2.8509,
      "step": 103744
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9910354614257812,
      "learning_rate": 0.0003466731631041075,
      "loss": 2.9269,
      "step": 103745
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6481242179870605,
      "learning_rate": 0.0003466691223453658,
      "loss": 3.0622,
      "step": 103746
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.065115451812744,
      "learning_rate": 0.00034666508157794727,
      "loss": 3.1323,
      "step": 103747
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8056321144104004,
      "learning_rate": 0.00034666104080185284,
      "loss": 3.0388,
      "step": 103748
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5175187587738037,
      "learning_rate": 0.0003466570000170833,
      "loss": 2.9567,
      "step": 103749
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.867674708366394,
      "learning_rate": 0.0003466529592236393,
      "loss": 3.1518,
      "step": 103750
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.873094916343689,
      "learning_rate": 0.00034664891842152163,
      "loss": 3.0897,
      "step": 103751
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8462680578231812,
      "learning_rate": 0.00034664487761073106,
      "loss": 2.8624,
      "step": 103752
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.680795669555664,
      "learning_rate": 0.0003466408367912684,
      "loss": 2.8929,
      "step": 103753
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4979262351989746,
      "learning_rate": 0.0003466367959631342,
      "loss": 3.0601,
      "step": 103754
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9305169582366943,
      "learning_rate": 0.00034663275512632945,
      "loss": 2.9042,
      "step": 103755
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7895489931106567,
      "learning_rate": 0.00034662871428085465,
      "loss": 3.0831,
      "step": 103756
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.082934856414795,
      "learning_rate": 0.0003466246734267108,
      "loss": 3.0675,
      "step": 103757
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.835160493850708,
      "learning_rate": 0.00034662063256389857,
      "loss": 2.801,
      "step": 103758
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3087165355682373,
      "learning_rate": 0.0003466165916924186,
      "loss": 3.0411,
      "step": 103759
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0206778049468994,
      "learning_rate": 0.00034661255081227176,
      "loss": 3.212,
      "step": 103760
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.840205430984497,
      "learning_rate": 0.0003466085099234588,
      "loss": 3.0378,
      "step": 103761
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.939457893371582,
      "learning_rate": 0.00034660446902598036,
      "loss": 3.0878,
      "step": 103762
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0044198036193848,
      "learning_rate": 0.00034660042811983735,
      "loss": 3.0958,
      "step": 103763
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8530540466308594,
      "learning_rate": 0.0003465963872050304,
      "loss": 2.8153,
      "step": 103764
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7360478639602661,
      "learning_rate": 0.00034659234628156034,
      "loss": 3.246,
      "step": 103765
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9059062004089355,
      "learning_rate": 0.0003465883053494278,
      "loss": 2.8296,
      "step": 103766
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.376329183578491,
      "learning_rate": 0.0003465842644086337,
      "loss": 2.9549,
      "step": 103767
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1420514583587646,
      "learning_rate": 0.00034658022345917866,
      "loss": 2.9483,
      "step": 103768
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.176830291748047,
      "learning_rate": 0.0003465761825010635,
      "loss": 3.2267,
      "step": 103769
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.418674945831299,
      "learning_rate": 0.00034657214153428886,
      "loss": 3.1495,
      "step": 103770
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.088066339492798,
      "learning_rate": 0.00034656810055885574,
      "loss": 2.9162,
      "step": 103771
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8970999717712402,
      "learning_rate": 0.00034656405957476466,
      "loss": 3.2974,
      "step": 103772
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.94325590133667,
      "learning_rate": 0.00034656001858201634,
      "loss": 3.208,
      "step": 103773
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.276913642883301,
      "learning_rate": 0.00034655597758061175,
      "loss": 3.0639,
      "step": 103774
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.005894899368286,
      "learning_rate": 0.00034655193657055154,
      "loss": 2.8319,
      "step": 103775
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9739470481872559,
      "learning_rate": 0.00034654789555183636,
      "loss": 2.9252,
      "step": 103776
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8484864234924316,
      "learning_rate": 0.00034654385452446713,
      "loss": 2.7935,
      "step": 103777
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1325185298919678,
      "learning_rate": 0.0003465398134884445,
      "loss": 3.074,
      "step": 103778
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5361402034759521,
      "learning_rate": 0.00034653577244376924,
      "loss": 3.2505,
      "step": 103779
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.027726888656616,
      "learning_rate": 0.00034653173139044205,
      "loss": 3.1017,
      "step": 103780
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9834752082824707,
      "learning_rate": 0.00034652769032846373,
      "loss": 2.9544,
      "step": 103781
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.034214496612549,
      "learning_rate": 0.0003465236492578351,
      "loss": 3.0789,
      "step": 103782
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.602321743965149,
      "learning_rate": 0.0003465196081785568,
      "loss": 3.0296,
      "step": 103783
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9066168069839478,
      "learning_rate": 0.0003465155670906296,
      "loss": 2.7761,
      "step": 103784
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.049887180328369,
      "learning_rate": 0.00034651152599405444,
      "loss": 2.8627,
      "step": 103785
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.670080542564392,
      "learning_rate": 0.0003465074848888317,
      "loss": 2.866,
      "step": 103786
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7669185400009155,
      "learning_rate": 0.00034650344377496243,
      "loss": 2.7473,
      "step": 103787
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.744768738746643,
      "learning_rate": 0.00034649940265244736,
      "loss": 3.0642,
      "step": 103788
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6255348920822144,
      "learning_rate": 0.0003464953615212871,
      "loss": 2.8907,
      "step": 103789
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.003380537033081,
      "learning_rate": 0.0003464913203814824,
      "loss": 3.0015,
      "step": 103790
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5596725940704346,
      "learning_rate": 0.00034648727923303424,
      "loss": 3.1105,
      "step": 103791
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.819199800491333,
      "learning_rate": 0.00034648323807594315,
      "loss": 2.8373,
      "step": 103792
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.255023956298828,
      "learning_rate": 0.0003464791969102099,
      "loss": 3.0667,
      "step": 103793
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9396309852600098,
      "learning_rate": 0.0003464751557358353,
      "loss": 3.1546,
      "step": 103794
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6398733854293823,
      "learning_rate": 0.00034647111455282013,
      "loss": 3.1335,
      "step": 103795
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0146102905273438,
      "learning_rate": 0.0003464670733611651,
      "loss": 2.9091,
      "step": 103796
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.322981595993042,
      "learning_rate": 0.00034646303216087095,
      "loss": 2.9682,
      "step": 103797
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.968009114265442,
      "learning_rate": 0.0003464589909519385,
      "loss": 3.1802,
      "step": 103798
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8545397520065308,
      "learning_rate": 0.0003464549497343684,
      "loss": 2.7146,
      "step": 103799
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.967214584350586,
      "learning_rate": 0.00034645090850816143,
      "loss": 2.8916,
      "step": 103800
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.673652410507202,
      "learning_rate": 0.00034644686727331847,
      "loss": 2.8357,
      "step": 103801
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.345215082168579,
      "learning_rate": 0.00034644282602984,
      "loss": 2.879,
      "step": 103802
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.0343096256256104,
      "learning_rate": 0.000346438784777727,
      "loss": 2.7701,
      "step": 103803
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.447096586227417,
      "learning_rate": 0.0003464347435169802,
      "loss": 2.9791,
      "step": 103804
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8458200693130493,
      "learning_rate": 0.00034643070224760024,
      "loss": 3.0647,
      "step": 103805
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8794265985488892,
      "learning_rate": 0.000346426660969588,
      "loss": 2.9842,
      "step": 103806
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7106878757476807,
      "learning_rate": 0.0003464226196829441,
      "loss": 2.7841,
      "step": 103807
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9272496700286865,
      "learning_rate": 0.00034641857838766933,
      "loss": 3.1112,
      "step": 103808
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6379083395004272,
      "learning_rate": 0.00034641453708376453,
      "loss": 3.415,
      "step": 103809
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0722405910491943,
      "learning_rate": 0.00034641049577123045,
      "loss": 2.8862,
      "step": 103810
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7615625858306885,
      "learning_rate": 0.0003464064544500677,
      "loss": 3.0565,
      "step": 103811
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.300208330154419,
      "learning_rate": 0.0003464024131202771,
      "loss": 3.0424,
      "step": 103812
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8209811449050903,
      "learning_rate": 0.00034639837178185946,
      "loss": 2.9006,
      "step": 103813
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.7101995944976807,
      "learning_rate": 0.00034639433043481545,
      "loss": 2.8281,
      "step": 103814
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.8383116722106934,
      "learning_rate": 0.0003463902890791459,
      "loss": 3.228,
      "step": 103815
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.0828161239624023,
      "learning_rate": 0.0003463862477148515,
      "loss": 3.0495,
      "step": 103816
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8524991273880005,
      "learning_rate": 0.000346382206341933,
      "loss": 3.0099,
      "step": 103817
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7168502807617188,
      "learning_rate": 0.0003463781649603912,
      "loss": 2.9159,
      "step": 103818
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.3832895755767822,
      "learning_rate": 0.00034637412357022685,
      "loss": 3.2073,
      "step": 103819
    },
    {
      "epoch": 1.35,
      "grad_norm": 5.006110668182373,
      "learning_rate": 0.00034637008217144064,
      "loss": 3.2381,
      "step": 103820
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.745492458343506,
      "learning_rate": 0.00034636604076403334,
      "loss": 2.9619,
      "step": 103821
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.677456021308899,
      "learning_rate": 0.00034636199934800577,
      "loss": 3.121,
      "step": 103822
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.207258701324463,
      "learning_rate": 0.00034635795792335863,
      "loss": 2.8447,
      "step": 103823
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6944085359573364,
      "learning_rate": 0.00034635391649009257,
      "loss": 2.9837,
      "step": 103824
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.441494941711426,
      "learning_rate": 0.00034634987504820856,
      "loss": 2.7556,
      "step": 103825
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6699146032333374,
      "learning_rate": 0.00034634583359770716,
      "loss": 3.0417,
      "step": 103826
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.6783485412597656,
      "learning_rate": 0.0003463417921385892,
      "loss": 2.835,
      "step": 103827
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2750494480133057,
      "learning_rate": 0.00034633775067085545,
      "loss": 3.1129,
      "step": 103828
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.670506000518799,
      "learning_rate": 0.0003463337091945066,
      "loss": 2.874,
      "step": 103829
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.820403814315796,
      "learning_rate": 0.0003463296677095435,
      "loss": 2.9892,
      "step": 103830
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9074504375457764,
      "learning_rate": 0.00034632562621596686,
      "loss": 3.1297,
      "step": 103831
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.386178731918335,
      "learning_rate": 0.0003463215847137773,
      "loss": 2.9513,
      "step": 103832
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5558521747589111,
      "learning_rate": 0.0003463175432029757,
      "loss": 2.9783,
      "step": 103833
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.853333830833435,
      "learning_rate": 0.0003463135016835629,
      "loss": 3.1308,
      "step": 103834
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7466598749160767,
      "learning_rate": 0.00034630946015553953,
      "loss": 2.8952,
      "step": 103835
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.750479817390442,
      "learning_rate": 0.00034630541861890623,
      "loss": 2.9531,
      "step": 103836
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7671929597854614,
      "learning_rate": 0.00034630137707366404,
      "loss": 3.0055,
      "step": 103837
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9602959156036377,
      "learning_rate": 0.00034629733551981345,
      "loss": 2.9926,
      "step": 103838
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.891360878944397,
      "learning_rate": 0.00034629329395735527,
      "loss": 3.0507,
      "step": 103839
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5101072788238525,
      "learning_rate": 0.0003462892523862904,
      "loss": 2.8296,
      "step": 103840
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7993084192276,
      "learning_rate": 0.00034628521080661944,
      "loss": 3.2615,
      "step": 103841
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7183594703674316,
      "learning_rate": 0.00034628116921834315,
      "loss": 2.9625,
      "step": 103842
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.520934820175171,
      "learning_rate": 0.0003462771276214624,
      "loss": 2.8202,
      "step": 103843
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.744779348373413,
      "learning_rate": 0.00034627308601597784,
      "loss": 2.8953,
      "step": 103844
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1827151775360107,
      "learning_rate": 0.00034626904440189017,
      "loss": 3.2492,
      "step": 103845
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0391697883605957,
      "learning_rate": 0.0003462650027792003,
      "loss": 3.0031,
      "step": 103846
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7825218439102173,
      "learning_rate": 0.0003462609611479088,
      "loss": 2.9071,
      "step": 103847
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8859081268310547,
      "learning_rate": 0.0003462569195080166,
      "loss": 3.1831,
      "step": 103848
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1074390411376953,
      "learning_rate": 0.0003462528778595243,
      "loss": 3.069,
      "step": 103849
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.119722604751587,
      "learning_rate": 0.00034624883620243277,
      "loss": 2.9578,
      "step": 103850
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.342897891998291,
      "learning_rate": 0.0003462447945367426,
      "loss": 3.3,
      "step": 103851
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3430943489074707,
      "learning_rate": 0.00034624075286245477,
      "loss": 2.9826,
      "step": 103852
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2300899028778076,
      "learning_rate": 0.00034623671117956986,
      "loss": 3.0066,
      "step": 103853
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8928892612457275,
      "learning_rate": 0.00034623266948808864,
      "loss": 2.9041,
      "step": 103854
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.575541377067566,
      "learning_rate": 0.00034622862778801197,
      "loss": 2.9439,
      "step": 103855
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.4866633415222168,
      "learning_rate": 0.0003462245860793405,
      "loss": 3.1566,
      "step": 103856
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.422605276107788,
      "learning_rate": 0.00034622054436207495,
      "loss": 2.9358,
      "step": 103857
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9260730743408203,
      "learning_rate": 0.00034621650263621617,
      "loss": 3.2745,
      "step": 103858
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7920998334884644,
      "learning_rate": 0.0003462124609017649,
      "loss": 2.9699,
      "step": 103859
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7468394041061401,
      "learning_rate": 0.00034620841915872187,
      "loss": 2.9077,
      "step": 103860
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.303131341934204,
      "learning_rate": 0.00034620437740708775,
      "loss": 3.1241,
      "step": 103861
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.770122528076172,
      "learning_rate": 0.0003462003356468634,
      "loss": 3.0048,
      "step": 103862
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8659800291061401,
      "learning_rate": 0.00034619629387804956,
      "loss": 2.8884,
      "step": 103863
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9445234537124634,
      "learning_rate": 0.00034619225210064694,
      "loss": 3.0322,
      "step": 103864
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4410014152526855,
      "learning_rate": 0.0003461882103146563,
      "loss": 3.1568,
      "step": 103865
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.113431453704834,
      "learning_rate": 0.0003461841685200784,
      "loss": 2.8115,
      "step": 103866
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8183451890945435,
      "learning_rate": 0.00034618012671691395,
      "loss": 3.1477,
      "step": 103867
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0411970615386963,
      "learning_rate": 0.0003461760849051638,
      "loss": 2.8886,
      "step": 103868
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6994857788085938,
      "learning_rate": 0.0003461720430848287,
      "loss": 2.9076,
      "step": 103869
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6804718971252441,
      "learning_rate": 0.0003461680012559092,
      "loss": 3.0641,
      "step": 103870
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8336982727050781,
      "learning_rate": 0.0003461639594184063,
      "loss": 3.056,
      "step": 103871
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7235257625579834,
      "learning_rate": 0.0003461599175723206,
      "loss": 2.9719,
      "step": 103872
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.186608076095581,
      "learning_rate": 0.0003461558757176529,
      "loss": 3.2565,
      "step": 103873
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1910665035247803,
      "learning_rate": 0.00034615183385440404,
      "loss": 2.8745,
      "step": 103874
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.98577880859375,
      "learning_rate": 0.00034614779198257456,
      "loss": 3.0936,
      "step": 103875
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9156725406646729,
      "learning_rate": 0.0003461437501021654,
      "loss": 2.8667,
      "step": 103876
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8389912843704224,
      "learning_rate": 0.0003461397082131773,
      "loss": 3.0615,
      "step": 103877
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.712362289428711,
      "learning_rate": 0.0003461356663156109,
      "loss": 3.2566,
      "step": 103878
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7844542264938354,
      "learning_rate": 0.00034613162440946696,
      "loss": 2.8044,
      "step": 103879
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8598994016647339,
      "learning_rate": 0.0003461275824947464,
      "loss": 2.9274,
      "step": 103880
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.7494802474975586,
      "learning_rate": 0.0003461235405714497,
      "loss": 2.9761,
      "step": 103881
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.700531005859375,
      "learning_rate": 0.00034611949863957785,
      "loss": 3.16,
      "step": 103882
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1877644062042236,
      "learning_rate": 0.00034611545669913156,
      "loss": 3.1959,
      "step": 103883
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1118221282958984,
      "learning_rate": 0.0003461114147501114,
      "loss": 2.8305,
      "step": 103884
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.672067403793335,
      "learning_rate": 0.00034610737279251833,
      "loss": 3.0694,
      "step": 103885
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.555681586265564,
      "learning_rate": 0.0003461033308263531,
      "loss": 3.1269,
      "step": 103886
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8638455867767334,
      "learning_rate": 0.0003460992888516163,
      "loss": 3.1285,
      "step": 103887
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0874152183532715,
      "learning_rate": 0.00034609524686830877,
      "loss": 2.914,
      "step": 103888
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6103630065917969,
      "learning_rate": 0.0003460912048764314,
      "loss": 3.0253,
      "step": 103889
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1139075756073,
      "learning_rate": 0.00034608716287598464,
      "loss": 2.9725,
      "step": 103890
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1770033836364746,
      "learning_rate": 0.0003460831208669695,
      "loss": 2.9348,
      "step": 103891
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7933192253112793,
      "learning_rate": 0.00034607907884938655,
      "loss": 3.0571,
      "step": 103892
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8581569194793701,
      "learning_rate": 0.00034607503682323673,
      "loss": 3.0548,
      "step": 103893
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6371357440948486,
      "learning_rate": 0.00034607099478852057,
      "loss": 3.0032,
      "step": 103894
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1279923915863037,
      "learning_rate": 0.0003460669527452391,
      "loss": 2.9477,
      "step": 103895
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9816607236862183,
      "learning_rate": 0.00034606291069339283,
      "loss": 3.2335,
      "step": 103896
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2589480876922607,
      "learning_rate": 0.00034605886863298254,
      "loss": 2.7126,
      "step": 103897
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.974095344543457,
      "learning_rate": 0.0003460548265640092,
      "loss": 2.9922,
      "step": 103898
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6955161094665527,
      "learning_rate": 0.0003460507844864732,
      "loss": 3.0546,
      "step": 103899
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5841171741485596,
      "learning_rate": 0.0003460467424003756,
      "loss": 3.1597,
      "step": 103900
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1916351318359375,
      "learning_rate": 0.0003460427003057171,
      "loss": 2.9241,
      "step": 103901
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4187822341918945,
      "learning_rate": 0.00034603865820249827,
      "loss": 2.9774,
      "step": 103902
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.792096495628357,
      "learning_rate": 0.00034603461609071995,
      "loss": 3.041,
      "step": 103903
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8833696842193604,
      "learning_rate": 0.00034603057397038304,
      "loss": 2.8983,
      "step": 103904
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8502529859542847,
      "learning_rate": 0.0003460265318414881,
      "loss": 2.9264,
      "step": 103905
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9881538152694702,
      "learning_rate": 0.00034602248970403603,
      "loss": 3.2531,
      "step": 103906
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8533059358596802,
      "learning_rate": 0.0003460184475580275,
      "loss": 3.1007,
      "step": 103907
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9735825061798096,
      "learning_rate": 0.00034601440540346325,
      "loss": 2.8031,
      "step": 103908
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.082430839538574,
      "learning_rate": 0.00034601036324034406,
      "loss": 3.2303,
      "step": 103909
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.731752634048462,
      "learning_rate": 0.0003460063210686707,
      "loss": 3.1062,
      "step": 103910
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4106767177581787,
      "learning_rate": 0.00034600227888844384,
      "loss": 2.8522,
      "step": 103911
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.828888177871704,
      "learning_rate": 0.0003459982366996643,
      "loss": 2.8055,
      "step": 103912
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8672327995300293,
      "learning_rate": 0.0003459941945023329,
      "loss": 2.7689,
      "step": 103913
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6110037565231323,
      "learning_rate": 0.0003459901522964502,
      "loss": 2.9252,
      "step": 103914
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.302896499633789,
      "learning_rate": 0.00034598611008201713,
      "loss": 3.1633,
      "step": 103915
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6476150751113892,
      "learning_rate": 0.00034598206785903436,
      "loss": 2.948,
      "step": 103916
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7452994585037231,
      "learning_rate": 0.0003459780256275026,
      "loss": 2.9439,
      "step": 103917
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.11660099029541,
      "learning_rate": 0.0003459739833874227,
      "loss": 2.9967,
      "step": 103918
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.4871132373809814,
      "learning_rate": 0.0003459699411387955,
      "loss": 2.9288,
      "step": 103919
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4772095680236816,
      "learning_rate": 0.0003459658988816214,
      "loss": 2.9642,
      "step": 103920
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.876521110534668,
      "learning_rate": 0.00034596185661590147,
      "loss": 3.0008,
      "step": 103921
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4357731342315674,
      "learning_rate": 0.0003459578143416364,
      "loss": 3.0274,
      "step": 103922
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.857682704925537,
      "learning_rate": 0.00034595377205882684,
      "loss": 2.7198,
      "step": 103923
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5232354402542114,
      "learning_rate": 0.00034594972976747363,
      "loss": 2.6047,
      "step": 103924
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5811216831207275,
      "learning_rate": 0.00034594568746757745,
      "loss": 3.0333,
      "step": 103925
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.947762370109558,
      "learning_rate": 0.0003459416451591392,
      "loss": 2.8787,
      "step": 103926
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0030875205993652,
      "learning_rate": 0.0003459376028421595,
      "loss": 3.2719,
      "step": 103927
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2470874786376953,
      "learning_rate": 0.00034593356051663905,
      "loss": 3.167,
      "step": 103928
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9098275899887085,
      "learning_rate": 0.0003459295181825788,
      "loss": 2.8699,
      "step": 103929
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9899072647094727,
      "learning_rate": 0.0003459254758399793,
      "loss": 3.1081,
      "step": 103930
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.86297607421875,
      "learning_rate": 0.0003459214334888414,
      "loss": 3.135,
      "step": 103931
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5704073905944824,
      "learning_rate": 0.0003459173911291659,
      "loss": 2.7219,
      "step": 103932
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.739246129989624,
      "learning_rate": 0.00034591334876095343,
      "loss": 3.0511,
      "step": 103933
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.588057041168213,
      "learning_rate": 0.0003459093063842048,
      "loss": 2.7705,
      "step": 103934
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0233469009399414,
      "learning_rate": 0.0003459052639989208,
      "loss": 3.0618,
      "step": 103935
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.217099189758301,
      "learning_rate": 0.00034590122160510213,
      "loss": 3.1694,
      "step": 103936
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.1703572273254395,
      "learning_rate": 0.00034589717920274955,
      "loss": 2.7247,
      "step": 103937
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9451465606689453,
      "learning_rate": 0.0003458931367918638,
      "loss": 3.1775,
      "step": 103938
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.074366807937622,
      "learning_rate": 0.00034588909437244567,
      "loss": 2.9544,
      "step": 103939
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.005404472351074,
      "learning_rate": 0.0003458850519444959,
      "loss": 3.2803,
      "step": 103940
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8345509767532349,
      "learning_rate": 0.0003458810095080153,
      "loss": 3.0976,
      "step": 103941
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1939449310302734,
      "learning_rate": 0.00034587696706300444,
      "loss": 2.8856,
      "step": 103942
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6827709674835205,
      "learning_rate": 0.0003458729246094642,
      "loss": 3.0386,
      "step": 103943
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8332711458206177,
      "learning_rate": 0.0003458688821473954,
      "loss": 3.001,
      "step": 103944
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7863523960113525,
      "learning_rate": 0.0003458648396767986,
      "loss": 3.2215,
      "step": 103945
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7918949127197266,
      "learning_rate": 0.0003458607971976747,
      "loss": 2.7873,
      "step": 103946
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9784367084503174,
      "learning_rate": 0.0003458567547100244,
      "loss": 3.036,
      "step": 103947
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7541980743408203,
      "learning_rate": 0.00034585271221384856,
      "loss": 2.8919,
      "step": 103948
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9143503904342651,
      "learning_rate": 0.00034584866970914773,
      "loss": 2.9261,
      "step": 103949
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9227057695388794,
      "learning_rate": 0.0003458446271959228,
      "loss": 3.0173,
      "step": 103950
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.6976401805877686,
      "learning_rate": 0.0003458405846741745,
      "loss": 3.139,
      "step": 103951
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9176167249679565,
      "learning_rate": 0.00034583654214390353,
      "loss": 2.9603,
      "step": 103952
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7199918031692505,
      "learning_rate": 0.00034583249960511066,
      "loss": 3.1844,
      "step": 103953
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8140789270401,
      "learning_rate": 0.00034582845705779673,
      "loss": 2.944,
      "step": 103954
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9383643865585327,
      "learning_rate": 0.00034582441450196243,
      "loss": 2.8077,
      "step": 103955
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.005502939224243,
      "learning_rate": 0.00034582037193760847,
      "loss": 3.3014,
      "step": 103956
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9411112070083618,
      "learning_rate": 0.00034581632936473566,
      "loss": 3.0138,
      "step": 103957
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.567281723022461,
      "learning_rate": 0.00034581228678334466,
      "loss": 3.1461,
      "step": 103958
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.470512866973877,
      "learning_rate": 0.00034580824419343643,
      "loss": 2.8714,
      "step": 103959
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.339418649673462,
      "learning_rate": 0.0003458042015950115,
      "loss": 3.1598,
      "step": 103960
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6051589250564575,
      "learning_rate": 0.0003458001589880707,
      "loss": 3.1784,
      "step": 103961
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6606696844100952,
      "learning_rate": 0.0003457961163726148,
      "loss": 2.8452,
      "step": 103962
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9553093910217285,
      "learning_rate": 0.00034579207374864455,
      "loss": 2.9389,
      "step": 103963
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5197917222976685,
      "learning_rate": 0.0003457880311161606,
      "loss": 2.9473,
      "step": 103964
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0827887058258057,
      "learning_rate": 0.0003457839884751639,
      "loss": 2.9612,
      "step": 103965
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7026485204696655,
      "learning_rate": 0.00034577994582565504,
      "loss": 3.0719,
      "step": 103966
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7262792587280273,
      "learning_rate": 0.00034577590316763485,
      "loss": 2.7779,
      "step": 103967
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8814955949783325,
      "learning_rate": 0.0003457718605011041,
      "loss": 3.191,
      "step": 103968
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.516228437423706,
      "learning_rate": 0.00034576781782606346,
      "loss": 3.1165,
      "step": 103969
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6557213068008423,
      "learning_rate": 0.00034576377514251367,
      "loss": 3.2907,
      "step": 103970
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8680340051651,
      "learning_rate": 0.0003457597324504556,
      "loss": 2.7963,
      "step": 103971
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.853419542312622,
      "learning_rate": 0.00034575568974988985,
      "loss": 2.9794,
      "step": 103972
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.905680775642395,
      "learning_rate": 0.00034575164704081724,
      "loss": 3.1587,
      "step": 103973
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9141921997070312,
      "learning_rate": 0.0003457476043232387,
      "loss": 3.1791,
      "step": 103974
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.725351095199585,
      "learning_rate": 0.00034574356159715466,
      "loss": 2.7143,
      "step": 103975
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9304897785186768,
      "learning_rate": 0.00034573951886256605,
      "loss": 2.7943,
      "step": 103976
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9596272706985474,
      "learning_rate": 0.00034573547611947366,
      "loss": 3.0328,
      "step": 103977
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9648957252502441,
      "learning_rate": 0.0003457314333678781,
      "loss": 3.0147,
      "step": 103978
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7292194366455078,
      "learning_rate": 0.00034572739060778027,
      "loss": 3.1215,
      "step": 103979
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9490355253219604,
      "learning_rate": 0.0003457233478391809,
      "loss": 3.1393,
      "step": 103980
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.660213589668274,
      "learning_rate": 0.0003457193050620806,
      "loss": 3.0859,
      "step": 103981
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.760828971862793,
      "learning_rate": 0.00034571526227648023,
      "loss": 2.7301,
      "step": 103982
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1071529388427734,
      "learning_rate": 0.00034571121948238063,
      "loss": 2.9296,
      "step": 103983
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6758993864059448,
      "learning_rate": 0.0003457071766797823,
      "loss": 3.1146,
      "step": 103984
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.139504909515381,
      "learning_rate": 0.0003457031338686862,
      "loss": 3.1002,
      "step": 103985
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.915876626968384,
      "learning_rate": 0.0003456990910490931,
      "loss": 3.0038,
      "step": 103986
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.079784631729126,
      "learning_rate": 0.00034569504822100355,
      "loss": 2.7956,
      "step": 103987
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.26712703704834,
      "learning_rate": 0.0003456910053844185,
      "loss": 3.0932,
      "step": 103988
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.9370458126068115,
      "learning_rate": 0.00034568696253933865,
      "loss": 2.9346,
      "step": 103989
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1130597591400146,
      "learning_rate": 0.00034568291968576476,
      "loss": 2.9597,
      "step": 103990
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.095057487487793,
      "learning_rate": 0.00034567887682369745,
      "loss": 2.9796,
      "step": 103991
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.8528926372528076,
      "learning_rate": 0.0003456748339531376,
      "loss": 2.9469,
      "step": 103992
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1181588172912598,
      "learning_rate": 0.000345670791074086,
      "loss": 2.8209,
      "step": 103993
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.559577226638794,
      "learning_rate": 0.00034566674818654327,
      "loss": 2.9928,
      "step": 103994
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9554088115692139,
      "learning_rate": 0.00034566270529051024,
      "loss": 2.9017,
      "step": 103995
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1031863689422607,
      "learning_rate": 0.00034565866238598774,
      "loss": 3.0343,
      "step": 103996
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.2685434818267822,
      "learning_rate": 0.00034565461947297637,
      "loss": 2.7527,
      "step": 103997
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1869704723358154,
      "learning_rate": 0.00034565057655147683,
      "loss": 2.7262,
      "step": 103998
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.5646800994873047,
      "learning_rate": 0.00034564653362149017,
      "loss": 3.03,
      "step": 103999
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.143294811248779,
      "learning_rate": 0.00034564249068301685,
      "loss": 2.9594,
      "step": 104000
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.080986499786377,
      "learning_rate": 0.00034563844773605776,
      "loss": 3.2056,
      "step": 104001
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.394502639770508,
      "learning_rate": 0.00034563440478061364,
      "loss": 2.8659,
      "step": 104002
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.049487590789795,
      "learning_rate": 0.0003456303618166852,
      "loss": 2.9171,
      "step": 104003
    },
    {
      "epoch": 1.35,
      "grad_norm": 6.291665554046631,
      "learning_rate": 0.00034562631884427317,
      "loss": 2.6707,
      "step": 104004
    },
    {
      "epoch": 1.35,
      "grad_norm": 6.646463394165039,
      "learning_rate": 0.00034562227586337844,
      "loss": 3.1509,
      "step": 104005
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.6279656887054443,
      "learning_rate": 0.0003456182328740016,
      "loss": 2.9563,
      "step": 104006
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.128295421600342,
      "learning_rate": 0.00034561418987614346,
      "loss": 2.9572,
      "step": 104007
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6749322414398193,
      "learning_rate": 0.0003456101468698049,
      "loss": 3.3286,
      "step": 104008
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.760601282119751,
      "learning_rate": 0.0003456061038549864,
      "loss": 2.9326,
      "step": 104009
    },
    {
      "epoch": 1.35,
      "grad_norm": 5.226215839385986,
      "learning_rate": 0.0003456020608316889,
      "loss": 3.0921,
      "step": 104010
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.762639045715332,
      "learning_rate": 0.0003455980177999132,
      "loss": 2.9174,
      "step": 104011
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7682783603668213,
      "learning_rate": 0.00034559397475965987,
      "loss": 2.8423,
      "step": 104012
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7701054811477661,
      "learning_rate": 0.0003455899317109298,
      "loss": 3.1739,
      "step": 104013
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.5817813873291016,
      "learning_rate": 0.0003455858886537238,
      "loss": 2.9076,
      "step": 104014
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.841375827789307,
      "learning_rate": 0.0003455818455880423,
      "loss": 2.6498,
      "step": 104015
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6946810483932495,
      "learning_rate": 0.00034557780251388647,
      "loss": 2.8917,
      "step": 104016
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9947510957717896,
      "learning_rate": 0.00034557375943125676,
      "loss": 2.9909,
      "step": 104017
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.3662266731262207,
      "learning_rate": 0.0003455697163401541,
      "loss": 3.0475,
      "step": 104018
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.525972604751587,
      "learning_rate": 0.00034556567324057916,
      "loss": 3.1438,
      "step": 104019
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0797955989837646,
      "learning_rate": 0.0003455616301325327,
      "loss": 3.0736,
      "step": 104020
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6467658281326294,
      "learning_rate": 0.0003455575870160154,
      "loss": 2.9718,
      "step": 104021
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0667147636413574,
      "learning_rate": 0.0003455535438910281,
      "loss": 3.1058,
      "step": 104022
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4276134967803955,
      "learning_rate": 0.0003455495007575716,
      "loss": 2.8431,
      "step": 104023
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6692664623260498,
      "learning_rate": 0.00034554545761564655,
      "loss": 2.8565,
      "step": 104024
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.894778847694397,
      "learning_rate": 0.0003455414144652537,
      "loss": 2.9187,
      "step": 104025
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7405060529708862,
      "learning_rate": 0.000345537371306394,
      "loss": 2.7746,
      "step": 104026
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6388030052185059,
      "learning_rate": 0.00034553332813906787,
      "loss": 3.2252,
      "step": 104027
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.275216579437256,
      "learning_rate": 0.00034552928496327625,
      "loss": 2.7532,
      "step": 104028
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9350026845932007,
      "learning_rate": 0.00034552524177902,
      "loss": 2.8082,
      "step": 104029
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7845025062561035,
      "learning_rate": 0.00034552119858629964,
      "loss": 3.0291,
      "step": 104030
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9791359901428223,
      "learning_rate": 0.00034551715538511606,
      "loss": 3.1415,
      "step": 104031
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.179548740386963,
      "learning_rate": 0.00034551311217547,
      "loss": 2.8643,
      "step": 104032
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6065399646759033,
      "learning_rate": 0.00034550906895736215,
      "loss": 3.1275,
      "step": 104033
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6863524913787842,
      "learning_rate": 0.00034550502573079333,
      "loss": 3.2084,
      "step": 104034
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4776337146759033,
      "learning_rate": 0.0003455009824957642,
      "loss": 2.9982,
      "step": 104035
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.758256196975708,
      "learning_rate": 0.00034549693925227577,
      "loss": 3.1014,
      "step": 104036
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9451812505722046,
      "learning_rate": 0.00034549289600032843,
      "loss": 3.0773,
      "step": 104037
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8116754293441772,
      "learning_rate": 0.0003454888527399231,
      "loss": 3.2552,
      "step": 104038
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7403521537780762,
      "learning_rate": 0.00034548480947106056,
      "loss": 3.0189,
      "step": 104039
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.813688039779663,
      "learning_rate": 0.0003454807661937416,
      "loss": 3.0312,
      "step": 104040
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.013334035873413,
      "learning_rate": 0.00034547672290796683,
      "loss": 2.8928,
      "step": 104041
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7911949157714844,
      "learning_rate": 0.00034547267961373713,
      "loss": 3.1086,
      "step": 104042
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.5509824752807617,
      "learning_rate": 0.0003454686363110532,
      "loss": 3.07,
      "step": 104043
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1440911293029785,
      "learning_rate": 0.00034546459299991574,
      "loss": 2.946,
      "step": 104044
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.265770196914673,
      "learning_rate": 0.00034546054968032563,
      "loss": 3.0338,
      "step": 104045
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6860018968582153,
      "learning_rate": 0.00034545650635228346,
      "loss": 2.9969,
      "step": 104046
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7383980751037598,
      "learning_rate": 0.00034545246301579015,
      "loss": 3.1557,
      "step": 104047
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6830165386199951,
      "learning_rate": 0.0003454484196708463,
      "loss": 2.8015,
      "step": 104048
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.825582504272461,
      "learning_rate": 0.0003454443763174527,
      "loss": 3.1672,
      "step": 104049
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7797218561172485,
      "learning_rate": 0.0003454403329556103,
      "loss": 3.1662,
      "step": 104050
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8115363121032715,
      "learning_rate": 0.0003454362895853195,
      "loss": 2.9517,
      "step": 104051
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.8258647918701172,
      "learning_rate": 0.0003454322462065814,
      "loss": 2.945,
      "step": 104052
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.5817636251449585,
      "learning_rate": 0.0003454282028193965,
      "loss": 3.0787,
      "step": 104053
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.1152119636535645,
      "learning_rate": 0.0003454241594237656,
      "loss": 2.961,
      "step": 104054
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.649222731590271,
      "learning_rate": 0.0003454201160196895,
      "loss": 3.0694,
      "step": 104055
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.074315309524536,
      "learning_rate": 0.000345416072607169,
      "loss": 2.9535,
      "step": 104056
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.1428165435791016,
      "learning_rate": 0.00034541202918620474,
      "loss": 3.0142,
      "step": 104057
    },
    {
      "epoch": 1.35,
      "grad_norm": 4.502766132354736,
      "learning_rate": 0.0003454079857567976,
      "loss": 2.9302,
      "step": 104058
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9100514650344849,
      "learning_rate": 0.00034540394231894817,
      "loss": 3.1903,
      "step": 104059
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7338330745697021,
      "learning_rate": 0.0003453998988726574,
      "loss": 3.0209,
      "step": 104060
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.9648993015289307,
      "learning_rate": 0.0003453958554179258,
      "loss": 3.0485,
      "step": 104061
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.4455652236938477,
      "learning_rate": 0.0003453918119547543,
      "loss": 2.8859,
      "step": 104062
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.6758266687393188,
      "learning_rate": 0.0003453877684831437,
      "loss": 3.1618,
      "step": 104063
    },
    {
      "epoch": 1.35,
      "grad_norm": 2.0235159397125244,
      "learning_rate": 0.00034538372500309456,
      "loss": 2.9234,
      "step": 104064
    },
    {
      "epoch": 1.35,
      "grad_norm": 1.7526334524154663,
      "learning_rate": 0.00034537968151460776,
      "loss": 3.0452,
      "step": 104065
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6379657983779907,
      "learning_rate": 0.000345375638017684,
      "loss": 2.8887,
      "step": 104066
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8787330389022827,
      "learning_rate": 0.0003453715945123241,
      "loss": 3.0745,
      "step": 104067
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2378735542297363,
      "learning_rate": 0.00034536755099852867,
      "loss": 3.1019,
      "step": 104068
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.78230619430542,
      "learning_rate": 0.00034536350747629873,
      "loss": 2.9564,
      "step": 104069
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6931043863296509,
      "learning_rate": 0.0003453594639456347,
      "loss": 3.3629,
      "step": 104070
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.846846580505371,
      "learning_rate": 0.00034535542040653753,
      "loss": 2.81,
      "step": 104071
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.886498212814331,
      "learning_rate": 0.00034535137685900796,
      "loss": 3.0419,
      "step": 104072
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.771653413772583,
      "learning_rate": 0.00034534733330304665,
      "loss": 2.842,
      "step": 104073
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6202187538146973,
      "learning_rate": 0.00034534328973865447,
      "loss": 3.0262,
      "step": 104074
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9617927074432373,
      "learning_rate": 0.0003453392461658321,
      "loss": 3.0155,
      "step": 104075
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8355275392532349,
      "learning_rate": 0.0003453352025845803,
      "loss": 3.0012,
      "step": 104076
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.550691843032837,
      "learning_rate": 0.0003453311589948998,
      "loss": 3.0349,
      "step": 104077
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8584508895874023,
      "learning_rate": 0.0003453271153967915,
      "loss": 3.0911,
      "step": 104078
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.9158926010131836,
      "learning_rate": 0.00034532307179025595,
      "loss": 2.9011,
      "step": 104079
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8410874605178833,
      "learning_rate": 0.00034531902817529393,
      "loss": 3.03,
      "step": 104080
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9725089073181152,
      "learning_rate": 0.00034531498455190636,
      "loss": 3.2824,
      "step": 104081
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.522538185119629,
      "learning_rate": 0.00034531094092009383,
      "loss": 2.7857,
      "step": 104082
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.9324371814727783,
      "learning_rate": 0.00034530689727985705,
      "loss": 3.0452,
      "step": 104083
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9552888870239258,
      "learning_rate": 0.000345302853631197,
      "loss": 2.8385,
      "step": 104084
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6971371173858643,
      "learning_rate": 0.0003452988099741142,
      "loss": 3.0062,
      "step": 104085
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.660263776779175,
      "learning_rate": 0.00034529476630860956,
      "loss": 2.6946,
      "step": 104086
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.019824504852295,
      "learning_rate": 0.0003452907226346838,
      "loss": 2.9565,
      "step": 104087
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3989503383636475,
      "learning_rate": 0.0003452866789523376,
      "loss": 2.9053,
      "step": 104088
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8227899074554443,
      "learning_rate": 0.0003452826352615717,
      "loss": 2.8453,
      "step": 104089
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.7944748401641846,
      "learning_rate": 0.000345278591562387,
      "loss": 2.8939,
      "step": 104090
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2249972820281982,
      "learning_rate": 0.000345274547854784,
      "loss": 2.7157,
      "step": 104091
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8500523567199707,
      "learning_rate": 0.00034527050413876373,
      "loss": 2.8953,
      "step": 104092
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6389997005462646,
      "learning_rate": 0.00034526646041432683,
      "loss": 2.9147,
      "step": 104093
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9516322612762451,
      "learning_rate": 0.00034526241668147393,
      "loss": 3.1301,
      "step": 104094
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.033442735671997,
      "learning_rate": 0.00034525837294020593,
      "loss": 2.9756,
      "step": 104095
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4710426330566406,
      "learning_rate": 0.00034525432919052366,
      "loss": 3.0587,
      "step": 104096
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9553394317626953,
      "learning_rate": 0.00034525028543242766,
      "loss": 3.3342,
      "step": 104097
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.100337266921997,
      "learning_rate": 0.0003452462416659188,
      "loss": 2.8717,
      "step": 104098
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.732708692550659,
      "learning_rate": 0.0003452421978909978,
      "loss": 2.9446,
      "step": 104099
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.256693124771118,
      "learning_rate": 0.0003452381541076654,
      "loss": 2.8742,
      "step": 104100
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.773369550704956,
      "learning_rate": 0.0003452341103159224,
      "loss": 2.9526,
      "step": 104101
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6403067111968994,
      "learning_rate": 0.00034523006651576954,
      "loss": 3.0145,
      "step": 104102
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.390800952911377,
      "learning_rate": 0.00034522602270720755,
      "loss": 2.8816,
      "step": 104103
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.179452419281006,
      "learning_rate": 0.00034522197889023715,
      "loss": 3.1668,
      "step": 104104
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9083449840545654,
      "learning_rate": 0.0003452179350648592,
      "loss": 2.8172,
      "step": 104105
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.731529951095581,
      "learning_rate": 0.00034521389123107427,
      "loss": 3.0923,
      "step": 104106
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8526971340179443,
      "learning_rate": 0.0003452098473888833,
      "loss": 3.1127,
      "step": 104107
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.145578384399414,
      "learning_rate": 0.000345205803538287,
      "loss": 2.8607,
      "step": 104108
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8192740678787231,
      "learning_rate": 0.00034520175967928606,
      "loss": 2.6759,
      "step": 104109
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8144022226333618,
      "learning_rate": 0.0003451977158118812,
      "loss": 3.0917,
      "step": 104110
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6569111347198486,
      "learning_rate": 0.00034519367193607334,
      "loss": 3.1115,
      "step": 104111
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.379517078399658,
      "learning_rate": 0.00034518962805186304,
      "loss": 2.9525,
      "step": 104112
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.288320779800415,
      "learning_rate": 0.0003451855841592511,
      "loss": 2.8616,
      "step": 104113
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8206593990325928,
      "learning_rate": 0.0003451815402582384,
      "loss": 2.9749,
      "step": 104114
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.329771041870117,
      "learning_rate": 0.00034517749634882554,
      "loss": 3.0235,
      "step": 104115
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5889962911605835,
      "learning_rate": 0.0003451734524310134,
      "loss": 3.3096,
      "step": 104116
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8789191246032715,
      "learning_rate": 0.0003451694085048026,
      "loss": 3.012,
      "step": 104117
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.5386407375335693,
      "learning_rate": 0.000345165364570194,
      "loss": 3.1768,
      "step": 104118
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4461424350738525,
      "learning_rate": 0.0003451613206271882,
      "loss": 3.0618,
      "step": 104119
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8055195808410645,
      "learning_rate": 0.00034515727667578614,
      "loss": 3.3032,
      "step": 104120
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.9191935062408447,
      "learning_rate": 0.0003451532327159886,
      "loss": 3.3306,
      "step": 104121
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.821704626083374,
      "learning_rate": 0.00034514918874779604,
      "loss": 3.2166,
      "step": 104122
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6751304864883423,
      "learning_rate": 0.00034514514477120944,
      "loss": 3.1843,
      "step": 104123
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.23799204826355,
      "learning_rate": 0.00034514110078622955,
      "loss": 3.0783,
      "step": 104124
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.450010299682617,
      "learning_rate": 0.00034513705679285706,
      "loss": 2.9876,
      "step": 104125
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6324362754821777,
      "learning_rate": 0.0003451330127910927,
      "loss": 3.2321,
      "step": 104126
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6845738887786865,
      "learning_rate": 0.00034512896878093725,
      "loss": 3.0377,
      "step": 104127
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8468416929244995,
      "learning_rate": 0.00034512492476239154,
      "loss": 2.7614,
      "step": 104128
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.728074312210083,
      "learning_rate": 0.0003451208807354562,
      "loss": 2.9464,
      "step": 104129
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8974918127059937,
      "learning_rate": 0.0003451168367001321,
      "loss": 3.013,
      "step": 104130
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5518519878387451,
      "learning_rate": 0.0003451127926564199,
      "loss": 2.8983,
      "step": 104131
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.832592725753784,
      "learning_rate": 0.0003451087486043204,
      "loss": 3.0755,
      "step": 104132
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.4882733821868896,
      "learning_rate": 0.00034510470454383424,
      "loss": 2.9273,
      "step": 104133
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.103196382522583,
      "learning_rate": 0.0003451006604749623,
      "loss": 3.0317,
      "step": 104134
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3482718467712402,
      "learning_rate": 0.00034509661639770537,
      "loss": 3.0412,
      "step": 104135
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.144165515899658,
      "learning_rate": 0.0003450925723120641,
      "loss": 3.1962,
      "step": 104136
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3255274295806885,
      "learning_rate": 0.00034508852821803925,
      "loss": 2.8463,
      "step": 104137
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.368086576461792,
      "learning_rate": 0.00034508448411563154,
      "loss": 2.9462,
      "step": 104138
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.652208685874939,
      "learning_rate": 0.0003450804400048419,
      "loss": 3.0104,
      "step": 104139
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8626210689544678,
      "learning_rate": 0.0003450763958856708,
      "loss": 3.0698,
      "step": 104140
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9901541471481323,
      "learning_rate": 0.00034507235175811925,
      "loss": 3.0719,
      "step": 104141
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9488272666931152,
      "learning_rate": 0.0003450683076221879,
      "loss": 2.9039,
      "step": 104142
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.718372344970703,
      "learning_rate": 0.0003450642634778775,
      "loss": 2.9059,
      "step": 104143
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3135130405426025,
      "learning_rate": 0.00034506021932518877,
      "loss": 3.02,
      "step": 104144
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8876521587371826,
      "learning_rate": 0.0003450561751641225,
      "loss": 3.0381,
      "step": 104145
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.817842721939087,
      "learning_rate": 0.00034505213099467945,
      "loss": 2.7445,
      "step": 104146
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8875055313110352,
      "learning_rate": 0.0003450480868168604,
      "loss": 3.0469,
      "step": 104147
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.375946044921875,
      "learning_rate": 0.000345044042630666,
      "loss": 2.7345,
      "step": 104148
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2119204998016357,
      "learning_rate": 0.00034503999843609704,
      "loss": 2.8581,
      "step": 104149
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.68220853805542,
      "learning_rate": 0.00034503595423315435,
      "loss": 3.0418,
      "step": 104150
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9021921157836914,
      "learning_rate": 0.00034503191002183863,
      "loss": 2.9452,
      "step": 104151
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6449414491653442,
      "learning_rate": 0.00034502786580215054,
      "loss": 2.966,
      "step": 104152
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5478262901306152,
      "learning_rate": 0.000345023821574091,
      "loss": 2.7874,
      "step": 104153
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9107707738876343,
      "learning_rate": 0.0003450197773376607,
      "loss": 3.1492,
      "step": 104154
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.064958095550537,
      "learning_rate": 0.00034501573309286035,
      "loss": 3.0811,
      "step": 104155
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8747144937515259,
      "learning_rate": 0.0003450116888396907,
      "loss": 3.0477,
      "step": 104156
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.3430185317993164,
      "learning_rate": 0.00034500764457815256,
      "loss": 3.0604,
      "step": 104157
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3057918548583984,
      "learning_rate": 0.0003450036003082466,
      "loss": 2.9834,
      "step": 104158
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6703120470046997,
      "learning_rate": 0.00034499955602997366,
      "loss": 2.9457,
      "step": 104159
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.074233293533325,
      "learning_rate": 0.00034499551174333446,
      "loss": 2.6667,
      "step": 104160
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7087494134902954,
      "learning_rate": 0.0003449914674483297,
      "loss": 3.003,
      "step": 104161
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.708058476448059,
      "learning_rate": 0.0003449874231449602,
      "loss": 2.8485,
      "step": 104162
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8124542236328125,
      "learning_rate": 0.00034498337883322677,
      "loss": 3.2429,
      "step": 104163
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7647266387939453,
      "learning_rate": 0.00034497933451312996,
      "loss": 2.9627,
      "step": 104164
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7048386335372925,
      "learning_rate": 0.0003449752901846707,
      "loss": 3.0315,
      "step": 104165
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.188223123550415,
      "learning_rate": 0.0003449712458478497,
      "loss": 2.8796,
      "step": 104166
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.813752293586731,
      "learning_rate": 0.0003449672015026677,
      "loss": 2.7259,
      "step": 104167
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0996270179748535,
      "learning_rate": 0.00034496315714912536,
      "loss": 3.019,
      "step": 104168
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4439241886138916,
      "learning_rate": 0.0003449591127872236,
      "loss": 3.0565,
      "step": 104169
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.071129322052002,
      "learning_rate": 0.0003449550684169631,
      "loss": 3.1137,
      "step": 104170
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8495566844940186,
      "learning_rate": 0.00034495102403834453,
      "loss": 2.972,
      "step": 104171
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8834651708602905,
      "learning_rate": 0.0003449469796513688,
      "loss": 3.1283,
      "step": 104172
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7294673919677734,
      "learning_rate": 0.0003449429352560365,
      "loss": 3.2704,
      "step": 104173
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.059457540512085,
      "learning_rate": 0.00034493889085234847,
      "loss": 3.1096,
      "step": 104174
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9532215595245361,
      "learning_rate": 0.00034493484644030557,
      "loss": 3.0224,
      "step": 104175
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7132216691970825,
      "learning_rate": 0.0003449308020199083,
      "loss": 2.8121,
      "step": 104176
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8588879108428955,
      "learning_rate": 0.0003449267575911576,
      "loss": 3.173,
      "step": 104177
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.294145107269287,
      "learning_rate": 0.00034492271315405415,
      "loss": 3.0372,
      "step": 104178
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7519546747207642,
      "learning_rate": 0.00034491866870859874,
      "loss": 3.0531,
      "step": 104179
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6568130254745483,
      "learning_rate": 0.00034491462425479205,
      "loss": 3.2746,
      "step": 104180
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.058689832687378,
      "learning_rate": 0.000344910579792635,
      "loss": 3.0121,
      "step": 104181
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.947714924812317,
      "learning_rate": 0.00034490653532212806,
      "loss": 3.1922,
      "step": 104182
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7664411067962646,
      "learning_rate": 0.00034490249084327224,
      "loss": 2.8979,
      "step": 104183
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7056641578674316,
      "learning_rate": 0.00034489844635606827,
      "loss": 2.9775,
      "step": 104184
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.457289218902588,
      "learning_rate": 0.00034489440186051675,
      "loss": 3.0926,
      "step": 104185
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5296616554260254,
      "learning_rate": 0.0003448903573566185,
      "loss": 3.1119,
      "step": 104186
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8088959455490112,
      "learning_rate": 0.00034488631284437435,
      "loss": 3.0858,
      "step": 104187
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9253917932510376,
      "learning_rate": 0.00034488226832378487,
      "loss": 2.8575,
      "step": 104188
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8282560110092163,
      "learning_rate": 0.000344878223794851,
      "loss": 3.0058,
      "step": 104189
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.521965503692627,
      "learning_rate": 0.0003448741792575735,
      "loss": 2.9842,
      "step": 104190
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8328145742416382,
      "learning_rate": 0.00034487013471195296,
      "loss": 3.0184,
      "step": 104191
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0360851287841797,
      "learning_rate": 0.0003448660901579902,
      "loss": 2.9716,
      "step": 104192
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7861603498458862,
      "learning_rate": 0.000344862045595686,
      "loss": 3.1306,
      "step": 104193
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7629624605178833,
      "learning_rate": 0.00034485800102504113,
      "loss": 3.0086,
      "step": 104194
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6405889987945557,
      "learning_rate": 0.00034485395644605625,
      "loss": 2.7938,
      "step": 104195
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.095029592514038,
      "learning_rate": 0.00034484991185873223,
      "loss": 2.6681,
      "step": 104196
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0875496864318848,
      "learning_rate": 0.00034484586726306977,
      "loss": 2.9894,
      "step": 104197
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8038274049758911,
      "learning_rate": 0.0003448418226590696,
      "loss": 3.034,
      "step": 104198
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.45316219329834,
      "learning_rate": 0.0003448377780467325,
      "loss": 2.9066,
      "step": 104199
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.393423080444336,
      "learning_rate": 0.00034483373342605915,
      "loss": 2.9331,
      "step": 104200
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9546223878860474,
      "learning_rate": 0.00034482968879705043,
      "loss": 3.0414,
      "step": 104201
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3644044399261475,
      "learning_rate": 0.0003448256441597069,
      "loss": 2.8923,
      "step": 104202
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1495778560638428,
      "learning_rate": 0.0003448215995140296,
      "loss": 2.9052,
      "step": 104203
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9589707851409912,
      "learning_rate": 0.00034481755486001907,
      "loss": 2.6195,
      "step": 104204
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8701835870742798,
      "learning_rate": 0.000344813510197676,
      "loss": 2.6898,
      "step": 104205
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.537249207496643,
      "learning_rate": 0.00034480946552700145,
      "loss": 3.0047,
      "step": 104206
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1495721340179443,
      "learning_rate": 0.00034480542084799583,
      "loss": 3.2204,
      "step": 104207
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.623469829559326,
      "learning_rate": 0.00034480137616066007,
      "loss": 2.9564,
      "step": 104208
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9157127141952515,
      "learning_rate": 0.0003447973314649949,
      "loss": 2.8876,
      "step": 104209
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8277647495269775,
      "learning_rate": 0.00034479328676100104,
      "loss": 2.8373,
      "step": 104210
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8219709396362305,
      "learning_rate": 0.00034478924204867935,
      "loss": 2.7012,
      "step": 104211
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5689666271209717,
      "learning_rate": 0.00034478519732803045,
      "loss": 3.3483,
      "step": 104212
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.4856679439544678,
      "learning_rate": 0.0003447811525990551,
      "loss": 3.3342,
      "step": 104213
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9606844186782837,
      "learning_rate": 0.00034477710786175403,
      "loss": 2.7954,
      "step": 104214
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9720927476882935,
      "learning_rate": 0.0003447730631161281,
      "loss": 3.2501,
      "step": 104215
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6978956460952759,
      "learning_rate": 0.0003447690183621781,
      "loss": 2.835,
      "step": 104216
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6569660902023315,
      "learning_rate": 0.00034476497359990454,
      "loss": 2.9917,
      "step": 104217
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8937894105911255,
      "learning_rate": 0.00034476092882930844,
      "loss": 3.0134,
      "step": 104218
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9866145849227905,
      "learning_rate": 0.0003447568840503904,
      "loss": 3.2727,
      "step": 104219
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.540135622024536,
      "learning_rate": 0.0003447528392631513,
      "loss": 3.0865,
      "step": 104220
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7477028369903564,
      "learning_rate": 0.0003447487944675917,
      "loss": 3.0298,
      "step": 104221
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.711459994316101,
      "learning_rate": 0.0003447447496637124,
      "loss": 2.9437,
      "step": 104222
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6398930549621582,
      "learning_rate": 0.00034474070485151435,
      "loss": 2.9024,
      "step": 104223
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2505364418029785,
      "learning_rate": 0.00034473666003099806,
      "loss": 2.7734,
      "step": 104224
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6956530809402466,
      "learning_rate": 0.0003447326152021644,
      "loss": 2.758,
      "step": 104225
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7281227111816406,
      "learning_rate": 0.0003447285703650141,
      "loss": 2.7655,
      "step": 104226
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.76684832572937,
      "learning_rate": 0.000344724525519548,
      "loss": 2.972,
      "step": 104227
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6264861822128296,
      "learning_rate": 0.00034472048066576665,
      "loss": 2.9405,
      "step": 104228
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9606715440750122,
      "learning_rate": 0.00034471643580367094,
      "loss": 3.1645,
      "step": 104229
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.763118863105774,
      "learning_rate": 0.0003447123909332617,
      "loss": 2.8766,
      "step": 104230
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7367926836013794,
      "learning_rate": 0.00034470834605453946,
      "loss": 3.0061,
      "step": 104231
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1515324115753174,
      "learning_rate": 0.00034470430116750514,
      "loss": 3.134,
      "step": 104232
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.053938627243042,
      "learning_rate": 0.00034470025627215945,
      "loss": 2.8833,
      "step": 104233
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7677496671676636,
      "learning_rate": 0.0003446962113685032,
      "loss": 3.2187,
      "step": 104234
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6765446662902832,
      "learning_rate": 0.00034469216645653693,
      "loss": 3.0077,
      "step": 104235
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5364097356796265,
      "learning_rate": 0.0003446881215362617,
      "loss": 3.0719,
      "step": 104236
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.716941237449646,
      "learning_rate": 0.000344684076607678,
      "loss": 2.9282,
      "step": 104237
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8888437747955322,
      "learning_rate": 0.0003446800316707867,
      "loss": 3.0122,
      "step": 104238
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.731593370437622,
      "learning_rate": 0.0003446759867255886,
      "loss": 2.707,
      "step": 104239
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.79245924949646,
      "learning_rate": 0.00034467194177208437,
      "loss": 3.0613,
      "step": 104240
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.137759208679199,
      "learning_rate": 0.00034466789681027475,
      "loss": 3.0253,
      "step": 104241
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5623369216918945,
      "learning_rate": 0.0003446638518401605,
      "loss": 2.9763,
      "step": 104242
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.950552225112915,
      "learning_rate": 0.00034465980686174244,
      "loss": 2.9722,
      "step": 104243
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.092623472213745,
      "learning_rate": 0.00034465576187502127,
      "loss": 3.1358,
      "step": 104244
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.379201889038086,
      "learning_rate": 0.0003446517168799978,
      "loss": 3.0402,
      "step": 104245
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7318477630615234,
      "learning_rate": 0.00034464767187667263,
      "loss": 2.9635,
      "step": 104246
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.782188057899475,
      "learning_rate": 0.0003446436268650467,
      "loss": 3.163,
      "step": 104247
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6347746849060059,
      "learning_rate": 0.00034463958184512074,
      "loss": 3.1792,
      "step": 104248
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8456133604049683,
      "learning_rate": 0.0003446355368168953,
      "loss": 3.2162,
      "step": 104249
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1744420528411865,
      "learning_rate": 0.0003446314917803713,
      "loss": 3.0465,
      "step": 104250
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2513012886047363,
      "learning_rate": 0.0003446274467355495,
      "loss": 2.9274,
      "step": 104251
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.847036123275757,
      "learning_rate": 0.0003446234016824306,
      "loss": 3.3081,
      "step": 104252
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.2880442142486572,
      "learning_rate": 0.0003446193566210154,
      "loss": 3.2774,
      "step": 104253
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9829143285751343,
      "learning_rate": 0.00034461531155130455,
      "loss": 2.8722,
      "step": 104254
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.507652997970581,
      "learning_rate": 0.000344611266473299,
      "loss": 2.9629,
      "step": 104255
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.413729667663574,
      "learning_rate": 0.00034460722138699915,
      "loss": 2.9742,
      "step": 104256
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.282400608062744,
      "learning_rate": 0.00034460317629240615,
      "loss": 2.7652,
      "step": 104257
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.716025948524475,
      "learning_rate": 0.0003445991311895206,
      "loss": 2.7126,
      "step": 104258
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.86506187915802,
      "learning_rate": 0.0003445950860783431,
      "loss": 3.1733,
      "step": 104259
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5897496938705444,
      "learning_rate": 0.0003445910409588746,
      "loss": 3.1575,
      "step": 104260
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7469838857650757,
      "learning_rate": 0.0003445869958311158,
      "loss": 2.888,
      "step": 104261
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7511928081512451,
      "learning_rate": 0.0003445829506950674,
      "loss": 3.055,
      "step": 104262
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6503080129623413,
      "learning_rate": 0.00034457890555073017,
      "loss": 3.093,
      "step": 104263
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9003430604934692,
      "learning_rate": 0.000344574860398105,
      "loss": 2.9769,
      "step": 104264
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8583014011383057,
      "learning_rate": 0.00034457081523719236,
      "loss": 3.2614,
      "step": 104265
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5470951795578003,
      "learning_rate": 0.0003445667700679932,
      "loss": 3.0725,
      "step": 104266
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9282269477844238,
      "learning_rate": 0.0003445627248905084,
      "loss": 3.157,
      "step": 104267
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8167129755020142,
      "learning_rate": 0.00034455867970473837,
      "loss": 3.207,
      "step": 104268
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9245330095291138,
      "learning_rate": 0.0003445546345106841,
      "loss": 2.8922,
      "step": 104269
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2837014198303223,
      "learning_rate": 0.0003445505893083463,
      "loss": 3.147,
      "step": 104270
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.024641752243042,
      "learning_rate": 0.00034454654409772563,
      "loss": 3.1271,
      "step": 104271
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7128944396972656,
      "learning_rate": 0.00034454249887882295,
      "loss": 2.9564,
      "step": 104272
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0224735736846924,
      "learning_rate": 0.000344538453651639,
      "loss": 2.8079,
      "step": 104273
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3931517601013184,
      "learning_rate": 0.00034453440841617454,
      "loss": 2.9814,
      "step": 104274
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6767185926437378,
      "learning_rate": 0.00034453036317243024,
      "loss": 3.2534,
      "step": 104275
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9281721115112305,
      "learning_rate": 0.00034452631792040696,
      "loss": 2.839,
      "step": 104276
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6769134998321533,
      "learning_rate": 0.00034452227266010537,
      "loss": 2.7243,
      "step": 104277
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.539435863494873,
      "learning_rate": 0.00034451822739152617,
      "loss": 2.8481,
      "step": 104278
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8758878707885742,
      "learning_rate": 0.0003445141821146704,
      "loss": 2.8761,
      "step": 104279
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.7164416313171387,
      "learning_rate": 0.0003445101368295384,
      "loss": 3.0171,
      "step": 104280
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9868978261947632,
      "learning_rate": 0.0003445060915361311,
      "loss": 3.2398,
      "step": 104281
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.152893543243408,
      "learning_rate": 0.0003445020462344495,
      "loss": 2.8172,
      "step": 104282
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.639098882675171,
      "learning_rate": 0.00034449800092449393,
      "loss": 3.0276,
      "step": 104283
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1536383628845215,
      "learning_rate": 0.0003444939556062654,
      "loss": 3.06,
      "step": 104284
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.2094521522521973,
      "learning_rate": 0.0003444899102797646,
      "loss": 2.7697,
      "step": 104285
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4779231548309326,
      "learning_rate": 0.0003444858649449923,
      "loss": 3.1577,
      "step": 104286
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7116782665252686,
      "learning_rate": 0.00034448181960194924,
      "loss": 3.1438,
      "step": 104287
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.6226494312286377,
      "learning_rate": 0.00034447777425063625,
      "loss": 3.0027,
      "step": 104288
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.6936991214752197,
      "learning_rate": 0.0003444737288910539,
      "loss": 3.0493,
      "step": 104289
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.5391478538513184,
      "learning_rate": 0.000344469683523203,
      "loss": 2.933,
      "step": 104290
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.057871103286743,
      "learning_rate": 0.00034446563814708445,
      "loss": 2.9384,
      "step": 104291
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5764033794403076,
      "learning_rate": 0.00034446159276269885,
      "loss": 2.7463,
      "step": 104292
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.7340290546417236,
      "learning_rate": 0.000344457547370047,
      "loss": 2.6957,
      "step": 104293
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8753334283828735,
      "learning_rate": 0.00034445350196912966,
      "loss": 3.2381,
      "step": 104294
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7095028162002563,
      "learning_rate": 0.0003444494565599476,
      "loss": 3.251,
      "step": 104295
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8085851669311523,
      "learning_rate": 0.00034444541114250147,
      "loss": 2.9178,
      "step": 104296
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2271885871887207,
      "learning_rate": 0.0003444413657167922,
      "loss": 3.1603,
      "step": 104297
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5544081926345825,
      "learning_rate": 0.0003444373202828204,
      "loss": 2.8763,
      "step": 104298
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1695992946624756,
      "learning_rate": 0.00034443327484058687,
      "loss": 3.013,
      "step": 104299
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8004553318023682,
      "learning_rate": 0.0003444292293900923,
      "loss": 2.8277,
      "step": 104300
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7795722484588623,
      "learning_rate": 0.0003444251839313376,
      "loss": 3.0955,
      "step": 104301
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.812726616859436,
      "learning_rate": 0.00034442113846432326,
      "loss": 2.9497,
      "step": 104302
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.88711678981781,
      "learning_rate": 0.0003444170929890503,
      "loss": 2.8101,
      "step": 104303
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7431318759918213,
      "learning_rate": 0.0003444130475055194,
      "loss": 3.1919,
      "step": 104304
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.161071538925171,
      "learning_rate": 0.00034440900201373125,
      "loss": 3.0666,
      "step": 104305
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9788280725479126,
      "learning_rate": 0.00034440495651368654,
      "loss": 3.1454,
      "step": 104306
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.236231803894043,
      "learning_rate": 0.0003444009110053862,
      "loss": 2.8854,
      "step": 104307
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7849807739257812,
      "learning_rate": 0.0003443968654888309,
      "loss": 3.4202,
      "step": 104308
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1589725017547607,
      "learning_rate": 0.0003443928199640214,
      "loss": 2.9831,
      "step": 104309
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2450149059295654,
      "learning_rate": 0.0003443887744309584,
      "loss": 3.1159,
      "step": 104310
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3816416263580322,
      "learning_rate": 0.0003443847288896426,
      "loss": 3.1467,
      "step": 104311
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8465311527252197,
      "learning_rate": 0.00034438068334007493,
      "loss": 3.0834,
      "step": 104312
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7715946435928345,
      "learning_rate": 0.00034437663778225607,
      "loss": 2.8346,
      "step": 104313
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4735097885131836,
      "learning_rate": 0.0003443725922161867,
      "loss": 2.8188,
      "step": 104314
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6742591857910156,
      "learning_rate": 0.0003443685466418677,
      "loss": 2.8968,
      "step": 104315
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1463782787323,
      "learning_rate": 0.0003443645010592997,
      "loss": 3.1704,
      "step": 104316
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5384540557861328,
      "learning_rate": 0.0003443604554684835,
      "loss": 2.8767,
      "step": 104317
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8282711505889893,
      "learning_rate": 0.0003443564098694199,
      "loss": 3.1318,
      "step": 104318
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1322438716888428,
      "learning_rate": 0.0003443523642621096,
      "loss": 3.0226,
      "step": 104319
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0858757495880127,
      "learning_rate": 0.0003443483186465533,
      "loss": 3.0888,
      "step": 104320
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3424174785614014,
      "learning_rate": 0.0003443442730227518,
      "loss": 2.898,
      "step": 104321
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.386523962020874,
      "learning_rate": 0.00034434022739070593,
      "loss": 3.0072,
      "step": 104322
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2850534915924072,
      "learning_rate": 0.0003443361817504163,
      "loss": 3.0161,
      "step": 104323
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8892285823822021,
      "learning_rate": 0.00034433213610188387,
      "loss": 3.2139,
      "step": 104324
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6900124549865723,
      "learning_rate": 0.00034432809044510917,
      "loss": 2.8137,
      "step": 104325
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8843393325805664,
      "learning_rate": 0.000344324044780093,
      "loss": 3.2114,
      "step": 104326
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8863569498062134,
      "learning_rate": 0.00034431999910683616,
      "loss": 3.2006,
      "step": 104327
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1375367641448975,
      "learning_rate": 0.0003443159534253395,
      "loss": 3.3134,
      "step": 104328
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.922157645225525,
      "learning_rate": 0.00034431190773560357,
      "loss": 2.8959,
      "step": 104329
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.154895544052124,
      "learning_rate": 0.00034430786203762924,
      "loss": 2.9127,
      "step": 104330
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.310511589050293,
      "learning_rate": 0.0003443038163314173,
      "loss": 2.9979,
      "step": 104331
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7611162662506104,
      "learning_rate": 0.0003442997706169684,
      "loss": 3.0022,
      "step": 104332
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.936166524887085,
      "learning_rate": 0.00034429572489428327,
      "loss": 2.9001,
      "step": 104333
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9908719062805176,
      "learning_rate": 0.00034429167916336285,
      "loss": 2.8044,
      "step": 104334
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7683906555175781,
      "learning_rate": 0.0003442876334242077,
      "loss": 3.0415,
      "step": 104335
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.389373779296875,
      "learning_rate": 0.00034428358767681864,
      "loss": 3.0595,
      "step": 104336
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3019537925720215,
      "learning_rate": 0.00034427954192119646,
      "loss": 3.1501,
      "step": 104337
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.80818772315979,
      "learning_rate": 0.0003442754961573419,
      "loss": 2.8682,
      "step": 104338
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7296003103256226,
      "learning_rate": 0.00034427145038525556,
      "loss": 2.9783,
      "step": 104339
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8847569227218628,
      "learning_rate": 0.00034426740460493846,
      "loss": 3.0203,
      "step": 104340
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7833677530288696,
      "learning_rate": 0.0003442633588163912,
      "loss": 3.3125,
      "step": 104341
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.796377182006836,
      "learning_rate": 0.00034425931301961443,
      "loss": 2.9725,
      "step": 104342
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.369056463241577,
      "learning_rate": 0.0003442552672146092,
      "loss": 2.8376,
      "step": 104343
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4939162731170654,
      "learning_rate": 0.0003442512214013759,
      "loss": 2.7542,
      "step": 104344
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6345642805099487,
      "learning_rate": 0.00034424717557991557,
      "loss": 3.1682,
      "step": 104345
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.312011957168579,
      "learning_rate": 0.0003442431297502289,
      "loss": 3.1608,
      "step": 104346
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9226969480514526,
      "learning_rate": 0.0003442390839123165,
      "loss": 3.0964,
      "step": 104347
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8600807189941406,
      "learning_rate": 0.0003442350380661792,
      "loss": 3.1645,
      "step": 104348
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.759999394416809,
      "learning_rate": 0.00034423099221181786,
      "loss": 2.964,
      "step": 104349
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9740076065063477,
      "learning_rate": 0.000344226946349233,
      "loss": 2.7381,
      "step": 104350
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7521063089370728,
      "learning_rate": 0.00034422290047842564,
      "loss": 3.0841,
      "step": 104351
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7034446001052856,
      "learning_rate": 0.0003442188545993965,
      "loss": 3.1029,
      "step": 104352
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6928554773330688,
      "learning_rate": 0.00034421480871214605,
      "loss": 2.8817,
      "step": 104353
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.715480089187622,
      "learning_rate": 0.00034421076281667524,
      "loss": 3.0615,
      "step": 104354
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.109818696975708,
      "learning_rate": 0.000344206716912985,
      "loss": 3.0573,
      "step": 104355
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.164442300796509,
      "learning_rate": 0.00034420267100107566,
      "loss": 2.9678,
      "step": 104356
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.807621717453003,
      "learning_rate": 0.00034419862508094836,
      "loss": 3.0602,
      "step": 104357
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9208743572235107,
      "learning_rate": 0.00034419457915260373,
      "loss": 2.922,
      "step": 104358
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.708993434906006,
      "learning_rate": 0.00034419053321604236,
      "loss": 2.8987,
      "step": 104359
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5583481788635254,
      "learning_rate": 0.00034418648727126524,
      "loss": 3.0091,
      "step": 104360
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.679553747177124,
      "learning_rate": 0.00034418244131827295,
      "loss": 2.9952,
      "step": 104361
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9401373863220215,
      "learning_rate": 0.00034417839535706636,
      "loss": 3.2192,
      "step": 104362
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.764914631843567,
      "learning_rate": 0.00034417434938764614,
      "loss": 2.8279,
      "step": 104363
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.715740442276001,
      "learning_rate": 0.00034417030341001313,
      "loss": 3.0853,
      "step": 104364
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.271422863006592,
      "learning_rate": 0.000344166257424168,
      "loss": 2.9234,
      "step": 104365
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6938612461090088,
      "learning_rate": 0.00034416221143011144,
      "loss": 2.9178,
      "step": 104366
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8049120903015137,
      "learning_rate": 0.00034415816542784444,
      "loss": 2.8732,
      "step": 104367
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.200056791305542,
      "learning_rate": 0.00034415411941736754,
      "loss": 3.0428,
      "step": 104368
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6190950870513916,
      "learning_rate": 0.0003441500733986815,
      "loss": 3.0864,
      "step": 104369
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.793403148651123,
      "learning_rate": 0.00034414602737178727,
      "loss": 3.0534,
      "step": 104370
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.9031739234924316,
      "learning_rate": 0.0003441419813366853,
      "loss": 2.8501,
      "step": 104371
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0012753009796143,
      "learning_rate": 0.00034413793529337655,
      "loss": 2.8187,
      "step": 104372
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8524088859558105,
      "learning_rate": 0.0003441338892418618,
      "loss": 3.065,
      "step": 104373
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.79489803314209,
      "learning_rate": 0.0003441298431821416,
      "loss": 2.6915,
      "step": 104374
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.558415651321411,
      "learning_rate": 0.0003441257971142169,
      "loss": 3.0772,
      "step": 104375
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9057403802871704,
      "learning_rate": 0.00034412175103808846,
      "loss": 3.103,
      "step": 104376
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.799665689468384,
      "learning_rate": 0.00034411770495375684,
      "loss": 2.8074,
      "step": 104377
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.378077983856201,
      "learning_rate": 0.0003441136588612229,
      "loss": 2.9475,
      "step": 104378
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.087096691131592,
      "learning_rate": 0.00034410961276048753,
      "loss": 3.0643,
      "step": 104379
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0169873237609863,
      "learning_rate": 0.00034410556665155124,
      "loss": 3.0251,
      "step": 104380
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0689592361450195,
      "learning_rate": 0.0003441015205344148,
      "loss": 3.0489,
      "step": 104381
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.049649477005005,
      "learning_rate": 0.00034409747440907926,
      "loss": 2.9487,
      "step": 104382
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7575840950012207,
      "learning_rate": 0.00034409342827554503,
      "loss": 2.8342,
      "step": 104383
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.748892068862915,
      "learning_rate": 0.00034408938213381304,
      "loss": 2.8725,
      "step": 104384
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8936222791671753,
      "learning_rate": 0.00034408533598388405,
      "loss": 3.0313,
      "step": 104385
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6569887399673462,
      "learning_rate": 0.0003440812898257587,
      "loss": 3.0731,
      "step": 104386
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7777986526489258,
      "learning_rate": 0.00034407724365943777,
      "loss": 3.0547,
      "step": 104387
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.984073281288147,
      "learning_rate": 0.0003440731974849221,
      "loss": 2.8329,
      "step": 104388
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.27437424659729,
      "learning_rate": 0.0003440691513022125,
      "loss": 2.9481,
      "step": 104389
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5768699645996094,
      "learning_rate": 0.0003440651051113094,
      "loss": 3.1545,
      "step": 104390
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.822964072227478,
      "learning_rate": 0.0003440610589122139,
      "loss": 2.9643,
      "step": 104391
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.293278694152832,
      "learning_rate": 0.00034405701270492656,
      "loss": 3.1514,
      "step": 104392
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8946583271026611,
      "learning_rate": 0.0003440529664894483,
      "loss": 2.9662,
      "step": 104393
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8823744058609009,
      "learning_rate": 0.0003440489202657796,
      "loss": 2.7721,
      "step": 104394
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5914363861083984,
      "learning_rate": 0.00034404487403392147,
      "loss": 2.9425,
      "step": 104395
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0843544006347656,
      "learning_rate": 0.00034404082779387456,
      "loss": 2.839,
      "step": 104396
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7178208827972412,
      "learning_rate": 0.0003440367815456396,
      "loss": 3.0162,
      "step": 104397
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8887286186218262,
      "learning_rate": 0.0003440327352892174,
      "loss": 2.9253,
      "step": 104398
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.806735873222351,
      "learning_rate": 0.0003440286890246086,
      "loss": 3.4185,
      "step": 104399
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.009629964828491,
      "learning_rate": 0.0003440246427518142,
      "loss": 2.8155,
      "step": 104400
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6227625608444214,
      "learning_rate": 0.00034402059647083464,
      "loss": 2.9046,
      "step": 104401
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6134204864501953,
      "learning_rate": 0.00034401655018167083,
      "loss": 2.908,
      "step": 104402
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7198033332824707,
      "learning_rate": 0.0003440125038843236,
      "loss": 2.8506,
      "step": 104403
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7895548343658447,
      "learning_rate": 0.0003440084575787936,
      "loss": 2.9938,
      "step": 104404
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7714580297470093,
      "learning_rate": 0.00034400441126508153,
      "loss": 2.9935,
      "step": 104405
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7274733781814575,
      "learning_rate": 0.00034400036494318826,
      "loss": 3.0362,
      "step": 104406
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.245966911315918,
      "learning_rate": 0.0003439963186131145,
      "loss": 2.8518,
      "step": 104407
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7337723970413208,
      "learning_rate": 0.00034399227227486096,
      "loss": 2.8574,
      "step": 104408
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6969311237335205,
      "learning_rate": 0.0003439882259284284,
      "loss": 3.0535,
      "step": 104409
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.14749813079834,
      "learning_rate": 0.0003439841795738177,
      "loss": 3.0995,
      "step": 104410
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8062244653701782,
      "learning_rate": 0.00034398013321102947,
      "loss": 3.0257,
      "step": 104411
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5426033735275269,
      "learning_rate": 0.00034397608684006445,
      "loss": 2.8538,
      "step": 104412
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.702716588973999,
      "learning_rate": 0.00034397204046092357,
      "loss": 3.0852,
      "step": 104413
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6159663200378418,
      "learning_rate": 0.00034396799407360727,
      "loss": 3.047,
      "step": 104414
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7060998678207397,
      "learning_rate": 0.0003439639476781166,
      "loss": 2.9684,
      "step": 104415
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.010362148284912,
      "learning_rate": 0.0003439599012744523,
      "loss": 3.0261,
      "step": 104416
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7787501811981201,
      "learning_rate": 0.00034395585486261484,
      "loss": 2.9803,
      "step": 104417
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.940891146659851,
      "learning_rate": 0.0003439518084426052,
      "loss": 2.7921,
      "step": 104418
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.790431022644043,
      "learning_rate": 0.00034394776201442415,
      "loss": 2.8614,
      "step": 104419
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8950324058532715,
      "learning_rate": 0.00034394371557807237,
      "loss": 3.0655,
      "step": 104420
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.137843132019043,
      "learning_rate": 0.00034393966913355057,
      "loss": 3.0713,
      "step": 104421
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6408461332321167,
      "learning_rate": 0.00034393562268085964,
      "loss": 2.8859,
      "step": 104422
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7462098598480225,
      "learning_rate": 0.00034393157622000017,
      "loss": 3.0638,
      "step": 104423
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8143889904022217,
      "learning_rate": 0.000343927529750973,
      "loss": 2.9019,
      "step": 104424
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0288851261138916,
      "learning_rate": 0.00034392348327377895,
      "loss": 2.8593,
      "step": 104425
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.915763258934021,
      "learning_rate": 0.00034391943678841857,
      "loss": 3.0967,
      "step": 104426
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.7818350791931152,
      "learning_rate": 0.00034391539029489284,
      "loss": 2.9384,
      "step": 104427
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.1085712909698486,
      "learning_rate": 0.00034391134379320237,
      "loss": 3.1547,
      "step": 104428
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.368484973907471,
      "learning_rate": 0.00034390729728334796,
      "loss": 3.1239,
      "step": 104429
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9421340227127075,
      "learning_rate": 0.0003439032507653303,
      "loss": 3.0135,
      "step": 104430
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1642651557922363,
      "learning_rate": 0.0003438992042391503,
      "loss": 2.838,
      "step": 104431
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.557549238204956,
      "learning_rate": 0.00034389515770480856,
      "loss": 2.8196,
      "step": 104432
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.549745559692383,
      "learning_rate": 0.0003438911111623059,
      "loss": 2.9856,
      "step": 104433
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5257909297943115,
      "learning_rate": 0.0003438870646116431,
      "loss": 2.9429,
      "step": 104434
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7376128435134888,
      "learning_rate": 0.0003438830180528207,
      "loss": 2.9992,
      "step": 104435
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.37725830078125,
      "learning_rate": 0.0003438789714858397,
      "loss": 3.0677,
      "step": 104436
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4405925273895264,
      "learning_rate": 0.00034387492491070085,
      "loss": 3.114,
      "step": 104437
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9634498357772827,
      "learning_rate": 0.00034387087832740476,
      "loss": 2.7851,
      "step": 104438
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0812599658966064,
      "learning_rate": 0.0003438668317359522,
      "loss": 3.2406,
      "step": 104439
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9349361658096313,
      "learning_rate": 0.0003438627851363441,
      "loss": 2.9011,
      "step": 104440
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8089826107025146,
      "learning_rate": 0.00034385873852858096,
      "loss": 2.8133,
      "step": 104441
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1969494819641113,
      "learning_rate": 0.00034385469191266366,
      "loss": 3.1452,
      "step": 104442
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.01820707321167,
      "learning_rate": 0.00034385064528859305,
      "loss": 2.8261,
      "step": 104443
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1230814456939697,
      "learning_rate": 0.0003438465986563697,
      "loss": 2.9779,
      "step": 104444
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0800893306732178,
      "learning_rate": 0.0003438425520159944,
      "loss": 2.9684,
      "step": 104445
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.178699016571045,
      "learning_rate": 0.000343838505367468,
      "loss": 3.0953,
      "step": 104446
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4930901527404785,
      "learning_rate": 0.0003438344587107912,
      "loss": 2.8854,
      "step": 104447
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2187650203704834,
      "learning_rate": 0.00034383041204596467,
      "loss": 2.9711,
      "step": 104448
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.135960578918457,
      "learning_rate": 0.0003438263653729894,
      "loss": 2.8936,
      "step": 104449
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.276148796081543,
      "learning_rate": 0.0003438223186918658,
      "loss": 3.0255,
      "step": 104450
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.512127637863159,
      "learning_rate": 0.00034381827200259484,
      "loss": 3.2605,
      "step": 104451
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2432971000671387,
      "learning_rate": 0.00034381422530517736,
      "loss": 2.8431,
      "step": 104452
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9640815258026123,
      "learning_rate": 0.00034381017859961383,
      "loss": 2.9416,
      "step": 104453
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.554743528366089,
      "learning_rate": 0.00034380613188590524,
      "loss": 3.087,
      "step": 104454
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0267748832702637,
      "learning_rate": 0.0003438020851640523,
      "loss": 2.927,
      "step": 104455
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9415833950042725,
      "learning_rate": 0.00034379803843405567,
      "loss": 2.9372,
      "step": 104456
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9627792835235596,
      "learning_rate": 0.00034379399169591615,
      "loss": 2.8806,
      "step": 104457
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4959893226623535,
      "learning_rate": 0.00034378994494963454,
      "loss": 2.9037,
      "step": 104458
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5086677074432373,
      "learning_rate": 0.0003437858981952115,
      "loss": 3.1028,
      "step": 104459
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.499610424041748,
      "learning_rate": 0.0003437818514326479,
      "loss": 2.9234,
      "step": 104460
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0926623344421387,
      "learning_rate": 0.0003437778046619444,
      "loss": 2.9183,
      "step": 104461
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7282243967056274,
      "learning_rate": 0.0003437737578831018,
      "loss": 2.9231,
      "step": 104462
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.62709641456604,
      "learning_rate": 0.0003437697110961208,
      "loss": 3.2927,
      "step": 104463
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.031996488571167,
      "learning_rate": 0.00034376566430100217,
      "loss": 2.9894,
      "step": 104464
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.58440101146698,
      "learning_rate": 0.0003437616174977468,
      "loss": 2.8926,
      "step": 104465
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.358071804046631,
      "learning_rate": 0.00034375757068635514,
      "loss": 2.7908,
      "step": 104466
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0885331630706787,
      "learning_rate": 0.00034375352386682815,
      "loss": 2.7737,
      "step": 104467
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.790406584739685,
      "learning_rate": 0.00034374947703916665,
      "loss": 2.9511,
      "step": 104468
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6425750255584717,
      "learning_rate": 0.00034374543020337125,
      "loss": 3.2172,
      "step": 104469
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9764883518218994,
      "learning_rate": 0.00034374138335944277,
      "loss": 3.3606,
      "step": 104470
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.000444173812866,
      "learning_rate": 0.00034373733650738196,
      "loss": 3.0387,
      "step": 104471
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8898899555206299,
      "learning_rate": 0.00034373328964718946,
      "loss": 2.8942,
      "step": 104472
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.181549549102783,
      "learning_rate": 0.00034372924277886616,
      "loss": 3.0517,
      "step": 104473
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9956862926483154,
      "learning_rate": 0.0003437251959024128,
      "loss": 2.8438,
      "step": 104474
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.524695873260498,
      "learning_rate": 0.0003437211490178301,
      "loss": 2.8529,
      "step": 104475
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3535830974578857,
      "learning_rate": 0.00034371710212511875,
      "loss": 2.9882,
      "step": 104476
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9066554307937622,
      "learning_rate": 0.0003437130552242797,
      "loss": 3.0433,
      "step": 104477
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.469076633453369,
      "learning_rate": 0.0003437090083153134,
      "loss": 2.7746,
      "step": 104478
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.495813012123108,
      "learning_rate": 0.0003437049613982208,
      "loss": 3.0015,
      "step": 104479
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.452637195587158,
      "learning_rate": 0.0003437009144730027,
      "loss": 3.0278,
      "step": 104480
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.847447156906128,
      "learning_rate": 0.0003436968675396598,
      "loss": 3.0544,
      "step": 104481
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.011711835861206,
      "learning_rate": 0.0003436928205981927,
      "loss": 3.0629,
      "step": 104482
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.9493649005889893,
      "learning_rate": 0.0003436887736486024,
      "loss": 3.234,
      "step": 104483
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.259138822555542,
      "learning_rate": 0.00034368472669088945,
      "loss": 3.0247,
      "step": 104484
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.723140001296997,
      "learning_rate": 0.00034368067972505474,
      "loss": 2.844,
      "step": 104485
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.591047763824463,
      "learning_rate": 0.0003436766327510989,
      "loss": 2.9141,
      "step": 104486
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.424224853515625,
      "learning_rate": 0.0003436725857690228,
      "loss": 2.9688,
      "step": 104487
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.412060260772705,
      "learning_rate": 0.00034366853877882715,
      "loss": 3.0698,
      "step": 104488
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.2038040161132812,
      "learning_rate": 0.00034366449178051267,
      "loss": 2.8742,
      "step": 104489
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.799739956855774,
      "learning_rate": 0.0003436604447740801,
      "loss": 2.9774,
      "step": 104490
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.667562484741211,
      "learning_rate": 0.0003436563977595303,
      "loss": 3.1294,
      "step": 104491
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.198078155517578,
      "learning_rate": 0.000343652350736864,
      "loss": 3.0714,
      "step": 104492
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.450427293777466,
      "learning_rate": 0.00034364830370608177,
      "loss": 3.0387,
      "step": 104493
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8579025268554688,
      "learning_rate": 0.00034364425666718457,
      "loss": 2.9373,
      "step": 104494
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2229013442993164,
      "learning_rate": 0.0003436402096201731,
      "loss": 2.7478,
      "step": 104495
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0312323570251465,
      "learning_rate": 0.00034363616256504804,
      "loss": 3.0604,
      "step": 104496
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.131098747253418,
      "learning_rate": 0.0003436321155018102,
      "loss": 3.037,
      "step": 104497
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9590251445770264,
      "learning_rate": 0.00034362806843046036,
      "loss": 2.863,
      "step": 104498
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.193744421005249,
      "learning_rate": 0.0003436240213509992,
      "loss": 2.954,
      "step": 104499
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0367774963378906,
      "learning_rate": 0.0003436199742634275,
      "loss": 3.0508,
      "step": 104500
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.514492988586426,
      "learning_rate": 0.0003436159271677461,
      "loss": 2.8037,
      "step": 104501
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0140910148620605,
      "learning_rate": 0.0003436118800639556,
      "loss": 2.8399,
      "step": 104502
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9411672353744507,
      "learning_rate": 0.0003436078329520569,
      "loss": 3.0286,
      "step": 104503
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2090818881988525,
      "learning_rate": 0.00034360378583205063,
      "loss": 3.0048,
      "step": 104504
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5570764541625977,
      "learning_rate": 0.0003435997387039376,
      "loss": 2.9318,
      "step": 104505
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0240581035614014,
      "learning_rate": 0.0003435956915677185,
      "loss": 3.1198,
      "step": 104506
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.457198143005371,
      "learning_rate": 0.0003435916444233943,
      "loss": 2.9884,
      "step": 104507
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.200164794921875,
      "learning_rate": 0.00034358759727096545,
      "loss": 2.9397,
      "step": 104508
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4682095050811768,
      "learning_rate": 0.0003435835501104328,
      "loss": 2.9141,
      "step": 104509
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.38967227935791,
      "learning_rate": 0.0003435795029417973,
      "loss": 3.1358,
      "step": 104510
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0031898021698,
      "learning_rate": 0.0003435754557650594,
      "loss": 3.0568,
      "step": 104511
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.905147671699524,
      "learning_rate": 0.0003435714085802201,
      "loss": 3.3991,
      "step": 104512
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.595712900161743,
      "learning_rate": 0.00034356736138728004,
      "loss": 3.0808,
      "step": 104513
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.678757667541504,
      "learning_rate": 0.0003435633141862399,
      "loss": 2.881,
      "step": 104514
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3312811851501465,
      "learning_rate": 0.00034355926697710065,
      "loss": 2.8645,
      "step": 104515
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.778030514717102,
      "learning_rate": 0.0003435552197598629,
      "loss": 3.0409,
      "step": 104516
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5245304107666016,
      "learning_rate": 0.0003435511725345273,
      "loss": 2.9807,
      "step": 104517
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.778531551361084,
      "learning_rate": 0.0003435471253010948,
      "loss": 2.8635,
      "step": 104518
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8353272676467896,
      "learning_rate": 0.00034354307805956605,
      "loss": 3.119,
      "step": 104519
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.4929349422454834,
      "learning_rate": 0.00034353903080994175,
      "loss": 2.8449,
      "step": 104520
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.639377474784851,
      "learning_rate": 0.0003435349835522228,
      "loss": 2.9159,
      "step": 104521
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6598331928253174,
      "learning_rate": 0.00034353093628640993,
      "loss": 2.9453,
      "step": 104522
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.412782669067383,
      "learning_rate": 0.00034352688901250367,
      "loss": 3.0435,
      "step": 104523
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.82594895362854,
      "learning_rate": 0.0003435228417305051,
      "loss": 2.8034,
      "step": 104524
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0964243412017822,
      "learning_rate": 0.0003435187944404147,
      "loss": 3.0262,
      "step": 104525
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5345332622528076,
      "learning_rate": 0.0003435147471422334,
      "loss": 3.1052,
      "step": 104526
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.152008533477783,
      "learning_rate": 0.0003435106998359619,
      "loss": 3.154,
      "step": 104527
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.608219861984253,
      "learning_rate": 0.0003435066525216008,
      "loss": 3.272,
      "step": 104528
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.58099365234375,
      "learning_rate": 0.0003435026051991512,
      "loss": 3.2923,
      "step": 104529
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8165034055709839,
      "learning_rate": 0.0003434985578686135,
      "loss": 2.9267,
      "step": 104530
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1206860542297363,
      "learning_rate": 0.00034349451052998863,
      "loss": 3.052,
      "step": 104531
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7768487930297852,
      "learning_rate": 0.0003434904631832774,
      "loss": 3.0607,
      "step": 104532
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8243712186813354,
      "learning_rate": 0.0003434864158284803,
      "loss": 3.164,
      "step": 104533
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.050588607788086,
      "learning_rate": 0.00034348236846559835,
      "loss": 3.0953,
      "step": 104534
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9776097536087036,
      "learning_rate": 0.00034347832109463225,
      "loss": 3.1065,
      "step": 104535
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6800822019577026,
      "learning_rate": 0.00034347427371558267,
      "loss": 2.8995,
      "step": 104536
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1922640800476074,
      "learning_rate": 0.00034347022632845036,
      "loss": 3.0907,
      "step": 104537
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.134341239929199,
      "learning_rate": 0.0003434661789332362,
      "loss": 3.0619,
      "step": 104538
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6375960111618042,
      "learning_rate": 0.0003434621315299408,
      "loss": 2.8831,
      "step": 104539
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1926393508911133,
      "learning_rate": 0.00034345808411856497,
      "loss": 3.1959,
      "step": 104540
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5609676837921143,
      "learning_rate": 0.0003434540366991095,
      "loss": 2.9327,
      "step": 104541
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9822672605514526,
      "learning_rate": 0.00034344998927157504,
      "loss": 2.7831,
      "step": 104542
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.4524753093719482,
      "learning_rate": 0.0003434459418359624,
      "loss": 2.9848,
      "step": 104543
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9140492677688599,
      "learning_rate": 0.0003434418943922724,
      "loss": 3.0675,
      "step": 104544
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.427725315093994,
      "learning_rate": 0.0003434378469405058,
      "loss": 2.9597,
      "step": 104545
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.824875831604004,
      "learning_rate": 0.00034343379948066307,
      "loss": 3.0618,
      "step": 104546
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2780351638793945,
      "learning_rate": 0.0003434297520127454,
      "loss": 3.0916,
      "step": 104547
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9000734090805054,
      "learning_rate": 0.00034342570453675317,
      "loss": 3.0748,
      "step": 104548
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0154550075531006,
      "learning_rate": 0.00034342165705268733,
      "loss": 2.8333,
      "step": 104549
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.680872917175293,
      "learning_rate": 0.0003434176095605486,
      "loss": 2.8545,
      "step": 104550
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.356719732284546,
      "learning_rate": 0.00034341356206033766,
      "loss": 3.0139,
      "step": 104551
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9169644117355347,
      "learning_rate": 0.0003434095145520554,
      "loss": 2.9176,
      "step": 104552
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.9078915119171143,
      "learning_rate": 0.00034340546703570245,
      "loss": 2.8704,
      "step": 104553
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.461550235748291,
      "learning_rate": 0.0003434014195112796,
      "loss": 3.0421,
      "step": 104554
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9137537479400635,
      "learning_rate": 0.0003433973719787876,
      "loss": 3.0838,
      "step": 104555
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0342564582824707,
      "learning_rate": 0.0003433933244382273,
      "loss": 2.9473,
      "step": 104556
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7491039037704468,
      "learning_rate": 0.0003433892768895993,
      "loss": 3.002,
      "step": 104557
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2604386806488037,
      "learning_rate": 0.00034338522933290437,
      "loss": 3.026,
      "step": 104558
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3767969608306885,
      "learning_rate": 0.0003433811817681433,
      "loss": 2.9195,
      "step": 104559
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.786266803741455,
      "learning_rate": 0.0003433771341953169,
      "loss": 3.0905,
      "step": 104560
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.134310483932495,
      "learning_rate": 0.0003433730866144258,
      "loss": 2.9016,
      "step": 104561
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1805684566497803,
      "learning_rate": 0.00034336903902547095,
      "loss": 3.0242,
      "step": 104562
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7219220399856567,
      "learning_rate": 0.0003433649914284529,
      "loss": 3.1947,
      "step": 104563
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6216635704040527,
      "learning_rate": 0.0003433609438233725,
      "loss": 3.2546,
      "step": 104564
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.431474208831787,
      "learning_rate": 0.00034335689621023044,
      "loss": 2.8703,
      "step": 104565
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.940030574798584,
      "learning_rate": 0.0003433528485890276,
      "loss": 2.7632,
      "step": 104566
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.604983925819397,
      "learning_rate": 0.00034334880095976454,
      "loss": 3.0773,
      "step": 104567
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8368780612945557,
      "learning_rate": 0.00034334475332244217,
      "loss": 2.6498,
      "step": 104568
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.135404586791992,
      "learning_rate": 0.00034334070567706125,
      "loss": 2.7783,
      "step": 104569
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.689702033996582,
      "learning_rate": 0.00034333665802362243,
      "loss": 3.0531,
      "step": 104570
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.874251365661621,
      "learning_rate": 0.00034333261036212647,
      "loss": 3.1684,
      "step": 104571
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2267603874206543,
      "learning_rate": 0.0003433285626925742,
      "loss": 2.6134,
      "step": 104572
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5504711866378784,
      "learning_rate": 0.00034332451501496627,
      "loss": 3.126,
      "step": 104573
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3367130756378174,
      "learning_rate": 0.0003433204673293036,
      "loss": 2.7182,
      "step": 104574
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6794377565383911,
      "learning_rate": 0.00034331641963558673,
      "loss": 2.8265,
      "step": 104575
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.093982696533203,
      "learning_rate": 0.0003433123719338166,
      "loss": 2.8629,
      "step": 104576
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0339419841766357,
      "learning_rate": 0.00034330832422399386,
      "loss": 3.0364,
      "step": 104577
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8114814758300781,
      "learning_rate": 0.0003433042765061192,
      "loss": 2.9815,
      "step": 104578
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.3642702102661133,
      "learning_rate": 0.00034330022878019357,
      "loss": 2.9485,
      "step": 104579
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7208075523376465,
      "learning_rate": 0.0003432961810462176,
      "loss": 2.6312,
      "step": 104580
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.259021520614624,
      "learning_rate": 0.00034329213330419204,
      "loss": 2.8851,
      "step": 104581
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2638628482818604,
      "learning_rate": 0.00034328808555411763,
      "loss": 3.0104,
      "step": 104582
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6811633110046387,
      "learning_rate": 0.00034328403779599524,
      "loss": 2.9812,
      "step": 104583
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.179738998413086,
      "learning_rate": 0.00034327999002982536,
      "loss": 2.9394,
      "step": 104584
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0263564586639404,
      "learning_rate": 0.000343275942255609,
      "loss": 3.1319,
      "step": 104585
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.035935401916504,
      "learning_rate": 0.00034327189447334687,
      "loss": 3.028,
      "step": 104586
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5322821140289307,
      "learning_rate": 0.00034326784668303967,
      "loss": 3.0065,
      "step": 104587
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.65535306930542,
      "learning_rate": 0.00034326379888468807,
      "loss": 3.024,
      "step": 104588
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5848209857940674,
      "learning_rate": 0.00034325975107829304,
      "loss": 2.8113,
      "step": 104589
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8066240549087524,
      "learning_rate": 0.00034325570326385513,
      "loss": 3.047,
      "step": 104590
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1455976963043213,
      "learning_rate": 0.0003432516554413751,
      "loss": 3.0168,
      "step": 104591
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.600399971008301,
      "learning_rate": 0.00034324760761085397,
      "loss": 2.9778,
      "step": 104592
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5292725563049316,
      "learning_rate": 0.0003432435597722921,
      "loss": 3.1103,
      "step": 104593
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8247668743133545,
      "learning_rate": 0.0003432395119256905,
      "loss": 2.9988,
      "step": 104594
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5623230934143066,
      "learning_rate": 0.0003432354640710498,
      "loss": 2.9052,
      "step": 104595
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0701100826263428,
      "learning_rate": 0.000343231416208371,
      "loss": 2.8859,
      "step": 104596
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9815982580184937,
      "learning_rate": 0.00034322736833765447,
      "loss": 3.3319,
      "step": 104597
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7847011089324951,
      "learning_rate": 0.00034322332045890125,
      "loss": 2.8292,
      "step": 104598
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.089397668838501,
      "learning_rate": 0.00034321927257211204,
      "loss": 3.0821,
      "step": 104599
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8079118728637695,
      "learning_rate": 0.0003432152246772874,
      "loss": 3.0734,
      "step": 104600
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.5947277545928955,
      "learning_rate": 0.0003432111767744283,
      "loss": 2.8526,
      "step": 104601
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7151787281036377,
      "learning_rate": 0.0003432071288635355,
      "loss": 2.8469,
      "step": 104602
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9093167781829834,
      "learning_rate": 0.0003432030809446096,
      "loss": 2.7666,
      "step": 104603
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.894961357116699,
      "learning_rate": 0.00034319903301765147,
      "loss": 2.928,
      "step": 104604
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0085952281951904,
      "learning_rate": 0.0003431949850826618,
      "loss": 2.9193,
      "step": 104605
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9081991910934448,
      "learning_rate": 0.00034319093713964144,
      "loss": 3.0343,
      "step": 104606
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.79228675365448,
      "learning_rate": 0.00034318688918859097,
      "loss": 3.3014,
      "step": 104607
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.676835298538208,
      "learning_rate": 0.00034318284122951127,
      "loss": 2.7293,
      "step": 104608
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.907832622528076,
      "learning_rate": 0.0003431787932624031,
      "loss": 2.6341,
      "step": 104609
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.787505865097046,
      "learning_rate": 0.00034317474528726714,
      "loss": 3.024,
      "step": 104610
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5051207542419434,
      "learning_rate": 0.0003431706973041042,
      "loss": 3.0141,
      "step": 104611
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.197554588317871,
      "learning_rate": 0.000343166649312915,
      "loss": 2.9988,
      "step": 104612
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.649790048599243,
      "learning_rate": 0.0003431626013137003,
      "loss": 3.0722,
      "step": 104613
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.447411298751831,
      "learning_rate": 0.0003431585533064609,
      "loss": 2.8696,
      "step": 104614
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9651306867599487,
      "learning_rate": 0.0003431545052911975,
      "loss": 2.914,
      "step": 104615
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.802118182182312,
      "learning_rate": 0.00034315045726791084,
      "loss": 3.0525,
      "step": 104616
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6673851013183594,
      "learning_rate": 0.0003431464092366017,
      "loss": 2.9736,
      "step": 104617
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.059093713760376,
      "learning_rate": 0.00034314236119727077,
      "loss": 3.0829,
      "step": 104618
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7581866979599,
      "learning_rate": 0.0003431383131499189,
      "loss": 2.8336,
      "step": 104619
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3862504959106445,
      "learning_rate": 0.0003431342650945469,
      "loss": 3.2961,
      "step": 104620
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.9954733848571777,
      "learning_rate": 0.00034313021703115534,
      "loss": 3.0136,
      "step": 104621
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0600905418395996,
      "learning_rate": 0.000343126168959745,
      "loss": 2.8768,
      "step": 104622
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0327041149139404,
      "learning_rate": 0.0003431221208803168,
      "loss": 2.7302,
      "step": 104623
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7904796600341797,
      "learning_rate": 0.00034311807279287134,
      "loss": 2.9475,
      "step": 104624
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9485743045806885,
      "learning_rate": 0.00034311402469740937,
      "loss": 3.2147,
      "step": 104625
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6676115989685059,
      "learning_rate": 0.0003431099765939318,
      "loss": 3.1071,
      "step": 104626
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.720601201057434,
      "learning_rate": 0.00034310592848243917,
      "loss": 3.0344,
      "step": 104627
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9743974208831787,
      "learning_rate": 0.00034310188036293235,
      "loss": 3.1878,
      "step": 104628
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.591369867324829,
      "learning_rate": 0.0003430978322354122,
      "loss": 2.8028,
      "step": 104629
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7936981916427612,
      "learning_rate": 0.0003430937840998792,
      "loss": 2.9404,
      "step": 104630
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7439779043197632,
      "learning_rate": 0.0003430897359563342,
      "loss": 3.0109,
      "step": 104631
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.069866895675659,
      "learning_rate": 0.00034308568780477816,
      "loss": 3.0484,
      "step": 104632
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7609249353408813,
      "learning_rate": 0.00034308163964521165,
      "loss": 3.0738,
      "step": 104633
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.680879831314087,
      "learning_rate": 0.0003430775914776354,
      "loss": 3.0261,
      "step": 104634
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7155693769454956,
      "learning_rate": 0.00034307354330205027,
      "loss": 3.2393,
      "step": 104635
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5596188306808472,
      "learning_rate": 0.0003430694951184569,
      "loss": 2.8793,
      "step": 104636
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.676456332206726,
      "learning_rate": 0.00034306544692685613,
      "loss": 3.1238,
      "step": 104637
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.023566961288452,
      "learning_rate": 0.0003430613987272487,
      "loss": 3.0057,
      "step": 104638
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6444766521453857,
      "learning_rate": 0.00034305735051963525,
      "loss": 2.9609,
      "step": 104639
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0908517837524414,
      "learning_rate": 0.0003430533023040167,
      "loss": 2.9738,
      "step": 104640
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9802789688110352,
      "learning_rate": 0.0003430492540803937,
      "loss": 3.0648,
      "step": 104641
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.144887924194336,
      "learning_rate": 0.000343045205848767,
      "loss": 2.8502,
      "step": 104642
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.157578945159912,
      "learning_rate": 0.0003430411576091374,
      "loss": 3.0581,
      "step": 104643
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9774190187454224,
      "learning_rate": 0.0003430371093615058,
      "loss": 3.1637,
      "step": 104644
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.020503282546997,
      "learning_rate": 0.0003430330611058726,
      "loss": 3.1736,
      "step": 104645
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6629626750946045,
      "learning_rate": 0.0003430290128422388,
      "loss": 2.9563,
      "step": 104646
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6553564071655273,
      "learning_rate": 0.0003430249645706051,
      "loss": 2.974,
      "step": 104647
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.771666169166565,
      "learning_rate": 0.00034302091629097217,
      "loss": 3.0631,
      "step": 104648
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9916701316833496,
      "learning_rate": 0.00034301686800334094,
      "loss": 3.0888,
      "step": 104649
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3467791080474854,
      "learning_rate": 0.0003430128197077121,
      "loss": 2.8689,
      "step": 104650
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6764096021652222,
      "learning_rate": 0.00034300877140408623,
      "loss": 2.7899,
      "step": 104651
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.446566104888916,
      "learning_rate": 0.0003430047230924643,
      "loss": 2.8461,
      "step": 104652
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.818160891532898,
      "learning_rate": 0.0003430006747728469,
      "loss": 3.0185,
      "step": 104653
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6430332660675049,
      "learning_rate": 0.00034299662644523497,
      "loss": 3.0112,
      "step": 104654
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6993108987808228,
      "learning_rate": 0.0003429925781096291,
      "loss": 2.9558,
      "step": 104655
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7519115209579468,
      "learning_rate": 0.00034298852976603005,
      "loss": 3.0115,
      "step": 104656
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6574208736419678,
      "learning_rate": 0.00034298448141443876,
      "loss": 3.1367,
      "step": 104657
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6596944332122803,
      "learning_rate": 0.00034298043305485575,
      "loss": 2.7677,
      "step": 104658
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7641838788986206,
      "learning_rate": 0.00034297638468728186,
      "loss": 2.7833,
      "step": 104659
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9623055458068848,
      "learning_rate": 0.0003429723363117179,
      "loss": 3.2093,
      "step": 104660
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9192613363265991,
      "learning_rate": 0.0003429682879281645,
      "loss": 2.9262,
      "step": 104661
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.337839365005493,
      "learning_rate": 0.00034296423953662254,
      "loss": 3.0631,
      "step": 104662
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.010010004043579,
      "learning_rate": 0.00034296019113709265,
      "loss": 2.9875,
      "step": 104663
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9867208003997803,
      "learning_rate": 0.0003429561427295757,
      "loss": 2.8786,
      "step": 104664
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0121262073516846,
      "learning_rate": 0.0003429520943140724,
      "loss": 3.044,
      "step": 104665
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6539859771728516,
      "learning_rate": 0.00034294804589058347,
      "loss": 3.1382,
      "step": 104666
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.857040524482727,
      "learning_rate": 0.0003429439974591097,
      "loss": 3.048,
      "step": 104667
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0377886295318604,
      "learning_rate": 0.0003429399490196519,
      "loss": 2.9285,
      "step": 104668
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.960015058517456,
      "learning_rate": 0.00034293590057221066,
      "loss": 2.9752,
      "step": 104669
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5607017278671265,
      "learning_rate": 0.00034293185211678684,
      "loss": 2.9783,
      "step": 104670
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.944968342781067,
      "learning_rate": 0.0003429278036533811,
      "loss": 2.8575,
      "step": 104671
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.383911609649658,
      "learning_rate": 0.0003429237551819945,
      "loss": 2.9295,
      "step": 104672
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.435662269592285,
      "learning_rate": 0.00034291970670262737,
      "loss": 3.0704,
      "step": 104673
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0442745685577393,
      "learning_rate": 0.0003429156582152807,
      "loss": 3.1701,
      "step": 104674
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0905652046203613,
      "learning_rate": 0.00034291160971995524,
      "loss": 2.861,
      "step": 104675
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1266465187072754,
      "learning_rate": 0.00034290756121665164,
      "loss": 3.1372,
      "step": 104676
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7048819065093994,
      "learning_rate": 0.00034290351270537076,
      "loss": 2.8231,
      "step": 104677
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8702913522720337,
      "learning_rate": 0.0003428994641861133,
      "loss": 2.9908,
      "step": 104678
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6276583671569824,
      "learning_rate": 0.00034289541565888,
      "loss": 3.1207,
      "step": 104679
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0467371940612793,
      "learning_rate": 0.00034289136712367164,
      "loss": 2.9156,
      "step": 104680
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.725346326828003,
      "learning_rate": 0.00034288731858048903,
      "loss": 3.1844,
      "step": 104681
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.479949712753296,
      "learning_rate": 0.00034288327002933276,
      "loss": 2.9777,
      "step": 104682
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0466225147247314,
      "learning_rate": 0.0003428792214702037,
      "loss": 2.7397,
      "step": 104683
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8939439058303833,
      "learning_rate": 0.00034287517290310267,
      "loss": 2.8,
      "step": 104684
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9647759199142456,
      "learning_rate": 0.0003428711243280302,
      "loss": 3.0922,
      "step": 104685
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.760040521621704,
      "learning_rate": 0.0003428670757449872,
      "loss": 3.0572,
      "step": 104686
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1690902709960938,
      "learning_rate": 0.00034286302715397454,
      "loss": 2.8139,
      "step": 104687
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1387970447540283,
      "learning_rate": 0.0003428589785549927,
      "loss": 3.023,
      "step": 104688
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9511111974716187,
      "learning_rate": 0.0003428549299480426,
      "loss": 3.1076,
      "step": 104689
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6333268880844116,
      "learning_rate": 0.000342850881333125,
      "loss": 3.0309,
      "step": 104690
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6841940879821777,
      "learning_rate": 0.0003428468327102406,
      "loss": 2.7821,
      "step": 104691
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.288419485092163,
      "learning_rate": 0.00034284278407939017,
      "loss": 3.1856,
      "step": 104692
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8501888513565063,
      "learning_rate": 0.0003428387354405744,
      "loss": 2.754,
      "step": 104693
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.404019832611084,
      "learning_rate": 0.0003428346867937941,
      "loss": 2.8205,
      "step": 104694
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.941688895225525,
      "learning_rate": 0.00034283063813905003,
      "loss": 3.1066,
      "step": 104695
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.619577169418335,
      "learning_rate": 0.000342826589476343,
      "loss": 2.9731,
      "step": 104696
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0686163902282715,
      "learning_rate": 0.0003428225408056737,
      "loss": 3.0647,
      "step": 104697
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1025431156158447,
      "learning_rate": 0.00034281849212704284,
      "loss": 2.941,
      "step": 104698
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6642320156097412,
      "learning_rate": 0.00034281444344045123,
      "loss": 3.0162,
      "step": 104699
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9026176929473877,
      "learning_rate": 0.0003428103947458996,
      "loss": 3.2715,
      "step": 104700
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6307939291000366,
      "learning_rate": 0.0003428063460433887,
      "loss": 3.1937,
      "step": 104701
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4451701641082764,
      "learning_rate": 0.00034280229733291926,
      "loss": 3.056,
      "step": 104702
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.033191680908203,
      "learning_rate": 0.0003427982486144921,
      "loss": 3.1202,
      "step": 104703
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7147265672683716,
      "learning_rate": 0.00034279419988810796,
      "loss": 3.1604,
      "step": 104704
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.606274962425232,
      "learning_rate": 0.0003427901511537676,
      "loss": 3.2847,
      "step": 104705
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6669325828552246,
      "learning_rate": 0.00034278610241147164,
      "loss": 3.2727,
      "step": 104706
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.020136594772339,
      "learning_rate": 0.000342782053661221,
      "loss": 3.0882,
      "step": 104707
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.116434335708618,
      "learning_rate": 0.0003427780049030164,
      "loss": 3.0825,
      "step": 104708
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7661536931991577,
      "learning_rate": 0.0003427739561368585,
      "loss": 2.9706,
      "step": 104709
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8681014776229858,
      "learning_rate": 0.0003427699073627481,
      "loss": 2.9108,
      "step": 104710
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.252039670944214,
      "learning_rate": 0.000342765858580686,
      "loss": 2.8271,
      "step": 104711
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.606970191001892,
      "learning_rate": 0.00034276180979067296,
      "loss": 2.7996,
      "step": 104712
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6520614624023438,
      "learning_rate": 0.0003427577609927096,
      "loss": 2.9951,
      "step": 104713
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.200017213821411,
      "learning_rate": 0.0003427537121867969,
      "loss": 3.1931,
      "step": 104714
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.7824270725250244,
      "learning_rate": 0.00034274966337293535,
      "loss": 3.0287,
      "step": 104715
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0803115367889404,
      "learning_rate": 0.0003427456145511259,
      "loss": 3.036,
      "step": 104716
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.108759880065918,
      "learning_rate": 0.0003427415657213692,
      "loss": 2.8212,
      "step": 104717
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.148491621017456,
      "learning_rate": 0.00034273751688366604,
      "loss": 3.1283,
      "step": 104718
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.282944917678833,
      "learning_rate": 0.00034273346803801716,
      "loss": 2.7562,
      "step": 104719
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6613011360168457,
      "learning_rate": 0.00034272941918442334,
      "loss": 2.9037,
      "step": 104720
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9021703004837036,
      "learning_rate": 0.0003427253703228853,
      "loss": 3.0004,
      "step": 104721
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.2395596504211426,
      "learning_rate": 0.0003427213214534038,
      "loss": 3.2761,
      "step": 104722
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.059601783752441,
      "learning_rate": 0.00034271727257597967,
      "loss": 2.8484,
      "step": 104723
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.048588275909424,
      "learning_rate": 0.00034271322369061353,
      "loss": 2.9742,
      "step": 104724
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8816008567810059,
      "learning_rate": 0.0003427091747973062,
      "loss": 3.0091,
      "step": 104725
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7670834064483643,
      "learning_rate": 0.0003427051258960584,
      "loss": 3.2609,
      "step": 104726
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7289022207260132,
      "learning_rate": 0.0003427010769868709,
      "loss": 2.7306,
      "step": 104727
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8320766687393188,
      "learning_rate": 0.0003426970280697446,
      "loss": 3.0783,
      "step": 104728
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.816947102546692,
      "learning_rate": 0.00034269297914468,
      "loss": 2.9092,
      "step": 104729
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.116292715072632,
      "learning_rate": 0.000342688930211678,
      "loss": 2.8636,
      "step": 104730
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9189083576202393,
      "learning_rate": 0.00034268488127073927,
      "loss": 2.8456,
      "step": 104731
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.4167065620422363,
      "learning_rate": 0.0003426808323218647,
      "loss": 2.8145,
      "step": 104732
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8551008701324463,
      "learning_rate": 0.00034267678336505494,
      "loss": 2.9267,
      "step": 104733
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7850710153579712,
      "learning_rate": 0.0003426727344003107,
      "loss": 3.0326,
      "step": 104734
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8277347087860107,
      "learning_rate": 0.00034266868542763286,
      "loss": 2.9426,
      "step": 104735
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.184032440185547,
      "learning_rate": 0.0003426646364470221,
      "loss": 2.8778,
      "step": 104736
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.7436881065368652,
      "learning_rate": 0.0003426605874584791,
      "loss": 2.9849,
      "step": 104737
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.038975238800049,
      "learning_rate": 0.00034265653846200474,
      "loss": 3.132,
      "step": 104738
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3848226070404053,
      "learning_rate": 0.0003426524894575998,
      "loss": 2.9859,
      "step": 104739
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.7245090007781982,
      "learning_rate": 0.00034264844044526487,
      "loss": 2.8914,
      "step": 104740
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.138232707977295,
      "learning_rate": 0.0003426443914250008,
      "loss": 3.1751,
      "step": 104741
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0266542434692383,
      "learning_rate": 0.0003426403423968084,
      "loss": 3.1472,
      "step": 104742
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8442447185516357,
      "learning_rate": 0.00034263629336068823,
      "loss": 2.8276,
      "step": 104743
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8969014883041382,
      "learning_rate": 0.0003426322443166412,
      "loss": 3.3275,
      "step": 104744
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9298417568206787,
      "learning_rate": 0.0003426281952646681,
      "loss": 3.1715,
      "step": 104745
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8540966510772705,
      "learning_rate": 0.00034262414620476957,
      "loss": 2.9238,
      "step": 104746
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1158571243286133,
      "learning_rate": 0.0003426200971369464,
      "loss": 3.0816,
      "step": 104747
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5884807109832764,
      "learning_rate": 0.0003426160480611994,
      "loss": 3.082,
      "step": 104748
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6374216079711914,
      "learning_rate": 0.00034261199897752927,
      "loss": 2.9619,
      "step": 104749
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6544530391693115,
      "learning_rate": 0.0003426079498859367,
      "loss": 3.1116,
      "step": 104750
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.680375099182129,
      "learning_rate": 0.00034260390078642253,
      "loss": 3.0699,
      "step": 104751
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.433112621307373,
      "learning_rate": 0.00034259985167898744,
      "loss": 2.9322,
      "step": 104752
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0471155643463135,
      "learning_rate": 0.00034259580256363237,
      "loss": 2.9413,
      "step": 104753
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7150323390960693,
      "learning_rate": 0.00034259175344035785,
      "loss": 2.9519,
      "step": 104754
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.3711278438568115,
      "learning_rate": 0.0003425877043091647,
      "loss": 3.0394,
      "step": 104755
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.9873034954071045,
      "learning_rate": 0.0003425836551700537,
      "loss": 2.9687,
      "step": 104756
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9919477701187134,
      "learning_rate": 0.0003425796060230257,
      "loss": 3.3191,
      "step": 104757
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8857159614562988,
      "learning_rate": 0.00034257555686808123,
      "loss": 2.9291,
      "step": 104758
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9671186208724976,
      "learning_rate": 0.00034257150770522114,
      "loss": 2.9801,
      "step": 104759
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9411015510559082,
      "learning_rate": 0.00034256745853444636,
      "loss": 3.1747,
      "step": 104760
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.659629225730896,
      "learning_rate": 0.00034256340935575735,
      "loss": 2.9625,
      "step": 104761
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9108061790466309,
      "learning_rate": 0.00034255936016915496,
      "loss": 3.133,
      "step": 104762
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9031962156295776,
      "learning_rate": 0.00034255531097464017,
      "loss": 2.9132,
      "step": 104763
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7548880577087402,
      "learning_rate": 0.0003425512617722134,
      "loss": 3.0898,
      "step": 104764
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4176688194274902,
      "learning_rate": 0.00034254721256187556,
      "loss": 2.8512,
      "step": 104765
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7803783416748047,
      "learning_rate": 0.0003425431633436274,
      "loss": 3.0806,
      "step": 104766
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.745537519454956,
      "learning_rate": 0.0003425391141174697,
      "loss": 3.2249,
      "step": 104767
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8199944496154785,
      "learning_rate": 0.0003425350648834031,
      "loss": 2.9263,
      "step": 104768
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6959166526794434,
      "learning_rate": 0.0003425310156414285,
      "loss": 2.8713,
      "step": 104769
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3237788677215576,
      "learning_rate": 0.0003425269663915466,
      "loss": 2.5024,
      "step": 104770
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8142071962356567,
      "learning_rate": 0.00034252291713375804,
      "loss": 3.1544,
      "step": 104771
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.5583997964859009,
      "learning_rate": 0.00034251886786806375,
      "loss": 3.2389,
      "step": 104772
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1591761112213135,
      "learning_rate": 0.00034251481859446433,
      "loss": 3.0543,
      "step": 104773
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1937170028686523,
      "learning_rate": 0.00034251076931296064,
      "loss": 2.7646,
      "step": 104774
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7759084701538086,
      "learning_rate": 0.00034250672002355344,
      "loss": 3.0383,
      "step": 104775
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.3930656909942627,
      "learning_rate": 0.0003425026707262434,
      "loss": 3.1529,
      "step": 104776
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.585986852645874,
      "learning_rate": 0.0003424986214210312,
      "loss": 2.886,
      "step": 104777
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7206902503967285,
      "learning_rate": 0.0003424945721079179,
      "loss": 2.9083,
      "step": 104778
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.862418293952942,
      "learning_rate": 0.0003424905227869039,
      "loss": 3.031,
      "step": 104779
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.417947292327881,
      "learning_rate": 0.0003424864734579902,
      "loss": 2.9336,
      "step": 104780
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6343882083892822,
      "learning_rate": 0.0003424824241211775,
      "loss": 3.1022,
      "step": 104781
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7824773788452148,
      "learning_rate": 0.0003424783747764664,
      "loss": 2.9575,
      "step": 104782
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7443867921829224,
      "learning_rate": 0.0003424743254238578,
      "loss": 2.8088,
      "step": 104783
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.0923197269439697,
      "learning_rate": 0.00034247027606335245,
      "loss": 3.0278,
      "step": 104784
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8078898191452026,
      "learning_rate": 0.0003424662266949511,
      "loss": 3.1017,
      "step": 104785
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.2786478996276855,
      "learning_rate": 0.0003424621773186544,
      "loss": 3.1004,
      "step": 104786
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.731578826904297,
      "learning_rate": 0.0003424581279344633,
      "loss": 2.7427,
      "step": 104787
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6398208141326904,
      "learning_rate": 0.00034245407854237834,
      "loss": 2.9644,
      "step": 104788
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.211731433868408,
      "learning_rate": 0.0003424500291424003,
      "loss": 2.8395,
      "step": 104789
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.323942184448242,
      "learning_rate": 0.00034244597973453014,
      "loss": 2.8663,
      "step": 104790
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.805680513381958,
      "learning_rate": 0.0003424419303187684,
      "loss": 2.9796,
      "step": 104791
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.502284526824951,
      "learning_rate": 0.0003424378808951159,
      "loss": 2.8984,
      "step": 104792
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1167049407958984,
      "learning_rate": 0.00034243383146357335,
      "loss": 2.7776,
      "step": 104793
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6767420768737793,
      "learning_rate": 0.0003424297820241417,
      "loss": 2.6892,
      "step": 104794
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9318814277648926,
      "learning_rate": 0.00034242573257682146,
      "loss": 2.8261,
      "step": 104795
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.8994476795196533,
      "learning_rate": 0.00034242168312161346,
      "loss": 3.2108,
      "step": 104796
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4291319847106934,
      "learning_rate": 0.00034241763365851853,
      "loss": 2.9555,
      "step": 104797
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8330334424972534,
      "learning_rate": 0.0003424135841875373,
      "loss": 2.7654,
      "step": 104798
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7370929718017578,
      "learning_rate": 0.0003424095347086706,
      "loss": 2.9785,
      "step": 104799
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8473573923110962,
      "learning_rate": 0.0003424054852219192,
      "loss": 2.89,
      "step": 104800
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.6009886264801025,
      "learning_rate": 0.0003424014357272838,
      "loss": 3.1647,
      "step": 104801
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1235289573669434,
      "learning_rate": 0.00034239738622476504,
      "loss": 2.7375,
      "step": 104802
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9414422512054443,
      "learning_rate": 0.00034239333671436405,
      "loss": 2.9927,
      "step": 104803
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8263365030288696,
      "learning_rate": 0.0003423892871960811,
      "loss": 2.9307,
      "step": 104804
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.6060761213302612,
      "learning_rate": 0.00034238523766991734,
      "loss": 2.9232,
      "step": 104805
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.092327117919922,
      "learning_rate": 0.00034238118813587336,
      "loss": 2.9605,
      "step": 104806
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.7238444089889526,
      "learning_rate": 0.00034237713859394984,
      "loss": 3.0009,
      "step": 104807
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.114459991455078,
      "learning_rate": 0.00034237308904414764,
      "loss": 2.9573,
      "step": 104808
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8296382427215576,
      "learning_rate": 0.0003423690394864675,
      "loss": 2.7707,
      "step": 104809
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.248713493347168,
      "learning_rate": 0.00034236498992091017,
      "loss": 2.9534,
      "step": 104810
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.9499064683914185,
      "learning_rate": 0.0003423609403474763,
      "loss": 2.9937,
      "step": 104811
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.8425127267837524,
      "learning_rate": 0.0003423568907661669,
      "loss": 2.8534,
      "step": 104812
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0654971599578857,
      "learning_rate": 0.0003423528411769824,
      "loss": 2.7893,
      "step": 104813
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.3022079467773438,
      "learning_rate": 0.00034234879157992376,
      "loss": 3.2411,
      "step": 104814
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.746435642242432,
      "learning_rate": 0.0003423447419749917,
      "loss": 2.9622,
      "step": 104815
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.751936912536621,
      "learning_rate": 0.00034234069236218696,
      "loss": 3.0482,
      "step": 104816
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.0504603385925293,
      "learning_rate": 0.00034233664274151024,
      "loss": 2.88,
      "step": 104817
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.003152847290039,
      "learning_rate": 0.0003423325931129624,
      "loss": 3.0256,
      "step": 104818
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.6779959201812744,
      "learning_rate": 0.0003423285434765441,
      "loss": 2.8778,
      "step": 104819
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.570589065551758,
      "learning_rate": 0.00034232449383225604,
      "loss": 3.1119,
      "step": 104820
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.1348538398742676,
      "learning_rate": 0.00034232044418009913,
      "loss": 3.0365,
      "step": 104821
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.792513608932495,
      "learning_rate": 0.0003423163945200741,
      "loss": 2.5768,
      "step": 104822
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4279963970184326,
      "learning_rate": 0.00034231234485218153,
      "loss": 2.9662,
      "step": 104823
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.7110793590545654,
      "learning_rate": 0.00034230829517642247,
      "loss": 3.1132,
      "step": 104824
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.411862850189209,
      "learning_rate": 0.00034230424549279735,
      "loss": 2.9958,
      "step": 104825
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.4039411544799805,
      "learning_rate": 0.00034230019580130716,
      "loss": 2.8136,
      "step": 104826
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.966163635253906,
      "learning_rate": 0.0003422961461019525,
      "loss": 2.8765,
      "step": 104827
    },
    {
      "epoch": 1.36,
      "grad_norm": 4.211655616760254,
      "learning_rate": 0.00034229209639473427,
      "loss": 2.7827,
      "step": 104828
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.7618212699890137,
      "learning_rate": 0.00034228804667965304,
      "loss": 2.9565,
      "step": 104829
    },
    {
      "epoch": 1.36,
      "grad_norm": 1.78629469871521,
      "learning_rate": 0.0003422839969567097,
      "loss": 3.0146,
      "step": 104830
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.248502254486084,
      "learning_rate": 0.000342279947225905,
      "loss": 2.8094,
      "step": 104831
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.959986686706543,
      "learning_rate": 0.0003422758974872396,
      "loss": 2.9161,
      "step": 104832
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.81198787689209,
      "learning_rate": 0.00034227184774071433,
      "loss": 3.0366,
      "step": 104833
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0975537300109863,
      "learning_rate": 0.00034226779798632995,
      "loss": 3.2307,
      "step": 104834
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1326191425323486,
      "learning_rate": 0.0003422637482240872,
      "loss": 3.2253,
      "step": 104835
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7348846197128296,
      "learning_rate": 0.00034225969845398684,
      "loss": 3.1005,
      "step": 104836
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5783543586730957,
      "learning_rate": 0.0003422556486760295,
      "loss": 2.9281,
      "step": 104837
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6832334995269775,
      "learning_rate": 0.00034225159889021613,
      "loss": 3.0211,
      "step": 104838
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7654391527175903,
      "learning_rate": 0.00034224754909654737,
      "loss": 2.8726,
      "step": 104839
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7366411685943604,
      "learning_rate": 0.000342243499295024,
      "loss": 3.0449,
      "step": 104840
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9197428226470947,
      "learning_rate": 0.0003422394494856468,
      "loss": 3.307,
      "step": 104841
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8047045469284058,
      "learning_rate": 0.00034223539966841645,
      "loss": 3.1716,
      "step": 104842
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.103753566741943,
      "learning_rate": 0.0003422313498433337,
      "loss": 3.1208,
      "step": 104843
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2880120277404785,
      "learning_rate": 0.00034222730001039935,
      "loss": 3.1879,
      "step": 104844
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9801682233810425,
      "learning_rate": 0.00034222325016961425,
      "loss": 2.8082,
      "step": 104845
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8998712301254272,
      "learning_rate": 0.00034221920032097893,
      "loss": 3.2019,
      "step": 104846
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6877895593643188,
      "learning_rate": 0.00034221515046449426,
      "loss": 2.9498,
      "step": 104847
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.332428455352783,
      "learning_rate": 0.0003422111006001611,
      "loss": 2.9914,
      "step": 104848
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1996347904205322,
      "learning_rate": 0.00034220705072798006,
      "loss": 3.0902,
      "step": 104849
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.747874140739441,
      "learning_rate": 0.00034220300084795193,
      "loss": 3.1382,
      "step": 104850
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7235748767852783,
      "learning_rate": 0.00034219895096007744,
      "loss": 2.89,
      "step": 104851
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8058722019195557,
      "learning_rate": 0.0003421949010643574,
      "loss": 2.8973,
      "step": 104852
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7766454219818115,
      "learning_rate": 0.0003421908511607925,
      "loss": 2.9093,
      "step": 104853
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8687704801559448,
      "learning_rate": 0.0003421868012493836,
      "loss": 2.8883,
      "step": 104854
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.534097909927368,
      "learning_rate": 0.0003421827513301313,
      "loss": 2.8665,
      "step": 104855
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6613881587982178,
      "learning_rate": 0.0003421787014030364,
      "loss": 3.3266,
      "step": 104856
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.760032057762146,
      "learning_rate": 0.00034217465146809984,
      "loss": 2.9941,
      "step": 104857
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.857661485671997,
      "learning_rate": 0.00034217060152532205,
      "loss": 3.1389,
      "step": 104858
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.462463617324829,
      "learning_rate": 0.000342166551574704,
      "loss": 3.1081,
      "step": 104859
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6011228561401367,
      "learning_rate": 0.0003421625016162464,
      "loss": 2.9106,
      "step": 104860
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.706637978553772,
      "learning_rate": 0.00034215845164995004,
      "loss": 2.9239,
      "step": 104861
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.230896234512329,
      "learning_rate": 0.00034215440167581555,
      "loss": 3.0056,
      "step": 104862
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5971665382385254,
      "learning_rate": 0.00034215035169384375,
      "loss": 3.1618,
      "step": 104863
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.12636399269104,
      "learning_rate": 0.0003421463017040355,
      "loss": 2.826,
      "step": 104864
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9654781818389893,
      "learning_rate": 0.0003421422517063914,
      "loss": 2.8241,
      "step": 104865
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.930438280105591,
      "learning_rate": 0.00034213820170091223,
      "loss": 2.9948,
      "step": 104866
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.005650281906128,
      "learning_rate": 0.0003421341516875989,
      "loss": 2.822,
      "step": 104867
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7883872985839844,
      "learning_rate": 0.0003421301016664519,
      "loss": 3.2891,
      "step": 104868
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0592010021209717,
      "learning_rate": 0.00034212605163747213,
      "loss": 3.0125,
      "step": 104869
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1177704334259033,
      "learning_rate": 0.0003421220016006604,
      "loss": 3.0494,
      "step": 104870
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8606418371200562,
      "learning_rate": 0.0003421179515560174,
      "loss": 3.0001,
      "step": 104871
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9755113124847412,
      "learning_rate": 0.00034211390150354375,
      "loss": 2.9627,
      "step": 104872
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1353373527526855,
      "learning_rate": 0.0003421098514432405,
      "loss": 3.0295,
      "step": 104873
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.022312879562378,
      "learning_rate": 0.0003421058013751082,
      "loss": 2.9133,
      "step": 104874
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1745617389678955,
      "learning_rate": 0.0003421017512991475,
      "loss": 2.718,
      "step": 104875
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3432939052581787,
      "learning_rate": 0.0003420977012153594,
      "loss": 3.0791,
      "step": 104876
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8930312395095825,
      "learning_rate": 0.0003420936511237445,
      "loss": 2.9836,
      "step": 104877
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6905453205108643,
      "learning_rate": 0.00034208960102430363,
      "loss": 2.9114,
      "step": 104878
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.914262056350708,
      "learning_rate": 0.0003420855509170376,
      "loss": 2.9602,
      "step": 104879
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9097814559936523,
      "learning_rate": 0.00034208150080194694,
      "loss": 3.0269,
      "step": 104880
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.054758310317993,
      "learning_rate": 0.0003420774506790325,
      "loss": 3.2206,
      "step": 104881
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2168185710906982,
      "learning_rate": 0.00034207340054829514,
      "loss": 3.022,
      "step": 104882
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7640074491500854,
      "learning_rate": 0.00034206935040973554,
      "loss": 2.7813,
      "step": 104883
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.921195149421692,
      "learning_rate": 0.0003420653002633544,
      "loss": 2.9143,
      "step": 104884
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5341598987579346,
      "learning_rate": 0.0003420612501091527,
      "loss": 3.0889,
      "step": 104885
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1653695106506348,
      "learning_rate": 0.00034205719994713084,
      "loss": 2.9643,
      "step": 104886
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6363470554351807,
      "learning_rate": 0.00034205314977728973,
      "loss": 3.0772,
      "step": 104887
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9889764785766602,
      "learning_rate": 0.0003420490995996303,
      "loss": 3.04,
      "step": 104888
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6598494052886963,
      "learning_rate": 0.0003420450494141531,
      "loss": 2.9114,
      "step": 104889
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.697506070137024,
      "learning_rate": 0.0003420409992208588,
      "loss": 2.8729,
      "step": 104890
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.271601438522339,
      "learning_rate": 0.0003420369490197484,
      "loss": 2.9839,
      "step": 104891
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.625218629837036,
      "learning_rate": 0.0003420328988108225,
      "loss": 2.8948,
      "step": 104892
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7848631143569946,
      "learning_rate": 0.0003420288485940819,
      "loss": 3.0513,
      "step": 104893
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5053277015686035,
      "learning_rate": 0.00034202479836952746,
      "loss": 3.1706,
      "step": 104894
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7424306869506836,
      "learning_rate": 0.0003420207481371597,
      "loss": 3.199,
      "step": 104895
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8870213031768799,
      "learning_rate": 0.00034201669789697946,
      "loss": 2.929,
      "step": 104896
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5810413360595703,
      "learning_rate": 0.0003420126476489876,
      "loss": 3.213,
      "step": 104897
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0418500900268555,
      "learning_rate": 0.00034200859739318476,
      "loss": 2.9174,
      "step": 104898
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.4723843336105347,
      "learning_rate": 0.00034200454712957166,
      "loss": 3.0345,
      "step": 104899
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.643585205078125,
      "learning_rate": 0.0003420004968581493,
      "loss": 3.1644,
      "step": 104900
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.882382869720459,
      "learning_rate": 0.0003419964465789181,
      "loss": 2.8988,
      "step": 104901
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8922326564788818,
      "learning_rate": 0.000341992396291879,
      "loss": 2.9411,
      "step": 104902
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.729935646057129,
      "learning_rate": 0.0003419883459970328,
      "loss": 3.0434,
      "step": 104903
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.143089771270752,
      "learning_rate": 0.0003419842956943801,
      "loss": 2.8666,
      "step": 104904
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7859257459640503,
      "learning_rate": 0.00034198024538392165,
      "loss": 3.2463,
      "step": 104905
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7291598320007324,
      "learning_rate": 0.00034197619506565847,
      "loss": 3.025,
      "step": 104906
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3397884368896484,
      "learning_rate": 0.000341972144739591,
      "loss": 2.8423,
      "step": 104907
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7718173265457153,
      "learning_rate": 0.0003419680944057201,
      "loss": 3.0153,
      "step": 104908
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6614458560943604,
      "learning_rate": 0.00034196404406404657,
      "loss": 3.0047,
      "step": 104909
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5861284732818604,
      "learning_rate": 0.00034195999371457116,
      "loss": 2.7901,
      "step": 104910
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6614899635314941,
      "learning_rate": 0.00034195594335729453,
      "loss": 2.9514,
      "step": 104911
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8288497924804688,
      "learning_rate": 0.0003419518929922176,
      "loss": 2.9955,
      "step": 104912
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.606263279914856,
      "learning_rate": 0.00034194784261934094,
      "loss": 3.0549,
      "step": 104913
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9951342344284058,
      "learning_rate": 0.0003419437922386654,
      "loss": 3.1367,
      "step": 104914
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.434761047363281,
      "learning_rate": 0.00034193974185019177,
      "loss": 2.8712,
      "step": 104915
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.0292155742645264,
      "learning_rate": 0.0003419356914539206,
      "loss": 3.3086,
      "step": 104916
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6729601621627808,
      "learning_rate": 0.000341931641049853,
      "loss": 2.8477,
      "step": 104917
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.7303683757781982,
      "learning_rate": 0.0003419275906379893,
      "loss": 2.8861,
      "step": 104918
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.4585447311401367,
      "learning_rate": 0.0003419235402183306,
      "loss": 3.0991,
      "step": 104919
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.4813499450683594,
      "learning_rate": 0.00034191948979087755,
      "loss": 2.8947,
      "step": 104920
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6863151788711548,
      "learning_rate": 0.0003419154393556308,
      "loss": 2.9579,
      "step": 104921
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.9661734104156494,
      "learning_rate": 0.0003419113889125913,
      "loss": 3.0248,
      "step": 104922
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.671696424484253,
      "learning_rate": 0.00034190733846175956,
      "loss": 2.8947,
      "step": 104923
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1031928062438965,
      "learning_rate": 0.00034190328800313643,
      "loss": 3.2037,
      "step": 104924
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8950051069259644,
      "learning_rate": 0.0003418992375367227,
      "loss": 3.2439,
      "step": 104925
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9159437417984009,
      "learning_rate": 0.00034189518706251917,
      "loss": 2.9353,
      "step": 104926
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5268094539642334,
      "learning_rate": 0.0003418911365805266,
      "loss": 2.9537,
      "step": 104927
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0197367668151855,
      "learning_rate": 0.0003418870860907455,
      "loss": 3.1671,
      "step": 104928
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.304687738418579,
      "learning_rate": 0.0003418830355931769,
      "loss": 3.0408,
      "step": 104929
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1398630142211914,
      "learning_rate": 0.00034187898508782153,
      "loss": 2.7592,
      "step": 104930
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.610393524169922,
      "learning_rate": 0.0003418749345746799,
      "loss": 2.9339,
      "step": 104931
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6627533435821533,
      "learning_rate": 0.00034187088405375303,
      "loss": 2.9857,
      "step": 104932
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.409262180328369,
      "learning_rate": 0.0003418668335250416,
      "loss": 2.8917,
      "step": 104933
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8013653755187988,
      "learning_rate": 0.00034186278298854625,
      "loss": 3.0319,
      "step": 104934
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6764317750930786,
      "learning_rate": 0.00034185873244426784,
      "loss": 2.8727,
      "step": 104935
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7891401052474976,
      "learning_rate": 0.0003418546818922071,
      "loss": 3.0013,
      "step": 104936
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2373769283294678,
      "learning_rate": 0.0003418506313323649,
      "loss": 2.9648,
      "step": 104937
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.705667495727539,
      "learning_rate": 0.00034184658076474167,
      "loss": 2.9164,
      "step": 104938
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.581982135772705,
      "learning_rate": 0.00034184253018933853,
      "loss": 3.1711,
      "step": 104939
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.824228286743164,
      "learning_rate": 0.0003418384796061561,
      "loss": 3.1039,
      "step": 104940
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2251508235931396,
      "learning_rate": 0.00034183442901519496,
      "loss": 3.0882,
      "step": 104941
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6835923194885254,
      "learning_rate": 0.00034183037841645605,
      "loss": 2.8858,
      "step": 104942
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.818969488143921,
      "learning_rate": 0.0003418263278099402,
      "loss": 2.8148,
      "step": 104943
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.847925901412964,
      "learning_rate": 0.0003418222771956479,
      "loss": 2.9659,
      "step": 104944
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.853097438812256,
      "learning_rate": 0.00034181822657358013,
      "loss": 2.8811,
      "step": 104945
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0631661415100098,
      "learning_rate": 0.00034181417594373757,
      "loss": 2.9768,
      "step": 104946
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.996364712715149,
      "learning_rate": 0.0003418101253061209,
      "loss": 3.189,
      "step": 104947
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.398569345474243,
      "learning_rate": 0.000341806074660731,
      "loss": 3.0178,
      "step": 104948
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.094172477722168,
      "learning_rate": 0.0003418020240075686,
      "loss": 2.8064,
      "step": 104949
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6538951396942139,
      "learning_rate": 0.0003417979733466343,
      "loss": 3.1482,
      "step": 104950
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7632169723510742,
      "learning_rate": 0.000341793922677929,
      "loss": 2.8168,
      "step": 104951
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6907498836517334,
      "learning_rate": 0.0003417898720014535,
      "loss": 3.2346,
      "step": 104952
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.56353497505188,
      "learning_rate": 0.00034178582131720836,
      "loss": 3.2549,
      "step": 104953
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6887578964233398,
      "learning_rate": 0.0003417817706251945,
      "loss": 3.232,
      "step": 104954
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.082367181777954,
      "learning_rate": 0.0003417777199254127,
      "loss": 3.063,
      "step": 104955
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.633603096008301,
      "learning_rate": 0.0003417736692178635,
      "loss": 3.1474,
      "step": 104956
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.507509708404541,
      "learning_rate": 0.00034176961850254786,
      "loss": 2.8806,
      "step": 104957
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7083700895309448,
      "learning_rate": 0.0003417655677794665,
      "loss": 3.0001,
      "step": 104958
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.221864700317383,
      "learning_rate": 0.00034176151704862,
      "loss": 2.9046,
      "step": 104959
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6998766660690308,
      "learning_rate": 0.00034175746631000937,
      "loss": 3.0505,
      "step": 104960
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7769334316253662,
      "learning_rate": 0.00034175341556363524,
      "loss": 2.6918,
      "step": 104961
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0288333892822266,
      "learning_rate": 0.0003417493648094982,
      "loss": 2.7857,
      "step": 104962
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7428224086761475,
      "learning_rate": 0.0003417453140475993,
      "loss": 2.9314,
      "step": 104963
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8879377841949463,
      "learning_rate": 0.0003417412632779392,
      "loss": 2.8436,
      "step": 104964
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8522541522979736,
      "learning_rate": 0.00034173721250051854,
      "loss": 2.8438,
      "step": 104965
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8680042028427124,
      "learning_rate": 0.0003417331617153381,
      "loss": 3.0377,
      "step": 104966
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.313018560409546,
      "learning_rate": 0.00034172911092239874,
      "loss": 3.2959,
      "step": 104967
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3545498847961426,
      "learning_rate": 0.00034172506012170115,
      "loss": 3.1781,
      "step": 104968
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.397083282470703,
      "learning_rate": 0.0003417210093132461,
      "loss": 3.0856,
      "step": 104969
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7195836305618286,
      "learning_rate": 0.0003417169584970343,
      "loss": 2.9358,
      "step": 104970
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9913095235824585,
      "learning_rate": 0.0003417129076730666,
      "loss": 3.0812,
      "step": 104971
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4204063415527344,
      "learning_rate": 0.0003417088568413435,
      "loss": 3.0332,
      "step": 104972
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.021451950073242,
      "learning_rate": 0.0003417048060018661,
      "loss": 3.2061,
      "step": 104973
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8661270141601562,
      "learning_rate": 0.0003417007551546349,
      "loss": 2.9001,
      "step": 104974
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6975640058517456,
      "learning_rate": 0.00034169670429965083,
      "loss": 2.7162,
      "step": 104975
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6280994415283203,
      "learning_rate": 0.0003416926534369145,
      "loss": 2.7648,
      "step": 104976
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.242488384246826,
      "learning_rate": 0.0003416886025664267,
      "loss": 2.9495,
      "step": 104977
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9364320039749146,
      "learning_rate": 0.0003416845516881882,
      "loss": 3.069,
      "step": 104978
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.20894193649292,
      "learning_rate": 0.00034168050080219986,
      "loss": 3.2022,
      "step": 104979
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.618628978729248,
      "learning_rate": 0.0003416764499084622,
      "loss": 2.8801,
      "step": 104980
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2509965896606445,
      "learning_rate": 0.0003416723990069761,
      "loss": 2.8664,
      "step": 104981
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7555911540985107,
      "learning_rate": 0.00034166834809774247,
      "loss": 3.0904,
      "step": 104982
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.083099603652954,
      "learning_rate": 0.0003416642971807618,
      "loss": 2.7629,
      "step": 104983
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1956942081451416,
      "learning_rate": 0.00034166024625603496,
      "loss": 3.3023,
      "step": 104984
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0829129219055176,
      "learning_rate": 0.0003416561953235627,
      "loss": 3.16,
      "step": 104985
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1827332973480225,
      "learning_rate": 0.00034165214438334573,
      "loss": 3.0979,
      "step": 104986
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9740657806396484,
      "learning_rate": 0.00034164809343538487,
      "loss": 3.0411,
      "step": 104987
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.683063268661499,
      "learning_rate": 0.00034164404247968085,
      "loss": 3.1563,
      "step": 104988
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.583383083343506,
      "learning_rate": 0.0003416399915162344,
      "loss": 3.236,
      "step": 104989
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.043914556503296,
      "learning_rate": 0.00034163594054504627,
      "loss": 3.0371,
      "step": 104990
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.050929546356201,
      "learning_rate": 0.0003416318895661173,
      "loss": 3.1817,
      "step": 104991
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.725211262702942,
      "learning_rate": 0.0003416278385794481,
      "loss": 2.8765,
      "step": 104992
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.12939190864563,
      "learning_rate": 0.0003416237875850395,
      "loss": 3.1095,
      "step": 104993
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8377958536148071,
      "learning_rate": 0.00034161973658289227,
      "loss": 3.239,
      "step": 104994
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8155930042266846,
      "learning_rate": 0.0003416156855730072,
      "loss": 2.9301,
      "step": 104995
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9413633346557617,
      "learning_rate": 0.000341611634555385,
      "loss": 3.0116,
      "step": 104996
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.196648597717285,
      "learning_rate": 0.0003416075835300263,
      "loss": 3.0498,
      "step": 104997
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.322463274002075,
      "learning_rate": 0.000341603532496932,
      "loss": 2.7585,
      "step": 104998
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.3659496307373047,
      "learning_rate": 0.0003415994814561029,
      "loss": 2.9518,
      "step": 104999
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9593653678894043,
      "learning_rate": 0.00034159543040753956,
      "loss": 2.8662,
      "step": 105000
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4324698448181152,
      "learning_rate": 0.00034159137935124294,
      "loss": 3.1103,
      "step": 105001
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.160184144973755,
      "learning_rate": 0.0003415873282872136,
      "loss": 3.2172,
      "step": 105002
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3665049076080322,
      "learning_rate": 0.0003415832772154525,
      "loss": 3.0565,
      "step": 105003
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8004860877990723,
      "learning_rate": 0.0003415792261359603,
      "loss": 2.9908,
      "step": 105004
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8909966945648193,
      "learning_rate": 0.0003415751750487376,
      "loss": 3.064,
      "step": 105005
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.166958808898926,
      "learning_rate": 0.0003415711239537853,
      "loss": 2.8383,
      "step": 105006
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9760479927062988,
      "learning_rate": 0.00034156707285110425,
      "loss": 2.7307,
      "step": 105007
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9281079769134521,
      "learning_rate": 0.00034156302174069504,
      "loss": 3.1287,
      "step": 105008
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8083374500274658,
      "learning_rate": 0.00034155897062255845,
      "loss": 2.897,
      "step": 105009
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6647030115127563,
      "learning_rate": 0.00034155491949669525,
      "loss": 2.9846,
      "step": 105010
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7121866941452026,
      "learning_rate": 0.00034155086836310634,
      "loss": 3.1094,
      "step": 105011
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.45898175239563,
      "learning_rate": 0.0003415468172217922,
      "loss": 2.9229,
      "step": 105012
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6102317571640015,
      "learning_rate": 0.00034154276607275375,
      "loss": 2.9733,
      "step": 105013
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8350623846054077,
      "learning_rate": 0.00034153871491599187,
      "loss": 3.0125,
      "step": 105014
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2557311058044434,
      "learning_rate": 0.000341534663751507,
      "loss": 3.2344,
      "step": 105015
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0013039112091064,
      "learning_rate": 0.0003415306125793,
      "loss": 3.0381,
      "step": 105016
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8673185110092163,
      "learning_rate": 0.00034152656139937174,
      "loss": 3.0754,
      "step": 105017
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8718457221984863,
      "learning_rate": 0.000341522510211723,
      "loss": 3.0065,
      "step": 105018
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.867251992225647,
      "learning_rate": 0.00034151845901635435,
      "loss": 3.1097,
      "step": 105019
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6237869262695312,
      "learning_rate": 0.0003415144078132666,
      "loss": 3.1295,
      "step": 105020
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.979628562927246,
      "learning_rate": 0.00034151035660246067,
      "loss": 2.757,
      "step": 105021
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.790134310722351,
      "learning_rate": 0.0003415063053839371,
      "loss": 2.9843,
      "step": 105022
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6940877437591553,
      "learning_rate": 0.0003415022541576967,
      "loss": 2.8585,
      "step": 105023
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.894809365272522,
      "learning_rate": 0.0003414982029237403,
      "loss": 3.1417,
      "step": 105024
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8110517263412476,
      "learning_rate": 0.0003414941516820685,
      "loss": 3.1541,
      "step": 105025
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.0604233741760254,
      "learning_rate": 0.0003414901004326823,
      "loss": 2.9684,
      "step": 105026
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.935758352279663,
      "learning_rate": 0.0003414860491755822,
      "loss": 2.864,
      "step": 105027
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7073341608047485,
      "learning_rate": 0.00034148199791076913,
      "loss": 2.9621,
      "step": 105028
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.952693223953247,
      "learning_rate": 0.00034147794663824373,
      "loss": 2.8994,
      "step": 105029
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3592727184295654,
      "learning_rate": 0.00034147389535800677,
      "loss": 3.1091,
      "step": 105030
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2863874435424805,
      "learning_rate": 0.0003414698440700591,
      "loss": 2.8032,
      "step": 105031
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.548064947128296,
      "learning_rate": 0.00034146579277440135,
      "loss": 2.976,
      "step": 105032
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8229243755340576,
      "learning_rate": 0.00034146174147103435,
      "loss": 2.6785,
      "step": 105033
    },
    {
      "epoch": 1.37,
      "grad_norm": 5.607914447784424,
      "learning_rate": 0.00034145769015995884,
      "loss": 3.1056,
      "step": 105034
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.4705159664154053,
      "learning_rate": 0.00034145363884117557,
      "loss": 2.9395,
      "step": 105035
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.073702573776245,
      "learning_rate": 0.00034144958751468525,
      "loss": 3.0819,
      "step": 105036
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.36592960357666,
      "learning_rate": 0.0003414455361804887,
      "loss": 3.1298,
      "step": 105037
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2445621490478516,
      "learning_rate": 0.0003414414848385866,
      "loss": 2.9262,
      "step": 105038
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3617968559265137,
      "learning_rate": 0.0003414374334889798,
      "loss": 2.9398,
      "step": 105039
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.19340181350708,
      "learning_rate": 0.000341433382131669,
      "loss": 3.1026,
      "step": 105040
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.450770854949951,
      "learning_rate": 0.00034142933076665487,
      "loss": 2.8944,
      "step": 105041
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.424070358276367,
      "learning_rate": 0.0003414252793939383,
      "loss": 3.1868,
      "step": 105042
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.029695510864258,
      "learning_rate": 0.00034142122801352,
      "loss": 2.9056,
      "step": 105043
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1295206546783447,
      "learning_rate": 0.00034141717662540067,
      "loss": 3.0055,
      "step": 105044
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.610956072807312,
      "learning_rate": 0.00034141312522958115,
      "loss": 3.182,
      "step": 105045
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6002492904663086,
      "learning_rate": 0.0003414090738260622,
      "loss": 3.2116,
      "step": 105046
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7463016510009766,
      "learning_rate": 0.0003414050224148444,
      "loss": 3.0266,
      "step": 105047
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2158753871917725,
      "learning_rate": 0.0003414009709959287,
      "loss": 2.626,
      "step": 105048
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9706134796142578,
      "learning_rate": 0.00034139691956931576,
      "loss": 3.1726,
      "step": 105049
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.866349697113037,
      "learning_rate": 0.0003413928681350063,
      "loss": 2.944,
      "step": 105050
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6800938844680786,
      "learning_rate": 0.00034138881669300113,
      "loss": 2.9364,
      "step": 105051
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.307605504989624,
      "learning_rate": 0.00034138476524330115,
      "loss": 2.8709,
      "step": 105052
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.090771198272705,
      "learning_rate": 0.0003413807137859068,
      "loss": 3.122,
      "step": 105053
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.215846538543701,
      "learning_rate": 0.000341376662320819,
      "loss": 3.1075,
      "step": 105054
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.14003324508667,
      "learning_rate": 0.00034137261084803865,
      "loss": 2.7878,
      "step": 105055
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4055979251861572,
      "learning_rate": 0.0003413685593675662,
      "loss": 2.9307,
      "step": 105056
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6872992515563965,
      "learning_rate": 0.0003413645078794026,
      "loss": 2.6936,
      "step": 105057
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.744411587715149,
      "learning_rate": 0.00034136045638354854,
      "loss": 2.7836,
      "step": 105058
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.375001907348633,
      "learning_rate": 0.00034135640488000486,
      "loss": 2.8255,
      "step": 105059
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9918620586395264,
      "learning_rate": 0.00034135235336877213,
      "loss": 3.1801,
      "step": 105060
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.931365728378296,
      "learning_rate": 0.00034134830184985127,
      "loss": 3.099,
      "step": 105061
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0537071228027344,
      "learning_rate": 0.000341344250323243,
      "loss": 2.8848,
      "step": 105062
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5503008365631104,
      "learning_rate": 0.00034134019878894807,
      "loss": 2.762,
      "step": 105063
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0466973781585693,
      "learning_rate": 0.00034133614724696714,
      "loss": 3.0735,
      "step": 105064
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.304344654083252,
      "learning_rate": 0.0003413320956973011,
      "loss": 3.4604,
      "step": 105065
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7293912172317505,
      "learning_rate": 0.00034132804413995066,
      "loss": 2.9204,
      "step": 105066
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0003244876861572,
      "learning_rate": 0.0003413239925749165,
      "loss": 3.1097,
      "step": 105067
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2084808349609375,
      "learning_rate": 0.0003413199410021994,
      "loss": 2.9387,
      "step": 105068
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6957886219024658,
      "learning_rate": 0.0003413158894218003,
      "loss": 2.9858,
      "step": 105069
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9997971057891846,
      "learning_rate": 0.0003413118378337196,
      "loss": 2.91,
      "step": 105070
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9354331493377686,
      "learning_rate": 0.0003413077862379584,
      "loss": 3.1509,
      "step": 105071
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2427685260772705,
      "learning_rate": 0.0003413037346345173,
      "loss": 2.8978,
      "step": 105072
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.718262791633606,
      "learning_rate": 0.00034129968302339694,
      "loss": 2.9971,
      "step": 105073
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8305420875549316,
      "learning_rate": 0.00034129563140459823,
      "loss": 2.8888,
      "step": 105074
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8268229961395264,
      "learning_rate": 0.0003412915797781219,
      "loss": 3.2073,
      "step": 105075
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8139111995697021,
      "learning_rate": 0.0003412875281439687,
      "loss": 3.0697,
      "step": 105076
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.804626703262329,
      "learning_rate": 0.0003412834765021394,
      "loss": 3.1699,
      "step": 105077
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3146770000457764,
      "learning_rate": 0.0003412794248526347,
      "loss": 3.0823,
      "step": 105078
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0743582248687744,
      "learning_rate": 0.0003412753731954553,
      "loss": 3.3286,
      "step": 105079
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7290340662002563,
      "learning_rate": 0.0003412713215306022,
      "loss": 2.8761,
      "step": 105080
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.507388949394226,
      "learning_rate": 0.00034126726985807574,
      "loss": 3.1893,
      "step": 105081
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3743011951446533,
      "learning_rate": 0.0003412632181778771,
      "loss": 2.9012,
      "step": 105082
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.727913737297058,
      "learning_rate": 0.0003412591664900068,
      "loss": 3.0745,
      "step": 105083
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7682985067367554,
      "learning_rate": 0.0003412551147944656,
      "loss": 2.839,
      "step": 105084
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6020722389221191,
      "learning_rate": 0.0003412510630912543,
      "loss": 3.0313,
      "step": 105085
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.296967029571533,
      "learning_rate": 0.0003412470113803738,
      "loss": 2.8148,
      "step": 105086
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.057558298110962,
      "learning_rate": 0.0003412429596618245,
      "loss": 2.9526,
      "step": 105087
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0074961185455322,
      "learning_rate": 0.00034123890793560745,
      "loss": 2.927,
      "step": 105088
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.767404556274414,
      "learning_rate": 0.0003412348562017233,
      "loss": 2.8437,
      "step": 105089
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.863922119140625,
      "learning_rate": 0.00034123080446017283,
      "loss": 3.0891,
      "step": 105090
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8349764347076416,
      "learning_rate": 0.0003412267527109567,
      "loss": 3.0589,
      "step": 105091
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7577142715454102,
      "learning_rate": 0.00034122270095407584,
      "loss": 2.9033,
      "step": 105092
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7879180908203125,
      "learning_rate": 0.00034121864918953087,
      "loss": 2.8741,
      "step": 105093
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7629581689834595,
      "learning_rate": 0.00034121459741732254,
      "loss": 3.0159,
      "step": 105094
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.144277334213257,
      "learning_rate": 0.00034121054563745166,
      "loss": 2.8814,
      "step": 105095
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9156522750854492,
      "learning_rate": 0.00034120649384991895,
      "loss": 3.1864,
      "step": 105096
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.775665283203125,
      "learning_rate": 0.00034120244205472516,
      "loss": 2.7353,
      "step": 105097
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3932247161865234,
      "learning_rate": 0.00034119839025187104,
      "loss": 3.1869,
      "step": 105098
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6973012685775757,
      "learning_rate": 0.00034119433844135747,
      "loss": 3.0245,
      "step": 105099
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.444091320037842,
      "learning_rate": 0.00034119028662318504,
      "loss": 3.0135,
      "step": 105100
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0173747539520264,
      "learning_rate": 0.0003411862347973545,
      "loss": 3.1973,
      "step": 105101
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.5803282260894775,
      "learning_rate": 0.00034118218296386675,
      "loss": 3.1371,
      "step": 105102
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.276503086090088,
      "learning_rate": 0.00034117813112272236,
      "loss": 3.1107,
      "step": 105103
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6820124387741089,
      "learning_rate": 0.0003411740792739222,
      "loss": 3.0511,
      "step": 105104
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.158299684524536,
      "learning_rate": 0.0003411700274174671,
      "loss": 3.0553,
      "step": 105105
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.356516361236572,
      "learning_rate": 0.00034116597555335765,
      "loss": 2.7651,
      "step": 105106
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9137568473815918,
      "learning_rate": 0.0003411619236815946,
      "loss": 3.1634,
      "step": 105107
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8784592151641846,
      "learning_rate": 0.00034115787180217887,
      "loss": 3.0087,
      "step": 105108
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.581958293914795,
      "learning_rate": 0.0003411538199151111,
      "loss": 2.8216,
      "step": 105109
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.0945355892181396,
      "learning_rate": 0.00034114976802039203,
      "loss": 3.089,
      "step": 105110
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.1991560459136963,
      "learning_rate": 0.0003411457161180224,
      "loss": 2.8633,
      "step": 105111
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9025686979293823,
      "learning_rate": 0.00034114166420800304,
      "loss": 3.3389,
      "step": 105112
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5757997035980225,
      "learning_rate": 0.00034113761229033474,
      "loss": 2.9089,
      "step": 105113
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.153177738189697,
      "learning_rate": 0.0003411335603650181,
      "loss": 3.056,
      "step": 105114
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.829909563064575,
      "learning_rate": 0.00034112950843205397,
      "loss": 3.0545,
      "step": 105115
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.516864538192749,
      "learning_rate": 0.00034112545649144313,
      "loss": 2.9409,
      "step": 105116
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9163117408752441,
      "learning_rate": 0.0003411214045431863,
      "loss": 3.2938,
      "step": 105117
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6766979694366455,
      "learning_rate": 0.0003411173525872841,
      "loss": 2.9972,
      "step": 105118
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6024863719940186,
      "learning_rate": 0.0003411133006237376,
      "loss": 3.0319,
      "step": 105119
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.775667905807495,
      "learning_rate": 0.0003411092486525472,
      "loss": 3.0826,
      "step": 105120
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6020421981811523,
      "learning_rate": 0.0003411051966737138,
      "loss": 3.0693,
      "step": 105121
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5605151653289795,
      "learning_rate": 0.0003411011446872383,
      "loss": 2.9565,
      "step": 105122
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.072274923324585,
      "learning_rate": 0.00034109709269312125,
      "loss": 2.7953,
      "step": 105123
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2867372035980225,
      "learning_rate": 0.0003410930406913634,
      "loss": 3.069,
      "step": 105124
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6898339986801147,
      "learning_rate": 0.00034108898868196575,
      "loss": 2.8974,
      "step": 105125
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0316643714904785,
      "learning_rate": 0.00034108493666492874,
      "loss": 3.09,
      "step": 105126
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7071157693862915,
      "learning_rate": 0.0003410808846402533,
      "loss": 2.9267,
      "step": 105127
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9729633331298828,
      "learning_rate": 0.00034107683260794017,
      "loss": 3.1422,
      "step": 105128
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6708576679229736,
      "learning_rate": 0.0003410727805679901,
      "loss": 2.9571,
      "step": 105129
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6224501132965088,
      "learning_rate": 0.00034106872852040376,
      "loss": 2.9267,
      "step": 105130
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3988683223724365,
      "learning_rate": 0.000341064676465182,
      "loss": 3.018,
      "step": 105131
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5276732444763184,
      "learning_rate": 0.0003410606244023256,
      "loss": 2.7719,
      "step": 105132
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0618534088134766,
      "learning_rate": 0.0003410565723318352,
      "loss": 2.6965,
      "step": 105133
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.2044172286987305,
      "learning_rate": 0.0003410525202537116,
      "loss": 2.6736,
      "step": 105134
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.337013006210327,
      "learning_rate": 0.0003410484681679556,
      "loss": 2.6171,
      "step": 105135
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4115946292877197,
      "learning_rate": 0.00034104441607456785,
      "loss": 2.9034,
      "step": 105136
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5574593544006348,
      "learning_rate": 0.0003410403639735492,
      "loss": 2.9368,
      "step": 105137
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.512061595916748,
      "learning_rate": 0.00034103631186490046,
      "loss": 2.9462,
      "step": 105138
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9819105863571167,
      "learning_rate": 0.00034103225974862214,
      "loss": 2.9732,
      "step": 105139
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.640582323074341,
      "learning_rate": 0.00034102820762471517,
      "loss": 2.8555,
      "step": 105140
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8423542976379395,
      "learning_rate": 0.0003410241554931804,
      "loss": 3.0141,
      "step": 105141
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.967806339263916,
      "learning_rate": 0.0003410201033540184,
      "loss": 3.0626,
      "step": 105142
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.627890706062317,
      "learning_rate": 0.00034101605120723,
      "loss": 3.0724,
      "step": 105143
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4346797466278076,
      "learning_rate": 0.00034101199905281596,
      "loss": 2.7882,
      "step": 105144
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.083301305770874,
      "learning_rate": 0.00034100794689077694,
      "loss": 3.0997,
      "step": 105145
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7643842697143555,
      "learning_rate": 0.0003410038947211138,
      "loss": 2.9437,
      "step": 105146
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.684281587600708,
      "learning_rate": 0.00034099984254382733,
      "loss": 2.9308,
      "step": 105147
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5144962072372437,
      "learning_rate": 0.00034099579035891815,
      "loss": 3.295,
      "step": 105148
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2217538356781006,
      "learning_rate": 0.0003409917381663871,
      "loss": 3.0674,
      "step": 105149
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1837158203125,
      "learning_rate": 0.00034098768596623494,
      "loss": 3.004,
      "step": 105150
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.567338705062866,
      "learning_rate": 0.0003409836337584623,
      "loss": 3.0278,
      "step": 105151
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8760545253753662,
      "learning_rate": 0.0003409795815430701,
      "loss": 2.7919,
      "step": 105152
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.187147378921509,
      "learning_rate": 0.000340975529320059,
      "loss": 3.1644,
      "step": 105153
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.184878349304199,
      "learning_rate": 0.0003409714770894298,
      "loss": 2.9376,
      "step": 105154
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9771466255187988,
      "learning_rate": 0.0003409674248511832,
      "loss": 2.9519,
      "step": 105155
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2177886962890625,
      "learning_rate": 0.00034096337260532003,
      "loss": 3.2936,
      "step": 105156
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.924011707305908,
      "learning_rate": 0.0003409593203518409,
      "loss": 2.9762,
      "step": 105157
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7506163120269775,
      "learning_rate": 0.0003409552680907467,
      "loss": 2.8183,
      "step": 105158
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1970834732055664,
      "learning_rate": 0.00034095121582203823,
      "loss": 3.1138,
      "step": 105159
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0510103702545166,
      "learning_rate": 0.00034094716354571606,
      "loss": 2.8236,
      "step": 105160
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2725210189819336,
      "learning_rate": 0.00034094311126178106,
      "loss": 3.164,
      "step": 105161
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2232303619384766,
      "learning_rate": 0.00034093905897023403,
      "loss": 3.1148,
      "step": 105162
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9295268058776855,
      "learning_rate": 0.00034093500667107553,
      "loss": 2.7466,
      "step": 105163
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7917265892028809,
      "learning_rate": 0.0003409309543643065,
      "loss": 3.0045,
      "step": 105164
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.168138027191162,
      "learning_rate": 0.0003409269020499277,
      "loss": 3.1619,
      "step": 105165
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9946224689483643,
      "learning_rate": 0.00034092284972793977,
      "loss": 2.9227,
      "step": 105166
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7426023483276367,
      "learning_rate": 0.0003409187973983434,
      "loss": 2.8587,
      "step": 105167
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.4697799682617188,
      "learning_rate": 0.0003409147450611396,
      "loss": 2.783,
      "step": 105168
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1009583473205566,
      "learning_rate": 0.0003409106927163289,
      "loss": 3.1062,
      "step": 105169
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1359939575195312,
      "learning_rate": 0.00034090664036391216,
      "loss": 2.8862,
      "step": 105170
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.6263039112091064,
      "learning_rate": 0.0003409025880038901,
      "loss": 2.7606,
      "step": 105171
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0870540142059326,
      "learning_rate": 0.00034089853563626344,
      "loss": 2.9981,
      "step": 105172
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.245697021484375,
      "learning_rate": 0.00034089448326103303,
      "loss": 3.162,
      "step": 105173
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.47878098487854,
      "learning_rate": 0.0003408904308781996,
      "loss": 3.1319,
      "step": 105174
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8972055912017822,
      "learning_rate": 0.00034088637848776374,
      "loss": 3.2963,
      "step": 105175
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0248897075653076,
      "learning_rate": 0.00034088232608972644,
      "loss": 3.0473,
      "step": 105176
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.386715888977051,
      "learning_rate": 0.00034087827368408833,
      "loss": 3.0477,
      "step": 105177
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0861339569091797,
      "learning_rate": 0.00034087422127085013,
      "loss": 3.1016,
      "step": 105178
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8849947452545166,
      "learning_rate": 0.00034087016885001265,
      "loss": 3.1263,
      "step": 105179
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6835412979125977,
      "learning_rate": 0.0003408661164215767,
      "loss": 3.0503,
      "step": 105180
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.016237497329712,
      "learning_rate": 0.0003408620639855429,
      "loss": 2.9758,
      "step": 105181
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.03855562210083,
      "learning_rate": 0.000340858011541912,
      "loss": 2.8415,
      "step": 105182
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.678945541381836,
      "learning_rate": 0.000340853959090685,
      "loss": 3.0647,
      "step": 105183
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2871103286743164,
      "learning_rate": 0.0003408499066318623,
      "loss": 3.0051,
      "step": 105184
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8722511529922485,
      "learning_rate": 0.00034084585416544497,
      "loss": 3.0344,
      "step": 105185
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.0051448345184326,
      "learning_rate": 0.0003408418016914335,
      "loss": 2.8683,
      "step": 105186
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.4750311374664307,
      "learning_rate": 0.0003408377492098289,
      "loss": 2.9143,
      "step": 105187
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0208945274353027,
      "learning_rate": 0.0003408336967206317,
      "loss": 3.175,
      "step": 105188
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7000566720962524,
      "learning_rate": 0.00034082964422384274,
      "loss": 3.0358,
      "step": 105189
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.2203803062438965,
      "learning_rate": 0.0003408255917194628,
      "loss": 3.079,
      "step": 105190
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.771493911743164,
      "learning_rate": 0.00034082153920749267,
      "loss": 3.0209,
      "step": 105191
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.1276681423187256,
      "learning_rate": 0.00034081748668793294,
      "loss": 3.03,
      "step": 105192
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7641559839248657,
      "learning_rate": 0.00034081343416078456,
      "loss": 3.2292,
      "step": 105193
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8343448638916016,
      "learning_rate": 0.00034080938162604817,
      "loss": 2.6152,
      "step": 105194
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.287602186203003,
      "learning_rate": 0.00034080532908372453,
      "loss": 3.1015,
      "step": 105195
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.833324670791626,
      "learning_rate": 0.0003408012765338144,
      "loss": 3.0379,
      "step": 105196
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6021251678466797,
      "learning_rate": 0.00034079722397631855,
      "loss": 3.1136,
      "step": 105197
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7363438606262207,
      "learning_rate": 0.0003407931714112378,
      "loss": 3.0242,
      "step": 105198
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.157305717468262,
      "learning_rate": 0.00034078911883857264,
      "loss": 3.1661,
      "step": 105199
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.706676483154297,
      "learning_rate": 0.00034078506625832416,
      "loss": 2.8273,
      "step": 105200
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7698572874069214,
      "learning_rate": 0.00034078101367049287,
      "loss": 2.963,
      "step": 105201
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.108574151992798,
      "learning_rate": 0.00034077696107507975,
      "loss": 3.0306,
      "step": 105202
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.416107416152954,
      "learning_rate": 0.00034077290847208536,
      "loss": 3.0375,
      "step": 105203
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.087507963180542,
      "learning_rate": 0.00034076885586151054,
      "loss": 3.0014,
      "step": 105204
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.700476884841919,
      "learning_rate": 0.000340764803243356,
      "loss": 2.9053,
      "step": 105205
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8383742570877075,
      "learning_rate": 0.00034076075061762254,
      "loss": 3.2208,
      "step": 105206
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.816464900970459,
      "learning_rate": 0.0003407566979843108,
      "loss": 2.9557,
      "step": 105207
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.175063133239746,
      "learning_rate": 0.0003407526453434217,
      "loss": 2.9876,
      "step": 105208
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9070760011672974,
      "learning_rate": 0.0003407485926949559,
      "loss": 2.8182,
      "step": 105209
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0336453914642334,
      "learning_rate": 0.00034074454003891417,
      "loss": 3.0241,
      "step": 105210
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8433115482330322,
      "learning_rate": 0.00034074048737529726,
      "loss": 2.8039,
      "step": 105211
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8430677652359009,
      "learning_rate": 0.0003407364347041059,
      "loss": 3.1023,
      "step": 105212
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7597414255142212,
      "learning_rate": 0.00034073238202534094,
      "loss": 3.0623,
      "step": 105213
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8501285314559937,
      "learning_rate": 0.000340728329339003,
      "loss": 2.9917,
      "step": 105214
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.96639084815979,
      "learning_rate": 0.0003407242766450929,
      "loss": 3.111,
      "step": 105215
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9187098741531372,
      "learning_rate": 0.00034072022394361135,
      "loss": 2.9455,
      "step": 105216
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.274742841720581,
      "learning_rate": 0.00034071617123455927,
      "loss": 3.0454,
      "step": 105217
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8539185523986816,
      "learning_rate": 0.0003407121185179372,
      "loss": 2.8585,
      "step": 105218
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8135342597961426,
      "learning_rate": 0.00034070806579374594,
      "loss": 2.9415,
      "step": 105219
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7444559335708618,
      "learning_rate": 0.00034070401306198633,
      "loss": 3.0331,
      "step": 105220
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.8305556774139404,
      "learning_rate": 0.0003406999603226591,
      "loss": 3.0873,
      "step": 105221
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.80414879322052,
      "learning_rate": 0.0003406959075757649,
      "loss": 3.2287,
      "step": 105222
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6685850620269775,
      "learning_rate": 0.0003406918548213046,
      "loss": 3.0941,
      "step": 105223
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.788225769996643,
      "learning_rate": 0.0003406878020592789,
      "loss": 3.1246,
      "step": 105224
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.843851089477539,
      "learning_rate": 0.0003406837492896886,
      "loss": 3.0877,
      "step": 105225
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1353743076324463,
      "learning_rate": 0.0003406796965125345,
      "loss": 3.2078,
      "step": 105226
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6792027950286865,
      "learning_rate": 0.0003406756437278171,
      "loss": 3.1066,
      "step": 105227
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9876677989959717,
      "learning_rate": 0.0003406715909355374,
      "loss": 2.9888,
      "step": 105228
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7815529108047485,
      "learning_rate": 0.0003406675381356962,
      "loss": 2.9226,
      "step": 105229
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9113328456878662,
      "learning_rate": 0.000340663485328294,
      "loss": 2.8752,
      "step": 105230
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.491429328918457,
      "learning_rate": 0.0003406594325133317,
      "loss": 3.1524,
      "step": 105231
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8897356986999512,
      "learning_rate": 0.00034065537969081013,
      "loss": 3.1568,
      "step": 105232
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6495466232299805,
      "learning_rate": 0.0003406513268607299,
      "loss": 2.7296,
      "step": 105233
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8825640678405762,
      "learning_rate": 0.0003406472740230918,
      "loss": 2.9621,
      "step": 105234
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.641804575920105,
      "learning_rate": 0.00034064322117789665,
      "loss": 2.9548,
      "step": 105235
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.766019821166992,
      "learning_rate": 0.0003406391683251451,
      "loss": 3.054,
      "step": 105236
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6298129558563232,
      "learning_rate": 0.000340635115464838,
      "loss": 3.0495,
      "step": 105237
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.942812442779541,
      "learning_rate": 0.0003406310625969761,
      "loss": 3.1406,
      "step": 105238
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8802679777145386,
      "learning_rate": 0.0003406270097215601,
      "loss": 3.1257,
      "step": 105239
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0790297985076904,
      "learning_rate": 0.00034062295683859073,
      "loss": 2.8967,
      "step": 105240
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.886842131614685,
      "learning_rate": 0.0003406189039480688,
      "loss": 2.9046,
      "step": 105241
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.62769615650177,
      "learning_rate": 0.00034061485104999506,
      "loss": 2.9322,
      "step": 105242
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9216797351837158,
      "learning_rate": 0.00034061079814437024,
      "loss": 3.1353,
      "step": 105243
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9097833633422852,
      "learning_rate": 0.00034060674523119514,
      "loss": 3.1225,
      "step": 105244
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.681154727935791,
      "learning_rate": 0.00034060269231047047,
      "loss": 2.987,
      "step": 105245
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.228778600692749,
      "learning_rate": 0.00034059863938219693,
      "loss": 2.9339,
      "step": 105246
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.913954019546509,
      "learning_rate": 0.00034059458644637546,
      "loss": 2.7908,
      "step": 105247
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7802010774612427,
      "learning_rate": 0.00034059053350300663,
      "loss": 2.9942,
      "step": 105248
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.977861762046814,
      "learning_rate": 0.0003405864805520912,
      "loss": 3.3,
      "step": 105249
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7575721740722656,
      "learning_rate": 0.00034058242759363,
      "loss": 2.8575,
      "step": 105250
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0066797733306885,
      "learning_rate": 0.0003405783746276238,
      "loss": 2.818,
      "step": 105251
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0904860496520996,
      "learning_rate": 0.00034057432165407323,
      "loss": 3.1901,
      "step": 105252
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0489721298217773,
      "learning_rate": 0.0003405702686729793,
      "loss": 2.8992,
      "step": 105253
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8015062808990479,
      "learning_rate": 0.00034056621568434245,
      "loss": 3.0839,
      "step": 105254
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7154552936553955,
      "learning_rate": 0.0003405621626881636,
      "loss": 2.9369,
      "step": 105255
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.666495680809021,
      "learning_rate": 0.00034055810968444347,
      "loss": 2.8321,
      "step": 105256
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7193386554718018,
      "learning_rate": 0.0003405540566731828,
      "loss": 2.882,
      "step": 105257
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9833879470825195,
      "learning_rate": 0.0003405500036543824,
      "loss": 2.8353,
      "step": 105258
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.757933497428894,
      "learning_rate": 0.00034054595062804307,
      "loss": 2.944,
      "step": 105259
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8426458835601807,
      "learning_rate": 0.00034054189759416537,
      "loss": 2.7127,
      "step": 105260
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5653640031814575,
      "learning_rate": 0.00034053784455275013,
      "loss": 2.8551,
      "step": 105261
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6926602125167847,
      "learning_rate": 0.0003405337915037982,
      "loss": 3.4218,
      "step": 105262
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.034958600997925,
      "learning_rate": 0.0003405297384473103,
      "loss": 2.9867,
      "step": 105263
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2376370429992676,
      "learning_rate": 0.0003405256853832872,
      "loss": 2.8386,
      "step": 105264
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6221466064453125,
      "learning_rate": 0.0003405216323117295,
      "loss": 3.2058,
      "step": 105265
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7857509851455688,
      "learning_rate": 0.0003405175792326381,
      "loss": 3.174,
      "step": 105266
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9688993692398071,
      "learning_rate": 0.00034051352614601377,
      "loss": 3.0548,
      "step": 105267
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.708024740219116,
      "learning_rate": 0.00034050947305185714,
      "loss": 3.0828,
      "step": 105268
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2650997638702393,
      "learning_rate": 0.0003405054199501691,
      "loss": 2.8935,
      "step": 105269
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.345813035964966,
      "learning_rate": 0.00034050136684095036,
      "loss": 3.1225,
      "step": 105270
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8423477411270142,
      "learning_rate": 0.0003404973137242015,
      "loss": 3.0594,
      "step": 105271
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6479789018630981,
      "learning_rate": 0.00034049326059992357,
      "loss": 3.023,
      "step": 105272
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.292999505996704,
      "learning_rate": 0.0003404892074681171,
      "loss": 2.9263,
      "step": 105273
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2405812740325928,
      "learning_rate": 0.00034048515432878295,
      "loss": 2.9722,
      "step": 105274
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.926835298538208,
      "learning_rate": 0.0003404811011819219,
      "loss": 3.0983,
      "step": 105275
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.097241163253784,
      "learning_rate": 0.0003404770480275346,
      "loss": 2.9486,
      "step": 105276
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.965198278427124,
      "learning_rate": 0.0003404729948656217,
      "loss": 2.9569,
      "step": 105277
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1338727474212646,
      "learning_rate": 0.0003404689416961843,
      "loss": 2.9923,
      "step": 105278
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.195411443710327,
      "learning_rate": 0.00034046488851922294,
      "loss": 3.0019,
      "step": 105279
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8166570663452148,
      "learning_rate": 0.00034046083533473834,
      "loss": 3.0581,
      "step": 105280
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9996100664138794,
      "learning_rate": 0.0003404567821427313,
      "loss": 2.7554,
      "step": 105281
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.027994155883789,
      "learning_rate": 0.00034045272894320266,
      "loss": 2.8469,
      "step": 105282
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0103323459625244,
      "learning_rate": 0.00034044867573615304,
      "loss": 2.5674,
      "step": 105283
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.932189702987671,
      "learning_rate": 0.0003404446225215833,
      "loss": 3.0819,
      "step": 105284
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7652947902679443,
      "learning_rate": 0.00034044056929949406,
      "loss": 2.9221,
      "step": 105285
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.003427743911743,
      "learning_rate": 0.00034043651606988616,
      "loss": 2.7879,
      "step": 105286
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.849671483039856,
      "learning_rate": 0.0003404324628327604,
      "loss": 3.0106,
      "step": 105287
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6514214277267456,
      "learning_rate": 0.0003404284095881175,
      "loss": 2.8199,
      "step": 105288
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9724845886230469,
      "learning_rate": 0.00034042435633595806,
      "loss": 3.188,
      "step": 105289
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.106961488723755,
      "learning_rate": 0.0003404203030762831,
      "loss": 3.0993,
      "step": 105290
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1091175079345703,
      "learning_rate": 0.00034041624980909324,
      "loss": 3.1751,
      "step": 105291
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6965731382369995,
      "learning_rate": 0.00034041219653438914,
      "loss": 3.1066,
      "step": 105292
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9195857048034668,
      "learning_rate": 0.0003404081432521718,
      "loss": 2.9409,
      "step": 105293
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7911335229873657,
      "learning_rate": 0.00034040408996244173,
      "loss": 3.0509,
      "step": 105294
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8036333322525024,
      "learning_rate": 0.0003404000366651997,
      "loss": 3.0674,
      "step": 105295
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.273984909057617,
      "learning_rate": 0.0003403959833604467,
      "loss": 3.0184,
      "step": 105296
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.884148120880127,
      "learning_rate": 0.00034039193004818327,
      "loss": 2.9538,
      "step": 105297
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9952806234359741,
      "learning_rate": 0.0003403878767284101,
      "loss": 2.8324,
      "step": 105298
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9970840215682983,
      "learning_rate": 0.00034038382340112824,
      "loss": 2.8145,
      "step": 105299
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3951656818389893,
      "learning_rate": 0.00034037977006633814,
      "loss": 2.8754,
      "step": 105300
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.241410255432129,
      "learning_rate": 0.00034037571672404076,
      "loss": 3.0771,
      "step": 105301
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9768195152282715,
      "learning_rate": 0.00034037166337423674,
      "loss": 3.0593,
      "step": 105302
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3364086151123047,
      "learning_rate": 0.0003403676100169269,
      "loss": 3.1721,
      "step": 105303
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.050323009490967,
      "learning_rate": 0.0003403635566521118,
      "loss": 2.8748,
      "step": 105304
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9512046575546265,
      "learning_rate": 0.00034035950327979255,
      "loss": 2.9015,
      "step": 105305
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7787692546844482,
      "learning_rate": 0.00034035544989996963,
      "loss": 2.9692,
      "step": 105306
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0677695274353027,
      "learning_rate": 0.00034035139651264387,
      "loss": 3.4216,
      "step": 105307
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.222196340560913,
      "learning_rate": 0.000340347343117816,
      "loss": 2.9486,
      "step": 105308
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7588741779327393,
      "learning_rate": 0.0003403432897154868,
      "loss": 3.267,
      "step": 105309
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.398216962814331,
      "learning_rate": 0.000340339236305657,
      "loss": 3.2327,
      "step": 105310
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.365739345550537,
      "learning_rate": 0.0003403351828883275,
      "loss": 2.8845,
      "step": 105311
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1840741634368896,
      "learning_rate": 0.00034033112946349887,
      "loss": 2.7388,
      "step": 105312
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7246172428131104,
      "learning_rate": 0.0003403270760311719,
      "loss": 3.031,
      "step": 105313
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.418039083480835,
      "learning_rate": 0.0003403230225913473,
      "loss": 2.8364,
      "step": 105314
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6506216526031494,
      "learning_rate": 0.0003403189691440259,
      "loss": 2.9555,
      "step": 105315
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8801268339157104,
      "learning_rate": 0.0003403149156892085,
      "loss": 3.0507,
      "step": 105316
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4154837131500244,
      "learning_rate": 0.00034031086222689586,
      "loss": 2.9847,
      "step": 105317
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7797330617904663,
      "learning_rate": 0.0003403068087570886,
      "loss": 2.8548,
      "step": 105318
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2378265857696533,
      "learning_rate": 0.0003403027552797875,
      "loss": 3.0715,
      "step": 105319
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7345454692840576,
      "learning_rate": 0.00034029870179499343,
      "loss": 2.9892,
      "step": 105320
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.117262601852417,
      "learning_rate": 0.00034029464830270703,
      "loss": 2.9407,
      "step": 105321
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6214439868927,
      "learning_rate": 0.0003402905948029291,
      "loss": 2.9269,
      "step": 105322
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6720852851867676,
      "learning_rate": 0.0003402865412956604,
      "loss": 2.8964,
      "step": 105323
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9417059421539307,
      "learning_rate": 0.0003402824877809017,
      "loss": 2.9772,
      "step": 105324
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1730458736419678,
      "learning_rate": 0.0003402784342586536,
      "loss": 3.1948,
      "step": 105325
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9680941104888916,
      "learning_rate": 0.00034027438072891714,
      "loss": 3.1396,
      "step": 105326
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3939712047576904,
      "learning_rate": 0.0003402703271916928,
      "loss": 2.8551,
      "step": 105327
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.913068413734436,
      "learning_rate": 0.0003402662736469815,
      "loss": 3.0245,
      "step": 105328
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.686418056488037,
      "learning_rate": 0.0003402622200947839,
      "loss": 2.8927,
      "step": 105329
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2380857467651367,
      "learning_rate": 0.00034025816653510083,
      "loss": 3.091,
      "step": 105330
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8253796100616455,
      "learning_rate": 0.00034025411296793303,
      "loss": 2.7358,
      "step": 105331
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.049652576446533,
      "learning_rate": 0.00034025005939328115,
      "loss": 3.1746,
      "step": 105332
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9587281942367554,
      "learning_rate": 0.00034024600581114605,
      "loss": 3.0232,
      "step": 105333
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.633421540260315,
      "learning_rate": 0.0003402419522215285,
      "loss": 2.9055,
      "step": 105334
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9358935356140137,
      "learning_rate": 0.00034023789862442917,
      "loss": 3.1286,
      "step": 105335
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.172649383544922,
      "learning_rate": 0.0003402338450198489,
      "loss": 3.1273,
      "step": 105336
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.660405158996582,
      "learning_rate": 0.00034022979140778837,
      "loss": 3.0464,
      "step": 105337
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.998859167098999,
      "learning_rate": 0.00034022573778824834,
      "loss": 2.9021,
      "step": 105338
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.393083095550537,
      "learning_rate": 0.00034022168416122966,
      "loss": 2.9309,
      "step": 105339
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8085910081863403,
      "learning_rate": 0.00034021763052673296,
      "loss": 2.7992,
      "step": 105340
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9447300434112549,
      "learning_rate": 0.000340213576884759,
      "loss": 2.9927,
      "step": 105341
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0700602531433105,
      "learning_rate": 0.00034020952323530866,
      "loss": 2.9102,
      "step": 105342
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7855637073516846,
      "learning_rate": 0.0003402054695783825,
      "loss": 3.0368,
      "step": 105343
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7613245248794556,
      "learning_rate": 0.00034020141591398147,
      "loss": 2.9852,
      "step": 105344
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7008363008499146,
      "learning_rate": 0.0003401973622421063,
      "loss": 3.0393,
      "step": 105345
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6650850772857666,
      "learning_rate": 0.00034019330856275757,
      "loss": 2.8334,
      "step": 105346
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.3007736206054688,
      "learning_rate": 0.00034018925487593613,
      "loss": 2.975,
      "step": 105347
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0493993759155273,
      "learning_rate": 0.0003401852011816429,
      "loss": 2.9844,
      "step": 105348
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.11169171333313,
      "learning_rate": 0.0003401811474798783,
      "loss": 2.9923,
      "step": 105349
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.273914337158203,
      "learning_rate": 0.00034017709377064337,
      "loss": 3.0615,
      "step": 105350
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6384482383728027,
      "learning_rate": 0.0003401730400539388,
      "loss": 2.9303,
      "step": 105351
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0822055339813232,
      "learning_rate": 0.0003401689863297652,
      "loss": 2.8179,
      "step": 105352
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.729588270187378,
      "learning_rate": 0.0003401649325981234,
      "loss": 2.9331,
      "step": 105353
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.411456346511841,
      "learning_rate": 0.00034016087885901437,
      "loss": 2.9456,
      "step": 105354
    },
    {
      "epoch": 1.37,
      "grad_norm": 8.918606758117676,
      "learning_rate": 0.0003401568251124385,
      "loss": 3.1113,
      "step": 105355
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.404843807220459,
      "learning_rate": 0.0003401527713583968,
      "loss": 3.0851,
      "step": 105356
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.660701274871826,
      "learning_rate": 0.0003401487175968899,
      "loss": 3.0526,
      "step": 105357
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.63698673248291,
      "learning_rate": 0.0003401446638279186,
      "loss": 3.0821,
      "step": 105358
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.9525718688964844,
      "learning_rate": 0.0003401406100514837,
      "loss": 3.2597,
      "step": 105359
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.052539825439453,
      "learning_rate": 0.00034013655626758586,
      "loss": 2.957,
      "step": 105360
    },
    {
      "epoch": 1.37,
      "grad_norm": 5.9939470291137695,
      "learning_rate": 0.00034013250247622587,
      "loss": 2.8739,
      "step": 105361
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.82034170627594,
      "learning_rate": 0.00034012844867740445,
      "loss": 2.9898,
      "step": 105362
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9511055946350098,
      "learning_rate": 0.0003401243948711224,
      "loss": 3.0246,
      "step": 105363
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.329784870147705,
      "learning_rate": 0.0003401203410573806,
      "loss": 3.1597,
      "step": 105364
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.7720742225646973,
      "learning_rate": 0.00034011628723617956,
      "loss": 2.9918,
      "step": 105365
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5631558895111084,
      "learning_rate": 0.00034011223340752014,
      "loss": 2.9272,
      "step": 105366
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5108261108398438,
      "learning_rate": 0.00034010817957140315,
      "loss": 2.8989,
      "step": 105367
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.438342571258545,
      "learning_rate": 0.0003401041257278293,
      "loss": 2.9198,
      "step": 105368
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1623497009277344,
      "learning_rate": 0.00034010007187679924,
      "loss": 3.1788,
      "step": 105369
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9229590892791748,
      "learning_rate": 0.00034009601801831386,
      "loss": 2.8995,
      "step": 105370
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6774076223373413,
      "learning_rate": 0.000340091964152374,
      "loss": 3.034,
      "step": 105371
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9509745836257935,
      "learning_rate": 0.0003400879102789801,
      "loss": 3.1278,
      "step": 105372
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2262821197509766,
      "learning_rate": 0.00034008385639813316,
      "loss": 3.1288,
      "step": 105373
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9050920009613037,
      "learning_rate": 0.0003400798025098339,
      "loss": 2.978,
      "step": 105374
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2213962078094482,
      "learning_rate": 0.00034007574861408306,
      "loss": 2.9874,
      "step": 105375
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7196922302246094,
      "learning_rate": 0.0003400716947108814,
      "loss": 2.9145,
      "step": 105376
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9326086044311523,
      "learning_rate": 0.00034006764080022955,
      "loss": 3.0172,
      "step": 105377
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.185098648071289,
      "learning_rate": 0.00034006358688212855,
      "loss": 3.0482,
      "step": 105378
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1981427669525146,
      "learning_rate": 0.0003400595329565788,
      "loss": 3.0572,
      "step": 105379
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.67109215259552,
      "learning_rate": 0.00034005547902358124,
      "loss": 3.1529,
      "step": 105380
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.571873426437378,
      "learning_rate": 0.00034005142508313673,
      "loss": 2.8928,
      "step": 105381
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0793309211730957,
      "learning_rate": 0.0003400473711352458,
      "loss": 2.9744,
      "step": 105382
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7122248411178589,
      "learning_rate": 0.0003400433171799094,
      "loss": 3.0105,
      "step": 105383
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6566798686981201,
      "learning_rate": 0.0003400392632171281,
      "loss": 2.8994,
      "step": 105384
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.845799446105957,
      "learning_rate": 0.00034003520924690277,
      "loss": 3.0707,
      "step": 105385
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8324919939041138,
      "learning_rate": 0.0003400311552692342,
      "loss": 3.2052,
      "step": 105386
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3172521591186523,
      "learning_rate": 0.00034002710128412303,
      "loss": 2.9278,
      "step": 105387
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6121606826782227,
      "learning_rate": 0.00034002304729157,
      "loss": 3.2336,
      "step": 105388
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.969266653060913,
      "learning_rate": 0.000340018993291576,
      "loss": 3.0867,
      "step": 105389
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.005497694015503,
      "learning_rate": 0.00034001493928414176,
      "loss": 2.8502,
      "step": 105390
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3178155422210693,
      "learning_rate": 0.0003400108852692679,
      "loss": 2.8506,
      "step": 105391
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7968571186065674,
      "learning_rate": 0.00034000683124695534,
      "loss": 2.8749,
      "step": 105392
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.032635450363159,
      "learning_rate": 0.0003400027772172047,
      "loss": 2.8976,
      "step": 105393
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.855989933013916,
      "learning_rate": 0.00033999872318001684,
      "loss": 2.8397,
      "step": 105394
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9158700704574585,
      "learning_rate": 0.0003399946691353924,
      "loss": 3.0021,
      "step": 105395
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0318498611450195,
      "learning_rate": 0.0003399906150833322,
      "loss": 2.9333,
      "step": 105396
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.151857852935791,
      "learning_rate": 0.00033998656102383705,
      "loss": 2.8759,
      "step": 105397
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.435800313949585,
      "learning_rate": 0.0003399825069569076,
      "loss": 3.1411,
      "step": 105398
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8173887729644775,
      "learning_rate": 0.0003399784528825446,
      "loss": 3.2152,
      "step": 105399
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.044429302215576,
      "learning_rate": 0.00033997439880074904,
      "loss": 3.0194,
      "step": 105400
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7816569805145264,
      "learning_rate": 0.0003399703447115213,
      "loss": 2.9104,
      "step": 105401
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8195791244506836,
      "learning_rate": 0.00033996629061486234,
      "loss": 2.9218,
      "step": 105402
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8602184057235718,
      "learning_rate": 0.00033996223651077296,
      "loss": 2.8291,
      "step": 105403
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2158567905426025,
      "learning_rate": 0.0003399581823992538,
      "loss": 3.0058,
      "step": 105404
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0293021202087402,
      "learning_rate": 0.00033995412828030565,
      "loss": 2.7733,
      "step": 105405
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2634918689727783,
      "learning_rate": 0.0003399500741539293,
      "loss": 2.7008,
      "step": 105406
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8460789918899536,
      "learning_rate": 0.0003399460200201254,
      "loss": 3.1087,
      "step": 105407
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.14522647857666,
      "learning_rate": 0.00033994196587889484,
      "loss": 3.0792,
      "step": 105408
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7086180448532104,
      "learning_rate": 0.0003399379117302384,
      "loss": 3.146,
      "step": 105409
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.080963373184204,
      "learning_rate": 0.00033993385757415665,
      "loss": 2.7892,
      "step": 105410
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9794132709503174,
      "learning_rate": 0.00033992980341065044,
      "loss": 2.9703,
      "step": 105411
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0481157302856445,
      "learning_rate": 0.0003399257492397206,
      "loss": 2.7566,
      "step": 105412
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7343909740447998,
      "learning_rate": 0.0003399216950613677,
      "loss": 3.1245,
      "step": 105413
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.370659828186035,
      "learning_rate": 0.0003399176408755927,
      "loss": 2.905,
      "step": 105414
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.835868239402771,
      "learning_rate": 0.0003399135866823962,
      "loss": 3.0816,
      "step": 105415
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5163378715515137,
      "learning_rate": 0.00033990953248177905,
      "loss": 3.3053,
      "step": 105416
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0772101879119873,
      "learning_rate": 0.0003399054782737419,
      "loss": 3.0204,
      "step": 105417
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7547270059585571,
      "learning_rate": 0.00033990142405828565,
      "loss": 2.9931,
      "step": 105418
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6951814889907837,
      "learning_rate": 0.0003398973698354109,
      "loss": 2.95,
      "step": 105419
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6325106620788574,
      "learning_rate": 0.0003398933156051185,
      "loss": 3.0993,
      "step": 105420
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8091334104537964,
      "learning_rate": 0.0003398892613674092,
      "loss": 2.7994,
      "step": 105421
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8412840366363525,
      "learning_rate": 0.00033988520712228374,
      "loss": 3.1713,
      "step": 105422
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9341764450073242,
      "learning_rate": 0.00033988115286974285,
      "loss": 2.9706,
      "step": 105423
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6774303913116455,
      "learning_rate": 0.00033987709860978737,
      "loss": 2.8992,
      "step": 105424
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6039212942123413,
      "learning_rate": 0.00033987304434241784,
      "loss": 3.008,
      "step": 105425
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6709061861038208,
      "learning_rate": 0.00033986899006763525,
      "loss": 2.95,
      "step": 105426
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.702652096748352,
      "learning_rate": 0.0003398649357854403,
      "loss": 3.0613,
      "step": 105427
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2071213722229004,
      "learning_rate": 0.00033986088149583356,
      "loss": 2.9077,
      "step": 105428
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.459332227706909,
      "learning_rate": 0.00033985682719881604,
      "loss": 3.0163,
      "step": 105429
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5452158451080322,
      "learning_rate": 0.00033985277289438843,
      "loss": 2.8133,
      "step": 105430
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8357646465301514,
      "learning_rate": 0.00033984871858255133,
      "loss": 2.9199,
      "step": 105431
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7319190502166748,
      "learning_rate": 0.0003398446642633056,
      "loss": 2.7434,
      "step": 105432
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.0946578979492188,
      "learning_rate": 0.0003398406099366521,
      "loss": 2.9836,
      "step": 105433
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9541226625442505,
      "learning_rate": 0.00033983655560259143,
      "loss": 2.9948,
      "step": 105434
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.028918504714966,
      "learning_rate": 0.00033983250126112433,
      "loss": 3.0077,
      "step": 105435
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0048623085021973,
      "learning_rate": 0.0003398284469122517,
      "loss": 3.0427,
      "step": 105436
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7943428754806519,
      "learning_rate": 0.00033982439255597413,
      "loss": 3.3273,
      "step": 105437
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.688628077507019,
      "learning_rate": 0.0003398203381922925,
      "loss": 3.1081,
      "step": 105438
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7230082750320435,
      "learning_rate": 0.0003398162838212076,
      "loss": 2.8588,
      "step": 105439
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6441354751586914,
      "learning_rate": 0.00033981222944271997,
      "loss": 3.1648,
      "step": 105440
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.650966763496399,
      "learning_rate": 0.00033980817505683056,
      "loss": 2.9281,
      "step": 105441
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.520726203918457,
      "learning_rate": 0.00033980412066354006,
      "loss": 2.8715,
      "step": 105442
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.034203290939331,
      "learning_rate": 0.0003398000662628492,
      "loss": 2.7981,
      "step": 105443
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.030912399291992,
      "learning_rate": 0.0003397960118547587,
      "loss": 2.9611,
      "step": 105444
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5488579273223877,
      "learning_rate": 0.00033979195743926945,
      "loss": 3.1267,
      "step": 105445
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.443126916885376,
      "learning_rate": 0.0003397879030163821,
      "loss": 3.0838,
      "step": 105446
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.933924913406372,
      "learning_rate": 0.0003397838485860974,
      "loss": 3.1377,
      "step": 105447
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.719904899597168,
      "learning_rate": 0.0003397797941484162,
      "loss": 2.9837,
      "step": 105448
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.893819570541382,
      "learning_rate": 0.0003397757397033391,
      "loss": 3.1305,
      "step": 105449
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.065516948699951,
      "learning_rate": 0.00033977168525086706,
      "loss": 3.1302,
      "step": 105450
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3606879711151123,
      "learning_rate": 0.0003397676307910006,
      "loss": 3.0466,
      "step": 105451
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9269444942474365,
      "learning_rate": 0.00033976357632374066,
      "loss": 2.7919,
      "step": 105452
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.329913139343262,
      "learning_rate": 0.00033975952184908784,
      "loss": 2.8099,
      "step": 105453
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.067943572998047,
      "learning_rate": 0.00033975546736704303,
      "loss": 3.1226,
      "step": 105454
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.736742377281189,
      "learning_rate": 0.0003397514128776069,
      "loss": 3.0124,
      "step": 105455
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7559874057769775,
      "learning_rate": 0.0003397473583807803,
      "loss": 3.1529,
      "step": 105456
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.009113311767578,
      "learning_rate": 0.0003397433038765639,
      "loss": 3.03,
      "step": 105457
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6170470714569092,
      "learning_rate": 0.00033973924936495835,
      "loss": 2.9268,
      "step": 105458
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5817787647247314,
      "learning_rate": 0.00033973519484596464,
      "loss": 3.0373,
      "step": 105459
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6010066270828247,
      "learning_rate": 0.00033973114031958334,
      "loss": 3.2224,
      "step": 105460
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6865394115447998,
      "learning_rate": 0.00033972708578581536,
      "loss": 2.9345,
      "step": 105461
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8111679553985596,
      "learning_rate": 0.00033972303124466126,
      "loss": 2.931,
      "step": 105462
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6544923782348633,
      "learning_rate": 0.000339718976696122,
      "loss": 2.8398,
      "step": 105463
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7608342170715332,
      "learning_rate": 0.00033971492214019815,
      "loss": 2.9714,
      "step": 105464
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.275686025619507,
      "learning_rate": 0.0003397108675768906,
      "loss": 3.1595,
      "step": 105465
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8125008344650269,
      "learning_rate": 0.00033970681300620007,
      "loss": 2.9332,
      "step": 105466
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.168602228164673,
      "learning_rate": 0.00033970275842812726,
      "loss": 3.1093,
      "step": 105467
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2426400184631348,
      "learning_rate": 0.00033969870384267293,
      "loss": 2.9954,
      "step": 105468
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.741692066192627,
      "learning_rate": 0.00033969464924983784,
      "loss": 3.0171,
      "step": 105469
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9136533737182617,
      "learning_rate": 0.0003396905946496228,
      "loss": 2.807,
      "step": 105470
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3110673427581787,
      "learning_rate": 0.0003396865400420285,
      "loss": 3.0683,
      "step": 105471
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.469770669937134,
      "learning_rate": 0.0003396824854270558,
      "loss": 3.1345,
      "step": 105472
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9851099252700806,
      "learning_rate": 0.0003396784308047054,
      "loss": 2.9651,
      "step": 105473
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7283474206924438,
      "learning_rate": 0.00033967437617497793,
      "loss": 2.9507,
      "step": 105474
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.799531936645508,
      "learning_rate": 0.00033967032153787427,
      "loss": 3.0643,
      "step": 105475
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.626042604446411,
      "learning_rate": 0.0003396662668933952,
      "loss": 2.7511,
      "step": 105476
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1631767749786377,
      "learning_rate": 0.00033966221224154136,
      "loss": 2.8241,
      "step": 105477
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7992258071899414,
      "learning_rate": 0.0003396581575823136,
      "loss": 3.0929,
      "step": 105478
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8617132902145386,
      "learning_rate": 0.0003396541029157126,
      "loss": 2.8188,
      "step": 105479
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.627654552459717,
      "learning_rate": 0.0003396500482417392,
      "loss": 2.8202,
      "step": 105480
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7898213863372803,
      "learning_rate": 0.00033964599356039413,
      "loss": 3.1875,
      "step": 105481
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9751919507980347,
      "learning_rate": 0.00033964193887167806,
      "loss": 3.0255,
      "step": 105482
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9767425060272217,
      "learning_rate": 0.00033963788417559185,
      "loss": 2.942,
      "step": 105483
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.330289363861084,
      "learning_rate": 0.00033963382947213615,
      "loss": 2.9052,
      "step": 105484
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2982656955718994,
      "learning_rate": 0.00033962977476131184,
      "loss": 2.996,
      "step": 105485
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4326939582824707,
      "learning_rate": 0.00033962572004311956,
      "loss": 3.1086,
      "step": 105486
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.7081143856048584,
      "learning_rate": 0.0003396216653175601,
      "loss": 2.8636,
      "step": 105487
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.6784732341766357,
      "learning_rate": 0.0003396176105846343,
      "loss": 3.093,
      "step": 105488
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.113557815551758,
      "learning_rate": 0.0003396135558443428,
      "loss": 3.0474,
      "step": 105489
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9845942258834839,
      "learning_rate": 0.00033960950109668633,
      "loss": 2.9406,
      "step": 105490
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9898098707199097,
      "learning_rate": 0.00033960544634166587,
      "loss": 2.9523,
      "step": 105491
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.292997121810913,
      "learning_rate": 0.0003396013915792819,
      "loss": 2.7932,
      "step": 105492
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7712647914886475,
      "learning_rate": 0.0003395973368095352,
      "loss": 3.1835,
      "step": 105493
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8325141668319702,
      "learning_rate": 0.0003395932820324268,
      "loss": 2.9733,
      "step": 105494
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.674639105796814,
      "learning_rate": 0.00033958922724795706,
      "loss": 3.1684,
      "step": 105495
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7331266403198242,
      "learning_rate": 0.000339585172456127,
      "loss": 3.4072,
      "step": 105496
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.683671474456787,
      "learning_rate": 0.0003395811176569374,
      "loss": 3.0058,
      "step": 105497
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8458983898162842,
      "learning_rate": 0.0003395770628503889,
      "loss": 3.2526,
      "step": 105498
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.841578483581543,
      "learning_rate": 0.0003395730080364822,
      "loss": 3.2018,
      "step": 105499
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9155210256576538,
      "learning_rate": 0.00033956895321521813,
      "loss": 3.1172,
      "step": 105500
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5829954147338867,
      "learning_rate": 0.0003395648983865975,
      "loss": 2.8381,
      "step": 105501
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3674421310424805,
      "learning_rate": 0.00033956084355062094,
      "loss": 3.1254,
      "step": 105502
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7948919534683228,
      "learning_rate": 0.0003395567887072894,
      "loss": 3.0836,
      "step": 105503
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.679041862487793,
      "learning_rate": 0.00033955273385660343,
      "loss": 2.9749,
      "step": 105504
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.1372299194335938,
      "learning_rate": 0.0003395486789985638,
      "loss": 2.7543,
      "step": 105505
    },
    {
      "epoch": 1.37,
      "grad_norm": 4.455268383026123,
      "learning_rate": 0.00033954462413317147,
      "loss": 2.5952,
      "step": 105506
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.748612403869629,
      "learning_rate": 0.00033954056926042693,
      "loss": 2.9808,
      "step": 105507
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6995868682861328,
      "learning_rate": 0.00033953651438033106,
      "loss": 3.1409,
      "step": 105508
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.756355047225952,
      "learning_rate": 0.0003395324594928847,
      "loss": 2.8353,
      "step": 105509
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.5253958702087402,
      "learning_rate": 0.00033952840459808845,
      "loss": 2.9636,
      "step": 105510
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0017974376678467,
      "learning_rate": 0.00033952434969594306,
      "loss": 3.0986,
      "step": 105511
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.245065927505493,
      "learning_rate": 0.0003395202947864495,
      "loss": 2.9893,
      "step": 105512
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6551426649093628,
      "learning_rate": 0.00033951623986960816,
      "loss": 3.1335,
      "step": 105513
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7308971881866455,
      "learning_rate": 0.00033951218494542013,
      "loss": 2.8904,
      "step": 105514
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7151267528533936,
      "learning_rate": 0.0003395081300138861,
      "loss": 3.0793,
      "step": 105515
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.4685635566711426,
      "learning_rate": 0.00033950407507500667,
      "loss": 2.8128,
      "step": 105516
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6967697143554688,
      "learning_rate": 0.00033950002012878266,
      "loss": 2.9661,
      "step": 105517
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9801527261734009,
      "learning_rate": 0.0003394959651752149,
      "loss": 3.0679,
      "step": 105518
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0342812538146973,
      "learning_rate": 0.0003394919102143041,
      "loss": 3.0343,
      "step": 105519
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.894096851348877,
      "learning_rate": 0.000339487855246051,
      "loss": 2.9169,
      "step": 105520
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9202065467834473,
      "learning_rate": 0.0003394838002704564,
      "loss": 2.9914,
      "step": 105521
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8732234239578247,
      "learning_rate": 0.0003394797452875209,
      "loss": 2.7761,
      "step": 105522
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5725364685058594,
      "learning_rate": 0.00033947569029724547,
      "loss": 2.8223,
      "step": 105523
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7616506814956665,
      "learning_rate": 0.0003394716352996308,
      "loss": 2.9302,
      "step": 105524
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.024258613586426,
      "learning_rate": 0.0003394675802946775,
      "loss": 2.9727,
      "step": 105525
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6836847066879272,
      "learning_rate": 0.0003394635252823865,
      "loss": 3.105,
      "step": 105526
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.599407196044922,
      "learning_rate": 0.00033945947026275844,
      "loss": 3.0644,
      "step": 105527
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8097485303878784,
      "learning_rate": 0.00033945541523579416,
      "loss": 2.7778,
      "step": 105528
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.052823543548584,
      "learning_rate": 0.00033945136020149434,
      "loss": 3.1525,
      "step": 105529
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7057480812072754,
      "learning_rate": 0.0003394473051598598,
      "loss": 2.9914,
      "step": 105530
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5991989374160767,
      "learning_rate": 0.00033944325011089124,
      "loss": 3.2149,
      "step": 105531
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8397629261016846,
      "learning_rate": 0.0003394391950545894,
      "loss": 2.7716,
      "step": 105532
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.4887906312942505,
      "learning_rate": 0.00033943513999095515,
      "loss": 2.8866,
      "step": 105533
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7002301216125488,
      "learning_rate": 0.00033943108491998913,
      "loss": 3.0851,
      "step": 105534
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.546161413192749,
      "learning_rate": 0.0003394270298416921,
      "loss": 3.073,
      "step": 105535
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.001314401626587,
      "learning_rate": 0.0003394229747560649,
      "loss": 2.9962,
      "step": 105536
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1072072982788086,
      "learning_rate": 0.0003394189196631082,
      "loss": 3.0748,
      "step": 105537
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.982683539390564,
      "learning_rate": 0.00033941486456282277,
      "loss": 2.9761,
      "step": 105538
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8443095684051514,
      "learning_rate": 0.00033941080945520935,
      "loss": 3.0007,
      "step": 105539
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8277108669281006,
      "learning_rate": 0.0003394067543402688,
      "loss": 3.0368,
      "step": 105540
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6788907051086426,
      "learning_rate": 0.0003394026992180017,
      "loss": 3.0403,
      "step": 105541
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.109013080596924,
      "learning_rate": 0.0003393986440884089,
      "loss": 3.0648,
      "step": 105542
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9563969373703003,
      "learning_rate": 0.00033939458895149117,
      "loss": 3.1614,
      "step": 105543
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8189268112182617,
      "learning_rate": 0.00033939053380724923,
      "loss": 2.7476,
      "step": 105544
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7873108386993408,
      "learning_rate": 0.00033938647865568383,
      "loss": 2.9847,
      "step": 105545
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3693885803222656,
      "learning_rate": 0.0003393824234967958,
      "loss": 2.9191,
      "step": 105546
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.3375070095062256,
      "learning_rate": 0.0003393783683305858,
      "loss": 3.0795,
      "step": 105547
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.676662564277649,
      "learning_rate": 0.00033937431315705463,
      "loss": 3.0277,
      "step": 105548
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.9688987731933594,
      "learning_rate": 0.000339370257976203,
      "loss": 2.8435,
      "step": 105549
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.5309102535247803,
      "learning_rate": 0.0003393662027880317,
      "loss": 2.9279,
      "step": 105550
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0479516983032227,
      "learning_rate": 0.00033936214759254155,
      "loss": 3.2475,
      "step": 105551
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6686948537826538,
      "learning_rate": 0.0003393580923897331,
      "loss": 3.1753,
      "step": 105552
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9981547594070435,
      "learning_rate": 0.0003393540371796073,
      "loss": 2.8661,
      "step": 105553
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.964541435241699,
      "learning_rate": 0.00033934998196216487,
      "loss": 2.9374,
      "step": 105554
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9653897285461426,
      "learning_rate": 0.0003393459267374066,
      "loss": 3.0213,
      "step": 105555
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7790822982788086,
      "learning_rate": 0.00033934187150533307,
      "loss": 2.7894,
      "step": 105556
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.1354920864105225,
      "learning_rate": 0.0003393378162659452,
      "loss": 3.1017,
      "step": 105557
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6527831554412842,
      "learning_rate": 0.0003393337610192437,
      "loss": 3.0498,
      "step": 105558
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0304884910583496,
      "learning_rate": 0.0003393297057652292,
      "loss": 2.9366,
      "step": 105559
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6897635459899902,
      "learning_rate": 0.00033932565050390266,
      "loss": 2.9423,
      "step": 105560
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.039003372192383,
      "learning_rate": 0.0003393215952352648,
      "loss": 2.9964,
      "step": 105561
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0780646800994873,
      "learning_rate": 0.0003393175399593162,
      "loss": 2.844,
      "step": 105562
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7898685932159424,
      "learning_rate": 0.00033931348467605775,
      "loss": 3.1295,
      "step": 105563
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5790245532989502,
      "learning_rate": 0.0003393094293854902,
      "loss": 2.7756,
      "step": 105564
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.10319185256958,
      "learning_rate": 0.00033930537408761427,
      "loss": 3.0628,
      "step": 105565
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.371511459350586,
      "learning_rate": 0.0003393013187824307,
      "loss": 2.9035,
      "step": 105566
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.817138910293579,
      "learning_rate": 0.0003392972634699403,
      "loss": 2.8085,
      "step": 105567
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7435495853424072,
      "learning_rate": 0.00033929320815014383,
      "loss": 3.0094,
      "step": 105568
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8130207061767578,
      "learning_rate": 0.000339289152823042,
      "loss": 3.1604,
      "step": 105569
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9354645013809204,
      "learning_rate": 0.00033928509748863564,
      "loss": 2.9743,
      "step": 105570
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.269279718399048,
      "learning_rate": 0.0003392810421469253,
      "loss": 3.0565,
      "step": 105571
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5816816091537476,
      "learning_rate": 0.00033927698679791195,
      "loss": 2.9771,
      "step": 105572
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6711516380310059,
      "learning_rate": 0.0003392729314415963,
      "loss": 3.1197,
      "step": 105573
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5962237119674683,
      "learning_rate": 0.000339268876077979,
      "loss": 2.8501,
      "step": 105574
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9474353790283203,
      "learning_rate": 0.00033926482070706083,
      "loss": 2.9744,
      "step": 105575
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7311737537384033,
      "learning_rate": 0.00033926076532884275,
      "loss": 3.1635,
      "step": 105576
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2286128997802734,
      "learning_rate": 0.00033925670994332523,
      "loss": 2.8992,
      "step": 105577
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7792892456054688,
      "learning_rate": 0.00033925265455050913,
      "loss": 3.2802,
      "step": 105578
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.078836679458618,
      "learning_rate": 0.0003392485991503953,
      "loss": 3.0515,
      "step": 105579
    },
    {
      "epoch": 1.37,
      "grad_norm": 5.4723076820373535,
      "learning_rate": 0.0003392445437429844,
      "loss": 3.1594,
      "step": 105580
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0483360290527344,
      "learning_rate": 0.0003392404883282771,
      "loss": 2.9633,
      "step": 105581
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.8726526498794556,
      "learning_rate": 0.00033923643290627444,
      "loss": 3.133,
      "step": 105582
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9862186908721924,
      "learning_rate": 0.0003392323774769769,
      "loss": 3.0712,
      "step": 105583
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9511382579803467,
      "learning_rate": 0.00033922832204038526,
      "loss": 3.177,
      "step": 105584
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6175498962402344,
      "learning_rate": 0.0003392242665965004,
      "loss": 2.9114,
      "step": 105585
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9453741312026978,
      "learning_rate": 0.00033922021114532295,
      "loss": 2.8658,
      "step": 105586
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6180286407470703,
      "learning_rate": 0.0003392161556868537,
      "loss": 2.9796,
      "step": 105587
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.084965705871582,
      "learning_rate": 0.00033921210022109355,
      "loss": 3.0772,
      "step": 105588
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9001704454421997,
      "learning_rate": 0.000339208044748043,
      "loss": 3.0748,
      "step": 105589
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6372933387756348,
      "learning_rate": 0.000339203989267703,
      "loss": 3.0444,
      "step": 105590
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.6617668867111206,
      "learning_rate": 0.0003391999337800743,
      "loss": 3.1795,
      "step": 105591
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.5651737451553345,
      "learning_rate": 0.00033919587828515747,
      "loss": 2.7195,
      "step": 105592
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3362808227539062,
      "learning_rate": 0.00033919182278295345,
      "loss": 2.9558,
      "step": 105593
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.9928749799728394,
      "learning_rate": 0.00033918776727346294,
      "loss": 2.834,
      "step": 105594
    },
    {
      "epoch": 1.37,
      "grad_norm": 1.7320020198822021,
      "learning_rate": 0.00033918371175668664,
      "loss": 2.8945,
      "step": 105595
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.3793141841888428,
      "learning_rate": 0.0003391796562326254,
      "loss": 3.0852,
      "step": 105596
    },
    {
      "epoch": 1.37,
      "grad_norm": 3.0727083683013916,
      "learning_rate": 0.0003391756007012799,
      "loss": 2.6181,
      "step": 105597
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.0802571773529053,
      "learning_rate": 0.00033917154516265095,
      "loss": 3.0056,
      "step": 105598
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.2092273235321045,
      "learning_rate": 0.00033916748961673916,
      "loss": 2.8962,
      "step": 105599
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.4236509799957275,
      "learning_rate": 0.00033916343406354543,
      "loss": 3.065,
      "step": 105600
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.3926353454589844,
      "learning_rate": 0.0003391593785030706,
      "loss": 3.01,
      "step": 105601
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8388240337371826,
      "learning_rate": 0.00033915532293531515,
      "loss": 3.101,
      "step": 105602
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5744237899780273,
      "learning_rate": 0.0003391512673602801,
      "loss": 3.0057,
      "step": 105603
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.024691343307495,
      "learning_rate": 0.0003391472117779661,
      "loss": 2.8117,
      "step": 105604
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.175107717514038,
      "learning_rate": 0.0003391431561883738,
      "loss": 2.9907,
      "step": 105605
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8776707649230957,
      "learning_rate": 0.0003391391005915041,
      "loss": 3.1409,
      "step": 105606
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.403399467468262,
      "learning_rate": 0.0003391350449873577,
      "loss": 2.9008,
      "step": 105607
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.4214084148406982,
      "learning_rate": 0.0003391309893759354,
      "loss": 2.8637,
      "step": 105608
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7263118028640747,
      "learning_rate": 0.00033912693375723774,
      "loss": 3.1073,
      "step": 105609
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.732574701309204,
      "learning_rate": 0.00033912287813126586,
      "loss": 3.018,
      "step": 105610
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.25258207321167,
      "learning_rate": 0.0003391188224980201,
      "loss": 3.1988,
      "step": 105611
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0804603099823,
      "learning_rate": 0.0003391147668575016,
      "loss": 3.0042,
      "step": 105612
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6969666481018066,
      "learning_rate": 0.00033911071120971086,
      "loss": 2.891,
      "step": 105613
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2525041103363037,
      "learning_rate": 0.00033910665555464864,
      "loss": 3.2426,
      "step": 105614
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.609766721725464,
      "learning_rate": 0.00033910259989231576,
      "loss": 3.0257,
      "step": 105615
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9772183895111084,
      "learning_rate": 0.0003390985442227131,
      "loss": 3.031,
      "step": 105616
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8630506992340088,
      "learning_rate": 0.0003390944885458411,
      "loss": 3.0246,
      "step": 105617
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8047561645507812,
      "learning_rate": 0.0003390904328617008,
      "loss": 3.0424,
      "step": 105618
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9793341159820557,
      "learning_rate": 0.0003390863771702929,
      "loss": 2.9538,
      "step": 105619
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.680962324142456,
      "learning_rate": 0.000339082321471618,
      "loss": 2.8968,
      "step": 105620
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8011332750320435,
      "learning_rate": 0.00033907826576567697,
      "loss": 3.0818,
      "step": 105621
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9393019676208496,
      "learning_rate": 0.0003390742100524706,
      "loss": 2.9102,
      "step": 105622
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.123898506164551,
      "learning_rate": 0.00033907015433199956,
      "loss": 3.088,
      "step": 105623
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7032790184020996,
      "learning_rate": 0.0003390660986042647,
      "loss": 2.9091,
      "step": 105624
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6909711360931396,
      "learning_rate": 0.0003390620428692667,
      "loss": 2.8145,
      "step": 105625
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.025770902633667,
      "learning_rate": 0.00033905798712700625,
      "loss": 2.8957,
      "step": 105626
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0111725330352783,
      "learning_rate": 0.0003390539313774842,
      "loss": 2.8341,
      "step": 105627
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8200418949127197,
      "learning_rate": 0.0003390498756207014,
      "loss": 3.1831,
      "step": 105628
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0335168838500977,
      "learning_rate": 0.00033904581985665837,
      "loss": 3.0921,
      "step": 105629
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6521339416503906,
      "learning_rate": 0.000339041764085356,
      "loss": 3.2293,
      "step": 105630
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.627055048942566,
      "learning_rate": 0.00033903770830679504,
      "loss": 2.792,
      "step": 105631
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8817498683929443,
      "learning_rate": 0.00033903365252097636,
      "loss": 2.9884,
      "step": 105632
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8737068176269531,
      "learning_rate": 0.0003390295967279004,
      "loss": 2.8494,
      "step": 105633
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.061142921447754,
      "learning_rate": 0.00033902554092756816,
      "loss": 3.0126,
      "step": 105634
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1373655796051025,
      "learning_rate": 0.0003390214851199803,
      "loss": 3.0048,
      "step": 105635
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.1926920413970947,
      "learning_rate": 0.00033901742930513777,
      "loss": 3.2145,
      "step": 105636
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3376212120056152,
      "learning_rate": 0.000339013373483041,
      "loss": 3.2301,
      "step": 105637
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.777447462081909,
      "learning_rate": 0.00033900931765369093,
      "loss": 3.0041,
      "step": 105638
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.077812433242798,
      "learning_rate": 0.0003390052618170884,
      "loss": 3.029,
      "step": 105639
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.378411054611206,
      "learning_rate": 0.00033900120597323393,
      "loss": 3.1961,
      "step": 105640
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9312636852264404,
      "learning_rate": 0.00033899715012212844,
      "loss": 3.1183,
      "step": 105641
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2836062908172607,
      "learning_rate": 0.0003389930942637726,
      "loss": 3.1102,
      "step": 105642
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5776381492614746,
      "learning_rate": 0.0003389890383981673,
      "loss": 3.0622,
      "step": 105643
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8272714614868164,
      "learning_rate": 0.0003389849825253132,
      "loss": 3.0049,
      "step": 105644
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4747605323791504,
      "learning_rate": 0.000338980926645211,
      "loss": 2.8393,
      "step": 105645
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1932995319366455,
      "learning_rate": 0.00033897687075786154,
      "loss": 2.8396,
      "step": 105646
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8881112337112427,
      "learning_rate": 0.00033897281486326556,
      "loss": 3.0532,
      "step": 105647
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9815516471862793,
      "learning_rate": 0.0003389687589614237,
      "loss": 2.8921,
      "step": 105648
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9326969385147095,
      "learning_rate": 0.0003389647030523369,
      "loss": 2.9694,
      "step": 105649
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.21052885055542,
      "learning_rate": 0.0003389606471360059,
      "loss": 3.109,
      "step": 105650
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.391937255859375,
      "learning_rate": 0.0003389565912124312,
      "loss": 2.9924,
      "step": 105651
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.320190906524658,
      "learning_rate": 0.00033895253528161385,
      "loss": 3.1213,
      "step": 105652
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8531205654144287,
      "learning_rate": 0.0003389484793435544,
      "loss": 3.1351,
      "step": 105653
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.333937644958496,
      "learning_rate": 0.0003389444233982538,
      "loss": 3.1377,
      "step": 105654
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.8882651329040527,
      "learning_rate": 0.00033894036744571267,
      "loss": 2.8831,
      "step": 105655
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.153658390045166,
      "learning_rate": 0.00033893631148593174,
      "loss": 3.0377,
      "step": 105656
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7503550052642822,
      "learning_rate": 0.00033893225551891187,
      "loss": 3.0634,
      "step": 105657
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3942651748657227,
      "learning_rate": 0.00033892819954465375,
      "loss": 3.1448,
      "step": 105658
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.544825315475464,
      "learning_rate": 0.00033892414356315816,
      "loss": 3.0712,
      "step": 105659
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.648554801940918,
      "learning_rate": 0.0003389200875744257,
      "loss": 2.9312,
      "step": 105660
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8439216613769531,
      "learning_rate": 0.0003389160315784574,
      "loss": 2.9331,
      "step": 105661
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5599151849746704,
      "learning_rate": 0.0003389119755752539,
      "loss": 2.9538,
      "step": 105662
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.592259645462036,
      "learning_rate": 0.00033890791956481584,
      "loss": 2.6525,
      "step": 105663
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1196815967559814,
      "learning_rate": 0.00033890386354714405,
      "loss": 3.1632,
      "step": 105664
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8585922718048096,
      "learning_rate": 0.00033889980752223943,
      "loss": 2.8855,
      "step": 105665
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8946224451065063,
      "learning_rate": 0.0003388957514901025,
      "loss": 3.0179,
      "step": 105666
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.638540506362915,
      "learning_rate": 0.0003388916954507341,
      "loss": 3.1049,
      "step": 105667
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4568073749542236,
      "learning_rate": 0.00033888763940413506,
      "loss": 2.8765,
      "step": 105668
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6640204191207886,
      "learning_rate": 0.00033888358335030605,
      "loss": 3.1337,
      "step": 105669
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9781908988952637,
      "learning_rate": 0.00033887952728924774,
      "loss": 2.955,
      "step": 105670
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.241872787475586,
      "learning_rate": 0.00033887547122096116,
      "loss": 3.2866,
      "step": 105671
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.232914686203003,
      "learning_rate": 0.00033887141514544686,
      "loss": 3.1854,
      "step": 105672
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7738128900527954,
      "learning_rate": 0.0003388673590627055,
      "loss": 3.0694,
      "step": 105673
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8461244106292725,
      "learning_rate": 0.00033886330297273816,
      "loss": 3.1168,
      "step": 105674
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8167833089828491,
      "learning_rate": 0.0003388592468755453,
      "loss": 2.91,
      "step": 105675
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6778771877288818,
      "learning_rate": 0.0003388551907711277,
      "loss": 2.8153,
      "step": 105676
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6573824882507324,
      "learning_rate": 0.00033885113465948635,
      "loss": 3.0097,
      "step": 105677
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6663553714752197,
      "learning_rate": 0.0003388470785406217,
      "loss": 3.0561,
      "step": 105678
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6203376054763794,
      "learning_rate": 0.00033884302241453466,
      "loss": 3.243,
      "step": 105679
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6184593439102173,
      "learning_rate": 0.00033883896628122604,
      "loss": 2.9708,
      "step": 105680
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2590627670288086,
      "learning_rate": 0.00033883491014069647,
      "loss": 3.1492,
      "step": 105681
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9764810800552368,
      "learning_rate": 0.00033883085399294676,
      "loss": 3.0382,
      "step": 105682
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.08335018157959,
      "learning_rate": 0.00033882679783797773,
      "loss": 2.9429,
      "step": 105683
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.184372663497925,
      "learning_rate": 0.00033882274167579,
      "loss": 2.7909,
      "step": 105684
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8891681432724,
      "learning_rate": 0.0003388186855063844,
      "loss": 2.81,
      "step": 105685
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1402580738067627,
      "learning_rate": 0.0003388146293297617,
      "loss": 3.048,
      "step": 105686
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.360297679901123,
      "learning_rate": 0.00033881057314592253,
      "loss": 2.7253,
      "step": 105687
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7559171915054321,
      "learning_rate": 0.00033880651695486784,
      "loss": 3.0733,
      "step": 105688
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.937717318534851,
      "learning_rate": 0.0003388024607565983,
      "loss": 2.9404,
      "step": 105689
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.920597791671753,
      "learning_rate": 0.0003387984045511146,
      "loss": 3.1308,
      "step": 105690
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3156113624572754,
      "learning_rate": 0.0003387943483384175,
      "loss": 3.1344,
      "step": 105691
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1649796962738037,
      "learning_rate": 0.00033879029211850793,
      "loss": 2.835,
      "step": 105692
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6724320650100708,
      "learning_rate": 0.00033878623589138644,
      "loss": 3.0646,
      "step": 105693
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3616244792938232,
      "learning_rate": 0.0003387821796570538,
      "loss": 2.9603,
      "step": 105694
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9941271543502808,
      "learning_rate": 0.0003387781234155109,
      "loss": 2.8865,
      "step": 105695
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9354499578475952,
      "learning_rate": 0.00033877406716675834,
      "loss": 3.1,
      "step": 105696
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3455471992492676,
      "learning_rate": 0.000338770010910797,
      "loss": 3.3573,
      "step": 105697
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8225290775299072,
      "learning_rate": 0.0003387659546476276,
      "loss": 2.793,
      "step": 105698
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9025382995605469,
      "learning_rate": 0.0003387618983772508,
      "loss": 3.0695,
      "step": 105699
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6039925813674927,
      "learning_rate": 0.0003387578420996675,
      "loss": 3.0142,
      "step": 105700
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6638072729110718,
      "learning_rate": 0.0003387537858148784,
      "loss": 2.7885,
      "step": 105701
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1105644702911377,
      "learning_rate": 0.0003387497295228843,
      "loss": 2.8631,
      "step": 105702
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8350656032562256,
      "learning_rate": 0.0003387456732236857,
      "loss": 2.9412,
      "step": 105703
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9779770374298096,
      "learning_rate": 0.0003387416169172837,
      "loss": 2.7612,
      "step": 105704
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7958152294158936,
      "learning_rate": 0.0003387375606036789,
      "loss": 2.8221,
      "step": 105705
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9187893867492676,
      "learning_rate": 0.00033873350428287194,
      "loss": 3.0955,
      "step": 105706
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0973119735717773,
      "learning_rate": 0.0003387294479548638,
      "loss": 2.9524,
      "step": 105707
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.3705971240997314,
      "learning_rate": 0.0003387253916196551,
      "loss": 3.0894,
      "step": 105708
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.569892406463623,
      "learning_rate": 0.0003387213352772466,
      "loss": 2.8597,
      "step": 105709
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8811371326446533,
      "learning_rate": 0.00033871727892763906,
      "loss": 2.9564,
      "step": 105710
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.206648588180542,
      "learning_rate": 0.00033871322257083327,
      "loss": 3.1604,
      "step": 105711
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.429337501525879,
      "learning_rate": 0.00033870916620682995,
      "loss": 2.7781,
      "step": 105712
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.000088930130005,
      "learning_rate": 0.0003387051098356299,
      "loss": 3.3616,
      "step": 105713
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8191795349121094,
      "learning_rate": 0.0003387010534572338,
      "loss": 2.8763,
      "step": 105714
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6560158729553223,
      "learning_rate": 0.00033869699707164247,
      "loss": 2.9045,
      "step": 105715
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9412108659744263,
      "learning_rate": 0.0003386929406788566,
      "loss": 3.0278,
      "step": 105716
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9708791971206665,
      "learning_rate": 0.00033868888427887703,
      "loss": 3.0304,
      "step": 105717
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5888988971710205,
      "learning_rate": 0.0003386848278717044,
      "loss": 2.9369,
      "step": 105718
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6991493701934814,
      "learning_rate": 0.0003386807714573396,
      "loss": 3.032,
      "step": 105719
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8222485780715942,
      "learning_rate": 0.00033867671503578334,
      "loss": 3.0777,
      "step": 105720
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6324723958969116,
      "learning_rate": 0.00033867265860703624,
      "loss": 2.9878,
      "step": 105721
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9699015617370605,
      "learning_rate": 0.0003386686021710992,
      "loss": 2.9308,
      "step": 105722
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4218170642852783,
      "learning_rate": 0.0003386645457279729,
      "loss": 2.9091,
      "step": 105723
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3920421600341797,
      "learning_rate": 0.0003386604892776582,
      "loss": 3.0278,
      "step": 105724
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.69319486618042,
      "learning_rate": 0.0003386564328201558,
      "loss": 3.0103,
      "step": 105725
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8175121545791626,
      "learning_rate": 0.0003386523763554664,
      "loss": 3.1121,
      "step": 105726
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.082537889480591,
      "learning_rate": 0.0003386483198835908,
      "loss": 3.1339,
      "step": 105727
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.662683367729187,
      "learning_rate": 0.0003386442634045298,
      "loss": 2.9231,
      "step": 105728
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7599633932113647,
      "learning_rate": 0.000338640206918284,
      "loss": 2.8777,
      "step": 105729
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7793307304382324,
      "learning_rate": 0.00033863615042485434,
      "loss": 3.152,
      "step": 105730
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3148763179779053,
      "learning_rate": 0.00033863209392424144,
      "loss": 2.9555,
      "step": 105731
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4530510902404785,
      "learning_rate": 0.00033862803741644613,
      "loss": 3.0744,
      "step": 105732
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8828107118606567,
      "learning_rate": 0.0003386239809014691,
      "loss": 2.9726,
      "step": 105733
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1228091716766357,
      "learning_rate": 0.0003386199243793111,
      "loss": 3.0277,
      "step": 105734
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3624560832977295,
      "learning_rate": 0.0003386158678499731,
      "loss": 2.8851,
      "step": 105735
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1050896644592285,
      "learning_rate": 0.0003386118113134556,
      "loss": 3.0908,
      "step": 105736
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0845746994018555,
      "learning_rate": 0.0003386077547697594,
      "loss": 2.9613,
      "step": 105737
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.993898630142212,
      "learning_rate": 0.0003386036982188853,
      "loss": 2.9463,
      "step": 105738
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5446906089782715,
      "learning_rate": 0.00033859964166083403,
      "loss": 2.9592,
      "step": 105739
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9548683166503906,
      "learning_rate": 0.00033859558509560637,
      "loss": 2.8443,
      "step": 105740
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1017019748687744,
      "learning_rate": 0.00033859152852320313,
      "loss": 2.8215,
      "step": 105741
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.659285068511963,
      "learning_rate": 0.00033858747194362497,
      "loss": 2.7726,
      "step": 105742
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1052072048187256,
      "learning_rate": 0.0003385834153568726,
      "loss": 2.9753,
      "step": 105743
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1475980281829834,
      "learning_rate": 0.0003385793587629469,
      "loss": 2.8407,
      "step": 105744
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9002454280853271,
      "learning_rate": 0.0003385753021618486,
      "loss": 2.9615,
      "step": 105745
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.547553539276123,
      "learning_rate": 0.0003385712455535783,
      "loss": 2.9736,
      "step": 105746
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.681522011756897,
      "learning_rate": 0.000338567188938137,
      "loss": 2.9783,
      "step": 105747
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8982020616531372,
      "learning_rate": 0.0003385631323155253,
      "loss": 3.0853,
      "step": 105748
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.672398328781128,
      "learning_rate": 0.0003385590756857439,
      "loss": 2.9653,
      "step": 105749
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.794835090637207,
      "learning_rate": 0.0003385550190487938,
      "loss": 3.1567,
      "step": 105750
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9284765720367432,
      "learning_rate": 0.0003385509624046755,
      "loss": 2.946,
      "step": 105751
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.82673978805542,
      "learning_rate": 0.0003385469057533898,
      "loss": 3.0026,
      "step": 105752
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0730397701263428,
      "learning_rate": 0.0003385428490949376,
      "loss": 2.996,
      "step": 105753
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8523433208465576,
      "learning_rate": 0.00033853879242931947,
      "loss": 3.0383,
      "step": 105754
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7802133560180664,
      "learning_rate": 0.0003385347357565363,
      "loss": 2.9326,
      "step": 105755
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7467551231384277,
      "learning_rate": 0.0003385306790765888,
      "loss": 3.1292,
      "step": 105756
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5478485822677612,
      "learning_rate": 0.0003385266223894777,
      "loss": 3.0208,
      "step": 105757
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.749350666999817,
      "learning_rate": 0.00033852256569520374,
      "loss": 3.0208,
      "step": 105758
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3291618824005127,
      "learning_rate": 0.00033851850899376787,
      "loss": 2.9711,
      "step": 105759
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7268329858779907,
      "learning_rate": 0.0003385144522851705,
      "loss": 3.2636,
      "step": 105760
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0579917430877686,
      "learning_rate": 0.0003385103955694126,
      "loss": 2.9864,
      "step": 105761
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4662351608276367,
      "learning_rate": 0.00033850633884649494,
      "loss": 2.9468,
      "step": 105762
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9118245840072632,
      "learning_rate": 0.00033850228211641825,
      "loss": 2.9972,
      "step": 105763
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1368699073791504,
      "learning_rate": 0.00033849822537918315,
      "loss": 3.1792,
      "step": 105764
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.156590461730957,
      "learning_rate": 0.0003384941686347906,
      "loss": 3.0008,
      "step": 105765
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.071078062057495,
      "learning_rate": 0.00033849011188324127,
      "loss": 2.9524,
      "step": 105766
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8039960861206055,
      "learning_rate": 0.0003384860551245358,
      "loss": 3.1981,
      "step": 105767
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.3107588291168213,
      "learning_rate": 0.00033848199835867515,
      "loss": 3.205,
      "step": 105768
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.407977342605591,
      "learning_rate": 0.00033847794158565987,
      "loss": 3.0643,
      "step": 105769
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.765774965286255,
      "learning_rate": 0.0003384738848054909,
      "loss": 2.8969,
      "step": 105770
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.649840235710144,
      "learning_rate": 0.0003384698280181689,
      "loss": 3.0488,
      "step": 105771
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0880587100982666,
      "learning_rate": 0.0003384657712236946,
      "loss": 2.9011,
      "step": 105772
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0504865646362305,
      "learning_rate": 0.0003384617144220688,
      "loss": 2.9444,
      "step": 105773
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7136383056640625,
      "learning_rate": 0.0003384576576132923,
      "loss": 3.0833,
      "step": 105774
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7704013586044312,
      "learning_rate": 0.0003384536007973657,
      "loss": 2.8693,
      "step": 105775
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.2811214923858643,
      "learning_rate": 0.0003384495439742898,
      "loss": 2.8083,
      "step": 105776
    },
    {
      "epoch": 1.38,
      "grad_norm": 6.891556739807129,
      "learning_rate": 0.0003384454871440655,
      "loss": 2.9184,
      "step": 105777
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.341418981552124,
      "learning_rate": 0.00033844143030669347,
      "loss": 2.9015,
      "step": 105778
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.760075330734253,
      "learning_rate": 0.00033843737346217435,
      "loss": 3.0196,
      "step": 105779
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3188743591308594,
      "learning_rate": 0.00033843331661050915,
      "loss": 2.7785,
      "step": 105780
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0708327293395996,
      "learning_rate": 0.0003384292597516984,
      "loss": 3.1151,
      "step": 105781
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.36785364151001,
      "learning_rate": 0.00033842520288574293,
      "loss": 2.7775,
      "step": 105782
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.5595390796661377,
      "learning_rate": 0.00033842114601264343,
      "loss": 3.1126,
      "step": 105783
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8170676231384277,
      "learning_rate": 0.00033841708913240077,
      "loss": 3.0855,
      "step": 105784
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.752704381942749,
      "learning_rate": 0.00033841303224501566,
      "loss": 3.297,
      "step": 105785
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.180177927017212,
      "learning_rate": 0.00033840897535048885,
      "loss": 3.0336,
      "step": 105786
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7323477268218994,
      "learning_rate": 0.000338404918448821,
      "loss": 2.7451,
      "step": 105787
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6551482677459717,
      "learning_rate": 0.000338400861540013,
      "loss": 3.0082,
      "step": 105788
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9001295566558838,
      "learning_rate": 0.0003383968046240656,
      "loss": 3.0618,
      "step": 105789
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.243385076522827,
      "learning_rate": 0.0003383927477009795,
      "loss": 2.9392,
      "step": 105790
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9738513231277466,
      "learning_rate": 0.0003383886907707554,
      "loss": 3.0475,
      "step": 105791
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.154951333999634,
      "learning_rate": 0.0003383846338333942,
      "loss": 3.313,
      "step": 105792
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8167673349380493,
      "learning_rate": 0.0003383805768888965,
      "loss": 3.1998,
      "step": 105793
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.726668357849121,
      "learning_rate": 0.00033837651993726315,
      "loss": 2.7247,
      "step": 105794
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.470374584197998,
      "learning_rate": 0.0003383724629784948,
      "loss": 2.8257,
      "step": 105795
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6151472330093384,
      "learning_rate": 0.0003383684060125925,
      "loss": 2.9847,
      "step": 105796
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.098587989807129,
      "learning_rate": 0.0003383643490395566,
      "loss": 2.7679,
      "step": 105797
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.898758888244629,
      "learning_rate": 0.0003383602920593881,
      "loss": 3.0801,
      "step": 105798
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8136755228042603,
      "learning_rate": 0.0003383562350720877,
      "loss": 3.0228,
      "step": 105799
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.035238742828369,
      "learning_rate": 0.0003383521780776562,
      "loss": 2.7783,
      "step": 105800
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.700548529624939,
      "learning_rate": 0.0003383481210760942,
      "loss": 2.8847,
      "step": 105801
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0751631259918213,
      "learning_rate": 0.00033834406406740267,
      "loss": 3.1405,
      "step": 105802
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6061723232269287,
      "learning_rate": 0.0003383400070515822,
      "loss": 3.0463,
      "step": 105803
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1293535232543945,
      "learning_rate": 0.00033833595002863355,
      "loss": 2.8221,
      "step": 105804
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8225573301315308,
      "learning_rate": 0.00033833189299855757,
      "loss": 2.9471,
      "step": 105805
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.069685935974121,
      "learning_rate": 0.000338327835961355,
      "loss": 3.1648,
      "step": 105806
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9432404041290283,
      "learning_rate": 0.0003383237789170265,
      "loss": 3.0171,
      "step": 105807
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.015606164932251,
      "learning_rate": 0.0003383197218655729,
      "loss": 3.127,
      "step": 105808
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7459495067596436,
      "learning_rate": 0.00033831566480699505,
      "loss": 2.8601,
      "step": 105809
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7452208995819092,
      "learning_rate": 0.00033831160774129343,
      "loss": 2.9258,
      "step": 105810
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6473089456558228,
      "learning_rate": 0.00033830755066846903,
      "loss": 2.9784,
      "step": 105811
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8150776624679565,
      "learning_rate": 0.00033830349358852257,
      "loss": 2.778,
      "step": 105812
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9064741134643555,
      "learning_rate": 0.00033829943650145474,
      "loss": 2.7221,
      "step": 105813
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.566036343574524,
      "learning_rate": 0.00033829537940726625,
      "loss": 2.9485,
      "step": 105814
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6932446956634521,
      "learning_rate": 0.000338291322305958,
      "loss": 3.0441,
      "step": 105815
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6338444948196411,
      "learning_rate": 0.0003382872651975307,
      "loss": 2.8768,
      "step": 105816
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6855823993682861,
      "learning_rate": 0.000338283208081985,
      "loss": 2.9298,
      "step": 105817
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6187375783920288,
      "learning_rate": 0.0003382791509593218,
      "loss": 3.2104,
      "step": 105818
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5811469554901123,
      "learning_rate": 0.0003382750938295417,
      "loss": 2.9438,
      "step": 105819
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3524258136749268,
      "learning_rate": 0.0003382710366926456,
      "loss": 2.825,
      "step": 105820
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.839320182800293,
      "learning_rate": 0.0003382669795486342,
      "loss": 3.1537,
      "step": 105821
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6438318490982056,
      "learning_rate": 0.0003382629223975082,
      "loss": 3.0313,
      "step": 105822
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5691704750061035,
      "learning_rate": 0.00033825886523926844,
      "loss": 3.2047,
      "step": 105823
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.817385196685791,
      "learning_rate": 0.00033825480807391563,
      "loss": 2.7194,
      "step": 105824
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8480591773986816,
      "learning_rate": 0.0003382507509014505,
      "loss": 2.9734,
      "step": 105825
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.127574920654297,
      "learning_rate": 0.0003382466937218739,
      "loss": 3.1607,
      "step": 105826
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8782988786697388,
      "learning_rate": 0.0003382426365351865,
      "loss": 3.0647,
      "step": 105827
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8755689859390259,
      "learning_rate": 0.0003382385793413889,
      "loss": 3.0041,
      "step": 105828
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3471593856811523,
      "learning_rate": 0.0003382345221404823,
      "loss": 3.38,
      "step": 105829
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8861206769943237,
      "learning_rate": 0.000338230464932467,
      "loss": 2.9375,
      "step": 105830
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.270726442337036,
      "learning_rate": 0.00033822640771734395,
      "loss": 2.8682,
      "step": 105831
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.681441068649292,
      "learning_rate": 0.00033822235049511395,
      "loss": 3.1233,
      "step": 105832
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1898231506347656,
      "learning_rate": 0.00033821829326577765,
      "loss": 2.976,
      "step": 105833
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7953628301620483,
      "learning_rate": 0.00033821423602933587,
      "loss": 3.0166,
      "step": 105834
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.4543246030807495,
      "learning_rate": 0.0003382101787857894,
      "loss": 3.0657,
      "step": 105835
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8116347789764404,
      "learning_rate": 0.0003382061215351388,
      "loss": 3.0578,
      "step": 105836
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6872612237930298,
      "learning_rate": 0.0003382020642773851,
      "loss": 3.047,
      "step": 105837
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.047888994216919,
      "learning_rate": 0.00033819800701252885,
      "loss": 2.8803,
      "step": 105838
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.845137596130371,
      "learning_rate": 0.00033819394974057083,
      "loss": 2.9235,
      "step": 105839
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6597459316253662,
      "learning_rate": 0.00033818989246151183,
      "loss": 2.9923,
      "step": 105840
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7286083698272705,
      "learning_rate": 0.0003381858351753527,
      "loss": 2.9744,
      "step": 105841
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.960666537284851,
      "learning_rate": 0.00033818177788209403,
      "loss": 3.2246,
      "step": 105842
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.418991804122925,
      "learning_rate": 0.0003381777205817367,
      "loss": 2.8024,
      "step": 105843
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.027517318725586,
      "learning_rate": 0.0003381736632742814,
      "loss": 2.9183,
      "step": 105844
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5896085500717163,
      "learning_rate": 0.0003381696059597289,
      "loss": 3.2462,
      "step": 105845
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.7917885780334473,
      "learning_rate": 0.0003381655486380798,
      "loss": 3.2021,
      "step": 105846
    },
    {
      "epoch": 1.38,
      "grad_norm": 5.555025577545166,
      "learning_rate": 0.00033816149130933525,
      "loss": 3.0975,
      "step": 105847
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7074434757232666,
      "learning_rate": 0.0003381574339734956,
      "loss": 2.9017,
      "step": 105848
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9860424995422363,
      "learning_rate": 0.0003381533766305618,
      "loss": 2.9733,
      "step": 105849
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5072476863861084,
      "learning_rate": 0.00033814931928053456,
      "loss": 2.9407,
      "step": 105850
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9805047512054443,
      "learning_rate": 0.0003381452619234147,
      "loss": 2.9591,
      "step": 105851
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6959235668182373,
      "learning_rate": 0.00033814120455920283,
      "loss": 3.0015,
      "step": 105852
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.566420555114746,
      "learning_rate": 0.0003381371471878999,
      "loss": 3.3115,
      "step": 105853
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6616134643554688,
      "learning_rate": 0.0003381330898095064,
      "loss": 2.8594,
      "step": 105854
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8693673610687256,
      "learning_rate": 0.0003381290324240233,
      "loss": 2.9935,
      "step": 105855
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.956644058227539,
      "learning_rate": 0.0003381249750314514,
      "loss": 3.0778,
      "step": 105856
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.855049967765808,
      "learning_rate": 0.0003381209176317912,
      "loss": 2.8666,
      "step": 105857
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.59955096244812,
      "learning_rate": 0.0003381168602250436,
      "loss": 2.7886,
      "step": 105858
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8780272006988525,
      "learning_rate": 0.0003381128028112095,
      "loss": 2.8908,
      "step": 105859
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6698224544525146,
      "learning_rate": 0.00033810874539028936,
      "loss": 2.8575,
      "step": 105860
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.547221302986145,
      "learning_rate": 0.0003381046879622842,
      "loss": 2.9345,
      "step": 105861
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.297863721847534,
      "learning_rate": 0.00033810063052719456,
      "loss": 2.8925,
      "step": 105862
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.34084415435791,
      "learning_rate": 0.00033809657308502136,
      "loss": 3.2233,
      "step": 105863
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8291867971420288,
      "learning_rate": 0.0003380925156357653,
      "loss": 2.9272,
      "step": 105864
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6686276197433472,
      "learning_rate": 0.0003380884581794271,
      "loss": 2.7843,
      "step": 105865
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.380162000656128,
      "learning_rate": 0.0003380844007160076,
      "loss": 2.724,
      "step": 105866
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8181102275848389,
      "learning_rate": 0.0003380803432455073,
      "loss": 2.946,
      "step": 105867
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0142006874084473,
      "learning_rate": 0.00033807628576792726,
      "loss": 2.9561,
      "step": 105868
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.961552619934082,
      "learning_rate": 0.00033807222828326824,
      "loss": 2.8413,
      "step": 105869
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7585095167160034,
      "learning_rate": 0.00033806817079153073,
      "loss": 3.0909,
      "step": 105870
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8004257678985596,
      "learning_rate": 0.0003380641132927156,
      "loss": 3.2844,
      "step": 105871
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7101000547409058,
      "learning_rate": 0.00033806005578682373,
      "loss": 3.0594,
      "step": 105872
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6199607849121094,
      "learning_rate": 0.0003380559982738557,
      "loss": 2.934,
      "step": 105873
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8600701093673706,
      "learning_rate": 0.00033805194075381244,
      "loss": 2.736,
      "step": 105874
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.622543215751648,
      "learning_rate": 0.00033804788322669454,
      "loss": 3.0343,
      "step": 105875
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8216016292572021,
      "learning_rate": 0.0003380438256925028,
      "loss": 2.9137,
      "step": 105876
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6433273553848267,
      "learning_rate": 0.00033803976815123803,
      "loss": 2.7906,
      "step": 105877
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6296342611312866,
      "learning_rate": 0.000338035710602901,
      "loss": 2.9956,
      "step": 105878
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.809395432472229,
      "learning_rate": 0.00033803165304749236,
      "loss": 3.2303,
      "step": 105879
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9527604579925537,
      "learning_rate": 0.0003380275954850129,
      "loss": 2.9113,
      "step": 105880
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5020686388015747,
      "learning_rate": 0.00033802353791546343,
      "loss": 3.3246,
      "step": 105881
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8324726819992065,
      "learning_rate": 0.0003380194803388446,
      "loss": 3.251,
      "step": 105882
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.816311001777649,
      "learning_rate": 0.0003380154227551572,
      "loss": 3.0008,
      "step": 105883
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7265410423278809,
      "learning_rate": 0.0003380113651644022,
      "loss": 3.129,
      "step": 105884
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.438157320022583,
      "learning_rate": 0.00033800730756658004,
      "loss": 2.859,
      "step": 105885
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8755083084106445,
      "learning_rate": 0.0003380032499616916,
      "loss": 2.9195,
      "step": 105886
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.010228395462036,
      "learning_rate": 0.0003379991923497377,
      "loss": 2.9989,
      "step": 105887
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.95052170753479,
      "learning_rate": 0.000337995134730719,
      "loss": 3.0628,
      "step": 105888
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9604439735412598,
      "learning_rate": 0.00033799107710463626,
      "loss": 3.1689,
      "step": 105889
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8908387422561646,
      "learning_rate": 0.0003379870194714903,
      "loss": 2.9697,
      "step": 105890
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1320958137512207,
      "learning_rate": 0.0003379829618312818,
      "loss": 2.8537,
      "step": 105891
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.914597511291504,
      "learning_rate": 0.0003379789041840116,
      "loss": 2.9253,
      "step": 105892
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.451131820678711,
      "learning_rate": 0.00033797484652968034,
      "loss": 2.9943,
      "step": 105893
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1290483474731445,
      "learning_rate": 0.0003379707888682889,
      "loss": 2.9071,
      "step": 105894
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.850878357887268,
      "learning_rate": 0.0003379667311998379,
      "loss": 3.0173,
      "step": 105895
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.280046463012695,
      "learning_rate": 0.0003379626735243282,
      "loss": 3.1671,
      "step": 105896
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.3034420013427734,
      "learning_rate": 0.0003379586158417606,
      "loss": 2.8338,
      "step": 105897
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5138356685638428,
      "learning_rate": 0.00033795455815213563,
      "loss": 2.9744,
      "step": 105898
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7051103115081787,
      "learning_rate": 0.00033795050045545426,
      "loss": 2.8336,
      "step": 105899
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.640673875808716,
      "learning_rate": 0.00033794644275171717,
      "loss": 2.8289,
      "step": 105900
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4092202186584473,
      "learning_rate": 0.00033794238504092523,
      "loss": 2.7456,
      "step": 105901
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8853460550308228,
      "learning_rate": 0.00033793832732307894,
      "loss": 2.8596,
      "step": 105902
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2205047607421875,
      "learning_rate": 0.00033793426959817925,
      "loss": 3.0174,
      "step": 105903
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5537487268447876,
      "learning_rate": 0.00033793021186622694,
      "loss": 3.1072,
      "step": 105904
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.166193723678589,
      "learning_rate": 0.00033792615412722254,
      "loss": 2.7027,
      "step": 105905
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.034705400466919,
      "learning_rate": 0.00033792209638116704,
      "loss": 2.8047,
      "step": 105906
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7489986419677734,
      "learning_rate": 0.0003379180386280611,
      "loss": 3.0879,
      "step": 105907
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.192333936691284,
      "learning_rate": 0.0003379139808679055,
      "loss": 2.8694,
      "step": 105908
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9261122941970825,
      "learning_rate": 0.0003379099231007009,
      "loss": 2.9424,
      "step": 105909
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.500519871711731,
      "learning_rate": 0.0003379058653264482,
      "loss": 2.8832,
      "step": 105910
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2266223430633545,
      "learning_rate": 0.0003379018075451481,
      "loss": 2.9428,
      "step": 105911
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5838501453399658,
      "learning_rate": 0.0003378977497568013,
      "loss": 3.1696,
      "step": 105912
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7108311653137207,
      "learning_rate": 0.0003378936919614085,
      "loss": 2.8918,
      "step": 105913
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3375797271728516,
      "learning_rate": 0.00033788963415897074,
      "loss": 3.0625,
      "step": 105914
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0651519298553467,
      "learning_rate": 0.00033788557634948843,
      "loss": 2.7293,
      "step": 105915
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7410746812820435,
      "learning_rate": 0.0003378815185329625,
      "loss": 2.8573,
      "step": 105916
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9427217245101929,
      "learning_rate": 0.00033787746070939375,
      "loss": 3.0668,
      "step": 105917
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.153762102127075,
      "learning_rate": 0.00033787340287878283,
      "loss": 2.8857,
      "step": 105918
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0241141319274902,
      "learning_rate": 0.0003378693450411305,
      "loss": 2.8714,
      "step": 105919
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.55472469329834,
      "learning_rate": 0.0003378652871964376,
      "loss": 3.0371,
      "step": 105920
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.043090581893921,
      "learning_rate": 0.00033786122934470475,
      "loss": 2.78,
      "step": 105921
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.58084774017334,
      "learning_rate": 0.0003378571714859328,
      "loss": 3.1723,
      "step": 105922
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.3229405879974365,
      "learning_rate": 0.0003378531136201226,
      "loss": 2.977,
      "step": 105923
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9673266410827637,
      "learning_rate": 0.0003378490557472747,
      "loss": 2.9721,
      "step": 105924
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2308056354522705,
      "learning_rate": 0.00033784499786738997,
      "loss": 3.0551,
      "step": 105925
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.3202364444732666,
      "learning_rate": 0.0003378409399804691,
      "loss": 3.1793,
      "step": 105926
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0025291442871094,
      "learning_rate": 0.00033783688208651293,
      "loss": 2.9559,
      "step": 105927
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1520862579345703,
      "learning_rate": 0.0003378328241855221,
      "loss": 3.0886,
      "step": 105928
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0719239711761475,
      "learning_rate": 0.0003378287662774975,
      "loss": 2.948,
      "step": 105929
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6535283327102661,
      "learning_rate": 0.00033782470836243986,
      "loss": 2.8535,
      "step": 105930
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9272814989089966,
      "learning_rate": 0.00033782065044034976,
      "loss": 3.0284,
      "step": 105931
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.162059783935547,
      "learning_rate": 0.0003378165925112282,
      "loss": 2.9382,
      "step": 105932
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3465986251831055,
      "learning_rate": 0.0003378125345750758,
      "loss": 3.1284,
      "step": 105933
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7888288497924805,
      "learning_rate": 0.0003378084766318933,
      "loss": 2.9981,
      "step": 105934
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6731294393539429,
      "learning_rate": 0.0003378044186816815,
      "loss": 3.2348,
      "step": 105935
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8842601776123047,
      "learning_rate": 0.0003378003607244412,
      "loss": 3.0119,
      "step": 105936
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0125794410705566,
      "learning_rate": 0.00033779630276017307,
      "loss": 2.6802,
      "step": 105937
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9854028224945068,
      "learning_rate": 0.00033779224478887776,
      "loss": 2.9143,
      "step": 105938
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8632631301879883,
      "learning_rate": 0.00033778818681055633,
      "loss": 3.1048,
      "step": 105939
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7893153429031372,
      "learning_rate": 0.00033778412882520933,
      "loss": 3.0174,
      "step": 105940
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7912778854370117,
      "learning_rate": 0.0003377800708328375,
      "loss": 3.3405,
      "step": 105941
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4597458839416504,
      "learning_rate": 0.0003377760128334417,
      "loss": 2.9211,
      "step": 105942
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.054826259613037,
      "learning_rate": 0.00033777195482702257,
      "loss": 3.031,
      "step": 105943
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5532810688018799,
      "learning_rate": 0.00033776789681358096,
      "loss": 3.0274,
      "step": 105944
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.667450189590454,
      "learning_rate": 0.0003377638387931176,
      "loss": 2.8123,
      "step": 105945
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.907862901687622,
      "learning_rate": 0.00033775978076563317,
      "loss": 3.151,
      "step": 105946
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.7126259803771973,
      "learning_rate": 0.00033775572273112855,
      "loss": 3.2271,
      "step": 105947
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.2423288822174072,
      "learning_rate": 0.00033775166468960437,
      "loss": 2.9566,
      "step": 105948
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7952154874801636,
      "learning_rate": 0.00033774760664106144,
      "loss": 3.0857,
      "step": 105949
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.698472023010254,
      "learning_rate": 0.00033774354858550053,
      "loss": 2.9854,
      "step": 105950
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.605130434036255,
      "learning_rate": 0.0003377394905229225,
      "loss": 2.9159,
      "step": 105951
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7040297985076904,
      "learning_rate": 0.0003377354324533278,
      "loss": 3.0485,
      "step": 105952
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.842937707901001,
      "learning_rate": 0.0003377313743767174,
      "loss": 3.0098,
      "step": 105953
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5987210273742676,
      "learning_rate": 0.00033772731629309215,
      "loss": 3.1327,
      "step": 105954
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.600285291671753,
      "learning_rate": 0.0003377232582024526,
      "loss": 2.7192,
      "step": 105955
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9221900701522827,
      "learning_rate": 0.0003377192001047996,
      "loss": 3.0357,
      "step": 105956
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5867621898651123,
      "learning_rate": 0.0003377151420001339,
      "loss": 3.1718,
      "step": 105957
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2015414237976074,
      "learning_rate": 0.0003377110838884562,
      "loss": 3.0103,
      "step": 105958
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0794997215270996,
      "learning_rate": 0.0003377070257697673,
      "loss": 3.2037,
      "step": 105959
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6590214967727661,
      "learning_rate": 0.000337702967644068,
      "loss": 3.1062,
      "step": 105960
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.660285234451294,
      "learning_rate": 0.00033769890951135896,
      "loss": 3.0286,
      "step": 105961
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.809614658355713,
      "learning_rate": 0.000337694851371641,
      "loss": 3.2567,
      "step": 105962
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.769202947616577,
      "learning_rate": 0.0003376907932249149,
      "loss": 3.0963,
      "step": 105963
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7124611139297485,
      "learning_rate": 0.0003376867350711813,
      "loss": 2.7836,
      "step": 105964
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7916646003723145,
      "learning_rate": 0.000337682676910441,
      "loss": 3.1662,
      "step": 105965
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.789357304573059,
      "learning_rate": 0.0003376786187426949,
      "loss": 2.8218,
      "step": 105966
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8310480117797852,
      "learning_rate": 0.00033767456056794356,
      "loss": 2.9742,
      "step": 105967
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0062694549560547,
      "learning_rate": 0.0003376705023861878,
      "loss": 3.0853,
      "step": 105968
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6963932514190674,
      "learning_rate": 0.00033766644419742844,
      "loss": 2.8699,
      "step": 105969
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.527013063430786,
      "learning_rate": 0.0003376623860016661,
      "loss": 2.995,
      "step": 105970
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6193127632141113,
      "learning_rate": 0.00033765832779890164,
      "loss": 2.7581,
      "step": 105971
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0371131896972656,
      "learning_rate": 0.0003376542695891358,
      "loss": 3.1457,
      "step": 105972
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.065793037414551,
      "learning_rate": 0.0003376502113723693,
      "loss": 3.2695,
      "step": 105973
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.664263129234314,
      "learning_rate": 0.00033764615314860283,
      "loss": 2.9301,
      "step": 105974
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8488138914108276,
      "learning_rate": 0.0003376420949178374,
      "loss": 3.2437,
      "step": 105975
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7752790451049805,
      "learning_rate": 0.0003376380366800735,
      "loss": 3.0468,
      "step": 105976
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7453069686889648,
      "learning_rate": 0.000337633978435312,
      "loss": 2.9171,
      "step": 105977
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.561172366142273,
      "learning_rate": 0.00033762992018355364,
      "loss": 2.7485,
      "step": 105978
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9897828102111816,
      "learning_rate": 0.0003376258619247992,
      "loss": 3.0156,
      "step": 105979
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1898293495178223,
      "learning_rate": 0.0003376218036590493,
      "loss": 2.9825,
      "step": 105980
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.47560977935791,
      "learning_rate": 0.00033761774538630483,
      "loss": 3.1967,
      "step": 105981
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.043900966644287,
      "learning_rate": 0.00033761368710656664,
      "loss": 2.9571,
      "step": 105982
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8139697313308716,
      "learning_rate": 0.0003376096288198352,
      "loss": 2.893,
      "step": 105983
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.837223768234253,
      "learning_rate": 0.0003376055705261114,
      "loss": 2.9896,
      "step": 105984
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9225860834121704,
      "learning_rate": 0.00033760151222539614,
      "loss": 2.9213,
      "step": 105985
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.809299349784851,
      "learning_rate": 0.00033759745391769,
      "loss": 3.0289,
      "step": 105986
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6375253200531006,
      "learning_rate": 0.00033759339560299374,
      "loss": 2.9934,
      "step": 105987
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1814732551574707,
      "learning_rate": 0.00033758933728130814,
      "loss": 3.0373,
      "step": 105988
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9028069972991943,
      "learning_rate": 0.0003375852789526341,
      "loss": 3.3056,
      "step": 105989
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.571637511253357,
      "learning_rate": 0.00033758122061697217,
      "loss": 3.109,
      "step": 105990
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5748329162597656,
      "learning_rate": 0.0003375771622743232,
      "loss": 2.8617,
      "step": 105991
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.779103994369507,
      "learning_rate": 0.0003375731039246879,
      "loss": 2.9569,
      "step": 105992
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6316754817962646,
      "learning_rate": 0.0003375690455680671,
      "loss": 3.0064,
      "step": 105993
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3747246265411377,
      "learning_rate": 0.00033756498720446146,
      "loss": 3.0164,
      "step": 105994
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7247953414916992,
      "learning_rate": 0.00033756092883387173,
      "loss": 3.1533,
      "step": 105995
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.157034397125244,
      "learning_rate": 0.00033755687045629887,
      "loss": 2.8581,
      "step": 105996
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6508125066757202,
      "learning_rate": 0.0003375528120717433,
      "loss": 2.6504,
      "step": 105997
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0455949306488037,
      "learning_rate": 0.000337548753680206,
      "loss": 3.0656,
      "step": 105998
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0541374683380127,
      "learning_rate": 0.00033754469528168775,
      "loss": 3.1402,
      "step": 105999
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.543379545211792,
      "learning_rate": 0.0003375406368761892,
      "loss": 3.0046,
      "step": 106000
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7471517324447632,
      "learning_rate": 0.00033753657846371116,
      "loss": 2.9423,
      "step": 106001
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8762346506118774,
      "learning_rate": 0.00033753252004425433,
      "loss": 3.0,
      "step": 106002
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7372201681137085,
      "learning_rate": 0.0003375284616178195,
      "loss": 3.0689,
      "step": 106003
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8089724779129028,
      "learning_rate": 0.00033752440318440737,
      "loss": 3.0577,
      "step": 106004
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9196579456329346,
      "learning_rate": 0.00033752034474401877,
      "loss": 2.9168,
      "step": 106005
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8845151662826538,
      "learning_rate": 0.0003375162862966545,
      "loss": 2.8581,
      "step": 106006
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9257855415344238,
      "learning_rate": 0.0003375122278423152,
      "loss": 3.2367,
      "step": 106007
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7403082847595215,
      "learning_rate": 0.00033750816938100166,
      "loss": 3.2417,
      "step": 106008
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.453784227371216,
      "learning_rate": 0.0003375041109127147,
      "loss": 2.877,
      "step": 106009
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.977047085762024,
      "learning_rate": 0.00033750005243745495,
      "loss": 3.1866,
      "step": 106010
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0619983673095703,
      "learning_rate": 0.00033749599395522324,
      "loss": 3.0539,
      "step": 106011
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0364327430725098,
      "learning_rate": 0.00033749193546602033,
      "loss": 2.8952,
      "step": 106012
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0303285121917725,
      "learning_rate": 0.000337487876969847,
      "loss": 3.0591,
      "step": 106013
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.729679822921753,
      "learning_rate": 0.0003374838184667039,
      "loss": 2.8948,
      "step": 106014
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.69409441947937,
      "learning_rate": 0.00033747975995659186,
      "loss": 2.9744,
      "step": 106015
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.425157070159912,
      "learning_rate": 0.0003374757014395117,
      "loss": 3.1856,
      "step": 106016
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7808141708374023,
      "learning_rate": 0.00033747164291546397,
      "loss": 2.8358,
      "step": 106017
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.207183837890625,
      "learning_rate": 0.0003374675843844497,
      "loss": 2.9697,
      "step": 106018
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.4659833908081055,
      "learning_rate": 0.0003374635258464694,
      "loss": 3.0387,
      "step": 106019
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7617501020431519,
      "learning_rate": 0.0003374594673015239,
      "loss": 2.8679,
      "step": 106020
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3132247924804688,
      "learning_rate": 0.0003374554087496141,
      "loss": 2.8075,
      "step": 106021
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0534512996673584,
      "learning_rate": 0.0003374513501907405,
      "loss": 3.0619,
      "step": 106022
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1668760776519775,
      "learning_rate": 0.00033744729162490407,
      "loss": 3.0658,
      "step": 106023
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4719913005828857,
      "learning_rate": 0.0003374432330521055,
      "loss": 2.9926,
      "step": 106024
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9475799798965454,
      "learning_rate": 0.00033743917447234546,
      "loss": 2.8658,
      "step": 106025
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6845476627349854,
      "learning_rate": 0.0003374351158856248,
      "loss": 2.9094,
      "step": 106026
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7694876194000244,
      "learning_rate": 0.00033743105729194426,
      "loss": 3.0867,
      "step": 106027
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6832481622695923,
      "learning_rate": 0.00033742699869130456,
      "loss": 3.1941,
      "step": 106028
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.02880859375,
      "learning_rate": 0.00033742294008370645,
      "loss": 3.0133,
      "step": 106029
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6539450883865356,
      "learning_rate": 0.0003374188814691508,
      "loss": 2.9909,
      "step": 106030
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8648746013641357,
      "learning_rate": 0.0003374148228476382,
      "loss": 2.6753,
      "step": 106031
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0174503326416016,
      "learning_rate": 0.0003374107642191694,
      "loss": 2.9694,
      "step": 106032
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8878369331359863,
      "learning_rate": 0.0003374067055837454,
      "loss": 2.6865,
      "step": 106033
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.202049970626831,
      "learning_rate": 0.0003374026469413667,
      "loss": 3.095,
      "step": 106034
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7628815174102783,
      "learning_rate": 0.0003373985882920341,
      "loss": 3.0681,
      "step": 106035
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.987837314605713,
      "learning_rate": 0.0003373945296357485,
      "loss": 3.1871,
      "step": 106036
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.782361626625061,
      "learning_rate": 0.0003373904709725104,
      "loss": 2.6666,
      "step": 106037
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.3549399375915527,
      "learning_rate": 0.0003373864123023208,
      "loss": 2.9167,
      "step": 106038
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.42011022567749,
      "learning_rate": 0.00033738235362518046,
      "loss": 2.8619,
      "step": 106039
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.788710594177246,
      "learning_rate": 0.00033737829494108994,
      "loss": 2.8943,
      "step": 106040
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.819746971130371,
      "learning_rate": 0.00033737423625005,
      "loss": 2.9379,
      "step": 106041
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4953696727752686,
      "learning_rate": 0.0003373701775520616,
      "loss": 3.1205,
      "step": 106042
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.540128469467163,
      "learning_rate": 0.00033736611884712536,
      "loss": 2.9224,
      "step": 106043
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.822863221168518,
      "learning_rate": 0.00033736206013524207,
      "loss": 2.9522,
      "step": 106044
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6853655576705933,
      "learning_rate": 0.0003373580014164124,
      "loss": 3.0287,
      "step": 106045
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7296960353851318,
      "learning_rate": 0.00033735394269063723,
      "loss": 2.8937,
      "step": 106046
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.055043935775757,
      "learning_rate": 0.00033734988395791725,
      "loss": 3.1259,
      "step": 106047
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5990605354309082,
      "learning_rate": 0.00033734582521825323,
      "loss": 3.046,
      "step": 106048
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3113555908203125,
      "learning_rate": 0.00033734176647164587,
      "loss": 2.9403,
      "step": 106049
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7116211652755737,
      "learning_rate": 0.000337337707718096,
      "loss": 2.9502,
      "step": 106050
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9506760835647583,
      "learning_rate": 0.0003373336489576044,
      "loss": 3.2146,
      "step": 106051
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9161927700042725,
      "learning_rate": 0.00033732959019017167,
      "loss": 2.9382,
      "step": 106052
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.771816611289978,
      "learning_rate": 0.00033732553141579865,
      "loss": 2.681,
      "step": 106053
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1576015949249268,
      "learning_rate": 0.0003373214726344862,
      "loss": 2.8831,
      "step": 106054
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7477796077728271,
      "learning_rate": 0.00033731741384623496,
      "loss": 3.1319,
      "step": 106055
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.02020263671875,
      "learning_rate": 0.0003373133550510457,
      "loss": 3.147,
      "step": 106056
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8283120393753052,
      "learning_rate": 0.00033730929624891916,
      "loss": 3.0049,
      "step": 106057
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.847675323486328,
      "learning_rate": 0.0003373052374398561,
      "loss": 3.101,
      "step": 106058
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7296234369277954,
      "learning_rate": 0.0003373011786238574,
      "loss": 3.2334,
      "step": 106059
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8663270473480225,
      "learning_rate": 0.0003372971198009236,
      "loss": 2.9322,
      "step": 106060
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7931849956512451,
      "learning_rate": 0.00033729306097105564,
      "loss": 3.0746,
      "step": 106061
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.948222041130066,
      "learning_rate": 0.0003372890021342541,
      "loss": 2.977,
      "step": 106062
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.031625747680664,
      "learning_rate": 0.0003372849432905199,
      "loss": 2.9304,
      "step": 106063
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8227577209472656,
      "learning_rate": 0.00033728088443985373,
      "loss": 3.0522,
      "step": 106064
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5710887908935547,
      "learning_rate": 0.00033727682558225633,
      "loss": 2.9011,
      "step": 106065
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7815382480621338,
      "learning_rate": 0.00033727276671772843,
      "loss": 3.09,
      "step": 106066
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8928357362747192,
      "learning_rate": 0.00033726870784627085,
      "loss": 2.8335,
      "step": 106067
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7089308500289917,
      "learning_rate": 0.0003372646489678843,
      "loss": 2.9129,
      "step": 106068
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8485487699508667,
      "learning_rate": 0.00033726059008256954,
      "loss": 2.9536,
      "step": 106069
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.319159984588623,
      "learning_rate": 0.0003372565311903274,
      "loss": 2.8116,
      "step": 106070
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8133299350738525,
      "learning_rate": 0.0003372524722911585,
      "loss": 3.2196,
      "step": 106071
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9548557996749878,
      "learning_rate": 0.00033724841338506365,
      "loss": 2.8425,
      "step": 106072
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.380326986312866,
      "learning_rate": 0.0003372443544720437,
      "loss": 2.8413,
      "step": 106073
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.840623140335083,
      "learning_rate": 0.0003372402955520993,
      "loss": 3.1054,
      "step": 106074
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6661417484283447,
      "learning_rate": 0.0003372362366252311,
      "loss": 3.1002,
      "step": 106075
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.174510955810547,
      "learning_rate": 0.00033723217769144004,
      "loss": 3.0568,
      "step": 106076
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.662265658378601,
      "learning_rate": 0.0003372281187507269,
      "loss": 2.8887,
      "step": 106077
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9586554765701294,
      "learning_rate": 0.0003372240598030923,
      "loss": 2.9621,
      "step": 106078
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.239149570465088,
      "learning_rate": 0.00033722000084853706,
      "loss": 2.8414,
      "step": 106079
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1244752407073975,
      "learning_rate": 0.00033721594188706186,
      "loss": 2.8697,
      "step": 106080
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.129063367843628,
      "learning_rate": 0.00033721188291866756,
      "loss": 3.2243,
      "step": 106081
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.981278896331787,
      "learning_rate": 0.00033720782394335485,
      "loss": 3.0037,
      "step": 106082
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8772588968276978,
      "learning_rate": 0.00033720376496112456,
      "loss": 2.9663,
      "step": 106083
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.412924289703369,
      "learning_rate": 0.0003371997059719773,
      "loss": 3.084,
      "step": 106084
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.2355151176452637,
      "learning_rate": 0.000337195646975914,
      "loss": 3.0896,
      "step": 106085
    },
    {
      "epoch": 1.38,
      "grad_norm": 5.277058124542236,
      "learning_rate": 0.00033719158797293524,
      "loss": 2.7869,
      "step": 106086
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0584256649017334,
      "learning_rate": 0.0003371875289630419,
      "loss": 3.0334,
      "step": 106087
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9003552198410034,
      "learning_rate": 0.00033718346994623477,
      "loss": 2.8966,
      "step": 106088
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0910754203796387,
      "learning_rate": 0.0003371794109225144,
      "loss": 2.9703,
      "step": 106089
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9380340576171875,
      "learning_rate": 0.00033717535189188173,
      "loss": 2.7825,
      "step": 106090
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4245550632476807,
      "learning_rate": 0.0003371712928543375,
      "loss": 3.2318,
      "step": 106091
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8037848472595215,
      "learning_rate": 0.00033716723380988235,
      "loss": 3.0786,
      "step": 106092
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.45987606048584,
      "learning_rate": 0.0003371631747585172,
      "loss": 2.9544,
      "step": 106093
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.475558042526245,
      "learning_rate": 0.0003371591157002427,
      "loss": 3.32,
      "step": 106094
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.288008213043213,
      "learning_rate": 0.00033715505663505955,
      "loss": 2.9272,
      "step": 106095
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1235239505767822,
      "learning_rate": 0.0003371509975629686,
      "loss": 2.7823,
      "step": 106096
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9286048412323,
      "learning_rate": 0.00033714693848397063,
      "loss": 2.9673,
      "step": 106097
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7041597366333008,
      "learning_rate": 0.0003371428793980663,
      "loss": 3.1987,
      "step": 106098
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7438693046569824,
      "learning_rate": 0.0003371388203052564,
      "loss": 3.1274,
      "step": 106099
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.640648603439331,
      "learning_rate": 0.00033713476120554174,
      "loss": 2.9482,
      "step": 106100
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8654013872146606,
      "learning_rate": 0.00033713070209892294,
      "loss": 3.1136,
      "step": 106101
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.665842294692993,
      "learning_rate": 0.0003371266429854009,
      "loss": 3.1133,
      "step": 106102
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7043291330337524,
      "learning_rate": 0.0003371225838649763,
      "loss": 3.0224,
      "step": 106103
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0846447944641113,
      "learning_rate": 0.00033711852473764995,
      "loss": 3.2233,
      "step": 106104
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9355409145355225,
      "learning_rate": 0.00033711446560342253,
      "loss": 3.2357,
      "step": 106105
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8012609481811523,
      "learning_rate": 0.0003371104064622948,
      "loss": 3.1789,
      "step": 106106
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7518755197525024,
      "learning_rate": 0.0003371063473142676,
      "loss": 3.1018,
      "step": 106107
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9468274116516113,
      "learning_rate": 0.00033710228815934164,
      "loss": 2.8738,
      "step": 106108
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6698317527770996,
      "learning_rate": 0.0003370982289975177,
      "loss": 3.1231,
      "step": 106109
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.027392625808716,
      "learning_rate": 0.00033709416982879634,
      "loss": 3.0435,
      "step": 106110
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.816986322402954,
      "learning_rate": 0.0003370901106531786,
      "loss": 3.221,
      "step": 106111
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.592871069908142,
      "learning_rate": 0.00033708605147066514,
      "loss": 2.8272,
      "step": 106112
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5564428567886353,
      "learning_rate": 0.00033708199228125655,
      "loss": 2.7347,
      "step": 106113
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9608640670776367,
      "learning_rate": 0.0003370779330849538,
      "loss": 2.9243,
      "step": 106114
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5878050327301025,
      "learning_rate": 0.0003370738738817575,
      "loss": 2.8215,
      "step": 106115
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7964555025100708,
      "learning_rate": 0.0003370698146716685,
      "loss": 2.9309,
      "step": 106116
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6589508056640625,
      "learning_rate": 0.0003370657554546876,
      "loss": 2.8271,
      "step": 106117
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8835530281066895,
      "learning_rate": 0.00033706169623081545,
      "loss": 3.0724,
      "step": 106118
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.870922327041626,
      "learning_rate": 0.0003370576370000528,
      "loss": 3.055,
      "step": 106119
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.705723762512207,
      "learning_rate": 0.00033705357776240034,
      "loss": 3.0769,
      "step": 106120
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8792685270309448,
      "learning_rate": 0.00033704951851785904,
      "loss": 2.7362,
      "step": 106121
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9655282497406006,
      "learning_rate": 0.00033704545926642953,
      "loss": 3.1387,
      "step": 106122
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9043697118759155,
      "learning_rate": 0.0003370414000081125,
      "loss": 3.2356,
      "step": 106123
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.895112156867981,
      "learning_rate": 0.00033703734074290887,
      "loss": 3.0834,
      "step": 106124
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.961956024169922,
      "learning_rate": 0.00033703328147081924,
      "loss": 3.0119,
      "step": 106125
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.062053680419922,
      "learning_rate": 0.0003370292221918444,
      "loss": 3.0902,
      "step": 106126
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.531477689743042,
      "learning_rate": 0.00033702516290598516,
      "loss": 2.8483,
      "step": 106127
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7100200653076172,
      "learning_rate": 0.0003370211036132423,
      "loss": 3.0114,
      "step": 106128
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7823320627212524,
      "learning_rate": 0.00033701704431361647,
      "loss": 2.9036,
      "step": 106129
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7212584018707275,
      "learning_rate": 0.0003370129850071084,
      "loss": 3.1029,
      "step": 106130
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8341724872589111,
      "learning_rate": 0.000337008925693719,
      "loss": 2.8805,
      "step": 106131
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.341660737991333,
      "learning_rate": 0.0003370048663734489,
      "loss": 3.1376,
      "step": 106132
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.105025053024292,
      "learning_rate": 0.00033700080704629894,
      "loss": 3.0541,
      "step": 106133
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.75941801071167,
      "learning_rate": 0.0003369967477122698,
      "loss": 2.9167,
      "step": 106134
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.079179048538208,
      "learning_rate": 0.0003369926883713623,
      "loss": 2.9528,
      "step": 106135
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1346206665039062,
      "learning_rate": 0.00033698862902357713,
      "loss": 3.0692,
      "step": 106136
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7645877599716187,
      "learning_rate": 0.00033698456966891515,
      "loss": 3.24,
      "step": 106137
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.024024248123169,
      "learning_rate": 0.00033698051030737694,
      "loss": 2.9473,
      "step": 106138
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8856918811798096,
      "learning_rate": 0.00033697645093896337,
      "loss": 3.1005,
      "step": 106139
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1445114612579346,
      "learning_rate": 0.00033697239156367525,
      "loss": 2.9724,
      "step": 106140
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6096898317337036,
      "learning_rate": 0.0003369683321815132,
      "loss": 3.2485,
      "step": 106141
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9696215391159058,
      "learning_rate": 0.000336964272792478,
      "loss": 2.8846,
      "step": 106142
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8042750358581543,
      "learning_rate": 0.00033696021339657057,
      "loss": 2.8913,
      "step": 106143
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.597926139831543,
      "learning_rate": 0.0003369561539937915,
      "loss": 3.1666,
      "step": 106144
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4139580726623535,
      "learning_rate": 0.00033695209458414147,
      "loss": 2.8136,
      "step": 106145
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2669060230255127,
      "learning_rate": 0.0003369480351676215,
      "loss": 3.0988,
      "step": 106146
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5532126426696777,
      "learning_rate": 0.0003369439757442321,
      "loss": 3.0778,
      "step": 106147
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.93739914894104,
      "learning_rate": 0.0003369399163139741,
      "loss": 3.2212,
      "step": 106148
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8663278818130493,
      "learning_rate": 0.00033693585687684835,
      "loss": 3.0873,
      "step": 106149
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.695611596107483,
      "learning_rate": 0.00033693179743285543,
      "loss": 2.9613,
      "step": 106150
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8469598293304443,
      "learning_rate": 0.00033692773798199625,
      "loss": 3.0195,
      "step": 106151
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6336750984191895,
      "learning_rate": 0.00033692367852427153,
      "loss": 2.9746,
      "step": 106152
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.146465539932251,
      "learning_rate": 0.000336919619059682,
      "loss": 2.7786,
      "step": 106153
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.008571147918701,
      "learning_rate": 0.00033691555958822837,
      "loss": 3.1992,
      "step": 106154
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.944894552230835,
      "learning_rate": 0.0003369115001099115,
      "loss": 2.7639,
      "step": 106155
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9718996286392212,
      "learning_rate": 0.00033690744062473207,
      "loss": 3.0897,
      "step": 106156
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9112125635147095,
      "learning_rate": 0.0003369033811326908,
      "loss": 3.1282,
      "step": 106157
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7319730520248413,
      "learning_rate": 0.00033689932163378855,
      "loss": 2.9879,
      "step": 106158
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3527984619140625,
      "learning_rate": 0.00033689526212802597,
      "loss": 3.1087,
      "step": 106159
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9502220153808594,
      "learning_rate": 0.0003368912026154039,
      "loss": 3.0456,
      "step": 106160
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7221434116363525,
      "learning_rate": 0.000336887143095923,
      "loss": 3.1319,
      "step": 106161
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7142869234085083,
      "learning_rate": 0.0003368830835695842,
      "loss": 3.1624,
      "step": 106162
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8529216051101685,
      "learning_rate": 0.00033687902403638806,
      "loss": 3.2088,
      "step": 106163
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.079472064971924,
      "learning_rate": 0.0003368749644963354,
      "loss": 2.8462,
      "step": 106164
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.165255546569824,
      "learning_rate": 0.00033687090494942704,
      "loss": 3.0412,
      "step": 106165
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.036280870437622,
      "learning_rate": 0.00033686684539566364,
      "loss": 2.7625,
      "step": 106166
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3868887424468994,
      "learning_rate": 0.00033686278583504596,
      "loss": 3.1638,
      "step": 106167
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8501036167144775,
      "learning_rate": 0.0003368587262675748,
      "loss": 2.9187,
      "step": 106168
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9576655626296997,
      "learning_rate": 0.000336854666693251,
      "loss": 2.9845,
      "step": 106169
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.113375425338745,
      "learning_rate": 0.0003368506071120752,
      "loss": 3.1614,
      "step": 106170
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.22084379196167,
      "learning_rate": 0.0003368465475240481,
      "loss": 2.9523,
      "step": 106171
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0862441062927246,
      "learning_rate": 0.0003368424879291706,
      "loss": 3.159,
      "step": 106172
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.536302089691162,
      "learning_rate": 0.0003368384283274434,
      "loss": 3.13,
      "step": 106173
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.134704113006592,
      "learning_rate": 0.0003368343687188672,
      "loss": 3.1666,
      "step": 106174
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.892103910446167,
      "learning_rate": 0.00033683030910344275,
      "loss": 2.8978,
      "step": 106175
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.759761095046997,
      "learning_rate": 0.00033682624948117094,
      "loss": 3.2206,
      "step": 106176
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0497541427612305,
      "learning_rate": 0.0003368221898520524,
      "loss": 2.9511,
      "step": 106177
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2467026710510254,
      "learning_rate": 0.00033681813021608787,
      "loss": 2.8664,
      "step": 106178
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.356114149093628,
      "learning_rate": 0.0003368140705732783,
      "loss": 3.0726,
      "step": 106179
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.622525930404663,
      "learning_rate": 0.00033681001092362414,
      "loss": 2.916,
      "step": 106180
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7659188508987427,
      "learning_rate": 0.00033680595126712635,
      "loss": 3.0981,
      "step": 106181
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0306942462921143,
      "learning_rate": 0.0003368018916037857,
      "loss": 2.8152,
      "step": 106182
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7162050008773804,
      "learning_rate": 0.00033679783193360285,
      "loss": 2.8745,
      "step": 106183
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2151308059692383,
      "learning_rate": 0.00033679377225657856,
      "loss": 2.9492,
      "step": 106184
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.78533136844635,
      "learning_rate": 0.0003367897125727136,
      "loss": 2.9852,
      "step": 106185
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8690567016601562,
      "learning_rate": 0.0003367856528820088,
      "loss": 2.8116,
      "step": 106186
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3083038330078125,
      "learning_rate": 0.0003367815931844648,
      "loss": 3.1794,
      "step": 106187
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0338551998138428,
      "learning_rate": 0.00033677753348008245,
      "loss": 2.9734,
      "step": 106188
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8232371807098389,
      "learning_rate": 0.0003367734737688624,
      "loss": 3.128,
      "step": 106189
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6595091819763184,
      "learning_rate": 0.0003367694140508056,
      "loss": 2.7178,
      "step": 106190
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9601800441741943,
      "learning_rate": 0.00033676535432591257,
      "loss": 3.1799,
      "step": 106191
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6525753736495972,
      "learning_rate": 0.00033676129459418415,
      "loss": 2.9758,
      "step": 106192
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.052574634552002,
      "learning_rate": 0.0003367572348556211,
      "loss": 2.8991,
      "step": 106193
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3216392993927,
      "learning_rate": 0.0003367531751102242,
      "loss": 3.0332,
      "step": 106194
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.260161876678467,
      "learning_rate": 0.00033674911535799427,
      "loss": 3.2022,
      "step": 106195
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.221590995788574,
      "learning_rate": 0.0003367450555989319,
      "loss": 2.9446,
      "step": 106196
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.060873508453369,
      "learning_rate": 0.00033674099583303797,
      "loss": 2.9718,
      "step": 106197
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1569221019744873,
      "learning_rate": 0.0003367369360603132,
      "loss": 2.9512,
      "step": 106198
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.940683364868164,
      "learning_rate": 0.0003367328762807583,
      "loss": 2.9635,
      "step": 106199
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9224188327789307,
      "learning_rate": 0.00033672881649437407,
      "loss": 3.1264,
      "step": 106200
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1248779296875,
      "learning_rate": 0.00033672475670116136,
      "loss": 2.9803,
      "step": 106201
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.859208345413208,
      "learning_rate": 0.00033672069690112073,
      "loss": 2.8311,
      "step": 106202
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.248678684234619,
      "learning_rate": 0.000336716637094253,
      "loss": 2.8435,
      "step": 106203
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0064737796783447,
      "learning_rate": 0.00033671257728055905,
      "loss": 3.1925,
      "step": 106204
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5448397397994995,
      "learning_rate": 0.00033670851746003946,
      "loss": 2.6995,
      "step": 106205
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.02455472946167,
      "learning_rate": 0.0003367044576326951,
      "loss": 2.9126,
      "step": 106206
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5625922679901123,
      "learning_rate": 0.0003367003977985267,
      "loss": 3.0249,
      "step": 106207
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.877288579940796,
      "learning_rate": 0.000336696337957535,
      "loss": 3.0053,
      "step": 106208
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7864996194839478,
      "learning_rate": 0.0003366922781097207,
      "loss": 3.1907,
      "step": 106209
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0268630981445312,
      "learning_rate": 0.0003366882182550847,
      "loss": 2.969,
      "step": 106210
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.823645830154419,
      "learning_rate": 0.00033668415839362755,
      "loss": 3.0394,
      "step": 106211
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.9788661003112793,
      "learning_rate": 0.0003366800985253502,
      "loss": 2.9616,
      "step": 106212
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3274757862091064,
      "learning_rate": 0.00033667603865025335,
      "loss": 3.1124,
      "step": 106213
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9649792909622192,
      "learning_rate": 0.00033667197876833773,
      "loss": 3.1671,
      "step": 106214
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8232789039611816,
      "learning_rate": 0.00033666791887960405,
      "loss": 2.8811,
      "step": 106215
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.983373761177063,
      "learning_rate": 0.00033666385898405317,
      "loss": 3.0433,
      "step": 106216
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.796024203300476,
      "learning_rate": 0.00033665979908168575,
      "loss": 2.8654,
      "step": 106217
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7977734804153442,
      "learning_rate": 0.00033665573917250255,
      "loss": 3.409,
      "step": 106218
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7774291038513184,
      "learning_rate": 0.00033665167925650444,
      "loss": 2.9775,
      "step": 106219
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8789889812469482,
      "learning_rate": 0.000336647619333692,
      "loss": 3.0192,
      "step": 106220
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0289530754089355,
      "learning_rate": 0.00033664355940406606,
      "loss": 3.008,
      "step": 106221
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0437350273132324,
      "learning_rate": 0.00033663949946762754,
      "loss": 3.0777,
      "step": 106222
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7636057138442993,
      "learning_rate": 0.0003366354395243769,
      "loss": 2.9052,
      "step": 106223
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0069942474365234,
      "learning_rate": 0.00033663137957431506,
      "loss": 3.0498,
      "step": 106224
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1793229579925537,
      "learning_rate": 0.00033662731961744285,
      "loss": 3.0894,
      "step": 106225
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8639636039733887,
      "learning_rate": 0.0003366232596537608,
      "loss": 3.0937,
      "step": 106226
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4124886989593506,
      "learning_rate": 0.00033661919968326983,
      "loss": 2.8199,
      "step": 106227
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.380018472671509,
      "learning_rate": 0.00033661513970597077,
      "loss": 2.8243,
      "step": 106228
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9597561359405518,
      "learning_rate": 0.0003366110797218642,
      "loss": 3.032,
      "step": 106229
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6645426750183105,
      "learning_rate": 0.00033660701973095087,
      "loss": 3.285,
      "step": 106230
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.782130002975464,
      "learning_rate": 0.0003366029597332317,
      "loss": 2.8467,
      "step": 106231
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9455734491348267,
      "learning_rate": 0.0003365988997287073,
      "loss": 2.9478,
      "step": 106232
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.397254467010498,
      "learning_rate": 0.0003365948397173785,
      "loss": 2.6704,
      "step": 106233
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1074135303497314,
      "learning_rate": 0.00033659077969924603,
      "loss": 3.3046,
      "step": 106234
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7390129566192627,
      "learning_rate": 0.0003365867196743106,
      "loss": 2.8422,
      "step": 106235
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7433066368103027,
      "learning_rate": 0.00033658265964257297,
      "loss": 3.1149,
      "step": 106236
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7736800909042358,
      "learning_rate": 0.0003365785996040341,
      "loss": 3.3287,
      "step": 106237
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8522906303405762,
      "learning_rate": 0.0003365745395586944,
      "loss": 3.0104,
      "step": 106238
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8661766052246094,
      "learning_rate": 0.00033657047950655486,
      "loss": 2.9061,
      "step": 106239
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4223170280456543,
      "learning_rate": 0.00033656641944761624,
      "loss": 2.869,
      "step": 106240
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.902938961982727,
      "learning_rate": 0.0003365623593818791,
      "loss": 2.9499,
      "step": 106241
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.809120535850525,
      "learning_rate": 0.00033655829930934444,
      "loss": 2.7856,
      "step": 106242
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.498096227645874,
      "learning_rate": 0.0003365542392300129,
      "loss": 3.0602,
      "step": 106243
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.4619098901748657,
      "learning_rate": 0.0003365501791438852,
      "loss": 3.0984,
      "step": 106244
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7866148948669434,
      "learning_rate": 0.00033654611905096214,
      "loss": 3.0134,
      "step": 106245
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.640904188156128,
      "learning_rate": 0.0003365420589512445,
      "loss": 3.0366,
      "step": 106246
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5576846599578857,
      "learning_rate": 0.000336537998844733,
      "loss": 2.9378,
      "step": 106247
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0478579998016357,
      "learning_rate": 0.0003365339387314283,
      "loss": 3.0055,
      "step": 106248
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8775700330734253,
      "learning_rate": 0.0003365298786113313,
      "loss": 2.9471,
      "step": 106249
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8967771530151367,
      "learning_rate": 0.0003365258184844427,
      "loss": 2.7377,
      "step": 106250
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6039607524871826,
      "learning_rate": 0.0003365217583507633,
      "loss": 3.0128,
      "step": 106251
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.142664670944214,
      "learning_rate": 0.00033651769821029377,
      "loss": 3.1979,
      "step": 106252
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7315553426742554,
      "learning_rate": 0.00033651363806303495,
      "loss": 3.0373,
      "step": 106253
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8379480838775635,
      "learning_rate": 0.0003365095779089876,
      "loss": 2.8102,
      "step": 106254
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7044613361358643,
      "learning_rate": 0.0003365055177481523,
      "loss": 2.7898,
      "step": 106255
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5766546726226807,
      "learning_rate": 0.00033650145758053,
      "loss": 3.077,
      "step": 106256
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8635741472244263,
      "learning_rate": 0.00033649739740612147,
      "loss": 3.0105,
      "step": 106257
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6121792793273926,
      "learning_rate": 0.0003364933372249273,
      "loss": 2.8914,
      "step": 106258
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9047049283981323,
      "learning_rate": 0.0003364892770369483,
      "loss": 2.8058,
      "step": 106259
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0109143257141113,
      "learning_rate": 0.00033648521684218535,
      "loss": 2.7672,
      "step": 106260
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8293728828430176,
      "learning_rate": 0.00033648115664063906,
      "loss": 3.0007,
      "step": 106261
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.615304470062256,
      "learning_rate": 0.00033647709643231023,
      "loss": 2.8215,
      "step": 106262
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0063071250915527,
      "learning_rate": 0.0003364730362171996,
      "loss": 2.98,
      "step": 106263
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.54329252243042,
      "learning_rate": 0.00033646897599530793,
      "loss": 2.8219,
      "step": 106264
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0380477905273438,
      "learning_rate": 0.0003364649157666361,
      "loss": 3.1804,
      "step": 106265
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5651323795318604,
      "learning_rate": 0.00033646085553118465,
      "loss": 2.9654,
      "step": 106266
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5547707080841064,
      "learning_rate": 0.00033645679528895443,
      "loss": 3.0791,
      "step": 106267
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.016378402709961,
      "learning_rate": 0.0003364527350399463,
      "loss": 2.8805,
      "step": 106268
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.298018217086792,
      "learning_rate": 0.00033644867478416083,
      "loss": 3.2166,
      "step": 106269
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.631171226501465,
      "learning_rate": 0.00033644461452159884,
      "loss": 3.0811,
      "step": 106270
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.778969168663025,
      "learning_rate": 0.00033644055425226124,
      "loss": 2.8974,
      "step": 106271
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.625648856163025,
      "learning_rate": 0.0003364364939761486,
      "loss": 3.0696,
      "step": 106272
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4359982013702393,
      "learning_rate": 0.0003364324336932616,
      "loss": 3.1965,
      "step": 106273
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7218873500823975,
      "learning_rate": 0.00033642837340360133,
      "loss": 2.9613,
      "step": 106274
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.847760796546936,
      "learning_rate": 0.00033642431310716817,
      "loss": 2.8191,
      "step": 106275
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8341606855392456,
      "learning_rate": 0.00033642025280396313,
      "loss": 2.8101,
      "step": 106276
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8660974502563477,
      "learning_rate": 0.00033641619249398687,
      "loss": 3.0042,
      "step": 106277
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5290385484695435,
      "learning_rate": 0.00033641213217724014,
      "loss": 3.0482,
      "step": 106278
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1933512687683105,
      "learning_rate": 0.00033640807185372366,
      "loss": 2.9313,
      "step": 106279
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.799537181854248,
      "learning_rate": 0.00033640401152343834,
      "loss": 2.9018,
      "step": 106280
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0904505252838135,
      "learning_rate": 0.00033639995118638473,
      "loss": 3.1121,
      "step": 106281
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8833556175231934,
      "learning_rate": 0.0003363958908425637,
      "loss": 3.268,
      "step": 106282
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6830583810806274,
      "learning_rate": 0.00033639183049197604,
      "loss": 2.9918,
      "step": 106283
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9056882858276367,
      "learning_rate": 0.0003363877701346224,
      "loss": 2.9695,
      "step": 106284
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9094970226287842,
      "learning_rate": 0.00033638370977050354,
      "loss": 3.1295,
      "step": 106285
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6971862316131592,
      "learning_rate": 0.0003363796493996204,
      "loss": 3.0943,
      "step": 106286
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7429800033569336,
      "learning_rate": 0.00033637558902197353,
      "loss": 2.7664,
      "step": 106287
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6647148132324219,
      "learning_rate": 0.0003363715286375637,
      "loss": 3.1668,
      "step": 106288
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7817527055740356,
      "learning_rate": 0.0003363674682463918,
      "loss": 3.0463,
      "step": 106289
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.200531244277954,
      "learning_rate": 0.0003363634078484584,
      "loss": 2.8319,
      "step": 106290
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7036629915237427,
      "learning_rate": 0.0003363593474437644,
      "loss": 3.1797,
      "step": 106291
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8682060241699219,
      "learning_rate": 0.00033635528703231056,
      "loss": 3.0812,
      "step": 106292
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6799707412719727,
      "learning_rate": 0.00033635122661409753,
      "loss": 3.2254,
      "step": 106293
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0637567043304443,
      "learning_rate": 0.0003363471661891261,
      "loss": 3.0582,
      "step": 106294
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.635468006134033,
      "learning_rate": 0.00033634310575739716,
      "loss": 2.9542,
      "step": 106295
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1406748294830322,
      "learning_rate": 0.00033633904531891123,
      "loss": 2.8105,
      "step": 106296
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5920467376708984,
      "learning_rate": 0.0003363349848736692,
      "loss": 2.9955,
      "step": 106297
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.792658567428589,
      "learning_rate": 0.0003363309244216719,
      "loss": 2.9462,
      "step": 106298
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.4570608139038086,
      "learning_rate": 0.00033632686396291994,
      "loss": 2.7286,
      "step": 106299
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.5940141677856445,
      "learning_rate": 0.0003363228034974141,
      "loss": 2.8592,
      "step": 106300
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9301071166992188,
      "learning_rate": 0.00033631874302515524,
      "loss": 3.0903,
      "step": 106301
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.646660089492798,
      "learning_rate": 0.000336314682546144,
      "loss": 2.7869,
      "step": 106302
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7633329629898071,
      "learning_rate": 0.0003363106220603811,
      "loss": 2.9517,
      "step": 106303
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.806759238243103,
      "learning_rate": 0.00033630656156786754,
      "loss": 2.9657,
      "step": 106304
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6693025827407837,
      "learning_rate": 0.0003363025010686038,
      "loss": 3.1503,
      "step": 106305
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9136109352111816,
      "learning_rate": 0.0003362984405625907,
      "loss": 3.02,
      "step": 106306
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8770004510879517,
      "learning_rate": 0.0003362943800498291,
      "loss": 2.969,
      "step": 106307
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.027191400527954,
      "learning_rate": 0.0003362903195303196,
      "loss": 3.264,
      "step": 106308
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7678678035736084,
      "learning_rate": 0.00033628625900406314,
      "loss": 3.1117,
      "step": 106309
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.5186357498168945,
      "learning_rate": 0.0003362821984710604,
      "loss": 3.0269,
      "step": 106310
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.182408571243286,
      "learning_rate": 0.00033627813793131206,
      "loss": 2.9611,
      "step": 106311
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.612079620361328,
      "learning_rate": 0.0003362740773848189,
      "loss": 2.9058,
      "step": 106312
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9161163568496704,
      "learning_rate": 0.00033627001683158176,
      "loss": 3.0864,
      "step": 106313
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.423093318939209,
      "learning_rate": 0.0003362659562716013,
      "loss": 2.9331,
      "step": 106314
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.769212007522583,
      "learning_rate": 0.0003362618957048783,
      "loss": 3.2103,
      "step": 106315
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.475273847579956,
      "learning_rate": 0.00033625783513141364,
      "loss": 3.0464,
      "step": 106316
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.689260482788086,
      "learning_rate": 0.0003362537745512079,
      "loss": 3.1392,
      "step": 106317
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1516056060791016,
      "learning_rate": 0.0003362497139642618,
      "loss": 3.0568,
      "step": 106318
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3964767456054688,
      "learning_rate": 0.0003362456533705764,
      "loss": 3.3564,
      "step": 106319
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.723985195159912,
      "learning_rate": 0.00033624159277015206,
      "loss": 3.0178,
      "step": 106320
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.3656158447265625,
      "learning_rate": 0.00033623753216298975,
      "loss": 2.878,
      "step": 106321
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.0839779376983643,
      "learning_rate": 0.0003362334715490903,
      "loss": 2.9517,
      "step": 106322
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.808458089828491,
      "learning_rate": 0.0003362294109284543,
      "loss": 3.104,
      "step": 106323
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.7454988956451416,
      "learning_rate": 0.0003362253503010826,
      "loss": 3.1363,
      "step": 106324
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7723461389541626,
      "learning_rate": 0.00033622128966697594,
      "loss": 3.0813,
      "step": 106325
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.8239822387695312,
      "learning_rate": 0.00033621722902613495,
      "loss": 3.1262,
      "step": 106326
    },
    {
      "epoch": 1.38,
      "grad_norm": 3.0883469581604004,
      "learning_rate": 0.0003362131683785606,
      "loss": 3.0339,
      "step": 106327
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7930525541305542,
      "learning_rate": 0.0003362091077242534,
      "loss": 2.7874,
      "step": 106328
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9760874509811401,
      "learning_rate": 0.0003362050470632144,
      "loss": 3.2102,
      "step": 106329
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.11035418510437,
      "learning_rate": 0.0003362009863954441,
      "loss": 2.8597,
      "step": 106330
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.9474529027938843,
      "learning_rate": 0.0003361969257209434,
      "loss": 3.1387,
      "step": 106331
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.07365083694458,
      "learning_rate": 0.00033619286503971306,
      "loss": 3.1758,
      "step": 106332
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.762851357460022,
      "learning_rate": 0.00033618880435175373,
      "loss": 2.938,
      "step": 106333
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.88202965259552,
      "learning_rate": 0.00033618474365706616,
      "loss": 3.0857,
      "step": 106334
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.74861478805542,
      "learning_rate": 0.00033618068295565123,
      "loss": 2.6868,
      "step": 106335
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7854734659194946,
      "learning_rate": 0.0003361766222475096,
      "loss": 2.8402,
      "step": 106336
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8465181589126587,
      "learning_rate": 0.00033617256153264203,
      "loss": 2.9328,
      "step": 106337
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.094625234603882,
      "learning_rate": 0.0003361685008110494,
      "loss": 2.7715,
      "step": 106338
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.2307331562042236,
      "learning_rate": 0.00033616444008273226,
      "loss": 2.8784,
      "step": 106339
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8881354331970215,
      "learning_rate": 0.0003361603793476915,
      "loss": 2.9447,
      "step": 106340
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6160720586776733,
      "learning_rate": 0.0003361563186059278,
      "loss": 3.2284,
      "step": 106341
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.042919874191284,
      "learning_rate": 0.00033615225785744204,
      "loss": 2.8496,
      "step": 106342
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6512407064437866,
      "learning_rate": 0.00033614819710223485,
      "loss": 3.0949,
      "step": 106343
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.629915237426758,
      "learning_rate": 0.00033614413634030707,
      "loss": 2.8072,
      "step": 106344
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7883026599884033,
      "learning_rate": 0.0003361400755716593,
      "loss": 2.9371,
      "step": 106345
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.6317877769470215,
      "learning_rate": 0.00033613601479629256,
      "loss": 2.9339,
      "step": 106346
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8087292909622192,
      "learning_rate": 0.00033613195401420735,
      "loss": 3.0255,
      "step": 106347
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7924926280975342,
      "learning_rate": 0.00033612789322540453,
      "loss": 2.8379,
      "step": 106348
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.1232457160949707,
      "learning_rate": 0.0003361238324298849,
      "loss": 2.8587,
      "step": 106349
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.6988283395767212,
      "learning_rate": 0.00033611977162764916,
      "loss": 3.0175,
      "step": 106350
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8534682989120483,
      "learning_rate": 0.00033611571081869807,
      "loss": 3.0969,
      "step": 106351
    },
    {
      "epoch": 1.38,
      "grad_norm": 4.995561122894287,
      "learning_rate": 0.00033611165000303235,
      "loss": 2.9725,
      "step": 106352
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.619523763656616,
      "learning_rate": 0.0003361075891806528,
      "loss": 2.8857,
      "step": 106353
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8479273319244385,
      "learning_rate": 0.0003361035283515602,
      "loss": 3.1145,
      "step": 106354
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.982028841972351,
      "learning_rate": 0.00033609946751575524,
      "loss": 2.9992,
      "step": 106355
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.861610174179077,
      "learning_rate": 0.00033609540667323877,
      "loss": 2.8599,
      "step": 106356
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.801352858543396,
      "learning_rate": 0.0003360913458240114,
      "loss": 2.9903,
      "step": 106357
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8519712686538696,
      "learning_rate": 0.00033608728496807404,
      "loss": 2.9988,
      "step": 106358
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8193387985229492,
      "learning_rate": 0.00033608322410542735,
      "loss": 3.1208,
      "step": 106359
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.599070429801941,
      "learning_rate": 0.0003360791632360721,
      "loss": 2.9719,
      "step": 106360
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.661784052848816,
      "learning_rate": 0.00033607510236000906,
      "loss": 2.9983,
      "step": 106361
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.708752155303955,
      "learning_rate": 0.000336071041477239,
      "loss": 3.0363,
      "step": 106362
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8703521490097046,
      "learning_rate": 0.00033606698058776256,
      "loss": 2.8034,
      "step": 106363
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.8247792720794678,
      "learning_rate": 0.00033606291969158063,
      "loss": 3.1204,
      "step": 106364
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.930773138999939,
      "learning_rate": 0.000336058858788694,
      "loss": 3.0347,
      "step": 106365
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.7318239212036133,
      "learning_rate": 0.0003360547978791033,
      "loss": 2.9437,
      "step": 106366
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.679364562034607,
      "learning_rate": 0.00033605073696280926,
      "loss": 2.8829,
      "step": 106367
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.613829255104065,
      "learning_rate": 0.00033604667603981284,
      "loss": 3.2521,
      "step": 106368
    },
    {
      "epoch": 1.38,
      "grad_norm": 1.951723575592041,
      "learning_rate": 0.00033604261511011453,
      "loss": 2.8781,
      "step": 106369
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7955747842788696,
      "learning_rate": 0.0003360385541737153,
      "loss": 3.337,
      "step": 106370
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7381283044815063,
      "learning_rate": 0.00033603449323061583,
      "loss": 3.0061,
      "step": 106371
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.824397087097168,
      "learning_rate": 0.0003360304322808168,
      "loss": 3.0499,
      "step": 106372
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.127169370651245,
      "learning_rate": 0.00033602637132431907,
      "loss": 3.1264,
      "step": 106373
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7665692567825317,
      "learning_rate": 0.0003360223103611234,
      "loss": 3.006,
      "step": 106374
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4574880599975586,
      "learning_rate": 0.0003360182493912305,
      "loss": 2.8452,
      "step": 106375
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.437469482421875,
      "learning_rate": 0.00033601418841464097,
      "loss": 2.7944,
      "step": 106376
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1765012741088867,
      "learning_rate": 0.0003360101274313559,
      "loss": 3.1716,
      "step": 106377
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8659744262695312,
      "learning_rate": 0.00033600606644137575,
      "loss": 2.7414,
      "step": 106378
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.786080241203308,
      "learning_rate": 0.0003360020054447015,
      "loss": 3.0629,
      "step": 106379
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7796392440795898,
      "learning_rate": 0.0003359979444413338,
      "loss": 3.0199,
      "step": 106380
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.488337755203247,
      "learning_rate": 0.0003359938834312733,
      "loss": 2.9371,
      "step": 106381
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6843130588531494,
      "learning_rate": 0.0003359898224145209,
      "loss": 2.9688,
      "step": 106382
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8568769693374634,
      "learning_rate": 0.00033598576139107737,
      "loss": 3.0093,
      "step": 106383
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7484378814697266,
      "learning_rate": 0.0003359817003609433,
      "loss": 3.1102,
      "step": 106384
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.983569860458374,
      "learning_rate": 0.00033597763932411963,
      "loss": 2.8644,
      "step": 106385
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.6416871547698975,
      "learning_rate": 0.00033597357828060704,
      "loss": 2.9835,
      "step": 106386
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.963881015777588,
      "learning_rate": 0.0003359695172304062,
      "loss": 2.8536,
      "step": 106387
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.712719440460205,
      "learning_rate": 0.00033596545617351804,
      "loss": 3.1352,
      "step": 106388
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5022358894348145,
      "learning_rate": 0.00033596139510994326,
      "loss": 3.2241,
      "step": 106389
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.042888641357422,
      "learning_rate": 0.0003359573340396824,
      "loss": 2.9691,
      "step": 106390
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.656132459640503,
      "learning_rate": 0.00033595327296273656,
      "loss": 2.7328,
      "step": 106391
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.752084493637085,
      "learning_rate": 0.0003359492118791063,
      "loss": 3.3421,
      "step": 106392
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0720646381378174,
      "learning_rate": 0.00033594515078879235,
      "loss": 3.0379,
      "step": 106393
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5416576862335205,
      "learning_rate": 0.0003359410896917955,
      "loss": 3.1204,
      "step": 106394
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.20973801612854,
      "learning_rate": 0.0003359370285881166,
      "loss": 2.8064,
      "step": 106395
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.914832353591919,
      "learning_rate": 0.0003359329674777563,
      "loss": 3.1485,
      "step": 106396
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.151965856552124,
      "learning_rate": 0.00033592890636071536,
      "loss": 2.7354,
      "step": 106397
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1652114391326904,
      "learning_rate": 0.0003359248452369946,
      "loss": 3.0205,
      "step": 106398
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5895811319351196,
      "learning_rate": 0.0003359207841065947,
      "loss": 2.8678,
      "step": 106399
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8250457048416138,
      "learning_rate": 0.00033591672296951646,
      "loss": 3.1577,
      "step": 106400
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3066368103027344,
      "learning_rate": 0.00033591266182576054,
      "loss": 3.0498,
      "step": 106401
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1557202339172363,
      "learning_rate": 0.00033590860067532794,
      "loss": 3.0904,
      "step": 106402
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.17623233795166,
      "learning_rate": 0.00033590453951821915,
      "loss": 3.1393,
      "step": 106403
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.193767547607422,
      "learning_rate": 0.00033590047835443503,
      "loss": 2.8883,
      "step": 106404
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3576557636260986,
      "learning_rate": 0.0003358964171839764,
      "loss": 3.0005,
      "step": 106405
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2975013256073,
      "learning_rate": 0.0003358923560068439,
      "loss": 2.9801,
      "step": 106406
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6564847230911255,
      "learning_rate": 0.00033588829482303833,
      "loss": 3.0686,
      "step": 106407
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1829962730407715,
      "learning_rate": 0.00033588423363256045,
      "loss": 2.8554,
      "step": 106408
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8893771171569824,
      "learning_rate": 0.00033588017243541105,
      "loss": 3.1124,
      "step": 106409
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7435102462768555,
      "learning_rate": 0.0003358761112315907,
      "loss": 2.8704,
      "step": 106410
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.896749496459961,
      "learning_rate": 0.0003358720500211005,
      "loss": 3.0693,
      "step": 106411
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.4872243404388428,
      "learning_rate": 0.0003358679888039409,
      "loss": 2.7871,
      "step": 106412
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7973445653915405,
      "learning_rate": 0.0003358639275801128,
      "loss": 2.8116,
      "step": 106413
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0716426372528076,
      "learning_rate": 0.0003358598663496169,
      "loss": 2.9398,
      "step": 106414
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.325160264968872,
      "learning_rate": 0.000335855805112454,
      "loss": 2.9309,
      "step": 106415
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8104310035705566,
      "learning_rate": 0.0003358517438686248,
      "loss": 2.8293,
      "step": 106416
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7192047834396362,
      "learning_rate": 0.00033584768261813016,
      "loss": 3.1168,
      "step": 106417
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.125434160232544,
      "learning_rate": 0.00033584362136097065,
      "loss": 2.9012,
      "step": 106418
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8090473413467407,
      "learning_rate": 0.0003358395600971472,
      "loss": 3.1144,
      "step": 106419
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.185081720352173,
      "learning_rate": 0.0003358354988266605,
      "loss": 2.7651,
      "step": 106420
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7461771965026855,
      "learning_rate": 0.00033583143754951125,
      "loss": 2.8317,
      "step": 106421
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8178027868270874,
      "learning_rate": 0.00033582737626570026,
      "loss": 2.9014,
      "step": 106422
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8760716915130615,
      "learning_rate": 0.0003358233149752284,
      "loss": 2.9295,
      "step": 106423
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7749979496002197,
      "learning_rate": 0.0003358192536780962,
      "loss": 3.1453,
      "step": 106424
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9654210805892944,
      "learning_rate": 0.0003358151923743045,
      "loss": 2.9593,
      "step": 106425
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.890535593032837,
      "learning_rate": 0.00033581113106385415,
      "loss": 2.7731,
      "step": 106426
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5042847394943237,
      "learning_rate": 0.00033580706974674585,
      "loss": 2.9559,
      "step": 106427
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9892311096191406,
      "learning_rate": 0.0003358030084229803,
      "loss": 2.8262,
      "step": 106428
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7829704284667969,
      "learning_rate": 0.00033579894709255824,
      "loss": 2.9534,
      "step": 106429
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6164696216583252,
      "learning_rate": 0.0003357948857554806,
      "loss": 2.9573,
      "step": 106430
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.476783275604248,
      "learning_rate": 0.00033579082441174794,
      "loss": 2.8177,
      "step": 106431
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.159586191177368,
      "learning_rate": 0.0003357867630613611,
      "loss": 3.217,
      "step": 106432
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7525790929794312,
      "learning_rate": 0.0003357827017043208,
      "loss": 3.3017,
      "step": 106433
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7196965217590332,
      "learning_rate": 0.00033577864034062786,
      "loss": 3.1959,
      "step": 106434
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1273694038391113,
      "learning_rate": 0.000335774578970283,
      "loss": 3.1735,
      "step": 106435
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4113054275512695,
      "learning_rate": 0.0003357705175932869,
      "loss": 3.0453,
      "step": 106436
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9862353801727295,
      "learning_rate": 0.0003357664562096405,
      "loss": 3.0758,
      "step": 106437
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0659708976745605,
      "learning_rate": 0.0003357623948193444,
      "loss": 2.8212,
      "step": 106438
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.725722074508667,
      "learning_rate": 0.00033575833342239936,
      "loss": 2.9862,
      "step": 106439
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8506399393081665,
      "learning_rate": 0.00033575427201880615,
      "loss": 3.0234,
      "step": 106440
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1982669830322266,
      "learning_rate": 0.00033575021060856564,
      "loss": 3.0589,
      "step": 106441
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8008288145065308,
      "learning_rate": 0.00033574614919167843,
      "loss": 2.9414,
      "step": 106442
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.011331558227539,
      "learning_rate": 0.00033574208776814533,
      "loss": 2.8698,
      "step": 106443
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9872554540634155,
      "learning_rate": 0.00033573802633796716,
      "loss": 3.134,
      "step": 106444
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5612506866455078,
      "learning_rate": 0.0003357339649011445,
      "loss": 2.9548,
      "step": 106445
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.043036460876465,
      "learning_rate": 0.0003357299034576783,
      "loss": 3.0552,
      "step": 106446
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.010158061981201,
      "learning_rate": 0.0003357258420075693,
      "loss": 2.9841,
      "step": 106447
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5214948654174805,
      "learning_rate": 0.0003357217805508181,
      "loss": 3.0926,
      "step": 106448
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.917646884918213,
      "learning_rate": 0.00033571771908742553,
      "loss": 2.941,
      "step": 106449
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7473541498184204,
      "learning_rate": 0.00033571365761739247,
      "loss": 3.0357,
      "step": 106450
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0313596725463867,
      "learning_rate": 0.0003357095961407194,
      "loss": 2.9609,
      "step": 106451
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7588527202606201,
      "learning_rate": 0.0003357055346574074,
      "loss": 2.9312,
      "step": 106452
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6659497022628784,
      "learning_rate": 0.000335701473167457,
      "loss": 2.8911,
      "step": 106453
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8113611936569214,
      "learning_rate": 0.00033569741167086903,
      "loss": 3.2179,
      "step": 106454
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0157477855682373,
      "learning_rate": 0.0003356933501676442,
      "loss": 2.9418,
      "step": 106455
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.720966339111328,
      "learning_rate": 0.00033568928865778335,
      "loss": 2.8125,
      "step": 106456
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.625859260559082,
      "learning_rate": 0.0003356852271412872,
      "loss": 3.0879,
      "step": 106457
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9782180786132812,
      "learning_rate": 0.00033568116561815645,
      "loss": 2.6024,
      "step": 106458
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8217328786849976,
      "learning_rate": 0.0003356771040883919,
      "loss": 2.7612,
      "step": 106459
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1531689167022705,
      "learning_rate": 0.0003356730425519943,
      "loss": 2.9875,
      "step": 106460
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7476699352264404,
      "learning_rate": 0.0003356689810089644,
      "loss": 3.1183,
      "step": 106461
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0326712131500244,
      "learning_rate": 0.000335664919459303,
      "loss": 2.9604,
      "step": 106462
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.688138484954834,
      "learning_rate": 0.0003356608579030109,
      "loss": 3.1173,
      "step": 106463
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.753269910812378,
      "learning_rate": 0.00033565679634008864,
      "loss": 2.9851,
      "step": 106464
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.694295644760132,
      "learning_rate": 0.00033565273477053714,
      "loss": 3.0573,
      "step": 106465
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3018240928649902,
      "learning_rate": 0.00033564867319435716,
      "loss": 3.0594,
      "step": 106466
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.394136428833008,
      "learning_rate": 0.00033564461161154934,
      "loss": 2.8093,
      "step": 106467
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8584179878234863,
      "learning_rate": 0.00033564055002211456,
      "loss": 3.1634,
      "step": 106468
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2101731300354004,
      "learning_rate": 0.0003356364884260536,
      "loss": 3.1225,
      "step": 106469
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7367823123931885,
      "learning_rate": 0.00033563242682336706,
      "loss": 2.8055,
      "step": 106470
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7701188325881958,
      "learning_rate": 0.00033562836521405575,
      "loss": 2.8749,
      "step": 106471
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7814146280288696,
      "learning_rate": 0.00033562430359812057,
      "loss": 2.8661,
      "step": 106472
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8254289627075195,
      "learning_rate": 0.00033562024197556205,
      "loss": 2.9891,
      "step": 106473
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9408830404281616,
      "learning_rate": 0.00033561618034638106,
      "loss": 3.1658,
      "step": 106474
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7254854440689087,
      "learning_rate": 0.0003356121187105784,
      "loss": 3.1795,
      "step": 106475
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8305647373199463,
      "learning_rate": 0.00033560805706815477,
      "loss": 3.0139,
      "step": 106476
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.849960207939148,
      "learning_rate": 0.0003356039954191109,
      "loss": 3.0401,
      "step": 106477
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6573286056518555,
      "learning_rate": 0.00033559993376344765,
      "loss": 2.8683,
      "step": 106478
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.501589059829712,
      "learning_rate": 0.00033559587210116564,
      "loss": 2.9229,
      "step": 106479
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6999845504760742,
      "learning_rate": 0.00033559181043226567,
      "loss": 2.8551,
      "step": 106480
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.76060152053833,
      "learning_rate": 0.0003355877487567486,
      "loss": 2.7929,
      "step": 106481
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7213771343231201,
      "learning_rate": 0.000335583687074615,
      "loss": 3.058,
      "step": 106482
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2462334632873535,
      "learning_rate": 0.0003355796253858657,
      "loss": 3.1503,
      "step": 106483
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3772597312927246,
      "learning_rate": 0.00033557556369050157,
      "loss": 2.8318,
      "step": 106484
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.6941936016082764,
      "learning_rate": 0.0003355715019885232,
      "loss": 2.9215,
      "step": 106485
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7125746011734009,
      "learning_rate": 0.00033556744027993143,
      "loss": 3.0859,
      "step": 106486
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.042168617248535,
      "learning_rate": 0.000335563378564727,
      "loss": 3.1333,
      "step": 106487
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0270583629608154,
      "learning_rate": 0.00033555931684291077,
      "loss": 2.9614,
      "step": 106488
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.927039384841919,
      "learning_rate": 0.0003355552551144832,
      "loss": 2.7091,
      "step": 106489
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9720674753189087,
      "learning_rate": 0.00033555119337944545,
      "loss": 2.9012,
      "step": 106490
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.895506501197815,
      "learning_rate": 0.0003355471316377979,
      "loss": 3.0529,
      "step": 106491
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1609387397766113,
      "learning_rate": 0.00033554306988954147,
      "loss": 3.0974,
      "step": 106492
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.052816867828369,
      "learning_rate": 0.00033553900813467706,
      "loss": 3.1299,
      "step": 106493
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.74928879737854,
      "learning_rate": 0.00033553494637320513,
      "loss": 2.9877,
      "step": 106494
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0206308364868164,
      "learning_rate": 0.0003355308846051266,
      "loss": 2.9204,
      "step": 106495
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7033698558807373,
      "learning_rate": 0.0003355268228304423,
      "loss": 2.9371,
      "step": 106496
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1086440086364746,
      "learning_rate": 0.00033552276104915283,
      "loss": 3.0828,
      "step": 106497
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5158379077911377,
      "learning_rate": 0.00033551869926125896,
      "loss": 3.0005,
      "step": 106498
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8256404399871826,
      "learning_rate": 0.0003355146374667616,
      "loss": 3.261,
      "step": 106499
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9593449831008911,
      "learning_rate": 0.0003355105756656613,
      "loss": 3.1928,
      "step": 106500
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9370245933532715,
      "learning_rate": 0.00033550651385795896,
      "loss": 2.9877,
      "step": 106501
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.961618423461914,
      "learning_rate": 0.00033550245204365533,
      "loss": 3.005,
      "step": 106502
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8661377429962158,
      "learning_rate": 0.000335498390222751,
      "loss": 3.187,
      "step": 106503
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.292539596557617,
      "learning_rate": 0.00033549432839524696,
      "loss": 3.2023,
      "step": 106504
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.390739917755127,
      "learning_rate": 0.0003354902665611439,
      "loss": 2.9037,
      "step": 106505
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.741894245147705,
      "learning_rate": 0.00033548620472044244,
      "loss": 2.9374,
      "step": 106506
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.668985366821289,
      "learning_rate": 0.0003354821428731434,
      "loss": 3.1666,
      "step": 106507
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.652099370956421,
      "learning_rate": 0.00033547808101924763,
      "loss": 2.867,
      "step": 106508
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.4360508918762207,
      "learning_rate": 0.00033547401915875574,
      "loss": 2.7836,
      "step": 106509
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.221118450164795,
      "learning_rate": 0.0003354699572916686,
      "loss": 2.8908,
      "step": 106510
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8274855613708496,
      "learning_rate": 0.000335465895417987,
      "loss": 2.9118,
      "step": 106511
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.432121515274048,
      "learning_rate": 0.00033546183353771146,
      "loss": 2.9885,
      "step": 106512
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.8935587406158447,
      "learning_rate": 0.000335457771650843,
      "loss": 3.0808,
      "step": 106513
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.503664493560791,
      "learning_rate": 0.0003354537097573822,
      "loss": 3.0087,
      "step": 106514
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.841915488243103,
      "learning_rate": 0.00033544964785733,
      "loss": 3.2797,
      "step": 106515
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9725825786590576,
      "learning_rate": 0.0003354455859506869,
      "loss": 2.7815,
      "step": 106516
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.654020309448242,
      "learning_rate": 0.0003354415240374539,
      "loss": 2.7739,
      "step": 106517
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6777143478393555,
      "learning_rate": 0.00033543746211763166,
      "loss": 3.0086,
      "step": 106518
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.809771180152893,
      "learning_rate": 0.0003354334001912209,
      "loss": 3.1177,
      "step": 106519
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.421607732772827,
      "learning_rate": 0.00033542933825822237,
      "loss": 2.7391,
      "step": 106520
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3410472869873047,
      "learning_rate": 0.0003354252763186369,
      "loss": 2.8664,
      "step": 106521
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0551843643188477,
      "learning_rate": 0.00033542121437246517,
      "loss": 3.1437,
      "step": 106522
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.601997137069702,
      "learning_rate": 0.00033541715241970806,
      "loss": 3.0239,
      "step": 106523
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.97379469871521,
      "learning_rate": 0.0003354130904603661,
      "loss": 2.9457,
      "step": 106524
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.287097215652466,
      "learning_rate": 0.0003354090284944402,
      "loss": 3.1545,
      "step": 106525
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.106994152069092,
      "learning_rate": 0.0003354049665219312,
      "loss": 2.8208,
      "step": 106526
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.663893461227417,
      "learning_rate": 0.00033540090454283963,
      "loss": 3.008,
      "step": 106527
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0253820419311523,
      "learning_rate": 0.0003353968425571664,
      "loss": 3.1493,
      "step": 106528
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6483343839645386,
      "learning_rate": 0.00033539278056491224,
      "loss": 3.2416,
      "step": 106529
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6676485538482666,
      "learning_rate": 0.0003353887185660779,
      "loss": 3.0337,
      "step": 106530
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.025933027267456,
      "learning_rate": 0.0003353846565606641,
      "loss": 3.0052,
      "step": 106531
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.075592279434204,
      "learning_rate": 0.0003353805945486716,
      "loss": 2.694,
      "step": 106532
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.792616605758667,
      "learning_rate": 0.0003353765325301013,
      "loss": 3.0776,
      "step": 106533
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2055163383483887,
      "learning_rate": 0.00033537247050495375,
      "loss": 3.1308,
      "step": 106534
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4369497299194336,
      "learning_rate": 0.00033536840847322975,
      "loss": 3.0084,
      "step": 106535
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.651442527770996,
      "learning_rate": 0.0003353643464349302,
      "loss": 2.9263,
      "step": 106536
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8077712059020996,
      "learning_rate": 0.0003353602843900557,
      "loss": 2.9897,
      "step": 106537
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6268020868301392,
      "learning_rate": 0.00033535622233860703,
      "loss": 2.8996,
      "step": 106538
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5073959827423096,
      "learning_rate": 0.0003353521602805851,
      "loss": 2.838,
      "step": 106539
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8192607164382935,
      "learning_rate": 0.0003353480982159904,
      "loss": 3.2951,
      "step": 106540
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7058262825012207,
      "learning_rate": 0.0003353440361448238,
      "loss": 2.9398,
      "step": 106541
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9793858528137207,
      "learning_rate": 0.0003353399740670862,
      "loss": 3.1475,
      "step": 106542
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0657742023468018,
      "learning_rate": 0.0003353359119827781,
      "loss": 2.7288,
      "step": 106543
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.7347235679626465,
      "learning_rate": 0.0003353318498919005,
      "loss": 2.7732,
      "step": 106544
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.74592125415802,
      "learning_rate": 0.000335327787794454,
      "loss": 2.9461,
      "step": 106545
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.133915424346924,
      "learning_rate": 0.0003353237256904394,
      "loss": 2.9869,
      "step": 106546
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6561928987503052,
      "learning_rate": 0.00033531966357985734,
      "loss": 2.6567,
      "step": 106547
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8761996030807495,
      "learning_rate": 0.0003353156014627089,
      "loss": 2.846,
      "step": 106548
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.646224021911621,
      "learning_rate": 0.0003353115393389944,
      "loss": 3.3118,
      "step": 106549
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7528116703033447,
      "learning_rate": 0.00033530747720871495,
      "loss": 2.8942,
      "step": 106550
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.717242956161499,
      "learning_rate": 0.0003353034150718712,
      "loss": 2.8521,
      "step": 106551
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7393598556518555,
      "learning_rate": 0.0003352993529284638,
      "loss": 2.9059,
      "step": 106552
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0270769596099854,
      "learning_rate": 0.0003352952907784936,
      "loss": 3.0381,
      "step": 106553
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.071890354156494,
      "learning_rate": 0.0003352912286219614,
      "loss": 3.2138,
      "step": 106554
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1837356090545654,
      "learning_rate": 0.0003352871664588678,
      "loss": 3.0831,
      "step": 106555
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2507011890411377,
      "learning_rate": 0.0003352831042892137,
      "loss": 2.9113,
      "step": 106556
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.81461501121521,
      "learning_rate": 0.00033527904211299984,
      "loss": 3.2505,
      "step": 106557
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7930235862731934,
      "learning_rate": 0.00033527497993022685,
      "loss": 2.917,
      "step": 106558
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5805017948150635,
      "learning_rate": 0.00033527091774089563,
      "loss": 2.9696,
      "step": 106559
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.059241533279419,
      "learning_rate": 0.000335266855545007,
      "loss": 3.0263,
      "step": 106560
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2984139919281006,
      "learning_rate": 0.0003352627933425614,
      "loss": 2.8175,
      "step": 106561
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.799626350402832,
      "learning_rate": 0.00033525873113355984,
      "loss": 3.1239,
      "step": 106562
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5117295980453491,
      "learning_rate": 0.00033525466891800307,
      "loss": 3.0368,
      "step": 106563
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.418179988861084,
      "learning_rate": 0.00033525060669589176,
      "loss": 2.9781,
      "step": 106564
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6624566316604614,
      "learning_rate": 0.00033524654446722666,
      "loss": 3.2873,
      "step": 106565
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7305922508239746,
      "learning_rate": 0.00033524248223200864,
      "loss": 3.1194,
      "step": 106566
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9701073169708252,
      "learning_rate": 0.00033523841999023835,
      "loss": 2.8863,
      "step": 106567
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5689588785171509,
      "learning_rate": 0.00033523435774191645,
      "loss": 2.8055,
      "step": 106568
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0644993782043457,
      "learning_rate": 0.000335230295487044,
      "loss": 3.1397,
      "step": 106569
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7234407663345337,
      "learning_rate": 0.0003352262332256215,
      "loss": 3.0356,
      "step": 106570
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7412840127944946,
      "learning_rate": 0.0003352221709576497,
      "loss": 3.0638,
      "step": 106571
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6020935773849487,
      "learning_rate": 0.0003352181086831296,
      "loss": 3.021,
      "step": 106572
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.207714796066284,
      "learning_rate": 0.00033521404640206163,
      "loss": 2.9286,
      "step": 106573
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.25342059135437,
      "learning_rate": 0.0003352099841144468,
      "loss": 2.9268,
      "step": 106574
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.301189422607422,
      "learning_rate": 0.0003352059218202858,
      "loss": 3.0004,
      "step": 106575
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.680311679840088,
      "learning_rate": 0.0003352018595195792,
      "loss": 3.0882,
      "step": 106576
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7557368278503418,
      "learning_rate": 0.000335197797212328,
      "loss": 2.8114,
      "step": 106577
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7448443174362183,
      "learning_rate": 0.0003351937348985329,
      "loss": 2.9586,
      "step": 106578
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4472930431365967,
      "learning_rate": 0.00033518967257819453,
      "loss": 2.9175,
      "step": 106579
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0293679237365723,
      "learning_rate": 0.00033518561025131383,
      "loss": 2.9728,
      "step": 106580
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.809484839439392,
      "learning_rate": 0.00033518154791789144,
      "loss": 2.8959,
      "step": 106581
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.908403754234314,
      "learning_rate": 0.00033517748557792816,
      "loss": 2.8196,
      "step": 106582
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8240705728530884,
      "learning_rate": 0.0003351734232314246,
      "loss": 2.9038,
      "step": 106583
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.167501449584961,
      "learning_rate": 0.00033516936087838177,
      "loss": 3.049,
      "step": 106584
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0866613388061523,
      "learning_rate": 0.0003351652985188002,
      "loss": 3.2949,
      "step": 106585
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.265589475631714,
      "learning_rate": 0.0003351612361526808,
      "loss": 2.8327,
      "step": 106586
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8933340311050415,
      "learning_rate": 0.00033515717378002427,
      "loss": 2.8572,
      "step": 106587
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0456204414367676,
      "learning_rate": 0.00033515311140083125,
      "loss": 2.7709,
      "step": 106588
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.742179036140442,
      "learning_rate": 0.0003351490490151027,
      "loss": 3.0243,
      "step": 106589
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8443588018417358,
      "learning_rate": 0.0003351449866228393,
      "loss": 3.0603,
      "step": 106590
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9143223762512207,
      "learning_rate": 0.00033514092422404175,
      "loss": 3.1456,
      "step": 106591
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.061293840408325,
      "learning_rate": 0.0003351368618187108,
      "loss": 2.9459,
      "step": 106592
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5581597089767456,
      "learning_rate": 0.0003351327994068473,
      "loss": 3.0277,
      "step": 106593
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8460482358932495,
      "learning_rate": 0.00033512873698845195,
      "loss": 3.0335,
      "step": 106594
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.210498094558716,
      "learning_rate": 0.00033512467456352544,
      "loss": 2.793,
      "step": 106595
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1537859439849854,
      "learning_rate": 0.0003351206121320686,
      "loss": 3.1254,
      "step": 106596
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.328423500061035,
      "learning_rate": 0.00033511654969408224,
      "loss": 2.7824,
      "step": 106597
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3842172622680664,
      "learning_rate": 0.000335112487249567,
      "loss": 2.7816,
      "step": 106598
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.076059103012085,
      "learning_rate": 0.0003351084247985237,
      "loss": 2.8731,
      "step": 106599
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7982099056243896,
      "learning_rate": 0.0003351043623409531,
      "loss": 2.9736,
      "step": 106600
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.443384885787964,
      "learning_rate": 0.0003351002998768559,
      "loss": 3.0145,
      "step": 106601
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.923351526260376,
      "learning_rate": 0.00033509623740623286,
      "loss": 3.031,
      "step": 106602
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1314165592193604,
      "learning_rate": 0.00033509217492908494,
      "loss": 3.1036,
      "step": 106603
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.766395092010498,
      "learning_rate": 0.00033508811244541256,
      "loss": 2.9541,
      "step": 106604
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3399784564971924,
      "learning_rate": 0.00033508404995521665,
      "loss": 2.9993,
      "step": 106605
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.967146635055542,
      "learning_rate": 0.00033507998745849795,
      "loss": 3.0579,
      "step": 106606
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8943259716033936,
      "learning_rate": 0.00033507592495525724,
      "loss": 2.9532,
      "step": 106607
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9096124172210693,
      "learning_rate": 0.0003350718624454953,
      "loss": 3.0611,
      "step": 106608
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0010344982147217,
      "learning_rate": 0.00033506779992921275,
      "loss": 2.938,
      "step": 106609
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.89180326461792,
      "learning_rate": 0.00033506373740641054,
      "loss": 3.0977,
      "step": 106610
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.718522548675537,
      "learning_rate": 0.00033505967487708924,
      "loss": 2.9926,
      "step": 106611
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.095090389251709,
      "learning_rate": 0.0003350556123412497,
      "loss": 3.1998,
      "step": 106612
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8695437908172607,
      "learning_rate": 0.00033505154979889267,
      "loss": 3.0001,
      "step": 106613
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5509405136108398,
      "learning_rate": 0.00033504748725001886,
      "loss": 3.1095,
      "step": 106614
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.572312593460083,
      "learning_rate": 0.0003350434246946291,
      "loss": 2.9462,
      "step": 106615
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.9734914302825928,
      "learning_rate": 0.0003350393621327241,
      "loss": 3.0039,
      "step": 106616
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9769163131713867,
      "learning_rate": 0.00033503529956430464,
      "loss": 3.1571,
      "step": 106617
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8293945789337158,
      "learning_rate": 0.00033503123698937144,
      "loss": 2.7102,
      "step": 106618
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8812252283096313,
      "learning_rate": 0.0003350271744079253,
      "loss": 3.1416,
      "step": 106619
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.0316998958587646,
      "learning_rate": 0.0003350231118199669,
      "loss": 2.7792,
      "step": 106620
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.621455430984497,
      "learning_rate": 0.00033501904922549704,
      "loss": 2.9745,
      "step": 106621
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9457954168319702,
      "learning_rate": 0.0003350149866245165,
      "loss": 2.9422,
      "step": 106622
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.886470913887024,
      "learning_rate": 0.000335010924017026,
      "loss": 2.7688,
      "step": 106623
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.342313528060913,
      "learning_rate": 0.0003350068614030263,
      "loss": 2.9612,
      "step": 106624
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1439664363861084,
      "learning_rate": 0.0003350027987825182,
      "loss": 2.824,
      "step": 106625
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0699422359466553,
      "learning_rate": 0.00033499873615550237,
      "loss": 3.0212,
      "step": 106626
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7460733652114868,
      "learning_rate": 0.00033499467352197975,
      "loss": 3.0097,
      "step": 106627
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1252570152282715,
      "learning_rate": 0.0003349906108819508,
      "loss": 3.0311,
      "step": 106628
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.356044292449951,
      "learning_rate": 0.0003349865482354165,
      "loss": 3.2091,
      "step": 106629
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6535284519195557,
      "learning_rate": 0.00033498248558237753,
      "loss": 2.7695,
      "step": 106630
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.082926034927368,
      "learning_rate": 0.00033497842292283466,
      "loss": 2.7876,
      "step": 106631
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.967864990234375,
      "learning_rate": 0.00033497436025678863,
      "loss": 2.9879,
      "step": 106632
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5543510913848877,
      "learning_rate": 0.0003349702975842402,
      "loss": 2.9509,
      "step": 106633
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.558716058731079,
      "learning_rate": 0.0003349662349051901,
      "loss": 2.703,
      "step": 106634
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.4350545406341553,
      "learning_rate": 0.0003349621722196392,
      "loss": 3.0241,
      "step": 106635
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.341533899307251,
      "learning_rate": 0.00033495810952758817,
      "loss": 3.1851,
      "step": 106636
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.6285910606384277,
      "learning_rate": 0.0003349540468290377,
      "loss": 2.9121,
      "step": 106637
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8026255369186401,
      "learning_rate": 0.0003349499841239886,
      "loss": 3.2364,
      "step": 106638
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.955008864402771,
      "learning_rate": 0.00033494592141244175,
      "loss": 2.9195,
      "step": 106639
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.973374128341675,
      "learning_rate": 0.0003349418586943977,
      "loss": 2.5755,
      "step": 106640
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7346272468566895,
      "learning_rate": 0.00033493779596985735,
      "loss": 3.1727,
      "step": 106641
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.640493631362915,
      "learning_rate": 0.0003349337332388214,
      "loss": 3.0844,
      "step": 106642
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.94057035446167,
      "learning_rate": 0.00033492967050129053,
      "loss": 3.1883,
      "step": 106643
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7335317134857178,
      "learning_rate": 0.0003349256077572656,
      "loss": 3.0553,
      "step": 106644
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7456048727035522,
      "learning_rate": 0.0003349215450067475,
      "loss": 3.0416,
      "step": 106645
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6163617372512817,
      "learning_rate": 0.0003349174822497366,
      "loss": 3.1111,
      "step": 106646
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7962663173675537,
      "learning_rate": 0.00033491341948623394,
      "loss": 2.7272,
      "step": 106647
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.583533525466919,
      "learning_rate": 0.00033490935671624036,
      "loss": 2.8701,
      "step": 106648
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0187017917633057,
      "learning_rate": 0.00033490529393975636,
      "loss": 2.9592,
      "step": 106649
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0156891345977783,
      "learning_rate": 0.00033490123115678276,
      "loss": 3.0378,
      "step": 106650
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.505906581878662,
      "learning_rate": 0.0003348971683673205,
      "loss": 2.5672,
      "step": 106651
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6836696863174438,
      "learning_rate": 0.00033489310557137007,
      "loss": 2.819,
      "step": 106652
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7893693447113037,
      "learning_rate": 0.0003348890427689324,
      "loss": 3.1126,
      "step": 106653
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.834010124206543,
      "learning_rate": 0.00033488497996000824,
      "loss": 3.0195,
      "step": 106654
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.119497060775757,
      "learning_rate": 0.0003348809171445982,
      "loss": 3.0022,
      "step": 106655
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7458522319793701,
      "learning_rate": 0.0003348768543227032,
      "loss": 2.9486,
      "step": 106656
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.111431121826172,
      "learning_rate": 0.00033487279149432397,
      "loss": 2.8567,
      "step": 106657
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.248225212097168,
      "learning_rate": 0.00033486872865946116,
      "loss": 2.8459,
      "step": 106658
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5556206703186035,
      "learning_rate": 0.0003348646658181156,
      "loss": 2.9534,
      "step": 106659
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7280733585357666,
      "learning_rate": 0.0003348606029702881,
      "loss": 2.6431,
      "step": 106660
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9162747859954834,
      "learning_rate": 0.0003348565401159793,
      "loss": 3.1362,
      "step": 106661
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8645401000976562,
      "learning_rate": 0.00033485247725519,
      "loss": 2.8162,
      "step": 106662
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8423117399215698,
      "learning_rate": 0.000334848414387921,
      "loss": 2.9921,
      "step": 106663
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.446005344390869,
      "learning_rate": 0.0003348443515141731,
      "loss": 3.0423,
      "step": 106664
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.855295181274414,
      "learning_rate": 0.00033484028863394686,
      "loss": 3.0004,
      "step": 106665
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.319681406021118,
      "learning_rate": 0.0003348362257472432,
      "loss": 2.9251,
      "step": 106666
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.633206844329834,
      "learning_rate": 0.00033483216285406283,
      "loss": 3.2166,
      "step": 106667
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6908684968948364,
      "learning_rate": 0.0003348280999544065,
      "loss": 2.8183,
      "step": 106668
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3878793716430664,
      "learning_rate": 0.00033482403704827493,
      "loss": 2.9429,
      "step": 106669
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7693134546279907,
      "learning_rate": 0.000334819974135669,
      "loss": 2.8644,
      "step": 106670
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.682020664215088,
      "learning_rate": 0.0003348159112165893,
      "loss": 2.8583,
      "step": 106671
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.249046802520752,
      "learning_rate": 0.00033481184829103666,
      "loss": 3.1425,
      "step": 106672
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.866641879081726,
      "learning_rate": 0.00033480778535901194,
      "loss": 2.6958,
      "step": 106673
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.13154935836792,
      "learning_rate": 0.0003348037224205157,
      "loss": 3.0371,
      "step": 106674
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3749210834503174,
      "learning_rate": 0.0003347996594755488,
      "loss": 2.8321,
      "step": 106675
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9756509065628052,
      "learning_rate": 0.0003347955965241121,
      "loss": 2.9406,
      "step": 106676
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8729844093322754,
      "learning_rate": 0.0003347915335662061,
      "loss": 3.1318,
      "step": 106677
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.118150234222412,
      "learning_rate": 0.00033478747060183166,
      "loss": 2.9209,
      "step": 106678
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3427302837371826,
      "learning_rate": 0.0003347834076309898,
      "loss": 2.8919,
      "step": 106679
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7222602367401123,
      "learning_rate": 0.00033477934465368087,
      "loss": 3.0606,
      "step": 106680
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.78524649143219,
      "learning_rate": 0.00033477528166990576,
      "loss": 3.0031,
      "step": 106681
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8758410215377808,
      "learning_rate": 0.00033477121867966545,
      "loss": 2.9999,
      "step": 106682
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9020873308181763,
      "learning_rate": 0.00033476715568296037,
      "loss": 3.1198,
      "step": 106683
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9209965467453003,
      "learning_rate": 0.0003347630926797915,
      "loss": 3.1109,
      "step": 106684
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8095120191574097,
      "learning_rate": 0.00033475902967015946,
      "loss": 2.9198,
      "step": 106685
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.37032413482666,
      "learning_rate": 0.00033475496665406505,
      "loss": 3.1157,
      "step": 106686
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.993082046508789,
      "learning_rate": 0.000334750903631509,
      "loss": 2.9389,
      "step": 106687
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1326377391815186,
      "learning_rate": 0.00033474684060249226,
      "loss": 2.9781,
      "step": 106688
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9655719995498657,
      "learning_rate": 0.00033474277756701535,
      "loss": 2.9705,
      "step": 106689
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.945608377456665,
      "learning_rate": 0.00033473871452507903,
      "loss": 3.2183,
      "step": 106690
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.075448989868164,
      "learning_rate": 0.0003347346514766842,
      "loss": 3.0732,
      "step": 106691
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8243498802185059,
      "learning_rate": 0.0003347305884218315,
      "loss": 3.0023,
      "step": 106692
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.242177963256836,
      "learning_rate": 0.0003347265253605217,
      "loss": 3.0105,
      "step": 106693
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8225233554840088,
      "learning_rate": 0.00033472246229275564,
      "loss": 2.9678,
      "step": 106694
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8282606601715088,
      "learning_rate": 0.0003347183992185341,
      "loss": 2.9332,
      "step": 106695
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7180424928665161,
      "learning_rate": 0.0003347143361378576,
      "loss": 2.97,
      "step": 106696
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.131058692932129,
      "learning_rate": 0.0003347102730507271,
      "loss": 2.622,
      "step": 106697
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9536157846450806,
      "learning_rate": 0.00033470620995714335,
      "loss": 3.1843,
      "step": 106698
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.858322262763977,
      "learning_rate": 0.000334702146857107,
      "loss": 2.8744,
      "step": 106699
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.8526058197021484,
      "learning_rate": 0.0003346980837506189,
      "loss": 3.0137,
      "step": 106700
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.722781777381897,
      "learning_rate": 0.00033469402063767973,
      "loss": 3.1587,
      "step": 106701
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9587383270263672,
      "learning_rate": 0.0003346899575182904,
      "loss": 2.8537,
      "step": 106702
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.006943464279175,
      "learning_rate": 0.0003346858943924514,
      "loss": 2.9697,
      "step": 106703
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6982049942016602,
      "learning_rate": 0.0003346818312601637,
      "loss": 3.2776,
      "step": 106704
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.6556239128112793,
      "learning_rate": 0.0003346777681214281,
      "loss": 2.8173,
      "step": 106705
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.2852983474731445,
      "learning_rate": 0.00033467370497624514,
      "loss": 2.8789,
      "step": 106706
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7578190565109253,
      "learning_rate": 0.00033466964182461565,
      "loss": 3.2483,
      "step": 106707
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7783987522125244,
      "learning_rate": 0.00033466557866654046,
      "loss": 2.9237,
      "step": 106708
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5327593088150024,
      "learning_rate": 0.00033466151550202034,
      "loss": 3.0952,
      "step": 106709
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3723299503326416,
      "learning_rate": 0.00033465745233105594,
      "loss": 2.7956,
      "step": 106710
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6012190580368042,
      "learning_rate": 0.000334653389153648,
      "loss": 3.0392,
      "step": 106711
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6579644680023193,
      "learning_rate": 0.0003346493259697975,
      "loss": 2.9841,
      "step": 106712
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7601771354675293,
      "learning_rate": 0.00033464526277950487,
      "loss": 3.3124,
      "step": 106713
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.285612106323242,
      "learning_rate": 0.00033464119958277115,
      "loss": 2.9242,
      "step": 106714
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8681412935256958,
      "learning_rate": 0.00033463713637959695,
      "loss": 3.2085,
      "step": 106715
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8208810091018677,
      "learning_rate": 0.00033463307316998307,
      "loss": 3.0123,
      "step": 106716
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2088563442230225,
      "learning_rate": 0.0003346290099539302,
      "loss": 3.1412,
      "step": 106717
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8615341186523438,
      "learning_rate": 0.0003346249467314392,
      "loss": 3.0457,
      "step": 106718
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6933073997497559,
      "learning_rate": 0.0003346208835025106,
      "loss": 2.872,
      "step": 106719
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9129477739334106,
      "learning_rate": 0.0003346168202671455,
      "loss": 2.8633,
      "step": 106720
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9929746389389038,
      "learning_rate": 0.00033461275702534446,
      "loss": 3.0294,
      "step": 106721
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.3387463092803955,
      "learning_rate": 0.00033460869377710816,
      "loss": 3.1813,
      "step": 106722
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1048741340637207,
      "learning_rate": 0.0003346046305224375,
      "loss": 2.7216,
      "step": 106723
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7848484516143799,
      "learning_rate": 0.0003346005672613333,
      "loss": 2.9536,
      "step": 106724
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0697200298309326,
      "learning_rate": 0.00033459650399379607,
      "loss": 2.9823,
      "step": 106725
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.140784502029419,
      "learning_rate": 0.0003345924407198267,
      "loss": 2.8447,
      "step": 106726
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.12219500541687,
      "learning_rate": 0.000334588377439426,
      "loss": 2.7638,
      "step": 106727
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3553342819213867,
      "learning_rate": 0.00033458431415259464,
      "loss": 3.1644,
      "step": 106728
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1370222568511963,
      "learning_rate": 0.00033458025085933337,
      "loss": 2.9363,
      "step": 106729
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.715017318725586,
      "learning_rate": 0.000334576187559643,
      "loss": 2.9415,
      "step": 106730
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7158300876617432,
      "learning_rate": 0.0003345721242535243,
      "loss": 3.0492,
      "step": 106731
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.656414270401001,
      "learning_rate": 0.00033456806094097794,
      "loss": 2.995,
      "step": 106732
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8208483457565308,
      "learning_rate": 0.00033456399762200477,
      "loss": 2.9131,
      "step": 106733
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.912958025932312,
      "learning_rate": 0.00033455993429660553,
      "loss": 3.119,
      "step": 106734
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5701861381530762,
      "learning_rate": 0.00033455587096478087,
      "loss": 3.2172,
      "step": 106735
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4224913120269775,
      "learning_rate": 0.0003345518076265316,
      "loss": 3.2717,
      "step": 106736
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7746200561523438,
      "learning_rate": 0.0003345477442818586,
      "loss": 2.9668,
      "step": 106737
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3852784633636475,
      "learning_rate": 0.0003345436809307625,
      "loss": 3.1208,
      "step": 106738
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9942338466644287,
      "learning_rate": 0.00033453961757324397,
      "loss": 2.9998,
      "step": 106739
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.7360284328460693,
      "learning_rate": 0.000334535554209304,
      "loss": 3.0617,
      "step": 106740
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.570950984954834,
      "learning_rate": 0.0003345314908389431,
      "loss": 2.9376,
      "step": 106741
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6534587144851685,
      "learning_rate": 0.00033452742746216223,
      "loss": 3.1144,
      "step": 106742
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9283241033554077,
      "learning_rate": 0.0003345233640789621,
      "loss": 3.0193,
      "step": 106743
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6863142251968384,
      "learning_rate": 0.0003345193006893433,
      "loss": 2.8961,
      "step": 106744
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.825573205947876,
      "learning_rate": 0.0003345152372933068,
      "loss": 3.197,
      "step": 106745
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5387113094329834,
      "learning_rate": 0.0003345111738908533,
      "loss": 3.2353,
      "step": 106746
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0279541015625,
      "learning_rate": 0.0003345071104819834,
      "loss": 2.9603,
      "step": 106747
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.882197618484497,
      "learning_rate": 0.0003345030470666981,
      "loss": 3.1234,
      "step": 106748
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.837760090827942,
      "learning_rate": 0.00033449898364499794,
      "loss": 3.0974,
      "step": 106749
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.005457639694214,
      "learning_rate": 0.0003344949202168838,
      "loss": 2.9738,
      "step": 106750
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.4310450553894043,
      "learning_rate": 0.0003344908567823564,
      "loss": 2.9409,
      "step": 106751
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.8707828521728516,
      "learning_rate": 0.0003344867933414165,
      "loss": 3.1627,
      "step": 106752
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9801840782165527,
      "learning_rate": 0.0003344827298940648,
      "loss": 2.9999,
      "step": 106753
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.6461615562438965,
      "learning_rate": 0.0003344786664403022,
      "loss": 3.0934,
      "step": 106754
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7493208646774292,
      "learning_rate": 0.00033447460298012934,
      "loss": 3.0708,
      "step": 106755
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.623887300491333,
      "learning_rate": 0.000334470539513547,
      "loss": 2.9845,
      "step": 106756
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9005351066589355,
      "learning_rate": 0.00033446647604055594,
      "loss": 3.1365,
      "step": 106757
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.020275592803955,
      "learning_rate": 0.0003344624125611569,
      "loss": 2.9579,
      "step": 106758
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.705013394355774,
      "learning_rate": 0.00033445834907535064,
      "loss": 2.9511,
      "step": 106759
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.570282459259033,
      "learning_rate": 0.00033445428558313796,
      "loss": 2.8643,
      "step": 106760
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6037230491638184,
      "learning_rate": 0.00033445022208451953,
      "loss": 3.121,
      "step": 106761
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8660862445831299,
      "learning_rate": 0.0003344461585794961,
      "loss": 2.9935,
      "step": 106762
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0385630130767822,
      "learning_rate": 0.0003344420950680686,
      "loss": 2.9014,
      "step": 106763
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6765118837356567,
      "learning_rate": 0.00033443803155023763,
      "loss": 3.0858,
      "step": 106764
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9265213012695312,
      "learning_rate": 0.00033443396802600395,
      "loss": 3.0001,
      "step": 106765
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.672942042350769,
      "learning_rate": 0.0003344299044953683,
      "loss": 2.8923,
      "step": 106766
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0115833282470703,
      "learning_rate": 0.00033442584095833163,
      "loss": 3.1287,
      "step": 106767
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.308976650238037,
      "learning_rate": 0.00033442177741489445,
      "loss": 2.6709,
      "step": 106768
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8452556133270264,
      "learning_rate": 0.00033441771386505755,
      "loss": 2.9972,
      "step": 106769
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.712707281112671,
      "learning_rate": 0.00033441365030882186,
      "loss": 3.0099,
      "step": 106770
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6782824993133545,
      "learning_rate": 0.000334409586746188,
      "loss": 3.1243,
      "step": 106771
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8977830410003662,
      "learning_rate": 0.0003344055231771567,
      "loss": 2.8072,
      "step": 106772
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.812904715538025,
      "learning_rate": 0.0003344014596017288,
      "loss": 3.054,
      "step": 106773
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3119630813598633,
      "learning_rate": 0.000334397396019905,
      "loss": 2.8956,
      "step": 106774
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.781768798828125,
      "learning_rate": 0.00033439333243168607,
      "loss": 3.026,
      "step": 106775
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7850208282470703,
      "learning_rate": 0.00033438926883707287,
      "loss": 2.9522,
      "step": 106776
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3760435581207275,
      "learning_rate": 0.0003343852052360659,
      "loss": 3.2007,
      "step": 106777
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8020697832107544,
      "learning_rate": 0.0003343811416286661,
      "loss": 2.9922,
      "step": 106778
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7507978677749634,
      "learning_rate": 0.00033437707801487425,
      "loss": 3.124,
      "step": 106779
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9345598220825195,
      "learning_rate": 0.00033437301439469107,
      "loss": 3.2478,
      "step": 106780
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4428579807281494,
      "learning_rate": 0.00033436895076811727,
      "loss": 3.1152,
      "step": 106781
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.484055519104004,
      "learning_rate": 0.00033436488713515356,
      "loss": 2.9714,
      "step": 106782
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9546252489089966,
      "learning_rate": 0.0003343608234958009,
      "loss": 3.1304,
      "step": 106783
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.467615842819214,
      "learning_rate": 0.00033435675985005983,
      "loss": 2.9447,
      "step": 106784
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.7353034019470215,
      "learning_rate": 0.0003343526961979312,
      "loss": 3.0148,
      "step": 106785
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0991764068603516,
      "learning_rate": 0.00033434863253941575,
      "loss": 2.9098,
      "step": 106786
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.624010682106018,
      "learning_rate": 0.0003343445688745143,
      "loss": 2.8197,
      "step": 106787
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8703398704528809,
      "learning_rate": 0.0003343405052032275,
      "loss": 2.7297,
      "step": 106788
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8293722867965698,
      "learning_rate": 0.00033433644152555615,
      "loss": 3.1729,
      "step": 106789
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2929940223693848,
      "learning_rate": 0.00033433237784150094,
      "loss": 2.825,
      "step": 106790
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6849554777145386,
      "learning_rate": 0.00033432831415106286,
      "loss": 2.9607,
      "step": 106791
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5968942642211914,
      "learning_rate": 0.00033432425045424235,
      "loss": 3.0463,
      "step": 106792
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9596972465515137,
      "learning_rate": 0.00033432018675104034,
      "loss": 2.8188,
      "step": 106793
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6342917680740356,
      "learning_rate": 0.00033431612304145764,
      "loss": 3.0986,
      "step": 106794
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.187450885772705,
      "learning_rate": 0.00033431205932549484,
      "loss": 3.2148,
      "step": 106795
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5930986404418945,
      "learning_rate": 0.0003343079956031528,
      "loss": 2.8614,
      "step": 106796
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.550173282623291,
      "learning_rate": 0.0003343039318744323,
      "loss": 2.6756,
      "step": 106797
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.974539279937744,
      "learning_rate": 0.00033429986813933405,
      "loss": 2.9528,
      "step": 106798
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.0443532466888428,
      "learning_rate": 0.00033429580439785873,
      "loss": 2.9518,
      "step": 106799
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2684171199798584,
      "learning_rate": 0.0003342917406500072,
      "loss": 2.9832,
      "step": 106800
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9850064516067505,
      "learning_rate": 0.00033428767689578026,
      "loss": 2.9725,
      "step": 106801
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.401342868804932,
      "learning_rate": 0.00033428361313517853,
      "loss": 2.9074,
      "step": 106802
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6972507238388062,
      "learning_rate": 0.00033427954936820283,
      "loss": 3.0317,
      "step": 106803
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3648195266723633,
      "learning_rate": 0.00033427548559485397,
      "loss": 2.859,
      "step": 106804
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9173959493637085,
      "learning_rate": 0.00033427142181513254,
      "loss": 3.2618,
      "step": 106805
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.63968825340271,
      "learning_rate": 0.00033426735802903954,
      "loss": 3.0676,
      "step": 106806
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.772047758102417,
      "learning_rate": 0.00033426329423657554,
      "loss": 3.0227,
      "step": 106807
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0640945434570312,
      "learning_rate": 0.00033425923043774126,
      "loss": 3.0525,
      "step": 106808
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7825433015823364,
      "learning_rate": 0.0003342551666325377,
      "loss": 3.142,
      "step": 106809
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7563129663467407,
      "learning_rate": 0.0003342511028209653,
      "loss": 2.863,
      "step": 106810
    },
    {
      "epoch": 1.39,
      "grad_norm": 5.103618144989014,
      "learning_rate": 0.00033424703900302507,
      "loss": 2.8301,
      "step": 106811
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.7820534706115723,
      "learning_rate": 0.0003342429751787177,
      "loss": 2.9446,
      "step": 106812
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8448925018310547,
      "learning_rate": 0.0003342389113480438,
      "loss": 2.9715,
      "step": 106813
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.8719191551208496,
      "learning_rate": 0.00033423484751100434,
      "loss": 2.8469,
      "step": 106814
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.2085349559783936,
      "learning_rate": 0.00033423078366759986,
      "loss": 3.0844,
      "step": 106815
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8041746616363525,
      "learning_rate": 0.0003342267198178314,
      "loss": 2.9713,
      "step": 106816
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7059636116027832,
      "learning_rate": 0.0003342226559616994,
      "loss": 2.9677,
      "step": 106817
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1264352798461914,
      "learning_rate": 0.0003342185920992048,
      "loss": 2.9387,
      "step": 106818
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1107091903686523,
      "learning_rate": 0.00033421452823034835,
      "loss": 3.0106,
      "step": 106819
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8334823846817017,
      "learning_rate": 0.0003342104643551307,
      "loss": 2.896,
      "step": 106820
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8674167394638062,
      "learning_rate": 0.00033420640047355274,
      "loss": 2.966,
      "step": 106821
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5851259231567383,
      "learning_rate": 0.00033420233658561525,
      "loss": 3.0442,
      "step": 106822
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7312275171279907,
      "learning_rate": 0.00033419827269131876,
      "loss": 3.0494,
      "step": 106823
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8178199529647827,
      "learning_rate": 0.0003341942087906642,
      "loss": 2.8192,
      "step": 106824
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0702006816864014,
      "learning_rate": 0.00033419014488365233,
      "loss": 2.9262,
      "step": 106825
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.324209690093994,
      "learning_rate": 0.00033418608097028385,
      "loss": 3.1811,
      "step": 106826
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.315161943435669,
      "learning_rate": 0.0003341820170505595,
      "loss": 2.8989,
      "step": 106827
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4843966960906982,
      "learning_rate": 0.0003341779531244801,
      "loss": 2.9597,
      "step": 106828
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7244243621826172,
      "learning_rate": 0.00033417388919204644,
      "loss": 2.9514,
      "step": 106829
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.448197364807129,
      "learning_rate": 0.00033416982525325906,
      "loss": 3.2571,
      "step": 106830
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.29311203956604,
      "learning_rate": 0.00033416576130811896,
      "loss": 2.8311,
      "step": 106831
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.77814519405365,
      "learning_rate": 0.0003341616973566268,
      "loss": 3.0164,
      "step": 106832
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8407962322235107,
      "learning_rate": 0.00033415763339878326,
      "loss": 3.0575,
      "step": 106833
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1418673992156982,
      "learning_rate": 0.0003341535694345893,
      "loss": 3.0085,
      "step": 106834
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.6904425621032715,
      "learning_rate": 0.0003341495054640454,
      "loss": 3.1442,
      "step": 106835
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9820221662521362,
      "learning_rate": 0.00033414544148715255,
      "loss": 2.9619,
      "step": 106836
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9884601831436157,
      "learning_rate": 0.00033414137750391147,
      "loss": 2.9486,
      "step": 106837
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3742990493774414,
      "learning_rate": 0.00033413731351432277,
      "loss": 2.8547,
      "step": 106838
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1343114376068115,
      "learning_rate": 0.0003341332495183873,
      "loss": 2.8944,
      "step": 106839
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.160587787628174,
      "learning_rate": 0.00033412918551610585,
      "loss": 3.097,
      "step": 106840
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.274069309234619,
      "learning_rate": 0.0003341251215074791,
      "loss": 2.8828,
      "step": 106841
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.079542636871338,
      "learning_rate": 0.00033412105749250793,
      "loss": 3.0346,
      "step": 106842
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7747280597686768,
      "learning_rate": 0.000334116993471193,
      "loss": 3.0506,
      "step": 106843
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.754148483276367,
      "learning_rate": 0.00033411292944353496,
      "loss": 2.9317,
      "step": 106844
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9729278087615967,
      "learning_rate": 0.0003341088654095348,
      "loss": 2.8894,
      "step": 106845
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.823884129524231,
      "learning_rate": 0.0003341048013691931,
      "loss": 2.9488,
      "step": 106846
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.888940453529358,
      "learning_rate": 0.0003341007373225107,
      "loss": 2.8952,
      "step": 106847
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9171867370605469,
      "learning_rate": 0.00033409667326948826,
      "loss": 2.9139,
      "step": 106848
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7449313402175903,
      "learning_rate": 0.00033409260921012676,
      "loss": 2.9778,
      "step": 106849
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3963682651519775,
      "learning_rate": 0.0003340885451444267,
      "loss": 2.8785,
      "step": 106850
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.843715190887451,
      "learning_rate": 0.0003340844810723889,
      "loss": 2.9698,
      "step": 106851
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3957595825195312,
      "learning_rate": 0.0003340804169940142,
      "loss": 2.994,
      "step": 106852
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.071272373199463,
      "learning_rate": 0.0003340763529093033,
      "loss": 2.9183,
      "step": 106853
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.134800910949707,
      "learning_rate": 0.00033407228881825694,
      "loss": 2.9807,
      "step": 106854
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.860246419906616,
      "learning_rate": 0.000334068224720876,
      "loss": 3.0931,
      "step": 106855
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7420884370803833,
      "learning_rate": 0.00033406416061716103,
      "loss": 2.8927,
      "step": 106856
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.80099356174469,
      "learning_rate": 0.00033406009650711286,
      "loss": 3.064,
      "step": 106857
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.080749988555908,
      "learning_rate": 0.00033405603239073235,
      "loss": 3.0016,
      "step": 106858
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3188157081604004,
      "learning_rate": 0.0003340519682680201,
      "loss": 3.0321,
      "step": 106859
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8587064743041992,
      "learning_rate": 0.00033404790413897703,
      "loss": 2.9366,
      "step": 106860
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1741433143615723,
      "learning_rate": 0.0003340438400036038,
      "loss": 2.9963,
      "step": 106861
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.015204668045044,
      "learning_rate": 0.0003340397758619012,
      "loss": 3.1445,
      "step": 106862
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.532491445541382,
      "learning_rate": 0.0003340357117138699,
      "loss": 2.7968,
      "step": 106863
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6627172231674194,
      "learning_rate": 0.0003340316475595107,
      "loss": 3.0166,
      "step": 106864
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7954432964324951,
      "learning_rate": 0.00033402758339882445,
      "loss": 3.0361,
      "step": 106865
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1922380924224854,
      "learning_rate": 0.0003340235192318118,
      "loss": 3.149,
      "step": 106866
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.819512128829956,
      "learning_rate": 0.0003340194550584735,
      "loss": 2.9288,
      "step": 106867
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.04262375831604,
      "learning_rate": 0.00033401539087881045,
      "loss": 3.0712,
      "step": 106868
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8626927137374878,
      "learning_rate": 0.00033401132669282324,
      "loss": 3.0063,
      "step": 106869
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.718052864074707,
      "learning_rate": 0.0003340072625005126,
      "loss": 2.9506,
      "step": 106870
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8344541788101196,
      "learning_rate": 0.0003340031983018794,
      "loss": 3.024,
      "step": 106871
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0131795406341553,
      "learning_rate": 0.00033399913409692445,
      "loss": 3.3103,
      "step": 106872
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9694660902023315,
      "learning_rate": 0.00033399506988564834,
      "loss": 3.2536,
      "step": 106873
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.0555107593536377,
      "learning_rate": 0.0003339910056680519,
      "loss": 3.0097,
      "step": 106874
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7391245365142822,
      "learning_rate": 0.00033398694144413596,
      "loss": 2.9033,
      "step": 106875
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9417283535003662,
      "learning_rate": 0.00033398287721390116,
      "loss": 2.962,
      "step": 106876
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.366687536239624,
      "learning_rate": 0.00033397881297734833,
      "loss": 2.9112,
      "step": 106877
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1657326221466064,
      "learning_rate": 0.00033397474873447813,
      "loss": 2.9995,
      "step": 106878
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0808286666870117,
      "learning_rate": 0.0003339706844852915,
      "loss": 3.0274,
      "step": 106879
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.053985834121704,
      "learning_rate": 0.0003339666202297889,
      "loss": 3.0335,
      "step": 106880
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.7963879108428955,
      "learning_rate": 0.00033396255596797146,
      "loss": 2.8138,
      "step": 106881
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.511549472808838,
      "learning_rate": 0.0003339584916998397,
      "loss": 3.0482,
      "step": 106882
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.7323906421661377,
      "learning_rate": 0.00033395442742539425,
      "loss": 2.9475,
      "step": 106883
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.003730297088623,
      "learning_rate": 0.0003339503631446362,
      "loss": 2.9166,
      "step": 106884
    },
    {
      "epoch": 1.39,
      "grad_norm": 5.894525527954102,
      "learning_rate": 0.00033394629885756603,
      "loss": 3.035,
      "step": 106885
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.456688165664673,
      "learning_rate": 0.0003339422345641847,
      "loss": 2.9315,
      "step": 106886
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.003094434738159,
      "learning_rate": 0.0003339381702644928,
      "loss": 3.0034,
      "step": 106887
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1210360527038574,
      "learning_rate": 0.00033393410595849115,
      "loss": 2.8261,
      "step": 106888
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.342327356338501,
      "learning_rate": 0.0003339300416461806,
      "loss": 2.9336,
      "step": 106889
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.0384721755981445,
      "learning_rate": 0.00033392597732756176,
      "loss": 3.2432,
      "step": 106890
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.832520008087158,
      "learning_rate": 0.0003339219130026354,
      "loss": 3.029,
      "step": 106891
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7800005674362183,
      "learning_rate": 0.00033391784867140234,
      "loss": 3.1913,
      "step": 106892
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.059399366378784,
      "learning_rate": 0.0003339137843338633,
      "loss": 2.942,
      "step": 106893
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.6809544563293457,
      "learning_rate": 0.0003339097199900191,
      "loss": 3.0232,
      "step": 106894
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.865736722946167,
      "learning_rate": 0.0003339056556398705,
      "loss": 2.9422,
      "step": 106895
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.732547402381897,
      "learning_rate": 0.000333901591283418,
      "loss": 3.2361,
      "step": 106896
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2952895164489746,
      "learning_rate": 0.00033389752692066264,
      "loss": 2.9767,
      "step": 106897
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.326077461242676,
      "learning_rate": 0.0003338934625516052,
      "loss": 2.9118,
      "step": 106898
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4167473316192627,
      "learning_rate": 0.0003338893981762462,
      "loss": 3.1548,
      "step": 106899
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6535788774490356,
      "learning_rate": 0.00033388533379458655,
      "loss": 2.9978,
      "step": 106900
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.410584568977356,
      "learning_rate": 0.00033388126940662706,
      "loss": 2.8099,
      "step": 106901
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.0516016483306885,
      "learning_rate": 0.0003338772050123683,
      "loss": 2.9309,
      "step": 106902
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.347478151321411,
      "learning_rate": 0.00033387314061181114,
      "loss": 2.85,
      "step": 106903
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.933026909828186,
      "learning_rate": 0.0003338690762049564,
      "loss": 2.7871,
      "step": 106904
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3229267597198486,
      "learning_rate": 0.00033386501179180465,
      "loss": 3.1896,
      "step": 106905
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9974288940429688,
      "learning_rate": 0.0003338609473723568,
      "loss": 2.9577,
      "step": 106906
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.358673572540283,
      "learning_rate": 0.00033385688294661355,
      "loss": 2.9988,
      "step": 106907
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8036408424377441,
      "learning_rate": 0.0003338528185145757,
      "loss": 3.0771,
      "step": 106908
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.490003824234009,
      "learning_rate": 0.0003338487540762439,
      "loss": 2.9975,
      "step": 106909
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.923869252204895,
      "learning_rate": 0.00033384468963161914,
      "loss": 3.0956,
      "step": 106910
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.03908109664917,
      "learning_rate": 0.0003338406251807019,
      "loss": 3.0342,
      "step": 106911
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7153639793395996,
      "learning_rate": 0.000333836560723493,
      "loss": 3.1472,
      "step": 106912
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.95322585105896,
      "learning_rate": 0.00033383249625999333,
      "loss": 3.0787,
      "step": 106913
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.8710057735443115,
      "learning_rate": 0.00033382843179020346,
      "loss": 2.9933,
      "step": 106914
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.350911855697632,
      "learning_rate": 0.0003338243673141243,
      "loss": 3.0341,
      "step": 106915
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4760870933532715,
      "learning_rate": 0.00033382030283175666,
      "loss": 3.0522,
      "step": 106916
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.3940930366516113,
      "learning_rate": 0.0003338162383431011,
      "loss": 3.0326,
      "step": 106917
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.2540884017944336,
      "learning_rate": 0.00033381217384815837,
      "loss": 2.9291,
      "step": 106918
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.080444097518921,
      "learning_rate": 0.00033380810934692946,
      "loss": 2.7041,
      "step": 106919
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.228724241256714,
      "learning_rate": 0.00033380404483941494,
      "loss": 2.8092,
      "step": 106920
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.2096452713012695,
      "learning_rate": 0.0003337999803256156,
      "loss": 3.0779,
      "step": 106921
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.2591421604156494,
      "learning_rate": 0.0003337959158055322,
      "loss": 3.0686,
      "step": 106922
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3727850914001465,
      "learning_rate": 0.00033379185127916554,
      "loss": 2.9952,
      "step": 106923
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.950598120689392,
      "learning_rate": 0.0003337877867465163,
      "loss": 3.0394,
      "step": 106924
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.3227720260620117,
      "learning_rate": 0.0003337837222075853,
      "loss": 3.1825,
      "step": 106925
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.212597608566284,
      "learning_rate": 0.0003337796576623732,
      "loss": 3.1631,
      "step": 106926
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0579187870025635,
      "learning_rate": 0.0003337755931108809,
      "loss": 3.0804,
      "step": 106927
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1484105587005615,
      "learning_rate": 0.000333771528553109,
      "loss": 3.0421,
      "step": 106928
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.457645893096924,
      "learning_rate": 0.0003337674639890584,
      "loss": 3.1395,
      "step": 106929
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1609537601470947,
      "learning_rate": 0.00033376339941872975,
      "loss": 2.9552,
      "step": 106930
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6820220947265625,
      "learning_rate": 0.00033375933484212385,
      "loss": 2.9805,
      "step": 106931
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9156864881515503,
      "learning_rate": 0.00033375527025924156,
      "loss": 2.9639,
      "step": 106932
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8594895601272583,
      "learning_rate": 0.0003337512056700834,
      "loss": 2.9125,
      "step": 106933
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.389909029006958,
      "learning_rate": 0.00033374714107465024,
      "loss": 3.0732,
      "step": 106934
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8055158853530884,
      "learning_rate": 0.000333743076472943,
      "loss": 3.2882,
      "step": 106935
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.335139513015747,
      "learning_rate": 0.0003337390118649622,
      "loss": 3.0181,
      "step": 106936
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.345775604248047,
      "learning_rate": 0.0003337349472507086,
      "loss": 2.9441,
      "step": 106937
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.6355011463165283,
      "learning_rate": 0.0003337308826301832,
      "loss": 3.2566,
      "step": 106938
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8503820896148682,
      "learning_rate": 0.00033372681800338644,
      "loss": 2.7094,
      "step": 106939
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.579097270965576,
      "learning_rate": 0.00033372275337031925,
      "loss": 2.8498,
      "step": 106940
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.278477668762207,
      "learning_rate": 0.0003337186887309825,
      "loss": 2.9528,
      "step": 106941
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9647548198699951,
      "learning_rate": 0.0003337146240853767,
      "loss": 2.8903,
      "step": 106942
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8165630102157593,
      "learning_rate": 0.00033371055943350264,
      "loss": 2.8568,
      "step": 106943
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.80544912815094,
      "learning_rate": 0.0003337064947753613,
      "loss": 3.11,
      "step": 106944
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2086360454559326,
      "learning_rate": 0.0003337024301109532,
      "loss": 2.8855,
      "step": 106945
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.16906476020813,
      "learning_rate": 0.0003336983654402792,
      "loss": 3.1378,
      "step": 106946
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.101016044616699,
      "learning_rate": 0.0003336943007633401,
      "loss": 2.6987,
      "step": 106947
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.362635850906372,
      "learning_rate": 0.0003336902360801365,
      "loss": 3.1345,
      "step": 106948
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0219244956970215,
      "learning_rate": 0.00033368617139066927,
      "loss": 2.9793,
      "step": 106949
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2568202018737793,
      "learning_rate": 0.0003336821066949392,
      "loss": 3.0478,
      "step": 106950
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7472578287124634,
      "learning_rate": 0.00033367804199294695,
      "loss": 3.0429,
      "step": 106951
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7169239521026611,
      "learning_rate": 0.00033367397728469326,
      "loss": 2.8813,
      "step": 106952
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5688562393188477,
      "learning_rate": 0.00033366991257017907,
      "loss": 3.0706,
      "step": 106953
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.575921058654785,
      "learning_rate": 0.0003336658478494049,
      "loss": 3.1126,
      "step": 106954
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.099999189376831,
      "learning_rate": 0.0003336617831223716,
      "loss": 3.0157,
      "step": 106955
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7665328979492188,
      "learning_rate": 0.00033365771838908006,
      "loss": 2.9575,
      "step": 106956
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.061569929122925,
      "learning_rate": 0.0003336536536495309,
      "loss": 2.9959,
      "step": 106957
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9067184925079346,
      "learning_rate": 0.00033364958890372477,
      "loss": 2.5535,
      "step": 106958
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8490358591079712,
      "learning_rate": 0.0003336455241516626,
      "loss": 2.9439,
      "step": 106959
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9780616760253906,
      "learning_rate": 0.0003336414593933451,
      "loss": 3.1749,
      "step": 106960
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7917016744613647,
      "learning_rate": 0.00033363739462877304,
      "loss": 3.055,
      "step": 106961
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0525124073028564,
      "learning_rate": 0.0003336333298579471,
      "loss": 2.9597,
      "step": 106962
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8186712265014648,
      "learning_rate": 0.00033362926508086817,
      "loss": 2.7023,
      "step": 106963
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7931907176971436,
      "learning_rate": 0.00033362520029753685,
      "loss": 2.968,
      "step": 106964
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6188217401504517,
      "learning_rate": 0.000333621135507954,
      "loss": 3.0353,
      "step": 106965
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9283716678619385,
      "learning_rate": 0.0003336170707121203,
      "loss": 3.0643,
      "step": 106966
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.403872013092041,
      "learning_rate": 0.0003336130059100367,
      "loss": 3.0785,
      "step": 106967
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8114657402038574,
      "learning_rate": 0.0003336089411017036,
      "loss": 3.1478,
      "step": 106968
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.947724461555481,
      "learning_rate": 0.0003336048762871221,
      "loss": 3.2049,
      "step": 106969
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.456857681274414,
      "learning_rate": 0.00033360081146629274,
      "loss": 2.7561,
      "step": 106970
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7220077514648438,
      "learning_rate": 0.0003335967466392164,
      "loss": 3.0462,
      "step": 106971
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7987128496170044,
      "learning_rate": 0.0003335926818058938,
      "loss": 3.2433,
      "step": 106972
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8144288063049316,
      "learning_rate": 0.0003335886169663257,
      "loss": 3.0017,
      "step": 106973
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.009627103805542,
      "learning_rate": 0.0003335845521205129,
      "loss": 3.0001,
      "step": 106974
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7076539993286133,
      "learning_rate": 0.00033358048726845593,
      "loss": 2.9963,
      "step": 106975
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9862244129180908,
      "learning_rate": 0.0003335764224101558,
      "loss": 2.803,
      "step": 106976
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6658117771148682,
      "learning_rate": 0.0003335723575456132,
      "loss": 3.0897,
      "step": 106977
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6182725429534912,
      "learning_rate": 0.00033356829267482883,
      "loss": 2.9027,
      "step": 106978
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.587387204170227,
      "learning_rate": 0.0003335642277978035,
      "loss": 2.9309,
      "step": 106979
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3796615600585938,
      "learning_rate": 0.000333560162914538,
      "loss": 3.1661,
      "step": 106980
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9411503076553345,
      "learning_rate": 0.00033355609802503296,
      "loss": 3.0157,
      "step": 106981
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.153167724609375,
      "learning_rate": 0.0003335520331292892,
      "loss": 2.8163,
      "step": 106982
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.6128156185150146,
      "learning_rate": 0.00033354796822730753,
      "loss": 2.8896,
      "step": 106983
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.117926597595215,
      "learning_rate": 0.00033354390331908864,
      "loss": 3.0297,
      "step": 106984
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9238380193710327,
      "learning_rate": 0.00033353983840463325,
      "loss": 2.8872,
      "step": 106985
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8995802402496338,
      "learning_rate": 0.00033353577348394224,
      "loss": 3.0405,
      "step": 106986
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.571326494216919,
      "learning_rate": 0.00033353170855701637,
      "loss": 2.8335,
      "step": 106987
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9162355661392212,
      "learning_rate": 0.0003335276436238561,
      "loss": 2.9022,
      "step": 106988
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0438356399536133,
      "learning_rate": 0.00033352357868446267,
      "loss": 3.0077,
      "step": 106989
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6542468070983887,
      "learning_rate": 0.0003335195137388364,
      "loss": 2.8792,
      "step": 106990
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9543882608413696,
      "learning_rate": 0.0003335154487869782,
      "loss": 3.2016,
      "step": 106991
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7926926612854004,
      "learning_rate": 0.000333511383828889,
      "loss": 3.2535,
      "step": 106992
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.887002468109131,
      "learning_rate": 0.0003335073188645693,
      "loss": 2.7658,
      "step": 106993
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.237118721008301,
      "learning_rate": 0.0003335032538940199,
      "loss": 2.9757,
      "step": 106994
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9446696043014526,
      "learning_rate": 0.00033349918891724167,
      "loss": 2.8712,
      "step": 106995
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3635833263397217,
      "learning_rate": 0.0003334951239342354,
      "loss": 2.8123,
      "step": 106996
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.415907382965088,
      "learning_rate": 0.00033349105894500165,
      "loss": 3.0183,
      "step": 106997
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8312408924102783,
      "learning_rate": 0.00033348699394954124,
      "loss": 3.0294,
      "step": 106998
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.5121049880981445,
      "learning_rate": 0.00033348292894785507,
      "loss": 2.8925,
      "step": 106999
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.076921463012695,
      "learning_rate": 0.0003334788639399438,
      "loss": 3.2074,
      "step": 107000
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1367359161376953,
      "learning_rate": 0.0003334747989258081,
      "loss": 3.1709,
      "step": 107001
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8385342359542847,
      "learning_rate": 0.00033347073390544883,
      "loss": 3.2202,
      "step": 107002
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7972211837768555,
      "learning_rate": 0.00033346666887886666,
      "loss": 3.0618,
      "step": 107003
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2056500911712646,
      "learning_rate": 0.00033346260384606253,
      "loss": 2.8826,
      "step": 107004
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7014766931533813,
      "learning_rate": 0.000333458538807037,
      "loss": 2.9234,
      "step": 107005
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.952941656112671,
      "learning_rate": 0.00033345447376179093,
      "loss": 3.3785,
      "step": 107006
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7623575925827026,
      "learning_rate": 0.0003334504087103249,
      "loss": 2.9107,
      "step": 107007
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.0736501216888428,
      "learning_rate": 0.00033344634365264003,
      "loss": 3.0143,
      "step": 107008
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.012604236602783,
      "learning_rate": 0.0003334422785887367,
      "loss": 2.8366,
      "step": 107009
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3421788215637207,
      "learning_rate": 0.00033343821351861585,
      "loss": 3.0822,
      "step": 107010
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.9372076988220215,
      "learning_rate": 0.00033343414844227824,
      "loss": 3.1224,
      "step": 107011
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0762014389038086,
      "learning_rate": 0.0003334300833597245,
      "loss": 2.649,
      "step": 107012
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7482830286026,
      "learning_rate": 0.0003334260182709556,
      "loss": 2.9436,
      "step": 107013
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.998339295387268,
      "learning_rate": 0.00033342195317597216,
      "loss": 2.9646,
      "step": 107014
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3956024646759033,
      "learning_rate": 0.0003334178880747749,
      "loss": 3.044,
      "step": 107015
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7000706195831299,
      "learning_rate": 0.00033341382296736457,
      "loss": 3.0631,
      "step": 107016
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.956496238708496,
      "learning_rate": 0.0003334097578537421,
      "loss": 3.1389,
      "step": 107017
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9647164344787598,
      "learning_rate": 0.00033340569273390805,
      "loss": 2.9346,
      "step": 107018
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.360593557357788,
      "learning_rate": 0.00033340162760786324,
      "loss": 3.1483,
      "step": 107019
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0792055130004883,
      "learning_rate": 0.0003333975624756085,
      "loss": 3.1,
      "step": 107020
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5098555088043213,
      "learning_rate": 0.0003333934973371444,
      "loss": 2.9351,
      "step": 107021
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.898089051246643,
      "learning_rate": 0.000333389432192472,
      "loss": 3.0825,
      "step": 107022
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.047257900238037,
      "learning_rate": 0.0003333853670415918,
      "loss": 3.0599,
      "step": 107023
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8513641357421875,
      "learning_rate": 0.00033338130188450453,
      "loss": 3.0388,
      "step": 107024
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1067216396331787,
      "learning_rate": 0.0003333772367212111,
      "loss": 2.8335,
      "step": 107025
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6566494703292847,
      "learning_rate": 0.00033337317155171227,
      "loss": 3.0397,
      "step": 107026
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1344966888427734,
      "learning_rate": 0.00033336910637600867,
      "loss": 2.929,
      "step": 107027
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8999899625778198,
      "learning_rate": 0.0003333650411941012,
      "loss": 2.7961,
      "step": 107028
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7306326627731323,
      "learning_rate": 0.00033336097600599054,
      "loss": 2.9853,
      "step": 107029
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7140425443649292,
      "learning_rate": 0.00033335691081167737,
      "loss": 2.8436,
      "step": 107030
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.043639898300171,
      "learning_rate": 0.00033335284561116256,
      "loss": 2.8995,
      "step": 107031
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8382902145385742,
      "learning_rate": 0.0003333487804044468,
      "loss": 2.9848,
      "step": 107032
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.743451476097107,
      "learning_rate": 0.00033334471519153085,
      "loss": 3.0091,
      "step": 107033
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9862674474716187,
      "learning_rate": 0.0003333406499724155,
      "loss": 2.8125,
      "step": 107034
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9538099765777588,
      "learning_rate": 0.0003333365847471016,
      "loss": 2.827,
      "step": 107035
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5580382347106934,
      "learning_rate": 0.0003333325195155897,
      "loss": 3.0718,
      "step": 107036
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.834649682044983,
      "learning_rate": 0.00033332845427788064,
      "loss": 3.3805,
      "step": 107037
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.281754493713379,
      "learning_rate": 0.0003333243890339753,
      "loss": 3.0217,
      "step": 107038
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.978743314743042,
      "learning_rate": 0.0003333203237838742,
      "loss": 2.9957,
      "step": 107039
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.246103286743164,
      "learning_rate": 0.0003333162585275783,
      "loss": 3.1545,
      "step": 107040
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3265905380249023,
      "learning_rate": 0.00033331219326508823,
      "loss": 2.7564,
      "step": 107041
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.3233466148376465,
      "learning_rate": 0.0003333081279964048,
      "loss": 2.8467,
      "step": 107042
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.8377034664154053,
      "learning_rate": 0.00033330406272152885,
      "loss": 2.8847,
      "step": 107043
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.0038368701934814,
      "learning_rate": 0.00033329999744046096,
      "loss": 3.1199,
      "step": 107044
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.310159921646118,
      "learning_rate": 0.00033329593215320196,
      "loss": 2.9634,
      "step": 107045
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1125004291534424,
      "learning_rate": 0.00033329186685975264,
      "loss": 2.9002,
      "step": 107046
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.194305896759033,
      "learning_rate": 0.0003332878015601137,
      "loss": 2.8425,
      "step": 107047
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.51096510887146,
      "learning_rate": 0.00033328373625428597,
      "loss": 3.0702,
      "step": 107048
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6221227645874023,
      "learning_rate": 0.0003332796709422702,
      "loss": 3.1799,
      "step": 107049
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5790696144104004,
      "learning_rate": 0.000333275605624067,
      "loss": 2.7064,
      "step": 107050
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.6753716468811035,
      "learning_rate": 0.0003332715402996774,
      "loss": 2.9046,
      "step": 107051
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1173763275146484,
      "learning_rate": 0.00033326747496910186,
      "loss": 2.9507,
      "step": 107052
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.004582166671753,
      "learning_rate": 0.0003332634096323413,
      "loss": 2.7858,
      "step": 107053
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.89239239692688,
      "learning_rate": 0.0003332593442893965,
      "loss": 2.8745,
      "step": 107054
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.680788278579712,
      "learning_rate": 0.0003332552789402681,
      "loss": 2.9528,
      "step": 107055
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3406317234039307,
      "learning_rate": 0.0003332512135849569,
      "loss": 2.818,
      "step": 107056
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9228585958480835,
      "learning_rate": 0.0003332471482234637,
      "loss": 3.3116,
      "step": 107057
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6497117280960083,
      "learning_rate": 0.0003332430828557892,
      "loss": 2.9192,
      "step": 107058
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.26704740524292,
      "learning_rate": 0.0003332390174819343,
      "loss": 2.9696,
      "step": 107059
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2315711975097656,
      "learning_rate": 0.0003332349521018995,
      "loss": 3.2867,
      "step": 107060
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7980843782424927,
      "learning_rate": 0.0003332308867156857,
      "loss": 2.8932,
      "step": 107061
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.716773271560669,
      "learning_rate": 0.0003332268213232937,
      "loss": 2.8692,
      "step": 107062
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2619214057922363,
      "learning_rate": 0.0003332227559247243,
      "loss": 2.9431,
      "step": 107063
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1837193965911865,
      "learning_rate": 0.00033321869051997804,
      "loss": 3.108,
      "step": 107064
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6394177675247192,
      "learning_rate": 0.0003332146251090558,
      "loss": 3.1339,
      "step": 107065
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0641629695892334,
      "learning_rate": 0.00033321055969195843,
      "loss": 2.9522,
      "step": 107066
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5552228689193726,
      "learning_rate": 0.0003332064942686865,
      "loss": 2.9444,
      "step": 107067
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.344724416732788,
      "learning_rate": 0.00033320242883924086,
      "loss": 3.0827,
      "step": 107068
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.184113025665283,
      "learning_rate": 0.0003331983634036223,
      "loss": 3.008,
      "step": 107069
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6299909353256226,
      "learning_rate": 0.00033319429796183146,
      "loss": 2.9955,
      "step": 107070
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.997838020324707,
      "learning_rate": 0.0003331902325138692,
      "loss": 3.1267,
      "step": 107071
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6857757568359375,
      "learning_rate": 0.00033318616705973634,
      "loss": 3.157,
      "step": 107072
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9951952695846558,
      "learning_rate": 0.00033318210159943347,
      "loss": 2.839,
      "step": 107073
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7133657932281494,
      "learning_rate": 0.0003331780361329615,
      "loss": 3.0479,
      "step": 107074
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2427470684051514,
      "learning_rate": 0.000333173970660321,
      "loss": 2.9023,
      "step": 107075
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.70711612701416,
      "learning_rate": 0.00033316990518151283,
      "loss": 3.1584,
      "step": 107076
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.323134183883667,
      "learning_rate": 0.0003331658396965378,
      "loss": 2.8684,
      "step": 107077
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.112441062927246,
      "learning_rate": 0.00033316177420539663,
      "loss": 3.0864,
      "step": 107078
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.629148006439209,
      "learning_rate": 0.00033315770870809004,
      "loss": 3.1852,
      "step": 107079
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.407470464706421,
      "learning_rate": 0.0003331536432046188,
      "loss": 3.1041,
      "step": 107080
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1704213619232178,
      "learning_rate": 0.0003331495776949837,
      "loss": 2.9432,
      "step": 107081
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3738815784454346,
      "learning_rate": 0.0003331455121791854,
      "loss": 3.0882,
      "step": 107082
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.1576924324035645,
      "learning_rate": 0.0003331414466572248,
      "loss": 2.7885,
      "step": 107083
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.228641986846924,
      "learning_rate": 0.00033313738112910257,
      "loss": 2.8795,
      "step": 107084
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.550188064575195,
      "learning_rate": 0.0003331333155948194,
      "loss": 2.9218,
      "step": 107085
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.023846387863159,
      "learning_rate": 0.0003331292500543762,
      "loss": 2.6265,
      "step": 107086
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6590696573257446,
      "learning_rate": 0.0003331251845077737,
      "loss": 2.7928,
      "step": 107087
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.62654185295105,
      "learning_rate": 0.0003331211189550124,
      "loss": 3.0918,
      "step": 107088
    },
    {
      "epoch": 1.39,
      "grad_norm": 4.15520715713501,
      "learning_rate": 0.00033311705339609344,
      "loss": 3.106,
      "step": 107089
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.1222176551818848,
      "learning_rate": 0.0003331129878310174,
      "loss": 3.0646,
      "step": 107090
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.881638526916504,
      "learning_rate": 0.0003331089222597849,
      "loss": 3.1836,
      "step": 107091
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9667567014694214,
      "learning_rate": 0.0003331048566823969,
      "loss": 3.0455,
      "step": 107092
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.129761219024658,
      "learning_rate": 0.0003331007910988541,
      "loss": 2.8621,
      "step": 107093
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8825453519821167,
      "learning_rate": 0.00033309672550915716,
      "loss": 2.8986,
      "step": 107094
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7049542665481567,
      "learning_rate": 0.000333092659913307,
      "loss": 3.0684,
      "step": 107095
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.83380925655365,
      "learning_rate": 0.00033308859431130426,
      "loss": 2.6911,
      "step": 107096
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8328999280929565,
      "learning_rate": 0.00033308452870314977,
      "loss": 2.9388,
      "step": 107097
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.019908905029297,
      "learning_rate": 0.0003330804630888441,
      "loss": 3.1518,
      "step": 107098
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.173444986343384,
      "learning_rate": 0.0003330763974683883,
      "loss": 3.2626,
      "step": 107099
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8990122079849243,
      "learning_rate": 0.0003330723318417829,
      "loss": 2.9858,
      "step": 107100
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0985584259033203,
      "learning_rate": 0.00033306826620902873,
      "loss": 3.0155,
      "step": 107101
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9305479526519775,
      "learning_rate": 0.0003330642005701266,
      "loss": 2.977,
      "step": 107102
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8042210340499878,
      "learning_rate": 0.0003330601349250772,
      "loss": 3.2038,
      "step": 107103
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.813859462738037,
      "learning_rate": 0.0003330560692738812,
      "loss": 2.9753,
      "step": 107104
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.616652011871338,
      "learning_rate": 0.0003330520036165396,
      "loss": 3.1988,
      "step": 107105
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.321453094482422,
      "learning_rate": 0.00033304793795305284,
      "loss": 3.0579,
      "step": 107106
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.807086706161499,
      "learning_rate": 0.00033304387228342197,
      "loss": 3.0072,
      "step": 107107
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.3104400634765625,
      "learning_rate": 0.00033303980660764757,
      "loss": 3.1342,
      "step": 107108
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.690840244293213,
      "learning_rate": 0.00033303574092573046,
      "loss": 3.085,
      "step": 107109
    },
    {
      "epoch": 1.39,
      "grad_norm": 3.1786153316497803,
      "learning_rate": 0.0003330316752376714,
      "loss": 3.0313,
      "step": 107110
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.2675697803497314,
      "learning_rate": 0.00033302760954347115,
      "loss": 2.8878,
      "step": 107111
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0761382579803467,
      "learning_rate": 0.0003330235438431304,
      "loss": 3.0131,
      "step": 107112
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9474982023239136,
      "learning_rate": 0.00033301947813664995,
      "loss": 2.9732,
      "step": 107113
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.4855899810791016,
      "learning_rate": 0.00033301541242403056,
      "loss": 3.1088,
      "step": 107114
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.179312229156494,
      "learning_rate": 0.00033301134670527297,
      "loss": 3.1719,
      "step": 107115
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6592717170715332,
      "learning_rate": 0.0003330072809803779,
      "loss": 3.1308,
      "step": 107116
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.315990447998047,
      "learning_rate": 0.0003330032152493463,
      "loss": 3.1792,
      "step": 107117
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9115102291107178,
      "learning_rate": 0.0003329991495121787,
      "loss": 2.8839,
      "step": 107118
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0225510597229004,
      "learning_rate": 0.0003329950837688759,
      "loss": 2.9926,
      "step": 107119
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7908076047897339,
      "learning_rate": 0.00033299101801943874,
      "loss": 3.058,
      "step": 107120
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.930232286453247,
      "learning_rate": 0.00033298695226386787,
      "loss": 3.1202,
      "step": 107121
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.6580843925476074,
      "learning_rate": 0.00033298288650216416,
      "loss": 3.0654,
      "step": 107122
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.976731777191162,
      "learning_rate": 0.0003329788207343283,
      "loss": 2.9307,
      "step": 107123
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.644684076309204,
      "learning_rate": 0.00033297475496036103,
      "loss": 3.1464,
      "step": 107124
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7074267864227295,
      "learning_rate": 0.00033297068918026313,
      "loss": 2.8625,
      "step": 107125
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.9463586807250977,
      "learning_rate": 0.0003329666233940354,
      "loss": 3.0972,
      "step": 107126
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8167561292648315,
      "learning_rate": 0.0003329625576016785,
      "loss": 2.9697,
      "step": 107127
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.5767295360565186,
      "learning_rate": 0.00033295849180319323,
      "loss": 3.1586,
      "step": 107128
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7987943887710571,
      "learning_rate": 0.0003329544259985804,
      "loss": 3.1265,
      "step": 107129
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7310791015625,
      "learning_rate": 0.00033295036018784075,
      "loss": 2.9382,
      "step": 107130
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7384499311447144,
      "learning_rate": 0.0003329462943709749,
      "loss": 2.8618,
      "step": 107131
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.831555962562561,
      "learning_rate": 0.00033294222854798373,
      "loss": 3.189,
      "step": 107132
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.7635177373886108,
      "learning_rate": 0.0003329381627188681,
      "loss": 3.0267,
      "step": 107133
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0992918014526367,
      "learning_rate": 0.00033293409688362857,
      "loss": 3.123,
      "step": 107134
    },
    {
      "epoch": 1.39,
      "grad_norm": 1.8087475299835205,
      "learning_rate": 0.0003329300310422659,
      "loss": 3.0023,
      "step": 107135
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.0963308811187744,
      "learning_rate": 0.00033292596519478106,
      "loss": 3.0358,
      "step": 107136
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.5637059211730957,
      "learning_rate": 0.0003329218993411745,
      "loss": 2.78,
      "step": 107137
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9410247802734375,
      "learning_rate": 0.00033291783348144723,
      "loss": 2.84,
      "step": 107138
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8479934930801392,
      "learning_rate": 0.00033291376761559994,
      "loss": 3.03,
      "step": 107139
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.544818162918091,
      "learning_rate": 0.00033290970174363336,
      "loss": 2.8849,
      "step": 107140
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.126603841781616,
      "learning_rate": 0.00033290563586554813,
      "loss": 3.0068,
      "step": 107141
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9878815412521362,
      "learning_rate": 0.0003329015699813452,
      "loss": 2.9388,
      "step": 107142
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7385642528533936,
      "learning_rate": 0.00033289750409102524,
      "loss": 2.7485,
      "step": 107143
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.1235060691833496,
      "learning_rate": 0.000332893438194589,
      "loss": 2.8598,
      "step": 107144
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2383675575256348,
      "learning_rate": 0.00033288937229203726,
      "loss": 2.9915,
      "step": 107145
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.306119203567505,
      "learning_rate": 0.00033288530638337076,
      "loss": 3.1094,
      "step": 107146
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.938362717628479,
      "learning_rate": 0.00033288124046859034,
      "loss": 2.9321,
      "step": 107147
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8376408815383911,
      "learning_rate": 0.00033287717454769653,
      "loss": 3.1902,
      "step": 107148
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.8762435913085938,
      "learning_rate": 0.0003328731086206903,
      "loss": 3.0441,
      "step": 107149
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0710995197296143,
      "learning_rate": 0.00033286904268757235,
      "loss": 2.9586,
      "step": 107150
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.278301239013672,
      "learning_rate": 0.0003328649767483435,
      "loss": 3.056,
      "step": 107151
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9826467037200928,
      "learning_rate": 0.0003328609108030043,
      "loss": 3.059,
      "step": 107152
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8143436908721924,
      "learning_rate": 0.0003328568448515557,
      "loss": 2.9538,
      "step": 107153
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2131054401397705,
      "learning_rate": 0.0003328527788939984,
      "loss": 3.1665,
      "step": 107154
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7728703022003174,
      "learning_rate": 0.0003328487129303331,
      "loss": 2.9925,
      "step": 107155
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.154329299926758,
      "learning_rate": 0.00033284464696056064,
      "loss": 2.9695,
      "step": 107156
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3814315795898438,
      "learning_rate": 0.0003328405809846818,
      "loss": 2.9221,
      "step": 107157
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6577707529067993,
      "learning_rate": 0.0003328365150026972,
      "loss": 2.9881,
      "step": 107158
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6828205585479736,
      "learning_rate": 0.00033283244901460764,
      "loss": 3.0348,
      "step": 107159
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3149118423461914,
      "learning_rate": 0.000332828383020414,
      "loss": 3.0037,
      "step": 107160
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.426359176635742,
      "learning_rate": 0.0003328243170201169,
      "loss": 2.924,
      "step": 107161
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6956294775009155,
      "learning_rate": 0.0003328202510137171,
      "loss": 3.0435,
      "step": 107162
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7463473081588745,
      "learning_rate": 0.00033281618500121546,
      "loss": 2.9273,
      "step": 107163
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3133957386016846,
      "learning_rate": 0.00033281211898261265,
      "loss": 2.9881,
      "step": 107164
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2431387901306152,
      "learning_rate": 0.0003328080529579094,
      "loss": 3.0485,
      "step": 107165
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.0225632190704346,
      "learning_rate": 0.0003328039869271066,
      "loss": 3.0529,
      "step": 107166
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9419056177139282,
      "learning_rate": 0.00033279992089020485,
      "loss": 2.8233,
      "step": 107167
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4881300926208496,
      "learning_rate": 0.000332795854847205,
      "loss": 2.9084,
      "step": 107168
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1947498321533203,
      "learning_rate": 0.0003327917887981078,
      "loss": 2.9699,
      "step": 107169
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.5656232833862305,
      "learning_rate": 0.000332787722742914,
      "loss": 3.0047,
      "step": 107170
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2220585346221924,
      "learning_rate": 0.00033278365668162424,
      "loss": 2.6848,
      "step": 107171
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.930343747138977,
      "learning_rate": 0.00033277959061423954,
      "loss": 3.0582,
      "step": 107172
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6688120365142822,
      "learning_rate": 0.0003327755245407603,
      "loss": 3.016,
      "step": 107173
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.5025479793548584,
      "learning_rate": 0.0003327714584611876,
      "loss": 2.8719,
      "step": 107174
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.47395658493042,
      "learning_rate": 0.0003327673923755221,
      "loss": 2.8624,
      "step": 107175
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7250112295150757,
      "learning_rate": 0.00033276332628376446,
      "loss": 2.9822,
      "step": 107176
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.2068371772766113,
      "learning_rate": 0.0003327592601859155,
      "loss": 3.0736,
      "step": 107177
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.7315027713775635,
      "learning_rate": 0.00033275519408197603,
      "loss": 2.7444,
      "step": 107178
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9674547910690308,
      "learning_rate": 0.0003327511279719467,
      "loss": 3.0631,
      "step": 107179
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9415360689163208,
      "learning_rate": 0.0003327470618558283,
      "loss": 3.0508,
      "step": 107180
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.180922508239746,
      "learning_rate": 0.00033274299573362166,
      "loss": 3.1717,
      "step": 107181
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8829203844070435,
      "learning_rate": 0.0003327389296053274,
      "loss": 2.8445,
      "step": 107182
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0905730724334717,
      "learning_rate": 0.0003327348634709464,
      "loss": 2.9845,
      "step": 107183
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0604207515716553,
      "learning_rate": 0.0003327307973304794,
      "loss": 3.1588,
      "step": 107184
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0967564582824707,
      "learning_rate": 0.0003327267311839271,
      "loss": 3.1214,
      "step": 107185
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5748560428619385,
      "learning_rate": 0.0003327226650312902,
      "loss": 2.9978,
      "step": 107186
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7077147960662842,
      "learning_rate": 0.00033271859887256967,
      "loss": 2.7959,
      "step": 107187
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7457711696624756,
      "learning_rate": 0.0003327145327077661,
      "loss": 2.9894,
      "step": 107188
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5898679494857788,
      "learning_rate": 0.0003327104665368802,
      "loss": 3.1776,
      "step": 107189
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8282475471496582,
      "learning_rate": 0.00033270640035991297,
      "loss": 3.2913,
      "step": 107190
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.425095558166504,
      "learning_rate": 0.0003327023341768648,
      "loss": 2.9377,
      "step": 107191
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6547483205795288,
      "learning_rate": 0.00033269826798773674,
      "loss": 2.9348,
      "step": 107192
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9965102672576904,
      "learning_rate": 0.00033269420179252954,
      "loss": 3.2219,
      "step": 107193
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.092071771621704,
      "learning_rate": 0.0003326901355912438,
      "loss": 3.0324,
      "step": 107194
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7900077104568481,
      "learning_rate": 0.0003326860693838803,
      "loss": 2.9219,
      "step": 107195
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.881807565689087,
      "learning_rate": 0.0003326820031704399,
      "loss": 3.2611,
      "step": 107196
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7646037340164185,
      "learning_rate": 0.00033267793695092334,
      "loss": 3.2116,
      "step": 107197
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9416677951812744,
      "learning_rate": 0.0003326738707253312,
      "loss": 3.1399,
      "step": 107198
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5754470825195312,
      "learning_rate": 0.00033266980449366446,
      "loss": 2.558,
      "step": 107199
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8805887699127197,
      "learning_rate": 0.00033266573825592384,
      "loss": 3.0561,
      "step": 107200
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8469945192337036,
      "learning_rate": 0.00033266167201210996,
      "loss": 3.0028,
      "step": 107201
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8752046823501587,
      "learning_rate": 0.0003326576057622237,
      "loss": 2.7072,
      "step": 107202
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8210123777389526,
      "learning_rate": 0.0003326535395062657,
      "loss": 3.1053,
      "step": 107203
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5804920196533203,
      "learning_rate": 0.0003326494732442369,
      "loss": 2.8362,
      "step": 107204
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8961361646652222,
      "learning_rate": 0.00033264540697613786,
      "loss": 3.4069,
      "step": 107205
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0634922981262207,
      "learning_rate": 0.00033264134070196946,
      "loss": 3.0987,
      "step": 107206
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0864267349243164,
      "learning_rate": 0.0003326372744217324,
      "loss": 3.1635,
      "step": 107207
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.358283042907715,
      "learning_rate": 0.0003326332081354275,
      "loss": 2.8489,
      "step": 107208
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7986540794372559,
      "learning_rate": 0.00033262914184305545,
      "loss": 2.7094,
      "step": 107209
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7080506086349487,
      "learning_rate": 0.00033262507554461697,
      "loss": 3.23,
      "step": 107210
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.504582405090332,
      "learning_rate": 0.0003326210092401129,
      "loss": 3.125,
      "step": 107211
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.783811092376709,
      "learning_rate": 0.00033261694292954406,
      "loss": 3.0465,
      "step": 107212
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5656509399414062,
      "learning_rate": 0.000332612876612911,
      "loss": 3.3187,
      "step": 107213
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.207510471343994,
      "learning_rate": 0.0003326088102902146,
      "loss": 2.9971,
      "step": 107214
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.2574892044067383,
      "learning_rate": 0.0003326047439614557,
      "loss": 3.0897,
      "step": 107215
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.182965040206909,
      "learning_rate": 0.0003326006776266348,
      "loss": 2.9268,
      "step": 107216
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7161667346954346,
      "learning_rate": 0.00033259661128575297,
      "loss": 3.0428,
      "step": 107217
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.8933186531066895,
      "learning_rate": 0.0003325925449388108,
      "loss": 2.9644,
      "step": 107218
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6212427616119385,
      "learning_rate": 0.00033258847858580904,
      "loss": 2.9607,
      "step": 107219
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7399448156356812,
      "learning_rate": 0.0003325844122267484,
      "loss": 2.9509,
      "step": 107220
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9680511951446533,
      "learning_rate": 0.00033258034586162977,
      "loss": 2.9918,
      "step": 107221
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.423570394515991,
      "learning_rate": 0.0003325762794904538,
      "loss": 2.637,
      "step": 107222
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8706681728363037,
      "learning_rate": 0.00033257221311322135,
      "loss": 2.9677,
      "step": 107223
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.0646307468414307,
      "learning_rate": 0.00033256814672993305,
      "loss": 3.0249,
      "step": 107224
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9662178754806519,
      "learning_rate": 0.00033256408034058974,
      "loss": 2.8144,
      "step": 107225
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.911893606185913,
      "learning_rate": 0.00033256001394519214,
      "loss": 2.8476,
      "step": 107226
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.030231237411499,
      "learning_rate": 0.000332555947543741,
      "loss": 2.9804,
      "step": 107227
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.610233783721924,
      "learning_rate": 0.00033255188113623716,
      "loss": 2.9882,
      "step": 107228
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3396084308624268,
      "learning_rate": 0.00033254781472268123,
      "loss": 3.0684,
      "step": 107229
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.194434642791748,
      "learning_rate": 0.0003325437483030741,
      "loss": 3.0131,
      "step": 107230
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5923237800598145,
      "learning_rate": 0.0003325396818774164,
      "loss": 3.2161,
      "step": 107231
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.7663469314575195,
      "learning_rate": 0.00033253561544570906,
      "loss": 2.9943,
      "step": 107232
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.9775989055633545,
      "learning_rate": 0.0003325315490079527,
      "loss": 2.8482,
      "step": 107233
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.642572283744812,
      "learning_rate": 0.0003325274825641481,
      "loss": 2.9149,
      "step": 107234
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.8165509700775146,
      "learning_rate": 0.000332523416114296,
      "loss": 3.0253,
      "step": 107235
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3003058433532715,
      "learning_rate": 0.00033251934965839727,
      "loss": 3.0893,
      "step": 107236
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.9139859676361084,
      "learning_rate": 0.00033251528319645253,
      "loss": 2.7942,
      "step": 107237
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.074402332305908,
      "learning_rate": 0.0003325112167284625,
      "loss": 3.269,
      "step": 107238
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9076570272445679,
      "learning_rate": 0.0003325071502544282,
      "loss": 3.1511,
      "step": 107239
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.026888132095337,
      "learning_rate": 0.0003325030837743501,
      "loss": 3.2932,
      "step": 107240
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.938539743423462,
      "learning_rate": 0.00033249901728822905,
      "loss": 3.0392,
      "step": 107241
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9552252292633057,
      "learning_rate": 0.0003324949507960658,
      "loss": 2.8936,
      "step": 107242
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9725161790847778,
      "learning_rate": 0.0003324908842978612,
      "loss": 2.7845,
      "step": 107243
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.207632303237915,
      "learning_rate": 0.00033248681779361587,
      "loss": 3.0542,
      "step": 107244
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8070166110992432,
      "learning_rate": 0.0003324827512833307,
      "loss": 3.1882,
      "step": 107245
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.98270845413208,
      "learning_rate": 0.00033247868476700627,
      "loss": 3.1005,
      "step": 107246
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9236689805984497,
      "learning_rate": 0.0003324746182446435,
      "loss": 2.9674,
      "step": 107247
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.107020854949951,
      "learning_rate": 0.00033247055171624313,
      "loss": 3.0202,
      "step": 107248
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6234835386276245,
      "learning_rate": 0.0003324664851818058,
      "loss": 3.0363,
      "step": 107249
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.180108070373535,
      "learning_rate": 0.0003324624186413323,
      "loss": 3.1528,
      "step": 107250
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1991140842437744,
      "learning_rate": 0.0003324583520948236,
      "loss": 2.8751,
      "step": 107251
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7147072553634644,
      "learning_rate": 0.0003324542855422801,
      "loss": 2.988,
      "step": 107252
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.197859525680542,
      "learning_rate": 0.00033245021898370284,
      "loss": 3.2005,
      "step": 107253
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.523012161254883,
      "learning_rate": 0.00033244615241909245,
      "loss": 3.1567,
      "step": 107254
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.947159767150879,
      "learning_rate": 0.0003324420858484497,
      "loss": 3.1505,
      "step": 107255
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7975246906280518,
      "learning_rate": 0.0003324380192717753,
      "loss": 2.797,
      "step": 107256
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.511491060256958,
      "learning_rate": 0.00033243395268907014,
      "loss": 2.8448,
      "step": 107257
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.046591281890869,
      "learning_rate": 0.00033242988610033484,
      "loss": 2.8776,
      "step": 107258
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0277528762817383,
      "learning_rate": 0.00033242581950557025,
      "loss": 3.0469,
      "step": 107259
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8723933696746826,
      "learning_rate": 0.00033242175290477705,
      "loss": 2.7104,
      "step": 107260
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7243636846542358,
      "learning_rate": 0.000332417686297956,
      "loss": 3.0399,
      "step": 107261
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6682145595550537,
      "learning_rate": 0.00033241361968510796,
      "loss": 3.1345,
      "step": 107262
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0338308811187744,
      "learning_rate": 0.0003324095530662336,
      "loss": 2.9014,
      "step": 107263
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7608245611190796,
      "learning_rate": 0.0003324054864413337,
      "loss": 2.8447,
      "step": 107264
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4215047359466553,
      "learning_rate": 0.00033240141981040906,
      "loss": 2.7691,
      "step": 107265
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2535290718078613,
      "learning_rate": 0.00033239735317346027,
      "loss": 3.0469,
      "step": 107266
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6775606870651245,
      "learning_rate": 0.0003323932865304883,
      "loss": 2.9757,
      "step": 107267
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6106369495391846,
      "learning_rate": 0.00033238921988149376,
      "loss": 2.9203,
      "step": 107268
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4167068004608154,
      "learning_rate": 0.0003323851532264774,
      "loss": 3.0897,
      "step": 107269
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.012087821960449,
      "learning_rate": 0.0003323810865654402,
      "loss": 3.221,
      "step": 107270
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8433128595352173,
      "learning_rate": 0.00033237701989838255,
      "loss": 2.7481,
      "step": 107271
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9788402318954468,
      "learning_rate": 0.00033237295322530543,
      "loss": 2.6909,
      "step": 107272
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3307900428771973,
      "learning_rate": 0.0003323688865462097,
      "loss": 2.9704,
      "step": 107273
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9227449893951416,
      "learning_rate": 0.0003323648198610958,
      "loss": 2.9959,
      "step": 107274
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5468493700027466,
      "learning_rate": 0.0003323607531699648,
      "loss": 2.7208,
      "step": 107275
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4374008178710938,
      "learning_rate": 0.00033235668647281735,
      "loss": 3.0583,
      "step": 107276
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1825127601623535,
      "learning_rate": 0.00033235261976965406,
      "loss": 3.0159,
      "step": 107277
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.741221308708191,
      "learning_rate": 0.0003323485530604759,
      "loss": 2.9991,
      "step": 107278
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5136890411376953,
      "learning_rate": 0.00033234448634528353,
      "loss": 2.8902,
      "step": 107279
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.884432077407837,
      "learning_rate": 0.0003323404196240777,
      "loss": 2.9582,
      "step": 107280
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8507745265960693,
      "learning_rate": 0.0003323363528968591,
      "loss": 2.8636,
      "step": 107281
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2356271743774414,
      "learning_rate": 0.00033233228616362867,
      "loss": 3.0053,
      "step": 107282
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9378767013549805,
      "learning_rate": 0.000332328219424387,
      "loss": 3.1926,
      "step": 107283
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9055718183517456,
      "learning_rate": 0.0003323241526791349,
      "loss": 2.9383,
      "step": 107284
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8390400409698486,
      "learning_rate": 0.0003323200859278732,
      "loss": 3.1151,
      "step": 107285
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.68067467212677,
      "learning_rate": 0.0003323160191706025,
      "loss": 3.246,
      "step": 107286
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.025416135787964,
      "learning_rate": 0.0003323119524073237,
      "loss": 3.0263,
      "step": 107287
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.426034688949585,
      "learning_rate": 0.0003323078856380375,
      "loss": 3.0713,
      "step": 107288
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1203579902648926,
      "learning_rate": 0.0003323038188627446,
      "loss": 3.005,
      "step": 107289
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9875681400299072,
      "learning_rate": 0.00033229975208144577,
      "loss": 2.8965,
      "step": 107290
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.825881838798523,
      "learning_rate": 0.00033229568529414194,
      "loss": 3.1225,
      "step": 107291
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7572031021118164,
      "learning_rate": 0.00033229161850083363,
      "loss": 2.7502,
      "step": 107292
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6008957624435425,
      "learning_rate": 0.0003322875517015217,
      "loss": 3.1307,
      "step": 107293
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.377406120300293,
      "learning_rate": 0.000332283484896207,
      "loss": 2.9108,
      "step": 107294
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4470415115356445,
      "learning_rate": 0.0003322794180848901,
      "loss": 2.9361,
      "step": 107295
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.589733123779297,
      "learning_rate": 0.0003322753512675719,
      "loss": 3.2133,
      "step": 107296
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8018677234649658,
      "learning_rate": 0.0003322712844442531,
      "loss": 3.0334,
      "step": 107297
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5918493270874023,
      "learning_rate": 0.0003322672176149344,
      "loss": 3.0835,
      "step": 107298
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6328470706939697,
      "learning_rate": 0.0003322631507796166,
      "loss": 2.7581,
      "step": 107299
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1542468070983887,
      "learning_rate": 0.0003322590839383006,
      "loss": 3.0516,
      "step": 107300
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.05149507522583,
      "learning_rate": 0.00033225501709098693,
      "loss": 2.8791,
      "step": 107301
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2363510131835938,
      "learning_rate": 0.00033225095023767644,
      "loss": 3.1158,
      "step": 107302
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.887648344039917,
      "learning_rate": 0.00033224688337837,
      "loss": 2.914,
      "step": 107303
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1459155082702637,
      "learning_rate": 0.0003322428165130681,
      "loss": 3.0087,
      "step": 107304
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7965878248214722,
      "learning_rate": 0.0003322387496417717,
      "loss": 2.7627,
      "step": 107305
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7982374429702759,
      "learning_rate": 0.0003322346827644816,
      "loss": 2.874,
      "step": 107306
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.53115177154541,
      "learning_rate": 0.00033223061588119834,
      "loss": 2.9887,
      "step": 107307
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.722051978111267,
      "learning_rate": 0.00033222654899192286,
      "loss": 3.2534,
      "step": 107308
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6910501718521118,
      "learning_rate": 0.0003322224820966559,
      "loss": 3.073,
      "step": 107309
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1862246990203857,
      "learning_rate": 0.0003322184151953981,
      "loss": 3.1585,
      "step": 107310
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.796546220779419,
      "learning_rate": 0.0003322143482881502,
      "loss": 2.7866,
      "step": 107311
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1578519344329834,
      "learning_rate": 0.0003322102813749132,
      "loss": 3.3106,
      "step": 107312
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2096638679504395,
      "learning_rate": 0.0003322062144556877,
      "loss": 3.0277,
      "step": 107313
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.528865933418274,
      "learning_rate": 0.0003322021475304744,
      "loss": 2.9824,
      "step": 107314
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.75942862033844,
      "learning_rate": 0.0003321980805992741,
      "loss": 3.0869,
      "step": 107315
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9830012321472168,
      "learning_rate": 0.0003321940136620876,
      "loss": 2.8885,
      "step": 107316
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9536972045898438,
      "learning_rate": 0.0003321899467189156,
      "loss": 3.0516,
      "step": 107317
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6412572860717773,
      "learning_rate": 0.0003321858797697589,
      "loss": 3.1947,
      "step": 107318
    },
    {
      "epoch": 1.4,
      "grad_norm": 5.135239124298096,
      "learning_rate": 0.00033218181281461826,
      "loss": 2.8139,
      "step": 107319
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.353416681289673,
      "learning_rate": 0.0003321777458534944,
      "loss": 2.913,
      "step": 107320
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1733479499816895,
      "learning_rate": 0.0003321736788863881,
      "loss": 2.7028,
      "step": 107321
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9307037591934204,
      "learning_rate": 0.00033216961191330003,
      "loss": 3.296,
      "step": 107322
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0397887229919434,
      "learning_rate": 0.0003321655449342311,
      "loss": 3.3448,
      "step": 107323
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.231311321258545,
      "learning_rate": 0.00033216147794918195,
      "loss": 3.1026,
      "step": 107324
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8926398754119873,
      "learning_rate": 0.0003321574109581534,
      "loss": 2.8995,
      "step": 107325
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1376798152923584,
      "learning_rate": 0.00033215334396114615,
      "loss": 2.7315,
      "step": 107326
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6937564611434937,
      "learning_rate": 0.000332149276958161,
      "loss": 2.812,
      "step": 107327
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0661864280700684,
      "learning_rate": 0.0003321452099491987,
      "loss": 2.974,
      "step": 107328
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.770222544670105,
      "learning_rate": 0.00033214114293426,
      "loss": 3.029,
      "step": 107329
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7118791341781616,
      "learning_rate": 0.00033213707591334563,
      "loss": 2.9302,
      "step": 107330
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7862229347229004,
      "learning_rate": 0.00033213300888645643,
      "loss": 2.7556,
      "step": 107331
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.646716594696045,
      "learning_rate": 0.00033212894185359306,
      "loss": 2.9139,
      "step": 107332
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2239017486572266,
      "learning_rate": 0.0003321248748147563,
      "loss": 3.1373,
      "step": 107333
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.72283136844635,
      "learning_rate": 0.00033212080776994694,
      "loss": 2.9745,
      "step": 107334
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0738205909729004,
      "learning_rate": 0.00033211674071916567,
      "loss": 2.8518,
      "step": 107335
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.101872205734253,
      "learning_rate": 0.0003321126736624133,
      "loss": 2.8201,
      "step": 107336
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8809080123901367,
      "learning_rate": 0.0003321086065996907,
      "loss": 3.0722,
      "step": 107337
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7967162132263184,
      "learning_rate": 0.0003321045395309984,
      "loss": 3.1108,
      "step": 107338
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0469815731048584,
      "learning_rate": 0.00033210047245633723,
      "loss": 2.791,
      "step": 107339
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4012529850006104,
      "learning_rate": 0.0003320964053757081,
      "loss": 3.0099,
      "step": 107340
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8184587955474854,
      "learning_rate": 0.0003320923382891115,
      "loss": 2.9822,
      "step": 107341
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8736695051193237,
      "learning_rate": 0.00033208827119654837,
      "loss": 3.0384,
      "step": 107342
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6563628911972046,
      "learning_rate": 0.0003320842040980195,
      "loss": 2.6157,
      "step": 107343
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7871813774108887,
      "learning_rate": 0.00033208013699352547,
      "loss": 2.8169,
      "step": 107344
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6529124975204468,
      "learning_rate": 0.0003320760698830672,
      "loss": 2.7934,
      "step": 107345
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7086906433105469,
      "learning_rate": 0.00033207200276664535,
      "loss": 3.0426,
      "step": 107346
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2297112941741943,
      "learning_rate": 0.00033206793564426076,
      "loss": 2.9096,
      "step": 107347
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1122355461120605,
      "learning_rate": 0.00033206386851591404,
      "loss": 3.0209,
      "step": 107348
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.611866474151611,
      "learning_rate": 0.00033205980138160615,
      "loss": 3.0043,
      "step": 107349
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5764145851135254,
      "learning_rate": 0.00033205573424133764,
      "loss": 2.9089,
      "step": 107350
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.933010220527649,
      "learning_rate": 0.00033205166709510944,
      "loss": 2.9021,
      "step": 107351
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4488706588745117,
      "learning_rate": 0.00033204759994292225,
      "loss": 2.9029,
      "step": 107352
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.1258437633514404,
      "learning_rate": 0.00033204353278477677,
      "loss": 3.0813,
      "step": 107353
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.067866563796997,
      "learning_rate": 0.0003320394656206737,
      "loss": 3.0807,
      "step": 107354
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.658286690711975,
      "learning_rate": 0.00033203539845061403,
      "loss": 3.1656,
      "step": 107355
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4988629817962646,
      "learning_rate": 0.00033203133127459824,
      "loss": 2.8367,
      "step": 107356
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5052993297576904,
      "learning_rate": 0.0003320272640926273,
      "loss": 2.8614,
      "step": 107357
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.03088641166687,
      "learning_rate": 0.00033202319690470194,
      "loss": 2.936,
      "step": 107358
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9679234027862549,
      "learning_rate": 0.0003320191297108228,
      "loss": 3.0846,
      "step": 107359
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.2653865814208984,
      "learning_rate": 0.00033201506251099064,
      "loss": 3.2731,
      "step": 107360
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.765586018562317,
      "learning_rate": 0.0003320109953052064,
      "loss": 3.1797,
      "step": 107361
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6200870275497437,
      "learning_rate": 0.0003320069280934707,
      "loss": 2.8311,
      "step": 107362
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7806518077850342,
      "learning_rate": 0.0003320028608757842,
      "loss": 3.0093,
      "step": 107363
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6983270645141602,
      "learning_rate": 0.0003319987936521479,
      "loss": 3.1654,
      "step": 107364
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3293213844299316,
      "learning_rate": 0.0003319947264225623,
      "loss": 3.2121,
      "step": 107365
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7769855260849,
      "learning_rate": 0.00033199065918702826,
      "loss": 2.9325,
      "step": 107366
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5243825912475586,
      "learning_rate": 0.0003319865919455467,
      "loss": 3.1285,
      "step": 107367
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.322458267211914,
      "learning_rate": 0.00033198252469811816,
      "loss": 3.0586,
      "step": 107368
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8655062913894653,
      "learning_rate": 0.0003319784574447434,
      "loss": 3.218,
      "step": 107369
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8888540267944336,
      "learning_rate": 0.0003319743901854233,
      "loss": 2.8059,
      "step": 107370
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7772042751312256,
      "learning_rate": 0.0003319703229201585,
      "loss": 3.1913,
      "step": 107371
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8186708688735962,
      "learning_rate": 0.00033196625564894987,
      "loss": 2.9225,
      "step": 107372
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8095890283584595,
      "learning_rate": 0.00033196218837179813,
      "loss": 2.7668,
      "step": 107373
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5141565799713135,
      "learning_rate": 0.00033195812108870404,
      "loss": 2.9779,
      "step": 107374
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9740049839019775,
      "learning_rate": 0.0003319540537996682,
      "loss": 3.0094,
      "step": 107375
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.448502779006958,
      "learning_rate": 0.00033194998650469163,
      "loss": 2.9938,
      "step": 107376
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0010366439819336,
      "learning_rate": 0.00033194591920377494,
      "loss": 2.9719,
      "step": 107377
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4331467151641846,
      "learning_rate": 0.0003319418518969188,
      "loss": 3.101,
      "step": 107378
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2587413787841797,
      "learning_rate": 0.00033193778458412415,
      "loss": 2.9967,
      "step": 107379
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2150957584381104,
      "learning_rate": 0.00033193371726539163,
      "loss": 3.1916,
      "step": 107380
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.498840808868408,
      "learning_rate": 0.000331929649940722,
      "loss": 2.8393,
      "step": 107381
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.697746515274048,
      "learning_rate": 0.0003319255826101162,
      "loss": 2.8535,
      "step": 107382
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8983333110809326,
      "learning_rate": 0.00033192151527357467,
      "loss": 2.9736,
      "step": 107383
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2488603591918945,
      "learning_rate": 0.0003319174479310983,
      "loss": 3.0463,
      "step": 107384
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.043954372406006,
      "learning_rate": 0.00033191338058268806,
      "loss": 3.2048,
      "step": 107385
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8696393966674805,
      "learning_rate": 0.0003319093132283443,
      "loss": 2.7311,
      "step": 107386
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.197617530822754,
      "learning_rate": 0.0003319052458680681,
      "loss": 3.0342,
      "step": 107387
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8360017538070679,
      "learning_rate": 0.00033190117850186016,
      "loss": 2.9318,
      "step": 107388
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.074883222579956,
      "learning_rate": 0.00033189711112972115,
      "loss": 2.9465,
      "step": 107389
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.9863603115081787,
      "learning_rate": 0.00033189304375165187,
      "loss": 3.081,
      "step": 107390
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2171547412872314,
      "learning_rate": 0.0003318889763676531,
      "loss": 2.758,
      "step": 107391
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6667733192443848,
      "learning_rate": 0.0003318849089777255,
      "loss": 2.8743,
      "step": 107392
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.752474546432495,
      "learning_rate": 0.0003318808415818699,
      "loss": 2.8095,
      "step": 107393
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.9082584381103516,
      "learning_rate": 0.0003318767741800871,
      "loss": 3.0358,
      "step": 107394
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.930976390838623,
      "learning_rate": 0.00033187270677237775,
      "loss": 3.0103,
      "step": 107395
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.801781415939331,
      "learning_rate": 0.0003318686393587427,
      "loss": 2.8303,
      "step": 107396
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.7635884284973145,
      "learning_rate": 0.0003318645719391826,
      "loss": 3.0511,
      "step": 107397
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.129044055938721,
      "learning_rate": 0.0003318605045136984,
      "loss": 3.0571,
      "step": 107398
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.788888931274414,
      "learning_rate": 0.00033185643708229066,
      "loss": 2.9188,
      "step": 107399
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5392918586730957,
      "learning_rate": 0.0003318523696449602,
      "loss": 3.2776,
      "step": 107400
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9611858129501343,
      "learning_rate": 0.0003318483022017078,
      "loss": 3.1696,
      "step": 107401
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.146012306213379,
      "learning_rate": 0.0003318442347525342,
      "loss": 2.8071,
      "step": 107402
    },
    {
      "epoch": 1.4,
      "grad_norm": 5.38847541809082,
      "learning_rate": 0.0003318401672974401,
      "loss": 2.8039,
      "step": 107403
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.561197519302368,
      "learning_rate": 0.00033183609983642636,
      "loss": 3.1172,
      "step": 107404
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.692384123802185,
      "learning_rate": 0.00033183203236949375,
      "loss": 3.1095,
      "step": 107405
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9063926935195923,
      "learning_rate": 0.00033182796489664287,
      "loss": 2.9456,
      "step": 107406
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.129319667816162,
      "learning_rate": 0.0003318238974178746,
      "loss": 2.6138,
      "step": 107407
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8493425846099854,
      "learning_rate": 0.0003318198299331897,
      "loss": 2.9555,
      "step": 107408
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.058915853500366,
      "learning_rate": 0.00033181576244258883,
      "loss": 3.0197,
      "step": 107409
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5827064514160156,
      "learning_rate": 0.0003318116949460728,
      "loss": 3.0714,
      "step": 107410
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0881423950195312,
      "learning_rate": 0.00033180762744364244,
      "loss": 2.8884,
      "step": 107411
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.855492353439331,
      "learning_rate": 0.00033180355993529847,
      "loss": 2.9573,
      "step": 107412
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.690725564956665,
      "learning_rate": 0.0003317994924210415,
      "loss": 3.0029,
      "step": 107413
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1459007263183594,
      "learning_rate": 0.0003317954249008725,
      "loss": 3.0128,
      "step": 107414
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.011809825897217,
      "learning_rate": 0.00033179135737479204,
      "loss": 3.0067,
      "step": 107415
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2433698177337646,
      "learning_rate": 0.00033178728984280107,
      "loss": 3.0645,
      "step": 107416
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.069716215133667,
      "learning_rate": 0.0003317832223049002,
      "loss": 2.7574,
      "step": 107417
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.727513074874878,
      "learning_rate": 0.0003317791547610902,
      "loss": 3.0744,
      "step": 107418
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.747576355934143,
      "learning_rate": 0.00033177508721137194,
      "loss": 3.0927,
      "step": 107419
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7267327308654785,
      "learning_rate": 0.000331771019655746,
      "loss": 3.1847,
      "step": 107420
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7429020404815674,
      "learning_rate": 0.00033176695209421325,
      "loss": 3.076,
      "step": 107421
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1666383743286133,
      "learning_rate": 0.0003317628845267745,
      "loss": 3.0168,
      "step": 107422
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8371763229370117,
      "learning_rate": 0.00033175881695343034,
      "loss": 3.1138,
      "step": 107423
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7611571550369263,
      "learning_rate": 0.00033175474937418165,
      "loss": 2.8481,
      "step": 107424
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0585756301879883,
      "learning_rate": 0.00033175068178902916,
      "loss": 2.8247,
      "step": 107425
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7131068706512451,
      "learning_rate": 0.00033174661419797357,
      "loss": 2.8645,
      "step": 107426
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.074761390686035,
      "learning_rate": 0.00033174254660101575,
      "loss": 3.0672,
      "step": 107427
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.016791582107544,
      "learning_rate": 0.0003317384789981564,
      "loss": 2.8908,
      "step": 107428
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.073174238204956,
      "learning_rate": 0.00033173441138939616,
      "loss": 2.9099,
      "step": 107429
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.804003119468689,
      "learning_rate": 0.000331730343774736,
      "loss": 2.7132,
      "step": 107430
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.239753007888794,
      "learning_rate": 0.0003317262761541766,
      "loss": 2.9064,
      "step": 107431
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.4948296546936035,
      "learning_rate": 0.0003317222085277186,
      "loss": 2.9273,
      "step": 107432
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7394402027130127,
      "learning_rate": 0.0003317181408953628,
      "loss": 3.0804,
      "step": 107433
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.36852765083313,
      "learning_rate": 0.00033171407325711017,
      "loss": 2.6581,
      "step": 107434
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8456276655197144,
      "learning_rate": 0.0003317100056129611,
      "loss": 3.0996,
      "step": 107435
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.702495813369751,
      "learning_rate": 0.00033170593796291666,
      "loss": 2.8562,
      "step": 107436
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.267224073410034,
      "learning_rate": 0.00033170187030697753,
      "loss": 2.82,
      "step": 107437
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7736166715621948,
      "learning_rate": 0.0003316978026451443,
      "loss": 2.9918,
      "step": 107438
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.03277850151062,
      "learning_rate": 0.0003316937349774179,
      "loss": 2.8863,
      "step": 107439
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.717769980430603,
      "learning_rate": 0.000331689667303799,
      "loss": 3.0314,
      "step": 107440
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8567311763763428,
      "learning_rate": 0.00033168559962428845,
      "loss": 3.0974,
      "step": 107441
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9106290340423584,
      "learning_rate": 0.0003316815319388869,
      "loss": 3.1011,
      "step": 107442
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.720375895500183,
      "learning_rate": 0.00033167746424759523,
      "loss": 3.2004,
      "step": 107443
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8953897953033447,
      "learning_rate": 0.0003316733965504141,
      "loss": 3.1753,
      "step": 107444
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8128995895385742,
      "learning_rate": 0.00033166932884734425,
      "loss": 2.9757,
      "step": 107445
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.504629135131836,
      "learning_rate": 0.0003316652611383865,
      "loss": 2.9506,
      "step": 107446
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1970479488372803,
      "learning_rate": 0.00033166119342354153,
      "loss": 2.9856,
      "step": 107447
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2455556392669678,
      "learning_rate": 0.00033165712570281015,
      "loss": 2.9941,
      "step": 107448
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7533496618270874,
      "learning_rate": 0.00033165305797619324,
      "loss": 3.0679,
      "step": 107449
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.542155146598816,
      "learning_rate": 0.00033164899024369133,
      "loss": 3.0968,
      "step": 107450
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6055241823196411,
      "learning_rate": 0.00033164492250530524,
      "loss": 3.0892,
      "step": 107451
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8696043491363525,
      "learning_rate": 0.00033164085476103584,
      "loss": 3.0266,
      "step": 107452
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.775559902191162,
      "learning_rate": 0.0003316367870108837,
      "loss": 3.1215,
      "step": 107453
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.326119899749756,
      "learning_rate": 0.0003316327192548498,
      "loss": 3.0939,
      "step": 107454
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0113744735717773,
      "learning_rate": 0.00033162865149293473,
      "loss": 3.1182,
      "step": 107455
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8988978862762451,
      "learning_rate": 0.00033162458372513933,
      "loss": 2.8571,
      "step": 107456
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6670215129852295,
      "learning_rate": 0.00033162051595146425,
      "loss": 2.9005,
      "step": 107457
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8846391439437866,
      "learning_rate": 0.00033161644817191046,
      "loss": 2.824,
      "step": 107458
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.876930594444275,
      "learning_rate": 0.0003316123803864784,
      "loss": 2.8574,
      "step": 107459
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8072407245635986,
      "learning_rate": 0.0003316083125951691,
      "loss": 3.0484,
      "step": 107460
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.06788969039917,
      "learning_rate": 0.0003316042447979832,
      "loss": 3.0651,
      "step": 107461
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.510401725769043,
      "learning_rate": 0.00033160017699492147,
      "loss": 3.269,
      "step": 107462
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1216065883636475,
      "learning_rate": 0.00033159610918598466,
      "loss": 3.1963,
      "step": 107463
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9880393743515015,
      "learning_rate": 0.00033159204137117354,
      "loss": 2.9952,
      "step": 107464
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.291473388671875,
      "learning_rate": 0.0003315879735504889,
      "loss": 2.948,
      "step": 107465
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9255214929580688,
      "learning_rate": 0.00033158390572393144,
      "loss": 3.1489,
      "step": 107466
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.488360643386841,
      "learning_rate": 0.00033157983789150187,
      "loss": 2.9378,
      "step": 107467
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.1405398845672607,
      "learning_rate": 0.00033157577005320113,
      "loss": 3.1639,
      "step": 107468
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.70503568649292,
      "learning_rate": 0.0003315717022090298,
      "loss": 3.0331,
      "step": 107469
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9842333793640137,
      "learning_rate": 0.0003315676343589887,
      "loss": 2.8924,
      "step": 107470
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2634100914001465,
      "learning_rate": 0.00033156356650307855,
      "loss": 2.9965,
      "step": 107471
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.330925464630127,
      "learning_rate": 0.0003315594986413002,
      "loss": 2.7308,
      "step": 107472
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.9645578861236572,
      "learning_rate": 0.00033155543077365425,
      "loss": 3.1014,
      "step": 107473
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4276986122131348,
      "learning_rate": 0.00033155136290014166,
      "loss": 3.0224,
      "step": 107474
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.753241777420044,
      "learning_rate": 0.00033154729502076304,
      "loss": 3.124,
      "step": 107475
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9231740236282349,
      "learning_rate": 0.0003315432271355191,
      "loss": 3.0807,
      "step": 107476
    },
    {
      "epoch": 1.4,
      "grad_norm": 6.039846897125244,
      "learning_rate": 0.0003315391592444108,
      "loss": 3.0621,
      "step": 107477
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.33689546585083,
      "learning_rate": 0.00033153509134743866,
      "loss": 2.9994,
      "step": 107478
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.397334098815918,
      "learning_rate": 0.0003315310234446036,
      "loss": 3.0308,
      "step": 107479
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9361658096313477,
      "learning_rate": 0.0003315269555359064,
      "loss": 2.9143,
      "step": 107480
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1948938369750977,
      "learning_rate": 0.0003315228876213477,
      "loss": 3.03,
      "step": 107481
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.324747562408447,
      "learning_rate": 0.0003315188197009282,
      "loss": 3.1013,
      "step": 107482
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3188045024871826,
      "learning_rate": 0.0003315147517746489,
      "loss": 2.8404,
      "step": 107483
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8613955974578857,
      "learning_rate": 0.0003315106838425103,
      "loss": 2.9648,
      "step": 107484
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8558086156845093,
      "learning_rate": 0.00033150661590451334,
      "loss": 3.0337,
      "step": 107485
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0370280742645264,
      "learning_rate": 0.0003315025479606587,
      "loss": 2.9564,
      "step": 107486
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.338609218597412,
      "learning_rate": 0.0003314984800109471,
      "loss": 2.7295,
      "step": 107487
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6567089557647705,
      "learning_rate": 0.00033149441205537934,
      "loss": 3.2374,
      "step": 107488
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9785610437393188,
      "learning_rate": 0.0003314903440939563,
      "loss": 3.0756,
      "step": 107489
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.568495750427246,
      "learning_rate": 0.00033148627612667847,
      "loss": 2.9972,
      "step": 107490
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.5878593921661377,
      "learning_rate": 0.0003314822081535468,
      "loss": 3.16,
      "step": 107491
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.126664161682129,
      "learning_rate": 0.0003314781401745619,
      "loss": 2.8747,
      "step": 107492
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.075559139251709,
      "learning_rate": 0.0003314740721897248,
      "loss": 2.8969,
      "step": 107493
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7847434282302856,
      "learning_rate": 0.00033147000419903597,
      "loss": 2.7558,
      "step": 107494
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.49910831451416,
      "learning_rate": 0.0003314659362024962,
      "loss": 3.0666,
      "step": 107495
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1633107662200928,
      "learning_rate": 0.0003314618682001064,
      "loss": 2.8938,
      "step": 107496
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4307174682617188,
      "learning_rate": 0.00033145780019186726,
      "loss": 3.0556,
      "step": 107497
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9350392818450928,
      "learning_rate": 0.00033145373217777945,
      "loss": 2.8921,
      "step": 107498
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.763052225112915,
      "learning_rate": 0.0003314496641578439,
      "loss": 3.0661,
      "step": 107499
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.486524820327759,
      "learning_rate": 0.00033144559613206123,
      "loss": 3.0505,
      "step": 107500
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.915379524230957,
      "learning_rate": 0.00033144152810043217,
      "loss": 3.2192,
      "step": 107501
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8591140508651733,
      "learning_rate": 0.00033143746006295753,
      "loss": 3.1288,
      "step": 107502
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6665762662887573,
      "learning_rate": 0.00033143339201963817,
      "loss": 3.0522,
      "step": 107503
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.965899705886841,
      "learning_rate": 0.00033142932397047474,
      "loss": 3.0362,
      "step": 107504
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.729612350463867,
      "learning_rate": 0.0003314252559154679,
      "loss": 2.8287,
      "step": 107505
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3320095539093018,
      "learning_rate": 0.00033142118785461856,
      "loss": 2.9464,
      "step": 107506
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7604386806488037,
      "learning_rate": 0.00033141711978792754,
      "loss": 2.9667,
      "step": 107507
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.5752949714660645,
      "learning_rate": 0.0003314130517153954,
      "loss": 2.9123,
      "step": 107508
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.404747486114502,
      "learning_rate": 0.00033140898363702285,
      "loss": 2.9561,
      "step": 107509
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2631399631500244,
      "learning_rate": 0.00033140491555281096,
      "loss": 2.8048,
      "step": 107510
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.818595051765442,
      "learning_rate": 0.00033140084746276026,
      "loss": 2.9661,
      "step": 107511
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.521275758743286,
      "learning_rate": 0.0003313967793668715,
      "loss": 2.8934,
      "step": 107512
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.738291025161743,
      "learning_rate": 0.0003313927112651456,
      "loss": 2.8291,
      "step": 107513
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.360841751098633,
      "learning_rate": 0.0003313886431575831,
      "loss": 3.1497,
      "step": 107514
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.037444591522217,
      "learning_rate": 0.0003313845750441848,
      "loss": 2.9304,
      "step": 107515
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.604553699493408,
      "learning_rate": 0.00033138050692495166,
      "loss": 3.0721,
      "step": 107516
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.609151601791382,
      "learning_rate": 0.00033137643879988416,
      "loss": 2.8922,
      "step": 107517
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6367273330688477,
      "learning_rate": 0.0003313723706689833,
      "loss": 2.9419,
      "step": 107518
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6581939458847046,
      "learning_rate": 0.0003313683025322497,
      "loss": 3.1267,
      "step": 107519
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.784085273742676,
      "learning_rate": 0.00033136423438968405,
      "loss": 2.7252,
      "step": 107520
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4056947231292725,
      "learning_rate": 0.00033136016624128723,
      "loss": 2.9858,
      "step": 107521
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2372968196868896,
      "learning_rate": 0.0003313560980870601,
      "loss": 2.8074,
      "step": 107522
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.562634825706482,
      "learning_rate": 0.00033135202992700313,
      "loss": 2.9419,
      "step": 107523
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4019293785095215,
      "learning_rate": 0.0003313479617611173,
      "loss": 2.9576,
      "step": 107524
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.9444739818573,
      "learning_rate": 0.00033134389358940327,
      "loss": 3.0151,
      "step": 107525
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9603819847106934,
      "learning_rate": 0.00033133982541186185,
      "loss": 3.1077,
      "step": 107526
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7042347192764282,
      "learning_rate": 0.00033133575722849365,
      "loss": 2.9007,
      "step": 107527
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.659424781799316,
      "learning_rate": 0.00033133168903929966,
      "loss": 2.8943,
      "step": 107528
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.305883407592773,
      "learning_rate": 0.00033132762084428047,
      "loss": 2.9544,
      "step": 107529
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5708389282226562,
      "learning_rate": 0.0003313235526434369,
      "loss": 3.149,
      "step": 107530
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5343564748764038,
      "learning_rate": 0.0003313194844367696,
      "loss": 2.9909,
      "step": 107531
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.23168683052063,
      "learning_rate": 0.0003313154162242796,
      "loss": 3.1738,
      "step": 107532
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.938987970352173,
      "learning_rate": 0.0003313113480059673,
      "loss": 2.981,
      "step": 107533
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0796518325805664,
      "learning_rate": 0.00033130727978183375,
      "loss": 3.1807,
      "step": 107534
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7669093608856201,
      "learning_rate": 0.00033130321155187955,
      "loss": 3.0539,
      "step": 107535
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5937033891677856,
      "learning_rate": 0.0003312991433161054,
      "loss": 2.9704,
      "step": 107536
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.520087242126465,
      "learning_rate": 0.0003312950750745122,
      "loss": 2.9322,
      "step": 107537
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.073982000350952,
      "learning_rate": 0.00033129100682710073,
      "loss": 2.7098,
      "step": 107538
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.91225266456604,
      "learning_rate": 0.00033128693857387164,
      "loss": 2.8915,
      "step": 107539
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.418839454650879,
      "learning_rate": 0.00033128287031482565,
      "loss": 3.1135,
      "step": 107540
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8007193803787231,
      "learning_rate": 0.00033127880204996367,
      "loss": 2.9452,
      "step": 107541
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1564407348632812,
      "learning_rate": 0.00033127473377928636,
      "loss": 3.1071,
      "step": 107542
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8537099361419678,
      "learning_rate": 0.00033127066550279447,
      "loss": 2.952,
      "step": 107543
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0702483654022217,
      "learning_rate": 0.00033126659722048876,
      "loss": 2.9792,
      "step": 107544
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.1230828762054443,
      "learning_rate": 0.00033126252893237,
      "loss": 2.9172,
      "step": 107545
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5830050706863403,
      "learning_rate": 0.0003312584606384389,
      "loss": 2.9398,
      "step": 107546
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6649779081344604,
      "learning_rate": 0.0003312543923386964,
      "loss": 3.255,
      "step": 107547
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.786752462387085,
      "learning_rate": 0.000331250324033143,
      "loss": 3.0713,
      "step": 107548
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.7885146141052246,
      "learning_rate": 0.0003312462557217796,
      "loss": 3.0116,
      "step": 107549
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8502254486083984,
      "learning_rate": 0.00033124218740460695,
      "loss": 2.8437,
      "step": 107550
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2677161693573,
      "learning_rate": 0.0003312381190816257,
      "loss": 3.0168,
      "step": 107551
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.3943941593170166,
      "learning_rate": 0.0003312340507528368,
      "loss": 2.9866,
      "step": 107552
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0539932250976562,
      "learning_rate": 0.00033122998241824094,
      "loss": 3.0975,
      "step": 107553
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.106707811355591,
      "learning_rate": 0.0003312259140778387,
      "loss": 2.8688,
      "step": 107554
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8624165058135986,
      "learning_rate": 0.0003312218457316311,
      "loss": 2.8797,
      "step": 107555
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7059073448181152,
      "learning_rate": 0.0003312177773796187,
      "loss": 2.9011,
      "step": 107556
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4676597118377686,
      "learning_rate": 0.0003312137090218023,
      "loss": 3.1491,
      "step": 107557
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.815535306930542,
      "learning_rate": 0.0003312096406581827,
      "loss": 2.9092,
      "step": 107558
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.824203372001648,
      "learning_rate": 0.0003312055722887607,
      "loss": 2.998,
      "step": 107559
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9687201976776123,
      "learning_rate": 0.0003312015039135369,
      "loss": 2.9108,
      "step": 107560
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8169547319412231,
      "learning_rate": 0.00033119743553251216,
      "loss": 2.7557,
      "step": 107561
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.070253610610962,
      "learning_rate": 0.00033119336714568736,
      "loss": 3.0486,
      "step": 107562
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.018059730529785,
      "learning_rate": 0.000331189298753063,
      "loss": 3.2195,
      "step": 107563
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4152820110321045,
      "learning_rate": 0.00033118523035464,
      "loss": 2.9094,
      "step": 107564
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7588202953338623,
      "learning_rate": 0.00033118116195041906,
      "loss": 3.0154,
      "step": 107565
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.249009132385254,
      "learning_rate": 0.000331177093540401,
      "loss": 2.8645,
      "step": 107566
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4726576805114746,
      "learning_rate": 0.0003311730251245864,
      "loss": 2.9284,
      "step": 107567
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7568414211273193,
      "learning_rate": 0.00033116895670297635,
      "loss": 2.9659,
      "step": 107568
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8520647287368774,
      "learning_rate": 0.00033116488827557126,
      "loss": 2.9304,
      "step": 107569
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7352321147918701,
      "learning_rate": 0.00033116081984237204,
      "loss": 3.1019,
      "step": 107570
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.781183958053589,
      "learning_rate": 0.0003311567514033795,
      "loss": 2.8692,
      "step": 107571
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.078303337097168,
      "learning_rate": 0.0003311526829585943,
      "loss": 2.9065,
      "step": 107572
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.1657934188842773,
      "learning_rate": 0.00033114861450801714,
      "loss": 2.9018,
      "step": 107573
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5465524196624756,
      "learning_rate": 0.000331144546051649,
      "loss": 3.1978,
      "step": 107574
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5087876319885254,
      "learning_rate": 0.0003311404775894904,
      "loss": 3.16,
      "step": 107575
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.29300856590271,
      "learning_rate": 0.00033113640912154225,
      "loss": 2.9464,
      "step": 107576
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.030775547027588,
      "learning_rate": 0.00033113234064780525,
      "loss": 2.8205,
      "step": 107577
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.776072382926941,
      "learning_rate": 0.0003311282721682802,
      "loss": 2.8794,
      "step": 107578
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8332953453063965,
      "learning_rate": 0.0003311242036829677,
      "loss": 3.1087,
      "step": 107579
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7848330736160278,
      "learning_rate": 0.00033112013519186873,
      "loss": 2.8959,
      "step": 107580
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4929816722869873,
      "learning_rate": 0.0003311160666949839,
      "loss": 2.9624,
      "step": 107581
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9701589345932007,
      "learning_rate": 0.000331111998192314,
      "loss": 2.8491,
      "step": 107582
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6373370885849,
      "learning_rate": 0.00033110792968385976,
      "loss": 3.0301,
      "step": 107583
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7702929973602295,
      "learning_rate": 0.00033110386116962196,
      "loss": 3.0964,
      "step": 107584
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8357367515563965,
      "learning_rate": 0.0003310997926496015,
      "loss": 3.0294,
      "step": 107585
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6674455404281616,
      "learning_rate": 0.0003310957241237989,
      "loss": 2.8828,
      "step": 107586
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9323270320892334,
      "learning_rate": 0.000331091655592215,
      "loss": 2.8706,
      "step": 107587
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.740431547164917,
      "learning_rate": 0.00033108758705485057,
      "loss": 2.9056,
      "step": 107588
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.595740795135498,
      "learning_rate": 0.00033108351851170647,
      "loss": 3.0346,
      "step": 107589
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7123985290527344,
      "learning_rate": 0.0003310794499627833,
      "loss": 3.4195,
      "step": 107590
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8167874813079834,
      "learning_rate": 0.00033107538140808183,
      "loss": 2.9497,
      "step": 107591
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.397261142730713,
      "learning_rate": 0.00033107131284760297,
      "loss": 2.9602,
      "step": 107592
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5790019035339355,
      "learning_rate": 0.00033106724428134726,
      "loss": 2.903,
      "step": 107593
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.781421184539795,
      "learning_rate": 0.00033106317570931557,
      "loss": 3.0991,
      "step": 107594
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2738595008850098,
      "learning_rate": 0.00033105910713150866,
      "loss": 2.9958,
      "step": 107595
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0443193912506104,
      "learning_rate": 0.00033105503854792735,
      "loss": 3.0672,
      "step": 107596
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.775247573852539,
      "learning_rate": 0.0003310509699585722,
      "loss": 3.0035,
      "step": 107597
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6821162700653076,
      "learning_rate": 0.00033104690136344416,
      "loss": 3.1494,
      "step": 107598
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.013335943222046,
      "learning_rate": 0.00033104283276254396,
      "loss": 2.9165,
      "step": 107599
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.788517713546753,
      "learning_rate": 0.00033103876415587223,
      "loss": 3.0853,
      "step": 107600
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.938953161239624,
      "learning_rate": 0.00033103469554342983,
      "loss": 3.2407,
      "step": 107601
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.104184627532959,
      "learning_rate": 0.00033103062692521753,
      "loss": 3.2279,
      "step": 107602
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9368412494659424,
      "learning_rate": 0.000331026558301236,
      "loss": 3.0166,
      "step": 107603
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.769101619720459,
      "learning_rate": 0.0003310224896714861,
      "loss": 3.2638,
      "step": 107604
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8047398328781128,
      "learning_rate": 0.0003310184210359685,
      "loss": 2.9118,
      "step": 107605
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8193280696868896,
      "learning_rate": 0.000331014352394684,
      "loss": 3.2489,
      "step": 107606
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.728528618812561,
      "learning_rate": 0.00033101028374763326,
      "loss": 3.1024,
      "step": 107607
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7069720029830933,
      "learning_rate": 0.0003310062150948173,
      "loss": 3.1147,
      "step": 107608
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9486438035964966,
      "learning_rate": 0.00033100214643623656,
      "loss": 2.6707,
      "step": 107609
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9919897317886353,
      "learning_rate": 0.000330998077771892,
      "loss": 2.7495,
      "step": 107610
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2686150074005127,
      "learning_rate": 0.0003309940091017843,
      "loss": 2.7434,
      "step": 107611
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7769575119018555,
      "learning_rate": 0.00033098994042591424,
      "loss": 3.0531,
      "step": 107612
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4460151195526123,
      "learning_rate": 0.0003309858717442825,
      "loss": 2.8615,
      "step": 107613
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1699914932250977,
      "learning_rate": 0.00033098180305689,
      "loss": 3.2632,
      "step": 107614
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8779869079589844,
      "learning_rate": 0.0003309777343637373,
      "loss": 3.2091,
      "step": 107615
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.321950912475586,
      "learning_rate": 0.0003309736656648253,
      "loss": 3.1922,
      "step": 107616
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.199831962585449,
      "learning_rate": 0.0003309695969601547,
      "loss": 3.0507,
      "step": 107617
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.777530550956726,
      "learning_rate": 0.0003309655282497263,
      "loss": 2.9219,
      "step": 107618
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7504643201828003,
      "learning_rate": 0.0003309614595335408,
      "loss": 2.89,
      "step": 107619
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8909389972686768,
      "learning_rate": 0.000330957390811599,
      "loss": 2.8482,
      "step": 107620
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.7169597148895264,
      "learning_rate": 0.0003309533220839016,
      "loss": 2.8325,
      "step": 107621
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3207778930664062,
      "learning_rate": 0.0003309492533504494,
      "loss": 3.0081,
      "step": 107622
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9239696264266968,
      "learning_rate": 0.0003309451846112432,
      "loss": 3.0061,
      "step": 107623
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5059895515441895,
      "learning_rate": 0.00033094111586628365,
      "loss": 2.9193,
      "step": 107624
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0412068367004395,
      "learning_rate": 0.00033093704711557154,
      "loss": 2.9196,
      "step": 107625
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.896060585975647,
      "learning_rate": 0.0003309329783591078,
      "loss": 3.2088,
      "step": 107626
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.864590644836426,
      "learning_rate": 0.00033092890959689284,
      "loss": 2.7566,
      "step": 107627
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.798566222190857,
      "learning_rate": 0.00033092484082892766,
      "loss": 2.8275,
      "step": 107628
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5851404666900635,
      "learning_rate": 0.00033092077205521306,
      "loss": 2.9979,
      "step": 107629
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.551664113998413,
      "learning_rate": 0.00033091670327574965,
      "loss": 3.166,
      "step": 107630
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.519268035888672,
      "learning_rate": 0.0003309126344905382,
      "loss": 3.0615,
      "step": 107631
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.050945281982422,
      "learning_rate": 0.0003309085656995796,
      "loss": 3.033,
      "step": 107632
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.245107889175415,
      "learning_rate": 0.00033090449690287445,
      "loss": 3.0829,
      "step": 107633
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.7215027809143066,
      "learning_rate": 0.00033090042810042357,
      "loss": 2.9963,
      "step": 107634
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.174180507659912,
      "learning_rate": 0.00033089635929222776,
      "loss": 2.8157,
      "step": 107635
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0155889987945557,
      "learning_rate": 0.0003308922904782877,
      "loss": 2.9809,
      "step": 107636
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7258273363113403,
      "learning_rate": 0.00033088822165860417,
      "loss": 3.1665,
      "step": 107637
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9661579132080078,
      "learning_rate": 0.00033088415283317796,
      "loss": 3.1073,
      "step": 107638
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6912527084350586,
      "learning_rate": 0.0003308800840020098,
      "loss": 2.9937,
      "step": 107639
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.00946307182312,
      "learning_rate": 0.00033087601516510037,
      "loss": 3.078,
      "step": 107640
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.059673547744751,
      "learning_rate": 0.00033087194632245063,
      "loss": 2.878,
      "step": 107641
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9762853384017944,
      "learning_rate": 0.0003308678774740612,
      "loss": 2.9473,
      "step": 107642
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.413816213607788,
      "learning_rate": 0.0003308638086199327,
      "loss": 3.1964,
      "step": 107643
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3612451553344727,
      "learning_rate": 0.0003308597397600662,
      "loss": 2.8418,
      "step": 107644
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9555672407150269,
      "learning_rate": 0.00033085567089446223,
      "loss": 2.9038,
      "step": 107645
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9314101934432983,
      "learning_rate": 0.0003308516020231216,
      "loss": 2.9523,
      "step": 107646
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6384310722351074,
      "learning_rate": 0.0003308475331460451,
      "loss": 3.117,
      "step": 107647
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0962069034576416,
      "learning_rate": 0.0003308434642632334,
      "loss": 3.1132,
      "step": 107648
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.804978370666504,
      "learning_rate": 0.0003308393953746874,
      "loss": 3.0531,
      "step": 107649
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6696994304656982,
      "learning_rate": 0.0003308353264804078,
      "loss": 3.0601,
      "step": 107650
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.408001661300659,
      "learning_rate": 0.00033083125758039524,
      "loss": 2.8853,
      "step": 107651
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0655672550201416,
      "learning_rate": 0.00033082718867465056,
      "loss": 2.9494,
      "step": 107652
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.456331729888916,
      "learning_rate": 0.00033082311976317464,
      "loss": 2.9697,
      "step": 107653
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.057889699935913,
      "learning_rate": 0.000330819050845968,
      "loss": 2.8776,
      "step": 107654
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8135309219360352,
      "learning_rate": 0.00033081498192303157,
      "loss": 3.1987,
      "step": 107655
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7036268711090088,
      "learning_rate": 0.0003308109129943661,
      "loss": 2.9357,
      "step": 107656
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8228243589401245,
      "learning_rate": 0.0003308068440599722,
      "loss": 3.1939,
      "step": 107657
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6659437417984009,
      "learning_rate": 0.0003308027751198508,
      "loss": 3.1449,
      "step": 107658
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.192051649093628,
      "learning_rate": 0.00033079870617400256,
      "loss": 3.0573,
      "step": 107659
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8742581605911255,
      "learning_rate": 0.00033079463722242823,
      "loss": 3.1237,
      "step": 107660
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.221301555633545,
      "learning_rate": 0.0003307905682651286,
      "loss": 3.0862,
      "step": 107661
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8993011713027954,
      "learning_rate": 0.0003307864993021044,
      "loss": 3.0769,
      "step": 107662
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.063286781311035,
      "learning_rate": 0.0003307824303333565,
      "loss": 3.095,
      "step": 107663
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.674483060836792,
      "learning_rate": 0.0003307783613588855,
      "loss": 2.8979,
      "step": 107664
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8873460292816162,
      "learning_rate": 0.0003307742923786922,
      "loss": 2.8073,
      "step": 107665
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2074010372161865,
      "learning_rate": 0.00033077022339277745,
      "loss": 2.9078,
      "step": 107666
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5182247161865234,
      "learning_rate": 0.0003307661544011419,
      "loss": 2.6878,
      "step": 107667
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.470282554626465,
      "learning_rate": 0.0003307620854037863,
      "loss": 3.0318,
      "step": 107668
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1835155487060547,
      "learning_rate": 0.00033075801640071147,
      "loss": 3.074,
      "step": 107669
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.12202525138855,
      "learning_rate": 0.00033075394739191826,
      "loss": 3.1587,
      "step": 107670
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.222520589828491,
      "learning_rate": 0.0003307498783774072,
      "loss": 3.0087,
      "step": 107671
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9652680158615112,
      "learning_rate": 0.0003307458093571791,
      "loss": 3.1606,
      "step": 107672
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.393496513366699,
      "learning_rate": 0.0003307417403312349,
      "loss": 2.8483,
      "step": 107673
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0023794174194336,
      "learning_rate": 0.0003307376712995752,
      "loss": 3.0753,
      "step": 107674
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8600430488586426,
      "learning_rate": 0.0003307336022622007,
      "loss": 2.921,
      "step": 107675
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2491188049316406,
      "learning_rate": 0.00033072953321911224,
      "loss": 2.8057,
      "step": 107676
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.5768492221832275,
      "learning_rate": 0.0003307254641703107,
      "loss": 3.3131,
      "step": 107677
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.883352518081665,
      "learning_rate": 0.00033072139511579666,
      "loss": 3.0277,
      "step": 107678
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9636890888214111,
      "learning_rate": 0.0003307173260555709,
      "loss": 3.1909,
      "step": 107679
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.023477077484131,
      "learning_rate": 0.0003307132569896342,
      "loss": 2.8239,
      "step": 107680
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.866763710975647,
      "learning_rate": 0.0003307091879179874,
      "loss": 3.1762,
      "step": 107681
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.504303455352783,
      "learning_rate": 0.00033070511884063117,
      "loss": 3.0731,
      "step": 107682
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7122998237609863,
      "learning_rate": 0.00033070104975756626,
      "loss": 3.1074,
      "step": 107683
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9220471382141113,
      "learning_rate": 0.00033069698066879346,
      "loss": 2.9181,
      "step": 107684
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3159291744232178,
      "learning_rate": 0.0003306929115743135,
      "loss": 2.9001,
      "step": 107685
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2272274494171143,
      "learning_rate": 0.0003306888424741271,
      "loss": 3.1103,
      "step": 107686
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7847646474838257,
      "learning_rate": 0.00033068477336823515,
      "loss": 2.8887,
      "step": 107687
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3087234497070312,
      "learning_rate": 0.0003306807042566383,
      "loss": 2.7309,
      "step": 107688
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.6061174869537354,
      "learning_rate": 0.00033067663513933725,
      "loss": 2.8151,
      "step": 107689
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8064738512039185,
      "learning_rate": 0.00033067256601633293,
      "loss": 2.878,
      "step": 107690
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.504295825958252,
      "learning_rate": 0.00033066849688762593,
      "loss": 2.8793,
      "step": 107691
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2433414459228516,
      "learning_rate": 0.0003306644277532171,
      "loss": 3.0418,
      "step": 107692
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.019620180130005,
      "learning_rate": 0.0003306603586131072,
      "loss": 3.2271,
      "step": 107693
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9973316192626953,
      "learning_rate": 0.00033065628946729695,
      "loss": 3.0315,
      "step": 107694
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.0140676498413086,
      "learning_rate": 0.00033065222031578706,
      "loss": 3.1245,
      "step": 107695
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3527631759643555,
      "learning_rate": 0.0003306481511585785,
      "loss": 2.9086,
      "step": 107696
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5937446355819702,
      "learning_rate": 0.00033064408199567175,
      "loss": 3.1307,
      "step": 107697
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7430635690689087,
      "learning_rate": 0.0003306400128270677,
      "loss": 3.045,
      "step": 107698
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.091498613357544,
      "learning_rate": 0.0003306359436527671,
      "loss": 3.2469,
      "step": 107699
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1637320518493652,
      "learning_rate": 0.0003306318744727707,
      "loss": 3.1165,
      "step": 107700
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6293878555297852,
      "learning_rate": 0.0003306278052870792,
      "loss": 3.131,
      "step": 107701
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5563032627105713,
      "learning_rate": 0.00033062373609569345,
      "loss": 2.6962,
      "step": 107702
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7668945789337158,
      "learning_rate": 0.0003306196668986143,
      "loss": 3.1111,
      "step": 107703
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.002141237258911,
      "learning_rate": 0.0003306155976958422,
      "loss": 3.226,
      "step": 107704
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8596982955932617,
      "learning_rate": 0.0003306115284873781,
      "loss": 2.9297,
      "step": 107705
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9918895959854126,
      "learning_rate": 0.0003306074592732228,
      "loss": 3.038,
      "step": 107706
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6934868097305298,
      "learning_rate": 0.000330603390053377,
      "loss": 2.9446,
      "step": 107707
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2515647411346436,
      "learning_rate": 0.0003305993208278414,
      "loss": 2.83,
      "step": 107708
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.116894006729126,
      "learning_rate": 0.00033059525159661685,
      "loss": 3.0651,
      "step": 107709
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.771016001701355,
      "learning_rate": 0.00033059118235970403,
      "loss": 3.0932,
      "step": 107710
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.893357276916504,
      "learning_rate": 0.0003305871131171038,
      "loss": 2.8644,
      "step": 107711
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6750227212905884,
      "learning_rate": 0.0003305830438688168,
      "loss": 2.9451,
      "step": 107712
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5832223892211914,
      "learning_rate": 0.0003305789746148438,
      "loss": 3.0935,
      "step": 107713
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.527802586555481,
      "learning_rate": 0.00033057490535518566,
      "loss": 3.1327,
      "step": 107714
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8636436462402344,
      "learning_rate": 0.000330570836089843,
      "loss": 3.0431,
      "step": 107715
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3143680095672607,
      "learning_rate": 0.0003305667668188167,
      "loss": 2.8938,
      "step": 107716
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2660021781921387,
      "learning_rate": 0.0003305626975421075,
      "loss": 3.0293,
      "step": 107717
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4952666759490967,
      "learning_rate": 0.00033055862825971606,
      "loss": 2.8801,
      "step": 107718
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.915369987487793,
      "learning_rate": 0.00033055455897164316,
      "loss": 3.0626,
      "step": 107719
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4507675170898438,
      "learning_rate": 0.00033055048967788964,
      "loss": 2.7526,
      "step": 107720
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3168978691101074,
      "learning_rate": 0.00033054642037845614,
      "loss": 3.0932,
      "step": 107721
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8224669694900513,
      "learning_rate": 0.00033054235107334354,
      "loss": 2.9561,
      "step": 107722
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.935760736465454,
      "learning_rate": 0.0003305382817625526,
      "loss": 2.8633,
      "step": 107723
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.711141586303711,
      "learning_rate": 0.00033053421244608395,
      "loss": 3.1594,
      "step": 107724
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7343758344650269,
      "learning_rate": 0.00033053014312393837,
      "loss": 2.6899,
      "step": 107725
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.779110312461853,
      "learning_rate": 0.00033052607379611677,
      "loss": 3.0293,
      "step": 107726
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3011462688446045,
      "learning_rate": 0.0003305220044626197,
      "loss": 2.869,
      "step": 107727
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0559258460998535,
      "learning_rate": 0.0003305179351234481,
      "loss": 3.1327,
      "step": 107728
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9012130498886108,
      "learning_rate": 0.0003305138657786025,
      "loss": 2.896,
      "step": 107729
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2127997875213623,
      "learning_rate": 0.00033050979642808395,
      "loss": 2.9404,
      "step": 107730
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.605128526687622,
      "learning_rate": 0.0003305057270718929,
      "loss": 3.0351,
      "step": 107731
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8985198736190796,
      "learning_rate": 0.0003305016577100304,
      "loss": 3.1038,
      "step": 107732
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.720201015472412,
      "learning_rate": 0.000330497588342497,
      "loss": 3.2804,
      "step": 107733
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6677263975143433,
      "learning_rate": 0.00033049351896929355,
      "loss": 3.1489,
      "step": 107734
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6164692640304565,
      "learning_rate": 0.0003304894495904208,
      "loss": 3.1223,
      "step": 107735
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6447349786758423,
      "learning_rate": 0.0003304853802058794,
      "loss": 2.9585,
      "step": 107736
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7465711832046509,
      "learning_rate": 0.0003304813108156702,
      "loss": 3.1041,
      "step": 107737
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.068800210952759,
      "learning_rate": 0.000330477241419794,
      "loss": 3.0173,
      "step": 107738
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7906779050827026,
      "learning_rate": 0.00033047317201825155,
      "loss": 2.7844,
      "step": 107739
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.562869668006897,
      "learning_rate": 0.0003304691026110435,
      "loss": 2.8537,
      "step": 107740
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0511651039123535,
      "learning_rate": 0.00033046503319817067,
      "loss": 3.1824,
      "step": 107741
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5262742042541504,
      "learning_rate": 0.0003304609637796338,
      "loss": 2.9327,
      "step": 107742
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7479804754257202,
      "learning_rate": 0.0003304568943554337,
      "loss": 2.9085,
      "step": 107743
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8166029453277588,
      "learning_rate": 0.00033045282492557106,
      "loss": 3.0418,
      "step": 107744
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.979623794555664,
      "learning_rate": 0.0003304487554900467,
      "loss": 3.1284,
      "step": 107745
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0477466583251953,
      "learning_rate": 0.00033044468604886127,
      "loss": 3.1536,
      "step": 107746
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8585402965545654,
      "learning_rate": 0.00033044061660201564,
      "loss": 2.9831,
      "step": 107747
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.853865623474121,
      "learning_rate": 0.00033043654714951063,
      "loss": 2.8772,
      "step": 107748
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8505617380142212,
      "learning_rate": 0.0003304324776913468,
      "loss": 3.0054,
      "step": 107749
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.006287097930908,
      "learning_rate": 0.00033042840822752493,
      "loss": 3.3253,
      "step": 107750
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8381457328796387,
      "learning_rate": 0.0003304243387580459,
      "loss": 2.9541,
      "step": 107751
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7495194673538208,
      "learning_rate": 0.0003304202692829105,
      "loss": 3.0142,
      "step": 107752
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.574230432510376,
      "learning_rate": 0.00033041619980211923,
      "loss": 2.8615,
      "step": 107753
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.007659912109375,
      "learning_rate": 0.00033041213031567316,
      "loss": 3.0667,
      "step": 107754
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.332897424697876,
      "learning_rate": 0.0003304080608235729,
      "loss": 2.9452,
      "step": 107755
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7526782751083374,
      "learning_rate": 0.00033040399132581907,
      "loss": 3.3433,
      "step": 107756
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6704392433166504,
      "learning_rate": 0.0003303999218224126,
      "loss": 2.9843,
      "step": 107757
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.676650285720825,
      "learning_rate": 0.00033039585231335426,
      "loss": 3.0699,
      "step": 107758
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.861167311668396,
      "learning_rate": 0.00033039178279864473,
      "loss": 2.9966,
      "step": 107759
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9597842693328857,
      "learning_rate": 0.0003303877132782848,
      "loss": 2.8166,
      "step": 107760
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5387201309204102,
      "learning_rate": 0.00033038364375227524,
      "loss": 2.9202,
      "step": 107761
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8553248643875122,
      "learning_rate": 0.0003303795742206168,
      "loss": 3.0635,
      "step": 107762
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.024358034133911,
      "learning_rate": 0.0003303755046833102,
      "loss": 3.0515,
      "step": 107763
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8194503784179688,
      "learning_rate": 0.0003303714351403562,
      "loss": 3.1787,
      "step": 107764
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4780423641204834,
      "learning_rate": 0.0003303673655917557,
      "loss": 2.9314,
      "step": 107765
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4139151573181152,
      "learning_rate": 0.00033036329603750914,
      "loss": 2.9916,
      "step": 107766
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8470349311828613,
      "learning_rate": 0.00033035922647761754,
      "loss": 2.8753,
      "step": 107767
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8918579816818237,
      "learning_rate": 0.00033035515691208164,
      "loss": 2.8545,
      "step": 107768
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0856261253356934,
      "learning_rate": 0.00033035108734090214,
      "loss": 2.9317,
      "step": 107769
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6931779384613037,
      "learning_rate": 0.00033034701776407974,
      "loss": 2.9365,
      "step": 107770
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8066788911819458,
      "learning_rate": 0.0003303429481816153,
      "loss": 2.7562,
      "step": 107771
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9769610166549683,
      "learning_rate": 0.0003303388785935096,
      "loss": 2.927,
      "step": 107772
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.792249083518982,
      "learning_rate": 0.00033033480899976323,
      "loss": 2.721,
      "step": 107773
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7986818552017212,
      "learning_rate": 0.000330330739400377,
      "loss": 2.8187,
      "step": 107774
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8883050680160522,
      "learning_rate": 0.00033032666979535187,
      "loss": 3.0235,
      "step": 107775
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.4330580234527588,
      "learning_rate": 0.0003303226001846883,
      "loss": 3.0026,
      "step": 107776
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0898079872131348,
      "learning_rate": 0.00033031853056838726,
      "loss": 2.9529,
      "step": 107777
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.458630084991455,
      "learning_rate": 0.0003303144609464495,
      "loss": 2.8049,
      "step": 107778
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9687018394470215,
      "learning_rate": 0.0003303103913188755,
      "loss": 2.9303,
      "step": 107779
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.63813054561615,
      "learning_rate": 0.0003303063216856664,
      "loss": 2.9161,
      "step": 107780
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.901562452316284,
      "learning_rate": 0.00033030225204682274,
      "loss": 2.9847,
      "step": 107781
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6333749294281006,
      "learning_rate": 0.0003302981824023453,
      "loss": 2.8647,
      "step": 107782
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8851430416107178,
      "learning_rate": 0.0003302941127522348,
      "loss": 2.7696,
      "step": 107783
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.042351722717285,
      "learning_rate": 0.0003302900430964922,
      "loss": 3.1082,
      "step": 107784
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3097970485687256,
      "learning_rate": 0.000330285973435118,
      "loss": 2.9035,
      "step": 107785
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9362090826034546,
      "learning_rate": 0.00033028190376811306,
      "loss": 3.0549,
      "step": 107786
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.5209298133850098,
      "learning_rate": 0.00033027783409547826,
      "loss": 2.8303,
      "step": 107787
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.7932047843933105,
      "learning_rate": 0.0003302737644172141,
      "loss": 3.1555,
      "step": 107788
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9693899154663086,
      "learning_rate": 0.0003302696947333215,
      "loss": 2.9899,
      "step": 107789
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.549085855484009,
      "learning_rate": 0.0003302656250438013,
      "loss": 3.0551,
      "step": 107790
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.624708652496338,
      "learning_rate": 0.00033026155534865404,
      "loss": 2.9163,
      "step": 107791
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8625526428222656,
      "learning_rate": 0.0003302574856478806,
      "loss": 2.985,
      "step": 107792
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8160017728805542,
      "learning_rate": 0.0003302534159414818,
      "loss": 2.9861,
      "step": 107793
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.779148817062378,
      "learning_rate": 0.0003302493462294582,
      "loss": 3.1098,
      "step": 107794
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.287712574005127,
      "learning_rate": 0.00033024527651181074,
      "loss": 3.1877,
      "step": 107795
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.372919797897339,
      "learning_rate": 0.00033024120678854007,
      "loss": 2.9083,
      "step": 107796
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6715000867843628,
      "learning_rate": 0.0003302371370596471,
      "loss": 3.1712,
      "step": 107797
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.372650623321533,
      "learning_rate": 0.0003302330673251324,
      "loss": 2.8649,
      "step": 107798
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.567084312438965,
      "learning_rate": 0.00033022899758499677,
      "loss": 3.075,
      "step": 107799
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8665690422058105,
      "learning_rate": 0.00033022492783924104,
      "loss": 2.8201,
      "step": 107800
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7722485065460205,
      "learning_rate": 0.00033022085808786595,
      "loss": 2.8688,
      "step": 107801
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0725672245025635,
      "learning_rate": 0.00033021678833087214,
      "loss": 2.9118,
      "step": 107802
    },
    {
      "epoch": 1.4,
      "grad_norm": 4.18280029296875,
      "learning_rate": 0.00033021271856826053,
      "loss": 3.0708,
      "step": 107803
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.3037326335906982,
      "learning_rate": 0.0003302086488000318,
      "loss": 3.0621,
      "step": 107804
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.4223357439041138,
      "learning_rate": 0.0003302045790261867,
      "loss": 2.9107,
      "step": 107805
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1457021236419678,
      "learning_rate": 0.000330200509246726,
      "loss": 2.6606,
      "step": 107806
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.600687026977539,
      "learning_rate": 0.0003301964394616505,
      "loss": 3.2976,
      "step": 107807
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.070573091506958,
      "learning_rate": 0.0003301923696709608,
      "loss": 3.1425,
      "step": 107808
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0423808097839355,
      "learning_rate": 0.00033018829987465796,
      "loss": 2.9234,
      "step": 107809
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2689499855041504,
      "learning_rate": 0.0003301842300727424,
      "loss": 3.1447,
      "step": 107810
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4256224632263184,
      "learning_rate": 0.000330180160265215,
      "loss": 3.0432,
      "step": 107811
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2373645305633545,
      "learning_rate": 0.00033017609045207666,
      "loss": 3.1025,
      "step": 107812
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.73958158493042,
      "learning_rate": 0.0003301720206333279,
      "loss": 3.0605,
      "step": 107813
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0831127166748047,
      "learning_rate": 0.00033016795080896956,
      "loss": 2.9706,
      "step": 107814
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.503281354904175,
      "learning_rate": 0.0003301638809790026,
      "loss": 3.4122,
      "step": 107815
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.737236976623535,
      "learning_rate": 0.00033015981114342744,
      "loss": 3.0903,
      "step": 107816
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9424047470092773,
      "learning_rate": 0.00033015574130224507,
      "loss": 3.2828,
      "step": 107817
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.341561794281006,
      "learning_rate": 0.0003301516714554562,
      "loss": 2.9174,
      "step": 107818
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9790211915969849,
      "learning_rate": 0.0003301476016030615,
      "loss": 2.9517,
      "step": 107819
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7517156600952148,
      "learning_rate": 0.00033014353174506186,
      "loss": 2.9415,
      "step": 107820
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9138786792755127,
      "learning_rate": 0.000330139461881458,
      "loss": 3.0343,
      "step": 107821
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.698532223701477,
      "learning_rate": 0.0003301353920122505,
      "loss": 3.1243,
      "step": 107822
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9650590419769287,
      "learning_rate": 0.00033013132213744037,
      "loss": 3.0318,
      "step": 107823
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.868048906326294,
      "learning_rate": 0.0003301272522570283,
      "loss": 3.0255,
      "step": 107824
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.1024956703186035,
      "learning_rate": 0.000330123182371015,
      "loss": 2.9251,
      "step": 107825
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7769984006881714,
      "learning_rate": 0.0003301191124794011,
      "loss": 2.9041,
      "step": 107826
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.283291816711426,
      "learning_rate": 0.0003301150425821876,
      "loss": 3.0048,
      "step": 107827
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.1586055755615234,
      "learning_rate": 0.0003301109726793751,
      "loss": 2.898,
      "step": 107828
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.7748355865478516,
      "learning_rate": 0.00033010690277096436,
      "loss": 3.0246,
      "step": 107829
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6715421676635742,
      "learning_rate": 0.0003301028328569563,
      "loss": 3.1484,
      "step": 107830
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.718616008758545,
      "learning_rate": 0.00033009876293735145,
      "loss": 2.9245,
      "step": 107831
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.7909765243530273,
      "learning_rate": 0.00033009469301215066,
      "loss": 3.1635,
      "step": 107832
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8594584465026855,
      "learning_rate": 0.00033009062308135485,
      "loss": 2.9607,
      "step": 107833
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.5756757259368896,
      "learning_rate": 0.00033008655314496445,
      "loss": 3.0721,
      "step": 107834
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.241201162338257,
      "learning_rate": 0.00033008248320298044,
      "loss": 2.8723,
      "step": 107835
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.8514838218688965,
      "learning_rate": 0.0003300784132554036,
      "loss": 3.1454,
      "step": 107836
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.1715004444122314,
      "learning_rate": 0.00033007434330223454,
      "loss": 3.0268,
      "step": 107837
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.261756658554077,
      "learning_rate": 0.0003300702733434741,
      "loss": 2.7579,
      "step": 107838
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6203866004943848,
      "learning_rate": 0.0003300662033791231,
      "loss": 3.2287,
      "step": 107839
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.284137725830078,
      "learning_rate": 0.0003300621334091821,
      "loss": 2.9404,
      "step": 107840
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.109464645385742,
      "learning_rate": 0.00033005806343365204,
      "loss": 3.0625,
      "step": 107841
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9225757122039795,
      "learning_rate": 0.00033005399345253364,
      "loss": 2.9462,
      "step": 107842
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.801275610923767,
      "learning_rate": 0.00033004992346582763,
      "loss": 2.9183,
      "step": 107843
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.326711893081665,
      "learning_rate": 0.0003300458534735348,
      "loss": 3.0163,
      "step": 107844
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.2433547973632812,
      "learning_rate": 0.0003300417834756558,
      "loss": 3.1055,
      "step": 107845
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5832877159118652,
      "learning_rate": 0.0003300377134721916,
      "loss": 3.3393,
      "step": 107846
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.92930269241333,
      "learning_rate": 0.00033003364346314266,
      "loss": 2.8432,
      "step": 107847
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.4019107818603516,
      "learning_rate": 0.00033002957344851,
      "loss": 3.1809,
      "step": 107848
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8725279569625854,
      "learning_rate": 0.00033002550342829424,
      "loss": 2.9533,
      "step": 107849
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.27305269241333,
      "learning_rate": 0.0003300214334024962,
      "loss": 3.111,
      "step": 107850
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6083042621612549,
      "learning_rate": 0.00033001736337111656,
      "loss": 2.9719,
      "step": 107851
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.914923071861267,
      "learning_rate": 0.0003300132933341561,
      "loss": 2.9924,
      "step": 107852
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6354756355285645,
      "learning_rate": 0.0003300092232916157,
      "loss": 2.7211,
      "step": 107853
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.64997136592865,
      "learning_rate": 0.000330005153243496,
      "loss": 2.9738,
      "step": 107854
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6645046472549438,
      "learning_rate": 0.00033000108318979773,
      "loss": 3.0459,
      "step": 107855
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8878380060195923,
      "learning_rate": 0.00032999701313052163,
      "loss": 2.8898,
      "step": 107856
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5536701679229736,
      "learning_rate": 0.0003299929430656687,
      "loss": 3.2268,
      "step": 107857
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0076520442962646,
      "learning_rate": 0.00032998887299523944,
      "loss": 2.7976,
      "step": 107858
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9203858375549316,
      "learning_rate": 0.0003299848029192346,
      "loss": 3.0625,
      "step": 107859
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8809874057769775,
      "learning_rate": 0.0003299807328376551,
      "loss": 3.1486,
      "step": 107860
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.308067560195923,
      "learning_rate": 0.00032997666275050156,
      "loss": 3.1431,
      "step": 107861
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8153729438781738,
      "learning_rate": 0.00032997259265777484,
      "loss": 2.844,
      "step": 107862
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7939749956130981,
      "learning_rate": 0.0003299685225594756,
      "loss": 3.0988,
      "step": 107863
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.8802084922790527,
      "learning_rate": 0.00032996445245560475,
      "loss": 2.8318,
      "step": 107864
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9371110200881958,
      "learning_rate": 0.0003299603823461629,
      "loss": 3.1136,
      "step": 107865
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.423002004623413,
      "learning_rate": 0.0003299563122311508,
      "loss": 3.2483,
      "step": 107866
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.911455750465393,
      "learning_rate": 0.0003299522421105693,
      "loss": 2.7712,
      "step": 107867
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.989399790763855,
      "learning_rate": 0.00032994817198441905,
      "loss": 3.198,
      "step": 107868
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.817214846611023,
      "learning_rate": 0.0003299441018527009,
      "loss": 2.6776,
      "step": 107869
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8066692352294922,
      "learning_rate": 0.00032994003171541564,
      "loss": 3.068,
      "step": 107870
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.023000478744507,
      "learning_rate": 0.0003299359615725639,
      "loss": 2.7621,
      "step": 107871
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8629097938537598,
      "learning_rate": 0.0003299318914241465,
      "loss": 2.8894,
      "step": 107872
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.814958930015564,
      "learning_rate": 0.0003299278212701643,
      "loss": 3.0584,
      "step": 107873
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.09040904045105,
      "learning_rate": 0.0003299237511106178,
      "loss": 3.0498,
      "step": 107874
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7814170122146606,
      "learning_rate": 0.0003299196809455079,
      "loss": 3.0353,
      "step": 107875
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.185779094696045,
      "learning_rate": 0.0003299156107748355,
      "loss": 2.9349,
      "step": 107876
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.6079022884368896,
      "learning_rate": 0.00032991154059860116,
      "loss": 2.8898,
      "step": 107877
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.376826286315918,
      "learning_rate": 0.0003299074704168057,
      "loss": 2.968,
      "step": 107878
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7349573373794556,
      "learning_rate": 0.0003299034002294499,
      "loss": 3.2091,
      "step": 107879
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.000826597213745,
      "learning_rate": 0.00032989933003653443,
      "loss": 2.9097,
      "step": 107880
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9562894105911255,
      "learning_rate": 0.00032989525983806016,
      "loss": 3.1506,
      "step": 107881
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6793631315231323,
      "learning_rate": 0.00032989118963402784,
      "loss": 3.118,
      "step": 107882
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7872025966644287,
      "learning_rate": 0.0003298871194244381,
      "loss": 3.1283,
      "step": 107883
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.097198724746704,
      "learning_rate": 0.0003298830492092918,
      "loss": 2.8221,
      "step": 107884
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.798233151435852,
      "learning_rate": 0.0003298789789885897,
      "loss": 3.0894,
      "step": 107885
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.870365858078003,
      "learning_rate": 0.00032987490876233255,
      "loss": 2.9097,
      "step": 107886
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6728119850158691,
      "learning_rate": 0.000329870838530521,
      "loss": 3.0122,
      "step": 107887
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.614351749420166,
      "learning_rate": 0.00032986676829315604,
      "loss": 2.7566,
      "step": 107888
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.760042190551758,
      "learning_rate": 0.00032986269805023816,
      "loss": 3.1835,
      "step": 107889
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.6049070358276367,
      "learning_rate": 0.0003298586278017683,
      "loss": 2.912,
      "step": 107890
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.18129563331604,
      "learning_rate": 0.00032985455754774715,
      "loss": 2.7991,
      "step": 107891
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.0947022438049316,
      "learning_rate": 0.0003298504872881755,
      "loss": 2.9042,
      "step": 107892
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8970082998275757,
      "learning_rate": 0.000329846417023054,
      "loss": 2.9825,
      "step": 107893
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8438032865524292,
      "learning_rate": 0.0003298423467523836,
      "loss": 2.9314,
      "step": 107894
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.38340163230896,
      "learning_rate": 0.0003298382764761648,
      "loss": 3.1125,
      "step": 107895
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.8760371208190918,
      "learning_rate": 0.0003298342061943986,
      "loss": 3.1154,
      "step": 107896
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.872611165046692,
      "learning_rate": 0.0003298301359070857,
      "loss": 3.028,
      "step": 107897
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4387569427490234,
      "learning_rate": 0.0003298260656142267,
      "loss": 2.9457,
      "step": 107898
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.298678398132324,
      "learning_rate": 0.00032982199531582254,
      "loss": 2.9522,
      "step": 107899
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9985209703445435,
      "learning_rate": 0.0003298179250118739,
      "loss": 3.0346,
      "step": 107900
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.7837473154067993,
      "learning_rate": 0.0003298138547023815,
      "loss": 3.03,
      "step": 107901
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.9439226388931274,
      "learning_rate": 0.0003298097843873462,
      "loss": 2.9907,
      "step": 107902
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.987774133682251,
      "learning_rate": 0.0003298057140667687,
      "loss": 3.0212,
      "step": 107903
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.930328369140625,
      "learning_rate": 0.00032980164374064977,
      "loss": 2.9961,
      "step": 107904
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.4628419876098633,
      "learning_rate": 0.0003297975734089901,
      "loss": 2.6321,
      "step": 107905
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6258920431137085,
      "learning_rate": 0.00032979350307179054,
      "loss": 3.0615,
      "step": 107906
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9146126508712769,
      "learning_rate": 0.00032978943272905174,
      "loss": 2.8642,
      "step": 107907
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.904983639717102,
      "learning_rate": 0.00032978536238077456,
      "loss": 3.0643,
      "step": 107908
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.647205114364624,
      "learning_rate": 0.00032978129202695976,
      "loss": 3.0363,
      "step": 107909
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5917812585830688,
      "learning_rate": 0.000329777221667608,
      "loss": 3.025,
      "step": 107910
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0086076259613037,
      "learning_rate": 0.0003297731513027201,
      "loss": 2.8267,
      "step": 107911
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.667137861251831,
      "learning_rate": 0.00032976908093229683,
      "loss": 3.116,
      "step": 107912
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8488199710845947,
      "learning_rate": 0.00032976501055633894,
      "loss": 3.2431,
      "step": 107913
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.389529705047607,
      "learning_rate": 0.00032976094017484706,
      "loss": 2.9635,
      "step": 107914
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2137343883514404,
      "learning_rate": 0.00032975686978782223,
      "loss": 2.8776,
      "step": 107915
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9211888313293457,
      "learning_rate": 0.00032975279939526493,
      "loss": 3.0308,
      "step": 107916
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.066289186477661,
      "learning_rate": 0.00032974872899717603,
      "loss": 2.8013,
      "step": 107917
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5986979007720947,
      "learning_rate": 0.00032974465859355635,
      "loss": 3.0354,
      "step": 107918
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5405571460723877,
      "learning_rate": 0.0003297405881844065,
      "loss": 3.0774,
      "step": 107919
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9235540628433228,
      "learning_rate": 0.0003297365177697273,
      "loss": 2.8652,
      "step": 107920
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.692328691482544,
      "learning_rate": 0.0003297324473495196,
      "loss": 2.8556,
      "step": 107921
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7009285688400269,
      "learning_rate": 0.000329728376923784,
      "loss": 2.9037,
      "step": 107922
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7098197937011719,
      "learning_rate": 0.00032972430649252135,
      "loss": 3.1646,
      "step": 107923
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9550213813781738,
      "learning_rate": 0.00032972023605573245,
      "loss": 3.1452,
      "step": 107924
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9519234895706177,
      "learning_rate": 0.0003297161656134179,
      "loss": 2.9168,
      "step": 107925
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.882535696029663,
      "learning_rate": 0.0003297120951655786,
      "loss": 2.8816,
      "step": 107926
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6714775562286377,
      "learning_rate": 0.00032970802471221536,
      "loss": 3.1596,
      "step": 107927
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6551849842071533,
      "learning_rate": 0.0003297039542533287,
      "loss": 2.7612,
      "step": 107928
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9351667165756226,
      "learning_rate": 0.0003296998837889195,
      "loss": 2.7533,
      "step": 107929
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6527621746063232,
      "learning_rate": 0.0003296958133189886,
      "loss": 2.8549,
      "step": 107930
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6730486154556274,
      "learning_rate": 0.0003296917428435367,
      "loss": 3.064,
      "step": 107931
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.760966181755066,
      "learning_rate": 0.0003296876723625645,
      "loss": 2.9902,
      "step": 107932
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1382319927215576,
      "learning_rate": 0.00032968360187607285,
      "loss": 2.9376,
      "step": 107933
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7799675464630127,
      "learning_rate": 0.00032967953138406244,
      "loss": 2.9617,
      "step": 107934
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6999218463897705,
      "learning_rate": 0.000329675460886534,
      "loss": 2.8404,
      "step": 107935
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9081147909164429,
      "learning_rate": 0.00032967139038348836,
      "loss": 2.8224,
      "step": 107936
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0484628677368164,
      "learning_rate": 0.0003296673198749262,
      "loss": 2.8979,
      "step": 107937
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7442322969436646,
      "learning_rate": 0.0003296632493608485,
      "loss": 3.0147,
      "step": 107938
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.079594135284424,
      "learning_rate": 0.0003296591788412557,
      "loss": 3.058,
      "step": 107939
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0546700954437256,
      "learning_rate": 0.00032965510831614873,
      "loss": 2.9927,
      "step": 107940
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6887493133544922,
      "learning_rate": 0.00032965103778552827,
      "loss": 2.916,
      "step": 107941
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8620771169662476,
      "learning_rate": 0.00032964696724939517,
      "loss": 2.8649,
      "step": 107942
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9717886447906494,
      "learning_rate": 0.0003296428967077501,
      "loss": 3.0452,
      "step": 107943
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0867063999176025,
      "learning_rate": 0.0003296388261605939,
      "loss": 2.8551,
      "step": 107944
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7560620307922363,
      "learning_rate": 0.0003296347556079273,
      "loss": 2.8454,
      "step": 107945
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.4653924703598022,
      "learning_rate": 0.00032963068504975095,
      "loss": 2.8272,
      "step": 107946
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.091402292251587,
      "learning_rate": 0.0003296266144860658,
      "loss": 3.1996,
      "step": 107947
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.713029146194458,
      "learning_rate": 0.0003296225439168724,
      "loss": 2.9453,
      "step": 107948
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7801592350006104,
      "learning_rate": 0.0003296184733421718,
      "loss": 3.0512,
      "step": 107949
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2594220638275146,
      "learning_rate": 0.0003296144027619643,
      "loss": 3.044,
      "step": 107950
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9322458505630493,
      "learning_rate": 0.00032961033217625106,
      "loss": 3.3781,
      "step": 107951
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0139620304107666,
      "learning_rate": 0.0003296062615850328,
      "loss": 2.9588,
      "step": 107952
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8839802742004395,
      "learning_rate": 0.00032960219098831004,
      "loss": 2.982,
      "step": 107953
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9545555114746094,
      "learning_rate": 0.00032959812038608365,
      "loss": 3.1784,
      "step": 107954
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3822805881500244,
      "learning_rate": 0.0003295940497783545,
      "loss": 2.9024,
      "step": 107955
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.89451265335083,
      "learning_rate": 0.0003295899791651232,
      "loss": 3.121,
      "step": 107956
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1946067810058594,
      "learning_rate": 0.0003295859085463906,
      "loss": 3.062,
      "step": 107957
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0633153915405273,
      "learning_rate": 0.00032958183792215746,
      "loss": 2.798,
      "step": 107958
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8474059104919434,
      "learning_rate": 0.00032957776729242443,
      "loss": 2.7746,
      "step": 107959
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.241375684738159,
      "learning_rate": 0.0003295736966571923,
      "loss": 2.8822,
      "step": 107960
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1110658645629883,
      "learning_rate": 0.00032956962601646196,
      "loss": 3.0522,
      "step": 107961
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.096306562423706,
      "learning_rate": 0.000329565555370234,
      "loss": 2.9579,
      "step": 107962
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.121580123901367,
      "learning_rate": 0.00032956148471850933,
      "loss": 3.0118,
      "step": 107963
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9860676527023315,
      "learning_rate": 0.00032955741406128853,
      "loss": 2.9527,
      "step": 107964
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6352707147598267,
      "learning_rate": 0.0003295533433985725,
      "loss": 2.8508,
      "step": 107965
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.747365117073059,
      "learning_rate": 0.00032954927273036195,
      "loss": 3.1137,
      "step": 107966
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.793574094772339,
      "learning_rate": 0.00032954520205665764,
      "loss": 3.0357,
      "step": 107967
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6962417364120483,
      "learning_rate": 0.0003295411313774602,
      "loss": 3.085,
      "step": 107968
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7028847932815552,
      "learning_rate": 0.00032953706069277066,
      "loss": 2.9292,
      "step": 107969
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1491544246673584,
      "learning_rate": 0.00032953299000258957,
      "loss": 2.9364,
      "step": 107970
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4483814239501953,
      "learning_rate": 0.0003295289193069177,
      "loss": 3.0411,
      "step": 107971
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.74343740940094,
      "learning_rate": 0.00032952484860575585,
      "loss": 3.0054,
      "step": 107972
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.113205671310425,
      "learning_rate": 0.00032952077789910486,
      "loss": 2.8701,
      "step": 107973
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8801606893539429,
      "learning_rate": 0.00032951670718696526,
      "loss": 3.215,
      "step": 107974
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.920156955718994,
      "learning_rate": 0.000329512636469338,
      "loss": 3.0024,
      "step": 107975
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6998735666275024,
      "learning_rate": 0.0003295085657462239,
      "loss": 3.3074,
      "step": 107976
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0623419284820557,
      "learning_rate": 0.0003295044950176235,
      "loss": 2.7363,
      "step": 107977
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7491437196731567,
      "learning_rate": 0.0003295004242835376,
      "loss": 2.9063,
      "step": 107978
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.974250316619873,
      "learning_rate": 0.00032949635354396714,
      "loss": 3.1886,
      "step": 107979
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7291618585586548,
      "learning_rate": 0.0003294922827989127,
      "loss": 3.0149,
      "step": 107980
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7158701419830322,
      "learning_rate": 0.000329488212048375,
      "loss": 3.0554,
      "step": 107981
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3820579051971436,
      "learning_rate": 0.00032948414129235505,
      "loss": 3.0512,
      "step": 107982
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8715251684188843,
      "learning_rate": 0.00032948007053085334,
      "loss": 3.0661,
      "step": 107983
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.894365668296814,
      "learning_rate": 0.00032947599976387073,
      "loss": 3.0709,
      "step": 107984
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.682435393333435,
      "learning_rate": 0.000329471928991408,
      "loss": 2.9371,
      "step": 107985
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8976048231124878,
      "learning_rate": 0.00032946785821346585,
      "loss": 3.0333,
      "step": 107986
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9386013746261597,
      "learning_rate": 0.0003294637874300451,
      "loss": 3.1582,
      "step": 107987
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8767465353012085,
      "learning_rate": 0.0003294597166411465,
      "loss": 2.8639,
      "step": 107988
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8966913223266602,
      "learning_rate": 0.00032945564584677074,
      "loss": 2.8674,
      "step": 107989
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8201830387115479,
      "learning_rate": 0.00032945157504691864,
      "loss": 2.8531,
      "step": 107990
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.208810806274414,
      "learning_rate": 0.00032944750424159094,
      "loss": 2.9246,
      "step": 107991
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7375208139419556,
      "learning_rate": 0.00032944343343078834,
      "loss": 3.016,
      "step": 107992
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.974156379699707,
      "learning_rate": 0.00032943936261451166,
      "loss": 3.167,
      "step": 107993
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.228794813156128,
      "learning_rate": 0.0003294352917927617,
      "loss": 3.0013,
      "step": 107994
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7004562616348267,
      "learning_rate": 0.00032943122096553914,
      "loss": 3.122,
      "step": 107995
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.764508605003357,
      "learning_rate": 0.0003294271501328447,
      "loss": 2.9828,
      "step": 107996
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5467323064804077,
      "learning_rate": 0.0003294230792946793,
      "loss": 3.0909,
      "step": 107997
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7905193567276,
      "learning_rate": 0.0003294190084510436,
      "loss": 2.9457,
      "step": 107998
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.030050039291382,
      "learning_rate": 0.00032941493760193824,
      "loss": 3.4276,
      "step": 107999
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9152095317840576,
      "learning_rate": 0.0003294108667473641,
      "loss": 3.3989,
      "step": 108000
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1981639862060547,
      "learning_rate": 0.000329406795887322,
      "loss": 2.8537,
      "step": 108001
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.958860993385315,
      "learning_rate": 0.0003294027250218126,
      "loss": 3.2425,
      "step": 108002
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1316304206848145,
      "learning_rate": 0.0003293986541508367,
      "loss": 3.0133,
      "step": 108003
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.787386894226074,
      "learning_rate": 0.000329394583274395,
      "loss": 2.8972,
      "step": 108004
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4568545818328857,
      "learning_rate": 0.0003293905123924883,
      "loss": 2.989,
      "step": 108005
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.9895052909851074,
      "learning_rate": 0.0003293864415051174,
      "loss": 2.8931,
      "step": 108006
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3423705101013184,
      "learning_rate": 0.00032938237061228297,
      "loss": 3.0108,
      "step": 108007
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.6600921154022217,
      "learning_rate": 0.00032937829971398574,
      "loss": 2.9334,
      "step": 108008
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2301769256591797,
      "learning_rate": 0.0003293742288102265,
      "loss": 3.1279,
      "step": 108009
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9089901447296143,
      "learning_rate": 0.0003293701579010062,
      "loss": 3.1272,
      "step": 108010
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.259946346282959,
      "learning_rate": 0.0003293660869863253,
      "loss": 2.8901,
      "step": 108011
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7989718914031982,
      "learning_rate": 0.0003293620160661847,
      "loss": 2.7191,
      "step": 108012
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8736971616744995,
      "learning_rate": 0.00032935794514058533,
      "loss": 2.966,
      "step": 108013
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7879291772842407,
      "learning_rate": 0.0003293538742095275,
      "loss": 3.2474,
      "step": 108014
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0021209716796875,
      "learning_rate": 0.0003293498032730124,
      "loss": 3.2524,
      "step": 108015
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8925206661224365,
      "learning_rate": 0.0003293457323310406,
      "loss": 3.0542,
      "step": 108016
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.1785666942596436,
      "learning_rate": 0.00032934166138361284,
      "loss": 3.1491,
      "step": 108017
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.304370164871216,
      "learning_rate": 0.00032933759043072983,
      "loss": 2.9813,
      "step": 108018
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.3092615604400635,
      "learning_rate": 0.0003293335194723926,
      "loss": 2.9495,
      "step": 108019
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.967559337615967,
      "learning_rate": 0.0003293294485086015,
      "loss": 3.0206,
      "step": 108020
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8828037977218628,
      "learning_rate": 0.00032932537753935763,
      "loss": 2.8979,
      "step": 108021
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8206984996795654,
      "learning_rate": 0.0003293213065646616,
      "loss": 3.0669,
      "step": 108022
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.516700029373169,
      "learning_rate": 0.0003293172355845142,
      "loss": 2.5313,
      "step": 108023
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.558103561401367,
      "learning_rate": 0.0003293131645989161,
      "loss": 2.882,
      "step": 108024
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9122453927993774,
      "learning_rate": 0.0003293090936078682,
      "loss": 3.0566,
      "step": 108025
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7744150161743164,
      "learning_rate": 0.0003293050226113712,
      "loss": 3.0956,
      "step": 108026
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1603331565856934,
      "learning_rate": 0.0003293009516094258,
      "loss": 2.9533,
      "step": 108027
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6881862878799438,
      "learning_rate": 0.00032929688060203283,
      "loss": 3.1412,
      "step": 108028
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1249284744262695,
      "learning_rate": 0.00032929280958919296,
      "loss": 2.9095,
      "step": 108029
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9499037265777588,
      "learning_rate": 0.0003292887385709071,
      "loss": 3.1274,
      "step": 108030
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.142707586288452,
      "learning_rate": 0.0003292846675471758,
      "loss": 2.7463,
      "step": 108031
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.176408529281616,
      "learning_rate": 0.0003292805965179999,
      "loss": 2.801,
      "step": 108032
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.985267162322998,
      "learning_rate": 0.0003292765254833803,
      "loss": 3.3102,
      "step": 108033
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9698787927627563,
      "learning_rate": 0.0003292724544433177,
      "loss": 3.0682,
      "step": 108034
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0129735469818115,
      "learning_rate": 0.00032926838339781265,
      "loss": 3.1805,
      "step": 108035
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7370513677597046,
      "learning_rate": 0.0003292643123468661,
      "loss": 2.775,
      "step": 108036
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.023853302001953,
      "learning_rate": 0.0003292602412904788,
      "loss": 2.9173,
      "step": 108037
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2554755210876465,
      "learning_rate": 0.0003292561702286514,
      "loss": 2.8013,
      "step": 108038
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.862484097480774,
      "learning_rate": 0.0003292520991613848,
      "loss": 3.0441,
      "step": 108039
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7990643978118896,
      "learning_rate": 0.00032924802808867963,
      "loss": 3.0403,
      "step": 108040
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9007411003112793,
      "learning_rate": 0.00032924395701053674,
      "loss": 3.0149,
      "step": 108041
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8949551582336426,
      "learning_rate": 0.0003292398859269568,
      "loss": 3.0435,
      "step": 108042
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.128835916519165,
      "learning_rate": 0.0003292358148379407,
      "loss": 3.1519,
      "step": 108043
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7563159465789795,
      "learning_rate": 0.00032923174374348903,
      "loss": 2.9607,
      "step": 108044
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.415308952331543,
      "learning_rate": 0.0003292276726436026,
      "loss": 2.7592,
      "step": 108045
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6817822456359863,
      "learning_rate": 0.0003292236015382823,
      "loss": 3.1101,
      "step": 108046
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8513059616088867,
      "learning_rate": 0.00032921953042752873,
      "loss": 3.0257,
      "step": 108047
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7523517608642578,
      "learning_rate": 0.0003292154593113426,
      "loss": 3.0889,
      "step": 108048
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2014498710632324,
      "learning_rate": 0.00032921138818972493,
      "loss": 2.9769,
      "step": 108049
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0870654582977295,
      "learning_rate": 0.00032920731706267617,
      "loss": 3.0497,
      "step": 108050
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9175971746444702,
      "learning_rate": 0.0003292032459301973,
      "loss": 2.8959,
      "step": 108051
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7449110746383667,
      "learning_rate": 0.000329199174792289,
      "loss": 3.137,
      "step": 108052
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.406341314315796,
      "learning_rate": 0.000329195103648952,
      "loss": 2.8712,
      "step": 108053
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9165672063827515,
      "learning_rate": 0.00032919103250018706,
      "loss": 2.9592,
      "step": 108054
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8269139528274536,
      "learning_rate": 0.000329186961345995,
      "loss": 3.089,
      "step": 108055
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.2533769607543945,
      "learning_rate": 0.0003291828901863765,
      "loss": 3.0608,
      "step": 108056
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.0224289894104004,
      "learning_rate": 0.0003291788190213324,
      "loss": 3.0544,
      "step": 108057
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.388653516769409,
      "learning_rate": 0.0003291747478508633,
      "loss": 3.1992,
      "step": 108058
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.062074899673462,
      "learning_rate": 0.0003291706766749701,
      "loss": 3.0895,
      "step": 108059
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6273200511932373,
      "learning_rate": 0.00032916660549365363,
      "loss": 2.9992,
      "step": 108060
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.267612934112549,
      "learning_rate": 0.0003291625343069145,
      "loss": 3.1346,
      "step": 108061
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8271152973175049,
      "learning_rate": 0.00032915846311475344,
      "loss": 3.0466,
      "step": 108062
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0948076248168945,
      "learning_rate": 0.00032915439191717126,
      "loss": 3.0036,
      "step": 108063
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8759206533432007,
      "learning_rate": 0.0003291503207141687,
      "loss": 3.0186,
      "step": 108064
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8289474248886108,
      "learning_rate": 0.0003291462495057467,
      "loss": 2.9666,
      "step": 108065
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.4397313594818115,
      "learning_rate": 0.0003291421782919057,
      "loss": 3.0946,
      "step": 108066
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9352366924285889,
      "learning_rate": 0.0003291381070726467,
      "loss": 2.8643,
      "step": 108067
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.768395185470581,
      "learning_rate": 0.0003291340358479704,
      "loss": 2.9022,
      "step": 108068
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1513326168060303,
      "learning_rate": 0.00032912996461787744,
      "loss": 2.9862,
      "step": 108069
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2365760803222656,
      "learning_rate": 0.0003291258933823687,
      "loss": 3.1302,
      "step": 108070
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.062854290008545,
      "learning_rate": 0.000329121822141445,
      "loss": 3.1693,
      "step": 108071
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1691973209381104,
      "learning_rate": 0.0003291177508951069,
      "loss": 3.0764,
      "step": 108072
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.526465654373169,
      "learning_rate": 0.0003291136796433553,
      "loss": 2.8302,
      "step": 108073
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.970982551574707,
      "learning_rate": 0.00032910960838619094,
      "loss": 2.9589,
      "step": 108074
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3546719551086426,
      "learning_rate": 0.0003291055371236145,
      "loss": 2.7755,
      "step": 108075
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.690354347229004,
      "learning_rate": 0.0003291014658556268,
      "loss": 3.1384,
      "step": 108076
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.328667163848877,
      "learning_rate": 0.00032909739458222864,
      "loss": 3.028,
      "step": 108077
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2988944053649902,
      "learning_rate": 0.0003290933233034207,
      "loss": 2.9137,
      "step": 108078
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.627453088760376,
      "learning_rate": 0.00032908925201920376,
      "loss": 3.1466,
      "step": 108079
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6682095527648926,
      "learning_rate": 0.00032908518072957856,
      "loss": 3.0731,
      "step": 108080
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7827857732772827,
      "learning_rate": 0.00032908110943454585,
      "loss": 3.1581,
      "step": 108081
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.906136393547058,
      "learning_rate": 0.0003290770381341065,
      "loss": 2.9328,
      "step": 108082
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2329025268554688,
      "learning_rate": 0.00032907296682826114,
      "loss": 2.8689,
      "step": 108083
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.278837203979492,
      "learning_rate": 0.00032906889551701055,
      "loss": 3.2281,
      "step": 108084
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7332804203033447,
      "learning_rate": 0.0003290648242003555,
      "loss": 2.8896,
      "step": 108085
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3551840782165527,
      "learning_rate": 0.0003290607528782968,
      "loss": 3.0939,
      "step": 108086
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.30849027633667,
      "learning_rate": 0.0003290566815508351,
      "loss": 2.8231,
      "step": 108087
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9062151908874512,
      "learning_rate": 0.0003290526102179712,
      "loss": 2.9872,
      "step": 108088
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7794913053512573,
      "learning_rate": 0.00032904853887970597,
      "loss": 3.2106,
      "step": 108089
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.546321153640747,
      "learning_rate": 0.00032904446753603996,
      "loss": 3.0486,
      "step": 108090
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.9056286811828613,
      "learning_rate": 0.0003290403961869741,
      "loss": 3.088,
      "step": 108091
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0887081623077393,
      "learning_rate": 0.00032903632483250903,
      "loss": 3.1898,
      "step": 108092
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0374889373779297,
      "learning_rate": 0.0003290322534726456,
      "loss": 3.0198,
      "step": 108093
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7369163036346436,
      "learning_rate": 0.00032902818210738455,
      "loss": 2.7539,
      "step": 108094
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0427651405334473,
      "learning_rate": 0.00032902411073672656,
      "loss": 2.8682,
      "step": 108095
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8857817649841309,
      "learning_rate": 0.00032902003936067245,
      "loss": 2.7133,
      "step": 108096
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4373130798339844,
      "learning_rate": 0.000329015967979223,
      "loss": 3.2172,
      "step": 108097
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7358250617980957,
      "learning_rate": 0.00032901189659237894,
      "loss": 2.942,
      "step": 108098
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9663581848144531,
      "learning_rate": 0.0003290078252001409,
      "loss": 2.8234,
      "step": 108099
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.25533127784729,
      "learning_rate": 0.00032900375380250985,
      "loss": 2.7386,
      "step": 108100
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2584943771362305,
      "learning_rate": 0.0003289996823994865,
      "loss": 3.1515,
      "step": 108101
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0769426822662354,
      "learning_rate": 0.0003289956109910715,
      "loss": 3.1152,
      "step": 108102
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9626359939575195,
      "learning_rate": 0.0003289915395772657,
      "loss": 2.9555,
      "step": 108103
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8986274003982544,
      "learning_rate": 0.0003289874681580698,
      "loss": 3.0172,
      "step": 108104
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4788365364074707,
      "learning_rate": 0.0003289833967334846,
      "loss": 3.2426,
      "step": 108105
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8182647228240967,
      "learning_rate": 0.0003289793253035108,
      "loss": 2.976,
      "step": 108106
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3633673191070557,
      "learning_rate": 0.0003289752538681492,
      "loss": 3.1689,
      "step": 108107
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.7624800205230713,
      "learning_rate": 0.0003289711824274006,
      "loss": 2.8878,
      "step": 108108
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.879873752593994,
      "learning_rate": 0.00032896711098126573,
      "loss": 2.9141,
      "step": 108109
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.926738977432251,
      "learning_rate": 0.0003289630395297452,
      "loss": 2.9397,
      "step": 108110
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.13493013381958,
      "learning_rate": 0.00032895896807284006,
      "loss": 3.1168,
      "step": 108111
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.633779525756836,
      "learning_rate": 0.0003289548966105508,
      "loss": 3.0108,
      "step": 108112
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.956691861152649,
      "learning_rate": 0.0003289508251428783,
      "loss": 2.8295,
      "step": 108113
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5462497472763062,
      "learning_rate": 0.00032894675366982326,
      "loss": 3.218,
      "step": 108114
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7680778503417969,
      "learning_rate": 0.0003289426821913866,
      "loss": 2.9805,
      "step": 108115
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2760069370269775,
      "learning_rate": 0.0003289386107075688,
      "loss": 2.7504,
      "step": 108116
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8380494117736816,
      "learning_rate": 0.0003289345392183708,
      "loss": 3.1105,
      "step": 108117
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.897219181060791,
      "learning_rate": 0.0003289304677237934,
      "loss": 2.7883,
      "step": 108118
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7061606645584106,
      "learning_rate": 0.0003289263962238372,
      "loss": 2.8316,
      "step": 108119
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8341196775436401,
      "learning_rate": 0.000328922324718503,
      "loss": 2.8126,
      "step": 108120
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.030125379562378,
      "learning_rate": 0.0003289182532077917,
      "loss": 3.1198,
      "step": 108121
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1811490058898926,
      "learning_rate": 0.000328914181691704,
      "loss": 2.9053,
      "step": 108122
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4160044193267822,
      "learning_rate": 0.00032891011017024046,
      "loss": 3.0111,
      "step": 108123
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.010453462600708,
      "learning_rate": 0.00032890603864340197,
      "loss": 2.876,
      "step": 108124
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8757121562957764,
      "learning_rate": 0.00032890196711118946,
      "loss": 3.0453,
      "step": 108125
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.047593116760254,
      "learning_rate": 0.0003288978955736034,
      "loss": 2.8519,
      "step": 108126
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7270615100860596,
      "learning_rate": 0.00032889382403064474,
      "loss": 2.942,
      "step": 108127
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8341926336288452,
      "learning_rate": 0.00032888975248231416,
      "loss": 2.947,
      "step": 108128
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0244903564453125,
      "learning_rate": 0.0003288856809286124,
      "loss": 2.8151,
      "step": 108129
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3630783557891846,
      "learning_rate": 0.00032888160936954023,
      "loss": 2.9001,
      "step": 108130
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8444719314575195,
      "learning_rate": 0.0003288775378050984,
      "loss": 2.9918,
      "step": 108131
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.318855047225952,
      "learning_rate": 0.0003288734662352879,
      "loss": 3.3353,
      "step": 108132
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6044704914093018,
      "learning_rate": 0.00032886939466010903,
      "loss": 2.9258,
      "step": 108133
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.096045732498169,
      "learning_rate": 0.00032886532307956285,
      "loss": 3.015,
      "step": 108134
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1109981536865234,
      "learning_rate": 0.0003288612514936502,
      "loss": 2.8663,
      "step": 108135
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8763947486877441,
      "learning_rate": 0.0003288571799023716,
      "loss": 3.0727,
      "step": 108136
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.4417457580566406,
      "learning_rate": 0.0003288531083057279,
      "loss": 3.1313,
      "step": 108137
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.619909405708313,
      "learning_rate": 0.00032884903670371985,
      "loss": 3.0017,
      "step": 108138
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6135761737823486,
      "learning_rate": 0.0003288449650963483,
      "loss": 3.056,
      "step": 108139
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8226065635681152,
      "learning_rate": 0.00032884089348361376,
      "loss": 2.8471,
      "step": 108140
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7463724613189697,
      "learning_rate": 0.00032883682186551737,
      "loss": 3.0605,
      "step": 108141
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.711281418800354,
      "learning_rate": 0.0003288327502420595,
      "loss": 3.0443,
      "step": 108142
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.207231283187866,
      "learning_rate": 0.0003288286786132411,
      "loss": 2.7803,
      "step": 108143
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7041107416152954,
      "learning_rate": 0.000328824606979063,
      "loss": 3.031,
      "step": 108144
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.5848031044006348,
      "learning_rate": 0.00032882053533952576,
      "loss": 2.9494,
      "step": 108145
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3952455520629883,
      "learning_rate": 0.0003288164636946303,
      "loss": 3.1162,
      "step": 108146
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8846668004989624,
      "learning_rate": 0.0003288123920443773,
      "loss": 3.0031,
      "step": 108147
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1551473140716553,
      "learning_rate": 0.0003288083203887676,
      "loss": 3.0871,
      "step": 108148
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7371490001678467,
      "learning_rate": 0.00032880424872780173,
      "loss": 3.2049,
      "step": 108149
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2540509700775146,
      "learning_rate": 0.00032880017706148073,
      "loss": 3.0128,
      "step": 108150
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6068167686462402,
      "learning_rate": 0.0003287961053898052,
      "loss": 2.8682,
      "step": 108151
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3401522636413574,
      "learning_rate": 0.0003287920337127759,
      "loss": 2.8994,
      "step": 108152
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7641115188598633,
      "learning_rate": 0.00032878796203039375,
      "loss": 3.0994,
      "step": 108153
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.128413438796997,
      "learning_rate": 0.0003287838903426592,
      "loss": 2.8665,
      "step": 108154
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6643716096878052,
      "learning_rate": 0.0003287798186495733,
      "loss": 2.9945,
      "step": 108155
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4667036533355713,
      "learning_rate": 0.00032877574695113664,
      "loss": 3.0541,
      "step": 108156
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7492743730545044,
      "learning_rate": 0.00032877167524734995,
      "loss": 2.6638,
      "step": 108157
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8590260744094849,
      "learning_rate": 0.0003287676035382142,
      "loss": 2.9748,
      "step": 108158
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6972657442092896,
      "learning_rate": 0.00032876353182372995,
      "loss": 3.0403,
      "step": 108159
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6366596221923828,
      "learning_rate": 0.000328759460103898,
      "loss": 2.9938,
      "step": 108160
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6856287717819214,
      "learning_rate": 0.00032875538837871907,
      "loss": 3.0237,
      "step": 108161
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0585994720458984,
      "learning_rate": 0.00032875131664819417,
      "loss": 3.0207,
      "step": 108162
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.904958724975586,
      "learning_rate": 0.00032874724491232366,
      "loss": 3.1564,
      "step": 108163
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2759180068969727,
      "learning_rate": 0.0003287431731711085,
      "loss": 2.876,
      "step": 108164
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3864359855651855,
      "learning_rate": 0.0003287391014245496,
      "loss": 3.3223,
      "step": 108165
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.220881462097168,
      "learning_rate": 0.00032873502967264745,
      "loss": 2.9527,
      "step": 108166
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5367932319641113,
      "learning_rate": 0.0003287309579154029,
      "loss": 3.02,
      "step": 108167
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8193907737731934,
      "learning_rate": 0.00032872688615281676,
      "loss": 3.0279,
      "step": 108168
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.007646322250366,
      "learning_rate": 0.00032872281438488975,
      "loss": 3.1414,
      "step": 108169
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.5060067176818848,
      "learning_rate": 0.00032871874261162256,
      "loss": 3.1078,
      "step": 108170
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.970197916030884,
      "learning_rate": 0.00032871467083301617,
      "loss": 3.0004,
      "step": 108171
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.579845428466797,
      "learning_rate": 0.000328710599049071,
      "loss": 2.935,
      "step": 108172
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7079479694366455,
      "learning_rate": 0.0003287065272597881,
      "loss": 3.3583,
      "step": 108173
    },
    {
      "epoch": 1.41,
      "grad_norm": 6.13341760635376,
      "learning_rate": 0.00032870245546516813,
      "loss": 3.0921,
      "step": 108174
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.515932083129883,
      "learning_rate": 0.00032869838366521173,
      "loss": 2.931,
      "step": 108175
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2338480949401855,
      "learning_rate": 0.0003286943118599198,
      "loss": 3.203,
      "step": 108176
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.450345993041992,
      "learning_rate": 0.00032869024004929313,
      "loss": 2.9834,
      "step": 108177
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.704998731613159,
      "learning_rate": 0.00032868616823333234,
      "loss": 3.0152,
      "step": 108178
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.283143043518066,
      "learning_rate": 0.00032868209641203825,
      "loss": 2.9389,
      "step": 108179
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3222768306732178,
      "learning_rate": 0.0003286780245854116,
      "loss": 2.9016,
      "step": 108180
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7795073986053467,
      "learning_rate": 0.00032867395275345315,
      "loss": 2.9392,
      "step": 108181
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.9786465167999268,
      "learning_rate": 0.0003286698809161637,
      "loss": 2.8956,
      "step": 108182
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.858747720718384,
      "learning_rate": 0.00032866580907354407,
      "loss": 2.8851,
      "step": 108183
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7990686893463135,
      "learning_rate": 0.0003286617372255948,
      "loss": 3.0637,
      "step": 108184
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0365548133850098,
      "learning_rate": 0.00032865766537231675,
      "loss": 3.1723,
      "step": 108185
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.33979868888855,
      "learning_rate": 0.0003286535935137108,
      "loss": 3.2144,
      "step": 108186
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1621527671813965,
      "learning_rate": 0.0003286495216497776,
      "loss": 3.0986,
      "step": 108187
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1004247665405273,
      "learning_rate": 0.0003286454497805178,
      "loss": 3.0069,
      "step": 108188
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3073127269744873,
      "learning_rate": 0.0003286413779059324,
      "loss": 3.0155,
      "step": 108189
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7388155460357666,
      "learning_rate": 0.00032863730602602197,
      "loss": 3.1455,
      "step": 108190
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7189548015594482,
      "learning_rate": 0.0003286332341407873,
      "loss": 3.0199,
      "step": 108191
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8597798347473145,
      "learning_rate": 0.00032862916225022924,
      "loss": 3.1741,
      "step": 108192
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.051490306854248,
      "learning_rate": 0.00032862509035434844,
      "loss": 2.7028,
      "step": 108193
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4932632446289062,
      "learning_rate": 0.0003286210184531457,
      "loss": 3.0738,
      "step": 108194
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2748184204101562,
      "learning_rate": 0.00032861694654662174,
      "loss": 3.041,
      "step": 108195
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.742299795150757,
      "learning_rate": 0.00032861287463477735,
      "loss": 3.0129,
      "step": 108196
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.335376262664795,
      "learning_rate": 0.00032860880271761334,
      "loss": 2.8068,
      "step": 108197
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.927938222885132,
      "learning_rate": 0.0003286047307951304,
      "loss": 2.9839,
      "step": 108198
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.031792640686035,
      "learning_rate": 0.0003286006588673293,
      "loss": 3.0152,
      "step": 108199
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.324688673019409,
      "learning_rate": 0.00032859658693421077,
      "loss": 2.6928,
      "step": 108200
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.765855073928833,
      "learning_rate": 0.00032859251499577557,
      "loss": 2.8311,
      "step": 108201
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.637678861618042,
      "learning_rate": 0.0003285884430520245,
      "loss": 3.2547,
      "step": 108202
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.706367254257202,
      "learning_rate": 0.00032858437110295835,
      "loss": 2.902,
      "step": 108203
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.007073402404785,
      "learning_rate": 0.0003285802991485778,
      "loss": 2.932,
      "step": 108204
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8179877996444702,
      "learning_rate": 0.00032857622718888356,
      "loss": 3.0444,
      "step": 108205
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8730216026306152,
      "learning_rate": 0.0003285721552238765,
      "loss": 3.1577,
      "step": 108206
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.714304804801941,
      "learning_rate": 0.0003285680832535575,
      "loss": 3.2178,
      "step": 108207
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.08701753616333,
      "learning_rate": 0.000328564011277927,
      "loss": 2.74,
      "step": 108208
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1092395782470703,
      "learning_rate": 0.00032855993929698586,
      "loss": 3.2871,
      "step": 108209
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.666452407836914,
      "learning_rate": 0.000328555867310735,
      "loss": 3.1721,
      "step": 108210
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8336467742919922,
      "learning_rate": 0.000328551795319175,
      "loss": 3.2281,
      "step": 108211
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9384433031082153,
      "learning_rate": 0.00032854772332230667,
      "loss": 2.982,
      "step": 108212
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6441246271133423,
      "learning_rate": 0.0003285436513201308,
      "loss": 3.1946,
      "step": 108213
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6923404932022095,
      "learning_rate": 0.0003285395793126482,
      "loss": 3.1341,
      "step": 108214
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.353332042694092,
      "learning_rate": 0.0003285355072998594,
      "loss": 2.9598,
      "step": 108215
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.598374605178833,
      "learning_rate": 0.00032853143528176545,
      "loss": 2.9676,
      "step": 108216
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3783023357391357,
      "learning_rate": 0.00032852736325836697,
      "loss": 2.8078,
      "step": 108217
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2256765365600586,
      "learning_rate": 0.0003285232912296646,
      "loss": 3.2138,
      "step": 108218
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8854198455810547,
      "learning_rate": 0.00032851921919565933,
      "loss": 2.8631,
      "step": 108219
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.752042293548584,
      "learning_rate": 0.00032851514715635176,
      "loss": 3.28,
      "step": 108220
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.027360200881958,
      "learning_rate": 0.00032851107511174264,
      "loss": 2.7092,
      "step": 108221
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2688260078430176,
      "learning_rate": 0.0003285070030618328,
      "loss": 2.9884,
      "step": 108222
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.019865036010742,
      "learning_rate": 0.000328502931006623,
      "loss": 3.3023,
      "step": 108223
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8294874429702759,
      "learning_rate": 0.000328498858946114,
      "loss": 3.0957,
      "step": 108224
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9104126691818237,
      "learning_rate": 0.0003284947868803064,
      "loss": 3.1851,
      "step": 108225
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8182576894760132,
      "learning_rate": 0.0003284907148092012,
      "loss": 2.9917,
      "step": 108226
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9142879247665405,
      "learning_rate": 0.00032848664273279896,
      "loss": 2.8226,
      "step": 108227
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.104287624359131,
      "learning_rate": 0.00032848257065110054,
      "loss": 2.8359,
      "step": 108228
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7385245561599731,
      "learning_rate": 0.00032847849856410666,
      "loss": 2.8231,
      "step": 108229
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5718334913253784,
      "learning_rate": 0.00032847442647181813,
      "loss": 2.8897,
      "step": 108230
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9341533184051514,
      "learning_rate": 0.0003284703543742356,
      "loss": 3.0464,
      "step": 108231
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7270572185516357,
      "learning_rate": 0.00032846628227136,
      "loss": 3.0209,
      "step": 108232
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7883086204528809,
      "learning_rate": 0.0003284622101631919,
      "loss": 3.1776,
      "step": 108233
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7858340740203857,
      "learning_rate": 0.00032845813804973213,
      "loss": 2.9389,
      "step": 108234
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8431851863861084,
      "learning_rate": 0.00032845406593098154,
      "loss": 2.7434,
      "step": 108235
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.713396668434143,
      "learning_rate": 0.0003284499938069407,
      "loss": 2.9113,
      "step": 108236
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.77671217918396,
      "learning_rate": 0.0003284459216776105,
      "loss": 2.8635,
      "step": 108237
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3700857162475586,
      "learning_rate": 0.00032844184954299174,
      "loss": 2.9842,
      "step": 108238
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.526766061782837,
      "learning_rate": 0.000328437777403085,
      "loss": 2.944,
      "step": 108239
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1762804985046387,
      "learning_rate": 0.00032843370525789114,
      "loss": 2.8921,
      "step": 108240
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.339002847671509,
      "learning_rate": 0.000328429633107411,
      "loss": 2.6747,
      "step": 108241
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.938808560371399,
      "learning_rate": 0.0003284255609516452,
      "loss": 3.0129,
      "step": 108242
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8443032503128052,
      "learning_rate": 0.00032842148879059455,
      "loss": 3.1774,
      "step": 108243
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.820073127746582,
      "learning_rate": 0.00032841741662425984,
      "loss": 3.0266,
      "step": 108244
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3080337047576904,
      "learning_rate": 0.0003284133444526418,
      "loss": 2.9599,
      "step": 108245
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8614139556884766,
      "learning_rate": 0.0003284092722757411,
      "loss": 2.9304,
      "step": 108246
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8456417322158813,
      "learning_rate": 0.00032840520009355873,
      "loss": 2.832,
      "step": 108247
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.196101188659668,
      "learning_rate": 0.00032840112790609514,
      "loss": 2.6658,
      "step": 108248
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.9498982429504395,
      "learning_rate": 0.00032839705571335127,
      "loss": 2.8897,
      "step": 108249
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8297436237335205,
      "learning_rate": 0.000328392983515328,
      "loss": 3.1513,
      "step": 108250
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8038115501403809,
      "learning_rate": 0.00032838891131202583,
      "loss": 2.9671,
      "step": 108251
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4106600284576416,
      "learning_rate": 0.0003283848391034456,
      "loss": 2.8453,
      "step": 108252
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8702709674835205,
      "learning_rate": 0.00032838076688958815,
      "loss": 3.1056,
      "step": 108253
    },
    {
      "epoch": 1.41,
      "grad_norm": 5.309589862823486,
      "learning_rate": 0.0003283766946704541,
      "loss": 2.8556,
      "step": 108254
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.729858636856079,
      "learning_rate": 0.00032837262244604437,
      "loss": 2.8663,
      "step": 108255
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8400124311447144,
      "learning_rate": 0.0003283685502163596,
      "loss": 2.9452,
      "step": 108256
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.256927967071533,
      "learning_rate": 0.0003283644779814006,
      "loss": 2.8039,
      "step": 108257
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7445755004882812,
      "learning_rate": 0.000328360405741168,
      "loss": 3.1274,
      "step": 108258
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0902700424194336,
      "learning_rate": 0.00032835633349566286,
      "loss": 3.1867,
      "step": 108259
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7622257471084595,
      "learning_rate": 0.0003283522612448856,
      "loss": 3.2578,
      "step": 108260
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.921673059463501,
      "learning_rate": 0.0003283481889888371,
      "loss": 2.7469,
      "step": 108261
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.7840867042541504,
      "learning_rate": 0.0003283441167275183,
      "loss": 2.8823,
      "step": 108262
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.17612886428833,
      "learning_rate": 0.00032834004446092967,
      "loss": 2.9277,
      "step": 108263
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2001421451568604,
      "learning_rate": 0.00032833597218907205,
      "loss": 2.7716,
      "step": 108264
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.29608678817749,
      "learning_rate": 0.0003283318999119463,
      "loss": 2.9018,
      "step": 108265
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5539939403533936,
      "learning_rate": 0.0003283278276295531,
      "loss": 3.2387,
      "step": 108266
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.537959575653076,
      "learning_rate": 0.00032832375534189325,
      "loss": 2.944,
      "step": 108267
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2202155590057373,
      "learning_rate": 0.0003283196830489674,
      "loss": 2.8534,
      "step": 108268
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.288581132888794,
      "learning_rate": 0.0003283156107507765,
      "loss": 3.1818,
      "step": 108269
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.293266534805298,
      "learning_rate": 0.0003283115384473211,
      "loss": 3.1616,
      "step": 108270
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.052241325378418,
      "learning_rate": 0.0003283074661386021,
      "loss": 3.0659,
      "step": 108271
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6716269254684448,
      "learning_rate": 0.00032830339382462027,
      "loss": 2.7398,
      "step": 108272
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.95384681224823,
      "learning_rate": 0.00032829932150537625,
      "loss": 2.8905,
      "step": 108273
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6708580255508423,
      "learning_rate": 0.00032829524918087075,
      "loss": 3.0232,
      "step": 108274
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.217616081237793,
      "learning_rate": 0.0003282911768511048,
      "loss": 3.0215,
      "step": 108275
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9077726602554321,
      "learning_rate": 0.00032828710451607887,
      "loss": 3.2728,
      "step": 108276
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.677993893623352,
      "learning_rate": 0.00032828303217579384,
      "loss": 3.0821,
      "step": 108277
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4637372493743896,
      "learning_rate": 0.00032827895983025053,
      "loss": 3.0371,
      "step": 108278
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7419573068618774,
      "learning_rate": 0.00032827488747944956,
      "loss": 3.2424,
      "step": 108279
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9179133176803589,
      "learning_rate": 0.00032827081512339177,
      "loss": 3.0273,
      "step": 108280
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.663407802581787,
      "learning_rate": 0.00032826674276207794,
      "loss": 3.0429,
      "step": 108281
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.440185308456421,
      "learning_rate": 0.00032826267039550875,
      "loss": 3.0922,
      "step": 108282
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1427745819091797,
      "learning_rate": 0.000328258598023685,
      "loss": 2.9208,
      "step": 108283
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8994474411010742,
      "learning_rate": 0.00032825452564660745,
      "loss": 2.6816,
      "step": 108284
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.116081476211548,
      "learning_rate": 0.00032825045326427684,
      "loss": 3.1108,
      "step": 108285
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8248403072357178,
      "learning_rate": 0.00032824638087669393,
      "loss": 3.2228,
      "step": 108286
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9748892784118652,
      "learning_rate": 0.00032824230848385946,
      "loss": 2.9971,
      "step": 108287
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.318314552307129,
      "learning_rate": 0.0003282382360857743,
      "loss": 2.9753,
      "step": 108288
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.868765115737915,
      "learning_rate": 0.0003282341636824391,
      "loss": 3.0115,
      "step": 108289
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.014843463897705,
      "learning_rate": 0.0003282300912738545,
      "loss": 3.0942,
      "step": 108290
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4171621799468994,
      "learning_rate": 0.00032822601886002166,
      "loss": 2.9403,
      "step": 108291
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.841213583946228,
      "learning_rate": 0.0003282219464409408,
      "loss": 2.9363,
      "step": 108292
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.106084108352661,
      "learning_rate": 0.00032821787401661305,
      "loss": 3.0195,
      "step": 108293
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6658669710159302,
      "learning_rate": 0.0003282138015870391,
      "loss": 3.002,
      "step": 108294
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6491353511810303,
      "learning_rate": 0.00032820972915221966,
      "loss": 2.8558,
      "step": 108295
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9998365640640259,
      "learning_rate": 0.0003282056567121555,
      "loss": 2.9392,
      "step": 108296
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5728545188903809,
      "learning_rate": 0.00032820158426684733,
      "loss": 2.952,
      "step": 108297
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.669421911239624,
      "learning_rate": 0.000328197511816296,
      "loss": 3.0199,
      "step": 108298
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.66975736618042,
      "learning_rate": 0.0003281934393605022,
      "loss": 2.8968,
      "step": 108299
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.153093099594116,
      "learning_rate": 0.00032818936689946664,
      "loss": 3.0623,
      "step": 108300
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.089442491531372,
      "learning_rate": 0.0003281852944331903,
      "loss": 2.7037,
      "step": 108301
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5215487480163574,
      "learning_rate": 0.00032818122196167363,
      "loss": 2.9883,
      "step": 108302
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8499587774276733,
      "learning_rate": 0.00032817714948491763,
      "loss": 2.9156,
      "step": 108303
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7520846128463745,
      "learning_rate": 0.00032817307700292294,
      "loss": 2.93,
      "step": 108304
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0383808612823486,
      "learning_rate": 0.00032816900451569035,
      "loss": 3.0449,
      "step": 108305
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7925350666046143,
      "learning_rate": 0.00032816493202322063,
      "loss": 2.9187,
      "step": 108306
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.499362587928772,
      "learning_rate": 0.00032816085952551443,
      "loss": 3.0018,
      "step": 108307
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5295040607452393,
      "learning_rate": 0.0003281567870225727,
      "loss": 3.1361,
      "step": 108308
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.817622184753418,
      "learning_rate": 0.00032815271451439607,
      "loss": 3.1339,
      "step": 108309
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.016064167022705,
      "learning_rate": 0.00032814864200098525,
      "loss": 3.0792,
      "step": 108310
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2225143909454346,
      "learning_rate": 0.00032814456948234116,
      "loss": 2.9823,
      "step": 108311
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.951332926750183,
      "learning_rate": 0.0003281404969584644,
      "loss": 2.9307,
      "step": 108312
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2112672328948975,
      "learning_rate": 0.0003281364244293558,
      "loss": 2.9102,
      "step": 108313
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7175920009613037,
      "learning_rate": 0.00032813235189501613,
      "loss": 3.0854,
      "step": 108314
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7951635122299194,
      "learning_rate": 0.0003281282793554461,
      "loss": 2.9332,
      "step": 108315
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8328111171722412,
      "learning_rate": 0.0003281242068106465,
      "loss": 2.8462,
      "step": 108316
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6639761924743652,
      "learning_rate": 0.0003281201342606181,
      "loss": 3.152,
      "step": 108317
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6988095045089722,
      "learning_rate": 0.0003281160617053616,
      "loss": 2.9906,
      "step": 108318
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8434956073760986,
      "learning_rate": 0.00032811198914487784,
      "loss": 2.8082,
      "step": 108319
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1181390285491943,
      "learning_rate": 0.0003281079165791675,
      "loss": 3.4006,
      "step": 108320
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.758128046989441,
      "learning_rate": 0.0003281038440082313,
      "loss": 2.8766,
      "step": 108321
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1987967491149902,
      "learning_rate": 0.0003280997714320702,
      "loss": 3.0149,
      "step": 108322
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.338289499282837,
      "learning_rate": 0.00032809569885068477,
      "loss": 3.1177,
      "step": 108323
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.09938383102417,
      "learning_rate": 0.00032809162626407573,
      "loss": 2.7545,
      "step": 108324
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.405813217163086,
      "learning_rate": 0.00032808755367224403,
      "loss": 3.038,
      "step": 108325
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8255101442337036,
      "learning_rate": 0.00032808348107519033,
      "loss": 3.0817,
      "step": 108326
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7504472732543945,
      "learning_rate": 0.00032807940847291527,
      "loss": 2.9557,
      "step": 108327
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9194806814193726,
      "learning_rate": 0.00032807533586541983,
      "loss": 2.9834,
      "step": 108328
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3799972534179688,
      "learning_rate": 0.0003280712632527046,
      "loss": 3.0575,
      "step": 108329
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.755018949508667,
      "learning_rate": 0.0003280671906347704,
      "loss": 3.2349,
      "step": 108330
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0128860473632812,
      "learning_rate": 0.000328063118011618,
      "loss": 3.1808,
      "step": 108331
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9213207960128784,
      "learning_rate": 0.0003280590453832481,
      "loss": 3.1225,
      "step": 108332
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.528280735015869,
      "learning_rate": 0.00032805497274966157,
      "loss": 3.2268,
      "step": 108333
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1861331462860107,
      "learning_rate": 0.00032805090011085904,
      "loss": 2.8426,
      "step": 108334
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.817417025566101,
      "learning_rate": 0.0003280468274668413,
      "loss": 3.033,
      "step": 108335
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0835795402526855,
      "learning_rate": 0.0003280427548176092,
      "loss": 3.0121,
      "step": 108336
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.771388053894043,
      "learning_rate": 0.00032803868216316337,
      "loss": 2.815,
      "step": 108337
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7212705612182617,
      "learning_rate": 0.0003280346095035046,
      "loss": 2.9447,
      "step": 108338
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7459189891815186,
      "learning_rate": 0.0003280305368386337,
      "loss": 2.9522,
      "step": 108339
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9435105323791504,
      "learning_rate": 0.0003280264641685514,
      "loss": 3.2799,
      "step": 108340
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1348462104797363,
      "learning_rate": 0.00032802239149325836,
      "loss": 3.0011,
      "step": 108341
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9856626987457275,
      "learning_rate": 0.0003280183188127555,
      "loss": 3.0316,
      "step": 108342
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4039695262908936,
      "learning_rate": 0.0003280142461270435,
      "loss": 3.0501,
      "step": 108343
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.657811403274536,
      "learning_rate": 0.00032801017343612315,
      "loss": 2.9056,
      "step": 108344
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.373905897140503,
      "learning_rate": 0.0003280061007399952,
      "loss": 3.0067,
      "step": 108345
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3085837364196777,
      "learning_rate": 0.0003280020280386603,
      "loss": 3.1761,
      "step": 108346
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3311851024627686,
      "learning_rate": 0.00032799795533211936,
      "loss": 3.1202,
      "step": 108347
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.548198699951172,
      "learning_rate": 0.0003279938826203731,
      "loss": 3.0063,
      "step": 108348
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9879618883132935,
      "learning_rate": 0.00032798980990342215,
      "loss": 3.0884,
      "step": 108349
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0562851428985596,
      "learning_rate": 0.0003279857371812674,
      "loss": 3.0292,
      "step": 108350
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.9096460342407227,
      "learning_rate": 0.00032798166445390957,
      "loss": 3.1131,
      "step": 108351
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2113189697265625,
      "learning_rate": 0.00032797759172134946,
      "loss": 2.9007,
      "step": 108352
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9905256032943726,
      "learning_rate": 0.0003279735189835877,
      "loss": 3.2331,
      "step": 108353
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8224925994873047,
      "learning_rate": 0.0003279694462406253,
      "loss": 2.9645,
      "step": 108354
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2193243503570557,
      "learning_rate": 0.00032796537349246276,
      "loss": 2.8458,
      "step": 108355
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6901602745056152,
      "learning_rate": 0.00032796130073910087,
      "loss": 2.788,
      "step": 108356
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7263996601104736,
      "learning_rate": 0.0003279572279805405,
      "loss": 3.1456,
      "step": 108357
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1787328720092773,
      "learning_rate": 0.00032795315521678243,
      "loss": 2.8404,
      "step": 108358
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.072615385055542,
      "learning_rate": 0.00032794908244782717,
      "loss": 3.0159,
      "step": 108359
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.124227285385132,
      "learning_rate": 0.00032794500967367577,
      "loss": 2.9553,
      "step": 108360
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.576181173324585,
      "learning_rate": 0.00032794093689432886,
      "loss": 3.0351,
      "step": 108361
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0478782653808594,
      "learning_rate": 0.0003279368641097871,
      "loss": 3.0927,
      "step": 108362
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9569672346115112,
      "learning_rate": 0.00032793279132005155,
      "loss": 2.8378,
      "step": 108363
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6183841228485107,
      "learning_rate": 0.0003279287185251226,
      "loss": 2.7848,
      "step": 108364
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.723997712135315,
      "learning_rate": 0.0003279246457250012,
      "loss": 2.8972,
      "step": 108365
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8479490280151367,
      "learning_rate": 0.00032792057291968814,
      "loss": 2.8893,
      "step": 108366
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9370436668395996,
      "learning_rate": 0.0003279165001091841,
      "loss": 3.1499,
      "step": 108367
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.54811692237854,
      "learning_rate": 0.0003279124272934898,
      "loss": 2.9345,
      "step": 108368
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0652618408203125,
      "learning_rate": 0.00032790835447260607,
      "loss": 2.9105,
      "step": 108369
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.009434700012207,
      "learning_rate": 0.0003279042816465337,
      "loss": 2.8619,
      "step": 108370
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.138355255126953,
      "learning_rate": 0.0003279002088152733,
      "loss": 2.9913,
      "step": 108371
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0199925899505615,
      "learning_rate": 0.00032789613597882585,
      "loss": 2.9647,
      "step": 108372
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.032320737838745,
      "learning_rate": 0.0003278920631371919,
      "loss": 3.19,
      "step": 108373
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.019199848175049,
      "learning_rate": 0.00032788799029037234,
      "loss": 2.9316,
      "step": 108374
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.051328420639038,
      "learning_rate": 0.0003278839174383678,
      "loss": 2.9615,
      "step": 108375
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4970741271972656,
      "learning_rate": 0.00032787984458117913,
      "loss": 2.9533,
      "step": 108376
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8337278366088867,
      "learning_rate": 0.0003278757717188071,
      "loss": 2.9737,
      "step": 108377
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.819564938545227,
      "learning_rate": 0.00032787169885125245,
      "loss": 2.9636,
      "step": 108378
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8100426197052,
      "learning_rate": 0.00032786762597851595,
      "loss": 2.8829,
      "step": 108379
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9741852283477783,
      "learning_rate": 0.00032786355310059827,
      "loss": 3.0794,
      "step": 108380
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.165163993835449,
      "learning_rate": 0.00032785948021750027,
      "loss": 3.1383,
      "step": 108381
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.658552646636963,
      "learning_rate": 0.0003278554073292226,
      "loss": 2.9407,
      "step": 108382
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.898775577545166,
      "learning_rate": 0.0003278513344357662,
      "loss": 2.9085,
      "step": 108383
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9640144109725952,
      "learning_rate": 0.0003278472615371316,
      "loss": 2.8429,
      "step": 108384
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6297379732131958,
      "learning_rate": 0.0003278431886333197,
      "loss": 2.9179,
      "step": 108385
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7231236696243286,
      "learning_rate": 0.00032783911572433123,
      "loss": 3.0454,
      "step": 108386
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0404410362243652,
      "learning_rate": 0.00032783504281016704,
      "loss": 3.1424,
      "step": 108387
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8644506931304932,
      "learning_rate": 0.00032783096989082763,
      "loss": 3.1192,
      "step": 108388
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0500597953796387,
      "learning_rate": 0.00032782689696631397,
      "loss": 2.8708,
      "step": 108389
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.244799852371216,
      "learning_rate": 0.0003278228240366269,
      "loss": 3.0598,
      "step": 108390
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.623117446899414,
      "learning_rate": 0.0003278187511017668,
      "loss": 2.7293,
      "step": 108391
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.645943284034729,
      "learning_rate": 0.00032781467816173483,
      "loss": 3.1194,
      "step": 108392
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1205363273620605,
      "learning_rate": 0.0003278106052165316,
      "loss": 3.2544,
      "step": 108393
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.130077362060547,
      "learning_rate": 0.0003278065322661578,
      "loss": 3.0175,
      "step": 108394
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.894030213356018,
      "learning_rate": 0.0003278024593106142,
      "loss": 3.0619,
      "step": 108395
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8078594207763672,
      "learning_rate": 0.00032779838634990164,
      "loss": 3.1038,
      "step": 108396
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7639224529266357,
      "learning_rate": 0.0003277943133840209,
      "loss": 3.1862,
      "step": 108397
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3240551948547363,
      "learning_rate": 0.0003277902404129726,
      "loss": 2.8099,
      "step": 108398
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8079304695129395,
      "learning_rate": 0.00032778616743675754,
      "loss": 2.9774,
      "step": 108399
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.147491931915283,
      "learning_rate": 0.00032778209445537656,
      "loss": 3.2071,
      "step": 108400
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8077954053878784,
      "learning_rate": 0.0003277780214688303,
      "loss": 3.0417,
      "step": 108401
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6945899724960327,
      "learning_rate": 0.00032777394847711966,
      "loss": 3.1121,
      "step": 108402
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7190511226654053,
      "learning_rate": 0.0003277698754802453,
      "loss": 2.836,
      "step": 108403
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8815453052520752,
      "learning_rate": 0.000327765802478208,
      "loss": 3.0247,
      "step": 108404
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5099527835845947,
      "learning_rate": 0.00032776172947100843,
      "loss": 2.7871,
      "step": 108405
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8367096185684204,
      "learning_rate": 0.0003277576564586476,
      "loss": 2.8681,
      "step": 108406
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.978492021560669,
      "learning_rate": 0.000327753583441126,
      "loss": 3.046,
      "step": 108407
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.414275884628296,
      "learning_rate": 0.0003277495104184444,
      "loss": 3.1437,
      "step": 108408
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9403170347213745,
      "learning_rate": 0.0003277454373906038,
      "loss": 3.1292,
      "step": 108409
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7040668725967407,
      "learning_rate": 0.00032774136435760473,
      "loss": 3.0375,
      "step": 108410
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7079623937606812,
      "learning_rate": 0.000327737291319448,
      "loss": 2.9761,
      "step": 108411
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8858463764190674,
      "learning_rate": 0.00032773321827613445,
      "loss": 3.0892,
      "step": 108412
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6940629482269287,
      "learning_rate": 0.00032772914522766466,
      "loss": 3.0131,
      "step": 108413
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.101567029953003,
      "learning_rate": 0.00032772507217403954,
      "loss": 3.1921,
      "step": 108414
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7058541774749756,
      "learning_rate": 0.0003277209991152599,
      "loss": 2.949,
      "step": 108415
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7874563932418823,
      "learning_rate": 0.0003277169260513263,
      "loss": 2.8822,
      "step": 108416
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1794474124908447,
      "learning_rate": 0.00032771285298223955,
      "loss": 2.9058,
      "step": 108417
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6697784662246704,
      "learning_rate": 0.0003277087799080006,
      "loss": 3.1059,
      "step": 108418
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8959680795669556,
      "learning_rate": 0.00032770470682860997,
      "loss": 2.8646,
      "step": 108419
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.903512954711914,
      "learning_rate": 0.00032770063374406856,
      "loss": 3.2081,
      "step": 108420
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8692928552627563,
      "learning_rate": 0.0003276965606543771,
      "loss": 2.7809,
      "step": 108421
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.923059105873108,
      "learning_rate": 0.0003276924875595363,
      "loss": 2.9979,
      "step": 108422
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.799635410308838,
      "learning_rate": 0.00032768841445954684,
      "loss": 2.976,
      "step": 108423
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.885290503501892,
      "learning_rate": 0.00032768434135440976,
      "loss": 2.9973,
      "step": 108424
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6963138580322266,
      "learning_rate": 0.0003276802682441255,
      "loss": 3.0985,
      "step": 108425
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.066753625869751,
      "learning_rate": 0.00032767619512869505,
      "loss": 2.8031,
      "step": 108426
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6969573497772217,
      "learning_rate": 0.000327672122008119,
      "loss": 2.8292,
      "step": 108427
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6623055934906006,
      "learning_rate": 0.0003276680488823982,
      "loss": 3.2849,
      "step": 108428
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6159324645996094,
      "learning_rate": 0.0003276639757515334,
      "loss": 2.9452,
      "step": 108429
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8168388605117798,
      "learning_rate": 0.00032765990261552535,
      "loss": 3.1555,
      "step": 108430
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.070249319076538,
      "learning_rate": 0.0003276558294743748,
      "loss": 3.1264,
      "step": 108431
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8097667694091797,
      "learning_rate": 0.0003276517563280825,
      "loss": 2.81,
      "step": 108432
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8838882446289062,
      "learning_rate": 0.00032764768317664923,
      "loss": 3.1171,
      "step": 108433
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0484955310821533,
      "learning_rate": 0.0003276436100200757,
      "loss": 3.0305,
      "step": 108434
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8129321336746216,
      "learning_rate": 0.00032763953685836274,
      "loss": 3.1086,
      "step": 108435
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.726953387260437,
      "learning_rate": 0.0003276354636915111,
      "loss": 2.8143,
      "step": 108436
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.492844820022583,
      "learning_rate": 0.0003276313905195214,
      "loss": 2.8239,
      "step": 108437
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8056750297546387,
      "learning_rate": 0.00032762731734239454,
      "loss": 2.7753,
      "step": 108438
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2549726963043213,
      "learning_rate": 0.0003276232441601313,
      "loss": 2.9987,
      "step": 108439
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5523104667663574,
      "learning_rate": 0.00032761917097273236,
      "loss": 2.7974,
      "step": 108440
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8214738368988037,
      "learning_rate": 0.00032761509778019845,
      "loss": 2.8269,
      "step": 108441
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.787284255027771,
      "learning_rate": 0.0003276110245825304,
      "loss": 2.9012,
      "step": 108442
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9195671081542969,
      "learning_rate": 0.000327606951379729,
      "loss": 3.0646,
      "step": 108443
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2469866275787354,
      "learning_rate": 0.0003276028781717948,
      "loss": 3.16,
      "step": 108444
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.942748785018921,
      "learning_rate": 0.00032759880495872887,
      "loss": 2.8888,
      "step": 108445
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7393252849578857,
      "learning_rate": 0.0003275947317405317,
      "loss": 3.0569,
      "step": 108446
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6121655702590942,
      "learning_rate": 0.0003275906585172041,
      "loss": 3.0411,
      "step": 108447
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.758763551712036,
      "learning_rate": 0.000327586585288747,
      "loss": 2.9188,
      "step": 108448
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.963000535964966,
      "learning_rate": 0.00032758251205516095,
      "loss": 2.6134,
      "step": 108449
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.3182547092437744,
      "learning_rate": 0.00032757843881644675,
      "loss": 3.0344,
      "step": 108450
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6212128400802612,
      "learning_rate": 0.00032757436557260535,
      "loss": 3.0644,
      "step": 108451
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.152418375015259,
      "learning_rate": 0.0003275702923236372,
      "loss": 2.7628,
      "step": 108452
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.3606486320495605,
      "learning_rate": 0.00032756621906954327,
      "loss": 2.8917,
      "step": 108453
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.5049846172332764,
      "learning_rate": 0.00032756214581032436,
      "loss": 2.9722,
      "step": 108454
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7824584245681763,
      "learning_rate": 0.00032755807254598103,
      "loss": 3.1143,
      "step": 108455
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3849093914031982,
      "learning_rate": 0.0003275539992765141,
      "loss": 3.2536,
      "step": 108456
    },
    {
      "epoch": 1.41,
      "grad_norm": 5.147651672363281,
      "learning_rate": 0.0003275499260019244,
      "loss": 2.773,
      "step": 108457
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.797563910484314,
      "learning_rate": 0.00032754585272221266,
      "loss": 3.1211,
      "step": 108458
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8880016803741455,
      "learning_rate": 0.0003275417794373796,
      "loss": 2.7546,
      "step": 108459
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9572523832321167,
      "learning_rate": 0.000327537706147426,
      "loss": 3.0596,
      "step": 108460
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6882331371307373,
      "learning_rate": 0.0003275336328523527,
      "loss": 2.9891,
      "step": 108461
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.839134693145752,
      "learning_rate": 0.00032752955955216026,
      "loss": 2.959,
      "step": 108462
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.67948842048645,
      "learning_rate": 0.00032752548624684965,
      "loss": 2.8895,
      "step": 108463
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6274378299713135,
      "learning_rate": 0.0003275214129364215,
      "loss": 3.076,
      "step": 108464
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6652319431304932,
      "learning_rate": 0.00032751733962087655,
      "loss": 3.0057,
      "step": 108465
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7943975925445557,
      "learning_rate": 0.00032751326630021564,
      "loss": 3.016,
      "step": 108466
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.7404987812042236,
      "learning_rate": 0.0003275091929744395,
      "loss": 3.0508,
      "step": 108467
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.9258248805999756,
      "learning_rate": 0.0003275051196435489,
      "loss": 2.9129,
      "step": 108468
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0929298400878906,
      "learning_rate": 0.00032750104630754456,
      "loss": 3.1546,
      "step": 108469
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.527844190597534,
      "learning_rate": 0.0003274969729664273,
      "loss": 2.9082,
      "step": 108470
    },
    {
      "epoch": 1.41,
      "grad_norm": 4.225921154022217,
      "learning_rate": 0.00032749289962019775,
      "loss": 2.8023,
      "step": 108471
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.693053722381592,
      "learning_rate": 0.0003274888262688568,
      "loss": 3.1621,
      "step": 108472
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.018833637237549,
      "learning_rate": 0.0003274847529124051,
      "loss": 2.619,
      "step": 108473
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6754567623138428,
      "learning_rate": 0.00032748067955084355,
      "loss": 2.9104,
      "step": 108474
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.93231999874115,
      "learning_rate": 0.00032747660618417284,
      "loss": 2.9327,
      "step": 108475
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.484891176223755,
      "learning_rate": 0.00032747253281239364,
      "loss": 3.094,
      "step": 108476
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.67582106590271,
      "learning_rate": 0.00032746845943550676,
      "loss": 3.1023,
      "step": 108477
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.857369065284729,
      "learning_rate": 0.000327464386053513,
      "loss": 3.0429,
      "step": 108478
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.084247350692749,
      "learning_rate": 0.00032746031266641317,
      "loss": 3.1283,
      "step": 108479
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7132413387298584,
      "learning_rate": 0.00032745623927420787,
      "loss": 3.272,
      "step": 108480
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.971787929534912,
      "learning_rate": 0.00032745216587689793,
      "loss": 2.8732,
      "step": 108481
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5585190057754517,
      "learning_rate": 0.0003274480924744842,
      "loss": 3.0596,
      "step": 108482
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.199899673461914,
      "learning_rate": 0.0003274440190669672,
      "loss": 2.9787,
      "step": 108483
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0344793796539307,
      "learning_rate": 0.00032743994565434797,
      "loss": 3.1194,
      "step": 108484
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.5967774391174316,
      "learning_rate": 0.00032743587223662716,
      "loss": 2.6223,
      "step": 108485
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8149234056472778,
      "learning_rate": 0.0003274317988138054,
      "loss": 3.0547,
      "step": 108486
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.848889708518982,
      "learning_rate": 0.00032742772538588356,
      "loss": 2.7408,
      "step": 108487
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2704198360443115,
      "learning_rate": 0.0003274236519528625,
      "loss": 2.8539,
      "step": 108488
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3948278427124023,
      "learning_rate": 0.0003274195785147427,
      "loss": 2.795,
      "step": 108489
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7591402530670166,
      "learning_rate": 0.0003274155050715252,
      "loss": 3.1706,
      "step": 108490
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7077823877334595,
      "learning_rate": 0.00032741143162321064,
      "loss": 2.9855,
      "step": 108491
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.54974365234375,
      "learning_rate": 0.00032740735816979975,
      "loss": 2.7926,
      "step": 108492
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9986211061477661,
      "learning_rate": 0.00032740328471129326,
      "loss": 2.8668,
      "step": 108493
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8549208641052246,
      "learning_rate": 0.0003273992112476921,
      "loss": 2.7838,
      "step": 108494
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.975696086883545,
      "learning_rate": 0.0003273951377789968,
      "loss": 2.955,
      "step": 108495
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.993788003921509,
      "learning_rate": 0.00032739106430520825,
      "loss": 3.0771,
      "step": 108496
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.031128168106079,
      "learning_rate": 0.0003273869908263272,
      "loss": 3.0816,
      "step": 108497
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3245208263397217,
      "learning_rate": 0.00032738291734235445,
      "loss": 3.0743,
      "step": 108498
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9545910358428955,
      "learning_rate": 0.0003273788438532906,
      "loss": 3.0404,
      "step": 108499
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.379695177078247,
      "learning_rate": 0.0003273747703591366,
      "loss": 3.0167,
      "step": 108500
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6983736753463745,
      "learning_rate": 0.00032737069685989304,
      "loss": 2.7706,
      "step": 108501
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.689659833908081,
      "learning_rate": 0.00032736662335556074,
      "loss": 2.8308,
      "step": 108502
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.710744857788086,
      "learning_rate": 0.0003273625498461405,
      "loss": 2.9912,
      "step": 108503
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5820426940917969,
      "learning_rate": 0.0003273584763316331,
      "loss": 2.9789,
      "step": 108504
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0902512073516846,
      "learning_rate": 0.0003273544028120391,
      "loss": 3.1477,
      "step": 108505
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5566353797912598,
      "learning_rate": 0.0003273503292873596,
      "loss": 3.1555,
      "step": 108506
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.323976755142212,
      "learning_rate": 0.00032734625575759496,
      "loss": 2.5773,
      "step": 108507
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8184504508972168,
      "learning_rate": 0.0003273421822227463,
      "loss": 2.8625,
      "step": 108508
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.688123345375061,
      "learning_rate": 0.0003273381086828141,
      "loss": 2.8079,
      "step": 108509
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0064468383789062,
      "learning_rate": 0.0003273340351377993,
      "loss": 3.0783,
      "step": 108510
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3933699131011963,
      "learning_rate": 0.0003273299615877025,
      "loss": 3.0879,
      "step": 108511
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8254578113555908,
      "learning_rate": 0.00032732588803252463,
      "loss": 2.7267,
      "step": 108512
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3642630577087402,
      "learning_rate": 0.0003273218144722663,
      "loss": 2.9339,
      "step": 108513
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9320071935653687,
      "learning_rate": 0.00032731774090692835,
      "loss": 2.8556,
      "step": 108514
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0712857246398926,
      "learning_rate": 0.00032731366733651155,
      "loss": 3.0311,
      "step": 108515
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6966607570648193,
      "learning_rate": 0.00032730959376101656,
      "loss": 2.9439,
      "step": 108516
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.354296922683716,
      "learning_rate": 0.00032730552018044425,
      "loss": 3.0613,
      "step": 108517
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0009212493896484,
      "learning_rate": 0.0003273014465947953,
      "loss": 3.1406,
      "step": 108518
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8816821575164795,
      "learning_rate": 0.0003272973730040705,
      "loss": 3.2173,
      "step": 108519
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.757960081100464,
      "learning_rate": 0.0003272932994082706,
      "loss": 2.9791,
      "step": 108520
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1046297550201416,
      "learning_rate": 0.00032728922580739647,
      "loss": 2.9648,
      "step": 108521
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.951226830482483,
      "learning_rate": 0.0003272851522014486,
      "loss": 2.6064,
      "step": 108522
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9804774522781372,
      "learning_rate": 0.00032728107859042796,
      "loss": 3.0558,
      "step": 108523
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8522353172302246,
      "learning_rate": 0.0003272770049743353,
      "loss": 3.06,
      "step": 108524
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9259482622146606,
      "learning_rate": 0.0003272729313531713,
      "loss": 3.1186,
      "step": 108525
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1819493770599365,
      "learning_rate": 0.0003272688577269367,
      "loss": 2.8995,
      "step": 108526
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8350964784622192,
      "learning_rate": 0.00032726478409563235,
      "loss": 2.9302,
      "step": 108527
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8449950218200684,
      "learning_rate": 0.0003272607104592589,
      "loss": 3.0272,
      "step": 108528
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1127116680145264,
      "learning_rate": 0.0003272566368178173,
      "loss": 2.8833,
      "step": 108529
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0163450241088867,
      "learning_rate": 0.000327252563171308,
      "loss": 2.8833,
      "step": 108530
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.678269624710083,
      "learning_rate": 0.00032724848951973213,
      "loss": 3.0281,
      "step": 108531
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8141120672225952,
      "learning_rate": 0.0003272444158630901,
      "loss": 3.2237,
      "step": 108532
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.405704975128174,
      "learning_rate": 0.00032724034220138286,
      "loss": 2.9368,
      "step": 108533
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0683679580688477,
      "learning_rate": 0.0003272362685346112,
      "loss": 3.3767,
      "step": 108534
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.8833296298980713,
      "learning_rate": 0.0003272321948627757,
      "loss": 2.8996,
      "step": 108535
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5681169033050537,
      "learning_rate": 0.0003272281211858773,
      "loss": 2.817,
      "step": 108536
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2121617794036865,
      "learning_rate": 0.0003272240475039167,
      "loss": 3.0667,
      "step": 108537
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3766613006591797,
      "learning_rate": 0.0003272199738168945,
      "loss": 3.1099,
      "step": 108538
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2799274921417236,
      "learning_rate": 0.0003272159001248116,
      "loss": 2.8594,
      "step": 108539
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1095824241638184,
      "learning_rate": 0.00032721182642766897,
      "loss": 3.1377,
      "step": 108540
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6504147052764893,
      "learning_rate": 0.0003272077527254669,
      "loss": 3.0367,
      "step": 108541
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6837867498397827,
      "learning_rate": 0.00032720367901820655,
      "loss": 2.9591,
      "step": 108542
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8760288953781128,
      "learning_rate": 0.00032719960530588844,
      "loss": 3.2559,
      "step": 108543
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.140272378921509,
      "learning_rate": 0.0003271955315885134,
      "loss": 2.908,
      "step": 108544
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.716797947883606,
      "learning_rate": 0.00032719145786608224,
      "loss": 3.1262,
      "step": 108545
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.231856107711792,
      "learning_rate": 0.00032718738413859577,
      "loss": 2.948,
      "step": 108546
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.750183343887329,
      "learning_rate": 0.00032718331040605447,
      "loss": 2.7815,
      "step": 108547
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0529115200042725,
      "learning_rate": 0.00032717923666845933,
      "loss": 3.1561,
      "step": 108548
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.10259747505188,
      "learning_rate": 0.00032717516292581116,
      "loss": 2.9353,
      "step": 108549
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9202110767364502,
      "learning_rate": 0.00032717108917811055,
      "loss": 3.1149,
      "step": 108550
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8779369592666626,
      "learning_rate": 0.00032716701542535826,
      "loss": 3.0044,
      "step": 108551
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.604839563369751,
      "learning_rate": 0.00032716294166755517,
      "loss": 3.0433,
      "step": 108552
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.229665994644165,
      "learning_rate": 0.000327158867904702,
      "loss": 3.0093,
      "step": 108553
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0931878089904785,
      "learning_rate": 0.0003271547941367994,
      "loss": 2.998,
      "step": 108554
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2024595737457275,
      "learning_rate": 0.0003271507203638483,
      "loss": 2.9056,
      "step": 108555
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9482977390289307,
      "learning_rate": 0.00032714664658584937,
      "loss": 2.9255,
      "step": 108556
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6193095445632935,
      "learning_rate": 0.00032714257280280323,
      "loss": 2.8586,
      "step": 108557
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5911979675292969,
      "learning_rate": 0.00032713849901471094,
      "loss": 3.298,
      "step": 108558
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.771557331085205,
      "learning_rate": 0.00032713442522157293,
      "loss": 2.9342,
      "step": 108559
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.834837794303894,
      "learning_rate": 0.0003271303514233903,
      "loss": 2.6438,
      "step": 108560
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.079160690307617,
      "learning_rate": 0.00032712627762016347,
      "loss": 3.0862,
      "step": 108561
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8878068923950195,
      "learning_rate": 0.0003271222038118934,
      "loss": 3.174,
      "step": 108562
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.061835289001465,
      "learning_rate": 0.00032711812999858085,
      "loss": 2.6787,
      "step": 108563
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0672123432159424,
      "learning_rate": 0.00032711405618022647,
      "loss": 2.9771,
      "step": 108564
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9998078346252441,
      "learning_rate": 0.00032710998235683115,
      "loss": 2.9402,
      "step": 108565
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1522533893585205,
      "learning_rate": 0.0003271059085283955,
      "loss": 2.878,
      "step": 108566
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3411920070648193,
      "learning_rate": 0.0003271018346949204,
      "loss": 2.9654,
      "step": 108567
    },
    {
      "epoch": 1.41,
      "grad_norm": 5.59271764755249,
      "learning_rate": 0.0003270977608564065,
      "loss": 2.9785,
      "step": 108568
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.727933883666992,
      "learning_rate": 0.0003270936870128546,
      "loss": 2.941,
      "step": 108569
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8057823181152344,
      "learning_rate": 0.0003270896131642656,
      "loss": 2.9817,
      "step": 108570
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1226909160614014,
      "learning_rate": 0.00032708553931064005,
      "loss": 3.1375,
      "step": 108571
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0398452281951904,
      "learning_rate": 0.0003270814654519788,
      "loss": 3.1304,
      "step": 108572
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.143556833267212,
      "learning_rate": 0.0003270773915882826,
      "loss": 3.1172,
      "step": 108573
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.023348331451416,
      "learning_rate": 0.00032707331771955216,
      "loss": 3.1693,
      "step": 108574
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9146416187286377,
      "learning_rate": 0.0003270692438457882,
      "loss": 3.0765,
      "step": 108575
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9414540529251099,
      "learning_rate": 0.0003270651699669918,
      "loss": 3.091,
      "step": 108576
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.411144495010376,
      "learning_rate": 0.0003270610960831632,
      "loss": 2.9117,
      "step": 108577
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6215155124664307,
      "learning_rate": 0.00032705702219430364,
      "loss": 3.1677,
      "step": 108578
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9766526222229004,
      "learning_rate": 0.0003270529483004136,
      "loss": 2.892,
      "step": 108579
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0398948192596436,
      "learning_rate": 0.00032704887440149384,
      "loss": 3.0676,
      "step": 108580
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.812958002090454,
      "learning_rate": 0.0003270448004975453,
      "loss": 2.9843,
      "step": 108581
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.0217294692993164,
      "learning_rate": 0.00032704072658856857,
      "loss": 2.8962,
      "step": 108582
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.4083962440490723,
      "learning_rate": 0.0003270366526745644,
      "loss": 2.9253,
      "step": 108583
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8629194498062134,
      "learning_rate": 0.0003270325787555336,
      "loss": 3.0315,
      "step": 108584
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7391424179077148,
      "learning_rate": 0.00032702850483147706,
      "loss": 3.128,
      "step": 108585
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.109719753265381,
      "learning_rate": 0.00032702443090239534,
      "loss": 2.8174,
      "step": 108586
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9536362886428833,
      "learning_rate": 0.00032702035696828926,
      "loss": 2.8272,
      "step": 108587
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0753839015960693,
      "learning_rate": 0.0003270162830291596,
      "loss": 2.6574,
      "step": 108588
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7960246801376343,
      "learning_rate": 0.0003270122090850071,
      "loss": 2.7922,
      "step": 108589
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.812942624092102,
      "learning_rate": 0.0003270081351358325,
      "loss": 2.8988,
      "step": 108590
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7051236629486084,
      "learning_rate": 0.0003270040611816366,
      "loss": 2.9193,
      "step": 108591
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1003241539001465,
      "learning_rate": 0.00032699998722242015,
      "loss": 3.0865,
      "step": 108592
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.9599130153656006,
      "learning_rate": 0.0003269959132581838,
      "loss": 2.959,
      "step": 108593
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.787210464477539,
      "learning_rate": 0.00032699183928892856,
      "loss": 2.8777,
      "step": 108594
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.026127815246582,
      "learning_rate": 0.00032698776531465496,
      "loss": 3.0872,
      "step": 108595
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8074007034301758,
      "learning_rate": 0.0003269836913353638,
      "loss": 3.2146,
      "step": 108596
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.596462607383728,
      "learning_rate": 0.0003269796173510558,
      "loss": 3.032,
      "step": 108597
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.920510172843933,
      "learning_rate": 0.00032697554336173193,
      "loss": 2.9988,
      "step": 108598
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1773595809936523,
      "learning_rate": 0.0003269714693673927,
      "loss": 2.9319,
      "step": 108599
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8897266387939453,
      "learning_rate": 0.00032696739536803893,
      "loss": 3.039,
      "step": 108600
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9422541856765747,
      "learning_rate": 0.0003269633213636715,
      "loss": 2.8882,
      "step": 108601
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8205718994140625,
      "learning_rate": 0.00032695924735429106,
      "loss": 2.8405,
      "step": 108602
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7069969177246094,
      "learning_rate": 0.00032695517333989835,
      "loss": 2.9393,
      "step": 108603
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4323530197143555,
      "learning_rate": 0.00032695109932049423,
      "loss": 2.8303,
      "step": 108604
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.639040470123291,
      "learning_rate": 0.00032694702529607926,
      "loss": 2.8517,
      "step": 108605
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0295422077178955,
      "learning_rate": 0.00032694295126665447,
      "loss": 3.0813,
      "step": 108606
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1570966243743896,
      "learning_rate": 0.00032693887723222044,
      "loss": 2.8262,
      "step": 108607
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.749646544456482,
      "learning_rate": 0.00032693480319277794,
      "loss": 3.0045,
      "step": 108608
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6526124477386475,
      "learning_rate": 0.0003269307291483277,
      "loss": 2.8843,
      "step": 108609
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.6599044799804688,
      "learning_rate": 0.00032692665509887066,
      "loss": 3.1276,
      "step": 108610
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0349690914154053,
      "learning_rate": 0.00032692258104440736,
      "loss": 2.8667,
      "step": 108611
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.7780628204345703,
      "learning_rate": 0.0003269185069849386,
      "loss": 3.2195,
      "step": 108612
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6860178709030151,
      "learning_rate": 0.0003269144329204653,
      "loss": 2.8727,
      "step": 108613
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8975944519042969,
      "learning_rate": 0.000326910358850988,
      "loss": 2.9752,
      "step": 108614
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0578854084014893,
      "learning_rate": 0.0003269062847765076,
      "loss": 3.1001,
      "step": 108615
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4843969345092773,
      "learning_rate": 0.00032690221069702483,
      "loss": 2.9153,
      "step": 108616
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9753531217575073,
      "learning_rate": 0.0003268981366125404,
      "loss": 3.0914,
      "step": 108617
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0188350677490234,
      "learning_rate": 0.0003268940625230551,
      "loss": 2.9709,
      "step": 108618
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2637248039245605,
      "learning_rate": 0.00032688998842856975,
      "loss": 2.9378,
      "step": 108619
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0404181480407715,
      "learning_rate": 0.00032688591432908495,
      "loss": 3.0493,
      "step": 108620
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8126676082611084,
      "learning_rate": 0.0003268818402246016,
      "loss": 3.0342,
      "step": 108621
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5043234825134277,
      "learning_rate": 0.0003268777661151204,
      "loss": 2.9136,
      "step": 108622
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.662274122238159,
      "learning_rate": 0.0003268736920006421,
      "loss": 2.9146,
      "step": 108623
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0521295070648193,
      "learning_rate": 0.00032686961788116743,
      "loss": 2.9092,
      "step": 108624
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.834893226623535,
      "learning_rate": 0.0003268655437566973,
      "loss": 2.8281,
      "step": 108625
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8198845386505127,
      "learning_rate": 0.0003268614696272323,
      "loss": 2.705,
      "step": 108626
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6319578886032104,
      "learning_rate": 0.00032685739549277325,
      "loss": 2.9167,
      "step": 108627
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.67321515083313,
      "learning_rate": 0.0003268533213533209,
      "loss": 2.9607,
      "step": 108628
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6814459562301636,
      "learning_rate": 0.000326849247208876,
      "loss": 2.9934,
      "step": 108629
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.5052390098571777,
      "learning_rate": 0.00032684517305943937,
      "loss": 2.9639,
      "step": 108630
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4704723358154297,
      "learning_rate": 0.00032684109890501175,
      "loss": 2.9896,
      "step": 108631
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7259533405303955,
      "learning_rate": 0.0003268370247455937,
      "loss": 3.1163,
      "step": 108632
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1505908966064453,
      "learning_rate": 0.00032683295058118624,
      "loss": 3.1376,
      "step": 108633
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.3300631046295166,
      "learning_rate": 0.0003268288764117901,
      "loss": 2.9735,
      "step": 108634
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8379559516906738,
      "learning_rate": 0.0003268248022374058,
      "loss": 2.9674,
      "step": 108635
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6420787572860718,
      "learning_rate": 0.00032682072805803434,
      "loss": 3.0374,
      "step": 108636
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.841446042060852,
      "learning_rate": 0.00032681665387367646,
      "loss": 3.1392,
      "step": 108637
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.2424991130828857,
      "learning_rate": 0.00032681257968433276,
      "loss": 3.0248,
      "step": 108638
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7318834066390991,
      "learning_rate": 0.0003268085054900041,
      "loss": 3.1084,
      "step": 108639
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9775139093399048,
      "learning_rate": 0.00032680443129069135,
      "loss": 3.088,
      "step": 108640
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8288190364837646,
      "learning_rate": 0.0003268003570863951,
      "loss": 2.8595,
      "step": 108641
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9632493257522583,
      "learning_rate": 0.00032679628287711607,
      "loss": 2.9129,
      "step": 108642
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7034980058670044,
      "learning_rate": 0.0003267922086628552,
      "loss": 3.1089,
      "step": 108643
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8980318307876587,
      "learning_rate": 0.0003267881344436131,
      "loss": 3.0466,
      "step": 108644
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9556868076324463,
      "learning_rate": 0.0003267840602193906,
      "loss": 3.0773,
      "step": 108645
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9867264032363892,
      "learning_rate": 0.00032677998599018844,
      "loss": 3.2164,
      "step": 108646
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7755380868911743,
      "learning_rate": 0.00032677591175600735,
      "loss": 2.9206,
      "step": 108647
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.689391613006592,
      "learning_rate": 0.00032677183751684814,
      "loss": 2.9522,
      "step": 108648
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7842170000076294,
      "learning_rate": 0.0003267677632727116,
      "loss": 2.8648,
      "step": 108649
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.130775213241577,
      "learning_rate": 0.0003267636890235983,
      "loss": 2.8397,
      "step": 108650
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8365178108215332,
      "learning_rate": 0.0003267596147695092,
      "loss": 2.9322,
      "step": 108651
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.871666669845581,
      "learning_rate": 0.00032675554051044497,
      "loss": 2.6582,
      "step": 108652
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8904422521591187,
      "learning_rate": 0.00032675146624640635,
      "loss": 3.0109,
      "step": 108653
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.104795455932617,
      "learning_rate": 0.0003267473919773941,
      "loss": 3.0291,
      "step": 108654
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8973937034606934,
      "learning_rate": 0.00032674331770340915,
      "loss": 3.148,
      "step": 108655
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.5774943828582764,
      "learning_rate": 0.000326739243424452,
      "loss": 2.8481,
      "step": 108656
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0099291801452637,
      "learning_rate": 0.0003267351691405235,
      "loss": 2.7826,
      "step": 108657
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8545817136764526,
      "learning_rate": 0.0003267310948516246,
      "loss": 2.9274,
      "step": 108658
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.4621009826660156,
      "learning_rate": 0.0003267270205577557,
      "loss": 3.1311,
      "step": 108659
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8651753664016724,
      "learning_rate": 0.0003267229462589178,
      "loss": 3.1522,
      "step": 108660
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.094466209411621,
      "learning_rate": 0.00032671887195511167,
      "loss": 3.1406,
      "step": 108661
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0302743911743164,
      "learning_rate": 0.00032671479764633784,
      "loss": 2.9567,
      "step": 108662
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.328599691390991,
      "learning_rate": 0.0003267107233325973,
      "loss": 2.7934,
      "step": 108663
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.6234179735183716,
      "learning_rate": 0.00032670664901389077,
      "loss": 3.2936,
      "step": 108664
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9037246704101562,
      "learning_rate": 0.000326702574690219,
      "loss": 3.1015,
      "step": 108665
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8913285732269287,
      "learning_rate": 0.00032669850036158257,
      "loss": 2.951,
      "step": 108666
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9325180053710938,
      "learning_rate": 0.00032669442602798246,
      "loss": 3.172,
      "step": 108667
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.1675448417663574,
      "learning_rate": 0.0003266903516894194,
      "loss": 3.0453,
      "step": 108668
    },
    {
      "epoch": 1.41,
      "grad_norm": 2.0734329223632812,
      "learning_rate": 0.0003266862773458941,
      "loss": 2.9677,
      "step": 108669
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9604429006576538,
      "learning_rate": 0.00032668220299740725,
      "loss": 3.1349,
      "step": 108670
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.8947577476501465,
      "learning_rate": 0.00032667812864395975,
      "loss": 3.0097,
      "step": 108671
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.7847517728805542,
      "learning_rate": 0.00032667405428555223,
      "loss": 3.0301,
      "step": 108672
    },
    {
      "epoch": 1.41,
      "grad_norm": 1.9134626388549805,
      "learning_rate": 0.00032666997992218547,
      "loss": 3.039,
      "step": 108673
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6316699981689453,
      "learning_rate": 0.0003266659055538604,
      "loss": 2.932,
      "step": 108674
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.112464427947998,
      "learning_rate": 0.00032666183118057744,
      "loss": 3.0131,
      "step": 108675
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.217766284942627,
      "learning_rate": 0.0003266577568023377,
      "loss": 2.9156,
      "step": 108676
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.4313602447509766,
      "learning_rate": 0.0003266536824191417,
      "loss": 3.0146,
      "step": 108677
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7341142892837524,
      "learning_rate": 0.00032664960803099034,
      "loss": 3.1441,
      "step": 108678
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.200990915298462,
      "learning_rate": 0.00032664553363788423,
      "loss": 2.9835,
      "step": 108679
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.331369638442993,
      "learning_rate": 0.0003266414592398243,
      "loss": 3.1125,
      "step": 108680
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.356430768966675,
      "learning_rate": 0.0003266373848368111,
      "loss": 2.9701,
      "step": 108681
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1503806114196777,
      "learning_rate": 0.0003266333104288456,
      "loss": 3.1175,
      "step": 108682
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7973583936691284,
      "learning_rate": 0.0003266292360159285,
      "loss": 3.0181,
      "step": 108683
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.720389485359192,
      "learning_rate": 0.0003266251615980604,
      "loss": 3.1964,
      "step": 108684
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8883867263793945,
      "learning_rate": 0.00032662108717524226,
      "loss": 2.9114,
      "step": 108685
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3891963958740234,
      "learning_rate": 0.00032661701274747475,
      "loss": 3.0593,
      "step": 108686
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9745595455169678,
      "learning_rate": 0.0003266129383147586,
      "loss": 3.1782,
      "step": 108687
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7308084964752197,
      "learning_rate": 0.00032660886387709463,
      "loss": 2.8757,
      "step": 108688
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2678093910217285,
      "learning_rate": 0.00032660478943448364,
      "loss": 3.1022,
      "step": 108689
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6558215618133545,
      "learning_rate": 0.00032660071498692624,
      "loss": 3.044,
      "step": 108690
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5191214084625244,
      "learning_rate": 0.00032659664053442326,
      "loss": 3.0073,
      "step": 108691
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1972148418426514,
      "learning_rate": 0.0003265925660769755,
      "loss": 2.9873,
      "step": 108692
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.6002936363220215,
      "learning_rate": 0.0003265884916145836,
      "loss": 3.1419,
      "step": 108693
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2259116172790527,
      "learning_rate": 0.0003265844171472485,
      "loss": 3.1368,
      "step": 108694
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2126150131225586,
      "learning_rate": 0.0003265803426749708,
      "loss": 3.1703,
      "step": 108695
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8843181133270264,
      "learning_rate": 0.0003265762681977513,
      "loss": 3.1038,
      "step": 108696
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0181121826171875,
      "learning_rate": 0.00032657219371559073,
      "loss": 3.0466,
      "step": 108697
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1074070930480957,
      "learning_rate": 0.00032656811922849004,
      "loss": 2.9325,
      "step": 108698
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9584450721740723,
      "learning_rate": 0.00032656404473644973,
      "loss": 3.0722,
      "step": 108699
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7252367734909058,
      "learning_rate": 0.00032655997023947066,
      "loss": 3.0185,
      "step": 108700
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.118314743041992,
      "learning_rate": 0.00032655589573755365,
      "loss": 2.9998,
      "step": 108701
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5598292350769043,
      "learning_rate": 0.0003265518212306993,
      "loss": 3.0555,
      "step": 108702
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8651695251464844,
      "learning_rate": 0.00032654774671890855,
      "loss": 2.6386,
      "step": 108703
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5982118844985962,
      "learning_rate": 0.00032654367220218204,
      "loss": 3.1831,
      "step": 108704
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1044492721557617,
      "learning_rate": 0.00032653959768052057,
      "loss": 2.9865,
      "step": 108705
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7057418823242188,
      "learning_rate": 0.00032653552315392483,
      "loss": 3.3445,
      "step": 108706
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.015967845916748,
      "learning_rate": 0.00032653144862239576,
      "loss": 3.1874,
      "step": 108707
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6928083896636963,
      "learning_rate": 0.00032652737408593385,
      "loss": 2.9352,
      "step": 108708
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1478638648986816,
      "learning_rate": 0.0003265232995445401,
      "loss": 3.25,
      "step": 108709
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.2462360858917236,
      "learning_rate": 0.00032651922499821515,
      "loss": 2.9521,
      "step": 108710
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6118288040161133,
      "learning_rate": 0.0003265151504469598,
      "loss": 2.9522,
      "step": 108711
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.044872283935547,
      "learning_rate": 0.00032651107589077465,
      "loss": 2.9427,
      "step": 108712
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.154799699783325,
      "learning_rate": 0.00032650700132966074,
      "loss": 3.0539,
      "step": 108713
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.58793306350708,
      "learning_rate": 0.00032650292676361865,
      "loss": 3.0326,
      "step": 108714
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.083108901977539,
      "learning_rate": 0.00032649885219264907,
      "loss": 3.0981,
      "step": 108715
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.71343731880188,
      "learning_rate": 0.000326494777616753,
      "loss": 3.2415,
      "step": 108716
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9201668500900269,
      "learning_rate": 0.00032649070303593093,
      "loss": 3.0327,
      "step": 108717
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7606858015060425,
      "learning_rate": 0.0003264866284501838,
      "loss": 3.1111,
      "step": 108718
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3965210914611816,
      "learning_rate": 0.0003264825538595123,
      "loss": 3.0995,
      "step": 108719
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.300844192504883,
      "learning_rate": 0.0003264784792639172,
      "loss": 2.8639,
      "step": 108720
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8345255851745605,
      "learning_rate": 0.0003264744046633992,
      "loss": 2.9087,
      "step": 108721
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8609535694122314,
      "learning_rate": 0.00032647033005795914,
      "loss": 2.9253,
      "step": 108722
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9595162868499756,
      "learning_rate": 0.0003264662554475978,
      "loss": 3.1268,
      "step": 108723
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1556499004364014,
      "learning_rate": 0.0003264621808323158,
      "loss": 2.6462,
      "step": 108724
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.254833221435547,
      "learning_rate": 0.00032645810621211404,
      "loss": 2.9812,
      "step": 108725
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.787829875946045,
      "learning_rate": 0.00032645403158699317,
      "loss": 2.9056,
      "step": 108726
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8025524616241455,
      "learning_rate": 0.000326449956956954,
      "loss": 2.9234,
      "step": 108727
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9192323684692383,
      "learning_rate": 0.0003264458823219972,
      "loss": 2.8703,
      "step": 108728
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.213585376739502,
      "learning_rate": 0.0003264418076821238,
      "loss": 3.0064,
      "step": 108729
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9476401805877686,
      "learning_rate": 0.0003264377330373343,
      "loss": 2.9787,
      "step": 108730
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8506686687469482,
      "learning_rate": 0.0003264336583876294,
      "loss": 3.1175,
      "step": 108731
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7112396955490112,
      "learning_rate": 0.0003264295837330101,
      "loss": 3.0162,
      "step": 108732
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6859257221221924,
      "learning_rate": 0.0003264255090734771,
      "loss": 3.0098,
      "step": 108733
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.460376262664795,
      "learning_rate": 0.000326421434409031,
      "loss": 3.0768,
      "step": 108734
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.236095905303955,
      "learning_rate": 0.0003264173597396727,
      "loss": 2.8747,
      "step": 108735
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7105510234832764,
      "learning_rate": 0.0003264132850654029,
      "loss": 3.0306,
      "step": 108736
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.571831464767456,
      "learning_rate": 0.0003264092103862224,
      "loss": 2.7967,
      "step": 108737
    },
    {
      "epoch": 1.42,
      "grad_norm": 5.663014888763428,
      "learning_rate": 0.0003264051357021318,
      "loss": 3.117,
      "step": 108738
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.1728291511535645,
      "learning_rate": 0.0003264010610131321,
      "loss": 3.1506,
      "step": 108739
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6220812797546387,
      "learning_rate": 0.000326396986319224,
      "loss": 3.0086,
      "step": 108740
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.079357147216797,
      "learning_rate": 0.0003263929116204081,
      "loss": 3.0201,
      "step": 108741
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.284491539001465,
      "learning_rate": 0.00032638883691668525,
      "loss": 2.878,
      "step": 108742
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5880377292633057,
      "learning_rate": 0.00032638476220805637,
      "loss": 2.98,
      "step": 108743
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.8147220611572266,
      "learning_rate": 0.0003263806874945219,
      "loss": 2.7047,
      "step": 108744
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.031569480895996,
      "learning_rate": 0.00032637661277608277,
      "loss": 2.9909,
      "step": 108745
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8724056482315063,
      "learning_rate": 0.0003263725380527398,
      "loss": 2.9561,
      "step": 108746
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1895196437835693,
      "learning_rate": 0.0003263684633244937,
      "loss": 3.3563,
      "step": 108747
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.4033169746398926,
      "learning_rate": 0.00032636438859134513,
      "loss": 3.023,
      "step": 108748
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.388824701309204,
      "learning_rate": 0.00032636031385329496,
      "loss": 2.8297,
      "step": 108749
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.728819727897644,
      "learning_rate": 0.0003263562391103439,
      "loss": 3.0192,
      "step": 108750
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.966407060623169,
      "learning_rate": 0.0003263521643624927,
      "loss": 3.0207,
      "step": 108751
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4513165950775146,
      "learning_rate": 0.00032634808960974217,
      "loss": 2.8484,
      "step": 108752
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6596922874450684,
      "learning_rate": 0.00032634401485209303,
      "loss": 3.1125,
      "step": 108753
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9322445392608643,
      "learning_rate": 0.000326339940089546,
      "loss": 2.7153,
      "step": 108754
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4201433658599854,
      "learning_rate": 0.0003263358653221019,
      "loss": 2.9535,
      "step": 108755
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6364049911499023,
      "learning_rate": 0.0003263317905497615,
      "loss": 3.132,
      "step": 108756
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6131242513656616,
      "learning_rate": 0.0003263277157725255,
      "loss": 2.8988,
      "step": 108757
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.850686550140381,
      "learning_rate": 0.00032632364099039466,
      "loss": 2.9823,
      "step": 108758
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.191744327545166,
      "learning_rate": 0.00032631956620336977,
      "loss": 3.0118,
      "step": 108759
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.733152389526367,
      "learning_rate": 0.00032631549141145166,
      "loss": 2.9734,
      "step": 108760
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.586134433746338,
      "learning_rate": 0.00032631141661464087,
      "loss": 2.9149,
      "step": 108761
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.796985149383545,
      "learning_rate": 0.0003263073418129383,
      "loss": 2.9747,
      "step": 108762
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9591251611709595,
      "learning_rate": 0.0003263032670063448,
      "loss": 2.9528,
      "step": 108763
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9353289604187012,
      "learning_rate": 0.00032629919219486094,
      "loss": 3.3178,
      "step": 108764
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.254849433898926,
      "learning_rate": 0.0003262951173784876,
      "loss": 3.3007,
      "step": 108765
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.355426788330078,
      "learning_rate": 0.00032629104255722547,
      "loss": 3.0876,
      "step": 108766
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3792521953582764,
      "learning_rate": 0.0003262869677310754,
      "loss": 3.1026,
      "step": 108767
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7564311027526855,
      "learning_rate": 0.00032628289290003804,
      "loss": 2.8913,
      "step": 108768
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7355618476867676,
      "learning_rate": 0.0003262788180641142,
      "loss": 3.0479,
      "step": 108769
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.822875738143921,
      "learning_rate": 0.00032627474322330466,
      "loss": 3.0912,
      "step": 108770
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.274580240249634,
      "learning_rate": 0.0003262706683776101,
      "loss": 2.9672,
      "step": 108771
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5377318859100342,
      "learning_rate": 0.00032626659352703137,
      "loss": 3.1177,
      "step": 108772
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.4856542348861694,
      "learning_rate": 0.0003262625186715692,
      "loss": 3.0155,
      "step": 108773
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.8922548294067383,
      "learning_rate": 0.0003262584438112243,
      "loss": 2.8578,
      "step": 108774
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8549320697784424,
      "learning_rate": 0.00032625436894599745,
      "loss": 3.0588,
      "step": 108775
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0243122577667236,
      "learning_rate": 0.0003262502940758894,
      "loss": 2.8528,
      "step": 108776
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.050381660461426,
      "learning_rate": 0.000326246219200901,
      "loss": 3.0302,
      "step": 108777
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.365118980407715,
      "learning_rate": 0.0003262421443210328,
      "loss": 2.8876,
      "step": 108778
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.30131459236145,
      "learning_rate": 0.0003262380694362858,
      "loss": 3.0653,
      "step": 108779
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.189786434173584,
      "learning_rate": 0.0003262339945466607,
      "loss": 2.9165,
      "step": 108780
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1506950855255127,
      "learning_rate": 0.0003262299196521581,
      "loss": 3.0577,
      "step": 108781
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2943520545959473,
      "learning_rate": 0.0003262258447527788,
      "loss": 3.0133,
      "step": 108782
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4104325771331787,
      "learning_rate": 0.0003262217698485238,
      "loss": 3.2113,
      "step": 108783
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3466107845306396,
      "learning_rate": 0.0003262176949393936,
      "loss": 2.9986,
      "step": 108784
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.015117645263672,
      "learning_rate": 0.00032621362002538894,
      "loss": 3.1026,
      "step": 108785
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.110804319381714,
      "learning_rate": 0.0003262095451065108,
      "loss": 3.0894,
      "step": 108786
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8399529457092285,
      "learning_rate": 0.0003262054701827598,
      "loss": 3.1268,
      "step": 108787
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9082306623458862,
      "learning_rate": 0.00032620139525413663,
      "loss": 2.8563,
      "step": 108788
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7774977684020996,
      "learning_rate": 0.0003261973203206422,
      "loss": 2.9748,
      "step": 108789
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0895793437957764,
      "learning_rate": 0.0003261932453822771,
      "loss": 2.9203,
      "step": 108790
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8411377668380737,
      "learning_rate": 0.0003261891704390423,
      "loss": 3.0052,
      "step": 108791
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9142229557037354,
      "learning_rate": 0.0003261850954909384,
      "loss": 2.7965,
      "step": 108792
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.928455114364624,
      "learning_rate": 0.00032618102053796617,
      "loss": 2.9941,
      "step": 108793
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9070217609405518,
      "learning_rate": 0.00032617694558012637,
      "loss": 3.0783,
      "step": 108794
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.901236891746521,
      "learning_rate": 0.00032617287061741985,
      "loss": 3.1077,
      "step": 108795
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.777386426925659,
      "learning_rate": 0.0003261687956498473,
      "loss": 2.7934,
      "step": 108796
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.791949510574341,
      "learning_rate": 0.0003261647206774094,
      "loss": 2.8602,
      "step": 108797
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6361007690429688,
      "learning_rate": 0.000326160645700107,
      "loss": 3.0171,
      "step": 108798
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9218707084655762,
      "learning_rate": 0.0003261565707179409,
      "loss": 3.0281,
      "step": 108799
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.469691514968872,
      "learning_rate": 0.00032615249573091174,
      "loss": 3.0575,
      "step": 108800
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.070036888122559,
      "learning_rate": 0.00032614842073902035,
      "loss": 3.1459,
      "step": 108801
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8039592504501343,
      "learning_rate": 0.00032614434574226755,
      "loss": 3.1473,
      "step": 108802
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.246249198913574,
      "learning_rate": 0.0003261402707406539,
      "loss": 3.0303,
      "step": 108803
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.331702709197998,
      "learning_rate": 0.0003261361957341803,
      "loss": 3.0599,
      "step": 108804
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9857566356658936,
      "learning_rate": 0.0003261321207228476,
      "loss": 3.0388,
      "step": 108805
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.333993911743164,
      "learning_rate": 0.0003261280457066564,
      "loss": 2.9098,
      "step": 108806
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3324625492095947,
      "learning_rate": 0.00032612397068560746,
      "loss": 3.1772,
      "step": 108807
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.202458381652832,
      "learning_rate": 0.0003261198956597016,
      "loss": 3.0982,
      "step": 108808
    },
    {
      "epoch": 1.42,
      "grad_norm": 5.518702983856201,
      "learning_rate": 0.0003261158206289396,
      "loss": 2.8697,
      "step": 108809
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4185404777526855,
      "learning_rate": 0.00032611174559332213,
      "loss": 2.8219,
      "step": 108810
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.289304256439209,
      "learning_rate": 0.00032610767055284997,
      "loss": 3.3161,
      "step": 108811
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0146484375,
      "learning_rate": 0.00032610359550752394,
      "loss": 2.9886,
      "step": 108812
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.044744491577148,
      "learning_rate": 0.0003260995204573447,
      "loss": 3.1386,
      "step": 108813
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.9612746238708496,
      "learning_rate": 0.0003260954454023132,
      "loss": 2.862,
      "step": 108814
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.455339193344116,
      "learning_rate": 0.00032609137034242993,
      "loss": 2.9532,
      "step": 108815
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1042065620422363,
      "learning_rate": 0.0003260872952776958,
      "loss": 2.9301,
      "step": 108816
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.5292341709136963,
      "learning_rate": 0.0003260832202081117,
      "loss": 2.9958,
      "step": 108817
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.587385892868042,
      "learning_rate": 0.00032607914513367803,
      "loss": 3.1275,
      "step": 108818
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.909207582473755,
      "learning_rate": 0.0003260750700543959,
      "loss": 2.9197,
      "step": 108819
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9340816736221313,
      "learning_rate": 0.0003260709949702658,
      "loss": 3.0397,
      "step": 108820
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5230789184570312,
      "learning_rate": 0.0003260669198812887,
      "loss": 3.1418,
      "step": 108821
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.835353136062622,
      "learning_rate": 0.00032606284478746525,
      "loss": 2.8476,
      "step": 108822
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.920710563659668,
      "learning_rate": 0.0003260587696887962,
      "loss": 3.2037,
      "step": 108823
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6423646211624146,
      "learning_rate": 0.00032605469458528235,
      "loss": 2.9971,
      "step": 108824
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.813794732093811,
      "learning_rate": 0.00032605061947692444,
      "loss": 2.986,
      "step": 108825
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0795938968658447,
      "learning_rate": 0.00032604654436372324,
      "loss": 3.0416,
      "step": 108826
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9267196655273438,
      "learning_rate": 0.00032604246924567944,
      "loss": 3.0765,
      "step": 108827
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.159580707550049,
      "learning_rate": 0.000326038394122794,
      "loss": 2.9471,
      "step": 108828
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1335628032684326,
      "learning_rate": 0.00032603431899506734,
      "loss": 2.7997,
      "step": 108829
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8805984258651733,
      "learning_rate": 0.0003260302438625005,
      "loss": 2.8965,
      "step": 108830
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9577903747558594,
      "learning_rate": 0.0003260261687250941,
      "loss": 3.3673,
      "step": 108831
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8583534955978394,
      "learning_rate": 0.0003260220935828491,
      "loss": 2.867,
      "step": 108832
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.785080671310425,
      "learning_rate": 0.00032601801843576596,
      "loss": 2.9438,
      "step": 108833
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0765528678894043,
      "learning_rate": 0.00032601394328384556,
      "loss": 3.0528,
      "step": 108834
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7225706577301025,
      "learning_rate": 0.00032600986812708876,
      "loss": 2.9393,
      "step": 108835
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8510949611663818,
      "learning_rate": 0.0003260057929654962,
      "loss": 2.7398,
      "step": 108836
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7051832675933838,
      "learning_rate": 0.00032600171779906864,
      "loss": 2.7904,
      "step": 108837
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.281169891357422,
      "learning_rate": 0.00032599764262780694,
      "loss": 2.9231,
      "step": 108838
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9024347066879272,
      "learning_rate": 0.00032599356745171177,
      "loss": 2.9224,
      "step": 108839
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9838069677352905,
      "learning_rate": 0.0003259894922707838,
      "loss": 3.0244,
      "step": 108840
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0930049419403076,
      "learning_rate": 0.00032598541708502406,
      "loss": 3.0376,
      "step": 108841
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3616244792938232,
      "learning_rate": 0.000325981341894433,
      "loss": 2.7332,
      "step": 108842
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.631711483001709,
      "learning_rate": 0.0003259772666990116,
      "loss": 2.9269,
      "step": 108843
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.201681613922119,
      "learning_rate": 0.0003259731914987605,
      "loss": 3.0086,
      "step": 108844
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0175750255584717,
      "learning_rate": 0.00032596911629368053,
      "loss": 3.0876,
      "step": 108845
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6853033304214478,
      "learning_rate": 0.00032596504108377234,
      "loss": 3.045,
      "step": 108846
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3322017192840576,
      "learning_rate": 0.00032596096586903685,
      "loss": 3.0958,
      "step": 108847
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.640779733657837,
      "learning_rate": 0.00032595689064947466,
      "loss": 3.1157,
      "step": 108848
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8071812391281128,
      "learning_rate": 0.0003259528154250866,
      "loss": 3.1301,
      "step": 108849
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.800138235092163,
      "learning_rate": 0.0003259487401958735,
      "loss": 3.1935,
      "step": 108850
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6428295373916626,
      "learning_rate": 0.0003259446649618359,
      "loss": 3.1952,
      "step": 108851
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9921213388442993,
      "learning_rate": 0.0003259405897229748,
      "loss": 2.9626,
      "step": 108852
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6734328269958496,
      "learning_rate": 0.00032593651447929085,
      "loss": 3.0092,
      "step": 108853
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3672571182250977,
      "learning_rate": 0.0003259324392307848,
      "loss": 2.9745,
      "step": 108854
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.163663625717163,
      "learning_rate": 0.0003259283639774574,
      "loss": 3.0645,
      "step": 108855
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.19244122505188,
      "learning_rate": 0.0003259242887193095,
      "loss": 2.9948,
      "step": 108856
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8491452932357788,
      "learning_rate": 0.00032592021345634163,
      "loss": 3.1847,
      "step": 108857
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2392489910125732,
      "learning_rate": 0.0003259161381885548,
      "loss": 2.959,
      "step": 108858
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8684215545654297,
      "learning_rate": 0.0003259120629159497,
      "loss": 2.9861,
      "step": 108859
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7283660173416138,
      "learning_rate": 0.00032590798763852707,
      "loss": 3.2193,
      "step": 108860
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2588047981262207,
      "learning_rate": 0.0003259039123562876,
      "loss": 2.9728,
      "step": 108861
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.351562023162842,
      "learning_rate": 0.0003258998370692321,
      "loss": 2.8548,
      "step": 108862
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.1866724491119385,
      "learning_rate": 0.00032589576177736133,
      "loss": 2.9423,
      "step": 108863
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.653373956680298,
      "learning_rate": 0.0003258916864806761,
      "loss": 3.0207,
      "step": 108864
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2886717319488525,
      "learning_rate": 0.00032588761117917706,
      "loss": 2.9554,
      "step": 108865
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7632721662521362,
      "learning_rate": 0.0003258835358728651,
      "loss": 2.9383,
      "step": 108866
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9541058540344238,
      "learning_rate": 0.00032587946056174076,
      "loss": 3.0558,
      "step": 108867
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3152217864990234,
      "learning_rate": 0.00032587538524580503,
      "loss": 2.8444,
      "step": 108868
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7734110355377197,
      "learning_rate": 0.00032587130992505864,
      "loss": 3.0646,
      "step": 108869
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6780304908752441,
      "learning_rate": 0.0003258672345995022,
      "loss": 3.2005,
      "step": 108870
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8313673734664917,
      "learning_rate": 0.00032586315926913653,
      "loss": 2.8734,
      "step": 108871
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.052374839782715,
      "learning_rate": 0.0003258590839339625,
      "loss": 2.9537,
      "step": 108872
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.932844877243042,
      "learning_rate": 0.00032585500859398073,
      "loss": 2.9587,
      "step": 108873
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0032808780670166,
      "learning_rate": 0.000325850933249192,
      "loss": 2.9888,
      "step": 108874
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9344500303268433,
      "learning_rate": 0.0003258468578995972,
      "loss": 2.9949,
      "step": 108875
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.6417808532714844,
      "learning_rate": 0.00032584278254519684,
      "loss": 3.1278,
      "step": 108876
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0926785469055176,
      "learning_rate": 0.0003258387071859919,
      "loss": 3.0666,
      "step": 108877
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.01252818107605,
      "learning_rate": 0.0003258346318219831,
      "loss": 3.2211,
      "step": 108878
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7955247163772583,
      "learning_rate": 0.0003258305564531711,
      "loss": 3.1247,
      "step": 108879
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0472662448883057,
      "learning_rate": 0.0003258264810795566,
      "loss": 2.8674,
      "step": 108880
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9245020151138306,
      "learning_rate": 0.0003258224057011406,
      "loss": 2.9165,
      "step": 108881
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8183162212371826,
      "learning_rate": 0.00032581833031792375,
      "loss": 3.1413,
      "step": 108882
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6065596342086792,
      "learning_rate": 0.0003258142549299067,
      "loss": 2.9651,
      "step": 108883
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7686774730682373,
      "learning_rate": 0.0003258101795370904,
      "loss": 3.0772,
      "step": 108884
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7505507469177246,
      "learning_rate": 0.0003258061041394754,
      "loss": 3.0323,
      "step": 108885
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8257519006729126,
      "learning_rate": 0.0003258020287370625,
      "loss": 2.9757,
      "step": 108886
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.89652681350708,
      "learning_rate": 0.0003257979533298527,
      "loss": 2.9425,
      "step": 108887
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2407023906707764,
      "learning_rate": 0.00032579387791784644,
      "loss": 2.8714,
      "step": 108888
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9575623273849487,
      "learning_rate": 0.00032578980250104467,
      "loss": 3.1132,
      "step": 108889
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.71664297580719,
      "learning_rate": 0.00032578572707944807,
      "loss": 2.9047,
      "step": 108890
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.610168218612671,
      "learning_rate": 0.00032578165165305745,
      "loss": 3.2464,
      "step": 108891
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.43411922454834,
      "learning_rate": 0.00032577757622187346,
      "loss": 3.1755,
      "step": 108892
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9601882696151733,
      "learning_rate": 0.00032577350078589703,
      "loss": 3.1913,
      "step": 108893
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.429335117340088,
      "learning_rate": 0.0003257694253451287,
      "loss": 3.1015,
      "step": 108894
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.076122760772705,
      "learning_rate": 0.00032576534989956944,
      "loss": 2.9738,
      "step": 108895
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7313051223754883,
      "learning_rate": 0.0003257612744492199,
      "loss": 3.0593,
      "step": 108896
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7327425479888916,
      "learning_rate": 0.0003257571989940808,
      "loss": 3.0175,
      "step": 108897
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1072728633880615,
      "learning_rate": 0.000325753123534153,
      "loss": 2.9881,
      "step": 108898
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.547522783279419,
      "learning_rate": 0.0003257490480694372,
      "loss": 2.8451,
      "step": 108899
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.919121265411377,
      "learning_rate": 0.0003257449725999342,
      "loss": 2.8879,
      "step": 108900
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2884304523468018,
      "learning_rate": 0.00032574089712564464,
      "loss": 2.9415,
      "step": 108901
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6712541580200195,
      "learning_rate": 0.00032573682164656944,
      "loss": 2.8381,
      "step": 108902
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1096251010894775,
      "learning_rate": 0.0003257327461627092,
      "loss": 2.9205,
      "step": 108903
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.234719753265381,
      "learning_rate": 0.0003257286706740648,
      "loss": 2.9135,
      "step": 108904
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.4827656745910645,
      "learning_rate": 0.0003257245951806369,
      "loss": 3.2352,
      "step": 108905
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9361039400100708,
      "learning_rate": 0.0003257205196824264,
      "loss": 2.88,
      "step": 108906
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9565556049346924,
      "learning_rate": 0.00032571644417943394,
      "loss": 3.1521,
      "step": 108907
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8712023496627808,
      "learning_rate": 0.0003257123686716603,
      "loss": 2.8589,
      "step": 108908
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.390872001647949,
      "learning_rate": 0.00032570829315910633,
      "loss": 2.9092,
      "step": 108909
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.506023406982422,
      "learning_rate": 0.00032570421764177256,
      "loss": 2.9731,
      "step": 108910
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1552727222442627,
      "learning_rate": 0.00032570014211965996,
      "loss": 2.9407,
      "step": 108911
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1465306282043457,
      "learning_rate": 0.0003256960665927692,
      "loss": 3.1165,
      "step": 108912
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.195889472961426,
      "learning_rate": 0.00032569199106110105,
      "loss": 2.9358,
      "step": 108913
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6865954399108887,
      "learning_rate": 0.0003256879155246563,
      "loss": 2.9836,
      "step": 108914
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.140251874923706,
      "learning_rate": 0.0003256838399834357,
      "loss": 3.1048,
      "step": 108915
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.820142388343811,
      "learning_rate": 0.00032567976443743994,
      "loss": 3.1993,
      "step": 108916
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9198837280273438,
      "learning_rate": 0.00032567568888666986,
      "loss": 2.9134,
      "step": 108917
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1282920837402344,
      "learning_rate": 0.00032567161333112615,
      "loss": 2.7467,
      "step": 108918
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0738348960876465,
      "learning_rate": 0.0003256675377708096,
      "loss": 2.8197,
      "step": 108919
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8132675886154175,
      "learning_rate": 0.00032566346220572103,
      "loss": 2.7216,
      "step": 108920
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7803349494934082,
      "learning_rate": 0.00032565938663586114,
      "loss": 2.9329,
      "step": 108921
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0166609287261963,
      "learning_rate": 0.0003256553110612306,
      "loss": 2.9235,
      "step": 108922
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8041250705718994,
      "learning_rate": 0.00032565123548183037,
      "loss": 2.9285,
      "step": 108923
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0495352745056152,
      "learning_rate": 0.000325647159897661,
      "loss": 2.9029,
      "step": 108924
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6265290975570679,
      "learning_rate": 0.0003256430843087234,
      "loss": 3.1938,
      "step": 108925
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9751594066619873,
      "learning_rate": 0.00032563900871501825,
      "loss": 2.866,
      "step": 108926
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1715221405029297,
      "learning_rate": 0.00032563493311654626,
      "loss": 2.7022,
      "step": 108927
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9624638557434082,
      "learning_rate": 0.0003256308575133083,
      "loss": 2.8975,
      "step": 108928
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.761948585510254,
      "learning_rate": 0.00032562678190530507,
      "loss": 3.0276,
      "step": 108929
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.785170555114746,
      "learning_rate": 0.00032562270629253733,
      "loss": 3.0036,
      "step": 108930
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5625940561294556,
      "learning_rate": 0.0003256186306750059,
      "loss": 3.0448,
      "step": 108931
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.859281063079834,
      "learning_rate": 0.00032561455505271137,
      "loss": 2.9012,
      "step": 108932
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.969678282737732,
      "learning_rate": 0.00032561047942565477,
      "loss": 3.0078,
      "step": 108933
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.16978120803833,
      "learning_rate": 0.00032560640379383657,
      "loss": 2.9975,
      "step": 108934
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0144453048706055,
      "learning_rate": 0.00032560232815725764,
      "loss": 2.8107,
      "step": 108935
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.941275954246521,
      "learning_rate": 0.0003255982525159189,
      "loss": 2.9758,
      "step": 108936
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.217679977416992,
      "learning_rate": 0.00032559417686982083,
      "loss": 2.953,
      "step": 108937
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.303522825241089,
      "learning_rate": 0.00032559010121896437,
      "loss": 3.1716,
      "step": 108938
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3693721294403076,
      "learning_rate": 0.0003255860255633503,
      "loss": 3.2049,
      "step": 108939
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.337287187576294,
      "learning_rate": 0.00032558194990297916,
      "loss": 2.9867,
      "step": 108940
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0306589603424072,
      "learning_rate": 0.0003255778742378519,
      "loss": 2.8824,
      "step": 108941
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2721269130706787,
      "learning_rate": 0.0003255737985679693,
      "loss": 3.0759,
      "step": 108942
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.422903537750244,
      "learning_rate": 0.000325569722893332,
      "loss": 2.9762,
      "step": 108943
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9737095832824707,
      "learning_rate": 0.00032556564721394074,
      "loss": 2.9531,
      "step": 108944
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5947930812835693,
      "learning_rate": 0.0003255615715297965,
      "loss": 3.0363,
      "step": 108945
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0089170932769775,
      "learning_rate": 0.0003255574958408997,
      "loss": 3.2943,
      "step": 108946
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.2257511615753174,
      "learning_rate": 0.0003255534201472514,
      "loss": 2.8472,
      "step": 108947
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.854762077331543,
      "learning_rate": 0.00032554934444885226,
      "loss": 2.8517,
      "step": 108948
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.057185649871826,
      "learning_rate": 0.00032554526874570294,
      "loss": 3.0031,
      "step": 108949
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.61027193069458,
      "learning_rate": 0.0003255411930378042,
      "loss": 3.1014,
      "step": 108950
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2567288875579834,
      "learning_rate": 0.00032553711732515706,
      "loss": 3.0705,
      "step": 108951
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3532533645629883,
      "learning_rate": 0.00032553304160776197,
      "loss": 3.0717,
      "step": 108952
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7924271821975708,
      "learning_rate": 0.0003255289658856198,
      "loss": 2.8475,
      "step": 108953
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2342965602874756,
      "learning_rate": 0.0003255248901587314,
      "loss": 3.0449,
      "step": 108954
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9324311017990112,
      "learning_rate": 0.0003255208144270973,
      "loss": 2.9552,
      "step": 108955
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5356738567352295,
      "learning_rate": 0.00032551673869071854,
      "loss": 3.2541,
      "step": 108956
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8398128747940063,
      "learning_rate": 0.00032551266294959563,
      "loss": 2.7965,
      "step": 108957
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2700324058532715,
      "learning_rate": 0.0003255085872037295,
      "loss": 2.9619,
      "step": 108958
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9487345218658447,
      "learning_rate": 0.00032550451145312085,
      "loss": 2.727,
      "step": 108959
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8261653184890747,
      "learning_rate": 0.00032550043569777045,
      "loss": 3.0731,
      "step": 108960
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0958003997802734,
      "learning_rate": 0.00032549635993767893,
      "loss": 3.1362,
      "step": 108961
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.532705783843994,
      "learning_rate": 0.0003254922841728472,
      "loss": 2.9648,
      "step": 108962
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7966684103012085,
      "learning_rate": 0.00032548820840327605,
      "loss": 2.9002,
      "step": 108963
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9272822141647339,
      "learning_rate": 0.00032548413262896604,
      "loss": 2.9332,
      "step": 108964
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.822599172592163,
      "learning_rate": 0.00032548005684991806,
      "loss": 2.998,
      "step": 108965
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9590568542480469,
      "learning_rate": 0.000325475981066133,
      "loss": 2.8754,
      "step": 108966
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4275522232055664,
      "learning_rate": 0.0003254719052776113,
      "loss": 3.1514,
      "step": 108967
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8741166591644287,
      "learning_rate": 0.000325467829484354,
      "loss": 2.7512,
      "step": 108968
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.969022512435913,
      "learning_rate": 0.00032546375368636176,
      "loss": 3.213,
      "step": 108969
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8506157398223877,
      "learning_rate": 0.00032545967788363524,
      "loss": 3.0593,
      "step": 108970
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8919076919555664,
      "learning_rate": 0.0003254556020761753,
      "loss": 2.8586,
      "step": 108971
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9717071056365967,
      "learning_rate": 0.0003254515262639828,
      "loss": 2.9016,
      "step": 108972
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.120941162109375,
      "learning_rate": 0.00032544745044705826,
      "loss": 3.1939,
      "step": 108973
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.3292441368103027,
      "learning_rate": 0.00032544337462540255,
      "loss": 3.0499,
      "step": 108974
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.2166085243225098,
      "learning_rate": 0.0003254392987990166,
      "loss": 2.9155,
      "step": 108975
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9260364770889282,
      "learning_rate": 0.0003254352229679008,
      "loss": 3.2044,
      "step": 108976
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.8296127319335938,
      "learning_rate": 0.00032543114713205615,
      "loss": 3.0327,
      "step": 108977
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5619518756866455,
      "learning_rate": 0.00032542707129148355,
      "loss": 2.6793,
      "step": 108978
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.3953046798706055,
      "learning_rate": 0.0003254229954461834,
      "loss": 3.1255,
      "step": 108979
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6124078035354614,
      "learning_rate": 0.00032541891959615666,
      "loss": 2.975,
      "step": 108980
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0881340503692627,
      "learning_rate": 0.00032541484374140416,
      "loss": 2.926,
      "step": 108981
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0596232414245605,
      "learning_rate": 0.00032541076788192644,
      "loss": 2.9058,
      "step": 108982
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0623433589935303,
      "learning_rate": 0.0003254066920177244,
      "loss": 2.9743,
      "step": 108983
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7978324890136719,
      "learning_rate": 0.00032540261614879884,
      "loss": 2.9432,
      "step": 108984
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.396300792694092,
      "learning_rate": 0.0003253985402751504,
      "loss": 2.8278,
      "step": 108985
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.864928960800171,
      "learning_rate": 0.00032539446439677983,
      "loss": 3.0664,
      "step": 108986
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7609834671020508,
      "learning_rate": 0.0003253903885136881,
      "loss": 3.1395,
      "step": 108987
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6662577390670776,
      "learning_rate": 0.00032538631262587575,
      "loss": 2.9585,
      "step": 108988
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7576956748962402,
      "learning_rate": 0.0003253822367333435,
      "loss": 2.9735,
      "step": 108989
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0138845443725586,
      "learning_rate": 0.0003253781608360924,
      "loss": 3.0338,
      "step": 108990
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7925798892974854,
      "learning_rate": 0.00032537408493412296,
      "loss": 2.8191,
      "step": 108991
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.885040283203125,
      "learning_rate": 0.00032537000902743593,
      "loss": 3.4162,
      "step": 108992
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.098886728286743,
      "learning_rate": 0.00032536593311603213,
      "loss": 2.9126,
      "step": 108993
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7495203018188477,
      "learning_rate": 0.00032536185719991243,
      "loss": 3.0508,
      "step": 108994
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0406529903411865,
      "learning_rate": 0.00032535778127907737,
      "loss": 2.8859,
      "step": 108995
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9296951293945312,
      "learning_rate": 0.0003253537053535278,
      "loss": 2.9781,
      "step": 108996
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2277212142944336,
      "learning_rate": 0.00032534962942326465,
      "loss": 2.9776,
      "step": 108997
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6903817653656006,
      "learning_rate": 0.00032534555348828846,
      "loss": 3.0848,
      "step": 108998
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6830706596374512,
      "learning_rate": 0.0003253414775486,
      "loss": 2.6898,
      "step": 108999
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7820358276367188,
      "learning_rate": 0.0003253374016042001,
      "loss": 3.1575,
      "step": 109000
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9899866580963135,
      "learning_rate": 0.00032533332565508956,
      "loss": 3.1107,
      "step": 109001
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.416229248046875,
      "learning_rate": 0.000325329249701269,
      "loss": 3.1402,
      "step": 109002
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7179927825927734,
      "learning_rate": 0.0003253251737427393,
      "loss": 3.1194,
      "step": 109003
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.949771761894226,
      "learning_rate": 0.00032532109777950113,
      "loss": 2.9918,
      "step": 109004
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.366899013519287,
      "learning_rate": 0.00032531702181155537,
      "loss": 2.8113,
      "step": 109005
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2098655700683594,
      "learning_rate": 0.0003253129458389026,
      "loss": 2.8804,
      "step": 109006
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9654386043548584,
      "learning_rate": 0.0003253088698615437,
      "loss": 2.8301,
      "step": 109007
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5336732864379883,
      "learning_rate": 0.0003253047938794795,
      "loss": 2.7847,
      "step": 109008
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0639877319335938,
      "learning_rate": 0.0003253007178927106,
      "loss": 3.0368,
      "step": 109009
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8018364906311035,
      "learning_rate": 0.0003252966419012377,
      "loss": 3.0082,
      "step": 109010
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7234479188919067,
      "learning_rate": 0.0003252925659050618,
      "loss": 3.0842,
      "step": 109011
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9390554428100586,
      "learning_rate": 0.0003252884899041836,
      "loss": 3.0026,
      "step": 109012
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.980516791343689,
      "learning_rate": 0.00032528441389860366,
      "loss": 3.043,
      "step": 109013
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.284642457962036,
      "learning_rate": 0.0003252803378883229,
      "loss": 3.0909,
      "step": 109014
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1216609477996826,
      "learning_rate": 0.0003252762618733421,
      "loss": 2.7329,
      "step": 109015
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0178964138031006,
      "learning_rate": 0.0003252721858536619,
      "loss": 3.0808,
      "step": 109016
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9483824968338013,
      "learning_rate": 0.0003252681098292832,
      "loss": 2.9289,
      "step": 109017
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.709955096244812,
      "learning_rate": 0.00032526403380020663,
      "loss": 3.247,
      "step": 109018
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.888124704360962,
      "learning_rate": 0.000325259957766433,
      "loss": 3.0408,
      "step": 109019
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.974003553390503,
      "learning_rate": 0.00032525588172796304,
      "loss": 2.8711,
      "step": 109020
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.131505250930786,
      "learning_rate": 0.00032525180568479767,
      "loss": 3.057,
      "step": 109021
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.069776773452759,
      "learning_rate": 0.0003252477296369374,
      "loss": 3.0214,
      "step": 109022
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8902887105941772,
      "learning_rate": 0.0003252436535843831,
      "loss": 3.0191,
      "step": 109023
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3998637199401855,
      "learning_rate": 0.00032523957752713555,
      "loss": 2.8661,
      "step": 109024
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9797168970108032,
      "learning_rate": 0.0003252355014651955,
      "loss": 2.8679,
      "step": 109025
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9165505170822144,
      "learning_rate": 0.00032523142539856364,
      "loss": 2.9252,
      "step": 109026
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8334405422210693,
      "learning_rate": 0.00032522734932724083,
      "loss": 3.0695,
      "step": 109027
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.123837471008301,
      "learning_rate": 0.0003252232732512277,
      "loss": 2.8101,
      "step": 109028
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8773577213287354,
      "learning_rate": 0.00032521919717052515,
      "loss": 3.1916,
      "step": 109029
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.828574299812317,
      "learning_rate": 0.00032521512108513395,
      "loss": 3.1364,
      "step": 109030
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0754263401031494,
      "learning_rate": 0.00032521104499505467,
      "loss": 2.8848,
      "step": 109031
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7830066680908203,
      "learning_rate": 0.0003252069689002882,
      "loss": 2.8406,
      "step": 109032
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8232225179672241,
      "learning_rate": 0.0003252028928008354,
      "loss": 3.1003,
      "step": 109033
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.674957036972046,
      "learning_rate": 0.0003251988166966968,
      "loss": 2.8999,
      "step": 109034
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9999923706054688,
      "learning_rate": 0.0003251947405878732,
      "loss": 2.7961,
      "step": 109035
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8264273405075073,
      "learning_rate": 0.0003251906644743655,
      "loss": 2.7277,
      "step": 109036
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.023104667663574,
      "learning_rate": 0.0003251865883561744,
      "loss": 3.254,
      "step": 109037
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6917237043380737,
      "learning_rate": 0.00032518251223330057,
      "loss": 3.2769,
      "step": 109038
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8586214780807495,
      "learning_rate": 0.00032517843610574496,
      "loss": 2.873,
      "step": 109039
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9883778095245361,
      "learning_rate": 0.0003251743599735081,
      "loss": 2.8271,
      "step": 109040
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.862975001335144,
      "learning_rate": 0.00032517028383659085,
      "loss": 2.9515,
      "step": 109041
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8457098007202148,
      "learning_rate": 0.00032516620769499396,
      "loss": 2.8065,
      "step": 109042
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.296365976333618,
      "learning_rate": 0.00032516213154871825,
      "loss": 2.9237,
      "step": 109043
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2331864833831787,
      "learning_rate": 0.00032515805539776437,
      "loss": 3.0673,
      "step": 109044
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.631280541419983,
      "learning_rate": 0.0003251539792421332,
      "loss": 3.1355,
      "step": 109045
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0625219345092773,
      "learning_rate": 0.00032514990308182536,
      "loss": 2.9431,
      "step": 109046
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.927540898323059,
      "learning_rate": 0.0003251458269168417,
      "loss": 2.7643,
      "step": 109047
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8443416357040405,
      "learning_rate": 0.00032514175074718304,
      "loss": 2.9618,
      "step": 109048
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6430349349975586,
      "learning_rate": 0.00032513767457284997,
      "loss": 2.9441,
      "step": 109049
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.052154779434204,
      "learning_rate": 0.0003251335983938433,
      "loss": 3.0905,
      "step": 109050
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8758193254470825,
      "learning_rate": 0.0003251295222101639,
      "loss": 3.0393,
      "step": 109051
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.819267988204956,
      "learning_rate": 0.00032512544602181237,
      "loss": 2.8892,
      "step": 109052
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.475227117538452,
      "learning_rate": 0.0003251213698287896,
      "loss": 2.9577,
      "step": 109053
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7024474143981934,
      "learning_rate": 0.00032511729363109636,
      "loss": 3.0678,
      "step": 109054
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9183144569396973,
      "learning_rate": 0.00032511321742873325,
      "loss": 2.9909,
      "step": 109055
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4926321506500244,
      "learning_rate": 0.0003251091412217011,
      "loss": 2.8614,
      "step": 109056
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.172166109085083,
      "learning_rate": 0.00032510506501000076,
      "loss": 2.8758,
      "step": 109057
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6625458002090454,
      "learning_rate": 0.00032510098879363284,
      "loss": 3.1817,
      "step": 109058
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8396083116531372,
      "learning_rate": 0.00032509691257259814,
      "loss": 2.8783,
      "step": 109059
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0347564220428467,
      "learning_rate": 0.0003250928363468976,
      "loss": 3.0946,
      "step": 109060
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.603013753890991,
      "learning_rate": 0.0003250887601165317,
      "loss": 2.9773,
      "step": 109061
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.76569664478302,
      "learning_rate": 0.00032508468388150135,
      "loss": 2.9102,
      "step": 109062
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.421091318130493,
      "learning_rate": 0.00032508060764180726,
      "loss": 2.8653,
      "step": 109063
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.713201642036438,
      "learning_rate": 0.0003250765313974503,
      "loss": 2.9947,
      "step": 109064
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0350964069366455,
      "learning_rate": 0.0003250724551484311,
      "loss": 3.0502,
      "step": 109065
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3950157165527344,
      "learning_rate": 0.00032506837889475045,
      "loss": 2.9459,
      "step": 109066
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.4454848766326904,
      "learning_rate": 0.0003250643026364091,
      "loss": 3.2751,
      "step": 109067
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.9976651668548584,
      "learning_rate": 0.0003250602263734078,
      "loss": 3.0177,
      "step": 109068
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8381785154342651,
      "learning_rate": 0.00032505615010574744,
      "loss": 2.861,
      "step": 109069
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.249572277069092,
      "learning_rate": 0.0003250520738334286,
      "loss": 3.1452,
      "step": 109070
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.1275384426116943,
      "learning_rate": 0.0003250479975564521,
      "loss": 2.9959,
      "step": 109071
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8491272926330566,
      "learning_rate": 0.00032504392127481865,
      "loss": 2.9856,
      "step": 109072
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.913874626159668,
      "learning_rate": 0.00032503984498852913,
      "loss": 3.0386,
      "step": 109073
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.785340666770935,
      "learning_rate": 0.00032503576869758426,
      "loss": 2.6778,
      "step": 109074
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.375486373901367,
      "learning_rate": 0.0003250316924019847,
      "loss": 3.0376,
      "step": 109075
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.659257173538208,
      "learning_rate": 0.00032502761610173135,
      "loss": 2.601,
      "step": 109076
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7218906879425049,
      "learning_rate": 0.00032502353979682483,
      "loss": 3.3148,
      "step": 109077
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.991515040397644,
      "learning_rate": 0.000325019463487266,
      "loss": 3.1889,
      "step": 109078
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7386605739593506,
      "learning_rate": 0.00032501538717305556,
      "loss": 3.0388,
      "step": 109079
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2638213634490967,
      "learning_rate": 0.00032501131085419427,
      "loss": 2.8242,
      "step": 109080
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.490825653076172,
      "learning_rate": 0.0003250072345306829,
      "loss": 2.8774,
      "step": 109081
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6291948556900024,
      "learning_rate": 0.00032500315820252224,
      "loss": 3.13,
      "step": 109082
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.334754467010498,
      "learning_rate": 0.000324999081869713,
      "loss": 2.9933,
      "step": 109083
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.169947385787964,
      "learning_rate": 0.000324995005532256,
      "loss": 3.0403,
      "step": 109084
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1623120307922363,
      "learning_rate": 0.0003249909291901519,
      "loss": 2.9855,
      "step": 109085
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0457077026367188,
      "learning_rate": 0.00032498685284340157,
      "loss": 3.1483,
      "step": 109086
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.369185924530029,
      "learning_rate": 0.00032498277649200565,
      "loss": 3.0595,
      "step": 109087
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.44052791595459,
      "learning_rate": 0.000324978700135965,
      "loss": 2.8337,
      "step": 109088
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.067627429962158,
      "learning_rate": 0.0003249746237752804,
      "loss": 2.9409,
      "step": 109089
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.5986411571502686,
      "learning_rate": 0.00032497054740995244,
      "loss": 3.0935,
      "step": 109090
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.119828939437866,
      "learning_rate": 0.000324966471039982,
      "loss": 3.0128,
      "step": 109091
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5688555240631104,
      "learning_rate": 0.0003249623946653698,
      "loss": 2.9747,
      "step": 109092
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.446812391281128,
      "learning_rate": 0.00032495831828611673,
      "loss": 3.2856,
      "step": 109093
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.464752435684204,
      "learning_rate": 0.00032495424190222337,
      "loss": 3.001,
      "step": 109094
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.184783458709717,
      "learning_rate": 0.0003249501655136906,
      "loss": 2.9473,
      "step": 109095
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1619460582733154,
      "learning_rate": 0.000324946089120519,
      "loss": 3.0352,
      "step": 109096
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9364763498306274,
      "learning_rate": 0.0003249420127227096,
      "loss": 3.0942,
      "step": 109097
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0269501209259033,
      "learning_rate": 0.0003249379363202629,
      "loss": 2.9189,
      "step": 109098
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9306915998458862,
      "learning_rate": 0.0003249338599131798,
      "loss": 3.0905,
      "step": 109099
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.040403127670288,
      "learning_rate": 0.00032492978350146104,
      "loss": 3.0888,
      "step": 109100
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8818188905715942,
      "learning_rate": 0.0003249257070851073,
      "loss": 2.9027,
      "step": 109101
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3136918544769287,
      "learning_rate": 0.00032492163066411944,
      "loss": 2.9583,
      "step": 109102
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.762352228164673,
      "learning_rate": 0.00032491755423849823,
      "loss": 3.0184,
      "step": 109103
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0156946182250977,
      "learning_rate": 0.00032491347780824433,
      "loss": 2.9393,
      "step": 109104
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3286473751068115,
      "learning_rate": 0.0003249094013733585,
      "loss": 2.9255,
      "step": 109105
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.835815668106079,
      "learning_rate": 0.0003249053249338416,
      "loss": 3.0558,
      "step": 109106
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.5920729637145996,
      "learning_rate": 0.0003249012484896943,
      "loss": 3.188,
      "step": 109107
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7104709148406982,
      "learning_rate": 0.00032489717204091747,
      "loss": 2.7878,
      "step": 109108
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5688652992248535,
      "learning_rate": 0.00032489309558751174,
      "loss": 3.1609,
      "step": 109109
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2620046138763428,
      "learning_rate": 0.00032488901912947787,
      "loss": 2.8688,
      "step": 109110
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8382922410964966,
      "learning_rate": 0.0003248849426668167,
      "loss": 3.0493,
      "step": 109111
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8815360069274902,
      "learning_rate": 0.0003248808661995289,
      "loss": 3.0431,
      "step": 109112
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7173511981964111,
      "learning_rate": 0.00032487678972761537,
      "loss": 2.9162,
      "step": 109113
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8090860843658447,
      "learning_rate": 0.0003248727132510767,
      "loss": 2.8069,
      "step": 109114
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0454368591308594,
      "learning_rate": 0.00032486863676991373,
      "loss": 2.98,
      "step": 109115
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.693995475769043,
      "learning_rate": 0.0003248645602841272,
      "loss": 3.004,
      "step": 109116
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7110435962677002,
      "learning_rate": 0.0003248604837937179,
      "loss": 3.0217,
      "step": 109117
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0163967609405518,
      "learning_rate": 0.00032485640729868663,
      "loss": 3.0898,
      "step": 109118
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.300152540206909,
      "learning_rate": 0.000324852330799034,
      "loss": 2.9754,
      "step": 109119
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9187133312225342,
      "learning_rate": 0.00032484825429476084,
      "loss": 2.8246,
      "step": 109120
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9015003442764282,
      "learning_rate": 0.000324844177785868,
      "loss": 3.1855,
      "step": 109121
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.143416404724121,
      "learning_rate": 0.00032484010127235613,
      "loss": 3.0203,
      "step": 109122
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.126020908355713,
      "learning_rate": 0.000324836024754226,
      "loss": 3.1537,
      "step": 109123
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9174935817718506,
      "learning_rate": 0.00032483194823147845,
      "loss": 3.0556,
      "step": 109124
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6865941286087036,
      "learning_rate": 0.00032482787170411403,
      "loss": 3.2379,
      "step": 109125
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6613028049468994,
      "learning_rate": 0.0003248237951721337,
      "loss": 3.1419,
      "step": 109126
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9555014371871948,
      "learning_rate": 0.00032481971863553824,
      "loss": 3.0098,
      "step": 109127
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.944293737411499,
      "learning_rate": 0.00032481564209432823,
      "loss": 3.0504,
      "step": 109128
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8697749376296997,
      "learning_rate": 0.00032481156554850456,
      "loss": 2.9861,
      "step": 109129
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.884577989578247,
      "learning_rate": 0.00032480748899806793,
      "loss": 3.0019,
      "step": 109130
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8940181732177734,
      "learning_rate": 0.0003248034124430192,
      "loss": 2.7992,
      "step": 109131
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.651072382926941,
      "learning_rate": 0.00032479933588335896,
      "loss": 2.7543,
      "step": 109132
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9410876035690308,
      "learning_rate": 0.0003247952593190881,
      "loss": 3.1487,
      "step": 109133
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.696875810623169,
      "learning_rate": 0.0003247911827502073,
      "loss": 3.0944,
      "step": 109134
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9263347387313843,
      "learning_rate": 0.00032478710617671737,
      "loss": 2.9098,
      "step": 109135
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8374733924865723,
      "learning_rate": 0.000324783029598619,
      "loss": 3.2033,
      "step": 109136
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9216850996017456,
      "learning_rate": 0.0003247789530159131,
      "loss": 2.9408,
      "step": 109137
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0804033279418945,
      "learning_rate": 0.0003247748764286002,
      "loss": 2.7266,
      "step": 109138
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8792320489883423,
      "learning_rate": 0.00032477079983668126,
      "loss": 3.2016,
      "step": 109139
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.109447956085205,
      "learning_rate": 0.000324766723240157,
      "loss": 3.1532,
      "step": 109140
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5653605461120605,
      "learning_rate": 0.00032476264663902805,
      "loss": 2.8168,
      "step": 109141
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.755349040031433,
      "learning_rate": 0.0003247585700332953,
      "loss": 3.2706,
      "step": 109142
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7982317209243774,
      "learning_rate": 0.0003247544934229595,
      "loss": 3.0345,
      "step": 109143
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9156676530838013,
      "learning_rate": 0.00032475041680802126,
      "loss": 2.6981,
      "step": 109144
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.508791923522949,
      "learning_rate": 0.00032474634018848155,
      "loss": 3.0831,
      "step": 109145
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9424450397491455,
      "learning_rate": 0.000324742263564341,
      "loss": 3.0777,
      "step": 109146
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1912877559661865,
      "learning_rate": 0.00032473818693560044,
      "loss": 3.067,
      "step": 109147
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.699558973312378,
      "learning_rate": 0.00032473411030226046,
      "loss": 3.1801,
      "step": 109148
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.432269334793091,
      "learning_rate": 0.0003247300336643221,
      "loss": 2.8711,
      "step": 109149
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.6585257053375244,
      "learning_rate": 0.0003247259570217858,
      "loss": 2.9552,
      "step": 109150
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.510734796524048,
      "learning_rate": 0.00032472188037465256,
      "loss": 3.249,
      "step": 109151
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.783276081085205,
      "learning_rate": 0.0003247178037229231,
      "loss": 3.0404,
      "step": 109152
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3217411041259766,
      "learning_rate": 0.00032471372706659814,
      "loss": 2.9299,
      "step": 109153
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.6274874210357666,
      "learning_rate": 0.0003247096504056784,
      "loss": 3.0359,
      "step": 109154
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6483771800994873,
      "learning_rate": 0.0003247055737401646,
      "loss": 3.019,
      "step": 109155
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6855744123458862,
      "learning_rate": 0.00032470149707005763,
      "loss": 2.9073,
      "step": 109156
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0636281967163086,
      "learning_rate": 0.00032469742039535816,
      "loss": 2.9652,
      "step": 109157
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6824746131896973,
      "learning_rate": 0.00032469334371606703,
      "loss": 2.989,
      "step": 109158
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.619916558265686,
      "learning_rate": 0.00032468926703218485,
      "loss": 3.0356,
      "step": 109159
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8916512727737427,
      "learning_rate": 0.0003246851903437125,
      "loss": 3.0735,
      "step": 109160
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0711841583251953,
      "learning_rate": 0.00032468111365065074,
      "loss": 3.0555,
      "step": 109161
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7557038068771362,
      "learning_rate": 0.0003246770369530003,
      "loss": 2.8835,
      "step": 109162
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5983508825302124,
      "learning_rate": 0.0003246729602507619,
      "loss": 2.8443,
      "step": 109163
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.4355124235153198,
      "learning_rate": 0.0003246688835439364,
      "loss": 3.1222,
      "step": 109164
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5348142385482788,
      "learning_rate": 0.0003246648068325244,
      "loss": 3.0732,
      "step": 109165
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0150058269500732,
      "learning_rate": 0.00032466073011652675,
      "loss": 3.0355,
      "step": 109166
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7238303422927856,
      "learning_rate": 0.0003246566533959442,
      "loss": 3.3336,
      "step": 109167
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7867079973220825,
      "learning_rate": 0.00032465257667077757,
      "loss": 2.9467,
      "step": 109168
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5703489780426025,
      "learning_rate": 0.00032464849994102757,
      "loss": 2.7363,
      "step": 109169
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6985198259353638,
      "learning_rate": 0.00032464442320669497,
      "loss": 3.1029,
      "step": 109170
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.53189754486084,
      "learning_rate": 0.00032464034646778036,
      "loss": 3.1424,
      "step": 109171
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.849967360496521,
      "learning_rate": 0.00032463626972428474,
      "loss": 3.1734,
      "step": 109172
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5330063104629517,
      "learning_rate": 0.00032463219297620873,
      "loss": 3.1103,
      "step": 109173
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2573368549346924,
      "learning_rate": 0.0003246281162235532,
      "loss": 2.8776,
      "step": 109174
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8889423608779907,
      "learning_rate": 0.0003246240394663188,
      "loss": 3.2096,
      "step": 109175
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.916593313217163,
      "learning_rate": 0.00032461996270450634,
      "loss": 3.0502,
      "step": 109176
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.573683977127075,
      "learning_rate": 0.00032461588593811654,
      "loss": 3.184,
      "step": 109177
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5827488899230957,
      "learning_rate": 0.00032461180916715026,
      "loss": 3.1177,
      "step": 109178
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8284305334091187,
      "learning_rate": 0.00032460773239160806,
      "loss": 3.0123,
      "step": 109179
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.084275245666504,
      "learning_rate": 0.0003246036556114909,
      "loss": 3.0961,
      "step": 109180
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7243331670761108,
      "learning_rate": 0.0003245995788267995,
      "loss": 2.9528,
      "step": 109181
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9791502952575684,
      "learning_rate": 0.00032459550203753443,
      "loss": 3.032,
      "step": 109182
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0802536010742188,
      "learning_rate": 0.00032459142524369664,
      "loss": 2.9286,
      "step": 109183
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.108872413635254,
      "learning_rate": 0.0003245873484452869,
      "loss": 3.1068,
      "step": 109184
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.861700415611267,
      "learning_rate": 0.0003245832716423059,
      "loss": 3.1624,
      "step": 109185
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8746365308761597,
      "learning_rate": 0.00032457919483475437,
      "loss": 2.9768,
      "step": 109186
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.2291440963745117,
      "learning_rate": 0.0003245751180226331,
      "loss": 2.9254,
      "step": 109187
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1210336685180664,
      "learning_rate": 0.0003245710412059429,
      "loss": 2.8755,
      "step": 109188
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4828217029571533,
      "learning_rate": 0.0003245669643846845,
      "loss": 3.0045,
      "step": 109189
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.200582265853882,
      "learning_rate": 0.00032456288755885847,
      "loss": 2.8352,
      "step": 109190
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2705750465393066,
      "learning_rate": 0.00032455881072846593,
      "loss": 2.9129,
      "step": 109191
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6956253051757812,
      "learning_rate": 0.00032455473389350734,
      "loss": 3.1501,
      "step": 109192
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8340965509414673,
      "learning_rate": 0.00032455065705398354,
      "loss": 2.9499,
      "step": 109193
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3978655338287354,
      "learning_rate": 0.0003245465802098954,
      "loss": 3.0678,
      "step": 109194
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.772354006767273,
      "learning_rate": 0.00032454250336124357,
      "loss": 2.9124,
      "step": 109195
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7651145458221436,
      "learning_rate": 0.00032453842650802874,
      "loss": 2.9961,
      "step": 109196
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.03174090385437,
      "learning_rate": 0.00032453434965025186,
      "loss": 3.0437,
      "step": 109197
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.48418927192688,
      "learning_rate": 0.0003245302727879135,
      "loss": 2.893,
      "step": 109198
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1222054958343506,
      "learning_rate": 0.0003245261959210146,
      "loss": 3.3057,
      "step": 109199
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1546144485473633,
      "learning_rate": 0.0003245221190495557,
      "loss": 3.1041,
      "step": 109200
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8385010957717896,
      "learning_rate": 0.00032451804217353776,
      "loss": 3.0408,
      "step": 109201
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6845366954803467,
      "learning_rate": 0.0003245139652929614,
      "loss": 2.8158,
      "step": 109202
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1625545024871826,
      "learning_rate": 0.00032450988840782745,
      "loss": 2.9597,
      "step": 109203
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5455880165100098,
      "learning_rate": 0.00032450581151813665,
      "loss": 2.8295,
      "step": 109204
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0889642238616943,
      "learning_rate": 0.00032450173462388976,
      "loss": 2.9856,
      "step": 109205
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9202797412872314,
      "learning_rate": 0.00032449765772508754,
      "loss": 2.5927,
      "step": 109206
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8392229080200195,
      "learning_rate": 0.00032449358082173074,
      "loss": 2.9589,
      "step": 109207
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.947167158126831,
      "learning_rate": 0.0003244895039138201,
      "loss": 3.062,
      "step": 109208
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8747910261154175,
      "learning_rate": 0.0003244854270013564,
      "loss": 3.0246,
      "step": 109209
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0514533519744873,
      "learning_rate": 0.0003244813500843405,
      "loss": 3.0501,
      "step": 109210
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.303610324859619,
      "learning_rate": 0.0003244772731627729,
      "loss": 3.1373,
      "step": 109211
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5215696096420288,
      "learning_rate": 0.00032447319623665457,
      "loss": 2.7406,
      "step": 109212
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.738130807876587,
      "learning_rate": 0.00032446911930598623,
      "loss": 3.2024,
      "step": 109213
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8142997026443481,
      "learning_rate": 0.0003244650423707686,
      "loss": 3.0318,
      "step": 109214
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4939074516296387,
      "learning_rate": 0.00032446096543100244,
      "loss": 2.8337,
      "step": 109215
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.770040512084961,
      "learning_rate": 0.0003244568884866886,
      "loss": 3.0911,
      "step": 109216
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7196756601333618,
      "learning_rate": 0.0003244528115378277,
      "loss": 3.1183,
      "step": 109217
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0341246128082275,
      "learning_rate": 0.00032444873458442055,
      "loss": 3.1962,
      "step": 109218
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0004971027374268,
      "learning_rate": 0.000324444657626468,
      "loss": 3.0767,
      "step": 109219
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9116307497024536,
      "learning_rate": 0.00032444058066397063,
      "loss": 2.9063,
      "step": 109220
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7401493787765503,
      "learning_rate": 0.00032443650369692927,
      "loss": 3.0621,
      "step": 109221
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.648488998413086,
      "learning_rate": 0.0003244324267253448,
      "loss": 3.0885,
      "step": 109222
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2636799812316895,
      "learning_rate": 0.00032442834974921785,
      "loss": 2.9662,
      "step": 109223
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.996808409690857,
      "learning_rate": 0.0003244242727685492,
      "loss": 3.2518,
      "step": 109224
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8971542119979858,
      "learning_rate": 0.0003244201957833397,
      "loss": 2.9386,
      "step": 109225
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2536585330963135,
      "learning_rate": 0.0003244161187935899,
      "loss": 3.0951,
      "step": 109226
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9082980155944824,
      "learning_rate": 0.0003244120417993007,
      "loss": 2.8823,
      "step": 109227
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1463849544525146,
      "learning_rate": 0.0003244079648004729,
      "loss": 2.8613,
      "step": 109228
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.039093017578125,
      "learning_rate": 0.0003244038877971072,
      "loss": 3.0406,
      "step": 109229
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5737613439559937,
      "learning_rate": 0.00032439981078920423,
      "loss": 3.0972,
      "step": 109230
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9100755453109741,
      "learning_rate": 0.00032439573377676504,
      "loss": 2.9656,
      "step": 109231
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9426621198654175,
      "learning_rate": 0.00032439165675979015,
      "loss": 2.9853,
      "step": 109232
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6840002536773682,
      "learning_rate": 0.00032438757973828037,
      "loss": 2.9278,
      "step": 109233
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5886465311050415,
      "learning_rate": 0.00032438350271223656,
      "loss": 2.9563,
      "step": 109234
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8731731176376343,
      "learning_rate": 0.0003243794256816593,
      "loss": 2.9337,
      "step": 109235
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1161515712738037,
      "learning_rate": 0.00032437534864654947,
      "loss": 2.9333,
      "step": 109236
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7680425643920898,
      "learning_rate": 0.00032437127160690786,
      "loss": 3.0519,
      "step": 109237
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8559147119522095,
      "learning_rate": 0.0003243671945627351,
      "loss": 3.1373,
      "step": 109238
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2887699604034424,
      "learning_rate": 0.000324363117514032,
      "loss": 2.8726,
      "step": 109239
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.854402780532837,
      "learning_rate": 0.00032435904046079945,
      "loss": 3.1338,
      "step": 109240
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8817389011383057,
      "learning_rate": 0.000324354963403038,
      "loss": 2.9352,
      "step": 109241
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4677581787109375,
      "learning_rate": 0.00032435088634074857,
      "loss": 2.9549,
      "step": 109242
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.786005973815918,
      "learning_rate": 0.0003243468092739318,
      "loss": 3.017,
      "step": 109243
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0578067302703857,
      "learning_rate": 0.0003243427322025885,
      "loss": 2.836,
      "step": 109244
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7444956302642822,
      "learning_rate": 0.0003243386551267194,
      "loss": 2.9292,
      "step": 109245
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.675233006477356,
      "learning_rate": 0.0003243345780463254,
      "loss": 2.9778,
      "step": 109246
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0584726333618164,
      "learning_rate": 0.00032433050096140697,
      "loss": 3.0997,
      "step": 109247
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8134677410125732,
      "learning_rate": 0.00032432642387196514,
      "loss": 3.0672,
      "step": 109248
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.055920124053955,
      "learning_rate": 0.0003243223467780006,
      "loss": 3.0214,
      "step": 109249
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.861905813217163,
      "learning_rate": 0.000324318269679514,
      "loss": 2.9986,
      "step": 109250
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1732661724090576,
      "learning_rate": 0.0003243141925765062,
      "loss": 3.1445,
      "step": 109251
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8238579034805298,
      "learning_rate": 0.000324310115468978,
      "loss": 3.0455,
      "step": 109252
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.050413131713867,
      "learning_rate": 0.00032430603835693005,
      "loss": 2.9525,
      "step": 109253
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3507578372955322,
      "learning_rate": 0.00032430196124036306,
      "loss": 2.8538,
      "step": 109254
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.779545545578003,
      "learning_rate": 0.000324297884119278,
      "loss": 3.0816,
      "step": 109255
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7641973495483398,
      "learning_rate": 0.00032429380699367546,
      "loss": 3.0823,
      "step": 109256
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1112635135650635,
      "learning_rate": 0.0003242897298635562,
      "loss": 2.6671,
      "step": 109257
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9621872901916504,
      "learning_rate": 0.00032428565272892105,
      "loss": 3.0767,
      "step": 109258
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7986763715744019,
      "learning_rate": 0.0003242815755897708,
      "loss": 3.0093,
      "step": 109259
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.054880142211914,
      "learning_rate": 0.00032427749844610604,
      "loss": 2.7851,
      "step": 109260
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9314115047454834,
      "learning_rate": 0.0003242734212979277,
      "loss": 3.2964,
      "step": 109261
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.2961483001708984,
      "learning_rate": 0.00032426934414523653,
      "loss": 2.9016,
      "step": 109262
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0426385402679443,
      "learning_rate": 0.00032426526698803315,
      "loss": 2.9047,
      "step": 109263
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8018614053726196,
      "learning_rate": 0.00032426118982631835,
      "loss": 3.2599,
      "step": 109264
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.5592098236083984,
      "learning_rate": 0.000324257112660093,
      "loss": 2.9946,
      "step": 109265
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.8295488357543945,
      "learning_rate": 0.00032425303548935786,
      "loss": 2.9809,
      "step": 109266
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9579497575759888,
      "learning_rate": 0.00032424895831411346,
      "loss": 2.5482,
      "step": 109267
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1001250743865967,
      "learning_rate": 0.0003242448811343608,
      "loss": 2.988,
      "step": 109268
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.49371862411499,
      "learning_rate": 0.00032424080395010066,
      "loss": 2.9325,
      "step": 109269
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.2339751720428467,
      "learning_rate": 0.0003242367267613336,
      "loss": 3.0209,
      "step": 109270
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.747151494026184,
      "learning_rate": 0.00032423264956806045,
      "loss": 2.9694,
      "step": 109271
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.789592981338501,
      "learning_rate": 0.000324228572370282,
      "loss": 2.8787,
      "step": 109272
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9513707160949707,
      "learning_rate": 0.00032422449516799907,
      "loss": 2.6965,
      "step": 109273
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9661941528320312,
      "learning_rate": 0.00032422041796121225,
      "loss": 2.9819,
      "step": 109274
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.773360013961792,
      "learning_rate": 0.0003242163407499225,
      "loss": 2.9519,
      "step": 109275
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.852391242980957,
      "learning_rate": 0.00032421226353413044,
      "loss": 3.0321,
      "step": 109276
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9188132286071777,
      "learning_rate": 0.00032420818631383685,
      "loss": 3.0974,
      "step": 109277
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5981193780899048,
      "learning_rate": 0.00032420410908904246,
      "loss": 2.9512,
      "step": 109278
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8656376600265503,
      "learning_rate": 0.00032420003185974815,
      "loss": 3.0638,
      "step": 109279
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.197140693664551,
      "learning_rate": 0.0003241959546259546,
      "loss": 3.2577,
      "step": 109280
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.351548194885254,
      "learning_rate": 0.0003241918773876625,
      "loss": 2.923,
      "step": 109281
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0324089527130127,
      "learning_rate": 0.00032418780014487265,
      "loss": 2.9405,
      "step": 109282
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.13287091255188,
      "learning_rate": 0.000324183722897586,
      "loss": 3.0122,
      "step": 109283
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3150794506073,
      "learning_rate": 0.000324179645645803,
      "loss": 3.0605,
      "step": 109284
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8331621885299683,
      "learning_rate": 0.0003241755683895245,
      "loss": 2.9733,
      "step": 109285
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6469265222549438,
      "learning_rate": 0.00032417149112875143,
      "loss": 2.8752,
      "step": 109286
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.256639242172241,
      "learning_rate": 0.0003241674138634844,
      "loss": 2.9054,
      "step": 109287
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8658133745193481,
      "learning_rate": 0.00032416333659372406,
      "loss": 2.9679,
      "step": 109288
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8590484857559204,
      "learning_rate": 0.00032415925931947147,
      "loss": 2.992,
      "step": 109289
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7374211549758911,
      "learning_rate": 0.0003241551820407271,
      "loss": 3.1025,
      "step": 109290
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7721643447875977,
      "learning_rate": 0.00032415110475749186,
      "loss": 3.0472,
      "step": 109291
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8066349029541016,
      "learning_rate": 0.00032414702746976653,
      "loss": 2.9469,
      "step": 109292
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.258401870727539,
      "learning_rate": 0.00032414295017755174,
      "loss": 3.0803,
      "step": 109293
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9847323894500732,
      "learning_rate": 0.00032413887288084835,
      "loss": 3.1072,
      "step": 109294
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0564708709716797,
      "learning_rate": 0.0003241347955796572,
      "loss": 3.0368,
      "step": 109295
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0997185707092285,
      "learning_rate": 0.00032413071827397876,
      "loss": 2.9336,
      "step": 109296
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.958263635635376,
      "learning_rate": 0.000324126640963814,
      "loss": 2.7522,
      "step": 109297
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9018281698226929,
      "learning_rate": 0.00032412256364916373,
      "loss": 3.2857,
      "step": 109298
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1038670539855957,
      "learning_rate": 0.0003241184863300285,
      "loss": 2.967,
      "step": 109299
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9631321430206299,
      "learning_rate": 0.0003241144090064092,
      "loss": 2.996,
      "step": 109300
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.902972936630249,
      "learning_rate": 0.00032411033167830674,
      "loss": 2.9204,
      "step": 109301
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2359955310821533,
      "learning_rate": 0.00032410625434572157,
      "loss": 3.2447,
      "step": 109302
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8428914546966553,
      "learning_rate": 0.0003241021770086546,
      "loss": 3.2844,
      "step": 109303
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.041254758834839,
      "learning_rate": 0.0003240980996671066,
      "loss": 3.0997,
      "step": 109304
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.130192518234253,
      "learning_rate": 0.00032409402232107834,
      "loss": 2.8665,
      "step": 109305
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8098667860031128,
      "learning_rate": 0.00032408994497057044,
      "loss": 2.8675,
      "step": 109306
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.935837984085083,
      "learning_rate": 0.0003240858676155839,
      "loss": 3.0778,
      "step": 109307
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6941806077957153,
      "learning_rate": 0.00032408179025611927,
      "loss": 3.0955,
      "step": 109308
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.084609270095825,
      "learning_rate": 0.0003240777128921774,
      "loss": 3.1658,
      "step": 109309
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9148082733154297,
      "learning_rate": 0.000324073635523759,
      "loss": 3.1576,
      "step": 109310
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.149415969848633,
      "learning_rate": 0.0003240695581508649,
      "loss": 3.0082,
      "step": 109311
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3167941570281982,
      "learning_rate": 0.0003240654807734958,
      "loss": 2.9138,
      "step": 109312
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.613534450531006,
      "learning_rate": 0.0003240614033916525,
      "loss": 3.0297,
      "step": 109313
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.367617607116699,
      "learning_rate": 0.0003240573260053356,
      "loss": 3.2151,
      "step": 109314
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1768076419830322,
      "learning_rate": 0.0003240532486145461,
      "loss": 3.1457,
      "step": 109315
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.040449380874634,
      "learning_rate": 0.0003240491712192847,
      "loss": 3.1354,
      "step": 109316
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8187909126281738,
      "learning_rate": 0.000324045093819552,
      "loss": 2.9778,
      "step": 109317
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.153380870819092,
      "learning_rate": 0.00032404101641534885,
      "loss": 3.1495,
      "step": 109318
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1756904125213623,
      "learning_rate": 0.00032403693900667617,
      "loss": 2.7941,
      "step": 109319
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.058762311935425,
      "learning_rate": 0.00032403286159353445,
      "loss": 2.7439,
      "step": 109320
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.343630075454712,
      "learning_rate": 0.0003240287841759246,
      "loss": 2.7882,
      "step": 109321
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7200865745544434,
      "learning_rate": 0.00032402470675384735,
      "loss": 3.0746,
      "step": 109322
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3668365478515625,
      "learning_rate": 0.0003240206293273034,
      "loss": 2.9761,
      "step": 109323
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9048237800598145,
      "learning_rate": 0.0003240165518962936,
      "loss": 3.1328,
      "step": 109324
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.30118727684021,
      "learning_rate": 0.0003240124744608187,
      "loss": 2.9798,
      "step": 109325
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.6017909049987793,
      "learning_rate": 0.00032400839702087937,
      "loss": 3.1489,
      "step": 109326
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.929227113723755,
      "learning_rate": 0.0003240043195764764,
      "loss": 3.0395,
      "step": 109327
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1907126903533936,
      "learning_rate": 0.0003240002421276107,
      "loss": 3.0166,
      "step": 109328
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5889060497283936,
      "learning_rate": 0.00032399616467428284,
      "loss": 2.7675,
      "step": 109329
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8142385482788086,
      "learning_rate": 0.0003239920872164936,
      "loss": 2.9738,
      "step": 109330
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.9298739433288574,
      "learning_rate": 0.0003239880097542438,
      "loss": 3.0194,
      "step": 109331
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8988226652145386,
      "learning_rate": 0.0003239839322875342,
      "loss": 2.9529,
      "step": 109332
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.817996025085449,
      "learning_rate": 0.0003239798548163655,
      "loss": 3.0302,
      "step": 109333
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.228370189666748,
      "learning_rate": 0.0003239757773407385,
      "loss": 2.9204,
      "step": 109334
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.7138452529907227,
      "learning_rate": 0.00032397169986065396,
      "loss": 3.0462,
      "step": 109335
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7787328958511353,
      "learning_rate": 0.0003239676223761126,
      "loss": 3.0087,
      "step": 109336
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.221879720687866,
      "learning_rate": 0.0003239635448871152,
      "loss": 2.9299,
      "step": 109337
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0882201194763184,
      "learning_rate": 0.00032395946739366264,
      "loss": 2.9381,
      "step": 109338
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.6393837928771973,
      "learning_rate": 0.0003239553898957555,
      "loss": 3.1577,
      "step": 109339
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7813074588775635,
      "learning_rate": 0.0003239513123933946,
      "loss": 2.7838,
      "step": 109340
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.563323497772217,
      "learning_rate": 0.0003239472348865807,
      "loss": 2.8095,
      "step": 109341
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9674463272094727,
      "learning_rate": 0.0003239431573753145,
      "loss": 3.2841,
      "step": 109342
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9056236743927002,
      "learning_rate": 0.0003239390798595968,
      "loss": 2.9149,
      "step": 109343
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5045549869537354,
      "learning_rate": 0.0003239350023394285,
      "loss": 3.2183,
      "step": 109344
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9571906328201294,
      "learning_rate": 0.0003239309248148101,
      "loss": 2.9095,
      "step": 109345
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0985047817230225,
      "learning_rate": 0.00032392684728574253,
      "loss": 3.2325,
      "step": 109346
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.4849867820739746,
      "learning_rate": 0.00032392276975222657,
      "loss": 3.0509,
      "step": 109347
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.790157437324524,
      "learning_rate": 0.0003239186922142629,
      "loss": 3.0541,
      "step": 109348
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.727545142173767,
      "learning_rate": 0.0003239146146718522,
      "loss": 3.0577,
      "step": 109349
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.1759040355682373,
      "learning_rate": 0.00032391053712499537,
      "loss": 2.952,
      "step": 109350
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.43582820892334,
      "learning_rate": 0.00032390645957369323,
      "loss": 3.0209,
      "step": 109351
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.4093718528747559,
      "learning_rate": 0.0003239023820179463,
      "loss": 3.075,
      "step": 109352
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.069380760192871,
      "learning_rate": 0.0003238983044577555,
      "loss": 3.0079,
      "step": 109353
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3575732707977295,
      "learning_rate": 0.0003238942268931216,
      "loss": 2.6766,
      "step": 109354
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3779473304748535,
      "learning_rate": 0.0003238901493240452,
      "loss": 2.8741,
      "step": 109355
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.918195128440857,
      "learning_rate": 0.00032388607175052724,
      "loss": 2.7238,
      "step": 109356
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3776164054870605,
      "learning_rate": 0.00032388199417256836,
      "loss": 2.7321,
      "step": 109357
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7549610137939453,
      "learning_rate": 0.00032387791659016946,
      "loss": 3.0073,
      "step": 109358
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9406791925430298,
      "learning_rate": 0.0003238738390033311,
      "loss": 2.9197,
      "step": 109359
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8846760988235474,
      "learning_rate": 0.0003238697614120542,
      "loss": 3.0369,
      "step": 109360
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.485447406768799,
      "learning_rate": 0.00032386568381633955,
      "loss": 2.9614,
      "step": 109361
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.356708526611328,
      "learning_rate": 0.00032386160621618765,
      "loss": 2.8917,
      "step": 109362
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8120100498199463,
      "learning_rate": 0.00032385752861159947,
      "loss": 2.8203,
      "step": 109363
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2498483657836914,
      "learning_rate": 0.00032385345100257576,
      "loss": 3.1073,
      "step": 109364
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.182671546936035,
      "learning_rate": 0.0003238493733891173,
      "loss": 2.9548,
      "step": 109365
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.5878100395202637,
      "learning_rate": 0.0003238452957712247,
      "loss": 2.8508,
      "step": 109366
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2240443229675293,
      "learning_rate": 0.00032384121814889885,
      "loss": 2.9977,
      "step": 109367
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.0528147220611572,
      "learning_rate": 0.0003238371405221405,
      "loss": 2.9126,
      "step": 109368
    },
    {
      "epoch": 1.42,
      "grad_norm": 5.325241565704346,
      "learning_rate": 0.00032383306289095036,
      "loss": 2.8266,
      "step": 109369
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.804715633392334,
      "learning_rate": 0.0003238289852553291,
      "loss": 3.0748,
      "step": 109370
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7753992080688477,
      "learning_rate": 0.0003238249076152777,
      "loss": 2.9507,
      "step": 109371
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0197558403015137,
      "learning_rate": 0.0003238208299707968,
      "loss": 3.2754,
      "step": 109372
    },
    {
      "epoch": 1.42,
      "grad_norm": 3.948723077774048,
      "learning_rate": 0.000323816752321887,
      "loss": 3.0895,
      "step": 109373
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.456930637359619,
      "learning_rate": 0.00032381267466854947,
      "loss": 2.7489,
      "step": 109374
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3772146701812744,
      "learning_rate": 0.00032380859701078453,
      "loss": 2.9903,
      "step": 109375
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8019776344299316,
      "learning_rate": 0.0003238045193485932,
      "loss": 2.8674,
      "step": 109376
    },
    {
      "epoch": 1.42,
      "grad_norm": 4.025084495544434,
      "learning_rate": 0.00032380044168197616,
      "loss": 2.7836,
      "step": 109377
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.698028564453125,
      "learning_rate": 0.00032379636401093414,
      "loss": 2.7482,
      "step": 109378
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2095954418182373,
      "learning_rate": 0.0003237922863354679,
      "loss": 3.2512,
      "step": 109379
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1476151943206787,
      "learning_rate": 0.00032378820865557826,
      "loss": 2.9654,
      "step": 109380
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.44773268699646,
      "learning_rate": 0.0003237841309712659,
      "loss": 2.7929,
      "step": 109381
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1719837188720703,
      "learning_rate": 0.0003237800532825317,
      "loss": 2.8972,
      "step": 109382
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7107782363891602,
      "learning_rate": 0.0003237759755893763,
      "loss": 3.1693,
      "step": 109383
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.5629842281341553,
      "learning_rate": 0.00032377189789180045,
      "loss": 3.2875,
      "step": 109384
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.928646445274353,
      "learning_rate": 0.000323767820189805,
      "loss": 3.1011,
      "step": 109385
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.266752004623413,
      "learning_rate": 0.00032376374248339077,
      "loss": 2.9492,
      "step": 109386
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.574710488319397,
      "learning_rate": 0.0003237596647725582,
      "loss": 2.8708,
      "step": 109387
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.659301996231079,
      "learning_rate": 0.00032375558705730837,
      "loss": 2.7444,
      "step": 109388
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.85635507106781,
      "learning_rate": 0.00032375150933764194,
      "loss": 2.8188,
      "step": 109389
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8108644485473633,
      "learning_rate": 0.0003237474316135596,
      "loss": 3.1247,
      "step": 109390
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7273379564285278,
      "learning_rate": 0.00032374335388506224,
      "loss": 3.0534,
      "step": 109391
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.547729730606079,
      "learning_rate": 0.00032373927615215054,
      "loss": 2.9964,
      "step": 109392
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.094146251678467,
      "learning_rate": 0.0003237351984148252,
      "loss": 2.602,
      "step": 109393
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2945683002471924,
      "learning_rate": 0.0003237311206730871,
      "loss": 2.9007,
      "step": 109394
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9785758256912231,
      "learning_rate": 0.00032372704292693685,
      "loss": 2.7158,
      "step": 109395
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.738511323928833,
      "learning_rate": 0.0003237229651763754,
      "loss": 3.0083,
      "step": 109396
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.568739652633667,
      "learning_rate": 0.0003237188874214033,
      "loss": 2.9613,
      "step": 109397
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2445883750915527,
      "learning_rate": 0.00032371480966202147,
      "loss": 2.886,
      "step": 109398
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8580676317214966,
      "learning_rate": 0.00032371073189823066,
      "loss": 2.9833,
      "step": 109399
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.605168342590332,
      "learning_rate": 0.0003237066541300315,
      "loss": 2.8821,
      "step": 109400
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2210776805877686,
      "learning_rate": 0.00032370257635742477,
      "loss": 3.0484,
      "step": 109401
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.658758282661438,
      "learning_rate": 0.0003236984985804114,
      "loss": 3.0104,
      "step": 109402
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2016654014587402,
      "learning_rate": 0.000323694420798992,
      "loss": 2.6647,
      "step": 109403
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.678880214691162,
      "learning_rate": 0.0003236903430131673,
      "loss": 2.9078,
      "step": 109404
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.050722360610962,
      "learning_rate": 0.00032368626522293823,
      "loss": 3.0833,
      "step": 109405
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8115040063858032,
      "learning_rate": 0.0003236821874283053,
      "loss": 2.8556,
      "step": 109406
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.659788727760315,
      "learning_rate": 0.00032367810962926946,
      "loss": 2.8875,
      "step": 109407
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1397268772125244,
      "learning_rate": 0.0003236740318258315,
      "loss": 3.089,
      "step": 109408
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7097572088241577,
      "learning_rate": 0.00032366995401799205,
      "loss": 2.9979,
      "step": 109409
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.5002131462097168,
      "learning_rate": 0.00032366587620575183,
      "loss": 2.9714,
      "step": 109410
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.092660665512085,
      "learning_rate": 0.0003236617983891118,
      "loss": 3.0806,
      "step": 109411
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9704800844192505,
      "learning_rate": 0.0003236577205680725,
      "loss": 2.9385,
      "step": 109412
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.062002182006836,
      "learning_rate": 0.00032365364274263475,
      "loss": 2.9553,
      "step": 109413
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9552139043807983,
      "learning_rate": 0.00032364956491279945,
      "loss": 3.0521,
      "step": 109414
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7381858825683594,
      "learning_rate": 0.00032364548707856716,
      "loss": 2.9158,
      "step": 109415
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2267708778381348,
      "learning_rate": 0.0003236414092399387,
      "loss": 2.8458,
      "step": 109416
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.337137460708618,
      "learning_rate": 0.000323637331396915,
      "loss": 2.985,
      "step": 109417
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.00079607963562,
      "learning_rate": 0.0003236332535494966,
      "loss": 3.3506,
      "step": 109418
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8219430446624756,
      "learning_rate": 0.0003236291756976843,
      "loss": 3.2077,
      "step": 109419
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.6787527799606323,
      "learning_rate": 0.0003236250978414789,
      "loss": 2.9852,
      "step": 109420
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.148272752761841,
      "learning_rate": 0.0003236210199808812,
      "loss": 2.9357,
      "step": 109421
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8534948825836182,
      "learning_rate": 0.00032361694211589185,
      "loss": 3.2776,
      "step": 109422
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7225755453109741,
      "learning_rate": 0.0003236128642465117,
      "loss": 2.8357,
      "step": 109423
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.021942615509033,
      "learning_rate": 0.0003236087863727414,
      "loss": 2.9614,
      "step": 109424
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.4343411922454834,
      "learning_rate": 0.00032360470849458186,
      "loss": 2.8526,
      "step": 109425
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.3649370670318604,
      "learning_rate": 0.0003236006306120338,
      "loss": 3.2108,
      "step": 109426
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8917250633239746,
      "learning_rate": 0.00032359655272509784,
      "loss": 2.9376,
      "step": 109427
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.900804877281189,
      "learning_rate": 0.0003235924748337748,
      "loss": 2.9586,
      "step": 109428
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7392528057098389,
      "learning_rate": 0.0003235883969380656,
      "loss": 3.0908,
      "step": 109429
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.9592238664627075,
      "learning_rate": 0.0003235843190379708,
      "loss": 2.9359,
      "step": 109430
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.8520431518554688,
      "learning_rate": 0.00032358024113349123,
      "loss": 3.1559,
      "step": 109431
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7013577222824097,
      "learning_rate": 0.00032357616322462767,
      "loss": 3.1327,
      "step": 109432
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.8659679889678955,
      "learning_rate": 0.0003235720853113809,
      "loss": 3.153,
      "step": 109433
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.857950210571289,
      "learning_rate": 0.0003235680073937515,
      "loss": 2.7161,
      "step": 109434
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7224117517471313,
      "learning_rate": 0.0003235639294717405,
      "loss": 2.8269,
      "step": 109435
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.439732313156128,
      "learning_rate": 0.00032355985154534846,
      "loss": 3.037,
      "step": 109436
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0903775691986084,
      "learning_rate": 0.0003235557736145762,
      "loss": 2.9603,
      "step": 109437
    },
    {
      "epoch": 1.42,
      "grad_norm": 1.7871531248092651,
      "learning_rate": 0.00032355169567942443,
      "loss": 2.6261,
      "step": 109438
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.2226626873016357,
      "learning_rate": 0.00032354761773989405,
      "loss": 2.9547,
      "step": 109439
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.0590577125549316,
      "learning_rate": 0.0003235435397959857,
      "loss": 2.8492,
      "step": 109440
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.1520590782165527,
      "learning_rate": 0.00032353946184770013,
      "loss": 3.1066,
      "step": 109441
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6211986541748047,
      "learning_rate": 0.00032353538389503817,
      "loss": 3.3366,
      "step": 109442
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7852452993392944,
      "learning_rate": 0.0003235313059380005,
      "loss": 3.2161,
      "step": 109443
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7116049528121948,
      "learning_rate": 0.0003235272279765879,
      "loss": 2.8996,
      "step": 109444
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.019217014312744,
      "learning_rate": 0.00032352315001080113,
      "loss": 2.7975,
      "step": 109445
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.890976905822754,
      "learning_rate": 0.000323519072040641,
      "loss": 3.5019,
      "step": 109446
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9030324220657349,
      "learning_rate": 0.00032351499406610824,
      "loss": 3.2135,
      "step": 109447
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.821529746055603,
      "learning_rate": 0.0003235109160872036,
      "loss": 2.9595,
      "step": 109448
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.573972463607788,
      "learning_rate": 0.0003235068381039278,
      "loss": 2.9182,
      "step": 109449
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.833116888999939,
      "learning_rate": 0.0003235027601162817,
      "loss": 2.9174,
      "step": 109450
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0141549110412598,
      "learning_rate": 0.00032349868212426595,
      "loss": 2.8671,
      "step": 109451
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.5799684524536133,
      "learning_rate": 0.0003234946041278813,
      "loss": 2.8625,
      "step": 109452
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9466779232025146,
      "learning_rate": 0.0003234905261271287,
      "loss": 2.9413,
      "step": 109453
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.693516731262207,
      "learning_rate": 0.0003234864481220087,
      "loss": 3.1424,
      "step": 109454
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5758137702941895,
      "learning_rate": 0.000323482370112522,
      "loss": 2.9938,
      "step": 109455
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2665343284606934,
      "learning_rate": 0.00032347829209866967,
      "loss": 2.8543,
      "step": 109456
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.225680351257324,
      "learning_rate": 0.00032347421408045223,
      "loss": 2.7473,
      "step": 109457
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8634535074234009,
      "learning_rate": 0.00032347013605787045,
      "loss": 3.0658,
      "step": 109458
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2021212577819824,
      "learning_rate": 0.0003234660580309252,
      "loss": 3.2555,
      "step": 109459
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7476062774658203,
      "learning_rate": 0.00032346197999961703,
      "loss": 3.02,
      "step": 109460
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6428911685943604,
      "learning_rate": 0.000323457901963947,
      "loss": 2.7742,
      "step": 109461
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2082412242889404,
      "learning_rate": 0.00032345382392391566,
      "loss": 2.8537,
      "step": 109462
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8020555973052979,
      "learning_rate": 0.00032344974587952374,
      "loss": 3.1761,
      "step": 109463
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8841750621795654,
      "learning_rate": 0.000323445667830772,
      "loss": 3.1829,
      "step": 109464
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8476923704147339,
      "learning_rate": 0.0003234415897776614,
      "loss": 3.1972,
      "step": 109465
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9896752834320068,
      "learning_rate": 0.0003234375117201926,
      "loss": 3.3405,
      "step": 109466
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3802833557128906,
      "learning_rate": 0.00032343343365836625,
      "loss": 2.8351,
      "step": 109467
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.538090944290161,
      "learning_rate": 0.00032342935559218316,
      "loss": 3.1316,
      "step": 109468
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5771045684814453,
      "learning_rate": 0.00032342527752164413,
      "loss": 2.9764,
      "step": 109469
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1564431190490723,
      "learning_rate": 0.0003234211994467499,
      "loss": 3.0843,
      "step": 109470
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8306025266647339,
      "learning_rate": 0.0003234171213675012,
      "loss": 3.0121,
      "step": 109471
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.531419038772583,
      "learning_rate": 0.00032341304328389897,
      "loss": 3.0649,
      "step": 109472
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7939473390579224,
      "learning_rate": 0.0003234089651959437,
      "loss": 3.0626,
      "step": 109473
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.343764066696167,
      "learning_rate": 0.00032340488710363616,
      "loss": 3.1707,
      "step": 109474
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9387648105621338,
      "learning_rate": 0.0003234008090069774,
      "loss": 3.2125,
      "step": 109475
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8695424795150757,
      "learning_rate": 0.00032339673090596785,
      "loss": 3.0656,
      "step": 109476
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1485767364501953,
      "learning_rate": 0.0003233926528006084,
      "loss": 3.082,
      "step": 109477
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7193000316619873,
      "learning_rate": 0.0003233885746909,
      "loss": 3.0367,
      "step": 109478
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7955180406570435,
      "learning_rate": 0.000323384496576843,
      "loss": 2.9603,
      "step": 109479
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.835584282875061,
      "learning_rate": 0.0003233804184584385,
      "loss": 2.8126,
      "step": 109480
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5996158123016357,
      "learning_rate": 0.00032337634033568715,
      "loss": 3.0211,
      "step": 109481
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5126988887786865,
      "learning_rate": 0.0003233722622085896,
      "loss": 2.9015,
      "step": 109482
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.97459077835083,
      "learning_rate": 0.0003233681840771468,
      "loss": 2.9866,
      "step": 109483
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.926971673965454,
      "learning_rate": 0.0003233641059413594,
      "loss": 2.8942,
      "step": 109484
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9686578512191772,
      "learning_rate": 0.00032336002780122814,
      "loss": 3.1013,
      "step": 109485
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.915794014930725,
      "learning_rate": 0.00032335594965675373,
      "loss": 3.155,
      "step": 109486
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.312525987625122,
      "learning_rate": 0.00032335187150793715,
      "loss": 2.9009,
      "step": 109487
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7738631963729858,
      "learning_rate": 0.000323347793354779,
      "loss": 3.1199,
      "step": 109488
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.010369062423706,
      "learning_rate": 0.00032334371519727995,
      "loss": 2.9052,
      "step": 109489
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0861804485321045,
      "learning_rate": 0.000323339637035441,
      "loss": 3.0108,
      "step": 109490
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.182168483734131,
      "learning_rate": 0.0003233355588692626,
      "loss": 3.0322,
      "step": 109491
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0664432048797607,
      "learning_rate": 0.00032333148069874583,
      "loss": 3.0745,
      "step": 109492
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.7565042972564697,
      "learning_rate": 0.00032332740252389127,
      "loss": 2.7698,
      "step": 109493
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.011868476867676,
      "learning_rate": 0.0003233233243446997,
      "loss": 2.9214,
      "step": 109494
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.986100196838379,
      "learning_rate": 0.00032331924616117177,
      "loss": 2.9662,
      "step": 109495
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0895516872406006,
      "learning_rate": 0.0003233151679733085,
      "loss": 2.9578,
      "step": 109496
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9413576126098633,
      "learning_rate": 0.0003233110897811104,
      "loss": 2.729,
      "step": 109497
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8520208597183228,
      "learning_rate": 0.0003233070115845784,
      "loss": 2.8299,
      "step": 109498
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0243852138519287,
      "learning_rate": 0.0003233029333837132,
      "loss": 2.9871,
      "step": 109499
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.890464425086975,
      "learning_rate": 0.00032329885517851553,
      "loss": 2.8878,
      "step": 109500
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.915706753730774,
      "learning_rate": 0.0003232947769689861,
      "loss": 3.0173,
      "step": 109501
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9774945974349976,
      "learning_rate": 0.0003232906987551258,
      "loss": 2.8408,
      "step": 109502
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.317070245742798,
      "learning_rate": 0.0003232866205369353,
      "loss": 2.712,
      "step": 109503
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3621795177459717,
      "learning_rate": 0.0003232825423144153,
      "loss": 3.2707,
      "step": 109504
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2805228233337402,
      "learning_rate": 0.00032327846408756675,
      "loss": 2.8703,
      "step": 109505
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.767528772354126,
      "learning_rate": 0.00032327438585639026,
      "loss": 2.9786,
      "step": 109506
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3868796825408936,
      "learning_rate": 0.00032327030762088657,
      "loss": 3.3019,
      "step": 109507
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1054141521453857,
      "learning_rate": 0.0003232662293810566,
      "loss": 2.9028,
      "step": 109508
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1553969383239746,
      "learning_rate": 0.0003232621511369009,
      "loss": 2.9714,
      "step": 109509
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9761918783187866,
      "learning_rate": 0.0003232580728884204,
      "loss": 2.8588,
      "step": 109510
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3360495567321777,
      "learning_rate": 0.0003232539946356157,
      "loss": 3.265,
      "step": 109511
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3898136615753174,
      "learning_rate": 0.0003232499163784877,
      "loss": 3.0837,
      "step": 109512
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.724192500114441,
      "learning_rate": 0.0003232458381170371,
      "loss": 2.8228,
      "step": 109513
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.025738477706909,
      "learning_rate": 0.00032324175985126464,
      "loss": 3.0357,
      "step": 109514
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0481619834899902,
      "learning_rate": 0.0003232376815811711,
      "loss": 3.0824,
      "step": 109515
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1370434761047363,
      "learning_rate": 0.00032323360330675717,
      "loss": 2.9752,
      "step": 109516
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7397915124893188,
      "learning_rate": 0.00032322952502802384,
      "loss": 2.9104,
      "step": 109517
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7036235332489014,
      "learning_rate": 0.0003232254467449716,
      "loss": 2.9975,
      "step": 109518
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0575597286224365,
      "learning_rate": 0.00032322136845760124,
      "loss": 2.9198,
      "step": 109519
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6998071670532227,
      "learning_rate": 0.00032321729016591375,
      "loss": 2.8475,
      "step": 109520
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.438709259033203,
      "learning_rate": 0.0003232132118699097,
      "loss": 2.9517,
      "step": 109521
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1101601123809814,
      "learning_rate": 0.0003232091335695897,
      "loss": 2.8756,
      "step": 109522
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.618279457092285,
      "learning_rate": 0.00032320505526495485,
      "loss": 3.0301,
      "step": 109523
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4609405994415283,
      "learning_rate": 0.0003232009769560057,
      "loss": 3.1258,
      "step": 109524
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6742076873779297,
      "learning_rate": 0.000323196898642743,
      "loss": 2.9722,
      "step": 109525
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.273522138595581,
      "learning_rate": 0.00032319282032516764,
      "loss": 3.0038,
      "step": 109526
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8541960716247559,
      "learning_rate": 0.0003231887420032803,
      "loss": 2.9112,
      "step": 109527
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8506174087524414,
      "learning_rate": 0.0003231846636770817,
      "loss": 2.905,
      "step": 109528
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7649765014648438,
      "learning_rate": 0.00032318058534657254,
      "loss": 3.1005,
      "step": 109529
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2396035194396973,
      "learning_rate": 0.00032317650701175376,
      "loss": 2.8828,
      "step": 109530
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.352471113204956,
      "learning_rate": 0.0003231724286726261,
      "loss": 2.7582,
      "step": 109531
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4128425121307373,
      "learning_rate": 0.00032316835032919017,
      "loss": 3.043,
      "step": 109532
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4905037879943848,
      "learning_rate": 0.00032316427198144683,
      "loss": 3.1524,
      "step": 109533
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7208619117736816,
      "learning_rate": 0.0003231601936293968,
      "loss": 3.0443,
      "step": 109534
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.440906286239624,
      "learning_rate": 0.00032315611527304085,
      "loss": 2.9627,
      "step": 109535
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4700140953063965,
      "learning_rate": 0.0003231520369123797,
      "loss": 2.8351,
      "step": 109536
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8904114961624146,
      "learning_rate": 0.00032314795854741416,
      "loss": 3.1502,
      "step": 109537
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8261617422103882,
      "learning_rate": 0.00032314388017814507,
      "loss": 3.2268,
      "step": 109538
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7183948755264282,
      "learning_rate": 0.0003231398018045731,
      "loss": 2.8157,
      "step": 109539
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6144100427627563,
      "learning_rate": 0.00032313572342669886,
      "loss": 3.034,
      "step": 109540
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8127565383911133,
      "learning_rate": 0.0003231316450445234,
      "loss": 2.8783,
      "step": 109541
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6466312408447266,
      "learning_rate": 0.00032312756665804724,
      "loss": 3.1435,
      "step": 109542
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0697247982025146,
      "learning_rate": 0.00032312348826727126,
      "loss": 2.9123,
      "step": 109543
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.574937343597412,
      "learning_rate": 0.00032311940987219615,
      "loss": 2.7081,
      "step": 109544
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6784038543701172,
      "learning_rate": 0.0003231153314728228,
      "loss": 2.9588,
      "step": 109545
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8787306547164917,
      "learning_rate": 0.0003231112530691518,
      "loss": 2.9587,
      "step": 109546
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8510595560073853,
      "learning_rate": 0.0003231071746611839,
      "loss": 2.8572,
      "step": 109547
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9279675483703613,
      "learning_rate": 0.00032310309624892017,
      "loss": 3.1048,
      "step": 109548
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8133305311203003,
      "learning_rate": 0.00032309901783236094,
      "loss": 2.7979,
      "step": 109549
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9394710063934326,
      "learning_rate": 0.0003230949394115072,
      "loss": 3.1091,
      "step": 109550
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1811797618865967,
      "learning_rate": 0.0003230908609863598,
      "loss": 2.8301,
      "step": 109551
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.075150966644287,
      "learning_rate": 0.0003230867825569192,
      "loss": 3.0174,
      "step": 109552
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9648770093917847,
      "learning_rate": 0.0003230827041231864,
      "loss": 2.9771,
      "step": 109553
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.868573546409607,
      "learning_rate": 0.0003230786256851622,
      "loss": 2.8679,
      "step": 109554
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9367841482162476,
      "learning_rate": 0.00032307454724284703,
      "loss": 2.9034,
      "step": 109555
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0017950534820557,
      "learning_rate": 0.000323070468796242,
      "loss": 3.0825,
      "step": 109556
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.772896409034729,
      "learning_rate": 0.0003230663903453478,
      "loss": 2.9016,
      "step": 109557
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6434261798858643,
      "learning_rate": 0.00032306231189016494,
      "loss": 2.989,
      "step": 109558
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.975015878677368,
      "learning_rate": 0.00032305823343069446,
      "loss": 3.2644,
      "step": 109559
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7952450513839722,
      "learning_rate": 0.0003230541549669371,
      "loss": 3.1841,
      "step": 109560
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3220055103302,
      "learning_rate": 0.0003230500764988934,
      "loss": 3.3826,
      "step": 109561
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.3388731479644775,
      "learning_rate": 0.0003230459980265643,
      "loss": 2.9121,
      "step": 109562
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9824588298797607,
      "learning_rate": 0.00032304191954995055,
      "loss": 2.9861,
      "step": 109563
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.936855673789978,
      "learning_rate": 0.0003230378410690528,
      "loss": 3.0971,
      "step": 109564
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7309433221817017,
      "learning_rate": 0.000323033762583872,
      "loss": 3.1356,
      "step": 109565
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.774984836578369,
      "learning_rate": 0.0003230296840944087,
      "loss": 3.0388,
      "step": 109566
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.095515012741089,
      "learning_rate": 0.00032302560560066375,
      "loss": 2.9238,
      "step": 109567
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.850554347038269,
      "learning_rate": 0.0003230215271026379,
      "loss": 3.0398,
      "step": 109568
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2919559478759766,
      "learning_rate": 0.00032301744860033194,
      "loss": 3.0298,
      "step": 109569
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7247788906097412,
      "learning_rate": 0.00032301337009374664,
      "loss": 3.0049,
      "step": 109570
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8461560010910034,
      "learning_rate": 0.0003230092915828826,
      "loss": 2.9729,
      "step": 109571
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.812762498855591,
      "learning_rate": 0.0003230052130677409,
      "loss": 2.924,
      "step": 109572
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6811295747756958,
      "learning_rate": 0.00032300113454832187,
      "loss": 2.8929,
      "step": 109573
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6465634107589722,
      "learning_rate": 0.0003229970560246266,
      "loss": 3.022,
      "step": 109574
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5946240425109863,
      "learning_rate": 0.0003229929774966558,
      "loss": 3.1062,
      "step": 109575
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.147062063217163,
      "learning_rate": 0.0003229888989644101,
      "loss": 3.0635,
      "step": 109576
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2917823791503906,
      "learning_rate": 0.0003229848204278903,
      "loss": 2.9622,
      "step": 109577
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.98616886138916,
      "learning_rate": 0.0003229807418870973,
      "loss": 2.8427,
      "step": 109578
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1131818294525146,
      "learning_rate": 0.0003229766633420316,
      "loss": 2.7307,
      "step": 109579
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6925697326660156,
      "learning_rate": 0.0003229725847926942,
      "loss": 3.0925,
      "step": 109580
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.162306547164917,
      "learning_rate": 0.00032296850623908573,
      "loss": 3.164,
      "step": 109581
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7049076557159424,
      "learning_rate": 0.000322964427681207,
      "loss": 2.9646,
      "step": 109582
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.181910991668701,
      "learning_rate": 0.0003229603491190587,
      "loss": 2.9004,
      "step": 109583
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.6527791023254395,
      "learning_rate": 0.0003229562705526417,
      "loss": 2.9704,
      "step": 109584
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9267046451568604,
      "learning_rate": 0.00032295219198195664,
      "loss": 2.8548,
      "step": 109585
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.281460762023926,
      "learning_rate": 0.0003229481134070044,
      "loss": 3.0194,
      "step": 109586
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8529387712478638,
      "learning_rate": 0.00032294403482778565,
      "loss": 3.1498,
      "step": 109587
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.453517436981201,
      "learning_rate": 0.0003229399562443012,
      "loss": 3.0161,
      "step": 109588
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.15179181098938,
      "learning_rate": 0.0003229358776565517,
      "loss": 3.0016,
      "step": 109589
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.859470248222351,
      "learning_rate": 0.00032293179906453805,
      "loss": 3.0794,
      "step": 109590
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7535223960876465,
      "learning_rate": 0.00032292772046826093,
      "loss": 3.0936,
      "step": 109591
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.183378219604492,
      "learning_rate": 0.0003229236418677211,
      "loss": 2.9346,
      "step": 109592
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.018613815307617,
      "learning_rate": 0.00032291956326291934,
      "loss": 3.0622,
      "step": 109593
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2468011379241943,
      "learning_rate": 0.0003229154846538564,
      "loss": 2.8674,
      "step": 109594
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8066335916519165,
      "learning_rate": 0.00032291140604053306,
      "loss": 2.8069,
      "step": 109595
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.874746322631836,
      "learning_rate": 0.00032290732742295007,
      "loss": 2.9102,
      "step": 109596
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2857656478881836,
      "learning_rate": 0.0003229032488011081,
      "loss": 2.8967,
      "step": 109597
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6739393472671509,
      "learning_rate": 0.000322899170175008,
      "loss": 3.0253,
      "step": 109598
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.210130214691162,
      "learning_rate": 0.00032289509154465057,
      "loss": 3.0588,
      "step": 109599
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6750723123550415,
      "learning_rate": 0.0003228910129100365,
      "loss": 3.1223,
      "step": 109600
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9433287382125854,
      "learning_rate": 0.00032288693427116655,
      "loss": 2.9736,
      "step": 109601
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2239274978637695,
      "learning_rate": 0.00032288285562804143,
      "loss": 3.1666,
      "step": 109602
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8322899341583252,
      "learning_rate": 0.00032287877698066196,
      "loss": 2.9635,
      "step": 109603
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9865763187408447,
      "learning_rate": 0.00032287469832902895,
      "loss": 2.95,
      "step": 109604
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.63215970993042,
      "learning_rate": 0.00032287061967314304,
      "loss": 3.0213,
      "step": 109605
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9903069734573364,
      "learning_rate": 0.00032286654101300517,
      "loss": 3.0761,
      "step": 109606
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.525857448577881,
      "learning_rate": 0.00032286246234861587,
      "loss": 2.8544,
      "step": 109607
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7253628969192505,
      "learning_rate": 0.000322858383679976,
      "loss": 3.0087,
      "step": 109608
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.910688042640686,
      "learning_rate": 0.00032285430500708645,
      "loss": 2.9922,
      "step": 109609
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.138054132461548,
      "learning_rate": 0.00032285022632994774,
      "loss": 2.8665,
      "step": 109610
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8650285005569458,
      "learning_rate": 0.00032284614764856075,
      "loss": 2.9624,
      "step": 109611
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2526745796203613,
      "learning_rate": 0.0003228420689629263,
      "loss": 3.1259,
      "step": 109612
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.5108730792999268,
      "learning_rate": 0.000322837990273045,
      "loss": 3.0571,
      "step": 109613
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.953111410140991,
      "learning_rate": 0.0003228339115789177,
      "loss": 2.8645,
      "step": 109614
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8935784101486206,
      "learning_rate": 0.0003228298328805452,
      "loss": 2.7905,
      "step": 109615
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4728055000305176,
      "learning_rate": 0.0003228257541779281,
      "loss": 3.0152,
      "step": 109616
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.117797374725342,
      "learning_rate": 0.00032282167547106736,
      "loss": 2.9611,
      "step": 109617
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.955224871635437,
      "learning_rate": 0.00032281759675996353,
      "loss": 3.0113,
      "step": 109618
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8487389087677002,
      "learning_rate": 0.0003228135180446176,
      "loss": 2.824,
      "step": 109619
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1885321140289307,
      "learning_rate": 0.00032280943932503015,
      "loss": 3.0198,
      "step": 109620
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8400238752365112,
      "learning_rate": 0.000322805360601202,
      "loss": 2.9174,
      "step": 109621
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.003793239593506,
      "learning_rate": 0.0003228012818731339,
      "loss": 3.1819,
      "step": 109622
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0157768726348877,
      "learning_rate": 0.0003227972031408267,
      "loss": 3.0655,
      "step": 109623
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.720369815826416,
      "learning_rate": 0.0003227931244042809,
      "loss": 2.9852,
      "step": 109624
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5096518993377686,
      "learning_rate": 0.0003227890456634975,
      "loss": 3.1525,
      "step": 109625
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9984232187271118,
      "learning_rate": 0.00032278496691847726,
      "loss": 2.9045,
      "step": 109626
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5369598865509033,
      "learning_rate": 0.0003227808881692208,
      "loss": 2.9796,
      "step": 109627
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3014650344848633,
      "learning_rate": 0.0003227768094157289,
      "loss": 2.9772,
      "step": 109628
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.878883957862854,
      "learning_rate": 0.0003227727306580024,
      "loss": 2.9532,
      "step": 109629
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.937423586845398,
      "learning_rate": 0.00032276865189604205,
      "loss": 3.0588,
      "step": 109630
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.654709815979004,
      "learning_rate": 0.00032276457312984855,
      "loss": 3.1539,
      "step": 109631
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6794782876968384,
      "learning_rate": 0.00032276049435942265,
      "loss": 2.9251,
      "step": 109632
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7706356048583984,
      "learning_rate": 0.0003227564155847652,
      "loss": 2.9133,
      "step": 109633
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.986389398574829,
      "learning_rate": 0.0003227523368058769,
      "loss": 2.9423,
      "step": 109634
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1871211528778076,
      "learning_rate": 0.00032274825802275847,
      "loss": 2.9127,
      "step": 109635
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2003018856048584,
      "learning_rate": 0.0003227441792354108,
      "loss": 2.9983,
      "step": 109636
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6721949577331543,
      "learning_rate": 0.00032274010044383453,
      "loss": 3.0814,
      "step": 109637
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0233891010284424,
      "learning_rate": 0.00032273602164803033,
      "loss": 2.8962,
      "step": 109638
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3495800495147705,
      "learning_rate": 0.0003227319428479992,
      "loss": 3.1254,
      "step": 109639
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.478527784347534,
      "learning_rate": 0.00032272786404374175,
      "loss": 3.2496,
      "step": 109640
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6036598682403564,
      "learning_rate": 0.0003227237852352587,
      "loss": 2.8974,
      "step": 109641
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.942539930343628,
      "learning_rate": 0.000322719706422551,
      "loss": 2.9381,
      "step": 109642
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5446062088012695,
      "learning_rate": 0.00032271562760561917,
      "loss": 2.9066,
      "step": 109643
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0197834968566895,
      "learning_rate": 0.0003227115487844641,
      "loss": 2.9965,
      "step": 109644
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.4918084144592285,
      "learning_rate": 0.0003227074699590865,
      "loss": 3.01,
      "step": 109645
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.331744909286499,
      "learning_rate": 0.0003227033911294872,
      "loss": 2.8886,
      "step": 109646
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.672372817993164,
      "learning_rate": 0.0003226993122956668,
      "loss": 2.9329,
      "step": 109647
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.937004566192627,
      "learning_rate": 0.00032269523345762634,
      "loss": 2.865,
      "step": 109648
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4558234214782715,
      "learning_rate": 0.00032269115461536625,
      "loss": 3.038,
      "step": 109649
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9338743686676025,
      "learning_rate": 0.0003226870757688875,
      "loss": 2.96,
      "step": 109650
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.925760269165039,
      "learning_rate": 0.0003226829969181909,
      "loss": 3.0097,
      "step": 109651
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0412673950195312,
      "learning_rate": 0.000322678918063277,
      "loss": 2.9973,
      "step": 109652
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.4778685569763184,
      "learning_rate": 0.0003226748392041466,
      "loss": 2.9524,
      "step": 109653
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6482652425765991,
      "learning_rate": 0.0003226707603408007,
      "loss": 2.9918,
      "step": 109654
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0783743858337402,
      "learning_rate": 0.00032266668147323975,
      "loss": 2.9789,
      "step": 109655
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.64296293258667,
      "learning_rate": 0.00032266260260146464,
      "loss": 2.9715,
      "step": 109656
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0986592769622803,
      "learning_rate": 0.0003226585237254762,
      "loss": 2.9851,
      "step": 109657
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7814046144485474,
      "learning_rate": 0.000322654444845275,
      "loss": 2.9246,
      "step": 109658
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.509155035018921,
      "learning_rate": 0.00032265036596086194,
      "loss": 2.9898,
      "step": 109659
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.051085948944092,
      "learning_rate": 0.00032264628707223783,
      "loss": 2.9368,
      "step": 109660
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0728659629821777,
      "learning_rate": 0.0003226422081794033,
      "loss": 2.7383,
      "step": 109661
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8977150917053223,
      "learning_rate": 0.0003226381292823591,
      "loss": 3.0837,
      "step": 109662
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.367267370223999,
      "learning_rate": 0.0003226340503811061,
      "loss": 2.8027,
      "step": 109663
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1196446418762207,
      "learning_rate": 0.000322629971475645,
      "loss": 3.1052,
      "step": 109664
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.417792320251465,
      "learning_rate": 0.00032262589256597653,
      "loss": 2.996,
      "step": 109665
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1633989810943604,
      "learning_rate": 0.0003226218136521015,
      "loss": 2.7713,
      "step": 109666
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.263845205307007,
      "learning_rate": 0.00032261773473402067,
      "loss": 3.0677,
      "step": 109667
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4554941654205322,
      "learning_rate": 0.00032261365581173475,
      "loss": 2.928,
      "step": 109668
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.481795072555542,
      "learning_rate": 0.0003226095768852445,
      "loss": 2.8738,
      "step": 109669
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9466005563735962,
      "learning_rate": 0.00032260549795455086,
      "loss": 2.8128,
      "step": 109670
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6491774320602417,
      "learning_rate": 0.00032260141901965426,
      "loss": 2.9457,
      "step": 109671
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.713319182395935,
      "learning_rate": 0.00032259734008055566,
      "loss": 2.85,
      "step": 109672
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.932847499847412,
      "learning_rate": 0.00032259326113725584,
      "loss": 3.0156,
      "step": 109673
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.12160062789917,
      "learning_rate": 0.00032258918218975544,
      "loss": 2.9713,
      "step": 109674
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0572798252105713,
      "learning_rate": 0.0003225851032380553,
      "loss": 2.9725,
      "step": 109675
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.104836940765381,
      "learning_rate": 0.00032258102428215626,
      "loss": 3.027,
      "step": 109676
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.457195281982422,
      "learning_rate": 0.00032257694532205884,
      "loss": 2.9247,
      "step": 109677
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9067562818527222,
      "learning_rate": 0.00032257286635776404,
      "loss": 3.0961,
      "step": 109678
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8358262777328491,
      "learning_rate": 0.00032256878738927245,
      "loss": 3.0108,
      "step": 109679
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.622288703918457,
      "learning_rate": 0.0003225647084165849,
      "loss": 2.8496,
      "step": 109680
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1111791133880615,
      "learning_rate": 0.0003225606294397022,
      "loss": 2.9801,
      "step": 109681
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.728435754776001,
      "learning_rate": 0.00032255655045862507,
      "loss": 3.1763,
      "step": 109682
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3235902786254883,
      "learning_rate": 0.0003225524714733542,
      "loss": 2.9439,
      "step": 109683
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6910909414291382,
      "learning_rate": 0.0003225483924838903,
      "loss": 2.8213,
      "step": 109684
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7013435363769531,
      "learning_rate": 0.00032254431349023444,
      "loss": 2.8361,
      "step": 109685
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.817772388458252,
      "learning_rate": 0.000322540234492387,
      "loss": 3.0141,
      "step": 109686
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9615540504455566,
      "learning_rate": 0.000322536155490349,
      "loss": 3.1008,
      "step": 109687
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.696932077407837,
      "learning_rate": 0.00032253207648412106,
      "loss": 2.9494,
      "step": 109688
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.038778066635132,
      "learning_rate": 0.000322527997473704,
      "loss": 2.8233,
      "step": 109689
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6480783224105835,
      "learning_rate": 0.00032252391845909856,
      "loss": 3.0953,
      "step": 109690
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8331154584884644,
      "learning_rate": 0.00032251983944030547,
      "loss": 2.9107,
      "step": 109691
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.694694757461548,
      "learning_rate": 0.00032251576041732557,
      "loss": 2.8376,
      "step": 109692
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0198869705200195,
      "learning_rate": 0.00032251168139015945,
      "loss": 2.9526,
      "step": 109693
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9289741516113281,
      "learning_rate": 0.00032250760235880816,
      "loss": 2.8923,
      "step": 109694
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1047258377075195,
      "learning_rate": 0.00032250352332327217,
      "loss": 2.9693,
      "step": 109695
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5532069206237793,
      "learning_rate": 0.0003224994442835523,
      "loss": 2.7377,
      "step": 109696
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1058647632598877,
      "learning_rate": 0.0003224953652396495,
      "loss": 3.1314,
      "step": 109697
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.167466640472412,
      "learning_rate": 0.00032249128619156426,
      "loss": 3.0352,
      "step": 109698
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8152050971984863,
      "learning_rate": 0.0003224872071392975,
      "loss": 3.2685,
      "step": 109699
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.671396493911743,
      "learning_rate": 0.00032248312808285,
      "loss": 2.7461,
      "step": 109700
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0238239765167236,
      "learning_rate": 0.00032247904902222245,
      "loss": 2.9231,
      "step": 109701
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1054961681365967,
      "learning_rate": 0.00032247496995741555,
      "loss": 2.7368,
      "step": 109702
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.010512590408325,
      "learning_rate": 0.0003224708908884302,
      "loss": 2.9752,
      "step": 109703
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.283053398132324,
      "learning_rate": 0.00032246681181526707,
      "loss": 3.0477,
      "step": 109704
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.6968936920166016,
      "learning_rate": 0.0003224627327379269,
      "loss": 2.9315,
      "step": 109705
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.974413275718689,
      "learning_rate": 0.0003224586536564105,
      "loss": 2.9606,
      "step": 109706
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8994759321212769,
      "learning_rate": 0.00032245457457071865,
      "loss": 3.1471,
      "step": 109707
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.54244065284729,
      "learning_rate": 0.00032245049548085204,
      "loss": 3.0511,
      "step": 109708
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9324727058410645,
      "learning_rate": 0.0003224464163868114,
      "loss": 3.2316,
      "step": 109709
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7445460557937622,
      "learning_rate": 0.00032244233728859765,
      "loss": 2.7893,
      "step": 109710
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.012340545654297,
      "learning_rate": 0.00032243825818621143,
      "loss": 2.7531,
      "step": 109711
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.387660264968872,
      "learning_rate": 0.0003224341790796534,
      "loss": 2.806,
      "step": 109712
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1166574954986572,
      "learning_rate": 0.0003224300999689246,
      "loss": 3.0215,
      "step": 109713
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6660939455032349,
      "learning_rate": 0.0003224260208540255,
      "loss": 3.0325,
      "step": 109714
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9638468027114868,
      "learning_rate": 0.00032242194173495703,
      "loss": 2.9942,
      "step": 109715
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9057775735855103,
      "learning_rate": 0.0003224178626117199,
      "loss": 2.8714,
      "step": 109716
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.837314486503601,
      "learning_rate": 0.0003224137834843148,
      "loss": 2.8421,
      "step": 109717
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9165371656417847,
      "learning_rate": 0.0003224097043527427,
      "loss": 2.9347,
      "step": 109718
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.683814764022827,
      "learning_rate": 0.00032240562521700404,
      "loss": 3.2302,
      "step": 109719
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.949295163154602,
      "learning_rate": 0.00032240154607709987,
      "loss": 2.9621,
      "step": 109720
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5824929475784302,
      "learning_rate": 0.0003223974669330308,
      "loss": 3.0626,
      "step": 109721
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6702344417572021,
      "learning_rate": 0.0003223933877847976,
      "loss": 3.1765,
      "step": 109722
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.470439910888672,
      "learning_rate": 0.00032238930863240107,
      "loss": 2.9972,
      "step": 109723
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9350531101226807,
      "learning_rate": 0.00032238522947584193,
      "loss": 3.1119,
      "step": 109724
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6535158157348633,
      "learning_rate": 0.0003223811503151209,
      "loss": 3.1871,
      "step": 109725
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5523464679718018,
      "learning_rate": 0.00032237707115023885,
      "loss": 3.0481,
      "step": 109726
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4625256061553955,
      "learning_rate": 0.0003223729919811966,
      "loss": 2.9953,
      "step": 109727
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.336120128631592,
      "learning_rate": 0.0003223689128079946,
      "loss": 2.9164,
      "step": 109728
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.277271270751953,
      "learning_rate": 0.0003223648336306339,
      "loss": 3.007,
      "step": 109729
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.186704158782959,
      "learning_rate": 0.0003223607544491151,
      "loss": 3.0761,
      "step": 109730
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6181234121322632,
      "learning_rate": 0.00032235667526343907,
      "loss": 3.1304,
      "step": 109731
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8947783708572388,
      "learning_rate": 0.0003223525960736064,
      "loss": 3.0618,
      "step": 109732
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6250441074371338,
      "learning_rate": 0.000322348516879618,
      "loss": 2.8645,
      "step": 109733
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6377308368682861,
      "learning_rate": 0.0003223444376814747,
      "loss": 3.1396,
      "step": 109734
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8852653503417969,
      "learning_rate": 0.00032234035847917707,
      "loss": 2.9617,
      "step": 109735
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7654153108596802,
      "learning_rate": 0.0003223362792727259,
      "loss": 2.9865,
      "step": 109736
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.59800124168396,
      "learning_rate": 0.0003223322000621221,
      "loss": 2.7602,
      "step": 109737
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6636992692947388,
      "learning_rate": 0.0003223281208473662,
      "loss": 2.956,
      "step": 109738
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0485422611236572,
      "learning_rate": 0.00032232404162845917,
      "loss": 2.9455,
      "step": 109739
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.668088436126709,
      "learning_rate": 0.0003223199624054017,
      "loss": 2.75,
      "step": 109740
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.4893690347671509,
      "learning_rate": 0.00032231588317819447,
      "loss": 3.0016,
      "step": 109741
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.04127836227417,
      "learning_rate": 0.0003223118039468383,
      "loss": 3.0004,
      "step": 109742
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.475003957748413,
      "learning_rate": 0.000322307724711334,
      "loss": 2.9822,
      "step": 109743
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.412976026535034,
      "learning_rate": 0.00032230364547168215,
      "loss": 2.7178,
      "step": 109744
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4751973152160645,
      "learning_rate": 0.0003222995662278837,
      "loss": 2.7977,
      "step": 109745
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2965104579925537,
      "learning_rate": 0.0003222954869799394,
      "loss": 2.8042,
      "step": 109746
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7111979722976685,
      "learning_rate": 0.00032229140772784985,
      "loss": 3.118,
      "step": 109747
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2502171993255615,
      "learning_rate": 0.00032228732847161594,
      "loss": 2.9852,
      "step": 109748
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8483985662460327,
      "learning_rate": 0.00032228324921123843,
      "loss": 3.1538,
      "step": 109749
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.162142038345337,
      "learning_rate": 0.000322279169946718,
      "loss": 2.8851,
      "step": 109750
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8018207550048828,
      "learning_rate": 0.00032227509067805546,
      "loss": 3.1729,
      "step": 109751
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8528388738632202,
      "learning_rate": 0.0003222710114052516,
      "loss": 3.0313,
      "step": 109752
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6000381708145142,
      "learning_rate": 0.0003222669321283071,
      "loss": 3.3182,
      "step": 109753
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5121997594833374,
      "learning_rate": 0.0003222628528472228,
      "loss": 3.0403,
      "step": 109754
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.127788543701172,
      "learning_rate": 0.00032225877356199936,
      "loss": 3.0697,
      "step": 109755
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.831459641456604,
      "learning_rate": 0.00032225469427263765,
      "loss": 2.8513,
      "step": 109756
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5947961807250977,
      "learning_rate": 0.00032225061497913825,
      "loss": 3.3617,
      "step": 109757
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8201907873153687,
      "learning_rate": 0.0003222465356815022,
      "loss": 2.9017,
      "step": 109758
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.371877431869507,
      "learning_rate": 0.00032224245637973,
      "loss": 3.0039,
      "step": 109759
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2471930980682373,
      "learning_rate": 0.00032223837707382255,
      "loss": 2.9037,
      "step": 109760
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.073803424835205,
      "learning_rate": 0.00032223429776378056,
      "loss": 2.9641,
      "step": 109761
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.194059371948242,
      "learning_rate": 0.00032223021844960477,
      "loss": 2.9044,
      "step": 109762
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.090208053588867,
      "learning_rate": 0.000322226139131296,
      "loss": 2.8537,
      "step": 109763
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1317243576049805,
      "learning_rate": 0.000322222059808855,
      "loss": 3.0549,
      "step": 109764
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9091711044311523,
      "learning_rate": 0.0003222179804822824,
      "loss": 2.9396,
      "step": 109765
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8203312158584595,
      "learning_rate": 0.0003222139011515791,
      "loss": 2.752,
      "step": 109766
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.084456205368042,
      "learning_rate": 0.00032220982181674584,
      "loss": 2.9729,
      "step": 109767
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9963186979293823,
      "learning_rate": 0.00032220574247778335,
      "loss": 2.8612,
      "step": 109768
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.00102162361145,
      "learning_rate": 0.0003222016631346924,
      "loss": 3.1009,
      "step": 109769
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.081092357635498,
      "learning_rate": 0.00032219758378747374,
      "loss": 2.9619,
      "step": 109770
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7097400426864624,
      "learning_rate": 0.0003221935044361281,
      "loss": 2.9729,
      "step": 109771
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0631351470947266,
      "learning_rate": 0.0003221894250806562,
      "loss": 2.7281,
      "step": 109772
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.805442214012146,
      "learning_rate": 0.000322185345721059,
      "loss": 3.0095,
      "step": 109773
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7712494134902954,
      "learning_rate": 0.00032218126635733706,
      "loss": 3.127,
      "step": 109774
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1475441455841064,
      "learning_rate": 0.0003221771869894912,
      "loss": 2.5734,
      "step": 109775
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2062172889709473,
      "learning_rate": 0.00032217310761752224,
      "loss": 2.8115,
      "step": 109776
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4187934398651123,
      "learning_rate": 0.00032216902824143087,
      "loss": 2.9937,
      "step": 109777
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9392346143722534,
      "learning_rate": 0.00032216494886121773,
      "loss": 3.0009,
      "step": 109778
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7417489290237427,
      "learning_rate": 0.0003221608694768839,
      "loss": 2.9547,
      "step": 109779
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7358579635620117,
      "learning_rate": 0.00032215679008842983,
      "loss": 3.0575,
      "step": 109780
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.439786195755005,
      "learning_rate": 0.00032215271069585636,
      "loss": 2.9592,
      "step": 109781
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.250880002975464,
      "learning_rate": 0.00032214863129916444,
      "loss": 2.9604,
      "step": 109782
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1169683933258057,
      "learning_rate": 0.00032214455189835456,
      "loss": 3.2098,
      "step": 109783
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.062481641769409,
      "learning_rate": 0.0003221404724934276,
      "loss": 3.3233,
      "step": 109784
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.778498649597168,
      "learning_rate": 0.00032213639308438437,
      "loss": 3.1155,
      "step": 109785
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8115140199661255,
      "learning_rate": 0.0003221323136712254,
      "loss": 2.9843,
      "step": 109786
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0368497371673584,
      "learning_rate": 0.00032212823425395177,
      "loss": 2.8951,
      "step": 109787
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0778756141662598,
      "learning_rate": 0.00032212415483256405,
      "loss": 3.1431,
      "step": 109788
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.767653226852417,
      "learning_rate": 0.000322120075407063,
      "loss": 3.0249,
      "step": 109789
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4475629329681396,
      "learning_rate": 0.00032211599597744943,
      "loss": 2.9035,
      "step": 109790
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.357283592224121,
      "learning_rate": 0.00032211191654372403,
      "loss": 2.7147,
      "step": 109791
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0164389610290527,
      "learning_rate": 0.0003221078371058877,
      "loss": 2.9751,
      "step": 109792
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6949352025985718,
      "learning_rate": 0.0003221037576639411,
      "loss": 3.1069,
      "step": 109793
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8773821592330933,
      "learning_rate": 0.00032209967821788486,
      "loss": 2.944,
      "step": 109794
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8535401821136475,
      "learning_rate": 0.00032209559876772,
      "loss": 3.103,
      "step": 109795
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1126043796539307,
      "learning_rate": 0.0003220915193134471,
      "loss": 2.9804,
      "step": 109796
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0369036197662354,
      "learning_rate": 0.000322087439855067,
      "loss": 3.0407,
      "step": 109797
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7306185960769653,
      "learning_rate": 0.0003220833603925804,
      "loss": 3.0114,
      "step": 109798
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3966405391693115,
      "learning_rate": 0.0003220792809259881,
      "loss": 2.8923,
      "step": 109799
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1312737464904785,
      "learning_rate": 0.0003220752014552909,
      "loss": 2.8356,
      "step": 109800
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8219168186187744,
      "learning_rate": 0.0003220711219804894,
      "loss": 3.1572,
      "step": 109801
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2962913513183594,
      "learning_rate": 0.0003220670425015845,
      "loss": 3.0248,
      "step": 109802
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6763834953308105,
      "learning_rate": 0.00032206296301857696,
      "loss": 2.8236,
      "step": 109803
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6593540906906128,
      "learning_rate": 0.00032205888353146746,
      "loss": 3.1696,
      "step": 109804
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.056903600692749,
      "learning_rate": 0.0003220548040402568,
      "loss": 2.9397,
      "step": 109805
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.142143964767456,
      "learning_rate": 0.0003220507245449458,
      "loss": 3.115,
      "step": 109806
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0346693992614746,
      "learning_rate": 0.00032204664504553504,
      "loss": 2.9923,
      "step": 109807
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.307901620864868,
      "learning_rate": 0.00032204256554202546,
      "loss": 3.0046,
      "step": 109808
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4601473808288574,
      "learning_rate": 0.0003220384860344177,
      "loss": 2.997,
      "step": 109809
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.866969108581543,
      "learning_rate": 0.00032203440652271264,
      "loss": 3.0476,
      "step": 109810
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6899240016937256,
      "learning_rate": 0.000322030327006911,
      "loss": 3.2781,
      "step": 109811
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6128358840942383,
      "learning_rate": 0.0003220262474870134,
      "loss": 2.7156,
      "step": 109812
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.167670965194702,
      "learning_rate": 0.00032202216796302076,
      "loss": 3.0125,
      "step": 109813
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1182382106781006,
      "learning_rate": 0.0003220180884349338,
      "loss": 3.054,
      "step": 109814
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.020155429840088,
      "learning_rate": 0.0003220140089027532,
      "loss": 2.8967,
      "step": 109815
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.535972833633423,
      "learning_rate": 0.0003220099293664798,
      "loss": 2.8962,
      "step": 109816
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0608761310577393,
      "learning_rate": 0.0003220058498261144,
      "loss": 3.0377,
      "step": 109817
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8142807483673096,
      "learning_rate": 0.0003220017702816576,
      "loss": 2.7903,
      "step": 109818
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.269378423690796,
      "learning_rate": 0.0003219976907331104,
      "loss": 3.0241,
      "step": 109819
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.19939923286438,
      "learning_rate": 0.0003219936111804733,
      "loss": 3.1086,
      "step": 109820
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7866387367248535,
      "learning_rate": 0.0003219895316237471,
      "loss": 2.8228,
      "step": 109821
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.265157461166382,
      "learning_rate": 0.0003219854520629328,
      "loss": 3.1783,
      "step": 109822
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5525360107421875,
      "learning_rate": 0.0003219813724980309,
      "loss": 3.0625,
      "step": 109823
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0246806144714355,
      "learning_rate": 0.0003219772929290422,
      "loss": 3.0087,
      "step": 109824
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1726558208465576,
      "learning_rate": 0.0003219732133559677,
      "loss": 2.8633,
      "step": 109825
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.528141498565674,
      "learning_rate": 0.00032196913377880777,
      "loss": 3.0052,
      "step": 109826
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.858378529548645,
      "learning_rate": 0.00032196505419756344,
      "loss": 3.0555,
      "step": 109827
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3719451427459717,
      "learning_rate": 0.0003219609746122354,
      "loss": 2.9622,
      "step": 109828
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2161753177642822,
      "learning_rate": 0.00032195689502282433,
      "loss": 2.913,
      "step": 109829
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.963281273841858,
      "learning_rate": 0.00032195281542933107,
      "loss": 2.9213,
      "step": 109830
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.097839593887329,
      "learning_rate": 0.0003219487358317565,
      "loss": 2.827,
      "step": 109831
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7207655906677246,
      "learning_rate": 0.0003219446562301011,
      "loss": 2.9708,
      "step": 109832
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1145639419555664,
      "learning_rate": 0.0003219405766243658,
      "loss": 2.9279,
      "step": 109833
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.47525691986084,
      "learning_rate": 0.00032193649701455147,
      "loss": 3.0038,
      "step": 109834
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.4822335243225098,
      "learning_rate": 0.00032193241740065856,
      "loss": 2.9254,
      "step": 109835
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1439130306243896,
      "learning_rate": 0.000321928337782688,
      "loss": 3.0363,
      "step": 109836
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5780717134475708,
      "learning_rate": 0.00032192425816064067,
      "loss": 2.7957,
      "step": 109837
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5572257041931152,
      "learning_rate": 0.00032192017853451715,
      "loss": 2.8572,
      "step": 109838
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6854259967803955,
      "learning_rate": 0.0003219160989043182,
      "loss": 2.8854,
      "step": 109839
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8852369785308838,
      "learning_rate": 0.00032191201927004475,
      "loss": 3.0037,
      "step": 109840
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.700070381164551,
      "learning_rate": 0.00032190793963169734,
      "loss": 2.9749,
      "step": 109841
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9619699716567993,
      "learning_rate": 0.00032190385998927685,
      "loss": 2.8055,
      "step": 109842
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5082321166992188,
      "learning_rate": 0.0003218997803427841,
      "loss": 2.9177,
      "step": 109843
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.248408317565918,
      "learning_rate": 0.0003218957006922196,
      "loss": 3.0424,
      "step": 109844
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.15478253364563,
      "learning_rate": 0.0003218916210375844,
      "loss": 3.1215,
      "step": 109845
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8728034496307373,
      "learning_rate": 0.00032188754137887915,
      "loss": 2.8423,
      "step": 109846
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0784075260162354,
      "learning_rate": 0.00032188346171610455,
      "loss": 3.0589,
      "step": 109847
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0547077655792236,
      "learning_rate": 0.0003218793820492614,
      "loss": 2.957,
      "step": 109848
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8583300113677979,
      "learning_rate": 0.0003218753023783505,
      "loss": 3.0052,
      "step": 109849
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5298430919647217,
      "learning_rate": 0.0003218712227033725,
      "loss": 3.0479,
      "step": 109850
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.574519634246826,
      "learning_rate": 0.00032186714302432826,
      "loss": 2.774,
      "step": 109851
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.38810396194458,
      "learning_rate": 0.00032186306334121853,
      "loss": 2.8848,
      "step": 109852
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.030503511428833,
      "learning_rate": 0.00032185898365404397,
      "loss": 2.7799,
      "step": 109853
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9838569164276123,
      "learning_rate": 0.0003218549039628055,
      "loss": 2.6294,
      "step": 109854
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7152471542358398,
      "learning_rate": 0.00032185082426750374,
      "loss": 3.0318,
      "step": 109855
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.114440679550171,
      "learning_rate": 0.0003218467445681395,
      "loss": 3.0179,
      "step": 109856
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.867327094078064,
      "learning_rate": 0.00032184266486471355,
      "loss": 2.9221,
      "step": 109857
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9710516929626465,
      "learning_rate": 0.0003218385851572266,
      "loss": 2.7174,
      "step": 109858
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2535319328308105,
      "learning_rate": 0.0003218345054456795,
      "loss": 2.8191,
      "step": 109859
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.5315263271331787,
      "learning_rate": 0.00032183042573007294,
      "loss": 2.9159,
      "step": 109860
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.0120646953582764,
      "learning_rate": 0.0003218263460104077,
      "loss": 2.8735,
      "step": 109861
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8284896612167358,
      "learning_rate": 0.0003218222662866845,
      "loss": 2.9466,
      "step": 109862
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.470982313156128,
      "learning_rate": 0.00032181818655890415,
      "loss": 2.9872,
      "step": 109863
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.3078083992004395,
      "learning_rate": 0.00032181410682706734,
      "loss": 2.884,
      "step": 109864
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.283932685852051,
      "learning_rate": 0.000321810027091175,
      "loss": 2.9955,
      "step": 109865
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1497559547424316,
      "learning_rate": 0.0003218059473512276,
      "loss": 3.1654,
      "step": 109866
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.321054220199585,
      "learning_rate": 0.00032180186760722617,
      "loss": 3.0426,
      "step": 109867
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.6665821075439453,
      "learning_rate": 0.00032179778785917137,
      "loss": 2.9006,
      "step": 109868
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6159045696258545,
      "learning_rate": 0.0003217937081070639,
      "loss": 2.9438,
      "step": 109869
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.968245506286621,
      "learning_rate": 0.0003217896283509045,
      "loss": 2.9406,
      "step": 109870
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5258275270462036,
      "learning_rate": 0.0003217855485906942,
      "loss": 3.1137,
      "step": 109871
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.647188663482666,
      "learning_rate": 0.0003217814688264334,
      "loss": 2.8839,
      "step": 109872
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.657914876937866,
      "learning_rate": 0.000321777389058123,
      "loss": 3.2725,
      "step": 109873
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8290592432022095,
      "learning_rate": 0.0003217733092857639,
      "loss": 2.8102,
      "step": 109874
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6840534210205078,
      "learning_rate": 0.00032176922950935663,
      "loss": 2.9209,
      "step": 109875
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3227946758270264,
      "learning_rate": 0.000321765149728902,
      "loss": 2.9288,
      "step": 109876
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0745391845703125,
      "learning_rate": 0.00032176106994440096,
      "loss": 2.8427,
      "step": 109877
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9544131755828857,
      "learning_rate": 0.00032175699015585404,
      "loss": 2.997,
      "step": 109878
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8290036916732788,
      "learning_rate": 0.0003217529103632621,
      "loss": 2.9652,
      "step": 109879
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0699045658111572,
      "learning_rate": 0.0003217488305666259,
      "loss": 3.1099,
      "step": 109880
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7628018856048584,
      "learning_rate": 0.00032174475076594616,
      "loss": 2.8831,
      "step": 109881
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.302253246307373,
      "learning_rate": 0.0003217406709612237,
      "loss": 3.0447,
      "step": 109882
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.035141944885254,
      "learning_rate": 0.0003217365911524591,
      "loss": 2.8545,
      "step": 109883
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7314863204956055,
      "learning_rate": 0.0003217325113396535,
      "loss": 3.0003,
      "step": 109884
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.018786907196045,
      "learning_rate": 0.0003217284315228072,
      "loss": 2.9235,
      "step": 109885
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.042661428451538,
      "learning_rate": 0.00032172435170192123,
      "loss": 3.0171,
      "step": 109886
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7098301649093628,
      "learning_rate": 0.00032172027187699634,
      "loss": 3.1147,
      "step": 109887
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8317543268203735,
      "learning_rate": 0.00032171619204803323,
      "loss": 3.1559,
      "step": 109888
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.561582326889038,
      "learning_rate": 0.00032171211221503266,
      "loss": 3.0613,
      "step": 109889
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9934278726577759,
      "learning_rate": 0.0003217080323779954,
      "loss": 2.7844,
      "step": 109890
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7089347839355469,
      "learning_rate": 0.00032170395253692223,
      "loss": 3.0225,
      "step": 109891
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4172654151916504,
      "learning_rate": 0.0003216998726918138,
      "loss": 2.8217,
      "step": 109892
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.247906446456909,
      "learning_rate": 0.000321695792842671,
      "loss": 2.8768,
      "step": 109893
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0547549724578857,
      "learning_rate": 0.0003216917129894946,
      "loss": 2.9767,
      "step": 109894
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.674575924873352,
      "learning_rate": 0.00032168763313228523,
      "loss": 3.0412,
      "step": 109895
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.884290099143982,
      "learning_rate": 0.0003216835532710437,
      "loss": 3.1105,
      "step": 109896
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0472941398620605,
      "learning_rate": 0.0003216794734057708,
      "loss": 3.1673,
      "step": 109897
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1743929386138916,
      "learning_rate": 0.0003216753935364674,
      "loss": 3.0423,
      "step": 109898
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.009549617767334,
      "learning_rate": 0.00032167131366313403,
      "loss": 2.8939,
      "step": 109899
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7325270175933838,
      "learning_rate": 0.0003216672337857716,
      "loss": 2.9158,
      "step": 109900
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7446434497833252,
      "learning_rate": 0.0003216631539043808,
      "loss": 2.8642,
      "step": 109901
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.481442451477051,
      "learning_rate": 0.0003216590740189623,
      "loss": 2.8104,
      "step": 109902
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2630255222320557,
      "learning_rate": 0.00032165499412951714,
      "loss": 3.1527,
      "step": 109903
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9130979776382446,
      "learning_rate": 0.00032165091423604585,
      "loss": 2.8741,
      "step": 109904
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0497589111328125,
      "learning_rate": 0.00032164683433854923,
      "loss": 2.9873,
      "step": 109905
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8924262523651123,
      "learning_rate": 0.00032164275443702806,
      "loss": 3.0322,
      "step": 109906
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.287250518798828,
      "learning_rate": 0.0003216386745314831,
      "loss": 2.992,
      "step": 109907
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3651864528656006,
      "learning_rate": 0.00032163459462191513,
      "loss": 3.1673,
      "step": 109908
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.703101634979248,
      "learning_rate": 0.00032163051470832484,
      "loss": 3.0293,
      "step": 109909
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.1280629634857178,
      "learning_rate": 0.0003216264347907131,
      "loss": 2.8686,
      "step": 109910
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.091961145401001,
      "learning_rate": 0.00032162235486908044,
      "loss": 3.2562,
      "step": 109911
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5963799953460693,
      "learning_rate": 0.00032161827494342786,
      "loss": 3.1892,
      "step": 109912
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6410125494003296,
      "learning_rate": 0.0003216141950137561,
      "loss": 2.9516,
      "step": 109913
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2133922576904297,
      "learning_rate": 0.0003216101150800657,
      "loss": 3.0403,
      "step": 109914
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8014593124389648,
      "learning_rate": 0.0003216060351423577,
      "loss": 2.9686,
      "step": 109915
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3259613513946533,
      "learning_rate": 0.0003216019552006327,
      "loss": 2.7657,
      "step": 109916
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.809868335723877,
      "learning_rate": 0.00032159787525489153,
      "loss": 2.9393,
      "step": 109917
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7802499532699585,
      "learning_rate": 0.0003215937953051348,
      "loss": 2.9096,
      "step": 109918
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0196807384490967,
      "learning_rate": 0.00032158971535136354,
      "loss": 3.0568,
      "step": 109919
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0718255043029785,
      "learning_rate": 0.0003215856353935782,
      "loss": 2.7223,
      "step": 109920
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7343064546585083,
      "learning_rate": 0.0003215815554317797,
      "loss": 3.0399,
      "step": 109921
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7992255687713623,
      "learning_rate": 0.0003215774754659688,
      "loss": 2.8712,
      "step": 109922
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5016608238220215,
      "learning_rate": 0.0003215733954961463,
      "loss": 3.0965,
      "step": 109923
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7405716180801392,
      "learning_rate": 0.0003215693155223128,
      "loss": 3.0478,
      "step": 109924
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7175029516220093,
      "learning_rate": 0.0003215652355444693,
      "loss": 2.871,
      "step": 109925
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.2480316162109375,
      "learning_rate": 0.0003215611555626163,
      "loss": 2.9622,
      "step": 109926
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6805524826049805,
      "learning_rate": 0.00032155707557675464,
      "loss": 3.0618,
      "step": 109927
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.799474835395813,
      "learning_rate": 0.0003215529955868852,
      "loss": 3.1474,
      "step": 109928
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9823187589645386,
      "learning_rate": 0.0003215489155930086,
      "loss": 2.9623,
      "step": 109929
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5941474437713623,
      "learning_rate": 0.00032154483559512564,
      "loss": 2.9259,
      "step": 109930
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9793524742126465,
      "learning_rate": 0.0003215407555932371,
      "loss": 2.7499,
      "step": 109931
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.117769956588745,
      "learning_rate": 0.00032153667558734386,
      "loss": 2.8195,
      "step": 109932
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0212817192077637,
      "learning_rate": 0.0003215325955774464,
      "loss": 2.9254,
      "step": 109933
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8652235269546509,
      "learning_rate": 0.0003215285155635456,
      "loss": 3.2015,
      "step": 109934
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8838294744491577,
      "learning_rate": 0.0003215244355456423,
      "loss": 3.024,
      "step": 109935
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.232654094696045,
      "learning_rate": 0.0003215203555237372,
      "loss": 3.0872,
      "step": 109936
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9788336753845215,
      "learning_rate": 0.0003215162754978311,
      "loss": 2.9501,
      "step": 109937
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.426705837249756,
      "learning_rate": 0.00032151219546792465,
      "loss": 2.9922,
      "step": 109938
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.224707841873169,
      "learning_rate": 0.00032150811543401865,
      "loss": 2.9894,
      "step": 109939
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6533311605453491,
      "learning_rate": 0.00032150403539611395,
      "loss": 2.9644,
      "step": 109940
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6436351537704468,
      "learning_rate": 0.00032149995535421126,
      "loss": 3.2505,
      "step": 109941
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7441515922546387,
      "learning_rate": 0.00032149587530831133,
      "loss": 2.9405,
      "step": 109942
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7984627485275269,
      "learning_rate": 0.0003214917952584148,
      "loss": 2.9537,
      "step": 109943
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7673466205596924,
      "learning_rate": 0.00032148771520452274,
      "loss": 2.8631,
      "step": 109944
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5381494760513306,
      "learning_rate": 0.0003214836351466355,
      "loss": 3.0593,
      "step": 109945
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9604741334915161,
      "learning_rate": 0.0003214795550847541,
      "loss": 2.8417,
      "step": 109946
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0164952278137207,
      "learning_rate": 0.0003214754750188793,
      "loss": 2.8871,
      "step": 109947
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8084605932235718,
      "learning_rate": 0.0003214713949490118,
      "loss": 2.7093,
      "step": 109948
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.290353298187256,
      "learning_rate": 0.00032146731487515235,
      "loss": 3.0136,
      "step": 109949
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.222289562225342,
      "learning_rate": 0.0003214632347973017,
      "loss": 2.9387,
      "step": 109950
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1541545391082764,
      "learning_rate": 0.0003214591547154606,
      "loss": 3.1712,
      "step": 109951
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.5478267669677734,
      "learning_rate": 0.0003214550746296299,
      "loss": 2.9799,
      "step": 109952
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7694429159164429,
      "learning_rate": 0.0003214509945398103,
      "loss": 3.0945,
      "step": 109953
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6516207456588745,
      "learning_rate": 0.00032144691444600256,
      "loss": 2.9707,
      "step": 109954
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6403735876083374,
      "learning_rate": 0.00032144283434820735,
      "loss": 2.9478,
      "step": 109955
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.306903839111328,
      "learning_rate": 0.00032143875424642563,
      "loss": 2.9399,
      "step": 109956
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6881386041641235,
      "learning_rate": 0.00032143467414065796,
      "loss": 2.9454,
      "step": 109957
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9652049541473389,
      "learning_rate": 0.00032143059403090513,
      "loss": 2.9005,
      "step": 109958
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8495548963546753,
      "learning_rate": 0.0003214265139171681,
      "loss": 2.9102,
      "step": 109959
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5654042959213257,
      "learning_rate": 0.0003214224337994474,
      "loss": 3.0421,
      "step": 109960
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.127653121948242,
      "learning_rate": 0.00032141835367774385,
      "loss": 3.077,
      "step": 109961
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9918383359909058,
      "learning_rate": 0.00032141427355205824,
      "loss": 3.0944,
      "step": 109962
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8704332113265991,
      "learning_rate": 0.0003214101934223912,
      "loss": 3.0323,
      "step": 109963
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0606627464294434,
      "learning_rate": 0.0003214061132887438,
      "loss": 3.0634,
      "step": 109964
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.954856276512146,
      "learning_rate": 0.0003214020331511165,
      "loss": 2.8791,
      "step": 109965
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9444026947021484,
      "learning_rate": 0.0003213979530095102,
      "loss": 3.0484,
      "step": 109966
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2067880630493164,
      "learning_rate": 0.0003213938728639255,
      "loss": 2.8583,
      "step": 109967
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.5104808807373047,
      "learning_rate": 0.00032138979271436344,
      "loss": 2.8547,
      "step": 109968
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7889275550842285,
      "learning_rate": 0.00032138571256082446,
      "loss": 2.9377,
      "step": 109969
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7939488887786865,
      "learning_rate": 0.00032138163240330955,
      "loss": 2.952,
      "step": 109970
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.849851131439209,
      "learning_rate": 0.00032137755224181935,
      "loss": 3.0786,
      "step": 109971
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0228323936462402,
      "learning_rate": 0.0003213734720763547,
      "loss": 3.1317,
      "step": 109972
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6590269804000854,
      "learning_rate": 0.0003213693919069163,
      "loss": 2.8428,
      "step": 109973
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.4314706325531006,
      "learning_rate": 0.00032136531173350495,
      "loss": 3.1025,
      "step": 109974
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7594325542449951,
      "learning_rate": 0.0003213612315561213,
      "loss": 2.7864,
      "step": 109975
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.985127329826355,
      "learning_rate": 0.0003213571513747663,
      "loss": 2.7742,
      "step": 109976
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0543835163116455,
      "learning_rate": 0.0003213530711894406,
      "loss": 3.0287,
      "step": 109977
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0097734928131104,
      "learning_rate": 0.0003213489910001449,
      "loss": 2.8328,
      "step": 109978
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8517965078353882,
      "learning_rate": 0.0003213449108068801,
      "loss": 2.8981,
      "step": 109979
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2652018070220947,
      "learning_rate": 0.0003213408306096468,
      "loss": 2.9419,
      "step": 109980
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.858711838722229,
      "learning_rate": 0.00032133675040844586,
      "loss": 3.0123,
      "step": 109981
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.985485553741455,
      "learning_rate": 0.000321332670203278,
      "loss": 2.8851,
      "step": 109982
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.114999532699585,
      "learning_rate": 0.000321328589994144,
      "loss": 3.2357,
      "step": 109983
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1916730403900146,
      "learning_rate": 0.00032132450978104464,
      "loss": 3.0357,
      "step": 109984
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1576433181762695,
      "learning_rate": 0.0003213204295639806,
      "loss": 3.3651,
      "step": 109985
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.241770029067993,
      "learning_rate": 0.00032131634934295277,
      "loss": 2.8225,
      "step": 109986
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0376980304718018,
      "learning_rate": 0.0003213122691179618,
      "loss": 2.8579,
      "step": 109987
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.28539776802063,
      "learning_rate": 0.00032130818888900835,
      "loss": 2.8209,
      "step": 109988
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6075929403305054,
      "learning_rate": 0.0003213041086560935,
      "loss": 3.049,
      "step": 109989
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.656526565551758,
      "learning_rate": 0.0003213000284192177,
      "loss": 2.944,
      "step": 109990
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.6507480144500732,
      "learning_rate": 0.00032129594817838187,
      "loss": 3.0834,
      "step": 109991
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8196810483932495,
      "learning_rate": 0.0003212918679335867,
      "loss": 2.8522,
      "step": 109992
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8527190685272217,
      "learning_rate": 0.00032128778768483296,
      "loss": 3.0447,
      "step": 109993
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3791303634643555,
      "learning_rate": 0.00032128370743212146,
      "loss": 2.8975,
      "step": 109994
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.846926689147949,
      "learning_rate": 0.0003212796271754529,
      "loss": 3.1759,
      "step": 109995
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.574213743209839,
      "learning_rate": 0.00032127554691482803,
      "loss": 3.0027,
      "step": 109996
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1208674907684326,
      "learning_rate": 0.0003212714666502476,
      "loss": 2.9101,
      "step": 109997
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9625282287597656,
      "learning_rate": 0.00032126738638171246,
      "loss": 3.0012,
      "step": 109998
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.628488302230835,
      "learning_rate": 0.00032126330610922333,
      "loss": 3.0389,
      "step": 109999
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.405795097351074,
      "learning_rate": 0.0003212592258327809,
      "loss": 2.8268,
      "step": 110000
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1956231594085693,
      "learning_rate": 0.000321255145552386,
      "loss": 3.0391,
      "step": 110001
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7820398807525635,
      "learning_rate": 0.0003212510652680394,
      "loss": 2.9344,
      "step": 110002
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2441065311431885,
      "learning_rate": 0.0003212469849797418,
      "loss": 2.9748,
      "step": 110003
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.408778429031372,
      "learning_rate": 0.000321242904687494,
      "loss": 3.1464,
      "step": 110004
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9808681011199951,
      "learning_rate": 0.0003212388243912967,
      "loss": 2.9673,
      "step": 110005
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.020703077316284,
      "learning_rate": 0.00032123474409115067,
      "loss": 2.9566,
      "step": 110006
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.637061595916748,
      "learning_rate": 0.0003212306637870568,
      "loss": 3.1707,
      "step": 110007
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9754538536071777,
      "learning_rate": 0.00032122658347901573,
      "loss": 3.1933,
      "step": 110008
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8694123029708862,
      "learning_rate": 0.00032122250316702814,
      "loss": 3.0459,
      "step": 110009
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9816343784332275,
      "learning_rate": 0.00032121842285109497,
      "loss": 3.0501,
      "step": 110010
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3159098625183105,
      "learning_rate": 0.00032121434253121694,
      "loss": 2.6963,
      "step": 110011
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8414835929870605,
      "learning_rate": 0.0003212102622073947,
      "loss": 3.0153,
      "step": 110012
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.145392417907715,
      "learning_rate": 0.000321206181879629,
      "loss": 3.068,
      "step": 110013
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3763463497161865,
      "learning_rate": 0.00032120210154792077,
      "loss": 3.0164,
      "step": 110014
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.723392128944397,
      "learning_rate": 0.0003211980212122707,
      "loss": 2.7762,
      "step": 110015
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5735561847686768,
      "learning_rate": 0.0003211939408726794,
      "loss": 2.9988,
      "step": 110016
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6269944906234741,
      "learning_rate": 0.0003211898605291479,
      "loss": 2.9434,
      "step": 110017
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.688376784324646,
      "learning_rate": 0.0003211857801816767,
      "loss": 2.9619,
      "step": 110018
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.5054824352264404,
      "learning_rate": 0.0003211816998302666,
      "loss": 2.7409,
      "step": 110019
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.023728609085083,
      "learning_rate": 0.0003211776194749186,
      "loss": 2.8086,
      "step": 110020
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.378937005996704,
      "learning_rate": 0.0003211735391156331,
      "loss": 2.8724,
      "step": 110021
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0616676807403564,
      "learning_rate": 0.0003211694587524111,
      "loss": 3.0714,
      "step": 110022
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0493431091308594,
      "learning_rate": 0.0003211653783852534,
      "loss": 3.0989,
      "step": 110023
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1897685527801514,
      "learning_rate": 0.00032116129801416053,
      "loss": 3.2496,
      "step": 110024
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8608019351959229,
      "learning_rate": 0.0003211572176391334,
      "loss": 3.013,
      "step": 110025
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.3438849449157715,
      "learning_rate": 0.0003211531372601728,
      "loss": 3.0145,
      "step": 110026
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.13398814201355,
      "learning_rate": 0.0003211490568772794,
      "loss": 2.8776,
      "step": 110027
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8633527755737305,
      "learning_rate": 0.00032114497649045397,
      "loss": 3.1444,
      "step": 110028
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.226180076599121,
      "learning_rate": 0.00032114089609969735,
      "loss": 3.0962,
      "step": 110029
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8265292644500732,
      "learning_rate": 0.0003211368157050102,
      "loss": 3.1906,
      "step": 110030
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.155454635620117,
      "learning_rate": 0.0003211327353063932,
      "loss": 3.1818,
      "step": 110031
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8767197132110596,
      "learning_rate": 0.00032112865490384747,
      "loss": 2.9245,
      "step": 110032
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6845295429229736,
      "learning_rate": 0.00032112457449737334,
      "loss": 3.1309,
      "step": 110033
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1718239784240723,
      "learning_rate": 0.0003211204940869718,
      "loss": 2.9999,
      "step": 110034
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.074697971343994,
      "learning_rate": 0.00032111641367264365,
      "loss": 2.8943,
      "step": 110035
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6859185695648193,
      "learning_rate": 0.0003211123332543894,
      "loss": 3.1503,
      "step": 110036
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6499260663986206,
      "learning_rate": 0.00032110825283221,
      "loss": 3.0489,
      "step": 110037
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4427711963653564,
      "learning_rate": 0.00032110417240610627,
      "loss": 2.9964,
      "step": 110038
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2220852375030518,
      "learning_rate": 0.0003211000919760789,
      "loss": 2.8299,
      "step": 110039
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.06705904006958,
      "learning_rate": 0.00032109601154212846,
      "loss": 2.9746,
      "step": 110040
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.950756549835205,
      "learning_rate": 0.000321091931104256,
      "loss": 3.0376,
      "step": 110041
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8245035409927368,
      "learning_rate": 0.0003210878506624621,
      "loss": 2.9693,
      "step": 110042
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9208766222000122,
      "learning_rate": 0.0003210837702167476,
      "loss": 3.4207,
      "step": 110043
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.7568655014038086,
      "learning_rate": 0.00032107968976711327,
      "loss": 3.085,
      "step": 110044
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.177262783050537,
      "learning_rate": 0.00032107560931355977,
      "loss": 2.9656,
      "step": 110045
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8918116092681885,
      "learning_rate": 0.00032107152885608786,
      "loss": 2.8277,
      "step": 110046
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8075759410858154,
      "learning_rate": 0.00032106744839469845,
      "loss": 2.8295,
      "step": 110047
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.747879981994629,
      "learning_rate": 0.00032106336792939215,
      "loss": 2.7338,
      "step": 110048
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2050106525421143,
      "learning_rate": 0.00032105928746016977,
      "loss": 2.9493,
      "step": 110049
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4026756286621094,
      "learning_rate": 0.0003210552069870321,
      "loss": 3.0884,
      "step": 110050
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9030495882034302,
      "learning_rate": 0.0003210511265099799,
      "loss": 2.953,
      "step": 110051
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9264402389526367,
      "learning_rate": 0.00032104704602901386,
      "loss": 2.9085,
      "step": 110052
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.65833580493927,
      "learning_rate": 0.00032104296554413483,
      "loss": 2.8211,
      "step": 110053
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7376877069473267,
      "learning_rate": 0.00032103888505534347,
      "loss": 3.1184,
      "step": 110054
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6850900650024414,
      "learning_rate": 0.0003210348045626405,
      "loss": 2.8407,
      "step": 110055
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8047178983688354,
      "learning_rate": 0.00032103072406602684,
      "loss": 2.8247,
      "step": 110056
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.994461178779602,
      "learning_rate": 0.00032102664356550316,
      "loss": 2.965,
      "step": 110057
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5533246994018555,
      "learning_rate": 0.0003210225630610702,
      "loss": 2.9768,
      "step": 110058
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9813686609268188,
      "learning_rate": 0.0003210184825527288,
      "loss": 3.0205,
      "step": 110059
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.2553577423095703,
      "learning_rate": 0.00032101440204047974,
      "loss": 2.9503,
      "step": 110060
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.29147481918335,
      "learning_rate": 0.0003210103215243236,
      "loss": 2.925,
      "step": 110061
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.542755126953125,
      "learning_rate": 0.00032100624100426123,
      "loss": 3.3168,
      "step": 110062
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.586723804473877,
      "learning_rate": 0.00032100216048029347,
      "loss": 3.2209,
      "step": 110063
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.97417151927948,
      "learning_rate": 0.000320998079952421,
      "loss": 2.9613,
      "step": 110064
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.414029836654663,
      "learning_rate": 0.0003209939994206446,
      "loss": 2.7845,
      "step": 110065
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.740272879600525,
      "learning_rate": 0.0003209899188849649,
      "loss": 2.9348,
      "step": 110066
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.238117218017578,
      "learning_rate": 0.0003209858383453829,
      "loss": 3.3872,
      "step": 110067
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.7384822368621826,
      "learning_rate": 0.0003209817578018992,
      "loss": 2.9829,
      "step": 110068
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7373265027999878,
      "learning_rate": 0.00032097767725451464,
      "loss": 3.1043,
      "step": 110069
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9036492109298706,
      "learning_rate": 0.00032097359670322985,
      "loss": 3.123,
      "step": 110070
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6866642236709595,
      "learning_rate": 0.00032096951614804574,
      "loss": 3.1411,
      "step": 110071
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8655493259429932,
      "learning_rate": 0.000320965435588963,
      "loss": 2.9439,
      "step": 110072
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.640457034111023,
      "learning_rate": 0.00032096135502598233,
      "loss": 3.2054,
      "step": 110073
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.649266481399536,
      "learning_rate": 0.0003209572744591046,
      "loss": 2.8468,
      "step": 110074
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0497589111328125,
      "learning_rate": 0.00032095319388833054,
      "loss": 3.4022,
      "step": 110075
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2461438179016113,
      "learning_rate": 0.00032094911331366087,
      "loss": 3.0067,
      "step": 110076
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.410618782043457,
      "learning_rate": 0.00032094503273509626,
      "loss": 3.0734,
      "step": 110077
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9424256086349487,
      "learning_rate": 0.00032094095215263774,
      "loss": 3.1444,
      "step": 110078
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.720970869064331,
      "learning_rate": 0.0003209368715662858,
      "loss": 2.9438,
      "step": 110079
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9711731672286987,
      "learning_rate": 0.0003209327909760413,
      "loss": 2.8649,
      "step": 110080
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9478662014007568,
      "learning_rate": 0.0003209287103819051,
      "loss": 3.0662,
      "step": 110081
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5648176670074463,
      "learning_rate": 0.00032092462978387776,
      "loss": 2.9658,
      "step": 110082
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6062452793121338,
      "learning_rate": 0.0003209205491819601,
      "loss": 3.1171,
      "step": 110083
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.435288429260254,
      "learning_rate": 0.000320916468576153,
      "loss": 2.9755,
      "step": 110084
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.915208101272583,
      "learning_rate": 0.0003209123879664571,
      "loss": 2.8038,
      "step": 110085
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0767014026641846,
      "learning_rate": 0.00032090830735287314,
      "loss": 2.9815,
      "step": 110086
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6976653337478638,
      "learning_rate": 0.0003209042267354021,
      "loss": 3.1496,
      "step": 110087
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4436519145965576,
      "learning_rate": 0.0003209001461140444,
      "loss": 2.8187,
      "step": 110088
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.408050060272217,
      "learning_rate": 0.000320896065488801,
      "loss": 2.9974,
      "step": 110089
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1933038234710693,
      "learning_rate": 0.00032089198485967267,
      "loss": 2.8921,
      "step": 110090
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.5458526611328125,
      "learning_rate": 0.00032088790422666013,
      "loss": 3.24,
      "step": 110091
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9875879287719727,
      "learning_rate": 0.0003208838235897641,
      "loss": 3.0565,
      "step": 110092
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7570466995239258,
      "learning_rate": 0.0003208797429489854,
      "loss": 2.981,
      "step": 110093
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.827970027923584,
      "learning_rate": 0.00032087566230432475,
      "loss": 3.0402,
      "step": 110094
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.746847629547119,
      "learning_rate": 0.00032087158165578286,
      "loss": 3.055,
      "step": 110095
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.410552978515625,
      "learning_rate": 0.00032086750100336067,
      "loss": 2.8937,
      "step": 110096
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.227100133895874,
      "learning_rate": 0.0003208634203470587,
      "loss": 2.8091,
      "step": 110097
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.166154146194458,
      "learning_rate": 0.0003208593396868779,
      "loss": 3.0681,
      "step": 110098
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0097036361694336,
      "learning_rate": 0.00032085525902281897,
      "loss": 2.8186,
      "step": 110099
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7609933614730835,
      "learning_rate": 0.0003208511783548826,
      "loss": 2.9843,
      "step": 110100
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9000155925750732,
      "learning_rate": 0.0003208470976830696,
      "loss": 3.138,
      "step": 110101
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.72111976146698,
      "learning_rate": 0.0003208430170073808,
      "loss": 3.1106,
      "step": 110102
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4546263217926025,
      "learning_rate": 0.0003208389363278168,
      "loss": 2.9648,
      "step": 110103
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.000742197036743,
      "learning_rate": 0.00032083485564437847,
      "loss": 3.0645,
      "step": 110104
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.6389713287353516,
      "learning_rate": 0.0003208307749570666,
      "loss": 2.9539,
      "step": 110105
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.522125720977783,
      "learning_rate": 0.00032082669426588184,
      "loss": 3.0913,
      "step": 110106
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3391313552856445,
      "learning_rate": 0.000320822613570825,
      "loss": 2.9042,
      "step": 110107
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2727413177490234,
      "learning_rate": 0.00032081853287189695,
      "loss": 2.9588,
      "step": 110108
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8746497631072998,
      "learning_rate": 0.0003208144521690982,
      "loss": 2.8568,
      "step": 110109
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.795867919921875,
      "learning_rate": 0.0003208103714624297,
      "loss": 2.8462,
      "step": 110110
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.21580171585083,
      "learning_rate": 0.0003208062907518922,
      "loss": 2.828,
      "step": 110111
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2875194549560547,
      "learning_rate": 0.00032080221003748645,
      "loss": 3.0511,
      "step": 110112
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6764068603515625,
      "learning_rate": 0.00032079812931921307,
      "loss": 3.0668,
      "step": 110113
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7556616067886353,
      "learning_rate": 0.000320794048597073,
      "loss": 3.0535,
      "step": 110114
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.276895046234131,
      "learning_rate": 0.0003207899678710669,
      "loss": 3.0997,
      "step": 110115
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.484065055847168,
      "learning_rate": 0.0003207858871411955,
      "loss": 3.251,
      "step": 110116
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6742171049118042,
      "learning_rate": 0.0003207818064074597,
      "loss": 2.8859,
      "step": 110117
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7529737949371338,
      "learning_rate": 0.00032077772566986015,
      "loss": 2.9581,
      "step": 110118
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7447631359100342,
      "learning_rate": 0.0003207736449283975,
      "loss": 2.9308,
      "step": 110119
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9196492433547974,
      "learning_rate": 0.0003207695641830728,
      "loss": 3.0053,
      "step": 110120
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8927843570709229,
      "learning_rate": 0.00032076548343388665,
      "loss": 2.5728,
      "step": 110121
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9501409530639648,
      "learning_rate": 0.0003207614026808397,
      "loss": 3.1523,
      "step": 110122
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9285691976547241,
      "learning_rate": 0.0003207573219239329,
      "loss": 2.9618,
      "step": 110123
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8278448581695557,
      "learning_rate": 0.00032075324116316687,
      "loss": 3.0094,
      "step": 110124
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8442511558532715,
      "learning_rate": 0.0003207491603985424,
      "loss": 2.7628,
      "step": 110125
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3286020755767822,
      "learning_rate": 0.00032074507963006035,
      "loss": 2.7488,
      "step": 110126
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6200627088546753,
      "learning_rate": 0.0003207409988577213,
      "loss": 3.0453,
      "step": 110127
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0248732566833496,
      "learning_rate": 0.0003207369180815261,
      "loss": 3.1068,
      "step": 110128
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1563172340393066,
      "learning_rate": 0.00032073283730147566,
      "loss": 3.1512,
      "step": 110129
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9857704639434814,
      "learning_rate": 0.0003207287565175705,
      "loss": 2.8501,
      "step": 110130
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.300867795944214,
      "learning_rate": 0.0003207246757298114,
      "loss": 2.8602,
      "step": 110131
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9505186080932617,
      "learning_rate": 0.00032072059493819927,
      "loss": 2.8525,
      "step": 110132
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2326409816741943,
      "learning_rate": 0.00032071651414273487,
      "loss": 3.0418,
      "step": 110133
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.030113458633423,
      "learning_rate": 0.0003207124333434188,
      "loss": 2.9128,
      "step": 110134
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9226288795471191,
      "learning_rate": 0.0003207083525402518,
      "loss": 3.0207,
      "step": 110135
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.755106806755066,
      "learning_rate": 0.00032070427173323487,
      "loss": 3.2297,
      "step": 110136
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9532544612884521,
      "learning_rate": 0.0003207001909223686,
      "loss": 3.2817,
      "step": 110137
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9952787160873413,
      "learning_rate": 0.0003206961101076537,
      "loss": 2.711,
      "step": 110138
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3883042335510254,
      "learning_rate": 0.00032069202928909114,
      "loss": 3.0454,
      "step": 110139
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6522274017333984,
      "learning_rate": 0.0003206879484666814,
      "loss": 2.9804,
      "step": 110140
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6734533309936523,
      "learning_rate": 0.00032068386764042543,
      "loss": 2.9361,
      "step": 110141
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9774514436721802,
      "learning_rate": 0.00032067978681032396,
      "loss": 3.0113,
      "step": 110142
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.826141834259033,
      "learning_rate": 0.0003206757059763777,
      "loss": 3.0177,
      "step": 110143
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.625063419342041,
      "learning_rate": 0.00032067162513858745,
      "loss": 3.1372,
      "step": 110144
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1481664180755615,
      "learning_rate": 0.000320667544296954,
      "loss": 3.0514,
      "step": 110145
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.210554361343384,
      "learning_rate": 0.000320663463451478,
      "loss": 2.9518,
      "step": 110146
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6379677057266235,
      "learning_rate": 0.00032065938260216023,
      "loss": 2.9609,
      "step": 110147
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3440330028533936,
      "learning_rate": 0.00032065530174900153,
      "loss": 2.9266,
      "step": 110148
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.894016146659851,
      "learning_rate": 0.00032065122089200274,
      "loss": 3.1247,
      "step": 110149
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1240298748016357,
      "learning_rate": 0.00032064714003116437,
      "loss": 2.9749,
      "step": 110150
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.148045778274536,
      "learning_rate": 0.0003206430591664873,
      "loss": 2.9742,
      "step": 110151
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.612938642501831,
      "learning_rate": 0.0003206389782979723,
      "loss": 3.0261,
      "step": 110152
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2727620601654053,
      "learning_rate": 0.00032063489742562023,
      "loss": 3.0426,
      "step": 110153
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.304183006286621,
      "learning_rate": 0.00032063081654943156,
      "loss": 3.0618,
      "step": 110154
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.708235740661621,
      "learning_rate": 0.00032062673566940735,
      "loss": 3.1289,
      "step": 110155
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0840907096862793,
      "learning_rate": 0.00032062265478554827,
      "loss": 2.9008,
      "step": 110156
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4565317630767822,
      "learning_rate": 0.00032061857389785496,
      "loss": 2.9885,
      "step": 110157
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0627410411834717,
      "learning_rate": 0.00032061449300632835,
      "loss": 2.9061,
      "step": 110158
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6090526580810547,
      "learning_rate": 0.00032061041211096914,
      "loss": 2.9526,
      "step": 110159
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7672369480133057,
      "learning_rate": 0.0003206063312117779,
      "loss": 3.1633,
      "step": 110160
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9635369777679443,
      "learning_rate": 0.00032060225030875573,
      "loss": 3.0779,
      "step": 110161
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7365643978118896,
      "learning_rate": 0.00032059816940190305,
      "loss": 3.0238,
      "step": 110162
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6503442525863647,
      "learning_rate": 0.00032059408849122097,
      "loss": 2.9735,
      "step": 110163
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.788648009300232,
      "learning_rate": 0.0003205900075767099,
      "loss": 2.9394,
      "step": 110164
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0522425174713135,
      "learning_rate": 0.0003205859266583708,
      "loss": 3.2689,
      "step": 110165
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8553634881973267,
      "learning_rate": 0.0003205818457362045,
      "loss": 2.8147,
      "step": 110166
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.3245689868927,
      "learning_rate": 0.00032057776481021153,
      "loss": 3.0536,
      "step": 110167
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.72742760181427,
      "learning_rate": 0.0003205736838803928,
      "loss": 3.0574,
      "step": 110168
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0462820529937744,
      "learning_rate": 0.00032056960294674905,
      "loss": 2.8763,
      "step": 110169
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7905930280685425,
      "learning_rate": 0.00032056552200928094,
      "loss": 3.0723,
      "step": 110170
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.148906707763672,
      "learning_rate": 0.0003205614410679894,
      "loss": 2.9671,
      "step": 110171
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5866950750350952,
      "learning_rate": 0.00032055736012287506,
      "loss": 2.9455,
      "step": 110172
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.754144310951233,
      "learning_rate": 0.00032055327917393875,
      "loss": 2.8364,
      "step": 110173
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.253493547439575,
      "learning_rate": 0.00032054919822118116,
      "loss": 2.7908,
      "step": 110174
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6919387578964233,
      "learning_rate": 0.0003205451172646031,
      "loss": 3.0971,
      "step": 110175
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8055411577224731,
      "learning_rate": 0.00032054103630420534,
      "loss": 3.1653,
      "step": 110176
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8192343711853027,
      "learning_rate": 0.00032053695533998855,
      "loss": 3.2972,
      "step": 110177
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9991490840911865,
      "learning_rate": 0.00032053287437195364,
      "loss": 2.9866,
      "step": 110178
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7609782218933105,
      "learning_rate": 0.00032052879340010125,
      "loss": 3.0426,
      "step": 110179
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.6839686632156372,
      "learning_rate": 0.0003205247124244321,
      "loss": 3.1555,
      "step": 110180
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.2703564167022705,
      "learning_rate": 0.0003205206314449471,
      "loss": 3.0159,
      "step": 110181
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8803775310516357,
      "learning_rate": 0.0003205165504616469,
      "loss": 2.8046,
      "step": 110182
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.5413106679916382,
      "learning_rate": 0.00032051246947453223,
      "loss": 3.1953,
      "step": 110183
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1820101737976074,
      "learning_rate": 0.00032050838848360407,
      "loss": 3.1032,
      "step": 110184
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.307319402694702,
      "learning_rate": 0.00032050430748886287,
      "loss": 2.7646,
      "step": 110185
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0221145153045654,
      "learning_rate": 0.00032050022649030956,
      "loss": 3.264,
      "step": 110186
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8475871086120605,
      "learning_rate": 0.0003204961454879449,
      "loss": 3.1206,
      "step": 110187
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.7760807275772095,
      "learning_rate": 0.0003204920644817696,
      "loss": 2.6135,
      "step": 110188
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4056808948516846,
      "learning_rate": 0.00032048798347178437,
      "loss": 3.0685,
      "step": 110189
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4373602867126465,
      "learning_rate": 0.00032048390245799016,
      "loss": 3.152,
      "step": 110190
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.1543076038360596,
      "learning_rate": 0.00032047982144038753,
      "loss": 2.9884,
      "step": 110191
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.6351592540740967,
      "learning_rate": 0.00032047574041897735,
      "loss": 3.0257,
      "step": 110192
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4016807079315186,
      "learning_rate": 0.0003204716593937603,
      "loss": 2.8593,
      "step": 110193
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.4601891040802,
      "learning_rate": 0.0003204675783647372,
      "loss": 3.0346,
      "step": 110194
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.975937843322754,
      "learning_rate": 0.0003204634973319088,
      "loss": 2.7942,
      "step": 110195
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.382277011871338,
      "learning_rate": 0.0003204594162952759,
      "loss": 3.0211,
      "step": 110196
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.155776262283325,
      "learning_rate": 0.0003204553352548391,
      "loss": 2.6257,
      "step": 110197
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.796990156173706,
      "learning_rate": 0.0003204512542105994,
      "loss": 3.0102,
      "step": 110198
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.9798548221588135,
      "learning_rate": 0.0003204471731625573,
      "loss": 3.0457,
      "step": 110199
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.322330951690674,
      "learning_rate": 0.0003204430921107137,
      "loss": 2.8357,
      "step": 110200
    },
    {
      "epoch": 1.43,
      "grad_norm": 3.4058096408843994,
      "learning_rate": 0.00032043901105506935,
      "loss": 2.9056,
      "step": 110201
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.0042688846588135,
      "learning_rate": 0.0003204349299956251,
      "loss": 3.1135,
      "step": 110202
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.027461051940918,
      "learning_rate": 0.00032043084893238155,
      "loss": 2.9143,
      "step": 110203
    },
    {
      "epoch": 1.43,
      "grad_norm": 4.042590618133545,
      "learning_rate": 0.00032042676786533947,
      "loss": 2.8654,
      "step": 110204
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.9387855529785156,
      "learning_rate": 0.00032042268679449975,
      "loss": 3.0348,
      "step": 110205
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8474630117416382,
      "learning_rate": 0.00032041860571986304,
      "loss": 2.9463,
      "step": 110206
    },
    {
      "epoch": 1.43,
      "grad_norm": 1.8662614822387695,
      "learning_rate": 0.0003204145246414301,
      "loss": 2.9254,
      "step": 110207
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.8238189220428467,
      "learning_rate": 0.0003204104435592017,
      "loss": 2.9352,
      "step": 110208
    },
    {
      "epoch": 1.43,
      "grad_norm": 2.199674129486084,
      "learning_rate": 0.0003204063624731787,
      "loss": 2.8216,
      "step": 110209
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.957383632659912,
      "learning_rate": 0.00032040228138336167,
      "loss": 3.022,
      "step": 110210
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6494179964065552,
      "learning_rate": 0.0003203982002897515,
      "loss": 2.9645,
      "step": 110211
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8885796070098877,
      "learning_rate": 0.00032039411919234897,
      "loss": 3.0557,
      "step": 110212
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1318159103393555,
      "learning_rate": 0.0003203900380911547,
      "loss": 3.0572,
      "step": 110213
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.119046211242676,
      "learning_rate": 0.0003203859569861696,
      "loss": 2.9261,
      "step": 110214
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.945446491241455,
      "learning_rate": 0.00032038187587739444,
      "loss": 2.881,
      "step": 110215
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2054011821746826,
      "learning_rate": 0.00032037779476482974,
      "loss": 2.9858,
      "step": 110216
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.528221368789673,
      "learning_rate": 0.00032037371364847646,
      "loss": 2.9728,
      "step": 110217
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8555731773376465,
      "learning_rate": 0.0003203696325283354,
      "loss": 3.0103,
      "step": 110218
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9279967546463013,
      "learning_rate": 0.00032036555140440724,
      "loss": 2.9003,
      "step": 110219
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.242482900619507,
      "learning_rate": 0.00032036147027669266,
      "loss": 2.9739,
      "step": 110220
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6329401731491089,
      "learning_rate": 0.0003203573891451926,
      "loss": 2.9299,
      "step": 110221
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7850837707519531,
      "learning_rate": 0.0003203533080099076,
      "loss": 3.0902,
      "step": 110222
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.86173677444458,
      "learning_rate": 0.00032034922687083857,
      "loss": 2.8404,
      "step": 110223
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.008894205093384,
      "learning_rate": 0.0003203451457279863,
      "loss": 2.8459,
      "step": 110224
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.781057357788086,
      "learning_rate": 0.00032034106458135147,
      "loss": 2.9995,
      "step": 110225
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2996039390563965,
      "learning_rate": 0.0003203369834309348,
      "loss": 2.9374,
      "step": 110226
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.833030343055725,
      "learning_rate": 0.00032033290227673715,
      "loss": 2.8919,
      "step": 110227
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.156703233718872,
      "learning_rate": 0.0003203288211187592,
      "loss": 2.7432,
      "step": 110228
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.558145761489868,
      "learning_rate": 0.0003203247399570017,
      "loss": 3.0507,
      "step": 110229
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.147502899169922,
      "learning_rate": 0.0003203206587914655,
      "loss": 2.7859,
      "step": 110230
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.438520669937134,
      "learning_rate": 0.00032031657762215135,
      "loss": 3.1621,
      "step": 110231
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1594560146331787,
      "learning_rate": 0.00032031249644905987,
      "loss": 2.8565,
      "step": 110232
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0225489139556885,
      "learning_rate": 0.000320308415272192,
      "loss": 3.045,
      "step": 110233
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6285356283187866,
      "learning_rate": 0.0003203043340915483,
      "loss": 2.8814,
      "step": 110234
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6904890537261963,
      "learning_rate": 0.0003203002529071297,
      "loss": 3.0839,
      "step": 110235
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9624488353729248,
      "learning_rate": 0.0003202961717189369,
      "loss": 3.1556,
      "step": 110236
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0932602882385254,
      "learning_rate": 0.00032029209052697076,
      "loss": 3.1103,
      "step": 110237
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.783440113067627,
      "learning_rate": 0.00032028800933123176,
      "loss": 3.1405,
      "step": 110238
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.862476110458374,
      "learning_rate": 0.0003202839281317209,
      "loss": 3.0245,
      "step": 110239
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3022618293762207,
      "learning_rate": 0.0003202798469284389,
      "loss": 3.1048,
      "step": 110240
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.965057373046875,
      "learning_rate": 0.00032027576572138654,
      "loss": 2.9358,
      "step": 110241
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7997372150421143,
      "learning_rate": 0.00032027168451056445,
      "loss": 2.7535,
      "step": 110242
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9865772724151611,
      "learning_rate": 0.0003202676032959735,
      "loss": 2.8152,
      "step": 110243
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.756762146949768,
      "learning_rate": 0.0003202635220776144,
      "loss": 3.0707,
      "step": 110244
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.489113450050354,
      "learning_rate": 0.000320259440855488,
      "loss": 3.2441,
      "step": 110245
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8843576908111572,
      "learning_rate": 0.00032025535962959485,
      "loss": 2.9418,
      "step": 110246
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8300158977508545,
      "learning_rate": 0.000320251278399936,
      "loss": 3.0187,
      "step": 110247
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.937503695487976,
      "learning_rate": 0.00032024719716651197,
      "loss": 2.8182,
      "step": 110248
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.308664083480835,
      "learning_rate": 0.00032024311592932365,
      "loss": 3.0806,
      "step": 110249
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7153421640396118,
      "learning_rate": 0.0003202390346883717,
      "loss": 2.6202,
      "step": 110250
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.098618507385254,
      "learning_rate": 0.000320234953443657,
      "loss": 3.0148,
      "step": 110251
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7409132719039917,
      "learning_rate": 0.00032023087219518014,
      "loss": 2.9296,
      "step": 110252
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.452303886413574,
      "learning_rate": 0.00032022679094294206,
      "loss": 3.0536,
      "step": 110253
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.218502998352051,
      "learning_rate": 0.00032022270968694343,
      "loss": 3.1379,
      "step": 110254
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8039286136627197,
      "learning_rate": 0.00032021862842718495,
      "loss": 3.1398,
      "step": 110255
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1816091537475586,
      "learning_rate": 0.0003202145471636675,
      "loss": 2.6943,
      "step": 110256
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5971741676330566,
      "learning_rate": 0.0003202104658963918,
      "loss": 2.9728,
      "step": 110257
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.695481538772583,
      "learning_rate": 0.0003202063846253586,
      "loss": 3.1705,
      "step": 110258
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.890625,
      "learning_rate": 0.0003202023033505686,
      "loss": 3.0761,
      "step": 110259
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7353919744491577,
      "learning_rate": 0.00032019822207202266,
      "loss": 3.1684,
      "step": 110260
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7675749063491821,
      "learning_rate": 0.00032019414078972145,
      "loss": 2.9311,
      "step": 110261
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.724199891090393,
      "learning_rate": 0.00032019005950366576,
      "loss": 3.087,
      "step": 110262
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1766133308410645,
      "learning_rate": 0.00032018597821385645,
      "loss": 2.9948,
      "step": 110263
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7482389211654663,
      "learning_rate": 0.00032018189692029406,
      "loss": 3.0115,
      "step": 110264
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2522568702697754,
      "learning_rate": 0.0003201778156229796,
      "loss": 3.2694,
      "step": 110265
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9821743965148926,
      "learning_rate": 0.00032017373432191354,
      "loss": 2.8834,
      "step": 110266
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6035282611846924,
      "learning_rate": 0.0003201696530170969,
      "loss": 3.0258,
      "step": 110267
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.009093761444092,
      "learning_rate": 0.00032016557170853037,
      "loss": 3.2153,
      "step": 110268
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.688114047050476,
      "learning_rate": 0.0003201614903962146,
      "loss": 3.1039,
      "step": 110269
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.841515064239502,
      "learning_rate": 0.00032015740908015056,
      "loss": 2.9991,
      "step": 110270
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.024853229522705,
      "learning_rate": 0.0003201533277603387,
      "loss": 2.8735,
      "step": 110271
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1262106895446777,
      "learning_rate": 0.00032014924643678004,
      "loss": 2.9648,
      "step": 110272
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7304483652114868,
      "learning_rate": 0.00032014516510947533,
      "loss": 3.0153,
      "step": 110273
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.876267433166504,
      "learning_rate": 0.00032014108377842514,
      "loss": 3.1877,
      "step": 110274
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.757015585899353,
      "learning_rate": 0.0003201370024436304,
      "loss": 2.99,
      "step": 110275
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5664660930633545,
      "learning_rate": 0.0003201329211050918,
      "loss": 2.7365,
      "step": 110276
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5486215353012085,
      "learning_rate": 0.0003201288397628101,
      "loss": 3.0816,
      "step": 110277
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7140792608261108,
      "learning_rate": 0.0003201247584167861,
      "loss": 2.9891,
      "step": 110278
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.818634271621704,
      "learning_rate": 0.0003201206770670205,
      "loss": 3.0466,
      "step": 110279
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.130418062210083,
      "learning_rate": 0.0003201165957135141,
      "loss": 2.8045,
      "step": 110280
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8625200986862183,
      "learning_rate": 0.0003201125143562676,
      "loss": 3.1537,
      "step": 110281
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7638188600540161,
      "learning_rate": 0.00032010843299528193,
      "loss": 3.0965,
      "step": 110282
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8417760133743286,
      "learning_rate": 0.0003201043516305576,
      "loss": 3.0203,
      "step": 110283
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9000967741012573,
      "learning_rate": 0.00032010027026209545,
      "loss": 3.1328,
      "step": 110284
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7040822505950928,
      "learning_rate": 0.00032009618888989637,
      "loss": 3.1662,
      "step": 110285
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6867867708206177,
      "learning_rate": 0.000320092107513961,
      "loss": 2.9051,
      "step": 110286
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5460519790649414,
      "learning_rate": 0.00032008802613429016,
      "loss": 3.0126,
      "step": 110287
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.445547103881836,
      "learning_rate": 0.00032008394475088465,
      "loss": 2.986,
      "step": 110288
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.520847797393799,
      "learning_rate": 0.000320079863363745,
      "loss": 3.0213,
      "step": 110289
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.616540551185608,
      "learning_rate": 0.00032007578197287216,
      "loss": 3.0839,
      "step": 110290
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8553378582000732,
      "learning_rate": 0.00032007170057826695,
      "loss": 2.9884,
      "step": 110291
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8685590028762817,
      "learning_rate": 0.00032006761917992993,
      "loss": 3.1148,
      "step": 110292
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8727933168411255,
      "learning_rate": 0.00032006353777786194,
      "loss": 3.0209,
      "step": 110293
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4346866607666016,
      "learning_rate": 0.00032005945637206387,
      "loss": 3.0471,
      "step": 110294
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8585461378097534,
      "learning_rate": 0.0003200553749625363,
      "loss": 3.021,
      "step": 110295
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0312752723693848,
      "learning_rate": 0.00032005129354928006,
      "loss": 2.9453,
      "step": 110296
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8856149911880493,
      "learning_rate": 0.00032004721213229595,
      "loss": 3.2264,
      "step": 110297
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9003074169158936,
      "learning_rate": 0.0003200431307115846,
      "loss": 3.0357,
      "step": 110298
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.007554292678833,
      "learning_rate": 0.00032003904928714694,
      "loss": 3.2328,
      "step": 110299
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1363556385040283,
      "learning_rate": 0.0003200349678589836,
      "loss": 2.9825,
      "step": 110300
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8681676387786865,
      "learning_rate": 0.0003200308864270954,
      "loss": 2.9168,
      "step": 110301
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7889623641967773,
      "learning_rate": 0.00032002680499148303,
      "loss": 2.8893,
      "step": 110302
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0874264240264893,
      "learning_rate": 0.0003200227235521474,
      "loss": 2.7701,
      "step": 110303
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.808907389640808,
      "learning_rate": 0.00032001864210908903,
      "loss": 2.9896,
      "step": 110304
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0674331188201904,
      "learning_rate": 0.0003200145606623089,
      "loss": 2.9532,
      "step": 110305
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9712375402450562,
      "learning_rate": 0.00032001047921180774,
      "loss": 2.9786,
      "step": 110306
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.069465160369873,
      "learning_rate": 0.00032000639775758616,
      "loss": 3.0071,
      "step": 110307
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.0893805027008057,
      "learning_rate": 0.00032000231629964506,
      "loss": 2.955,
      "step": 110308
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5806798934936523,
      "learning_rate": 0.00031999823483798515,
      "loss": 2.6386,
      "step": 110309
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9736980199813843,
      "learning_rate": 0.00031999415337260714,
      "loss": 2.9108,
      "step": 110310
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8321585655212402,
      "learning_rate": 0.0003199900719035118,
      "loss": 2.8307,
      "step": 110311
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.813018560409546,
      "learning_rate": 0.00031998599043070013,
      "loss": 3.1758,
      "step": 110312
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.615211009979248,
      "learning_rate": 0.0003199819089541725,
      "loss": 3.0581,
      "step": 110313
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6681164503097534,
      "learning_rate": 0.0003199778274739299,
      "loss": 3.2029,
      "step": 110314
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.772011399269104,
      "learning_rate": 0.0003199737459899731,
      "loss": 2.9675,
      "step": 110315
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8478634357452393,
      "learning_rate": 0.00031996966450230276,
      "loss": 3.1053,
      "step": 110316
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6515376567840576,
      "learning_rate": 0.00031996558301091967,
      "loss": 3.0178,
      "step": 110317
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6359319686889648,
      "learning_rate": 0.00031996150151582465,
      "loss": 2.9789,
      "step": 110318
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8748377561569214,
      "learning_rate": 0.0003199574200170184,
      "loss": 2.9001,
      "step": 110319
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.106743335723877,
      "learning_rate": 0.00031995333851450163,
      "loss": 2.977,
      "step": 110320
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0383036136627197,
      "learning_rate": 0.00031994925700827516,
      "loss": 2.8475,
      "step": 110321
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.343125104904175,
      "learning_rate": 0.00031994517549833985,
      "loss": 3.0345,
      "step": 110322
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6612573862075806,
      "learning_rate": 0.0003199410939846963,
      "loss": 3.0303,
      "step": 110323
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9717248678207397,
      "learning_rate": 0.00031993701246734525,
      "loss": 3.0855,
      "step": 110324
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2640671730041504,
      "learning_rate": 0.00031993293094628765,
      "loss": 3.0096,
      "step": 110325
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.100809097290039,
      "learning_rate": 0.0003199288494215241,
      "loss": 3.1296,
      "step": 110326
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7512048482894897,
      "learning_rate": 0.00031992476789305537,
      "loss": 3.2104,
      "step": 110327
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.893035650253296,
      "learning_rate": 0.00031992068636088215,
      "loss": 3.0946,
      "step": 110328
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9757331609725952,
      "learning_rate": 0.0003199166048250055,
      "loss": 2.9491,
      "step": 110329
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8707923889160156,
      "learning_rate": 0.00031991252328542584,
      "loss": 3.3328,
      "step": 110330
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4124183654785156,
      "learning_rate": 0.00031990844174214415,
      "loss": 3.1322,
      "step": 110331
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0367531776428223,
      "learning_rate": 0.0003199043601951611,
      "loss": 3.2669,
      "step": 110332
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1255228519439697,
      "learning_rate": 0.0003199002786444774,
      "loss": 3.2961,
      "step": 110333
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.4331841468811035,
      "learning_rate": 0.0003198961970900939,
      "loss": 2.9105,
      "step": 110334
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7090861797332764,
      "learning_rate": 0.00031989211553201126,
      "loss": 3.0603,
      "step": 110335
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7694292068481445,
      "learning_rate": 0.0003198880339702304,
      "loss": 2.6196,
      "step": 110336
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2022156715393066,
      "learning_rate": 0.00031988395240475193,
      "loss": 3.1358,
      "step": 110337
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7788574695587158,
      "learning_rate": 0.00031987987083557663,
      "loss": 3.284,
      "step": 110338
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.866555094718933,
      "learning_rate": 0.0003198757892627053,
      "loss": 3.0564,
      "step": 110339
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9903827905654907,
      "learning_rate": 0.00031987170768613875,
      "loss": 2.7862,
      "step": 110340
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.109196186065674,
      "learning_rate": 0.0003198676261058776,
      "loss": 3.2427,
      "step": 110341
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6463054418563843,
      "learning_rate": 0.0003198635445219227,
      "loss": 3.2765,
      "step": 110342
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0163638591766357,
      "learning_rate": 0.0003198594629342748,
      "loss": 2.8477,
      "step": 110343
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0151307582855225,
      "learning_rate": 0.0003198553813429346,
      "loss": 3.0918,
      "step": 110344
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8450487852096558,
      "learning_rate": 0.00031985129974790294,
      "loss": 3.0479,
      "step": 110345
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6538491249084473,
      "learning_rate": 0.0003198472181491807,
      "loss": 2.8708,
      "step": 110346
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8383357524871826,
      "learning_rate": 0.0003198431365467683,
      "loss": 2.8996,
      "step": 110347
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9577184915542603,
      "learning_rate": 0.00031983905494066675,
      "loss": 2.9635,
      "step": 110348
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7894564867019653,
      "learning_rate": 0.0003198349733308768,
      "loss": 2.8248,
      "step": 110349
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3272173404693604,
      "learning_rate": 0.00031983089171739904,
      "loss": 3.174,
      "step": 110350
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0043256282806396,
      "learning_rate": 0.0003198268101002344,
      "loss": 2.9869,
      "step": 110351
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.751481294631958,
      "learning_rate": 0.0003198227284793836,
      "loss": 2.7994,
      "step": 110352
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5745309591293335,
      "learning_rate": 0.00031981864685484733,
      "loss": 3.153,
      "step": 110353
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.42287540435791,
      "learning_rate": 0.0003198145652266264,
      "loss": 3.0917,
      "step": 110354
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7895143032073975,
      "learning_rate": 0.00031981048359472163,
      "loss": 3.2046,
      "step": 110355
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9766945838928223,
      "learning_rate": 0.0003198064019591337,
      "loss": 3.1051,
      "step": 110356
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5859062671661377,
      "learning_rate": 0.0003198023203198633,
      "loss": 2.9645,
      "step": 110357
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7749258279800415,
      "learning_rate": 0.0003197982386769114,
      "loss": 3.2466,
      "step": 110358
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0336554050445557,
      "learning_rate": 0.0003197941570302785,
      "loss": 2.9327,
      "step": 110359
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0907464027404785,
      "learning_rate": 0.0003197900753799656,
      "loss": 3.0026,
      "step": 110360
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.213839530944824,
      "learning_rate": 0.0003197859937259734,
      "loss": 2.9929,
      "step": 110361
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9532735347747803,
      "learning_rate": 0.0003197819120683025,
      "loss": 3.0165,
      "step": 110362
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.05403208732605,
      "learning_rate": 0.0003197778304069538,
      "loss": 2.9686,
      "step": 110363
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6817985773086548,
      "learning_rate": 0.0003197737487419281,
      "loss": 3.0682,
      "step": 110364
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8493173122406006,
      "learning_rate": 0.0003197696670732259,
      "loss": 2.9591,
      "step": 110365
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8848952054977417,
      "learning_rate": 0.0003197655854008483,
      "loss": 3.0662,
      "step": 110366
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8042664527893066,
      "learning_rate": 0.0003197615037247959,
      "loss": 2.921,
      "step": 110367
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.897792935371399,
      "learning_rate": 0.0003197574220450694,
      "loss": 2.9831,
      "step": 110368
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.827283501625061,
      "learning_rate": 0.0003197533403616696,
      "loss": 3.0281,
      "step": 110369
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8214805126190186,
      "learning_rate": 0.0003197492586745974,
      "loss": 3.1375,
      "step": 110370
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.612746238708496,
      "learning_rate": 0.0003197451769838533,
      "loss": 2.996,
      "step": 110371
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3694303035736084,
      "learning_rate": 0.0003197410952894383,
      "loss": 2.9286,
      "step": 110372
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.70339035987854,
      "learning_rate": 0.0003197370135913531,
      "loss": 2.8312,
      "step": 110373
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9785246849060059,
      "learning_rate": 0.0003197329318895983,
      "loss": 3.0164,
      "step": 110374
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7209069728851318,
      "learning_rate": 0.00031972885018417477,
      "loss": 2.9908,
      "step": 110375
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9352480173110962,
      "learning_rate": 0.0003197247684750834,
      "loss": 2.9284,
      "step": 110376
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.682436466217041,
      "learning_rate": 0.00031972068676232475,
      "loss": 2.8262,
      "step": 110377
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.747100591659546,
      "learning_rate": 0.0003197166050458996,
      "loss": 3.0975,
      "step": 110378
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3377420902252197,
      "learning_rate": 0.0003197125233258089,
      "loss": 2.8095,
      "step": 110379
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8603625297546387,
      "learning_rate": 0.0003197084416020531,
      "loss": 3.0133,
      "step": 110380
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.29211688041687,
      "learning_rate": 0.00031970435987463327,
      "loss": 2.9105,
      "step": 110381
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.471364974975586,
      "learning_rate": 0.00031970027814355,
      "loss": 3.0165,
      "step": 110382
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5733652114868164,
      "learning_rate": 0.000319696196408804,
      "loss": 2.9855,
      "step": 110383
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.34464693069458,
      "learning_rate": 0.0003196921146703961,
      "loss": 2.8191,
      "step": 110384
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8559706211090088,
      "learning_rate": 0.0003196880329283272,
      "loss": 2.8701,
      "step": 110385
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.699538469314575,
      "learning_rate": 0.00031968395118259777,
      "loss": 2.8342,
      "step": 110386
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.926526665687561,
      "learning_rate": 0.0003196798694332088,
      "loss": 2.9884,
      "step": 110387
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8537626266479492,
      "learning_rate": 0.000319675787680161,
      "loss": 3.1229,
      "step": 110388
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7754710912704468,
      "learning_rate": 0.00031967170592345505,
      "loss": 2.7988,
      "step": 110389
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0907270908355713,
      "learning_rate": 0.00031966762416309175,
      "loss": 2.8232,
      "step": 110390
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9960960149765015,
      "learning_rate": 0.00031966354239907187,
      "loss": 2.8758,
      "step": 110391
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2925448417663574,
      "learning_rate": 0.0003196594606313962,
      "loss": 3.1363,
      "step": 110392
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.769305944442749,
      "learning_rate": 0.0003196553788600654,
      "loss": 2.9149,
      "step": 110393
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4905412197113037,
      "learning_rate": 0.0003196512970850804,
      "loss": 3.2415,
      "step": 110394
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.06152606010437,
      "learning_rate": 0.0003196472153064418,
      "loss": 2.6708,
      "step": 110395
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0757479667663574,
      "learning_rate": 0.0003196431335241504,
      "loss": 2.8283,
      "step": 110396
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9819822311401367,
      "learning_rate": 0.00031963905173820696,
      "loss": 2.9025,
      "step": 110397
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.2068614959716797,
      "learning_rate": 0.00031963496994861227,
      "loss": 3.2176,
      "step": 110398
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.42867112159729,
      "learning_rate": 0.00031963088815536707,
      "loss": 2.8432,
      "step": 110399
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2615208625793457,
      "learning_rate": 0.00031962680635847207,
      "loss": 2.8696,
      "step": 110400
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.9465088844299316,
      "learning_rate": 0.0003196227245579281,
      "loss": 3.0384,
      "step": 110401
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.134787082672119,
      "learning_rate": 0.00031961864275373595,
      "loss": 3.0862,
      "step": 110402
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6533071994781494,
      "learning_rate": 0.0003196145609458963,
      "loss": 2.984,
      "step": 110403
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0978195667266846,
      "learning_rate": 0.00031961047913440995,
      "loss": 2.7805,
      "step": 110404
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8978890180587769,
      "learning_rate": 0.0003196063973192776,
      "loss": 3.0614,
      "step": 110405
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.662846565246582,
      "learning_rate": 0.0003196023155005,
      "loss": 3.0402,
      "step": 110406
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.990239143371582,
      "learning_rate": 0.00031959823367807807,
      "loss": 3.0713,
      "step": 110407
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8915446996688843,
      "learning_rate": 0.0003195941518520124,
      "loss": 2.9698,
      "step": 110408
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7412285804748535,
      "learning_rate": 0.00031959007002230384,
      "loss": 3.1242,
      "step": 110409
    },
    {
      "epoch": 1.44,
      "grad_norm": 5.211151123046875,
      "learning_rate": 0.00031958598818895313,
      "loss": 2.9629,
      "step": 110410
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7010130882263184,
      "learning_rate": 0.0003195819063519609,
      "loss": 2.9344,
      "step": 110411
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7351230382919312,
      "learning_rate": 0.0003195778245113281,
      "loss": 3.053,
      "step": 110412
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2105534076690674,
      "learning_rate": 0.0003195737426670555,
      "loss": 3.0013,
      "step": 110413
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3051016330718994,
      "learning_rate": 0.0003195696608191436,
      "loss": 2.9527,
      "step": 110414
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.06290340423584,
      "learning_rate": 0.00031956557896759344,
      "loss": 3.0001,
      "step": 110415
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.08093523979187,
      "learning_rate": 0.0003195614971124056,
      "loss": 3.0054,
      "step": 110416
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.642050862312317,
      "learning_rate": 0.0003195574152535811,
      "loss": 2.9228,
      "step": 110417
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.373142719268799,
      "learning_rate": 0.0003195533333911203,
      "loss": 3.0284,
      "step": 110418
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8805485963821411,
      "learning_rate": 0.00031954925152502414,
      "loss": 2.9467,
      "step": 110419
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6866480112075806,
      "learning_rate": 0.0003195451696552935,
      "loss": 3.1616,
      "step": 110420
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.193331003189087,
      "learning_rate": 0.0003195410877819291,
      "loss": 2.9383,
      "step": 110421
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9404046535491943,
      "learning_rate": 0.00031953700590493155,
      "loss": 2.9821,
      "step": 110422
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.133084297180176,
      "learning_rate": 0.0003195329240243017,
      "loss": 2.8997,
      "step": 110423
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7028106451034546,
      "learning_rate": 0.0003195288421400404,
      "loss": 2.8854,
      "step": 110424
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9125174283981323,
      "learning_rate": 0.0003195247602521482,
      "loss": 3.0731,
      "step": 110425
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7537270784378052,
      "learning_rate": 0.0003195206783606261,
      "loss": 3.0214,
      "step": 110426
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1269373893737793,
      "learning_rate": 0.0003195165964654746,
      "loss": 2.9507,
      "step": 110427
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7386107444763184,
      "learning_rate": 0.00031951251456669474,
      "loss": 2.8903,
      "step": 110428
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.126645088195801,
      "learning_rate": 0.0003195084326642871,
      "loss": 3.1813,
      "step": 110429
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.792831301689148,
      "learning_rate": 0.0003195043507582524,
      "loss": 3.1021,
      "step": 110430
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0159544944763184,
      "learning_rate": 0.00031950026884859157,
      "loss": 3.0557,
      "step": 110431
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.863152027130127,
      "learning_rate": 0.0003194961869353052,
      "loss": 3.0495,
      "step": 110432
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.819512128829956,
      "learning_rate": 0.0003194921050183941,
      "loss": 2.9467,
      "step": 110433
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7690362930297852,
      "learning_rate": 0.0003194880230978591,
      "loss": 3.2103,
      "step": 110434
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0448548793792725,
      "learning_rate": 0.0003194839411737009,
      "loss": 3.0429,
      "step": 110435
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1845035552978516,
      "learning_rate": 0.00031947985924592025,
      "loss": 3.0125,
      "step": 110436
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.687127709388733,
      "learning_rate": 0.00031947577731451804,
      "loss": 3.0904,
      "step": 110437
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9328943490982056,
      "learning_rate": 0.00031947169537949475,
      "loss": 2.8979,
      "step": 110438
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.669197916984558,
      "learning_rate": 0.0003194676134408513,
      "loss": 2.9172,
      "step": 110439
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8555190563201904,
      "learning_rate": 0.0003194635314985886,
      "loss": 2.9195,
      "step": 110440
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.450589656829834,
      "learning_rate": 0.0003194594495527072,
      "loss": 3.1643,
      "step": 110441
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.978170394897461,
      "learning_rate": 0.00031945536760320787,
      "loss": 3.093,
      "step": 110442
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.742438793182373,
      "learning_rate": 0.00031945128565009147,
      "loss": 2.9469,
      "step": 110443
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.934522032737732,
      "learning_rate": 0.0003194472036933587,
      "loss": 2.887,
      "step": 110444
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4451444149017334,
      "learning_rate": 0.0003194431217330103,
      "loss": 3.2866,
      "step": 110445
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7781552076339722,
      "learning_rate": 0.00031943903976904713,
      "loss": 3.0452,
      "step": 110446
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9528090953826904,
      "learning_rate": 0.0003194349578014698,
      "loss": 3.1986,
      "step": 110447
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.312610387802124,
      "learning_rate": 0.0003194308758302792,
      "loss": 3.1712,
      "step": 110448
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.33278226852417,
      "learning_rate": 0.000319426793855476,
      "loss": 3.1154,
      "step": 110449
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.228195905685425,
      "learning_rate": 0.000319422711877061,
      "loss": 2.8367,
      "step": 110450
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9929522275924683,
      "learning_rate": 0.00031941862989503497,
      "loss": 2.9697,
      "step": 110451
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3854191303253174,
      "learning_rate": 0.00031941454790939864,
      "loss": 3.0132,
      "step": 110452
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8588039875030518,
      "learning_rate": 0.0003194104659201528,
      "loss": 2.8314,
      "step": 110453
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8029818534851074,
      "learning_rate": 0.0003194063839272981,
      "loss": 2.9705,
      "step": 110454
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8525269031524658,
      "learning_rate": 0.00031940230193083547,
      "loss": 2.7252,
      "step": 110455
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.6243021488189697,
      "learning_rate": 0.00031939821993076553,
      "loss": 3.017,
      "step": 110456
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.1456618309021,
      "learning_rate": 0.00031939413792708915,
      "loss": 3.0244,
      "step": 110457
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1071643829345703,
      "learning_rate": 0.000319390055919807,
      "loss": 3.0493,
      "step": 110458
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.270825147628784,
      "learning_rate": 0.00031938597390891987,
      "loss": 3.0548,
      "step": 110459
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.295970916748047,
      "learning_rate": 0.00031938189189442847,
      "loss": 3.1564,
      "step": 110460
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0616402626037598,
      "learning_rate": 0.0003193778098763337,
      "loss": 3.037,
      "step": 110461
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.195906162261963,
      "learning_rate": 0.00031937372785463623,
      "loss": 3.2227,
      "step": 110462
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2077207565307617,
      "learning_rate": 0.0003193696458293368,
      "loss": 2.917,
      "step": 110463
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.502373695373535,
      "learning_rate": 0.00031936556380043614,
      "loss": 2.7601,
      "step": 110464
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7248502969741821,
      "learning_rate": 0.0003193614817679351,
      "loss": 2.9708,
      "step": 110465
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8969683647155762,
      "learning_rate": 0.00031935739973183437,
      "loss": 2.8158,
      "step": 110466
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.064552068710327,
      "learning_rate": 0.0003193533176921348,
      "loss": 3.0429,
      "step": 110467
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.7276034355163574,
      "learning_rate": 0.00031934923564883705,
      "loss": 2.8725,
      "step": 110468
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.9507851600646973,
      "learning_rate": 0.00031934515360194183,
      "loss": 3.0159,
      "step": 110469
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.831928014755249,
      "learning_rate": 0.0003193410715514501,
      "loss": 3.405,
      "step": 110470
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7279458045959473,
      "learning_rate": 0.00031933698949736244,
      "loss": 3.0721,
      "step": 110471
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.963883876800537,
      "learning_rate": 0.00031933290743967963,
      "loss": 3.0405,
      "step": 110472
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7218389511108398,
      "learning_rate": 0.0003193288253784025,
      "loss": 2.7805,
      "step": 110473
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8565764427185059,
      "learning_rate": 0.0003193247433135319,
      "loss": 2.891,
      "step": 110474
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7467848062515259,
      "learning_rate": 0.0003193206612450683,
      "loss": 3.3865,
      "step": 110475
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6824979782104492,
      "learning_rate": 0.0003193165791730127,
      "loss": 2.8643,
      "step": 110476
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.503552198410034,
      "learning_rate": 0.0003193124970973658,
      "loss": 2.9109,
      "step": 110477
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9876506328582764,
      "learning_rate": 0.0003193084150181283,
      "loss": 2.7346,
      "step": 110478
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.811719536781311,
      "learning_rate": 0.00031930433293530093,
      "loss": 2.6568,
      "step": 110479
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8332626819610596,
      "learning_rate": 0.00031930025084888464,
      "loss": 3.1222,
      "step": 110480
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7166846990585327,
      "learning_rate": 0.00031929616875888,
      "loss": 2.7802,
      "step": 110481
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7298671007156372,
      "learning_rate": 0.0003192920866652879,
      "loss": 3.123,
      "step": 110482
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.916029453277588,
      "learning_rate": 0.000319288004568109,
      "loss": 2.9696,
      "step": 110483
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8416998386383057,
      "learning_rate": 0.0003192839224673441,
      "loss": 3.3117,
      "step": 110484
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2124385833740234,
      "learning_rate": 0.0003192798403629939,
      "loss": 2.9096,
      "step": 110485
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5036075115203857,
      "learning_rate": 0.0003192757582550594,
      "loss": 2.779,
      "step": 110486
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.1635777950286865,
      "learning_rate": 0.00031927167614354094,
      "loss": 3.3468,
      "step": 110487
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9518791437149048,
      "learning_rate": 0.0003192675940284396,
      "loss": 3.1464,
      "step": 110488
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.389261484146118,
      "learning_rate": 0.00031926351190975614,
      "loss": 2.9656,
      "step": 110489
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8044229745864868,
      "learning_rate": 0.0003192594297874911,
      "loss": 2.9399,
      "step": 110490
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2341198921203613,
      "learning_rate": 0.0003192553476616454,
      "loss": 2.8721,
      "step": 110491
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.276679039001465,
      "learning_rate": 0.0003192512655322198,
      "loss": 2.8756,
      "step": 110492
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1425979137420654,
      "learning_rate": 0.00031924718339921496,
      "loss": 3.1147,
      "step": 110493
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9968981742858887,
      "learning_rate": 0.0003192431012626318,
      "loss": 3.0491,
      "step": 110494
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.452319383621216,
      "learning_rate": 0.000319239019122471,
      "loss": 2.8184,
      "step": 110495
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.7514379024505615,
      "learning_rate": 0.0003192349369787332,
      "loss": 3.011,
      "step": 110496
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6720808744430542,
      "learning_rate": 0.0003192308548314193,
      "loss": 2.9159,
      "step": 110497
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9439204931259155,
      "learning_rate": 0.00031922677268053006,
      "loss": 3.106,
      "step": 110498
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8173446655273438,
      "learning_rate": 0.00031922269052606615,
      "loss": 2.971,
      "step": 110499
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.614844799041748,
      "learning_rate": 0.0003192186083680284,
      "loss": 2.9459,
      "step": 110500
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.22355580329895,
      "learning_rate": 0.00031921452620641753,
      "loss": 3.1673,
      "step": 110501
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8380502462387085,
      "learning_rate": 0.0003192104440412344,
      "loss": 2.9956,
      "step": 110502
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9131014347076416,
      "learning_rate": 0.00031920636187247957,
      "loss": 2.9803,
      "step": 110503
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8341693878173828,
      "learning_rate": 0.0003192022797001539,
      "loss": 2.8986,
      "step": 110504
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.229570150375366,
      "learning_rate": 0.00031919819752425826,
      "loss": 2.9556,
      "step": 110505
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.268051862716675,
      "learning_rate": 0.00031919411534479325,
      "loss": 3.0461,
      "step": 110506
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.2498159408569336,
      "learning_rate": 0.00031919003316175975,
      "loss": 3.0371,
      "step": 110507
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.527189016342163,
      "learning_rate": 0.00031918595097515836,
      "loss": 3.0343,
      "step": 110508
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.924837350845337,
      "learning_rate": 0.00031918186878499005,
      "loss": 3.0251,
      "step": 110509
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0965187549591064,
      "learning_rate": 0.0003191777865912554,
      "loss": 2.7588,
      "step": 110510
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2598562240600586,
      "learning_rate": 0.0003191737043939553,
      "loss": 3.2129,
      "step": 110511
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.131411075592041,
      "learning_rate": 0.00031916962219309035,
      "loss": 3.1153,
      "step": 110512
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9655457735061646,
      "learning_rate": 0.00031916553998866153,
      "loss": 2.8753,
      "step": 110513
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9996469020843506,
      "learning_rate": 0.00031916145778066937,
      "loss": 3.148,
      "step": 110514
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8190016746520996,
      "learning_rate": 0.0003191573755691148,
      "loss": 2.9069,
      "step": 110515
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9293633699417114,
      "learning_rate": 0.0003191532933539985,
      "loss": 3.0143,
      "step": 110516
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.0649631023406982,
      "learning_rate": 0.0003191492111353212,
      "loss": 2.5925,
      "step": 110517
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2464780807495117,
      "learning_rate": 0.0003191451289130837,
      "loss": 3.0252,
      "step": 110518
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.217533826828003,
      "learning_rate": 0.00031914104668728685,
      "loss": 2.9909,
      "step": 110519
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8963079452514648,
      "learning_rate": 0.00031913696445793124,
      "loss": 3.007,
      "step": 110520
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9647777080535889,
      "learning_rate": 0.0003191328822250177,
      "loss": 2.943,
      "step": 110521
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.305218458175659,
      "learning_rate": 0.00031912879998854704,
      "loss": 2.9234,
      "step": 110522
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6482614278793335,
      "learning_rate": 0.00031912471774851996,
      "loss": 3.382,
      "step": 110523
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.046821355819702,
      "learning_rate": 0.0003191206355049372,
      "loss": 3.0071,
      "step": 110524
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.1513800621032715,
      "learning_rate": 0.0003191165532577997,
      "loss": 3.1034,
      "step": 110525
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.40826678276062,
      "learning_rate": 0.00031911247100710786,
      "loss": 2.9192,
      "step": 110526
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6417083740234375,
      "learning_rate": 0.00031910838875286277,
      "loss": 3.0448,
      "step": 110527
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.975541353225708,
      "learning_rate": 0.00031910430649506507,
      "loss": 3.0001,
      "step": 110528
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6947059631347656,
      "learning_rate": 0.0003191002242337155,
      "loss": 3.0601,
      "step": 110529
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.099483013153076,
      "learning_rate": 0.00031909614196881474,
      "loss": 3.2805,
      "step": 110530
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.662365436553955,
      "learning_rate": 0.0003190920597003637,
      "loss": 3.1519,
      "step": 110531
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.195836067199707,
      "learning_rate": 0.00031908797742836325,
      "loss": 3.1588,
      "step": 110532
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9189586639404297,
      "learning_rate": 0.0003190838951528138,
      "loss": 2.969,
      "step": 110533
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.082209348678589,
      "learning_rate": 0.0003190798128737163,
      "loss": 3.0083,
      "step": 110534
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8153175115585327,
      "learning_rate": 0.0003190757305910716,
      "loss": 3.2467,
      "step": 110535
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.628524899482727,
      "learning_rate": 0.0003190716483048803,
      "loss": 2.8321,
      "step": 110536
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7145355939865112,
      "learning_rate": 0.00031906756601514326,
      "loss": 3.0083,
      "step": 110537
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5995540618896484,
      "learning_rate": 0.0003190634837218612,
      "loss": 2.9792,
      "step": 110538
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9112404584884644,
      "learning_rate": 0.0003190594014250348,
      "loss": 2.8027,
      "step": 110539
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6255815029144287,
      "learning_rate": 0.000319055319124665,
      "loss": 3.1856,
      "step": 110540
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3995919227600098,
      "learning_rate": 0.0003190512368207524,
      "loss": 2.6403,
      "step": 110541
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.794219970703125,
      "learning_rate": 0.0003190471545132978,
      "loss": 2.9721,
      "step": 110542
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1121580600738525,
      "learning_rate": 0.00031904307220230205,
      "loss": 2.8392,
      "step": 110543
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.692434787750244,
      "learning_rate": 0.00031903898988776573,
      "loss": 2.9382,
      "step": 110544
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8923709392547607,
      "learning_rate": 0.00031903490756968975,
      "loss": 3.019,
      "step": 110545
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8631783723831177,
      "learning_rate": 0.00031903082524807483,
      "loss": 3.1954,
      "step": 110546
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5810437202453613,
      "learning_rate": 0.0003190267429229218,
      "loss": 3.0951,
      "step": 110547
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.418715238571167,
      "learning_rate": 0.0003190226605942312,
      "loss": 2.9172,
      "step": 110548
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7963871955871582,
      "learning_rate": 0.00031901857826200394,
      "loss": 2.7138,
      "step": 110549
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7082494497299194,
      "learning_rate": 0.0003190144959262409,
      "loss": 2.9728,
      "step": 110550
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.2413735389709473,
      "learning_rate": 0.0003190104135869426,
      "loss": 3.1684,
      "step": 110551
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3567700386047363,
      "learning_rate": 0.0003190063312441099,
      "loss": 3.1331,
      "step": 110552
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0507335662841797,
      "learning_rate": 0.00031900224889774365,
      "loss": 2.6646,
      "step": 110553
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.9789772033691406,
      "learning_rate": 0.0003189981665478444,
      "loss": 3.0675,
      "step": 110554
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.660308361053467,
      "learning_rate": 0.00031899408419441307,
      "loss": 2.8609,
      "step": 110555
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9626743793487549,
      "learning_rate": 0.00031899000183745045,
      "loss": 2.8061,
      "step": 110556
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7637237310409546,
      "learning_rate": 0.0003189859194769572,
      "loss": 3.3566,
      "step": 110557
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5990113019943237,
      "learning_rate": 0.00031898183711293407,
      "loss": 2.892,
      "step": 110558
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0041396617889404,
      "learning_rate": 0.00031897775474538193,
      "loss": 2.9222,
      "step": 110559
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0354926586151123,
      "learning_rate": 0.0003189736723743013,
      "loss": 2.8087,
      "step": 110560
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.983034372329712,
      "learning_rate": 0.00031896958999969325,
      "loss": 2.8752,
      "step": 110561
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.185370922088623,
      "learning_rate": 0.00031896550762155844,
      "loss": 3.0078,
      "step": 110562
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.161486864089966,
      "learning_rate": 0.0003189614252398974,
      "loss": 3.1411,
      "step": 110563
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9478986263275146,
      "learning_rate": 0.00031895734285471123,
      "loss": 2.9657,
      "step": 110564
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3983492851257324,
      "learning_rate": 0.0003189532604660005,
      "loss": 3.2479,
      "step": 110565
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2724127769470215,
      "learning_rate": 0.00031894917807376603,
      "loss": 2.8108,
      "step": 110566
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8421401977539062,
      "learning_rate": 0.0003189450956780085,
      "loss": 3.13,
      "step": 110567
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7266998291015625,
      "learning_rate": 0.00031894101327872875,
      "loss": 3.0176,
      "step": 110568
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.160482406616211,
      "learning_rate": 0.00031893693087592747,
      "loss": 3.2568,
      "step": 110569
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9267429113388062,
      "learning_rate": 0.00031893284846960546,
      "loss": 2.8779,
      "step": 110570
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0682692527770996,
      "learning_rate": 0.0003189287660597635,
      "loss": 3.1702,
      "step": 110571
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8877265453338623,
      "learning_rate": 0.00031892468364640234,
      "loss": 3.0323,
      "step": 110572
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.352756500244141,
      "learning_rate": 0.00031892060122952265,
      "loss": 2.9302,
      "step": 110573
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.878300905227661,
      "learning_rate": 0.00031891651880912536,
      "loss": 3.0808,
      "step": 110574
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6027964353561401,
      "learning_rate": 0.0003189124363852111,
      "loss": 2.8607,
      "step": 110575
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.325343608856201,
      "learning_rate": 0.00031890835395778064,
      "loss": 2.8246,
      "step": 110576
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0515687465667725,
      "learning_rate": 0.0003189042715268348,
      "loss": 3.1337,
      "step": 110577
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7897402048110962,
      "learning_rate": 0.0003189001890923742,
      "loss": 2.9574,
      "step": 110578
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0326356887817383,
      "learning_rate": 0.00031889610665439977,
      "loss": 2.918,
      "step": 110579
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.749091625213623,
      "learning_rate": 0.00031889202421291226,
      "loss": 3.0253,
      "step": 110580
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.4740854501724243,
      "learning_rate": 0.00031888794176791226,
      "loss": 3.1374,
      "step": 110581
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.529402256011963,
      "learning_rate": 0.00031888385931940063,
      "loss": 3.0225,
      "step": 110582
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5882604122161865,
      "learning_rate": 0.0003188797768673782,
      "loss": 2.908,
      "step": 110583
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.950554370880127,
      "learning_rate": 0.0003188756944118456,
      "loss": 3.059,
      "step": 110584
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1104166507720947,
      "learning_rate": 0.00031887161195280373,
      "loss": 3.1772,
      "step": 110585
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.6284537315368652,
      "learning_rate": 0.00031886752949025326,
      "loss": 3.0956,
      "step": 110586
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9003525972366333,
      "learning_rate": 0.0003188634470241949,
      "loss": 3.1242,
      "step": 110587
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9995070695877075,
      "learning_rate": 0.0003188593645546295,
      "loss": 2.9807,
      "step": 110588
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3367795944213867,
      "learning_rate": 0.00031885528208155776,
      "loss": 2.9588,
      "step": 110589
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.6034438610076904,
      "learning_rate": 0.00031885119960498053,
      "loss": 3.1447,
      "step": 110590
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9112730026245117,
      "learning_rate": 0.00031884711712489846,
      "loss": 2.9141,
      "step": 110591
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8072574138641357,
      "learning_rate": 0.00031884303464131236,
      "loss": 2.9295,
      "step": 110592
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.072685718536377,
      "learning_rate": 0.0003188389521542229,
      "loss": 3.1584,
      "step": 110593
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.307030439376831,
      "learning_rate": 0.0003188348696636311,
      "loss": 3.0893,
      "step": 110594
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9403759241104126,
      "learning_rate": 0.00031883078716953743,
      "loss": 2.7619,
      "step": 110595
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9039767980575562,
      "learning_rate": 0.0003188267046719428,
      "loss": 3.0214,
      "step": 110596
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8993438482284546,
      "learning_rate": 0.00031882262217084795,
      "loss": 2.9835,
      "step": 110597
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.036850929260254,
      "learning_rate": 0.0003188185396662536,
      "loss": 3.0091,
      "step": 110598
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.25350284576416,
      "learning_rate": 0.00031881445715816045,
      "loss": 3.2382,
      "step": 110599
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1196041107177734,
      "learning_rate": 0.0003188103746465694,
      "loss": 3.0113,
      "step": 110600
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7458871603012085,
      "learning_rate": 0.00031880629213148115,
      "loss": 2.999,
      "step": 110601
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.33453369140625,
      "learning_rate": 0.0003188022096128965,
      "loss": 3.0519,
      "step": 110602
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.0858113765716553,
      "learning_rate": 0.000318798127090816,
      "loss": 3.0383,
      "step": 110603
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.229210615158081,
      "learning_rate": 0.00031879404456524083,
      "loss": 3.2417,
      "step": 110604
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7511941194534302,
      "learning_rate": 0.0003187899620361713,
      "loss": 2.9183,
      "step": 110605
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8536887168884277,
      "learning_rate": 0.00031878587950360843,
      "loss": 2.7386,
      "step": 110606
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.643141508102417,
      "learning_rate": 0.00031878179696755285,
      "loss": 2.8428,
      "step": 110607
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1950998306274414,
      "learning_rate": 0.00031877771442800543,
      "loss": 3.3675,
      "step": 110608
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7876230478286743,
      "learning_rate": 0.0003187736318849669,
      "loss": 2.9092,
      "step": 110609
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7859975099563599,
      "learning_rate": 0.0003187695493384379,
      "loss": 3.2841,
      "step": 110610
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1727871894836426,
      "learning_rate": 0.00031876546678841943,
      "loss": 2.8793,
      "step": 110611
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.254892587661743,
      "learning_rate": 0.000318761384234912,
      "loss": 2.8098,
      "step": 110612
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9432692527770996,
      "learning_rate": 0.00031875730167791656,
      "loss": 3.1142,
      "step": 110613
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.170714378356934,
      "learning_rate": 0.00031875321911743374,
      "loss": 2.9189,
      "step": 110614
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.145535469055176,
      "learning_rate": 0.0003187491365534643,
      "loss": 2.8883,
      "step": 110615
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7806864976882935,
      "learning_rate": 0.0003187450539860091,
      "loss": 2.9204,
      "step": 110616
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3105366230010986,
      "learning_rate": 0.0003187409714150688,
      "loss": 2.8608,
      "step": 110617
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1246745586395264,
      "learning_rate": 0.0003187368888406442,
      "loss": 3.2343,
      "step": 110618
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1608972549438477,
      "learning_rate": 0.00031873280626273606,
      "loss": 2.9139,
      "step": 110619
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0732297897338867,
      "learning_rate": 0.0003187287236813452,
      "loss": 3.2408,
      "step": 110620
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8410793542861938,
      "learning_rate": 0.00031872464109647223,
      "loss": 2.9804,
      "step": 110621
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9090023040771484,
      "learning_rate": 0.00031872055850811805,
      "loss": 3.0892,
      "step": 110622
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6840217113494873,
      "learning_rate": 0.0003187164759162834,
      "loss": 2.9682,
      "step": 110623
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3574318885803223,
      "learning_rate": 0.0003187123933209689,
      "loss": 2.9785,
      "step": 110624
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1934924125671387,
      "learning_rate": 0.0003187083107221755,
      "loss": 2.9982,
      "step": 110625
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.971760630607605,
      "learning_rate": 0.00031870422811990383,
      "loss": 3.2649,
      "step": 110626
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5287940502166748,
      "learning_rate": 0.0003187001455141547,
      "loss": 3.238,
      "step": 110627
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6645675897598267,
      "learning_rate": 0.0003186960629049288,
      "loss": 2.8965,
      "step": 110628
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8320167064666748,
      "learning_rate": 0.00031869198029222713,
      "loss": 2.9389,
      "step": 110629
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9682486057281494,
      "learning_rate": 0.00031868789767605004,
      "loss": 3.0513,
      "step": 110630
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.399038791656494,
      "learning_rate": 0.0003186838150563986,
      "loss": 3.0777,
      "step": 110631
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0250654220581055,
      "learning_rate": 0.0003186797324332736,
      "loss": 3.2602,
      "step": 110632
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8034837245941162,
      "learning_rate": 0.00031867564980667557,
      "loss": 3.166,
      "step": 110633
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.993861675262451,
      "learning_rate": 0.0003186715671766054,
      "loss": 2.8646,
      "step": 110634
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.826625108718872,
      "learning_rate": 0.0003186674845430639,
      "loss": 2.7563,
      "step": 110635
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8331282138824463,
      "learning_rate": 0.00031866340190605163,
      "loss": 2.9607,
      "step": 110636
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6291322708129883,
      "learning_rate": 0.00031865931926556955,
      "loss": 3.0606,
      "step": 110637
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2561089992523193,
      "learning_rate": 0.0003186552366216184,
      "loss": 2.8802,
      "step": 110638
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7003295421600342,
      "learning_rate": 0.0003186511539741988,
      "loss": 2.9592,
      "step": 110639
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7373887300491333,
      "learning_rate": 0.0003186470713233117,
      "loss": 3.1968,
      "step": 110640
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8788199424743652,
      "learning_rate": 0.0003186429886689577,
      "loss": 2.6964,
      "step": 110641
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.071110963821411,
      "learning_rate": 0.00031863890601113764,
      "loss": 3.0508,
      "step": 110642
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4490559101104736,
      "learning_rate": 0.00031863482334985215,
      "loss": 3.041,
      "step": 110643
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.123570203781128,
      "learning_rate": 0.00031863074068510226,
      "loss": 2.6834,
      "step": 110644
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8178735971450806,
      "learning_rate": 0.00031862665801688845,
      "loss": 3.0467,
      "step": 110645
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.6274402141571045,
      "learning_rate": 0.0003186225753452116,
      "loss": 3.4431,
      "step": 110646
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.4390757083892822,
      "learning_rate": 0.0003186184926700725,
      "loss": 2.9946,
      "step": 110647
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0182619094848633,
      "learning_rate": 0.00031861440999147187,
      "loss": 2.9452,
      "step": 110648
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.253880023956299,
      "learning_rate": 0.0003186103273094104,
      "loss": 2.895,
      "step": 110649
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.6618733406066895,
      "learning_rate": 0.00031860624462388905,
      "loss": 2.822,
      "step": 110650
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7561932802200317,
      "learning_rate": 0.0003186021619349084,
      "loss": 2.8378,
      "step": 110651
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0187666416168213,
      "learning_rate": 0.00031859807924246916,
      "loss": 3.0292,
      "step": 110652
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6670770645141602,
      "learning_rate": 0.00031859399654657225,
      "loss": 2.9348,
      "step": 110653
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3161959648132324,
      "learning_rate": 0.0003185899138472184,
      "loss": 3.067,
      "step": 110654
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6887623071670532,
      "learning_rate": 0.00031858583114440826,
      "loss": 3.0567,
      "step": 110655
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1578221321105957,
      "learning_rate": 0.0003185817484381428,
      "loss": 3.0314,
      "step": 110656
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9859539270401,
      "learning_rate": 0.0003185776657284225,
      "loss": 2.9748,
      "step": 110657
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9627206325531006,
      "learning_rate": 0.0003185735830152483,
      "loss": 3.166,
      "step": 110658
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.087956666946411,
      "learning_rate": 0.000318569500298621,
      "loss": 3.1878,
      "step": 110659
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8999696969985962,
      "learning_rate": 0.0003185654175785412,
      "loss": 2.9657,
      "step": 110660
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6204142570495605,
      "learning_rate": 0.00031856133485500974,
      "loss": 2.9734,
      "step": 110661
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.040172576904297,
      "learning_rate": 0.00031855725212802735,
      "loss": 2.8762,
      "step": 110662
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1993000507354736,
      "learning_rate": 0.0003185531693975949,
      "loss": 2.9418,
      "step": 110663
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.560910224914551,
      "learning_rate": 0.000318549086663713,
      "loss": 3.03,
      "step": 110664
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7604305744171143,
      "learning_rate": 0.00031854500392638243,
      "loss": 2.8423,
      "step": 110665
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.262916088104248,
      "learning_rate": 0.0003185409211856041,
      "loss": 2.7149,
      "step": 110666
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5088260173797607,
      "learning_rate": 0.0003185368384413786,
      "loss": 2.8904,
      "step": 110667
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5571110248565674,
      "learning_rate": 0.0003185327556937067,
      "loss": 2.6153,
      "step": 110668
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.692047595977783,
      "learning_rate": 0.00031852867294258934,
      "loss": 3.0418,
      "step": 110669
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6543582677841187,
      "learning_rate": 0.0003185245901880271,
      "loss": 2.8387,
      "step": 110670
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8853163719177246,
      "learning_rate": 0.0003185205074300208,
      "loss": 3.1222,
      "step": 110671
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.83127760887146,
      "learning_rate": 0.0003185164246685712,
      "loss": 2.8936,
      "step": 110672
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8977726697921753,
      "learning_rate": 0.000318512341903679,
      "loss": 3.1027,
      "step": 110673
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8141720294952393,
      "learning_rate": 0.000318508259135345,
      "loss": 2.8821,
      "step": 110674
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.760703206062317,
      "learning_rate": 0.00031850417636357,
      "loss": 2.8254,
      "step": 110675
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.713839054107666,
      "learning_rate": 0.0003185000935883547,
      "loss": 2.9251,
      "step": 110676
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.163456916809082,
      "learning_rate": 0.0003184960108096999,
      "loss": 2.9717,
      "step": 110677
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9815808534622192,
      "learning_rate": 0.0003184919280276064,
      "loss": 3.0703,
      "step": 110678
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.827405571937561,
      "learning_rate": 0.0003184878452420748,
      "loss": 3.2551,
      "step": 110679
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1194684505462646,
      "learning_rate": 0.000318483762453106,
      "loss": 2.8221,
      "step": 110680
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.287973403930664,
      "learning_rate": 0.0003184796796607007,
      "loss": 3.0251,
      "step": 110681
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0839879512786865,
      "learning_rate": 0.0003184755968648597,
      "loss": 2.9502,
      "step": 110682
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5914961099624634,
      "learning_rate": 0.0003184715140655838,
      "loss": 3.1117,
      "step": 110683
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.308729648590088,
      "learning_rate": 0.0003184674312628735,
      "loss": 2.7481,
      "step": 110684
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1405491828918457,
      "learning_rate": 0.00031846334845672995,
      "loss": 2.9432,
      "step": 110685
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.643809199333191,
      "learning_rate": 0.0003184592656471537,
      "loss": 2.9752,
      "step": 110686
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0483322143554688,
      "learning_rate": 0.0003184551828341455,
      "loss": 2.9337,
      "step": 110687
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.027782440185547,
      "learning_rate": 0.00031845110001770606,
      "loss": 3.0642,
      "step": 110688
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.157637119293213,
      "learning_rate": 0.0003184470171978363,
      "loss": 2.9685,
      "step": 110689
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0424506664276123,
      "learning_rate": 0.00031844293437453683,
      "loss": 2.9987,
      "step": 110690
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5390920639038086,
      "learning_rate": 0.00031843885154780844,
      "loss": 3.1557,
      "step": 110691
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.655349016189575,
      "learning_rate": 0.00031843476871765196,
      "loss": 3.0037,
      "step": 110692
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2129039764404297,
      "learning_rate": 0.0003184306858840682,
      "loss": 3.0475,
      "step": 110693
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9037432670593262,
      "learning_rate": 0.00031842660304705773,
      "loss": 2.9654,
      "step": 110694
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5724668502807617,
      "learning_rate": 0.0003184225202066214,
      "loss": 2.8996,
      "step": 110695
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8955048322677612,
      "learning_rate": 0.00031841843736276005,
      "loss": 3.0546,
      "step": 110696
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.875713586807251,
      "learning_rate": 0.00031841435451547426,
      "loss": 2.9292,
      "step": 110697
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.697752594947815,
      "learning_rate": 0.00031841027166476494,
      "loss": 2.912,
      "step": 110698
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7160145044326782,
      "learning_rate": 0.0003184061888106329,
      "loss": 2.8294,
      "step": 110699
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8697301149368286,
      "learning_rate": 0.0003184021059530787,
      "loss": 2.9121,
      "step": 110700
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.697697639465332,
      "learning_rate": 0.00031839802309210316,
      "loss": 3.1539,
      "step": 110701
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6973437070846558,
      "learning_rate": 0.0003183939402277072,
      "loss": 2.8063,
      "step": 110702
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.151193141937256,
      "learning_rate": 0.00031838985735989134,
      "loss": 3.196,
      "step": 110703
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7148497104644775,
      "learning_rate": 0.0003183857744886565,
      "loss": 2.7516,
      "step": 110704
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4643259048461914,
      "learning_rate": 0.0003183816916140035,
      "loss": 2.9751,
      "step": 110705
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.421485662460327,
      "learning_rate": 0.00031837760873593287,
      "loss": 2.8562,
      "step": 110706
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0498554706573486,
      "learning_rate": 0.00031837352585444554,
      "loss": 3.085,
      "step": 110707
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7718219757080078,
      "learning_rate": 0.0003183694429695423,
      "loss": 3.2523,
      "step": 110708
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.059209823608398,
      "learning_rate": 0.00031836536008122366,
      "loss": 3.0568,
      "step": 110709
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2567107677459717,
      "learning_rate": 0.0003183612771894906,
      "loss": 2.6613,
      "step": 110710
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.642073392868042,
      "learning_rate": 0.00031835719429434403,
      "loss": 3.178,
      "step": 110711
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.132563591003418,
      "learning_rate": 0.0003183531113957843,
      "loss": 3.0235,
      "step": 110712
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6709208488464355,
      "learning_rate": 0.0003183490284938124,
      "loss": 2.7777,
      "step": 110713
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7753008604049683,
      "learning_rate": 0.00031834494558842915,
      "loss": 3.1331,
      "step": 110714
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1765921115875244,
      "learning_rate": 0.0003183408626796352,
      "loss": 3.1643,
      "step": 110715
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1592957973480225,
      "learning_rate": 0.0003183367797674313,
      "loss": 2.8698,
      "step": 110716
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5077649354934692,
      "learning_rate": 0.0003183326968518183,
      "loss": 3.0345,
      "step": 110717
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4033069610595703,
      "learning_rate": 0.0003183286139327969,
      "loss": 3.0893,
      "step": 110718
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.945039987564087,
      "learning_rate": 0.00031832453101036777,
      "loss": 2.7149,
      "step": 110719
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.731355905532837,
      "learning_rate": 0.0003183204480845319,
      "loss": 2.9068,
      "step": 110720
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7645319700241089,
      "learning_rate": 0.0003183163651552898,
      "loss": 2.9832,
      "step": 110721
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.783834934234619,
      "learning_rate": 0.0003183122822226424,
      "loss": 2.6854,
      "step": 110722
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6469902992248535,
      "learning_rate": 0.00031830819928659043,
      "loss": 3.0066,
      "step": 110723
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2010631561279297,
      "learning_rate": 0.00031830411634713456,
      "loss": 3.0913,
      "step": 110724
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2862424850463867,
      "learning_rate": 0.00031830003340427564,
      "loss": 3.0146,
      "step": 110725
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9077064990997314,
      "learning_rate": 0.0003182959504580144,
      "loss": 3.1118,
      "step": 110726
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.4497112035751343,
      "learning_rate": 0.00031829186750835156,
      "loss": 2.9755,
      "step": 110727
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5984903573989868,
      "learning_rate": 0.0003182877845552879,
      "loss": 2.974,
      "step": 110728
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.635019063949585,
      "learning_rate": 0.00031828370159882433,
      "loss": 2.8657,
      "step": 110729
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.659644365310669,
      "learning_rate": 0.0003182796186389613,
      "loss": 2.8232,
      "step": 110730
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7126269340515137,
      "learning_rate": 0.00031827553567569984,
      "loss": 2.9573,
      "step": 110731
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8582775592803955,
      "learning_rate": 0.0003182714527090406,
      "loss": 2.9805,
      "step": 110732
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.969876766204834,
      "learning_rate": 0.00031826736973898445,
      "loss": 2.7734,
      "step": 110733
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8995083570480347,
      "learning_rate": 0.0003182632867655319,
      "loss": 3.0213,
      "step": 110734
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.236480474472046,
      "learning_rate": 0.0003182592037886839,
      "loss": 3.037,
      "step": 110735
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9010756015777588,
      "learning_rate": 0.00031825512080844125,
      "loss": 2.8881,
      "step": 110736
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9149333238601685,
      "learning_rate": 0.0003182510378248045,
      "loss": 2.8523,
      "step": 110737
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.9246461391448975,
      "learning_rate": 0.0003182469548377746,
      "loss": 3.1533,
      "step": 110738
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.428898334503174,
      "learning_rate": 0.0003182428718473523,
      "loss": 2.9403,
      "step": 110739
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.203094720840454,
      "learning_rate": 0.00031823878885353825,
      "loss": 2.8894,
      "step": 110740
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2134385108947754,
      "learning_rate": 0.00031823470585633327,
      "loss": 2.9158,
      "step": 110741
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.836681842803955,
      "learning_rate": 0.0003182306228557381,
      "loss": 2.9901,
      "step": 110742
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1018128395080566,
      "learning_rate": 0.0003182265398517535,
      "loss": 3.1417,
      "step": 110743
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.736495018005371,
      "learning_rate": 0.00031822245684438025,
      "loss": 2.9984,
      "step": 110744
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9603235721588135,
      "learning_rate": 0.00031821837383361917,
      "loss": 3.0027,
      "step": 110745
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7987034320831299,
      "learning_rate": 0.0003182142908194709,
      "loss": 2.9718,
      "step": 110746
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0796406269073486,
      "learning_rate": 0.0003182102078019362,
      "loss": 3.0731,
      "step": 110747
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5533900260925293,
      "learning_rate": 0.000318206124781016,
      "loss": 2.9292,
      "step": 110748
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.698960304260254,
      "learning_rate": 0.0003182020417567108,
      "loss": 2.9837,
      "step": 110749
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9008204936981201,
      "learning_rate": 0.0003181979587290215,
      "loss": 2.9332,
      "step": 110750
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8867943286895752,
      "learning_rate": 0.00031819387569794903,
      "loss": 2.8693,
      "step": 110751
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7527875900268555,
      "learning_rate": 0.0003181897926634938,
      "loss": 2.9487,
      "step": 110752
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.745511293411255,
      "learning_rate": 0.0003181857096256568,
      "loss": 3.1484,
      "step": 110753
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.86623477935791,
      "learning_rate": 0.0003181816265844388,
      "loss": 2.9982,
      "step": 110754
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.025799512863159,
      "learning_rate": 0.0003181775435398404,
      "loss": 3.1391,
      "step": 110755
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.797510027885437,
      "learning_rate": 0.0003181734604918625,
      "loss": 3.0914,
      "step": 110756
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.193351984024048,
      "learning_rate": 0.0003181693774405058,
      "loss": 2.9566,
      "step": 110757
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.8978254795074463,
      "learning_rate": 0.00031816529438577105,
      "loss": 2.8021,
      "step": 110758
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7519147396087646,
      "learning_rate": 0.000318161211327659,
      "loss": 3.0695,
      "step": 110759
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.831800103187561,
      "learning_rate": 0.0003181571282661706,
      "loss": 3.0168,
      "step": 110760
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.076629877090454,
      "learning_rate": 0.00031815304520130624,
      "loss": 2.8939,
      "step": 110761
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.054043769836426,
      "learning_rate": 0.000318148962133067,
      "loss": 2.8621,
      "step": 110762
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8247385025024414,
      "learning_rate": 0.0003181448790614535,
      "loss": 2.9726,
      "step": 110763
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5817793607711792,
      "learning_rate": 0.00031814079598646653,
      "loss": 2.922,
      "step": 110764
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.74155592918396,
      "learning_rate": 0.00031813671290810685,
      "loss": 2.8963,
      "step": 110765
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8797518014907837,
      "learning_rate": 0.00031813262982637513,
      "loss": 3.4388,
      "step": 110766
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8156276941299438,
      "learning_rate": 0.00031812854674127236,
      "loss": 3.1087,
      "step": 110767
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6387438774108887,
      "learning_rate": 0.000318124463652799,
      "loss": 2.8474,
      "step": 110768
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.702866315841675,
      "learning_rate": 0.00031812038056095604,
      "loss": 2.9569,
      "step": 110769
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.042942762374878,
      "learning_rate": 0.00031811629746574416,
      "loss": 2.9207,
      "step": 110770
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5418434143066406,
      "learning_rate": 0.00031811221436716416,
      "loss": 2.7745,
      "step": 110771
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5639517307281494,
      "learning_rate": 0.0003181081312652167,
      "loss": 2.8728,
      "step": 110772
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.969388484954834,
      "learning_rate": 0.00031810404815990254,
      "loss": 2.7403,
      "step": 110773
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6253975629806519,
      "learning_rate": 0.00031809996505122255,
      "loss": 3.0358,
      "step": 110774
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1890852451324463,
      "learning_rate": 0.0003180958819391775,
      "loss": 2.8926,
      "step": 110775
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9541893005371094,
      "learning_rate": 0.000318091798823768,
      "loss": 2.8984,
      "step": 110776
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.582153797149658,
      "learning_rate": 0.00031808771570499494,
      "loss": 2.8486,
      "step": 110777
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2021753787994385,
      "learning_rate": 0.000318083632582859,
      "loss": 2.7891,
      "step": 110778
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.708031177520752,
      "learning_rate": 0.000318079549457361,
      "loss": 2.9888,
      "step": 110779
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.093202590942383,
      "learning_rate": 0.0003180754663285016,
      "loss": 3.0009,
      "step": 110780
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.4747235774993896,
      "learning_rate": 0.00031807138319628174,
      "loss": 2.5209,
      "step": 110781
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8931008577346802,
      "learning_rate": 0.000318067300060702,
      "loss": 3.0367,
      "step": 110782
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6279075145721436,
      "learning_rate": 0.0003180632169217632,
      "loss": 3.2554,
      "step": 110783
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6273021697998047,
      "learning_rate": 0.00031805913377946617,
      "loss": 3.0955,
      "step": 110784
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.6281166076660156,
      "learning_rate": 0.00031805505063381157,
      "loss": 2.925,
      "step": 110785
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3283936977386475,
      "learning_rate": 0.0003180509674848001,
      "loss": 2.9881,
      "step": 110786
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0637753009796143,
      "learning_rate": 0.00031804688433243275,
      "loss": 3.1133,
      "step": 110787
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9462705850601196,
      "learning_rate": 0.0003180428011767101,
      "loss": 2.9891,
      "step": 110788
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.9189817905426025,
      "learning_rate": 0.0003180387180176329,
      "loss": 3.0682,
      "step": 110789
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.85074782371521,
      "learning_rate": 0.00031803463485520207,
      "loss": 2.9155,
      "step": 110790
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.950624942779541,
      "learning_rate": 0.0003180305516894182,
      "loss": 3.0906,
      "step": 110791
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0442967414855957,
      "learning_rate": 0.00031802646852028215,
      "loss": 2.9245,
      "step": 110792
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6418163776397705,
      "learning_rate": 0.0003180223853477946,
      "loss": 3.1482,
      "step": 110793
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7303059101104736,
      "learning_rate": 0.00031801830217195634,
      "loss": 2.9845,
      "step": 110794
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.119100570678711,
      "learning_rate": 0.00031801421899276813,
      "loss": 3.1951,
      "step": 110795
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0913937091827393,
      "learning_rate": 0.00031801013581023076,
      "loss": 2.9147,
      "step": 110796
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2598583698272705,
      "learning_rate": 0.00031800605262434495,
      "loss": 3.0443,
      "step": 110797
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8006601333618164,
      "learning_rate": 0.0003180019694351115,
      "loss": 2.8648,
      "step": 110798
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0830516815185547,
      "learning_rate": 0.0003179978862425311,
      "loss": 2.8901,
      "step": 110799
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.886081576347351,
      "learning_rate": 0.0003179938030466046,
      "loss": 2.8755,
      "step": 110800
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9454002380371094,
      "learning_rate": 0.00031798971984733275,
      "loss": 3.1029,
      "step": 110801
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.001784324645996,
      "learning_rate": 0.0003179856366447162,
      "loss": 2.896,
      "step": 110802
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7840330600738525,
      "learning_rate": 0.0003179815534387558,
      "loss": 2.9121,
      "step": 110803
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7813305854797363,
      "learning_rate": 0.00031797747022945227,
      "loss": 2.8946,
      "step": 110804
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7380845546722412,
      "learning_rate": 0.0003179733870168064,
      "loss": 3.0174,
      "step": 110805
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.929906964302063,
      "learning_rate": 0.00031796930380081895,
      "loss": 3.0169,
      "step": 110806
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8875676393508911,
      "learning_rate": 0.0003179652205814907,
      "loss": 2.9474,
      "step": 110807
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.9076733589172363,
      "learning_rate": 0.0003179611373588223,
      "loss": 2.8615,
      "step": 110808
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.682133913040161,
      "learning_rate": 0.00031795705413281466,
      "loss": 3.5061,
      "step": 110809
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.165663242340088,
      "learning_rate": 0.00031795297090346844,
      "loss": 3.0854,
      "step": 110810
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.284877300262451,
      "learning_rate": 0.0003179488876707844,
      "loss": 3.054,
      "step": 110811
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.376925230026245,
      "learning_rate": 0.00031794480443476334,
      "loss": 2.8077,
      "step": 110812
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3803212642669678,
      "learning_rate": 0.000317940721195406,
      "loss": 2.7065,
      "step": 110813
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5662999153137207,
      "learning_rate": 0.0003179366379527131,
      "loss": 3.0569,
      "step": 110814
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8971385955810547,
      "learning_rate": 0.0003179325547066855,
      "loss": 3.1449,
      "step": 110815
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2013936042785645,
      "learning_rate": 0.0003179284714573239,
      "loss": 2.9899,
      "step": 110816
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.389676570892334,
      "learning_rate": 0.000317924388204629,
      "loss": 3.0058,
      "step": 110817
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9522145986557007,
      "learning_rate": 0.0003179203049486017,
      "loss": 3.1291,
      "step": 110818
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.125227928161621,
      "learning_rate": 0.0003179162216892426,
      "loss": 2.8043,
      "step": 110819
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9787366390228271,
      "learning_rate": 0.00031791213842655256,
      "loss": 3.2228,
      "step": 110820
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5562374591827393,
      "learning_rate": 0.0003179080551605324,
      "loss": 3.0646,
      "step": 110821
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7079222202301025,
      "learning_rate": 0.0003179039718911827,
      "loss": 2.9867,
      "step": 110822
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6218769550323486,
      "learning_rate": 0.0003178998886185043,
      "loss": 3.0984,
      "step": 110823
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8522435426712036,
      "learning_rate": 0.00031789580534249804,
      "loss": 3.2859,
      "step": 110824
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8974803686141968,
      "learning_rate": 0.0003178917220631645,
      "loss": 3.0222,
      "step": 110825
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2413980960845947,
      "learning_rate": 0.00031788763878050466,
      "loss": 2.9878,
      "step": 110826
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2736003398895264,
      "learning_rate": 0.00031788355549451923,
      "loss": 2.9509,
      "step": 110827
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8685981035232544,
      "learning_rate": 0.0003178794722052087,
      "loss": 3.0147,
      "step": 110828
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7850120067596436,
      "learning_rate": 0.0003178753889125742,
      "loss": 2.9847,
      "step": 110829
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0763933658599854,
      "learning_rate": 0.00031787130561661633,
      "loss": 3.1654,
      "step": 110830
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4430317878723145,
      "learning_rate": 0.0003178672223173358,
      "loss": 3.098,
      "step": 110831
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.032393217086792,
      "learning_rate": 0.00031786313901473336,
      "loss": 2.9371,
      "step": 110832
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0772080421447754,
      "learning_rate": 0.00031785905570880994,
      "loss": 2.9049,
      "step": 110833
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8856855630874634,
      "learning_rate": 0.0003178549723995661,
      "loss": 3.0169,
      "step": 110834
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.011240243911743,
      "learning_rate": 0.0003178508890870027,
      "loss": 2.967,
      "step": 110835
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8448055982589722,
      "learning_rate": 0.00031784680577112055,
      "loss": 3.3353,
      "step": 110836
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.237367868423462,
      "learning_rate": 0.00031784272245192027,
      "loss": 3.1882,
      "step": 110837
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6837120056152344,
      "learning_rate": 0.00031783863912940264,
      "loss": 2.9187,
      "step": 110838
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.024894952774048,
      "learning_rate": 0.0003178345558035686,
      "loss": 3.0152,
      "step": 110839
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.822902798652649,
      "learning_rate": 0.0003178304724744187,
      "loss": 3.0462,
      "step": 110840
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0664315223693848,
      "learning_rate": 0.0003178263891419538,
      "loss": 3.0512,
      "step": 110841
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7582930326461792,
      "learning_rate": 0.00031782230580617465,
      "loss": 2.9762,
      "step": 110842
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.797483205795288,
      "learning_rate": 0.00031781822246708196,
      "loss": 2.9562,
      "step": 110843
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.115128755569458,
      "learning_rate": 0.00031781413912467654,
      "loss": 2.9353,
      "step": 110844
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9973578453063965,
      "learning_rate": 0.0003178100557789592,
      "loss": 2.7208,
      "step": 110845
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6811922788619995,
      "learning_rate": 0.0003178059724299306,
      "loss": 2.9257,
      "step": 110846
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5469253063201904,
      "learning_rate": 0.00031780188907759143,
      "loss": 2.8068,
      "step": 110847
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.195147752761841,
      "learning_rate": 0.0003177978057219427,
      "loss": 3.1894,
      "step": 110848
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8952900171279907,
      "learning_rate": 0.00031779372236298494,
      "loss": 2.761,
      "step": 110849
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4169585704803467,
      "learning_rate": 0.00031778963900071895,
      "loss": 2.7868,
      "step": 110850
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1022415161132812,
      "learning_rate": 0.0003177855556351456,
      "loss": 2.9949,
      "step": 110851
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8269071578979492,
      "learning_rate": 0.0003177814722662656,
      "loss": 2.8482,
      "step": 110852
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6854894161224365,
      "learning_rate": 0.0003177773888940796,
      "loss": 2.9061,
      "step": 110853
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.4031691551208496,
      "learning_rate": 0.00031777330551858844,
      "loss": 3.0781,
      "step": 110854
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9565410614013672,
      "learning_rate": 0.000317769222139793,
      "loss": 3.1851,
      "step": 110855
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2304294109344482,
      "learning_rate": 0.00031776513875769394,
      "loss": 3.1123,
      "step": 110856
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.408175230026245,
      "learning_rate": 0.0003177610553722919,
      "loss": 2.9843,
      "step": 110857
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3395307064056396,
      "learning_rate": 0.0003177569719835878,
      "loss": 2.7895,
      "step": 110858
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.875552773475647,
      "learning_rate": 0.00031775288859158234,
      "loss": 3.0927,
      "step": 110859
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.562924861907959,
      "learning_rate": 0.00031774880519627627,
      "loss": 3.0413,
      "step": 110860
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7770285606384277,
      "learning_rate": 0.00031774472179767026,
      "loss": 2.9433,
      "step": 110861
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8061766624450684,
      "learning_rate": 0.00031774063839576535,
      "loss": 3.2119,
      "step": 110862
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7612860202789307,
      "learning_rate": 0.000317736554990562,
      "loss": 3.1726,
      "step": 110863
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0709757804870605,
      "learning_rate": 0.00031773247158206115,
      "loss": 2.9094,
      "step": 110864
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7156440019607544,
      "learning_rate": 0.0003177283881702635,
      "loss": 3.0062,
      "step": 110865
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6001192331314087,
      "learning_rate": 0.0003177243047551698,
      "loss": 3.002,
      "step": 110866
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7235106229782104,
      "learning_rate": 0.0003177202213367808,
      "loss": 3.1572,
      "step": 110867
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4853851795196533,
      "learning_rate": 0.00031771613791509726,
      "loss": 3.1943,
      "step": 110868
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3717844486236572,
      "learning_rate": 0.00031771205449012,
      "loss": 3.1563,
      "step": 110869
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6018354892730713,
      "learning_rate": 0.0003177079710618497,
      "loss": 3.1084,
      "step": 110870
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.508920907974243,
      "learning_rate": 0.00031770388763028716,
      "loss": 3.0119,
      "step": 110871
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.064178705215454,
      "learning_rate": 0.00031769980419543313,
      "loss": 3.225,
      "step": 110872
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6839079856872559,
      "learning_rate": 0.00031769572075728843,
      "loss": 3.2028,
      "step": 110873
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6201133728027344,
      "learning_rate": 0.0003176916373158537,
      "loss": 3.1801,
      "step": 110874
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7689838409423828,
      "learning_rate": 0.00031768755387112977,
      "loss": 2.866,
      "step": 110875
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0831644535064697,
      "learning_rate": 0.0003176834704231174,
      "loss": 2.8996,
      "step": 110876
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.949533462524414,
      "learning_rate": 0.00031767938697181735,
      "loss": 3.0413,
      "step": 110877
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.2941744327545166,
      "learning_rate": 0.0003176753035172303,
      "loss": 2.7963,
      "step": 110878
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.236806631088257,
      "learning_rate": 0.0003176712200593572,
      "loss": 2.8623,
      "step": 110879
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3479106426239014,
      "learning_rate": 0.00031766713659819857,
      "loss": 2.7502,
      "step": 110880
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9158477783203125,
      "learning_rate": 0.0003176630531337553,
      "loss": 2.8018,
      "step": 110881
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.811220645904541,
      "learning_rate": 0.0003176589696660282,
      "loss": 3.1356,
      "step": 110882
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.765540599822998,
      "learning_rate": 0.000317654886195018,
      "loss": 2.9503,
      "step": 110883
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8435771465301514,
      "learning_rate": 0.00031765080272072534,
      "loss": 2.5663,
      "step": 110884
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.001532554626465,
      "learning_rate": 0.00031764671924315103,
      "loss": 3.1093,
      "step": 110885
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9458000659942627,
      "learning_rate": 0.0003176426357622959,
      "loss": 3.0356,
      "step": 110886
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.7061123847961426,
      "learning_rate": 0.0003176385522781607,
      "loss": 2.9124,
      "step": 110887
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7591370344161987,
      "learning_rate": 0.0003176344687907462,
      "loss": 3.0927,
      "step": 110888
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.5815060138702393,
      "learning_rate": 0.00031763038530005305,
      "loss": 2.9568,
      "step": 110889
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6822689771652222,
      "learning_rate": 0.0003176263018060821,
      "loss": 3.1041,
      "step": 110890
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6200366020202637,
      "learning_rate": 0.00031762221830883405,
      "loss": 3.0592,
      "step": 110891
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.3726589679718018,
      "learning_rate": 0.00031761813480830973,
      "loss": 3.1626,
      "step": 110892
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.957511305809021,
      "learning_rate": 0.0003176140513045098,
      "loss": 3.0416,
      "step": 110893
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9508346319198608,
      "learning_rate": 0.00031760996779743524,
      "loss": 3.2035,
      "step": 110894
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8074374198913574,
      "learning_rate": 0.0003176058842870865,
      "loss": 2.9839,
      "step": 110895
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.102423667907715,
      "learning_rate": 0.00031760180077346456,
      "loss": 2.9983,
      "step": 110896
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7796169519424438,
      "learning_rate": 0.00031759771725657014,
      "loss": 3.1107,
      "step": 110897
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2309389114379883,
      "learning_rate": 0.0003175936337364039,
      "loss": 3.0103,
      "step": 110898
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.605780839920044,
      "learning_rate": 0.00031758955021296667,
      "loss": 3.002,
      "step": 110899
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7834643125534058,
      "learning_rate": 0.0003175854666862593,
      "loss": 2.8595,
      "step": 110900
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6986337900161743,
      "learning_rate": 0.0003175813831562824,
      "loss": 2.9183,
      "step": 110901
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0929131507873535,
      "learning_rate": 0.0003175772996230368,
      "loss": 3.0318,
      "step": 110902
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.037261724472046,
      "learning_rate": 0.0003175732160865233,
      "loss": 3.0684,
      "step": 110903
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.365222692489624,
      "learning_rate": 0.00031756913254674254,
      "loss": 2.9909,
      "step": 110904
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8408817052841187,
      "learning_rate": 0.0003175650490036953,
      "loss": 3.1612,
      "step": 110905
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.995412826538086,
      "learning_rate": 0.00031756096545738254,
      "loss": 3.1929,
      "step": 110906
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7812225818634033,
      "learning_rate": 0.00031755688190780474,
      "loss": 2.9181,
      "step": 110907
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.857076644897461,
      "learning_rate": 0.0003175527983549628,
      "loss": 3.0487,
      "step": 110908
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7850309610366821,
      "learning_rate": 0.00031754871479885754,
      "loss": 3.0118,
      "step": 110909
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.573879361152649,
      "learning_rate": 0.00031754463123948956,
      "loss": 2.9942,
      "step": 110910
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0567753314971924,
      "learning_rate": 0.0003175405476768597,
      "loss": 2.7093,
      "step": 110911
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3384952545166016,
      "learning_rate": 0.0003175364641109688,
      "loss": 3.2323,
      "step": 110912
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.6587698459625244,
      "learning_rate": 0.00031753238054181744,
      "loss": 3.1396,
      "step": 110913
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.223353147506714,
      "learning_rate": 0.0003175282969694065,
      "loss": 3.109,
      "step": 110914
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0446157455444336,
      "learning_rate": 0.00031752421339373684,
      "loss": 2.9335,
      "step": 110915
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1557974815368652,
      "learning_rate": 0.0003175201298148089,
      "loss": 2.7861,
      "step": 110916
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.4102747440338135,
      "learning_rate": 0.00031751604623262374,
      "loss": 3.0799,
      "step": 110917
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.9040586948394775,
      "learning_rate": 0.0003175119626471821,
      "loss": 3.0842,
      "step": 110918
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.275627613067627,
      "learning_rate": 0.00031750787905848455,
      "loss": 2.8794,
      "step": 110919
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.994995355606079,
      "learning_rate": 0.00031750379546653196,
      "loss": 2.9842,
      "step": 110920
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.873724102973938,
      "learning_rate": 0.00031749971187132513,
      "loss": 3.2259,
      "step": 110921
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.328965902328491,
      "learning_rate": 0.0003174956282728647,
      "loss": 2.9852,
      "step": 110922
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8896641731262207,
      "learning_rate": 0.0003174915446711516,
      "loss": 3.0693,
      "step": 110923
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.167818069458008,
      "learning_rate": 0.00031748746106618646,
      "loss": 2.9792,
      "step": 110924
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.250762701034546,
      "learning_rate": 0.00031748337745797,
      "loss": 2.9905,
      "step": 110925
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.920575737953186,
      "learning_rate": 0.0003174792938465031,
      "loss": 2.8464,
      "step": 110926
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.573791980743408,
      "learning_rate": 0.0003174752102317865,
      "loss": 2.8476,
      "step": 110927
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8813360929489136,
      "learning_rate": 0.0003174711266138209,
      "loss": 2.9126,
      "step": 110928
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6357710361480713,
      "learning_rate": 0.000317467042992607,
      "loss": 2.7977,
      "step": 110929
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6900080442428589,
      "learning_rate": 0.00031746295936814576,
      "loss": 3.2804,
      "step": 110930
    },
    {
      "epoch": 1.44,
      "grad_norm": 3.8749217987060547,
      "learning_rate": 0.00031745887574043786,
      "loss": 2.9381,
      "step": 110931
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8610578775405884,
      "learning_rate": 0.00031745479210948396,
      "loss": 3.0053,
      "step": 110932
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.981162428855896,
      "learning_rate": 0.00031745070847528483,
      "loss": 3.1315,
      "step": 110933
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.45995831489563,
      "learning_rate": 0.00031744662483784134,
      "loss": 3.0288,
      "step": 110934
    },
    {
      "epoch": 1.44,
      "grad_norm": 4.251031875610352,
      "learning_rate": 0.00031744254119715423,
      "loss": 2.9305,
      "step": 110935
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6076971292495728,
      "learning_rate": 0.00031743845755322417,
      "loss": 3.3828,
      "step": 110936
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6788338422775269,
      "learning_rate": 0.000317434373906052,
      "loss": 2.7092,
      "step": 110937
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.487673044204712,
      "learning_rate": 0.00031743029025563843,
      "loss": 2.9118,
      "step": 110938
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6920372247695923,
      "learning_rate": 0.0003174262066019842,
      "loss": 3.1132,
      "step": 110939
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5181078910827637,
      "learning_rate": 0.00031742212294509024,
      "loss": 3.0403,
      "step": 110940
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1446337699890137,
      "learning_rate": 0.00031741803928495704,
      "loss": 2.9677,
      "step": 110941
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.2597594261169434,
      "learning_rate": 0.0003174139556215856,
      "loss": 3.02,
      "step": 110942
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1932709217071533,
      "learning_rate": 0.00031740987195497653,
      "loss": 2.7857,
      "step": 110943
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6877050399780273,
      "learning_rate": 0.0003174057882851307,
      "loss": 3.116,
      "step": 110944
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.052396774291992,
      "learning_rate": 0.00031740170461204865,
      "loss": 2.8206,
      "step": 110945
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.151738166809082,
      "learning_rate": 0.0003173976209357314,
      "loss": 2.9415,
      "step": 110946
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8413201570510864,
      "learning_rate": 0.0003173935372561796,
      "loss": 2.8801,
      "step": 110947
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8511966466903687,
      "learning_rate": 0.000317389453573394,
      "loss": 3.1872,
      "step": 110948
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.571244478225708,
      "learning_rate": 0.00031738536988737534,
      "loss": 2.8908,
      "step": 110949
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.534942388534546,
      "learning_rate": 0.00031738128619812454,
      "loss": 3.1046,
      "step": 110950
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7276970148086548,
      "learning_rate": 0.0003173772025056421,
      "loss": 2.8314,
      "step": 110951
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8111542463302612,
      "learning_rate": 0.0003173731188099289,
      "loss": 3.2872,
      "step": 110952
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9360170364379883,
      "learning_rate": 0.00031736903511098575,
      "loss": 3.1311,
      "step": 110953
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5838412046432495,
      "learning_rate": 0.00031736495140881343,
      "loss": 3.0538,
      "step": 110954
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7731858491897583,
      "learning_rate": 0.0003173608677034126,
      "loss": 3.0193,
      "step": 110955
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.678119421005249,
      "learning_rate": 0.000317356783994784,
      "loss": 2.8134,
      "step": 110956
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9487642049789429,
      "learning_rate": 0.00031735270028292845,
      "loss": 3.0387,
      "step": 110957
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6798678636550903,
      "learning_rate": 0.0003173486165678468,
      "loss": 3.0952,
      "step": 110958
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1413750648498535,
      "learning_rate": 0.00031734453284953967,
      "loss": 3.0967,
      "step": 110959
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.1415746212005615,
      "learning_rate": 0.0003173404491280078,
      "loss": 2.8503,
      "step": 110960
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.986326813697815,
      "learning_rate": 0.0003173363654032521,
      "loss": 2.9438,
      "step": 110961
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.6781394481658936,
      "learning_rate": 0.0003173322816752732,
      "loss": 3.024,
      "step": 110962
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.0084989070892334,
      "learning_rate": 0.0003173281979440719,
      "loss": 2.9831,
      "step": 110963
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.583233118057251,
      "learning_rate": 0.000317324114209649,
      "loss": 2.9489,
      "step": 110964
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.8013874292373657,
      "learning_rate": 0.0003173200304720052,
      "loss": 2.9044,
      "step": 110965
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.201536178588867,
      "learning_rate": 0.00031731594673114127,
      "loss": 2.9811,
      "step": 110966
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9011561870574951,
      "learning_rate": 0.0003173118629870579,
      "loss": 3.0313,
      "step": 110967
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3387389183044434,
      "learning_rate": 0.0003173077792397561,
      "loss": 3.1507,
      "step": 110968
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.3234095573425293,
      "learning_rate": 0.0003173036954892363,
      "loss": 2.9683,
      "step": 110969
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.634636402130127,
      "learning_rate": 0.0003172996117354995,
      "loss": 2.8978,
      "step": 110970
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9786748886108398,
      "learning_rate": 0.0003172955279785464,
      "loss": 3.0629,
      "step": 110971
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9983813762664795,
      "learning_rate": 0.0003172914442183776,
      "loss": 2.949,
      "step": 110972
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.9700062274932861,
      "learning_rate": 0.00031728736045499417,
      "loss": 3.1096,
      "step": 110973
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.998793363571167,
      "learning_rate": 0.0003172832766883966,
      "loss": 2.9948,
      "step": 110974
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5964235067367554,
      "learning_rate": 0.0003172791929185857,
      "loss": 3.0067,
      "step": 110975
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.7234681844711304,
      "learning_rate": 0.00031727510914556243,
      "loss": 2.7929,
      "step": 110976
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.5743365287780762,
      "learning_rate": 0.00031727102536932724,
      "loss": 2.8853,
      "step": 110977
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7150745391845703,
      "learning_rate": 0.00031726694158988106,
      "loss": 2.7288,
      "step": 110978
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0921194553375244,
      "learning_rate": 0.0003172628578072247,
      "loss": 2.9243,
      "step": 110979
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9960274696350098,
      "learning_rate": 0.0003172587740213588,
      "loss": 2.9228,
      "step": 110980
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8256241083145142,
      "learning_rate": 0.00031725469023228414,
      "loss": 3.3581,
      "step": 110981
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0549914836883545,
      "learning_rate": 0.00031725060644000156,
      "loss": 2.9302,
      "step": 110982
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6590741872787476,
      "learning_rate": 0.00031724652264451174,
      "loss": 3.1926,
      "step": 110983
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9523166418075562,
      "learning_rate": 0.00031724243884581545,
      "loss": 3.1191,
      "step": 110984
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8997244834899902,
      "learning_rate": 0.00031723835504391355,
      "loss": 2.5908,
      "step": 110985
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1376047134399414,
      "learning_rate": 0.0003172342712388066,
      "loss": 3.0836,
      "step": 110986
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0966901779174805,
      "learning_rate": 0.00031723018743049555,
      "loss": 3.0492,
      "step": 110987
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8068469762802124,
      "learning_rate": 0.00031722610361898106,
      "loss": 3.005,
      "step": 110988
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.644303560256958,
      "learning_rate": 0.0003172220198042639,
      "loss": 2.9336,
      "step": 110989
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0472850799560547,
      "learning_rate": 0.0003172179359863448,
      "loss": 3.112,
      "step": 110990
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6792525053024292,
      "learning_rate": 0.00031721385216522467,
      "loss": 3.0598,
      "step": 110991
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8246058225631714,
      "learning_rate": 0.0003172097683409041,
      "loss": 3.0707,
      "step": 110992
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2179934978485107,
      "learning_rate": 0.0003172056845133839,
      "loss": 2.7487,
      "step": 110993
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9377638101577759,
      "learning_rate": 0.0003172016006826649,
      "loss": 3.0952,
      "step": 110994
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7410404682159424,
      "learning_rate": 0.00031719751684874774,
      "loss": 2.8207,
      "step": 110995
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8886157274246216,
      "learning_rate": 0.0003171934330116332,
      "loss": 2.7541,
      "step": 110996
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.208711624145508,
      "learning_rate": 0.0003171893491713221,
      "loss": 3.0767,
      "step": 110997
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1154797077178955,
      "learning_rate": 0.0003171852653278152,
      "loss": 3.02,
      "step": 110998
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1261274814605713,
      "learning_rate": 0.0003171811814811132,
      "loss": 3.1621,
      "step": 110999
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.1805059909820557,
      "learning_rate": 0.000317177097631217,
      "loss": 2.9713,
      "step": 111000
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7732977867126465,
      "learning_rate": 0.00031717301377812715,
      "loss": 2.8626,
      "step": 111001
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0678160190582275,
      "learning_rate": 0.00031716892992184455,
      "loss": 3.0601,
      "step": 111002
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0145177841186523,
      "learning_rate": 0.0003171648460623699,
      "loss": 3.1664,
      "step": 111003
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.582451343536377,
      "learning_rate": 0.00031716076219970405,
      "loss": 2.8207,
      "step": 111004
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.770862340927124,
      "learning_rate": 0.00031715667833384756,
      "loss": 2.9106,
      "step": 111005
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6472394466400146,
      "learning_rate": 0.0003171525944648014,
      "loss": 2.6102,
      "step": 111006
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9425990581512451,
      "learning_rate": 0.0003171485105925663,
      "loss": 3.1784,
      "step": 111007
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6543288230895996,
      "learning_rate": 0.00031714442671714285,
      "loss": 3.2352,
      "step": 111008
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.493563413619995,
      "learning_rate": 0.00031714034283853197,
      "loss": 3.1568,
      "step": 111009
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8169715404510498,
      "learning_rate": 0.00031713625895673445,
      "loss": 3.0501,
      "step": 111010
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.020747661590576,
      "learning_rate": 0.0003171321750717509,
      "loss": 2.9247,
      "step": 111011
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.9976084232330322,
      "learning_rate": 0.00031712809118358213,
      "loss": 3.1409,
      "step": 111012
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4250662326812744,
      "learning_rate": 0.00031712400729222906,
      "loss": 3.3101,
      "step": 111013
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8391163349151611,
      "learning_rate": 0.00031711992339769217,
      "loss": 2.8594,
      "step": 111014
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6403720378875732,
      "learning_rate": 0.0003171158394999724,
      "loss": 3.0055,
      "step": 111015
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.9113502502441406,
      "learning_rate": 0.00031711175559907053,
      "loss": 3.0356,
      "step": 111016
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8209657669067383,
      "learning_rate": 0.00031710767169498724,
      "loss": 2.8839,
      "step": 111017
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.171956777572632,
      "learning_rate": 0.0003171035877877233,
      "loss": 2.5925,
      "step": 111018
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.10608172416687,
      "learning_rate": 0.00031709950387727947,
      "loss": 2.7842,
      "step": 111019
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.519228219985962,
      "learning_rate": 0.0003170954199636565,
      "loss": 3.1773,
      "step": 111020
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.1917974948883057,
      "learning_rate": 0.0003170913360468552,
      "loss": 2.9238,
      "step": 111021
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9360933303833008,
      "learning_rate": 0.0003170872521268763,
      "loss": 2.839,
      "step": 111022
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.856290817260742,
      "learning_rate": 0.00031708316820372055,
      "loss": 2.9178,
      "step": 111023
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.6303203105926514,
      "learning_rate": 0.0003170790842773887,
      "loss": 2.9921,
      "step": 111024
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.478666067123413,
      "learning_rate": 0.0003170750003478816,
      "loss": 2.92,
      "step": 111025
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.089531660079956,
      "learning_rate": 0.00031707091641519983,
      "loss": 3.29,
      "step": 111026
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.227691173553467,
      "learning_rate": 0.00031706683247934427,
      "loss": 2.9499,
      "step": 111027
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.8174943923950195,
      "learning_rate": 0.0003170627485403157,
      "loss": 3.0344,
      "step": 111028
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.936906337738037,
      "learning_rate": 0.0003170586645981148,
      "loss": 2.9657,
      "step": 111029
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.134462594985962,
      "learning_rate": 0.0003170545806527424,
      "loss": 2.9136,
      "step": 111030
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6608684062957764,
      "learning_rate": 0.00031705049670419935,
      "loss": 3.1213,
      "step": 111031
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6743782758712769,
      "learning_rate": 0.00031704641275248613,
      "loss": 2.8815,
      "step": 111032
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0895795822143555,
      "learning_rate": 0.00031704232879760364,
      "loss": 3.1971,
      "step": 111033
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1998958587646484,
      "learning_rate": 0.0003170382448395527,
      "loss": 2.7588,
      "step": 111034
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3469488620758057,
      "learning_rate": 0.0003170341608783342,
      "loss": 3.1446,
      "step": 111035
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.530273675918579,
      "learning_rate": 0.0003170300769139485,
      "loss": 3.0837,
      "step": 111036
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6490638256073,
      "learning_rate": 0.0003170259929463966,
      "loss": 2.9349,
      "step": 111037
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.166774272918701,
      "learning_rate": 0.00031702190897567935,
      "loss": 3.24,
      "step": 111038
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7904285192489624,
      "learning_rate": 0.0003170178250017974,
      "loss": 3.1261,
      "step": 111039
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0760087966918945,
      "learning_rate": 0.0003170137410247515,
      "loss": 2.9514,
      "step": 111040
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4069912433624268,
      "learning_rate": 0.0003170096570445424,
      "loss": 3.0837,
      "step": 111041
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.197404384613037,
      "learning_rate": 0.0003170055730611709,
      "loss": 2.827,
      "step": 111042
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.008683919906616,
      "learning_rate": 0.00031700148907463775,
      "loss": 2.8393,
      "step": 111043
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.095780372619629,
      "learning_rate": 0.0003169974050849437,
      "loss": 3.0172,
      "step": 111044
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.1875901222229004,
      "learning_rate": 0.00031699332109208945,
      "loss": 2.9598,
      "step": 111045
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.39335298538208,
      "learning_rate": 0.00031698923709607595,
      "loss": 3.3405,
      "step": 111046
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8342348337173462,
      "learning_rate": 0.0003169851530969037,
      "loss": 2.8773,
      "step": 111047
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.696887493133545,
      "learning_rate": 0.00031698106909457363,
      "loss": 3.2797,
      "step": 111048
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.932462215423584,
      "learning_rate": 0.0003169769850890865,
      "loss": 3.0182,
      "step": 111049
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.16351842880249,
      "learning_rate": 0.00031697290108044306,
      "loss": 2.9876,
      "step": 111050
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2240049839019775,
      "learning_rate": 0.0003169688170686439,
      "loss": 2.7592,
      "step": 111051
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6109033823013306,
      "learning_rate": 0.00031696473305369005,
      "loss": 3.1137,
      "step": 111052
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7039713859558105,
      "learning_rate": 0.00031696064903558203,
      "loss": 2.9137,
      "step": 111053
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.152920961380005,
      "learning_rate": 0.0003169565650143207,
      "loss": 2.8185,
      "step": 111054
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2592382431030273,
      "learning_rate": 0.00031695248098990704,
      "loss": 3.1101,
      "step": 111055
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.889849066734314,
      "learning_rate": 0.00031694839696234136,
      "loss": 2.8848,
      "step": 111056
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.048525094985962,
      "learning_rate": 0.0003169443129316247,
      "loss": 2.7375,
      "step": 111057
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9140172004699707,
      "learning_rate": 0.00031694022889775783,
      "loss": 3.0818,
      "step": 111058
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.70215904712677,
      "learning_rate": 0.00031693614486074143,
      "loss": 2.9024,
      "step": 111059
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0333034992218018,
      "learning_rate": 0.00031693206082057624,
      "loss": 3.0087,
      "step": 111060
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7674051523208618,
      "learning_rate": 0.0003169279767772632,
      "loss": 2.8499,
      "step": 111061
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7288144826889038,
      "learning_rate": 0.00031692389273080274,
      "loss": 2.9539,
      "step": 111062
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3592052459716797,
      "learning_rate": 0.00031691980868119593,
      "loss": 3.2374,
      "step": 111063
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2911977767944336,
      "learning_rate": 0.0003169157246284434,
      "loss": 2.9023,
      "step": 111064
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.182250499725342,
      "learning_rate": 0.00031691164057254587,
      "loss": 3.1581,
      "step": 111065
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2513179779052734,
      "learning_rate": 0.0003169075565135041,
      "loss": 2.9473,
      "step": 111066
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1174259185791016,
      "learning_rate": 0.00031690347245131895,
      "loss": 3.2408,
      "step": 111067
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.267197847366333,
      "learning_rate": 0.0003168993883859912,
      "loss": 2.9019,
      "step": 111068
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8998721837997437,
      "learning_rate": 0.00031689530431752147,
      "loss": 2.8114,
      "step": 111069
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.391718864440918,
      "learning_rate": 0.0003168912202459105,
      "loss": 2.9853,
      "step": 111070
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9705160856246948,
      "learning_rate": 0.0003168871361711593,
      "loss": 2.9465,
      "step": 111071
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8577243089675903,
      "learning_rate": 0.00031688305209326835,
      "loss": 2.9175,
      "step": 111072
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.414982318878174,
      "learning_rate": 0.00031687896801223855,
      "loss": 2.9388,
      "step": 111073
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0974340438842773,
      "learning_rate": 0.0003168748839280707,
      "loss": 3.0157,
      "step": 111074
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.100933790206909,
      "learning_rate": 0.0003168707998407653,
      "loss": 3.0591,
      "step": 111075
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6799222230911255,
      "learning_rate": 0.0003168667157503235,
      "loss": 3.1092,
      "step": 111076
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0571134090423584,
      "learning_rate": 0.0003168626316567458,
      "loss": 3.1449,
      "step": 111077
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.242349863052368,
      "learning_rate": 0.00031685854756003295,
      "loss": 2.9204,
      "step": 111078
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3627636432647705,
      "learning_rate": 0.00031685446346018585,
      "loss": 3.0303,
      "step": 111079
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.403496265411377,
      "learning_rate": 0.0003168503793572052,
      "loss": 3.2388,
      "step": 111080
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.055994987487793,
      "learning_rate": 0.00031684629525109166,
      "loss": 2.9363,
      "step": 111081
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.9562764167785645,
      "learning_rate": 0.00031684221114184615,
      "loss": 3.0289,
      "step": 111082
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8821852207183838,
      "learning_rate": 0.00031683812702946936,
      "loss": 2.9913,
      "step": 111083
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.096190929412842,
      "learning_rate": 0.000316834042913962,
      "loss": 2.9467,
      "step": 111084
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.155264139175415,
      "learning_rate": 0.00031682995879532484,
      "loss": 3.0407,
      "step": 111085
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8011200428009033,
      "learning_rate": 0.0003168258746735588,
      "loss": 3.0236,
      "step": 111086
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8184564113616943,
      "learning_rate": 0.0003168217905486644,
      "loss": 3.0168,
      "step": 111087
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3880038261413574,
      "learning_rate": 0.0003168177064206425,
      "loss": 2.8572,
      "step": 111088
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8328380584716797,
      "learning_rate": 0.000316813622289494,
      "loss": 2.847,
      "step": 111089
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.042511463165283,
      "learning_rate": 0.0003168095381552194,
      "loss": 2.8678,
      "step": 111090
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.181856393814087,
      "learning_rate": 0.0003168054540178196,
      "loss": 2.9294,
      "step": 111091
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.660194993019104,
      "learning_rate": 0.00031680136987729553,
      "loss": 2.8096,
      "step": 111092
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.70939564704895,
      "learning_rate": 0.0003167972857336476,
      "loss": 3.0594,
      "step": 111093
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.642984390258789,
      "learning_rate": 0.00031679320158687675,
      "loss": 2.871,
      "step": 111094
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.620988130569458,
      "learning_rate": 0.00031678911743698374,
      "loss": 2.9239,
      "step": 111095
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.594642400741577,
      "learning_rate": 0.00031678503328396926,
      "loss": 3.0222,
      "step": 111096
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7865558862686157,
      "learning_rate": 0.0003167809491278342,
      "loss": 3.1495,
      "step": 111097
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0252649784088135,
      "learning_rate": 0.00031677686496857924,
      "loss": 3.1826,
      "step": 111098
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9484264850616455,
      "learning_rate": 0.00031677278080620517,
      "loss": 3.048,
      "step": 111099
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.808589220046997,
      "learning_rate": 0.0003167686966407126,
      "loss": 2.8623,
      "step": 111100
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.880431056022644,
      "learning_rate": 0.0003167646124721026,
      "loss": 3.0508,
      "step": 111101
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8823245763778687,
      "learning_rate": 0.0003167605283003756,
      "loss": 2.8818,
      "step": 111102
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8389780521392822,
      "learning_rate": 0.00031675644412553253,
      "loss": 3.0242,
      "step": 111103
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3285274505615234,
      "learning_rate": 0.00031675235994757414,
      "loss": 2.8283,
      "step": 111104
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9587575197219849,
      "learning_rate": 0.00031674827576650116,
      "loss": 3.0406,
      "step": 111105
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0112013816833496,
      "learning_rate": 0.00031674419158231435,
      "loss": 2.8038,
      "step": 111106
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0443084239959717,
      "learning_rate": 0.0003167401073950145,
      "loss": 3.1677,
      "step": 111107
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9674879312515259,
      "learning_rate": 0.0003167360232046023,
      "loss": 2.8156,
      "step": 111108
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8416023254394531,
      "learning_rate": 0.00031673193901107864,
      "loss": 2.9765,
      "step": 111109
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9820863008499146,
      "learning_rate": 0.0003167278548144442,
      "loss": 2.9578,
      "step": 111110
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.884800672531128,
      "learning_rate": 0.0003167237706146997,
      "loss": 2.8566,
      "step": 111111
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6793996095657349,
      "learning_rate": 0.0003167196864118458,
      "loss": 2.9029,
      "step": 111112
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0317320823669434,
      "learning_rate": 0.0003167156022058836,
      "loss": 2.7882,
      "step": 111113
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9211345911026,
      "learning_rate": 0.0003167115179968136,
      "loss": 2.854,
      "step": 111114
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.82767653465271,
      "learning_rate": 0.0003167074337846365,
      "loss": 3.1878,
      "step": 111115
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.862352132797241,
      "learning_rate": 0.00031670334956935333,
      "loss": 2.9945,
      "step": 111116
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9604014158248901,
      "learning_rate": 0.0003166992653509646,
      "loss": 3.1812,
      "step": 111117
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.852161169052124,
      "learning_rate": 0.0003166951811294712,
      "loss": 3.0494,
      "step": 111118
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.461958408355713,
      "learning_rate": 0.0003166910969048738,
      "loss": 3.0431,
      "step": 111119
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8760615587234497,
      "learning_rate": 0.0003166870126771733,
      "loss": 3.1086,
      "step": 111120
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.144258975982666,
      "learning_rate": 0.0003166829284463703,
      "loss": 2.7559,
      "step": 111121
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9895589351654053,
      "learning_rate": 0.0003166788442124656,
      "loss": 3.0528,
      "step": 111122
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5701771974563599,
      "learning_rate": 0.0003166747599754601,
      "loss": 3.1291,
      "step": 111123
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8169753551483154,
      "learning_rate": 0.0003166706757353543,
      "loss": 2.8741,
      "step": 111124
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.423804521560669,
      "learning_rate": 0.00031666659149214916,
      "loss": 3.1016,
      "step": 111125
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1715452671051025,
      "learning_rate": 0.0003166625072458454,
      "loss": 3.2045,
      "step": 111126
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6984505653381348,
      "learning_rate": 0.0003166584229964438,
      "loss": 2.6419,
      "step": 111127
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8277825117111206,
      "learning_rate": 0.00031665433874394504,
      "loss": 2.9796,
      "step": 111128
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0012805461883545,
      "learning_rate": 0.0003166502544883499,
      "loss": 3.022,
      "step": 111129
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3888068199157715,
      "learning_rate": 0.0003166461702296592,
      "loss": 2.935,
      "step": 111130
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8609591722488403,
      "learning_rate": 0.0003166420859678737,
      "loss": 2.9466,
      "step": 111131
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8390921354293823,
      "learning_rate": 0.0003166380017029941,
      "loss": 3.0454,
      "step": 111132
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8977108001708984,
      "learning_rate": 0.0003166339174350211,
      "loss": 3.0006,
      "step": 111133
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.429932117462158,
      "learning_rate": 0.0003166298331639557,
      "loss": 2.9374,
      "step": 111134
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7985795736312866,
      "learning_rate": 0.00031662574888979844,
      "loss": 3.303,
      "step": 111135
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.822871208190918,
      "learning_rate": 0.00031662166461255006,
      "loss": 3.0597,
      "step": 111136
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.747902750968933,
      "learning_rate": 0.0003166175803322114,
      "loss": 2.9902,
      "step": 111137
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8934500217437744,
      "learning_rate": 0.00031661349604878336,
      "loss": 3.1759,
      "step": 111138
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8611040115356445,
      "learning_rate": 0.0003166094117622664,
      "loss": 2.8848,
      "step": 111139
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8143374919891357,
      "learning_rate": 0.0003166053274726615,
      "loss": 2.7114,
      "step": 111140
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.159825563430786,
      "learning_rate": 0.0003166012431799694,
      "loss": 3.0285,
      "step": 111141
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9287906885147095,
      "learning_rate": 0.00031659715888419075,
      "loss": 3.1829,
      "step": 111142
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4544613361358643,
      "learning_rate": 0.0003165930745853264,
      "loss": 3.005,
      "step": 111143
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1510250568389893,
      "learning_rate": 0.0003165889902833771,
      "loss": 2.758,
      "step": 111144
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3322460651397705,
      "learning_rate": 0.0003165849059783436,
      "loss": 3.2112,
      "step": 111145
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8495675325393677,
      "learning_rate": 0.0003165808216702266,
      "loss": 2.9453,
      "step": 111146
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9608592987060547,
      "learning_rate": 0.00031657673735902697,
      "loss": 3.0024,
      "step": 111147
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1100399494171143,
      "learning_rate": 0.0003165726530447454,
      "loss": 3.071,
      "step": 111148
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.022780656814575,
      "learning_rate": 0.0003165685687273826,
      "loss": 3.1403,
      "step": 111149
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9875540733337402,
      "learning_rate": 0.00031656448440693945,
      "loss": 2.8512,
      "step": 111150
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.205348491668701,
      "learning_rate": 0.0003165604000834166,
      "loss": 3.1125,
      "step": 111151
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.96498703956604,
      "learning_rate": 0.00031655631575681486,
      "loss": 3.0316,
      "step": 111152
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.308037519454956,
      "learning_rate": 0.0003165522314271351,
      "loss": 3.1131,
      "step": 111153
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.009274959564209,
      "learning_rate": 0.0003165481470943779,
      "loss": 3.0699,
      "step": 111154
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.794819951057434,
      "learning_rate": 0.00031654406275854406,
      "loss": 2.857,
      "step": 111155
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.182788133621216,
      "learning_rate": 0.0003165399784196344,
      "loss": 2.8727,
      "step": 111156
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.980988621711731,
      "learning_rate": 0.00031653589407764965,
      "loss": 2.8837,
      "step": 111157
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4084811210632324,
      "learning_rate": 0.0003165318097325905,
      "loss": 2.8191,
      "step": 111158
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.234954357147217,
      "learning_rate": 0.00031652772538445784,
      "loss": 2.7847,
      "step": 111159
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7886767387390137,
      "learning_rate": 0.00031652364103325234,
      "loss": 2.8884,
      "step": 111160
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0097944736480713,
      "learning_rate": 0.0003165195566789748,
      "loss": 2.8296,
      "step": 111161
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8192063570022583,
      "learning_rate": 0.00031651547232162594,
      "loss": 3.029,
      "step": 111162
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8661086559295654,
      "learning_rate": 0.00031651138796120657,
      "loss": 3.0984,
      "step": 111163
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6258158683776855,
      "learning_rate": 0.0003165073035977174,
      "loss": 2.9036,
      "step": 111164
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8633650541305542,
      "learning_rate": 0.0003165032192311592,
      "loss": 2.651,
      "step": 111165
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8536452054977417,
      "learning_rate": 0.00031649913486153273,
      "loss": 2.8846,
      "step": 111166
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0023698806762695,
      "learning_rate": 0.0003164950504888388,
      "loss": 2.9334,
      "step": 111167
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7263332605361938,
      "learning_rate": 0.00031649096611307816,
      "loss": 2.9903,
      "step": 111168
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.777104139328003,
      "learning_rate": 0.00031648688173425147,
      "loss": 3.0287,
      "step": 111169
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7105553150177002,
      "learning_rate": 0.0003164827973523595,
      "loss": 2.9888,
      "step": 111170
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0084846019744873,
      "learning_rate": 0.00031647871296740324,
      "loss": 2.873,
      "step": 111171
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0751171112060547,
      "learning_rate": 0.0003164746285793831,
      "loss": 3.0411,
      "step": 111172
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5349056720733643,
      "learning_rate": 0.0003164705441883001,
      "loss": 3.1779,
      "step": 111173
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8029783964157104,
      "learning_rate": 0.00031646645979415495,
      "loss": 2.9364,
      "step": 111174
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5480499267578125,
      "learning_rate": 0.0003164623753969484,
      "loss": 2.9156,
      "step": 111175
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0824685096740723,
      "learning_rate": 0.00031645829099668105,
      "loss": 3.014,
      "step": 111176
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0592174530029297,
      "learning_rate": 0.0003164542065933539,
      "loss": 3.019,
      "step": 111177
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1117589473724365,
      "learning_rate": 0.0003164501221869676,
      "loss": 2.8767,
      "step": 111178
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9368617534637451,
      "learning_rate": 0.00031644603777752277,
      "loss": 3.0058,
      "step": 111179
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.1982905864715576,
      "learning_rate": 0.0003164419533650205,
      "loss": 3.1938,
      "step": 111180
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4182305335998535,
      "learning_rate": 0.00031643786894946125,
      "loss": 3.0441,
      "step": 111181
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6358962059020996,
      "learning_rate": 0.00031643378453084586,
      "loss": 3.3147,
      "step": 111182
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.837757110595703,
      "learning_rate": 0.00031642970010917524,
      "loss": 3.1732,
      "step": 111183
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1716806888580322,
      "learning_rate": 0.00031642561568444994,
      "loss": 2.9595,
      "step": 111184
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2473630905151367,
      "learning_rate": 0.0003164215312566708,
      "loss": 2.7921,
      "step": 111185
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.030963897705078,
      "learning_rate": 0.0003164174468258387,
      "loss": 2.8625,
      "step": 111186
    },
    {
      "epoch": 1.45,
      "grad_norm": 5.001802921295166,
      "learning_rate": 0.00031641336239195415,
      "loss": 2.93,
      "step": 111187
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.2572925090789795,
      "learning_rate": 0.0003164092779550181,
      "loss": 2.8734,
      "step": 111188
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.77456533908844,
      "learning_rate": 0.00031640519351503133,
      "loss": 2.9176,
      "step": 111189
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9764996767044067,
      "learning_rate": 0.0003164011090719944,
      "loss": 3.1171,
      "step": 111190
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.219689130783081,
      "learning_rate": 0.00031639702462590825,
      "loss": 3.0616,
      "step": 111191
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.360059976577759,
      "learning_rate": 0.0003163929401767736,
      "loss": 3.1472,
      "step": 111192
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5432181358337402,
      "learning_rate": 0.0003163888557245912,
      "loss": 3.0122,
      "step": 111193
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.498781681060791,
      "learning_rate": 0.0003163847712693617,
      "loss": 2.9709,
      "step": 111194
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.22271990776062,
      "learning_rate": 0.0003163806868110861,
      "loss": 2.9159,
      "step": 111195
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.5970253944396973,
      "learning_rate": 0.00031637660234976495,
      "loss": 3.107,
      "step": 111196
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.717604160308838,
      "learning_rate": 0.0003163725178853991,
      "loss": 2.947,
      "step": 111197
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.295316219329834,
      "learning_rate": 0.00031636843341798925,
      "loss": 2.8798,
      "step": 111198
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0080604553222656,
      "learning_rate": 0.00031636434894753625,
      "loss": 3.1373,
      "step": 111199
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.857082843780518,
      "learning_rate": 0.00031636026447404077,
      "loss": 2.7035,
      "step": 111200
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1870052814483643,
      "learning_rate": 0.00031635617999750356,
      "loss": 2.8957,
      "step": 111201
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8809239864349365,
      "learning_rate": 0.00031635209551792554,
      "loss": 2.7052,
      "step": 111202
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.720430850982666,
      "learning_rate": 0.00031634801103530737,
      "loss": 3.3765,
      "step": 111203
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.514024496078491,
      "learning_rate": 0.0003163439265496497,
      "loss": 3.1282,
      "step": 111204
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0820462703704834,
      "learning_rate": 0.00031633984206095347,
      "loss": 3.0984,
      "step": 111205
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7959010601043701,
      "learning_rate": 0.00031633575756921927,
      "loss": 3.0771,
      "step": 111206
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.43835711479187,
      "learning_rate": 0.00031633167307444795,
      "loss": 2.951,
      "step": 111207
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5757808685302734,
      "learning_rate": 0.0003163275885766403,
      "loss": 3.1874,
      "step": 111208
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6560711860656738,
      "learning_rate": 0.000316323504075797,
      "loss": 2.757,
      "step": 111209
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1464855670928955,
      "learning_rate": 0.0003163194195719189,
      "loss": 2.6911,
      "step": 111210
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7440862655639648,
      "learning_rate": 0.0003163153350650067,
      "loss": 3.0382,
      "step": 111211
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8582510948181152,
      "learning_rate": 0.00031631125055506124,
      "loss": 2.8209,
      "step": 111212
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7759349346160889,
      "learning_rate": 0.0003163071660420831,
      "loss": 2.889,
      "step": 111213
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8128232955932617,
      "learning_rate": 0.00031630308152607315,
      "loss": 3.1571,
      "step": 111214
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.015697717666626,
      "learning_rate": 0.0003162989970070323,
      "loss": 2.9803,
      "step": 111215
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.776655435562134,
      "learning_rate": 0.00031629491248496097,
      "loss": 2.9672,
      "step": 111216
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9308314323425293,
      "learning_rate": 0.0003162908279598602,
      "loss": 2.9334,
      "step": 111217
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.78292715549469,
      "learning_rate": 0.00031628674343173066,
      "loss": 3.0221,
      "step": 111218
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.727748155593872,
      "learning_rate": 0.0003162826589005731,
      "loss": 2.9672,
      "step": 111219
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2155799865722656,
      "learning_rate": 0.0003162785743663883,
      "loss": 3.1447,
      "step": 111220
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.97059166431427,
      "learning_rate": 0.00031627448982917696,
      "loss": 3.1725,
      "step": 111221
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6278796195983887,
      "learning_rate": 0.00031627040528893995,
      "loss": 3.1951,
      "step": 111222
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8840057849884033,
      "learning_rate": 0.0003162663207456779,
      "loss": 2.9123,
      "step": 111223
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8474152088165283,
      "learning_rate": 0.00031626223619939163,
      "loss": 3.071,
      "step": 111224
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7036077976226807,
      "learning_rate": 0.0003162581516500819,
      "loss": 3.2529,
      "step": 111225
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7915695905685425,
      "learning_rate": 0.0003162540670977496,
      "loss": 2.661,
      "step": 111226
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7312402725219727,
      "learning_rate": 0.0003162499825423952,
      "loss": 3.0337,
      "step": 111227
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.211921215057373,
      "learning_rate": 0.0003162458979840197,
      "loss": 2.8294,
      "step": 111228
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0241332054138184,
      "learning_rate": 0.0003162418134226238,
      "loss": 2.9257,
      "step": 111229
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.391282796859741,
      "learning_rate": 0.0003162377288582082,
      "loss": 3.0034,
      "step": 111230
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1602962017059326,
      "learning_rate": 0.0003162336442907737,
      "loss": 2.8736,
      "step": 111231
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7756927013397217,
      "learning_rate": 0.00031622955972032105,
      "loss": 2.8266,
      "step": 111232
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6314276456832886,
      "learning_rate": 0.00031622547514685107,
      "loss": 2.96,
      "step": 111233
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6848773956298828,
      "learning_rate": 0.0003162213905703644,
      "loss": 2.9253,
      "step": 111234
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.463869094848633,
      "learning_rate": 0.000316217305990862,
      "loss": 2.7148,
      "step": 111235
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.522630453109741,
      "learning_rate": 0.0003162132214083443,
      "loss": 3.0968,
      "step": 111236
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7911540269851685,
      "learning_rate": 0.0003162091368228124,
      "loss": 2.9325,
      "step": 111237
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.639711856842041,
      "learning_rate": 0.00031620505223426684,
      "loss": 2.87,
      "step": 111238
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.4350361824035645,
      "learning_rate": 0.00031620096764270846,
      "loss": 2.9834,
      "step": 111239
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.206385374069214,
      "learning_rate": 0.0003161968830481381,
      "loss": 2.9809,
      "step": 111240
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0205307006835938,
      "learning_rate": 0.0003161927984505564,
      "loss": 3.0868,
      "step": 111241
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5829293727874756,
      "learning_rate": 0.00031618871384996413,
      "loss": 2.8006,
      "step": 111242
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1554653644561768,
      "learning_rate": 0.00031618462924636205,
      "loss": 2.8081,
      "step": 111243
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9738222360610962,
      "learning_rate": 0.00031618054463975094,
      "loss": 2.9918,
      "step": 111244
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6925349235534668,
      "learning_rate": 0.0003161764600301316,
      "loss": 2.8666,
      "step": 111245
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8603540658950806,
      "learning_rate": 0.0003161723754175047,
      "loss": 3.02,
      "step": 111246
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5494332313537598,
      "learning_rate": 0.00031616829080187116,
      "loss": 2.9719,
      "step": 111247
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8932325839996338,
      "learning_rate": 0.00031616420618323153,
      "loss": 2.9634,
      "step": 111248
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7153891324996948,
      "learning_rate": 0.00031616012156158665,
      "loss": 2.8639,
      "step": 111249
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6909023523330688,
      "learning_rate": 0.0003161560369369374,
      "loss": 2.7825,
      "step": 111250
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8961024284362793,
      "learning_rate": 0.00031615195230928436,
      "loss": 3.0049,
      "step": 111251
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.851933479309082,
      "learning_rate": 0.0003161478676786284,
      "loss": 3.2088,
      "step": 111252
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9101272821426392,
      "learning_rate": 0.0003161437830449702,
      "loss": 3.087,
      "step": 111253
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1605522632598877,
      "learning_rate": 0.0003161396984083106,
      "loss": 2.9065,
      "step": 111254
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.101196050643921,
      "learning_rate": 0.0003161356137686503,
      "loss": 3.1258,
      "step": 111255
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.074557304382324,
      "learning_rate": 0.00031613152912599013,
      "loss": 3.145,
      "step": 111256
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7905628681182861,
      "learning_rate": 0.0003161274444803308,
      "loss": 2.9926,
      "step": 111257
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.637984037399292,
      "learning_rate": 0.00031612335983167305,
      "loss": 2.913,
      "step": 111258
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.304547071456909,
      "learning_rate": 0.0003161192751800177,
      "loss": 2.8899,
      "step": 111259
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.220311164855957,
      "learning_rate": 0.00031611519052536545,
      "loss": 3.0463,
      "step": 111260
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.636508822441101,
      "learning_rate": 0.00031611110586771704,
      "loss": 2.948,
      "step": 111261
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7682603597640991,
      "learning_rate": 0.00031610702120707336,
      "loss": 2.9436,
      "step": 111262
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.38372540473938,
      "learning_rate": 0.000316102936543435,
      "loss": 3.0278,
      "step": 111263
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5145444869995117,
      "learning_rate": 0.00031609885187680283,
      "loss": 2.9288,
      "step": 111264
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8584500551223755,
      "learning_rate": 0.00031609476720717756,
      "loss": 2.9622,
      "step": 111265
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8081285953521729,
      "learning_rate": 0.00031609068253456,
      "loss": 2.845,
      "step": 111266
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7686543464660645,
      "learning_rate": 0.00031608659785895086,
      "loss": 3.0355,
      "step": 111267
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6922214031219482,
      "learning_rate": 0.0003160825131803509,
      "loss": 2.9329,
      "step": 111268
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7631890773773193,
      "learning_rate": 0.0003160784284987609,
      "loss": 2.8273,
      "step": 111269
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8053642511367798,
      "learning_rate": 0.00031607434381418167,
      "loss": 2.7668,
      "step": 111270
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7782902717590332,
      "learning_rate": 0.00031607025912661383,
      "loss": 3.0979,
      "step": 111271
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4374656677246094,
      "learning_rate": 0.0003160661744360583,
      "loss": 2.888,
      "step": 111272
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8636267185211182,
      "learning_rate": 0.00031606208974251576,
      "loss": 2.8309,
      "step": 111273
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9290951490402222,
      "learning_rate": 0.00031605800504598693,
      "loss": 2.8278,
      "step": 111274
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7119983434677124,
      "learning_rate": 0.00031605392034647265,
      "loss": 2.7314,
      "step": 111275
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.694887161254883,
      "learning_rate": 0.0003160498356439736,
      "loss": 3.4484,
      "step": 111276
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0159153938293457,
      "learning_rate": 0.00031604575093849063,
      "loss": 2.9312,
      "step": 111277
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2129929065704346,
      "learning_rate": 0.00031604166623002447,
      "loss": 3.0144,
      "step": 111278
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.800963282585144,
      "learning_rate": 0.00031603758151857583,
      "loss": 3.0247,
      "step": 111279
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4715113639831543,
      "learning_rate": 0.00031603349680414547,
      "loss": 3.0557,
      "step": 111280
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8482069969177246,
      "learning_rate": 0.00031602941208673426,
      "loss": 3.1579,
      "step": 111281
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8944357633590698,
      "learning_rate": 0.0003160253273663428,
      "loss": 2.9263,
      "step": 111282
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6112498044967651,
      "learning_rate": 0.00031602124264297195,
      "loss": 3.2108,
      "step": 111283
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.370898723602295,
      "learning_rate": 0.0003160171579166225,
      "loss": 2.9519,
      "step": 111284
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.49739146232605,
      "learning_rate": 0.0003160130731872951,
      "loss": 2.777,
      "step": 111285
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1883435249328613,
      "learning_rate": 0.0003160089884549906,
      "loss": 3.2061,
      "step": 111286
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9508579969406128,
      "learning_rate": 0.0003160049037197097,
      "loss": 3.1844,
      "step": 111287
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9069968461990356,
      "learning_rate": 0.0003160008189814532,
      "loss": 2.9322,
      "step": 111288
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.973085641860962,
      "learning_rate": 0.0003159967342402218,
      "loss": 3.0437,
      "step": 111289
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.885158658027649,
      "learning_rate": 0.0003159926494960164,
      "loss": 2.7473,
      "step": 111290
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.893429160118103,
      "learning_rate": 0.0003159885647488376,
      "loss": 2.918,
      "step": 111291
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0559515953063965,
      "learning_rate": 0.00031598447999868625,
      "loss": 2.7381,
      "step": 111292
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.094026803970337,
      "learning_rate": 0.0003159803952455631,
      "loss": 3.2015,
      "step": 111293
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0508370399475098,
      "learning_rate": 0.0003159763104894688,
      "loss": 2.8611,
      "step": 111294
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2205071449279785,
      "learning_rate": 0.00031597222573040423,
      "loss": 2.9381,
      "step": 111295
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9904530048370361,
      "learning_rate": 0.00031596814096837024,
      "loss": 3.1275,
      "step": 111296
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6431788206100464,
      "learning_rate": 0.00031596405620336733,
      "loss": 3.0691,
      "step": 111297
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8376880884170532,
      "learning_rate": 0.00031595997143539647,
      "loss": 3.3398,
      "step": 111298
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.814357042312622,
      "learning_rate": 0.0003159558866644583,
      "loss": 2.9481,
      "step": 111299
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2805159091949463,
      "learning_rate": 0.0003159518018905537,
      "loss": 2.9141,
      "step": 111300
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2636775970458984,
      "learning_rate": 0.0003159477171136833,
      "loss": 2.9402,
      "step": 111301
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.892967939376831,
      "learning_rate": 0.00031594363233384794,
      "loss": 3.0988,
      "step": 111302
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.901188373565674,
      "learning_rate": 0.00031593954755104845,
      "loss": 3.1245,
      "step": 111303
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1243529319763184,
      "learning_rate": 0.00031593546276528534,
      "loss": 2.6359,
      "step": 111304
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.165457010269165,
      "learning_rate": 0.0003159313779765596,
      "loss": 3.0424,
      "step": 111305
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0784201622009277,
      "learning_rate": 0.0003159272931848719,
      "loss": 2.929,
      "step": 111306
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1868886947631836,
      "learning_rate": 0.00031592320839022306,
      "loss": 2.8657,
      "step": 111307
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.063537836074829,
      "learning_rate": 0.00031591912359261375,
      "loss": 2.9628,
      "step": 111308
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.033923625946045,
      "learning_rate": 0.00031591503879204477,
      "loss": 3.0888,
      "step": 111309
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.792245626449585,
      "learning_rate": 0.00031591095398851685,
      "loss": 2.9722,
      "step": 111310
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6916205883026123,
      "learning_rate": 0.0003159068691820309,
      "loss": 2.7909,
      "step": 111311
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0751988887786865,
      "learning_rate": 0.00031590278437258746,
      "loss": 3.0243,
      "step": 111312
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8407981395721436,
      "learning_rate": 0.00031589869956018745,
      "loss": 3.1447,
      "step": 111313
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8126702308654785,
      "learning_rate": 0.00031589461474483153,
      "loss": 3.1128,
      "step": 111314
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.7230496406555176,
      "learning_rate": 0.00031589052992652057,
      "loss": 2.9617,
      "step": 111315
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.881683826446533,
      "learning_rate": 0.0003158864451052552,
      "loss": 3.1533,
      "step": 111316
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9643853902816772,
      "learning_rate": 0.00031588236028103626,
      "loss": 3.0473,
      "step": 111317
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.800673246383667,
      "learning_rate": 0.0003158782754538645,
      "loss": 3.2698,
      "step": 111318
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.7372729778289795,
      "learning_rate": 0.0003158741906237406,
      "loss": 3.1181,
      "step": 111319
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.772440195083618,
      "learning_rate": 0.00031587010579066545,
      "loss": 2.9168,
      "step": 111320
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.966813325881958,
      "learning_rate": 0.00031586602095463976,
      "loss": 2.8438,
      "step": 111321
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8472695350646973,
      "learning_rate": 0.0003158619361156642,
      "loss": 2.9097,
      "step": 111322
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9613852500915527,
      "learning_rate": 0.0003158578512737397,
      "loss": 3.1152,
      "step": 111323
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6895205974578857,
      "learning_rate": 0.0003158537664288669,
      "loss": 3.1586,
      "step": 111324
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.936394214630127,
      "learning_rate": 0.00031584968158104653,
      "loss": 3.2582,
      "step": 111325
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.433868408203125,
      "learning_rate": 0.00031584559673027954,
      "loss": 2.9959,
      "step": 111326
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8935915231704712,
      "learning_rate": 0.0003158415118765664,
      "loss": 2.9395,
      "step": 111327
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.29716420173645,
      "learning_rate": 0.00031583742701990806,
      "loss": 3.0627,
      "step": 111328
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1783268451690674,
      "learning_rate": 0.00031583334216030536,
      "loss": 3.1453,
      "step": 111329
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0622951984405518,
      "learning_rate": 0.0003158292572977588,
      "loss": 2.8323,
      "step": 111330
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.550166130065918,
      "learning_rate": 0.0003158251724322693,
      "loss": 2.962,
      "step": 111331
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1604080200195312,
      "learning_rate": 0.0003158210875638376,
      "loss": 3.0417,
      "step": 111332
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4295449256896973,
      "learning_rate": 0.00031581700269246455,
      "loss": 2.981,
      "step": 111333
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.143009662628174,
      "learning_rate": 0.00031581291781815075,
      "loss": 2.9403,
      "step": 111334
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.856013536453247,
      "learning_rate": 0.000315808832940897,
      "loss": 3.0425,
      "step": 111335
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9045004844665527,
      "learning_rate": 0.00031580474806070424,
      "loss": 3.0083,
      "step": 111336
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7765837907791138,
      "learning_rate": 0.00031580066317757295,
      "loss": 3.1128,
      "step": 111337
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7229965925216675,
      "learning_rate": 0.000315796578291504,
      "loss": 2.8917,
      "step": 111338
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8614643812179565,
      "learning_rate": 0.00031579249340249824,
      "loss": 2.8577,
      "step": 111339
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.482100009918213,
      "learning_rate": 0.00031578840851055633,
      "loss": 2.9505,
      "step": 111340
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.047137975692749,
      "learning_rate": 0.00031578432361567907,
      "loss": 3.0676,
      "step": 111341
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7572002410888672,
      "learning_rate": 0.0003157802387178672,
      "loss": 3.0057,
      "step": 111342
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.287449836730957,
      "learning_rate": 0.00031577615381712145,
      "loss": 2.9504,
      "step": 111343
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8390543460845947,
      "learning_rate": 0.0003157720689134427,
      "loss": 2.9675,
      "step": 111344
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8887248039245605,
      "learning_rate": 0.0003157679840068316,
      "loss": 3.113,
      "step": 111345
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3783681392669678,
      "learning_rate": 0.0003157638990972888,
      "loss": 2.8901,
      "step": 111346
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1939809322357178,
      "learning_rate": 0.0003157598141848153,
      "loss": 3.2251,
      "step": 111347
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6549724340438843,
      "learning_rate": 0.0003157557292694118,
      "loss": 2.9059,
      "step": 111348
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7938916683197021,
      "learning_rate": 0.0003157516443510789,
      "loss": 3.0276,
      "step": 111349
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.671635627746582,
      "learning_rate": 0.00031574755942981753,
      "loss": 3.3247,
      "step": 111350
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.456441879272461,
      "learning_rate": 0.0003157434745056284,
      "loss": 2.9522,
      "step": 111351
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8842376470565796,
      "learning_rate": 0.0003157393895785123,
      "loss": 2.5097,
      "step": 111352
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2600317001342773,
      "learning_rate": 0.00031573530464846985,
      "loss": 2.9915,
      "step": 111353
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3904144763946533,
      "learning_rate": 0.00031573121971550206,
      "loss": 2.8959,
      "step": 111354
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9866304397583008,
      "learning_rate": 0.0003157271347796094,
      "loss": 3.0239,
      "step": 111355
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8985992670059204,
      "learning_rate": 0.0003157230498407928,
      "loss": 3.0099,
      "step": 111356
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.586769938468933,
      "learning_rate": 0.00031571896489905306,
      "loss": 3.043,
      "step": 111357
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7849233150482178,
      "learning_rate": 0.0003157148799543908,
      "loss": 3.0312,
      "step": 111358
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.936458945274353,
      "learning_rate": 0.00031571079500680683,
      "loss": 2.7356,
      "step": 111359
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.189805269241333,
      "learning_rate": 0.00031570671005630197,
      "loss": 2.759,
      "step": 111360
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0359981060028076,
      "learning_rate": 0.0003157026251028769,
      "loss": 3.0537,
      "step": 111361
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4502742290496826,
      "learning_rate": 0.0003156985401465325,
      "loss": 2.6898,
      "step": 111362
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.318936347961426,
      "learning_rate": 0.0003156944551872693,
      "loss": 2.994,
      "step": 111363
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3384368419647217,
      "learning_rate": 0.00031569037022508836,
      "loss": 2.8649,
      "step": 111364
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.312861442565918,
      "learning_rate": 0.0003156862852599902,
      "loss": 3.3657,
      "step": 111365
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5232536792755127,
      "learning_rate": 0.0003156822002919757,
      "loss": 2.866,
      "step": 111366
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0487682819366455,
      "learning_rate": 0.00031567811532104553,
      "loss": 3.0794,
      "step": 111367
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0754334926605225,
      "learning_rate": 0.0003156740303472005,
      "loss": 3.0063,
      "step": 111368
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6673474311828613,
      "learning_rate": 0.0003156699453704414,
      "loss": 2.9859,
      "step": 111369
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0681679248809814,
      "learning_rate": 0.00031566586039076896,
      "loss": 2.9692,
      "step": 111370
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0755679607391357,
      "learning_rate": 0.0003156617754081839,
      "loss": 3.0949,
      "step": 111371
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.131131410598755,
      "learning_rate": 0.0003156576904226871,
      "loss": 3.0631,
      "step": 111372
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3215768337249756,
      "learning_rate": 0.00031565360543427926,
      "loss": 2.8928,
      "step": 111373
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9389441013336182,
      "learning_rate": 0.000315649520442961,
      "loss": 2.9159,
      "step": 111374
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.273827314376831,
      "learning_rate": 0.0003156454354487333,
      "loss": 2.9867,
      "step": 111375
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.164741277694702,
      "learning_rate": 0.0003156413504515968,
      "loss": 2.7659,
      "step": 111376
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2226951122283936,
      "learning_rate": 0.00031563726545155225,
      "loss": 2.895,
      "step": 111377
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.003255605697632,
      "learning_rate": 0.00031563318044860046,
      "loss": 2.9777,
      "step": 111378
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.807934045791626,
      "learning_rate": 0.00031562909544274214,
      "loss": 2.8716,
      "step": 111379
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4488449096679688,
      "learning_rate": 0.00031562501043397805,
      "loss": 2.8547,
      "step": 111380
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.928822636604309,
      "learning_rate": 0.00031562092542230906,
      "loss": 3.2218,
      "step": 111381
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1850292682647705,
      "learning_rate": 0.00031561684040773576,
      "loss": 2.9553,
      "step": 111382
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0942044258117676,
      "learning_rate": 0.000315612755390259,
      "loss": 2.9086,
      "step": 111383
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.151153802871704,
      "learning_rate": 0.00031560867036987966,
      "loss": 2.8126,
      "step": 111384
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.605519413948059,
      "learning_rate": 0.0003156045853465982,
      "loss": 3.1337,
      "step": 111385
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9331088066101074,
      "learning_rate": 0.0003156005003204156,
      "loss": 3.1453,
      "step": 111386
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9794392585754395,
      "learning_rate": 0.0003155964152913326,
      "loss": 3.381,
      "step": 111387
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.123736619949341,
      "learning_rate": 0.00031559233025935,
      "loss": 2.7942,
      "step": 111388
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0809333324432373,
      "learning_rate": 0.00031558824522446845,
      "loss": 2.8457,
      "step": 111389
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.846644401550293,
      "learning_rate": 0.00031558416018668867,
      "loss": 2.9375,
      "step": 111390
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.763993263244629,
      "learning_rate": 0.0003155800751460115,
      "loss": 3.058,
      "step": 111391
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8025174140930176,
      "learning_rate": 0.0003155759901024379,
      "loss": 3.0705,
      "step": 111392
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.076871156692505,
      "learning_rate": 0.0003155719050559682,
      "loss": 2.844,
      "step": 111393
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9781513214111328,
      "learning_rate": 0.00031556782000660347,
      "loss": 2.8869,
      "step": 111394
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6141719818115234,
      "learning_rate": 0.00031556373495434444,
      "loss": 3.1667,
      "step": 111395
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.645089626312256,
      "learning_rate": 0.0003155596498991918,
      "loss": 2.717,
      "step": 111396
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6117233037948608,
      "learning_rate": 0.0003155555648411462,
      "loss": 2.8936,
      "step": 111397
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.53067684173584,
      "learning_rate": 0.00031555147978020863,
      "loss": 3.0625,
      "step": 111398
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8512091636657715,
      "learning_rate": 0.0003155473947163798,
      "loss": 2.9144,
      "step": 111399
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4504075050354004,
      "learning_rate": 0.00031554330964966035,
      "loss": 2.8332,
      "step": 111400
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0466623306274414,
      "learning_rate": 0.00031553922458005105,
      "loss": 3.1841,
      "step": 111401
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8710596561431885,
      "learning_rate": 0.0003155351395075528,
      "loss": 2.6552,
      "step": 111402
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8416199684143066,
      "learning_rate": 0.0003155310544321663,
      "loss": 2.9067,
      "step": 111403
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7471134662628174,
      "learning_rate": 0.00031552696935389224,
      "loss": 3.084,
      "step": 111404
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8258644342422485,
      "learning_rate": 0.00031552288427273137,
      "loss": 2.9847,
      "step": 111405
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1522676944732666,
      "learning_rate": 0.0003155187991886846,
      "loss": 3.119,
      "step": 111406
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.536933183670044,
      "learning_rate": 0.00031551471410175253,
      "loss": 2.8174,
      "step": 111407
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.037184953689575,
      "learning_rate": 0.000315510629011936,
      "loss": 2.9115,
      "step": 111408
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1491787433624268,
      "learning_rate": 0.00031550654391923583,
      "loss": 3.2847,
      "step": 111409
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2538297176361084,
      "learning_rate": 0.0003155024588236525,
      "loss": 2.8415,
      "step": 111410
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9483903646469116,
      "learning_rate": 0.00031549837372518713,
      "loss": 3.2209,
      "step": 111411
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7214610576629639,
      "learning_rate": 0.0003154942886238403,
      "loss": 3.0584,
      "step": 111412
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.814199447631836,
      "learning_rate": 0.00031549020351961276,
      "loss": 3.0988,
      "step": 111413
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5487743616104126,
      "learning_rate": 0.0003154861184125053,
      "loss": 2.8461,
      "step": 111414
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.115901231765747,
      "learning_rate": 0.00031548203330251874,
      "loss": 2.9612,
      "step": 111415
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7727817296981812,
      "learning_rate": 0.0003154779481896537,
      "loss": 2.8629,
      "step": 111416
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0700697898864746,
      "learning_rate": 0.00031547386307391104,
      "loss": 3.0665,
      "step": 111417
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2723264694213867,
      "learning_rate": 0.0003154697779552915,
      "loss": 2.9257,
      "step": 111418
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0614452362060547,
      "learning_rate": 0.00031546569283379583,
      "loss": 2.9127,
      "step": 111419
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.921618938446045,
      "learning_rate": 0.0003154616077094248,
      "loss": 3.0735,
      "step": 111420
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7392168045043945,
      "learning_rate": 0.0003154575225821792,
      "loss": 2.801,
      "step": 111421
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1729631423950195,
      "learning_rate": 0.00031545343745205975,
      "loss": 3.1174,
      "step": 111422
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5538982152938843,
      "learning_rate": 0.00031544935231906713,
      "loss": 2.8518,
      "step": 111423
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8396662473678589,
      "learning_rate": 0.0003154452671832023,
      "loss": 3.0797,
      "step": 111424
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7854416370391846,
      "learning_rate": 0.0003154411820444658,
      "loss": 3.0911,
      "step": 111425
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4461846351623535,
      "learning_rate": 0.0003154370969028585,
      "loss": 2.9837,
      "step": 111426
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3714494705200195,
      "learning_rate": 0.0003154330117583812,
      "loss": 2.98,
      "step": 111427
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4684925079345703,
      "learning_rate": 0.0003154289266110346,
      "loss": 2.8705,
      "step": 111428
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4464592933654785,
      "learning_rate": 0.00031542484146081945,
      "loss": 3.1228,
      "step": 111429
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5285236835479736,
      "learning_rate": 0.00031542075630773667,
      "loss": 3.2722,
      "step": 111430
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.152536630630493,
      "learning_rate": 0.0003154166711517867,
      "loss": 2.7481,
      "step": 111431
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0530030727386475,
      "learning_rate": 0.0003154125859929705,
      "loss": 2.9607,
      "step": 111432
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.22152042388916,
      "learning_rate": 0.0003154085008312889,
      "loss": 3.1615,
      "step": 111433
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.752706289291382,
      "learning_rate": 0.00031540441566674254,
      "loss": 3.0174,
      "step": 111434
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8639419078826904,
      "learning_rate": 0.00031540033049933213,
      "loss": 2.7622,
      "step": 111435
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9082988500595093,
      "learning_rate": 0.0003153962453290587,
      "loss": 3.1775,
      "step": 111436
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.693922519683838,
      "learning_rate": 0.0003153921601559226,
      "loss": 3.2539,
      "step": 111437
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6937108039855957,
      "learning_rate": 0.0003153880749799249,
      "loss": 3.0865,
      "step": 111438
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7402726411819458,
      "learning_rate": 0.0003153839898010663,
      "loss": 2.8678,
      "step": 111439
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8275978565216064,
      "learning_rate": 0.00031537990461934744,
      "loss": 3.0314,
      "step": 111440
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0320942401885986,
      "learning_rate": 0.0003153758194347691,
      "loss": 3.0343,
      "step": 111441
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.119767427444458,
      "learning_rate": 0.00031537173424733237,
      "loss": 2.9252,
      "step": 111442
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.218127489089966,
      "learning_rate": 0.0003153676490570375,
      "loss": 2.7854,
      "step": 111443
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0723633766174316,
      "learning_rate": 0.00031536356386388563,
      "loss": 2.6563,
      "step": 111444
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6981546878814697,
      "learning_rate": 0.00031535947866787734,
      "loss": 3.1006,
      "step": 111445
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.915299415588379,
      "learning_rate": 0.0003153553934690134,
      "loss": 3.0144,
      "step": 111446
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.852287530899048,
      "learning_rate": 0.0003153513082672946,
      "loss": 3.0487,
      "step": 111447
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1487298011779785,
      "learning_rate": 0.00031534722306272183,
      "loss": 3.0451,
      "step": 111448
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.636030673980713,
      "learning_rate": 0.00031534313785529553,
      "loss": 3.0157,
      "step": 111449
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.46042799949646,
      "learning_rate": 0.00031533905264501675,
      "loss": 3.1942,
      "step": 111450
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9493907690048218,
      "learning_rate": 0.0003153349674318862,
      "loss": 2.8136,
      "step": 111451
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2849409580230713,
      "learning_rate": 0.00031533088221590456,
      "loss": 2.813,
      "step": 111452
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.376944065093994,
      "learning_rate": 0.0003153267969970725,
      "loss": 2.827,
      "step": 111453
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0458900928497314,
      "learning_rate": 0.00031532271177539106,
      "loss": 2.963,
      "step": 111454
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.292346954345703,
      "learning_rate": 0.0003153186265508607,
      "loss": 2.8803,
      "step": 111455
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4914863109588623,
      "learning_rate": 0.00031531454132348243,
      "loss": 3.078,
      "step": 111456
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8043568134307861,
      "learning_rate": 0.00031531045609325693,
      "loss": 2.9029,
      "step": 111457
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8092130422592163,
      "learning_rate": 0.0003153063708601848,
      "loss": 3.0105,
      "step": 111458
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.906730055809021,
      "learning_rate": 0.000315302285624267,
      "loss": 2.8577,
      "step": 111459
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8339214324951172,
      "learning_rate": 0.0003152982003855042,
      "loss": 2.8752,
      "step": 111460
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1531944274902344,
      "learning_rate": 0.0003152941151438972,
      "loss": 2.8446,
      "step": 111461
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.043386220932007,
      "learning_rate": 0.00031529002989944666,
      "loss": 2.9051,
      "step": 111462
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0885117053985596,
      "learning_rate": 0.0003152859446521535,
      "loss": 3.0038,
      "step": 111463
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6712405681610107,
      "learning_rate": 0.0003152818594020184,
      "loss": 2.8793,
      "step": 111464
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9242851734161377,
      "learning_rate": 0.000315277774149042,
      "loss": 2.8666,
      "step": 111465
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9962141513824463,
      "learning_rate": 0.00031527368889322526,
      "loss": 2.8472,
      "step": 111466
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.106759786605835,
      "learning_rate": 0.0003152696036345689,
      "loss": 2.9551,
      "step": 111467
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7152917385101318,
      "learning_rate": 0.00031526551837307354,
      "loss": 2.5449,
      "step": 111468
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0805180072784424,
      "learning_rate": 0.00031526143310874,
      "loss": 2.9569,
      "step": 111469
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1474075317382812,
      "learning_rate": 0.00031525734784156924,
      "loss": 3.0074,
      "step": 111470
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7916151285171509,
      "learning_rate": 0.00031525326257156165,
      "loss": 2.815,
      "step": 111471
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6693564653396606,
      "learning_rate": 0.0003152491772987183,
      "loss": 2.9761,
      "step": 111472
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.435143232345581,
      "learning_rate": 0.00031524509202303987,
      "loss": 2.8849,
      "step": 111473
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.343881130218506,
      "learning_rate": 0.00031524100674452704,
      "loss": 3.0326,
      "step": 111474
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0471932888031006,
      "learning_rate": 0.00031523692146318054,
      "loss": 3.0471,
      "step": 111475
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7865897417068481,
      "learning_rate": 0.0003152328361790014,
      "loss": 2.834,
      "step": 111476
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.101822853088379,
      "learning_rate": 0.00031522875089199005,
      "loss": 3.0369,
      "step": 111477
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9390792846679688,
      "learning_rate": 0.00031522466560214736,
      "loss": 3.1936,
      "step": 111478
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.084700345993042,
      "learning_rate": 0.00031522058030947415,
      "loss": 2.9598,
      "step": 111479
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9551702737808228,
      "learning_rate": 0.00031521649501397127,
      "loss": 2.978,
      "step": 111480
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0147457122802734,
      "learning_rate": 0.0003152124097156393,
      "loss": 2.9355,
      "step": 111481
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9343806505203247,
      "learning_rate": 0.000315208324414479,
      "loss": 3.1815,
      "step": 111482
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7082411050796509,
      "learning_rate": 0.00031520423911049115,
      "loss": 2.8701,
      "step": 111483
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.689469814300537,
      "learning_rate": 0.0003152001538036766,
      "loss": 2.8447,
      "step": 111484
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.219792604446411,
      "learning_rate": 0.00031519606849403603,
      "loss": 3.3062,
      "step": 111485
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5694961547851562,
      "learning_rate": 0.00031519198318157027,
      "loss": 3.1331,
      "step": 111486
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5954885482788086,
      "learning_rate": 0.00031518789786628,
      "loss": 2.9126,
      "step": 111487
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1486878395080566,
      "learning_rate": 0.000315183812548166,
      "loss": 3.2329,
      "step": 111488
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9056785106658936,
      "learning_rate": 0.0003151797272272291,
      "loss": 2.9806,
      "step": 111489
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9950038194656372,
      "learning_rate": 0.0003151756419034699,
      "loss": 3.0683,
      "step": 111490
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4368550777435303,
      "learning_rate": 0.00031517155657688934,
      "loss": 2.9902,
      "step": 111491
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0083370208740234,
      "learning_rate": 0.0003151674712474881,
      "loss": 2.9889,
      "step": 111492
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.27608323097229,
      "learning_rate": 0.00031516338591526695,
      "loss": 3.0267,
      "step": 111493
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0972254276275635,
      "learning_rate": 0.0003151593005802267,
      "loss": 3.2372,
      "step": 111494
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9443615674972534,
      "learning_rate": 0.0003151552152423679,
      "loss": 2.9909,
      "step": 111495
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.025637626647949,
      "learning_rate": 0.0003151511299016915,
      "loss": 3.1317,
      "step": 111496
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2416574954986572,
      "learning_rate": 0.0003151470445581983,
      "loss": 3.0589,
      "step": 111497
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7618322372436523,
      "learning_rate": 0.0003151429592118889,
      "loss": 3.1529,
      "step": 111498
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1292693614959717,
      "learning_rate": 0.0003151388738627642,
      "loss": 2.893,
      "step": 111499
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0469090938568115,
      "learning_rate": 0.00031513478851082484,
      "loss": 3.0215,
      "step": 111500
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7364530563354492,
      "learning_rate": 0.00031513070315607167,
      "loss": 3.2332,
      "step": 111501
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0416109561920166,
      "learning_rate": 0.0003151266177985054,
      "loss": 2.9927,
      "step": 111502
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8345917463302612,
      "learning_rate": 0.00031512253243812683,
      "loss": 3.0259,
      "step": 111503
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2800166606903076,
      "learning_rate": 0.0003151184470749366,
      "loss": 2.8208,
      "step": 111504
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.955542802810669,
      "learning_rate": 0.00031511436170893566,
      "loss": 2.9806,
      "step": 111505
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.965000867843628,
      "learning_rate": 0.00031511027634012473,
      "loss": 3.0503,
      "step": 111506
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.88825523853302,
      "learning_rate": 0.0003151061909685044,
      "loss": 3.037,
      "step": 111507
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7850253582000732,
      "learning_rate": 0.00031510210559407563,
      "loss": 3.1403,
      "step": 111508
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.178337574005127,
      "learning_rate": 0.00031509802021683904,
      "loss": 2.9103,
      "step": 111509
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0610015392303467,
      "learning_rate": 0.00031509393483679544,
      "loss": 2.8841,
      "step": 111510
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7774609327316284,
      "learning_rate": 0.0003150898494539456,
      "loss": 3.041,
      "step": 111511
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8843202590942383,
      "learning_rate": 0.0003150857640682903,
      "loss": 2.8104,
      "step": 111512
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6674591302871704,
      "learning_rate": 0.00031508167867983026,
      "loss": 2.9853,
      "step": 111513
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.27773118019104,
      "learning_rate": 0.00031507759328856626,
      "loss": 3.0653,
      "step": 111514
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.886842131614685,
      "learning_rate": 0.00031507350789449905,
      "loss": 2.7697,
      "step": 111515
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0888476371765137,
      "learning_rate": 0.00031506942249762935,
      "loss": 3.1878,
      "step": 111516
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1108052730560303,
      "learning_rate": 0.00031506533709795795,
      "loss": 3.0661,
      "step": 111517
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5886218547821045,
      "learning_rate": 0.0003150612516954857,
      "loss": 3.0368,
      "step": 111518
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1255593299865723,
      "learning_rate": 0.00031505716629021325,
      "loss": 2.8982,
      "step": 111519
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1052582263946533,
      "learning_rate": 0.0003150530808821413,
      "loss": 3.0373,
      "step": 111520
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.219313144683838,
      "learning_rate": 0.00031504899547127085,
      "loss": 3.1316,
      "step": 111521
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.670947313308716,
      "learning_rate": 0.00031504491005760236,
      "loss": 2.9986,
      "step": 111522
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.002194404602051,
      "learning_rate": 0.00031504082464113684,
      "loss": 2.7275,
      "step": 111523
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7262791395187378,
      "learning_rate": 0.00031503673922187494,
      "loss": 3.0956,
      "step": 111524
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.71178936958313,
      "learning_rate": 0.0003150326537998174,
      "loss": 3.0776,
      "step": 111525
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.204021692276001,
      "learning_rate": 0.00031502856837496495,
      "loss": 3.1692,
      "step": 111526
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.243541717529297,
      "learning_rate": 0.0003150244829473185,
      "loss": 2.9577,
      "step": 111527
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.900195837020874,
      "learning_rate": 0.0003150203975168787,
      "loss": 3.0146,
      "step": 111528
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4752070903778076,
      "learning_rate": 0.0003150163120836462,
      "loss": 2.9771,
      "step": 111529
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.367990016937256,
      "learning_rate": 0.00031501222664762204,
      "loss": 3.3306,
      "step": 111530
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.991909980773926,
      "learning_rate": 0.0003150081412088068,
      "loss": 3.1563,
      "step": 111531
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.811068296432495,
      "learning_rate": 0.00031500405576720115,
      "loss": 2.8553,
      "step": 111532
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2458279132843018,
      "learning_rate": 0.00031499997032280604,
      "loss": 3.1458,
      "step": 111533
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.3900980949401855,
      "learning_rate": 0.0003149958848756222,
      "loss": 2.876,
      "step": 111534
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.3791301250457764,
      "learning_rate": 0.0003149917994256503,
      "loss": 2.9641,
      "step": 111535
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5381834506988525,
      "learning_rate": 0.00031498771397289103,
      "loss": 3.066,
      "step": 111536
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8852792978286743,
      "learning_rate": 0.00031498362851734544,
      "loss": 2.8782,
      "step": 111537
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4841132164001465,
      "learning_rate": 0.000314979543059014,
      "loss": 2.9425,
      "step": 111538
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7030913829803467,
      "learning_rate": 0.0003149754575978976,
      "loss": 2.9528,
      "step": 111539
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8273907899856567,
      "learning_rate": 0.00031497137213399706,
      "loss": 3.0881,
      "step": 111540
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8211829662322998,
      "learning_rate": 0.00031496728666731297,
      "loss": 3.232,
      "step": 111541
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7262824773788452,
      "learning_rate": 0.00031496320119784617,
      "loss": 3.2954,
      "step": 111542
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6291753053665161,
      "learning_rate": 0.0003149591157255976,
      "loss": 3.3472,
      "step": 111543
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.721207857131958,
      "learning_rate": 0.00031495503025056763,
      "loss": 3.0314,
      "step": 111544
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7292399406433105,
      "learning_rate": 0.0003149509447727573,
      "loss": 2.9462,
      "step": 111545
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8792178630828857,
      "learning_rate": 0.0003149468592921674,
      "loss": 2.8153,
      "step": 111546
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.217975616455078,
      "learning_rate": 0.0003149427738087985,
      "loss": 3.1204,
      "step": 111547
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.4299800395965576,
      "learning_rate": 0.00031493868832265144,
      "loss": 2.8157,
      "step": 111548
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8425981998443604,
      "learning_rate": 0.0003149346028337271,
      "loss": 2.8292,
      "step": 111549
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0153679847717285,
      "learning_rate": 0.00031493051734202604,
      "loss": 3.1306,
      "step": 111550
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7430663108825684,
      "learning_rate": 0.00031492643184754913,
      "loss": 3.1468,
      "step": 111551
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2056188583374023,
      "learning_rate": 0.00031492234635029716,
      "loss": 2.9641,
      "step": 111552
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7757562398910522,
      "learning_rate": 0.0003149182608502708,
      "loss": 3.0318,
      "step": 111553
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.684108018875122,
      "learning_rate": 0.0003149141753474708,
      "loss": 3.0006,
      "step": 111554
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7062565088272095,
      "learning_rate": 0.00031491008984189814,
      "loss": 3.127,
      "step": 111555
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9692879915237427,
      "learning_rate": 0.00031490600433355326,
      "loss": 2.8689,
      "step": 111556
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.798259973526001,
      "learning_rate": 0.00031490191882243716,
      "loss": 2.8359,
      "step": 111557
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3823699951171875,
      "learning_rate": 0.00031489783330855054,
      "loss": 2.9545,
      "step": 111558
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.118805170059204,
      "learning_rate": 0.000314893747791894,
      "loss": 3.0563,
      "step": 111559
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.915723204612732,
      "learning_rate": 0.00031488966227246853,
      "loss": 2.8161,
      "step": 111560
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3755342960357666,
      "learning_rate": 0.00031488557675027475,
      "loss": 3.1255,
      "step": 111561
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7549551725387573,
      "learning_rate": 0.0003148814912253135,
      "loss": 2.9119,
      "step": 111562
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9769766330718994,
      "learning_rate": 0.0003148774056975854,
      "loss": 3.2558,
      "step": 111563
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.663710832595825,
      "learning_rate": 0.00031487332016709137,
      "loss": 2.9377,
      "step": 111564
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.228853464126587,
      "learning_rate": 0.00031486923463383223,
      "loss": 3.1702,
      "step": 111565
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8695961236953735,
      "learning_rate": 0.0003148651490978085,
      "loss": 2.8855,
      "step": 111566
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.394711494445801,
      "learning_rate": 0.000314861063559021,
      "loss": 3.0957,
      "step": 111567
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9401490688323975,
      "learning_rate": 0.00031485697801747063,
      "loss": 2.7318,
      "step": 111568
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.904564380645752,
      "learning_rate": 0.00031485289247315805,
      "loss": 3.0438,
      "step": 111569
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9910247325897217,
      "learning_rate": 0.00031484880692608406,
      "loss": 2.7234,
      "step": 111570
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5326502323150635,
      "learning_rate": 0.00031484472137624937,
      "loss": 3.012,
      "step": 111571
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4434897899627686,
      "learning_rate": 0.0003148406358236548,
      "loss": 2.9934,
      "step": 111572
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.8149828910827637,
      "learning_rate": 0.000314836550268301,
      "loss": 3.0798,
      "step": 111573
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.764965772628784,
      "learning_rate": 0.00031483246471018887,
      "loss": 2.9207,
      "step": 111574
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9302841424942017,
      "learning_rate": 0.0003148283791493191,
      "loss": 2.9587,
      "step": 111575
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.3163657188415527,
      "learning_rate": 0.00031482429358569244,
      "loss": 2.8067,
      "step": 111576
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.359929084777832,
      "learning_rate": 0.0003148202080193097,
      "loss": 3.1059,
      "step": 111577
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0641701221466064,
      "learning_rate": 0.0003148161224501715,
      "loss": 2.9277,
      "step": 111578
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6664693355560303,
      "learning_rate": 0.00031481203687827884,
      "loss": 2.8966,
      "step": 111579
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8413622379302979,
      "learning_rate": 0.00031480795130363223,
      "loss": 3.16,
      "step": 111580
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.54975163936615,
      "learning_rate": 0.0003148038657262326,
      "loss": 3.1542,
      "step": 111581
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8704514503479004,
      "learning_rate": 0.00031479978014608065,
      "loss": 2.7665,
      "step": 111582
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0644898414611816,
      "learning_rate": 0.00031479569456317714,
      "loss": 3.3244,
      "step": 111583
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7812813520431519,
      "learning_rate": 0.00031479160897752274,
      "loss": 2.9225,
      "step": 111584
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8488003015518188,
      "learning_rate": 0.0003147875233891185,
      "loss": 2.9183,
      "step": 111585
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5932669639587402,
      "learning_rate": 0.00031478343779796483,
      "loss": 3.1664,
      "step": 111586
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.909687876701355,
      "learning_rate": 0.00031477935220406263,
      "loss": 2.8951,
      "step": 111587
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9845861196517944,
      "learning_rate": 0.0003147752666074128,
      "loss": 2.9075,
      "step": 111588
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8463774919509888,
      "learning_rate": 0.00031477118100801585,
      "loss": 3.0843,
      "step": 111589
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8746719360351562,
      "learning_rate": 0.0003147670954058727,
      "loss": 2.9134,
      "step": 111590
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.87711763381958,
      "learning_rate": 0.00031476300980098405,
      "loss": 3.0397,
      "step": 111591
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1685893535614014,
      "learning_rate": 0.0003147589241933507,
      "loss": 3.2,
      "step": 111592
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9746350049972534,
      "learning_rate": 0.00031475483858297334,
      "loss": 3.1584,
      "step": 111593
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8043724298477173,
      "learning_rate": 0.0003147507529698529,
      "loss": 2.9225,
      "step": 111594
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.101496696472168,
      "learning_rate": 0.00031474666735398986,
      "loss": 3.0409,
      "step": 111595
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9661519527435303,
      "learning_rate": 0.00031474258173538514,
      "loss": 2.83,
      "step": 111596
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9230470657348633,
      "learning_rate": 0.00031473849611403967,
      "loss": 3.0431,
      "step": 111597
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9364101886749268,
      "learning_rate": 0.0003147344104899539,
      "loss": 2.9463,
      "step": 111598
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.110940933227539,
      "learning_rate": 0.00031473032486312874,
      "loss": 2.9578,
      "step": 111599
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7475627660751343,
      "learning_rate": 0.0003147262392335649,
      "loss": 3.1967,
      "step": 111600
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.868696689605713,
      "learning_rate": 0.00031472215360126325,
      "loss": 3.2719,
      "step": 111601
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5938873291015625,
      "learning_rate": 0.00031471806796622446,
      "loss": 2.9048,
      "step": 111602
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2240726947784424,
      "learning_rate": 0.0003147139823284492,
      "loss": 3.0137,
      "step": 111603
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.853592038154602,
      "learning_rate": 0.0003147098966879385,
      "loss": 2.9915,
      "step": 111604
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8260982036590576,
      "learning_rate": 0.00031470581104469286,
      "loss": 3.0102,
      "step": 111605
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2606663703918457,
      "learning_rate": 0.0003147017253987131,
      "loss": 2.9123,
      "step": 111606
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6283698081970215,
      "learning_rate": 0.0003146976397500001,
      "loss": 2.6557,
      "step": 111607
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.833298921585083,
      "learning_rate": 0.0003146935540985545,
      "loss": 3.1227,
      "step": 111608
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.806795597076416,
      "learning_rate": 0.00031468946844437703,
      "loss": 3.0151,
      "step": 111609
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6854345798492432,
      "learning_rate": 0.00031468538278746865,
      "loss": 3.0287,
      "step": 111610
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.932281255722046,
      "learning_rate": 0.00031468129712782986,
      "loss": 2.9657,
      "step": 111611
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.404026985168457,
      "learning_rate": 0.00031467721146546154,
      "loss": 2.8507,
      "step": 111612
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8045252561569214,
      "learning_rate": 0.0003146731258003645,
      "loss": 2.8231,
      "step": 111613
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6666592359542847,
      "learning_rate": 0.00031466904013253944,
      "loss": 2.864,
      "step": 111614
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.089482069015503,
      "learning_rate": 0.0003146649544619871,
      "loss": 2.9475,
      "step": 111615
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6335132122039795,
      "learning_rate": 0.0003146608687887083,
      "loss": 3.1271,
      "step": 111616
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7739267349243164,
      "learning_rate": 0.00031465678311270377,
      "loss": 3.0558,
      "step": 111617
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1200978755950928,
      "learning_rate": 0.0003146526974339742,
      "loss": 3.1187,
      "step": 111618
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.271376848220825,
      "learning_rate": 0.0003146486117525205,
      "loss": 2.8589,
      "step": 111619
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.920560598373413,
      "learning_rate": 0.0003146445260683433,
      "loss": 3.126,
      "step": 111620
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.4607930183410645,
      "learning_rate": 0.0003146404403814434,
      "loss": 2.8123,
      "step": 111621
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.153728008270264,
      "learning_rate": 0.0003146363546918216,
      "loss": 2.8268,
      "step": 111622
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5466976165771484,
      "learning_rate": 0.0003146322689994786,
      "loss": 2.9038,
      "step": 111623
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.863152027130127,
      "learning_rate": 0.00031462818330441517,
      "loss": 3.2584,
      "step": 111624
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.344802141189575,
      "learning_rate": 0.00031462409760663214,
      "loss": 3.099,
      "step": 111625
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.724008083343506,
      "learning_rate": 0.00031462001190613015,
      "loss": 2.9804,
      "step": 111626
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2109673023223877,
      "learning_rate": 0.00031461592620291007,
      "loss": 2.9637,
      "step": 111627
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7493722438812256,
      "learning_rate": 0.00031461184049697265,
      "loss": 3.2489,
      "step": 111628
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2450852394104004,
      "learning_rate": 0.00031460775478831855,
      "loss": 3.1006,
      "step": 111629
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6150593757629395,
      "learning_rate": 0.00031460366907694857,
      "loss": 2.8933,
      "step": 111630
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7353674173355103,
      "learning_rate": 0.0003145995833628636,
      "loss": 3.0005,
      "step": 111631
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0237314701080322,
      "learning_rate": 0.0003145954976460642,
      "loss": 2.6549,
      "step": 111632
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.634751081466675,
      "learning_rate": 0.0003145914119265512,
      "loss": 3.0901,
      "step": 111633
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.146491289138794,
      "learning_rate": 0.0003145873262043255,
      "loss": 3.1833,
      "step": 111634
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.859419345855713,
      "learning_rate": 0.00031458324047938766,
      "loss": 3.1384,
      "step": 111635
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.605841875076294,
      "learning_rate": 0.00031457915475173844,
      "loss": 3.0655,
      "step": 111636
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.29239821434021,
      "learning_rate": 0.0003145750690213788,
      "loss": 2.8841,
      "step": 111637
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9973539113998413,
      "learning_rate": 0.00031457098328830936,
      "loss": 2.9898,
      "step": 111638
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.343452215194702,
      "learning_rate": 0.00031456689755253084,
      "loss": 2.9779,
      "step": 111639
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8981903791427612,
      "learning_rate": 0.00031456281181404416,
      "loss": 2.9636,
      "step": 111640
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3515396118164062,
      "learning_rate": 0.00031455872607284995,
      "loss": 3.0897,
      "step": 111641
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6506738662719727,
      "learning_rate": 0.0003145546403289489,
      "loss": 2.7871,
      "step": 111642
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2388243675231934,
      "learning_rate": 0.000314550554582342,
      "loss": 3.0424,
      "step": 111643
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7974692583084106,
      "learning_rate": 0.0003145464688330298,
      "loss": 2.9078,
      "step": 111644
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8677613735198975,
      "learning_rate": 0.0003145423830810131,
      "loss": 2.978,
      "step": 111645
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.308730363845825,
      "learning_rate": 0.0003145382973262928,
      "loss": 2.8759,
      "step": 111646
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.344799280166626,
      "learning_rate": 0.0003145342115688695,
      "loss": 2.8902,
      "step": 111647
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.640275001525879,
      "learning_rate": 0.000314530125808744,
      "loss": 3.0721,
      "step": 111648
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.927126169204712,
      "learning_rate": 0.00031452604004591713,
      "loss": 3.2545,
      "step": 111649
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8882451057434082,
      "learning_rate": 0.0003145219542803896,
      "loss": 2.9147,
      "step": 111650
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2420034408569336,
      "learning_rate": 0.00031451786851216203,
      "loss": 3.0365,
      "step": 111651
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0365211963653564,
      "learning_rate": 0.0003145137827412354,
      "loss": 3.1023,
      "step": 111652
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.421985387802124,
      "learning_rate": 0.00031450969696761045,
      "loss": 2.9105,
      "step": 111653
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6322394609451294,
      "learning_rate": 0.00031450561119128776,
      "loss": 3.3061,
      "step": 111654
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9549355506896973,
      "learning_rate": 0.0003145015254122683,
      "loss": 3.1203,
      "step": 111655
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.091477870941162,
      "learning_rate": 0.00031449743963055256,
      "loss": 2.8742,
      "step": 111656
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7442009449005127,
      "learning_rate": 0.00031449335384614173,
      "loss": 3.3125,
      "step": 111657
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3994221687316895,
      "learning_rate": 0.0003144892680590361,
      "loss": 2.9287,
      "step": 111658
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.721863031387329,
      "learning_rate": 0.0003144851822692367,
      "loss": 2.9371,
      "step": 111659
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.258512496948242,
      "learning_rate": 0.0003144810964767443,
      "loss": 3.0046,
      "step": 111660
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.497152328491211,
      "learning_rate": 0.0003144770106815596,
      "loss": 3.1306,
      "step": 111661
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.405100107192993,
      "learning_rate": 0.00031447292488368325,
      "loss": 2.9792,
      "step": 111662
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.955352783203125,
      "learning_rate": 0.0003144688390831162,
      "loss": 2.9674,
      "step": 111663
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.102705240249634,
      "learning_rate": 0.00031446475327985904,
      "loss": 3.1901,
      "step": 111664
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1570005416870117,
      "learning_rate": 0.00031446066747391264,
      "loss": 3.0189,
      "step": 111665
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9282118082046509,
      "learning_rate": 0.0003144565816652777,
      "loss": 2.895,
      "step": 111666
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.019706964492798,
      "learning_rate": 0.00031445249585395507,
      "loss": 3.161,
      "step": 111667
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9599815607070923,
      "learning_rate": 0.0003144484100399454,
      "loss": 2.7964,
      "step": 111668
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7569842338562012,
      "learning_rate": 0.0003144443242232495,
      "loss": 2.9602,
      "step": 111669
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.268296241760254,
      "learning_rate": 0.0003144402384038682,
      "loss": 3.0114,
      "step": 111670
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.2464542388916016,
      "learning_rate": 0.00031443615258180214,
      "loss": 3.163,
      "step": 111671
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.5940790176391602,
      "learning_rate": 0.0003144320667570521,
      "loss": 2.788,
      "step": 111672
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1772713661193848,
      "learning_rate": 0.0003144279809296189,
      "loss": 2.8235,
      "step": 111673
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9165878295898438,
      "learning_rate": 0.00031442389509950323,
      "loss": 2.7021,
      "step": 111674
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1575307846069336,
      "learning_rate": 0.000314419809266706,
      "loss": 2.8756,
      "step": 111675
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4652717113494873,
      "learning_rate": 0.00031441572343122765,
      "loss": 2.9185,
      "step": 111676
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1789352893829346,
      "learning_rate": 0.00031441163759306933,
      "loss": 3.0605,
      "step": 111677
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8358594179153442,
      "learning_rate": 0.0003144075517522315,
      "loss": 2.8041,
      "step": 111678
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.3896708488464355,
      "learning_rate": 0.0003144034659087151,
      "loss": 2.9156,
      "step": 111679
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.810641050338745,
      "learning_rate": 0.0003143993800625208,
      "loss": 2.9295,
      "step": 111680
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.084810256958008,
      "learning_rate": 0.0003143952942136494,
      "loss": 2.9802,
      "step": 111681
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9837929010391235,
      "learning_rate": 0.0003143912083621016,
      "loss": 3.1021,
      "step": 111682
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.5494234561920166,
      "learning_rate": 0.00031438712250787826,
      "loss": 2.9279,
      "step": 111683
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.5975003242492676,
      "learning_rate": 0.00031438303665098005,
      "loss": 3.0539,
      "step": 111684
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9447126388549805,
      "learning_rate": 0.00031437895079140777,
      "loss": 2.9755,
      "step": 111685
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0923705101013184,
      "learning_rate": 0.0003143748649291622,
      "loss": 2.993,
      "step": 111686
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8453117609024048,
      "learning_rate": 0.000314370779064244,
      "loss": 3.1969,
      "step": 111687
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.9182164669036865,
      "learning_rate": 0.00031436669319665405,
      "loss": 3.0043,
      "step": 111688
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1800427436828613,
      "learning_rate": 0.0003143626073263931,
      "loss": 3.0282,
      "step": 111689
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9378888607025146,
      "learning_rate": 0.0003143585214534618,
      "loss": 2.918,
      "step": 111690
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1644415855407715,
      "learning_rate": 0.000314354435577861,
      "loss": 2.9012,
      "step": 111691
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7944051027297974,
      "learning_rate": 0.00031435034969959147,
      "loss": 3.0487,
      "step": 111692
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4259917736053467,
      "learning_rate": 0.0003143462638186539,
      "loss": 3.0628,
      "step": 111693
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.955652117729187,
      "learning_rate": 0.000314342177935049,
      "loss": 3.0405,
      "step": 111694
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.647728443145752,
      "learning_rate": 0.00031433809204877783,
      "loss": 3.0216,
      "step": 111695
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.491506576538086,
      "learning_rate": 0.0003143340061598408,
      "loss": 2.8454,
      "step": 111696
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8674122095108032,
      "learning_rate": 0.0003143299202682388,
      "loss": 2.88,
      "step": 111697
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.724594831466675,
      "learning_rate": 0.00031432583437397265,
      "loss": 3.0409,
      "step": 111698
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6415438652038574,
      "learning_rate": 0.000314321748477043,
      "loss": 3.0065,
      "step": 111699
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.7096853256225586,
      "learning_rate": 0.00031431766257745074,
      "loss": 3.0125,
      "step": 111700
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6660664081573486,
      "learning_rate": 0.00031431357667519657,
      "loss": 2.9807,
      "step": 111701
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.210376501083374,
      "learning_rate": 0.00031430949077028116,
      "loss": 3.0794,
      "step": 111702
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.15307092666626,
      "learning_rate": 0.00031430540486270534,
      "loss": 3.1494,
      "step": 111703
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.2091434001922607,
      "learning_rate": 0.00031430131895246997,
      "loss": 3.0617,
      "step": 111704
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.329367160797119,
      "learning_rate": 0.0003142972330395756,
      "loss": 2.8132,
      "step": 111705
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.800958275794983,
      "learning_rate": 0.00031429314712402314,
      "loss": 2.6913,
      "step": 111706
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.4849109649658203,
      "learning_rate": 0.0003142890612058134,
      "loss": 2.7411,
      "step": 111707
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.125669002532959,
      "learning_rate": 0.00031428497528494695,
      "loss": 3.2492,
      "step": 111708
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.7055327892303467,
      "learning_rate": 0.0003142808893614246,
      "loss": 2.996,
      "step": 111709
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8738161325454712,
      "learning_rate": 0.00031427680343524737,
      "loss": 2.8148,
      "step": 111710
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.494713306427002,
      "learning_rate": 0.00031427271750641565,
      "loss": 3.0096,
      "step": 111711
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.165706157684326,
      "learning_rate": 0.0003142686315749304,
      "loss": 3.017,
      "step": 111712
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1762661933898926,
      "learning_rate": 0.0003142645456407924,
      "loss": 2.8927,
      "step": 111713
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8604779243469238,
      "learning_rate": 0.00031426045970400226,
      "loss": 2.8697,
      "step": 111714
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.1652493476867676,
      "learning_rate": 0.00031425637376456087,
      "loss": 2.8298,
      "step": 111715
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5480730533599854,
      "learning_rate": 0.000314252287822469,
      "loss": 3.278,
      "step": 111716
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9558627605438232,
      "learning_rate": 0.00031424820187772725,
      "loss": 2.7317,
      "step": 111717
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0599136352539062,
      "learning_rate": 0.00031424411593033655,
      "loss": 2.8893,
      "step": 111718
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.5294923782348633,
      "learning_rate": 0.0003142400299802976,
      "loss": 2.8391,
      "step": 111719
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.5519416332244873,
      "learning_rate": 0.0003142359440276112,
      "loss": 3.1232,
      "step": 111720
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.9471025466918945,
      "learning_rate": 0.000314231858072278,
      "loss": 2.8237,
      "step": 111721
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8530008792877197,
      "learning_rate": 0.00031422777211429886,
      "loss": 3.1613,
      "step": 111722
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.190394878387451,
      "learning_rate": 0.0003142236861536745,
      "loss": 2.6365,
      "step": 111723
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9439690113067627,
      "learning_rate": 0.0003142196001904057,
      "loss": 3.0733,
      "step": 111724
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0444388389587402,
      "learning_rate": 0.00031421551422449324,
      "loss": 2.9359,
      "step": 111725
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.750327229499817,
      "learning_rate": 0.00031421142825593777,
      "loss": 2.9568,
      "step": 111726
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.698356866836548,
      "learning_rate": 0.0003142073422847402,
      "loss": 2.9187,
      "step": 111727
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3871235847473145,
      "learning_rate": 0.0003142032563109012,
      "loss": 2.8927,
      "step": 111728
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7329963445663452,
      "learning_rate": 0.0003141991703344215,
      "loss": 3.09,
      "step": 111729
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.4100027084350586,
      "learning_rate": 0.0003141950843553019,
      "loss": 3.0853,
      "step": 111730
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9346598386764526,
      "learning_rate": 0.0003141909983735433,
      "loss": 3.1065,
      "step": 111731
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.6856592893600464,
      "learning_rate": 0.0003141869123891462,
      "loss": 2.7526,
      "step": 111732
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.7663421630859375,
      "learning_rate": 0.0003141828264021115,
      "loss": 2.8613,
      "step": 111733
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.0964457988739014,
      "learning_rate": 0.00031417874041243994,
      "loss": 3.0191,
      "step": 111734
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.241495370864868,
      "learning_rate": 0.0003141746544201323,
      "loss": 2.9709,
      "step": 111735
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.8725727796554565,
      "learning_rate": 0.0003141705684251894,
      "loss": 2.8247,
      "step": 111736
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.641845703125,
      "learning_rate": 0.0003141664824276118,
      "loss": 3.068,
      "step": 111737
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.862868070602417,
      "learning_rate": 0.00031416239642740045,
      "loss": 2.9388,
      "step": 111738
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.6509461402893066,
      "learning_rate": 0.00031415831042455605,
      "loss": 3.1087,
      "step": 111739
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1471822261810303,
      "learning_rate": 0.00031415422441907927,
      "loss": 2.7943,
      "step": 111740
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.072077751159668,
      "learning_rate": 0.00031415013841097107,
      "loss": 2.9726,
      "step": 111741
    },
    {
      "epoch": 1.45,
      "grad_norm": 1.9115005731582642,
      "learning_rate": 0.000314146052400232,
      "loss": 3.0527,
      "step": 111742
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1483137607574463,
      "learning_rate": 0.00031414196638686293,
      "loss": 2.8414,
      "step": 111743
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.3863956928253174,
      "learning_rate": 0.0003141378803708646,
      "loss": 2.834,
      "step": 111744
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.1556472778320312,
      "learning_rate": 0.0003141337943522379,
      "loss": 3.1672,
      "step": 111745
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9393893480300903,
      "learning_rate": 0.0003141297083309833,
      "loss": 3.1278,
      "step": 111746
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.751388430595398,
      "learning_rate": 0.00031412562230710174,
      "loss": 2.8642,
      "step": 111747
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5434980392456055,
      "learning_rate": 0.0003141215362805941,
      "loss": 2.8481,
      "step": 111748
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.059796094894409,
      "learning_rate": 0.00031411745025146083,
      "loss": 2.9777,
      "step": 111749
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7919597625732422,
      "learning_rate": 0.0003141133642197029,
      "loss": 2.9011,
      "step": 111750
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.660862684249878,
      "learning_rate": 0.00031410927818532105,
      "loss": 2.9864,
      "step": 111751
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.803261160850525,
      "learning_rate": 0.00031410519214831605,
      "loss": 3.0461,
      "step": 111752
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7607344388961792,
      "learning_rate": 0.0003141011061086886,
      "loss": 3.0175,
      "step": 111753
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9760593175888062,
      "learning_rate": 0.00031409702006643945,
      "loss": 3.1532,
      "step": 111754
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0990841388702393,
      "learning_rate": 0.00031409293402156937,
      "loss": 2.7515,
      "step": 111755
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9450918436050415,
      "learning_rate": 0.0003140888479740793,
      "loss": 3.0695,
      "step": 111756
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8386892080307007,
      "learning_rate": 0.0003140847619239698,
      "loss": 2.9672,
      "step": 111757
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8551957607269287,
      "learning_rate": 0.0003140806758712415,
      "loss": 2.9483,
      "step": 111758
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9072092771530151,
      "learning_rate": 0.0003140765898158956,
      "loss": 2.9545,
      "step": 111759
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6683602333068848,
      "learning_rate": 0.0003140725037579324,
      "loss": 2.938,
      "step": 111760
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9403821229934692,
      "learning_rate": 0.0003140684176973529,
      "loss": 3.2492,
      "step": 111761
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.169450283050537,
      "learning_rate": 0.0003140643316341579,
      "loss": 3.0752,
      "step": 111762
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7931071519851685,
      "learning_rate": 0.000314060245568348,
      "loss": 3.069,
      "step": 111763
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8437752723693848,
      "learning_rate": 0.00031405615949992407,
      "loss": 2.9749,
      "step": 111764
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.471041440963745,
      "learning_rate": 0.0003140520734288868,
      "loss": 2.9873,
      "step": 111765
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5080394744873047,
      "learning_rate": 0.000314047987355237,
      "loss": 2.9602,
      "step": 111766
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8055601119995117,
      "learning_rate": 0.0003140439012789754,
      "loss": 2.9839,
      "step": 111767
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.151103973388672,
      "learning_rate": 0.00031403981520010285,
      "loss": 2.9671,
      "step": 111768
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7423150539398193,
      "learning_rate": 0.0003140357291186199,
      "loss": 2.6326,
      "step": 111769
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4394118785858154,
      "learning_rate": 0.0003140316430345275,
      "loss": 2.8717,
      "step": 111770
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9398386478424072,
      "learning_rate": 0.0003140275569478265,
      "loss": 3.1447,
      "step": 111771
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.002074718475342,
      "learning_rate": 0.0003140234708585173,
      "loss": 2.9702,
      "step": 111772
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.013026714324951,
      "learning_rate": 0.0003140193847666009,
      "loss": 2.9739,
      "step": 111773
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9863801002502441,
      "learning_rate": 0.0003140152986720781,
      "loss": 2.9069,
      "step": 111774
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.031102180480957,
      "learning_rate": 0.0003140112125749496,
      "loss": 3.0693,
      "step": 111775
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7994277477264404,
      "learning_rate": 0.0003140071264752161,
      "loss": 3.2601,
      "step": 111776
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.753984808921814,
      "learning_rate": 0.00031400304037287847,
      "loss": 3.0843,
      "step": 111777
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9737216234207153,
      "learning_rate": 0.00031399895426793736,
      "loss": 3.186,
      "step": 111778
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7579938173294067,
      "learning_rate": 0.0003139948681603936,
      "loss": 3.1192,
      "step": 111779
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1608872413635254,
      "learning_rate": 0.000313990782050248,
      "loss": 2.8042,
      "step": 111780
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.993489384651184,
      "learning_rate": 0.0003139866959375011,
      "loss": 3.1621,
      "step": 111781
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9230234622955322,
      "learning_rate": 0.0003139826098221539,
      "loss": 3.1773,
      "step": 111782
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3615882396698,
      "learning_rate": 0.0003139785237042071,
      "loss": 2.9584,
      "step": 111783
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6501160860061646,
      "learning_rate": 0.0003139744375836614,
      "loss": 3.2547,
      "step": 111784
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8010351657867432,
      "learning_rate": 0.00031397035146051754,
      "loss": 2.9884,
      "step": 111785
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.051064968109131,
      "learning_rate": 0.00031396626533477634,
      "loss": 3.0627,
      "step": 111786
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7034571170806885,
      "learning_rate": 0.0003139621792064386,
      "loss": 3.1275,
      "step": 111787
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.423764705657959,
      "learning_rate": 0.0003139580930755049,
      "loss": 2.8181,
      "step": 111788
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8746654987335205,
      "learning_rate": 0.0003139540069419763,
      "loss": 3.1064,
      "step": 111789
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.050069808959961,
      "learning_rate": 0.00031394992080585333,
      "loss": 3.2384,
      "step": 111790
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8872956037521362,
      "learning_rate": 0.0003139458346671367,
      "loss": 3.3532,
      "step": 111791
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1310696601867676,
      "learning_rate": 0.00031394174852582746,
      "loss": 3.0978,
      "step": 111792
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.309704542160034,
      "learning_rate": 0.0003139376623819261,
      "loss": 2.9622,
      "step": 111793
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0596020221710205,
      "learning_rate": 0.00031393357623543336,
      "loss": 2.9581,
      "step": 111794
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3361475467681885,
      "learning_rate": 0.00031392949008635025,
      "loss": 3.0084,
      "step": 111795
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2267403602600098,
      "learning_rate": 0.0003139254039346773,
      "loss": 2.9115,
      "step": 111796
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.2464892864227295,
      "learning_rate": 0.00031392131778041536,
      "loss": 2.7789,
      "step": 111797
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2752037048339844,
      "learning_rate": 0.00031391723162356525,
      "loss": 2.9481,
      "step": 111798
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9040868282318115,
      "learning_rate": 0.00031391314546412764,
      "loss": 3.1843,
      "step": 111799
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9763247966766357,
      "learning_rate": 0.0003139090593021033,
      "loss": 3.0583,
      "step": 111800
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8137370347976685,
      "learning_rate": 0.000313904973137493,
      "loss": 2.9871,
      "step": 111801
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.004483461380005,
      "learning_rate": 0.00031390088697029754,
      "loss": 2.9312,
      "step": 111802
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9101511240005493,
      "learning_rate": 0.00031389680080051756,
      "loss": 2.8152,
      "step": 111803
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.209695816040039,
      "learning_rate": 0.00031389271462815393,
      "loss": 3.1672,
      "step": 111804
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8374080657958984,
      "learning_rate": 0.00031388862845320746,
      "loss": 2.7459,
      "step": 111805
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.243454933166504,
      "learning_rate": 0.00031388454227567874,
      "loss": 3.012,
      "step": 111806
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.366877794265747,
      "learning_rate": 0.00031388045609556864,
      "loss": 2.8489,
      "step": 111807
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2769651412963867,
      "learning_rate": 0.00031387636991287793,
      "loss": 3.166,
      "step": 111808
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.217473030090332,
      "learning_rate": 0.0003138722837276073,
      "loss": 3.0235,
      "step": 111809
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.897612452507019,
      "learning_rate": 0.0003138681975397576,
      "loss": 2.9171,
      "step": 111810
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.057969331741333,
      "learning_rate": 0.00031386411134932957,
      "loss": 2.656,
      "step": 111811
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7900521755218506,
      "learning_rate": 0.00031386002515632386,
      "loss": 3.113,
      "step": 111812
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7543580532073975,
      "learning_rate": 0.0003138559389607413,
      "loss": 3.1183,
      "step": 111813
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8466870784759521,
      "learning_rate": 0.00031385185276258274,
      "loss": 2.9102,
      "step": 111814
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.026466131210327,
      "learning_rate": 0.00031384776656184885,
      "loss": 3.0951,
      "step": 111815
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9377321004867554,
      "learning_rate": 0.0003138436803585403,
      "loss": 2.9191,
      "step": 111816
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6739274263381958,
      "learning_rate": 0.00031383959415265806,
      "loss": 3.1879,
      "step": 111817
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8377807140350342,
      "learning_rate": 0.0003138355079442027,
      "loss": 2.8335,
      "step": 111818
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8535257577896118,
      "learning_rate": 0.00031383142173317506,
      "loss": 3.0887,
      "step": 111819
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.030738115310669,
      "learning_rate": 0.00031382733551957596,
      "loss": 2.9582,
      "step": 111820
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.282137393951416,
      "learning_rate": 0.00031382324930340606,
      "loss": 2.9448,
      "step": 111821
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0369045734405518,
      "learning_rate": 0.00031381916308466617,
      "loss": 2.8603,
      "step": 111822
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9192744493484497,
      "learning_rate": 0.00031381507686335705,
      "loss": 3.01,
      "step": 111823
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.132735013961792,
      "learning_rate": 0.00031381099063947944,
      "loss": 3.0772,
      "step": 111824
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0127205848693848,
      "learning_rate": 0.000313806904413034,
      "loss": 3.1662,
      "step": 111825
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0446853637695312,
      "learning_rate": 0.0003138028181840218,
      "loss": 3.0022,
      "step": 111826
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0437920093536377,
      "learning_rate": 0.0003137987319524432,
      "loss": 3.1046,
      "step": 111827
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9984524250030518,
      "learning_rate": 0.00031379464571829925,
      "loss": 3.0462,
      "step": 111828
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2118186950683594,
      "learning_rate": 0.00031379055948159066,
      "loss": 3.2405,
      "step": 111829
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0524322986602783,
      "learning_rate": 0.00031378647324231803,
      "loss": 2.754,
      "step": 111830
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0142130851745605,
      "learning_rate": 0.00031378238700048225,
      "loss": 2.8719,
      "step": 111831
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2142865657806396,
      "learning_rate": 0.00031377830075608417,
      "loss": 3.0228,
      "step": 111832
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.179614543914795,
      "learning_rate": 0.0003137742145091244,
      "loss": 2.8867,
      "step": 111833
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7817628383636475,
      "learning_rate": 0.0003137701282596037,
      "loss": 2.8924,
      "step": 111834
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7917406558990479,
      "learning_rate": 0.0003137660420075229,
      "loss": 3.1677,
      "step": 111835
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.92400324344635,
      "learning_rate": 0.0003137619557528826,
      "loss": 2.9702,
      "step": 111836
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5116355419158936,
      "learning_rate": 0.00031375786949568396,
      "loss": 3.0069,
      "step": 111837
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8191401958465576,
      "learning_rate": 0.00031375378323592724,
      "loss": 3.2071,
      "step": 111838
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.106339931488037,
      "learning_rate": 0.00031374969697361353,
      "loss": 3.08,
      "step": 111839
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1492626667022705,
      "learning_rate": 0.00031374561070874354,
      "loss": 3.0544,
      "step": 111840
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.741064429283142,
      "learning_rate": 0.0003137415244413179,
      "loss": 3.0653,
      "step": 111841
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3387672901153564,
      "learning_rate": 0.00031373743817133745,
      "loss": 3.1336,
      "step": 111842
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.277736186981201,
      "learning_rate": 0.00031373335189880297,
      "loss": 3.0215,
      "step": 111843
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6710736751556396,
      "learning_rate": 0.0003137292656237153,
      "loss": 2.8014,
      "step": 111844
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8537976741790771,
      "learning_rate": 0.00031372517934607493,
      "loss": 3.1175,
      "step": 111845
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.681687831878662,
      "learning_rate": 0.00031372109306588284,
      "loss": 2.789,
      "step": 111846
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2930803298950195,
      "learning_rate": 0.0003137170067831399,
      "loss": 3.2284,
      "step": 111847
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4019973278045654,
      "learning_rate": 0.0003137129204978466,
      "loss": 2.9975,
      "step": 111848
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.762730360031128,
      "learning_rate": 0.0003137088342100037,
      "loss": 2.8853,
      "step": 111849
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7726415395736694,
      "learning_rate": 0.0003137047479196122,
      "loss": 2.9734,
      "step": 111850
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8252227306365967,
      "learning_rate": 0.0003137006616266727,
      "loss": 2.702,
      "step": 111851
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8308045864105225,
      "learning_rate": 0.00031369657533118593,
      "loss": 3.015,
      "step": 111852
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.629876971244812,
      "learning_rate": 0.0003136924890331528,
      "loss": 3.2553,
      "step": 111853
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7545716762542725,
      "learning_rate": 0.0003136884027325739,
      "loss": 3.1283,
      "step": 111854
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.753631353378296,
      "learning_rate": 0.0003136843164294501,
      "loss": 2.9957,
      "step": 111855
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.683611273765564,
      "learning_rate": 0.00031368023012378216,
      "loss": 2.8818,
      "step": 111856
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7894939184188843,
      "learning_rate": 0.00031367614381557076,
      "loss": 2.8746,
      "step": 111857
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.548075556755066,
      "learning_rate": 0.00031367205750481675,
      "loss": 3.0043,
      "step": 111858
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.548529624938965,
      "learning_rate": 0.00031366797119152087,
      "loss": 2.9563,
      "step": 111859
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.902748942375183,
      "learning_rate": 0.00031366388487568374,
      "loss": 3.0483,
      "step": 111860
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3602850437164307,
      "learning_rate": 0.00031365979855730626,
      "loss": 2.9601,
      "step": 111861
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.047717332839966,
      "learning_rate": 0.00031365571223638926,
      "loss": 3.0341,
      "step": 111862
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.682931661605835,
      "learning_rate": 0.0003136516259129334,
      "loss": 2.8402,
      "step": 111863
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6737687587738037,
      "learning_rate": 0.0003136475395869393,
      "loss": 3.2463,
      "step": 111864
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7433501482009888,
      "learning_rate": 0.000313643453258408,
      "loss": 2.8991,
      "step": 111865
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3096635341644287,
      "learning_rate": 0.0003136393669273401,
      "loss": 3.0272,
      "step": 111866
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.075758457183838,
      "learning_rate": 0.00031363528059373633,
      "loss": 2.9975,
      "step": 111867
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.821048378944397,
      "learning_rate": 0.00031363119425759756,
      "loss": 2.9202,
      "step": 111868
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7447996139526367,
      "learning_rate": 0.0003136271079189245,
      "loss": 2.9462,
      "step": 111869
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.051281690597534,
      "learning_rate": 0.0003136230215777178,
      "loss": 3.1536,
      "step": 111870
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.0031182765960693,
      "learning_rate": 0.00031361893523397847,
      "loss": 3.0123,
      "step": 111871
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6811975240707397,
      "learning_rate": 0.00031361484888770707,
      "loss": 2.9587,
      "step": 111872
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6101939678192139,
      "learning_rate": 0.00031361076253890434,
      "loss": 2.9888,
      "step": 111873
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.3565657138824463,
      "learning_rate": 0.0003136066761875712,
      "loss": 2.7553,
      "step": 111874
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8814789056777954,
      "learning_rate": 0.0003136025898337083,
      "loss": 2.9153,
      "step": 111875
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9880433082580566,
      "learning_rate": 0.0003135985034773164,
      "loss": 3.045,
      "step": 111876
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9584815502166748,
      "learning_rate": 0.00031359441711839624,
      "loss": 2.9381,
      "step": 111877
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1267757415771484,
      "learning_rate": 0.0003135903307569488,
      "loss": 3.0066,
      "step": 111878
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6815682649612427,
      "learning_rate": 0.00031358624439297453,
      "loss": 3.0633,
      "step": 111879
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.263972043991089,
      "learning_rate": 0.00031358215802647425,
      "loss": 3.0801,
      "step": 111880
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.715313196182251,
      "learning_rate": 0.0003135780716574489,
      "loss": 3.127,
      "step": 111881
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.173999547958374,
      "learning_rate": 0.0003135739852858991,
      "loss": 2.6679,
      "step": 111882
    },
    {
      "epoch": 1.46,
      "grad_norm": 6.023140907287598,
      "learning_rate": 0.00031356989891182565,
      "loss": 2.8077,
      "step": 111883
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2437148094177246,
      "learning_rate": 0.00031356581253522934,
      "loss": 2.8855,
      "step": 111884
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.856370210647583,
      "learning_rate": 0.0003135617261561108,
      "loss": 2.9602,
      "step": 111885
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.173238754272461,
      "learning_rate": 0.00031355763977447093,
      "loss": 3.0494,
      "step": 111886
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7784395217895508,
      "learning_rate": 0.0003135535533903104,
      "loss": 2.9474,
      "step": 111887
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2850770950317383,
      "learning_rate": 0.00031354946700363004,
      "loss": 3.1073,
      "step": 111888
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5922967195510864,
      "learning_rate": 0.0003135453806144306,
      "loss": 2.9375,
      "step": 111889
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.853995442390442,
      "learning_rate": 0.0003135412942227128,
      "loss": 2.9649,
      "step": 111890
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.773452877998352,
      "learning_rate": 0.0003135372078284774,
      "loss": 3.0748,
      "step": 111891
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.950126051902771,
      "learning_rate": 0.0003135331214317252,
      "loss": 3.1738,
      "step": 111892
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6345489025115967,
      "learning_rate": 0.00031352903503245697,
      "loss": 3.1842,
      "step": 111893
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7735217809677124,
      "learning_rate": 0.00031352494863067334,
      "loss": 3.0159,
      "step": 111894
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1407220363616943,
      "learning_rate": 0.00031352086222637517,
      "loss": 3.0781,
      "step": 111895
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.803756833076477,
      "learning_rate": 0.00031351677581956333,
      "loss": 3.0809,
      "step": 111896
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1460769176483154,
      "learning_rate": 0.00031351268941023837,
      "loss": 2.9688,
      "step": 111897
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.483325958251953,
      "learning_rate": 0.00031350860299840115,
      "loss": 3.09,
      "step": 111898
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.464462995529175,
      "learning_rate": 0.00031350451658405254,
      "loss": 2.9733,
      "step": 111899
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.884589672088623,
      "learning_rate": 0.0003135004301671931,
      "loss": 2.9509,
      "step": 111900
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.2543327808380127,
      "learning_rate": 0.0003134963437478236,
      "loss": 2.994,
      "step": 111901
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.337540626525879,
      "learning_rate": 0.00031349225732594496,
      "loss": 3.1187,
      "step": 111902
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0960609912872314,
      "learning_rate": 0.00031348817090155783,
      "loss": 3.0562,
      "step": 111903
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0667331218719482,
      "learning_rate": 0.00031348408447466305,
      "loss": 3.1064,
      "step": 111904
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.324498414993286,
      "learning_rate": 0.0003134799980452613,
      "loss": 3.0695,
      "step": 111905
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7347564697265625,
      "learning_rate": 0.00031347591161335334,
      "loss": 3.0404,
      "step": 111906
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7192689180374146,
      "learning_rate": 0.00031347182517893994,
      "loss": 3.0493,
      "step": 111907
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.854997158050537,
      "learning_rate": 0.00031346773874202187,
      "loss": 2.7895,
      "step": 111908
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7248129844665527,
      "learning_rate": 0.0003134636523025999,
      "loss": 3.0219,
      "step": 111909
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8240114450454712,
      "learning_rate": 0.0003134595658606748,
      "loss": 2.7982,
      "step": 111910
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8424111604690552,
      "learning_rate": 0.00031345547941624734,
      "loss": 2.6176,
      "step": 111911
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9952656030654907,
      "learning_rate": 0.00031345139296931824,
      "loss": 2.997,
      "step": 111912
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.260288953781128,
      "learning_rate": 0.0003134473065198882,
      "loss": 3.0783,
      "step": 111913
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2649192810058594,
      "learning_rate": 0.00031344322006795816,
      "loss": 3.0387,
      "step": 111914
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8358548879623413,
      "learning_rate": 0.0003134391336135287,
      "loss": 2.7787,
      "step": 111915
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.812069058418274,
      "learning_rate": 0.00031343504715660066,
      "loss": 2.8451,
      "step": 111916
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8127845525741577,
      "learning_rate": 0.00031343096069717477,
      "loss": 2.7354,
      "step": 111917
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9773190021514893,
      "learning_rate": 0.0003134268742352519,
      "loss": 2.9882,
      "step": 111918
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7961206436157227,
      "learning_rate": 0.00031342278777083267,
      "loss": 3.104,
      "step": 111919
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.364125967025757,
      "learning_rate": 0.0003134187013039178,
      "loss": 3.0574,
      "step": 111920
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.93888258934021,
      "learning_rate": 0.0003134146148345083,
      "loss": 2.9626,
      "step": 111921
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.790917992591858,
      "learning_rate": 0.0003134105283626047,
      "loss": 2.9995,
      "step": 111922
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.921980857849121,
      "learning_rate": 0.00031340644188820784,
      "loss": 2.9296,
      "step": 111923
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.138766288757324,
      "learning_rate": 0.00031340235541131844,
      "loss": 2.7601,
      "step": 111924
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4049251079559326,
      "learning_rate": 0.00031339826893193726,
      "loss": 2.9639,
      "step": 111925
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9427071809768677,
      "learning_rate": 0.0003133941824500652,
      "loss": 3.0058,
      "step": 111926
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8285106420516968,
      "learning_rate": 0.00031339009596570276,
      "loss": 3.0104,
      "step": 111927
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1475110054016113,
      "learning_rate": 0.0003133860094788509,
      "loss": 3.1131,
      "step": 111928
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.927615761756897,
      "learning_rate": 0.0003133819229895104,
      "loss": 3.0479,
      "step": 111929
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7229125499725342,
      "learning_rate": 0.0003133778364976819,
      "loss": 2.8141,
      "step": 111930
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.902327537536621,
      "learning_rate": 0.0003133737500033662,
      "loss": 3.0093,
      "step": 111931
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4797847270965576,
      "learning_rate": 0.0003133696635065641,
      "loss": 3.1444,
      "step": 111932
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8498400449752808,
      "learning_rate": 0.0003133655770072763,
      "loss": 3.1402,
      "step": 111933
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.588678240776062,
      "learning_rate": 0.0003133614905055036,
      "loss": 3.1153,
      "step": 111934
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.624463677406311,
      "learning_rate": 0.0003133574040012467,
      "loss": 2.9265,
      "step": 111935
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.605305790901184,
      "learning_rate": 0.0003133533174945065,
      "loss": 3.3441,
      "step": 111936
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.651460886001587,
      "learning_rate": 0.0003133492309852836,
      "loss": 2.9366,
      "step": 111937
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6938132047653198,
      "learning_rate": 0.00031334514447357875,
      "loss": 3.1003,
      "step": 111938
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1032538414001465,
      "learning_rate": 0.000313341057959393,
      "loss": 3.0713,
      "step": 111939
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.4161794185638428,
      "learning_rate": 0.0003133369714427267,
      "loss": 3.0616,
      "step": 111940
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3958864212036133,
      "learning_rate": 0.00031333288492358085,
      "loss": 3.1426,
      "step": 111941
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5723090171813965,
      "learning_rate": 0.00031332879840195623,
      "loss": 3.2341,
      "step": 111942
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1384212970733643,
      "learning_rate": 0.00031332471187785346,
      "loss": 3.0754,
      "step": 111943
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.406310796737671,
      "learning_rate": 0.0003133206253512734,
      "loss": 2.9463,
      "step": 111944
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.283376932144165,
      "learning_rate": 0.00031331653882221683,
      "loss": 2.9773,
      "step": 111945
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7372537851333618,
      "learning_rate": 0.00031331245229068445,
      "loss": 3.0186,
      "step": 111946
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3348026275634766,
      "learning_rate": 0.000313308365756677,
      "loss": 3.0942,
      "step": 111947
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9860608577728271,
      "learning_rate": 0.0003133042792201953,
      "loss": 3.1718,
      "step": 111948
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8626052141189575,
      "learning_rate": 0.0003133001926812401,
      "loss": 2.9914,
      "step": 111949
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0686590671539307,
      "learning_rate": 0.000313296106139812,
      "loss": 3.1393,
      "step": 111950
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9618017673492432,
      "learning_rate": 0.0003132920195959121,
      "loss": 3.2531,
      "step": 111951
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7071627378463745,
      "learning_rate": 0.0003132879330495408,
      "loss": 3.1902,
      "step": 111952
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9664020538330078,
      "learning_rate": 0.00031328384650069916,
      "loss": 3.016,
      "step": 111953
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.623545527458191,
      "learning_rate": 0.0003132797599493878,
      "loss": 3.032,
      "step": 111954
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8855583667755127,
      "learning_rate": 0.00031327567339560735,
      "loss": 3.1548,
      "step": 111955
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7642343044281006,
      "learning_rate": 0.00031327158683935874,
      "loss": 3.0838,
      "step": 111956
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7589070796966553,
      "learning_rate": 0.0003132675002806428,
      "loss": 2.8601,
      "step": 111957
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.299140691757202,
      "learning_rate": 0.0003132634137194601,
      "loss": 2.7904,
      "step": 111958
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5424580574035645,
      "learning_rate": 0.0003132593271558115,
      "loss": 2.8568,
      "step": 111959
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0319597721099854,
      "learning_rate": 0.0003132552405896977,
      "loss": 3.2543,
      "step": 111960
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.310227870941162,
      "learning_rate": 0.0003132511540211196,
      "loss": 3.1112,
      "step": 111961
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.028146505355835,
      "learning_rate": 0.00031324706745007775,
      "loss": 3.0881,
      "step": 111962
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.058544635772705,
      "learning_rate": 0.0003132429808765731,
      "loss": 2.8906,
      "step": 111963
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.7319176197052,
      "learning_rate": 0.00031323889430060625,
      "loss": 2.9931,
      "step": 111964
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.3224785327911377,
      "learning_rate": 0.00031323480772217806,
      "loss": 2.9027,
      "step": 111965
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5533244609832764,
      "learning_rate": 0.0003132307211412893,
      "loss": 3.0243,
      "step": 111966
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.83902907371521,
      "learning_rate": 0.0003132266345579407,
      "loss": 2.93,
      "step": 111967
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6229448318481445,
      "learning_rate": 0.000313222547972133,
      "loss": 2.8415,
      "step": 111968
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.240537643432617,
      "learning_rate": 0.000313218461383867,
      "loss": 2.9769,
      "step": 111969
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.127963542938232,
      "learning_rate": 0.0003132143747931434,
      "loss": 3.0535,
      "step": 111970
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4177944660186768,
      "learning_rate": 0.000313210288199963,
      "loss": 2.8569,
      "step": 111971
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0569379329681396,
      "learning_rate": 0.00031320620160432667,
      "loss": 3.0483,
      "step": 111972
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.069418430328369,
      "learning_rate": 0.00031320211500623495,
      "loss": 2.8805,
      "step": 111973
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.602248430252075,
      "learning_rate": 0.0003131980284056887,
      "loss": 2.9897,
      "step": 111974
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9276820421218872,
      "learning_rate": 0.00031319394180268876,
      "loss": 3.1335,
      "step": 111975
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8230787515640259,
      "learning_rate": 0.00031318985519723574,
      "loss": 2.9617,
      "step": 111976
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0111632347106934,
      "learning_rate": 0.0003131857685893305,
      "loss": 3.0703,
      "step": 111977
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.758258819580078,
      "learning_rate": 0.00031318168197897383,
      "loss": 3.1208,
      "step": 111978
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.159937858581543,
      "learning_rate": 0.00031317759536616635,
      "loss": 2.845,
      "step": 111979
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7496349811553955,
      "learning_rate": 0.00031317350875090895,
      "loss": 2.9217,
      "step": 111980
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.123720169067383,
      "learning_rate": 0.0003131694221332024,
      "loss": 3.0634,
      "step": 111981
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.7081823348999023,
      "learning_rate": 0.0003131653355130473,
      "loss": 2.8809,
      "step": 111982
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8899481296539307,
      "learning_rate": 0.0003131612488904446,
      "loss": 2.9666,
      "step": 111983
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2808940410614014,
      "learning_rate": 0.00031315716226539497,
      "loss": 3.1386,
      "step": 111984
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.824468970298767,
      "learning_rate": 0.0003131530756378991,
      "loss": 2.8361,
      "step": 111985
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0765068531036377,
      "learning_rate": 0.00031314898900795785,
      "loss": 2.9262,
      "step": 111986
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6424973011016846,
      "learning_rate": 0.000313144902375572,
      "loss": 3.1626,
      "step": 111987
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3774194717407227,
      "learning_rate": 0.0003131408157407422,
      "loss": 3.0549,
      "step": 111988
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7787178754806519,
      "learning_rate": 0.0003131367291034693,
      "loss": 3.1265,
      "step": 111989
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8311630487442017,
      "learning_rate": 0.0003131326424637541,
      "loss": 2.9608,
      "step": 111990
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.958458185195923,
      "learning_rate": 0.00031312855582159727,
      "loss": 2.8763,
      "step": 111991
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.734009027481079,
      "learning_rate": 0.0003131244691769995,
      "loss": 2.9926,
      "step": 111992
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7858011722564697,
      "learning_rate": 0.00031312038252996176,
      "loss": 3.3601,
      "step": 111993
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.64386785030365,
      "learning_rate": 0.0003131162958804846,
      "loss": 2.857,
      "step": 111994
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.9079678058624268,
      "learning_rate": 0.0003131122092285689,
      "loss": 3.2031,
      "step": 111995
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.1000783443450928,
      "learning_rate": 0.0003131081225742155,
      "loss": 3.1232,
      "step": 111996
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.613402009010315,
      "learning_rate": 0.00031310403591742493,
      "loss": 3.0207,
      "step": 111997
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2102694511413574,
      "learning_rate": 0.0003130999492581981,
      "loss": 2.9455,
      "step": 111998
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8356032371520996,
      "learning_rate": 0.0003130958625965357,
      "loss": 2.8255,
      "step": 111999
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.805741548538208,
      "learning_rate": 0.0003130917759324386,
      "loss": 3.1705,
      "step": 112000
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0332188606262207,
      "learning_rate": 0.00031308768926590746,
      "loss": 2.908,
      "step": 112001
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6376636028289795,
      "learning_rate": 0.00031308360259694307,
      "loss": 3.1035,
      "step": 112002
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8929128646850586,
      "learning_rate": 0.0003130795159255463,
      "loss": 2.9741,
      "step": 112003
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.599946975708008,
      "learning_rate": 0.0003130754292517177,
      "loss": 2.7897,
      "step": 112004
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1484479904174805,
      "learning_rate": 0.00031307134257545807,
      "loss": 2.9264,
      "step": 112005
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2872302532196045,
      "learning_rate": 0.00031306725589676834,
      "loss": 2.9856,
      "step": 112006
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1139893531799316,
      "learning_rate": 0.0003130631692156491,
      "loss": 3.2217,
      "step": 112007
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.158327341079712,
      "learning_rate": 0.00031305908253210113,
      "loss": 2.9441,
      "step": 112008
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.237295627593994,
      "learning_rate": 0.00031305499584612524,
      "loss": 2.9146,
      "step": 112009
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8590396642684937,
      "learning_rate": 0.0003130509091577223,
      "loss": 2.9996,
      "step": 112010
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.546846866607666,
      "learning_rate": 0.0003130468224668929,
      "loss": 3.0271,
      "step": 112011
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9959958791732788,
      "learning_rate": 0.00031304273577363774,
      "loss": 3.2361,
      "step": 112012
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8417575359344482,
      "learning_rate": 0.00031303864907795785,
      "loss": 3.0763,
      "step": 112013
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.945452094078064,
      "learning_rate": 0.0003130345623798537,
      "loss": 3.0934,
      "step": 112014
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8168468475341797,
      "learning_rate": 0.0003130304756793263,
      "loss": 2.9076,
      "step": 112015
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.018540143966675,
      "learning_rate": 0.00031302638897637617,
      "loss": 2.8966,
      "step": 112016
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7821109294891357,
      "learning_rate": 0.0003130223022710042,
      "loss": 3.0272,
      "step": 112017
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5587263107299805,
      "learning_rate": 0.0003130182155632112,
      "loss": 3.0913,
      "step": 112018
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6890960931777954,
      "learning_rate": 0.00031301412885299785,
      "loss": 3.2859,
      "step": 112019
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8750454187393188,
      "learning_rate": 0.00031301004214036494,
      "loss": 3.1609,
      "step": 112020
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0047614574432373,
      "learning_rate": 0.0003130059554253132,
      "loss": 2.9409,
      "step": 112021
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.709447979927063,
      "learning_rate": 0.00031300186870784343,
      "loss": 2.8703,
      "step": 112022
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5980851650238037,
      "learning_rate": 0.00031299778198795624,
      "loss": 3.0116,
      "step": 112023
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7016751766204834,
      "learning_rate": 0.0003129936952656527,
      "loss": 2.9361,
      "step": 112024
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2578909397125244,
      "learning_rate": 0.0003129896085409333,
      "loss": 2.8767,
      "step": 112025
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1393239498138428,
      "learning_rate": 0.0003129855218137989,
      "loss": 2.9836,
      "step": 112026
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4482603073120117,
      "learning_rate": 0.00031298143508425025,
      "loss": 2.9714,
      "step": 112027
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7367303371429443,
      "learning_rate": 0.0003129773483522881,
      "loss": 3.2505,
      "step": 112028
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.179988384246826,
      "learning_rate": 0.0003129732616179132,
      "loss": 3.1494,
      "step": 112029
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.875279188156128,
      "learning_rate": 0.00031296917488112636,
      "loss": 2.8578,
      "step": 112030
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.948267698287964,
      "learning_rate": 0.0003129650881419283,
      "loss": 2.972,
      "step": 112031
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0564169883728027,
      "learning_rate": 0.0003129610014003197,
      "loss": 3.1411,
      "step": 112032
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8974441289901733,
      "learning_rate": 0.0003129569146563016,
      "loss": 3.1633,
      "step": 112033
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.587167501449585,
      "learning_rate": 0.00031295282790987436,
      "loss": 3.1164,
      "step": 112034
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8995180130004883,
      "learning_rate": 0.000312948741161039,
      "loss": 3.0632,
      "step": 112035
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8514351844787598,
      "learning_rate": 0.00031294465440979627,
      "loss": 3.0211,
      "step": 112036
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.282853603363037,
      "learning_rate": 0.00031294056765614686,
      "loss": 3.2554,
      "step": 112037
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1838839054107666,
      "learning_rate": 0.00031293648090009155,
      "loss": 2.8811,
      "step": 112038
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9608253240585327,
      "learning_rate": 0.00031293239414163115,
      "loss": 3.1725,
      "step": 112039
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0297629833221436,
      "learning_rate": 0.0003129283073807663,
      "loss": 2.7343,
      "step": 112040
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.136213779449463,
      "learning_rate": 0.00031292422061749786,
      "loss": 3.0491,
      "step": 112041
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3075952529907227,
      "learning_rate": 0.00031292013385182666,
      "loss": 3.1738,
      "step": 112042
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9129612445831299,
      "learning_rate": 0.00031291604708375323,
      "loss": 3.0915,
      "step": 112043
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8575241565704346,
      "learning_rate": 0.00031291196031327847,
      "loss": 2.9038,
      "step": 112044
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9062981605529785,
      "learning_rate": 0.0003129078735404032,
      "loss": 2.856,
      "step": 112045
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9284672737121582,
      "learning_rate": 0.0003129037867651281,
      "loss": 2.8087,
      "step": 112046
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6429107189178467,
      "learning_rate": 0.00031289969998745385,
      "loss": 2.8612,
      "step": 112047
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.651825428009033,
      "learning_rate": 0.00031289561320738146,
      "loss": 3.01,
      "step": 112048
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4902803897857666,
      "learning_rate": 0.0003128915264249114,
      "loss": 2.9311,
      "step": 112049
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.801190137863159,
      "learning_rate": 0.0003128874396400446,
      "loss": 2.8906,
      "step": 112050
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1284902095794678,
      "learning_rate": 0.0003128833528527819,
      "loss": 3.0724,
      "step": 112051
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.402622699737549,
      "learning_rate": 0.0003128792660631238,
      "loss": 2.825,
      "step": 112052
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.498521566390991,
      "learning_rate": 0.00031287517927107124,
      "loss": 2.9279,
      "step": 112053
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.11720871925354,
      "learning_rate": 0.000312871092476625,
      "loss": 2.7502,
      "step": 112054
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6582828760147095,
      "learning_rate": 0.0003128670056797856,
      "loss": 2.7316,
      "step": 112055
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9155200719833374,
      "learning_rate": 0.0003128629188805541,
      "loss": 3.2339,
      "step": 112056
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9967564344406128,
      "learning_rate": 0.0003128588320789312,
      "loss": 2.7498,
      "step": 112057
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1735639572143555,
      "learning_rate": 0.00031285474527491757,
      "loss": 2.8763,
      "step": 112058
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1785030364990234,
      "learning_rate": 0.0003128506584685139,
      "loss": 3.0144,
      "step": 112059
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.099700927734375,
      "learning_rate": 0.0003128465716597212,
      "loss": 3.0294,
      "step": 112060
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9492799043655396,
      "learning_rate": 0.00031284248484854,
      "loss": 3.0377,
      "step": 112061
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0705723762512207,
      "learning_rate": 0.0003128383980349711,
      "loss": 2.8917,
      "step": 112062
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7885340452194214,
      "learning_rate": 0.0003128343112190154,
      "loss": 2.8302,
      "step": 112063
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8286244869232178,
      "learning_rate": 0.00031283022440067344,
      "loss": 3.1456,
      "step": 112064
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7407922744750977,
      "learning_rate": 0.0003128261375799462,
      "loss": 2.8315,
      "step": 112065
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.285917282104492,
      "learning_rate": 0.00031282205075683426,
      "loss": 2.9373,
      "step": 112066
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.308231830596924,
      "learning_rate": 0.00031281796393133857,
      "loss": 3.1112,
      "step": 112067
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9489284753799438,
      "learning_rate": 0.00031281387710345963,
      "loss": 2.8925,
      "step": 112068
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.870764970779419,
      "learning_rate": 0.00031280979027319844,
      "loss": 2.9627,
      "step": 112069
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0986433029174805,
      "learning_rate": 0.0003128057034405557,
      "loss": 3.0371,
      "step": 112070
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5606937408447266,
      "learning_rate": 0.00031280161660553207,
      "loss": 3.1083,
      "step": 112071
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.768690586090088,
      "learning_rate": 0.00031279752976812837,
      "loss": 2.8707,
      "step": 112072
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.106462240219116,
      "learning_rate": 0.00031279344292834543,
      "loss": 3.0311,
      "step": 112073
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6395092010498047,
      "learning_rate": 0.00031278935608618387,
      "loss": 3.0628,
      "step": 112074
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2782158851623535,
      "learning_rate": 0.0003127852692416446,
      "loss": 3.0292,
      "step": 112075
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7486491203308105,
      "learning_rate": 0.00031278118239472827,
      "loss": 3.1623,
      "step": 112076
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.088285446166992,
      "learning_rate": 0.0003127770955454357,
      "loss": 2.8213,
      "step": 112077
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0282676219940186,
      "learning_rate": 0.00031277300869376757,
      "loss": 2.8944,
      "step": 112078
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.704416275024414,
      "learning_rate": 0.0003127689218397248,
      "loss": 3.3581,
      "step": 112079
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6777021884918213,
      "learning_rate": 0.00031276483498330796,
      "loss": 3.0175,
      "step": 112080
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8989262580871582,
      "learning_rate": 0.0003127607481245179,
      "loss": 3.0664,
      "step": 112081
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.0333187580108643,
      "learning_rate": 0.0003127566612633554,
      "loss": 2.7744,
      "step": 112082
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8480230569839478,
      "learning_rate": 0.00031275257439982124,
      "loss": 3.0887,
      "step": 112083
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3056490421295166,
      "learning_rate": 0.0003127484875339161,
      "loss": 3.2591,
      "step": 112084
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0748817920684814,
      "learning_rate": 0.00031274440066564075,
      "loss": 3.0853,
      "step": 112085
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.769365668296814,
      "learning_rate": 0.00031274031379499595,
      "loss": 3.0516,
      "step": 112086
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1654746532440186,
      "learning_rate": 0.0003127362269219825,
      "loss": 3.0796,
      "step": 112087
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.037910223007202,
      "learning_rate": 0.00031273214004660124,
      "loss": 2.9668,
      "step": 112088
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0765202045440674,
      "learning_rate": 0.00031272805316885275,
      "loss": 3.1141,
      "step": 112089
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8420217037200928,
      "learning_rate": 0.0003127239662887378,
      "loss": 3.3612,
      "step": 112090
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.095625638961792,
      "learning_rate": 0.0003127198794062574,
      "loss": 2.9584,
      "step": 112091
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.699462652206421,
      "learning_rate": 0.000312715792521412,
      "loss": 3.0168,
      "step": 112092
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1039891242980957,
      "learning_rate": 0.00031271170563420257,
      "loss": 3.1136,
      "step": 112093
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.487598180770874,
      "learning_rate": 0.0003127076187446298,
      "loss": 3.1063,
      "step": 112094
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8326990604400635,
      "learning_rate": 0.00031270353185269443,
      "loss": 2.999,
      "step": 112095
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9664515256881714,
      "learning_rate": 0.00031269944495839716,
      "loss": 2.9698,
      "step": 112096
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.126713275909424,
      "learning_rate": 0.0003126953580617389,
      "loss": 3.0705,
      "step": 112097
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.068986415863037,
      "learning_rate": 0.00031269127116272035,
      "loss": 2.8706,
      "step": 112098
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9529249668121338,
      "learning_rate": 0.00031268718426134216,
      "loss": 2.9525,
      "step": 112099
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2959585189819336,
      "learning_rate": 0.0003126830973576052,
      "loss": 3.0301,
      "step": 112100
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.153533458709717,
      "learning_rate": 0.0003126790104515102,
      "loss": 3.1289,
      "step": 112101
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9251981973648071,
      "learning_rate": 0.00031267492354305804,
      "loss": 2.9824,
      "step": 112102
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.729953646659851,
      "learning_rate": 0.00031267083663224935,
      "loss": 3.0558,
      "step": 112103
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.08691143989563,
      "learning_rate": 0.00031266674971908485,
      "loss": 2.8976,
      "step": 112104
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6797893047332764,
      "learning_rate": 0.0003126626628035654,
      "loss": 3.0708,
      "step": 112105
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8613147735595703,
      "learning_rate": 0.0003126585758856917,
      "loss": 3.0335,
      "step": 112106
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7643036842346191,
      "learning_rate": 0.0003126544889654645,
      "loss": 3.0284,
      "step": 112107
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8234825134277344,
      "learning_rate": 0.00031265040204288464,
      "loss": 2.9423,
      "step": 112108
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0714011192321777,
      "learning_rate": 0.0003126463151179528,
      "loss": 2.9455,
      "step": 112109
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2028045654296875,
      "learning_rate": 0.00031264222819066983,
      "loss": 3.0095,
      "step": 112110
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.877606153488159,
      "learning_rate": 0.00031263814126103635,
      "loss": 3.0302,
      "step": 112111
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7265427112579346,
      "learning_rate": 0.0003126340543290533,
      "loss": 2.8104,
      "step": 112112
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7815369367599487,
      "learning_rate": 0.00031262996739472133,
      "loss": 3.0193,
      "step": 112113
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.297750473022461,
      "learning_rate": 0.0003126258804580411,
      "loss": 3.0141,
      "step": 112114
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.078848361968994,
      "learning_rate": 0.00031262179351901364,
      "loss": 2.8629,
      "step": 112115
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0866377353668213,
      "learning_rate": 0.00031261770657763945,
      "loss": 3.0817,
      "step": 112116
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.880212426185608,
      "learning_rate": 0.0003126136196339193,
      "loss": 3.1648,
      "step": 112117
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.410203218460083,
      "learning_rate": 0.0003126095326878542,
      "loss": 3.1886,
      "step": 112118
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6211509704589844,
      "learning_rate": 0.0003126054457394447,
      "loss": 2.9888,
      "step": 112119
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.351369619369507,
      "learning_rate": 0.00031260135878869154,
      "loss": 2.803,
      "step": 112120
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7212485074996948,
      "learning_rate": 0.00031259727183559566,
      "loss": 2.8474,
      "step": 112121
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5476114749908447,
      "learning_rate": 0.00031259318488015763,
      "loss": 3.0847,
      "step": 112122
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.583339214324951,
      "learning_rate": 0.0003125890979223783,
      "loss": 2.8052,
      "step": 112123
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1056737899780273,
      "learning_rate": 0.00031258501096225854,
      "loss": 2.9067,
      "step": 112124
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.400477886199951,
      "learning_rate": 0.00031258092399979884,
      "loss": 2.8765,
      "step": 112125
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.978350281715393,
      "learning_rate": 0.00031257683703500013,
      "loss": 2.9513,
      "step": 112126
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8981024026870728,
      "learning_rate": 0.00031257275006786324,
      "loss": 3.065,
      "step": 112127
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6428405046463013,
      "learning_rate": 0.0003125686630983887,
      "loss": 2.8456,
      "step": 112128
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.366098642349243,
      "learning_rate": 0.0003125645761265775,
      "loss": 2.8617,
      "step": 112129
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.734879732131958,
      "learning_rate": 0.00031256048915243034,
      "loss": 2.8963,
      "step": 112130
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7883925437927246,
      "learning_rate": 0.00031255640217594784,
      "loss": 2.6889,
      "step": 112131
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6281007528305054,
      "learning_rate": 0.00031255231519713094,
      "loss": 3.0138,
      "step": 112132
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0981991291046143,
      "learning_rate": 0.0003125482282159803,
      "loss": 3.0584,
      "step": 112133
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8906326293945312,
      "learning_rate": 0.0003125441412324967,
      "loss": 2.9433,
      "step": 112134
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8088021278381348,
      "learning_rate": 0.0003125400542466809,
      "loss": 3.2451,
      "step": 112135
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6007869243621826,
      "learning_rate": 0.0003125359672585337,
      "loss": 3.2015,
      "step": 112136
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2978246212005615,
      "learning_rate": 0.00031253188026805594,
      "loss": 2.4376,
      "step": 112137
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0102503299713135,
      "learning_rate": 0.00031252779327524806,
      "loss": 2.9801,
      "step": 112138
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9324833154678345,
      "learning_rate": 0.0003125237062801111,
      "loss": 2.9102,
      "step": 112139
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.897797703742981,
      "learning_rate": 0.0003125196192826458,
      "loss": 2.9429,
      "step": 112140
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0524938106536865,
      "learning_rate": 0.0003125155322828528,
      "loss": 2.8396,
      "step": 112141
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6058648824691772,
      "learning_rate": 0.00031251144528073293,
      "loss": 3.0847,
      "step": 112142
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4299392700195312,
      "learning_rate": 0.000312507358276287,
      "loss": 2.9066,
      "step": 112143
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8897716999053955,
      "learning_rate": 0.00031250327126951575,
      "loss": 3.1273,
      "step": 112144
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.474858283996582,
      "learning_rate": 0.0003124991842604198,
      "loss": 2.9385,
      "step": 112145
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2971489429473877,
      "learning_rate": 0.00031249509724900007,
      "loss": 2.946,
      "step": 112146
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.720390796661377,
      "learning_rate": 0.0003124910102352572,
      "loss": 3.0238,
      "step": 112147
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.9122111797332764,
      "learning_rate": 0.00031248692321919207,
      "loss": 2.8903,
      "step": 112148
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5160350799560547,
      "learning_rate": 0.0003124828362008055,
      "loss": 2.8845,
      "step": 112149
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7243576049804688,
      "learning_rate": 0.00031247874918009794,
      "loss": 3.0499,
      "step": 112150
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.229649066925049,
      "learning_rate": 0.0003124746621570704,
      "loss": 3.378,
      "step": 112151
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9076963663101196,
      "learning_rate": 0.0003124705751317236,
      "loss": 2.9855,
      "step": 112152
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8024885654449463,
      "learning_rate": 0.0003124664881040583,
      "loss": 2.9778,
      "step": 112153
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.458738327026367,
      "learning_rate": 0.0003124624010740752,
      "loss": 2.9884,
      "step": 112154
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5836890935897827,
      "learning_rate": 0.0003124583140417752,
      "loss": 3.0477,
      "step": 112155
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9259531497955322,
      "learning_rate": 0.000312454227007159,
      "loss": 2.8476,
      "step": 112156
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6174075603485107,
      "learning_rate": 0.0003124501399702271,
      "loss": 3.0616,
      "step": 112157
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.998732566833496,
      "learning_rate": 0.0003124460529309807,
      "loss": 2.8912,
      "step": 112158
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6915948390960693,
      "learning_rate": 0.00031244196588942026,
      "loss": 3.0226,
      "step": 112159
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9716986417770386,
      "learning_rate": 0.00031243787884554657,
      "loss": 2.7397,
      "step": 112160
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.630608558654785,
      "learning_rate": 0.00031243379179936054,
      "loss": 2.8123,
      "step": 112161
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9164875745773315,
      "learning_rate": 0.0003124297047508628,
      "loss": 3.2436,
      "step": 112162
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9135289192199707,
      "learning_rate": 0.0003124256177000541,
      "loss": 3.1174,
      "step": 112163
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8345266580581665,
      "learning_rate": 0.0003124215306469353,
      "loss": 3.0699,
      "step": 112164
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.713605523109436,
      "learning_rate": 0.0003124174435915071,
      "loss": 3.0303,
      "step": 112165
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9785916805267334,
      "learning_rate": 0.0003124133565337702,
      "loss": 2.9771,
      "step": 112166
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.006155014038086,
      "learning_rate": 0.00031240926947372554,
      "loss": 2.9845,
      "step": 112167
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9662725925445557,
      "learning_rate": 0.00031240518241137363,
      "loss": 3.07,
      "step": 112168
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1486551761627197,
      "learning_rate": 0.0003124010953467154,
      "loss": 3.1905,
      "step": 112169
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9240609407424927,
      "learning_rate": 0.0003123970082797517,
      "loss": 3.0479,
      "step": 112170
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.922081470489502,
      "learning_rate": 0.000312392921210483,
      "loss": 2.7227,
      "step": 112171
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9121160507202148,
      "learning_rate": 0.0003123888341389102,
      "loss": 2.9113,
      "step": 112172
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.815373182296753,
      "learning_rate": 0.0003123847470650342,
      "loss": 2.9198,
      "step": 112173
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9874581098556519,
      "learning_rate": 0.00031238065998885563,
      "loss": 2.9355,
      "step": 112174
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.209674119949341,
      "learning_rate": 0.0003123765729103752,
      "loss": 3.2319,
      "step": 112175
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7860623598098755,
      "learning_rate": 0.0003123724858295938,
      "loss": 3.0319,
      "step": 112176
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.438544511795044,
      "learning_rate": 0.0003123683987465121,
      "loss": 2.8377,
      "step": 112177
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.300955295562744,
      "learning_rate": 0.0003123643116611308,
      "loss": 3.0978,
      "step": 112178
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4530904293060303,
      "learning_rate": 0.00031236022457345084,
      "loss": 2.9797,
      "step": 112179
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7932462692260742,
      "learning_rate": 0.00031235613748347286,
      "loss": 3.0777,
      "step": 112180
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9471372365951538,
      "learning_rate": 0.00031235205039119757,
      "loss": 2.996,
      "step": 112181
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1221041679382324,
      "learning_rate": 0.0003123479632966259,
      "loss": 2.9876,
      "step": 112182
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1939151287078857,
      "learning_rate": 0.00031234387619975845,
      "loss": 2.8594,
      "step": 112183
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2328457832336426,
      "learning_rate": 0.0003123397891005961,
      "loss": 3.0267,
      "step": 112184
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4863169193267822,
      "learning_rate": 0.00031233570199913944,
      "loss": 2.8312,
      "step": 112185
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1532866954803467,
      "learning_rate": 0.0003123316148953895,
      "loss": 3.0924,
      "step": 112186
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0314862728118896,
      "learning_rate": 0.0003123275277893468,
      "loss": 3.1778,
      "step": 112187
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5321314334869385,
      "learning_rate": 0.0003123234406810121,
      "loss": 3.1292,
      "step": 112188
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.756244421005249,
      "learning_rate": 0.00031231935357038627,
      "loss": 2.9866,
      "step": 112189
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7914493083953857,
      "learning_rate": 0.00031231526645747015,
      "loss": 3.1065,
      "step": 112190
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9210926294326782,
      "learning_rate": 0.00031231117934226425,
      "loss": 2.9196,
      "step": 112191
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9508960247039795,
      "learning_rate": 0.00031230709222476955,
      "loss": 3.0709,
      "step": 112192
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.8840787410736084,
      "learning_rate": 0.00031230300510498674,
      "loss": 3.1515,
      "step": 112193
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.196669816970825,
      "learning_rate": 0.00031229891798291654,
      "loss": 3.1177,
      "step": 112194
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.24090313911438,
      "learning_rate": 0.00031229483085855976,
      "loss": 2.8454,
      "step": 112195
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9179058074951172,
      "learning_rate": 0.0003122907437319171,
      "loss": 3.0668,
      "step": 112196
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.820556163787842,
      "learning_rate": 0.00031228665660298937,
      "loss": 2.8355,
      "step": 112197
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.407633066177368,
      "learning_rate": 0.00031228256947177734,
      "loss": 3.1567,
      "step": 112198
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6354682445526123,
      "learning_rate": 0.0003122784823382817,
      "loss": 2.7651,
      "step": 112199
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8166289329528809,
      "learning_rate": 0.0003122743952025033,
      "loss": 2.7665,
      "step": 112200
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.099932909011841,
      "learning_rate": 0.0003122703080644429,
      "loss": 2.8633,
      "step": 112201
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7156476974487305,
      "learning_rate": 0.0003122662209241011,
      "loss": 2.7596,
      "step": 112202
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6463799476623535,
      "learning_rate": 0.00031226213378147884,
      "loss": 3.0973,
      "step": 112203
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8378338813781738,
      "learning_rate": 0.0003122580466365769,
      "loss": 2.807,
      "step": 112204
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7258944511413574,
      "learning_rate": 0.0003122539594893959,
      "loss": 2.8743,
      "step": 112205
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1125926971435547,
      "learning_rate": 0.0003122498723399366,
      "loss": 3.1125,
      "step": 112206
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.120939254760742,
      "learning_rate": 0.0003122457851881999,
      "loss": 2.9654,
      "step": 112207
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.936588168144226,
      "learning_rate": 0.00031224169803418644,
      "loss": 2.7868,
      "step": 112208
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4273390769958496,
      "learning_rate": 0.000312237610877897,
      "loss": 2.8654,
      "step": 112209
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.5451390743255615,
      "learning_rate": 0.0003122335237193324,
      "loss": 3.0872,
      "step": 112210
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.146960735321045,
      "learning_rate": 0.0003122294365584933,
      "loss": 2.8878,
      "step": 112211
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.1273117065429688,
      "learning_rate": 0.00031222534939538053,
      "loss": 2.9636,
      "step": 112212
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0707130432128906,
      "learning_rate": 0.00031222126222999494,
      "loss": 2.8118,
      "step": 112213
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6824185848236084,
      "learning_rate": 0.00031221717506233707,
      "loss": 3.1789,
      "step": 112214
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8249858617782593,
      "learning_rate": 0.0003122130878924078,
      "loss": 2.7982,
      "step": 112215
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7690221071243286,
      "learning_rate": 0.000312209000720208,
      "loss": 3.0102,
      "step": 112216
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7865709066390991,
      "learning_rate": 0.0003122049135457382,
      "loss": 3.0406,
      "step": 112217
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.0743210315704346,
      "learning_rate": 0.00031220082636899927,
      "loss": 3.1236,
      "step": 112218
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4912290573120117,
      "learning_rate": 0.0003121967391899921,
      "loss": 2.783,
      "step": 112219
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.894240379333496,
      "learning_rate": 0.0003121926520087172,
      "loss": 2.9838,
      "step": 112220
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.9615108966827393,
      "learning_rate": 0.00031218856482517543,
      "loss": 3.2533,
      "step": 112221
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5220732688903809,
      "learning_rate": 0.0003121844776393677,
      "loss": 2.8934,
      "step": 112222
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5074220895767212,
      "learning_rate": 0.00031218039045129457,
      "loss": 3.2847,
      "step": 112223
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0241987705230713,
      "learning_rate": 0.0003121763032609569,
      "loss": 3.2284,
      "step": 112224
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8305339813232422,
      "learning_rate": 0.0003121722160683555,
      "loss": 2.9391,
      "step": 112225
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7048382759094238,
      "learning_rate": 0.0003121681288734909,
      "loss": 2.9791,
      "step": 112226
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7758407592773438,
      "learning_rate": 0.0003121640416763641,
      "loss": 3.0321,
      "step": 112227
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0283493995666504,
      "learning_rate": 0.00031215995447697576,
      "loss": 2.8682,
      "step": 112228
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9392269849777222,
      "learning_rate": 0.0003121558672753267,
      "loss": 3.0577,
      "step": 112229
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4458439350128174,
      "learning_rate": 0.0003121517800714175,
      "loss": 3.1431,
      "step": 112230
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8661603927612305,
      "learning_rate": 0.00031214769286524926,
      "loss": 2.7794,
      "step": 112231
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1100246906280518,
      "learning_rate": 0.00031214360565682235,
      "loss": 2.9774,
      "step": 112232
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3609166145324707,
      "learning_rate": 0.00031213951844613776,
      "loss": 2.8655,
      "step": 112233
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.953204870223999,
      "learning_rate": 0.0003121354312331963,
      "loss": 2.7532,
      "step": 112234
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9308722019195557,
      "learning_rate": 0.00031213134401799855,
      "loss": 2.8965,
      "step": 112235
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9076857566833496,
      "learning_rate": 0.0003121272568005454,
      "loss": 3.1949,
      "step": 112236
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.037238597869873,
      "learning_rate": 0.00031212316958083747,
      "loss": 3.0453,
      "step": 112237
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6283725500106812,
      "learning_rate": 0.00031211908235887566,
      "loss": 3.2319,
      "step": 112238
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9686781167984009,
      "learning_rate": 0.00031211499513466076,
      "loss": 3.2095,
      "step": 112239
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.888516902923584,
      "learning_rate": 0.0003121109079081934,
      "loss": 3.0463,
      "step": 112240
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8869104385375977,
      "learning_rate": 0.0003121068206794744,
      "loss": 2.8792,
      "step": 112241
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5221110582351685,
      "learning_rate": 0.00031210273344850443,
      "loss": 3.0005,
      "step": 112242
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8275949954986572,
      "learning_rate": 0.0003120986462152845,
      "loss": 2.8296,
      "step": 112243
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.957122802734375,
      "learning_rate": 0.0003120945589798151,
      "loss": 2.8099,
      "step": 112244
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9915423393249512,
      "learning_rate": 0.00031209047174209705,
      "loss": 2.9563,
      "step": 112245
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3506791591644287,
      "learning_rate": 0.0003120863845021313,
      "loss": 3.0379,
      "step": 112246
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5024046897888184,
      "learning_rate": 0.0003120822972599183,
      "loss": 2.9092,
      "step": 112247
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7419440746307373,
      "learning_rate": 0.00031207821001545905,
      "loss": 2.9808,
      "step": 112248
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7711308002471924,
      "learning_rate": 0.0003120741227687542,
      "loss": 2.904,
      "step": 112249
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.432332754135132,
      "learning_rate": 0.0003120700355198046,
      "loss": 2.8595,
      "step": 112250
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1153130531311035,
      "learning_rate": 0.0003120659482686109,
      "loss": 2.8416,
      "step": 112251
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.268717050552368,
      "learning_rate": 0.00031206186101517396,
      "loss": 3.1568,
      "step": 112252
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.073740005493164,
      "learning_rate": 0.00031205777375949444,
      "loss": 2.9297,
      "step": 112253
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.456155300140381,
      "learning_rate": 0.00031205368650157316,
      "loss": 2.9395,
      "step": 112254
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2099130153656006,
      "learning_rate": 0.00031204959924141095,
      "loss": 3.0526,
      "step": 112255
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.365377187728882,
      "learning_rate": 0.0003120455119790084,
      "loss": 3.0478,
      "step": 112256
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9605978727340698,
      "learning_rate": 0.0003120414247143664,
      "loss": 3.0907,
      "step": 112257
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1963725090026855,
      "learning_rate": 0.0003120373374474856,
      "loss": 2.8949,
      "step": 112258
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.9816901683807373,
      "learning_rate": 0.000312033250178367,
      "loss": 2.9943,
      "step": 112259
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.373291015625,
      "learning_rate": 0.000312029162907011,
      "loss": 2.9445,
      "step": 112260
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9676328897476196,
      "learning_rate": 0.0003120250756334187,
      "loss": 3.0432,
      "step": 112261
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3725016117095947,
      "learning_rate": 0.00031202098835759063,
      "loss": 2.8748,
      "step": 112262
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0071427822113037,
      "learning_rate": 0.0003120169010795276,
      "loss": 3.1601,
      "step": 112263
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.879457712173462,
      "learning_rate": 0.00031201281379923055,
      "loss": 2.9507,
      "step": 112264
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6939587593078613,
      "learning_rate": 0.00031200872651669995,
      "loss": 2.7944,
      "step": 112265
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.16214656829834,
      "learning_rate": 0.0003120046392319367,
      "loss": 3.2554,
      "step": 112266
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9976269006729126,
      "learning_rate": 0.0003120005519449416,
      "loss": 3.0649,
      "step": 112267
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.98769211769104,
      "learning_rate": 0.0003119964646557155,
      "loss": 3.0646,
      "step": 112268
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.337897777557373,
      "learning_rate": 0.00031199237736425885,
      "loss": 3.0532,
      "step": 112269
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.935021162033081,
      "learning_rate": 0.0003119882900705726,
      "loss": 2.9854,
      "step": 112270
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0913922786712646,
      "learning_rate": 0.0003119842027746576,
      "loss": 3.0347,
      "step": 112271
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.53641414642334,
      "learning_rate": 0.0003119801154765145,
      "loss": 2.9867,
      "step": 112272
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.581804871559143,
      "learning_rate": 0.000311976028176144,
      "loss": 2.9336,
      "step": 112273
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9039762020111084,
      "learning_rate": 0.00031197194087354696,
      "loss": 2.9827,
      "step": 112274
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1306376457214355,
      "learning_rate": 0.0003119678535687241,
      "loss": 2.8865,
      "step": 112275
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.989791989326477,
      "learning_rate": 0.0003119637662616762,
      "loss": 2.7604,
      "step": 112276
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.02169132232666,
      "learning_rate": 0.00031195967895240395,
      "loss": 3.0106,
      "step": 112277
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1990091800689697,
      "learning_rate": 0.0003119555916409083,
      "loss": 2.929,
      "step": 112278
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.1444272994995117,
      "learning_rate": 0.0003119515043271898,
      "loss": 2.6141,
      "step": 112279
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6534382104873657,
      "learning_rate": 0.00031194741701124924,
      "loss": 3.2563,
      "step": 112280
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.061246156692505,
      "learning_rate": 0.0003119433296930875,
      "loss": 2.9282,
      "step": 112281
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7001802921295166,
      "learning_rate": 0.0003119392423727053,
      "loss": 2.9778,
      "step": 112282
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7907284498214722,
      "learning_rate": 0.0003119351550501033,
      "loss": 3.1323,
      "step": 112283
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.850457191467285,
      "learning_rate": 0.0003119310677252823,
      "loss": 2.8939,
      "step": 112284
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1630733013153076,
      "learning_rate": 0.00031192698039824324,
      "loss": 2.8542,
      "step": 112285
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.158234119415283,
      "learning_rate": 0.0003119228930689866,
      "loss": 2.8061,
      "step": 112286
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3255345821380615,
      "learning_rate": 0.0003119188057375133,
      "loss": 3.1059,
      "step": 112287
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9181305170059204,
      "learning_rate": 0.00031191471840382406,
      "loss": 3.191,
      "step": 112288
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6778972148895264,
      "learning_rate": 0.0003119106310679197,
      "loss": 3.1751,
      "step": 112289
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6518661975860596,
      "learning_rate": 0.0003119065437298009,
      "loss": 2.7069,
      "step": 112290
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5763494968414307,
      "learning_rate": 0.00031190245638946834,
      "loss": 3.1302,
      "step": 112291
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.058414936065674,
      "learning_rate": 0.0003118983690469231,
      "loss": 3.0627,
      "step": 112292
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2687301635742188,
      "learning_rate": 0.00031189428170216556,
      "loss": 2.9786,
      "step": 112293
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0717923641204834,
      "learning_rate": 0.00031189019435519676,
      "loss": 2.7987,
      "step": 112294
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8214919567108154,
      "learning_rate": 0.00031188610700601726,
      "loss": 3.186,
      "step": 112295
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3667712211608887,
      "learning_rate": 0.00031188201965462795,
      "loss": 2.8282,
      "step": 112296
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9550678730010986,
      "learning_rate": 0.00031187793230102957,
      "loss": 3.0336,
      "step": 112297
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7444912195205688,
      "learning_rate": 0.0003118738449452228,
      "loss": 2.7917,
      "step": 112298
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5683045387268066,
      "learning_rate": 0.0003118697575872085,
      "loss": 3.1692,
      "step": 112299
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7617273330688477,
      "learning_rate": 0.00031186567022698734,
      "loss": 2.9942,
      "step": 112300
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8550935983657837,
      "learning_rate": 0.00031186158286456025,
      "loss": 2.995,
      "step": 112301
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6882903575897217,
      "learning_rate": 0.00031185749549992773,
      "loss": 2.9503,
      "step": 112302
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2450742721557617,
      "learning_rate": 0.00031185340813309076,
      "loss": 3.0469,
      "step": 112303
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8094083070755005,
      "learning_rate": 0.00031184932076405,
      "loss": 3.1384,
      "step": 112304
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1955249309539795,
      "learning_rate": 0.00031184523339280616,
      "loss": 2.8127,
      "step": 112305
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7890262603759766,
      "learning_rate": 0.00031184114601936016,
      "loss": 3.3333,
      "step": 112306
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9856007099151611,
      "learning_rate": 0.0003118370586437127,
      "loss": 3.085,
      "step": 112307
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.579557180404663,
      "learning_rate": 0.00031183297126586434,
      "loss": 3.0474,
      "step": 112308
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.944122314453125,
      "learning_rate": 0.00031182888388581615,
      "loss": 2.7608,
      "step": 112309
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.140176773071289,
      "learning_rate": 0.0003118247965035687,
      "loss": 3.1664,
      "step": 112310
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.615328550338745,
      "learning_rate": 0.0003118207091191228,
      "loss": 2.9466,
      "step": 112311
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.236448049545288,
      "learning_rate": 0.00031181662173247916,
      "loss": 2.8029,
      "step": 112312
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9191033840179443,
      "learning_rate": 0.0003118125343436387,
      "loss": 3.2961,
      "step": 112313
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.422466278076172,
      "learning_rate": 0.000311808446952602,
      "loss": 2.7366,
      "step": 112314
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6123063564300537,
      "learning_rate": 0.00031180435955936985,
      "loss": 2.8736,
      "step": 112315
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.082957744598389,
      "learning_rate": 0.0003118002721639431,
      "loss": 2.8974,
      "step": 112316
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2149035930633545,
      "learning_rate": 0.0003117961847663224,
      "loss": 2.9407,
      "step": 112317
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.9287121295928955,
      "learning_rate": 0.0003117920973665086,
      "loss": 2.9267,
      "step": 112318
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4665820598602295,
      "learning_rate": 0.0003117880099645025,
      "loss": 2.9693,
      "step": 112319
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.353379726409912,
      "learning_rate": 0.00031178392256030473,
      "loss": 2.9374,
      "step": 112320
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7041451930999756,
      "learning_rate": 0.000311779835153916,
      "loss": 2.7538,
      "step": 112321
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7635042667388916,
      "learning_rate": 0.00031177574774533736,
      "loss": 2.957,
      "step": 112322
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2345147132873535,
      "learning_rate": 0.0003117716603345693,
      "loss": 2.8611,
      "step": 112323
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6518163681030273,
      "learning_rate": 0.0003117675729216126,
      "loss": 3.0715,
      "step": 112324
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.92300283908844,
      "learning_rate": 0.0003117634855064682,
      "loss": 2.9472,
      "step": 112325
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2374300956726074,
      "learning_rate": 0.00031175939808913674,
      "loss": 3.1116,
      "step": 112326
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9342546463012695,
      "learning_rate": 0.00031175531066961887,
      "loss": 3.0767,
      "step": 112327
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6470768451690674,
      "learning_rate": 0.0003117512232479156,
      "loss": 2.8473,
      "step": 112328
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6277674436569214,
      "learning_rate": 0.0003117471358240275,
      "loss": 3.1686,
      "step": 112329
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7847734689712524,
      "learning_rate": 0.00031174304839795536,
      "loss": 3.022,
      "step": 112330
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7252745628356934,
      "learning_rate": 0.0003117389609697,
      "loss": 3.1731,
      "step": 112331
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.976709246635437,
      "learning_rate": 0.0003117348735392621,
      "loss": 2.928,
      "step": 112332
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.720659017562866,
      "learning_rate": 0.0003117307861066425,
      "loss": 3.1561,
      "step": 112333
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0168051719665527,
      "learning_rate": 0.0003117266986718419,
      "loss": 2.9922,
      "step": 112334
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7342162132263184,
      "learning_rate": 0.00031172261123486113,
      "loss": 2.9921,
      "step": 112335
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.771058440208435,
      "learning_rate": 0.0003117185237957009,
      "loss": 2.9559,
      "step": 112336
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6976569890975952,
      "learning_rate": 0.00031171443635436187,
      "loss": 3.1093,
      "step": 112337
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5748826265335083,
      "learning_rate": 0.0003117103489108451,
      "loss": 2.9156,
      "step": 112338
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.91169011592865,
      "learning_rate": 0.000311706261465151,
      "loss": 2.7504,
      "step": 112339
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9391517639160156,
      "learning_rate": 0.00031170217401728053,
      "loss": 3.0464,
      "step": 112340
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.101654052734375,
      "learning_rate": 0.00031169808656723446,
      "loss": 3.019,
      "step": 112341
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1469638347625732,
      "learning_rate": 0.0003116939991150134,
      "loss": 2.8684,
      "step": 112342
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.001466751098633,
      "learning_rate": 0.00031168991166061823,
      "loss": 3.3127,
      "step": 112343
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0396971702575684,
      "learning_rate": 0.0003116858242040497,
      "loss": 2.9365,
      "step": 112344
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1556131839752197,
      "learning_rate": 0.0003116817367453086,
      "loss": 3.09,
      "step": 112345
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0120608806610107,
      "learning_rate": 0.00031167764928439557,
      "loss": 2.9654,
      "step": 112346
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.511549711227417,
      "learning_rate": 0.00031167356182131153,
      "loss": 2.7599,
      "step": 112347
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.415459394454956,
      "learning_rate": 0.000311669474356057,
      "loss": 2.7854,
      "step": 112348
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9902586936950684,
      "learning_rate": 0.00031166538688863303,
      "loss": 2.9163,
      "step": 112349
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6939488649368286,
      "learning_rate": 0.00031166129941904026,
      "loss": 2.9237,
      "step": 112350
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9282439947128296,
      "learning_rate": 0.0003116572119472794,
      "loss": 2.9724,
      "step": 112351
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8258800506591797,
      "learning_rate": 0.0003116531244733512,
      "loss": 2.9686,
      "step": 112352
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8872172832489014,
      "learning_rate": 0.00031164903699725655,
      "loss": 2.8825,
      "step": 112353
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7919036149978638,
      "learning_rate": 0.000311644949518996,
      "loss": 3.1762,
      "step": 112354
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9118181467056274,
      "learning_rate": 0.0003116408620385705,
      "loss": 2.9617,
      "step": 112355
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8164805173873901,
      "learning_rate": 0.00031163677455598083,
      "loss": 3.1584,
      "step": 112356
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1340646743774414,
      "learning_rate": 0.00031163268707122755,
      "loss": 2.9043,
      "step": 112357
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9968456029891968,
      "learning_rate": 0.00031162859958431155,
      "loss": 3.1522,
      "step": 112358
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8162899017333984,
      "learning_rate": 0.0003116245120952336,
      "loss": 2.9612,
      "step": 112359
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6078349351882935,
      "learning_rate": 0.0003116204246039944,
      "loss": 2.8781,
      "step": 112360
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9266109466552734,
      "learning_rate": 0.0003116163371105948,
      "loss": 2.9215,
      "step": 112361
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.63279390335083,
      "learning_rate": 0.0003116122496150354,
      "loss": 2.8172,
      "step": 112362
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.459705352783203,
      "learning_rate": 0.0003116081621173172,
      "loss": 2.8675,
      "step": 112363
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9168018102645874,
      "learning_rate": 0.0003116040746174407,
      "loss": 2.9647,
      "step": 112364
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9080171585083008,
      "learning_rate": 0.00031159998711540685,
      "loss": 3.2041,
      "step": 112365
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.403035879135132,
      "learning_rate": 0.0003115958996112163,
      "loss": 3.0967,
      "step": 112366
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.720633625984192,
      "learning_rate": 0.0003115918121048699,
      "loss": 3.2051,
      "step": 112367
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.41756010055542,
      "learning_rate": 0.0003115877245963683,
      "loss": 3.0496,
      "step": 112368
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9598480463027954,
      "learning_rate": 0.00031158363708571235,
      "loss": 2.9629,
      "step": 112369
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9387108087539673,
      "learning_rate": 0.0003115795495729029,
      "loss": 3.0359,
      "step": 112370
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.408935546875,
      "learning_rate": 0.00031157546205794044,
      "loss": 3.1793,
      "step": 112371
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3056600093841553,
      "learning_rate": 0.00031157137454082594,
      "loss": 3.0692,
      "step": 112372
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1831345558166504,
      "learning_rate": 0.00031156728702156006,
      "loss": 3.1739,
      "step": 112373
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9645717144012451,
      "learning_rate": 0.0003115631995001437,
      "loss": 2.8905,
      "step": 112374
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.543760299682617,
      "learning_rate": 0.0003115591119765775,
      "loss": 2.901,
      "step": 112375
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.125075578689575,
      "learning_rate": 0.00031155502445086216,
      "loss": 2.8734,
      "step": 112376
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.177333354949951,
      "learning_rate": 0.0003115509369229986,
      "loss": 2.9229,
      "step": 112377
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4543051719665527,
      "learning_rate": 0.0003115468493929875,
      "loss": 3.0722,
      "step": 112378
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.185776710510254,
      "learning_rate": 0.0003115427618608295,
      "loss": 2.8654,
      "step": 112379
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3064868450164795,
      "learning_rate": 0.0003115386743265257,
      "loss": 3.0666,
      "step": 112380
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.469036102294922,
      "learning_rate": 0.0003115345867900765,
      "loss": 2.7256,
      "step": 112381
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6850165128707886,
      "learning_rate": 0.0003115304992514828,
      "loss": 2.9525,
      "step": 112382
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9922157526016235,
      "learning_rate": 0.0003115264117107454,
      "loss": 2.8503,
      "step": 112383
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.78848397731781,
      "learning_rate": 0.0003115223241678651,
      "loss": 3.1024,
      "step": 112384
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.4956814050674438,
      "learning_rate": 0.0003115182366228424,
      "loss": 3.0254,
      "step": 112385
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9502936601638794,
      "learning_rate": 0.0003115141490756784,
      "loss": 3.1129,
      "step": 112386
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9033608436584473,
      "learning_rate": 0.00031151006152637365,
      "loss": 2.9197,
      "step": 112387
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7362849712371826,
      "learning_rate": 0.0003115059739749289,
      "loss": 3.0721,
      "step": 112388
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.864412784576416,
      "learning_rate": 0.0003115018864213451,
      "loss": 2.78,
      "step": 112389
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7497382164001465,
      "learning_rate": 0.00031149779886562273,
      "loss": 3.1262,
      "step": 112390
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9046106338500977,
      "learning_rate": 0.00031149371130776283,
      "loss": 2.7687,
      "step": 112391
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8170973062515259,
      "learning_rate": 0.00031148962374776603,
      "loss": 3.0091,
      "step": 112392
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9131379127502441,
      "learning_rate": 0.00031148553618563297,
      "loss": 3.135,
      "step": 112393
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7097272872924805,
      "learning_rate": 0.0003114814486213646,
      "loss": 3.1669,
      "step": 112394
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.540153741836548,
      "learning_rate": 0.0003114773610549617,
      "loss": 2.9302,
      "step": 112395
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6798574924468994,
      "learning_rate": 0.00031147327348642485,
      "loss": 2.8116,
      "step": 112396
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.181938648223877,
      "learning_rate": 0.00031146918591575483,
      "loss": 3.0296,
      "step": 112397
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8056459426879883,
      "learning_rate": 0.0003114650983429527,
      "loss": 2.9884,
      "step": 112398
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.4010539054870605,
      "learning_rate": 0.0003114610107680187,
      "loss": 2.8001,
      "step": 112399
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0203311443328857,
      "learning_rate": 0.000311456923190954,
      "loss": 2.6793,
      "step": 112400
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.2233474254608154,
      "learning_rate": 0.0003114528356117593,
      "loss": 2.9392,
      "step": 112401
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.410188674926758,
      "learning_rate": 0.0003114487480304353,
      "loss": 2.9793,
      "step": 112402
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3060576915740967,
      "learning_rate": 0.0003114446604469827,
      "loss": 2.9607,
      "step": 112403
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6416345834732056,
      "learning_rate": 0.00031144057286140237,
      "loss": 3.1341,
      "step": 112404
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4246981143951416,
      "learning_rate": 0.00031143648527369504,
      "loss": 3.2227,
      "step": 112405
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7589043378829956,
      "learning_rate": 0.0003114323976838614,
      "loss": 3.0582,
      "step": 112406
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.538451075553894,
      "learning_rate": 0.00031142831009190223,
      "loss": 2.9901,
      "step": 112407
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.754479169845581,
      "learning_rate": 0.00031142422249781843,
      "loss": 2.9593,
      "step": 112408
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.5794703960418701,
      "learning_rate": 0.0003114201349016106,
      "loss": 2.8748,
      "step": 112409
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6597450971603394,
      "learning_rate": 0.0003114160473032795,
      "loss": 3.003,
      "step": 112410
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.472252368927002,
      "learning_rate": 0.00031141195970282604,
      "loss": 3.1171,
      "step": 112411
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1766648292541504,
      "learning_rate": 0.00031140787210025083,
      "loss": 2.9479,
      "step": 112412
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.055433511734009,
      "learning_rate": 0.00031140378449555464,
      "loss": 3.258,
      "step": 112413
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7941255569458008,
      "learning_rate": 0.00031139969688873836,
      "loss": 2.8895,
      "step": 112414
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9588813781738281,
      "learning_rate": 0.00031139560927980256,
      "loss": 3.0306,
      "step": 112415
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.025801181793213,
      "learning_rate": 0.00031139152166874813,
      "loss": 2.9167,
      "step": 112416
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7346165180206299,
      "learning_rate": 0.0003113874340555759,
      "loss": 2.8228,
      "step": 112417
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8520079851150513,
      "learning_rate": 0.0003113833464402864,
      "loss": 2.7431,
      "step": 112418
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7705222368240356,
      "learning_rate": 0.0003113792588228806,
      "loss": 2.8795,
      "step": 112419
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9427980184555054,
      "learning_rate": 0.0003113751712033592,
      "loss": 3.1676,
      "step": 112420
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6433051824569702,
      "learning_rate": 0.0003113710835817229,
      "loss": 2.8153,
      "step": 112421
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6496340036392212,
      "learning_rate": 0.00031136699595797244,
      "loss": 2.8557,
      "step": 112422
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.948475956916809,
      "learning_rate": 0.0003113629083321087,
      "loss": 3.1448,
      "step": 112423
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.192467451095581,
      "learning_rate": 0.00031135882070413234,
      "loss": 2.8663,
      "step": 112424
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9790457487106323,
      "learning_rate": 0.0003113547330740442,
      "loss": 3.1187,
      "step": 112425
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7752444744110107,
      "learning_rate": 0.0003113506454418451,
      "loss": 3.0454,
      "step": 112426
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.954601526260376,
      "learning_rate": 0.0003113465578075355,
      "loss": 3.1262,
      "step": 112427
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2639129161834717,
      "learning_rate": 0.0003113424701711165,
      "loss": 2.9862,
      "step": 112428
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6139488220214844,
      "learning_rate": 0.0003113383825325887,
      "loss": 2.6751,
      "step": 112429
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.0597996711730957,
      "learning_rate": 0.0003113342948919529,
      "loss": 2.9747,
      "step": 112430
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2161033153533936,
      "learning_rate": 0.0003113302072492097,
      "loss": 3.1997,
      "step": 112431
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3662869930267334,
      "learning_rate": 0.00031132611960436017,
      "loss": 2.9874,
      "step": 112432
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.203928232192993,
      "learning_rate": 0.0003113220319574049,
      "loss": 3.1728,
      "step": 112433
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7286522388458252,
      "learning_rate": 0.0003113179443083445,
      "loss": 3.2493,
      "step": 112434
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7946947813034058,
      "learning_rate": 0.00031131385665718,
      "loss": 3.2006,
      "step": 112435
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.9779562950134277,
      "learning_rate": 0.000311309769003912,
      "loss": 2.8863,
      "step": 112436
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8640267848968506,
      "learning_rate": 0.0003113056813485413,
      "loss": 3.1618,
      "step": 112437
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5647270679473877,
      "learning_rate": 0.0003113015936910687,
      "loss": 2.9925,
      "step": 112438
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.648702383041382,
      "learning_rate": 0.0003112975060314949,
      "loss": 3.0999,
      "step": 112439
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.523387908935547,
      "learning_rate": 0.0003112934183698206,
      "loss": 2.8243,
      "step": 112440
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7460684776306152,
      "learning_rate": 0.0003112893307060467,
      "loss": 3.2442,
      "step": 112441
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8578991889953613,
      "learning_rate": 0.00031128524304017394,
      "loss": 3.0233,
      "step": 112442
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.143979072570801,
      "learning_rate": 0.00031128115537220295,
      "loss": 2.8121,
      "step": 112443
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.8763797283172607,
      "learning_rate": 0.00031127706770213475,
      "loss": 3.2449,
      "step": 112444
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.907529354095459,
      "learning_rate": 0.0003112729800299698,
      "loss": 3.1251,
      "step": 112445
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3034114837646484,
      "learning_rate": 0.000311268892355709,
      "loss": 2.9102,
      "step": 112446
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5284852981567383,
      "learning_rate": 0.0003112648046793531,
      "loss": 3.0081,
      "step": 112447
    },
    {
      "epoch": 1.46,
      "grad_norm": 4.101875305175781,
      "learning_rate": 0.0003112607170009028,
      "loss": 2.8538,
      "step": 112448
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6888420581817627,
      "learning_rate": 0.00031125662932035905,
      "loss": 2.8865,
      "step": 112449
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.791661262512207,
      "learning_rate": 0.0003112525416377224,
      "loss": 2.9421,
      "step": 112450
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.888492226600647,
      "learning_rate": 0.00031124845395299377,
      "loss": 2.9758,
      "step": 112451
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8343212604522705,
      "learning_rate": 0.00031124436626617377,
      "loss": 2.8999,
      "step": 112452
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.6034977436065674,
      "learning_rate": 0.0003112402785772632,
      "loss": 2.9955,
      "step": 112453
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9366168975830078,
      "learning_rate": 0.00031123619088626285,
      "loss": 2.9443,
      "step": 112454
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.033625841140747,
      "learning_rate": 0.00031123210319317356,
      "loss": 3.0117,
      "step": 112455
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.497164011001587,
      "learning_rate": 0.00031122801549799594,
      "loss": 2.9656,
      "step": 112456
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.810181736946106,
      "learning_rate": 0.00031122392780073084,
      "loss": 3.15,
      "step": 112457
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.2152786254882812,
      "learning_rate": 0.00031121984010137904,
      "loss": 3.1165,
      "step": 112458
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6277244091033936,
      "learning_rate": 0.00031121575239994123,
      "loss": 2.9877,
      "step": 112459
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.4287824630737305,
      "learning_rate": 0.00031121166469641817,
      "loss": 2.9505,
      "step": 112460
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.3184092044830322,
      "learning_rate": 0.00031120757699081063,
      "loss": 3.0589,
      "step": 112461
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4255833625793457,
      "learning_rate": 0.0003112034892831195,
      "loss": 2.9674,
      "step": 112462
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8532441854476929,
      "learning_rate": 0.0003111994015733454,
      "loss": 3.1438,
      "step": 112463
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.631704330444336,
      "learning_rate": 0.00031119531386148905,
      "loss": 3.0786,
      "step": 112464
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.8499066829681396,
      "learning_rate": 0.00031119122614755136,
      "loss": 3.0001,
      "step": 112465
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.404492139816284,
      "learning_rate": 0.00031118713843153293,
      "loss": 3.2676,
      "step": 112466
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1969261169433594,
      "learning_rate": 0.0003111830507134346,
      "loss": 3.0688,
      "step": 112467
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.735258936882019,
      "learning_rate": 0.0003111789629932572,
      "loss": 2.8489,
      "step": 112468
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.060741424560547,
      "learning_rate": 0.00031117487527100146,
      "loss": 2.8889,
      "step": 112469
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4125924110412598,
      "learning_rate": 0.00031117078754666795,
      "loss": 3.0293,
      "step": 112470
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.917930006980896,
      "learning_rate": 0.00031116669982025766,
      "loss": 2.8932,
      "step": 112471
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.955774188041687,
      "learning_rate": 0.0003111626120917713,
      "loss": 3.0698,
      "step": 112472
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1254794597625732,
      "learning_rate": 0.0003111585243612096,
      "loss": 2.939,
      "step": 112473
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1236095428466797,
      "learning_rate": 0.0003111544366285732,
      "loss": 2.9659,
      "step": 112474
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.199442148208618,
      "learning_rate": 0.00031115034889386313,
      "loss": 3.1187,
      "step": 112475
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7299671173095703,
      "learning_rate": 0.00031114626115707995,
      "loss": 2.8852,
      "step": 112476
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7300221920013428,
      "learning_rate": 0.0003111421734182244,
      "loss": 2.9068,
      "step": 112477
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1487903594970703,
      "learning_rate": 0.0003111380856772975,
      "loss": 2.9592,
      "step": 112478
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.0183873176574707,
      "learning_rate": 0.00031113399793429954,
      "loss": 3.0797,
      "step": 112479
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.87030827999115,
      "learning_rate": 0.0003111299101892317,
      "loss": 3.0681,
      "step": 112480
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9826617240905762,
      "learning_rate": 0.00031112582244209467,
      "loss": 3.0275,
      "step": 112481
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.4425249099731445,
      "learning_rate": 0.0003111217346928891,
      "loss": 2.957,
      "step": 112482
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.3339760303497314,
      "learning_rate": 0.00031111764694161575,
      "loss": 2.8121,
      "step": 112483
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.300861120223999,
      "learning_rate": 0.00031111355918827545,
      "loss": 2.8189,
      "step": 112484
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.144897222518921,
      "learning_rate": 0.0003111094714328689,
      "loss": 2.8295,
      "step": 112485
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.3057618141174316,
      "learning_rate": 0.00031110538367539684,
      "loss": 3.0931,
      "step": 112486
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.540705442428589,
      "learning_rate": 0.0003111012959158602,
      "loss": 3.1865,
      "step": 112487
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8413951396942139,
      "learning_rate": 0.00031109720815425954,
      "loss": 2.7907,
      "step": 112488
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9777076244354248,
      "learning_rate": 0.00031109312039059574,
      "loss": 2.9484,
      "step": 112489
    },
    {
      "epoch": 1.46,
      "grad_norm": 3.486266613006592,
      "learning_rate": 0.00031108903262486956,
      "loss": 3.0612,
      "step": 112490
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.847992181777954,
      "learning_rate": 0.0003110849448570816,
      "loss": 3.1131,
      "step": 112491
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8244527578353882,
      "learning_rate": 0.0003110808570872328,
      "loss": 2.9211,
      "step": 112492
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.264824628829956,
      "learning_rate": 0.00031107676931532393,
      "loss": 3.1154,
      "step": 112493
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.579432487487793,
      "learning_rate": 0.00031107268154135554,
      "loss": 2.9173,
      "step": 112494
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0110933780670166,
      "learning_rate": 0.0003110685937653286,
      "loss": 2.9614,
      "step": 112495
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6115041971206665,
      "learning_rate": 0.0003110645059872438,
      "loss": 3.1304,
      "step": 112496
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8460973501205444,
      "learning_rate": 0.0003110604182071019,
      "loss": 2.805,
      "step": 112497
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.7772183418273926,
      "learning_rate": 0.0003110563304249036,
      "loss": 2.8862,
      "step": 112498
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.252208709716797,
      "learning_rate": 0.0003110522426406499,
      "loss": 3.0272,
      "step": 112499
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8874372243881226,
      "learning_rate": 0.0003110481548543412,
      "loss": 2.9182,
      "step": 112500
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8977015018463135,
      "learning_rate": 0.0003110440670659785,
      "loss": 2.8705,
      "step": 112501
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.776026725769043,
      "learning_rate": 0.00031103997927556254,
      "loss": 3.0127,
      "step": 112502
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8304469585418701,
      "learning_rate": 0.0003110358914830939,
      "loss": 2.9623,
      "step": 112503
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0144221782684326,
      "learning_rate": 0.0003110318036885736,
      "loss": 2.6698,
      "step": 112504
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.1929054260253906,
      "learning_rate": 0.00031102771589200227,
      "loss": 2.9064,
      "step": 112505
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.7527060508728027,
      "learning_rate": 0.0003110236280933807,
      "loss": 2.7982,
      "step": 112506
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.5675864219665527,
      "learning_rate": 0.00031101954029270955,
      "loss": 3.0839,
      "step": 112507
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.6033014059066772,
      "learning_rate": 0.00031101545248998975,
      "loss": 2.9238,
      "step": 112508
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.0146658420562744,
      "learning_rate": 0.00031101136468522183,
      "loss": 3.1447,
      "step": 112509
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8930436372756958,
      "learning_rate": 0.0003110072768784068,
      "loss": 2.9238,
      "step": 112510
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.809472918510437,
      "learning_rate": 0.00031100318906954536,
      "loss": 2.9889,
      "step": 112511
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9512591361999512,
      "learning_rate": 0.0003109991012586381,
      "loss": 2.8842,
      "step": 112512
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.9127813577651978,
      "learning_rate": 0.00031099501344568597,
      "loss": 2.9825,
      "step": 112513
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8709993362426758,
      "learning_rate": 0.00031099092563068964,
      "loss": 2.8787,
      "step": 112514
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7960491180419922,
      "learning_rate": 0.00031098683781364986,
      "loss": 3.1999,
      "step": 112515
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.445716381072998,
      "learning_rate": 0.00031098274999456744,
      "loss": 3.0198,
      "step": 112516
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.911634683609009,
      "learning_rate": 0.00031097866217344314,
      "loss": 2.8134,
      "step": 112517
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.704358458518982,
      "learning_rate": 0.0003109745743502777,
      "loss": 3.0794,
      "step": 112518
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.078015089035034,
      "learning_rate": 0.00031097048652507184,
      "loss": 3.2792,
      "step": 112519
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.101940393447876,
      "learning_rate": 0.0003109663986978264,
      "loss": 3.009,
      "step": 112520
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.213446617126465,
      "learning_rate": 0.0003109623108685421,
      "loss": 2.9984,
      "step": 112521
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9480204582214355,
      "learning_rate": 0.0003109582230372196,
      "loss": 2.7936,
      "step": 112522
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8014683723449707,
      "learning_rate": 0.00031095413520385994,
      "loss": 3.0257,
      "step": 112523
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4072768688201904,
      "learning_rate": 0.0003109500473684636,
      "loss": 2.8846,
      "step": 112524
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.798323631286621,
      "learning_rate": 0.0003109459595310314,
      "loss": 3.0224,
      "step": 112525
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7238787412643433,
      "learning_rate": 0.0003109418716915642,
      "loss": 2.9407,
      "step": 112526
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1251087188720703,
      "learning_rate": 0.00031093778385006267,
      "loss": 2.9467,
      "step": 112527
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.381523847579956,
      "learning_rate": 0.0003109336960065276,
      "loss": 3.0158,
      "step": 112528
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8388906717300415,
      "learning_rate": 0.00031092960816095984,
      "loss": 3.2475,
      "step": 112529
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9032313823699951,
      "learning_rate": 0.0003109255203133599,
      "loss": 2.8874,
      "step": 112530
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0477471351623535,
      "learning_rate": 0.0003109214324637288,
      "loss": 2.985,
      "step": 112531
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9868285655975342,
      "learning_rate": 0.00031091734461206725,
      "loss": 3.0703,
      "step": 112532
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7023649215698242,
      "learning_rate": 0.0003109132567583758,
      "loss": 2.897,
      "step": 112533
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.121518135070801,
      "learning_rate": 0.0003109091689026555,
      "loss": 2.9828,
      "step": 112534
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7153270244598389,
      "learning_rate": 0.00031090508104490694,
      "loss": 3.174,
      "step": 112535
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.11427903175354,
      "learning_rate": 0.0003109009931851309,
      "loss": 2.9229,
      "step": 112536
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6522630453109741,
      "learning_rate": 0.0003108969053233282,
      "loss": 2.8295,
      "step": 112537
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2544896602630615,
      "learning_rate": 0.00031089281745949946,
      "loss": 3.0829,
      "step": 112538
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.560441255569458,
      "learning_rate": 0.00031088872959364575,
      "loss": 2.7824,
      "step": 112539
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.748098373413086,
      "learning_rate": 0.0003108846417257674,
      "loss": 2.9483,
      "step": 112540
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8241499662399292,
      "learning_rate": 0.0003108805538558655,
      "loss": 3.2345,
      "step": 112541
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4615066051483154,
      "learning_rate": 0.0003108764659839406,
      "loss": 2.969,
      "step": 112542
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.606720209121704,
      "learning_rate": 0.0003108723781099937,
      "loss": 3.1077,
      "step": 112543
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2290616035461426,
      "learning_rate": 0.0003108682902340253,
      "loss": 3.1283,
      "step": 112544
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3282761573791504,
      "learning_rate": 0.00031086420235603633,
      "loss": 2.8808,
      "step": 112545
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0002834796905518,
      "learning_rate": 0.0003108601144760275,
      "loss": 2.9426,
      "step": 112546
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.079453468322754,
      "learning_rate": 0.0003108560265939996,
      "loss": 2.9249,
      "step": 112547
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2309348583221436,
      "learning_rate": 0.00031085193870995337,
      "loss": 3.2234,
      "step": 112548
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3453352451324463,
      "learning_rate": 0.0003108478508238894,
      "loss": 3.1418,
      "step": 112549
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3747353553771973,
      "learning_rate": 0.00031084376293580887,
      "loss": 3.0551,
      "step": 112550
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.9625070095062256,
      "learning_rate": 0.0003108396750457121,
      "loss": 3.1513,
      "step": 112551
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.106274366378784,
      "learning_rate": 0.0003108355871536,
      "loss": 3.1356,
      "step": 112552
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7354209423065186,
      "learning_rate": 0.0003108314992594734,
      "loss": 2.8794,
      "step": 112553
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.1812329292297363,
      "learning_rate": 0.00031082741136333316,
      "loss": 2.926,
      "step": 112554
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2831382751464844,
      "learning_rate": 0.0003108233234651798,
      "loss": 3.1876,
      "step": 112555
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.029423952102661,
      "learning_rate": 0.00031081923556501416,
      "loss": 3.0078,
      "step": 112556
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.1137921810150146,
      "learning_rate": 0.000310815147662837,
      "loss": 2.7412,
      "step": 112557
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4357080459594727,
      "learning_rate": 0.0003108110597586492,
      "loss": 3.3706,
      "step": 112558
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8860262632369995,
      "learning_rate": 0.0003108069718524513,
      "loss": 3.2249,
      "step": 112559
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9765982627868652,
      "learning_rate": 0.00031080288394424424,
      "loss": 3.0646,
      "step": 112560
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1798737049102783,
      "learning_rate": 0.00031079879603402867,
      "loss": 2.9719,
      "step": 112561
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.627893328666687,
      "learning_rate": 0.0003107947081218055,
      "loss": 2.9786,
      "step": 112562
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9052836894989014,
      "learning_rate": 0.00031079062020757537,
      "loss": 3.1368,
      "step": 112563
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2450509071350098,
      "learning_rate": 0.00031078653229133895,
      "loss": 2.9732,
      "step": 112564
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7554564476013184,
      "learning_rate": 0.0003107824443730972,
      "loss": 2.8983,
      "step": 112565
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5536532402038574,
      "learning_rate": 0.0003107783564528508,
      "loss": 3.1615,
      "step": 112566
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9450029134750366,
      "learning_rate": 0.0003107742685306004,
      "loss": 3.007,
      "step": 112567
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9567195177078247,
      "learning_rate": 0.0003107701806063469,
      "loss": 3.2358,
      "step": 112568
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.079129695892334,
      "learning_rate": 0.00031076609268009117,
      "loss": 2.8707,
      "step": 112569
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0576319694519043,
      "learning_rate": 0.00031076200475183366,
      "loss": 3.1193,
      "step": 112570
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.217256546020508,
      "learning_rate": 0.0003107579168215753,
      "loss": 2.9297,
      "step": 112571
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.9220592975616455,
      "learning_rate": 0.0003107538288893169,
      "loss": 3.1969,
      "step": 112572
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.143461227416992,
      "learning_rate": 0.0003107497409550591,
      "loss": 2.8442,
      "step": 112573
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8124301433563232,
      "learning_rate": 0.0003107456530188028,
      "loss": 2.8857,
      "step": 112574
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8087736368179321,
      "learning_rate": 0.0003107415650805486,
      "loss": 2.9821,
      "step": 112575
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.610816240310669,
      "learning_rate": 0.0003107374771402974,
      "loss": 3.0687,
      "step": 112576
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7808717489242554,
      "learning_rate": 0.00031073338919804987,
      "loss": 3.0959,
      "step": 112577
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8010525703430176,
      "learning_rate": 0.0003107293012538068,
      "loss": 2.9847,
      "step": 112578
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4839789867401123,
      "learning_rate": 0.00031072521330756896,
      "loss": 3.1259,
      "step": 112579
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8274273872375488,
      "learning_rate": 0.00031072112535933703,
      "loss": 2.9591,
      "step": 112580
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9029771089553833,
      "learning_rate": 0.00031071703740911195,
      "loss": 3.0467,
      "step": 112581
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.239814519882202,
      "learning_rate": 0.00031071294945689433,
      "loss": 2.9706,
      "step": 112582
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8615071773529053,
      "learning_rate": 0.000310708861502685,
      "loss": 3.0216,
      "step": 112583
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9465664625167847,
      "learning_rate": 0.0003107047735464847,
      "loss": 3.1522,
      "step": 112584
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.077465295791626,
      "learning_rate": 0.00031070068558829405,
      "loss": 2.9426,
      "step": 112585
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5706162452697754,
      "learning_rate": 0.00031069659762811405,
      "loss": 3.1683,
      "step": 112586
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8195277452468872,
      "learning_rate": 0.0003106925096659453,
      "loss": 2.9325,
      "step": 112587
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4913036823272705,
      "learning_rate": 0.00031068842170178866,
      "loss": 3.0103,
      "step": 112588
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.240678548812866,
      "learning_rate": 0.0003106843337356448,
      "loss": 3.0641,
      "step": 112589
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1514182090759277,
      "learning_rate": 0.0003106802457675146,
      "loss": 2.9707,
      "step": 112590
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8442715406417847,
      "learning_rate": 0.00031067615779739865,
      "loss": 3.1215,
      "step": 112591
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.679109573364258,
      "learning_rate": 0.00031067206982529784,
      "loss": 2.832,
      "step": 112592
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.098900318145752,
      "learning_rate": 0.0003106679818512129,
      "loss": 3.203,
      "step": 112593
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9212063550949097,
      "learning_rate": 0.0003106638938751445,
      "loss": 3.2494,
      "step": 112594
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4761736392974854,
      "learning_rate": 0.0003106598058970936,
      "loss": 2.8961,
      "step": 112595
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7434725761413574,
      "learning_rate": 0.0003106557179170608,
      "loss": 3.1929,
      "step": 112596
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.515047073364258,
      "learning_rate": 0.00031065162993504684,
      "loss": 3.0967,
      "step": 112597
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7994359731674194,
      "learning_rate": 0.0003106475419510526,
      "loss": 2.7756,
      "step": 112598
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.548638343811035,
      "learning_rate": 0.00031064345396507884,
      "loss": 3.0196,
      "step": 112599
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.4755446910858154,
      "learning_rate": 0.00031063936597712613,
      "loss": 3.1762,
      "step": 112600
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1869256496429443,
      "learning_rate": 0.00031063527798719535,
      "loss": 2.9207,
      "step": 112601
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.793233036994934,
      "learning_rate": 0.00031063118999528737,
      "loss": 2.8976,
      "step": 112602
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.685922622680664,
      "learning_rate": 0.0003106271020014028,
      "loss": 3.2144,
      "step": 112603
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.7795817852020264,
      "learning_rate": 0.0003106230140055425,
      "loss": 2.8282,
      "step": 112604
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.428929090499878,
      "learning_rate": 0.00031061892600770717,
      "loss": 3.1363,
      "step": 112605
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.170539617538452,
      "learning_rate": 0.0003106148380078976,
      "loss": 2.9051,
      "step": 112606
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7118289470672607,
      "learning_rate": 0.00031061075000611446,
      "loss": 3.1547,
      "step": 112607
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.876035690307617,
      "learning_rate": 0.0003106066620023586,
      "loss": 2.9854,
      "step": 112608
    },
    {
      "epoch": 1.47,
      "grad_norm": 4.762203216552734,
      "learning_rate": 0.0003106025739966308,
      "loss": 2.9949,
      "step": 112609
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6256465911865234,
      "learning_rate": 0.00031059848598893175,
      "loss": 2.7215,
      "step": 112610
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.240316390991211,
      "learning_rate": 0.00031059439797926226,
      "loss": 3.0531,
      "step": 112611
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.9540395736694336,
      "learning_rate": 0.0003105903099676231,
      "loss": 2.9396,
      "step": 112612
    },
    {
      "epoch": 1.47,
      "grad_norm": 4.294394016265869,
      "learning_rate": 0.00031058622195401497,
      "loss": 3.1873,
      "step": 112613
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8105661869049072,
      "learning_rate": 0.00031058213393843863,
      "loss": 3.1254,
      "step": 112614
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9339070320129395,
      "learning_rate": 0.0003105780459208949,
      "loss": 3.1153,
      "step": 112615
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4658265113830566,
      "learning_rate": 0.0003105739579013845,
      "loss": 2.9024,
      "step": 112616
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1227481365203857,
      "learning_rate": 0.0003105698698799082,
      "loss": 3.015,
      "step": 112617
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6264262199401855,
      "learning_rate": 0.00031056578185646684,
      "loss": 2.9008,
      "step": 112618
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7529054880142212,
      "learning_rate": 0.00031056169383106096,
      "loss": 2.9028,
      "step": 112619
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7489763498306274,
      "learning_rate": 0.00031055760580369154,
      "loss": 3.0501,
      "step": 112620
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7489838600158691,
      "learning_rate": 0.0003105535177743593,
      "loss": 3.0597,
      "step": 112621
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1261978149414062,
      "learning_rate": 0.0003105494297430649,
      "loss": 3.0418,
      "step": 112622
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.115140676498413,
      "learning_rate": 0.0003105453417098092,
      "loss": 3.1266,
      "step": 112623
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8215512037277222,
      "learning_rate": 0.0003105412536745929,
      "loss": 2.9901,
      "step": 112624
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.2071094512939453,
      "learning_rate": 0.00031053716563741676,
      "loss": 2.7434,
      "step": 112625
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9888449907302856,
      "learning_rate": 0.0003105330775982816,
      "loss": 2.8199,
      "step": 112626
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9801517724990845,
      "learning_rate": 0.00031052898955718806,
      "loss": 2.7658,
      "step": 112627
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2243971824645996,
      "learning_rate": 0.0003105249015141371,
      "loss": 3.1884,
      "step": 112628
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8524585962295532,
      "learning_rate": 0.00031052081346912935,
      "loss": 3.1307,
      "step": 112629
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8569267988204956,
      "learning_rate": 0.0003105167254221655,
      "loss": 2.8193,
      "step": 112630
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.060396432876587,
      "learning_rate": 0.00031051263737324645,
      "loss": 3.0872,
      "step": 112631
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9755635261535645,
      "learning_rate": 0.00031050854932237294,
      "loss": 3.0822,
      "step": 112632
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3104114532470703,
      "learning_rate": 0.0003105044612695456,
      "loss": 3.0138,
      "step": 112633
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.021461009979248,
      "learning_rate": 0.0003105003732147653,
      "loss": 3.1064,
      "step": 112634
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.8494930267333984,
      "learning_rate": 0.0003104962851580328,
      "loss": 3.1048,
      "step": 112635
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3443143367767334,
      "learning_rate": 0.0003104921970993489,
      "loss": 2.9208,
      "step": 112636
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0280072689056396,
      "learning_rate": 0.0003104881090387142,
      "loss": 2.9658,
      "step": 112637
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5047879219055176,
      "learning_rate": 0.00031048402097612966,
      "loss": 2.9662,
      "step": 112638
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.634545922279358,
      "learning_rate": 0.00031047993291159585,
      "loss": 2.9388,
      "step": 112639
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.92213773727417,
      "learning_rate": 0.0003104758448451137,
      "loss": 3.0034,
      "step": 112640
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.845958948135376,
      "learning_rate": 0.0003104717567766838,
      "loss": 3.151,
      "step": 112641
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.92887544631958,
      "learning_rate": 0.0003104676687063071,
      "loss": 2.9753,
      "step": 112642
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3136589527130127,
      "learning_rate": 0.0003104635806339842,
      "loss": 3.1252,
      "step": 112643
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1906580924987793,
      "learning_rate": 0.00031045949255971596,
      "loss": 2.9795,
      "step": 112644
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2290217876434326,
      "learning_rate": 0.0003104554044835031,
      "loss": 2.968,
      "step": 112645
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.207003116607666,
      "learning_rate": 0.0003104513164053464,
      "loss": 3.1022,
      "step": 112646
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.028136968612671,
      "learning_rate": 0.00031044722832524656,
      "loss": 2.9162,
      "step": 112647
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0742993354797363,
      "learning_rate": 0.0003104431402432044,
      "loss": 3.0513,
      "step": 112648
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9078987836837769,
      "learning_rate": 0.0003104390521592207,
      "loss": 3.1343,
      "step": 112649
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.531031370162964,
      "learning_rate": 0.00031043496407329613,
      "loss": 3.151,
      "step": 112650
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.308168649673462,
      "learning_rate": 0.00031043087598543154,
      "loss": 2.9364,
      "step": 112651
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9872928857803345,
      "learning_rate": 0.0003104267878956276,
      "loss": 3.0056,
      "step": 112652
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.908682107925415,
      "learning_rate": 0.0003104226998038851,
      "loss": 2.9631,
      "step": 112653
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9184650182724,
      "learning_rate": 0.00031041861171020497,
      "loss": 2.8258,
      "step": 112654
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.656520128250122,
      "learning_rate": 0.00031041452361458774,
      "loss": 3.2304,
      "step": 112655
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6610978841781616,
      "learning_rate": 0.0003104104355170343,
      "loss": 2.8001,
      "step": 112656
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5503485202789307,
      "learning_rate": 0.00031040634741754525,
      "loss": 3.0121,
      "step": 112657
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.003399610519409,
      "learning_rate": 0.00031040225931612157,
      "loss": 2.8805,
      "step": 112658
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7788770198822021,
      "learning_rate": 0.00031039817121276383,
      "loss": 3.1287,
      "step": 112659
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.600172281265259,
      "learning_rate": 0.00031039408310747297,
      "loss": 2.8278,
      "step": 112660
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8019187450408936,
      "learning_rate": 0.00031038999500024956,
      "loss": 3.0052,
      "step": 112661
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1316025257110596,
      "learning_rate": 0.0003103859068910945,
      "loss": 2.9817,
      "step": 112662
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8374381065368652,
      "learning_rate": 0.0003103818187800085,
      "loss": 2.8962,
      "step": 112663
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.296966552734375,
      "learning_rate": 0.0003103777306669923,
      "loss": 3.103,
      "step": 112664
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.393101930618286,
      "learning_rate": 0.0003103736425520467,
      "loss": 3.2558,
      "step": 112665
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.298520088195801,
      "learning_rate": 0.00031036955443517237,
      "loss": 3.0824,
      "step": 112666
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8927042484283447,
      "learning_rate": 0.00031036546631637034,
      "loss": 2.791,
      "step": 112667
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.8268065452575684,
      "learning_rate": 0.000310361378195641,
      "loss": 3.0379,
      "step": 112668
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6628942489624023,
      "learning_rate": 0.0003103572900729853,
      "loss": 2.8682,
      "step": 112669
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6442270278930664,
      "learning_rate": 0.0003103532019484041,
      "loss": 2.8658,
      "step": 112670
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9525926113128662,
      "learning_rate": 0.00031034911382189794,
      "loss": 2.9222,
      "step": 112671
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8584157228469849,
      "learning_rate": 0.0003103450256934677,
      "loss": 3.0393,
      "step": 112672
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8996227979660034,
      "learning_rate": 0.00031034093756311415,
      "loss": 2.9483,
      "step": 112673
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8833776712417603,
      "learning_rate": 0.000310336849430838,
      "loss": 2.7013,
      "step": 112674
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8070068359375,
      "learning_rate": 0.00031033276129664006,
      "loss": 3.2125,
      "step": 112675
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.014765501022339,
      "learning_rate": 0.0003103286731605211,
      "loss": 2.7833,
      "step": 112676
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7618166208267212,
      "learning_rate": 0.00031032458502248174,
      "loss": 2.9989,
      "step": 112677
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9201946258544922,
      "learning_rate": 0.0003103204968825229,
      "loss": 2.9129,
      "step": 112678
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5129387378692627,
      "learning_rate": 0.0003103164087406453,
      "loss": 3.0266,
      "step": 112679
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8669577836990356,
      "learning_rate": 0.00031031232059684967,
      "loss": 2.8791,
      "step": 112680
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.035898447036743,
      "learning_rate": 0.00031030823245113676,
      "loss": 2.9395,
      "step": 112681
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.296170949935913,
      "learning_rate": 0.0003103041443035074,
      "loss": 2.9066,
      "step": 112682
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2419373989105225,
      "learning_rate": 0.0003103000561539623,
      "loss": 2.8205,
      "step": 112683
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9863593578338623,
      "learning_rate": 0.00031029596800250214,
      "loss": 3.1824,
      "step": 112684
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9644474983215332,
      "learning_rate": 0.00031029187984912794,
      "loss": 3.0504,
      "step": 112685
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0508108139038086,
      "learning_rate": 0.00031028779169384017,
      "loss": 2.9443,
      "step": 112686
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5766963958740234,
      "learning_rate": 0.00031028370353663966,
      "loss": 3.0461,
      "step": 112687
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5585981607437134,
      "learning_rate": 0.0003102796153775273,
      "loss": 2.6689,
      "step": 112688
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8615185022354126,
      "learning_rate": 0.00031027552721650374,
      "loss": 2.8293,
      "step": 112689
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.983896017074585,
      "learning_rate": 0.00031027143905356974,
      "loss": 3.0808,
      "step": 112690
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9041385650634766,
      "learning_rate": 0.00031026735088872615,
      "loss": 3.1886,
      "step": 112691
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.736467957496643,
      "learning_rate": 0.00031026326272197365,
      "loss": 3.0245,
      "step": 112692
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7163870334625244,
      "learning_rate": 0.0003102591745533129,
      "loss": 3.0166,
      "step": 112693
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2357122898101807,
      "learning_rate": 0.000310255086382745,
      "loss": 2.9957,
      "step": 112694
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1743834018707275,
      "learning_rate": 0.0003102509982102703,
      "loss": 3.1205,
      "step": 112695
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0856640338897705,
      "learning_rate": 0.0003102469100358897,
      "loss": 3.0682,
      "step": 112696
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.054023504257202,
      "learning_rate": 0.00031024282185960415,
      "loss": 2.7934,
      "step": 112697
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6353976726531982,
      "learning_rate": 0.00031023873368141424,
      "loss": 3.1223,
      "step": 112698
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.993271827697754,
      "learning_rate": 0.00031023464550132074,
      "loss": 2.9302,
      "step": 112699
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7016710042953491,
      "learning_rate": 0.00031023055731932447,
      "loss": 3.0431,
      "step": 112700
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5955396890640259,
      "learning_rate": 0.00031022646913542607,
      "loss": 2.9664,
      "step": 112701
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.541897773742676,
      "learning_rate": 0.00031022238094962636,
      "loss": 2.9304,
      "step": 112702
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6804486513137817,
      "learning_rate": 0.0003102182927619262,
      "loss": 3.1084,
      "step": 112703
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8984102010726929,
      "learning_rate": 0.0003102142045723262,
      "loss": 2.9252,
      "step": 112704
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8973736763000488,
      "learning_rate": 0.00031021011638082723,
      "loss": 3.0929,
      "step": 112705
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0277631282806396,
      "learning_rate": 0.00031020602818743003,
      "loss": 3.044,
      "step": 112706
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0425121784210205,
      "learning_rate": 0.00031020193999213526,
      "loss": 2.8999,
      "step": 112707
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.2508938312530518,
      "learning_rate": 0.00031019785179494373,
      "loss": 3.1223,
      "step": 112708
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.143747329711914,
      "learning_rate": 0.00031019376359585636,
      "loss": 2.9299,
      "step": 112709
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8100569248199463,
      "learning_rate": 0.00031018967539487365,
      "loss": 3.0634,
      "step": 112710
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0000669956207275,
      "learning_rate": 0.00031018558719199656,
      "loss": 3.1066,
      "step": 112711
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.85292649269104,
      "learning_rate": 0.0003101814989872258,
      "loss": 2.7627,
      "step": 112712
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1708974838256836,
      "learning_rate": 0.00031017741078056204,
      "loss": 3.1891,
      "step": 112713
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.00408935546875,
      "learning_rate": 0.0003101733225720061,
      "loss": 2.8836,
      "step": 112714
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9254546165466309,
      "learning_rate": 0.0003101692343615588,
      "loss": 2.9397,
      "step": 112715
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.125579833984375,
      "learning_rate": 0.00031016514614922087,
      "loss": 3.1146,
      "step": 112716
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7780603170394897,
      "learning_rate": 0.000310161057934993,
      "loss": 2.8825,
      "step": 112717
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7511361837387085,
      "learning_rate": 0.00031015696971887596,
      "loss": 2.9767,
      "step": 112718
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.804452896118164,
      "learning_rate": 0.0003101528815008706,
      "loss": 2.8797,
      "step": 112719
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.237245559692383,
      "learning_rate": 0.0003101487932809777,
      "loss": 3.0066,
      "step": 112720
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9761441946029663,
      "learning_rate": 0.00031014470505919776,
      "loss": 2.8063,
      "step": 112721
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9603570699691772,
      "learning_rate": 0.00031014061683553185,
      "loss": 2.9679,
      "step": 112722
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.178515672683716,
      "learning_rate": 0.00031013652860998063,
      "loss": 2.6057,
      "step": 112723
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9360724687576294,
      "learning_rate": 0.0003101324403825448,
      "loss": 2.9721,
      "step": 112724
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8451511859893799,
      "learning_rate": 0.00031012835215322516,
      "loss": 2.928,
      "step": 112725
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8103314638137817,
      "learning_rate": 0.0003101242639220225,
      "loss": 2.7613,
      "step": 112726
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7008041143417358,
      "learning_rate": 0.00031012017568893746,
      "loss": 2.806,
      "step": 112727
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7887048721313477,
      "learning_rate": 0.00031011608745397097,
      "loss": 2.9273,
      "step": 112728
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9598419666290283,
      "learning_rate": 0.00031011199921712366,
      "loss": 3.1946,
      "step": 112729
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4965665340423584,
      "learning_rate": 0.0003101079109783964,
      "loss": 2.813,
      "step": 112730
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6852657794952393,
      "learning_rate": 0.00031010382273778984,
      "loss": 2.7553,
      "step": 112731
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.505016565322876,
      "learning_rate": 0.0003100997344953048,
      "loss": 2.8605,
      "step": 112732
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2508544921875,
      "learning_rate": 0.000310095646250942,
      "loss": 2.8873,
      "step": 112733
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2083446979522705,
      "learning_rate": 0.00031009155800470226,
      "loss": 2.9593,
      "step": 112734
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0842862129211426,
      "learning_rate": 0.0003100874697565863,
      "loss": 3.0284,
      "step": 112735
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1942899227142334,
      "learning_rate": 0.0003100833815065949,
      "loss": 2.9785,
      "step": 112736
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.745083212852478,
      "learning_rate": 0.0003100792932547288,
      "loss": 3.0322,
      "step": 112737
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.534909963607788,
      "learning_rate": 0.0003100752050009887,
      "loss": 2.8363,
      "step": 112738
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9664247035980225,
      "learning_rate": 0.00031007111674537555,
      "loss": 2.9492,
      "step": 112739
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8253254890441895,
      "learning_rate": 0.00031006702848788997,
      "loss": 3.1687,
      "step": 112740
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.719533085823059,
      "learning_rate": 0.0003100629402285327,
      "loss": 3.1171,
      "step": 112741
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.915304183959961,
      "learning_rate": 0.00031005885196730453,
      "loss": 3.0629,
      "step": 112742
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.030437707901001,
      "learning_rate": 0.0003100547637042063,
      "loss": 2.9625,
      "step": 112743
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8532898426055908,
      "learning_rate": 0.0003100506754392386,
      "loss": 3.3818,
      "step": 112744
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3581461906433105,
      "learning_rate": 0.00031004658717240235,
      "loss": 3.0012,
      "step": 112745
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.803502321243286,
      "learning_rate": 0.0003100424989036983,
      "loss": 2.9736,
      "step": 112746
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0009238719940186,
      "learning_rate": 0.000310038410633127,
      "loss": 2.7565,
      "step": 112747
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.072605609893799,
      "learning_rate": 0.0003100343223606895,
      "loss": 3.159,
      "step": 112748
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.169588088989258,
      "learning_rate": 0.00031003023408638644,
      "loss": 3.1135,
      "step": 112749
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.8304927349090576,
      "learning_rate": 0.0003100261458102185,
      "loss": 3.0578,
      "step": 112750
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0323965549468994,
      "learning_rate": 0.0003100220575321865,
      "loss": 3.1252,
      "step": 112751
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4323580265045166,
      "learning_rate": 0.0003100179692522913,
      "loss": 3.1633,
      "step": 112752
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3222453594207764,
      "learning_rate": 0.00031001388097053354,
      "loss": 3.0854,
      "step": 112753
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2460715770721436,
      "learning_rate": 0.00031000979268691397,
      "loss": 2.9641,
      "step": 112754
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4035236835479736,
      "learning_rate": 0.0003100057044014334,
      "loss": 3.0,
      "step": 112755
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.794874906539917,
      "learning_rate": 0.00031000161611409265,
      "loss": 3.2003,
      "step": 112756
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9642117023468018,
      "learning_rate": 0.0003099975278248923,
      "loss": 3.0248,
      "step": 112757
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.857222318649292,
      "learning_rate": 0.00030999343953383326,
      "loss": 3.0391,
      "step": 112758
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7123620510101318,
      "learning_rate": 0.0003099893512409163,
      "loss": 3.027,
      "step": 112759
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2654407024383545,
      "learning_rate": 0.00030998526294614205,
      "loss": 2.987,
      "step": 112760
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.126427412033081,
      "learning_rate": 0.00030998117464951146,
      "loss": 2.9557,
      "step": 112761
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4203033447265625,
      "learning_rate": 0.00030997708635102503,
      "loss": 2.7136,
      "step": 112762
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.074838638305664,
      "learning_rate": 0.0003099729980506838,
      "loss": 3.0611,
      "step": 112763
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7304306030273438,
      "learning_rate": 0.0003099689097484884,
      "loss": 2.9179,
      "step": 112764
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7578235864639282,
      "learning_rate": 0.0003099648214444395,
      "loss": 3.114,
      "step": 112765
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9225645065307617,
      "learning_rate": 0.000309960733138538,
      "loss": 2.8847,
      "step": 112766
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1396820545196533,
      "learning_rate": 0.0003099566448307847,
      "loss": 2.8254,
      "step": 112767
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2438387870788574,
      "learning_rate": 0.00030995255652118016,
      "loss": 2.998,
      "step": 112768
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0467517375946045,
      "learning_rate": 0.00030994846820972527,
      "loss": 2.9046,
      "step": 112769
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9961764812469482,
      "learning_rate": 0.0003099443798964208,
      "loss": 2.7565,
      "step": 112770
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.451439380645752,
      "learning_rate": 0.0003099402915812674,
      "loss": 3.0335,
      "step": 112771
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.907741069793701,
      "learning_rate": 0.000309936203264266,
      "loss": 2.8229,
      "step": 112772
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8242785930633545,
      "learning_rate": 0.0003099321149454173,
      "loss": 3.1428,
      "step": 112773
    },
    {
      "epoch": 1.47,
      "grad_norm": 4.8648457527160645,
      "learning_rate": 0.00030992802662472196,
      "loss": 2.9779,
      "step": 112774
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.9091951847076416,
      "learning_rate": 0.0003099239383021808,
      "loss": 3.1176,
      "step": 112775
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9079036712646484,
      "learning_rate": 0.0003099198499777946,
      "loss": 3.0041,
      "step": 112776
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7467092275619507,
      "learning_rate": 0.0003099157616515642,
      "loss": 3.0544,
      "step": 112777
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.14689564704895,
      "learning_rate": 0.0003099116733234902,
      "loss": 3.0441,
      "step": 112778
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.837303400039673,
      "learning_rate": 0.00030990758499357344,
      "loss": 3.1218,
      "step": 112779
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6143648624420166,
      "learning_rate": 0.00030990349666181464,
      "loss": 3.1981,
      "step": 112780
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.339487314224243,
      "learning_rate": 0.00030989940832821465,
      "loss": 2.9257,
      "step": 112781
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8354077339172363,
      "learning_rate": 0.00030989531999277417,
      "loss": 2.9005,
      "step": 112782
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8418313264846802,
      "learning_rate": 0.00030989123165549396,
      "loss": 3.0954,
      "step": 112783
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8220864534378052,
      "learning_rate": 0.0003098871433163747,
      "loss": 3.0231,
      "step": 112784
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.203908681869507,
      "learning_rate": 0.0003098830549754174,
      "loss": 2.9351,
      "step": 112785
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.794832706451416,
      "learning_rate": 0.0003098789666326225,
      "loss": 2.8467,
      "step": 112786
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6658220291137695,
      "learning_rate": 0.0003098748782879909,
      "loss": 2.9629,
      "step": 112787
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7628902196884155,
      "learning_rate": 0.00030987078994152356,
      "loss": 2.8362,
      "step": 112788
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7934038639068604,
      "learning_rate": 0.0003098667015932209,
      "loss": 2.8676,
      "step": 112789
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1137747764587402,
      "learning_rate": 0.0003098626132430838,
      "loss": 3.0031,
      "step": 112790
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5321438312530518,
      "learning_rate": 0.0003098585248911132,
      "loss": 3.0923,
      "step": 112791
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6923339366912842,
      "learning_rate": 0.0003098544365373096,
      "loss": 2.956,
      "step": 112792
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.912740707397461,
      "learning_rate": 0.0003098503481816739,
      "loss": 2.9709,
      "step": 112793
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4059741497039795,
      "learning_rate": 0.00030984625982420687,
      "loss": 3.1096,
      "step": 112794
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9299525022506714,
      "learning_rate": 0.0003098421714649092,
      "loss": 3.1418,
      "step": 112795
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8492971658706665,
      "learning_rate": 0.00030983808310378167,
      "loss": 3.0963,
      "step": 112796
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.3799614906311035,
      "learning_rate": 0.00030983399474082517,
      "loss": 2.9325,
      "step": 112797
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8245130777359009,
      "learning_rate": 0.0003098299063760402,
      "loss": 3.0384,
      "step": 112798
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.527719259262085,
      "learning_rate": 0.00030982581800942773,
      "loss": 3.0712,
      "step": 112799
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2079296112060547,
      "learning_rate": 0.0003098217296409884,
      "loss": 3.1588,
      "step": 112800
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6636618375778198,
      "learning_rate": 0.00030981764127072315,
      "loss": 2.8062,
      "step": 112801
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4847412109375,
      "learning_rate": 0.0003098135528986325,
      "loss": 2.9474,
      "step": 112802
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6490596532821655,
      "learning_rate": 0.00030980946452471733,
      "loss": 3.2387,
      "step": 112803
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6961233615875244,
      "learning_rate": 0.00030980537614897847,
      "loss": 2.8468,
      "step": 112804
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.78568696975708,
      "learning_rate": 0.0003098012877714165,
      "loss": 2.852,
      "step": 112805
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.737762689590454,
      "learning_rate": 0.00030979719939203235,
      "loss": 3.0397,
      "step": 112806
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.74213707447052,
      "learning_rate": 0.0003097931110108267,
      "loss": 3.1395,
      "step": 112807
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.534945487976074,
      "learning_rate": 0.0003097890226278004,
      "loss": 3.1599,
      "step": 112808
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.856923222541809,
      "learning_rate": 0.00030978493424295406,
      "loss": 3.1703,
      "step": 112809
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1734654903411865,
      "learning_rate": 0.0003097808458562885,
      "loss": 2.9396,
      "step": 112810
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.126147985458374,
      "learning_rate": 0.00030977675746780457,
      "loss": 3.1343,
      "step": 112811
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0591626167297363,
      "learning_rate": 0.0003097726690775029,
      "loss": 3.0481,
      "step": 112812
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7587250471115112,
      "learning_rate": 0.00030976858068538425,
      "loss": 3.1606,
      "step": 112813
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.060295343399048,
      "learning_rate": 0.00030976449229144955,
      "loss": 3.0412,
      "step": 112814
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8419724702835083,
      "learning_rate": 0.0003097604038956994,
      "loss": 3.1957,
      "step": 112815
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6307153701782227,
      "learning_rate": 0.0003097563154981346,
      "loss": 3.0025,
      "step": 112816
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.860379457473755,
      "learning_rate": 0.0003097522270987559,
      "loss": 3.1335,
      "step": 112817
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1119635105133057,
      "learning_rate": 0.0003097481386975641,
      "loss": 3.0597,
      "step": 112818
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.613360643386841,
      "learning_rate": 0.00030974405029456,
      "loss": 3.073,
      "step": 112819
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.506498098373413,
      "learning_rate": 0.0003097399618897442,
      "loss": 3.1668,
      "step": 112820
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.747779130935669,
      "learning_rate": 0.00030973587348311754,
      "loss": 3.0142,
      "step": 112821
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6934916973114014,
      "learning_rate": 0.00030973178507468093,
      "loss": 2.8343,
      "step": 112822
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.395366668701172,
      "learning_rate": 0.0003097276966644349,
      "loss": 3.0332,
      "step": 112823
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.655242681503296,
      "learning_rate": 0.0003097236082523803,
      "loss": 2.8191,
      "step": 112824
    },
    {
      "epoch": 1.47,
      "grad_norm": 5.476478099822998,
      "learning_rate": 0.0003097195198385179,
      "loss": 3.0105,
      "step": 112825
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7578985691070557,
      "learning_rate": 0.0003097154314228485,
      "loss": 3.0282,
      "step": 112826
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6742744445800781,
      "learning_rate": 0.0003097113430053728,
      "loss": 2.8411,
      "step": 112827
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.75067138671875,
      "learning_rate": 0.0003097072545860916,
      "loss": 3.2847,
      "step": 112828
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4917068481445312,
      "learning_rate": 0.0003097031661650056,
      "loss": 2.9141,
      "step": 112829
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6911556720733643,
      "learning_rate": 0.0003096990777421156,
      "loss": 3.1695,
      "step": 112830
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2968103885650635,
      "learning_rate": 0.0003096949893174224,
      "loss": 3.0487,
      "step": 112831
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8402115106582642,
      "learning_rate": 0.0003096909008909267,
      "loss": 3.0721,
      "step": 112832
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5736780166625977,
      "learning_rate": 0.0003096868124626292,
      "loss": 2.7595,
      "step": 112833
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.9901463985443115,
      "learning_rate": 0.0003096827240325309,
      "loss": 2.9166,
      "step": 112834
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1218905448913574,
      "learning_rate": 0.00030967863560063226,
      "loss": 2.9919,
      "step": 112835
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.21423602104187,
      "learning_rate": 0.00030967454716693423,
      "loss": 3.1774,
      "step": 112836
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0071561336517334,
      "learning_rate": 0.00030967045873143756,
      "loss": 2.9779,
      "step": 112837
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1700234413146973,
      "learning_rate": 0.0003096663702941429,
      "loss": 2.8594,
      "step": 112838
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.087798833847046,
      "learning_rate": 0.00030966228185505106,
      "loss": 3.1242,
      "step": 112839
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.131655693054199,
      "learning_rate": 0.0003096581934141629,
      "loss": 3.001,
      "step": 112840
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.543245792388916,
      "learning_rate": 0.000309654104971479,
      "loss": 3.0822,
      "step": 112841
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.036410331726074,
      "learning_rate": 0.00030965001652700033,
      "loss": 2.9052,
      "step": 112842
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1370205879211426,
      "learning_rate": 0.00030964592808072755,
      "loss": 3.2398,
      "step": 112843
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2668025493621826,
      "learning_rate": 0.0003096418396326613,
      "loss": 3.0084,
      "step": 112844
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.7226946353912354,
      "learning_rate": 0.0003096377511828025,
      "loss": 2.8676,
      "step": 112845
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8429176807403564,
      "learning_rate": 0.0003096336627311519,
      "loss": 2.9476,
      "step": 112846
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5103960037231445,
      "learning_rate": 0.00030962957427771014,
      "loss": 3.0472,
      "step": 112847
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.4456191062927246,
      "learning_rate": 0.0003096254858224781,
      "loss": 2.8683,
      "step": 112848
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9681164026260376,
      "learning_rate": 0.0003096213973654565,
      "loss": 3.0377,
      "step": 112849
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8051897287368774,
      "learning_rate": 0.0003096173089066462,
      "loss": 3.0027,
      "step": 112850
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7391058206558228,
      "learning_rate": 0.0003096132204460477,
      "loss": 2.8822,
      "step": 112851
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.913737416267395,
      "learning_rate": 0.00030960913198366203,
      "loss": 3.256,
      "step": 112852
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7043178081512451,
      "learning_rate": 0.0003096050435194898,
      "loss": 3.0328,
      "step": 112853
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.551147222518921,
      "learning_rate": 0.0003096009550535318,
      "loss": 3.1082,
      "step": 112854
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8112324476242065,
      "learning_rate": 0.00030959686658578884,
      "loss": 3.0712,
      "step": 112855
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.324972629547119,
      "learning_rate": 0.00030959277811626155,
      "loss": 2.9271,
      "step": 112856
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8332353830337524,
      "learning_rate": 0.00030958868964495076,
      "loss": 2.9906,
      "step": 112857
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2358474731445312,
      "learning_rate": 0.0003095846011718574,
      "loss": 3.0123,
      "step": 112858
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3612558841705322,
      "learning_rate": 0.00030958051269698204,
      "loss": 2.9906,
      "step": 112859
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3799185752868652,
      "learning_rate": 0.0003095764242203254,
      "loss": 2.9237,
      "step": 112860
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5893656015396118,
      "learning_rate": 0.00030957233574188843,
      "loss": 3.0116,
      "step": 112861
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8783907890319824,
      "learning_rate": 0.0003095682472616717,
      "loss": 3.1331,
      "step": 112862
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.367110013961792,
      "learning_rate": 0.00030956415877967606,
      "loss": 3.0061,
      "step": 112863
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7560880184173584,
      "learning_rate": 0.00030956007029590236,
      "loss": 3.1842,
      "step": 112864
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.923408031463623,
      "learning_rate": 0.00030955598181035116,
      "loss": 2.8828,
      "step": 112865
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.970550537109375,
      "learning_rate": 0.00030955189332302333,
      "loss": 3.2085,
      "step": 112866
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6462911367416382,
      "learning_rate": 0.0003095478048339196,
      "loss": 3.0808,
      "step": 112867
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.599987030029297,
      "learning_rate": 0.0003095437163430409,
      "loss": 2.9996,
      "step": 112868
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.580505132675171,
      "learning_rate": 0.0003095396278503877,
      "loss": 3.0123,
      "step": 112869
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8506453037261963,
      "learning_rate": 0.00030953553935596093,
      "loss": 2.9459,
      "step": 112870
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9268124103546143,
      "learning_rate": 0.0003095314508597613,
      "loss": 3.036,
      "step": 112871
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9111566543579102,
      "learning_rate": 0.0003095273623617897,
      "loss": 3.1073,
      "step": 112872
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7443394660949707,
      "learning_rate": 0.00030952327386204667,
      "loss": 3.2728,
      "step": 112873
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7366136312484741,
      "learning_rate": 0.0003095191853605332,
      "loss": 3.1611,
      "step": 112874
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.948699712753296,
      "learning_rate": 0.00030951509685724977,
      "loss": 3.0647,
      "step": 112875
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.128330945968628,
      "learning_rate": 0.0003095110083521974,
      "loss": 3.0211,
      "step": 112876
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6289722919464111,
      "learning_rate": 0.00030950691984537676,
      "loss": 2.978,
      "step": 112877
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5680646896362305,
      "learning_rate": 0.0003095028313367886,
      "loss": 2.9114,
      "step": 112878
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8035671710968018,
      "learning_rate": 0.0003094987428264337,
      "loss": 3.1271,
      "step": 112879
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7114851474761963,
      "learning_rate": 0.00030949465431431275,
      "loss": 3.1595,
      "step": 112880
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.390704393386841,
      "learning_rate": 0.0003094905658004266,
      "loss": 3.0111,
      "step": 112881
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.82718825340271,
      "learning_rate": 0.0003094864772847759,
      "loss": 2.8259,
      "step": 112882
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3475730419158936,
      "learning_rate": 0.00030948238876736165,
      "loss": 3.2526,
      "step": 112883
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4712021350860596,
      "learning_rate": 0.0003094783002481843,
      "loss": 3.1653,
      "step": 112884
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.188981771469116,
      "learning_rate": 0.0003094742117272448,
      "loss": 2.9989,
      "step": 112885
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6763079166412354,
      "learning_rate": 0.0003094701232045439,
      "loss": 3.0715,
      "step": 112886
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8842201232910156,
      "learning_rate": 0.0003094660346800823,
      "loss": 2.9841,
      "step": 112887
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0339767932891846,
      "learning_rate": 0.00030946194615386074,
      "loss": 3.0549,
      "step": 112888
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0074596405029297,
      "learning_rate": 0.0003094578576258801,
      "loss": 3.0794,
      "step": 112889
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5309579372406006,
      "learning_rate": 0.00030945376909614106,
      "loss": 2.9668,
      "step": 112890
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7907991409301758,
      "learning_rate": 0.0003094496805646443,
      "loss": 3.1592,
      "step": 112891
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8945939540863037,
      "learning_rate": 0.00030944559203139075,
      "loss": 2.7807,
      "step": 112892
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9770790338516235,
      "learning_rate": 0.00030944150349638107,
      "loss": 3.1026,
      "step": 112893
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.129356861114502,
      "learning_rate": 0.00030943741495961596,
      "loss": 3.302,
      "step": 112894
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.026118278503418,
      "learning_rate": 0.0003094333264210964,
      "loss": 3.0546,
      "step": 112895
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3671886920928955,
      "learning_rate": 0.0003094292378808229,
      "loss": 3.1638,
      "step": 112896
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.341285467147827,
      "learning_rate": 0.0003094251493387964,
      "loss": 3.1137,
      "step": 112897
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.170307159423828,
      "learning_rate": 0.0003094210607950175,
      "loss": 3.0505,
      "step": 112898
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9258577823638916,
      "learning_rate": 0.00030941697224948705,
      "loss": 2.9048,
      "step": 112899
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7557517290115356,
      "learning_rate": 0.0003094128837022059,
      "loss": 3.1453,
      "step": 112900
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4271798133850098,
      "learning_rate": 0.00030940879515317464,
      "loss": 2.8706,
      "step": 112901
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6455821990966797,
      "learning_rate": 0.0003094047066023941,
      "loss": 3.2397,
      "step": 112902
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8324580192565918,
      "learning_rate": 0.00030940061804986507,
      "loss": 2.8178,
      "step": 112903
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0419580936431885,
      "learning_rate": 0.0003093965294955883,
      "loss": 2.9075,
      "step": 112904
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5986738204956055,
      "learning_rate": 0.00030939244093956456,
      "loss": 3.0127,
      "step": 112905
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0010058879852295,
      "learning_rate": 0.00030938835238179455,
      "loss": 2.9646,
      "step": 112906
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.913545846939087,
      "learning_rate": 0.0003093842638222791,
      "loss": 3.077,
      "step": 112907
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.069761037826538,
      "learning_rate": 0.0003093801752610189,
      "loss": 3.0453,
      "step": 112908
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.610085368156433,
      "learning_rate": 0.00030937608669801475,
      "loss": 2.8679,
      "step": 112909
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1630921363830566,
      "learning_rate": 0.00030937199813326746,
      "loss": 2.9368,
      "step": 112910
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8397831916809082,
      "learning_rate": 0.0003093679095667777,
      "loss": 2.9944,
      "step": 112911
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6441047191619873,
      "learning_rate": 0.0003093638209985462,
      "loss": 2.8248,
      "step": 112912
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8287675380706787,
      "learning_rate": 0.00030935973242857394,
      "loss": 2.8996,
      "step": 112913
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2474493980407715,
      "learning_rate": 0.0003093556438568614,
      "loss": 2.9598,
      "step": 112914
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8535594940185547,
      "learning_rate": 0.00030935155528340956,
      "loss": 3.0983,
      "step": 112915
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.969329595565796,
      "learning_rate": 0.0003093474667082191,
      "loss": 2.8544,
      "step": 112916
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8484246730804443,
      "learning_rate": 0.00030934337813129067,
      "loss": 3.201,
      "step": 112917
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8613159656524658,
      "learning_rate": 0.00030933928955262515,
      "loss": 3.0662,
      "step": 112918
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.096405506134033,
      "learning_rate": 0.0003093352009722233,
      "loss": 3.1501,
      "step": 112919
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0933218002319336,
      "learning_rate": 0.0003093311123900859,
      "loss": 2.879,
      "step": 112920
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8556846380233765,
      "learning_rate": 0.0003093270238062136,
      "loss": 2.999,
      "step": 112921
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.013371229171753,
      "learning_rate": 0.00030932293522060734,
      "loss": 3.2056,
      "step": 112922
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0563833713531494,
      "learning_rate": 0.0003093188466332676,
      "loss": 2.8431,
      "step": 112923
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2228572368621826,
      "learning_rate": 0.00030931475804419544,
      "loss": 3.0183,
      "step": 112924
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8867131471633911,
      "learning_rate": 0.00030931066945339146,
      "loss": 3.09,
      "step": 112925
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6963903903961182,
      "learning_rate": 0.00030930658086085645,
      "loss": 2.7899,
      "step": 112926
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9786628484725952,
      "learning_rate": 0.00030930249226659117,
      "loss": 2.8647,
      "step": 112927
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1434075832366943,
      "learning_rate": 0.0003092984036705964,
      "loss": 3.0488,
      "step": 112928
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2734391689300537,
      "learning_rate": 0.0003092943150728729,
      "loss": 3.2054,
      "step": 112929
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3256590366363525,
      "learning_rate": 0.0003092902264734213,
      "loss": 2.9285,
      "step": 112930
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9450546503067017,
      "learning_rate": 0.00030928613787224263,
      "loss": 2.8474,
      "step": 112931
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.7254762649536133,
      "learning_rate": 0.0003092820492693374,
      "loss": 3.0525,
      "step": 112932
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9949325323104858,
      "learning_rate": 0.0003092779606647064,
      "loss": 2.9589,
      "step": 112933
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0030534267425537,
      "learning_rate": 0.0003092738720583505,
      "loss": 2.9127,
      "step": 112934
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6927982568740845,
      "learning_rate": 0.00030926978345027046,
      "loss": 2.8231,
      "step": 112935
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.903923511505127,
      "learning_rate": 0.00030926569484046704,
      "loss": 3.0013,
      "step": 112936
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8195996284484863,
      "learning_rate": 0.0003092616062289408,
      "loss": 2.9803,
      "step": 112937
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8801757097244263,
      "learning_rate": 0.0003092575176156928,
      "loss": 3.2185,
      "step": 112938
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.263686180114746,
      "learning_rate": 0.0003092534290007235,
      "loss": 3.0979,
      "step": 112939
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9052488803863525,
      "learning_rate": 0.0003092493403840339,
      "loss": 3.0711,
      "step": 112940
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.877968430519104,
      "learning_rate": 0.0003092452517656247,
      "loss": 3.199,
      "step": 112941
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9220396280288696,
      "learning_rate": 0.0003092411631454966,
      "loss": 2.912,
      "step": 112942
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.140563488006592,
      "learning_rate": 0.0003092370745236504,
      "loss": 2.9371,
      "step": 112943
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4660816192626953,
      "learning_rate": 0.0003092329859000869,
      "loss": 3.1335,
      "step": 112944
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.090888500213623,
      "learning_rate": 0.00030922889727480674,
      "loss": 3.0452,
      "step": 112945
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9575798511505127,
      "learning_rate": 0.00030922480864781075,
      "loss": 3.0422,
      "step": 112946
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6237034797668457,
      "learning_rate": 0.00030922072001909976,
      "loss": 2.9294,
      "step": 112947
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.582665205001831,
      "learning_rate": 0.00030921663138867443,
      "loss": 2.9703,
      "step": 112948
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9539892673492432,
      "learning_rate": 0.00030921254275653547,
      "loss": 2.8939,
      "step": 112949
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.753613829612732,
      "learning_rate": 0.00030920845412268385,
      "loss": 2.9403,
      "step": 112950
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.650036096572876,
      "learning_rate": 0.00030920436548712016,
      "loss": 3.1389,
      "step": 112951
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.9356191158294678,
      "learning_rate": 0.0003092002768498452,
      "loss": 2.9236,
      "step": 112952
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9439723491668701,
      "learning_rate": 0.00030919618821085974,
      "loss": 3.0581,
      "step": 112953
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.848177433013916,
      "learning_rate": 0.0003091920995701646,
      "loss": 3.1294,
      "step": 112954
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.668975591659546,
      "learning_rate": 0.00030918801092776033,
      "loss": 2.8906,
      "step": 112955
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.043140411376953,
      "learning_rate": 0.000309183922283648,
      "loss": 2.9624,
      "step": 112956
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1700685024261475,
      "learning_rate": 0.00030917983363782807,
      "loss": 2.9166,
      "step": 112957
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.810234785079956,
      "learning_rate": 0.00030917574499030146,
      "loss": 2.6565,
      "step": 112958
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7600871324539185,
      "learning_rate": 0.000309171656341069,
      "loss": 2.973,
      "step": 112959
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5750837326049805,
      "learning_rate": 0.00030916756769013123,
      "loss": 2.9705,
      "step": 112960
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.956838607788086,
      "learning_rate": 0.00030916347903748903,
      "loss": 2.7476,
      "step": 112961
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.21734881401062,
      "learning_rate": 0.0003091593903831433,
      "loss": 3.0845,
      "step": 112962
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9493601322174072,
      "learning_rate": 0.00030915530172709457,
      "loss": 2.9364,
      "step": 112963
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8562787771224976,
      "learning_rate": 0.00030915121306934377,
      "loss": 2.8861,
      "step": 112964
    },
    {
      "epoch": 1.47,
      "grad_norm": 4.13769006729126,
      "learning_rate": 0.00030914712440989153,
      "loss": 2.9989,
      "step": 112965
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.029731512069702,
      "learning_rate": 0.00030914303574873866,
      "loss": 2.9666,
      "step": 112966
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9470714330673218,
      "learning_rate": 0.00030913894708588593,
      "loss": 3.2846,
      "step": 112967
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9544488191604614,
      "learning_rate": 0.00030913485842133415,
      "loss": 3.0465,
      "step": 112968
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9299650192260742,
      "learning_rate": 0.00030913076975508396,
      "loss": 3.0313,
      "step": 112969
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9070395231246948,
      "learning_rate": 0.00030912668108713624,
      "loss": 3.1235,
      "step": 112970
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9084863662719727,
      "learning_rate": 0.00030912259241749164,
      "loss": 2.8003,
      "step": 112971
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9792561531066895,
      "learning_rate": 0.000309118503746151,
      "loss": 2.7212,
      "step": 112972
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.256636142730713,
      "learning_rate": 0.0003091144150731151,
      "loss": 2.772,
      "step": 112973
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7286171913146973,
      "learning_rate": 0.00030911032639838464,
      "loss": 2.6995,
      "step": 112974
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.187962770462036,
      "learning_rate": 0.0003091062377219604,
      "loss": 2.9652,
      "step": 112975
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6618402004241943,
      "learning_rate": 0.00030910214904384303,
      "loss": 2.7545,
      "step": 112976
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.923705816268921,
      "learning_rate": 0.0003090980603640336,
      "loss": 2.9554,
      "step": 112977
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.310185670852661,
      "learning_rate": 0.0003090939716825325,
      "loss": 3.2306,
      "step": 112978
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9003117084503174,
      "learning_rate": 0.00030908988299934077,
      "loss": 3.0577,
      "step": 112979
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9918251037597656,
      "learning_rate": 0.00030908579431445895,
      "loss": 3.013,
      "step": 112980
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.865334391593933,
      "learning_rate": 0.00030908170562788794,
      "loss": 3.0326,
      "step": 112981
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.42183780670166,
      "learning_rate": 0.00030907761693962856,
      "loss": 2.8455,
      "step": 112982
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.8092973232269287,
      "learning_rate": 0.0003090735282496814,
      "loss": 3.1169,
      "step": 112983
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7901756763458252,
      "learning_rate": 0.00030906943955804733,
      "loss": 3.0286,
      "step": 112984
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7511929273605347,
      "learning_rate": 0.00030906535086472705,
      "loss": 2.6481,
      "step": 112985
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6052589416503906,
      "learning_rate": 0.00030906126216972133,
      "loss": 3.2104,
      "step": 112986
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.054076671600342,
      "learning_rate": 0.000309057173473031,
      "loss": 2.8884,
      "step": 112987
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.8028125762939453,
      "learning_rate": 0.00030905308477465675,
      "loss": 2.9918,
      "step": 112988
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.8716752529144287,
      "learning_rate": 0.00030904899607459935,
      "loss": 2.6914,
      "step": 112989
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2406766414642334,
      "learning_rate": 0.0003090449073728596,
      "loss": 2.8864,
      "step": 112990
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4028425216674805,
      "learning_rate": 0.00030904081866943824,
      "loss": 3.077,
      "step": 112991
    },
    {
      "epoch": 1.47,
      "grad_norm": 5.217614650726318,
      "learning_rate": 0.000309036729964336,
      "loss": 3.1669,
      "step": 112992
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.167257308959961,
      "learning_rate": 0.00030903264125755367,
      "loss": 2.9578,
      "step": 112993
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.199613571166992,
      "learning_rate": 0.00030902855254909197,
      "loss": 2.8032,
      "step": 112994
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.53595232963562,
      "learning_rate": 0.0003090244638389517,
      "loss": 3.015,
      "step": 112995
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.2557849884033203,
      "learning_rate": 0.0003090203751271337,
      "loss": 2.937,
      "step": 112996
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8059948682785034,
      "learning_rate": 0.0003090162864136385,
      "loss": 3.1624,
      "step": 112997
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2108328342437744,
      "learning_rate": 0.00030901219769846714,
      "loss": 2.7373,
      "step": 112998
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.000584840774536,
      "learning_rate": 0.00030900810898162016,
      "loss": 3.1027,
      "step": 112999
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1517691612243652,
      "learning_rate": 0.00030900402026309837,
      "loss": 2.8771,
      "step": 113000
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0769150257110596,
      "learning_rate": 0.0003089999315429026,
      "loss": 3.0135,
      "step": 113001
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.13034987449646,
      "learning_rate": 0.00030899584282103367,
      "loss": 2.9894,
      "step": 113002
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4584996700286865,
      "learning_rate": 0.00030899175409749213,
      "loss": 2.927,
      "step": 113003
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.075096845626831,
      "learning_rate": 0.00030898766537227885,
      "loss": 3.0631,
      "step": 113004
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1272060871124268,
      "learning_rate": 0.00030898357664539466,
      "loss": 2.9074,
      "step": 113005
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6237215995788574,
      "learning_rate": 0.00030897948791684015,
      "loss": 3.2584,
      "step": 113006
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8780206441879272,
      "learning_rate": 0.00030897539918661623,
      "loss": 2.9514,
      "step": 113007
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5056774616241455,
      "learning_rate": 0.00030897131045472373,
      "loss": 3.0468,
      "step": 113008
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4949798583984375,
      "learning_rate": 0.00030896722172116313,
      "loss": 2.866,
      "step": 113009
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9843025207519531,
      "learning_rate": 0.0003089631329859354,
      "loss": 2.9595,
      "step": 113010
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2709145545959473,
      "learning_rate": 0.00030895904424904137,
      "loss": 2.9624,
      "step": 113011
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.9603540897369385,
      "learning_rate": 0.00030895495551048157,
      "loss": 2.9816,
      "step": 113012
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.28442120552063,
      "learning_rate": 0.0003089508667702568,
      "loss": 2.8234,
      "step": 113013
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9182438850402832,
      "learning_rate": 0.00030894677802836807,
      "loss": 2.9032,
      "step": 113014
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7734330892562866,
      "learning_rate": 0.00030894268928481584,
      "loss": 2.8697,
      "step": 113015
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.202583074569702,
      "learning_rate": 0.00030893860053960104,
      "loss": 2.8596,
      "step": 113016
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9682230949401855,
      "learning_rate": 0.00030893451179272443,
      "loss": 3.0049,
      "step": 113017
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1928014755249023,
      "learning_rate": 0.00030893042304418666,
      "loss": 2.8746,
      "step": 113018
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.207345485687256,
      "learning_rate": 0.0003089263342939886,
      "loss": 3.1028,
      "step": 113019
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.516230821609497,
      "learning_rate": 0.00030892224554213095,
      "loss": 3.1294,
      "step": 113020
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8347785472869873,
      "learning_rate": 0.00030891815678861447,
      "loss": 2.8324,
      "step": 113021
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.318796157836914,
      "learning_rate": 0.0003089140680334399,
      "loss": 2.8691,
      "step": 113022
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.159850597381592,
      "learning_rate": 0.0003089099792766081,
      "loss": 2.8944,
      "step": 113023
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9628357887268066,
      "learning_rate": 0.00030890589051811975,
      "loss": 3.0599,
      "step": 113024
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5141122341156006,
      "learning_rate": 0.0003089018017579756,
      "loss": 2.9305,
      "step": 113025
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.619583010673523,
      "learning_rate": 0.00030889771299617657,
      "loss": 2.8584,
      "step": 113026
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.912185788154602,
      "learning_rate": 0.0003088936242327231,
      "loss": 3.0548,
      "step": 113027
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7804419994354248,
      "learning_rate": 0.00030888953546761623,
      "loss": 3.2187,
      "step": 113028
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1631124019622803,
      "learning_rate": 0.00030888544670085665,
      "loss": 2.9621,
      "step": 113029
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.021214246749878,
      "learning_rate": 0.000308881357932445,
      "loss": 2.6997,
      "step": 113030
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5852750539779663,
      "learning_rate": 0.0003088772691623822,
      "loss": 3.4188,
      "step": 113031
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.308485984802246,
      "learning_rate": 0.000308873180390669,
      "loss": 3.0731,
      "step": 113032
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8452165126800537,
      "learning_rate": 0.00030886909161730605,
      "loss": 2.8745,
      "step": 113033
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8924980163574219,
      "learning_rate": 0.0003088650028422942,
      "loss": 3.1004,
      "step": 113034
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.223940134048462,
      "learning_rate": 0.0003088609140656341,
      "loss": 2.9493,
      "step": 113035
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4743406772613525,
      "learning_rate": 0.00030885682528732664,
      "loss": 2.8509,
      "step": 113036
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2535531520843506,
      "learning_rate": 0.0003088527365073726,
      "loss": 2.9526,
      "step": 113037
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.363030195236206,
      "learning_rate": 0.0003088486477257726,
      "loss": 3.0809,
      "step": 113038
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4940426349639893,
      "learning_rate": 0.0003088445589425274,
      "loss": 3.1105,
      "step": 113039
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.549837350845337,
      "learning_rate": 0.00030884047015763787,
      "loss": 3.2215,
      "step": 113040
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7628728151321411,
      "learning_rate": 0.0003088363813711049,
      "loss": 3.0068,
      "step": 113041
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0961828231811523,
      "learning_rate": 0.00030883229258292885,
      "loss": 2.7962,
      "step": 113042
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.177635431289673,
      "learning_rate": 0.0003088282037931108,
      "loss": 3.1673,
      "step": 113043
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9099301099777222,
      "learning_rate": 0.00030882411500165145,
      "loss": 3.1154,
      "step": 113044
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0413122177124023,
      "learning_rate": 0.00030882002620855146,
      "loss": 2.9222,
      "step": 113045
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2333438396453857,
      "learning_rate": 0.00030881593741381166,
      "loss": 2.809,
      "step": 113046
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8547253608703613,
      "learning_rate": 0.0003088118486174329,
      "loss": 2.9578,
      "step": 113047
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9996906518936157,
      "learning_rate": 0.00030880775981941585,
      "loss": 2.9992,
      "step": 113048
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6795958280563354,
      "learning_rate": 0.0003088036710197612,
      "loss": 2.8437,
      "step": 113049
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1794652938842773,
      "learning_rate": 0.00030879958221846984,
      "loss": 3.1184,
      "step": 113050
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2428157329559326,
      "learning_rate": 0.0003087954934155424,
      "loss": 3.0599,
      "step": 113051
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8396060466766357,
      "learning_rate": 0.00030879140461097975,
      "loss": 2.9843,
      "step": 113052
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1181557178497314,
      "learning_rate": 0.00030878731580478264,
      "loss": 3.2029,
      "step": 113053
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0444116592407227,
      "learning_rate": 0.0003087832269969517,
      "loss": 3.0939,
      "step": 113054
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2585184574127197,
      "learning_rate": 0.0003087791381874879,
      "loss": 3.0072,
      "step": 113055
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2669475078582764,
      "learning_rate": 0.0003087750493763919,
      "loss": 2.9171,
      "step": 113056
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0983967781066895,
      "learning_rate": 0.00030877096056366435,
      "loss": 3.0511,
      "step": 113057
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3478944301605225,
      "learning_rate": 0.0003087668717493062,
      "loss": 3.1865,
      "step": 113058
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1242921352386475,
      "learning_rate": 0.00030876278293331814,
      "loss": 2.9737,
      "step": 113059
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3289239406585693,
      "learning_rate": 0.00030875869411570086,
      "loss": 3.122,
      "step": 113060
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.007741928100586,
      "learning_rate": 0.0003087546052964551,
      "loss": 3.1541,
      "step": 113061
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8090054988861084,
      "learning_rate": 0.00030875051647558186,
      "loss": 3.0264,
      "step": 113062
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.097660779953003,
      "learning_rate": 0.0003087464276530816,
      "loss": 2.9342,
      "step": 113063
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8735952377319336,
      "learning_rate": 0.00030874233882895527,
      "loss": 2.9808,
      "step": 113064
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7629748582839966,
      "learning_rate": 0.0003087382500032036,
      "loss": 2.9725,
      "step": 113065
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6673011779785156,
      "learning_rate": 0.00030873416117582726,
      "loss": 3.1449,
      "step": 113066
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6976077556610107,
      "learning_rate": 0.00030873007234682716,
      "loss": 3.0783,
      "step": 113067
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2274200916290283,
      "learning_rate": 0.00030872598351620385,
      "loss": 2.941,
      "step": 113068
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7982593774795532,
      "learning_rate": 0.00030872189468395835,
      "loss": 3.0646,
      "step": 113069
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.9618115425109863,
      "learning_rate": 0.00030871780585009116,
      "loss": 3.1169,
      "step": 113070
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0779237747192383,
      "learning_rate": 0.00030871371701460325,
      "loss": 3.0025,
      "step": 113071
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9781668186187744,
      "learning_rate": 0.0003087096281774952,
      "loss": 2.9319,
      "step": 113072
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.736451268196106,
      "learning_rate": 0.00030870553933876803,
      "loss": 3.0235,
      "step": 113073
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6204410791397095,
      "learning_rate": 0.0003087014504984222,
      "loss": 3.0608,
      "step": 113074
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7721900939941406,
      "learning_rate": 0.00030869736165645866,
      "loss": 3.002,
      "step": 113075
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7310842275619507,
      "learning_rate": 0.0003086932728128781,
      "loss": 3.1644,
      "step": 113076
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7153348922729492,
      "learning_rate": 0.0003086891839676813,
      "loss": 2.7167,
      "step": 113077
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5312235355377197,
      "learning_rate": 0.00030868509512086903,
      "loss": 3.1601,
      "step": 113078
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7315641641616821,
      "learning_rate": 0.000308681006272442,
      "loss": 2.9204,
      "step": 113079
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3247838020324707,
      "learning_rate": 0.00030867691742240104,
      "loss": 2.9264,
      "step": 113080
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9848010540008545,
      "learning_rate": 0.00030867282857074686,
      "loss": 3.0817,
      "step": 113081
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9856363534927368,
      "learning_rate": 0.00030866873971748026,
      "loss": 3.0837,
      "step": 113082
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9918065071105957,
      "learning_rate": 0.000308664650862602,
      "loss": 2.8566,
      "step": 113083
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7478291988372803,
      "learning_rate": 0.00030866056200611275,
      "loss": 2.9858,
      "step": 113084
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.016758441925049,
      "learning_rate": 0.00030865647314801337,
      "loss": 3.2099,
      "step": 113085
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.2858574390411377,
      "learning_rate": 0.00030865238428830455,
      "loss": 2.8392,
      "step": 113086
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.629971742630005,
      "learning_rate": 0.0003086482954269872,
      "loss": 3.0339,
      "step": 113087
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2302398681640625,
      "learning_rate": 0.0003086442065640618,
      "loss": 3.0402,
      "step": 113088
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7182327508926392,
      "learning_rate": 0.00030864011769952943,
      "loss": 3.1943,
      "step": 113089
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1968209743499756,
      "learning_rate": 0.0003086360288333907,
      "loss": 3.0315,
      "step": 113090
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1497251987457275,
      "learning_rate": 0.0003086319399656463,
      "loss": 3.1407,
      "step": 113091
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.190122127532959,
      "learning_rate": 0.000308627851096297,
      "loss": 2.9927,
      "step": 113092
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.113502025604248,
      "learning_rate": 0.00030862376222534377,
      "loss": 3.1153,
      "step": 113093
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8801865577697754,
      "learning_rate": 0.00030861967335278715,
      "loss": 2.8312,
      "step": 113094
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9023295640945435,
      "learning_rate": 0.00030861558447862786,
      "loss": 2.9321,
      "step": 113095
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.820271372795105,
      "learning_rate": 0.000308611495602867,
      "loss": 2.6477,
      "step": 113096
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.413398265838623,
      "learning_rate": 0.00030860740672550497,
      "loss": 2.7743,
      "step": 113097
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.942957878112793,
      "learning_rate": 0.0003086033178465426,
      "loss": 3.0395,
      "step": 113098
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1616013050079346,
      "learning_rate": 0.0003085992289659808,
      "loss": 3.0361,
      "step": 113099
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8670814037322998,
      "learning_rate": 0.0003085951400838202,
      "loss": 2.9638,
      "step": 113100
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1028623580932617,
      "learning_rate": 0.00030859105120006165,
      "loss": 3.0288,
      "step": 113101
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9430254697799683,
      "learning_rate": 0.00030858696231470586,
      "loss": 3.1123,
      "step": 113102
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.046854019165039,
      "learning_rate": 0.00030858287342775347,
      "loss": 3.1284,
      "step": 113103
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7065473794937134,
      "learning_rate": 0.00030857878453920546,
      "loss": 2.7119,
      "step": 113104
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9472728967666626,
      "learning_rate": 0.00030857469564906254,
      "loss": 3.0745,
      "step": 113105
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0418856143951416,
      "learning_rate": 0.00030857060675732535,
      "loss": 3.0804,
      "step": 113106
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.276533365249634,
      "learning_rate": 0.0003085665178639947,
      "loss": 3.0204,
      "step": 113107
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0542409420013428,
      "learning_rate": 0.0003085624289690715,
      "loss": 2.8041,
      "step": 113108
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.115554094314575,
      "learning_rate": 0.0003085583400725563,
      "loss": 3.0243,
      "step": 113109
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3352622985839844,
      "learning_rate": 0.0003085542511744499,
      "loss": 3.002,
      "step": 113110
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0841715335845947,
      "learning_rate": 0.00030855016227475314,
      "loss": 2.9904,
      "step": 113111
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4749245643615723,
      "learning_rate": 0.0003085460733734667,
      "loss": 2.893,
      "step": 113112
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0243308544158936,
      "learning_rate": 0.00030854198447059143,
      "loss": 3.2782,
      "step": 113113
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.966975450515747,
      "learning_rate": 0.00030853789556612804,
      "loss": 3.0466,
      "step": 113114
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.762367844581604,
      "learning_rate": 0.0003085338066600773,
      "loss": 3.2026,
      "step": 113115
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9407082796096802,
      "learning_rate": 0.00030852971775244,
      "loss": 3.0439,
      "step": 113116
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5817006826400757,
      "learning_rate": 0.00030852562884321684,
      "loss": 3.1295,
      "step": 113117
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7904624938964844,
      "learning_rate": 0.00030852153993240854,
      "loss": 3.1654,
      "step": 113118
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6789040565490723,
      "learning_rate": 0.000308517451020016,
      "loss": 3.1854,
      "step": 113119
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9115554094314575,
      "learning_rate": 0.00030851336210603993,
      "loss": 2.911,
      "step": 113120
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7996426820755005,
      "learning_rate": 0.000308509273190481,
      "loss": 2.7904,
      "step": 113121
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0583417415618896,
      "learning_rate": 0.00030850518427334,
      "loss": 2.878,
      "step": 113122
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6648197174072266,
      "learning_rate": 0.00030850109535461784,
      "loss": 3.0883,
      "step": 113123
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7036887407302856,
      "learning_rate": 0.000308497006434315,
      "loss": 2.9192,
      "step": 113124
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9342752695083618,
      "learning_rate": 0.00030849291751243257,
      "loss": 3.1024,
      "step": 113125
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.142446756362915,
      "learning_rate": 0.00030848882858897114,
      "loss": 3.2421,
      "step": 113126
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7354480028152466,
      "learning_rate": 0.0003084847396639314,
      "loss": 3.122,
      "step": 113127
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7332127094268799,
      "learning_rate": 0.00030848065073731416,
      "loss": 3.0947,
      "step": 113128
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.055332660675049,
      "learning_rate": 0.0003084765618091203,
      "loss": 3.0712,
      "step": 113129
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.3484601974487305,
      "learning_rate": 0.0003084724728793504,
      "loss": 2.8521,
      "step": 113130
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9565447568893433,
      "learning_rate": 0.00030846838394800534,
      "loss": 2.9512,
      "step": 113131
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.1204893589019775,
      "learning_rate": 0.00030846429501508587,
      "loss": 3.1141,
      "step": 113132
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.502277374267578,
      "learning_rate": 0.0003084602060805927,
      "loss": 3.1531,
      "step": 113133
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.084357976913452,
      "learning_rate": 0.00030845611714452664,
      "loss": 3.1667,
      "step": 113134
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.954406976699829,
      "learning_rate": 0.0003084520282068884,
      "loss": 3.1797,
      "step": 113135
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0838170051574707,
      "learning_rate": 0.0003084479392676789,
      "loss": 3.0243,
      "step": 113136
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8494528532028198,
      "learning_rate": 0.00030844385032689856,
      "loss": 3.2936,
      "step": 113137
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4582207202911377,
      "learning_rate": 0.00030843976138454843,
      "loss": 3.2295,
      "step": 113138
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7133861780166626,
      "learning_rate": 0.00030843567244062925,
      "loss": 2.9511,
      "step": 113139
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8729826211929321,
      "learning_rate": 0.00030843158349514165,
      "loss": 2.8729,
      "step": 113140
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6453174352645874,
      "learning_rate": 0.00030842749454808646,
      "loss": 2.9482,
      "step": 113141
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.199122428894043,
      "learning_rate": 0.00030842340559946455,
      "loss": 3.0892,
      "step": 113142
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.482586622238159,
      "learning_rate": 0.00030841931664927645,
      "loss": 3.1313,
      "step": 113143
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0911827087402344,
      "learning_rate": 0.00030841522769752303,
      "loss": 2.9399,
      "step": 113144
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.661738395690918,
      "learning_rate": 0.00030841113874420517,
      "loss": 2.8679,
      "step": 113145
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.746648073196411,
      "learning_rate": 0.0003084070497893234,
      "loss": 2.8343,
      "step": 113146
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5366008281707764,
      "learning_rate": 0.00030840296083287864,
      "loss": 2.97,
      "step": 113147
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6485563516616821,
      "learning_rate": 0.00030839887187487165,
      "loss": 3.0323,
      "step": 113148
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8185070753097534,
      "learning_rate": 0.00030839478291530315,
      "loss": 3.1985,
      "step": 113149
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.968740463256836,
      "learning_rate": 0.0003083906939541738,
      "loss": 3.0585,
      "step": 113150
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.965229868888855,
      "learning_rate": 0.00030838660499148466,
      "loss": 2.9816,
      "step": 113151
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5993212461471558,
      "learning_rate": 0.00030838251602723614,
      "loss": 3.1328,
      "step": 113152
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0790762901306152,
      "learning_rate": 0.00030837842706142913,
      "loss": 3.3201,
      "step": 113153
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.798297166824341,
      "learning_rate": 0.00030837433809406456,
      "loss": 3.0618,
      "step": 113154
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9459673166275024,
      "learning_rate": 0.0003083702491251429,
      "loss": 3.0236,
      "step": 113155
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6316330432891846,
      "learning_rate": 0.0003083661601546651,
      "loss": 3.0631,
      "step": 113156
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.087144136428833,
      "learning_rate": 0.0003083620711826319,
      "loss": 2.8538,
      "step": 113157
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9973008632659912,
      "learning_rate": 0.000308357982209044,
      "loss": 3.4468,
      "step": 113158
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6028666496276855,
      "learning_rate": 0.0003083538932339022,
      "loss": 2.7154,
      "step": 113159
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7957170009613037,
      "learning_rate": 0.0003083498042572072,
      "loss": 2.937,
      "step": 113160
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.426342248916626,
      "learning_rate": 0.0003083457152789599,
      "loss": 2.9938,
      "step": 113161
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.9372029304504395,
      "learning_rate": 0.0003083416262991609,
      "loss": 2.8423,
      "step": 113162
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.963310956954956,
      "learning_rate": 0.00030833753731781107,
      "loss": 2.9642,
      "step": 113163
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1267354488372803,
      "learning_rate": 0.0003083334483349112,
      "loss": 2.957,
      "step": 113164
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.4742043018341064,
      "learning_rate": 0.0003083293593504619,
      "loss": 3.1478,
      "step": 113165
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9098635911941528,
      "learning_rate": 0.000308325270364464,
      "loss": 3.0003,
      "step": 113166
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0951006412506104,
      "learning_rate": 0.00030832118137691833,
      "loss": 2.8773,
      "step": 113167
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.829193353652954,
      "learning_rate": 0.00030831709238782564,
      "loss": 2.9843,
      "step": 113168
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.3039450645446777,
      "learning_rate": 0.00030831300339718656,
      "loss": 3.0916,
      "step": 113169
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5651705265045166,
      "learning_rate": 0.0003083089144050019,
      "loss": 2.9469,
      "step": 113170
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5509189367294312,
      "learning_rate": 0.00030830482541127256,
      "loss": 3.0561,
      "step": 113171
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.505558729171753,
      "learning_rate": 0.00030830073641599916,
      "loss": 3.1149,
      "step": 113172
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.02681827545166,
      "learning_rate": 0.0003082966474191825,
      "loss": 3.0533,
      "step": 113173
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8385984897613525,
      "learning_rate": 0.00030829255842082325,
      "loss": 3.038,
      "step": 113174
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7444242238998413,
      "learning_rate": 0.00030828846942092237,
      "loss": 3.1501,
      "step": 113175
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8046987056732178,
      "learning_rate": 0.00030828438041948045,
      "loss": 3.0158,
      "step": 113176
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7786719799041748,
      "learning_rate": 0.0003082802914164983,
      "loss": 3.0123,
      "step": 113177
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.976021409034729,
      "learning_rate": 0.0003082762024119768,
      "loss": 3.2307,
      "step": 113178
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.947234034538269,
      "learning_rate": 0.00030827211340591655,
      "loss": 2.9366,
      "step": 113179
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7431082725524902,
      "learning_rate": 0.00030826802439831827,
      "loss": 3.0877,
      "step": 113180
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9844847917556763,
      "learning_rate": 0.00030826393538918286,
      "loss": 2.9093,
      "step": 113181
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9749579429626465,
      "learning_rate": 0.0003082598463785111,
      "loss": 2.8744,
      "step": 113182
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6243313550949097,
      "learning_rate": 0.0003082557573663035,
      "loss": 2.7429,
      "step": 113183
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8565998077392578,
      "learning_rate": 0.0003082516683525612,
      "loss": 2.9111,
      "step": 113184
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8257147073745728,
      "learning_rate": 0.00030824757933728457,
      "loss": 3.1784,
      "step": 113185
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0384469032287598,
      "learning_rate": 0.0003082434903204747,
      "loss": 2.8576,
      "step": 113186
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1583075523376465,
      "learning_rate": 0.0003082394013021322,
      "loss": 2.8949,
      "step": 113187
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6220803260803223,
      "learning_rate": 0.00030823531228225774,
      "loss": 2.8671,
      "step": 113188
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.033942937850952,
      "learning_rate": 0.00030823122326085225,
      "loss": 2.9875,
      "step": 113189
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6928822994232178,
      "learning_rate": 0.0003082271342379165,
      "loss": 2.8692,
      "step": 113190
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9972270727157593,
      "learning_rate": 0.000308223045213451,
      "loss": 3.3781,
      "step": 113191
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.91512131690979,
      "learning_rate": 0.0003082189561874568,
      "loss": 2.8603,
      "step": 113192
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0632336139678955,
      "learning_rate": 0.00030821486715993453,
      "loss": 3.0688,
      "step": 113193
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8747868537902832,
      "learning_rate": 0.0003082107781308849,
      "loss": 3.1503,
      "step": 113194
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0558876991271973,
      "learning_rate": 0.0003082066891003087,
      "loss": 3.0071,
      "step": 113195
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2009122371673584,
      "learning_rate": 0.00030820260006820686,
      "loss": 3.0175,
      "step": 113196
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5261547565460205,
      "learning_rate": 0.00030819851103458,
      "loss": 3.2171,
      "step": 113197
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8925347328186035,
      "learning_rate": 0.00030819442199942875,
      "loss": 3.1225,
      "step": 113198
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.910347580909729,
      "learning_rate": 0.0003081903329627541,
      "loss": 2.5469,
      "step": 113199
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8807700872421265,
      "learning_rate": 0.00030818624392455667,
      "loss": 2.9265,
      "step": 113200
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9926482439041138,
      "learning_rate": 0.00030818215488483727,
      "loss": 2.8801,
      "step": 113201
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.315823793411255,
      "learning_rate": 0.00030817806584359657,
      "loss": 3.1117,
      "step": 113202
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.171743392944336,
      "learning_rate": 0.0003081739768008356,
      "loss": 3.0487,
      "step": 113203
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.401324987411499,
      "learning_rate": 0.0003081698877565548,
      "loss": 3.0279,
      "step": 113204
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.061511754989624,
      "learning_rate": 0.00030816579871075503,
      "loss": 2.7248,
      "step": 113205
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7754839658737183,
      "learning_rate": 0.0003081617096634372,
      "loss": 3.0316,
      "step": 113206
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4818854331970215,
      "learning_rate": 0.0003081576206146019,
      "loss": 3.0328,
      "step": 113207
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8004366159439087,
      "learning_rate": 0.0003081535315642499,
      "loss": 2.9845,
      "step": 113208
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.855657935142517,
      "learning_rate": 0.0003081494425123821,
      "loss": 3.1814,
      "step": 113209
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.050860643386841,
      "learning_rate": 0.0003081453534589991,
      "loss": 2.7741,
      "step": 113210
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.93056058883667,
      "learning_rate": 0.00030814126440410175,
      "loss": 3.1424,
      "step": 113211
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1584980487823486,
      "learning_rate": 0.00030813717534769084,
      "loss": 2.7226,
      "step": 113212
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.94581937789917,
      "learning_rate": 0.000308133086289767,
      "loss": 2.8555,
      "step": 113213
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.853391408920288,
      "learning_rate": 0.00030812899723033104,
      "loss": 2.8971,
      "step": 113214
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3493282794952393,
      "learning_rate": 0.0003081249081693838,
      "loss": 2.9946,
      "step": 113215
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6251914501190186,
      "learning_rate": 0.00030812081910692595,
      "loss": 3.0246,
      "step": 113216
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8361284732818604,
      "learning_rate": 0.00030811673004295826,
      "loss": 3.0343,
      "step": 113217
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7218228578567505,
      "learning_rate": 0.00030811264097748163,
      "loss": 2.9847,
      "step": 113218
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3448426723480225,
      "learning_rate": 0.00030810855191049666,
      "loss": 2.9523,
      "step": 113219
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8421517610549927,
      "learning_rate": 0.0003081044628420041,
      "loss": 3.0972,
      "step": 113220
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8656655550003052,
      "learning_rate": 0.00030810037377200484,
      "loss": 3.1691,
      "step": 113221
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7772372961044312,
      "learning_rate": 0.0003080962847004995,
      "loss": 3.1088,
      "step": 113222
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.84481680393219,
      "learning_rate": 0.0003080921956274889,
      "loss": 2.9119,
      "step": 113223
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8807621002197266,
      "learning_rate": 0.0003080881065529739,
      "loss": 3.1164,
      "step": 113224
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0531938076019287,
      "learning_rate": 0.00030808401747695513,
      "loss": 2.881,
      "step": 113225
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8319110870361328,
      "learning_rate": 0.00030807992839943334,
      "loss": 3.1103,
      "step": 113226
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9426747560501099,
      "learning_rate": 0.00030807583932040945,
      "loss": 2.9465,
      "step": 113227
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0510284900665283,
      "learning_rate": 0.000308071750239884,
      "loss": 2.8752,
      "step": 113228
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.63824725151062,
      "learning_rate": 0.0003080676611578579,
      "loss": 2.9126,
      "step": 113229
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.367161989212036,
      "learning_rate": 0.0003080635720743319,
      "loss": 3.2523,
      "step": 113230
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9419351816177368,
      "learning_rate": 0.0003080594829893066,
      "loss": 2.9182,
      "step": 113231
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.5401968955993652,
      "learning_rate": 0.00030805539390278295,
      "loss": 3.0903,
      "step": 113232
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4290249347686768,
      "learning_rate": 0.0003080513048147618,
      "loss": 3.0513,
      "step": 113233
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9559334516525269,
      "learning_rate": 0.0003080472157252436,
      "loss": 2.9587,
      "step": 113234
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3957858085632324,
      "learning_rate": 0.0003080431266342293,
      "loss": 2.9439,
      "step": 113235
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.763603925704956,
      "learning_rate": 0.0003080390375417197,
      "loss": 2.9878,
      "step": 113236
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.926397442817688,
      "learning_rate": 0.00030803494844771533,
      "loss": 2.8224,
      "step": 113237
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.461942672729492,
      "learning_rate": 0.00030803085935221725,
      "loss": 2.8395,
      "step": 113238
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4213547706604004,
      "learning_rate": 0.00030802677025522606,
      "loss": 3.061,
      "step": 113239
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.918283224105835,
      "learning_rate": 0.00030802268115674253,
      "loss": 3.1738,
      "step": 113240
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7132320404052734,
      "learning_rate": 0.0003080185920567674,
      "loss": 2.9479,
      "step": 113241
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.2835123538970947,
      "learning_rate": 0.00030801450295530155,
      "loss": 2.9697,
      "step": 113242
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7965830564498901,
      "learning_rate": 0.00030801041385234557,
      "loss": 3.0722,
      "step": 113243
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.580948829650879,
      "learning_rate": 0.0003080063247479003,
      "loss": 2.8767,
      "step": 113244
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.596494674682617,
      "learning_rate": 0.0003080022356419666,
      "loss": 3.0035,
      "step": 113245
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.245300769805908,
      "learning_rate": 0.00030799814653454505,
      "loss": 3.1302,
      "step": 113246
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.5317400693893433,
      "learning_rate": 0.0003079940574256365,
      "loss": 2.8949,
      "step": 113247
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.340219736099243,
      "learning_rate": 0.0003079899683152417,
      "loss": 3.0694,
      "step": 113248
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0741121768951416,
      "learning_rate": 0.0003079858792033614,
      "loss": 2.8704,
      "step": 113249
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.11692476272583,
      "learning_rate": 0.0003079817900899964,
      "loss": 2.833,
      "step": 113250
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.2613255977630615,
      "learning_rate": 0.00030797770097514747,
      "loss": 2.9483,
      "step": 113251
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.044597625732422,
      "learning_rate": 0.00030797361185881526,
      "loss": 2.8599,
      "step": 113252
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.670409679412842,
      "learning_rate": 0.0003079695227410007,
      "loss": 2.7568,
      "step": 113253
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.6994695663452148,
      "learning_rate": 0.00030796543362170435,
      "loss": 3.0224,
      "step": 113254
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8905609846115112,
      "learning_rate": 0.00030796134450092707,
      "loss": 3.002,
      "step": 113255
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0651659965515137,
      "learning_rate": 0.00030795725537866974,
      "loss": 2.5853,
      "step": 113256
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.196390390396118,
      "learning_rate": 0.0003079531662549329,
      "loss": 2.7358,
      "step": 113257
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.632234811782837,
      "learning_rate": 0.0003079490771297175,
      "loss": 2.9081,
      "step": 113258
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.9225246906280518,
      "learning_rate": 0.00030794498800302417,
      "loss": 2.9472,
      "step": 113259
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0891966819763184,
      "learning_rate": 0.00030794089887485376,
      "loss": 3.0255,
      "step": 113260
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7952677011489868,
      "learning_rate": 0.00030793680974520695,
      "loss": 2.6196,
      "step": 113261
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8916391134262085,
      "learning_rate": 0.00030793272061408454,
      "loss": 3.0077,
      "step": 113262
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8407886028289795,
      "learning_rate": 0.00030792863148148734,
      "loss": 2.8767,
      "step": 113263
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9607257843017578,
      "learning_rate": 0.00030792454234741595,
      "loss": 2.9842,
      "step": 113264
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.7698779106140137,
      "learning_rate": 0.0003079204532118713,
      "loss": 2.9931,
      "step": 113265
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.419400215148926,
      "learning_rate": 0.0003079163640748541,
      "loss": 3.2127,
      "step": 113266
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.1315226554870605,
      "learning_rate": 0.00030791227493636505,
      "loss": 3.0676,
      "step": 113267
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0404388904571533,
      "learning_rate": 0.00030790818579640503,
      "loss": 2.801,
      "step": 113268
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.186763286590576,
      "learning_rate": 0.0003079040966549746,
      "loss": 3.371,
      "step": 113269
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.0227205753326416,
      "learning_rate": 0.0003079000075120748,
      "loss": 3.1509,
      "step": 113270
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.530532121658325,
      "learning_rate": 0.00030789591836770613,
      "loss": 3.1569,
      "step": 113271
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.297663688659668,
      "learning_rate": 0.0003078918292218695,
      "loss": 2.9887,
      "step": 113272
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.083876371383667,
      "learning_rate": 0.0003078877400745657,
      "loss": 2.9624,
      "step": 113273
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.813660740852356,
      "learning_rate": 0.00030788365092579537,
      "loss": 3.0289,
      "step": 113274
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.8196676969528198,
      "learning_rate": 0.0003078795617755593,
      "loss": 2.6343,
      "step": 113275
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.9613322019577026,
      "learning_rate": 0.00030787547262385833,
      "loss": 3.2022,
      "step": 113276
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.4584994316101074,
      "learning_rate": 0.00030787138347069306,
      "loss": 2.7898,
      "step": 113277
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.453278064727783,
      "learning_rate": 0.0003078672943160644,
      "loss": 2.8603,
      "step": 113278
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.3972396850585938,
      "learning_rate": 0.00030786320515997307,
      "loss": 3.0761,
      "step": 113279
    },
    {
      "epoch": 1.47,
      "grad_norm": 2.6456310749053955,
      "learning_rate": 0.00030785911600241986,
      "loss": 2.8046,
      "step": 113280
    },
    {
      "epoch": 1.47,
      "grad_norm": 1.7483563423156738,
      "learning_rate": 0.0003078550268434054,
      "loss": 2.9945,
      "step": 113281
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0197954177856445,
      "learning_rate": 0.0003078509376829306,
      "loss": 2.9672,
      "step": 113282
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8232487440109253,
      "learning_rate": 0.00030784684852099616,
      "loss": 3.1293,
      "step": 113283
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9891483783721924,
      "learning_rate": 0.0003078427593576028,
      "loss": 2.9798,
      "step": 113284
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6781761646270752,
      "learning_rate": 0.0003078386701927514,
      "loss": 2.7368,
      "step": 113285
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6720268726348877,
      "learning_rate": 0.00030783458102644256,
      "loss": 2.9423,
      "step": 113286
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1944820880889893,
      "learning_rate": 0.00030783049185867714,
      "loss": 2.8221,
      "step": 113287
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9122109413146973,
      "learning_rate": 0.000307826402689456,
      "loss": 2.9358,
      "step": 113288
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6612365245819092,
      "learning_rate": 0.00030782231351877964,
      "loss": 2.9046,
      "step": 113289
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.5383388996124268,
      "learning_rate": 0.000307818224346649,
      "loss": 3.014,
      "step": 113290
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8408246040344238,
      "learning_rate": 0.00030781413517306486,
      "loss": 2.6901,
      "step": 113291
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6640373468399048,
      "learning_rate": 0.0003078100459980279,
      "loss": 2.9576,
      "step": 113292
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.073289632797241,
      "learning_rate": 0.00030780595682153885,
      "loss": 2.8311,
      "step": 113293
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.4908101558685303,
      "learning_rate": 0.00030780186764359864,
      "loss": 3.0973,
      "step": 113294
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.157078504562378,
      "learning_rate": 0.00030779777846420776,
      "loss": 3.0074,
      "step": 113295
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0229532718658447,
      "learning_rate": 0.00030779368928336725,
      "loss": 2.8634,
      "step": 113296
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0829896926879883,
      "learning_rate": 0.00030778960010107776,
      "loss": 3.2714,
      "step": 113297
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6250749826431274,
      "learning_rate": 0.00030778551091733994,
      "loss": 3.2214,
      "step": 113298
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9285200834274292,
      "learning_rate": 0.00030778142173215465,
      "loss": 3.1118,
      "step": 113299
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.021428108215332,
      "learning_rate": 0.00030777733254552277,
      "loss": 3.0147,
      "step": 113300
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6049679517745972,
      "learning_rate": 0.0003077732433574448,
      "loss": 3.1044,
      "step": 113301
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1348423957824707,
      "learning_rate": 0.00030776915416792165,
      "loss": 3.0616,
      "step": 113302
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.915653109550476,
      "learning_rate": 0.00030776506497695415,
      "loss": 3.1885,
      "step": 113303
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7566258907318115,
      "learning_rate": 0.00030776097578454287,
      "loss": 3.1058,
      "step": 113304
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1011650562286377,
      "learning_rate": 0.00030775688659068874,
      "loss": 2.8733,
      "step": 113305
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7394671440124512,
      "learning_rate": 0.0003077527973953925,
      "loss": 2.8804,
      "step": 113306
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0048813819885254,
      "learning_rate": 0.0003077487081986548,
      "loss": 3.0448,
      "step": 113307
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6548385620117188,
      "learning_rate": 0.0003077446190004765,
      "loss": 3.0411,
      "step": 113308
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9528677463531494,
      "learning_rate": 0.0003077405298008584,
      "loss": 2.9587,
      "step": 113309
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7759079933166504,
      "learning_rate": 0.0003077364405998011,
      "loss": 2.9071,
      "step": 113310
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.120676279067993,
      "learning_rate": 0.0003077323513973055,
      "loss": 3.2064,
      "step": 113311
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0594699382781982,
      "learning_rate": 0.00030772826219337227,
      "loss": 3.1401,
      "step": 113312
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9885491132736206,
      "learning_rate": 0.0003077241729880021,
      "loss": 3.1742,
      "step": 113313
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3046607971191406,
      "learning_rate": 0.00030772008378119605,
      "loss": 2.8616,
      "step": 113314
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.812474250793457,
      "learning_rate": 0.0003077159945729546,
      "loss": 2.9271,
      "step": 113315
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.308807134628296,
      "learning_rate": 0.0003077119053632786,
      "loss": 2.7103,
      "step": 113316
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8274593353271484,
      "learning_rate": 0.0003077078161521688,
      "loss": 2.9937,
      "step": 113317
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9401580095291138,
      "learning_rate": 0.0003077037269396261,
      "loss": 3.0211,
      "step": 113318
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.0203824043273926,
      "learning_rate": 0.000307699637725651,
      "loss": 2.8597,
      "step": 113319
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2687268257141113,
      "learning_rate": 0.00030769554851024436,
      "loss": 2.9731,
      "step": 113320
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0536766052246094,
      "learning_rate": 0.0003076914592934071,
      "loss": 3.1605,
      "step": 113321
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0492632389068604,
      "learning_rate": 0.00030768737007513976,
      "loss": 3.008,
      "step": 113322
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8579926490783691,
      "learning_rate": 0.00030768328085544323,
      "loss": 2.9392,
      "step": 113323
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.036522626876831,
      "learning_rate": 0.00030767919163431827,
      "loss": 3.1792,
      "step": 113324
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8097360134124756,
      "learning_rate": 0.0003076751024117656,
      "loss": 3.1199,
      "step": 113325
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1919543743133545,
      "learning_rate": 0.00030767101318778585,
      "loss": 3.1855,
      "step": 113326
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8649468421936035,
      "learning_rate": 0.00030766692396238,
      "loss": 3.1451,
      "step": 113327
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.805600643157959,
      "learning_rate": 0.00030766283473554875,
      "loss": 2.9162,
      "step": 113328
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9414983987808228,
      "learning_rate": 0.0003076587455072928,
      "loss": 2.8722,
      "step": 113329
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.11814022064209,
      "learning_rate": 0.000307654656277613,
      "loss": 3.0126,
      "step": 113330
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0578105449676514,
      "learning_rate": 0.00030765056704651003,
      "loss": 3.0292,
      "step": 113331
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.5966002941131592,
      "learning_rate": 0.00030764647781398463,
      "loss": 2.7195,
      "step": 113332
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1623685359954834,
      "learning_rate": 0.0003076423885800376,
      "loss": 2.9065,
      "step": 113333
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.375502824783325,
      "learning_rate": 0.00030763829934466976,
      "loss": 2.8664,
      "step": 113334
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.844083309173584,
      "learning_rate": 0.00030763421010788177,
      "loss": 2.8468,
      "step": 113335
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8208352327346802,
      "learning_rate": 0.0003076301208696745,
      "loss": 2.9437,
      "step": 113336
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.70750093460083,
      "learning_rate": 0.0003076260316300486,
      "loss": 2.9092,
      "step": 113337
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6595585346221924,
      "learning_rate": 0.0003076219423890049,
      "loss": 3.147,
      "step": 113338
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.261195659637451,
      "learning_rate": 0.00030761785314654407,
      "loss": 3.0543,
      "step": 113339
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1400012969970703,
      "learning_rate": 0.000307613763902667,
      "loss": 2.88,
      "step": 113340
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.029241561889648,
      "learning_rate": 0.00030760967465737443,
      "loss": 2.6191,
      "step": 113341
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.514708995819092,
      "learning_rate": 0.00030760558541066696,
      "loss": 3.1647,
      "step": 113342
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7162911891937256,
      "learning_rate": 0.0003076014961625455,
      "loss": 2.8013,
      "step": 113343
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7449346780776978,
      "learning_rate": 0.0003075974069130108,
      "loss": 2.853,
      "step": 113344
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.517045974731445,
      "learning_rate": 0.0003075933176620637,
      "loss": 2.8563,
      "step": 113345
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.9341824054718018,
      "learning_rate": 0.00030758922840970476,
      "loss": 2.9272,
      "step": 113346
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.07832407951355,
      "learning_rate": 0.0003075851391559348,
      "loss": 2.9064,
      "step": 113347
    },
    {
      "epoch": 1.48,
      "grad_norm": 5.063198566436768,
      "learning_rate": 0.0003075810499007547,
      "loss": 2.9266,
      "step": 113348
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.1238908767700195,
      "learning_rate": 0.00030757696064416507,
      "loss": 2.7322,
      "step": 113349
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7071900367736816,
      "learning_rate": 0.00030757287138616677,
      "loss": 2.9801,
      "step": 113350
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.066945791244507,
      "learning_rate": 0.0003075687821267605,
      "loss": 3.0328,
      "step": 113351
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.583503246307373,
      "learning_rate": 0.0003075646928659472,
      "loss": 3.1136,
      "step": 113352
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.089305400848389,
      "learning_rate": 0.00030756060360372735,
      "loss": 3.2146,
      "step": 113353
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6890578269958496,
      "learning_rate": 0.0003075565143401018,
      "loss": 3.0371,
      "step": 113354
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.5765984058380127,
      "learning_rate": 0.00030755242507507144,
      "loss": 2.821,
      "step": 113355
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0786359310150146,
      "learning_rate": 0.0003075483358086369,
      "loss": 3.0367,
      "step": 113356
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.0835466384887695,
      "learning_rate": 0.00030754424654079893,
      "loss": 2.8919,
      "step": 113357
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.2453484535217285,
      "learning_rate": 0.00030754015727155854,
      "loss": 2.9856,
      "step": 113358
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.3246912956237793,
      "learning_rate": 0.00030753606800091606,
      "loss": 2.8825,
      "step": 113359
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7583677768707275,
      "learning_rate": 0.0003075319787288726,
      "loss": 3.0673,
      "step": 113360
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8798779249191284,
      "learning_rate": 0.00030752788945542884,
      "loss": 3.0344,
      "step": 113361
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3396084308624268,
      "learning_rate": 0.0003075238001805854,
      "loss": 3.147,
      "step": 113362
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.3285834789276123,
      "learning_rate": 0.00030751971090434317,
      "loss": 3.1942,
      "step": 113363
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.010516405105591,
      "learning_rate": 0.000307515621626703,
      "loss": 2.8174,
      "step": 113364
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1603522300720215,
      "learning_rate": 0.00030751153234766536,
      "loss": 3.0043,
      "step": 113365
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.841186046600342,
      "learning_rate": 0.0003075074430672313,
      "loss": 2.9838,
      "step": 113366
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2110602855682373,
      "learning_rate": 0.00030750335378540146,
      "loss": 3.0834,
      "step": 113367
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.91413950920105,
      "learning_rate": 0.00030749926450217654,
      "loss": 2.9214,
      "step": 113368
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9944075345993042,
      "learning_rate": 0.0003074951752175574,
      "loss": 2.6859,
      "step": 113369
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.154866933822632,
      "learning_rate": 0.00030749108593154477,
      "loss": 2.9724,
      "step": 113370
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9338302612304688,
      "learning_rate": 0.0003074869966441394,
      "loss": 2.8981,
      "step": 113371
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.612495183944702,
      "learning_rate": 0.00030748290735534205,
      "loss": 3.076,
      "step": 113372
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.972618818283081,
      "learning_rate": 0.0003074788180651535,
      "loss": 2.9322,
      "step": 113373
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3017914295196533,
      "learning_rate": 0.00030747472877357444,
      "loss": 3.1776,
      "step": 113374
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2936530113220215,
      "learning_rate": 0.00030747063948060574,
      "loss": 3.0278,
      "step": 113375
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.129460573196411,
      "learning_rate": 0.0003074665501862482,
      "loss": 2.7729,
      "step": 113376
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6675337553024292,
      "learning_rate": 0.00030746246089050235,
      "loss": 3.1946,
      "step": 113377
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.325270652770996,
      "learning_rate": 0.00030745837159336907,
      "loss": 3.0511,
      "step": 113378
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.796229600906372,
      "learning_rate": 0.00030745428229484925,
      "loss": 2.9164,
      "step": 113379
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8353687524795532,
      "learning_rate": 0.00030745019299494345,
      "loss": 3.1511,
      "step": 113380
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8205991983413696,
      "learning_rate": 0.0003074461036936526,
      "loss": 3.101,
      "step": 113381
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8937065601348877,
      "learning_rate": 0.0003074420143909773,
      "loss": 2.9435,
      "step": 113382
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.197728157043457,
      "learning_rate": 0.0003074379250869184,
      "loss": 2.7859,
      "step": 113383
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0800838470458984,
      "learning_rate": 0.00030743383578147666,
      "loss": 2.9953,
      "step": 113384
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7869343757629395,
      "learning_rate": 0.0003074297464746529,
      "loss": 2.8725,
      "step": 113385
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4710195064544678,
      "learning_rate": 0.00030742565716644773,
      "loss": 2.9151,
      "step": 113386
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6724841594696045,
      "learning_rate": 0.00030742156785686195,
      "loss": 2.9428,
      "step": 113387
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.345991373062134,
      "learning_rate": 0.00030741747854589653,
      "loss": 2.8795,
      "step": 113388
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.019996166229248,
      "learning_rate": 0.0003074133892335519,
      "loss": 2.9749,
      "step": 113389
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1401095390319824,
      "learning_rate": 0.000307409299919829,
      "loss": 2.9706,
      "step": 113390
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3185667991638184,
      "learning_rate": 0.00030740521060472866,
      "loss": 3.1362,
      "step": 113391
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7003339529037476,
      "learning_rate": 0.0003074011212882515,
      "loss": 2.8616,
      "step": 113392
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.245072364807129,
      "learning_rate": 0.0003073970319703983,
      "loss": 3.1616,
      "step": 113393
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.471660614013672,
      "learning_rate": 0.0003073929426511699,
      "loss": 2.8928,
      "step": 113394
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.215838670730591,
      "learning_rate": 0.000307388853330567,
      "loss": 3.0384,
      "step": 113395
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6749190092086792,
      "learning_rate": 0.0003073847640085903,
      "loss": 2.9853,
      "step": 113396
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7452765703201294,
      "learning_rate": 0.0003073806746852408,
      "loss": 2.9442,
      "step": 113397
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.970576524734497,
      "learning_rate": 0.00030737658536051895,
      "loss": 2.885,
      "step": 113398
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7518589496612549,
      "learning_rate": 0.00030737249603442565,
      "loss": 2.9907,
      "step": 113399
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2431769371032715,
      "learning_rate": 0.00030736840670696165,
      "loss": 2.9138,
      "step": 113400
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8128650188446045,
      "learning_rate": 0.0003073643173781278,
      "loss": 2.8917,
      "step": 113401
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6732139587402344,
      "learning_rate": 0.0003073602280479248,
      "loss": 2.9749,
      "step": 113402
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9791938066482544,
      "learning_rate": 0.00030735613871635335,
      "loss": 2.8646,
      "step": 113403
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9426822662353516,
      "learning_rate": 0.00030735204938341434,
      "loss": 2.9219,
      "step": 113404
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.856705665588379,
      "learning_rate": 0.00030734796004910827,
      "loss": 2.7608,
      "step": 113405
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.014880895614624,
      "learning_rate": 0.0003073438707134362,
      "loss": 2.9925,
      "step": 113406
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.716262698173523,
      "learning_rate": 0.0003073397813763988,
      "loss": 3.1272,
      "step": 113407
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.023921489715576,
      "learning_rate": 0.00030733569203799673,
      "loss": 3.1189,
      "step": 113408
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3633036613464355,
      "learning_rate": 0.0003073316026982308,
      "loss": 3.1462,
      "step": 113409
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.4328835010528564,
      "learning_rate": 0.0003073275133571018,
      "loss": 3.1127,
      "step": 113410
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9566789865493774,
      "learning_rate": 0.00030732342401461045,
      "loss": 2.8813,
      "step": 113411
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9162966012954712,
      "learning_rate": 0.0003073193346707576,
      "loss": 3.0261,
      "step": 113412
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3041775226593018,
      "learning_rate": 0.00030731524532554396,
      "loss": 3.1739,
      "step": 113413
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.009586811065674,
      "learning_rate": 0.00030731115597897024,
      "loss": 2.8493,
      "step": 113414
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2713592052459717,
      "learning_rate": 0.0003073070666310372,
      "loss": 2.7806,
      "step": 113415
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.944042682647705,
      "learning_rate": 0.00030730297728174573,
      "loss": 2.988,
      "step": 113416
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4566564559936523,
      "learning_rate": 0.0003072988879310964,
      "loss": 2.9089,
      "step": 113417
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.0228140354156494,
      "learning_rate": 0.0003072947985790901,
      "loss": 2.9164,
      "step": 113418
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1817638874053955,
      "learning_rate": 0.0003072907092257277,
      "loss": 3.0433,
      "step": 113419
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8516952991485596,
      "learning_rate": 0.0003072866198710096,
      "loss": 3.1083,
      "step": 113420
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.946077346801758,
      "learning_rate": 0.0003072825305149368,
      "loss": 2.8829,
      "step": 113421
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.12501859664917,
      "learning_rate": 0.00030727844115751023,
      "loss": 2.9837,
      "step": 113422
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1972811222076416,
      "learning_rate": 0.0003072743517987303,
      "loss": 3.096,
      "step": 113423
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.839660406112671,
      "learning_rate": 0.000307270262438598,
      "loss": 2.7529,
      "step": 113424
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6557435989379883,
      "learning_rate": 0.00030726617307711394,
      "loss": 2.9452,
      "step": 113425
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.130979061126709,
      "learning_rate": 0.00030726208371427904,
      "loss": 3.124,
      "step": 113426
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.89174485206604,
      "learning_rate": 0.0003072579943500939,
      "loss": 2.7961,
      "step": 113427
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.578709363937378,
      "learning_rate": 0.00030725390498455947,
      "loss": 3.0545,
      "step": 113428
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.291964530944824,
      "learning_rate": 0.00030724981561767637,
      "loss": 3.0438,
      "step": 113429
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6223182678222656,
      "learning_rate": 0.0003072457262494454,
      "loss": 3.0994,
      "step": 113430
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9233146905899048,
      "learning_rate": 0.00030724163687986724,
      "loss": 2.9978,
      "step": 113431
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6709513664245605,
      "learning_rate": 0.0003072375475089427,
      "loss": 3.1064,
      "step": 113432
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9058631658554077,
      "learning_rate": 0.0003072334581366727,
      "loss": 3.0896,
      "step": 113433
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9677826166152954,
      "learning_rate": 0.0003072293687630578,
      "loss": 2.7227,
      "step": 113434
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2517151832580566,
      "learning_rate": 0.0003072252793880988,
      "loss": 3.1525,
      "step": 113435
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8742903470993042,
      "learning_rate": 0.0003072211900117965,
      "loss": 2.8126,
      "step": 113436
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.621380090713501,
      "learning_rate": 0.00030721710063415166,
      "loss": 2.8084,
      "step": 113437
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8582119941711426,
      "learning_rate": 0.000307213011255165,
      "loss": 3.0904,
      "step": 113438
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9477853775024414,
      "learning_rate": 0.0003072089218748373,
      "loss": 3.0791,
      "step": 113439
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9057122468948364,
      "learning_rate": 0.0003072048324931694,
      "loss": 2.9498,
      "step": 113440
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8292820453643799,
      "learning_rate": 0.00030720074311016193,
      "loss": 2.869,
      "step": 113441
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8887799978256226,
      "learning_rate": 0.00030719665372581565,
      "loss": 2.9081,
      "step": 113442
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.212815761566162,
      "learning_rate": 0.0003071925643401315,
      "loss": 2.7898,
      "step": 113443
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8475899696350098,
      "learning_rate": 0.00030718847495311,
      "loss": 2.9942,
      "step": 113444
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.704124927520752,
      "learning_rate": 0.00030718438556475206,
      "loss": 2.757,
      "step": 113445
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1441190242767334,
      "learning_rate": 0.00030718029617505843,
      "loss": 3.0655,
      "step": 113446
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8103431463241577,
      "learning_rate": 0.00030717620678402983,
      "loss": 2.9144,
      "step": 113447
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4114248752593994,
      "learning_rate": 0.00030717211739166706,
      "loss": 3.0937,
      "step": 113448
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2632503509521484,
      "learning_rate": 0.00030716802799797094,
      "loss": 3.1515,
      "step": 113449
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1389100551605225,
      "learning_rate": 0.00030716393860294195,
      "loss": 3.079,
      "step": 113450
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1351981163024902,
      "learning_rate": 0.0003071598492065812,
      "loss": 3.0103,
      "step": 113451
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7163565158843994,
      "learning_rate": 0.0003071557598088893,
      "loss": 2.9241,
      "step": 113452
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.098926305770874,
      "learning_rate": 0.00030715167040986686,
      "loss": 2.9337,
      "step": 113453
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0704238414764404,
      "learning_rate": 0.00030714758100951493,
      "loss": 2.9361,
      "step": 113454
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9187391996383667,
      "learning_rate": 0.0003071434916078341,
      "loss": 2.903,
      "step": 113455
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8993645906448364,
      "learning_rate": 0.00030713940220482516,
      "loss": 2.8127,
      "step": 113456
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.627074718475342,
      "learning_rate": 0.0003071353128004888,
      "loss": 2.9321,
      "step": 113457
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9841078519821167,
      "learning_rate": 0.00030713122339482595,
      "loss": 3.2321,
      "step": 113458
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7166599035263062,
      "learning_rate": 0.00030712713398783724,
      "loss": 2.7836,
      "step": 113459
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9183212518692017,
      "learning_rate": 0.0003071230445795234,
      "loss": 2.9781,
      "step": 113460
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.5336034297943115,
      "learning_rate": 0.00030711895516988535,
      "loss": 3.0047,
      "step": 113461
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9330382347106934,
      "learning_rate": 0.0003071148657589237,
      "loss": 3.1371,
      "step": 113462
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6034839153289795,
      "learning_rate": 0.0003071107763466393,
      "loss": 2.81,
      "step": 113463
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9859646558761597,
      "learning_rate": 0.0003071066869330329,
      "loss": 3.0359,
      "step": 113464
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4449870586395264,
      "learning_rate": 0.0003071025975181051,
      "loss": 2.9229,
      "step": 113465
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6386570930480957,
      "learning_rate": 0.0003070985081018568,
      "loss": 3.0544,
      "step": 113466
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8574497699737549,
      "learning_rate": 0.00030709441868428884,
      "loss": 3.0019,
      "step": 113467
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.024841785430908,
      "learning_rate": 0.00030709032926540193,
      "loss": 2.9456,
      "step": 113468
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1918365955352783,
      "learning_rate": 0.00030708623984519674,
      "loss": 2.9843,
      "step": 113469
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.5736474990844727,
      "learning_rate": 0.00030708215042367403,
      "loss": 2.8519,
      "step": 113470
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7513467073440552,
      "learning_rate": 0.0003070780610008347,
      "loss": 3.147,
      "step": 113471
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.703772783279419,
      "learning_rate": 0.0003070739715766794,
      "loss": 3.133,
      "step": 113472
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3443639278411865,
      "learning_rate": 0.0003070698821512089,
      "loss": 3.1745,
      "step": 113473
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.557318925857544,
      "learning_rate": 0.000307065792724424,
      "loss": 2.9742,
      "step": 113474
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9404641389846802,
      "learning_rate": 0.0003070617032963254,
      "loss": 2.8523,
      "step": 113475
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7342545986175537,
      "learning_rate": 0.00030705761386691386,
      "loss": 2.8674,
      "step": 113476
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.6307008266448975,
      "learning_rate": 0.0003070535244361903,
      "loss": 2.9417,
      "step": 113477
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.0998313426971436,
      "learning_rate": 0.0003070494350041553,
      "loss": 3.044,
      "step": 113478
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2320573329925537,
      "learning_rate": 0.0003070453455708096,
      "loss": 3.2086,
      "step": 113479
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6230783462524414,
      "learning_rate": 0.00030704125613615415,
      "loss": 3.0484,
      "step": 113480
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.3943400382995605,
      "learning_rate": 0.0003070371667001896,
      "loss": 3.1884,
      "step": 113481
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7755181789398193,
      "learning_rate": 0.0003070330772629166,
      "loss": 3.0999,
      "step": 113482
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2908616065979004,
      "learning_rate": 0.0003070289878243361,
      "loss": 2.7863,
      "step": 113483
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.423677682876587,
      "learning_rate": 0.00030702489838444874,
      "loss": 2.9492,
      "step": 113484
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.529874563217163,
      "learning_rate": 0.0003070208089432553,
      "loss": 2.9327,
      "step": 113485
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.408243179321289,
      "learning_rate": 0.0003070167195007566,
      "loss": 2.9843,
      "step": 113486
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.640312671661377,
      "learning_rate": 0.00030701263005695334,
      "loss": 3.0536,
      "step": 113487
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9366093873977661,
      "learning_rate": 0.00030700854061184636,
      "loss": 3.0055,
      "step": 113488
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.636134624481201,
      "learning_rate": 0.0003070044511654363,
      "loss": 3.1672,
      "step": 113489
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.135085344314575,
      "learning_rate": 0.00030700036171772395,
      "loss": 2.9876,
      "step": 113490
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.355043411254883,
      "learning_rate": 0.00030699627226871015,
      "loss": 3.0188,
      "step": 113491
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8862130641937256,
      "learning_rate": 0.00030699218281839564,
      "loss": 2.9384,
      "step": 113492
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.289252996444702,
      "learning_rate": 0.0003069880933667811,
      "loss": 3.0693,
      "step": 113493
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3383383750915527,
      "learning_rate": 0.00030698400391386733,
      "loss": 2.9872,
      "step": 113494
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1292366981506348,
      "learning_rate": 0.0003069799144596552,
      "loss": 3.1227,
      "step": 113495
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.101600170135498,
      "learning_rate": 0.00030697582500414526,
      "loss": 3.0471,
      "step": 113496
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7927963733673096,
      "learning_rate": 0.0003069717355473384,
      "loss": 3.2105,
      "step": 113497
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9393999576568604,
      "learning_rate": 0.00030696764608923545,
      "loss": 2.9329,
      "step": 113498
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.2400739192962646,
      "learning_rate": 0.000306963556629837,
      "loss": 3.0799,
      "step": 113499
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4608840942382812,
      "learning_rate": 0.0003069594671691439,
      "loss": 3.3846,
      "step": 113500
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8402470350265503,
      "learning_rate": 0.0003069553777071569,
      "loss": 2.741,
      "step": 113501
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.900606632232666,
      "learning_rate": 0.00030695128824387676,
      "loss": 2.8931,
      "step": 113502
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4404196739196777,
      "learning_rate": 0.0003069471987793043,
      "loss": 3.1588,
      "step": 113503
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0138401985168457,
      "learning_rate": 0.0003069431093134402,
      "loss": 2.7899,
      "step": 113504
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.5664106607437134,
      "learning_rate": 0.0003069390198462852,
      "loss": 3.2369,
      "step": 113505
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.979738712310791,
      "learning_rate": 0.0003069349303778401,
      "loss": 2.9861,
      "step": 113506
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.9447014331817627,
      "learning_rate": 0.00030693084090810575,
      "loss": 3.1147,
      "step": 113507
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9278143644332886,
      "learning_rate": 0.0003069267514370828,
      "loss": 3.2056,
      "step": 113508
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7711644172668457,
      "learning_rate": 0.00030692266196477195,
      "loss": 3.0598,
      "step": 113509
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9528108835220337,
      "learning_rate": 0.0003069185724911742,
      "loss": 3.0158,
      "step": 113510
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.5198373794555664,
      "learning_rate": 0.00030691448301629,
      "loss": 3.1278,
      "step": 113511
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8792970180511475,
      "learning_rate": 0.0003069103935401204,
      "loss": 2.9064,
      "step": 113512
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1173224449157715,
      "learning_rate": 0.0003069063040626659,
      "loss": 2.9663,
      "step": 113513
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.726625442504883,
      "learning_rate": 0.00030690221458392746,
      "loss": 3.2252,
      "step": 113514
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7478864192962646,
      "learning_rate": 0.00030689812510390575,
      "loss": 3.0585,
      "step": 113515
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.83222496509552,
      "learning_rate": 0.0003068940356226016,
      "loss": 2.8492,
      "step": 113516
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7824674844741821,
      "learning_rate": 0.0003068899461400156,
      "loss": 2.9606,
      "step": 113517
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0872180461883545,
      "learning_rate": 0.00030688585665614875,
      "loss": 3.0936,
      "step": 113518
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2769200801849365,
      "learning_rate": 0.0003068817671710016,
      "loss": 2.7756,
      "step": 113519
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.645572543144226,
      "learning_rate": 0.00030687767768457507,
      "loss": 2.8482,
      "step": 113520
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1028308868408203,
      "learning_rate": 0.00030687358819686983,
      "loss": 2.8382,
      "step": 113521
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6589889526367188,
      "learning_rate": 0.00030686949870788665,
      "loss": 2.9835,
      "step": 113522
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8077330589294434,
      "learning_rate": 0.0003068654092176263,
      "loss": 3.0495,
      "step": 113523
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0263278484344482,
      "learning_rate": 0.0003068613197260895,
      "loss": 3.0209,
      "step": 113524
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.152865171432495,
      "learning_rate": 0.00030685723023327717,
      "loss": 3.26,
      "step": 113525
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3150691986083984,
      "learning_rate": 0.00030685314073918983,
      "loss": 3.0278,
      "step": 113526
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8000068664550781,
      "learning_rate": 0.0003068490512438284,
      "loss": 2.9666,
      "step": 113527
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0782470703125,
      "learning_rate": 0.0003068449617471937,
      "loss": 2.8705,
      "step": 113528
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7618526220321655,
      "learning_rate": 0.0003068408722492862,
      "loss": 3.1324,
      "step": 113529
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6885876655578613,
      "learning_rate": 0.000306836782750107,
      "loss": 3.0123,
      "step": 113530
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7635053396224976,
      "learning_rate": 0.0003068326932496566,
      "loss": 2.8779,
      "step": 113531
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.045393943786621,
      "learning_rate": 0.00030682860374793596,
      "loss": 3.018,
      "step": 113532
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7752671241760254,
      "learning_rate": 0.00030682451424494575,
      "loss": 2.9685,
      "step": 113533
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7654764652252197,
      "learning_rate": 0.0003068204247406867,
      "loss": 2.7017,
      "step": 113534
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9736491441726685,
      "learning_rate": 0.0003068163352351597,
      "loss": 2.9548,
      "step": 113535
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.460237741470337,
      "learning_rate": 0.0003068122457283653,
      "loss": 3.0481,
      "step": 113536
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4655277729034424,
      "learning_rate": 0.0003068081562203044,
      "loss": 3.0363,
      "step": 113537
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4792752265930176,
      "learning_rate": 0.0003068040667109778,
      "loss": 2.9879,
      "step": 113538
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7092649936676025,
      "learning_rate": 0.0003067999772003861,
      "loss": 2.8327,
      "step": 113539
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.596257209777832,
      "learning_rate": 0.0003067958876885302,
      "loss": 3.3015,
      "step": 113540
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0181314945220947,
      "learning_rate": 0.00030679179817541085,
      "loss": 3.0444,
      "step": 113541
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7210394144058228,
      "learning_rate": 0.0003067877086610287,
      "loss": 3.1077,
      "step": 113542
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9727345705032349,
      "learning_rate": 0.0003067836191453846,
      "loss": 2.9483,
      "step": 113543
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1236629486083984,
      "learning_rate": 0.00030677952962847944,
      "loss": 2.9093,
      "step": 113544
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6739444732666016,
      "learning_rate": 0.00030677544011031366,
      "loss": 3.1476,
      "step": 113545
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.046625852584839,
      "learning_rate": 0.0003067713505908882,
      "loss": 2.9057,
      "step": 113546
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.339792013168335,
      "learning_rate": 0.000306767261070204,
      "loss": 3.1436,
      "step": 113547
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.001561164855957,
      "learning_rate": 0.0003067631715482614,
      "loss": 2.9077,
      "step": 113548
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.178708791732788,
      "learning_rate": 0.0003067590820250616,
      "loss": 3.2777,
      "step": 113549
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.160614013671875,
      "learning_rate": 0.0003067549925006051,
      "loss": 2.963,
      "step": 113550
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9088315963745117,
      "learning_rate": 0.00030675090297489267,
      "loss": 2.887,
      "step": 113551
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2303545475006104,
      "learning_rate": 0.0003067468134479251,
      "loss": 2.9778,
      "step": 113552
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7477307319641113,
      "learning_rate": 0.0003067427239197033,
      "loss": 2.8421,
      "step": 113553
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9193943738937378,
      "learning_rate": 0.00030673863439022777,
      "loss": 3.0072,
      "step": 113554
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9544777870178223,
      "learning_rate": 0.00030673454485949945,
      "loss": 2.9359,
      "step": 113555
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7315139770507812,
      "learning_rate": 0.00030673045532751903,
      "loss": 3.1337,
      "step": 113556
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2274768352508545,
      "learning_rate": 0.00030672636579428733,
      "loss": 2.7672,
      "step": 113557
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9521384239196777,
      "learning_rate": 0.000306722276259805,
      "loss": 2.9032,
      "step": 113558
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6285345554351807,
      "learning_rate": 0.00030671818672407304,
      "loss": 3.0976,
      "step": 113559
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.807931900024414,
      "learning_rate": 0.0003067140971870919,
      "loss": 3.0422,
      "step": 113560
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.469754934310913,
      "learning_rate": 0.00030671000764886247,
      "loss": 3.2256,
      "step": 113561
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1590423583984375,
      "learning_rate": 0.0003067059181093856,
      "loss": 3.0921,
      "step": 113562
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.940771460533142,
      "learning_rate": 0.0003067018285686619,
      "loss": 3.0126,
      "step": 113563
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.233820915222168,
      "learning_rate": 0.0003066977390266922,
      "loss": 2.7552,
      "step": 113564
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.228994607925415,
      "learning_rate": 0.0003066936494834774,
      "loss": 2.6311,
      "step": 113565
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.623390793800354,
      "learning_rate": 0.00030668955993901797,
      "loss": 3.0082,
      "step": 113566
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.02327036857605,
      "learning_rate": 0.00030668547039331486,
      "loss": 2.9492,
      "step": 113567
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7729058265686035,
      "learning_rate": 0.0003066813808463689,
      "loss": 2.8451,
      "step": 113568
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7488306760787964,
      "learning_rate": 0.0003066772912981806,
      "loss": 2.8633,
      "step": 113569
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9892070293426514,
      "learning_rate": 0.0003066732017487509,
      "loss": 3.1558,
      "step": 113570
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2508158683776855,
      "learning_rate": 0.0003066691121980806,
      "loss": 3.2236,
      "step": 113571
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9242702722549438,
      "learning_rate": 0.00030666502264617033,
      "loss": 2.9804,
      "step": 113572
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0215353965759277,
      "learning_rate": 0.00030666093309302087,
      "loss": 2.9582,
      "step": 113573
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.882774829864502,
      "learning_rate": 0.0003066568435386331,
      "loss": 3.1077,
      "step": 113574
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8427965641021729,
      "learning_rate": 0.0003066527539830077,
      "loss": 3.3592,
      "step": 113575
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.783503532409668,
      "learning_rate": 0.0003066486644261453,
      "loss": 3.1608,
      "step": 113576
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7840814590454102,
      "learning_rate": 0.00030664457486804695,
      "loss": 3.1573,
      "step": 113577
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8130757808685303,
      "learning_rate": 0.00030664048530871314,
      "loss": 2.8971,
      "step": 113578
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.901092767715454,
      "learning_rate": 0.00030663639574814475,
      "loss": 3.1489,
      "step": 113579
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7171798944473267,
      "learning_rate": 0.0003066323061863426,
      "loss": 3.0403,
      "step": 113580
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.766528606414795,
      "learning_rate": 0.0003066282166233073,
      "loss": 2.9038,
      "step": 113581
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9584033489227295,
      "learning_rate": 0.0003066241270590397,
      "loss": 2.7443,
      "step": 113582
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4715068340301514,
      "learning_rate": 0.00030662003749354057,
      "loss": 2.582,
      "step": 113583
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0591533184051514,
      "learning_rate": 0.00030661594792681066,
      "loss": 3.0434,
      "step": 113584
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.356865406036377,
      "learning_rate": 0.0003066118583588507,
      "loss": 2.6958,
      "step": 113585
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1973342895507812,
      "learning_rate": 0.0003066077687896615,
      "loss": 2.9399,
      "step": 113586
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7313247919082642,
      "learning_rate": 0.0003066036792192438,
      "loss": 2.7926,
      "step": 113587
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8940932750701904,
      "learning_rate": 0.00030659958964759823,
      "loss": 3.0198,
      "step": 113588
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.3262267112731934,
      "learning_rate": 0.0003065955000747258,
      "loss": 2.8641,
      "step": 113589
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.134732246398926,
      "learning_rate": 0.00030659141050062714,
      "loss": 3.083,
      "step": 113590
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.464236259460449,
      "learning_rate": 0.00030658732092530297,
      "loss": 3.1253,
      "step": 113591
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.101945400238037,
      "learning_rate": 0.0003065832313487541,
      "loss": 3.0245,
      "step": 113592
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.557910680770874,
      "learning_rate": 0.00030657914177098126,
      "loss": 3.0184,
      "step": 113593
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.78420889377594,
      "learning_rate": 0.0003065750521919853,
      "loss": 3.0883,
      "step": 113594
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.651678204536438,
      "learning_rate": 0.00030657096261176687,
      "loss": 3.1911,
      "step": 113595
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.201571226119995,
      "learning_rate": 0.0003065668730303267,
      "loss": 2.797,
      "step": 113596
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.265822410583496,
      "learning_rate": 0.00030656278344766576,
      "loss": 3.1037,
      "step": 113597
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6048741340637207,
      "learning_rate": 0.00030655869386378453,
      "loss": 2.9521,
      "step": 113598
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9469070434570312,
      "learning_rate": 0.0003065546042786841,
      "loss": 2.9909,
      "step": 113599
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.8392839431762695,
      "learning_rate": 0.0003065505146923649,
      "loss": 2.8488,
      "step": 113600
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8859529495239258,
      "learning_rate": 0.00030654642510482785,
      "loss": 3.0414,
      "step": 113601
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6548988819122314,
      "learning_rate": 0.0003065423355160738,
      "loss": 3.1063,
      "step": 113602
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2595739364624023,
      "learning_rate": 0.00030653824592610324,
      "loss": 2.8637,
      "step": 113603
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.919752597808838,
      "learning_rate": 0.00030653415633491725,
      "loss": 2.9878,
      "step": 113604
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.031372547149658,
      "learning_rate": 0.0003065300667425163,
      "loss": 3.192,
      "step": 113605
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.928368330001831,
      "learning_rate": 0.00030652597714890144,
      "loss": 3.1527,
      "step": 113606
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3979368209838867,
      "learning_rate": 0.0003065218875540732,
      "loss": 2.9585,
      "step": 113607
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.5335590839385986,
      "learning_rate": 0.00030651779795803235,
      "loss": 2.9366,
      "step": 113608
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7971320152282715,
      "learning_rate": 0.0003065137083607798,
      "loss": 3.0211,
      "step": 113609
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6918158531188965,
      "learning_rate": 0.00030650961876231624,
      "loss": 3.0336,
      "step": 113610
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4345717430114746,
      "learning_rate": 0.0003065055291626423,
      "loss": 2.9376,
      "step": 113611
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1059505939483643,
      "learning_rate": 0.000306501439561759,
      "loss": 3.0135,
      "step": 113612
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9411835670471191,
      "learning_rate": 0.00030649734995966696,
      "loss": 3.1348,
      "step": 113613
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8052983283996582,
      "learning_rate": 0.00030649326035636686,
      "loss": 2.8556,
      "step": 113614
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.82891047000885,
      "learning_rate": 0.00030648917075185955,
      "loss": 3.0646,
      "step": 113615
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.62599778175354,
      "learning_rate": 0.00030648508114614574,
      "loss": 3.0723,
      "step": 113616
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7604470252990723,
      "learning_rate": 0.0003064809915392264,
      "loss": 2.9486,
      "step": 113617
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9662765264511108,
      "learning_rate": 0.000306476901931102,
      "loss": 3.021,
      "step": 113618
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7782329320907593,
      "learning_rate": 0.0003064728123217734,
      "loss": 2.9007,
      "step": 113619
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0295944213867188,
      "learning_rate": 0.00030646872271124145,
      "loss": 2.7529,
      "step": 113620
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.0840842723846436,
      "learning_rate": 0.0003064646330995068,
      "loss": 2.9057,
      "step": 113621
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.197399854660034,
      "learning_rate": 0.0003064605434865702,
      "loss": 3.0052,
      "step": 113622
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0513057708740234,
      "learning_rate": 0.0003064564538724326,
      "loss": 3.0549,
      "step": 113623
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.188236951828003,
      "learning_rate": 0.00030645236425709457,
      "loss": 3.1342,
      "step": 113624
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.5862278938293457,
      "learning_rate": 0.0003064482746405569,
      "loss": 3.1919,
      "step": 113625
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.5559853315353394,
      "learning_rate": 0.00030644418502282037,
      "loss": 3.0482,
      "step": 113626
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.342818021774292,
      "learning_rate": 0.00030644009540388575,
      "loss": 3.1588,
      "step": 113627
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8179075717926025,
      "learning_rate": 0.0003064360057837538,
      "loss": 2.8991,
      "step": 113628
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7779204845428467,
      "learning_rate": 0.00030643191616242524,
      "loss": 3.1575,
      "step": 113629
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9808319807052612,
      "learning_rate": 0.0003064278265399009,
      "loss": 3.1131,
      "step": 113630
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7752832174301147,
      "learning_rate": 0.0003064237369161815,
      "loss": 3.0409,
      "step": 113631
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6430020332336426,
      "learning_rate": 0.00030641964729126784,
      "loss": 2.979,
      "step": 113632
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9632362127304077,
      "learning_rate": 0.0003064155576651606,
      "loss": 3.0089,
      "step": 113633
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.919965982437134,
      "learning_rate": 0.0003064114680378606,
      "loss": 2.8395,
      "step": 113634
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3249568939208984,
      "learning_rate": 0.00030640737840936864,
      "loss": 2.9691,
      "step": 113635
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.013005256652832,
      "learning_rate": 0.00030640328877968535,
      "loss": 2.9856,
      "step": 113636
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.931527853012085,
      "learning_rate": 0.0003063991991488116,
      "loss": 3.1347,
      "step": 113637
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9883289337158203,
      "learning_rate": 0.0003063951095167481,
      "loss": 3.0102,
      "step": 113638
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.3117172718048096,
      "learning_rate": 0.00030639101988349566,
      "loss": 2.9941,
      "step": 113639
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7643404006958008,
      "learning_rate": 0.00030638693024905494,
      "loss": 3.083,
      "step": 113640
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0347213745117188,
      "learning_rate": 0.00030638284061342686,
      "loss": 2.8325,
      "step": 113641
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.0665106773376465,
      "learning_rate": 0.0003063787509766121,
      "loss": 3.1379,
      "step": 113642
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.0291829109191895,
      "learning_rate": 0.0003063746613386113,
      "loss": 2.9558,
      "step": 113643
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.979928970336914,
      "learning_rate": 0.0003063705716994254,
      "loss": 3.1289,
      "step": 113644
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.8003897666931152,
      "learning_rate": 0.00030636648205905514,
      "loss": 3.0182,
      "step": 113645
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.466691255569458,
      "learning_rate": 0.00030636239241750113,
      "loss": 3.1149,
      "step": 113646
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.510763168334961,
      "learning_rate": 0.00030635830277476425,
      "loss": 2.9562,
      "step": 113647
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.8776676654815674,
      "learning_rate": 0.0003063542131308453,
      "loss": 2.9551,
      "step": 113648
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.998641848564148,
      "learning_rate": 0.0003063501234857449,
      "loss": 3.0464,
      "step": 113649
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8986806869506836,
      "learning_rate": 0.00030634603383946404,
      "loss": 3.0813,
      "step": 113650
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4559683799743652,
      "learning_rate": 0.00030634194419200313,
      "loss": 3.1371,
      "step": 113651
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.804464817047119,
      "learning_rate": 0.00030633785454336327,
      "loss": 2.992,
      "step": 113652
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8709627389907837,
      "learning_rate": 0.0003063337648935451,
      "loss": 2.7781,
      "step": 113653
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.241611957550049,
      "learning_rate": 0.0003063296752425492,
      "loss": 3.0385,
      "step": 113654
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.088667869567871,
      "learning_rate": 0.0003063255855903766,
      "loss": 2.996,
      "step": 113655
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8517398834228516,
      "learning_rate": 0.000306321495937028,
      "loss": 2.95,
      "step": 113656
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.108732223510742,
      "learning_rate": 0.0003063174062825041,
      "loss": 2.9686,
      "step": 113657
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1381301879882812,
      "learning_rate": 0.0003063133166268056,
      "loss": 3.0155,
      "step": 113658
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.217590093612671,
      "learning_rate": 0.00030630922696993346,
      "loss": 3.013,
      "step": 113659
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7207090854644775,
      "learning_rate": 0.0003063051373118882,
      "loss": 3.0714,
      "step": 113660
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9693890810012817,
      "learning_rate": 0.0003063010476526707,
      "loss": 2.7421,
      "step": 113661
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7596805095672607,
      "learning_rate": 0.0003062969579922818,
      "loss": 3.1204,
      "step": 113662
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8821721076965332,
      "learning_rate": 0.0003062928683307221,
      "loss": 2.9426,
      "step": 113663
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.046161651611328,
      "learning_rate": 0.00030628877866799244,
      "loss": 3.1044,
      "step": 113664
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8741260766983032,
      "learning_rate": 0.0003062846890040936,
      "loss": 3.0184,
      "step": 113665
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8627439737319946,
      "learning_rate": 0.00030628059933902635,
      "loss": 3.072,
      "step": 113666
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.841166615486145,
      "learning_rate": 0.00030627650967279135,
      "loss": 3.2169,
      "step": 113667
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.378542184829712,
      "learning_rate": 0.0003062724200053894,
      "loss": 2.8118,
      "step": 113668
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.000308036804199,
      "learning_rate": 0.00030626833033682143,
      "loss": 3.0902,
      "step": 113669
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.220296621322632,
      "learning_rate": 0.000306264240667088,
      "loss": 3.0978,
      "step": 113670
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.6755542755126953,
      "learning_rate": 0.0003062601509961899,
      "loss": 3.295,
      "step": 113671
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.10453724861145,
      "learning_rate": 0.000306256061324128,
      "loss": 3.0263,
      "step": 113672
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1867194175720215,
      "learning_rate": 0.0003062519716509028,
      "loss": 2.914,
      "step": 113673
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.545077323913574,
      "learning_rate": 0.00030624788197651536,
      "loss": 3.1877,
      "step": 113674
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.8067996501922607,
      "learning_rate": 0.00030624379230096636,
      "loss": 2.9658,
      "step": 113675
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2928249835968018,
      "learning_rate": 0.00030623970262425645,
      "loss": 2.9974,
      "step": 113676
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0471150875091553,
      "learning_rate": 0.0003062356129463865,
      "loss": 3.0506,
      "step": 113677
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7658569812774658,
      "learning_rate": 0.00030623152326735725,
      "loss": 3.179,
      "step": 113678
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8301141262054443,
      "learning_rate": 0.0003062274335871694,
      "loss": 2.9332,
      "step": 113679
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1000378131866455,
      "learning_rate": 0.00030622334390582374,
      "loss": 2.7454,
      "step": 113680
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.4987988471984863,
      "learning_rate": 0.0003062192542233211,
      "loss": 2.8321,
      "step": 113681
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7950702905654907,
      "learning_rate": 0.0003062151645396621,
      "loss": 3.1429,
      "step": 113682
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1233086585998535,
      "learning_rate": 0.0003062110748548476,
      "loss": 3.1236,
      "step": 113683
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7884225845336914,
      "learning_rate": 0.0003062069851688785,
      "loss": 3.1125,
      "step": 113684
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7327275276184082,
      "learning_rate": 0.0003062028954817552,
      "loss": 3.2566,
      "step": 113685
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0534868240356445,
      "learning_rate": 0.00030619880579347875,
      "loss": 3.0648,
      "step": 113686
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.112102508544922,
      "learning_rate": 0.00030619471610404984,
      "loss": 2.8466,
      "step": 113687
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2025890350341797,
      "learning_rate": 0.0003061906264134692,
      "loss": 2.7732,
      "step": 113688
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.930867314338684,
      "learning_rate": 0.0003061865367217376,
      "loss": 3.2117,
      "step": 113689
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9676127433776855,
      "learning_rate": 0.00030618244702885583,
      "loss": 3.0254,
      "step": 113690
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6099015474319458,
      "learning_rate": 0.00030617835733482454,
      "loss": 3.1216,
      "step": 113691
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1408464908599854,
      "learning_rate": 0.00030617426763964464,
      "loss": 2.7296,
      "step": 113692
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6936700344085693,
      "learning_rate": 0.00030617017794331685,
      "loss": 3.1445,
      "step": 113693
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8513349294662476,
      "learning_rate": 0.0003061660882458419,
      "loss": 2.8374,
      "step": 113694
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.8616793155670166,
      "learning_rate": 0.0003061619985472205,
      "loss": 2.9873,
      "step": 113695
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9117342233657837,
      "learning_rate": 0.0003061579088474535,
      "loss": 2.8272,
      "step": 113696
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.944645643234253,
      "learning_rate": 0.00030615381914654165,
      "loss": 2.8582,
      "step": 113697
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1337335109710693,
      "learning_rate": 0.0003061497294444857,
      "loss": 2.9724,
      "step": 113698
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.051339626312256,
      "learning_rate": 0.00030614563974128633,
      "loss": 3.0245,
      "step": 113699
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7790486812591553,
      "learning_rate": 0.00030614155003694444,
      "loss": 3.2993,
      "step": 113700
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0164618492126465,
      "learning_rate": 0.00030613746033146067,
      "loss": 3.0942,
      "step": 113701
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.845658540725708,
      "learning_rate": 0.0003061333706248359,
      "loss": 3.1169,
      "step": 113702
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.224924325942993,
      "learning_rate": 0.0003061292809170708,
      "loss": 3.1013,
      "step": 113703
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6689482927322388,
      "learning_rate": 0.00030612519120816603,
      "loss": 3.0343,
      "step": 113704
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1419742107391357,
      "learning_rate": 0.0003061211014981226,
      "loss": 2.8292,
      "step": 113705
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.787765622138977,
      "learning_rate": 0.0003061170117869411,
      "loss": 2.7912,
      "step": 113706
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6173471212387085,
      "learning_rate": 0.00030611292207462237,
      "loss": 2.9153,
      "step": 113707
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6654075384140015,
      "learning_rate": 0.00030610883236116716,
      "loss": 2.9299,
      "step": 113708
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1075284481048584,
      "learning_rate": 0.0003061047426465761,
      "loss": 3.0644,
      "step": 113709
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7873903512954712,
      "learning_rate": 0.0003061006529308501,
      "loss": 3.0514,
      "step": 113710
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.300448417663574,
      "learning_rate": 0.00030609656321398994,
      "loss": 2.7275,
      "step": 113711
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3471763134002686,
      "learning_rate": 0.0003060924734959962,
      "loss": 2.8521,
      "step": 113712
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.96193265914917,
      "learning_rate": 0.00030608838377686975,
      "loss": 2.9837,
      "step": 113713
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7757335901260376,
      "learning_rate": 0.00030608429405661145,
      "loss": 3.0791,
      "step": 113714
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4238460063934326,
      "learning_rate": 0.0003060802043352219,
      "loss": 2.8868,
      "step": 113715
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.18416690826416,
      "learning_rate": 0.00030607611461270194,
      "loss": 3.022,
      "step": 113716
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0404911041259766,
      "learning_rate": 0.0003060720248890524,
      "loss": 3.023,
      "step": 113717
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.716955304145813,
      "learning_rate": 0.00030606793516427385,
      "loss": 2.938,
      "step": 113718
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.531496286392212,
      "learning_rate": 0.0003060638454383672,
      "loss": 2.953,
      "step": 113719
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6582765579223633,
      "learning_rate": 0.00030605975571133316,
      "loss": 3.0444,
      "step": 113720
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0153677463531494,
      "learning_rate": 0.0003060556659831725,
      "loss": 3.3766,
      "step": 113721
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7318015098571777,
      "learning_rate": 0.0003060515762538859,
      "loss": 2.7568,
      "step": 113722
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.2289483547210693,
      "learning_rate": 0.00030604748652347433,
      "loss": 2.7731,
      "step": 113723
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.626300096511841,
      "learning_rate": 0.0003060433967919383,
      "loss": 3.2232,
      "step": 113724
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.276951313018799,
      "learning_rate": 0.0003060393070592787,
      "loss": 3.0347,
      "step": 113725
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.522549867630005,
      "learning_rate": 0.00030603521732549643,
      "loss": 2.7726,
      "step": 113726
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.636762857437134,
      "learning_rate": 0.00030603112759059197,
      "loss": 3.0112,
      "step": 113727
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.743159532546997,
      "learning_rate": 0.00030602703785456616,
      "loss": 3.0092,
      "step": 113728
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0905144214630127,
      "learning_rate": 0.00030602294811741995,
      "loss": 2.9893,
      "step": 113729
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.849029302597046,
      "learning_rate": 0.00030601885837915386,
      "loss": 3.1693,
      "step": 113730
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.824713945388794,
      "learning_rate": 0.00030601476863976876,
      "loss": 2.9106,
      "step": 113731
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1753787994384766,
      "learning_rate": 0.0003060106788992654,
      "loss": 2.9362,
      "step": 113732
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2278599739074707,
      "learning_rate": 0.0003060065891576446,
      "loss": 3.0161,
      "step": 113733
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.661157250404358,
      "learning_rate": 0.00030600249941490704,
      "loss": 3.0481,
      "step": 113734
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4091529846191406,
      "learning_rate": 0.00030599840967105346,
      "loss": 2.7526,
      "step": 113735
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7338950634002686,
      "learning_rate": 0.00030599431992608467,
      "loss": 3.1747,
      "step": 113736
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1695096492767334,
      "learning_rate": 0.00030599023018000143,
      "loss": 3.0792,
      "step": 113737
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1674253940582275,
      "learning_rate": 0.0003059861404328045,
      "loss": 3.0401,
      "step": 113738
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9816246032714844,
      "learning_rate": 0.0003059820506844947,
      "loss": 2.973,
      "step": 113739
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.425614833831787,
      "learning_rate": 0.0003059779609350726,
      "loss": 2.8328,
      "step": 113740
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.234971284866333,
      "learning_rate": 0.0003059738711845392,
      "loss": 2.9642,
      "step": 113741
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9225560426712036,
      "learning_rate": 0.0003059697814328951,
      "loss": 2.7049,
      "step": 113742
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.3077523708343506,
      "learning_rate": 0.00030596569168014106,
      "loss": 3.2259,
      "step": 113743
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.7645039558410645,
      "learning_rate": 0.00030596160192627786,
      "loss": 2.9708,
      "step": 113744
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.296792507171631,
      "learning_rate": 0.0003059575121713064,
      "loss": 3.0315,
      "step": 113745
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6719290018081665,
      "learning_rate": 0.0003059534224152273,
      "loss": 3.2493,
      "step": 113746
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7427732944488525,
      "learning_rate": 0.00030594933265804127,
      "loss": 2.9911,
      "step": 113747
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.2313718795776367,
      "learning_rate": 0.00030594524289974926,
      "loss": 2.9412,
      "step": 113748
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1359736919403076,
      "learning_rate": 0.0003059411531403518,
      "loss": 3.2554,
      "step": 113749
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9925861358642578,
      "learning_rate": 0.0003059370633798498,
      "loss": 3.1263,
      "step": 113750
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9799096584320068,
      "learning_rate": 0.0003059329736182441,
      "loss": 3.1302,
      "step": 113751
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.8812055587768555,
      "learning_rate": 0.0003059288838555352,
      "loss": 2.848,
      "step": 113752
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.145120620727539,
      "learning_rate": 0.0003059247940917241,
      "loss": 3.0427,
      "step": 113753
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1750705242156982,
      "learning_rate": 0.0003059207043268114,
      "loss": 3.0432,
      "step": 113754
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9446159601211548,
      "learning_rate": 0.00030591661456079797,
      "loss": 2.8585,
      "step": 113755
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7465431690216064,
      "learning_rate": 0.0003059125247936845,
      "loss": 3.0142,
      "step": 113756
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9273011684417725,
      "learning_rate": 0.00030590843502547183,
      "loss": 3.0895,
      "step": 113757
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7013980150222778,
      "learning_rate": 0.0003059043452561606,
      "loss": 2.7826,
      "step": 113758
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6911283731460571,
      "learning_rate": 0.0003059002554857517,
      "loss": 2.9059,
      "step": 113759
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4028265476226807,
      "learning_rate": 0.0003058961657142459,
      "loss": 2.9829,
      "step": 113760
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7384508848190308,
      "learning_rate": 0.00030589207594164375,
      "loss": 3.1411,
      "step": 113761
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.902402400970459,
      "learning_rate": 0.0003058879861679462,
      "loss": 2.9262,
      "step": 113762
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9653297662734985,
      "learning_rate": 0.000305883896393154,
      "loss": 2.786,
      "step": 113763
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9122602939605713,
      "learning_rate": 0.00030587980661726783,
      "loss": 2.8888,
      "step": 113764
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4885129928588867,
      "learning_rate": 0.0003058757168402885,
      "loss": 3.0168,
      "step": 113765
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.928626537322998,
      "learning_rate": 0.0003058716270622168,
      "loss": 3.0148,
      "step": 113766
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.875813603401184,
      "learning_rate": 0.0003058675372830534,
      "loss": 2.9246,
      "step": 113767
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4102909564971924,
      "learning_rate": 0.0003058634475027991,
      "loss": 3.1161,
      "step": 113768
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3959298133850098,
      "learning_rate": 0.0003058593577214548,
      "loss": 2.8207,
      "step": 113769
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.358776807785034,
      "learning_rate": 0.000305855267939021,
      "loss": 3.1092,
      "step": 113770
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2871901988983154,
      "learning_rate": 0.0003058511781554986,
      "loss": 3.0459,
      "step": 113771
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1308486461639404,
      "learning_rate": 0.00030584708837088845,
      "loss": 3.024,
      "step": 113772
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6334521770477295,
      "learning_rate": 0.00030584299858519117,
      "loss": 2.9269,
      "step": 113773
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2605044841766357,
      "learning_rate": 0.0003058389087984075,
      "loss": 3.1594,
      "step": 113774
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9234322309494019,
      "learning_rate": 0.0003058348190105384,
      "loss": 3.0829,
      "step": 113775
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6137309074401855,
      "learning_rate": 0.00030583072922158437,
      "loss": 2.899,
      "step": 113776
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.46950364112854,
      "learning_rate": 0.0003058266394315464,
      "loss": 2.912,
      "step": 113777
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1406073570251465,
      "learning_rate": 0.000305822549640425,
      "loss": 2.7921,
      "step": 113778
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.347076892852783,
      "learning_rate": 0.0003058184598482212,
      "loss": 2.894,
      "step": 113779
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8400747776031494,
      "learning_rate": 0.00030581437005493565,
      "loss": 2.8097,
      "step": 113780
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0701658725738525,
      "learning_rate": 0.00030581028026056904,
      "loss": 2.9304,
      "step": 113781
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.830917239189148,
      "learning_rate": 0.00030580619046512224,
      "loss": 2.6554,
      "step": 113782
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.005073308944702,
      "learning_rate": 0.00030580210066859595,
      "loss": 2.9966,
      "step": 113783
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.985711693763733,
      "learning_rate": 0.0003057980108709909,
      "loss": 2.9948,
      "step": 113784
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0149104595184326,
      "learning_rate": 0.00030579392107230784,
      "loss": 3.2235,
      "step": 113785
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8396848440170288,
      "learning_rate": 0.0003057898312725477,
      "loss": 3.1821,
      "step": 113786
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.906031608581543,
      "learning_rate": 0.00030578574147171106,
      "loss": 3.2137,
      "step": 113787
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.996175765991211,
      "learning_rate": 0.0003057816516697987,
      "loss": 3.0497,
      "step": 113788
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.097764730453491,
      "learning_rate": 0.0003057775618668115,
      "loss": 2.9391,
      "step": 113789
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3978490829467773,
      "learning_rate": 0.0003057734720627502,
      "loss": 2.8315,
      "step": 113790
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.288907289505005,
      "learning_rate": 0.0003057693822576154,
      "loss": 2.8797,
      "step": 113791
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.20627498626709,
      "learning_rate": 0.0003057652924514079,
      "loss": 3.0797,
      "step": 113792
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.305769443511963,
      "learning_rate": 0.0003057612026441287,
      "loss": 2.7493,
      "step": 113793
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.288736343383789,
      "learning_rate": 0.00030575711283577824,
      "loss": 3.0234,
      "step": 113794
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6227589845657349,
      "learning_rate": 0.00030575302302635743,
      "loss": 3.1886,
      "step": 113795
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.558908462524414,
      "learning_rate": 0.00030574893321586716,
      "loss": 3.0533,
      "step": 113796
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.384807825088501,
      "learning_rate": 0.000305744843404308,
      "loss": 2.6967,
      "step": 113797
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.749314308166504,
      "learning_rate": 0.0003057407535916807,
      "loss": 2.9188,
      "step": 113798
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1974363327026367,
      "learning_rate": 0.0003057366637779861,
      "loss": 2.8233,
      "step": 113799
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7302281856536865,
      "learning_rate": 0.00030573257396322495,
      "loss": 3.1899,
      "step": 113800
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.058033466339111,
      "learning_rate": 0.00030572848414739796,
      "loss": 2.9509,
      "step": 113801
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2311484813690186,
      "learning_rate": 0.000305724394330506,
      "loss": 3.0189,
      "step": 113802
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6585843563079834,
      "learning_rate": 0.00030572030451254977,
      "loss": 2.9106,
      "step": 113803
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9800668954849243,
      "learning_rate": 0.0003057162146935301,
      "loss": 2.7602,
      "step": 113804
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.5000956058502197,
      "learning_rate": 0.00030571212487344755,
      "loss": 2.9338,
      "step": 113805
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.9767162799835205,
      "learning_rate": 0.00030570803505230304,
      "loss": 2.8114,
      "step": 113806
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.780844211578369,
      "learning_rate": 0.0003057039452300973,
      "loss": 2.8553,
      "step": 113807
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9481111764907837,
      "learning_rate": 0.00030569985540683113,
      "loss": 2.9982,
      "step": 113808
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4509084224700928,
      "learning_rate": 0.00030569576558250525,
      "loss": 2.9203,
      "step": 113809
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.136952877044678,
      "learning_rate": 0.00030569167575712037,
      "loss": 2.9581,
      "step": 113810
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.450709819793701,
      "learning_rate": 0.0003056875859306773,
      "loss": 2.9212,
      "step": 113811
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9937738180160522,
      "learning_rate": 0.00030568349610317683,
      "loss": 3.0966,
      "step": 113812
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.8603856563568115,
      "learning_rate": 0.0003056794062746197,
      "loss": 2.9785,
      "step": 113813
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.641144037246704,
      "learning_rate": 0.0003056753164450066,
      "loss": 3.0984,
      "step": 113814
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0450587272644043,
      "learning_rate": 0.00030567122661433844,
      "loss": 2.9694,
      "step": 113815
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1015002727508545,
      "learning_rate": 0.0003056671367826158,
      "loss": 3.0111,
      "step": 113816
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.208815097808838,
      "learning_rate": 0.0003056630469498396,
      "loss": 2.9484,
      "step": 113817
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9195014238357544,
      "learning_rate": 0.00030565895711601057,
      "loss": 2.9756,
      "step": 113818
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.832486867904663,
      "learning_rate": 0.00030565486728112933,
      "loss": 2.8254,
      "step": 113819
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.6165709495544434,
      "learning_rate": 0.0003056507774451968,
      "loss": 3.0886,
      "step": 113820
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.296299695968628,
      "learning_rate": 0.0003056466876082137,
      "loss": 2.9558,
      "step": 113821
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1207275390625,
      "learning_rate": 0.0003056425977701807,
      "loss": 2.7742,
      "step": 113822
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.664724111557007,
      "learning_rate": 0.00030563850793109866,
      "loss": 2.7933,
      "step": 113823
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9033833742141724,
      "learning_rate": 0.0003056344180909684,
      "loss": 2.7869,
      "step": 113824
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.943100929260254,
      "learning_rate": 0.0003056303282497905,
      "loss": 3.1875,
      "step": 113825
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7264816761016846,
      "learning_rate": 0.0003056262384075658,
      "loss": 3.0395,
      "step": 113826
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.98339581489563,
      "learning_rate": 0.0003056221485642952,
      "loss": 2.928,
      "step": 113827
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.183927297592163,
      "learning_rate": 0.0003056180587199792,
      "loss": 3.0049,
      "step": 113828
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1969990730285645,
      "learning_rate": 0.00030561396887461876,
      "loss": 3.0382,
      "step": 113829
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0456368923187256,
      "learning_rate": 0.0003056098790282146,
      "loss": 3.0789,
      "step": 113830
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.714637517929077,
      "learning_rate": 0.0003056057891807675,
      "loss": 2.7652,
      "step": 113831
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.478107213973999,
      "learning_rate": 0.00030560169933227803,
      "loss": 2.8924,
      "step": 113832
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.5964508056640625,
      "learning_rate": 0.0003055976094827472,
      "loss": 2.8523,
      "step": 113833
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.044766426086426,
      "learning_rate": 0.00030559351963217567,
      "loss": 3.2418,
      "step": 113834
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.5267107486724854,
      "learning_rate": 0.00030558942978056416,
      "loss": 3.0135,
      "step": 113835
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7214183807373047,
      "learning_rate": 0.00030558533992791354,
      "loss": 2.607,
      "step": 113836
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9540793895721436,
      "learning_rate": 0.00030558125007422443,
      "loss": 3.1257,
      "step": 113837
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.2882606983184814,
      "learning_rate": 0.00030557716021949763,
      "loss": 2.6671,
      "step": 113838
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8736218214035034,
      "learning_rate": 0.000305573070363734,
      "loss": 2.9164,
      "step": 113839
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.828082799911499,
      "learning_rate": 0.0003055689805069342,
      "loss": 2.877,
      "step": 113840
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1034343242645264,
      "learning_rate": 0.000305564890649099,
      "loss": 3.1429,
      "step": 113841
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4353621006011963,
      "learning_rate": 0.0003055608007902293,
      "loss": 2.7104,
      "step": 113842
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4728400707244873,
      "learning_rate": 0.00030555671093032564,
      "loss": 2.9551,
      "step": 113843
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8192352056503296,
      "learning_rate": 0.00030555262106938885,
      "loss": 2.8483,
      "step": 113844
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7087430953979492,
      "learning_rate": 0.0003055485312074198,
      "loss": 3.1887,
      "step": 113845
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9096907377243042,
      "learning_rate": 0.0003055444413444191,
      "loss": 2.9324,
      "step": 113846
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9040497541427612,
      "learning_rate": 0.00030554035148038763,
      "loss": 3.0428,
      "step": 113847
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.457745313644409,
      "learning_rate": 0.00030553626161532616,
      "loss": 2.8802,
      "step": 113848
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9767471551895142,
      "learning_rate": 0.0003055321717492353,
      "loss": 2.6692,
      "step": 113849
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0471410751342773,
      "learning_rate": 0.0003055280818821159,
      "loss": 2.9153,
      "step": 113850
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9475460052490234,
      "learning_rate": 0.00030552399201396884,
      "loss": 2.9045,
      "step": 113851
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.331939220428467,
      "learning_rate": 0.0003055199021447946,
      "loss": 3.0642,
      "step": 113852
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7979803085327148,
      "learning_rate": 0.0003055158122745942,
      "loss": 2.9316,
      "step": 113853
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0198142528533936,
      "learning_rate": 0.00030551172240336836,
      "loss": 2.8601,
      "step": 113854
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.738518238067627,
      "learning_rate": 0.0003055076325311177,
      "loss": 3.1334,
      "step": 113855
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.005734443664551,
      "learning_rate": 0.000305503542657843,
      "loss": 2.7857,
      "step": 113856
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9240272045135498,
      "learning_rate": 0.0003054994527835453,
      "loss": 3.014,
      "step": 113857
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9543670415878296,
      "learning_rate": 0.000305495362908225,
      "loss": 3.1344,
      "step": 113858
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2187211513519287,
      "learning_rate": 0.000305491273031883,
      "loss": 3.0871,
      "step": 113859
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.5974780321121216,
      "learning_rate": 0.00030548718315452015,
      "loss": 2.975,
      "step": 113860
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.471813440322876,
      "learning_rate": 0.00030548309327613714,
      "loss": 2.9082,
      "step": 113861
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9976375102996826,
      "learning_rate": 0.0003054790033967346,
      "loss": 3.1836,
      "step": 113862
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.04986834526062,
      "learning_rate": 0.0003054749135163135,
      "loss": 2.8973,
      "step": 113863
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.350447177886963,
      "learning_rate": 0.00030547082363487446,
      "loss": 2.9989,
      "step": 113864
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0196406841278076,
      "learning_rate": 0.0003054667337524183,
      "loss": 2.9484,
      "step": 113865
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.668980360031128,
      "learning_rate": 0.0003054626438689458,
      "loss": 2.8835,
      "step": 113866
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.639399766921997,
      "learning_rate": 0.00030545855398445774,
      "loss": 2.9037,
      "step": 113867
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.667898416519165,
      "learning_rate": 0.0003054544640989547,
      "loss": 3.0236,
      "step": 113868
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.163743257522583,
      "learning_rate": 0.0003054503742124377,
      "loss": 3.1029,
      "step": 113869
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0031285285949707,
      "learning_rate": 0.0003054462843249072,
      "loss": 3.1357,
      "step": 113870
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7801263332366943,
      "learning_rate": 0.0003054421944363643,
      "loss": 2.7589,
      "step": 113871
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.8324835300445557,
      "learning_rate": 0.0003054381045468096,
      "loss": 2.9905,
      "step": 113872
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6878294944763184,
      "learning_rate": 0.00030543401465624376,
      "loss": 2.9193,
      "step": 113873
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0694854259490967,
      "learning_rate": 0.0003054299247646677,
      "loss": 3.1831,
      "step": 113874
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.097386360168457,
      "learning_rate": 0.00030542583487208207,
      "loss": 2.9338,
      "step": 113875
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.5100269317626953,
      "learning_rate": 0.0003054217449784877,
      "loss": 2.8923,
      "step": 113876
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7237586975097656,
      "learning_rate": 0.00030541765508388524,
      "loss": 2.938,
      "step": 113877
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.786579966545105,
      "learning_rate": 0.00030541356518827567,
      "loss": 3.1051,
      "step": 113878
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2875163555145264,
      "learning_rate": 0.0003054094752916595,
      "loss": 3.0767,
      "step": 113879
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0813493728637695,
      "learning_rate": 0.00030540538539403767,
      "loss": 3.0113,
      "step": 113880
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2572004795074463,
      "learning_rate": 0.00030540129549541087,
      "loss": 3.058,
      "step": 113881
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1397457122802734,
      "learning_rate": 0.0003053972055957799,
      "loss": 2.9811,
      "step": 113882
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6683269739151,
      "learning_rate": 0.0003053931156951454,
      "loss": 3.0094,
      "step": 113883
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.783483624458313,
      "learning_rate": 0.0003053890257935083,
      "loss": 3.0192,
      "step": 113884
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8335107564926147,
      "learning_rate": 0.00030538493589086927,
      "loss": 3.0259,
      "step": 113885
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6053979396820068,
      "learning_rate": 0.00030538084598722906,
      "loss": 3.0064,
      "step": 113886
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.00030517578125,
      "learning_rate": 0.0003053767560825884,
      "loss": 2.9865,
      "step": 113887
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8384203910827637,
      "learning_rate": 0.0003053726661769482,
      "loss": 3.0848,
      "step": 113888
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.061250686645508,
      "learning_rate": 0.000305368576270309,
      "loss": 2.9422,
      "step": 113889
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.039210557937622,
      "learning_rate": 0.0003053644863626718,
      "loss": 2.9748,
      "step": 113890
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7893810272216797,
      "learning_rate": 0.00030536039645403726,
      "loss": 3.0256,
      "step": 113891
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.809226632118225,
      "learning_rate": 0.000305356306544406,
      "loss": 3.1876,
      "step": 113892
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6537222862243652,
      "learning_rate": 0.0003053522166337789,
      "loss": 2.9378,
      "step": 113893
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9996981620788574,
      "learning_rate": 0.0003053481267221568,
      "loss": 2.9561,
      "step": 113894
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9840257167816162,
      "learning_rate": 0.00030534403680954037,
      "loss": 2.971,
      "step": 113895
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.827358365058899,
      "learning_rate": 0.0003053399468959304,
      "loss": 3.024,
      "step": 113896
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7182042598724365,
      "learning_rate": 0.0003053358569813276,
      "loss": 3.2165,
      "step": 113897
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9276427030563354,
      "learning_rate": 0.0003053317670657327,
      "loss": 3.3485,
      "step": 113898
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7637840509414673,
      "learning_rate": 0.0003053276771491466,
      "loss": 2.9337,
      "step": 113899
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.5661494731903076,
      "learning_rate": 0.00030532358723157004,
      "loss": 2.8231,
      "step": 113900
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2592709064483643,
      "learning_rate": 0.0003053194973130036,
      "loss": 3.0277,
      "step": 113901
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9412245750427246,
      "learning_rate": 0.0003053154073934482,
      "loss": 2.9436,
      "step": 113902
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9706931114196777,
      "learning_rate": 0.00030531131747290463,
      "loss": 2.9538,
      "step": 113903
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.739105463027954,
      "learning_rate": 0.0003053072275513736,
      "loss": 3.0924,
      "step": 113904
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9561976194381714,
      "learning_rate": 0.00030530313762885573,
      "loss": 3.0023,
      "step": 113905
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7009460926055908,
      "learning_rate": 0.00030529904770535207,
      "loss": 3.0368,
      "step": 113906
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2710421085357666,
      "learning_rate": 0.00030529495778086304,
      "loss": 3.2442,
      "step": 113907
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0424680709838867,
      "learning_rate": 0.00030529086785538966,
      "loss": 3.1282,
      "step": 113908
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.441498041152954,
      "learning_rate": 0.00030528677792893263,
      "loss": 2.9933,
      "step": 113909
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.104644536972046,
      "learning_rate": 0.00030528268800149267,
      "loss": 3.0418,
      "step": 113910
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9091631174087524,
      "learning_rate": 0.00030527859807307053,
      "loss": 3.0416,
      "step": 113911
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6449096202850342,
      "learning_rate": 0.0003052745081436671,
      "loss": 3.0739,
      "step": 113912
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.382272243499756,
      "learning_rate": 0.0003052704182132829,
      "loss": 2.8385,
      "step": 113913
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9485808610916138,
      "learning_rate": 0.0003052663282819189,
      "loss": 3.1726,
      "step": 113914
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7778873443603516,
      "learning_rate": 0.0003052622383495758,
      "loss": 3.1742,
      "step": 113915
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8539721965789795,
      "learning_rate": 0.0003052581484162543,
      "loss": 2.9112,
      "step": 113916
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1225898265838623,
      "learning_rate": 0.00030525405848195526,
      "loss": 2.8326,
      "step": 113917
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.987638473510742,
      "learning_rate": 0.00030524996854667944,
      "loss": 2.8804,
      "step": 113918
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.747403860092163,
      "learning_rate": 0.00030524587861042746,
      "loss": 2.6503,
      "step": 113919
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.640031576156616,
      "learning_rate": 0.00030524178867320014,
      "loss": 2.8365,
      "step": 113920
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1415796279907227,
      "learning_rate": 0.00030523769873499844,
      "loss": 3.0164,
      "step": 113921
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.119655132293701,
      "learning_rate": 0.00030523360879582275,
      "loss": 2.9711,
      "step": 113922
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.220468044281006,
      "learning_rate": 0.00030522951885567415,
      "loss": 3.0385,
      "step": 113923
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1324574947357178,
      "learning_rate": 0.0003052254289145533,
      "loss": 2.9169,
      "step": 113924
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6300019025802612,
      "learning_rate": 0.0003052213389724609,
      "loss": 3.1776,
      "step": 113925
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9130656719207764,
      "learning_rate": 0.00030521724902939773,
      "loss": 2.8037,
      "step": 113926
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3352034091949463,
      "learning_rate": 0.00030521315908536465,
      "loss": 2.8117,
      "step": 113927
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.664379596710205,
      "learning_rate": 0.0003052090691403623,
      "loss": 3.0881,
      "step": 113928
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1292226314544678,
      "learning_rate": 0.0003052049791943915,
      "loss": 3.2784,
      "step": 113929
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.5379865169525146,
      "learning_rate": 0.000305200889247453,
      "loss": 2.9391,
      "step": 113930
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3505194187164307,
      "learning_rate": 0.00030519679929954754,
      "loss": 2.8895,
      "step": 113931
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9905734062194824,
      "learning_rate": 0.00030519270935067583,
      "loss": 3.0797,
      "step": 113932
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0026562213897705,
      "learning_rate": 0.0003051886194008388,
      "loss": 2.7497,
      "step": 113933
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2472429275512695,
      "learning_rate": 0.0003051845294500371,
      "loss": 2.938,
      "step": 113934
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.600860595703125,
      "learning_rate": 0.00030518043949827144,
      "loss": 3.1741,
      "step": 113935
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.756322145462036,
      "learning_rate": 0.0003051763495455426,
      "loss": 3.0423,
      "step": 113936
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7386739253997803,
      "learning_rate": 0.00030517225959185147,
      "loss": 3.0884,
      "step": 113937
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3325605392456055,
      "learning_rate": 0.00030516816963719874,
      "loss": 3.0007,
      "step": 113938
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0730810165405273,
      "learning_rate": 0.0003051640796815851,
      "loss": 2.8359,
      "step": 113939
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.95750892162323,
      "learning_rate": 0.0003051599897250114,
      "loss": 2.9521,
      "step": 113940
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7032463550567627,
      "learning_rate": 0.0003051558997674783,
      "loss": 2.8842,
      "step": 113941
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.11440372467041,
      "learning_rate": 0.0003051518098089866,
      "loss": 2.9845,
      "step": 113942
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.636433482170105,
      "learning_rate": 0.0003051477198495372,
      "loss": 3.1039,
      "step": 113943
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8345966339111328,
      "learning_rate": 0.00030514362988913067,
      "loss": 3.002,
      "step": 113944
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.4344737529754639,
      "learning_rate": 0.0003051395399277678,
      "loss": 3.0114,
      "step": 113945
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.674905776977539,
      "learning_rate": 0.00030513544996544945,
      "loss": 3.0831,
      "step": 113946
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.126091480255127,
      "learning_rate": 0.00030513136000217633,
      "loss": 2.9306,
      "step": 113947
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9895036220550537,
      "learning_rate": 0.0003051272700379491,
      "loss": 2.9294,
      "step": 113948
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0694122314453125,
      "learning_rate": 0.0003051231800727688,
      "loss": 3.0164,
      "step": 113949
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.104576587677002,
      "learning_rate": 0.0003051190901066358,
      "loss": 2.7249,
      "step": 113950
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.713366985321045,
      "learning_rate": 0.0003051150001395512,
      "loss": 3.0729,
      "step": 113951
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9375311136245728,
      "learning_rate": 0.0003051109101715156,
      "loss": 3.0195,
      "step": 113952
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.10514235496521,
      "learning_rate": 0.00030510682020252973,
      "loss": 2.7151,
      "step": 113953
    },
    {
      "epoch": 1.48,
      "grad_norm": 4.121781349182129,
      "learning_rate": 0.0003051027302325944,
      "loss": 2.9102,
      "step": 113954
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0682146549224854,
      "learning_rate": 0.0003050986402617105,
      "loss": 3.0716,
      "step": 113955
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.930576205253601,
      "learning_rate": 0.00030509455028987856,
      "loss": 3.1205,
      "step": 113956
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.195683240890503,
      "learning_rate": 0.00030509046031709943,
      "loss": 2.9111,
      "step": 113957
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.108781099319458,
      "learning_rate": 0.00030508637034337395,
      "loss": 2.8744,
      "step": 113958
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3418102264404297,
      "learning_rate": 0.00030508228036870284,
      "loss": 3.0845,
      "step": 113959
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7887240648269653,
      "learning_rate": 0.0003050781903930868,
      "loss": 3.2367,
      "step": 113960
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.6339495182037354,
      "learning_rate": 0.00030507410041652656,
      "loss": 2.6537,
      "step": 113961
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.995844841003418,
      "learning_rate": 0.00030507001043902297,
      "loss": 3.0273,
      "step": 113962
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.03179669380188,
      "learning_rate": 0.0003050659204605769,
      "loss": 2.9867,
      "step": 113963
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8383653163909912,
      "learning_rate": 0.00030506183048118884,
      "loss": 3.1693,
      "step": 113964
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.022106170654297,
      "learning_rate": 0.00030505774050085976,
      "loss": 2.8528,
      "step": 113965
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8611174821853638,
      "learning_rate": 0.00030505365051959035,
      "loss": 2.8232,
      "step": 113966
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7279082536697388,
      "learning_rate": 0.0003050495605373813,
      "loss": 3.0875,
      "step": 113967
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7960010766983032,
      "learning_rate": 0.0003050454705542335,
      "loss": 3.091,
      "step": 113968
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7724262475967407,
      "learning_rate": 0.00030504138057014764,
      "loss": 3.0441,
      "step": 113969
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9281619787216187,
      "learning_rate": 0.0003050372905851245,
      "loss": 3.0348,
      "step": 113970
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8120067119598389,
      "learning_rate": 0.00030503320059916485,
      "loss": 2.9574,
      "step": 113971
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8463274240493774,
      "learning_rate": 0.00030502911061226935,
      "loss": 3.1109,
      "step": 113972
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6952537298202515,
      "learning_rate": 0.000305025020624439,
      "loss": 3.0646,
      "step": 113973
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.75237238407135,
      "learning_rate": 0.0003050209306356743,
      "loss": 2.9989,
      "step": 113974
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0306169986724854,
      "learning_rate": 0.0003050168406459761,
      "loss": 2.6579,
      "step": 113975
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0622401237487793,
      "learning_rate": 0.0003050127506553452,
      "loss": 3.0497,
      "step": 113976
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7000516653060913,
      "learning_rate": 0.00030500866066378237,
      "loss": 3.0201,
      "step": 113977
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7909551858901978,
      "learning_rate": 0.0003050045706712882,
      "loss": 3.0468,
      "step": 113978
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6376529932022095,
      "learning_rate": 0.0003050004806778637,
      "loss": 3.0353,
      "step": 113979
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.440321922302246,
      "learning_rate": 0.00030499639068350956,
      "loss": 3.115,
      "step": 113980
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8452444076538086,
      "learning_rate": 0.0003049923006882264,
      "loss": 3.0223,
      "step": 113981
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2452495098114014,
      "learning_rate": 0.0003049882106920152,
      "loss": 3.0302,
      "step": 113982
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0112953186035156,
      "learning_rate": 0.00030498412069487646,
      "loss": 3.2078,
      "step": 113983
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8678665161132812,
      "learning_rate": 0.00030498003069681114,
      "loss": 2.7213,
      "step": 113984
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7806991338729858,
      "learning_rate": 0.00030497594069781996,
      "loss": 3.0806,
      "step": 113985
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.504094123840332,
      "learning_rate": 0.00030497185069790363,
      "loss": 2.9827,
      "step": 113986
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.528332471847534,
      "learning_rate": 0.00030496776069706286,
      "loss": 2.9211,
      "step": 113987
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2872226238250732,
      "learning_rate": 0.0003049636706952986,
      "loss": 3.2271,
      "step": 113988
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.919620394706726,
      "learning_rate": 0.0003049595806926115,
      "loss": 2.9154,
      "step": 113989
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8470427989959717,
      "learning_rate": 0.0003049554906890022,
      "loss": 2.6745,
      "step": 113990
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6667760610580444,
      "learning_rate": 0.0003049514006844718,
      "loss": 3.0488,
      "step": 113991
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6461857557296753,
      "learning_rate": 0.00030494731067902067,
      "loss": 2.9919,
      "step": 113992
    },
    {
      "epoch": 1.48,
      "grad_norm": 3.1217641830444336,
      "learning_rate": 0.00030494322067264976,
      "loss": 3.0859,
      "step": 113993
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.916508436203003,
      "learning_rate": 0.00030493913066535983,
      "loss": 2.9559,
      "step": 113994
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.295956611633301,
      "learning_rate": 0.00030493504065715163,
      "loss": 3.0771,
      "step": 113995
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0573675632476807,
      "learning_rate": 0.00030493095064802587,
      "loss": 2.916,
      "step": 113996
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7969834804534912,
      "learning_rate": 0.0003049268606379834,
      "loss": 2.8619,
      "step": 113997
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3245837688446045,
      "learning_rate": 0.000304922770627025,
      "loss": 3.0857,
      "step": 113998
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9858814477920532,
      "learning_rate": 0.0003049186806151512,
      "loss": 2.9459,
      "step": 113999
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9696012735366821,
      "learning_rate": 0.00030491459060236306,
      "loss": 3.0258,
      "step": 114000
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0287163257598877,
      "learning_rate": 0.00030491050058866116,
      "loss": 2.8702,
      "step": 114001
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.488603353500366,
      "learning_rate": 0.00030490641057404625,
      "loss": 3.0586,
      "step": 114002
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0285933017730713,
      "learning_rate": 0.0003049023205585192,
      "loss": 2.9758,
      "step": 114003
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.09978985786438,
      "learning_rate": 0.00030489823054208077,
      "loss": 2.9247,
      "step": 114004
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7629421949386597,
      "learning_rate": 0.00030489414052473155,
      "loss": 2.9712,
      "step": 114005
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0406863689422607,
      "learning_rate": 0.00030489005050647253,
      "loss": 3.0085,
      "step": 114006
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2279927730560303,
      "learning_rate": 0.0003048859604873043,
      "loss": 2.8823,
      "step": 114007
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.066756010055542,
      "learning_rate": 0.00030488187046722766,
      "loss": 3.1019,
      "step": 114008
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3314197063446045,
      "learning_rate": 0.0003048777804462434,
      "loss": 2.8478,
      "step": 114009
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.408599615097046,
      "learning_rate": 0.00030487369042435234,
      "loss": 3.1432,
      "step": 114010
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.075716733932495,
      "learning_rate": 0.00030486960040155506,
      "loss": 3.1564,
      "step": 114011
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.954459309577942,
      "learning_rate": 0.0003048655103778525,
      "loss": 2.9108,
      "step": 114012
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1102054119110107,
      "learning_rate": 0.00030486142035324534,
      "loss": 3.0225,
      "step": 114013
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.027190923690796,
      "learning_rate": 0.0003048573303277343,
      "loss": 3.1092,
      "step": 114014
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1425395011901855,
      "learning_rate": 0.00030485324030132024,
      "loss": 3.0633,
      "step": 114015
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.622615337371826,
      "learning_rate": 0.00030484915027400385,
      "loss": 2.9096,
      "step": 114016
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.939634919166565,
      "learning_rate": 0.0003048450602457859,
      "loss": 2.9271,
      "step": 114017
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0904526710510254,
      "learning_rate": 0.00030484097021666717,
      "loss": 3.1802,
      "step": 114018
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9559428691864014,
      "learning_rate": 0.0003048368801866485,
      "loss": 2.8155,
      "step": 114019
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7996381521224976,
      "learning_rate": 0.00030483279015573045,
      "loss": 2.8585,
      "step": 114020
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7245362997055054,
      "learning_rate": 0.00030482870012391397,
      "loss": 2.838,
      "step": 114021
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7755849361419678,
      "learning_rate": 0.0003048246100911997,
      "loss": 3.2,
      "step": 114022
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8877307176589966,
      "learning_rate": 0.00030482052005758844,
      "loss": 2.9216,
      "step": 114023
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8526324033737183,
      "learning_rate": 0.0003048164300230809,
      "loss": 2.8635,
      "step": 114024
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8568065166473389,
      "learning_rate": 0.000304812339987678,
      "loss": 3.143,
      "step": 114025
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6841005086898804,
      "learning_rate": 0.00030480824995138034,
      "loss": 3.0635,
      "step": 114026
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7069355249404907,
      "learning_rate": 0.00030480415991418876,
      "loss": 2.8951,
      "step": 114027
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.664109468460083,
      "learning_rate": 0.000304800069876104,
      "loss": 2.7513,
      "step": 114028
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8566302061080933,
      "learning_rate": 0.00030479597983712676,
      "loss": 2.904,
      "step": 114029
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7297956943511963,
      "learning_rate": 0.0003047918897972579,
      "loss": 3.0048,
      "step": 114030
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.003342390060425,
      "learning_rate": 0.00030478779975649816,
      "loss": 2.7855,
      "step": 114031
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.160090684890747,
      "learning_rate": 0.0003047837097148483,
      "loss": 3.0179,
      "step": 114032
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.9546151161193848,
      "learning_rate": 0.000304779619672309,
      "loss": 2.8818,
      "step": 114033
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.778719186782837,
      "learning_rate": 0.0003047755296288811,
      "loss": 2.7604,
      "step": 114034
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1304500102996826,
      "learning_rate": 0.00030477143958456527,
      "loss": 3.0609,
      "step": 114035
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.7235121726989746,
      "learning_rate": 0.0003047673495393624,
      "loss": 3.0374,
      "step": 114036
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.4768459796905518,
      "learning_rate": 0.0003047632594932732,
      "loss": 2.9865,
      "step": 114037
    },
    {
      "epoch": 1.48,
      "grad_norm": 6.081432342529297,
      "learning_rate": 0.00030475916944629845,
      "loss": 2.9218,
      "step": 114038
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9319970607757568,
      "learning_rate": 0.00030475507939843876,
      "loss": 2.96,
      "step": 114039
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.8515186309814453,
      "learning_rate": 0.00030475098934969517,
      "loss": 2.9603,
      "step": 114040
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.3039674758911133,
      "learning_rate": 0.00030474689930006817,
      "loss": 3.0802,
      "step": 114041
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9681648015975952,
      "learning_rate": 0.00030474280924955863,
      "loss": 3.1626,
      "step": 114042
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.1802279949188232,
      "learning_rate": 0.0003047387191981673,
      "loss": 3.0091,
      "step": 114043
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.6575051546096802,
      "learning_rate": 0.0003047346291458951,
      "loss": 2.9726,
      "step": 114044
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.181227922439575,
      "learning_rate": 0.0003047305390927425,
      "loss": 3.1605,
      "step": 114045
    },
    {
      "epoch": 1.48,
      "grad_norm": 1.9463924169540405,
      "learning_rate": 0.00030472644903871036,
      "loss": 2.969,
      "step": 114046
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.0722672939300537,
      "learning_rate": 0.00030472235898379966,
      "loss": 3.0663,
      "step": 114047
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.7779788970947266,
      "learning_rate": 0.0003047182689280108,
      "loss": 3.0279,
      "step": 114048
    },
    {
      "epoch": 1.48,
      "grad_norm": 2.2023980617523193,
      "learning_rate": 0.00030471417887134486,
      "loss": 3.0233,
      "step": 114049
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2774057388305664,
      "learning_rate": 0.00030471008881380237,
      "loss": 3.0383,
      "step": 114050
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.891212821006775,
      "learning_rate": 0.0003047059987553843,
      "loss": 2.9846,
      "step": 114051
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7995814085006714,
      "learning_rate": 0.00030470190869609124,
      "loss": 2.9163,
      "step": 114052
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.366248369216919,
      "learning_rate": 0.00030469781863592396,
      "loss": 3.1272,
      "step": 114053
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.553218364715576,
      "learning_rate": 0.00030469372857488334,
      "loss": 2.9565,
      "step": 114054
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.107697010040283,
      "learning_rate": 0.00030468963851297,
      "loss": 2.7854,
      "step": 114055
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.810239791870117,
      "learning_rate": 0.0003046855484501848,
      "loss": 3.0608,
      "step": 114056
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3989152908325195,
      "learning_rate": 0.00030468145838652845,
      "loss": 2.9827,
      "step": 114057
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.320237636566162,
      "learning_rate": 0.0003046773683220018,
      "loss": 2.8554,
      "step": 114058
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8162329196929932,
      "learning_rate": 0.00030467327825660546,
      "loss": 2.8803,
      "step": 114059
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.268650770187378,
      "learning_rate": 0.0003046691881903403,
      "loss": 2.9858,
      "step": 114060
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.1388847827911377,
      "learning_rate": 0.00030466509812320707,
      "loss": 3.1217,
      "step": 114061
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8858940601348877,
      "learning_rate": 0.0003046610080552064,
      "loss": 2.8649,
      "step": 114062
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1353657245635986,
      "learning_rate": 0.00030465691798633933,
      "loss": 2.7896,
      "step": 114063
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7417585849761963,
      "learning_rate": 0.00030465282791660635,
      "loss": 2.7725,
      "step": 114064
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0398616790771484,
      "learning_rate": 0.00030464873784600833,
      "loss": 3.214,
      "step": 114065
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.335994005203247,
      "learning_rate": 0.00030464464777454605,
      "loss": 2.8562,
      "step": 114066
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1478445529937744,
      "learning_rate": 0.0003046405577022202,
      "loss": 3.1611,
      "step": 114067
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.904861569404602,
      "learning_rate": 0.0003046364676290316,
      "loss": 2.8682,
      "step": 114068
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.916588544845581,
      "learning_rate": 0.00030463237755498103,
      "loss": 2.9492,
      "step": 114069
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.182692289352417,
      "learning_rate": 0.00030462828748006914,
      "loss": 2.8578,
      "step": 114070
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8235063552856445,
      "learning_rate": 0.0003046241974042969,
      "loss": 3.2214,
      "step": 114071
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7408322095870972,
      "learning_rate": 0.00030462010732766473,
      "loss": 2.8307,
      "step": 114072
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.746028184890747,
      "learning_rate": 0.00030461601725017374,
      "loss": 3.0258,
      "step": 114073
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0306146144866943,
      "learning_rate": 0.00030461192717182455,
      "loss": 2.794,
      "step": 114074
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6325546503067017,
      "learning_rate": 0.0003046078370926178,
      "loss": 3.0479,
      "step": 114075
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.047325372695923,
      "learning_rate": 0.00030460374701255443,
      "loss": 3.1201,
      "step": 114076
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.154954671859741,
      "learning_rate": 0.0003045996569316352,
      "loss": 3.0383,
      "step": 114077
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8487768173217773,
      "learning_rate": 0.00030459556684986073,
      "loss": 2.9641,
      "step": 114078
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2356762886047363,
      "learning_rate": 0.0003045914767672319,
      "loss": 3.0986,
      "step": 114079
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8855379819869995,
      "learning_rate": 0.00030458738668374946,
      "loss": 3.0951,
      "step": 114080
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5131170749664307,
      "learning_rate": 0.00030458329659941406,
      "loss": 2.8493,
      "step": 114081
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1375319957733154,
      "learning_rate": 0.00030457920651422654,
      "loss": 3.1167,
      "step": 114082
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9954607486724854,
      "learning_rate": 0.00030457511642818775,
      "loss": 3.0643,
      "step": 114083
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9355123043060303,
      "learning_rate": 0.0003045710263412983,
      "loss": 2.9415,
      "step": 114084
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9999750852584839,
      "learning_rate": 0.00030456693625355893,
      "loss": 2.976,
      "step": 114085
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.814500570297241,
      "learning_rate": 0.00030456284616497064,
      "loss": 2.7254,
      "step": 114086
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8439232110977173,
      "learning_rate": 0.0003045587560755339,
      "loss": 3.1225,
      "step": 114087
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9309098720550537,
      "learning_rate": 0.00030455466598524965,
      "loss": 3.129,
      "step": 114088
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7660053968429565,
      "learning_rate": 0.0003045505758941186,
      "loss": 3.051,
      "step": 114089
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.2310450077056885,
      "learning_rate": 0.0003045464858021415,
      "loss": 3.0428,
      "step": 114090
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5220539569854736,
      "learning_rate": 0.00030454239570931914,
      "loss": 3.102,
      "step": 114091
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0601744651794434,
      "learning_rate": 0.00030453830561565234,
      "loss": 3.145,
      "step": 114092
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9339079856872559,
      "learning_rate": 0.00030453421552114165,
      "loss": 3.1842,
      "step": 114093
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6252429485321045,
      "learning_rate": 0.000304530125425788,
      "loss": 3.1054,
      "step": 114094
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.6082441806793213,
      "learning_rate": 0.0003045260353295922,
      "loss": 2.9761,
      "step": 114095
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0684831142425537,
      "learning_rate": 0.00030452194523255475,
      "loss": 2.9797,
      "step": 114096
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.4460489749908447,
      "learning_rate": 0.00030451785513467675,
      "loss": 3.1854,
      "step": 114097
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.054672718048096,
      "learning_rate": 0.00030451376503595875,
      "loss": 2.8957,
      "step": 114098
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8167126178741455,
      "learning_rate": 0.0003045096749364015,
      "loss": 2.9557,
      "step": 114099
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.152864694595337,
      "learning_rate": 0.0003045055848360058,
      "loss": 3.0718,
      "step": 114100
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7453755140304565,
      "learning_rate": 0.00030450149473477256,
      "loss": 3.0094,
      "step": 114101
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.1070950031280518,
      "learning_rate": 0.0003044974046327023,
      "loss": 2.975,
      "step": 114102
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4956488609313965,
      "learning_rate": 0.0003044933145297959,
      "loss": 2.952,
      "step": 114103
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.284727096557617,
      "learning_rate": 0.00030448922442605415,
      "loss": 3.0255,
      "step": 114104
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.988548755645752,
      "learning_rate": 0.0003044851343214777,
      "loss": 3.0568,
      "step": 114105
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7703003883361816,
      "learning_rate": 0.00030448104421606744,
      "loss": 2.6424,
      "step": 114106
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7373849153518677,
      "learning_rate": 0.0003044769541098241,
      "loss": 3.0599,
      "step": 114107
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0442051887512207,
      "learning_rate": 0.0003044728640027484,
      "loss": 2.6812,
      "step": 114108
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.039426803588867,
      "learning_rate": 0.000304468773894841,
      "loss": 2.9663,
      "step": 114109
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.178360939025879,
      "learning_rate": 0.00030446468378610294,
      "loss": 2.9339,
      "step": 114110
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3640310764312744,
      "learning_rate": 0.00030446059367653463,
      "loss": 3.1787,
      "step": 114111
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.713997721672058,
      "learning_rate": 0.0003044565035661371,
      "loss": 2.8787,
      "step": 114112
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.841603398323059,
      "learning_rate": 0.0003044524134549111,
      "loss": 2.9954,
      "step": 114113
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.804758071899414,
      "learning_rate": 0.0003044483233428572,
      "loss": 3.1333,
      "step": 114114
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7489793300628662,
      "learning_rate": 0.00030444423322997625,
      "loss": 3.1649,
      "step": 114115
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.267577648162842,
      "learning_rate": 0.0003044401431162691,
      "loss": 2.9267,
      "step": 114116
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0422821044921875,
      "learning_rate": 0.00030443605300173646,
      "loss": 3.0382,
      "step": 114117
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0817618370056152,
      "learning_rate": 0.000304431962886379,
      "loss": 2.8982,
      "step": 114118
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.02000093460083,
      "learning_rate": 0.00030442787277019766,
      "loss": 2.5776,
      "step": 114119
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8223862648010254,
      "learning_rate": 0.00030442378265319297,
      "loss": 3.1351,
      "step": 114120
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.255606174468994,
      "learning_rate": 0.00030441969253536586,
      "loss": 3.039,
      "step": 114121
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9559376239776611,
      "learning_rate": 0.0003044156024167171,
      "loss": 3.0063,
      "step": 114122
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.934922695159912,
      "learning_rate": 0.00030441151229724736,
      "loss": 3.1678,
      "step": 114123
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8155490159988403,
      "learning_rate": 0.0003044074221769574,
      "loss": 3.1605,
      "step": 114124
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9743938446044922,
      "learning_rate": 0.0003044033320558481,
      "loss": 2.9758,
      "step": 114125
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8275240659713745,
      "learning_rate": 0.00030439924193392007,
      "loss": 2.941,
      "step": 114126
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9041574001312256,
      "learning_rate": 0.00030439515181117415,
      "loss": 2.9417,
      "step": 114127
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.303219795227051,
      "learning_rate": 0.0003043910616876111,
      "loss": 3.2629,
      "step": 114128
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.257821798324585,
      "learning_rate": 0.00030438697156323164,
      "loss": 2.9408,
      "step": 114129
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.623995065689087,
      "learning_rate": 0.0003043828814380366,
      "loss": 3.2113,
      "step": 114130
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8989315032958984,
      "learning_rate": 0.00030437879131202665,
      "loss": 2.9112,
      "step": 114131
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.221374273300171,
      "learning_rate": 0.00030437470118520267,
      "loss": 3.0658,
      "step": 114132
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.9824953079223633,
      "learning_rate": 0.0003043706110575653,
      "loss": 2.6632,
      "step": 114133
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.641448974609375,
      "learning_rate": 0.00030436652092911536,
      "loss": 3.0132,
      "step": 114134
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9539295434951782,
      "learning_rate": 0.0003043624307998536,
      "loss": 3.007,
      "step": 114135
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0895063877105713,
      "learning_rate": 0.0003043583406697808,
      "loss": 2.8553,
      "step": 114136
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2780447006225586,
      "learning_rate": 0.0003043542505388977,
      "loss": 3.1709,
      "step": 114137
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6912767887115479,
      "learning_rate": 0.000304350160407205,
      "loss": 2.7898,
      "step": 114138
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8497724533081055,
      "learning_rate": 0.00030434607027470363,
      "loss": 2.9769,
      "step": 114139
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.201038122177124,
      "learning_rate": 0.0003043419801413942,
      "loss": 3.0402,
      "step": 114140
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8715333938598633,
      "learning_rate": 0.00030433789000727747,
      "loss": 2.8416,
      "step": 114141
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9528125524520874,
      "learning_rate": 0.0003043337998723543,
      "loss": 3.1975,
      "step": 114142
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9463037252426147,
      "learning_rate": 0.00030432970973662537,
      "loss": 2.9926,
      "step": 114143
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8142964839935303,
      "learning_rate": 0.00030432561960009143,
      "loss": 2.8472,
      "step": 114144
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8754078149795532,
      "learning_rate": 0.00030432152946275335,
      "loss": 2.7429,
      "step": 114145
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8797380924224854,
      "learning_rate": 0.00030431743932461176,
      "loss": 3.1221,
      "step": 114146
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1426939964294434,
      "learning_rate": 0.0003043133491856675,
      "loss": 3.01,
      "step": 114147
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7575327157974243,
      "learning_rate": 0.00030430925904592127,
      "loss": 3.0433,
      "step": 114148
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.792946457862854,
      "learning_rate": 0.0003043051689053739,
      "loss": 3.0188,
      "step": 114149
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7590413093566895,
      "learning_rate": 0.00030430107876402616,
      "loss": 2.7655,
      "step": 114150
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7402087450027466,
      "learning_rate": 0.0003042969886218787,
      "loss": 2.896,
      "step": 114151
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5160627365112305,
      "learning_rate": 0.0003042928984789324,
      "loss": 2.847,
      "step": 114152
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6460630893707275,
      "learning_rate": 0.00030428880833518795,
      "loss": 2.8307,
      "step": 114153
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7787648439407349,
      "learning_rate": 0.0003042847181906461,
      "loss": 3.0452,
      "step": 114154
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7443495988845825,
      "learning_rate": 0.00030428062804530763,
      "loss": 3.2187,
      "step": 114155
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0062239170074463,
      "learning_rate": 0.0003042765378991734,
      "loss": 2.8945,
      "step": 114156
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.302905559539795,
      "learning_rate": 0.000304272447752244,
      "loss": 2.7952,
      "step": 114157
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.370238780975342,
      "learning_rate": 0.0003042683576045203,
      "loss": 3.0282,
      "step": 114158
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.086967945098877,
      "learning_rate": 0.0003042642674560031,
      "loss": 2.8877,
      "step": 114159
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.556281805038452,
      "learning_rate": 0.00030426017730669297,
      "loss": 2.9489,
      "step": 114160
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8239701986312866,
      "learning_rate": 0.00030425608715659085,
      "loss": 3.16,
      "step": 114161
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.196929454803467,
      "learning_rate": 0.00030425199700569747,
      "loss": 2.8896,
      "step": 114162
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9297720193862915,
      "learning_rate": 0.0003042479068540135,
      "loss": 3.0195,
      "step": 114163
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.737950325012207,
      "learning_rate": 0.0003042438167015398,
      "loss": 2.7364,
      "step": 114164
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7370190620422363,
      "learning_rate": 0.0003042397265482771,
      "loss": 3.0612,
      "step": 114165
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8662159442901611,
      "learning_rate": 0.00030423563639422614,
      "loss": 3.1124,
      "step": 114166
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8405911922454834,
      "learning_rate": 0.0003042315462393876,
      "loss": 3.1091,
      "step": 114167
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8542035818099976,
      "learning_rate": 0.0003042274560837625,
      "loss": 2.8035,
      "step": 114168
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.995022177696228,
      "learning_rate": 0.0003042233659273514,
      "loss": 3.0777,
      "step": 114169
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.822496771812439,
      "learning_rate": 0.000304219275770155,
      "loss": 3.1045,
      "step": 114170
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4755423069000244,
      "learning_rate": 0.0003042151856121743,
      "loss": 2.9345,
      "step": 114171
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9646164178848267,
      "learning_rate": 0.0003042110954534098,
      "loss": 3.0937,
      "step": 114172
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.170720338821411,
      "learning_rate": 0.00030420700529386243,
      "loss": 2.8411,
      "step": 114173
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1329448223114014,
      "learning_rate": 0.0003042029151335329,
      "loss": 2.8289,
      "step": 114174
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7596232891082764,
      "learning_rate": 0.00030419882497242196,
      "loss": 3.188,
      "step": 114175
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6267316341400146,
      "learning_rate": 0.0003041947348105304,
      "loss": 2.8783,
      "step": 114176
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1891896724700928,
      "learning_rate": 0.00030419064464785894,
      "loss": 2.9181,
      "step": 114177
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.120689630508423,
      "learning_rate": 0.00030418655448440833,
      "loss": 3.2658,
      "step": 114178
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9831818342208862,
      "learning_rate": 0.0003041824643201794,
      "loss": 2.6244,
      "step": 114179
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1418192386627197,
      "learning_rate": 0.0003041783741551729,
      "loss": 2.8913,
      "step": 114180
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3727328777313232,
      "learning_rate": 0.0003041742839893895,
      "loss": 2.8824,
      "step": 114181
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.611360788345337,
      "learning_rate": 0.00030417019382282995,
      "loss": 2.9189,
      "step": 114182
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.829203486442566,
      "learning_rate": 0.0003041661036554953,
      "loss": 2.902,
      "step": 114183
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1866180896759033,
      "learning_rate": 0.00030416201348738587,
      "loss": 2.9274,
      "step": 114184
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.662988543510437,
      "learning_rate": 0.00030415792331850276,
      "loss": 3.1417,
      "step": 114185
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.9274251461029053,
      "learning_rate": 0.00030415383314884667,
      "loss": 2.8457,
      "step": 114186
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.182913303375244,
      "learning_rate": 0.0003041497429784182,
      "loss": 2.9278,
      "step": 114187
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.040890693664551,
      "learning_rate": 0.0003041456528072182,
      "loss": 2.9569,
      "step": 114188
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9712876081466675,
      "learning_rate": 0.00030414156263524754,
      "loss": 2.9325,
      "step": 114189
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.786095142364502,
      "learning_rate": 0.00030413747246250673,
      "loss": 2.9158,
      "step": 114190
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.2769775390625,
      "learning_rate": 0.0003041333822889968,
      "loss": 2.6292,
      "step": 114191
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.5778220891952515,
      "learning_rate": 0.00030412929211471847,
      "loss": 3.207,
      "step": 114192
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9687496423721313,
      "learning_rate": 0.00030412520193967227,
      "loss": 3.0655,
      "step": 114193
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.315873146057129,
      "learning_rate": 0.00030412111176385915,
      "loss": 2.9052,
      "step": 114194
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5075998306274414,
      "learning_rate": 0.00030411702158727994,
      "loss": 2.9076,
      "step": 114195
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9007606506347656,
      "learning_rate": 0.0003041129314099352,
      "loss": 3.0116,
      "step": 114196
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.070115327835083,
      "learning_rate": 0.0003041088412318258,
      "loss": 2.8168,
      "step": 114197
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.076117753982544,
      "learning_rate": 0.00030410475105295255,
      "loss": 3.1571,
      "step": 114198
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8091593980789185,
      "learning_rate": 0.0003041006608733161,
      "loss": 2.8919,
      "step": 114199
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.384216547012329,
      "learning_rate": 0.0003040965706929172,
      "loss": 3.0226,
      "step": 114200
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.076850175857544,
      "learning_rate": 0.0003040924805117568,
      "loss": 3.0284,
      "step": 114201
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.362356662750244,
      "learning_rate": 0.0003040883903298355,
      "loss": 3.1392,
      "step": 114202
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.517181396484375,
      "learning_rate": 0.000304084300147154,
      "loss": 3.0769,
      "step": 114203
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5011632442474365,
      "learning_rate": 0.0003040802099637132,
      "loss": 2.8271,
      "step": 114204
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9923378229141235,
      "learning_rate": 0.00030407611977951386,
      "loss": 3.135,
      "step": 114205
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.930701971054077,
      "learning_rate": 0.0003040720295945567,
      "loss": 3.0941,
      "step": 114206
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.728451728820801,
      "learning_rate": 0.00030406793940884237,
      "loss": 3.0271,
      "step": 114207
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8190395832061768,
      "learning_rate": 0.0003040638492223718,
      "loss": 3.2321,
      "step": 114208
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9881854057312012,
      "learning_rate": 0.00030405975903514563,
      "loss": 3.0442,
      "step": 114209
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7191020250320435,
      "learning_rate": 0.00030405566884716473,
      "loss": 2.9293,
      "step": 114210
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.021972417831421,
      "learning_rate": 0.00030405157865842983,
      "loss": 2.9886,
      "step": 114211
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0180575847625732,
      "learning_rate": 0.00030404748846894167,
      "loss": 2.9904,
      "step": 114212
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9437164068222046,
      "learning_rate": 0.0003040433982787009,
      "loss": 2.7963,
      "step": 114213
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9339340925216675,
      "learning_rate": 0.00030403930808770854,
      "loss": 3.2538,
      "step": 114214
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.253793716430664,
      "learning_rate": 0.000304035217895965,
      "loss": 2.9987,
      "step": 114215
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.614225149154663,
      "learning_rate": 0.0003040311277034714,
      "loss": 2.9377,
      "step": 114216
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.3791022300720215,
      "learning_rate": 0.0003040270375102283,
      "loss": 2.9384,
      "step": 114217
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6525380611419678,
      "learning_rate": 0.00030402294731623645,
      "loss": 2.8109,
      "step": 114218
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.493079662322998,
      "learning_rate": 0.0003040188571214967,
      "loss": 3.1082,
      "step": 114219
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0182411670684814,
      "learning_rate": 0.0003040147669260098,
      "loss": 3.1448,
      "step": 114220
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.741136908531189,
      "learning_rate": 0.0003040106767297764,
      "loss": 3.2068,
      "step": 114221
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8164737224578857,
      "learning_rate": 0.0003040065865327974,
      "loss": 2.9401,
      "step": 114222
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.529888868331909,
      "learning_rate": 0.0003040024963350734,
      "loss": 2.81,
      "step": 114223
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7092537879943848,
      "learning_rate": 0.0003039984061366054,
      "loss": 3.0361,
      "step": 114224
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6661051511764526,
      "learning_rate": 0.00030399431593739395,
      "loss": 3.0882,
      "step": 114225
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7553702592849731,
      "learning_rate": 0.00030399022573743983,
      "loss": 2.8368,
      "step": 114226
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.32038950920105,
      "learning_rate": 0.00030398613553674395,
      "loss": 3.2719,
      "step": 114227
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.202981472015381,
      "learning_rate": 0.0003039820453353069,
      "loss": 3.1248,
      "step": 114228
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1555285453796387,
      "learning_rate": 0.0003039779551331295,
      "loss": 2.9047,
      "step": 114229
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8750752210617065,
      "learning_rate": 0.00030397386493021255,
      "loss": 3.1725,
      "step": 114230
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8830039501190186,
      "learning_rate": 0.00030396977472655685,
      "loss": 2.7864,
      "step": 114231
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2465109825134277,
      "learning_rate": 0.0003039656845221631,
      "loss": 2.787,
      "step": 114232
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8050775527954102,
      "learning_rate": 0.00030396159431703195,
      "loss": 3.0067,
      "step": 114233
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4417169094085693,
      "learning_rate": 0.00030395750411116426,
      "loss": 3.0549,
      "step": 114234
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9415721893310547,
      "learning_rate": 0.00030395341390456094,
      "loss": 2.9716,
      "step": 114235
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.641589641571045,
      "learning_rate": 0.00030394932369722243,
      "loss": 2.6195,
      "step": 114236
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.402897596359253,
      "learning_rate": 0.00030394523348914975,
      "loss": 3.1143,
      "step": 114237
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.801977276802063,
      "learning_rate": 0.0003039411432803436,
      "loss": 3.1275,
      "step": 114238
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.715910792350769,
      "learning_rate": 0.0003039370530708047,
      "loss": 2.8677,
      "step": 114239
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7767364978790283,
      "learning_rate": 0.0003039329628605338,
      "loss": 3.0346,
      "step": 114240
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.462977409362793,
      "learning_rate": 0.00030392887264953176,
      "loss": 2.9678,
      "step": 114241
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7025246620178223,
      "learning_rate": 0.0003039247824377992,
      "loss": 2.8226,
      "step": 114242
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2252037525177,
      "learning_rate": 0.00030392069222533695,
      "loss": 3.0305,
      "step": 114243
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6041218042373657,
      "learning_rate": 0.0003039166020121458,
      "loss": 3.0455,
      "step": 114244
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.275386333465576,
      "learning_rate": 0.0003039125117982264,
      "loss": 2.9199,
      "step": 114245
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.370321035385132,
      "learning_rate": 0.0003039084215835797,
      "loss": 3.2101,
      "step": 114246
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6037344932556152,
      "learning_rate": 0.00030390433136820633,
      "loss": 3.0227,
      "step": 114247
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.025552272796631,
      "learning_rate": 0.00030390024115210703,
      "loss": 3.1608,
      "step": 114248
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.349647283554077,
      "learning_rate": 0.0003038961509352826,
      "loss": 3.0163,
      "step": 114249
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.385039806365967,
      "learning_rate": 0.00030389206071773384,
      "loss": 3.0344,
      "step": 114250
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8754323720932007,
      "learning_rate": 0.0003038879704994615,
      "loss": 3.1034,
      "step": 114251
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.4723893404006958,
      "learning_rate": 0.0003038838802804662,
      "loss": 2.8921,
      "step": 114252
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7843767404556274,
      "learning_rate": 0.0003038797900607489,
      "loss": 2.8268,
      "step": 114253
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.744877576828003,
      "learning_rate": 0.00030387569984031027,
      "loss": 3.0028,
      "step": 114254
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.9449217319488525,
      "learning_rate": 0.00030387160961915105,
      "loss": 3.1946,
      "step": 114255
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.958094835281372,
      "learning_rate": 0.00030386751939727203,
      "loss": 3.1351,
      "step": 114256
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7742104530334473,
      "learning_rate": 0.00030386342917467396,
      "loss": 2.9043,
      "step": 114257
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9802192449569702,
      "learning_rate": 0.0003038593389513576,
      "loss": 2.981,
      "step": 114258
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4920060634613037,
      "learning_rate": 0.0003038552487273238,
      "loss": 3.0631,
      "step": 114259
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6120564937591553,
      "learning_rate": 0.0003038511585025731,
      "loss": 3.1484,
      "step": 114260
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.210630178451538,
      "learning_rate": 0.00030384706827710646,
      "loss": 2.7371,
      "step": 114261
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2280938625335693,
      "learning_rate": 0.0003038429780509246,
      "loss": 2.8953,
      "step": 114262
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.7686450481414795,
      "learning_rate": 0.00030383888782402823,
      "loss": 3.1767,
      "step": 114263
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8076765537261963,
      "learning_rate": 0.00030383479759641817,
      "loss": 2.9527,
      "step": 114264
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0740628242492676,
      "learning_rate": 0.00030383070736809514,
      "loss": 2.9594,
      "step": 114265
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.3223812580108643,
      "learning_rate": 0.00030382661713905984,
      "loss": 2.7975,
      "step": 114266
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1046886444091797,
      "learning_rate": 0.00030382252690931314,
      "loss": 2.9023,
      "step": 114267
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6419004201889038,
      "learning_rate": 0.00030381843667885575,
      "loss": 2.9801,
      "step": 114268
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8320547342300415,
      "learning_rate": 0.00030381434644768853,
      "loss": 3.1084,
      "step": 114269
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.003079652786255,
      "learning_rate": 0.00030381025621581203,
      "loss": 2.862,
      "step": 114270
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.073134660720825,
      "learning_rate": 0.0003038061659832272,
      "loss": 3.0521,
      "step": 114271
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3518176078796387,
      "learning_rate": 0.00030380207574993474,
      "loss": 2.9232,
      "step": 114272
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.626244306564331,
      "learning_rate": 0.00030379798551593537,
      "loss": 3.0336,
      "step": 114273
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0708582401275635,
      "learning_rate": 0.00030379389528122986,
      "loss": 3.1423,
      "step": 114274
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0408730506896973,
      "learning_rate": 0.00030378980504581907,
      "loss": 3.3333,
      "step": 114275
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.915366291999817,
      "learning_rate": 0.0003037857148097036,
      "loss": 2.884,
      "step": 114276
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0529277324676514,
      "learning_rate": 0.0003037816245728844,
      "loss": 2.936,
      "step": 114277
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8738235235214233,
      "learning_rate": 0.0003037775343353621,
      "loss": 3.2096,
      "step": 114278
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7656328678131104,
      "learning_rate": 0.0003037734440971374,
      "loss": 3.2894,
      "step": 114279
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.359832286834717,
      "learning_rate": 0.00030376935385821114,
      "loss": 3.0692,
      "step": 114280
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.876461148262024,
      "learning_rate": 0.0003037652636185842,
      "loss": 2.8792,
      "step": 114281
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9154669046401978,
      "learning_rate": 0.0003037611733782571,
      "loss": 2.9697,
      "step": 114282
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.372474431991577,
      "learning_rate": 0.00030375708313723075,
      "loss": 2.8393,
      "step": 114283
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0539629459381104,
      "learning_rate": 0.00030375299289550606,
      "loss": 2.838,
      "step": 114284
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7271522283554077,
      "learning_rate": 0.0003037489026530834,
      "loss": 3.0412,
      "step": 114285
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1077961921691895,
      "learning_rate": 0.00030374481240996384,
      "loss": 3.1655,
      "step": 114286
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.367131471633911,
      "learning_rate": 0.0003037407221661481,
      "loss": 3.079,
      "step": 114287
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.94167160987854,
      "learning_rate": 0.0003037366319216368,
      "loss": 3.1041,
      "step": 114288
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.626729965209961,
      "learning_rate": 0.00030373254167643084,
      "loss": 3.0355,
      "step": 114289
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.669115424156189,
      "learning_rate": 0.0003037284514305309,
      "loss": 3.0016,
      "step": 114290
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.61338210105896,
      "learning_rate": 0.00030372436118393775,
      "loss": 2.8616,
      "step": 114291
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6423850059509277,
      "learning_rate": 0.00030372027093665224,
      "loss": 3.036,
      "step": 114292
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.352919340133667,
      "learning_rate": 0.000303716180688675,
      "loss": 2.8176,
      "step": 114293
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2892098426818848,
      "learning_rate": 0.00030371209044000686,
      "loss": 2.9506,
      "step": 114294
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9063740968704224,
      "learning_rate": 0.0003037080001906486,
      "loss": 2.8354,
      "step": 114295
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.777296543121338,
      "learning_rate": 0.000303703909940601,
      "loss": 2.9182,
      "step": 114296
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7317153215408325,
      "learning_rate": 0.0003036998196898647,
      "loss": 3.3264,
      "step": 114297
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7467519044876099,
      "learning_rate": 0.0003036957294384405,
      "loss": 3.1757,
      "step": 114298
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8874802589416504,
      "learning_rate": 0.00030369163918632933,
      "loss": 3.0243,
      "step": 114299
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0133113861083984,
      "learning_rate": 0.0003036875489335316,
      "loss": 2.8955,
      "step": 114300
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1583025455474854,
      "learning_rate": 0.00030368345868004844,
      "loss": 2.8108,
      "step": 114301
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9076615571975708,
      "learning_rate": 0.0003036793684258805,
      "loss": 2.5044,
      "step": 114302
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0494225025177,
      "learning_rate": 0.00030367527817102836,
      "loss": 3.0349,
      "step": 114303
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8327791690826416,
      "learning_rate": 0.00030367118791549294,
      "loss": 3.0104,
      "step": 114304
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.451772689819336,
      "learning_rate": 0.00030366709765927507,
      "loss": 2.8911,
      "step": 114305
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.311417579650879,
      "learning_rate": 0.0003036630074023754,
      "loss": 3.0298,
      "step": 114306
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.084805965423584,
      "learning_rate": 0.0003036589171447946,
      "loss": 2.697,
      "step": 114307
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.045783758163452,
      "learning_rate": 0.0003036548268865336,
      "loss": 2.9836,
      "step": 114308
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0925981998443604,
      "learning_rate": 0.00030365073662759316,
      "loss": 3.087,
      "step": 114309
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.761486291885376,
      "learning_rate": 0.0003036466463679739,
      "loss": 2.7402,
      "step": 114310
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9165252447128296,
      "learning_rate": 0.00030364255610767664,
      "loss": 2.948,
      "step": 114311
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.959373116493225,
      "learning_rate": 0.00030363846584670226,
      "loss": 2.9171,
      "step": 114312
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.163163900375366,
      "learning_rate": 0.00030363437558505134,
      "loss": 2.846,
      "step": 114313
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9194514751434326,
      "learning_rate": 0.00030363028532272475,
      "loss": 3.022,
      "step": 114314
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.205838203430176,
      "learning_rate": 0.0003036261950597232,
      "loss": 2.9852,
      "step": 114315
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8007185459136963,
      "learning_rate": 0.0003036221047960475,
      "loss": 3.1444,
      "step": 114316
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.746100664138794,
      "learning_rate": 0.0003036180145316984,
      "loss": 2.8358,
      "step": 114317
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7273108959197998,
      "learning_rate": 0.0003036139242666766,
      "loss": 3.1507,
      "step": 114318
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1694602966308594,
      "learning_rate": 0.0003036098340009829,
      "loss": 2.9402,
      "step": 114319
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9424890279769897,
      "learning_rate": 0.0003036057437346181,
      "loss": 3.0875,
      "step": 114320
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1353883743286133,
      "learning_rate": 0.0003036016534675829,
      "loss": 3.0228,
      "step": 114321
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.703463315963745,
      "learning_rate": 0.000303597563199878,
      "loss": 3.0182,
      "step": 114322
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0273866653442383,
      "learning_rate": 0.0003035934729315044,
      "loss": 3.0316,
      "step": 114323
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.358386754989624,
      "learning_rate": 0.0003035893826624627,
      "loss": 3.0324,
      "step": 114324
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.051687717437744,
      "learning_rate": 0.00030358529239275355,
      "loss": 3.0247,
      "step": 114325
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9045813083648682,
      "learning_rate": 0.00030358120212237795,
      "loss": 2.8379,
      "step": 114326
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8061347007751465,
      "learning_rate": 0.00030357711185133643,
      "loss": 2.858,
      "step": 114327
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8955187797546387,
      "learning_rate": 0.00030357302157962986,
      "loss": 3.051,
      "step": 114328
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.188159465789795,
      "learning_rate": 0.00030356893130725906,
      "loss": 3.1974,
      "step": 114329
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8441174030303955,
      "learning_rate": 0.0003035648410342247,
      "loss": 2.8942,
      "step": 114330
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8830842971801758,
      "learning_rate": 0.0003035607507605276,
      "loss": 3.1667,
      "step": 114331
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3862037658691406,
      "learning_rate": 0.00030355666048616844,
      "loss": 3.0405,
      "step": 114332
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.930631399154663,
      "learning_rate": 0.0003035525702111481,
      "loss": 3.1434,
      "step": 114333
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.699574589729309,
      "learning_rate": 0.0003035484799354672,
      "loss": 3.1225,
      "step": 114334
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.121190071105957,
      "learning_rate": 0.00030354438965912657,
      "loss": 2.9015,
      "step": 114335
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4163756370544434,
      "learning_rate": 0.000303540299382127,
      "loss": 2.8983,
      "step": 114336
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7729326486587524,
      "learning_rate": 0.00030353620910446923,
      "loss": 2.8349,
      "step": 114337
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.788198709487915,
      "learning_rate": 0.00030353211882615396,
      "loss": 3.1726,
      "step": 114338
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8052279949188232,
      "learning_rate": 0.00030352802854718205,
      "loss": 2.8919,
      "step": 114339
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9026421308517456,
      "learning_rate": 0.0003035239382675542,
      "loss": 2.9867,
      "step": 114340
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.858029842376709,
      "learning_rate": 0.0003035198479872712,
      "loss": 3.024,
      "step": 114341
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.181318998336792,
      "learning_rate": 0.00030351575770633384,
      "loss": 2.7659,
      "step": 114342
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0381827354431152,
      "learning_rate": 0.0003035116674247427,
      "loss": 3.0952,
      "step": 114343
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.630765914916992,
      "learning_rate": 0.0003035075771424988,
      "loss": 2.9266,
      "step": 114344
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.687943935394287,
      "learning_rate": 0.0003035034868596027,
      "loss": 2.9085,
      "step": 114345
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.608657956123352,
      "learning_rate": 0.00030349939657605524,
      "loss": 3.1573,
      "step": 114346
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7580875158309937,
      "learning_rate": 0.00030349530629185715,
      "loss": 3.0028,
      "step": 114347
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.809868574142456,
      "learning_rate": 0.00030349121600700934,
      "loss": 2.9167,
      "step": 114348
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4617559909820557,
      "learning_rate": 0.0003034871257215124,
      "loss": 2.9711,
      "step": 114349
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.120495080947876,
      "learning_rate": 0.00030348303543536705,
      "loss": 2.9417,
      "step": 114350
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5824644565582275,
      "learning_rate": 0.00030347894514857424,
      "loss": 3.0569,
      "step": 114351
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7926430702209473,
      "learning_rate": 0.00030347485486113455,
      "loss": 3.1794,
      "step": 114352
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9662941694259644,
      "learning_rate": 0.0003034707645730489,
      "loss": 3.0702,
      "step": 114353
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.982452630996704,
      "learning_rate": 0.00030346667428431795,
      "loss": 2.7962,
      "step": 114354
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.137075662612915,
      "learning_rate": 0.0003034625839949424,
      "loss": 2.9152,
      "step": 114355
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9444934129714966,
      "learning_rate": 0.00030345849370492313,
      "loss": 2.9255,
      "step": 114356
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4884135723114014,
      "learning_rate": 0.00030345440341426086,
      "loss": 2.9533,
      "step": 114357
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.085571050643921,
      "learning_rate": 0.00030345031312295636,
      "loss": 2.8245,
      "step": 114358
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6025782823562622,
      "learning_rate": 0.0003034462228310104,
      "loss": 3.1238,
      "step": 114359
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.343515396118164,
      "learning_rate": 0.00030344213253842377,
      "loss": 3.2439,
      "step": 114360
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.372910499572754,
      "learning_rate": 0.00030343804224519704,
      "loss": 2.8018,
      "step": 114361
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9360216856002808,
      "learning_rate": 0.00030343395195133116,
      "loss": 2.8151,
      "step": 114362
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6767196655273438,
      "learning_rate": 0.00030342986165682697,
      "loss": 3.1708,
      "step": 114363
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.861419200897217,
      "learning_rate": 0.00030342577136168505,
      "loss": 3.2153,
      "step": 114364
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.0511200428009033,
      "learning_rate": 0.0003034216810659061,
      "loss": 2.9399,
      "step": 114365
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.769369125366211,
      "learning_rate": 0.0003034175907694911,
      "loss": 3.1351,
      "step": 114366
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1546823978424072,
      "learning_rate": 0.00030341350047244066,
      "loss": 3.2364,
      "step": 114367
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.692758321762085,
      "learning_rate": 0.00030340941017475563,
      "loss": 3.0677,
      "step": 114368
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.398066520690918,
      "learning_rate": 0.0003034053198764367,
      "loss": 2.7452,
      "step": 114369
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.025876998901367,
      "learning_rate": 0.00030340122957748464,
      "loss": 3.0111,
      "step": 114370
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.121920108795166,
      "learning_rate": 0.0003033971392779002,
      "loss": 3.0606,
      "step": 114371
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.634772777557373,
      "learning_rate": 0.00030339304897768427,
      "loss": 3.0218,
      "step": 114372
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1160974502563477,
      "learning_rate": 0.00030338895867683743,
      "loss": 3.0959,
      "step": 114373
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1013872623443604,
      "learning_rate": 0.0003033848683753605,
      "loss": 2.8199,
      "step": 114374
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7374000549316406,
      "learning_rate": 0.0003033807780732544,
      "loss": 2.9789,
      "step": 114375
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.7095069885253906,
      "learning_rate": 0.0003033766877705196,
      "loss": 3.121,
      "step": 114376
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.286562442779541,
      "learning_rate": 0.000303372597467157,
      "loss": 3.1358,
      "step": 114377
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9817125797271729,
      "learning_rate": 0.00030336850716316746,
      "loss": 2.8738,
      "step": 114378
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.513641119003296,
      "learning_rate": 0.0003033644168585516,
      "loss": 2.8799,
      "step": 114379
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.577811241149902,
      "learning_rate": 0.00030336032655331024,
      "loss": 2.9597,
      "step": 114380
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2965147495269775,
      "learning_rate": 0.0003033562362474442,
      "loss": 2.8883,
      "step": 114381
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7650989294052124,
      "learning_rate": 0.0003033521459409541,
      "loss": 2.8977,
      "step": 114382
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6601041555404663,
      "learning_rate": 0.00030334805563384074,
      "loss": 3.0337,
      "step": 114383
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.869591236114502,
      "learning_rate": 0.000303343965326105,
      "loss": 2.9598,
      "step": 114384
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.327741622924805,
      "learning_rate": 0.0003033398750177475,
      "loss": 2.9104,
      "step": 114385
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7763161659240723,
      "learning_rate": 0.00030333578470876906,
      "loss": 2.8428,
      "step": 114386
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0005054473876953,
      "learning_rate": 0.00030333169439917043,
      "loss": 2.9817,
      "step": 114387
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7802634239196777,
      "learning_rate": 0.00030332760408895235,
      "loss": 3.0479,
      "step": 114388
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.311147928237915,
      "learning_rate": 0.0003033235137781157,
      "loss": 3.0629,
      "step": 114389
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.49338960647583,
      "learning_rate": 0.0003033194234666611,
      "loss": 2.9212,
      "step": 114390
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8872755765914917,
      "learning_rate": 0.0003033153331545894,
      "loss": 3.0939,
      "step": 114391
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3590664863586426,
      "learning_rate": 0.0003033112428419012,
      "loss": 3.1839,
      "step": 114392
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.704057216644287,
      "learning_rate": 0.0003033071525285975,
      "loss": 2.9554,
      "step": 114393
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5687992572784424,
      "learning_rate": 0.0003033030622146788,
      "loss": 3.1213,
      "step": 114394
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.83882474899292,
      "learning_rate": 0.0003032989719001461,
      "loss": 3.0639,
      "step": 114395
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8578155040740967,
      "learning_rate": 0.00030329488158499995,
      "loss": 3.1708,
      "step": 114396
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.586061477661133,
      "learning_rate": 0.00030329079126924143,
      "loss": 2.7348,
      "step": 114397
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.2457265853881836,
      "learning_rate": 0.0003032867009528709,
      "loss": 2.9531,
      "step": 114398
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7200727462768555,
      "learning_rate": 0.00030328261063588933,
      "loss": 3.0882,
      "step": 114399
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.0949246883392334,
      "learning_rate": 0.0003032785203182976,
      "loss": 3.0013,
      "step": 114400
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7762084007263184,
      "learning_rate": 0.00030327443000009616,
      "loss": 2.9292,
      "step": 114401
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0373659133911133,
      "learning_rate": 0.000303270339681286,
      "loss": 2.9159,
      "step": 114402
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1647727489471436,
      "learning_rate": 0.0003032662493618678,
      "loss": 2.9787,
      "step": 114403
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7350437641143799,
      "learning_rate": 0.00030326215904184246,
      "loss": 3.1521,
      "step": 114404
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5693142414093018,
      "learning_rate": 0.0003032580687212105,
      "loss": 2.8835,
      "step": 114405
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1698343753814697,
      "learning_rate": 0.0003032539783999729,
      "loss": 3.1343,
      "step": 114406
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9135050773620605,
      "learning_rate": 0.0003032498880781302,
      "loss": 2.8835,
      "step": 114407
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7748206853866577,
      "learning_rate": 0.00030324579775568335,
      "loss": 2.8995,
      "step": 114408
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6787474155426025,
      "learning_rate": 0.0003032417074326331,
      "loss": 3.0184,
      "step": 114409
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6569048166275024,
      "learning_rate": 0.00030323761710898006,
      "loss": 3.3143,
      "step": 114410
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.328516960144043,
      "learning_rate": 0.00030323352678472514,
      "loss": 2.8397,
      "step": 114411
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6415796279907227,
      "learning_rate": 0.0003032294364598691,
      "loss": 2.9566,
      "step": 114412
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8622899055480957,
      "learning_rate": 0.00030322534613441254,
      "loss": 2.9897,
      "step": 114413
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3391478061676025,
      "learning_rate": 0.0003032212558083563,
      "loss": 3.0892,
      "step": 114414
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.970914125442505,
      "learning_rate": 0.00030321716548170136,
      "loss": 2.8068,
      "step": 114415
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.941443920135498,
      "learning_rate": 0.0003032130751544481,
      "loss": 2.8286,
      "step": 114416
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8108527660369873,
      "learning_rate": 0.00030320898482659757,
      "loss": 2.9662,
      "step": 114417
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.400893449783325,
      "learning_rate": 0.0003032048944981504,
      "loss": 2.9794,
      "step": 114418
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.118997097015381,
      "learning_rate": 0.00030320080416910733,
      "loss": 3.0092,
      "step": 114419
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7729778289794922,
      "learning_rate": 0.0003031967138394692,
      "loss": 2.9063,
      "step": 114420
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.470890998840332,
      "learning_rate": 0.0003031926235092368,
      "loss": 3.2318,
      "step": 114421
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.770195484161377,
      "learning_rate": 0.0003031885331784108,
      "loss": 3.1167,
      "step": 114422
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.14567494392395,
      "learning_rate": 0.00030318444284699195,
      "loss": 2.9659,
      "step": 114423
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8244611024856567,
      "learning_rate": 0.0003031803525149811,
      "loss": 2.6894,
      "step": 114424
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4811739921569824,
      "learning_rate": 0.00030317626218237894,
      "loss": 3.2252,
      "step": 114425
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.003312587738037,
      "learning_rate": 0.00030317217184918624,
      "loss": 3.1264,
      "step": 114426
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6960891485214233,
      "learning_rate": 0.00030316808151540384,
      "loss": 3.0154,
      "step": 114427
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.685688853263855,
      "learning_rate": 0.00030316399118103234,
      "loss": 2.9612,
      "step": 114428
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9830574989318848,
      "learning_rate": 0.0003031599008460726,
      "loss": 2.8766,
      "step": 114429
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9192637205123901,
      "learning_rate": 0.0003031558105105255,
      "loss": 2.9364,
      "step": 114430
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8165336847305298,
      "learning_rate": 0.00030315172017439157,
      "loss": 2.8229,
      "step": 114431
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.071615219116211,
      "learning_rate": 0.0003031476298376717,
      "loss": 2.8233,
      "step": 114432
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.473921775817871,
      "learning_rate": 0.0003031435395003666,
      "loss": 3.0581,
      "step": 114433
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6416999101638794,
      "learning_rate": 0.0003031394491624771,
      "loss": 2.8073,
      "step": 114434
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2599008083343506,
      "learning_rate": 0.0003031353588240038,
      "loss": 3.2073,
      "step": 114435
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.811834454536438,
      "learning_rate": 0.00030313126848494776,
      "loss": 3.0767,
      "step": 114436
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9103273153305054,
      "learning_rate": 0.00030312717814530947,
      "loss": 2.8377,
      "step": 114437
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9669976234436035,
      "learning_rate": 0.0003031230878050898,
      "loss": 3.0918,
      "step": 114438
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8046720027923584,
      "learning_rate": 0.00030311899746428945,
      "loss": 3.2136,
      "step": 114439
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2766597270965576,
      "learning_rate": 0.00030311490712290923,
      "loss": 2.8638,
      "step": 114440
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0817387104034424,
      "learning_rate": 0.0003031108167809499,
      "loss": 3.0984,
      "step": 114441
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7678756713867188,
      "learning_rate": 0.0003031067264384122,
      "loss": 2.9465,
      "step": 114442
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6745909452438354,
      "learning_rate": 0.0003031026360952969,
      "loss": 3.0384,
      "step": 114443
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8690178394317627,
      "learning_rate": 0.0003030985457516048,
      "loss": 3.0827,
      "step": 114444
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7852261066436768,
      "learning_rate": 0.0003030944554073366,
      "loss": 3.2223,
      "step": 114445
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.741090774536133,
      "learning_rate": 0.0003030903650624931,
      "loss": 3.0292,
      "step": 114446
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4567606449127197,
      "learning_rate": 0.000303086274717075,
      "loss": 2.8721,
      "step": 114447
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3345868587493896,
      "learning_rate": 0.0003030821843710831,
      "loss": 2.9122,
      "step": 114448
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1546151638031006,
      "learning_rate": 0.0003030780940245182,
      "loss": 3.0529,
      "step": 114449
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8334534168243408,
      "learning_rate": 0.0003030740036773809,
      "loss": 3.1289,
      "step": 114450
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8378714323043823,
      "learning_rate": 0.0003030699133296723,
      "loss": 2.7696,
      "step": 114451
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.258899688720703,
      "learning_rate": 0.0003030658229813928,
      "loss": 3.0177,
      "step": 114452
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.630141019821167,
      "learning_rate": 0.00030306173263254337,
      "loss": 2.9165,
      "step": 114453
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.5532777309417725,
      "learning_rate": 0.0003030576422831247,
      "loss": 3.0588,
      "step": 114454
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.785675048828125,
      "learning_rate": 0.0003030535519331375,
      "loss": 2.976,
      "step": 114455
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1982052326202393,
      "learning_rate": 0.0003030494615825826,
      "loss": 3.1087,
      "step": 114456
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.209689140319824,
      "learning_rate": 0.0003030453712314608,
      "loss": 2.6514,
      "step": 114457
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.179612398147583,
      "learning_rate": 0.00030304128087977277,
      "loss": 2.8332,
      "step": 114458
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9381791353225708,
      "learning_rate": 0.0003030371905275193,
      "loss": 3.0928,
      "step": 114459
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.390131711959839,
      "learning_rate": 0.00030303310017470124,
      "loss": 2.9476,
      "step": 114460
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.36674690246582,
      "learning_rate": 0.0003030290098213192,
      "loss": 2.8001,
      "step": 114461
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7929638624191284,
      "learning_rate": 0.000303024919467374,
      "loss": 2.9676,
      "step": 114462
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7856420278549194,
      "learning_rate": 0.00030302082911286645,
      "loss": 3.1526,
      "step": 114463
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.224238395690918,
      "learning_rate": 0.0003030167387577972,
      "loss": 3.123,
      "step": 114464
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.196458101272583,
      "learning_rate": 0.00030301264840216715,
      "loss": 3.1421,
      "step": 114465
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.6971473693847656,
      "learning_rate": 0.000303008558045977,
      "loss": 2.7882,
      "step": 114466
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.921431303024292,
      "learning_rate": 0.0003030044676892274,
      "loss": 2.997,
      "step": 114467
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6514034271240234,
      "learning_rate": 0.0003030003773319193,
      "loss": 3.158,
      "step": 114468
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7685959339141846,
      "learning_rate": 0.00030299628697405334,
      "loss": 2.8836,
      "step": 114469
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5093202590942383,
      "learning_rate": 0.0003029921966156304,
      "loss": 2.9495,
      "step": 114470
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.948426604270935,
      "learning_rate": 0.00030298810625665107,
      "loss": 2.9628,
      "step": 114471
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1167383193969727,
      "learning_rate": 0.00030298401589711615,
      "loss": 2.7063,
      "step": 114472
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.230259656906128,
      "learning_rate": 0.0003029799255370265,
      "loss": 3.0817,
      "step": 114473
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.957951307296753,
      "learning_rate": 0.00030297583517638284,
      "loss": 2.8535,
      "step": 114474
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.086228370666504,
      "learning_rate": 0.0003029717448151859,
      "loss": 2.9532,
      "step": 114475
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8085932731628418,
      "learning_rate": 0.00030296765445343655,
      "loss": 3.0028,
      "step": 114476
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.085735321044922,
      "learning_rate": 0.00030296356409113535,
      "loss": 2.8779,
      "step": 114477
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9305514097213745,
      "learning_rate": 0.0003029594737282831,
      "loss": 2.6098,
      "step": 114478
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5500075817108154,
      "learning_rate": 0.0003029553833648808,
      "loss": 3.0791,
      "step": 114479
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6768434047698975,
      "learning_rate": 0.0003029512930009289,
      "loss": 2.9746,
      "step": 114480
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8132532835006714,
      "learning_rate": 0.00030294720263642833,
      "loss": 3.2458,
      "step": 114481
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.4862043857574463,
      "learning_rate": 0.0003029431122713799,
      "loss": 2.9212,
      "step": 114482
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.5265238285064697,
      "learning_rate": 0.0003029390219057842,
      "loss": 3.1596,
      "step": 114483
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0265095233917236,
      "learning_rate": 0.0003029349315396421,
      "loss": 3.1453,
      "step": 114484
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.094179391860962,
      "learning_rate": 0.0003029308411729544,
      "loss": 2.7961,
      "step": 114485
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9164023399353027,
      "learning_rate": 0.00030292675080572176,
      "loss": 2.9791,
      "step": 114486
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.8776943683624268,
      "learning_rate": 0.0003029226604379449,
      "loss": 3.1616,
      "step": 114487
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.2439687252044678,
      "learning_rate": 0.00030291857006962475,
      "loss": 3.1337,
      "step": 114488
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.719459891319275,
      "learning_rate": 0.00030291447970076207,
      "loss": 3.0839,
      "step": 114489
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7261714935302734,
      "learning_rate": 0.0003029103893313574,
      "loss": 2.7612,
      "step": 114490
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7425992488861084,
      "learning_rate": 0.00030290629896141165,
      "loss": 2.9989,
      "step": 114491
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9300625324249268,
      "learning_rate": 0.0003029022085909256,
      "loss": 3.1605,
      "step": 114492
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1822454929351807,
      "learning_rate": 0.0003028981182199,
      "loss": 3.2105,
      "step": 114493
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0171239376068115,
      "learning_rate": 0.00030289402784833546,
      "loss": 2.9514,
      "step": 114494
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6069564819335938,
      "learning_rate": 0.00030288993747623296,
      "loss": 3.1403,
      "step": 114495
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.184807777404785,
      "learning_rate": 0.00030288584710359313,
      "loss": 3.1295,
      "step": 114496
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.402470350265503,
      "learning_rate": 0.0003028817567304168,
      "loss": 2.881,
      "step": 114497
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4728896617889404,
      "learning_rate": 0.00030287766635670463,
      "loss": 3.074,
      "step": 114498
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7887115478515625,
      "learning_rate": 0.0003028735759824576,
      "loss": 3.1774,
      "step": 114499
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9796422719955444,
      "learning_rate": 0.0003028694856076762,
      "loss": 3.0035,
      "step": 114500
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2361581325531006,
      "learning_rate": 0.0003028653952323613,
      "loss": 3.1125,
      "step": 114501
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.111666440963745,
      "learning_rate": 0.0003028613048565137,
      "loss": 3.0705,
      "step": 114502
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9983322620391846,
      "learning_rate": 0.00030285721448013414,
      "loss": 2.7999,
      "step": 114503
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.0917341709136963,
      "learning_rate": 0.00030285312410322337,
      "loss": 2.8635,
      "step": 114504
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1898751258850098,
      "learning_rate": 0.00030284903372578215,
      "loss": 3.0745,
      "step": 114505
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.099429130554199,
      "learning_rate": 0.00030284494334781125,
      "loss": 3.0551,
      "step": 114506
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9113692045211792,
      "learning_rate": 0.00030284085296931136,
      "loss": 3.1119,
      "step": 114507
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7631328105926514,
      "learning_rate": 0.00030283676259028336,
      "loss": 3.0923,
      "step": 114508
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.252469301223755,
      "learning_rate": 0.00030283267221072795,
      "loss": 2.8899,
      "step": 114509
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.126295804977417,
      "learning_rate": 0.00030282858183064583,
      "loss": 3.0433,
      "step": 114510
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7015433311462402,
      "learning_rate": 0.0003028244914500379,
      "loss": 2.7764,
      "step": 114511
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.037738084793091,
      "learning_rate": 0.0003028204010689049,
      "loss": 3.2987,
      "step": 114512
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.086885690689087,
      "learning_rate": 0.0003028163106872474,
      "loss": 3.0745,
      "step": 114513
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7494778633117676,
      "learning_rate": 0.00030281222030506636,
      "loss": 2.9882,
      "step": 114514
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7156033515930176,
      "learning_rate": 0.0003028081299223625,
      "loss": 2.6422,
      "step": 114515
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8832695484161377,
      "learning_rate": 0.00030280403953913645,
      "loss": 2.8818,
      "step": 114516
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.407727003097534,
      "learning_rate": 0.00030279994915538913,
      "loss": 2.8985,
      "step": 114517
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8827831745147705,
      "learning_rate": 0.0003027958587711213,
      "loss": 3.1913,
      "step": 114518
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7794837951660156,
      "learning_rate": 0.00030279176838633363,
      "loss": 3.0978,
      "step": 114519
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.446808099746704,
      "learning_rate": 0.0003027876780010269,
      "loss": 3.1837,
      "step": 114520
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6451265811920166,
      "learning_rate": 0.00030278358761520193,
      "loss": 3.0828,
      "step": 114521
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9633359909057617,
      "learning_rate": 0.00030277949722885936,
      "loss": 2.9376,
      "step": 114522
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.837261199951172,
      "learning_rate": 0.00030277540684200006,
      "loss": 2.6761,
      "step": 114523
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.308867931365967,
      "learning_rate": 0.00030277131645462484,
      "loss": 2.7883,
      "step": 114524
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0906519889831543,
      "learning_rate": 0.0003027672260667343,
      "loss": 3.0978,
      "step": 114525
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.530367851257324,
      "learning_rate": 0.00030276313567832925,
      "loss": 3.0705,
      "step": 114526
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.477562189102173,
      "learning_rate": 0.0003027590452894106,
      "loss": 2.8866,
      "step": 114527
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1204118728637695,
      "learning_rate": 0.00030275495489997894,
      "loss": 3.4113,
      "step": 114528
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3937363624572754,
      "learning_rate": 0.000302750864510035,
      "loss": 3.1007,
      "step": 114529
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8833116292953491,
      "learning_rate": 0.00030274677411957975,
      "loss": 2.7811,
      "step": 114530
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2733302116394043,
      "learning_rate": 0.00030274268372861376,
      "loss": 2.9299,
      "step": 114531
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8620119094848633,
      "learning_rate": 0.0003027385933371378,
      "loss": 2.9853,
      "step": 114532
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.045081853866577,
      "learning_rate": 0.00030273450294515273,
      "loss": 3.0518,
      "step": 114533
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7713971138000488,
      "learning_rate": 0.0003027304125526593,
      "loss": 3.0484,
      "step": 114534
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.444427490234375,
      "learning_rate": 0.0003027263221596582,
      "loss": 3.1162,
      "step": 114535
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1382436752319336,
      "learning_rate": 0.00030272223176615017,
      "loss": 2.9529,
      "step": 114536
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1154937744140625,
      "learning_rate": 0.0003027181413721361,
      "loss": 3.0837,
      "step": 114537
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2463395595550537,
      "learning_rate": 0.0003027140509776167,
      "loss": 2.7688,
      "step": 114538
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.1837565898895264,
      "learning_rate": 0.0003027099605825926,
      "loss": 3.0876,
      "step": 114539
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2303683757781982,
      "learning_rate": 0.0003027058701870648,
      "loss": 2.8886,
      "step": 114540
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7536823749542236,
      "learning_rate": 0.00030270177979103383,
      "loss": 3.0196,
      "step": 114541
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5868966579437256,
      "learning_rate": 0.0003026976893945006,
      "loss": 2.9457,
      "step": 114542
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.783421277999878,
      "learning_rate": 0.00030269359899746587,
      "loss": 3.0307,
      "step": 114543
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.7569563388824463,
      "learning_rate": 0.00030268950859993027,
      "loss": 2.8681,
      "step": 114544
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.912251591682434,
      "learning_rate": 0.0003026854182018946,
      "loss": 3.0769,
      "step": 114545
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5217690467834473,
      "learning_rate": 0.0003026813278033597,
      "loss": 3.0841,
      "step": 114546
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3771514892578125,
      "learning_rate": 0.00030267723740432634,
      "loss": 3.2469,
      "step": 114547
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2125155925750732,
      "learning_rate": 0.00030267314700479516,
      "loss": 2.7754,
      "step": 114548
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.332782506942749,
      "learning_rate": 0.00030266905660476703,
      "loss": 2.9067,
      "step": 114549
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.751108169555664,
      "learning_rate": 0.00030266496620424266,
      "loss": 3.0772,
      "step": 114550
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.718855381011963,
      "learning_rate": 0.0003026608758032228,
      "loss": 2.9497,
      "step": 114551
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.893985390663147,
      "learning_rate": 0.00030265678540170835,
      "loss": 3.1594,
      "step": 114552
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9568521976470947,
      "learning_rate": 0.0003026526949996998,
      "loss": 2.9534,
      "step": 114553
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8939311504364014,
      "learning_rate": 0.00030264860459719813,
      "loss": 2.6985,
      "step": 114554
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8631691932678223,
      "learning_rate": 0.00030264451419420406,
      "loss": 2.9673,
      "step": 114555
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0122718811035156,
      "learning_rate": 0.00030264042379071825,
      "loss": 2.9145,
      "step": 114556
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7707912921905518,
      "learning_rate": 0.0003026363333867416,
      "loss": 3.0954,
      "step": 114557
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.195066452026367,
      "learning_rate": 0.0003026322429822748,
      "loss": 3.1591,
      "step": 114558
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.067702054977417,
      "learning_rate": 0.0003026281525773186,
      "loss": 3.0326,
      "step": 114559
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0539894104003906,
      "learning_rate": 0.00030262406217187375,
      "loss": 3.1007,
      "step": 114560
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0565032958984375,
      "learning_rate": 0.0003026199717659411,
      "loss": 3.1421,
      "step": 114561
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.079932451248169,
      "learning_rate": 0.0003026158813595213,
      "loss": 2.9247,
      "step": 114562
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.125485420227051,
      "learning_rate": 0.00030261179095261514,
      "loss": 3.2119,
      "step": 114563
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6843971014022827,
      "learning_rate": 0.0003026077005452235,
      "loss": 2.9274,
      "step": 114564
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7105363607406616,
      "learning_rate": 0.0003026036101373469,
      "loss": 2.9581,
      "step": 114565
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9131006002426147,
      "learning_rate": 0.0003025995197289863,
      "loss": 2.9511,
      "step": 114566
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6785006523132324,
      "learning_rate": 0.0003025954293201425,
      "loss": 2.9093,
      "step": 114567
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7590999603271484,
      "learning_rate": 0.00030259133891081603,
      "loss": 2.9608,
      "step": 114568
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.239600658416748,
      "learning_rate": 0.00030258724850100785,
      "loss": 2.9275,
      "step": 114569
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.3229050636291504,
      "learning_rate": 0.0003025831580907186,
      "loss": 3.0452,
      "step": 114570
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.604193925857544,
      "learning_rate": 0.00030257906767994913,
      "loss": 2.8931,
      "step": 114571
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.777532696723938,
      "learning_rate": 0.00030257497726870016,
      "loss": 2.7566,
      "step": 114572
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8842893838882446,
      "learning_rate": 0.00030257088685697246,
      "loss": 3.2235,
      "step": 114573
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7685657739639282,
      "learning_rate": 0.00030256679644476673,
      "loss": 3.1709,
      "step": 114574
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8430802822113037,
      "learning_rate": 0.00030256270603208385,
      "loss": 2.9793,
      "step": 114575
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9252839088439941,
      "learning_rate": 0.00030255861561892446,
      "loss": 3.1864,
      "step": 114576
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8330514430999756,
      "learning_rate": 0.00030255452520528943,
      "loss": 2.9821,
      "step": 114577
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8499155044555664,
      "learning_rate": 0.0003025504347911794,
      "loss": 3.0806,
      "step": 114578
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.741445779800415,
      "learning_rate": 0.0003025463443765952,
      "loss": 2.8586,
      "step": 114579
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0506858825683594,
      "learning_rate": 0.0003025422539615377,
      "loss": 2.8676,
      "step": 114580
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9915404319763184,
      "learning_rate": 0.0003025381635460074,
      "loss": 3.0635,
      "step": 114581
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9007352590560913,
      "learning_rate": 0.0003025340731300053,
      "loss": 2.9991,
      "step": 114582
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2556869983673096,
      "learning_rate": 0.00030252998271353197,
      "loss": 3.121,
      "step": 114583
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.549710750579834,
      "learning_rate": 0.0003025258922965884,
      "loss": 2.886,
      "step": 114584
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.045832633972168,
      "learning_rate": 0.0003025218018791752,
      "loss": 2.9777,
      "step": 114585
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7480462789535522,
      "learning_rate": 0.000302517711461293,
      "loss": 3.0521,
      "step": 114586
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6631929874420166,
      "learning_rate": 0.00030251362104294284,
      "loss": 3.1684,
      "step": 114587
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7965993881225586,
      "learning_rate": 0.0003025095306241254,
      "loss": 3.0705,
      "step": 114588
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3089325428009033,
      "learning_rate": 0.0003025054402048412,
      "loss": 2.7334,
      "step": 114589
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4554734230041504,
      "learning_rate": 0.00030250134978509135,
      "loss": 3.351,
      "step": 114590
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.421128749847412,
      "learning_rate": 0.00030249725936487646,
      "loss": 2.8529,
      "step": 114591
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.526976108551025,
      "learning_rate": 0.0003024931689441972,
      "loss": 3.1398,
      "step": 114592
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1373825073242188,
      "learning_rate": 0.0003024890785230544,
      "loss": 2.9034,
      "step": 114593
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1747500896453857,
      "learning_rate": 0.00030248498810144897,
      "loss": 3.1367,
      "step": 114594
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9901583194732666,
      "learning_rate": 0.0003024808976793814,
      "loss": 2.7908,
      "step": 114595
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9476954936981201,
      "learning_rate": 0.00030247680725685265,
      "loss": 2.7586,
      "step": 114596
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.1036460399627686,
      "learning_rate": 0.00030247271683386345,
      "loss": 2.8951,
      "step": 114597
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.883651614189148,
      "learning_rate": 0.00030246862641041444,
      "loss": 3.1101,
      "step": 114598
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8899097442626953,
      "learning_rate": 0.00030246453598650644,
      "loss": 3.0628,
      "step": 114599
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3089020252227783,
      "learning_rate": 0.00030246044556214035,
      "loss": 3.2962,
      "step": 114600
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1921944618225098,
      "learning_rate": 0.0003024563551373168,
      "loss": 3.16,
      "step": 114601
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7013099193573,
      "learning_rate": 0.0003024522647120364,
      "loss": 2.7925,
      "step": 114602
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.117117404937744,
      "learning_rate": 0.0003024481742863002,
      "loss": 3.0185,
      "step": 114603
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3776049613952637,
      "learning_rate": 0.0003024440838601089,
      "loss": 3.2426,
      "step": 114604
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.920691728591919,
      "learning_rate": 0.00030243999343346315,
      "loss": 2.9619,
      "step": 114605
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.528716802597046,
      "learning_rate": 0.0003024359030063637,
      "loss": 2.983,
      "step": 114606
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7429778575897217,
      "learning_rate": 0.0003024318125788115,
      "loss": 3.3087,
      "step": 114607
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.232334613800049,
      "learning_rate": 0.00030242772215080706,
      "loss": 2.8777,
      "step": 114608
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9786702394485474,
      "learning_rate": 0.0003024236317223513,
      "loss": 3.0332,
      "step": 114609
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8159183263778687,
      "learning_rate": 0.00030241954129344497,
      "loss": 3.1494,
      "step": 114610
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.110140562057495,
      "learning_rate": 0.00030241545086408874,
      "loss": 3.1934,
      "step": 114611
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8439757823944092,
      "learning_rate": 0.0003024113604342834,
      "loss": 3.2106,
      "step": 114612
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8371546268463135,
      "learning_rate": 0.0003024072700040299,
      "loss": 3.0755,
      "step": 114613
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.77431058883667,
      "learning_rate": 0.0003024031795733287,
      "loss": 3.0134,
      "step": 114614
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.100476026535034,
      "learning_rate": 0.00030239908914218074,
      "loss": 3.124,
      "step": 114615
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7228419780731201,
      "learning_rate": 0.0003023949987105868,
      "loss": 2.8183,
      "step": 114616
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.082909345626831,
      "learning_rate": 0.00030239090827854757,
      "loss": 2.99,
      "step": 114617
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7233433723449707,
      "learning_rate": 0.00030238681784606374,
      "loss": 2.9826,
      "step": 114618
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.617640733718872,
      "learning_rate": 0.00030238272741313626,
      "loss": 2.9082,
      "step": 114619
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.43327260017395,
      "learning_rate": 0.0003023786369797657,
      "loss": 2.9388,
      "step": 114620
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8172593116760254,
      "learning_rate": 0.0003023745465459529,
      "loss": 3.2091,
      "step": 114621
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.04872989654541,
      "learning_rate": 0.0003023704561116987,
      "loss": 2.8692,
      "step": 114622
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4991443157196045,
      "learning_rate": 0.00030236636567700374,
      "loss": 3.0699,
      "step": 114623
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.771662473678589,
      "learning_rate": 0.0003023622752418689,
      "loss": 3.1251,
      "step": 114624
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9856994152069092,
      "learning_rate": 0.00030235818480629485,
      "loss": 2.9835,
      "step": 114625
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.83756947517395,
      "learning_rate": 0.00030235409437028227,
      "loss": 3.0066,
      "step": 114626
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0425055027008057,
      "learning_rate": 0.0003023500039338321,
      "loss": 3.0766,
      "step": 114627
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8212145566940308,
      "learning_rate": 0.00030234591349694506,
      "loss": 3.0063,
      "step": 114628
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4903316497802734,
      "learning_rate": 0.0003023418230596218,
      "loss": 2.9392,
      "step": 114629
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3915657997131348,
      "learning_rate": 0.00030233773262186315,
      "loss": 2.9808,
      "step": 114630
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.161531925201416,
      "learning_rate": 0.0003023336421836699,
      "loss": 3.0951,
      "step": 114631
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9000825881958008,
      "learning_rate": 0.00030232955174504277,
      "loss": 3.2611,
      "step": 114632
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.612820863723755,
      "learning_rate": 0.0003023254613059825,
      "loss": 2.9228,
      "step": 114633
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.9015610218048096,
      "learning_rate": 0.0003023213708664899,
      "loss": 3.0823,
      "step": 114634
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8092058897018433,
      "learning_rate": 0.00030231728042656573,
      "loss": 2.9815,
      "step": 114635
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.038072109222412,
      "learning_rate": 0.0003023131899862107,
      "loss": 2.9087,
      "step": 114636
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.063373327255249,
      "learning_rate": 0.0003023090995454257,
      "loss": 3.1371,
      "step": 114637
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4851343631744385,
      "learning_rate": 0.00030230500910421136,
      "loss": 3.0239,
      "step": 114638
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6188699007034302,
      "learning_rate": 0.0003023009186625684,
      "loss": 2.7815,
      "step": 114639
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.094839334487915,
      "learning_rate": 0.00030229682822049774,
      "loss": 3.0994,
      "step": 114640
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7860009670257568,
      "learning_rate": 0.00030229273777799995,
      "loss": 3.1242,
      "step": 114641
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.339980125427246,
      "learning_rate": 0.000302288647335076,
      "loss": 2.7704,
      "step": 114642
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.554727554321289,
      "learning_rate": 0.0003022845568917266,
      "loss": 3.2592,
      "step": 114643
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.243790864944458,
      "learning_rate": 0.00030228046644795224,
      "loss": 2.8398,
      "step": 114644
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7153816223144531,
      "learning_rate": 0.00030227637600375404,
      "loss": 2.8664,
      "step": 114645
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5175693035125732,
      "learning_rate": 0.0003022722855591327,
      "loss": 2.9151,
      "step": 114646
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0491604804992676,
      "learning_rate": 0.00030226819511408874,
      "loss": 3.1515,
      "step": 114647
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.660486936569214,
      "learning_rate": 0.0003022641046686231,
      "loss": 2.991,
      "step": 114648
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0470216274261475,
      "learning_rate": 0.00030226001422273665,
      "loss": 2.731,
      "step": 114649
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.587960720062256,
      "learning_rate": 0.0003022559237764299,
      "loss": 2.6707,
      "step": 114650
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.86696195602417,
      "learning_rate": 0.00030225183332970376,
      "loss": 3.0251,
      "step": 114651
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8063513040542603,
      "learning_rate": 0.00030224774288255897,
      "loss": 2.7993,
      "step": 114652
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9478931427001953,
      "learning_rate": 0.00030224365243499633,
      "loss": 2.7224,
      "step": 114653
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.323265790939331,
      "learning_rate": 0.00030223956198701643,
      "loss": 2.9605,
      "step": 114654
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.181126356124878,
      "learning_rate": 0.0003022354715386203,
      "loss": 3.111,
      "step": 114655
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.806456208229065,
      "learning_rate": 0.00030223138108980845,
      "loss": 2.9588,
      "step": 114656
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9135441780090332,
      "learning_rate": 0.0003022272906405817,
      "loss": 3.0108,
      "step": 114657
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0374343395233154,
      "learning_rate": 0.00030222320019094104,
      "loss": 2.7718,
      "step": 114658
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.904186487197876,
      "learning_rate": 0.0003022191097408869,
      "loss": 2.9204,
      "step": 114659
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8273911476135254,
      "learning_rate": 0.00030221501929042017,
      "loss": 2.7546,
      "step": 114660
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1611533164978027,
      "learning_rate": 0.00030221092883954176,
      "loss": 3.2028,
      "step": 114661
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0956857204437256,
      "learning_rate": 0.0003022068383882522,
      "loss": 2.9807,
      "step": 114662
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7720946073532104,
      "learning_rate": 0.0003022027479365523,
      "loss": 2.7949,
      "step": 114663
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.542903423309326,
      "learning_rate": 0.0003021986574844429,
      "loss": 3.2719,
      "step": 114664
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4421896934509277,
      "learning_rate": 0.0003021945670319248,
      "loss": 3.286,
      "step": 114665
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6683906316757202,
      "learning_rate": 0.0003021904765789987,
      "loss": 3.121,
      "step": 114666
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9568263292312622,
      "learning_rate": 0.0003021863861256652,
      "loss": 3.1037,
      "step": 114667
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.554738163948059,
      "learning_rate": 0.00030218229567192527,
      "loss": 2.9704,
      "step": 114668
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5241196155548096,
      "learning_rate": 0.0003021782052177797,
      "loss": 3.0949,
      "step": 114669
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6358948945999146,
      "learning_rate": 0.0003021741147632291,
      "loss": 2.8705,
      "step": 114670
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6887203454971313,
      "learning_rate": 0.00030217002430827424,
      "loss": 3.0437,
      "step": 114671
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.5439175367355347,
      "learning_rate": 0.000302165933852916,
      "loss": 2.893,
      "step": 114672
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.198103427886963,
      "learning_rate": 0.0003021618433971551,
      "loss": 2.9433,
      "step": 114673
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0946877002716064,
      "learning_rate": 0.0003021577529409922,
      "loss": 2.9077,
      "step": 114674
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0109975337982178,
      "learning_rate": 0.00030215366248442815,
      "loss": 2.9799,
      "step": 114675
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3532302379608154,
      "learning_rate": 0.00030214957202746374,
      "loss": 3.174,
      "step": 114676
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.829607367515564,
      "learning_rate": 0.00030214548157009964,
      "loss": 3.0673,
      "step": 114677
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2414655685424805,
      "learning_rate": 0.00030214139111233666,
      "loss": 3.0627,
      "step": 114678
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2556726932525635,
      "learning_rate": 0.00030213730065417556,
      "loss": 2.9135,
      "step": 114679
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7830091714859009,
      "learning_rate": 0.00030213321019561704,
      "loss": 3.0911,
      "step": 114680
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.80680775642395,
      "learning_rate": 0.000302129119736662,
      "loss": 2.8384,
      "step": 114681
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.271841526031494,
      "learning_rate": 0.0003021250292773111,
      "loss": 3.0274,
      "step": 114682
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.5996344089508057,
      "learning_rate": 0.0003021209388175651,
      "loss": 2.8925,
      "step": 114683
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8680975437164307,
      "learning_rate": 0.00030211684835742473,
      "loss": 2.8512,
      "step": 114684
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1680872440338135,
      "learning_rate": 0.00030211275789689086,
      "loss": 2.4935,
      "step": 114685
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.533444881439209,
      "learning_rate": 0.00030210866743596423,
      "loss": 3.1105,
      "step": 114686
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.864899158477783,
      "learning_rate": 0.0003021045769746455,
      "loss": 3.1092,
      "step": 114687
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3850808143615723,
      "learning_rate": 0.0003021004865129355,
      "loss": 3.1279,
      "step": 114688
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.377656936645508,
      "learning_rate": 0.000302096396050835,
      "loss": 3.061,
      "step": 114689
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.987457036972046,
      "learning_rate": 0.00030209230558834473,
      "loss": 2.9862,
      "step": 114690
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8637067079544067,
      "learning_rate": 0.00030208821512546535,
      "loss": 3.0496,
      "step": 114691
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.429288625717163,
      "learning_rate": 0.0003020841246621979,
      "loss": 3.0895,
      "step": 114692
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3285598754882812,
      "learning_rate": 0.00030208003419854285,
      "loss": 3.2171,
      "step": 114693
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.347978353500366,
      "learning_rate": 0.0003020759437345012,
      "loss": 3.0719,
      "step": 114694
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9453343152999878,
      "learning_rate": 0.0003020718532700735,
      "loss": 3.0449,
      "step": 114695
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.90321683883667,
      "learning_rate": 0.00030206776280526063,
      "loss": 3.1743,
      "step": 114696
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.2733871936798096,
      "learning_rate": 0.00030206367234006333,
      "loss": 2.7452,
      "step": 114697
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2602219581604004,
      "learning_rate": 0.0003020595818744824,
      "loss": 3.2087,
      "step": 114698
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9473938941955566,
      "learning_rate": 0.00030205549140851844,
      "loss": 2.8854,
      "step": 114699
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9926679134368896,
      "learning_rate": 0.00030205140094217243,
      "loss": 2.9747,
      "step": 114700
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.4779486656188965,
      "learning_rate": 0.000302047310475445,
      "loss": 3.1224,
      "step": 114701
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7036489248275757,
      "learning_rate": 0.0003020432200083369,
      "loss": 2.8324,
      "step": 114702
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9489976167678833,
      "learning_rate": 0.0003020391295408489,
      "loss": 2.7205,
      "step": 114703
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4221296310424805,
      "learning_rate": 0.0003020350390729819,
      "loss": 2.92,
      "step": 114704
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.670544385910034,
      "learning_rate": 0.00030203094860473644,
      "loss": 3.1197,
      "step": 114705
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.015472173690796,
      "learning_rate": 0.00030202685813611344,
      "loss": 3.0356,
      "step": 114706
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2075693607330322,
      "learning_rate": 0.0003020227676671136,
      "loss": 2.9024,
      "step": 114707
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2144200801849365,
      "learning_rate": 0.0003020186771977377,
      "loss": 3.0434,
      "step": 114708
    },
    {
      "epoch": 1.49,
      "grad_norm": 4.582021713256836,
      "learning_rate": 0.0003020145867279864,
      "loss": 2.8469,
      "step": 114709
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.407827138900757,
      "learning_rate": 0.0003020104962578608,
      "loss": 2.8384,
      "step": 114710
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3869197368621826,
      "learning_rate": 0.0003020064057873612,
      "loss": 2.871,
      "step": 114711
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4998345375061035,
      "learning_rate": 0.0003020023153164886,
      "loss": 3.1956,
      "step": 114712
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.756676435470581,
      "learning_rate": 0.0003019982248452437,
      "loss": 2.9068,
      "step": 114713
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.383481740951538,
      "learning_rate": 0.00030199413437362733,
      "loss": 2.9024,
      "step": 114714
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.089657783508301,
      "learning_rate": 0.0003019900439016402,
      "loss": 2.8935,
      "step": 114715
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4061484336853027,
      "learning_rate": 0.00030198595342928317,
      "loss": 3.0006,
      "step": 114716
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.126771926879883,
      "learning_rate": 0.0003019818629565568,
      "loss": 3.0265,
      "step": 114717
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4262120723724365,
      "learning_rate": 0.000301977772483462,
      "loss": 3.1921,
      "step": 114718
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.163463830947876,
      "learning_rate": 0.0003019736820099995,
      "loss": 2.9412,
      "step": 114719
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8965773582458496,
      "learning_rate": 0.00030196959153617006,
      "loss": 3.1916,
      "step": 114720
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7036147117614746,
      "learning_rate": 0.0003019655010619744,
      "loss": 2.8561,
      "step": 114721
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3855276107788086,
      "learning_rate": 0.0003019614105874134,
      "loss": 3.0094,
      "step": 114722
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.516232967376709,
      "learning_rate": 0.00030195732011248773,
      "loss": 2.9419,
      "step": 114723
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.4979047775268555,
      "learning_rate": 0.000301953229637198,
      "loss": 2.9878,
      "step": 114724
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.899782657623291,
      "learning_rate": 0.0003019491391615454,
      "loss": 2.9137,
      "step": 114725
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.88539719581604,
      "learning_rate": 0.0003019450486855302,
      "loss": 2.9874,
      "step": 114726
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.990752100944519,
      "learning_rate": 0.0003019409582091534,
      "loss": 2.9142,
      "step": 114727
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7609918117523193,
      "learning_rate": 0.00030193686773241586,
      "loss": 2.8126,
      "step": 114728
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.267221212387085,
      "learning_rate": 0.0003019327772553181,
      "loss": 3.0453,
      "step": 114729
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2974157333374023,
      "learning_rate": 0.000301928686777861,
      "loss": 3.1192,
      "step": 114730
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.241173505783081,
      "learning_rate": 0.0003019245963000454,
      "loss": 3.047,
      "step": 114731
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7951138019561768,
      "learning_rate": 0.000301920505821872,
      "loss": 2.9583,
      "step": 114732
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6941332817077637,
      "learning_rate": 0.00030191641534334137,
      "loss": 2.7987,
      "step": 114733
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7156832218170166,
      "learning_rate": 0.00030191232486445467,
      "loss": 2.8973,
      "step": 114734
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0678675174713135,
      "learning_rate": 0.00030190823438521223,
      "loss": 3.0858,
      "step": 114735
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2273855209350586,
      "learning_rate": 0.0003019041439056151,
      "loss": 3.008,
      "step": 114736
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.6328768730163574,
      "learning_rate": 0.0003019000534256639,
      "loss": 3.2653,
      "step": 114737
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2788164615631104,
      "learning_rate": 0.00030189596294535957,
      "loss": 3.2079,
      "step": 114738
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7994967699050903,
      "learning_rate": 0.00030189187246470263,
      "loss": 2.8378,
      "step": 114739
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7042711973190308,
      "learning_rate": 0.00030188778198369403,
      "loss": 2.9823,
      "step": 114740
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.343493938446045,
      "learning_rate": 0.0003018836915023344,
      "loss": 2.8783,
      "step": 114741
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.7704968452453613,
      "learning_rate": 0.0003018796010206246,
      "loss": 3.031,
      "step": 114742
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.615018844604492,
      "learning_rate": 0.0003018755105385653,
      "loss": 2.8487,
      "step": 114743
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0840988159179688,
      "learning_rate": 0.0003018714200561574,
      "loss": 2.7653,
      "step": 114744
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.407028913497925,
      "learning_rate": 0.00030186732957340144,
      "loss": 2.9181,
      "step": 114745
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.338444471359253,
      "learning_rate": 0.00030186323909029843,
      "loss": 3.123,
      "step": 114746
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3477516174316406,
      "learning_rate": 0.0003018591486068489,
      "loss": 2.8959,
      "step": 114747
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9393810033798218,
      "learning_rate": 0.00030185505812305375,
      "loss": 2.9891,
      "step": 114748
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.445971727371216,
      "learning_rate": 0.00030185096763891376,
      "loss": 3.0385,
      "step": 114749
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.9297308921813965,
      "learning_rate": 0.00030184687715442964,
      "loss": 2.8291,
      "step": 114750
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7097841501235962,
      "learning_rate": 0.0003018427866696021,
      "loss": 3.1467,
      "step": 114751
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.5833666324615479,
      "learning_rate": 0.00030183869618443195,
      "loss": 2.8126,
      "step": 114752
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.8063039779663086,
      "learning_rate": 0.00030183460569892,
      "loss": 2.659,
      "step": 114753
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.02567720413208,
      "learning_rate": 0.00030183051521306693,
      "loss": 3.0037,
      "step": 114754
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8264085054397583,
      "learning_rate": 0.00030182642472687356,
      "loss": 3.2136,
      "step": 114755
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.0753095149993896,
      "learning_rate": 0.0003018223342403405,
      "loss": 2.854,
      "step": 114756
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9226338863372803,
      "learning_rate": 0.00030181824375346883,
      "loss": 2.8383,
      "step": 114757
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.976101040840149,
      "learning_rate": 0.00030181415326625897,
      "loss": 2.8343,
      "step": 114758
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3908796310424805,
      "learning_rate": 0.0003018100627787119,
      "loss": 2.9,
      "step": 114759
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7248591184616089,
      "learning_rate": 0.0003018059722908282,
      "loss": 2.9028,
      "step": 114760
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.069837808609009,
      "learning_rate": 0.00030180188180260883,
      "loss": 3.1344,
      "step": 114761
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8983147144317627,
      "learning_rate": 0.00030179779131405445,
      "loss": 2.7288,
      "step": 114762
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9616554975509644,
      "learning_rate": 0.0003017937008251658,
      "loss": 3.0586,
      "step": 114763
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.25242280960083,
      "learning_rate": 0.00030178961033594374,
      "loss": 2.8581,
      "step": 114764
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8265563249588013,
      "learning_rate": 0.00030178551984638886,
      "loss": 2.9507,
      "step": 114765
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3109395503997803,
      "learning_rate": 0.00030178142935650206,
      "loss": 2.909,
      "step": 114766
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.417969226837158,
      "learning_rate": 0.0003017773388662841,
      "loss": 2.6576,
      "step": 114767
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3128128051757812,
      "learning_rate": 0.00030177324837573564,
      "loss": 3.0927,
      "step": 114768
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9300626516342163,
      "learning_rate": 0.0003017691578848575,
      "loss": 2.9098,
      "step": 114769
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9155004024505615,
      "learning_rate": 0.0003017650673936504,
      "loss": 3.271,
      "step": 114770
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9849817752838135,
      "learning_rate": 0.00030176097690211523,
      "loss": 3.0673,
      "step": 114771
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.861375570297241,
      "learning_rate": 0.00030175688641025263,
      "loss": 3.0639,
      "step": 114772
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8350192308425903,
      "learning_rate": 0.00030175279591806336,
      "loss": 3.0531,
      "step": 114773
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.068687915802002,
      "learning_rate": 0.0003017487054255483,
      "loss": 3.032,
      "step": 114774
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.219353675842285,
      "learning_rate": 0.00030174461493270806,
      "loss": 2.7491,
      "step": 114775
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.952648639678955,
      "learning_rate": 0.00030174052443954344,
      "loss": 3.0381,
      "step": 114776
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.1105549335479736,
      "learning_rate": 0.0003017364339460553,
      "loss": 2.8131,
      "step": 114777
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6863079071044922,
      "learning_rate": 0.0003017323434522442,
      "loss": 3.037,
      "step": 114778
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7153247594833374,
      "learning_rate": 0.0003017282529581112,
      "loss": 2.9563,
      "step": 114779
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7209373712539673,
      "learning_rate": 0.00030172416246365674,
      "loss": 3.015,
      "step": 114780
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.18641996383667,
      "learning_rate": 0.00030172007196888176,
      "loss": 2.753,
      "step": 114781
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.02201771736145,
      "learning_rate": 0.000301715981473787,
      "loss": 2.749,
      "step": 114782
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.181638240814209,
      "learning_rate": 0.0003017118909783732,
      "loss": 3.0454,
      "step": 114783
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.2672431468963623,
      "learning_rate": 0.0003017078004826411,
      "loss": 3.0098,
      "step": 114784
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7606167793273926,
      "learning_rate": 0.0003017037099865915,
      "loss": 2.9945,
      "step": 114785
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8985246419906616,
      "learning_rate": 0.00030169961949022523,
      "loss": 2.9173,
      "step": 114786
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8821736574172974,
      "learning_rate": 0.0003016955289935429,
      "loss": 3.1758,
      "step": 114787
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9504557847976685,
      "learning_rate": 0.0003016914384965453,
      "loss": 3.0185,
      "step": 114788
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.397895097732544,
      "learning_rate": 0.00030168734799923335,
      "loss": 3.1013,
      "step": 114789
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.336928367614746,
      "learning_rate": 0.00030168325750160757,
      "loss": 2.5152,
      "step": 114790
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8193252086639404,
      "learning_rate": 0.0003016791670036689,
      "loss": 3.0061,
      "step": 114791
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.168842315673828,
      "learning_rate": 0.0003016750765054181,
      "loss": 3.0209,
      "step": 114792
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7031099796295166,
      "learning_rate": 0.00030167098600685573,
      "loss": 3.1596,
      "step": 114793
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.066171169281006,
      "learning_rate": 0.00030166689550798273,
      "loss": 3.0222,
      "step": 114794
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.116408109664917,
      "learning_rate": 0.00030166280500879987,
      "loss": 2.9872,
      "step": 114795
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3296310901641846,
      "learning_rate": 0.0003016587145093079,
      "loss": 3.1071,
      "step": 114796
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8846303224563599,
      "learning_rate": 0.0003016546240095074,
      "loss": 3.1099,
      "step": 114797
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8306323289871216,
      "learning_rate": 0.00030165053350939946,
      "loss": 3.1203,
      "step": 114798
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.9319703578948975,
      "learning_rate": 0.0003016464430089845,
      "loss": 3.0667,
      "step": 114799
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8981785774230957,
      "learning_rate": 0.00030164235250826354,
      "loss": 2.9602,
      "step": 114800
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.6620019674301147,
      "learning_rate": 0.0003016382620072372,
      "loss": 3.2495,
      "step": 114801
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.3915207386016846,
      "learning_rate": 0.0003016341715059062,
      "loss": 2.9448,
      "step": 114802
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7238513231277466,
      "learning_rate": 0.0003016300810042715,
      "loss": 3.1409,
      "step": 114803
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.167316436767578,
      "learning_rate": 0.00030162599050233355,
      "loss": 2.8174,
      "step": 114804
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8899682760238647,
      "learning_rate": 0.0003016219000000935,
      "loss": 2.8767,
      "step": 114805
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7326501607894897,
      "learning_rate": 0.00030161780949755184,
      "loss": 3.0656,
      "step": 114806
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.7517404556274414,
      "learning_rate": 0.00030161371899470937,
      "loss": 2.9101,
      "step": 114807
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.114824056625366,
      "learning_rate": 0.000301609628491567,
      "loss": 2.9322,
      "step": 114808
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.934470295906067,
      "learning_rate": 0.0003016055379881252,
      "loss": 3.1236,
      "step": 114809
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.182600498199463,
      "learning_rate": 0.00030160144748438497,
      "loss": 3.1557,
      "step": 114810
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.615182399749756,
      "learning_rate": 0.00030159735698034705,
      "loss": 3.0406,
      "step": 114811
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.98647141456604,
      "learning_rate": 0.00030159326647601206,
      "loss": 3.0105,
      "step": 114812
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8644165992736816,
      "learning_rate": 0.00030158917597138086,
      "loss": 3.1113,
      "step": 114813
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8134890794754028,
      "learning_rate": 0.00030158508546645433,
      "loss": 2.685,
      "step": 114814
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.222964286804199,
      "learning_rate": 0.000301580994961233,
      "loss": 2.9635,
      "step": 114815
    },
    {
      "epoch": 1.49,
      "grad_norm": 1.8253564834594727,
      "learning_rate": 0.0003015769044557177,
      "loss": 3.2081,
      "step": 114816
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.010666608810425,
      "learning_rate": 0.00030157281394990927,
      "loss": 3.2656,
      "step": 114817
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6874730587005615,
      "learning_rate": 0.00030156872344380844,
      "loss": 2.9565,
      "step": 114818
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3702945709228516,
      "learning_rate": 0.0003015646329374159,
      "loss": 2.7501,
      "step": 114819
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1244192123413086,
      "learning_rate": 0.0003015605424307326,
      "loss": 2.9465,
      "step": 114820
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0631933212280273,
      "learning_rate": 0.00030155645192375896,
      "loss": 2.8158,
      "step": 114821
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.316063404083252,
      "learning_rate": 0.0003015523614164961,
      "loss": 3.0128,
      "step": 114822
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.140716075897217,
      "learning_rate": 0.0003015482709089446,
      "loss": 2.7944,
      "step": 114823
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5087554454803467,
      "learning_rate": 0.00030154418040110524,
      "loss": 2.848,
      "step": 114824
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2764058113098145,
      "learning_rate": 0.00030154008989297874,
      "loss": 3.0958,
      "step": 114825
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.078432559967041,
      "learning_rate": 0.000301535999384566,
      "loss": 2.946,
      "step": 114826
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.2198994159698486,
      "learning_rate": 0.0003015319088758676,
      "loss": 3.0611,
      "step": 114827
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.375361680984497,
      "learning_rate": 0.00030152781836688435,
      "loss": 3.2165,
      "step": 114828
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3740503787994385,
      "learning_rate": 0.0003015237278576172,
      "loss": 2.8638,
      "step": 114829
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0864908695220947,
      "learning_rate": 0.00030151963734806667,
      "loss": 2.8744,
      "step": 114830
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.492949962615967,
      "learning_rate": 0.0003015155468382336,
      "loss": 3.0739,
      "step": 114831
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.923225998878479,
      "learning_rate": 0.0003015114563281188,
      "loss": 3.036,
      "step": 114832
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.979488492012024,
      "learning_rate": 0.00030150736581772297,
      "loss": 2.8848,
      "step": 114833
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.87142813205719,
      "learning_rate": 0.0003015032753070469,
      "loss": 2.9006,
      "step": 114834
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9107654094696045,
      "learning_rate": 0.00030149918479609135,
      "loss": 2.9268,
      "step": 114835
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0196690559387207,
      "learning_rate": 0.0003014950942848571,
      "loss": 2.9261,
      "step": 114836
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9564545154571533,
      "learning_rate": 0.0003014910037733448,
      "loss": 2.7263,
      "step": 114837
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9290496110916138,
      "learning_rate": 0.00030148691326155536,
      "loss": 3.0212,
      "step": 114838
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.051809310913086,
      "learning_rate": 0.0003014828227494895,
      "loss": 3.1125,
      "step": 114839
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8622851371765137,
      "learning_rate": 0.00030147873223714786,
      "loss": 2.8822,
      "step": 114840
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2939352989196777,
      "learning_rate": 0.0003014746417245313,
      "loss": 2.9708,
      "step": 114841
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3942389488220215,
      "learning_rate": 0.0003014705512116407,
      "loss": 2.9995,
      "step": 114842
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3352668285369873,
      "learning_rate": 0.0003014664606984766,
      "loss": 3.0351,
      "step": 114843
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7387611865997314,
      "learning_rate": 0.00030146237018503986,
      "loss": 3.1669,
      "step": 114844
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7838186025619507,
      "learning_rate": 0.0003014582796713313,
      "loss": 3.1721,
      "step": 114845
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3522934913635254,
      "learning_rate": 0.00030145418915735156,
      "loss": 3.0153,
      "step": 114846
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0793633460998535,
      "learning_rate": 0.00030145009864310143,
      "loss": 2.784,
      "step": 114847
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8564049005508423,
      "learning_rate": 0.0003014460081285817,
      "loss": 2.8786,
      "step": 114848
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9706951379776,
      "learning_rate": 0.0003014419176137932,
      "loss": 2.8463,
      "step": 114849
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9710735082626343,
      "learning_rate": 0.0003014378270987366,
      "loss": 2.9775,
      "step": 114850
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.701225519180298,
      "learning_rate": 0.00030143373658341263,
      "loss": 3.0547,
      "step": 114851
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.711952567100525,
      "learning_rate": 0.0003014296460678222,
      "loss": 2.915,
      "step": 114852
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7338463068008423,
      "learning_rate": 0.00030142555555196594,
      "loss": 2.897,
      "step": 114853
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3220856189727783,
      "learning_rate": 0.0003014214650358446,
      "loss": 3.2374,
      "step": 114854
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.950935959815979,
      "learning_rate": 0.00030141737451945896,
      "loss": 3.1568,
      "step": 114855
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.136439323425293,
      "learning_rate": 0.0003014132840028099,
      "loss": 2.8356,
      "step": 114856
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8940341472625732,
      "learning_rate": 0.00030140919348589797,
      "loss": 2.9663,
      "step": 114857
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6673598289489746,
      "learning_rate": 0.00030140510296872413,
      "loss": 3.0058,
      "step": 114858
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.125048875808716,
      "learning_rate": 0.00030140101245128905,
      "loss": 2.8649,
      "step": 114859
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.820094108581543,
      "learning_rate": 0.0003013969219335935,
      "loss": 2.932,
      "step": 114860
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1980865001678467,
      "learning_rate": 0.0003013928314156381,
      "loss": 3.085,
      "step": 114861
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0836970806121826,
      "learning_rate": 0.00030138874089742396,
      "loss": 3.2943,
      "step": 114862
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7503911256790161,
      "learning_rate": 0.00030138465037895146,
      "loss": 2.7778,
      "step": 114863
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.057835102081299,
      "learning_rate": 0.0003013805598602216,
      "loss": 2.9389,
      "step": 114864
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0357513427734375,
      "learning_rate": 0.00030137646934123505,
      "loss": 2.8067,
      "step": 114865
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.879514455795288,
      "learning_rate": 0.0003013723788219926,
      "loss": 3.0574,
      "step": 114866
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8475284576416016,
      "learning_rate": 0.00030136828830249493,
      "loss": 3.1858,
      "step": 114867
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8225412368774414,
      "learning_rate": 0.00030136419778274297,
      "loss": 3.1121,
      "step": 114868
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.179704427719116,
      "learning_rate": 0.0003013601072627374,
      "loss": 3.128,
      "step": 114869
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2809536457061768,
      "learning_rate": 0.00030135601674247884,
      "loss": 3.2056,
      "step": 114870
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.113011598587036,
      "learning_rate": 0.0003013519262219682,
      "loss": 2.9711,
      "step": 114871
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.210768938064575,
      "learning_rate": 0.0003013478357012063,
      "loss": 3.0466,
      "step": 114872
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.883341908454895,
      "learning_rate": 0.0003013437451801938,
      "loss": 2.7072,
      "step": 114873
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.435511589050293,
      "learning_rate": 0.00030133965465893136,
      "loss": 2.9606,
      "step": 114874
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9880694150924683,
      "learning_rate": 0.00030133556413741995,
      "loss": 2.9505,
      "step": 114875
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1971752643585205,
      "learning_rate": 0.0003013314736156602,
      "loss": 3.0931,
      "step": 114876
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3051371574401855,
      "learning_rate": 0.00030132738309365293,
      "loss": 2.9107,
      "step": 114877
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0599544048309326,
      "learning_rate": 0.0003013232925713989,
      "loss": 3.0793,
      "step": 114878
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0930745601654053,
      "learning_rate": 0.00030131920204889875,
      "loss": 3.1581,
      "step": 114879
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6917202472686768,
      "learning_rate": 0.00030131511152615335,
      "loss": 3.0392,
      "step": 114880
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.382638454437256,
      "learning_rate": 0.0003013110210031636,
      "loss": 2.8892,
      "step": 114881
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.395538568496704,
      "learning_rate": 0.0003013069304799299,
      "loss": 2.8686,
      "step": 114882
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.194810152053833,
      "learning_rate": 0.0003013028399564533,
      "loss": 2.9814,
      "step": 114883
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9587435722351074,
      "learning_rate": 0.00030129874943273454,
      "loss": 2.9931,
      "step": 114884
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.3546202182769775,
      "learning_rate": 0.0003012946589087742,
      "loss": 2.845,
      "step": 114885
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1789205074310303,
      "learning_rate": 0.00030129056838457325,
      "loss": 3.0536,
      "step": 114886
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.490225315093994,
      "learning_rate": 0.0003012864778601323,
      "loss": 3.0386,
      "step": 114887
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7942841053009033,
      "learning_rate": 0.0003012823873354522,
      "loss": 2.7737,
      "step": 114888
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0499234199523926,
      "learning_rate": 0.0003012782968105337,
      "loss": 2.9559,
      "step": 114889
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7883994579315186,
      "learning_rate": 0.0003012742062853776,
      "loss": 2.9754,
      "step": 114890
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.120638608932495,
      "learning_rate": 0.0003012701157599844,
      "loss": 2.9563,
      "step": 114891
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6717052459716797,
      "learning_rate": 0.0003012660252343552,
      "loss": 2.7881,
      "step": 114892
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9238392114639282,
      "learning_rate": 0.00030126193470849066,
      "loss": 2.8908,
      "step": 114893
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9245885610580444,
      "learning_rate": 0.0003012578441823914,
      "loss": 2.9844,
      "step": 114894
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9672877788543701,
      "learning_rate": 0.0003012537536560584,
      "loss": 2.7577,
      "step": 114895
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.683542013168335,
      "learning_rate": 0.0003012496631294922,
      "loss": 3.0842,
      "step": 114896
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.789301872253418,
      "learning_rate": 0.0003012455726026937,
      "loss": 3.0357,
      "step": 114897
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.783521294593811,
      "learning_rate": 0.00030124148207566357,
      "loss": 2.8338,
      "step": 114898
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9739181995391846,
      "learning_rate": 0.00030123739154840274,
      "loss": 2.8564,
      "step": 114899
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7339361906051636,
      "learning_rate": 0.00030123330102091174,
      "loss": 2.8128,
      "step": 114900
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7843825817108154,
      "learning_rate": 0.00030122921049319153,
      "loss": 3.1744,
      "step": 114901
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5473389625549316,
      "learning_rate": 0.0003012251199652427,
      "loss": 2.9491,
      "step": 114902
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.566596508026123,
      "learning_rate": 0.0003012210294370662,
      "loss": 3.1305,
      "step": 114903
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2221975326538086,
      "learning_rate": 0.0003012169389086626,
      "loss": 2.8265,
      "step": 114904
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5538580417633057,
      "learning_rate": 0.0003012128483800328,
      "loss": 2.9187,
      "step": 114905
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7566072940826416,
      "learning_rate": 0.0003012087578511775,
      "loss": 3.1013,
      "step": 114906
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0315463542938232,
      "learning_rate": 0.00030120466732209744,
      "loss": 3.0796,
      "step": 114907
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.513993501663208,
      "learning_rate": 0.0003012005767927935,
      "loss": 3.2927,
      "step": 114908
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.343982219696045,
      "learning_rate": 0.00030119648626326625,
      "loss": 3.0571,
      "step": 114909
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.2599470615386963,
      "learning_rate": 0.0003011923957335165,
      "loss": 3.0147,
      "step": 114910
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0146946907043457,
      "learning_rate": 0.00030118830520354525,
      "loss": 3.1248,
      "step": 114911
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5683674812316895,
      "learning_rate": 0.00030118421467335296,
      "loss": 2.93,
      "step": 114912
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0138094425201416,
      "learning_rate": 0.0003011801241429405,
      "loss": 3.0264,
      "step": 114913
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.99174964427948,
      "learning_rate": 0.0003011760336123087,
      "loss": 3.0769,
      "step": 114914
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.956406593322754,
      "learning_rate": 0.0003011719430814581,
      "loss": 2.8707,
      "step": 114915
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9313074350357056,
      "learning_rate": 0.0003011678525503897,
      "loss": 2.8449,
      "step": 114916
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.73397159576416,
      "learning_rate": 0.0003011637620191042,
      "loss": 3.1729,
      "step": 114917
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.654176712036133,
      "learning_rate": 0.00030115967148760235,
      "loss": 3.0757,
      "step": 114918
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2848153114318848,
      "learning_rate": 0.0003011555809558848,
      "loss": 2.9387,
      "step": 114919
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.9044620990753174,
      "learning_rate": 0.0003011514904239526,
      "loss": 2.7011,
      "step": 114920
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8835381269454956,
      "learning_rate": 0.0003011473998918061,
      "loss": 3.069,
      "step": 114921
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6380850076675415,
      "learning_rate": 0.00030114330935944634,
      "loss": 3.0447,
      "step": 114922
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8164266347885132,
      "learning_rate": 0.00030113921882687407,
      "loss": 2.9134,
      "step": 114923
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7869393825531006,
      "learning_rate": 0.00030113512829409,
      "loss": 2.8705,
      "step": 114924
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.457951068878174,
      "learning_rate": 0.00030113103776109476,
      "loss": 3.2089,
      "step": 114925
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.777989149093628,
      "learning_rate": 0.0003011269472278894,
      "loss": 2.9776,
      "step": 114926
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.686908006668091,
      "learning_rate": 0.00030112285669447444,
      "loss": 2.9575,
      "step": 114927
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3332619667053223,
      "learning_rate": 0.00030111876616085076,
      "loss": 3.0109,
      "step": 114928
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.374907970428467,
      "learning_rate": 0.000301114675627019,
      "loss": 2.9892,
      "step": 114929
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.7272233963012695,
      "learning_rate": 0.00030111058509298014,
      "loss": 2.7317,
      "step": 114930
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.160456895828247,
      "learning_rate": 0.00030110649455873466,
      "loss": 3.0849,
      "step": 114931
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.512500762939453,
      "learning_rate": 0.0003011024040242835,
      "loss": 2.9568,
      "step": 114932
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.058880567550659,
      "learning_rate": 0.00030109831348962735,
      "loss": 2.8115,
      "step": 114933
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.210955858230591,
      "learning_rate": 0.00030109422295476713,
      "loss": 2.8377,
      "step": 114934
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.799219846725464,
      "learning_rate": 0.0003010901324197033,
      "loss": 2.9491,
      "step": 114935
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.7617435455322266,
      "learning_rate": 0.0003010860418844369,
      "loss": 3.226,
      "step": 114936
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.243208885192871,
      "learning_rate": 0.0003010819513489686,
      "loss": 3.045,
      "step": 114937
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.04235577583313,
      "learning_rate": 0.00030107786081329913,
      "loss": 2.9037,
      "step": 114938
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.629974126815796,
      "learning_rate": 0.00030107377027742925,
      "loss": 2.9398,
      "step": 114939
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.596525192260742,
      "learning_rate": 0.0003010696797413597,
      "loss": 2.9545,
      "step": 114940
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5280442237854004,
      "learning_rate": 0.00030106558920509134,
      "loss": 2.7321,
      "step": 114941
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9686486721038818,
      "learning_rate": 0.0003010614986686249,
      "loss": 3.0048,
      "step": 114942
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.845252513885498,
      "learning_rate": 0.0003010574081319609,
      "loss": 2.8398,
      "step": 114943
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.841035842895508,
      "learning_rate": 0.0003010533175951006,
      "loss": 2.9425,
      "step": 114944
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8578903675079346,
      "learning_rate": 0.0003010492270580443,
      "loss": 3.0469,
      "step": 114945
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.359808921813965,
      "learning_rate": 0.0003010451365207929,
      "loss": 3.0106,
      "step": 114946
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6759865283966064,
      "learning_rate": 0.00030104104598334717,
      "loss": 2.6593,
      "step": 114947
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.03560733795166,
      "learning_rate": 0.00030103695544570803,
      "loss": 3.1114,
      "step": 114948
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.657153129577637,
      "learning_rate": 0.0003010328649078761,
      "loss": 2.8471,
      "step": 114949
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.237691879272461,
      "learning_rate": 0.00030102877436985205,
      "loss": 2.8419,
      "step": 114950
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.489619493484497,
      "learning_rate": 0.0003010246838316367,
      "loss": 3.1634,
      "step": 114951
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6211674213409424,
      "learning_rate": 0.00030102059329323094,
      "loss": 2.9593,
      "step": 114952
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.9587700366973877,
      "learning_rate": 0.0003010165027546354,
      "loss": 2.9556,
      "step": 114953
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.690876007080078,
      "learning_rate": 0.0003010124122158509,
      "loss": 3.0256,
      "step": 114954
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.423375129699707,
      "learning_rate": 0.0003010083216768781,
      "loss": 2.9866,
      "step": 114955
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.4592959880828857,
      "learning_rate": 0.00030100423113771786,
      "loss": 3.0262,
      "step": 114956
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.889004111289978,
      "learning_rate": 0.00030100014059837094,
      "loss": 2.8108,
      "step": 114957
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.3040592670440674,
      "learning_rate": 0.00030099605005883803,
      "loss": 2.8881,
      "step": 114958
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.6538636684417725,
      "learning_rate": 0.00030099195951911994,
      "loss": 2.9462,
      "step": 114959
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.6618313789367676,
      "learning_rate": 0.0003009878689792175,
      "loss": 2.7572,
      "step": 114960
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.429490566253662,
      "learning_rate": 0.0003009837784391313,
      "loss": 3.0126,
      "step": 114961
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6997289657592773,
      "learning_rate": 0.00030097968789886227,
      "loss": 3.0464,
      "step": 114962
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.4665894508361816,
      "learning_rate": 0.0003009755973584111,
      "loss": 3.0208,
      "step": 114963
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.7492525577545166,
      "learning_rate": 0.0003009715068177785,
      "loss": 2.9004,
      "step": 114964
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.0009238719940186,
      "learning_rate": 0.0003009674162769653,
      "loss": 3.1421,
      "step": 114965
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6209321022033691,
      "learning_rate": 0.0003009633257359723,
      "loss": 3.2499,
      "step": 114966
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9321194887161255,
      "learning_rate": 0.0003009592351948001,
      "loss": 2.9516,
      "step": 114967
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2590785026550293,
      "learning_rate": 0.0003009551446534496,
      "loss": 2.9896,
      "step": 114968
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.828690528869629,
      "learning_rate": 0.0003009510541119215,
      "loss": 3.0398,
      "step": 114969
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.716820478439331,
      "learning_rate": 0.0003009469635702166,
      "loss": 2.7484,
      "step": 114970
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0562655925750732,
      "learning_rate": 0.00030094287302833567,
      "loss": 3.0467,
      "step": 114971
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9045352935791016,
      "learning_rate": 0.00030093878248627944,
      "loss": 2.9997,
      "step": 114972
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.995169758796692,
      "learning_rate": 0.0003009346919440486,
      "loss": 3.1257,
      "step": 114973
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.891707181930542,
      "learning_rate": 0.000300930601401644,
      "loss": 2.9729,
      "step": 114974
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0377204418182373,
      "learning_rate": 0.0003009265108590665,
      "loss": 2.9552,
      "step": 114975
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7461739778518677,
      "learning_rate": 0.00030092242031631654,
      "loss": 3.1375,
      "step": 114976
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3915717601776123,
      "learning_rate": 0.00030091832977339523,
      "loss": 2.9758,
      "step": 114977
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3553178310394287,
      "learning_rate": 0.0003009142392303032,
      "loss": 3.027,
      "step": 114978
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2126753330230713,
      "learning_rate": 0.00030091014868704113,
      "loss": 2.8627,
      "step": 114979
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0180468559265137,
      "learning_rate": 0.00030090605814360986,
      "loss": 3.0796,
      "step": 114980
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.403257369995117,
      "learning_rate": 0.0003009019676000102,
      "loss": 3.122,
      "step": 114981
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5049357414245605,
      "learning_rate": 0.00030089787705624274,
      "loss": 2.9141,
      "step": 114982
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.51401948928833,
      "learning_rate": 0.0003008937865123084,
      "loss": 3.25,
      "step": 114983
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.8729803562164307,
      "learning_rate": 0.0003008896959682079,
      "loss": 2.7937,
      "step": 114984
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.726670980453491,
      "learning_rate": 0.000300885605423942,
      "loss": 2.8833,
      "step": 114985
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.829421281814575,
      "learning_rate": 0.0003008815148795114,
      "loss": 3.0094,
      "step": 114986
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.890391230583191,
      "learning_rate": 0.000300877424334917,
      "loss": 2.9403,
      "step": 114987
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.903809666633606,
      "learning_rate": 0.00030087333379015936,
      "loss": 2.8992,
      "step": 114988
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.3115148544311523,
      "learning_rate": 0.0003008692432452394,
      "loss": 2.9555,
      "step": 114989
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.8224916458129883,
      "learning_rate": 0.00030086515270015784,
      "loss": 2.94,
      "step": 114990
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9820398092269897,
      "learning_rate": 0.0003008610621549154,
      "loss": 3.1897,
      "step": 114991
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8436862230300903,
      "learning_rate": 0.0003008569716095129,
      "loss": 3.0052,
      "step": 114992
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.521355390548706,
      "learning_rate": 0.00030085288106395107,
      "loss": 3.0046,
      "step": 114993
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3250818252563477,
      "learning_rate": 0.00030084879051823067,
      "loss": 3.0543,
      "step": 114994
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1507699489593506,
      "learning_rate": 0.00030084469997235247,
      "loss": 3.0754,
      "step": 114995
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.650725245475769,
      "learning_rate": 0.0003008406094263172,
      "loss": 3.0425,
      "step": 114996
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2855453491210938,
      "learning_rate": 0.0003008365188801257,
      "loss": 3.0335,
      "step": 114997
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.085204839706421,
      "learning_rate": 0.0003008324283337786,
      "loss": 2.9217,
      "step": 114998
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0266025066375732,
      "learning_rate": 0.00030082833778727683,
      "loss": 2.7742,
      "step": 114999
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8943345546722412,
      "learning_rate": 0.0003008242472406209,
      "loss": 2.9039,
      "step": 115000
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8197425603866577,
      "learning_rate": 0.0003008201566938119,
      "loss": 2.8772,
      "step": 115001
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.026489496231079,
      "learning_rate": 0.00030081606614685023,
      "loss": 2.9833,
      "step": 115002
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8181902170181274,
      "learning_rate": 0.000300811975599737,
      "loss": 3.0101,
      "step": 115003
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7966945171356201,
      "learning_rate": 0.00030080788505247277,
      "loss": 3.0972,
      "step": 115004
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8540351390838623,
      "learning_rate": 0.0003008037945050583,
      "loss": 2.6286,
      "step": 115005
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.381519079208374,
      "learning_rate": 0.0003007997039574945,
      "loss": 3.0809,
      "step": 115006
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1375505924224854,
      "learning_rate": 0.00030079561340978187,
      "loss": 3.0755,
      "step": 115007
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9008405208587646,
      "learning_rate": 0.00030079152286192136,
      "loss": 2.9819,
      "step": 115008
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1709611415863037,
      "learning_rate": 0.0003007874323139138,
      "loss": 3.1197,
      "step": 115009
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8127692937850952,
      "learning_rate": 0.00030078334176575973,
      "loss": 2.948,
      "step": 115010
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1702263355255127,
      "learning_rate": 0.00030077925121746007,
      "loss": 2.8875,
      "step": 115011
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6310176849365234,
      "learning_rate": 0.00030077516066901556,
      "loss": 3.0809,
      "step": 115012
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7731640338897705,
      "learning_rate": 0.00030077107012042685,
      "loss": 3.1258,
      "step": 115013
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4288108348846436,
      "learning_rate": 0.0003007669795716948,
      "loss": 3.0357,
      "step": 115014
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7490336894989014,
      "learning_rate": 0.0003007628890228202,
      "loss": 3.1577,
      "step": 115015
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7944552898406982,
      "learning_rate": 0.00030075879847380377,
      "loss": 3.0116,
      "step": 115016
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8738634586334229,
      "learning_rate": 0.0003007547079246462,
      "loss": 2.8974,
      "step": 115017
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9583920240402222,
      "learning_rate": 0.0003007506173753484,
      "loss": 3.0306,
      "step": 115018
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9546964168548584,
      "learning_rate": 0.000300746526825911,
      "loss": 3.0652,
      "step": 115019
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.591584324836731,
      "learning_rate": 0.0003007424362763348,
      "loss": 3.045,
      "step": 115020
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8848485946655273,
      "learning_rate": 0.0003007383457266205,
      "loss": 2.862,
      "step": 115021
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6932334899902344,
      "learning_rate": 0.000300734255176769,
      "loss": 3.2355,
      "step": 115022
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.183159351348877,
      "learning_rate": 0.000300730164626781,
      "loss": 2.897,
      "step": 115023
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5969693660736084,
      "learning_rate": 0.0003007260740766572,
      "loss": 3.0226,
      "step": 115024
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1201331615448,
      "learning_rate": 0.0003007219835263985,
      "loss": 2.9443,
      "step": 115025
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.618964433670044,
      "learning_rate": 0.00030071789297600546,
      "loss": 2.9788,
      "step": 115026
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1122515201568604,
      "learning_rate": 0.00030071380242547907,
      "loss": 2.9479,
      "step": 115027
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6714915037155151,
      "learning_rate": 0.00030070971187481986,
      "loss": 3.2836,
      "step": 115028
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0958683490753174,
      "learning_rate": 0.00030070562132402875,
      "loss": 3.1244,
      "step": 115029
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8515100479125977,
      "learning_rate": 0.00030070153077310646,
      "loss": 3.1041,
      "step": 115030
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8249887228012085,
      "learning_rate": 0.0003006974402220537,
      "loss": 2.8435,
      "step": 115031
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8084022998809814,
      "learning_rate": 0.00030069334967087124,
      "loss": 2.6917,
      "step": 115032
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.339799642562866,
      "learning_rate": 0.00030068925911956004,
      "loss": 3.0255,
      "step": 115033
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8222792148590088,
      "learning_rate": 0.00030068516856812053,
      "loss": 2.9804,
      "step": 115034
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7497700452804565,
      "learning_rate": 0.00030068107801655363,
      "loss": 3.0203,
      "step": 115035
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7830573320388794,
      "learning_rate": 0.0003006769874648603,
      "loss": 3.0781,
      "step": 115036
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7654861211776733,
      "learning_rate": 0.0003006728969130409,
      "loss": 2.706,
      "step": 115037
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0286366939544678,
      "learning_rate": 0.00030066880636109647,
      "loss": 3.3675,
      "step": 115038
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.178562879562378,
      "learning_rate": 0.0003006647158090277,
      "loss": 2.8316,
      "step": 115039
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8552054166793823,
      "learning_rate": 0.00030066062525683536,
      "loss": 2.9789,
      "step": 115040
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.072763442993164,
      "learning_rate": 0.00030065653470452014,
      "loss": 3.0201,
      "step": 115041
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9270968437194824,
      "learning_rate": 0.00030065244415208294,
      "loss": 2.993,
      "step": 115042
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.946086049079895,
      "learning_rate": 0.00030064835359952443,
      "loss": 2.8468,
      "step": 115043
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.019317388534546,
      "learning_rate": 0.0003006442630468453,
      "loss": 3.0513,
      "step": 115044
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.693565011024475,
      "learning_rate": 0.00030064017249404647,
      "loss": 2.8424,
      "step": 115045
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6655693054199219,
      "learning_rate": 0.00030063608194112854,
      "loss": 2.8916,
      "step": 115046
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9023821353912354,
      "learning_rate": 0.0003006319913880924,
      "loss": 3.0748,
      "step": 115047
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7770469188690186,
      "learning_rate": 0.0003006279008349388,
      "loss": 3.0585,
      "step": 115048
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.163755178451538,
      "learning_rate": 0.00030062381028166834,
      "loss": 3.0728,
      "step": 115049
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1774630546569824,
      "learning_rate": 0.000300619719728282,
      "loss": 3.0194,
      "step": 115050
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7172751426696777,
      "learning_rate": 0.0003006156291747805,
      "loss": 3.0755,
      "step": 115051
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.375781297683716,
      "learning_rate": 0.0003006115386211644,
      "loss": 2.9098,
      "step": 115052
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.133305549621582,
      "learning_rate": 0.00030060744806743465,
      "loss": 2.9914,
      "step": 115053
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9463036060333252,
      "learning_rate": 0.00030060335751359206,
      "loss": 3.1049,
      "step": 115054
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4976694583892822,
      "learning_rate": 0.00030059926695963715,
      "loss": 3.0882,
      "step": 115055
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1111152172088623,
      "learning_rate": 0.0003005951764055709,
      "loss": 3.1189,
      "step": 115056
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.357496738433838,
      "learning_rate": 0.00030059108585139404,
      "loss": 3.0991,
      "step": 115057
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0532736778259277,
      "learning_rate": 0.0003005869952971072,
      "loss": 2.9977,
      "step": 115058
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7153878211975098,
      "learning_rate": 0.0003005829047427112,
      "loss": 3.154,
      "step": 115059
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8718353509902954,
      "learning_rate": 0.00030057881418820695,
      "loss": 2.9863,
      "step": 115060
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.372971773147583,
      "learning_rate": 0.000300574723633595,
      "loss": 2.8827,
      "step": 115061
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3483824729919434,
      "learning_rate": 0.0003005706330788762,
      "loss": 2.9565,
      "step": 115062
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0975425243377686,
      "learning_rate": 0.0003005665425240514,
      "loss": 2.7462,
      "step": 115063
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.472421646118164,
      "learning_rate": 0.00030056245196912113,
      "loss": 3.0166,
      "step": 115064
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.48146653175354,
      "learning_rate": 0.00030055836141408635,
      "loss": 2.9916,
      "step": 115065
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8190419673919678,
      "learning_rate": 0.0003005542708589478,
      "loss": 2.9119,
      "step": 115066
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.164456605911255,
      "learning_rate": 0.00030055018030370617,
      "loss": 3.1085,
      "step": 115067
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8155419826507568,
      "learning_rate": 0.00030054608974836223,
      "loss": 3.1062,
      "step": 115068
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8247395753860474,
      "learning_rate": 0.0003005419991929167,
      "loss": 3.0168,
      "step": 115069
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0993356704711914,
      "learning_rate": 0.00030053790863737056,
      "loss": 3.071,
      "step": 115070
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.215125799179077,
      "learning_rate": 0.00030053381808172434,
      "loss": 2.9711,
      "step": 115071
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8890342712402344,
      "learning_rate": 0.0003005297275259788,
      "loss": 2.8668,
      "step": 115072
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.1351470947265625,
      "learning_rate": 0.0003005256369701349,
      "loss": 3.1315,
      "step": 115073
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1294305324554443,
      "learning_rate": 0.00030052154641419323,
      "loss": 3.2574,
      "step": 115074
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7469682693481445,
      "learning_rate": 0.0003005174558581546,
      "loss": 3.0108,
      "step": 115075
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.867227554321289,
      "learning_rate": 0.0003005133653020198,
      "loss": 3.0192,
      "step": 115076
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1617536544799805,
      "learning_rate": 0.00030050927474578945,
      "loss": 2.8691,
      "step": 115077
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7454198598861694,
      "learning_rate": 0.0003005051841894644,
      "loss": 2.9197,
      "step": 115078
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.056119680404663,
      "learning_rate": 0.0003005010936330456,
      "loss": 2.9766,
      "step": 115079
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3473222255706787,
      "learning_rate": 0.0003004970030765335,
      "loss": 3.0503,
      "step": 115080
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8541899919509888,
      "learning_rate": 0.000300492912519929,
      "loss": 3.1977,
      "step": 115081
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7285661697387695,
      "learning_rate": 0.0003004888219632329,
      "loss": 2.934,
      "step": 115082
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8783485889434814,
      "learning_rate": 0.0003004847314064459,
      "loss": 2.9442,
      "step": 115083
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.640035390853882,
      "learning_rate": 0.0003004806408495688,
      "loss": 3.0627,
      "step": 115084
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9654711484909058,
      "learning_rate": 0.0003004765502926024,
      "loss": 3.2416,
      "step": 115085
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.879093647003174,
      "learning_rate": 0.00030047245973554724,
      "loss": 2.9159,
      "step": 115086
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8772120475769043,
      "learning_rate": 0.00030046836917840434,
      "loss": 3.0975,
      "step": 115087
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7611175775527954,
      "learning_rate": 0.00030046427862117443,
      "loss": 2.5841,
      "step": 115088
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.945448875427246,
      "learning_rate": 0.00030046018806385806,
      "loss": 3.0501,
      "step": 115089
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8931493759155273,
      "learning_rate": 0.00030045609750645614,
      "loss": 3.2095,
      "step": 115090
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3457257747650146,
      "learning_rate": 0.00030045200694896955,
      "loss": 2.918,
      "step": 115091
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1336047649383545,
      "learning_rate": 0.0003004479163913988,
      "loss": 2.8788,
      "step": 115092
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3268306255340576,
      "learning_rate": 0.0003004438258337448,
      "loss": 2.8642,
      "step": 115093
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.92158842086792,
      "learning_rate": 0.0003004397352760084,
      "loss": 2.8877,
      "step": 115094
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.959700584411621,
      "learning_rate": 0.0003004356447181901,
      "loss": 2.9826,
      "step": 115095
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.084592580795288,
      "learning_rate": 0.0003004315541602909,
      "loss": 2.8564,
      "step": 115096
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7870160341262817,
      "learning_rate": 0.00030042746360231146,
      "loss": 2.962,
      "step": 115097
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9124974012374878,
      "learning_rate": 0.0003004233730442524,
      "loss": 3.1367,
      "step": 115098
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8379746675491333,
      "learning_rate": 0.00030041928248611475,
      "loss": 3.084,
      "step": 115099
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.0818231105804443,
      "learning_rate": 0.0003004151919278992,
      "loss": 2.9262,
      "step": 115100
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.602096438407898,
      "learning_rate": 0.0003004111013696064,
      "loss": 3.0302,
      "step": 115101
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.086817502975464,
      "learning_rate": 0.0003004070108112371,
      "loss": 3.0082,
      "step": 115102
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.81805419921875,
      "learning_rate": 0.0003004029202527922,
      "loss": 3.0733,
      "step": 115103
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.377586841583252,
      "learning_rate": 0.0003003988296942724,
      "loss": 2.8115,
      "step": 115104
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.222285509109497,
      "learning_rate": 0.00030039473913567844,
      "loss": 3.2204,
      "step": 115105
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7001057863235474,
      "learning_rate": 0.0003003906485770111,
      "loss": 2.9113,
      "step": 115106
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9528459310531616,
      "learning_rate": 0.0003003865580182711,
      "loss": 2.8723,
      "step": 115107
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7550088167190552,
      "learning_rate": 0.00030038246745945924,
      "loss": 2.801,
      "step": 115108
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9179493188858032,
      "learning_rate": 0.0003003783769005762,
      "loss": 2.9874,
      "step": 115109
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8281614780426025,
      "learning_rate": 0.00030037428634162295,
      "loss": 2.9963,
      "step": 115110
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.050187587738037,
      "learning_rate": 0.0003003701957826001,
      "loss": 2.6963,
      "step": 115111
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.529829263687134,
      "learning_rate": 0.0003003661052235083,
      "loss": 2.9662,
      "step": 115112
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0692687034606934,
      "learning_rate": 0.0003003620146643485,
      "loss": 3.0062,
      "step": 115113
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.231297492980957,
      "learning_rate": 0.00030035792410512143,
      "loss": 3.1769,
      "step": 115114
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7328861951828003,
      "learning_rate": 0.0003003538335458278,
      "loss": 3.0087,
      "step": 115115
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1186745166778564,
      "learning_rate": 0.0003003497429864683,
      "loss": 3.0784,
      "step": 115116
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.438767433166504,
      "learning_rate": 0.0003003456524270439,
      "loss": 2.8953,
      "step": 115117
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.9028563499450684,
      "learning_rate": 0.00030034156186755517,
      "loss": 3.2002,
      "step": 115118
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.805172324180603,
      "learning_rate": 0.00030033747130800295,
      "loss": 2.9834,
      "step": 115119
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.095717191696167,
      "learning_rate": 0.00030033338074838796,
      "loss": 2.8103,
      "step": 115120
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.293001890182495,
      "learning_rate": 0.00030032929018871106,
      "loss": 3.1059,
      "step": 115121
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.0818631649017334,
      "learning_rate": 0.00030032519962897285,
      "loss": 2.9714,
      "step": 115122
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8162623643875122,
      "learning_rate": 0.00030032110906917425,
      "loss": 3.2145,
      "step": 115123
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.832995891571045,
      "learning_rate": 0.0003003170185093159,
      "loss": 3.0106,
      "step": 115124
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9870851039886475,
      "learning_rate": 0.0003003129279493987,
      "loss": 2.874,
      "step": 115125
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.165100336074829,
      "learning_rate": 0.0003003088373894232,
      "loss": 3.0035,
      "step": 115126
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9030890464782715,
      "learning_rate": 0.0003003047468293904,
      "loss": 2.9713,
      "step": 115127
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7925760746002197,
      "learning_rate": 0.0003003006562693008,
      "loss": 2.9632,
      "step": 115128
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.3096184730529785,
      "learning_rate": 0.00030029656570915544,
      "loss": 2.9394,
      "step": 115129
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0536482334136963,
      "learning_rate": 0.0003002924751489549,
      "loss": 2.8158,
      "step": 115130
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7736294269561768,
      "learning_rate": 0.0003002883845886999,
      "loss": 2.6912,
      "step": 115131
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3550326824188232,
      "learning_rate": 0.0003002842940283914,
      "loss": 3.0056,
      "step": 115132
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6287333965301514,
      "learning_rate": 0.00030028020346803,
      "loss": 3.308,
      "step": 115133
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.224445343017578,
      "learning_rate": 0.0003002761129076165,
      "loss": 2.9069,
      "step": 115134
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5373849868774414,
      "learning_rate": 0.00030027202234715166,
      "loss": 2.8761,
      "step": 115135
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.1432785987854,
      "learning_rate": 0.00030026793178663624,
      "loss": 2.9122,
      "step": 115136
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.171956777572632,
      "learning_rate": 0.0003002638412260711,
      "loss": 2.9505,
      "step": 115137
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.5845959186553955,
      "learning_rate": 0.0003002597506654568,
      "loss": 3.0594,
      "step": 115138
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.536142110824585,
      "learning_rate": 0.0003002556601047942,
      "loss": 3.077,
      "step": 115139
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1225528717041016,
      "learning_rate": 0.0003002515695440841,
      "loss": 2.5915,
      "step": 115140
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6227730512619019,
      "learning_rate": 0.00030024747898332724,
      "loss": 3.0832,
      "step": 115141
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.398045063018799,
      "learning_rate": 0.0003002433884225244,
      "loss": 3.0141,
      "step": 115142
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9860740900039673,
      "learning_rate": 0.00030023929786167624,
      "loss": 2.8953,
      "step": 115143
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9525362253189087,
      "learning_rate": 0.0003002352073007837,
      "loss": 3.0651,
      "step": 115144
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9285732507705688,
      "learning_rate": 0.00030023111673984726,
      "loss": 3.0585,
      "step": 115145
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.760101556777954,
      "learning_rate": 0.000300227026178868,
      "loss": 3.1141,
      "step": 115146
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.044649600982666,
      "learning_rate": 0.00030022293561784644,
      "loss": 3.1794,
      "step": 115147
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.096585273742676,
      "learning_rate": 0.00030021884505678344,
      "loss": 3.2171,
      "step": 115148
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8202788829803467,
      "learning_rate": 0.00030021475449567984,
      "loss": 3.0324,
      "step": 115149
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.197455883026123,
      "learning_rate": 0.00030021066393453623,
      "loss": 2.8079,
      "step": 115150
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.3524510860443115,
      "learning_rate": 0.0003002065733733535,
      "loss": 3.2682,
      "step": 115151
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.096571445465088,
      "learning_rate": 0.0003002024828121323,
      "loss": 3.001,
      "step": 115152
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.087409496307373,
      "learning_rate": 0.00030019839225087355,
      "loss": 3.0857,
      "step": 115153
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.837458372116089,
      "learning_rate": 0.00030019430168957777,
      "loss": 2.7749,
      "step": 115154
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.49085807800293,
      "learning_rate": 0.00030019021112824605,
      "loss": 3.1169,
      "step": 115155
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9691933393478394,
      "learning_rate": 0.00030018612056687883,
      "loss": 2.932,
      "step": 115156
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2305123805999756,
      "learning_rate": 0.000300182030005477,
      "loss": 3.092,
      "step": 115157
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.0769550800323486,
      "learning_rate": 0.00030017793944404144,
      "loss": 2.8291,
      "step": 115158
    },
    {
      "epoch": 1.5,
      "grad_norm": 6.584698677062988,
      "learning_rate": 0.0003001738488825727,
      "loss": 3.021,
      "step": 115159
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.63938045501709,
      "learning_rate": 0.00030016975832107166,
      "loss": 2.787,
      "step": 115160
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0217807292938232,
      "learning_rate": 0.0003001656677595391,
      "loss": 2.9628,
      "step": 115161
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.763425827026367,
      "learning_rate": 0.00030016157719797574,
      "loss": 2.8288,
      "step": 115162
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.082573890686035,
      "learning_rate": 0.00030015748663638224,
      "loss": 3.2013,
      "step": 115163
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5521488189697266,
      "learning_rate": 0.0003001533960747596,
      "loss": 3.0369,
      "step": 115164
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.254401683807373,
      "learning_rate": 0.0003001493055131083,
      "loss": 2.9281,
      "step": 115165
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.757399082183838,
      "learning_rate": 0.0003001452149514293,
      "loss": 3.0648,
      "step": 115166
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0245397090911865,
      "learning_rate": 0.0003001411243897234,
      "loss": 2.9613,
      "step": 115167
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7037880420684814,
      "learning_rate": 0.00030013703382799115,
      "loss": 2.9505,
      "step": 115168
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.198010206222534,
      "learning_rate": 0.00030013294326623345,
      "loss": 2.7623,
      "step": 115169
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9669432640075684,
      "learning_rate": 0.0003001288527044511,
      "loss": 3.1577,
      "step": 115170
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9451104402542114,
      "learning_rate": 0.00030012476214264465,
      "loss": 3.1486,
      "step": 115171
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.044713497161865,
      "learning_rate": 0.00030012067158081507,
      "loss": 2.8913,
      "step": 115172
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4853270053863525,
      "learning_rate": 0.0003001165810189631,
      "loss": 3.3033,
      "step": 115173
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.3314385414123535,
      "learning_rate": 0.0003001124904570894,
      "loss": 2.8681,
      "step": 115174
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.030468463897705,
      "learning_rate": 0.00030010839989519475,
      "loss": 2.9856,
      "step": 115175
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7402608394622803,
      "learning_rate": 0.00030010430933328006,
      "loss": 3.3337,
      "step": 115176
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7880752086639404,
      "learning_rate": 0.00030010021877134585,
      "loss": 3.0714,
      "step": 115177
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5015692710876465,
      "learning_rate": 0.0003000961282093931,
      "loss": 2.9765,
      "step": 115178
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.877049446105957,
      "learning_rate": 0.00030009203764742246,
      "loss": 2.9053,
      "step": 115179
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9868541955947876,
      "learning_rate": 0.00030008794708543475,
      "loss": 2.9185,
      "step": 115180
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.246461868286133,
      "learning_rate": 0.00030008385652343056,
      "loss": 2.9922,
      "step": 115181
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.497931957244873,
      "learning_rate": 0.0003000797659614109,
      "loss": 3.057,
      "step": 115182
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9575353860855103,
      "learning_rate": 0.00030007567539937636,
      "loss": 3.069,
      "step": 115183
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.030017614364624,
      "learning_rate": 0.0003000715848373277,
      "loss": 2.877,
      "step": 115184
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8705809116363525,
      "learning_rate": 0.00030006749427526595,
      "loss": 3.0969,
      "step": 115185
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6511542797088623,
      "learning_rate": 0.0003000634037131914,
      "loss": 2.8675,
      "step": 115186
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8571275472640991,
      "learning_rate": 0.0003000593131511051,
      "loss": 3.0671,
      "step": 115187
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.912764072418213,
      "learning_rate": 0.00030005522258900796,
      "loss": 3.1782,
      "step": 115188
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.113757610321045,
      "learning_rate": 0.0003000511320269004,
      "loss": 2.9281,
      "step": 115189
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.110551118850708,
      "learning_rate": 0.0003000470414647833,
      "loss": 3.2494,
      "step": 115190
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2686564922332764,
      "learning_rate": 0.0003000429509026576,
      "loss": 3.1203,
      "step": 115191
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4569311141967773,
      "learning_rate": 0.0003000388603405238,
      "loss": 3.0656,
      "step": 115192
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.035600423812866,
      "learning_rate": 0.00030003476977838277,
      "loss": 3.0014,
      "step": 115193
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0365793704986572,
      "learning_rate": 0.0003000306792162353,
      "loss": 3.1497,
      "step": 115194
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.959721326828003,
      "learning_rate": 0.00030002658865408213,
      "loss": 2.9659,
      "step": 115195
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7153551578521729,
      "learning_rate": 0.00030002249809192404,
      "loss": 3.0439,
      "step": 115196
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7855799198150635,
      "learning_rate": 0.0003000184075297617,
      "loss": 3.1698,
      "step": 115197
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1043906211853027,
      "learning_rate": 0.00030001431696759603,
      "loss": 2.8229,
      "step": 115198
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.076637029647827,
      "learning_rate": 0.00030001022640542763,
      "loss": 2.8838,
      "step": 115199
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.043339729309082,
      "learning_rate": 0.00030000613584325734,
      "loss": 3.2303,
      "step": 115200
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1198079586029053,
      "learning_rate": 0.00030000204528108585,
      "loss": 3.0101,
      "step": 115201
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.014251947402954,
      "learning_rate": 0.0002999979547189141,
      "loss": 2.7816,
      "step": 115202
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2448794841766357,
      "learning_rate": 0.00029999386415674266,
      "loss": 3.3293,
      "step": 115203
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2197396755218506,
      "learning_rate": 0.00029998977359457237,
      "loss": 2.965,
      "step": 115204
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6077494621276855,
      "learning_rate": 0.000299985683032404,
      "loss": 3.0821,
      "step": 115205
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0265610218048096,
      "learning_rate": 0.0002999815924702383,
      "loss": 3.0166,
      "step": 115206
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0157368183135986,
      "learning_rate": 0.00029997750190807596,
      "loss": 2.9385,
      "step": 115207
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.2543227672576904,
      "learning_rate": 0.0002999734113459179,
      "loss": 2.9526,
      "step": 115208
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8512508869171143,
      "learning_rate": 0.0002999693207837647,
      "loss": 3.3251,
      "step": 115209
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8801807165145874,
      "learning_rate": 0.00029996523022161723,
      "loss": 2.7432,
      "step": 115210
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0166914463043213,
      "learning_rate": 0.0002999611396594762,
      "loss": 2.8183,
      "step": 115211
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6346335411071777,
      "learning_rate": 0.0002999570490973424,
      "loss": 2.9337,
      "step": 115212
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.678468704223633,
      "learning_rate": 0.00029995295853521667,
      "loss": 3.2105,
      "step": 115213
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8828901052474976,
      "learning_rate": 0.00029994886797309967,
      "loss": 3.0737,
      "step": 115214
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0485246181488037,
      "learning_rate": 0.0002999447774109921,
      "loss": 2.6823,
      "step": 115215
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7534865140914917,
      "learning_rate": 0.0002999406868488948,
      "loss": 3.0686,
      "step": 115216
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8611557483673096,
      "learning_rate": 0.00029993659628680855,
      "loss": 3.1466,
      "step": 115217
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8298763036727905,
      "learning_rate": 0.0002999325057247341,
      "loss": 3.1363,
      "step": 115218
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6121890544891357,
      "learning_rate": 0.0002999284151626721,
      "loss": 3.0588,
      "step": 115219
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.375657081604004,
      "learning_rate": 0.00029992432460062364,
      "loss": 3.2014,
      "step": 115220
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.1205852031707764,
      "learning_rate": 0.00029992023403858914,
      "loss": 2.9906,
      "step": 115221
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9289013147354126,
      "learning_rate": 0.0002999161434765694,
      "loss": 2.9457,
      "step": 115222
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7434751987457275,
      "learning_rate": 0.00029991205291456525,
      "loss": 2.7589,
      "step": 115223
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.439119338989258,
      "learning_rate": 0.0002999079623525775,
      "loss": 2.961,
      "step": 115224
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.105686664581299,
      "learning_rate": 0.0002999038717906068,
      "loss": 2.8805,
      "step": 115225
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3351175785064697,
      "learning_rate": 0.00029989978122865415,
      "loss": 3.2931,
      "step": 115226
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7778568267822266,
      "learning_rate": 0.00029989569066672,
      "loss": 3.0003,
      "step": 115227
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8407343626022339,
      "learning_rate": 0.0002998916001048052,
      "loss": 3.1444,
      "step": 115228
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.898254632949829,
      "learning_rate": 0.0002998875095429106,
      "loss": 3.1382,
      "step": 115229
    },
    {
      "epoch": 1.5,
      "grad_norm": 6.408717155456543,
      "learning_rate": 0.0002998834189810369,
      "loss": 2.7731,
      "step": 115230
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4149229526519775,
      "learning_rate": 0.0002998793284191849,
      "loss": 2.949,
      "step": 115231
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8559608459472656,
      "learning_rate": 0.00029987523785735546,
      "loss": 2.9621,
      "step": 115232
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8766896724700928,
      "learning_rate": 0.000299871147295549,
      "loss": 3.0999,
      "step": 115233
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.203923463821411,
      "learning_rate": 0.0002998670567337666,
      "loss": 3.1063,
      "step": 115234
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.253997802734375,
      "learning_rate": 0.00029986296617200885,
      "loss": 2.8452,
      "step": 115235
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8207457065582275,
      "learning_rate": 0.00029985887561027663,
      "loss": 3.1758,
      "step": 115236
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9349195957183838,
      "learning_rate": 0.0002998547850485706,
      "loss": 2.9442,
      "step": 115237
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.65405535697937,
      "learning_rate": 0.0002998506944868916,
      "loss": 2.8199,
      "step": 115238
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.178065299987793,
      "learning_rate": 0.0002998466039252405,
      "loss": 2.8365,
      "step": 115239
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.593140125274658,
      "learning_rate": 0.00029984251336361776,
      "loss": 2.8849,
      "step": 115240
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8353351354599,
      "learning_rate": 0.0002998384228020243,
      "loss": 3.0189,
      "step": 115241
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8923919200897217,
      "learning_rate": 0.00029983433224046085,
      "loss": 3.3185,
      "step": 115242
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.287701368331909,
      "learning_rate": 0.0002998302416789283,
      "loss": 2.8741,
      "step": 115243
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.304405689239502,
      "learning_rate": 0.0002998261511174272,
      "loss": 3.286,
      "step": 115244
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5162720680236816,
      "learning_rate": 0.0002998220605559586,
      "loss": 2.8292,
      "step": 115245
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.906247854232788,
      "learning_rate": 0.000299817969994523,
      "loss": 2.8661,
      "step": 115246
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0832226276397705,
      "learning_rate": 0.00029981387943312117,
      "loss": 3.0588,
      "step": 115247
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8918284177780151,
      "learning_rate": 0.000299809788871754,
      "loss": 3.0096,
      "step": 115248
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6216356754302979,
      "learning_rate": 0.0002998056983104221,
      "loss": 3.0624,
      "step": 115249
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9977346658706665,
      "learning_rate": 0.0002998016077491264,
      "loss": 2.9282,
      "step": 115250
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8629820346832275,
      "learning_rate": 0.0002997975171878677,
      "loss": 2.854,
      "step": 115251
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.284477472305298,
      "learning_rate": 0.0002997934266266465,
      "loss": 2.7244,
      "step": 115252
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.21895170211792,
      "learning_rate": 0.00029978933606546377,
      "loss": 2.9042,
      "step": 115253
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1715080738067627,
      "learning_rate": 0.00029978524550432016,
      "loss": 3.031,
      "step": 115254
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.134237289428711,
      "learning_rate": 0.00029978115494321646,
      "loss": 3.1188,
      "step": 115255
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.090651273727417,
      "learning_rate": 0.0002997770643821535,
      "loss": 3.3815,
      "step": 115256
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.189769744873047,
      "learning_rate": 0.0002997729738211321,
      "loss": 3.0093,
      "step": 115257
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7939236164093018,
      "learning_rate": 0.00029976888326015274,
      "loss": 3.1054,
      "step": 115258
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.132380247116089,
      "learning_rate": 0.0002997647926992164,
      "loss": 2.8839,
      "step": 115259
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.657171368598938,
      "learning_rate": 0.0002997607021383237,
      "loss": 2.8856,
      "step": 115260
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1246025562286377,
      "learning_rate": 0.00029975661157747555,
      "loss": 2.9568,
      "step": 115261
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7985546588897705,
      "learning_rate": 0.00029975252101667265,
      "loss": 2.9579,
      "step": 115262
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5301835536956787,
      "learning_rate": 0.0002997484304559159,
      "loss": 2.8112,
      "step": 115263
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2656965255737305,
      "learning_rate": 0.0002997443398952058,
      "loss": 3.0997,
      "step": 115264
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.0223231315612793,
      "learning_rate": 0.0002997402493345432,
      "loss": 2.8227,
      "step": 115265
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7919657230377197,
      "learning_rate": 0.0002997361587739289,
      "loss": 2.9888,
      "step": 115266
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8552380800247192,
      "learning_rate": 0.00029973206821336365,
      "loss": 3.0778,
      "step": 115267
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7750577926635742,
      "learning_rate": 0.0002997279776528483,
      "loss": 2.9777,
      "step": 115268
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.84122633934021,
      "learning_rate": 0.00029972388709238355,
      "loss": 3.1758,
      "step": 115269
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9085469245910645,
      "learning_rate": 0.00029971979653197,
      "loss": 2.9608,
      "step": 115270
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1161890029907227,
      "learning_rate": 0.0002997157059716086,
      "loss": 2.9474,
      "step": 115271
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9225413799285889,
      "learning_rate": 0.0002997116154113,
      "loss": 3.1462,
      "step": 115272
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.735312819480896,
      "learning_rate": 0.0002997075248510451,
      "loss": 3.0957,
      "step": 115273
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9327396154403687,
      "learning_rate": 0.00029970343429084446,
      "loss": 3.017,
      "step": 115274
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8034416437149048,
      "learning_rate": 0.00029969934373069913,
      "loss": 3.1524,
      "step": 115275
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6925244331359863,
      "learning_rate": 0.00029969525317060964,
      "loss": 3.0295,
      "step": 115276
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1047616004943848,
      "learning_rate": 0.0002996911626105768,
      "loss": 2.9996,
      "step": 115277
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8630985021591187,
      "learning_rate": 0.0002996870720506013,
      "loss": 2.9582,
      "step": 115278
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2487118244171143,
      "learning_rate": 0.00029968298149068404,
      "loss": 2.9509,
      "step": 115279
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4814348220825195,
      "learning_rate": 0.0002996788909308257,
      "loss": 3.1168,
      "step": 115280
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.075265645980835,
      "learning_rate": 0.0002996748003710271,
      "loss": 2.9507,
      "step": 115281
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2940940856933594,
      "learning_rate": 0.00029967070981128894,
      "loss": 2.7843,
      "step": 115282
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.083836078643799,
      "learning_rate": 0.00029966661925161204,
      "loss": 3.1213,
      "step": 115283
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.771862506866455,
      "learning_rate": 0.00029966252869199705,
      "loss": 2.8563,
      "step": 115284
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0344042778015137,
      "learning_rate": 0.00029965843813244483,
      "loss": 3.1732,
      "step": 115285
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.848565936088562,
      "learning_rate": 0.0002996543475729561,
      "loss": 3.0784,
      "step": 115286
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7459635734558105,
      "learning_rate": 0.00029965025701353165,
      "loss": 2.8655,
      "step": 115287
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0786194801330566,
      "learning_rate": 0.0002996461664541722,
      "loss": 3.0917,
      "step": 115288
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1026992797851562,
      "learning_rate": 0.00029964207589487857,
      "loss": 2.9162,
      "step": 115289
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8600354194641113,
      "learning_rate": 0.0002996379853356515,
      "loss": 3.0484,
      "step": 115290
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4330220222473145,
      "learning_rate": 0.0002996338947764917,
      "loss": 2.9496,
      "step": 115291
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7716825008392334,
      "learning_rate": 0.00029962980421739996,
      "loss": 2.8642,
      "step": 115292
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8284636735916138,
      "learning_rate": 0.0002996257136583771,
      "loss": 2.8564,
      "step": 115293
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.094724655151367,
      "learning_rate": 0.0002996216230994238,
      "loss": 2.9039,
      "step": 115294
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.733367681503296,
      "learning_rate": 0.0002996175325405408,
      "loss": 2.9056,
      "step": 115295
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8277467489242554,
      "learning_rate": 0.0002996134419817289,
      "loss": 3.1537,
      "step": 115296
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8221617937088013,
      "learning_rate": 0.0002996093514229889,
      "loss": 2.7907,
      "step": 115297
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8589198589324951,
      "learning_rate": 0.00029960526086432156,
      "loss": 2.7918,
      "step": 115298
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8931738138198853,
      "learning_rate": 0.00029960117030572767,
      "loss": 3.1372,
      "step": 115299
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8771007061004639,
      "learning_rate": 0.0002995970797472078,
      "loss": 2.8732,
      "step": 115300
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.003721237182617,
      "learning_rate": 0.00029959298918876286,
      "loss": 2.8883,
      "step": 115301
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9941164255142212,
      "learning_rate": 0.0002995888986303936,
      "loss": 3.0395,
      "step": 115302
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3335680961608887,
      "learning_rate": 0.00029958480807210075,
      "loss": 2.9008,
      "step": 115303
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0058608055114746,
      "learning_rate": 0.00029958071751388514,
      "loss": 2.9017,
      "step": 115304
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1754579544067383,
      "learning_rate": 0.00029957662695574754,
      "loss": 3.0943,
      "step": 115305
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.5117380619049072,
      "learning_rate": 0.00029957253639768865,
      "loss": 3.1432,
      "step": 115306
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.9398841857910156,
      "learning_rate": 0.0002995684458397091,
      "loss": 2.9469,
      "step": 115307
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9965004920959473,
      "learning_rate": 0.0002995643552818099,
      "loss": 3.4094,
      "step": 115308
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.961810111999512,
      "learning_rate": 0.00029956026472399163,
      "loss": 3.0478,
      "step": 115309
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.321719169616699,
      "learning_rate": 0.00029955617416625507,
      "loss": 2.9524,
      "step": 115310
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.2735188007354736,
      "learning_rate": 0.0002995520836086011,
      "loss": 2.8581,
      "step": 115311
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8099119663238525,
      "learning_rate": 0.0002995479930510305,
      "loss": 2.9533,
      "step": 115312
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.669912338256836,
      "learning_rate": 0.0002995439024935438,
      "loss": 3.0026,
      "step": 115313
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.102154493331909,
      "learning_rate": 0.00029953981193614194,
      "loss": 2.8164,
      "step": 115314
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7573779821395874,
      "learning_rate": 0.0002995357213788256,
      "loss": 3.0475,
      "step": 115315
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.881003975868225,
      "learning_rate": 0.0002995316308215956,
      "loss": 3.1623,
      "step": 115316
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6888054609298706,
      "learning_rate": 0.00029952754026445265,
      "loss": 2.7617,
      "step": 115317
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2062132358551025,
      "learning_rate": 0.0002995234497073977,
      "loss": 2.8068,
      "step": 115318
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7931643724441528,
      "learning_rate": 0.00029951935915043123,
      "loss": 2.9859,
      "step": 115319
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.030297040939331,
      "learning_rate": 0.00029951526859355406,
      "loss": 3.0008,
      "step": 115320
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7405920028686523,
      "learning_rate": 0.00029951117803676705,
      "loss": 2.9867,
      "step": 115321
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.043631076812744,
      "learning_rate": 0.00029950708748007095,
      "loss": 2.8465,
      "step": 115322
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.155829906463623,
      "learning_rate": 0.0002995029969234664,
      "loss": 3.0252,
      "step": 115323
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5177345275878906,
      "learning_rate": 0.00029949890636695446,
      "loss": 3.1269,
      "step": 115324
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5641067028045654,
      "learning_rate": 0.00029949481581053553,
      "loss": 2.7966,
      "step": 115325
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9333012104034424,
      "learning_rate": 0.00029949072525421055,
      "loss": 2.8795,
      "step": 115326
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.798670530319214,
      "learning_rate": 0.0002994866346979802,
      "loss": 2.7376,
      "step": 115327
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.1964430809020996,
      "learning_rate": 0.00029948254414184536,
      "loss": 3.0344,
      "step": 115328
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.098212718963623,
      "learning_rate": 0.0002994784535858067,
      "loss": 3.0418,
      "step": 115329
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6557958126068115,
      "learning_rate": 0.0002994743630298651,
      "loss": 3.1,
      "step": 115330
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.7392423152923584,
      "learning_rate": 0.00029947027247402113,
      "loss": 2.9265,
      "step": 115331
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5906622409820557,
      "learning_rate": 0.00029946618191827566,
      "loss": 2.8118,
      "step": 115332
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1948046684265137,
      "learning_rate": 0.0002994620913626294,
      "loss": 3.0093,
      "step": 115333
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0763707160949707,
      "learning_rate": 0.00029945800080708314,
      "loss": 2.8264,
      "step": 115334
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.156810760498047,
      "learning_rate": 0.00029945391025163766,
      "loss": 2.8266,
      "step": 115335
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9115480184555054,
      "learning_rate": 0.0002994498196962939,
      "loss": 3.0986,
      "step": 115336
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6453351974487305,
      "learning_rate": 0.00029944572914105224,
      "loss": 2.9391,
      "step": 115337
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.859439492225647,
      "learning_rate": 0.00029944163858591365,
      "loss": 3.1366,
      "step": 115338
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9311288595199585,
      "learning_rate": 0.0002994375480308788,
      "loss": 2.7836,
      "step": 115339
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9855890274047852,
      "learning_rate": 0.0002994334574759486,
      "loss": 3.0596,
      "step": 115340
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.054279088973999,
      "learning_rate": 0.00029942936692112373,
      "loss": 3.0873,
      "step": 115341
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8429256677627563,
      "learning_rate": 0.00029942527636640505,
      "loss": 2.8749,
      "step": 115342
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8262673616409302,
      "learning_rate": 0.0002994211858117931,
      "loss": 2.9302,
      "step": 115343
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0269246101379395,
      "learning_rate": 0.0002994170952572888,
      "loss": 3.0094,
      "step": 115344
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6906638145446777,
      "learning_rate": 0.0002994130047028928,
      "loss": 3.0525,
      "step": 115345
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6476380825042725,
      "learning_rate": 0.00029940891414860596,
      "loss": 3.0337,
      "step": 115346
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4532032012939453,
      "learning_rate": 0.000299404823594429,
      "loss": 3.0544,
      "step": 115347
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0883800983428955,
      "learning_rate": 0.0002994007330403629,
      "loss": 3.0578,
      "step": 115348
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1535494327545166,
      "learning_rate": 0.00029939664248640805,
      "loss": 3.0731,
      "step": 115349
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.780426263809204,
      "learning_rate": 0.00029939255193256535,
      "loss": 3.0695,
      "step": 115350
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.599285364151001,
      "learning_rate": 0.00029938846137883557,
      "loss": 2.8201,
      "step": 115351
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8929229974746704,
      "learning_rate": 0.0002993843708252195,
      "loss": 3.1136,
      "step": 115352
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7760190963745117,
      "learning_rate": 0.0002993802802717179,
      "loss": 2.83,
      "step": 115353
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.776989698410034,
      "learning_rate": 0.00029937618971833166,
      "loss": 2.8583,
      "step": 115354
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7062705755233765,
      "learning_rate": 0.00029937209916506126,
      "loss": 2.8818,
      "step": 115355
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7595247030258179,
      "learning_rate": 0.00029936800861190757,
      "loss": 3.097,
      "step": 115356
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7037707567214966,
      "learning_rate": 0.0002993639180588714,
      "loss": 3.0293,
      "step": 115357
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.145416736602783,
      "learning_rate": 0.00029935982750595354,
      "loss": 2.8246,
      "step": 115358
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5855438709259033,
      "learning_rate": 0.00029935573695315465,
      "loss": 3.1932,
      "step": 115359
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.779441475868225,
      "learning_rate": 0.0002993516464004757,
      "loss": 2.9419,
      "step": 115360
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8740744590759277,
      "learning_rate": 0.0002993475558479171,
      "loss": 3.1303,
      "step": 115361
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6304534673690796,
      "learning_rate": 0.00029934346529547986,
      "loss": 3.2262,
      "step": 115362
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.816524863243103,
      "learning_rate": 0.00029933937474316464,
      "loss": 2.6507,
      "step": 115363
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9576077461242676,
      "learning_rate": 0.00029933528419097226,
      "loss": 3.1642,
      "step": 115364
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.5762834548950195,
      "learning_rate": 0.0002993311936389035,
      "loss": 3.0249,
      "step": 115365
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.672890305519104,
      "learning_rate": 0.00029932710308695906,
      "loss": 2.9056,
      "step": 115366
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9702640771865845,
      "learning_rate": 0.00029932301253513975,
      "loss": 3.011,
      "step": 115367
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7519326210021973,
      "learning_rate": 0.0002993189219834463,
      "loss": 2.8614,
      "step": 115368
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.425693988800049,
      "learning_rate": 0.00029931483143187947,
      "loss": 2.9124,
      "step": 115369
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6579349040985107,
      "learning_rate": 0.00029931074088043996,
      "loss": 3.1363,
      "step": 115370
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.334287166595459,
      "learning_rate": 0.00029930665032912865,
      "loss": 3.2589,
      "step": 115371
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1278903484344482,
      "learning_rate": 0.0002993025597779462,
      "loss": 2.9987,
      "step": 115372
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.574148654937744,
      "learning_rate": 0.00029929846922689354,
      "loss": 3.1258,
      "step": 115373
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3623294830322266,
      "learning_rate": 0.00029929437867597125,
      "loss": 3.132,
      "step": 115374
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.54561448097229,
      "learning_rate": 0.00029929028812518014,
      "loss": 2.945,
      "step": 115375
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3055169582366943,
      "learning_rate": 0.000299286197574521,
      "loss": 3.1389,
      "step": 115376
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2260186672210693,
      "learning_rate": 0.0002992821070239945,
      "loss": 2.9823,
      "step": 115377
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0013279914855957,
      "learning_rate": 0.0002992780164736015,
      "loss": 2.8311,
      "step": 115378
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1662306785583496,
      "learning_rate": 0.0002992739259233428,
      "loss": 2.9046,
      "step": 115379
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6887876987457275,
      "learning_rate": 0.00029926983537321896,
      "loss": 2.9917,
      "step": 115380
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7917473316192627,
      "learning_rate": 0.0002992657448232309,
      "loss": 2.7432,
      "step": 115381
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8807942867279053,
      "learning_rate": 0.0002992616542733795,
      "loss": 2.9896,
      "step": 115382
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7866348028182983,
      "learning_rate": 0.0002992575637236652,
      "loss": 2.65,
      "step": 115383
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2179603576660156,
      "learning_rate": 0.000299253473174089,
      "loss": 2.983,
      "step": 115384
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.058344602584839,
      "learning_rate": 0.00029924938262465166,
      "loss": 2.9182,
      "step": 115385
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7282463312149048,
      "learning_rate": 0.00029924529207535377,
      "loss": 3.1378,
      "step": 115386
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.481184482574463,
      "learning_rate": 0.0002992412015261963,
      "loss": 3.3923,
      "step": 115387
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8137966394424438,
      "learning_rate": 0.00029923711097717975,
      "loss": 3.0171,
      "step": 115388
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.055647611618042,
      "learning_rate": 0.0002992330204283052,
      "loss": 2.8819,
      "step": 115389
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3564767837524414,
      "learning_rate": 0.0002992289298795731,
      "loss": 2.8048,
      "step": 115390
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.0757086277008057,
      "learning_rate": 0.0002992248393309845,
      "loss": 2.9286,
      "step": 115391
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9831762313842773,
      "learning_rate": 0.00029922074878253993,
      "loss": 3.0232,
      "step": 115392
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8940447568893433,
      "learning_rate": 0.00029921665823424027,
      "loss": 3.229,
      "step": 115393
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8933806419372559,
      "learning_rate": 0.00029921256768608616,
      "loss": 3.0276,
      "step": 115394
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8375316858291626,
      "learning_rate": 0.00029920847713807853,
      "loss": 3.0543,
      "step": 115395
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7488622665405273,
      "learning_rate": 0.000299204386590218,
      "loss": 3.0898,
      "step": 115396
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0601086616516113,
      "learning_rate": 0.00029920029604250557,
      "loss": 3.0711,
      "step": 115397
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.133875608444214,
      "learning_rate": 0.00029919620549494165,
      "loss": 2.7708,
      "step": 115398
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.934812307357788,
      "learning_rate": 0.0002991921149475272,
      "loss": 3.1363,
      "step": 115399
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1651690006256104,
      "learning_rate": 0.00029918802440026293,
      "loss": 3.0341,
      "step": 115400
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8048447370529175,
      "learning_rate": 0.0002991839338531496,
      "loss": 3.1328,
      "step": 115401
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.546686887741089,
      "learning_rate": 0.00029917984330618807,
      "loss": 2.9797,
      "step": 115402
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.757183313369751,
      "learning_rate": 0.0002991757527593791,
      "loss": 3.0928,
      "step": 115403
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4613537788391113,
      "learning_rate": 0.00029917166221272323,
      "loss": 2.9133,
      "step": 115404
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.210953712463379,
      "learning_rate": 0.0002991675716662214,
      "loss": 3.043,
      "step": 115405
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.89969801902771,
      "learning_rate": 0.00029916348111987427,
      "loss": 3.0171,
      "step": 115406
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4845516681671143,
      "learning_rate": 0.00029915939057368273,
      "loss": 3.1931,
      "step": 115407
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6813782453536987,
      "learning_rate": 0.0002991553000276475,
      "loss": 3.0956,
      "step": 115408
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6459580659866333,
      "learning_rate": 0.0002991512094817694,
      "loss": 2.9394,
      "step": 115409
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7621971368789673,
      "learning_rate": 0.00029914711893604893,
      "loss": 2.9404,
      "step": 115410
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.784848928451538,
      "learning_rate": 0.0002991430283904871,
      "loss": 2.9233,
      "step": 115411
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3027029037475586,
      "learning_rate": 0.0002991389378450846,
      "loss": 3.1345,
      "step": 115412
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9188119173049927,
      "learning_rate": 0.0002991348472998421,
      "loss": 3.1198,
      "step": 115413
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.082113742828369,
      "learning_rate": 0.0002991307567547605,
      "loss": 3.0086,
      "step": 115414
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0589704513549805,
      "learning_rate": 0.0002991266662098407,
      "loss": 3.2307,
      "step": 115415
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8405439853668213,
      "learning_rate": 0.00029912257566508307,
      "loss": 2.8642,
      "step": 115416
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7085144519805908,
      "learning_rate": 0.00029911848512048856,
      "loss": 2.9798,
      "step": 115417
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3495283126831055,
      "learning_rate": 0.000299114394576058,
      "loss": 2.9776,
      "step": 115418
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1988284587860107,
      "learning_rate": 0.00029911030403179204,
      "loss": 2.9528,
      "step": 115419
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0620663166046143,
      "learning_rate": 0.0002991062134876915,
      "loss": 2.9268,
      "step": 115420
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1582729816436768,
      "learning_rate": 0.0002991021229437573,
      "loss": 3.1419,
      "step": 115421
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0087087154388428,
      "learning_rate": 0.00029909803239998984,
      "loss": 2.9284,
      "step": 115422
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8591490983963013,
      "learning_rate": 0.0002990939418563901,
      "loss": 3.026,
      "step": 115423
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.9320032596588135,
      "learning_rate": 0.0002990898513129588,
      "loss": 2.7956,
      "step": 115424
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6706894636154175,
      "learning_rate": 0.0002990857607696968,
      "loss": 3.1729,
      "step": 115425
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4183881282806396,
      "learning_rate": 0.00029908167022660466,
      "loss": 2.8458,
      "step": 115426
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.6076908111572266,
      "learning_rate": 0.00029907757968368346,
      "loss": 2.9884,
      "step": 115427
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3361587524414062,
      "learning_rate": 0.0002990734891409336,
      "loss": 3.0877,
      "step": 115428
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.166236639022827,
      "learning_rate": 0.000299069398598356,
      "loss": 2.8914,
      "step": 115429
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9520512819290161,
      "learning_rate": 0.0002990653080559514,
      "loss": 3.0468,
      "step": 115430
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0512239933013916,
      "learning_rate": 0.00029906121751372057,
      "loss": 3.1661,
      "step": 115431
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6544383764266968,
      "learning_rate": 0.0002990571269716643,
      "loss": 3.1857,
      "step": 115432
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4249613285064697,
      "learning_rate": 0.0002990530364297835,
      "loss": 2.9579,
      "step": 115433
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.049583911895752,
      "learning_rate": 0.00029904894588807853,
      "loss": 2.9457,
      "step": 115434
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1825737953186035,
      "learning_rate": 0.0002990448553465504,
      "loss": 3.0868,
      "step": 115435
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.049913167953491,
      "learning_rate": 0.0002990407648051999,
      "loss": 3.0266,
      "step": 115436
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7080622911453247,
      "learning_rate": 0.0002990366742640277,
      "loss": 2.9625,
      "step": 115437
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.174151659011841,
      "learning_rate": 0.0002990325837230346,
      "loss": 2.8996,
      "step": 115438
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.386514186859131,
      "learning_rate": 0.0002990284931822214,
      "loss": 2.9333,
      "step": 115439
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.644816279411316,
      "learning_rate": 0.00029902440264158896,
      "loss": 3.0431,
      "step": 115440
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7258388996124268,
      "learning_rate": 0.00029902031210113773,
      "loss": 3.0444,
      "step": 115441
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.857404112815857,
      "learning_rate": 0.00029901622156086865,
      "loss": 3.2152,
      "step": 115442
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.857834815979004,
      "learning_rate": 0.00029901213102078247,
      "loss": 3.171,
      "step": 115443
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8924821615219116,
      "learning_rate": 0.00029900804048087995,
      "loss": 2.8878,
      "step": 115444
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.372241973876953,
      "learning_rate": 0.00029900394994116186,
      "loss": 2.941,
      "step": 115445
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8676700592041016,
      "learning_rate": 0.0002989998594016291,
      "loss": 2.9576,
      "step": 115446
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.889841079711914,
      "learning_rate": 0.00029899576886228214,
      "loss": 3.0294,
      "step": 115447
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4583332538604736,
      "learning_rate": 0.00029899167832312187,
      "loss": 3.025,
      "step": 115448
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8427430391311646,
      "learning_rate": 0.0002989875877841491,
      "loss": 2.6768,
      "step": 115449
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.684430718421936,
      "learning_rate": 0.0002989834972453646,
      "loss": 3.153,
      "step": 115450
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0048975944519043,
      "learning_rate": 0.000298979406706769,
      "loss": 2.9262,
      "step": 115451
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9091967344284058,
      "learning_rate": 0.0002989753161683633,
      "loss": 3.0054,
      "step": 115452
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4719133377075195,
      "learning_rate": 0.000298971225630148,
      "loss": 2.781,
      "step": 115453
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.919739007949829,
      "learning_rate": 0.00029896713509212393,
      "loss": 2.9231,
      "step": 115454
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4998037815093994,
      "learning_rate": 0.0002989630445542919,
      "loss": 3.102,
      "step": 115455
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8388772010803223,
      "learning_rate": 0.00029895895401665267,
      "loss": 3.0568,
      "step": 115456
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.031128406524658,
      "learning_rate": 0.000298954863479207,
      "loss": 3.0703,
      "step": 115457
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.047994613647461,
      "learning_rate": 0.00029895077294195575,
      "loss": 2.794,
      "step": 115458
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7895809412002563,
      "learning_rate": 0.00029894668240489943,
      "loss": 3.102,
      "step": 115459
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.438447952270508,
      "learning_rate": 0.000298942591868039,
      "loss": 2.7925,
      "step": 115460
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7751903533935547,
      "learning_rate": 0.0002989385013313752,
      "loss": 3.0559,
      "step": 115461
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7494587898254395,
      "learning_rate": 0.00029893441079490866,
      "loss": 3.0126,
      "step": 115462
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.9927000999450684,
      "learning_rate": 0.00029893032025864023,
      "loss": 2.857,
      "step": 115463
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9019672870635986,
      "learning_rate": 0.00029892622972257075,
      "loss": 2.9828,
      "step": 115464
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1980483531951904,
      "learning_rate": 0.00029892213918670087,
      "loss": 2.751,
      "step": 115465
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.944163203239441,
      "learning_rate": 0.00029891804865103135,
      "loss": 2.8936,
      "step": 115466
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.5754332542419434,
      "learning_rate": 0.00029891395811556306,
      "loss": 2.8411,
      "step": 115467
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.026144504547119,
      "learning_rate": 0.00029890986758029665,
      "loss": 2.8741,
      "step": 115468
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2326014041900635,
      "learning_rate": 0.00029890577704523287,
      "loss": 2.9972,
      "step": 115469
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9603114128112793,
      "learning_rate": 0.0002989016865103726,
      "loss": 3.0251,
      "step": 115470
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.714479684829712,
      "learning_rate": 0.00029889759597571654,
      "loss": 2.995,
      "step": 115471
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.607307195663452,
      "learning_rate": 0.00029889350544126534,
      "loss": 2.8708,
      "step": 115472
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3294107913970947,
      "learning_rate": 0.0002988894149070199,
      "loss": 3.0732,
      "step": 115473
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1542110443115234,
      "learning_rate": 0.000298885324372981,
      "loss": 3.0272,
      "step": 115474
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.137493133544922,
      "learning_rate": 0.00029888123383914924,
      "loss": 3.0344,
      "step": 115475
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0859813690185547,
      "learning_rate": 0.0002988771433055256,
      "loss": 3.1154,
      "step": 115476
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.713365077972412,
      "learning_rate": 0.00029887305277211064,
      "loss": 3.0996,
      "step": 115477
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.089545249938965,
      "learning_rate": 0.0002988689622389052,
      "loss": 3.2337,
      "step": 115478
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.17270565032959,
      "learning_rate": 0.00029886487170591,
      "loss": 2.9807,
      "step": 115479
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5304784774780273,
      "learning_rate": 0.0002988607811731259,
      "loss": 3.0098,
      "step": 115480
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7665493488311768,
      "learning_rate": 0.0002988566906405536,
      "loss": 3.0899,
      "step": 115481
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.962080717086792,
      "learning_rate": 0.00029885260010819394,
      "loss": 3.2844,
      "step": 115482
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1752233505249023,
      "learning_rate": 0.00029884850957604753,
      "loss": 3.0883,
      "step": 115483
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9918639659881592,
      "learning_rate": 0.00029884441904411515,
      "loss": 3.1931,
      "step": 115484
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1489224433898926,
      "learning_rate": 0.00029884032851239766,
      "loss": 3.0536,
      "step": 115485
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.028082847595215,
      "learning_rate": 0.00029883623798089576,
      "loss": 2.9629,
      "step": 115486
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5109198093414307,
      "learning_rate": 0.0002988321474496102,
      "loss": 3.1183,
      "step": 115487
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0239930152893066,
      "learning_rate": 0.0002988280569185419,
      "loss": 2.7881,
      "step": 115488
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.108111619949341,
      "learning_rate": 0.00029882396638769137,
      "loss": 3.1072,
      "step": 115489
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.068453788757324,
      "learning_rate": 0.0002988198758570595,
      "loss": 2.7867,
      "step": 115490
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.9116697311401367,
      "learning_rate": 0.00029881578532664705,
      "loss": 3.1413,
      "step": 115491
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.5714216232299805,
      "learning_rate": 0.00029881169479645475,
      "loss": 3.0465,
      "step": 115492
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.454591989517212,
      "learning_rate": 0.00029880760426648337,
      "loss": 3.1235,
      "step": 115493
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.8752217292785645,
      "learning_rate": 0.0002988035137367338,
      "loss": 2.8426,
      "step": 115494
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.077529191970825,
      "learning_rate": 0.00029879942320720657,
      "loss": 3.116,
      "step": 115495
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0959830284118652,
      "learning_rate": 0.0002987953326779025,
      "loss": 3.1551,
      "step": 115496
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.106746196746826,
      "learning_rate": 0.0002987912421488225,
      "loss": 3.0491,
      "step": 115497
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.410356283187866,
      "learning_rate": 0.0002987871516199672,
      "loss": 3.114,
      "step": 115498
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8402584791183472,
      "learning_rate": 0.00029878306109133734,
      "loss": 2.8402,
      "step": 115499
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.0007219314575195,
      "learning_rate": 0.0002987789705629339,
      "loss": 3.0075,
      "step": 115500
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.951123833656311,
      "learning_rate": 0.00029877488003475733,
      "loss": 3.0472,
      "step": 115501
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.2233734130859375,
      "learning_rate": 0.0002987707895068085,
      "loss": 3.1599,
      "step": 115502
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7744224071502686,
      "learning_rate": 0.0002987666989790882,
      "loss": 2.9342,
      "step": 115503
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.232917070388794,
      "learning_rate": 0.00029876260845159726,
      "loss": 2.8884,
      "step": 115504
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.30987811088562,
      "learning_rate": 0.00029875851792433633,
      "loss": 2.9974,
      "step": 115505
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.000133514404297,
      "learning_rate": 0.0002987544273973063,
      "loss": 2.9806,
      "step": 115506
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7028497457504272,
      "learning_rate": 0.00029875033687050786,
      "loss": 3.0011,
      "step": 115507
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.536388397216797,
      "learning_rate": 0.0002987462463439417,
      "loss": 2.7802,
      "step": 115508
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1552858352661133,
      "learning_rate": 0.00029874215581760855,
      "loss": 2.9151,
      "step": 115509
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9688982963562012,
      "learning_rate": 0.00029873806529150934,
      "loss": 2.9946,
      "step": 115510
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1158201694488525,
      "learning_rate": 0.0002987339747656447,
      "loss": 3.0161,
      "step": 115511
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7182879447937012,
      "learning_rate": 0.0002987298842400155,
      "loss": 3.1907,
      "step": 115512
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.956286072731018,
      "learning_rate": 0.0002987257937146225,
      "loss": 2.9869,
      "step": 115513
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.132681369781494,
      "learning_rate": 0.0002987217031894663,
      "loss": 2.9462,
      "step": 115514
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8443878889083862,
      "learning_rate": 0.0002987176126645478,
      "loss": 3.1614,
      "step": 115515
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8789174556732178,
      "learning_rate": 0.0002987135221398676,
      "loss": 2.8056,
      "step": 115516
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.005845785140991,
      "learning_rate": 0.0002987094316154267,
      "loss": 2.8642,
      "step": 115517
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6423115730285645,
      "learning_rate": 0.0002987053410912257,
      "loss": 2.9273,
      "step": 115518
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0996639728546143,
      "learning_rate": 0.0002987012505672655,
      "loss": 3.2851,
      "step": 115519
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8871945142745972,
      "learning_rate": 0.00029869716004354667,
      "loss": 3.0717,
      "step": 115520
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8307424783706665,
      "learning_rate": 0.0002986930695200701,
      "loss": 3.0532,
      "step": 115521
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1626148223876953,
      "learning_rate": 0.0002986889789968364,
      "loss": 3.0464,
      "step": 115522
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9516181945800781,
      "learning_rate": 0.00029868488847384654,
      "loss": 2.8483,
      "step": 115523
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.228600025177002,
      "learning_rate": 0.00029868079795110114,
      "loss": 2.9831,
      "step": 115524
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1105668544769287,
      "learning_rate": 0.00029867670742860116,
      "loss": 3.1062,
      "step": 115525
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7657663822174072,
      "learning_rate": 0.00029867261690634707,
      "loss": 3.0561,
      "step": 115526
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.58157217502594,
      "learning_rate": 0.00029866852638433974,
      "loss": 3.0994,
      "step": 115527
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.078953742980957,
      "learning_rate": 0.00029866443586258,
      "loss": 3.0935,
      "step": 115528
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8647093772888184,
      "learning_rate": 0.00029866034534106853,
      "loss": 2.978,
      "step": 115529
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.63582181930542,
      "learning_rate": 0.00029865625481980616,
      "loss": 3.0307,
      "step": 115530
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.517078399658203,
      "learning_rate": 0.0002986521642987937,
      "loss": 2.9686,
      "step": 115531
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.090177536010742,
      "learning_rate": 0.00029864807377803174,
      "loss": 2.6068,
      "step": 115532
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.096754550933838,
      "learning_rate": 0.0002986439832575211,
      "loss": 2.9633,
      "step": 115533
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.881430149078369,
      "learning_rate": 0.0002986398927372626,
      "loss": 3.2191,
      "step": 115534
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.128526449203491,
      "learning_rate": 0.00029863580221725693,
      "loss": 2.8928,
      "step": 115535
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6812505722045898,
      "learning_rate": 0.0002986317116975049,
      "loss": 2.9119,
      "step": 115536
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7654030323028564,
      "learning_rate": 0.00029862762117800745,
      "loss": 2.9885,
      "step": 115537
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.0164999961853027,
      "learning_rate": 0.00029862353065876495,
      "loss": 3.2693,
      "step": 115538
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9223055839538574,
      "learning_rate": 0.00029861944013977843,
      "loss": 3.1865,
      "step": 115539
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.02651309967041,
      "learning_rate": 0.0002986153496210485,
      "loss": 2.928,
      "step": 115540
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.295546293258667,
      "learning_rate": 0.00029861125910257604,
      "loss": 2.8857,
      "step": 115541
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.1612675189971924,
      "learning_rate": 0.0002986071685843618,
      "loss": 2.6778,
      "step": 115542
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.5659501552581787,
      "learning_rate": 0.00029860307806640657,
      "loss": 3.1093,
      "step": 115543
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.966287612915039,
      "learning_rate": 0.000298598987548711,
      "loss": 2.8602,
      "step": 115544
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6656520366668701,
      "learning_rate": 0.0002985948970312759,
      "loss": 3.179,
      "step": 115545
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.7182562351226807,
      "learning_rate": 0.000298590806514102,
      "loss": 2.9607,
      "step": 115546
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.037281036376953,
      "learning_rate": 0.00029858671599719013,
      "loss": 2.9905,
      "step": 115547
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6833946704864502,
      "learning_rate": 0.000298582625480541,
      "loss": 3.0099,
      "step": 115548
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3357532024383545,
      "learning_rate": 0.0002985785349641554,
      "loss": 3.0173,
      "step": 115549
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.889672756195068,
      "learning_rate": 0.00029857444444803406,
      "loss": 2.863,
      "step": 115550
    },
    {
      "epoch": 1.5,
      "grad_norm": 4.437634468078613,
      "learning_rate": 0.00029857035393217785,
      "loss": 2.9999,
      "step": 115551
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.4112727642059326,
      "learning_rate": 0.0002985662634165873,
      "loss": 3.1231,
      "step": 115552
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.3935489654541016,
      "learning_rate": 0.0002985621729012634,
      "loss": 3.0784,
      "step": 115553
    },
    {
      "epoch": 1.5,
      "grad_norm": 3.1777379512786865,
      "learning_rate": 0.0002985580823862067,
      "loss": 2.7865,
      "step": 115554
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.678795337677002,
      "learning_rate": 0.00029855399187141824,
      "loss": 3.0605,
      "step": 115555
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.250155448913574,
      "learning_rate": 0.0002985499013568985,
      "loss": 3.0402,
      "step": 115556
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.075503349304199,
      "learning_rate": 0.00029854581084264844,
      "loss": 2.9025,
      "step": 115557
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.5578442811965942,
      "learning_rate": 0.00029854172032866866,
      "loss": 3.0349,
      "step": 115558
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.739105463027954,
      "learning_rate": 0.00029853762981496015,
      "loss": 2.9996,
      "step": 115559
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.120112419128418,
      "learning_rate": 0.0002985335393015234,
      "loss": 3.015,
      "step": 115560
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.04590106010437,
      "learning_rate": 0.00029852944878835935,
      "loss": 2.6838,
      "step": 115561
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.545905828475952,
      "learning_rate": 0.00029852535827546865,
      "loss": 2.9699,
      "step": 115562
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.108405113220215,
      "learning_rate": 0.00029852126776285214,
      "loss": 2.9425,
      "step": 115563
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0306789875030518,
      "learning_rate": 0.00029851717725051054,
      "loss": 2.7886,
      "step": 115564
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9574170112609863,
      "learning_rate": 0.0002985130867384446,
      "loss": 3.1567,
      "step": 115565
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.807996392250061,
      "learning_rate": 0.0002985089962266552,
      "loss": 2.9484,
      "step": 115566
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6661158800125122,
      "learning_rate": 0.00029850490571514297,
      "loss": 2.9283,
      "step": 115567
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6376925706863403,
      "learning_rate": 0.0002985008152039087,
      "loss": 2.9939,
      "step": 115568
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9050426483154297,
      "learning_rate": 0.0002984967246929531,
      "loss": 2.9606,
      "step": 115569
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0641720294952393,
      "learning_rate": 0.00029849263418227703,
      "loss": 2.9353,
      "step": 115570
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7405555248260498,
      "learning_rate": 0.00029848854367188113,
      "loss": 3.0264,
      "step": 115571
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.6718065738677979,
      "learning_rate": 0.0002984844531617663,
      "loss": 3.2255,
      "step": 115572
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.931762218475342,
      "learning_rate": 0.00029848036265193333,
      "loss": 3.136,
      "step": 115573
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.417593240737915,
      "learning_rate": 0.00029847627214238284,
      "loss": 2.8753,
      "step": 115574
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.8361011743545532,
      "learning_rate": 0.0002984721816331156,
      "loss": 2.8891,
      "step": 115575
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0387861728668213,
      "learning_rate": 0.0002984680911241324,
      "loss": 3.1011,
      "step": 115576
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.314783811569214,
      "learning_rate": 0.000298464000615434,
      "loss": 2.928,
      "step": 115577
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.001110076904297,
      "learning_rate": 0.0002984599101070212,
      "loss": 2.9122,
      "step": 115578
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.748798131942749,
      "learning_rate": 0.0002984558195988947,
      "loss": 3.0731,
      "step": 115579
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.9850959777832031,
      "learning_rate": 0.00029845172909105547,
      "loss": 3.0952,
      "step": 115580
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.697054386138916,
      "learning_rate": 0.0002984476385835039,
      "loss": 3.0917,
      "step": 115581
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0241594314575195,
      "learning_rate": 0.000298443548076241,
      "loss": 3.1312,
      "step": 115582
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.3476450443267822,
      "learning_rate": 0.0002984394575692674,
      "loss": 2.9829,
      "step": 115583
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.0636651515960693,
      "learning_rate": 0.000298435367062584,
      "loss": 2.8346,
      "step": 115584
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.7184187173843384,
      "learning_rate": 0.00029843127655619146,
      "loss": 3.1305,
      "step": 115585
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.914541482925415,
      "learning_rate": 0.00029842718605009073,
      "loss": 3.1494,
      "step": 115586
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.056196928024292,
      "learning_rate": 0.0002984230955442823,
      "loss": 3.1171,
      "step": 115587
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7491849660873413,
      "learning_rate": 0.000298419005038767,
      "loss": 2.9782,
      "step": 115588
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7467424869537354,
      "learning_rate": 0.0002984149145335457,
      "loss": 3.2588,
      "step": 115589
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.5638062953948975,
      "learning_rate": 0.00029841082402861903,
      "loss": 3.2702,
      "step": 115590
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.48783802986145,
      "learning_rate": 0.00029840673352398784,
      "loss": 2.9514,
      "step": 115591
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.708630084991455,
      "learning_rate": 0.000298402643019653,
      "loss": 3.0823,
      "step": 115592
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.123350143432617,
      "learning_rate": 0.00029839855251561503,
      "loss": 2.7555,
      "step": 115593
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.884727120399475,
      "learning_rate": 0.0002983944620118748,
      "loss": 3.0834,
      "step": 115594
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.439859390258789,
      "learning_rate": 0.00029839037150843306,
      "loss": 2.7782,
      "step": 115595
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.190417766571045,
      "learning_rate": 0.00029838628100529053,
      "loss": 2.6999,
      "step": 115596
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1763904094696045,
      "learning_rate": 0.0002983821905024481,
      "loss": 2.9031,
      "step": 115597
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.232825517654419,
      "learning_rate": 0.00029837809999990654,
      "loss": 3.0626,
      "step": 115598
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.263216972351074,
      "learning_rate": 0.0002983740094976664,
      "loss": 2.7649,
      "step": 115599
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.2081358432769775,
      "learning_rate": 0.0002983699189957285,
      "loss": 3.1298,
      "step": 115600
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4029815196990967,
      "learning_rate": 0.00029836582849409373,
      "loss": 2.808,
      "step": 115601
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1658661365509033,
      "learning_rate": 0.0002983617379927628,
      "loss": 3.0071,
      "step": 115602
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.829321265220642,
      "learning_rate": 0.0002983576474917364,
      "loss": 3.1222,
      "step": 115603
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8720755577087402,
      "learning_rate": 0.0002983535569910155,
      "loss": 3.0014,
      "step": 115604
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.198681354522705,
      "learning_rate": 0.0002983494664906006,
      "loss": 2.5917,
      "step": 115605
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5197958946228027,
      "learning_rate": 0.00029834537599049253,
      "loss": 3.0884,
      "step": 115606
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3127503395080566,
      "learning_rate": 0.0002983412854906921,
      "loss": 3.0179,
      "step": 115607
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6248421669006348,
      "learning_rate": 0.0002983371949912001,
      "loss": 3.0999,
      "step": 115608
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9674839973449707,
      "learning_rate": 0.00029833310449201716,
      "loss": 2.8173,
      "step": 115609
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9009718894958496,
      "learning_rate": 0.0002983290139931443,
      "loss": 3.1179,
      "step": 115610
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3208370208740234,
      "learning_rate": 0.000298324923494582,
      "loss": 3.0287,
      "step": 115611
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0000698566436768,
      "learning_rate": 0.0002983208329963311,
      "loss": 3.0992,
      "step": 115612
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6190688610076904,
      "learning_rate": 0.0002983167424983924,
      "loss": 3.0554,
      "step": 115613
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9352549314498901,
      "learning_rate": 0.0002983126520007666,
      "loss": 2.9628,
      "step": 115614
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0602025985717773,
      "learning_rate": 0.0002983085615034546,
      "loss": 2.9115,
      "step": 115615
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9130291938781738,
      "learning_rate": 0.00029830447100645714,
      "loss": 3.275,
      "step": 115616
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8813949823379517,
      "learning_rate": 0.00029830038050977477,
      "loss": 2.7926,
      "step": 115617
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.969620943069458,
      "learning_rate": 0.00029829629001340845,
      "loss": 3.045,
      "step": 115618
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7936577796936035,
      "learning_rate": 0.0002982921995173588,
      "loss": 3.2416,
      "step": 115619
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8185467720031738,
      "learning_rate": 0.00029828810902162676,
      "loss": 3.0422,
      "step": 115620
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2305078506469727,
      "learning_rate": 0.0002982840185262129,
      "loss": 2.6301,
      "step": 115621
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.640101909637451,
      "learning_rate": 0.0002982799280311183,
      "loss": 2.867,
      "step": 115622
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.926659107208252,
      "learning_rate": 0.00029827583753634326,
      "loss": 3.0395,
      "step": 115623
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.914724588394165,
      "learning_rate": 0.00029827174704188887,
      "loss": 2.9729,
      "step": 115624
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.7518818378448486,
      "learning_rate": 0.00029826765654775573,
      "loss": 3.1054,
      "step": 115625
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.183511972427368,
      "learning_rate": 0.0002982635660539447,
      "loss": 2.9635,
      "step": 115626
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8506683111190796,
      "learning_rate": 0.00029825947556045646,
      "loss": 3.1093,
      "step": 115627
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.389195442199707,
      "learning_rate": 0.0002982553850672919,
      "loss": 3.1476,
      "step": 115628
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8140499591827393,
      "learning_rate": 0.0002982512945744517,
      "loss": 2.9096,
      "step": 115629
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8385709524154663,
      "learning_rate": 0.0002982472040819366,
      "loss": 2.9599,
      "step": 115630
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9375662803649902,
      "learning_rate": 0.0002982431135897473,
      "loss": 2.9021,
      "step": 115631
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5561063289642334,
      "learning_rate": 0.0002982390230978847,
      "loss": 2.9804,
      "step": 115632
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0678458213806152,
      "learning_rate": 0.0002982349326063495,
      "loss": 3.0263,
      "step": 115633
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8872389793395996,
      "learning_rate": 0.0002982308421151425,
      "loss": 3.0336,
      "step": 115634
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4243762493133545,
      "learning_rate": 0.00029822675162426436,
      "loss": 2.7916,
      "step": 115635
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0369069576263428,
      "learning_rate": 0.000298222661133716,
      "loss": 3.0753,
      "step": 115636
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.010862112045288,
      "learning_rate": 0.00029821857064349795,
      "loss": 3.1135,
      "step": 115637
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9224528074264526,
      "learning_rate": 0.0002982144801536111,
      "loss": 2.9738,
      "step": 115638
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6986643075942993,
      "learning_rate": 0.00029821038966405626,
      "loss": 2.916,
      "step": 115639
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.565467119216919,
      "learning_rate": 0.00029820629917483413,
      "loss": 2.8566,
      "step": 115640
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8672910928726196,
      "learning_rate": 0.0002982022086859455,
      "loss": 2.7936,
      "step": 115641
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9280064105987549,
      "learning_rate": 0.0002981981181973911,
      "loss": 2.9546,
      "step": 115642
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7126654386520386,
      "learning_rate": 0.00029819402770917176,
      "loss": 2.9922,
      "step": 115643
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7756634950637817,
      "learning_rate": 0.00029818993722128813,
      "loss": 2.8378,
      "step": 115644
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7239174842834473,
      "learning_rate": 0.000298185846733741,
      "loss": 3.058,
      "step": 115645
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1713109016418457,
      "learning_rate": 0.0002981817562465312,
      "loss": 2.874,
      "step": 115646
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6102172136306763,
      "learning_rate": 0.0002981776657596595,
      "loss": 3.0575,
      "step": 115647
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1090824604034424,
      "learning_rate": 0.00029817357527312644,
      "loss": 2.9711,
      "step": 115648
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9551949501037598,
      "learning_rate": 0.0002981694847869331,
      "loss": 3.0015,
      "step": 115649
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.810364246368408,
      "learning_rate": 0.00029816539430108,
      "loss": 2.9868,
      "step": 115650
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.710758090019226,
      "learning_rate": 0.000298161303815568,
      "loss": 2.9637,
      "step": 115651
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1280107498168945,
      "learning_rate": 0.00029815721333039785,
      "loss": 2.9026,
      "step": 115652
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3199896812438965,
      "learning_rate": 0.0002981531228455704,
      "loss": 2.8918,
      "step": 115653
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.57218861579895,
      "learning_rate": 0.0002981490323610862,
      "loss": 2.8636,
      "step": 115654
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4951860904693604,
      "learning_rate": 0.0002981449418769462,
      "loss": 2.7602,
      "step": 115655
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.959487795829773,
      "learning_rate": 0.00029814085139315104,
      "loss": 2.873,
      "step": 115656
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4046990871429443,
      "learning_rate": 0.0002981367609097015,
      "loss": 2.6911,
      "step": 115657
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1069095134735107,
      "learning_rate": 0.0002981326704265985,
      "loss": 3.2037,
      "step": 115658
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8224411010742188,
      "learning_rate": 0.00029812857994384267,
      "loss": 2.8547,
      "step": 115659
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8244050741195679,
      "learning_rate": 0.00029812448946143463,
      "loss": 3.1979,
      "step": 115660
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.506885290145874,
      "learning_rate": 0.0002981203989793754,
      "loss": 2.8748,
      "step": 115661
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3322460651397705,
      "learning_rate": 0.0002981163084976655,
      "loss": 3.1226,
      "step": 115662
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0457139015197754,
      "learning_rate": 0.0002981122180163059,
      "loss": 2.9722,
      "step": 115663
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7283971309661865,
      "learning_rate": 0.00029810812753529726,
      "loss": 2.7989,
      "step": 115664
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.876220703125,
      "learning_rate": 0.0002981040370546405,
      "loss": 2.9341,
      "step": 115665
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.77373206615448,
      "learning_rate": 0.0002980999465743361,
      "loss": 3.0454,
      "step": 115666
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.782506227493286,
      "learning_rate": 0.00029809585609438485,
      "loss": 2.9922,
      "step": 115667
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.626145839691162,
      "learning_rate": 0.0002980917656147877,
      "loss": 2.7593,
      "step": 115668
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9261095523834229,
      "learning_rate": 0.0002980876751355454,
      "loss": 2.9159,
      "step": 115669
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.139638900756836,
      "learning_rate": 0.0002980835846566585,
      "loss": 3.0948,
      "step": 115670
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.928213119506836,
      "learning_rate": 0.00029807949417812814,
      "loss": 3.025,
      "step": 115671
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9566844701766968,
      "learning_rate": 0.00029807540369995467,
      "loss": 2.7797,
      "step": 115672
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6804121732711792,
      "learning_rate": 0.000298071313222139,
      "loss": 2.8088,
      "step": 115673
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.158599376678467,
      "learning_rate": 0.0002980672227446819,
      "loss": 3.0721,
      "step": 115674
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9528188705444336,
      "learning_rate": 0.00029806313226758414,
      "loss": 3.2323,
      "step": 115675
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1288440227508545,
      "learning_rate": 0.0002980590417908465,
      "loss": 2.9011,
      "step": 115676
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.690237045288086,
      "learning_rate": 0.00029805495131446987,
      "loss": 3.0352,
      "step": 115677
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5650699138641357,
      "learning_rate": 0.0002980508608384547,
      "loss": 2.8252,
      "step": 115678
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9217437505722046,
      "learning_rate": 0.0002980467703628019,
      "loss": 2.9681,
      "step": 115679
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9426233768463135,
      "learning_rate": 0.0002980426798875123,
      "loss": 3.0514,
      "step": 115680
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7510693073272705,
      "learning_rate": 0.00029803858941258654,
      "loss": 2.6637,
      "step": 115681
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.288200855255127,
      "learning_rate": 0.00029803449893802547,
      "loss": 2.7581,
      "step": 115682
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8441499471664429,
      "learning_rate": 0.00029803040846383,
      "loss": 3.175,
      "step": 115683
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9639943838119507,
      "learning_rate": 0.0002980263179900005,
      "loss": 3.0469,
      "step": 115684
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.238924026489258,
      "learning_rate": 0.000298022227516538,
      "loss": 3.0833,
      "step": 115685
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.448909282684326,
      "learning_rate": 0.00029801813704344315,
      "loss": 2.9912,
      "step": 115686
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8447937965393066,
      "learning_rate": 0.00029801404657071683,
      "loss": 2.9607,
      "step": 115687
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.5305533409118652,
      "learning_rate": 0.00029800995609835973,
      "loss": 2.9873,
      "step": 115688
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9590065479278564,
      "learning_rate": 0.0002980058656263727,
      "loss": 3.0084,
      "step": 115689
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0956249237060547,
      "learning_rate": 0.0002980017751547563,
      "loss": 3.0952,
      "step": 115690
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8191945552825928,
      "learning_rate": 0.0002979976846835115,
      "loss": 3.0593,
      "step": 115691
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.512475609779358,
      "learning_rate": 0.0002979935942126388,
      "loss": 3.0741,
      "step": 115692
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8595974445343018,
      "learning_rate": 0.0002979895037421393,
      "loss": 2.8449,
      "step": 115693
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8981952667236328,
      "learning_rate": 0.00029798541327201347,
      "loss": 2.876,
      "step": 115694
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.812915563583374,
      "learning_rate": 0.00029798132280226236,
      "loss": 2.8879,
      "step": 115695
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8159675598144531,
      "learning_rate": 0.00029797723233288643,
      "loss": 3.0122,
      "step": 115696
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.204299211502075,
      "learning_rate": 0.00029797314186388657,
      "loss": 3.1784,
      "step": 115697
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8428293466567993,
      "learning_rate": 0.0002979690513952635,
      "loss": 2.9348,
      "step": 115698
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6809413433074951,
      "learning_rate": 0.0002979649609270181,
      "loss": 3.2057,
      "step": 115699
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8466826677322388,
      "learning_rate": 0.00029796087045915105,
      "loss": 3.0897,
      "step": 115700
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8402966260910034,
      "learning_rate": 0.00029795677999166315,
      "loss": 3.092,
      "step": 115701
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.876522183418274,
      "learning_rate": 0.00029795268952455506,
      "loss": 2.8025,
      "step": 115702
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1014723777770996,
      "learning_rate": 0.0002979485990578276,
      "loss": 2.7004,
      "step": 115703
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8645896911621094,
      "learning_rate": 0.0002979445085914815,
      "loss": 2.8554,
      "step": 115704
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.828233003616333,
      "learning_rate": 0.0002979404181255176,
      "loss": 2.9736,
      "step": 115705
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9782421588897705,
      "learning_rate": 0.00029793632765993657,
      "loss": 2.9791,
      "step": 115706
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.878406047821045,
      "learning_rate": 0.00029793223719473927,
      "loss": 3.2353,
      "step": 115707
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.427122116088867,
      "learning_rate": 0.00029792814672992653,
      "loss": 2.9466,
      "step": 115708
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9974629878997803,
      "learning_rate": 0.0002979240562654988,
      "loss": 2.903,
      "step": 115709
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7700849771499634,
      "learning_rate": 0.0002979199658014571,
      "loss": 2.8505,
      "step": 115710
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.130378484725952,
      "learning_rate": 0.0002979158753378021,
      "loss": 2.9822,
      "step": 115711
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7775392532348633,
      "learning_rate": 0.0002979117848745345,
      "loss": 3.1204,
      "step": 115712
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9611390829086304,
      "learning_rate": 0.0002979076944116552,
      "loss": 3.1584,
      "step": 115713
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.953840970993042,
      "learning_rate": 0.00029790360394916506,
      "loss": 3.0376,
      "step": 115714
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9587581157684326,
      "learning_rate": 0.0002978995134870645,
      "loss": 3.0839,
      "step": 115715
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.251880407333374,
      "learning_rate": 0.0002978954230253545,
      "loss": 3.0173,
      "step": 115716
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.608899116516113,
      "learning_rate": 0.0002978913325640357,
      "loss": 2.7725,
      "step": 115717
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9112101793289185,
      "learning_rate": 0.00029788724210310903,
      "loss": 3.1059,
      "step": 115718
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.159780979156494,
      "learning_rate": 0.0002978831516425751,
      "loss": 2.9326,
      "step": 115719
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.203086853027344,
      "learning_rate": 0.00029787906118243485,
      "loss": 2.943,
      "step": 115720
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.476130247116089,
      "learning_rate": 0.0002978749707226889,
      "loss": 3.2595,
      "step": 115721
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.165545701980591,
      "learning_rate": 0.00029787088026333796,
      "loss": 3.1114,
      "step": 115722
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0934736728668213,
      "learning_rate": 0.0002978667898043829,
      "loss": 3.0188,
      "step": 115723
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.193298578262329,
      "learning_rate": 0.0002978626993458244,
      "loss": 2.9563,
      "step": 115724
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0795652866363525,
      "learning_rate": 0.0002978586088876633,
      "loss": 3.0305,
      "step": 115725
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1626482009887695,
      "learning_rate": 0.00029785451842990037,
      "loss": 3.0284,
      "step": 115726
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.375467538833618,
      "learning_rate": 0.00029785042797253626,
      "loss": 3.101,
      "step": 115727
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.111260175704956,
      "learning_rate": 0.00029784633751557185,
      "loss": 2.7945,
      "step": 115728
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5697200298309326,
      "learning_rate": 0.0002978422470590078,
      "loss": 2.8321,
      "step": 115729
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.084897994995117,
      "learning_rate": 0.00029783815660284487,
      "loss": 2.8323,
      "step": 115730
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.243896245956421,
      "learning_rate": 0.0002978340661470839,
      "loss": 2.9331,
      "step": 115731
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.2834324836730957,
      "learning_rate": 0.0002978299756917257,
      "loss": 3.1734,
      "step": 115732
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7280367612838745,
      "learning_rate": 0.0002978258852367709,
      "loss": 2.8677,
      "step": 115733
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7735062837600708,
      "learning_rate": 0.00029782179478222027,
      "loss": 3.1243,
      "step": 115734
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.3529703617095947,
      "learning_rate": 0.0002978177043280747,
      "loss": 2.7755,
      "step": 115735
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.068347215652466,
      "learning_rate": 0.0002978136138743348,
      "loss": 2.9952,
      "step": 115736
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7834651470184326,
      "learning_rate": 0.0002978095234210014,
      "loss": 3.0096,
      "step": 115737
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.120419502258301,
      "learning_rate": 0.00029780543296807524,
      "loss": 3.0954,
      "step": 115738
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.787019968032837,
      "learning_rate": 0.0002978013425155571,
      "loss": 2.9545,
      "step": 115739
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8187127113342285,
      "learning_rate": 0.0002977972520634477,
      "loss": 2.9457,
      "step": 115740
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.666060447692871,
      "learning_rate": 0.00029779316161174783,
      "loss": 3.2748,
      "step": 115741
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2157211303710938,
      "learning_rate": 0.00029778907116045824,
      "loss": 3.0462,
      "step": 115742
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.341010570526123,
      "learning_rate": 0.0002977849807095798,
      "loss": 2.8595,
      "step": 115743
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.5863851308822632,
      "learning_rate": 0.00029778089025911316,
      "loss": 2.92,
      "step": 115744
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8317680358886719,
      "learning_rate": 0.000297776799809059,
      "loss": 3.0898,
      "step": 115745
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.276934862136841,
      "learning_rate": 0.00029777270935941823,
      "loss": 2.9173,
      "step": 115746
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7792311906814575,
      "learning_rate": 0.00029776861891019156,
      "loss": 3.0155,
      "step": 115747
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0508711338043213,
      "learning_rate": 0.0002977645284613797,
      "loss": 2.9168,
      "step": 115748
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8101320266723633,
      "learning_rate": 0.00029776043801298346,
      "loss": 2.7747,
      "step": 115749
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7510344982147217,
      "learning_rate": 0.0002977563475650038,
      "loss": 3.0841,
      "step": 115750
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.99842369556427,
      "learning_rate": 0.00029775225711744103,
      "loss": 3.0727,
      "step": 115751
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.622142791748047,
      "learning_rate": 0.00029774816667029624,
      "loss": 3.045,
      "step": 115752
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.076768636703491,
      "learning_rate": 0.00029774407622357006,
      "loss": 2.8037,
      "step": 115753
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9482485055923462,
      "learning_rate": 0.00029773998577726336,
      "loss": 2.9514,
      "step": 115754
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.9110641479492188,
      "learning_rate": 0.0002977358953313768,
      "loss": 2.9699,
      "step": 115755
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3616864681243896,
      "learning_rate": 0.0002977318048859113,
      "loss": 2.9516,
      "step": 115756
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9133247137069702,
      "learning_rate": 0.0002977277144408674,
      "loss": 3.2054,
      "step": 115757
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8508986234664917,
      "learning_rate": 0.00029772362399624596,
      "loss": 2.8258,
      "step": 115758
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7642561197280884,
      "learning_rate": 0.0002977195335520477,
      "loss": 2.8534,
      "step": 115759
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8738571405410767,
      "learning_rate": 0.0002977154431082735,
      "loss": 3.2023,
      "step": 115760
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9040254354476929,
      "learning_rate": 0.00029771135266492394,
      "loss": 3.147,
      "step": 115761
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1148130893707275,
      "learning_rate": 0.0002977072622220001,
      "loss": 2.9472,
      "step": 115762
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8258883953094482,
      "learning_rate": 0.0002977031717795023,
      "loss": 3.1723,
      "step": 115763
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.88918936252594,
      "learning_rate": 0.0002976990813374316,
      "loss": 3.2951,
      "step": 115764
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9653147459030151,
      "learning_rate": 0.0002976949908957887,
      "loss": 2.7504,
      "step": 115765
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.19075608253479,
      "learning_rate": 0.00029769090045457427,
      "loss": 2.8661,
      "step": 115766
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7935614585876465,
      "learning_rate": 0.0002976868100137892,
      "loss": 2.9924,
      "step": 115767
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0382840633392334,
      "learning_rate": 0.0002976827195734343,
      "loss": 3.2693,
      "step": 115768
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.597739338874817,
      "learning_rate": 0.0002976786291335101,
      "loss": 3.3329,
      "step": 115769
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2552614212036133,
      "learning_rate": 0.0002976745386940175,
      "loss": 3.0093,
      "step": 115770
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.378983974456787,
      "learning_rate": 0.00029767044825495723,
      "loss": 2.9715,
      "step": 115771
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8576128482818604,
      "learning_rate": 0.0002976663578163301,
      "loss": 2.9974,
      "step": 115772
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9211151599884033,
      "learning_rate": 0.0002976622673781368,
      "loss": 3.0099,
      "step": 115773
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0764975547790527,
      "learning_rate": 0.0002976581769403781,
      "loss": 2.9481,
      "step": 115774
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1462972164154053,
      "learning_rate": 0.000297654086503055,
      "loss": 2.8333,
      "step": 115775
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.010265350341797,
      "learning_rate": 0.0002976499960661679,
      "loss": 2.8229,
      "step": 115776
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.050783395767212,
      "learning_rate": 0.0002976459056297177,
      "loss": 2.6876,
      "step": 115777
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0080251693725586,
      "learning_rate": 0.00029764181519370515,
      "loss": 2.8985,
      "step": 115778
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8833144903182983,
      "learning_rate": 0.00029763772475813107,
      "loss": 3.071,
      "step": 115779
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.946804404258728,
      "learning_rate": 0.0002976336343229961,
      "loss": 3.0466,
      "step": 115780
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.835000991821289,
      "learning_rate": 0.0002976295438883013,
      "loss": 3.2123,
      "step": 115781
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9266836643218994,
      "learning_rate": 0.00029762545345404703,
      "loss": 2.9695,
      "step": 115782
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8314719200134277,
      "learning_rate": 0.0002976213630202343,
      "loss": 3.0181,
      "step": 115783
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8235900402069092,
      "learning_rate": 0.00029761727258686375,
      "loss": 3.0443,
      "step": 115784
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0767643451690674,
      "learning_rate": 0.0002976131821539362,
      "loss": 3.1128,
      "step": 115785
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9728856086730957,
      "learning_rate": 0.0002976090917214524,
      "loss": 3.0963,
      "step": 115786
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0338962078094482,
      "learning_rate": 0.00029760500128941323,
      "loss": 2.929,
      "step": 115787
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3830883502960205,
      "learning_rate": 0.00029760091085781926,
      "loss": 2.8899,
      "step": 115788
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7791054248809814,
      "learning_rate": 0.0002975968204266713,
      "loss": 3.1264,
      "step": 115789
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4248509407043457,
      "learning_rate": 0.0002975927299959701,
      "loss": 3.032,
      "step": 115790
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7971493005752563,
      "learning_rate": 0.00029758863956571647,
      "loss": 2.9008,
      "step": 115791
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1757972240448,
      "learning_rate": 0.00029758454913591116,
      "loss": 2.7412,
      "step": 115792
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8242841958999634,
      "learning_rate": 0.0002975804587065551,
      "loss": 2.7343,
      "step": 115793
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.728212594985962,
      "learning_rate": 0.00029757636827764875,
      "loss": 2.928,
      "step": 115794
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1699962615966797,
      "learning_rate": 0.00029757227784919294,
      "loss": 3.0547,
      "step": 115795
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.104954957962036,
      "learning_rate": 0.00029756818742118855,
      "loss": 2.9521,
      "step": 115796
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8317779302597046,
      "learning_rate": 0.0002975640969936362,
      "loss": 2.8971,
      "step": 115797
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4107484817504883,
      "learning_rate": 0.0002975600065665368,
      "loss": 2.9343,
      "step": 115798
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.283933639526367,
      "learning_rate": 0.0002975559161398912,
      "loss": 3.1054,
      "step": 115799
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.732587456703186,
      "learning_rate": 0.0002975518257136998,
      "loss": 3.0205,
      "step": 115800
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9118635654449463,
      "learning_rate": 0.00029754773528796355,
      "loss": 3.0171,
      "step": 115801
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.082226037979126,
      "learning_rate": 0.00029754364486268327,
      "loss": 2.936,
      "step": 115802
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.208127021789551,
      "learning_rate": 0.00029753955443785965,
      "loss": 3.1239,
      "step": 115803
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8074527978897095,
      "learning_rate": 0.00029753546401349346,
      "loss": 3.0341,
      "step": 115804
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8677536249160767,
      "learning_rate": 0.0002975313735895856,
      "loss": 3.0319,
      "step": 115805
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0157690048217773,
      "learning_rate": 0.00029752728316613666,
      "loss": 3.0364,
      "step": 115806
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.295222282409668,
      "learning_rate": 0.00029752319274314735,
      "loss": 3.1763,
      "step": 115807
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7837306261062622,
      "learning_rate": 0.00029751910232061855,
      "loss": 3.2267,
      "step": 115808
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0490620136260986,
      "learning_rate": 0.00029751501189855103,
      "loss": 3.0109,
      "step": 115809
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1579954624176025,
      "learning_rate": 0.00029751092147694544,
      "loss": 2.9227,
      "step": 115810
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2076845169067383,
      "learning_rate": 0.00029750683105580274,
      "loss": 3.1502,
      "step": 115811
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0848569869995117,
      "learning_rate": 0.00029750274063512354,
      "loss": 2.9204,
      "step": 115812
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.760774850845337,
      "learning_rate": 0.0002974986502149086,
      "loss": 3.1694,
      "step": 115813
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6216673851013184,
      "learning_rate": 0.0002974945597951587,
      "loss": 3.0909,
      "step": 115814
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8261630535125732,
      "learning_rate": 0.0002974904693758746,
      "loss": 2.904,
      "step": 115815
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.699402093887329,
      "learning_rate": 0.0002974863789570571,
      "loss": 2.8114,
      "step": 115816
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.9225335121154785,
      "learning_rate": 0.00029748228853870693,
      "loss": 3.1518,
      "step": 115817
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.262200355529785,
      "learning_rate": 0.0002974781981208248,
      "loss": 2.9226,
      "step": 115818
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8361610174179077,
      "learning_rate": 0.00029747410770341156,
      "loss": 3.0216,
      "step": 115819
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1034724712371826,
      "learning_rate": 0.000297470017286468,
      "loss": 3.0719,
      "step": 115820
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6100375652313232,
      "learning_rate": 0.0002974659268699947,
      "loss": 3.1222,
      "step": 115821
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0574285984039307,
      "learning_rate": 0.00029746183645399255,
      "loss": 3.0537,
      "step": 115822
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.770904302597046,
      "learning_rate": 0.0002974577460384624,
      "loss": 2.8218,
      "step": 115823
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9461308717727661,
      "learning_rate": 0.0002974536556234048,
      "loss": 3.0506,
      "step": 115824
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0987305641174316,
      "learning_rate": 0.0002974495652088206,
      "loss": 3.1659,
      "step": 115825
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8034868240356445,
      "learning_rate": 0.00029744547479471057,
      "loss": 2.7157,
      "step": 115826
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.040694236755371,
      "learning_rate": 0.00029744138438107554,
      "loss": 2.637,
      "step": 115827
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.534991979598999,
      "learning_rate": 0.0002974372939679162,
      "loss": 2.7389,
      "step": 115828
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.867604374885559,
      "learning_rate": 0.0002974332035552333,
      "loss": 3.1583,
      "step": 115829
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9503233432769775,
      "learning_rate": 0.0002974291131430276,
      "loss": 3.0088,
      "step": 115830
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7559080123901367,
      "learning_rate": 0.00029742502273129984,
      "loss": 2.7785,
      "step": 115831
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.590610980987549,
      "learning_rate": 0.00029742093232005087,
      "loss": 3.1969,
      "step": 115832
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3312790393829346,
      "learning_rate": 0.00029741684190928133,
      "loss": 2.996,
      "step": 115833
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3813674449920654,
      "learning_rate": 0.00029741275149899215,
      "loss": 2.9769,
      "step": 115834
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.887174367904663,
      "learning_rate": 0.00029740866108918403,
      "loss": 3.0724,
      "step": 115835
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9394603967666626,
      "learning_rate": 0.00029740457067985756,
      "loss": 3.1507,
      "step": 115836
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.935453176498413,
      "learning_rate": 0.0002974004802710137,
      "loss": 3.0128,
      "step": 115837
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9313554763793945,
      "learning_rate": 0.000297396389862653,
      "loss": 3.0373,
      "step": 115838
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5215888023376465,
      "learning_rate": 0.00029739229945477646,
      "loss": 3.0487,
      "step": 115839
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8622264862060547,
      "learning_rate": 0.00029738820904738476,
      "loss": 3.0272,
      "step": 115840
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.810532569885254,
      "learning_rate": 0.0002973841186404786,
      "loss": 3.0288,
      "step": 115841
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7453991174697876,
      "learning_rate": 0.00029738002823405895,
      "loss": 3.1972,
      "step": 115842
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5127413272857666,
      "learning_rate": 0.0002973759378281262,
      "loss": 3.0007,
      "step": 115843
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.250703811645508,
      "learning_rate": 0.0002973718474226814,
      "loss": 2.9182,
      "step": 115844
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1685757637023926,
      "learning_rate": 0.0002973677570177252,
      "loss": 3.1017,
      "step": 115845
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.460127592086792,
      "learning_rate": 0.00029736366661325833,
      "loss": 2.8821,
      "step": 115846
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.140871047973633,
      "learning_rate": 0.00029735957620928165,
      "loss": 2.8412,
      "step": 115847
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1496307849884033,
      "learning_rate": 0.000297355485805796,
      "loss": 3.1652,
      "step": 115848
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0517256259918213,
      "learning_rate": 0.00029735139540280187,
      "loss": 3.0437,
      "step": 115849
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8722648620605469,
      "learning_rate": 0.0002973473050003002,
      "loss": 3.023,
      "step": 115850
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7587591409683228,
      "learning_rate": 0.0002973432145982917,
      "loss": 2.9646,
      "step": 115851
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7771817445755005,
      "learning_rate": 0.00029733912419677714,
      "loss": 2.9081,
      "step": 115852
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6931674480438232,
      "learning_rate": 0.00029733503379575723,
      "loss": 2.6936,
      "step": 115853
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1597888469696045,
      "learning_rate": 0.00029733094339523303,
      "loss": 2.946,
      "step": 115854
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.640867829322815,
      "learning_rate": 0.00029732685299520484,
      "loss": 2.9902,
      "step": 115855
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8367323875427246,
      "learning_rate": 0.0002973227625956737,
      "loss": 2.9487,
      "step": 115856
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.5892759561538696,
      "learning_rate": 0.00029731867219664023,
      "loss": 2.8468,
      "step": 115857
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7394015789031982,
      "learning_rate": 0.00029731458179810537,
      "loss": 3.1263,
      "step": 115858
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.200129985809326,
      "learning_rate": 0.0002973104914000697,
      "loss": 3.0286,
      "step": 115859
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.935142993927002,
      "learning_rate": 0.00029730640100253424,
      "loss": 3.0043,
      "step": 115860
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9978004693984985,
      "learning_rate": 0.00029730231060549943,
      "loss": 2.9146,
      "step": 115861
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.96860671043396,
      "learning_rate": 0.0002972982202089661,
      "loss": 2.8343,
      "step": 115862
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.794319748878479,
      "learning_rate": 0.00029729412981293517,
      "loss": 3.1833,
      "step": 115863
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7855628728866577,
      "learning_rate": 0.0002972900394174073,
      "loss": 3.09,
      "step": 115864
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9327646493911743,
      "learning_rate": 0.00029728594902238325,
      "loss": 3.0453,
      "step": 115865
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.043989896774292,
      "learning_rate": 0.00029728185862786395,
      "loss": 3.0457,
      "step": 115866
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6690726280212402,
      "learning_rate": 0.0002972777682338498,
      "loss": 2.9167,
      "step": 115867
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8835469484329224,
      "learning_rate": 0.0002972736778403418,
      "loss": 3.0212,
      "step": 115868
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7937209606170654,
      "learning_rate": 0.00029726958744734073,
      "loss": 3.2816,
      "step": 115869
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.872832179069519,
      "learning_rate": 0.0002972654970548472,
      "loss": 2.9782,
      "step": 115870
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8125404119491577,
      "learning_rate": 0.00029726140666286213,
      "loss": 2.979,
      "step": 115871
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9828556776046753,
      "learning_rate": 0.00029725731627138635,
      "loss": 3.0459,
      "step": 115872
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.384280204772949,
      "learning_rate": 0.0002972532258804203,
      "loss": 2.9312,
      "step": 115873
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0270228385925293,
      "learning_rate": 0.000297249135489965,
      "loss": 3.076,
      "step": 115874
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9593045711517334,
      "learning_rate": 0.0002972450451000211,
      "loss": 3.0374,
      "step": 115875
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5494182109832764,
      "learning_rate": 0.0002972409547105894,
      "loss": 2.9332,
      "step": 115876
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7387874126434326,
      "learning_rate": 0.00029723686432167064,
      "loss": 2.909,
      "step": 115877
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7812817096710205,
      "learning_rate": 0.00029723277393326576,
      "loss": 2.8552,
      "step": 115878
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1347904205322266,
      "learning_rate": 0.0002972286835453752,
      "loss": 3.0801,
      "step": 115879
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.3456313610076904,
      "learning_rate": 0.00029722459315799994,
      "loss": 2.7957,
      "step": 115880
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.2172393798828125,
      "learning_rate": 0.0002972205027711406,
      "loss": 3.118,
      "step": 115881
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8187472820281982,
      "learning_rate": 0.00029721641238479807,
      "loss": 2.9795,
      "step": 115882
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.693584442138672,
      "learning_rate": 0.00029721232199897304,
      "loss": 2.8385,
      "step": 115883
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.170825481414795,
      "learning_rate": 0.00029720823161366643,
      "loss": 2.8623,
      "step": 115884
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.9872632026672363,
      "learning_rate": 0.00029720414122887877,
      "loss": 2.8439,
      "step": 115885
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.077277898788452,
      "learning_rate": 0.00029720005084461087,
      "loss": 3.1126,
      "step": 115886
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.273310422897339,
      "learning_rate": 0.0002971959604608635,
      "loss": 3.111,
      "step": 115887
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.647855758666992,
      "learning_rate": 0.0002971918700776375,
      "loss": 3.1502,
      "step": 115888
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.364802598953247,
      "learning_rate": 0.0002971877796949336,
      "loss": 3.0614,
      "step": 115889
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1327321529388428,
      "learning_rate": 0.0002971836893127527,
      "loss": 2.9711,
      "step": 115890
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.004946231842041,
      "learning_rate": 0.0002971795989310952,
      "loss": 2.8857,
      "step": 115891
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6645405292510986,
      "learning_rate": 0.00029717550854996207,
      "loss": 3.0342,
      "step": 115892
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6490957736968994,
      "learning_rate": 0.0002971714181693541,
      "loss": 2.8398,
      "step": 115893
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.94460928440094,
      "learning_rate": 0.000297167327789272,
      "loss": 2.9495,
      "step": 115894
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8321213722229004,
      "learning_rate": 0.00029716323740971653,
      "loss": 2.9666,
      "step": 115895
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.341080665588379,
      "learning_rate": 0.0002971591470306886,
      "loss": 2.8472,
      "step": 115896
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9900115728378296,
      "learning_rate": 0.0002971550566521888,
      "loss": 2.9066,
      "step": 115897
    },
    {
      "epoch": 1.51,
      "grad_norm": 5.309719562530518,
      "learning_rate": 0.00029715096627421785,
      "loss": 2.8842,
      "step": 115898
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3314664363861084,
      "learning_rate": 0.0002971468758967766,
      "loss": 3.2913,
      "step": 115899
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0617358684539795,
      "learning_rate": 0.0002971427855198658,
      "loss": 2.9874,
      "step": 115900
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5825610160827637,
      "learning_rate": 0.0002971386951434862,
      "loss": 2.9858,
      "step": 115901
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.721128225326538,
      "learning_rate": 0.00029713460476763864,
      "loss": 2.8735,
      "step": 115902
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9403785467147827,
      "learning_rate": 0.00029713051439232376,
      "loss": 3.062,
      "step": 115903
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7099982500076294,
      "learning_rate": 0.00029712642401754236,
      "loss": 2.7661,
      "step": 115904
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7881487607955933,
      "learning_rate": 0.0002971223336432953,
      "loss": 2.951,
      "step": 115905
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8188509941101074,
      "learning_rate": 0.00029711824326958316,
      "loss": 3.1748,
      "step": 115906
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9540321826934814,
      "learning_rate": 0.0002971141528964068,
      "loss": 2.9466,
      "step": 115907
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.205608367919922,
      "learning_rate": 0.000297110062523767,
      "loss": 2.9923,
      "step": 115908
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.032740831375122,
      "learning_rate": 0.00029710597215166454,
      "loss": 2.8662,
      "step": 115909
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.608879327774048,
      "learning_rate": 0.00029710188178010003,
      "loss": 3.0259,
      "step": 115910
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.149909019470215,
      "learning_rate": 0.00029709779140907436,
      "loss": 3.0782,
      "step": 115911
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9670374393463135,
      "learning_rate": 0.00029709370103858836,
      "loss": 2.9146,
      "step": 115912
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1796164512634277,
      "learning_rate": 0.0002970896106686426,
      "loss": 2.916,
      "step": 115913
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.834168553352356,
      "learning_rate": 0.00029708552029923793,
      "loss": 3.0096,
      "step": 115914
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8141742944717407,
      "learning_rate": 0.0002970814299303752,
      "loss": 2.9884,
      "step": 115915
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7643344402313232,
      "learning_rate": 0.00029707733956205504,
      "loss": 3.1693,
      "step": 115916
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8167486190795898,
      "learning_rate": 0.00029707324919427824,
      "loss": 3.0116,
      "step": 115917
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8975651264190674,
      "learning_rate": 0.00029706915882704555,
      "loss": 2.8527,
      "step": 115918
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0024757385253906,
      "learning_rate": 0.00029706506846035783,
      "loss": 3.0909,
      "step": 115919
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7765406370162964,
      "learning_rate": 0.00029706097809421573,
      "loss": 3.0098,
      "step": 115920
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8156695365905762,
      "learning_rate": 0.00029705688772862013,
      "loss": 3.3233,
      "step": 115921
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0231893062591553,
      "learning_rate": 0.0002970527973635716,
      "loss": 3.1225,
      "step": 115922
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9060990810394287,
      "learning_rate": 0.00029704870699907106,
      "loss": 2.5757,
      "step": 115923
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.457660675048828,
      "learning_rate": 0.0002970446166351192,
      "loss": 3.109,
      "step": 115924
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.853980541229248,
      "learning_rate": 0.0002970405262717168,
      "loss": 2.7894,
      "step": 115925
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.792039155960083,
      "learning_rate": 0.0002970364359088647,
      "loss": 3.2222,
      "step": 115926
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4607341289520264,
      "learning_rate": 0.00029703234554656356,
      "loss": 3.0498,
      "step": 115927
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4055535793304443,
      "learning_rate": 0.00029702825518481405,
      "loss": 2.8885,
      "step": 115928
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5340182781219482,
      "learning_rate": 0.0002970241648236171,
      "loss": 2.9293,
      "step": 115929
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2076425552368164,
      "learning_rate": 0.00029702007446297344,
      "loss": 2.9298,
      "step": 115930
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.808160662651062,
      "learning_rate": 0.00029701598410288374,
      "loss": 3.0849,
      "step": 115931
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.732678174972534,
      "learning_rate": 0.0002970118937433489,
      "loss": 2.9647,
      "step": 115932
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9584791660308838,
      "learning_rate": 0.0002970078033843697,
      "loss": 3.0202,
      "step": 115933
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3936948776245117,
      "learning_rate": 0.00029700371302594666,
      "loss": 3.0301,
      "step": 115934
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3387484550476074,
      "learning_rate": 0.0002969996226680807,
      "loss": 2.8233,
      "step": 115935
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9725773334503174,
      "learning_rate": 0.00029699553231077254,
      "loss": 3.0007,
      "step": 115936
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.844572901725769,
      "learning_rate": 0.000296991441954023,
      "loss": 2.9386,
      "step": 115937
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.575934410095215,
      "learning_rate": 0.0002969873515978328,
      "loss": 3.2001,
      "step": 115938
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7254865169525146,
      "learning_rate": 0.00029698326124220285,
      "loss": 3.087,
      "step": 115939
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2496397495269775,
      "learning_rate": 0.0002969791708871336,
      "loss": 2.6218,
      "step": 115940
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.9372611045837402,
      "learning_rate": 0.00029697508053262603,
      "loss": 2.9554,
      "step": 115941
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9901599884033203,
      "learning_rate": 0.0002969709901786808,
      "loss": 2.8445,
      "step": 115942
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1865923404693604,
      "learning_rate": 0.00029696689982529876,
      "loss": 2.9125,
      "step": 115943
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.075320243835449,
      "learning_rate": 0.0002969628094724806,
      "loss": 3.1161,
      "step": 115944
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.276395082473755,
      "learning_rate": 0.0002969587191202273,
      "loss": 3.1488,
      "step": 115945
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8072625398635864,
      "learning_rate": 0.00029695462876853923,
      "loss": 2.9037,
      "step": 115946
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7783372402191162,
      "learning_rate": 0.00029695053841741735,
      "loss": 3.0388,
      "step": 115947
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0465025901794434,
      "learning_rate": 0.00029694644806686245,
      "loss": 3.4699,
      "step": 115948
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.1611123085021973,
      "learning_rate": 0.0002969423577168753,
      "loss": 3.1964,
      "step": 115949
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.951867938041687,
      "learning_rate": 0.0002969382673674566,
      "loss": 2.7829,
      "step": 115950
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7816567420959473,
      "learning_rate": 0.00029693417701860724,
      "loss": 3.1199,
      "step": 115951
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9320580959320068,
      "learning_rate": 0.00029693008667032775,
      "loss": 2.8796,
      "step": 115952
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9346070289611816,
      "learning_rate": 0.00029692599632261905,
      "loss": 2.9057,
      "step": 115953
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7100183963775635,
      "learning_rate": 0.0002969219059754818,
      "loss": 3.3337,
      "step": 115954
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.028898239135742,
      "learning_rate": 0.00029691781562891684,
      "loss": 3.0183,
      "step": 115955
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1403722763061523,
      "learning_rate": 0.00029691372528292497,
      "loss": 2.8955,
      "step": 115956
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8469922542572021,
      "learning_rate": 0.000296909634937507,
      "loss": 2.8475,
      "step": 115957
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6934716701507568,
      "learning_rate": 0.0002969055445926634,
      "loss": 3.052,
      "step": 115958
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8722995519638062,
      "learning_rate": 0.0002969014542483952,
      "loss": 3.1588,
      "step": 115959
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0401179790496826,
      "learning_rate": 0.00029689736390470304,
      "loss": 2.898,
      "step": 115960
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6514025926589966,
      "learning_rate": 0.0002968932735615878,
      "loss": 2.9124,
      "step": 115961
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7713240385055542,
      "learning_rate": 0.00029688918321905006,
      "loss": 2.9261,
      "step": 115962
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3452725410461426,
      "learning_rate": 0.0002968850928770908,
      "loss": 2.9192,
      "step": 115963
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.111851215362549,
      "learning_rate": 0.0002968810025357106,
      "loss": 2.7547,
      "step": 115964
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0594375133514404,
      "learning_rate": 0.0002968769121949102,
      "loss": 2.9588,
      "step": 115965
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6288418769836426,
      "learning_rate": 0.00029687282185469053,
      "loss": 3.1194,
      "step": 115966
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.945383906364441,
      "learning_rate": 0.0002968687315150522,
      "loss": 3.1166,
      "step": 115967
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8044414520263672,
      "learning_rate": 0.00029686464117599607,
      "loss": 3.0445,
      "step": 115968
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7840701341629028,
      "learning_rate": 0.00029686055083752287,
      "loss": 3.1778,
      "step": 115969
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.534761905670166,
      "learning_rate": 0.0002968564604996334,
      "loss": 2.9417,
      "step": 115970
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7307623624801636,
      "learning_rate": 0.0002968523701623283,
      "loss": 2.8131,
      "step": 115971
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0721611976623535,
      "learning_rate": 0.00029684827982560843,
      "loss": 2.9881,
      "step": 115972
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.91990065574646,
      "learning_rate": 0.0002968441894894745,
      "loss": 3.0979,
      "step": 115973
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8874902725219727,
      "learning_rate": 0.0002968400991539273,
      "loss": 2.784,
      "step": 115974
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9289324283599854,
      "learning_rate": 0.00029683600881896756,
      "loss": 2.878,
      "step": 115975
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7804019451141357,
      "learning_rate": 0.0002968319184845962,
      "loss": 3.1065,
      "step": 115976
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.811204433441162,
      "learning_rate": 0.00029682782815081376,
      "loss": 3.2082,
      "step": 115977
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0236425399780273,
      "learning_rate": 0.00029682373781762106,
      "loss": 3.009,
      "step": 115978
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.009307384490967,
      "learning_rate": 0.00029681964748501887,
      "loss": 2.9793,
      "step": 115979
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0001096725463867,
      "learning_rate": 0.000296815557153008,
      "loss": 2.8418,
      "step": 115980
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2679903507232666,
      "learning_rate": 0.00029681146682158916,
      "loss": 2.9604,
      "step": 115981
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9059611558914185,
      "learning_rate": 0.0002968073764907632,
      "loss": 2.8437,
      "step": 115982
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8164809942245483,
      "learning_rate": 0.00029680328616053076,
      "loss": 2.8668,
      "step": 115983
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.699877381324768,
      "learning_rate": 0.0002967991958308926,
      "loss": 2.9117,
      "step": 115984
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.161708354949951,
      "learning_rate": 0.0002967951055018496,
      "loss": 3.0685,
      "step": 115985
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.190711736679077,
      "learning_rate": 0.0002967910151734024,
      "loss": 3.171,
      "step": 115986
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.974122405052185,
      "learning_rate": 0.0002967869248455518,
      "loss": 3.0146,
      "step": 115987
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2537286281585693,
      "learning_rate": 0.0002967828345182987,
      "loss": 3.0325,
      "step": 115988
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8539174795150757,
      "learning_rate": 0.0002967787441916437,
      "loss": 3.0982,
      "step": 115989
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.719421148300171,
      "learning_rate": 0.00029677465386558746,
      "loss": 2.5744,
      "step": 115990
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2546963691711426,
      "learning_rate": 0.0002967705635401309,
      "loss": 3.1622,
      "step": 115991
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.9641706943511963,
      "learning_rate": 0.0002967664732152748,
      "loss": 3.0095,
      "step": 115992
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.493741273880005,
      "learning_rate": 0.0002967623828910199,
      "loss": 2.928,
      "step": 115993
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8415861129760742,
      "learning_rate": 0.0002967582925673669,
      "loss": 3.1679,
      "step": 115994
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4033753871917725,
      "learning_rate": 0.0002967542022443166,
      "loss": 2.7634,
      "step": 115995
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3086256980895996,
      "learning_rate": 0.0002967501119218698,
      "loss": 3.0287,
      "step": 115996
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8889607191085815,
      "learning_rate": 0.0002967460216000271,
      "loss": 2.9643,
      "step": 115997
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0132896900177,
      "learning_rate": 0.00029674193127878943,
      "loss": 3.045,
      "step": 115998
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.5957515239715576,
      "learning_rate": 0.00029673784095815754,
      "loss": 2.7689,
      "step": 115999
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.708287477493286,
      "learning_rate": 0.0002967337506381322,
      "loss": 2.9861,
      "step": 116000
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6913176774978638,
      "learning_rate": 0.00029672966031871396,
      "loss": 3.2064,
      "step": 116001
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7230592966079712,
      "learning_rate": 0.0002967255699999038,
      "loss": 2.789,
      "step": 116002
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.256626605987549,
      "learning_rate": 0.00029672147968170246,
      "loss": 3.0998,
      "step": 116003
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0508177280426025,
      "learning_rate": 0.0002967173893641106,
      "loss": 3.0622,
      "step": 116004
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.674187183380127,
      "learning_rate": 0.00029671329904712906,
      "loss": 3.1704,
      "step": 116005
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8650552034378052,
      "learning_rate": 0.0002967092087307587,
      "loss": 3.1231,
      "step": 116006
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0965735912323,
      "learning_rate": 0.000296705118415,
      "loss": 2.9071,
      "step": 116007
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6737772226333618,
      "learning_rate": 0.0002967010280998539,
      "loss": 2.9652,
      "step": 116008
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7457683086395264,
      "learning_rate": 0.0002966969377853212,
      "loss": 3.2538,
      "step": 116009
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.047431230545044,
      "learning_rate": 0.00029669284747140255,
      "loss": 2.9946,
      "step": 116010
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5687379837036133,
      "learning_rate": 0.00029668875715809884,
      "loss": 2.8769,
      "step": 116011
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9500693082809448,
      "learning_rate": 0.0002966846668454107,
      "loss": 3.0171,
      "step": 116012
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0509839057922363,
      "learning_rate": 0.00029668057653333893,
      "loss": 2.8321,
      "step": 116013
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.868981957435608,
      "learning_rate": 0.0002966764862218843,
      "loss": 3.198,
      "step": 116014
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.603530764579773,
      "learning_rate": 0.0002966723959110476,
      "loss": 3.2779,
      "step": 116015
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.742039680480957,
      "learning_rate": 0.0002966683056008295,
      "loss": 2.9095,
      "step": 116016
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.301161766052246,
      "learning_rate": 0.0002966642152912309,
      "loss": 3.183,
      "step": 116017
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3994853496551514,
      "learning_rate": 0.00029666012498225254,
      "loss": 2.9278,
      "step": 116018
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9561948776245117,
      "learning_rate": 0.00029665603467389506,
      "loss": 2.913,
      "step": 116019
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6793261766433716,
      "learning_rate": 0.0002966519443661592,
      "loss": 3.0884,
      "step": 116020
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.106311082839966,
      "learning_rate": 0.0002966478540590459,
      "loss": 3.0243,
      "step": 116021
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6630691289901733,
      "learning_rate": 0.0002966437637525558,
      "loss": 3.0611,
      "step": 116022
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9921201467514038,
      "learning_rate": 0.0002966396734466897,
      "loss": 2.9099,
      "step": 116023
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8088569641113281,
      "learning_rate": 0.00029663558314144844,
      "loss": 2.9043,
      "step": 116024
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.601329803466797,
      "learning_rate": 0.00029663149283683254,
      "loss": 2.9677,
      "step": 116025
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9269585609436035,
      "learning_rate": 0.000296627402532843,
      "loss": 3.0271,
      "step": 116026
    },
    {
      "epoch": 1.51,
      "grad_norm": 5.958847999572754,
      "learning_rate": 0.0002966233122294804,
      "loss": 3.0661,
      "step": 116027
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9070940017700195,
      "learning_rate": 0.0002966192219267456,
      "loss": 3.1523,
      "step": 116028
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6776067018508911,
      "learning_rate": 0.0002966151316246394,
      "loss": 3.1745,
      "step": 116029
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8250832557678223,
      "learning_rate": 0.0002966110413231626,
      "loss": 2.6282,
      "step": 116030
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9418373107910156,
      "learning_rate": 0.0002966069510223158,
      "loss": 2.6605,
      "step": 116031
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8833589553833008,
      "learning_rate": 0.00029660286072209974,
      "loss": 2.9446,
      "step": 116032
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1374258995056152,
      "learning_rate": 0.00029659877042251536,
      "loss": 2.8331,
      "step": 116033
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8427305221557617,
      "learning_rate": 0.0002965946801235633,
      "loss": 2.9202,
      "step": 116034
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6179066896438599,
      "learning_rate": 0.0002965905898252443,
      "loss": 2.8991,
      "step": 116035
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.793485164642334,
      "learning_rate": 0.00029658649952755923,
      "loss": 2.9398,
      "step": 116036
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0282230377197266,
      "learning_rate": 0.00029658240923050895,
      "loss": 2.9697,
      "step": 116037
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.990994453430176,
      "learning_rate": 0.00029657831893409385,
      "loss": 2.9109,
      "step": 116038
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5990219116210938,
      "learning_rate": 0.000296574228638315,
      "loss": 2.9825,
      "step": 116039
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5926198959350586,
      "learning_rate": 0.000296570138343173,
      "loss": 3.2486,
      "step": 116040
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8489859104156494,
      "learning_rate": 0.00029656604804866873,
      "loss": 3.0051,
      "step": 116041
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0946474075317383,
      "learning_rate": 0.0002965619577548028,
      "loss": 3.049,
      "step": 116042
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9475804567337036,
      "learning_rate": 0.0002965578674615763,
      "loss": 3.3238,
      "step": 116043
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.710137963294983,
      "learning_rate": 0.0002965537771689896,
      "loss": 2.9731,
      "step": 116044
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9903554916381836,
      "learning_rate": 0.0002965496868770436,
      "loss": 3.0546,
      "step": 116045
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3341565132141113,
      "learning_rate": 0.0002965455965857391,
      "loss": 2.9925,
      "step": 116046
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.760765790939331,
      "learning_rate": 0.0002965415062950768,
      "loss": 3.1841,
      "step": 116047
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.953844666481018,
      "learning_rate": 0.00029653741600505753,
      "loss": 2.9941,
      "step": 116048
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9130529165267944,
      "learning_rate": 0.00029653332571568216,
      "loss": 3.0238,
      "step": 116049
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3844125270843506,
      "learning_rate": 0.0002965292354269512,
      "loss": 3.0091,
      "step": 116050
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9295539855957031,
      "learning_rate": 0.0002965251451388654,
      "loss": 3.228,
      "step": 116051
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9792864322662354,
      "learning_rate": 0.00029652105485142576,
      "loss": 3.0174,
      "step": 116052
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3475499153137207,
      "learning_rate": 0.00029651696456463284,
      "loss": 3.2151,
      "step": 116053
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.733417272567749,
      "learning_rate": 0.0002965128742784875,
      "loss": 2.9839,
      "step": 116054
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0941271781921387,
      "learning_rate": 0.0002965087839929907,
      "loss": 2.8186,
      "step": 116055
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0448498725891113,
      "learning_rate": 0.0002965046937081428,
      "loss": 3.0848,
      "step": 116056
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5420186519622803,
      "learning_rate": 0.00029650060342394476,
      "loss": 3.1133,
      "step": 116057
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6153745651245117,
      "learning_rate": 0.0002964965131403973,
      "loss": 2.8052,
      "step": 116058
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7327730655670166,
      "learning_rate": 0.00029649242285750116,
      "loss": 2.7768,
      "step": 116059
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9275884628295898,
      "learning_rate": 0.0002964883325752572,
      "loss": 2.9878,
      "step": 116060
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.072175979614258,
      "learning_rate": 0.00029648424229366627,
      "loss": 2.7171,
      "step": 116061
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.298306941986084,
      "learning_rate": 0.0002964801520127288,
      "loss": 3.009,
      "step": 116062
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2106897830963135,
      "learning_rate": 0.00029647606173244576,
      "loss": 3.0429,
      "step": 116063
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0187013149261475,
      "learning_rate": 0.0002964719714528179,
      "loss": 3.192,
      "step": 116064
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.467319965362549,
      "learning_rate": 0.000296467881173846,
      "loss": 2.9401,
      "step": 116065
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0167555809020996,
      "learning_rate": 0.0002964637908955307,
      "loss": 3.0848,
      "step": 116066
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3990349769592285,
      "learning_rate": 0.000296459700617873,
      "loss": 2.7992,
      "step": 116067
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.007589817047119,
      "learning_rate": 0.00029645561034087343,
      "loss": 3.071,
      "step": 116068
    },
    {
      "epoch": 1.51,
      "grad_norm": 5.5853190422058105,
      "learning_rate": 0.0002964515200645328,
      "loss": 2.7975,
      "step": 116069
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.290826797485352,
      "learning_rate": 0.00029644742978885194,
      "loss": 3.1226,
      "step": 116070
    },
    {
      "epoch": 1.51,
      "grad_norm": 5.45914363861084,
      "learning_rate": 0.0002964433395138315,
      "loss": 3.0103,
      "step": 116071
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.037696123123169,
      "learning_rate": 0.00029643924923947233,
      "loss": 3.1723,
      "step": 116072
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0292601585388184,
      "learning_rate": 0.0002964351589657753,
      "loss": 2.8566,
      "step": 116073
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.005117893218994,
      "learning_rate": 0.00029643106869274094,
      "loss": 2.8141,
      "step": 116074
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.8518097400665283,
      "learning_rate": 0.0002964269784203701,
      "loss": 3.0261,
      "step": 116075
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.428422451019287,
      "learning_rate": 0.00029642288814866357,
      "loss": 3.229,
      "step": 116076
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.751857876777649,
      "learning_rate": 0.00029641879787762205,
      "loss": 2.8759,
      "step": 116077
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.451885223388672,
      "learning_rate": 0.00029641470760724634,
      "loss": 2.9078,
      "step": 116078
    },
    {
      "epoch": 1.51,
      "grad_norm": 7.3723578453063965,
      "learning_rate": 0.0002964106173375373,
      "loss": 2.9864,
      "step": 116079
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8024706840515137,
      "learning_rate": 0.00029640652706849555,
      "loss": 2.8289,
      "step": 116080
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1834914684295654,
      "learning_rate": 0.0002964024368001219,
      "loss": 3.0498,
      "step": 116081
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6972999572753906,
      "learning_rate": 0.00029639834653241715,
      "loss": 3.171,
      "step": 116082
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0497491359710693,
      "learning_rate": 0.00029639425626538186,
      "loss": 2.7954,
      "step": 116083
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7757632732391357,
      "learning_rate": 0.00029639016599901705,
      "loss": 2.8259,
      "step": 116084
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.604638934135437,
      "learning_rate": 0.0002963860757333234,
      "loss": 2.9589,
      "step": 116085
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6854544878005981,
      "learning_rate": 0.0002963819854683016,
      "loss": 3.0699,
      "step": 116086
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8543508052825928,
      "learning_rate": 0.00029637789520395243,
      "loss": 2.945,
      "step": 116087
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3161163330078125,
      "learning_rate": 0.0002963738049402768,
      "loss": 3.0323,
      "step": 116088
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.859581470489502,
      "learning_rate": 0.00029636971467727525,
      "loss": 2.9037,
      "step": 116089
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.093574285507202,
      "learning_rate": 0.0002963656244149486,
      "loss": 3.051,
      "step": 116090
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.301851272583008,
      "learning_rate": 0.0002963615341532978,
      "loss": 3.0093,
      "step": 116091
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.988732933998108,
      "learning_rate": 0.0002963574438923233,
      "loss": 2.8407,
      "step": 116092
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.017408609390259,
      "learning_rate": 0.0002963533536320261,
      "loss": 2.8222,
      "step": 116093
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.951289176940918,
      "learning_rate": 0.00029634926337240684,
      "loss": 3.0219,
      "step": 116094
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.85425865650177,
      "learning_rate": 0.0002963451731134663,
      "loss": 2.4293,
      "step": 116095
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.722870945930481,
      "learning_rate": 0.0002963410828552054,
      "loss": 3.1317,
      "step": 116096
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8232477903366089,
      "learning_rate": 0.00029633699259762466,
      "loss": 2.9652,
      "step": 116097
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.413747549057007,
      "learning_rate": 0.000296332902340725,
      "loss": 3.1019,
      "step": 116098
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6521066427230835,
      "learning_rate": 0.000296328812084507,
      "loss": 2.9641,
      "step": 116099
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8330004215240479,
      "learning_rate": 0.0002963247218289716,
      "loss": 3.1275,
      "step": 116100
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.2320241928100586,
      "learning_rate": 0.00029632063157411954,
      "loss": 3.0811,
      "step": 116101
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8844321966171265,
      "learning_rate": 0.0002963165413199515,
      "loss": 3.039,
      "step": 116102
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.867534875869751,
      "learning_rate": 0.0002963124510664683,
      "loss": 3.1212,
      "step": 116103
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.568258047103882,
      "learning_rate": 0.0002963083608136708,
      "loss": 3.2068,
      "step": 116104
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.3031249046325684,
      "learning_rate": 0.0002963042705615595,
      "loss": 2.9943,
      "step": 116105
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8598878383636475,
      "learning_rate": 0.00029630018031013536,
      "loss": 3.1295,
      "step": 116106
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0695955753326416,
      "learning_rate": 0.00029629609005939905,
      "loss": 2.9007,
      "step": 116107
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.408740520477295,
      "learning_rate": 0.0002962919998093513,
      "loss": 3.0065,
      "step": 116108
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.807024002075195,
      "learning_rate": 0.00029628790955999303,
      "loss": 3.0449,
      "step": 116109
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.541151523590088,
      "learning_rate": 0.000296283819311325,
      "loss": 2.8627,
      "step": 116110
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.6999704837799072,
      "learning_rate": 0.00029627972906334777,
      "loss": 3.0232,
      "step": 116111
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.679535150527954,
      "learning_rate": 0.00029627563881606226,
      "loss": 3.049,
      "step": 116112
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.5634663105010986,
      "learning_rate": 0.00029627154856946906,
      "loss": 3.0303,
      "step": 116113
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.861799955368042,
      "learning_rate": 0.0002962674583235691,
      "loss": 3.2637,
      "step": 116114
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.9559662342071533,
      "learning_rate": 0.00029626336807836315,
      "loss": 3.201,
      "step": 116115
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3248233795166016,
      "learning_rate": 0.00029625927783385195,
      "loss": 2.9501,
      "step": 116116
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9365499019622803,
      "learning_rate": 0.00029625518759003616,
      "loss": 2.9464,
      "step": 116117
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.150510549545288,
      "learning_rate": 0.00029625109734691653,
      "loss": 3.0172,
      "step": 116118
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8214221000671387,
      "learning_rate": 0.00029624700710449394,
      "loss": 2.8955,
      "step": 116119
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.965538501739502,
      "learning_rate": 0.0002962429168627691,
      "loss": 2.9415,
      "step": 116120
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.097041130065918,
      "learning_rate": 0.0002962388266217428,
      "loss": 2.9574,
      "step": 116121
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0283331871032715,
      "learning_rate": 0.00029623473638141584,
      "loss": 3.0441,
      "step": 116122
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1189284324645996,
      "learning_rate": 0.0002962306461417888,
      "loss": 3.1977,
      "step": 116123
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.1312615871429443,
      "learning_rate": 0.0002962265559028626,
      "loss": 2.7995,
      "step": 116124
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.157897472381592,
      "learning_rate": 0.00029622246566463794,
      "loss": 3.0671,
      "step": 116125
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0657572746276855,
      "learning_rate": 0.00029621837542711556,
      "loss": 3.0528,
      "step": 116126
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.201418161392212,
      "learning_rate": 0.0002962142851902963,
      "loss": 2.8544,
      "step": 116127
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.063296318054199,
      "learning_rate": 0.000296210194954181,
      "loss": 3.3439,
      "step": 116128
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9611897468566895,
      "learning_rate": 0.0002962061047187701,
      "loss": 3.0575,
      "step": 116129
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8918166160583496,
      "learning_rate": 0.00029620201448406463,
      "loss": 3.3098,
      "step": 116130
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.63716721534729,
      "learning_rate": 0.0002961979242500652,
      "loss": 3.1674,
      "step": 116131
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0338308811187744,
      "learning_rate": 0.00029619383401677273,
      "loss": 2.9178,
      "step": 116132
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8301671743392944,
      "learning_rate": 0.00029618974378418786,
      "loss": 2.8792,
      "step": 116133
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7483062744140625,
      "learning_rate": 0.0002961856535523115,
      "loss": 3.2611,
      "step": 116134
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9541977643966675,
      "learning_rate": 0.00029618156332114425,
      "loss": 2.7567,
      "step": 116135
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.7867140769958496,
      "learning_rate": 0.00029617747309068686,
      "loss": 2.8919,
      "step": 116136
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4213085174560547,
      "learning_rate": 0.0002961733828609401,
      "loss": 3.0256,
      "step": 116137
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3729991912841797,
      "learning_rate": 0.00029616929263190486,
      "loss": 2.7549,
      "step": 116138
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.908383846282959,
      "learning_rate": 0.0002961652024035818,
      "loss": 3.2117,
      "step": 116139
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5800273418426514,
      "learning_rate": 0.0002961611121759718,
      "loss": 3.2836,
      "step": 116140
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8074160814285278,
      "learning_rate": 0.00029615702194907544,
      "loss": 3.1344,
      "step": 116141
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.5480844974517822,
      "learning_rate": 0.0002961529317228935,
      "loss": 3.0922,
      "step": 116142
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.165041446685791,
      "learning_rate": 0.00029614884149742685,
      "loss": 3.0719,
      "step": 116143
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.001213550567627,
      "learning_rate": 0.0002961447512726762,
      "loss": 2.7662,
      "step": 116144
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9161309003829956,
      "learning_rate": 0.00029614066104864233,
      "loss": 3.0378,
      "step": 116145
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.696401834487915,
      "learning_rate": 0.0002961365708253261,
      "loss": 2.8518,
      "step": 116146
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2831735610961914,
      "learning_rate": 0.000296132480602728,
      "loss": 3.0809,
      "step": 116147
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7784844636917114,
      "learning_rate": 0.00029612839038084895,
      "loss": 2.8521,
      "step": 116148
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.127443552017212,
      "learning_rate": 0.00029612430015968973,
      "loss": 2.9815,
      "step": 116149
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.742082118988037,
      "learning_rate": 0.0002961202099392511,
      "loss": 2.8933,
      "step": 116150
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.358931303024292,
      "learning_rate": 0.00029611611971953373,
      "loss": 2.8301,
      "step": 116151
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.030045747756958,
      "learning_rate": 0.0002961120295005386,
      "loss": 2.8521,
      "step": 116152
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0752627849578857,
      "learning_rate": 0.00029610793928226616,
      "loss": 3.1941,
      "step": 116153
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3848299980163574,
      "learning_rate": 0.0002961038490647174,
      "loss": 2.8376,
      "step": 116154
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8389917612075806,
      "learning_rate": 0.00029609975884789297,
      "loss": 2.9876,
      "step": 116155
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9667038917541504,
      "learning_rate": 0.00029609566863179367,
      "loss": 3.1085,
      "step": 116156
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.438882827758789,
      "learning_rate": 0.00029609157841642024,
      "loss": 3.1489,
      "step": 116157
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9677457809448242,
      "learning_rate": 0.00029608748820177355,
      "loss": 3.0208,
      "step": 116158
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4758200645446777,
      "learning_rate": 0.00029608339798785425,
      "loss": 3.0317,
      "step": 116159
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.1712238788604736,
      "learning_rate": 0.00029607930777466305,
      "loss": 2.6339,
      "step": 116160
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3236005306243896,
      "learning_rate": 0.0002960752175622008,
      "loss": 3.0106,
      "step": 116161
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.001399517059326,
      "learning_rate": 0.00029607112735046824,
      "loss": 2.995,
      "step": 116162
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2208657264709473,
      "learning_rate": 0.0002960670371394661,
      "loss": 3.1816,
      "step": 116163
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6349709033966064,
      "learning_rate": 0.0002960629469291953,
      "loss": 2.9323,
      "step": 116164
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.05440354347229,
      "learning_rate": 0.0002960588567196564,
      "loss": 3.0503,
      "step": 116165
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8543680906295776,
      "learning_rate": 0.00029605476651085026,
      "loss": 2.8541,
      "step": 116166
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0790116786956787,
      "learning_rate": 0.0002960506763027776,
      "loss": 2.8845,
      "step": 116167
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9327455759048462,
      "learning_rate": 0.0002960465860954391,
      "loss": 2.8527,
      "step": 116168
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2873733043670654,
      "learning_rate": 0.00029604249588883563,
      "loss": 2.9259,
      "step": 116169
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8838474750518799,
      "learning_rate": 0.000296038405682968,
      "loss": 3.1816,
      "step": 116170
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1538307666778564,
      "learning_rate": 0.000296034315477837,
      "loss": 3.0175,
      "step": 116171
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.957350730895996,
      "learning_rate": 0.0002960302252734431,
      "loss": 3.0771,
      "step": 116172
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.028327703475952,
      "learning_rate": 0.0002960261350697874,
      "loss": 3.0051,
      "step": 116173
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.984970211982727,
      "learning_rate": 0.0002960220448668704,
      "loss": 3.0041,
      "step": 116174
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0005640983581543,
      "learning_rate": 0.0002960179546646931,
      "loss": 3.0694,
      "step": 116175
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1402359008789062,
      "learning_rate": 0.00029601386446325605,
      "loss": 2.8588,
      "step": 116176
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6556718349456787,
      "learning_rate": 0.00029600977426256017,
      "loss": 2.9936,
      "step": 116177
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.960861086845398,
      "learning_rate": 0.00029600568406260605,
      "loss": 2.7334,
      "step": 116178
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9280813932418823,
      "learning_rate": 0.0002960015938633946,
      "loss": 3.1653,
      "step": 116179
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.937651515007019,
      "learning_rate": 0.0002959975036649266,
      "loss": 2.9891,
      "step": 116180
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7076876163482666,
      "learning_rate": 0.0002959934134672026,
      "loss": 3.0794,
      "step": 116181
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7974445819854736,
      "learning_rate": 0.0002959893232702236,
      "loss": 3.1562,
      "step": 116182
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2654504776000977,
      "learning_rate": 0.0002959852330739903,
      "loss": 3.007,
      "step": 116183
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5308139324188232,
      "learning_rate": 0.0002959811428785033,
      "loss": 3.2416,
      "step": 116184
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.556334376335144,
      "learning_rate": 0.0002959770526837635,
      "loss": 3.0296,
      "step": 116185
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8106372356414795,
      "learning_rate": 0.00029597296248977167,
      "loss": 3.0651,
      "step": 116186
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.9312562942504883,
      "learning_rate": 0.00029596887229652854,
      "loss": 2.98,
      "step": 116187
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4517290592193604,
      "learning_rate": 0.0002959647821040349,
      "loss": 2.7945,
      "step": 116188
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7669419050216675,
      "learning_rate": 0.0002959606919122916,
      "loss": 2.9521,
      "step": 116189
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.351642608642578,
      "learning_rate": 0.0002959566017212991,
      "loss": 2.9819,
      "step": 116190
    },
    {
      "epoch": 1.51,
      "grad_norm": 5.115481376647949,
      "learning_rate": 0.00029595251153105833,
      "loss": 2.9667,
      "step": 116191
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.822771430015564,
      "learning_rate": 0.0002959484213415701,
      "loss": 3.0696,
      "step": 116192
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.706945776939392,
      "learning_rate": 0.00029594433115283516,
      "loss": 2.8766,
      "step": 116193
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.883717656135559,
      "learning_rate": 0.0002959402409648542,
      "loss": 3.0485,
      "step": 116194
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1556715965270996,
      "learning_rate": 0.00029593615077762824,
      "loss": 3.1796,
      "step": 116195
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3296449184417725,
      "learning_rate": 0.00029593206059115763,
      "loss": 2.975,
      "step": 116196
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7857087850570679,
      "learning_rate": 0.0002959279704054433,
      "loss": 3.2293,
      "step": 116197
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7069309949874878,
      "learning_rate": 0.0002959238802204861,
      "loss": 2.8932,
      "step": 116198
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2708277702331543,
      "learning_rate": 0.0002959197900362867,
      "loss": 3.18,
      "step": 116199
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.213076114654541,
      "learning_rate": 0.0002959156998528459,
      "loss": 2.9887,
      "step": 116200
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.108665943145752,
      "learning_rate": 0.0002959116096701646,
      "loss": 3.3367,
      "step": 116201
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9893337488174438,
      "learning_rate": 0.0002959075194882432,
      "loss": 3.1322,
      "step": 116202
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0265369415283203,
      "learning_rate": 0.00029590342930708275,
      "loss": 2.9719,
      "step": 116203
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0184366703033447,
      "learning_rate": 0.0002958993391266839,
      "loss": 2.7852,
      "step": 116204
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0584726333618164,
      "learning_rate": 0.0002958952489470474,
      "loss": 2.8593,
      "step": 116205
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.11264705657959,
      "learning_rate": 0.0002958911587681741,
      "loss": 3.0911,
      "step": 116206
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3197920322418213,
      "learning_rate": 0.00029588706859006483,
      "loss": 2.9041,
      "step": 116207
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0999131202697754,
      "learning_rate": 0.0002958829784127201,
      "loss": 2.7039,
      "step": 116208
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6070878505706787,
      "learning_rate": 0.0002958788882361408,
      "loss": 3.1899,
      "step": 116209
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.019563913345337,
      "learning_rate": 0.00029587479806032774,
      "loss": 2.8649,
      "step": 116210
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5940752029418945,
      "learning_rate": 0.00029587070788528153,
      "loss": 3.0446,
      "step": 116211
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9598625898361206,
      "learning_rate": 0.0002958666177110031,
      "loss": 3.084,
      "step": 116212
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6425280570983887,
      "learning_rate": 0.00029586252753749327,
      "loss": 2.8953,
      "step": 116213
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4116199016571045,
      "learning_rate": 0.0002958584373647525,
      "loss": 3.0927,
      "step": 116214
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4770829677581787,
      "learning_rate": 0.0002958543471927818,
      "loss": 3.0756,
      "step": 116215
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1616837978363037,
      "learning_rate": 0.0002958502570215818,
      "loss": 3.0587,
      "step": 116216
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9595420360565186,
      "learning_rate": 0.0002958461668511534,
      "loss": 3.0457,
      "step": 116217
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.254593849182129,
      "learning_rate": 0.0002958420766814972,
      "loss": 3.1642,
      "step": 116218
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0321385860443115,
      "learning_rate": 0.00029583798651261413,
      "loss": 2.8082,
      "step": 116219
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.5622355937957764,
      "learning_rate": 0.0002958338963445048,
      "loss": 2.9611,
      "step": 116220
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4311039447784424,
      "learning_rate": 0.00029582980617717,
      "loss": 3.1762,
      "step": 116221
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4468235969543457,
      "learning_rate": 0.0002958257160106105,
      "loss": 2.9396,
      "step": 116222
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.013699769973755,
      "learning_rate": 0.0002958216258448271,
      "loss": 3.0413,
      "step": 116223
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9371086359024048,
      "learning_rate": 0.0002958175356798206,
      "loss": 3.0417,
      "step": 116224
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2816379070281982,
      "learning_rate": 0.0002958134455155917,
      "loss": 3.0147,
      "step": 116225
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.5371356010437012,
      "learning_rate": 0.0002958093553521411,
      "loss": 2.9355,
      "step": 116226
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8120033740997314,
      "learning_rate": 0.0002958052651894696,
      "loss": 3.0895,
      "step": 116227
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2752156257629395,
      "learning_rate": 0.000295801175027578,
      "loss": 2.7137,
      "step": 116228
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3979625701904297,
      "learning_rate": 0.0002957970848664671,
      "loss": 3.0085,
      "step": 116229
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2386820316314697,
      "learning_rate": 0.0002957929947061375,
      "loss": 2.9574,
      "step": 116230
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.809531807899475,
      "learning_rate": 0.0002957889045465902,
      "loss": 3.1267,
      "step": 116231
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9148361682891846,
      "learning_rate": 0.00029578481438782573,
      "loss": 2.9178,
      "step": 116232
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8946077823638916,
      "learning_rate": 0.0002957807242298449,
      "loss": 2.8929,
      "step": 116233
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7898293733596802,
      "learning_rate": 0.0002957766340726486,
      "loss": 2.832,
      "step": 116234
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1672451496124268,
      "learning_rate": 0.00029577254391623744,
      "loss": 3.0869,
      "step": 116235
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.616140365600586,
      "learning_rate": 0.0002957684537606122,
      "loss": 3.0755,
      "step": 116236
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.054748773574829,
      "learning_rate": 0.0002957643636057738,
      "loss": 3.042,
      "step": 116237
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.721677303314209,
      "learning_rate": 0.00029576027345172296,
      "loss": 3.1373,
      "step": 116238
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9696235656738281,
      "learning_rate": 0.0002957561832984602,
      "loss": 3.1732,
      "step": 116239
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0998570919036865,
      "learning_rate": 0.00029575209314598646,
      "loss": 2.8459,
      "step": 116240
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1330080032348633,
      "learning_rate": 0.00029574800299430253,
      "loss": 3.1576,
      "step": 116241
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.823444128036499,
      "learning_rate": 0.0002957439128434091,
      "loss": 3.0136,
      "step": 116242
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7473968267440796,
      "learning_rate": 0.000295739822693307,
      "loss": 3.0863,
      "step": 116243
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2177133560180664,
      "learning_rate": 0.00029573573254399697,
      "loss": 2.9828,
      "step": 116244
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.609034776687622,
      "learning_rate": 0.00029573164239547967,
      "loss": 2.9475,
      "step": 116245
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8864213228225708,
      "learning_rate": 0.00029572755224775596,
      "loss": 2.8562,
      "step": 116246
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8643877506256104,
      "learning_rate": 0.0002957234621008266,
      "loss": 3.0599,
      "step": 116247
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5267932415008545,
      "learning_rate": 0.00029571937195469226,
      "loss": 2.8541,
      "step": 116248
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.930977463722229,
      "learning_rate": 0.0002957152818093538,
      "loss": 3.2564,
      "step": 116249
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.668199896812439,
      "learning_rate": 0.0002957111916648121,
      "loss": 2.821,
      "step": 116250
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.416158437728882,
      "learning_rate": 0.00029570710152106763,
      "loss": 2.9216,
      "step": 116251
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9961426258087158,
      "learning_rate": 0.0002957030113781213,
      "loss": 2.9768,
      "step": 116252
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.5453970432281494,
      "learning_rate": 0.0002956989212359738,
      "loss": 3.215,
      "step": 116253
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0898683071136475,
      "learning_rate": 0.00029569483109462605,
      "loss": 3.0832,
      "step": 116254
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7818113565444946,
      "learning_rate": 0.0002956907409540786,
      "loss": 2.9925,
      "step": 116255
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8798307180404663,
      "learning_rate": 0.00029568665081433247,
      "loss": 2.903,
      "step": 116256
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.072177886962891,
      "learning_rate": 0.0002956825606753882,
      "loss": 2.8946,
      "step": 116257
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.2343339920043945,
      "learning_rate": 0.00029567847053724666,
      "loss": 2.7447,
      "step": 116258
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4924840927124023,
      "learning_rate": 0.0002956743803999085,
      "loss": 3.0333,
      "step": 116259
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.450807809829712,
      "learning_rate": 0.00029567029026337464,
      "loss": 3.1169,
      "step": 116260
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4097423553466797,
      "learning_rate": 0.00029566620012764567,
      "loss": 2.6293,
      "step": 116261
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.3311893939971924,
      "learning_rate": 0.0002956621099927225,
      "loss": 3.0793,
      "step": 116262
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4125523567199707,
      "learning_rate": 0.0002956580198586058,
      "loss": 3.01,
      "step": 116263
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.146660566329956,
      "learning_rate": 0.0002956539297252963,
      "loss": 3.1712,
      "step": 116264
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9106255769729614,
      "learning_rate": 0.00029564983959279496,
      "loss": 3.3547,
      "step": 116265
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.154036045074463,
      "learning_rate": 0.0002956457494611023,
      "loss": 2.9815,
      "step": 116266
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9893232583999634,
      "learning_rate": 0.00029564165933021914,
      "loss": 2.8606,
      "step": 116267
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.441923141479492,
      "learning_rate": 0.0002956375692001464,
      "loss": 3.2471,
      "step": 116268
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.992491364479065,
      "learning_rate": 0.0002956334790708846,
      "loss": 2.6478,
      "step": 116269
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.138476848602295,
      "learning_rate": 0.0002956293889424347,
      "loss": 2.5202,
      "step": 116270
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.645298480987549,
      "learning_rate": 0.0002956252988147973,
      "loss": 2.9605,
      "step": 116271
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8908766508102417,
      "learning_rate": 0.00029562120868797335,
      "loss": 3.0689,
      "step": 116272
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8072103261947632,
      "learning_rate": 0.0002956171185619634,
      "loss": 3.0851,
      "step": 116273
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6662877798080444,
      "learning_rate": 0.00029561302843676837,
      "loss": 2.9624,
      "step": 116274
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.9919488430023193,
      "learning_rate": 0.0002956089383123889,
      "loss": 3.0971,
      "step": 116275
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.071492671966553,
      "learning_rate": 0.00029560484818882585,
      "loss": 2.8983,
      "step": 116276
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.135721206665039,
      "learning_rate": 0.0002956007580660799,
      "loss": 3.2298,
      "step": 116277
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.631679058074951,
      "learning_rate": 0.0002955966679441519,
      "loss": 3.0465,
      "step": 116278
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.308458089828491,
      "learning_rate": 0.00029559257782304257,
      "loss": 2.9761,
      "step": 116279
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8710808753967285,
      "learning_rate": 0.0002955884877027527,
      "loss": 3.0345,
      "step": 116280
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8071906566619873,
      "learning_rate": 0.0002955843975832829,
      "loss": 2.9019,
      "step": 116281
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4256210327148438,
      "learning_rate": 0.0002955803074646341,
      "loss": 3.0156,
      "step": 116282
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0514302253723145,
      "learning_rate": 0.000295576217346807,
      "loss": 3.1168,
      "step": 116283
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.2924463748931885,
      "learning_rate": 0.00029557212722980235,
      "loss": 2.978,
      "step": 116284
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7052905559539795,
      "learning_rate": 0.00029556803711362095,
      "loss": 3.2938,
      "step": 116285
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0903139114379883,
      "learning_rate": 0.00029556394699826365,
      "loss": 2.9373,
      "step": 116286
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.200247287750244,
      "learning_rate": 0.00029555985688373094,
      "loss": 3.0447,
      "step": 116287
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.2517364025115967,
      "learning_rate": 0.00029555576677002375,
      "loss": 2.8546,
      "step": 116288
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.008702039718628,
      "learning_rate": 0.0002955516766571428,
      "loss": 2.9834,
      "step": 116289
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6763795614242554,
      "learning_rate": 0.00029554758654508893,
      "loss": 2.9865,
      "step": 116290
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.963380813598633,
      "learning_rate": 0.0002955434964338628,
      "loss": 2.8893,
      "step": 116291
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.2793996334075928,
      "learning_rate": 0.0002955394063234654,
      "loss": 2.9038,
      "step": 116292
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.28096342086792,
      "learning_rate": 0.00029553531621389717,
      "loss": 2.6749,
      "step": 116293
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.037931203842163,
      "learning_rate": 0.000295531226105159,
      "loss": 2.9723,
      "step": 116294
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.084958076477051,
      "learning_rate": 0.0002955271359972516,
      "loss": 2.8759,
      "step": 116295
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.832458972930908,
      "learning_rate": 0.0002955230458901759,
      "loss": 2.8912,
      "step": 116296
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5035619735717773,
      "learning_rate": 0.00029551895578393245,
      "loss": 3.1115,
      "step": 116297
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0297207832336426,
      "learning_rate": 0.0002955148656785223,
      "loss": 3.0715,
      "step": 116298
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6790313720703125,
      "learning_rate": 0.00029551077557394585,
      "loss": 3.2547,
      "step": 116299
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.783163547515869,
      "learning_rate": 0.00029550668547020405,
      "loss": 3.2267,
      "step": 116300
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.0888614654541016,
      "learning_rate": 0.0002955025953672977,
      "loss": 2.8802,
      "step": 116301
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.6417338848114014,
      "learning_rate": 0.00029549850526522744,
      "loss": 2.8496,
      "step": 116302
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.946916103363037,
      "learning_rate": 0.0002954944151639941,
      "loss": 2.7233,
      "step": 116303
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.494788885116577,
      "learning_rate": 0.0002954903250635984,
      "loss": 2.8278,
      "step": 116304
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6681907176971436,
      "learning_rate": 0.00029548623496404136,
      "loss": 2.9553,
      "step": 116305
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1015026569366455,
      "learning_rate": 0.0002954821448653233,
      "loss": 3.0025,
      "step": 116306
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8506633043289185,
      "learning_rate": 0.0002954780547674452,
      "loss": 3.0191,
      "step": 116307
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.0477447509765625,
      "learning_rate": 0.0002954739646704078,
      "loss": 2.8813,
      "step": 116308
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5644450187683105,
      "learning_rate": 0.00029546987457421196,
      "loss": 2.89,
      "step": 116309
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.237487554550171,
      "learning_rate": 0.0002954657844788583,
      "loss": 2.8172,
      "step": 116310
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.156123638153076,
      "learning_rate": 0.0002954616943843478,
      "loss": 2.9063,
      "step": 116311
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.5228753089904785,
      "learning_rate": 0.00029545760429068086,
      "loss": 3.035,
      "step": 116312
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.820873260498047,
      "learning_rate": 0.0002954535141978585,
      "loss": 3.036,
      "step": 116313
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.285844326019287,
      "learning_rate": 0.00029544942410588136,
      "loss": 2.8548,
      "step": 116314
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.992722511291504,
      "learning_rate": 0.0002954453340147503,
      "loss": 2.9275,
      "step": 116315
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4392013549804688,
      "learning_rate": 0.000295441243924466,
      "loss": 3.1609,
      "step": 116316
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.931764841079712,
      "learning_rate": 0.0002954371538350294,
      "loss": 3.0307,
      "step": 116317
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.6296615600585938,
      "learning_rate": 0.00029543306374644107,
      "loss": 2.9249,
      "step": 116318
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1499905586242676,
      "learning_rate": 0.0002954289736587017,
      "loss": 2.9773,
      "step": 116319
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.310903310775757,
      "learning_rate": 0.00029542488357181226,
      "loss": 2.8497,
      "step": 116320
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.916229724884033,
      "learning_rate": 0.0002954207934857734,
      "loss": 3.3452,
      "step": 116321
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.847623825073242,
      "learning_rate": 0.0002954167034005859,
      "loss": 2.8996,
      "step": 116322
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.8812637329101562,
      "learning_rate": 0.0002954126133162506,
      "loss": 2.9697,
      "step": 116323
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.201200246810913,
      "learning_rate": 0.00029540852323276814,
      "loss": 3.0768,
      "step": 116324
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.9042662382125854,
      "learning_rate": 0.0002954044331501392,
      "loss": 2.8376,
      "step": 116325
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.3806934356689453,
      "learning_rate": 0.00029540034306836476,
      "loss": 2.7558,
      "step": 116326
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.7614033222198486,
      "learning_rate": 0.00029539625298744546,
      "loss": 2.7914,
      "step": 116327
    },
    {
      "epoch": 1.51,
      "grad_norm": 6.002009391784668,
      "learning_rate": 0.0002953921629073821,
      "loss": 2.8445,
      "step": 116328
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.495451927185059,
      "learning_rate": 0.0002953880728281755,
      "loss": 2.8882,
      "step": 116329
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.114537000656128,
      "learning_rate": 0.00029538398274982626,
      "loss": 3.2183,
      "step": 116330
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.403973340988159,
      "learning_rate": 0.0002953798926723352,
      "loss": 3.0182,
      "step": 116331
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7223124504089355,
      "learning_rate": 0.0002953758025957031,
      "loss": 3.1143,
      "step": 116332
    },
    {
      "epoch": 1.51,
      "grad_norm": 5.14974308013916,
      "learning_rate": 0.0002953717125199308,
      "loss": 2.8486,
      "step": 116333
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.107959270477295,
      "learning_rate": 0.0002953676224450189,
      "loss": 3.487,
      "step": 116334
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.1738579273223877,
      "learning_rate": 0.0002953635323709685,
      "loss": 3.1014,
      "step": 116335
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.5269041061401367,
      "learning_rate": 0.0002953594422977798,
      "loss": 2.9284,
      "step": 116336
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.64782452583313,
      "learning_rate": 0.000295355352225454,
      "loss": 2.8839,
      "step": 116337
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.307234764099121,
      "learning_rate": 0.0002953512621539916,
      "loss": 2.9333,
      "step": 116338
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.6838204860687256,
      "learning_rate": 0.00029534717208339365,
      "loss": 2.7476,
      "step": 116339
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8947001695632935,
      "learning_rate": 0.0002953430820136606,
      "loss": 3.0139,
      "step": 116340
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.100554943084717,
      "learning_rate": 0.00029533899194479353,
      "loss": 3.0664,
      "step": 116341
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.1224260330200195,
      "learning_rate": 0.000295334901876793,
      "loss": 2.716,
      "step": 116342
    },
    {
      "epoch": 1.51,
      "grad_norm": 3.5262296199798584,
      "learning_rate": 0.0002953308118096597,
      "loss": 3.1183,
      "step": 116343
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.5818899869918823,
      "learning_rate": 0.0002953267217433945,
      "loss": 3.0483,
      "step": 116344
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.251619815826416,
      "learning_rate": 0.0002953226316779982,
      "loss": 2.8926,
      "step": 116345
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.7392220497131348,
      "learning_rate": 0.0002953185416134715,
      "loss": 3.087,
      "step": 116346
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.8585877418518066,
      "learning_rate": 0.0002953144515498152,
      "loss": 3.2701,
      "step": 116347
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7512534856796265,
      "learning_rate": 0.00029531036148702994,
      "loss": 2.9444,
      "step": 116348
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.8702144622802734,
      "learning_rate": 0.00029530627142511666,
      "loss": 2.8009,
      "step": 116349
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.4303786754608154,
      "learning_rate": 0.00029530218136407605,
      "loss": 3.0875,
      "step": 116350
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.33357310295105,
      "learning_rate": 0.00029529809130390876,
      "loss": 3.0646,
      "step": 116351
    },
    {
      "epoch": 1.51,
      "grad_norm": 1.7391655445098877,
      "learning_rate": 0.00029529400124461567,
      "loss": 3.1732,
      "step": 116352
    },
    {
      "epoch": 1.51,
      "grad_norm": 4.543037414550781,
      "learning_rate": 0.0002952899111861976,
      "loss": 2.9293,
      "step": 116353
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.113948106765747,
      "learning_rate": 0.00029528582112865515,
      "loss": 3.1434,
      "step": 116354
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7757859230041504,
      "learning_rate": 0.00029528173107198913,
      "loss": 3.122,
      "step": 116355
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1130805015563965,
      "learning_rate": 0.00029527764101620034,
      "loss": 2.839,
      "step": 116356
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.221214532852173,
      "learning_rate": 0.0002952735509612896,
      "loss": 2.9155,
      "step": 116357
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8904733657836914,
      "learning_rate": 0.00029526946090725753,
      "loss": 3.2032,
      "step": 116358
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8716585636138916,
      "learning_rate": 0.000295265370854105,
      "loss": 3.0641,
      "step": 116359
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.243466854095459,
      "learning_rate": 0.00029526128080183264,
      "loss": 2.8525,
      "step": 116360
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7793965339660645,
      "learning_rate": 0.00029525719075044137,
      "loss": 3.1324,
      "step": 116361
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.290562868118286,
      "learning_rate": 0.00029525310069993183,
      "loss": 3.1902,
      "step": 116362
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6390794515609741,
      "learning_rate": 0.00029524901065030483,
      "loss": 3.0231,
      "step": 116363
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9314886331558228,
      "learning_rate": 0.0002952449206015612,
      "loss": 3.0069,
      "step": 116364
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.158118724822998,
      "learning_rate": 0.00029524083055370166,
      "loss": 2.8597,
      "step": 116365
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.567326068878174,
      "learning_rate": 0.00029523674050672684,
      "loss": 2.882,
      "step": 116366
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2936182022094727,
      "learning_rate": 0.0002952326504606376,
      "loss": 3.1365,
      "step": 116367
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1557135581970215,
      "learning_rate": 0.0002952285604154347,
      "loss": 3.1551,
      "step": 116368
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8662090301513672,
      "learning_rate": 0.0002952244703711189,
      "loss": 2.9971,
      "step": 116369
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7634307146072388,
      "learning_rate": 0.00029522038032769095,
      "loss": 3.0544,
      "step": 116370
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7566722631454468,
      "learning_rate": 0.0002952162902851517,
      "loss": 3.0707,
      "step": 116371
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7578173875808716,
      "learning_rate": 0.00029521220024350184,
      "loss": 3.0752,
      "step": 116372
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2770919799804688,
      "learning_rate": 0.0002952081102027421,
      "loss": 3.0961,
      "step": 116373
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.896169900894165,
      "learning_rate": 0.0002952040201628732,
      "loss": 2.9504,
      "step": 116374
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.266160488128662,
      "learning_rate": 0.000295199930123896,
      "loss": 2.9791,
      "step": 116375
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7361550331115723,
      "learning_rate": 0.0002951958400858112,
      "loss": 2.9692,
      "step": 116376
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9396405220031738,
      "learning_rate": 0.00029519175004861955,
      "loss": 2.8319,
      "step": 116377
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5928897857666016,
      "learning_rate": 0.000295187660012322,
      "loss": 2.9505,
      "step": 116378
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4096293449401855,
      "learning_rate": 0.00029518356997691904,
      "loss": 3.1053,
      "step": 116379
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.295679807662964,
      "learning_rate": 0.00029517947994241157,
      "loss": 3.1342,
      "step": 116380
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.80435049533844,
      "learning_rate": 0.0002951753899088003,
      "loss": 2.9764,
      "step": 116381
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6922730207443237,
      "learning_rate": 0.000295171299876086,
      "loss": 2.9539,
      "step": 116382
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.004155158996582,
      "learning_rate": 0.00029516720984426944,
      "loss": 2.9584,
      "step": 116383
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.277200698852539,
      "learning_rate": 0.00029516311981335155,
      "loss": 3.1057,
      "step": 116384
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3301265239715576,
      "learning_rate": 0.0002951590297833328,
      "loss": 3.0,
      "step": 116385
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7170178890228271,
      "learning_rate": 0.00029515493975421406,
      "loss": 2.9101,
      "step": 116386
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.650643825531006,
      "learning_rate": 0.0002951508497259961,
      "loss": 2.8365,
      "step": 116387
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.088636636734009,
      "learning_rate": 0.0002951467596986797,
      "loss": 2.9892,
      "step": 116388
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3662948608398438,
      "learning_rate": 0.0002951426696722656,
      "loss": 3.0734,
      "step": 116389
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.060321092605591,
      "learning_rate": 0.0002951385796467547,
      "loss": 2.7162,
      "step": 116390
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7660380601882935,
      "learning_rate": 0.0002951344896221475,
      "loss": 2.9044,
      "step": 116391
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.533360004425049,
      "learning_rate": 0.0002951303995984449,
      "loss": 2.9622,
      "step": 116392
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.245021343231201,
      "learning_rate": 0.00029512630957564766,
      "loss": 3.1991,
      "step": 116393
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0961062908172607,
      "learning_rate": 0.0002951222195537565,
      "loss": 2.81,
      "step": 116394
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.236250162124634,
      "learning_rate": 0.00029511812953277223,
      "loss": 3.0272,
      "step": 116395
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.450420618057251,
      "learning_rate": 0.00029511403951269576,
      "loss": 3.3701,
      "step": 116396
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6409550905227661,
      "learning_rate": 0.0002951099494935275,
      "loss": 3.0034,
      "step": 116397
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8991957902908325,
      "learning_rate": 0.0002951058594752684,
      "loss": 3.1081,
      "step": 116398
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1487975120544434,
      "learning_rate": 0.00029510176945791923,
      "loss": 2.9412,
      "step": 116399
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3899049758911133,
      "learning_rate": 0.00029509767944148075,
      "loss": 2.9706,
      "step": 116400
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8760396242141724,
      "learning_rate": 0.00029509358942595364,
      "loss": 2.82,
      "step": 116401
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8080880641937256,
      "learning_rate": 0.0002950894994113389,
      "loss": 2.9543,
      "step": 116402
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.632460594177246,
      "learning_rate": 0.000295085409397637,
      "loss": 3.0491,
      "step": 116403
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8742806911468506,
      "learning_rate": 0.00029508131938484874,
      "loss": 2.8296,
      "step": 116404
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.61507248878479,
      "learning_rate": 0.000295077229372975,
      "loss": 3.0658,
      "step": 116405
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8339290618896484,
      "learning_rate": 0.00029507313936201653,
      "loss": 3.08,
      "step": 116406
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.11228084564209,
      "learning_rate": 0.0002950690493519741,
      "loss": 2.9787,
      "step": 116407
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4917712211608887,
      "learning_rate": 0.0002950649593428484,
      "loss": 2.726,
      "step": 116408
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.802962064743042,
      "learning_rate": 0.0002950608693346402,
      "loss": 2.9971,
      "step": 116409
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9184253215789795,
      "learning_rate": 0.00029505677932735024,
      "loss": 3.0757,
      "step": 116410
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8461722135543823,
      "learning_rate": 0.00029505268932097933,
      "loss": 3.1932,
      "step": 116411
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8837320804595947,
      "learning_rate": 0.00029504859931552826,
      "loss": 2.8079,
      "step": 116412
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9711194038391113,
      "learning_rate": 0.0002950445093109977,
      "loss": 3.1398,
      "step": 116413
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1560821533203125,
      "learning_rate": 0.00029504041930738856,
      "loss": 3.1951,
      "step": 116414
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5785181522369385,
      "learning_rate": 0.00029503632930470144,
      "loss": 3.1231,
      "step": 116415
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.840043306350708,
      "learning_rate": 0.0002950322393029371,
      "loss": 3.1106,
      "step": 116416
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8417847156524658,
      "learning_rate": 0.00029502814930209637,
      "loss": 3.1954,
      "step": 116417
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.012932538986206,
      "learning_rate": 0.00029502405930218004,
      "loss": 2.8668,
      "step": 116418
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.437551975250244,
      "learning_rate": 0.0002950199693031888,
      "loss": 3.3052,
      "step": 116419
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1072616577148438,
      "learning_rate": 0.0002950158793051236,
      "loss": 3.2285,
      "step": 116420
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3130435943603516,
      "learning_rate": 0.0002950117893079849,
      "loss": 2.8737,
      "step": 116421
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9428036212921143,
      "learning_rate": 0.0002950076993117736,
      "loss": 3.0244,
      "step": 116422
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2773728370666504,
      "learning_rate": 0.00029500360931649045,
      "loss": 3.2705,
      "step": 116423
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.415987253189087,
      "learning_rate": 0.00029499951932213624,
      "loss": 3.1894,
      "step": 116424
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2925760746002197,
      "learning_rate": 0.0002949954293287117,
      "loss": 2.7391,
      "step": 116425
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0912528038024902,
      "learning_rate": 0.00029499133933621764,
      "loss": 2.7729,
      "step": 116426
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9171409606933594,
      "learning_rate": 0.0002949872493446548,
      "loss": 3.0981,
      "step": 116427
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.915527105331421,
      "learning_rate": 0.0002949831593540239,
      "loss": 2.994,
      "step": 116428
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.768446683883667,
      "learning_rate": 0.0002949790693643257,
      "loss": 2.9159,
      "step": 116429
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2981765270233154,
      "learning_rate": 0.000294974979375561,
      "loss": 3.0247,
      "step": 116430
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4875242710113525,
      "learning_rate": 0.00029497088938773054,
      "loss": 2.9336,
      "step": 116431
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.005554676055908,
      "learning_rate": 0.00029496679940083515,
      "loss": 3.1113,
      "step": 116432
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2059402465820312,
      "learning_rate": 0.00029496270941487544,
      "loss": 3.022,
      "step": 116433
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8909245729446411,
      "learning_rate": 0.00029495861942985236,
      "loss": 2.8771,
      "step": 116434
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.54107403755188,
      "learning_rate": 0.00029495452944576647,
      "loss": 3.0133,
      "step": 116435
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.796284556388855,
      "learning_rate": 0.0002949504394626187,
      "loss": 3.0249,
      "step": 116436
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.115494728088379,
      "learning_rate": 0.00029494634948040965,
      "loss": 2.8729,
      "step": 116437
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.673856019973755,
      "learning_rate": 0.0002949422594991402,
      "loss": 2.756,
      "step": 116438
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.008939027786255,
      "learning_rate": 0.00029493816951881117,
      "loss": 3.2293,
      "step": 116439
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9219204187393188,
      "learning_rate": 0.0002949340795394231,
      "loss": 2.8636,
      "step": 116440
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.116332769393921,
      "learning_rate": 0.000294929989560977,
      "loss": 2.6274,
      "step": 116441
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.499037265777588,
      "learning_rate": 0.0002949258995834734,
      "loss": 3.1932,
      "step": 116442
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9236633777618408,
      "learning_rate": 0.0002949218096069132,
      "loss": 3.0553,
      "step": 116443
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0106747150421143,
      "learning_rate": 0.00029491771963129716,
      "loss": 3.2464,
      "step": 116444
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7235164642333984,
      "learning_rate": 0.00029491362965662605,
      "loss": 3.0596,
      "step": 116445
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0258030891418457,
      "learning_rate": 0.0002949095396829005,
      "loss": 3.2737,
      "step": 116446
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.6987295150756836,
      "learning_rate": 0.0002949054497101214,
      "loss": 2.9893,
      "step": 116447
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1236720085144043,
      "learning_rate": 0.0002949013597382895,
      "loss": 2.8623,
      "step": 116448
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8571691513061523,
      "learning_rate": 0.00029489726976740554,
      "loss": 3.2622,
      "step": 116449
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9308335781097412,
      "learning_rate": 0.0002948931797974702,
      "loss": 3.0558,
      "step": 116450
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9756563901901245,
      "learning_rate": 0.00029488908982848444,
      "loss": 2.7818,
      "step": 116451
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.423569440841675,
      "learning_rate": 0.0002948849998604488,
      "loss": 2.7931,
      "step": 116452
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.259406805038452,
      "learning_rate": 0.00029488090989336413,
      "loss": 2.9831,
      "step": 116453
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.040698528289795,
      "learning_rate": 0.0002948768199272312,
      "loss": 2.8766,
      "step": 116454
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1996030807495117,
      "learning_rate": 0.00029487272996205077,
      "loss": 3.1513,
      "step": 116455
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.163353204727173,
      "learning_rate": 0.00029486863999782367,
      "loss": 3.09,
      "step": 116456
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2597081661224365,
      "learning_rate": 0.00029486455003455055,
      "loss": 2.9972,
      "step": 116457
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7955557107925415,
      "learning_rate": 0.0002948604600722322,
      "loss": 2.7731,
      "step": 116458
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8655626773834229,
      "learning_rate": 0.00029485637011086933,
      "loss": 2.9966,
      "step": 116459
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2491393089294434,
      "learning_rate": 0.0002948522801504628,
      "loss": 2.9143,
      "step": 116460
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1129865646362305,
      "learning_rate": 0.0002948481901910133,
      "loss": 2.9231,
      "step": 116461
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1469004154205322,
      "learning_rate": 0.0002948441002325216,
      "loss": 3.087,
      "step": 116462
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7247035503387451,
      "learning_rate": 0.0002948400102749887,
      "loss": 2.7792,
      "step": 116463
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3548080921173096,
      "learning_rate": 0.00029483592031841487,
      "loss": 2.9745,
      "step": 116464
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0039634704589844,
      "learning_rate": 0.00029483183036280126,
      "loss": 3.0221,
      "step": 116465
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9157954454421997,
      "learning_rate": 0.0002948277404081485,
      "loss": 2.9864,
      "step": 116466
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0470008850097656,
      "learning_rate": 0.00029482365045445727,
      "loss": 3.0511,
      "step": 116467
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7739629745483398,
      "learning_rate": 0.0002948195605017285,
      "loss": 3.0738,
      "step": 116468
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3087351322174072,
      "learning_rate": 0.00029481547054996296,
      "loss": 3.0018,
      "step": 116469
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3156256675720215,
      "learning_rate": 0.00029481138059916126,
      "loss": 3.0256,
      "step": 116470
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9850574731826782,
      "learning_rate": 0.0002948072906493241,
      "loss": 3.0302,
      "step": 116471
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.28228759765625,
      "learning_rate": 0.0002948032007004525,
      "loss": 2.8968,
      "step": 116472
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.402651786804199,
      "learning_rate": 0.000294799110752547,
      "loss": 3.2006,
      "step": 116473
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0553534030914307,
      "learning_rate": 0.00029479502080560847,
      "loss": 3.204,
      "step": 116474
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.185194969177246,
      "learning_rate": 0.00029479093085963776,
      "loss": 2.9796,
      "step": 116475
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.038825511932373,
      "learning_rate": 0.0002947868409146354,
      "loss": 2.8431,
      "step": 116476
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4463560581207275,
      "learning_rate": 0.0002947827509706022,
      "loss": 2.7416,
      "step": 116477
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8915599584579468,
      "learning_rate": 0.00029477866102753906,
      "loss": 2.9979,
      "step": 116478
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1398890018463135,
      "learning_rate": 0.00029477457108544665,
      "loss": 3.0525,
      "step": 116479
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9778001308441162,
      "learning_rate": 0.00029477048114432574,
      "loss": 3.0008,
      "step": 116480
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8729106187820435,
      "learning_rate": 0.00029476639120417725,
      "loss": 2.5923,
      "step": 116481
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.283156394958496,
      "learning_rate": 0.00029476230126500167,
      "loss": 2.8898,
      "step": 116482
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.095296621322632,
      "learning_rate": 0.0002947582113267998,
      "loss": 2.6833,
      "step": 116483
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.161456823348999,
      "learning_rate": 0.00029475412138957254,
      "loss": 2.6311,
      "step": 116484
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3457705974578857,
      "learning_rate": 0.00029475003145332056,
      "loss": 3.131,
      "step": 116485
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9702117443084717,
      "learning_rate": 0.00029474594151804464,
      "loss": 3.1327,
      "step": 116486
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8591384887695312,
      "learning_rate": 0.0002947418515837457,
      "loss": 2.8655,
      "step": 116487
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.290811538696289,
      "learning_rate": 0.00029473776165042426,
      "loss": 2.8227,
      "step": 116488
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.66071617603302,
      "learning_rate": 0.0002947336717180811,
      "loss": 2.9869,
      "step": 116489
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8566436767578125,
      "learning_rate": 0.0002947295817867171,
      "loss": 2.9774,
      "step": 116490
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.0624799728393555,
      "learning_rate": 0.0002947254918563329,
      "loss": 3.038,
      "step": 116491
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.3125953674316406,
      "learning_rate": 0.0002947214019269294,
      "loss": 2.8881,
      "step": 116492
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.047347068786621,
      "learning_rate": 0.0002947173119985074,
      "loss": 2.9277,
      "step": 116493
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.2563860416412354,
      "learning_rate": 0.0002947132220710674,
      "loss": 3.0816,
      "step": 116494
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.16951322555542,
      "learning_rate": 0.00029470913214461034,
      "loss": 2.972,
      "step": 116495
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.032682418823242,
      "learning_rate": 0.00029470504221913696,
      "loss": 3.074,
      "step": 116496
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.783738374710083,
      "learning_rate": 0.000294700952294648,
      "loss": 3.2211,
      "step": 116497
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.476451873779297,
      "learning_rate": 0.0002946968623711442,
      "loss": 3.0059,
      "step": 116498
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3310654163360596,
      "learning_rate": 0.0002946927724486265,
      "loss": 2.793,
      "step": 116499
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9733620882034302,
      "learning_rate": 0.00029468868252709537,
      "loss": 3.1927,
      "step": 116500
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2163548469543457,
      "learning_rate": 0.00029468459260655175,
      "loss": 3.0331,
      "step": 116501
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4417457580566406,
      "learning_rate": 0.0002946805026869964,
      "loss": 2.5932,
      "step": 116502
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.863629937171936,
      "learning_rate": 0.00029467641276842996,
      "loss": 2.944,
      "step": 116503
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6902366876602173,
      "learning_rate": 0.0002946723228508533,
      "loss": 2.9165,
      "step": 116504
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2145142555236816,
      "learning_rate": 0.0002946682329342672,
      "loss": 2.8908,
      "step": 116505
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.036503553390503,
      "learning_rate": 0.00029466414301867246,
      "loss": 3.0056,
      "step": 116506
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8209142684936523,
      "learning_rate": 0.0002946600531040696,
      "loss": 3.1033,
      "step": 116507
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5516091585159302,
      "learning_rate": 0.00029465596319045963,
      "loss": 2.8894,
      "step": 116508
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.383723258972168,
      "learning_rate": 0.00029465187327784315,
      "loss": 2.7292,
      "step": 116509
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.561262845993042,
      "learning_rate": 0.000294647783366221,
      "loss": 2.799,
      "step": 116510
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2069010734558105,
      "learning_rate": 0.0002946436934555939,
      "loss": 3.1338,
      "step": 116511
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3168070316314697,
      "learning_rate": 0.00029463960354596285,
      "loss": 2.69,
      "step": 116512
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9886404275894165,
      "learning_rate": 0.0002946355136373282,
      "loss": 3.0322,
      "step": 116513
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.0550553798675537,
      "learning_rate": 0.00029463142372969093,
      "loss": 2.7936,
      "step": 116514
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7836414575576782,
      "learning_rate": 0.00029462733382305177,
      "loss": 2.8346,
      "step": 116515
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9697439670562744,
      "learning_rate": 0.00029462324391741154,
      "loss": 2.978,
      "step": 116516
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.097888708114624,
      "learning_rate": 0.0002946191540127709,
      "loss": 2.8614,
      "step": 116517
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9655020236968994,
      "learning_rate": 0.0002946150641091307,
      "loss": 3.2066,
      "step": 116518
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.03944993019104,
      "learning_rate": 0.00029461097420649174,
      "loss": 3.1311,
      "step": 116519
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.966738224029541,
      "learning_rate": 0.00029460688430485454,
      "loss": 3.2627,
      "step": 116520
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3465723991394043,
      "learning_rate": 0.00029460279440422007,
      "loss": 2.9785,
      "step": 116521
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8345873355865479,
      "learning_rate": 0.0002945987045045891,
      "loss": 2.9865,
      "step": 116522
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.022928476333618,
      "learning_rate": 0.0002945946146059623,
      "loss": 2.9653,
      "step": 116523
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9189921617507935,
      "learning_rate": 0.0002945905247083404,
      "loss": 2.8368,
      "step": 116524
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6325860023498535,
      "learning_rate": 0.0002945864348117243,
      "loss": 2.8335,
      "step": 116525
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5207581520080566,
      "learning_rate": 0.0002945823449161147,
      "loss": 3.0243,
      "step": 116526
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7927404642105103,
      "learning_rate": 0.00029457825502151236,
      "loss": 3.0394,
      "step": 116527
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9046788215637207,
      "learning_rate": 0.00029457416512791793,
      "loss": 2.9235,
      "step": 116528
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.564906120300293,
      "learning_rate": 0.00029457007523533225,
      "loss": 3.1341,
      "step": 116529
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.206285238265991,
      "learning_rate": 0.00029456598534375624,
      "loss": 2.9764,
      "step": 116530
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4382431507110596,
      "learning_rate": 0.0002945618954531904,
      "loss": 2.8707,
      "step": 116531
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9335733652114868,
      "learning_rate": 0.00029455780556363563,
      "loss": 2.9226,
      "step": 116532
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0039522647857666,
      "learning_rate": 0.0002945537156750927,
      "loss": 2.8147,
      "step": 116533
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8567217588424683,
      "learning_rate": 0.00029454962578756236,
      "loss": 2.9553,
      "step": 116534
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7333587408065796,
      "learning_rate": 0.0002945455359010452,
      "loss": 3.1933,
      "step": 116535
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9045566320419312,
      "learning_rate": 0.0002945414460155423,
      "loss": 3.0194,
      "step": 116536
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7596685886383057,
      "learning_rate": 0.00029453735613105416,
      "loss": 2.9512,
      "step": 116537
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.834608793258667,
      "learning_rate": 0.00029453326624758163,
      "loss": 3.0227,
      "step": 116538
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2316484451293945,
      "learning_rate": 0.0002945291763651255,
      "loss": 3.1401,
      "step": 116539
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0440597534179688,
      "learning_rate": 0.00029452508648368644,
      "loss": 3.0702,
      "step": 116540
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.229865312576294,
      "learning_rate": 0.00029452099660326535,
      "loss": 2.9625,
      "step": 116541
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5518991947174072,
      "learning_rate": 0.00029451690672386297,
      "loss": 2.9417,
      "step": 116542
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9996005296707153,
      "learning_rate": 0.00029451281684547985,
      "loss": 2.9951,
      "step": 116543
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0190536975860596,
      "learning_rate": 0.00029450872696811696,
      "loss": 3.0402,
      "step": 116544
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.2003791332244873,
      "learning_rate": 0.00029450463709177496,
      "loss": 2.9894,
      "step": 116545
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8210341930389404,
      "learning_rate": 0.00029450054721645466,
      "loss": 2.9898,
      "step": 116546
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1078262329101562,
      "learning_rate": 0.0002944964573421568,
      "loss": 2.8691,
      "step": 116547
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2298834323883057,
      "learning_rate": 0.00029449236746888235,
      "loss": 3.1052,
      "step": 116548
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.54032826423645,
      "learning_rate": 0.0002944882775966317,
      "loss": 3.0434,
      "step": 116549
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7317328453063965,
      "learning_rate": 0.00029448418772540577,
      "loss": 2.9188,
      "step": 116550
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.069899320602417,
      "learning_rate": 0.00029448009785520534,
      "loss": 3.0181,
      "step": 116551
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1327548027038574,
      "learning_rate": 0.00029447600798603117,
      "loss": 3.0633,
      "step": 116552
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3617465496063232,
      "learning_rate": 0.000294471918117884,
      "loss": 3.1405,
      "step": 116553
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9585059881210327,
      "learning_rate": 0.00029446782825076476,
      "loss": 2.8208,
      "step": 116554
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5281929969787598,
      "learning_rate": 0.0002944637383846739,
      "loss": 2.9469,
      "step": 116555
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.113179922103882,
      "learning_rate": 0.00029445964851961237,
      "loss": 2.8845,
      "step": 116556
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7227137088775635,
      "learning_rate": 0.0002944555586555809,
      "loss": 2.8625,
      "step": 116557
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2853755950927734,
      "learning_rate": 0.00029445146879258016,
      "loss": 2.8374,
      "step": 116558
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3776915073394775,
      "learning_rate": 0.0002944473789306111,
      "loss": 2.9129,
      "step": 116559
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5491151809692383,
      "learning_rate": 0.0002944432890696745,
      "loss": 2.6828,
      "step": 116560
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4179391860961914,
      "learning_rate": 0.00029443919920977077,
      "loss": 2.9053,
      "step": 116561
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8487197160720825,
      "learning_rate": 0.000294435109350901,
      "loss": 2.9028,
      "step": 116562
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8623988628387451,
      "learning_rate": 0.0002944310194930658,
      "loss": 3.1246,
      "step": 116563
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7068620920181274,
      "learning_rate": 0.00029442692963626595,
      "loss": 2.9398,
      "step": 116564
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.86656653881073,
      "learning_rate": 0.0002944228397805023,
      "loss": 2.8378,
      "step": 116565
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.259763717651367,
      "learning_rate": 0.0002944187499257757,
      "loss": 3.3551,
      "step": 116566
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.862775206565857,
      "learning_rate": 0.00029441466007208657,
      "loss": 2.8594,
      "step": 116567
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2658042907714844,
      "learning_rate": 0.00029441057021943585,
      "loss": 2.9161,
      "step": 116568
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9495986700057983,
      "learning_rate": 0.00029440648036782433,
      "loss": 3.1435,
      "step": 116569
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.492377281188965,
      "learning_rate": 0.00029440239051725274,
      "loss": 2.7079,
      "step": 116570
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.164846181869507,
      "learning_rate": 0.00029439830066772186,
      "loss": 3.2163,
      "step": 116571
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1673147678375244,
      "learning_rate": 0.0002943942108192325,
      "loss": 2.9634,
      "step": 116572
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5110722780227661,
      "learning_rate": 0.00029439012097178544,
      "loss": 3.1891,
      "step": 116573
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8488644361495972,
      "learning_rate": 0.0002943860311253812,
      "loss": 2.8809,
      "step": 116574
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9559837579727173,
      "learning_rate": 0.00029438194128002075,
      "loss": 2.8249,
      "step": 116575
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6873973608016968,
      "learning_rate": 0.0002943778514357048,
      "loss": 3.1025,
      "step": 116576
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3832755088806152,
      "learning_rate": 0.0002943737615924341,
      "loss": 3.0579,
      "step": 116577
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.123476982116699,
      "learning_rate": 0.0002943696717502094,
      "loss": 2.899,
      "step": 116578
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2730510234832764,
      "learning_rate": 0.00029436558190903164,
      "loss": 2.9128,
      "step": 116579
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4242639541625977,
      "learning_rate": 0.0002943614920689013,
      "loss": 3.0413,
      "step": 116580
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7912460565567017,
      "learning_rate": 0.00029435740222981925,
      "loss": 3.0928,
      "step": 116581
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.081772565841675,
      "learning_rate": 0.0002943533123917863,
      "loss": 3.1526,
      "step": 116582
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8970975875854492,
      "learning_rate": 0.00029434922255480316,
      "loss": 2.9691,
      "step": 116583
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1054043769836426,
      "learning_rate": 0.0002943451327188706,
      "loss": 2.7119,
      "step": 116584
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.125518560409546,
      "learning_rate": 0.0002943410428839895,
      "loss": 2.8631,
      "step": 116585
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9800159931182861,
      "learning_rate": 0.0002943369530501604,
      "loss": 2.9282,
      "step": 116586
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.009654998779297,
      "learning_rate": 0.00029433286321738414,
      "loss": 2.9045,
      "step": 116587
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6458557844161987,
      "learning_rate": 0.0002943287733856615,
      "loss": 3.0349,
      "step": 116588
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.67754864692688,
      "learning_rate": 0.0002943246835549933,
      "loss": 2.8358,
      "step": 116589
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9413340091705322,
      "learning_rate": 0.0002943205937253802,
      "loss": 2.8847,
      "step": 116590
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7213550806045532,
      "learning_rate": 0.0002943165038968232,
      "loss": 3.0541,
      "step": 116591
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9787170886993408,
      "learning_rate": 0.0002943124140693227,
      "loss": 2.9998,
      "step": 116592
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7134168148040771,
      "learning_rate": 0.00029430832424287963,
      "loss": 3.1253,
      "step": 116593
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2956933975219727,
      "learning_rate": 0.00029430423441749475,
      "loss": 2.8883,
      "step": 116594
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9021092653274536,
      "learning_rate": 0.0002943001445931688,
      "loss": 3.0942,
      "step": 116595
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9263339042663574,
      "learning_rate": 0.0002942960547699026,
      "loss": 2.7673,
      "step": 116596
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2620108127593994,
      "learning_rate": 0.000294291964947697,
      "loss": 2.8221,
      "step": 116597
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5406370162963867,
      "learning_rate": 0.00029428787512655245,
      "loss": 3.0277,
      "step": 116598
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7138983011245728,
      "learning_rate": 0.0002942837853064699,
      "loss": 3.2086,
      "step": 116599
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9914157390594482,
      "learning_rate": 0.0002942796954874502,
      "loss": 2.9186,
      "step": 116600
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4667563438415527,
      "learning_rate": 0.00029427560566949387,
      "loss": 3.0648,
      "step": 116601
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4810023307800293,
      "learning_rate": 0.00029427151585260193,
      "loss": 2.8272,
      "step": 116602
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.095001697540283,
      "learning_rate": 0.00029426742603677505,
      "loss": 2.8144,
      "step": 116603
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7278337478637695,
      "learning_rate": 0.00029426333622201395,
      "loss": 3.1196,
      "step": 116604
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.120757818222046,
      "learning_rate": 0.00029425924640831937,
      "loss": 2.7373,
      "step": 116605
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.245739221572876,
      "learning_rate": 0.00029425515659569203,
      "loss": 2.9709,
      "step": 116606
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1281397342681885,
      "learning_rate": 0.00029425106678413284,
      "loss": 2.9525,
      "step": 116607
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8453885316848755,
      "learning_rate": 0.00029424697697364246,
      "loss": 2.8253,
      "step": 116608
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8067145347595215,
      "learning_rate": 0.00029424288716422176,
      "loss": 2.9119,
      "step": 116609
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.792860984802246,
      "learning_rate": 0.00029423879735587127,
      "loss": 2.9501,
      "step": 116610
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.349292755126953,
      "learning_rate": 0.0002942347075485921,
      "loss": 2.8484,
      "step": 116611
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5011186599731445,
      "learning_rate": 0.0002942306177423846,
      "loss": 2.946,
      "step": 116612
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9147634506225586,
      "learning_rate": 0.0002942265279372498,
      "loss": 2.7715,
      "step": 116613
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7985496520996094,
      "learning_rate": 0.00029422243813318844,
      "loss": 2.8733,
      "step": 116614
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7615368366241455,
      "learning_rate": 0.0002942183483302012,
      "loss": 3.1928,
      "step": 116615
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.029911994934082,
      "learning_rate": 0.0002942142585282889,
      "loss": 3.1263,
      "step": 116616
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.81936776638031,
      "learning_rate": 0.00029421016872745225,
      "loss": 3.0702,
      "step": 116617
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1776816844940186,
      "learning_rate": 0.0002942060789276921,
      "loss": 2.7085,
      "step": 116618
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8756681680679321,
      "learning_rate": 0.0002942019891290091,
      "loss": 3.0097,
      "step": 116619
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.682110071182251,
      "learning_rate": 0.00029419789933140405,
      "loss": 3.0739,
      "step": 116620
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6903895139694214,
      "learning_rate": 0.0002941938095348778,
      "loss": 3.0873,
      "step": 116621
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7916209697723389,
      "learning_rate": 0.00029418971973943096,
      "loss": 2.8128,
      "step": 116622
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8903859853744507,
      "learning_rate": 0.00029418562994506435,
      "loss": 3.0522,
      "step": 116623
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8902020454406738,
      "learning_rate": 0.00029418154015177874,
      "loss": 3.0193,
      "step": 116624
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3211073875427246,
      "learning_rate": 0.00029417745035957495,
      "loss": 3.0926,
      "step": 116625
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7631804943084717,
      "learning_rate": 0.0002941733605684536,
      "loss": 2.8268,
      "step": 116626
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.542487859725952,
      "learning_rate": 0.0002941692707784157,
      "loss": 3.0226,
      "step": 116627
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.145164966583252,
      "learning_rate": 0.0002941651809894617,
      "loss": 3.1634,
      "step": 116628
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.960784673690796,
      "learning_rate": 0.0002941610912015925,
      "loss": 2.8441,
      "step": 116629
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9205445051193237,
      "learning_rate": 0.00029415700141480883,
      "loss": 2.9833,
      "step": 116630
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.229029417037964,
      "learning_rate": 0.00029415291162911155,
      "loss": 3.4737,
      "step": 116631
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8612183332443237,
      "learning_rate": 0.00029414882184450135,
      "loss": 2.849,
      "step": 116632
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8407223224639893,
      "learning_rate": 0.00029414473206097904,
      "loss": 3.0879,
      "step": 116633
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.398758888244629,
      "learning_rate": 0.0002941406422785453,
      "loss": 2.7991,
      "step": 116634
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.122471332550049,
      "learning_rate": 0.0002941365524972009,
      "loss": 2.7029,
      "step": 116635
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.384016752243042,
      "learning_rate": 0.0002941324627169466,
      "loss": 2.8166,
      "step": 116636
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.3288700580596924,
      "learning_rate": 0.0002941283729377832,
      "loss": 3.0133,
      "step": 116637
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9542350769042969,
      "learning_rate": 0.00029412428315971144,
      "loss": 3.0083,
      "step": 116638
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.272691249847412,
      "learning_rate": 0.00029412019338273207,
      "loss": 2.8233,
      "step": 116639
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6058828830718994,
      "learning_rate": 0.00029411610360684606,
      "loss": 2.9657,
      "step": 116640
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.605130910873413,
      "learning_rate": 0.0002941120138320538,
      "loss": 2.8617,
      "step": 116641
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0460286140441895,
      "learning_rate": 0.00029410792405835625,
      "loss": 2.9937,
      "step": 116642
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9869364500045776,
      "learning_rate": 0.00029410383428575413,
      "loss": 3.006,
      "step": 116643
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.69657564163208,
      "learning_rate": 0.0002940997445142482,
      "loss": 3.0668,
      "step": 116644
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6448354721069336,
      "learning_rate": 0.00029409565474383925,
      "loss": 3.0961,
      "step": 116645
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.103137493133545,
      "learning_rate": 0.00029409156497452817,
      "loss": 2.8719,
      "step": 116646
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8760789632797241,
      "learning_rate": 0.0002940874752063155,
      "loss": 2.9557,
      "step": 116647
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8749059438705444,
      "learning_rate": 0.000294083385439202,
      "loss": 2.9703,
      "step": 116648
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8232654333114624,
      "learning_rate": 0.00029407929567318856,
      "loss": 2.8846,
      "step": 116649
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8203656673431396,
      "learning_rate": 0.00029407520590827587,
      "loss": 3.0728,
      "step": 116650
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8223514556884766,
      "learning_rate": 0.00029407111614446473,
      "loss": 3.0825,
      "step": 116651
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.691756248474121,
      "learning_rate": 0.000294067026381756,
      "loss": 3.2123,
      "step": 116652
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.875962734222412,
      "learning_rate": 0.0002940629366201502,
      "loss": 3.194,
      "step": 116653
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9894899129867554,
      "learning_rate": 0.00029405884685964814,
      "loss": 2.9515,
      "step": 116654
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9598616361618042,
      "learning_rate": 0.00029405475710025074,
      "loss": 2.8933,
      "step": 116655
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.16555118560791,
      "learning_rate": 0.0002940506673419587,
      "loss": 2.9614,
      "step": 116656
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7718584537506104,
      "learning_rate": 0.00029404657758477266,
      "loss": 3.1332,
      "step": 116657
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.24745512008667,
      "learning_rate": 0.0002940424878286936,
      "loss": 3.1999,
      "step": 116658
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.232508659362793,
      "learning_rate": 0.0002940383980737221,
      "loss": 2.9261,
      "step": 116659
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3526816368103027,
      "learning_rate": 0.00029403430831985894,
      "loss": 3.1745,
      "step": 116660
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4175002574920654,
      "learning_rate": 0.00029403021856710493,
      "loss": 3.1535,
      "step": 116661
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.1078107357025146,
      "learning_rate": 0.00029402612881546076,
      "loss": 3.0592,
      "step": 116662
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.2443695068359375,
      "learning_rate": 0.0002940220390649273,
      "loss": 2.8954,
      "step": 116663
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9493385553359985,
      "learning_rate": 0.00029401794931550537,
      "loss": 3.1708,
      "step": 116664
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9366989135742188,
      "learning_rate": 0.00029401385956719545,
      "loss": 3.0056,
      "step": 116665
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.527834892272949,
      "learning_rate": 0.0002940097698199985,
      "loss": 2.9141,
      "step": 116666
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.7351930141448975,
      "learning_rate": 0.0002940056800739153,
      "loss": 2.7944,
      "step": 116667
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.913285493850708,
      "learning_rate": 0.0002940015903289465,
      "loss": 3.0645,
      "step": 116668
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.485830783843994,
      "learning_rate": 0.0002939975005850929,
      "loss": 2.9865,
      "step": 116669
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.449082612991333,
      "learning_rate": 0.00029399341084235546,
      "loss": 3.1017,
      "step": 116670
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.949834108352661,
      "learning_rate": 0.0002939893211007346,
      "loss": 2.9793,
      "step": 116671
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9984898567199707,
      "learning_rate": 0.00029398523136023124,
      "loss": 3.138,
      "step": 116672
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7662461996078491,
      "learning_rate": 0.00029398114162084614,
      "loss": 2.8403,
      "step": 116673
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6283273696899414,
      "learning_rate": 0.00029397705188258005,
      "loss": 2.8609,
      "step": 116674
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.397758960723877,
      "learning_rate": 0.00029397296214543373,
      "loss": 2.979,
      "step": 116675
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1737794876098633,
      "learning_rate": 0.0002939688724094081,
      "loss": 2.9391,
      "step": 116676
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.961787462234497,
      "learning_rate": 0.0002939647826745036,
      "loss": 2.8851,
      "step": 116677
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0661377906799316,
      "learning_rate": 0.00029396069294072125,
      "loss": 3.033,
      "step": 116678
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1654837131500244,
      "learning_rate": 0.0002939566032080617,
      "loss": 2.9137,
      "step": 116679
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.173630714416504,
      "learning_rate": 0.00029395251347652567,
      "loss": 2.9443,
      "step": 116680
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8097944259643555,
      "learning_rate": 0.00029394842374611403,
      "loss": 3.1251,
      "step": 116681
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8781712055206299,
      "learning_rate": 0.0002939443340168276,
      "loss": 3.0938,
      "step": 116682
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9795005321502686,
      "learning_rate": 0.0002939402442886669,
      "loss": 2.937,
      "step": 116683
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9818311929702759,
      "learning_rate": 0.0002939361545616328,
      "loss": 2.9584,
      "step": 116684
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1605606079101562,
      "learning_rate": 0.00029393206483572615,
      "loss": 3.0961,
      "step": 116685
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.869659185409546,
      "learning_rate": 0.0002939279751109476,
      "loss": 3.2428,
      "step": 116686
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0675458908081055,
      "learning_rate": 0.00029392388538729795,
      "loss": 2.999,
      "step": 116687
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.5372400283813477,
      "learning_rate": 0.0002939197956647781,
      "loss": 3.0416,
      "step": 116688
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8897534608840942,
      "learning_rate": 0.0002939157059433886,
      "loss": 2.8007,
      "step": 116689
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.062471389770508,
      "learning_rate": 0.0002939116162231302,
      "loss": 2.9114,
      "step": 116690
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.018024444580078,
      "learning_rate": 0.0002939075265040038,
      "loss": 2.9622,
      "step": 116691
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5712733268737793,
      "learning_rate": 0.0002939034367860101,
      "loss": 3.1527,
      "step": 116692
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.478515148162842,
      "learning_rate": 0.00029389934706914987,
      "loss": 3.2808,
      "step": 116693
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1753287315368652,
      "learning_rate": 0.0002938952573534239,
      "loss": 3.1154,
      "step": 116694
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.2660367488861084,
      "learning_rate": 0.00029389116763883295,
      "loss": 2.7447,
      "step": 116695
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1706182956695557,
      "learning_rate": 0.00029388707792537763,
      "loss": 2.8701,
      "step": 116696
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6821314096450806,
      "learning_rate": 0.0002938829882130589,
      "loss": 2.9088,
      "step": 116697
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8043416738510132,
      "learning_rate": 0.00029387889850187736,
      "loss": 3.0755,
      "step": 116698
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.0441653728485107,
      "learning_rate": 0.00029387480879183387,
      "loss": 3.0661,
      "step": 116699
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8671101331710815,
      "learning_rate": 0.00029387071908292927,
      "loss": 2.8876,
      "step": 116700
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.219207525253296,
      "learning_rate": 0.0002938666293751641,
      "loss": 3.1767,
      "step": 116701
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8583836555480957,
      "learning_rate": 0.0002938625396685393,
      "loss": 2.9934,
      "step": 116702
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8862148523330688,
      "learning_rate": 0.00029385844996305556,
      "loss": 2.8939,
      "step": 116703
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5121095180511475,
      "learning_rate": 0.0002938543602587136,
      "loss": 3.1951,
      "step": 116704
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7721028327941895,
      "learning_rate": 0.00029385027055551426,
      "loss": 3.1696,
      "step": 116705
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4622080326080322,
      "learning_rate": 0.0002938461808534583,
      "loss": 2.7606,
      "step": 116706
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.3935434818267822,
      "learning_rate": 0.0002938420911525465,
      "loss": 2.7288,
      "step": 116707
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0488719940185547,
      "learning_rate": 0.00029383800145277945,
      "loss": 2.9703,
      "step": 116708
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4640605449676514,
      "learning_rate": 0.0002938339117541581,
      "loss": 3.1664,
      "step": 116709
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1767466068267822,
      "learning_rate": 0.00029382982205668315,
      "loss": 3.2279,
      "step": 116710
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.155130624771118,
      "learning_rate": 0.00029382573236035536,
      "loss": 2.9299,
      "step": 116711
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.703197717666626,
      "learning_rate": 0.0002938216426651754,
      "loss": 2.8288,
      "step": 116712
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1253368854522705,
      "learning_rate": 0.0002938175529711442,
      "loss": 3.0635,
      "step": 116713
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1681153774261475,
      "learning_rate": 0.0002938134632782624,
      "loss": 2.9248,
      "step": 116714
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.030904769897461,
      "learning_rate": 0.0002938093735865308,
      "loss": 2.921,
      "step": 116715
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1462020874023438,
      "learning_rate": 0.00029380528389595016,
      "loss": 3.1157,
      "step": 116716
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.072429656982422,
      "learning_rate": 0.00029380119420652125,
      "loss": 2.9435,
      "step": 116717
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6371760368347168,
      "learning_rate": 0.00029379710451824475,
      "loss": 2.7915,
      "step": 116718
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.400616407394409,
      "learning_rate": 0.0002937930148311216,
      "loss": 2.7207,
      "step": 116719
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9981831312179565,
      "learning_rate": 0.00029378892514515234,
      "loss": 3.0794,
      "step": 116720
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0883824825286865,
      "learning_rate": 0.00029378483546033785,
      "loss": 2.581,
      "step": 116721
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7484228610992432,
      "learning_rate": 0.0002937807457766789,
      "loss": 3.0646,
      "step": 116722
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.813332438468933,
      "learning_rate": 0.0002937766560941762,
      "loss": 2.7487,
      "step": 116723
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7473779916763306,
      "learning_rate": 0.0002937725664128306,
      "loss": 2.9627,
      "step": 116724
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.021538019180298,
      "learning_rate": 0.0002937684767326428,
      "loss": 3.0719,
      "step": 116725
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0902435779571533,
      "learning_rate": 0.0002937643870536135,
      "loss": 2.9909,
      "step": 116726
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.767774224281311,
      "learning_rate": 0.0002937602973757435,
      "loss": 2.93,
      "step": 116727
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.846665859222412,
      "learning_rate": 0.0002937562076990336,
      "loss": 2.9175,
      "step": 116728
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2506375312805176,
      "learning_rate": 0.0002937521180234846,
      "loss": 3.035,
      "step": 116729
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8900821208953857,
      "learning_rate": 0.0002937480283490971,
      "loss": 2.9744,
      "step": 116730
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2622971534729004,
      "learning_rate": 0.0002937439386758721,
      "loss": 3.0281,
      "step": 116731
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9463040828704834,
      "learning_rate": 0.0002937398490038101,
      "loss": 3.0626,
      "step": 116732
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.901589274406433,
      "learning_rate": 0.000293735759332912,
      "loss": 2.7756,
      "step": 116733
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.000056743621826,
      "learning_rate": 0.00029373166966317857,
      "loss": 2.935,
      "step": 116734
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0915582180023193,
      "learning_rate": 0.0002937275799946105,
      "loss": 3.0769,
      "step": 116735
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.154507637023926,
      "learning_rate": 0.00029372349032720854,
      "loss": 3.1186,
      "step": 116736
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.862210988998413,
      "learning_rate": 0.0002937194006609737,
      "loss": 3.0072,
      "step": 116737
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7524974346160889,
      "learning_rate": 0.0002937153109959064,
      "loss": 2.951,
      "step": 116738
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.233287811279297,
      "learning_rate": 0.00029371122133200757,
      "loss": 3.1391,
      "step": 116739
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6730546951293945,
      "learning_rate": 0.0002937071316692779,
      "loss": 2.9727,
      "step": 116740
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1866772174835205,
      "learning_rate": 0.0002937030420077182,
      "loss": 3.1398,
      "step": 116741
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.630652666091919,
      "learning_rate": 0.0002936989523473292,
      "loss": 2.9968,
      "step": 116742
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8639930486679077,
      "learning_rate": 0.00029369486268811186,
      "loss": 2.9943,
      "step": 116743
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7673754692077637,
      "learning_rate": 0.00029369077303006665,
      "loss": 2.977,
      "step": 116744
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2991788387298584,
      "learning_rate": 0.0002936866833731944,
      "loss": 2.9791,
      "step": 116745
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7207353115081787,
      "learning_rate": 0.00029368259371749593,
      "loss": 2.993,
      "step": 116746
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9018235206604004,
      "learning_rate": 0.00029367850406297193,
      "loss": 3.2826,
      "step": 116747
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.856735348701477,
      "learning_rate": 0.00029367441440962327,
      "loss": 3.0214,
      "step": 116748
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.802756428718567,
      "learning_rate": 0.0002936703247574508,
      "loss": 2.8133,
      "step": 116749
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.2391161918640137,
      "learning_rate": 0.000293666235106455,
      "loss": 3.1123,
      "step": 116750
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1596250534057617,
      "learning_rate": 0.00029366214545663673,
      "loss": 3.0747,
      "step": 116751
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7920951843261719,
      "learning_rate": 0.00029365805580799687,
      "loss": 3.0119,
      "step": 116752
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6645452976226807,
      "learning_rate": 0.000293653966160536,
      "loss": 3.093,
      "step": 116753
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7947542667388916,
      "learning_rate": 0.000293649876514255,
      "loss": 3.05,
      "step": 116754
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9990592002868652,
      "learning_rate": 0.0002936457868691548,
      "loss": 2.935,
      "step": 116755
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2537405490875244,
      "learning_rate": 0.00029364169722523575,
      "loss": 2.9852,
      "step": 116756
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8377811908721924,
      "learning_rate": 0.00029363760758249893,
      "loss": 2.9466,
      "step": 116757
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.15789794921875,
      "learning_rate": 0.0002936335179409449,
      "loss": 3.2206,
      "step": 116758
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.962985634803772,
      "learning_rate": 0.00029362942830057453,
      "loss": 3.173,
      "step": 116759
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.896616816520691,
      "learning_rate": 0.0002936253386613886,
      "loss": 3.157,
      "step": 116760
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.0540170669555664,
      "learning_rate": 0.000293621249023388,
      "loss": 3.0416,
      "step": 116761
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0575389862060547,
      "learning_rate": 0.00029361715938657314,
      "loss": 3.1534,
      "step": 116762
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7420384883880615,
      "learning_rate": 0.000293613069750945,
      "loss": 3.0992,
      "step": 116763
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.878014326095581,
      "learning_rate": 0.00029360898011650434,
      "loss": 2.8617,
      "step": 116764
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3205997943878174,
      "learning_rate": 0.00029360489048325185,
      "loss": 3.0114,
      "step": 116765
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3512375354766846,
      "learning_rate": 0.00029360080085118835,
      "loss": 2.9267,
      "step": 116766
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9252989292144775,
      "learning_rate": 0.0002935967112203147,
      "loss": 3.1021,
      "step": 116767
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6312391757965088,
      "learning_rate": 0.0002935926215906314,
      "loss": 2.9776,
      "step": 116768
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6702256202697754,
      "learning_rate": 0.0002935885319621394,
      "loss": 3.0703,
      "step": 116769
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.255120277404785,
      "learning_rate": 0.0002935844423348394,
      "loss": 2.9326,
      "step": 116770
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9834856986999512,
      "learning_rate": 0.0002935803527087321,
      "loss": 2.7919,
      "step": 116771
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6321710348129272,
      "learning_rate": 0.00029357626308381844,
      "loss": 3.2918,
      "step": 116772
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9349815845489502,
      "learning_rate": 0.00029357217346009896,
      "loss": 2.9252,
      "step": 116773
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.748033285140991,
      "learning_rate": 0.00029356808383757476,
      "loss": 3.03,
      "step": 116774
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.094774007797241,
      "learning_rate": 0.0002935639942162462,
      "loss": 3.0876,
      "step": 116775
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2960267066955566,
      "learning_rate": 0.0002935599045961142,
      "loss": 2.8835,
      "step": 116776
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9139412641525269,
      "learning_rate": 0.0002935558149771796,
      "loss": 2.966,
      "step": 116777
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8346389532089233,
      "learning_rate": 0.00029355172535944305,
      "loss": 2.994,
      "step": 116778
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5460399389266968,
      "learning_rate": 0.0002935476357429054,
      "loss": 2.8567,
      "step": 116779
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8270777463912964,
      "learning_rate": 0.00029354354612756744,
      "loss": 2.9505,
      "step": 116780
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9506323337554932,
      "learning_rate": 0.00029353945651342976,
      "loss": 3.1195,
      "step": 116781
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.1225035190582275,
      "learning_rate": 0.00029353536690049316,
      "loss": 2.9697,
      "step": 116782
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0896754264831543,
      "learning_rate": 0.0002935312772887585,
      "loss": 2.8368,
      "step": 116783
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.129918098449707,
      "learning_rate": 0.00029352718767822655,
      "loss": 3.2183,
      "step": 116784
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4860212802886963,
      "learning_rate": 0.00029352309806889796,
      "loss": 2.9163,
      "step": 116785
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0756473541259766,
      "learning_rate": 0.0002935190084607736,
      "loss": 2.8832,
      "step": 116786
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8948945999145508,
      "learning_rate": 0.0002935149188538542,
      "loss": 2.7767,
      "step": 116787
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.85537588596344,
      "learning_rate": 0.00029351082924814045,
      "loss": 3.0293,
      "step": 116788
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3329598903656006,
      "learning_rate": 0.00029350673964363314,
      "loss": 2.9614,
      "step": 116789
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7589370012283325,
      "learning_rate": 0.00029350265004033304,
      "loss": 2.8307,
      "step": 116790
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7333073616027832,
      "learning_rate": 0.0002934985604382409,
      "loss": 2.8789,
      "step": 116791
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7912721633911133,
      "learning_rate": 0.00029349447083735765,
      "loss": 2.6744,
      "step": 116792
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8341165781021118,
      "learning_rate": 0.00029349038123768377,
      "loss": 2.982,
      "step": 116793
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0716214179992676,
      "learning_rate": 0.0002934862916392201,
      "loss": 3.046,
      "step": 116794
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0182533264160156,
      "learning_rate": 0.00029348220204196765,
      "loss": 3.0559,
      "step": 116795
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7957737445831299,
      "learning_rate": 0.0002934781124459268,
      "loss": 3.2478,
      "step": 116796
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7956202030181885,
      "learning_rate": 0.00029347402285109857,
      "loss": 2.975,
      "step": 116797
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0525197982788086,
      "learning_rate": 0.0002934699332574837,
      "loss": 3.108,
      "step": 116798
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6955204010009766,
      "learning_rate": 0.0002934658436650828,
      "loss": 2.8232,
      "step": 116799
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7476868629455566,
      "learning_rate": 0.0002934617540738967,
      "loss": 2.8368,
      "step": 116800
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8861409425735474,
      "learning_rate": 0.0002934576644839262,
      "loss": 2.9497,
      "step": 116801
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.263200283050537,
      "learning_rate": 0.00029345357489517215,
      "loss": 3.067,
      "step": 116802
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.071164131164551,
      "learning_rate": 0.0002934494853076351,
      "loss": 3.1764,
      "step": 116803
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5969741344451904,
      "learning_rate": 0.000293445395721316,
      "loss": 2.7989,
      "step": 116804
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.161264419555664,
      "learning_rate": 0.0002934413061362154,
      "loss": 2.8982,
      "step": 116805
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9838722944259644,
      "learning_rate": 0.0002934372165523343,
      "loss": 2.9976,
      "step": 116806
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.860181450843811,
      "learning_rate": 0.0002934331269696732,
      "loss": 3.0796,
      "step": 116807
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0686399936676025,
      "learning_rate": 0.00029342903738823313,
      "loss": 3.0324,
      "step": 116808
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.074918031692505,
      "learning_rate": 0.0002934249478080147,
      "loss": 3.2799,
      "step": 116809
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.977979302406311,
      "learning_rate": 0.0002934208582290188,
      "loss": 3.0818,
      "step": 116810
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3983633518218994,
      "learning_rate": 0.0002934167686512459,
      "loss": 2.9394,
      "step": 116811
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9883935451507568,
      "learning_rate": 0.00029341267907469703,
      "loss": 3.0028,
      "step": 116812
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7767030000686646,
      "learning_rate": 0.00029340858949937287,
      "loss": 3.1352,
      "step": 116813
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.858503818511963,
      "learning_rate": 0.0002934044999252741,
      "loss": 3.0746,
      "step": 116814
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0217041969299316,
      "learning_rate": 0.00029340041035240167,
      "loss": 2.9622,
      "step": 116815
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9030125141143799,
      "learning_rate": 0.0002933963207807563,
      "loss": 2.8957,
      "step": 116816
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9330813884735107,
      "learning_rate": 0.00029339223121033854,
      "loss": 2.8857,
      "step": 116817
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.29945707321167,
      "learning_rate": 0.0002933881416411493,
      "loss": 2.9508,
      "step": 116818
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0552256107330322,
      "learning_rate": 0.00029338405207318934,
      "loss": 2.9502,
      "step": 116819
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8909364938735962,
      "learning_rate": 0.0002933799625064594,
      "loss": 2.9261,
      "step": 116820
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.335844039916992,
      "learning_rate": 0.0002933758729409602,
      "loss": 3.3275,
      "step": 116821
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9355419874191284,
      "learning_rate": 0.0002933717833766928,
      "loss": 3.1602,
      "step": 116822
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3411221504211426,
      "learning_rate": 0.00029336769381365746,
      "loss": 3.1484,
      "step": 116823
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.182499647140503,
      "learning_rate": 0.00029336360425185525,
      "loss": 2.71,
      "step": 116824
    },
    {
      "epoch": 1.52,
      "grad_norm": 5.67221736907959,
      "learning_rate": 0.00029335951469128686,
      "loss": 2.8994,
      "step": 116825
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.637930393218994,
      "learning_rate": 0.00029335542513195305,
      "loss": 2.9887,
      "step": 116826
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.154872417449951,
      "learning_rate": 0.0002933513355738546,
      "loss": 3.0133,
      "step": 116827
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8369086980819702,
      "learning_rate": 0.0002933472460169924,
      "loss": 3.1429,
      "step": 116828
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.2048866748809814,
      "learning_rate": 0.0002933431564613669,
      "loss": 2.9477,
      "step": 116829
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.519623041152954,
      "learning_rate": 0.0002933390669069791,
      "loss": 3.007,
      "step": 116830
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0009689331054688,
      "learning_rate": 0.00029333497735382967,
      "loss": 3.0284,
      "step": 116831
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0929343700408936,
      "learning_rate": 0.0002933308878019194,
      "loss": 2.9466,
      "step": 116832
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.312177896499634,
      "learning_rate": 0.000293326798251249,
      "loss": 3.1733,
      "step": 116833
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4020490646362305,
      "learning_rate": 0.00029332270870181945,
      "loss": 3.0515,
      "step": 116834
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8113911151885986,
      "learning_rate": 0.0002933186191536312,
      "loss": 2.8886,
      "step": 116835
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8768856525421143,
      "learning_rate": 0.0002933145296066851,
      "loss": 2.7875,
      "step": 116836
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9536168575286865,
      "learning_rate": 0.000293310440060982,
      "loss": 2.9868,
      "step": 116837
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1158652305603027,
      "learning_rate": 0.0002933063505165226,
      "loss": 3.067,
      "step": 116838
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.645181894302368,
      "learning_rate": 0.0002933022609733077,
      "loss": 3.0541,
      "step": 116839
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7909051179885864,
      "learning_rate": 0.00029329817143133803,
      "loss": 3.1852,
      "step": 116840
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7516307830810547,
      "learning_rate": 0.00029329408189061445,
      "loss": 3.0967,
      "step": 116841
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.816828727722168,
      "learning_rate": 0.00029328999235113753,
      "loss": 2.8957,
      "step": 116842
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.589137315750122,
      "learning_rate": 0.0002932859028129081,
      "loss": 2.7854,
      "step": 116843
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8060189485549927,
      "learning_rate": 0.000293281813275927,
      "loss": 3.1706,
      "step": 116844
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9116506576538086,
      "learning_rate": 0.0002932777237401949,
      "loss": 2.9692,
      "step": 116845
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.504708766937256,
      "learning_rate": 0.00029327363420571256,
      "loss": 2.7416,
      "step": 116846
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3382790088653564,
      "learning_rate": 0.000293269544672481,
      "loss": 2.8834,
      "step": 116847
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.525496244430542,
      "learning_rate": 0.00029326545514050056,
      "loss": 2.9724,
      "step": 116848
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3384180068969727,
      "learning_rate": 0.00029326136560977223,
      "loss": 2.9221,
      "step": 116849
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9924805164337158,
      "learning_rate": 0.0002932572760802967,
      "loss": 3.177,
      "step": 116850
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.062408924102783,
      "learning_rate": 0.0002932531865520748,
      "loss": 2.8019,
      "step": 116851
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4628443717956543,
      "learning_rate": 0.0002932490970251073,
      "loss": 2.7715,
      "step": 116852
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8720784187316895,
      "learning_rate": 0.00029324500749939496,
      "loss": 2.8698,
      "step": 116853
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6774089336395264,
      "learning_rate": 0.0002932409179749384,
      "loss": 2.9793,
      "step": 116854
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.934819221496582,
      "learning_rate": 0.0002932368284517385,
      "loss": 2.913,
      "step": 116855
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.846870183944702,
      "learning_rate": 0.00029323273892979603,
      "loss": 2.7954,
      "step": 116856
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6889526844024658,
      "learning_rate": 0.00029322864940911164,
      "loss": 3.278,
      "step": 116857
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2713706493377686,
      "learning_rate": 0.0002932245598896863,
      "loss": 3.0086,
      "step": 116858
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3245742321014404,
      "learning_rate": 0.00029322047037152067,
      "loss": 2.7561,
      "step": 116859
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9751746654510498,
      "learning_rate": 0.0002932163808546153,
      "loss": 3.0721,
      "step": 116860
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.865902304649353,
      "learning_rate": 0.00029321229133897123,
      "loss": 3.2925,
      "step": 116861
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9027948379516602,
      "learning_rate": 0.00029320820182458915,
      "loss": 2.8623,
      "step": 116862
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.84125816822052,
      "learning_rate": 0.00029320411231146973,
      "loss": 3.0746,
      "step": 116863
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9933617115020752,
      "learning_rate": 0.00029320002279961384,
      "loss": 2.9766,
      "step": 116864
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9636245965957642,
      "learning_rate": 0.00029319593328902224,
      "loss": 3.0955,
      "step": 116865
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.094606876373291,
      "learning_rate": 0.00029319184377969563,
      "loss": 3.1783,
      "step": 116866
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1252877712249756,
      "learning_rate": 0.00029318775427163466,
      "loss": 3.1424,
      "step": 116867
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.1476705074310303,
      "learning_rate": 0.0002931836647648403,
      "loss": 2.9526,
      "step": 116868
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.002473831176758,
      "learning_rate": 0.0002931795752593132,
      "loss": 2.6151,
      "step": 116869
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.313544273376465,
      "learning_rate": 0.0002931754857550542,
      "loss": 3.1962,
      "step": 116870
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.324995279312134,
      "learning_rate": 0.000293171396252064,
      "loss": 2.6484,
      "step": 116871
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.3041257858276367,
      "learning_rate": 0.0002931673067503434,
      "loss": 3.1645,
      "step": 116872
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.192683696746826,
      "learning_rate": 0.00029316321724989303,
      "loss": 3.0313,
      "step": 116873
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.937851071357727,
      "learning_rate": 0.00029315912775071374,
      "loss": 2.9519,
      "step": 116874
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1451683044433594,
      "learning_rate": 0.0002931550382528063,
      "loss": 2.905,
      "step": 116875
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1824910640716553,
      "learning_rate": 0.0002931509487561715,
      "loss": 3.0778,
      "step": 116876
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.017277717590332,
      "learning_rate": 0.0002931468592608101,
      "loss": 3.1187,
      "step": 116877
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7326419353485107,
      "learning_rate": 0.00029314276976672284,
      "loss": 3.0494,
      "step": 116878
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.068150758743286,
      "learning_rate": 0.00029313868027391047,
      "loss": 2.9681,
      "step": 116879
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.030207395553589,
      "learning_rate": 0.0002931345907823737,
      "loss": 2.8805,
      "step": 116880
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3134665489196777,
      "learning_rate": 0.00029313050129211335,
      "loss": 3.0311,
      "step": 116881
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7717186212539673,
      "learning_rate": 0.0002931264118031301,
      "loss": 3.1772,
      "step": 116882
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0466513633728027,
      "learning_rate": 0.00029312232231542494,
      "loss": 2.9036,
      "step": 116883
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.053475856781006,
      "learning_rate": 0.00029311823282899836,
      "loss": 2.9683,
      "step": 116884
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6569302082061768,
      "learning_rate": 0.0002931141433438512,
      "loss": 2.7717,
      "step": 116885
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9256616830825806,
      "learning_rate": 0.00029311005385998436,
      "loss": 2.9435,
      "step": 116886
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.625932216644287,
      "learning_rate": 0.00029310596437739846,
      "loss": 2.8054,
      "step": 116887
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9828659296035767,
      "learning_rate": 0.0002931018748960942,
      "loss": 2.7617,
      "step": 116888
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.222313404083252,
      "learning_rate": 0.00029309778541607254,
      "loss": 3.0026,
      "step": 116889
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.279883861541748,
      "learning_rate": 0.0002930936959373341,
      "loss": 3.068,
      "step": 116890
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8795477151870728,
      "learning_rate": 0.00029308960645987967,
      "loss": 2.9683,
      "step": 116891
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1394588947296143,
      "learning_rate": 0.00029308551698370997,
      "loss": 3.1747,
      "step": 116892
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8290766477584839,
      "learning_rate": 0.0002930814275088258,
      "loss": 2.9136,
      "step": 116893
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9863834381103516,
      "learning_rate": 0.000293077338035228,
      "loss": 2.7609,
      "step": 116894
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9419503211975098,
      "learning_rate": 0.0002930732485629173,
      "loss": 3.0791,
      "step": 116895
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8035446405410767,
      "learning_rate": 0.00029306915909189425,
      "loss": 3.0652,
      "step": 116896
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8019866943359375,
      "learning_rate": 0.00029306506962215983,
      "loss": 3.0574,
      "step": 116897
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0620384216308594,
      "learning_rate": 0.00029306098015371475,
      "loss": 2.8731,
      "step": 116898
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8304816484451294,
      "learning_rate": 0.0002930568906865598,
      "loss": 3.0234,
      "step": 116899
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.00807785987854,
      "learning_rate": 0.00029305280122069566,
      "loss": 2.8783,
      "step": 116900
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.864356279373169,
      "learning_rate": 0.00029304871175612324,
      "loss": 3.0445,
      "step": 116901
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.016859292984009,
      "learning_rate": 0.00029304462229284313,
      "loss": 2.9892,
      "step": 116902
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9338573217391968,
      "learning_rate": 0.0002930405328308561,
      "loss": 3.0793,
      "step": 116903
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.800492525100708,
      "learning_rate": 0.000293036443370163,
      "loss": 2.9283,
      "step": 116904
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8820332288742065,
      "learning_rate": 0.00029303235391076455,
      "loss": 3.0475,
      "step": 116905
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5631794929504395,
      "learning_rate": 0.0002930282644526615,
      "loss": 2.9086,
      "step": 116906
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.449129819869995,
      "learning_rate": 0.00029302417499585463,
      "loss": 2.7473,
      "step": 116907
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.602674722671509,
      "learning_rate": 0.00029302008554034484,
      "loss": 3.1891,
      "step": 116908
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4312145709991455,
      "learning_rate": 0.00029301599608613267,
      "loss": 3.0726,
      "step": 116909
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4723379611968994,
      "learning_rate": 0.0002930119066332189,
      "loss": 3.0013,
      "step": 116910
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.134634494781494,
      "learning_rate": 0.00029300781718160436,
      "loss": 2.8106,
      "step": 116911
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8780291080474854,
      "learning_rate": 0.00029300372773128974,
      "loss": 2.9851,
      "step": 116912
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9030892848968506,
      "learning_rate": 0.00029299963828227594,
      "loss": 2.7881,
      "step": 116913
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9840288162231445,
      "learning_rate": 0.00029299554883456373,
      "loss": 3.1264,
      "step": 116914
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.842233180999756,
      "learning_rate": 0.00029299145938815364,
      "loss": 2.923,
      "step": 116915
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4432055950164795,
      "learning_rate": 0.0002929873699430466,
      "loss": 3.1842,
      "step": 116916
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.0496561527252197,
      "learning_rate": 0.0002929832804992433,
      "loss": 2.8179,
      "step": 116917
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9607352018356323,
      "learning_rate": 0.0002929791910567446,
      "loss": 2.8946,
      "step": 116918
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.021146297454834,
      "learning_rate": 0.0002929751016155512,
      "loss": 2.9211,
      "step": 116919
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8916385173797607,
      "learning_rate": 0.00029297101217566396,
      "loss": 3.2003,
      "step": 116920
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8568933010101318,
      "learning_rate": 0.0002929669227370834,
      "loss": 2.7325,
      "step": 116921
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9771652221679688,
      "learning_rate": 0.00029296283329981047,
      "loss": 3.0632,
      "step": 116922
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1577532291412354,
      "learning_rate": 0.0002929587438638458,
      "loss": 3.0364,
      "step": 116923
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.154799461364746,
      "learning_rate": 0.0002929546544291903,
      "loss": 2.9583,
      "step": 116924
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.061411142349243,
      "learning_rate": 0.00029295056499584464,
      "loss": 2.9934,
      "step": 116925
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9289987087249756,
      "learning_rate": 0.0002929464755638097,
      "loss": 3.1084,
      "step": 116926
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.232107400894165,
      "learning_rate": 0.0002929423861330861,
      "loss": 3.1396,
      "step": 116927
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.844704508781433,
      "learning_rate": 0.00029293829670367456,
      "loss": 3.019,
      "step": 116928
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2247729301452637,
      "learning_rate": 0.000292934207275576,
      "loss": 3.0888,
      "step": 116929
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9887322187423706,
      "learning_rate": 0.00029293011784879106,
      "loss": 3.1323,
      "step": 116930
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3723092079162598,
      "learning_rate": 0.00029292602842332055,
      "loss": 2.6438,
      "step": 116931
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7833750247955322,
      "learning_rate": 0.00029292193899916534,
      "loss": 2.9769,
      "step": 116932
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1087446212768555,
      "learning_rate": 0.00029291784957632597,
      "loss": 3.1028,
      "step": 116933
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9332518577575684,
      "learning_rate": 0.00029291376015480326,
      "loss": 2.9303,
      "step": 116934
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8978577852249146,
      "learning_rate": 0.00029290967073459807,
      "loss": 2.8724,
      "step": 116935
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8297245502471924,
      "learning_rate": 0.00029290558131571105,
      "loss": 3.111,
      "step": 116936
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7782745361328125,
      "learning_rate": 0.0002929014918981431,
      "loss": 3.1414,
      "step": 116937
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9004563093185425,
      "learning_rate": 0.00029289740248189495,
      "loss": 2.8447,
      "step": 116938
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7482976913452148,
      "learning_rate": 0.00029289331306696717,
      "loss": 2.9524,
      "step": 116939
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.11555814743042,
      "learning_rate": 0.0002928892236533607,
      "loss": 3.06,
      "step": 116940
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8195762634277344,
      "learning_rate": 0.00029288513424107626,
      "loss": 3.0349,
      "step": 116941
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0834317207336426,
      "learning_rate": 0.0002928810448301146,
      "loss": 2.886,
      "step": 116942
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9871361255645752,
      "learning_rate": 0.0002928769554204765,
      "loss": 3.1592,
      "step": 116943
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6354433298110962,
      "learning_rate": 0.0002928728660121628,
      "loss": 3.0124,
      "step": 116944
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0481762886047363,
      "learning_rate": 0.0002928687766051741,
      "loss": 3.0078,
      "step": 116945
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7745791673660278,
      "learning_rate": 0.00029286468719951116,
      "loss": 2.9667,
      "step": 116946
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9891281127929688,
      "learning_rate": 0.00029286059779517484,
      "loss": 3.0272,
      "step": 116947
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3783161640167236,
      "learning_rate": 0.00029285650839216585,
      "loss": 2.9082,
      "step": 116948
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6895273923873901,
      "learning_rate": 0.000292852418990485,
      "loss": 3.1472,
      "step": 116949
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8963391780853271,
      "learning_rate": 0.00029284832959013314,
      "loss": 3.0553,
      "step": 116950
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.362715244293213,
      "learning_rate": 0.00029284424019111077,
      "loss": 2.9787,
      "step": 116951
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6283388137817383,
      "learning_rate": 0.0002928401507934188,
      "loss": 3.2562,
      "step": 116952
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.618924856185913,
      "learning_rate": 0.000292836061397058,
      "loss": 3.1221,
      "step": 116953
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9140048027038574,
      "learning_rate": 0.0002928319720020291,
      "loss": 2.8065,
      "step": 116954
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9607818126678467,
      "learning_rate": 0.0002928278826083329,
      "loss": 2.9342,
      "step": 116955
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.81818687915802,
      "learning_rate": 0.0002928237932159701,
      "loss": 2.9455,
      "step": 116956
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1693482398986816,
      "learning_rate": 0.00029281970382494157,
      "loss": 2.7736,
      "step": 116957
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7010040283203125,
      "learning_rate": 0.0002928156144352479,
      "loss": 2.8748,
      "step": 116958
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7041741609573364,
      "learning_rate": 0.00029281152504689,
      "loss": 3.0671,
      "step": 116959
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7275478839874268,
      "learning_rate": 0.0002928074356598685,
      "loss": 2.6418,
      "step": 116960
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9551198482513428,
      "learning_rate": 0.00029280334627418425,
      "loss": 2.9441,
      "step": 116961
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0052490234375,
      "learning_rate": 0.0002927992568898381,
      "loss": 3.0647,
      "step": 116962
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.979170799255371,
      "learning_rate": 0.0002927951675068306,
      "loss": 2.7825,
      "step": 116963
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5602127313613892,
      "learning_rate": 0.0002927910781251627,
      "loss": 2.7959,
      "step": 116964
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7762006521224976,
      "learning_rate": 0.000292786988744835,
      "loss": 3.0593,
      "step": 116965
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6204450130462646,
      "learning_rate": 0.0002927828993658483,
      "loss": 3.1426,
      "step": 116966
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9790034294128418,
      "learning_rate": 0.00029277880998820346,
      "loss": 2.95,
      "step": 116967
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9939385652542114,
      "learning_rate": 0.0002927747206119012,
      "loss": 2.9049,
      "step": 116968
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3465819358825684,
      "learning_rate": 0.0002927706312369422,
      "loss": 2.9195,
      "step": 116969
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1586015224456787,
      "learning_rate": 0.00029276654186332723,
      "loss": 3.1571,
      "step": 116970
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.37359881401062,
      "learning_rate": 0.0002927624524910572,
      "loss": 2.9553,
      "step": 116971
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.59757924079895,
      "learning_rate": 0.00029275836312013276,
      "loss": 2.7383,
      "step": 116972
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.842292070388794,
      "learning_rate": 0.0002927542737505546,
      "loss": 3.2533,
      "step": 116973
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7723424434661865,
      "learning_rate": 0.00029275018438232363,
      "loss": 3.0225,
      "step": 116974
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5923645496368408,
      "learning_rate": 0.00029274609501544054,
      "loss": 3.0658,
      "step": 116975
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.78069007396698,
      "learning_rate": 0.000292742005649906,
      "loss": 3.051,
      "step": 116976
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9526821374893188,
      "learning_rate": 0.0002927379162857209,
      "loss": 3.0075,
      "step": 116977
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7468582391738892,
      "learning_rate": 0.00029273382692288606,
      "loss": 3.1614,
      "step": 116978
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.518195390701294,
      "learning_rate": 0.000292729737561402,
      "loss": 2.9499,
      "step": 116979
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8276503086090088,
      "learning_rate": 0.00029272564820126965,
      "loss": 2.8133,
      "step": 116980
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.220154285430908,
      "learning_rate": 0.0002927215588424898,
      "loss": 3.0943,
      "step": 116981
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5447845458984375,
      "learning_rate": 0.00029271746948506314,
      "loss": 3.084,
      "step": 116982
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.814894199371338,
      "learning_rate": 0.0002927133801289904,
      "loss": 2.9286,
      "step": 116983
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8141202926635742,
      "learning_rate": 0.0002927092907742724,
      "loss": 3.0112,
      "step": 116984
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8733761310577393,
      "learning_rate": 0.0002927052014209098,
      "loss": 3.0623,
      "step": 116985
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.478132963180542,
      "learning_rate": 0.00029270111206890356,
      "loss": 2.7533,
      "step": 116986
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8171579837799072,
      "learning_rate": 0.0002926970227182543,
      "loss": 2.8607,
      "step": 116987
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.1324539184570312,
      "learning_rate": 0.0002926929333689628,
      "loss": 2.9489,
      "step": 116988
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4335100650787354,
      "learning_rate": 0.00029268884402102976,
      "loss": 2.9713,
      "step": 116989
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.505650281906128,
      "learning_rate": 0.000292684754674456,
      "loss": 2.9141,
      "step": 116990
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.977579355239868,
      "learning_rate": 0.0002926806653292423,
      "loss": 2.8946,
      "step": 116991
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.868398904800415,
      "learning_rate": 0.0002926765759853894,
      "loss": 3.0255,
      "step": 116992
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.090359687805176,
      "learning_rate": 0.0002926724866428982,
      "loss": 3.1086,
      "step": 116993
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.889736533164978,
      "learning_rate": 0.0002926683973017692,
      "loss": 2.9578,
      "step": 116994
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0109667778015137,
      "learning_rate": 0.00029266430796200327,
      "loss": 3.0258,
      "step": 116995
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.702059268951416,
      "learning_rate": 0.0002926602186236012,
      "loss": 3.0112,
      "step": 116996
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9806876182556152,
      "learning_rate": 0.00029265612928656374,
      "loss": 2.9445,
      "step": 116997
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.649288535118103,
      "learning_rate": 0.00029265203995089157,
      "loss": 2.8507,
      "step": 116998
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9456987380981445,
      "learning_rate": 0.00029264795061658577,
      "loss": 3.1732,
      "step": 116999
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.823509931564331,
      "learning_rate": 0.00029264386128364665,
      "loss": 2.8307,
      "step": 117000
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.925459623336792,
      "learning_rate": 0.00029263977195207515,
      "loss": 3.1749,
      "step": 117001
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.915581464767456,
      "learning_rate": 0.00029263568262187213,
      "loss": 2.8558,
      "step": 117002
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.797653079032898,
      "learning_rate": 0.00029263159329303824,
      "loss": 2.8845,
      "step": 117003
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7751305103302002,
      "learning_rate": 0.0002926275039655743,
      "loss": 2.9821,
      "step": 117004
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.3245511054992676,
      "learning_rate": 0.0002926234146394811,
      "loss": 2.9822,
      "step": 117005
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7402777671813965,
      "learning_rate": 0.0002926193253147593,
      "loss": 2.8751,
      "step": 117006
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.019970655441284,
      "learning_rate": 0.00029261523599140966,
      "loss": 3.0017,
      "step": 117007
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7999849319458008,
      "learning_rate": 0.00029261114666943297,
      "loss": 2.8837,
      "step": 117008
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6719645261764526,
      "learning_rate": 0.00029260705734883007,
      "loss": 3.0908,
      "step": 117009
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.1666457653045654,
      "learning_rate": 0.00029260296802960167,
      "loss": 3.1827,
      "step": 117010
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9841668605804443,
      "learning_rate": 0.0002925988787117486,
      "loss": 2.9661,
      "step": 117011
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.862457036972046,
      "learning_rate": 0.0002925947893952714,
      "loss": 2.9579,
      "step": 117012
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.027559280395508,
      "learning_rate": 0.00029259070008017093,
      "loss": 3.059,
      "step": 117013
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.656790256500244,
      "learning_rate": 0.00029258661076644806,
      "loss": 3.1053,
      "step": 117014
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.349212884902954,
      "learning_rate": 0.0002925825214541035,
      "loss": 2.9705,
      "step": 117015
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1629703044891357,
      "learning_rate": 0.00029257843214313794,
      "loss": 2.8419,
      "step": 117016
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.2499170303344727,
      "learning_rate": 0.0002925743428335523,
      "loss": 2.5769,
      "step": 117017
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.072699546813965,
      "learning_rate": 0.0002925702535253471,
      "loss": 2.979,
      "step": 117018
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0564334392547607,
      "learning_rate": 0.0002925661642185233,
      "loss": 2.9547,
      "step": 117019
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9857778549194336,
      "learning_rate": 0.00029256207491308155,
      "loss": 2.8815,
      "step": 117020
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0201144218444824,
      "learning_rate": 0.00029255798560902266,
      "loss": 2.9519,
      "step": 117021
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3178842067718506,
      "learning_rate": 0.00029255389630634737,
      "loss": 2.7997,
      "step": 117022
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.729344129562378,
      "learning_rate": 0.00029254980700505655,
      "loss": 3.1617,
      "step": 117023
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7655985355377197,
      "learning_rate": 0.00029254571770515075,
      "loss": 2.879,
      "step": 117024
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.180785894393921,
      "learning_rate": 0.0002925416284066309,
      "loss": 3.1322,
      "step": 117025
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7000010013580322,
      "learning_rate": 0.00029253753910949765,
      "loss": 2.8959,
      "step": 117026
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.90174400806427,
      "learning_rate": 0.0002925334498137518,
      "loss": 3.0578,
      "step": 117027
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2515676021575928,
      "learning_rate": 0.00029252936051939415,
      "loss": 2.9413,
      "step": 117028
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.214102268218994,
      "learning_rate": 0.00029252527122642556,
      "loss": 3.2151,
      "step": 117029
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4268031120300293,
      "learning_rate": 0.00029252118193484654,
      "loss": 2.8622,
      "step": 117030
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5256192684173584,
      "learning_rate": 0.00029251709264465795,
      "loss": 2.9597,
      "step": 117031
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8542126417160034,
      "learning_rate": 0.00029251300335586056,
      "loss": 3.2078,
      "step": 117032
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.538728952407837,
      "learning_rate": 0.00029250891406845523,
      "loss": 3.1335,
      "step": 117033
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6603329181671143,
      "learning_rate": 0.00029250482478244256,
      "loss": 3.0594,
      "step": 117034
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7083230018615723,
      "learning_rate": 0.0002925007354978235,
      "loss": 3.0675,
      "step": 117035
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9339838027954102,
      "learning_rate": 0.0002924966462145986,
      "loss": 2.9283,
      "step": 117036
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8520610332489014,
      "learning_rate": 0.0002924925569327687,
      "loss": 2.9054,
      "step": 117037
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2752633094787598,
      "learning_rate": 0.0002924884676523346,
      "loss": 2.8445,
      "step": 117038
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8210479021072388,
      "learning_rate": 0.000292484378373297,
      "loss": 2.7963,
      "step": 117039
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1131279468536377,
      "learning_rate": 0.0002924802890956567,
      "loss": 2.9984,
      "step": 117040
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.1403095722198486,
      "learning_rate": 0.0002924761998194145,
      "loss": 2.9601,
      "step": 117041
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.165814399719238,
      "learning_rate": 0.0002924721105445712,
      "loss": 2.8328,
      "step": 117042
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.952470541000366,
      "learning_rate": 0.0002924680212711274,
      "loss": 3.1503,
      "step": 117043
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8590006828308105,
      "learning_rate": 0.0002924639319990839,
      "loss": 3.0672,
      "step": 117044
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5236616134643555,
      "learning_rate": 0.0002924598427284415,
      "loss": 3.197,
      "step": 117045
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.7521541118621826,
      "learning_rate": 0.00029245575345920096,
      "loss": 3.0296,
      "step": 117046
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9544289112091064,
      "learning_rate": 0.00029245166419136304,
      "loss": 3.2177,
      "step": 117047
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.7272603511810303,
      "learning_rate": 0.0002924475749249285,
      "loss": 3.1214,
      "step": 117048
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.3145856857299805,
      "learning_rate": 0.0002924434856598982,
      "loss": 2.9847,
      "step": 117049
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2900612354278564,
      "learning_rate": 0.00029243939639627265,
      "loss": 2.9559,
      "step": 117050
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.4222793579101562,
      "learning_rate": 0.00029243530713405285,
      "loss": 2.935,
      "step": 117051
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.8560068607330322,
      "learning_rate": 0.0002924312178732394,
      "loss": 3.0413,
      "step": 117052
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.8562171459198,
      "learning_rate": 0.0002924271286138331,
      "loss": 3.2376,
      "step": 117053
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.479426622390747,
      "learning_rate": 0.0002924230393558349,
      "loss": 2.819,
      "step": 117054
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9731343984603882,
      "learning_rate": 0.0002924189500992453,
      "loss": 2.8954,
      "step": 117055
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.4702301025390625,
      "learning_rate": 0.0002924148608440652,
      "loss": 2.7548,
      "step": 117056
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.856760263442993,
      "learning_rate": 0.0002924107715902953,
      "loss": 3.0498,
      "step": 117057
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.7224926948547363,
      "learning_rate": 0.0002924066823379363,
      "loss": 3.032,
      "step": 117058
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1062896251678467,
      "learning_rate": 0.00029240259308698906,
      "loss": 3.0134,
      "step": 117059
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.840097188949585,
      "learning_rate": 0.0002923985038374545,
      "loss": 3.0206,
      "step": 117060
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6211752891540527,
      "learning_rate": 0.000292394414589333,
      "loss": 3.0493,
      "step": 117061
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1497416496276855,
      "learning_rate": 0.00029239032534262557,
      "loss": 2.9531,
      "step": 117062
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8048996925354004,
      "learning_rate": 0.000292386236097333,
      "loss": 2.9561,
      "step": 117063
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.967361569404602,
      "learning_rate": 0.0002923821468534559,
      "loss": 3.1997,
      "step": 117064
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.3226850032806396,
      "learning_rate": 0.0002923780576109951,
      "loss": 2.7572,
      "step": 117065
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.5836396217346191,
      "learning_rate": 0.0002923739683699514,
      "loss": 2.8889,
      "step": 117066
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8179199695587158,
      "learning_rate": 0.0002923698791303255,
      "loss": 2.7497,
      "step": 117067
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5062315464019775,
      "learning_rate": 0.0002923657898921182,
      "loss": 2.8268,
      "step": 117068
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6076327562332153,
      "learning_rate": 0.00029236170065533024,
      "loss": 2.8092,
      "step": 117069
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6299940347671509,
      "learning_rate": 0.0002923576114199624,
      "loss": 3.0897,
      "step": 117070
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8282520771026611,
      "learning_rate": 0.0002923535221860154,
      "loss": 3.1429,
      "step": 117071
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0075576305389404,
      "learning_rate": 0.00029234943295349,
      "loss": 3.1011,
      "step": 117072
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8051557540893555,
      "learning_rate": 0.00029234534372238704,
      "loss": 3.0729,
      "step": 117073
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9264750480651855,
      "learning_rate": 0.0002923412544927072,
      "loss": 2.7861,
      "step": 117074
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8942580223083496,
      "learning_rate": 0.0002923371652644512,
      "loss": 2.8521,
      "step": 117075
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8551568984985352,
      "learning_rate": 0.0002923330760376199,
      "loss": 3.2035,
      "step": 117076
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.700096845626831,
      "learning_rate": 0.00029232898681221404,
      "loss": 3.0222,
      "step": 117077
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6842951774597168,
      "learning_rate": 0.00029232489758823453,
      "loss": 3.053,
      "step": 117078
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.152534246444702,
      "learning_rate": 0.0002923208083656818,
      "loss": 2.7337,
      "step": 117079
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7597230672836304,
      "learning_rate": 0.00029231671914455677,
      "loss": 3.0763,
      "step": 117080
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0128750801086426,
      "learning_rate": 0.0002923126299248602,
      "loss": 2.9632,
      "step": 117081
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7919172048568726,
      "learning_rate": 0.00029230854070659286,
      "loss": 2.9982,
      "step": 117082
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.543879270553589,
      "learning_rate": 0.00029230445148975553,
      "loss": 3.0791,
      "step": 117083
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.119502305984497,
      "learning_rate": 0.00029230036227434903,
      "loss": 3.5356,
      "step": 117084
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.2321579456329346,
      "learning_rate": 0.00029229627306037395,
      "loss": 3.2319,
      "step": 117085
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6978710889816284,
      "learning_rate": 0.00029229218384783115,
      "loss": 2.8048,
      "step": 117086
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9407073259353638,
      "learning_rate": 0.00029228809463672134,
      "loss": 3.0915,
      "step": 117087
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.428123712539673,
      "learning_rate": 0.00029228400542704534,
      "loss": 2.8849,
      "step": 117088
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9705891609191895,
      "learning_rate": 0.0002922799162188039,
      "loss": 2.9779,
      "step": 117089
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9921010732650757,
      "learning_rate": 0.0002922758270119979,
      "loss": 2.7152,
      "step": 117090
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.943325161933899,
      "learning_rate": 0.0002922717378066278,
      "loss": 2.9357,
      "step": 117091
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.5084526538848877,
      "learning_rate": 0.0002922676486026946,
      "loss": 3.0765,
      "step": 117092
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7827718257904053,
      "learning_rate": 0.0002922635594001989,
      "loss": 2.8908,
      "step": 117093
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.6854546070098877,
      "learning_rate": 0.0002922594701991416,
      "loss": 3.1426,
      "step": 117094
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.094954013824463,
      "learning_rate": 0.0002922553809995234,
      "loss": 3.1473,
      "step": 117095
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.1404342651367188,
      "learning_rate": 0.00029225129180134523,
      "loss": 2.8759,
      "step": 117096
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8633081912994385,
      "learning_rate": 0.00029224720260460755,
      "loss": 2.7243,
      "step": 117097
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.423356771469116,
      "learning_rate": 0.0002922431134093112,
      "loss": 3.07,
      "step": 117098
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.07905912399292,
      "learning_rate": 0.0002922390242154571,
      "loss": 3.0855,
      "step": 117099
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.396457195281982,
      "learning_rate": 0.00029223493502304585,
      "loss": 2.9322,
      "step": 117100
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.402930498123169,
      "learning_rate": 0.0002922308458320783,
      "loss": 2.9218,
      "step": 117101
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.035883903503418,
      "learning_rate": 0.0002922267566425553,
      "loss": 2.8954,
      "step": 117102
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.950709342956543,
      "learning_rate": 0.00029222266745447734,
      "loss": 2.9712,
      "step": 117103
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.9466712474823,
      "learning_rate": 0.00029221857826784535,
      "loss": 3.1553,
      "step": 117104
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9362242221832275,
      "learning_rate": 0.00029221448908266006,
      "loss": 3.1011,
      "step": 117105
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.61513090133667,
      "learning_rate": 0.0002922103998989223,
      "loss": 2.8376,
      "step": 117106
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.0794057846069336,
      "learning_rate": 0.0002922063107166327,
      "loss": 3.008,
      "step": 117107
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6097747087478638,
      "learning_rate": 0.00029220222153579213,
      "loss": 3.0962,
      "step": 117108
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.456122636795044,
      "learning_rate": 0.0002921981323564014,
      "loss": 2.7888,
      "step": 117109
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1591455936431885,
      "learning_rate": 0.00029219404317846115,
      "loss": 2.8239,
      "step": 117110
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.8364641666412354,
      "learning_rate": 0.00029218995400197214,
      "loss": 2.8671,
      "step": 117111
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6677991151809692,
      "learning_rate": 0.0002921858648269351,
      "loss": 2.8676,
      "step": 117112
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.999010443687439,
      "learning_rate": 0.0002921817756533509,
      "loss": 3.1047,
      "step": 117113
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.067836284637451,
      "learning_rate": 0.00029217768648122025,
      "loss": 2.9368,
      "step": 117114
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.236983060836792,
      "learning_rate": 0.000292173597310544,
      "loss": 3.0689,
      "step": 117115
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.7788161039352417,
      "learning_rate": 0.0002921695081413228,
      "loss": 3.1012,
      "step": 117116
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.6585006713867188,
      "learning_rate": 0.0002921654189735574,
      "loss": 2.8733,
      "step": 117117
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.1020827293395996,
      "learning_rate": 0.0002921613298072486,
      "loss": 2.9271,
      "step": 117118
    },
    {
      "epoch": 1.52,
      "grad_norm": 2.08980131149292,
      "learning_rate": 0.00029215724064239715,
      "loss": 2.7496,
      "step": 117119
    },
    {
      "epoch": 1.52,
      "grad_norm": 1.9936258792877197,
      "learning_rate": 0.0002921531514790038,
      "loss": 3.2756,
      "step": 117120
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.598015785217285,
      "learning_rate": 0.00029214906231706943,
      "loss": 3.0123,
      "step": 117121
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1043879985809326,
      "learning_rate": 0.00029214497315659463,
      "loss": 2.9826,
      "step": 117122
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6429123878479004,
      "learning_rate": 0.0002921408839975802,
      "loss": 3.0944,
      "step": 117123
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5043859481811523,
      "learning_rate": 0.0002921367948400269,
      "loss": 3.0124,
      "step": 117124
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9133120775222778,
      "learning_rate": 0.00029213270568393556,
      "loss": 2.836,
      "step": 117125
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.1413447856903076,
      "learning_rate": 0.0002921286165293069,
      "loss": 2.863,
      "step": 117126
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5983238220214844,
      "learning_rate": 0.0002921245273761418,
      "loss": 2.9452,
      "step": 117127
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4437968730926514,
      "learning_rate": 0.0002921204382244407,
      "loss": 2.9911,
      "step": 117128
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.413255453109741,
      "learning_rate": 0.00029211634907420463,
      "loss": 2.9452,
      "step": 117129
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.74623966217041,
      "learning_rate": 0.0002921122599254343,
      "loss": 3.1155,
      "step": 117130
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3115739822387695,
      "learning_rate": 0.00029210817077813037,
      "loss": 3.0844,
      "step": 117131
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8641976118087769,
      "learning_rate": 0.0002921040816322937,
      "loss": 2.8814,
      "step": 117132
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.200080394744873,
      "learning_rate": 0.0002920999924879252,
      "loss": 2.9773,
      "step": 117133
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.79021692276001,
      "learning_rate": 0.0002920959033450254,
      "loss": 2.7775,
      "step": 117134
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0339460372924805,
      "learning_rate": 0.00029209181420359497,
      "loss": 2.9419,
      "step": 117135
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.747907042503357,
      "learning_rate": 0.0002920877250636349,
      "loss": 2.9698,
      "step": 117136
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5958082675933838,
      "learning_rate": 0.0002920836359251459,
      "loss": 3.2363,
      "step": 117137
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.2379534244537354,
      "learning_rate": 0.0002920795467881286,
      "loss": 3.0381,
      "step": 117138
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7212400436401367,
      "learning_rate": 0.000292075457652584,
      "loss": 3.1344,
      "step": 117139
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9838076829910278,
      "learning_rate": 0.0002920713685185127,
      "loss": 2.9919,
      "step": 117140
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8048365116119385,
      "learning_rate": 0.00029206727938591547,
      "loss": 2.7043,
      "step": 117141
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7693686485290527,
      "learning_rate": 0.000292063190254793,
      "loss": 2.9875,
      "step": 117142
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.1364567279815674,
      "learning_rate": 0.0002920591011251462,
      "loss": 2.8509,
      "step": 117143
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7953290939331055,
      "learning_rate": 0.0002920550119969758,
      "loss": 2.8368,
      "step": 117144
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8233451843261719,
      "learning_rate": 0.0002920509228702825,
      "loss": 2.8036,
      "step": 117145
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9575819969177246,
      "learning_rate": 0.00029204683374506703,
      "loss": 2.8845,
      "step": 117146
    },
    {
      "epoch": 1.53,
      "grad_norm": 5.027701377868652,
      "learning_rate": 0.0002920427446213302,
      "loss": 3.0751,
      "step": 117147
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.900405168533325,
      "learning_rate": 0.0002920386554990729,
      "loss": 2.8207,
      "step": 117148
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8251979351043701,
      "learning_rate": 0.0002920345663782956,
      "loss": 3.0087,
      "step": 117149
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.4023709297180176,
      "learning_rate": 0.0002920304772589993,
      "loss": 3.2552,
      "step": 117150
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.2686891555786133,
      "learning_rate": 0.00029202638814118475,
      "loss": 3.2652,
      "step": 117151
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8979196548461914,
      "learning_rate": 0.00029202229902485253,
      "loss": 2.9291,
      "step": 117152
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8565176725387573,
      "learning_rate": 0.00029201820991000356,
      "loss": 2.9474,
      "step": 117153
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9184261560440063,
      "learning_rate": 0.00029201412079663853,
      "loss": 2.9605,
      "step": 117154
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9576979875564575,
      "learning_rate": 0.0002920100316847583,
      "loss": 3.0291,
      "step": 117155
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4334797859191895,
      "learning_rate": 0.0002920059425743635,
      "loss": 2.8506,
      "step": 117156
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1985697746276855,
      "learning_rate": 0.000292001853465455,
      "loss": 3.1552,
      "step": 117157
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5559403896331787,
      "learning_rate": 0.00029199776435803347,
      "loss": 2.9169,
      "step": 117158
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3324999809265137,
      "learning_rate": 0.00029199367525209967,
      "loss": 2.8997,
      "step": 117159
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.901009202003479,
      "learning_rate": 0.0002919895861476544,
      "loss": 2.971,
      "step": 117160
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8262779712677002,
      "learning_rate": 0.0002919854970446984,
      "loss": 3.0367,
      "step": 117161
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.438020706176758,
      "learning_rate": 0.00029198140794323255,
      "loss": 2.9303,
      "step": 117162
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1221060752868652,
      "learning_rate": 0.00029197731884325753,
      "loss": 2.9065,
      "step": 117163
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.363922357559204,
      "learning_rate": 0.00029197322974477394,
      "loss": 2.9086,
      "step": 117164
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0303003787994385,
      "learning_rate": 0.00029196914064778275,
      "loss": 3.0302,
      "step": 117165
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1740152835845947,
      "learning_rate": 0.0002919650515522846,
      "loss": 2.9278,
      "step": 117166
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5544588565826416,
      "learning_rate": 0.00029196096245828035,
      "loss": 3.0305,
      "step": 117167
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.65058970451355,
      "learning_rate": 0.00029195687336577066,
      "loss": 3.1318,
      "step": 117168
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1543262004852295,
      "learning_rate": 0.00029195278427475647,
      "loss": 2.7925,
      "step": 117169
    },
    {
      "epoch": 1.53,
      "grad_norm": 5.132400989532471,
      "learning_rate": 0.0002919486951852383,
      "loss": 2.9213,
      "step": 117170
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.967111587524414,
      "learning_rate": 0.000291944606097217,
      "loss": 3.1748,
      "step": 117171
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.03436803817749,
      "learning_rate": 0.00029194051701069334,
      "loss": 2.8788,
      "step": 117172
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1113638877868652,
      "learning_rate": 0.0002919364279256681,
      "loss": 2.8479,
      "step": 117173
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.852228879928589,
      "learning_rate": 0.00029193233884214207,
      "loss": 3.0007,
      "step": 117174
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7076808214187622,
      "learning_rate": 0.00029192824976011596,
      "loss": 2.9211,
      "step": 117175
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.838724970817566,
      "learning_rate": 0.0002919241606795906,
      "loss": 2.7647,
      "step": 117176
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5868152379989624,
      "learning_rate": 0.00029192007160056666,
      "loss": 2.901,
      "step": 117177
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7794477939605713,
      "learning_rate": 0.00029191598252304487,
      "loss": 2.8277,
      "step": 117178
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9002989530563354,
      "learning_rate": 0.00029191189344702606,
      "loss": 3.2513,
      "step": 117179
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.549220085144043,
      "learning_rate": 0.000291907804372511,
      "loss": 2.876,
      "step": 117180
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8764970302581787,
      "learning_rate": 0.00029190371529950044,
      "loss": 3.049,
      "step": 117181
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9673793315887451,
      "learning_rate": 0.0002918996262279952,
      "loss": 3.0221,
      "step": 117182
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.315934181213379,
      "learning_rate": 0.0002918955371579959,
      "loss": 2.9115,
      "step": 117183
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4327306747436523,
      "learning_rate": 0.00029189144808950334,
      "loss": 3.265,
      "step": 117184
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1025443077087402,
      "learning_rate": 0.0002918873590225183,
      "loss": 3.0859,
      "step": 117185
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6691794395446777,
      "learning_rate": 0.00029188326995704164,
      "loss": 2.9445,
      "step": 117186
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8681820631027222,
      "learning_rate": 0.00029187918089307395,
      "loss": 3.0552,
      "step": 117187
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.07112979888916,
      "learning_rate": 0.0002918750918306162,
      "loss": 2.7919,
      "step": 117188
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.370025634765625,
      "learning_rate": 0.00029187100276966896,
      "loss": 2.9765,
      "step": 117189
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6793372631073,
      "learning_rate": 0.000291866913710233,
      "loss": 2.744,
      "step": 117190
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0911643505096436,
      "learning_rate": 0.0002918628246523092,
      "loss": 3.0251,
      "step": 117191
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.527238130569458,
      "learning_rate": 0.0002918587355958982,
      "loss": 2.9624,
      "step": 117192
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.130743980407715,
      "learning_rate": 0.0002918546465410008,
      "loss": 2.9044,
      "step": 117193
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4880974292755127,
      "learning_rate": 0.00029185055748761794,
      "loss": 3.1446,
      "step": 117194
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7234268188476562,
      "learning_rate": 0.00029184646843575005,
      "loss": 2.9157,
      "step": 117195
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.713882565498352,
      "learning_rate": 0.0002918423793853981,
      "loss": 3.0265,
      "step": 117196
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.104951858520508,
      "learning_rate": 0.00029183829033656273,
      "loss": 2.9038,
      "step": 117197
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6853837966918945,
      "learning_rate": 0.00029183420128924486,
      "loss": 3.1267,
      "step": 117198
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9071617126464844,
      "learning_rate": 0.00029183011224344515,
      "loss": 3.0654,
      "step": 117199
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0896174907684326,
      "learning_rate": 0.00029182602319916445,
      "loss": 3.2371,
      "step": 117200
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.249916076660156,
      "learning_rate": 0.0002918219341564034,
      "loss": 3.0033,
      "step": 117201
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7117977142333984,
      "learning_rate": 0.00029181784511516273,
      "loss": 3.0027,
      "step": 117202
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9314569234848022,
      "learning_rate": 0.00029181375607544333,
      "loss": 2.7251,
      "step": 117203
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1406660079956055,
      "learning_rate": 0.0002918096670372459,
      "loss": 3.1136,
      "step": 117204
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8997130393981934,
      "learning_rate": 0.0002918055780005712,
      "loss": 3.1052,
      "step": 117205
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9211788177490234,
      "learning_rate": 0.0002918014889654201,
      "loss": 2.6563,
      "step": 117206
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.229093551635742,
      "learning_rate": 0.0002917973999317932,
      "loss": 2.9215,
      "step": 117207
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1735496520996094,
      "learning_rate": 0.00029179331089969123,
      "loss": 3.0496,
      "step": 117208
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1250951290130615,
      "learning_rate": 0.0002917892218691151,
      "loss": 3.2574,
      "step": 117209
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2904555797576904,
      "learning_rate": 0.00029178513284006547,
      "loss": 3.1252,
      "step": 117210
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0127992630004883,
      "learning_rate": 0.00029178104381254313,
      "loss": 3.1221,
      "step": 117211
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6529825925827026,
      "learning_rate": 0.000291776954786549,
      "loss": 2.8188,
      "step": 117212
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0390400886535645,
      "learning_rate": 0.0002917728657620836,
      "loss": 2.9619,
      "step": 117213
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3491806983947754,
      "learning_rate": 0.00029176877673914775,
      "loss": 2.8169,
      "step": 117214
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.212794303894043,
      "learning_rate": 0.0002917646877177422,
      "loss": 3.1872,
      "step": 117215
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8252739906311035,
      "learning_rate": 0.0002917605986978678,
      "loss": 2.89,
      "step": 117216
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.67446768283844,
      "learning_rate": 0.00029175650967952526,
      "loss": 2.8788,
      "step": 117217
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7157124280929565,
      "learning_rate": 0.00029175242066271543,
      "loss": 3.2187,
      "step": 117218
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.844884991645813,
      "learning_rate": 0.0002917483316474389,
      "loss": 3.1121,
      "step": 117219
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.120037078857422,
      "learning_rate": 0.0002917442426336965,
      "loss": 3.0421,
      "step": 117220
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8481080532073975,
      "learning_rate": 0.0002917401536214889,
      "loss": 3.0859,
      "step": 117221
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.322490692138672,
      "learning_rate": 0.0002917360646108171,
      "loss": 2.8807,
      "step": 117222
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5067250728607178,
      "learning_rate": 0.0002917319756016817,
      "loss": 3.1276,
      "step": 117223
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.845969557762146,
      "learning_rate": 0.0002917278865940835,
      "loss": 3.2851,
      "step": 117224
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2178444862365723,
      "learning_rate": 0.0002917237975880232,
      "loss": 3.262,
      "step": 117225
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.5407326221466064,
      "learning_rate": 0.0002917197085835016,
      "loss": 2.9966,
      "step": 117226
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3357462882995605,
      "learning_rate": 0.0002917156195805195,
      "loss": 2.8834,
      "step": 117227
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.940051794052124,
      "learning_rate": 0.0002917115305790776,
      "loss": 3.013,
      "step": 117228
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9064440727233887,
      "learning_rate": 0.00029170744157917664,
      "loss": 2.9403,
      "step": 117229
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7566763162612915,
      "learning_rate": 0.0002917033525808175,
      "loss": 2.9658,
      "step": 117230
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9828484058380127,
      "learning_rate": 0.0002916992635840008,
      "loss": 2.6924,
      "step": 117231
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.0041465759277344,
      "learning_rate": 0.00029169517458872745,
      "loss": 3.0843,
      "step": 117232
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0520503520965576,
      "learning_rate": 0.00029169108559499806,
      "loss": 2.9682,
      "step": 117233
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0990664958953857,
      "learning_rate": 0.00029168699660281344,
      "loss": 3.0436,
      "step": 117234
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.466057300567627,
      "learning_rate": 0.00029168290761217436,
      "loss": 2.8078,
      "step": 117235
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.988228678703308,
      "learning_rate": 0.00029167881862308167,
      "loss": 3.1371,
      "step": 117236
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.067105293273926,
      "learning_rate": 0.0002916747296355359,
      "loss": 2.9749,
      "step": 117237
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.55963397026062,
      "learning_rate": 0.0002916706406495381,
      "loss": 2.9615,
      "step": 117238
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9343918561935425,
      "learning_rate": 0.00029166655166508877,
      "loss": 2.8263,
      "step": 117239
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.046154499053955,
      "learning_rate": 0.0002916624626821889,
      "loss": 3.0067,
      "step": 117240
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9519141912460327,
      "learning_rate": 0.0002916583737008391,
      "loss": 3.1626,
      "step": 117241
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1275055408477783,
      "learning_rate": 0.00029165428472104006,
      "loss": 2.8412,
      "step": 117242
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9464693069458008,
      "learning_rate": 0.0002916501957427928,
      "loss": 3.0162,
      "step": 117243
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.758902668952942,
      "learning_rate": 0.0002916461067660978,
      "loss": 2.7848,
      "step": 117244
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0288050174713135,
      "learning_rate": 0.000291642017790956,
      "loss": 2.78,
      "step": 117245
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7801259756088257,
      "learning_rate": 0.0002916379288173681,
      "loss": 2.98,
      "step": 117246
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.693943738937378,
      "learning_rate": 0.0002916338398453349,
      "loss": 2.9383,
      "step": 117247
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2815170288085938,
      "learning_rate": 0.0002916297508748571,
      "loss": 2.6487,
      "step": 117248
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9615728855133057,
      "learning_rate": 0.00029162566190593555,
      "loss": 2.7768,
      "step": 117249
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.594071626663208,
      "learning_rate": 0.0002916215729385708,
      "loss": 2.8494,
      "step": 117250
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7406893968582153,
      "learning_rate": 0.00029161748397276387,
      "loss": 3.0544,
      "step": 117251
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.1875314712524414,
      "learning_rate": 0.00029161339500851535,
      "loss": 2.925,
      "step": 117252
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9562405347824097,
      "learning_rate": 0.00029160930604582607,
      "loss": 2.667,
      "step": 117253
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3625471591949463,
      "learning_rate": 0.00029160521708469685,
      "loss": 3.0062,
      "step": 117254
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8750536441802979,
      "learning_rate": 0.0002916011281251284,
      "loss": 2.8621,
      "step": 117255
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1495556831359863,
      "learning_rate": 0.0002915970391671213,
      "loss": 2.8934,
      "step": 117256
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9492322206497192,
      "learning_rate": 0.00029159295021067655,
      "loss": 3.0294,
      "step": 117257
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.899400234222412,
      "learning_rate": 0.00029158886125579483,
      "loss": 2.9686,
      "step": 117258
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9844597578048706,
      "learning_rate": 0.0002915847723024769,
      "loss": 3.0228,
      "step": 117259
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1399121284484863,
      "learning_rate": 0.0002915806833507235,
      "loss": 2.7595,
      "step": 117260
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.954024076461792,
      "learning_rate": 0.00029157659440053556,
      "loss": 2.9875,
      "step": 117261
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7354509830474854,
      "learning_rate": 0.0002915725054519135,
      "loss": 3.014,
      "step": 117262
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6064014434814453,
      "learning_rate": 0.00029156841650485835,
      "loss": 2.9026,
      "step": 117263
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0620362758636475,
      "learning_rate": 0.0002915643275593707,
      "loss": 3.1548,
      "step": 117264
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.157931089401245,
      "learning_rate": 0.00029156023861545146,
      "loss": 2.868,
      "step": 117265
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5066170692443848,
      "learning_rate": 0.0002915561496731014,
      "loss": 3.1403,
      "step": 117266
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.667466640472412,
      "learning_rate": 0.00029155206073232123,
      "loss": 2.993,
      "step": 117267
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.041804313659668,
      "learning_rate": 0.0002915479717931116,
      "loss": 3.0728,
      "step": 117268
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8023736476898193,
      "learning_rate": 0.00029154388285547336,
      "loss": 3.0371,
      "step": 117269
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.231796979904175,
      "learning_rate": 0.00029153979391940727,
      "loss": 2.8745,
      "step": 117270
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.414790391921997,
      "learning_rate": 0.0002915357049849141,
      "loss": 3.0412,
      "step": 117271
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.3012280464172363,
      "learning_rate": 0.00029153161605199456,
      "loss": 2.8794,
      "step": 117272
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.933053970336914,
      "learning_rate": 0.0002915275271206497,
      "loss": 2.817,
      "step": 117273
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1481828689575195,
      "learning_rate": 0.00029152343819087976,
      "loss": 2.9303,
      "step": 117274
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9328019618988037,
      "learning_rate": 0.00029151934926268584,
      "loss": 3.0825,
      "step": 117275
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.093148708343506,
      "learning_rate": 0.0002915152603360686,
      "loss": 2.9695,
      "step": 117276
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8127639293670654,
      "learning_rate": 0.00029151117141102887,
      "loss": 3.1922,
      "step": 117277
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5505170822143555,
      "learning_rate": 0.0002915070824875674,
      "loss": 3.1785,
      "step": 117278
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.222583055496216,
      "learning_rate": 0.000291502993565685,
      "loss": 3.0713,
      "step": 117279
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.549323320388794,
      "learning_rate": 0.0002914989046453822,
      "loss": 3.0242,
      "step": 117280
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9126007556915283,
      "learning_rate": 0.00029149481572666,
      "loss": 2.8934,
      "step": 117281
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8377150297164917,
      "learning_rate": 0.000291490726809519,
      "loss": 3.0341,
      "step": 117282
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0344653129577637,
      "learning_rate": 0.0002914866378939601,
      "loss": 2.9659,
      "step": 117283
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.078407049179077,
      "learning_rate": 0.00029148254897998395,
      "loss": 2.9422,
      "step": 117284
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.778323173522949,
      "learning_rate": 0.0002914784600675915,
      "loss": 3.0718,
      "step": 117285
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.496980905532837,
      "learning_rate": 0.0002914743711567832,
      "loss": 2.8024,
      "step": 117286
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.814781904220581,
      "learning_rate": 0.00029147028224756,
      "loss": 3.1025,
      "step": 117287
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.75847327709198,
      "learning_rate": 0.0002914661933399227,
      "loss": 3.0867,
      "step": 117288
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9920488595962524,
      "learning_rate": 0.0002914621044338719,
      "loss": 3.156,
      "step": 117289
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.112553119659424,
      "learning_rate": 0.00029145801552940846,
      "loss": 3.0601,
      "step": 117290
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.366443157196045,
      "learning_rate": 0.0002914539266265333,
      "loss": 3.058,
      "step": 117291
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7737205028533936,
      "learning_rate": 0.0002914498377252469,
      "loss": 2.8313,
      "step": 117292
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7054685354232788,
      "learning_rate": 0.0002914457488255501,
      "loss": 3.0948,
      "step": 117293
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5192043781280518,
      "learning_rate": 0.0002914416599274437,
      "loss": 2.9992,
      "step": 117294
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9892263412475586,
      "learning_rate": 0.0002914375710309285,
      "loss": 2.9411,
      "step": 117295
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.173247814178467,
      "learning_rate": 0.0002914334821360052,
      "loss": 3.1427,
      "step": 117296
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7470778226852417,
      "learning_rate": 0.0002914293932426747,
      "loss": 2.9338,
      "step": 117297
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.956250548362732,
      "learning_rate": 0.0002914253043509375,
      "loss": 2.8724,
      "step": 117298
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3117520809173584,
      "learning_rate": 0.0002914212154607945,
      "loss": 3.1273,
      "step": 117299
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7554066181182861,
      "learning_rate": 0.0002914171265722465,
      "loss": 2.8373,
      "step": 117300
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7185477018356323,
      "learning_rate": 0.00029141303768529414,
      "loss": 3.0034,
      "step": 117301
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.188063859939575,
      "learning_rate": 0.0002914089487999383,
      "loss": 2.8629,
      "step": 117302
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8432165384292603,
      "learning_rate": 0.00029140485991617987,
      "loss": 2.9628,
      "step": 117303
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9198927879333496,
      "learning_rate": 0.0002914007710340192,
      "loss": 2.8158,
      "step": 117304
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9199280738830566,
      "learning_rate": 0.0002913966821534574,
      "loss": 3.0123,
      "step": 117305
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6797559261322021,
      "learning_rate": 0.0002913925932744951,
      "loss": 3.0178,
      "step": 117306
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6866286993026733,
      "learning_rate": 0.000291388504397133,
      "loss": 3.0365,
      "step": 117307
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6281490325927734,
      "learning_rate": 0.000291384415521372,
      "loss": 3.0901,
      "step": 117308
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2259750366210938,
      "learning_rate": 0.0002913803266472128,
      "loss": 3.1374,
      "step": 117309
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3956425189971924,
      "learning_rate": 0.0002913762377746563,
      "loss": 3.176,
      "step": 117310
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.779381275177002,
      "learning_rate": 0.000291372148903703,
      "loss": 2.9372,
      "step": 117311
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7433295249938965,
      "learning_rate": 0.0002913680600343537,
      "loss": 3.0658,
      "step": 117312
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.620198369026184,
      "learning_rate": 0.0002913639711666093,
      "loss": 3.2431,
      "step": 117313
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.0226471424102783,
      "learning_rate": 0.0002913598823004705,
      "loss": 3.0479,
      "step": 117314
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9054372310638428,
      "learning_rate": 0.000291355793435938,
      "loss": 2.7745,
      "step": 117315
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9416078329086304,
      "learning_rate": 0.0002913517045730128,
      "loss": 2.9732,
      "step": 117316
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7428182363510132,
      "learning_rate": 0.00029134761571169546,
      "loss": 2.9354,
      "step": 117317
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0885260105133057,
      "learning_rate": 0.00029134352685198663,
      "loss": 2.9355,
      "step": 117318
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5536080598831177,
      "learning_rate": 0.0002913394379938872,
      "loss": 2.8079,
      "step": 117319
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.04537296295166,
      "learning_rate": 0.00029133534913739796,
      "loss": 3.1337,
      "step": 117320
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8613306283950806,
      "learning_rate": 0.0002913312602825197,
      "loss": 2.686,
      "step": 117321
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8375200033187866,
      "learning_rate": 0.0002913271714292531,
      "loss": 2.8674,
      "step": 117322
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.175426721572876,
      "learning_rate": 0.0002913230825775989,
      "loss": 3.0411,
      "step": 117323
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8523656129837036,
      "learning_rate": 0.00029131899372755797,
      "loss": 2.831,
      "step": 117324
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8761346340179443,
      "learning_rate": 0.00029131490487913097,
      "loss": 3.0432,
      "step": 117325
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1359755992889404,
      "learning_rate": 0.0002913108160323187,
      "loss": 2.8261,
      "step": 117326
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.859768271446228,
      "learning_rate": 0.00029130672718712184,
      "loss": 2.8177,
      "step": 117327
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9456026554107666,
      "learning_rate": 0.00029130263834354134,
      "loss": 3.1687,
      "step": 117328
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.951742172241211,
      "learning_rate": 0.00029129854950157776,
      "loss": 2.7147,
      "step": 117329
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1120176315307617,
      "learning_rate": 0.00029129446066123197,
      "loss": 3.0899,
      "step": 117330
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.4883337020874023,
      "learning_rate": 0.00029129037182250474,
      "loss": 3.159,
      "step": 117331
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0851168632507324,
      "learning_rate": 0.00029128628298539676,
      "loss": 2.9943,
      "step": 117332
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5512354373931885,
      "learning_rate": 0.0002912821941499088,
      "loss": 2.9765,
      "step": 117333
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5357637405395508,
      "learning_rate": 0.0002912781053160417,
      "loss": 2.6666,
      "step": 117334
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8108640909194946,
      "learning_rate": 0.0002912740164837961,
      "loss": 2.8451,
      "step": 117335
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.060040235519409,
      "learning_rate": 0.0002912699276531729,
      "loss": 2.9796,
      "step": 117336
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.204148292541504,
      "learning_rate": 0.0002912658388241727,
      "loss": 3.0331,
      "step": 117337
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2219479084014893,
      "learning_rate": 0.0002912617499967964,
      "loss": 2.823,
      "step": 117338
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4933643341064453,
      "learning_rate": 0.00029125766117104473,
      "loss": 3.0181,
      "step": 117339
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1235527992248535,
      "learning_rate": 0.0002912535723469184,
      "loss": 3.3398,
      "step": 117340
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.273878335952759,
      "learning_rate": 0.0002912494835244182,
      "loss": 3.0654,
      "step": 117341
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.899506688117981,
      "learning_rate": 0.00029124539470354484,
      "loss": 2.9066,
      "step": 117342
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1757678985595703,
      "learning_rate": 0.00029124130588429914,
      "loss": 3.1078,
      "step": 117343
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9177778959274292,
      "learning_rate": 0.00029123721706668187,
      "loss": 2.9721,
      "step": 117344
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8915072679519653,
      "learning_rate": 0.0002912331282506937,
      "loss": 2.9565,
      "step": 117345
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.116787910461426,
      "learning_rate": 0.00029122903943633565,
      "loss": 2.8913,
      "step": 117346
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1175425052642822,
      "learning_rate": 0.0002912249506236082,
      "loss": 2.9806,
      "step": 117347
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.893211007118225,
      "learning_rate": 0.0002912208618125121,
      "loss": 2.8739,
      "step": 117348
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.25748610496521,
      "learning_rate": 0.00029121677300304825,
      "loss": 2.7665,
      "step": 117349
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1058614253997803,
      "learning_rate": 0.00029121268419521736,
      "loss": 3.0747,
      "step": 117350
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9342883825302124,
      "learning_rate": 0.0002912085953890202,
      "loss": 2.9854,
      "step": 117351
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.810685157775879,
      "learning_rate": 0.00029120450658445764,
      "loss": 3.1106,
      "step": 117352
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.026268243789673,
      "learning_rate": 0.0002912004177815302,
      "loss": 2.8399,
      "step": 117353
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2071428298950195,
      "learning_rate": 0.0002911963289802388,
      "loss": 2.9305,
      "step": 117354
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.898984670639038,
      "learning_rate": 0.00029119224018058415,
      "loss": 3.1112,
      "step": 117355
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7319180965423584,
      "learning_rate": 0.000291188151382567,
      "loss": 3.086,
      "step": 117356
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3984529972076416,
      "learning_rate": 0.0002911840625861882,
      "loss": 2.8523,
      "step": 117357
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2474920749664307,
      "learning_rate": 0.00029117997379144854,
      "loss": 2.8152,
      "step": 117358
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.091512680053711,
      "learning_rate": 0.0002911758849983486,
      "loss": 2.9518,
      "step": 117359
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2206015586853027,
      "learning_rate": 0.0002911717962068892,
      "loss": 2.9903,
      "step": 117360
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2521796226501465,
      "learning_rate": 0.0002911677074170711,
      "loss": 3.1213,
      "step": 117361
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8493210077285767,
      "learning_rate": 0.00029116361862889517,
      "loss": 2.9362,
      "step": 117362
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8885154724121094,
      "learning_rate": 0.00029115952984236197,
      "loss": 2.7448,
      "step": 117363
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6668701171875,
      "learning_rate": 0.00029115544105747264,
      "loss": 2.7298,
      "step": 117364
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0774219036102295,
      "learning_rate": 0.0002911513522742275,
      "loss": 3.0089,
      "step": 117365
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.20621919631958,
      "learning_rate": 0.00029114726349262747,
      "loss": 3.098,
      "step": 117366
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5376176834106445,
      "learning_rate": 0.0002911431747126733,
      "loss": 2.802,
      "step": 117367
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0245862007141113,
      "learning_rate": 0.00029113908593436584,
      "loss": 2.9773,
      "step": 117368
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9668922424316406,
      "learning_rate": 0.00029113499715770577,
      "loss": 2.9223,
      "step": 117369
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9405641555786133,
      "learning_rate": 0.000291130908382694,
      "loss": 3.0705,
      "step": 117370
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.002267599105835,
      "learning_rate": 0.00029112681960933104,
      "loss": 3.0504,
      "step": 117371
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8743054866790771,
      "learning_rate": 0.0002911227308376178,
      "loss": 2.9217,
      "step": 117372
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.67572820186615,
      "learning_rate": 0.000291118642067555,
      "loss": 2.9976,
      "step": 117373
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.667004108428955,
      "learning_rate": 0.00029111455329914335,
      "loss": 3.0351,
      "step": 117374
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7795785665512085,
      "learning_rate": 0.0002911104645323837,
      "loss": 3.1151,
      "step": 117375
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.538191318511963,
      "learning_rate": 0.0002911063757672768,
      "loss": 3.2246,
      "step": 117376
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1905272006988525,
      "learning_rate": 0.00029110228700382354,
      "loss": 2.9906,
      "step": 117377
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1449062824249268,
      "learning_rate": 0.00029109819824202436,
      "loss": 3.1207,
      "step": 117378
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2226803302764893,
      "learning_rate": 0.0002910941094818802,
      "loss": 2.9467,
      "step": 117379
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3301029205322266,
      "learning_rate": 0.00029109002072339184,
      "loss": 2.8966,
      "step": 117380
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.225562334060669,
      "learning_rate": 0.00029108593196656003,
      "loss": 2.8437,
      "step": 117381
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.220949172973633,
      "learning_rate": 0.0002910818432113855,
      "loss": 2.7908,
      "step": 117382
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3781142234802246,
      "learning_rate": 0.0002910777544578691,
      "loss": 3.4602,
      "step": 117383
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3950932025909424,
      "learning_rate": 0.0002910736657060114,
      "loss": 2.8608,
      "step": 117384
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5510189533233643,
      "learning_rate": 0.0002910695769558133,
      "loss": 2.9261,
      "step": 117385
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8986151218414307,
      "learning_rate": 0.00029106548820727557,
      "loss": 2.9852,
      "step": 117386
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7801135778427124,
      "learning_rate": 0.0002910613994603989,
      "loss": 3.1962,
      "step": 117387
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.617550849914551,
      "learning_rate": 0.00029105731071518405,
      "loss": 2.9607,
      "step": 117388
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.5723628997802734,
      "learning_rate": 0.000291053221971632,
      "loss": 2.6842,
      "step": 117389
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7006423473358154,
      "learning_rate": 0.00029104913322974314,
      "loss": 3.2548,
      "step": 117390
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.4800965785980225,
      "learning_rate": 0.0002910450444895185,
      "loss": 3.0096,
      "step": 117391
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.3386547565460205,
      "learning_rate": 0.0002910409557509587,
      "loss": 2.7287,
      "step": 117392
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9209086894989014,
      "learning_rate": 0.0002910368670140645,
      "loss": 2.8196,
      "step": 117393
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8549320697784424,
      "learning_rate": 0.00029103277827883676,
      "loss": 3.0451,
      "step": 117394
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4495584964752197,
      "learning_rate": 0.0002910286895452764,
      "loss": 3.0229,
      "step": 117395
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.783457636833191,
      "learning_rate": 0.0002910246008133837,
      "loss": 3.1575,
      "step": 117396
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.68880295753479,
      "learning_rate": 0.00029102051208315985,
      "loss": 2.7411,
      "step": 117397
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7780319452285767,
      "learning_rate": 0.00029101642335460534,
      "loss": 3.0157,
      "step": 117398
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0133748054504395,
      "learning_rate": 0.0002910123346277211,
      "loss": 3.0875,
      "step": 117399
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.157567024230957,
      "learning_rate": 0.0002910082459025078,
      "loss": 3.1326,
      "step": 117400
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8165361881256104,
      "learning_rate": 0.0002910041571789664,
      "loss": 3.1817,
      "step": 117401
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9655498266220093,
      "learning_rate": 0.0002910000684570974,
      "loss": 3.2661,
      "step": 117402
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.917096734046936,
      "learning_rate": 0.0002909959797369016,
      "loss": 3.1064,
      "step": 117403
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3160240650177,
      "learning_rate": 0.00029099189101837984,
      "loss": 2.9767,
      "step": 117404
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7786316871643066,
      "learning_rate": 0.00029098780230153286,
      "loss": 3.2187,
      "step": 117405
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9797238111495972,
      "learning_rate": 0.0002909837135863614,
      "loss": 3.0246,
      "step": 117406
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7928650379180908,
      "learning_rate": 0.00029097962487286636,
      "loss": 2.644,
      "step": 117407
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.020836114883423,
      "learning_rate": 0.00029097553616104824,
      "loss": 2.9833,
      "step": 117408
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.067425489425659,
      "learning_rate": 0.00029097144745090803,
      "loss": 3.1481,
      "step": 117409
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8069489002227783,
      "learning_rate": 0.00029096735874244633,
      "loss": 2.9104,
      "step": 117410
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7091495990753174,
      "learning_rate": 0.000290963270035664,
      "loss": 2.7881,
      "step": 117411
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.960252285003662,
      "learning_rate": 0.0002909591813305617,
      "loss": 2.8553,
      "step": 117412
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3152263164520264,
      "learning_rate": 0.0002909550926271404,
      "loss": 2.9198,
      "step": 117413
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.785491943359375,
      "learning_rate": 0.0002909510039254006,
      "loss": 3.1458,
      "step": 117414
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0194008350372314,
      "learning_rate": 0.0002909469152253432,
      "loss": 2.8778,
      "step": 117415
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.383241653442383,
      "learning_rate": 0.00029094282652696897,
      "loss": 2.7493,
      "step": 117416
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5534913539886475,
      "learning_rate": 0.0002909387378302786,
      "loss": 2.8352,
      "step": 117417
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.439610004425049,
      "learning_rate": 0.00029093464913527295,
      "loss": 2.8248,
      "step": 117418
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5891354084014893,
      "learning_rate": 0.00029093056044195273,
      "loss": 3.165,
      "step": 117419
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0734827518463135,
      "learning_rate": 0.0002909264717503186,
      "loss": 3.0126,
      "step": 117420
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.084378242492676,
      "learning_rate": 0.00029092238306037144,
      "loss": 2.9215,
      "step": 117421
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8312519788742065,
      "learning_rate": 0.000290918294372112,
      "loss": 2.8581,
      "step": 117422
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8028390407562256,
      "learning_rate": 0.00029091420568554105,
      "loss": 3.0769,
      "step": 117423
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.21120285987854,
      "learning_rate": 0.0002909101170006593,
      "loss": 2.9537,
      "step": 117424
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.031306266784668,
      "learning_rate": 0.0002909060283174675,
      "loss": 3.1951,
      "step": 117425
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.028998374938965,
      "learning_rate": 0.00029090193963596646,
      "loss": 3.1794,
      "step": 117426
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0115160942077637,
      "learning_rate": 0.0002908978509561569,
      "loss": 2.8042,
      "step": 117427
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7403146028518677,
      "learning_rate": 0.00029089376227803965,
      "loss": 2.7783,
      "step": 117428
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.924691915512085,
      "learning_rate": 0.0002908896736016153,
      "loss": 2.6462,
      "step": 117429
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.854883909225464,
      "learning_rate": 0.0002908855849268848,
      "loss": 2.7597,
      "step": 117430
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.700832724571228,
      "learning_rate": 0.00029088149625384904,
      "loss": 2.7854,
      "step": 117431
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.158062696456909,
      "learning_rate": 0.00029087740758250837,
      "loss": 3.0086,
      "step": 117432
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.491816759109497,
      "learning_rate": 0.00029087331891286376,
      "loss": 2.886,
      "step": 117433
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0502514839172363,
      "learning_rate": 0.000290869230244916,
      "loss": 3.0355,
      "step": 117434
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.638485908508301,
      "learning_rate": 0.00029086514157866585,
      "loss": 3.1153,
      "step": 117435
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9078408479690552,
      "learning_rate": 0.000290861052914114,
      "loss": 2.9726,
      "step": 117436
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.026449680328369,
      "learning_rate": 0.0002908569642512613,
      "loss": 3.1533,
      "step": 117437
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8811986446380615,
      "learning_rate": 0.00029085287559010853,
      "loss": 3.0881,
      "step": 117438
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.313359260559082,
      "learning_rate": 0.0002908487869306563,
      "loss": 3.1466,
      "step": 117439
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2035112380981445,
      "learning_rate": 0.0002908446982729054,
      "loss": 2.8375,
      "step": 117440
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8918108940124512,
      "learning_rate": 0.00029084060961685667,
      "loss": 2.9605,
      "step": 117441
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0877740383148193,
      "learning_rate": 0.00029083652096251086,
      "loss": 2.9785,
      "step": 117442
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.760471224784851,
      "learning_rate": 0.0002908324323098687,
      "loss": 2.7669,
      "step": 117443
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5724170207977295,
      "learning_rate": 0.0002908283436589311,
      "loss": 2.938,
      "step": 117444
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.194042205810547,
      "learning_rate": 0.00029082425500969854,
      "loss": 3.1532,
      "step": 117445
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8934704065322876,
      "learning_rate": 0.00029082016636217193,
      "loss": 2.9933,
      "step": 117446
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6811842918395996,
      "learning_rate": 0.000290816077716352,
      "loss": 2.8761,
      "step": 117447
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9157729148864746,
      "learning_rate": 0.00029081198907223956,
      "loss": 3.2367,
      "step": 117448
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.2847542762756348,
      "learning_rate": 0.00029080790042983537,
      "loss": 3.1528,
      "step": 117449
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.502007007598877,
      "learning_rate": 0.00029080381178914026,
      "loss": 3.05,
      "step": 117450
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8243054151535034,
      "learning_rate": 0.0002907997231501548,
      "loss": 3.053,
      "step": 117451
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.797892451286316,
      "learning_rate": 0.00029079563451287984,
      "loss": 3.0011,
      "step": 117452
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6858837604522705,
      "learning_rate": 0.0002907915458773161,
      "loss": 2.6836,
      "step": 117453
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1413168907165527,
      "learning_rate": 0.0002907874572434644,
      "loss": 2.7305,
      "step": 117454
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2456796169281006,
      "learning_rate": 0.0002907833686113255,
      "loss": 2.9916,
      "step": 117455
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.407236337661743,
      "learning_rate": 0.0002907792799809003,
      "loss": 3.1948,
      "step": 117456
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.247302532196045,
      "learning_rate": 0.00029077519135218925,
      "loss": 3.0092,
      "step": 117457
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6435163021087646,
      "learning_rate": 0.00029077110272519326,
      "loss": 3.0696,
      "step": 117458
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0817909240722656,
      "learning_rate": 0.0002907670140999131,
      "loss": 2.9778,
      "step": 117459
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1063272953033447,
      "learning_rate": 0.00029076292547634955,
      "loss": 3.0208,
      "step": 117460
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.013843536376953,
      "learning_rate": 0.0002907588368545033,
      "loss": 2.9746,
      "step": 117461
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.084247350692749,
      "learning_rate": 0.00029075474823437534,
      "loss": 2.8992,
      "step": 117462
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9664783477783203,
      "learning_rate": 0.00029075065961596606,
      "loss": 2.9507,
      "step": 117463
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9611185789108276,
      "learning_rate": 0.00029074657099927644,
      "loss": 2.9539,
      "step": 117464
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.279381036758423,
      "learning_rate": 0.0002907424823843072,
      "loss": 2.797,
      "step": 117465
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7381904125213623,
      "learning_rate": 0.00029073839377105913,
      "loss": 3.0182,
      "step": 117466
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9479116201400757,
      "learning_rate": 0.0002907343051595329,
      "loss": 2.9355,
      "step": 117467
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6828017234802246,
      "learning_rate": 0.00029073021654972954,
      "loss": 3.1376,
      "step": 117468
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.328272581100464,
      "learning_rate": 0.00029072612794164946,
      "loss": 2.9643,
      "step": 117469
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.955840826034546,
      "learning_rate": 0.0002907220393352936,
      "loss": 2.9839,
      "step": 117470
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.432497501373291,
      "learning_rate": 0.0002907179507306626,
      "loss": 2.7631,
      "step": 117471
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9861125946044922,
      "learning_rate": 0.0002907138621277574,
      "loss": 3.1176,
      "step": 117472
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6949453353881836,
      "learning_rate": 0.00029070977352657856,
      "loss": 3.2244,
      "step": 117473
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.3184311389923096,
      "learning_rate": 0.00029070568492712716,
      "loss": 3.0708,
      "step": 117474
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7492297887802124,
      "learning_rate": 0.00029070159632940364,
      "loss": 3.107,
      "step": 117475
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.495328664779663,
      "learning_rate": 0.00029069750773340884,
      "loss": 2.9484,
      "step": 117476
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.728485584259033,
      "learning_rate": 0.0002906934191391435,
      "loss": 2.9347,
      "step": 117477
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7824225425720215,
      "learning_rate": 0.0002906893305466085,
      "loss": 2.8163,
      "step": 117478
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.567316770553589,
      "learning_rate": 0.0002906852419558045,
      "loss": 3.1212,
      "step": 117479
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.86698317527771,
      "learning_rate": 0.0002906811533667324,
      "loss": 2.7544,
      "step": 117480
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.387195110321045,
      "learning_rate": 0.0002906770647793927,
      "loss": 2.9165,
      "step": 117481
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.349138021469116,
      "learning_rate": 0.0002906729761937864,
      "loss": 2.8048,
      "step": 117482
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8623499870300293,
      "learning_rate": 0.00029066888760991407,
      "loss": 3.303,
      "step": 117483
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2497706413269043,
      "learning_rate": 0.00029066479902777666,
      "loss": 2.8806,
      "step": 117484
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.303809404373169,
      "learning_rate": 0.0002906607104473748,
      "loss": 2.8682,
      "step": 117485
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.2040083408355713,
      "learning_rate": 0.0002906566218687094,
      "loss": 2.8144,
      "step": 117486
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.171029806137085,
      "learning_rate": 0.00029065253329178104,
      "loss": 3.0777,
      "step": 117487
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0329747200012207,
      "learning_rate": 0.0002906484447165905,
      "loss": 3.155,
      "step": 117488
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.1631298065185547,
      "learning_rate": 0.0002906443561431386,
      "loss": 2.9934,
      "step": 117489
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.245421886444092,
      "learning_rate": 0.00029064026757142606,
      "loss": 3.0946,
      "step": 117490
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.979019284248352,
      "learning_rate": 0.00029063617900145373,
      "loss": 2.9612,
      "step": 117491
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6424479484558105,
      "learning_rate": 0.00029063209043322236,
      "loss": 3.2116,
      "step": 117492
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.933652400970459,
      "learning_rate": 0.00029062800186673254,
      "loss": 2.7503,
      "step": 117493
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8377104997634888,
      "learning_rate": 0.00029062391330198525,
      "loss": 2.9822,
      "step": 117494
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.67501962184906,
      "learning_rate": 0.0002906198247389811,
      "loss": 3.1538,
      "step": 117495
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9227557182312012,
      "learning_rate": 0.00029061573617772087,
      "loss": 3.171,
      "step": 117496
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7565605640411377,
      "learning_rate": 0.0002906116476182054,
      "loss": 2.8005,
      "step": 117497
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.263909339904785,
      "learning_rate": 0.00029060755906043544,
      "loss": 2.75,
      "step": 117498
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7428779602050781,
      "learning_rate": 0.00029060347050441165,
      "loss": 3.1186,
      "step": 117499
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7438912391662598,
      "learning_rate": 0.0002905993819501349,
      "loss": 3.0154,
      "step": 117500
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.9936439990997314,
      "learning_rate": 0.0002905952933976059,
      "loss": 3.0382,
      "step": 117501
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6801276206970215,
      "learning_rate": 0.00029059120484682536,
      "loss": 3.0042,
      "step": 117502
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.133134365081787,
      "learning_rate": 0.0002905871162977941,
      "loss": 2.8518,
      "step": 117503
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6088671684265137,
      "learning_rate": 0.0002905830277505129,
      "loss": 2.8158,
      "step": 117504
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.648050546646118,
      "learning_rate": 0.0002905789392049825,
      "loss": 2.8668,
      "step": 117505
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.132629632949829,
      "learning_rate": 0.00029057485066120363,
      "loss": 3.123,
      "step": 117506
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8153655529022217,
      "learning_rate": 0.0002905707621191771,
      "loss": 2.9411,
      "step": 117507
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.259338855743408,
      "learning_rate": 0.00029056667357890367,
      "loss": 2.89,
      "step": 117508
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.9375884532928467,
      "learning_rate": 0.000290562585040384,
      "loss": 3.0357,
      "step": 117509
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9664716720581055,
      "learning_rate": 0.00029055849650361893,
      "loss": 2.9341,
      "step": 117510
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.932595133781433,
      "learning_rate": 0.00029055440796860925,
      "loss": 3.1208,
      "step": 117511
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.571279287338257,
      "learning_rate": 0.0002905503194353557,
      "loss": 2.7527,
      "step": 117512
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9391098022460938,
      "learning_rate": 0.00029054623090385895,
      "loss": 3.1145,
      "step": 117513
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6676610708236694,
      "learning_rate": 0.00029054214237411984,
      "loss": 2.843,
      "step": 117514
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.370136260986328,
      "learning_rate": 0.0002905380538461392,
      "loss": 3.1282,
      "step": 117515
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8915293216705322,
      "learning_rate": 0.0002905339653199177,
      "loss": 2.8571,
      "step": 117516
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1169443130493164,
      "learning_rate": 0.0002905298767954561,
      "loss": 3.3628,
      "step": 117517
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.974532127380371,
      "learning_rate": 0.0002905257882727552,
      "loss": 2.8224,
      "step": 117518
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1270551681518555,
      "learning_rate": 0.00029052169975181565,
      "loss": 3.4013,
      "step": 117519
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.974452495574951,
      "learning_rate": 0.00029051761123263835,
      "loss": 2.9047,
      "step": 117520
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0306284427642822,
      "learning_rate": 0.00029051352271522397,
      "loss": 3.0088,
      "step": 117521
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.131347179412842,
      "learning_rate": 0.0002905094341995733,
      "loss": 2.9412,
      "step": 117522
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1646759510040283,
      "learning_rate": 0.00029050534568568725,
      "loss": 2.8908,
      "step": 117523
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.0526838302612305,
      "learning_rate": 0.0002905012571735663,
      "loss": 2.9211,
      "step": 117524
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1529722213745117,
      "learning_rate": 0.0002904971686632114,
      "loss": 3.0249,
      "step": 117525
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.70993971824646,
      "learning_rate": 0.00029049308015462324,
      "loss": 2.9967,
      "step": 117526
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7266120910644531,
      "learning_rate": 0.0002904889916478025,
      "loss": 3.0229,
      "step": 117527
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.181040048599243,
      "learning_rate": 0.0002904849031427501,
      "loss": 3.292,
      "step": 117528
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.884746789932251,
      "learning_rate": 0.0002904808146394669,
      "loss": 3.0425,
      "step": 117529
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0356922149658203,
      "learning_rate": 0.00029047672613795333,
      "loss": 2.9456,
      "step": 117530
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7847336530685425,
      "learning_rate": 0.00029047263763821036,
      "loss": 2.9432,
      "step": 117531
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.01747465133667,
      "learning_rate": 0.00029046854914023865,
      "loss": 2.9667,
      "step": 117532
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.68690824508667,
      "learning_rate": 0.000290464460644039,
      "loss": 3.0842,
      "step": 117533
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7894914150238037,
      "learning_rate": 0.0002904603721496122,
      "loss": 2.827,
      "step": 117534
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1715595722198486,
      "learning_rate": 0.00029045628365695917,
      "loss": 2.7726,
      "step": 117535
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9755038022994995,
      "learning_rate": 0.00029045219516608036,
      "loss": 2.9037,
      "step": 117536
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7965087890625,
      "learning_rate": 0.00029044810667697667,
      "loss": 3.0755,
      "step": 117537
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7552133798599243,
      "learning_rate": 0.0002904440181896488,
      "loss": 2.9339,
      "step": 117538
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.842088222503662,
      "learning_rate": 0.00029043992970409765,
      "loss": 3.241,
      "step": 117539
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6939464807510376,
      "learning_rate": 0.00029043584122032383,
      "loss": 2.7978,
      "step": 117540
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8960492610931396,
      "learning_rate": 0.00029043175273832833,
      "loss": 2.9977,
      "step": 117541
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4124324321746826,
      "learning_rate": 0.00029042766425811157,
      "loss": 3.1951,
      "step": 117542
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3254940509796143,
      "learning_rate": 0.00029042357577967453,
      "loss": 2.9304,
      "step": 117543
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.097231388092041,
      "learning_rate": 0.00029041948730301796,
      "loss": 2.7322,
      "step": 117544
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.830866575241089,
      "learning_rate": 0.0002904153988281425,
      "loss": 3.0813,
      "step": 117545
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8275527954101562,
      "learning_rate": 0.0002904113103550491,
      "loss": 2.692,
      "step": 117546
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8415510654449463,
      "learning_rate": 0.0002904072218837385,
      "loss": 2.9555,
      "step": 117547
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8919498920440674,
      "learning_rate": 0.00029040313341421127,
      "loss": 3.0143,
      "step": 117548
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2311103343963623,
      "learning_rate": 0.00029039904494646826,
      "loss": 2.9016,
      "step": 117549
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0457780361175537,
      "learning_rate": 0.0002903949564805102,
      "loss": 2.7451,
      "step": 117550
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0611186027526855,
      "learning_rate": 0.00029039086801633797,
      "loss": 3.1023,
      "step": 117551
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6055163145065308,
      "learning_rate": 0.0002903867795539522,
      "loss": 3.2337,
      "step": 117552
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.301483154296875,
      "learning_rate": 0.00029038269109335393,
      "loss": 2.9316,
      "step": 117553
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9193001985549927,
      "learning_rate": 0.0002903786026345435,
      "loss": 3.1025,
      "step": 117554
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.270775079727173,
      "learning_rate": 0.0002903745141775219,
      "loss": 3.0555,
      "step": 117555
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.570439338684082,
      "learning_rate": 0.0002903704257222898,
      "loss": 2.8789,
      "step": 117556
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4887444972991943,
      "learning_rate": 0.00029036633726884807,
      "loss": 3.1441,
      "step": 117557
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9177005290985107,
      "learning_rate": 0.00029036224881719743,
      "loss": 2.9597,
      "step": 117558
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3822193145751953,
      "learning_rate": 0.0002903581603673387,
      "loss": 2.9803,
      "step": 117559
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.9862749576568604,
      "learning_rate": 0.0002903540719192725,
      "loss": 2.9481,
      "step": 117560
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.78889000415802,
      "learning_rate": 0.0002903499834729996,
      "loss": 2.9805,
      "step": 117561
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.607788562774658,
      "learning_rate": 0.00029034589502852093,
      "loss": 2.9149,
      "step": 117562
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0005242824554443,
      "learning_rate": 0.0002903418065858371,
      "loss": 2.7424,
      "step": 117563
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.760927438735962,
      "learning_rate": 0.00029033771814494883,
      "loss": 2.9472,
      "step": 117564
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7444496154785156,
      "learning_rate": 0.00029033362970585716,
      "loss": 3.0494,
      "step": 117565
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8308017253875732,
      "learning_rate": 0.0002903295412685625,
      "loss": 2.9493,
      "step": 117566
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7794737815856934,
      "learning_rate": 0.0002903254528330658,
      "loss": 3.168,
      "step": 117567
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2028112411499023,
      "learning_rate": 0.0002903213643993677,
      "loss": 3.095,
      "step": 117568
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2410995960235596,
      "learning_rate": 0.0002903172759674691,
      "loss": 3.0319,
      "step": 117569
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.95371675491333,
      "learning_rate": 0.00029031318753737067,
      "loss": 3.0203,
      "step": 117570
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8673596382141113,
      "learning_rate": 0.0002903090991090732,
      "loss": 3.162,
      "step": 117571
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.173271417617798,
      "learning_rate": 0.00029030501068257763,
      "loss": 3.0891,
      "step": 117572
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9251160621643066,
      "learning_rate": 0.0002903009222578844,
      "loss": 3.1344,
      "step": 117573
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9238688945770264,
      "learning_rate": 0.0002902968338349944,
      "loss": 3.0113,
      "step": 117574
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4677281379699707,
      "learning_rate": 0.0002902927454139084,
      "loss": 2.9567,
      "step": 117575
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.976259469985962,
      "learning_rate": 0.00029028865699462714,
      "loss": 3.3408,
      "step": 117576
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.052825689315796,
      "learning_rate": 0.0002902845685771514,
      "loss": 3.0677,
      "step": 117577
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7155492305755615,
      "learning_rate": 0.0002902804801614821,
      "loss": 3.0545,
      "step": 117578
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9804160594940186,
      "learning_rate": 0.0002902763917476197,
      "loss": 2.9778,
      "step": 117579
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.42915415763855,
      "learning_rate": 0.0002902723033355651,
      "loss": 3.0808,
      "step": 117580
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.139756679534912,
      "learning_rate": 0.00029026821492531907,
      "loss": 2.908,
      "step": 117581
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.60357403755188,
      "learning_rate": 0.00029026412651688235,
      "loss": 3.0103,
      "step": 117582
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2328896522521973,
      "learning_rate": 0.00029026003811025576,
      "loss": 2.9688,
      "step": 117583
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.4456164836883545,
      "learning_rate": 0.00029025594970544,
      "loss": 3.0811,
      "step": 117584
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5310463905334473,
      "learning_rate": 0.0002902518613024359,
      "loss": 3.1252,
      "step": 117585
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.834476113319397,
      "learning_rate": 0.0002902477729012441,
      "loss": 2.7631,
      "step": 117586
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.034607172012329,
      "learning_rate": 0.0002902436845018654,
      "loss": 3.2642,
      "step": 117587
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9657998085021973,
      "learning_rate": 0.00029023959610430055,
      "loss": 2.9225,
      "step": 117588
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3623058795928955,
      "learning_rate": 0.0002902355077085504,
      "loss": 3.0098,
      "step": 117589
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3147289752960205,
      "learning_rate": 0.0002902314193146157,
      "loss": 2.962,
      "step": 117590
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9552459716796875,
      "learning_rate": 0.0002902273309224971,
      "loss": 3.1284,
      "step": 117591
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9884040355682373,
      "learning_rate": 0.00029022324253219543,
      "loss": 3.1561,
      "step": 117592
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.029567241668701,
      "learning_rate": 0.0002902191541437115,
      "loss": 2.6166,
      "step": 117593
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0582094192504883,
      "learning_rate": 0.00029021506575704594,
      "loss": 2.7656,
      "step": 117594
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.057135581970215,
      "learning_rate": 0.0002902109773721996,
      "loss": 3.0184,
      "step": 117595
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.3242993354797363,
      "learning_rate": 0.0002902068889891733,
      "loss": 3.1481,
      "step": 117596
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.704202175140381,
      "learning_rate": 0.0002902028006079676,
      "loss": 2.8724,
      "step": 117597
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1049416065216064,
      "learning_rate": 0.00029019871222858343,
      "loss": 3.064,
      "step": 117598
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.887436032295227,
      "learning_rate": 0.00029019462385102153,
      "loss": 2.8192,
      "step": 117599
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.2524778842926025,
      "learning_rate": 0.00029019053547528267,
      "loss": 3.0943,
      "step": 117600
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.742159128189087,
      "learning_rate": 0.0002901864471013675,
      "loss": 3.0677,
      "step": 117601
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.517641544342041,
      "learning_rate": 0.0002901823587292769,
      "loss": 3.0845,
      "step": 117602
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8285150527954102,
      "learning_rate": 0.0002901782703590116,
      "loss": 2.8808,
      "step": 117603
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.568942070007324,
      "learning_rate": 0.0002901741819905723,
      "loss": 3.2099,
      "step": 117604
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.891545534133911,
      "learning_rate": 0.0002901700936239598,
      "loss": 2.8667,
      "step": 117605
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4315054416656494,
      "learning_rate": 0.00029016600525917484,
      "loss": 2.8797,
      "step": 117606
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1953678131103516,
      "learning_rate": 0.0002901619168962183,
      "loss": 3.2127,
      "step": 117607
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9916166067123413,
      "learning_rate": 0.00029015782853509083,
      "loss": 3.0459,
      "step": 117608
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1183886528015137,
      "learning_rate": 0.00029015374017579313,
      "loss": 2.8334,
      "step": 117609
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6442363262176514,
      "learning_rate": 0.00029014965181832606,
      "loss": 2.9775,
      "step": 117610
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5536835193634033,
      "learning_rate": 0.00029014556346269036,
      "loss": 3.1068,
      "step": 117611
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9298217296600342,
      "learning_rate": 0.00029014147510888675,
      "loss": 3.0627,
      "step": 117612
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.153792381286621,
      "learning_rate": 0.0002901373867569161,
      "loss": 3.0631,
      "step": 117613
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.196606159210205,
      "learning_rate": 0.0002901332984067792,
      "loss": 2.964,
      "step": 117614
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6947128772735596,
      "learning_rate": 0.00029012921005847655,
      "loss": 2.9349,
      "step": 117615
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5620263814926147,
      "learning_rate": 0.00029012512171200903,
      "loss": 3.1135,
      "step": 117616
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.241609573364258,
      "learning_rate": 0.0002901210333673775,
      "loss": 2.8482,
      "step": 117617
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2271251678466797,
      "learning_rate": 0.0002901169450245826,
      "loss": 2.9627,
      "step": 117618
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.412724256515503,
      "learning_rate": 0.0002901128566836252,
      "loss": 3.0592,
      "step": 117619
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9420644044876099,
      "learning_rate": 0.00029010876834450615,
      "loss": 2.8501,
      "step": 117620
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7587459087371826,
      "learning_rate": 0.0002901046800072259,
      "loss": 3.0436,
      "step": 117621
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9369615316390991,
      "learning_rate": 0.00029010059167178535,
      "loss": 2.8774,
      "step": 117622
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1203861236572266,
      "learning_rate": 0.00029009650333818536,
      "loss": 2.9276,
      "step": 117623
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2041022777557373,
      "learning_rate": 0.00029009241500642656,
      "loss": 2.9845,
      "step": 117624
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7947099208831787,
      "learning_rate": 0.00029008832667650977,
      "loss": 2.9625,
      "step": 117625
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.379204511642456,
      "learning_rate": 0.0002900842383484359,
      "loss": 3.2206,
      "step": 117626
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.880507230758667,
      "learning_rate": 0.0002900801500222054,
      "loss": 3.0513,
      "step": 117627
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.0991339683532715,
      "learning_rate": 0.00029007606169781917,
      "loss": 2.8041,
      "step": 117628
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.0933914184570312,
      "learning_rate": 0.00029007197337527804,
      "loss": 3.0249,
      "step": 117629
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.275202512741089,
      "learning_rate": 0.0002900678850545827,
      "loss": 3.1907,
      "step": 117630
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8443284034729004,
      "learning_rate": 0.00029006379673573394,
      "loss": 3.1377,
      "step": 117631
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.0113768577575684,
      "learning_rate": 0.0002900597084187326,
      "loss": 3.0921,
      "step": 117632
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.136448860168457,
      "learning_rate": 0.00029005562010357927,
      "loss": 3.0081,
      "step": 117633
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.2447118759155273,
      "learning_rate": 0.0002900515317902748,
      "loss": 2.9843,
      "step": 117634
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9720274209976196,
      "learning_rate": 0.00029004744347881984,
      "loss": 3.3248,
      "step": 117635
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.485654592514038,
      "learning_rate": 0.00029004335516921536,
      "loss": 2.8766,
      "step": 117636
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6046035289764404,
      "learning_rate": 0.0002900392668614619,
      "loss": 2.8393,
      "step": 117637
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.581897735595703,
      "learning_rate": 0.0002900351785555604,
      "loss": 2.8894,
      "step": 117638
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9343037605285645,
      "learning_rate": 0.00029003109025151167,
      "loss": 3.2437,
      "step": 117639
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8604536056518555,
      "learning_rate": 0.0002900270019493162,
      "loss": 2.6883,
      "step": 117640
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1177573204040527,
      "learning_rate": 0.0002900229136489749,
      "loss": 2.8845,
      "step": 117641
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.1068837642669678,
      "learning_rate": 0.00029001882535048854,
      "loss": 3.2076,
      "step": 117642
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7197837829589844,
      "learning_rate": 0.00029001473705385784,
      "loss": 3.1171,
      "step": 117643
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9386274814605713,
      "learning_rate": 0.0002900106487590836,
      "loss": 2.9716,
      "step": 117644
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9559948444366455,
      "learning_rate": 0.00029000656046616674,
      "loss": 3.232,
      "step": 117645
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7157206535339355,
      "learning_rate": 0.0002900024721751077,
      "loss": 2.9716,
      "step": 117646
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0341382026672363,
      "learning_rate": 0.0002899983838859074,
      "loss": 3.1012,
      "step": 117647
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9936267137527466,
      "learning_rate": 0.0002899942955985666,
      "loss": 2.7724,
      "step": 117648
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2578608989715576,
      "learning_rate": 0.000289990207313086,
      "loss": 2.8647,
      "step": 117649
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9317643642425537,
      "learning_rate": 0.0002899861190294664,
      "loss": 2.9338,
      "step": 117650
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3131520748138428,
      "learning_rate": 0.00028998203074770873,
      "loss": 2.9371,
      "step": 117651
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8378183841705322,
      "learning_rate": 0.00028997794246781345,
      "loss": 3.0346,
      "step": 117652
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.751213788986206,
      "learning_rate": 0.00028997385418978147,
      "loss": 2.838,
      "step": 117653
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.363007068634033,
      "learning_rate": 0.0002899697659136135,
      "loss": 3.129,
      "step": 117654
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8978803157806396,
      "learning_rate": 0.00028996567763931043,
      "loss": 3.0732,
      "step": 117655
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7748394012451172,
      "learning_rate": 0.00028996158936687283,
      "loss": 3.0476,
      "step": 117656
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.864628553390503,
      "learning_rate": 0.0002899575010963018,
      "loss": 3.0037,
      "step": 117657
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.794583559036255,
      "learning_rate": 0.00028995341282759765,
      "loss": 2.968,
      "step": 117658
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7007876634597778,
      "learning_rate": 0.00028994932456076136,
      "loss": 3.0678,
      "step": 117659
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8971335887908936,
      "learning_rate": 0.0002899452362957937,
      "loss": 2.996,
      "step": 117660
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8199893236160278,
      "learning_rate": 0.0002899411480326954,
      "loss": 2.9664,
      "step": 117661
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9400728940963745,
      "learning_rate": 0.0002899370597714672,
      "loss": 3.102,
      "step": 117662
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5578747987747192,
      "learning_rate": 0.00028993297151211003,
      "loss": 2.6088,
      "step": 117663
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.07293701171875,
      "learning_rate": 0.00028992888325462445,
      "loss": 2.7209,
      "step": 117664
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6414377689361572,
      "learning_rate": 0.0002899247949990112,
      "loss": 2.9966,
      "step": 117665
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1077239513397217,
      "learning_rate": 0.0002899207067452712,
      "loss": 2.8015,
      "step": 117666
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6538070440292358,
      "learning_rate": 0.00028991661849340507,
      "loss": 3.0372,
      "step": 117667
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7522135972976685,
      "learning_rate": 0.00028991253024341366,
      "loss": 3.1933,
      "step": 117668
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.073850393295288,
      "learning_rate": 0.0002899084419952977,
      "loss": 2.983,
      "step": 117669
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8679696321487427,
      "learning_rate": 0.00028990435374905805,
      "loss": 2.8418,
      "step": 117670
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5858306884765625,
      "learning_rate": 0.0002899002655046952,
      "loss": 2.8943,
      "step": 117671
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.895255446434021,
      "learning_rate": 0.00028989617726221016,
      "loss": 2.5881,
      "step": 117672
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8646142482757568,
      "learning_rate": 0.0002898920890216036,
      "loss": 2.8728,
      "step": 117673
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6674249172210693,
      "learning_rate": 0.0002898880007828763,
      "loss": 3.1598,
      "step": 117674
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.169546127319336,
      "learning_rate": 0.00028988391254602903,
      "loss": 2.9919,
      "step": 117675
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.979739189147949,
      "learning_rate": 0.0002898798243110625,
      "loss": 3.0412,
      "step": 117676
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.86100435256958,
      "learning_rate": 0.0002898757360779776,
      "loss": 3.1562,
      "step": 117677
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8466212749481201,
      "learning_rate": 0.00028987164784677485,
      "loss": 3.0143,
      "step": 117678
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9180591106414795,
      "learning_rate": 0.00028986755961745515,
      "loss": 2.7858,
      "step": 117679
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7827961444854736,
      "learning_rate": 0.0002898634713900193,
      "loss": 2.966,
      "step": 117680
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.211284637451172,
      "learning_rate": 0.00028985938316446815,
      "loss": 3.0823,
      "step": 117681
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9963231086730957,
      "learning_rate": 0.0002898552949408022,
      "loss": 3.3125,
      "step": 117682
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.824216365814209,
      "learning_rate": 0.0002898512067190223,
      "loss": 3.1194,
      "step": 117683
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.761725425720215,
      "learning_rate": 0.0002898471184991293,
      "loss": 3.3268,
      "step": 117684
    },
    {
      "epoch": 1.53,
      "grad_norm": 6.835761547088623,
      "learning_rate": 0.00028984303028112404,
      "loss": 2.9921,
      "step": 117685
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0811593532562256,
      "learning_rate": 0.000289838942065007,
      "loss": 2.81,
      "step": 117686
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1124746799468994,
      "learning_rate": 0.0002898348538507792,
      "loss": 3.0504,
      "step": 117687
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1685163974761963,
      "learning_rate": 0.0002898307656384412,
      "loss": 3.0645,
      "step": 117688
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5903226137161255,
      "learning_rate": 0.0002898266774279938,
      "loss": 3.0067,
      "step": 117689
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9437974691390991,
      "learning_rate": 0.0002898225892194379,
      "loss": 2.8999,
      "step": 117690
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8877203464508057,
      "learning_rate": 0.00028981850101277414,
      "loss": 2.9843,
      "step": 117691
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0329408645629883,
      "learning_rate": 0.0002898144128080034,
      "loss": 3.1342,
      "step": 117692
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0588250160217285,
      "learning_rate": 0.00028981032460512635,
      "loss": 2.8216,
      "step": 117693
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6698274612426758,
      "learning_rate": 0.00028980623640414364,
      "loss": 2.7727,
      "step": 117694
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3004846572875977,
      "learning_rate": 0.0002898021482050562,
      "loss": 3.0448,
      "step": 117695
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4081616401672363,
      "learning_rate": 0.00028979806000786474,
      "loss": 2.9404,
      "step": 117696
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7908719778060913,
      "learning_rate": 0.00028979397181257,
      "loss": 3.1544,
      "step": 117697
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9558234214782715,
      "learning_rate": 0.0002897898836191727,
      "loss": 2.924,
      "step": 117698
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9038065671920776,
      "learning_rate": 0.00028978579542767385,
      "loss": 2.9971,
      "step": 117699
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9571137428283691,
      "learning_rate": 0.00028978170723807385,
      "loss": 2.8798,
      "step": 117700
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8485983610153198,
      "learning_rate": 0.00028977761905037364,
      "loss": 3.1791,
      "step": 117701
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.106227159500122,
      "learning_rate": 0.00028977353086457393,
      "loss": 2.9169,
      "step": 117702
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4992077350616455,
      "learning_rate": 0.00028976944268067553,
      "loss": 2.8724,
      "step": 117703
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9302947521209717,
      "learning_rate": 0.0002897653544986792,
      "loss": 2.8534,
      "step": 117704
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0850648880004883,
      "learning_rate": 0.0002897612663185857,
      "loss": 2.6982,
      "step": 117705
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2331032752990723,
      "learning_rate": 0.00028975717814039585,
      "loss": 2.9992,
      "step": 117706
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.246001958847046,
      "learning_rate": 0.00028975308996411023,
      "loss": 2.8703,
      "step": 117707
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9246054887771606,
      "learning_rate": 0.0002897490017897297,
      "loss": 3.0513,
      "step": 117708
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7444478273391724,
      "learning_rate": 0.0002897449136172551,
      "loss": 3.1354,
      "step": 117709
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.259955406188965,
      "learning_rate": 0.000289740825446687,
      "loss": 3.3073,
      "step": 117710
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7463856935501099,
      "learning_rate": 0.0002897367372780263,
      "loss": 3.0246,
      "step": 117711
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7942513227462769,
      "learning_rate": 0.0002897326491112739,
      "loss": 2.8056,
      "step": 117712
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.787559986114502,
      "learning_rate": 0.00028972856094643026,
      "loss": 2.8432,
      "step": 117713
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1831657886505127,
      "learning_rate": 0.00028972447278349626,
      "loss": 3.1358,
      "step": 117714
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8420140743255615,
      "learning_rate": 0.00028972038462247264,
      "loss": 2.8722,
      "step": 117715
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6582093238830566,
      "learning_rate": 0.00028971629646336024,
      "loss": 2.837,
      "step": 117716
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.387197494506836,
      "learning_rate": 0.0002897122083061598,
      "loss": 2.9385,
      "step": 117717
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7900521755218506,
      "learning_rate": 0.0002897081201508721,
      "loss": 3.2063,
      "step": 117718
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0054550170898438,
      "learning_rate": 0.0002897040319974978,
      "loss": 2.9374,
      "step": 117719
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.213409662246704,
      "learning_rate": 0.0002896999438460377,
      "loss": 2.9897,
      "step": 117720
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.724664568901062,
      "learning_rate": 0.00028969585569649253,
      "loss": 3.1616,
      "step": 117721
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.905674934387207,
      "learning_rate": 0.00028969176754886313,
      "loss": 2.7235,
      "step": 117722
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9714659452438354,
      "learning_rate": 0.00028968767940315023,
      "loss": 3.0003,
      "step": 117723
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0560295581817627,
      "learning_rate": 0.00028968359125935473,
      "loss": 3.1909,
      "step": 117724
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8334088325500488,
      "learning_rate": 0.0002896795031174771,
      "loss": 2.8854,
      "step": 117725
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6327584981918335,
      "learning_rate": 0.0002896754149775182,
      "loss": 2.9227,
      "step": 117726
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6454004049301147,
      "learning_rate": 0.0002896713268394789,
      "loss": 2.9948,
      "step": 117727
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6496686935424805,
      "learning_rate": 0.0002896672387033599,
      "loss": 2.9383,
      "step": 117728
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.498967409133911,
      "learning_rate": 0.0002896631505691619,
      "loss": 2.9116,
      "step": 117729
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6286762952804565,
      "learning_rate": 0.00028965906243688585,
      "loss": 3.0162,
      "step": 117730
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3632044792175293,
      "learning_rate": 0.0002896549743065323,
      "loss": 3.398,
      "step": 117731
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.138566255569458,
      "learning_rate": 0.00028965088617810206,
      "loss": 2.7264,
      "step": 117732
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.068018674850464,
      "learning_rate": 0.0002896467980515959,
      "loss": 2.8539,
      "step": 117733
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.966892123222351,
      "learning_rate": 0.0002896427099270146,
      "loss": 2.9179,
      "step": 117734
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9986860752105713,
      "learning_rate": 0.0002896386218043589,
      "loss": 2.8153,
      "step": 117735
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8719170093536377,
      "learning_rate": 0.0002896345336836297,
      "loss": 3.1475,
      "step": 117736
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4011051654815674,
      "learning_rate": 0.0002896304455648276,
      "loss": 2.9895,
      "step": 117737
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.228302478790283,
      "learning_rate": 0.0002896263574479533,
      "loss": 3.153,
      "step": 117738
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4899373054504395,
      "learning_rate": 0.0002896222693330077,
      "loss": 2.9146,
      "step": 117739
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8899644613265991,
      "learning_rate": 0.0002896181812199915,
      "loss": 2.9567,
      "step": 117740
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.1763477325439453,
      "learning_rate": 0.00028961409310890546,
      "loss": 3.0961,
      "step": 117741
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.580716133117676,
      "learning_rate": 0.0002896100049997505,
      "loss": 3.136,
      "step": 117742
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0439460277557373,
      "learning_rate": 0.0002896059168925271,
      "loss": 3.0449,
      "step": 117743
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.194030284881592,
      "learning_rate": 0.00028960182878723617,
      "loss": 2.805,
      "step": 117744
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5713958740234375,
      "learning_rate": 0.00028959774068387843,
      "loss": 2.765,
      "step": 117745
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4780633449554443,
      "learning_rate": 0.0002895936525824547,
      "loss": 3.0552,
      "step": 117746
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2382564544677734,
      "learning_rate": 0.00028958956448296567,
      "loss": 3.07,
      "step": 117747
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.833097219467163,
      "learning_rate": 0.0002895854763854123,
      "loss": 3.1241,
      "step": 117748
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7397470474243164,
      "learning_rate": 0.00028958138828979503,
      "loss": 2.9413,
      "step": 117749
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0162484645843506,
      "learning_rate": 0.00028957730019611483,
      "loss": 2.9201,
      "step": 117750
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.297499656677246,
      "learning_rate": 0.00028957321210437237,
      "loss": 3.1035,
      "step": 117751
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.218034029006958,
      "learning_rate": 0.0002895691240145684,
      "loss": 3.1907,
      "step": 117752
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9145078659057617,
      "learning_rate": 0.0002895650359267038,
      "loss": 3.0195,
      "step": 117753
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.115689277648926,
      "learning_rate": 0.0002895609478407793,
      "loss": 2.9222,
      "step": 117754
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9794570207595825,
      "learning_rate": 0.0002895568597567956,
      "loss": 2.9703,
      "step": 117755
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7098662853240967,
      "learning_rate": 0.00028955277167475344,
      "loss": 2.9539,
      "step": 117756
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.863919734954834,
      "learning_rate": 0.0002895486835946536,
      "loss": 2.9225,
      "step": 117757
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.265629529953003,
      "learning_rate": 0.0002895445955164968,
      "loss": 3.0872,
      "step": 117758
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.146512031555176,
      "learning_rate": 0.00028954050744028393,
      "loss": 2.9857,
      "step": 117759
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5396432876586914,
      "learning_rate": 0.0002895364193660157,
      "loss": 3.1516,
      "step": 117760
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8092856407165527,
      "learning_rate": 0.00028953233129369284,
      "loss": 3.1961,
      "step": 117761
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.618600606918335,
      "learning_rate": 0.00028952824322331616,
      "loss": 2.7515,
      "step": 117762
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.2529914379119873,
      "learning_rate": 0.0002895241551548863,
      "loss": 2.676,
      "step": 117763
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.866117238998413,
      "learning_rate": 0.00028952006708840415,
      "loss": 3.0409,
      "step": 117764
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6364020109176636,
      "learning_rate": 0.00028951597902387034,
      "loss": 2.9372,
      "step": 117765
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.051098585128784,
      "learning_rate": 0.00028951189096128576,
      "loss": 3.0052,
      "step": 117766
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8229446411132812,
      "learning_rate": 0.0002895078029006511,
      "loss": 3.1078,
      "step": 117767
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.747896909713745,
      "learning_rate": 0.0002895037148419671,
      "loss": 2.9617,
      "step": 117768
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0384469032287598,
      "learning_rate": 0.0002894996267852347,
      "loss": 2.8661,
      "step": 117769
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.23473858833313,
      "learning_rate": 0.0002894955387304544,
      "loss": 3.056,
      "step": 117770
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0988545417785645,
      "learning_rate": 0.00028949145067762706,
      "loss": 3.0577,
      "step": 117771
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9018105268478394,
      "learning_rate": 0.0002894873626267535,
      "loss": 3.0344,
      "step": 117772
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5641577243804932,
      "learning_rate": 0.0002894832745778345,
      "loss": 3.0208,
      "step": 117773
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1786890029907227,
      "learning_rate": 0.0002894791865308707,
      "loss": 3.0473,
      "step": 117774
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1639513969421387,
      "learning_rate": 0.0002894750984858629,
      "loss": 2.9801,
      "step": 117775
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1373813152313232,
      "learning_rate": 0.0002894710104428119,
      "loss": 3.0645,
      "step": 117776
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.6094536781311035,
      "learning_rate": 0.0002894669224017184,
      "loss": 2.7806,
      "step": 117777
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.059811592102051,
      "learning_rate": 0.0002894628343625832,
      "loss": 3.0661,
      "step": 117778
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9379329681396484,
      "learning_rate": 0.0002894587463254071,
      "loss": 3.0568,
      "step": 117779
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4985902309417725,
      "learning_rate": 0.00028945465829019083,
      "loss": 2.8202,
      "step": 117780
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.327173948287964,
      "learning_rate": 0.00028945057025693506,
      "loss": 3.1328,
      "step": 117781
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9064853191375732,
      "learning_rate": 0.0002894464822256407,
      "loss": 3.1567,
      "step": 117782
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.569880485534668,
      "learning_rate": 0.0002894423941963084,
      "loss": 3.0606,
      "step": 117783
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.124349117279053,
      "learning_rate": 0.000289438306168939,
      "loss": 2.9364,
      "step": 117784
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.073406457901001,
      "learning_rate": 0.0002894342181435332,
      "loss": 2.9151,
      "step": 117785
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.964210033416748,
      "learning_rate": 0.0002894301301200918,
      "loss": 2.926,
      "step": 117786
    },
    {
      "epoch": 1.53,
      "grad_norm": 4.178759574890137,
      "learning_rate": 0.00028942604209861546,
      "loss": 2.956,
      "step": 117787
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.3326210975646973,
      "learning_rate": 0.00028942195407910505,
      "loss": 3.2473,
      "step": 117788
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.313429832458496,
      "learning_rate": 0.0002894178660615613,
      "loss": 2.983,
      "step": 117789
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.291128396987915,
      "learning_rate": 0.000289413778045985,
      "loss": 3.1125,
      "step": 117790
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3193061351776123,
      "learning_rate": 0.00028940969003237696,
      "loss": 2.9633,
      "step": 117791
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.9837119579315186,
      "learning_rate": 0.00028940560202073774,
      "loss": 2.9579,
      "step": 117792
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.150599479675293,
      "learning_rate": 0.00028940151401106825,
      "loss": 2.8119,
      "step": 117793
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.637738585472107,
      "learning_rate": 0.0002893974260033692,
      "loss": 2.9971,
      "step": 117794
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.22410249710083,
      "learning_rate": 0.0002893933379976413,
      "loss": 2.7809,
      "step": 117795
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.548614263534546,
      "learning_rate": 0.0002893892499938855,
      "loss": 3.1289,
      "step": 117796
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8285222053527832,
      "learning_rate": 0.0002893851619921025,
      "loss": 3.1374,
      "step": 117797
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.026545286178589,
      "learning_rate": 0.00028938107399229283,
      "loss": 3.1781,
      "step": 117798
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6833090782165527,
      "learning_rate": 0.00028937698599445746,
      "loss": 2.9429,
      "step": 117799
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9215270280838013,
      "learning_rate": 0.0002893728979985971,
      "loss": 3.1153,
      "step": 117800
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.140894889831543,
      "learning_rate": 0.0002893688100047126,
      "loss": 3.1425,
      "step": 117801
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.676072120666504,
      "learning_rate": 0.00028936472201280454,
      "loss": 3.1529,
      "step": 117802
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8925944566726685,
      "learning_rate": 0.0002893606340228739,
      "loss": 3.0543,
      "step": 117803
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.8280370235443115,
      "learning_rate": 0.00028935654603492127,
      "loss": 2.8381,
      "step": 117804
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0655887126922607,
      "learning_rate": 0.0002893524580489474,
      "loss": 2.9712,
      "step": 117805
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.779476284980774,
      "learning_rate": 0.0002893483700649531,
      "loss": 2.9086,
      "step": 117806
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.274894952774048,
      "learning_rate": 0.0002893442820829392,
      "loss": 3.088,
      "step": 117807
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8258318901062012,
      "learning_rate": 0.00028934019410290635,
      "loss": 3.021,
      "step": 117808
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.868139386177063,
      "learning_rate": 0.0002893361061248555,
      "loss": 2.9384,
      "step": 117809
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.914601445198059,
      "learning_rate": 0.00028933201814878716,
      "loss": 3.0976,
      "step": 117810
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.0027542114257812,
      "learning_rate": 0.00028932793017470216,
      "loss": 2.7947,
      "step": 117811
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7956446409225464,
      "learning_rate": 0.00028932384220260135,
      "loss": 3.0572,
      "step": 117812
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6587581634521484,
      "learning_rate": 0.0002893197542324854,
      "loss": 2.9675,
      "step": 117813
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1889452934265137,
      "learning_rate": 0.0002893156662643551,
      "loss": 3.0014,
      "step": 117814
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9094009399414062,
      "learning_rate": 0.0002893115782982114,
      "loss": 3.4857,
      "step": 117815
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.8284311294555664,
      "learning_rate": 0.0002893074903340547,
      "loss": 2.6813,
      "step": 117816
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9320714473724365,
      "learning_rate": 0.00028930340237188595,
      "loss": 3.0336,
      "step": 117817
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8119884729385376,
      "learning_rate": 0.0002892993144117059,
      "loss": 2.8517,
      "step": 117818
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3847408294677734,
      "learning_rate": 0.00028929522645351535,
      "loss": 2.9058,
      "step": 117819
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9555193185806274,
      "learning_rate": 0.00028929113849731497,
      "loss": 3.0706,
      "step": 117820
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.654209852218628,
      "learning_rate": 0.0002892870505431057,
      "loss": 2.9998,
      "step": 117821
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.693306803703308,
      "learning_rate": 0.00028928296259088805,
      "loss": 2.8574,
      "step": 117822
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.058880090713501,
      "learning_rate": 0.0002892788746406629,
      "loss": 3.1518,
      "step": 117823
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.811456561088562,
      "learning_rate": 0.00028927478669243104,
      "loss": 3.1878,
      "step": 117824
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6712408065795898,
      "learning_rate": 0.0002892706987461932,
      "loss": 2.8362,
      "step": 117825
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.947268009185791,
      "learning_rate": 0.0002892666108019501,
      "loss": 3.2078,
      "step": 117826
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5461370944976807,
      "learning_rate": 0.0002892625228597027,
      "loss": 2.9439,
      "step": 117827
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.336735486984253,
      "learning_rate": 0.00028925843491945145,
      "loss": 2.806,
      "step": 117828
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.142082452774048,
      "learning_rate": 0.0002892543469811972,
      "loss": 3.245,
      "step": 117829
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.094925880432129,
      "learning_rate": 0.00028925025904494084,
      "loss": 2.937,
      "step": 117830
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.746302843093872,
      "learning_rate": 0.0002892461711106831,
      "loss": 2.7889,
      "step": 117831
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7988433837890625,
      "learning_rate": 0.00028924208317842464,
      "loss": 2.8776,
      "step": 117832
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.512608289718628,
      "learning_rate": 0.0002892379952481664,
      "loss": 2.9307,
      "step": 117833
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.184235095977783,
      "learning_rate": 0.00028923390731990894,
      "loss": 2.9841,
      "step": 117834
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.831831932067871,
      "learning_rate": 0.00028922981939365303,
      "loss": 2.686,
      "step": 117835
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9106751680374146,
      "learning_rate": 0.00028922573146939953,
      "loss": 2.8485,
      "step": 117836
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.4416587352752686,
      "learning_rate": 0.0002892216435471492,
      "loss": 2.9535,
      "step": 117837
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6304199695587158,
      "learning_rate": 0.00028921755562690276,
      "loss": 3.1929,
      "step": 117838
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.7978132963180542,
      "learning_rate": 0.000289213467708661,
      "loss": 3.1543,
      "step": 117839
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.890616536140442,
      "learning_rate": 0.00028920937979242474,
      "loss": 2.8465,
      "step": 117840
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3301751613616943,
      "learning_rate": 0.0002892052918781945,
      "loss": 2.8974,
      "step": 117841
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.778108835220337,
      "learning_rate": 0.0002892012039659713,
      "loss": 3.0794,
      "step": 117842
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8482229709625244,
      "learning_rate": 0.00028919711605575576,
      "loss": 3.1631,
      "step": 117843
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.5603375434875488,
      "learning_rate": 0.00028919302814754867,
      "loss": 2.9751,
      "step": 117844
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.069282293319702,
      "learning_rate": 0.00028918894024135077,
      "loss": 2.8627,
      "step": 117845
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6900755167007446,
      "learning_rate": 0.000289184852337163,
      "loss": 3.0495,
      "step": 117846
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0804920196533203,
      "learning_rate": 0.0002891807644349859,
      "loss": 3.0481,
      "step": 117847
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.386749267578125,
      "learning_rate": 0.00028917667653482023,
      "loss": 3.0819,
      "step": 117848
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8772435188293457,
      "learning_rate": 0.00028917258863666684,
      "loss": 3.1288,
      "step": 117849
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.6760302782058716,
      "learning_rate": 0.0002891685007405265,
      "loss": 2.9835,
      "step": 117850
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9840096235275269,
      "learning_rate": 0.0002891644128463999,
      "loss": 2.8826,
      "step": 117851
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.19785475730896,
      "learning_rate": 0.00028916032495428793,
      "loss": 3.1022,
      "step": 117852
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.321735382080078,
      "learning_rate": 0.00028915623706419113,
      "loss": 3.0826,
      "step": 117853
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.816480278968811,
      "learning_rate": 0.0002891521491761105,
      "loss": 2.909,
      "step": 117854
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0577433109283447,
      "learning_rate": 0.00028914806129004664,
      "loss": 2.8861,
      "step": 117855
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3428566455841064,
      "learning_rate": 0.00028914397340600034,
      "loss": 2.9098,
      "step": 117856
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.1203324794769287,
      "learning_rate": 0.0002891398855239724,
      "loss": 2.709,
      "step": 117857
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.07395601272583,
      "learning_rate": 0.00028913579764396367,
      "loss": 2.8391,
      "step": 117858
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.075721502304077,
      "learning_rate": 0.00028913170976597465,
      "loss": 2.9553,
      "step": 117859
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5707504749298096,
      "learning_rate": 0.00028912762189000626,
      "loss": 2.8178,
      "step": 117860
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.7660229206085205,
      "learning_rate": 0.00028912353401605936,
      "loss": 2.9206,
      "step": 117861
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8236817121505737,
      "learning_rate": 0.00028911944614413456,
      "loss": 2.9952,
      "step": 117862
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2630157470703125,
      "learning_rate": 0.0002891153582742326,
      "loss": 3.1167,
      "step": 117863
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.077528715133667,
      "learning_rate": 0.00028911127040635436,
      "loss": 2.8389,
      "step": 117864
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.0080060958862305,
      "learning_rate": 0.0002891071825405005,
      "loss": 3.147,
      "step": 117865
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.672107219696045,
      "learning_rate": 0.0002891030946766718,
      "loss": 2.8489,
      "step": 117866
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8199199438095093,
      "learning_rate": 0.00028909900681486905,
      "loss": 3.2018,
      "step": 117867
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8850924968719482,
      "learning_rate": 0.0002890949189550931,
      "loss": 2.95,
      "step": 117868
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8738176822662354,
      "learning_rate": 0.00028909083109734447,
      "loss": 3.0039,
      "step": 117869
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.738585352897644,
      "learning_rate": 0.0002890867432416242,
      "loss": 3.0549,
      "step": 117870
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1257681846618652,
      "learning_rate": 0.0002890826553879328,
      "loss": 3.1102,
      "step": 117871
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8561248779296875,
      "learning_rate": 0.0002890785675362712,
      "loss": 2.7486,
      "step": 117872
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7071313858032227,
      "learning_rate": 0.00028907447968664,
      "loss": 3.0608,
      "step": 117873
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9326194524765015,
      "learning_rate": 0.00028907039183904016,
      "loss": 2.7991,
      "step": 117874
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8152832984924316,
      "learning_rate": 0.00028906630399347225,
      "loss": 3.0259,
      "step": 117875
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8963242769241333,
      "learning_rate": 0.00028906221614993734,
      "loss": 3.0862,
      "step": 117876
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.3375935554504395,
      "learning_rate": 0.0002890581283084358,
      "loss": 3.0355,
      "step": 117877
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.116143226623535,
      "learning_rate": 0.0002890540404689686,
      "loss": 3.2178,
      "step": 117878
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7820796966552734,
      "learning_rate": 0.0002890499526315364,
      "loss": 2.8756,
      "step": 117879
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8529269695281982,
      "learning_rate": 0.00028904586479614006,
      "loss": 2.8742,
      "step": 117880
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.8262678384780884,
      "learning_rate": 0.00028904177696278027,
      "loss": 3.1472,
      "step": 117881
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.2728283405303955,
      "learning_rate": 0.00028903768913145797,
      "loss": 2.9308,
      "step": 117882
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.895554780960083,
      "learning_rate": 0.00028903360130217364,
      "loss": 3.1228,
      "step": 117883
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.056936264038086,
      "learning_rate": 0.00028902951347492816,
      "loss": 3.1587,
      "step": 117884
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.5630483627319336,
      "learning_rate": 0.0002890254256497223,
      "loss": 3.0757,
      "step": 117885
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.1119749546051025,
      "learning_rate": 0.0002890213378265568,
      "loss": 3.0246,
      "step": 117886
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.6484134197235107,
      "learning_rate": 0.0002890172500054325,
      "loss": 2.9248,
      "step": 117887
    },
    {
      "epoch": 1.53,
      "grad_norm": 2.7611234188079834,
      "learning_rate": 0.0002890131621863502,
      "loss": 3.1145,
      "step": 117888
    },
    {
      "epoch": 1.53,
      "grad_norm": 1.9464037418365479,
      "learning_rate": 0.00028900907436931036,
      "loss": 3.1723,
      "step": 117889
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.960023283958435,
      "learning_rate": 0.00028900498655431403,
      "loss": 3.148,
      "step": 117890
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9316326379776,
      "learning_rate": 0.0002890008987413619,
      "loss": 3.042,
      "step": 117891
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9478302001953125,
      "learning_rate": 0.00028899681093045465,
      "loss": 2.8351,
      "step": 117892
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.710136651992798,
      "learning_rate": 0.0002889927231215931,
      "loss": 2.9841,
      "step": 117893
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.531325578689575,
      "learning_rate": 0.00028898863531477817,
      "loss": 3.0275,
      "step": 117894
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7596004009246826,
      "learning_rate": 0.0002889845475100103,
      "loss": 2.9545,
      "step": 117895
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.785007119178772,
      "learning_rate": 0.00028898045970729045,
      "loss": 3.0209,
      "step": 117896
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.872514486312866,
      "learning_rate": 0.0002889763719066193,
      "loss": 2.9948,
      "step": 117897
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.608721137046814,
      "learning_rate": 0.0002889722841079977,
      "loss": 3.0559,
      "step": 117898
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.904367685317993,
      "learning_rate": 0.00028896819631142633,
      "loss": 2.9593,
      "step": 117899
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7805358171463013,
      "learning_rate": 0.00028896410851690613,
      "loss": 3.0067,
      "step": 117900
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6736360788345337,
      "learning_rate": 0.00028896002072443757,
      "loss": 3.0564,
      "step": 117901
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.448922872543335,
      "learning_rate": 0.0002889559329340215,
      "loss": 2.9226,
      "step": 117902
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3043456077575684,
      "learning_rate": 0.00028895184514565877,
      "loss": 2.9112,
      "step": 117903
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0653882026672363,
      "learning_rate": 0.00028894775735935016,
      "loss": 3.0014,
      "step": 117904
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7100915908813477,
      "learning_rate": 0.00028894366957509627,
      "loss": 2.905,
      "step": 117905
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0260486602783203,
      "learning_rate": 0.00028893958179289803,
      "loss": 3.0323,
      "step": 117906
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.918475866317749,
      "learning_rate": 0.00028893549401275625,
      "loss": 3.1784,
      "step": 117907
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7332143783569336,
      "learning_rate": 0.0002889314062346714,
      "loss": 3.1517,
      "step": 117908
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0522119998931885,
      "learning_rate": 0.0002889273184586444,
      "loss": 3.0553,
      "step": 117909
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.08709716796875,
      "learning_rate": 0.0002889232306846761,
      "loss": 2.8739,
      "step": 117910
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.45636248588562,
      "learning_rate": 0.0002889191429127671,
      "loss": 2.8794,
      "step": 117911
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.794373869895935,
      "learning_rate": 0.00028891505514291833,
      "loss": 3.0664,
      "step": 117912
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8270998001098633,
      "learning_rate": 0.00028891096737513055,
      "loss": 2.9164,
      "step": 117913
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0276401042938232,
      "learning_rate": 0.00028890687960940426,
      "loss": 2.763,
      "step": 117914
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7463135719299316,
      "learning_rate": 0.0002889027918457404,
      "loss": 2.9354,
      "step": 117915
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.864626169204712,
      "learning_rate": 0.0002888987040841398,
      "loss": 3.1821,
      "step": 117916
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8352231979370117,
      "learning_rate": 0.00028889461632460305,
      "loss": 2.9195,
      "step": 117917
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.830119013786316,
      "learning_rate": 0.000288890528567131,
      "loss": 2.9779,
      "step": 117918
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4040873050689697,
      "learning_rate": 0.0002888864408117246,
      "loss": 3.0344,
      "step": 117919
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6791305541992188,
      "learning_rate": 0.00028888235305838425,
      "loss": 3.1688,
      "step": 117920
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4679183959960938,
      "learning_rate": 0.0002888782653071109,
      "loss": 2.9808,
      "step": 117921
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9231953620910645,
      "learning_rate": 0.0002888741775579053,
      "loss": 2.984,
      "step": 117922
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.949430465698242,
      "learning_rate": 0.0002888700898107682,
      "loss": 3.071,
      "step": 117923
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0028469562530518,
      "learning_rate": 0.0002888660020657003,
      "loss": 3.2788,
      "step": 117924
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7104575634002686,
      "learning_rate": 0.00028886191432270263,
      "loss": 2.9635,
      "step": 117925
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0164167881011963,
      "learning_rate": 0.0002888578265817756,
      "loss": 2.9551,
      "step": 117926
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.833767294883728,
      "learning_rate": 0.00028885373884292005,
      "loss": 3.031,
      "step": 117927
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.12943172454834,
      "learning_rate": 0.0002888496511061368,
      "loss": 2.9777,
      "step": 117928
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.179018497467041,
      "learning_rate": 0.0002888455633714267,
      "loss": 2.949,
      "step": 117929
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6789000034332275,
      "learning_rate": 0.00028884147563879037,
      "loss": 2.8018,
      "step": 117930
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.021467924118042,
      "learning_rate": 0.0002888373879082287,
      "loss": 2.824,
      "step": 117931
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1495423316955566,
      "learning_rate": 0.00028883330017974234,
      "loss": 3.0144,
      "step": 117932
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.966845154762268,
      "learning_rate": 0.000288829212453332,
      "loss": 3.2733,
      "step": 117933
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.61203670501709,
      "learning_rate": 0.00028882512472899855,
      "loss": 3.0079,
      "step": 117934
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8617371320724487,
      "learning_rate": 0.0002888210370067427,
      "loss": 2.8443,
      "step": 117935
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7326432466506958,
      "learning_rate": 0.00028881694928656527,
      "loss": 3.0502,
      "step": 117936
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4324183464050293,
      "learning_rate": 0.000288812861568467,
      "loss": 2.8911,
      "step": 117937
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3019323348999023,
      "learning_rate": 0.00028880877385244864,
      "loss": 3.0265,
      "step": 117938
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5442147254943848,
      "learning_rate": 0.00028880468613851095,
      "loss": 2.8707,
      "step": 117939
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0305933952331543,
      "learning_rate": 0.0002888005984266546,
      "loss": 3.0038,
      "step": 117940
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.17962384223938,
      "learning_rate": 0.0002887965107168804,
      "loss": 3.0099,
      "step": 117941
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9578220844268799,
      "learning_rate": 0.0002887924230091892,
      "loss": 2.6397,
      "step": 117942
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8980066776275635,
      "learning_rate": 0.0002887883353035818,
      "loss": 3.2164,
      "step": 117943
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.6466803550720215,
      "learning_rate": 0.0002887842476000588,
      "loss": 3.001,
      "step": 117944
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0220179557800293,
      "learning_rate": 0.0002887801598986209,
      "loss": 2.9503,
      "step": 117945
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.308432102203369,
      "learning_rate": 0.0002887760721992691,
      "loss": 2.9825,
      "step": 117946
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8510229587554932,
      "learning_rate": 0.000288771984502004,
      "loss": 3.0211,
      "step": 117947
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.874428391456604,
      "learning_rate": 0.0002887678968068264,
      "loss": 3.1207,
      "step": 117948
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.250440835952759,
      "learning_rate": 0.0002887638091137371,
      "loss": 3.0121,
      "step": 117949
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3314144611358643,
      "learning_rate": 0.0002887597214227368,
      "loss": 3.0899,
      "step": 117950
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0085854530334473,
      "learning_rate": 0.00028875563373382624,
      "loss": 3.0181,
      "step": 117951
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.226304769515991,
      "learning_rate": 0.00028875154604700624,
      "loss": 3.0669,
      "step": 117952
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6769992113113403,
      "learning_rate": 0.0002887474583622776,
      "loss": 3.0067,
      "step": 117953
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3798818588256836,
      "learning_rate": 0.00028874337067964095,
      "loss": 3.0637,
      "step": 117954
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1669633388519287,
      "learning_rate": 0.0002887392829990972,
      "loss": 2.9905,
      "step": 117955
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8207831382751465,
      "learning_rate": 0.0002887351953206469,
      "loss": 2.8846,
      "step": 117956
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.087294340133667,
      "learning_rate": 0.000288731107644291,
      "loss": 3.0025,
      "step": 117957
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.237809658050537,
      "learning_rate": 0.0002887270199700302,
      "loss": 3.1541,
      "step": 117958
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7337244749069214,
      "learning_rate": 0.00028872293229786526,
      "loss": 2.7656,
      "step": 117959
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6227976083755493,
      "learning_rate": 0.000288718844627797,
      "loss": 3.1317,
      "step": 117960
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2192928791046143,
      "learning_rate": 0.0002887147569598261,
      "loss": 2.771,
      "step": 117961
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7840855121612549,
      "learning_rate": 0.0002887106692939533,
      "loss": 3.1834,
      "step": 117962
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9602257013320923,
      "learning_rate": 0.00028870658163017936,
      "loss": 2.9219,
      "step": 117963
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7505754232406616,
      "learning_rate": 0.00028870249396850515,
      "loss": 3.0246,
      "step": 117964
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2453489303588867,
      "learning_rate": 0.00028869840630893126,
      "loss": 2.9078,
      "step": 117965
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0923218727111816,
      "learning_rate": 0.0002886943186514586,
      "loss": 3.0994,
      "step": 117966
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.143749713897705,
      "learning_rate": 0.00028869023099608806,
      "loss": 3.0613,
      "step": 117967
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.842479944229126,
      "learning_rate": 0.00028868614334282006,
      "loss": 2.8302,
      "step": 117968
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.01884388923645,
      "learning_rate": 0.0002886820556916555,
      "loss": 2.7664,
      "step": 117969
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7091971635818481,
      "learning_rate": 0.00028867796804259513,
      "loss": 3.2502,
      "step": 117970
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.015289783477783,
      "learning_rate": 0.0002886738803956398,
      "loss": 3.0868,
      "step": 117971
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8468596935272217,
      "learning_rate": 0.00028866979275079017,
      "loss": 2.9486,
      "step": 117972
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8019206523895264,
      "learning_rate": 0.00028866570510804705,
      "loss": 2.9551,
      "step": 117973
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3469908237457275,
      "learning_rate": 0.00028866161746741135,
      "loss": 2.9849,
      "step": 117974
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4638619422912598,
      "learning_rate": 0.0002886575298288835,
      "loss": 2.9103,
      "step": 117975
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.58451771736145,
      "learning_rate": 0.00028865344219246444,
      "loss": 2.8895,
      "step": 117976
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8754150867462158,
      "learning_rate": 0.0002886493545581549,
      "loss": 3.1367,
      "step": 117977
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.915542483329773,
      "learning_rate": 0.0002886452669259557,
      "loss": 3.0334,
      "step": 117978
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9935357570648193,
      "learning_rate": 0.0002886411792958675,
      "loss": 3.0196,
      "step": 117979
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6489545106887817,
      "learning_rate": 0.00028863709166789133,
      "loss": 3.0079,
      "step": 117980
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9508674144744873,
      "learning_rate": 0.00028863300404202756,
      "loss": 2.8882,
      "step": 117981
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9097844362258911,
      "learning_rate": 0.0002886289164182771,
      "loss": 2.9719,
      "step": 117982
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.699340581893921,
      "learning_rate": 0.0002886248287966408,
      "loss": 3.3048,
      "step": 117983
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4289004802703857,
      "learning_rate": 0.00028862074117711936,
      "loss": 2.9866,
      "step": 117984
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.509293794631958,
      "learning_rate": 0.00028861665355971346,
      "loss": 2.9344,
      "step": 117985
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9461029767990112,
      "learning_rate": 0.0002886125659444242,
      "loss": 3.0384,
      "step": 117986
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.949479103088379,
      "learning_rate": 0.0002886084783312518,
      "loss": 2.9714,
      "step": 117987
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.065521240234375,
      "learning_rate": 0.0002886043907201974,
      "loss": 3.1235,
      "step": 117988
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.9951260089874268,
      "learning_rate": 0.00028860030311126164,
      "loss": 3.0186,
      "step": 117989
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.431574583053589,
      "learning_rate": 0.0002885962155044453,
      "loss": 2.8093,
      "step": 117990
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.89751136302948,
      "learning_rate": 0.0002885921278997491,
      "loss": 3.0475,
      "step": 117991
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5499138832092285,
      "learning_rate": 0.000288588040297174,
      "loss": 2.8578,
      "step": 117992
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9542503356933594,
      "learning_rate": 0.0002885839526967205,
      "loss": 3.0729,
      "step": 117993
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.176422357559204,
      "learning_rate": 0.00028857986509838937,
      "loss": 2.7124,
      "step": 117994
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9244023561477661,
      "learning_rate": 0.0002885757775021815,
      "loss": 3.0665,
      "step": 117995
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.322720527648926,
      "learning_rate": 0.00028857168990809766,
      "loss": 3.0333,
      "step": 117996
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0045933723449707,
      "learning_rate": 0.0002885676023161385,
      "loss": 2.9489,
      "step": 117997
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1776604652404785,
      "learning_rate": 0.000288563514726305,
      "loss": 2.9336,
      "step": 117998
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.286435842514038,
      "learning_rate": 0.00028855942713859763,
      "loss": 3.0922,
      "step": 117999
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8874746561050415,
      "learning_rate": 0.00028855533955301723,
      "loss": 2.9484,
      "step": 118000
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.315535545349121,
      "learning_rate": 0.00028855125196956463,
      "loss": 2.8682,
      "step": 118001
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1667916774749756,
      "learning_rate": 0.0002885471643882406,
      "loss": 2.9248,
      "step": 118002
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.230292320251465,
      "learning_rate": 0.0002885430768090458,
      "loss": 2.736,
      "step": 118003
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8971582651138306,
      "learning_rate": 0.0002885389892319813,
      "loss": 2.8292,
      "step": 118004
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.957535743713379,
      "learning_rate": 0.00028853490165704743,
      "loss": 2.9765,
      "step": 118005
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.321377754211426,
      "learning_rate": 0.0002885308140842451,
      "loss": 2.8661,
      "step": 118006
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1525721549987793,
      "learning_rate": 0.00028852672651357515,
      "loss": 3.271,
      "step": 118007
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6941883563995361,
      "learning_rate": 0.0002885226389450383,
      "loss": 3.1386,
      "step": 118008
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.079655408859253,
      "learning_rate": 0.0002885185513786353,
      "loss": 2.9803,
      "step": 118009
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7667847871780396,
      "learning_rate": 0.00028851446381436704,
      "loss": 2.9997,
      "step": 118010
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8949050903320312,
      "learning_rate": 0.00028851037625223403,
      "loss": 3.1284,
      "step": 118011
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.011906147003174,
      "learning_rate": 0.0002885062886922372,
      "loss": 2.7929,
      "step": 118012
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7724802494049072,
      "learning_rate": 0.0002885022011343772,
      "loss": 2.9586,
      "step": 118013
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.617423415184021,
      "learning_rate": 0.00028849811357865493,
      "loss": 3.0104,
      "step": 118014
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0352275371551514,
      "learning_rate": 0.000288494026025071,
      "loss": 3.0862,
      "step": 118015
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.027045965194702,
      "learning_rate": 0.0002884899384736264,
      "loss": 2.8542,
      "step": 118016
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.455286741256714,
      "learning_rate": 0.0002884858509243217,
      "loss": 2.8297,
      "step": 118017
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.505178928375244,
      "learning_rate": 0.0002884817633771576,
      "loss": 2.8983,
      "step": 118018
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9864881038665771,
      "learning_rate": 0.0002884776758321349,
      "loss": 2.985,
      "step": 118019
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9478812217712402,
      "learning_rate": 0.0002884735882892545,
      "loss": 3.0847,
      "step": 118020
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1295862197875977,
      "learning_rate": 0.0002884695007485171,
      "loss": 3.0493,
      "step": 118021
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8739920854568481,
      "learning_rate": 0.0002884654132099235,
      "loss": 3.177,
      "step": 118022
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.3651387691497803,
      "learning_rate": 0.00028846132567347436,
      "loss": 3.1474,
      "step": 118023
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6888381242752075,
      "learning_rate": 0.00028845723813917043,
      "loss": 3.0724,
      "step": 118024
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2385826110839844,
      "learning_rate": 0.0002884531506070125,
      "loss": 2.9793,
      "step": 118025
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0820369720458984,
      "learning_rate": 0.0002884490630770014,
      "loss": 3.2429,
      "step": 118026
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2017998695373535,
      "learning_rate": 0.00028844497554913773,
      "loss": 3.0767,
      "step": 118027
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7362303733825684,
      "learning_rate": 0.00028844088802342253,
      "loss": 2.9245,
      "step": 118028
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1820929050445557,
      "learning_rate": 0.0002884368004998563,
      "loss": 2.7865,
      "step": 118029
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9127352237701416,
      "learning_rate": 0.0002884327129784399,
      "loss": 3.0671,
      "step": 118030
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0539422035217285,
      "learning_rate": 0.000288428625459174,
      "loss": 2.8617,
      "step": 118031
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9685536623001099,
      "learning_rate": 0.0002884245379420595,
      "loss": 3.0796,
      "step": 118032
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0828497409820557,
      "learning_rate": 0.0002884204504270971,
      "loss": 3.063,
      "step": 118033
    },
    {
      "epoch": 1.54,
      "grad_norm": 4.046534538269043,
      "learning_rate": 0.0002884163629142876,
      "loss": 2.8369,
      "step": 118034
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.161757230758667,
      "learning_rate": 0.00028841227540363164,
      "loss": 3.1566,
      "step": 118035
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.159958600997925,
      "learning_rate": 0.00028840818789513005,
      "loss": 2.9331,
      "step": 118036
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.118739366531372,
      "learning_rate": 0.00028840410038878363,
      "loss": 3.1225,
      "step": 118037
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7045962810516357,
      "learning_rate": 0.0002884000128845932,
      "loss": 2.9101,
      "step": 118038
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0841481685638428,
      "learning_rate": 0.00028839592538255926,
      "loss": 3.0529,
      "step": 118039
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.919930100440979,
      "learning_rate": 0.0002883918378826828,
      "loss": 2.9215,
      "step": 118040
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7242408990859985,
      "learning_rate": 0.0002883877503849646,
      "loss": 2.8306,
      "step": 118041
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.058882236480713,
      "learning_rate": 0.0002883836628894052,
      "loss": 2.9899,
      "step": 118042
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1343138217926025,
      "learning_rate": 0.0002883795753960056,
      "loss": 2.9095,
      "step": 118043
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8924049139022827,
      "learning_rate": 0.0002883754879047664,
      "loss": 3.0346,
      "step": 118044
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.102602958679199,
      "learning_rate": 0.00028837140041568845,
      "loss": 2.869,
      "step": 118045
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1500556468963623,
      "learning_rate": 0.00028836731292877245,
      "loss": 2.8528,
      "step": 118046
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.52577805519104,
      "learning_rate": 0.0002883632254440193,
      "loss": 2.7699,
      "step": 118047
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.808182716369629,
      "learning_rate": 0.00028835913796142947,
      "loss": 3.1673,
      "step": 118048
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8134745359420776,
      "learning_rate": 0.000288355050481004,
      "loss": 2.9253,
      "step": 118049
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.240813732147217,
      "learning_rate": 0.00028835096300274345,
      "loss": 2.7415,
      "step": 118050
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8320274353027344,
      "learning_rate": 0.0002883468755266488,
      "loss": 2.6244,
      "step": 118051
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9430407285690308,
      "learning_rate": 0.0002883427880527206,
      "loss": 3.1065,
      "step": 118052
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6583362817764282,
      "learning_rate": 0.00028833870058095974,
      "loss": 3.083,
      "step": 118053
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2494938373565674,
      "learning_rate": 0.0002883346131113669,
      "loss": 2.9303,
      "step": 118054
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.897836685180664,
      "learning_rate": 0.0002883305256439429,
      "loss": 2.847,
      "step": 118055
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9627765417099,
      "learning_rate": 0.0002883264381786885,
      "loss": 3.1181,
      "step": 118056
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.822479248046875,
      "learning_rate": 0.0002883223507156043,
      "loss": 2.893,
      "step": 118057
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9288482666015625,
      "learning_rate": 0.00028831826325469135,
      "loss": 3.0004,
      "step": 118058
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.156278371810913,
      "learning_rate": 0.0002883141757959503,
      "loss": 2.9983,
      "step": 118059
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.975404739379883,
      "learning_rate": 0.0002883100883393817,
      "loss": 3.0984,
      "step": 118060
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0665762424468994,
      "learning_rate": 0.0002883060008849866,
      "loss": 3.0483,
      "step": 118061
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.07879638671875,
      "learning_rate": 0.00028830191343276555,
      "loss": 3.0436,
      "step": 118062
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.335514545440674,
      "learning_rate": 0.0002882978259827194,
      "loss": 2.9415,
      "step": 118063
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.822747230529785,
      "learning_rate": 0.0002882937385348489,
      "loss": 2.9795,
      "step": 118064
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3755061626434326,
      "learning_rate": 0.000288289651089155,
      "loss": 2.9969,
      "step": 118065
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7687166929244995,
      "learning_rate": 0.0002882855636456381,
      "loss": 3.0439,
      "step": 118066
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7792019844055176,
      "learning_rate": 0.0002882814762042991,
      "loss": 3.0792,
      "step": 118067
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.01286244392395,
      "learning_rate": 0.0002882773887651388,
      "loss": 3.1518,
      "step": 118068
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7245848178863525,
      "learning_rate": 0.00028827330132815803,
      "loss": 3.0609,
      "step": 118069
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.216111421585083,
      "learning_rate": 0.0002882692138933574,
      "loss": 2.5147,
      "step": 118070
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9027212858200073,
      "learning_rate": 0.00028826512646073794,
      "loss": 3.0166,
      "step": 118071
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.127892255783081,
      "learning_rate": 0.0002882610390303001,
      "loss": 2.8406,
      "step": 118072
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.8654730319976807,
      "learning_rate": 0.0002882569516020447,
      "loss": 3.0472,
      "step": 118073
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8681739568710327,
      "learning_rate": 0.00028825286417597255,
      "loss": 3.1798,
      "step": 118074
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0944125652313232,
      "learning_rate": 0.0002882487767520844,
      "loss": 3.2282,
      "step": 118075
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8419313430786133,
      "learning_rate": 0.000288244689330381,
      "loss": 3.2204,
      "step": 118076
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9252415895462036,
      "learning_rate": 0.0002882406019108634,
      "loss": 2.7603,
      "step": 118077
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.597740411758423,
      "learning_rate": 0.0002882365144935318,
      "loss": 2.6963,
      "step": 118078
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.030120849609375,
      "learning_rate": 0.0002882324270783874,
      "loss": 3.0881,
      "step": 118079
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1211655139923096,
      "learning_rate": 0.00028822833966543073,
      "loss": 2.9682,
      "step": 118080
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9070104360580444,
      "learning_rate": 0.00028822425225466264,
      "loss": 2.7226,
      "step": 118081
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0128302574157715,
      "learning_rate": 0.0002882201648460839,
      "loss": 3.1591,
      "step": 118082
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.2363808155059814,
      "learning_rate": 0.0002882160774396954,
      "loss": 2.872,
      "step": 118083
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7668824195861816,
      "learning_rate": 0.00028821199003549756,
      "loss": 2.8653,
      "step": 118084
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.6557886600494385,
      "learning_rate": 0.00028820790263349135,
      "loss": 3.0016,
      "step": 118085
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9027502536773682,
      "learning_rate": 0.00028820381523367755,
      "loss": 3.0638,
      "step": 118086
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4519405364990234,
      "learning_rate": 0.0002881997278360569,
      "loss": 3.0762,
      "step": 118087
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8045756816864014,
      "learning_rate": 0.0002881956404406301,
      "loss": 2.9529,
      "step": 118088
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9451889991760254,
      "learning_rate": 0.0002881915530473981,
      "loss": 3.2687,
      "step": 118089
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3844544887542725,
      "learning_rate": 0.00028818746565636137,
      "loss": 3.0339,
      "step": 118090
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9851266145706177,
      "learning_rate": 0.0002881833782675208,
      "loss": 2.8575,
      "step": 118091
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8211618661880493,
      "learning_rate": 0.00028817929088087717,
      "loss": 2.9545,
      "step": 118092
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9713027477264404,
      "learning_rate": 0.0002881752034964313,
      "loss": 3.0891,
      "step": 118093
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5342743396759033,
      "learning_rate": 0.0002881711161141838,
      "loss": 3.0541,
      "step": 118094
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7282721996307373,
      "learning_rate": 0.0002881670287341357,
      "loss": 2.8712,
      "step": 118095
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.434744358062744,
      "learning_rate": 0.0002881629413562874,
      "loss": 2.9226,
      "step": 118096
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.538203716278076,
      "learning_rate": 0.0002881588539806399,
      "loss": 3.1393,
      "step": 118097
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1403214931488037,
      "learning_rate": 0.0002881547666071938,
      "loss": 2.6669,
      "step": 118098
    },
    {
      "epoch": 1.54,
      "grad_norm": 4.205714225769043,
      "learning_rate": 0.00028815067923595,
      "loss": 2.7727,
      "step": 118099
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0307741165161133,
      "learning_rate": 0.0002881465918669092,
      "loss": 3.1072,
      "step": 118100
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.629225730895996,
      "learning_rate": 0.0002881425045000723,
      "loss": 2.9056,
      "step": 118101
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7245373725891113,
      "learning_rate": 0.0002881384171354398,
      "loss": 2.8719,
      "step": 118102
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1635797023773193,
      "learning_rate": 0.00028813432977301267,
      "loss": 2.8806,
      "step": 118103
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3828413486480713,
      "learning_rate": 0.0002881302424127915,
      "loss": 2.9727,
      "step": 118104
    },
    {
      "epoch": 1.54,
      "grad_norm": 4.368602275848389,
      "learning_rate": 0.00028812615505477716,
      "loss": 3.1814,
      "step": 118105
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8168010711669922,
      "learning_rate": 0.0002881220676989704,
      "loss": 2.9922,
      "step": 118106
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8601930141448975,
      "learning_rate": 0.000288117980345372,
      "loss": 3.1813,
      "step": 118107
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.123481273651123,
      "learning_rate": 0.00028811389299398274,
      "loss": 2.9544,
      "step": 118108
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.696695327758789,
      "learning_rate": 0.0002881098056448033,
      "loss": 2.9572,
      "step": 118109
    },
    {
      "epoch": 1.54,
      "grad_norm": 6.201711654663086,
      "learning_rate": 0.0002881057182978344,
      "loss": 3.0534,
      "step": 118110
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8843297958374023,
      "learning_rate": 0.0002881016309530769,
      "loss": 2.8339,
      "step": 118111
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.828662395477295,
      "learning_rate": 0.0002880975436105315,
      "loss": 2.982,
      "step": 118112
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.6290221214294434,
      "learning_rate": 0.00028809345627019904,
      "loss": 3.2162,
      "step": 118113
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1662204265594482,
      "learning_rate": 0.0002880893689320803,
      "loss": 2.7515,
      "step": 118114
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1037039756774902,
      "learning_rate": 0.00028808528159617595,
      "loss": 2.8845,
      "step": 118115
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7769454717636108,
      "learning_rate": 0.00028808119426248667,
      "loss": 2.9507,
      "step": 118116
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.544476270675659,
      "learning_rate": 0.0002880771069310134,
      "loss": 2.7465,
      "step": 118117
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2917022705078125,
      "learning_rate": 0.00028807301960175676,
      "loss": 3.0738,
      "step": 118118
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6483806371688843,
      "learning_rate": 0.00028806893227471754,
      "loss": 3.1706,
      "step": 118119
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.5742533206939697,
      "learning_rate": 0.0002880648449498967,
      "loss": 2.9404,
      "step": 118120
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7477346658706665,
      "learning_rate": 0.0002880607576272947,
      "loss": 3.1086,
      "step": 118121
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8464255332946777,
      "learning_rate": 0.0002880566703069125,
      "loss": 2.6711,
      "step": 118122
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7086577415466309,
      "learning_rate": 0.0002880525829887507,
      "loss": 2.826,
      "step": 118123
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3869121074676514,
      "learning_rate": 0.0002880484956728102,
      "loss": 3.1651,
      "step": 118124
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.831407904624939,
      "learning_rate": 0.0002880444083590917,
      "loss": 2.9557,
      "step": 118125
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5786659717559814,
      "learning_rate": 0.00028804032104759605,
      "loss": 2.8497,
      "step": 118126
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0298125743865967,
      "learning_rate": 0.0002880362337383238,
      "loss": 2.9399,
      "step": 118127
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.364760398864746,
      "learning_rate": 0.0002880321464312759,
      "loss": 3.2367,
      "step": 118128
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6589269638061523,
      "learning_rate": 0.000288028059126453,
      "loss": 2.8504,
      "step": 118129
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.331434726715088,
      "learning_rate": 0.000288023971823856,
      "loss": 2.9649,
      "step": 118130
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.762174606323242,
      "learning_rate": 0.0002880198845234855,
      "loss": 2.9639,
      "step": 118131
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1950507164001465,
      "learning_rate": 0.00028801579722534246,
      "loss": 3.0595,
      "step": 118132
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.935413122177124,
      "learning_rate": 0.0002880117099294274,
      "loss": 3.0271,
      "step": 118133
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7732181549072266,
      "learning_rate": 0.00028800762263574115,
      "loss": 2.9839,
      "step": 118134
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.714359998703003,
      "learning_rate": 0.00028800353534428453,
      "loss": 3.1849,
      "step": 118135
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7715179920196533,
      "learning_rate": 0.00028799944805505827,
      "loss": 3.0215,
      "step": 118136
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.8545243740081787,
      "learning_rate": 0.00028799536076806325,
      "loss": 3.0977,
      "step": 118137
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.2292027473449707,
      "learning_rate": 0.00028799127348330005,
      "loss": 2.9456,
      "step": 118138
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1098763942718506,
      "learning_rate": 0.0002879871862007695,
      "loss": 3.0888,
      "step": 118139
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3678905963897705,
      "learning_rate": 0.00028798309892047236,
      "loss": 2.7851,
      "step": 118140
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7114477157592773,
      "learning_rate": 0.00028797901164240937,
      "loss": 2.9871,
      "step": 118141
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4559710025787354,
      "learning_rate": 0.00028797492436658125,
      "loss": 2.7851,
      "step": 118142
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0814170837402344,
      "learning_rate": 0.0002879708370929889,
      "loss": 2.988,
      "step": 118143
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0571248531341553,
      "learning_rate": 0.0002879667498216331,
      "loss": 2.9973,
      "step": 118144
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2000532150268555,
      "learning_rate": 0.0002879626625525144,
      "loss": 2.961,
      "step": 118145
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8095060586929321,
      "learning_rate": 0.00028795857528563363,
      "loss": 3.1109,
      "step": 118146
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4407382011413574,
      "learning_rate": 0.0002879544880209916,
      "loss": 2.9443,
      "step": 118147
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8070917129516602,
      "learning_rate": 0.00028795040075858905,
      "loss": 3.3171,
      "step": 118148
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9451320171356201,
      "learning_rate": 0.0002879463134984268,
      "loss": 2.9431,
      "step": 118149
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.030998706817627,
      "learning_rate": 0.0002879422262405056,
      "loss": 2.8263,
      "step": 118150
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0144121646881104,
      "learning_rate": 0.0002879381389848261,
      "loss": 2.9134,
      "step": 118151
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.084901809692383,
      "learning_rate": 0.0002879340517313891,
      "loss": 2.9772,
      "step": 118152
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0707597732543945,
      "learning_rate": 0.0002879299644801954,
      "loss": 3.0571,
      "step": 118153
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.8798623085021973,
      "learning_rate": 0.0002879258772312457,
      "loss": 3.088,
      "step": 118154
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0041491985321045,
      "learning_rate": 0.0002879217899845409,
      "loss": 3.0637,
      "step": 118155
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3605763912200928,
      "learning_rate": 0.00028791770274008173,
      "loss": 3.0511,
      "step": 118156
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2593698501586914,
      "learning_rate": 0.0002879136154978688,
      "loss": 2.9533,
      "step": 118157
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2459335327148438,
      "learning_rate": 0.0002879095282579029,
      "loss": 2.9128,
      "step": 118158
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0747454166412354,
      "learning_rate": 0.0002879054410201849,
      "loss": 2.9756,
      "step": 118159
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.444075584411621,
      "learning_rate": 0.0002879013537847155,
      "loss": 3.1801,
      "step": 118160
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6754730939865112,
      "learning_rate": 0.00028789726655149546,
      "loss": 3.0816,
      "step": 118161
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.471571445465088,
      "learning_rate": 0.00028789317932052566,
      "loss": 3.1365,
      "step": 118162
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.2649176120758057,
      "learning_rate": 0.00028788909209180667,
      "loss": 2.9306,
      "step": 118163
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1494698524475098,
      "learning_rate": 0.00028788500486533924,
      "loss": 2.8797,
      "step": 118164
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.692293882369995,
      "learning_rate": 0.0002878809176411243,
      "loss": 2.7707,
      "step": 118165
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4137916564941406,
      "learning_rate": 0.0002878768304191625,
      "loss": 2.947,
      "step": 118166
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.200019598007202,
      "learning_rate": 0.00028787274319945456,
      "loss": 3.0249,
      "step": 118167
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9477118253707886,
      "learning_rate": 0.0002878686559820015,
      "loss": 3.2859,
      "step": 118168
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3829314708709717,
      "learning_rate": 0.00028786456876680376,
      "loss": 2.9715,
      "step": 118169
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3769400119781494,
      "learning_rate": 0.0002878604815538622,
      "loss": 3.18,
      "step": 118170
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.930646300315857,
      "learning_rate": 0.00028785639434317765,
      "loss": 2.7472,
      "step": 118171
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1204993724823,
      "learning_rate": 0.00028785230713475075,
      "loss": 3.1784,
      "step": 118172
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.933117389678955,
      "learning_rate": 0.0002878482199285824,
      "loss": 3.073,
      "step": 118173
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0205273628234863,
      "learning_rate": 0.00028784413272467326,
      "loss": 3.0242,
      "step": 118174
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.706486940383911,
      "learning_rate": 0.00028784004552302424,
      "loss": 2.8594,
      "step": 118175
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.187382221221924,
      "learning_rate": 0.00028783595832363595,
      "loss": 3.1394,
      "step": 118176
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8545918464660645,
      "learning_rate": 0.0002878318711265091,
      "loss": 3.1262,
      "step": 118177
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.084352731704712,
      "learning_rate": 0.00028782778393164453,
      "loss": 2.8966,
      "step": 118178
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.123884916305542,
      "learning_rate": 0.000287823696739043,
      "loss": 3.1363,
      "step": 118179
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8056206703186035,
      "learning_rate": 0.0002878196095487053,
      "loss": 2.9544,
      "step": 118180
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.408452272415161,
      "learning_rate": 0.0002878155223606323,
      "loss": 3.0027,
      "step": 118181
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.960820198059082,
      "learning_rate": 0.0002878114351748245,
      "loss": 3.0645,
      "step": 118182
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.179452896118164,
      "learning_rate": 0.00028780734799128277,
      "loss": 2.7497,
      "step": 118183
    },
    {
      "epoch": 1.54,
      "grad_norm": 4.845864772796631,
      "learning_rate": 0.0002878032608100079,
      "loss": 3.0948,
      "step": 118184
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1477677822113037,
      "learning_rate": 0.0002877991736310006,
      "loss": 3.0459,
      "step": 118185
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3401284217834473,
      "learning_rate": 0.00028779508645426175,
      "loss": 3.1436,
      "step": 118186
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9173636436462402,
      "learning_rate": 0.00028779099927979206,
      "loss": 2.7812,
      "step": 118187
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.478015899658203,
      "learning_rate": 0.0002877869121075922,
      "loss": 2.9236,
      "step": 118188
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.9893410205841064,
      "learning_rate": 0.00028778282493766293,
      "loss": 2.8304,
      "step": 118189
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1214215755462646,
      "learning_rate": 0.00028777873777000506,
      "loss": 3.0308,
      "step": 118190
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.312567949295044,
      "learning_rate": 0.00028777465060461936,
      "loss": 2.9742,
      "step": 118191
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8734065294265747,
      "learning_rate": 0.0002877705634415066,
      "loss": 2.9259,
      "step": 118192
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1095499992370605,
      "learning_rate": 0.00028776647628066764,
      "loss": 2.9097,
      "step": 118193
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.643583297729492,
      "learning_rate": 0.000287762389122103,
      "loss": 2.8581,
      "step": 118194
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7750276327133179,
      "learning_rate": 0.0002877583019658135,
      "loss": 2.8811,
      "step": 118195
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9054841995239258,
      "learning_rate": 0.00028775421481180006,
      "loss": 3.1238,
      "step": 118196
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9933850765228271,
      "learning_rate": 0.0002877501276600633,
      "loss": 2.8826,
      "step": 118197
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1022815704345703,
      "learning_rate": 0.00028774604051060406,
      "loss": 3.0156,
      "step": 118198
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9465826749801636,
      "learning_rate": 0.0002877419533634231,
      "loss": 2.8896,
      "step": 118199
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8684965372085571,
      "learning_rate": 0.0002877378662185211,
      "loss": 2.9924,
      "step": 118200
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8742332458496094,
      "learning_rate": 0.0002877337790758988,
      "loss": 3.1255,
      "step": 118201
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9260051250457764,
      "learning_rate": 0.00028772969193555705,
      "loss": 3.1892,
      "step": 118202
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8701632022857666,
      "learning_rate": 0.0002877256047974966,
      "loss": 3.0475,
      "step": 118203
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.264676094055176,
      "learning_rate": 0.0002877215176617182,
      "loss": 2.8608,
      "step": 118204
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0105504989624023,
      "learning_rate": 0.00028771743052822266,
      "loss": 2.9411,
      "step": 118205
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7275426387786865,
      "learning_rate": 0.0002877133433970106,
      "loss": 3.0429,
      "step": 118206
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0059568881988525,
      "learning_rate": 0.0002877092562680829,
      "loss": 2.8961,
      "step": 118207
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.74079167842865,
      "learning_rate": 0.00028770516914144024,
      "loss": 3.2256,
      "step": 118208
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.765544056892395,
      "learning_rate": 0.0002877010820170834,
      "loss": 3.0557,
      "step": 118209
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9032732248306274,
      "learning_rate": 0.0002876969948950132,
      "loss": 2.9245,
      "step": 118210
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.537259340286255,
      "learning_rate": 0.00028769290777523046,
      "loss": 2.9673,
      "step": 118211
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.374948740005493,
      "learning_rate": 0.0002876888206577357,
      "loss": 3.2974,
      "step": 118212
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.044452428817749,
      "learning_rate": 0.00028768473354252985,
      "loss": 3.2134,
      "step": 118213
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.967774510383606,
      "learning_rate": 0.0002876806464296137,
      "loss": 2.7868,
      "step": 118214
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8649711608886719,
      "learning_rate": 0.0002876765593189879,
      "loss": 2.8535,
      "step": 118215
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9582282304763794,
      "learning_rate": 0.0002876724722106533,
      "loss": 3.3588,
      "step": 118216
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0412635803222656,
      "learning_rate": 0.00028766838510461057,
      "loss": 2.8522,
      "step": 118217
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.242793560028076,
      "learning_rate": 0.0002876642980008605,
      "loss": 2.813,
      "step": 118218
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8736523389816284,
      "learning_rate": 0.0002876602108994039,
      "loss": 2.9615,
      "step": 118219
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.669633626937866,
      "learning_rate": 0.0002876561238002415,
      "loss": 2.9264,
      "step": 118220
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1636390686035156,
      "learning_rate": 0.0002876520367033741,
      "loss": 3.1343,
      "step": 118221
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7260040044784546,
      "learning_rate": 0.0002876479496088024,
      "loss": 2.9781,
      "step": 118222
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7055391073226929,
      "learning_rate": 0.0002876438625165272,
      "loss": 3.022,
      "step": 118223
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.740547776222229,
      "learning_rate": 0.0002876397754265492,
      "loss": 2.9408,
      "step": 118224
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8617660999298096,
      "learning_rate": 0.0002876356883388692,
      "loss": 2.9873,
      "step": 118225
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0091919898986816,
      "learning_rate": 0.0002876316012534879,
      "loss": 3.0176,
      "step": 118226
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.011395215988159,
      "learning_rate": 0.00028762751417040616,
      "loss": 2.7385,
      "step": 118227
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.632193088531494,
      "learning_rate": 0.0002876234270896247,
      "loss": 2.7238,
      "step": 118228
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0664751529693604,
      "learning_rate": 0.00028761934001114443,
      "loss": 3.1248,
      "step": 118229
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.208756685256958,
      "learning_rate": 0.0002876152529349658,
      "loss": 3.3514,
      "step": 118230
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9670428037643433,
      "learning_rate": 0.00028761116586108974,
      "loss": 3.0553,
      "step": 118231
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0707857608795166,
      "learning_rate": 0.000287607078789517,
      "loss": 3.1619,
      "step": 118232
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.234968662261963,
      "learning_rate": 0.0002876029917202483,
      "loss": 2.9012,
      "step": 118233
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.229834794998169,
      "learning_rate": 0.0002875989046532845,
      "loss": 3.114,
      "step": 118234
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.637804388999939,
      "learning_rate": 0.0002875948175886264,
      "loss": 3.122,
      "step": 118235
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1671862602233887,
      "learning_rate": 0.00028759073052627457,
      "loss": 3.1601,
      "step": 118236
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0203440189361572,
      "learning_rate": 0.0002875866434662298,
      "loss": 2.9398,
      "step": 118237
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7936407327651978,
      "learning_rate": 0.0002875825564084929,
      "loss": 2.9789,
      "step": 118238
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0201704502105713,
      "learning_rate": 0.0002875784693530647,
      "loss": 2.877,
      "step": 118239
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9511253833770752,
      "learning_rate": 0.0002875743822999458,
      "loss": 3.1575,
      "step": 118240
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7158255577087402,
      "learning_rate": 0.0002875702952491371,
      "loss": 3.1971,
      "step": 118241
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8449159860610962,
      "learning_rate": 0.00028756620820063946,
      "loss": 2.9687,
      "step": 118242
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.173459529876709,
      "learning_rate": 0.0002875621211544534,
      "loss": 3.0784,
      "step": 118243
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5086426734924316,
      "learning_rate": 0.00028755803411057974,
      "loss": 3.0543,
      "step": 118244
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.953531265258789,
      "learning_rate": 0.00028755394706901925,
      "loss": 2.8956,
      "step": 118245
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.8535633087158203,
      "learning_rate": 0.0002875498600297728,
      "loss": 3.0455,
      "step": 118246
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.008505344390869,
      "learning_rate": 0.00028754577299284096,
      "loss": 2.98,
      "step": 118247
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1239101886749268,
      "learning_rate": 0.00028754168595822484,
      "loss": 2.9173,
      "step": 118248
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.023484230041504,
      "learning_rate": 0.00028753759892592473,
      "loss": 2.991,
      "step": 118249
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.062324285507202,
      "learning_rate": 0.0002875335118959416,
      "loss": 3.0601,
      "step": 118250
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.257566213607788,
      "learning_rate": 0.0002875294248682763,
      "loss": 2.9653,
      "step": 118251
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9776015281677246,
      "learning_rate": 0.00028752533784292953,
      "loss": 2.6019,
      "step": 118252
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.6772046089172363,
      "learning_rate": 0.00028752125081990196,
      "loss": 3.2007,
      "step": 118253
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7829371690750122,
      "learning_rate": 0.00028751716379919463,
      "loss": 3.0672,
      "step": 118254
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.052905559539795,
      "learning_rate": 0.0002875130767808079,
      "loss": 3.0346,
      "step": 118255
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8624498844146729,
      "learning_rate": 0.0002875089897647427,
      "loss": 2.7592,
      "step": 118256
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7459220886230469,
      "learning_rate": 0.0002875049027509999,
      "loss": 3.0321,
      "step": 118257
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.80942702293396,
      "learning_rate": 0.00028750081573958015,
      "loss": 3.0902,
      "step": 118258
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.088016986846924,
      "learning_rate": 0.0002874967287304842,
      "loss": 2.9774,
      "step": 118259
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9673317670822144,
      "learning_rate": 0.000287492641723713,
      "loss": 2.7004,
      "step": 118260
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3160343170166016,
      "learning_rate": 0.000287488554719267,
      "loss": 3.221,
      "step": 118261
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.781172752380371,
      "learning_rate": 0.00028748446771714714,
      "loss": 3.0121,
      "step": 118262
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8383204936981201,
      "learning_rate": 0.0002874803807173542,
      "loss": 2.8468,
      "step": 118263
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9963728189468384,
      "learning_rate": 0.0002874762937198888,
      "loss": 3.1238,
      "step": 118264
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.419034004211426,
      "learning_rate": 0.00028747220672475183,
      "loss": 2.8061,
      "step": 118265
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.602778673171997,
      "learning_rate": 0.0002874681197319442,
      "loss": 2.9875,
      "step": 118266
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.013082981109619,
      "learning_rate": 0.00028746403274146625,
      "loss": 2.7195,
      "step": 118267
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.009188175201416,
      "learning_rate": 0.00028745994575331903,
      "loss": 2.9391,
      "step": 118268
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4992780685424805,
      "learning_rate": 0.0002874558587675032,
      "loss": 2.8525,
      "step": 118269
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0320169925689697,
      "learning_rate": 0.00028745177178401965,
      "loss": 3.0705,
      "step": 118270
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8842017650604248,
      "learning_rate": 0.000287447684802869,
      "loss": 2.8476,
      "step": 118271
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2531492710113525,
      "learning_rate": 0.0002874435978240522,
      "loss": 3.0945,
      "step": 118272
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.834404468536377,
      "learning_rate": 0.0002874395108475697,
      "loss": 2.8896,
      "step": 118273
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7738794088363647,
      "learning_rate": 0.0002874354238734225,
      "loss": 3.0715,
      "step": 118274
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1028363704681396,
      "learning_rate": 0.00028743133690161126,
      "loss": 2.7175,
      "step": 118275
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8476518392562866,
      "learning_rate": 0.00028742724993213677,
      "loss": 2.6605,
      "step": 118276
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1962809562683105,
      "learning_rate": 0.00028742316296499976,
      "loss": 2.8276,
      "step": 118277
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8239336013793945,
      "learning_rate": 0.0002874190760002012,
      "loss": 2.8939,
      "step": 118278
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8258119821548462,
      "learning_rate": 0.00028741498903774157,
      "loss": 2.9425,
      "step": 118279
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.921628713607788,
      "learning_rate": 0.0002874109020776217,
      "loss": 3.3223,
      "step": 118280
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.377831220626831,
      "learning_rate": 0.0002874068151198423,
      "loss": 3.0082,
      "step": 118281
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0583364963531494,
      "learning_rate": 0.00028740272816440434,
      "loss": 3.131,
      "step": 118282
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.787952184677124,
      "learning_rate": 0.00028739864121130835,
      "loss": 2.7951,
      "step": 118283
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2432944774627686,
      "learning_rate": 0.0002873945542605554,
      "loss": 3.0263,
      "step": 118284
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0169570446014404,
      "learning_rate": 0.00028739046731214585,
      "loss": 3.2012,
      "step": 118285
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1553421020507812,
      "learning_rate": 0.0002873863803660807,
      "loss": 2.9914,
      "step": 118286
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4652481079101562,
      "learning_rate": 0.0002873822934223606,
      "loss": 2.9243,
      "step": 118287
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.007185459136963,
      "learning_rate": 0.0002873782064809864,
      "loss": 2.9461,
      "step": 118288
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.193596839904785,
      "learning_rate": 0.0002873741195419588,
      "loss": 2.8933,
      "step": 118289
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.112736463546753,
      "learning_rate": 0.00028737003260527873,
      "loss": 2.8523,
      "step": 118290
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8984471559524536,
      "learning_rate": 0.0002873659456709467,
      "loss": 2.9664,
      "step": 118291
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4345669746398926,
      "learning_rate": 0.0002873618587389636,
      "loss": 2.9726,
      "step": 118292
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9488369226455688,
      "learning_rate": 0.00028735777180933017,
      "loss": 2.8966,
      "step": 118293
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2486014366149902,
      "learning_rate": 0.00028735368488204714,
      "loss": 3.049,
      "step": 118294
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.939962387084961,
      "learning_rate": 0.0002873495979571153,
      "loss": 2.7009,
      "step": 118295
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2491660118103027,
      "learning_rate": 0.0002873455110345355,
      "loss": 3.0228,
      "step": 118296
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9075528383255005,
      "learning_rate": 0.00028734142411430833,
      "loss": 2.8907,
      "step": 118297
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8717387914657593,
      "learning_rate": 0.0002873373371964346,
      "loss": 3.204,
      "step": 118298
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.570178508758545,
      "learning_rate": 0.0002873332502809152,
      "loss": 3.1254,
      "step": 118299
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.8496670722961426,
      "learning_rate": 0.0002873291633677507,
      "loss": 3.0333,
      "step": 118300
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3823959827423096,
      "learning_rate": 0.00028732507645694196,
      "loss": 3.0185,
      "step": 118301
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.900420904159546,
      "learning_rate": 0.00028732098954848973,
      "loss": 3.2125,
      "step": 118302
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.709864854812622,
      "learning_rate": 0.0002873169026423948,
      "loss": 2.9247,
      "step": 118303
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6143701076507568,
      "learning_rate": 0.00028731281573865785,
      "loss": 3.1073,
      "step": 118304
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3910908699035645,
      "learning_rate": 0.0002873087288372797,
      "loss": 3.0022,
      "step": 118305
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4744040966033936,
      "learning_rate": 0.0002873046419382612,
      "loss": 2.7667,
      "step": 118306
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9364900588989258,
      "learning_rate": 0.00028730055504160284,
      "loss": 2.8644,
      "step": 118307
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9346996545791626,
      "learning_rate": 0.0002872964681473056,
      "loss": 3.0548,
      "step": 118308
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6347148418426514,
      "learning_rate": 0.00028729238125537024,
      "loss": 2.99,
      "step": 118309
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9833475351333618,
      "learning_rate": 0.00028728829436579743,
      "loss": 3.0505,
      "step": 118310
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7040462493896484,
      "learning_rate": 0.0002872842074785879,
      "loss": 3.2234,
      "step": 118311
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8235368728637695,
      "learning_rate": 0.00028728012059374257,
      "loss": 3.1455,
      "step": 118312
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7866442203521729,
      "learning_rate": 0.0002872760337112621,
      "loss": 3.2058,
      "step": 118313
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1084163188934326,
      "learning_rate": 0.00028727194683114725,
      "loss": 2.9889,
      "step": 118314
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.102159023284912,
      "learning_rate": 0.0002872678599533988,
      "loss": 3.0267,
      "step": 118315
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.04900860786438,
      "learning_rate": 0.00028726377307801746,
      "loss": 3.1457,
      "step": 118316
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4547946453094482,
      "learning_rate": 0.000287259686205004,
      "loss": 2.9599,
      "step": 118317
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8997063636779785,
      "learning_rate": 0.00028725559933435925,
      "loss": 3.0306,
      "step": 118318
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.115154504776001,
      "learning_rate": 0.00028725151246608387,
      "loss": 2.8711,
      "step": 118319
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0162925720214844,
      "learning_rate": 0.0002872474256001787,
      "loss": 3.1763,
      "step": 118320
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.313023328781128,
      "learning_rate": 0.00028724333873664465,
      "loss": 3.0279,
      "step": 118321
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7712446451187134,
      "learning_rate": 0.0002872392518754821,
      "loss": 2.9939,
      "step": 118322
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.774117946624756,
      "learning_rate": 0.00028723516501669204,
      "loss": 3.0229,
      "step": 118323
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.702085256576538,
      "learning_rate": 0.0002872310781602752,
      "loss": 2.885,
      "step": 118324
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1304500102996826,
      "learning_rate": 0.0002872269913062324,
      "loss": 3.2774,
      "step": 118325
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.307831287384033,
      "learning_rate": 0.00028722290445456426,
      "loss": 2.7211,
      "step": 118326
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9715324640274048,
      "learning_rate": 0.00028721881760527173,
      "loss": 3.1042,
      "step": 118327
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.77008855342865,
      "learning_rate": 0.00028721473075835546,
      "loss": 2.9842,
      "step": 118328
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6704522371292114,
      "learning_rate": 0.00028721064391381613,
      "loss": 2.9754,
      "step": 118329
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1868362426757812,
      "learning_rate": 0.00028720655707165457,
      "loss": 2.8857,
      "step": 118330
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6670335531234741,
      "learning_rate": 0.0002872024702318716,
      "loss": 3.006,
      "step": 118331
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7684564590454102,
      "learning_rate": 0.0002871983833944679,
      "loss": 2.6674,
      "step": 118332
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9121588468551636,
      "learning_rate": 0.0002871942965594444,
      "loss": 2.9477,
      "step": 118333
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2097930908203125,
      "learning_rate": 0.00028719020972680157,
      "loss": 3.0475,
      "step": 118334
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.915550708770752,
      "learning_rate": 0.0002871861228965403,
      "loss": 2.9446,
      "step": 118335
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7332749366760254,
      "learning_rate": 0.00028718203606866143,
      "loss": 2.9437,
      "step": 118336
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.015382766723633,
      "learning_rate": 0.0002871779492431657,
      "loss": 2.8815,
      "step": 118337
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9222127199172974,
      "learning_rate": 0.00028717386242005377,
      "loss": 3.1944,
      "step": 118338
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7100120782852173,
      "learning_rate": 0.00028716977559932656,
      "loss": 3.1738,
      "step": 118339
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9164783954620361,
      "learning_rate": 0.00028716568878098465,
      "loss": 3.0628,
      "step": 118340
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1693994998931885,
      "learning_rate": 0.0002871616019650289,
      "loss": 2.9146,
      "step": 118341
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.04223370552063,
      "learning_rate": 0.00028715751515145997,
      "loss": 2.9456,
      "step": 118342
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.4016690254211426,
      "learning_rate": 0.0002871534283402788,
      "loss": 3.1487,
      "step": 118343
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.826206088066101,
      "learning_rate": 0.000287149341531486,
      "loss": 3.1844,
      "step": 118344
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8559259176254272,
      "learning_rate": 0.00028714525472508254,
      "loss": 2.9828,
      "step": 118345
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.972375750541687,
      "learning_rate": 0.0002871411679210688,
      "loss": 3.1071,
      "step": 118346
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.7228662967681885,
      "learning_rate": 0.0002871370811194458,
      "loss": 2.867,
      "step": 118347
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.595597505569458,
      "learning_rate": 0.00028713299432021433,
      "loss": 2.9141,
      "step": 118348
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.6279141902923584,
      "learning_rate": 0.000287128907523375,
      "loss": 3.0196,
      "step": 118349
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.240692377090454,
      "learning_rate": 0.0002871248207289287,
      "loss": 2.728,
      "step": 118350
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.079071283340454,
      "learning_rate": 0.00028712073393687626,
      "loss": 2.9936,
      "step": 118351
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6686285734176636,
      "learning_rate": 0.00028711664714721816,
      "loss": 3.0911,
      "step": 118352
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7582049369812012,
      "learning_rate": 0.00028711256035995533,
      "loss": 2.7512,
      "step": 118353
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.319976329803467,
      "learning_rate": 0.00028710847357508854,
      "loss": 3.2869,
      "step": 118354
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7159638404846191,
      "learning_rate": 0.00028710438679261854,
      "loss": 3.0823,
      "step": 118355
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.922536015510559,
      "learning_rate": 0.00028710030001254604,
      "loss": 2.9639,
      "step": 118356
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8774954080581665,
      "learning_rate": 0.00028709621323487196,
      "loss": 3.0027,
      "step": 118357
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1714718341827393,
      "learning_rate": 0.00028709212645959683,
      "loss": 2.992,
      "step": 118358
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.873252034187317,
      "learning_rate": 0.0002870880396867215,
      "loss": 2.9611,
      "step": 118359
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8862197399139404,
      "learning_rate": 0.00028708395291624677,
      "loss": 2.9577,
      "step": 118360
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9528517723083496,
      "learning_rate": 0.0002870798661481734,
      "loss": 3.0168,
      "step": 118361
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.621401309967041,
      "learning_rate": 0.0002870757793825021,
      "loss": 2.9583,
      "step": 118362
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3318872451782227,
      "learning_rate": 0.00028707169261923374,
      "loss": 2.7985,
      "step": 118363
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5212926864624023,
      "learning_rate": 0.0002870676058583689,
      "loss": 3.0474,
      "step": 118364
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9907642602920532,
      "learning_rate": 0.00028706351909990846,
      "loss": 2.9596,
      "step": 118365
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.678053617477417,
      "learning_rate": 0.0002870594323438531,
      "loss": 2.9348,
      "step": 118366
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.3979101181030273,
      "learning_rate": 0.0002870553455902037,
      "loss": 3.0687,
      "step": 118367
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.779104471206665,
      "learning_rate": 0.0002870512588389609,
      "loss": 2.869,
      "step": 118368
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7495391368865967,
      "learning_rate": 0.0002870471720901257,
      "loss": 3.169,
      "step": 118369
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9574016332626343,
      "learning_rate": 0.00028704308534369853,
      "loss": 3.1413,
      "step": 118370
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2460811138153076,
      "learning_rate": 0.00028703899859968025,
      "loss": 3.0553,
      "step": 118371
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.457606315612793,
      "learning_rate": 0.0002870349118580717,
      "loss": 3.007,
      "step": 118372
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9216910600662231,
      "learning_rate": 0.00028703082511887364,
      "loss": 3.2342,
      "step": 118373
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.6796326637268066,
      "learning_rate": 0.00028702673838208677,
      "loss": 2.8795,
      "step": 118374
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.110989809036255,
      "learning_rate": 0.00028702265164771187,
      "loss": 2.9864,
      "step": 118375
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8740220069885254,
      "learning_rate": 0.0002870185649157498,
      "loss": 2.7571,
      "step": 118376
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.111661672592163,
      "learning_rate": 0.0002870144781862011,
      "loss": 3.113,
      "step": 118377
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4882378578186035,
      "learning_rate": 0.0002870103914590667,
      "loss": 2.9252,
      "step": 118378
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.228447198867798,
      "learning_rate": 0.00028700630473434726,
      "loss": 2.8619,
      "step": 118379
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.580596923828125,
      "learning_rate": 0.00028700221801204365,
      "loss": 3.0199,
      "step": 118380
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.296234369277954,
      "learning_rate": 0.0002869981312921565,
      "loss": 3.0612,
      "step": 118381
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0714125633239746,
      "learning_rate": 0.00028699404457468677,
      "loss": 3.0462,
      "step": 118382
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6562764644622803,
      "learning_rate": 0.00028698995785963506,
      "loss": 2.8149,
      "step": 118383
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8241373300552368,
      "learning_rate": 0.00028698587114700215,
      "loss": 3.0035,
      "step": 118384
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.39927339553833,
      "learning_rate": 0.00028698178443678876,
      "loss": 2.6404,
      "step": 118385
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4379382133483887,
      "learning_rate": 0.00028697769772899573,
      "loss": 3.2263,
      "step": 118386
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6789239645004272,
      "learning_rate": 0.0002869736110236238,
      "loss": 3.0307,
      "step": 118387
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9089750051498413,
      "learning_rate": 0.0002869695243206738,
      "loss": 3.0481,
      "step": 118388
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2101433277130127,
      "learning_rate": 0.00028696543762014626,
      "loss": 2.9122,
      "step": 118389
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.357457160949707,
      "learning_rate": 0.00028696135092204215,
      "loss": 3.0582,
      "step": 118390
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7534116506576538,
      "learning_rate": 0.0002869572642263622,
      "loss": 3.1851,
      "step": 118391
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0567522048950195,
      "learning_rate": 0.00028695317753310714,
      "loss": 3.2304,
      "step": 118392
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0790417194366455,
      "learning_rate": 0.00028694909084227765,
      "loss": 3.099,
      "step": 118393
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9694056510925293,
      "learning_rate": 0.0002869450041538747,
      "loss": 2.9302,
      "step": 118394
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8591238260269165,
      "learning_rate": 0.0002869409174678988,
      "loss": 2.8081,
      "step": 118395
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.659315586090088,
      "learning_rate": 0.0002869368307843509,
      "loss": 2.9916,
      "step": 118396
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.055652618408203,
      "learning_rate": 0.00028693274410323166,
      "loss": 2.9772,
      "step": 118397
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.890577793121338,
      "learning_rate": 0.00028692865742454193,
      "loss": 3.0217,
      "step": 118398
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1866114139556885,
      "learning_rate": 0.00028692457074828233,
      "loss": 3.0171,
      "step": 118399
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1412086486816406,
      "learning_rate": 0.00028692048407445383,
      "loss": 3.0706,
      "step": 118400
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.13322114944458,
      "learning_rate": 0.00028691639740305693,
      "loss": 3.1705,
      "step": 118401
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6762021780014038,
      "learning_rate": 0.0002869123107340925,
      "loss": 2.9114,
      "step": 118402
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1609418392181396,
      "learning_rate": 0.0002869082240675613,
      "loss": 2.8247,
      "step": 118403
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7738617658615112,
      "learning_rate": 0.0002869041374034642,
      "loss": 3.0353,
      "step": 118404
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7319769859313965,
      "learning_rate": 0.0002869000507418019,
      "loss": 2.9015,
      "step": 118405
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.276611566543579,
      "learning_rate": 0.0002868959640825751,
      "loss": 2.6703,
      "step": 118406
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.355808973312378,
      "learning_rate": 0.00028689187742578454,
      "loss": 3.0626,
      "step": 118407
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.222144365310669,
      "learning_rate": 0.000286887790771431,
      "loss": 3.2296,
      "step": 118408
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.114877462387085,
      "learning_rate": 0.00028688370411951534,
      "loss": 3.118,
      "step": 118409
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1451597213745117,
      "learning_rate": 0.00028687961747003824,
      "loss": 3.049,
      "step": 118410
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7903839349746704,
      "learning_rate": 0.00028687553082300043,
      "loss": 2.6401,
      "step": 118411
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.5870543718338013,
      "learning_rate": 0.00028687144417840284,
      "loss": 2.9627,
      "step": 118412
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1983659267425537,
      "learning_rate": 0.0002868673575362459,
      "loss": 3.0639,
      "step": 118413
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0835578441619873,
      "learning_rate": 0.0002868632708965306,
      "loss": 3.0571,
      "step": 118414
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8525837659835815,
      "learning_rate": 0.00028685918425925774,
      "loss": 2.943,
      "step": 118415
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9048104286193848,
      "learning_rate": 0.00028685509762442794,
      "loss": 2.7787,
      "step": 118416
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.363542318344116,
      "learning_rate": 0.0002868510109920421,
      "loss": 3.1112,
      "step": 118417
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9368771314620972,
      "learning_rate": 0.00028684692436210097,
      "loss": 2.8099,
      "step": 118418
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7236052751541138,
      "learning_rate": 0.00028684283773460514,
      "loss": 3.0476,
      "step": 118419
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0097925662994385,
      "learning_rate": 0.00028683875110955545,
      "loss": 2.8165,
      "step": 118420
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9186089038848877,
      "learning_rate": 0.0002868346644869527,
      "loss": 2.7244,
      "step": 118421
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6634304523468018,
      "learning_rate": 0.0002868305778667976,
      "loss": 2.9402,
      "step": 118422
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0549819469451904,
      "learning_rate": 0.00028682649124909095,
      "loss": 2.971,
      "step": 118423
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9733432531356812,
      "learning_rate": 0.00028682240463383365,
      "loss": 3.1209,
      "step": 118424
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2697434425354004,
      "learning_rate": 0.0002868183180210262,
      "loss": 2.8145,
      "step": 118425
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.69870924949646,
      "learning_rate": 0.00028681423141066947,
      "loss": 2.843,
      "step": 118426
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.794568657875061,
      "learning_rate": 0.00028681014480276426,
      "loss": 2.7079,
      "step": 118427
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6824629306793213,
      "learning_rate": 0.00028680605819731125,
      "loss": 3.1286,
      "step": 118428
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2983334064483643,
      "learning_rate": 0.00028680197159431124,
      "loss": 3.0865,
      "step": 118429
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5089871883392334,
      "learning_rate": 0.0002867978849937651,
      "loss": 2.8951,
      "step": 118430
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.07749605178833,
      "learning_rate": 0.0002867937983956734,
      "loss": 2.9773,
      "step": 118431
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8849244117736816,
      "learning_rate": 0.00028678971180003696,
      "loss": 3.043,
      "step": 118432
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.9305853843688965,
      "learning_rate": 0.00028678562520685657,
      "loss": 3.0437,
      "step": 118433
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.567824602127075,
      "learning_rate": 0.000286781538616133,
      "loss": 3.0002,
      "step": 118434
    },
    {
      "epoch": 1.54,
      "grad_norm": 4.266111373901367,
      "learning_rate": 0.0002867774520278669,
      "loss": 3.0844,
      "step": 118435
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1874794960021973,
      "learning_rate": 0.00028677336544205936,
      "loss": 3.0456,
      "step": 118436
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1290323734283447,
      "learning_rate": 0.00028676927885871073,
      "loss": 3.0323,
      "step": 118437
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.901165246963501,
      "learning_rate": 0.00028676519227782194,
      "loss": 3.2388,
      "step": 118438
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.6283676624298096,
      "learning_rate": 0.00028676110569939375,
      "loss": 2.8784,
      "step": 118439
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1699655055999756,
      "learning_rate": 0.0002867570191234269,
      "loss": 3.0006,
      "step": 118440
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.422281503677368,
      "learning_rate": 0.0002867529325499222,
      "loss": 3.0206,
      "step": 118441
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0170063972473145,
      "learning_rate": 0.00028674884597888035,
      "loss": 2.7802,
      "step": 118442
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.303374767303467,
      "learning_rate": 0.0002867447594103023,
      "loss": 3.0153,
      "step": 118443
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4487123489379883,
      "learning_rate": 0.0002867406728441885,
      "loss": 2.9292,
      "step": 118444
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7476719617843628,
      "learning_rate": 0.0002867365862805399,
      "loss": 3.1979,
      "step": 118445
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7819504737854004,
      "learning_rate": 0.0002867324997193572,
      "loss": 3.0073,
      "step": 118446
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8980599641799927,
      "learning_rate": 0.0002867284131606411,
      "loss": 2.893,
      "step": 118447
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1220858097076416,
      "learning_rate": 0.00028672432660439254,
      "loss": 2.8931,
      "step": 118448
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.702318787574768,
      "learning_rate": 0.0002867202400506123,
      "loss": 2.9439,
      "step": 118449
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6480069160461426,
      "learning_rate": 0.00028671615349930085,
      "loss": 3.0621,
      "step": 118450
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7433147430419922,
      "learning_rate": 0.0002867120669504591,
      "loss": 2.9032,
      "step": 118451
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.884019136428833,
      "learning_rate": 0.00028670798040408786,
      "loss": 3.2449,
      "step": 118452
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8589956760406494,
      "learning_rate": 0.0002867038938601879,
      "loss": 3.0213,
      "step": 118453
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9817805290222168,
      "learning_rate": 0.0002866998073187598,
      "loss": 2.961,
      "step": 118454
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7420467138290405,
      "learning_rate": 0.00028669572077980477,
      "loss": 3.0604,
      "step": 118455
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9811550378799438,
      "learning_rate": 0.000286691634243323,
      "loss": 2.6509,
      "step": 118456
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.2164855003356934,
      "learning_rate": 0.00028668754770931555,
      "loss": 2.7738,
      "step": 118457
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0867760181427,
      "learning_rate": 0.0002866834611777831,
      "loss": 2.7792,
      "step": 118458
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.104379415512085,
      "learning_rate": 0.00028667937464872645,
      "loss": 3.0308,
      "step": 118459
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8608254194259644,
      "learning_rate": 0.00028667528812214644,
      "loss": 2.9652,
      "step": 118460
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2678685188293457,
      "learning_rate": 0.0002866712015980438,
      "loss": 2.7935,
      "step": 118461
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.903838038444519,
      "learning_rate": 0.0002866671150764191,
      "loss": 3.0091,
      "step": 118462
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9020140171051025,
      "learning_rate": 0.0002866630285572733,
      "loss": 3.161,
      "step": 118463
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7881888151168823,
      "learning_rate": 0.000286658942040607,
      "loss": 3.0038,
      "step": 118464
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7454419136047363,
      "learning_rate": 0.0002866548555264211,
      "loss": 3.0252,
      "step": 118465
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0655269622802734,
      "learning_rate": 0.00028665076901471633,
      "loss": 2.9179,
      "step": 118466
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.96720552444458,
      "learning_rate": 0.0002866466825054935,
      "loss": 2.9908,
      "step": 118467
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7553118467330933,
      "learning_rate": 0.00028664259599875326,
      "loss": 2.9144,
      "step": 118468
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1635935306549072,
      "learning_rate": 0.0002866385094944964,
      "loss": 2.9407,
      "step": 118469
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9162336587905884,
      "learning_rate": 0.0002866344229927237,
      "loss": 3.0149,
      "step": 118470
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6135927438735962,
      "learning_rate": 0.0002866303364934359,
      "loss": 3.1089,
      "step": 118471
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9158729314804077,
      "learning_rate": 0.0002866262499966337,
      "loss": 2.8743,
      "step": 118472
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.644608497619629,
      "learning_rate": 0.00028662216350231807,
      "loss": 2.901,
      "step": 118473
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1780683994293213,
      "learning_rate": 0.00028661807701048955,
      "loss": 2.8895,
      "step": 118474
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.868996262550354,
      "learning_rate": 0.00028661399052114906,
      "loss": 3.1419,
      "step": 118475
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2243142127990723,
      "learning_rate": 0.00028660990403429724,
      "loss": 3.0454,
      "step": 118476
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7480640411376953,
      "learning_rate": 0.0002866058175499348,
      "loss": 3.0425,
      "step": 118477
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8261303901672363,
      "learning_rate": 0.0002866017310680627,
      "loss": 3.1749,
      "step": 118478
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7578835487365723,
      "learning_rate": 0.0002865976445886816,
      "loss": 3.1305,
      "step": 118479
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8688675165176392,
      "learning_rate": 0.0002865935581117922,
      "loss": 3.1328,
      "step": 118480
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.865495204925537,
      "learning_rate": 0.0002865894716373953,
      "loss": 3.1426,
      "step": 118481
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.941220760345459,
      "learning_rate": 0.00028658538516549163,
      "loss": 2.9239,
      "step": 118482
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.119187593460083,
      "learning_rate": 0.00028658129869608214,
      "loss": 2.9903,
      "step": 118483
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.193774461746216,
      "learning_rate": 0.00028657721222916734,
      "loss": 3.046,
      "step": 118484
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.683340311050415,
      "learning_rate": 0.0002865731257647481,
      "loss": 3.0926,
      "step": 118485
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7182230949401855,
      "learning_rate": 0.0002865690393028252,
      "loss": 3.1467,
      "step": 118486
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.088547945022583,
      "learning_rate": 0.0002865649528433993,
      "loss": 3.1525,
      "step": 118487
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1359505653381348,
      "learning_rate": 0.00028656086638647124,
      "loss": 3.0951,
      "step": 118488
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.711744785308838,
      "learning_rate": 0.0002865567799320418,
      "loss": 3.2894,
      "step": 118489
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.2425503730773926,
      "learning_rate": 0.00028655269348011174,
      "loss": 3.2302,
      "step": 118490
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9581040143966675,
      "learning_rate": 0.0002865486070306818,
      "loss": 2.9107,
      "step": 118491
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8426895141601562,
      "learning_rate": 0.00028654452058375266,
      "loss": 3.0924,
      "step": 118492
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4634010791778564,
      "learning_rate": 0.00028654043413932513,
      "loss": 2.9646,
      "step": 118493
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.408705711364746,
      "learning_rate": 0.00028653634769740006,
      "loss": 3.2208,
      "step": 118494
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.754551649093628,
      "learning_rate": 0.0002865322612579781,
      "loss": 2.9042,
      "step": 118495
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.574615716934204,
      "learning_rate": 0.00028652817482106,
      "loss": 3.1096,
      "step": 118496
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3301193714141846,
      "learning_rate": 0.0002865240883866467,
      "loss": 2.6455,
      "step": 118497
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2342703342437744,
      "learning_rate": 0.00028652000195473874,
      "loss": 2.7269,
      "step": 118498
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.697091579437256,
      "learning_rate": 0.00028651591552533696,
      "loss": 3.0813,
      "step": 118499
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9894952774047852,
      "learning_rate": 0.0002865118290984421,
      "loss": 2.9653,
      "step": 118500
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.9515936374664307,
      "learning_rate": 0.000286507742674055,
      "loss": 2.978,
      "step": 118501
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.4109981060028076,
      "learning_rate": 0.0002865036562521763,
      "loss": 3.0723,
      "step": 118502
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.677694320678711,
      "learning_rate": 0.000286499569832807,
      "loss": 2.9945,
      "step": 118503
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.8069963455200195,
      "learning_rate": 0.0002864954834159475,
      "loss": 2.9265,
      "step": 118504
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.073516607284546,
      "learning_rate": 0.00028649139700159885,
      "loss": 3.1973,
      "step": 118505
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.974253535270691,
      "learning_rate": 0.00028648731058976163,
      "loss": 2.9021,
      "step": 118506
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9148072004318237,
      "learning_rate": 0.00028648322418043667,
      "loss": 2.7603,
      "step": 118507
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.013172149658203,
      "learning_rate": 0.0002864791377736247,
      "loss": 2.8831,
      "step": 118508
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1163206100463867,
      "learning_rate": 0.0002864750513693266,
      "loss": 2.8596,
      "step": 118509
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.196603536605835,
      "learning_rate": 0.00028647096496754314,
      "loss": 3.0609,
      "step": 118510
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.942417025566101,
      "learning_rate": 0.00028646687856827486,
      "loss": 3.0572,
      "step": 118511
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.608964681625366,
      "learning_rate": 0.0002864627921715226,
      "loss": 3.1051,
      "step": 118512
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.6071155071258545,
      "learning_rate": 0.00028645870577728716,
      "loss": 2.8952,
      "step": 118513
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8857990503311157,
      "learning_rate": 0.00028645461938556937,
      "loss": 2.9969,
      "step": 118514
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8507177829742432,
      "learning_rate": 0.00028645053299636986,
      "loss": 2.7759,
      "step": 118515
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2079288959503174,
      "learning_rate": 0.0002864464466096896,
      "loss": 2.833,
      "step": 118516
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3854713439941406,
      "learning_rate": 0.00028644236022552907,
      "loss": 2.9762,
      "step": 118517
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.94814133644104,
      "learning_rate": 0.0002864382738438892,
      "loss": 2.889,
      "step": 118518
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8967723846435547,
      "learning_rate": 0.00028643418746477066,
      "loss": 2.8559,
      "step": 118519
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1817245483398438,
      "learning_rate": 0.0002864301010881743,
      "loss": 2.9406,
      "step": 118520
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9998494386672974,
      "learning_rate": 0.0002864260147141008,
      "loss": 3.3421,
      "step": 118521
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8688480854034424,
      "learning_rate": 0.00028642192834255113,
      "loss": 2.8043,
      "step": 118522
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8111857175827026,
      "learning_rate": 0.0002864178419735257,
      "loss": 3.2612,
      "step": 118523
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.052168369293213,
      "learning_rate": 0.0002864137556070255,
      "loss": 3.1384,
      "step": 118524
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.196251392364502,
      "learning_rate": 0.0002864096692430512,
      "loss": 3.0932,
      "step": 118525
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7804503440856934,
      "learning_rate": 0.0002864055828816036,
      "loss": 3.1946,
      "step": 118526
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.165964126586914,
      "learning_rate": 0.0002864014965226835,
      "loss": 2.8736,
      "step": 118527
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9360562562942505,
      "learning_rate": 0.0002863974101662917,
      "loss": 3.2172,
      "step": 118528
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.298006772994995,
      "learning_rate": 0.0002863933238124288,
      "loss": 3.107,
      "step": 118529
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.260998487472534,
      "learning_rate": 0.0002863892374610956,
      "loss": 2.965,
      "step": 118530
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2196617126464844,
      "learning_rate": 0.00028638515111229293,
      "loss": 3.039,
      "step": 118531
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1505184173583984,
      "learning_rate": 0.0002863810647660215,
      "loss": 2.6249,
      "step": 118532
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.70598566532135,
      "learning_rate": 0.0002863769784222821,
      "loss": 3.2442,
      "step": 118533
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.850838303565979,
      "learning_rate": 0.0002863728920810756,
      "loss": 3.1396,
      "step": 118534
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4184987545013428,
      "learning_rate": 0.00028636880574240244,
      "loss": 2.985,
      "step": 118535
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5480592250823975,
      "learning_rate": 0.00028636471940626367,
      "loss": 2.6114,
      "step": 118536
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.474759101867676,
      "learning_rate": 0.00028636063307265993,
      "loss": 3.01,
      "step": 118537
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9339708089828491,
      "learning_rate": 0.000286356546741592,
      "loss": 3.0302,
      "step": 118538
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.305527925491333,
      "learning_rate": 0.0002863524604130606,
      "loss": 3.1442,
      "step": 118539
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.420974016189575,
      "learning_rate": 0.00028634837408706673,
      "loss": 3.1283,
      "step": 118540
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9021508693695068,
      "learning_rate": 0.0002863442877636108,
      "loss": 3.0437,
      "step": 118541
    },
    {
      "epoch": 1.54,
      "grad_norm": 5.04487419128418,
      "learning_rate": 0.0002863402014426937,
      "loss": 2.9241,
      "step": 118542
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7432489395141602,
      "learning_rate": 0.00028633611512431627,
      "loss": 3.1703,
      "step": 118543
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.859697699546814,
      "learning_rate": 0.0002863320288084792,
      "loss": 2.9153,
      "step": 118544
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.829353928565979,
      "learning_rate": 0.0002863279424951832,
      "loss": 2.8868,
      "step": 118545
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0900955200195312,
      "learning_rate": 0.0002863238561844293,
      "loss": 2.9403,
      "step": 118546
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8011155128479004,
      "learning_rate": 0.0002863197698762179,
      "loss": 2.7337,
      "step": 118547
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0692968368530273,
      "learning_rate": 0.00028631568357054987,
      "loss": 3.036,
      "step": 118548
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.5578429698944092,
      "learning_rate": 0.0002863115972674261,
      "loss": 2.9886,
      "step": 118549
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0321362018585205,
      "learning_rate": 0.00028630751096684714,
      "loss": 2.9457,
      "step": 118550
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4662363529205322,
      "learning_rate": 0.00028630342466881396,
      "loss": 3.0458,
      "step": 118551
    },
    {
      "epoch": 1.54,
      "grad_norm": 5.531080722808838,
      "learning_rate": 0.0002862993383733273,
      "loss": 3.0388,
      "step": 118552
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1014010906219482,
      "learning_rate": 0.00028629525208038786,
      "loss": 2.8949,
      "step": 118553
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.1498475074768066,
      "learning_rate": 0.0002862911657899963,
      "loss": 2.9582,
      "step": 118554
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7381709814071655,
      "learning_rate": 0.0002862870795021535,
      "loss": 2.7742,
      "step": 118555
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.323051691055298,
      "learning_rate": 0.0002862829932168601,
      "loss": 3.1296,
      "step": 118556
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.601439952850342,
      "learning_rate": 0.000286278906934117,
      "loss": 3.056,
      "step": 118557
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7628120183944702,
      "learning_rate": 0.000286274820653925,
      "loss": 2.8838,
      "step": 118558
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.822555661201477,
      "learning_rate": 0.0002862707343762847,
      "loss": 2.9266,
      "step": 118559
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.072620391845703,
      "learning_rate": 0.00028626664810119703,
      "loss": 2.9468,
      "step": 118560
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9251431226730347,
      "learning_rate": 0.0002862625618286625,
      "loss": 3.1242,
      "step": 118561
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8727362155914307,
      "learning_rate": 0.00028625847555868207,
      "loss": 3.1999,
      "step": 118562
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.050110101699829,
      "learning_rate": 0.0002862543892912564,
      "loss": 2.8953,
      "step": 118563
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.091458797454834,
      "learning_rate": 0.0002862503030263864,
      "loss": 2.9721,
      "step": 118564
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6744654178619385,
      "learning_rate": 0.0002862462167640727,
      "loss": 2.7876,
      "step": 118565
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7813204526901245,
      "learning_rate": 0.00028624213050431604,
      "loss": 2.9254,
      "step": 118566
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.4319686889648438,
      "learning_rate": 0.00028623804424711733,
      "loss": 2.9142,
      "step": 118567
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.8182780742645264,
      "learning_rate": 0.0002862339579924771,
      "loss": 2.9903,
      "step": 118568
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9386731386184692,
      "learning_rate": 0.00028622987174039627,
      "loss": 2.8554,
      "step": 118569
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.2367122173309326,
      "learning_rate": 0.00028622578549087566,
      "loss": 2.9456,
      "step": 118570
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7172069549560547,
      "learning_rate": 0.0002862216992439159,
      "loss": 3.1358,
      "step": 118571
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4139463901519775,
      "learning_rate": 0.0002862176129995177,
      "loss": 3.0046,
      "step": 118572
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8295918703079224,
      "learning_rate": 0.0002862135267576819,
      "loss": 2.9276,
      "step": 118573
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.9058470726013184,
      "learning_rate": 0.00028620944051840934,
      "loss": 3.0652,
      "step": 118574
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.8873672485351562,
      "learning_rate": 0.00028620535428170075,
      "loss": 3.046,
      "step": 118575
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3148932456970215,
      "learning_rate": 0.00028620126804755673,
      "loss": 2.9639,
      "step": 118576
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.125302791595459,
      "learning_rate": 0.00028619718181597826,
      "loss": 2.9714,
      "step": 118577
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1988484859466553,
      "learning_rate": 0.000286193095586966,
      "loss": 2.906,
      "step": 118578
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8378082513809204,
      "learning_rate": 0.00028618900936052056,
      "loss": 2.9119,
      "step": 118579
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9347623586654663,
      "learning_rate": 0.0002861849231366429,
      "loss": 2.8394,
      "step": 118580
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3425519466400146,
      "learning_rate": 0.0002861808369153338,
      "loss": 3.0484,
      "step": 118581
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9630409479141235,
      "learning_rate": 0.00028617675069659394,
      "loss": 2.9839,
      "step": 118582
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.125115394592285,
      "learning_rate": 0.00028617266448042404,
      "loss": 3.0087,
      "step": 118583
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7433562278747559,
      "learning_rate": 0.0002861685782668249,
      "loss": 3.1486,
      "step": 118584
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1328916549682617,
      "learning_rate": 0.00028616449205579725,
      "loss": 3.1917,
      "step": 118585
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.9529080390930176,
      "learning_rate": 0.00028616040584734194,
      "loss": 2.9696,
      "step": 118586
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0523033142089844,
      "learning_rate": 0.00028615631964145965,
      "loss": 3.0675,
      "step": 118587
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8493859767913818,
      "learning_rate": 0.0002861522334381511,
      "loss": 2.8085,
      "step": 118588
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7741121053695679,
      "learning_rate": 0.0002861481472374173,
      "loss": 2.8651,
      "step": 118589
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.752871513366699,
      "learning_rate": 0.00028614406103925867,
      "loss": 3.0387,
      "step": 118590
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2358524799346924,
      "learning_rate": 0.00028613997484367614,
      "loss": 2.8089,
      "step": 118591
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0418312549591064,
      "learning_rate": 0.00028613588865067044,
      "loss": 3.0053,
      "step": 118592
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.532343864440918,
      "learning_rate": 0.0002861318024602423,
      "loss": 2.9529,
      "step": 118593
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3378970623016357,
      "learning_rate": 0.0002861277162723926,
      "loss": 2.8647,
      "step": 118594
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.6411972045898438,
      "learning_rate": 0.0002861236300871221,
      "loss": 2.8905,
      "step": 118595
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7770342826843262,
      "learning_rate": 0.00028611954390443136,
      "loss": 3.1756,
      "step": 118596
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7311835289001465,
      "learning_rate": 0.00028611545772432126,
      "loss": 3.3645,
      "step": 118597
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.489969491958618,
      "learning_rate": 0.00028611137154679254,
      "loss": 2.7688,
      "step": 118598
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5576038360595703,
      "learning_rate": 0.000286107285371846,
      "loss": 3.0952,
      "step": 118599
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0426626205444336,
      "learning_rate": 0.0002861031991994824,
      "loss": 2.7537,
      "step": 118600
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.5913050174713135,
      "learning_rate": 0.0002860991130297025,
      "loss": 3.0741,
      "step": 118601
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0430145263671875,
      "learning_rate": 0.000286095026862507,
      "loss": 2.7853,
      "step": 118602
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8940556049346924,
      "learning_rate": 0.0002860909406978967,
      "loss": 3.1627,
      "step": 118603
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.9838875532150269,
      "learning_rate": 0.00028608685453587236,
      "loss": 2.9342,
      "step": 118604
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7738170623779297,
      "learning_rate": 0.0002860827683764347,
      "loss": 2.9833,
      "step": 118605
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8089951276779175,
      "learning_rate": 0.00028607868221958453,
      "loss": 2.9686,
      "step": 118606
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.494596242904663,
      "learning_rate": 0.00028607459606532274,
      "loss": 3.1123,
      "step": 118607
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0916202068328857,
      "learning_rate": 0.0002860705099136498,
      "loss": 2.8709,
      "step": 118608
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0084500312805176,
      "learning_rate": 0.0002860664237645666,
      "loss": 3.1327,
      "step": 118609
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.573010206222534,
      "learning_rate": 0.0002860623376180739,
      "loss": 2.9689,
      "step": 118610
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7575445175170898,
      "learning_rate": 0.00028605825147417254,
      "loss": 2.8466,
      "step": 118611
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.538208484649658,
      "learning_rate": 0.00028605416533286317,
      "loss": 3.144,
      "step": 118612
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.899959921836853,
      "learning_rate": 0.0002860500791941468,
      "loss": 2.7873,
      "step": 118613
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.126725435256958,
      "learning_rate": 0.00028604599305802374,
      "loss": 2.7962,
      "step": 118614
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8715444803237915,
      "learning_rate": 0.00028604190692449504,
      "loss": 3.0373,
      "step": 118615
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0184707641601562,
      "learning_rate": 0.0002860378207935614,
      "loss": 2.9879,
      "step": 118616
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8986161947250366,
      "learning_rate": 0.0002860337346652236,
      "loss": 2.7881,
      "step": 118617
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7371621131896973,
      "learning_rate": 0.0002860296485394824,
      "loss": 3.1136,
      "step": 118618
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3533105850219727,
      "learning_rate": 0.0002860255624163387,
      "loss": 2.9407,
      "step": 118619
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0143117904663086,
      "learning_rate": 0.00028602147629579297,
      "loss": 2.6539,
      "step": 118620
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8290752172470093,
      "learning_rate": 0.0002860173901778461,
      "loss": 2.9675,
      "step": 118621
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.125527858734131,
      "learning_rate": 0.00028601330406249887,
      "loss": 3.1654,
      "step": 118622
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.476959228515625,
      "learning_rate": 0.00028600921794975204,
      "loss": 2.9996,
      "step": 118623
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.809100866317749,
      "learning_rate": 0.0002860051318396063,
      "loss": 2.7657,
      "step": 118624
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6068962812423706,
      "learning_rate": 0.00028600104573206264,
      "loss": 3.0803,
      "step": 118625
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1944847106933594,
      "learning_rate": 0.00028599695962712153,
      "loss": 2.9553,
      "step": 118626
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.84751558303833,
      "learning_rate": 0.00028599287352478386,
      "loss": 2.87,
      "step": 118627
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.701462984085083,
      "learning_rate": 0.0002859887874250504,
      "loss": 3.1102,
      "step": 118628
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0109617710113525,
      "learning_rate": 0.0002859847013279218,
      "loss": 3.0111,
      "step": 118629
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.947893738746643,
      "learning_rate": 0.00028598061523339897,
      "loss": 3.0029,
      "step": 118630
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.454423427581787,
      "learning_rate": 0.00028597652914148276,
      "loss": 2.8051,
      "step": 118631
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.4404242038726807,
      "learning_rate": 0.0002859724430521736,
      "loss": 3.1194,
      "step": 118632
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.3146684169769287,
      "learning_rate": 0.0002859683569654725,
      "loss": 3.0816,
      "step": 118633
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.993752121925354,
      "learning_rate": 0.00028596427088138003,
      "loss": 2.936,
      "step": 118634
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.840734839439392,
      "learning_rate": 0.00028596018479989715,
      "loss": 2.9684,
      "step": 118635
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0607142448425293,
      "learning_rate": 0.00028595609872102454,
      "loss": 3.0755,
      "step": 118636
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.112297534942627,
      "learning_rate": 0.000285952012644763,
      "loss": 2.9101,
      "step": 118637
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.1698880195617676,
      "learning_rate": 0.00028594792657111323,
      "loss": 3.0581,
      "step": 118638
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.091317892074585,
      "learning_rate": 0.00028594384050007593,
      "loss": 2.7038,
      "step": 118639
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8496330976486206,
      "learning_rate": 0.00028593975443165194,
      "loss": 3.1185,
      "step": 118640
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.155327558517456,
      "learning_rate": 0.0002859356683658421,
      "loss": 2.8461,
      "step": 118641
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.44809889793396,
      "learning_rate": 0.000285931582302647,
      "loss": 2.7785,
      "step": 118642
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.0693325996398926,
      "learning_rate": 0.0002859274962420675,
      "loss": 2.9713,
      "step": 118643
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.7464524507522583,
      "learning_rate": 0.00028592341018410445,
      "loss": 3.0754,
      "step": 118644
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2794852256774902,
      "learning_rate": 0.00028591932412875843,
      "loss": 3.1177,
      "step": 118645
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.474287748336792,
      "learning_rate": 0.0002859152380760303,
      "loss": 3.0893,
      "step": 118646
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.235487699508667,
      "learning_rate": 0.0002859111520259207,
      "loss": 3.0927,
      "step": 118647
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.6800462007522583,
      "learning_rate": 0.0002859070659784305,
      "loss": 3.3006,
      "step": 118648
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.2191104888916016,
      "learning_rate": 0.00028590297993356045,
      "loss": 3.1321,
      "step": 118649
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.245757818222046,
      "learning_rate": 0.00028589889389131143,
      "loss": 3.1435,
      "step": 118650
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.8865547180175781,
      "learning_rate": 0.00028589480785168396,
      "loss": 2.9939,
      "step": 118651
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.845999002456665,
      "learning_rate": 0.00028589072181467895,
      "loss": 2.9583,
      "step": 118652
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.8549771308898926,
      "learning_rate": 0.00028588663578029707,
      "loss": 2.9961,
      "step": 118653
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.7487897872924805,
      "learning_rate": 0.00028588254974853917,
      "loss": 2.9285,
      "step": 118654
    },
    {
      "epoch": 1.54,
      "grad_norm": 1.756827473640442,
      "learning_rate": 0.0002858784637194059,
      "loss": 2.9507,
      "step": 118655
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.836038589477539,
      "learning_rate": 0.0002858743776928982,
      "loss": 2.8989,
      "step": 118656
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.9014692306518555,
      "learning_rate": 0.00028587029166901666,
      "loss": 2.8453,
      "step": 118657
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7766953706741333,
      "learning_rate": 0.00028586620564776213,
      "loss": 2.8747,
      "step": 118658
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2386114597320557,
      "learning_rate": 0.00028586211962913533,
      "loss": 2.7605,
      "step": 118659
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1872382164001465,
      "learning_rate": 0.000285858033613137,
      "loss": 2.7624,
      "step": 118660
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.825537919998169,
      "learning_rate": 0.00028585394759976794,
      "loss": 3.0974,
      "step": 118661
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8878567218780518,
      "learning_rate": 0.00028584986158902893,
      "loss": 3.1402,
      "step": 118662
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.155729055404663,
      "learning_rate": 0.0002858457755809207,
      "loss": 2.8594,
      "step": 118663
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1881821155548096,
      "learning_rate": 0.00028584168957544395,
      "loss": 3.1287,
      "step": 118664
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7241384983062744,
      "learning_rate": 0.0002858376035725995,
      "loss": 3.147,
      "step": 118665
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9370098114013672,
      "learning_rate": 0.00028583351757238815,
      "loss": 3.1492,
      "step": 118666
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6334455013275146,
      "learning_rate": 0.00028582943157481064,
      "loss": 3.154,
      "step": 118667
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8132482767105103,
      "learning_rate": 0.0002858253455798677,
      "loss": 2.9511,
      "step": 118668
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8308974504470825,
      "learning_rate": 0.00028582125958756,
      "loss": 2.9599,
      "step": 118669
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0584614276885986,
      "learning_rate": 0.00028581717359788846,
      "loss": 2.8037,
      "step": 118670
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8631963729858398,
      "learning_rate": 0.0002858130876108538,
      "loss": 2.983,
      "step": 118671
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5036041736602783,
      "learning_rate": 0.00028580900162645666,
      "loss": 2.9258,
      "step": 118672
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1848113536834717,
      "learning_rate": 0.00028580491564469794,
      "loss": 2.9151,
      "step": 118673
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5242490768432617,
      "learning_rate": 0.0002858008296655785,
      "loss": 3.0724,
      "step": 118674
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7493982315063477,
      "learning_rate": 0.0002857967436890988,
      "loss": 2.8611,
      "step": 118675
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9514687061309814,
      "learning_rate": 0.0002857926577152598,
      "loss": 2.8787,
      "step": 118676
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8531161546707153,
      "learning_rate": 0.0002857885717440622,
      "loss": 2.8655,
      "step": 118677
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9428138732910156,
      "learning_rate": 0.0002857844857755067,
      "loss": 3.1375,
      "step": 118678
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.473782539367676,
      "learning_rate": 0.0002857803998095942,
      "loss": 3.0918,
      "step": 118679
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.624518632888794,
      "learning_rate": 0.0002857763138463255,
      "loss": 3.1957,
      "step": 118680
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5033390522003174,
      "learning_rate": 0.00028577222788570114,
      "loss": 2.8294,
      "step": 118681
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.069704532623291,
      "learning_rate": 0.00028576814192772195,
      "loss": 3.1236,
      "step": 118682
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.5812864303588867,
      "learning_rate": 0.0002857640559723888,
      "loss": 2.9384,
      "step": 118683
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.6998703479766846,
      "learning_rate": 0.0002857599700197023,
      "loss": 2.8455,
      "step": 118684
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7204666137695312,
      "learning_rate": 0.00028575588406966335,
      "loss": 3.009,
      "step": 118685
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7593140602111816,
      "learning_rate": 0.0002857517981222728,
      "loss": 3.063,
      "step": 118686
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6082234382629395,
      "learning_rate": 0.0002857477121775311,
      "loss": 2.8584,
      "step": 118687
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2191812992095947,
      "learning_rate": 0.00028574362623543913,
      "loss": 2.8597,
      "step": 118688
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1520230770111084,
      "learning_rate": 0.0002857395402959977,
      "loss": 2.9764,
      "step": 118689
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.279198169708252,
      "learning_rate": 0.0002857354543592076,
      "loss": 2.8613,
      "step": 118690
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.51896595954895,
      "learning_rate": 0.00028573136842506954,
      "loss": 2.9609,
      "step": 118691
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.925661563873291,
      "learning_rate": 0.0002857272824935844,
      "loss": 3.1781,
      "step": 118692
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4617812633514404,
      "learning_rate": 0.0002857231965647527,
      "loss": 2.9619,
      "step": 118693
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9733400344848633,
      "learning_rate": 0.0002857191106385753,
      "loss": 2.8477,
      "step": 118694
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3377606868743896,
      "learning_rate": 0.00028571502471505305,
      "loss": 2.837,
      "step": 118695
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.071836471557617,
      "learning_rate": 0.0002857109387941866,
      "loss": 2.8325,
      "step": 118696
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.7714030742645264,
      "learning_rate": 0.00028570685287597676,
      "loss": 2.9159,
      "step": 118697
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.595567226409912,
      "learning_rate": 0.00028570276696042446,
      "loss": 3.0489,
      "step": 118698
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2268972396850586,
      "learning_rate": 0.0002856986810475301,
      "loss": 2.7734,
      "step": 118699
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.177384853363037,
      "learning_rate": 0.00028569459513729466,
      "loss": 2.6532,
      "step": 118700
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8449320793151855,
      "learning_rate": 0.00028569050922971884,
      "loss": 3.0598,
      "step": 118701
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.60229229927063,
      "learning_rate": 0.00028568642332480344,
      "loss": 2.7176,
      "step": 118702
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.788938283920288,
      "learning_rate": 0.0002856823374225492,
      "loss": 3.027,
      "step": 118703
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6192283630371094,
      "learning_rate": 0.000285678251522957,
      "loss": 3.1021,
      "step": 118704
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8712568283081055,
      "learning_rate": 0.00028567416562602735,
      "loss": 3.3411,
      "step": 118705
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.058652639389038,
      "learning_rate": 0.0002856700797317612,
      "loss": 3.2042,
      "step": 118706
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.194471836090088,
      "learning_rate": 0.0002856659938401592,
      "loss": 3.2745,
      "step": 118707
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.047746181488037,
      "learning_rate": 0.00028566190795122217,
      "loss": 3.1092,
      "step": 118708
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.772116184234619,
      "learning_rate": 0.00028565782206495087,
      "loss": 3.095,
      "step": 118709
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.2838263511657715,
      "learning_rate": 0.00028565373618134605,
      "loss": 2.8741,
      "step": 118710
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.308349132537842,
      "learning_rate": 0.0002856496503004086,
      "loss": 2.7278,
      "step": 118711
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9563299417495728,
      "learning_rate": 0.000285645564422139,
      "loss": 3.1368,
      "step": 118712
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2135512828826904,
      "learning_rate": 0.00028564147854653817,
      "loss": 2.9875,
      "step": 118713
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0147907733917236,
      "learning_rate": 0.0002856373926736069,
      "loss": 2.9424,
      "step": 118714
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7081533670425415,
      "learning_rate": 0.0002856333068033459,
      "loss": 2.9749,
      "step": 118715
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0141677856445312,
      "learning_rate": 0.0002856292209357559,
      "loss": 3.0569,
      "step": 118716
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.925558567047119,
      "learning_rate": 0.00028562513507083783,
      "loss": 2.9061,
      "step": 118717
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7119157314300537,
      "learning_rate": 0.00028562104920859223,
      "loss": 2.9519,
      "step": 118718
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9082783460617065,
      "learning_rate": 0.0002856169633490199,
      "loss": 2.7914,
      "step": 118719
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.280413866043091,
      "learning_rate": 0.0002856128774921217,
      "loss": 3.3342,
      "step": 118720
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3072774410247803,
      "learning_rate": 0.0002856087916378983,
      "loss": 3.0754,
      "step": 118721
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.038106679916382,
      "learning_rate": 0.00028560470578635054,
      "loss": 2.9122,
      "step": 118722
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8088821172714233,
      "learning_rate": 0.00028560061993747923,
      "loss": 3.2607,
      "step": 118723
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0397844314575195,
      "learning_rate": 0.0002855965340912849,
      "loss": 3.1588,
      "step": 118724
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2925915718078613,
      "learning_rate": 0.0002855924482477684,
      "loss": 2.7746,
      "step": 118725
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.381838798522949,
      "learning_rate": 0.00028558836240693067,
      "loss": 2.9566,
      "step": 118726
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.921784520149231,
      "learning_rate": 0.00028558427656877225,
      "loss": 3.1974,
      "step": 118727
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.13043475151062,
      "learning_rate": 0.00028558019073329397,
      "loss": 2.9313,
      "step": 118728
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0368947982788086,
      "learning_rate": 0.00028557610490049677,
      "loss": 3.0211,
      "step": 118729
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8429473638534546,
      "learning_rate": 0.00028557201907038107,
      "loss": 3.0953,
      "step": 118730
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8319880962371826,
      "learning_rate": 0.00028556793324294784,
      "loss": 2.7569,
      "step": 118731
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2067172527313232,
      "learning_rate": 0.00028556384741819786,
      "loss": 3.0071,
      "step": 118732
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4350929260253906,
      "learning_rate": 0.00028555976159613176,
      "loss": 3.1011,
      "step": 118733
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.779431700706482,
      "learning_rate": 0.00028555567577675037,
      "loss": 2.8133,
      "step": 118734
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9372864961624146,
      "learning_rate": 0.00028555158996005454,
      "loss": 3.328,
      "step": 118735
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.149512767791748,
      "learning_rate": 0.0002855475041460449,
      "loss": 3.0401,
      "step": 118736
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9257993698120117,
      "learning_rate": 0.00028554341833472225,
      "loss": 3.0402,
      "step": 118737
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8491506576538086,
      "learning_rate": 0.0002855393325260873,
      "loss": 2.9667,
      "step": 118738
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8405351638793945,
      "learning_rate": 0.0002855352467201409,
      "loss": 3.1743,
      "step": 118739
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1854360103607178,
      "learning_rate": 0.00028553116091688377,
      "loss": 3.1055,
      "step": 118740
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.82709801197052,
      "learning_rate": 0.00028552707511631675,
      "loss": 3.1866,
      "step": 118741
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9857431650161743,
      "learning_rate": 0.0002855229893184404,
      "loss": 3.0942,
      "step": 118742
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9333624839782715,
      "learning_rate": 0.00028551890352325563,
      "loss": 2.76,
      "step": 118743
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8092265129089355,
      "learning_rate": 0.00028551481773076326,
      "loss": 3.2157,
      "step": 118744
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.941296100616455,
      "learning_rate": 0.00028551073194096383,
      "loss": 2.9716,
      "step": 118745
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1749370098114014,
      "learning_rate": 0.0002855066461538583,
      "loss": 2.874,
      "step": 118746
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.383390426635742,
      "learning_rate": 0.0002855025603694474,
      "loss": 2.7759,
      "step": 118747
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0552074909210205,
      "learning_rate": 0.00028549847458773177,
      "loss": 2.9725,
      "step": 118748
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8739312887191772,
      "learning_rate": 0.00028549438880871224,
      "loss": 3.0083,
      "step": 118749
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8796911239624023,
      "learning_rate": 0.00028549030303238955,
      "loss": 2.7182,
      "step": 118750
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7950931787490845,
      "learning_rate": 0.0002854862172587646,
      "loss": 3.0539,
      "step": 118751
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9737480878829956,
      "learning_rate": 0.0002854821314878379,
      "loss": 2.8968,
      "step": 118752
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9588643312454224,
      "learning_rate": 0.00028547804571961053,
      "loss": 3.171,
      "step": 118753
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9807767868041992,
      "learning_rate": 0.0002854739599540829,
      "loss": 3.1497,
      "step": 118754
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.091879367828369,
      "learning_rate": 0.000285469874191256,
      "loss": 2.8006,
      "step": 118755
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9974753856658936,
      "learning_rate": 0.0002854657884311305,
      "loss": 2.9633,
      "step": 118756
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9094246625900269,
      "learning_rate": 0.0002854617026737072,
      "loss": 3.1095,
      "step": 118757
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.766598701477051,
      "learning_rate": 0.00028545761691898684,
      "loss": 3.2475,
      "step": 118758
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.4667794704437256,
      "learning_rate": 0.0002854535311669703,
      "loss": 2.9862,
      "step": 118759
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.516421318054199,
      "learning_rate": 0.00028544944541765806,
      "loss": 2.9236,
      "step": 118760
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9800890684127808,
      "learning_rate": 0.0002854453596710511,
      "loss": 2.9385,
      "step": 118761
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.3400537967681885,
      "learning_rate": 0.0002854412739271501,
      "loss": 3.0733,
      "step": 118762
    },
    {
      "epoch": 1.55,
      "grad_norm": 6.437040328979492,
      "learning_rate": 0.0002854371881859558,
      "loss": 2.8309,
      "step": 118763
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7071611881256104,
      "learning_rate": 0.00028543310244746905,
      "loss": 2.8436,
      "step": 118764
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8326404094696045,
      "learning_rate": 0.00028542901671169065,
      "loss": 3.1496,
      "step": 118765
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7327749729156494,
      "learning_rate": 0.0002854249309786212,
      "loss": 3.1083,
      "step": 118766
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.918674111366272,
      "learning_rate": 0.0002854208452482615,
      "loss": 3.0585,
      "step": 118767
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6980558633804321,
      "learning_rate": 0.00028541675952061234,
      "loss": 2.8447,
      "step": 118768
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1416988372802734,
      "learning_rate": 0.0002854126737956745,
      "loss": 3.0365,
      "step": 118769
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9437255859375,
      "learning_rate": 0.00028540858807344873,
      "loss": 3.0138,
      "step": 118770
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8489985466003418,
      "learning_rate": 0.00028540450235393585,
      "loss": 2.8346,
      "step": 118771
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7056158781051636,
      "learning_rate": 0.00028540041663713646,
      "loss": 3.0545,
      "step": 118772
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6536283493041992,
      "learning_rate": 0.0002853963309230514,
      "loss": 3.0539,
      "step": 118773
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8863894939422607,
      "learning_rate": 0.0002853922452116814,
      "loss": 3.0875,
      "step": 118774
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2841622829437256,
      "learning_rate": 0.00028538815950302735,
      "loss": 3.0169,
      "step": 118775
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8504472970962524,
      "learning_rate": 0.0002853840737970898,
      "loss": 2.9942,
      "step": 118776
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9387483596801758,
      "learning_rate": 0.0002853799880938697,
      "loss": 3.0107,
      "step": 118777
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.065429210662842,
      "learning_rate": 0.00028537590239336786,
      "loss": 3.3228,
      "step": 118778
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9915461540222168,
      "learning_rate": 0.0002853718166955848,
      "loss": 2.7547,
      "step": 118779
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.133758544921875,
      "learning_rate": 0.00028536773100052137,
      "loss": 2.8233,
      "step": 118780
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.719663143157959,
      "learning_rate": 0.0002853636453081784,
      "loss": 2.9295,
      "step": 118781
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8569231033325195,
      "learning_rate": 0.0002853595596185565,
      "loss": 2.8934,
      "step": 118782
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.9441912174224854,
      "learning_rate": 0.0002853554739316566,
      "loss": 2.9407,
      "step": 118783
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.867533802986145,
      "learning_rate": 0.0002853513882474795,
      "loss": 3.0423,
      "step": 118784
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.072558879852295,
      "learning_rate": 0.00028534730256602576,
      "loss": 3.1687,
      "step": 118785
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.066725015640259,
      "learning_rate": 0.00028534321688729623,
      "loss": 3.1144,
      "step": 118786
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4085755348205566,
      "learning_rate": 0.0002853391312112917,
      "loss": 2.944,
      "step": 118787
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0291123390197754,
      "learning_rate": 0.00028533504553801287,
      "loss": 2.9849,
      "step": 118788
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7754275798797607,
      "learning_rate": 0.0002853309598674605,
      "loss": 2.9223,
      "step": 118789
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.315638303756714,
      "learning_rate": 0.00028532687419963556,
      "loss": 3.0201,
      "step": 118790
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7622339725494385,
      "learning_rate": 0.00028532278853453846,
      "loss": 3.1024,
      "step": 118791
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.770387053489685,
      "learning_rate": 0.00028531870287217014,
      "loss": 2.9641,
      "step": 118792
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9089726209640503,
      "learning_rate": 0.00028531461721253135,
      "loss": 3.1177,
      "step": 118793
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.796883225440979,
      "learning_rate": 0.00028531053155562286,
      "loss": 2.9433,
      "step": 118794
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9557747840881348,
      "learning_rate": 0.0002853064459014454,
      "loss": 2.9444,
      "step": 118795
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.3668625354766846,
      "learning_rate": 0.0002853023602499999,
      "loss": 2.7554,
      "step": 118796
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.044100284576416,
      "learning_rate": 0.0002852982746012869,
      "loss": 2.907,
      "step": 118797
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0369749069213867,
      "learning_rate": 0.00028529418895530714,
      "loss": 2.9906,
      "step": 118798
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3956034183502197,
      "learning_rate": 0.0002852901033120615,
      "loss": 2.9207,
      "step": 118799
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1166183948516846,
      "learning_rate": 0.0002852860176715507,
      "loss": 2.9092,
      "step": 118800
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9019412994384766,
      "learning_rate": 0.0002852819320337755,
      "loss": 2.9493,
      "step": 118801
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3640668392181396,
      "learning_rate": 0.00028527784639873676,
      "loss": 2.9697,
      "step": 118802
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8061137199401855,
      "learning_rate": 0.00028527376076643504,
      "loss": 2.9883,
      "step": 118803
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.498934030532837,
      "learning_rate": 0.00028526967513687126,
      "loss": 2.8871,
      "step": 118804
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.145756483078003,
      "learning_rate": 0.0002852655895100461,
      "loss": 3.1054,
      "step": 118805
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1562306880950928,
      "learning_rate": 0.00028526150388596033,
      "loss": 3.0406,
      "step": 118806
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9761556386947632,
      "learning_rate": 0.0002852574182646147,
      "loss": 2.965,
      "step": 118807
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.5478954315185547,
      "learning_rate": 0.0002852533326460102,
      "loss": 2.9567,
      "step": 118808
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.114952802658081,
      "learning_rate": 0.0002852492470301472,
      "loss": 2.826,
      "step": 118809
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6852352619171143,
      "learning_rate": 0.00028524516141702666,
      "loss": 2.9362,
      "step": 118810
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.91865873336792,
      "learning_rate": 0.0002852410758066493,
      "loss": 2.8678,
      "step": 118811
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.476320266723633,
      "learning_rate": 0.0002852369901990159,
      "loss": 3.0987,
      "step": 118812
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1282522678375244,
      "learning_rate": 0.00028523290459412726,
      "loss": 3.1965,
      "step": 118813
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.061525583267212,
      "learning_rate": 0.0002852288189919842,
      "loss": 3.0369,
      "step": 118814
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5843303203582764,
      "learning_rate": 0.00028522473339258727,
      "loss": 2.9084,
      "step": 118815
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.799340009689331,
      "learning_rate": 0.0002852206477959373,
      "loss": 3.1594,
      "step": 118816
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.149524211883545,
      "learning_rate": 0.00028521656220203517,
      "loss": 3.0009,
      "step": 118817
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.008700132369995,
      "learning_rate": 0.00028521247661088153,
      "loss": 3.1251,
      "step": 118818
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.021604537963867,
      "learning_rate": 0.0002852083910224771,
      "loss": 2.916,
      "step": 118819
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1479885578155518,
      "learning_rate": 0.00028520430543682286,
      "loss": 2.9712,
      "step": 118820
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1618082523345947,
      "learning_rate": 0.00028520021985391935,
      "loss": 3.123,
      "step": 118821
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8485175371170044,
      "learning_rate": 0.00028519613427376744,
      "loss": 2.9184,
      "step": 118822
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9377983808517456,
      "learning_rate": 0.00028519204869636777,
      "loss": 2.7801,
      "step": 118823
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9417221546173096,
      "learning_rate": 0.00028518796312172116,
      "loss": 3.1452,
      "step": 118824
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6681208610534668,
      "learning_rate": 0.00028518387754982843,
      "loss": 2.9302,
      "step": 118825
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7561333179473877,
      "learning_rate": 0.0002851797919806903,
      "loss": 2.948,
      "step": 118826
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8024935722351074,
      "learning_rate": 0.00028517570641430756,
      "loss": 3.0463,
      "step": 118827
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0034282207489014,
      "learning_rate": 0.0002851716208506809,
      "loss": 2.7671,
      "step": 118828
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7767345905303955,
      "learning_rate": 0.00028516753528981114,
      "loss": 3.08,
      "step": 118829
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.15309739112854,
      "learning_rate": 0.000285163449731699,
      "loss": 2.9222,
      "step": 118830
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.5973564386367798,
      "learning_rate": 0.00028515936417634517,
      "loss": 2.8029,
      "step": 118831
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8342400789260864,
      "learning_rate": 0.00028515527862375063,
      "loss": 3.0843,
      "step": 118832
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9432653188705444,
      "learning_rate": 0.00028515119307391594,
      "loss": 2.9525,
      "step": 118833
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5475053787231445,
      "learning_rate": 0.00028514710752684195,
      "loss": 3.0592,
      "step": 118834
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8509591817855835,
      "learning_rate": 0.0002851430219825293,
      "loss": 2.8257,
      "step": 118835
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.622925043106079,
      "learning_rate": 0.00028513893644097895,
      "loss": 3.1177,
      "step": 118836
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.9855599403381348,
      "learning_rate": 0.0002851348509021915,
      "loss": 3.0726,
      "step": 118837
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.726044178009033,
      "learning_rate": 0.0002851307653661679,
      "loss": 3.1416,
      "step": 118838
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7929366827011108,
      "learning_rate": 0.0002851266798329086,
      "loss": 3.1154,
      "step": 118839
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6922385692596436,
      "learning_rate": 0.00028512259430241453,
      "loss": 3.0842,
      "step": 118840
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0003538131713867,
      "learning_rate": 0.0002851185087746865,
      "loss": 2.9444,
      "step": 118841
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8912971019744873,
      "learning_rate": 0.0002851144232497252,
      "loss": 3.2252,
      "step": 118842
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8234130144119263,
      "learning_rate": 0.0002851103377275315,
      "loss": 2.8575,
      "step": 118843
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0130815505981445,
      "learning_rate": 0.00028510625220810594,
      "loss": 2.9287,
      "step": 118844
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.041865825653076,
      "learning_rate": 0.00028510216669144957,
      "loss": 3.3381,
      "step": 118845
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.470231771469116,
      "learning_rate": 0.00028509808117756284,
      "loss": 3.3631,
      "step": 118846
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1443326473236084,
      "learning_rate": 0.0002850939956664467,
      "loss": 3.16,
      "step": 118847
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.2977399826049805,
      "learning_rate": 0.0002850899101581018,
      "loss": 2.7737,
      "step": 118848
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9887938499450684,
      "learning_rate": 0.00028508582465252907,
      "loss": 2.7769,
      "step": 118849
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9262949228286743,
      "learning_rate": 0.0002850817391497292,
      "loss": 2.7153,
      "step": 118850
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9903544187545776,
      "learning_rate": 0.00028507765364970284,
      "loss": 3.0627,
      "step": 118851
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8623214960098267,
      "learning_rate": 0.00028507356815245087,
      "loss": 2.8992,
      "step": 118852
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.5928795337677002,
      "learning_rate": 0.0002850694826579739,
      "loss": 2.9284,
      "step": 118853
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7831116914749146,
      "learning_rate": 0.0002850653971662729,
      "loss": 2.9845,
      "step": 118854
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.0347156524658203,
      "learning_rate": 0.00028506131167734845,
      "loss": 2.8603,
      "step": 118855
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8581340312957764,
      "learning_rate": 0.0002850572261912014,
      "loss": 3.2162,
      "step": 118856
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8705041408538818,
      "learning_rate": 0.00028505314070783266,
      "loss": 3.0515,
      "step": 118857
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9537899494171143,
      "learning_rate": 0.00028504905522724265,
      "loss": 3.2807,
      "step": 118858
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.295975923538208,
      "learning_rate": 0.0002850449697494323,
      "loss": 2.9485,
      "step": 118859
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6729092597961426,
      "learning_rate": 0.0002850408842744024,
      "loss": 2.8688,
      "step": 118860
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.076298475265503,
      "learning_rate": 0.0002850367988021537,
      "loss": 2.9021,
      "step": 118861
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0005974769592285,
      "learning_rate": 0.0002850327133326869,
      "loss": 3.1846,
      "step": 118862
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.195674180984497,
      "learning_rate": 0.00028502862786600295,
      "loss": 2.9913,
      "step": 118863
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7222175598144531,
      "learning_rate": 0.00028502454240210233,
      "loss": 2.6958,
      "step": 118864
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.482619285583496,
      "learning_rate": 0.00028502045694098595,
      "loss": 3.0283,
      "step": 118865
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9738553762435913,
      "learning_rate": 0.00028501637148265456,
      "loss": 3.0234,
      "step": 118866
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.194774627685547,
      "learning_rate": 0.00028501228602710886,
      "loss": 2.9598,
      "step": 118867
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6500102281570435,
      "learning_rate": 0.00028500820057434967,
      "loss": 2.8964,
      "step": 118868
    },
    {
      "epoch": 1.55,
      "grad_norm": 5.71704626083374,
      "learning_rate": 0.0002850041151243779,
      "loss": 2.9119,
      "step": 118869
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0645151138305664,
      "learning_rate": 0.00028500002967719396,
      "loss": 3.3783,
      "step": 118870
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.202845811843872,
      "learning_rate": 0.00028499594423279885,
      "loss": 2.761,
      "step": 118871
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.024714231491089,
      "learning_rate": 0.0002849918587911932,
      "loss": 3.1143,
      "step": 118872
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9795117378234863,
      "learning_rate": 0.00028498777335237796,
      "loss": 2.941,
      "step": 118873
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8163013458251953,
      "learning_rate": 0.0002849836879163537,
      "loss": 3.1968,
      "step": 118874
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7633620500564575,
      "learning_rate": 0.0002849796024831214,
      "loss": 2.9952,
      "step": 118875
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.284403085708618,
      "learning_rate": 0.00028497551705268157,
      "loss": 2.8059,
      "step": 118876
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.0753016471862793,
      "learning_rate": 0.00028497143162503505,
      "loss": 2.8896,
      "step": 118877
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5378570556640625,
      "learning_rate": 0.00028496734620018263,
      "loss": 2.8755,
      "step": 118878
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1484134197235107,
      "learning_rate": 0.00028496326077812506,
      "loss": 3.0651,
      "step": 118879
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.09352445602417,
      "learning_rate": 0.0002849591753588631,
      "loss": 2.9156,
      "step": 118880
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9907689094543457,
      "learning_rate": 0.00028495508994239764,
      "loss": 3.0257,
      "step": 118881
    },
    {
      "epoch": 1.55,
      "grad_norm": 5.462893486022949,
      "learning_rate": 0.00028495100452872926,
      "loss": 2.7415,
      "step": 118882
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.509770393371582,
      "learning_rate": 0.0002849469191178587,
      "loss": 3.1521,
      "step": 118883
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.412245273590088,
      "learning_rate": 0.0002849428337097868,
      "loss": 2.9338,
      "step": 118884
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1529428958892822,
      "learning_rate": 0.00028493874830451426,
      "loss": 2.9634,
      "step": 118885
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.118352890014648,
      "learning_rate": 0.000284934662902042,
      "loss": 2.8908,
      "step": 118886
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6805999279022217,
      "learning_rate": 0.0002849305775023707,
      "loss": 3.135,
      "step": 118887
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.708527088165283,
      "learning_rate": 0.000284926492105501,
      "loss": 3.0014,
      "step": 118888
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.931735873222351,
      "learning_rate": 0.0002849224067114338,
      "loss": 3.0982,
      "step": 118889
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5948381423950195,
      "learning_rate": 0.00028491832132016974,
      "loss": 3.1023,
      "step": 118890
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.6237497329711914,
      "learning_rate": 0.00028491423593170965,
      "loss": 2.948,
      "step": 118891
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3840813636779785,
      "learning_rate": 0.00028491015054605434,
      "loss": 2.976,
      "step": 118892
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.2184979915618896,
      "learning_rate": 0.0002849060651632046,
      "loss": 3.0163,
      "step": 118893
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9078301191329956,
      "learning_rate": 0.00028490197978316096,
      "loss": 3.257,
      "step": 118894
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.825757026672363,
      "learning_rate": 0.0002848978944059244,
      "loss": 2.9081,
      "step": 118895
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.9815759658813477,
      "learning_rate": 0.00028489380903149554,
      "loss": 3.0761,
      "step": 118896
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.742438316345215,
      "learning_rate": 0.00028488972365987527,
      "loss": 3.2292,
      "step": 118897
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7759584188461304,
      "learning_rate": 0.00028488563829106423,
      "loss": 3.062,
      "step": 118898
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8026440143585205,
      "learning_rate": 0.0002848815529250634,
      "loss": 2.8133,
      "step": 118899
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8115899562835693,
      "learning_rate": 0.00028487746756187323,
      "loss": 3.0307,
      "step": 118900
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.5266098976135254,
      "learning_rate": 0.0002848733822014946,
      "loss": 2.9989,
      "step": 118901
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3372488021850586,
      "learning_rate": 0.00028486929684392833,
      "loss": 2.7238,
      "step": 118902
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6700519323349,
      "learning_rate": 0.0002848652114891751,
      "loss": 2.9933,
      "step": 118903
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.184774398803711,
      "learning_rate": 0.00028486112613723577,
      "loss": 3.0654,
      "step": 118904
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8028563261032104,
      "learning_rate": 0.000284857040788111,
      "loss": 2.9617,
      "step": 118905
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.915785312652588,
      "learning_rate": 0.0002848529554418018,
      "loss": 2.795,
      "step": 118906
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0825860500335693,
      "learning_rate": 0.0002848488700983085,
      "loss": 2.814,
      "step": 118907
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.807692527770996,
      "learning_rate": 0.00028484478475763206,
      "loss": 3.1428,
      "step": 118908
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0130512714385986,
      "learning_rate": 0.00028484069941977336,
      "loss": 2.903,
      "step": 118909
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0110814571380615,
      "learning_rate": 0.000284836614084733,
      "loss": 3.0324,
      "step": 118910
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7792598009109497,
      "learning_rate": 0.0002848325287525118,
      "loss": 3.1315,
      "step": 118911
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.561094045639038,
      "learning_rate": 0.0002848284434231106,
      "loss": 2.9607,
      "step": 118912
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.01065993309021,
      "learning_rate": 0.00028482435809653006,
      "loss": 2.7121,
      "step": 118913
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1464076042175293,
      "learning_rate": 0.0002848202727727709,
      "loss": 3.0613,
      "step": 118914
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.020138740539551,
      "learning_rate": 0.00028481618745183397,
      "loss": 2.9279,
      "step": 118915
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.357905149459839,
      "learning_rate": 0.00028481210213372,
      "loss": 2.9442,
      "step": 118916
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.162238359451294,
      "learning_rate": 0.0002848080168184297,
      "loss": 2.8809,
      "step": 118917
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.133119821548462,
      "learning_rate": 0.0002848039315059639,
      "loss": 2.8727,
      "step": 118918
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0838780403137207,
      "learning_rate": 0.0002847998461963234,
      "loss": 3.082,
      "step": 118919
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6255483627319336,
      "learning_rate": 0.0002847957608895089,
      "loss": 2.7281,
      "step": 118920
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9046255350112915,
      "learning_rate": 0.00028479167558552107,
      "loss": 3.1387,
      "step": 118921
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9970253705978394,
      "learning_rate": 0.0002847875902843608,
      "loss": 2.7332,
      "step": 118922
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8564767837524414,
      "learning_rate": 0.00028478350498602873,
      "loss": 3.0052,
      "step": 118923
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4328715801239014,
      "learning_rate": 0.0002847794196905258,
      "loss": 2.9868,
      "step": 118924
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0411884784698486,
      "learning_rate": 0.0002847753343978526,
      "loss": 2.7584,
      "step": 118925
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.67414128780365,
      "learning_rate": 0.00028477124910800995,
      "loss": 3.2671,
      "step": 118926
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9493250846862793,
      "learning_rate": 0.0002847671638209986,
      "loss": 3.0083,
      "step": 118927
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.54669451713562,
      "learning_rate": 0.0002847630785368194,
      "loss": 2.8672,
      "step": 118928
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.593539237976074,
      "learning_rate": 0.00028475899325547296,
      "loss": 2.9076,
      "step": 118929
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9164384603500366,
      "learning_rate": 0.0002847549079769602,
      "loss": 2.8868,
      "step": 118930
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1156320571899414,
      "learning_rate": 0.0002847508227012817,
      "loss": 3.0133,
      "step": 118931
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2392451763153076,
      "learning_rate": 0.0002847467374284383,
      "loss": 2.8424,
      "step": 118932
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5226759910583496,
      "learning_rate": 0.0002847426521584308,
      "loss": 3.279,
      "step": 118933
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9359866380691528,
      "learning_rate": 0.00028473856689125994,
      "loss": 3.1434,
      "step": 118934
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0940170288085938,
      "learning_rate": 0.00028473448162692647,
      "loss": 3.0191,
      "step": 118935
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.368133783340454,
      "learning_rate": 0.0002847303963654312,
      "loss": 2.8338,
      "step": 118936
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8331557512283325,
      "learning_rate": 0.00028472631110677474,
      "loss": 2.9245,
      "step": 118937
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2737839221954346,
      "learning_rate": 0.000284722225850958,
      "loss": 3.0238,
      "step": 118938
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5205671787261963,
      "learning_rate": 0.00028471814059798166,
      "loss": 3.12,
      "step": 118939
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2636682987213135,
      "learning_rate": 0.00028471405534784646,
      "loss": 2.9,
      "step": 118940
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4695870876312256,
      "learning_rate": 0.00028470997010055323,
      "loss": 2.8336,
      "step": 118941
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.3677308559417725,
      "learning_rate": 0.0002847058848561029,
      "loss": 2.8655,
      "step": 118942
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8393969535827637,
      "learning_rate": 0.0002847017996144958,
      "loss": 2.8726,
      "step": 118943
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0850186347961426,
      "learning_rate": 0.00028469771437573304,
      "loss": 3.1565,
      "step": 118944
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0598301887512207,
      "learning_rate": 0.00028469362913981515,
      "loss": 3.0871,
      "step": 118945
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9471174478530884,
      "learning_rate": 0.0002846895439067431,
      "loss": 2.8795,
      "step": 118946
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.793084740638733,
      "learning_rate": 0.0002846854586765175,
      "loss": 2.7483,
      "step": 118947
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7971713542938232,
      "learning_rate": 0.0002846813734491393,
      "loss": 3.2059,
      "step": 118948
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.758730411529541,
      "learning_rate": 0.000284677288224609,
      "loss": 3.1525,
      "step": 118949
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8595389127731323,
      "learning_rate": 0.00028467320300292745,
      "loss": 3.1475,
      "step": 118950
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6630388498306274,
      "learning_rate": 0.00028466911778409545,
      "loss": 3.0748,
      "step": 118951
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.204458713531494,
      "learning_rate": 0.00028466503256811376,
      "loss": 2.9326,
      "step": 118952
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.23880672454834,
      "learning_rate": 0.00028466094735498315,
      "loss": 3.1404,
      "step": 118953
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7395607233047485,
      "learning_rate": 0.0002846568621447045,
      "loss": 3.0756,
      "step": 118954
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9209553003311157,
      "learning_rate": 0.0002846527769372783,
      "loss": 2.9127,
      "step": 118955
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.785544514656067,
      "learning_rate": 0.0002846486917327054,
      "loss": 2.7716,
      "step": 118956
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6864229440689087,
      "learning_rate": 0.0002846446065309866,
      "loss": 3.0452,
      "step": 118957
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.311065196990967,
      "learning_rate": 0.00028464052133212267,
      "loss": 3.0678,
      "step": 118958
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.039109468460083,
      "learning_rate": 0.0002846364361361143,
      "loss": 2.9667,
      "step": 118959
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.115082025527954,
      "learning_rate": 0.00028463235094296256,
      "loss": 2.8966,
      "step": 118960
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.258814573287964,
      "learning_rate": 0.00028462826575266774,
      "loss": 2.8341,
      "step": 118961
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8990775346755981,
      "learning_rate": 0.0002846241805652308,
      "loss": 3.1644,
      "step": 118962
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9365659952163696,
      "learning_rate": 0.00028462009538065256,
      "loss": 3.2062,
      "step": 118963
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.0421853065490723,
      "learning_rate": 0.0002846160101989337,
      "loss": 2.992,
      "step": 118964
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4062366485595703,
      "learning_rate": 0.00028461192502007507,
      "loss": 3.1057,
      "step": 118965
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8854022026062012,
      "learning_rate": 0.00028460783984407746,
      "loss": 3.202,
      "step": 118966
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0440714359283447,
      "learning_rate": 0.00028460375467094143,
      "loss": 3.1781,
      "step": 118967
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.45810866355896,
      "learning_rate": 0.0002845996695006678,
      "loss": 2.8455,
      "step": 118968
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0621824264526367,
      "learning_rate": 0.00028459558433325746,
      "loss": 3.0347,
      "step": 118969
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1974339485168457,
      "learning_rate": 0.0002845914991687111,
      "loss": 2.9927,
      "step": 118970
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4954917430877686,
      "learning_rate": 0.0002845874140070294,
      "loss": 3.1375,
      "step": 118971
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.251537799835205,
      "learning_rate": 0.00028458332884821324,
      "loss": 2.9505,
      "step": 118972
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.331324815750122,
      "learning_rate": 0.00028457924369226344,
      "loss": 2.7357,
      "step": 118973
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9835336208343506,
      "learning_rate": 0.0002845751585391805,
      "loss": 2.8014,
      "step": 118974
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.9635729789733887,
      "learning_rate": 0.0002845710733889654,
      "loss": 3.12,
      "step": 118975
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.997891426086426,
      "learning_rate": 0.00028456698824161873,
      "loss": 2.8105,
      "step": 118976
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.9487931728363037,
      "learning_rate": 0.00028456290309714137,
      "loss": 2.8487,
      "step": 118977
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1517691612243652,
      "learning_rate": 0.0002845588179555341,
      "loss": 3.2921,
      "step": 118978
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.0204832553863525,
      "learning_rate": 0.00028455473281679776,
      "loss": 3.1932,
      "step": 118979
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4747304916381836,
      "learning_rate": 0.00028455064768093287,
      "loss": 3.1085,
      "step": 118980
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.349730968475342,
      "learning_rate": 0.0002845465625479403,
      "loss": 3.0241,
      "step": 118981
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.2272520065307617,
      "learning_rate": 0.00028454247741782076,
      "loss": 3.024,
      "step": 118982
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9978746175765991,
      "learning_rate": 0.0002845383922905751,
      "loss": 2.951,
      "step": 118983
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.527970314025879,
      "learning_rate": 0.00028453430716620406,
      "loss": 2.9915,
      "step": 118984
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.284062147140503,
      "learning_rate": 0.00028453022204470854,
      "loss": 3.1265,
      "step": 118985
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.193819046020508,
      "learning_rate": 0.00028452613692608896,
      "loss": 3.0719,
      "step": 118986
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7042760848999023,
      "learning_rate": 0.00028452205181034627,
      "loss": 3.0248,
      "step": 118987
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1065614223480225,
      "learning_rate": 0.00028451796669748126,
      "loss": 2.964,
      "step": 118988
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.201767683029175,
      "learning_rate": 0.0002845138815874946,
      "loss": 2.994,
      "step": 118989
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.071202516555786,
      "learning_rate": 0.00028450979648038713,
      "loss": 2.7664,
      "step": 118990
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.726654052734375,
      "learning_rate": 0.00028450571137615975,
      "loss": 3.0524,
      "step": 118991
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.407545566558838,
      "learning_rate": 0.00028450162627481287,
      "loss": 3.0364,
      "step": 118992
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1326825618743896,
      "learning_rate": 0.0002844975411763474,
      "loss": 2.9822,
      "step": 118993
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7529804706573486,
      "learning_rate": 0.00028449345608076417,
      "loss": 2.9256,
      "step": 118994
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.300853729248047,
      "learning_rate": 0.0002844893709880639,
      "loss": 3.1279,
      "step": 118995
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1930949687957764,
      "learning_rate": 0.00028448528589824736,
      "loss": 2.7523,
      "step": 118996
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7973313331604004,
      "learning_rate": 0.0002844812008113154,
      "loss": 3.2495,
      "step": 118997
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.121964454650879,
      "learning_rate": 0.0002844771157272686,
      "loss": 2.9731,
      "step": 118998
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.03546404838562,
      "learning_rate": 0.00028447303064610776,
      "loss": 2.8556,
      "step": 118999
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6956169605255127,
      "learning_rate": 0.0002844689455678337,
      "loss": 2.9836,
      "step": 119000
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6904996633529663,
      "learning_rate": 0.0002844648604924471,
      "loss": 2.9722,
      "step": 119001
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5196433067321777,
      "learning_rate": 0.0002844607754199488,
      "loss": 3.1458,
      "step": 119002
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.046535491943359,
      "learning_rate": 0.0002844566903503396,
      "loss": 3.0516,
      "step": 119003
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6766313314437866,
      "learning_rate": 0.00028445260528362013,
      "loss": 3.0857,
      "step": 119004
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.177417755126953,
      "learning_rate": 0.0002844485202197913,
      "loss": 2.9779,
      "step": 119005
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.831428050994873,
      "learning_rate": 0.00028444443515885374,
      "loss": 2.8163,
      "step": 119006
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.256495475769043,
      "learning_rate": 0.0002844403501008082,
      "loss": 2.8256,
      "step": 119007
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7670542001724243,
      "learning_rate": 0.00028443626504565556,
      "loss": 3.1255,
      "step": 119008
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1629090309143066,
      "learning_rate": 0.0002844321799933965,
      "loss": 2.9991,
      "step": 119009
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8812108039855957,
      "learning_rate": 0.0002844280949440318,
      "loss": 3.1459,
      "step": 119010
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.334643840789795,
      "learning_rate": 0.0002844240098975621,
      "loss": 3.0581,
      "step": 119011
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.257878303527832,
      "learning_rate": 0.0002844199248539884,
      "loss": 2.9843,
      "step": 119012
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1203176975250244,
      "learning_rate": 0.00028441583981331133,
      "loss": 2.9403,
      "step": 119013
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.187774658203125,
      "learning_rate": 0.00028441175477553156,
      "loss": 2.8383,
      "step": 119014
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9436901807785034,
      "learning_rate": 0.00028440766974065007,
      "loss": 3.1623,
      "step": 119015
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1460888385772705,
      "learning_rate": 0.00028440358470866736,
      "loss": 3.3788,
      "step": 119016
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9151337146759033,
      "learning_rate": 0.00028439949967958434,
      "loss": 3.0435,
      "step": 119017
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.668095588684082,
      "learning_rate": 0.00028439541465340173,
      "loss": 2.981,
      "step": 119018
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.348473072052002,
      "learning_rate": 0.0002843913296301204,
      "loss": 3.0901,
      "step": 119019
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8293046951293945,
      "learning_rate": 0.000284387244609741,
      "loss": 3.2441,
      "step": 119020
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.064833402633667,
      "learning_rate": 0.0002843831595922643,
      "loss": 2.9142,
      "step": 119021
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4502224922180176,
      "learning_rate": 0.000284379074577691,
      "loss": 2.9407,
      "step": 119022
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.025261163711548,
      "learning_rate": 0.00028437498956602195,
      "loss": 2.9538,
      "step": 119023
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.247359275817871,
      "learning_rate": 0.00028437090455725786,
      "loss": 2.9791,
      "step": 119024
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.333024501800537,
      "learning_rate": 0.00028436681955139954,
      "loss": 2.9102,
      "step": 119025
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5733280181884766,
      "learning_rate": 0.0002843627345484477,
      "loss": 3.1796,
      "step": 119026
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9515588283538818,
      "learning_rate": 0.0002843586495484033,
      "loss": 2.797,
      "step": 119027
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.139514446258545,
      "learning_rate": 0.00028435456455126675,
      "loss": 2.9804,
      "step": 119028
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8707194328308105,
      "learning_rate": 0.000284350479557039,
      "loss": 3.0507,
      "step": 119029
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6813544034957886,
      "learning_rate": 0.00028434639456572074,
      "loss": 3.2486,
      "step": 119030
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9649187326431274,
      "learning_rate": 0.0002843423095773128,
      "loss": 2.9076,
      "step": 119031
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1528337001800537,
      "learning_rate": 0.000284338224591816,
      "loss": 3.1427,
      "step": 119032
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4452970027923584,
      "learning_rate": 0.0002843341396092311,
      "loss": 2.9264,
      "step": 119033
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1795952320098877,
      "learning_rate": 0.0002843300546295586,
      "loss": 2.9236,
      "step": 119034
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.137469530105591,
      "learning_rate": 0.0002843259696527995,
      "loss": 2.857,
      "step": 119035
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2255091667175293,
      "learning_rate": 0.0002843218846789545,
      "loss": 2.8718,
      "step": 119036
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.065592050552368,
      "learning_rate": 0.0002843177997080243,
      "loss": 2.9552,
      "step": 119037
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.369086265563965,
      "learning_rate": 0.00028431371474000976,
      "loss": 3.039,
      "step": 119038
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.966411828994751,
      "learning_rate": 0.0002843096297749116,
      "loss": 3.0222,
      "step": 119039
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8671749830245972,
      "learning_rate": 0.0002843055448127307,
      "loss": 3.1988,
      "step": 119040
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.359755516052246,
      "learning_rate": 0.0002843014598534676,
      "loss": 2.9651,
      "step": 119041
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.420598030090332,
      "learning_rate": 0.0002842973748971231,
      "loss": 2.9368,
      "step": 119042
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1811275482177734,
      "learning_rate": 0.000284293289943698,
      "loss": 2.9551,
      "step": 119043
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8589818477630615,
      "learning_rate": 0.00028428920499319306,
      "loss": 2.7351,
      "step": 119044
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.609356641769409,
      "learning_rate": 0.00028428512004560915,
      "loss": 2.9822,
      "step": 119045
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.680288076400757,
      "learning_rate": 0.000284281035100947,
      "loss": 2.9295,
      "step": 119046
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9781479835510254,
      "learning_rate": 0.00028427695015920715,
      "loss": 2.9946,
      "step": 119047
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7226595878601074,
      "learning_rate": 0.0002842728652203906,
      "loss": 2.986,
      "step": 119048
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.116426706314087,
      "learning_rate": 0.00028426878028449794,
      "loss": 3.2863,
      "step": 119049
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.670715093612671,
      "learning_rate": 0.00028426469535153005,
      "loss": 2.7489,
      "step": 119050
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1475777626037598,
      "learning_rate": 0.0002842606104214876,
      "loss": 2.8468,
      "step": 119051
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.165121555328369,
      "learning_rate": 0.0002842565254943716,
      "loss": 3.0679,
      "step": 119052
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.387819290161133,
      "learning_rate": 0.0002842524405701824,
      "loss": 3.2244,
      "step": 119053
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6892812252044678,
      "learning_rate": 0.00028424835564892103,
      "loss": 2.8873,
      "step": 119054
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.109289169311523,
      "learning_rate": 0.0002842442707305882,
      "loss": 3.2637,
      "step": 119055
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5321145057678223,
      "learning_rate": 0.00028424018581518464,
      "loss": 3.1236,
      "step": 119056
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.096456527709961,
      "learning_rate": 0.0002842361009027111,
      "loss": 3.1604,
      "step": 119057
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6833560466766357,
      "learning_rate": 0.0002842320159931685,
      "loss": 2.9239,
      "step": 119058
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.8307418823242188,
      "learning_rate": 0.00028422793108655737,
      "loss": 2.8854,
      "step": 119059
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3926916122436523,
      "learning_rate": 0.00028422384618287855,
      "loss": 3.1387,
      "step": 119060
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.306668281555176,
      "learning_rate": 0.00028421976128213276,
      "loss": 2.8541,
      "step": 119061
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8631142377853394,
      "learning_rate": 0.0002842156763843209,
      "loss": 2.931,
      "step": 119062
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.0701234340667725,
      "learning_rate": 0.00028421159148944356,
      "loss": 2.7685,
      "step": 119063
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.194934368133545,
      "learning_rate": 0.00028420750659750177,
      "loss": 2.9376,
      "step": 119064
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1500794887542725,
      "learning_rate": 0.000284203421708496,
      "loss": 2.971,
      "step": 119065
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7128705978393555,
      "learning_rate": 0.00028419933682242705,
      "loss": 3.0994,
      "step": 119066
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.0977895259857178,
      "learning_rate": 0.00028419525193929576,
      "loss": 2.9665,
      "step": 119067
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.6068127155303955,
      "learning_rate": 0.00028419116705910287,
      "loss": 2.9492,
      "step": 119068
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0974245071411133,
      "learning_rate": 0.00028418708218184914,
      "loss": 2.8754,
      "step": 119069
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8730506896972656,
      "learning_rate": 0.0002841829973075355,
      "loss": 2.8697,
      "step": 119070
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.022130250930786,
      "learning_rate": 0.0002841789124361624,
      "loss": 3.0691,
      "step": 119071
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0473616123199463,
      "learning_rate": 0.0002841748275677307,
      "loss": 3.0959,
      "step": 119072
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8009600639343262,
      "learning_rate": 0.0002841707427022412,
      "loss": 3.0343,
      "step": 119073
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.306394577026367,
      "learning_rate": 0.0002841666578396947,
      "loss": 3.0151,
      "step": 119074
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7988147735595703,
      "learning_rate": 0.00028416257298009183,
      "loss": 3.1438,
      "step": 119075
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2827882766723633,
      "learning_rate": 0.00028415848812343367,
      "loss": 2.8588,
      "step": 119076
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9747499227523804,
      "learning_rate": 0.00028415440326972057,
      "loss": 3.3481,
      "step": 119077
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9046190977096558,
      "learning_rate": 0.00028415031841895347,
      "loss": 3.1607,
      "step": 119078
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9976104497909546,
      "learning_rate": 0.0002841462335711331,
      "loss": 2.8861,
      "step": 119079
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.844972848892212,
      "learning_rate": 0.0002841421487262603,
      "loss": 2.8051,
      "step": 119080
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.4036941528320312,
      "learning_rate": 0.0002841380638843357,
      "loss": 2.827,
      "step": 119081
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0022659301757812,
      "learning_rate": 0.00028413397904536035,
      "loss": 2.9307,
      "step": 119082
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.913034439086914,
      "learning_rate": 0.00028412989420933455,
      "loss": 2.9756,
      "step": 119083
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0100038051605225,
      "learning_rate": 0.0002841258093762594,
      "loss": 3.0316,
      "step": 119084
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.681727170944214,
      "learning_rate": 0.0002841217245461355,
      "loss": 2.846,
      "step": 119085
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5608487129211426,
      "learning_rate": 0.00028411763971896375,
      "loss": 2.979,
      "step": 119086
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8451426029205322,
      "learning_rate": 0.00028411355489474475,
      "loss": 2.9069,
      "step": 119087
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.918505311012268,
      "learning_rate": 0.00028410947007347944,
      "loss": 2.9109,
      "step": 119088
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9675774574279785,
      "learning_rate": 0.0002841053852551684,
      "loss": 2.9913,
      "step": 119089
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9569445848464966,
      "learning_rate": 0.00028410130043981255,
      "loss": 3.0304,
      "step": 119090
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.776064395904541,
      "learning_rate": 0.0002840972156274125,
      "loss": 3.2398,
      "step": 119091
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5258240699768066,
      "learning_rate": 0.0002840931308179691,
      "loss": 3.0957,
      "step": 119092
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9833704233169556,
      "learning_rate": 0.00028408904601148305,
      "loss": 2.7711,
      "step": 119093
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.055532932281494,
      "learning_rate": 0.00028408496120795523,
      "loss": 2.9221,
      "step": 119094
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8134865760803223,
      "learning_rate": 0.00028408087640738626,
      "loss": 2.9932,
      "step": 119095
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.943455696105957,
      "learning_rate": 0.00028407679160977694,
      "loss": 2.7611,
      "step": 119096
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.2478713989257812,
      "learning_rate": 0.0002840727068151281,
      "loss": 2.8259,
      "step": 119097
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.934350848197937,
      "learning_rate": 0.0002840686220234404,
      "loss": 2.8732,
      "step": 119098
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.903221845626831,
      "learning_rate": 0.00028406453723471466,
      "loss": 2.9176,
      "step": 119099
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.806811571121216,
      "learning_rate": 0.0002840604524489516,
      "loss": 2.7982,
      "step": 119100
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8060612678527832,
      "learning_rate": 0.000284056367666152,
      "loss": 3.0178,
      "step": 119101
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9542834758758545,
      "learning_rate": 0.00028405228288631666,
      "loss": 2.8913,
      "step": 119102
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9860866069793701,
      "learning_rate": 0.0002840481981094463,
      "loss": 2.8686,
      "step": 119103
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5880982875823975,
      "learning_rate": 0.0002840441133355417,
      "loss": 3.1789,
      "step": 119104
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.837350606918335,
      "learning_rate": 0.00028404002856460353,
      "loss": 3.1569,
      "step": 119105
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.59649920463562,
      "learning_rate": 0.0002840359437966326,
      "loss": 3.2149,
      "step": 119106
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7606674432754517,
      "learning_rate": 0.0002840318590316298,
      "loss": 3.0519,
      "step": 119107
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0749733448028564,
      "learning_rate": 0.0002840277742695957,
      "loss": 2.859,
      "step": 119108
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6821467876434326,
      "learning_rate": 0.00028402368951053114,
      "loss": 3.088,
      "step": 119109
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5061800479888916,
      "learning_rate": 0.00028401960475443695,
      "loss": 2.9011,
      "step": 119110
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0611720085144043,
      "learning_rate": 0.0002840155200013138,
      "loss": 2.9595,
      "step": 119111
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.363266944885254,
      "learning_rate": 0.0002840114352511624,
      "loss": 3.075,
      "step": 119112
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.258345127105713,
      "learning_rate": 0.00028400735050398366,
      "loss": 3.0606,
      "step": 119113
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0414366722106934,
      "learning_rate": 0.0002840032657597782,
      "loss": 2.6081,
      "step": 119114
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.413987398147583,
      "learning_rate": 0.0002839991810185468,
      "loss": 3.1137,
      "step": 119115
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9090229272842407,
      "learning_rate": 0.00028399509628029027,
      "loss": 2.9815,
      "step": 119116
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4972922801971436,
      "learning_rate": 0.00028399101154500935,
      "loss": 2.9465,
      "step": 119117
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.953949213027954,
      "learning_rate": 0.00028398692681270483,
      "loss": 2.9244,
      "step": 119118
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2724549770355225,
      "learning_rate": 0.00028398284208337753,
      "loss": 2.8481,
      "step": 119119
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.8610780239105225,
      "learning_rate": 0.000283978757357028,
      "loss": 3.0481,
      "step": 119120
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9476569890975952,
      "learning_rate": 0.00028397467263365715,
      "loss": 2.8946,
      "step": 119121
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9525741338729858,
      "learning_rate": 0.00028397058791326574,
      "loss": 3.3161,
      "step": 119122
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.106788396835327,
      "learning_rate": 0.0002839665031958544,
      "loss": 2.9113,
      "step": 119123
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.212878942489624,
      "learning_rate": 0.00028396241848142406,
      "loss": 3.1203,
      "step": 119124
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.905412435531616,
      "learning_rate": 0.00028395833376997553,
      "loss": 2.975,
      "step": 119125
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8022440671920776,
      "learning_rate": 0.00028395424906150937,
      "loss": 2.9822,
      "step": 119126
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8307788372039795,
      "learning_rate": 0.00028395016435602634,
      "loss": 3.0508,
      "step": 119127
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9351142644882202,
      "learning_rate": 0.0002839460796535273,
      "loss": 2.8847,
      "step": 119128
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0530271530151367,
      "learning_rate": 0.000283941994954013,
      "loss": 2.9336,
      "step": 119129
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9868992567062378,
      "learning_rate": 0.00028393791025748414,
      "loss": 2.8696,
      "step": 119130
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8250136375427246,
      "learning_rate": 0.0002839338255639417,
      "loss": 2.8372,
      "step": 119131
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.321772336959839,
      "learning_rate": 0.00028392974087338617,
      "loss": 2.9795,
      "step": 119132
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7124152183532715,
      "learning_rate": 0.00028392565618581833,
      "loss": 3.1486,
      "step": 119133
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.6870205402374268,
      "learning_rate": 0.00028392157150123904,
      "loss": 2.9457,
      "step": 119134
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.168856382369995,
      "learning_rate": 0.00028391748681964906,
      "loss": 3.1497,
      "step": 119135
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.454127788543701,
      "learning_rate": 0.0002839134021410491,
      "loss": 3.1918,
      "step": 119136
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.33748459815979,
      "learning_rate": 0.00028390931746544005,
      "loss": 2.9192,
      "step": 119137
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8332500457763672,
      "learning_rate": 0.0002839052327928225,
      "loss": 2.8761,
      "step": 119138
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.023545265197754,
      "learning_rate": 0.00028390114812319717,
      "loss": 3.0579,
      "step": 119139
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.795214295387268,
      "learning_rate": 0.000283897063456565,
      "loss": 2.9982,
      "step": 119140
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.515362024307251,
      "learning_rate": 0.00028389297879292664,
      "loss": 2.9812,
      "step": 119141
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2119064331054688,
      "learning_rate": 0.0002838888941322829,
      "loss": 2.7027,
      "step": 119142
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8212437629699707,
      "learning_rate": 0.00028388480947463466,
      "loss": 2.8538,
      "step": 119143
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.149120330810547,
      "learning_rate": 0.0002838807248199823,
      "loss": 2.973,
      "step": 119144
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0876269340515137,
      "learning_rate": 0.00028387664016832695,
      "loss": 2.942,
      "step": 119145
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4852612018585205,
      "learning_rate": 0.0002838725555196692,
      "loss": 2.9988,
      "step": 119146
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.315826892852783,
      "learning_rate": 0.0002838684708740098,
      "loss": 2.7849,
      "step": 119147
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9353805780410767,
      "learning_rate": 0.0002838643862313496,
      "loss": 2.7597,
      "step": 119148
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.796988844871521,
      "learning_rate": 0.00028386030159168943,
      "loss": 2.9755,
      "step": 119149
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0649144649505615,
      "learning_rate": 0.0002838562169550298,
      "loss": 2.9294,
      "step": 119150
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8674172163009644,
      "learning_rate": 0.00028385213232137165,
      "loss": 2.7688,
      "step": 119151
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4361987113952637,
      "learning_rate": 0.00028384804769071564,
      "loss": 3.0228,
      "step": 119152
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.151038408279419,
      "learning_rate": 0.00028384396306306255,
      "loss": 2.939,
      "step": 119153
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.102400064468384,
      "learning_rate": 0.00028383987843841324,
      "loss": 2.9525,
      "step": 119154
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9464123249053955,
      "learning_rate": 0.0002838357938167685,
      "loss": 3.1364,
      "step": 119155
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8978675603866577,
      "learning_rate": 0.0002838317091981289,
      "loss": 3.0214,
      "step": 119156
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9290111064910889,
      "learning_rate": 0.00028382762458249527,
      "loss": 3.0529,
      "step": 119157
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1093008518218994,
      "learning_rate": 0.0002838235399698684,
      "loss": 3.0235,
      "step": 119158
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0303359031677246,
      "learning_rate": 0.000283819455360249,
      "loss": 2.9941,
      "step": 119159
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9043129682540894,
      "learning_rate": 0.0002838153707536379,
      "loss": 3.1004,
      "step": 119160
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9867606163024902,
      "learning_rate": 0.000283811286150036,
      "loss": 2.6549,
      "step": 119161
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9661803245544434,
      "learning_rate": 0.00028380720154944365,
      "loss": 3.2736,
      "step": 119162
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0998899936676025,
      "learning_rate": 0.0002838031169518619,
      "loss": 2.9694,
      "step": 119163
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8691312074661255,
      "learning_rate": 0.0002837990323572915,
      "loss": 3.1969,
      "step": 119164
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.977250576019287,
      "learning_rate": 0.0002837949477657331,
      "loss": 2.9317,
      "step": 119165
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.040292978286743,
      "learning_rate": 0.00028379086317718757,
      "loss": 2.87,
      "step": 119166
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9711588621139526,
      "learning_rate": 0.00028378677859165573,
      "loss": 2.8945,
      "step": 119167
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.061326265335083,
      "learning_rate": 0.0002837826940091381,
      "loss": 3.1109,
      "step": 119168
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1160778999328613,
      "learning_rate": 0.00028377860942963555,
      "loss": 3.0209,
      "step": 119169
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.049703598022461,
      "learning_rate": 0.00028377452485314894,
      "loss": 3.0205,
      "step": 119170
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9917227029800415,
      "learning_rate": 0.0002837704402796789,
      "loss": 2.9863,
      "step": 119171
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.022385597229004,
      "learning_rate": 0.00028376635570922625,
      "loss": 2.7437,
      "step": 119172
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4846155643463135,
      "learning_rate": 0.00028376227114179176,
      "loss": 2.9282,
      "step": 119173
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.5026366710662842,
      "learning_rate": 0.0002837581865773763,
      "loss": 3.0433,
      "step": 119174
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9222092628479004,
      "learning_rate": 0.0002837541020159803,
      "loss": 3.12,
      "step": 119175
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0738964080810547,
      "learning_rate": 0.00028375001745760474,
      "loss": 2.9771,
      "step": 119176
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4088220596313477,
      "learning_rate": 0.0002837459329022504,
      "loss": 2.9627,
      "step": 119177
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7541459798812866,
      "learning_rate": 0.00028374184834991804,
      "loss": 3.0809,
      "step": 119178
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.116446018218994,
      "learning_rate": 0.00028373776380060826,
      "loss": 2.9993,
      "step": 119179
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9134701490402222,
      "learning_rate": 0.0002837336792543221,
      "loss": 3.0014,
      "step": 119180
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.008281946182251,
      "learning_rate": 0.00028372959471106005,
      "loss": 2.8316,
      "step": 119181
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.855289101600647,
      "learning_rate": 0.00028372551017082304,
      "loss": 3.1063,
      "step": 119182
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1234312057495117,
      "learning_rate": 0.0002837214256336117,
      "loss": 2.9464,
      "step": 119183
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4306163787841797,
      "learning_rate": 0.00028371734109942687,
      "loss": 2.96,
      "step": 119184
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.060685157775879,
      "learning_rate": 0.0002837132565682693,
      "loss": 3.1321,
      "step": 119185
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8295469284057617,
      "learning_rate": 0.0002837091720401398,
      "loss": 3.0071,
      "step": 119186
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1549108028411865,
      "learning_rate": 0.000283705087515039,
      "loss": 2.7375,
      "step": 119187
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.283400535583496,
      "learning_rate": 0.0002837010029929677,
      "loss": 2.8242,
      "step": 119188
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.037245750427246,
      "learning_rate": 0.0002836969184739268,
      "loss": 2.9832,
      "step": 119189
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9133743047714233,
      "learning_rate": 0.0002836928339579169,
      "loss": 2.6921,
      "step": 119190
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.35023832321167,
      "learning_rate": 0.00028368874944493876,
      "loss": 3.1019,
      "step": 119191
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.543262243270874,
      "learning_rate": 0.0002836846649349933,
      "loss": 2.9528,
      "step": 119192
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1578657627105713,
      "learning_rate": 0.00028368058042808105,
      "loss": 2.7302,
      "step": 119193
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7190830707550049,
      "learning_rate": 0.0002836764959242029,
      "loss": 3.2773,
      "step": 119194
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9189269542694092,
      "learning_rate": 0.00028367241142335963,
      "loss": 2.9922,
      "step": 119195
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1798009872436523,
      "learning_rate": 0.000283668326925552,
      "loss": 2.9612,
      "step": 119196
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8146822452545166,
      "learning_rate": 0.00028366424243078073,
      "loss": 2.6466,
      "step": 119197
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8158650398254395,
      "learning_rate": 0.00028366015793904664,
      "loss": 2.9897,
      "step": 119198
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.772092819213867,
      "learning_rate": 0.0002836560734503503,
      "loss": 2.8916,
      "step": 119199
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.802037000656128,
      "learning_rate": 0.00028365198896469264,
      "loss": 2.952,
      "step": 119200
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7993896007537842,
      "learning_rate": 0.0002836479044820744,
      "loss": 3.0633,
      "step": 119201
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9136544466018677,
      "learning_rate": 0.0002836438200024963,
      "loss": 3.1593,
      "step": 119202
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.785343050956726,
      "learning_rate": 0.00028363973552595923,
      "loss": 3.0697,
      "step": 119203
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0170583724975586,
      "learning_rate": 0.0002836356510524638,
      "loss": 2.9092,
      "step": 119204
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.681751012802124,
      "learning_rate": 0.00028363156658201076,
      "loss": 2.8651,
      "step": 119205
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7878541946411133,
      "learning_rate": 0.0002836274821146009,
      "loss": 2.9055,
      "step": 119206
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.875978708267212,
      "learning_rate": 0.00028362339765023505,
      "loss": 3.056,
      "step": 119207
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8254542350769043,
      "learning_rate": 0.00028361931318891386,
      "loss": 3.0909,
      "step": 119208
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.121825933456421,
      "learning_rate": 0.0002836152287306382,
      "loss": 2.8897,
      "step": 119209
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7675107717514038,
      "learning_rate": 0.00028361114427540886,
      "loss": 3.1085,
      "step": 119210
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.032773494720459,
      "learning_rate": 0.00028360705982322646,
      "loss": 3.0141,
      "step": 119211
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.426210880279541,
      "learning_rate": 0.00028360297537409176,
      "loss": 2.9927,
      "step": 119212
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.821946382522583,
      "learning_rate": 0.00028359889092800556,
      "loss": 2.9385,
      "step": 119213
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8684391975402832,
      "learning_rate": 0.0002835948064849687,
      "loss": 3.0936,
      "step": 119214
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.986570119857788,
      "learning_rate": 0.0002835907220449818,
      "loss": 2.8219,
      "step": 119215
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1571242809295654,
      "learning_rate": 0.0002835866376080459,
      "loss": 3.0772,
      "step": 119216
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8158012628555298,
      "learning_rate": 0.00028358255317416137,
      "loss": 2.9607,
      "step": 119217
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7286947965621948,
      "learning_rate": 0.00028357846874332913,
      "loss": 2.8713,
      "step": 119218
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.088623523712158,
      "learning_rate": 0.00028357438431555,
      "loss": 3.07,
      "step": 119219
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9914305210113525,
      "learning_rate": 0.0002835702998908247,
      "loss": 3.1058,
      "step": 119220
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7741135358810425,
      "learning_rate": 0.00028356621546915404,
      "loss": 3.0966,
      "step": 119221
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0200347900390625,
      "learning_rate": 0.0002835621310505388,
      "loss": 3.0109,
      "step": 119222
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8143037557601929,
      "learning_rate": 0.00028355804663497957,
      "loss": 3.2322,
      "step": 119223
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0154635906219482,
      "learning_rate": 0.0002835539622224772,
      "loss": 3.0967,
      "step": 119224
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.966870665550232,
      "learning_rate": 0.00028354987781303246,
      "loss": 3.0921,
      "step": 119225
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0058345794677734,
      "learning_rate": 0.00028354579340664606,
      "loss": 2.8574,
      "step": 119226
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6165037155151367,
      "learning_rate": 0.0002835417090033189,
      "loss": 3.084,
      "step": 119227
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7199666500091553,
      "learning_rate": 0.0002835376246030517,
      "loss": 3.1631,
      "step": 119228
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7445783615112305,
      "learning_rate": 0.00028353354020584505,
      "loss": 3.0086,
      "step": 119229
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.837081789970398,
      "learning_rate": 0.0002835294558116999,
      "loss": 3.1871,
      "step": 119230
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9825985431671143,
      "learning_rate": 0.00028352537142061683,
      "loss": 3.0382,
      "step": 119231
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9028466939926147,
      "learning_rate": 0.00028352128703259676,
      "loss": 3.0101,
      "step": 119232
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1865057945251465,
      "learning_rate": 0.00028351720264764037,
      "loss": 3.0515,
      "step": 119233
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9234459400177002,
      "learning_rate": 0.0002835131182657486,
      "loss": 2.8599,
      "step": 119234
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9975497722625732,
      "learning_rate": 0.0002835090338869219,
      "loss": 2.9279,
      "step": 119235
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7768990993499756,
      "learning_rate": 0.00028350494951116117,
      "loss": 2.9464,
      "step": 119236
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7885050773620605,
      "learning_rate": 0.0002835008651384672,
      "loss": 2.9797,
      "step": 119237
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6093082427978516,
      "learning_rate": 0.00028349678076884075,
      "loss": 2.8483,
      "step": 119238
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.925524115562439,
      "learning_rate": 0.00028349269640228256,
      "loss": 3.2785,
      "step": 119239
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6713312864303589,
      "learning_rate": 0.0002834886120387934,
      "loss": 2.9974,
      "step": 119240
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8796709775924683,
      "learning_rate": 0.0002834845276783741,
      "loss": 2.8948,
      "step": 119241
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.493901252746582,
      "learning_rate": 0.0002834804433210252,
      "loss": 2.9064,
      "step": 119242
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.079129934310913,
      "learning_rate": 0.0002834763589667476,
      "loss": 2.8236,
      "step": 119243
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6277424097061157,
      "learning_rate": 0.0002834722746155421,
      "loss": 3.0293,
      "step": 119244
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7805368900299072,
      "learning_rate": 0.0002834681902674094,
      "loss": 2.974,
      "step": 119245
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4474036693573,
      "learning_rate": 0.0002834641059223503,
      "loss": 3.0686,
      "step": 119246
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9135452508926392,
      "learning_rate": 0.0002834600215803656,
      "loss": 3.2333,
      "step": 119247
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8288829326629639,
      "learning_rate": 0.00028345593724145594,
      "loss": 3.1799,
      "step": 119248
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9644649028778076,
      "learning_rate": 0.0002834518529056221,
      "loss": 2.7279,
      "step": 119249
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7003240585327148,
      "learning_rate": 0.00028344776857286484,
      "loss": 3.0768,
      "step": 119250
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2611277103424072,
      "learning_rate": 0.000283443684243185,
      "loss": 2.9889,
      "step": 119251
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4422554969787598,
      "learning_rate": 0.0002834395999165833,
      "loss": 3.3329,
      "step": 119252
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9522993564605713,
      "learning_rate": 0.00028343551559306055,
      "loss": 3.071,
      "step": 119253
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.309579610824585,
      "learning_rate": 0.0002834314312726174,
      "loss": 2.9478,
      "step": 119254
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.047353506088257,
      "learning_rate": 0.0002834273469552546,
      "loss": 3.0068,
      "step": 119255
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9106807708740234,
      "learning_rate": 0.00028342326264097303,
      "loss": 2.8724,
      "step": 119256
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8538539409637451,
      "learning_rate": 0.00028341917832977334,
      "loss": 2.8302,
      "step": 119257
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1145968437194824,
      "learning_rate": 0.00028341509402165637,
      "loss": 3.0448,
      "step": 119258
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9657533168792725,
      "learning_rate": 0.000283411009716623,
      "loss": 3.0914,
      "step": 119259
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2474782466888428,
      "learning_rate": 0.0002834069254146736,
      "loss": 3.0848,
      "step": 119260
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.518688678741455,
      "learning_rate": 0.0002834028411158092,
      "loss": 2.9549,
      "step": 119261
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.183077573776245,
      "learning_rate": 0.00028339875682003055,
      "loss": 2.8989,
      "step": 119262
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1990253925323486,
      "learning_rate": 0.0002833946725273384,
      "loss": 2.7447,
      "step": 119263
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.757857084274292,
      "learning_rate": 0.0002833905882377335,
      "loss": 3.2464,
      "step": 119264
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7778358459472656,
      "learning_rate": 0.0002833865039512167,
      "loss": 2.8327,
      "step": 119265
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6714417934417725,
      "learning_rate": 0.0002833824196677886,
      "loss": 3.1407,
      "step": 119266
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8618626594543457,
      "learning_rate": 0.00028337833538744995,
      "loss": 2.8827,
      "step": 119267
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.532309055328369,
      "learning_rate": 0.0002833742511102016,
      "loss": 2.9137,
      "step": 119268
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2331671714782715,
      "learning_rate": 0.0002833701668360443,
      "loss": 3.1039,
      "step": 119269
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1865286827087402,
      "learning_rate": 0.0002833660825649788,
      "loss": 2.7416,
      "step": 119270
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.797400951385498,
      "learning_rate": 0.00028336199829700587,
      "loss": 3.0017,
      "step": 119271
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.332409381866455,
      "learning_rate": 0.0002833579140321263,
      "loss": 3.1118,
      "step": 119272
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.823947906494141,
      "learning_rate": 0.0002833538297703408,
      "loss": 2.9042,
      "step": 119273
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.14385461807251,
      "learning_rate": 0.00028334974551165004,
      "loss": 2.9476,
      "step": 119274
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8575578927993774,
      "learning_rate": 0.00028334566125605496,
      "loss": 2.8105,
      "step": 119275
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.615293025970459,
      "learning_rate": 0.0002833415770035562,
      "loss": 2.8553,
      "step": 119276
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1664981842041016,
      "learning_rate": 0.0002833374927541546,
      "loss": 3.1632,
      "step": 119277
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2139716148376465,
      "learning_rate": 0.0002833334085078508,
      "loss": 3.2229,
      "step": 119278
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8906729221343994,
      "learning_rate": 0.00028332932426464565,
      "loss": 2.9195,
      "step": 119279
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.820350170135498,
      "learning_rate": 0.00028332524002453997,
      "loss": 2.9281,
      "step": 119280
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1366336345672607,
      "learning_rate": 0.0002833211557875344,
      "loss": 2.769,
      "step": 119281
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.305104970932007,
      "learning_rate": 0.00028331707155362977,
      "loss": 2.9695,
      "step": 119282
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7891221046447754,
      "learning_rate": 0.00028331298732282677,
      "loss": 3.1826,
      "step": 119283
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0800957679748535,
      "learning_rate": 0.0002833089030951262,
      "loss": 2.6714,
      "step": 119284
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2486672401428223,
      "learning_rate": 0.00028330481887052885,
      "loss": 3.0162,
      "step": 119285
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8046733140945435,
      "learning_rate": 0.0002833007346490354,
      "loss": 3.3246,
      "step": 119286
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1676485538482666,
      "learning_rate": 0.00028329665043064667,
      "loss": 3.1253,
      "step": 119287
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.179246425628662,
      "learning_rate": 0.0002832925662153635,
      "loss": 3.1895,
      "step": 119288
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4105398654937744,
      "learning_rate": 0.0002832884820031865,
      "loss": 3.1475,
      "step": 119289
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0036821365356445,
      "learning_rate": 0.0002832843977941164,
      "loss": 2.9967,
      "step": 119290
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6165748834609985,
      "learning_rate": 0.00028328031358815415,
      "loss": 3.0976,
      "step": 119291
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7886767387390137,
      "learning_rate": 0.00028327622938530036,
      "loss": 3.158,
      "step": 119292
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.942712664604187,
      "learning_rate": 0.0002832721451855558,
      "loss": 2.9455,
      "step": 119293
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1171669960021973,
      "learning_rate": 0.0002832680609889213,
      "loss": 3.1794,
      "step": 119294
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.348734140396118,
      "learning_rate": 0.00028326397679539773,
      "loss": 2.9979,
      "step": 119295
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8187679052352905,
      "learning_rate": 0.0002832598926049855,
      "loss": 3.0423,
      "step": 119296
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9449101686477661,
      "learning_rate": 0.00028325580841768566,
      "loss": 3.1777,
      "step": 119297
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8888479471206665,
      "learning_rate": 0.0002832517242334988,
      "loss": 3.0473,
      "step": 119298
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.672576904296875,
      "learning_rate": 0.0002832476400524258,
      "loss": 3.0921,
      "step": 119299
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.138303518295288,
      "learning_rate": 0.0002832435558744674,
      "loss": 2.9293,
      "step": 119300
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8712437152862549,
      "learning_rate": 0.0002832394716996244,
      "loss": 2.9781,
      "step": 119301
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.072317123413086,
      "learning_rate": 0.0002832353875278975,
      "loss": 2.9908,
      "step": 119302
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.5841548442840576,
      "learning_rate": 0.00028323130335928734,
      "loss": 3.0362,
      "step": 119303
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1127922534942627,
      "learning_rate": 0.00028322721919379483,
      "loss": 3.0019,
      "step": 119304
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7501200437545776,
      "learning_rate": 0.0002832231350314207,
      "loss": 3.1359,
      "step": 119305
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2931392192840576,
      "learning_rate": 0.0002832190508721657,
      "loss": 2.8703,
      "step": 119306
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0451200008392334,
      "learning_rate": 0.00028321496671603063,
      "loss": 2.9315,
      "step": 119307
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.113947629928589,
      "learning_rate": 0.0002832108825630163,
      "loss": 3.044,
      "step": 119308
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.058938980102539,
      "learning_rate": 0.0002832067984131233,
      "loss": 2.9975,
      "step": 119309
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8737367391586304,
      "learning_rate": 0.0002832027142663524,
      "loss": 3.1239,
      "step": 119310
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1595723628997803,
      "learning_rate": 0.00028319863012270447,
      "loss": 3.1372,
      "step": 119311
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9609718322753906,
      "learning_rate": 0.00028319454598218027,
      "loss": 3.0003,
      "step": 119312
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8114104270935059,
      "learning_rate": 0.00028319046184478047,
      "loss": 2.8328,
      "step": 119313
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3164989948272705,
      "learning_rate": 0.00028318637771050604,
      "loss": 2.9016,
      "step": 119314
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2001383304595947,
      "learning_rate": 0.0002831822935793574,
      "loss": 3.0457,
      "step": 119315
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.915388822555542,
      "learning_rate": 0.00028317820945133554,
      "loss": 2.8787,
      "step": 119316
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.518819808959961,
      "learning_rate": 0.0002831741253264412,
      "loss": 2.8833,
      "step": 119317
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3000166416168213,
      "learning_rate": 0.00028317004120467505,
      "loss": 2.8313,
      "step": 119318
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0646607875823975,
      "learning_rate": 0.0002831659570860379,
      "loss": 3.069,
      "step": 119319
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9600136280059814,
      "learning_rate": 0.0002831618729705307,
      "loss": 2.909,
      "step": 119320
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0256478786468506,
      "learning_rate": 0.00028315778885815385,
      "loss": 3.1301,
      "step": 119321
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.058614730834961,
      "learning_rate": 0.0002831537047489083,
      "loss": 3.03,
      "step": 119322
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.491143226623535,
      "learning_rate": 0.00028314962064279476,
      "loss": 3.001,
      "step": 119323
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7383350133895874,
      "learning_rate": 0.0002831455365398141,
      "loss": 2.8819,
      "step": 119324
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.255823850631714,
      "learning_rate": 0.00028314145243996694,
      "loss": 3.0049,
      "step": 119325
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.127903699874878,
      "learning_rate": 0.00028313736834325426,
      "loss": 2.9172,
      "step": 119326
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2347395420074463,
      "learning_rate": 0.0002831332842496765,
      "loss": 2.9715,
      "step": 119327
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1012814044952393,
      "learning_rate": 0.00028312920015923464,
      "loss": 2.8902,
      "step": 119328
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1577837467193604,
      "learning_rate": 0.0002831251160719293,
      "loss": 2.8621,
      "step": 119329
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1136696338653564,
      "learning_rate": 0.0002831210319877614,
      "loss": 3.0346,
      "step": 119330
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.832401156425476,
      "learning_rate": 0.0002831169479067316,
      "loss": 2.7611,
      "step": 119331
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.835553765296936,
      "learning_rate": 0.0002831128638288408,
      "loss": 2.9361,
      "step": 119332
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4960663318634033,
      "learning_rate": 0.00028310877975408944,
      "loss": 3.0969,
      "step": 119333
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7092394828796387,
      "learning_rate": 0.00028310469568247854,
      "loss": 3.054,
      "step": 119334
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.142872333526611,
      "learning_rate": 0.0002831006116140088,
      "loss": 3.0845,
      "step": 119335
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.4205870628356934,
      "learning_rate": 0.00028309652754868094,
      "loss": 2.7901,
      "step": 119336
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.8502371311187744,
      "learning_rate": 0.0002830924434864958,
      "loss": 3.1886,
      "step": 119337
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8585383892059326,
      "learning_rate": 0.00028308835942745424,
      "loss": 2.8961,
      "step": 119338
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.842790365219116,
      "learning_rate": 0.0002830842753715567,
      "loss": 2.963,
      "step": 119339
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.379786968231201,
      "learning_rate": 0.0002830801913188041,
      "loss": 3.0201,
      "step": 119340
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7882771492004395,
      "learning_rate": 0.0002830761072691972,
      "loss": 2.9286,
      "step": 119341
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2433676719665527,
      "learning_rate": 0.0002830720232227368,
      "loss": 2.7572,
      "step": 119342
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.433760404586792,
      "learning_rate": 0.00028306793917942365,
      "loss": 2.7498,
      "step": 119343
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.8764407634735107,
      "learning_rate": 0.0002830638551392586,
      "loss": 2.9676,
      "step": 119344
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.3898093700408936,
      "learning_rate": 0.0002830597711022422,
      "loss": 3.0253,
      "step": 119345
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.056305408477783,
      "learning_rate": 0.0002830556870683753,
      "loss": 3.0519,
      "step": 119346
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9752620458602905,
      "learning_rate": 0.00028305160303765864,
      "loss": 2.919,
      "step": 119347
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3279786109924316,
      "learning_rate": 0.000283047519010093,
      "loss": 3.2585,
      "step": 119348
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.8771486282348633,
      "learning_rate": 0.0002830434349856791,
      "loss": 3.1665,
      "step": 119349
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9363129138946533,
      "learning_rate": 0.0002830393509644179,
      "loss": 3.0406,
      "step": 119350
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.53140926361084,
      "learning_rate": 0.00028303526694631,
      "loss": 2.8032,
      "step": 119351
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.420393228530884,
      "learning_rate": 0.0002830311829313561,
      "loss": 2.9197,
      "step": 119352
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.025845527648926,
      "learning_rate": 0.000283027098919557,
      "loss": 2.7434,
      "step": 119353
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8182176351547241,
      "learning_rate": 0.00028302301491091346,
      "loss": 2.9734,
      "step": 119354
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.954367756843567,
      "learning_rate": 0.00028301893090542626,
      "loss": 3.089,
      "step": 119355
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.78834867477417,
      "learning_rate": 0.00028301484690309626,
      "loss": 3.0888,
      "step": 119356
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9888479709625244,
      "learning_rate": 0.00028301076290392405,
      "loss": 2.9231,
      "step": 119357
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8602286577224731,
      "learning_rate": 0.0002830066789079105,
      "loss": 2.7149,
      "step": 119358
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5868444442749023,
      "learning_rate": 0.0002830025949150563,
      "loss": 2.9767,
      "step": 119359
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.4770150184631348,
      "learning_rate": 0.0002829985109253622,
      "loss": 2.9433,
      "step": 119360
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8991707563400269,
      "learning_rate": 0.000282994426938829,
      "loss": 2.9453,
      "step": 119361
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.433558940887451,
      "learning_rate": 0.0002829903429554576,
      "loss": 2.9163,
      "step": 119362
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6063737869262695,
      "learning_rate": 0.00028298625897524846,
      "loss": 3.0387,
      "step": 119363
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0045018196105957,
      "learning_rate": 0.00028298217499820254,
      "loss": 2.9345,
      "step": 119364
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7610702514648438,
      "learning_rate": 0.0002829780910243206,
      "loss": 2.892,
      "step": 119365
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9194658994674683,
      "learning_rate": 0.0002829740070536033,
      "loss": 3.0263,
      "step": 119366
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.614685297012329,
      "learning_rate": 0.0002829699230860515,
      "loss": 3.059,
      "step": 119367
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.698615550994873,
      "learning_rate": 0.0002829658391216659,
      "loss": 2.9242,
      "step": 119368
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.940325140953064,
      "learning_rate": 0.0002829617551604472,
      "loss": 3.0585,
      "step": 119369
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9038007259368896,
      "learning_rate": 0.0002829576712023963,
      "loss": 3.0516,
      "step": 119370
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2797300815582275,
      "learning_rate": 0.00028295358724751387,
      "loss": 3.1457,
      "step": 119371
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1304147243499756,
      "learning_rate": 0.0002829495032958007,
      "loss": 2.9375,
      "step": 119372
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3806240558624268,
      "learning_rate": 0.0002829454193472576,
      "loss": 2.9587,
      "step": 119373
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.238455295562744,
      "learning_rate": 0.0002829413354018852,
      "loss": 2.6815,
      "step": 119374
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.301372528076172,
      "learning_rate": 0.0002829372514596843,
      "loss": 3.0943,
      "step": 119375
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.3091390132904053,
      "learning_rate": 0.00028293316752065574,
      "loss": 2.9875,
      "step": 119376
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0996851921081543,
      "learning_rate": 0.0002829290835848002,
      "loss": 3.0365,
      "step": 119377
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.436832904815674,
      "learning_rate": 0.00028292499965211845,
      "loss": 2.9848,
      "step": 119378
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.3648600578308105,
      "learning_rate": 0.0002829209157226112,
      "loss": 3.0625,
      "step": 119379
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.545417308807373,
      "learning_rate": 0.00028291683179627945,
      "loss": 2.7351,
      "step": 119380
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1110081672668457,
      "learning_rate": 0.00028291274787312375,
      "loss": 2.8767,
      "step": 119381
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6852402687072754,
      "learning_rate": 0.0002829086639531448,
      "loss": 2.8579,
      "step": 119382
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8460997343063354,
      "learning_rate": 0.0002829045800363435,
      "loss": 3.0962,
      "step": 119383
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5627801418304443,
      "learning_rate": 0.0002829004961227205,
      "loss": 3.0966,
      "step": 119384
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.017778158187866,
      "learning_rate": 0.00028289641221227667,
      "loss": 3.1175,
      "step": 119385
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.7156364917755127,
      "learning_rate": 0.0002828923283050127,
      "loss": 2.9749,
      "step": 119386
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1571829319000244,
      "learning_rate": 0.0002828882444009295,
      "loss": 2.7524,
      "step": 119387
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.507960081100464,
      "learning_rate": 0.00028288416050002756,
      "loss": 2.91,
      "step": 119388
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6435304880142212,
      "learning_rate": 0.0002828800766023078,
      "loss": 2.9485,
      "step": 119389
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9106401205062866,
      "learning_rate": 0.00028287599270777094,
      "loss": 2.8308,
      "step": 119390
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.810234546661377,
      "learning_rate": 0.00028287190881641776,
      "loss": 3.149,
      "step": 119391
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.954676866531372,
      "learning_rate": 0.00028286782492824904,
      "loss": 2.7935,
      "step": 119392
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.67024827003479,
      "learning_rate": 0.0002828637410432656,
      "loss": 3.0995,
      "step": 119393
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.5614655017852783,
      "learning_rate": 0.000282859657161468,
      "loss": 3.1685,
      "step": 119394
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6864469051361084,
      "learning_rate": 0.0002828555732828571,
      "loss": 2.8908,
      "step": 119395
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.029170036315918,
      "learning_rate": 0.0002828514894074337,
      "loss": 2.7615,
      "step": 119396
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9088026285171509,
      "learning_rate": 0.00028284740553519855,
      "loss": 2.933,
      "step": 119397
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6700360774993896,
      "learning_rate": 0.00028284332166615233,
      "loss": 2.8238,
      "step": 119398
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.847367286682129,
      "learning_rate": 0.00028283923780029606,
      "loss": 3.0706,
      "step": 119399
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1515896320343018,
      "learning_rate": 0.0002828351539376301,
      "loss": 2.9106,
      "step": 119400
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1026296615600586,
      "learning_rate": 0.00028283107007815545,
      "loss": 3.1471,
      "step": 119401
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.3482472896575928,
      "learning_rate": 0.0002828269862218728,
      "loss": 3.005,
      "step": 119402
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.164080858230591,
      "learning_rate": 0.00028282290236878295,
      "loss": 2.9191,
      "step": 119403
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9332337379455566,
      "learning_rate": 0.0002828188185188867,
      "loss": 3.2088,
      "step": 119404
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.0149145126342773,
      "learning_rate": 0.00028281473467218484,
      "loss": 2.8738,
      "step": 119405
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.047363042831421,
      "learning_rate": 0.0002828106508286779,
      "loss": 3.0282,
      "step": 119406
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2573084831237793,
      "learning_rate": 0.0002828065669883668,
      "loss": 3.1153,
      "step": 119407
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.6435080766677856,
      "learning_rate": 0.0002828024831512523,
      "loss": 3.0189,
      "step": 119408
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.7365082502365112,
      "learning_rate": 0.0002827983993173351,
      "loss": 3.095,
      "step": 119409
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.9729434251785278,
      "learning_rate": 0.00028279431548661606,
      "loss": 3.4496,
      "step": 119410
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2830629348754883,
      "learning_rate": 0.0002827902316590959,
      "loss": 2.9561,
      "step": 119411
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.381033420562744,
      "learning_rate": 0.0002827861478347754,
      "loss": 2.8324,
      "step": 119412
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.749149203300476,
      "learning_rate": 0.00028278206401365514,
      "loss": 3.1033,
      "step": 119413
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.643370270729065,
      "learning_rate": 0.0002827779801957361,
      "loss": 2.8583,
      "step": 119414
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8585370779037476,
      "learning_rate": 0.0002827738963810189,
      "loss": 2.9597,
      "step": 119415
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6985530853271484,
      "learning_rate": 0.0002827698125695044,
      "loss": 2.9918,
      "step": 119416
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.6877551078796387,
      "learning_rate": 0.0002827657287611934,
      "loss": 2.9235,
      "step": 119417
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.2661545276641846,
      "learning_rate": 0.0002827616449560865,
      "loss": 2.8783,
      "step": 119418
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9075798988342285,
      "learning_rate": 0.00028275756115418455,
      "loss": 3.0744,
      "step": 119419
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.1528093814849854,
      "learning_rate": 0.0002827534773554882,
      "loss": 2.9181,
      "step": 119420
    },
    {
      "epoch": 1.55,
      "grad_norm": 1.8232635259628296,
      "learning_rate": 0.0002827493935599984,
      "loss": 2.8091,
      "step": 119421
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.176630735397339,
      "learning_rate": 0.0002827453097677158,
      "loss": 3.044,
      "step": 119422
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.1622495651245117,
      "learning_rate": 0.00028274122597864127,
      "loss": 3.1119,
      "step": 119423
    },
    {
      "epoch": 1.55,
      "grad_norm": 3.3159542083740234,
      "learning_rate": 0.0002827371421927753,
      "loss": 2.9444,
      "step": 119424
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.134084939956665,
      "learning_rate": 0.00028273305841011894,
      "loss": 2.9801,
      "step": 119425
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7900190353393555,
      "learning_rate": 0.00028272897463067277,
      "loss": 2.7697,
      "step": 119426
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7371091842651367,
      "learning_rate": 0.00028272489085443757,
      "loss": 2.9197,
      "step": 119427
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6931591033935547,
      "learning_rate": 0.00028272080708141416,
      "loss": 3.0535,
      "step": 119428
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9540573358535767,
      "learning_rate": 0.00028271672331160345,
      "loss": 2.9943,
      "step": 119429
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.940369963645935,
      "learning_rate": 0.0002827126395450059,
      "loss": 2.8987,
      "step": 119430
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.090973377227783,
      "learning_rate": 0.00028270855578162233,
      "loss": 3.0005,
      "step": 119431
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.578220844268799,
      "learning_rate": 0.0002827044720214536,
      "loss": 2.8822,
      "step": 119432
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.555510997772217,
      "learning_rate": 0.00028270038826450045,
      "loss": 2.892,
      "step": 119433
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.008838653564453,
      "learning_rate": 0.0002826963045107636,
      "loss": 3.0952,
      "step": 119434
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6250953674316406,
      "learning_rate": 0.00028269222076024393,
      "loss": 3.109,
      "step": 119435
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.383878469467163,
      "learning_rate": 0.0002826881370129421,
      "loss": 3.0332,
      "step": 119436
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.294658660888672,
      "learning_rate": 0.0002826840532688588,
      "loss": 2.8532,
      "step": 119437
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8091328144073486,
      "learning_rate": 0.0002826799695279948,
      "loss": 2.98,
      "step": 119438
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7812703847885132,
      "learning_rate": 0.000282675885790351,
      "loss": 3.3158,
      "step": 119439
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9833496809005737,
      "learning_rate": 0.00028267180205592804,
      "loss": 3.0718,
      "step": 119440
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.5393096208572388,
      "learning_rate": 0.0002826677183247267,
      "loss": 2.8661,
      "step": 119441
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.800504446029663,
      "learning_rate": 0.00028266363459674787,
      "loss": 2.9615,
      "step": 119442
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7410610914230347,
      "learning_rate": 0.00028265955087199217,
      "loss": 2.8639,
      "step": 119443
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8923377990722656,
      "learning_rate": 0.00028265546715046033,
      "loss": 3.1495,
      "step": 119444
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9290764331817627,
      "learning_rate": 0.0002826513834321532,
      "loss": 3.0248,
      "step": 119445
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7562843561172485,
      "learning_rate": 0.00028264729971707144,
      "loss": 3.0014,
      "step": 119446
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2031164169311523,
      "learning_rate": 0.00028264321600521595,
      "loss": 3.0123,
      "step": 119447
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2701730728149414,
      "learning_rate": 0.0002826391322965874,
      "loss": 3.0967,
      "step": 119448
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1201536655426025,
      "learning_rate": 0.0002826350485911865,
      "loss": 3.0525,
      "step": 119449
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.6672616004943848,
      "learning_rate": 0.0002826309648890142,
      "loss": 2.9807,
      "step": 119450
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.124814510345459,
      "learning_rate": 0.00028262688119007107,
      "loss": 2.7628,
      "step": 119451
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0453622341156006,
      "learning_rate": 0.0002826227974943579,
      "loss": 3.0689,
      "step": 119452
    },
    {
      "epoch": 1.56,
      "grad_norm": 4.128012180328369,
      "learning_rate": 0.00028261871380187546,
      "loss": 2.7982,
      "step": 119453
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.170978307723999,
      "learning_rate": 0.0002826146301126246,
      "loss": 2.8149,
      "step": 119454
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8269593715667725,
      "learning_rate": 0.000282610546426606,
      "loss": 2.8256,
      "step": 119455
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0219807624816895,
      "learning_rate": 0.00028260646274382034,
      "loss": 2.9992,
      "step": 119456
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.346513032913208,
      "learning_rate": 0.0002826023790642686,
      "loss": 3.1322,
      "step": 119457
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2351911067962646,
      "learning_rate": 0.0002825982953879513,
      "loss": 3.0154,
      "step": 119458
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2900876998901367,
      "learning_rate": 0.0002825942117148694,
      "loss": 2.9762,
      "step": 119459
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7396894693374634,
      "learning_rate": 0.0002825901280450235,
      "loss": 2.9229,
      "step": 119460
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.877275824546814,
      "learning_rate": 0.0002825860443784144,
      "loss": 2.6642,
      "step": 119461
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.270395040512085,
      "learning_rate": 0.0002825819607150429,
      "loss": 2.8316,
      "step": 119462
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6884320974349976,
      "learning_rate": 0.00028257787705490977,
      "loss": 2.7234,
      "step": 119463
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.0399205684661865,
      "learning_rate": 0.0002825737933980157,
      "loss": 2.9363,
      "step": 119464
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2952444553375244,
      "learning_rate": 0.0002825697097443615,
      "loss": 3.1218,
      "step": 119465
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7344446182250977,
      "learning_rate": 0.00028256562609394804,
      "loss": 3.3136,
      "step": 119466
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9086720943450928,
      "learning_rate": 0.00028256154244677583,
      "loss": 2.9831,
      "step": 119467
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9022630453109741,
      "learning_rate": 0.00028255745880284577,
      "loss": 3.0954,
      "step": 119468
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.019197463989258,
      "learning_rate": 0.0002825533751621586,
      "loss": 3.02,
      "step": 119469
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.5345683097839355,
      "learning_rate": 0.0002825492915247151,
      "loss": 2.9241,
      "step": 119470
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0628440380096436,
      "learning_rate": 0.000282545207890516,
      "loss": 2.8786,
      "step": 119471
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.442927360534668,
      "learning_rate": 0.0002825411242595622,
      "loss": 3.1076,
      "step": 119472
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7931381464004517,
      "learning_rate": 0.00028253704063185424,
      "loss": 2.9763,
      "step": 119473
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9567019939422607,
      "learning_rate": 0.0002825329570073929,
      "loss": 3.0186,
      "step": 119474
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1662802696228027,
      "learning_rate": 0.0002825288733861791,
      "loss": 2.998,
      "step": 119475
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.884909749031067,
      "learning_rate": 0.0002825247897682135,
      "loss": 3.1559,
      "step": 119476
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2534584999084473,
      "learning_rate": 0.00028252070615349687,
      "loss": 3.339,
      "step": 119477
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.034446954727173,
      "learning_rate": 0.00028251662254203004,
      "loss": 3.1257,
      "step": 119478
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7610770463943481,
      "learning_rate": 0.0002825125389338136,
      "loss": 2.8806,
      "step": 119479
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0123984813690186,
      "learning_rate": 0.0002825084553288484,
      "loss": 3.0903,
      "step": 119480
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.896794080734253,
      "learning_rate": 0.0002825043717271353,
      "loss": 2.8095,
      "step": 119481
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3198277950286865,
      "learning_rate": 0.00028250028812867487,
      "loss": 3.0762,
      "step": 119482
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9974534511566162,
      "learning_rate": 0.000282496204533468,
      "loss": 3.0608,
      "step": 119483
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.133090019226074,
      "learning_rate": 0.0002824921209415155,
      "loss": 2.9387,
      "step": 119484
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9043126106262207,
      "learning_rate": 0.00028248803735281795,
      "loss": 3.0331,
      "step": 119485
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.051159620285034,
      "learning_rate": 0.0002824839537673762,
      "loss": 3.2512,
      "step": 119486
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.95829701423645,
      "learning_rate": 0.000282479870185191,
      "loss": 2.8816,
      "step": 119487
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0217490196228027,
      "learning_rate": 0.00028247578660626317,
      "loss": 3.0725,
      "step": 119488
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.418583393096924,
      "learning_rate": 0.0002824717030305934,
      "loss": 2.8737,
      "step": 119489
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2169086933135986,
      "learning_rate": 0.0002824676194581826,
      "loss": 2.9823,
      "step": 119490
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.331580877304077,
      "learning_rate": 0.0002824635358890313,
      "loss": 3.0387,
      "step": 119491
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7728880643844604,
      "learning_rate": 0.0002824594523231403,
      "loss": 3.0197,
      "step": 119492
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1825037002563477,
      "learning_rate": 0.00028245536876051044,
      "loss": 2.9441,
      "step": 119493
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7937031984329224,
      "learning_rate": 0.00028245128520114246,
      "loss": 2.8769,
      "step": 119494
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6891701221466064,
      "learning_rate": 0.0002824472016450371,
      "loss": 3.0704,
      "step": 119495
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5447611808776855,
      "learning_rate": 0.0002824431180921953,
      "loss": 3.0731,
      "step": 119496
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.139800786972046,
      "learning_rate": 0.0002824390345426175,
      "loss": 2.9099,
      "step": 119497
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2154130935668945,
      "learning_rate": 0.0002824349509963046,
      "loss": 3.1281,
      "step": 119498
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9915316104888916,
      "learning_rate": 0.00028243086745325746,
      "loss": 3.1424,
      "step": 119499
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0770249366760254,
      "learning_rate": 0.0002824267839134767,
      "loss": 2.872,
      "step": 119500
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2157552242279053,
      "learning_rate": 0.00028242270037696313,
      "loss": 2.8881,
      "step": 119501
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0474607944488525,
      "learning_rate": 0.00028241861684371764,
      "loss": 3.2081,
      "step": 119502
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.714706301689148,
      "learning_rate": 0.0002824145333137407,
      "loss": 3.1011,
      "step": 119503
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.83998703956604,
      "learning_rate": 0.0002824104497870333,
      "loss": 3.1596,
      "step": 119504
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7501068115234375,
      "learning_rate": 0.0002824063662635961,
      "loss": 2.9939,
      "step": 119505
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.261892318725586,
      "learning_rate": 0.0002824022827434298,
      "loss": 3.2206,
      "step": 119506
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4486279487609863,
      "learning_rate": 0.0002823981992265354,
      "loss": 2.9765,
      "step": 119507
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6911675930023193,
      "learning_rate": 0.0002823941157129134,
      "loss": 2.8994,
      "step": 119508
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8761229515075684,
      "learning_rate": 0.0002823900322025648,
      "loss": 2.8621,
      "step": 119509
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.928046703338623,
      "learning_rate": 0.00028238594869549013,
      "loss": 2.8923,
      "step": 119510
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.413954496383667,
      "learning_rate": 0.00028238186519169027,
      "loss": 3.0179,
      "step": 119511
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7353252172470093,
      "learning_rate": 0.00028237778169116595,
      "loss": 2.8934,
      "step": 119512
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9021788835525513,
      "learning_rate": 0.00028237369819391787,
      "loss": 2.9581,
      "step": 119513
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8447998762130737,
      "learning_rate": 0.0002823696146999469,
      "loss": 2.93,
      "step": 119514
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2292137145996094,
      "learning_rate": 0.00028236553120925386,
      "loss": 2.7877,
      "step": 119515
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4010322093963623,
      "learning_rate": 0.0002823614477218393,
      "loss": 2.9465,
      "step": 119516
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.264589786529541,
      "learning_rate": 0.0002823573642377041,
      "loss": 2.8784,
      "step": 119517
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8564443588256836,
      "learning_rate": 0.0002823532807568489,
      "loss": 3.033,
      "step": 119518
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1412198543548584,
      "learning_rate": 0.0002823491972792746,
      "loss": 3.1519,
      "step": 119519
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8015791177749634,
      "learning_rate": 0.00028234511380498197,
      "loss": 2.7908,
      "step": 119520
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1504404544830322,
      "learning_rate": 0.00028234103033397185,
      "loss": 2.8091,
      "step": 119521
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.961166501045227,
      "learning_rate": 0.00028233694686624465,
      "loss": 3.0724,
      "step": 119522
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8671679496765137,
      "learning_rate": 0.0002823328634018014,
      "loss": 3.0767,
      "step": 119523
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.083981513977051,
      "learning_rate": 0.0002823287799406428,
      "loss": 3.0674,
      "step": 119524
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.454530715942383,
      "learning_rate": 0.0002823246964827696,
      "loss": 2.9996,
      "step": 119525
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8743352890014648,
      "learning_rate": 0.0002823206130281826,
      "loss": 2.8588,
      "step": 119526
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.210324287414551,
      "learning_rate": 0.00028231652957688263,
      "loss": 2.862,
      "step": 119527
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7380425930023193,
      "learning_rate": 0.00028231244612887023,
      "loss": 3.2187,
      "step": 119528
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6772321462631226,
      "learning_rate": 0.0002823083626841463,
      "loss": 3.1517,
      "step": 119529
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8479218482971191,
      "learning_rate": 0.00028230427924271157,
      "loss": 3.2374,
      "step": 119530
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.089695692062378,
      "learning_rate": 0.0002823001958045668,
      "loss": 3.0225,
      "step": 119531
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.166011333465576,
      "learning_rate": 0.0002822961123697128,
      "loss": 2.8743,
      "step": 119532
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7609004974365234,
      "learning_rate": 0.0002822920289381503,
      "loss": 3.0445,
      "step": 119533
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5606322288513184,
      "learning_rate": 0.00028228794550987996,
      "loss": 3.0262,
      "step": 119534
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.4745981693267822,
      "learning_rate": 0.00028228386208490274,
      "loss": 2.9642,
      "step": 119535
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1009459495544434,
      "learning_rate": 0.0002822797786632192,
      "loss": 3.1369,
      "step": 119536
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0201478004455566,
      "learning_rate": 0.0002822756952448302,
      "loss": 2.9852,
      "step": 119537
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8207224607467651,
      "learning_rate": 0.00028227161182973646,
      "loss": 3.0159,
      "step": 119538
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6970480680465698,
      "learning_rate": 0.00028226752841793885,
      "loss": 2.889,
      "step": 119539
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.118525743484497,
      "learning_rate": 0.00028226344500943793,
      "loss": 2.9396,
      "step": 119540
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.796165108680725,
      "learning_rate": 0.0002822593616042346,
      "loss": 3.0468,
      "step": 119541
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6011691093444824,
      "learning_rate": 0.0002822552782023297,
      "loss": 2.9306,
      "step": 119542
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6113834381103516,
      "learning_rate": 0.00028225119480372374,
      "loss": 2.898,
      "step": 119543
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8261979818344116,
      "learning_rate": 0.00028224711140841766,
      "loss": 2.9175,
      "step": 119544
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6628077030181885,
      "learning_rate": 0.0002822430280164122,
      "loss": 3.119,
      "step": 119545
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9490751028060913,
      "learning_rate": 0.0002822389446277081,
      "loss": 2.9171,
      "step": 119546
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.038073778152466,
      "learning_rate": 0.0002822348612423061,
      "loss": 2.9405,
      "step": 119547
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8414617776870728,
      "learning_rate": 0.0002822307778602069,
      "loss": 3.0002,
      "step": 119548
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.899771809577942,
      "learning_rate": 0.0002822266944814115,
      "loss": 3.0237,
      "step": 119549
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.349853992462158,
      "learning_rate": 0.00028222261110592037,
      "loss": 2.8069,
      "step": 119550
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9669265747070312,
      "learning_rate": 0.00028221852773373447,
      "loss": 3.0004,
      "step": 119551
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.188314437866211,
      "learning_rate": 0.0002822144443648544,
      "loss": 2.9459,
      "step": 119552
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7913151979446411,
      "learning_rate": 0.00028221036099928106,
      "loss": 3.0642,
      "step": 119553
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7564449310302734,
      "learning_rate": 0.00028220627763701506,
      "loss": 2.9009,
      "step": 119554
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3056957721710205,
      "learning_rate": 0.00028220219427805735,
      "loss": 3.2116,
      "step": 119555
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.736041784286499,
      "learning_rate": 0.00028219811092240857,
      "loss": 3.0525,
      "step": 119556
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0573508739471436,
      "learning_rate": 0.00028219402757006953,
      "loss": 3.1705,
      "step": 119557
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.049729108810425,
      "learning_rate": 0.0002821899442210409,
      "loss": 2.8125,
      "step": 119558
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8566640615463257,
      "learning_rate": 0.00028218586087532346,
      "loss": 2.9765,
      "step": 119559
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1490938663482666,
      "learning_rate": 0.00028218177753291804,
      "loss": 2.9674,
      "step": 119560
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5952980518341064,
      "learning_rate": 0.0002821776941938253,
      "loss": 3.0432,
      "step": 119561
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9115732908248901,
      "learning_rate": 0.00028217361085804615,
      "loss": 3.0479,
      "step": 119562
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0956671237945557,
      "learning_rate": 0.00028216952752558135,
      "loss": 3.0783,
      "step": 119563
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9159468412399292,
      "learning_rate": 0.00028216544419643145,
      "loss": 3.1369,
      "step": 119564
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8664544820785522,
      "learning_rate": 0.0002821613608705973,
      "loss": 2.9839,
      "step": 119565
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1193583011627197,
      "learning_rate": 0.00028215727754807974,
      "loss": 3.1475,
      "step": 119566
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.241682767868042,
      "learning_rate": 0.00028215319422887945,
      "loss": 3.1695,
      "step": 119567
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.759518027305603,
      "learning_rate": 0.0002821491109129972,
      "loss": 2.9255,
      "step": 119568
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1225874423980713,
      "learning_rate": 0.00028214502760043396,
      "loss": 2.9633,
      "step": 119569
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7405478954315186,
      "learning_rate": 0.0002821409442911901,
      "loss": 2.882,
      "step": 119570
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.984374761581421,
      "learning_rate": 0.0002821368609852666,
      "loss": 3.0477,
      "step": 119571
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3646934032440186,
      "learning_rate": 0.00028213277768266424,
      "loss": 2.9097,
      "step": 119572
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1153299808502197,
      "learning_rate": 0.00028212869438338367,
      "loss": 3.0177,
      "step": 119573
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9849399328231812,
      "learning_rate": 0.00028212461108742575,
      "loss": 3.0212,
      "step": 119574
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6025078296661377,
      "learning_rate": 0.0002821205277947912,
      "loss": 3.0001,
      "step": 119575
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.359076499938965,
      "learning_rate": 0.0002821164445054809,
      "loss": 2.8005,
      "step": 119576
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7187343835830688,
      "learning_rate": 0.0002821123612194953,
      "loss": 2.8777,
      "step": 119577
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1611568927764893,
      "learning_rate": 0.00028210827793683543,
      "loss": 2.9838,
      "step": 119578
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.439739465713501,
      "learning_rate": 0.00028210419465750196,
      "loss": 2.929,
      "step": 119579
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0102665424346924,
      "learning_rate": 0.00028210011138149563,
      "loss": 2.716,
      "step": 119580
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.988398790359497,
      "learning_rate": 0.0002820960281088172,
      "loss": 3.2518,
      "step": 119581
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8583097457885742,
      "learning_rate": 0.00028209194483946767,
      "loss": 2.9569,
      "step": 119582
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3321127891540527,
      "learning_rate": 0.0002820878615734474,
      "loss": 2.8091,
      "step": 119583
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7519159317016602,
      "learning_rate": 0.00028208377831075734,
      "loss": 2.9608,
      "step": 119584
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.141638994216919,
      "learning_rate": 0.0002820796950513983,
      "loss": 2.7426,
      "step": 119585
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7360117435455322,
      "learning_rate": 0.00028207561179537094,
      "loss": 3.247,
      "step": 119586
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8857059478759766,
      "learning_rate": 0.000282071528542676,
      "loss": 2.9185,
      "step": 119587
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8931056261062622,
      "learning_rate": 0.00028206744529331453,
      "loss": 3.1118,
      "step": 119588
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0166056156158447,
      "learning_rate": 0.0002820633620472869,
      "loss": 2.9368,
      "step": 119589
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.925619125366211,
      "learning_rate": 0.00028205927880459397,
      "loss": 2.8896,
      "step": 119590
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8071686029434204,
      "learning_rate": 0.0002820551955652366,
      "loss": 3.0051,
      "step": 119591
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.31801176071167,
      "learning_rate": 0.00028205111232921555,
      "loss": 3.1541,
      "step": 119592
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6754236221313477,
      "learning_rate": 0.0002820470290965315,
      "loss": 2.9827,
      "step": 119593
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2171852588653564,
      "learning_rate": 0.0002820429458671854,
      "loss": 2.7122,
      "step": 119594
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8714098930358887,
      "learning_rate": 0.0002820388626411777,
      "loss": 3.0516,
      "step": 119595
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6006977558135986,
      "learning_rate": 0.0002820347794185093,
      "loss": 2.9805,
      "step": 119596
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0951597690582275,
      "learning_rate": 0.000282030696199181,
      "loss": 3.08,
      "step": 119597
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.217506170272827,
      "learning_rate": 0.00028202661298319356,
      "loss": 2.9844,
      "step": 119598
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4535107612609863,
      "learning_rate": 0.0002820225297705476,
      "loss": 3.005,
      "step": 119599
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0522046089172363,
      "learning_rate": 0.00028201844656124424,
      "loss": 3.0834,
      "step": 119600
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8486653566360474,
      "learning_rate": 0.0002820143633552838,
      "loss": 2.7805,
      "step": 119601
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1687774658203125,
      "learning_rate": 0.00028201028015266725,
      "loss": 2.8376,
      "step": 119602
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3300716876983643,
      "learning_rate": 0.00028200619695339533,
      "loss": 2.8481,
      "step": 119603
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8593090772628784,
      "learning_rate": 0.00028200211375746883,
      "loss": 3.0574,
      "step": 119604
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.548867702484131,
      "learning_rate": 0.0002819980305648884,
      "loss": 3.1056,
      "step": 119605
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6948047876358032,
      "learning_rate": 0.00028199394737565505,
      "loss": 2.8902,
      "step": 119606
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6211371421813965,
      "learning_rate": 0.00028198986418976924,
      "loss": 3.2002,
      "step": 119607
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1091647148132324,
      "learning_rate": 0.0002819857810072319,
      "loss": 2.964,
      "step": 119608
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.196157932281494,
      "learning_rate": 0.00028198169782804366,
      "loss": 2.9433,
      "step": 119609
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.51393985748291,
      "learning_rate": 0.00028197761465220537,
      "loss": 3.1746,
      "step": 119610
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.303351402282715,
      "learning_rate": 0.0002819735314797178,
      "loss": 3.0223,
      "step": 119611
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2746036052703857,
      "learning_rate": 0.0002819694483105818,
      "loss": 2.7305,
      "step": 119612
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.107707977294922,
      "learning_rate": 0.00028196536514479793,
      "loss": 2.9205,
      "step": 119613
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5748562812805176,
      "learning_rate": 0.00028196128198236704,
      "loss": 2.8129,
      "step": 119614
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.9696431159973145,
      "learning_rate": 0.00028195719882328986,
      "loss": 2.9592,
      "step": 119615
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.268111228942871,
      "learning_rate": 0.0002819531156675672,
      "loss": 2.9195,
      "step": 119616
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.816106915473938,
      "learning_rate": 0.00028194903251519977,
      "loss": 3.0031,
      "step": 119617
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7556464672088623,
      "learning_rate": 0.00028194494936618843,
      "loss": 3.0875,
      "step": 119618
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1285061836242676,
      "learning_rate": 0.0002819408662205339,
      "loss": 2.9043,
      "step": 119619
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.030367136001587,
      "learning_rate": 0.0002819367830782368,
      "loss": 2.8036,
      "step": 119620
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7439050674438477,
      "learning_rate": 0.000281932699939298,
      "loss": 2.8291,
      "step": 119621
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4603753089904785,
      "learning_rate": 0.00028192861680371826,
      "loss": 2.8947,
      "step": 119622
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0344338417053223,
      "learning_rate": 0.00028192453367149834,
      "loss": 2.9744,
      "step": 119623
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.494007110595703,
      "learning_rate": 0.000281920450542639,
      "loss": 3.1148,
      "step": 119624
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2654061317443848,
      "learning_rate": 0.000281916367417141,
      "loss": 2.8743,
      "step": 119625
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.188577175140381,
      "learning_rate": 0.000281912284295005,
      "loss": 2.7158,
      "step": 119626
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.109959125518799,
      "learning_rate": 0.000281908201176232,
      "loss": 2.964,
      "step": 119627
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8791462182998657,
      "learning_rate": 0.0002819041180608225,
      "loss": 3.1342,
      "step": 119628
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.94861900806427,
      "learning_rate": 0.0002819000349487774,
      "loss": 3.0817,
      "step": 119629
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.705204963684082,
      "learning_rate": 0.00028189595184009746,
      "loss": 2.8769,
      "step": 119630
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.691826105117798,
      "learning_rate": 0.0002818918687347833,
      "loss": 2.9635,
      "step": 119631
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.021512985229492,
      "learning_rate": 0.0002818877856328359,
      "loss": 2.943,
      "step": 119632
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.826749563217163,
      "learning_rate": 0.0002818837025342558,
      "loss": 2.8852,
      "step": 119633
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.0742132663726807,
      "learning_rate": 0.00028187961943904397,
      "loss": 3.016,
      "step": 119634
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6040209531784058,
      "learning_rate": 0.000281875536347201,
      "loss": 3.1356,
      "step": 119635
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6188418865203857,
      "learning_rate": 0.00028187145325872775,
      "loss": 3.2758,
      "step": 119636
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0949289798736572,
      "learning_rate": 0.0002818673701736248,
      "loss": 3.08,
      "step": 119637
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0832443237304688,
      "learning_rate": 0.0002818632870918932,
      "loss": 3.048,
      "step": 119638
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1579740047454834,
      "learning_rate": 0.00028185920401353347,
      "loss": 2.9731,
      "step": 119639
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7449864149093628,
      "learning_rate": 0.00028185512093854647,
      "loss": 3.0859,
      "step": 119640
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8742049932479858,
      "learning_rate": 0.000281851037866933,
      "loss": 2.9916,
      "step": 119641
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.474076747894287,
      "learning_rate": 0.0002818469547986937,
      "loss": 3.016,
      "step": 119642
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9453632831573486,
      "learning_rate": 0.0002818428717338295,
      "loss": 2.9805,
      "step": 119643
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4065804481506348,
      "learning_rate": 0.00028183878867234094,
      "loss": 2.8119,
      "step": 119644
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.066392660140991,
      "learning_rate": 0.0002818347056142289,
      "loss": 2.9412,
      "step": 119645
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6277917623519897,
      "learning_rate": 0.0002818306225594942,
      "loss": 3.1142,
      "step": 119646
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9732987880706787,
      "learning_rate": 0.00028182653950813743,
      "loss": 3.1293,
      "step": 119647
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.812217116355896,
      "learning_rate": 0.0002818224564601596,
      "loss": 3.1627,
      "step": 119648
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.744814395904541,
      "learning_rate": 0.00028181837341556125,
      "loss": 3.2656,
      "step": 119649
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8073056936264038,
      "learning_rate": 0.0002818142903743432,
      "loss": 3.2799,
      "step": 119650
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9200018644332886,
      "learning_rate": 0.00028181020733650616,
      "loss": 2.8483,
      "step": 119651
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6553211212158203,
      "learning_rate": 0.00028180612430205097,
      "loss": 2.723,
      "step": 119652
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9800149202346802,
      "learning_rate": 0.0002818020412709784,
      "loss": 3.0133,
      "step": 119653
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9024951457977295,
      "learning_rate": 0.0002817979582432891,
      "loss": 2.9071,
      "step": 119654
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.4776508808135986,
      "learning_rate": 0.0002817938752189841,
      "loss": 3.0997,
      "step": 119655
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7926992177963257,
      "learning_rate": 0.0002817897921980638,
      "loss": 3.1865,
      "step": 119656
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9960154294967651,
      "learning_rate": 0.00028178570918052913,
      "loss": 3.2496,
      "step": 119657
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.00258731842041,
      "learning_rate": 0.00028178162616638084,
      "loss": 3.0708,
      "step": 119658
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.558428168296814,
      "learning_rate": 0.0002817775431556197,
      "loss": 3.0431,
      "step": 119659
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.279813289642334,
      "learning_rate": 0.0002817734601482464,
      "loss": 2.8143,
      "step": 119660
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2359533309936523,
      "learning_rate": 0.00028176937714426197,
      "loss": 2.7794,
      "step": 119661
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1449553966522217,
      "learning_rate": 0.0002817652941436668,
      "loss": 3.1004,
      "step": 119662
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.098057270050049,
      "learning_rate": 0.00028176121114646175,
      "loss": 3.1536,
      "step": 119663
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1992111206054688,
      "learning_rate": 0.00028175712815264767,
      "loss": 3.3391,
      "step": 119664
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.835620403289795,
      "learning_rate": 0.00028175304516222534,
      "loss": 2.6904,
      "step": 119665
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8236225843429565,
      "learning_rate": 0.00028174896217519543,
      "loss": 2.9489,
      "step": 119666
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.7273919582366943,
      "learning_rate": 0.00028174487919155886,
      "loss": 2.9221,
      "step": 119667
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9508525133132935,
      "learning_rate": 0.0002817407962113161,
      "loss": 2.8626,
      "step": 119668
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.8515870571136475,
      "learning_rate": 0.00028173671323446806,
      "loss": 2.9986,
      "step": 119669
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.854590892791748,
      "learning_rate": 0.00028173263026101556,
      "loss": 2.9104,
      "step": 119670
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.176302433013916,
      "learning_rate": 0.0002817285472909593,
      "loss": 2.9273,
      "step": 119671
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.407655954360962,
      "learning_rate": 0.00028172446432430005,
      "loss": 2.7913,
      "step": 119672
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.209031105041504,
      "learning_rate": 0.0002817203813610387,
      "loss": 2.7594,
      "step": 119673
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7077175378799438,
      "learning_rate": 0.00028171629840117573,
      "loss": 2.9388,
      "step": 119674
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.847231864929199,
      "learning_rate": 0.000281712215444712,
      "loss": 3.0815,
      "step": 119675
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.837007999420166,
      "learning_rate": 0.0002817081324916484,
      "loss": 3.0972,
      "step": 119676
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.2578036785125732,
      "learning_rate": 0.00028170404954198556,
      "loss": 2.9609,
      "step": 119677
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2783148288726807,
      "learning_rate": 0.0002816999665957243,
      "loss": 2.8795,
      "step": 119678
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0187580585479736,
      "learning_rate": 0.0002816958836528655,
      "loss": 2.9723,
      "step": 119679
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8537571430206299,
      "learning_rate": 0.0002816918007134096,
      "loss": 3.1346,
      "step": 119680
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.601186513900757,
      "learning_rate": 0.0002816877177773576,
      "loss": 2.6757,
      "step": 119681
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2103586196899414,
      "learning_rate": 0.00028168363484471013,
      "loss": 3.2029,
      "step": 119682
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9076119661331177,
      "learning_rate": 0.0002816795519154681,
      "loss": 2.9981,
      "step": 119683
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2905466556549072,
      "learning_rate": 0.0002816754689896321,
      "loss": 2.7722,
      "step": 119684
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0344455242156982,
      "learning_rate": 0.00028167138606720316,
      "loss": 2.9425,
      "step": 119685
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8112610578536987,
      "learning_rate": 0.0002816673031481817,
      "loss": 2.7736,
      "step": 119686
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0058906078338623,
      "learning_rate": 0.0002816632202325687,
      "loss": 3.0835,
      "step": 119687
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.270251512527466,
      "learning_rate": 0.0002816591373203648,
      "loss": 2.871,
      "step": 119688
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.766435980796814,
      "learning_rate": 0.0002816550544115708,
      "loss": 2.6259,
      "step": 119689
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4306106567382812,
      "learning_rate": 0.0002816509715061875,
      "loss": 2.7554,
      "step": 119690
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0486912727355957,
      "learning_rate": 0.00028164688860421574,
      "loss": 3.0526,
      "step": 119691
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1483123302459717,
      "learning_rate": 0.0002816428057056561,
      "loss": 3.1896,
      "step": 119692
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5355045795440674,
      "learning_rate": 0.0002816387228105093,
      "loss": 3.1398,
      "step": 119693
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.561805486679077,
      "learning_rate": 0.0002816346399187763,
      "loss": 3.146,
      "step": 119694
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.169241428375244,
      "learning_rate": 0.00028163055703045773,
      "loss": 3.0742,
      "step": 119695
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.025552988052368,
      "learning_rate": 0.0002816264741455544,
      "loss": 3.0442,
      "step": 119696
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.972743034362793,
      "learning_rate": 0.00028162239126406713,
      "loss": 3.2127,
      "step": 119697
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.809565544128418,
      "learning_rate": 0.00028161830838599656,
      "loss": 3.091,
      "step": 119698
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0998849868774414,
      "learning_rate": 0.00028161422551134345,
      "loss": 3.0361,
      "step": 119699
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.973426103591919,
      "learning_rate": 0.0002816101426401086,
      "loss": 3.153,
      "step": 119700
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0856075286865234,
      "learning_rate": 0.0002816060597722928,
      "loss": 3.2199,
      "step": 119701
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.795192837715149,
      "learning_rate": 0.00028160197690789673,
      "loss": 2.8874,
      "step": 119702
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2532105445861816,
      "learning_rate": 0.0002815978940469213,
      "loss": 3.1581,
      "step": 119703
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1033010482788086,
      "learning_rate": 0.00028159381118936716,
      "loss": 3.1644,
      "step": 119704
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.021121025085449,
      "learning_rate": 0.000281589728335235,
      "loss": 2.8425,
      "step": 119705
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.250680685043335,
      "learning_rate": 0.0002815856454845257,
      "loss": 2.928,
      "step": 119706
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7857260704040527,
      "learning_rate": 0.00028158156263723995,
      "loss": 3.2027,
      "step": 119707
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0130369663238525,
      "learning_rate": 0.0002815774797933785,
      "loss": 2.9265,
      "step": 119708
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9906381368637085,
      "learning_rate": 0.0002815733969529422,
      "loss": 2.889,
      "step": 119709
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5905513763427734,
      "learning_rate": 0.0002815693141159318,
      "loss": 3.1352,
      "step": 119710
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4721388816833496,
      "learning_rate": 0.000281565231282348,
      "loss": 2.9325,
      "step": 119711
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2913148403167725,
      "learning_rate": 0.0002815611484521915,
      "loss": 3.0369,
      "step": 119712
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.823460340499878,
      "learning_rate": 0.00028155706562546317,
      "loss": 2.7885,
      "step": 119713
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7009706497192383,
      "learning_rate": 0.0002815529828021637,
      "loss": 2.9432,
      "step": 119714
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3571255207061768,
      "learning_rate": 0.0002815488999822939,
      "loss": 2.7733,
      "step": 119715
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0694985389709473,
      "learning_rate": 0.00028154481716585457,
      "loss": 2.9509,
      "step": 119716
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9979017972946167,
      "learning_rate": 0.0002815407343528463,
      "loss": 2.9464,
      "step": 119717
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9869309663772583,
      "learning_rate": 0.00028153665154327006,
      "loss": 3.0607,
      "step": 119718
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.42160964012146,
      "learning_rate": 0.00028153256873712643,
      "loss": 2.9725,
      "step": 119719
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7666339874267578,
      "learning_rate": 0.00028152848593441626,
      "loss": 2.7162,
      "step": 119720
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8712360858917236,
      "learning_rate": 0.00028152440313514024,
      "loss": 3.0632,
      "step": 119721
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.802087426185608,
      "learning_rate": 0.0002815203203392993,
      "loss": 2.8578,
      "step": 119722
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9471142292022705,
      "learning_rate": 0.000281516237546894,
      "loss": 2.9551,
      "step": 119723
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.497837543487549,
      "learning_rate": 0.00028151215475792515,
      "loss": 2.8414,
      "step": 119724
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0541927814483643,
      "learning_rate": 0.00028150807197239357,
      "loss": 2.8852,
      "step": 119725
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8315372467041016,
      "learning_rate": 0.0002815039891903001,
      "loss": 2.7433,
      "step": 119726
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5140700340270996,
      "learning_rate": 0.0002814999064116452,
      "loss": 2.8354,
      "step": 119727
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3442435264587402,
      "learning_rate": 0.00028149582363643,
      "loss": 3.0455,
      "step": 119728
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1095893383026123,
      "learning_rate": 0.000281491740864655,
      "loss": 2.9024,
      "step": 119729
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.321894645690918,
      "learning_rate": 0.00028148765809632095,
      "loss": 3.1788,
      "step": 119730
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9242619276046753,
      "learning_rate": 0.0002814835753314288,
      "loss": 3.1686,
      "step": 119731
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.916183352470398,
      "learning_rate": 0.0002814794925699791,
      "loss": 3.0941,
      "step": 119732
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7876732349395752,
      "learning_rate": 0.0002814754098119729,
      "loss": 3.044,
      "step": 119733
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9980648756027222,
      "learning_rate": 0.00028147132705741066,
      "loss": 2.9734,
      "step": 119734
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.095825672149658,
      "learning_rate": 0.0002814672443062932,
      "loss": 3.0181,
      "step": 119735
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8102242946624756,
      "learning_rate": 0.0002814631615586214,
      "loss": 3.1945,
      "step": 119736
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2354485988616943,
      "learning_rate": 0.00028145907881439584,
      "loss": 3.0862,
      "step": 119737
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.989550232887268,
      "learning_rate": 0.00028145499607361746,
      "loss": 2.9994,
      "step": 119738
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.586919903755188,
      "learning_rate": 0.00028145091333628696,
      "loss": 2.9849,
      "step": 119739
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.761565923690796,
      "learning_rate": 0.0002814468306024052,
      "loss": 2.963,
      "step": 119740
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8233693838119507,
      "learning_rate": 0.00028144274787197265,
      "loss": 3.0135,
      "step": 119741
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0610296726226807,
      "learning_rate": 0.0002814386651449903,
      "loss": 2.9516,
      "step": 119742
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.285365343093872,
      "learning_rate": 0.00028143458242145883,
      "loss": 2.8944,
      "step": 119743
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2844080924987793,
      "learning_rate": 0.000281430499701379,
      "loss": 3.0646,
      "step": 119744
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.757737398147583,
      "learning_rate": 0.0002814264169847516,
      "loss": 2.8325,
      "step": 119745
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.268880844116211,
      "learning_rate": 0.00028142233427157757,
      "loss": 3.2169,
      "step": 119746
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.8408114910125732,
      "learning_rate": 0.0002814182515618573,
      "loss": 3.1446,
      "step": 119747
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5862810611724854,
      "learning_rate": 0.0002814141688555917,
      "loss": 3.0503,
      "step": 119748
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0339574813842773,
      "learning_rate": 0.0002814100861527816,
      "loss": 3.1386,
      "step": 119749
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.602492332458496,
      "learning_rate": 0.0002814060034534277,
      "loss": 2.8817,
      "step": 119750
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.6122875213623047,
      "learning_rate": 0.00028140192075753074,
      "loss": 2.9449,
      "step": 119751
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8924366235733032,
      "learning_rate": 0.0002813978380650917,
      "loss": 3.0689,
      "step": 119752
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.580595016479492,
      "learning_rate": 0.000281393755376111,
      "loss": 2.8981,
      "step": 119753
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7252261638641357,
      "learning_rate": 0.00028138967269058954,
      "loss": 3.2295,
      "step": 119754
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.500232219696045,
      "learning_rate": 0.00028138559000852814,
      "loss": 3.0059,
      "step": 119755
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.9412426948547363,
      "learning_rate": 0.00028138150732992745,
      "loss": 3.0387,
      "step": 119756
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9611022472381592,
      "learning_rate": 0.0002813774246547883,
      "loss": 3.1079,
      "step": 119757
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0842807292938232,
      "learning_rate": 0.0002813733419831116,
      "loss": 2.9602,
      "step": 119758
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1077346801757812,
      "learning_rate": 0.0002813692593148978,
      "loss": 3.2092,
      "step": 119759
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6747710704803467,
      "learning_rate": 0.0002813651766501478,
      "loss": 3.011,
      "step": 119760
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0021169185638428,
      "learning_rate": 0.0002813610939888624,
      "loss": 2.9462,
      "step": 119761
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3503754138946533,
      "learning_rate": 0.00028135701133104226,
      "loss": 3.1929,
      "step": 119762
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7843679189682007,
      "learning_rate": 0.00028135292867668824,
      "loss": 3.1334,
      "step": 119763
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1274266242980957,
      "learning_rate": 0.00028134884602580123,
      "loss": 3.0794,
      "step": 119764
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.765573263168335,
      "learning_rate": 0.0002813447633783816,
      "loss": 3.2094,
      "step": 119765
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7405643463134766,
      "learning_rate": 0.0002813406807344304,
      "loss": 2.7909,
      "step": 119766
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.948391079902649,
      "learning_rate": 0.0002813365980939483,
      "loss": 3.2261,
      "step": 119767
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.085470676422119,
      "learning_rate": 0.0002813325154569361,
      "loss": 3.1127,
      "step": 119768
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7870993614196777,
      "learning_rate": 0.00028132843282339454,
      "loss": 2.8155,
      "step": 119769
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5939338207244873,
      "learning_rate": 0.0002813243501933245,
      "loss": 3.0129,
      "step": 119770
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.157409429550171,
      "learning_rate": 0.0002813202675667265,
      "loss": 2.6695,
      "step": 119771
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.147465944290161,
      "learning_rate": 0.00028131618494360133,
      "loss": 3.053,
      "step": 119772
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.117748498916626,
      "learning_rate": 0.0002813121023239499,
      "loss": 3.0155,
      "step": 119773
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.435129165649414,
      "learning_rate": 0.00028130801970777293,
      "loss": 2.9172,
      "step": 119774
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7060216665267944,
      "learning_rate": 0.0002813039370950711,
      "loss": 3.0631,
      "step": 119775
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1054763793945312,
      "learning_rate": 0.00028129985448584524,
      "loss": 3.1547,
      "step": 119776
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0328993797302246,
      "learning_rate": 0.0002812957718800962,
      "loss": 2.9317,
      "step": 119777
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2333528995513916,
      "learning_rate": 0.0002812916892778245,
      "loss": 2.826,
      "step": 119778
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.821981906890869,
      "learning_rate": 0.0002812876066790311,
      "loss": 3.1247,
      "step": 119779
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7568477392196655,
      "learning_rate": 0.0002812835240837166,
      "loss": 2.9101,
      "step": 119780
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2797722816467285,
      "learning_rate": 0.0002812794414918819,
      "loss": 3.0391,
      "step": 119781
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.9638051986694336,
      "learning_rate": 0.00028127535890352766,
      "loss": 3.1029,
      "step": 119782
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.865309953689575,
      "learning_rate": 0.00028127127631865484,
      "loss": 3.0006,
      "step": 119783
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6575886011123657,
      "learning_rate": 0.0002812671937372639,
      "loss": 2.8998,
      "step": 119784
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4931774139404297,
      "learning_rate": 0.0002812631111593558,
      "loss": 3.1155,
      "step": 119785
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6967968940734863,
      "learning_rate": 0.00028125902858493115,
      "loss": 3.2646,
      "step": 119786
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7913603782653809,
      "learning_rate": 0.00028125494601399087,
      "loss": 3.0328,
      "step": 119787
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8438162803649902,
      "learning_rate": 0.0002812508634465356,
      "loss": 3.0953,
      "step": 119788
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.316699266433716,
      "learning_rate": 0.0002812467808825663,
      "loss": 2.9688,
      "step": 119789
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8552762269973755,
      "learning_rate": 0.0002812426983220835,
      "loss": 2.6852,
      "step": 119790
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8530982732772827,
      "learning_rate": 0.00028123861576508796,
      "loss": 2.9417,
      "step": 119791
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8505899906158447,
      "learning_rate": 0.00028123453321158057,
      "loss": 3.1567,
      "step": 119792
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8823339939117432,
      "learning_rate": 0.000281230450661562,
      "loss": 2.96,
      "step": 119793
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7416093349456787,
      "learning_rate": 0.00028122636811503307,
      "loss": 2.8278,
      "step": 119794
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5658974647521973,
      "learning_rate": 0.0002812222855719945,
      "loss": 2.8497,
      "step": 119795
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.79147207736969,
      "learning_rate": 0.00028121820303244715,
      "loss": 2.8593,
      "step": 119796
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.866588830947876,
      "learning_rate": 0.00028121412049639163,
      "loss": 3.0328,
      "step": 119797
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.947201132774353,
      "learning_rate": 0.0002812100379638287,
      "loss": 2.8751,
      "step": 119798
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.998880386352539,
      "learning_rate": 0.00028120595543475917,
      "loss": 3.0131,
      "step": 119799
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.136913537979126,
      "learning_rate": 0.00028120187290918387,
      "loss": 3.078,
      "step": 119800
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7253034114837646,
      "learning_rate": 0.00028119779038710354,
      "loss": 2.8926,
      "step": 119801
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6575168371200562,
      "learning_rate": 0.0002811937078685188,
      "loss": 2.994,
      "step": 119802
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0285370349884033,
      "learning_rate": 0.0002811896253534306,
      "loss": 3.1115,
      "step": 119803
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7718853950500488,
      "learning_rate": 0.00028118554284183955,
      "loss": 3.041,
      "step": 119804
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8683781623840332,
      "learning_rate": 0.00028118146033374647,
      "loss": 2.9353,
      "step": 119805
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7081472873687744,
      "learning_rate": 0.00028117737782915205,
      "loss": 3.0824,
      "step": 119806
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.24544095993042,
      "learning_rate": 0.0002811732953280572,
      "loss": 3.0551,
      "step": 119807
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1511616706848145,
      "learning_rate": 0.00028116921283046257,
      "loss": 2.9454,
      "step": 119808
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.838629961013794,
      "learning_rate": 0.00028116513033636886,
      "loss": 2.7537,
      "step": 119809
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7634679079055786,
      "learning_rate": 0.000281161047845777,
      "loss": 3.0653,
      "step": 119810
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9201123714447021,
      "learning_rate": 0.00028115696535868764,
      "loss": 2.9717,
      "step": 119811
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6489505767822266,
      "learning_rate": 0.00028115288287510154,
      "loss": 2.9076,
      "step": 119812
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2222673892974854,
      "learning_rate": 0.0002811488003950195,
      "loss": 3.0634,
      "step": 119813
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8723875284194946,
      "learning_rate": 0.00028114471791844224,
      "loss": 3.0545,
      "step": 119814
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0487520694732666,
      "learning_rate": 0.0002811406354453705,
      "loss": 2.7056,
      "step": 119815
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3256113529205322,
      "learning_rate": 0.0002811365529758051,
      "loss": 3.0292,
      "step": 119816
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.753542900085449,
      "learning_rate": 0.00028113247050974674,
      "loss": 3.0624,
      "step": 119817
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3185205459594727,
      "learning_rate": 0.0002811283880471963,
      "loss": 2.7705,
      "step": 119818
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7354228496551514,
      "learning_rate": 0.00028112430558815445,
      "loss": 3.0412,
      "step": 119819
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9333481788635254,
      "learning_rate": 0.00028112022313262185,
      "loss": 3.0125,
      "step": 119820
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.831503987312317,
      "learning_rate": 0.00028111614068059937,
      "loss": 2.8972,
      "step": 119821
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.303995370864868,
      "learning_rate": 0.00028111205823208775,
      "loss": 3.0507,
      "step": 119822
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3072588443756104,
      "learning_rate": 0.00028110797578708775,
      "loss": 3.122,
      "step": 119823
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0546624660491943,
      "learning_rate": 0.0002811038933456002,
      "loss": 3.0485,
      "step": 119824
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3320484161376953,
      "learning_rate": 0.0002810998109076258,
      "loss": 3.0201,
      "step": 119825
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4068217277526855,
      "learning_rate": 0.0002810957284731653,
      "loss": 3.0701,
      "step": 119826
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.14492130279541,
      "learning_rate": 0.00028109164604221936,
      "loss": 2.955,
      "step": 119827
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9532769918441772,
      "learning_rate": 0.0002810875636147889,
      "loss": 2.9595,
      "step": 119828
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9818034172058105,
      "learning_rate": 0.00028108348119087464,
      "loss": 3.2355,
      "step": 119829
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1150095462799072,
      "learning_rate": 0.00028107939877047725,
      "loss": 2.9971,
      "step": 119830
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.374673843383789,
      "learning_rate": 0.00028107531635359777,
      "loss": 2.8739,
      "step": 119831
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9623385667800903,
      "learning_rate": 0.0002810712339402365,
      "loss": 2.9524,
      "step": 119832
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.048020839691162,
      "learning_rate": 0.00028106715153039454,
      "loss": 3.0945,
      "step": 119833
    },
    {
      "epoch": 1.56,
      "grad_norm": 4.393496513366699,
      "learning_rate": 0.00028106306912407253,
      "loss": 2.9786,
      "step": 119834
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.989880084991455,
      "learning_rate": 0.00028105898672127125,
      "loss": 3.0825,
      "step": 119835
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.349428653717041,
      "learning_rate": 0.00028105490432199146,
      "loss": 3.1397,
      "step": 119836
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9718379974365234,
      "learning_rate": 0.000281050821926234,
      "loss": 3.1416,
      "step": 119837
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.970585584640503,
      "learning_rate": 0.0002810467395339995,
      "loss": 2.8485,
      "step": 119838
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9744755029678345,
      "learning_rate": 0.0002810426571452887,
      "loss": 3.0041,
      "step": 119839
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.925416111946106,
      "learning_rate": 0.0002810385747601025,
      "loss": 2.9089,
      "step": 119840
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9383152723312378,
      "learning_rate": 0.00028103449237844156,
      "loss": 3.0632,
      "step": 119841
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.54073429107666,
      "learning_rate": 0.00028103041000030664,
      "loss": 3.0603,
      "step": 119842
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.76557457447052,
      "learning_rate": 0.0002810263276256985,
      "loss": 2.7729,
      "step": 119843
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.815087080001831,
      "learning_rate": 0.0002810222452546182,
      "loss": 3.0752,
      "step": 119844
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.414052724838257,
      "learning_rate": 0.00028101816288706594,
      "loss": 3.0368,
      "step": 119845
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3099842071533203,
      "learning_rate": 0.0002810140805230428,
      "loss": 2.7991,
      "step": 119846
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9832903146743774,
      "learning_rate": 0.00028100999816254956,
      "loss": 2.9649,
      "step": 119847
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0068116188049316,
      "learning_rate": 0.0002810059158055868,
      "loss": 2.9883,
      "step": 119848
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0631871223449707,
      "learning_rate": 0.0002810018334521555,
      "loss": 2.9517,
      "step": 119849
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9428322315216064,
      "learning_rate": 0.00028099775110225646,
      "loss": 3.2276,
      "step": 119850
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.963864326477051,
      "learning_rate": 0.0002809936687558901,
      "loss": 3.0101,
      "step": 119851
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9536948204040527,
      "learning_rate": 0.0002809895864130574,
      "loss": 2.9548,
      "step": 119852
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9333418607711792,
      "learning_rate": 0.0002809855040737591,
      "loss": 3.0874,
      "step": 119853
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.101400136947632,
      "learning_rate": 0.00028098142173799595,
      "loss": 2.8143,
      "step": 119854
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.07802414894104,
      "learning_rate": 0.0002809773394057687,
      "loss": 3.1556,
      "step": 119855
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.775647521018982,
      "learning_rate": 0.0002809732570770783,
      "loss": 3.0601,
      "step": 119856
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.767479419708252,
      "learning_rate": 0.0002809691747519251,
      "loss": 2.9218,
      "step": 119857
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1977341175079346,
      "learning_rate": 0.0002809650924303102,
      "loss": 3.029,
      "step": 119858
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5405144691467285,
      "learning_rate": 0.0002809610101122342,
      "loss": 3.0533,
      "step": 119859
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9757267236709595,
      "learning_rate": 0.0002809569277976979,
      "loss": 2.9294,
      "step": 119860
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9828494787216187,
      "learning_rate": 0.0002809528454867021,
      "loss": 3.0685,
      "step": 119861
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5365543365478516,
      "learning_rate": 0.0002809487631792476,
      "loss": 3.0993,
      "step": 119862
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.323268175125122,
      "learning_rate": 0.000280944680875335,
      "loss": 3.0326,
      "step": 119863
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7120283842086792,
      "learning_rate": 0.0002809405985749651,
      "loss": 2.9815,
      "step": 119864
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.035740852355957,
      "learning_rate": 0.0002809365162781388,
      "loss": 2.809,
      "step": 119865
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.3256192207336426,
      "learning_rate": 0.0002809324339848567,
      "loss": 3.0257,
      "step": 119866
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0893166065216064,
      "learning_rate": 0.0002809283516951196,
      "loss": 3.0085,
      "step": 119867
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.4801878929138184,
      "learning_rate": 0.00028092426940892843,
      "loss": 2.9254,
      "step": 119868
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.706375002861023,
      "learning_rate": 0.00028092018712628364,
      "loss": 2.9337,
      "step": 119869
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7223337888717651,
      "learning_rate": 0.0002809161048471862,
      "loss": 2.9286,
      "step": 119870
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.268333911895752,
      "learning_rate": 0.00028091202257163675,
      "loss": 2.9886,
      "step": 119871
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0948522090911865,
      "learning_rate": 0.00028090794029963617,
      "loss": 3.0471,
      "step": 119872
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0825743675231934,
      "learning_rate": 0.00028090385803118516,
      "loss": 3.2443,
      "step": 119873
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2411506175994873,
      "learning_rate": 0.0002808997757662846,
      "loss": 3.1716,
      "step": 119874
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.9948835372924805,
      "learning_rate": 0.000280895693504935,
      "loss": 3.0453,
      "step": 119875
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9090702533721924,
      "learning_rate": 0.00028089161124713723,
      "loss": 2.9916,
      "step": 119876
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3363587856292725,
      "learning_rate": 0.0002808875289928921,
      "loss": 2.9387,
      "step": 119877
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.152097463607788,
      "learning_rate": 0.0002808834467422003,
      "loss": 3.032,
      "step": 119878
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6705749034881592,
      "learning_rate": 0.0002808793644950627,
      "loss": 2.9141,
      "step": 119879
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8154503107070923,
      "learning_rate": 0.00028087528225148,
      "loss": 3.0717,
      "step": 119880
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7580889463424683,
      "learning_rate": 0.00028087120001145296,
      "loss": 2.77,
      "step": 119881
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6504952907562256,
      "learning_rate": 0.0002808671177749823,
      "loss": 2.995,
      "step": 119882
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2965047359466553,
      "learning_rate": 0.0002808630355420687,
      "loss": 2.8648,
      "step": 119883
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.5812398195266724,
      "learning_rate": 0.0002808589533127131,
      "loss": 2.8425,
      "step": 119884
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.815937876701355,
      "learning_rate": 0.00028085487108691617,
      "loss": 2.7664,
      "step": 119885
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9721752405166626,
      "learning_rate": 0.0002808507888646788,
      "loss": 3.0788,
      "step": 119886
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7646796703338623,
      "learning_rate": 0.0002808467066460015,
      "loss": 3.0171,
      "step": 119887
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.300509452819824,
      "learning_rate": 0.0002808426244308852,
      "loss": 3.0217,
      "step": 119888
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8812165260314941,
      "learning_rate": 0.0002808385422193306,
      "loss": 3.0263,
      "step": 119889
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.385773181915283,
      "learning_rate": 0.0002808344600113384,
      "loss": 2.9271,
      "step": 119890
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4981114864349365,
      "learning_rate": 0.00028083037780690954,
      "loss": 3.0846,
      "step": 119891
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4827122688293457,
      "learning_rate": 0.0002808262956060447,
      "loss": 3.0718,
      "step": 119892
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.412123918533325,
      "learning_rate": 0.0002808222134087445,
      "loss": 3.0289,
      "step": 119893
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0717689990997314,
      "learning_rate": 0.0002808181312150099,
      "loss": 2.9194,
      "step": 119894
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.880115270614624,
      "learning_rate": 0.0002808140490248416,
      "loss": 2.8844,
      "step": 119895
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9653650522232056,
      "learning_rate": 0.00028080996683824025,
      "loss": 2.8997,
      "step": 119896
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9847867488861084,
      "learning_rate": 0.0002808058846552067,
      "loss": 3.191,
      "step": 119897
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7737576961517334,
      "learning_rate": 0.0002808018024757418,
      "loss": 3.2045,
      "step": 119898
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3509533405303955,
      "learning_rate": 0.0002807977202998461,
      "loss": 2.9403,
      "step": 119899
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.114034414291382,
      "learning_rate": 0.00028079363812752044,
      "loss": 3.3185,
      "step": 119900
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0235445499420166,
      "learning_rate": 0.00028078955595876566,
      "loss": 2.83,
      "step": 119901
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3416125774383545,
      "learning_rate": 0.00028078547379358247,
      "loss": 3.028,
      "step": 119902
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7384872436523438,
      "learning_rate": 0.0002807813916319716,
      "loss": 3.0268,
      "step": 119903
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7781574726104736,
      "learning_rate": 0.0002807773094739339,
      "loss": 2.7915,
      "step": 119904
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.5271462202072144,
      "learning_rate": 0.00028077322731947,
      "loss": 2.8041,
      "step": 119905
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.0948550701141357,
      "learning_rate": 0.0002807691451685807,
      "loss": 2.896,
      "step": 119906
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.865877866744995,
      "learning_rate": 0.0002807650630212668,
      "loss": 3.0326,
      "step": 119907
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.149319887161255,
      "learning_rate": 0.000280760980877529,
      "loss": 2.9715,
      "step": 119908
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.516453742980957,
      "learning_rate": 0.0002807568987373681,
      "loss": 3.1022,
      "step": 119909
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.991891860961914,
      "learning_rate": 0.000280752816600785,
      "loss": 3.0083,
      "step": 119910
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8195117712020874,
      "learning_rate": 0.00028074873446778026,
      "loss": 2.8558,
      "step": 119911
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0012471675872803,
      "learning_rate": 0.0002807446523383546,
      "loss": 3.0416,
      "step": 119912
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9437694549560547,
      "learning_rate": 0.0002807405702125089,
      "loss": 3.0307,
      "step": 119913
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9296478033065796,
      "learning_rate": 0.0002807364880902439,
      "loss": 3.1295,
      "step": 119914
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1274216175079346,
      "learning_rate": 0.0002807324059715603,
      "loss": 3.0946,
      "step": 119915
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4383509159088135,
      "learning_rate": 0.00028072832385645896,
      "loss": 3.0364,
      "step": 119916
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7184131145477295,
      "learning_rate": 0.00028072424174494073,
      "loss": 2.968,
      "step": 119917
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4030354022979736,
      "learning_rate": 0.00028072015963700607,
      "loss": 2.9596,
      "step": 119918
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.3120827674865723,
      "learning_rate": 0.0002807160775326559,
      "loss": 3.1373,
      "step": 119919
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.238539218902588,
      "learning_rate": 0.00028071199543189094,
      "loss": 3.1667,
      "step": 119920
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9188716411590576,
      "learning_rate": 0.00028070791333471205,
      "loss": 3.1336,
      "step": 119921
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.69567608833313,
      "learning_rate": 0.00028070383124111987,
      "loss": 2.9391,
      "step": 119922
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1187658309936523,
      "learning_rate": 0.00028069974915111536,
      "loss": 2.9979,
      "step": 119923
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0256378650665283,
      "learning_rate": 0.0002806956670646991,
      "loss": 3.0008,
      "step": 119924
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7996838092803955,
      "learning_rate": 0.0002806915849818717,
      "loss": 2.8489,
      "step": 119925
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.2757370471954346,
      "learning_rate": 0.0002806875029026342,
      "loss": 3.0157,
      "step": 119926
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2929961681365967,
      "learning_rate": 0.00028068342082698724,
      "loss": 3.0631,
      "step": 119927
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.894266963005066,
      "learning_rate": 0.0002806793387549316,
      "loss": 3.2616,
      "step": 119928
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6391030550003052,
      "learning_rate": 0.0002806752566864682,
      "loss": 3.0802,
      "step": 119929
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8781019449234009,
      "learning_rate": 0.0002806711746215974,
      "loss": 3.0138,
      "step": 119930
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.994041085243225,
      "learning_rate": 0.0002806670925603203,
      "loss": 2.8637,
      "step": 119931
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.156442165374756,
      "learning_rate": 0.0002806630105026375,
      "loss": 2.9642,
      "step": 119932
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9432488679885864,
      "learning_rate": 0.00028065892844854985,
      "loss": 2.7259,
      "step": 119933
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6737289428710938,
      "learning_rate": 0.00028065484639805806,
      "loss": 2.657,
      "step": 119934
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.840544581413269,
      "learning_rate": 0.000280650764351163,
      "loss": 3.0413,
      "step": 119935
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.03680419921875,
      "learning_rate": 0.0002806466823078652,
      "loss": 3.2528,
      "step": 119936
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1509475708007812,
      "learning_rate": 0.0002806426002681656,
      "loss": 3.384,
      "step": 119937
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7084107398986816,
      "learning_rate": 0.00028063851823206484,
      "loss": 2.9814,
      "step": 119938
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.04463267326355,
      "learning_rate": 0.00028063443619956376,
      "loss": 2.8864,
      "step": 119939
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7461365461349487,
      "learning_rate": 0.00028063035417066315,
      "loss": 3.0943,
      "step": 119940
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7568843364715576,
      "learning_rate": 0.0002806262721453638,
      "loss": 2.8954,
      "step": 119941
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.115811824798584,
      "learning_rate": 0.0002806221901236663,
      "loss": 3.2085,
      "step": 119942
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.328230857849121,
      "learning_rate": 0.0002806181081055715,
      "loss": 2.8078,
      "step": 119943
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.9910829067230225,
      "learning_rate": 0.0002806140260910801,
      "loss": 2.8057,
      "step": 119944
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.030468225479126,
      "learning_rate": 0.00028060994408019295,
      "loss": 2.9444,
      "step": 119945
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.445571184158325,
      "learning_rate": 0.0002806058620729108,
      "loss": 3.2285,
      "step": 119946
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.362168550491333,
      "learning_rate": 0.00028060178006923453,
      "loss": 2.9246,
      "step": 119947
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8892197608947754,
      "learning_rate": 0.0002805976980691646,
      "loss": 3.003,
      "step": 119948
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.3160860538482666,
      "learning_rate": 0.0002805936160727019,
      "loss": 3.3814,
      "step": 119949
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7951157093048096,
      "learning_rate": 0.0002805895340798472,
      "loss": 3.0009,
      "step": 119950
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.0394551753997803,
      "learning_rate": 0.00028058545209060136,
      "loss": 2.9719,
      "step": 119951
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6323055028915405,
      "learning_rate": 0.000280581370104965,
      "loss": 2.8907,
      "step": 119952
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.936736822128296,
      "learning_rate": 0.00028057728812293903,
      "loss": 2.7571,
      "step": 119953
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9020302295684814,
      "learning_rate": 0.00028057320614452403,
      "loss": 2.8575,
      "step": 119954
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1756842136383057,
      "learning_rate": 0.00028056912416972083,
      "loss": 3.2812,
      "step": 119955
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.5950874090194702,
      "learning_rate": 0.00028056504219853013,
      "loss": 2.7516,
      "step": 119956
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.407869338989258,
      "learning_rate": 0.0002805609602309528,
      "loss": 2.8091,
      "step": 119957
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.8367788791656494,
      "learning_rate": 0.00028055687826698957,
      "loss": 2.76,
      "step": 119958
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7313416004180908,
      "learning_rate": 0.00028055279630664133,
      "loss": 2.9818,
      "step": 119959
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6119987964630127,
      "learning_rate": 0.00028054871434990853,
      "loss": 2.8329,
      "step": 119960
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.466984272003174,
      "learning_rate": 0.00028054463239679214,
      "loss": 2.9396,
      "step": 119961
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.033761501312256,
      "learning_rate": 0.0002805405504472928,
      "loss": 3.031,
      "step": 119962
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.854522943496704,
      "learning_rate": 0.00028053646850141134,
      "loss": 3.0636,
      "step": 119963
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0119211673736572,
      "learning_rate": 0.00028053238655914857,
      "loss": 2.8317,
      "step": 119964
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9460608959197998,
      "learning_rate": 0.0002805283046205053,
      "loss": 3.143,
      "step": 119965
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6225411891937256,
      "learning_rate": 0.0002805242226854821,
      "loss": 2.9255,
      "step": 119966
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.8131163120269775,
      "learning_rate": 0.0002805201407540797,
      "loss": 2.8573,
      "step": 119967
    },
    {
      "epoch": 1.56,
      "grad_norm": 4.450626850128174,
      "learning_rate": 0.00028051605882629905,
      "loss": 2.9243,
      "step": 119968
    },
    {
      "epoch": 1.56,
      "grad_norm": 4.833581447601318,
      "learning_rate": 0.00028051197690214083,
      "loss": 3.025,
      "step": 119969
    },
    {
      "epoch": 1.56,
      "grad_norm": 5.91958475112915,
      "learning_rate": 0.00028050789498160585,
      "loss": 2.9874,
      "step": 119970
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.467590093612671,
      "learning_rate": 0.0002805038130646948,
      "loss": 3.3274,
      "step": 119971
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4136862754821777,
      "learning_rate": 0.0002804997311514085,
      "loss": 2.843,
      "step": 119972
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.678903341293335,
      "learning_rate": 0.00028049564924174757,
      "loss": 3.1668,
      "step": 119973
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9049911499023438,
      "learning_rate": 0.0002804915673357129,
      "loss": 3.0345,
      "step": 119974
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2435433864593506,
      "learning_rate": 0.0002804874854333052,
      "loss": 3.0276,
      "step": 119975
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.551720142364502,
      "learning_rate": 0.0002804834035345253,
      "loss": 2.8011,
      "step": 119976
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0791780948638916,
      "learning_rate": 0.00028047932163937386,
      "loss": 3.1507,
      "step": 119977
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.774299144744873,
      "learning_rate": 0.00028047523974785175,
      "loss": 3.0601,
      "step": 119978
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.3650238513946533,
      "learning_rate": 0.00028047115785995955,
      "loss": 2.9828,
      "step": 119979
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8608546257019043,
      "learning_rate": 0.0002804670759756983,
      "loss": 3.062,
      "step": 119980
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7242532968521118,
      "learning_rate": 0.00028046299409506846,
      "loss": 2.8994,
      "step": 119981
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9539986848831177,
      "learning_rate": 0.0002804589122180709,
      "loss": 2.9509,
      "step": 119982
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.9082770347595215,
      "learning_rate": 0.0002804548303447064,
      "loss": 2.9133,
      "step": 119983
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5103280544281006,
      "learning_rate": 0.0002804507484749758,
      "loss": 3.1006,
      "step": 119984
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.071640968322754,
      "learning_rate": 0.00028044666660887975,
      "loss": 3.1435,
      "step": 119985
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9972527027130127,
      "learning_rate": 0.000280442584746419,
      "loss": 3.2284,
      "step": 119986
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3793559074401855,
      "learning_rate": 0.00028043850288759435,
      "loss": 3.076,
      "step": 119987
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9664591550827026,
      "learning_rate": 0.00028043442103240656,
      "loss": 3.0104,
      "step": 119988
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.736545443534851,
      "learning_rate": 0.00028043033918085633,
      "loss": 2.9967,
      "step": 119989
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0775938034057617,
      "learning_rate": 0.0002804262573329446,
      "loss": 2.7342,
      "step": 119990
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.019948959350586,
      "learning_rate": 0.0002804221754886719,
      "loss": 2.8275,
      "step": 119991
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8443704843521118,
      "learning_rate": 0.00028041809364803903,
      "loss": 3.048,
      "step": 119992
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8038794994354248,
      "learning_rate": 0.0002804140118110469,
      "loss": 2.9131,
      "step": 119993
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.16759991645813,
      "learning_rate": 0.00028040992997769616,
      "loss": 2.8858,
      "step": 119994
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.8410534858703613,
      "learning_rate": 0.00028040584814798756,
      "loss": 2.9749,
      "step": 119995
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6778199672698975,
      "learning_rate": 0.00028040176632192194,
      "loss": 2.9681,
      "step": 119996
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6870498657226562,
      "learning_rate": 0.0002803976844995,
      "loss": 2.7329,
      "step": 119997
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5669138431549072,
      "learning_rate": 0.0002803936026807224,
      "loss": 2.9228,
      "step": 119998
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4282593727111816,
      "learning_rate": 0.00028038952086559005,
      "loss": 2.8504,
      "step": 119999
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9127622842788696,
      "learning_rate": 0.00028038543905410364,
      "loss": 3.0304,
      "step": 120000
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6266167163848877,
      "learning_rate": 0.000280381357246264,
      "loss": 2.913,
      "step": 120001
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.6556015014648438,
      "learning_rate": 0.0002803772754420719,
      "loss": 2.9159,
      "step": 120002
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.2857885360717773,
      "learning_rate": 0.0002803731936415279,
      "loss": 3.128,
      "step": 120003
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5405406951904297,
      "learning_rate": 0.00028036911184463293,
      "loss": 2.949,
      "step": 120004
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9341440200805664,
      "learning_rate": 0.00028036503005138773,
      "loss": 2.9317,
      "step": 120005
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.830486297607422,
      "learning_rate": 0.000280360948261793,
      "loss": 3.2278,
      "step": 120006
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.361924886703491,
      "learning_rate": 0.00028035686647584956,
      "loss": 2.875,
      "step": 120007
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.146806478500366,
      "learning_rate": 0.0002803527846935583,
      "loss": 3.2663,
      "step": 120008
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.258148431777954,
      "learning_rate": 0.0002803487029149196,
      "loss": 2.9505,
      "step": 120009
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.681970238685608,
      "learning_rate": 0.00028034462113993454,
      "loss": 3.1422,
      "step": 120010
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2352280616760254,
      "learning_rate": 0.0002803405393686038,
      "loss": 3.2405,
      "step": 120011
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2589166164398193,
      "learning_rate": 0.0002803364576009281,
      "loss": 2.9595,
      "step": 120012
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.6849610805511475,
      "learning_rate": 0.0002803323758369082,
      "loss": 3.0034,
      "step": 120013
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.756382703781128,
      "learning_rate": 0.000280328294076545,
      "loss": 3.0282,
      "step": 120014
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8737523555755615,
      "learning_rate": 0.00028032421231983905,
      "loss": 2.9875,
      "step": 120015
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4839320182800293,
      "learning_rate": 0.0002803201305667912,
      "loss": 3.0698,
      "step": 120016
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.47280216217041,
      "learning_rate": 0.0002803160488174022,
      "loss": 3.0335,
      "step": 120017
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0622148513793945,
      "learning_rate": 0.0002803119670716728,
      "loss": 3.0036,
      "step": 120018
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1193058490753174,
      "learning_rate": 0.0002803078853296038,
      "loss": 2.9512,
      "step": 120019
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8332736492156982,
      "learning_rate": 0.00028030380359119606,
      "loss": 2.891,
      "step": 120020
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9851059913635254,
      "learning_rate": 0.00028029972185645007,
      "loss": 3.1114,
      "step": 120021
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3041136264801025,
      "learning_rate": 0.00028029564012536673,
      "loss": 2.8976,
      "step": 120022
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.349774122238159,
      "learning_rate": 0.00028029155839794686,
      "loss": 3.053,
      "step": 120023
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.284393787384033,
      "learning_rate": 0.0002802874766741911,
      "loss": 2.9242,
      "step": 120024
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.891802430152893,
      "learning_rate": 0.0002802833949541003,
      "loss": 3.2617,
      "step": 120025
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9574652910232544,
      "learning_rate": 0.0002802793132376753,
      "loss": 3.0259,
      "step": 120026
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9762804508209229,
      "learning_rate": 0.00028027523152491667,
      "loss": 2.7492,
      "step": 120027
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0087571144104004,
      "learning_rate": 0.0002802711498158252,
      "loss": 3.2327,
      "step": 120028
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.816030263900757,
      "learning_rate": 0.00028026706811040166,
      "loss": 2.8499,
      "step": 120029
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0994694232940674,
      "learning_rate": 0.0002802629864086469,
      "loss": 2.943,
      "step": 120030
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.618344783782959,
      "learning_rate": 0.0002802589047105616,
      "loss": 3.0638,
      "step": 120031
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7519773244857788,
      "learning_rate": 0.0002802548230161467,
      "loss": 2.8742,
      "step": 120032
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9012417793273926,
      "learning_rate": 0.00028025074132540266,
      "loss": 3.1238,
      "step": 120033
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.945906639099121,
      "learning_rate": 0.00028024665963833034,
      "loss": 2.7585,
      "step": 120034
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1423490047454834,
      "learning_rate": 0.0002802425779549306,
      "loss": 2.7775,
      "step": 120035
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.844543695449829,
      "learning_rate": 0.00028023849627520413,
      "loss": 3.3123,
      "step": 120036
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.572632312774658,
      "learning_rate": 0.00028023441459915164,
      "loss": 2.9525,
      "step": 120037
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4429163932800293,
      "learning_rate": 0.0002802303329267741,
      "loss": 3.0315,
      "step": 120038
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.570801258087158,
      "learning_rate": 0.00028022625125807203,
      "loss": 3.1636,
      "step": 120039
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.975234866142273,
      "learning_rate": 0.0002802221695930462,
      "loss": 2.9599,
      "step": 120040
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0192370414733887,
      "learning_rate": 0.0002802180879316975,
      "loss": 3.0098,
      "step": 120041
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.09403920173645,
      "learning_rate": 0.00028021400627402664,
      "loss": 2.9921,
      "step": 120042
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7974038124084473,
      "learning_rate": 0.0002802099246200343,
      "loss": 3.1428,
      "step": 120043
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0044760704040527,
      "learning_rate": 0.0002802058429697214,
      "loss": 2.8755,
      "step": 120044
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7489391565322876,
      "learning_rate": 0.00028020176132308866,
      "loss": 2.8637,
      "step": 120045
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.233218193054199,
      "learning_rate": 0.0002801976796801367,
      "loss": 2.9826,
      "step": 120046
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8395018577575684,
      "learning_rate": 0.0002801935980408663,
      "loss": 3.0024,
      "step": 120047
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.738199234008789,
      "learning_rate": 0.00028018951640527837,
      "loss": 2.9998,
      "step": 120048
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.847109079360962,
      "learning_rate": 0.0002801854347733735,
      "loss": 3.0659,
      "step": 120049
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.807416319847107,
      "learning_rate": 0.0002801813531451526,
      "loss": 3.119,
      "step": 120050
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8633400201797485,
      "learning_rate": 0.0002801772715206165,
      "loss": 2.8522,
      "step": 120051
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0330872535705566,
      "learning_rate": 0.0002801731898997656,
      "loss": 3.1239,
      "step": 120052
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.927600622177124,
      "learning_rate": 0.00028016910828260096,
      "loss": 3.3201,
      "step": 120053
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.35422420501709,
      "learning_rate": 0.00028016502666912326,
      "loss": 3.0138,
      "step": 120054
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.233457565307617,
      "learning_rate": 0.0002801609450593332,
      "loss": 3.0298,
      "step": 120055
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7410963773727417,
      "learning_rate": 0.0002801568634532316,
      "loss": 3.2723,
      "step": 120056
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9849944114685059,
      "learning_rate": 0.0002801527818508194,
      "loss": 2.8819,
      "step": 120057
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8134962320327759,
      "learning_rate": 0.000280148700252097,
      "loss": 2.9266,
      "step": 120058
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8529534339904785,
      "learning_rate": 0.0002801446186570653,
      "loss": 2.9034,
      "step": 120059
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6135523319244385,
      "learning_rate": 0.0002801405370657252,
      "loss": 3.0535,
      "step": 120060
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.835991621017456,
      "learning_rate": 0.00028013645547807724,
      "loss": 2.7102,
      "step": 120061
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.743828296661377,
      "learning_rate": 0.00028013237389412236,
      "loss": 3.0108,
      "step": 120062
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.078781843185425,
      "learning_rate": 0.00028012829231386125,
      "loss": 2.9102,
      "step": 120063
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.922882080078125,
      "learning_rate": 0.0002801242107372947,
      "loss": 2.7432,
      "step": 120064
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.333500862121582,
      "learning_rate": 0.0002801201291644234,
      "loss": 2.8561,
      "step": 120065
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.140275001525879,
      "learning_rate": 0.00028011604759524807,
      "loss": 3.1386,
      "step": 120066
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.215451240539551,
      "learning_rate": 0.00028011196602976957,
      "loss": 2.9725,
      "step": 120067
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.964341402053833,
      "learning_rate": 0.0002801078844679887,
      "loss": 2.9016,
      "step": 120068
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7945328950881958,
      "learning_rate": 0.0002801038029099061,
      "loss": 3.0722,
      "step": 120069
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8572674989700317,
      "learning_rate": 0.0002800997213555226,
      "loss": 3.139,
      "step": 120070
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9202574491500854,
      "learning_rate": 0.0002800956398048389,
      "loss": 3.0238,
      "step": 120071
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8153188228607178,
      "learning_rate": 0.00028009155825785585,
      "loss": 2.9628,
      "step": 120072
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8032363653182983,
      "learning_rate": 0.0002800874767145741,
      "loss": 3.0369,
      "step": 120073
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7670800685882568,
      "learning_rate": 0.0002800833951749945,
      "loss": 2.9401,
      "step": 120074
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1367502212524414,
      "learning_rate": 0.0002800793136391178,
      "loss": 2.8399,
      "step": 120075
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9344677925109863,
      "learning_rate": 0.0002800752321069447,
      "loss": 2.8343,
      "step": 120076
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.973871946334839,
      "learning_rate": 0.0002800711505784759,
      "loss": 3.1279,
      "step": 120077
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.829561710357666,
      "learning_rate": 0.00028006706905371235,
      "loss": 2.7329,
      "step": 120078
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3114497661590576,
      "learning_rate": 0.00028006298753265475,
      "loss": 3.2909,
      "step": 120079
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6977601051330566,
      "learning_rate": 0.00028005890601530376,
      "loss": 3.0074,
      "step": 120080
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3828935623168945,
      "learning_rate": 0.0002800548245016602,
      "loss": 2.6241,
      "step": 120081
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9648903608322144,
      "learning_rate": 0.00028005074299172484,
      "loss": 3.4993,
      "step": 120082
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4286842346191406,
      "learning_rate": 0.00028004666148549837,
      "loss": 2.8796,
      "step": 120083
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8827356100082397,
      "learning_rate": 0.0002800425799829816,
      "loss": 3.3012,
      "step": 120084
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0174739360809326,
      "learning_rate": 0.0002800384984841753,
      "loss": 3.0368,
      "step": 120085
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5216987133026123,
      "learning_rate": 0.0002800344169890803,
      "loss": 2.9545,
      "step": 120086
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0739407539367676,
      "learning_rate": 0.0002800303354976973,
      "loss": 3.0553,
      "step": 120087
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8980821371078491,
      "learning_rate": 0.00028002625401002694,
      "loss": 3.0454,
      "step": 120088
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.9085750579833984,
      "learning_rate": 0.00028002217252607003,
      "loss": 2.8871,
      "step": 120089
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.8262779712677,
      "learning_rate": 0.00028001809104582744,
      "loss": 2.8778,
      "step": 120090
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9895209074020386,
      "learning_rate": 0.00028001400956929987,
      "loss": 3.1345,
      "step": 120091
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9529993534088135,
      "learning_rate": 0.000280009928096488,
      "loss": 3.0564,
      "step": 120092
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1129233837127686,
      "learning_rate": 0.0002800058466273929,
      "loss": 3.0618,
      "step": 120093
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5824673175811768,
      "learning_rate": 0.0002800017651620149,
      "loss": 3.2078,
      "step": 120094
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.004254102706909,
      "learning_rate": 0.00027999768370035494,
      "loss": 3.0283,
      "step": 120095
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1975133419036865,
      "learning_rate": 0.0002799936022424138,
      "loss": 2.9031,
      "step": 120096
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8593158721923828,
      "learning_rate": 0.00027998952078819227,
      "loss": 3.0305,
      "step": 120097
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.385223627090454,
      "learning_rate": 0.000279985439337691,
      "loss": 2.7665,
      "step": 120098
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.699798107147217,
      "learning_rate": 0.000279981357890911,
      "loss": 3.0374,
      "step": 120099
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7587310075759888,
      "learning_rate": 0.0002799772764478526,
      "loss": 3.2081,
      "step": 120100
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.55859637260437,
      "learning_rate": 0.0002799731950085169,
      "loss": 3.0763,
      "step": 120101
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.790696144104004,
      "learning_rate": 0.0002799691135729046,
      "loss": 2.9966,
      "step": 120102
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7385069131851196,
      "learning_rate": 0.0002799650321410164,
      "loss": 3.0647,
      "step": 120103
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.024803876876831,
      "learning_rate": 0.000279960950712853,
      "loss": 2.9718,
      "step": 120104
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2485299110412598,
      "learning_rate": 0.00027995686928841543,
      "loss": 2.932,
      "step": 120105
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7712558507919312,
      "learning_rate": 0.00027995278786770405,
      "loss": 2.9493,
      "step": 120106
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0566701889038086,
      "learning_rate": 0.00027994870645071994,
      "loss": 2.8365,
      "step": 120107
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6749593019485474,
      "learning_rate": 0.00027994462503746364,
      "loss": 2.6333,
      "step": 120108
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.099626302719116,
      "learning_rate": 0.0002799405436279361,
      "loss": 2.9671,
      "step": 120109
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.362887382507324,
      "learning_rate": 0.00027993646222213795,
      "loss": 2.7428,
      "step": 120110
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0149893760681152,
      "learning_rate": 0.00027993238082006997,
      "loss": 3.0936,
      "step": 120111
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5896644592285156,
      "learning_rate": 0.0002799282994217331,
      "loss": 2.8649,
      "step": 120112
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9705402851104736,
      "learning_rate": 0.00027992421802712784,
      "loss": 3.154,
      "step": 120113
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7530181407928467,
      "learning_rate": 0.000279920136636255,
      "loss": 2.8221,
      "step": 120114
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.974582314491272,
      "learning_rate": 0.0002799160552491154,
      "loss": 2.961,
      "step": 120115
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.944478988647461,
      "learning_rate": 0.00027991197386570974,
      "loss": 3.1549,
      "step": 120116
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6224889755249023,
      "learning_rate": 0.0002799078924860389,
      "loss": 2.9866,
      "step": 120117
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9086395502090454,
      "learning_rate": 0.00027990381111010364,
      "loss": 3.0694,
      "step": 120118
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0743117332458496,
      "learning_rate": 0.0002798997297379045,
      "loss": 2.9662,
      "step": 120119
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7742615938186646,
      "learning_rate": 0.00027989564836944245,
      "loss": 3.0499,
      "step": 120120
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0845770835876465,
      "learning_rate": 0.00027989156700471813,
      "loss": 3.1023,
      "step": 120121
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9362748861312866,
      "learning_rate": 0.00027988748564373235,
      "loss": 2.9208,
      "step": 120122
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8567781448364258,
      "learning_rate": 0.00027988340428648583,
      "loss": 3.0688,
      "step": 120123
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.295790433883667,
      "learning_rate": 0.00027987932293297953,
      "loss": 2.8766,
      "step": 120124
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.955251693725586,
      "learning_rate": 0.00027987524158321395,
      "loss": 3.0119,
      "step": 120125
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0722005367279053,
      "learning_rate": 0.0002798711602371899,
      "loss": 2.957,
      "step": 120126
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9418145418167114,
      "learning_rate": 0.0002798670788949081,
      "loss": 2.9938,
      "step": 120127
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2647898197174072,
      "learning_rate": 0.00027986299755636957,
      "loss": 3.1084,
      "step": 120128
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6861923933029175,
      "learning_rate": 0.00027985891622157475,
      "loss": 3.2009,
      "step": 120129
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8811509609222412,
      "learning_rate": 0.0002798548348905247,
      "loss": 2.7509,
      "step": 120130
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.185481309890747,
      "learning_rate": 0.0002798507535632199,
      "loss": 2.8892,
      "step": 120131
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8271610736846924,
      "learning_rate": 0.0002798466722396612,
      "loss": 3.0271,
      "step": 120132
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.2328505516052246,
      "learning_rate": 0.00027984259091984944,
      "loss": 3.1186,
      "step": 120133
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3884425163269043,
      "learning_rate": 0.0002798385096037853,
      "loss": 3.1994,
      "step": 120134
    },
    {
      "epoch": 1.56,
      "grad_norm": 4.621956825256348,
      "learning_rate": 0.0002798344282914696,
      "loss": 2.9951,
      "step": 120135
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.248577833175659,
      "learning_rate": 0.0002798303469829031,
      "loss": 3.1292,
      "step": 120136
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.434861183166504,
      "learning_rate": 0.0002798262656780864,
      "loss": 2.9288,
      "step": 120137
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0765750408172607,
      "learning_rate": 0.0002798221843770205,
      "loss": 3.0546,
      "step": 120138
    },
    {
      "epoch": 1.56,
      "grad_norm": 5.188354969024658,
      "learning_rate": 0.00027981810307970594,
      "loss": 2.7753,
      "step": 120139
    },
    {
      "epoch": 1.56,
      "grad_norm": 4.701647758483887,
      "learning_rate": 0.00027981402178614356,
      "loss": 2.7567,
      "step": 120140
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0362119674682617,
      "learning_rate": 0.00027980994049633414,
      "loss": 2.8231,
      "step": 120141
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9924840927124023,
      "learning_rate": 0.0002798058592102786,
      "loss": 2.9501,
      "step": 120142
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4728708267211914,
      "learning_rate": 0.0002798017779279774,
      "loss": 3.0951,
      "step": 120143
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.5157299041748047,
      "learning_rate": 0.00027979769664943137,
      "loss": 3.2118,
      "step": 120144
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7425966262817383,
      "learning_rate": 0.0002797936153746414,
      "loss": 3.0062,
      "step": 120145
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1850569248199463,
      "learning_rate": 0.00027978953410360814,
      "loss": 3.1516,
      "step": 120146
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.353562831878662,
      "learning_rate": 0.0002797854528363324,
      "loss": 2.8883,
      "step": 120147
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8733450174331665,
      "learning_rate": 0.000279781371572815,
      "loss": 2.714,
      "step": 120148
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.859520673751831,
      "learning_rate": 0.0002797772903130566,
      "loss": 2.6789,
      "step": 120149
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8345617055892944,
      "learning_rate": 0.00027977320905705794,
      "loss": 3.1112,
      "step": 120150
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6776331663131714,
      "learning_rate": 0.0002797691278048198,
      "loss": 3.0738,
      "step": 120151
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9972304105758667,
      "learning_rate": 0.000279765046556343,
      "loss": 3.0418,
      "step": 120152
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.202753782272339,
      "learning_rate": 0.00027976096531162826,
      "loss": 2.9552,
      "step": 120153
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7413276433944702,
      "learning_rate": 0.0002797568840706764,
      "loss": 3.0655,
      "step": 120154
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.078826427459717,
      "learning_rate": 0.000279752802833488,
      "loss": 2.6178,
      "step": 120155
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9681243896484375,
      "learning_rate": 0.000279748721600064,
      "loss": 2.894,
      "step": 120156
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.076231002807617,
      "learning_rate": 0.0002797446403704051,
      "loss": 3.0281,
      "step": 120157
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.0945630073547363,
      "learning_rate": 0.000279740559144512,
      "loss": 2.896,
      "step": 120158
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.4161553382873535,
      "learning_rate": 0.0002797364779223855,
      "loss": 2.7441,
      "step": 120159
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.822841763496399,
      "learning_rate": 0.0002797323967040265,
      "loss": 3.1075,
      "step": 120160
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.981947660446167,
      "learning_rate": 0.00027972831548943555,
      "loss": 2.9509,
      "step": 120161
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9598537683486938,
      "learning_rate": 0.00027972423427861346,
      "loss": 2.9103,
      "step": 120162
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.278809070587158,
      "learning_rate": 0.0002797201530715611,
      "loss": 3.0882,
      "step": 120163
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8476059436798096,
      "learning_rate": 0.00027971607186827907,
      "loss": 2.7093,
      "step": 120164
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6560248136520386,
      "learning_rate": 0.0002797119906687682,
      "loss": 2.9891,
      "step": 120165
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7413792610168457,
      "learning_rate": 0.00027970790947302935,
      "loss": 2.8121,
      "step": 120166
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.6471238136291504,
      "learning_rate": 0.00027970382828106306,
      "loss": 2.9733,
      "step": 120167
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.1399741172790527,
      "learning_rate": 0.00027969974709287027,
      "loss": 3.1127,
      "step": 120168
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.273283004760742,
      "learning_rate": 0.00027969566590845164,
      "loss": 2.9068,
      "step": 120169
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.358302354812622,
      "learning_rate": 0.000279691584727808,
      "loss": 2.9682,
      "step": 120170
    },
    {
      "epoch": 1.56,
      "grad_norm": 5.12054967880249,
      "learning_rate": 0.00027968750355094013,
      "loss": 3.1796,
      "step": 120171
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.422403335571289,
      "learning_rate": 0.0002796834223778487,
      "loss": 3.031,
      "step": 120172
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9710590839385986,
      "learning_rate": 0.0002796793412085345,
      "loss": 2.954,
      "step": 120173
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.0561890602111816,
      "learning_rate": 0.00027967526004299826,
      "loss": 3.0217,
      "step": 120174
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.1997621059417725,
      "learning_rate": 0.0002796711788812408,
      "loss": 3.1723,
      "step": 120175
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.339282751083374,
      "learning_rate": 0.00027966709772326285,
      "loss": 2.7742,
      "step": 120176
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8032346963882446,
      "learning_rate": 0.0002796630165690651,
      "loss": 3.0529,
      "step": 120177
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.988258719444275,
      "learning_rate": 0.00027965893541864853,
      "loss": 2.6607,
      "step": 120178
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6035451889038086,
      "learning_rate": 0.00027965485427201375,
      "loss": 2.5937,
      "step": 120179
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.3100152015686035,
      "learning_rate": 0.0002796507731291614,
      "loss": 3.0971,
      "step": 120180
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.862555742263794,
      "learning_rate": 0.00027964669199009233,
      "loss": 3.0903,
      "step": 120181
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.7204574346542358,
      "learning_rate": 0.0002796426108548074,
      "loss": 3.1028,
      "step": 120182
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0758488178253174,
      "learning_rate": 0.00027963852972330723,
      "loss": 2.9284,
      "step": 120183
    },
    {
      "epoch": 1.56,
      "grad_norm": 4.494539737701416,
      "learning_rate": 0.0002796344485955927,
      "loss": 2.9178,
      "step": 120184
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.512324571609497,
      "learning_rate": 0.0002796303674716646,
      "loss": 2.9948,
      "step": 120185
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.9990943670272827,
      "learning_rate": 0.0002796262863515235,
      "loss": 3.0536,
      "step": 120186
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.5379092693328857,
      "learning_rate": 0.0002796222052351702,
      "loss": 3.0698,
      "step": 120187
    },
    {
      "epoch": 1.56,
      "grad_norm": 3.160679817199707,
      "learning_rate": 0.0002796181241226056,
      "loss": 2.9747,
      "step": 120188
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8356339931488037,
      "learning_rate": 0.00027961404301383037,
      "loss": 3.0584,
      "step": 120189
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.0294365882873535,
      "learning_rate": 0.0002796099619088452,
      "loss": 2.9149,
      "step": 120190
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.6465904712677002,
      "learning_rate": 0.0002796058808076511,
      "loss": 3.0887,
      "step": 120191
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.375196933746338,
      "learning_rate": 0.00027960179971024847,
      "loss": 3.1367,
      "step": 120192
    },
    {
      "epoch": 1.56,
      "grad_norm": 1.8177571296691895,
      "learning_rate": 0.00027959771861663834,
      "loss": 2.9215,
      "step": 120193
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7880197763442993,
      "learning_rate": 0.00027959363752682134,
      "loss": 3.1302,
      "step": 120194
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9035664796829224,
      "learning_rate": 0.00027958955644079823,
      "loss": 2.8283,
      "step": 120195
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7758376598358154,
      "learning_rate": 0.00027958547535856984,
      "loss": 2.8092,
      "step": 120196
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2337088584899902,
      "learning_rate": 0.000279581394280137,
      "loss": 2.793,
      "step": 120197
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3769078254699707,
      "learning_rate": 0.00027957731320550026,
      "loss": 3.0496,
      "step": 120198
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9507189989089966,
      "learning_rate": 0.0002795732321346605,
      "loss": 3.2624,
      "step": 120199
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.940459966659546,
      "learning_rate": 0.00027956915106761845,
      "loss": 2.8411,
      "step": 120200
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9898643493652344,
      "learning_rate": 0.00027956507000437487,
      "loss": 3.0074,
      "step": 120201
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.870251178741455,
      "learning_rate": 0.0002795609889449305,
      "loss": 3.0709,
      "step": 120202
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.009655475616455,
      "learning_rate": 0.0002795569078892863,
      "loss": 3.0096,
      "step": 120203
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.360372304916382,
      "learning_rate": 0.0002795528268374427,
      "loss": 2.8244,
      "step": 120204
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.784306287765503,
      "learning_rate": 0.00027954874578940063,
      "loss": 2.8324,
      "step": 120205
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.606147527694702,
      "learning_rate": 0.00027954466474516083,
      "loss": 2.9909,
      "step": 120206
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4535622596740723,
      "learning_rate": 0.0002795405837047241,
      "loss": 2.8376,
      "step": 120207
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.856143832206726,
      "learning_rate": 0.0002795365026680911,
      "loss": 3.0675,
      "step": 120208
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7448924779891968,
      "learning_rate": 0.0002795324216352628,
      "loss": 3.0025,
      "step": 120209
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7872226238250732,
      "learning_rate": 0.0002795283406062397,
      "loss": 2.8928,
      "step": 120210
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0633718967437744,
      "learning_rate": 0.00027952425958102265,
      "loss": 3.1103,
      "step": 120211
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9653488397598267,
      "learning_rate": 0.00027952017855961247,
      "loss": 3.1118,
      "step": 120212
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.098562240600586,
      "learning_rate": 0.0002795160975420098,
      "loss": 3.1227,
      "step": 120213
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6103919744491577,
      "learning_rate": 0.0002795120165282155,
      "loss": 3.2149,
      "step": 120214
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4197726249694824,
      "learning_rate": 0.0002795079355182305,
      "loss": 3.0359,
      "step": 120215
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7309651374816895,
      "learning_rate": 0.00027950385451205515,
      "loss": 3.0921,
      "step": 120216
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.3810324668884277,
      "learning_rate": 0.00027949977350969044,
      "loss": 2.7613,
      "step": 120217
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7014827728271484,
      "learning_rate": 0.00027949569251113713,
      "loss": 2.8994,
      "step": 120218
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6244137287139893,
      "learning_rate": 0.00027949161151639594,
      "loss": 3.1248,
      "step": 120219
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8896729946136475,
      "learning_rate": 0.00027948753052546766,
      "loss": 3.1766,
      "step": 120220
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.608086109161377,
      "learning_rate": 0.0002794834495383531,
      "loss": 3.0044,
      "step": 120221
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.93192195892334,
      "learning_rate": 0.0002794793685550529,
      "loss": 3.1457,
      "step": 120222
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7268214225769043,
      "learning_rate": 0.0002794752875755679,
      "loss": 3.113,
      "step": 120223
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.087214708328247,
      "learning_rate": 0.00027947120659989875,
      "loss": 3.1374,
      "step": 120224
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.40022873878479,
      "learning_rate": 0.00027946712562804636,
      "loss": 2.9084,
      "step": 120225
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8263840675354004,
      "learning_rate": 0.00027946304466001134,
      "loss": 3.0771,
      "step": 120226
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4756827354431152,
      "learning_rate": 0.0002794589636957947,
      "loss": 3.1325,
      "step": 120227
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9069933891296387,
      "learning_rate": 0.00027945488273539693,
      "loss": 3.1988,
      "step": 120228
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7141915559768677,
      "learning_rate": 0.00027945080177881884,
      "loss": 3.2119,
      "step": 120229
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.49712872505188,
      "learning_rate": 0.00027944672082606126,
      "loss": 3.1978,
      "step": 120230
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.0044050216674805,
      "learning_rate": 0.0002794426398771249,
      "loss": 2.9799,
      "step": 120231
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8861746788024902,
      "learning_rate": 0.00027943855893201056,
      "loss": 3.003,
      "step": 120232
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7045408487319946,
      "learning_rate": 0.000279434477990719,
      "loss": 3.1061,
      "step": 120233
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.932762861251831,
      "learning_rate": 0.000279430397053251,
      "loss": 2.7263,
      "step": 120234
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.072251558303833,
      "learning_rate": 0.00027942631611960726,
      "loss": 3.0044,
      "step": 120235
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1099672317504883,
      "learning_rate": 0.00027942223518978847,
      "loss": 2.9688,
      "step": 120236
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9268550872802734,
      "learning_rate": 0.0002794181542637955,
      "loss": 3.0991,
      "step": 120237
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8592078685760498,
      "learning_rate": 0.0002794140733416291,
      "loss": 2.954,
      "step": 120238
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.228250741958618,
      "learning_rate": 0.00027940999242329,
      "loss": 2.6232,
      "step": 120239
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4492177963256836,
      "learning_rate": 0.00027940591150877903,
      "loss": 2.9392,
      "step": 120240
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7245534658432007,
      "learning_rate": 0.0002794018305980969,
      "loss": 2.7963,
      "step": 120241
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.344640016555786,
      "learning_rate": 0.00027939774969124433,
      "loss": 3.1657,
      "step": 120242
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.270237684249878,
      "learning_rate": 0.00027939366878822203,
      "loss": 2.8309,
      "step": 120243
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.089635133743286,
      "learning_rate": 0.0002793895878890309,
      "loss": 2.864,
      "step": 120244
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.331559896469116,
      "learning_rate": 0.0002793855069936716,
      "loss": 2.9985,
      "step": 120245
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1936330795288086,
      "learning_rate": 0.000279381426102145,
      "loss": 2.916,
      "step": 120246
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9892510175704956,
      "learning_rate": 0.00027937734521445173,
      "loss": 2.9388,
      "step": 120247
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.216811418533325,
      "learning_rate": 0.00027937326433059265,
      "loss": 2.9863,
      "step": 120248
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1791059970855713,
      "learning_rate": 0.0002793691834505684,
      "loss": 2.6443,
      "step": 120249
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3477587699890137,
      "learning_rate": 0.0002793651025743798,
      "loss": 3.0587,
      "step": 120250
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.970265507698059,
      "learning_rate": 0.00027936102170202765,
      "loss": 3.0714,
      "step": 120251
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.596703290939331,
      "learning_rate": 0.00027935694083351274,
      "loss": 3.2404,
      "step": 120252
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.763845205307007,
      "learning_rate": 0.00027935285996883564,
      "loss": 2.9966,
      "step": 120253
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8882968425750732,
      "learning_rate": 0.0002793487791079973,
      "loss": 2.8532,
      "step": 120254
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9157049655914307,
      "learning_rate": 0.0002793446982509984,
      "loss": 3.1411,
      "step": 120255
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5473318099975586,
      "learning_rate": 0.0002793406173978397,
      "loss": 3.0313,
      "step": 120256
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.024348735809326,
      "learning_rate": 0.000279336536548522,
      "loss": 3.1813,
      "step": 120257
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.899417757987976,
      "learning_rate": 0.0002793324557030461,
      "loss": 3.113,
      "step": 120258
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.089150905609131,
      "learning_rate": 0.00027932837486141255,
      "loss": 2.9225,
      "step": 120259
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9398149251937866,
      "learning_rate": 0.00027932429402362225,
      "loss": 3.0107,
      "step": 120260
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.248880386352539,
      "learning_rate": 0.000279320213189676,
      "loss": 2.9666,
      "step": 120261
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.239487648010254,
      "learning_rate": 0.0002793161323595745,
      "loss": 3.0138,
      "step": 120262
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1429667472839355,
      "learning_rate": 0.00027931205153331856,
      "loss": 3.2226,
      "step": 120263
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9846998453140259,
      "learning_rate": 0.0002793079707109089,
      "loss": 3.2434,
      "step": 120264
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.449781656265259,
      "learning_rate": 0.00027930388989234623,
      "loss": 3.1973,
      "step": 120265
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8132917881011963,
      "learning_rate": 0.0002792998090776314,
      "loss": 3.0734,
      "step": 120266
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9652700424194336,
      "learning_rate": 0.0002792957282667651,
      "loss": 2.8696,
      "step": 120267
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.209926128387451,
      "learning_rate": 0.00027929164745974807,
      "loss": 3.0027,
      "step": 120268
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.817365288734436,
      "learning_rate": 0.00027928756665658117,
      "loss": 3.2341,
      "step": 120269
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.774066686630249,
      "learning_rate": 0.0002792834858572652,
      "loss": 3.0177,
      "step": 120270
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8928457498550415,
      "learning_rate": 0.0002792794050618007,
      "loss": 2.9803,
      "step": 120271
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6605725288391113,
      "learning_rate": 0.00027927532427018855,
      "loss": 3.2376,
      "step": 120272
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1188244819641113,
      "learning_rate": 0.00027927124348242947,
      "loss": 2.9739,
      "step": 120273
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3608479499816895,
      "learning_rate": 0.00027926716269852434,
      "loss": 2.8415,
      "step": 120274
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3716342449188232,
      "learning_rate": 0.00027926308191847377,
      "loss": 2.9843,
      "step": 120275
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6239429712295532,
      "learning_rate": 0.0002792590011422787,
      "loss": 3.1516,
      "step": 120276
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2569639682769775,
      "learning_rate": 0.0002792549203699397,
      "loss": 3.1751,
      "step": 120277
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.3867013454437256,
      "learning_rate": 0.00027925083960145757,
      "loss": 2.9988,
      "step": 120278
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8934839963912964,
      "learning_rate": 0.00027924675883683314,
      "loss": 2.9529,
      "step": 120279
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9953783750534058,
      "learning_rate": 0.0002792426780760671,
      "loss": 2.8448,
      "step": 120280
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3035433292388916,
      "learning_rate": 0.00027923859731916024,
      "loss": 2.9182,
      "step": 120281
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9978408813476562,
      "learning_rate": 0.00027923451656611346,
      "loss": 3.0257,
      "step": 120282
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8835694789886475,
      "learning_rate": 0.0002792304358169272,
      "loss": 2.9339,
      "step": 120283
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8656028509140015,
      "learning_rate": 0.0002792263550716024,
      "loss": 3.0225,
      "step": 120284
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8376270532608032,
      "learning_rate": 0.0002792222743301399,
      "loss": 2.9143,
      "step": 120285
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.034275770187378,
      "learning_rate": 0.0002792181935925403,
      "loss": 3.034,
      "step": 120286
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3103997707366943,
      "learning_rate": 0.00027921411285880446,
      "loss": 2.9191,
      "step": 120287
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8981966972351074,
      "learning_rate": 0.0002792100321289332,
      "loss": 2.9068,
      "step": 120288
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9314152002334595,
      "learning_rate": 0.000279205951402927,
      "loss": 2.9867,
      "step": 120289
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9208815097808838,
      "learning_rate": 0.00027920187068078693,
      "loss": 3.0086,
      "step": 120290
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0394644737243652,
      "learning_rate": 0.0002791977899625136,
      "loss": 3.1018,
      "step": 120291
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7407699823379517,
      "learning_rate": 0.00027919370924810774,
      "loss": 3.0462,
      "step": 120292
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.275679349899292,
      "learning_rate": 0.0002791896285375702,
      "loss": 3.1223,
      "step": 120293
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9619195461273193,
      "learning_rate": 0.00027918554783090184,
      "loss": 2.9271,
      "step": 120294
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3174548149108887,
      "learning_rate": 0.0002791814671281031,
      "loss": 3.0757,
      "step": 120295
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.073026180267334,
      "learning_rate": 0.00027917738642917493,
      "loss": 3.17,
      "step": 120296
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0387003421783447,
      "learning_rate": 0.0002791733057341181,
      "loss": 2.5845,
      "step": 120297
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7564361095428467,
      "learning_rate": 0.00027916922504293335,
      "loss": 3.1827,
      "step": 120298
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7649792432785034,
      "learning_rate": 0.0002791651443556214,
      "loss": 3.1579,
      "step": 120299
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9237734079360962,
      "learning_rate": 0.00027916106367218324,
      "loss": 2.9292,
      "step": 120300
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0569286346435547,
      "learning_rate": 0.00027915698299261924,
      "loss": 3.0852,
      "step": 120301
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.466977834701538,
      "learning_rate": 0.0002791529023169304,
      "loss": 2.9282,
      "step": 120302
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4476089477539062,
      "learning_rate": 0.0002791488216451174,
      "loss": 2.7284,
      "step": 120303
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3476319313049316,
      "learning_rate": 0.00027914474097718103,
      "loss": 2.9692,
      "step": 120304
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9743614196777344,
      "learning_rate": 0.00027914066031312204,
      "loss": 2.968,
      "step": 120305
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8134207725524902,
      "learning_rate": 0.0002791365796529412,
      "loss": 2.982,
      "step": 120306
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2342517375946045,
      "learning_rate": 0.0002791324989966394,
      "loss": 3.0006,
      "step": 120307
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6483570337295532,
      "learning_rate": 0.00027912841834421714,
      "loss": 2.8956,
      "step": 120308
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9922692775726318,
      "learning_rate": 0.00027912433769567525,
      "loss": 2.9464,
      "step": 120309
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8401846885681152,
      "learning_rate": 0.0002791202570510146,
      "loss": 2.9097,
      "step": 120310
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.05527663230896,
      "learning_rate": 0.00027911617641023585,
      "loss": 2.8231,
      "step": 120311
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9272977113723755,
      "learning_rate": 0.0002791120957733398,
      "loss": 3.0422,
      "step": 120312
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8436987400054932,
      "learning_rate": 0.00027910801514032733,
      "loss": 2.8806,
      "step": 120313
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.284471273422241,
      "learning_rate": 0.000279103934511199,
      "loss": 2.9442,
      "step": 120314
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.934800386428833,
      "learning_rate": 0.0002790998538859556,
      "loss": 3.0267,
      "step": 120315
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6024365425109863,
      "learning_rate": 0.00027909577326459795,
      "loss": 2.8773,
      "step": 120316
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.783280372619629,
      "learning_rate": 0.00027909169264712675,
      "loss": 3.0136,
      "step": 120317
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9894609451293945,
      "learning_rate": 0.00027908761203354284,
      "loss": 2.7541,
      "step": 120318
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2371509075164795,
      "learning_rate": 0.00027908353142384705,
      "loss": 3.1978,
      "step": 120319
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8747514486312866,
      "learning_rate": 0.0002790794508180399,
      "loss": 3.0515,
      "step": 120320
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.144941568374634,
      "learning_rate": 0.0002790753702161223,
      "loss": 3.2028,
      "step": 120321
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5549001693725586,
      "learning_rate": 0.00027907128961809495,
      "loss": 3.0731,
      "step": 120322
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.0045197010040283,
      "learning_rate": 0.00027906720902395865,
      "loss": 2.7696,
      "step": 120323
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.188579559326172,
      "learning_rate": 0.00027906312843371415,
      "loss": 2.9958,
      "step": 120324
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2358791828155518,
      "learning_rate": 0.00027905904784736226,
      "loss": 2.9707,
      "step": 120325
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.906806230545044,
      "learning_rate": 0.0002790549672649037,
      "loss": 3.2099,
      "step": 120326
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.837460994720459,
      "learning_rate": 0.0002790508866863392,
      "loss": 3.0518,
      "step": 120327
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.025688409805298,
      "learning_rate": 0.00027904680611166947,
      "loss": 2.96,
      "step": 120328
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.730703353881836,
      "learning_rate": 0.00027904272554089534,
      "loss": 3.0902,
      "step": 120329
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.034618854522705,
      "learning_rate": 0.00027903864497401756,
      "loss": 2.8467,
      "step": 120330
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9613648653030396,
      "learning_rate": 0.000279034564411037,
      "loss": 2.8384,
      "step": 120331
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.061338186264038,
      "learning_rate": 0.0002790304838519542,
      "loss": 3.0001,
      "step": 120332
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9352307319641113,
      "learning_rate": 0.0002790264032967701,
      "loss": 2.8647,
      "step": 120333
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.335374116897583,
      "learning_rate": 0.00027902232274548537,
      "loss": 3.1401,
      "step": 120334
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.486351251602173,
      "learning_rate": 0.00027901824219810073,
      "loss": 2.8524,
      "step": 120335
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8263927698135376,
      "learning_rate": 0.00027901416165461706,
      "loss": 2.911,
      "step": 120336
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.144291877746582,
      "learning_rate": 0.00027901008111503504,
      "loss": 3.0845,
      "step": 120337
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8533540964126587,
      "learning_rate": 0.0002790060005793554,
      "loss": 3.0428,
      "step": 120338
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8224296569824219,
      "learning_rate": 0.000279001920047579,
      "loss": 2.9428,
      "step": 120339
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1509759426116943,
      "learning_rate": 0.00027899783951970653,
      "loss": 2.9903,
      "step": 120340
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9991683959960938,
      "learning_rate": 0.0002789937589957387,
      "loss": 3.038,
      "step": 120341
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9309911727905273,
      "learning_rate": 0.0002789896784756764,
      "loss": 3.055,
      "step": 120342
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1577579975128174,
      "learning_rate": 0.00027898559795952026,
      "loss": 2.8363,
      "step": 120343
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.108184337615967,
      "learning_rate": 0.00027898151744727115,
      "loss": 2.9047,
      "step": 120344
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9149404764175415,
      "learning_rate": 0.0002789774369389297,
      "loss": 3.2578,
      "step": 120345
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8191051483154297,
      "learning_rate": 0.0002789733564344968,
      "loss": 2.8993,
      "step": 120346
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.5345784425735474,
      "learning_rate": 0.00027896927593397316,
      "loss": 3.1035,
      "step": 120347
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8102099895477295,
      "learning_rate": 0.0002789651954373595,
      "loss": 2.8622,
      "step": 120348
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.306426525115967,
      "learning_rate": 0.00027896111494465665,
      "loss": 2.935,
      "step": 120349
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1701042652130127,
      "learning_rate": 0.0002789570344558652,
      "loss": 2.9356,
      "step": 120350
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.996884822845459,
      "learning_rate": 0.00027895295397098614,
      "loss": 3.0654,
      "step": 120351
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0065367221832275,
      "learning_rate": 0.00027894887349002006,
      "loss": 3.0729,
      "step": 120352
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.077970027923584,
      "learning_rate": 0.00027894479301296784,
      "loss": 2.8791,
      "step": 120353
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.002188205718994,
      "learning_rate": 0.0002789407125398301,
      "loss": 3.0152,
      "step": 120354
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.689039707183838,
      "learning_rate": 0.0002789366320706079,
      "loss": 2.9986,
      "step": 120355
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7071479558944702,
      "learning_rate": 0.0002789325516053016,
      "loss": 3.0721,
      "step": 120356
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0447444915771484,
      "learning_rate": 0.0002789284711439121,
      "loss": 2.8811,
      "step": 120357
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7749327421188354,
      "learning_rate": 0.00027892439068644023,
      "loss": 2.8172,
      "step": 120358
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0486526489257812,
      "learning_rate": 0.00027892031023288674,
      "loss": 3.0161,
      "step": 120359
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.483699321746826,
      "learning_rate": 0.0002789162297832523,
      "loss": 2.9623,
      "step": 120360
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7856706380844116,
      "learning_rate": 0.0002789121493375379,
      "loss": 3.097,
      "step": 120361
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6754112243652344,
      "learning_rate": 0.00027890806889574406,
      "loss": 2.8788,
      "step": 120362
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.329699754714966,
      "learning_rate": 0.0002789039884578715,
      "loss": 3.0375,
      "step": 120363
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7285226583480835,
      "learning_rate": 0.0002788999080239211,
      "loss": 2.9507,
      "step": 120364
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0081756114959717,
      "learning_rate": 0.0002788958275938937,
      "loss": 2.9633,
      "step": 120365
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.29854679107666,
      "learning_rate": 0.0002788917471677899,
      "loss": 2.6978,
      "step": 120366
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.932715654373169,
      "learning_rate": 0.0002788876667456106,
      "loss": 2.9087,
      "step": 120367
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7398462295532227,
      "learning_rate": 0.00027888358632735646,
      "loss": 2.7458,
      "step": 120368
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0087666511535645,
      "learning_rate": 0.0002788795059130282,
      "loss": 2.904,
      "step": 120369
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9038783311843872,
      "learning_rate": 0.0002788754255026266,
      "loss": 2.888,
      "step": 120370
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8710246086120605,
      "learning_rate": 0.00027887134509615253,
      "loss": 2.9828,
      "step": 120371
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7779597043991089,
      "learning_rate": 0.00027886726469360663,
      "loss": 2.7924,
      "step": 120372
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8735218048095703,
      "learning_rate": 0.00027886318429498977,
      "loss": 3.2097,
      "step": 120373
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9956210851669312,
      "learning_rate": 0.0002788591039003027,
      "loss": 2.9043,
      "step": 120374
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.091571092605591,
      "learning_rate": 0.00027885502350954603,
      "loss": 2.9981,
      "step": 120375
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.702311396598816,
      "learning_rate": 0.0002788509431227206,
      "loss": 3.0597,
      "step": 120376
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.314039945602417,
      "learning_rate": 0.00027884686273982714,
      "loss": 2.8906,
      "step": 120377
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7003562450408936,
      "learning_rate": 0.00027884278236086653,
      "loss": 3.1056,
      "step": 120378
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1609044075012207,
      "learning_rate": 0.00027883870198583936,
      "loss": 3.0401,
      "step": 120379
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6879907846450806,
      "learning_rate": 0.0002788346216147467,
      "loss": 2.9468,
      "step": 120380
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8145127296447754,
      "learning_rate": 0.00027883054124758883,
      "loss": 2.9761,
      "step": 120381
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.695871114730835,
      "learning_rate": 0.00027882646088436683,
      "loss": 2.9473,
      "step": 120382
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8474957942962646,
      "learning_rate": 0.0002788223805250814,
      "loss": 3.061,
      "step": 120383
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0038869380950928,
      "learning_rate": 0.0002788183001697333,
      "loss": 2.8816,
      "step": 120384
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9577223062515259,
      "learning_rate": 0.00027881421981832327,
      "loss": 3.054,
      "step": 120385
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6646828651428223,
      "learning_rate": 0.0002788101394708522,
      "loss": 2.7765,
      "step": 120386
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7688452005386353,
      "learning_rate": 0.00027880605912732054,
      "loss": 2.9961,
      "step": 120387
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.7631211280822754,
      "learning_rate": 0.0002788019787877293,
      "loss": 2.8702,
      "step": 120388
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1439690589904785,
      "learning_rate": 0.0002787978984520792,
      "loss": 3.0845,
      "step": 120389
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8241013288497925,
      "learning_rate": 0.00027879381812037087,
      "loss": 3.1241,
      "step": 120390
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.058948278427124,
      "learning_rate": 0.00027878973779260527,
      "loss": 2.941,
      "step": 120391
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9893831014633179,
      "learning_rate": 0.0002787856574687831,
      "loss": 2.586,
      "step": 120392
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.060163736343384,
      "learning_rate": 0.00027878157714890503,
      "loss": 3.1956,
      "step": 120393
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8251962661743164,
      "learning_rate": 0.0002787774968329718,
      "loss": 2.8591,
      "step": 120394
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.082953929901123,
      "learning_rate": 0.00027877341652098427,
      "loss": 2.9046,
      "step": 120395
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.318995475769043,
      "learning_rate": 0.0002787693362129432,
      "loss": 2.8584,
      "step": 120396
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6813511848449707,
      "learning_rate": 0.00027876525590884917,
      "loss": 3.051,
      "step": 120397
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.162327527999878,
      "learning_rate": 0.00027876117560870334,
      "loss": 3.0705,
      "step": 120398
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9622790813446045,
      "learning_rate": 0.00027875709531250607,
      "loss": 3.2835,
      "step": 120399
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3591814041137695,
      "learning_rate": 0.0002787530150202582,
      "loss": 2.9208,
      "step": 120400
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.296250343322754,
      "learning_rate": 0.00027874893473196056,
      "loss": 3.043,
      "step": 120401
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.981007695198059,
      "learning_rate": 0.00027874485444761394,
      "loss": 2.982,
      "step": 120402
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6674433946609497,
      "learning_rate": 0.000278740774167219,
      "loss": 2.9675,
      "step": 120403
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7219033241271973,
      "learning_rate": 0.0002787366938907767,
      "loss": 3.3041,
      "step": 120404
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9182548522949219,
      "learning_rate": 0.0002787326136182875,
      "loss": 2.8922,
      "step": 120405
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.343287467956543,
      "learning_rate": 0.0002787285333497524,
      "loss": 2.8628,
      "step": 120406
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5019800662994385,
      "learning_rate": 0.00027872445308517197,
      "loss": 2.8867,
      "step": 120407
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.307436227798462,
      "learning_rate": 0.0002787203728245471,
      "loss": 3.0531,
      "step": 120408
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.541053771972656,
      "learning_rate": 0.0002787162925678785,
      "loss": 2.8138,
      "step": 120409
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.681513547897339,
      "learning_rate": 0.0002787122123151671,
      "loss": 2.8188,
      "step": 120410
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6578980684280396,
      "learning_rate": 0.0002787081320664133,
      "loss": 3.0125,
      "step": 120411
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3688669204711914,
      "learning_rate": 0.00027870405182161813,
      "loss": 2.9797,
      "step": 120412
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.7077455520629883,
      "learning_rate": 0.0002786999715807823,
      "loss": 3.035,
      "step": 120413
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.670304536819458,
      "learning_rate": 0.0002786958913439065,
      "loss": 3.0562,
      "step": 120414
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7503646612167358,
      "learning_rate": 0.0002786918111109915,
      "loss": 3.1617,
      "step": 120415
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.948270320892334,
      "learning_rate": 0.0002786877308820382,
      "loss": 2.922,
      "step": 120416
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.842397689819336,
      "learning_rate": 0.0002786836506570473,
      "loss": 3.1709,
      "step": 120417
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.863688945770264,
      "learning_rate": 0.00027867957043601943,
      "loss": 3.1526,
      "step": 120418
    },
    {
      "epoch": 1.57,
      "grad_norm": 5.422321319580078,
      "learning_rate": 0.00027867549021895537,
      "loss": 2.645,
      "step": 120419
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.182771682739258,
      "learning_rate": 0.00027867141000585596,
      "loss": 2.949,
      "step": 120420
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3656179904937744,
      "learning_rate": 0.00027866732979672196,
      "loss": 3.2042,
      "step": 120421
    },
    {
      "epoch": 1.57,
      "grad_norm": 6.045296669006348,
      "learning_rate": 0.0002786632495915542,
      "loss": 3.005,
      "step": 120422
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.800387144088745,
      "learning_rate": 0.0002786591693903532,
      "loss": 2.7297,
      "step": 120423
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6478757858276367,
      "learning_rate": 0.0002786550891931199,
      "loss": 3.0546,
      "step": 120424
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5571000576019287,
      "learning_rate": 0.0002786510089998551,
      "loss": 3.0483,
      "step": 120425
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7593307495117188,
      "learning_rate": 0.0002786469288105594,
      "loss": 2.9538,
      "step": 120426
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.895910263061523,
      "learning_rate": 0.0002786428486252337,
      "loss": 3.0165,
      "step": 120427
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8983982801437378,
      "learning_rate": 0.0002786387684438786,
      "loss": 3.0198,
      "step": 120428
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7139941453933716,
      "learning_rate": 0.00027863468826649506,
      "loss": 3.1351,
      "step": 120429
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0163207054138184,
      "learning_rate": 0.0002786306080930837,
      "loss": 2.9039,
      "step": 120430
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.585223913192749,
      "learning_rate": 0.00027862652792364525,
      "loss": 2.9432,
      "step": 120431
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.855154037475586,
      "learning_rate": 0.00027862244775818065,
      "loss": 2.9394,
      "step": 120432
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5082314014434814,
      "learning_rate": 0.00027861836759669045,
      "loss": 2.9299,
      "step": 120433
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0519073009490967,
      "learning_rate": 0.0002786142874391756,
      "loss": 2.9956,
      "step": 120434
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.416691541671753,
      "learning_rate": 0.00027861020728563667,
      "loss": 2.9835,
      "step": 120435
    },
    {
      "epoch": 1.57,
      "grad_norm": 5.614404678344727,
      "learning_rate": 0.0002786061271360745,
      "loss": 2.9757,
      "step": 120436
    },
    {
      "epoch": 1.57,
      "grad_norm": 6.526351451873779,
      "learning_rate": 0.00027860204699048987,
      "loss": 3.1897,
      "step": 120437
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.886507749557495,
      "learning_rate": 0.00027859796684888346,
      "loss": 2.9986,
      "step": 120438
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0224456787109375,
      "learning_rate": 0.0002785938867112562,
      "loss": 2.8998,
      "step": 120439
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.339651346206665,
      "learning_rate": 0.0002785898065776087,
      "loss": 2.8966,
      "step": 120440
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4217450618743896,
      "learning_rate": 0.0002785857264479418,
      "loss": 2.9051,
      "step": 120441
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1695480346679688,
      "learning_rate": 0.0002785816463222562,
      "loss": 3.0127,
      "step": 120442
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0183377265930176,
      "learning_rate": 0.0002785775662005526,
      "loss": 2.9314,
      "step": 120443
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7831717729568481,
      "learning_rate": 0.00027857348608283187,
      "loss": 3.1468,
      "step": 120444
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.049089193344116,
      "learning_rate": 0.00027856940596909476,
      "loss": 3.0692,
      "step": 120445
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.828826427459717,
      "learning_rate": 0.00027856532585934205,
      "loss": 2.9488,
      "step": 120446
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7382428646087646,
      "learning_rate": 0.0002785612457535744,
      "loss": 3.0501,
      "step": 120447
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.820847511291504,
      "learning_rate": 0.00027855716565179265,
      "loss": 2.8916,
      "step": 120448
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9175645112991333,
      "learning_rate": 0.00027855308555399744,
      "loss": 3.2506,
      "step": 120449
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.303452253341675,
      "learning_rate": 0.00027854900546018965,
      "loss": 2.6996,
      "step": 120450
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3302245140075684,
      "learning_rate": 0.00027854492537037005,
      "loss": 3.1298,
      "step": 120451
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.17915940284729,
      "learning_rate": 0.0002785408452845393,
      "loss": 2.9904,
      "step": 120452
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9754635095596313,
      "learning_rate": 0.0002785367652026983,
      "loss": 2.942,
      "step": 120453
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7818576097488403,
      "learning_rate": 0.00027853268512484765,
      "loss": 3.2088,
      "step": 120454
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.74053692817688,
      "learning_rate": 0.00027852860505098815,
      "loss": 2.8675,
      "step": 120455
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.0447447299957275,
      "learning_rate": 0.00027852452498112063,
      "loss": 2.769,
      "step": 120456
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.104933977127075,
      "learning_rate": 0.0002785204449152458,
      "loss": 3.0698,
      "step": 120457
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3346877098083496,
      "learning_rate": 0.0002785163648533644,
      "loss": 2.872,
      "step": 120458
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0222630500793457,
      "learning_rate": 0.00027851228479547737,
      "loss": 2.816,
      "step": 120459
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1391830444335938,
      "learning_rate": 0.00027850820474158513,
      "loss": 2.8474,
      "step": 120460
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.566542387008667,
      "learning_rate": 0.00027850412469168867,
      "loss": 2.7469,
      "step": 120461
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4301862716674805,
      "learning_rate": 0.0002785000446457887,
      "loss": 3.1869,
      "step": 120462
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.76480233669281,
      "learning_rate": 0.00027849596460388594,
      "loss": 2.8944,
      "step": 120463
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0930449962615967,
      "learning_rate": 0.00027849188456598124,
      "loss": 2.9233,
      "step": 120464
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7756093740463257,
      "learning_rate": 0.0002784878045320754,
      "loss": 2.9658,
      "step": 120465
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8467475175857544,
      "learning_rate": 0.0002784837245021689,
      "loss": 2.7855,
      "step": 120466
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.2699828147888184,
      "learning_rate": 0.00027847964447626276,
      "loss": 2.9724,
      "step": 120467
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.330731153488159,
      "learning_rate": 0.00027847556445435764,
      "loss": 2.887,
      "step": 120468
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3772623538970947,
      "learning_rate": 0.00027847148443645435,
      "loss": 2.8489,
      "step": 120469
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.0565297603607178,
      "learning_rate": 0.00027846740442255357,
      "loss": 3.0685,
      "step": 120470
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9010246992111206,
      "learning_rate": 0.00027846332441265625,
      "loss": 3.0382,
      "step": 120471
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7312625646591187,
      "learning_rate": 0.0002784592444067629,
      "loss": 3.0366,
      "step": 120472
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1270055770874023,
      "learning_rate": 0.0002784551644048743,
      "loss": 3.0081,
      "step": 120473
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.4223549365997314,
      "learning_rate": 0.00027845108440699136,
      "loss": 2.8455,
      "step": 120474
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.31592059135437,
      "learning_rate": 0.00027844700441311476,
      "loss": 3.0254,
      "step": 120475
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9766383171081543,
      "learning_rate": 0.0002784429244232453,
      "loss": 2.9153,
      "step": 120476
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.1950459480285645,
      "learning_rate": 0.0002784388444373838,
      "loss": 2.9008,
      "step": 120477
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.480246067047119,
      "learning_rate": 0.0002784347644555308,
      "loss": 2.916,
      "step": 120478
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.495441436767578,
      "learning_rate": 0.0002784306844776871,
      "loss": 3.2604,
      "step": 120479
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.368274450302124,
      "learning_rate": 0.0002784266045038537,
      "loss": 2.9847,
      "step": 120480
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1590428352355957,
      "learning_rate": 0.00027842252453403113,
      "loss": 2.959,
      "step": 120481
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.609604597091675,
      "learning_rate": 0.0002784184445682202,
      "loss": 3.019,
      "step": 120482
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.4992449283599854,
      "learning_rate": 0.0002784143646064218,
      "loss": 2.9257,
      "step": 120483
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6392345428466797,
      "learning_rate": 0.00027841028464863657,
      "loss": 3.0316,
      "step": 120484
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2566962242126465,
      "learning_rate": 0.00027840620469486515,
      "loss": 3.0325,
      "step": 120485
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7321324348449707,
      "learning_rate": 0.00027840212474510847,
      "loss": 2.9062,
      "step": 120486
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.0283401012420654,
      "learning_rate": 0.00027839804479936725,
      "loss": 3.0545,
      "step": 120487
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7835750579833984,
      "learning_rate": 0.00027839396485764223,
      "loss": 3.0508,
      "step": 120488
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.168260097503662,
      "learning_rate": 0.0002783898849199343,
      "loss": 2.9189,
      "step": 120489
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.634516954421997,
      "learning_rate": 0.000278385804986244,
      "loss": 3.0845,
      "step": 120490
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.953939199447632,
      "learning_rate": 0.00027838172505657214,
      "loss": 2.8285,
      "step": 120491
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9099621772766113,
      "learning_rate": 0.00027837764513091956,
      "loss": 3.1331,
      "step": 120492
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8935246467590332,
      "learning_rate": 0.000278373565209287,
      "loss": 3.0967,
      "step": 120493
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.293848991394043,
      "learning_rate": 0.0002783694852916751,
      "loss": 3.1192,
      "step": 120494
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4863040447235107,
      "learning_rate": 0.000278365405378085,
      "loss": 3.2119,
      "step": 120495
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.641146421432495,
      "learning_rate": 0.00027836132546851696,
      "loss": 3.0157,
      "step": 120496
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.999389410018921,
      "learning_rate": 0.00027835724556297194,
      "loss": 2.8998,
      "step": 120497
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9414368867874146,
      "learning_rate": 0.00027835316566145077,
      "loss": 3.021,
      "step": 120498
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.139709234237671,
      "learning_rate": 0.00027834908576395415,
      "loss": 3.1085,
      "step": 120499
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.230731248855591,
      "learning_rate": 0.0002783450058704828,
      "loss": 3.1588,
      "step": 120500
    },
    {
      "epoch": 1.57,
      "grad_norm": 5.0845866203308105,
      "learning_rate": 0.00027834092598103764,
      "loss": 3.1107,
      "step": 120501
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.380439043045044,
      "learning_rate": 0.00027833684609561925,
      "loss": 3.0021,
      "step": 120502
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8284767866134644,
      "learning_rate": 0.00027833276621422846,
      "loss": 3.0227,
      "step": 120503
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9139161109924316,
      "learning_rate": 0.00027832868633686597,
      "loss": 2.8157,
      "step": 120504
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.441141128540039,
      "learning_rate": 0.0002783246064635326,
      "loss": 2.9285,
      "step": 120505
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4925997257232666,
      "learning_rate": 0.0002783205265942291,
      "loss": 3.2434,
      "step": 120506
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.830150842666626,
      "learning_rate": 0.0002783164467289562,
      "loss": 3.0181,
      "step": 120507
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9465601444244385,
      "learning_rate": 0.00027831236686771477,
      "loss": 2.8297,
      "step": 120508
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2800352573394775,
      "learning_rate": 0.00027830828701050543,
      "loss": 3.0024,
      "step": 120509
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2816076278686523,
      "learning_rate": 0.000278304207157329,
      "loss": 2.8403,
      "step": 120510
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.135174036026001,
      "learning_rate": 0.00027830012730818617,
      "loss": 2.8799,
      "step": 120511
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0205039978027344,
      "learning_rate": 0.00027829604746307777,
      "loss": 3.1244,
      "step": 120512
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1985907554626465,
      "learning_rate": 0.00027829196762200456,
      "loss": 2.961,
      "step": 120513
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.748365879058838,
      "learning_rate": 0.00027828788778496734,
      "loss": 2.8854,
      "step": 120514
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.134136199951172,
      "learning_rate": 0.00027828380795196677,
      "loss": 3.0366,
      "step": 120515
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.850677251815796,
      "learning_rate": 0.0002782797281230036,
      "loss": 3.1942,
      "step": 120516
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.55204176902771,
      "learning_rate": 0.00027827564829807877,
      "loss": 3.0993,
      "step": 120517
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.2306277751922607,
      "learning_rate": 0.00027827156847719275,
      "loss": 2.8964,
      "step": 120518
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4069454669952393,
      "learning_rate": 0.0002782674886603465,
      "loss": 3.0256,
      "step": 120519
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7104026079177856,
      "learning_rate": 0.00027826340884754084,
      "loss": 3.0225,
      "step": 120520
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8664003610610962,
      "learning_rate": 0.0002782593290387763,
      "loss": 3.0656,
      "step": 120521
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.124382495880127,
      "learning_rate": 0.0002782552492340538,
      "loss": 3.2717,
      "step": 120522
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9275487661361694,
      "learning_rate": 0.00027825116943337405,
      "loss": 3.0401,
      "step": 120523
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8623634576797485,
      "learning_rate": 0.0002782470896367379,
      "loss": 2.7942,
      "step": 120524
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9278180599212646,
      "learning_rate": 0.0002782430098441459,
      "loss": 2.9338,
      "step": 120525
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.169079303741455,
      "learning_rate": 0.00027823893005559904,
      "loss": 3.1384,
      "step": 120526
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9179688692092896,
      "learning_rate": 0.0002782348502710979,
      "loss": 3.0733,
      "step": 120527
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8561208248138428,
      "learning_rate": 0.00027823077049064337,
      "loss": 3.0124,
      "step": 120528
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7080941200256348,
      "learning_rate": 0.0002782266907142361,
      "loss": 3.0822,
      "step": 120529
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0990588665008545,
      "learning_rate": 0.0002782226109418769,
      "loss": 3.2136,
      "step": 120530
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6332175731658936,
      "learning_rate": 0.0002782185311735666,
      "loss": 3.1054,
      "step": 120531
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7755978107452393,
      "learning_rate": 0.00027821445140930587,
      "loss": 2.8589,
      "step": 120532
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8276417255401611,
      "learning_rate": 0.00027821037164909543,
      "loss": 2.9093,
      "step": 120533
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.437504291534424,
      "learning_rate": 0.0002782062918929361,
      "loss": 3.0657,
      "step": 120534
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9349699020385742,
      "learning_rate": 0.00027820221214082863,
      "loss": 3.1688,
      "step": 120535
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.125046491622925,
      "learning_rate": 0.0002781981323927738,
      "loss": 2.9757,
      "step": 120536
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.294285774230957,
      "learning_rate": 0.0002781940526487723,
      "loss": 2.9984,
      "step": 120537
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2943835258483887,
      "learning_rate": 0.0002781899729088251,
      "loss": 3.109,
      "step": 120538
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.27042293548584,
      "learning_rate": 0.0002781858931729326,
      "loss": 3.0669,
      "step": 120539
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8136063814163208,
      "learning_rate": 0.00027818181344109585,
      "loss": 3.2049,
      "step": 120540
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.077150821685791,
      "learning_rate": 0.0002781777337133155,
      "loss": 2.9228,
      "step": 120541
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7070472240447998,
      "learning_rate": 0.00027817365398959224,
      "loss": 2.9639,
      "step": 120542
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.893179178237915,
      "learning_rate": 0.000278169574269927,
      "loss": 2.7689,
      "step": 120543
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6600624322891235,
      "learning_rate": 0.00027816549455432054,
      "loss": 3.0525,
      "step": 120544
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8402749300003052,
      "learning_rate": 0.0002781614148427734,
      "loss": 2.9834,
      "step": 120545
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1228184700012207,
      "learning_rate": 0.00027815733513528645,
      "loss": 2.826,
      "step": 120546
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.218489170074463,
      "learning_rate": 0.00027815325543186046,
      "loss": 2.9126,
      "step": 120547
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1294074058532715,
      "learning_rate": 0.00027814917573249626,
      "loss": 2.9964,
      "step": 120548
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9863262176513672,
      "learning_rate": 0.00027814509603719446,
      "loss": 2.966,
      "step": 120549
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0229337215423584,
      "learning_rate": 0.0002781410163459561,
      "loss": 3.1497,
      "step": 120550
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3360769748687744,
      "learning_rate": 0.0002781369366587815,
      "loss": 2.8952,
      "step": 120551
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.970866322517395,
      "learning_rate": 0.00027813285697567174,
      "loss": 3.1209,
      "step": 120552
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9245988130569458,
      "learning_rate": 0.0002781287772966275,
      "loss": 3.0268,
      "step": 120553
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.880888819694519,
      "learning_rate": 0.0002781246976216495,
      "loss": 2.8732,
      "step": 120554
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9811071157455444,
      "learning_rate": 0.00027812061795073857,
      "loss": 3.1467,
      "step": 120555
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6224937438964844,
      "learning_rate": 0.0002781165382838955,
      "loss": 2.7206,
      "step": 120556
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8360328674316406,
      "learning_rate": 0.0002781124586211209,
      "loss": 3.247,
      "step": 120557
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.52064847946167,
      "learning_rate": 0.0002781083789624156,
      "loss": 3.147,
      "step": 120558
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.288160562515259,
      "learning_rate": 0.0002781042993077803,
      "loss": 2.8534,
      "step": 120559
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9980509281158447,
      "learning_rate": 0.00027810021965721596,
      "loss": 2.931,
      "step": 120560
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.106933116912842,
      "learning_rate": 0.0002780961400107231,
      "loss": 2.8401,
      "step": 120561
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9847018718719482,
      "learning_rate": 0.0002780920603683027,
      "loss": 2.7887,
      "step": 120562
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.474177837371826,
      "learning_rate": 0.0002780879807299553,
      "loss": 2.9771,
      "step": 120563
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.955517292022705,
      "learning_rate": 0.0002780839010956818,
      "loss": 3.0532,
      "step": 120564
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.234421730041504,
      "learning_rate": 0.00027807982146548285,
      "loss": 2.833,
      "step": 120565
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7482221126556396,
      "learning_rate": 0.0002780757418393593,
      "loss": 2.5974,
      "step": 120566
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2119956016540527,
      "learning_rate": 0.0002780716622173119,
      "loss": 3.1933,
      "step": 120567
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9149651527404785,
      "learning_rate": 0.0002780675825993415,
      "loss": 2.9396,
      "step": 120568
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9900872707366943,
      "learning_rate": 0.00027806350298544864,
      "loss": 2.7094,
      "step": 120569
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9181026220321655,
      "learning_rate": 0.0002780594233756342,
      "loss": 2.9799,
      "step": 120570
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.074286937713623,
      "learning_rate": 0.00027805534376989887,
      "loss": 3.2202,
      "step": 120571
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.075587272644043,
      "learning_rate": 0.0002780512641682435,
      "loss": 3.0548,
      "step": 120572
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4025306701660156,
      "learning_rate": 0.0002780471845706688,
      "loss": 2.8388,
      "step": 120573
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9542688131332397,
      "learning_rate": 0.00027804310497717556,
      "loss": 3.0826,
      "step": 120574
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4350380897521973,
      "learning_rate": 0.0002780390253877647,
      "loss": 2.9401,
      "step": 120575
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8474645614624023,
      "learning_rate": 0.0002780349458024366,
      "loss": 3.0282,
      "step": 120576
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.702258825302124,
      "learning_rate": 0.00027803086622119223,
      "loss": 2.9501,
      "step": 120577
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9770578145980835,
      "learning_rate": 0.00027802678664403237,
      "loss": 3.1657,
      "step": 120578
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.072021007537842,
      "learning_rate": 0.0002780227070709577,
      "loss": 3.0126,
      "step": 120579
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.573401927947998,
      "learning_rate": 0.00027801862750196905,
      "loss": 3.1255,
      "step": 120580
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2223827838897705,
      "learning_rate": 0.00027801454793706723,
      "loss": 2.9713,
      "step": 120581
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4572722911834717,
      "learning_rate": 0.00027801046837625285,
      "loss": 2.7577,
      "step": 120582
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9942430257797241,
      "learning_rate": 0.0002780063888195267,
      "loss": 2.9803,
      "step": 120583
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.18050217628479,
      "learning_rate": 0.00027800230926688967,
      "loss": 2.8932,
      "step": 120584
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6579442024230957,
      "learning_rate": 0.00027799822971834233,
      "loss": 3.119,
      "step": 120585
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9557946920394897,
      "learning_rate": 0.00027799415017388553,
      "loss": 2.7495,
      "step": 120586
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.296328067779541,
      "learning_rate": 0.0002779900706335202,
      "loss": 3.1532,
      "step": 120587
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3243823051452637,
      "learning_rate": 0.0002779859910972468,
      "loss": 2.7762,
      "step": 120588
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.171093463897705,
      "learning_rate": 0.0002779819115650662,
      "loss": 3.0301,
      "step": 120589
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.095743417739868,
      "learning_rate": 0.00027797783203697924,
      "loss": 3.3548,
      "step": 120590
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5459909439086914,
      "learning_rate": 0.00027797375251298654,
      "loss": 3.0154,
      "step": 120591
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6394524574279785,
      "learning_rate": 0.00027796967299308897,
      "loss": 3.4042,
      "step": 120592
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.51155948638916,
      "learning_rate": 0.0002779655934772873,
      "loss": 2.8414,
      "step": 120593
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.385531187057495,
      "learning_rate": 0.0002779615139655823,
      "loss": 3.084,
      "step": 120594
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2896194458007812,
      "learning_rate": 0.00027795743445797454,
      "loss": 3.0928,
      "step": 120595
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.412289619445801,
      "learning_rate": 0.0002779533549544649,
      "loss": 2.9368,
      "step": 120596
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8943450450897217,
      "learning_rate": 0.0002779492754550542,
      "loss": 3.0626,
      "step": 120597
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.074146032333374,
      "learning_rate": 0.00027794519595974315,
      "loss": 2.9787,
      "step": 120598
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.4148123264312744,
      "learning_rate": 0.00027794111646853255,
      "loss": 3.1774,
      "step": 120599
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1140127182006836,
      "learning_rate": 0.000277937036981423,
      "loss": 3.0025,
      "step": 120600
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.2256479263305664,
      "learning_rate": 0.0002779329574984155,
      "loss": 2.8956,
      "step": 120601
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.734265685081482,
      "learning_rate": 0.0002779288780195106,
      "loss": 3.0244,
      "step": 120602
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9271903038024902,
      "learning_rate": 0.0002779247985447091,
      "loss": 2.9349,
      "step": 120603
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7813210487365723,
      "learning_rate": 0.00027792071907401184,
      "loss": 2.8431,
      "step": 120604
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1564042568206787,
      "learning_rate": 0.00027791663960741956,
      "loss": 2.926,
      "step": 120605
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9378656148910522,
      "learning_rate": 0.000277912560144933,
      "loss": 3.073,
      "step": 120606
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7348219156265259,
      "learning_rate": 0.00027790848068655284,
      "loss": 2.9871,
      "step": 120607
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0578725337982178,
      "learning_rate": 0.00027790440123228,
      "loss": 2.7887,
      "step": 120608
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.123185157775879,
      "learning_rate": 0.0002779003217821151,
      "loss": 2.861,
      "step": 120609
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.921940565109253,
      "learning_rate": 0.00027789624233605896,
      "loss": 2.8181,
      "step": 120610
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.137359142303467,
      "learning_rate": 0.00027789216289411233,
      "loss": 2.8584,
      "step": 120611
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8212127685546875,
      "learning_rate": 0.00027788808345627597,
      "loss": 2.7386,
      "step": 120612
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4106152057647705,
      "learning_rate": 0.0002778840040225506,
      "loss": 2.7236,
      "step": 120613
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0075900554656982,
      "learning_rate": 0.00027787992459293696,
      "loss": 2.9632,
      "step": 120614
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8744122982025146,
      "learning_rate": 0.0002778758451674359,
      "loss": 2.9872,
      "step": 120615
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.013503074645996,
      "learning_rate": 0.00027787176574604823,
      "loss": 2.9624,
      "step": 120616
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9351643323898315,
      "learning_rate": 0.00027786768632877463,
      "loss": 3.1529,
      "step": 120617
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8077772855758667,
      "learning_rate": 0.00027786360691561574,
      "loss": 2.6248,
      "step": 120618
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9895915985107422,
      "learning_rate": 0.0002778595275065724,
      "loss": 3.1755,
      "step": 120619
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0972068309783936,
      "learning_rate": 0.00027785544810164545,
      "loss": 2.9134,
      "step": 120620
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8276913166046143,
      "learning_rate": 0.00027785136870083556,
      "loss": 2.9512,
      "step": 120621
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9688806533813477,
      "learning_rate": 0.0002778472893041435,
      "loss": 2.6703,
      "step": 120622
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.223315477371216,
      "learning_rate": 0.0002778432099115702,
      "loss": 3.0506,
      "step": 120623
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0434441566467285,
      "learning_rate": 0.0002778391305231162,
      "loss": 2.91,
      "step": 120624
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0952258110046387,
      "learning_rate": 0.0002778350511387822,
      "loss": 2.9169,
      "step": 120625
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.885090708732605,
      "learning_rate": 0.00027783097175856914,
      "loss": 3.1632,
      "step": 120626
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.2316012382507324,
      "learning_rate": 0.00027782689238247776,
      "loss": 2.8664,
      "step": 120627
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0162270069122314,
      "learning_rate": 0.00027782281301050874,
      "loss": 3.2819,
      "step": 120628
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9861104488372803,
      "learning_rate": 0.00027781873364266294,
      "loss": 2.9632,
      "step": 120629
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.232708215713501,
      "learning_rate": 0.00027781465427894106,
      "loss": 3.0529,
      "step": 120630
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9417014122009277,
      "learning_rate": 0.00027781057491934377,
      "loss": 3.0975,
      "step": 120631
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7718079090118408,
      "learning_rate": 0.0002778064955638719,
      "loss": 2.7606,
      "step": 120632
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1528851985931396,
      "learning_rate": 0.00027780241621252626,
      "loss": 3.0558,
      "step": 120633
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.429619550704956,
      "learning_rate": 0.00027779833686530757,
      "loss": 3.0022,
      "step": 120634
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0699546337127686,
      "learning_rate": 0.00027779425752221676,
      "loss": 2.8048,
      "step": 120635
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8591558933258057,
      "learning_rate": 0.0002777901781832542,
      "loss": 2.9325,
      "step": 120636
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2876572608947754,
      "learning_rate": 0.0002777860988484209,
      "loss": 3.0469,
      "step": 120637
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9107308387756348,
      "learning_rate": 0.0002777820195177176,
      "loss": 3.1712,
      "step": 120638
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7997032403945923,
      "learning_rate": 0.000277777940191145,
      "loss": 2.8615,
      "step": 120639
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0816996097564697,
      "learning_rate": 0.00027777386086870394,
      "loss": 2.8864,
      "step": 120640
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2322959899902344,
      "learning_rate": 0.0002777697815503951,
      "loss": 3.0589,
      "step": 120641
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1563334465026855,
      "learning_rate": 0.00027776570223621945,
      "loss": 3.2699,
      "step": 120642
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0045735836029053,
      "learning_rate": 0.00027776162292617745,
      "loss": 2.9947,
      "step": 120643
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8409079313278198,
      "learning_rate": 0.00027775754362026995,
      "loss": 3.0439,
      "step": 120644
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6446598768234253,
      "learning_rate": 0.00027775346431849776,
      "loss": 2.9657,
      "step": 120645
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.0439693927764893,
      "learning_rate": 0.0002777493850208616,
      "loss": 2.7625,
      "step": 120646
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8324134349822998,
      "learning_rate": 0.0002777453057273623,
      "loss": 3.3165,
      "step": 120647
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7584010362625122,
      "learning_rate": 0.00027774122643800064,
      "loss": 2.9883,
      "step": 120648
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.88334584236145,
      "learning_rate": 0.00027773714715277723,
      "loss": 2.7653,
      "step": 120649
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.210204839706421,
      "learning_rate": 0.00027773306787169286,
      "loss": 2.8661,
      "step": 120650
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.820346713066101,
      "learning_rate": 0.00027772898859474836,
      "loss": 2.8298,
      "step": 120651
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8778518438339233,
      "learning_rate": 0.00027772490932194443,
      "loss": 3.1027,
      "step": 120652
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.056210041046143,
      "learning_rate": 0.00027772083005328193,
      "loss": 3.0563,
      "step": 120653
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2718894481658936,
      "learning_rate": 0.0002777167507887616,
      "loss": 3.2123,
      "step": 120654
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8196282386779785,
      "learning_rate": 0.00027771267152838406,
      "loss": 3.1061,
      "step": 120655
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.227071523666382,
      "learning_rate": 0.0002777085922721501,
      "loss": 2.98,
      "step": 120656
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9188143014907837,
      "learning_rate": 0.0002777045130200606,
      "loss": 2.7948,
      "step": 120657
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3873560428619385,
      "learning_rate": 0.00027770043377211624,
      "loss": 2.9033,
      "step": 120658
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.715278148651123,
      "learning_rate": 0.00027769635452831775,
      "loss": 2.8217,
      "step": 120659
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.196655035018921,
      "learning_rate": 0.00027769227528866606,
      "loss": 2.8954,
      "step": 120660
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9693987369537354,
      "learning_rate": 0.0002776881960531617,
      "loss": 2.957,
      "step": 120661
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.320099353790283,
      "learning_rate": 0.00027768411682180553,
      "loss": 2.7453,
      "step": 120662
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2539079189300537,
      "learning_rate": 0.0002776800375945983,
      "loss": 2.906,
      "step": 120663
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8616834878921509,
      "learning_rate": 0.0002776759583715408,
      "loss": 2.9472,
      "step": 120664
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.910875678062439,
      "learning_rate": 0.0002776718791526337,
      "loss": 2.9,
      "step": 120665
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8612843751907349,
      "learning_rate": 0.000277667799937878,
      "loss": 3.1796,
      "step": 120666
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3200600147247314,
      "learning_rate": 0.0002776637207272741,
      "loss": 3.1188,
      "step": 120667
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.4373397827148438,
      "learning_rate": 0.00027765964152082293,
      "loss": 2.8829,
      "step": 120668
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.391321897506714,
      "learning_rate": 0.00027765556231852526,
      "loss": 2.7921,
      "step": 120669
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5351192951202393,
      "learning_rate": 0.0002776514831203819,
      "loss": 3.0947,
      "step": 120670
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1895110607147217,
      "learning_rate": 0.00027764740392639353,
      "loss": 3.104,
      "step": 120671
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.5081636905670166,
      "learning_rate": 0.00027764332473656104,
      "loss": 2.7624,
      "step": 120672
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9637123346328735,
      "learning_rate": 0.0002776392455508849,
      "loss": 2.9132,
      "step": 120673
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7065496444702148,
      "learning_rate": 0.0002776351663693661,
      "loss": 3.0183,
      "step": 120674
    },
    {
      "epoch": 1.57,
      "grad_norm": 5.279331207275391,
      "learning_rate": 0.0002776310871920054,
      "loss": 2.9188,
      "step": 120675
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.1704373359680176,
      "learning_rate": 0.00027762700801880347,
      "loss": 3.0122,
      "step": 120676
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4947729110717773,
      "learning_rate": 0.00027762292884976104,
      "loss": 3.2088,
      "step": 120677
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9907307624816895,
      "learning_rate": 0.000277618849684879,
      "loss": 3.0262,
      "step": 120678
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5947601795196533,
      "learning_rate": 0.0002776147705241581,
      "loss": 3.1271,
      "step": 120679
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.790066719055176,
      "learning_rate": 0.00027761069136759893,
      "loss": 2.7943,
      "step": 120680
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.194976806640625,
      "learning_rate": 0.0002776066122152024,
      "loss": 3.2715,
      "step": 120681
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6878788471221924,
      "learning_rate": 0.00027760253306696915,
      "loss": 3.0871,
      "step": 120682
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8487812280654907,
      "learning_rate": 0.0002775984539229001,
      "loss": 2.9047,
      "step": 120683
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2029311656951904,
      "learning_rate": 0.0002775943747829959,
      "loss": 3.1658,
      "step": 120684
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7480018138885498,
      "learning_rate": 0.0002775902956472573,
      "loss": 3.1102,
      "step": 120685
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.038115978240967,
      "learning_rate": 0.0002775862165156852,
      "loss": 2.6033,
      "step": 120686
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9445323944091797,
      "learning_rate": 0.00027758213738828013,
      "loss": 2.8302,
      "step": 120687
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8284896612167358,
      "learning_rate": 0.00027757805826504297,
      "loss": 3.0069,
      "step": 120688
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8662769794464111,
      "learning_rate": 0.00027757397914597446,
      "loss": 2.9872,
      "step": 120689
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.910478949546814,
      "learning_rate": 0.00027756990003107546,
      "loss": 3.2404,
      "step": 120690
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.659673810005188,
      "learning_rate": 0.00027756582092034653,
      "loss": 3.1299,
      "step": 120691
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.385033130645752,
      "learning_rate": 0.00027756174181378857,
      "loss": 2.7718,
      "step": 120692
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9904310703277588,
      "learning_rate": 0.0002775576627114024,
      "loss": 2.8488,
      "step": 120693
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8902539014816284,
      "learning_rate": 0.0002775535836131886,
      "loss": 3.0001,
      "step": 120694
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.656764268875122,
      "learning_rate": 0.00027754950451914796,
      "loss": 3.2361,
      "step": 120695
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.024102210998535,
      "learning_rate": 0.0002775454254292814,
      "loss": 3.0279,
      "step": 120696
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9870405197143555,
      "learning_rate": 0.0002775413463435895,
      "loss": 2.8921,
      "step": 120697
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.31602144241333,
      "learning_rate": 0.0002775372672620731,
      "loss": 2.9845,
      "step": 120698
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.797131299972534,
      "learning_rate": 0.00027753318818473293,
      "loss": 2.943,
      "step": 120699
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.403759002685547,
      "learning_rate": 0.0002775291091115698,
      "loss": 3.0761,
      "step": 120700
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.3531694412231445,
      "learning_rate": 0.00027752503004258445,
      "loss": 3.0762,
      "step": 120701
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0035839080810547,
      "learning_rate": 0.0002775209509777776,
      "loss": 2.9705,
      "step": 120702
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4808459281921387,
      "learning_rate": 0.00027751687191715003,
      "loss": 2.7894,
      "step": 120703
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.53029465675354,
      "learning_rate": 0.00027751279286070243,
      "loss": 2.651,
      "step": 120704
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.063828706741333,
      "learning_rate": 0.0002775087138084357,
      "loss": 3.057,
      "step": 120705
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9869515895843506,
      "learning_rate": 0.0002775046347603505,
      "loss": 2.9605,
      "step": 120706
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2782106399536133,
      "learning_rate": 0.0002775005557164476,
      "loss": 2.7545,
      "step": 120707
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2588348388671875,
      "learning_rate": 0.00027749647667672783,
      "loss": 3.0283,
      "step": 120708
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8819055557250977,
      "learning_rate": 0.0002774923976411919,
      "loss": 3.0222,
      "step": 120709
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7727471590042114,
      "learning_rate": 0.0002774883186098405,
      "loss": 2.7852,
      "step": 120710
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9276472330093384,
      "learning_rate": 0.00027748423958267443,
      "loss": 3.0767,
      "step": 120711
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.1479601860046387,
      "learning_rate": 0.0002774801605596945,
      "loss": 2.9902,
      "step": 120712
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8382407426834106,
      "learning_rate": 0.0002774760815409014,
      "loss": 2.6131,
      "step": 120713
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.738181233406067,
      "learning_rate": 0.0002774720025262959,
      "loss": 2.7956,
      "step": 120714
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4385697841644287,
      "learning_rate": 0.00027746792351587894,
      "loss": 3.2437,
      "step": 120715
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2268741130828857,
      "learning_rate": 0.00027746384450965105,
      "loss": 3.1392,
      "step": 120716
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4482526779174805,
      "learning_rate": 0.00027745976550761295,
      "loss": 3.3244,
      "step": 120717
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0466058254241943,
      "learning_rate": 0.00027745568650976556,
      "loss": 2.866,
      "step": 120718
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7926177978515625,
      "learning_rate": 0.00027745160751610953,
      "loss": 2.9702,
      "step": 120719
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.83371901512146,
      "learning_rate": 0.00027744752852664574,
      "loss": 2.9914,
      "step": 120720
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7974523305892944,
      "learning_rate": 0.000277443449541375,
      "loss": 2.9038,
      "step": 120721
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.266115427017212,
      "learning_rate": 0.0002774393705602978,
      "loss": 3.0556,
      "step": 120722
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0696446895599365,
      "learning_rate": 0.00027743529158341505,
      "loss": 3.0125,
      "step": 120723
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.688895583152771,
      "learning_rate": 0.0002774312126107275,
      "loss": 3.0838,
      "step": 120724
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.030245780944824,
      "learning_rate": 0.0002774271336422359,
      "loss": 2.9948,
      "step": 120725
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9268168210983276,
      "learning_rate": 0.00027742305467794105,
      "loss": 3.0137,
      "step": 120726
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2437353134155273,
      "learning_rate": 0.00027741897571784385,
      "loss": 3.0322,
      "step": 120727
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.246980667114258,
      "learning_rate": 0.0002774148967619447,
      "loss": 2.7193,
      "step": 120728
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.894866943359375,
      "learning_rate": 0.0002774108178102445,
      "loss": 2.8333,
      "step": 120729
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6015827655792236,
      "learning_rate": 0.00027740673886274416,
      "loss": 2.8824,
      "step": 120730
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0795485973358154,
      "learning_rate": 0.0002774026599194443,
      "loss": 2.6316,
      "step": 120731
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.896796703338623,
      "learning_rate": 0.0002773985809803457,
      "loss": 2.9322,
      "step": 120732
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.00897216796875,
      "learning_rate": 0.00027739450204544925,
      "loss": 3.1752,
      "step": 120733
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.098574161529541,
      "learning_rate": 0.00027739042311475544,
      "loss": 3.1468,
      "step": 120734
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8550443649291992,
      "learning_rate": 0.00027738634418826525,
      "loss": 2.8977,
      "step": 120735
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9363598823547363,
      "learning_rate": 0.0002773822652659793,
      "loss": 2.8432,
      "step": 120736
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.837544560432434,
      "learning_rate": 0.00027737818634789843,
      "loss": 3.0541,
      "step": 120737
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0773305892944336,
      "learning_rate": 0.00027737410743402337,
      "loss": 3.1333,
      "step": 120738
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1566519737243652,
      "learning_rate": 0.00027737002852435506,
      "loss": 2.9869,
      "step": 120739
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8408945798873901,
      "learning_rate": 0.00027736594961889395,
      "loss": 3.0169,
      "step": 120740
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9616608619689941,
      "learning_rate": 0.0002773618707176409,
      "loss": 2.8664,
      "step": 120741
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.163562059402466,
      "learning_rate": 0.0002773577918205967,
      "loss": 3.0764,
      "step": 120742
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9338175058364868,
      "learning_rate": 0.00027735371292776217,
      "loss": 2.9684,
      "step": 120743
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.034754991531372,
      "learning_rate": 0.000277349634039138,
      "loss": 2.7944,
      "step": 120744
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.113516330718994,
      "learning_rate": 0.00027734555515472506,
      "loss": 2.9559,
      "step": 120745
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6152994632720947,
      "learning_rate": 0.0002773414762745239,
      "loss": 3.1202,
      "step": 120746
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9837106466293335,
      "learning_rate": 0.00027733739739853536,
      "loss": 3.1574,
      "step": 120747
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.097696542739868,
      "learning_rate": 0.00027733331852676025,
      "loss": 3.1271,
      "step": 120748
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.0145301818847656,
      "learning_rate": 0.0002773292396591993,
      "loss": 3.1545,
      "step": 120749
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9938265085220337,
      "learning_rate": 0.0002773251607958533,
      "loss": 2.8389,
      "step": 120750
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0004305839538574,
      "learning_rate": 0.0002773210819367231,
      "loss": 3.108,
      "step": 120751
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5244784355163574,
      "learning_rate": 0.0002773170030818092,
      "loss": 2.9595,
      "step": 120752
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7297770977020264,
      "learning_rate": 0.0002773129242311125,
      "loss": 2.9414,
      "step": 120753
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.5535125732421875,
      "learning_rate": 0.0002773088453846337,
      "loss": 3.0739,
      "step": 120754
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0300118923187256,
      "learning_rate": 0.00027730476654237366,
      "loss": 3.2616,
      "step": 120755
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.767706274986267,
      "learning_rate": 0.0002773006877043331,
      "loss": 3.3545,
      "step": 120756
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.16221284866333,
      "learning_rate": 0.0002772966088705129,
      "loss": 3.2666,
      "step": 120757
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8485970497131348,
      "learning_rate": 0.00027729253004091356,
      "loss": 2.9219,
      "step": 120758
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3465843200683594,
      "learning_rate": 0.00027728845121553596,
      "loss": 2.9212,
      "step": 120759
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.725214958190918,
      "learning_rate": 0.00027728437239438083,
      "loss": 3.1714,
      "step": 120760
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.1905503273010254,
      "learning_rate": 0.000277280293577449,
      "loss": 2.952,
      "step": 120761
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.7784526348114014,
      "learning_rate": 0.0002772762147647412,
      "loss": 2.9815,
      "step": 120762
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.746361255645752,
      "learning_rate": 0.0002772721359562583,
      "loss": 2.9405,
      "step": 120763
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.1821982860565186,
      "learning_rate": 0.0002772680571520008,
      "loss": 2.9946,
      "step": 120764
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.973928689956665,
      "learning_rate": 0.0002772639783519696,
      "loss": 3.1818,
      "step": 120765
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2246463298797607,
      "learning_rate": 0.0002772598995561655,
      "loss": 3.068,
      "step": 120766
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.447650671005249,
      "learning_rate": 0.0002772558207645892,
      "loss": 2.8621,
      "step": 120767
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0253279209136963,
      "learning_rate": 0.0002772517419772414,
      "loss": 3.2182,
      "step": 120768
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.933164358139038,
      "learning_rate": 0.00027724766319412304,
      "loss": 3.0931,
      "step": 120769
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5024092197418213,
      "learning_rate": 0.0002772435844152347,
      "loss": 3.0452,
      "step": 120770
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.533710479736328,
      "learning_rate": 0.0002772395056405773,
      "loss": 3.1167,
      "step": 120771
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.5319740772247314,
      "learning_rate": 0.00027723542687015145,
      "loss": 3.0085,
      "step": 120772
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8193747997283936,
      "learning_rate": 0.0002772313481039579,
      "loss": 3.0827,
      "step": 120773
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4115307331085205,
      "learning_rate": 0.00027722726934199755,
      "loss": 2.8083,
      "step": 120774
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1408443450927734,
      "learning_rate": 0.000277223190584271,
      "loss": 3.0346,
      "step": 120775
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.854807138442993,
      "learning_rate": 0.0002772191118307792,
      "loss": 2.9032,
      "step": 120776
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.3158695697784424,
      "learning_rate": 0.00027721503308152275,
      "loss": 2.9118,
      "step": 120777
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2601606845855713,
      "learning_rate": 0.00027721095433650247,
      "loss": 3.0653,
      "step": 120778
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.669255018234253,
      "learning_rate": 0.00027720687559571903,
      "loss": 3.2295,
      "step": 120779
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.2348315715789795,
      "learning_rate": 0.00027720279685917337,
      "loss": 3.1443,
      "step": 120780
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.496861696243286,
      "learning_rate": 0.00027719871812686606,
      "loss": 2.8872,
      "step": 120781
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8080717325210571,
      "learning_rate": 0.000277194639398798,
      "loss": 2.9439,
      "step": 120782
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3467800617218018,
      "learning_rate": 0.0002771905606749698,
      "loss": 2.9932,
      "step": 120783
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8297388553619385,
      "learning_rate": 0.00027718648195538235,
      "loss": 3.0277,
      "step": 120784
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.062615394592285,
      "learning_rate": 0.0002771824032400364,
      "loss": 2.9814,
      "step": 120785
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.433419942855835,
      "learning_rate": 0.00027717832452893264,
      "loss": 3.1944,
      "step": 120786
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.232680320739746,
      "learning_rate": 0.00027717424582207183,
      "loss": 2.8018,
      "step": 120787
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2354397773742676,
      "learning_rate": 0.0002771701671194548,
      "loss": 2.8979,
      "step": 120788
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.379692554473877,
      "learning_rate": 0.0002771660884210823,
      "loss": 2.9527,
      "step": 120789
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.656697392463684,
      "learning_rate": 0.000277162009726955,
      "loss": 2.778,
      "step": 120790
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.75555419921875,
      "learning_rate": 0.00027715793103707365,
      "loss": 3.1959,
      "step": 120791
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9303972721099854,
      "learning_rate": 0.00027715385235143925,
      "loss": 2.9626,
      "step": 120792
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.112072467803955,
      "learning_rate": 0.00027714977367005226,
      "loss": 3.116,
      "step": 120793
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1754629611968994,
      "learning_rate": 0.0002771456949929136,
      "loss": 2.8853,
      "step": 120794
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.777454137802124,
      "learning_rate": 0.00027714161632002394,
      "loss": 3.1924,
      "step": 120795
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7963428497314453,
      "learning_rate": 0.0002771375376513841,
      "loss": 3.3493,
      "step": 120796
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7471927404403687,
      "learning_rate": 0.0002771334589869948,
      "loss": 3.0317,
      "step": 120797
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.784968376159668,
      "learning_rate": 0.00027712938032685685,
      "loss": 3.0503,
      "step": 120798
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7636288404464722,
      "learning_rate": 0.00027712530167097095,
      "loss": 2.8126,
      "step": 120799
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5171186923980713,
      "learning_rate": 0.00027712122301933804,
      "loss": 3.0835,
      "step": 120800
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.703843355178833,
      "learning_rate": 0.0002771171443719586,
      "loss": 2.9534,
      "step": 120801
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.709615468978882,
      "learning_rate": 0.0002771130657288335,
      "loss": 2.9568,
      "step": 120802
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7195684909820557,
      "learning_rate": 0.0002771089870899635,
      "loss": 3.0319,
      "step": 120803
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.675707459449768,
      "learning_rate": 0.0002771049084553494,
      "loss": 3.1955,
      "step": 120804
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.413794755935669,
      "learning_rate": 0.00027710082982499194,
      "loss": 2.9261,
      "step": 120805
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.357553005218506,
      "learning_rate": 0.00027709675119889194,
      "loss": 3.1659,
      "step": 120806
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1868605613708496,
      "learning_rate": 0.00027709267257705,
      "loss": 2.9372,
      "step": 120807
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6863203048706055,
      "learning_rate": 0.00027708859395946694,
      "loss": 2.9333,
      "step": 120808
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.653629779815674,
      "learning_rate": 0.0002770845153461436,
      "loss": 2.9154,
      "step": 120809
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7879250049591064,
      "learning_rate": 0.0002770804367370806,
      "loss": 3.0063,
      "step": 120810
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1549503803253174,
      "learning_rate": 0.00027707635813227884,
      "loss": 2.7062,
      "step": 120811
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.890373706817627,
      "learning_rate": 0.0002770722795317391,
      "loss": 3.2603,
      "step": 120812
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.877836227416992,
      "learning_rate": 0.000277068200935462,
      "loss": 2.7695,
      "step": 120813
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.774440050125122,
      "learning_rate": 0.0002770641223434483,
      "loss": 3.1405,
      "step": 120814
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4940059185028076,
      "learning_rate": 0.0002770600437556988,
      "loss": 2.9232,
      "step": 120815
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.176482677459717,
      "learning_rate": 0.0002770559651722143,
      "loss": 2.9707,
      "step": 120816
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3859987258911133,
      "learning_rate": 0.0002770518865929955,
      "loss": 3.0997,
      "step": 120817
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1530866622924805,
      "learning_rate": 0.00027704780801804336,
      "loss": 3.0561,
      "step": 120818
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.1412925720214844,
      "learning_rate": 0.0002770437294473583,
      "loss": 3.0539,
      "step": 120819
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7645180225372314,
      "learning_rate": 0.0002770396508809413,
      "loss": 3.0679,
      "step": 120820
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7340458631515503,
      "learning_rate": 0.000277035572318793,
      "loss": 2.9906,
      "step": 120821
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9029784202575684,
      "learning_rate": 0.0002770314937609142,
      "loss": 3.081,
      "step": 120822
    },
    {
      "epoch": 1.57,
      "grad_norm": 4.146409511566162,
      "learning_rate": 0.00027702741520730577,
      "loss": 2.8608,
      "step": 120823
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8270925283432007,
      "learning_rate": 0.0002770233366579684,
      "loss": 2.9644,
      "step": 120824
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.681208848953247,
      "learning_rate": 0.0002770192581129028,
      "loss": 3.0224,
      "step": 120825
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.109720468521118,
      "learning_rate": 0.0002770151795721097,
      "loss": 2.7679,
      "step": 120826
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0977156162261963,
      "learning_rate": 0.0002770111010355899,
      "loss": 3.0134,
      "step": 120827
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.744003415107727,
      "learning_rate": 0.0002770070225033442,
      "loss": 3.1604,
      "step": 120828
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9231514930725098,
      "learning_rate": 0.00027700294397537333,
      "loss": 2.7927,
      "step": 120829
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.3762192726135254,
      "learning_rate": 0.00027699886545167813,
      "loss": 3.019,
      "step": 120830
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.559558391571045,
      "learning_rate": 0.00027699478693225924,
      "loss": 3.0178,
      "step": 120831
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3103394508361816,
      "learning_rate": 0.00027699070841711736,
      "loss": 2.9359,
      "step": 120832
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.457005023956299,
      "learning_rate": 0.00027698662990625336,
      "loss": 2.9513,
      "step": 120833
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9891690015792847,
      "learning_rate": 0.000276982551399668,
      "loss": 2.9172,
      "step": 120834
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1723365783691406,
      "learning_rate": 0.00027697847289736205,
      "loss": 2.7203,
      "step": 120835
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8543246984481812,
      "learning_rate": 0.0002769743943993363,
      "loss": 3.103,
      "step": 120836
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.775547981262207,
      "learning_rate": 0.0002769703159055913,
      "loss": 2.918,
      "step": 120837
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0884673595428467,
      "learning_rate": 0.00027696623741612806,
      "loss": 2.983,
      "step": 120838
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.802666425704956,
      "learning_rate": 0.00027696215893094713,
      "loss": 2.8223,
      "step": 120839
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7343798875808716,
      "learning_rate": 0.0002769580804500494,
      "loss": 2.5868,
      "step": 120840
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2947800159454346,
      "learning_rate": 0.0002769540019734356,
      "loss": 3.0061,
      "step": 120841
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9865782260894775,
      "learning_rate": 0.00027694992350110655,
      "loss": 2.9514,
      "step": 120842
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0428812503814697,
      "learning_rate": 0.00027694584503306296,
      "loss": 2.9226,
      "step": 120843
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8787585496902466,
      "learning_rate": 0.00027694176656930554,
      "loss": 2.8332,
      "step": 120844
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.644514322280884,
      "learning_rate": 0.000276937688109835,
      "loss": 3.0056,
      "step": 120845
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8808375597000122,
      "learning_rate": 0.0002769336096546522,
      "loss": 3.0316,
      "step": 120846
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0392558574676514,
      "learning_rate": 0.00027692953120375793,
      "loss": 2.8378,
      "step": 120847
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.180382490158081,
      "learning_rate": 0.00027692545275715286,
      "loss": 3.3303,
      "step": 120848
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3350830078125,
      "learning_rate": 0.00027692137431483793,
      "loss": 3.0837,
      "step": 120849
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1221940517425537,
      "learning_rate": 0.00027691729587681356,
      "loss": 3.0444,
      "step": 120850
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0532431602478027,
      "learning_rate": 0.0002769132174430808,
      "loss": 3.152,
      "step": 120851
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0331990718841553,
      "learning_rate": 0.00027690913901364027,
      "loss": 2.9999,
      "step": 120852
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9716849327087402,
      "learning_rate": 0.0002769050605884927,
      "loss": 3.0022,
      "step": 120853
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7231743335723877,
      "learning_rate": 0.00027690098216763895,
      "loss": 3.0405,
      "step": 120854
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.399563789367676,
      "learning_rate": 0.00027689690375107994,
      "loss": 2.9046,
      "step": 120855
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6544198989868164,
      "learning_rate": 0.00027689282533881605,
      "loss": 3.0854,
      "step": 120856
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.7116100788116455,
      "learning_rate": 0.0002768887469308482,
      "loss": 3.0039,
      "step": 120857
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7993335723876953,
      "learning_rate": 0.0002768846685271772,
      "loss": 3.2055,
      "step": 120858
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.002603769302368,
      "learning_rate": 0.0002768805901278038,
      "loss": 3.1711,
      "step": 120859
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.123591661453247,
      "learning_rate": 0.0002768765117327287,
      "loss": 3.0254,
      "step": 120860
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.948284387588501,
      "learning_rate": 0.0002768724333419527,
      "loss": 3.0944,
      "step": 120861
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0158352851867676,
      "learning_rate": 0.0002768683549554767,
      "loss": 2.9944,
      "step": 120862
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.766176700592041,
      "learning_rate": 0.00027686427657330114,
      "loss": 2.9503,
      "step": 120863
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2261292934417725,
      "learning_rate": 0.00027686019819542695,
      "loss": 3.0786,
      "step": 120864
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.279228925704956,
      "learning_rate": 0.0002768561198218549,
      "loss": 2.9405,
      "step": 120865
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.855084776878357,
      "learning_rate": 0.0002768520414525857,
      "loss": 3.2717,
      "step": 120866
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0259108543395996,
      "learning_rate": 0.0002768479630876203,
      "loss": 2.7553,
      "step": 120867
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.5638766288757324,
      "learning_rate": 0.00027684388472695915,
      "loss": 2.9267,
      "step": 120868
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9148521423339844,
      "learning_rate": 0.0002768398063706032,
      "loss": 2.9126,
      "step": 120869
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.130434036254883,
      "learning_rate": 0.00027683572801855317,
      "loss": 3.0274,
      "step": 120870
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.114593505859375,
      "learning_rate": 0.00027683164967080983,
      "loss": 2.8434,
      "step": 120871
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.983941078186035,
      "learning_rate": 0.00027682757132737386,
      "loss": 3.079,
      "step": 120872
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.136923313140869,
      "learning_rate": 0.0002768234929882462,
      "loss": 2.8951,
      "step": 120873
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.76735520362854,
      "learning_rate": 0.0002768194146534274,
      "loss": 2.9135,
      "step": 120874
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.335430383682251,
      "learning_rate": 0.0002768153363229183,
      "loss": 2.9491,
      "step": 120875
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7989333868026733,
      "learning_rate": 0.0002768112579967197,
      "loss": 2.9499,
      "step": 120876
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.171708822250366,
      "learning_rate": 0.00027680717967483237,
      "loss": 3.1125,
      "step": 120877
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.186248540878296,
      "learning_rate": 0.00027680310135725695,
      "loss": 2.8193,
      "step": 120878
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2896876335144043,
      "learning_rate": 0.00027679902304399433,
      "loss": 2.6489,
      "step": 120879
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6555016040802,
      "learning_rate": 0.00027679494473504515,
      "loss": 3.0129,
      "step": 120880
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.444058895111084,
      "learning_rate": 0.00027679086643041023,
      "loss": 3.1315,
      "step": 120881
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9179984331130981,
      "learning_rate": 0.0002767867881300904,
      "loss": 2.9626,
      "step": 120882
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.6639409065246582,
      "learning_rate": 0.0002767827098340862,
      "loss": 2.9979,
      "step": 120883
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0380804538726807,
      "learning_rate": 0.0002767786315423987,
      "loss": 3.0004,
      "step": 120884
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6362380981445312,
      "learning_rate": 0.00027677455325502846,
      "loss": 3.09,
      "step": 120885
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.563934564590454,
      "learning_rate": 0.0002767704749719762,
      "loss": 3.044,
      "step": 120886
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.967464804649353,
      "learning_rate": 0.0002767663966932428,
      "loss": 2.917,
      "step": 120887
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5352344512939453,
      "learning_rate": 0.0002767623184188289,
      "loss": 3.0324,
      "step": 120888
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9825053215026855,
      "learning_rate": 0.00027675824014873537,
      "loss": 2.9848,
      "step": 120889
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7728725671768188,
      "learning_rate": 0.00027675416188296286,
      "loss": 2.8414,
      "step": 120890
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.10370135307312,
      "learning_rate": 0.00027675008362151236,
      "loss": 3.1205,
      "step": 120891
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.667332410812378,
      "learning_rate": 0.0002767460053643843,
      "loss": 2.9605,
      "step": 120892
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.887054681777954,
      "learning_rate": 0.00027674192711157967,
      "loss": 3.1254,
      "step": 120893
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8798786401748657,
      "learning_rate": 0.0002767378488630991,
      "loss": 3.226,
      "step": 120894
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9111125469207764,
      "learning_rate": 0.00027673377061894337,
      "loss": 3.1517,
      "step": 120895
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1782734394073486,
      "learning_rate": 0.0002767296923791133,
      "loss": 3.0091,
      "step": 120896
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.731711745262146,
      "learning_rate": 0.0002767256141436098,
      "loss": 2.7694,
      "step": 120897
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.810770869255066,
      "learning_rate": 0.00027672153591243325,
      "loss": 3.2168,
      "step": 120898
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5642364025115967,
      "learning_rate": 0.00027671745768558464,
      "loss": 2.9848,
      "step": 120899
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7594794034957886,
      "learning_rate": 0.0002767133794630647,
      "loss": 2.9833,
      "step": 120900
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0492846965789795,
      "learning_rate": 0.0002767093012448742,
      "loss": 3.0124,
      "step": 120901
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9590075016021729,
      "learning_rate": 0.0002767052230310138,
      "loss": 3.0782,
      "step": 120902
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8836238384246826,
      "learning_rate": 0.0002767011448214846,
      "loss": 3.0528,
      "step": 120903
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.143033981323242,
      "learning_rate": 0.0002766970666162868,
      "loss": 2.9058,
      "step": 120904
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9814056158065796,
      "learning_rate": 0.00027669298841542154,
      "loss": 3.133,
      "step": 120905
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.004673480987549,
      "learning_rate": 0.00027668891021888955,
      "loss": 2.8029,
      "step": 120906
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7371082305908203,
      "learning_rate": 0.00027668483202669144,
      "loss": 3.0011,
      "step": 120907
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9533803462982178,
      "learning_rate": 0.0002766807538388281,
      "loss": 2.8267,
      "step": 120908
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.817354679107666,
      "learning_rate": 0.00027667667565530027,
      "loss": 2.7701,
      "step": 120909
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9868574142456055,
      "learning_rate": 0.0002766725974761088,
      "loss": 3.2031,
      "step": 120910
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.6306002140045166,
      "learning_rate": 0.00027666851930125417,
      "loss": 2.9916,
      "step": 120911
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3630969524383545,
      "learning_rate": 0.00027666444113073733,
      "loss": 3.0651,
      "step": 120912
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.665593147277832,
      "learning_rate": 0.00027666036296455904,
      "loss": 2.9504,
      "step": 120913
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9870758056640625,
      "learning_rate": 0.00027665628480271994,
      "loss": 3.0962,
      "step": 120914
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.771228551864624,
      "learning_rate": 0.00027665220664522096,
      "loss": 2.8441,
      "step": 120915
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9949913024902344,
      "learning_rate": 0.0002766481284920629,
      "loss": 2.8239,
      "step": 120916
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0355277061462402,
      "learning_rate": 0.0002766440503432462,
      "loss": 2.8347,
      "step": 120917
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7213786840438843,
      "learning_rate": 0.0002766399721987719,
      "loss": 2.9085,
      "step": 120918
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.735987424850464,
      "learning_rate": 0.0002766358940586406,
      "loss": 3.0054,
      "step": 120919
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0914039611816406,
      "learning_rate": 0.00027663181592285315,
      "loss": 3.0377,
      "step": 120920
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.73836350440979,
      "learning_rate": 0.0002766277377914103,
      "loss": 2.9522,
      "step": 120921
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3217151165008545,
      "learning_rate": 0.0002766236596643129,
      "loss": 2.8931,
      "step": 120922
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5297038555145264,
      "learning_rate": 0.0002766195815415615,
      "loss": 2.999,
      "step": 120923
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0094292163848877,
      "learning_rate": 0.00027661550342315697,
      "loss": 2.9786,
      "step": 120924
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.476820707321167,
      "learning_rate": 0.0002766114253091,
      "loss": 3.1052,
      "step": 120925
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.404294729232788,
      "learning_rate": 0.0002766073471993915,
      "loss": 3.3894,
      "step": 120926
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0829873085021973,
      "learning_rate": 0.00027660326909403204,
      "loss": 2.9777,
      "step": 120927
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9224435091018677,
      "learning_rate": 0.00027659919099302265,
      "loss": 3.1265,
      "step": 120928
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9402215480804443,
      "learning_rate": 0.0002765951128963638,
      "loss": 3.0688,
      "step": 120929
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.2543795108795166,
      "learning_rate": 0.0002765910348040563,
      "loss": 3.1124,
      "step": 120930
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9574735164642334,
      "learning_rate": 0.00027658695671610103,
      "loss": 3.1958,
      "step": 120931
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7576454877853394,
      "learning_rate": 0.00027658287863249867,
      "loss": 2.8034,
      "step": 120932
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0138678550720215,
      "learning_rate": 0.00027657880055325003,
      "loss": 3.0691,
      "step": 120933
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.99293851852417,
      "learning_rate": 0.00027657472247835587,
      "loss": 2.9225,
      "step": 120934
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.104949712753296,
      "learning_rate": 0.0002765706444078169,
      "loss": 3.1039,
      "step": 120935
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7790864706039429,
      "learning_rate": 0.0002765665663416338,
      "loss": 3.2626,
      "step": 120936
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9210509061813354,
      "learning_rate": 0.00027656248827980743,
      "loss": 3.0853,
      "step": 120937
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8799933195114136,
      "learning_rate": 0.00027655841022233856,
      "loss": 2.7957,
      "step": 120938
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.7719428539276123,
      "learning_rate": 0.00027655433216922787,
      "loss": 2.8649,
      "step": 120939
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.39521861076355,
      "learning_rate": 0.00027655025412047637,
      "loss": 2.8537,
      "step": 120940
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.8123254776000977,
      "learning_rate": 0.00027654617607608445,
      "loss": 3.0555,
      "step": 120941
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.808244228363037,
      "learning_rate": 0.000276542098036053,
      "loss": 2.6659,
      "step": 120942
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.8874149322509766,
      "learning_rate": 0.0002765380200003829,
      "loss": 2.9392,
      "step": 120943
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.9337027072906494,
      "learning_rate": 0.0002765339419690748,
      "loss": 2.9218,
      "step": 120944
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.5067875385284424,
      "learning_rate": 0.0002765298639421295,
      "loss": 3.038,
      "step": 120945
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.694791555404663,
      "learning_rate": 0.00027652578591954777,
      "loss": 2.8602,
      "step": 120946
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.709059238433838,
      "learning_rate": 0.00027652170790133033,
      "loss": 3.0003,
      "step": 120947
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9305074214935303,
      "learning_rate": 0.00027651762988747795,
      "loss": 3.102,
      "step": 120948
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.1718928813934326,
      "learning_rate": 0.0002765135518779913,
      "loss": 3.0244,
      "step": 120949
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0346641540527344,
      "learning_rate": 0.0002765094738728713,
      "loss": 2.9394,
      "step": 120950
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9443773031234741,
      "learning_rate": 0.0002765053958721186,
      "loss": 2.9573,
      "step": 120951
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9291242361068726,
      "learning_rate": 0.00027650131787573405,
      "loss": 2.935,
      "step": 120952
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.138648271560669,
      "learning_rate": 0.00027649723988371826,
      "loss": 2.8745,
      "step": 120953
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.579718589782715,
      "learning_rate": 0.0002764931618960722,
      "loss": 3.2321,
      "step": 120954
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.3129866123199463,
      "learning_rate": 0.0002764890839127964,
      "loss": 2.9359,
      "step": 120955
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.2058322429656982,
      "learning_rate": 0.0002764850059338917,
      "loss": 2.9835,
      "step": 120956
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.836867094039917,
      "learning_rate": 0.00027648092795935894,
      "loss": 2.8654,
      "step": 120957
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.4336977005004883,
      "learning_rate": 0.0002764768499891988,
      "loss": 3.053,
      "step": 120958
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.0193421840667725,
      "learning_rate": 0.0002764727720234121,
      "loss": 2.8055,
      "step": 120959
    },
    {
      "epoch": 1.57,
      "grad_norm": 2.4108080863952637,
      "learning_rate": 0.0002764686940619995,
      "loss": 3.1121,
      "step": 120960
    },
    {
      "epoch": 1.57,
      "grad_norm": 1.9401968717575073,
      "learning_rate": 0.00027646461610496184,
      "loss": 3.0689,
      "step": 120961
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.157396078109741,
      "learning_rate": 0.00027646053815229987,
      "loss": 3.0091,
      "step": 120962
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8426073789596558,
      "learning_rate": 0.0002764564602040143,
      "loss": 2.7242,
      "step": 120963
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0143790245056152,
      "learning_rate": 0.000276452382260106,
      "loss": 3.0772,
      "step": 120964
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4931204319000244,
      "learning_rate": 0.0002764483043205755,
      "loss": 2.9145,
      "step": 120965
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.5091311931610107,
      "learning_rate": 0.0002764442263854238,
      "loss": 2.8397,
      "step": 120966
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.560170888900757,
      "learning_rate": 0.0002764401484546515,
      "loss": 3.1518,
      "step": 120967
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0775911808013916,
      "learning_rate": 0.00027643607052825947,
      "loss": 3.0939,
      "step": 120968
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6239794492721558,
      "learning_rate": 0.00027643199260624844,
      "loss": 2.8867,
      "step": 120969
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.778385877609253,
      "learning_rate": 0.00027642791468861916,
      "loss": 2.8672,
      "step": 120970
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7629609107971191,
      "learning_rate": 0.00027642383677537233,
      "loss": 2.9864,
      "step": 120971
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8787199258804321,
      "learning_rate": 0.0002764197588665087,
      "loss": 2.9391,
      "step": 120972
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0405240058898926,
      "learning_rate": 0.0002764156809620292,
      "loss": 2.8836,
      "step": 120973
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9437439441680908,
      "learning_rate": 0.0002764116030619344,
      "loss": 2.813,
      "step": 120974
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8066786527633667,
      "learning_rate": 0.0002764075251662251,
      "loss": 2.9028,
      "step": 120975
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.476621627807617,
      "learning_rate": 0.00027640344727490216,
      "loss": 3.0191,
      "step": 120976
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9444692134857178,
      "learning_rate": 0.00027639936938796627,
      "loss": 3.148,
      "step": 120977
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0336315631866455,
      "learning_rate": 0.00027639529150541814,
      "loss": 3.0335,
      "step": 120978
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.860942006111145,
      "learning_rate": 0.00027639121362725853,
      "loss": 3.1402,
      "step": 120979
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.099034070968628,
      "learning_rate": 0.00027638713575348827,
      "loss": 3.0653,
      "step": 120980
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.5242571830749512,
      "learning_rate": 0.0002763830578841081,
      "loss": 3.0399,
      "step": 120981
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9414142370224,
      "learning_rate": 0.00027637898001911873,
      "loss": 2.9711,
      "step": 120982
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9789029359817505,
      "learning_rate": 0.00027637490215852114,
      "loss": 3.0237,
      "step": 120983
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.732792615890503,
      "learning_rate": 0.0002763708243023157,
      "loss": 3.1087,
      "step": 120984
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.546808958053589,
      "learning_rate": 0.0002763667464505034,
      "loss": 3.0271,
      "step": 120985
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.983785390853882,
      "learning_rate": 0.000276362668603085,
      "loss": 3.1861,
      "step": 120986
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8254742622375488,
      "learning_rate": 0.0002763585907600612,
      "loss": 3.0244,
      "step": 120987
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3633768558502197,
      "learning_rate": 0.0002763545129214328,
      "loss": 3.0166,
      "step": 120988
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6281590461730957,
      "learning_rate": 0.0002763504350872006,
      "loss": 2.9158,
      "step": 120989
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9275797605514526,
      "learning_rate": 0.00027634635725736525,
      "loss": 3.1742,
      "step": 120990
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7668620347976685,
      "learning_rate": 0.0002763422794319275,
      "loss": 2.8191,
      "step": 120991
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2631044387817383,
      "learning_rate": 0.00027633820161088824,
      "loss": 3.1474,
      "step": 120992
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8813327550888062,
      "learning_rate": 0.0002763341237942481,
      "loss": 3.2318,
      "step": 120993
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3031489849090576,
      "learning_rate": 0.0002763300459820079,
      "loss": 2.9278,
      "step": 120994
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.117218017578125,
      "learning_rate": 0.0002763259681741685,
      "loss": 2.8517,
      "step": 120995
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9355676174163818,
      "learning_rate": 0.0002763218903707305,
      "loss": 3.0532,
      "step": 120996
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1084964275360107,
      "learning_rate": 0.00027631781257169465,
      "loss": 2.9632,
      "step": 120997
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.8476758003234863,
      "learning_rate": 0.00027631373477706177,
      "loss": 2.9473,
      "step": 120998
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.945294737815857,
      "learning_rate": 0.00027630965698683264,
      "loss": 3.07,
      "step": 120999
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8354533910751343,
      "learning_rate": 0.0002763055792010079,
      "loss": 3.098,
      "step": 121000
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.062138795852661,
      "learning_rate": 0.00027630150141958864,
      "loss": 2.8596,
      "step": 121001
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.331545352935791,
      "learning_rate": 0.0002762974236425752,
      "loss": 3.1398,
      "step": 121002
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.945491313934326,
      "learning_rate": 0.00027629334586996856,
      "loss": 2.826,
      "step": 121003
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5224061012268066,
      "learning_rate": 0.0002762892681017694,
      "loss": 3.053,
      "step": 121004
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9782568216323853,
      "learning_rate": 0.0002762851903379785,
      "loss": 3.0142,
      "step": 121005
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.273808479309082,
      "learning_rate": 0.0002762811125785966,
      "loss": 3.0589,
      "step": 121006
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.753060817718506,
      "learning_rate": 0.0002762770348236247,
      "loss": 2.7934,
      "step": 121007
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6935477256774902,
      "learning_rate": 0.00027627295707306315,
      "loss": 2.9803,
      "step": 121008
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9212727546691895,
      "learning_rate": 0.0002762688793269129,
      "loss": 2.8574,
      "step": 121009
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.1191067695617676,
      "learning_rate": 0.0002762648015851748,
      "loss": 2.7728,
      "step": 121010
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8774378299713135,
      "learning_rate": 0.00027626072384784946,
      "loss": 3.2615,
      "step": 121011
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.485255718231201,
      "learning_rate": 0.0002762566461149377,
      "loss": 3.036,
      "step": 121012
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.151442050933838,
      "learning_rate": 0.0002762525683864404,
      "loss": 2.9783,
      "step": 121013
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.507065534591675,
      "learning_rate": 0.00027624849066235806,
      "loss": 2.7916,
      "step": 121014
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8532705307006836,
      "learning_rate": 0.0002762444129426916,
      "loss": 2.9136,
      "step": 121015
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.067981719970703,
      "learning_rate": 0.00027624033522744175,
      "loss": 2.9313,
      "step": 121016
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.863182544708252,
      "learning_rate": 0.0002762362575166093,
      "loss": 3.2676,
      "step": 121017
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.388561964035034,
      "learning_rate": 0.0002762321798101949,
      "loss": 3.0249,
      "step": 121018
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2789437770843506,
      "learning_rate": 0.00027622810210819955,
      "loss": 2.7624,
      "step": 121019
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8677009344100952,
      "learning_rate": 0.00027622402441062374,
      "loss": 2.87,
      "step": 121020
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.820888876914978,
      "learning_rate": 0.00027621994671746833,
      "loss": 3.1826,
      "step": 121021
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8732330799102783,
      "learning_rate": 0.0002762158690287341,
      "loss": 2.765,
      "step": 121022
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.055032730102539,
      "learning_rate": 0.00027621179134442174,
      "loss": 3.035,
      "step": 121023
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.985285758972168,
      "learning_rate": 0.00027620771366453203,
      "loss": 2.7228,
      "step": 121024
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.276071071624756,
      "learning_rate": 0.000276203635989066,
      "loss": 2.9211,
      "step": 121025
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8834123611450195,
      "learning_rate": 0.0002761995583180239,
      "loss": 2.843,
      "step": 121026
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.01885986328125,
      "learning_rate": 0.0002761954806514068,
      "loss": 2.8236,
      "step": 121027
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.909220814704895,
      "learning_rate": 0.0002761914029892154,
      "loss": 3.0475,
      "step": 121028
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.340007781982422,
      "learning_rate": 0.00027618732533145053,
      "loss": 3.039,
      "step": 121029
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9940605163574219,
      "learning_rate": 0.0002761832476781128,
      "loss": 2.8139,
      "step": 121030
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0687592029571533,
      "learning_rate": 0.0002761791700292032,
      "loss": 2.8826,
      "step": 121031
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8485302925109863,
      "learning_rate": 0.00027617509238472235,
      "loss": 2.9275,
      "step": 121032
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1377663612365723,
      "learning_rate": 0.00027617101474467084,
      "loss": 3.1615,
      "step": 121033
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.056748628616333,
      "learning_rate": 0.00027616693710904965,
      "loss": 2.8987,
      "step": 121034
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.732187271118164,
      "learning_rate": 0.0002761628594778595,
      "loss": 3.088,
      "step": 121035
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.6734185218811035,
      "learning_rate": 0.0002761587818511011,
      "loss": 2.89,
      "step": 121036
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.3556718826293945,
      "learning_rate": 0.00027615470422877526,
      "loss": 2.9803,
      "step": 121037
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1059460639953613,
      "learning_rate": 0.00027615062661088266,
      "loss": 2.953,
      "step": 121038
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.493279218673706,
      "learning_rate": 0.0002761465489974242,
      "loss": 3.0491,
      "step": 121039
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.322126626968384,
      "learning_rate": 0.0002761424713884005,
      "loss": 2.9009,
      "step": 121040
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.416588306427002,
      "learning_rate": 0.0002761383937838123,
      "loss": 3.1991,
      "step": 121041
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8331950902938843,
      "learning_rate": 0.0002761343161836605,
      "loss": 3.0842,
      "step": 121042
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.4993468523025513,
      "learning_rate": 0.0002761302385879457,
      "loss": 2.9541,
      "step": 121043
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.685007095336914,
      "learning_rate": 0.00027612616099666887,
      "loss": 2.8647,
      "step": 121044
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4725263118743896,
      "learning_rate": 0.00027612208340983054,
      "loss": 2.9343,
      "step": 121045
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8249961137771606,
      "learning_rate": 0.00027611800582743164,
      "loss": 3.0524,
      "step": 121046
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4866716861724854,
      "learning_rate": 0.00027611392824947276,
      "loss": 3.0559,
      "step": 121047
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9698781967163086,
      "learning_rate": 0.0002761098506759548,
      "loss": 2.929,
      "step": 121048
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9443987607955933,
      "learning_rate": 0.00027610577310687844,
      "loss": 2.9586,
      "step": 121049
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1720848083496094,
      "learning_rate": 0.0002761016955422445,
      "loss": 3.0547,
      "step": 121050
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0760393142700195,
      "learning_rate": 0.0002760976179820537,
      "loss": 3.1261,
      "step": 121051
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.840675950050354,
      "learning_rate": 0.00027609354042630683,
      "loss": 2.8525,
      "step": 121052
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1258485317230225,
      "learning_rate": 0.0002760894628750046,
      "loss": 3.0331,
      "step": 121053
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.628098964691162,
      "learning_rate": 0.0002760853853281478,
      "loss": 2.9276,
      "step": 121054
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7708282470703125,
      "learning_rate": 0.0002760813077857371,
      "loss": 3.1805,
      "step": 121055
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9050298929214478,
      "learning_rate": 0.00027607723024777344,
      "loss": 3.1328,
      "step": 121056
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8937588930130005,
      "learning_rate": 0.0002760731527142574,
      "loss": 2.9631,
      "step": 121057
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7750250101089478,
      "learning_rate": 0.0002760690751851898,
      "loss": 3.1473,
      "step": 121058
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.835862159729004,
      "learning_rate": 0.00027606499766057147,
      "loss": 2.8992,
      "step": 121059
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8139920234680176,
      "learning_rate": 0.00027606092014040306,
      "loss": 3.0366,
      "step": 121060
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8909307718276978,
      "learning_rate": 0.00027605684262468547,
      "loss": 2.8495,
      "step": 121061
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.617141842842102,
      "learning_rate": 0.0002760527651134194,
      "loss": 2.9335,
      "step": 121062
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8971868753433228,
      "learning_rate": 0.0002760486876066055,
      "loss": 2.6585,
      "step": 121063
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.151801824569702,
      "learning_rate": 0.0002760446101042445,
      "loss": 2.9561,
      "step": 121064
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1444926261901855,
      "learning_rate": 0.0002760405326063373,
      "loss": 3.0916,
      "step": 121065
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.109067678451538,
      "learning_rate": 0.0002760364551128847,
      "loss": 2.9632,
      "step": 121066
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8867686986923218,
      "learning_rate": 0.0002760323776238873,
      "loss": 3.0753,
      "step": 121067
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.942412257194519,
      "learning_rate": 0.00027602830013934604,
      "loss": 3.1869,
      "step": 121068
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.428615093231201,
      "learning_rate": 0.0002760242226592615,
      "loss": 2.7617,
      "step": 121069
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0010600090026855,
      "learning_rate": 0.0002760201451836345,
      "loss": 3.002,
      "step": 121070
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1035661697387695,
      "learning_rate": 0.0002760160677124658,
      "loss": 2.6247,
      "step": 121071
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.914151430130005,
      "learning_rate": 0.00027601199024575616,
      "loss": 3.1952,
      "step": 121072
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.947572112083435,
      "learning_rate": 0.0002760079127835063,
      "loss": 3.0193,
      "step": 121073
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.69611394405365,
      "learning_rate": 0.0002760038353257172,
      "loss": 3.1442,
      "step": 121074
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9814026355743408,
      "learning_rate": 0.00027599975787238933,
      "loss": 3.1085,
      "step": 121075
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.648308515548706,
      "learning_rate": 0.00027599568042352353,
      "loss": 2.8478,
      "step": 121076
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6398591995239258,
      "learning_rate": 0.0002759916029791206,
      "loss": 3.034,
      "step": 121077
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.622997522354126,
      "learning_rate": 0.0002759875255391813,
      "loss": 2.6746,
      "step": 121078
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1145477294921875,
      "learning_rate": 0.0002759834481037063,
      "loss": 2.7303,
      "step": 121079
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8495748043060303,
      "learning_rate": 0.0002759793706726967,
      "loss": 3.1625,
      "step": 121080
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.383471965789795,
      "learning_rate": 0.0002759752932461527,
      "loss": 2.6731,
      "step": 121081
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.043235778808594,
      "learning_rate": 0.0002759712158240754,
      "loss": 2.9516,
      "step": 121082
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5196616649627686,
      "learning_rate": 0.00027596713840646555,
      "loss": 3.1277,
      "step": 121083
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7482638359069824,
      "learning_rate": 0.00027596306099332384,
      "loss": 2.9016,
      "step": 121084
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.463371753692627,
      "learning_rate": 0.000275958983584651,
      "loss": 2.7853,
      "step": 121085
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7742911577224731,
      "learning_rate": 0.00027595490618044803,
      "loss": 2.9578,
      "step": 121086
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.593248724937439,
      "learning_rate": 0.00027595082878071535,
      "loss": 2.9869,
      "step": 121087
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.655199408531189,
      "learning_rate": 0.00027594675138545386,
      "loss": 2.7353,
      "step": 121088
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.322357416152954,
      "learning_rate": 0.00027594267399466433,
      "loss": 2.8114,
      "step": 121089
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8100154399871826,
      "learning_rate": 0.0002759385966083475,
      "loss": 3.1641,
      "step": 121090
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1630043983459473,
      "learning_rate": 0.00027593451922650416,
      "loss": 3.2323,
      "step": 121091
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.1221117973327637,
      "learning_rate": 0.00027593044184913516,
      "loss": 3.1862,
      "step": 121092
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.979694128036499,
      "learning_rate": 0.000275926364476241,
      "loss": 3.1064,
      "step": 121093
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.118924617767334,
      "learning_rate": 0.0002759222871078226,
      "loss": 3.0239,
      "step": 121094
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.152930974960327,
      "learning_rate": 0.00027591820974388073,
      "loss": 2.821,
      "step": 121095
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.898881196975708,
      "learning_rate": 0.0002759141323844161,
      "loss": 3.201,
      "step": 121096
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7330952882766724,
      "learning_rate": 0.00027591005502942945,
      "loss": 2.8929,
      "step": 121097
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1923704147338867,
      "learning_rate": 0.00027590597767892177,
      "loss": 3.084,
      "step": 121098
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.816350221633911,
      "learning_rate": 0.0002759019003328934,
      "loss": 2.6842,
      "step": 121099
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9014198780059814,
      "learning_rate": 0.0002758978229913454,
      "loss": 2.8617,
      "step": 121100
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.0723321437835693,
      "learning_rate": 0.00027589374565427843,
      "loss": 2.9352,
      "step": 121101
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.518937826156616,
      "learning_rate": 0.00027588966832169326,
      "loss": 3.1607,
      "step": 121102
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8962870836257935,
      "learning_rate": 0.0002758855909935907,
      "loss": 3.0948,
      "step": 121103
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.182333469390869,
      "learning_rate": 0.00027588151366997153,
      "loss": 2.9812,
      "step": 121104
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8407645225524902,
      "learning_rate": 0.0002758774363508363,
      "loss": 2.8924,
      "step": 121105
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.493819236755371,
      "learning_rate": 0.000275873359036186,
      "loss": 2.7673,
      "step": 121106
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8646025657653809,
      "learning_rate": 0.00027586928172602124,
      "loss": 3.1114,
      "step": 121107
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9795255661010742,
      "learning_rate": 0.0002758652044203428,
      "loss": 3.1366,
      "step": 121108
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7090353965759277,
      "learning_rate": 0.00027586112711915154,
      "loss": 3.2,
      "step": 121109
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.046581983566284,
      "learning_rate": 0.00027585704982244815,
      "loss": 3.0135,
      "step": 121110
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9223335981369019,
      "learning_rate": 0.00027585297253023347,
      "loss": 2.9847,
      "step": 121111
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.02290415763855,
      "learning_rate": 0.0002758488952425081,
      "loss": 2.9058,
      "step": 121112
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9827862977981567,
      "learning_rate": 0.0002758448179592728,
      "loss": 2.9954,
      "step": 121113
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0400888919830322,
      "learning_rate": 0.00027584074068052854,
      "loss": 3.052,
      "step": 121114
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2302887439727783,
      "learning_rate": 0.00027583666340627583,
      "loss": 2.942,
      "step": 121115
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7462706565856934,
      "learning_rate": 0.00027583258613651557,
      "loss": 2.9592,
      "step": 121116
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9127708673477173,
      "learning_rate": 0.0002758285088712486,
      "loss": 3.1442,
      "step": 121117
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.110424757003784,
      "learning_rate": 0.0002758244316104755,
      "loss": 2.8296,
      "step": 121118
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.470254421234131,
      "learning_rate": 0.00027582035435419705,
      "loss": 2.9374,
      "step": 121119
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.806799292564392,
      "learning_rate": 0.000275816277102414,
      "loss": 2.918,
      "step": 121120
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0988688468933105,
      "learning_rate": 0.00027581219985512725,
      "loss": 2.9195,
      "step": 121121
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8650586605072021,
      "learning_rate": 0.00027580812261233744,
      "loss": 2.8415,
      "step": 121122
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8898979425430298,
      "learning_rate": 0.0002758040453740454,
      "loss": 2.9386,
      "step": 121123
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2703139781951904,
      "learning_rate": 0.00027579996814025185,
      "loss": 2.9807,
      "step": 121124
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7679078578948975,
      "learning_rate": 0.0002757958909109575,
      "loss": 2.799,
      "step": 121125
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.368614435195923,
      "learning_rate": 0.00027579181368616316,
      "loss": 3.2686,
      "step": 121126
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8601288795471191,
      "learning_rate": 0.0002757877364658695,
      "loss": 3.0079,
      "step": 121127
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2360429763793945,
      "learning_rate": 0.0002757836592500774,
      "loss": 2.7491,
      "step": 121128
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2413744926452637,
      "learning_rate": 0.0002757795820387877,
      "loss": 3.0188,
      "step": 121129
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.016730308532715,
      "learning_rate": 0.00027577550483200093,
      "loss": 2.8254,
      "step": 121130
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5432937145233154,
      "learning_rate": 0.000275771427629718,
      "loss": 2.8042,
      "step": 121131
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.814016342163086,
      "learning_rate": 0.00027576735043193955,
      "loss": 3.057,
      "step": 121132
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2168595790863037,
      "learning_rate": 0.0002757632732386664,
      "loss": 3.0617,
      "step": 121133
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0233466625213623,
      "learning_rate": 0.00027575919604989934,
      "loss": 2.8891,
      "step": 121134
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.496622323989868,
      "learning_rate": 0.00027575511886563915,
      "loss": 2.8046,
      "step": 121135
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2372381687164307,
      "learning_rate": 0.0002757510416858865,
      "loss": 3.1018,
      "step": 121136
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.551696538925171,
      "learning_rate": 0.0002757469645106422,
      "loss": 3.0011,
      "step": 121137
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.395312547683716,
      "learning_rate": 0.000275742887339907,
      "loss": 3.0332,
      "step": 121138
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.379460096359253,
      "learning_rate": 0.00027573881017368165,
      "loss": 3.008,
      "step": 121139
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.300445795059204,
      "learning_rate": 0.00027573473301196685,
      "loss": 3.0434,
      "step": 121140
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.627998113632202,
      "learning_rate": 0.00027573065585476353,
      "loss": 2.8373,
      "step": 121141
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7811732292175293,
      "learning_rate": 0.0002757265787020723,
      "loss": 3.0588,
      "step": 121142
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4530224800109863,
      "learning_rate": 0.0002757225015538939,
      "loss": 2.9762,
      "step": 121143
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6891062259674072,
      "learning_rate": 0.0002757184244102292,
      "loss": 2.8737,
      "step": 121144
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.7439656257629395,
      "learning_rate": 0.00027571434727107896,
      "loss": 2.8555,
      "step": 121145
    },
    {
      "epoch": 1.58,
      "grad_norm": 5.029619216918945,
      "learning_rate": 0.0002757102701364438,
      "loss": 3.0723,
      "step": 121146
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.903848171234131,
      "learning_rate": 0.0002757061930063246,
      "loss": 2.9678,
      "step": 121147
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.900904893875122,
      "learning_rate": 0.000275702115880722,
      "loss": 2.9419,
      "step": 121148
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8321661949157715,
      "learning_rate": 0.0002756980387596369,
      "loss": 2.9295,
      "step": 121149
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.9671835899353027,
      "learning_rate": 0.00027569396164306995,
      "loss": 2.9249,
      "step": 121150
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.3641204833984375,
      "learning_rate": 0.00027568988453102196,
      "loss": 2.9365,
      "step": 121151
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.255861282348633,
      "learning_rate": 0.0002756858074234937,
      "loss": 2.9535,
      "step": 121152
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3961546421051025,
      "learning_rate": 0.000275681730320486,
      "loss": 2.8094,
      "step": 121153
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5869996547698975,
      "learning_rate": 0.00027567765322199944,
      "loss": 2.8928,
      "step": 121154
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.339001178741455,
      "learning_rate": 0.0002756735761280348,
      "loss": 3.1016,
      "step": 121155
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.691628932952881,
      "learning_rate": 0.000275669499038593,
      "loss": 2.9523,
      "step": 121156
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5889732837677,
      "learning_rate": 0.0002756654219536746,
      "loss": 2.7613,
      "step": 121157
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.994142770767212,
      "learning_rate": 0.0002756613448732805,
      "loss": 3.1133,
      "step": 121158
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6651363372802734,
      "learning_rate": 0.00027565726779741153,
      "loss": 2.7309,
      "step": 121159
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.50232195854187,
      "learning_rate": 0.00027565319072606824,
      "loss": 2.9745,
      "step": 121160
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7295050621032715,
      "learning_rate": 0.0002756491136592515,
      "loss": 3.0876,
      "step": 121161
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.899847149848938,
      "learning_rate": 0.00027564503659696193,
      "loss": 3.0364,
      "step": 121162
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.224092960357666,
      "learning_rate": 0.00027564095953920055,
      "loss": 2.7834,
      "step": 121163
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.410525321960449,
      "learning_rate": 0.0002756368824859679,
      "loss": 2.9198,
      "step": 121164
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.848175048828125,
      "learning_rate": 0.00027563280543726494,
      "loss": 3.0274,
      "step": 121165
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9813259840011597,
      "learning_rate": 0.0002756287283930922,
      "loss": 2.7336,
      "step": 121166
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.492919921875,
      "learning_rate": 0.0002756246513534505,
      "loss": 3.101,
      "step": 121167
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0306360721588135,
      "learning_rate": 0.0002756205743183407,
      "loss": 2.8215,
      "step": 121168
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4770004749298096,
      "learning_rate": 0.00027561649728776344,
      "loss": 2.9175,
      "step": 121169
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5765225887298584,
      "learning_rate": 0.0002756124202617195,
      "loss": 3.0229,
      "step": 121170
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.014235258102417,
      "learning_rate": 0.0002756083432402099,
      "loss": 3.0762,
      "step": 121171
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.185023784637451,
      "learning_rate": 0.00027560426622323496,
      "loss": 3.0851,
      "step": 121172
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2373194694519043,
      "learning_rate": 0.0002756001892107957,
      "loss": 2.9782,
      "step": 121173
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7304030656814575,
      "learning_rate": 0.00027559611220289283,
      "loss": 3.0215,
      "step": 121174
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.070580005645752,
      "learning_rate": 0.00027559203519952706,
      "loss": 2.9216,
      "step": 121175
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4590842723846436,
      "learning_rate": 0.0002755879582006992,
      "loss": 2.6987,
      "step": 121176
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4356024265289307,
      "learning_rate": 0.00027558388120641,
      "loss": 2.9831,
      "step": 121177
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0430967807769775,
      "learning_rate": 0.00027557980421666036,
      "loss": 2.8647,
      "step": 121178
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.0200562477111816,
      "learning_rate": 0.00027557572723145075,
      "loss": 2.7454,
      "step": 121179
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.4923245906829834,
      "learning_rate": 0.0002755716502507821,
      "loss": 3.0998,
      "step": 121180
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.853022813796997,
      "learning_rate": 0.0002755675732746551,
      "loss": 2.8325,
      "step": 121181
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.053640365600586,
      "learning_rate": 0.00027556349630307057,
      "loss": 3.1797,
      "step": 121182
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6286094188690186,
      "learning_rate": 0.0002755594193360293,
      "loss": 3.0631,
      "step": 121183
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8523473739624023,
      "learning_rate": 0.00027555534237353206,
      "loss": 3.0166,
      "step": 121184
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0168023109436035,
      "learning_rate": 0.00027555126541557945,
      "loss": 2.9587,
      "step": 121185
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.54445481300354,
      "learning_rate": 0.0002755471884621723,
      "loss": 3.0639,
      "step": 121186
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2500734329223633,
      "learning_rate": 0.0002755431115133114,
      "loss": 3.0352,
      "step": 121187
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0006754398345947,
      "learning_rate": 0.00027553903456899745,
      "loss": 2.8991,
      "step": 121188
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.216066837310791,
      "learning_rate": 0.0002755349576292313,
      "loss": 2.9916,
      "step": 121189
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9139573574066162,
      "learning_rate": 0.0002755308806940138,
      "loss": 3.1091,
      "step": 121190
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9171128273010254,
      "learning_rate": 0.00027552680376334544,
      "loss": 3.0721,
      "step": 121191
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.909104585647583,
      "learning_rate": 0.00027552272683722707,
      "loss": 3.2117,
      "step": 121192
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0304477214813232,
      "learning_rate": 0.0002755186499156595,
      "loss": 2.8742,
      "step": 121193
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.874381422996521,
      "learning_rate": 0.0002755145729986435,
      "loss": 3.0917,
      "step": 121194
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4151055812835693,
      "learning_rate": 0.0002755104960861798,
      "loss": 2.9271,
      "step": 121195
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.884372353553772,
      "learning_rate": 0.0002755064191782693,
      "loss": 2.9067,
      "step": 121196
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8991930484771729,
      "learning_rate": 0.00027550234227491247,
      "loss": 2.9174,
      "step": 121197
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1779191493988037,
      "learning_rate": 0.0002754982653761102,
      "loss": 2.8715,
      "step": 121198
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0895614624023438,
      "learning_rate": 0.0002754941884818633,
      "loss": 3.1975,
      "step": 121199
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9859273433685303,
      "learning_rate": 0.0002754901115921725,
      "loss": 2.9618,
      "step": 121200
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.264552593231201,
      "learning_rate": 0.00027548603470703854,
      "loss": 2.9348,
      "step": 121201
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.198174476623535,
      "learning_rate": 0.0002754819578264623,
      "loss": 3.2928,
      "step": 121202
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9063326120376587,
      "learning_rate": 0.0002754778809504443,
      "loss": 2.935,
      "step": 121203
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.757270097732544,
      "learning_rate": 0.00027547380407898545,
      "loss": 3.0194,
      "step": 121204
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.188612699508667,
      "learning_rate": 0.0002754697272120865,
      "loss": 3.1389,
      "step": 121205
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.26056170463562,
      "learning_rate": 0.0002754656503497481,
      "loss": 2.8829,
      "step": 121206
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.450803518295288,
      "learning_rate": 0.00027546157349197115,
      "loss": 2.9593,
      "step": 121207
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9948989152908325,
      "learning_rate": 0.0002754574966387565,
      "loss": 2.9036,
      "step": 121208
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8432484865188599,
      "learning_rate": 0.00027545341979010464,
      "loss": 2.9485,
      "step": 121209
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0864717960357666,
      "learning_rate": 0.0002754493429460164,
      "loss": 2.9379,
      "step": 121210
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2908051013946533,
      "learning_rate": 0.0002754452661064926,
      "loss": 2.8622,
      "step": 121211
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9940290451049805,
      "learning_rate": 0.000275441189271534,
      "loss": 2.7445,
      "step": 121212
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.819271445274353,
      "learning_rate": 0.00027543711244114137,
      "loss": 2.9226,
      "step": 121213
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.090919017791748,
      "learning_rate": 0.00027543303561531555,
      "loss": 2.871,
      "step": 121214
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8269128799438477,
      "learning_rate": 0.00027542895879405704,
      "loss": 3.0674,
      "step": 121215
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.995749831199646,
      "learning_rate": 0.00027542488197736687,
      "loss": 2.8077,
      "step": 121216
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1876492500305176,
      "learning_rate": 0.0002754208051652456,
      "loss": 2.8603,
      "step": 121217
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1052212715148926,
      "learning_rate": 0.0002754167283576941,
      "loss": 3.1045,
      "step": 121218
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.188877820968628,
      "learning_rate": 0.000275412651554713,
      "loss": 3.1153,
      "step": 121219
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.904343605041504,
      "learning_rate": 0.0002754085747563033,
      "loss": 2.8958,
      "step": 121220
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7147763967514038,
      "learning_rate": 0.0002754044979624655,
      "loss": 2.8079,
      "step": 121221
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.168290853500366,
      "learning_rate": 0.0002754004211732005,
      "loss": 3.0313,
      "step": 121222
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1762003898620605,
      "learning_rate": 0.0002753963443885091,
      "loss": 3.1464,
      "step": 121223
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1190297603607178,
      "learning_rate": 0.0002753922676083919,
      "loss": 3.0084,
      "step": 121224
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.72056245803833,
      "learning_rate": 0.00027538819083284974,
      "loss": 2.9166,
      "step": 121225
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3214833736419678,
      "learning_rate": 0.00027538411406188346,
      "loss": 3.0186,
      "step": 121226
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.01997709274292,
      "learning_rate": 0.00027538003729549366,
      "loss": 3.2086,
      "step": 121227
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8684942722320557,
      "learning_rate": 0.00027537596053368114,
      "loss": 2.7137,
      "step": 121228
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.053966999053955,
      "learning_rate": 0.0002753718837764467,
      "loss": 2.8548,
      "step": 121229
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9302531480789185,
      "learning_rate": 0.0002753678070237912,
      "loss": 3.0908,
      "step": 121230
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0253188610076904,
      "learning_rate": 0.0002753637302757152,
      "loss": 3.1596,
      "step": 121231
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0963549613952637,
      "learning_rate": 0.00027535965353221964,
      "loss": 2.9541,
      "step": 121232
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.452265501022339,
      "learning_rate": 0.0002753555767933051,
      "loss": 3.1324,
      "step": 121233
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.396296739578247,
      "learning_rate": 0.00027535150005897243,
      "loss": 2.9002,
      "step": 121234
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0506062507629395,
      "learning_rate": 0.0002753474233292224,
      "loss": 2.9289,
      "step": 121235
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2134220600128174,
      "learning_rate": 0.00027534334660405573,
      "loss": 2.807,
      "step": 121236
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6806702613830566,
      "learning_rate": 0.00027533926988347325,
      "loss": 3.0534,
      "step": 121237
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.723367929458618,
      "learning_rate": 0.0002753351931674757,
      "loss": 2.9536,
      "step": 121238
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7719509601593018,
      "learning_rate": 0.0002753311164560637,
      "loss": 2.9575,
      "step": 121239
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.0468838214874268,
      "learning_rate": 0.0002753270397492381,
      "loss": 2.9115,
      "step": 121240
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2686586380004883,
      "learning_rate": 0.00027532296304699967,
      "loss": 3.074,
      "step": 121241
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9628050327301025,
      "learning_rate": 0.0002753188863493492,
      "loss": 3.1406,
      "step": 121242
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9362213611602783,
      "learning_rate": 0.0002753148096562874,
      "loss": 3.1583,
      "step": 121243
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1534745693206787,
      "learning_rate": 0.0002753107329678151,
      "loss": 3.2089,
      "step": 121244
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9685535430908203,
      "learning_rate": 0.000275306656283933,
      "loss": 3.0262,
      "step": 121245
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.5128297805786133,
      "learning_rate": 0.00027530257960464184,
      "loss": 3.0832,
      "step": 121246
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.72617769241333,
      "learning_rate": 0.00027529850292994237,
      "loss": 2.9969,
      "step": 121247
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8045654296875,
      "learning_rate": 0.00027529442625983536,
      "loss": 2.8664,
      "step": 121248
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9994266033172607,
      "learning_rate": 0.00027529034959432157,
      "loss": 3.0372,
      "step": 121249
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.955676794052124,
      "learning_rate": 0.0002752862729334018,
      "loss": 2.9017,
      "step": 121250
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.969843864440918,
      "learning_rate": 0.0002752821962770769,
      "loss": 2.939,
      "step": 121251
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8727331161499023,
      "learning_rate": 0.0002752781196253474,
      "loss": 3.0884,
      "step": 121252
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.981588363647461,
      "learning_rate": 0.00027527404297821413,
      "loss": 2.9645,
      "step": 121253
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9445652961730957,
      "learning_rate": 0.0002752699663356779,
      "loss": 2.9444,
      "step": 121254
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8745722770690918,
      "learning_rate": 0.00027526588969773943,
      "loss": 2.9764,
      "step": 121255
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9163838624954224,
      "learning_rate": 0.0002752618130643995,
      "loss": 3.0835,
      "step": 121256
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.209432363510132,
      "learning_rate": 0.000275257736435659,
      "loss": 3.1069,
      "step": 121257
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8145525455474854,
      "learning_rate": 0.00027525365981151845,
      "loss": 2.9059,
      "step": 121258
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8079705238342285,
      "learning_rate": 0.0002752495831919787,
      "loss": 3.019,
      "step": 121259
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.001218795776367,
      "learning_rate": 0.0002752455065770405,
      "loss": 3.0155,
      "step": 121260
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9409540891647339,
      "learning_rate": 0.0002752414299667046,
      "loss": 2.95,
      "step": 121261
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3831706047058105,
      "learning_rate": 0.0002752373533609719,
      "loss": 3.0938,
      "step": 121262
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.982239007949829,
      "learning_rate": 0.00027523327675984307,
      "loss": 2.9631,
      "step": 121263
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8481645584106445,
      "learning_rate": 0.00027522920016331874,
      "loss": 3.1047,
      "step": 121264
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3173470497131348,
      "learning_rate": 0.00027522512357139973,
      "loss": 3.02,
      "step": 121265
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8587042093276978,
      "learning_rate": 0.0002752210469840869,
      "loss": 3.1007,
      "step": 121266
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7959922552108765,
      "learning_rate": 0.00027521697040138094,
      "loss": 3.0456,
      "step": 121267
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8941067457199097,
      "learning_rate": 0.0002752128938232826,
      "loss": 2.8274,
      "step": 121268
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7456841468811035,
      "learning_rate": 0.0002752088172497927,
      "loss": 2.8598,
      "step": 121269
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4563755989074707,
      "learning_rate": 0.0002752047406809119,
      "loss": 3.0099,
      "step": 121270
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.61171293258667,
      "learning_rate": 0.00027520066411664104,
      "loss": 3.0762,
      "step": 121271
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.591212511062622,
      "learning_rate": 0.00027519658755698084,
      "loss": 2.6118,
      "step": 121272
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2035131454467773,
      "learning_rate": 0.000275192511001932,
      "loss": 3.0703,
      "step": 121273
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7872154712677002,
      "learning_rate": 0.00027518843445149534,
      "loss": 3.1942,
      "step": 121274
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.603008508682251,
      "learning_rate": 0.00027518435790567183,
      "loss": 2.9582,
      "step": 121275
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9515012502670288,
      "learning_rate": 0.0002751802813644618,
      "loss": 3.1214,
      "step": 121276
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.947820782661438,
      "learning_rate": 0.0002751762048278663,
      "loss": 2.9465,
      "step": 121277
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8739169836044312,
      "learning_rate": 0.0002751721282958859,
      "loss": 3.0241,
      "step": 121278
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1985671520233154,
      "learning_rate": 0.00027516805176852155,
      "loss": 2.9822,
      "step": 121279
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9606720209121704,
      "learning_rate": 0.00027516397524577393,
      "loss": 3.0536,
      "step": 121280
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.086778163909912,
      "learning_rate": 0.0002751598987276439,
      "loss": 3.0799,
      "step": 121281
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9746476411819458,
      "learning_rate": 0.000275155822214132,
      "loss": 3.1229,
      "step": 121282
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1723122596740723,
      "learning_rate": 0.0002751517457052391,
      "loss": 2.8489,
      "step": 121283
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4805870056152344,
      "learning_rate": 0.00027514766920096596,
      "loss": 2.8657,
      "step": 121284
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.014432191848755,
      "learning_rate": 0.0002751435927013133,
      "loss": 3.2832,
      "step": 121285
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0806822776794434,
      "learning_rate": 0.000275139516206282,
      "loss": 3.0483,
      "step": 121286
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8564879894256592,
      "learning_rate": 0.0002751354397158728,
      "loss": 2.8063,
      "step": 121287
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2175865173339844,
      "learning_rate": 0.0002751313632300863,
      "loss": 2.9986,
      "step": 121288
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.710604667663574,
      "learning_rate": 0.0002751272867489233,
      "loss": 3.0509,
      "step": 121289
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6737236976623535,
      "learning_rate": 0.00027512321027238463,
      "loss": 2.9722,
      "step": 121290
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.112356662750244,
      "learning_rate": 0.000275119133800471,
      "loss": 2.873,
      "step": 121291
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7280460596084595,
      "learning_rate": 0.00027511505733318327,
      "loss": 2.874,
      "step": 121292
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5415966510772705,
      "learning_rate": 0.0002751109808705222,
      "loss": 3.2221,
      "step": 121293
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.542660713195801,
      "learning_rate": 0.0002751069044124883,
      "loss": 3.0562,
      "step": 121294
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.007986307144165,
      "learning_rate": 0.00027510282795908253,
      "loss": 2.7452,
      "step": 121295
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.327335834503174,
      "learning_rate": 0.00027509875151030563,
      "loss": 2.8303,
      "step": 121296
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.069622278213501,
      "learning_rate": 0.00027509467506615833,
      "loss": 3.0524,
      "step": 121297
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9235190153121948,
      "learning_rate": 0.0002750905986266414,
      "loss": 3.0579,
      "step": 121298
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.295928955078125,
      "learning_rate": 0.00027508652219175567,
      "loss": 3.0845,
      "step": 121299
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9886541366577148,
      "learning_rate": 0.00027508244576150183,
      "loss": 3.0209,
      "step": 121300
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0385851860046387,
      "learning_rate": 0.0002750783693358805,
      "loss": 2.9895,
      "step": 121301
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0555477142333984,
      "learning_rate": 0.00027507429291489263,
      "loss": 2.9426,
      "step": 121302
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9248206615447998,
      "learning_rate": 0.00027507021649853897,
      "loss": 3.1938,
      "step": 121303
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0753846168518066,
      "learning_rate": 0.00027506614008682015,
      "loss": 3.1183,
      "step": 121304
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.595791339874268,
      "learning_rate": 0.00027506206367973706,
      "loss": 3.0024,
      "step": 121305
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.158040761947632,
      "learning_rate": 0.0002750579872772904,
      "loss": 3.1828,
      "step": 121306
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9346609115600586,
      "learning_rate": 0.000275053910879481,
      "loss": 3.0261,
      "step": 121307
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.034532308578491,
      "learning_rate": 0.00027504983448630946,
      "loss": 3.0886,
      "step": 121308
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8640239238739014,
      "learning_rate": 0.00027504575809777663,
      "loss": 3.0221,
      "step": 121309
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.604123592376709,
      "learning_rate": 0.0002750416817138832,
      "loss": 2.962,
      "step": 121310
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9998575448989868,
      "learning_rate": 0.0002750376053346301,
      "loss": 2.9884,
      "step": 121311
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9167572259902954,
      "learning_rate": 0.000275033528960018,
      "loss": 2.9237,
      "step": 121312
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.902193546295166,
      "learning_rate": 0.0002750294525900475,
      "loss": 3.0035,
      "step": 121313
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9290908575057983,
      "learning_rate": 0.0002750253762247196,
      "loss": 3.1754,
      "step": 121314
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.120957851409912,
      "learning_rate": 0.000275021299864035,
      "loss": 3.1492,
      "step": 121315
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1512861251831055,
      "learning_rate": 0.0002750172235079943,
      "loss": 3.045,
      "step": 121316
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7926349639892578,
      "learning_rate": 0.0002750131471565984,
      "loss": 2.9711,
      "step": 121317
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9033304452896118,
      "learning_rate": 0.00027500907080984805,
      "loss": 2.8026,
      "step": 121318
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1701691150665283,
      "learning_rate": 0.000275004994467744,
      "loss": 2.8678,
      "step": 121319
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4235222339630127,
      "learning_rate": 0.000275000918130287,
      "loss": 2.8377,
      "step": 121320
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9071249961853027,
      "learning_rate": 0.0002749968417974777,
      "loss": 2.7341,
      "step": 121321
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.860702633857727,
      "learning_rate": 0.00027499276546931705,
      "loss": 3.0448,
      "step": 121322
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.306767463684082,
      "learning_rate": 0.00027498868914580573,
      "loss": 2.9771,
      "step": 121323
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9228684902191162,
      "learning_rate": 0.0002749846128269445,
      "loss": 2.8857,
      "step": 121324
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7623165845870972,
      "learning_rate": 0.00027498053651273406,
      "loss": 3.007,
      "step": 121325
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8076845407485962,
      "learning_rate": 0.0002749764602031751,
      "loss": 2.899,
      "step": 121326
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8816568851470947,
      "learning_rate": 0.0002749723838982686,
      "loss": 2.9614,
      "step": 121327
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9010754823684692,
      "learning_rate": 0.0002749683075980152,
      "loss": 2.9035,
      "step": 121328
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1318867206573486,
      "learning_rate": 0.0002749642313024157,
      "loss": 2.9567,
      "step": 121329
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.973699927330017,
      "learning_rate": 0.0002749601550114709,
      "loss": 3.1647,
      "step": 121330
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8426944017410278,
      "learning_rate": 0.0002749560787251813,
      "loss": 2.8612,
      "step": 121331
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7912323474884033,
      "learning_rate": 0.00027495200244354793,
      "loss": 2.971,
      "step": 121332
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7619308233261108,
      "learning_rate": 0.0002749479261665714,
      "loss": 3.2177,
      "step": 121333
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8062899112701416,
      "learning_rate": 0.0002749438498942525,
      "loss": 3.012,
      "step": 121334
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.821077823638916,
      "learning_rate": 0.0002749397736265921,
      "loss": 3.0505,
      "step": 121335
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8488847017288208,
      "learning_rate": 0.0002749356973635909,
      "loss": 3.1914,
      "step": 121336
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.187788486480713,
      "learning_rate": 0.0002749316211052496,
      "loss": 2.7858,
      "step": 121337
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2280495166778564,
      "learning_rate": 0.00027492754485156887,
      "loss": 2.993,
      "step": 121338
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1352317333221436,
      "learning_rate": 0.00027492346860254966,
      "loss": 3.1533,
      "step": 121339
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5650453567504883,
      "learning_rate": 0.00027491939235819264,
      "loss": 3.0655,
      "step": 121340
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4144153594970703,
      "learning_rate": 0.00027491531611849854,
      "loss": 2.9556,
      "step": 121341
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.002394914627075,
      "learning_rate": 0.0002749112398834683,
      "loss": 3.1247,
      "step": 121342
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9253294467926025,
      "learning_rate": 0.0002749071636531025,
      "loss": 2.8555,
      "step": 121343
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2889251708984375,
      "learning_rate": 0.0002749030874274018,
      "loss": 3.0478,
      "step": 121344
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.267362594604492,
      "learning_rate": 0.00027489901120636716,
      "loss": 3.2057,
      "step": 121345
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7539784908294678,
      "learning_rate": 0.00027489493498999924,
      "loss": 2.9547,
      "step": 121346
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9970407485961914,
      "learning_rate": 0.00027489085877829887,
      "loss": 3.2755,
      "step": 121347
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0329582691192627,
      "learning_rate": 0.00027488678257126686,
      "loss": 3.069,
      "step": 121348
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.244774341583252,
      "learning_rate": 0.00027488270636890375,
      "loss": 2.9399,
      "step": 121349
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.5221666097640991,
      "learning_rate": 0.0002748786301712104,
      "loss": 2.9915,
      "step": 121350
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.905295729637146,
      "learning_rate": 0.0002748745539781876,
      "loss": 2.9323,
      "step": 121351
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8338029384613037,
      "learning_rate": 0.0002748704777898361,
      "loss": 3.0794,
      "step": 121352
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7351856231689453,
      "learning_rate": 0.0002748664016061566,
      "loss": 2.9786,
      "step": 121353
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.5634599924087524,
      "learning_rate": 0.0002748623254271501,
      "loss": 3.0802,
      "step": 121354
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.787144422531128,
      "learning_rate": 0.000274858249252817,
      "loss": 3.0929,
      "step": 121355
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0599400997161865,
      "learning_rate": 0.00027485417308315826,
      "loss": 2.919,
      "step": 121356
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1649723052978516,
      "learning_rate": 0.0002748500969181746,
      "loss": 3.2147,
      "step": 121357
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.239858388900757,
      "learning_rate": 0.00027484602075786676,
      "loss": 3.1245,
      "step": 121358
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7407641410827637,
      "learning_rate": 0.0002748419446022356,
      "loss": 3.1271,
      "step": 121359
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9476730823516846,
      "learning_rate": 0.0002748378684512818,
      "loss": 2.8337,
      "step": 121360
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9401174783706665,
      "learning_rate": 0.00027483379230500604,
      "loss": 3.2758,
      "step": 121361
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9536124467849731,
      "learning_rate": 0.00027482971616340916,
      "loss": 3.0005,
      "step": 121362
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.744862675666809,
      "learning_rate": 0.0002748256400264919,
      "loss": 2.8935,
      "step": 121363
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9549014568328857,
      "learning_rate": 0.00027482156389425504,
      "loss": 2.7451,
      "step": 121364
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.787853479385376,
      "learning_rate": 0.0002748174877666993,
      "loss": 3.2022,
      "step": 121365
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7904720306396484,
      "learning_rate": 0.0002748134116438257,
      "loss": 3.0068,
      "step": 121366
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2712655067443848,
      "learning_rate": 0.0002748093355256345,
      "loss": 2.8578,
      "step": 121367
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.860685110092163,
      "learning_rate": 0.00027480525941212674,
      "loss": 3.2295,
      "step": 121368
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7818565368652344,
      "learning_rate": 0.00027480118330330323,
      "loss": 3.0138,
      "step": 121369
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1140503883361816,
      "learning_rate": 0.0002747971071991646,
      "loss": 2.6623,
      "step": 121370
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8422266244888306,
      "learning_rate": 0.0002747930310997117,
      "loss": 3.1693,
      "step": 121371
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0700812339782715,
      "learning_rate": 0.0002747889550049454,
      "loss": 2.906,
      "step": 121372
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.815104603767395,
      "learning_rate": 0.0002747848789148661,
      "loss": 3.3099,
      "step": 121373
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1366357803344727,
      "learning_rate": 0.0002747808028294748,
      "loss": 2.9994,
      "step": 121374
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8274972438812256,
      "learning_rate": 0.00027477672674877223,
      "loss": 3.0233,
      "step": 121375
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.937265157699585,
      "learning_rate": 0.00027477265067275917,
      "loss": 2.7504,
      "step": 121376
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3272650241851807,
      "learning_rate": 0.0002747685746014363,
      "loss": 3.0349,
      "step": 121377
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.27710223197937,
      "learning_rate": 0.00027476449853480446,
      "loss": 2.915,
      "step": 121378
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5015218257904053,
      "learning_rate": 0.0002747604224728645,
      "loss": 3.0525,
      "step": 121379
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.1076231002807617,
      "learning_rate": 0.0002747563464156169,
      "loss": 2.7456,
      "step": 121380
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.113112688064575,
      "learning_rate": 0.00027475227036306263,
      "loss": 3.0038,
      "step": 121381
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.5673067569732666,
      "learning_rate": 0.00027474819431520234,
      "loss": 3.0063,
      "step": 121382
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1512451171875,
      "learning_rate": 0.00027474411827203686,
      "loss": 2.9822,
      "step": 121383
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8039665222167969,
      "learning_rate": 0.0002747400422335669,
      "loss": 2.9828,
      "step": 121384
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.115480422973633,
      "learning_rate": 0.0002747359661997934,
      "loss": 2.7749,
      "step": 121385
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9840714931488037,
      "learning_rate": 0.0002747318901707168,
      "loss": 3.0673,
      "step": 121386
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.096407890319824,
      "learning_rate": 0.0002747278141463381,
      "loss": 3.0301,
      "step": 121387
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1815645694732666,
      "learning_rate": 0.0002747237381266578,
      "loss": 3.0205,
      "step": 121388
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2604897022247314,
      "learning_rate": 0.000274719662111677,
      "loss": 2.9903,
      "step": 121389
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3101119995117188,
      "learning_rate": 0.00027471558610139623,
      "loss": 2.9545,
      "step": 121390
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9050192832946777,
      "learning_rate": 0.0002747115100958164,
      "loss": 2.867,
      "step": 121391
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0208706855773926,
      "learning_rate": 0.0002747074340949382,
      "loss": 3.0748,
      "step": 121392
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.183706521987915,
      "learning_rate": 0.00027470335809876223,
      "loss": 3.0277,
      "step": 121393
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5666303634643555,
      "learning_rate": 0.0002746992821072894,
      "loss": 3.1483,
      "step": 121394
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9794228076934814,
      "learning_rate": 0.0002746952061205205,
      "loss": 2.8612,
      "step": 121395
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.786750316619873,
      "learning_rate": 0.0002746911301384562,
      "loss": 2.781,
      "step": 121396
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.824352264404297,
      "learning_rate": 0.00027468705416109734,
      "loss": 3.0921,
      "step": 121397
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.359691858291626,
      "learning_rate": 0.00027468297818844463,
      "loss": 3.0658,
      "step": 121398
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.901408076286316,
      "learning_rate": 0.0002746789022204989,
      "loss": 2.8377,
      "step": 121399
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9271634817123413,
      "learning_rate": 0.0002746748262572607,
      "loss": 3.0174,
      "step": 121400
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.887482762336731,
      "learning_rate": 0.000274670750298731,
      "loss": 2.7566,
      "step": 121401
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2807812690734863,
      "learning_rate": 0.00027466667434491044,
      "loss": 3.0989,
      "step": 121402
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7901523113250732,
      "learning_rate": 0.0002746625983957999,
      "loss": 2.9366,
      "step": 121403
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0176916122436523,
      "learning_rate": 0.00027465852245139996,
      "loss": 3.0634,
      "step": 121404
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.762314796447754,
      "learning_rate": 0.00027465444651171155,
      "loss": 2.7483,
      "step": 121405
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.4578745365142822,
      "learning_rate": 0.0002746503705767353,
      "loss": 2.8054,
      "step": 121406
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.461913585662842,
      "learning_rate": 0.0002746462946464721,
      "loss": 2.9625,
      "step": 121407
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.923452615737915,
      "learning_rate": 0.0002746422187209226,
      "loss": 2.9235,
      "step": 121408
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6693968772888184,
      "learning_rate": 0.00027463814280008763,
      "loss": 2.9633,
      "step": 121409
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8438425064086914,
      "learning_rate": 0.0002746340668839679,
      "loss": 3.2817,
      "step": 121410
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7769393920898438,
      "learning_rate": 0.0002746299909725641,
      "loss": 3.0543,
      "step": 121411
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9462356567382812,
      "learning_rate": 0.0002746259150658771,
      "loss": 3.1767,
      "step": 121412
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2166285514831543,
      "learning_rate": 0.0002746218391639076,
      "loss": 3.026,
      "step": 121413
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7228413820266724,
      "learning_rate": 0.0002746177632666564,
      "loss": 2.9265,
      "step": 121414
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.823782205581665,
      "learning_rate": 0.00027461368737412436,
      "loss": 2.9309,
      "step": 121415
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8879048824310303,
      "learning_rate": 0.00027460961148631193,
      "loss": 3.2177,
      "step": 121416
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8710085153579712,
      "learning_rate": 0.0002746055356032201,
      "loss": 2.9207,
      "step": 121417
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3696413040161133,
      "learning_rate": 0.00027460145972484963,
      "loss": 3.1202,
      "step": 121418
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6392204761505127,
      "learning_rate": 0.00027459738385120116,
      "loss": 2.917,
      "step": 121419
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.902298927307129,
      "learning_rate": 0.00027459330798227554,
      "loss": 2.9033,
      "step": 121420
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1436543464660645,
      "learning_rate": 0.0002745892321180736,
      "loss": 3.2124,
      "step": 121421
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.474421977996826,
      "learning_rate": 0.00027458515625859595,
      "loss": 2.9722,
      "step": 121422
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8615005016326904,
      "learning_rate": 0.00027458108040384334,
      "loss": 2.8143,
      "step": 121423
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.831699252128601,
      "learning_rate": 0.0002745770045538166,
      "loss": 2.9226,
      "step": 121424
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7150964736938477,
      "learning_rate": 0.0002745729287085165,
      "loss": 3.0495,
      "step": 121425
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.122720956802368,
      "learning_rate": 0.0002745688528679437,
      "loss": 3.1389,
      "step": 121426
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.397026538848877,
      "learning_rate": 0.00027456477703209924,
      "loss": 3.0605,
      "step": 121427
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1806516647338867,
      "learning_rate": 0.0002745607012009835,
      "loss": 3.0392,
      "step": 121428
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3263626098632812,
      "learning_rate": 0.0002745566253745974,
      "loss": 3.0896,
      "step": 121429
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.031048059463501,
      "learning_rate": 0.00027455254955294174,
      "loss": 2.8449,
      "step": 121430
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.315354585647583,
      "learning_rate": 0.00027454847373601716,
      "loss": 2.9058,
      "step": 121431
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.5303401947021484,
      "learning_rate": 0.0002745443979238246,
      "loss": 3.1234,
      "step": 121432
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.4298620223999023,
      "learning_rate": 0.0002745403221163648,
      "loss": 2.7879,
      "step": 121433
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5831263065338135,
      "learning_rate": 0.0002745362463136383,
      "loss": 3.0562,
      "step": 121434
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.169278860092163,
      "learning_rate": 0.000274532170515646,
      "loss": 2.9645,
      "step": 121435
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1861870288848877,
      "learning_rate": 0.00027452809472238864,
      "loss": 3.1321,
      "step": 121436
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.263894557952881,
      "learning_rate": 0.000274524018933867,
      "loss": 2.7817,
      "step": 121437
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2204723358154297,
      "learning_rate": 0.00027451994315008183,
      "loss": 3.0742,
      "step": 121438
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.1107752323150635,
      "learning_rate": 0.000274515867371034,
      "loss": 3.1792,
      "step": 121439
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.03279709815979,
      "learning_rate": 0.000274511791596724,
      "loss": 2.8675,
      "step": 121440
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0341711044311523,
      "learning_rate": 0.00027450771582715277,
      "loss": 2.8156,
      "step": 121441
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0479702949523926,
      "learning_rate": 0.00027450364006232107,
      "loss": 2.7157,
      "step": 121442
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.8069987297058105,
      "learning_rate": 0.0002744995643022296,
      "loss": 2.7968,
      "step": 121443
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.9629340171813965,
      "learning_rate": 0.0002744954885468791,
      "loss": 2.9041,
      "step": 121444
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.288076877593994,
      "learning_rate": 0.0002744914127962704,
      "loss": 2.8607,
      "step": 121445
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.949400544166565,
      "learning_rate": 0.00027448733705040437,
      "loss": 2.9055,
      "step": 121446
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2648508548736572,
      "learning_rate": 0.0002744832613092815,
      "loss": 2.9633,
      "step": 121447
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1259822845458984,
      "learning_rate": 0.00027447918557290264,
      "loss": 3.0089,
      "step": 121448
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.4880621433258057,
      "learning_rate": 0.0002744751098412686,
      "loss": 3.0743,
      "step": 121449
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.901168942451477,
      "learning_rate": 0.00027447103411438014,
      "loss": 2.8566,
      "step": 121450
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.251692771911621,
      "learning_rate": 0.0002744669583922379,
      "loss": 3.0004,
      "step": 121451
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8722072839736938,
      "learning_rate": 0.000274462882674843,
      "loss": 3.0803,
      "step": 121452
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.284778356552124,
      "learning_rate": 0.00027445880696219574,
      "loss": 2.968,
      "step": 121453
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7570682764053345,
      "learning_rate": 0.00027445473125429706,
      "loss": 2.9545,
      "step": 121454
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8620917797088623,
      "learning_rate": 0.00027445065555114774,
      "loss": 2.7059,
      "step": 121455
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8256639242172241,
      "learning_rate": 0.0002744465798527485,
      "loss": 2.815,
      "step": 121456
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1281120777130127,
      "learning_rate": 0.00027444250415910017,
      "loss": 3.1629,
      "step": 121457
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6508443355560303,
      "learning_rate": 0.00027443842847020355,
      "loss": 3.1684,
      "step": 121458
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.761685848236084,
      "learning_rate": 0.0002744343527860592,
      "loss": 2.8931,
      "step": 121459
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8182761669158936,
      "learning_rate": 0.000274430277106668,
      "loss": 3.1124,
      "step": 121460
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.850876808166504,
      "learning_rate": 0.00027442620143203065,
      "loss": 2.8754,
      "step": 121461
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8795435428619385,
      "learning_rate": 0.000274422125762148,
      "loss": 3.1767,
      "step": 121462
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.0207631587982178,
      "learning_rate": 0.00027441805009702073,
      "loss": 2.9348,
      "step": 121463
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7525126934051514,
      "learning_rate": 0.0002744139744366498,
      "loss": 2.8625,
      "step": 121464
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9898390769958496,
      "learning_rate": 0.00027440989878103563,
      "loss": 3.1768,
      "step": 121465
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9371602535247803,
      "learning_rate": 0.0002744058231301791,
      "loss": 3.1826,
      "step": 121466
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.186880111694336,
      "learning_rate": 0.00027440174748408105,
      "loss": 2.8434,
      "step": 121467
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7995587587356567,
      "learning_rate": 0.0002743976718427422,
      "loss": 3.295,
      "step": 121468
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.93899667263031,
      "learning_rate": 0.0002743935962061633,
      "loss": 3.2518,
      "step": 121469
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8430010080337524,
      "learning_rate": 0.0002743895205743453,
      "loss": 3.2375,
      "step": 121470
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6745285987854004,
      "learning_rate": 0.0002743854449472886,
      "loss": 3.0432,
      "step": 121471
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.711760401725769,
      "learning_rate": 0.0002743813693249941,
      "loss": 2.8606,
      "step": 121472
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.051067352294922,
      "learning_rate": 0.0002743772937074626,
      "loss": 2.9082,
      "step": 121473
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.293433427810669,
      "learning_rate": 0.0002743732180946949,
      "loss": 2.888,
      "step": 121474
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.46274995803833,
      "learning_rate": 0.00027436914248669166,
      "loss": 2.7367,
      "step": 121475
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.668088436126709,
      "learning_rate": 0.0002743650668834537,
      "loss": 2.6787,
      "step": 121476
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.598796844482422,
      "learning_rate": 0.00027436099128498186,
      "loss": 3.1557,
      "step": 121477
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8929076194763184,
      "learning_rate": 0.00027435691569127667,
      "loss": 2.8367,
      "step": 121478
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.258759021759033,
      "learning_rate": 0.000274352840102339,
      "loss": 2.8939,
      "step": 121479
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.349705457687378,
      "learning_rate": 0.00027434876451816963,
      "loss": 2.8893,
      "step": 121480
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9244171380996704,
      "learning_rate": 0.0002743446889387693,
      "loss": 3.0247,
      "step": 121481
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7868123054504395,
      "learning_rate": 0.00027434061336413886,
      "loss": 2.8221,
      "step": 121482
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8960039615631104,
      "learning_rate": 0.0002743365377942789,
      "loss": 2.9632,
      "step": 121483
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8484784364700317,
      "learning_rate": 0.0002743324622291903,
      "loss": 3.0974,
      "step": 121484
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5611684322357178,
      "learning_rate": 0.0002743283866688738,
      "loss": 2.8619,
      "step": 121485
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.19018292427063,
      "learning_rate": 0.0002743243111133301,
      "loss": 2.9509,
      "step": 121486
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9901833534240723,
      "learning_rate": 0.00027432023556256,
      "loss": 3.055,
      "step": 121487
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9711202383041382,
      "learning_rate": 0.0002743161600165643,
      "loss": 2.9354,
      "step": 121488
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5512354373931885,
      "learning_rate": 0.0002743120844753437,
      "loss": 2.8913,
      "step": 121489
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7407680749893188,
      "learning_rate": 0.0002743080089388989,
      "loss": 2.8833,
      "step": 121490
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.844878911972046,
      "learning_rate": 0.0002743039334072308,
      "loss": 3.1209,
      "step": 121491
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0891196727752686,
      "learning_rate": 0.00027429985788034005,
      "loss": 3.1056,
      "step": 121492
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6044107675552368,
      "learning_rate": 0.0002742957823582274,
      "loss": 2.9452,
      "step": 121493
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8712694644927979,
      "learning_rate": 0.0002742917068408938,
      "loss": 2.9247,
      "step": 121494
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2901973724365234,
      "learning_rate": 0.00027428763132833966,
      "loss": 2.7604,
      "step": 121495
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.805550217628479,
      "learning_rate": 0.00027428355582056606,
      "loss": 2.9168,
      "step": 121496
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9505420923233032,
      "learning_rate": 0.0002742794803175736,
      "loss": 2.8998,
      "step": 121497
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7787878513336182,
      "learning_rate": 0.000274275404819363,
      "loss": 2.8066,
      "step": 121498
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.996999740600586,
      "learning_rate": 0.0002742713293259352,
      "loss": 2.9622,
      "step": 121499
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2570528984069824,
      "learning_rate": 0.00027426725383729084,
      "loss": 3.018,
      "step": 121500
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1169846057891846,
      "learning_rate": 0.0002742631783534306,
      "loss": 2.9954,
      "step": 121501
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7422678470611572,
      "learning_rate": 0.00027425910287435536,
      "loss": 2.9775,
      "step": 121502
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8013930320739746,
      "learning_rate": 0.00027425502740006583,
      "loss": 2.9658,
      "step": 121503
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.374704360961914,
      "learning_rate": 0.0002742509519305628,
      "loss": 3.0429,
      "step": 121504
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.031324863433838,
      "learning_rate": 0.00027424687646584696,
      "loss": 3.275,
      "step": 121505
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0227699279785156,
      "learning_rate": 0.00027424280100591926,
      "loss": 2.8687,
      "step": 121506
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9313480854034424,
      "learning_rate": 0.00027423872555078015,
      "loss": 2.9167,
      "step": 121507
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9540727138519287,
      "learning_rate": 0.0002742346501004306,
      "loss": 2.9325,
      "step": 121508
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1009445190429688,
      "learning_rate": 0.00027423057465487125,
      "loss": 2.9709,
      "step": 121509
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9317721128463745,
      "learning_rate": 0.00027422649921410297,
      "loss": 2.9139,
      "step": 121510
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.4051554203033447,
      "learning_rate": 0.0002742224237781265,
      "loss": 2.9259,
      "step": 121511
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9143178462982178,
      "learning_rate": 0.0002742183483469425,
      "loss": 3.0949,
      "step": 121512
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.589775800704956,
      "learning_rate": 0.000274214272920552,
      "loss": 2.9182,
      "step": 121513
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6680468320846558,
      "learning_rate": 0.00027421019749895533,
      "loss": 2.8716,
      "step": 121514
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9707735776901245,
      "learning_rate": 0.0002742061220821535,
      "loss": 2.9403,
      "step": 121515
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7634406089782715,
      "learning_rate": 0.00027420204667014727,
      "loss": 3.0386,
      "step": 121516
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9716030359268188,
      "learning_rate": 0.0002741979712629374,
      "loss": 3.0205,
      "step": 121517
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0091023445129395,
      "learning_rate": 0.00027419389586052454,
      "loss": 2.8975,
      "step": 121518
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.861256718635559,
      "learning_rate": 0.0002741898204629097,
      "loss": 3.0145,
      "step": 121519
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8492852449417114,
      "learning_rate": 0.0002741857450700933,
      "loss": 3.1736,
      "step": 121520
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3400633335113525,
      "learning_rate": 0.00027418166968207626,
      "loss": 2.5608,
      "step": 121521
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5231564044952393,
      "learning_rate": 0.0002741775942988593,
      "loss": 3.1269,
      "step": 121522
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0202832221984863,
      "learning_rate": 0.0002741735189204433,
      "loss": 2.7994,
      "step": 121523
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8942527770996094,
      "learning_rate": 0.0002741694435468289,
      "loss": 3.1253,
      "step": 121524
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.101853370666504,
      "learning_rate": 0.000274165368178017,
      "loss": 2.9454,
      "step": 121525
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.516254186630249,
      "learning_rate": 0.0002741612928140081,
      "loss": 2.9745,
      "step": 121526
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9368605613708496,
      "learning_rate": 0.00027415721745480316,
      "loss": 2.8526,
      "step": 121527
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9949666261672974,
      "learning_rate": 0.0002741531421004028,
      "loss": 2.9978,
      "step": 121528
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9612265825271606,
      "learning_rate": 0.0002741490667508079,
      "loss": 2.9244,
      "step": 121529
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8588683605194092,
      "learning_rate": 0.0002741449914060192,
      "loss": 2.6966,
      "step": 121530
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.778503179550171,
      "learning_rate": 0.0002741409160660375,
      "loss": 2.9871,
      "step": 121531
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7803735733032227,
      "learning_rate": 0.0002741368407308634,
      "loss": 2.7346,
      "step": 121532
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0533931255340576,
      "learning_rate": 0.00027413276540049777,
      "loss": 3.085,
      "step": 121533
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.882457971572876,
      "learning_rate": 0.0002741286900749413,
      "loss": 2.741,
      "step": 121534
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9213557243347168,
      "learning_rate": 0.00027412461475419486,
      "loss": 3.195,
      "step": 121535
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5645811557769775,
      "learning_rate": 0.00027412053943825913,
      "loss": 2.8543,
      "step": 121536
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.153850793838501,
      "learning_rate": 0.000274116464127135,
      "loss": 3.062,
      "step": 121537
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.178490400314331,
      "learning_rate": 0.00027411238882082294,
      "loss": 3.0822,
      "step": 121538
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7653383016586304,
      "learning_rate": 0.0002741083135193239,
      "loss": 2.8176,
      "step": 121539
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8852628469467163,
      "learning_rate": 0.0002741042382226386,
      "loss": 2.8117,
      "step": 121540
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1105713844299316,
      "learning_rate": 0.00027410016293076783,
      "loss": 3.1195,
      "step": 121541
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0927677154541016,
      "learning_rate": 0.00027409608764371237,
      "loss": 3.2896,
      "step": 121542
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0294382572174072,
      "learning_rate": 0.00027409201236147304,
      "loss": 3.0938,
      "step": 121543
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.0053765773773193,
      "learning_rate": 0.0002740879370840503,
      "loss": 3.1843,
      "step": 121544
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0395126342773438,
      "learning_rate": 0.0002740838618114452,
      "loss": 2.9384,
      "step": 121545
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7515058517456055,
      "learning_rate": 0.0002740797865436583,
      "loss": 2.9451,
      "step": 121546
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.176208257675171,
      "learning_rate": 0.0002740757112806905,
      "loss": 2.9968,
      "step": 121547
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.3904104232788086,
      "learning_rate": 0.00027407163602254255,
      "loss": 3.1075,
      "step": 121548
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.788242816925049,
      "learning_rate": 0.00027406756076921526,
      "loss": 3.0691,
      "step": 121549
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.255969524383545,
      "learning_rate": 0.00027406348552070915,
      "loss": 3.1403,
      "step": 121550
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9678001403808594,
      "learning_rate": 0.0002740594102770252,
      "loss": 3.064,
      "step": 121551
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.980222702026367,
      "learning_rate": 0.00027405533503816403,
      "loss": 2.9226,
      "step": 121552
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.573554754257202,
      "learning_rate": 0.0002740512598041265,
      "loss": 3.3367,
      "step": 121553
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2274391651153564,
      "learning_rate": 0.0002740471845749133,
      "loss": 3.2637,
      "step": 121554
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7770164012908936,
      "learning_rate": 0.00027404310935052534,
      "loss": 2.9128,
      "step": 121555
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.346435070037842,
      "learning_rate": 0.0002740390341309632,
      "loss": 3.0666,
      "step": 121556
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.4532392024993896,
      "learning_rate": 0.0002740349589162276,
      "loss": 3.2035,
      "step": 121557
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9814810752868652,
      "learning_rate": 0.00027403088370631947,
      "loss": 2.9205,
      "step": 121558
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9478118419647217,
      "learning_rate": 0.00027402680850123945,
      "loss": 3.0331,
      "step": 121559
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.9862165451049805,
      "learning_rate": 0.00027402273330098836,
      "loss": 3.0296,
      "step": 121560
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.86885666847229,
      "learning_rate": 0.000274018658105567,
      "loss": 3.1732,
      "step": 121561
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.1144535541534424,
      "learning_rate": 0.000274014582914976,
      "loss": 2.948,
      "step": 121562
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2368452548980713,
      "learning_rate": 0.0002740105077292162,
      "loss": 2.9733,
      "step": 121563
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5416531562805176,
      "learning_rate": 0.0002740064325482883,
      "loss": 3.1645,
      "step": 121564
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.9380874633789062,
      "learning_rate": 0.00027400235737219306,
      "loss": 3.1042,
      "step": 121565
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.646926164627075,
      "learning_rate": 0.0002739982822009313,
      "loss": 2.923,
      "step": 121566
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2467193603515625,
      "learning_rate": 0.0002739942070345038,
      "loss": 3.25,
      "step": 121567
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1388580799102783,
      "learning_rate": 0.00027399013187291124,
      "loss": 3.1299,
      "step": 121568
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2311599254608154,
      "learning_rate": 0.00027398605671615444,
      "loss": 3.0739,
      "step": 121569
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.9210615158081055,
      "learning_rate": 0.0002739819815642341,
      "loss": 3.0117,
      "step": 121570
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.385011911392212,
      "learning_rate": 0.00027397790641715093,
      "loss": 3.0248,
      "step": 121571
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.313357353210449,
      "learning_rate": 0.0002739738312749058,
      "loss": 2.8625,
      "step": 121572
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.751550555229187,
      "learning_rate": 0.0002739697561374995,
      "loss": 2.7795,
      "step": 121573
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.088998794555664,
      "learning_rate": 0.0002739656810049326,
      "loss": 2.995,
      "step": 121574
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.844757080078125,
      "learning_rate": 0.000273961605877206,
      "loss": 2.9626,
      "step": 121575
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.529167652130127,
      "learning_rate": 0.00027395753075432056,
      "loss": 3.286,
      "step": 121576
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.8135735988616943,
      "learning_rate": 0.00027395345563627677,
      "loss": 2.9193,
      "step": 121577
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7121813297271729,
      "learning_rate": 0.0002739493805230755,
      "loss": 2.9109,
      "step": 121578
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6096951961517334,
      "learning_rate": 0.0002739453054147176,
      "loss": 3.1209,
      "step": 121579
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.700850486755371,
      "learning_rate": 0.0002739412303112038,
      "loss": 3.0782,
      "step": 121580
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8060195446014404,
      "learning_rate": 0.00027393715521253475,
      "loss": 3.4619,
      "step": 121581
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9240713119506836,
      "learning_rate": 0.0002739330801187113,
      "loss": 3.0075,
      "step": 121582
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.760358214378357,
      "learning_rate": 0.0002739290050297342,
      "loss": 3.1019,
      "step": 121583
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.871544599533081,
      "learning_rate": 0.0002739249299456041,
      "loss": 2.8912,
      "step": 121584
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9665089845657349,
      "learning_rate": 0.0002739208548663219,
      "loss": 3.0154,
      "step": 121585
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.567365884780884,
      "learning_rate": 0.00027391677979188837,
      "loss": 3.0627,
      "step": 121586
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7231601476669312,
      "learning_rate": 0.00027391270472230413,
      "loss": 2.8831,
      "step": 121587
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8728071451187134,
      "learning_rate": 0.00027390862965757,
      "loss": 3.0025,
      "step": 121588
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3165316581726074,
      "learning_rate": 0.0002739045545976868,
      "loss": 2.844,
      "step": 121589
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2324869632720947,
      "learning_rate": 0.00027390047954265525,
      "loss": 3.2184,
      "step": 121590
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.212965488433838,
      "learning_rate": 0.00027389640449247606,
      "loss": 2.9967,
      "step": 121591
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.052243709564209,
      "learning_rate": 0.00027389232944715003,
      "loss": 3.1423,
      "step": 121592
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.29156231880188,
      "learning_rate": 0.0002738882544066779,
      "loss": 3.2851,
      "step": 121593
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.555071473121643,
      "learning_rate": 0.00027388417937106044,
      "loss": 2.9613,
      "step": 121594
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.788852572441101,
      "learning_rate": 0.00027388010434029834,
      "loss": 3.076,
      "step": 121595
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9656033515930176,
      "learning_rate": 0.0002738760293143925,
      "loss": 2.8607,
      "step": 121596
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.860238790512085,
      "learning_rate": 0.00027387195429334353,
      "loss": 2.8658,
      "step": 121597
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3107361793518066,
      "learning_rate": 0.0002738678792771524,
      "loss": 2.8913,
      "step": 121598
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9817724227905273,
      "learning_rate": 0.00027386380426581964,
      "loss": 2.925,
      "step": 121599
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.204129934310913,
      "learning_rate": 0.00027385972925934605,
      "loss": 2.9338,
      "step": 121600
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8563449382781982,
      "learning_rate": 0.00027385565425773245,
      "loss": 3.0428,
      "step": 121601
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9443467855453491,
      "learning_rate": 0.0002738515792609796,
      "loss": 2.9359,
      "step": 121602
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8842029571533203,
      "learning_rate": 0.00027384750426908815,
      "loss": 2.9924,
      "step": 121603
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.189237117767334,
      "learning_rate": 0.00027384342928205913,
      "loss": 2.9017,
      "step": 121604
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7668975591659546,
      "learning_rate": 0.00027383935429989303,
      "loss": 3.1464,
      "step": 121605
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2758240699768066,
      "learning_rate": 0.0002738352793225906,
      "loss": 3.0506,
      "step": 121606
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.18607759475708,
      "learning_rate": 0.0002738312043501527,
      "loss": 2.8724,
      "step": 121607
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8168026208877563,
      "learning_rate": 0.00027382712938258015,
      "loss": 3.049,
      "step": 121608
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7049098014831543,
      "learning_rate": 0.0002738230544198736,
      "loss": 2.7458,
      "step": 121609
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1859183311462402,
      "learning_rate": 0.0002738189794620339,
      "loss": 2.9918,
      "step": 121610
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.752198576927185,
      "learning_rate": 0.00027381490450906166,
      "loss": 3.0106,
      "step": 121611
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.88229238986969,
      "learning_rate": 0.0002738108295609577,
      "loss": 2.7009,
      "step": 121612
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9392317533493042,
      "learning_rate": 0.00027380675461772283,
      "loss": 2.9112,
      "step": 121613
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.094712018966675,
      "learning_rate": 0.0002738026796793578,
      "loss": 2.9034,
      "step": 121614
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.139756202697754,
      "learning_rate": 0.0002737986047458633,
      "loss": 3.0731,
      "step": 121615
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9725874662399292,
      "learning_rate": 0.0002737945298172403,
      "loss": 2.9469,
      "step": 121616
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9230314493179321,
      "learning_rate": 0.00027379045489348924,
      "loss": 3.2236,
      "step": 121617
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.295290231704712,
      "learning_rate": 0.000273786379974611,
      "loss": 2.9027,
      "step": 121618
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0131983757019043,
      "learning_rate": 0.0002737823050606064,
      "loss": 2.8712,
      "step": 121619
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7460496425628662,
      "learning_rate": 0.0002737782301514762,
      "loss": 3.0075,
      "step": 121620
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.403716564178467,
      "learning_rate": 0.00027377415524722107,
      "loss": 2.9692,
      "step": 121621
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8390755653381348,
      "learning_rate": 0.000273770080347842,
      "loss": 3.0295,
      "step": 121622
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6839404106140137,
      "learning_rate": 0.0002737660054533394,
      "loss": 2.9231,
      "step": 121623
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1810011863708496,
      "learning_rate": 0.0002737619305637142,
      "loss": 2.9146,
      "step": 121624
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.970160961151123,
      "learning_rate": 0.00027375785567896715,
      "loss": 3.0267,
      "step": 121625
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8891044855117798,
      "learning_rate": 0.000273753780799099,
      "loss": 2.8852,
      "step": 121626
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8287135362625122,
      "learning_rate": 0.0002737497059241105,
      "loss": 3.071,
      "step": 121627
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8084639310836792,
      "learning_rate": 0.0002737456310540026,
      "loss": 3.016,
      "step": 121628
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3633036613464355,
      "learning_rate": 0.00027374155618877573,
      "loss": 3.0251,
      "step": 121629
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2294087409973145,
      "learning_rate": 0.0002737374813284308,
      "loss": 3.0925,
      "step": 121630
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.328461170196533,
      "learning_rate": 0.0002737334064729686,
      "loss": 2.9941,
      "step": 121631
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.214629650115967,
      "learning_rate": 0.0002737293316223898,
      "loss": 2.7865,
      "step": 121632
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3470003604888916,
      "learning_rate": 0.0002737252567766953,
      "loss": 2.9642,
      "step": 121633
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8284993171691895,
      "learning_rate": 0.00027372118193588584,
      "loss": 3.0288,
      "step": 121634
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.677783489227295,
      "learning_rate": 0.00027371710709996196,
      "loss": 3.1191,
      "step": 121635
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.601459264755249,
      "learning_rate": 0.0002737130322689246,
      "loss": 3.0869,
      "step": 121636
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6799960136413574,
      "learning_rate": 0.0002737089574427745,
      "loss": 3.1104,
      "step": 121637
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5302236080169678,
      "learning_rate": 0.00027370488262151233,
      "loss": 3.147,
      "step": 121638
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.853331208229065,
      "learning_rate": 0.000273700807805139,
      "loss": 3.109,
      "step": 121639
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6576287746429443,
      "learning_rate": 0.0002736967329936553,
      "loss": 2.7362,
      "step": 121640
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.011552095413208,
      "learning_rate": 0.0002736926581870617,
      "loss": 3.0039,
      "step": 121641
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.890761375427246,
      "learning_rate": 0.00027368858338535913,
      "loss": 2.8288,
      "step": 121642
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.192295789718628,
      "learning_rate": 0.0002736845085885484,
      "loss": 3.2615,
      "step": 121643
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.8114514350891113,
      "learning_rate": 0.0002736804337966302,
      "loss": 3.2275,
      "step": 121644
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.406466007232666,
      "learning_rate": 0.0002736763590096053,
      "loss": 3.0338,
      "step": 121645
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.104698181152344,
      "learning_rate": 0.00027367228422747443,
      "loss": 2.9354,
      "step": 121646
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7436859607696533,
      "learning_rate": 0.00027366820945023853,
      "loss": 3.0466,
      "step": 121647
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8713468313217163,
      "learning_rate": 0.0002736641346778981,
      "loss": 3.0799,
      "step": 121648
    },
    {
      "epoch": 1.58,
      "grad_norm": 5.609600067138672,
      "learning_rate": 0.00027366005991045394,
      "loss": 3.0947,
      "step": 121649
    },
    {
      "epoch": 1.58,
      "grad_norm": 5.192781925201416,
      "learning_rate": 0.0002736559851479069,
      "loss": 3.0046,
      "step": 121650
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.382655143737793,
      "learning_rate": 0.0002736519103902578,
      "loss": 2.8618,
      "step": 121651
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0565247535705566,
      "learning_rate": 0.0002736478356375072,
      "loss": 3.1299,
      "step": 121652
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6845741271972656,
      "learning_rate": 0.00027364376088965614,
      "loss": 2.8992,
      "step": 121653
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6340725421905518,
      "learning_rate": 0.00027363968614670505,
      "loss": 3.0477,
      "step": 121654
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0051236152648926,
      "learning_rate": 0.00027363561140865487,
      "loss": 3.0194,
      "step": 121655
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.093132972717285,
      "learning_rate": 0.00027363153667550626,
      "loss": 2.7227,
      "step": 121656
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1347594261169434,
      "learning_rate": 0.00027362746194726015,
      "loss": 3.0217,
      "step": 121657
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.172619581222534,
      "learning_rate": 0.0002736233872239171,
      "loss": 2.528,
      "step": 121658
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9988751411437988,
      "learning_rate": 0.00027361931250547805,
      "loss": 2.9895,
      "step": 121659
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8690314292907715,
      "learning_rate": 0.00027361523779194364,
      "loss": 3.131,
      "step": 121660
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4015395641326904,
      "learning_rate": 0.0002736111630833147,
      "loss": 3.0832,
      "step": 121661
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.765080451965332,
      "learning_rate": 0.0002736070883795919,
      "loss": 2.8306,
      "step": 121662
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0272727012634277,
      "learning_rate": 0.000273603013680776,
      "loss": 3.0777,
      "step": 121663
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.210054874420166,
      "learning_rate": 0.0002735989389868678,
      "loss": 2.8681,
      "step": 121664
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8643770217895508,
      "learning_rate": 0.00027359486429786813,
      "loss": 3.0957,
      "step": 121665
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2365646362304688,
      "learning_rate": 0.0002735907896137776,
      "loss": 2.8292,
      "step": 121666
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.7345432043075562,
      "learning_rate": 0.00027358671493459707,
      "loss": 2.806,
      "step": 121667
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.728847861289978,
      "learning_rate": 0.0002735826402603273,
      "loss": 2.839,
      "step": 121668
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.087958335876465,
      "learning_rate": 0.000273578565590969,
      "loss": 2.8977,
      "step": 121669
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1024954319000244,
      "learning_rate": 0.00027357449092652286,
      "loss": 2.8654,
      "step": 121670
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0088930130004883,
      "learning_rate": 0.00027357041626698984,
      "loss": 3.023,
      "step": 121671
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.646446704864502,
      "learning_rate": 0.00027356634161237054,
      "loss": 2.8877,
      "step": 121672
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9834433794021606,
      "learning_rate": 0.0002735622669626657,
      "loss": 3.003,
      "step": 121673
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9256837368011475,
      "learning_rate": 0.00027355819231787617,
      "loss": 3.067,
      "step": 121674
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3754770755767822,
      "learning_rate": 0.0002735541176780027,
      "loss": 3.2506,
      "step": 121675
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.456594944000244,
      "learning_rate": 0.000273550043043046,
      "loss": 2.9425,
      "step": 121676
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.6217970848083496,
      "learning_rate": 0.0002735459684130069,
      "loss": 2.827,
      "step": 121677
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0193488597869873,
      "learning_rate": 0.000273541893787886,
      "loss": 3.0901,
      "step": 121678
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0794498920440674,
      "learning_rate": 0.0002735378191676842,
      "loss": 3.0224,
      "step": 121679
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.107801914215088,
      "learning_rate": 0.0002735337445524022,
      "loss": 2.9633,
      "step": 121680
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0191569328308105,
      "learning_rate": 0.0002735296699420408,
      "loss": 3.0185,
      "step": 121681
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.80143141746521,
      "learning_rate": 0.0002735255953366008,
      "loss": 3.0777,
      "step": 121682
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.7596869468688965,
      "learning_rate": 0.0002735215207360829,
      "loss": 2.9298,
      "step": 121683
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2807607650756836,
      "learning_rate": 0.0002735174461404877,
      "loss": 2.9439,
      "step": 121684
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.330819606781006,
      "learning_rate": 0.0002735133715498162,
      "loss": 2.9703,
      "step": 121685
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.349548578262329,
      "learning_rate": 0.000273509296964069,
      "loss": 2.7752,
      "step": 121686
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.0637755393981934,
      "learning_rate": 0.00027350522238324697,
      "loss": 3.0105,
      "step": 121687
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.153984308242798,
      "learning_rate": 0.00027350114780735083,
      "loss": 2.8907,
      "step": 121688
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0199265480041504,
      "learning_rate": 0.0002734970732363814,
      "loss": 3.0322,
      "step": 121689
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9981173276901245,
      "learning_rate": 0.00027349299867033926,
      "loss": 3.051,
      "step": 121690
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.2145142555236816,
      "learning_rate": 0.0002734889241092253,
      "loss": 2.9237,
      "step": 121691
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.08382511138916,
      "learning_rate": 0.0002734848495530402,
      "loss": 2.9572,
      "step": 121692
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2292935848236084,
      "learning_rate": 0.0002734807750017848,
      "loss": 3.2042,
      "step": 121693
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4772653579711914,
      "learning_rate": 0.00027347670045545984,
      "loss": 3.0018,
      "step": 121694
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9933398962020874,
      "learning_rate": 0.00027347262591406616,
      "loss": 2.9859,
      "step": 121695
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2197043895721436,
      "learning_rate": 0.0002734685513776043,
      "loss": 3.0042,
      "step": 121696
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.174649715423584,
      "learning_rate": 0.00027346447684607517,
      "loss": 3.1274,
      "step": 121697
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.8213629722595215,
      "learning_rate": 0.00027346040231947944,
      "loss": 2.9207,
      "step": 121698
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.106248617172241,
      "learning_rate": 0.0002734563277978179,
      "loss": 2.7908,
      "step": 121699
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.211329221725464,
      "learning_rate": 0.0002734522532810914,
      "loss": 2.9169,
      "step": 121700
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4127204418182373,
      "learning_rate": 0.0002734481787693007,
      "loss": 2.9954,
      "step": 121701
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.01009202003479,
      "learning_rate": 0.0002734441042624464,
      "loss": 3.1642,
      "step": 121702
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2785956859588623,
      "learning_rate": 0.00027344002976052934,
      "loss": 3.1256,
      "step": 121703
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.1103529930114746,
      "learning_rate": 0.00027343595526355027,
      "loss": 3.0181,
      "step": 121704
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6331443786621094,
      "learning_rate": 0.00027343188077150996,
      "loss": 3.1234,
      "step": 121705
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9713081121444702,
      "learning_rate": 0.0002734278062844091,
      "loss": 2.9596,
      "step": 121706
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0309524536132812,
      "learning_rate": 0.00027342373180224875,
      "loss": 3.088,
      "step": 121707
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.989659547805786,
      "learning_rate": 0.00027341965732502927,
      "loss": 2.9617,
      "step": 121708
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.3170855045318604,
      "learning_rate": 0.0002734155828527515,
      "loss": 2.9585,
      "step": 121709
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.437937021255493,
      "learning_rate": 0.0002734115083854163,
      "loss": 2.971,
      "step": 121710
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.734410524368286,
      "learning_rate": 0.0002734074339230245,
      "loss": 3.0043,
      "step": 121711
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9408981800079346,
      "learning_rate": 0.0002734033594655767,
      "loss": 2.9746,
      "step": 121712
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.6275224685668945,
      "learning_rate": 0.0002733992850130737,
      "loss": 3.1596,
      "step": 121713
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9651751518249512,
      "learning_rate": 0.0002733952105655164,
      "loss": 2.9525,
      "step": 121714
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.9362971782684326,
      "learning_rate": 0.0002733911361229053,
      "loss": 2.9297,
      "step": 121715
    },
    {
      "epoch": 1.58,
      "grad_norm": 3.6168811321258545,
      "learning_rate": 0.0002733870616852413,
      "loss": 2.9668,
      "step": 121716
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.163930416107178,
      "learning_rate": 0.00027338298725252525,
      "loss": 3.0592,
      "step": 121717
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.211247444152832,
      "learning_rate": 0.0002733789128247577,
      "loss": 2.9266,
      "step": 121718
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.4240217208862305,
      "learning_rate": 0.0002733748384019395,
      "loss": 3.2555,
      "step": 121719
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.627166509628296,
      "learning_rate": 0.0002733707639840716,
      "loss": 3.1944,
      "step": 121720
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.849013566970825,
      "learning_rate": 0.0002733666895711544,
      "loss": 2.6838,
      "step": 121721
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2524263858795166,
      "learning_rate": 0.00027336261516318886,
      "loss": 2.9494,
      "step": 121722
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9889411926269531,
      "learning_rate": 0.0002733585407601757,
      "loss": 2.9828,
      "step": 121723
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.5391464233398438,
      "learning_rate": 0.0002733544663621157,
      "loss": 2.8831,
      "step": 121724
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.0766966342926025,
      "learning_rate": 0.0002733503919690096,
      "loss": 2.9899,
      "step": 121725
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.31099796295166,
      "learning_rate": 0.0002733463175808583,
      "loss": 2.9373,
      "step": 121726
    },
    {
      "epoch": 1.58,
      "grad_norm": 2.2350919246673584,
      "learning_rate": 0.00027334224319766235,
      "loss": 3.0364,
      "step": 121727
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.908385992050171,
      "learning_rate": 0.0002733381688194225,
      "loss": 3.0213,
      "step": 121728
    },
    {
      "epoch": 1.58,
      "grad_norm": 1.9211143255233765,
      "learning_rate": 0.0002733340944461396,
      "loss": 3.0007,
      "step": 121729
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.983619451522827,
      "learning_rate": 0.0002733300200778144,
      "loss": 2.826,
      "step": 121730
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1706464290618896,
      "learning_rate": 0.00027332594571444766,
      "loss": 3.1505,
      "step": 121731
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4699599742889404,
      "learning_rate": 0.00027332187135604025,
      "loss": 3.2403,
      "step": 121732
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.269325017929077,
      "learning_rate": 0.00027331779700259275,
      "loss": 3.1228,
      "step": 121733
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.4480576515197754,
      "learning_rate": 0.0002733137226541059,
      "loss": 2.92,
      "step": 121734
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.926973342895508,
      "learning_rate": 0.0002733096483105805,
      "loss": 3.0698,
      "step": 121735
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.122720956802368,
      "learning_rate": 0.00027330557397201743,
      "loss": 2.9518,
      "step": 121736
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7115062475204468,
      "learning_rate": 0.0002733014996384173,
      "loss": 2.9932,
      "step": 121737
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.527297258377075,
      "learning_rate": 0.0002732974253097811,
      "loss": 3.0531,
      "step": 121738
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.7587778568267822,
      "learning_rate": 0.00027329335098610924,
      "loss": 3.1622,
      "step": 121739
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.696577787399292,
      "learning_rate": 0.00027328927666740266,
      "loss": 3.1654,
      "step": 121740
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.807729959487915,
      "learning_rate": 0.0002732852023536621,
      "loss": 2.9237,
      "step": 121741
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.234412431716919,
      "learning_rate": 0.00027328112804488833,
      "loss": 3.0654,
      "step": 121742
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2692742347717285,
      "learning_rate": 0.00027327705374108216,
      "loss": 2.9617,
      "step": 121743
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.082918882369995,
      "learning_rate": 0.0002732729794422443,
      "loss": 3.234,
      "step": 121744
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8925564289093018,
      "learning_rate": 0.0002732689051483755,
      "loss": 3.1004,
      "step": 121745
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.324645519256592,
      "learning_rate": 0.00027326483085947644,
      "loss": 2.681,
      "step": 121746
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0099380016326904,
      "learning_rate": 0.000273260756575548,
      "loss": 2.9585,
      "step": 121747
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.791203737258911,
      "learning_rate": 0.00027325668229659085,
      "loss": 2.7535,
      "step": 121748
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3606812953948975,
      "learning_rate": 0.00027325260802260577,
      "loss": 3.0164,
      "step": 121749
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.872357726097107,
      "learning_rate": 0.0002732485337535936,
      "loss": 3.1166,
      "step": 121750
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2231454849243164,
      "learning_rate": 0.00027324445948955504,
      "loss": 3.1036,
      "step": 121751
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.943952798843384,
      "learning_rate": 0.00027324038523049085,
      "loss": 3.162,
      "step": 121752
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.975043535232544,
      "learning_rate": 0.0002732363109764017,
      "loss": 3.3629,
      "step": 121753
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0664665699005127,
      "learning_rate": 0.0002732322367272884,
      "loss": 3.0285,
      "step": 121754
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2116260528564453,
      "learning_rate": 0.0002732281624831518,
      "loss": 2.8663,
      "step": 121755
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1760008335113525,
      "learning_rate": 0.00027322408824399265,
      "loss": 3.0442,
      "step": 121756
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.336320638656616,
      "learning_rate": 0.00027322001400981156,
      "loss": 2.9223,
      "step": 121757
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3216745853424072,
      "learning_rate": 0.00027321593978060934,
      "loss": 3.2841,
      "step": 121758
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.241326093673706,
      "learning_rate": 0.00027321186555638686,
      "loss": 3.1367,
      "step": 121759
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.769927740097046,
      "learning_rate": 0.0002732077913371448,
      "loss": 3.0091,
      "step": 121760
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.755235195159912,
      "learning_rate": 0.0002732037171228839,
      "loss": 2.7512,
      "step": 121761
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9909734725952148,
      "learning_rate": 0.00027319964291360496,
      "loss": 3.0371,
      "step": 121762
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8430677652359009,
      "learning_rate": 0.00027319556870930865,
      "loss": 2.9788,
      "step": 121763
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.801418423652649,
      "learning_rate": 0.0002731914945099958,
      "loss": 3.0349,
      "step": 121764
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0956034660339355,
      "learning_rate": 0.0002731874203156672,
      "loss": 3.0407,
      "step": 121765
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8342362642288208,
      "learning_rate": 0.00027318334612632354,
      "loss": 3.1439,
      "step": 121766
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.157332181930542,
      "learning_rate": 0.00027317927194196566,
      "loss": 3.0639,
      "step": 121767
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9602121114730835,
      "learning_rate": 0.00027317519776259424,
      "loss": 2.8809,
      "step": 121768
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5831034183502197,
      "learning_rate": 0.00027317112358821,
      "loss": 2.8339,
      "step": 121769
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.793969750404358,
      "learning_rate": 0.00027316704941881376,
      "loss": 3.2429,
      "step": 121770
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.169659376144409,
      "learning_rate": 0.00027316297525440623,
      "loss": 3.0104,
      "step": 121771
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0676591396331787,
      "learning_rate": 0.0002731589010949883,
      "loss": 3.021,
      "step": 121772
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2984440326690674,
      "learning_rate": 0.0002731548269405606,
      "loss": 2.8923,
      "step": 121773
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9865226745605469,
      "learning_rate": 0.0002731507527911239,
      "loss": 3.0176,
      "step": 121774
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7212175130844116,
      "learning_rate": 0.0002731466786466791,
      "loss": 3.0571,
      "step": 121775
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1193013191223145,
      "learning_rate": 0.00027314260450722675,
      "loss": 3.0294,
      "step": 121776
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.517646074295044,
      "learning_rate": 0.00027313853037276765,
      "loss": 2.7033,
      "step": 121777
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.154914140701294,
      "learning_rate": 0.00027313445624330265,
      "loss": 3.0386,
      "step": 121778
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.101492881774902,
      "learning_rate": 0.00027313038211883246,
      "loss": 2.9258,
      "step": 121779
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.048149824142456,
      "learning_rate": 0.00027312630799935784,
      "loss": 3.1115,
      "step": 121780
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.059028148651123,
      "learning_rate": 0.00027312223388487966,
      "loss": 2.7754,
      "step": 121781
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9166498184204102,
      "learning_rate": 0.00027311815977539846,
      "loss": 3.0621,
      "step": 121782
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1815249919891357,
      "learning_rate": 0.00027311408567091505,
      "loss": 3.1961,
      "step": 121783
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.0120158195495605,
      "learning_rate": 0.00027311001157143025,
      "loss": 3.088,
      "step": 121784
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.5769991874694824,
      "learning_rate": 0.0002731059374769448,
      "loss": 3.0969,
      "step": 121785
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5191028118133545,
      "learning_rate": 0.0002731018633874595,
      "loss": 2.9695,
      "step": 121786
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5571720600128174,
      "learning_rate": 0.0002730977893029752,
      "loss": 3.1447,
      "step": 121787
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4855690002441406,
      "learning_rate": 0.0002730937152234924,
      "loss": 2.8796,
      "step": 121788
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0249550342559814,
      "learning_rate": 0.00027308964114901193,
      "loss": 3.0031,
      "step": 121789
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9692878723144531,
      "learning_rate": 0.00027308556707953465,
      "loss": 3.0331,
      "step": 121790
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0674684047698975,
      "learning_rate": 0.0002730814930150613,
      "loss": 2.9028,
      "step": 121791
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.1630141735076904,
      "learning_rate": 0.0002730774189555926,
      "loss": 2.9095,
      "step": 121792
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5743629932403564,
      "learning_rate": 0.0002730733449011294,
      "loss": 2.9229,
      "step": 121793
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9631942510604858,
      "learning_rate": 0.0002730692708516723,
      "loss": 3.1108,
      "step": 121794
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9127979278564453,
      "learning_rate": 0.00027306519680722206,
      "loss": 3.1625,
      "step": 121795
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4267935752868652,
      "learning_rate": 0.00027306112276777956,
      "loss": 2.9983,
      "step": 121796
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5112698078155518,
      "learning_rate": 0.0002730570487333455,
      "loss": 3.1801,
      "step": 121797
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3953487873077393,
      "learning_rate": 0.0002730529747039206,
      "loss": 2.8867,
      "step": 121798
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5183496475219727,
      "learning_rate": 0.0002730489006795058,
      "loss": 2.9601,
      "step": 121799
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9116220474243164,
      "learning_rate": 0.00027304482666010166,
      "loss": 3.1116,
      "step": 121800
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.7643024921417236,
      "learning_rate": 0.00027304075264570894,
      "loss": 3.0121,
      "step": 121801
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.232872247695923,
      "learning_rate": 0.00027303667863632844,
      "loss": 2.9805,
      "step": 121802
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0192253589630127,
      "learning_rate": 0.000273032604631961,
      "loss": 3.0014,
      "step": 121803
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4725217819213867,
      "learning_rate": 0.0002730285306326073,
      "loss": 3.031,
      "step": 121804
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4517874717712402,
      "learning_rate": 0.0002730244566382681,
      "loss": 2.8449,
      "step": 121805
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.4587794542312622,
      "learning_rate": 0.00027302038264894417,
      "loss": 2.7946,
      "step": 121806
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7136168479919434,
      "learning_rate": 0.0002730163086646362,
      "loss": 2.9758,
      "step": 121807
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.08199405670166,
      "learning_rate": 0.00027301223468534504,
      "loss": 2.9229,
      "step": 121808
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.296628713607788,
      "learning_rate": 0.0002730081607110714,
      "loss": 2.7664,
      "step": 121809
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4845869541168213,
      "learning_rate": 0.000273004086741816,
      "loss": 3.0744,
      "step": 121810
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9092631340026855,
      "learning_rate": 0.0002730000127775799,
      "loss": 3.0682,
      "step": 121811
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.015446424484253,
      "learning_rate": 0.0002729959388183634,
      "loss": 2.9376,
      "step": 121812
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.348289728164673,
      "learning_rate": 0.00027299186486416747,
      "loss": 3.0127,
      "step": 121813
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.844465970993042,
      "learning_rate": 0.0002729877909149929,
      "loss": 2.91,
      "step": 121814
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8928039073944092,
      "learning_rate": 0.00027298371697084036,
      "loss": 2.9719,
      "step": 121815
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9770376682281494,
      "learning_rate": 0.0002729796430317107,
      "loss": 2.8583,
      "step": 121816
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9406658411026,
      "learning_rate": 0.0002729755690976047,
      "loss": 3.0013,
      "step": 121817
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8486419916152954,
      "learning_rate": 0.00027297149516852294,
      "loss": 3.166,
      "step": 121818
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9595341682434082,
      "learning_rate": 0.00027296742124446633,
      "loss": 3.0214,
      "step": 121819
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6699835062026978,
      "learning_rate": 0.00027296334732543554,
      "loss": 2.8388,
      "step": 121820
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.101529359817505,
      "learning_rate": 0.00027295927341143143,
      "loss": 2.7179,
      "step": 121821
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1034090518951416,
      "learning_rate": 0.00027295519950245467,
      "loss": 3.1078,
      "step": 121822
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0686938762664795,
      "learning_rate": 0.0002729511255985062,
      "loss": 2.9721,
      "step": 121823
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.076076030731201,
      "learning_rate": 0.00027294705169958645,
      "loss": 2.6052,
      "step": 121824
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1013541221618652,
      "learning_rate": 0.00027294297780569636,
      "loss": 3.0051,
      "step": 121825
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.03759503364563,
      "learning_rate": 0.00027293890391683675,
      "loss": 3.0607,
      "step": 121826
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8180854320526123,
      "learning_rate": 0.00027293483003300827,
      "loss": 2.7877,
      "step": 121827
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.845663547515869,
      "learning_rate": 0.0002729307561542117,
      "loss": 3.0037,
      "step": 121828
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.902510404586792,
      "learning_rate": 0.00027292668228044785,
      "loss": 3.188,
      "step": 121829
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2531638145446777,
      "learning_rate": 0.0002729226084117175,
      "loss": 2.8117,
      "step": 121830
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9915378093719482,
      "learning_rate": 0.0002729185345480213,
      "loss": 2.7063,
      "step": 121831
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4482505321502686,
      "learning_rate": 0.00027291446068935995,
      "loss": 2.9667,
      "step": 121832
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5864081382751465,
      "learning_rate": 0.0002729103868357344,
      "loss": 2.8348,
      "step": 121833
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8392713069915771,
      "learning_rate": 0.00027290631298714527,
      "loss": 3.0303,
      "step": 121834
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.307053565979004,
      "learning_rate": 0.0002729022391435935,
      "loss": 2.9885,
      "step": 121835
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.895622968673706,
      "learning_rate": 0.00027289816530507956,
      "loss": 3.0833,
      "step": 121836
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.963388204574585,
      "learning_rate": 0.0002728940914716045,
      "loss": 3.0537,
      "step": 121837
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4889397621154785,
      "learning_rate": 0.00027289001764316885,
      "loss": 2.8431,
      "step": 121838
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.874060869216919,
      "learning_rate": 0.0002728859438197735,
      "loss": 3.0594,
      "step": 121839
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.892924427986145,
      "learning_rate": 0.0002728818700014191,
      "loss": 3.0128,
      "step": 121840
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.339848041534424,
      "learning_rate": 0.0002728777961881065,
      "loss": 2.9182,
      "step": 121841
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8477455377578735,
      "learning_rate": 0.0002728737223798365,
      "loss": 2.9216,
      "step": 121842
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9254181385040283,
      "learning_rate": 0.0002728696485766097,
      "loss": 3.0775,
      "step": 121843
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.7558023929595947,
      "learning_rate": 0.000272865574778427,
      "loss": 3.0359,
      "step": 121844
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.63154137134552,
      "learning_rate": 0.0002728615009852891,
      "loss": 3.1176,
      "step": 121845
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7681329250335693,
      "learning_rate": 0.0002728574271971967,
      "loss": 3.0011,
      "step": 121846
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0398879051208496,
      "learning_rate": 0.0002728533534141507,
      "loss": 3.0991,
      "step": 121847
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8206669092178345,
      "learning_rate": 0.0002728492796361517,
      "loss": 2.7683,
      "step": 121848
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.800886631011963,
      "learning_rate": 0.00027284520586320053,
      "loss": 3.0135,
      "step": 121849
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0301241874694824,
      "learning_rate": 0.000272841132095298,
      "loss": 2.9335,
      "step": 121850
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9798827171325684,
      "learning_rate": 0.0002728370583324448,
      "loss": 3.1181,
      "step": 121851
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8091486692428589,
      "learning_rate": 0.00027283298457464174,
      "loss": 3.186,
      "step": 121852
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.756882667541504,
      "learning_rate": 0.00027282891082188945,
      "loss": 2.855,
      "step": 121853
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.179882526397705,
      "learning_rate": 0.0002728248370741889,
      "loss": 2.8763,
      "step": 121854
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2460126876831055,
      "learning_rate": 0.0002728207633315406,
      "loss": 2.9907,
      "step": 121855
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3680646419525146,
      "learning_rate": 0.0002728166895939455,
      "loss": 3.1846,
      "step": 121856
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7347593307495117,
      "learning_rate": 0.00027281261586140424,
      "loss": 2.9692,
      "step": 121857
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.017429828643799,
      "learning_rate": 0.00027280854213391765,
      "loss": 2.8327,
      "step": 121858
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9408169984817505,
      "learning_rate": 0.00027280446841148654,
      "loss": 3.1386,
      "step": 121859
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8721590042114258,
      "learning_rate": 0.0002728003946941116,
      "loss": 2.9317,
      "step": 121860
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.805182933807373,
      "learning_rate": 0.0002727963209817935,
      "loss": 2.9709,
      "step": 121861
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6775932312011719,
      "learning_rate": 0.0002727922472745331,
      "loss": 2.9107,
      "step": 121862
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9821289777755737,
      "learning_rate": 0.0002727881735723311,
      "loss": 2.9432,
      "step": 121863
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1550474166870117,
      "learning_rate": 0.00027278409987518823,
      "loss": 3.2317,
      "step": 121864
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8324114084243774,
      "learning_rate": 0.0002727800261831054,
      "loss": 2.9506,
      "step": 121865
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8862652778625488,
      "learning_rate": 0.0002727759524960834,
      "loss": 2.9988,
      "step": 121866
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1566824913024902,
      "learning_rate": 0.0002727718788141227,
      "loss": 2.7217,
      "step": 121867
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5624914169311523,
      "learning_rate": 0.0002727678051372243,
      "loss": 2.973,
      "step": 121868
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0706918239593506,
      "learning_rate": 0.0002727637314653888,
      "loss": 3.1051,
      "step": 121869
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.759574055671692,
      "learning_rate": 0.00027275965779861703,
      "loss": 3.0231,
      "step": 121870
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1777396202087402,
      "learning_rate": 0.0002727555841369098,
      "loss": 2.7686,
      "step": 121871
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3127052783966064,
      "learning_rate": 0.0002727515104802679,
      "loss": 2.9836,
      "step": 121872
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.5841809511184692,
      "learning_rate": 0.0002727474368286919,
      "loss": 2.9886,
      "step": 121873
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.483567476272583,
      "learning_rate": 0.0002727433631821827,
      "loss": 3.107,
      "step": 121874
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9852170944213867,
      "learning_rate": 0.000272739289540741,
      "loss": 3.2186,
      "step": 121875
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8914521932601929,
      "learning_rate": 0.0002727352159043676,
      "loss": 2.9343,
      "step": 121876
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1309332847595215,
      "learning_rate": 0.00027273114227306325,
      "loss": 2.8473,
      "step": 121877
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0362038612365723,
      "learning_rate": 0.0002727270686468288,
      "loss": 3.0399,
      "step": 121878
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9587717056274414,
      "learning_rate": 0.00027272299502566476,
      "loss": 2.879,
      "step": 121879
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.430882215499878,
      "learning_rate": 0.000272718921409572,
      "loss": 2.7666,
      "step": 121880
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3537867069244385,
      "learning_rate": 0.0002727148477985514,
      "loss": 3.0139,
      "step": 121881
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.411973476409912,
      "learning_rate": 0.00027271077419260354,
      "loss": 2.8447,
      "step": 121882
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5359177589416504,
      "learning_rate": 0.00027270670059172927,
      "loss": 3.0454,
      "step": 121883
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.760650157928467,
      "learning_rate": 0.0002727026269959295,
      "loss": 2.8867,
      "step": 121884
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9355545043945312,
      "learning_rate": 0.0002726985534052047,
      "loss": 2.9733,
      "step": 121885
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0233829021453857,
      "learning_rate": 0.00027269447981955575,
      "loss": 2.8767,
      "step": 121886
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.260214328765869,
      "learning_rate": 0.0002726904062389834,
      "loss": 2.8438,
      "step": 121887
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.538802146911621,
      "learning_rate": 0.0002726863326634884,
      "loss": 3.038,
      "step": 121888
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9462552070617676,
      "learning_rate": 0.0002726822590930716,
      "loss": 2.7114,
      "step": 121889
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.330766201019287,
      "learning_rate": 0.00027267818552773374,
      "loss": 2.9318,
      "step": 121890
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7800135612487793,
      "learning_rate": 0.0002726741119674754,
      "loss": 3.1005,
      "step": 121891
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4320199489593506,
      "learning_rate": 0.0002726700384122975,
      "loss": 3.0875,
      "step": 121892
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.703307867050171,
      "learning_rate": 0.0002726659648622007,
      "loss": 3.0416,
      "step": 121893
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.545755624771118,
      "learning_rate": 0.00027266189131718585,
      "loss": 2.9664,
      "step": 121894
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0705604553222656,
      "learning_rate": 0.00027265781777725367,
      "loss": 3.096,
      "step": 121895
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8430124521255493,
      "learning_rate": 0.00027265374424240504,
      "loss": 3.1255,
      "step": 121896
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.337336540222168,
      "learning_rate": 0.00027264967071264045,
      "loss": 3.0652,
      "step": 121897
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7747719287872314,
      "learning_rate": 0.00027264559718796083,
      "loss": 3.1007,
      "step": 121898
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8006764650344849,
      "learning_rate": 0.0002726415236683669,
      "loss": 2.9954,
      "step": 121899
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.719079852104187,
      "learning_rate": 0.00027263745015385945,
      "loss": 3.0341,
      "step": 121900
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.768514394760132,
      "learning_rate": 0.0002726333766444392,
      "loss": 2.9202,
      "step": 121901
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1749792098999023,
      "learning_rate": 0.000272629303140107,
      "loss": 3.0013,
      "step": 121902
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8085238933563232,
      "learning_rate": 0.0002726252296408634,
      "loss": 2.9466,
      "step": 121903
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.872274875640869,
      "learning_rate": 0.0002726211561467094,
      "loss": 3.2016,
      "step": 121904
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6514105796813965,
      "learning_rate": 0.00027261708265764555,
      "loss": 3.0296,
      "step": 121905
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.864616632461548,
      "learning_rate": 0.0002726130091736727,
      "loss": 2.9397,
      "step": 121906
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.621847629547119,
      "learning_rate": 0.00027260893569479164,
      "loss": 2.9616,
      "step": 121907
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.301468849182129,
      "learning_rate": 0.0002726048622210031,
      "loss": 3.1283,
      "step": 121908
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.1417083740234375,
      "learning_rate": 0.00027260078875230795,
      "loss": 3.0679,
      "step": 121909
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6731116771698,
      "learning_rate": 0.0002725967152887067,
      "loss": 2.848,
      "step": 121910
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.122926712036133,
      "learning_rate": 0.00027259264183020026,
      "loss": 3.105,
      "step": 121911
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.938851833343506,
      "learning_rate": 0.00027258856837678936,
      "loss": 3.1305,
      "step": 121912
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9833157062530518,
      "learning_rate": 0.00027258449492847475,
      "loss": 3.0541,
      "step": 121913
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.194974899291992,
      "learning_rate": 0.00027258042148525715,
      "loss": 3.1028,
      "step": 121914
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9395148754119873,
      "learning_rate": 0.00027257634804713757,
      "loss": 3.0422,
      "step": 121915
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.532472848892212,
      "learning_rate": 0.0002725722746141164,
      "loss": 2.8722,
      "step": 121916
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.323728084564209,
      "learning_rate": 0.0002725682011861946,
      "loss": 3.0549,
      "step": 121917
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8034017086029053,
      "learning_rate": 0.00027256412776337284,
      "loss": 2.8589,
      "step": 121918
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1761670112609863,
      "learning_rate": 0.0002725600543456519,
      "loss": 3.2168,
      "step": 121919
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6478044986724854,
      "learning_rate": 0.0002725559809330326,
      "loss": 2.9031,
      "step": 121920
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.656626224517822,
      "learning_rate": 0.0002725519075255158,
      "loss": 2.8019,
      "step": 121921
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2996468544006348,
      "learning_rate": 0.000272547834123102,
      "loss": 2.9085,
      "step": 121922
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.739071011543274,
      "learning_rate": 0.00027254376072579213,
      "loss": 3.078,
      "step": 121923
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.093916893005371,
      "learning_rate": 0.00027253968733358683,
      "loss": 3.2495,
      "step": 121924
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.944380521774292,
      "learning_rate": 0.0002725356139464869,
      "loss": 3.0568,
      "step": 121925
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.779529094696045,
      "learning_rate": 0.00027253154056449313,
      "loss": 2.9881,
      "step": 121926
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9244375228881836,
      "learning_rate": 0.00027252746718760637,
      "loss": 3.1242,
      "step": 121927
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8549479246139526,
      "learning_rate": 0.00027252339381582716,
      "loss": 2.8395,
      "step": 121928
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4398505687713623,
      "learning_rate": 0.00027251932044915645,
      "loss": 3.0218,
      "step": 121929
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.4224531650543213,
      "learning_rate": 0.0002725152470875948,
      "loss": 2.8854,
      "step": 121930
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0570907592773438,
      "learning_rate": 0.0002725111737311432,
      "loss": 2.9446,
      "step": 121931
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0063178539276123,
      "learning_rate": 0.0002725071003798022,
      "loss": 2.9403,
      "step": 121932
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.8662588596343994,
      "learning_rate": 0.00027250302703357276,
      "loss": 2.9835,
      "step": 121933
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3406081199645996,
      "learning_rate": 0.00027249895369245544,
      "loss": 2.9984,
      "step": 121934
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.816056728363037,
      "learning_rate": 0.00027249488035645107,
      "loss": 3.1742,
      "step": 121935
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.053676128387451,
      "learning_rate": 0.00027249080702556054,
      "loss": 2.8337,
      "step": 121936
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1475000381469727,
      "learning_rate": 0.00027248673369978437,
      "loss": 3.0841,
      "step": 121937
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.62956166267395,
      "learning_rate": 0.00027248266037912345,
      "loss": 2.9983,
      "step": 121938
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8429598808288574,
      "learning_rate": 0.00027247858706357857,
      "loss": 2.9717,
      "step": 121939
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9652421474456787,
      "learning_rate": 0.0002724745137531504,
      "loss": 3.1372,
      "step": 121940
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8281662464141846,
      "learning_rate": 0.00027247044044783974,
      "loss": 2.8136,
      "step": 121941
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6541919708251953,
      "learning_rate": 0.0002724663671476473,
      "loss": 3.0976,
      "step": 121942
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2787132263183594,
      "learning_rate": 0.000272462293852574,
      "loss": 2.7952,
      "step": 121943
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1246860027313232,
      "learning_rate": 0.0002724582205626204,
      "loss": 2.9242,
      "step": 121944
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8942835330963135,
      "learning_rate": 0.0002724541472777874,
      "loss": 3.1304,
      "step": 121945
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.7619519233703613,
      "learning_rate": 0.00027245007399807566,
      "loss": 2.8616,
      "step": 121946
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8609827756881714,
      "learning_rate": 0.0002724460007234859,
      "loss": 3.2519,
      "step": 121947
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0011870861053467,
      "learning_rate": 0.000272441927454019,
      "loss": 2.9404,
      "step": 121948
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9989449977874756,
      "learning_rate": 0.00027243785418967564,
      "loss": 2.7325,
      "step": 121949
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3014461994171143,
      "learning_rate": 0.0002724337809304566,
      "loss": 2.9821,
      "step": 121950
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.707535743713379,
      "learning_rate": 0.0002724297076763628,
      "loss": 2.9841,
      "step": 121951
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6420211791992188,
      "learning_rate": 0.00027242563442739476,
      "loss": 2.7799,
      "step": 121952
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1209423542022705,
      "learning_rate": 0.0002724215611835532,
      "loss": 3.035,
      "step": 121953
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.244530439376831,
      "learning_rate": 0.00027241748794483905,
      "loss": 2.9495,
      "step": 121954
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0308854579925537,
      "learning_rate": 0.000272413414711253,
      "loss": 2.7664,
      "step": 121955
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.325190305709839,
      "learning_rate": 0.0002724093414827958,
      "loss": 3.2816,
      "step": 121956
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.670507907867432,
      "learning_rate": 0.0002724052682594684,
      "loss": 2.8085,
      "step": 121957
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.014470338821411,
      "learning_rate": 0.00027240119504127124,
      "loss": 3.0002,
      "step": 121958
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0273728370666504,
      "learning_rate": 0.0002723971218282052,
      "loss": 3.0248,
      "step": 121959
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.944014310836792,
      "learning_rate": 0.00027239304862027106,
      "loss": 2.957,
      "step": 121960
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.973440170288086,
      "learning_rate": 0.00027238897541746954,
      "loss": 2.7727,
      "step": 121961
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.7495381832122803,
      "learning_rate": 0.0002723849022198015,
      "loss": 2.9478,
      "step": 121962
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.78829824924469,
      "learning_rate": 0.0002723808290272677,
      "loss": 2.9569,
      "step": 121963
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.291069746017456,
      "learning_rate": 0.00027237675583986873,
      "loss": 2.8566,
      "step": 121964
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.6429061889648438,
      "learning_rate": 0.0002723726826576054,
      "loss": 3.1023,
      "step": 121965
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.814833879470825,
      "learning_rate": 0.00027236860948047856,
      "loss": 2.8291,
      "step": 121966
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2275936603546143,
      "learning_rate": 0.0002723645363084889,
      "loss": 2.9914,
      "step": 121967
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.724313735961914,
      "learning_rate": 0.0002723604631416372,
      "loss": 2.7845,
      "step": 121968
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4124460220336914,
      "learning_rate": 0.00027235638997992435,
      "loss": 2.934,
      "step": 121969
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9258174896240234,
      "learning_rate": 0.00027235231682335077,
      "loss": 3.1814,
      "step": 121970
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8678598403930664,
      "learning_rate": 0.0002723482436719175,
      "loss": 3.1832,
      "step": 121971
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9397609233856201,
      "learning_rate": 0.0002723441705256252,
      "loss": 2.9361,
      "step": 121972
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.6438000202178955,
      "learning_rate": 0.0002723400973844746,
      "loss": 3.0987,
      "step": 121973
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.327340841293335,
      "learning_rate": 0.00027233602424846655,
      "loss": 3.1161,
      "step": 121974
    },
    {
      "epoch": 1.59,
      "grad_norm": 5.804305553436279,
      "learning_rate": 0.00027233195111760173,
      "loss": 3.1611,
      "step": 121975
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8382521867752075,
      "learning_rate": 0.000272327877991881,
      "loss": 2.7972,
      "step": 121976
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7802646160125732,
      "learning_rate": 0.000272323804871305,
      "loss": 2.7204,
      "step": 121977
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2195003032684326,
      "learning_rate": 0.00027231973175587446,
      "loss": 2.9474,
      "step": 121978
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.864715814590454,
      "learning_rate": 0.00027231565864559024,
      "loss": 3.1196,
      "step": 121979
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.862898111343384,
      "learning_rate": 0.000272311585540453,
      "loss": 2.7799,
      "step": 121980
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8314026594161987,
      "learning_rate": 0.00027230751244046366,
      "loss": 2.7179,
      "step": 121981
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0502376556396484,
      "learning_rate": 0.000272303439345623,
      "loss": 2.8139,
      "step": 121982
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0075721740722656,
      "learning_rate": 0.00027229936625593144,
      "loss": 3.3536,
      "step": 121983
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6839282512664795,
      "learning_rate": 0.00027229529317139,
      "loss": 2.9046,
      "step": 121984
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.114636182785034,
      "learning_rate": 0.00027229122009199934,
      "loss": 3.0314,
      "step": 121985
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.015521287918091,
      "learning_rate": 0.0002722871470177603,
      "loss": 3.0224,
      "step": 121986
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7856780290603638,
      "learning_rate": 0.00027228307394867364,
      "loss": 3.1132,
      "step": 121987
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6738955974578857,
      "learning_rate": 0.0002722790008847402,
      "loss": 2.9203,
      "step": 121988
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.411849021911621,
      "learning_rate": 0.0002722749278259604,
      "loss": 2.9244,
      "step": 121989
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.010890007019043,
      "learning_rate": 0.0002722708547723353,
      "loss": 2.9724,
      "step": 121990
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9386340379714966,
      "learning_rate": 0.00027226678172386556,
      "loss": 2.9814,
      "step": 121991
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5312488079071045,
      "learning_rate": 0.0002722627086805519,
      "loss": 3.1122,
      "step": 121992
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0105206966400146,
      "learning_rate": 0.0002722586356423952,
      "loss": 3.0456,
      "step": 121993
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0325276851654053,
      "learning_rate": 0.00027225456260939623,
      "loss": 2.8915,
      "step": 121994
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8679279088974,
      "learning_rate": 0.00027225048958155556,
      "loss": 2.7253,
      "step": 121995
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.055821180343628,
      "learning_rate": 0.000272246416558874,
      "loss": 2.8253,
      "step": 121996
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.208984851837158,
      "learning_rate": 0.0002722423435413524,
      "loss": 3.1146,
      "step": 121997
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.965053081512451,
      "learning_rate": 0.0002722382705289914,
      "loss": 2.9002,
      "step": 121998
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8192094564437866,
      "learning_rate": 0.00027223419752179194,
      "loss": 3.2453,
      "step": 121999
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.046837091445923,
      "learning_rate": 0.00027223012451975474,
      "loss": 3.088,
      "step": 122000
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.347514629364014,
      "learning_rate": 0.00027222605152288034,
      "loss": 2.9703,
      "step": 122001
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.711451053619385,
      "learning_rate": 0.0002722219785311696,
      "loss": 3.1048,
      "step": 122002
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.876656413078308,
      "learning_rate": 0.00027221790554462344,
      "loss": 2.8581,
      "step": 122003
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.862346649169922,
      "learning_rate": 0.0002722138325632424,
      "loss": 2.794,
      "step": 122004
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.7931320667266846,
      "learning_rate": 0.0002722097595870274,
      "loss": 2.885,
      "step": 122005
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.9956486225128174,
      "learning_rate": 0.00027220568661597916,
      "loss": 2.7299,
      "step": 122006
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8056104183197021,
      "learning_rate": 0.00027220161365009836,
      "loss": 2.8703,
      "step": 122007
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.131054401397705,
      "learning_rate": 0.0002721975406893858,
      "loss": 2.8458,
      "step": 122008
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.725579023361206,
      "learning_rate": 0.0002721934677338422,
      "loss": 3.1459,
      "step": 122009
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9906094074249268,
      "learning_rate": 0.00027218939478346844,
      "loss": 2.8494,
      "step": 122010
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2783241271972656,
      "learning_rate": 0.0002721853218382651,
      "loss": 2.8716,
      "step": 122011
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8856110572814941,
      "learning_rate": 0.00027218124889823316,
      "loss": 2.9923,
      "step": 122012
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.078167676925659,
      "learning_rate": 0.0002721771759633732,
      "loss": 3.0495,
      "step": 122013
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8136827945709229,
      "learning_rate": 0.00027217310303368603,
      "loss": 3.0606,
      "step": 122014
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9498870372772217,
      "learning_rate": 0.0002721690301091724,
      "loss": 2.7772,
      "step": 122015
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1841039657592773,
      "learning_rate": 0.000272164957189833,
      "loss": 3.2371,
      "step": 122016
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.670335531234741,
      "learning_rate": 0.0002721608842756687,
      "loss": 2.7534,
      "step": 122017
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.3881750106811523,
      "learning_rate": 0.0002721568113666803,
      "loss": 2.7816,
      "step": 122018
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8986749649047852,
      "learning_rate": 0.0002721527384628684,
      "loss": 2.911,
      "step": 122019
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8660063743591309,
      "learning_rate": 0.00027214866556423377,
      "loss": 2.9806,
      "step": 122020
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5511999130249023,
      "learning_rate": 0.0002721445926707774,
      "loss": 2.9679,
      "step": 122021
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2821805477142334,
      "learning_rate": 0.00027214051978249974,
      "loss": 3.2121,
      "step": 122022
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9452824592590332,
      "learning_rate": 0.0002721364468994017,
      "loss": 2.993,
      "step": 122023
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9663296937942505,
      "learning_rate": 0.0002721323740214841,
      "loss": 3.1483,
      "step": 122024
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4689629077911377,
      "learning_rate": 0.00027212830114874755,
      "loss": 2.7506,
      "step": 122025
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7201731204986572,
      "learning_rate": 0.0002721242282811929,
      "loss": 3.147,
      "step": 122026
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9969942569732666,
      "learning_rate": 0.0002721201554188208,
      "loss": 3.3123,
      "step": 122027
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6926193237304688,
      "learning_rate": 0.0002721160825616322,
      "loss": 3.0853,
      "step": 122028
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1195719242095947,
      "learning_rate": 0.0002721120097096277,
      "loss": 2.9206,
      "step": 122029
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6969772577285767,
      "learning_rate": 0.00027210793686280815,
      "loss": 2.9104,
      "step": 122030
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5353856086730957,
      "learning_rate": 0.0002721038640211742,
      "loss": 2.9064,
      "step": 122031
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6224513053894043,
      "learning_rate": 0.00027209979118472666,
      "loss": 3.1976,
      "step": 122032
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0247654914855957,
      "learning_rate": 0.0002720957183534663,
      "loss": 2.8317,
      "step": 122033
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2448923587799072,
      "learning_rate": 0.0002720916455273939,
      "loss": 2.9524,
      "step": 122034
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.226301908493042,
      "learning_rate": 0.0002720875727065102,
      "loss": 2.8811,
      "step": 122035
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.091454029083252,
      "learning_rate": 0.00027208349989081603,
      "loss": 2.8386,
      "step": 122036
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8820006847381592,
      "learning_rate": 0.0002720794270803119,
      "loss": 3.1385,
      "step": 122037
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5038537979125977,
      "learning_rate": 0.0002720753542749988,
      "loss": 2.8284,
      "step": 122038
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.37465238571167,
      "learning_rate": 0.0002720712814748774,
      "loss": 2.7305,
      "step": 122039
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8188467025756836,
      "learning_rate": 0.00027206720867994845,
      "loss": 2.9122,
      "step": 122040
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7769391536712646,
      "learning_rate": 0.0002720631358902128,
      "loss": 2.8564,
      "step": 122041
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5724384784698486,
      "learning_rate": 0.0002720590631056711,
      "loss": 2.8754,
      "step": 122042
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5801565647125244,
      "learning_rate": 0.00027205499032632424,
      "loss": 3.0593,
      "step": 122043
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8109545707702637,
      "learning_rate": 0.0002720509175521728,
      "loss": 2.8854,
      "step": 122044
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.011823892593384,
      "learning_rate": 0.0002720468447832176,
      "loss": 3.0128,
      "step": 122045
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.5773597955703735,
      "learning_rate": 0.00027204277201945944,
      "loss": 3.1445,
      "step": 122046
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.1015756130218506,
      "learning_rate": 0.0002720386992608991,
      "loss": 2.6424,
      "step": 122047
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8854246139526367,
      "learning_rate": 0.0002720346265075372,
      "loss": 3.1499,
      "step": 122048
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.096998691558838,
      "learning_rate": 0.00027203055375937476,
      "loss": 3.1622,
      "step": 122049
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8802587985992432,
      "learning_rate": 0.0002720264810164122,
      "loss": 2.8098,
      "step": 122050
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6422882080078125,
      "learning_rate": 0.00027202240827865054,
      "loss": 3.0627,
      "step": 122051
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9584109783172607,
      "learning_rate": 0.0002720183355460904,
      "loss": 3.2354,
      "step": 122052
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8185756206512451,
      "learning_rate": 0.00027201426281873254,
      "loss": 2.8539,
      "step": 122053
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.847908854484558,
      "learning_rate": 0.0002720101900965778,
      "loss": 3.1036,
      "step": 122054
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8024144172668457,
      "learning_rate": 0.000272006117379627,
      "loss": 2.846,
      "step": 122055
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.493471384048462,
      "learning_rate": 0.00027200204466788064,
      "loss": 2.8633,
      "step": 122056
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.272434949874878,
      "learning_rate": 0.00027199797196133965,
      "loss": 3.2417,
      "step": 122057
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.757458209991455,
      "learning_rate": 0.00027199389926000484,
      "loss": 3.0912,
      "step": 122058
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7732365131378174,
      "learning_rate": 0.0002719898265638768,
      "loss": 2.6104,
      "step": 122059
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.957521677017212,
      "learning_rate": 0.0002719857538729564,
      "loss": 2.8044,
      "step": 122060
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3742165565490723,
      "learning_rate": 0.0002719816811872445,
      "loss": 3.0736,
      "step": 122061
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2175838947296143,
      "learning_rate": 0.0002719776085067416,
      "loss": 2.7593,
      "step": 122062
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3620762825012207,
      "learning_rate": 0.0002719735358314486,
      "loss": 2.8912,
      "step": 122063
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3236424922943115,
      "learning_rate": 0.0002719694631613663,
      "loss": 2.9954,
      "step": 122064
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.964780330657959,
      "learning_rate": 0.0002719653904964953,
      "loss": 2.687,
      "step": 122065
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9847098588943481,
      "learning_rate": 0.0002719613178368366,
      "loss": 3.1022,
      "step": 122066
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9188694953918457,
      "learning_rate": 0.00027195724518239085,
      "loss": 2.9108,
      "step": 122067
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4127395153045654,
      "learning_rate": 0.0002719531725331587,
      "loss": 2.9673,
      "step": 122068
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8565293550491333,
      "learning_rate": 0.00027194909988914097,
      "loss": 2.7979,
      "step": 122069
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0339555740356445,
      "learning_rate": 0.0002719450272503384,
      "loss": 3.366,
      "step": 122070
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9290691614151,
      "learning_rate": 0.0002719409546167518,
      "loss": 3.034,
      "step": 122071
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.072939157485962,
      "learning_rate": 0.0002719368819883819,
      "loss": 2.9912,
      "step": 122072
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.171957969665527,
      "learning_rate": 0.0002719328093652296,
      "loss": 3.1319,
      "step": 122073
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0593760013580322,
      "learning_rate": 0.0002719287367472954,
      "loss": 3.0975,
      "step": 122074
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1240670680999756,
      "learning_rate": 0.00027192466413458017,
      "loss": 3.0892,
      "step": 122075
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3038644790649414,
      "learning_rate": 0.0002719205915270847,
      "loss": 3.1586,
      "step": 122076
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5407726764678955,
      "learning_rate": 0.0002719165189248096,
      "loss": 3.229,
      "step": 122077
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.049302101135254,
      "learning_rate": 0.0002719124463277559,
      "loss": 2.8029,
      "step": 122078
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.126180648803711,
      "learning_rate": 0.0002719083737359243,
      "loss": 2.8532,
      "step": 122079
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7481701374053955,
      "learning_rate": 0.0002719043011493153,
      "loss": 2.8413,
      "step": 122080
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7429336309432983,
      "learning_rate": 0.0002719002285679298,
      "loss": 2.9907,
      "step": 122081
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7456474304199219,
      "learning_rate": 0.00027189615599176864,
      "loss": 2.9914,
      "step": 122082
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8797494173049927,
      "learning_rate": 0.0002718920834208325,
      "loss": 2.9473,
      "step": 122083
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1019420623779297,
      "learning_rate": 0.0002718880108551221,
      "loss": 2.9928,
      "step": 122084
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.936172366142273,
      "learning_rate": 0.00027188393829463844,
      "loss": 3.1835,
      "step": 122085
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.062591552734375,
      "learning_rate": 0.000271879865739382,
      "loss": 3.0828,
      "step": 122086
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9108779430389404,
      "learning_rate": 0.0002718757931893535,
      "loss": 3.2232,
      "step": 122087
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.45212459564209,
      "learning_rate": 0.0002718717206445539,
      "loss": 2.8275,
      "step": 122088
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3197195529937744,
      "learning_rate": 0.0002718676481049838,
      "loss": 2.9648,
      "step": 122089
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6851160526275635,
      "learning_rate": 0.00027186357557064416,
      "loss": 3.2031,
      "step": 122090
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0088934898376465,
      "learning_rate": 0.0002718595030415357,
      "loss": 3.033,
      "step": 122091
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8123987913131714,
      "learning_rate": 0.0002718554305176589,
      "loss": 2.9743,
      "step": 122092
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9031505584716797,
      "learning_rate": 0.0002718513579990147,
      "loss": 3.0332,
      "step": 122093
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3201441764831543,
      "learning_rate": 0.00027184728548560394,
      "loss": 2.9428,
      "step": 122094
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9097646474838257,
      "learning_rate": 0.00027184321297742727,
      "loss": 2.8206,
      "step": 122095
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9064617156982422,
      "learning_rate": 0.00027183914047448546,
      "loss": 2.798,
      "step": 122096
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.392963409423828,
      "learning_rate": 0.00027183506797677937,
      "loss": 2.9521,
      "step": 122097
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1970267295837402,
      "learning_rate": 0.00027183099548430965,
      "loss": 3.0348,
      "step": 122098
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8241606950759888,
      "learning_rate": 0.00027182692299707707,
      "loss": 2.952,
      "step": 122099
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8429758548736572,
      "learning_rate": 0.00027182285051508237,
      "loss": 2.8563,
      "step": 122100
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7802363634109497,
      "learning_rate": 0.0002718187780383263,
      "loss": 3.1111,
      "step": 122101
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.179283380508423,
      "learning_rate": 0.00027181470556680967,
      "loss": 3.2403,
      "step": 122102
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9847519397735596,
      "learning_rate": 0.0002718106331005333,
      "loss": 3.0366,
      "step": 122103
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9061088562011719,
      "learning_rate": 0.0002718065606394978,
      "loss": 3.0764,
      "step": 122104
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.311330795288086,
      "learning_rate": 0.000271802488183704,
      "loss": 3.1547,
      "step": 122105
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.7038004398345947,
      "learning_rate": 0.00027179841573315267,
      "loss": 3.1752,
      "step": 122106
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9081205129623413,
      "learning_rate": 0.00027179434328784456,
      "loss": 2.9366,
      "step": 122107
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7731406688690186,
      "learning_rate": 0.00027179027084778035,
      "loss": 3.0855,
      "step": 122108
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6197094917297363,
      "learning_rate": 0.00027178619841296086,
      "loss": 2.8434,
      "step": 122109
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7559796571731567,
      "learning_rate": 0.0002717821259833869,
      "loss": 2.9135,
      "step": 122110
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9957163333892822,
      "learning_rate": 0.0002717780535590591,
      "loss": 2.8495,
      "step": 122111
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7650107145309448,
      "learning_rate": 0.00027177398113997835,
      "loss": 3.1637,
      "step": 122112
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7279640436172485,
      "learning_rate": 0.00027176990872614543,
      "loss": 3.1462,
      "step": 122113
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7638520002365112,
      "learning_rate": 0.0002717658363175609,
      "loss": 3.0073,
      "step": 122114
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2686767578125,
      "learning_rate": 0.00027176176391422563,
      "loss": 2.974,
      "step": 122115
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.6729490756988525,
      "learning_rate": 0.0002717576915161405,
      "loss": 3.1488,
      "step": 122116
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.6695661544799805,
      "learning_rate": 0.000271753619123306,
      "loss": 3.127,
      "step": 122117
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9899169206619263,
      "learning_rate": 0.0002717495467357231,
      "loss": 3.1588,
      "step": 122118
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9064288139343262,
      "learning_rate": 0.0002717454743533925,
      "loss": 2.8833,
      "step": 122119
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0214831829071045,
      "learning_rate": 0.000271741401976315,
      "loss": 3.1799,
      "step": 122120
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.9358527660369873,
      "learning_rate": 0.00027173732960449125,
      "loss": 3.0497,
      "step": 122121
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0022523403167725,
      "learning_rate": 0.0002717332572379221,
      "loss": 2.8908,
      "step": 122122
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3499834537506104,
      "learning_rate": 0.00027172918487660823,
      "loss": 2.8911,
      "step": 122123
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.416597843170166,
      "learning_rate": 0.0002717251125205504,
      "loss": 3.1994,
      "step": 122124
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0448367595672607,
      "learning_rate": 0.00027172104016974947,
      "loss": 3.0074,
      "step": 122125
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.149758815765381,
      "learning_rate": 0.00027171696782420606,
      "loss": 3.1208,
      "step": 122126
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1039111614227295,
      "learning_rate": 0.00027171289548392114,
      "loss": 2.8701,
      "step": 122127
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1090612411499023,
      "learning_rate": 0.00027170882314889525,
      "loss": 2.7818,
      "step": 122128
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7331169843673706,
      "learning_rate": 0.0002717047508191292,
      "loss": 3.0722,
      "step": 122129
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9821968078613281,
      "learning_rate": 0.00027170067849462376,
      "loss": 3.0177,
      "step": 122130
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.714491605758667,
      "learning_rate": 0.00027169660617537973,
      "loss": 3.0226,
      "step": 122131
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.342625856399536,
      "learning_rate": 0.0002716925338613978,
      "loss": 2.9995,
      "step": 122132
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.203364133834839,
      "learning_rate": 0.00027168846155267874,
      "loss": 2.914,
      "step": 122133
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.49680495262146,
      "learning_rate": 0.0002716843892492235,
      "loss": 3.0494,
      "step": 122134
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8045551776885986,
      "learning_rate": 0.00027168031695103253,
      "loss": 3.0015,
      "step": 122135
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8405308723449707,
      "learning_rate": 0.00027167624465810675,
      "loss": 3.0935,
      "step": 122136
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9897054433822632,
      "learning_rate": 0.00027167217237044684,
      "loss": 3.0289,
      "step": 122137
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9118068218231201,
      "learning_rate": 0.00027166810008805365,
      "loss": 2.9506,
      "step": 122138
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8185592889785767,
      "learning_rate": 0.00027166402781092784,
      "loss": 2.8635,
      "step": 122139
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9492546319961548,
      "learning_rate": 0.00027165995553907044,
      "loss": 2.7219,
      "step": 122140
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0419607162475586,
      "learning_rate": 0.0002716558832724818,
      "loss": 2.8611,
      "step": 122141
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5691702365875244,
      "learning_rate": 0.0002716518110111629,
      "loss": 3.0253,
      "step": 122142
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0698020458221436,
      "learning_rate": 0.0002716477387551144,
      "loss": 3.0852,
      "step": 122143
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2129178047180176,
      "learning_rate": 0.00027164366650433714,
      "loss": 2.7724,
      "step": 122144
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9572808742523193,
      "learning_rate": 0.0002716395942588319,
      "loss": 2.902,
      "step": 122145
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8079699277877808,
      "learning_rate": 0.00027163552201859946,
      "loss": 2.9969,
      "step": 122146
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3265907764434814,
      "learning_rate": 0.00027163144978364044,
      "loss": 2.8786,
      "step": 122147
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.391815423965454,
      "learning_rate": 0.00027162737755395564,
      "loss": 3.1712,
      "step": 122148
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2256646156311035,
      "learning_rate": 0.00027162330532954585,
      "loss": 2.9452,
      "step": 122149
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8451430797576904,
      "learning_rate": 0.00027161923311041185,
      "loss": 2.818,
      "step": 122150
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.990267038345337,
      "learning_rate": 0.00027161516089655433,
      "loss": 2.9491,
      "step": 122151
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8679097890853882,
      "learning_rate": 0.0002716110886879742,
      "loss": 2.9613,
      "step": 122152
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6854634284973145,
      "learning_rate": 0.00027160701648467207,
      "loss": 3.1805,
      "step": 122153
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0865402221679688,
      "learning_rate": 0.0002716029442866487,
      "loss": 2.937,
      "step": 122154
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.766644835472107,
      "learning_rate": 0.0002715988720939048,
      "loss": 3.0726,
      "step": 122155
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7153347730636597,
      "learning_rate": 0.00027159479990644127,
      "loss": 2.9245,
      "step": 122156
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4383533000946045,
      "learning_rate": 0.0002715907277242588,
      "loss": 2.8259,
      "step": 122157
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0530409812927246,
      "learning_rate": 0.0002715866555473583,
      "loss": 2.9927,
      "step": 122158
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.10526967048645,
      "learning_rate": 0.00027158258337574016,
      "loss": 2.8524,
      "step": 122159
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8272209167480469,
      "learning_rate": 0.00027157851120940545,
      "loss": 2.7594,
      "step": 122160
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.926172137260437,
      "learning_rate": 0.00027157443904835476,
      "loss": 3.0998,
      "step": 122161
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2276864051818848,
      "learning_rate": 0.00027157036689258896,
      "loss": 2.8328,
      "step": 122162
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.960880994796753,
      "learning_rate": 0.00027156629474210875,
      "loss": 3.0414,
      "step": 122163
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8352676630020142,
      "learning_rate": 0.00027156222259691506,
      "loss": 3.3188,
      "step": 122164
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.212526321411133,
      "learning_rate": 0.0002715581504570084,
      "loss": 2.9659,
      "step": 122165
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8537235260009766,
      "learning_rate": 0.0002715540783223895,
      "loss": 3.174,
      "step": 122166
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6807544231414795,
      "learning_rate": 0.00027155000619305926,
      "loss": 2.9442,
      "step": 122167
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.071587324142456,
      "learning_rate": 0.00027154593406901847,
      "loss": 2.752,
      "step": 122168
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.844330906867981,
      "learning_rate": 0.00027154186195026776,
      "loss": 3.1066,
      "step": 122169
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.5847127437591553,
      "learning_rate": 0.0002715377898368081,
      "loss": 3.1342,
      "step": 122170
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6752291917800903,
      "learning_rate": 0.00027153371772864005,
      "loss": 3.2131,
      "step": 122171
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1555979251861572,
      "learning_rate": 0.00027152964562576434,
      "loss": 2.8444,
      "step": 122172
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0014913082122803,
      "learning_rate": 0.00027152557352818187,
      "loss": 3.0206,
      "step": 122173
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.006654977798462,
      "learning_rate": 0.0002715215014358933,
      "loss": 3.054,
      "step": 122174
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.7526726722717285,
      "learning_rate": 0.0002715174293488994,
      "loss": 2.9073,
      "step": 122175
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3153586387634277,
      "learning_rate": 0.000271513357267201,
      "loss": 2.6707,
      "step": 122176
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.07635235786438,
      "learning_rate": 0.00027150928519079884,
      "loss": 3.0794,
      "step": 122177
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8877849578857422,
      "learning_rate": 0.00027150521311969356,
      "loss": 3.0885,
      "step": 122178
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9080570936203003,
      "learning_rate": 0.000271501141053886,
      "loss": 2.9606,
      "step": 122179
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9281773567199707,
      "learning_rate": 0.0002714970689933769,
      "loss": 2.769,
      "step": 122180
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.173497200012207,
      "learning_rate": 0.00027149299693816713,
      "loss": 2.9752,
      "step": 122181
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.595503091812134,
      "learning_rate": 0.00027148892488825725,
      "loss": 2.8321,
      "step": 122182
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9709577560424805,
      "learning_rate": 0.0002714848528436483,
      "loss": 3.1445,
      "step": 122183
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.035388231277466,
      "learning_rate": 0.00027148078080434067,
      "loss": 2.8957,
      "step": 122184
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.254232168197632,
      "learning_rate": 0.0002714767087703353,
      "loss": 3.0324,
      "step": 122185
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.682661771774292,
      "learning_rate": 0.00027147263674163303,
      "loss": 2.9699,
      "step": 122186
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9524846076965332,
      "learning_rate": 0.0002714685647182345,
      "loss": 2.8729,
      "step": 122187
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.942049026489258,
      "learning_rate": 0.0002714644927001404,
      "loss": 3.1069,
      "step": 122188
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4513587951660156,
      "learning_rate": 0.00027146042068735177,
      "loss": 3.0613,
      "step": 122189
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.07059383392334,
      "learning_rate": 0.00027145634867986915,
      "loss": 2.883,
      "step": 122190
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.658288836479187,
      "learning_rate": 0.0002714522766776933,
      "loss": 2.9912,
      "step": 122191
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.263314723968506,
      "learning_rate": 0.000271448204680825,
      "loss": 2.8779,
      "step": 122192
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.020730972290039,
      "learning_rate": 0.000271444132689265,
      "loss": 3.2375,
      "step": 122193
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.7263145446777344,
      "learning_rate": 0.0002714400607030141,
      "loss": 3.0209,
      "step": 122194
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3144407272338867,
      "learning_rate": 0.00027143598872207306,
      "loss": 3.0,
      "step": 122195
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.1714611053466797,
      "learning_rate": 0.0002714319167464425,
      "loss": 2.8698,
      "step": 122196
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.4470407962799072,
      "learning_rate": 0.00027142784477612343,
      "loss": 3.0806,
      "step": 122197
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7556325197219849,
      "learning_rate": 0.0002714237728111164,
      "loss": 3.0303,
      "step": 122198
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.202723979949951,
      "learning_rate": 0.0002714197008514222,
      "loss": 3.0868,
      "step": 122199
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.703087091445923,
      "learning_rate": 0.0002714156288970416,
      "loss": 2.7522,
      "step": 122200
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.528629779815674,
      "learning_rate": 0.00027141155694797547,
      "loss": 3.1067,
      "step": 122201
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8880417346954346,
      "learning_rate": 0.00027140748500422443,
      "loss": 3.0416,
      "step": 122202
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4662222862243652,
      "learning_rate": 0.00027140341306578923,
      "loss": 3.2317,
      "step": 122203
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.139022350311279,
      "learning_rate": 0.0002713993411326707,
      "loss": 2.9114,
      "step": 122204
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0123870372772217,
      "learning_rate": 0.0002713952692048696,
      "loss": 3.0301,
      "step": 122205
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2131638526916504,
      "learning_rate": 0.00027139119728238667,
      "loss": 3.1769,
      "step": 122206
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.484010934829712,
      "learning_rate": 0.00027138712536522265,
      "loss": 2.7732,
      "step": 122207
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.095003366470337,
      "learning_rate": 0.00027138305345337827,
      "loss": 3.0453,
      "step": 122208
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.439917802810669,
      "learning_rate": 0.00027137898154685434,
      "loss": 2.6992,
      "step": 122209
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9740403890609741,
      "learning_rate": 0.00027137490964565156,
      "loss": 3.0799,
      "step": 122210
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5044705867767334,
      "learning_rate": 0.0002713708377497707,
      "loss": 3.0143,
      "step": 122211
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9526649713516235,
      "learning_rate": 0.0002713667658592127,
      "loss": 2.9133,
      "step": 122212
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7468122243881226,
      "learning_rate": 0.00027136269397397804,
      "loss": 3.0286,
      "step": 122213
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9486169815063477,
      "learning_rate": 0.00027135862209406764,
      "loss": 3.1986,
      "step": 122214
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9429214000701904,
      "learning_rate": 0.00027135455021948214,
      "loss": 3.261,
      "step": 122215
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.705014228820801,
      "learning_rate": 0.0002713504783502224,
      "loss": 3.0744,
      "step": 122216
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.655458450317383,
      "learning_rate": 0.00027134640648628913,
      "loss": 3.0914,
      "step": 122217
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5702157020568848,
      "learning_rate": 0.00027134233462768314,
      "loss": 2.8395,
      "step": 122218
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.489847421646118,
      "learning_rate": 0.00027133826277440525,
      "loss": 2.869,
      "step": 122219
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7042880058288574,
      "learning_rate": 0.000271334190926456,
      "loss": 3.0404,
      "step": 122220
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.140941619873047,
      "learning_rate": 0.00027133011908383624,
      "loss": 3.0305,
      "step": 122221
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9670737981796265,
      "learning_rate": 0.0002713260472465468,
      "loss": 3.0469,
      "step": 122222
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8034420013427734,
      "learning_rate": 0.0002713219754145884,
      "loss": 2.9553,
      "step": 122223
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7140876054763794,
      "learning_rate": 0.0002713179035879617,
      "loss": 2.933,
      "step": 122224
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.375032901763916,
      "learning_rate": 0.0002713138317666677,
      "loss": 2.9879,
      "step": 122225
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8301440477371216,
      "learning_rate": 0.0002713097599507069,
      "loss": 3.024,
      "step": 122226
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4079599380493164,
      "learning_rate": 0.00027130568814008013,
      "loss": 3.0579,
      "step": 122227
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8313870429992676,
      "learning_rate": 0.0002713016163347882,
      "loss": 3.0227,
      "step": 122228
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9270834922790527,
      "learning_rate": 0.00027129754453483187,
      "loss": 3.2801,
      "step": 122229
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4155917167663574,
      "learning_rate": 0.0002712934727402118,
      "loss": 2.873,
      "step": 122230
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7895876169204712,
      "learning_rate": 0.000271289400950929,
      "loss": 3.0637,
      "step": 122231
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5787973403930664,
      "learning_rate": 0.0002712853291669839,
      "loss": 2.8836,
      "step": 122232
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9765353202819824,
      "learning_rate": 0.0002712812573883774,
      "loss": 2.876,
      "step": 122233
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1381266117095947,
      "learning_rate": 0.00027127718561511025,
      "loss": 2.9501,
      "step": 122234
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.206172227859497,
      "learning_rate": 0.0002712731138471832,
      "loss": 3.0531,
      "step": 122235
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.535949230194092,
      "learning_rate": 0.000271269042084597,
      "loss": 2.8688,
      "step": 122236
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2182180881500244,
      "learning_rate": 0.0002712649703273526,
      "loss": 2.9489,
      "step": 122237
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.095900297164917,
      "learning_rate": 0.00027126089857545043,
      "loss": 3.0621,
      "step": 122238
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.302868127822876,
      "learning_rate": 0.0002712568268288914,
      "loss": 2.901,
      "step": 122239
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.781201720237732,
      "learning_rate": 0.0002712527550876763,
      "loss": 3.0137,
      "step": 122240
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6794722080230713,
      "learning_rate": 0.00027124868335180583,
      "loss": 2.8306,
      "step": 122241
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.810246467590332,
      "learning_rate": 0.00027124461162128077,
      "loss": 3.1232,
      "step": 122242
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.143498182296753,
      "learning_rate": 0.0002712405398961019,
      "loss": 2.9018,
      "step": 122243
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.093160629272461,
      "learning_rate": 0.0002712364681762701,
      "loss": 2.9557,
      "step": 122244
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8134552240371704,
      "learning_rate": 0.0002712323964617858,
      "loss": 2.9965,
      "step": 122245
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.254446506500244,
      "learning_rate": 0.00027122832475265,
      "loss": 3.1471,
      "step": 122246
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7837060689926147,
      "learning_rate": 0.00027122425304886336,
      "loss": 3.0767,
      "step": 122247
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1111631393432617,
      "learning_rate": 0.00027122018135042667,
      "loss": 2.9682,
      "step": 122248
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.996568202972412,
      "learning_rate": 0.00027121610965734073,
      "loss": 2.9647,
      "step": 122249
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.354033946990967,
      "learning_rate": 0.00027121203796960636,
      "loss": 3.0553,
      "step": 122250
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1214210987091064,
      "learning_rate": 0.00027120796628722404,
      "loss": 2.9195,
      "step": 122251
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.464412212371826,
      "learning_rate": 0.0002712038946101948,
      "loss": 3.0559,
      "step": 122252
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9982823133468628,
      "learning_rate": 0.00027119982293851927,
      "loss": 3.2043,
      "step": 122253
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3895299434661865,
      "learning_rate": 0.00027119575127219816,
      "loss": 2.8499,
      "step": 122254
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1133298873901367,
      "learning_rate": 0.00027119167961123236,
      "loss": 3.0157,
      "step": 122255
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1405222415924072,
      "learning_rate": 0.0002711876079556227,
      "loss": 2.938,
      "step": 122256
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8951128721237183,
      "learning_rate": 0.00027118353630536967,
      "loss": 2.9982,
      "step": 122257
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2749736309051514,
      "learning_rate": 0.0002711794646604742,
      "loss": 2.9947,
      "step": 122258
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2107439041137695,
      "learning_rate": 0.000271175393020937,
      "loss": 3.0801,
      "step": 122259
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.34600830078125,
      "learning_rate": 0.0002711713213867588,
      "loss": 3.0272,
      "step": 122260
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.479424238204956,
      "learning_rate": 0.0002711672497579404,
      "loss": 2.8443,
      "step": 122261
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8736528158187866,
      "learning_rate": 0.00027116317813448274,
      "loss": 3.0122,
      "step": 122262
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4445364475250244,
      "learning_rate": 0.0002711591065163862,
      "loss": 2.9292,
      "step": 122263
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8269497156143188,
      "learning_rate": 0.0002711550349036518,
      "loss": 2.9011,
      "step": 122264
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5575621128082275,
      "learning_rate": 0.0002711509632962801,
      "loss": 2.7844,
      "step": 122265
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.684081792831421,
      "learning_rate": 0.00027114689169427207,
      "loss": 3.2354,
      "step": 122266
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3110580444335938,
      "learning_rate": 0.00027114282009762836,
      "loss": 2.9297,
      "step": 122267
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.088042974472046,
      "learning_rate": 0.00027113874850634987,
      "loss": 2.7978,
      "step": 122268
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7486311197280884,
      "learning_rate": 0.0002711346769204371,
      "loss": 2.9359,
      "step": 122269
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8846120834350586,
      "learning_rate": 0.0002711306053398909,
      "loss": 2.8075,
      "step": 122270
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.759527564048767,
      "learning_rate": 0.0002711265337647122,
      "loss": 3.1457,
      "step": 122271
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9004615545272827,
      "learning_rate": 0.0002711224621949015,
      "loss": 3.0295,
      "step": 122272
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.17698335647583,
      "learning_rate": 0.0002711183906304597,
      "loss": 2.8579,
      "step": 122273
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.33894419670105,
      "learning_rate": 0.0002711143190713876,
      "loss": 2.9506,
      "step": 122274
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.364201545715332,
      "learning_rate": 0.0002711102475176859,
      "loss": 3.1706,
      "step": 122275
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5610270500183105,
      "learning_rate": 0.0002711061759693553,
      "loss": 2.9713,
      "step": 122276
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2886805534362793,
      "learning_rate": 0.0002711021044263966,
      "loss": 2.8994,
      "step": 122277
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9891648292541504,
      "learning_rate": 0.00027109803288881054,
      "loss": 2.8289,
      "step": 122278
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.319892406463623,
      "learning_rate": 0.0002710939613565979,
      "loss": 2.7212,
      "step": 122279
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.985877513885498,
      "learning_rate": 0.0002710898898297596,
      "loss": 2.7301,
      "step": 122280
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1992688179016113,
      "learning_rate": 0.0002710858183082961,
      "loss": 3.129,
      "step": 122281
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8912702798843384,
      "learning_rate": 0.0002710817467922083,
      "loss": 3.1015,
      "step": 122282
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9934446811676025,
      "learning_rate": 0.00027107767528149695,
      "loss": 2.9842,
      "step": 122283
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8069525957107544,
      "learning_rate": 0.0002710736037761628,
      "loss": 2.8831,
      "step": 122284
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.697063446044922,
      "learning_rate": 0.0002710695322762066,
      "loss": 3.208,
      "step": 122285
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7006241083145142,
      "learning_rate": 0.0002710654607816292,
      "loss": 2.837,
      "step": 122286
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.949392557144165,
      "learning_rate": 0.0002710613892924312,
      "loss": 2.9359,
      "step": 122287
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.55208420753479,
      "learning_rate": 0.0002710573178086134,
      "loss": 3.2699,
      "step": 122288
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.109127998352051,
      "learning_rate": 0.0002710532463301767,
      "loss": 3.1117,
      "step": 122289
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8734463453292847,
      "learning_rate": 0.00027104917485712166,
      "loss": 2.9728,
      "step": 122290
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4499502182006836,
      "learning_rate": 0.00027104510338944916,
      "loss": 3.064,
      "step": 122291
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2092831134796143,
      "learning_rate": 0.00027104103192716,
      "loss": 2.9791,
      "step": 122292
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6682286262512207,
      "learning_rate": 0.00027103696047025476,
      "loss": 3.1886,
      "step": 122293
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.73819637298584,
      "learning_rate": 0.00027103288901873427,
      "loss": 2.712,
      "step": 122294
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5057570934295654,
      "learning_rate": 0.00027102881757259934,
      "loss": 3.0586,
      "step": 122295
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8695600032806396,
      "learning_rate": 0.0002710247461318507,
      "loss": 2.9508,
      "step": 122296
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.113136053085327,
      "learning_rate": 0.0002710206746964892,
      "loss": 3.0026,
      "step": 122297
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6516450643539429,
      "learning_rate": 0.00027101660326651547,
      "loss": 2.853,
      "step": 122298
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2099356651306152,
      "learning_rate": 0.0002710125318419302,
      "loss": 2.9666,
      "step": 122299
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.784175157546997,
      "learning_rate": 0.00027100846042273427,
      "loss": 2.8557,
      "step": 122300
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.99811851978302,
      "learning_rate": 0.00027100438900892846,
      "loss": 2.6957,
      "step": 122301
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.986685037612915,
      "learning_rate": 0.00027100031760051346,
      "loss": 3.0169,
      "step": 122302
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.873509407043457,
      "learning_rate": 0.00027099624619749004,
      "loss": 3.0952,
      "step": 122303
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0107154846191406,
      "learning_rate": 0.0002709921747998591,
      "loss": 2.9794,
      "step": 122304
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.465606927871704,
      "learning_rate": 0.0002709881034076211,
      "loss": 3.1783,
      "step": 122305
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.784958839416504,
      "learning_rate": 0.00027098403202077703,
      "loss": 3.0246,
      "step": 122306
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8392857313156128,
      "learning_rate": 0.0002709799606393275,
      "loss": 2.6873,
      "step": 122307
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.039156913757324,
      "learning_rate": 0.00027097588926327344,
      "loss": 3.0877,
      "step": 122308
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1445271968841553,
      "learning_rate": 0.0002709718178926154,
      "loss": 2.8934,
      "step": 122309
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1019089221954346,
      "learning_rate": 0.0002709677465273543,
      "loss": 3.061,
      "step": 122310
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7913508415222168,
      "learning_rate": 0.00027096367516749097,
      "loss": 3.1666,
      "step": 122311
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.830763816833496,
      "learning_rate": 0.0002709596038130259,
      "loss": 2.9544,
      "step": 122312
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8390148878097534,
      "learning_rate": 0.00027095553246396005,
      "loss": 2.86,
      "step": 122313
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4557945728302,
      "learning_rate": 0.00027095146112029404,
      "loss": 2.9437,
      "step": 122314
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7490986585617065,
      "learning_rate": 0.0002709473897820287,
      "loss": 2.9881,
      "step": 122315
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9723243713378906,
      "learning_rate": 0.0002709433184491648,
      "loss": 2.9278,
      "step": 122316
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0816221237182617,
      "learning_rate": 0.0002709392471217033,
      "loss": 3.0257,
      "step": 122317
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3513312339782715,
      "learning_rate": 0.0002709351757996445,
      "loss": 3.0797,
      "step": 122318
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.351410150527954,
      "learning_rate": 0.00027093110448298945,
      "loss": 2.8307,
      "step": 122319
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.687053918838501,
      "learning_rate": 0.00027092703317173886,
      "loss": 2.9738,
      "step": 122320
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.170480966567993,
      "learning_rate": 0.00027092296186589346,
      "loss": 2.989,
      "step": 122321
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9693447351455688,
      "learning_rate": 0.000270918890565454,
      "loss": 2.711,
      "step": 122322
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.3843612670898438,
      "learning_rate": 0.0002709148192704215,
      "loss": 3.0104,
      "step": 122323
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.39245343208313,
      "learning_rate": 0.0002709107479807963,
      "loss": 3.0484,
      "step": 122324
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9718374013900757,
      "learning_rate": 0.0002709066766965793,
      "loss": 3.0094,
      "step": 122325
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.831904888153076,
      "learning_rate": 0.0002709026054177713,
      "loss": 3.1364,
      "step": 122326
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.294956922531128,
      "learning_rate": 0.0002708985341443731,
      "loss": 2.8652,
      "step": 122327
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1964666843414307,
      "learning_rate": 0.00027089446287638537,
      "loss": 2.6832,
      "step": 122328
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9898737668991089,
      "learning_rate": 0.0002708903916138091,
      "loss": 2.961,
      "step": 122329
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3838891983032227,
      "learning_rate": 0.0002708863203566447,
      "loss": 2.9432,
      "step": 122330
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2852041721343994,
      "learning_rate": 0.0002708822491048931,
      "loss": 2.9582,
      "step": 122331
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8701633214950562,
      "learning_rate": 0.000270878177858555,
      "loss": 3.1587,
      "step": 122332
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.738144040107727,
      "learning_rate": 0.0002708741066176312,
      "loss": 3.2387,
      "step": 122333
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.1216583251953125,
      "learning_rate": 0.00027087003538212246,
      "loss": 2.928,
      "step": 122334
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.465341806411743,
      "learning_rate": 0.00027086596415202966,
      "loss": 2.9653,
      "step": 122335
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.587641954421997,
      "learning_rate": 0.0002708618929273533,
      "loss": 3.0607,
      "step": 122336
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.579638123512268,
      "learning_rate": 0.00027085782170809425,
      "loss": 2.9182,
      "step": 122337
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.5075507164001465,
      "learning_rate": 0.0002708537504942533,
      "loss": 2.895,
      "step": 122338
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.744349002838135,
      "learning_rate": 0.0002708496792858312,
      "loss": 2.955,
      "step": 122339
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.745148181915283,
      "learning_rate": 0.0002708456080828287,
      "loss": 3.0924,
      "step": 122340
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.303466320037842,
      "learning_rate": 0.0002708415368852467,
      "loss": 3.0296,
      "step": 122341
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4517831802368164,
      "learning_rate": 0.0002708374656930856,
      "loss": 2.696,
      "step": 122342
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8266308307647705,
      "learning_rate": 0.00027083339450634643,
      "loss": 3.1953,
      "step": 122343
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.9082634449005127,
      "learning_rate": 0.00027082932332502983,
      "loss": 2.8238,
      "step": 122344
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.549320697784424,
      "learning_rate": 0.0002708252521491366,
      "loss": 3.0679,
      "step": 122345
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3258395195007324,
      "learning_rate": 0.0002708211809786676,
      "loss": 3.01,
      "step": 122346
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3513054847717285,
      "learning_rate": 0.00027081710981362355,
      "loss": 3.0247,
      "step": 122347
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.533624649047852,
      "learning_rate": 0.00027081303865400503,
      "loss": 3.0196,
      "step": 122348
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.1434125900268555,
      "learning_rate": 0.0002708089674998129,
      "loss": 3.1067,
      "step": 122349
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4355406761169434,
      "learning_rate": 0.000270804896351048,
      "loss": 3.053,
      "step": 122350
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0174002647399902,
      "learning_rate": 0.00027080082520771094,
      "loss": 3.0647,
      "step": 122351
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8162178993225098,
      "learning_rate": 0.00027079675406980265,
      "loss": 3.1239,
      "step": 122352
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.977383613586426,
      "learning_rate": 0.00027079268293732383,
      "loss": 3.0729,
      "step": 122353
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.264023780822754,
      "learning_rate": 0.00027078861181027513,
      "loss": 3.0179,
      "step": 122354
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.009026527404785,
      "learning_rate": 0.00027078454068865735,
      "loss": 3.0647,
      "step": 122355
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.1231491565704346,
      "learning_rate": 0.0002707804695724713,
      "loss": 2.8128,
      "step": 122356
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.208101511001587,
      "learning_rate": 0.0002707763984617177,
      "loss": 3.3544,
      "step": 122357
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.907353401184082,
      "learning_rate": 0.0002707723273563973,
      "loss": 2.9321,
      "step": 122358
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0117814540863037,
      "learning_rate": 0.0002707682562565111,
      "loss": 2.9058,
      "step": 122359
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2199385166168213,
      "learning_rate": 0.0002707641851620594,
      "loss": 2.9912,
      "step": 122360
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8260447978973389,
      "learning_rate": 0.00027076011407304316,
      "loss": 2.8646,
      "step": 122361
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6648941040039062,
      "learning_rate": 0.00027075604298946326,
      "loss": 2.9184,
      "step": 122362
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7212222814559937,
      "learning_rate": 0.0002707519719113203,
      "loss": 3.0148,
      "step": 122363
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9190022945404053,
      "learning_rate": 0.00027074790083861513,
      "loss": 3.2181,
      "step": 122364
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.919143795967102,
      "learning_rate": 0.0002707438297713486,
      "loss": 2.9614,
      "step": 122365
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4777581691741943,
      "learning_rate": 0.0002707397587095212,
      "loss": 3.0887,
      "step": 122366
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2342193126678467,
      "learning_rate": 0.0002707356876531339,
      "loss": 2.8287,
      "step": 122367
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8404682874679565,
      "learning_rate": 0.0002707316166021873,
      "loss": 2.9015,
      "step": 122368
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6040055751800537,
      "learning_rate": 0.0002707275455566823,
      "loss": 2.7813,
      "step": 122369
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1625919342041016,
      "learning_rate": 0.00027072347451661963,
      "loss": 2.9026,
      "step": 122370
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.253483295440674,
      "learning_rate": 0.00027071940348200003,
      "loss": 2.9463,
      "step": 122371
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1928796768188477,
      "learning_rate": 0.00027071533245282415,
      "loss": 3.0322,
      "step": 122372
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.089353084564209,
      "learning_rate": 0.0002707112614290929,
      "loss": 3.0826,
      "step": 122373
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.493971347808838,
      "learning_rate": 0.00027070719041080704,
      "loss": 2.8518,
      "step": 122374
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8022093772888184,
      "learning_rate": 0.0002707031193979672,
      "loss": 3.1573,
      "step": 122375
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8973361253738403,
      "learning_rate": 0.00027069904839057416,
      "loss": 2.8876,
      "step": 122376
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6244460344314575,
      "learning_rate": 0.0002706949773886288,
      "loss": 3.0257,
      "step": 122377
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.894850730895996,
      "learning_rate": 0.0002706909063921318,
      "loss": 3.0629,
      "step": 122378
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9403101205825806,
      "learning_rate": 0.00027068683540108383,
      "loss": 3.0314,
      "step": 122379
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.688408136367798,
      "learning_rate": 0.0002706827644154858,
      "loss": 2.9569,
      "step": 122380
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6670963764190674,
      "learning_rate": 0.0002706786934353384,
      "loss": 2.8714,
      "step": 122381
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0243451595306396,
      "learning_rate": 0.0002706746224606424,
      "loss": 2.8547,
      "step": 122382
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.135175943374634,
      "learning_rate": 0.00027067055149139843,
      "loss": 2.9662,
      "step": 122383
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4368176460266113,
      "learning_rate": 0.0002706664805276075,
      "loss": 2.9428,
      "step": 122384
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.749523878097534,
      "learning_rate": 0.0002706624095692701,
      "loss": 2.7841,
      "step": 122385
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5798158645629883,
      "learning_rate": 0.0002706583386163872,
      "loss": 2.9813,
      "step": 122386
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2084813117980957,
      "learning_rate": 0.0002706542676689594,
      "loss": 3.0404,
      "step": 122387
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0307931900024414,
      "learning_rate": 0.0002706501967269876,
      "loss": 2.9319,
      "step": 122388
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.388148784637451,
      "learning_rate": 0.0002706461257904724,
      "loss": 2.6822,
      "step": 122389
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.669178009033203,
      "learning_rate": 0.0002706420548594148,
      "loss": 2.9713,
      "step": 122390
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.97701358795166,
      "learning_rate": 0.00027063798393381523,
      "loss": 3.1126,
      "step": 122391
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8441120386123657,
      "learning_rate": 0.00027063391301367464,
      "loss": 2.6621,
      "step": 122392
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8177772760391235,
      "learning_rate": 0.00027062984209899377,
      "loss": 3.042,
      "step": 122393
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.5410711765289307,
      "learning_rate": 0.00027062577118977337,
      "loss": 3.306,
      "step": 122394
    },
    {
      "epoch": 1.59,
      "grad_norm": 6.728127956390381,
      "learning_rate": 0.0002706217002860142,
      "loss": 2.7749,
      "step": 122395
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.566220760345459,
      "learning_rate": 0.0002706176293877171,
      "loss": 3.0494,
      "step": 122396
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.234306573867798,
      "learning_rate": 0.0002706135584948826,
      "loss": 2.8329,
      "step": 122397
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.80342173576355,
      "learning_rate": 0.00027060948760751166,
      "loss": 2.8709,
      "step": 122398
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0142037868499756,
      "learning_rate": 0.00027060541672560497,
      "loss": 3.188,
      "step": 122399
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8178588151931763,
      "learning_rate": 0.00027060134584916325,
      "loss": 2.976,
      "step": 122400
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.444655179977417,
      "learning_rate": 0.00027059727497818727,
      "loss": 3.0639,
      "step": 122401
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1395139694213867,
      "learning_rate": 0.000270593204112678,
      "loss": 2.9492,
      "step": 122402
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8948488235473633,
      "learning_rate": 0.0002705891332526358,
      "loss": 2.9063,
      "step": 122403
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0064151287078857,
      "learning_rate": 0.00027058506239806176,
      "loss": 2.7871,
      "step": 122404
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1825006008148193,
      "learning_rate": 0.0002705809915489564,
      "loss": 3.0903,
      "step": 122405
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7204416990280151,
      "learning_rate": 0.00027057692070532065,
      "loss": 2.8632,
      "step": 122406
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3391318321228027,
      "learning_rate": 0.0002705728498671552,
      "loss": 2.9328,
      "step": 122407
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3885111808776855,
      "learning_rate": 0.0002705687790344609,
      "loss": 2.9345,
      "step": 122408
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.0991508960723877,
      "learning_rate": 0.00027056470820723834,
      "loss": 2.9651,
      "step": 122409
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.5277254581451416,
      "learning_rate": 0.0002705606373854883,
      "loss": 2.897,
      "step": 122410
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.823913812637329,
      "learning_rate": 0.00027055656656921166,
      "loss": 3.2044,
      "step": 122411
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.644568920135498,
      "learning_rate": 0.00027055249575840906,
      "loss": 2.8788,
      "step": 122412
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0587594509124756,
      "learning_rate": 0.0002705484249530813,
      "loss": 2.9728,
      "step": 122413
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.029264450073242,
      "learning_rate": 0.0002705443541532293,
      "loss": 3.1423,
      "step": 122414
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2570083141326904,
      "learning_rate": 0.0002705402833588535,
      "loss": 3.0686,
      "step": 122415
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5334789752960205,
      "learning_rate": 0.0002705362125699549,
      "loss": 2.8258,
      "step": 122416
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9841903448104858,
      "learning_rate": 0.0002705321417865341,
      "loss": 3.1149,
      "step": 122417
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.218832969665527,
      "learning_rate": 0.00027052807100859193,
      "loss": 2.824,
      "step": 122418
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.876070499420166,
      "learning_rate": 0.0002705240002361292,
      "loss": 3.0539,
      "step": 122419
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.9981565475463867,
      "learning_rate": 0.0002705199294691467,
      "loss": 2.9123,
      "step": 122420
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8664660453796387,
      "learning_rate": 0.000270515858707645,
      "loss": 2.9616,
      "step": 122421
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.1882307529449463,
      "learning_rate": 0.0002705117879516249,
      "loss": 2.9034,
      "step": 122422
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.5642192363739014,
      "learning_rate": 0.0002705077172010873,
      "loss": 2.9849,
      "step": 122423
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.153881072998047,
      "learning_rate": 0.0002705036464560328,
      "loss": 2.7581,
      "step": 122424
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.068039894104004,
      "learning_rate": 0.0002704995757164623,
      "loss": 3.1619,
      "step": 122425
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.675657272338867,
      "learning_rate": 0.0002704955049823766,
      "loss": 2.9711,
      "step": 122426
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.232056140899658,
      "learning_rate": 0.00027049143425377615,
      "loss": 2.8861,
      "step": 122427
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.852705478668213,
      "learning_rate": 0.0002704873635306619,
      "loss": 2.9271,
      "step": 122428
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7210652828216553,
      "learning_rate": 0.0002704832928130347,
      "loss": 2.9213,
      "step": 122429
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.965064287185669,
      "learning_rate": 0.00027047922210089514,
      "loss": 3.245,
      "step": 122430
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.139291286468506,
      "learning_rate": 0.0002704751513942441,
      "loss": 2.6266,
      "step": 122431
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2091941833496094,
      "learning_rate": 0.00027047108069308236,
      "loss": 2.8563,
      "step": 122432
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3471577167510986,
      "learning_rate": 0.0002704670099974105,
      "loss": 2.7534,
      "step": 122433
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8216644525527954,
      "learning_rate": 0.0002704629393072294,
      "loss": 3.1078,
      "step": 122434
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1401331424713135,
      "learning_rate": 0.00027045886862253973,
      "loss": 3.0217,
      "step": 122435
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.222752571105957,
      "learning_rate": 0.00027045479794334236,
      "loss": 2.8007,
      "step": 122436
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.19488263130188,
      "learning_rate": 0.000270450727269638,
      "loss": 3.1937,
      "step": 122437
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.147629499435425,
      "learning_rate": 0.00027044665660142756,
      "loss": 3.0499,
      "step": 122438
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3186419010162354,
      "learning_rate": 0.0002704425859387115,
      "loss": 2.9948,
      "step": 122439
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.039344310760498,
      "learning_rate": 0.0002704385152814907,
      "loss": 2.9836,
      "step": 122440
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.09615421295166,
      "learning_rate": 0.00027043444462976595,
      "loss": 2.9021,
      "step": 122441
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.864294409751892,
      "learning_rate": 0.000270430373983538,
      "loss": 2.9343,
      "step": 122442
    },
    {
      "epoch": 1.59,
      "grad_norm": 4.153250217437744,
      "learning_rate": 0.00027042630334280757,
      "loss": 3.1438,
      "step": 122443
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0660157203674316,
      "learning_rate": 0.00027042223270757547,
      "loss": 3.0723,
      "step": 122444
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8126593828201294,
      "learning_rate": 0.00027041816207784255,
      "loss": 2.9731,
      "step": 122445
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9099644422531128,
      "learning_rate": 0.0002704140914536094,
      "loss": 2.9193,
      "step": 122446
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7344430685043335,
      "learning_rate": 0.00027041002083487675,
      "loss": 3.0621,
      "step": 122447
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9099317789077759,
      "learning_rate": 0.00027040595022164544,
      "loss": 2.8423,
      "step": 122448
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1663126945495605,
      "learning_rate": 0.00027040187961391624,
      "loss": 2.9962,
      "step": 122449
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9450057744979858,
      "learning_rate": 0.0002703978090116899,
      "loss": 3.1209,
      "step": 122450
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5031096935272217,
      "learning_rate": 0.0002703937384149673,
      "loss": 2.9234,
      "step": 122451
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.044438600540161,
      "learning_rate": 0.0002703896678237489,
      "loss": 2.9887,
      "step": 122452
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2254245281219482,
      "learning_rate": 0.00027038559723803566,
      "loss": 3.0932,
      "step": 122453
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0614421367645264,
      "learning_rate": 0.0002703815266578283,
      "loss": 3.2067,
      "step": 122454
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.154879093170166,
      "learning_rate": 0.0002703774560831275,
      "loss": 2.9555,
      "step": 122455
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9515737295150757,
      "learning_rate": 0.0002703733855139341,
      "loss": 2.8435,
      "step": 122456
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.8629748821258545,
      "learning_rate": 0.000270369314950249,
      "loss": 2.9908,
      "step": 122457
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9190163612365723,
      "learning_rate": 0.00027036524439207266,
      "loss": 2.957,
      "step": 122458
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.6905953884124756,
      "learning_rate": 0.0002703611738394061,
      "loss": 2.9301,
      "step": 122459
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.707569122314453,
      "learning_rate": 0.00027035710329224985,
      "loss": 2.8801,
      "step": 122460
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9774038791656494,
      "learning_rate": 0.0002703530327506048,
      "loss": 3.0024,
      "step": 122461
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.124492883682251,
      "learning_rate": 0.0002703489622144717,
      "loss": 3.1399,
      "step": 122462
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8895392417907715,
      "learning_rate": 0.0002703448916838513,
      "loss": 2.896,
      "step": 122463
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8190693855285645,
      "learning_rate": 0.0002703408211587443,
      "loss": 2.8302,
      "step": 122464
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0100576877593994,
      "learning_rate": 0.0002703367506391515,
      "loss": 2.8595,
      "step": 122465
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.5377402305603027,
      "learning_rate": 0.00027033268012507375,
      "loss": 3.0239,
      "step": 122466
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.0447347164154053,
      "learning_rate": 0.0002703286096165116,
      "loss": 2.9657,
      "step": 122467
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2921276092529297,
      "learning_rate": 0.00027032453911346596,
      "loss": 3.0503,
      "step": 122468
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9084818363189697,
      "learning_rate": 0.0002703204686159376,
      "loss": 3.0884,
      "step": 122469
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8653926849365234,
      "learning_rate": 0.00027031639812392715,
      "loss": 3.0537,
      "step": 122470
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.4548590183258057,
      "learning_rate": 0.00027031232763743545,
      "loss": 2.911,
      "step": 122471
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9215203523635864,
      "learning_rate": 0.00027030825715646325,
      "loss": 2.9353,
      "step": 122472
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.678941249847412,
      "learning_rate": 0.0002703041866810114,
      "loss": 2.992,
      "step": 122473
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7753245830535889,
      "learning_rate": 0.00027030011621108043,
      "loss": 3.0626,
      "step": 122474
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2279560565948486,
      "learning_rate": 0.0002702960457466713,
      "loss": 2.9246,
      "step": 122475
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.1122689247131348,
      "learning_rate": 0.0002702919752877847,
      "loss": 2.939,
      "step": 122476
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2097983360290527,
      "learning_rate": 0.00027028790483442133,
      "loss": 2.7812,
      "step": 122477
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.051544427871704,
      "learning_rate": 0.00027028383438658203,
      "loss": 3.3081,
      "step": 122478
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2111449241638184,
      "learning_rate": 0.00027027976394426755,
      "loss": 2.9145,
      "step": 122479
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2979750633239746,
      "learning_rate": 0.00027027569350747865,
      "loss": 3.1851,
      "step": 122480
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9815648794174194,
      "learning_rate": 0.00027027162307621604,
      "loss": 3.0731,
      "step": 122481
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.808749198913574,
      "learning_rate": 0.0002702675526504804,
      "loss": 3.0732,
      "step": 122482
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9635703563690186,
      "learning_rate": 0.00027026348223027267,
      "loss": 2.972,
      "step": 122483
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.124037265777588,
      "learning_rate": 0.0002702594118155935,
      "loss": 3.054,
      "step": 122484
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.787708282470703,
      "learning_rate": 0.00027025534140644365,
      "loss": 2.9029,
      "step": 122485
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9251761436462402,
      "learning_rate": 0.0002702512710028239,
      "loss": 3.2708,
      "step": 122486
    },
    {
      "epoch": 1.59,
      "grad_norm": 3.3515703678131104,
      "learning_rate": 0.0002702472006047351,
      "loss": 3.1206,
      "step": 122487
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.6864826679229736,
      "learning_rate": 0.0002702431302121778,
      "loss": 2.9737,
      "step": 122488
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.2035887241363525,
      "learning_rate": 0.0002702390598251529,
      "loss": 2.9458,
      "step": 122489
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.3295342922210693,
      "learning_rate": 0.00027023498944366107,
      "loss": 2.669,
      "step": 122490
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.022123336791992,
      "learning_rate": 0.0002702309190677031,
      "loss": 2.864,
      "step": 122491
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.9455150365829468,
      "learning_rate": 0.00027022684869727986,
      "loss": 3.1762,
      "step": 122492
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.157883644104004,
      "learning_rate": 0.00027022277833239206,
      "loss": 3.0326,
      "step": 122493
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.7442249059677124,
      "learning_rate": 0.0002702187079730403,
      "loss": 2.9331,
      "step": 122494
    },
    {
      "epoch": 1.59,
      "grad_norm": 1.8261646032333374,
      "learning_rate": 0.00027021463761922544,
      "loss": 2.8174,
      "step": 122495
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.150799036026001,
      "learning_rate": 0.00027021056727094826,
      "loss": 3.2019,
      "step": 122496
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.65136456489563,
      "learning_rate": 0.00027020649692820946,
      "loss": 2.8724,
      "step": 122497
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3530895709991455,
      "learning_rate": 0.00027020242659100984,
      "loss": 3.1639,
      "step": 122498
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6512584686279297,
      "learning_rate": 0.0002701983562593503,
      "loss": 2.9249,
      "step": 122499
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2373054027557373,
      "learning_rate": 0.0002701942859332313,
      "loss": 3.0908,
      "step": 122500
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.274158477783203,
      "learning_rate": 0.0002701902156126538,
      "loss": 3.057,
      "step": 122501
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8176376819610596,
      "learning_rate": 0.00027018614529761845,
      "loss": 3.2372,
      "step": 122502
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.157564163208008,
      "learning_rate": 0.00027018207498812606,
      "loss": 2.8367,
      "step": 122503
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.063534736633301,
      "learning_rate": 0.0002701780046841774,
      "loss": 2.798,
      "step": 122504
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8767306804656982,
      "learning_rate": 0.00027017393438577336,
      "loss": 3.0581,
      "step": 122505
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7656869888305664,
      "learning_rate": 0.00027016986409291436,
      "loss": 3.2284,
      "step": 122506
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.081371784210205,
      "learning_rate": 0.0002701657938056014,
      "loss": 2.9319,
      "step": 122507
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5831286907196045,
      "learning_rate": 0.00027016172352383513,
      "loss": 3.0242,
      "step": 122508
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8492788076400757,
      "learning_rate": 0.0002701576532476164,
      "loss": 2.9355,
      "step": 122509
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.004457950592041,
      "learning_rate": 0.00027015358297694593,
      "loss": 3.1785,
      "step": 122510
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2717323303222656,
      "learning_rate": 0.00027014951271182446,
      "loss": 3.1239,
      "step": 122511
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.191816806793213,
      "learning_rate": 0.00027014544245225285,
      "loss": 3.1115,
      "step": 122512
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8184822797775269,
      "learning_rate": 0.0002701413721982317,
      "loss": 3.4254,
      "step": 122513
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.777594566345215,
      "learning_rate": 0.0002701373019497618,
      "loss": 3.0968,
      "step": 122514
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.063178300857544,
      "learning_rate": 0.00027013323170684396,
      "loss": 3.2906,
      "step": 122515
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3928911685943604,
      "learning_rate": 0.0002701291614694789,
      "loss": 3.0257,
      "step": 122516
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1049046516418457,
      "learning_rate": 0.0002701250912376674,
      "loss": 2.8782,
      "step": 122517
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0165579319000244,
      "learning_rate": 0.00027012102101141034,
      "loss": 2.9978,
      "step": 122518
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.904775857925415,
      "learning_rate": 0.0002701169507907082,
      "loss": 3.0797,
      "step": 122519
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9843003749847412,
      "learning_rate": 0.0002701128805755619,
      "loss": 2.886,
      "step": 122520
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4253158569335938,
      "learning_rate": 0.00027010881036597216,
      "loss": 3.3221,
      "step": 122521
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.318147897720337,
      "learning_rate": 0.0002701047401619398,
      "loss": 3.0392,
      "step": 122522
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.187798261642456,
      "learning_rate": 0.00027010066996346546,
      "loss": 2.8916,
      "step": 122523
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.106175661087036,
      "learning_rate": 0.0002700965997705501,
      "loss": 3.2177,
      "step": 122524
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1634433269500732,
      "learning_rate": 0.00027009252958319434,
      "loss": 3.3416,
      "step": 122525
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.904451847076416,
      "learning_rate": 0.00027008845940139885,
      "loss": 2.798,
      "step": 122526
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9685765504837036,
      "learning_rate": 0.00027008438922516444,
      "loss": 3.0391,
      "step": 122527
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9613221883773804,
      "learning_rate": 0.000270080319054492,
      "loss": 2.8225,
      "step": 122528
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.068402051925659,
      "learning_rate": 0.00027007624888938215,
      "loss": 2.9794,
      "step": 122529
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5797648429870605,
      "learning_rate": 0.00027007217872983583,
      "loss": 2.8292,
      "step": 122530
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.3731303215026855,
      "learning_rate": 0.0002700681085758535,
      "loss": 2.9689,
      "step": 122531
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.824720621109009,
      "learning_rate": 0.00027006403842743613,
      "loss": 2.9356,
      "step": 122532
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.020085573196411,
      "learning_rate": 0.00027005996828458436,
      "loss": 3.0011,
      "step": 122533
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9815186262130737,
      "learning_rate": 0.000270055898147299,
      "loss": 2.9765,
      "step": 122534
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8471235036849976,
      "learning_rate": 0.00027005182801558084,
      "loss": 2.9692,
      "step": 122535
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.017243385314941,
      "learning_rate": 0.0002700477578894308,
      "loss": 3.0073,
      "step": 122536
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8337929248809814,
      "learning_rate": 0.0002700436877688492,
      "loss": 2.9569,
      "step": 122537
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.046147108078003,
      "learning_rate": 0.00027003961765383717,
      "loss": 3.1418,
      "step": 122538
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.902379035949707,
      "learning_rate": 0.00027003554754439525,
      "loss": 2.8524,
      "step": 122539
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1091842651367188,
      "learning_rate": 0.0002700314774405243,
      "loss": 3.0868,
      "step": 122540
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.116736888885498,
      "learning_rate": 0.0002700274073422251,
      "loss": 3.3879,
      "step": 122541
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0042777061462402,
      "learning_rate": 0.0002700233372494984,
      "loss": 2.9768,
      "step": 122542
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4323959350585938,
      "learning_rate": 0.0002700192671623449,
      "loss": 3.1022,
      "step": 122543
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2565550804138184,
      "learning_rate": 0.0002700151970807654,
      "loss": 3.0194,
      "step": 122544
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9969172477722168,
      "learning_rate": 0.0002700111270047606,
      "loss": 3.1164,
      "step": 122545
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.172968864440918,
      "learning_rate": 0.0002700070569343313,
      "loss": 2.8018,
      "step": 122546
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8509832620620728,
      "learning_rate": 0.0002700029868694782,
      "loss": 3.0632,
      "step": 122547
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4294116497039795,
      "learning_rate": 0.00026999891681020227,
      "loss": 2.6558,
      "step": 122548
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1908178329467773,
      "learning_rate": 0.000269994846756504,
      "loss": 2.9477,
      "step": 122549
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6000125408172607,
      "learning_rate": 0.00026999077670838425,
      "loss": 3.1697,
      "step": 122550
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4301161766052246,
      "learning_rate": 0.00026998670666584384,
      "loss": 3.0273,
      "step": 122551
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0116469860076904,
      "learning_rate": 0.00026998263662888344,
      "loss": 2.9196,
      "step": 122552
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0243875980377197,
      "learning_rate": 0.00026997856659750376,
      "loss": 2.8489,
      "step": 122553
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7285728454589844,
      "learning_rate": 0.0002699744965717058,
      "loss": 2.8987,
      "step": 122554
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9717366695404053,
      "learning_rate": 0.00026997042655149,
      "loss": 3.0806,
      "step": 122555
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7527084350585938,
      "learning_rate": 0.0002699663565368573,
      "loss": 3.0364,
      "step": 122556
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.089398145675659,
      "learning_rate": 0.0002699622865278084,
      "loss": 2.8517,
      "step": 122557
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.19547963142395,
      "learning_rate": 0.0002699582165243442,
      "loss": 2.9651,
      "step": 122558
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.144662380218506,
      "learning_rate": 0.0002699541465264652,
      "loss": 3.0567,
      "step": 122559
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.270582437515259,
      "learning_rate": 0.00026995007653417237,
      "loss": 2.757,
      "step": 122560
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.502563714981079,
      "learning_rate": 0.00026994600654746637,
      "loss": 3.0684,
      "step": 122561
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6386406421661377,
      "learning_rate": 0.0002699419365663479,
      "loss": 2.9551,
      "step": 122562
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3119022846221924,
      "learning_rate": 0.00026993786659081786,
      "loss": 3.14,
      "step": 122563
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9556549787521362,
      "learning_rate": 0.0002699337966208769,
      "loss": 2.8501,
      "step": 122564
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0378448963165283,
      "learning_rate": 0.0002699297266565259,
      "loss": 2.8696,
      "step": 122565
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5082194805145264,
      "learning_rate": 0.0002699256566977655,
      "loss": 2.9286,
      "step": 122566
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7505451440811157,
      "learning_rate": 0.00026992158674459644,
      "loss": 2.8932,
      "step": 122567
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1240060329437256,
      "learning_rate": 0.00026991751679701956,
      "loss": 3.015,
      "step": 122568
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.9291677474975586,
      "learning_rate": 0.0002699134468550355,
      "loss": 3.0053,
      "step": 122569
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9212989807128906,
      "learning_rate": 0.0002699093769186452,
      "loss": 2.8655,
      "step": 122570
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.697993516921997,
      "learning_rate": 0.00026990530698784923,
      "loss": 3.1184,
      "step": 122571
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.279003620147705,
      "learning_rate": 0.0002699012370626486,
      "loss": 3.1001,
      "step": 122572
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7822394371032715,
      "learning_rate": 0.00026989716714304377,
      "loss": 2.6675,
      "step": 122573
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.974810004234314,
      "learning_rate": 0.0002698930972290356,
      "loss": 3.0446,
      "step": 122574
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7295747995376587,
      "learning_rate": 0.0002698890273206249,
      "loss": 3.0316,
      "step": 122575
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3044254779815674,
      "learning_rate": 0.00026988495741781237,
      "loss": 2.8953,
      "step": 122576
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.058748245239258,
      "learning_rate": 0.00026988088752059885,
      "loss": 3.1692,
      "step": 122577
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5648112297058105,
      "learning_rate": 0.000269876817628985,
      "loss": 3.1372,
      "step": 122578
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.16035795211792,
      "learning_rate": 0.00026987274774297176,
      "loss": 2.981,
      "step": 122579
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5404763221740723,
      "learning_rate": 0.0002698686778625596,
      "loss": 2.9814,
      "step": 122580
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9265756607055664,
      "learning_rate": 0.0002698646079877494,
      "loss": 2.8527,
      "step": 122581
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3963377475738525,
      "learning_rate": 0.000269860538118542,
      "loss": 2.7975,
      "step": 122582
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.965340256690979,
      "learning_rate": 0.00026985646825493804,
      "loss": 2.8014,
      "step": 122583
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0572822093963623,
      "learning_rate": 0.00026985239839693833,
      "loss": 2.8617,
      "step": 122584
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8059154748916626,
      "learning_rate": 0.00026984832854454383,
      "loss": 2.9089,
      "step": 122585
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4926252365112305,
      "learning_rate": 0.00026984425869775494,
      "loss": 3.1354,
      "step": 122586
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8557265996932983,
      "learning_rate": 0.0002698401888565725,
      "loss": 3.0416,
      "step": 122587
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.12964129447937,
      "learning_rate": 0.0002698361190209974,
      "loss": 3.0059,
      "step": 122588
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.010103225708008,
      "learning_rate": 0.0002698320491910303,
      "loss": 2.9375,
      "step": 122589
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0834391117095947,
      "learning_rate": 0.000269827979366672,
      "loss": 2.9772,
      "step": 122590
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.005258083343506,
      "learning_rate": 0.00026982390954792345,
      "loss": 2.9349,
      "step": 122591
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.573489189147949,
      "learning_rate": 0.000269819839734785,
      "loss": 3.0068,
      "step": 122592
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9634137153625488,
      "learning_rate": 0.0002698157699272576,
      "loss": 2.9177,
      "step": 122593
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.4224958419799805,
      "learning_rate": 0.00026981170012534204,
      "loss": 2.7374,
      "step": 122594
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.337463855743408,
      "learning_rate": 0.0002698076303290391,
      "loss": 2.8648,
      "step": 122595
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1926515102386475,
      "learning_rate": 0.00026980356053834945,
      "loss": 3.2329,
      "step": 122596
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.031663179397583,
      "learning_rate": 0.000269799490753274,
      "loss": 3.1605,
      "step": 122597
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9680973291397095,
      "learning_rate": 0.0002697954209738133,
      "loss": 3.2404,
      "step": 122598
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.38926362991333,
      "learning_rate": 0.00026979135119996816,
      "loss": 2.9045,
      "step": 122599
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0994606018066406,
      "learning_rate": 0.0002697872814317394,
      "loss": 2.8873,
      "step": 122600
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.559634804725647,
      "learning_rate": 0.00026978321166912776,
      "loss": 3.2209,
      "step": 122601
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.376621723175049,
      "learning_rate": 0.000269779141912134,
      "loss": 3.1536,
      "step": 122602
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.027395486831665,
      "learning_rate": 0.00026977507216075896,
      "loss": 2.9685,
      "step": 122603
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.055525541305542,
      "learning_rate": 0.00026977100241500323,
      "loss": 2.9011,
      "step": 122604
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8962112665176392,
      "learning_rate": 0.00026976693267486763,
      "loss": 3.0029,
      "step": 122605
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5052974224090576,
      "learning_rate": 0.0002697628629403529,
      "loss": 3.0409,
      "step": 122606
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.216505527496338,
      "learning_rate": 0.00026975879321145983,
      "loss": 2.9075,
      "step": 122607
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.046374559402466,
      "learning_rate": 0.00026975472348818915,
      "loss": 3.1256,
      "step": 122608
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7154682874679565,
      "learning_rate": 0.0002697506537705418,
      "loss": 2.976,
      "step": 122609
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6186764240264893,
      "learning_rate": 0.00026974658405851825,
      "loss": 2.8623,
      "step": 122610
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.574418067932129,
      "learning_rate": 0.0002697425143521194,
      "loss": 2.9563,
      "step": 122611
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7577226161956787,
      "learning_rate": 0.00026973844465134596,
      "loss": 3.0442,
      "step": 122612
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0302774906158447,
      "learning_rate": 0.0002697343749561987,
      "loss": 2.8996,
      "step": 122613
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7745301723480225,
      "learning_rate": 0.0002697303052666784,
      "loss": 2.8979,
      "step": 122614
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0994508266448975,
      "learning_rate": 0.00026972623558278594,
      "loss": 3.295,
      "step": 122615
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.199902057647705,
      "learning_rate": 0.0002697221659045218,
      "loss": 3.0642,
      "step": 122616
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0154507160186768,
      "learning_rate": 0.0002697180962318869,
      "loss": 3.2009,
      "step": 122617
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.068779468536377,
      "learning_rate": 0.00026971402656488197,
      "loss": 2.585,
      "step": 122618
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.136381149291992,
      "learning_rate": 0.0002697099569035078,
      "loss": 3.0184,
      "step": 122619
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.672560930252075,
      "learning_rate": 0.0002697058872477651,
      "loss": 2.7663,
      "step": 122620
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8050868511199951,
      "learning_rate": 0.0002697018175976548,
      "loss": 2.5583,
      "step": 122621
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6523734331130981,
      "learning_rate": 0.0002696977479531773,
      "loss": 3.1425,
      "step": 122622
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2636239528656006,
      "learning_rate": 0.0002696936783143336,
      "loss": 3.1083,
      "step": 122623
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1541004180908203,
      "learning_rate": 0.0002696896086811244,
      "loss": 2.9855,
      "step": 122624
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.588118076324463,
      "learning_rate": 0.0002696855390535505,
      "loss": 2.6089,
      "step": 122625
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8076245784759521,
      "learning_rate": 0.0002696814694316127,
      "loss": 2.9647,
      "step": 122626
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.711042881011963,
      "learning_rate": 0.0002696773998153117,
      "loss": 2.8329,
      "step": 122627
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7525938749313354,
      "learning_rate": 0.0002696733302046482,
      "loss": 2.8743,
      "step": 122628
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9020036458969116,
      "learning_rate": 0.000269669260599623,
      "loss": 2.7823,
      "step": 122629
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8129160404205322,
      "learning_rate": 0.00026966519100023677,
      "loss": 3.1091,
      "step": 122630
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.903230667114258,
      "learning_rate": 0.0002696611214064904,
      "loss": 2.9913,
      "step": 122631
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9832504987716675,
      "learning_rate": 0.0002696570518183846,
      "loss": 3.0467,
      "step": 122632
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.982956886291504,
      "learning_rate": 0.0002696529822359202,
      "loss": 2.9066,
      "step": 122633
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1928508281707764,
      "learning_rate": 0.00026964891265909786,
      "loss": 3.1589,
      "step": 122634
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.671881914138794,
      "learning_rate": 0.00026964484308791836,
      "loss": 2.907,
      "step": 122635
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1285831928253174,
      "learning_rate": 0.0002696407735223824,
      "loss": 2.8559,
      "step": 122636
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.477985143661499,
      "learning_rate": 0.0002696367039624908,
      "loss": 2.6951,
      "step": 122637
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.0960488319396973,
      "learning_rate": 0.0002696326344082443,
      "loss": 3.058,
      "step": 122638
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.831393003463745,
      "learning_rate": 0.00026962856485964376,
      "loss": 2.96,
      "step": 122639
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9092271327972412,
      "learning_rate": 0.0002696244953166898,
      "loss": 3.0741,
      "step": 122640
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.167811393737793,
      "learning_rate": 0.00026962042577938314,
      "loss": 2.8544,
      "step": 122641
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.043283462524414,
      "learning_rate": 0.00026961635624772476,
      "loss": 2.8963,
      "step": 122642
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8153786659240723,
      "learning_rate": 0.0002696122867217152,
      "loss": 3.0143,
      "step": 122643
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2677228450775146,
      "learning_rate": 0.0002696082172013552,
      "loss": 2.9495,
      "step": 122644
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9675374031066895,
      "learning_rate": 0.0002696041476866457,
      "loss": 3.2107,
      "step": 122645
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.610283136367798,
      "learning_rate": 0.0002696000781775874,
      "loss": 2.9063,
      "step": 122646
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0505447387695312,
      "learning_rate": 0.00026959600867418093,
      "loss": 3.0633,
      "step": 122647
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0610744953155518,
      "learning_rate": 0.0002695919391764272,
      "loss": 2.9976,
      "step": 122648
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9122350215911865,
      "learning_rate": 0.00026958786968432684,
      "loss": 3.0346,
      "step": 122649
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.260958433151245,
      "learning_rate": 0.0002695838001978807,
      "loss": 2.9604,
      "step": 122650
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0679430961608887,
      "learning_rate": 0.0002695797307170895,
      "loss": 3.033,
      "step": 122651
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.347414493560791,
      "learning_rate": 0.0002695756612419541,
      "loss": 3.0147,
      "step": 122652
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2572460174560547,
      "learning_rate": 0.00026957159177247507,
      "loss": 3.1572,
      "step": 122653
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.294708490371704,
      "learning_rate": 0.0002695675223086532,
      "loss": 2.7565,
      "step": 122654
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.493865489959717,
      "learning_rate": 0.0002695634528504894,
      "loss": 2.7973,
      "step": 122655
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9839892387390137,
      "learning_rate": 0.0002695593833979842,
      "loss": 2.7934,
      "step": 122656
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.37610125541687,
      "learning_rate": 0.0002695553139511387,
      "loss": 2.9509,
      "step": 122657
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8438891172409058,
      "learning_rate": 0.00026955124450995334,
      "loss": 3.1274,
      "step": 122658
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7277764081954956,
      "learning_rate": 0.00026954717507442894,
      "loss": 3.0572,
      "step": 122659
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0934431552886963,
      "learning_rate": 0.0002695431056445663,
      "loss": 3.1416,
      "step": 122660
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.946506977081299,
      "learning_rate": 0.0002695390362203661,
      "loss": 2.9925,
      "step": 122661
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9675378799438477,
      "learning_rate": 0.0002695349668018293,
      "loss": 2.7843,
      "step": 122662
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.766191005706787,
      "learning_rate": 0.0002695308973889564,
      "loss": 2.9381,
      "step": 122663
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.648012399673462,
      "learning_rate": 0.0002695268279817485,
      "loss": 2.9166,
      "step": 122664
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7858750820159912,
      "learning_rate": 0.000269522758580206,
      "loss": 3.2547,
      "step": 122665
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7604632377624512,
      "learning_rate": 0.0002695186891843297,
      "loss": 3.0653,
      "step": 122666
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0373127460479736,
      "learning_rate": 0.00026951461979412054,
      "loss": 2.9456,
      "step": 122667
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7387619018554688,
      "learning_rate": 0.0002695105504095792,
      "loss": 3.1585,
      "step": 122668
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.047563076019287,
      "learning_rate": 0.0002695064810307064,
      "loss": 3.0158,
      "step": 122669
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2127373218536377,
      "learning_rate": 0.00026950241165750303,
      "loss": 2.7471,
      "step": 122670
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2022509574890137,
      "learning_rate": 0.0002694983422899696,
      "loss": 2.9692,
      "step": 122671
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8444074392318726,
      "learning_rate": 0.00026949427292810704,
      "loss": 2.9703,
      "step": 122672
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8783457279205322,
      "learning_rate": 0.00026949020357191605,
      "loss": 3.0852,
      "step": 122673
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7545738220214844,
      "learning_rate": 0.0002694861342213974,
      "loss": 3.1352,
      "step": 122674
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.981331706047058,
      "learning_rate": 0.00026948206487655186,
      "loss": 2.9406,
      "step": 122675
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4277760982513428,
      "learning_rate": 0.00026947799553738035,
      "loss": 2.9677,
      "step": 122676
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6648831367492676,
      "learning_rate": 0.0002694739262038833,
      "loss": 2.7246,
      "step": 122677
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.744626760482788,
      "learning_rate": 0.00026946985687606163,
      "loss": 3.0798,
      "step": 122678
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1109824180603027,
      "learning_rate": 0.00026946578755391605,
      "loss": 3.0459,
      "step": 122679
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0004687309265137,
      "learning_rate": 0.0002694617182374474,
      "loss": 2.993,
      "step": 122680
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.444993257522583,
      "learning_rate": 0.00026945764892665635,
      "loss": 2.9339,
      "step": 122681
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.940649390220642,
      "learning_rate": 0.00026945357962154386,
      "loss": 2.7534,
      "step": 122682
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1479928493499756,
      "learning_rate": 0.0002694495103221104,
      "loss": 2.9561,
      "step": 122683
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9634544849395752,
      "learning_rate": 0.00026944544102835684,
      "loss": 3.1296,
      "step": 122684
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6407601833343506,
      "learning_rate": 0.000269441371740284,
      "loss": 3.0232,
      "step": 122685
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8074898719787598,
      "learning_rate": 0.0002694373024578925,
      "loss": 3.0075,
      "step": 122686
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1656653881073,
      "learning_rate": 0.0002694332331811832,
      "loss": 3.0642,
      "step": 122687
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8719991445541382,
      "learning_rate": 0.000269429163910157,
      "loss": 3.0993,
      "step": 122688
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.139167308807373,
      "learning_rate": 0.00026942509464481434,
      "loss": 2.9093,
      "step": 122689
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1957340240478516,
      "learning_rate": 0.00026942102538515616,
      "loss": 3.0068,
      "step": 122690
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.062244415283203,
      "learning_rate": 0.0002694169561311832,
      "loss": 3.0859,
      "step": 122691
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.046661615371704,
      "learning_rate": 0.0002694128868828962,
      "loss": 3.1096,
      "step": 122692
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.076152801513672,
      "learning_rate": 0.00026940881764029586,
      "loss": 3.0761,
      "step": 122693
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7689032554626465,
      "learning_rate": 0.00026940474840338315,
      "loss": 2.9851,
      "step": 122694
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4302778244018555,
      "learning_rate": 0.0002694006791721586,
      "loss": 3.0418,
      "step": 122695
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.817576289176941,
      "learning_rate": 0.000269396609946623,
      "loss": 2.9296,
      "step": 122696
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.831855058670044,
      "learning_rate": 0.0002693925407267772,
      "loss": 3.0287,
      "step": 122697
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.718258023262024,
      "learning_rate": 0.0002693884715126218,
      "loss": 2.7672,
      "step": 122698
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.866658091545105,
      "learning_rate": 0.00026938440230415777,
      "loss": 2.9934,
      "step": 122699
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8756523132324219,
      "learning_rate": 0.00026938033310138583,
      "loss": 3.0621,
      "step": 122700
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9625566005706787,
      "learning_rate": 0.00026937626390430655,
      "loss": 3.0584,
      "step": 122701
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7596133947372437,
      "learning_rate": 0.0002693721947129208,
      "loss": 2.8045,
      "step": 122702
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6367223262786865,
      "learning_rate": 0.0002693681255272293,
      "loss": 2.8091,
      "step": 122703
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7600815296173096,
      "learning_rate": 0.0002693640563472329,
      "loss": 2.99,
      "step": 122704
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8291586637496948,
      "learning_rate": 0.00026935998717293226,
      "loss": 3.0418,
      "step": 122705
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7309263944625854,
      "learning_rate": 0.00026935591800432836,
      "loss": 2.9238,
      "step": 122706
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.573336362838745,
      "learning_rate": 0.00026935184884142155,
      "loss": 2.9481,
      "step": 122707
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7390638589859009,
      "learning_rate": 0.0002693477796842129,
      "loss": 2.7527,
      "step": 122708
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8397380113601685,
      "learning_rate": 0.000269343710532703,
      "loss": 3.0065,
      "step": 122709
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.843390941619873,
      "learning_rate": 0.00026933964138689277,
      "loss": 2.704,
      "step": 122710
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.585968017578125,
      "learning_rate": 0.00026933557224678283,
      "loss": 3.1404,
      "step": 122711
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.329026460647583,
      "learning_rate": 0.000269331503112374,
      "loss": 3.0271,
      "step": 122712
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2089531421661377,
      "learning_rate": 0.0002693274339836671,
      "loss": 3.2187,
      "step": 122713
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3490240573883057,
      "learning_rate": 0.0002693233648606627,
      "loss": 2.869,
      "step": 122714
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.301298141479492,
      "learning_rate": 0.0002693192957433617,
      "loss": 3.0134,
      "step": 122715
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.5262190103530884,
      "learning_rate": 0.00026931522663176485,
      "loss": 2.9153,
      "step": 122716
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7005062103271484,
      "learning_rate": 0.0002693111575258728,
      "loss": 3.179,
      "step": 122717
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.809326410293579,
      "learning_rate": 0.00026930708842568647,
      "loss": 2.9285,
      "step": 122718
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.87912917137146,
      "learning_rate": 0.00026930301933120654,
      "loss": 2.8674,
      "step": 122719
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8553402423858643,
      "learning_rate": 0.00026929895024243374,
      "loss": 2.7198,
      "step": 122720
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9955503940582275,
      "learning_rate": 0.00026929488115936883,
      "loss": 2.9321,
      "step": 122721
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8410518169403076,
      "learning_rate": 0.0002692908120820125,
      "loss": 2.7169,
      "step": 122722
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9338024854660034,
      "learning_rate": 0.00026928674301036573,
      "loss": 3.3382,
      "step": 122723
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.6815364360809326,
      "learning_rate": 0.000269282673944429,
      "loss": 2.8502,
      "step": 122724
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.09717059135437,
      "learning_rate": 0.00026927860488420335,
      "loss": 2.8624,
      "step": 122725
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3962128162384033,
      "learning_rate": 0.0002692745358296893,
      "loss": 3.0107,
      "step": 122726
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.185788631439209,
      "learning_rate": 0.0002692704667808877,
      "loss": 3.157,
      "step": 122727
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0081028938293457,
      "learning_rate": 0.0002692663977377993,
      "loss": 2.8489,
      "step": 122728
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.882646083831787,
      "learning_rate": 0.00026926232870042483,
      "loss": 2.8398,
      "step": 122729
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7923202514648438,
      "learning_rate": 0.0002692582596687651,
      "loss": 2.9492,
      "step": 122730
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.897139310836792,
      "learning_rate": 0.00026925419064282083,
      "loss": 3.1359,
      "step": 122731
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.685073137283325,
      "learning_rate": 0.0002692501216225928,
      "loss": 2.8335,
      "step": 122732
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.616307020187378,
      "learning_rate": 0.00026924605260808174,
      "loss": 3.1808,
      "step": 122733
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.823509693145752,
      "learning_rate": 0.0002692419835992885,
      "loss": 2.9181,
      "step": 122734
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.668654918670654,
      "learning_rate": 0.0002692379145962136,
      "loss": 2.7341,
      "step": 122735
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.767734050750732,
      "learning_rate": 0.00026923384559885804,
      "loss": 3.0283,
      "step": 122736
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6370818614959717,
      "learning_rate": 0.00026922977660722255,
      "loss": 2.8656,
      "step": 122737
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.631572723388672,
      "learning_rate": 0.00026922570762130774,
      "loss": 3.1056,
      "step": 122738
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.854740619659424,
      "learning_rate": 0.0002692216386411145,
      "loss": 3.082,
      "step": 122739
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.527243137359619,
      "learning_rate": 0.0002692175696666435,
      "loss": 3.0047,
      "step": 122740
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.08924674987793,
      "learning_rate": 0.0002692135006978955,
      "loss": 2.9271,
      "step": 122741
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.385462760925293,
      "learning_rate": 0.0002692094317348714,
      "loss": 2.9315,
      "step": 122742
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.117788076400757,
      "learning_rate": 0.0002692053627775718,
      "loss": 2.7816,
      "step": 122743
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.668938636779785,
      "learning_rate": 0.0002692012938259975,
      "loss": 3.0881,
      "step": 122744
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.751300573348999,
      "learning_rate": 0.0002691972248801492,
      "loss": 2.829,
      "step": 122745
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.086695432662964,
      "learning_rate": 0.00026919315594002775,
      "loss": 3.1148,
      "step": 122746
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8916417360305786,
      "learning_rate": 0.0002691890870056339,
      "loss": 3.0534,
      "step": 122747
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.267852306365967,
      "learning_rate": 0.0002691850180769683,
      "loss": 3.0387,
      "step": 122748
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.1645750999450684,
      "learning_rate": 0.000269180949154032,
      "loss": 2.8763,
      "step": 122749
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.664327621459961,
      "learning_rate": 0.0002691768802368254,
      "loss": 2.8998,
      "step": 122750
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2298314571380615,
      "learning_rate": 0.0002691728113253494,
      "loss": 3.1898,
      "step": 122751
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9639918804168701,
      "learning_rate": 0.0002691687424196047,
      "loss": 3.0917,
      "step": 122752
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.105398654937744,
      "learning_rate": 0.00026916467351959216,
      "loss": 3.0728,
      "step": 122753
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9740302562713623,
      "learning_rate": 0.0002691606046253125,
      "loss": 2.77,
      "step": 122754
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0807905197143555,
      "learning_rate": 0.0002691565357367666,
      "loss": 3.0946,
      "step": 122755
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.842288851737976,
      "learning_rate": 0.0002691524668539549,
      "loss": 2.8391,
      "step": 122756
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9178318977355957,
      "learning_rate": 0.0002691483979768784,
      "loss": 3.0536,
      "step": 122757
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7146631479263306,
      "learning_rate": 0.0002691443291055377,
      "loss": 3.1091,
      "step": 122758
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.935470461845398,
      "learning_rate": 0.00026914026023993373,
      "loss": 3.0205,
      "step": 122759
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2757761478424072,
      "learning_rate": 0.0002691361913800672,
      "loss": 2.9232,
      "step": 122760
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.136648178100586,
      "learning_rate": 0.0002691321225259389,
      "loss": 3.2271,
      "step": 122761
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1816697120666504,
      "learning_rate": 0.00026912805367754943,
      "loss": 3.2252,
      "step": 122762
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8503670692443848,
      "learning_rate": 0.0002691239848348996,
      "loss": 3.166,
      "step": 122763
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7902871370315552,
      "learning_rate": 0.00026911991599799025,
      "loss": 2.9096,
      "step": 122764
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.288997173309326,
      "learning_rate": 0.00026911584716682205,
      "loss": 2.8646,
      "step": 122765
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.035543918609619,
      "learning_rate": 0.0002691117783413958,
      "loss": 3.0528,
      "step": 122766
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8510041236877441,
      "learning_rate": 0.00026910770952171236,
      "loss": 2.8706,
      "step": 122767
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9055581092834473,
      "learning_rate": 0.0002691036407077723,
      "loss": 3.0265,
      "step": 122768
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2462079524993896,
      "learning_rate": 0.00026909957189957644,
      "loss": 3.1831,
      "step": 122769
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8294330835342407,
      "learning_rate": 0.00026909550309712555,
      "loss": 3.1579,
      "step": 122770
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.111447811126709,
      "learning_rate": 0.0002690914343004204,
      "loss": 2.9583,
      "step": 122771
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9065520763397217,
      "learning_rate": 0.0002690873655094617,
      "loss": 2.7823,
      "step": 122772
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.366053819656372,
      "learning_rate": 0.0002690832967242504,
      "loss": 2.9418,
      "step": 122773
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6194580793380737,
      "learning_rate": 0.000269079227944787,
      "loss": 3.1581,
      "step": 122774
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7925384044647217,
      "learning_rate": 0.0002690751591710723,
      "loss": 2.864,
      "step": 122775
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8667702674865723,
      "learning_rate": 0.0002690710904031071,
      "loss": 3.0414,
      "step": 122776
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2680962085723877,
      "learning_rate": 0.00026906702164089226,
      "loss": 2.8893,
      "step": 122777
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3987061977386475,
      "learning_rate": 0.00026906295288442835,
      "loss": 3.1164,
      "step": 122778
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8905590772628784,
      "learning_rate": 0.0002690588841337163,
      "loss": 2.9398,
      "step": 122779
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3066961765289307,
      "learning_rate": 0.00026905481538875686,
      "loss": 3.0012,
      "step": 122780
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7290966510772705,
      "learning_rate": 0.0002690507466495506,
      "loss": 3.1455,
      "step": 122781
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.149216890335083,
      "learning_rate": 0.00026904667791609836,
      "loss": 2.8709,
      "step": 122782
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.2597126960754395,
      "learning_rate": 0.000269042609188401,
      "loss": 2.7919,
      "step": 122783
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5985774993896484,
      "learning_rate": 0.0002690385404664591,
      "loss": 3.0306,
      "step": 122784
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.042553186416626,
      "learning_rate": 0.00026903447175027364,
      "loss": 2.8771,
      "step": 122785
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.439955949783325,
      "learning_rate": 0.00026903040303984533,
      "loss": 2.9465,
      "step": 122786
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9625005722045898,
      "learning_rate": 0.00026902633433517473,
      "loss": 3.1103,
      "step": 122787
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0056004524230957,
      "learning_rate": 0.00026902226563626266,
      "loss": 2.9794,
      "step": 122788
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.6559600830078125,
      "learning_rate": 0.00026901819694311,
      "loss": 2.9936,
      "step": 122789
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6423587799072266,
      "learning_rate": 0.00026901412825571744,
      "loss": 3.2665,
      "step": 122790
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0203123092651367,
      "learning_rate": 0.0002690100595740857,
      "loss": 3.0124,
      "step": 122791
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9948457479476929,
      "learning_rate": 0.00026900599089821575,
      "loss": 2.9804,
      "step": 122792
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.0473902225494385,
      "learning_rate": 0.000269001922228108,
      "loss": 3.081,
      "step": 122793
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5200066566467285,
      "learning_rate": 0.0002689978535637634,
      "loss": 2.827,
      "step": 122794
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8429365158081055,
      "learning_rate": 0.0002689937849051827,
      "loss": 2.887,
      "step": 122795
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.142707347869873,
      "learning_rate": 0.00026898971625236663,
      "loss": 2.8977,
      "step": 122796
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.751316785812378,
      "learning_rate": 0.00026898564760531596,
      "loss": 3.1159,
      "step": 122797
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2600135803222656,
      "learning_rate": 0.0002689815789640315,
      "loss": 3.1747,
      "step": 122798
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.45286226272583,
      "learning_rate": 0.00026897751032851393,
      "loss": 2.7765,
      "step": 122799
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.159606695175171,
      "learning_rate": 0.000268973441698764,
      "loss": 3.0367,
      "step": 122800
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3219497203826904,
      "learning_rate": 0.00026896937307478247,
      "loss": 2.9174,
      "step": 122801
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0362987518310547,
      "learning_rate": 0.00026896530445657006,
      "loss": 2.6416,
      "step": 122802
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8626559972763062,
      "learning_rate": 0.0002689612358441277,
      "loss": 3.0353,
      "step": 122803
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8806755542755127,
      "learning_rate": 0.0002689571672374561,
      "loss": 2.9967,
      "step": 122804
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1938111782073975,
      "learning_rate": 0.0002689530986365558,
      "loss": 3.0621,
      "step": 122805
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3041601181030273,
      "learning_rate": 0.0002689490300414277,
      "loss": 2.9088,
      "step": 122806
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.138782024383545,
      "learning_rate": 0.00026894496145207265,
      "loss": 2.9218,
      "step": 122807
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.48930287361145,
      "learning_rate": 0.0002689408928684913,
      "loss": 3.1525,
      "step": 122808
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9861592054367065,
      "learning_rate": 0.0002689368242906844,
      "loss": 2.9508,
      "step": 122809
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9998433589935303,
      "learning_rate": 0.00026893275571865274,
      "loss": 2.7727,
      "step": 122810
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8189659118652344,
      "learning_rate": 0.00026892868715239703,
      "loss": 3.0466,
      "step": 122811
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8372877836227417,
      "learning_rate": 0.00026892461859191817,
      "loss": 3.085,
      "step": 122812
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.24399995803833,
      "learning_rate": 0.0002689205500372167,
      "loss": 3.0816,
      "step": 122813
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1265599727630615,
      "learning_rate": 0.00026891648148829353,
      "loss": 2.9393,
      "step": 122814
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.150792360305786,
      "learning_rate": 0.0002689124129451493,
      "loss": 2.9945,
      "step": 122815
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6591155529022217,
      "learning_rate": 0.000268908344407785,
      "loss": 3.2198,
      "step": 122816
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9064042568206787,
      "learning_rate": 0.0002689042758762011,
      "loss": 2.8086,
      "step": 122817
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.015047311782837,
      "learning_rate": 0.0002689002073503985,
      "loss": 2.8432,
      "step": 122818
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.075441360473633,
      "learning_rate": 0.000268896138830378,
      "loss": 2.8765,
      "step": 122819
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5883209705352783,
      "learning_rate": 0.0002688920703161402,
      "loss": 3.1043,
      "step": 122820
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7429239749908447,
      "learning_rate": 0.000268888001807686,
      "loss": 3.1577,
      "step": 122821
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9144443273544312,
      "learning_rate": 0.00026888393330501615,
      "loss": 3.1772,
      "step": 122822
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3594810962677,
      "learning_rate": 0.00026887986480813133,
      "loss": 2.8519,
      "step": 122823
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.758713483810425,
      "learning_rate": 0.0002688757963170323,
      "loss": 2.7655,
      "step": 122824
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.050830841064453,
      "learning_rate": 0.0002688717278317198,
      "loss": 3.1725,
      "step": 122825
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7763923406600952,
      "learning_rate": 0.00026886765935219475,
      "loss": 2.7119,
      "step": 122826
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7558107376098633,
      "learning_rate": 0.0002688635908784577,
      "loss": 3.1859,
      "step": 122827
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5875399112701416,
      "learning_rate": 0.00026885952241050964,
      "loss": 2.7982,
      "step": 122828
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.066955804824829,
      "learning_rate": 0.00026885545394835105,
      "loss": 2.8324,
      "step": 122829
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9922837018966675,
      "learning_rate": 0.0002688513854919828,
      "loss": 3.0363,
      "step": 122830
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.195180654525757,
      "learning_rate": 0.0002688473170414057,
      "loss": 3.2028,
      "step": 122831
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9464460611343384,
      "learning_rate": 0.0002688432485966205,
      "loss": 2.9374,
      "step": 122832
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0053656101226807,
      "learning_rate": 0.00026883918015762796,
      "loss": 3.2235,
      "step": 122833
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0542097091674805,
      "learning_rate": 0.0002688351117244288,
      "loss": 2.8685,
      "step": 122834
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.208547353744507,
      "learning_rate": 0.0002688310432970237,
      "loss": 2.8797,
      "step": 122835
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.191018581390381,
      "learning_rate": 0.0002688269748754135,
      "loss": 2.9343,
      "step": 122836
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.316965103149414,
      "learning_rate": 0.000268822906459599,
      "loss": 3.2733,
      "step": 122837
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2970025539398193,
      "learning_rate": 0.0002688188380495809,
      "loss": 2.8998,
      "step": 122838
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.036994457244873,
      "learning_rate": 0.00026881476964535995,
      "loss": 2.8042,
      "step": 122839
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7307286262512207,
      "learning_rate": 0.000268810701246937,
      "loss": 3.1593,
      "step": 122840
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.825483560562134,
      "learning_rate": 0.0002688066328543127,
      "loss": 2.9527,
      "step": 122841
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0766329765319824,
      "learning_rate": 0.0002688025644674878,
      "loss": 2.8942,
      "step": 122842
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9518382549285889,
      "learning_rate": 0.00026879849608646303,
      "loss": 2.9942,
      "step": 122843
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.045027494430542,
      "learning_rate": 0.0002687944277112393,
      "loss": 2.8416,
      "step": 122844
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.98252010345459,
      "learning_rate": 0.00026879035934181724,
      "loss": 2.9573,
      "step": 122845
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2489430904388428,
      "learning_rate": 0.0002687862909781976,
      "loss": 2.9537,
      "step": 122846
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1756434440612793,
      "learning_rate": 0.00026878222262038135,
      "loss": 2.7096,
      "step": 122847
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.829003930091858,
      "learning_rate": 0.000268778154268369,
      "loss": 3.1519,
      "step": 122848
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5398213863372803,
      "learning_rate": 0.00026877408592216126,
      "loss": 2.8476,
      "step": 122849
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.349938154220581,
      "learning_rate": 0.00026877001758175906,
      "loss": 2.8543,
      "step": 122850
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1043336391448975,
      "learning_rate": 0.00026876594924716315,
      "loss": 3.1295,
      "step": 122851
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.977530837059021,
      "learning_rate": 0.0002687618809183742,
      "loss": 3.0831,
      "step": 122852
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1588122844696045,
      "learning_rate": 0.0002687578125953931,
      "loss": 3.2753,
      "step": 122853
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3206818103790283,
      "learning_rate": 0.0002687537442782204,
      "loss": 3.1064,
      "step": 122854
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8993349075317383,
      "learning_rate": 0.000268749675966857,
      "loss": 3.067,
      "step": 122855
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.975950837135315,
      "learning_rate": 0.0002687456076613036,
      "loss": 3.0113,
      "step": 122856
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0039010047912598,
      "learning_rate": 0.00026874153936156097,
      "loss": 2.8403,
      "step": 122857
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6254305839538574,
      "learning_rate": 0.0002687374710676299,
      "loss": 2.9067,
      "step": 122858
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.255800485610962,
      "learning_rate": 0.0002687334027795113,
      "loss": 2.8702,
      "step": 122859
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.808302402496338,
      "learning_rate": 0.00026872933449720554,
      "loss": 2.9671,
      "step": 122860
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.867261528968811,
      "learning_rate": 0.00026872526622071365,
      "loss": 2.7208,
      "step": 122861
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.065770387649536,
      "learning_rate": 0.0002687211979500363,
      "loss": 2.9124,
      "step": 122862
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5181779861450195,
      "learning_rate": 0.00026871712968517424,
      "loss": 3.0187,
      "step": 122863
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0928170680999756,
      "learning_rate": 0.00026871306142612825,
      "loss": 3.1567,
      "step": 122864
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0947208404541016,
      "learning_rate": 0.00026870899317289927,
      "loss": 3.0312,
      "step": 122865
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7030725479125977,
      "learning_rate": 0.00026870492492548774,
      "loss": 2.9538,
      "step": 122866
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.160271644592285,
      "learning_rate": 0.00026870085668389453,
      "loss": 3.0977,
      "step": 122867
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9264856576919556,
      "learning_rate": 0.00026869678844812045,
      "loss": 2.9858,
      "step": 122868
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1933555603027344,
      "learning_rate": 0.0002686927202181662,
      "loss": 2.8271,
      "step": 122869
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0227506160736084,
      "learning_rate": 0.0002686886519940326,
      "loss": 2.9208,
      "step": 122870
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9629393815994263,
      "learning_rate": 0.00026868458377572046,
      "loss": 2.9724,
      "step": 122871
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8698248863220215,
      "learning_rate": 0.00026868051556323033,
      "loss": 2.9808,
      "step": 122872
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.402824640274048,
      "learning_rate": 0.0002686764473565631,
      "loss": 2.8345,
      "step": 122873
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1194815635681152,
      "learning_rate": 0.0002686723791557195,
      "loss": 2.973,
      "step": 122874
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.666778087615967,
      "learning_rate": 0.0002686683109607003,
      "loss": 3.1049,
      "step": 122875
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6336661577224731,
      "learning_rate": 0.00026866424277150624,
      "loss": 2.9801,
      "step": 122876
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3939194679260254,
      "learning_rate": 0.00026866017458813826,
      "loss": 3.0562,
      "step": 122877
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2576725482940674,
      "learning_rate": 0.0002686561064105968,
      "loss": 3.0742,
      "step": 122878
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.655147671699524,
      "learning_rate": 0.0002686520382388827,
      "loss": 2.7239,
      "step": 122879
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7303458452224731,
      "learning_rate": 0.0002686479700729968,
      "loss": 3.0034,
      "step": 122880
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.528614044189453,
      "learning_rate": 0.0002686439019129399,
      "loss": 2.7654,
      "step": 122881
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3810617923736572,
      "learning_rate": 0.0002686398337587126,
      "loss": 2.8509,
      "step": 122882
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.856896996498108,
      "learning_rate": 0.00026863576561031595,
      "loss": 3.0216,
      "step": 122883
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8344991207122803,
      "learning_rate": 0.00026863169746775035,
      "loss": 3.0601,
      "step": 122884
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.6748850345611572,
      "learning_rate": 0.00026862762933101673,
      "loss": 3.1571,
      "step": 122885
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.791100025177002,
      "learning_rate": 0.0002686235612001158,
      "loss": 2.9725,
      "step": 122886
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.848802089691162,
      "learning_rate": 0.00026861949307504835,
      "loss": 2.8806,
      "step": 122887
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4607040882110596,
      "learning_rate": 0.0002686154249558151,
      "loss": 3.058,
      "step": 122888
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8535658121109009,
      "learning_rate": 0.000268611356842417,
      "loss": 3.0599,
      "step": 122889
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0899765491485596,
      "learning_rate": 0.00026860728873485446,
      "loss": 3.008,
      "step": 122890
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1180050373077393,
      "learning_rate": 0.0002686032206331285,
      "loss": 2.9771,
      "step": 122891
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8207974433898926,
      "learning_rate": 0.00026859915253723974,
      "loss": 2.8461,
      "step": 122892
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.1153295040130615,
      "learning_rate": 0.000268595084447189,
      "loss": 3.0874,
      "step": 122893
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.730334758758545,
      "learning_rate": 0.00026859101636297704,
      "loss": 2.9006,
      "step": 122894
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8891327381134033,
      "learning_rate": 0.00026858694828460467,
      "loss": 3.0477,
      "step": 122895
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5396835803985596,
      "learning_rate": 0.00026858288021207257,
      "loss": 3.1326,
      "step": 122896
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5188052654266357,
      "learning_rate": 0.0002685788121453814,
      "loss": 3.2702,
      "step": 122897
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.366434097290039,
      "learning_rate": 0.00026857474408453205,
      "loss": 3.1122,
      "step": 122898
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.006739854812622,
      "learning_rate": 0.00026857067602952526,
      "loss": 3.0556,
      "step": 122899
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.566104412078857,
      "learning_rate": 0.0002685666079803618,
      "loss": 2.9589,
      "step": 122900
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5789906978607178,
      "learning_rate": 0.0002685625399370424,
      "loss": 2.9626,
      "step": 122901
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.95021915435791,
      "learning_rate": 0.0002685584718995678,
      "loss": 2.8473,
      "step": 122902
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8300777673721313,
      "learning_rate": 0.00026855440386793877,
      "loss": 2.9601,
      "step": 122903
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.236870050430298,
      "learning_rate": 0.0002685503358421561,
      "loss": 3.097,
      "step": 122904
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.873790740966797,
      "learning_rate": 0.0002685462678222205,
      "loss": 2.853,
      "step": 122905
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8345654010772705,
      "learning_rate": 0.00026854219980813274,
      "loss": 3.0392,
      "step": 122906
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.844074010848999,
      "learning_rate": 0.0002685381317998936,
      "loss": 2.9787,
      "step": 122907
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.409193992614746,
      "learning_rate": 0.0002685340637975038,
      "loss": 3.046,
      "step": 122908
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2708444595336914,
      "learning_rate": 0.0002685299958009641,
      "loss": 3.115,
      "step": 122909
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6925251483917236,
      "learning_rate": 0.0002685259278102752,
      "loss": 3.1119,
      "step": 122910
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7101573944091797,
      "learning_rate": 0.0002685218598254381,
      "loss": 2.7986,
      "step": 122911
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6756327152252197,
      "learning_rate": 0.0002685177918464532,
      "loss": 3.1364,
      "step": 122912
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9222254753112793,
      "learning_rate": 0.00026851372387332154,
      "loss": 2.8583,
      "step": 122913
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.211215019226074,
      "learning_rate": 0.0002685096559060438,
      "loss": 2.9938,
      "step": 122914
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.168649911880493,
      "learning_rate": 0.0002685055879446206,
      "loss": 2.8195,
      "step": 122915
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.603639841079712,
      "learning_rate": 0.00026850151998905283,
      "loss": 2.9461,
      "step": 122916
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7710800170898438,
      "learning_rate": 0.00026849745203934126,
      "loss": 2.9375,
      "step": 122917
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8593311309814453,
      "learning_rate": 0.00026849338409548666,
      "loss": 2.6247,
      "step": 122918
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.0426900386810303,
      "learning_rate": 0.00026848931615748963,
      "loss": 3.0656,
      "step": 122919
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9567854404449463,
      "learning_rate": 0.00026848524822535114,
      "loss": 2.9681,
      "step": 122920
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7773877382278442,
      "learning_rate": 0.0002684811802990718,
      "loss": 2.9904,
      "step": 122921
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.301771640777588,
      "learning_rate": 0.0002684771123786523,
      "loss": 3.1521,
      "step": 122922
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5764811038970947,
      "learning_rate": 0.00026847304446409357,
      "loss": 3.0666,
      "step": 122923
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.580012321472168,
      "learning_rate": 0.0002684689765553963,
      "loss": 2.8619,
      "step": 122924
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7474011182785034,
      "learning_rate": 0.0002684649086525613,
      "loss": 3.2267,
      "step": 122925
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4225704669952393,
      "learning_rate": 0.00026846084075558927,
      "loss": 2.9939,
      "step": 122926
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2788898944854736,
      "learning_rate": 0.0002684567728644809,
      "loss": 2.8817,
      "step": 122927
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2077319622039795,
      "learning_rate": 0.000268452704979237,
      "loss": 2.84,
      "step": 122928
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.031482696533203,
      "learning_rate": 0.0002684486370998583,
      "loss": 2.9642,
      "step": 122929
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.268925189971924,
      "learning_rate": 0.00026844456922634564,
      "loss": 3.0868,
      "step": 122930
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2668545246124268,
      "learning_rate": 0.0002684405013586997,
      "loss": 2.79,
      "step": 122931
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9214627742767334,
      "learning_rate": 0.00026843643349692145,
      "loss": 2.9924,
      "step": 122932
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.712721824645996,
      "learning_rate": 0.0002684323656410113,
      "loss": 2.9853,
      "step": 122933
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8786916732788086,
      "learning_rate": 0.0002684282977909702,
      "loss": 2.8124,
      "step": 122934
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8584342002868652,
      "learning_rate": 0.00026842422994679887,
      "loss": 3.1356,
      "step": 122935
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.047736406326294,
      "learning_rate": 0.000268420162108498,
      "loss": 3.015,
      "step": 122936
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9095802307128906,
      "learning_rate": 0.0002684160942760685,
      "loss": 3.0005,
      "step": 122937
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1194276809692383,
      "learning_rate": 0.00026841202644951114,
      "loss": 2.8491,
      "step": 122938
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6755539178848267,
      "learning_rate": 0.00026840795862882646,
      "loss": 2.889,
      "step": 122939
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.03548264503479,
      "learning_rate": 0.00026840389081401534,
      "loss": 2.9746,
      "step": 122940
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7243887186050415,
      "learning_rate": 0.00026839982300507853,
      "loss": 3.186,
      "step": 122941
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.015855312347412,
      "learning_rate": 0.00026839575520201675,
      "loss": 3.0544,
      "step": 122942
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5910890102386475,
      "learning_rate": 0.00026839168740483085,
      "loss": 3.016,
      "step": 122943
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3407809734344482,
      "learning_rate": 0.00026838761961352165,
      "loss": 2.7466,
      "step": 122944
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9563301801681519,
      "learning_rate": 0.00026838355182808965,
      "loss": 3.3889,
      "step": 122945
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9417643547058105,
      "learning_rate": 0.0002683794840485357,
      "loss": 2.8789,
      "step": 122946
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.228303909301758,
      "learning_rate": 0.00026837541627486067,
      "loss": 2.967,
      "step": 122947
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2469892501831055,
      "learning_rate": 0.0002683713485070652,
      "loss": 2.8704,
      "step": 122948
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0096025466918945,
      "learning_rate": 0.00026836728074515015,
      "loss": 2.9183,
      "step": 122949
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.214061975479126,
      "learning_rate": 0.0002683632129891163,
      "loss": 2.995,
      "step": 122950
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.9224131107330322,
      "learning_rate": 0.0002683591452389642,
      "loss": 3.1435,
      "step": 122951
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9906384944915771,
      "learning_rate": 0.00026835507749469476,
      "loss": 2.9239,
      "step": 122952
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7394168376922607,
      "learning_rate": 0.0002683510097563087,
      "loss": 3.2748,
      "step": 122953
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.1916182041168213,
      "learning_rate": 0.00026834694202380676,
      "loss": 2.7985,
      "step": 122954
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8987374305725098,
      "learning_rate": 0.00026834287429718974,
      "loss": 2.76,
      "step": 122955
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.975494623184204,
      "learning_rate": 0.00026833880657645853,
      "loss": 3.1451,
      "step": 122956
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7398874759674072,
      "learning_rate": 0.00026833473886161356,
      "loss": 3.0984,
      "step": 122957
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2615244388580322,
      "learning_rate": 0.0002683306711526558,
      "loss": 2.8421,
      "step": 122958
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2505221366882324,
      "learning_rate": 0.0002683266034495859,
      "loss": 3.0729,
      "step": 122959
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3632240295410156,
      "learning_rate": 0.0002683225357524047,
      "loss": 2.7907,
      "step": 122960
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2174034118652344,
      "learning_rate": 0.000268318468061113,
      "loss": 3.1831,
      "step": 122961
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.592020034790039,
      "learning_rate": 0.00026831440037571155,
      "loss": 3.0012,
      "step": 122962
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0302541255950928,
      "learning_rate": 0.000268310332696201,
      "loss": 3.0555,
      "step": 122963
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7447795867919922,
      "learning_rate": 0.0002683062650225821,
      "loss": 3.0024,
      "step": 122964
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.006742238998413,
      "learning_rate": 0.0002683021973548557,
      "loss": 2.9605,
      "step": 122965
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.892924427986145,
      "learning_rate": 0.00026829812969302253,
      "loss": 3.1206,
      "step": 122966
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3234763145446777,
      "learning_rate": 0.0002682940620370833,
      "loss": 2.9685,
      "step": 122967
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9716507196426392,
      "learning_rate": 0.0002682899943870389,
      "loss": 2.9856,
      "step": 122968
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.473670244216919,
      "learning_rate": 0.00026828592674288994,
      "loss": 2.8004,
      "step": 122969
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6855000257492065,
      "learning_rate": 0.0002682818591046372,
      "loss": 2.7185,
      "step": 122970
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.859201431274414,
      "learning_rate": 0.0002682777914722814,
      "loss": 3.0178,
      "step": 122971
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7376409769058228,
      "learning_rate": 0.0002682737238458234,
      "loss": 3.067,
      "step": 122972
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.010758638381958,
      "learning_rate": 0.0002682696562252639,
      "loss": 2.9443,
      "step": 122973
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7697902917861938,
      "learning_rate": 0.00026826558861060384,
      "loss": 3.0318,
      "step": 122974
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.10748553276062,
      "learning_rate": 0.00026826152100184366,
      "loss": 3.0018,
      "step": 122975
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0377447605133057,
      "learning_rate": 0.00026825745339898425,
      "loss": 3.0134,
      "step": 122976
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1389946937561035,
      "learning_rate": 0.0002682533858020264,
      "loss": 2.8699,
      "step": 122977
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.1806182861328125,
      "learning_rate": 0.0002682493182109708,
      "loss": 2.8666,
      "step": 122978
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.524735689163208,
      "learning_rate": 0.0002682452506258183,
      "loss": 2.6651,
      "step": 122979
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.066221237182617,
      "learning_rate": 0.00026824118304656956,
      "loss": 3.0912,
      "step": 122980
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9172306060791016,
      "learning_rate": 0.00026823711547322555,
      "loss": 3.0588,
      "step": 122981
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.319395065307617,
      "learning_rate": 0.0002682330479057867,
      "loss": 3.0158,
      "step": 122982
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.633780479431152,
      "learning_rate": 0.000268228980344254,
      "loss": 2.871,
      "step": 122983
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0984718799591064,
      "learning_rate": 0.000268224912788628,
      "loss": 2.935,
      "step": 122984
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.400229454040527,
      "learning_rate": 0.0002682208452389097,
      "loss": 2.9252,
      "step": 122985
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.7526159286499023,
      "learning_rate": 0.0002682167776950997,
      "loss": 2.9281,
      "step": 122986
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.889746189117432,
      "learning_rate": 0.0002682127101571989,
      "loss": 2.9422,
      "step": 122987
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2559895515441895,
      "learning_rate": 0.0002682086426252079,
      "loss": 2.9281,
      "step": 122988
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.671727180480957,
      "learning_rate": 0.00026820457509912747,
      "loss": 2.9526,
      "step": 122989
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.0547800064086914,
      "learning_rate": 0.0002682005075789584,
      "loss": 3.0561,
      "step": 122990
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6083593368530273,
      "learning_rate": 0.00026819644006470154,
      "loss": 3.0618,
      "step": 122991
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3325517177581787,
      "learning_rate": 0.0002681923725563575,
      "loss": 3.0871,
      "step": 122992
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8427491188049316,
      "learning_rate": 0.00026818830505392715,
      "loss": 2.9685,
      "step": 122993
    },
    {
      "epoch": 1.6,
      "grad_norm": 5.5408034324646,
      "learning_rate": 0.00026818423755741117,
      "loss": 3.0308,
      "step": 122994
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.832127332687378,
      "learning_rate": 0.00026818017006681027,
      "loss": 3.0047,
      "step": 122995
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9343446493148804,
      "learning_rate": 0.0002681761025821254,
      "loss": 2.7094,
      "step": 122996
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0465123653411865,
      "learning_rate": 0.00026817203510335713,
      "loss": 2.9319,
      "step": 122997
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.32796049118042,
      "learning_rate": 0.00026816796763050625,
      "loss": 3.0262,
      "step": 122998
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8952205181121826,
      "learning_rate": 0.00026816390016357365,
      "loss": 2.9583,
      "step": 122999
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2083756923675537,
      "learning_rate": 0.00026815983270255986,
      "loss": 3.2892,
      "step": 123000
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.238394021987915,
      "learning_rate": 0.0002681557652474658,
      "loss": 2.872,
      "step": 123001
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9057778120040894,
      "learning_rate": 0.0002681516977982922,
      "loss": 2.8533,
      "step": 123002
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3619415760040283,
      "learning_rate": 0.0002681476303550398,
      "loss": 2.8322,
      "step": 123003
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1535463333129883,
      "learning_rate": 0.00026814356291770934,
      "loss": 3.0149,
      "step": 123004
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1183626651763916,
      "learning_rate": 0.00026813949548630165,
      "loss": 3.0374,
      "step": 123005
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5213112831115723,
      "learning_rate": 0.00026813542806081734,
      "loss": 2.8268,
      "step": 123006
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9765872955322266,
      "learning_rate": 0.00026813136064125727,
      "loss": 3.3493,
      "step": 123007
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.728217124938965,
      "learning_rate": 0.0002681272932276222,
      "loss": 2.8092,
      "step": 123008
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2722487449645996,
      "learning_rate": 0.0002681232258199129,
      "loss": 3.1969,
      "step": 123009
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4677841663360596,
      "learning_rate": 0.0002681191584181301,
      "loss": 3.0092,
      "step": 123010
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9968432188034058,
      "learning_rate": 0.0002681150910222746,
      "loss": 3.0281,
      "step": 123011
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2538490295410156,
      "learning_rate": 0.00026811102363234697,
      "loss": 3.107,
      "step": 123012
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9272372722625732,
      "learning_rate": 0.00026810695624834814,
      "loss": 3.0891,
      "step": 123013
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.102720260620117,
      "learning_rate": 0.0002681028888702788,
      "loss": 3.0843,
      "step": 123014
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.94089674949646,
      "learning_rate": 0.0002680988214981398,
      "loss": 2.8987,
      "step": 123015
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.00982928276062,
      "learning_rate": 0.0002680947541319318,
      "loss": 2.9808,
      "step": 123016
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2142372131347656,
      "learning_rate": 0.00026809068677165573,
      "loss": 3.0875,
      "step": 123017
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8352344036102295,
      "learning_rate": 0.00026808661941731205,
      "loss": 2.6839,
      "step": 123018
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.471475124359131,
      "learning_rate": 0.00026808255206890166,
      "loss": 2.9888,
      "step": 123019
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9142570495605469,
      "learning_rate": 0.00026807848472642533,
      "loss": 3.0784,
      "step": 123020
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.047858715057373,
      "learning_rate": 0.00026807441738988387,
      "loss": 2.9856,
      "step": 123021
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.155282497406006,
      "learning_rate": 0.00026807035005927787,
      "loss": 2.7892,
      "step": 123022
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5458076000213623,
      "learning_rate": 0.0002680662827346084,
      "loss": 3.0413,
      "step": 123023
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.439940929412842,
      "learning_rate": 0.0002680622154158759,
      "loss": 2.9283,
      "step": 123024
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.870490312576294,
      "learning_rate": 0.00026805814810308123,
      "loss": 2.9747,
      "step": 123025
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.222720146179199,
      "learning_rate": 0.0002680540807962251,
      "loss": 2.881,
      "step": 123026
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.869067430496216,
      "learning_rate": 0.0002680500134953083,
      "loss": 2.8908,
      "step": 123027
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9352500438690186,
      "learning_rate": 0.0002680459462003317,
      "loss": 3.1482,
      "step": 123028
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8662497997283936,
      "learning_rate": 0.00026804187891129607,
      "loss": 3.2283,
      "step": 123029
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.061959743499756,
      "learning_rate": 0.00026803781162820187,
      "loss": 2.8197,
      "step": 123030
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4414732456207275,
      "learning_rate": 0.0002680337443510501,
      "loss": 3.1042,
      "step": 123031
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.157435655593872,
      "learning_rate": 0.00026802967707984144,
      "loss": 3.0467,
      "step": 123032
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6603879928588867,
      "learning_rate": 0.0002680256098145767,
      "loss": 2.9498,
      "step": 123033
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.199782609939575,
      "learning_rate": 0.00026802154255525654,
      "loss": 2.9915,
      "step": 123034
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1572420597076416,
      "learning_rate": 0.00026801747530188195,
      "loss": 2.8094,
      "step": 123035
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.962735652923584,
      "learning_rate": 0.00026801340805445334,
      "loss": 3.1422,
      "step": 123036
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.254884958267212,
      "learning_rate": 0.0002680093408129717,
      "loss": 3.0138,
      "step": 123037
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.316915273666382,
      "learning_rate": 0.00026800527357743765,
      "loss": 2.9962,
      "step": 123038
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.901910662651062,
      "learning_rate": 0.00026800120634785213,
      "loss": 3.1258,
      "step": 123039
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8876640796661377,
      "learning_rate": 0.0002679971391242157,
      "loss": 3.0097,
      "step": 123040
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8864331245422363,
      "learning_rate": 0.0002679930719065294,
      "loss": 2.8296,
      "step": 123041
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8283312320709229,
      "learning_rate": 0.0002679890046947936,
      "loss": 3.1836,
      "step": 123042
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.996522068977356,
      "learning_rate": 0.0002679849374890093,
      "loss": 2.922,
      "step": 123043
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.758644461631775,
      "learning_rate": 0.00026798087028917717,
      "loss": 2.9388,
      "step": 123044
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.2703909873962402,
      "learning_rate": 0.000267976803095298,
      "loss": 3.0343,
      "step": 123045
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8156052827835083,
      "learning_rate": 0.0002679727359073726,
      "loss": 3.0151,
      "step": 123046
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.163548707962036,
      "learning_rate": 0.0002679686687254016,
      "loss": 3.2237,
      "step": 123047
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8780248165130615,
      "learning_rate": 0.000267964601549386,
      "loss": 3.1509,
      "step": 123048
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.044299602508545,
      "learning_rate": 0.00026796053437932625,
      "loss": 2.97,
      "step": 123049
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.981515884399414,
      "learning_rate": 0.00026795646721522324,
      "loss": 3.0065,
      "step": 123050
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.156642198562622,
      "learning_rate": 0.00026795240005707776,
      "loss": 2.9104,
      "step": 123051
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.201432466506958,
      "learning_rate": 0.0002679483329048905,
      "loss": 3.0183,
      "step": 123052
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4066789150238037,
      "learning_rate": 0.00026794426575866225,
      "loss": 3.1655,
      "step": 123053
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.80179500579834,
      "learning_rate": 0.00026794019861839385,
      "loss": 3.1689,
      "step": 123054
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1464638710021973,
      "learning_rate": 0.0002679361314840859,
      "loss": 3.0374,
      "step": 123055
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9979031085968018,
      "learning_rate": 0.00026793206435573924,
      "loss": 2.8683,
      "step": 123056
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.443498134613037,
      "learning_rate": 0.0002679279972333546,
      "loss": 3.0607,
      "step": 123057
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3367538452148438,
      "learning_rate": 0.00026792393011693274,
      "loss": 2.9623,
      "step": 123058
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9976483583450317,
      "learning_rate": 0.0002679198630064744,
      "loss": 3.1469,
      "step": 123059
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.560394525527954,
      "learning_rate": 0.0002679157959019806,
      "loss": 2.7965,
      "step": 123060
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1527469158172607,
      "learning_rate": 0.00026791172880345163,
      "loss": 3.1972,
      "step": 123061
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6395500898361206,
      "learning_rate": 0.00026790766171088846,
      "loss": 3.0682,
      "step": 123062
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.455299139022827,
      "learning_rate": 0.0002679035946242919,
      "loss": 2.9603,
      "step": 123063
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9091860055923462,
      "learning_rate": 0.0002678995275436627,
      "loss": 2.9336,
      "step": 123064
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6168277263641357,
      "learning_rate": 0.00026789546046900156,
      "loss": 2.9154,
      "step": 123065
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.2770209312438965,
      "learning_rate": 0.00026789139340030937,
      "loss": 3.2034,
      "step": 123066
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.691854476928711,
      "learning_rate": 0.00026788732633758664,
      "loss": 2.8895,
      "step": 123067
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.590576171875,
      "learning_rate": 0.0002678832592808343,
      "loss": 2.9483,
      "step": 123068
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4338462352752686,
      "learning_rate": 0.00026787919223005306,
      "loss": 3.0559,
      "step": 123069
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.0375173091888428,
      "learning_rate": 0.0002678751251852437,
      "loss": 3.1045,
      "step": 123070
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.106574296951294,
      "learning_rate": 0.0002678710581464069,
      "loss": 3.0984,
      "step": 123071
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0914087295532227,
      "learning_rate": 0.00026786699111354357,
      "loss": 3.1818,
      "step": 123072
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.232966661453247,
      "learning_rate": 0.00026786292408665437,
      "loss": 3.142,
      "step": 123073
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.953944444656372,
      "learning_rate": 0.00026785885706574004,
      "loss": 3.0627,
      "step": 123074
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.616827964782715,
      "learning_rate": 0.0002678547900508013,
      "loss": 2.9294,
      "step": 123075
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7889437675476074,
      "learning_rate": 0.00026785072304183894,
      "loss": 3.0348,
      "step": 123076
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1000149250030518,
      "learning_rate": 0.0002678466560388538,
      "loss": 2.7622,
      "step": 123077
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.35029673576355,
      "learning_rate": 0.0002678425890418466,
      "loss": 3.1777,
      "step": 123078
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8854416608810425,
      "learning_rate": 0.000267838522050818,
      "loss": 2.9728,
      "step": 123079
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.17671537399292,
      "learning_rate": 0.000267834455065769,
      "loss": 2.8131,
      "step": 123080
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.025503158569336,
      "learning_rate": 0.00026783038808669997,
      "loss": 3.2088,
      "step": 123081
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.095853805541992,
      "learning_rate": 0.0002678263211136119,
      "loss": 3.2228,
      "step": 123082
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2396411895751953,
      "learning_rate": 0.00026782225414650557,
      "loss": 2.7278,
      "step": 123083
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.632939100265503,
      "learning_rate": 0.0002678181871853818,
      "loss": 3.0006,
      "step": 123084
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2664105892181396,
      "learning_rate": 0.0002678141202302411,
      "loss": 3.0502,
      "step": 123085
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2203075885772705,
      "learning_rate": 0.00026781005328108435,
      "loss": 3.1237,
      "step": 123086
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.516460657119751,
      "learning_rate": 0.0002678059863379124,
      "loss": 3.2777,
      "step": 123087
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9090806245803833,
      "learning_rate": 0.0002678019194007259,
      "loss": 2.5017,
      "step": 123088
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5982093811035156,
      "learning_rate": 0.0002677978524695256,
      "loss": 3.0543,
      "step": 123089
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.251966953277588,
      "learning_rate": 0.0002677937855443123,
      "loss": 2.7797,
      "step": 123090
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.438030481338501,
      "learning_rate": 0.0002677897186250868,
      "loss": 2.932,
      "step": 123091
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9899386167526245,
      "learning_rate": 0.00026778565171184973,
      "loss": 3.1164,
      "step": 123092
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5472700595855713,
      "learning_rate": 0.0002677815848046019,
      "loss": 3.1726,
      "step": 123093
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9207346439361572,
      "learning_rate": 0.0002677775179033441,
      "loss": 2.9068,
      "step": 123094
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7852200269699097,
      "learning_rate": 0.00026777345100807714,
      "loss": 2.8556,
      "step": 123095
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9320416450500488,
      "learning_rate": 0.0002677693841188017,
      "loss": 2.9111,
      "step": 123096
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3139498233795166,
      "learning_rate": 0.0002677653172355185,
      "loss": 2.6987,
      "step": 123097
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.172654628753662,
      "learning_rate": 0.0002677612503582283,
      "loss": 2.7651,
      "step": 123098
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9575834274291992,
      "learning_rate": 0.0002677571834869319,
      "loss": 3.1388,
      "step": 123099
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.777662754058838,
      "learning_rate": 0.00026775311662163,
      "loss": 2.7404,
      "step": 123100
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.806452751159668,
      "learning_rate": 0.0002677490497623235,
      "loss": 2.9336,
      "step": 123101
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.393721580505371,
      "learning_rate": 0.0002677449829090131,
      "loss": 3.0106,
      "step": 123102
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9788053035736084,
      "learning_rate": 0.00026774091606169946,
      "loss": 2.7993,
      "step": 123103
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7104886770248413,
      "learning_rate": 0.00026773684922038333,
      "loss": 3.1058,
      "step": 123104
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9093306064605713,
      "learning_rate": 0.0002677327823850656,
      "loss": 3.0533,
      "step": 123105
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.040463924407959,
      "learning_rate": 0.00026772871555574693,
      "loss": 2.9159,
      "step": 123106
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2271604537963867,
      "learning_rate": 0.0002677246487324281,
      "loss": 3.0588,
      "step": 123107
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.194638729095459,
      "learning_rate": 0.00026772058191510995,
      "loss": 2.9403,
      "step": 123108
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.959673523902893,
      "learning_rate": 0.00026771651510379304,
      "loss": 2.9858,
      "step": 123109
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9918971061706543,
      "learning_rate": 0.00026771244829847823,
      "loss": 2.7572,
      "step": 123110
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7738122940063477,
      "learning_rate": 0.00026770838149916637,
      "loss": 3.0598,
      "step": 123111
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.898598313331604,
      "learning_rate": 0.000267704314705858,
      "loss": 3.177,
      "step": 123112
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.415038585662842,
      "learning_rate": 0.0002677002479185541,
      "loss": 2.9503,
      "step": 123113
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2412116527557373,
      "learning_rate": 0.00026769618113725536,
      "loss": 2.9562,
      "step": 123114
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2706000804901123,
      "learning_rate": 0.0002676921143619626,
      "loss": 2.8318,
      "step": 123115
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8383594751358032,
      "learning_rate": 0.0002676880475926763,
      "loss": 3.0582,
      "step": 123116
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4813528060913086,
      "learning_rate": 0.00026768398082939747,
      "loss": 2.9609,
      "step": 123117
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.198997735977173,
      "learning_rate": 0.0002676799140721268,
      "loss": 3.2222,
      "step": 123118
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9292203187942505,
      "learning_rate": 0.000267675847320865,
      "loss": 2.9493,
      "step": 123119
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.235297679901123,
      "learning_rate": 0.0002676717805756129,
      "loss": 2.8596,
      "step": 123120
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9161535501480103,
      "learning_rate": 0.00026766771383637133,
      "loss": 2.6719,
      "step": 123121
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.022792339324951,
      "learning_rate": 0.00026766364710314084,
      "loss": 2.7121,
      "step": 123122
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.659064769744873,
      "learning_rate": 0.0002676595803759223,
      "loss": 2.8822,
      "step": 123123
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.259415626525879,
      "learning_rate": 0.0002676555136547164,
      "loss": 3.1091,
      "step": 123124
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0435400009155273,
      "learning_rate": 0.000267651446939524,
      "loss": 2.8996,
      "step": 123125
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.098820924758911,
      "learning_rate": 0.0002676473802303458,
      "loss": 3.1632,
      "step": 123126
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.386838912963867,
      "learning_rate": 0.0002676433135271827,
      "loss": 2.9726,
      "step": 123127
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3458216190338135,
      "learning_rate": 0.00026763924683003516,
      "loss": 3.3563,
      "step": 123128
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.218874931335449,
      "learning_rate": 0.00026763518013890413,
      "loss": 3.0182,
      "step": 123129
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.355635166168213,
      "learning_rate": 0.00026763111345379025,
      "loss": 2.8179,
      "step": 123130
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.057488203048706,
      "learning_rate": 0.00026762704677469446,
      "loss": 3.2944,
      "step": 123131
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.605213165283203,
      "learning_rate": 0.00026762298010161734,
      "loss": 2.9869,
      "step": 123132
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.709689140319824,
      "learning_rate": 0.0002676189134345599,
      "loss": 2.8187,
      "step": 123133
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3572182655334473,
      "learning_rate": 0.00026761484677352256,
      "loss": 3.0787,
      "step": 123134
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9960988759994507,
      "learning_rate": 0.00026761078011850625,
      "loss": 3.0624,
      "step": 123135
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.279822826385498,
      "learning_rate": 0.0002676067134695117,
      "loss": 2.6998,
      "step": 123136
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.0883891582489014,
      "learning_rate": 0.0002676026468265397,
      "loss": 2.9918,
      "step": 123137
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8938584327697754,
      "learning_rate": 0.0002675985801895909,
      "loss": 2.6378,
      "step": 123138
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2441914081573486,
      "learning_rate": 0.0002675945135586663,
      "loss": 2.7646,
      "step": 123139
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.670987844467163,
      "learning_rate": 0.00026759044693376637,
      "loss": 2.9443,
      "step": 123140
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.637690782546997,
      "learning_rate": 0.000267586380314892,
      "loss": 3.278,
      "step": 123141
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.216619968414307,
      "learning_rate": 0.00026758231370204393,
      "loss": 2.9965,
      "step": 123142
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.820546865463257,
      "learning_rate": 0.0002675782470952229,
      "loss": 2.9514,
      "step": 123143
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.900068759918213,
      "learning_rate": 0.0002675741804944297,
      "loss": 2.8288,
      "step": 123144
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.190782308578491,
      "learning_rate": 0.0002675701138996652,
      "loss": 2.8924,
      "step": 123145
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.313183546066284,
      "learning_rate": 0.0002675660473109299,
      "loss": 3.1356,
      "step": 123146
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.9838387966156006,
      "learning_rate": 0.0002675619807282247,
      "loss": 3.2089,
      "step": 123147
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.104880332946777,
      "learning_rate": 0.00026755791415155033,
      "loss": 2.7983,
      "step": 123148
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6631193161010742,
      "learning_rate": 0.00026755384758090755,
      "loss": 2.899,
      "step": 123149
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2550342082977295,
      "learning_rate": 0.0002675497810162971,
      "loss": 2.8433,
      "step": 123150
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.0556890964508057,
      "learning_rate": 0.00026754571445771993,
      "loss": 2.8831,
      "step": 123151
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.989060401916504,
      "learning_rate": 0.0002675416479051765,
      "loss": 3.1736,
      "step": 123152
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8248441219329834,
      "learning_rate": 0.0002675375813586676,
      "loss": 2.7749,
      "step": 123153
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0726964473724365,
      "learning_rate": 0.0002675335148181942,
      "loss": 2.9479,
      "step": 123154
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5889434814453125,
      "learning_rate": 0.0002675294482837568,
      "loss": 3.0257,
      "step": 123155
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8165385723114014,
      "learning_rate": 0.0002675253817553564,
      "loss": 2.8508,
      "step": 123156
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1602132320404053,
      "learning_rate": 0.0002675213152329937,
      "loss": 2.9929,
      "step": 123157
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.057070016860962,
      "learning_rate": 0.0002675172487166694,
      "loss": 2.9558,
      "step": 123158
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9902071952819824,
      "learning_rate": 0.00026751318220638413,
      "loss": 3.0858,
      "step": 123159
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9344853162765503,
      "learning_rate": 0.00026750911570213876,
      "loss": 2.7432,
      "step": 123160
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9871727228164673,
      "learning_rate": 0.00026750504920393413,
      "loss": 3.2182,
      "step": 123161
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0194411277770996,
      "learning_rate": 0.0002675009827117709,
      "loss": 2.8467,
      "step": 123162
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8931411504745483,
      "learning_rate": 0.0002674969162256499,
      "loss": 3.0911,
      "step": 123163
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9904865026474,
      "learning_rate": 0.0002674928497455718,
      "loss": 3.0081,
      "step": 123164
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0214285850524902,
      "learning_rate": 0.00026748878327153744,
      "loss": 2.9903,
      "step": 123165
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7978878021240234,
      "learning_rate": 0.00026748471680354747,
      "loss": 3.1038,
      "step": 123166
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.828973650932312,
      "learning_rate": 0.00026748065034160273,
      "loss": 2.6381,
      "step": 123167
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.879574179649353,
      "learning_rate": 0.00026747658388570395,
      "loss": 3.1908,
      "step": 123168
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.343454122543335,
      "learning_rate": 0.00026747251743585186,
      "loss": 3.3407,
      "step": 123169
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6617400646209717,
      "learning_rate": 0.0002674684509920473,
      "loss": 3.1347,
      "step": 123170
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.399902105331421,
      "learning_rate": 0.0002674643845542909,
      "loss": 2.7478,
      "step": 123171
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.716313600540161,
      "learning_rate": 0.0002674603181225836,
      "loss": 2.9728,
      "step": 123172
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8477730751037598,
      "learning_rate": 0.0002674562516969259,
      "loss": 2.8823,
      "step": 123173
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9523959159851074,
      "learning_rate": 0.0002674521852773187,
      "loss": 2.8434,
      "step": 123174
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.758664846420288,
      "learning_rate": 0.0002674481188637629,
      "loss": 2.8955,
      "step": 123175
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0997519493103027,
      "learning_rate": 0.000267444052456259,
      "loss": 3.1733,
      "step": 123176
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.5708930492401123,
      "learning_rate": 0.00026743998605480786,
      "loss": 3.0376,
      "step": 123177
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.996425986289978,
      "learning_rate": 0.0002674359196594102,
      "loss": 2.8864,
      "step": 123178
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8126869201660156,
      "learning_rate": 0.00026743185327006696,
      "loss": 3.092,
      "step": 123179
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0830252170562744,
      "learning_rate": 0.0002674277868867787,
      "loss": 3.1721,
      "step": 123180
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.840562105178833,
      "learning_rate": 0.00026742372050954615,
      "loss": 2.9778,
      "step": 123181
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.046415090560913,
      "learning_rate": 0.00026741965413837023,
      "loss": 3.058,
      "step": 123182
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.334050178527832,
      "learning_rate": 0.0002674155877732516,
      "loss": 2.9289,
      "step": 123183
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.202507257461548,
      "learning_rate": 0.00026741152141419096,
      "loss": 3.2539,
      "step": 123184
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.223475217819214,
      "learning_rate": 0.0002674074550611892,
      "loss": 3.0804,
      "step": 123185
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9217677116394043,
      "learning_rate": 0.0002674033887142469,
      "loss": 3.0029,
      "step": 123186
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.08859920501709,
      "learning_rate": 0.0002673993223733651,
      "loss": 2.9931,
      "step": 123187
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7878081798553467,
      "learning_rate": 0.0002673952560385444,
      "loss": 3.0504,
      "step": 123188
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4243533611297607,
      "learning_rate": 0.0002673911897097854,
      "loss": 3.292,
      "step": 123189
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9765584468841553,
      "learning_rate": 0.00026738712338708896,
      "loss": 2.7629,
      "step": 123190
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2800941467285156,
      "learning_rate": 0.00026738305707045594,
      "loss": 2.9128,
      "step": 123191
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8065626621246338,
      "learning_rate": 0.00026737899075988703,
      "loss": 2.7823,
      "step": 123192
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7532107830047607,
      "learning_rate": 0.0002673749244553829,
      "loss": 3.0143,
      "step": 123193
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7134339809417725,
      "learning_rate": 0.0002673708581569446,
      "loss": 3.1757,
      "step": 123194
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2753746509552,
      "learning_rate": 0.0002673667918645725,
      "loss": 2.9479,
      "step": 123195
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1557364463806152,
      "learning_rate": 0.0002673627255782676,
      "loss": 2.8658,
      "step": 123196
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1192526817321777,
      "learning_rate": 0.0002673586592980305,
      "loss": 3.021,
      "step": 123197
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7233113050460815,
      "learning_rate": 0.0002673545930238621,
      "loss": 2.9327,
      "step": 123198
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.287484884262085,
      "learning_rate": 0.00026735052675576305,
      "loss": 3.0268,
      "step": 123199
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.095170259475708,
      "learning_rate": 0.0002673464604937343,
      "loss": 2.8251,
      "step": 123200
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6443239450454712,
      "learning_rate": 0.0002673423942377763,
      "loss": 2.9948,
      "step": 123201
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7083953619003296,
      "learning_rate": 0.00026733832798789004,
      "loss": 3.1156,
      "step": 123202
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.257781982421875,
      "learning_rate": 0.00026733426174407616,
      "loss": 2.9395,
      "step": 123203
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8739347457885742,
      "learning_rate": 0.00026733019550633544,
      "loss": 3.0926,
      "step": 123204
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1868765354156494,
      "learning_rate": 0.0002673261292746687,
      "loss": 3.0161,
      "step": 123205
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2097721099853516,
      "learning_rate": 0.00026732206304907677,
      "loss": 3.0435,
      "step": 123206
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2864556312561035,
      "learning_rate": 0.0002673179968295601,
      "loss": 3.0628,
      "step": 123207
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.032046318054199,
      "learning_rate": 0.0002673139306161197,
      "loss": 3.0326,
      "step": 123208
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.16562557220459,
      "learning_rate": 0.0002673098644087562,
      "loss": 3.0453,
      "step": 123209
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.731117010116577,
      "learning_rate": 0.00026730579820747046,
      "loss": 2.7476,
      "step": 123210
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1602728366851807,
      "learning_rate": 0.00026730173201226315,
      "loss": 2.9556,
      "step": 123211
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.00382661819458,
      "learning_rate": 0.0002672976658231352,
      "loss": 2.9449,
      "step": 123212
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.6663360595703125,
      "learning_rate": 0.00026729359964008714,
      "loss": 2.9571,
      "step": 123213
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.230548143386841,
      "learning_rate": 0.00026728953346311974,
      "loss": 3.2091,
      "step": 123214
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3832592964172363,
      "learning_rate": 0.0002672854672922339,
      "loss": 3.1912,
      "step": 123215
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9180357456207275,
      "learning_rate": 0.0002672814011274303,
      "loss": 3.0308,
      "step": 123216
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2267391681671143,
      "learning_rate": 0.0002672773349687097,
      "loss": 2.9073,
      "step": 123217
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7542622089385986,
      "learning_rate": 0.00026727326881607297,
      "loss": 3.3951,
      "step": 123218
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2433972358703613,
      "learning_rate": 0.00026726920266952064,
      "loss": 2.8964,
      "step": 123219
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0089049339294434,
      "learning_rate": 0.0002672651365290536,
      "loss": 2.7941,
      "step": 123220
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7763482332229614,
      "learning_rate": 0.0002672610703946726,
      "loss": 2.9904,
      "step": 123221
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8609440326690674,
      "learning_rate": 0.00026725700426637834,
      "loss": 2.896,
      "step": 123222
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.386143207550049,
      "learning_rate": 0.00026725293814417166,
      "loss": 2.8358,
      "step": 123223
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8279895782470703,
      "learning_rate": 0.0002672488720280534,
      "loss": 2.9803,
      "step": 123224
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.0784108638763428,
      "learning_rate": 0.000267244805918024,
      "loss": 2.9168,
      "step": 123225
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2889392375946045,
      "learning_rate": 0.00026724073981408453,
      "loss": 3.0803,
      "step": 123226
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4337830543518066,
      "learning_rate": 0.00026723667371623554,
      "loss": 2.751,
      "step": 123227
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.097799062728882,
      "learning_rate": 0.00026723260762447787,
      "loss": 2.9426,
      "step": 123228
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3522207736968994,
      "learning_rate": 0.0002672285415388123,
      "loss": 2.9733,
      "step": 123229
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.1785085201263428,
      "learning_rate": 0.0002672244754592397,
      "loss": 2.9864,
      "step": 123230
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9365228414535522,
      "learning_rate": 0.00026722040938576057,
      "loss": 2.7936,
      "step": 123231
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.247044563293457,
      "learning_rate": 0.0002672163433183757,
      "loss": 2.9821,
      "step": 123232
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.204890251159668,
      "learning_rate": 0.000267212277257086,
      "loss": 3.0962,
      "step": 123233
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.231654167175293,
      "learning_rate": 0.00026720821120189214,
      "loss": 2.889,
      "step": 123234
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.016486883163452,
      "learning_rate": 0.00026720414515279495,
      "loss": 2.8609,
      "step": 123235
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.5433385372161865,
      "learning_rate": 0.00026720007910979515,
      "loss": 2.8769,
      "step": 123236
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8162472248077393,
      "learning_rate": 0.00026719601307289344,
      "loss": 2.9583,
      "step": 123237
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.7905726432800293,
      "learning_rate": 0.0002671919470420906,
      "loss": 3.1794,
      "step": 123238
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3003687858581543,
      "learning_rate": 0.00026718788101738736,
      "loss": 2.9402,
      "step": 123239
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.779507637023926,
      "learning_rate": 0.0002671838149987845,
      "loss": 2.9465,
      "step": 123240
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.1078102588653564,
      "learning_rate": 0.0002671797489862828,
      "loss": 2.8019,
      "step": 123241
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8573623895645142,
      "learning_rate": 0.000267175682979883,
      "loss": 2.8983,
      "step": 123242
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.2526872158050537,
      "learning_rate": 0.00026717161697958604,
      "loss": 2.9578,
      "step": 123243
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7947059869766235,
      "learning_rate": 0.00026716755098539236,
      "loss": 2.9348,
      "step": 123244
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.865493893623352,
      "learning_rate": 0.0002671634849973028,
      "loss": 2.9843,
      "step": 123245
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.225895643234253,
      "learning_rate": 0.0002671594190153182,
      "loss": 3.0532,
      "step": 123246
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.9215612411499023,
      "learning_rate": 0.0002671553530394393,
      "loss": 2.9468,
      "step": 123247
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9973883628845215,
      "learning_rate": 0.0002671512870696668,
      "loss": 2.8498,
      "step": 123248
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.264965057373047,
      "learning_rate": 0.00026714722110600164,
      "loss": 2.7915,
      "step": 123249
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7088459730148315,
      "learning_rate": 0.0002671431551484443,
      "loss": 3.0575,
      "step": 123250
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8453642129898071,
      "learning_rate": 0.00026713908919699566,
      "loss": 3.0279,
      "step": 123251
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6789801120758057,
      "learning_rate": 0.0002671350232516565,
      "loss": 2.9939,
      "step": 123252
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8570846319198608,
      "learning_rate": 0.00026713095731242754,
      "loss": 2.9323,
      "step": 123253
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.555898666381836,
      "learning_rate": 0.0002671268913793096,
      "loss": 3.0267,
      "step": 123254
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.041259765625,
      "learning_rate": 0.0002671228254523034,
      "loss": 3.1701,
      "step": 123255
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.390817642211914,
      "learning_rate": 0.00026711875953140966,
      "loss": 2.6403,
      "step": 123256
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.4964165687561035,
      "learning_rate": 0.0002671146936166292,
      "loss": 3.0332,
      "step": 123257
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.8708646297454834,
      "learning_rate": 0.0002671106277079627,
      "loss": 2.7992,
      "step": 123258
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.046790599822998,
      "learning_rate": 0.0002671065618054109,
      "loss": 3.1244,
      "step": 123259
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3791685104370117,
      "learning_rate": 0.0002671024959089747,
      "loss": 3.1321,
      "step": 123260
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.6871414184570312,
      "learning_rate": 0.00026709843001865475,
      "loss": 2.9466,
      "step": 123261
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.163376569747925,
      "learning_rate": 0.0002670943641344518,
      "loss": 2.9927,
      "step": 123262
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9176063537597656,
      "learning_rate": 0.0002670902982563667,
      "loss": 2.94,
      "step": 123263
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.9975848197937012,
      "learning_rate": 0.0002670862323844001,
      "loss": 3.1221,
      "step": 123264
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.7731521129608154,
      "learning_rate": 0.0002670821665185527,
      "loss": 2.9689,
      "step": 123265
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.184617519378662,
      "learning_rate": 0.0002670781006588254,
      "loss": 3.0224,
      "step": 123266
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3319244384765625,
      "learning_rate": 0.000267074034805219,
      "loss": 3.2392,
      "step": 123267
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9663610458374023,
      "learning_rate": 0.000267069968957734,
      "loss": 2.5834,
      "step": 123268
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5015101432800293,
      "learning_rate": 0.00026706590311637143,
      "loss": 2.9611,
      "step": 123269
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0151121616363525,
      "learning_rate": 0.0002670618372811319,
      "loss": 3.1359,
      "step": 123270
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.752240538597107,
      "learning_rate": 0.0002670577714520162,
      "loss": 2.8139,
      "step": 123271
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.10133695602417,
      "learning_rate": 0.00026705370562902505,
      "loss": 3.1357,
      "step": 123272
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9879249334335327,
      "learning_rate": 0.0002670496398121593,
      "loss": 3.1215,
      "step": 123273
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5068368911743164,
      "learning_rate": 0.0002670455740014196,
      "loss": 2.9664,
      "step": 123274
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.421983242034912,
      "learning_rate": 0.0002670415081968067,
      "loss": 2.9234,
      "step": 123275
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0295042991638184,
      "learning_rate": 0.00026703744239832145,
      "loss": 3.1713,
      "step": 123276
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1188018321990967,
      "learning_rate": 0.0002670333766059646,
      "loss": 3.0396,
      "step": 123277
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1552164554595947,
      "learning_rate": 0.00026702931081973687,
      "loss": 3.1057,
      "step": 123278
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.520730972290039,
      "learning_rate": 0.000267025245039639,
      "loss": 2.7391,
      "step": 123279
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.096902847290039,
      "learning_rate": 0.00026702117926567174,
      "loss": 3.0171,
      "step": 123280
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8879798650741577,
      "learning_rate": 0.00026701711349783584,
      "loss": 3.0936,
      "step": 123281
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8457707166671753,
      "learning_rate": 0.0002670130477361321,
      "loss": 3.1531,
      "step": 123282
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4654505252838135,
      "learning_rate": 0.0002670089819805612,
      "loss": 3.0178,
      "step": 123283
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6201298236846924,
      "learning_rate": 0.00026700491623112406,
      "loss": 3.1135,
      "step": 123284
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8209537267684937,
      "learning_rate": 0.0002670008504878214,
      "loss": 2.9736,
      "step": 123285
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.002992868423462,
      "learning_rate": 0.00026699678475065376,
      "loss": 3.0465,
      "step": 123286
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6920971870422363,
      "learning_rate": 0.000266992719019622,
      "loss": 2.8641,
      "step": 123287
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7601245641708374,
      "learning_rate": 0.00026698865329472703,
      "loss": 2.8919,
      "step": 123288
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1510908603668213,
      "learning_rate": 0.0002669845875759694,
      "loss": 2.8736,
      "step": 123289
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.102834463119507,
      "learning_rate": 0.00026698052186335,
      "loss": 3.0844,
      "step": 123290
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1206953525543213,
      "learning_rate": 0.00026697645615686964,
      "loss": 3.1746,
      "step": 123291
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.881768822669983,
      "learning_rate": 0.0002669723904565289,
      "loss": 3.1244,
      "step": 123292
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1774024963378906,
      "learning_rate": 0.00026696832476232854,
      "loss": 2.8819,
      "step": 123293
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.75319242477417,
      "learning_rate": 0.0002669642590742695,
      "loss": 3.2146,
      "step": 123294
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.060659646987915,
      "learning_rate": 0.0002669601933923524,
      "loss": 3.1991,
      "step": 123295
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.124396562576294,
      "learning_rate": 0.0002669561277165779,
      "loss": 3.1218,
      "step": 123296
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.19984769821167,
      "learning_rate": 0.00026695206204694716,
      "loss": 3.1306,
      "step": 123297
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5968801975250244,
      "learning_rate": 0.00026694799638346046,
      "loss": 3.0111,
      "step": 123298
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8554086685180664,
      "learning_rate": 0.00026694393072611874,
      "loss": 3.1105,
      "step": 123299
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.145972728729248,
      "learning_rate": 0.00026693986507492283,
      "loss": 3.1117,
      "step": 123300
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.81129789352417,
      "learning_rate": 0.00026693579942987336,
      "loss": 3.0462,
      "step": 123301
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.771445870399475,
      "learning_rate": 0.00026693173379097116,
      "loss": 2.9763,
      "step": 123302
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.281837224960327,
      "learning_rate": 0.0002669276681582171,
      "loss": 2.8522,
      "step": 123303
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0267133712768555,
      "learning_rate": 0.0002669236025316117,
      "loss": 2.9843,
      "step": 123304
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3039259910583496,
      "learning_rate": 0.00026691953691115584,
      "loss": 2.7161,
      "step": 123305
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9848356246948242,
      "learning_rate": 0.0002669154712968503,
      "loss": 3.1961,
      "step": 123306
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1374075412750244,
      "learning_rate": 0.0002669114056886957,
      "loss": 2.9912,
      "step": 123307
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1256895065307617,
      "learning_rate": 0.00026690734008669295,
      "loss": 2.8896,
      "step": 123308
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8640295267105103,
      "learning_rate": 0.0002669032744908427,
      "loss": 3.0646,
      "step": 123309
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3675363063812256,
      "learning_rate": 0.0002668992089011459,
      "loss": 2.7615,
      "step": 123310
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3665969371795654,
      "learning_rate": 0.00026689514331760313,
      "loss": 2.9968,
      "step": 123311
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3841440677642822,
      "learning_rate": 0.00026689107774021504,
      "loss": 3.0772,
      "step": 123312
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1973674297332764,
      "learning_rate": 0.0002668870121689826,
      "loss": 3.2099,
      "step": 123313
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5710432529449463,
      "learning_rate": 0.0002668829466039065,
      "loss": 2.9539,
      "step": 123314
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.94679594039917,
      "learning_rate": 0.00026687888104498747,
      "loss": 2.8299,
      "step": 123315
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.6387202739715576,
      "learning_rate": 0.0002668748154922264,
      "loss": 2.715,
      "step": 123316
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7828700542449951,
      "learning_rate": 0.0002668707499456238,
      "loss": 3.1108,
      "step": 123317
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.004387378692627,
      "learning_rate": 0.0002668666844051805,
      "loss": 2.9849,
      "step": 123318
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.136503219604492,
      "learning_rate": 0.0002668626188708974,
      "loss": 2.8868,
      "step": 123319
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4724390506744385,
      "learning_rate": 0.0002668585533427751,
      "loss": 2.8521,
      "step": 123320
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3742642402648926,
      "learning_rate": 0.00026685448782081447,
      "loss": 2.8913,
      "step": 123321
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.865677833557129,
      "learning_rate": 0.0002668504223050163,
      "loss": 2.9585,
      "step": 123322
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.032452344894409,
      "learning_rate": 0.00026684635679538116,
      "loss": 3.0635,
      "step": 123323
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.045849323272705,
      "learning_rate": 0.00026684229129190996,
      "loss": 2.9201,
      "step": 123324
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6319377422332764,
      "learning_rate": 0.0002668382257946033,
      "loss": 2.8298,
      "step": 123325
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.193913221359253,
      "learning_rate": 0.0002668341603034621,
      "loss": 3.0392,
      "step": 123326
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8826161623001099,
      "learning_rate": 0.00026683009481848706,
      "loss": 2.8282,
      "step": 123327
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3713040351867676,
      "learning_rate": 0.000266826029339679,
      "loss": 3.0194,
      "step": 123328
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.40990948677063,
      "learning_rate": 0.0002668219638670386,
      "loss": 2.7584,
      "step": 123329
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.983621597290039,
      "learning_rate": 0.0002668178984005665,
      "loss": 3.0898,
      "step": 123330
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6022437810897827,
      "learning_rate": 0.0002668138329402636,
      "loss": 3.0243,
      "step": 123331
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.169212818145752,
      "learning_rate": 0.00026680976748613065,
      "loss": 3.038,
      "step": 123332
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.3994827270507812,
      "learning_rate": 0.00026680570203816843,
      "loss": 2.7927,
      "step": 123333
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0619351863861084,
      "learning_rate": 0.00026680163659637777,
      "loss": 2.9568,
      "step": 123334
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8763991594314575,
      "learning_rate": 0.0002667975711607592,
      "loss": 2.974,
      "step": 123335
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.063856840133667,
      "learning_rate": 0.0002667935057313135,
      "loss": 3.0199,
      "step": 123336
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.297443151473999,
      "learning_rate": 0.00026678944030804157,
      "loss": 3.2061,
      "step": 123337
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8292312622070312,
      "learning_rate": 0.00026678537489094415,
      "loss": 3.1761,
      "step": 123338
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1837961673736572,
      "learning_rate": 0.0002667813094800219,
      "loss": 3.1564,
      "step": 123339
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0404508113861084,
      "learning_rate": 0.0002667772440752757,
      "loss": 2.9635,
      "step": 123340
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9571306705474854,
      "learning_rate": 0.00026677317867670623,
      "loss": 3.026,
      "step": 123341
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9234676361083984,
      "learning_rate": 0.00026676911328431425,
      "loss": 2.9847,
      "step": 123342
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9721875190734863,
      "learning_rate": 0.00026676504789810044,
      "loss": 2.9031,
      "step": 123343
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.370866298675537,
      "learning_rate": 0.0002667609825180657,
      "loss": 2.9709,
      "step": 123344
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.044957399368286,
      "learning_rate": 0.00026675691714421067,
      "loss": 3.0094,
      "step": 123345
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.065138339996338,
      "learning_rate": 0.0002667528517765363,
      "loss": 3.1447,
      "step": 123346
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7032139301300049,
      "learning_rate": 0.00026674878641504303,
      "loss": 3.0869,
      "step": 123347
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.351781129837036,
      "learning_rate": 0.00026674472105973185,
      "loss": 3.1617,
      "step": 123348
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0982158184051514,
      "learning_rate": 0.0002667406557106035,
      "loss": 3.0184,
      "step": 123349
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.513883113861084,
      "learning_rate": 0.0002667365903676587,
      "loss": 2.8339,
      "step": 123350
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8110747337341309,
      "learning_rate": 0.0002667325250308981,
      "loss": 3.0321,
      "step": 123351
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.112105131149292,
      "learning_rate": 0.00026672845970032266,
      "loss": 2.9164,
      "step": 123352
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.4288668632507324,
      "learning_rate": 0.00026672439437593293,
      "loss": 3.1145,
      "step": 123353
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.949692726135254,
      "learning_rate": 0.00026672032905772976,
      "loss": 3.0012,
      "step": 123354
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.107865571975708,
      "learning_rate": 0.000266716263745714,
      "loss": 3.1178,
      "step": 123355
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.056119918823242,
      "learning_rate": 0.00026671219843988627,
      "loss": 2.7065,
      "step": 123356
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.151862859725952,
      "learning_rate": 0.00026670813314024736,
      "loss": 3.2089,
      "step": 123357
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8900527954101562,
      "learning_rate": 0.0002667040678467981,
      "loss": 3.0457,
      "step": 123358
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2047886848449707,
      "learning_rate": 0.0002667000025595391,
      "loss": 3.1944,
      "step": 123359
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.015258312225342,
      "learning_rate": 0.0002666959372784712,
      "loss": 3.0133,
      "step": 123360
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0548388957977295,
      "learning_rate": 0.00026669187200359514,
      "loss": 3.195,
      "step": 123361
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.619134426116943,
      "learning_rate": 0.0002666878067349117,
      "loss": 2.8136,
      "step": 123362
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.874570846557617,
      "learning_rate": 0.00026668374147242175,
      "loss": 3.0844,
      "step": 123363
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.448507070541382,
      "learning_rate": 0.0002666796762161258,
      "loss": 3.0255,
      "step": 123364
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8973532915115356,
      "learning_rate": 0.00026667561096602475,
      "loss": 2.8915,
      "step": 123365
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.628804922103882,
      "learning_rate": 0.00026667154572211936,
      "loss": 3.0815,
      "step": 123366
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7784814834594727,
      "learning_rate": 0.0002666674804844103,
      "loss": 3.096,
      "step": 123367
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6855387687683105,
      "learning_rate": 0.00026666341525289836,
      "loss": 2.7041,
      "step": 123368
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.247530460357666,
      "learning_rate": 0.0002666593500275844,
      "loss": 2.9389,
      "step": 123369
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.9886608123779297,
      "learning_rate": 0.00026665528480846915,
      "loss": 2.8157,
      "step": 123370
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4335861206054688,
      "learning_rate": 0.00026665121959555324,
      "loss": 3.1555,
      "step": 123371
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.806483268737793,
      "learning_rate": 0.0002666471543888375,
      "loss": 2.8995,
      "step": 123372
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.5898818969726562,
      "learning_rate": 0.00026664308918832264,
      "loss": 2.9144,
      "step": 123373
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2833948135375977,
      "learning_rate": 0.00026663902399400946,
      "loss": 2.8932,
      "step": 123374
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.3556342124938965,
      "learning_rate": 0.00026663495880589874,
      "loss": 3.2152,
      "step": 123375
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.7900214195251465,
      "learning_rate": 0.00026663089362399117,
      "loss": 3.0537,
      "step": 123376
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.2117888927459717,
      "learning_rate": 0.00026662682844828773,
      "loss": 2.9889,
      "step": 123377
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.178618907928467,
      "learning_rate": 0.00026662276327878886,
      "loss": 3.1251,
      "step": 123378
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9480564594268799,
      "learning_rate": 0.0002666186981154954,
      "loss": 3.0687,
      "step": 123379
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.188636302947998,
      "learning_rate": 0.0002666146329584082,
      "loss": 2.8782,
      "step": 123380
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8517212867736816,
      "learning_rate": 0.00026661056780752796,
      "loss": 2.9535,
      "step": 123381
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.147890090942383,
      "learning_rate": 0.0002666065026628555,
      "loss": 3.0201,
      "step": 123382
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2338619232177734,
      "learning_rate": 0.00026660243752439156,
      "loss": 3.1964,
      "step": 123383
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9355149269104004,
      "learning_rate": 0.00026659837239213676,
      "loss": 2.892,
      "step": 123384
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1920855045318604,
      "learning_rate": 0.00026659430726609195,
      "loss": 2.8571,
      "step": 123385
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8212454319000244,
      "learning_rate": 0.0002665902421462579,
      "loss": 2.9394,
      "step": 123386
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0469682216644287,
      "learning_rate": 0.0002665861770326353,
      "loss": 2.869,
      "step": 123387
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6764929294586182,
      "learning_rate": 0.000266582111925225,
      "loss": 2.9152,
      "step": 123388
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.886282205581665,
      "learning_rate": 0.0002665780468240279,
      "loss": 3.0177,
      "step": 123389
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.16182804107666,
      "learning_rate": 0.0002665739817290444,
      "loss": 3.156,
      "step": 123390
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6491103172302246,
      "learning_rate": 0.00026656991664027543,
      "loss": 2.8723,
      "step": 123391
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0652575492858887,
      "learning_rate": 0.0002665658515577217,
      "loss": 3.174,
      "step": 123392
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9679365158081055,
      "learning_rate": 0.0002665617864813841,
      "loss": 2.9573,
      "step": 123393
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8382527828216553,
      "learning_rate": 0.0002665577214112632,
      "loss": 2.8941,
      "step": 123394
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9409937858581543,
      "learning_rate": 0.00026655365634736003,
      "loss": 2.8686,
      "step": 123395
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7805806398391724,
      "learning_rate": 0.00026654959128967503,
      "loss": 2.9799,
      "step": 123396
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0638811588287354,
      "learning_rate": 0.0002665455262382091,
      "loss": 3.151,
      "step": 123397
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1314845085144043,
      "learning_rate": 0.00026654146119296296,
      "loss": 2.9797,
      "step": 123398
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.778835415840149,
      "learning_rate": 0.0002665373961539374,
      "loss": 3.041,
      "step": 123399
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9212371110916138,
      "learning_rate": 0.0002665333311211332,
      "loss": 2.9967,
      "step": 123400
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1408848762512207,
      "learning_rate": 0.00026652926609455123,
      "loss": 3.2135,
      "step": 123401
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8679356575012207,
      "learning_rate": 0.00026652520107419195,
      "loss": 3.01,
      "step": 123402
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1705427169799805,
      "learning_rate": 0.0002665211360600562,
      "loss": 2.8809,
      "step": 123403
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.571010112762451,
      "learning_rate": 0.0002665170710521449,
      "loss": 2.8688,
      "step": 123404
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.027756452560425,
      "learning_rate": 0.00026651300605045865,
      "loss": 2.936,
      "step": 123405
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.45346999168396,
      "learning_rate": 0.0002665089410549983,
      "loss": 3.0386,
      "step": 123406
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9891130924224854,
      "learning_rate": 0.00026650487606576467,
      "loss": 3.0208,
      "step": 123407
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9029347896575928,
      "learning_rate": 0.0002665008110827583,
      "loss": 2.9406,
      "step": 123408
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7010172605514526,
      "learning_rate": 0.00026649674610598,
      "loss": 2.8508,
      "step": 123409
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.900578737258911,
      "learning_rate": 0.00026649268113543073,
      "loss": 2.9534,
      "step": 123410
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8521533012390137,
      "learning_rate": 0.000266488616171111,
      "loss": 3.0166,
      "step": 123411
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2834885120391846,
      "learning_rate": 0.00026648455121302167,
      "loss": 2.9205,
      "step": 123412
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.549330711364746,
      "learning_rate": 0.0002664804862611637,
      "loss": 3.2883,
      "step": 123413
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7788443565368652,
      "learning_rate": 0.0002664764213155374,
      "loss": 3.0803,
      "step": 123414
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.88810133934021,
      "learning_rate": 0.00026647235637614384,
      "loss": 2.7363,
      "step": 123415
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.830128788948059,
      "learning_rate": 0.0002664682914429837,
      "loss": 3.0968,
      "step": 123416
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8356006145477295,
      "learning_rate": 0.0002664642265160577,
      "loss": 3.0311,
      "step": 123417
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.794860601425171,
      "learning_rate": 0.00026646016159536664,
      "loss": 2.8916,
      "step": 123418
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3886783123016357,
      "learning_rate": 0.0002664560966809114,
      "loss": 2.9857,
      "step": 123419
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0728015899658203,
      "learning_rate": 0.00026645203177269247,
      "loss": 3.1956,
      "step": 123420
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7801830768585205,
      "learning_rate": 0.0002664479668707108,
      "loss": 2.9,
      "step": 123421
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1507973670959473,
      "learning_rate": 0.00026644390197496704,
      "loss": 3.036,
      "step": 123422
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.637524127960205,
      "learning_rate": 0.00026643983708546197,
      "loss": 2.9166,
      "step": 123423
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8844799995422363,
      "learning_rate": 0.0002664357722021964,
      "loss": 3.1423,
      "step": 123424
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.001918315887451,
      "learning_rate": 0.0002664317073251711,
      "loss": 2.7382,
      "step": 123425
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0420002937316895,
      "learning_rate": 0.0002664276424543868,
      "loss": 3.0808,
      "step": 123426
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.7055089473724365,
      "learning_rate": 0.0002664235775898442,
      "loss": 3.0213,
      "step": 123427
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0941245555877686,
      "learning_rate": 0.000266419512731544,
      "loss": 2.9982,
      "step": 123428
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.115224838256836,
      "learning_rate": 0.0002664154478794871,
      "loss": 2.9908,
      "step": 123429
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1270980834960938,
      "learning_rate": 0.0002664113830336742,
      "loss": 3.1171,
      "step": 123430
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3361518383026123,
      "learning_rate": 0.0002664073181941062,
      "loss": 2.9563,
      "step": 123431
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.546445608139038,
      "learning_rate": 0.00026640325336078354,
      "loss": 2.9754,
      "step": 123432
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0246710777282715,
      "learning_rate": 0.00026639918853370726,
      "loss": 2.7815,
      "step": 123433
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.885330080986023,
      "learning_rate": 0.0002663951237128779,
      "loss": 3.0724,
      "step": 123434
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.244563579559326,
      "learning_rate": 0.0002663910588982964,
      "loss": 2.8877,
      "step": 123435
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.19224214553833,
      "learning_rate": 0.0002663869940899633,
      "loss": 3.0158,
      "step": 123436
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8772790431976318,
      "learning_rate": 0.0002663829292878797,
      "loss": 2.9885,
      "step": 123437
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7316261529922485,
      "learning_rate": 0.00026637886449204604,
      "loss": 3.1676,
      "step": 123438
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8556410074234009,
      "learning_rate": 0.00026637479970246315,
      "loss": 3.0101,
      "step": 123439
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8528388738632202,
      "learning_rate": 0.00026637073491913183,
      "loss": 3.046,
      "step": 123440
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.787984848022461,
      "learning_rate": 0.0002663666701420529,
      "loss": 3.0557,
      "step": 123441
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.186992645263672,
      "learning_rate": 0.00026636260537122697,
      "loss": 2.6448,
      "step": 123442
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.294999361038208,
      "learning_rate": 0.00026635854060665483,
      "loss": 2.8584,
      "step": 123443
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.601609706878662,
      "learning_rate": 0.0002663544758483374,
      "loss": 3.2861,
      "step": 123444
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9299207925796509,
      "learning_rate": 0.00026635041109627524,
      "loss": 2.9968,
      "step": 123445
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1819396018981934,
      "learning_rate": 0.0002663463463504691,
      "loss": 2.644,
      "step": 123446
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8849048614501953,
      "learning_rate": 0.0002663422816109199,
      "loss": 3.0175,
      "step": 123447
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.992415428161621,
      "learning_rate": 0.00026633821687762835,
      "loss": 2.872,
      "step": 123448
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.26431941986084,
      "learning_rate": 0.0002663341521505951,
      "loss": 2.9745,
      "step": 123449
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1086080074310303,
      "learning_rate": 0.000266330087429821,
      "loss": 2.9472,
      "step": 123450
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.688415288925171,
      "learning_rate": 0.0002663260227153067,
      "loss": 3.0507,
      "step": 123451
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.032557249069214,
      "learning_rate": 0.00026632195800705305,
      "loss": 2.6848,
      "step": 123452
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0904226303100586,
      "learning_rate": 0.0002663178933050608,
      "loss": 2.9995,
      "step": 123453
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.736698031425476,
      "learning_rate": 0.0002663138286093307,
      "loss": 2.9629,
      "step": 123454
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1404452323913574,
      "learning_rate": 0.00026630976391986346,
      "loss": 3.1519,
      "step": 123455
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.013475179672241,
      "learning_rate": 0.00026630569923665994,
      "loss": 3.0235,
      "step": 123456
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.9238500595092773,
      "learning_rate": 0.0002663016345597208,
      "loss": 3.1842,
      "step": 123457
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4138407707214355,
      "learning_rate": 0.00026629756988904674,
      "loss": 2.745,
      "step": 123458
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9520069360733032,
      "learning_rate": 0.0002662935052246387,
      "loss": 2.9961,
      "step": 123459
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.030266761779785,
      "learning_rate": 0.00026628944056649725,
      "loss": 2.9487,
      "step": 123460
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1861257553100586,
      "learning_rate": 0.00026628537591462327,
      "loss": 2.8623,
      "step": 123461
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0457561016082764,
      "learning_rate": 0.00026628131126901755,
      "loss": 2.9526,
      "step": 123462
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9224324226379395,
      "learning_rate": 0.0002662772466296807,
      "loss": 3.0768,
      "step": 123463
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.596430778503418,
      "learning_rate": 0.0002662731819966135,
      "loss": 2.928,
      "step": 123464
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.527496576309204,
      "learning_rate": 0.0002662691173698168,
      "loss": 2.9529,
      "step": 123465
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.332953929901123,
      "learning_rate": 0.0002662650527492913,
      "loss": 2.8977,
      "step": 123466
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7528947591781616,
      "learning_rate": 0.00026626098813503776,
      "loss": 3.0165,
      "step": 123467
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6225041151046753,
      "learning_rate": 0.00026625692352705705,
      "loss": 3.0309,
      "step": 123468
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.089169502258301,
      "learning_rate": 0.0002662528589253497,
      "loss": 3.222,
      "step": 123469
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.074070453643799,
      "learning_rate": 0.0002662487943299166,
      "loss": 2.8512,
      "step": 123470
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.215181827545166,
      "learning_rate": 0.00026624472974075844,
      "loss": 3.0859,
      "step": 123471
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.022841215133667,
      "learning_rate": 0.00026624066515787604,
      "loss": 3.097,
      "step": 123472
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.381629705429077,
      "learning_rate": 0.00026623660058127014,
      "loss": 2.7292,
      "step": 123473
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8312432765960693,
      "learning_rate": 0.00026623253601094167,
      "loss": 2.9019,
      "step": 123474
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.868685245513916,
      "learning_rate": 0.000266228471446891,
      "loss": 3.0043,
      "step": 123475
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7972910404205322,
      "learning_rate": 0.00026622440688911915,
      "loss": 2.8944,
      "step": 123476
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9517273902893066,
      "learning_rate": 0.0002662203423376268,
      "loss": 2.8249,
      "step": 123477
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7211568355560303,
      "learning_rate": 0.00026621627779241467,
      "loss": 2.9167,
      "step": 123478
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0185422897338867,
      "learning_rate": 0.00026621221325348366,
      "loss": 2.8095,
      "step": 123479
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9690462350845337,
      "learning_rate": 0.0002662081487208345,
      "loss": 2.757,
      "step": 123480
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9720463752746582,
      "learning_rate": 0.00026620408419446783,
      "loss": 2.8727,
      "step": 123481
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.357893466949463,
      "learning_rate": 0.0002662000196743844,
      "loss": 3.1249,
      "step": 123482
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9528011083602905,
      "learning_rate": 0.000266195955160585,
      "loss": 3.0724,
      "step": 123483
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8180568218231201,
      "learning_rate": 0.0002661918906530705,
      "loss": 2.9405,
      "step": 123484
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.971921682357788,
      "learning_rate": 0.00026618782615184147,
      "loss": 2.7613,
      "step": 123485
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6039528846740723,
      "learning_rate": 0.000266183761656899,
      "loss": 3.0666,
      "step": 123486
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.317891836166382,
      "learning_rate": 0.0002661796971682434,
      "loss": 2.9995,
      "step": 123487
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7274223566055298,
      "learning_rate": 0.00026617563268587565,
      "loss": 2.8197,
      "step": 123488
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1162374019622803,
      "learning_rate": 0.0002661715682097965,
      "loss": 3.0928,
      "step": 123489
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.2796645164489746,
      "learning_rate": 0.00026616750374000667,
      "loss": 2.9611,
      "step": 123490
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9580883979797363,
      "learning_rate": 0.0002661634392765069,
      "loss": 3.0005,
      "step": 123491
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.427192211151123,
      "learning_rate": 0.0002661593748192982,
      "loss": 3.119,
      "step": 123492
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9955817461013794,
      "learning_rate": 0.0002661553103683809,
      "loss": 3.0128,
      "step": 123493
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.893173098564148,
      "learning_rate": 0.00026615124592375603,
      "loss": 3.0028,
      "step": 123494
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0590779781341553,
      "learning_rate": 0.0002661471814854243,
      "loss": 2.8773,
      "step": 123495
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.22190260887146,
      "learning_rate": 0.0002661431170533864,
      "loss": 3.1328,
      "step": 123496
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.1115429401397705,
      "learning_rate": 0.00026613905262764316,
      "loss": 2.9845,
      "step": 123497
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.219543218612671,
      "learning_rate": 0.0002661349882081954,
      "loss": 2.8297,
      "step": 123498
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8466041088104248,
      "learning_rate": 0.0002661309237950437,
      "loss": 2.935,
      "step": 123499
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8451027870178223,
      "learning_rate": 0.00026612685938818886,
      "loss": 2.9646,
      "step": 123500
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.407993316650391,
      "learning_rate": 0.0002661227949876317,
      "loss": 2.6384,
      "step": 123501
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.503302574157715,
      "learning_rate": 0.00026611873059337294,
      "loss": 3.1673,
      "step": 123502
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9266934394836426,
      "learning_rate": 0.0002661146662054134,
      "loss": 3.2295,
      "step": 123503
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.784165143966675,
      "learning_rate": 0.0002661106018237538,
      "loss": 2.8773,
      "step": 123504
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0874264240264893,
      "learning_rate": 0.0002661065374483949,
      "loss": 3.1353,
      "step": 123505
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.568652629852295,
      "learning_rate": 0.0002661024730793373,
      "loss": 2.9813,
      "step": 123506
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3745687007904053,
      "learning_rate": 0.00026609840871658194,
      "loss": 2.7587,
      "step": 123507
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.126476764678955,
      "learning_rate": 0.0002660943443601295,
      "loss": 2.8232,
      "step": 123508
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.3799870014190674,
      "learning_rate": 0.00026609028000998083,
      "loss": 3.1336,
      "step": 123509
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1784207820892334,
      "learning_rate": 0.00026608621566613655,
      "loss": 3.0362,
      "step": 123510
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4550113677978516,
      "learning_rate": 0.00026608215132859766,
      "loss": 2.9948,
      "step": 123511
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8521573543548584,
      "learning_rate": 0.0002660780869973646,
      "loss": 2.9983,
      "step": 123512
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.2746150493621826,
      "learning_rate": 0.0002660740226724383,
      "loss": 2.8792,
      "step": 123513
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.4846508502960205,
      "learning_rate": 0.0002660699583538194,
      "loss": 3.0194,
      "step": 123514
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6235201358795166,
      "learning_rate": 0.00026606589404150875,
      "loss": 2.9399,
      "step": 123515
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.379309892654419,
      "learning_rate": 0.0002660618297355071,
      "loss": 3.177,
      "step": 123516
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5272140502929688,
      "learning_rate": 0.0002660577654358153,
      "loss": 3.0387,
      "step": 123517
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.604816436767578,
      "learning_rate": 0.00026605370114243397,
      "loss": 2.7792,
      "step": 123518
    },
    {
      "epoch": 1.61,
      "grad_norm": 5.012637138366699,
      "learning_rate": 0.00026604963685536383,
      "loss": 2.8516,
      "step": 123519
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2324070930480957,
      "learning_rate": 0.0002660455725746057,
      "loss": 2.8392,
      "step": 123520
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8077733516693115,
      "learning_rate": 0.0002660415083001604,
      "loss": 2.9213,
      "step": 123521
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1565351486206055,
      "learning_rate": 0.0002660374440320285,
      "loss": 3.1354,
      "step": 123522
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.5303096771240234,
      "learning_rate": 0.00026603337977021104,
      "loss": 2.9915,
      "step": 123523
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.031369924545288,
      "learning_rate": 0.00026602931551470853,
      "loss": 3.0583,
      "step": 123524
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6298413276672363,
      "learning_rate": 0.00026602525126552187,
      "loss": 2.9049,
      "step": 123525
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9744572639465332,
      "learning_rate": 0.00026602118702265167,
      "loss": 3.0364,
      "step": 123526
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.105225563049316,
      "learning_rate": 0.00026601712278609885,
      "loss": 2.8673,
      "step": 123527
    },
    {
      "epoch": 1.61,
      "grad_norm": 5.084389686584473,
      "learning_rate": 0.000266013058555864,
      "loss": 2.898,
      "step": 123528
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.496595859527588,
      "learning_rate": 0.00026600899433194803,
      "loss": 3.0596,
      "step": 123529
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1877996921539307,
      "learning_rate": 0.0002660049301143516,
      "loss": 2.8943,
      "step": 123530
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.552809715270996,
      "learning_rate": 0.00026600086590307555,
      "loss": 2.9061,
      "step": 123531
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.293757677078247,
      "learning_rate": 0.0002659968016981206,
      "loss": 2.8841,
      "step": 123532
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.4063498973846436,
      "learning_rate": 0.00026599273749948737,
      "loss": 2.9585,
      "step": 123533
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1813507080078125,
      "learning_rate": 0.00026598867330717677,
      "loss": 2.9579,
      "step": 123534
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.946169853210449,
      "learning_rate": 0.0002659846091211896,
      "loss": 2.8687,
      "step": 123535
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.078993320465088,
      "learning_rate": 0.00026598054494152644,
      "loss": 2.9097,
      "step": 123536
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1742773056030273,
      "learning_rate": 0.0002659764807681882,
      "loss": 2.7643,
      "step": 123537
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.058461904525757,
      "learning_rate": 0.0002659724166011755,
      "loss": 2.9489,
      "step": 123538
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3814761638641357,
      "learning_rate": 0.0002659683524404892,
      "loss": 3.1536,
      "step": 123539
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.14593243598938,
      "learning_rate": 0.0002659642882861301,
      "loss": 3.1103,
      "step": 123540
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.8279550075531006,
      "learning_rate": 0.00026596022413809885,
      "loss": 3.2021,
      "step": 123541
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7437001466751099,
      "learning_rate": 0.00026595615999639626,
      "loss": 3.1775,
      "step": 123542
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0258610248565674,
      "learning_rate": 0.00026595209586102297,
      "loss": 2.915,
      "step": 123543
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6138885021209717,
      "learning_rate": 0.00026594803173197985,
      "loss": 2.7629,
      "step": 123544
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8261220455169678,
      "learning_rate": 0.00026594396760926765,
      "loss": 2.5986,
      "step": 123545
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.789887428283691,
      "learning_rate": 0.0002659399034928871,
      "loss": 2.9108,
      "step": 123546
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3594510555267334,
      "learning_rate": 0.0002659358393828391,
      "loss": 2.8695,
      "step": 123547
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0692005157470703,
      "learning_rate": 0.0002659317752791241,
      "loss": 3.0891,
      "step": 123548
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.112783670425415,
      "learning_rate": 0.00026592771118174306,
      "loss": 3.0114,
      "step": 123549
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.3205673694610596,
      "learning_rate": 0.0002659236470906967,
      "loss": 2.8952,
      "step": 123550
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5270800590515137,
      "learning_rate": 0.00026591958300598576,
      "loss": 2.8797,
      "step": 123551
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5204944610595703,
      "learning_rate": 0.00026591551892761104,
      "loss": 2.8415,
      "step": 123552
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0701911449432373,
      "learning_rate": 0.00026591145485557337,
      "loss": 2.8121,
      "step": 123553
    },
    {
      "epoch": 1.61,
      "grad_norm": 5.105121612548828,
      "learning_rate": 0.0002659073907898733,
      "loss": 2.8597,
      "step": 123554
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.1155757904052734,
      "learning_rate": 0.0002659033267305117,
      "loss": 2.914,
      "step": 123555
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9643360376358032,
      "learning_rate": 0.0002658992626774893,
      "loss": 2.9983,
      "step": 123556
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8040517568588257,
      "learning_rate": 0.0002658951986308069,
      "loss": 3.1263,
      "step": 123557
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.825692653656006,
      "learning_rate": 0.00026589113459046516,
      "loss": 2.8487,
      "step": 123558
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.147651433944702,
      "learning_rate": 0.00026588707055646505,
      "loss": 3.2007,
      "step": 123559
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.072486639022827,
      "learning_rate": 0.0002658830065288071,
      "loss": 3.1077,
      "step": 123560
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.724077582359314,
      "learning_rate": 0.0002658789425074921,
      "loss": 2.8812,
      "step": 123561
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.086097002029419,
      "learning_rate": 0.00026587487849252084,
      "loss": 2.8579,
      "step": 123562
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5485239028930664,
      "learning_rate": 0.0002658708144838941,
      "loss": 2.8339,
      "step": 123563
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.606628894805908,
      "learning_rate": 0.00026586675048161264,
      "loss": 2.9891,
      "step": 123564
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.076695442199707,
      "learning_rate": 0.0002658626864856773,
      "loss": 2.9456,
      "step": 123565
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8446171283721924,
      "learning_rate": 0.0002658586224960886,
      "loss": 2.7155,
      "step": 123566
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.766385316848755,
      "learning_rate": 0.0002658545585128474,
      "loss": 2.9526,
      "step": 123567
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.005690336227417,
      "learning_rate": 0.00026585049453595454,
      "loss": 3.0239,
      "step": 123568
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.460979461669922,
      "learning_rate": 0.00026584643056541067,
      "loss": 2.849,
      "step": 123569
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.468311309814453,
      "learning_rate": 0.00026584236660121664,
      "loss": 2.8553,
      "step": 123570
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.963939905166626,
      "learning_rate": 0.00026583830264337326,
      "loss": 3.2805,
      "step": 123571
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.8525242805480957,
      "learning_rate": 0.00026583423869188104,
      "loss": 3.0315,
      "step": 123572
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9543001651763916,
      "learning_rate": 0.00026583017474674094,
      "loss": 3.073,
      "step": 123573
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9130005836486816,
      "learning_rate": 0.0002658261108079536,
      "loss": 2.9885,
      "step": 123574
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6788647174835205,
      "learning_rate": 0.0002658220468755198,
      "loss": 3.23,
      "step": 123575
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4320430755615234,
      "learning_rate": 0.0002658179829494404,
      "loss": 2.9925,
      "step": 123576
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.491227149963379,
      "learning_rate": 0.00026581391902971604,
      "loss": 3.0351,
      "step": 123577
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0209712982177734,
      "learning_rate": 0.0002658098551163477,
      "loss": 2.7831,
      "step": 123578
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8710925579071045,
      "learning_rate": 0.00026580579120933577,
      "loss": 2.8469,
      "step": 123579
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0656561851501465,
      "learning_rate": 0.0002658017273086812,
      "loss": 2.809,
      "step": 123580
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9057947397232056,
      "learning_rate": 0.00026579766341438475,
      "loss": 2.8683,
      "step": 123581
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3871731758117676,
      "learning_rate": 0.0002657935995264471,
      "loss": 2.9362,
      "step": 123582
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.541505813598633,
      "learning_rate": 0.00026578953564486917,
      "loss": 2.9171,
      "step": 123583
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0149312019348145,
      "learning_rate": 0.00026578547176965166,
      "loss": 3.0453,
      "step": 123584
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.350466728210449,
      "learning_rate": 0.0002657814079007952,
      "loss": 2.9504,
      "step": 123585
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.820756435394287,
      "learning_rate": 0.0002657773440383006,
      "loss": 2.9642,
      "step": 123586
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0244998931884766,
      "learning_rate": 0.0002657732801821686,
      "loss": 2.7617,
      "step": 123587
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.257075309753418,
      "learning_rate": 0.00026576921633240003,
      "loss": 2.9564,
      "step": 123588
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.286839485168457,
      "learning_rate": 0.0002657651524889956,
      "loss": 3.142,
      "step": 123589
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4897849559783936,
      "learning_rate": 0.0002657610886519562,
      "loss": 3.1396,
      "step": 123590
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0188746452331543,
      "learning_rate": 0.0002657570248212824,
      "loss": 2.7469,
      "step": 123591
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.174560785293579,
      "learning_rate": 0.0002657529609969749,
      "loss": 2.7193,
      "step": 123592
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.244173049926758,
      "learning_rate": 0.00026574889717903463,
      "loss": 3.1389,
      "step": 123593
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7369060516357422,
      "learning_rate": 0.0002657448333674623,
      "loss": 2.8567,
      "step": 123594
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0428924560546875,
      "learning_rate": 0.00026574076956225863,
      "loss": 2.8843,
      "step": 123595
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0848042964935303,
      "learning_rate": 0.0002657367057634245,
      "loss": 3.1753,
      "step": 123596
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.402076005935669,
      "learning_rate": 0.0002657326419709605,
      "loss": 2.78,
      "step": 123597
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.086167573928833,
      "learning_rate": 0.0002657285781848674,
      "loss": 2.8133,
      "step": 123598
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6903201341629028,
      "learning_rate": 0.000265724514405146,
      "loss": 3.0414,
      "step": 123599
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.922766923904419,
      "learning_rate": 0.0002657204506317971,
      "loss": 3.0861,
      "step": 123600
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8669941425323486,
      "learning_rate": 0.00026571638686482136,
      "loss": 3.1584,
      "step": 123601
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8511322736740112,
      "learning_rate": 0.00026571232310421974,
      "loss": 3.0505,
      "step": 123602
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8111761808395386,
      "learning_rate": 0.00026570825934999273,
      "loss": 2.9154,
      "step": 123603
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8462352752685547,
      "learning_rate": 0.0002657041956021412,
      "loss": 3.094,
      "step": 123604
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8594906330108643,
      "learning_rate": 0.00026570013186066595,
      "loss": 3.0208,
      "step": 123605
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3748323917388916,
      "learning_rate": 0.00026569606812556763,
      "loss": 2.9047,
      "step": 123606
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7246350049972534,
      "learning_rate": 0.0002656920043968471,
      "loss": 2.9783,
      "step": 123607
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2681334018707275,
      "learning_rate": 0.0002656879406745051,
      "loss": 2.9471,
      "step": 123608
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.821352243423462,
      "learning_rate": 0.0002656838769585423,
      "loss": 2.8203,
      "step": 123609
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.822301983833313,
      "learning_rate": 0.0002656798132489596,
      "loss": 3.1535,
      "step": 123610
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8403551578521729,
      "learning_rate": 0.00026567574954575765,
      "loss": 3.1804,
      "step": 123611
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7464120388031006,
      "learning_rate": 0.00026567168584893714,
      "loss": 3.0941,
      "step": 123612
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.276305913925171,
      "learning_rate": 0.00026566762215849896,
      "loss": 2.8049,
      "step": 123613
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.915115237236023,
      "learning_rate": 0.00026566355847444385,
      "loss": 2.9864,
      "step": 123614
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.772544503211975,
      "learning_rate": 0.0002656594947967725,
      "loss": 3.0033,
      "step": 123615
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.04917049407959,
      "learning_rate": 0.0002656554311254857,
      "loss": 2.8626,
      "step": 123616
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.225531816482544,
      "learning_rate": 0.00026565136746058425,
      "loss": 3.2042,
      "step": 123617
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2556281089782715,
      "learning_rate": 0.0002656473038020688,
      "loss": 3.0307,
      "step": 123618
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.111720323562622,
      "learning_rate": 0.00026564324014994017,
      "loss": 3.0001,
      "step": 123619
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8653616905212402,
      "learning_rate": 0.00026563917650419914,
      "loss": 2.7542,
      "step": 123620
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2161920070648193,
      "learning_rate": 0.0002656351128648464,
      "loss": 3.0552,
      "step": 123621
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.835895299911499,
      "learning_rate": 0.00026563104923188274,
      "loss": 2.8666,
      "step": 123622
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7205818891525269,
      "learning_rate": 0.0002656269856053089,
      "loss": 2.8963,
      "step": 123623
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0819242000579834,
      "learning_rate": 0.00026562292198512575,
      "loss": 3.2709,
      "step": 123624
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7985389232635498,
      "learning_rate": 0.0002656188583713339,
      "loss": 2.6944,
      "step": 123625
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.008608341217041,
      "learning_rate": 0.00026561479476393414,
      "loss": 3.0068,
      "step": 123626
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9767993688583374,
      "learning_rate": 0.00026561073116292724,
      "loss": 3.1747,
      "step": 123627
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2269856929779053,
      "learning_rate": 0.00026560666756831393,
      "loss": 3.0681,
      "step": 123628
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.305938720703125,
      "learning_rate": 0.000265602603980095,
      "loss": 2.8739,
      "step": 123629
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.815260171890259,
      "learning_rate": 0.0002655985403982712,
      "loss": 3.0045,
      "step": 123630
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.451752185821533,
      "learning_rate": 0.0002655944768228432,
      "loss": 3.0362,
      "step": 123631
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9710179567337036,
      "learning_rate": 0.0002655904132538121,
      "loss": 3.0745,
      "step": 123632
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3107893466949463,
      "learning_rate": 0.00026558634969117814,
      "loss": 3.1032,
      "step": 123633
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7980263233184814,
      "learning_rate": 0.00026558228613494245,
      "loss": 3.033,
      "step": 123634
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.321122169494629,
      "learning_rate": 0.00026557822258510555,
      "loss": 2.8455,
      "step": 123635
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.730769395828247,
      "learning_rate": 0.0002655741590416684,
      "loss": 2.9205,
      "step": 123636
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2242701053619385,
      "learning_rate": 0.00026557009550463157,
      "loss": 3.0857,
      "step": 123637
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6895716190338135,
      "learning_rate": 0.0002655660319739961,
      "loss": 3.0774,
      "step": 123638
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.810808777809143,
      "learning_rate": 0.0002655619684497624,
      "loss": 3.2292,
      "step": 123639
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0075292587280273,
      "learning_rate": 0.0002655579049319314,
      "loss": 3.0575,
      "step": 123640
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.4051594734191895,
      "learning_rate": 0.0002655538414205038,
      "loss": 2.8352,
      "step": 123641
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.381204605102539,
      "learning_rate": 0.0002655497779154804,
      "loss": 2.8622,
      "step": 123642
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6277177333831787,
      "learning_rate": 0.000265545714416862,
      "loss": 2.971,
      "step": 123643
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.919884443283081,
      "learning_rate": 0.0002655416509246493,
      "loss": 3.0802,
      "step": 123644
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.709364891052246,
      "learning_rate": 0.0002655375874388431,
      "loss": 2.9118,
      "step": 123645
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.363321542739868,
      "learning_rate": 0.00026553352395944406,
      "loss": 2.9664,
      "step": 123646
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0863728523254395,
      "learning_rate": 0.000265529460486453,
      "loss": 2.9114,
      "step": 123647
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.01042103767395,
      "learning_rate": 0.0002655253970198706,
      "loss": 2.9905,
      "step": 123648
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.9121694564819336,
      "learning_rate": 0.0002655213335596977,
      "loss": 3.1359,
      "step": 123649
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.123359441757202,
      "learning_rate": 0.00026551727010593507,
      "loss": 2.9495,
      "step": 123650
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.246070384979248,
      "learning_rate": 0.0002655132066585835,
      "loss": 3.0693,
      "step": 123651
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5085134506225586,
      "learning_rate": 0.0002655091432176436,
      "loss": 2.9638,
      "step": 123652
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2054667472839355,
      "learning_rate": 0.0002655050797831162,
      "loss": 3.0868,
      "step": 123653
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.071638822555542,
      "learning_rate": 0.00026550101635500206,
      "loss": 3.0697,
      "step": 123654
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.524249792098999,
      "learning_rate": 0.00026549695293330187,
      "loss": 2.7076,
      "step": 123655
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.6178102493286133,
      "learning_rate": 0.0002654928895180165,
      "loss": 3.1492,
      "step": 123656
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5992486476898193,
      "learning_rate": 0.0002654888261091468,
      "loss": 3.1716,
      "step": 123657
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5441837310791016,
      "learning_rate": 0.0002654847627066932,
      "loss": 3.0889,
      "step": 123658
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.24472975730896,
      "learning_rate": 0.00026548069931065665,
      "loss": 3.2853,
      "step": 123659
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0960075855255127,
      "learning_rate": 0.0002654766359210379,
      "loss": 2.8274,
      "step": 123660
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6473183631896973,
      "learning_rate": 0.00026547257253783766,
      "loss": 3.0811,
      "step": 123661
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9724653959274292,
      "learning_rate": 0.00026546850916105673,
      "loss": 2.9736,
      "step": 123662
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1531522274017334,
      "learning_rate": 0.00026546444579069606,
      "loss": 2.8733,
      "step": 123663
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.103727102279663,
      "learning_rate": 0.00026546038242675603,
      "loss": 2.8021,
      "step": 123664
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7662911415100098,
      "learning_rate": 0.0002654563190692375,
      "loss": 2.8392,
      "step": 123665
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5540599822998047,
      "learning_rate": 0.0002654522557181414,
      "loss": 3.1118,
      "step": 123666
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.1424238681793213,
      "learning_rate": 0.0002654481923734683,
      "loss": 2.8542,
      "step": 123667
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6469810009002686,
      "learning_rate": 0.0002654441290352191,
      "loss": 2.9647,
      "step": 123668
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9609432220458984,
      "learning_rate": 0.00026544006570339453,
      "loss": 2.9716,
      "step": 123669
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.334639072418213,
      "learning_rate": 0.00026543600237799523,
      "loss": 2.8199,
      "step": 123670
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.4760072231292725,
      "learning_rate": 0.000265431939059022,
      "loss": 2.9173,
      "step": 123671
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5169565677642822,
      "learning_rate": 0.00026542787574647565,
      "loss": 2.8019,
      "step": 123672
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0519917011260986,
      "learning_rate": 0.00026542381244035694,
      "loss": 2.9329,
      "step": 123673
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.911489486694336,
      "learning_rate": 0.0002654197491406666,
      "loss": 3.0005,
      "step": 123674
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.329618453979492,
      "learning_rate": 0.0002654156858474054,
      "loss": 2.9038,
      "step": 123675
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.553990125656128,
      "learning_rate": 0.000265411622560574,
      "loss": 3.0921,
      "step": 123676
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0714361667633057,
      "learning_rate": 0.0002654075592801733,
      "loss": 3.1555,
      "step": 123677
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.282008409500122,
      "learning_rate": 0.00026540349600620393,
      "loss": 2.9232,
      "step": 123678
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.857086181640625,
      "learning_rate": 0.00026539943273866666,
      "loss": 2.8388,
      "step": 123679
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1932687759399414,
      "learning_rate": 0.00026539536947756234,
      "loss": 2.8235,
      "step": 123680
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3975155353546143,
      "learning_rate": 0.00026539130622289185,
      "loss": 3.1947,
      "step": 123681
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0828323364257812,
      "learning_rate": 0.0002653872429746556,
      "loss": 2.962,
      "step": 123682
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8738871812820435,
      "learning_rate": 0.0002653831797328545,
      "loss": 3.0037,
      "step": 123683
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.989790678024292,
      "learning_rate": 0.00026537911649748933,
      "loss": 2.9317,
      "step": 123684
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.053450107574463,
      "learning_rate": 0.0002653750532685609,
      "loss": 3.124,
      "step": 123685
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.554074764251709,
      "learning_rate": 0.0002653709900460698,
      "loss": 3.0455,
      "step": 123686
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7602872848510742,
      "learning_rate": 0.00026536692683001704,
      "loss": 3.0745,
      "step": 123687
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7986361980438232,
      "learning_rate": 0.0002653628636204031,
      "loss": 2.9251,
      "step": 123688
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7971880435943604,
      "learning_rate": 0.0002653588004172289,
      "loss": 3.0087,
      "step": 123689
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8820335865020752,
      "learning_rate": 0.0002653547372204951,
      "loss": 2.7418,
      "step": 123690
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9378896951675415,
      "learning_rate": 0.0002653506740302025,
      "loss": 2.8147,
      "step": 123691
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0701215267181396,
      "learning_rate": 0.0002653466108463519,
      "loss": 3.2349,
      "step": 123692
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9077765941619873,
      "learning_rate": 0.00026534254766894406,
      "loss": 2.9636,
      "step": 123693
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8930373191833496,
      "learning_rate": 0.0002653384844979797,
      "loss": 2.977,
      "step": 123694
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8298722505569458,
      "learning_rate": 0.0002653344213334595,
      "loss": 3.1339,
      "step": 123695
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.249255418777466,
      "learning_rate": 0.0002653303581753843,
      "loss": 3.1742,
      "step": 123696
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2437896728515625,
      "learning_rate": 0.00026532629502375486,
      "loss": 3.1609,
      "step": 123697
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.115562915802002,
      "learning_rate": 0.00026532223187857186,
      "loss": 2.9362,
      "step": 123698
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8469504117965698,
      "learning_rate": 0.00026531816873983624,
      "loss": 3.2368,
      "step": 123699
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0597782135009766,
      "learning_rate": 0.00026531410560754853,
      "loss": 2.9885,
      "step": 123700
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.610647201538086,
      "learning_rate": 0.0002653100424817096,
      "loss": 3.1985,
      "step": 123701
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3547067642211914,
      "learning_rate": 0.00026530597936232027,
      "loss": 3.1107,
      "step": 123702
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8201366662979126,
      "learning_rate": 0.00026530191624938107,
      "loss": 3.0914,
      "step": 123703
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.770514965057373,
      "learning_rate": 0.0002652978531428929,
      "loss": 3.021,
      "step": 123704
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8481409549713135,
      "learning_rate": 0.0002652937900428567,
      "loss": 3.1025,
      "step": 123705
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0290660858154297,
      "learning_rate": 0.0002652897269492729,
      "loss": 3.1637,
      "step": 123706
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.481693744659424,
      "learning_rate": 0.0002652856638621424,
      "loss": 3.0752,
      "step": 123707
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0703036785125732,
      "learning_rate": 0.00026528160078146594,
      "loss": 2.893,
      "step": 123708
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.295623302459717,
      "learning_rate": 0.00026527753770724436,
      "loss": 3.1456,
      "step": 123709
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.000666856765747,
      "learning_rate": 0.0002652734746394782,
      "loss": 3.2007,
      "step": 123710
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.706134796142578,
      "learning_rate": 0.00026526941157816846,
      "loss": 2.9399,
      "step": 123711
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0417869091033936,
      "learning_rate": 0.00026526534852331583,
      "loss": 2.6926,
      "step": 123712
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.08868408203125,
      "learning_rate": 0.00026526128547492097,
      "loss": 2.7461,
      "step": 123713
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.025430202484131,
      "learning_rate": 0.0002652572224329847,
      "loss": 3.0011,
      "step": 123714
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3247623443603516,
      "learning_rate": 0.00026525315939750774,
      "loss": 3.0449,
      "step": 123715
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.38400936126709,
      "learning_rate": 0.00026524909636849094,
      "loss": 2.8939,
      "step": 123716
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.450073719024658,
      "learning_rate": 0.0002652450333459349,
      "loss": 3.0204,
      "step": 123717
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0716466903686523,
      "learning_rate": 0.0002652409703298406,
      "loss": 3.0389,
      "step": 123718
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9811116456985474,
      "learning_rate": 0.00026523690732020857,
      "loss": 2.9176,
      "step": 123719
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.042137384414673,
      "learning_rate": 0.00026523284431703963,
      "loss": 2.8938,
      "step": 123720
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.704464077949524,
      "learning_rate": 0.00026522878132033455,
      "loss": 2.6863,
      "step": 123721
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2393221855163574,
      "learning_rate": 0.00026522471833009413,
      "loss": 3.0921,
      "step": 123722
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1972274780273438,
      "learning_rate": 0.00026522065534631913,
      "loss": 3.0891,
      "step": 123723
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4157874584198,
      "learning_rate": 0.0002652165923690103,
      "loss": 2.9089,
      "step": 123724
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9420922994613647,
      "learning_rate": 0.0002652125293981683,
      "loss": 3.0745,
      "step": 123725
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.232820510864258,
      "learning_rate": 0.0002652084664337939,
      "loss": 2.864,
      "step": 123726
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0978758335113525,
      "learning_rate": 0.00026520440347588796,
      "loss": 3.1758,
      "step": 123727
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0165369510650635,
      "learning_rate": 0.00026520034052445113,
      "loss": 3.0417,
      "step": 123728
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9682092666625977,
      "learning_rate": 0.00026519627757948423,
      "loss": 2.8149,
      "step": 123729
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.001225709915161,
      "learning_rate": 0.0002651922146409881,
      "loss": 3.0082,
      "step": 123730
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.651881217956543,
      "learning_rate": 0.0002651881517089633,
      "loss": 3.0127,
      "step": 123731
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8389909267425537,
      "learning_rate": 0.00026518408878341065,
      "loss": 3.0634,
      "step": 123732
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8597242832183838,
      "learning_rate": 0.000265180025864331,
      "loss": 3.0486,
      "step": 123733
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5797653198242188,
      "learning_rate": 0.00026517596295172496,
      "loss": 3.144,
      "step": 123734
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.801750898361206,
      "learning_rate": 0.0002651719000455934,
      "loss": 2.9792,
      "step": 123735
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.14922833442688,
      "learning_rate": 0.0002651678371459372,
      "loss": 2.6685,
      "step": 123736
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3602845668792725,
      "learning_rate": 0.0002651637742527568,
      "loss": 3.1072,
      "step": 123737
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.095521926879883,
      "learning_rate": 0.0002651597113660531,
      "loss": 2.8955,
      "step": 123738
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.39105224609375,
      "learning_rate": 0.0002651556484858269,
      "loss": 2.9083,
      "step": 123739
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2460145950317383,
      "learning_rate": 0.0002651515856120789,
      "loss": 2.6537,
      "step": 123740
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5170602798461914,
      "learning_rate": 0.00026514752274480987,
      "loss": 3.0238,
      "step": 123741
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2432587146759033,
      "learning_rate": 0.0002651434598840207,
      "loss": 3.1057,
      "step": 123742
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.071536064147949,
      "learning_rate": 0.0002651393970297119,
      "loss": 3.3339,
      "step": 123743
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8866547346115112,
      "learning_rate": 0.00026513533418188437,
      "loss": 3.168,
      "step": 123744
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.760819435119629,
      "learning_rate": 0.00026513127134053884,
      "loss": 2.7376,
      "step": 123745
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.253770589828491,
      "learning_rate": 0.00026512720850567603,
      "loss": 2.9696,
      "step": 123746
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9586623907089233,
      "learning_rate": 0.00026512314567729675,
      "loss": 2.9302,
      "step": 123747
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8186187744140625,
      "learning_rate": 0.00026511908285540187,
      "loss": 2.9762,
      "step": 123748
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9216663837432861,
      "learning_rate": 0.00026511502003999187,
      "loss": 2.9584,
      "step": 123749
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8655269145965576,
      "learning_rate": 0.0002651109572310676,
      "loss": 3.0915,
      "step": 123750
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0493855476379395,
      "learning_rate": 0.00026510689442862993,
      "loss": 3.0526,
      "step": 123751
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8733361959457397,
      "learning_rate": 0.0002651028316326795,
      "loss": 3.1786,
      "step": 123752
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6885559558868408,
      "learning_rate": 0.0002650987688432172,
      "loss": 3.1044,
      "step": 123753
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8650004863739014,
      "learning_rate": 0.0002650947060602437,
      "loss": 3.0595,
      "step": 123754
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.790675401687622,
      "learning_rate": 0.0002650906432837597,
      "loss": 3.0677,
      "step": 123755
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.823472023010254,
      "learning_rate": 0.000265086580513766,
      "loss": 3.1146,
      "step": 123756
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7224466800689697,
      "learning_rate": 0.0002650825177502634,
      "loss": 3.0535,
      "step": 123757
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.18237042427063,
      "learning_rate": 0.00026507845499325257,
      "loss": 3.2124,
      "step": 123758
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9900789260864258,
      "learning_rate": 0.0002650743922427343,
      "loss": 3.1618,
      "step": 123759
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.830093502998352,
      "learning_rate": 0.0002650703294987095,
      "loss": 3.3284,
      "step": 123760
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8757481575012207,
      "learning_rate": 0.0002650662667611787,
      "loss": 2.8967,
      "step": 123761
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.847955703735352,
      "learning_rate": 0.0002650622040301427,
      "loss": 2.7421,
      "step": 123762
    },
    {
      "epoch": 1.61,
      "grad_norm": 4.556182861328125,
      "learning_rate": 0.0002650581413056023,
      "loss": 2.9117,
      "step": 123763
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9615046977996826,
      "learning_rate": 0.00026505407858755825,
      "loss": 2.9285,
      "step": 123764
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.584745407104492,
      "learning_rate": 0.0002650500158760113,
      "loss": 3.3253,
      "step": 123765
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.2925028800964355,
      "learning_rate": 0.00026504595317096235,
      "loss": 2.9939,
      "step": 123766
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.6728131771087646,
      "learning_rate": 0.0002650418904724119,
      "loss": 3.1644,
      "step": 123767
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.990207552909851,
      "learning_rate": 0.0002650378277803608,
      "loss": 3.1138,
      "step": 123768
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.029414415359497,
      "learning_rate": 0.00026503376509480985,
      "loss": 2.9891,
      "step": 123769
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4400808811187744,
      "learning_rate": 0.0002650297024157598,
      "loss": 2.8319,
      "step": 123770
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.62034010887146,
      "learning_rate": 0.0002650256397432113,
      "loss": 2.9419,
      "step": 123771
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0086944103240967,
      "learning_rate": 0.0002650215770771654,
      "loss": 3.0998,
      "step": 123772
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9226263761520386,
      "learning_rate": 0.0002650175144176225,
      "loss": 2.9341,
      "step": 123773
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.359027624130249,
      "learning_rate": 0.0002650134517645835,
      "loss": 3.0149,
      "step": 123774
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0081210136413574,
      "learning_rate": 0.0002650093891180492,
      "loss": 2.6592,
      "step": 123775
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.195436954498291,
      "learning_rate": 0.0002650053264780203,
      "loss": 2.9746,
      "step": 123776
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.041229486465454,
      "learning_rate": 0.0002650012638444975,
      "loss": 2.8944,
      "step": 123777
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3898684978485107,
      "learning_rate": 0.00026499720121748175,
      "loss": 2.7695,
      "step": 123778
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.23032283782959,
      "learning_rate": 0.0002649931385969737,
      "loss": 3.143,
      "step": 123779
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1415224075317383,
      "learning_rate": 0.000264989075982974,
      "loss": 2.7521,
      "step": 123780
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.384276866912842,
      "learning_rate": 0.00026498501337548347,
      "loss": 2.7245,
      "step": 123781
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1526286602020264,
      "learning_rate": 0.0002649809507745029,
      "loss": 3.0253,
      "step": 123782
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.1862235069274902,
      "learning_rate": 0.00026497688818003304,
      "loss": 2.831,
      "step": 123783
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.338041067123413,
      "learning_rate": 0.00026497282559207467,
      "loss": 2.9557,
      "step": 123784
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.133795976638794,
      "learning_rate": 0.00026496876301062856,
      "loss": 2.7304,
      "step": 123785
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7049102783203125,
      "learning_rate": 0.00026496470043569536,
      "loss": 3.0374,
      "step": 123786
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.739927887916565,
      "learning_rate": 0.0002649606378672759,
      "loss": 3.1051,
      "step": 123787
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3721461296081543,
      "learning_rate": 0.00026495657530537084,
      "loss": 3.0608,
      "step": 123788
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1480817794799805,
      "learning_rate": 0.0002649525127499811,
      "loss": 2.9943,
      "step": 123789
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3114514350891113,
      "learning_rate": 0.0002649484502011073,
      "loss": 3.1534,
      "step": 123790
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1362240314483643,
      "learning_rate": 0.0002649443876587503,
      "loss": 2.9027,
      "step": 123791
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.1082210540771484,
      "learning_rate": 0.0002649403251229107,
      "loss": 2.8415,
      "step": 123792
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.174133062362671,
      "learning_rate": 0.0002649362625935894,
      "loss": 3.0304,
      "step": 123793
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.002028465270996,
      "learning_rate": 0.00026493220007078725,
      "loss": 2.8648,
      "step": 123794
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.623610496520996,
      "learning_rate": 0.0002649281375545047,
      "loss": 3.0506,
      "step": 123795
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.23109769821167,
      "learning_rate": 0.0002649240750447427,
      "loss": 3.081,
      "step": 123796
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.992838978767395,
      "learning_rate": 0.00026492001254150205,
      "loss": 2.954,
      "step": 123797
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.164320468902588,
      "learning_rate": 0.00026491595004478336,
      "loss": 2.9828,
      "step": 123798
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9210532903671265,
      "learning_rate": 0.00026491188755458744,
      "loss": 3.1213,
      "step": 123799
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9811450242996216,
      "learning_rate": 0.00026490782507091506,
      "loss": 2.8308,
      "step": 123800
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9681729078292847,
      "learning_rate": 0.0002649037625937671,
      "loss": 2.989,
      "step": 123801
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8500374555587769,
      "learning_rate": 0.00026489970012314406,
      "loss": 3.1129,
      "step": 123802
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9810576438903809,
      "learning_rate": 0.00026489563765904695,
      "loss": 3.1457,
      "step": 123803
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0125505924224854,
      "learning_rate": 0.0002648915752014763,
      "loss": 3.0048,
      "step": 123804
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2473464012145996,
      "learning_rate": 0.000264887512750433,
      "loss": 3.0125,
      "step": 123805
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9981796741485596,
      "learning_rate": 0.0002648834503059177,
      "loss": 2.9989,
      "step": 123806
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.963233470916748,
      "learning_rate": 0.00026487938786793133,
      "loss": 2.9356,
      "step": 123807
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.896847128868103,
      "learning_rate": 0.00026487532543647456,
      "loss": 2.862,
      "step": 123808
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.196732759475708,
      "learning_rate": 0.0002648712630115481,
      "loss": 2.9802,
      "step": 123809
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9297093152999878,
      "learning_rate": 0.0002648672005931527,
      "loss": 3.1991,
      "step": 123810
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4374570846557617,
      "learning_rate": 0.00026486313818128917,
      "loss": 3.0923,
      "step": 123811
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9729037284851074,
      "learning_rate": 0.00026485907577595825,
      "loss": 2.8301,
      "step": 123812
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7881646156311035,
      "learning_rate": 0.00026485501337716064,
      "loss": 3.2645,
      "step": 123813
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.143763303756714,
      "learning_rate": 0.00026485095098489723,
      "loss": 3.1929,
      "step": 123814
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.925122857093811,
      "learning_rate": 0.00026484688859916875,
      "loss": 2.9851,
      "step": 123815
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.024291753768921,
      "learning_rate": 0.0002648428262199758,
      "loss": 2.9554,
      "step": 123816
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6207056045532227,
      "learning_rate": 0.0002648387638473192,
      "loss": 2.7364,
      "step": 123817
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6928479671478271,
      "learning_rate": 0.00026483470148119977,
      "loss": 3.0534,
      "step": 123818
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6641502380371094,
      "learning_rate": 0.00026483063912161823,
      "loss": 2.9161,
      "step": 123819
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.23508358001709,
      "learning_rate": 0.0002648265767685753,
      "loss": 2.9849,
      "step": 123820
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7459300756454468,
      "learning_rate": 0.00026482251442207195,
      "loss": 2.9528,
      "step": 123821
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8001817464828491,
      "learning_rate": 0.00026481845208210856,
      "loss": 2.8454,
      "step": 123822
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2616114616394043,
      "learning_rate": 0.00026481438974868617,
      "loss": 3.0117,
      "step": 123823
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9858418703079224,
      "learning_rate": 0.0002648103274218054,
      "loss": 2.927,
      "step": 123824
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1100785732269287,
      "learning_rate": 0.00026480626510146706,
      "loss": 3.0179,
      "step": 123825
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7360550165176392,
      "learning_rate": 0.0002648022027876719,
      "loss": 2.7595,
      "step": 123826
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2590036392211914,
      "learning_rate": 0.0002647981404804208,
      "loss": 2.6516,
      "step": 123827
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.812554121017456,
      "learning_rate": 0.00026479407817971433,
      "loss": 2.8216,
      "step": 123828
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1586437225341797,
      "learning_rate": 0.0002647900158855532,
      "loss": 2.8991,
      "step": 123829
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8151382207870483,
      "learning_rate": 0.0002647859535979383,
      "loss": 2.9887,
      "step": 123830
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6794328689575195,
      "learning_rate": 0.0002647818913168704,
      "loss": 2.9645,
      "step": 123831
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.1548633575439453,
      "learning_rate": 0.00026477782904235024,
      "loss": 3.0921,
      "step": 123832
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3449721336364746,
      "learning_rate": 0.0002647737667743786,
      "loss": 2.8302,
      "step": 123833
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.799356460571289,
      "learning_rate": 0.00026476970451295604,
      "loss": 2.9919,
      "step": 123834
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9687252044677734,
      "learning_rate": 0.0002647656422580835,
      "loss": 2.8409,
      "step": 123835
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2719337940216064,
      "learning_rate": 0.0002647615800097617,
      "loss": 3.1937,
      "step": 123836
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.889426589012146,
      "learning_rate": 0.00026475751776799136,
      "loss": 2.9106,
      "step": 123837
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9265140295028687,
      "learning_rate": 0.00026475345553277324,
      "loss": 3.1674,
      "step": 123838
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7816720008850098,
      "learning_rate": 0.0002647493933041083,
      "loss": 2.92,
      "step": 123839
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.562547445297241,
      "learning_rate": 0.000264745331081997,
      "loss": 3.0383,
      "step": 123840
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0795249938964844,
      "learning_rate": 0.00026474126886644016,
      "loss": 2.8509,
      "step": 123841
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.9912092685699463,
      "learning_rate": 0.0002647372066574386,
      "loss": 2.7958,
      "step": 123842
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1947476863861084,
      "learning_rate": 0.00026473314445499307,
      "loss": 2.9281,
      "step": 123843
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.191682815551758,
      "learning_rate": 0.00026472908225910427,
      "loss": 2.9145,
      "step": 123844
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5512585639953613,
      "learning_rate": 0.000264725020069773,
      "loss": 2.883,
      "step": 123845
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.0629892349243164,
      "learning_rate": 0.00026472095788700016,
      "loss": 2.7975,
      "step": 123846
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8522026538848877,
      "learning_rate": 0.00026471689571078627,
      "loss": 2.9347,
      "step": 123847
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.545767068862915,
      "learning_rate": 0.0002647128335411322,
      "loss": 3.1197,
      "step": 123848
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1115517616271973,
      "learning_rate": 0.0002647087713780386,
      "loss": 2.9514,
      "step": 123849
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9547371864318848,
      "learning_rate": 0.00026470470922150633,
      "loss": 2.9131,
      "step": 123850
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1045780181884766,
      "learning_rate": 0.00026470064707153615,
      "loss": 3.156,
      "step": 123851
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.3627238273620605,
      "learning_rate": 0.00026469658492812886,
      "loss": 2.8292,
      "step": 123852
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9452685117721558,
      "learning_rate": 0.00026469252279128505,
      "loss": 3.0337,
      "step": 123853
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9311364889144897,
      "learning_rate": 0.00026468846066100555,
      "loss": 2.8792,
      "step": 123854
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2972300052642822,
      "learning_rate": 0.0002646843985372911,
      "loss": 3.0268,
      "step": 123855
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5151355266571045,
      "learning_rate": 0.00026468033642014255,
      "loss": 2.9543,
      "step": 123856
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7293615341186523,
      "learning_rate": 0.00026467627430956057,
      "loss": 3.1138,
      "step": 123857
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.834432601928711,
      "learning_rate": 0.0002646722122055461,
      "loss": 2.907,
      "step": 123858
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5263478755950928,
      "learning_rate": 0.00026466815010809953,
      "loss": 2.9217,
      "step": 123859
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.274590253829956,
      "learning_rate": 0.0002646640880172219,
      "loss": 2.8967,
      "step": 123860
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8186755180358887,
      "learning_rate": 0.0002646600259329138,
      "loss": 2.9274,
      "step": 123861
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.500173807144165,
      "learning_rate": 0.0002646559638551761,
      "loss": 2.7568,
      "step": 123862
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9003912210464478,
      "learning_rate": 0.0002646519017840095,
      "loss": 3.0283,
      "step": 123863
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.879969835281372,
      "learning_rate": 0.000264647839719415,
      "loss": 2.8846,
      "step": 123864
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7136662006378174,
      "learning_rate": 0.0002646437776613929,
      "loss": 2.8806,
      "step": 123865
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.280977725982666,
      "learning_rate": 0.0002646397156099443,
      "loss": 2.8628,
      "step": 123866
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7194643020629883,
      "learning_rate": 0.00026463565356506973,
      "loss": 2.7979,
      "step": 123867
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.379666328430176,
      "learning_rate": 0.0002646315915267701,
      "loss": 2.6703,
      "step": 123868
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1542861461639404,
      "learning_rate": 0.00026462752949504614,
      "loss": 2.8884,
      "step": 123869
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1626694202423096,
      "learning_rate": 0.00026462346746989864,
      "loss": 2.9061,
      "step": 123870
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.891123652458191,
      "learning_rate": 0.00026461940545132834,
      "loss": 3.0477,
      "step": 123871
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6124038696289062,
      "learning_rate": 0.0002646153434393359,
      "loss": 2.7016,
      "step": 123872
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0249125957489014,
      "learning_rate": 0.00026461128143392206,
      "loss": 2.7789,
      "step": 123873
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.063471794128418,
      "learning_rate": 0.0002646072194350877,
      "loss": 2.9346,
      "step": 123874
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2729992866516113,
      "learning_rate": 0.00026460315744283354,
      "loss": 3.0878,
      "step": 123875
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6133804321289062,
      "learning_rate": 0.00026459909545716037,
      "loss": 3.0893,
      "step": 123876
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.815216302871704,
      "learning_rate": 0.0002645950334780688,
      "loss": 2.8426,
      "step": 123877
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1737732887268066,
      "learning_rate": 0.0002645909715055598,
      "loss": 2.9688,
      "step": 123878
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.138002395629883,
      "learning_rate": 0.0002645869095396339,
      "loss": 2.8874,
      "step": 123879
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8299987316131592,
      "learning_rate": 0.000264582847580292,
      "loss": 3.0423,
      "step": 123880
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5826756954193115,
      "learning_rate": 0.0002645787856275348,
      "loss": 2.9864,
      "step": 123881
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7325361967086792,
      "learning_rate": 0.0002645747236813631,
      "loss": 3.293,
      "step": 123882
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9448505640029907,
      "learning_rate": 0.0002645706617417776,
      "loss": 3.0275,
      "step": 123883
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.824669361114502,
      "learning_rate": 0.0002645665998087791,
      "loss": 3.0685,
      "step": 123884
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.871240258216858,
      "learning_rate": 0.0002645625378823683,
      "loss": 2.9336,
      "step": 123885
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9195140600204468,
      "learning_rate": 0.00026455847596254604,
      "loss": 3.1812,
      "step": 123886
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.26750111579895,
      "learning_rate": 0.000264554414049313,
      "loss": 2.9995,
      "step": 123887
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.830003499984741,
      "learning_rate": 0.00026455035214267006,
      "loss": 3.191,
      "step": 123888
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.420581102371216,
      "learning_rate": 0.00026454629024261775,
      "loss": 2.9169,
      "step": 123889
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.416980504989624,
      "learning_rate": 0.000264542228349157,
      "loss": 2.9738,
      "step": 123890
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.013667106628418,
      "learning_rate": 0.0002645381664622885,
      "loss": 3.0047,
      "step": 123891
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.045233726501465,
      "learning_rate": 0.00026453410458201305,
      "loss": 2.9963,
      "step": 123892
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0602850914001465,
      "learning_rate": 0.0002645300427083314,
      "loss": 2.7836,
      "step": 123893
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0939455032348633,
      "learning_rate": 0.0002645259808412443,
      "loss": 3.1074,
      "step": 123894
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8419705629348755,
      "learning_rate": 0.00026452191898075243,
      "loss": 2.9099,
      "step": 123895
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0446627140045166,
      "learning_rate": 0.0002645178571268566,
      "loss": 3.0085,
      "step": 123896
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6865084171295166,
      "learning_rate": 0.00026451379527955756,
      "loss": 3.1113,
      "step": 123897
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.226722478866577,
      "learning_rate": 0.00026450973343885616,
      "loss": 2.8323,
      "step": 123898
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8237913846969604,
      "learning_rate": 0.000264505671604753,
      "loss": 2.8606,
      "step": 123899
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4289209842681885,
      "learning_rate": 0.000264501609777249,
      "loss": 2.9598,
      "step": 123900
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7194041013717651,
      "learning_rate": 0.0002644975479563447,
      "loss": 3.1974,
      "step": 123901
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8425464630126953,
      "learning_rate": 0.00026449348614204104,
      "loss": 3.0237,
      "step": 123902
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.384777784347534,
      "learning_rate": 0.00026448942433433863,
      "loss": 2.9744,
      "step": 123903
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.805227756500244,
      "learning_rate": 0.0002644853625332384,
      "loss": 2.9786,
      "step": 123904
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.805203676223755,
      "learning_rate": 0.00026448130073874094,
      "loss": 3.0254,
      "step": 123905
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4719347953796387,
      "learning_rate": 0.0002644772389508472,
      "loss": 3.0764,
      "step": 123906
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.5463814735412598,
      "learning_rate": 0.0002644731771695577,
      "loss": 3.0535,
      "step": 123907
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2759485244750977,
      "learning_rate": 0.0002644691153948734,
      "loss": 3.1033,
      "step": 123908
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.761546015739441,
      "learning_rate": 0.00026446505362679487,
      "loss": 3.179,
      "step": 123909
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2877438068389893,
      "learning_rate": 0.00026446099186532295,
      "loss": 3.1118,
      "step": 123910
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9099996089935303,
      "learning_rate": 0.0002644569301104584,
      "loss": 2.8717,
      "step": 123911
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.07631254196167,
      "learning_rate": 0.000264452868362202,
      "loss": 2.9434,
      "step": 123912
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9827635288238525,
      "learning_rate": 0.0002644488066205546,
      "loss": 3.2254,
      "step": 123913
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5628092288970947,
      "learning_rate": 0.0002644447448855168,
      "loss": 2.9914,
      "step": 123914
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.03045392036438,
      "learning_rate": 0.0002644406831570893,
      "loss": 3.1055,
      "step": 123915
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.801622986793518,
      "learning_rate": 0.0002644366214352729,
      "loss": 3.0658,
      "step": 123916
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7994890213012695,
      "learning_rate": 0.00026443255972006846,
      "loss": 2.963,
      "step": 123917
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.851584792137146,
      "learning_rate": 0.0002644284980114767,
      "loss": 3.0555,
      "step": 123918
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8459604978561401,
      "learning_rate": 0.00026442443630949843,
      "loss": 3.0235,
      "step": 123919
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.003521203994751,
      "learning_rate": 0.0002644203746141343,
      "loss": 2.9752,
      "step": 123920
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.524327278137207,
      "learning_rate": 0.000264416312925385,
      "loss": 3.0356,
      "step": 123921
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7484359741210938,
      "learning_rate": 0.0002644122512432514,
      "loss": 2.9923,
      "step": 123922
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.512134075164795,
      "learning_rate": 0.0002644081895677343,
      "loss": 3.0061,
      "step": 123923
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0435075759887695,
      "learning_rate": 0.0002644041278988343,
      "loss": 3.2174,
      "step": 123924
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2863781452178955,
      "learning_rate": 0.00026440006623655235,
      "loss": 2.794,
      "step": 123925
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.015935182571411,
      "learning_rate": 0.0002643960045808891,
      "loss": 3.2171,
      "step": 123926
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.194082498550415,
      "learning_rate": 0.0002643919429318452,
      "loss": 3.165,
      "step": 123927
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9337133169174194,
      "learning_rate": 0.0002643878812894215,
      "loss": 2.9173,
      "step": 123928
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6355063915252686,
      "learning_rate": 0.00026438381965361883,
      "loss": 2.6929,
      "step": 123929
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.001446485519409,
      "learning_rate": 0.00026437975802443784,
      "loss": 3.0628,
      "step": 123930
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.388463258743286,
      "learning_rate": 0.0002643756964018795,
      "loss": 2.9857,
      "step": 123931
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9269723892211914,
      "learning_rate": 0.0002643716347859442,
      "loss": 2.8323,
      "step": 123932
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.9189066886901855,
      "learning_rate": 0.00026436757317663295,
      "loss": 2.7958,
      "step": 123933
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.511324405670166,
      "learning_rate": 0.0002643635115739464,
      "loss": 3.0811,
      "step": 123934
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1360971927642822,
      "learning_rate": 0.00026435944997788534,
      "loss": 3.0289,
      "step": 123935
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9712408781051636,
      "learning_rate": 0.00026435538838845055,
      "loss": 2.7614,
      "step": 123936
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0112273693084717,
      "learning_rate": 0.0002643513268056429,
      "loss": 2.9831,
      "step": 123937
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8200364112854004,
      "learning_rate": 0.00026434726522946286,
      "loss": 2.9664,
      "step": 123938
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8347185850143433,
      "learning_rate": 0.00026434320365991136,
      "loss": 3.0344,
      "step": 123939
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.440248727798462,
      "learning_rate": 0.0002643391420969891,
      "loss": 2.7999,
      "step": 123940
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.020765542984009,
      "learning_rate": 0.0002643350805406969,
      "loss": 3.0437,
      "step": 123941
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.6853606700897217,
      "learning_rate": 0.00026433101899103544,
      "loss": 2.9949,
      "step": 123942
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5470657348632812,
      "learning_rate": 0.0002643269574480057,
      "loss": 2.9399,
      "step": 123943
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1782047748565674,
      "learning_rate": 0.00026432289591160813,
      "loss": 3.0044,
      "step": 123944
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7182393074035645,
      "learning_rate": 0.00026431883438184355,
      "loss": 2.9756,
      "step": 123945
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8358393907546997,
      "learning_rate": 0.0002643147728587128,
      "loss": 3.0432,
      "step": 123946
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5662026405334473,
      "learning_rate": 0.0002643107113422166,
      "loss": 3.0798,
      "step": 123947
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2480361461639404,
      "learning_rate": 0.0002643066498323557,
      "loss": 2.8581,
      "step": 123948
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0713553428649902,
      "learning_rate": 0.000264302588329131,
      "loss": 2.9896,
      "step": 123949
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.227437973022461,
      "learning_rate": 0.0002642985268325431,
      "loss": 2.9911,
      "step": 123950
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2788212299346924,
      "learning_rate": 0.0002642944653425926,
      "loss": 3.0941,
      "step": 123951
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.002836227416992,
      "learning_rate": 0.0002642904038592805,
      "loss": 2.9195,
      "step": 123952
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8823341131210327,
      "learning_rate": 0.00026428634238260753,
      "loss": 2.7211,
      "step": 123953
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.114036798477173,
      "learning_rate": 0.0002642822809125744,
      "loss": 3.2636,
      "step": 123954
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7649813890457153,
      "learning_rate": 0.00026427821944918187,
      "loss": 2.8452,
      "step": 123955
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7572383880615234,
      "learning_rate": 0.00026427415799243077,
      "loss": 2.8475,
      "step": 123956
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1008245944976807,
      "learning_rate": 0.0002642700965423217,
      "loss": 3.1281,
      "step": 123957
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.025944471359253,
      "learning_rate": 0.00026426603509885544,
      "loss": 3.1284,
      "step": 123958
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.516704559326172,
      "learning_rate": 0.00026426197366203285,
      "loss": 2.9389,
      "step": 123959
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1438069343566895,
      "learning_rate": 0.0002642579122318546,
      "loss": 3.05,
      "step": 123960
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7606135606765747,
      "learning_rate": 0.0002642538508083216,
      "loss": 2.947,
      "step": 123961
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.580169677734375,
      "learning_rate": 0.0002642497893914343,
      "loss": 2.8846,
      "step": 123962
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.037785053253174,
      "learning_rate": 0.0002642457279811938,
      "loss": 3.0701,
      "step": 123963
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.080906867980957,
      "learning_rate": 0.00026424166657760065,
      "loss": 3.2355,
      "step": 123964
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.592008352279663,
      "learning_rate": 0.0002642376051806556,
      "loss": 3.1483,
      "step": 123965
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0954275131225586,
      "learning_rate": 0.0002642335437903594,
      "loss": 2.9509,
      "step": 123966
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2743723392486572,
      "learning_rate": 0.00026422948240671304,
      "loss": 3.078,
      "step": 123967
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.5802714824676514,
      "learning_rate": 0.000264225421029717,
      "loss": 3.077,
      "step": 123968
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0973687171936035,
      "learning_rate": 0.0002642213596593721,
      "loss": 3.1492,
      "step": 123969
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7964792251586914,
      "learning_rate": 0.0002642172982956792,
      "loss": 2.9269,
      "step": 123970
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0330159664154053,
      "learning_rate": 0.00026421323693863886,
      "loss": 2.9689,
      "step": 123971
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7824033498764038,
      "learning_rate": 0.00026420917558825206,
      "loss": 2.8843,
      "step": 123972
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6061651706695557,
      "learning_rate": 0.00026420511424451944,
      "loss": 2.8519,
      "step": 123973
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7030045986175537,
      "learning_rate": 0.0002642010529074417,
      "loss": 3.2177,
      "step": 123974
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.358520269393921,
      "learning_rate": 0.0002641969915770197,
      "loss": 2.7472,
      "step": 123975
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.454352378845215,
      "learning_rate": 0.00026419293025325416,
      "loss": 2.9589,
      "step": 123976
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8366646766662598,
      "learning_rate": 0.00026418886893614585,
      "loss": 2.8461,
      "step": 123977
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.6786530017852783,
      "learning_rate": 0.0002641848076256955,
      "loss": 2.7778,
      "step": 123978
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7881267070770264,
      "learning_rate": 0.0002641807463219038,
      "loss": 3.0232,
      "step": 123979
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4525794982910156,
      "learning_rate": 0.0002641766850247717,
      "loss": 2.843,
      "step": 123980
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.129920244216919,
      "learning_rate": 0.0002641726237342997,
      "loss": 2.8452,
      "step": 123981
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9009093046188354,
      "learning_rate": 0.00026416856245048875,
      "loss": 3.1437,
      "step": 123982
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0357279777526855,
      "learning_rate": 0.0002641645011733395,
      "loss": 2.9036,
      "step": 123983
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.106839895248413,
      "learning_rate": 0.00026416043990285274,
      "loss": 3.052,
      "step": 123984
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8985415697097778,
      "learning_rate": 0.00026415637863902935,
      "loss": 3.0351,
      "step": 123985
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.7465314865112305,
      "learning_rate": 0.0002641523173818699,
      "loss": 3.0212,
      "step": 123986
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.477539300918579,
      "learning_rate": 0.0002641482561313752,
      "loss": 3.0615,
      "step": 123987
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.752983808517456,
      "learning_rate": 0.00026414419488754594,
      "loss": 3.0595,
      "step": 123988
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.080789566040039,
      "learning_rate": 0.000264140133650383,
      "loss": 2.8712,
      "step": 123989
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7104910612106323,
      "learning_rate": 0.00026413607241988714,
      "loss": 3.1469,
      "step": 123990
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.195159912109375,
      "learning_rate": 0.000264132011196059,
      "loss": 3.0863,
      "step": 123991
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8934003114700317,
      "learning_rate": 0.00026412794997889954,
      "loss": 3.2938,
      "step": 123992
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0025699138641357,
      "learning_rate": 0.00026412388876840924,
      "loss": 2.9334,
      "step": 123993
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.8830780982971191,
      "learning_rate": 0.00026411982756458896,
      "loss": 3.2021,
      "step": 123994
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.4679970741271973,
      "learning_rate": 0.0002641157663674395,
      "loss": 2.9068,
      "step": 123995
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.772091269493103,
      "learning_rate": 0.0002641117051769616,
      "loss": 2.8908,
      "step": 123996
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.658850908279419,
      "learning_rate": 0.000264107643993156,
      "loss": 3.1478,
      "step": 123997
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0269405841827393,
      "learning_rate": 0.00026410358281602365,
      "loss": 2.8286,
      "step": 123998
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.976102590560913,
      "learning_rate": 0.0002640995216455649,
      "loss": 2.935,
      "step": 123999
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7071231603622437,
      "learning_rate": 0.0002640954604817808,
      "loss": 2.9414,
      "step": 124000
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1963391304016113,
      "learning_rate": 0.00026409139932467206,
      "loss": 2.9513,
      "step": 124001
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9134035110473633,
      "learning_rate": 0.00026408733817423935,
      "loss": 3.1187,
      "step": 124002
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1571826934814453,
      "learning_rate": 0.0002640832770304835,
      "loss": 2.9234,
      "step": 124003
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2787764072418213,
      "learning_rate": 0.00026407921589340533,
      "loss": 3.1242,
      "step": 124004
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9274381399154663,
      "learning_rate": 0.0002640751547630054,
      "loss": 2.8728,
      "step": 124005
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2310140132904053,
      "learning_rate": 0.0002640710936392846,
      "loss": 2.9728,
      "step": 124006
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.086883306503296,
      "learning_rate": 0.0002640670325222437,
      "loss": 3.1393,
      "step": 124007
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0913217067718506,
      "learning_rate": 0.00026406297141188335,
      "loss": 3.0092,
      "step": 124008
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.8498430252075195,
      "learning_rate": 0.00026405891030820437,
      "loss": 2.6531,
      "step": 124009
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9089463949203491,
      "learning_rate": 0.0002640548492112077,
      "loss": 3.2136,
      "step": 124010
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.100581407546997,
      "learning_rate": 0.00026405078812089374,
      "loss": 2.9528,
      "step": 124011
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.0541739463806152,
      "learning_rate": 0.00026404672703726344,
      "loss": 3.2302,
      "step": 124012
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9870373010635376,
      "learning_rate": 0.0002640426659603175,
      "loss": 3.1855,
      "step": 124013
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.580256938934326,
      "learning_rate": 0.00026403860489005674,
      "loss": 2.8055,
      "step": 124014
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.722182273864746,
      "learning_rate": 0.0002640345438264819,
      "loss": 2.911,
      "step": 124015
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2890331745147705,
      "learning_rate": 0.0002640304827695938,
      "loss": 3.1918,
      "step": 124016
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9636847972869873,
      "learning_rate": 0.00026402642171939296,
      "loss": 2.9978,
      "step": 124017
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2390966415405273,
      "learning_rate": 0.00026402236067588037,
      "loss": 3.2476,
      "step": 124018
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.7065978050231934,
      "learning_rate": 0.00026401829963905665,
      "loss": 2.9238,
      "step": 124019
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.282989025115967,
      "learning_rate": 0.00026401423860892263,
      "loss": 2.9586,
      "step": 124020
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.082648992538452,
      "learning_rate": 0.00026401017758547905,
      "loss": 2.9278,
      "step": 124021
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1237332820892334,
      "learning_rate": 0.00026400611656872674,
      "loss": 3.1022,
      "step": 124022
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.3610198497772217,
      "learning_rate": 0.0002640020555586663,
      "loss": 3.1215,
      "step": 124023
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.019178628921509,
      "learning_rate": 0.0002639979945552985,
      "loss": 3.0411,
      "step": 124024
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.635011672973633,
      "learning_rate": 0.0002639939335586242,
      "loss": 2.9457,
      "step": 124025
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.9695682525634766,
      "learning_rate": 0.0002639898725686441,
      "loss": 2.8917,
      "step": 124026
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.29148006439209,
      "learning_rate": 0.0002639858115853589,
      "loss": 2.9758,
      "step": 124027
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.7051267623901367,
      "learning_rate": 0.00026398175060876963,
      "loss": 2.8796,
      "step": 124028
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.2119696140289307,
      "learning_rate": 0.00026397768963887664,
      "loss": 2.9403,
      "step": 124029
    },
    {
      "epoch": 1.61,
      "grad_norm": 1.7053169012069702,
      "learning_rate": 0.00026397362867568093,
      "loss": 2.9677,
      "step": 124030
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.1925973892211914,
      "learning_rate": 0.00026396956771918315,
      "loss": 2.9237,
      "step": 124031
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1361708641052246,
      "learning_rate": 0.00026396550676938417,
      "loss": 2.8047,
      "step": 124032
    },
    {
      "epoch": 1.61,
      "grad_norm": 2.1540658473968506,
      "learning_rate": 0.00026396144582628464,
      "loss": 2.8909,
      "step": 124033
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.502288818359375,
      "learning_rate": 0.0002639573848898855,
      "loss": 3.0217,
      "step": 124034
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8461066484451294,
      "learning_rate": 0.0002639533239601872,
      "loss": 3.02,
      "step": 124035
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.364940881729126,
      "learning_rate": 0.0002639492630371907,
      "loss": 2.9343,
      "step": 124036
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3822484016418457,
      "learning_rate": 0.00026394520212089676,
      "loss": 3.1879,
      "step": 124037
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9528261423110962,
      "learning_rate": 0.000263941141211306,
      "loss": 2.9029,
      "step": 124038
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.81046724319458,
      "learning_rate": 0.00026393708030841926,
      "loss": 2.7708,
      "step": 124039
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9679559469223022,
      "learning_rate": 0.0002639330194122375,
      "loss": 3.0121,
      "step": 124040
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4887657165527344,
      "learning_rate": 0.0002639289585227611,
      "loss": 3.056,
      "step": 124041
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.111145257949829,
      "learning_rate": 0.00026392489763999095,
      "loss": 2.7833,
      "step": 124042
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.344330072402954,
      "learning_rate": 0.0002639208367639279,
      "loss": 3.1316,
      "step": 124043
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8436434268951416,
      "learning_rate": 0.0002639167758945726,
      "loss": 2.9623,
      "step": 124044
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9104489088058472,
      "learning_rate": 0.0002639127150319259,
      "loss": 3.2991,
      "step": 124045
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9462003707885742,
      "learning_rate": 0.0002639086541759885,
      "loss": 2.9611,
      "step": 124046
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8898851871490479,
      "learning_rate": 0.00026390459332676123,
      "loss": 2.6909,
      "step": 124047
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.30639386177063,
      "learning_rate": 0.0002639005324842447,
      "loss": 2.7675,
      "step": 124048
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.10097599029541,
      "learning_rate": 0.00026389647164843977,
      "loss": 2.8413,
      "step": 124049
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.270751953125,
      "learning_rate": 0.00026389241081934713,
      "loss": 3.153,
      "step": 124050
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0121309757232666,
      "learning_rate": 0.0002638883499969676,
      "loss": 2.7964,
      "step": 124051
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2209527492523193,
      "learning_rate": 0.0002638842891813019,
      "loss": 3.0179,
      "step": 124052
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6965227127075195,
      "learning_rate": 0.00026388022837235084,
      "loss": 2.9791,
      "step": 124053
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1455233097076416,
      "learning_rate": 0.00026387616757011503,
      "loss": 2.8815,
      "step": 124054
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.7235310077667236,
      "learning_rate": 0.0002638721067745954,
      "loss": 3.3253,
      "step": 124055
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6190872192382812,
      "learning_rate": 0.0002638680459857926,
      "loss": 3.2172,
      "step": 124056
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.704223155975342,
      "learning_rate": 0.00026386398520370744,
      "loss": 2.9743,
      "step": 124057
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8168306350708008,
      "learning_rate": 0.0002638599244283406,
      "loss": 2.9433,
      "step": 124058
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.133424282073975,
      "learning_rate": 0.00026385586365969293,
      "loss": 3.0077,
      "step": 124059
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.012793064117432,
      "learning_rate": 0.0002638518028977651,
      "loss": 2.8672,
      "step": 124060
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.796257972717285,
      "learning_rate": 0.0002638477421425579,
      "loss": 3.3521,
      "step": 124061
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.855089545249939,
      "learning_rate": 0.0002638436813940721,
      "loss": 2.8854,
      "step": 124062
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.345468759536743,
      "learning_rate": 0.0002638396206523085,
      "loss": 2.9576,
      "step": 124063
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.1051788330078125,
      "learning_rate": 0.00026383555991726774,
      "loss": 2.9765,
      "step": 124064
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1106700897216797,
      "learning_rate": 0.00026383149918895066,
      "loss": 3.2032,
      "step": 124065
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0380446910858154,
      "learning_rate": 0.0002638274384673579,
      "loss": 3.1781,
      "step": 124066
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1060009002685547,
      "learning_rate": 0.00026382337775249036,
      "loss": 2.8964,
      "step": 124067
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.161802291870117,
      "learning_rate": 0.0002638193170443487,
      "loss": 3.1028,
      "step": 124068
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.964613437652588,
      "learning_rate": 0.00026381525634293384,
      "loss": 2.9768,
      "step": 124069
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.958143949508667,
      "learning_rate": 0.00026381119564824633,
      "loss": 3.0648,
      "step": 124070
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.931091070175171,
      "learning_rate": 0.00026380713496028705,
      "loss": 2.9974,
      "step": 124071
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.119292736053467,
      "learning_rate": 0.0002638030742790566,
      "loss": 3.0588,
      "step": 124072
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7422053813934326,
      "learning_rate": 0.00026379901360455585,
      "loss": 2.8462,
      "step": 124073
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.546354055404663,
      "learning_rate": 0.00026379495293678555,
      "loss": 3.0398,
      "step": 124074
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6417548656463623,
      "learning_rate": 0.0002637908922757465,
      "loss": 3.095,
      "step": 124075
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.165755033493042,
      "learning_rate": 0.00026378683162143937,
      "loss": 2.9588,
      "step": 124076
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8415117263793945,
      "learning_rate": 0.0002637827709738651,
      "loss": 2.8369,
      "step": 124077
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.152869701385498,
      "learning_rate": 0.00026377871033302417,
      "loss": 2.7578,
      "step": 124078
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9889487028121948,
      "learning_rate": 0.0002637746496989174,
      "loss": 3.0638,
      "step": 124079
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3924050331115723,
      "learning_rate": 0.0002637705890715457,
      "loss": 2.8716,
      "step": 124080
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.822627067565918,
      "learning_rate": 0.0002637665284509097,
      "loss": 2.9496,
      "step": 124081
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8896262645721436,
      "learning_rate": 0.00026376246783701014,
      "loss": 2.9546,
      "step": 124082
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8717626333236694,
      "learning_rate": 0.000263758407229848,
      "loss": 3.0788,
      "step": 124083
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4218688011169434,
      "learning_rate": 0.0002637543466294237,
      "loss": 3.1082,
      "step": 124084
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7155715227127075,
      "learning_rate": 0.00026375028603573816,
      "loss": 3.306,
      "step": 124085
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8658922910690308,
      "learning_rate": 0.0002637462254487921,
      "loss": 3.1576,
      "step": 124086
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9418115615844727,
      "learning_rate": 0.00026374216486858636,
      "loss": 2.9723,
      "step": 124087
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8112871646881104,
      "learning_rate": 0.00026373810429512154,
      "loss": 3.1816,
      "step": 124088
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8610038757324219,
      "learning_rate": 0.00026373404372839873,
      "loss": 3.123,
      "step": 124089
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8521771430969238,
      "learning_rate": 0.00026372998316841824,
      "loss": 3.1063,
      "step": 124090
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.020036458969116,
      "learning_rate": 0.00026372592261518106,
      "loss": 2.9625,
      "step": 124091
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1758713722229004,
      "learning_rate": 0.00026372186206868794,
      "loss": 2.7466,
      "step": 124092
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9305726289749146,
      "learning_rate": 0.0002637178015289396,
      "loss": 2.9748,
      "step": 124093
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8626395463943481,
      "learning_rate": 0.00026371374099593676,
      "loss": 3.1771,
      "step": 124094
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0076096057891846,
      "learning_rate": 0.0002637096804696804,
      "loss": 3.1199,
      "step": 124095
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2937135696411133,
      "learning_rate": 0.00026370561995017094,
      "loss": 2.9538,
      "step": 124096
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0950958728790283,
      "learning_rate": 0.0002637015594374093,
      "loss": 3.1514,
      "step": 124097
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.618868112564087,
      "learning_rate": 0.0002636974989313962,
      "loss": 2.9531,
      "step": 124098
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.109527587890625,
      "learning_rate": 0.00026369343843213246,
      "loss": 2.9885,
      "step": 124099
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.893231987953186,
      "learning_rate": 0.0002636893779396188,
      "loss": 2.9655,
      "step": 124100
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9996304512023926,
      "learning_rate": 0.00026368531745385606,
      "loss": 2.9143,
      "step": 124101
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.017216682434082,
      "learning_rate": 0.0002636812569748448,
      "loss": 2.8677,
      "step": 124102
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1157631874084473,
      "learning_rate": 0.0002636771965025859,
      "loss": 2.9674,
      "step": 124103
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.519896984100342,
      "learning_rate": 0.00026367313603708006,
      "loss": 2.8856,
      "step": 124104
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.486982583999634,
      "learning_rate": 0.00026366907557832806,
      "loss": 2.8662,
      "step": 124105
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3740875720977783,
      "learning_rate": 0.0002636650151263307,
      "loss": 3.1101,
      "step": 124106
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.10884690284729,
      "learning_rate": 0.0002636609546810888,
      "loss": 2.8345,
      "step": 124107
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4000747203826904,
      "learning_rate": 0.0002636568942426029,
      "loss": 3.1875,
      "step": 124108
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9738717079162598,
      "learning_rate": 0.0002636528338108738,
      "loss": 3.1014,
      "step": 124109
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2823264598846436,
      "learning_rate": 0.0002636487733859024,
      "loss": 3.0016,
      "step": 124110
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0329511165618896,
      "learning_rate": 0.0002636447129676894,
      "loss": 2.6934,
      "step": 124111
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3489396572113037,
      "learning_rate": 0.0002636406525562355,
      "loss": 2.8379,
      "step": 124112
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.928558349609375,
      "learning_rate": 0.0002636365921515415,
      "loss": 3.2197,
      "step": 124113
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8785648345947266,
      "learning_rate": 0.0002636325317536083,
      "loss": 3.0235,
      "step": 124114
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3147237300872803,
      "learning_rate": 0.0002636284713624363,
      "loss": 2.9769,
      "step": 124115
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6870882511138916,
      "learning_rate": 0.00026362441097802647,
      "loss": 3.0262,
      "step": 124116
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8440343141555786,
      "learning_rate": 0.00026362035060037957,
      "loss": 2.642,
      "step": 124117
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.018415689468384,
      "learning_rate": 0.00026361629022949635,
      "loss": 2.908,
      "step": 124118
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6477773189544678,
      "learning_rate": 0.00026361222986537753,
      "loss": 2.8926,
      "step": 124119
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.012094736099243,
      "learning_rate": 0.000263608169508024,
      "loss": 2.9245,
      "step": 124120
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9201146364212036,
      "learning_rate": 0.0002636041091574363,
      "loss": 2.9504,
      "step": 124121
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.163839340209961,
      "learning_rate": 0.00026360004881361527,
      "loss": 2.8919,
      "step": 124122
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7430429458618164,
      "learning_rate": 0.00026359598847656166,
      "loss": 2.7427,
      "step": 124123
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0844480991363525,
      "learning_rate": 0.0002635919281462763,
      "loss": 2.9647,
      "step": 124124
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5301663875579834,
      "learning_rate": 0.0002635878678227598,
      "loss": 2.9834,
      "step": 124125
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.889983892440796,
      "learning_rate": 0.0002635838075060132,
      "loss": 3.0273,
      "step": 124126
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1590421199798584,
      "learning_rate": 0.0002635797471960369,
      "loss": 2.8999,
      "step": 124127
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5912463665008545,
      "learning_rate": 0.0002635756868928318,
      "loss": 2.9724,
      "step": 124128
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8055286407470703,
      "learning_rate": 0.00026357162659639867,
      "loss": 2.7644,
      "step": 124129
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4721550941467285,
      "learning_rate": 0.0002635675663067383,
      "loss": 2.7584,
      "step": 124130
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.85138201713562,
      "learning_rate": 0.00026356350602385137,
      "loss": 2.9629,
      "step": 124131
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.438002109527588,
      "learning_rate": 0.0002635594457477388,
      "loss": 3.1153,
      "step": 124132
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0598013401031494,
      "learning_rate": 0.0002635553854784011,
      "loss": 3.0026,
      "step": 124133
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.261765241622925,
      "learning_rate": 0.0002635513252158392,
      "loss": 3.0006,
      "step": 124134
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5525338649749756,
      "learning_rate": 0.0002635472649600537,
      "loss": 2.9977,
      "step": 124135
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6485378742218018,
      "learning_rate": 0.0002635432047110455,
      "loss": 2.6272,
      "step": 124136
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.331115245819092,
      "learning_rate": 0.0002635391444688153,
      "loss": 3.0171,
      "step": 124137
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.448314905166626,
      "learning_rate": 0.0002635350842333639,
      "loss": 2.7901,
      "step": 124138
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.742793560028076,
      "learning_rate": 0.000263531024004692,
      "loss": 3.2002,
      "step": 124139
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9324405193328857,
      "learning_rate": 0.0002635269637828004,
      "loss": 3.0368,
      "step": 124140
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.728331208229065,
      "learning_rate": 0.0002635229035676898,
      "loss": 3.1577,
      "step": 124141
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9853028059005737,
      "learning_rate": 0.00026351884335936094,
      "loss": 2.9465,
      "step": 124142
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.6723688840866089,
      "learning_rate": 0.0002635147831578146,
      "loss": 3.1024,
      "step": 124143
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.6950098276138306,
      "learning_rate": 0.0002635107229630517,
      "loss": 3.1586,
      "step": 124144
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4692025184631348,
      "learning_rate": 0.00026350666277507267,
      "loss": 3.0369,
      "step": 124145
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.189270257949829,
      "learning_rate": 0.00026350260259387854,
      "loss": 2.9148,
      "step": 124146
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9550201892852783,
      "learning_rate": 0.00026349854241946994,
      "loss": 3.0217,
      "step": 124147
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.007296323776245,
      "learning_rate": 0.00026349448225184764,
      "loss": 2.8558,
      "step": 124148
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4558682441711426,
      "learning_rate": 0.0002634904220910124,
      "loss": 2.9393,
      "step": 124149
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.985559105873108,
      "learning_rate": 0.00026348636193696505,
      "loss": 3.041,
      "step": 124150
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.6770060062408447,
      "learning_rate": 0.0002634823017897062,
      "loss": 2.9702,
      "step": 124151
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.350870370864868,
      "learning_rate": 0.0002634782416492367,
      "loss": 3.455,
      "step": 124152
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.525604248046875,
      "learning_rate": 0.00026347418151555723,
      "loss": 2.9037,
      "step": 124153
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6801867485046387,
      "learning_rate": 0.00026347012138866867,
      "loss": 3.1174,
      "step": 124154
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.572984457015991,
      "learning_rate": 0.0002634660612685717,
      "loss": 2.6377,
      "step": 124155
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8585081100463867,
      "learning_rate": 0.0002634620011552671,
      "loss": 3.0127,
      "step": 124156
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8251101970672607,
      "learning_rate": 0.00026345794104875556,
      "loss": 3.1245,
      "step": 124157
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.855191946029663,
      "learning_rate": 0.00026345388094903787,
      "loss": 2.9786,
      "step": 124158
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0579965114593506,
      "learning_rate": 0.00026344982085611476,
      "loss": 2.7982,
      "step": 124159
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7987886667251587,
      "learning_rate": 0.00026344576076998706,
      "loss": 2.9702,
      "step": 124160
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.875114917755127,
      "learning_rate": 0.00026344170069065556,
      "loss": 3.021,
      "step": 124161
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.2000396251678467,
      "learning_rate": 0.0002634376406181209,
      "loss": 2.8352,
      "step": 124162
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7020739316940308,
      "learning_rate": 0.0002634335805523838,
      "loss": 2.9132,
      "step": 124163
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.45784854888916,
      "learning_rate": 0.0002634295204934451,
      "loss": 3.1686,
      "step": 124164
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.3448686599731445,
      "learning_rate": 0.0002634254604413056,
      "loss": 3.1658,
      "step": 124165
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.938268780708313,
      "learning_rate": 0.0002634214003959659,
      "loss": 3.1235,
      "step": 124166
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.293337821960449,
      "learning_rate": 0.0002634173403574269,
      "loss": 2.8967,
      "step": 124167
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7732940912246704,
      "learning_rate": 0.00026341328032568947,
      "loss": 2.9135,
      "step": 124168
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0320656299591064,
      "learning_rate": 0.000263409220300754,
      "loss": 2.953,
      "step": 124169
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.778267502784729,
      "learning_rate": 0.0002634051602826215,
      "loss": 3.1275,
      "step": 124170
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8504972457885742,
      "learning_rate": 0.0002634011002712927,
      "loss": 2.8938,
      "step": 124171
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.242493152618408,
      "learning_rate": 0.0002633970402667683,
      "loss": 3.075,
      "step": 124172
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.459818124771118,
      "learning_rate": 0.000263392980269049,
      "loss": 2.9042,
      "step": 124173
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1684463024139404,
      "learning_rate": 0.00026338892027813585,
      "loss": 3.0784,
      "step": 124174
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.3595163822174072,
      "learning_rate": 0.00026338486029402924,
      "loss": 2.7788,
      "step": 124175
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1882519721984863,
      "learning_rate": 0.0002633808003167301,
      "loss": 3.0101,
      "step": 124176
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.139904499053955,
      "learning_rate": 0.00026337674034623913,
      "loss": 2.8448,
      "step": 124177
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.164691209793091,
      "learning_rate": 0.0002633726803825571,
      "loss": 2.9852,
      "step": 124178
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.2405712604522705,
      "learning_rate": 0.00026336862042568483,
      "loss": 3.2076,
      "step": 124179
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9572876691818237,
      "learning_rate": 0.00026336456047562303,
      "loss": 3.0989,
      "step": 124180
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.3450920581817627,
      "learning_rate": 0.0002633605005323725,
      "loss": 3.0167,
      "step": 124181
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9496673345565796,
      "learning_rate": 0.0002633564405959339,
      "loss": 3.1383,
      "step": 124182
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.977103590965271,
      "learning_rate": 0.00026335238066630795,
      "loss": 2.7441,
      "step": 124183
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4578185081481934,
      "learning_rate": 0.00026334832074349557,
      "loss": 2.7355,
      "step": 124184
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8445560932159424,
      "learning_rate": 0.0002633442608274974,
      "loss": 2.9178,
      "step": 124185
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0264666080474854,
      "learning_rate": 0.0002633402009183142,
      "loss": 2.9564,
      "step": 124186
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.072470188140869,
      "learning_rate": 0.0002633361410159469,
      "loss": 2.9005,
      "step": 124187
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.925909399986267,
      "learning_rate": 0.00026333208112039596,
      "loss": 2.8134,
      "step": 124188
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.801896333694458,
      "learning_rate": 0.00026332802123166227,
      "loss": 2.8307,
      "step": 124189
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.267671585083008,
      "learning_rate": 0.0002633239613497466,
      "loss": 2.8299,
      "step": 124190
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7950401306152344,
      "learning_rate": 0.00026331990147464973,
      "loss": 2.7922,
      "step": 124191
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.892197608947754,
      "learning_rate": 0.0002633158416063723,
      "loss": 2.8951,
      "step": 124192
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.60978102684021,
      "learning_rate": 0.00026331178174491534,
      "loss": 3.0482,
      "step": 124193
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6847407817840576,
      "learning_rate": 0.0002633077218902793,
      "loss": 3.059,
      "step": 124194
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8371611833572388,
      "learning_rate": 0.000263303662042465,
      "loss": 3.0775,
      "step": 124195
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1832361221313477,
      "learning_rate": 0.00026329960220147325,
      "loss": 2.8549,
      "step": 124196
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7306954860687256,
      "learning_rate": 0.0002632955423673048,
      "loss": 2.9027,
      "step": 124197
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.5717134475708008,
      "learning_rate": 0.00026329148253996043,
      "loss": 3.0296,
      "step": 124198
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.6487480401992798,
      "learning_rate": 0.000263287422719441,
      "loss": 2.8924,
      "step": 124199
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1139955520629883,
      "learning_rate": 0.00026328336290574696,
      "loss": 2.9328,
      "step": 124200
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4373741149902344,
      "learning_rate": 0.00026327930309887927,
      "loss": 2.8908,
      "step": 124201
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4985053539276123,
      "learning_rate": 0.00026327524329883864,
      "loss": 2.8233,
      "step": 124202
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7078876495361328,
      "learning_rate": 0.0002632711835056258,
      "loss": 3.1428,
      "step": 124203
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.789940357208252,
      "learning_rate": 0.0002632671237192416,
      "loss": 3.0419,
      "step": 124204
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.070725202560425,
      "learning_rate": 0.00026326306393968684,
      "loss": 2.8697,
      "step": 124205
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.248790740966797,
      "learning_rate": 0.00026325900416696203,
      "loss": 2.9941,
      "step": 124206
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.00154709815979,
      "learning_rate": 0.0002632549444010681,
      "loss": 3.0615,
      "step": 124207
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8939459323883057,
      "learning_rate": 0.00026325088464200573,
      "loss": 2.9739,
      "step": 124208
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.253502368927002,
      "learning_rate": 0.0002632468248897757,
      "loss": 2.9493,
      "step": 124209
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.837908148765564,
      "learning_rate": 0.00026324276514437886,
      "loss": 3.231,
      "step": 124210
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1441032886505127,
      "learning_rate": 0.0002632387054058159,
      "loss": 2.6848,
      "step": 124211
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9139984846115112,
      "learning_rate": 0.0002632346456740875,
      "loss": 2.6705,
      "step": 124212
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1951942443847656,
      "learning_rate": 0.0002632305859491945,
      "loss": 2.8074,
      "step": 124213
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.164013862609863,
      "learning_rate": 0.0002632265262311375,
      "loss": 2.9165,
      "step": 124214
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0731325149536133,
      "learning_rate": 0.0002632224665199175,
      "loss": 3.0269,
      "step": 124215
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9275672435760498,
      "learning_rate": 0.0002632184068155351,
      "loss": 2.9004,
      "step": 124216
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6177122592926025,
      "learning_rate": 0.00026321434711799126,
      "loss": 2.9881,
      "step": 124217
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.6090259552001953,
      "learning_rate": 0.0002632102874272864,
      "loss": 2.9581,
      "step": 124218
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.733726739883423,
      "learning_rate": 0.00026320622774342144,
      "loss": 2.7204,
      "step": 124219
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8987762928009033,
      "learning_rate": 0.0002632021680663972,
      "loss": 3.1727,
      "step": 124220
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.859619617462158,
      "learning_rate": 0.0002631981083962143,
      "loss": 2.9765,
      "step": 124221
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.088620901107788,
      "learning_rate": 0.00026319404873287355,
      "loss": 3.0199,
      "step": 124222
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.6656832695007324,
      "learning_rate": 0.0002631899890763758,
      "loss": 3.0482,
      "step": 124223
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7501533031463623,
      "learning_rate": 0.00026318592942672176,
      "loss": 2.8974,
      "step": 124224
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0168063640594482,
      "learning_rate": 0.0002631818697839121,
      "loss": 3.0145,
      "step": 124225
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1431362628936768,
      "learning_rate": 0.0002631778101479476,
      "loss": 3.004,
      "step": 124226
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9654924869537354,
      "learning_rate": 0.00026317375051882906,
      "loss": 2.9562,
      "step": 124227
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.797987461090088,
      "learning_rate": 0.00026316969089655714,
      "loss": 2.633,
      "step": 124228
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3562426567077637,
      "learning_rate": 0.00026316563128113283,
      "loss": 3.0315,
      "step": 124229
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.147334575653076,
      "learning_rate": 0.0002631615716725566,
      "loss": 2.8372,
      "step": 124230
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4325501918792725,
      "learning_rate": 0.0002631575120708294,
      "loss": 2.8651,
      "step": 124231
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8409854173660278,
      "learning_rate": 0.00026315345247595185,
      "loss": 3.0368,
      "step": 124232
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9714382886886597,
      "learning_rate": 0.0002631493928879248,
      "loss": 3.0511,
      "step": 124233
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.91556978225708,
      "learning_rate": 0.00026314533330674896,
      "loss": 2.786,
      "step": 124234
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0284535884857178,
      "learning_rate": 0.0002631412737324251,
      "loss": 2.9155,
      "step": 124235
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9350714683532715,
      "learning_rate": 0.00026313721416495404,
      "loss": 2.9503,
      "step": 124236
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1978275775909424,
      "learning_rate": 0.00026313315460433636,
      "loss": 3.0972,
      "step": 124237
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.898719072341919,
      "learning_rate": 0.00026312909505057296,
      "loss": 2.8886,
      "step": 124238
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3331754207611084,
      "learning_rate": 0.00026312503550366464,
      "loss": 3.099,
      "step": 124239
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.804903030395508,
      "learning_rate": 0.00026312097596361194,
      "loss": 2.8304,
      "step": 124240
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5061774253845215,
      "learning_rate": 0.00026311691643041585,
      "loss": 2.8772,
      "step": 124241
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1262166500091553,
      "learning_rate": 0.00026311285690407696,
      "loss": 3.2098,
      "step": 124242
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5586936473846436,
      "learning_rate": 0.00026310879738459614,
      "loss": 3.0726,
      "step": 124243
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8863048553466797,
      "learning_rate": 0.000263104737871974,
      "loss": 3.035,
      "step": 124244
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.859539270401001,
      "learning_rate": 0.00026310067836621146,
      "loss": 3.0419,
      "step": 124245
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.70543098449707,
      "learning_rate": 0.0002630966188673092,
      "loss": 2.7924,
      "step": 124246
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.80591082572937,
      "learning_rate": 0.00026309255937526793,
      "loss": 3.0239,
      "step": 124247
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1466000080108643,
      "learning_rate": 0.0002630884998900885,
      "loss": 2.8458,
      "step": 124248
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9724124670028687,
      "learning_rate": 0.00026308444041177163,
      "loss": 2.8129,
      "step": 124249
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.187429904937744,
      "learning_rate": 0.000263080380940318,
      "loss": 2.9759,
      "step": 124250
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2127597332000732,
      "learning_rate": 0.0002630763214757284,
      "loss": 3.1687,
      "step": 124251
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.063425302505493,
      "learning_rate": 0.00026307226201800364,
      "loss": 3.066,
      "step": 124252
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7778276205062866,
      "learning_rate": 0.0002630682025671445,
      "loss": 3.1431,
      "step": 124253
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.962235450744629,
      "learning_rate": 0.0002630641431231517,
      "loss": 3.0729,
      "step": 124254
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1608362197875977,
      "learning_rate": 0.0002630600836860259,
      "loss": 3.0726,
      "step": 124255
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.306741237640381,
      "learning_rate": 0.0002630560242557679,
      "loss": 2.993,
      "step": 124256
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.954126238822937,
      "learning_rate": 0.00026305196483237853,
      "loss": 2.6485,
      "step": 124257
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1781890392303467,
      "learning_rate": 0.0002630479054158585,
      "loss": 3.2145,
      "step": 124258
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8829617500305176,
      "learning_rate": 0.00026304384600620847,
      "loss": 3.0872,
      "step": 124259
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.056056499481201,
      "learning_rate": 0.0002630397866034295,
      "loss": 3.1873,
      "step": 124260
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.6788164377212524,
      "learning_rate": 0.00026303572720752196,
      "loss": 3.1713,
      "step": 124261
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1586477756500244,
      "learning_rate": 0.0002630316678184868,
      "loss": 2.9892,
      "step": 124262
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1049277782440186,
      "learning_rate": 0.00026302760843632475,
      "loss": 3.1014,
      "step": 124263
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.929460048675537,
      "learning_rate": 0.0002630235490610365,
      "loss": 2.8053,
      "step": 124264
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9716193675994873,
      "learning_rate": 0.000263019489692623,
      "loss": 2.9073,
      "step": 124265
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.233807325363159,
      "learning_rate": 0.0002630154303310849,
      "loss": 2.7818,
      "step": 124266
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.278078079223633,
      "learning_rate": 0.0002630113709764228,
      "loss": 2.9749,
      "step": 124267
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0684401988983154,
      "learning_rate": 0.00026300731162863766,
      "loss": 3.2825,
      "step": 124268
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.144196033477783,
      "learning_rate": 0.00026300325228773014,
      "loss": 3.0707,
      "step": 124269
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8535146713256836,
      "learning_rate": 0.000262999192953701,
      "loss": 2.8755,
      "step": 124270
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.681919574737549,
      "learning_rate": 0.000262995133626551,
      "loss": 2.9387,
      "step": 124271
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.088392734527588,
      "learning_rate": 0.00026299107430628103,
      "loss": 3.0192,
      "step": 124272
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5367138385772705,
      "learning_rate": 0.0002629870149928916,
      "loss": 2.9764,
      "step": 124273
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1152820587158203,
      "learning_rate": 0.0002629829556863836,
      "loss": 2.846,
      "step": 124274
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.346151113510132,
      "learning_rate": 0.0002629788963867577,
      "loss": 3.0383,
      "step": 124275
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1660640239715576,
      "learning_rate": 0.00026297483709401473,
      "loss": 2.8792,
      "step": 124276
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.797454357147217,
      "learning_rate": 0.0002629707778081555,
      "loss": 2.7211,
      "step": 124277
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9140000343322754,
      "learning_rate": 0.0002629667185291808,
      "loss": 2.8748,
      "step": 124278
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.395390748977661,
      "learning_rate": 0.00026296265925709113,
      "loss": 3.0684,
      "step": 124279
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.170844316482544,
      "learning_rate": 0.0002629585999918875,
      "loss": 2.9855,
      "step": 124280
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1479671001434326,
      "learning_rate": 0.00026295454073357047,
      "loss": 3.0442,
      "step": 124281
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8739888668060303,
      "learning_rate": 0.0002629504814821409,
      "loss": 3.2169,
      "step": 124282
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0031540393829346,
      "learning_rate": 0.00026294642223759955,
      "loss": 2.9799,
      "step": 124283
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0012521743774414,
      "learning_rate": 0.0002629423629999473,
      "loss": 3.0982,
      "step": 124284
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9290235042572021,
      "learning_rate": 0.0002629383037691846,
      "loss": 2.9455,
      "step": 124285
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1045713424682617,
      "learning_rate": 0.0002629342445453124,
      "loss": 3.1131,
      "step": 124286
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.6130549907684326,
      "learning_rate": 0.00026293018532833144,
      "loss": 2.8317,
      "step": 124287
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0128226280212402,
      "learning_rate": 0.00026292612611824245,
      "loss": 3.3424,
      "step": 124288
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1841440200805664,
      "learning_rate": 0.00026292206691504616,
      "loss": 2.8278,
      "step": 124289
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2368085384368896,
      "learning_rate": 0.0002629180077187435,
      "loss": 2.9332,
      "step": 124290
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3819162845611572,
      "learning_rate": 0.000262913948529335,
      "loss": 2.8416,
      "step": 124291
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9502005577087402,
      "learning_rate": 0.0002629098893468214,
      "loss": 2.948,
      "step": 124292
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0595903396606445,
      "learning_rate": 0.0002629058301712036,
      "loss": 2.9453,
      "step": 124293
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0372018814086914,
      "learning_rate": 0.00026290177100248235,
      "loss": 3.1947,
      "step": 124294
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.957200527191162,
      "learning_rate": 0.0002628977118406583,
      "loss": 2.887,
      "step": 124295
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9614943265914917,
      "learning_rate": 0.00026289365268573244,
      "loss": 2.6932,
      "step": 124296
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8806331157684326,
      "learning_rate": 0.0002628895935377052,
      "loss": 3.2405,
      "step": 124297
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4751086235046387,
      "learning_rate": 0.0002628855343965775,
      "loss": 2.8802,
      "step": 124298
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.426652431488037,
      "learning_rate": 0.00026288147526235,
      "loss": 3.0378,
      "step": 124299
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.374993085861206,
      "learning_rate": 0.00026287741613502364,
      "loss": 3.1664,
      "step": 124300
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9055237770080566,
      "learning_rate": 0.000262873357014599,
      "loss": 3.3501,
      "step": 124301
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4277381896972656,
      "learning_rate": 0.00026286929790107706,
      "loss": 3.0266,
      "step": 124302
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.006845712661743,
      "learning_rate": 0.0002628652387944583,
      "loss": 3.0804,
      "step": 124303
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9163897037506104,
      "learning_rate": 0.0002628611796947436,
      "loss": 2.8671,
      "step": 124304
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2275078296661377,
      "learning_rate": 0.0002628571206019337,
      "loss": 3.0355,
      "step": 124305
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8959648609161377,
      "learning_rate": 0.0002628530615160293,
      "loss": 3.1434,
      "step": 124306
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0546247959136963,
      "learning_rate": 0.0002628490024370313,
      "loss": 2.9092,
      "step": 124307
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0591330528259277,
      "learning_rate": 0.0002628449433649404,
      "loss": 2.9393,
      "step": 124308
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.026001453399658,
      "learning_rate": 0.00026284088429975736,
      "loss": 2.9396,
      "step": 124309
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0581531524658203,
      "learning_rate": 0.0002628368252414828,
      "loss": 3.3331,
      "step": 124310
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9118852615356445,
      "learning_rate": 0.0002628327661901176,
      "loss": 2.8719,
      "step": 124311
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6321194171905518,
      "learning_rate": 0.00026282870714566245,
      "loss": 2.9378,
      "step": 124312
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.217785358428955,
      "learning_rate": 0.00026282464810811816,
      "loss": 2.9669,
      "step": 124313
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8149466514587402,
      "learning_rate": 0.0002628205890774855,
      "loss": 3.028,
      "step": 124314
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.56699800491333,
      "learning_rate": 0.00026281653005376523,
      "loss": 2.7209,
      "step": 124315
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1792423725128174,
      "learning_rate": 0.0002628124710369581,
      "loss": 2.7258,
      "step": 124316
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.692941427230835,
      "learning_rate": 0.0002628084120270647,
      "loss": 3.1663,
      "step": 124317
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.174182415008545,
      "learning_rate": 0.000262804353024086,
      "loss": 2.9366,
      "step": 124318
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7209677696228027,
      "learning_rate": 0.0002628002940280226,
      "loss": 2.7305,
      "step": 124319
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.180366277694702,
      "learning_rate": 0.0002627962350388754,
      "loss": 2.767,
      "step": 124320
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.439389228820801,
      "learning_rate": 0.00026279217605664515,
      "loss": 2.8414,
      "step": 124321
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6258106231689453,
      "learning_rate": 0.0002627881170813324,
      "loss": 2.9467,
      "step": 124322
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7939352989196777,
      "learning_rate": 0.00026278405811293814,
      "loss": 2.8637,
      "step": 124323
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7552121877670288,
      "learning_rate": 0.000262779999151463,
      "loss": 3.0596,
      "step": 124324
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.392484664916992,
      "learning_rate": 0.00026277594019690766,
      "loss": 3.0344,
      "step": 124325
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.4510629177093506,
      "learning_rate": 0.00026277188124927307,
      "loss": 2.8113,
      "step": 124326
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.848848819732666,
      "learning_rate": 0.0002627678223085599,
      "loss": 2.7075,
      "step": 124327
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.686872959136963,
      "learning_rate": 0.00026276376337476884,
      "loss": 3.1455,
      "step": 124328
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8209612369537354,
      "learning_rate": 0.0002627597044479008,
      "loss": 3.0962,
      "step": 124329
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.133517026901245,
      "learning_rate": 0.0002627556455279563,
      "loss": 3.0094,
      "step": 124330
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5866599082946777,
      "learning_rate": 0.0002627515866149363,
      "loss": 3.1673,
      "step": 124331
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2008440494537354,
      "learning_rate": 0.0002627475277088415,
      "loss": 3.0563,
      "step": 124332
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.287051200866699,
      "learning_rate": 0.00026274346880967263,
      "loss": 2.9797,
      "step": 124333
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.860437393188477,
      "learning_rate": 0.00026273940991743046,
      "loss": 3.0189,
      "step": 124334
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9537122249603271,
      "learning_rate": 0.00026273535103211565,
      "loss": 2.9568,
      "step": 124335
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.695626974105835,
      "learning_rate": 0.0002627312921537291,
      "loss": 2.9668,
      "step": 124336
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.629854917526245,
      "learning_rate": 0.0002627272332822715,
      "loss": 2.8497,
      "step": 124337
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8198432922363281,
      "learning_rate": 0.0002627231744177437,
      "loss": 3.0591,
      "step": 124338
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0988783836364746,
      "learning_rate": 0.0002627191155601463,
      "loss": 2.74,
      "step": 124339
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9159473180770874,
      "learning_rate": 0.0002627150567094801,
      "loss": 2.938,
      "step": 124340
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.011488676071167,
      "learning_rate": 0.00026271099786574585,
      "loss": 2.9075,
      "step": 124341
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7277861833572388,
      "learning_rate": 0.00026270693902894437,
      "loss": 3.2552,
      "step": 124342
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9293930530548096,
      "learning_rate": 0.00026270288019907635,
      "loss": 2.986,
      "step": 124343
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1261932849884033,
      "learning_rate": 0.00026269882137614257,
      "loss": 2.7571,
      "step": 124344
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6878020763397217,
      "learning_rate": 0.0002626947625601439,
      "loss": 3.1199,
      "step": 124345
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.6957741975784302,
      "learning_rate": 0.00026269070375108084,
      "loss": 2.982,
      "step": 124346
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.09015154838562,
      "learning_rate": 0.0002626866449489543,
      "loss": 2.8576,
      "step": 124347
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.013556718826294,
      "learning_rate": 0.00026268258615376504,
      "loss": 2.7775,
      "step": 124348
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0103399753570557,
      "learning_rate": 0.00026267852736551375,
      "loss": 3.0485,
      "step": 124349
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0350332260131836,
      "learning_rate": 0.00026267446858420124,
      "loss": 2.8754,
      "step": 124350
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4960994720458984,
      "learning_rate": 0.0002626704098098284,
      "loss": 3.0618,
      "step": 124351
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.895181894302368,
      "learning_rate": 0.00026266635104239567,
      "loss": 3.2321,
      "step": 124352
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.235893726348877,
      "learning_rate": 0.000262662292281904,
      "loss": 3.0091,
      "step": 124353
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.080500841140747,
      "learning_rate": 0.0002626582335283541,
      "loss": 2.617,
      "step": 124354
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.908932685852051,
      "learning_rate": 0.00026265417478174677,
      "loss": 2.8915,
      "step": 124355
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4686641693115234,
      "learning_rate": 0.0002626501160420827,
      "loss": 2.8376,
      "step": 124356
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7436840534210205,
      "learning_rate": 0.00026264605730936283,
      "loss": 3.14,
      "step": 124357
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0286905765533447,
      "learning_rate": 0.0002626419985835876,
      "loss": 2.9781,
      "step": 124358
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.053499221801758,
      "learning_rate": 0.000262637939864758,
      "loss": 2.8538,
      "step": 124359
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.451451063156128,
      "learning_rate": 0.0002626338811528746,
      "loss": 3.0198,
      "step": 124360
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9807004928588867,
      "learning_rate": 0.0002626298224479383,
      "loss": 2.8147,
      "step": 124361
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0436599254608154,
      "learning_rate": 0.0002626257637499499,
      "loss": 2.8882,
      "step": 124362
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3050220012664795,
      "learning_rate": 0.00026262170505891017,
      "loss": 2.9921,
      "step": 124363
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7249155044555664,
      "learning_rate": 0.0002626176463748196,
      "loss": 2.8578,
      "step": 124364
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1710193157196045,
      "learning_rate": 0.0002626135876976791,
      "loss": 3.1327,
      "step": 124365
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.533015489578247,
      "learning_rate": 0.00026260952902748953,
      "loss": 3.2238,
      "step": 124366
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.869461178779602,
      "learning_rate": 0.00026260547036425147,
      "loss": 3.0841,
      "step": 124367
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8099091053009033,
      "learning_rate": 0.00026260141170796584,
      "loss": 2.9005,
      "step": 124368
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0625431537628174,
      "learning_rate": 0.0002625973530586334,
      "loss": 2.9878,
      "step": 124369
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7136224508285522,
      "learning_rate": 0.00026259329441625465,
      "loss": 2.9834,
      "step": 124370
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9316903352737427,
      "learning_rate": 0.00026258923578083055,
      "loss": 2.9016,
      "step": 124371
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.001304864883423,
      "learning_rate": 0.0002625851771523618,
      "loss": 3.1123,
      "step": 124372
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4607181549072266,
      "learning_rate": 0.0002625811185308492,
      "loss": 2.9762,
      "step": 124373
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.505826711654663,
      "learning_rate": 0.0002625770599162935,
      "loss": 2.8402,
      "step": 124374
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3912200927734375,
      "learning_rate": 0.0002625730013086955,
      "loss": 2.9992,
      "step": 124375
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4992105960845947,
      "learning_rate": 0.0002625689427080558,
      "loss": 2.8831,
      "step": 124376
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.209446430206299,
      "learning_rate": 0.0002625648841143752,
      "loss": 2.7768,
      "step": 124377
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0954649448394775,
      "learning_rate": 0.0002625608255276545,
      "loss": 3.0818,
      "step": 124378
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.017749071121216,
      "learning_rate": 0.00026255676694789446,
      "loss": 3.0161,
      "step": 124379
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3981385231018066,
      "learning_rate": 0.0002625527083750958,
      "loss": 3.2409,
      "step": 124380
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.193352460861206,
      "learning_rate": 0.00026254864980925933,
      "loss": 2.8329,
      "step": 124381
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7978999614715576,
      "learning_rate": 0.00026254459125038597,
      "loss": 2.7808,
      "step": 124382
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.366316795349121,
      "learning_rate": 0.00026254053269847606,
      "loss": 2.8954,
      "step": 124383
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.04703688621521,
      "learning_rate": 0.0002625364741535306,
      "loss": 3.1166,
      "step": 124384
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.897994041442871,
      "learning_rate": 0.0002625324156155503,
      "loss": 2.8769,
      "step": 124385
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1075527667999268,
      "learning_rate": 0.0002625283570845359,
      "loss": 3.1722,
      "step": 124386
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.986067295074463,
      "learning_rate": 0.0002625242985604882,
      "loss": 2.9449,
      "step": 124387
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.376755952835083,
      "learning_rate": 0.00026252024004340814,
      "loss": 2.7945,
      "step": 124388
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9380184412002563,
      "learning_rate": 0.0002625161815332961,
      "loss": 2.8884,
      "step": 124389
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.7763173580169678,
      "learning_rate": 0.00026251212303015297,
      "loss": 2.8416,
      "step": 124390
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.667778968811035,
      "learning_rate": 0.0002625080645339796,
      "loss": 2.8646,
      "step": 124391
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.837429165840149,
      "learning_rate": 0.0002625040060447767,
      "loss": 2.9881,
      "step": 124392
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1170403957366943,
      "learning_rate": 0.00026249994756254494,
      "loss": 3.2364,
      "step": 124393
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.934979796409607,
      "learning_rate": 0.00026249588908728536,
      "loss": 3.078,
      "step": 124394
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.3468377590179443,
      "learning_rate": 0.0002624918306189983,
      "loss": 2.8688,
      "step": 124395
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.267470121383667,
      "learning_rate": 0.00026248777215768474,
      "loss": 3.2427,
      "step": 124396
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.215287446975708,
      "learning_rate": 0.00026248371370334543,
      "loss": 3.0473,
      "step": 124397
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8487908840179443,
      "learning_rate": 0.0002624796552559811,
      "loss": 2.9272,
      "step": 124398
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8325371742248535,
      "learning_rate": 0.0002624755968155925,
      "loss": 2.8913,
      "step": 124399
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.783038854598999,
      "learning_rate": 0.0002624715383821805,
      "loss": 3.0081,
      "step": 124400
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7040843963623047,
      "learning_rate": 0.0002624674799557457,
      "loss": 2.8591,
      "step": 124401
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.893440008163452,
      "learning_rate": 0.0002624634215362889,
      "loss": 3.1053,
      "step": 124402
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4153220653533936,
      "learning_rate": 0.00026245936312381074,
      "loss": 2.9154,
      "step": 124403
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7286376953125,
      "learning_rate": 0.0002624553047183122,
      "loss": 3.0093,
      "step": 124404
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.105057716369629,
      "learning_rate": 0.0002624512463197939,
      "loss": 2.9236,
      "step": 124405
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.171705722808838,
      "learning_rate": 0.00026244718792825663,
      "loss": 3.3009,
      "step": 124406
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.659705877304077,
      "learning_rate": 0.0002624431295437012,
      "loss": 2.8562,
      "step": 124407
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.93499755859375,
      "learning_rate": 0.0002624390711661282,
      "loss": 3.1304,
      "step": 124408
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9916003942489624,
      "learning_rate": 0.0002624350127955386,
      "loss": 2.8523,
      "step": 124409
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.006868600845337,
      "learning_rate": 0.0002624309544319329,
      "loss": 2.9678,
      "step": 124410
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1117844581604004,
      "learning_rate": 0.00026242689607531205,
      "loss": 2.8238,
      "step": 124411
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.876661777496338,
      "learning_rate": 0.0002624228377256768,
      "loss": 2.7507,
      "step": 124412
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8053216934204102,
      "learning_rate": 0.00026241877938302784,
      "loss": 2.7265,
      "step": 124413
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8584030866622925,
      "learning_rate": 0.00026241472104736584,
      "loss": 2.9692,
      "step": 124414
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.805988311767578,
      "learning_rate": 0.0002624106627186918,
      "loss": 2.8297,
      "step": 124415
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0725972652435303,
      "learning_rate": 0.00026240660439700626,
      "loss": 3.243,
      "step": 124416
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.654522657394409,
      "learning_rate": 0.00026240254608231,
      "loss": 2.9703,
      "step": 124417
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.954939365386963,
      "learning_rate": 0.00026239848777460386,
      "loss": 2.9356,
      "step": 124418
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0986580848693848,
      "learning_rate": 0.0002623944294738886,
      "loss": 2.7706,
      "step": 124419
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.978598713874817,
      "learning_rate": 0.00026239037118016477,
      "loss": 3.0391,
      "step": 124420
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.431264877319336,
      "learning_rate": 0.0002623863128934334,
      "loss": 3.1481,
      "step": 124421
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.445342540740967,
      "learning_rate": 0.0002623822546136951,
      "loss": 2.7691,
      "step": 124422
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.243558168411255,
      "learning_rate": 0.0002623781963409507,
      "loss": 2.9368,
      "step": 124423
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7938737869262695,
      "learning_rate": 0.0002623741380752009,
      "loss": 3.0754,
      "step": 124424
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.939958930015564,
      "learning_rate": 0.00026237007981644636,
      "loss": 3.0816,
      "step": 124425
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0517771244049072,
      "learning_rate": 0.000262366021564688,
      "loss": 2.7661,
      "step": 124426
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4654624462127686,
      "learning_rate": 0.00026236196331992645,
      "loss": 3.1203,
      "step": 124427
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.95314884185791,
      "learning_rate": 0.00026235790508216255,
      "loss": 3.16,
      "step": 124428
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2959067821502686,
      "learning_rate": 0.000262353846851397,
      "loss": 3.2235,
      "step": 124429
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0939106941223145,
      "learning_rate": 0.0002623497886276308,
      "loss": 2.9357,
      "step": 124430
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.515568733215332,
      "learning_rate": 0.00026234573041086426,
      "loss": 2.9179,
      "step": 124431
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.981210470199585,
      "learning_rate": 0.00026234167220109836,
      "loss": 2.8794,
      "step": 124432
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.603600025177002,
      "learning_rate": 0.00026233761399833386,
      "loss": 3.0185,
      "step": 124433
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.36246395111084,
      "learning_rate": 0.00026233355580257157,
      "loss": 2.8635,
      "step": 124434
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.029658555984497,
      "learning_rate": 0.00026232949761381214,
      "loss": 3.0366,
      "step": 124435
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8989514112472534,
      "learning_rate": 0.0002623254394320565,
      "loss": 3.0154,
      "step": 124436
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.6786153316497803,
      "learning_rate": 0.0002623213812573051,
      "loss": 2.9864,
      "step": 124437
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.567052125930786,
      "learning_rate": 0.00026231732308955893,
      "loss": 3.0452,
      "step": 124438
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.366652488708496,
      "learning_rate": 0.0002623132649288187,
      "loss": 3.0024,
      "step": 124439
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.023475408554077,
      "learning_rate": 0.0002623092067750851,
      "loss": 2.9133,
      "step": 124440
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3131651878356934,
      "learning_rate": 0.0002623051486283589,
      "loss": 3.2815,
      "step": 124441
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.930225133895874,
      "learning_rate": 0.0002623010904886411,
      "loss": 2.9245,
      "step": 124442
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.230252981185913,
      "learning_rate": 0.00026229703235593205,
      "loss": 3.2211,
      "step": 124443
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8361940383911133,
      "learning_rate": 0.0002622929742302327,
      "loss": 2.8043,
      "step": 124444
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9519290924072266,
      "learning_rate": 0.0002622889161115438,
      "loss": 3.1941,
      "step": 124445
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.072314739227295,
      "learning_rate": 0.0002622848579998661,
      "loss": 2.9639,
      "step": 124446
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.618081569671631,
      "learning_rate": 0.00026228079989520036,
      "loss": 3.2017,
      "step": 124447
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1474480628967285,
      "learning_rate": 0.0002622767417975473,
      "loss": 3.0887,
      "step": 124448
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8480411767959595,
      "learning_rate": 0.00026227268370690785,
      "loss": 3.1209,
      "step": 124449
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5612173080444336,
      "learning_rate": 0.0002622686256232825,
      "loss": 2.9359,
      "step": 124450
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.48383903503418,
      "learning_rate": 0.00026226456754667216,
      "loss": 2.9787,
      "step": 124451
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.52152156829834,
      "learning_rate": 0.00026226050947707755,
      "loss": 3.0404,
      "step": 124452
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9968557357788086,
      "learning_rate": 0.00026225645141449936,
      "loss": 3.053,
      "step": 124453
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.426321268081665,
      "learning_rate": 0.00026225239335893845,
      "loss": 2.8823,
      "step": 124454
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.069033622741699,
      "learning_rate": 0.00026224833531039563,
      "loss": 2.9105,
      "step": 124455
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9655299186706543,
      "learning_rate": 0.0002622442772688715,
      "loss": 3.2159,
      "step": 124456
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.349625587463379,
      "learning_rate": 0.0002622402192343668,
      "loss": 2.8815,
      "step": 124457
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2932584285736084,
      "learning_rate": 0.00026223616120688237,
      "loss": 2.7626,
      "step": 124458
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9212141036987305,
      "learning_rate": 0.000262232103186419,
      "loss": 3.133,
      "step": 124459
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.2724037170410156,
      "learning_rate": 0.0002622280451729773,
      "loss": 2.7777,
      "step": 124460
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3712222576141357,
      "learning_rate": 0.0002622239871665583,
      "loss": 3.096,
      "step": 124461
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1346871852874756,
      "learning_rate": 0.0002622199291671625,
      "loss": 2.8586,
      "step": 124462
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.380941390991211,
      "learning_rate": 0.00026221587117479067,
      "loss": 3.0842,
      "step": 124463
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8010895252227783,
      "learning_rate": 0.0002622118131894436,
      "loss": 2.8977,
      "step": 124464
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5775601863861084,
      "learning_rate": 0.0002622077552111221,
      "loss": 3.0181,
      "step": 124465
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.031860589981079,
      "learning_rate": 0.0002622036972398269,
      "loss": 2.9406,
      "step": 124466
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9080359935760498,
      "learning_rate": 0.0002621996392755589,
      "loss": 2.8614,
      "step": 124467
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8777658939361572,
      "learning_rate": 0.00026219558131831847,
      "loss": 3.0511,
      "step": 124468
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.123715877532959,
      "learning_rate": 0.00026219152336810666,
      "loss": 2.9735,
      "step": 124469
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4058094024658203,
      "learning_rate": 0.0002621874654249242,
      "loss": 2.9722,
      "step": 124470
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4778130054473877,
      "learning_rate": 0.00026218340748877176,
      "loss": 2.7994,
      "step": 124471
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.7063300609588623,
      "learning_rate": 0.00026217934955965013,
      "loss": 2.9882,
      "step": 124472
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.8833746910095215,
      "learning_rate": 0.0002621752916375602,
      "loss": 2.9215,
      "step": 124473
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.247133731842041,
      "learning_rate": 0.0002621712337225025,
      "loss": 3.1536,
      "step": 124474
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.019002914428711,
      "learning_rate": 0.00026216717581447783,
      "loss": 2.8567,
      "step": 124475
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2798290252685547,
      "learning_rate": 0.000262163117913487,
      "loss": 3.0717,
      "step": 124476
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.135899305343628,
      "learning_rate": 0.00026215906001953083,
      "loss": 3.078,
      "step": 124477
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3330190181732178,
      "learning_rate": 0.00026215500213261,
      "loss": 3.1194,
      "step": 124478
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4344677925109863,
      "learning_rate": 0.0002621509442527254,
      "loss": 3.1019,
      "step": 124479
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7167309522628784,
      "learning_rate": 0.0002621468863798774,
      "loss": 3.0636,
      "step": 124480
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9087680578231812,
      "learning_rate": 0.0002621428285140671,
      "loss": 3.0298,
      "step": 124481
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1062161922454834,
      "learning_rate": 0.0002621387706552952,
      "loss": 3.0304,
      "step": 124482
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1661770343780518,
      "learning_rate": 0.00026213471280356234,
      "loss": 3.189,
      "step": 124483
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1192409992218018,
      "learning_rate": 0.00026213065495886943,
      "loss": 2.9454,
      "step": 124484
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7997251749038696,
      "learning_rate": 0.0002621265971212172,
      "loss": 3.1156,
      "step": 124485
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6270487308502197,
      "learning_rate": 0.00026212253929060625,
      "loss": 2.8783,
      "step": 124486
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.214962959289551,
      "learning_rate": 0.00026211848146703745,
      "loss": 2.9818,
      "step": 124487
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.446310520172119,
      "learning_rate": 0.00026211442365051157,
      "loss": 3.0767,
      "step": 124488
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.455848455429077,
      "learning_rate": 0.00026211036584102926,
      "loss": 2.9539,
      "step": 124489
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.075924873352051,
      "learning_rate": 0.0002621063080385914,
      "loss": 3.2853,
      "step": 124490
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5211145877838135,
      "learning_rate": 0.0002621022502431987,
      "loss": 2.9902,
      "step": 124491
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3684420585632324,
      "learning_rate": 0.0002620981924548519,
      "loss": 3.2172,
      "step": 124492
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0444064140319824,
      "learning_rate": 0.0002620941346735518,
      "loss": 2.8293,
      "step": 124493
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.094592332839966,
      "learning_rate": 0.0002620900768992991,
      "loss": 2.6725,
      "step": 124494
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9468306303024292,
      "learning_rate": 0.0002620860191320945,
      "loss": 2.8809,
      "step": 124495
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.049720287322998,
      "learning_rate": 0.00026208196137193887,
      "loss": 2.8772,
      "step": 124496
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8996825218200684,
      "learning_rate": 0.00026207790361883296,
      "loss": 2.9169,
      "step": 124497
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.063204288482666,
      "learning_rate": 0.0002620738458727774,
      "loss": 3.2084,
      "step": 124498
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.074854850769043,
      "learning_rate": 0.00026206978813377306,
      "loss": 2.9345,
      "step": 124499
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7869560718536377,
      "learning_rate": 0.00026206573040182075,
      "loss": 3.1044,
      "step": 124500
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4482805728912354,
      "learning_rate": 0.000262061672676921,
      "loss": 2.9627,
      "step": 124501
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1337826251983643,
      "learning_rate": 0.00026205761495907477,
      "loss": 2.8495,
      "step": 124502
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1002838611602783,
      "learning_rate": 0.0002620535572482828,
      "loss": 2.9328,
      "step": 124503
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0868546962738037,
      "learning_rate": 0.0002620494995445457,
      "loss": 2.8986,
      "step": 124504
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8916956186294556,
      "learning_rate": 0.0002620454418478643,
      "loss": 3.1645,
      "step": 124505
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9088982343673706,
      "learning_rate": 0.0002620413841582394,
      "loss": 3.0421,
      "step": 124506
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.252575397491455,
      "learning_rate": 0.00026203732647567176,
      "loss": 3.1228,
      "step": 124507
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8070213794708252,
      "learning_rate": 0.0002620332688001621,
      "loss": 3.1449,
      "step": 124508
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1255006790161133,
      "learning_rate": 0.0002620292111317112,
      "loss": 2.9518,
      "step": 124509
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1902835369110107,
      "learning_rate": 0.00026202515347031966,
      "loss": 3.0773,
      "step": 124510
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2208163738250732,
      "learning_rate": 0.0002620210958159884,
      "loss": 2.7341,
      "step": 124511
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9681962728500366,
      "learning_rate": 0.0002620170381687182,
      "loss": 2.9027,
      "step": 124512
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0344436168670654,
      "learning_rate": 0.0002620129805285097,
      "loss": 2.8975,
      "step": 124513
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9369665384292603,
      "learning_rate": 0.00026200892289536374,
      "loss": 3.0407,
      "step": 124514
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4983301162719727,
      "learning_rate": 0.000262004865269281,
      "loss": 3.0129,
      "step": 124515
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.251448631286621,
      "learning_rate": 0.00026200080765026234,
      "loss": 3.2305,
      "step": 124516
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.035189151763916,
      "learning_rate": 0.0002619967500383084,
      "loss": 2.9577,
      "step": 124517
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3182969093322754,
      "learning_rate": 0.00026199269243341996,
      "loss": 2.7785,
      "step": 124518
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.618769884109497,
      "learning_rate": 0.00026198863483559777,
      "loss": 3.1418,
      "step": 124519
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9722037315368652,
      "learning_rate": 0.00026198457724484263,
      "loss": 2.8826,
      "step": 124520
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3862011432647705,
      "learning_rate": 0.0002619805196611553,
      "loss": 2.9845,
      "step": 124521
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.043794870376587,
      "learning_rate": 0.0002619764620845366,
      "loss": 2.8532,
      "step": 124522
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9402186870574951,
      "learning_rate": 0.0002619724045149871,
      "loss": 3.3496,
      "step": 124523
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8415647745132446,
      "learning_rate": 0.00026196834695250764,
      "loss": 2.8,
      "step": 124524
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8144220113754272,
      "learning_rate": 0.000261964289397099,
      "loss": 3.2711,
      "step": 124525
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7543662786483765,
      "learning_rate": 0.0002619602318487619,
      "loss": 2.9901,
      "step": 124526
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4415252208709717,
      "learning_rate": 0.0002619561743074971,
      "loss": 3.0094,
      "step": 124527
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9503133296966553,
      "learning_rate": 0.00026195211677330546,
      "loss": 3.1022,
      "step": 124528
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0183119773864746,
      "learning_rate": 0.00026194805924618757,
      "loss": 3.2633,
      "step": 124529
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8201735019683838,
      "learning_rate": 0.00026194400172614425,
      "loss": 2.9311,
      "step": 124530
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9043736457824707,
      "learning_rate": 0.0002619399442131762,
      "loss": 2.8225,
      "step": 124531
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7330241203308105,
      "learning_rate": 0.0002619358867072843,
      "loss": 3.0337,
      "step": 124532
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5429069995880127,
      "learning_rate": 0.0002619318292084692,
      "loss": 3.0104,
      "step": 124533
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4621007442474365,
      "learning_rate": 0.0002619277717167318,
      "loss": 2.8516,
      "step": 124534
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7015652656555176,
      "learning_rate": 0.0002619237142320727,
      "loss": 2.9875,
      "step": 124535
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5896553993225098,
      "learning_rate": 0.00026191965675449264,
      "loss": 2.9319,
      "step": 124536
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8681801557540894,
      "learning_rate": 0.00026191559928399243,
      "loss": 2.9018,
      "step": 124537
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9014575481414795,
      "learning_rate": 0.00026191154182057287,
      "loss": 2.9126,
      "step": 124538
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.5802701711654663,
      "learning_rate": 0.0002619074843642346,
      "loss": 3.0811,
      "step": 124539
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.963465452194214,
      "learning_rate": 0.00026190342691497865,
      "loss": 2.9528,
      "step": 124540
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.20984148979187,
      "learning_rate": 0.00026189936947280544,
      "loss": 2.6439,
      "step": 124541
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8772025108337402,
      "learning_rate": 0.0002618953120377158,
      "loss": 2.9586,
      "step": 124542
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4675164222717285,
      "learning_rate": 0.0002618912546097106,
      "loss": 3.0186,
      "step": 124543
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.339979887008667,
      "learning_rate": 0.0002618871971887905,
      "loss": 3.1701,
      "step": 124544
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9563629627227783,
      "learning_rate": 0.0002618831397749563,
      "loss": 2.9797,
      "step": 124545
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.016979932785034,
      "learning_rate": 0.00026187908236820884,
      "loss": 3.1316,
      "step": 124546
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7802939414978027,
      "learning_rate": 0.0002618750249685487,
      "loss": 3.399,
      "step": 124547
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.250340223312378,
      "learning_rate": 0.0002618709675759767,
      "loss": 2.8244,
      "step": 124548
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.072535753250122,
      "learning_rate": 0.0002618669101904936,
      "loss": 2.9681,
      "step": 124549
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.195722818374634,
      "learning_rate": 0.00026186285281210015,
      "loss": 2.7084,
      "step": 124550
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3272533416748047,
      "learning_rate": 0.0002618587954407971,
      "loss": 3.0212,
      "step": 124551
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1933586597442627,
      "learning_rate": 0.00026185473807658537,
      "loss": 3.1117,
      "step": 124552
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.114161491394043,
      "learning_rate": 0.00026185068071946544,
      "loss": 3.1025,
      "step": 124553
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.186107873916626,
      "learning_rate": 0.0002618466233694382,
      "loss": 2.9475,
      "step": 124554
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.607708692550659,
      "learning_rate": 0.0002618425660265044,
      "loss": 3.0444,
      "step": 124555
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9000022411346436,
      "learning_rate": 0.00026183850869066476,
      "loss": 3.1174,
      "step": 124556
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.773793339729309,
      "learning_rate": 0.0002618344513619201,
      "loss": 2.9104,
      "step": 124557
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1400251388549805,
      "learning_rate": 0.0002618303940402712,
      "loss": 3.1923,
      "step": 124558
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.352790594100952,
      "learning_rate": 0.00026182633672571865,
      "loss": 2.9833,
      "step": 124559
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7301453351974487,
      "learning_rate": 0.0002618222794182633,
      "loss": 2.7643,
      "step": 124560
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5437428951263428,
      "learning_rate": 0.0002618182221179059,
      "loss": 2.9416,
      "step": 124561
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.54479718208313,
      "learning_rate": 0.0002618141648246473,
      "loss": 3.0525,
      "step": 124562
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5323572158813477,
      "learning_rate": 0.00026181010753848806,
      "loss": 3.0707,
      "step": 124563
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.273467540740967,
      "learning_rate": 0.0002618060502594292,
      "loss": 2.9265,
      "step": 124564
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9295557737350464,
      "learning_rate": 0.0002618019929874712,
      "loss": 2.9876,
      "step": 124565
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0515220165252686,
      "learning_rate": 0.000261797935722615,
      "loss": 3.0868,
      "step": 124566
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.4782257080078125,
      "learning_rate": 0.00026179387846486115,
      "loss": 2.914,
      "step": 124567
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.909533143043518,
      "learning_rate": 0.00026178982121421063,
      "loss": 3.0218,
      "step": 124568
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4024147987365723,
      "learning_rate": 0.0002617857639706641,
      "loss": 3.1231,
      "step": 124569
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.171875,
      "learning_rate": 0.00026178170673422235,
      "loss": 2.9625,
      "step": 124570
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.174421787261963,
      "learning_rate": 0.00026177764950488606,
      "loss": 3.0346,
      "step": 124571
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2074825763702393,
      "learning_rate": 0.00026177359228265606,
      "loss": 2.9778,
      "step": 124572
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.864709734916687,
      "learning_rate": 0.000261769535067533,
      "loss": 3.2126,
      "step": 124573
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.07348895072937,
      "learning_rate": 0.0002617654778595177,
      "loss": 3.1437,
      "step": 124574
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.226759672164917,
      "learning_rate": 0.0002617614206586109,
      "loss": 2.9573,
      "step": 124575
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1390068531036377,
      "learning_rate": 0.0002617573634648135,
      "loss": 2.8278,
      "step": 124576
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9190789461135864,
      "learning_rate": 0.00026175330627812616,
      "loss": 2.838,
      "step": 124577
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.706127166748047,
      "learning_rate": 0.0002617492490985495,
      "loss": 2.7215,
      "step": 124578
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.178804636001587,
      "learning_rate": 0.00026174519192608437,
      "loss": 2.9548,
      "step": 124579
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.369694948196411,
      "learning_rate": 0.0002617411347607315,
      "loss": 2.9991,
      "step": 124580
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1452741622924805,
      "learning_rate": 0.00026173707760249177,
      "loss": 2.9037,
      "step": 124581
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.282423973083496,
      "learning_rate": 0.00026173302045136575,
      "loss": 2.9131,
      "step": 124582
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1054375171661377,
      "learning_rate": 0.0002617289633073543,
      "loss": 3.0453,
      "step": 124583
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.266378164291382,
      "learning_rate": 0.0002617249061704583,
      "loss": 3.2224,
      "step": 124584
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.036247730255127,
      "learning_rate": 0.0002617208490406782,
      "loss": 3.1762,
      "step": 124585
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.081082820892334,
      "learning_rate": 0.00026171679191801494,
      "loss": 3.1652,
      "step": 124586
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2331454753875732,
      "learning_rate": 0.0002617127348024693,
      "loss": 3.0363,
      "step": 124587
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.19134259223938,
      "learning_rate": 0.0002617086776940419,
      "loss": 2.834,
      "step": 124588
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.00835919380188,
      "learning_rate": 0.0002617046205927337,
      "loss": 3.1252,
      "step": 124589
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9643959999084473,
      "learning_rate": 0.00026170056349854526,
      "loss": 3.104,
      "step": 124590
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8049029111862183,
      "learning_rate": 0.0002616965064114774,
      "loss": 2.714,
      "step": 124591
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8878697156906128,
      "learning_rate": 0.00026169244933153097,
      "loss": 2.8873,
      "step": 124592
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.035907745361328,
      "learning_rate": 0.0002616883922587065,
      "loss": 3.2102,
      "step": 124593
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8154534101486206,
      "learning_rate": 0.00026168433519300495,
      "loss": 2.9578,
      "step": 124594
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7545607089996338,
      "learning_rate": 0.0002616802781344271,
      "loss": 3.0383,
      "step": 124595
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.96753990650177,
      "learning_rate": 0.00026167622108297345,
      "loss": 3.2512,
      "step": 124596
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.164196252822876,
      "learning_rate": 0.000261672164038645,
      "loss": 3.0752,
      "step": 124597
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3098206520080566,
      "learning_rate": 0.0002616681070014424,
      "loss": 2.7819,
      "step": 124598
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.091567039489746,
      "learning_rate": 0.00026166404997136645,
      "loss": 3.0997,
      "step": 124599
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4524459838867188,
      "learning_rate": 0.0002616599929484178,
      "loss": 3.1555,
      "step": 124600
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8062899112701416,
      "learning_rate": 0.0002616559359325974,
      "loss": 2.991,
      "step": 124601
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9433008432388306,
      "learning_rate": 0.0002616518789239058,
      "loss": 3.018,
      "step": 124602
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8223415613174438,
      "learning_rate": 0.0002616478219223438,
      "loss": 2.8653,
      "step": 124603
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9127109050750732,
      "learning_rate": 0.00026164376492791223,
      "loss": 3.1385,
      "step": 124604
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8783295154571533,
      "learning_rate": 0.0002616397079406118,
      "loss": 2.915,
      "step": 124605
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.019818067550659,
      "learning_rate": 0.0002616356509604434,
      "loss": 3.0788,
      "step": 124606
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1959025859832764,
      "learning_rate": 0.00026163159398740756,
      "loss": 2.8362,
      "step": 124607
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0036849975585938,
      "learning_rate": 0.0002616275370215051,
      "loss": 2.94,
      "step": 124608
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.191171884536743,
      "learning_rate": 0.00026162348006273685,
      "loss": 3.0258,
      "step": 124609
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0032784938812256,
      "learning_rate": 0.0002616194231111035,
      "loss": 2.9541,
      "step": 124610
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0381875038146973,
      "learning_rate": 0.00026161536616660575,
      "loss": 2.9629,
      "step": 124611
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8221975564956665,
      "learning_rate": 0.0002616113092292445,
      "loss": 3.0883,
      "step": 124612
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0237877368927,
      "learning_rate": 0.00026160725229902056,
      "loss": 3.1091,
      "step": 124613
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8823243379592896,
      "learning_rate": 0.0002616031953759344,
      "loss": 2.939,
      "step": 124614
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.396136999130249,
      "learning_rate": 0.00026159913845998697,
      "loss": 2.9939,
      "step": 124615
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.936845064163208,
      "learning_rate": 0.00026159508155117894,
      "loss": 3.024,
      "step": 124616
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.887650489807129,
      "learning_rate": 0.0002615910246495111,
      "loss": 2.8523,
      "step": 124617
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.394608497619629,
      "learning_rate": 0.0002615869677549843,
      "loss": 2.7903,
      "step": 124618
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2047371864318848,
      "learning_rate": 0.00026158291086759923,
      "loss": 2.8767,
      "step": 124619
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.188328981399536,
      "learning_rate": 0.0002615788539873566,
      "loss": 3.0404,
      "step": 124620
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.425367832183838,
      "learning_rate": 0.00026157479711425713,
      "loss": 3.1661,
      "step": 124621
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1298811435699463,
      "learning_rate": 0.0002615707402483016,
      "loss": 2.97,
      "step": 124622
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8426984548568726,
      "learning_rate": 0.0002615666833894908,
      "loss": 3.0044,
      "step": 124623
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0073330402374268,
      "learning_rate": 0.00026156262653782554,
      "loss": 2.9665,
      "step": 124624
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0949289798736572,
      "learning_rate": 0.0002615585696933066,
      "loss": 2.9495,
      "step": 124625
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2977166175842285,
      "learning_rate": 0.0002615545128559345,
      "loss": 2.8918,
      "step": 124626
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.170891523361206,
      "learning_rate": 0.0002615504560257102,
      "loss": 2.8425,
      "step": 124627
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.616319179534912,
      "learning_rate": 0.00026154639920263436,
      "loss": 3.1694,
      "step": 124628
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.941098690032959,
      "learning_rate": 0.00026154234238670777,
      "loss": 3.1337,
      "step": 124629
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8676211833953857,
      "learning_rate": 0.00026153828557793116,
      "loss": 3.1821,
      "step": 124630
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9988471269607544,
      "learning_rate": 0.00026153422877630547,
      "loss": 3.0445,
      "step": 124631
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0676019191741943,
      "learning_rate": 0.00026153017198183117,
      "loss": 2.9681,
      "step": 124632
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.179337978363037,
      "learning_rate": 0.00026152611519450914,
      "loss": 3.047,
      "step": 124633
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.448178768157959,
      "learning_rate": 0.0002615220584143401,
      "loss": 3.0449,
      "step": 124634
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0712482929229736,
      "learning_rate": 0.0002615180016413248,
      "loss": 2.9221,
      "step": 124635
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0761666297912598,
      "learning_rate": 0.0002615139448754641,
      "loss": 3.2417,
      "step": 124636
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1376612186431885,
      "learning_rate": 0.00026150988811675884,
      "loss": 3.012,
      "step": 124637
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0550172328948975,
      "learning_rate": 0.0002615058313652094,
      "loss": 2.8005,
      "step": 124638
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9848390817642212,
      "learning_rate": 0.0002615017746208168,
      "loss": 2.9891,
      "step": 124639
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1145758628845215,
      "learning_rate": 0.00026149771788358176,
      "loss": 3.1941,
      "step": 124640
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.437384843826294,
      "learning_rate": 0.000261493661153505,
      "loss": 3.0594,
      "step": 124641
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6117663383483887,
      "learning_rate": 0.0002614896044305873,
      "loss": 3.1584,
      "step": 124642
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9388628005981445,
      "learning_rate": 0.00026148554771482954,
      "loss": 3.1672,
      "step": 124643
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4669861793518066,
      "learning_rate": 0.0002614814910062322,
      "loss": 2.9003,
      "step": 124644
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1729605197906494,
      "learning_rate": 0.0002614774343047962,
      "loss": 3.2048,
      "step": 124645
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.665419578552246,
      "learning_rate": 0.00026147337761052225,
      "loss": 2.9364,
      "step": 124646
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9864259958267212,
      "learning_rate": 0.0002614693209234112,
      "loss": 2.8935,
      "step": 124647
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3043570518493652,
      "learning_rate": 0.00026146526424346365,
      "loss": 2.9379,
      "step": 124648
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0042595863342285,
      "learning_rate": 0.0002614612075706804,
      "loss": 2.9371,
      "step": 124649
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.298461437225342,
      "learning_rate": 0.00026145715090506247,
      "loss": 2.832,
      "step": 124650
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.053750514984131,
      "learning_rate": 0.00026145309424661017,
      "loss": 2.9188,
      "step": 124651
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8709832429885864,
      "learning_rate": 0.00026144903759532456,
      "loss": 2.9712,
      "step": 124652
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9492267370224,
      "learning_rate": 0.0002614449809512062,
      "loss": 3.0098,
      "step": 124653
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9608814716339111,
      "learning_rate": 0.000261440924314256,
      "loss": 2.6892,
      "step": 124654
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0224575996398926,
      "learning_rate": 0.00026143686768447465,
      "loss": 3.0914,
      "step": 124655
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.6464439630508423,
      "learning_rate": 0.00026143281106186304,
      "loss": 3.1898,
      "step": 124656
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.080172538757324,
      "learning_rate": 0.0002614287544464217,
      "loss": 2.9944,
      "step": 124657
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8239494562149048,
      "learning_rate": 0.0002614246978381515,
      "loss": 2.9424,
      "step": 124658
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.018784761428833,
      "learning_rate": 0.0002614206412370531,
      "loss": 2.9573,
      "step": 124659
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7458096742630005,
      "learning_rate": 0.00026141658464312736,
      "loss": 2.7391,
      "step": 124660
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.188966751098633,
      "learning_rate": 0.000261412528056375,
      "loss": 2.9118,
      "step": 124661
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.1419639587402344,
      "learning_rate": 0.00026140847147679687,
      "loss": 2.9004,
      "step": 124662
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0431671142578125,
      "learning_rate": 0.0002614044149043936,
      "loss": 2.9668,
      "step": 124663
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9781520366668701,
      "learning_rate": 0.0002614003583391659,
      "loss": 2.936,
      "step": 124664
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1245944499969482,
      "learning_rate": 0.00026139630178111464,
      "loss": 2.7722,
      "step": 124665
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.593594551086426,
      "learning_rate": 0.00026139224523024057,
      "loss": 2.9037,
      "step": 124666
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2437820434570312,
      "learning_rate": 0.00026138818868654435,
      "loss": 2.9747,
      "step": 124667
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2512850761413574,
      "learning_rate": 0.0002613841321500269,
      "loss": 3.0167,
      "step": 124668
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.048781156539917,
      "learning_rate": 0.0002613800756206888,
      "loss": 2.8886,
      "step": 124669
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.9441707134246826,
      "learning_rate": 0.0002613760190985309,
      "loss": 3.0861,
      "step": 124670
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.954072952270508,
      "learning_rate": 0.0002613719625835538,
      "loss": 2.9299,
      "step": 124671
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1785783767700195,
      "learning_rate": 0.0002613679060757585,
      "loss": 2.9982,
      "step": 124672
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.532950401306152,
      "learning_rate": 0.0002613638495751456,
      "loss": 2.8975,
      "step": 124673
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.304652690887451,
      "learning_rate": 0.000261359793081716,
      "loss": 3.1033,
      "step": 124674
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.874276638031006,
      "learning_rate": 0.00026135573659547023,
      "loss": 3.1675,
      "step": 124675
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.157059907913208,
      "learning_rate": 0.0002613516801164092,
      "loss": 3.1458,
      "step": 124676
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1348087787628174,
      "learning_rate": 0.0002613476236445336,
      "loss": 3.2568,
      "step": 124677
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6978273391723633,
      "learning_rate": 0.0002613435671798442,
      "loss": 3.0261,
      "step": 124678
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9820038080215454,
      "learning_rate": 0.0002613395107223417,
      "loss": 3.1053,
      "step": 124679
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.629883289337158,
      "learning_rate": 0.0002613354542720271,
      "loss": 2.9454,
      "step": 124680
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.986642837524414,
      "learning_rate": 0.0002613313978289008,
      "loss": 2.7744,
      "step": 124681
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2556893825531006,
      "learning_rate": 0.00026132734139296376,
      "loss": 3.1466,
      "step": 124682
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.132312774658203,
      "learning_rate": 0.00026132328496421666,
      "loss": 3.2097,
      "step": 124683
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2532734870910645,
      "learning_rate": 0.0002613192285426604,
      "loss": 2.9985,
      "step": 124684
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4914190769195557,
      "learning_rate": 0.00026131517212829553,
      "loss": 2.9279,
      "step": 124685
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5249266624450684,
      "learning_rate": 0.000261311115721123,
      "loss": 2.8994,
      "step": 124686
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2688918113708496,
      "learning_rate": 0.00026130705932114344,
      "loss": 2.8632,
      "step": 124687
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2105419635772705,
      "learning_rate": 0.00026130300292835754,
      "loss": 2.8453,
      "step": 124688
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1059083938598633,
      "learning_rate": 0.0002612989465427662,
      "loss": 3.0164,
      "step": 124689
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.024685859680176,
      "learning_rate": 0.0002612948901643701,
      "loss": 3.0022,
      "step": 124690
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9065275192260742,
      "learning_rate": 0.00026129083379317,
      "loss": 2.9438,
      "step": 124691
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1124043464660645,
      "learning_rate": 0.0002612867774291668,
      "loss": 2.9705,
      "step": 124692
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2226271629333496,
      "learning_rate": 0.00026128272107236094,
      "loss": 2.8553,
      "step": 124693
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.764143466949463,
      "learning_rate": 0.0002612786647227534,
      "loss": 2.96,
      "step": 124694
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0146524906158447,
      "learning_rate": 0.0002612746083803449,
      "loss": 2.8251,
      "step": 124695
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4840192794799805,
      "learning_rate": 0.0002612705520451362,
      "loss": 2.9796,
      "step": 124696
    },
    {
      "epoch": 1.62,
      "grad_norm": 5.808394908905029,
      "learning_rate": 0.00026126649571712795,
      "loss": 2.7594,
      "step": 124697
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.337564706802368,
      "learning_rate": 0.0002612624393963212,
      "loss": 3.1173,
      "step": 124698
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8056528568267822,
      "learning_rate": 0.0002612583830827163,
      "loss": 2.9954,
      "step": 124699
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.246174335479736,
      "learning_rate": 0.0002612543267763142,
      "loss": 3.0927,
      "step": 124700
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.712859153747559,
      "learning_rate": 0.00026125027047711573,
      "loss": 3.0873,
      "step": 124701
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.429569959640503,
      "learning_rate": 0.00026124621418512154,
      "loss": 3.0708,
      "step": 124702
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.740299940109253,
      "learning_rate": 0.0002612421579003324,
      "loss": 2.9695,
      "step": 124703
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2630927562713623,
      "learning_rate": 0.0002612381016227492,
      "loss": 2.8126,
      "step": 124704
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.5086214542388916,
      "learning_rate": 0.0002612340453523724,
      "loss": 2.845,
      "step": 124705
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.291853427886963,
      "learning_rate": 0.000261229989089203,
      "loss": 3.0053,
      "step": 124706
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9327166080474854,
      "learning_rate": 0.0002612259328332416,
      "loss": 3.0307,
      "step": 124707
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.445699691772461,
      "learning_rate": 0.0002612218765844891,
      "loss": 2.9541,
      "step": 124708
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.373281955718994,
      "learning_rate": 0.00026121782034294614,
      "loss": 2.8457,
      "step": 124709
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.866604804992676,
      "learning_rate": 0.0002612137641086135,
      "loss": 3.166,
      "step": 124710
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.922136664390564,
      "learning_rate": 0.0002612097078814921,
      "loss": 3.1598,
      "step": 124711
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8175331354141235,
      "learning_rate": 0.0002612056516615825,
      "loss": 2.957,
      "step": 124712
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.522491693496704,
      "learning_rate": 0.00026120159544888537,
      "loss": 3.0244,
      "step": 124713
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3327813148498535,
      "learning_rate": 0.0002611975392434017,
      "loss": 2.9908,
      "step": 124714
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8045576810836792,
      "learning_rate": 0.0002611934830451321,
      "loss": 3.0633,
      "step": 124715
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.233499526977539,
      "learning_rate": 0.0002611894268540773,
      "loss": 3.0084,
      "step": 124716
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8934509754180908,
      "learning_rate": 0.00026118537067023836,
      "loss": 2.8875,
      "step": 124717
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.245284080505371,
      "learning_rate": 0.0002611813144936156,
      "loss": 3.2333,
      "step": 124718
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0051825046539307,
      "learning_rate": 0.00026117725832421,
      "loss": 3.0406,
      "step": 124719
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0409891605377197,
      "learning_rate": 0.00026117320216202227,
      "loss": 2.8812,
      "step": 124720
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0025553703308105,
      "learning_rate": 0.0002611691460070532,
      "loss": 3.086,
      "step": 124721
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0383167266845703,
      "learning_rate": 0.0002611650898593034,
      "loss": 3.0257,
      "step": 124722
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.253321409225464,
      "learning_rate": 0.000261161033718774,
      "loss": 3.0678,
      "step": 124723
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7663718461990356,
      "learning_rate": 0.0002611569775854653,
      "loss": 3.0986,
      "step": 124724
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.021951675415039,
      "learning_rate": 0.00026115292145937827,
      "loss": 3.0396,
      "step": 124725
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.993854284286499,
      "learning_rate": 0.0002611488653405137,
      "loss": 3.1286,
      "step": 124726
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3211190700531006,
      "learning_rate": 0.0002611448092288722,
      "loss": 3.0016,
      "step": 124727
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.59790301322937,
      "learning_rate": 0.00026114075312445466,
      "loss": 3.0744,
      "step": 124728
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8566468954086304,
      "learning_rate": 0.0002611366970272619,
      "loss": 3.1268,
      "step": 124729
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.117070436477661,
      "learning_rate": 0.00026113264093729447,
      "loss": 3.0578,
      "step": 124730
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.568284273147583,
      "learning_rate": 0.00026112858485455314,
      "loss": 2.9395,
      "step": 124731
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0851104259490967,
      "learning_rate": 0.0002611245287790388,
      "loss": 3.2516,
      "step": 124732
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7540578842163086,
      "learning_rate": 0.00026112047271075215,
      "loss": 3.1953,
      "step": 124733
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2492260932922363,
      "learning_rate": 0.0002611164166496939,
      "loss": 2.7553,
      "step": 124734
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.459179639816284,
      "learning_rate": 0.000261112360595865,
      "loss": 2.8292,
      "step": 124735
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.264197826385498,
      "learning_rate": 0.00026110830454926593,
      "loss": 2.9389,
      "step": 124736
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9885129928588867,
      "learning_rate": 0.0002611042485098975,
      "loss": 2.9295,
      "step": 124737
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3549249172210693,
      "learning_rate": 0.00026110019247776057,
      "loss": 2.935,
      "step": 124738
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2620062828063965,
      "learning_rate": 0.00026109613645285585,
      "loss": 3.0851,
      "step": 124739
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8383231163024902,
      "learning_rate": 0.0002610920804351841,
      "loss": 2.8763,
      "step": 124740
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.5704095363616943,
      "learning_rate": 0.00026108802442474617,
      "loss": 3.009,
      "step": 124741
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1380274295806885,
      "learning_rate": 0.00026108396842154256,
      "loss": 2.9077,
      "step": 124742
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.073000431060791,
      "learning_rate": 0.0002610799124255742,
      "loss": 3.1128,
      "step": 124743
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8461118936538696,
      "learning_rate": 0.0002610758564368419,
      "loss": 3.1832,
      "step": 124744
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4417061805725098,
      "learning_rate": 0.0002610718004553462,
      "loss": 2.8538,
      "step": 124745
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.292879581451416,
      "learning_rate": 0.0002610677444810881,
      "loss": 2.6677,
      "step": 124746
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0059587955474854,
      "learning_rate": 0.00026106368851406826,
      "loss": 2.951,
      "step": 124747
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.911738872528076,
      "learning_rate": 0.00026105963255428733,
      "loss": 2.8601,
      "step": 124748
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.228977918624878,
      "learning_rate": 0.00026105557660174617,
      "loss": 3.0273,
      "step": 124749
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9304250478744507,
      "learning_rate": 0.0002610515206564455,
      "loss": 3.1094,
      "step": 124750
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2410995960235596,
      "learning_rate": 0.0002610474647183861,
      "loss": 2.9115,
      "step": 124751
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.208669662475586,
      "learning_rate": 0.0002610434087875687,
      "loss": 3.1287,
      "step": 124752
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1973609924316406,
      "learning_rate": 0.0002610393528639942,
      "loss": 3.1482,
      "step": 124753
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7296524047851562,
      "learning_rate": 0.0002610352969476631,
      "loss": 3.1913,
      "step": 124754
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8858932256698608,
      "learning_rate": 0.00026103124103857625,
      "loss": 2.9985,
      "step": 124755
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6612257957458496,
      "learning_rate": 0.00026102718513673444,
      "loss": 2.8492,
      "step": 124756
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.9450581073760986,
      "learning_rate": 0.0002610231292421384,
      "loss": 3.0449,
      "step": 124757
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.2957353591918945,
      "learning_rate": 0.00026101907335478896,
      "loss": 3.2148,
      "step": 124758
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.6765248775482178,
      "learning_rate": 0.0002610150174746868,
      "loss": 3.0104,
      "step": 124759
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8859243392944336,
      "learning_rate": 0.00026101096160183266,
      "loss": 2.8853,
      "step": 124760
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.6559345722198486,
      "learning_rate": 0.0002610069057362273,
      "loss": 2.8682,
      "step": 124761
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.8623554706573486,
      "learning_rate": 0.0002610028498778715,
      "loss": 2.9542,
      "step": 124762
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.7678263187408447,
      "learning_rate": 0.000260998794026766,
      "loss": 2.6143,
      "step": 124763
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1984128952026367,
      "learning_rate": 0.0002609947381829116,
      "loss": 2.812,
      "step": 124764
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7260562181472778,
      "learning_rate": 0.000260990682346309,
      "loss": 3.0413,
      "step": 124765
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9143831729888916,
      "learning_rate": 0.00026098662651695897,
      "loss": 3.141,
      "step": 124766
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.9891533851623535,
      "learning_rate": 0.00026098257069486223,
      "loss": 3.0111,
      "step": 124767
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0877256393432617,
      "learning_rate": 0.0002609785148800197,
      "loss": 2.8571,
      "step": 124768
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9873104095458984,
      "learning_rate": 0.00026097445907243184,
      "loss": 2.9798,
      "step": 124769
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9847004413604736,
      "learning_rate": 0.0002609704032720996,
      "loss": 3.1453,
      "step": 124770
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.7443268299102783,
      "learning_rate": 0.00026096634747902375,
      "loss": 2.8384,
      "step": 124771
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.4683568477630615,
      "learning_rate": 0.00026096229169320496,
      "loss": 2.7625,
      "step": 124772
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.9679789543151855,
      "learning_rate": 0.00026095823591464393,
      "loss": 2.8671,
      "step": 124773
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8652691841125488,
      "learning_rate": 0.00026095418014334163,
      "loss": 3.0012,
      "step": 124774
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9649678468704224,
      "learning_rate": 0.0002609501243792986,
      "loss": 2.8868,
      "step": 124775
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.374361753463745,
      "learning_rate": 0.0002609460686225158,
      "loss": 2.9989,
      "step": 124776
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0704843997955322,
      "learning_rate": 0.00026094201287299375,
      "loss": 3.0311,
      "step": 124777
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.179295063018799,
      "learning_rate": 0.0002609379571307334,
      "loss": 3.1618,
      "step": 124778
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1714565753936768,
      "learning_rate": 0.0002609339013957353,
      "loss": 3.046,
      "step": 124779
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8648892641067505,
      "learning_rate": 0.0002609298456680004,
      "loss": 3.0478,
      "step": 124780
    },
    {
      "epoch": 1.62,
      "grad_norm": 4.116241931915283,
      "learning_rate": 0.0002609257899475294,
      "loss": 2.8441,
      "step": 124781
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8806376457214355,
      "learning_rate": 0.000260921734234323,
      "loss": 2.807,
      "step": 124782
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0927648544311523,
      "learning_rate": 0.00026091767852838197,
      "loss": 2.9312,
      "step": 124783
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.2571914196014404,
      "learning_rate": 0.0002609136228297072,
      "loss": 3.0417,
      "step": 124784
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.7469022274017334,
      "learning_rate": 0.0002609095671382992,
      "loss": 3.2515,
      "step": 124785
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.959431767463684,
      "learning_rate": 0.00026090551145415883,
      "loss": 3.0386,
      "step": 124786
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8485686779022217,
      "learning_rate": 0.0002609014557772869,
      "loss": 3.1587,
      "step": 124787
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.815864086151123,
      "learning_rate": 0.00026089740010768413,
      "loss": 2.8913,
      "step": 124788
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4947309494018555,
      "learning_rate": 0.0002608933444453513,
      "loss": 2.9127,
      "step": 124789
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.590625286102295,
      "learning_rate": 0.0002608892887902892,
      "loss": 2.9286,
      "step": 124790
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3891162872314453,
      "learning_rate": 0.00026088523314249845,
      "loss": 2.8685,
      "step": 124791
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1695823669433594,
      "learning_rate": 0.00026088117750197983,
      "loss": 2.8842,
      "step": 124792
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.0446338653564453,
      "learning_rate": 0.00026087712186873414,
      "loss": 2.8368,
      "step": 124793
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.8966073989868164,
      "learning_rate": 0.0002608730662427621,
      "loss": 2.8594,
      "step": 124794
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9244667291641235,
      "learning_rate": 0.00026086901062406456,
      "loss": 2.8452,
      "step": 124795
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4019744396209717,
      "learning_rate": 0.00026086495501264235,
      "loss": 3.0617,
      "step": 124796
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9206990003585815,
      "learning_rate": 0.0002608608994084959,
      "loss": 3.1059,
      "step": 124797
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.3710851669311523,
      "learning_rate": 0.00026085684381162616,
      "loss": 3.0113,
      "step": 124798
    },
    {
      "epoch": 1.62,
      "grad_norm": 1.9304533004760742,
      "learning_rate": 0.00026085278822203387,
      "loss": 2.9439,
      "step": 124799
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.0791540145874023,
      "learning_rate": 0.0002608487326397198,
      "loss": 2.9709,
      "step": 124800
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.4842114448547363,
      "learning_rate": 0.0002608446770646847,
      "loss": 3.1328,
      "step": 124801
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.102954149246216,
      "learning_rate": 0.00026084062149692947,
      "loss": 2.9306,
      "step": 124802
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.310575485229492,
      "learning_rate": 0.0002608365659364545,
      "loss": 3.0229,
      "step": 124803
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.207831382751465,
      "learning_rate": 0.0002608325103832608,
      "loss": 2.7838,
      "step": 124804
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8580974340438843,
      "learning_rate": 0.00026082845483734905,
      "loss": 3.0886,
      "step": 124805
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.979615569114685,
      "learning_rate": 0.00026082439929872006,
      "loss": 3.1324,
      "step": 124806
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.473078966140747,
      "learning_rate": 0.0002608203437673745,
      "loss": 2.9668,
      "step": 124807
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2143068313598633,
      "learning_rate": 0.00026081628824331336,
      "loss": 3.0627,
      "step": 124808
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.854184865951538,
      "learning_rate": 0.00026081223272653707,
      "loss": 3.0274,
      "step": 124809
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.14569354057312,
      "learning_rate": 0.0002608081772170465,
      "loss": 2.7562,
      "step": 124810
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.148781776428223,
      "learning_rate": 0.0002608041217148425,
      "loss": 3.0473,
      "step": 124811
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.575575351715088,
      "learning_rate": 0.0002608000662199257,
      "loss": 3.0577,
      "step": 124812
    },
    {
      "epoch": 1.63,
      "grad_norm": 5.200822353363037,
      "learning_rate": 0.0002607960107322969,
      "loss": 2.9974,
      "step": 124813
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8939213752746582,
      "learning_rate": 0.00026079195525195703,
      "loss": 2.9233,
      "step": 124814
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9264191389083862,
      "learning_rate": 0.0002607878997789065,
      "loss": 2.8914,
      "step": 124815
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9007976055145264,
      "learning_rate": 0.00026078384431314625,
      "loss": 3.0092,
      "step": 124816
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8029677867889404,
      "learning_rate": 0.00026077978885467705,
      "loss": 2.8331,
      "step": 124817
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0458946228027344,
      "learning_rate": 0.0002607757334034996,
      "loss": 2.9578,
      "step": 124818
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.472822666168213,
      "learning_rate": 0.0002607716779596147,
      "loss": 2.7821,
      "step": 124819
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.463259220123291,
      "learning_rate": 0.0002607676225230232,
      "loss": 3.2615,
      "step": 124820
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4245893955230713,
      "learning_rate": 0.0002607635670937256,
      "loss": 2.8765,
      "step": 124821
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0763931274414062,
      "learning_rate": 0.00026075951167172284,
      "loss": 3.3697,
      "step": 124822
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2541139125823975,
      "learning_rate": 0.0002607554562570156,
      "loss": 2.9381,
      "step": 124823
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.587613105773926,
      "learning_rate": 0.00026075140084960463,
      "loss": 3.109,
      "step": 124824
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3078653812408447,
      "learning_rate": 0.00026074734544949077,
      "loss": 3.0336,
      "step": 124825
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7773905992507935,
      "learning_rate": 0.0002607432900566748,
      "loss": 3.316,
      "step": 124826
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8525549173355103,
      "learning_rate": 0.00026073923467115736,
      "loss": 3.1585,
      "step": 124827
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.391883373260498,
      "learning_rate": 0.0002607351792929391,
      "loss": 2.8329,
      "step": 124828
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.4313013553619385,
      "learning_rate": 0.00026073112392202097,
      "loss": 2.981,
      "step": 124829
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.369293451309204,
      "learning_rate": 0.00026072706855840373,
      "loss": 3.0241,
      "step": 124830
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9572405815124512,
      "learning_rate": 0.000260723013202088,
      "loss": 2.918,
      "step": 124831
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.846590995788574,
      "learning_rate": 0.00026071895785307475,
      "loss": 3.1972,
      "step": 124832
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3946382999420166,
      "learning_rate": 0.00026071490251136447,
      "loss": 3.0342,
      "step": 124833
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.0418694019317627,
      "learning_rate": 0.00026071084717695797,
      "loss": 2.9905,
      "step": 124834
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7401220798492432,
      "learning_rate": 0.0002607067918498561,
      "loss": 3.242,
      "step": 124835
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9682644605636597,
      "learning_rate": 0.0002607027365300596,
      "loss": 3.0981,
      "step": 124836
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9212877750396729,
      "learning_rate": 0.0002606986812175692,
      "loss": 3.1505,
      "step": 124837
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7273298501968384,
      "learning_rate": 0.0002606946259123858,
      "loss": 2.8602,
      "step": 124838
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2330024242401123,
      "learning_rate": 0.0002606905706145098,
      "loss": 2.9183,
      "step": 124839
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9168612957000732,
      "learning_rate": 0.00026068651532394225,
      "loss": 2.9363,
      "step": 124840
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8926483392715454,
      "learning_rate": 0.0002606824600406838,
      "loss": 3.1475,
      "step": 124841
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1112568378448486,
      "learning_rate": 0.0002606784047647352,
      "loss": 2.8962,
      "step": 124842
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9953597784042358,
      "learning_rate": 0.0002606743494960973,
      "loss": 3.0182,
      "step": 124843
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0475664138793945,
      "learning_rate": 0.00026067029423477067,
      "loss": 3.0293,
      "step": 124844
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.478339672088623,
      "learning_rate": 0.0002606662389807563,
      "loss": 2.7717,
      "step": 124845
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1496078968048096,
      "learning_rate": 0.0002606621837340548,
      "loss": 2.9553,
      "step": 124846
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7042744159698486,
      "learning_rate": 0.00026065812849466693,
      "loss": 3.1194,
      "step": 124847
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.097362756729126,
      "learning_rate": 0.0002606540732625934,
      "loss": 2.5942,
      "step": 124848
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.738364338874817,
      "learning_rate": 0.000260650018037835,
      "loss": 2.9667,
      "step": 124849
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.163027048110962,
      "learning_rate": 0.0002606459628203926,
      "loss": 2.6313,
      "step": 124850
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.0886595249176025,
      "learning_rate": 0.0002606419076102668,
      "loss": 3.0724,
      "step": 124851
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.76818585395813,
      "learning_rate": 0.00026063785240745845,
      "loss": 2.978,
      "step": 124852
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9555065631866455,
      "learning_rate": 0.0002606337972119683,
      "loss": 2.654,
      "step": 124853
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.826662540435791,
      "learning_rate": 0.00026062974202379697,
      "loss": 3.0267,
      "step": 124854
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0318493843078613,
      "learning_rate": 0.00026062568684294537,
      "loss": 2.9906,
      "step": 124855
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3979814052581787,
      "learning_rate": 0.00026062163166941415,
      "loss": 2.9537,
      "step": 124856
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9170621633529663,
      "learning_rate": 0.0002606175765032042,
      "loss": 2.9714,
      "step": 124857
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2297723293304443,
      "learning_rate": 0.0002606135213443161,
      "loss": 3.0303,
      "step": 124858
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1933135986328125,
      "learning_rate": 0.00026060946619275077,
      "loss": 2.8691,
      "step": 124859
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0070512294769287,
      "learning_rate": 0.00026060541104850883,
      "loss": 2.8607,
      "step": 124860
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8321971893310547,
      "learning_rate": 0.0002606013559115911,
      "loss": 3.0986,
      "step": 124861
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9694658517837524,
      "learning_rate": 0.0002605973007819983,
      "loss": 3.0608,
      "step": 124862
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0679008960723877,
      "learning_rate": 0.0002605932456597313,
      "loss": 2.9046,
      "step": 124863
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9353766441345215,
      "learning_rate": 0.00026058919054479065,
      "loss": 2.9668,
      "step": 124864
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.90964937210083,
      "learning_rate": 0.00026058513543717723,
      "loss": 2.7916,
      "step": 124865
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.5986912250518799,
      "learning_rate": 0.0002605810803368918,
      "loss": 2.9831,
      "step": 124866
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.431955099105835,
      "learning_rate": 0.0002605770252439351,
      "loss": 2.893,
      "step": 124867
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8228659629821777,
      "learning_rate": 0.00026057297015830784,
      "loss": 2.8835,
      "step": 124868
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8746588230133057,
      "learning_rate": 0.0002605689150800109,
      "loss": 2.9679,
      "step": 124869
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8452399969100952,
      "learning_rate": 0.00026056486000904485,
      "loss": 2.9084,
      "step": 124870
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.970627784729004,
      "learning_rate": 0.0002605608049454105,
      "loss": 2.9456,
      "step": 124871
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.145256996154785,
      "learning_rate": 0.0002605567498891087,
      "loss": 2.8915,
      "step": 124872
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8683278560638428,
      "learning_rate": 0.00026055269484014017,
      "loss": 2.7508,
      "step": 124873
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9120997190475464,
      "learning_rate": 0.00026054863979850556,
      "loss": 3.0794,
      "step": 124874
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.170407772064209,
      "learning_rate": 0.0002605445847642059,
      "loss": 2.9564,
      "step": 124875
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.997175693511963,
      "learning_rate": 0.00026054052973724156,
      "loss": 3.1805,
      "step": 124876
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.6736748218536377,
      "learning_rate": 0.00026053647471761354,
      "loss": 2.9945,
      "step": 124877
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.711911201477051,
      "learning_rate": 0.0002605324197053225,
      "loss": 2.8567,
      "step": 124878
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4137165546417236,
      "learning_rate": 0.00026052836470036924,
      "loss": 2.8021,
      "step": 124879
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.750159502029419,
      "learning_rate": 0.0002605243097027545,
      "loss": 2.9903,
      "step": 124880
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.54495906829834,
      "learning_rate": 0.0002605202547124791,
      "loss": 3.0978,
      "step": 124881
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2584376335144043,
      "learning_rate": 0.0002605161997295437,
      "loss": 3.0831,
      "step": 124882
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9608482122421265,
      "learning_rate": 0.000260512144753949,
      "loss": 2.819,
      "step": 124883
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.915683627128601,
      "learning_rate": 0.0002605080897856959,
      "loss": 2.877,
      "step": 124884
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2320401668548584,
      "learning_rate": 0.00026050403482478504,
      "loss": 2.8996,
      "step": 124885
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5987861156463623,
      "learning_rate": 0.0002604999798712173,
      "loss": 3.0923,
      "step": 124886
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0873489379882812,
      "learning_rate": 0.00026049592492499344,
      "loss": 3.0764,
      "step": 124887
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8264697790145874,
      "learning_rate": 0.000260491869986114,
      "loss": 2.8432,
      "step": 124888
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.906125068664551,
      "learning_rate": 0.0002604878150545799,
      "loss": 2.964,
      "step": 124889
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.855567216873169,
      "learning_rate": 0.0002604837601303918,
      "loss": 2.9877,
      "step": 124890
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9810400009155273,
      "learning_rate": 0.00026047970521355057,
      "loss": 3.1159,
      "step": 124891
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9953811168670654,
      "learning_rate": 0.0002604756503040569,
      "loss": 3.1165,
      "step": 124892
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6303133964538574,
      "learning_rate": 0.00026047159540191166,
      "loss": 3.0983,
      "step": 124893
    },
    {
      "epoch": 1.63,
      "grad_norm": 5.145571708679199,
      "learning_rate": 0.00026046754050711537,
      "loss": 3.0682,
      "step": 124894
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.3340933322906494,
      "learning_rate": 0.0002604634856196689,
      "loss": 3.0274,
      "step": 124895
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.712106704711914,
      "learning_rate": 0.000260459430739573,
      "loss": 2.7652,
      "step": 124896
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0277576446533203,
      "learning_rate": 0.0002604553758668285,
      "loss": 2.8368,
      "step": 124897
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8034348487854004,
      "learning_rate": 0.0002604513210014361,
      "loss": 2.9896,
      "step": 124898
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.109365224838257,
      "learning_rate": 0.0002604472661433966,
      "loss": 3.1468,
      "step": 124899
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.496086597442627,
      "learning_rate": 0.0002604432112927106,
      "loss": 2.8114,
      "step": 124900
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8731294870376587,
      "learning_rate": 0.000260439156449379,
      "loss": 3.4419,
      "step": 124901
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8313642740249634,
      "learning_rate": 0.0002604351016134025,
      "loss": 2.8845,
      "step": 124902
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3825511932373047,
      "learning_rate": 0.0002604310467847818,
      "loss": 3.3609,
      "step": 124903
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.0887973308563232,
      "learning_rate": 0.00026042699196351775,
      "loss": 2.7949,
      "step": 124904
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.1252939701080322,
      "learning_rate": 0.0002604229371496112,
      "loss": 3.0021,
      "step": 124905
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8990075588226318,
      "learning_rate": 0.00026041888234306265,
      "loss": 3.0352,
      "step": 124906
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2046427726745605,
      "learning_rate": 0.00026041482754387294,
      "loss": 3.1057,
      "step": 124907
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.455925464630127,
      "learning_rate": 0.0002604107727520429,
      "loss": 2.9238,
      "step": 124908
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2495315074920654,
      "learning_rate": 0.0002604067179675732,
      "loss": 2.9578,
      "step": 124909
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6876565217971802,
      "learning_rate": 0.0002604026631904647,
      "loss": 2.9999,
      "step": 124910
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.23857045173645,
      "learning_rate": 0.00026039860842071806,
      "loss": 2.7396,
      "step": 124911
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.123243808746338,
      "learning_rate": 0.00026039455365833424,
      "loss": 3.2021,
      "step": 124912
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0771265029907227,
      "learning_rate": 0.0002603904989033136,
      "loss": 2.9658,
      "step": 124913
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8953620195388794,
      "learning_rate": 0.00026038644415565717,
      "loss": 2.7113,
      "step": 124914
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7100729942321777,
      "learning_rate": 0.00026038238941536565,
      "loss": 2.7556,
      "step": 124915
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8750793933868408,
      "learning_rate": 0.00026037833468243977,
      "loss": 2.9377,
      "step": 124916
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.748443841934204,
      "learning_rate": 0.00026037427995688033,
      "loss": 3.1322,
      "step": 124917
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6950807571411133,
      "learning_rate": 0.00026037022523868816,
      "loss": 2.9401,
      "step": 124918
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0441231727600098,
      "learning_rate": 0.0002603661705278638,
      "loss": 2.6977,
      "step": 124919
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9883781671524048,
      "learning_rate": 0.00026036211582440815,
      "loss": 3.0793,
      "step": 124920
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8456265926361084,
      "learning_rate": 0.0002603580611283219,
      "loss": 3.0335,
      "step": 124921
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8928066492080688,
      "learning_rate": 0.0002603540064396058,
      "loss": 2.9586,
      "step": 124922
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6754908561706543,
      "learning_rate": 0.0002603499517582607,
      "loss": 2.8628,
      "step": 124923
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.934415340423584,
      "learning_rate": 0.0002603458970842874,
      "loss": 2.8853,
      "step": 124924
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4599785804748535,
      "learning_rate": 0.0002603418424176864,
      "loss": 3.1439,
      "step": 124925
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.3711414337158203,
      "learning_rate": 0.0002603377877584586,
      "loss": 2.8775,
      "step": 124926
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.146068811416626,
      "learning_rate": 0.0002603337331066048,
      "loss": 3.0237,
      "step": 124927
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3084304332733154,
      "learning_rate": 0.0002603296784621257,
      "loss": 3.1049,
      "step": 124928
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0649147033691406,
      "learning_rate": 0.00026032562382502197,
      "loss": 2.779,
      "step": 124929
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9998407363891602,
      "learning_rate": 0.0002603215691952947,
      "loss": 2.9786,
      "step": 124930
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.976588249206543,
      "learning_rate": 0.0002603175145729442,
      "loss": 2.9537,
      "step": 124931
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7123825550079346,
      "learning_rate": 0.0002603134599579714,
      "loss": 3.0336,
      "step": 124932
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.275296449661255,
      "learning_rate": 0.00026030940535037714,
      "loss": 3.2153,
      "step": 124933
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.959524393081665,
      "learning_rate": 0.0002603053507501621,
      "loss": 2.8334,
      "step": 124934
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9123324155807495,
      "learning_rate": 0.000260301296157327,
      "loss": 3.1043,
      "step": 124935
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.444375991821289,
      "learning_rate": 0.0002602972415718728,
      "loss": 3.1537,
      "step": 124936
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.340658664703369,
      "learning_rate": 0.00026029318699379993,
      "loss": 3.1083,
      "step": 124937
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.585516929626465,
      "learning_rate": 0.0002602891324231094,
      "loss": 3.0067,
      "step": 124938
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3886704444885254,
      "learning_rate": 0.00026028507785980185,
      "loss": 3.0776,
      "step": 124939
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0231618881225586,
      "learning_rate": 0.000260281023303878,
      "loss": 3.0507,
      "step": 124940
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2325165271759033,
      "learning_rate": 0.0002602769687553387,
      "loss": 3.0088,
      "step": 124941
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5302398204803467,
      "learning_rate": 0.00026027291421418464,
      "loss": 2.9574,
      "step": 124942
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0802409648895264,
      "learning_rate": 0.0002602688596804166,
      "loss": 3.1235,
      "step": 124943
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.158872127532959,
      "learning_rate": 0.0002602648051540353,
      "loss": 2.6214,
      "step": 124944
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9746124744415283,
      "learning_rate": 0.0002602607506350416,
      "loss": 2.7092,
      "step": 124945
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9306997060775757,
      "learning_rate": 0.0002602566961234362,
      "loss": 3.2302,
      "step": 124946
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0779800415039062,
      "learning_rate": 0.0002602526416192197,
      "loss": 3.1255,
      "step": 124947
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.814507246017456,
      "learning_rate": 0.0002602485871223931,
      "loss": 3.0236,
      "step": 124948
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9153695106506348,
      "learning_rate": 0.00026024453263295697,
      "loss": 2.9925,
      "step": 124949
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.5953402519226074,
      "learning_rate": 0.0002602404781509121,
      "loss": 2.9999,
      "step": 124950
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3636043071746826,
      "learning_rate": 0.00026023642367625934,
      "loss": 2.8988,
      "step": 124951
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0586862564086914,
      "learning_rate": 0.0002602323692089994,
      "loss": 3.01,
      "step": 124952
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.473646879196167,
      "learning_rate": 0.000260228314749133,
      "loss": 3.1306,
      "step": 124953
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8932738304138184,
      "learning_rate": 0.0002602242602966609,
      "loss": 2.8803,
      "step": 124954
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9901599884033203,
      "learning_rate": 0.0002602202058515838,
      "loss": 2.9975,
      "step": 124955
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2975242137908936,
      "learning_rate": 0.00026021615141390254,
      "loss": 3.1233,
      "step": 124956
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.4260597229003906,
      "learning_rate": 0.00026021209698361787,
      "loss": 3.3567,
      "step": 124957
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.245748281478882,
      "learning_rate": 0.0002602080425607305,
      "loss": 2.9137,
      "step": 124958
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9715853929519653,
      "learning_rate": 0.0002602039881452413,
      "loss": 2.7217,
      "step": 124959
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.053905963897705,
      "learning_rate": 0.0002601999337371509,
      "loss": 2.6568,
      "step": 124960
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1387526988983154,
      "learning_rate": 0.00026019587933646,
      "loss": 2.5561,
      "step": 124961
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.539322853088379,
      "learning_rate": 0.0002601918249431695,
      "loss": 2.8839,
      "step": 124962
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3698296546936035,
      "learning_rate": 0.00026018777055728003,
      "loss": 2.9681,
      "step": 124963
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.014925718307495,
      "learning_rate": 0.0002601837161787924,
      "loss": 3.047,
      "step": 124964
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7451021671295166,
      "learning_rate": 0.0002601796618077074,
      "loss": 2.9618,
      "step": 124965
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.139339923858643,
      "learning_rate": 0.0002601756074440259,
      "loss": 2.7271,
      "step": 124966
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.2800750732421875,
      "learning_rate": 0.00026017155308774836,
      "loss": 3.1611,
      "step": 124967
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9991014003753662,
      "learning_rate": 0.00026016749873887567,
      "loss": 3.1244,
      "step": 124968
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8978630304336548,
      "learning_rate": 0.00026016344439740857,
      "loss": 3.1494,
      "step": 124969
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.67560076713562,
      "learning_rate": 0.0002601593900633479,
      "loss": 3.1167,
      "step": 124970
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.240148544311523,
      "learning_rate": 0.00026015533573669434,
      "loss": 2.9078,
      "step": 124971
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8026397228240967,
      "learning_rate": 0.0002601512814174487,
      "loss": 2.9368,
      "step": 124972
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.060185670852661,
      "learning_rate": 0.0002601472271056116,
      "loss": 3.0696,
      "step": 124973
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9401443004608154,
      "learning_rate": 0.00026014317280118396,
      "loss": 2.9041,
      "step": 124974
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.372086048126221,
      "learning_rate": 0.0002601391185041664,
      "loss": 2.8047,
      "step": 124975
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9149184226989746,
      "learning_rate": 0.0002601350642145597,
      "loss": 3.0087,
      "step": 124976
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9304919242858887,
      "learning_rate": 0.0002601310099323647,
      "loss": 3.3454,
      "step": 124977
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.503584384918213,
      "learning_rate": 0.0002601269556575821,
      "loss": 2.9216,
      "step": 124978
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0252790451049805,
      "learning_rate": 0.00026012290139021274,
      "loss": 2.8647,
      "step": 124979
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.7961068153381348,
      "learning_rate": 0.00026011884713025716,
      "loss": 2.8842,
      "step": 124980
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.558436632156372,
      "learning_rate": 0.0002601147928777162,
      "loss": 3.1036,
      "step": 124981
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.010004043579102,
      "learning_rate": 0.0002601107386325907,
      "loss": 2.8422,
      "step": 124982
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.303077220916748,
      "learning_rate": 0.0002601066843948814,
      "loss": 2.815,
      "step": 124983
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8828580379486084,
      "learning_rate": 0.000260102630164589,
      "loss": 3.0988,
      "step": 124984
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0853617191314697,
      "learning_rate": 0.0002600985759417144,
      "loss": 3.2673,
      "step": 124985
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.625417709350586,
      "learning_rate": 0.00026009452172625806,
      "loss": 3.2294,
      "step": 124986
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.07004976272583,
      "learning_rate": 0.00026009046751822095,
      "loss": 3.0252,
      "step": 124987
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7611372470855713,
      "learning_rate": 0.00026008641331760375,
      "loss": 3.0333,
      "step": 124988
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.860970377922058,
      "learning_rate": 0.00026008235912440726,
      "loss": 3.0352,
      "step": 124989
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2246036529541016,
      "learning_rate": 0.0002600783049386322,
      "loss": 2.9541,
      "step": 124990
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.661402463912964,
      "learning_rate": 0.00026007425076027946,
      "loss": 2.9195,
      "step": 124991
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2626852989196777,
      "learning_rate": 0.00026007019658934956,
      "loss": 3.1644,
      "step": 124992
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.854379653930664,
      "learning_rate": 0.00026006614242584335,
      "loss": 3.0896,
      "step": 124993
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9358620643615723,
      "learning_rate": 0.0002600620882697616,
      "loss": 2.7561,
      "step": 124994
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.844172239303589,
      "learning_rate": 0.00026005803412110505,
      "loss": 3.0476,
      "step": 124995
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.3822500705718994,
      "learning_rate": 0.0002600539799798745,
      "loss": 2.8296,
      "step": 124996
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3698480129241943,
      "learning_rate": 0.00026004992584607075,
      "loss": 2.9104,
      "step": 124997
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9832730293273926,
      "learning_rate": 0.0002600458717196944,
      "loss": 2.7688,
      "step": 124998
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1670420169830322,
      "learning_rate": 0.00026004181760074625,
      "loss": 2.9293,
      "step": 124999
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4953131675720215,
      "learning_rate": 0.00026003776348922704,
      "loss": 3.0714,
      "step": 125000
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.382388114929199,
      "learning_rate": 0.0002600337093851376,
      "loss": 3.0518,
      "step": 125001
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.773638963699341,
      "learning_rate": 0.00026002965528847864,
      "loss": 3.1449,
      "step": 125002
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7152082920074463,
      "learning_rate": 0.00026002560119925107,
      "loss": 3.2149,
      "step": 125003
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.922961950302124,
      "learning_rate": 0.0002600215471174554,
      "loss": 2.7358,
      "step": 125004
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8730905055999756,
      "learning_rate": 0.0002600174930430924,
      "loss": 2.9159,
      "step": 125005
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.5355899333953857,
      "learning_rate": 0.00026001343897616295,
      "loss": 2.7695,
      "step": 125006
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0225157737731934,
      "learning_rate": 0.0002600093849166677,
      "loss": 3.0119,
      "step": 125007
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0560739040374756,
      "learning_rate": 0.0002600053308646075,
      "loss": 3.2124,
      "step": 125008
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0076866149902344,
      "learning_rate": 0.0002600012768199832,
      "loss": 2.8563,
      "step": 125009
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.811478614807129,
      "learning_rate": 0.0002599972227827953,
      "loss": 3.0511,
      "step": 125010
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8127771615982056,
      "learning_rate": 0.00025999316875304466,
      "loss": 3.0361,
      "step": 125011
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9774185419082642,
      "learning_rate": 0.00025998911473073204,
      "loss": 2.7318,
      "step": 125012
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9235610961914062,
      "learning_rate": 0.0002599850607158582,
      "loss": 2.9201,
      "step": 125013
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.034057855606079,
      "learning_rate": 0.00025998100670842386,
      "loss": 3.149,
      "step": 125014
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.4378244876861572,
      "learning_rate": 0.00025997695270843003,
      "loss": 2.7943,
      "step": 125015
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9766184091567993,
      "learning_rate": 0.000259972898715877,
      "loss": 2.9723,
      "step": 125016
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.864210844039917,
      "learning_rate": 0.0002599688447307658,
      "loss": 2.8539,
      "step": 125017
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8648324012756348,
      "learning_rate": 0.0002599647907530972,
      "loss": 2.9494,
      "step": 125018
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.33235502243042,
      "learning_rate": 0.00025996073678287185,
      "loss": 2.9561,
      "step": 125019
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7538585662841797,
      "learning_rate": 0.00025995668282009055,
      "loss": 3.0261,
      "step": 125020
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8115806579589844,
      "learning_rate": 0.00025995262886475414,
      "loss": 3.028,
      "step": 125021
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0635669231414795,
      "learning_rate": 0.0002599485749168633,
      "loss": 2.8797,
      "step": 125022
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.751220941543579,
      "learning_rate": 0.0002599445209764187,
      "loss": 2.9596,
      "step": 125023
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7597832679748535,
      "learning_rate": 0.0002599404670434212,
      "loss": 3.1267,
      "step": 125024
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9461095333099365,
      "learning_rate": 0.00025993641311787145,
      "loss": 2.9693,
      "step": 125025
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.291672945022583,
      "learning_rate": 0.00025993235919977034,
      "loss": 3.0595,
      "step": 125026
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.9807193279266357,
      "learning_rate": 0.0002599283052891186,
      "loss": 2.819,
      "step": 125027
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8185341358184814,
      "learning_rate": 0.0002599242513859169,
      "loss": 3.1992,
      "step": 125028
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.487480401992798,
      "learning_rate": 0.00025992019749016606,
      "loss": 3.0317,
      "step": 125029
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.013805866241455,
      "learning_rate": 0.00025991614360186684,
      "loss": 3.1363,
      "step": 125030
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.715784788131714,
      "learning_rate": 0.0002599120897210199,
      "loss": 3.1452,
      "step": 125031
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.140544891357422,
      "learning_rate": 0.0002599080358476261,
      "loss": 2.9551,
      "step": 125032
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9629852771759033,
      "learning_rate": 0.0002599039819816861,
      "loss": 2.9774,
      "step": 125033
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1297125816345215,
      "learning_rate": 0.00025989992812320076,
      "loss": 2.7084,
      "step": 125034
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8981345891952515,
      "learning_rate": 0.0002598958742721707,
      "loss": 3.0532,
      "step": 125035
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.188913345336914,
      "learning_rate": 0.00025989182042859685,
      "loss": 3.0407,
      "step": 125036
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6351468563079834,
      "learning_rate": 0.00025988776659247986,
      "loss": 2.9649,
      "step": 125037
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8047661781311035,
      "learning_rate": 0.00025988371276382044,
      "loss": 3.0918,
      "step": 125038
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9491819143295288,
      "learning_rate": 0.00025987965894261946,
      "loss": 2.8789,
      "step": 125039
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.236222267150879,
      "learning_rate": 0.0002598756051288775,
      "loss": 2.9151,
      "step": 125040
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3882131576538086,
      "learning_rate": 0.0002598715513225955,
      "loss": 3.0236,
      "step": 125041
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9564058780670166,
      "learning_rate": 0.00025986749752377413,
      "loss": 2.954,
      "step": 125042
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7750890254974365,
      "learning_rate": 0.00025986344373241414,
      "loss": 2.9773,
      "step": 125043
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.628237247467041,
      "learning_rate": 0.00025985938994851633,
      "loss": 2.8969,
      "step": 125044
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.376096725463867,
      "learning_rate": 0.00025985533617208135,
      "loss": 2.9623,
      "step": 125045
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8021702766418457,
      "learning_rate": 0.0002598512824031101,
      "loss": 2.9533,
      "step": 125046
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.1229610443115234,
      "learning_rate": 0.0002598472286416032,
      "loss": 3.0267,
      "step": 125047
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1189310550689697,
      "learning_rate": 0.00025984317488756146,
      "loss": 2.7461,
      "step": 125048
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9134174585342407,
      "learning_rate": 0.00025983912114098563,
      "loss": 3.1517,
      "step": 125049
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.050295829772949,
      "learning_rate": 0.00025983506740187643,
      "loss": 3.0921,
      "step": 125050
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7340686321258545,
      "learning_rate": 0.00025983101367023473,
      "loss": 2.8479,
      "step": 125051
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.276869535446167,
      "learning_rate": 0.00025982695994606123,
      "loss": 2.9574,
      "step": 125052
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.058274984359741,
      "learning_rate": 0.00025982290622935663,
      "loss": 3.1325,
      "step": 125053
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.536651849746704,
      "learning_rate": 0.00025981885252012165,
      "loss": 3.1424,
      "step": 125054
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0724830627441406,
      "learning_rate": 0.0002598147988183571,
      "loss": 2.9131,
      "step": 125055
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2983717918395996,
      "learning_rate": 0.0002598107451240637,
      "loss": 2.8845,
      "step": 125056
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9394444227218628,
      "learning_rate": 0.0002598066914372423,
      "loss": 3.1764,
      "step": 125057
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.095043182373047,
      "learning_rate": 0.00025980263775789375,
      "loss": 3.1125,
      "step": 125058
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.23227596282959,
      "learning_rate": 0.0002597985840860185,
      "loss": 2.7452,
      "step": 125059
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1962480545043945,
      "learning_rate": 0.0002597945304216174,
      "loss": 2.7884,
      "step": 125060
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.4847073554992676,
      "learning_rate": 0.0002597904767646913,
      "loss": 2.7422,
      "step": 125061
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.909834623336792,
      "learning_rate": 0.0002597864231152409,
      "loss": 3.0647,
      "step": 125062
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.1151089668273926,
      "learning_rate": 0.000259782369473267,
      "loss": 3.13,
      "step": 125063
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2974445819854736,
      "learning_rate": 0.00025977831583877034,
      "loss": 2.8902,
      "step": 125064
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9006171226501465,
      "learning_rate": 0.0002597742622117516,
      "loss": 2.9247,
      "step": 125065
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8123912811279297,
      "learning_rate": 0.00025977020859221163,
      "loss": 3.1321,
      "step": 125066
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9177565574645996,
      "learning_rate": 0.0002597661549801511,
      "loss": 2.9574,
      "step": 125067
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6950126886367798,
      "learning_rate": 0.0002597621013755707,
      "loss": 3.1738,
      "step": 125068
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1592180728912354,
      "learning_rate": 0.0002597580477784714,
      "loss": 2.9267,
      "step": 125069
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1070094108581543,
      "learning_rate": 0.00025975399418885395,
      "loss": 3.0062,
      "step": 125070
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9283617734909058,
      "learning_rate": 0.00025974994060671885,
      "loss": 2.9458,
      "step": 125071
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5897529125213623,
      "learning_rate": 0.00025974588703206697,
      "loss": 2.79,
      "step": 125072
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.39658784866333,
      "learning_rate": 0.0002597418334648991,
      "loss": 3.0906,
      "step": 125073
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4601330757141113,
      "learning_rate": 0.00025973777990521605,
      "loss": 3.1552,
      "step": 125074
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.803800344467163,
      "learning_rate": 0.0002597337263530184,
      "loss": 3.091,
      "step": 125075
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.548689603805542,
      "learning_rate": 0.0002597296728083072,
      "loss": 3.0266,
      "step": 125076
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7902735471725464,
      "learning_rate": 0.0002597256192710829,
      "loss": 2.5747,
      "step": 125077
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0987653732299805,
      "learning_rate": 0.0002597215657413463,
      "loss": 2.9304,
      "step": 125078
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5245180130004883,
      "learning_rate": 0.0002597175122190983,
      "loss": 3.1414,
      "step": 125079
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.485385894775391,
      "learning_rate": 0.00025971345870433955,
      "loss": 2.9541,
      "step": 125080
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.465167760848999,
      "learning_rate": 0.00025970940519707083,
      "loss": 2.8337,
      "step": 125081
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9048304557800293,
      "learning_rate": 0.00025970535169729303,
      "loss": 2.8946,
      "step": 125082
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0834343433380127,
      "learning_rate": 0.00025970129820500657,
      "loss": 2.9583,
      "step": 125083
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8241006135940552,
      "learning_rate": 0.0002596972447202125,
      "loss": 3.0896,
      "step": 125084
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.303415536880493,
      "learning_rate": 0.0002596931912429114,
      "loss": 3.0398,
      "step": 125085
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.179227590560913,
      "learning_rate": 0.0002596891377731041,
      "loss": 2.7793,
      "step": 125086
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1324846744537354,
      "learning_rate": 0.00025968508431079137,
      "loss": 2.9263,
      "step": 125087
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7916876077651978,
      "learning_rate": 0.0002596810308559741,
      "loss": 2.9677,
      "step": 125088
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7330918312072754,
      "learning_rate": 0.0002596769774086527,
      "loss": 2.6413,
      "step": 125089
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.307391405105591,
      "learning_rate": 0.00025967292396882817,
      "loss": 3.1141,
      "step": 125090
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.350700855255127,
      "learning_rate": 0.0002596688705365012,
      "loss": 3.0485,
      "step": 125091
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1209640502929688,
      "learning_rate": 0.0002596648171116725,
      "loss": 2.9549,
      "step": 125092
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0585076808929443,
      "learning_rate": 0.00025966076369434284,
      "loss": 2.8946,
      "step": 125093
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2854220867156982,
      "learning_rate": 0.0002596567102845132,
      "loss": 3.015,
      "step": 125094
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3461267948150635,
      "learning_rate": 0.00025965265688218404,
      "loss": 2.9596,
      "step": 125095
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9567444324493408,
      "learning_rate": 0.0002596486034873562,
      "loss": 2.8591,
      "step": 125096
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.050783395767212,
      "learning_rate": 0.00025964455010003037,
      "loss": 3.2918,
      "step": 125097
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.762973308563232,
      "learning_rate": 0.00025964049672020745,
      "loss": 2.7626,
      "step": 125098
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.9843945503234863,
      "learning_rate": 0.00025963644334788807,
      "loss": 2.7941,
      "step": 125099
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.251746654510498,
      "learning_rate": 0.0002596323899830732,
      "loss": 3.1044,
      "step": 125100
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0946054458618164,
      "learning_rate": 0.0002596283366257633,
      "loss": 2.9669,
      "step": 125101
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.6429364681243896,
      "learning_rate": 0.0002596242832759593,
      "loss": 2.8922,
      "step": 125102
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.908990740776062,
      "learning_rate": 0.0002596202299336618,
      "loss": 2.9459,
      "step": 125103
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5210494995117188,
      "learning_rate": 0.0002596161765988717,
      "loss": 2.9531,
      "step": 125104
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9029911756515503,
      "learning_rate": 0.00025961212327158976,
      "loss": 3.3207,
      "step": 125105
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8219048976898193,
      "learning_rate": 0.00025960806995181673,
      "loss": 3.0271,
      "step": 125106
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9472808837890625,
      "learning_rate": 0.00025960401663955337,
      "loss": 3.1869,
      "step": 125107
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9360153675079346,
      "learning_rate": 0.00025959996333480023,
      "loss": 2.9671,
      "step": 125108
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8801718950271606,
      "learning_rate": 0.0002595959100375583,
      "loss": 3.0081,
      "step": 125109
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2704122066497803,
      "learning_rate": 0.00025959185674782816,
      "loss": 3.0423,
      "step": 125110
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.0065789222717285,
      "learning_rate": 0.00025958780346561076,
      "loss": 2.9259,
      "step": 125111
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.783623218536377,
      "learning_rate": 0.0002595837501909067,
      "loss": 3.023,
      "step": 125112
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7865699529647827,
      "learning_rate": 0.0002595796969237169,
      "loss": 3.1493,
      "step": 125113
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4844086170196533,
      "learning_rate": 0.0002595756436640419,
      "loss": 2.9161,
      "step": 125114
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.7690203189849854,
      "learning_rate": 0.0002595715904118826,
      "loss": 2.8742,
      "step": 125115
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5209813117980957,
      "learning_rate": 0.00025956753716723955,
      "loss": 3.1676,
      "step": 125116
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.126890182495117,
      "learning_rate": 0.0002595634839301138,
      "loss": 3.0826,
      "step": 125117
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0779683589935303,
      "learning_rate": 0.0002595594307005059,
      "loss": 2.9255,
      "step": 125118
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3602800369262695,
      "learning_rate": 0.0002595553774784167,
      "loss": 2.9641,
      "step": 125119
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2503511905670166,
      "learning_rate": 0.0002595513242638469,
      "loss": 2.8622,
      "step": 125120
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.563739776611328,
      "learning_rate": 0.00025954727105679734,
      "loss": 2.9366,
      "step": 125121
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8765976428985596,
      "learning_rate": 0.0002595432178572687,
      "loss": 3.0314,
      "step": 125122
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3452258110046387,
      "learning_rate": 0.0002595391646652616,
      "loss": 2.8328,
      "step": 125123
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5309271812438965,
      "learning_rate": 0.000259535111480777,
      "loss": 2.9668,
      "step": 125124
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7966781854629517,
      "learning_rate": 0.0002595310583038157,
      "loss": 2.8763,
      "step": 125125
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.200500249862671,
      "learning_rate": 0.00025952700513437823,
      "loss": 2.8538,
      "step": 125126
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7294821739196777,
      "learning_rate": 0.0002595229519724654,
      "loss": 3.2853,
      "step": 125127
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9183458089828491,
      "learning_rate": 0.0002595188988180781,
      "loss": 3.1214,
      "step": 125128
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0246024131774902,
      "learning_rate": 0.00025951484567121705,
      "loss": 2.9451,
      "step": 125129
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.720494508743286,
      "learning_rate": 0.0002595107925318829,
      "loss": 2.8656,
      "step": 125130
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.537933826446533,
      "learning_rate": 0.0002595067394000765,
      "loss": 2.7754,
      "step": 125131
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.0350914001464844,
      "learning_rate": 0.0002595026862757985,
      "loss": 2.7223,
      "step": 125132
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3503756523132324,
      "learning_rate": 0.0002594986331590497,
      "loss": 3.4206,
      "step": 125133
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9328093528747559,
      "learning_rate": 0.00025949458004983085,
      "loss": 2.6941,
      "step": 125134
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9033691883087158,
      "learning_rate": 0.00025949052694814275,
      "loss": 2.9868,
      "step": 125135
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9859683513641357,
      "learning_rate": 0.0002594864738539862,
      "loss": 2.9536,
      "step": 125136
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8459144830703735,
      "learning_rate": 0.0002594824207673619,
      "loss": 3.034,
      "step": 125137
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3682403564453125,
      "learning_rate": 0.00025947836768827045,
      "loss": 3.1467,
      "step": 125138
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.214376449584961,
      "learning_rate": 0.0002594743146167128,
      "loss": 2.915,
      "step": 125139
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0718674659729004,
      "learning_rate": 0.00025947026155268964,
      "loss": 3.0415,
      "step": 125140
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.0467517375946045,
      "learning_rate": 0.00025946620849620167,
      "loss": 3.0057,
      "step": 125141
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.739420771598816,
      "learning_rate": 0.0002594621554472497,
      "loss": 2.9632,
      "step": 125142
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8249320983886719,
      "learning_rate": 0.0002594581024058347,
      "loss": 3.0073,
      "step": 125143
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7021141052246094,
      "learning_rate": 0.000259454049371957,
      "loss": 3.1656,
      "step": 125144
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5549933910369873,
      "learning_rate": 0.00025944999634561753,
      "loss": 2.9811,
      "step": 125145
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9421305656433105,
      "learning_rate": 0.0002594459433268171,
      "loss": 3.1091,
      "step": 125146
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.453677177429199,
      "learning_rate": 0.0002594418903155565,
      "loss": 2.7622,
      "step": 125147
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.587754487991333,
      "learning_rate": 0.00025943783731183635,
      "loss": 3.0412,
      "step": 125148
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.030953884124756,
      "learning_rate": 0.0002594337843156576,
      "loss": 3.0546,
      "step": 125149
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3124935626983643,
      "learning_rate": 0.0002594297313270208,
      "loss": 2.8902,
      "step": 125150
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.225687026977539,
      "learning_rate": 0.0002594256783459267,
      "loss": 3.1025,
      "step": 125151
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9495941400527954,
      "learning_rate": 0.0002594216253723762,
      "loss": 3.1067,
      "step": 125152
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2811670303344727,
      "learning_rate": 0.00025941757240636993,
      "loss": 2.6595,
      "step": 125153
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.32985258102417,
      "learning_rate": 0.00025941351944790874,
      "loss": 3.0529,
      "step": 125154
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.278935670852661,
      "learning_rate": 0.0002594094664969935,
      "loss": 2.9141,
      "step": 125155
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.920570969581604,
      "learning_rate": 0.0002594054135536246,
      "loss": 3.2798,
      "step": 125156
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.379866600036621,
      "learning_rate": 0.00025940136061780307,
      "loss": 2.8088,
      "step": 125157
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.452232599258423,
      "learning_rate": 0.00025939730768952953,
      "loss": 2.8885,
      "step": 125158
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0032010078430176,
      "learning_rate": 0.0002593932547688048,
      "loss": 2.96,
      "step": 125159
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.265075206756592,
      "learning_rate": 0.00025938920185562966,
      "loss": 2.8529,
      "step": 125160
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.282759666442871,
      "learning_rate": 0.000259385148950005,
      "loss": 2.8536,
      "step": 125161
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.913728713989258,
      "learning_rate": 0.0002593810960519312,
      "loss": 3.0946,
      "step": 125162
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.307685613632202,
      "learning_rate": 0.00025937704316140927,
      "loss": 3.2939,
      "step": 125163
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.916351556777954,
      "learning_rate": 0.00025937299027843993,
      "loss": 2.9224,
      "step": 125164
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.871851921081543,
      "learning_rate": 0.00025936893740302386,
      "loss": 2.8235,
      "step": 125165
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8420823812484741,
      "learning_rate": 0.00025936488453516196,
      "loss": 3.1073,
      "step": 125166
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.15454363822937,
      "learning_rate": 0.0002593608316748549,
      "loss": 3.0605,
      "step": 125167
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7674387693405151,
      "learning_rate": 0.00025935677882210335,
      "loss": 2.9572,
      "step": 125168
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0545952320098877,
      "learning_rate": 0.00025935272597690817,
      "loss": 2.8051,
      "step": 125169
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2125773429870605,
      "learning_rate": 0.0002593486731392701,
      "loss": 3.0978,
      "step": 125170
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9398459196090698,
      "learning_rate": 0.00025934462030918987,
      "loss": 3.2833,
      "step": 125171
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.389017105102539,
      "learning_rate": 0.0002593405674866682,
      "loss": 3.1615,
      "step": 125172
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.762568235397339,
      "learning_rate": 0.00025933651467170606,
      "loss": 2.93,
      "step": 125173
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.658717632293701,
      "learning_rate": 0.0002593324618643039,
      "loss": 2.9485,
      "step": 125174
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.083597421646118,
      "learning_rate": 0.00025932840906446255,
      "loss": 3.0479,
      "step": 125175
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.355710983276367,
      "learning_rate": 0.00025932435627218283,
      "loss": 3.2918,
      "step": 125176
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.6350083351135254,
      "learning_rate": 0.0002593203034874656,
      "loss": 3.0244,
      "step": 125177
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.7053492069244385,
      "learning_rate": 0.00025931625071031134,
      "loss": 3.1171,
      "step": 125178
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.235323667526245,
      "learning_rate": 0.000259312197940721,
      "loss": 2.968,
      "step": 125179
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7621300220489502,
      "learning_rate": 0.00025930814517869543,
      "loss": 3.0732,
      "step": 125180
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8346624374389648,
      "learning_rate": 0.0002593040924242351,
      "loss": 3.1705,
      "step": 125181
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4863502979278564,
      "learning_rate": 0.00025930003967734096,
      "loss": 2.873,
      "step": 125182
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.0912129878997803,
      "learning_rate": 0.00025929598693801367,
      "loss": 2.8433,
      "step": 125183
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.007206439971924,
      "learning_rate": 0.000259291934206254,
      "loss": 3.1212,
      "step": 125184
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7786407470703125,
      "learning_rate": 0.00025928788148206277,
      "loss": 3.0866,
      "step": 125185
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7831854820251465,
      "learning_rate": 0.00025928382876544084,
      "loss": 3.1786,
      "step": 125186
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.175416946411133,
      "learning_rate": 0.0002592797760563886,
      "loss": 3.2682,
      "step": 125187
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.350874185562134,
      "learning_rate": 0.00025927572335490706,
      "loss": 2.9827,
      "step": 125188
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1493325233459473,
      "learning_rate": 0.000259271670660997,
      "loss": 3.0623,
      "step": 125189
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9284632205963135,
      "learning_rate": 0.000259267617974659,
      "loss": 2.9868,
      "step": 125190
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.1449427604675293,
      "learning_rate": 0.000259263565295894,
      "loss": 2.8402,
      "step": 125191
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9254659414291382,
      "learning_rate": 0.00025925951262470274,
      "loss": 2.8294,
      "step": 125192
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9550076723098755,
      "learning_rate": 0.00025925545996108583,
      "loss": 3.1827,
      "step": 125193
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.734673261642456,
      "learning_rate": 0.0002592514073050441,
      "loss": 2.8801,
      "step": 125194
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7750229835510254,
      "learning_rate": 0.00025924735465657824,
      "loss": 2.8497,
      "step": 125195
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.383401870727539,
      "learning_rate": 0.0002592433020156891,
      "loss": 3.0106,
      "step": 125196
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.038858413696289,
      "learning_rate": 0.0002592392493823774,
      "loss": 2.9045,
      "step": 125197
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0836129188537598,
      "learning_rate": 0.00025923519675664393,
      "loss": 2.921,
      "step": 125198
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3531086444854736,
      "learning_rate": 0.0002592311441384895,
      "loss": 3.0759,
      "step": 125199
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1209592819213867,
      "learning_rate": 0.0002592270915279146,
      "loss": 3.0527,
      "step": 125200
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.630969762802124,
      "learning_rate": 0.0002592230389249202,
      "loss": 3.0705,
      "step": 125201
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1334056854248047,
      "learning_rate": 0.00025921898632950697,
      "loss": 3.0726,
      "step": 125202
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3546855449676514,
      "learning_rate": 0.00025921493374167574,
      "loss": 2.9779,
      "step": 125203
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9716750383377075,
      "learning_rate": 0.0002592108811614273,
      "loss": 3.1644,
      "step": 125204
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.911969542503357,
      "learning_rate": 0.00025920682858876227,
      "loss": 2.7955,
      "step": 125205
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.089129686355591,
      "learning_rate": 0.00025920277602368145,
      "loss": 2.7017,
      "step": 125206
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2901506423950195,
      "learning_rate": 0.0002591987234661856,
      "loss": 2.9169,
      "step": 125207
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9871875047683716,
      "learning_rate": 0.00025919467091627547,
      "loss": 3.0139,
      "step": 125208
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.783874750137329,
      "learning_rate": 0.00025919061837395183,
      "loss": 3.3123,
      "step": 125209
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9862335920333862,
      "learning_rate": 0.00025918656583921544,
      "loss": 2.8788,
      "step": 125210
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6966817378997803,
      "learning_rate": 0.00025918251331206706,
      "loss": 2.8509,
      "step": 125211
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9657784700393677,
      "learning_rate": 0.00025917846079250733,
      "loss": 3.1736,
      "step": 125212
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8270009756088257,
      "learning_rate": 0.00025917440828053724,
      "loss": 3.011,
      "step": 125213
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2325081825256348,
      "learning_rate": 0.00025917035577615726,
      "loss": 2.9472,
      "step": 125214
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.244678258895874,
      "learning_rate": 0.0002591663032793683,
      "loss": 2.7867,
      "step": 125215
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0437941551208496,
      "learning_rate": 0.0002591622507901712,
      "loss": 2.9311,
      "step": 125216
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.705307960510254,
      "learning_rate": 0.0002591581983085665,
      "loss": 2.8503,
      "step": 125217
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.865633487701416,
      "learning_rate": 0.0002591541458345551,
      "loss": 3.0271,
      "step": 125218
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1237006187438965,
      "learning_rate": 0.00025915009336813764,
      "loss": 3.1288,
      "step": 125219
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.899789571762085,
      "learning_rate": 0.00025914604090931503,
      "loss": 3.0834,
      "step": 125220
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3346147537231445,
      "learning_rate": 0.000259141988458088,
      "loss": 2.8557,
      "step": 125221
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1208386421203613,
      "learning_rate": 0.0002591379360144572,
      "loss": 2.9401,
      "step": 125222
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.139672040939331,
      "learning_rate": 0.0002591338835784234,
      "loss": 2.6635,
      "step": 125223
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1041834354400635,
      "learning_rate": 0.00025912983114998735,
      "loss": 2.8985,
      "step": 125224
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.985978126525879,
      "learning_rate": 0.00025912577872914987,
      "loss": 2.9968,
      "step": 125225
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.020357131958008,
      "learning_rate": 0.00025912172631591167,
      "loss": 2.8188,
      "step": 125226
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7678122520446777,
      "learning_rate": 0.0002591176739102735,
      "loss": 2.9826,
      "step": 125227
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8468806743621826,
      "learning_rate": 0.00025911362151223626,
      "loss": 3.093,
      "step": 125228
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9782726764678955,
      "learning_rate": 0.00025910956912180046,
      "loss": 2.8852,
      "step": 125229
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8663012981414795,
      "learning_rate": 0.0002591055167389669,
      "loss": 3.1146,
      "step": 125230
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.801419973373413,
      "learning_rate": 0.0002591014643637365,
      "loss": 3.0674,
      "step": 125231
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7292128801345825,
      "learning_rate": 0.0002590974119961099,
      "loss": 2.9535,
      "step": 125232
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6210498809814453,
      "learning_rate": 0.00025909335963608774,
      "loss": 2.8655,
      "step": 125233
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.190174102783203,
      "learning_rate": 0.0002590893072836711,
      "loss": 3.0058,
      "step": 125234
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.433069944381714,
      "learning_rate": 0.00025908525493886043,
      "loss": 3.0293,
      "step": 125235
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8515100479125977,
      "learning_rate": 0.00025908120260165657,
      "loss": 2.8486,
      "step": 125236
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.231377363204956,
      "learning_rate": 0.0002590771502720603,
      "loss": 3.0894,
      "step": 125237
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4678773880004883,
      "learning_rate": 0.0002590730979500723,
      "loss": 3.2073,
      "step": 125238
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2458536624908447,
      "learning_rate": 0.0002590690456356934,
      "loss": 2.7955,
      "step": 125239
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7890632152557373,
      "learning_rate": 0.00025906499332892447,
      "loss": 2.9293,
      "step": 125240
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.1682486534118652,
      "learning_rate": 0.0002590609410297661,
      "loss": 3.2045,
      "step": 125241
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8176950216293335,
      "learning_rate": 0.00025905688873821894,
      "loss": 2.9965,
      "step": 125242
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1063332557678223,
      "learning_rate": 0.00025905283645428394,
      "loss": 2.7911,
      "step": 125243
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5517313480377197,
      "learning_rate": 0.0002590487841779618,
      "loss": 2.8144,
      "step": 125244
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9977158308029175,
      "learning_rate": 0.0002590447319092532,
      "loss": 3.0673,
      "step": 125245
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2432799339294434,
      "learning_rate": 0.00025904067964815903,
      "loss": 2.7038,
      "step": 125246
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1473169326782227,
      "learning_rate": 0.00025903662739468,
      "loss": 3.0261,
      "step": 125247
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6094353199005127,
      "learning_rate": 0.00025903257514881683,
      "loss": 3.0319,
      "step": 125248
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.109966278076172,
      "learning_rate": 0.00025902852291057016,
      "loss": 3.1717,
      "step": 125249
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7159547805786133,
      "learning_rate": 0.000259024470679941,
      "loss": 2.8233,
      "step": 125250
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7021201848983765,
      "learning_rate": 0.00025902041845692986,
      "loss": 2.6458,
      "step": 125251
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.16467022895813,
      "learning_rate": 0.00025901636624153763,
      "loss": 2.8565,
      "step": 125252
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1732237339019775,
      "learning_rate": 0.00025901231403376517,
      "loss": 2.6667,
      "step": 125253
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1181278228759766,
      "learning_rate": 0.0002590082618336129,
      "loss": 2.9467,
      "step": 125254
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8528861999511719,
      "learning_rate": 0.00025900420964108185,
      "loss": 3.0864,
      "step": 125255
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8544840812683105,
      "learning_rate": 0.0002590001574561727,
      "loss": 3.2217,
      "step": 125256
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.835808277130127,
      "learning_rate": 0.00025899610527888614,
      "loss": 2.9138,
      "step": 125257
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1022751331329346,
      "learning_rate": 0.00025899205310922296,
      "loss": 3.0099,
      "step": 125258
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.743661880493164,
      "learning_rate": 0.0002589880009471841,
      "loss": 2.8786,
      "step": 125259
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9578957557678223,
      "learning_rate": 0.00025898394879277,
      "loss": 3.1996,
      "step": 125260
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.013585090637207,
      "learning_rate": 0.00025897989664598153,
      "loss": 2.7969,
      "step": 125261
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.492112874984741,
      "learning_rate": 0.00025897584450681956,
      "loss": 2.9023,
      "step": 125262
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.200803518295288,
      "learning_rate": 0.0002589717923752847,
      "loss": 2.9347,
      "step": 125263
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.374291181564331,
      "learning_rate": 0.00025896774025137775,
      "loss": 2.9071,
      "step": 125264
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2989087104797363,
      "learning_rate": 0.0002589636881350996,
      "loss": 2.8953,
      "step": 125265
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.165731906890869,
      "learning_rate": 0.00025895963602645076,
      "loss": 2.8132,
      "step": 125266
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3621129989624023,
      "learning_rate": 0.0002589555839254321,
      "loss": 3.1132,
      "step": 125267
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.192845582962036,
      "learning_rate": 0.0002589515318320444,
      "loss": 2.8297,
      "step": 125268
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2495827674865723,
      "learning_rate": 0.00025894747974628836,
      "loss": 2.8507,
      "step": 125269
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2746989727020264,
      "learning_rate": 0.00025894342766816475,
      "loss": 2.829,
      "step": 125270
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7837401628494263,
      "learning_rate": 0.00025893937559767446,
      "loss": 2.8797,
      "step": 125271
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.021531105041504,
      "learning_rate": 0.000258935323534818,
      "loss": 3.0814,
      "step": 125272
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.23752498626709,
      "learning_rate": 0.0002589312714795962,
      "loss": 2.7631,
      "step": 125273
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2158243656158447,
      "learning_rate": 0.0002589272194320099,
      "loss": 2.8749,
      "step": 125274
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7549909353256226,
      "learning_rate": 0.00025892316739205973,
      "loss": 2.6637,
      "step": 125275
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9865416288375854,
      "learning_rate": 0.0002589191153597466,
      "loss": 2.9845,
      "step": 125276
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.047119140625,
      "learning_rate": 0.0002589150633350713,
      "loss": 2.7913,
      "step": 125277
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.87216317653656,
      "learning_rate": 0.0002589110113180343,
      "loss": 3.1098,
      "step": 125278
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0436489582061768,
      "learning_rate": 0.00025890695930863653,
      "loss": 3.1367,
      "step": 125279
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.83617103099823,
      "learning_rate": 0.00025890290730687875,
      "loss": 3.2346,
      "step": 125280
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0058579444885254,
      "learning_rate": 0.00025889885531276167,
      "loss": 2.8791,
      "step": 125281
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.400939702987671,
      "learning_rate": 0.0002588948033262861,
      "loss": 2.8717,
      "step": 125282
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2049081325531006,
      "learning_rate": 0.0002588907513474529,
      "loss": 3.0249,
      "step": 125283
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.054410457611084,
      "learning_rate": 0.0002588866993762625,
      "loss": 2.9989,
      "step": 125284
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.306673288345337,
      "learning_rate": 0.0002588826474127159,
      "loss": 2.9799,
      "step": 125285
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.831634759902954,
      "learning_rate": 0.0002588785954568138,
      "loss": 3.1768,
      "step": 125286
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5191566944122314,
      "learning_rate": 0.0002588745435085568,
      "loss": 3.0826,
      "step": 125287
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.789829730987549,
      "learning_rate": 0.000258870491567946,
      "loss": 3.0537,
      "step": 125288
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7908744812011719,
      "learning_rate": 0.00025886643963498186,
      "loss": 3.021,
      "step": 125289
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9977408647537231,
      "learning_rate": 0.0002588623877096652,
      "loss": 2.8578,
      "step": 125290
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.233858823776245,
      "learning_rate": 0.00025885833579199696,
      "loss": 3.0586,
      "step": 125291
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.914380431175232,
      "learning_rate": 0.0002588542838819776,
      "loss": 3.0637,
      "step": 125292
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1414718627929688,
      "learning_rate": 0.00025885023197960797,
      "loss": 3.0062,
      "step": 125293
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2507808208465576,
      "learning_rate": 0.00025884618008488886,
      "loss": 3.0233,
      "step": 125294
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9782557487487793,
      "learning_rate": 0.00025884212819782113,
      "loss": 2.9475,
      "step": 125295
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6951810121536255,
      "learning_rate": 0.0002588380763184054,
      "loss": 3.2116,
      "step": 125296
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3331942558288574,
      "learning_rate": 0.00025883402444664235,
      "loss": 3.089,
      "step": 125297
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0833072662353516,
      "learning_rate": 0.0002588299725825329,
      "loss": 3.157,
      "step": 125298
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9651302099227905,
      "learning_rate": 0.00025882592072607775,
      "loss": 2.8688,
      "step": 125299
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1036622524261475,
      "learning_rate": 0.0002588218688772776,
      "loss": 3.2406,
      "step": 125300
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8002476692199707,
      "learning_rate": 0.0002588178170361333,
      "loss": 2.9925,
      "step": 125301
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6858192682266235,
      "learning_rate": 0.0002588137652026455,
      "loss": 2.9678,
      "step": 125302
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.7181615829467773,
      "learning_rate": 0.000258809713376815,
      "loss": 2.8789,
      "step": 125303
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.13429856300354,
      "learning_rate": 0.00025880566155864253,
      "loss": 2.8277,
      "step": 125304
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9219411611557007,
      "learning_rate": 0.0002588016097481289,
      "loss": 2.7908,
      "step": 125305
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3387818336486816,
      "learning_rate": 0.0002587975579452748,
      "loss": 2.9236,
      "step": 125306
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9035415649414062,
      "learning_rate": 0.0002587935061500811,
      "loss": 3.0985,
      "step": 125307
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0329806804656982,
      "learning_rate": 0.0002587894543625484,
      "loss": 3.3106,
      "step": 125308
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.122636318206787,
      "learning_rate": 0.00025878540258267746,
      "loss": 2.9361,
      "step": 125309
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0997726917266846,
      "learning_rate": 0.00025878135081046914,
      "loss": 2.7757,
      "step": 125310
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6889383792877197,
      "learning_rate": 0.0002587772990459241,
      "loss": 3.0065,
      "step": 125311
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.6923205852508545,
      "learning_rate": 0.00025877324728904326,
      "loss": 2.7728,
      "step": 125312
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.880413770675659,
      "learning_rate": 0.00025876919553982717,
      "loss": 2.9631,
      "step": 125313
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3896872997283936,
      "learning_rate": 0.0002587651437982767,
      "loss": 3.0565,
      "step": 125314
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.114170789718628,
      "learning_rate": 0.00025876109206439255,
      "loss": 2.7949,
      "step": 125315
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5601370334625244,
      "learning_rate": 0.0002587570403381755,
      "loss": 3.0607,
      "step": 125316
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.744823455810547,
      "learning_rate": 0.0002587529886196262,
      "loss": 2.8452,
      "step": 125317
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.024305820465088,
      "learning_rate": 0.00025874893690874557,
      "loss": 2.8939,
      "step": 125318
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.98550546169281,
      "learning_rate": 0.0002587448852055343,
      "loss": 3.1184,
      "step": 125319
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3417036533355713,
      "learning_rate": 0.00025874083350999326,
      "loss": 2.9474,
      "step": 125320
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0076112747192383,
      "learning_rate": 0.0002587367818221229,
      "loss": 2.9385,
      "step": 125321
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9024499654769897,
      "learning_rate": 0.0002587327301419242,
      "loss": 2.9325,
      "step": 125322
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9644019603729248,
      "learning_rate": 0.00025872867846939786,
      "loss": 3.1955,
      "step": 125323
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8471894264221191,
      "learning_rate": 0.0002587246268045446,
      "loss": 3.1978,
      "step": 125324
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9128139019012451,
      "learning_rate": 0.00025872057514736527,
      "loss": 2.9436,
      "step": 125325
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8021483421325684,
      "learning_rate": 0.00025871652349786066,
      "loss": 2.8211,
      "step": 125326
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9577796459197998,
      "learning_rate": 0.00025871247185603127,
      "loss": 3.1581,
      "step": 125327
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7899293899536133,
      "learning_rate": 0.00025870842022187804,
      "loss": 3.0846,
      "step": 125328
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.086160182952881,
      "learning_rate": 0.0002587043685954017,
      "loss": 3.0918,
      "step": 125329
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0519301891326904,
      "learning_rate": 0.000258700316976603,
      "loss": 3.0022,
      "step": 125330
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4802677631378174,
      "learning_rate": 0.00025869626536548267,
      "loss": 2.9358,
      "step": 125331
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6021084785461426,
      "learning_rate": 0.00025869221376204163,
      "loss": 2.957,
      "step": 125332
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1382906436920166,
      "learning_rate": 0.0002586881621662804,
      "loss": 3.0316,
      "step": 125333
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1904351711273193,
      "learning_rate": 0.0002586841105781997,
      "loss": 3.2319,
      "step": 125334
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0904581546783447,
      "learning_rate": 0.0002586800589978005,
      "loss": 3.1657,
      "step": 125335
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.92593252658844,
      "learning_rate": 0.00025867600742508344,
      "loss": 3.0139,
      "step": 125336
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8410385847091675,
      "learning_rate": 0.0002586719558600493,
      "loss": 3.3218,
      "step": 125337
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.080427646636963,
      "learning_rate": 0.00025866790430269893,
      "loss": 3.0877,
      "step": 125338
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.908669114112854,
      "learning_rate": 0.00025866385275303286,
      "loss": 3.0181,
      "step": 125339
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.791121482849121,
      "learning_rate": 0.000258659801211052,
      "loss": 3.0334,
      "step": 125340
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.814322829246521,
      "learning_rate": 0.000258655749676757,
      "loss": 2.8263,
      "step": 125341
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.929988980293274,
      "learning_rate": 0.0002586516981501487,
      "loss": 2.845,
      "step": 125342
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8315660953521729,
      "learning_rate": 0.00025864764663122776,
      "loss": 2.952,
      "step": 125343
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3378243446350098,
      "learning_rate": 0.00025864359511999525,
      "loss": 3.1171,
      "step": 125344
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2688469886779785,
      "learning_rate": 0.00025863954361645146,
      "loss": 3.2614,
      "step": 125345
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5419461727142334,
      "learning_rate": 0.0002586354921205974,
      "loss": 2.8243,
      "step": 125346
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1472012996673584,
      "learning_rate": 0.0002586314406324338,
      "loss": 3.0014,
      "step": 125347
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3324694633483887,
      "learning_rate": 0.00025862738915196135,
      "loss": 3.0499,
      "step": 125348
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.112734317779541,
      "learning_rate": 0.0002586233376791809,
      "loss": 3.2116,
      "step": 125349
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1637983322143555,
      "learning_rate": 0.00025861928621409325,
      "loss": 2.997,
      "step": 125350
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0825228691101074,
      "learning_rate": 0.0002586152347566989,
      "loss": 3.0922,
      "step": 125351
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.439117670059204,
      "learning_rate": 0.0002586111833069988,
      "loss": 2.9339,
      "step": 125352
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.124321460723877,
      "learning_rate": 0.00025860713186499364,
      "loss": 3.0512,
      "step": 125353
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.755617618560791,
      "learning_rate": 0.0002586030804306842,
      "loss": 3.1611,
      "step": 125354
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4951095581054688,
      "learning_rate": 0.00025859902900407126,
      "loss": 3.0024,
      "step": 125355
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4195683002471924,
      "learning_rate": 0.0002585949775851557,
      "loss": 2.8718,
      "step": 125356
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5783040523529053,
      "learning_rate": 0.0002585909261739379,
      "loss": 2.9261,
      "step": 125357
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9316823482513428,
      "learning_rate": 0.00025858687477041886,
      "loss": 2.9108,
      "step": 125358
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8428282737731934,
      "learning_rate": 0.0002585828233745993,
      "loss": 3.0179,
      "step": 125359
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3828377723693848,
      "learning_rate": 0.00025857877198648,
      "loss": 3.0895,
      "step": 125360
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4449105262756348,
      "learning_rate": 0.0002585747206060616,
      "loss": 3.2944,
      "step": 125361
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9594471454620361,
      "learning_rate": 0.00025857066923334513,
      "loss": 2.957,
      "step": 125362
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8971136808395386,
      "learning_rate": 0.0002585666178683311,
      "loss": 2.9577,
      "step": 125363
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3106939792633057,
      "learning_rate": 0.0002585625665110202,
      "loss": 3.2675,
      "step": 125364
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9827507734298706,
      "learning_rate": 0.0002585585151614134,
      "loss": 3.0142,
      "step": 125365
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6667115688323975,
      "learning_rate": 0.00025855446381951126,
      "loss": 3.0713,
      "step": 125366
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.746716856956482,
      "learning_rate": 0.0002585504124853147,
      "loss": 3.1008,
      "step": 125367
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.459867477416992,
      "learning_rate": 0.0002585463611588245,
      "loss": 2.8803,
      "step": 125368
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2695798873901367,
      "learning_rate": 0.00025854230984004116,
      "loss": 2.9428,
      "step": 125369
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0954999923706055,
      "learning_rate": 0.00025853825852896565,
      "loss": 2.9653,
      "step": 125370
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3983147144317627,
      "learning_rate": 0.00025853420722559865,
      "loss": 2.903,
      "step": 125371
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4024558067321777,
      "learning_rate": 0.00025853015592994087,
      "loss": 3.016,
      "step": 125372
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0815255641937256,
      "learning_rate": 0.0002585261046419931,
      "loss": 2.7666,
      "step": 125373
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.006758451461792,
      "learning_rate": 0.0002585220533617563,
      "loss": 3.1328,
      "step": 125374
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5640900135040283,
      "learning_rate": 0.0002585180020892309,
      "loss": 2.7538,
      "step": 125375
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5243070125579834,
      "learning_rate": 0.0002585139508244178,
      "loss": 2.9351,
      "step": 125376
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0015676021575928,
      "learning_rate": 0.0002585098995673177,
      "loss": 2.991,
      "step": 125377
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0369210243225098,
      "learning_rate": 0.00025850584831793144,
      "loss": 2.9682,
      "step": 125378
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9411581754684448,
      "learning_rate": 0.00025850179707625966,
      "loss": 2.9443,
      "step": 125379
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.023366689682007,
      "learning_rate": 0.00025849774584230323,
      "loss": 3.2176,
      "step": 125380
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3736064434051514,
      "learning_rate": 0.0002584936946160629,
      "loss": 2.8145,
      "step": 125381
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8993479013442993,
      "learning_rate": 0.00025848964339753933,
      "loss": 2.9637,
      "step": 125382
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2122304439544678,
      "learning_rate": 0.00025848559218673333,
      "loss": 3.2418,
      "step": 125383
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9520069360733032,
      "learning_rate": 0.00025848154098364565,
      "loss": 3.0097,
      "step": 125384
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8903988599777222,
      "learning_rate": 0.00025847748978827695,
      "loss": 2.8782,
      "step": 125385
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.1254703998565674,
      "learning_rate": 0.00025847343860062815,
      "loss": 2.9695,
      "step": 125386
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0786848068237305,
      "learning_rate": 0.00025846938742069995,
      "loss": 2.9142,
      "step": 125387
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7996770143508911,
      "learning_rate": 0.000258465336248493,
      "loss": 2.9944,
      "step": 125388
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.765842080116272,
      "learning_rate": 0.0002584612850840082,
      "loss": 3.0925,
      "step": 125389
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.197747230529785,
      "learning_rate": 0.0002584572339272462,
      "loss": 2.9301,
      "step": 125390
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.6749393939971924,
      "learning_rate": 0.00025845318277820773,
      "loss": 3.2009,
      "step": 125391
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1393513679504395,
      "learning_rate": 0.0002584491316368936,
      "loss": 2.9977,
      "step": 125392
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8067729473114014,
      "learning_rate": 0.0002584450805033047,
      "loss": 3.0079,
      "step": 125393
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3850836753845215,
      "learning_rate": 0.0002584410293774415,
      "loss": 3.0377,
      "step": 125394
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.775404214859009,
      "learning_rate": 0.00025843697825930496,
      "loss": 3.1356,
      "step": 125395
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7423875331878662,
      "learning_rate": 0.00025843292714889575,
      "loss": 2.9971,
      "step": 125396
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7035877704620361,
      "learning_rate": 0.00025842887604621463,
      "loss": 2.8742,
      "step": 125397
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7428390979766846,
      "learning_rate": 0.0002584248249512624,
      "loss": 3.0092,
      "step": 125398
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.9563605785369873,
      "learning_rate": 0.00025842077386403983,
      "loss": 3.2958,
      "step": 125399
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.856688976287842,
      "learning_rate": 0.0002584167227845475,
      "loss": 3.1507,
      "step": 125400
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0786092281341553,
      "learning_rate": 0.0002584126717127864,
      "loss": 2.9482,
      "step": 125401
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6494829654693604,
      "learning_rate": 0.00025840862064875706,
      "loss": 2.8026,
      "step": 125402
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.118318796157837,
      "learning_rate": 0.00025840456959246033,
      "loss": 3.2025,
      "step": 125403
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0601916313171387,
      "learning_rate": 0.0002584005185438971,
      "loss": 2.8202,
      "step": 125404
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.763787865638733,
      "learning_rate": 0.000258396467503068,
      "loss": 3.1922,
      "step": 125405
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.938677191734314,
      "learning_rate": 0.0002583924164699737,
      "loss": 2.911,
      "step": 125406
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9499229192733765,
      "learning_rate": 0.0002583883654446151,
      "loss": 2.9817,
      "step": 125407
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.002279758453369,
      "learning_rate": 0.00025838431442699275,
      "loss": 3.0613,
      "step": 125408
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.016368865966797,
      "learning_rate": 0.0002583802634171077,
      "loss": 2.7441,
      "step": 125409
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9190082550048828,
      "learning_rate": 0.00025837621241496045,
      "loss": 3.2562,
      "step": 125410
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7269558906555176,
      "learning_rate": 0.000258372161420552,
      "loss": 2.7491,
      "step": 125411
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2929131984710693,
      "learning_rate": 0.00025836811043388277,
      "loss": 2.7798,
      "step": 125412
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2979493141174316,
      "learning_rate": 0.00025836405945495373,
      "loss": 2.8319,
      "step": 125413
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.5900135040283203,
      "learning_rate": 0.00025836000848376557,
      "loss": 2.885,
      "step": 125414
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.943294048309326,
      "learning_rate": 0.00025835595752031915,
      "loss": 3.0878,
      "step": 125415
    },
    {
      "epoch": 1.63,
      "grad_norm": 4.232556343078613,
      "learning_rate": 0.0002583519065646151,
      "loss": 2.7408,
      "step": 125416
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.338352680206299,
      "learning_rate": 0.0002583478556166543,
      "loss": 2.8756,
      "step": 125417
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0094404220581055,
      "learning_rate": 0.00025834380467643737,
      "loss": 3.1851,
      "step": 125418
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9563874006271362,
      "learning_rate": 0.0002583397537439651,
      "loss": 3.1254,
      "step": 125419
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7272886037826538,
      "learning_rate": 0.0002583357028192382,
      "loss": 3.0871,
      "step": 125420
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.403517723083496,
      "learning_rate": 0.00025833165190225753,
      "loss": 3.0685,
      "step": 125421
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8251252174377441,
      "learning_rate": 0.0002583276009930238,
      "loss": 2.8163,
      "step": 125422
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9554754495620728,
      "learning_rate": 0.00025832355009153783,
      "loss": 2.9191,
      "step": 125423
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.453888416290283,
      "learning_rate": 0.0002583194991978002,
      "loss": 2.7419,
      "step": 125424
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1215639114379883,
      "learning_rate": 0.00025831544831181173,
      "loss": 3.1302,
      "step": 125425
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1748428344726562,
      "learning_rate": 0.00025831139743357325,
      "loss": 3.0595,
      "step": 125426
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.112372636795044,
      "learning_rate": 0.00025830734656308546,
      "loss": 3.0486,
      "step": 125427
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3866140842437744,
      "learning_rate": 0.0002583032957003491,
      "loss": 2.6441,
      "step": 125428
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8612451553344727,
      "learning_rate": 0.0002582992448453651,
      "loss": 3.1386,
      "step": 125429
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8436594009399414,
      "learning_rate": 0.00025829519399813393,
      "loss": 2.7808,
      "step": 125430
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6500478982925415,
      "learning_rate": 0.0002582911431586565,
      "loss": 2.8251,
      "step": 125431
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1120285987854004,
      "learning_rate": 0.00025828709232693343,
      "loss": 3.2106,
      "step": 125432
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8223515748977661,
      "learning_rate": 0.00025828304150296566,
      "loss": 3.0987,
      "step": 125433
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.979064702987671,
      "learning_rate": 0.00025827899068675385,
      "loss": 2.9985,
      "step": 125434
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.7440807819366455,
      "learning_rate": 0.0002582749398782989,
      "loss": 2.8431,
      "step": 125435
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.295504093170166,
      "learning_rate": 0.00025827088907760126,
      "loss": 2.8834,
      "step": 125436
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.5185608863830566,
      "learning_rate": 0.00025826683828466184,
      "loss": 3.0016,
      "step": 125437
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8039982318878174,
      "learning_rate": 0.00025826278749948147,
      "loss": 2.8721,
      "step": 125438
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0852372646331787,
      "learning_rate": 0.0002582587367220608,
      "loss": 2.9546,
      "step": 125439
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2039644718170166,
      "learning_rate": 0.00025825468595240065,
      "loss": 2.9318,
      "step": 125440
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.714084506034851,
      "learning_rate": 0.0002582506351905018,
      "loss": 2.988,
      "step": 125441
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.041171073913574,
      "learning_rate": 0.00025824658443636487,
      "loss": 3.009,
      "step": 125442
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.257188558578491,
      "learning_rate": 0.00025824253368999064,
      "loss": 2.8003,
      "step": 125443
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.6746007204055786,
      "learning_rate": 0.00025823848295137994,
      "loss": 3.011,
      "step": 125444
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.23093318939209,
      "learning_rate": 0.0002582344322205335,
      "loss": 2.9153,
      "step": 125445
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7824598550796509,
      "learning_rate": 0.0002582303814974521,
      "loss": 3.0045,
      "step": 125446
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.920344591140747,
      "learning_rate": 0.00025822633078213635,
      "loss": 2.9208,
      "step": 125447
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.057243585586548,
      "learning_rate": 0.00025822228007458734,
      "loss": 3.0789,
      "step": 125448
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5779409408569336,
      "learning_rate": 0.00025821822937480544,
      "loss": 3.1843,
      "step": 125449
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9223072528839111,
      "learning_rate": 0.0002582141786827916,
      "loss": 3.237,
      "step": 125450
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.061981439590454,
      "learning_rate": 0.0002582101279985465,
      "loss": 3.2517,
      "step": 125451
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0249640941619873,
      "learning_rate": 0.0002582060773220709,
      "loss": 3.0997,
      "step": 125452
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9902878999710083,
      "learning_rate": 0.0002582020266533656,
      "loss": 2.9403,
      "step": 125453
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9410456418991089,
      "learning_rate": 0.00025819797599243145,
      "loss": 2.7845,
      "step": 125454
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8445574045181274,
      "learning_rate": 0.000258193925339269,
      "loss": 3.0733,
      "step": 125455
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.210153341293335,
      "learning_rate": 0.00025818987469387904,
      "loss": 3.0481,
      "step": 125456
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.676030158996582,
      "learning_rate": 0.0002581858240562624,
      "loss": 2.936,
      "step": 125457
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1853229999542236,
      "learning_rate": 0.0002581817734264198,
      "loss": 2.9935,
      "step": 125458
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1468594074249268,
      "learning_rate": 0.000258177722804352,
      "loss": 2.6804,
      "step": 125459
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9739515781402588,
      "learning_rate": 0.00025817367219005985,
      "loss": 2.7637,
      "step": 125460
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2880096435546875,
      "learning_rate": 0.0002581696215835439,
      "loss": 3.0381,
      "step": 125461
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1628103256225586,
      "learning_rate": 0.000258165570984805,
      "loss": 2.9789,
      "step": 125462
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.5059895515441895,
      "learning_rate": 0.0002581615203938439,
      "loss": 3.1337,
      "step": 125463
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.394695520401001,
      "learning_rate": 0.0002581574698106614,
      "loss": 2.6889,
      "step": 125464
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8752611875534058,
      "learning_rate": 0.00025815341923525817,
      "loss": 2.8623,
      "step": 125465
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8957256078720093,
      "learning_rate": 0.00025814936866763515,
      "loss": 3.1237,
      "step": 125466
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.053173542022705,
      "learning_rate": 0.00025814531810779286,
      "loss": 3.2256,
      "step": 125467
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4363956451416016,
      "learning_rate": 0.0002581412675557321,
      "loss": 2.9255,
      "step": 125468
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2424089908599854,
      "learning_rate": 0.0002581372170114537,
      "loss": 3.0245,
      "step": 125469
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0726065635681152,
      "learning_rate": 0.0002581331664749584,
      "loss": 3.0614,
      "step": 125470
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8852150440216064,
      "learning_rate": 0.0002581291159462469,
      "loss": 2.9995,
      "step": 125471
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.009760618209839,
      "learning_rate": 0.00025812506542532005,
      "loss": 3.1249,
      "step": 125472
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7691267728805542,
      "learning_rate": 0.00025812101491217847,
      "loss": 3.0179,
      "step": 125473
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.624173641204834,
      "learning_rate": 0.0002581169644068231,
      "loss": 2.9566,
      "step": 125474
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.6097609996795654,
      "learning_rate": 0.00025811291390925444,
      "loss": 3.1155,
      "step": 125475
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1997780799865723,
      "learning_rate": 0.00025810886341947345,
      "loss": 3.206,
      "step": 125476
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.166006565093994,
      "learning_rate": 0.0002581048129374808,
      "loss": 2.9016,
      "step": 125477
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7422715425491333,
      "learning_rate": 0.00025810076246327723,
      "loss": 2.8569,
      "step": 125478
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1747279167175293,
      "learning_rate": 0.0002580967119968635,
      "loss": 3.2314,
      "step": 125479
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7582640647888184,
      "learning_rate": 0.00025809266153824044,
      "loss": 2.9845,
      "step": 125480
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0075125694274902,
      "learning_rate": 0.0002580886110874087,
      "loss": 3.316,
      "step": 125481
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.289301633834839,
      "learning_rate": 0.0002580845606443692,
      "loss": 2.952,
      "step": 125482
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9108161926269531,
      "learning_rate": 0.0002580805102091224,
      "loss": 2.9025,
      "step": 125483
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.236165761947632,
      "learning_rate": 0.0002580764597816694,
      "loss": 2.9997,
      "step": 125484
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1039304733276367,
      "learning_rate": 0.00025807240936201063,
      "loss": 2.9798,
      "step": 125485
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3843159675598145,
      "learning_rate": 0.0002580683589501471,
      "loss": 2.9292,
      "step": 125486
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.658236026763916,
      "learning_rate": 0.0002580643085460793,
      "loss": 2.6181,
      "step": 125487
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8162707090377808,
      "learning_rate": 0.00025806025814980823,
      "loss": 3.1899,
      "step": 125488
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7887918949127197,
      "learning_rate": 0.00025805620776133455,
      "loss": 2.9356,
      "step": 125489
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9282028675079346,
      "learning_rate": 0.0002580521573806591,
      "loss": 3.0889,
      "step": 125490
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9106518030166626,
      "learning_rate": 0.00025804810700778244,
      "loss": 2.8377,
      "step": 125491
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7129764556884766,
      "learning_rate": 0.0002580440566427054,
      "loss": 2.86,
      "step": 125492
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.133249044418335,
      "learning_rate": 0.00025804000628542885,
      "loss": 2.9411,
      "step": 125493
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.459709882736206,
      "learning_rate": 0.0002580359559359534,
      "loss": 2.944,
      "step": 125494
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7106537818908691,
      "learning_rate": 0.00025803190559427983,
      "loss": 2.8835,
      "step": 125495
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7527598142623901,
      "learning_rate": 0.00025802785526040907,
      "loss": 2.9703,
      "step": 125496
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.434715747833252,
      "learning_rate": 0.0002580238049343416,
      "loss": 3.0312,
      "step": 125497
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9936703443527222,
      "learning_rate": 0.0002580197546160783,
      "loss": 2.8647,
      "step": 125498
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7437031269073486,
      "learning_rate": 0.0002580157043056199,
      "loss": 3.1358,
      "step": 125499
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7304940223693848,
      "learning_rate": 0.00025801165400296717,
      "loss": 2.7554,
      "step": 125500
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.412653923034668,
      "learning_rate": 0.0002580076037081209,
      "loss": 2.8679,
      "step": 125501
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.039222002029419,
      "learning_rate": 0.00025800355342108194,
      "loss": 3.0043,
      "step": 125502
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9877846240997314,
      "learning_rate": 0.0002579995031418508,
      "loss": 2.9886,
      "step": 125503
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0184388160705566,
      "learning_rate": 0.0002579954528704283,
      "loss": 2.9969,
      "step": 125504
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0840137004852295,
      "learning_rate": 0.00025799140260681524,
      "loss": 2.8889,
      "step": 125505
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.981247901916504,
      "learning_rate": 0.00025798735235101235,
      "loss": 2.9693,
      "step": 125506
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.250094175338745,
      "learning_rate": 0.0002579833021030205,
      "loss": 3.0511,
      "step": 125507
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.76389741897583,
      "learning_rate": 0.0002579792518628404,
      "loss": 2.8255,
      "step": 125508
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0817770957946777,
      "learning_rate": 0.0002579752016304726,
      "loss": 3.1507,
      "step": 125509
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0613887310028076,
      "learning_rate": 0.00025797115140591804,
      "loss": 3.1078,
      "step": 125510
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8761194944381714,
      "learning_rate": 0.00025796710118917743,
      "loss": 2.9596,
      "step": 125511
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.116934299468994,
      "learning_rate": 0.00025796305098025154,
      "loss": 2.916,
      "step": 125512
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8947780132293701,
      "learning_rate": 0.0002579590007791411,
      "loss": 2.8134,
      "step": 125513
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2385902404785156,
      "learning_rate": 0.00025795495058584687,
      "loss": 2.9694,
      "step": 125514
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3224706649780273,
      "learning_rate": 0.0002579509004003697,
      "loss": 2.9006,
      "step": 125515
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9579025506973267,
      "learning_rate": 0.0002579468502227102,
      "loss": 2.9951,
      "step": 125516
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.957413911819458,
      "learning_rate": 0.00025794280005286916,
      "loss": 2.921,
      "step": 125517
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4326529502868652,
      "learning_rate": 0.00025793874989084737,
      "loss": 3.2438,
      "step": 125518
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9517664909362793,
      "learning_rate": 0.0002579346997366455,
      "loss": 3.0823,
      "step": 125519
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2226688861846924,
      "learning_rate": 0.0002579306495902644,
      "loss": 2.9311,
      "step": 125520
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7875230312347412,
      "learning_rate": 0.00025792659945170486,
      "loss": 2.947,
      "step": 125521
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7917895317077637,
      "learning_rate": 0.00025792254932096746,
      "loss": 2.9747,
      "step": 125522
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.576791524887085,
      "learning_rate": 0.0002579184991980531,
      "loss": 2.849,
      "step": 125523
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.7696194648742676,
      "learning_rate": 0.0002579144490829625,
      "loss": 3.23,
      "step": 125524
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1718692779541016,
      "learning_rate": 0.0002579103989756963,
      "loss": 3.3532,
      "step": 125525
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1901612281799316,
      "learning_rate": 0.0002579063488762554,
      "loss": 2.8914,
      "step": 125526
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.207763195037842,
      "learning_rate": 0.00025790229878464065,
      "loss": 2.8476,
      "step": 125527
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8921810388565063,
      "learning_rate": 0.0002578982487008525,
      "loss": 3.1146,
      "step": 125528
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1549649238586426,
      "learning_rate": 0.00025789419862489186,
      "loss": 3.0453,
      "step": 125529
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8513330221176147,
      "learning_rate": 0.0002578901485567595,
      "loss": 2.8889,
      "step": 125530
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7721552848815918,
      "learning_rate": 0.00025788609849645614,
      "loss": 3.0777,
      "step": 125531
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3902714252471924,
      "learning_rate": 0.00025788204844398257,
      "loss": 3.1278,
      "step": 125532
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.1163601875305176,
      "learning_rate": 0.00025787799839933965,
      "loss": 2.9207,
      "step": 125533
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.933274745941162,
      "learning_rate": 0.0002578739483625279,
      "loss": 2.8782,
      "step": 125534
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8131253719329834,
      "learning_rate": 0.0002578698983335481,
      "loss": 2.9324,
      "step": 125535
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7360563278198242,
      "learning_rate": 0.0002578658483124011,
      "loss": 3.0598,
      "step": 125536
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4442782402038574,
      "learning_rate": 0.00025786179829908766,
      "loss": 2.8904,
      "step": 125537
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2184488773345947,
      "learning_rate": 0.0002578577482936085,
      "loss": 2.9142,
      "step": 125538
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8775821924209595,
      "learning_rate": 0.0002578536982959645,
      "loss": 3.021,
      "step": 125539
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.11428165435791,
      "learning_rate": 0.0002578496483061562,
      "loss": 2.8581,
      "step": 125540
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8430612087249756,
      "learning_rate": 0.00025784559832418445,
      "loss": 2.8294,
      "step": 125541
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.977655291557312,
      "learning_rate": 0.00025784154835004996,
      "loss": 3.0307,
      "step": 125542
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8496606349945068,
      "learning_rate": 0.00025783749838375356,
      "loss": 2.8885,
      "step": 125543
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.099290609359741,
      "learning_rate": 0.00025783344842529593,
      "loss": 2.8771,
      "step": 125544
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.179480791091919,
      "learning_rate": 0.000257829398474678,
      "loss": 2.8849,
      "step": 125545
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.760345220565796,
      "learning_rate": 0.0002578253485319002,
      "loss": 3.1717,
      "step": 125546
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0057272911071777,
      "learning_rate": 0.00025782129859696354,
      "loss": 3.2397,
      "step": 125547
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3192851543426514,
      "learning_rate": 0.0002578172486698687,
      "loss": 2.9499,
      "step": 125548
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.15936541557312,
      "learning_rate": 0.00025781319875061635,
      "loss": 2.8276,
      "step": 125549
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.0567867755889893,
      "learning_rate": 0.0002578091488392074,
      "loss": 2.8384,
      "step": 125550
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.013197660446167,
      "learning_rate": 0.00025780509893564256,
      "loss": 2.9659,
      "step": 125551
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.4809577465057373,
      "learning_rate": 0.00025780104903992256,
      "loss": 2.7845,
      "step": 125552
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.2565855979919434,
      "learning_rate": 0.0002577969991520481,
      "loss": 2.8754,
      "step": 125553
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7489514350891113,
      "learning_rate": 0.00025779294927201995,
      "loss": 3.0684,
      "step": 125554
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.621760368347168,
      "learning_rate": 0.00025778889939983884,
      "loss": 2.6809,
      "step": 125555
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.043978691101074,
      "learning_rate": 0.00025778484953550563,
      "loss": 2.9141,
      "step": 125556
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.4023656845092773,
      "learning_rate": 0.00025778079967902107,
      "loss": 2.9324,
      "step": 125557
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8758866786956787,
      "learning_rate": 0.0002577767498303858,
      "loss": 2.955,
      "step": 125558
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.8559978008270264,
      "learning_rate": 0.00025777269998960065,
      "loss": 2.9125,
      "step": 125559
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.3094239234924316,
      "learning_rate": 0.0002577686501566663,
      "loss": 3.1057,
      "step": 125560
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.9903162717819214,
      "learning_rate": 0.00025776460033158355,
      "loss": 3.0652,
      "step": 125561
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8772059679031372,
      "learning_rate": 0.0002577605505143532,
      "loss": 2.8609,
      "step": 125562
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.032130241394043,
      "learning_rate": 0.000257756500704976,
      "loss": 3.135,
      "step": 125563
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8411917686462402,
      "learning_rate": 0.00025775245090345263,
      "loss": 2.8057,
      "step": 125564
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.2402567863464355,
      "learning_rate": 0.0002577484011097838,
      "loss": 3.1001,
      "step": 125565
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.158054828643799,
      "learning_rate": 0.0002577443513239705,
      "loss": 3.0358,
      "step": 125566
    },
    {
      "epoch": 1.63,
      "grad_norm": 2.182130813598633,
      "learning_rate": 0.0002577403015460132,
      "loss": 2.7436,
      "step": 125567
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.7206469774246216,
      "learning_rate": 0.0002577362517759128,
      "loss": 2.7901,
      "step": 125568
    },
    {
      "epoch": 1.63,
      "grad_norm": 1.8790267705917358,
      "learning_rate": 0.00025773220201367005,
      "loss": 2.8543,
      "step": 125569
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8462705612182617,
      "learning_rate": 0.0002577281522592856,
      "loss": 2.9403,
      "step": 125570
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2382466793060303,
      "learning_rate": 0.00025772410251276035,
      "loss": 3.1082,
      "step": 125571
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.549779176712036,
      "learning_rate": 0.000257720052774095,
      "loss": 3.1997,
      "step": 125572
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0275862216949463,
      "learning_rate": 0.00025771600304329024,
      "loss": 2.8491,
      "step": 125573
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8414981365203857,
      "learning_rate": 0.00025771195332034697,
      "loss": 2.7428,
      "step": 125574
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.894514560699463,
      "learning_rate": 0.00025770790360526584,
      "loss": 2.8399,
      "step": 125575
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7291479110717773,
      "learning_rate": 0.0002577038538980475,
      "loss": 2.8257,
      "step": 125576
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2930562496185303,
      "learning_rate": 0.0002576998041986929,
      "loss": 2.9611,
      "step": 125577
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.040004253387451,
      "learning_rate": 0.0002576957545072026,
      "loss": 3.3878,
      "step": 125578
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8969155550003052,
      "learning_rate": 0.00025769170482357754,
      "loss": 2.7095,
      "step": 125579
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.011273145675659,
      "learning_rate": 0.00025768765514781836,
      "loss": 2.9118,
      "step": 125580
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.87110435962677,
      "learning_rate": 0.00025768360547992594,
      "loss": 3.0779,
      "step": 125581
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7206547260284424,
      "learning_rate": 0.00025767955581990087,
      "loss": 3.0144,
      "step": 125582
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.955545425415039,
      "learning_rate": 0.00025767550616774396,
      "loss": 2.8977,
      "step": 125583
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.052547931671143,
      "learning_rate": 0.000257671456523456,
      "loss": 2.9666,
      "step": 125584
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1086809635162354,
      "learning_rate": 0.0002576674068870376,
      "loss": 3.1989,
      "step": 125585
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.617008686065674,
      "learning_rate": 0.0002576633572584897,
      "loss": 3.1062,
      "step": 125586
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1558749675750732,
      "learning_rate": 0.000257659307637813,
      "loss": 2.9033,
      "step": 125587
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.1396331787109375,
      "learning_rate": 0.00025765525802500834,
      "loss": 2.9085,
      "step": 125588
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.131218671798706,
      "learning_rate": 0.00025765120842007625,
      "loss": 3.0203,
      "step": 125589
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1906239986419678,
      "learning_rate": 0.00025764715882301757,
      "loss": 2.7452,
      "step": 125590
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8839075565338135,
      "learning_rate": 0.00025764310923383313,
      "loss": 3.0638,
      "step": 125591
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.2667274475097656,
      "learning_rate": 0.0002576390596525236,
      "loss": 2.8047,
      "step": 125592
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9819369316101074,
      "learning_rate": 0.0002576350100790897,
      "loss": 2.9245,
      "step": 125593
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.141897439956665,
      "learning_rate": 0.0002576309605135325,
      "loss": 3.122,
      "step": 125594
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0614898204803467,
      "learning_rate": 0.0002576269109558523,
      "loss": 2.8879,
      "step": 125595
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7474936246871948,
      "learning_rate": 0.0002576228614060501,
      "loss": 3.0872,
      "step": 125596
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7603152990341187,
      "learning_rate": 0.00025761881186412664,
      "loss": 2.975,
      "step": 125597
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2654576301574707,
      "learning_rate": 0.0002576147623300826,
      "loss": 2.9947,
      "step": 125598
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9311392307281494,
      "learning_rate": 0.0002576107128039188,
      "loss": 3.1126,
      "step": 125599
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9607386589050293,
      "learning_rate": 0.00025760666328563606,
      "loss": 3.1507,
      "step": 125600
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7303622961044312,
      "learning_rate": 0.0002576026137752349,
      "loss": 3.0481,
      "step": 125601
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.138763189315796,
      "learning_rate": 0.0002575985642727163,
      "loss": 3.0467,
      "step": 125602
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.883638858795166,
      "learning_rate": 0.0002575945147780808,
      "loss": 2.8854,
      "step": 125603
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.725600004196167,
      "learning_rate": 0.00025759046529132937,
      "loss": 3.0104,
      "step": 125604
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2803895473480225,
      "learning_rate": 0.00025758641581246266,
      "loss": 3.0834,
      "step": 125605
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.189725875854492,
      "learning_rate": 0.0002575823663414815,
      "loss": 2.9129,
      "step": 125606
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.0332231521606445,
      "learning_rate": 0.00025757831687838655,
      "loss": 3.1662,
      "step": 125607
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.312809944152832,
      "learning_rate": 0.00025757426742317854,
      "loss": 3.0211,
      "step": 125608
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4342901706695557,
      "learning_rate": 0.00025757021797585827,
      "loss": 2.8522,
      "step": 125609
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.191394805908203,
      "learning_rate": 0.00025756616853642655,
      "loss": 3.294,
      "step": 125610
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.034686803817749,
      "learning_rate": 0.000257562119104884,
      "loss": 2.8082,
      "step": 125611
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.127702474594116,
      "learning_rate": 0.0002575580696812316,
      "loss": 3.0465,
      "step": 125612
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2427265644073486,
      "learning_rate": 0.00025755402026546987,
      "loss": 2.9644,
      "step": 125613
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7616674900054932,
      "learning_rate": 0.00025754997085759965,
      "loss": 3.324,
      "step": 125614
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.927918791770935,
      "learning_rate": 0.00025754592145762166,
      "loss": 2.793,
      "step": 125615
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.661065101623535,
      "learning_rate": 0.0002575418720655367,
      "loss": 2.9257,
      "step": 125616
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1119682788848877,
      "learning_rate": 0.00025753782268134553,
      "loss": 2.9948,
      "step": 125617
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9526787996292114,
      "learning_rate": 0.00025753377330504896,
      "loss": 2.922,
      "step": 125618
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.0411200523376465,
      "learning_rate": 0.00025752972393664755,
      "loss": 2.925,
      "step": 125619
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.060483455657959,
      "learning_rate": 0.00025752567457614216,
      "loss": 3.0264,
      "step": 125620
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1952221393585205,
      "learning_rate": 0.00025752162522353357,
      "loss": 3.0702,
      "step": 125621
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5140185356140137,
      "learning_rate": 0.0002575175758788225,
      "loss": 2.9815,
      "step": 125622
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4754221439361572,
      "learning_rate": 0.00025751352654200977,
      "loss": 2.9786,
      "step": 125623
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.13368558883667,
      "learning_rate": 0.00025750947721309614,
      "loss": 3.081,
      "step": 125624
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3188581466674805,
      "learning_rate": 0.00025750542789208217,
      "loss": 2.9802,
      "step": 125625
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.059255838394165,
      "learning_rate": 0.0002575013785789687,
      "loss": 2.9362,
      "step": 125626
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.561474084854126,
      "learning_rate": 0.0002574973292737566,
      "loss": 3.1857,
      "step": 125627
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9747356176376343,
      "learning_rate": 0.00025749327997644656,
      "loss": 2.8329,
      "step": 125628
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7576788663864136,
      "learning_rate": 0.0002574892306870393,
      "loss": 2.773,
      "step": 125629
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1762583255767822,
      "learning_rate": 0.0002574851814055357,
      "loss": 2.9642,
      "step": 125630
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8976606130599976,
      "learning_rate": 0.0002574811321319363,
      "loss": 3.3383,
      "step": 125631
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.738732099533081,
      "learning_rate": 0.00025747708286624196,
      "loss": 2.9374,
      "step": 125632
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1232857704162598,
      "learning_rate": 0.00025747303360845346,
      "loss": 3.3795,
      "step": 125633
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1079938411712646,
      "learning_rate": 0.0002574689843585715,
      "loss": 3.1249,
      "step": 125634
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.9920403957366943,
      "learning_rate": 0.00025746493511659684,
      "loss": 2.8204,
      "step": 125635
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0817763805389404,
      "learning_rate": 0.00025746088588253036,
      "loss": 2.6624,
      "step": 125636
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7905356884002686,
      "learning_rate": 0.00025745683665637264,
      "loss": 3.0816,
      "step": 125637
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1760854721069336,
      "learning_rate": 0.00025745278743812444,
      "loss": 2.9582,
      "step": 125638
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2081074714660645,
      "learning_rate": 0.00025744873822778665,
      "loss": 2.9339,
      "step": 125639
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.820764422416687,
      "learning_rate": 0.00025744468902535984,
      "loss": 2.9655,
      "step": 125640
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2832565307617188,
      "learning_rate": 0.0002574406398308449,
      "loss": 3.1403,
      "step": 125641
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8629909753799438,
      "learning_rate": 0.00025743659064424266,
      "loss": 2.8206,
      "step": 125642
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1297707557678223,
      "learning_rate": 0.00025743254146555364,
      "loss": 3.0453,
      "step": 125643
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1079978942871094,
      "learning_rate": 0.0002574284922947788,
      "loss": 3.0684,
      "step": 125644
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3386735916137695,
      "learning_rate": 0.00025742444313191877,
      "loss": 2.8941,
      "step": 125645
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.719120740890503,
      "learning_rate": 0.0002574203939769743,
      "loss": 2.9161,
      "step": 125646
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9780824184417725,
      "learning_rate": 0.00025741634482994623,
      "loss": 2.9064,
      "step": 125647
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.3211700916290283,
      "learning_rate": 0.0002574122956908352,
      "loss": 2.9984,
      "step": 125648
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3562941551208496,
      "learning_rate": 0.00025740824655964215,
      "loss": 2.9483,
      "step": 125649
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1328606605529785,
      "learning_rate": 0.00025740419743636763,
      "loss": 2.8979,
      "step": 125650
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.006598711013794,
      "learning_rate": 0.0002574001483210125,
      "loss": 2.8113,
      "step": 125651
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5375232696533203,
      "learning_rate": 0.00025739609921357747,
      "loss": 2.9394,
      "step": 125652
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8038625717163086,
      "learning_rate": 0.0002573920501140633,
      "loss": 3.1572,
      "step": 125653
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9167237281799316,
      "learning_rate": 0.0002573880010224707,
      "loss": 3.1497,
      "step": 125654
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.115738868713379,
      "learning_rate": 0.0002573839519388006,
      "loss": 2.971,
      "step": 125655
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7795908451080322,
      "learning_rate": 0.00025737990286305354,
      "loss": 2.8026,
      "step": 125656
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1961371898651123,
      "learning_rate": 0.0002573758537952304,
      "loss": 3.1708,
      "step": 125657
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.349803924560547,
      "learning_rate": 0.0002573718047353319,
      "loss": 3.1108,
      "step": 125658
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7602462768554688,
      "learning_rate": 0.00025736775568335876,
      "loss": 2.8808,
      "step": 125659
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9971765279769897,
      "learning_rate": 0.0002573637066393117,
      "loss": 3.2469,
      "step": 125660
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9151376485824585,
      "learning_rate": 0.00025735965760319165,
      "loss": 2.8858,
      "step": 125661
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9657543897628784,
      "learning_rate": 0.0002573556085749992,
      "loss": 3.0958,
      "step": 125662
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.115041971206665,
      "learning_rate": 0.0002573515595547351,
      "loss": 2.7143,
      "step": 125663
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9818164110183716,
      "learning_rate": 0.0002573475105424002,
      "loss": 2.9153,
      "step": 125664
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2101829051971436,
      "learning_rate": 0.00025734346153799515,
      "loss": 2.9827,
      "step": 125665
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9520580768585205,
      "learning_rate": 0.00025733941254152084,
      "loss": 2.9414,
      "step": 125666
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3397860527038574,
      "learning_rate": 0.00025733536355297795,
      "loss": 2.9846,
      "step": 125667
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.813514232635498,
      "learning_rate": 0.00025733131457236714,
      "loss": 2.9951,
      "step": 125668
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0368857383728027,
      "learning_rate": 0.0002573272655996893,
      "loss": 3.0447,
      "step": 125669
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9531002044677734,
      "learning_rate": 0.00025732321663494507,
      "loss": 3.0903,
      "step": 125670
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6998116970062256,
      "learning_rate": 0.00025731916767813526,
      "loss": 3.149,
      "step": 125671
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8703227043151855,
      "learning_rate": 0.0002573151187292606,
      "loss": 3.0835,
      "step": 125672
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.467635154724121,
      "learning_rate": 0.00025731106978832207,
      "loss": 2.8801,
      "step": 125673
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4956865310668945,
      "learning_rate": 0.00025730702085532,
      "loss": 2.8754,
      "step": 125674
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5861597061157227,
      "learning_rate": 0.0002573029719302554,
      "loss": 3.2163,
      "step": 125675
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7014496326446533,
      "learning_rate": 0.000257298923013129,
      "loss": 2.9352,
      "step": 125676
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9080740213394165,
      "learning_rate": 0.00025729487410394155,
      "loss": 2.7726,
      "step": 125677
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6524178981781006,
      "learning_rate": 0.00025729082520269375,
      "loss": 2.7916,
      "step": 125678
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5459141731262207,
      "learning_rate": 0.00025728677630938653,
      "loss": 2.7836,
      "step": 125679
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.636134624481201,
      "learning_rate": 0.0002572827274240204,
      "loss": 2.8574,
      "step": 125680
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.572522521018982,
      "learning_rate": 0.0002572786785465962,
      "loss": 2.8536,
      "step": 125681
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.7644600868225098,
      "learning_rate": 0.00025727462967711464,
      "loss": 2.9525,
      "step": 125682
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2224085330963135,
      "learning_rate": 0.0002572705808155766,
      "loss": 2.9456,
      "step": 125683
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.163973331451416,
      "learning_rate": 0.00025726653196198274,
      "loss": 3.1194,
      "step": 125684
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2185025215148926,
      "learning_rate": 0.000257262483116334,
      "loss": 3.151,
      "step": 125685
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1329169273376465,
      "learning_rate": 0.0002572584342786308,
      "loss": 2.9355,
      "step": 125686
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1599769592285156,
      "learning_rate": 0.00025725438544887415,
      "loss": 2.8892,
      "step": 125687
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0027284622192383,
      "learning_rate": 0.00025725033662706465,
      "loss": 3.0349,
      "step": 125688
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.341399908065796,
      "learning_rate": 0.0002572462878132031,
      "loss": 2.9427,
      "step": 125689
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3166353702545166,
      "learning_rate": 0.0002572422390072903,
      "loss": 2.8946,
      "step": 125690
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9535481929779053,
      "learning_rate": 0.0002572381902093271,
      "loss": 2.9774,
      "step": 125691
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4147002696990967,
      "learning_rate": 0.000257234141419314,
      "loss": 2.748,
      "step": 125692
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3886518478393555,
      "learning_rate": 0.0002572300926372519,
      "loss": 3.2087,
      "step": 125693
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.795853614807129,
      "learning_rate": 0.00025722604386314147,
      "loss": 2.8098,
      "step": 125694
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3666045665740967,
      "learning_rate": 0.00025722199509698357,
      "loss": 2.8302,
      "step": 125695
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9354393482208252,
      "learning_rate": 0.00025721794633877895,
      "loss": 2.8943,
      "step": 125696
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4434168338775635,
      "learning_rate": 0.00025721389758852836,
      "loss": 2.9813,
      "step": 125697
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.27554988861084,
      "learning_rate": 0.00025720984884623245,
      "loss": 3.0001,
      "step": 125698
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.703803062438965,
      "learning_rate": 0.00025720580011189204,
      "loss": 3.0165,
      "step": 125699
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3092610836029053,
      "learning_rate": 0.00025720175138550783,
      "loss": 3.2975,
      "step": 125700
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.241917610168457,
      "learning_rate": 0.0002571977026670807,
      "loss": 2.9885,
      "step": 125701
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0818278789520264,
      "learning_rate": 0.00025719365395661126,
      "loss": 2.7595,
      "step": 125702
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9107846021652222,
      "learning_rate": 0.0002571896052541005,
      "loss": 3.1219,
      "step": 125703
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.826413869857788,
      "learning_rate": 0.0002571855565595488,
      "loss": 3.1173,
      "step": 125704
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.913184642791748,
      "learning_rate": 0.00025718150787295717,
      "loss": 3.0617,
      "step": 125705
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.452749013900757,
      "learning_rate": 0.0002571774591943263,
      "loss": 2.9572,
      "step": 125706
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4283578395843506,
      "learning_rate": 0.0002571734105236569,
      "loss": 2.8039,
      "step": 125707
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2219550609588623,
      "learning_rate": 0.0002571693618609498,
      "loss": 3.0033,
      "step": 125708
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7805333137512207,
      "learning_rate": 0.0002571653132062059,
      "loss": 2.9782,
      "step": 125709
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.211341142654419,
      "learning_rate": 0.00025716126455942564,
      "loss": 3.0792,
      "step": 125710
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.228360414505005,
      "learning_rate": 0.0002571572159206099,
      "loss": 2.8097,
      "step": 125711
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3682961463928223,
      "learning_rate": 0.0002571531672897594,
      "loss": 2.8955,
      "step": 125712
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.332655191421509,
      "learning_rate": 0.00025714911866687497,
      "loss": 3.1703,
      "step": 125713
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2820091247558594,
      "learning_rate": 0.0002571450700519573,
      "loss": 2.8863,
      "step": 125714
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.474968910217285,
      "learning_rate": 0.00025714102144500724,
      "loss": 3.0502,
      "step": 125715
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8490110635757446,
      "learning_rate": 0.0002571369728460255,
      "loss": 3.154,
      "step": 125716
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1903164386749268,
      "learning_rate": 0.0002571329242550127,
      "loss": 2.9555,
      "step": 125717
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3298282623291016,
      "learning_rate": 0.00025712887567196975,
      "loss": 3.0644,
      "step": 125718
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.033853054046631,
      "learning_rate": 0.0002571248270968974,
      "loss": 2.8777,
      "step": 125719
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3462235927581787,
      "learning_rate": 0.00025712077852979624,
      "loss": 3.1043,
      "step": 125720
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.562958002090454,
      "learning_rate": 0.00025711672997066713,
      "loss": 3.1487,
      "step": 125721
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.870463490486145,
      "learning_rate": 0.0002571126814195111,
      "loss": 3.139,
      "step": 125722
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7289247512817383,
      "learning_rate": 0.00025710863287632836,
      "loss": 2.9842,
      "step": 125723
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0210721492767334,
      "learning_rate": 0.00025710458434112,
      "loss": 2.9142,
      "step": 125724
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.007235527038574,
      "learning_rate": 0.00025710053581388666,
      "loss": 2.8353,
      "step": 125725
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0239408016204834,
      "learning_rate": 0.0002570964872946292,
      "loss": 3.0918,
      "step": 125726
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6506590843200684,
      "learning_rate": 0.00025709243878334825,
      "loss": 2.9696,
      "step": 125727
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.299830913543701,
      "learning_rate": 0.0002570883902800448,
      "loss": 2.9421,
      "step": 125728
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9478949308395386,
      "learning_rate": 0.0002570843417847193,
      "loss": 2.7259,
      "step": 125729
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9518334865570068,
      "learning_rate": 0.00025708029329737263,
      "loss": 2.8278,
      "step": 125730
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2252373695373535,
      "learning_rate": 0.00025707624481800557,
      "loss": 2.844,
      "step": 125731
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.763895034790039,
      "learning_rate": 0.00025707219634661877,
      "loss": 2.8628,
      "step": 125732
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3466007709503174,
      "learning_rate": 0.0002570681478832131,
      "loss": 2.9676,
      "step": 125733
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0436742305755615,
      "learning_rate": 0.00025706409942778934,
      "loss": 3.0814,
      "step": 125734
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.222346305847168,
      "learning_rate": 0.0002570600509803481,
      "loss": 2.9117,
      "step": 125735
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9245089292526245,
      "learning_rate": 0.00025705600254089027,
      "loss": 2.9016,
      "step": 125736
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3717658519744873,
      "learning_rate": 0.0002570519541094165,
      "loss": 3.2598,
      "step": 125737
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8564096689224243,
      "learning_rate": 0.00025704790568592756,
      "loss": 3.1483,
      "step": 125738
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6715328693389893,
      "learning_rate": 0.00025704385727042423,
      "loss": 2.9527,
      "step": 125739
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.215272903442383,
      "learning_rate": 0.00025703980886290735,
      "loss": 3.2142,
      "step": 125740
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.561323404312134,
      "learning_rate": 0.00025703576046337746,
      "loss": 2.7396,
      "step": 125741
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.187971353530884,
      "learning_rate": 0.0002570317120718355,
      "loss": 3.1709,
      "step": 125742
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8797807693481445,
      "learning_rate": 0.0002570276636882822,
      "loss": 2.9605,
      "step": 125743
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.957805871963501,
      "learning_rate": 0.00025702361531271814,
      "loss": 2.8415,
      "step": 125744
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9687234163284302,
      "learning_rate": 0.00025701956694514425,
      "loss": 3.0218,
      "step": 125745
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2289316654205322,
      "learning_rate": 0.0002570155185855613,
      "loss": 3.0846,
      "step": 125746
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.5631223917007446,
      "learning_rate": 0.0002570114702339699,
      "loss": 2.6072,
      "step": 125747
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8463008403778076,
      "learning_rate": 0.00025700742189037085,
      "loss": 3.0957,
      "step": 125748
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9696794748306274,
      "learning_rate": 0.000257003373554765,
      "loss": 3.1746,
      "step": 125749
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8328571319580078,
      "learning_rate": 0.00025699932522715304,
      "loss": 2.9769,
      "step": 125750
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.478168249130249,
      "learning_rate": 0.00025699527690753573,
      "loss": 3.0665,
      "step": 125751
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.944563388824463,
      "learning_rate": 0.00025699122859591377,
      "loss": 2.7907,
      "step": 125752
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2536377906799316,
      "learning_rate": 0.000256987180292288,
      "loss": 2.9428,
      "step": 125753
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9806751012802124,
      "learning_rate": 0.00025698313199665906,
      "loss": 2.9797,
      "step": 125754
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8462941646575928,
      "learning_rate": 0.0002569790837090278,
      "loss": 2.7096,
      "step": 125755
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3334219455718994,
      "learning_rate": 0.0002569750354293949,
      "loss": 2.8015,
      "step": 125756
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3321926593780518,
      "learning_rate": 0.0002569709871577612,
      "loss": 3.1193,
      "step": 125757
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0222225189208984,
      "learning_rate": 0.00025696693889412745,
      "loss": 2.7932,
      "step": 125758
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0606346130371094,
      "learning_rate": 0.0002569628906384943,
      "loss": 2.9593,
      "step": 125759
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1405155658721924,
      "learning_rate": 0.0002569588423908625,
      "loss": 3.1618,
      "step": 125760
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1152877807617188,
      "learning_rate": 0.00025695479415123294,
      "loss": 3.1565,
      "step": 125761
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9849839210510254,
      "learning_rate": 0.0002569507459196063,
      "loss": 3.1451,
      "step": 125762
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9978418350219727,
      "learning_rate": 0.00025694669769598327,
      "loss": 3.0841,
      "step": 125763
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1990458965301514,
      "learning_rate": 0.0002569426494803648,
      "loss": 3.2653,
      "step": 125764
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.486610174179077,
      "learning_rate": 0.0002569386012727514,
      "loss": 2.9117,
      "step": 125765
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0190231800079346,
      "learning_rate": 0.0002569345530731439,
      "loss": 2.9601,
      "step": 125766
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3083677291870117,
      "learning_rate": 0.0002569305048815431,
      "loss": 2.9949,
      "step": 125767
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8860501050949097,
      "learning_rate": 0.00025692645669794973,
      "loss": 3.0151,
      "step": 125768
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.039738893508911,
      "learning_rate": 0.00025692240852236457,
      "loss": 2.877,
      "step": 125769
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7298592329025269,
      "learning_rate": 0.0002569183603547884,
      "loss": 2.7355,
      "step": 125770
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.957972764968872,
      "learning_rate": 0.00025691431219522184,
      "loss": 2.8223,
      "step": 125771
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2963168621063232,
      "learning_rate": 0.00025691026404366573,
      "loss": 2.7466,
      "step": 125772
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.985274314880371,
      "learning_rate": 0.0002569062159001208,
      "loss": 3.0352,
      "step": 125773
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.696284294128418,
      "learning_rate": 0.00025690216776458785,
      "loss": 2.953,
      "step": 125774
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8150310516357422,
      "learning_rate": 0.0002568981196370676,
      "loss": 3.0939,
      "step": 125775
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8597829341888428,
      "learning_rate": 0.0002568940715175609,
      "loss": 2.812,
      "step": 125776
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.3144145011901855,
      "learning_rate": 0.0002568900234060682,
      "loss": 3.0525,
      "step": 125777
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.141444206237793,
      "learning_rate": 0.0002568859753025906,
      "loss": 3.0886,
      "step": 125778
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0663599967956543,
      "learning_rate": 0.00025688192720712866,
      "loss": 3.1872,
      "step": 125779
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6050267219543457,
      "learning_rate": 0.00025687787911968317,
      "loss": 3.1065,
      "step": 125780
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.4061264991760254,
      "learning_rate": 0.0002568738310402549,
      "loss": 3.0463,
      "step": 125781
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.042377471923828,
      "learning_rate": 0.0002568697829688446,
      "loss": 2.7212,
      "step": 125782
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0955686569213867,
      "learning_rate": 0.00025686573490545317,
      "loss": 2.8673,
      "step": 125783
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9424360990524292,
      "learning_rate": 0.00025686168685008104,
      "loss": 3.0465,
      "step": 125784
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7237424850463867,
      "learning_rate": 0.0002568576388027292,
      "loss": 2.7011,
      "step": 125785
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.4312970638275146,
      "learning_rate": 0.0002568535907633983,
      "loss": 2.9897,
      "step": 125786
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8763254880905151,
      "learning_rate": 0.0002568495427320891,
      "loss": 2.9876,
      "step": 125787
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1831421852111816,
      "learning_rate": 0.00025684549470880247,
      "loss": 2.6684,
      "step": 125788
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2286007404327393,
      "learning_rate": 0.00025684144669353915,
      "loss": 2.5346,
      "step": 125789
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6605520248413086,
      "learning_rate": 0.0002568373986862997,
      "loss": 2.9726,
      "step": 125790
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.074918270111084,
      "learning_rate": 0.000256833350687085,
      "loss": 3.3006,
      "step": 125791
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0127055644989014,
      "learning_rate": 0.0002568293026958957,
      "loss": 2.9885,
      "step": 125792
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.923665165901184,
      "learning_rate": 0.0002568252547127328,
      "loss": 3.1837,
      "step": 125793
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8531174659729004,
      "learning_rate": 0.00025682120673759684,
      "loss": 3.067,
      "step": 125794
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0361032485961914,
      "learning_rate": 0.00025681715877048873,
      "loss": 2.9403,
      "step": 125795
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.420442581176758,
      "learning_rate": 0.00025681311081140904,
      "loss": 2.866,
      "step": 125796
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5356369018554688,
      "learning_rate": 0.00025680906286035856,
      "loss": 2.9084,
      "step": 125797
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.486046552658081,
      "learning_rate": 0.0002568050149173381,
      "loss": 2.9166,
      "step": 125798
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9239996671676636,
      "learning_rate": 0.0002568009669823485,
      "loss": 3.3544,
      "step": 125799
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1386823654174805,
      "learning_rate": 0.00025679691905539033,
      "loss": 2.969,
      "step": 125800
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.315314292907715,
      "learning_rate": 0.00025679287113646455,
      "loss": 3.1793,
      "step": 125801
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.066714286804199,
      "learning_rate": 0.0002567888232255717,
      "loss": 3.2287,
      "step": 125802
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0434865951538086,
      "learning_rate": 0.0002567847753227126,
      "loss": 3.1436,
      "step": 125803
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.376652717590332,
      "learning_rate": 0.000256780727427888,
      "loss": 2.852,
      "step": 125804
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3779006004333496,
      "learning_rate": 0.0002567766795410987,
      "loss": 3.3353,
      "step": 125805
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3253235816955566,
      "learning_rate": 0.0002567726316623455,
      "loss": 3.0906,
      "step": 125806
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.325352907180786,
      "learning_rate": 0.0002567685837916291,
      "loss": 3.106,
      "step": 125807
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.3498759269714355,
      "learning_rate": 0.0002567645359289502,
      "loss": 2.9179,
      "step": 125808
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8823469877243042,
      "learning_rate": 0.00025676048807430946,
      "loss": 3.0479,
      "step": 125809
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8237296342849731,
      "learning_rate": 0.0002567564402277079,
      "loss": 2.9349,
      "step": 125810
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.035571336746216,
      "learning_rate": 0.0002567523923891461,
      "loss": 3.0997,
      "step": 125811
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3044958114624023,
      "learning_rate": 0.00025674834455862474,
      "loss": 2.7604,
      "step": 125812
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8907783031463623,
      "learning_rate": 0.000256744296736145,
      "loss": 2.7953,
      "step": 125813
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6667730808258057,
      "learning_rate": 0.000256740248921707,
      "loss": 2.9177,
      "step": 125814
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.010931968688965,
      "learning_rate": 0.0002567362011153119,
      "loss": 2.9182,
      "step": 125815
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7481157779693604,
      "learning_rate": 0.00025673215331696033,
      "loss": 2.9851,
      "step": 125816
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1692333221435547,
      "learning_rate": 0.0002567281055266531,
      "loss": 2.9796,
      "step": 125817
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0786259174346924,
      "learning_rate": 0.0002567240577443909,
      "loss": 2.9721,
      "step": 125818
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8792314529418945,
      "learning_rate": 0.0002567200099701746,
      "loss": 2.9308,
      "step": 125819
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.375833034515381,
      "learning_rate": 0.00025671596220400487,
      "loss": 2.7687,
      "step": 125820
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8297120332717896,
      "learning_rate": 0.00025671191444588237,
      "loss": 2.9104,
      "step": 125821
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7238349914550781,
      "learning_rate": 0.00025670786669580796,
      "loss": 3.0155,
      "step": 125822
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5081753730773926,
      "learning_rate": 0.00025670381895378234,
      "loss": 2.9165,
      "step": 125823
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7055253982543945,
      "learning_rate": 0.0002566997712198064,
      "loss": 3.0365,
      "step": 125824
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.84407377243042,
      "learning_rate": 0.0002566957234938807,
      "loss": 2.9388,
      "step": 125825
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0051381587982178,
      "learning_rate": 0.0002566916757760061,
      "loss": 3.0572,
      "step": 125826
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9081028699874878,
      "learning_rate": 0.00025668762806618333,
      "loss": 2.8966,
      "step": 125827
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3682496547698975,
      "learning_rate": 0.0002566835803644132,
      "loss": 3.2405,
      "step": 125828
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9156595468521118,
      "learning_rate": 0.0002566795326706964,
      "loss": 2.8956,
      "step": 125829
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8189401626586914,
      "learning_rate": 0.0002566754849850337,
      "loss": 2.95,
      "step": 125830
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0169568061828613,
      "learning_rate": 0.0002566714373074258,
      "loss": 3.0499,
      "step": 125831
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7004653215408325,
      "learning_rate": 0.00025666738963787353,
      "loss": 2.8794,
      "step": 125832
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.041053533554077,
      "learning_rate": 0.0002566633419763776,
      "loss": 2.8977,
      "step": 125833
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7809065580368042,
      "learning_rate": 0.0002566592943229387,
      "loss": 3.0325,
      "step": 125834
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3567495346069336,
      "learning_rate": 0.0002566552466775578,
      "loss": 3.0243,
      "step": 125835
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5556654930114746,
      "learning_rate": 0.0002566511990402354,
      "loss": 3.0644,
      "step": 125836
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.20469069480896,
      "learning_rate": 0.00025664715141097245,
      "loss": 2.8244,
      "step": 125837
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.011017084121704,
      "learning_rate": 0.0002566431037897695,
      "loss": 3.0665,
      "step": 125838
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8105212450027466,
      "learning_rate": 0.0002566390561766275,
      "loss": 2.9775,
      "step": 125839
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.852664589881897,
      "learning_rate": 0.0002566350085715471,
      "loss": 3.1246,
      "step": 125840
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.095227003097534,
      "learning_rate": 0.000256630960974529,
      "loss": 2.951,
      "step": 125841
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7054622173309326,
      "learning_rate": 0.00025662691338557413,
      "loss": 3.2763,
      "step": 125842
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1043803691864014,
      "learning_rate": 0.0002566228658046831,
      "loss": 2.9078,
      "step": 125843
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.885650396347046,
      "learning_rate": 0.0002566188182318567,
      "loss": 3.1451,
      "step": 125844
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8648419380187988,
      "learning_rate": 0.0002566147706670957,
      "loss": 3.0368,
      "step": 125845
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9854028224945068,
      "learning_rate": 0.0002566107231104007,
      "loss": 2.8507,
      "step": 125846
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.195991039276123,
      "learning_rate": 0.0002566066755617727,
      "loss": 3.1366,
      "step": 125847
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.583509922027588,
      "learning_rate": 0.00025660262802121227,
      "loss": 2.8765,
      "step": 125848
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9107810258865356,
      "learning_rate": 0.0002565985804887204,
      "loss": 3.3517,
      "step": 125849
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2008578777313232,
      "learning_rate": 0.00025659453296429755,
      "loss": 3.1893,
      "step": 125850
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1314797401428223,
      "learning_rate": 0.0002565904854479446,
      "loss": 2.984,
      "step": 125851
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4554121494293213,
      "learning_rate": 0.0002565864379396623,
      "loss": 3.217,
      "step": 125852
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0788230895996094,
      "learning_rate": 0.0002565823904394513,
      "loss": 3.0285,
      "step": 125853
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7458953857421875,
      "learning_rate": 0.0002565783429473126,
      "loss": 3.1581,
      "step": 125854
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.5911343097686768,
      "learning_rate": 0.0002565742954632467,
      "loss": 2.8217,
      "step": 125855
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.121387004852295,
      "learning_rate": 0.00025657024798725466,
      "loss": 3.0971,
      "step": 125856
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9072387218475342,
      "learning_rate": 0.0002565662005193369,
      "loss": 3.2346,
      "step": 125857
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.737020492553711,
      "learning_rate": 0.0002565621530594943,
      "loss": 2.9533,
      "step": 125858
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.909743309020996,
      "learning_rate": 0.0002565581056077275,
      "loss": 2.876,
      "step": 125859
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.056649684906006,
      "learning_rate": 0.00025655405816403753,
      "loss": 3.2183,
      "step": 125860
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.413313388824463,
      "learning_rate": 0.0002565500107284249,
      "loss": 2.888,
      "step": 125861
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7129714488983154,
      "learning_rate": 0.00025654596330089056,
      "loss": 3.138,
      "step": 125862
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.213294506072998,
      "learning_rate": 0.00025654191588143503,
      "loss": 3.1438,
      "step": 125863
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.086677074432373,
      "learning_rate": 0.0002565378684700592,
      "loss": 2.9338,
      "step": 125864
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7414538860321045,
      "learning_rate": 0.00025653382106676376,
      "loss": 2.969,
      "step": 125865
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0531740188598633,
      "learning_rate": 0.00025652977367154954,
      "loss": 2.9407,
      "step": 125866
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.497523546218872,
      "learning_rate": 0.0002565257262844172,
      "loss": 3.0994,
      "step": 125867
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9009822607040405,
      "learning_rate": 0.00025652167890536775,
      "loss": 3.0012,
      "step": 125868
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0392961502075195,
      "learning_rate": 0.0002565176315344016,
      "loss": 2.7707,
      "step": 125869
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.9069368839263916,
      "learning_rate": 0.00025651358417151963,
      "loss": 2.9854,
      "step": 125870
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1163594722747803,
      "learning_rate": 0.0002565095368167226,
      "loss": 3.0739,
      "step": 125871
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2727913856506348,
      "learning_rate": 0.00025650548947001126,
      "loss": 2.8903,
      "step": 125872
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9834564924240112,
      "learning_rate": 0.0002565014421313864,
      "loss": 2.8344,
      "step": 125873
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3955531120300293,
      "learning_rate": 0.00025649739480084887,
      "loss": 2.9521,
      "step": 125874
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1614768505096436,
      "learning_rate": 0.00025649334747839913,
      "loss": 2.9467,
      "step": 125875
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8434916734695435,
      "learning_rate": 0.00025648930016403815,
      "loss": 2.9905,
      "step": 125876
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.19319224357605,
      "learning_rate": 0.00025648525285776655,
      "loss": 3.045,
      "step": 125877
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0735156536102295,
      "learning_rate": 0.0002564812055595852,
      "loss": 3.0948,
      "step": 125878
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9514342546463013,
      "learning_rate": 0.00025647715826949486,
      "loss": 2.9752,
      "step": 125879
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8702666759490967,
      "learning_rate": 0.00025647311098749633,
      "loss": 3.0344,
      "step": 125880
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2694528102874756,
      "learning_rate": 0.0002564690637135901,
      "loss": 3.0041,
      "step": 125881
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1237268447875977,
      "learning_rate": 0.00025646501644777716,
      "loss": 3.0585,
      "step": 125882
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.314441680908203,
      "learning_rate": 0.0002564609691900582,
      "loss": 2.9326,
      "step": 125883
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4903242588043213,
      "learning_rate": 0.00025645692194043395,
      "loss": 3.095,
      "step": 125884
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8664631843566895,
      "learning_rate": 0.00025645287469890516,
      "loss": 3.1102,
      "step": 125885
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9145597219467163,
      "learning_rate": 0.00025644882746547276,
      "loss": 2.9066,
      "step": 125886
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.000516414642334,
      "learning_rate": 0.0002564447802401372,
      "loss": 3.0669,
      "step": 125887
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7794220447540283,
      "learning_rate": 0.00025644073302289936,
      "loss": 3.0041,
      "step": 125888
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9162026643753052,
      "learning_rate": 0.00025643668581376,
      "loss": 2.9074,
      "step": 125889
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.129561185836792,
      "learning_rate": 0.00025643263861271996,
      "loss": 2.9605,
      "step": 125890
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.409923553466797,
      "learning_rate": 0.0002564285914197798,
      "loss": 3.0115,
      "step": 125891
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7810126543045044,
      "learning_rate": 0.0002564245442349406,
      "loss": 3.0308,
      "step": 125892
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.598699927330017,
      "learning_rate": 0.00025642049705820274,
      "loss": 2.9582,
      "step": 125893
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9227776527404785,
      "learning_rate": 0.00025641644988956715,
      "loss": 2.8666,
      "step": 125894
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.2360382080078125,
      "learning_rate": 0.00025641240272903455,
      "loss": 2.8235,
      "step": 125895
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.525592088699341,
      "learning_rate": 0.0002564083555766057,
      "loss": 3.1505,
      "step": 125896
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1251471042633057,
      "learning_rate": 0.0002564043084322814,
      "loss": 2.845,
      "step": 125897
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.4527363777160645,
      "learning_rate": 0.0002564002612960625,
      "loss": 3.1424,
      "step": 125898
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.971752405166626,
      "learning_rate": 0.0002563962141679494,
      "loss": 2.9807,
      "step": 125899
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1627273559570312,
      "learning_rate": 0.00025639216704794317,
      "loss": 3.12,
      "step": 125900
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9916404485702515,
      "learning_rate": 0.00025638811993604436,
      "loss": 3.0254,
      "step": 125901
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4305710792541504,
      "learning_rate": 0.00025638407283225386,
      "loss": 3.0706,
      "step": 125902
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0898892879486084,
      "learning_rate": 0.0002563800257365724,
      "loss": 3.1241,
      "step": 125903
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.310924530029297,
      "learning_rate": 0.0002563759786490008,
      "loss": 2.9374,
      "step": 125904
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.16774582862854,
      "learning_rate": 0.0002563719315695397,
      "loss": 3.2513,
      "step": 125905
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.147352933883667,
      "learning_rate": 0.00025636788449818977,
      "loss": 2.9549,
      "step": 125906
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3903656005859375,
      "learning_rate": 0.00025636383743495196,
      "loss": 3.1427,
      "step": 125907
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.619288921356201,
      "learning_rate": 0.0002563597903798269,
      "loss": 3.0667,
      "step": 125908
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7355260848999023,
      "learning_rate": 0.0002563557433328154,
      "loss": 2.6525,
      "step": 125909
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8331851959228516,
      "learning_rate": 0.0002563516962939182,
      "loss": 2.8067,
      "step": 125910
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9821380376815796,
      "learning_rate": 0.000256347649263136,
      "loss": 3.098,
      "step": 125911
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.309870719909668,
      "learning_rate": 0.0002563436022404697,
      "loss": 2.8693,
      "step": 125912
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8412470817565918,
      "learning_rate": 0.00025633955522591983,
      "loss": 2.9095,
      "step": 125913
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.551682949066162,
      "learning_rate": 0.00025633550821948733,
      "loss": 3.0327,
      "step": 125914
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2309865951538086,
      "learning_rate": 0.0002563314612211728,
      "loss": 3.0317,
      "step": 125915
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9326156377792358,
      "learning_rate": 0.00025632741423097714,
      "loss": 2.9633,
      "step": 125916
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.507718086242676,
      "learning_rate": 0.0002563233672489011,
      "loss": 2.9948,
      "step": 125917
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1751534938812256,
      "learning_rate": 0.00025631932027494526,
      "loss": 2.9467,
      "step": 125918
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7829835414886475,
      "learning_rate": 0.00025631527330911055,
      "loss": 3.0795,
      "step": 125919
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.053504467010498,
      "learning_rate": 0.00025631122635139765,
      "loss": 3.2043,
      "step": 125920
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.8351893424987793,
      "learning_rate": 0.00025630717940180726,
      "loss": 2.7944,
      "step": 125921
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3982715606689453,
      "learning_rate": 0.0002563031324603402,
      "loss": 2.8868,
      "step": 125922
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.278181552886963,
      "learning_rate": 0.0002562990855269973,
      "loss": 2.9364,
      "step": 125923
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.861246347427368,
      "learning_rate": 0.00025629503860177913,
      "loss": 2.9718,
      "step": 125924
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2532572746276855,
      "learning_rate": 0.00025629099168468657,
      "loss": 2.8803,
      "step": 125925
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7056641578674316,
      "learning_rate": 0.0002562869447757203,
      "loss": 2.8037,
      "step": 125926
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7911299467086792,
      "learning_rate": 0.00025628289787488125,
      "loss": 2.834,
      "step": 125927
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2679848670959473,
      "learning_rate": 0.0002562788509821699,
      "loss": 2.9952,
      "step": 125928
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9731853008270264,
      "learning_rate": 0.00025627480409758725,
      "loss": 3.0283,
      "step": 125929
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.009399652481079,
      "learning_rate": 0.00025627075722113384,
      "loss": 2.9391,
      "step": 125930
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8964236974716187,
      "learning_rate": 0.0002562667103528105,
      "loss": 2.9373,
      "step": 125931
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1684212684631348,
      "learning_rate": 0.00025626266349261805,
      "loss": 2.9908,
      "step": 125932
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2632458209991455,
      "learning_rate": 0.0002562586166405572,
      "loss": 2.8249,
      "step": 125933
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2596888542175293,
      "learning_rate": 0.00025625456979662875,
      "loss": 3.1544,
      "step": 125934
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9577221870422363,
      "learning_rate": 0.00025625052296083335,
      "loss": 3.1682,
      "step": 125935
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8040401935577393,
      "learning_rate": 0.0002562464761331718,
      "loss": 2.9528,
      "step": 125936
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2785544395446777,
      "learning_rate": 0.00025624242931364486,
      "loss": 2.8644,
      "step": 125937
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0994081497192383,
      "learning_rate": 0.0002562383825022533,
      "loss": 3.1573,
      "step": 125938
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0449016094207764,
      "learning_rate": 0.0002562343356989978,
      "loss": 2.8713,
      "step": 125939
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2492048740386963,
      "learning_rate": 0.0002562302889038791,
      "loss": 2.888,
      "step": 125940
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.041675090789795,
      "learning_rate": 0.0002562262421168982,
      "loss": 2.8771,
      "step": 125941
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8869056701660156,
      "learning_rate": 0.0002562221953380556,
      "loss": 3.0376,
      "step": 125942
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.127159595489502,
      "learning_rate": 0.00025621814856735206,
      "loss": 2.9249,
      "step": 125943
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8310410976409912,
      "learning_rate": 0.00025621410180478845,
      "loss": 2.8706,
      "step": 125944
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9998390674591064,
      "learning_rate": 0.0002562100550503654,
      "loss": 2.8995,
      "step": 125945
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3367364406585693,
      "learning_rate": 0.00025620600830408375,
      "loss": 2.9318,
      "step": 125946
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7360117435455322,
      "learning_rate": 0.0002562019615659444,
      "loss": 2.7249,
      "step": 125947
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9306339025497437,
      "learning_rate": 0.0002561979148359477,
      "loss": 3.2997,
      "step": 125948
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9527841806411743,
      "learning_rate": 0.00025619386811409476,
      "loss": 3.054,
      "step": 125949
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6526894569396973,
      "learning_rate": 0.0002561898214003861,
      "loss": 2.9304,
      "step": 125950
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4558792114257812,
      "learning_rate": 0.00025618577469482264,
      "loss": 3.2274,
      "step": 125951
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.89414644241333,
      "learning_rate": 0.00025618172799740505,
      "loss": 2.9962,
      "step": 125952
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5828118324279785,
      "learning_rate": 0.0002561776813081342,
      "loss": 2.8657,
      "step": 125953
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1948976516723633,
      "learning_rate": 0.0002561736346270107,
      "loss": 2.5841,
      "step": 125954
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2800021171569824,
      "learning_rate": 0.0002561695879540353,
      "loss": 3.2423,
      "step": 125955
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1425211429595947,
      "learning_rate": 0.0002561655412892088,
      "loss": 2.8753,
      "step": 125956
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.14342999458313,
      "learning_rate": 0.00025616149463253196,
      "loss": 2.7899,
      "step": 125957
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4847028255462646,
      "learning_rate": 0.00025615744798400554,
      "loss": 2.9496,
      "step": 125958
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0907301902770996,
      "learning_rate": 0.0002561534013436304,
      "loss": 2.725,
      "step": 125959
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0016372203826904,
      "learning_rate": 0.000256149354711407,
      "loss": 3.1485,
      "step": 125960
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.000544548034668,
      "learning_rate": 0.00025614530808733634,
      "loss": 3.0319,
      "step": 125961
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3340365886688232,
      "learning_rate": 0.000256141261471419,
      "loss": 3.0342,
      "step": 125962
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.56301212310791,
      "learning_rate": 0.00025613721486365587,
      "loss": 3.2105,
      "step": 125963
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.5947026014328003,
      "learning_rate": 0.0002561331682640477,
      "loss": 3.0457,
      "step": 125964
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.734724521636963,
      "learning_rate": 0.0002561291216725953,
      "loss": 3.0743,
      "step": 125965
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.282553195953369,
      "learning_rate": 0.0002561250750892992,
      "loss": 2.6882,
      "step": 125966
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2627134323120117,
      "learning_rate": 0.00025612102851416027,
      "loss": 3.0559,
      "step": 125967
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9029014110565186,
      "learning_rate": 0.00025611698194717925,
      "loss": 3.2644,
      "step": 125968
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7310761213302612,
      "learning_rate": 0.00025611293538835696,
      "loss": 3.1119,
      "step": 125969
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7000185251235962,
      "learning_rate": 0.00025610888883769405,
      "loss": 3.1397,
      "step": 125970
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.113830804824829,
      "learning_rate": 0.0002561048422951915,
      "loss": 2.8647,
      "step": 125971
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8727357387542725,
      "learning_rate": 0.00025610079576084973,
      "loss": 2.9159,
      "step": 125972
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9805036783218384,
      "learning_rate": 0.0002560967492346697,
      "loss": 3.0275,
      "step": 125973
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.887521505355835,
      "learning_rate": 0.00025609270271665204,
      "loss": 2.8539,
      "step": 125974
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8483161926269531,
      "learning_rate": 0.0002560886562067976,
      "loss": 2.9675,
      "step": 125975
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.845700979232788,
      "learning_rate": 0.0002560846097051071,
      "loss": 3.3064,
      "step": 125976
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.036193370819092,
      "learning_rate": 0.00025608056321158143,
      "loss": 3.0833,
      "step": 125977
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6197593212127686,
      "learning_rate": 0.00025607651672622116,
      "loss": 3.0359,
      "step": 125978
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.676189661026001,
      "learning_rate": 0.00025607247024902704,
      "loss": 2.8696,
      "step": 125979
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7438372373580933,
      "learning_rate": 0.00025606842377999984,
      "loss": 2.9713,
      "step": 125980
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2599172592163086,
      "learning_rate": 0.00025606437731914036,
      "loss": 3.2818,
      "step": 125981
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.808732271194458,
      "learning_rate": 0.0002560603308664494,
      "loss": 3.0546,
      "step": 125982
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3467702865600586,
      "learning_rate": 0.0002560562844219276,
      "loss": 3.064,
      "step": 125983
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0474441051483154,
      "learning_rate": 0.0002560522379855759,
      "loss": 2.9238,
      "step": 125984
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8232100009918213,
      "learning_rate": 0.00025604819155739477,
      "loss": 2.7496,
      "step": 125985
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0583207607269287,
      "learning_rate": 0.00025604414513738516,
      "loss": 3.3263,
      "step": 125986
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.983255386352539,
      "learning_rate": 0.0002560400987255478,
      "loss": 3.1201,
      "step": 125987
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7064236402511597,
      "learning_rate": 0.00025603605232188333,
      "loss": 3.2674,
      "step": 125988
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7256983518600464,
      "learning_rate": 0.00025603200592639263,
      "loss": 2.9847,
      "step": 125989
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.806845784187317,
      "learning_rate": 0.00025602795953907654,
      "loss": 3.0741,
      "step": 125990
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0091311931610107,
      "learning_rate": 0.00025602391315993555,
      "loss": 2.8675,
      "step": 125991
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4511454105377197,
      "learning_rate": 0.00025601986678897053,
      "loss": 3.1443,
      "step": 125992
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2390077114105225,
      "learning_rate": 0.00025601582042618224,
      "loss": 2.874,
      "step": 125993
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0761022567749023,
      "learning_rate": 0.0002560117740715715,
      "loss": 2.7941,
      "step": 125994
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1558709144592285,
      "learning_rate": 0.000256007727725139,
      "loss": 2.9464,
      "step": 125995
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.70784592628479,
      "learning_rate": 0.0002560036813868855,
      "loss": 2.7663,
      "step": 125996
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.819903016090393,
      "learning_rate": 0.00025599963505681174,
      "loss": 2.8158,
      "step": 125997
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.139768123626709,
      "learning_rate": 0.0002559955887349184,
      "loss": 2.8398,
      "step": 125998
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.2086470127105713,
      "learning_rate": 0.0002559915424212064,
      "loss": 2.9694,
      "step": 125999
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.111680030822754,
      "learning_rate": 0.0002559874961156764,
      "loss": 3.0799,
      "step": 126000
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1791305541992188,
      "learning_rate": 0.00025598344981832906,
      "loss": 2.9016,
      "step": 126001
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5095205307006836,
      "learning_rate": 0.00025597940352916536,
      "loss": 2.8053,
      "step": 126002
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9557793140411377,
      "learning_rate": 0.0002559753572481858,
      "loss": 2.997,
      "step": 126003
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1547913551330566,
      "learning_rate": 0.00025597131097539135,
      "loss": 3.0087,
      "step": 126004
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.219273567199707,
      "learning_rate": 0.0002559672647107826,
      "loss": 3.2435,
      "step": 126005
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7958365678787231,
      "learning_rate": 0.0002559632184543604,
      "loss": 2.9328,
      "step": 126006
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5417585372924805,
      "learning_rate": 0.00025595917220612544,
      "loss": 3.1209,
      "step": 126007
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0402166843414307,
      "learning_rate": 0.00025595512596607854,
      "loss": 3.1036,
      "step": 126008
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8674132823944092,
      "learning_rate": 0.00025595107973422034,
      "loss": 2.9121,
      "step": 126009
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1319007873535156,
      "learning_rate": 0.0002559470335105517,
      "loss": 2.9835,
      "step": 126010
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.663602828979492,
      "learning_rate": 0.00025594298729507344,
      "loss": 2.9692,
      "step": 126011
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6499171257019043,
      "learning_rate": 0.00025593894108778614,
      "loss": 3.0021,
      "step": 126012
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0951437950134277,
      "learning_rate": 0.00025593489488869053,
      "loss": 2.9373,
      "step": 126013
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.120986223220825,
      "learning_rate": 0.00025593084869778764,
      "loss": 3.0787,
      "step": 126014
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.642458915710449,
      "learning_rate": 0.00025592680251507784,
      "loss": 2.9536,
      "step": 126015
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2025396823883057,
      "learning_rate": 0.0002559227563405622,
      "loss": 2.9828,
      "step": 126016
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.515892505645752,
      "learning_rate": 0.0002559187101742413,
      "loss": 3.1332,
      "step": 126017
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.491161346435547,
      "learning_rate": 0.00025591466401611595,
      "loss": 2.9257,
      "step": 126018
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7458674907684326,
      "learning_rate": 0.00025591061786618696,
      "loss": 2.8196,
      "step": 126019
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.383314847946167,
      "learning_rate": 0.00025590657172445497,
      "loss": 3.0707,
      "step": 126020
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.604428768157959,
      "learning_rate": 0.0002559025255909208,
      "loss": 3.1244,
      "step": 126021
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.6097428798675537,
      "learning_rate": 0.00025589847946558514,
      "loss": 2.9266,
      "step": 126022
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.813418388366699,
      "learning_rate": 0.0002558944333484488,
      "loss": 3.1209,
      "step": 126023
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.224548101425171,
      "learning_rate": 0.0002558903872395125,
      "loss": 3.176,
      "step": 126024
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.2722792625427246,
      "learning_rate": 0.000255886341138777,
      "loss": 3.0933,
      "step": 126025
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.284762859344482,
      "learning_rate": 0.0002558822950462432,
      "loss": 2.8752,
      "step": 126026
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3630127906799316,
      "learning_rate": 0.0002558782489619116,
      "loss": 2.8376,
      "step": 126027
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7284257411956787,
      "learning_rate": 0.00025587420288578305,
      "loss": 2.6194,
      "step": 126028
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.771452784538269,
      "learning_rate": 0.00025587015681785834,
      "loss": 2.8605,
      "step": 126029
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.024564266204834,
      "learning_rate": 0.0002558661107581382,
      "loss": 3.0223,
      "step": 126030
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.3843894004821777,
      "learning_rate": 0.0002558620647066234,
      "loss": 3.1937,
      "step": 126031
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.017289876937866,
      "learning_rate": 0.00025585801866331474,
      "loss": 3.1157,
      "step": 126032
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1660211086273193,
      "learning_rate": 0.00025585397262821284,
      "loss": 3.2471,
      "step": 126033
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5606038570404053,
      "learning_rate": 0.00025584992660131846,
      "loss": 2.8927,
      "step": 126034
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.220315456390381,
      "learning_rate": 0.00025584588058263246,
      "loss": 2.7874,
      "step": 126035
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.19785737991333,
      "learning_rate": 0.00025584183457215556,
      "loss": 3.1146,
      "step": 126036
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.202679395675659,
      "learning_rate": 0.00025583778856988845,
      "loss": 2.997,
      "step": 126037
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.316098690032959,
      "learning_rate": 0.0002558337425758321,
      "loss": 2.9924,
      "step": 126038
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.343569755554199,
      "learning_rate": 0.0002558296965899869,
      "loss": 2.9189,
      "step": 126039
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.691875696182251,
      "learning_rate": 0.0002558256506123538,
      "loss": 2.9156,
      "step": 126040
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.987257957458496,
      "learning_rate": 0.0002558216046429336,
      "loss": 2.8907,
      "step": 126041
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.067119598388672,
      "learning_rate": 0.000255817558681727,
      "loss": 3.0431,
      "step": 126042
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.21364164352417,
      "learning_rate": 0.0002558135127287347,
      "loss": 3.122,
      "step": 126043
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8711943626403809,
      "learning_rate": 0.00025580946678395764,
      "loss": 2.9285,
      "step": 126044
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9388222694396973,
      "learning_rate": 0.00025580542084739633,
      "loss": 2.6628,
      "step": 126045
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2376980781555176,
      "learning_rate": 0.00025580137491905164,
      "loss": 2.7584,
      "step": 126046
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9637833833694458,
      "learning_rate": 0.0002557973289989243,
      "loss": 2.8207,
      "step": 126047
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.632702589035034,
      "learning_rate": 0.000255793283087015,
      "loss": 3.0088,
      "step": 126048
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.814002275466919,
      "learning_rate": 0.00025578923718332466,
      "loss": 3.0217,
      "step": 126049
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3845300674438477,
      "learning_rate": 0.00025578519128785385,
      "loss": 3.0554,
      "step": 126050
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9795968532562256,
      "learning_rate": 0.0002557811454006036,
      "loss": 2.8231,
      "step": 126051
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4199600219726562,
      "learning_rate": 0.00025577709952157436,
      "loss": 3.2181,
      "step": 126052
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0568289756774902,
      "learning_rate": 0.0002557730536507669,
      "loss": 2.9544,
      "step": 126053
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.828260064125061,
      "learning_rate": 0.00025576900778818214,
      "loss": 2.8211,
      "step": 126054
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.783416986465454,
      "learning_rate": 0.00025576496193382075,
      "loss": 2.9709,
      "step": 126055
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.482470750808716,
      "learning_rate": 0.00025576091608768347,
      "loss": 2.9408,
      "step": 126056
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9661496877670288,
      "learning_rate": 0.0002557568702497712,
      "loss": 3.2062,
      "step": 126057
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1935720443725586,
      "learning_rate": 0.00025575282442008443,
      "loss": 2.9316,
      "step": 126058
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7640767097473145,
      "learning_rate": 0.00025574877859862403,
      "loss": 2.9347,
      "step": 126059
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3604650497436523,
      "learning_rate": 0.0002557447327853908,
      "loss": 2.7026,
      "step": 126060
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3552937507629395,
      "learning_rate": 0.00025574068698038546,
      "loss": 2.8568,
      "step": 126061
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5319979190826416,
      "learning_rate": 0.00025573664118360876,
      "loss": 3.0655,
      "step": 126062
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2720088958740234,
      "learning_rate": 0.00025573259539506154,
      "loss": 2.8231,
      "step": 126063
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9506417512893677,
      "learning_rate": 0.0002557285496147444,
      "loss": 2.5564,
      "step": 126064
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.992355227470398,
      "learning_rate": 0.00025572450384265817,
      "loss": 3.026,
      "step": 126065
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.700899362564087,
      "learning_rate": 0.00025572045807880354,
      "loss": 3.0629,
      "step": 126066
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.754136800765991,
      "learning_rate": 0.0002557164123231813,
      "loss": 2.9935,
      "step": 126067
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8285930156707764,
      "learning_rate": 0.00025571236657579224,
      "loss": 3.1794,
      "step": 126068
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.754560947418213,
      "learning_rate": 0.0002557083208366372,
      "loss": 2.9494,
      "step": 126069
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.400700807571411,
      "learning_rate": 0.0002557042751057167,
      "loss": 2.9751,
      "step": 126070
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3508782386779785,
      "learning_rate": 0.00025570022938303165,
      "loss": 2.9637,
      "step": 126071
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2204813957214355,
      "learning_rate": 0.0002556961836685827,
      "loss": 2.8418,
      "step": 126072
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7996985912323,
      "learning_rate": 0.00025569213796237066,
      "loss": 2.9341,
      "step": 126073
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.107684850692749,
      "learning_rate": 0.00025568809226439633,
      "loss": 3.0136,
      "step": 126074
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5047545433044434,
      "learning_rate": 0.0002556840465746605,
      "loss": 2.8786,
      "step": 126075
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9848110675811768,
      "learning_rate": 0.0002556800008931638,
      "loss": 3.1006,
      "step": 126076
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0896859169006348,
      "learning_rate": 0.000255675955219907,
      "loss": 3.0394,
      "step": 126077
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9207329750061035,
      "learning_rate": 0.00025567190955489084,
      "loss": 2.7146,
      "step": 126078
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.83479642868042,
      "learning_rate": 0.00025566786389811613,
      "loss": 2.8747,
      "step": 126079
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.051058053970337,
      "learning_rate": 0.0002556638182495836,
      "loss": 3.1982,
      "step": 126080
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7958941459655762,
      "learning_rate": 0.0002556597726092941,
      "loss": 3.1604,
      "step": 126081
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9607324600219727,
      "learning_rate": 0.0002556557269772482,
      "loss": 3.0375,
      "step": 126082
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.41843581199646,
      "learning_rate": 0.0002556516813534467,
      "loss": 3.0004,
      "step": 126083
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8657675981521606,
      "learning_rate": 0.00025564763573789046,
      "loss": 2.9593,
      "step": 126084
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0325052738189697,
      "learning_rate": 0.0002556435901305801,
      "loss": 3.2426,
      "step": 126085
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8561886548995972,
      "learning_rate": 0.00025563954453151645,
      "loss": 2.9719,
      "step": 126086
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4722161293029785,
      "learning_rate": 0.00025563549894070027,
      "loss": 2.9937,
      "step": 126087
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.945473074913025,
      "learning_rate": 0.00025563145335813226,
      "loss": 3.1236,
      "step": 126088
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.974094271659851,
      "learning_rate": 0.0002556274077838133,
      "loss": 3.0281,
      "step": 126089
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.613879919052124,
      "learning_rate": 0.00025562336221774393,
      "loss": 3.2326,
      "step": 126090
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0400664806365967,
      "learning_rate": 0.000255619316659925,
      "loss": 2.9454,
      "step": 126091
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7861436605453491,
      "learning_rate": 0.00025561527111035736,
      "loss": 2.9674,
      "step": 126092
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.9242351055145264,
      "learning_rate": 0.0002556112255690417,
      "loss": 2.968,
      "step": 126093
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9319618940353394,
      "learning_rate": 0.0002556071800359786,
      "loss": 3.0858,
      "step": 126094
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.676200032234192,
      "learning_rate": 0.00025560313451116905,
      "loss": 3.0962,
      "step": 126095
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3010003566741943,
      "learning_rate": 0.0002555990889946138,
      "loss": 2.8604,
      "step": 126096
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1157002449035645,
      "learning_rate": 0.0002555950434863134,
      "loss": 2.935,
      "step": 126097
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8584163188934326,
      "learning_rate": 0.00025559099798626875,
      "loss": 2.9957,
      "step": 126098
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.6176871061325073,
      "learning_rate": 0.0002555869524944806,
      "loss": 3.1779,
      "step": 126099
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9850000143051147,
      "learning_rate": 0.00025558290701094966,
      "loss": 2.9935,
      "step": 126100
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7967712879180908,
      "learning_rate": 0.0002555788615356767,
      "loss": 2.758,
      "step": 126101
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.033018112182617,
      "learning_rate": 0.0002555748160686624,
      "loss": 2.9874,
      "step": 126102
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.906304955482483,
      "learning_rate": 0.0002555707706099077,
      "loss": 3.1154,
      "step": 126103
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7399381399154663,
      "learning_rate": 0.0002555667251594132,
      "loss": 3.0015,
      "step": 126104
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2364187240600586,
      "learning_rate": 0.00025556267971717965,
      "loss": 3.1955,
      "step": 126105
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.147873640060425,
      "learning_rate": 0.0002555586342832078,
      "loss": 2.9219,
      "step": 126106
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4188122749328613,
      "learning_rate": 0.0002555545888574985,
      "loss": 2.8924,
      "step": 126107
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.710514783859253,
      "learning_rate": 0.00025555054344005235,
      "loss": 2.9023,
      "step": 126108
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7754651308059692,
      "learning_rate": 0.0002555464980308703,
      "loss": 3.0738,
      "step": 126109
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.028886079788208,
      "learning_rate": 0.00025554245262995293,
      "loss": 2.9735,
      "step": 126110
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.46236252784729,
      "learning_rate": 0.0002555384072373012,
      "loss": 2.9302,
      "step": 126111
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.6440976858139038,
      "learning_rate": 0.0002555343618529156,
      "loss": 3.1143,
      "step": 126112
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.094123363494873,
      "learning_rate": 0.000255530316476797,
      "loss": 2.9778,
      "step": 126113
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1668074131011963,
      "learning_rate": 0.0002555262711089461,
      "loss": 3.0547,
      "step": 126114
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9671229124069214,
      "learning_rate": 0.00025552222574936375,
      "loss": 2.6954,
      "step": 126115
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.27432918548584,
      "learning_rate": 0.00025551818039805065,
      "loss": 2.8471,
      "step": 126116
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5343692302703857,
      "learning_rate": 0.0002555141350550076,
      "loss": 3.283,
      "step": 126117
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9975666999816895,
      "learning_rate": 0.0002555100897202354,
      "loss": 2.9876,
      "step": 126118
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9634157419204712,
      "learning_rate": 0.0002555060443937346,
      "loss": 2.8896,
      "step": 126119
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.391542434692383,
      "learning_rate": 0.0002555019990755061,
      "loss": 2.9241,
      "step": 126120
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8678981065750122,
      "learning_rate": 0.0002554979537655505,
      "loss": 2.8588,
      "step": 126121
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9815208911895752,
      "learning_rate": 0.0002554939084638688,
      "loss": 2.7026,
      "step": 126122
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6900455951690674,
      "learning_rate": 0.00025548986317046156,
      "loss": 3.0564,
      "step": 126123
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.983561635017395,
      "learning_rate": 0.00025548581788532973,
      "loss": 2.9902,
      "step": 126124
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8927631378173828,
      "learning_rate": 0.0002554817726084738,
      "loss": 3.1676,
      "step": 126125
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9352747201919556,
      "learning_rate": 0.00025547772733989463,
      "loss": 3.1048,
      "step": 126126
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0667877197265625,
      "learning_rate": 0.000255473682079593,
      "loss": 2.8294,
      "step": 126127
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4128220081329346,
      "learning_rate": 0.0002554696368275697,
      "loss": 2.7234,
      "step": 126128
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7105848789215088,
      "learning_rate": 0.0002554655915838254,
      "loss": 2.919,
      "step": 126129
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9546706676483154,
      "learning_rate": 0.000255461546348361,
      "loss": 3.1547,
      "step": 126130
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.716064453125,
      "learning_rate": 0.000255457501121177,
      "loss": 3.0004,
      "step": 126131
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9519736766815186,
      "learning_rate": 0.0002554534559022743,
      "loss": 2.8307,
      "step": 126132
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.218686580657959,
      "learning_rate": 0.0002554494106916537,
      "loss": 3.1254,
      "step": 126133
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.516874313354492,
      "learning_rate": 0.0002554453654893159,
      "loss": 2.9947,
      "step": 126134
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3960018157958984,
      "learning_rate": 0.0002554413202952616,
      "loss": 3.2904,
      "step": 126135
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0307493209838867,
      "learning_rate": 0.0002554372751094917,
      "loss": 3.0055,
      "step": 126136
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1487667560577393,
      "learning_rate": 0.00025543322993200676,
      "loss": 2.9742,
      "step": 126137
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7335543632507324,
      "learning_rate": 0.0002554291847628076,
      "loss": 2.9939,
      "step": 126138
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.00616717338562,
      "learning_rate": 0.00025542513960189503,
      "loss": 2.8673,
      "step": 126139
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.5788793563842773,
      "learning_rate": 0.0002554210944492698,
      "loss": 2.8503,
      "step": 126140
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.070073366165161,
      "learning_rate": 0.00025541704930493256,
      "loss": 2.8795,
      "step": 126141
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.233656406402588,
      "learning_rate": 0.0002554130041688843,
      "loss": 2.9688,
      "step": 126142
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7899279594421387,
      "learning_rate": 0.0002554089590411254,
      "loss": 2.9839,
      "step": 126143
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.623422861099243,
      "learning_rate": 0.00025540491392165687,
      "loss": 2.9939,
      "step": 126144
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.512235164642334,
      "learning_rate": 0.0002554008688104795,
      "loss": 3.1252,
      "step": 126145
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9243502616882324,
      "learning_rate": 0.0002553968237075938,
      "loss": 3.0775,
      "step": 126146
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.690779685974121,
      "learning_rate": 0.00025539277861300074,
      "loss": 2.8985,
      "step": 126147
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.007380485534668,
      "learning_rate": 0.00025538873352670113,
      "loss": 3.0221,
      "step": 126148
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8170686960220337,
      "learning_rate": 0.00025538468844869545,
      "loss": 2.8746,
      "step": 126149
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.513171434402466,
      "learning_rate": 0.0002553806433789847,
      "loss": 2.9882,
      "step": 126150
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.236635208129883,
      "learning_rate": 0.00025537659831756935,
      "loss": 2.8707,
      "step": 126151
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.2494213581085205,
      "learning_rate": 0.00025537255326445044,
      "loss": 3.1987,
      "step": 126152
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1883316040039062,
      "learning_rate": 0.0002553685082196286,
      "loss": 3.0615,
      "step": 126153
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5557591915130615,
      "learning_rate": 0.0002553644631831048,
      "loss": 3.0018,
      "step": 126154
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6397106647491455,
      "learning_rate": 0.00025536041815487937,
      "loss": 2.9657,
      "step": 126155
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3693649768829346,
      "learning_rate": 0.00025535637313495326,
      "loss": 3.0277,
      "step": 126156
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.736665964126587,
      "learning_rate": 0.0002553523281233273,
      "loss": 2.909,
      "step": 126157
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.592144727706909,
      "learning_rate": 0.0002553482831200022,
      "loss": 2.8759,
      "step": 126158
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.648171901702881,
      "learning_rate": 0.0002553442381249786,
      "loss": 3.2428,
      "step": 126159
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.858246088027954,
      "learning_rate": 0.00025534019313825756,
      "loss": 3.1404,
      "step": 126160
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8379286527633667,
      "learning_rate": 0.0002553361481598395,
      "loss": 3.1153,
      "step": 126161
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.549734592437744,
      "learning_rate": 0.00025533210318972525,
      "loss": 3.0825,
      "step": 126162
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9881458282470703,
      "learning_rate": 0.00025532805822791563,
      "loss": 2.8999,
      "step": 126163
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.030337333679199,
      "learning_rate": 0.0002553240132744114,
      "loss": 2.7293,
      "step": 126164
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2752554416656494,
      "learning_rate": 0.0002553199683292132,
      "loss": 3.0259,
      "step": 126165
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6195530891418457,
      "learning_rate": 0.000255315923392322,
      "loss": 2.7389,
      "step": 126166
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9231072664260864,
      "learning_rate": 0.00025531187846373837,
      "loss": 3.1439,
      "step": 126167
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0901219844818115,
      "learning_rate": 0.000255307833543463,
      "loss": 3.0524,
      "step": 126168
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.026719808578491,
      "learning_rate": 0.0002553037886314968,
      "loss": 2.9946,
      "step": 126169
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6000959873199463,
      "learning_rate": 0.0002552997437278405,
      "loss": 2.8806,
      "step": 126170
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0123071670532227,
      "learning_rate": 0.0002552956988324948,
      "loss": 3.1351,
      "step": 126171
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3093788623809814,
      "learning_rate": 0.00025529165394546055,
      "loss": 2.8056,
      "step": 126172
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5659642219543457,
      "learning_rate": 0.0002552876090667384,
      "loss": 2.8128,
      "step": 126173
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9434716701507568,
      "learning_rate": 0.00025528356419632906,
      "loss": 3.0714,
      "step": 126174
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.888581395149231,
      "learning_rate": 0.0002552795193342333,
      "loss": 3.026,
      "step": 126175
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8918943405151367,
      "learning_rate": 0.000255275474480452,
      "loss": 2.9174,
      "step": 126176
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9519603252410889,
      "learning_rate": 0.00025527142963498585,
      "loss": 2.8805,
      "step": 126177
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1934211254119873,
      "learning_rate": 0.0002552673847978355,
      "loss": 2.8629,
      "step": 126178
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.839727759361267,
      "learning_rate": 0.00025526333996900194,
      "loss": 3.3952,
      "step": 126179
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1797122955322266,
      "learning_rate": 0.0002552592951484856,
      "loss": 2.9812,
      "step": 126180
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2333712577819824,
      "learning_rate": 0.0002552552503362876,
      "loss": 3.0453,
      "step": 126181
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0583927631378174,
      "learning_rate": 0.0002552512055324083,
      "loss": 2.868,
      "step": 126182
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.6371405124664307,
      "learning_rate": 0.00025524716073684875,
      "loss": 3.0352,
      "step": 126183
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9384080171585083,
      "learning_rate": 0.0002552431159496095,
      "loss": 2.8314,
      "step": 126184
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9514096975326538,
      "learning_rate": 0.00025523907117069156,
      "loss": 2.9479,
      "step": 126185
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.3945486545562744,
      "learning_rate": 0.0002552350264000954,
      "loss": 2.9851,
      "step": 126186
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.158881425857544,
      "learning_rate": 0.0002552309816378219,
      "loss": 2.7676,
      "step": 126187
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2498650550842285,
      "learning_rate": 0.00025522693688387186,
      "loss": 2.8629,
      "step": 126188
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.9085850715637207,
      "learning_rate": 0.0002552228921382459,
      "loss": 3.0255,
      "step": 126189
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6136257648468018,
      "learning_rate": 0.0002552188474009449,
      "loss": 3.0834,
      "step": 126190
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6148123741149902,
      "learning_rate": 0.0002552148026719696,
      "loss": 2.7282,
      "step": 126191
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.916153073310852,
      "learning_rate": 0.0002552107579513207,
      "loss": 3.0799,
      "step": 126192
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0964441299438477,
      "learning_rate": 0.0002552067132389989,
      "loss": 3.1445,
      "step": 126193
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.6265807151794434,
      "learning_rate": 0.000255202668535005,
      "loss": 3.0787,
      "step": 126194
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.5004119873046875,
      "learning_rate": 0.0002551986238393399,
      "loss": 3.3294,
      "step": 126195
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1018848419189453,
      "learning_rate": 0.0002551945791520041,
      "loss": 3.0917,
      "step": 126196
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8311710357666016,
      "learning_rate": 0.0002551905344729986,
      "loss": 3.2561,
      "step": 126197
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.5856823921203613,
      "learning_rate": 0.00025518648980232394,
      "loss": 2.9775,
      "step": 126198
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0931568145751953,
      "learning_rate": 0.00025518244513998093,
      "loss": 3.0716,
      "step": 126199
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.98062002658844,
      "learning_rate": 0.00025517840048597034,
      "loss": 3.1152,
      "step": 126200
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9558614492416382,
      "learning_rate": 0.000255174355840293,
      "loss": 2.9389,
      "step": 126201
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8103727102279663,
      "learning_rate": 0.00025517031120294957,
      "loss": 3.2343,
      "step": 126202
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3179996013641357,
      "learning_rate": 0.0002551662665739409,
      "loss": 3.0541,
      "step": 126203
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9917536973953247,
      "learning_rate": 0.0002551622219532676,
      "loss": 2.8081,
      "step": 126204
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1873860359191895,
      "learning_rate": 0.0002551581773409304,
      "loss": 2.734,
      "step": 126205
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8411860466003418,
      "learning_rate": 0.00025515413273693023,
      "loss": 3.192,
      "step": 126206
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.503822088241577,
      "learning_rate": 0.0002551500881412677,
      "loss": 3.1892,
      "step": 126207
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9732788801193237,
      "learning_rate": 0.0002551460435539437,
      "loss": 3.1863,
      "step": 126208
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7005213499069214,
      "learning_rate": 0.0002551419989749589,
      "loss": 3.0772,
      "step": 126209
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0170764923095703,
      "learning_rate": 0.000255137954404314,
      "loss": 2.84,
      "step": 126210
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8615238666534424,
      "learning_rate": 0.0002551339098420098,
      "loss": 3.1287,
      "step": 126211
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5951690673828125,
      "learning_rate": 0.00025512986528804704,
      "loss": 2.7643,
      "step": 126212
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.076585531234741,
      "learning_rate": 0.0002551258207424265,
      "loss": 2.9601,
      "step": 126213
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.1344354152679443,
      "learning_rate": 0.0002551217762051489,
      "loss": 2.8975,
      "step": 126214
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.894153594970703,
      "learning_rate": 0.00025511773167621513,
      "loss": 2.9534,
      "step": 126215
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9625478982925415,
      "learning_rate": 0.0002551136871556257,
      "loss": 3.1699,
      "step": 126216
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1025545597076416,
      "learning_rate": 0.0002551096426433815,
      "loss": 3.0693,
      "step": 126217
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.5722942352294922,
      "learning_rate": 0.00025510559813948325,
      "loss": 2.8786,
      "step": 126218
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8330581188201904,
      "learning_rate": 0.0002551015536439317,
      "loss": 2.961,
      "step": 126219
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0425832271575928,
      "learning_rate": 0.00025509750915672766,
      "loss": 2.9321,
      "step": 126220
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.162254571914673,
      "learning_rate": 0.00025509346467787194,
      "loss": 3.0268,
      "step": 126221
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8533833026885986,
      "learning_rate": 0.00025508942020736506,
      "loss": 2.7948,
      "step": 126222
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.037557363510132,
      "learning_rate": 0.0002550853757452079,
      "loss": 3.0416,
      "step": 126223
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7711756229400635,
      "learning_rate": 0.00025508133129140126,
      "loss": 3.0953,
      "step": 126224
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0426065921783447,
      "learning_rate": 0.0002550772868459458,
      "loss": 3.1323,
      "step": 126225
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9372953176498413,
      "learning_rate": 0.0002550732424088423,
      "loss": 2.9823,
      "step": 126226
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.62638783454895,
      "learning_rate": 0.00025506919798009176,
      "loss": 2.8071,
      "step": 126227
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7569715976715088,
      "learning_rate": 0.0002550651535596945,
      "loss": 2.7429,
      "step": 126228
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4342432022094727,
      "learning_rate": 0.0002550611091476515,
      "loss": 2.9549,
      "step": 126229
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.08125901222229,
      "learning_rate": 0.0002550570647439635,
      "loss": 2.9783,
      "step": 126230
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1040542125701904,
      "learning_rate": 0.0002550530203486312,
      "loss": 3.0606,
      "step": 126231
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1915199756622314,
      "learning_rate": 0.0002550489759616554,
      "loss": 3.0493,
      "step": 126232
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.084381103515625,
      "learning_rate": 0.000255044931583037,
      "loss": 3.0185,
      "step": 126233
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.07851505279541,
      "learning_rate": 0.00025504088721277644,
      "loss": 2.7861,
      "step": 126234
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0886335372924805,
      "learning_rate": 0.00025503684285087464,
      "loss": 3.1005,
      "step": 126235
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.060065984725952,
      "learning_rate": 0.00025503279849733236,
      "loss": 3.1292,
      "step": 126236
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9741780757904053,
      "learning_rate": 0.00025502875415215026,
      "loss": 3.134,
      "step": 126237
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2997591495513916,
      "learning_rate": 0.00025502470981532925,
      "loss": 3.3502,
      "step": 126238
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1402058601379395,
      "learning_rate": 0.0002550206654868701,
      "loss": 2.6968,
      "step": 126239
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.623061418533325,
      "learning_rate": 0.0002550166211667733,
      "loss": 3.1538,
      "step": 126240
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.048123598098755,
      "learning_rate": 0.00025501257685503975,
      "loss": 3.0347,
      "step": 126241
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8036322593688965,
      "learning_rate": 0.0002550085325516703,
      "loss": 2.6683,
      "step": 126242
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9591491222381592,
      "learning_rate": 0.0002550044882566655,
      "loss": 2.8335,
      "step": 126243
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2328336238861084,
      "learning_rate": 0.0002550004439700263,
      "loss": 2.7513,
      "step": 126244
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7042148113250732,
      "learning_rate": 0.0002549963996917533,
      "loss": 2.9804,
      "step": 126245
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8053096532821655,
      "learning_rate": 0.0002549923554218475,
      "loss": 3.2514,
      "step": 126246
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8217428922653198,
      "learning_rate": 0.0002549883111603093,
      "loss": 3.2252,
      "step": 126247
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8288860321044922,
      "learning_rate": 0.00025498426690713966,
      "loss": 2.9367,
      "step": 126248
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.584271192550659,
      "learning_rate": 0.0002549802226623393,
      "loss": 3.0335,
      "step": 126249
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2299275398254395,
      "learning_rate": 0.0002549761784259089,
      "loss": 3.103,
      "step": 126250
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.907965898513794,
      "learning_rate": 0.0002549721341978493,
      "loss": 2.9851,
      "step": 126251
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2298309803009033,
      "learning_rate": 0.0002549680899781614,
      "loss": 2.8368,
      "step": 126252
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2072360515594482,
      "learning_rate": 0.00025496404576684566,
      "loss": 3.0658,
      "step": 126253
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.591599464416504,
      "learning_rate": 0.0002549600015639029,
      "loss": 3.0721,
      "step": 126254
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.775036096572876,
      "learning_rate": 0.000254955957369334,
      "loss": 3.2001,
      "step": 126255
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.080725908279419,
      "learning_rate": 0.00025495191318313957,
      "loss": 3.1338,
      "step": 126256
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8930927515029907,
      "learning_rate": 0.00025494786900532045,
      "loss": 3.1162,
      "step": 126257
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9906034469604492,
      "learning_rate": 0.0002549438248358775,
      "loss": 3.0742,
      "step": 126258
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0477821826934814,
      "learning_rate": 0.00025493978067481123,
      "loss": 3.1672,
      "step": 126259
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7835524082183838,
      "learning_rate": 0.0002549357365221225,
      "loss": 2.9367,
      "step": 126260
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.2778773307800293,
      "learning_rate": 0.00025493169237781206,
      "loss": 2.9323,
      "step": 126261
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.292593240737915,
      "learning_rate": 0.00025492764824188065,
      "loss": 3.0548,
      "step": 126262
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.502669095993042,
      "learning_rate": 0.00025492360411432904,
      "loss": 2.9488,
      "step": 126263
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0521175861358643,
      "learning_rate": 0.00025491955999515814,
      "loss": 3.0059,
      "step": 126264
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.007528066635132,
      "learning_rate": 0.00025491551588436846,
      "loss": 3.1022,
      "step": 126265
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.050773859024048,
      "learning_rate": 0.00025491147178196075,
      "loss": 3.0864,
      "step": 126266
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.446366786956787,
      "learning_rate": 0.0002549074276879359,
      "loss": 2.799,
      "step": 126267
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.775592565536499,
      "learning_rate": 0.0002549033836022946,
      "loss": 2.8526,
      "step": 126268
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.783914566040039,
      "learning_rate": 0.0002548993395250376,
      "loss": 2.9548,
      "step": 126269
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.140097141265869,
      "learning_rate": 0.0002548952954561657,
      "loss": 3.2621,
      "step": 126270
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5227901935577393,
      "learning_rate": 0.00025489125139567965,
      "loss": 2.8133,
      "step": 126271
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.825277090072632,
      "learning_rate": 0.00025488720734358005,
      "loss": 2.8711,
      "step": 126272
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8860105276107788,
      "learning_rate": 0.0002548831632998679,
      "loss": 3.179,
      "step": 126273
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3726887702941895,
      "learning_rate": 0.0002548791192645437,
      "loss": 3.0937,
      "step": 126274
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.912846565246582,
      "learning_rate": 0.0002548750752376084,
      "loss": 2.9761,
      "step": 126275
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8918654918670654,
      "learning_rate": 0.0002548710312190627,
      "loss": 2.9388,
      "step": 126276
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2991390228271484,
      "learning_rate": 0.0002548669872089073,
      "loss": 2.9485,
      "step": 126277
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.153615951538086,
      "learning_rate": 0.00025486294320714294,
      "loss": 2.8423,
      "step": 126278
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9872156381607056,
      "learning_rate": 0.00025485889921377045,
      "loss": 3.036,
      "step": 126279
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.538884162902832,
      "learning_rate": 0.00025485485522879056,
      "loss": 3.0374,
      "step": 126280
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.516845703125,
      "learning_rate": 0.00025485081125220396,
      "loss": 3.0173,
      "step": 126281
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8521417379379272,
      "learning_rate": 0.00025484676728401153,
      "loss": 2.9464,
      "step": 126282
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1541507244110107,
      "learning_rate": 0.0002548427233242138,
      "loss": 2.9642,
      "step": 126283
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8197786808013916,
      "learning_rate": 0.00025483867937281177,
      "loss": 3.1177,
      "step": 126284
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.322439432144165,
      "learning_rate": 0.000254834635429806,
      "loss": 2.9525,
      "step": 126285
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.791414737701416,
      "learning_rate": 0.00025483059149519733,
      "loss": 2.932,
      "step": 126286
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.237598419189453,
      "learning_rate": 0.0002548265475689866,
      "loss": 2.9205,
      "step": 126287
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8233036994934082,
      "learning_rate": 0.00025482250365117446,
      "loss": 2.9396,
      "step": 126288
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.671168565750122,
      "learning_rate": 0.0002548184597417616,
      "loss": 3.2232,
      "step": 126289
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1837170124053955,
      "learning_rate": 0.00025481441584074884,
      "loss": 3.0883,
      "step": 126290
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8786016702651978,
      "learning_rate": 0.00025481037194813696,
      "loss": 2.8965,
      "step": 126291
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2373242378234863,
      "learning_rate": 0.00025480632806392666,
      "loss": 2.8968,
      "step": 126292
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.804913282394409,
      "learning_rate": 0.0002548022841881187,
      "loss": 3.2069,
      "step": 126293
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4692747592926025,
      "learning_rate": 0.000254798240320714,
      "loss": 2.604,
      "step": 126294
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9647942781448364,
      "learning_rate": 0.00025479419646171303,
      "loss": 2.8576,
      "step": 126295
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.344526767730713,
      "learning_rate": 0.0002547901526111167,
      "loss": 2.7092,
      "step": 126296
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7992913722991943,
      "learning_rate": 0.0002547861087689257,
      "loss": 2.8146,
      "step": 126297
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4260032176971436,
      "learning_rate": 0.0002547820649351408,
      "loss": 2.9565,
      "step": 126298
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5297224521636963,
      "learning_rate": 0.0002547780211097628,
      "loss": 2.8065,
      "step": 126299
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.827984094619751,
      "learning_rate": 0.0002547739772927925,
      "loss": 2.676,
      "step": 126300
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.3812851905822754,
      "learning_rate": 0.0002547699334842305,
      "loss": 2.9164,
      "step": 126301
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.4752583503723145,
      "learning_rate": 0.0002547658896840776,
      "loss": 2.8629,
      "step": 126302
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.128026008605957,
      "learning_rate": 0.0002547618458923346,
      "loss": 2.9848,
      "step": 126303
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9157313108444214,
      "learning_rate": 0.00025475780210900214,
      "loss": 3.2071,
      "step": 126304
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.664576530456543,
      "learning_rate": 0.00025475375833408116,
      "loss": 2.9034,
      "step": 126305
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.446627378463745,
      "learning_rate": 0.0002547497145675724,
      "loss": 3.1503,
      "step": 126306
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5580801963806152,
      "learning_rate": 0.0002547456708094764,
      "loss": 2.9075,
      "step": 126307
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.8404462337493896,
      "learning_rate": 0.000254741627059794,
      "loss": 3.1228,
      "step": 126308
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.835547685623169,
      "learning_rate": 0.000254737583318526,
      "loss": 3.1235,
      "step": 126309
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2743892669677734,
      "learning_rate": 0.0002547335395856732,
      "loss": 2.9263,
      "step": 126310
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7409274578094482,
      "learning_rate": 0.0002547294958612362,
      "loss": 2.8067,
      "step": 126311
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.749548077583313,
      "learning_rate": 0.0002547254521452159,
      "loss": 2.835,
      "step": 126312
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3764355182647705,
      "learning_rate": 0.0002547214084376131,
      "loss": 3.0466,
      "step": 126313
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2697224617004395,
      "learning_rate": 0.0002547173647384283,
      "loss": 2.7347,
      "step": 126314
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.265909194946289,
      "learning_rate": 0.00025471332104766247,
      "loss": 3.0727,
      "step": 126315
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9160112142562866,
      "learning_rate": 0.0002547092773653162,
      "loss": 2.9482,
      "step": 126316
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7819124460220337,
      "learning_rate": 0.0002547052336913904,
      "loss": 3.0459,
      "step": 126317
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.14349627494812,
      "learning_rate": 0.0002547011900258857,
      "loss": 2.9713,
      "step": 126318
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1133382320404053,
      "learning_rate": 0.00025469714636880306,
      "loss": 2.8897,
      "step": 126319
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9235919713974,
      "learning_rate": 0.0002546931027201429,
      "loss": 3.1071,
      "step": 126320
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.086777925491333,
      "learning_rate": 0.0002546890590799062,
      "loss": 2.9506,
      "step": 126321
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.59500789642334,
      "learning_rate": 0.00025468501544809364,
      "loss": 2.9456,
      "step": 126322
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.3190596103668213,
      "learning_rate": 0.000254680971824706,
      "loss": 3.0853,
      "step": 126323
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.8239188194274902,
      "learning_rate": 0.000254676928209744,
      "loss": 3.2912,
      "step": 126324
    },
    {
      "epoch": 1.64,
      "grad_norm": 3.388949394226074,
      "learning_rate": 0.00025467288460320856,
      "loss": 2.9689,
      "step": 126325
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.0349180698394775,
      "learning_rate": 0.0002546688410051002,
      "loss": 2.9622,
      "step": 126326
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.200508117675781,
      "learning_rate": 0.0002546647974154197,
      "loss": 2.7709,
      "step": 126327
    },
    {
      "epoch": 1.64,
      "grad_norm": 4.606483459472656,
      "learning_rate": 0.00025466075383416786,
      "loss": 2.9585,
      "step": 126328
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.7175660133361816,
      "learning_rate": 0.0002546567102613455,
      "loss": 2.7835,
      "step": 126329
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2954938411712646,
      "learning_rate": 0.00025465266669695325,
      "loss": 3.2396,
      "step": 126330
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.5762863159179688,
      "learning_rate": 0.0002546486231409921,
      "loss": 2.711,
      "step": 126331
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.2562124729156494,
      "learning_rate": 0.00025464457959346247,
      "loss": 2.995,
      "step": 126332
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.9580284357070923,
      "learning_rate": 0.0002546405360543653,
      "loss": 3.2183,
      "step": 126333
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.1801178455352783,
      "learning_rate": 0.0002546364925237013,
      "loss": 3.1453,
      "step": 126334
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.380053997039795,
      "learning_rate": 0.00025463244900147123,
      "loss": 2.8563,
      "step": 126335
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.7946281433105469,
      "learning_rate": 0.00025462840548767584,
      "loss": 3.122,
      "step": 126336
    },
    {
      "epoch": 1.64,
      "grad_norm": 1.891306757926941,
      "learning_rate": 0.000254624361982316,
      "loss": 3.1989,
      "step": 126337
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.695876955986023,
      "learning_rate": 0.00025462031848539225,
      "loss": 3.0662,
      "step": 126338
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.46065616607666,
      "learning_rate": 0.0002546162749969054,
      "loss": 2.9982,
      "step": 126339
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8451199531555176,
      "learning_rate": 0.0002546122315168563,
      "loss": 2.9845,
      "step": 126340
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0922279357910156,
      "learning_rate": 0.0002546081880452456,
      "loss": 3.1457,
      "step": 126341
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.7099127769470215,
      "learning_rate": 0.0002546041445820741,
      "loss": 2.8998,
      "step": 126342
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.961603879928589,
      "learning_rate": 0.0002546001011273427,
      "loss": 2.8199,
      "step": 126343
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8709110021591187,
      "learning_rate": 0.0002545960576810518,
      "loss": 2.9047,
      "step": 126344
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.1058669090270996,
      "learning_rate": 0.0002545920142432024,
      "loss": 2.9496,
      "step": 126345
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9021530151367188,
      "learning_rate": 0.0002545879708137952,
      "loss": 3.0206,
      "step": 126346
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.148029327392578,
      "learning_rate": 0.00025458392739283094,
      "loss": 3.1117,
      "step": 126347
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7396085262298584,
      "learning_rate": 0.0002545798839803104,
      "loss": 2.9783,
      "step": 126348
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.811909556388855,
      "learning_rate": 0.00025457584057623437,
      "loss": 3.0078,
      "step": 126349
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.648656129837036,
      "learning_rate": 0.0002545717971806036,
      "loss": 3.0339,
      "step": 126350
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1510729789733887,
      "learning_rate": 0.00025456775379341867,
      "loss": 2.8685,
      "step": 126351
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.138002395629883,
      "learning_rate": 0.00025456371041468045,
      "loss": 2.8939,
      "step": 126352
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.194103956222534,
      "learning_rate": 0.0002545596670443897,
      "loss": 2.9136,
      "step": 126353
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.0782265663146973,
      "learning_rate": 0.0002545556236825472,
      "loss": 2.7357,
      "step": 126354
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.838547945022583,
      "learning_rate": 0.00025455158032915365,
      "loss": 3.1281,
      "step": 126355
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0765223503112793,
      "learning_rate": 0.00025454753698420985,
      "loss": 3.1792,
      "step": 126356
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.468604803085327,
      "learning_rate": 0.00025454349364771654,
      "loss": 2.8396,
      "step": 126357
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2733616828918457,
      "learning_rate": 0.0002545394503196744,
      "loss": 2.9849,
      "step": 126358
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0208823680877686,
      "learning_rate": 0.0002545354070000842,
      "loss": 3.1833,
      "step": 126359
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9376155138015747,
      "learning_rate": 0.00025453136368894675,
      "loss": 2.9683,
      "step": 126360
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9277582168579102,
      "learning_rate": 0.0002545273203862629,
      "loss": 2.9121,
      "step": 126361
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6134870052337646,
      "learning_rate": 0.0002545232770920331,
      "loss": 2.7788,
      "step": 126362
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.860156774520874,
      "learning_rate": 0.00025451923380625835,
      "loss": 3.1434,
      "step": 126363
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.161928176879883,
      "learning_rate": 0.00025451519052893944,
      "loss": 3.0352,
      "step": 126364
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.309875249862671,
      "learning_rate": 0.00025451114726007687,
      "loss": 3.004,
      "step": 126365
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.944753646850586,
      "learning_rate": 0.00025450710399967157,
      "loss": 3.2394,
      "step": 126366
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.007561683654785,
      "learning_rate": 0.00025450306074772434,
      "loss": 3.0222,
      "step": 126367
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.3601295948028564,
      "learning_rate": 0.00025449901750423574,
      "loss": 2.697,
      "step": 126368
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3838894367218018,
      "learning_rate": 0.00025449497426920667,
      "loss": 2.9979,
      "step": 126369
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7666009664535522,
      "learning_rate": 0.0002544909310426378,
      "loss": 3.0472,
      "step": 126370
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7541340589523315,
      "learning_rate": 0.00025448688782453,
      "loss": 2.7078,
      "step": 126371
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.371814727783203,
      "learning_rate": 0.00025448284461488394,
      "loss": 2.9003,
      "step": 126372
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.325735569000244,
      "learning_rate": 0.00025447880141370036,
      "loss": 2.898,
      "step": 126373
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.274346113204956,
      "learning_rate": 0.00025447475822098006,
      "loss": 3.0447,
      "step": 126374
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1776485443115234,
      "learning_rate": 0.0002544707150367237,
      "loss": 2.8991,
      "step": 126375
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.364331007003784,
      "learning_rate": 0.00025446667186093214,
      "loss": 2.7934,
      "step": 126376
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0520341396331787,
      "learning_rate": 0.000254462628693606,
      "loss": 2.9371,
      "step": 126377
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.470445156097412,
      "learning_rate": 0.00025445858553474617,
      "loss": 3.031,
      "step": 126378
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9028825759887695,
      "learning_rate": 0.0002544545423843534,
      "loss": 3.2474,
      "step": 126379
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.343069553375244,
      "learning_rate": 0.0002544504992424284,
      "loss": 3.0142,
      "step": 126380
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8887360095977783,
      "learning_rate": 0.00025444645610897185,
      "loss": 2.9335,
      "step": 126381
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.013077974319458,
      "learning_rate": 0.0002544424129839846,
      "loss": 2.9742,
      "step": 126382
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9934133291244507,
      "learning_rate": 0.0002544383698674673,
      "loss": 3.0886,
      "step": 126383
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.994267225265503,
      "learning_rate": 0.0002544343267594208,
      "loss": 2.9596,
      "step": 126384
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8014062643051147,
      "learning_rate": 0.0002544302836598458,
      "loss": 2.8972,
      "step": 126385
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2611818313598633,
      "learning_rate": 0.00025442624056874324,
      "loss": 3.1154,
      "step": 126386
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0424137115478516,
      "learning_rate": 0.00025442219748611353,
      "loss": 2.8444,
      "step": 126387
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1691393852233887,
      "learning_rate": 0.00025441815441195763,
      "loss": 3.3039,
      "step": 126388
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.318938732147217,
      "learning_rate": 0.00025441411134627623,
      "loss": 2.9906,
      "step": 126389
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.041926860809326,
      "learning_rate": 0.0002544100682890701,
      "loss": 2.8087,
      "step": 126390
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.598400592803955,
      "learning_rate": 0.00025440602524033997,
      "loss": 3.0497,
      "step": 126391
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.537403106689453,
      "learning_rate": 0.00025440198220008685,
      "loss": 2.9812,
      "step": 126392
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0813655853271484,
      "learning_rate": 0.00025439793916831106,
      "loss": 3.0355,
      "step": 126393
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1265153884887695,
      "learning_rate": 0.00025439389614501356,
      "loss": 2.9418,
      "step": 126394
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8729753494262695,
      "learning_rate": 0.0002543898531301951,
      "loss": 2.9257,
      "step": 126395
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9335894584655762,
      "learning_rate": 0.00025438581012385644,
      "loss": 2.9055,
      "step": 126396
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.210510730743408,
      "learning_rate": 0.0002543817671259983,
      "loss": 3.1098,
      "step": 126397
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7385962009429932,
      "learning_rate": 0.00025437772413662156,
      "loss": 2.8488,
      "step": 126398
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.788698673248291,
      "learning_rate": 0.0002543736811557268,
      "loss": 2.9599,
      "step": 126399
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.752871036529541,
      "learning_rate": 0.0002543696381833148,
      "loss": 3.0342,
      "step": 126400
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5064713954925537,
      "learning_rate": 0.00025436559521938636,
      "loss": 2.92,
      "step": 126401
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9005651473999023,
      "learning_rate": 0.0002543615522639422,
      "loss": 3.1214,
      "step": 126402
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.08097505569458,
      "learning_rate": 0.0002543575093169831,
      "loss": 3.1343,
      "step": 126403
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.423198699951172,
      "learning_rate": 0.0002543534663785099,
      "loss": 2.8339,
      "step": 126404
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1368672847747803,
      "learning_rate": 0.0002543494234485231,
      "loss": 3.1067,
      "step": 126405
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9110686779022217,
      "learning_rate": 0.0002543453805270237,
      "loss": 2.7947,
      "step": 126406
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9544081687927246,
      "learning_rate": 0.00025434133761401226,
      "loss": 3.0312,
      "step": 126407
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6006832122802734,
      "learning_rate": 0.00025433729470948966,
      "loss": 2.9381,
      "step": 126408
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.928680658340454,
      "learning_rate": 0.0002543332518134567,
      "loss": 3.0282,
      "step": 126409
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.908584475517273,
      "learning_rate": 0.00025432920892591403,
      "loss": 2.8,
      "step": 126410
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7430158853530884,
      "learning_rate": 0.00025432516604686236,
      "loss": 2.9284,
      "step": 126411
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.645453453063965,
      "learning_rate": 0.00025432112317630255,
      "loss": 2.8491,
      "step": 126412
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.154493808746338,
      "learning_rate": 0.00025431708031423524,
      "loss": 3.1725,
      "step": 126413
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1012039184570312,
      "learning_rate": 0.0002543130374606613,
      "loss": 2.7725,
      "step": 126414
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.093613386154175,
      "learning_rate": 0.0002543089946155814,
      "loss": 3.1309,
      "step": 126415
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4082467555999756,
      "learning_rate": 0.00025430495177899645,
      "loss": 3.0574,
      "step": 126416
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0777573585510254,
      "learning_rate": 0.00025430090895090695,
      "loss": 3.0494,
      "step": 126417
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.519603967666626,
      "learning_rate": 0.0002542968661313138,
      "loss": 3.1511,
      "step": 126418
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2063586711883545,
      "learning_rate": 0.00025429282332021765,
      "loss": 3.0562,
      "step": 126419
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9953664541244507,
      "learning_rate": 0.0002542887805176194,
      "loss": 3.1592,
      "step": 126420
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.871215581893921,
      "learning_rate": 0.00025428473772351966,
      "loss": 3.0317,
      "step": 126421
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.149704694747925,
      "learning_rate": 0.00025428069493791945,
      "loss": 3.0101,
      "step": 126422
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7015624046325684,
      "learning_rate": 0.00025427665216081917,
      "loss": 3.058,
      "step": 126423
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7509939670562744,
      "learning_rate": 0.00025427260939221973,
      "loss": 3.0521,
      "step": 126424
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.076418161392212,
      "learning_rate": 0.00025426856663212185,
      "loss": 2.9015,
      "step": 126425
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3464674949645996,
      "learning_rate": 0.0002542645238805263,
      "loss": 3.0035,
      "step": 126426
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0247819423675537,
      "learning_rate": 0.0002542604811374339,
      "loss": 3.0588,
      "step": 126427
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8843276500701904,
      "learning_rate": 0.0002542564384028454,
      "loss": 3.2159,
      "step": 126428
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.138435125350952,
      "learning_rate": 0.0002542523956767614,
      "loss": 2.7783,
      "step": 126429
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7774518728256226,
      "learning_rate": 0.0002542483529591827,
      "loss": 3.021,
      "step": 126430
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1148455142974854,
      "learning_rate": 0.00025424431025011015,
      "loss": 3.1243,
      "step": 126431
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4401156902313232,
      "learning_rate": 0.0002542402675495444,
      "loss": 3.0869,
      "step": 126432
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9638947248458862,
      "learning_rate": 0.0002542362248574863,
      "loss": 3.1078,
      "step": 126433
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0858962535858154,
      "learning_rate": 0.0002542321821739366,
      "loss": 2.8672,
      "step": 126434
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1703977584838867,
      "learning_rate": 0.00025422813949889595,
      "loss": 3.1108,
      "step": 126435
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3387696743011475,
      "learning_rate": 0.0002542240968323651,
      "loss": 2.8242,
      "step": 126436
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.939465880393982,
      "learning_rate": 0.0002542200541743449,
      "loss": 3.016,
      "step": 126437
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.530015230178833,
      "learning_rate": 0.000254216011524836,
      "loss": 2.9338,
      "step": 126438
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1847715377807617,
      "learning_rate": 0.00025421196888383927,
      "loss": 2.928,
      "step": 126439
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0562286376953125,
      "learning_rate": 0.00025420792625135546,
      "loss": 3.0368,
      "step": 126440
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2538743019104004,
      "learning_rate": 0.0002542038836273852,
      "loss": 2.8916,
      "step": 126441
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.055265426635742,
      "learning_rate": 0.0002541998410119293,
      "loss": 2.8984,
      "step": 126442
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.091897964477539,
      "learning_rate": 0.0002541957984049885,
      "loss": 3.0403,
      "step": 126443
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5748775005340576,
      "learning_rate": 0.00025419175580656357,
      "loss": 3.056,
      "step": 126444
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9114360809326172,
      "learning_rate": 0.00025418771321665523,
      "loss": 3.1743,
      "step": 126445
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.029022455215454,
      "learning_rate": 0.0002541836706352643,
      "loss": 2.8629,
      "step": 126446
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.3226137161254883,
      "learning_rate": 0.0002541796280623915,
      "loss": 2.8811,
      "step": 126447
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.105194330215454,
      "learning_rate": 0.0002541755854980375,
      "loss": 2.9805,
      "step": 126448
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1362311840057373,
      "learning_rate": 0.0002541715429422032,
      "loss": 3.018,
      "step": 126449
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2403876781463623,
      "learning_rate": 0.0002541675003948893,
      "loss": 2.9087,
      "step": 126450
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6333377361297607,
      "learning_rate": 0.0002541634578560964,
      "loss": 3.1649,
      "step": 126451
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8982614278793335,
      "learning_rate": 0.0002541594153258255,
      "loss": 3.0666,
      "step": 126452
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8920209407806396,
      "learning_rate": 0.0002541553728040772,
      "loss": 2.9683,
      "step": 126453
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9535505771636963,
      "learning_rate": 0.00025415133029085227,
      "loss": 3.1549,
      "step": 126454
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1733477115631104,
      "learning_rate": 0.00025414728778615144,
      "loss": 3.0496,
      "step": 126455
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.171722650527954,
      "learning_rate": 0.0002541432452899756,
      "loss": 2.9421,
      "step": 126456
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8752273321151733,
      "learning_rate": 0.0002541392028023253,
      "loss": 2.7488,
      "step": 126457
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8816399574279785,
      "learning_rate": 0.0002541351603232014,
      "loss": 2.8943,
      "step": 126458
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4186453819274902,
      "learning_rate": 0.0002541311178526046,
      "loss": 3.0647,
      "step": 126459
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7739746570587158,
      "learning_rate": 0.0002541270753905358,
      "loss": 2.7845,
      "step": 126460
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.593158721923828,
      "learning_rate": 0.00025412303293699556,
      "loss": 2.8511,
      "step": 126461
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.489264726638794,
      "learning_rate": 0.0002541189904919847,
      "loss": 2.8331,
      "step": 126462
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.484870433807373,
      "learning_rate": 0.00025411494805550406,
      "loss": 2.9197,
      "step": 126463
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.236518144607544,
      "learning_rate": 0.0002541109056275543,
      "loss": 2.8488,
      "step": 126464
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0545766353607178,
      "learning_rate": 0.0002541068632081362,
      "loss": 3.0396,
      "step": 126465
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.845887303352356,
      "learning_rate": 0.00025410282079725045,
      "loss": 2.9963,
      "step": 126466
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.448798656463623,
      "learning_rate": 0.0002540987783948979,
      "loss": 2.9787,
      "step": 126467
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.6535017490386963,
      "learning_rate": 0.00025409473600107916,
      "loss": 3.1033,
      "step": 126468
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.359969139099121,
      "learning_rate": 0.0002540906936157951,
      "loss": 3.0141,
      "step": 126469
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9201834201812744,
      "learning_rate": 0.0002540866512390465,
      "loss": 2.9674,
      "step": 126470
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.627732753753662,
      "learning_rate": 0.00025408260887083416,
      "loss": 3.1025,
      "step": 126471
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.697188138961792,
      "learning_rate": 0.0002540785665111586,
      "loss": 2.9264,
      "step": 126472
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.557868003845215,
      "learning_rate": 0.0002540745241600207,
      "loss": 2.932,
      "step": 126473
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.054165840148926,
      "learning_rate": 0.0002540704818174212,
      "loss": 3.2535,
      "step": 126474
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.308454751968384,
      "learning_rate": 0.00025406643948336084,
      "loss": 3.1969,
      "step": 126475
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3602097034454346,
      "learning_rate": 0.00025406239715784046,
      "loss": 2.8548,
      "step": 126476
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2777364253997803,
      "learning_rate": 0.00025405835484086084,
      "loss": 3.0141,
      "step": 126477
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.991499662399292,
      "learning_rate": 0.00025405431253242255,
      "loss": 2.7758,
      "step": 126478
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.103750467300415,
      "learning_rate": 0.0002540502702325264,
      "loss": 3.0928,
      "step": 126479
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.6195225715637207,
      "learning_rate": 0.00025404622794117317,
      "loss": 2.9334,
      "step": 126480
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.143744468688965,
      "learning_rate": 0.0002540421856583636,
      "loss": 3.0667,
      "step": 126481
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.108940362930298,
      "learning_rate": 0.0002540381433840985,
      "loss": 2.9559,
      "step": 126482
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.830521583557129,
      "learning_rate": 0.00025403410111837863,
      "loss": 3.3295,
      "step": 126483
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8735936880111694,
      "learning_rate": 0.0002540300588612046,
      "loss": 2.9155,
      "step": 126484
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9773494005203247,
      "learning_rate": 0.0002540260166125773,
      "loss": 3.0628,
      "step": 126485
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.113363742828369,
      "learning_rate": 0.00025402197437249735,
      "loss": 3.1399,
      "step": 126486
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0021870136260986,
      "learning_rate": 0.0002540179321409656,
      "loss": 3.0252,
      "step": 126487
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8607797622680664,
      "learning_rate": 0.0002540138899179828,
      "loss": 2.891,
      "step": 126488
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.201021671295166,
      "learning_rate": 0.00025400984770354985,
      "loss": 3.012,
      "step": 126489
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.933248281478882,
      "learning_rate": 0.0002540058054976672,
      "loss": 3.008,
      "step": 126490
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.48606276512146,
      "learning_rate": 0.0002540017633003357,
      "loss": 3.0387,
      "step": 126491
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.246530294418335,
      "learning_rate": 0.0002539977211115561,
      "loss": 2.9349,
      "step": 126492
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.359403133392334,
      "learning_rate": 0.0002539936789313293,
      "loss": 2.883,
      "step": 126493
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1478562355041504,
      "learning_rate": 0.0002539896367596559,
      "loss": 3.1779,
      "step": 126494
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9268938302993774,
      "learning_rate": 0.0002539855945965368,
      "loss": 3.0777,
      "step": 126495
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.174281597137451,
      "learning_rate": 0.0002539815524419725,
      "loss": 3.0308,
      "step": 126496
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.982851266860962,
      "learning_rate": 0.00025397751029596397,
      "loss": 2.9647,
      "step": 126497
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.149569511413574,
      "learning_rate": 0.00025397346815851185,
      "loss": 2.8496,
      "step": 126498
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.271411418914795,
      "learning_rate": 0.0002539694260296169,
      "loss": 3.01,
      "step": 126499
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9562010765075684,
      "learning_rate": 0.00025396538390927995,
      "loss": 2.8537,
      "step": 126500
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7638945579528809,
      "learning_rate": 0.00025396134179750184,
      "loss": 3.1425,
      "step": 126501
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7126314640045166,
      "learning_rate": 0.00025395729969428303,
      "loss": 2.8394,
      "step": 126502
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4314470291137695,
      "learning_rate": 0.0002539532575996244,
      "loss": 3.0642,
      "step": 126503
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1458346843719482,
      "learning_rate": 0.0002539492155135268,
      "loss": 3.0582,
      "step": 126504
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.313302516937256,
      "learning_rate": 0.0002539451734359909,
      "loss": 2.7883,
      "step": 126505
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.205620765686035,
      "learning_rate": 0.0002539411313670174,
      "loss": 2.8817,
      "step": 126506
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.641080856323242,
      "learning_rate": 0.0002539370893066072,
      "loss": 3.1319,
      "step": 126507
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5384669303894043,
      "learning_rate": 0.000253933047254761,
      "loss": 2.9458,
      "step": 126508
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1103873252868652,
      "learning_rate": 0.0002539290052114794,
      "loss": 2.9484,
      "step": 126509
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4980123043060303,
      "learning_rate": 0.00025392496317676327,
      "loss": 3.309,
      "step": 126510
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.991398811340332,
      "learning_rate": 0.0002539209211506134,
      "loss": 3.0716,
      "step": 126511
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.488393545150757,
      "learning_rate": 0.00025391687913303045,
      "loss": 2.9748,
      "step": 126512
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3710927963256836,
      "learning_rate": 0.00025391283712401525,
      "loss": 3.1817,
      "step": 126513
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7903225421905518,
      "learning_rate": 0.0002539087951235687,
      "loss": 2.892,
      "step": 126514
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.965742588043213,
      "learning_rate": 0.0002539047531316912,
      "loss": 2.6811,
      "step": 126515
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.042858839035034,
      "learning_rate": 0.00025390071114838366,
      "loss": 3.0736,
      "step": 126516
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.108964204788208,
      "learning_rate": 0.00025389666917364687,
      "loss": 3.2038,
      "step": 126517
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2997231483459473,
      "learning_rate": 0.00025389262720748156,
      "loss": 2.8356,
      "step": 126518
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.068678140640259,
      "learning_rate": 0.00025388858524988845,
      "loss": 2.9545,
      "step": 126519
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2163503170013428,
      "learning_rate": 0.0002538845433008685,
      "loss": 2.9099,
      "step": 126520
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7972896099090576,
      "learning_rate": 0.00025388050136042215,
      "loss": 3.2099,
      "step": 126521
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.969558596611023,
      "learning_rate": 0.00025387645942855026,
      "loss": 2.9556,
      "step": 126522
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.190720319747925,
      "learning_rate": 0.0002538724175052536,
      "loss": 2.8829,
      "step": 126523
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8776096105575562,
      "learning_rate": 0.00025386837559053293,
      "loss": 3.0297,
      "step": 126524
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0982415676116943,
      "learning_rate": 0.00025386433368438906,
      "loss": 2.9978,
      "step": 126525
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.6711803674697876,
      "learning_rate": 0.00025386029178682274,
      "loss": 3.1069,
      "step": 126526
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8221244812011719,
      "learning_rate": 0.0002538562498978346,
      "loss": 3.0529,
      "step": 126527
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.068789005279541,
      "learning_rate": 0.0002538522080174254,
      "loss": 3.1209,
      "step": 126528
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8396706581115723,
      "learning_rate": 0.0002538481661455959,
      "loss": 3.0668,
      "step": 126529
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1230173110961914,
      "learning_rate": 0.00025384412428234703,
      "loss": 2.7903,
      "step": 126530
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.909934639930725,
      "learning_rate": 0.0002538400824276793,
      "loss": 3.182,
      "step": 126531
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.653998851776123,
      "learning_rate": 0.00025383604058159365,
      "loss": 3.1279,
      "step": 126532
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.285878896713257,
      "learning_rate": 0.00025383199874409073,
      "loss": 2.8711,
      "step": 126533
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9338090419769287,
      "learning_rate": 0.0002538279569151713,
      "loss": 2.9166,
      "step": 126534
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.051546573638916,
      "learning_rate": 0.00025382391509483614,
      "loss": 3.0174,
      "step": 126535
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0920937061309814,
      "learning_rate": 0.000253819873283086,
      "loss": 3.1366,
      "step": 126536
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.934519052505493,
      "learning_rate": 0.00025381583147992154,
      "loss": 2.9224,
      "step": 126537
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.5317118167877197,
      "learning_rate": 0.0002538117896853437,
      "loss": 2.7795,
      "step": 126538
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.105320930480957,
      "learning_rate": 0.000253807747899353,
      "loss": 2.9737,
      "step": 126539
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7691140174865723,
      "learning_rate": 0.0002538037061219504,
      "loss": 3.184,
      "step": 126540
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2463865280151367,
      "learning_rate": 0.00025379966435313655,
      "loss": 3.0389,
      "step": 126541
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3145833015441895,
      "learning_rate": 0.00025379562259291225,
      "loss": 2.8221,
      "step": 126542
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4552416801452637,
      "learning_rate": 0.00025379158084127813,
      "loss": 3.0644,
      "step": 126543
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2442305088043213,
      "learning_rate": 0.00025378753909823513,
      "loss": 3.1074,
      "step": 126544
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1210813522338867,
      "learning_rate": 0.0002537834973637838,
      "loss": 3.1088,
      "step": 126545
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.444937229156494,
      "learning_rate": 0.000253779455637925,
      "loss": 2.879,
      "step": 126546
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8007135391235352,
      "learning_rate": 0.00025377541392065944,
      "loss": 2.7972,
      "step": 126547
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2440900802612305,
      "learning_rate": 0.00025377137221198803,
      "loss": 3.0418,
      "step": 126548
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9793848991394043,
      "learning_rate": 0.0002537673305119113,
      "loss": 2.9346,
      "step": 126549
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8417062759399414,
      "learning_rate": 0.0002537632888204302,
      "loss": 3.021,
      "step": 126550
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7686657905578613,
      "learning_rate": 0.0002537592471375453,
      "loss": 3.3159,
      "step": 126551
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5915114879608154,
      "learning_rate": 0.0002537552054632574,
      "loss": 2.7783,
      "step": 126552
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0448431968688965,
      "learning_rate": 0.0002537511637975673,
      "loss": 2.8201,
      "step": 126553
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.7322258949279785,
      "learning_rate": 0.00025374712214047565,
      "loss": 2.8962,
      "step": 126554
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0558412075042725,
      "learning_rate": 0.00025374308049198337,
      "loss": 3.0733,
      "step": 126555
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.161172389984131,
      "learning_rate": 0.00025373903885209125,
      "loss": 3.1001,
      "step": 126556
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.091691732406616,
      "learning_rate": 0.0002537349972207998,
      "loss": 2.9571,
      "step": 126557
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.652226448059082,
      "learning_rate": 0.00025373095559810984,
      "loss": 2.7405,
      "step": 126558
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8614745140075684,
      "learning_rate": 0.00025372691398402216,
      "loss": 2.9597,
      "step": 126559
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9234135150909424,
      "learning_rate": 0.0002537228723785376,
      "loss": 3.158,
      "step": 126560
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3819589614868164,
      "learning_rate": 0.00025371883078165674,
      "loss": 3.0129,
      "step": 126561
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9367512464523315,
      "learning_rate": 0.0002537147891933806,
      "loss": 3.0352,
      "step": 126562
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.134953498840332,
      "learning_rate": 0.00025371074761370964,
      "loss": 3.0548,
      "step": 126563
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.203115224838257,
      "learning_rate": 0.0002537067060426447,
      "loss": 2.9839,
      "step": 126564
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.037285327911377,
      "learning_rate": 0.00025370266448018655,
      "loss": 2.9965,
      "step": 126565
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.564419984817505,
      "learning_rate": 0.00025369862292633596,
      "loss": 3.0639,
      "step": 126566
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2698559761047363,
      "learning_rate": 0.00025369458138109366,
      "loss": 2.945,
      "step": 126567
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9813766479492188,
      "learning_rate": 0.0002536905398444606,
      "loss": 3.1295,
      "step": 126568
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.6961370706558228,
      "learning_rate": 0.0002536864983164371,
      "loss": 2.5967,
      "step": 126569
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2090508937835693,
      "learning_rate": 0.0002536824567970242,
      "loss": 3.0704,
      "step": 126570
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3575074672698975,
      "learning_rate": 0.0002536784152862227,
      "loss": 3.1312,
      "step": 126571
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0787041187286377,
      "learning_rate": 0.00025367437378403314,
      "loss": 3.0437,
      "step": 126572
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8459542989730835,
      "learning_rate": 0.00025367033229045644,
      "loss": 3.0526,
      "step": 126573
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0329926013946533,
      "learning_rate": 0.0002536662908054934,
      "loss": 3.0532,
      "step": 126574
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.124770402908325,
      "learning_rate": 0.00025366224932914455,
      "loss": 2.9573,
      "step": 126575
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.048255681991577,
      "learning_rate": 0.0002536582078614108,
      "loss": 3.0248,
      "step": 126576
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.06899356842041,
      "learning_rate": 0.00025365416640229285,
      "loss": 3.0963,
      "step": 126577
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0331766605377197,
      "learning_rate": 0.00025365012495179144,
      "loss": 2.9036,
      "step": 126578
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1109516620635986,
      "learning_rate": 0.0002536460835099073,
      "loss": 2.9235,
      "step": 126579
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8925293684005737,
      "learning_rate": 0.0002536420420766413,
      "loss": 2.8837,
      "step": 126580
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8319348096847534,
      "learning_rate": 0.0002536380006519943,
      "loss": 2.9796,
      "step": 126581
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.931859016418457,
      "learning_rate": 0.0002536339592359666,
      "loss": 3.0832,
      "step": 126582
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7444818019866943,
      "learning_rate": 0.0002536299178285593,
      "loss": 2.8443,
      "step": 126583
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8359380960464478,
      "learning_rate": 0.0002536258764297731,
      "loss": 3.0864,
      "step": 126584
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0058248043060303,
      "learning_rate": 0.00025362183503960874,
      "loss": 3.0711,
      "step": 126585
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.955580711364746,
      "learning_rate": 0.00025361779365806693,
      "loss": 3.0044,
      "step": 126586
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9460489749908447,
      "learning_rate": 0.00025361375228514854,
      "loss": 2.7435,
      "step": 126587
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4178593158721924,
      "learning_rate": 0.0002536097109208541,
      "loss": 3.0822,
      "step": 126588
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0445802211761475,
      "learning_rate": 0.0002536056695651845,
      "loss": 3.1538,
      "step": 126589
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.138589859008789,
      "learning_rate": 0.0002536016282181405,
      "loss": 3.1543,
      "step": 126590
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.940506935119629,
      "learning_rate": 0.0002535975868797228,
      "loss": 2.7407,
      "step": 126591
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4474785327911377,
      "learning_rate": 0.00025359354554993225,
      "loss": 2.9314,
      "step": 126592
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2136611938476562,
      "learning_rate": 0.0002535895042287696,
      "loss": 2.9123,
      "step": 126593
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.292036533355713,
      "learning_rate": 0.0002535854629162355,
      "loss": 3.0152,
      "step": 126594
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3458638191223145,
      "learning_rate": 0.0002535814216123306,
      "loss": 3.0086,
      "step": 126595
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.730526089668274,
      "learning_rate": 0.0002535773803170559,
      "loss": 3.1611,
      "step": 126596
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6684978008270264,
      "learning_rate": 0.00025357333903041197,
      "loss": 2.9849,
      "step": 126597
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9841338396072388,
      "learning_rate": 0.0002535692977523997,
      "loss": 3.1422,
      "step": 126598
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8026998043060303,
      "learning_rate": 0.00025356525648301986,
      "loss": 3.226,
      "step": 126599
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.483699083328247,
      "learning_rate": 0.000253561215222273,
      "loss": 3.058,
      "step": 126600
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.220594644546509,
      "learning_rate": 0.00025355717397016,
      "loss": 2.8083,
      "step": 126601
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.693700075149536,
      "learning_rate": 0.00025355313272668154,
      "loss": 3.0267,
      "step": 126602
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.774076223373413,
      "learning_rate": 0.0002535490914918385,
      "loss": 2.998,
      "step": 126603
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3043973445892334,
      "learning_rate": 0.00025354505026563154,
      "loss": 2.7985,
      "step": 126604
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3712127208709717,
      "learning_rate": 0.00025354100904806154,
      "loss": 3.0513,
      "step": 126605
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.198045253753662,
      "learning_rate": 0.000253536967839129,
      "loss": 3.061,
      "step": 126606
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4062087535858154,
      "learning_rate": 0.00025353292663883483,
      "loss": 3.01,
      "step": 126607
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4644758701324463,
      "learning_rate": 0.0002535288854471798,
      "loss": 2.9243,
      "step": 126608
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.165787696838379,
      "learning_rate": 0.0002535248442641646,
      "loss": 3.0416,
      "step": 126609
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7538087368011475,
      "learning_rate": 0.00025352080308979006,
      "loss": 3.1261,
      "step": 126610
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.6914178133010864,
      "learning_rate": 0.0002535167619240569,
      "loss": 3.0064,
      "step": 126611
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.022397518157959,
      "learning_rate": 0.0002535127207669658,
      "loss": 3.0643,
      "step": 126612
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0828423500061035,
      "learning_rate": 0.00025350867961851755,
      "loss": 2.8499,
      "step": 126613
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9284249544143677,
      "learning_rate": 0.00025350463847871287,
      "loss": 2.6639,
      "step": 126614
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9112358093261719,
      "learning_rate": 0.00025350059734755264,
      "loss": 2.9498,
      "step": 126615
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2980120182037354,
      "learning_rate": 0.00025349655622503746,
      "loss": 3.1109,
      "step": 126616
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.260044813156128,
      "learning_rate": 0.00025349251511116826,
      "loss": 3.0834,
      "step": 126617
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3372437953948975,
      "learning_rate": 0.00025348847400594567,
      "loss": 2.9763,
      "step": 126618
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3479676246643066,
      "learning_rate": 0.00025348443290937035,
      "loss": 3.034,
      "step": 126619
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4856204986572266,
      "learning_rate": 0.0002534803918214432,
      "loss": 2.6813,
      "step": 126620
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.481006383895874,
      "learning_rate": 0.00025347635074216484,
      "loss": 3.0558,
      "step": 126621
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.8231282234191895,
      "learning_rate": 0.0002534723096715362,
      "loss": 2.7657,
      "step": 126622
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1853809356689453,
      "learning_rate": 0.00025346826860955796,
      "loss": 3.1667,
      "step": 126623
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.300243616104126,
      "learning_rate": 0.0002534642275562308,
      "loss": 3.0037,
      "step": 126624
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3732056617736816,
      "learning_rate": 0.0002534601865115555,
      "loss": 3.2873,
      "step": 126625
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9524939060211182,
      "learning_rate": 0.0002534561454755329,
      "loss": 3.0762,
      "step": 126626
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8979358673095703,
      "learning_rate": 0.0002534521044481636,
      "loss": 2.8862,
      "step": 126627
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.334218978881836,
      "learning_rate": 0.00025344806342944846,
      "loss": 3.0848,
      "step": 126628
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.001296043395996,
      "learning_rate": 0.00025344402241938825,
      "loss": 2.8564,
      "step": 126629
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2124624252319336,
      "learning_rate": 0.0002534399814179836,
      "loss": 2.8606,
      "step": 126630
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.2346322536468506,
      "learning_rate": 0.00025343594042523534,
      "loss": 3.2073,
      "step": 126631
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.367154121398926,
      "learning_rate": 0.00025343189944114427,
      "loss": 2.9028,
      "step": 126632
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.035862922668457,
      "learning_rate": 0.0002534278584657111,
      "loss": 3.0537,
      "step": 126633
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2934272289276123,
      "learning_rate": 0.0002534238174989365,
      "loss": 2.9531,
      "step": 126634
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.3318660259246826,
      "learning_rate": 0.0002534197765408214,
      "loss": 3.0317,
      "step": 126635
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9271767139434814,
      "learning_rate": 0.00025341573559136635,
      "loss": 3.0934,
      "step": 126636
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.141631603240967,
      "learning_rate": 0.0002534116946505722,
      "loss": 2.9422,
      "step": 126637
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.894615650177002,
      "learning_rate": 0.00025340765371843966,
      "loss": 2.9862,
      "step": 126638
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.270526170730591,
      "learning_rate": 0.0002534036127949696,
      "loss": 3.0324,
      "step": 126639
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2645232677459717,
      "learning_rate": 0.00025339957188016266,
      "loss": 3.0325,
      "step": 126640
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.4688751697540283,
      "learning_rate": 0.00025339553097401964,
      "loss": 3.0141,
      "step": 126641
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.036472797393799,
      "learning_rate": 0.00025339149007654125,
      "loss": 3.2208,
      "step": 126642
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4457218647003174,
      "learning_rate": 0.00025338744918772824,
      "loss": 3.0142,
      "step": 126643
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.890037178993225,
      "learning_rate": 0.00025338340830758136,
      "loss": 2.9944,
      "step": 126644
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8127473592758179,
      "learning_rate": 0.00025337936743610143,
      "loss": 2.8852,
      "step": 126645
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.272231340408325,
      "learning_rate": 0.0002533753265732891,
      "loss": 2.8339,
      "step": 126646
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9240906238555908,
      "learning_rate": 0.0002533712857191453,
      "loss": 2.8546,
      "step": 126647
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9558756351470947,
      "learning_rate": 0.00025336724487367066,
      "loss": 2.9712,
      "step": 126648
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.243804693222046,
      "learning_rate": 0.0002533632040368658,
      "loss": 2.7728,
      "step": 126649
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.806638240814209,
      "learning_rate": 0.0002533591632087316,
      "loss": 3.1296,
      "step": 126650
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0050101280212402,
      "learning_rate": 0.0002533551223892689,
      "loss": 2.682,
      "step": 126651
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7069381475448608,
      "learning_rate": 0.0002533510815784783,
      "loss": 3.0789,
      "step": 126652
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.989098072052002,
      "learning_rate": 0.0002533470407763606,
      "loss": 2.7797,
      "step": 126653
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6436519622802734,
      "learning_rate": 0.0002533429999829167,
      "loss": 2.9799,
      "step": 126654
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1592953205108643,
      "learning_rate": 0.00025333895919814716,
      "loss": 2.8696,
      "step": 126655
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1101317405700684,
      "learning_rate": 0.00025333491842205273,
      "loss": 3.0033,
      "step": 126656
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.144564151763916,
      "learning_rate": 0.0002533308776546342,
      "loss": 2.9456,
      "step": 126657
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.147252321243286,
      "learning_rate": 0.0002533268368958924,
      "loss": 2.8336,
      "step": 126658
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.210387706756592,
      "learning_rate": 0.00025332279614582797,
      "loss": 2.9694,
      "step": 126659
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7892611026763916,
      "learning_rate": 0.0002533187554044419,
      "loss": 2.9663,
      "step": 126660
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3890771865844727,
      "learning_rate": 0.00025331471467173453,
      "loss": 2.5443,
      "step": 126661
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8277785778045654,
      "learning_rate": 0.0002533106739477069,
      "loss": 3.1902,
      "step": 126662
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.436323404312134,
      "learning_rate": 0.0002533066332323597,
      "loss": 2.7087,
      "step": 126663
    },
    {
      "epoch": 1.65,
      "grad_norm": 4.270082950592041,
      "learning_rate": 0.00025330259252569367,
      "loss": 2.916,
      "step": 126664
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.068863868713379,
      "learning_rate": 0.00025329855182770956,
      "loss": 2.984,
      "step": 126665
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.290085554122925,
      "learning_rate": 0.0002532945111384083,
      "loss": 2.9683,
      "step": 126666
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4165635108947754,
      "learning_rate": 0.00025329047045779033,
      "loss": 3.1906,
      "step": 126667
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5742592811584473,
      "learning_rate": 0.0002532864297858565,
      "loss": 2.9822,
      "step": 126668
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.717470169067383,
      "learning_rate": 0.00025328238912260764,
      "loss": 2.9385,
      "step": 126669
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.150405168533325,
      "learning_rate": 0.0002532783484680444,
      "loss": 3.0171,
      "step": 126670
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.7374579906463623,
      "learning_rate": 0.00025327430782216766,
      "loss": 2.8909,
      "step": 126671
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0502231121063232,
      "learning_rate": 0.0002532702671849783,
      "loss": 3.0641,
      "step": 126672
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3219377994537354,
      "learning_rate": 0.00025326622655647663,
      "loss": 2.8602,
      "step": 126673
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7271963357925415,
      "learning_rate": 0.0002532621859366637,
      "loss": 3.0749,
      "step": 126674
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.2284317016601562,
      "learning_rate": 0.0002532581453255402,
      "loss": 2.9368,
      "step": 126675
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.066617965698242,
      "learning_rate": 0.00025325410472310693,
      "loss": 3.0696,
      "step": 126676
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.056933641433716,
      "learning_rate": 0.00025325006412936453,
      "loss": 3.003,
      "step": 126677
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3179807662963867,
      "learning_rate": 0.00025324602354431406,
      "loss": 3.0197,
      "step": 126678
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0333189964294434,
      "learning_rate": 0.00025324198296795586,
      "loss": 3.0061,
      "step": 126679
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.8046953678131104,
      "learning_rate": 0.0002532379424002908,
      "loss": 2.9748,
      "step": 126680
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.98214852809906,
      "learning_rate": 0.0002532339018413198,
      "loss": 2.9199,
      "step": 126681
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.872910976409912,
      "learning_rate": 0.0002532298612910434,
      "loss": 2.8996,
      "step": 126682
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.417426109313965,
      "learning_rate": 0.00025322582074946247,
      "loss": 3.2292,
      "step": 126683
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.34181547164917,
      "learning_rate": 0.0002532217802165779,
      "loss": 2.9959,
      "step": 126684
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8927457332611084,
      "learning_rate": 0.00025321773969239016,
      "loss": 3.0202,
      "step": 126685
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4419186115264893,
      "learning_rate": 0.00025321369917690015,
      "loss": 2.8359,
      "step": 126686
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6495230197906494,
      "learning_rate": 0.00025320965867010853,
      "loss": 3.1039,
      "step": 126687
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2758514881134033,
      "learning_rate": 0.0002532056181720161,
      "loss": 2.8753,
      "step": 126688
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.081286907196045,
      "learning_rate": 0.00025320157768262373,
      "loss": 3.0777,
      "step": 126689
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.318866014480591,
      "learning_rate": 0.0002531975372019321,
      "loss": 3.0173,
      "step": 126690
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8804901838302612,
      "learning_rate": 0.0002531934967299419,
      "loss": 2.7902,
      "step": 126691
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.2822465896606445,
      "learning_rate": 0.0002531894562666538,
      "loss": 2.7606,
      "step": 126692
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4461307525634766,
      "learning_rate": 0.00025318541581206876,
      "loss": 2.9752,
      "step": 126693
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.511291980743408,
      "learning_rate": 0.0002531813753661874,
      "loss": 3.1056,
      "step": 126694
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4689486026763916,
      "learning_rate": 0.0002531773349290105,
      "loss": 2.9334,
      "step": 126695
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4266209602355957,
      "learning_rate": 0.00025317329450053896,
      "loss": 3.1075,
      "step": 126696
    },
    {
      "epoch": 1.65,
      "grad_norm": 4.335949420928955,
      "learning_rate": 0.00025316925408077327,
      "loss": 2.9915,
      "step": 126697
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4481382369995117,
      "learning_rate": 0.00025316521366971427,
      "loss": 3.1266,
      "step": 126698
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4385406970977783,
      "learning_rate": 0.00025316117326736276,
      "loss": 2.9529,
      "step": 126699
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.831413507461548,
      "learning_rate": 0.0002531571328737194,
      "loss": 2.835,
      "step": 126700
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1160430908203125,
      "learning_rate": 0.0002531530924887851,
      "loss": 3.0039,
      "step": 126701
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.018923759460449,
      "learning_rate": 0.00025314905211256056,
      "loss": 2.8946,
      "step": 126702
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8797342777252197,
      "learning_rate": 0.00025314501174504653,
      "loss": 3.0446,
      "step": 126703
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2082061767578125,
      "learning_rate": 0.00025314097138624364,
      "loss": 2.7689,
      "step": 126704
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3093338012695312,
      "learning_rate": 0.0002531369310361527,
      "loss": 3.0902,
      "step": 126705
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.522777795791626,
      "learning_rate": 0.0002531328906947745,
      "loss": 3.1114,
      "step": 126706
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.413816213607788,
      "learning_rate": 0.0002531288503621098,
      "loss": 2.8593,
      "step": 126707
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.8672256469726562,
      "learning_rate": 0.0002531248100381594,
      "loss": 3.0253,
      "step": 126708
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.670313835144043,
      "learning_rate": 0.00025312076972292387,
      "loss": 2.9081,
      "step": 126709
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2896862030029297,
      "learning_rate": 0.0002531167294164042,
      "loss": 3.0129,
      "step": 126710
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.175663948059082,
      "learning_rate": 0.0002531126891186009,
      "loss": 3.2863,
      "step": 126711
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9386159181594849,
      "learning_rate": 0.0002531086488295149,
      "loss": 3.0535,
      "step": 126712
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5543675422668457,
      "learning_rate": 0.0002531046085491468,
      "loss": 3.1525,
      "step": 126713
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.826445460319519,
      "learning_rate": 0.0002531005682774975,
      "loss": 3.0848,
      "step": 126714
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9170199632644653,
      "learning_rate": 0.00025309652801456774,
      "loss": 2.7959,
      "step": 126715
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.742348074913025,
      "learning_rate": 0.00025309248776035815,
      "loss": 2.9486,
      "step": 126716
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1439521312713623,
      "learning_rate": 0.00025308844751486956,
      "loss": 3.0563,
      "step": 126717
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.6516010761260986,
      "learning_rate": 0.0002530844072781028,
      "loss": 2.9496,
      "step": 126718
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0557475090026855,
      "learning_rate": 0.0002530803670500584,
      "loss": 3.0446,
      "step": 126719
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4391722679138184,
      "learning_rate": 0.0002530763268307373,
      "loss": 3.001,
      "step": 126720
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7708712816238403,
      "learning_rate": 0.00025307228662014024,
      "loss": 2.9941,
      "step": 126721
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.1459832191467285,
      "learning_rate": 0.0002530682464182679,
      "loss": 3.0052,
      "step": 126722
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.135608673095703,
      "learning_rate": 0.000253064206225121,
      "loss": 2.7048,
      "step": 126723
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8776882886886597,
      "learning_rate": 0.0002530601660407004,
      "loss": 3.1324,
      "step": 126724
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0134804248809814,
      "learning_rate": 0.0002530561258650069,
      "loss": 2.9941,
      "step": 126725
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.286248207092285,
      "learning_rate": 0.000253052085698041,
      "loss": 3.1293,
      "step": 126726
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1457860469818115,
      "learning_rate": 0.0002530480455398037,
      "loss": 2.93,
      "step": 126727
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7383347749710083,
      "learning_rate": 0.0002530440053902956,
      "loss": 3.0853,
      "step": 126728
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.412179470062256,
      "learning_rate": 0.0002530399652495175,
      "loss": 3.1778,
      "step": 126729
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.984845757484436,
      "learning_rate": 0.00025303592511747013,
      "loss": 3.2901,
      "step": 126730
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2866907119750977,
      "learning_rate": 0.0002530318849941543,
      "loss": 2.9757,
      "step": 126731
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8917101621627808,
      "learning_rate": 0.0002530278448795708,
      "loss": 3.1347,
      "step": 126732
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7584831714630127,
      "learning_rate": 0.00025302380477372033,
      "loss": 3.1111,
      "step": 126733
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.149238109588623,
      "learning_rate": 0.0002530197646766035,
      "loss": 3.1339,
      "step": 126734
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1734325885772705,
      "learning_rate": 0.0002530157245882212,
      "loss": 3.2924,
      "step": 126735
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9282289743423462,
      "learning_rate": 0.0002530116845085742,
      "loss": 2.7233,
      "step": 126736
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7235093116760254,
      "learning_rate": 0.00025300764443766314,
      "loss": 2.948,
      "step": 126737
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2206907272338867,
      "learning_rate": 0.0002530036043754889,
      "loss": 2.7703,
      "step": 126738
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.257753372192383,
      "learning_rate": 0.00025299956432205237,
      "loss": 3.1518,
      "step": 126739
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.612847089767456,
      "learning_rate": 0.00025299552427735384,
      "loss": 2.836,
      "step": 126740
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5166828632354736,
      "learning_rate": 0.0002529914842413944,
      "loss": 3.0094,
      "step": 126741
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.195734739303589,
      "learning_rate": 0.00025298744421417476,
      "loss": 2.7956,
      "step": 126742
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.998125433921814,
      "learning_rate": 0.0002529834041956956,
      "loss": 2.7512,
      "step": 126743
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.144324541091919,
      "learning_rate": 0.0002529793641859577,
      "loss": 3.1573,
      "step": 126744
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.59005069732666,
      "learning_rate": 0.000252975324184962,
      "loss": 3.0692,
      "step": 126745
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9046125411987305,
      "learning_rate": 0.0002529712841927089,
      "loss": 3.0159,
      "step": 126746
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.863506555557251,
      "learning_rate": 0.00025296724420919935,
      "loss": 2.8889,
      "step": 126747
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9419727325439453,
      "learning_rate": 0.0002529632042344341,
      "loss": 2.9162,
      "step": 126748
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.227933168411255,
      "learning_rate": 0.00025295916426841384,
      "loss": 3.1853,
      "step": 126749
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9167338609695435,
      "learning_rate": 0.00025295512431113936,
      "loss": 2.9624,
      "step": 126750
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0198287963867188,
      "learning_rate": 0.0002529510843626116,
      "loss": 3.006,
      "step": 126751
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2425637245178223,
      "learning_rate": 0.0002529470444228309,
      "loss": 2.8805,
      "step": 126752
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.576939821243286,
      "learning_rate": 0.0002529430044917983,
      "loss": 2.8887,
      "step": 126753
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.237030267715454,
      "learning_rate": 0.00025293896456951445,
      "loss": 2.8574,
      "step": 126754
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.168123960494995,
      "learning_rate": 0.00025293492465598016,
      "loss": 2.9444,
      "step": 126755
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0807552337646484,
      "learning_rate": 0.00025293088475119614,
      "loss": 2.7446,
      "step": 126756
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3536579608917236,
      "learning_rate": 0.0002529268448551633,
      "loss": 2.908,
      "step": 126757
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5129618644714355,
      "learning_rate": 0.0002529228049678821,
      "loss": 3.0032,
      "step": 126758
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7514023780822754,
      "learning_rate": 0.00025291876508935345,
      "loss": 3.0732,
      "step": 126759
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.5035321712493896,
      "learning_rate": 0.0002529147252195781,
      "loss": 2.68,
      "step": 126760
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.34377384185791,
      "learning_rate": 0.00025291068535855676,
      "loss": 3.0676,
      "step": 126761
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.032633066177368,
      "learning_rate": 0.00025290664550629025,
      "loss": 2.9574,
      "step": 126762
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9149577617645264,
      "learning_rate": 0.00025290260566277937,
      "loss": 3.1172,
      "step": 126763
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.947788715362549,
      "learning_rate": 0.00025289856582802467,
      "loss": 3.0748,
      "step": 126764
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.048870086669922,
      "learning_rate": 0.000252894526002027,
      "loss": 2.9326,
      "step": 126765
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.8645670413970947,
      "learning_rate": 0.00025289048618478717,
      "loss": 2.9803,
      "step": 126766
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9456214904785156,
      "learning_rate": 0.00025288644637630584,
      "loss": 2.9471,
      "step": 126767
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.121039867401123,
      "learning_rate": 0.0002528824065765839,
      "loss": 2.8712,
      "step": 126768
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6992151737213135,
      "learning_rate": 0.00025287836678562206,
      "loss": 3.0889,
      "step": 126769
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1928741931915283,
      "learning_rate": 0.0002528743270034209,
      "loss": 2.9746,
      "step": 126770
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4024105072021484,
      "learning_rate": 0.0002528702872299813,
      "loss": 2.8786,
      "step": 126771
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0816762447357178,
      "learning_rate": 0.000252866247465304,
      "loss": 2.9485,
      "step": 126772
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.4451189041137695,
      "learning_rate": 0.00025286220770938975,
      "loss": 2.9833,
      "step": 126773
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.830430746078491,
      "learning_rate": 0.00025285816796223934,
      "loss": 2.887,
      "step": 126774
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.767865777015686,
      "learning_rate": 0.0002528541282238536,
      "loss": 3.0534,
      "step": 126775
    },
    {
      "epoch": 1.65,
      "grad_norm": 4.2497992515563965,
      "learning_rate": 0.000252850088494233,
      "loss": 3.0286,
      "step": 126776
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6244983673095703,
      "learning_rate": 0.0002528460487733785,
      "loss": 3.0814,
      "step": 126777
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1507768630981445,
      "learning_rate": 0.00025284200906129076,
      "loss": 3.1421,
      "step": 126778
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.272205114364624,
      "learning_rate": 0.00025283796935797063,
      "loss": 2.8555,
      "step": 126779
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.816256284713745,
      "learning_rate": 0.00025283392966341883,
      "loss": 3.0789,
      "step": 126780
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1542704105377197,
      "learning_rate": 0.000252829889977636,
      "loss": 3.1694,
      "step": 126781
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.6578441858291626,
      "learning_rate": 0.00025282585030062324,
      "loss": 3.0378,
      "step": 126782
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.264430284500122,
      "learning_rate": 0.0002528218106323808,
      "loss": 2.6432,
      "step": 126783
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.1255106925964355,
      "learning_rate": 0.0002528177709729097,
      "loss": 2.9343,
      "step": 126784
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9760477542877197,
      "learning_rate": 0.00025281373132221075,
      "loss": 2.9546,
      "step": 126785
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9422590732574463,
      "learning_rate": 0.00025280969168028455,
      "loss": 3.0006,
      "step": 126786
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9956146478652954,
      "learning_rate": 0.00025280565204713194,
      "loss": 3.0343,
      "step": 126787
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1013920307159424,
      "learning_rate": 0.00025280161242275377,
      "loss": 2.895,
      "step": 126788
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.082918882369995,
      "learning_rate": 0.00025279757280715053,
      "loss": 2.8885,
      "step": 126789
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9672908782958984,
      "learning_rate": 0.0002527935332003231,
      "loss": 2.9759,
      "step": 126790
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0127670764923096,
      "learning_rate": 0.0002527894936022723,
      "loss": 3.286,
      "step": 126791
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.008439540863037,
      "learning_rate": 0.0002527854540129988,
      "loss": 3.2338,
      "step": 126792
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9511396884918213,
      "learning_rate": 0.0002527814144325034,
      "loss": 2.9893,
      "step": 126793
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.004031181335449,
      "learning_rate": 0.0002527773748607868,
      "loss": 2.8609,
      "step": 126794
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7791417837142944,
      "learning_rate": 0.00025277333529784984,
      "loss": 3.0448,
      "step": 126795
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5515365600585938,
      "learning_rate": 0.00025276929574369317,
      "loss": 3.0778,
      "step": 126796
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2778778076171875,
      "learning_rate": 0.0002527652561983175,
      "loss": 3.0958,
      "step": 126797
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.191046714782715,
      "learning_rate": 0.0002527612166617237,
      "loss": 2.8735,
      "step": 126798
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7351760864257812,
      "learning_rate": 0.0002527571771339125,
      "loss": 2.9903,
      "step": 126799
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.868903398513794,
      "learning_rate": 0.0002527531376148847,
      "loss": 2.9874,
      "step": 126800
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.1466543674468994,
      "learning_rate": 0.0002527490981046409,
      "loss": 3.0776,
      "step": 126801
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2395622730255127,
      "learning_rate": 0.000252745058603182,
      "loss": 2.9067,
      "step": 126802
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0969676971435547,
      "learning_rate": 0.00025274101911050865,
      "loss": 2.8515,
      "step": 126803
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7451164722442627,
      "learning_rate": 0.0002527369796266216,
      "loss": 3.1589,
      "step": 126804
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3470349311828613,
      "learning_rate": 0.00025273294015152166,
      "loss": 3.0283,
      "step": 126805
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8109924793243408,
      "learning_rate": 0.0002527289006852096,
      "loss": 3.0453,
      "step": 126806
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1609418392181396,
      "learning_rate": 0.0002527248612276861,
      "loss": 2.7806,
      "step": 126807
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.136463165283203,
      "learning_rate": 0.0002527208217789519,
      "loss": 2.7379,
      "step": 126808
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4769439697265625,
      "learning_rate": 0.00025271678233900786,
      "loss": 2.8959,
      "step": 126809
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.720684289932251,
      "learning_rate": 0.00025271274290785463,
      "loss": 3.1758,
      "step": 126810
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7945668697357178,
      "learning_rate": 0.000252708703485493,
      "loss": 3.0725,
      "step": 126811
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9451478719711304,
      "learning_rate": 0.0002527046640719237,
      "loss": 3.0866,
      "step": 126812
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1292996406555176,
      "learning_rate": 0.0002527006246671475,
      "loss": 3.0682,
      "step": 126813
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.195328712463379,
      "learning_rate": 0.00025269658527116514,
      "loss": 3.0698,
      "step": 126814
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.6851067543029785,
      "learning_rate": 0.0002526925458839774,
      "loss": 3.0031,
      "step": 126815
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.17195725440979,
      "learning_rate": 0.00025268850650558497,
      "loss": 3.0855,
      "step": 126816
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0606353282928467,
      "learning_rate": 0.0002526844671359887,
      "loss": 3.0367,
      "step": 126817
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.041987657546997,
      "learning_rate": 0.0002526804277751893,
      "loss": 2.9144,
      "step": 126818
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.566070556640625,
      "learning_rate": 0.00025267638842318746,
      "loss": 2.8964,
      "step": 126819
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.180154323577881,
      "learning_rate": 0.00025267234907998395,
      "loss": 2.7024,
      "step": 126820
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8816832304000854,
      "learning_rate": 0.00025266830974557955,
      "loss": 3.1143,
      "step": 126821
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9516501426696777,
      "learning_rate": 0.000252664270419975,
      "loss": 2.8536,
      "step": 126822
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.521045446395874,
      "learning_rate": 0.00025266023110317103,
      "loss": 2.8563,
      "step": 126823
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8782384395599365,
      "learning_rate": 0.0002526561917951686,
      "loss": 3.1762,
      "step": 126824
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5640313625335693,
      "learning_rate": 0.0002526521524959681,
      "loss": 2.945,
      "step": 126825
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7165534496307373,
      "learning_rate": 0.00025264811320557047,
      "loss": 2.7318,
      "step": 126826
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9195399284362793,
      "learning_rate": 0.0002526440739239764,
      "loss": 2.9566,
      "step": 126827
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0298142433166504,
      "learning_rate": 0.0002526400346511868,
      "loss": 2.823,
      "step": 126828
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3341450691223145,
      "learning_rate": 0.0002526359953872022,
      "loss": 3.1351,
      "step": 126829
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.09283447265625,
      "learning_rate": 0.00025263195613202367,
      "loss": 3.0517,
      "step": 126830
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.891859769821167,
      "learning_rate": 0.0002526279168856516,
      "loss": 2.8945,
      "step": 126831
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0931389331817627,
      "learning_rate": 0.0002526238776480869,
      "loss": 2.8495,
      "step": 126832
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.3897581100463867,
      "learning_rate": 0.0002526198384193303,
      "loss": 2.8539,
      "step": 126833
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3989717960357666,
      "learning_rate": 0.0002526157991993826,
      "loss": 2.8271,
      "step": 126834
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.233503580093384,
      "learning_rate": 0.00025261175998824445,
      "loss": 3.0376,
      "step": 126835
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5110342502593994,
      "learning_rate": 0.00025260772078591686,
      "loss": 3.1774,
      "step": 126836
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.653843641281128,
      "learning_rate": 0.00025260368159240026,
      "loss": 2.9845,
      "step": 126837
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7876288890838623,
      "learning_rate": 0.0002525996424076955,
      "loss": 3.0251,
      "step": 126838
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2457518577575684,
      "learning_rate": 0.0002525956032318034,
      "loss": 3.0451,
      "step": 126839
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3977692127227783,
      "learning_rate": 0.00025259156406472465,
      "loss": 3.0165,
      "step": 126840
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8462039232254028,
      "learning_rate": 0.00025258752490646,
      "loss": 2.9746,
      "step": 126841
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.014155149459839,
      "learning_rate": 0.00025258348575701037,
      "loss": 3.2304,
      "step": 126842
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8700757026672363,
      "learning_rate": 0.00025257944661637627,
      "loss": 3.0481,
      "step": 126843
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7768280506134033,
      "learning_rate": 0.0002525754074845585,
      "loss": 2.9492,
      "step": 126844
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.327626943588257,
      "learning_rate": 0.0002525713683615579,
      "loss": 3.0085,
      "step": 126845
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.01525616645813,
      "learning_rate": 0.0002525673292473751,
      "loss": 2.9158,
      "step": 126846
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6985924243927,
      "learning_rate": 0.000252563290142011,
      "loss": 2.9846,
      "step": 126847
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6577911376953125,
      "learning_rate": 0.0002525592510454662,
      "loss": 3.0525,
      "step": 126848
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.8614869117736816,
      "learning_rate": 0.0002525552119577418,
      "loss": 2.7082,
      "step": 126849
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7624671459197998,
      "learning_rate": 0.00025255117287883804,
      "loss": 2.9436,
      "step": 126850
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9505696296691895,
      "learning_rate": 0.00025254713380875595,
      "loss": 2.7594,
      "step": 126851
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8290048837661743,
      "learning_rate": 0.00025254309474749624,
      "loss": 2.753,
      "step": 126852
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8246599435806274,
      "learning_rate": 0.00025253905569505965,
      "loss": 3.1905,
      "step": 126853
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.252941846847534,
      "learning_rate": 0.00025253501665144695,
      "loss": 3.1087,
      "step": 126854
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.071591377258301,
      "learning_rate": 0.00025253097761665906,
      "loss": 2.9886,
      "step": 126855
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.81928288936615,
      "learning_rate": 0.00025252693859069636,
      "loss": 3.0518,
      "step": 126856
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.831688165664673,
      "learning_rate": 0.00025252289957355983,
      "loss": 2.8909,
      "step": 126857
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.0514657497406006,
      "learning_rate": 0.0002525188605652502,
      "loss": 3.0328,
      "step": 126858
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0869948863983154,
      "learning_rate": 0.0002525148215657682,
      "loss": 3.0779,
      "step": 126859
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9241082668304443,
      "learning_rate": 0.0002525107825751146,
      "loss": 2.8836,
      "step": 126860
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.0902202129364014,
      "learning_rate": 0.00025250674359329024,
      "loss": 2.752,
      "step": 126861
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.138540029525757,
      "learning_rate": 0.0002525027046202957,
      "loss": 2.9068,
      "step": 126862
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.418076515197754,
      "learning_rate": 0.0002524986656561317,
      "loss": 3.0808,
      "step": 126863
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.210569143295288,
      "learning_rate": 0.00025249462670079914,
      "loss": 2.8087,
      "step": 126864
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.877263069152832,
      "learning_rate": 0.00025249058775429875,
      "loss": 2.9062,
      "step": 126865
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.387026309967041,
      "learning_rate": 0.0002524865488166313,
      "loss": 3.2003,
      "step": 126866
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0707437992095947,
      "learning_rate": 0.00025248250988779756,
      "loss": 3.2432,
      "step": 126867
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5197153091430664,
      "learning_rate": 0.0002524784709677981,
      "loss": 2.8434,
      "step": 126868
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6123714447021484,
      "learning_rate": 0.00025247443205663383,
      "loss": 3.1348,
      "step": 126869
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.504601240158081,
      "learning_rate": 0.0002524703931543054,
      "loss": 2.9913,
      "step": 126870
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.265334367752075,
      "learning_rate": 0.0002524663542608137,
      "loss": 2.9686,
      "step": 126871
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.322938919067383,
      "learning_rate": 0.00025246231537615933,
      "loss": 2.8818,
      "step": 126872
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.1129374504089355,
      "learning_rate": 0.00025245827650034324,
      "loss": 2.9893,
      "step": 126873
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.419942617416382,
      "learning_rate": 0.00025245423763336594,
      "loss": 3.2375,
      "step": 126874
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8243664503097534,
      "learning_rate": 0.0002524501987752283,
      "loss": 3.0768,
      "step": 126875
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.559152841567993,
      "learning_rate": 0.0002524461599259311,
      "loss": 3.1772,
      "step": 126876
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.6439156532287598,
      "learning_rate": 0.000252442121085475,
      "loss": 2.8718,
      "step": 126877
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9385178089141846,
      "learning_rate": 0.00025243808225386085,
      "loss": 2.8492,
      "step": 126878
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1122875213623047,
      "learning_rate": 0.0002524340434310894,
      "loss": 3.0399,
      "step": 126879
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9585275650024414,
      "learning_rate": 0.00025243000461716136,
      "loss": 2.9071,
      "step": 126880
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9878939390182495,
      "learning_rate": 0.0002524259658120774,
      "loss": 3.2651,
      "step": 126881
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5845630168914795,
      "learning_rate": 0.0002524219270158384,
      "loss": 3.1124,
      "step": 126882
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.6229051351547241,
      "learning_rate": 0.00025241788822844507,
      "loss": 2.9154,
      "step": 126883
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.868218183517456,
      "learning_rate": 0.0002524138494498981,
      "loss": 3.1309,
      "step": 126884
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.335963487625122,
      "learning_rate": 0.00025240981068019836,
      "loss": 2.9569,
      "step": 126885
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8868334293365479,
      "learning_rate": 0.0002524057719193465,
      "loss": 2.9595,
      "step": 126886
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.761274814605713,
      "learning_rate": 0.0002524017331673433,
      "loss": 3.0372,
      "step": 126887
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9289058446884155,
      "learning_rate": 0.00025239769442418955,
      "loss": 3.0997,
      "step": 126888
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1927666664123535,
      "learning_rate": 0.0002523936556898859,
      "loss": 3.1466,
      "step": 126889
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.740394353866577,
      "learning_rate": 0.0002523896169644332,
      "loss": 2.9115,
      "step": 126890
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1284265518188477,
      "learning_rate": 0.00025238557824783226,
      "loss": 2.8712,
      "step": 126891
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.193284034729004,
      "learning_rate": 0.00025238153954008365,
      "loss": 2.982,
      "step": 126892
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9944599866867065,
      "learning_rate": 0.0002523775008411882,
      "loss": 2.9644,
      "step": 126893
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.482309341430664,
      "learning_rate": 0.00025237346215114674,
      "loss": 2.7832,
      "step": 126894
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0159952640533447,
      "learning_rate": 0.00025236942346995985,
      "loss": 3.0079,
      "step": 126895
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.365544080734253,
      "learning_rate": 0.00025236538479762844,
      "loss": 3.1128,
      "step": 126896
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6384265422821045,
      "learning_rate": 0.00025236134613415323,
      "loss": 2.9901,
      "step": 126897
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.030323028564453,
      "learning_rate": 0.0002523573074795349,
      "loss": 2.9024,
      "step": 126898
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.081193447113037,
      "learning_rate": 0.0002523532688337743,
      "loss": 2.7131,
      "step": 126899
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8528027534484863,
      "learning_rate": 0.000252349230196872,
      "loss": 3.0928,
      "step": 126900
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9781835079193115,
      "learning_rate": 0.000252345191568829,
      "loss": 3.1171,
      "step": 126901
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8762061595916748,
      "learning_rate": 0.0002523411529496459,
      "loss": 3.0823,
      "step": 126902
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.107093334197998,
      "learning_rate": 0.00025233711433932354,
      "loss": 2.9075,
      "step": 126903
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0612218379974365,
      "learning_rate": 0.0002523330757378625,
      "loss": 2.968,
      "step": 126904
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3200883865356445,
      "learning_rate": 0.0002523290371452637,
      "loss": 3.087,
      "step": 126905
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.170004367828369,
      "learning_rate": 0.0002523249985615278,
      "loss": 2.8969,
      "step": 126906
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.8711652755737305,
      "learning_rate": 0.0002523209599866556,
      "loss": 2.9409,
      "step": 126907
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8928732872009277,
      "learning_rate": 0.0002523169214206478,
      "loss": 3.2193,
      "step": 126908
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.059774398803711,
      "learning_rate": 0.00025231288286350526,
      "loss": 3.2483,
      "step": 126909
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.051420211791992,
      "learning_rate": 0.0002523088443152286,
      "loss": 2.8718,
      "step": 126910
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.019709587097168,
      "learning_rate": 0.00025230480577581864,
      "loss": 3.2339,
      "step": 126911
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2343358993530273,
      "learning_rate": 0.0002523007672452761,
      "loss": 3.0367,
      "step": 126912
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.648792266845703,
      "learning_rate": 0.00025229672872360175,
      "loss": 2.8213,
      "step": 126913
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6735849380493164,
      "learning_rate": 0.00025229269021079633,
      "loss": 2.9143,
      "step": 126914
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0762014389038086,
      "learning_rate": 0.00025228865170686066,
      "loss": 3.0066,
      "step": 126915
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8860924243927002,
      "learning_rate": 0.0002522846132117955,
      "loss": 3.2341,
      "step": 126916
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8134769201278687,
      "learning_rate": 0.00025228057472560133,
      "loss": 2.9083,
      "step": 126917
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7939373254776,
      "learning_rate": 0.0002522765362482792,
      "loss": 2.9865,
      "step": 126918
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7382750511169434,
      "learning_rate": 0.0002522724977798297,
      "loss": 2.9244,
      "step": 126919
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2559659481048584,
      "learning_rate": 0.0002522684593202537,
      "loss": 2.8422,
      "step": 126920
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.970235586166382,
      "learning_rate": 0.0002522644208695519,
      "loss": 3.0205,
      "step": 126921
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3535001277923584,
      "learning_rate": 0.00025226038242772515,
      "loss": 3.2031,
      "step": 126922
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2073121070861816,
      "learning_rate": 0.000252256343994774,
      "loss": 3.0623,
      "step": 126923
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.114745855331421,
      "learning_rate": 0.00025225230557069923,
      "loss": 3.0592,
      "step": 126924
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3732786178588867,
      "learning_rate": 0.0002522482671555017,
      "loss": 2.8797,
      "step": 126925
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9024080038070679,
      "learning_rate": 0.00025224422874918213,
      "loss": 2.8429,
      "step": 126926
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.091585874557495,
      "learning_rate": 0.0002522401903517412,
      "loss": 2.8281,
      "step": 126927
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.036083936691284,
      "learning_rate": 0.0002522361519631799,
      "loss": 2.8555,
      "step": 126928
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7947351932525635,
      "learning_rate": 0.00025223211358349864,
      "loss": 3.0442,
      "step": 126929
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9177497625350952,
      "learning_rate": 0.0002522280752126984,
      "loss": 3.1053,
      "step": 126930
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.474754571914673,
      "learning_rate": 0.0002522240368507798,
      "loss": 2.9246,
      "step": 126931
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.429720878601074,
      "learning_rate": 0.00025221999849774367,
      "loss": 2.814,
      "step": 126932
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.697377324104309,
      "learning_rate": 0.00025221596015359077,
      "loss": 3.0772,
      "step": 126933
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.02724289894104,
      "learning_rate": 0.00025221192181832194,
      "loss": 2.8226,
      "step": 126934
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8908474445343018,
      "learning_rate": 0.00025220788349193766,
      "loss": 3.2363,
      "step": 126935
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7270034551620483,
      "learning_rate": 0.00025220384517443884,
      "loss": 3.0927,
      "step": 126936
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1487226486206055,
      "learning_rate": 0.0002521998068658262,
      "loss": 3.0403,
      "step": 126937
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9470888376235962,
      "learning_rate": 0.00025219576856610055,
      "loss": 3.0014,
      "step": 126938
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.0778353214263916,
      "learning_rate": 0.00025219173027526265,
      "loss": 3.0573,
      "step": 126939
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.825134038925171,
      "learning_rate": 0.0002521876919933133,
      "loss": 2.9912,
      "step": 126940
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7315196990966797,
      "learning_rate": 0.00025218365372025307,
      "loss": 3.3067,
      "step": 126941
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.885798215866089,
      "learning_rate": 0.00025217961545608274,
      "loss": 2.9134,
      "step": 126942
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6487228870391846,
      "learning_rate": 0.00025217557720080315,
      "loss": 3.1055,
      "step": 126943
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.441812753677368,
      "learning_rate": 0.00025217153895441504,
      "loss": 3.1198,
      "step": 126944
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8380107879638672,
      "learning_rate": 0.0002521675007169191,
      "loss": 2.816,
      "step": 126945
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2106001377105713,
      "learning_rate": 0.0002521634624883163,
      "loss": 2.8003,
      "step": 126946
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4104526042938232,
      "learning_rate": 0.00025215942426860707,
      "loss": 2.839,
      "step": 126947
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1028075218200684,
      "learning_rate": 0.0002521553860577923,
      "loss": 2.7631,
      "step": 126948
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8347917795181274,
      "learning_rate": 0.0002521513478558728,
      "loss": 3.1965,
      "step": 126949
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.751307725906372,
      "learning_rate": 0.00025214730966284917,
      "loss": 2.8744,
      "step": 126950
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4239439964294434,
      "learning_rate": 0.0002521432714787223,
      "loss": 3.1546,
      "step": 126951
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8510526418685913,
      "learning_rate": 0.000252139233303493,
      "loss": 2.9732,
      "step": 126952
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.004530668258667,
      "learning_rate": 0.00025213519513716187,
      "loss": 3.0157,
      "step": 126953
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.115277051925659,
      "learning_rate": 0.0002521311569797297,
      "loss": 3.3518,
      "step": 126954
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4524648189544678,
      "learning_rate": 0.0002521271188311972,
      "loss": 3.027,
      "step": 126955
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9489617347717285,
      "learning_rate": 0.0002521230806915652,
      "loss": 2.8868,
      "step": 126956
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.309772253036499,
      "learning_rate": 0.0002521190425608344,
      "loss": 2.7504,
      "step": 126957
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9693119525909424,
      "learning_rate": 0.00025211500443900566,
      "loss": 3.0606,
      "step": 126958
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0281007289886475,
      "learning_rate": 0.0002521109663260796,
      "loss": 2.9109,
      "step": 126959
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.085571050643921,
      "learning_rate": 0.000252106928222057,
      "loss": 2.8269,
      "step": 126960
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0424392223358154,
      "learning_rate": 0.0002521028901269386,
      "loss": 3.199,
      "step": 126961
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7197479009628296,
      "learning_rate": 0.00025209885204072516,
      "loss": 2.9386,
      "step": 126962
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.041778802871704,
      "learning_rate": 0.00025209481396341746,
      "loss": 2.8345,
      "step": 126963
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6403450965881348,
      "learning_rate": 0.00025209077589501643,
      "loss": 3.1317,
      "step": 126964
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7862399816513062,
      "learning_rate": 0.00025208673783552244,
      "loss": 3.0597,
      "step": 126965
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.176849603652954,
      "learning_rate": 0.0002520826997849365,
      "loss": 3.0072,
      "step": 126966
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.373203992843628,
      "learning_rate": 0.0002520786617432592,
      "loss": 2.9757,
      "step": 126967
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2045211791992188,
      "learning_rate": 0.00025207462371049143,
      "loss": 3.0978,
      "step": 126968
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6835479736328125,
      "learning_rate": 0.00025207058568663387,
      "loss": 2.9309,
      "step": 126969
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3556416034698486,
      "learning_rate": 0.00025206654767168736,
      "loss": 3.0029,
      "step": 126970
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7938543558120728,
      "learning_rate": 0.00025206250966565256,
      "loss": 3.2725,
      "step": 126971
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5968854427337646,
      "learning_rate": 0.00025205847166853023,
      "loss": 2.8754,
      "step": 126972
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2759621143341064,
      "learning_rate": 0.00025205443368032113,
      "loss": 3.3417,
      "step": 126973
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.322232246398926,
      "learning_rate": 0.00025205039570102596,
      "loss": 2.8162,
      "step": 126974
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3593406677246094,
      "learning_rate": 0.0002520463577306456,
      "loss": 2.8919,
      "step": 126975
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.401472568511963,
      "learning_rate": 0.00025204231976918076,
      "loss": 3.08,
      "step": 126976
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0224380493164062,
      "learning_rate": 0.0002520382818166321,
      "loss": 2.8413,
      "step": 126977
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.400935649871826,
      "learning_rate": 0.0002520342438730004,
      "loss": 3.0895,
      "step": 126978
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.289144277572632,
      "learning_rate": 0.00025203020593828654,
      "loss": 2.7415,
      "step": 126979
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9601072072982788,
      "learning_rate": 0.00025202616801249105,
      "loss": 3.0288,
      "step": 126980
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.090677499771118,
      "learning_rate": 0.00025202213009561486,
      "loss": 2.7249,
      "step": 126981
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.11649489402771,
      "learning_rate": 0.0002520180921876586,
      "loss": 2.9207,
      "step": 126982
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.261216402053833,
      "learning_rate": 0.00025201405428862324,
      "loss": 3.007,
      "step": 126983
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.01102352142334,
      "learning_rate": 0.0002520100163985092,
      "loss": 3.1892,
      "step": 126984
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9485535621643066,
      "learning_rate": 0.0002520059785173175,
      "loss": 3.044,
      "step": 126985
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2879605293273926,
      "learning_rate": 0.00025200194064504885,
      "loss": 2.9277,
      "step": 126986
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0163891315460205,
      "learning_rate": 0.00025199790278170384,
      "loss": 3.1706,
      "step": 126987
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1298913955688477,
      "learning_rate": 0.00025199386492728333,
      "loss": 2.8429,
      "step": 126988
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8157777786254883,
      "learning_rate": 0.00025198982708178814,
      "loss": 2.8384,
      "step": 126989
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7793245315551758,
      "learning_rate": 0.00025198578924521886,
      "loss": 2.944,
      "step": 126990
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.106349468231201,
      "learning_rate": 0.00025198175141757635,
      "loss": 2.9869,
      "step": 126991
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2718818187713623,
      "learning_rate": 0.00025197771359886133,
      "loss": 2.8733,
      "step": 126992
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.257290840148926,
      "learning_rate": 0.00025197367578907466,
      "loss": 2.9719,
      "step": 126993
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5183050632476807,
      "learning_rate": 0.00025196963798821694,
      "loss": 2.8907,
      "step": 126994
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0719339847564697,
      "learning_rate": 0.000251965600196289,
      "loss": 2.9988,
      "step": 126995
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9980326890945435,
      "learning_rate": 0.00025196156241329143,
      "loss": 3.1079,
      "step": 126996
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.356534242630005,
      "learning_rate": 0.00025195752463922516,
      "loss": 2.7823,
      "step": 126997
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.646976947784424,
      "learning_rate": 0.0002519534868740909,
      "loss": 3.0535,
      "step": 126998
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0514378547668457,
      "learning_rate": 0.0002519494491178894,
      "loss": 2.9615,
      "step": 126999
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.42166805267334,
      "learning_rate": 0.00025194541137062143,
      "loss": 3.2096,
      "step": 127000
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2610201835632324,
      "learning_rate": 0.0002519413736322878,
      "loss": 2.8262,
      "step": 127001
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.985338568687439,
      "learning_rate": 0.0002519373359028891,
      "loss": 2.8851,
      "step": 127002
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.193821668624878,
      "learning_rate": 0.0002519332981824261,
      "loss": 2.842,
      "step": 127003
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.15409517288208,
      "learning_rate": 0.00025192926047089964,
      "loss": 2.9029,
      "step": 127004
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.02526593208313,
      "learning_rate": 0.00025192522276831043,
      "loss": 2.9624,
      "step": 127005
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9068288803100586,
      "learning_rate": 0.0002519211850746592,
      "loss": 3.0753,
      "step": 127006
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4624249935150146,
      "learning_rate": 0.0002519171473899469,
      "loss": 3.1073,
      "step": 127007
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.244025945663452,
      "learning_rate": 0.000251913109714174,
      "loss": 3.1941,
      "step": 127008
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9250972270965576,
      "learning_rate": 0.0002519090720473413,
      "loss": 2.8591,
      "step": 127009
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5824241638183594,
      "learning_rate": 0.00025190503438944964,
      "loss": 3.0782,
      "step": 127010
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.526911973953247,
      "learning_rate": 0.00025190099674049976,
      "loss": 3.0682,
      "step": 127011
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.886765718460083,
      "learning_rate": 0.0002518969591004923,
      "loss": 2.9495,
      "step": 127012
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.046703815460205,
      "learning_rate": 0.00025189292146942836,
      "loss": 2.9521,
      "step": 127013
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2251415252685547,
      "learning_rate": 0.00025188888384730825,
      "loss": 3.0353,
      "step": 127014
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.623018503189087,
      "learning_rate": 0.0002518848462341329,
      "loss": 2.7188,
      "step": 127015
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4682140350341797,
      "learning_rate": 0.00025188080862990306,
      "loss": 2.8643,
      "step": 127016
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.767083168029785,
      "learning_rate": 0.0002518767710346195,
      "loss": 2.8615,
      "step": 127017
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7779603004455566,
      "learning_rate": 0.00025187273344828295,
      "loss": 2.8815,
      "step": 127018
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6369783878326416,
      "learning_rate": 0.0002518686958708943,
      "loss": 2.9774,
      "step": 127019
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.555704355239868,
      "learning_rate": 0.0002518646583024541,
      "loss": 2.9165,
      "step": 127020
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.091174840927124,
      "learning_rate": 0.00025186062074296307,
      "loss": 2.7544,
      "step": 127021
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1004040241241455,
      "learning_rate": 0.0002518565831924221,
      "loss": 3.0891,
      "step": 127022
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8439593315124512,
      "learning_rate": 0.00025185254565083186,
      "loss": 3.2628,
      "step": 127023
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.409130334854126,
      "learning_rate": 0.0002518485081181932,
      "loss": 3.1491,
      "step": 127024
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.890645980834961,
      "learning_rate": 0.0002518444705945069,
      "loss": 2.9276,
      "step": 127025
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.088972568511963,
      "learning_rate": 0.0002518404330797735,
      "loss": 3.1119,
      "step": 127026
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3896942138671875,
      "learning_rate": 0.0002518363955739939,
      "loss": 3.1343,
      "step": 127027
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.5977160930633545,
      "learning_rate": 0.0002518323580771688,
      "loss": 2.9714,
      "step": 127028
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.2908883094787598,
      "learning_rate": 0.000251828320589299,
      "loss": 2.683,
      "step": 127029
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0405590534210205,
      "learning_rate": 0.00025182428311038517,
      "loss": 2.935,
      "step": 127030
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2542552947998047,
      "learning_rate": 0.0002518202456404283,
      "loss": 2.8884,
      "step": 127031
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.810365676879883,
      "learning_rate": 0.0002518162081794288,
      "loss": 3.1186,
      "step": 127032
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1244447231292725,
      "learning_rate": 0.00025181217072738754,
      "loss": 2.8354,
      "step": 127033
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8958287239074707,
      "learning_rate": 0.00025180813328430537,
      "loss": 3.0263,
      "step": 127034
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.476227045059204,
      "learning_rate": 0.000251804095850183,
      "loss": 3.1919,
      "step": 127035
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3140101432800293,
      "learning_rate": 0.00025180005842502106,
      "loss": 3.1354,
      "step": 127036
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2330543994903564,
      "learning_rate": 0.0002517960210088206,
      "loss": 2.8329,
      "step": 127037
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9818075895309448,
      "learning_rate": 0.00025179198360158203,
      "loss": 3.0638,
      "step": 127038
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.433011054992676,
      "learning_rate": 0.0002517879462033062,
      "loss": 3.0313,
      "step": 127039
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.503124952316284,
      "learning_rate": 0.00025178390881399395,
      "loss": 2.9781,
      "step": 127040
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.128261089324951,
      "learning_rate": 0.000251779871433646,
      "loss": 3.0138,
      "step": 127041
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1046831607818604,
      "learning_rate": 0.00025177583406226303,
      "loss": 3.0951,
      "step": 127042
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.492899179458618,
      "learning_rate": 0.000251771796699846,
      "loss": 2.7407,
      "step": 127043
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.3595306873321533,
      "learning_rate": 0.00025176775934639535,
      "loss": 2.9926,
      "step": 127044
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8466520309448242,
      "learning_rate": 0.000251763722001912,
      "loss": 3.124,
      "step": 127045
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4856128692626953,
      "learning_rate": 0.00025175968466639666,
      "loss": 2.8956,
      "step": 127046
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.8434979915618896,
      "learning_rate": 0.0002517556473398501,
      "loss": 3.1176,
      "step": 127047
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0144593715667725,
      "learning_rate": 0.00025175161002227313,
      "loss": 2.9642,
      "step": 127048
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9218734502792358,
      "learning_rate": 0.00025174757271366645,
      "loss": 3.0793,
      "step": 127049
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.6587586402893066,
      "learning_rate": 0.00025174353541403084,
      "loss": 2.9936,
      "step": 127050
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.078582525253296,
      "learning_rate": 0.000251739498123367,
      "loss": 2.9401,
      "step": 127051
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2055344581604004,
      "learning_rate": 0.00025173546084167564,
      "loss": 3.2486,
      "step": 127052
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0949559211730957,
      "learning_rate": 0.00025173142356895757,
      "loss": 3.0835,
      "step": 127053
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0012550354003906,
      "learning_rate": 0.00025172738630521355,
      "loss": 2.7703,
      "step": 127054
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1637654304504395,
      "learning_rate": 0.00025172334905044434,
      "loss": 3.1423,
      "step": 127055
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.941052794456482,
      "learning_rate": 0.00025171931180465075,
      "loss": 2.9078,
      "step": 127056
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.321441888809204,
      "learning_rate": 0.0002517152745678334,
      "loss": 2.9816,
      "step": 127057
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2116150856018066,
      "learning_rate": 0.00025171123733999304,
      "loss": 2.9301,
      "step": 127058
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9780685901641846,
      "learning_rate": 0.00025170720012113044,
      "loss": 3.1331,
      "step": 127059
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7090152502059937,
      "learning_rate": 0.00025170316291124644,
      "loss": 2.9998,
      "step": 127060
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7057499885559082,
      "learning_rate": 0.0002516991257103417,
      "loss": 3.1516,
      "step": 127061
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2734861373901367,
      "learning_rate": 0.0002516950885184171,
      "loss": 2.8127,
      "step": 127062
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9963722229003906,
      "learning_rate": 0.00025169105133547327,
      "loss": 3.1948,
      "step": 127063
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9895697832107544,
      "learning_rate": 0.0002516870141615109,
      "loss": 2.9975,
      "step": 127064
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0183732509613037,
      "learning_rate": 0.00025168297699653086,
      "loss": 3.04,
      "step": 127065
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.569615602493286,
      "learning_rate": 0.0002516789398405339,
      "loss": 3.0804,
      "step": 127066
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0953052043914795,
      "learning_rate": 0.00025167490269352064,
      "loss": 2.9392,
      "step": 127067
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.286566972732544,
      "learning_rate": 0.0002516708655554921,
      "loss": 2.9973,
      "step": 127068
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9266999959945679,
      "learning_rate": 0.0002516668284264487,
      "loss": 2.8882,
      "step": 127069
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.332968235015869,
      "learning_rate": 0.00025166279130639144,
      "loss": 3.1827,
      "step": 127070
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2424538135528564,
      "learning_rate": 0.000251658754195321,
      "loss": 2.8665,
      "step": 127071
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.489222764968872,
      "learning_rate": 0.000251654717093238,
      "loss": 2.8201,
      "step": 127072
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.7343674898147583,
      "learning_rate": 0.00025165068000014334,
      "loss": 3.236,
      "step": 127073
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1610612869262695,
      "learning_rate": 0.0002516466429160378,
      "loss": 2.9439,
      "step": 127074
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.114098072052002,
      "learning_rate": 0.000251642605840922,
      "loss": 2.729,
      "step": 127075
    },
    {
      "epoch": 1.65,
      "grad_norm": 4.128629207611084,
      "learning_rate": 0.00025163856877479673,
      "loss": 2.9255,
      "step": 127076
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.34193754196167,
      "learning_rate": 0.00025163453171766283,
      "loss": 2.8833,
      "step": 127077
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.8850363492965698,
      "learning_rate": 0.000251630494669521,
      "loss": 2.9147,
      "step": 127078
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.470184564590454,
      "learning_rate": 0.00025162645763037193,
      "loss": 3.1832,
      "step": 127079
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.7457218170166016,
      "learning_rate": 0.00025162242060021645,
      "loss": 3.143,
      "step": 127080
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.347571611404419,
      "learning_rate": 0.00025161838357905524,
      "loss": 3.227,
      "step": 127081
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.125037670135498,
      "learning_rate": 0.000251614346566889,
      "loss": 2.8017,
      "step": 127082
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.120178461074829,
      "learning_rate": 0.0002516103095637186,
      "loss": 2.9543,
      "step": 127083
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.434438705444336,
      "learning_rate": 0.00025160627256954487,
      "loss": 2.8104,
      "step": 127084
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.1541130542755127,
      "learning_rate": 0.0002516022355843684,
      "loss": 3.1516,
      "step": 127085
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0508642196655273,
      "learning_rate": 0.00025159819860819,
      "loss": 3.0264,
      "step": 127086
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.418948173522949,
      "learning_rate": 0.00025159416164101037,
      "loss": 3.1144,
      "step": 127087
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.4727389812469482,
      "learning_rate": 0.0002515901246828303,
      "loss": 2.9369,
      "step": 127088
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.2098166942596436,
      "learning_rate": 0.0002515860877336505,
      "loss": 3.0863,
      "step": 127089
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.470252752304077,
      "learning_rate": 0.00025158205079347184,
      "loss": 3.0153,
      "step": 127090
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.0156891345977783,
      "learning_rate": 0.00025157801386229496,
      "loss": 3.1954,
      "step": 127091
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.1168437004089355,
      "learning_rate": 0.00025157397694012076,
      "loss": 3.2242,
      "step": 127092
    },
    {
      "epoch": 1.65,
      "grad_norm": 4.445827007293701,
      "learning_rate": 0.0002515699400269497,
      "loss": 2.862,
      "step": 127093
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.553621292114258,
      "learning_rate": 0.00025156590312278276,
      "loss": 2.7988,
      "step": 127094
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.1218924522399902,
      "learning_rate": 0.0002515618662276206,
      "loss": 3.1238,
      "step": 127095
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4867897033691406,
      "learning_rate": 0.000251557829341464,
      "loss": 2.9332,
      "step": 127096
    },
    {
      "epoch": 1.65,
      "grad_norm": 4.2455363273620605,
      "learning_rate": 0.0002515537924643137,
      "loss": 2.9215,
      "step": 127097
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.61849045753479,
      "learning_rate": 0.00025154975559617065,
      "loss": 3.1894,
      "step": 127098
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.882790446281433,
      "learning_rate": 0.00025154571873703527,
      "loss": 3.2952,
      "step": 127099
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.314790964126587,
      "learning_rate": 0.0002515416818869084,
      "loss": 2.8993,
      "step": 127100
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.8551008701324463,
      "learning_rate": 0.0002515376450457909,
      "loss": 3.0582,
      "step": 127101
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.4353184700012207,
      "learning_rate": 0.0002515336082136835,
      "loss": 2.7898,
      "step": 127102
    },
    {
      "epoch": 1.65,
      "grad_norm": 1.9361323118209839,
      "learning_rate": 0.0002515295713905868,
      "loss": 3.059,
      "step": 127103
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.3753087520599365,
      "learning_rate": 0.00025152553457650194,
      "loss": 2.9543,
      "step": 127104
    },
    {
      "epoch": 1.65,
      "grad_norm": 2.9185166358947754,
      "learning_rate": 0.00025152149777142916,
      "loss": 2.8478,
      "step": 127105
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2788867950439453,
      "learning_rate": 0.0002515174609753695,
      "loss": 2.9369,
      "step": 127106
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.666637420654297,
      "learning_rate": 0.00025151342418832366,
      "loss": 2.955,
      "step": 127107
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.081831693649292,
      "learning_rate": 0.00025150938741029234,
      "loss": 3.1152,
      "step": 127108
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3078668117523193,
      "learning_rate": 0.0002515053506412764,
      "loss": 3.2715,
      "step": 127109
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1324310302734375,
      "learning_rate": 0.00025150131388127666,
      "loss": 2.9187,
      "step": 127110
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.505495071411133,
      "learning_rate": 0.00025149727713029353,
      "loss": 3.069,
      "step": 127111
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6671125888824463,
      "learning_rate": 0.00025149324038832804,
      "loss": 3.0499,
      "step": 127112
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.135856866836548,
      "learning_rate": 0.0002514892036553809,
      "loss": 2.8046,
      "step": 127113
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9774447679519653,
      "learning_rate": 0.0002514851669314528,
      "loss": 3.1662,
      "step": 127114
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8458069562911987,
      "learning_rate": 0.0002514811302165445,
      "loss": 3.0659,
      "step": 127115
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.245361566543579,
      "learning_rate": 0.0002514770935106568,
      "loss": 3.1202,
      "step": 127116
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1168150901794434,
      "learning_rate": 0.00025147305681379054,
      "loss": 2.8573,
      "step": 127117
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7477329969406128,
      "learning_rate": 0.00025146902012594625,
      "loss": 2.9135,
      "step": 127118
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7970302104949951,
      "learning_rate": 0.0002514649834471247,
      "loss": 3.2398,
      "step": 127119
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.225132703781128,
      "learning_rate": 0.00025146094677732685,
      "loss": 3.0723,
      "step": 127120
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.758516550064087,
      "learning_rate": 0.00025145691011655325,
      "loss": 2.8881,
      "step": 127121
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.914650559425354,
      "learning_rate": 0.0002514528734648047,
      "loss": 3.0073,
      "step": 127122
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7970638275146484,
      "learning_rate": 0.0002514488368220822,
      "loss": 3.0808,
      "step": 127123
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2902536392211914,
      "learning_rate": 0.0002514448001883861,
      "loss": 2.9899,
      "step": 127124
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5747182369232178,
      "learning_rate": 0.0002514407635637173,
      "loss": 3.3946,
      "step": 127125
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.961469292640686,
      "learning_rate": 0.0002514367269480766,
      "loss": 2.8894,
      "step": 127126
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.928806185722351,
      "learning_rate": 0.00025143269034146477,
      "loss": 2.9389,
      "step": 127127
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5284347534179688,
      "learning_rate": 0.0002514286537438825,
      "loss": 2.9599,
      "step": 127128
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0308949947357178,
      "learning_rate": 0.0002514246171553307,
      "loss": 2.9018,
      "step": 127129
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0361053943634033,
      "learning_rate": 0.0002514205805758098,
      "loss": 2.8154,
      "step": 127130
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1485135555267334,
      "learning_rate": 0.00025141654400532074,
      "loss": 3.0122,
      "step": 127131
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6723132133483887,
      "learning_rate": 0.0002514125074438643,
      "loss": 3.2182,
      "step": 127132
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9712793827056885,
      "learning_rate": 0.0002514084708914412,
      "loss": 3.1526,
      "step": 127133
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.549940586090088,
      "learning_rate": 0.0002514044343480521,
      "loss": 3.0397,
      "step": 127134
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0094258785247803,
      "learning_rate": 0.00025140039781369804,
      "loss": 2.9637,
      "step": 127135
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.30306077003479,
      "learning_rate": 0.0002513963612883794,
      "loss": 2.9661,
      "step": 127136
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7976090908050537,
      "learning_rate": 0.0002513923247720971,
      "loss": 2.9741,
      "step": 127137
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.811767339706421,
      "learning_rate": 0.0002513882882648519,
      "loss": 3.0804,
      "step": 127138
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1432271003723145,
      "learning_rate": 0.0002513842517666445,
      "loss": 3.1551,
      "step": 127139
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.906189203262329,
      "learning_rate": 0.0002513802152774757,
      "loss": 2.9411,
      "step": 127140
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.572373867034912,
      "learning_rate": 0.0002513761787973464,
      "loss": 2.9364,
      "step": 127141
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.849853038787842,
      "learning_rate": 0.000251372142326257,
      "loss": 2.9834,
      "step": 127142
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8472081422805786,
      "learning_rate": 0.0002513681058642085,
      "loss": 3.0429,
      "step": 127143
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3366596698760986,
      "learning_rate": 0.0002513640694112015,
      "loss": 2.9177,
      "step": 127144
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0003092288970947,
      "learning_rate": 0.0002513600329672369,
      "loss": 2.8471,
      "step": 127145
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9409619569778442,
      "learning_rate": 0.00025135599653231536,
      "loss": 3.0132,
      "step": 127146
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.947057843208313,
      "learning_rate": 0.0002513519601064378,
      "loss": 2.7511,
      "step": 127147
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.793333888053894,
      "learning_rate": 0.00025134792368960475,
      "loss": 3.1258,
      "step": 127148
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3997645378112793,
      "learning_rate": 0.000251343887281817,
      "loss": 3.1009,
      "step": 127149
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7796603441238403,
      "learning_rate": 0.0002513398508830753,
      "loss": 2.7222,
      "step": 127150
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3213884830474854,
      "learning_rate": 0.0002513358144933805,
      "loss": 2.7245,
      "step": 127151
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.236562490463257,
      "learning_rate": 0.00025133177811273325,
      "loss": 2.7185,
      "step": 127152
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.9683926105499268,
      "learning_rate": 0.0002513277417411344,
      "loss": 3.2448,
      "step": 127153
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9222625494003296,
      "learning_rate": 0.00025132370537858466,
      "loss": 3.0036,
      "step": 127154
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.984166145324707,
      "learning_rate": 0.00025131966902508475,
      "loss": 2.8106,
      "step": 127155
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5138580799102783,
      "learning_rate": 0.00025131563268063537,
      "loss": 3.25,
      "step": 127156
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.9539906978607178,
      "learning_rate": 0.00025131159634523734,
      "loss": 2.9283,
      "step": 127157
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.575223922729492,
      "learning_rate": 0.00025130756001889143,
      "loss": 3.1771,
      "step": 127158
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8373982906341553,
      "learning_rate": 0.0002513035237015984,
      "loss": 2.9526,
      "step": 127159
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3608734607696533,
      "learning_rate": 0.00025129948739335893,
      "loss": 2.8935,
      "step": 127160
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2690491676330566,
      "learning_rate": 0.00025129545109417375,
      "loss": 2.8636,
      "step": 127161
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.415745973587036,
      "learning_rate": 0.0002512914148040437,
      "loss": 3.1724,
      "step": 127162
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9410054683685303,
      "learning_rate": 0.0002512873785229696,
      "loss": 2.9051,
      "step": 127163
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1084365844726562,
      "learning_rate": 0.00025128334225095196,
      "loss": 2.9304,
      "step": 127164
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.627091646194458,
      "learning_rate": 0.00025127930598799176,
      "loss": 3.0747,
      "step": 127165
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7889857292175293,
      "learning_rate": 0.0002512752697340896,
      "loss": 2.858,
      "step": 127166
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.20369815826416,
      "learning_rate": 0.00025127123348924623,
      "loss": 2.9389,
      "step": 127167
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8068697452545166,
      "learning_rate": 0.00025126719725346254,
      "loss": 2.7227,
      "step": 127168
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.121392250061035,
      "learning_rate": 0.0002512631610267392,
      "loss": 3.023,
      "step": 127169
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.746142864227295,
      "learning_rate": 0.00025125912480907696,
      "loss": 3.0931,
      "step": 127170
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.1073429584503174,
      "learning_rate": 0.0002512550886004766,
      "loss": 2.9768,
      "step": 127171
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9408023357391357,
      "learning_rate": 0.00025125105240093876,
      "loss": 3.051,
      "step": 127172
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.070479154586792,
      "learning_rate": 0.0002512470162104643,
      "loss": 2.8802,
      "step": 127173
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.551516056060791,
      "learning_rate": 0.0002512429800290539,
      "loss": 2.899,
      "step": 127174
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.639544486999512,
      "learning_rate": 0.0002512389438567084,
      "loss": 2.95,
      "step": 127175
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.348118305206299,
      "learning_rate": 0.0002512349076934284,
      "loss": 2.9283,
      "step": 127176
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.464876174926758,
      "learning_rate": 0.000251230871539215,
      "loss": 2.7504,
      "step": 127177
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.855189323425293,
      "learning_rate": 0.0002512268353940685,
      "loss": 2.9227,
      "step": 127178
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.440239191055298,
      "learning_rate": 0.00025122279925798987,
      "loss": 2.9426,
      "step": 127179
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9390462636947632,
      "learning_rate": 0.0002512187631309799,
      "loss": 2.825,
      "step": 127180
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9175529479980469,
      "learning_rate": 0.0002512147270130392,
      "loss": 3.0014,
      "step": 127181
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.182286500930786,
      "learning_rate": 0.00025121069090416865,
      "loss": 3.0008,
      "step": 127182
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8774452209472656,
      "learning_rate": 0.0002512066548043689,
      "loss": 3.0304,
      "step": 127183
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9976766109466553,
      "learning_rate": 0.00025120261871364093,
      "loss": 2.9877,
      "step": 127184
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9748870134353638,
      "learning_rate": 0.0002511985826319852,
      "loss": 3.1911,
      "step": 127185
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2129669189453125,
      "learning_rate": 0.00025119454655940255,
      "loss": 2.9353,
      "step": 127186
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.632698655128479,
      "learning_rate": 0.00025119051049589375,
      "loss": 2.8719,
      "step": 127187
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9226953983306885,
      "learning_rate": 0.00025118647444145957,
      "loss": 3.0449,
      "step": 127188
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.473296642303467,
      "learning_rate": 0.00025118243839610075,
      "loss": 2.8795,
      "step": 127189
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.635361433029175,
      "learning_rate": 0.00025117840235981817,
      "loss": 3.0709,
      "step": 127190
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.224273443222046,
      "learning_rate": 0.00025117436633261225,
      "loss": 2.9801,
      "step": 127191
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2145118713378906,
      "learning_rate": 0.000251170330314484,
      "loss": 2.9412,
      "step": 127192
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0475099086761475,
      "learning_rate": 0.00025116629430543407,
      "loss": 2.9979,
      "step": 127193
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8986146450042725,
      "learning_rate": 0.0002511622583054633,
      "loss": 2.7683,
      "step": 127194
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9821317195892334,
      "learning_rate": 0.0002511582223145724,
      "loss": 3.0037,
      "step": 127195
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9323447942733765,
      "learning_rate": 0.00025115418633276217,
      "loss": 2.8636,
      "step": 127196
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6408233642578125,
      "learning_rate": 0.0002511501503600332,
      "loss": 2.974,
      "step": 127197
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.933095932006836,
      "learning_rate": 0.0002511461143963863,
      "loss": 3.2356,
      "step": 127198
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2378103733062744,
      "learning_rate": 0.0002511420784418223,
      "loss": 2.9693,
      "step": 127199
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.5656185150146484,
      "learning_rate": 0.00025113804249634193,
      "loss": 2.9831,
      "step": 127200
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0697691440582275,
      "learning_rate": 0.00025113400655994586,
      "loss": 3.0415,
      "step": 127201
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.291473388671875,
      "learning_rate": 0.0002511299706326351,
      "loss": 2.9905,
      "step": 127202
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.25567626953125,
      "learning_rate": 0.00025112593471441003,
      "loss": 3.0659,
      "step": 127203
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.782425880432129,
      "learning_rate": 0.00025112189880527157,
      "loss": 2.9503,
      "step": 127204
    },
    {
      "epoch": 1.66,
      "grad_norm": 6.008720874786377,
      "learning_rate": 0.0002511178629052205,
      "loss": 2.8128,
      "step": 127205
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.244788408279419,
      "learning_rate": 0.0002511138270142575,
      "loss": 3.0889,
      "step": 127206
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.44901442527771,
      "learning_rate": 0.0002511097911323834,
      "loss": 3.0265,
      "step": 127207
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2380425930023193,
      "learning_rate": 0.000251105755259599,
      "loss": 2.8702,
      "step": 127208
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.301391839981079,
      "learning_rate": 0.0002511017193959049,
      "loss": 3.1718,
      "step": 127209
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8248579502105713,
      "learning_rate": 0.00025109768354130183,
      "loss": 2.9061,
      "step": 127210
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8304784297943115,
      "learning_rate": 0.0002510936476957907,
      "loss": 3.138,
      "step": 127211
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8538401126861572,
      "learning_rate": 0.0002510896118593721,
      "loss": 2.8955,
      "step": 127212
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4418554306030273,
      "learning_rate": 0.00025108557603204694,
      "loss": 3.1096,
      "step": 127213
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0282983779907227,
      "learning_rate": 0.000251081540213816,
      "loss": 3.2091,
      "step": 127214
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9836176633834839,
      "learning_rate": 0.0002510775044046798,
      "loss": 3.1595,
      "step": 127215
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8838220834732056,
      "learning_rate": 0.00025107346860463924,
      "loss": 2.8867,
      "step": 127216
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9676569700241089,
      "learning_rate": 0.00025106943281369496,
      "loss": 3.158,
      "step": 127217
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0033907890319824,
      "learning_rate": 0.0002510653970318479,
      "loss": 2.8994,
      "step": 127218
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.521806240081787,
      "learning_rate": 0.00025106136125909866,
      "loss": 2.852,
      "step": 127219
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.016726493835449,
      "learning_rate": 0.00025105732549544815,
      "loss": 2.9487,
      "step": 127220
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1902148723602295,
      "learning_rate": 0.0002510532897408969,
      "loss": 3.05,
      "step": 127221
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2387397289276123,
      "learning_rate": 0.00025104925399544576,
      "loss": 2.8638,
      "step": 127222
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2835493087768555,
      "learning_rate": 0.00025104521825909545,
      "loss": 3.1769,
      "step": 127223
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2884747982025146,
      "learning_rate": 0.0002510411825318468,
      "loss": 3.1665,
      "step": 127224
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0675113201141357,
      "learning_rate": 0.00025103714681370054,
      "loss": 2.9902,
      "step": 127225
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0714528560638428,
      "learning_rate": 0.0002510331111046575,
      "loss": 3.2028,
      "step": 127226
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2240123748779297,
      "learning_rate": 0.0002510290754047182,
      "loss": 2.9327,
      "step": 127227
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8037967681884766,
      "learning_rate": 0.00025102503971388354,
      "loss": 2.9376,
      "step": 127228
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6557698249816895,
      "learning_rate": 0.0002510210040321542,
      "loss": 2.9356,
      "step": 127229
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1479852199554443,
      "learning_rate": 0.000251016968359531,
      "loss": 2.8868,
      "step": 127230
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.081284999847412,
      "learning_rate": 0.0002510129326960147,
      "loss": 2.851,
      "step": 127231
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5824368000030518,
      "learning_rate": 0.0002510088970416061,
      "loss": 3.0352,
      "step": 127232
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9165459871292114,
      "learning_rate": 0.00025100486139630583,
      "loss": 3.0329,
      "step": 127233
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8574836254119873,
      "learning_rate": 0.0002510008257601146,
      "loss": 2.9576,
      "step": 127234
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7018513679504395,
      "learning_rate": 0.0002509967901330333,
      "loss": 3.0131,
      "step": 127235
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.143033504486084,
      "learning_rate": 0.00025099275451506253,
      "loss": 3.0123,
      "step": 127236
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2142748832702637,
      "learning_rate": 0.00025098871890620323,
      "loss": 3.0567,
      "step": 127237
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.064312219619751,
      "learning_rate": 0.0002509846833064561,
      "loss": 3.034,
      "step": 127238
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6824592351913452,
      "learning_rate": 0.0002509806477158217,
      "loss": 2.7797,
      "step": 127239
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6973249912261963,
      "learning_rate": 0.00025097661213430107,
      "loss": 2.9564,
      "step": 127240
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.722041130065918,
      "learning_rate": 0.0002509725765618947,
      "loss": 2.8639,
      "step": 127241
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.208034038543701,
      "learning_rate": 0.00025096854099860346,
      "loss": 2.8627,
      "step": 127242
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9539711475372314,
      "learning_rate": 0.0002509645054444281,
      "loss": 3.0524,
      "step": 127243
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0534956455230713,
      "learning_rate": 0.00025096046989936947,
      "loss": 2.6906,
      "step": 127244
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.227172613143921,
      "learning_rate": 0.00025095643436342813,
      "loss": 3.0501,
      "step": 127245
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.276297092437744,
      "learning_rate": 0.0002509523988366049,
      "loss": 3.0418,
      "step": 127246
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.205282211303711,
      "learning_rate": 0.0002509483633189006,
      "loss": 2.8641,
      "step": 127247
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1086971759796143,
      "learning_rate": 0.00025094432781031585,
      "loss": 3.0323,
      "step": 127248
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5635433197021484,
      "learning_rate": 0.00025094029231085153,
      "loss": 3.0509,
      "step": 127249
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.210603713989258,
      "learning_rate": 0.00025093625682050827,
      "loss": 3.1118,
      "step": 127250
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4793920516967773,
      "learning_rate": 0.000250932221339287,
      "loss": 3.0703,
      "step": 127251
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3275320529937744,
      "learning_rate": 0.00025092818586718827,
      "loss": 3.0279,
      "step": 127252
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8347530364990234,
      "learning_rate": 0.0002509241504042129,
      "loss": 2.8631,
      "step": 127253
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.223409652709961,
      "learning_rate": 0.00025092011495036175,
      "loss": 2.9953,
      "step": 127254
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.397531032562256,
      "learning_rate": 0.00025091607950563543,
      "loss": 2.9666,
      "step": 127255
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.307382345199585,
      "learning_rate": 0.0002509120440700347,
      "loss": 3.0821,
      "step": 127256
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.628819227218628,
      "learning_rate": 0.00025090800864356047,
      "loss": 3.1769,
      "step": 127257
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2475197315216064,
      "learning_rate": 0.00025090397322621323,
      "loss": 3.0333,
      "step": 127258
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8948659896850586,
      "learning_rate": 0.0002508999378179939,
      "loss": 2.8415,
      "step": 127259
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.08851957321167,
      "learning_rate": 0.00025089590241890317,
      "loss": 2.8664,
      "step": 127260
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.578533172607422,
      "learning_rate": 0.00025089186702894185,
      "loss": 2.8446,
      "step": 127261
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9535787105560303,
      "learning_rate": 0.0002508878316481107,
      "loss": 3.2982,
      "step": 127262
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.9005792140960693,
      "learning_rate": 0.00025088379627641045,
      "loss": 2.925,
      "step": 127263
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2077598571777344,
      "learning_rate": 0.0002508797609138418,
      "loss": 2.8911,
      "step": 127264
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.5539309978485107,
      "learning_rate": 0.00025087572556040544,
      "loss": 2.8983,
      "step": 127265
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.4189248085021973,
      "learning_rate": 0.00025087169021610227,
      "loss": 2.7962,
      "step": 127266
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8883025646209717,
      "learning_rate": 0.000250867654880933,
      "loss": 2.9185,
      "step": 127267
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9475241899490356,
      "learning_rate": 0.0002508636195548983,
      "loss": 2.9363,
      "step": 127268
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.346285343170166,
      "learning_rate": 0.00025085958423799916,
      "loss": 3.2606,
      "step": 127269
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0938186645507812,
      "learning_rate": 0.000250855548930236,
      "loss": 3.0217,
      "step": 127270
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8117730617523193,
      "learning_rate": 0.0002508515136316097,
      "loss": 2.9543,
      "step": 127271
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.1229357719421387,
      "learning_rate": 0.00025084747834212107,
      "loss": 2.9194,
      "step": 127272
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9676826000213623,
      "learning_rate": 0.0002508434430617708,
      "loss": 2.9244,
      "step": 127273
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9973033666610718,
      "learning_rate": 0.00025083940779055966,
      "loss": 2.9504,
      "step": 127274
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.830960988998413,
      "learning_rate": 0.00025083537252848856,
      "loss": 2.9697,
      "step": 127275
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7552309036254883,
      "learning_rate": 0.00025083133727555793,
      "loss": 2.7876,
      "step": 127276
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0039615631103516,
      "learning_rate": 0.0002508273020317687,
      "loss": 2.5995,
      "step": 127277
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9389508962631226,
      "learning_rate": 0.00025082326679712157,
      "loss": 2.9644,
      "step": 127278
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.6566357612609863,
      "learning_rate": 0.00025081923157161735,
      "loss": 2.965,
      "step": 127279
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9661999940872192,
      "learning_rate": 0.0002508151963552568,
      "loss": 2.9774,
      "step": 127280
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6741863489151,
      "learning_rate": 0.0002508111611480407,
      "loss": 3.0342,
      "step": 127281
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.139892578125,
      "learning_rate": 0.0002508071259499696,
      "loss": 2.8495,
      "step": 127282
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0429277420043945,
      "learning_rate": 0.0002508030907610444,
      "loss": 2.9673,
      "step": 127283
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.9708757400512695,
      "learning_rate": 0.0002507990555812659,
      "loss": 3.375,
      "step": 127284
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.685814142227173,
      "learning_rate": 0.0002507950204106347,
      "loss": 2.9383,
      "step": 127285
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.422830581665039,
      "learning_rate": 0.0002507909852491517,
      "loss": 2.8026,
      "step": 127286
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5523719787597656,
      "learning_rate": 0.00025078695009681765,
      "loss": 3.0119,
      "step": 127287
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.076775074005127,
      "learning_rate": 0.00025078291495363316,
      "loss": 2.9164,
      "step": 127288
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7355165481567383,
      "learning_rate": 0.000250778879819599,
      "loss": 3.096,
      "step": 127289
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0685994625091553,
      "learning_rate": 0.000250774844694716,
      "loss": 3.1494,
      "step": 127290
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8694483041763306,
      "learning_rate": 0.00025077080957898495,
      "loss": 2.9424,
      "step": 127291
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8786271810531616,
      "learning_rate": 0.00025076677447240643,
      "loss": 3.0745,
      "step": 127292
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1621434688568115,
      "learning_rate": 0.0002507627393749815,
      "loss": 2.9814,
      "step": 127293
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.381281614303589,
      "learning_rate": 0.00025075870428671056,
      "loss": 2.9327,
      "step": 127294
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.356640100479126,
      "learning_rate": 0.0002507546692075945,
      "loss": 3.0412,
      "step": 127295
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8549972772598267,
      "learning_rate": 0.00025075063413763406,
      "loss": 3.1554,
      "step": 127296
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.52583909034729,
      "learning_rate": 0.00025074659907683,
      "loss": 3.2424,
      "step": 127297
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.766432285308838,
      "learning_rate": 0.0002507425640251831,
      "loss": 2.7879,
      "step": 127298
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7331361770629883,
      "learning_rate": 0.00025073852898269426,
      "loss": 2.8753,
      "step": 127299
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0810723304748535,
      "learning_rate": 0.00025073449394936385,
      "loss": 2.8533,
      "step": 127300
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.375349283218384,
      "learning_rate": 0.00025073045892519283,
      "loss": 2.7826,
      "step": 127301
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.838773250579834,
      "learning_rate": 0.000250726423910182,
      "loss": 2.8167,
      "step": 127302
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8964149951934814,
      "learning_rate": 0.00025072238890433207,
      "loss": 2.9098,
      "step": 127303
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.375744342803955,
      "learning_rate": 0.00025071835390764373,
      "loss": 3.0467,
      "step": 127304
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9950801134109497,
      "learning_rate": 0.00025071431892011793,
      "loss": 3.068,
      "step": 127305
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9985721111297607,
      "learning_rate": 0.0002507102839417551,
      "loss": 2.8395,
      "step": 127306
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0248422622680664,
      "learning_rate": 0.00025070624897255623,
      "loss": 3.1361,
      "step": 127307
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8268163204193115,
      "learning_rate": 0.00025070221401252195,
      "loss": 3.1231,
      "step": 127308
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0005619525909424,
      "learning_rate": 0.00025069817906165303,
      "loss": 2.8188,
      "step": 127309
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9013166427612305,
      "learning_rate": 0.00025069414411995033,
      "loss": 3.0383,
      "step": 127310
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.123962640762329,
      "learning_rate": 0.00025069010918741456,
      "loss": 2.9292,
      "step": 127311
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.4325711727142334,
      "learning_rate": 0.0002506860742640464,
      "loss": 2.9438,
      "step": 127312
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2939302921295166,
      "learning_rate": 0.0002506820393498465,
      "loss": 2.8327,
      "step": 127313
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.022505283355713,
      "learning_rate": 0.00025067800444481583,
      "loss": 3.2512,
      "step": 127314
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4130074977874756,
      "learning_rate": 0.00025067396954895504,
      "loss": 3.1804,
      "step": 127315
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7179574966430664,
      "learning_rate": 0.00025066993466226487,
      "loss": 2.8968,
      "step": 127316
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8509355783462524,
      "learning_rate": 0.00025066589978474613,
      "loss": 3.0135,
      "step": 127317
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.019899606704712,
      "learning_rate": 0.0002506618649163997,
      "loss": 2.9779,
      "step": 127318
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.130941867828369,
      "learning_rate": 0.00025065783005722586,
      "loss": 2.7528,
      "step": 127319
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.029339551925659,
      "learning_rate": 0.0002506537952072258,
      "loss": 2.8997,
      "step": 127320
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.104102611541748,
      "learning_rate": 0.00025064976036640014,
      "loss": 2.9219,
      "step": 127321
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.338364839553833,
      "learning_rate": 0.00025064572553474954,
      "loss": 2.8857,
      "step": 127322
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.8193514347076416,
      "learning_rate": 0.0002506416907122749,
      "loss": 3.2715,
      "step": 127323
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.9058711528778076,
      "learning_rate": 0.0002506376558989769,
      "loss": 2.7883,
      "step": 127324
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8515143394470215,
      "learning_rate": 0.0002506336210948563,
      "loss": 3.1051,
      "step": 127325
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2934486865997314,
      "learning_rate": 0.0002506295862999138,
      "loss": 2.8319,
      "step": 127326
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.117460012435913,
      "learning_rate": 0.00025062555151415025,
      "loss": 2.9565,
      "step": 127327
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8070929050445557,
      "learning_rate": 0.0002506215167375663,
      "loss": 2.9313,
      "step": 127328
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.260319709777832,
      "learning_rate": 0.0002506174819701627,
      "loss": 3.041,
      "step": 127329
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1324973106384277,
      "learning_rate": 0.00025061344721194034,
      "loss": 3.1499,
      "step": 127330
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9463313817977905,
      "learning_rate": 0.0002506094124628998,
      "loss": 3.3974,
      "step": 127331
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5385005474090576,
      "learning_rate": 0.00025060537772304196,
      "loss": 3.1597,
      "step": 127332
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.194596767425537,
      "learning_rate": 0.00025060134299236745,
      "loss": 3.1189,
      "step": 127333
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.156254529953003,
      "learning_rate": 0.0002505973082708771,
      "loss": 3.0028,
      "step": 127334
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.077223539352417,
      "learning_rate": 0.00025059327355857165,
      "loss": 3.1957,
      "step": 127335
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2256124019622803,
      "learning_rate": 0.0002505892388554519,
      "loss": 3.0763,
      "step": 127336
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2060883045196533,
      "learning_rate": 0.0002505852041615185,
      "loss": 3.0403,
      "step": 127337
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.603976011276245,
      "learning_rate": 0.0002505811694767722,
      "loss": 2.8134,
      "step": 127338
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.104020118713379,
      "learning_rate": 0.00025057713480121385,
      "loss": 2.9895,
      "step": 127339
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8969030380249023,
      "learning_rate": 0.00025057310013484413,
      "loss": 3.0096,
      "step": 127340
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5224342346191406,
      "learning_rate": 0.0002505690654776638,
      "loss": 3.1577,
      "step": 127341
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.543400287628174,
      "learning_rate": 0.0002505650308296736,
      "loss": 2.9664,
      "step": 127342
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.596284866333008,
      "learning_rate": 0.0002505609961908743,
      "loss": 3.1495,
      "step": 127343
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0367894172668457,
      "learning_rate": 0.0002505569615612666,
      "loss": 2.8768,
      "step": 127344
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.909287214279175,
      "learning_rate": 0.00025055292694085136,
      "loss": 3.1878,
      "step": 127345
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.9330861568450928,
      "learning_rate": 0.00025054889232962926,
      "loss": 3.128,
      "step": 127346
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0817646980285645,
      "learning_rate": 0.000250544857727601,
      "loss": 2.8406,
      "step": 127347
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.889915943145752,
      "learning_rate": 0.0002505408231347675,
      "loss": 3.0254,
      "step": 127348
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.682905673980713,
      "learning_rate": 0.0002505367885511293,
      "loss": 2.9313,
      "step": 127349
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0991976261138916,
      "learning_rate": 0.00025053275397668727,
      "loss": 2.8191,
      "step": 127350
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5867562294006348,
      "learning_rate": 0.0002505287194114421,
      "loss": 3.1144,
      "step": 127351
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2469465732574463,
      "learning_rate": 0.0002505246848553946,
      "loss": 2.7663,
      "step": 127352
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6621571779251099,
      "learning_rate": 0.00025052065030854546,
      "loss": 2.8882,
      "step": 127353
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0064611434936523,
      "learning_rate": 0.0002505166157708956,
      "loss": 2.824,
      "step": 127354
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.082690715789795,
      "learning_rate": 0.0002505125812424455,
      "loss": 2.8133,
      "step": 127355
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.01784610748291,
      "learning_rate": 0.0002505085467231961,
      "loss": 3.0208,
      "step": 127356
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.821678638458252,
      "learning_rate": 0.00025050451221314806,
      "loss": 2.9924,
      "step": 127357
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4696340560913086,
      "learning_rate": 0.00025050047771230213,
      "loss": 2.9406,
      "step": 127358
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.375722885131836,
      "learning_rate": 0.00025049644322065913,
      "loss": 2.9162,
      "step": 127359
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0955538749694824,
      "learning_rate": 0.0002504924087382199,
      "loss": 2.9858,
      "step": 127360
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.912842035293579,
      "learning_rate": 0.00025048837426498493,
      "loss": 3.1739,
      "step": 127361
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.185237407684326,
      "learning_rate": 0.00025048433980095515,
      "loss": 2.9583,
      "step": 127362
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.842630386352539,
      "learning_rate": 0.0002504803053461313,
      "loss": 3.0012,
      "step": 127363
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6110167503356934,
      "learning_rate": 0.00025047627090051395,
      "loss": 2.925,
      "step": 127364
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2722907066345215,
      "learning_rate": 0.0002504722364641041,
      "loss": 3.1317,
      "step": 127365
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8192684650421143,
      "learning_rate": 0.0002504682020369025,
      "loss": 2.9002,
      "step": 127366
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6471946239471436,
      "learning_rate": 0.0002504641676189097,
      "loss": 2.991,
      "step": 127367
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3844876289367676,
      "learning_rate": 0.0002504601332101265,
      "loss": 2.9639,
      "step": 127368
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.00370454788208,
      "learning_rate": 0.0002504560988105537,
      "loss": 2.8409,
      "step": 127369
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6208772659301758,
      "learning_rate": 0.0002504520644201921,
      "loss": 2.8486,
      "step": 127370
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1299381256103516,
      "learning_rate": 0.00025044803003904234,
      "loss": 2.9722,
      "step": 127371
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1410436630249023,
      "learning_rate": 0.00025044399566710534,
      "loss": 2.9336,
      "step": 127372
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8914796113967896,
      "learning_rate": 0.00025043996130438167,
      "loss": 2.8988,
      "step": 127373
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2209742069244385,
      "learning_rate": 0.00025043592695087204,
      "loss": 2.6941,
      "step": 127374
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3327176570892334,
      "learning_rate": 0.0002504318926065774,
      "loss": 2.861,
      "step": 127375
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1562302112579346,
      "learning_rate": 0.0002504278582714984,
      "loss": 3.2269,
      "step": 127376
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.050368309020996,
      "learning_rate": 0.0002504238239456358,
      "loss": 2.951,
      "step": 127377
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0495903491973877,
      "learning_rate": 0.00025041978962899044,
      "loss": 2.8587,
      "step": 127378
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3481712341308594,
      "learning_rate": 0.0002504157553215629,
      "loss": 3.0647,
      "step": 127379
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.5799405574798584,
      "learning_rate": 0.00025041172102335395,
      "loss": 3.0175,
      "step": 127380
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.023623466491699,
      "learning_rate": 0.00025040768673436443,
      "loss": 2.8113,
      "step": 127381
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.13114333152771,
      "learning_rate": 0.000250403652454595,
      "loss": 2.8017,
      "step": 127382
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.481269359588623,
      "learning_rate": 0.0002503996181840466,
      "loss": 2.9513,
      "step": 127383
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2272675037384033,
      "learning_rate": 0.00025039558392271974,
      "loss": 2.8293,
      "step": 127384
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.817628026008606,
      "learning_rate": 0.00025039154967061543,
      "loss": 2.7591,
      "step": 127385
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8269895315170288,
      "learning_rate": 0.00025038751542773416,
      "loss": 3.0987,
      "step": 127386
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1886677742004395,
      "learning_rate": 0.00025038348119407674,
      "loss": 2.9929,
      "step": 127387
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.477511405944824,
      "learning_rate": 0.000250379446969644,
      "loss": 3.1633,
      "step": 127388
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9974812269210815,
      "learning_rate": 0.00025037541275443664,
      "loss": 3.0979,
      "step": 127389
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1229705810546875,
      "learning_rate": 0.00025037137854845543,
      "loss": 2.7214,
      "step": 127390
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5602071285247803,
      "learning_rate": 0.00025036734435170127,
      "loss": 3.0458,
      "step": 127391
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5081779956817627,
      "learning_rate": 0.0002503633101641746,
      "loss": 3.0976,
      "step": 127392
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2045557498931885,
      "learning_rate": 0.00025035927598587637,
      "loss": 2.6566,
      "step": 127393
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3721084594726562,
      "learning_rate": 0.00025035524181680726,
      "loss": 3.0231,
      "step": 127394
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1149635314941406,
      "learning_rate": 0.00025035120765696807,
      "loss": 2.7828,
      "step": 127395
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2526471614837646,
      "learning_rate": 0.0002503471735063595,
      "loss": 2.9824,
      "step": 127396
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3819420337677,
      "learning_rate": 0.00025034313936498243,
      "loss": 2.8785,
      "step": 127397
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.510730743408203,
      "learning_rate": 0.00025033910523283745,
      "loss": 3.0327,
      "step": 127398
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0909993648529053,
      "learning_rate": 0.0002503350711099253,
      "loss": 3.2408,
      "step": 127399
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.601639986038208,
      "learning_rate": 0.00025033103699624684,
      "loss": 3.035,
      "step": 127400
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.189067840576172,
      "learning_rate": 0.00025032700289180274,
      "loss": 3.1126,
      "step": 127401
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0992000102996826,
      "learning_rate": 0.0002503229687965938,
      "loss": 2.9918,
      "step": 127402
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8643476963043213,
      "learning_rate": 0.0002503189347106209,
      "loss": 3.0094,
      "step": 127403
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.6703639030456543,
      "learning_rate": 0.00025031490063388454,
      "loss": 2.9227,
      "step": 127404
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.304762840270996,
      "learning_rate": 0.00025031086656638555,
      "loss": 2.9785,
      "step": 127405
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9762810468673706,
      "learning_rate": 0.0002503068325081247,
      "loss": 3.0688,
      "step": 127406
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5101943016052246,
      "learning_rate": 0.00025030279845910275,
      "loss": 2.9285,
      "step": 127407
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.227165937423706,
      "learning_rate": 0.00025029876441932047,
      "loss": 3.1713,
      "step": 127408
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.644134759902954,
      "learning_rate": 0.0002502947303887787,
      "loss": 3.0315,
      "step": 127409
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7383471727371216,
      "learning_rate": 0.00025029069636747797,
      "loss": 3.2252,
      "step": 127410
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.272223472595215,
      "learning_rate": 0.0002502866623554191,
      "loss": 2.9549,
      "step": 127411
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.825300693511963,
      "learning_rate": 0.0002502826283526029,
      "loss": 3.0712,
      "step": 127412
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.586791753768921,
      "learning_rate": 0.0002502785943590301,
      "loss": 2.9747,
      "step": 127413
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.373857259750366,
      "learning_rate": 0.0002502745603747015,
      "loss": 2.807,
      "step": 127414
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9951215982437134,
      "learning_rate": 0.00025027052639961777,
      "loss": 2.9316,
      "step": 127415
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6852734088897705,
      "learning_rate": 0.0002502664924337797,
      "loss": 2.9628,
      "step": 127416
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.986151218414307,
      "learning_rate": 0.00025026245847718803,
      "loss": 2.8649,
      "step": 127417
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3466579914093018,
      "learning_rate": 0.00025025842452984346,
      "loss": 2.9815,
      "step": 127418
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6049389839172363,
      "learning_rate": 0.0002502543905917468,
      "loss": 2.9891,
      "step": 127419
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.025834083557129,
      "learning_rate": 0.0002502503566628988,
      "loss": 3.1086,
      "step": 127420
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9602587223052979,
      "learning_rate": 0.0002502463227433002,
      "loss": 2.9657,
      "step": 127421
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.231872081756592,
      "learning_rate": 0.00025024228883295177,
      "loss": 2.9032,
      "step": 127422
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2139127254486084,
      "learning_rate": 0.0002502382549318542,
      "loss": 2.7605,
      "step": 127423
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9049348831176758,
      "learning_rate": 0.0002502342210400083,
      "loss": 3.0594,
      "step": 127424
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2234344482421875,
      "learning_rate": 0.0002502301871574148,
      "loss": 2.8819,
      "step": 127425
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8018178939819336,
      "learning_rate": 0.0002502261532840744,
      "loss": 3.3784,
      "step": 127426
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2961251735687256,
      "learning_rate": 0.000250222119419988,
      "loss": 2.9562,
      "step": 127427
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3063156604766846,
      "learning_rate": 0.00025021808556515617,
      "loss": 3.0491,
      "step": 127428
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0173401832580566,
      "learning_rate": 0.0002502140517195797,
      "loss": 3.1352,
      "step": 127429
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.85948646068573,
      "learning_rate": 0.0002502100178832594,
      "loss": 3.0611,
      "step": 127430
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.74581241607666,
      "learning_rate": 0.0002502059840561961,
      "loss": 2.963,
      "step": 127431
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.917636513710022,
      "learning_rate": 0.00025020195023839033,
      "loss": 3.0119,
      "step": 127432
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.301335334777832,
      "learning_rate": 0.00025019791642984305,
      "loss": 2.8385,
      "step": 127433
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.7713463306427,
      "learning_rate": 0.00025019388263055486,
      "loss": 3.0705,
      "step": 127434
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.372130870819092,
      "learning_rate": 0.00025018984884052657,
      "loss": 3.0933,
      "step": 127435
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9591799974441528,
      "learning_rate": 0.00025018581505975893,
      "loss": 3.0305,
      "step": 127436
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7760344743728638,
      "learning_rate": 0.00025018178128825266,
      "loss": 2.9663,
      "step": 127437
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8555102348327637,
      "learning_rate": 0.0002501777475260086,
      "loss": 3.0785,
      "step": 127438
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9165894985198975,
      "learning_rate": 0.00025017371377302746,
      "loss": 3.076,
      "step": 127439
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1287314891815186,
      "learning_rate": 0.0002501696800293099,
      "loss": 2.923,
      "step": 127440
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8263227939605713,
      "learning_rate": 0.0002501656462948567,
      "loss": 3.0348,
      "step": 127441
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.22465443611145,
      "learning_rate": 0.0002501616125696687,
      "loss": 3.0539,
      "step": 127442
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.283989429473877,
      "learning_rate": 0.00025015757885374655,
      "loss": 2.9424,
      "step": 127443
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2752737998962402,
      "learning_rate": 0.00025015354514709107,
      "loss": 2.9442,
      "step": 127444
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.172208547592163,
      "learning_rate": 0.00025014951144970314,
      "loss": 2.9132,
      "step": 127445
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.012636184692383,
      "learning_rate": 0.0002501454777615832,
      "loss": 2.9716,
      "step": 127446
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.137232780456543,
      "learning_rate": 0.0002501414440827322,
      "loss": 3.0309,
      "step": 127447
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.907501220703125,
      "learning_rate": 0.0002501374104131508,
      "loss": 3.0301,
      "step": 127448
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8867508172988892,
      "learning_rate": 0.00025013337675283974,
      "loss": 2.9815,
      "step": 127449
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8510676622390747,
      "learning_rate": 0.00025012934310179993,
      "loss": 2.7768,
      "step": 127450
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.930323839187622,
      "learning_rate": 0.00025012530946003195,
      "loss": 3.3071,
      "step": 127451
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4656119346618652,
      "learning_rate": 0.00025012127582753677,
      "loss": 2.863,
      "step": 127452
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2670679092407227,
      "learning_rate": 0.0002501172422043149,
      "loss": 3.0856,
      "step": 127453
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8353240489959717,
      "learning_rate": 0.00025011320859036714,
      "loss": 2.8958,
      "step": 127454
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.208021402359009,
      "learning_rate": 0.00025010917498569427,
      "loss": 3.227,
      "step": 127455
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.045463800430298,
      "learning_rate": 0.00025010514139029707,
      "loss": 3.0091,
      "step": 127456
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8923137187957764,
      "learning_rate": 0.0002501011078041762,
      "loss": 2.8776,
      "step": 127457
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2412779331207275,
      "learning_rate": 0.0002500970742273327,
      "loss": 2.8846,
      "step": 127458
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.011634588241577,
      "learning_rate": 0.0002500930406597669,
      "loss": 2.9985,
      "step": 127459
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.317120313644409,
      "learning_rate": 0.0002500890071014797,
      "loss": 3.0398,
      "step": 127460
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.168473720550537,
      "learning_rate": 0.000250084973552472,
      "loss": 2.9389,
      "step": 127461
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0208301544189453,
      "learning_rate": 0.00025008094001274437,
      "loss": 3.0708,
      "step": 127462
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.093384265899658,
      "learning_rate": 0.0002500769064822977,
      "loss": 3.1463,
      "step": 127463
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.858715772628784,
      "learning_rate": 0.0002500728729611328,
      "loss": 2.8643,
      "step": 127464
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0597310066223145,
      "learning_rate": 0.0002500688394492501,
      "loss": 3.0198,
      "step": 127465
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5575215816497803,
      "learning_rate": 0.0002500648059466506,
      "loss": 2.8756,
      "step": 127466
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8515033721923828,
      "learning_rate": 0.000250060772453335,
      "loss": 2.942,
      "step": 127467
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1089982986450195,
      "learning_rate": 0.000250056738969304,
      "loss": 3.1492,
      "step": 127468
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3980982303619385,
      "learning_rate": 0.00025005270549455845,
      "loss": 2.9744,
      "step": 127469
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.5700347423553467,
      "learning_rate": 0.00025004867202909915,
      "loss": 2.886,
      "step": 127470
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0408103466033936,
      "learning_rate": 0.00025004463857292657,
      "loss": 3.1438,
      "step": 127471
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.246128559112549,
      "learning_rate": 0.0002500406051260417,
      "loss": 2.8696,
      "step": 127472
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2969629764556885,
      "learning_rate": 0.0002500365716884452,
      "loss": 3.1566,
      "step": 127473
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.152883529663086,
      "learning_rate": 0.00025003253826013787,
      "loss": 3.081,
      "step": 127474
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7655301094055176,
      "learning_rate": 0.00025002850484112036,
      "loss": 3.1464,
      "step": 127475
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2907259464263916,
      "learning_rate": 0.0002500244714313937,
      "loss": 2.7044,
      "step": 127476
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.408154249191284,
      "learning_rate": 0.0002500204380309583,
      "loss": 2.8874,
      "step": 127477
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3980770111083984,
      "learning_rate": 0.00025001640463981497,
      "loss": 2.8307,
      "step": 127478
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.846635341644287,
      "learning_rate": 0.0002500123712579646,
      "loss": 2.8934,
      "step": 127479
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.488077402114868,
      "learning_rate": 0.0002500083378854079,
      "loss": 2.8496,
      "step": 127480
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2804598808288574,
      "learning_rate": 0.0002500043045221455,
      "loss": 2.9685,
      "step": 127481
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7104430198669434,
      "learning_rate": 0.00025000027116817847,
      "loss": 3.013,
      "step": 127482
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0771126747131348,
      "learning_rate": 0.00024999623782350714,
      "loss": 3.0859,
      "step": 127483
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5441393852233887,
      "learning_rate": 0.00024999220448813247,
      "loss": 2.8524,
      "step": 127484
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.966648817062378,
      "learning_rate": 0.0002499881711620552,
      "loss": 2.9792,
      "step": 127485
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2407662868499756,
      "learning_rate": 0.00024998413784527607,
      "loss": 3.1476,
      "step": 127486
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2810802459716797,
      "learning_rate": 0.00024998010453779586,
      "loss": 3.1229,
      "step": 127487
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3858349323272705,
      "learning_rate": 0.0002499760712396154,
      "loss": 3.2348,
      "step": 127488
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2062790393829346,
      "learning_rate": 0.00024997203795073516,
      "loss": 3.1443,
      "step": 127489
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.024477958679199,
      "learning_rate": 0.0002499680046711561,
      "loss": 3.2154,
      "step": 127490
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6953163146972656,
      "learning_rate": 0.00024996397140087897,
      "loss": 3.0905,
      "step": 127491
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.06510329246521,
      "learning_rate": 0.0002499599381399044,
      "loss": 3.2208,
      "step": 127492
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1105966567993164,
      "learning_rate": 0.00024995590488823325,
      "loss": 2.7678,
      "step": 127493
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9350428581237793,
      "learning_rate": 0.0002499518716458664,
      "loss": 3.0337,
      "step": 127494
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.507411241531372,
      "learning_rate": 0.00024994783841280434,
      "loss": 2.854,
      "step": 127495
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.025425672531128,
      "learning_rate": 0.00024994380518904784,
      "loss": 2.8704,
      "step": 127496
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.361283302307129,
      "learning_rate": 0.0002499397719745978,
      "loss": 2.9735,
      "step": 127497
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0277867317199707,
      "learning_rate": 0.00024993573876945486,
      "loss": 3.001,
      "step": 127498
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3281519412994385,
      "learning_rate": 0.0002499317055736198,
      "loss": 2.753,
      "step": 127499
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.678194999694824,
      "learning_rate": 0.00024992767238709353,
      "loss": 2.8234,
      "step": 127500
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0600976943969727,
      "learning_rate": 0.0002499236392098766,
      "loss": 2.9475,
      "step": 127501
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9223616123199463,
      "learning_rate": 0.0002499196060419697,
      "loss": 3.2004,
      "step": 127502
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.910895586013794,
      "learning_rate": 0.00024991557288337374,
      "loss": 3.0037,
      "step": 127503
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6727995872497559,
      "learning_rate": 0.0002499115397340894,
      "loss": 2.7146,
      "step": 127504
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1759650707244873,
      "learning_rate": 0.00024990750659411746,
      "loss": 2.9561,
      "step": 127505
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4211809635162354,
      "learning_rate": 0.0002499034734634587,
      "loss": 3.0011,
      "step": 127506
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9109684228897095,
      "learning_rate": 0.00024989944034211383,
      "loss": 2.7885,
      "step": 127507
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2160332202911377,
      "learning_rate": 0.0002498954072300836,
      "loss": 3.0045,
      "step": 127508
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6300129890441895,
      "learning_rate": 0.0002498913741273687,
      "loss": 3.0411,
      "step": 127509
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.116304874420166,
      "learning_rate": 0.00024988734103397,
      "loss": 2.906,
      "step": 127510
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.992393970489502,
      "learning_rate": 0.00024988330794988814,
      "loss": 3.1519,
      "step": 127511
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6152303218841553,
      "learning_rate": 0.000249879274875124,
      "loss": 2.8489,
      "step": 127512
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.016244411468506,
      "learning_rate": 0.00024987524180967817,
      "loss": 2.8578,
      "step": 127513
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9713563919067383,
      "learning_rate": 0.0002498712087535515,
      "loss": 3.0234,
      "step": 127514
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.11214542388916,
      "learning_rate": 0.0002498671757067447,
      "loss": 2.9428,
      "step": 127515
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.353574514389038,
      "learning_rate": 0.0002498631426692586,
      "loss": 3.2125,
      "step": 127516
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5146944522857666,
      "learning_rate": 0.00024985910964109385,
      "loss": 2.9595,
      "step": 127517
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.088860511779785,
      "learning_rate": 0.0002498550766222512,
      "loss": 3.1804,
      "step": 127518
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2305843830108643,
      "learning_rate": 0.0002498510436127315,
      "loss": 2.8446,
      "step": 127519
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2946512699127197,
      "learning_rate": 0.0002498470106125354,
      "loss": 2.9225,
      "step": 127520
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.016958475112915,
      "learning_rate": 0.0002498429776216637,
      "loss": 2.9068,
      "step": 127521
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.059718132019043,
      "learning_rate": 0.0002498389446401171,
      "loss": 3.2261,
      "step": 127522
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.389462947845459,
      "learning_rate": 0.0002498349116678965,
      "loss": 2.781,
      "step": 127523
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3746447563171387,
      "learning_rate": 0.0002498308787050024,
      "loss": 2.8673,
      "step": 127524
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2722702026367188,
      "learning_rate": 0.0002498268457514358,
      "loss": 2.9917,
      "step": 127525
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8828685283660889,
      "learning_rate": 0.0002498228128071973,
      "loss": 2.8134,
      "step": 127526
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.079481840133667,
      "learning_rate": 0.00024981877987228764,
      "loss": 3.0785,
      "step": 127527
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.1969523429870605,
      "learning_rate": 0.0002498147469467076,
      "loss": 2.9734,
      "step": 127528
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4923367500305176,
      "learning_rate": 0.000249810714030458,
      "loss": 2.806,
      "step": 127529
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6157389879226685,
      "learning_rate": 0.00024980668112353955,
      "loss": 3.0817,
      "step": 127530
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6784708499908447,
      "learning_rate": 0.00024980264822595304,
      "loss": 2.8191,
      "step": 127531
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5413217544555664,
      "learning_rate": 0.00024979861533769906,
      "loss": 3.0448,
      "step": 127532
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.491490602493286,
      "learning_rate": 0.0002497945824587785,
      "loss": 3.0615,
      "step": 127533
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7398875951766968,
      "learning_rate": 0.000249790549589192,
      "loss": 2.8749,
      "step": 127534
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5294668674468994,
      "learning_rate": 0.00024978651672894045,
      "loss": 2.6851,
      "step": 127535
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.38634991645813,
      "learning_rate": 0.0002497824838780245,
      "loss": 2.977,
      "step": 127536
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7747447490692139,
      "learning_rate": 0.0002497784510364451,
      "loss": 2.945,
      "step": 127537
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1511456966400146,
      "learning_rate": 0.0002497744182042027,
      "loss": 2.9353,
      "step": 127538
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7611724138259888,
      "learning_rate": 0.0002497703853812982,
      "loss": 3.0033,
      "step": 127539
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.529869556427002,
      "learning_rate": 0.0002497663525677323,
      "loss": 2.8197,
      "step": 127540
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9649032354354858,
      "learning_rate": 0.0002497623197635057,
      "loss": 3.109,
      "step": 127541
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.902758002281189,
      "learning_rate": 0.0002497582869686194,
      "loss": 3.0692,
      "step": 127542
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2688257694244385,
      "learning_rate": 0.000249754254183074,
      "loss": 3.211,
      "step": 127543
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.794511318206787,
      "learning_rate": 0.00024975022140687013,
      "loss": 2.7461,
      "step": 127544
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.160165309906006,
      "learning_rate": 0.00024974618864000863,
      "loss": 3.1048,
      "step": 127545
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.04988431930542,
      "learning_rate": 0.0002497421558824903,
      "loss": 3.2237,
      "step": 127546
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.910959005355835,
      "learning_rate": 0.00024973812313431584,
      "loss": 2.7849,
      "step": 127547
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.268932342529297,
      "learning_rate": 0.000249734090395486,
      "loss": 3.0331,
      "step": 127548
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8792179822921753,
      "learning_rate": 0.0002497300576660017,
      "loss": 2.982,
      "step": 127549
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9992737770080566,
      "learning_rate": 0.00024972602494586333,
      "loss": 2.9409,
      "step": 127550
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.808646559715271,
      "learning_rate": 0.0002497219922350719,
      "loss": 3.0972,
      "step": 127551
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8871994018554688,
      "learning_rate": 0.0002497179595336281,
      "loss": 2.7971,
      "step": 127552
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1521108150482178,
      "learning_rate": 0.00024971392684153265,
      "loss": 3.2191,
      "step": 127553
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3858067989349365,
      "learning_rate": 0.0002497098941587864,
      "loss": 2.9552,
      "step": 127554
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9325577020645142,
      "learning_rate": 0.0002497058614853901,
      "loss": 3.098,
      "step": 127555
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.5069169998168945,
      "learning_rate": 0.0002497018288213443,
      "loss": 2.9731,
      "step": 127556
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9957513809204102,
      "learning_rate": 0.00024969779616664987,
      "loss": 2.9738,
      "step": 127557
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.907616138458252,
      "learning_rate": 0.0002496937635213076,
      "loss": 2.9341,
      "step": 127558
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8150086402893066,
      "learning_rate": 0.0002496897308853182,
      "loss": 3.1398,
      "step": 127559
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.1081056594848633,
      "learning_rate": 0.0002496856982586824,
      "loss": 3.1246,
      "step": 127560
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6964291334152222,
      "learning_rate": 0.0002496816656414011,
      "loss": 3.0348,
      "step": 127561
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4012060165405273,
      "learning_rate": 0.0002496776330334749,
      "loss": 2.9919,
      "step": 127562
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3709728717803955,
      "learning_rate": 0.0002496736004349045,
      "loss": 2.9993,
      "step": 127563
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3074629306793213,
      "learning_rate": 0.0002496695678456907,
      "loss": 3.0234,
      "step": 127564
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.416240930557251,
      "learning_rate": 0.00024966553526583436,
      "loss": 2.9427,
      "step": 127565
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0511491298675537,
      "learning_rate": 0.0002496615026953361,
      "loss": 2.7094,
      "step": 127566
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7648258209228516,
      "learning_rate": 0.0002496574701341968,
      "loss": 2.7971,
      "step": 127567
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0157928466796875,
      "learning_rate": 0.00024965343758241704,
      "loss": 3.1397,
      "step": 127568
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0995893478393555,
      "learning_rate": 0.0002496494050399977,
      "loss": 2.8794,
      "step": 127569
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.2231268882751465,
      "learning_rate": 0.00024964537250693946,
      "loss": 2.659,
      "step": 127570
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6567587852478027,
      "learning_rate": 0.00024964133998324305,
      "loss": 2.8099,
      "step": 127571
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.282212734222412,
      "learning_rate": 0.0002496373074689093,
      "loss": 3.0328,
      "step": 127572
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.864683747291565,
      "learning_rate": 0.00024963327496393907,
      "loss": 3.0839,
      "step": 127573
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1561226844787598,
      "learning_rate": 0.0002496292424683328,
      "loss": 3.1078,
      "step": 127574
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9682273864746094,
      "learning_rate": 0.00024962520998209146,
      "loss": 3.2636,
      "step": 127575
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9476611614227295,
      "learning_rate": 0.00024962117750521566,
      "loss": 2.9706,
      "step": 127576
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7453858852386475,
      "learning_rate": 0.0002496171450377063,
      "loss": 2.8522,
      "step": 127577
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.441412925720215,
      "learning_rate": 0.00024961311257956404,
      "loss": 2.8617,
      "step": 127578
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0806472301483154,
      "learning_rate": 0.0002496090801307898,
      "loss": 3.0149,
      "step": 127579
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.772660970687866,
      "learning_rate": 0.00024960504769138407,
      "loss": 2.9613,
      "step": 127580
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7400615215301514,
      "learning_rate": 0.00024960101526134764,
      "loss": 3.089,
      "step": 127581
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1874351501464844,
      "learning_rate": 0.00024959698284068144,
      "loss": 2.9107,
      "step": 127582
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2026443481445312,
      "learning_rate": 0.000249592950429386,
      "loss": 2.9992,
      "step": 127583
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.1245083808898926,
      "learning_rate": 0.00024958891802746223,
      "loss": 2.7469,
      "step": 127584
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.162682294845581,
      "learning_rate": 0.00024958488563491085,
      "loss": 2.976,
      "step": 127585
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2081856727600098,
      "learning_rate": 0.00024958085325173273,
      "loss": 2.7749,
      "step": 127586
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8125908374786377,
      "learning_rate": 0.0002495768208779283,
      "loss": 2.7785,
      "step": 127587
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3615920543670654,
      "learning_rate": 0.0002495727885134985,
      "loss": 3.0525,
      "step": 127588
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.312701463699341,
      "learning_rate": 0.0002495687561584441,
      "loss": 3.0345,
      "step": 127589
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.137295961380005,
      "learning_rate": 0.0002495647238127658,
      "loss": 2.9935,
      "step": 127590
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5798373222351074,
      "learning_rate": 0.0002495606914764644,
      "loss": 2.9,
      "step": 127591
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8566278219223022,
      "learning_rate": 0.0002495566591495407,
      "loss": 2.8747,
      "step": 127592
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6532092094421387,
      "learning_rate": 0.00024955262683199527,
      "loss": 3.0997,
      "step": 127593
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.3184444904327393,
      "learning_rate": 0.000249548594523829,
      "loss": 2.8443,
      "step": 127594
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.9533653259277344,
      "learning_rate": 0.00024954456222504254,
      "loss": 3.2171,
      "step": 127595
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9589760303497314,
      "learning_rate": 0.0002495405299356367,
      "loss": 2.8552,
      "step": 127596
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7435802221298218,
      "learning_rate": 0.0002495364976556122,
      "loss": 2.8678,
      "step": 127597
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1461005210876465,
      "learning_rate": 0.00024953246538496994,
      "loss": 2.8033,
      "step": 127598
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0192654132843018,
      "learning_rate": 0.0002495284331237105,
      "loss": 3.1096,
      "step": 127599
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.032146692276001,
      "learning_rate": 0.0002495244008718347,
      "loss": 2.9525,
      "step": 127600
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3635706901550293,
      "learning_rate": 0.0002495203686293432,
      "loss": 2.9774,
      "step": 127601
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.813407301902771,
      "learning_rate": 0.0002495163363962368,
      "loss": 3.1101,
      "step": 127602
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9325294494628906,
      "learning_rate": 0.00024951230417251635,
      "loss": 2.872,
      "step": 127603
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.329300880432129,
      "learning_rate": 0.0002495082719581825,
      "loss": 3.0543,
      "step": 127604
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9373921155929565,
      "learning_rate": 0.000249504239753236,
      "loss": 3.0378,
      "step": 127605
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4537787437438965,
      "learning_rate": 0.0002495002075576776,
      "loss": 3.0643,
      "step": 127606
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8363765478134155,
      "learning_rate": 0.000249496175371508,
      "loss": 3.0852,
      "step": 127607
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9449856281280518,
      "learning_rate": 0.00024949214319472813,
      "loss": 3.1865,
      "step": 127608
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6489917039871216,
      "learning_rate": 0.00024948811102733855,
      "loss": 3.1031,
      "step": 127609
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4064934253692627,
      "learning_rate": 0.00024948407886934016,
      "loss": 2.9686,
      "step": 127610
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.05051326751709,
      "learning_rate": 0.00024948004672073357,
      "loss": 2.9089,
      "step": 127611
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.186797857284546,
      "learning_rate": 0.0002494760145815196,
      "loss": 3.1661,
      "step": 127612
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9244261980056763,
      "learning_rate": 0.00024947198245169896,
      "loss": 2.9924,
      "step": 127613
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.517554759979248,
      "learning_rate": 0.00024946795033127246,
      "loss": 2.7245,
      "step": 127614
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.768209457397461,
      "learning_rate": 0.0002494639182202409,
      "loss": 2.9266,
      "step": 127615
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9791899919509888,
      "learning_rate": 0.0002494598861186049,
      "loss": 2.996,
      "step": 127616
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9436326026916504,
      "learning_rate": 0.0002494558540263653,
      "loss": 2.8123,
      "step": 127617
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5281808376312256,
      "learning_rate": 0.0002494518219435227,
      "loss": 2.9463,
      "step": 127618
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3119869232177734,
      "learning_rate": 0.000249447789870078,
      "loss": 2.9854,
      "step": 127619
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.102015733718872,
      "learning_rate": 0.0002494437578060319,
      "loss": 2.6908,
      "step": 127620
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3652429580688477,
      "learning_rate": 0.0002494397257513852,
      "loss": 2.7133,
      "step": 127621
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.088435173034668,
      "learning_rate": 0.0002494356937061387,
      "loss": 3.0524,
      "step": 127622
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.43800687789917,
      "learning_rate": 0.00024943166167029293,
      "loss": 2.897,
      "step": 127623
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7198240756988525,
      "learning_rate": 0.0002494276296438488,
      "loss": 2.8903,
      "step": 127624
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7230730056762695,
      "learning_rate": 0.00024942359762680696,
      "loss": 2.9955,
      "step": 127625
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4564871788024902,
      "learning_rate": 0.00024941956561916833,
      "loss": 2.9834,
      "step": 127626
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.609152317047119,
      "learning_rate": 0.0002494155336209335,
      "loss": 3.137,
      "step": 127627
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4237518310546875,
      "learning_rate": 0.00024941150163210337,
      "loss": 2.9908,
      "step": 127628
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.87021541595459,
      "learning_rate": 0.0002494074696526785,
      "loss": 3.2342,
      "step": 127629
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1819846630096436,
      "learning_rate": 0.00024940343768265974,
      "loss": 3.0338,
      "step": 127630
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7180728912353516,
      "learning_rate": 0.0002493994057220479,
      "loss": 3.1856,
      "step": 127631
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0721936225891113,
      "learning_rate": 0.00024939537377084356,
      "loss": 3.0621,
      "step": 127632
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4904696941375732,
      "learning_rate": 0.0002493913418290476,
      "loss": 3.0407,
      "step": 127633
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7278892993927,
      "learning_rate": 0.00024938730989666096,
      "loss": 3.1129,
      "step": 127634
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1200761795043945,
      "learning_rate": 0.00024938327797368394,
      "loss": 2.6881,
      "step": 127635
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1406025886535645,
      "learning_rate": 0.0002493792460601176,
      "loss": 3.0674,
      "step": 127636
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3360211849212646,
      "learning_rate": 0.0002493752141559626,
      "loss": 2.9799,
      "step": 127637
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0613861083984375,
      "learning_rate": 0.0002493711822612196,
      "loss": 2.9293,
      "step": 127638
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9151426553726196,
      "learning_rate": 0.00024936715037588954,
      "loss": 2.9276,
      "step": 127639
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.711467742919922,
      "learning_rate": 0.00024936311849997324,
      "loss": 2.939,
      "step": 127640
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6757781505584717,
      "learning_rate": 0.00024935908663347106,
      "loss": 2.8802,
      "step": 127641
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9677274227142334,
      "learning_rate": 0.0002493550547763841,
      "loss": 2.6937,
      "step": 127642
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.369886875152588,
      "learning_rate": 0.00024935102292871294,
      "loss": 3.0396,
      "step": 127643
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.181633710861206,
      "learning_rate": 0.00024934699109045835,
      "loss": 2.8497,
      "step": 127644
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8795325756072998,
      "learning_rate": 0.0002493429592616212,
      "loss": 2.9631,
      "step": 127645
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9829837083816528,
      "learning_rate": 0.0002493389274422021,
      "loss": 2.9276,
      "step": 127646
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.275301694869995,
      "learning_rate": 0.0002493348956322019,
      "loss": 2.9899,
      "step": 127647
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.9377381801605225,
      "learning_rate": 0.0002493308638316213,
      "loss": 2.9532,
      "step": 127648
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9977898597717285,
      "learning_rate": 0.00024932683204046096,
      "loss": 3.2327,
      "step": 127649
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9108612537384033,
      "learning_rate": 0.0002493228002587217,
      "loss": 2.8707,
      "step": 127650
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1259429454803467,
      "learning_rate": 0.0002493187684864044,
      "loss": 3.0484,
      "step": 127651
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.45664644241333,
      "learning_rate": 0.0002493147367235096,
      "loss": 2.9952,
      "step": 127652
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.5966901779174805,
      "learning_rate": 0.00024931070497003833,
      "loss": 3.0949,
      "step": 127653
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6761640310287476,
      "learning_rate": 0.000249306673225991,
      "loss": 2.9066,
      "step": 127654
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.492603302001953,
      "learning_rate": 0.00024930264149136856,
      "loss": 3.082,
      "step": 127655
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.075559139251709,
      "learning_rate": 0.00024929860976617165,
      "loss": 2.884,
      "step": 127656
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9622031450271606,
      "learning_rate": 0.0002492945780504011,
      "loss": 2.9286,
      "step": 127657
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1612658500671387,
      "learning_rate": 0.00024929054634405766,
      "loss": 2.9202,
      "step": 127658
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6761012077331543,
      "learning_rate": 0.0002492865146471422,
      "loss": 2.9369,
      "step": 127659
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.127687931060791,
      "learning_rate": 0.0002492824829596552,
      "loss": 2.9388,
      "step": 127660
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7856398820877075,
      "learning_rate": 0.00024927845128159756,
      "loss": 3.0336,
      "step": 127661
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7379660606384277,
      "learning_rate": 0.00024927441961297,
      "loss": 2.8994,
      "step": 127662
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1816351413726807,
      "learning_rate": 0.0002492703879537733,
      "loss": 2.8791,
      "step": 127663
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.079939365386963,
      "learning_rate": 0.00024926635630400816,
      "loss": 2.8953,
      "step": 127664
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0695719718933105,
      "learning_rate": 0.0002492623246636755,
      "loss": 2.915,
      "step": 127665
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0468480587005615,
      "learning_rate": 0.0002492582930327758,
      "loss": 3.0088,
      "step": 127666
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9127991199493408,
      "learning_rate": 0.00024925426141130994,
      "loss": 3.0373,
      "step": 127667
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5954270362854004,
      "learning_rate": 0.0002492502297992787,
      "loss": 2.8888,
      "step": 127668
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1697328090667725,
      "learning_rate": 0.00024924619819668276,
      "loss": 3.0979,
      "step": 127669
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.977115273475647,
      "learning_rate": 0.0002492421666035229,
      "loss": 3.0704,
      "step": 127670
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7757869958877563,
      "learning_rate": 0.0002492381350198,
      "loss": 2.8715,
      "step": 127671
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.357438564300537,
      "learning_rate": 0.00024923410344551467,
      "loss": 3.1763,
      "step": 127672
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0931789875030518,
      "learning_rate": 0.00024923007188066757,
      "loss": 3.0269,
      "step": 127673
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7572182416915894,
      "learning_rate": 0.0002492260403252596,
      "loss": 2.9311,
      "step": 127674
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3228378295898438,
      "learning_rate": 0.0002492220087792915,
      "loss": 3.0262,
      "step": 127675
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4734811782836914,
      "learning_rate": 0.0002492179772427639,
      "loss": 2.9139,
      "step": 127676
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6796889305114746,
      "learning_rate": 0.0002492139457156778,
      "loss": 3.08,
      "step": 127677
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8696601390838623,
      "learning_rate": 0.0002492099141980337,
      "loss": 2.9152,
      "step": 127678
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3109235763549805,
      "learning_rate": 0.0002492058826898324,
      "loss": 2.9827,
      "step": 127679
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6618857383728027,
      "learning_rate": 0.00024920185119107466,
      "loss": 2.8635,
      "step": 127680
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9325898885726929,
      "learning_rate": 0.00024919781970176127,
      "loss": 2.933,
      "step": 127681
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2694547176361084,
      "learning_rate": 0.000249193788221893,
      "loss": 3.1621,
      "step": 127682
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.760098457336426,
      "learning_rate": 0.00024918975675147065,
      "loss": 2.9651,
      "step": 127683
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6523704528808594,
      "learning_rate": 0.00024918572529049473,
      "loss": 3.1891,
      "step": 127684
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.150768280029297,
      "learning_rate": 0.0002491816938389663,
      "loss": 2.954,
      "step": 127685
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0284523963928223,
      "learning_rate": 0.0002491776623968858,
      "loss": 3.0075,
      "step": 127686
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.9041645526885986,
      "learning_rate": 0.0002491736309642542,
      "loss": 2.8343,
      "step": 127687
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.358924388885498,
      "learning_rate": 0.00024916959954107216,
      "loss": 2.9145,
      "step": 127688
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4378387928009033,
      "learning_rate": 0.0002491655681273405,
      "loss": 2.9287,
      "step": 127689
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.142080307006836,
      "learning_rate": 0.00024916153672305984,
      "loss": 3.1589,
      "step": 127690
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3207995891571045,
      "learning_rate": 0.00024915750532823104,
      "loss": 2.9283,
      "step": 127691
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0072154998779297,
      "learning_rate": 0.0002491534739428549,
      "loss": 2.9407,
      "step": 127692
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.459556818008423,
      "learning_rate": 0.000249149442566932,
      "loss": 2.9609,
      "step": 127693
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.7608678340911865,
      "learning_rate": 0.0002491454112004632,
      "loss": 3.0515,
      "step": 127694
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0488343238830566,
      "learning_rate": 0.00024914137984344926,
      "loss": 3.0072,
      "step": 127695
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.7059171199798584,
      "learning_rate": 0.00024913734849589086,
      "loss": 3.1043,
      "step": 127696
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8444714546203613,
      "learning_rate": 0.00024913331715778876,
      "loss": 3.1445,
      "step": 127697
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.768214702606201,
      "learning_rate": 0.0002491292858291438,
      "loss": 2.8949,
      "step": 127698
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0324933528900146,
      "learning_rate": 0.00024912525450995667,
      "loss": 3.0405,
      "step": 127699
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.944734573364258,
      "learning_rate": 0.00024912122320022804,
      "loss": 2.878,
      "step": 127700
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.942764759063721,
      "learning_rate": 0.00024911719189995885,
      "loss": 2.852,
      "step": 127701
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.468669414520264,
      "learning_rate": 0.0002491131606091496,
      "loss": 2.8889,
      "step": 127702
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0539469718933105,
      "learning_rate": 0.00024910912932780125,
      "loss": 2.7726,
      "step": 127703
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.971391439437866,
      "learning_rate": 0.0002491050980559144,
      "loss": 3.1256,
      "step": 127704
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.944789409637451,
      "learning_rate": 0.00024910106679348995,
      "loss": 2.8182,
      "step": 127705
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.943564534187317,
      "learning_rate": 0.00024909703554052857,
      "loss": 2.9466,
      "step": 127706
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9148622751235962,
      "learning_rate": 0.0002490930042970311,
      "loss": 3.1657,
      "step": 127707
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.819706678390503,
      "learning_rate": 0.00024908897306299803,
      "loss": 2.997,
      "step": 127708
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2729549407958984,
      "learning_rate": 0.0002490849418384304,
      "loss": 2.9113,
      "step": 127709
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8847755193710327,
      "learning_rate": 0.00024908091062332874,
      "loss": 3.0654,
      "step": 127710
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.107841730117798,
      "learning_rate": 0.00024907687941769387,
      "loss": 3.2655,
      "step": 127711
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9109795093536377,
      "learning_rate": 0.0002490728482215267,
      "loss": 3.295,
      "step": 127712
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3198211193084717,
      "learning_rate": 0.0002490688170348278,
      "loss": 2.8096,
      "step": 127713
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.799856424331665,
      "learning_rate": 0.00024906478585759803,
      "loss": 3.1289,
      "step": 127714
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0996720790863037,
      "learning_rate": 0.000249060754689838,
      "loss": 2.9721,
      "step": 127715
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.395059108734131,
      "learning_rate": 0.0002490567235315485,
      "loss": 3.07,
      "step": 127716
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.322404146194458,
      "learning_rate": 0.0002490526923827304,
      "loss": 3.0402,
      "step": 127717
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.964190721511841,
      "learning_rate": 0.0002490486612433843,
      "loss": 3.0405,
      "step": 127718
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2305853366851807,
      "learning_rate": 0.00024904463011351103,
      "loss": 3.0725,
      "step": 127719
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9935941696166992,
      "learning_rate": 0.0002490405989931114,
      "loss": 2.7513,
      "step": 127720
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1314542293548584,
      "learning_rate": 0.00024903656788218605,
      "loss": 3.0872,
      "step": 127721
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.941368818283081,
      "learning_rate": 0.00024903253678073574,
      "loss": 2.9181,
      "step": 127722
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9160003662109375,
      "learning_rate": 0.00024902850568876125,
      "loss": 3.0875,
      "step": 127723
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9456759691238403,
      "learning_rate": 0.0002490244746062633,
      "loss": 2.956,
      "step": 127724
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.984127163887024,
      "learning_rate": 0.00024902044353324266,
      "loss": 2.9595,
      "step": 127725
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3173940181732178,
      "learning_rate": 0.0002490164124697002,
      "loss": 3.0875,
      "step": 127726
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.338172435760498,
      "learning_rate": 0.00024901238141563644,
      "loss": 2.517,
      "step": 127727
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8134913444519043,
      "learning_rate": 0.0002490083503710522,
      "loss": 2.698,
      "step": 127728
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1628916263580322,
      "learning_rate": 0.00024900431933594835,
      "loss": 3.0437,
      "step": 127729
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1501352787017822,
      "learning_rate": 0.0002490002883103255,
      "loss": 3.2012,
      "step": 127730
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5254597663879395,
      "learning_rate": 0.0002489962572941845,
      "loss": 2.8744,
      "step": 127731
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2891862392425537,
      "learning_rate": 0.00024899222628752617,
      "loss": 3.0307,
      "step": 127732
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.010807514190674,
      "learning_rate": 0.000248988195290351,
      "loss": 3.0642,
      "step": 127733
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.236299991607666,
      "learning_rate": 0.0002489841643026599,
      "loss": 2.9818,
      "step": 127734
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.773759126663208,
      "learning_rate": 0.00024898013332445363,
      "loss": 2.7561,
      "step": 127735
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7878282070159912,
      "learning_rate": 0.00024897610235573287,
      "loss": 2.9084,
      "step": 127736
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9778045415878296,
      "learning_rate": 0.00024897207139649844,
      "loss": 2.8787,
      "step": 127737
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0462095737457275,
      "learning_rate": 0.0002489680404467512,
      "loss": 2.8602,
      "step": 127738
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.257638454437256,
      "learning_rate": 0.00024896400950649163,
      "loss": 2.9479,
      "step": 127739
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.070250988006592,
      "learning_rate": 0.0002489599785757206,
      "loss": 2.9658,
      "step": 127740
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.615182638168335,
      "learning_rate": 0.0002489559476544389,
      "loss": 3.0461,
      "step": 127741
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.044191360473633,
      "learning_rate": 0.00024895191674264726,
      "loss": 2.8633,
      "step": 127742
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5912232398986816,
      "learning_rate": 0.0002489478858403464,
      "loss": 2.8014,
      "step": 127743
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3629512786865234,
      "learning_rate": 0.0002489438549475372,
      "loss": 2.9806,
      "step": 127744
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0993692874908447,
      "learning_rate": 0.0002489398240642202,
      "loss": 2.9699,
      "step": 127745
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8689860105514526,
      "learning_rate": 0.0002489357931903962,
      "loss": 2.892,
      "step": 127746
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.18571400642395,
      "learning_rate": 0.0002489317623260661,
      "loss": 3.0219,
      "step": 127747
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8964661359786987,
      "learning_rate": 0.00024892773147123045,
      "loss": 2.9251,
      "step": 127748
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.264681816101074,
      "learning_rate": 0.00024892370062589016,
      "loss": 2.8819,
      "step": 127749
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.900010585784912,
      "learning_rate": 0.000248919669790046,
      "loss": 3.0006,
      "step": 127750
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8399238586425781,
      "learning_rate": 0.00024891563896369857,
      "loss": 2.9284,
      "step": 127751
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.63996958732605,
      "learning_rate": 0.0002489116081468487,
      "loss": 2.8141,
      "step": 127752
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9382821321487427,
      "learning_rate": 0.00024890757733949705,
      "loss": 2.9117,
      "step": 127753
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8476214408874512,
      "learning_rate": 0.0002489035465416445,
      "loss": 2.8824,
      "step": 127754
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.572352170944214,
      "learning_rate": 0.0002488995157532917,
      "loss": 3.0402,
      "step": 127755
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5141897201538086,
      "learning_rate": 0.00024889548497443963,
      "loss": 2.7864,
      "step": 127756
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8753421306610107,
      "learning_rate": 0.00024889145420508863,
      "loss": 3.2264,
      "step": 127757
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1106204986572266,
      "learning_rate": 0.00024888742344523975,
      "loss": 2.7757,
      "step": 127758
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0324509143829346,
      "learning_rate": 0.0002488833926948937,
      "loss": 2.9361,
      "step": 127759
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.578364133834839,
      "learning_rate": 0.0002488793619540511,
      "loss": 3.1073,
      "step": 127760
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.044004201889038,
      "learning_rate": 0.00024887533122271285,
      "loss": 3.1063,
      "step": 127761
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7073227167129517,
      "learning_rate": 0.0002488713005008797,
      "loss": 2.996,
      "step": 127762
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6770973205566406,
      "learning_rate": 0.00024886726978855234,
      "loss": 2.9974,
      "step": 127763
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1586413383483887,
      "learning_rate": 0.0002488632390857314,
      "loss": 3.2677,
      "step": 127764
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8091745376586914,
      "learning_rate": 0.0002488592083924178,
      "loss": 2.9969,
      "step": 127765
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2717478275299072,
      "learning_rate": 0.00024885517770861223,
      "loss": 2.9232,
      "step": 127766
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8656201362609863,
      "learning_rate": 0.00024885114703431546,
      "loss": 2.9798,
      "step": 127767
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.136542797088623,
      "learning_rate": 0.0002488471163695283,
      "loss": 3.0853,
      "step": 127768
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2526602745056152,
      "learning_rate": 0.00024884308571425135,
      "loss": 2.9276,
      "step": 127769
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2004482746124268,
      "learning_rate": 0.00024883905506848546,
      "loss": 2.9446,
      "step": 127770
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5646634101867676,
      "learning_rate": 0.00024883502443223134,
      "loss": 2.9815,
      "step": 127771
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1260714530944824,
      "learning_rate": 0.00024883099380548974,
      "loss": 3.0422,
      "step": 127772
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8132703304290771,
      "learning_rate": 0.0002488269631882614,
      "loss": 2.8317,
      "step": 127773
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7319391965866089,
      "learning_rate": 0.0002488229325805472,
      "loss": 2.9637,
      "step": 127774
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0858242511749268,
      "learning_rate": 0.0002488189019823477,
      "loss": 3.0213,
      "step": 127775
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.235570192337036,
      "learning_rate": 0.00024881487139366375,
      "loss": 2.9451,
      "step": 127776
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3080286979675293,
      "learning_rate": 0.0002488108408144961,
      "loss": 3.0123,
      "step": 127777
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9641342163085938,
      "learning_rate": 0.00024880681024484546,
      "loss": 2.9211,
      "step": 127778
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.616989254951477,
      "learning_rate": 0.0002488027796847126,
      "loss": 2.9529,
      "step": 127779
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.877201557159424,
      "learning_rate": 0.00024879874913409823,
      "loss": 2.9368,
      "step": 127780
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.55108904838562,
      "learning_rate": 0.00024879471859300323,
      "loss": 3.0993,
      "step": 127781
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.955845832824707,
      "learning_rate": 0.0002487906880614282,
      "loss": 2.8521,
      "step": 127782
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.092108964920044,
      "learning_rate": 0.00024878665753937394,
      "loss": 2.9412,
      "step": 127783
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.399625062942505,
      "learning_rate": 0.0002487826270268413,
      "loss": 3.0041,
      "step": 127784
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.670199871063232,
      "learning_rate": 0.00024877859652383083,
      "loss": 2.9474,
      "step": 127785
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.876738548278809,
      "learning_rate": 0.00024877456603034336,
      "loss": 3.022,
      "step": 127786
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.521679639816284,
      "learning_rate": 0.0002487705355463798,
      "loss": 2.9541,
      "step": 127787
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.069399356842041,
      "learning_rate": 0.0002487665050719407,
      "loss": 2.9525,
      "step": 127788
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4472811222076416,
      "learning_rate": 0.0002487624746070268,
      "loss": 3.0145,
      "step": 127789
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.946631669998169,
      "learning_rate": 0.00024875844415163894,
      "loss": 3.086,
      "step": 127790
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.806567907333374,
      "learning_rate": 0.000248754413705778,
      "loss": 3.0682,
      "step": 127791
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0925490856170654,
      "learning_rate": 0.0002487503832694444,
      "loss": 3.149,
      "step": 127792
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1435890197753906,
      "learning_rate": 0.0002487463528426392,
      "loss": 2.9217,
      "step": 127793
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.334035396575928,
      "learning_rate": 0.000248742322425363,
      "loss": 2.8951,
      "step": 127794
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.02310848236084,
      "learning_rate": 0.0002487382920176165,
      "loss": 2.9357,
      "step": 127795
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7935361862182617,
      "learning_rate": 0.00024873426161940054,
      "loss": 3.1725,
      "step": 127796
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2277023792266846,
      "learning_rate": 0.00024873023123071585,
      "loss": 2.8087,
      "step": 127797
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.050238847732544,
      "learning_rate": 0.0002487262008515632,
      "loss": 2.9992,
      "step": 127798
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.935472249984741,
      "learning_rate": 0.0002487221704819434,
      "loss": 2.8255,
      "step": 127799
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5850343704223633,
      "learning_rate": 0.000248718140121857,
      "loss": 3.1591,
      "step": 127800
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.058366060256958,
      "learning_rate": 0.00024871410977130487,
      "loss": 2.8728,
      "step": 127801
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0495212078094482,
      "learning_rate": 0.0002487100794302878,
      "loss": 3.1443,
      "step": 127802
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6428029537200928,
      "learning_rate": 0.0002487060490988064,
      "loss": 2.8589,
      "step": 127803
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3626911640167236,
      "learning_rate": 0.0002487020187768616,
      "loss": 2.7766,
      "step": 127804
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0991246700286865,
      "learning_rate": 0.00024869798846445415,
      "loss": 2.8479,
      "step": 127805
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1491668224334717,
      "learning_rate": 0.0002486939581615846,
      "loss": 3.1409,
      "step": 127806
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.777686834335327,
      "learning_rate": 0.00024868992786825376,
      "loss": 3.1482,
      "step": 127807
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.6085286140441895,
      "learning_rate": 0.00024868589758446255,
      "loss": 2.9384,
      "step": 127808
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.350188732147217,
      "learning_rate": 0.0002486818673102115,
      "loss": 3.0161,
      "step": 127809
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2889366149902344,
      "learning_rate": 0.0002486778370455015,
      "loss": 2.9095,
      "step": 127810
    },
    {
      "epoch": 1.66,
      "grad_norm": 4.9846906661987305,
      "learning_rate": 0.00024867380679033336,
      "loss": 2.9218,
      "step": 127811
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.4099295139312744,
      "learning_rate": 0.00024866977654470763,
      "loss": 2.8286,
      "step": 127812
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1159727573394775,
      "learning_rate": 0.00024866574630862516,
      "loss": 2.9702,
      "step": 127813
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.331115961074829,
      "learning_rate": 0.00024866171608208665,
      "loss": 3.1735,
      "step": 127814
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.6716713905334473,
      "learning_rate": 0.0002486576858650929,
      "loss": 3.1501,
      "step": 127815
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.800344228744507,
      "learning_rate": 0.0002486536556576447,
      "loss": 3.2415,
      "step": 127816
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.85005521774292,
      "learning_rate": 0.00024864962545974285,
      "loss": 2.892,
      "step": 127817
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.6927013397216797,
      "learning_rate": 0.00024864559527138793,
      "loss": 3.0251,
      "step": 127818
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.001264810562134,
      "learning_rate": 0.0002486415650925807,
      "loss": 2.8797,
      "step": 127819
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.738579273223877,
      "learning_rate": 0.000248637534923322,
      "loss": 3.2378,
      "step": 127820
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6966235637664795,
      "learning_rate": 0.00024863350476361253,
      "loss": 3.0669,
      "step": 127821
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8286250829696655,
      "learning_rate": 0.0002486294746134531,
      "loss": 2.899,
      "step": 127822
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2333669662475586,
      "learning_rate": 0.00024862544447284453,
      "loss": 2.8881,
      "step": 127823
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.050283432006836,
      "learning_rate": 0.0002486214143417873,
      "loss": 3.1593,
      "step": 127824
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.593759298324585,
      "learning_rate": 0.00024861738422028235,
      "loss": 2.9963,
      "step": 127825
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.229034900665283,
      "learning_rate": 0.0002486133541083304,
      "loss": 3.0271,
      "step": 127826
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.983047604560852,
      "learning_rate": 0.00024860932400593217,
      "loss": 3.1589,
      "step": 127827
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.019671678543091,
      "learning_rate": 0.00024860529391308847,
      "loss": 2.8626,
      "step": 127828
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5301315784454346,
      "learning_rate": 0.00024860126382980017,
      "loss": 2.8398,
      "step": 127829
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.928282618522644,
      "learning_rate": 0.00024859723375606765,
      "loss": 2.9209,
      "step": 127830
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5603694915771484,
      "learning_rate": 0.00024859320369189194,
      "loss": 3.027,
      "step": 127831
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7740932703018188,
      "learning_rate": 0.00024858917363727374,
      "loss": 3.0322,
      "step": 127832
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9502991437911987,
      "learning_rate": 0.00024858514359221375,
      "loss": 3.119,
      "step": 127833
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9230533838272095,
      "learning_rate": 0.00024858111355671275,
      "loss": 2.9048,
      "step": 127834
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.867488980293274,
      "learning_rate": 0.0002485770835307716,
      "loss": 3.0041,
      "step": 127835
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.217668294906616,
      "learning_rate": 0.00024857305351439087,
      "loss": 2.6633,
      "step": 127836
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8884766101837158,
      "learning_rate": 0.00024856902350757135,
      "loss": 2.8239,
      "step": 127837
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9975543022155762,
      "learning_rate": 0.0002485649935103138,
      "loss": 2.7808,
      "step": 127838
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0089175701141357,
      "learning_rate": 0.000248560963522619,
      "loss": 2.9965,
      "step": 127839
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0325839519500732,
      "learning_rate": 0.0002485569335444877,
      "loss": 2.8234,
      "step": 127840
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8498047590255737,
      "learning_rate": 0.00024855290357592077,
      "loss": 2.9643,
      "step": 127841
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0206074714660645,
      "learning_rate": 0.0002485488736169186,
      "loss": 3.0277,
      "step": 127842
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9362722635269165,
      "learning_rate": 0.0002485448436674823,
      "loss": 3.0007,
      "step": 127843
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8462731838226318,
      "learning_rate": 0.0002485408137276124,
      "loss": 2.9495,
      "step": 127844
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5348942279815674,
      "learning_rate": 0.0002485367837973098,
      "loss": 2.9771,
      "step": 127845
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9316250085830688,
      "learning_rate": 0.00024853275387657514,
      "loss": 3.0014,
      "step": 127846
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8202763795852661,
      "learning_rate": 0.00024852872396540924,
      "loss": 3.1533,
      "step": 127847
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.4382944107055664,
      "learning_rate": 0.00024852469406381295,
      "loss": 3.0316,
      "step": 127848
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.1001627445220947,
      "learning_rate": 0.00024852066417178675,
      "loss": 3.0207,
      "step": 127849
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2184526920318604,
      "learning_rate": 0.0002485166342893315,
      "loss": 3.2475,
      "step": 127850
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.314340591430664,
      "learning_rate": 0.00024851260441644804,
      "loss": 3.2154,
      "step": 127851
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.7261431217193604,
      "learning_rate": 0.00024850857455313704,
      "loss": 2.9796,
      "step": 127852
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9428911209106445,
      "learning_rate": 0.0002485045446993992,
      "loss": 3.0647,
      "step": 127853
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.112684726715088,
      "learning_rate": 0.00024850051485523556,
      "loss": 3.0259,
      "step": 127854
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.2274281978607178,
      "learning_rate": 0.0002484964850206465,
      "loss": 2.934,
      "step": 127855
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.405608892440796,
      "learning_rate": 0.0002484924551956329,
      "loss": 2.7506,
      "step": 127856
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.438481569290161,
      "learning_rate": 0.00024848842538019553,
      "loss": 3.2131,
      "step": 127857
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8676236867904663,
      "learning_rate": 0.0002484843955743351,
      "loss": 3.1643,
      "step": 127858
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5279409885406494,
      "learning_rate": 0.0002484803657780524,
      "loss": 3.028,
      "step": 127859
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.2485580444335938,
      "learning_rate": 0.00024847633599134827,
      "loss": 2.8969,
      "step": 127860
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.0701944828033447,
      "learning_rate": 0.0002484723062142233,
      "loss": 3.143,
      "step": 127861
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.5719664096832275,
      "learning_rate": 0.00024846827644667833,
      "loss": 2.8102,
      "step": 127862
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.9895533323287964,
      "learning_rate": 0.000248464246688714,
      "loss": 2.971,
      "step": 127863
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.8879477977752686,
      "learning_rate": 0.0002484602169403312,
      "loss": 2.9272,
      "step": 127864
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8946444988250732,
      "learning_rate": 0.00024845618720153057,
      "loss": 2.8145,
      "step": 127865
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.516718864440918,
      "learning_rate": 0.000248452157472313,
      "loss": 3.2262,
      "step": 127866
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0795364379882812,
      "learning_rate": 0.00024844812775267906,
      "loss": 3.0186,
      "step": 127867
    },
    {
      "epoch": 1.66,
      "grad_norm": 1.8138489723205566,
      "learning_rate": 0.00024844409804262963,
      "loss": 2.7885,
      "step": 127868
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.759369373321533,
      "learning_rate": 0.0002484400683421655,
      "loss": 2.8398,
      "step": 127869
    },
    {
      "epoch": 1.66,
      "grad_norm": 3.5504305362701416,
      "learning_rate": 0.00024843603865128713,
      "loss": 2.9348,
      "step": 127870
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.3119914531707764,
      "learning_rate": 0.00024843200896999557,
      "loss": 2.9823,
      "step": 127871
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.0873262882232666,
      "learning_rate": 0.00024842797929829156,
      "loss": 3.2788,
      "step": 127872
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.217907190322876,
      "learning_rate": 0.0002484239496361757,
      "loss": 2.8474,
      "step": 127873
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3491132259368896,
      "learning_rate": 0.0002484199199836487,
      "loss": 3.1759,
      "step": 127874
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.073498249053955,
      "learning_rate": 0.0002484158903407115,
      "loss": 3.2392,
      "step": 127875
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3221230506896973,
      "learning_rate": 0.00024841186070736484,
      "loss": 2.8762,
      "step": 127876
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.89693284034729,
      "learning_rate": 0.0002484078310836093,
      "loss": 3.0037,
      "step": 127877
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5823166370391846,
      "learning_rate": 0.0002484038014694458,
      "loss": 2.9871,
      "step": 127878
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1894304752349854,
      "learning_rate": 0.00024839977186487486,
      "loss": 3.1791,
      "step": 127879
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9510798454284668,
      "learning_rate": 0.00024839574226989745,
      "loss": 3.1139,
      "step": 127880
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1027092933654785,
      "learning_rate": 0.0002483917126845142,
      "loss": 2.9026,
      "step": 127881
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1084721088409424,
      "learning_rate": 0.0002483876831087259,
      "loss": 3.0033,
      "step": 127882
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9714770317077637,
      "learning_rate": 0.00024838365354253343,
      "loss": 3.0994,
      "step": 127883
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.129507303237915,
      "learning_rate": 0.00024837962398593737,
      "loss": 2.9825,
      "step": 127884
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.5122194290161133,
      "learning_rate": 0.0002483755944389385,
      "loss": 2.7197,
      "step": 127885
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.303417205810547,
      "learning_rate": 0.00024837156490153755,
      "loss": 3.1365,
      "step": 127886
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.716570258140564,
      "learning_rate": 0.00024836753537373524,
      "loss": 2.7852,
      "step": 127887
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.902398943901062,
      "learning_rate": 0.00024836350585553246,
      "loss": 3.227,
      "step": 127888
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9025388956069946,
      "learning_rate": 0.00024835947634692986,
      "loss": 2.9148,
      "step": 127889
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.006993532180786,
      "learning_rate": 0.0002483554468479283,
      "loss": 2.9178,
      "step": 127890
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8935469388961792,
      "learning_rate": 0.0002483514173585284,
      "loss": 2.782,
      "step": 127891
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8900041580200195,
      "learning_rate": 0.00024834738787873087,
      "loss": 3.0987,
      "step": 127892
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.270427703857422,
      "learning_rate": 0.00024834335840853656,
      "loss": 2.6851,
      "step": 127893
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4634697437286377,
      "learning_rate": 0.0002483393289479462,
      "loss": 2.7458,
      "step": 127894
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0359840393066406,
      "learning_rate": 0.00024833529949696046,
      "loss": 2.9651,
      "step": 127895
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7084898948669434,
      "learning_rate": 0.0002483312700555804,
      "loss": 3.0955,
      "step": 127896
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.746717691421509,
      "learning_rate": 0.0002483272406238063,
      "loss": 2.8232,
      "step": 127897
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.329097032546997,
      "learning_rate": 0.0002483232112016392,
      "loss": 2.8906,
      "step": 127898
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.179086923599243,
      "learning_rate": 0.00024831918178907977,
      "loss": 2.7465,
      "step": 127899
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8324029445648193,
      "learning_rate": 0.0002483151523861288,
      "loss": 2.753,
      "step": 127900
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.2453577518463135,
      "learning_rate": 0.00024831112299278704,
      "loss": 2.902,
      "step": 127901
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9335918426513672,
      "learning_rate": 0.0002483070936090553,
      "loss": 3.0728,
      "step": 127902
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.002302408218384,
      "learning_rate": 0.00024830306423493417,
      "loss": 3.0068,
      "step": 127903
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.134145975112915,
      "learning_rate": 0.0002482990348704245,
      "loss": 2.9077,
      "step": 127904
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.429605007171631,
      "learning_rate": 0.00024829500551552694,
      "loss": 2.9159,
      "step": 127905
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.6865284442901611,
      "learning_rate": 0.0002482909761702423,
      "loss": 3.0346,
      "step": 127906
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.0364184379577637,
      "learning_rate": 0.0002482869468345714,
      "loss": 2.8565,
      "step": 127907
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.663449764251709,
      "learning_rate": 0.000248282917508515,
      "loss": 3.0647,
      "step": 127908
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4350991249084473,
      "learning_rate": 0.0002482788881920737,
      "loss": 2.9976,
      "step": 127909
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.091968536376953,
      "learning_rate": 0.00024827485888524834,
      "loss": 2.9352,
      "step": 127910
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.359762668609619,
      "learning_rate": 0.00024827082958803963,
      "loss": 2.6692,
      "step": 127911
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.093050241470337,
      "learning_rate": 0.0002482668003004484,
      "loss": 3.0704,
      "step": 127912
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.122699022293091,
      "learning_rate": 0.0002482627710224753,
      "loss": 3.1503,
      "step": 127913
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.414479970932007,
      "learning_rate": 0.00024825874175412113,
      "loss": 3.1389,
      "step": 127914
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.292738676071167,
      "learning_rate": 0.0002482547124953868,
      "loss": 2.7129,
      "step": 127915
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.062930107116699,
      "learning_rate": 0.0002482506832462727,
      "loss": 2.8732,
      "step": 127916
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.268409013748169,
      "learning_rate": 0.00024824665400677987,
      "loss": 3.0461,
      "step": 127917
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3328559398651123,
      "learning_rate": 0.0002482426247769089,
      "loss": 2.7337,
      "step": 127918
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.5001087188720703,
      "learning_rate": 0.0002482385955566606,
      "loss": 2.8579,
      "step": 127919
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.417999029159546,
      "learning_rate": 0.0002482345663460357,
      "loss": 2.9872,
      "step": 127920
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.44657564163208,
      "learning_rate": 0.00024823053714503516,
      "loss": 2.9667,
      "step": 127921
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.305569648742676,
      "learning_rate": 0.0002482265079536594,
      "loss": 3.1188,
      "step": 127922
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6966397762298584,
      "learning_rate": 0.0002482224787719093,
      "loss": 2.8041,
      "step": 127923
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.6208927631378174,
      "learning_rate": 0.0002482184495997856,
      "loss": 2.9572,
      "step": 127924
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0243477821350098,
      "learning_rate": 0.0002482144204372891,
      "loss": 2.9609,
      "step": 127925
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.8252999782562256,
      "learning_rate": 0.0002482103912844205,
      "loss": 3.2451,
      "step": 127926
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.714416027069092,
      "learning_rate": 0.0002482063621411807,
      "loss": 2.8658,
      "step": 127927
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.0895402431488037,
      "learning_rate": 0.00024820233300757013,
      "loss": 2.7996,
      "step": 127928
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.265925645828247,
      "learning_rate": 0.0002481983038835898,
      "loss": 2.8244,
      "step": 127929
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.0486605167388916,
      "learning_rate": 0.0002481942747692404,
      "loss": 2.7989,
      "step": 127930
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8172965049743652,
      "learning_rate": 0.0002481902456645226,
      "loss": 2.7801,
      "step": 127931
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.045531749725342,
      "learning_rate": 0.00024818621656943725,
      "loss": 2.8277,
      "step": 127932
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.050278902053833,
      "learning_rate": 0.00024818218748398515,
      "loss": 3.0861,
      "step": 127933
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7142274379730225,
      "learning_rate": 0.00024817815840816684,
      "loss": 3.1161,
      "step": 127934
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.786548137664795,
      "learning_rate": 0.0002481741293419832,
      "loss": 3.0549,
      "step": 127935
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.462714910507202,
      "learning_rate": 0.000248170100285435,
      "loss": 2.9862,
      "step": 127936
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1598896980285645,
      "learning_rate": 0.0002481660712385229,
      "loss": 2.8276,
      "step": 127937
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.222593069076538,
      "learning_rate": 0.0002481620422012477,
      "loss": 3.1611,
      "step": 127938
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1827545166015625,
      "learning_rate": 0.0002481580131736104,
      "loss": 2.9445,
      "step": 127939
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7777353525161743,
      "learning_rate": 0.00024815398415561126,
      "loss": 2.9526,
      "step": 127940
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.3325726985931396,
      "learning_rate": 0.0002481499551472513,
      "loss": 2.8615,
      "step": 127941
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9588836431503296,
      "learning_rate": 0.0002481459261485312,
      "loss": 2.952,
      "step": 127942
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.336568832397461,
      "learning_rate": 0.0002481418971594518,
      "loss": 3.1881,
      "step": 127943
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.9574778079986572,
      "learning_rate": 0.00024813786818001383,
      "loss": 2.965,
      "step": 127944
    },
    {
      "epoch": 1.67,
      "grad_norm": 5.376585483551025,
      "learning_rate": 0.000248133839210218,
      "loss": 2.8743,
      "step": 127945
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.284563064575195,
      "learning_rate": 0.00024812981025006516,
      "loss": 3.1019,
      "step": 127946
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9562327861785889,
      "learning_rate": 0.00024812578129955584,
      "loss": 2.9672,
      "step": 127947
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.553643226623535,
      "learning_rate": 0.00024812175235869094,
      "loss": 3.0645,
      "step": 127948
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.528365135192871,
      "learning_rate": 0.00024811772342747115,
      "loss": 2.9834,
      "step": 127949
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.016119003295898,
      "learning_rate": 0.0002481136945058973,
      "loss": 3.1024,
      "step": 127950
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.151501417160034,
      "learning_rate": 0.0002481096655939701,
      "loss": 2.976,
      "step": 127951
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8257689476013184,
      "learning_rate": 0.0002481056366916903,
      "loss": 3.0754,
      "step": 127952
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5673062801361084,
      "learning_rate": 0.00024810160779905867,
      "loss": 3.0223,
      "step": 127953
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.086452007293701,
      "learning_rate": 0.0002480975789160758,
      "loss": 2.8803,
      "step": 127954
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.461557149887085,
      "learning_rate": 0.00024809355004274265,
      "loss": 2.93,
      "step": 127955
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8767282962799072,
      "learning_rate": 0.0002480895211790599,
      "loss": 2.7035,
      "step": 127956
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8640549182891846,
      "learning_rate": 0.00024808549232502834,
      "loss": 2.8242,
      "step": 127957
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.775771141052246,
      "learning_rate": 0.0002480814634806486,
      "loss": 2.8437,
      "step": 127958
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.4718289375305176,
      "learning_rate": 0.00024807743464592144,
      "loss": 2.8728,
      "step": 127959
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6766319274902344,
      "learning_rate": 0.0002480734058208477,
      "loss": 3.0157,
      "step": 127960
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2155885696411133,
      "learning_rate": 0.0002480693770054282,
      "loss": 3.0,
      "step": 127961
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9973607063293457,
      "learning_rate": 0.0002480653481996635,
      "loss": 2.7262,
      "step": 127962
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.154729127883911,
      "learning_rate": 0.0002480613194035545,
      "loss": 2.9582,
      "step": 127963
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.704993724822998,
      "learning_rate": 0.00024805729061710177,
      "loss": 3.0833,
      "step": 127964
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.57542610168457,
      "learning_rate": 0.00024805326184030625,
      "loss": 2.8247,
      "step": 127965
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.249521017074585,
      "learning_rate": 0.00024804923307316854,
      "loss": 2.9242,
      "step": 127966
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9887261390686035,
      "learning_rate": 0.00024804520431568944,
      "loss": 3.0141,
      "step": 127967
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9768824577331543,
      "learning_rate": 0.0002480411755678698,
      "loss": 2.893,
      "step": 127968
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.2513155937194824,
      "learning_rate": 0.00024803714682971033,
      "loss": 2.8308,
      "step": 127969
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5333940982818604,
      "learning_rate": 0.0002480331181012117,
      "loss": 3.0801,
      "step": 127970
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0112197399139404,
      "learning_rate": 0.0002480290893823746,
      "loss": 2.9172,
      "step": 127971
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.11287260055542,
      "learning_rate": 0.00024802506067319994,
      "loss": 2.8772,
      "step": 127972
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.796536922454834,
      "learning_rate": 0.00024802103197368836,
      "loss": 2.9451,
      "step": 127973
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9705584049224854,
      "learning_rate": 0.00024801700328384065,
      "loss": 3.1258,
      "step": 127974
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.848996639251709,
      "learning_rate": 0.00024801297460365774,
      "loss": 3.0548,
      "step": 127975
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0816900730133057,
      "learning_rate": 0.00024800894593314,
      "loss": 2.9221,
      "step": 127976
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9713512659072876,
      "learning_rate": 0.0002480049172722884,
      "loss": 3.1164,
      "step": 127977
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0955862998962402,
      "learning_rate": 0.0002480008886211037,
      "loss": 3.0867,
      "step": 127978
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.221682071685791,
      "learning_rate": 0.0002479968599795866,
      "loss": 2.8184,
      "step": 127979
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.098569631576538,
      "learning_rate": 0.00024799283134773793,
      "loss": 3.2045,
      "step": 127980
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0528221130371094,
      "learning_rate": 0.00024798880272555826,
      "loss": 3.0887,
      "step": 127981
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2532362937927246,
      "learning_rate": 0.00024798477411304865,
      "loss": 3.0606,
      "step": 127982
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0890040397644043,
      "learning_rate": 0.00024798074551020947,
      "loss": 3.0822,
      "step": 127983
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9603055715560913,
      "learning_rate": 0.0002479767169170417,
      "loss": 2.8748,
      "step": 127984
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9159085750579834,
      "learning_rate": 0.00024797268833354604,
      "loss": 2.871,
      "step": 127985
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7226570844650269,
      "learning_rate": 0.0002479686597597232,
      "loss": 3.0646,
      "step": 127986
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.477137327194214,
      "learning_rate": 0.00024796463119557404,
      "loss": 2.8546,
      "step": 127987
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.164429187774658,
      "learning_rate": 0.0002479606026410993,
      "loss": 3.2847,
      "step": 127988
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7568137645721436,
      "learning_rate": 0.00024795657409629955,
      "loss": 2.7985,
      "step": 127989
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8846473693847656,
      "learning_rate": 0.0002479525455611757,
      "loss": 2.9421,
      "step": 127990
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9710803031921387,
      "learning_rate": 0.0002479485170357284,
      "loss": 3.1573,
      "step": 127991
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1598570346832275,
      "learning_rate": 0.0002479444885199585,
      "loss": 2.7439,
      "step": 127992
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0975372791290283,
      "learning_rate": 0.00024794046001386667,
      "loss": 2.876,
      "step": 127993
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9484868049621582,
      "learning_rate": 0.0002479364315174538,
      "loss": 3.068,
      "step": 127994
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.157879590988159,
      "learning_rate": 0.0002479324030307204,
      "loss": 3.2149,
      "step": 127995
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2320783138275146,
      "learning_rate": 0.0002479283745536674,
      "loss": 2.9756,
      "step": 127996
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2315409183502197,
      "learning_rate": 0.0002479243460862955,
      "loss": 2.9798,
      "step": 127997
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.6481248140335083,
      "learning_rate": 0.0002479203176286054,
      "loss": 2.7445,
      "step": 127998
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.002707004547119,
      "learning_rate": 0.0002479162891805979,
      "loss": 2.788,
      "step": 127999
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8839876651763916,
      "learning_rate": 0.0002479122607422739,
      "loss": 3.0638,
      "step": 128000
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9482765197753906,
      "learning_rate": 0.00024790823231363385,
      "loss": 2.783,
      "step": 128001
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9530792236328125,
      "learning_rate": 0.00024790420389467866,
      "loss": 3.0982,
      "step": 128002
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1627964973449707,
      "learning_rate": 0.00024790017548540904,
      "loss": 2.8292,
      "step": 128003
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.929853916168213,
      "learning_rate": 0.0002478961470858257,
      "loss": 3.2314,
      "step": 128004
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1106302738189697,
      "learning_rate": 0.0002478921186959296,
      "loss": 3.1872,
      "step": 128005
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0071897506713867,
      "learning_rate": 0.00024788809031572136,
      "loss": 2.8277,
      "step": 128006
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.0962302684783936,
      "learning_rate": 0.0002478840619452016,
      "loss": 2.8318,
      "step": 128007
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2164413928985596,
      "learning_rate": 0.0002478800335843712,
      "loss": 3.1154,
      "step": 128008
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3006293773651123,
      "learning_rate": 0.0002478760052332309,
      "loss": 2.8907,
      "step": 128009
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.27470588684082,
      "learning_rate": 0.0002478719768917814,
      "loss": 2.8809,
      "step": 128010
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.838914155960083,
      "learning_rate": 0.0002478679485600235,
      "loss": 3.0983,
      "step": 128011
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.6250736713409424,
      "learning_rate": 0.000247863920237958,
      "loss": 2.9706,
      "step": 128012
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4273922443389893,
      "learning_rate": 0.0002478598919255855,
      "loss": 3.0032,
      "step": 128013
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.29754900932312,
      "learning_rate": 0.00024785586362290686,
      "loss": 2.9799,
      "step": 128014
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9473751783370972,
      "learning_rate": 0.00024785183532992274,
      "loss": 2.9744,
      "step": 128015
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9313082695007324,
      "learning_rate": 0.000247847807046634,
      "loss": 3.0968,
      "step": 128016
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.298327922821045,
      "learning_rate": 0.0002478437787730413,
      "loss": 3.211,
      "step": 128017
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3864989280700684,
      "learning_rate": 0.0002478397505091456,
      "loss": 2.8728,
      "step": 128018
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.284532070159912,
      "learning_rate": 0.00024783572225494727,
      "loss": 2.7496,
      "step": 128019
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8458846807479858,
      "learning_rate": 0.0002478316940104473,
      "loss": 3.0939,
      "step": 128020
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2064223289489746,
      "learning_rate": 0.00024782766577564647,
      "loss": 2.9494,
      "step": 128021
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3006694316864014,
      "learning_rate": 0.0002478236375505454,
      "loss": 2.7361,
      "step": 128022
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.028526782989502,
      "learning_rate": 0.0002478196093351449,
      "loss": 3.0883,
      "step": 128023
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8143341541290283,
      "learning_rate": 0.00024781558112944586,
      "loss": 3.0882,
      "step": 128024
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.9759838581085205,
      "learning_rate": 0.0002478115529334487,
      "loss": 3.0512,
      "step": 128025
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1082727909088135,
      "learning_rate": 0.00024780752474715443,
      "loss": 3.0673,
      "step": 128026
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1493592262268066,
      "learning_rate": 0.0002478034965705637,
      "loss": 2.954,
      "step": 128027
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.616962194442749,
      "learning_rate": 0.0002477994684036773,
      "loss": 2.8592,
      "step": 128028
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1777007579803467,
      "learning_rate": 0.000247795440246496,
      "loss": 3.073,
      "step": 128029
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.3196544647216797,
      "learning_rate": 0.0002477914120990205,
      "loss": 3.0028,
      "step": 128030
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.062718629837036,
      "learning_rate": 0.0002477873839612516,
      "loss": 3.0238,
      "step": 128031
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.058501720428467,
      "learning_rate": 0.0002477833558331899,
      "loss": 2.975,
      "step": 128032
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7995272874832153,
      "learning_rate": 0.00024777932771483633,
      "loss": 3.0583,
      "step": 128033
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7944754362106323,
      "learning_rate": 0.0002477752996061916,
      "loss": 2.9654,
      "step": 128034
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7872719764709473,
      "learning_rate": 0.00024777127150725633,
      "loss": 3.0845,
      "step": 128035
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.048529624938965,
      "learning_rate": 0.0002477672434180315,
      "loss": 2.8824,
      "step": 128036
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1100001335144043,
      "learning_rate": 0.0002477632153385176,
      "loss": 3.0291,
      "step": 128037
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0314972400665283,
      "learning_rate": 0.0002477591872687156,
      "loss": 3.0077,
      "step": 128038
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.229862689971924,
      "learning_rate": 0.0002477551592086261,
      "loss": 2.7765,
      "step": 128039
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9616599082946777,
      "learning_rate": 0.0002477511311582499,
      "loss": 2.9107,
      "step": 128040
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8891233205795288,
      "learning_rate": 0.0002477471031175877,
      "loss": 2.8941,
      "step": 128041
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9932388067245483,
      "learning_rate": 0.00024774307508664044,
      "loss": 3.0838,
      "step": 128042
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6674931049346924,
      "learning_rate": 0.0002477390470654086,
      "loss": 3.0012,
      "step": 128043
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2663707733154297,
      "learning_rate": 0.0002477350190538931,
      "loss": 3.0494,
      "step": 128044
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7853628396987915,
      "learning_rate": 0.0002477309910520947,
      "loss": 3.0372,
      "step": 128045
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.425121307373047,
      "learning_rate": 0.00024772696306001404,
      "loss": 3.0277,
      "step": 128046
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9524154663085938,
      "learning_rate": 0.00024772293507765194,
      "loss": 3.1157,
      "step": 128047
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0323543548583984,
      "learning_rate": 0.00024771890710500907,
      "loss": 3.1336,
      "step": 128048
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7729694843292236,
      "learning_rate": 0.00024771487914208634,
      "loss": 3.0371,
      "step": 128049
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9104723930358887,
      "learning_rate": 0.00024771085118888435,
      "loss": 3.1248,
      "step": 128050
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4466819763183594,
      "learning_rate": 0.0002477068232454039,
      "loss": 3.0249,
      "step": 128051
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.030673027038574,
      "learning_rate": 0.00024770279531164573,
      "loss": 2.9126,
      "step": 128052
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9155610799789429,
      "learning_rate": 0.0002476987673876107,
      "loss": 2.9953,
      "step": 128053
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.170267343521118,
      "learning_rate": 0.0002476947394732993,
      "loss": 2.8003,
      "step": 128054
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2411863803863525,
      "learning_rate": 0.00024769071156871254,
      "loss": 3.0763,
      "step": 128055
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0022151470184326,
      "learning_rate": 0.00024768668367385104,
      "loss": 3.1208,
      "step": 128056
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.946292519569397,
      "learning_rate": 0.00024768265578871553,
      "loss": 2.9026,
      "step": 128057
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0728249549865723,
      "learning_rate": 0.00024767862791330684,
      "loss": 3.0038,
      "step": 128058
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9614126682281494,
      "learning_rate": 0.0002476746000476256,
      "loss": 2.9908,
      "step": 128059
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.513969898223877,
      "learning_rate": 0.0002476705721916728,
      "loss": 2.6954,
      "step": 128060
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.097954273223877,
      "learning_rate": 0.000247666544345449,
      "loss": 2.9587,
      "step": 128061
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.075424909591675,
      "learning_rate": 0.0002476625165089549,
      "loss": 3.0544,
      "step": 128062
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5527822971343994,
      "learning_rate": 0.00024765848868219133,
      "loss": 2.8438,
      "step": 128063
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.8150923252105713,
      "learning_rate": 0.000247654460865159,
      "loss": 2.7924,
      "step": 128064
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4889822006225586,
      "learning_rate": 0.00024765043305785875,
      "loss": 2.9211,
      "step": 128065
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.7891509532928467,
      "learning_rate": 0.0002476464052602912,
      "loss": 3.2505,
      "step": 128066
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2827348709106445,
      "learning_rate": 0.0002476423774724574,
      "loss": 3.07,
      "step": 128067
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1432487964630127,
      "learning_rate": 0.00024763834969435773,
      "loss": 2.9911,
      "step": 128068
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.5240639448165894,
      "learning_rate": 0.000247634321925993,
      "loss": 3.1434,
      "step": 128069
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7820028066635132,
      "learning_rate": 0.00024763029416736406,
      "loss": 3.0551,
      "step": 128070
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3781394958496094,
      "learning_rate": 0.0002476262664184717,
      "loss": 2.7522,
      "step": 128071
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.916375160217285,
      "learning_rate": 0.00024762223867931657,
      "loss": 2.9573,
      "step": 128072
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2322757244110107,
      "learning_rate": 0.00024761821094989954,
      "loss": 2.8451,
      "step": 128073
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.459794282913208,
      "learning_rate": 0.00024761418323022115,
      "loss": 3.0075,
      "step": 128074
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.974830389022827,
      "learning_rate": 0.00024761015552028236,
      "loss": 2.7034,
      "step": 128075
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.951754093170166,
      "learning_rate": 0.00024760612782008377,
      "loss": 2.8754,
      "step": 128076
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0476772785186768,
      "learning_rate": 0.0002476021001296262,
      "loss": 3.1441,
      "step": 128077
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9214271306991577,
      "learning_rate": 0.00024759807244891034,
      "loss": 2.9674,
      "step": 128078
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5354700088500977,
      "learning_rate": 0.0002475940447779372,
      "loss": 2.9002,
      "step": 128079
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.975721836090088,
      "learning_rate": 0.00024759001711670715,
      "loss": 2.762,
      "step": 128080
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.112201452255249,
      "learning_rate": 0.00024758598946522106,
      "loss": 3.0965,
      "step": 128081
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8057976961135864,
      "learning_rate": 0.0002475819618234798,
      "loss": 2.8825,
      "step": 128082
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.786285400390625,
      "learning_rate": 0.00024757793419148395,
      "loss": 2.8295,
      "step": 128083
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6077170372009277,
      "learning_rate": 0.00024757390656923446,
      "loss": 3.1672,
      "step": 128084
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1032869815826416,
      "learning_rate": 0.00024756987895673205,
      "loss": 2.79,
      "step": 128085
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.400639533996582,
      "learning_rate": 0.0002475658513539772,
      "loss": 2.9053,
      "step": 128086
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.329765558242798,
      "learning_rate": 0.000247561823760971,
      "loss": 2.9273,
      "step": 128087
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.84397292137146,
      "learning_rate": 0.0002475577961777139,
      "loss": 3.1116,
      "step": 128088
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7009944915771484,
      "learning_rate": 0.0002475537686042069,
      "loss": 2.9695,
      "step": 128089
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2273290157318115,
      "learning_rate": 0.00024754974104045053,
      "loss": 3.0908,
      "step": 128090
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0526914596557617,
      "learning_rate": 0.00024754571348644587,
      "loss": 3.0311,
      "step": 128091
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.481107711791992,
      "learning_rate": 0.0002475416859421933,
      "loss": 3.1601,
      "step": 128092
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0126841068267822,
      "learning_rate": 0.0002475376584076937,
      "loss": 2.8344,
      "step": 128093
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.418900728225708,
      "learning_rate": 0.00024753363088294786,
      "loss": 2.8534,
      "step": 128094
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1094377040863037,
      "learning_rate": 0.00024752960336795653,
      "loss": 2.9951,
      "step": 128095
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1945996284484863,
      "learning_rate": 0.0002475255758627204,
      "loss": 3.0171,
      "step": 128096
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9670259952545166,
      "learning_rate": 0.0002475215483672404,
      "loss": 2.6834,
      "step": 128097
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0908758640289307,
      "learning_rate": 0.000247517520881517,
      "loss": 2.9211,
      "step": 128098
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.80775785446167,
      "learning_rate": 0.0002475134934055511,
      "loss": 2.7855,
      "step": 128099
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.708129644393921,
      "learning_rate": 0.00024750946593934346,
      "loss": 3.1239,
      "step": 128100
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4102542400360107,
      "learning_rate": 0.0002475054384828947,
      "loss": 3.0362,
      "step": 128101
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.072619676589966,
      "learning_rate": 0.0002475014110362058,
      "loss": 3.1855,
      "step": 128102
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.938012719154358,
      "learning_rate": 0.0002474973835992774,
      "loss": 2.8789,
      "step": 128103
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9288007020950317,
      "learning_rate": 0.00024749335617211014,
      "loss": 3.2301,
      "step": 128104
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.061948537826538,
      "learning_rate": 0.00024748932875470487,
      "loss": 3.0354,
      "step": 128105
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.431546926498413,
      "learning_rate": 0.0002474853013470623,
      "loss": 2.8595,
      "step": 128106
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9990215301513672,
      "learning_rate": 0.0002474812739491832,
      "loss": 2.989,
      "step": 128107
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0208065509796143,
      "learning_rate": 0.00024747724656106834,
      "loss": 3.1426,
      "step": 128108
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6664326190948486,
      "learning_rate": 0.0002474732191827186,
      "loss": 3.0701,
      "step": 128109
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.361410140991211,
      "learning_rate": 0.0002474691918141344,
      "loss": 2.8465,
      "step": 128110
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.436954975128174,
      "learning_rate": 0.0002474651644553167,
      "loss": 3.1967,
      "step": 128111
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1703174114227295,
      "learning_rate": 0.0002474611371062662,
      "loss": 3.0094,
      "step": 128112
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.813255786895752,
      "learning_rate": 0.00024745710976698367,
      "loss": 3.2176,
      "step": 128113
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.34867787361145,
      "learning_rate": 0.0002474530824374699,
      "loss": 3.1024,
      "step": 128114
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0679783821105957,
      "learning_rate": 0.00024744905511772555,
      "loss": 3.1898,
      "step": 128115
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.295583724975586,
      "learning_rate": 0.0002474450278077516,
      "loss": 2.7205,
      "step": 128116
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.266852378845215,
      "learning_rate": 0.00024744100050754837,
      "loss": 2.8171,
      "step": 128117
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2680306434631348,
      "learning_rate": 0.00024743697321711694,
      "loss": 2.9867,
      "step": 128118
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8519229888916016,
      "learning_rate": 0.000247432945936458,
      "loss": 3.1233,
      "step": 128119
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3928568363189697,
      "learning_rate": 0.00024742891866557217,
      "loss": 3.1259,
      "step": 128120
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0462169647216797,
      "learning_rate": 0.00024742489140446033,
      "loss": 2.6237,
      "step": 128121
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9187567234039307,
      "learning_rate": 0.00024742086415312323,
      "loss": 3.0935,
      "step": 128122
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.619626760482788,
      "learning_rate": 0.0002474168369115616,
      "loss": 2.9769,
      "step": 128123
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1687893867492676,
      "learning_rate": 0.00024741280967977616,
      "loss": 2.7446,
      "step": 128124
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.028310775756836,
      "learning_rate": 0.00024740878245776765,
      "loss": 2.7625,
      "step": 128125
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.183943510055542,
      "learning_rate": 0.0002474047552455368,
      "loss": 3.2731,
      "step": 128126
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8036171197891235,
      "learning_rate": 0.0002474007280430845,
      "loss": 3.1484,
      "step": 128127
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2610371112823486,
      "learning_rate": 0.00024739670085041133,
      "loss": 3.0023,
      "step": 128128
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8441834449768066,
      "learning_rate": 0.00024739267366751807,
      "loss": 2.9496,
      "step": 128129
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9979400634765625,
      "learning_rate": 0.0002473886464944056,
      "loss": 2.9662,
      "step": 128130
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9388716220855713,
      "learning_rate": 0.00024738461933107456,
      "loss": 3.0754,
      "step": 128131
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.782313823699951,
      "learning_rate": 0.00024738059217752566,
      "loss": 2.8691,
      "step": 128132
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.318307638168335,
      "learning_rate": 0.0002473765650337597,
      "loss": 3.0519,
      "step": 128133
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2717881202697754,
      "learning_rate": 0.0002473725378997775,
      "loss": 3.2004,
      "step": 128134
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9521280527114868,
      "learning_rate": 0.0002473685107755797,
      "loss": 2.9001,
      "step": 128135
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1890127658843994,
      "learning_rate": 0.000247364483661167,
      "loss": 2.9397,
      "step": 128136
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2997570037841797,
      "learning_rate": 0.00024736045655654037,
      "loss": 2.9553,
      "step": 128137
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9852104187011719,
      "learning_rate": 0.00024735642946170037,
      "loss": 3.0876,
      "step": 128138
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.392293691635132,
      "learning_rate": 0.0002473524023766478,
      "loss": 3.2168,
      "step": 128139
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5138092041015625,
      "learning_rate": 0.00024734837530138344,
      "loss": 3.0562,
      "step": 128140
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.898438811302185,
      "learning_rate": 0.000247344348235908,
      "loss": 2.9038,
      "step": 128141
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1869120597839355,
      "learning_rate": 0.0002473403211802222,
      "loss": 3.167,
      "step": 128142
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.13533878326416,
      "learning_rate": 0.00024733629413432683,
      "loss": 2.974,
      "step": 128143
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0628936290740967,
      "learning_rate": 0.0002473322670982227,
      "loss": 3.3292,
      "step": 128144
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7032904624938965,
      "learning_rate": 0.00024732824007191043,
      "loss": 2.7499,
      "step": 128145
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2775208950042725,
      "learning_rate": 0.0002473242130553909,
      "loss": 3.1108,
      "step": 128146
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7610862255096436,
      "learning_rate": 0.00024732018604866474,
      "loss": 3.2475,
      "step": 128147
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8847657442092896,
      "learning_rate": 0.00024731615905173275,
      "loss": 2.9712,
      "step": 128148
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1233999729156494,
      "learning_rate": 0.0002473121320645957,
      "loss": 2.8848,
      "step": 128149
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9088897705078125,
      "learning_rate": 0.0002473081050872543,
      "loss": 2.9645,
      "step": 128150
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.769132137298584,
      "learning_rate": 0.0002473040781197093,
      "loss": 2.9647,
      "step": 128151
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4556376934051514,
      "learning_rate": 0.0002473000511619616,
      "loss": 3.3131,
      "step": 128152
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4067413806915283,
      "learning_rate": 0.0002472960242140117,
      "loss": 2.7341,
      "step": 128153
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1651880741119385,
      "learning_rate": 0.0002472919972758605,
      "loss": 3.2236,
      "step": 128154
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.867680788040161,
      "learning_rate": 0.0002472879703475086,
      "loss": 2.9351,
      "step": 128155
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7623695135116577,
      "learning_rate": 0.00024728394342895697,
      "loss": 3.0403,
      "step": 128156
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.94914174079895,
      "learning_rate": 0.00024727991652020623,
      "loss": 2.999,
      "step": 128157
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.069500207901001,
      "learning_rate": 0.00024727588962125723,
      "loss": 2.8736,
      "step": 128158
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7559982538223267,
      "learning_rate": 0.00024727186273211055,
      "loss": 2.8089,
      "step": 128159
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2131550312042236,
      "learning_rate": 0.00024726783585276706,
      "loss": 3.0263,
      "step": 128160
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.675001859664917,
      "learning_rate": 0.0002472638089832274,
      "loss": 2.9478,
      "step": 128161
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5602831840515137,
      "learning_rate": 0.00024725978212349243,
      "loss": 2.8775,
      "step": 128162
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.178171396255493,
      "learning_rate": 0.00024725575527356287,
      "loss": 3.1589,
      "step": 128163
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.4396891593933105,
      "learning_rate": 0.0002472517284334396,
      "loss": 2.9802,
      "step": 128164
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1758196353912354,
      "learning_rate": 0.00024724770160312306,
      "loss": 2.8219,
      "step": 128165
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.309509038925171,
      "learning_rate": 0.0002472436747826142,
      "loss": 2.9857,
      "step": 128166
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8339028358459473,
      "learning_rate": 0.00024723964797191366,
      "loss": 2.9726,
      "step": 128167
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9516613483428955,
      "learning_rate": 0.00024723562117102233,
      "loss": 2.7215,
      "step": 128168
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6128087043762207,
      "learning_rate": 0.00024723159437994093,
      "loss": 3.1299,
      "step": 128169
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.095996379852295,
      "learning_rate": 0.00024722756759867025,
      "loss": 3.0486,
      "step": 128170
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1491832733154297,
      "learning_rate": 0.00024722354082721084,
      "loss": 3.0353,
      "step": 128171
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1851391792297363,
      "learning_rate": 0.0002472195140655636,
      "loss": 3.0821,
      "step": 128172
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.225170373916626,
      "learning_rate": 0.0002472154873137292,
      "loss": 3.013,
      "step": 128173
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0938076972961426,
      "learning_rate": 0.0002472114605717085,
      "loss": 3.0853,
      "step": 128174
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9025973081588745,
      "learning_rate": 0.0002472074338395021,
      "loss": 2.8465,
      "step": 128175
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.811286449432373,
      "learning_rate": 0.00024720340711711106,
      "loss": 2.9322,
      "step": 128176
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2623136043548584,
      "learning_rate": 0.0002471993804045357,
      "loss": 3.1343,
      "step": 128177
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7248473167419434,
      "learning_rate": 0.000247195353701777,
      "loss": 3.0664,
      "step": 128178
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2568325996398926,
      "learning_rate": 0.00024719132700883566,
      "loss": 2.7589,
      "step": 128179
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8443419933319092,
      "learning_rate": 0.0002471873003257125,
      "loss": 3.1783,
      "step": 128180
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6572532653808594,
      "learning_rate": 0.00024718327365240813,
      "loss": 2.7164,
      "step": 128181
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.39508318901062,
      "learning_rate": 0.0002471792469889235,
      "loss": 3.1374,
      "step": 128182
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7381043434143066,
      "learning_rate": 0.0002471752203352593,
      "loss": 3.1512,
      "step": 128183
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1415634155273438,
      "learning_rate": 0.0002471711936914161,
      "loss": 3.1299,
      "step": 128184
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0138869285583496,
      "learning_rate": 0.0002471671670573948,
      "loss": 2.9588,
      "step": 128185
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2764554023742676,
      "learning_rate": 0.0002471631404331961,
      "loss": 3.0609,
      "step": 128186
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7922236919403076,
      "learning_rate": 0.00024715911381882074,
      "loss": 2.8969,
      "step": 128187
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.11171293258667,
      "learning_rate": 0.00024715508721426954,
      "loss": 3.07,
      "step": 128188
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7294511795043945,
      "learning_rate": 0.0002471510606195433,
      "loss": 2.9898,
      "step": 128189
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0096850395202637,
      "learning_rate": 0.00024714703403464264,
      "loss": 2.8895,
      "step": 128190
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2218451499938965,
      "learning_rate": 0.00024714300745956824,
      "loss": 2.9873,
      "step": 128191
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8569098711013794,
      "learning_rate": 0.000247138980894321,
      "loss": 2.8721,
      "step": 128192
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9202392101287842,
      "learning_rate": 0.0002471349543389016,
      "loss": 2.8656,
      "step": 128193
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8344879150390625,
      "learning_rate": 0.0002471309277933108,
      "loss": 2.7457,
      "step": 128194
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.104893207550049,
      "learning_rate": 0.0002471269012575495,
      "loss": 3.0598,
      "step": 128195
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0865769386291504,
      "learning_rate": 0.0002471228747316182,
      "loss": 2.8475,
      "step": 128196
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9999794960021973,
      "learning_rate": 0.00024711884821551774,
      "loss": 2.7577,
      "step": 128197
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.813596725463867,
      "learning_rate": 0.0002471148217092489,
      "loss": 3.0321,
      "step": 128198
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8269528150558472,
      "learning_rate": 0.0002471107952128124,
      "loss": 2.981,
      "step": 128199
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.031216859817505,
      "learning_rate": 0.000247106768726209,
      "loss": 2.9719,
      "step": 128200
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.636674404144287,
      "learning_rate": 0.0002471027422494396,
      "loss": 2.9428,
      "step": 128201
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.005120277404785,
      "learning_rate": 0.0002470987157825046,
      "loss": 2.9682,
      "step": 128202
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.405632734298706,
      "learning_rate": 0.00024709468932540503,
      "loss": 2.7609,
      "step": 128203
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.253483295440674,
      "learning_rate": 0.00024709066287814153,
      "loss": 3.1035,
      "step": 128204
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8689597845077515,
      "learning_rate": 0.00024708663644071484,
      "loss": 2.9478,
      "step": 128205
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1525139808654785,
      "learning_rate": 0.0002470826100131258,
      "loss": 3.0769,
      "step": 128206
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.10296630859375,
      "learning_rate": 0.00024707858359537517,
      "loss": 2.7413,
      "step": 128207
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9843961000442505,
      "learning_rate": 0.00024707455718746355,
      "loss": 2.9675,
      "step": 128208
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.352144479751587,
      "learning_rate": 0.0002470705307893918,
      "loss": 3.0436,
      "step": 128209
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3526864051818848,
      "learning_rate": 0.00024706650440116054,
      "loss": 2.8204,
      "step": 128210
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.2043678760528564,
      "learning_rate": 0.00024706247802277066,
      "loss": 2.9336,
      "step": 128211
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1787374019622803,
      "learning_rate": 0.0002470584516542229,
      "loss": 2.9519,
      "step": 128212
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1751773357391357,
      "learning_rate": 0.00024705442529551796,
      "loss": 3.1341,
      "step": 128213
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3740904331207275,
      "learning_rate": 0.00024705039894665665,
      "loss": 3.0417,
      "step": 128214
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.795518159866333,
      "learning_rate": 0.00024704637260763963,
      "loss": 3.0923,
      "step": 128215
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0031137466430664,
      "learning_rate": 0.00024704234627846765,
      "loss": 3.0707,
      "step": 128216
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9922794103622437,
      "learning_rate": 0.0002470383199591415,
      "loss": 3.1735,
      "step": 128217
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.946082592010498,
      "learning_rate": 0.00024703429364966197,
      "loss": 3.0162,
      "step": 128218
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8446919918060303,
      "learning_rate": 0.0002470302673500298,
      "loss": 3.0041,
      "step": 128219
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9492006301879883,
      "learning_rate": 0.0002470262410602456,
      "loss": 2.9077,
      "step": 128220
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.241734027862549,
      "learning_rate": 0.0002470222147803103,
      "loss": 3.2814,
      "step": 128221
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4434237480163574,
      "learning_rate": 0.0002470181885102246,
      "loss": 2.7509,
      "step": 128222
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.840506911277771,
      "learning_rate": 0.0002470141622499891,
      "loss": 2.8324,
      "step": 128223
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2422213554382324,
      "learning_rate": 0.00024701013599960473,
      "loss": 3.1064,
      "step": 128224
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8253958225250244,
      "learning_rate": 0.00024700610975907225,
      "loss": 3.0335,
      "step": 128225
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.3987953662872314,
      "learning_rate": 0.00024700208352839223,
      "loss": 3.025,
      "step": 128226
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8827407360076904,
      "learning_rate": 0.00024699805730756555,
      "loss": 3.2429,
      "step": 128227
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8352339267730713,
      "learning_rate": 0.0002469940310965929,
      "loss": 2.7769,
      "step": 128228
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7995765209198,
      "learning_rate": 0.00024699000489547513,
      "loss": 3.069,
      "step": 128229
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.403435707092285,
      "learning_rate": 0.0002469859787042129,
      "loss": 2.8111,
      "step": 128230
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.521606922149658,
      "learning_rate": 0.000246981952522807,
      "loss": 2.9539,
      "step": 128231
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.099890947341919,
      "learning_rate": 0.00024697792635125813,
      "loss": 2.9864,
      "step": 128232
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.077803611755371,
      "learning_rate": 0.000246973900189567,
      "loss": 2.9418,
      "step": 128233
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.638563871383667,
      "learning_rate": 0.0002469698740377345,
      "loss": 2.9848,
      "step": 128234
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.171630382537842,
      "learning_rate": 0.00024696584789576123,
      "loss": 2.7306,
      "step": 128235
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9629799127578735,
      "learning_rate": 0.0002469618217636481,
      "loss": 3.0055,
      "step": 128236
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.111524820327759,
      "learning_rate": 0.00024695779564139577,
      "loss": 2.954,
      "step": 128237
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.646533966064453,
      "learning_rate": 0.0002469537695290049,
      "loss": 2.8347,
      "step": 128238
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.563342332839966,
      "learning_rate": 0.00024694974342647635,
      "loss": 2.8226,
      "step": 128239
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.565800666809082,
      "learning_rate": 0.0002469457173338109,
      "loss": 2.8461,
      "step": 128240
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3310837745666504,
      "learning_rate": 0.00024694169125100916,
      "loss": 2.9839,
      "step": 128241
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.24304461479187,
      "learning_rate": 0.000246937665178072,
      "loss": 2.8268,
      "step": 128242
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.478965997695923,
      "learning_rate": 0.00024693363911500025,
      "loss": 3.094,
      "step": 128243
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1796343326568604,
      "learning_rate": 0.00024692961306179437,
      "loss": 3.0445,
      "step": 128244
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.262314796447754,
      "learning_rate": 0.00024692558701845533,
      "loss": 3.0715,
      "step": 128245
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.326277732849121,
      "learning_rate": 0.0002469215609849838,
      "loss": 2.8051,
      "step": 128246
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0437674522399902,
      "learning_rate": 0.00024691753496138057,
      "loss": 3.0937,
      "step": 128247
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8889014720916748,
      "learning_rate": 0.00024691350894764637,
      "loss": 3.177,
      "step": 128248
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.150735378265381,
      "learning_rate": 0.00024690948294378193,
      "loss": 2.9478,
      "step": 128249
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9379276037216187,
      "learning_rate": 0.0002469054569497882,
      "loss": 3.212,
      "step": 128250
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9229085445404053,
      "learning_rate": 0.00024690143096566555,
      "loss": 3.0049,
      "step": 128251
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2700531482696533,
      "learning_rate": 0.00024689740499141496,
      "loss": 3.2864,
      "step": 128252
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2134487628936768,
      "learning_rate": 0.0002468933790270371,
      "loss": 3.0351,
      "step": 128253
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8701063394546509,
      "learning_rate": 0.0002468893530725328,
      "loss": 3.2445,
      "step": 128254
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.089336395263672,
      "learning_rate": 0.00024688532712790286,
      "loss": 3.009,
      "step": 128255
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.017409324645996,
      "learning_rate": 0.00024688130119314796,
      "loss": 2.8803,
      "step": 128256
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.063356876373291,
      "learning_rate": 0.00024687727526826874,
      "loss": 2.803,
      "step": 128257
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1561102867126465,
      "learning_rate": 0.000246873249353266,
      "loss": 2.8602,
      "step": 128258
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.022449493408203,
      "learning_rate": 0.00024686922344814056,
      "loss": 3.119,
      "step": 128259
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.845179796218872,
      "learning_rate": 0.00024686519755289313,
      "loss": 2.8227,
      "step": 128260
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4479193687438965,
      "learning_rate": 0.00024686117166752445,
      "loss": 2.672,
      "step": 128261
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.489199161529541,
      "learning_rate": 0.0002468571457920354,
      "loss": 3.1303,
      "step": 128262
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4805991649627686,
      "learning_rate": 0.0002468531199264265,
      "loss": 3.1414,
      "step": 128263
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5345351696014404,
      "learning_rate": 0.00024684909407069865,
      "loss": 3.0942,
      "step": 128264
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.192077875137329,
      "learning_rate": 0.0002468450682248525,
      "loss": 3.1578,
      "step": 128265
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2853012084960938,
      "learning_rate": 0.0002468410423888888,
      "loss": 3.0136,
      "step": 128266
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.089841604232788,
      "learning_rate": 0.0002468370165628085,
      "loss": 3.0063,
      "step": 128267
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3187718391418457,
      "learning_rate": 0.00024683299074661224,
      "loss": 3.0939,
      "step": 128268
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0498974323272705,
      "learning_rate": 0.0002468289649403006,
      "loss": 3.1071,
      "step": 128269
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.157207727432251,
      "learning_rate": 0.00024682493914387446,
      "loss": 2.8437,
      "step": 128270
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.387326955795288,
      "learning_rate": 0.0002468209133573346,
      "loss": 3.0088,
      "step": 128271
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.192697525024414,
      "learning_rate": 0.0002468168875806817,
      "loss": 2.9868,
      "step": 128272
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.573063850402832,
      "learning_rate": 0.00024681286181391657,
      "loss": 3.0246,
      "step": 128273
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.511613607406616,
      "learning_rate": 0.0002468088360570401,
      "loss": 2.961,
      "step": 128274
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2027173042297363,
      "learning_rate": 0.00024680481031005266,
      "loss": 2.9483,
      "step": 128275
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9314709901809692,
      "learning_rate": 0.00024680078457295525,
      "loss": 3.0412,
      "step": 128276
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7926392555236816,
      "learning_rate": 0.0002467967588457486,
      "loss": 2.9678,
      "step": 128277
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1266062259674072,
      "learning_rate": 0.00024679273312843336,
      "loss": 2.8384,
      "step": 128278
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.192791223526001,
      "learning_rate": 0.00024678870742101045,
      "loss": 2.9848,
      "step": 128279
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.255089521408081,
      "learning_rate": 0.0002467846817234806,
      "loss": 2.8966,
      "step": 128280
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.881418466567993,
      "learning_rate": 0.00024678065603584435,
      "loss": 3.0297,
      "step": 128281
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5059099197387695,
      "learning_rate": 0.0002467766303581026,
      "loss": 3.0512,
      "step": 128282
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.293294906616211,
      "learning_rate": 0.00024677260469025607,
      "loss": 2.957,
      "step": 128283
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9569357633590698,
      "learning_rate": 0.0002467685790323055,
      "loss": 2.9328,
      "step": 128284
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7933602333068848,
      "learning_rate": 0.0002467645533842517,
      "loss": 3.0344,
      "step": 128285
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.06101655960083,
      "learning_rate": 0.00024676052774609547,
      "loss": 2.8338,
      "step": 128286
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9315186738967896,
      "learning_rate": 0.0002467565021178374,
      "loss": 2.9501,
      "step": 128287
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.042910099029541,
      "learning_rate": 0.0002467524764994782,
      "loss": 2.7665,
      "step": 128288
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2361717224121094,
      "learning_rate": 0.0002467484508910188,
      "loss": 3.1588,
      "step": 128289
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.610103130340576,
      "learning_rate": 0.0002467444252924598,
      "loss": 2.9682,
      "step": 128290
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7651351690292358,
      "learning_rate": 0.000246740399703802,
      "loss": 2.896,
      "step": 128291
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9859733581542969,
      "learning_rate": 0.0002467363741250464,
      "loss": 2.8054,
      "step": 128292
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.142847776412964,
      "learning_rate": 0.00024673234855619334,
      "loss": 3.2953,
      "step": 128293
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9453147649765015,
      "learning_rate": 0.00024672832299724374,
      "loss": 2.6898,
      "step": 128294
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.735595464706421,
      "learning_rate": 0.0002467242974481983,
      "loss": 3.2016,
      "step": 128295
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0383286476135254,
      "learning_rate": 0.00024672027190905785,
      "loss": 3.0692,
      "step": 128296
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7383924722671509,
      "learning_rate": 0.00024671624637982313,
      "loss": 3.4045,
      "step": 128297
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7263102531433105,
      "learning_rate": 0.00024671222086049494,
      "loss": 2.7557,
      "step": 128298
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.738581895828247,
      "learning_rate": 0.0002467081953510739,
      "loss": 3.0502,
      "step": 128299
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9693565368652344,
      "learning_rate": 0.0002467041698515608,
      "loss": 2.9187,
      "step": 128300
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7927448749542236,
      "learning_rate": 0.0002467001443619564,
      "loss": 3.026,
      "step": 128301
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.330047369003296,
      "learning_rate": 0.0002466961188822614,
      "loss": 3.0547,
      "step": 128302
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.163012742996216,
      "learning_rate": 0.0002466920934124766,
      "loss": 2.873,
      "step": 128303
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1464571952819824,
      "learning_rate": 0.00024668806795260286,
      "loss": 3.2033,
      "step": 128304
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.208730459213257,
      "learning_rate": 0.00024668404250264073,
      "loss": 2.8162,
      "step": 128305
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.124393939971924,
      "learning_rate": 0.0002466800170625911,
      "loss": 2.9593,
      "step": 128306
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.091027021408081,
      "learning_rate": 0.0002466759916324546,
      "loss": 3.1342,
      "step": 128307
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.614863395690918,
      "learning_rate": 0.00024667196621223203,
      "loss": 3.0505,
      "step": 128308
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8517330884933472,
      "learning_rate": 0.00024666794080192416,
      "loss": 2.9763,
      "step": 128309
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.5490200519561768,
      "learning_rate": 0.0002466639154015318,
      "loss": 3.1772,
      "step": 128310
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9579875469207764,
      "learning_rate": 0.00024665989001105556,
      "loss": 2.9495,
      "step": 128311
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7950122356414795,
      "learning_rate": 0.00024665586463049624,
      "loss": 2.8746,
      "step": 128312
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.980194330215454,
      "learning_rate": 0.0002466518392598546,
      "loss": 2.9402,
      "step": 128313
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.673006534576416,
      "learning_rate": 0.0002466478138991315,
      "loss": 2.9815,
      "step": 128314
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1010584831237793,
      "learning_rate": 0.00024664378854832744,
      "loss": 3.0498,
      "step": 128315
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.040066719055176,
      "learning_rate": 0.0002466397632074433,
      "loss": 3.0835,
      "step": 128316
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9144419431686401,
      "learning_rate": 0.00024663573787647995,
      "loss": 3.1133,
      "step": 128317
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3391225337982178,
      "learning_rate": 0.00024663171255543797,
      "loss": 3.0796,
      "step": 128318
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.527111530303955,
      "learning_rate": 0.00024662768724431817,
      "loss": 2.842,
      "step": 128319
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5240252017974854,
      "learning_rate": 0.0002466236619431212,
      "loss": 3.159,
      "step": 128320
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.036391258239746,
      "learning_rate": 0.00024661963665184805,
      "loss": 3.1571,
      "step": 128321
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.239854574203491,
      "learning_rate": 0.00024661561137049925,
      "loss": 2.9894,
      "step": 128322
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1259994506835938,
      "learning_rate": 0.00024661158609907567,
      "loss": 2.9851,
      "step": 128323
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1079769134521484,
      "learning_rate": 0.0002466075608375779,
      "loss": 2.8569,
      "step": 128324
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.6654455661773682,
      "learning_rate": 0.00024660353558600683,
      "loss": 3.2362,
      "step": 128325
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.16066575050354,
      "learning_rate": 0.00024659951034436314,
      "loss": 3.0434,
      "step": 128326
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9504483938217163,
      "learning_rate": 0.00024659548511264765,
      "loss": 2.8233,
      "step": 128327
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7082571983337402,
      "learning_rate": 0.0002465914598908611,
      "loss": 2.8643,
      "step": 128328
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.287950038909912,
      "learning_rate": 0.00024658743467900426,
      "loss": 2.8594,
      "step": 128329
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2167298793792725,
      "learning_rate": 0.0002465834094770777,
      "loss": 2.7844,
      "step": 128330
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8774845600128174,
      "learning_rate": 0.0002465793842850823,
      "loss": 3.0056,
      "step": 128331
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1877105236053467,
      "learning_rate": 0.0002465753591030188,
      "loss": 2.8343,
      "step": 128332
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5415241718292236,
      "learning_rate": 0.00024657133393088794,
      "loss": 3.067,
      "step": 128333
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0175235271453857,
      "learning_rate": 0.0002465673087686905,
      "loss": 3.071,
      "step": 128334
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.166043996810913,
      "learning_rate": 0.00024656328361642737,
      "loss": 3.0338,
      "step": 128335
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9812536239624023,
      "learning_rate": 0.00024655925847409893,
      "loss": 2.9677,
      "step": 128336
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5402140617370605,
      "learning_rate": 0.0002465552333417062,
      "loss": 2.7928,
      "step": 128337
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.055936336517334,
      "learning_rate": 0.0002465512082192498,
      "loss": 3.0835,
      "step": 128338
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8919222354888916,
      "learning_rate": 0.0002465471831067306,
      "loss": 2.9847,
      "step": 128339
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.478874683380127,
      "learning_rate": 0.00024654315800414926,
      "loss": 3.1189,
      "step": 128340
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.128098726272583,
      "learning_rate": 0.00024653913291150666,
      "loss": 3.0834,
      "step": 128341
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.628603219985962,
      "learning_rate": 0.0002465351078288034,
      "loss": 3.0039,
      "step": 128342
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.465831756591797,
      "learning_rate": 0.00024653108275604017,
      "loss": 2.9502,
      "step": 128343
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2890520095825195,
      "learning_rate": 0.00024652705769321784,
      "loss": 3.3071,
      "step": 128344
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.359921932220459,
      "learning_rate": 0.00024652303264033717,
      "loss": 2.6207,
      "step": 128345
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8315508365631104,
      "learning_rate": 0.0002465190075973989,
      "loss": 3.0552,
      "step": 128346
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.949147343635559,
      "learning_rate": 0.00024651498256440384,
      "loss": 3.1944,
      "step": 128347
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.243802547454834,
      "learning_rate": 0.00024651095754135255,
      "loss": 2.7248,
      "step": 128348
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4184317588806152,
      "learning_rate": 0.0002465069325282459,
      "loss": 2.9391,
      "step": 128349
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2401177883148193,
      "learning_rate": 0.00024650290752508455,
      "loss": 2.9194,
      "step": 128350
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1215903759002686,
      "learning_rate": 0.00024649888253186936,
      "loss": 2.9875,
      "step": 128351
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5572783946990967,
      "learning_rate": 0.000246494857548601,
      "loss": 3.0566,
      "step": 128352
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1701509952545166,
      "learning_rate": 0.00024649083257528045,
      "loss": 3.0769,
      "step": 128353
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8328169584274292,
      "learning_rate": 0.00024648680761190815,
      "loss": 3.2271,
      "step": 128354
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9173396825790405,
      "learning_rate": 0.0002464827826584849,
      "loss": 3.0267,
      "step": 128355
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.338005542755127,
      "learning_rate": 0.0002464787577150115,
      "loss": 2.8687,
      "step": 128356
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.350597381591797,
      "learning_rate": 0.00024647473278148875,
      "loss": 2.9542,
      "step": 128357
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4467105865478516,
      "learning_rate": 0.00024647070785791735,
      "loss": 2.8124,
      "step": 128358
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.338099718093872,
      "learning_rate": 0.0002464666829442982,
      "loss": 2.9,
      "step": 128359
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8851391077041626,
      "learning_rate": 0.0002464626580406318,
      "loss": 2.8861,
      "step": 128360
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.107687473297119,
      "learning_rate": 0.0002464586331469189,
      "loss": 2.9382,
      "step": 128361
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.277252197265625,
      "learning_rate": 0.00024645460826316044,
      "loss": 2.901,
      "step": 128362
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3558406829833984,
      "learning_rate": 0.00024645058338935706,
      "loss": 2.9509,
      "step": 128363
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.340888261795044,
      "learning_rate": 0.0002464465585255095,
      "loss": 3.0399,
      "step": 128364
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.321484088897705,
      "learning_rate": 0.0002464425336716187,
      "loss": 2.9674,
      "step": 128365
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.60225248336792,
      "learning_rate": 0.00024643850882768515,
      "loss": 2.7407,
      "step": 128366
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.429461717605591,
      "learning_rate": 0.0002464344839937096,
      "loss": 3.045,
      "step": 128367
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.9341742992401123,
      "learning_rate": 0.0002464304591696929,
      "loss": 2.8702,
      "step": 128368
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9021689891815186,
      "learning_rate": 0.0002464264343556358,
      "loss": 2.9268,
      "step": 128369
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.117844343185425,
      "learning_rate": 0.0002464224095515391,
      "loss": 2.9114,
      "step": 128370
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0424933433532715,
      "learning_rate": 0.0002464183847574036,
      "loss": 2.7206,
      "step": 128371
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.895761013031006,
      "learning_rate": 0.00024641435997322975,
      "loss": 3.0524,
      "step": 128372
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3436520099639893,
      "learning_rate": 0.0002464103351990185,
      "loss": 2.8347,
      "step": 128373
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2158944606781006,
      "learning_rate": 0.0002464063104347706,
      "loss": 2.8634,
      "step": 128374
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9740139245986938,
      "learning_rate": 0.00024640228568048676,
      "loss": 2.6852,
      "step": 128375
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9249122142791748,
      "learning_rate": 0.0002463982609361678,
      "loss": 2.7748,
      "step": 128376
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0252559185028076,
      "learning_rate": 0.0002463942362018145,
      "loss": 2.9115,
      "step": 128377
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.558612585067749,
      "learning_rate": 0.00024639021147742734,
      "loss": 2.9801,
      "step": 128378
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.129448652267456,
      "learning_rate": 0.0002463861867630073,
      "loss": 3.0979,
      "step": 128379
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.672934651374817,
      "learning_rate": 0.0002463821620585551,
      "loss": 2.8631,
      "step": 128380
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.240683078765869,
      "learning_rate": 0.00024637813736407146,
      "loss": 2.867,
      "step": 128381
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.285371780395508,
      "learning_rate": 0.00024637411267955714,
      "loss": 3.205,
      "step": 128382
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1470961570739746,
      "learning_rate": 0.0002463700880050129,
      "loss": 3.0031,
      "step": 128383
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.223820209503174,
      "learning_rate": 0.00024636606334043953,
      "loss": 3.1375,
      "step": 128384
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9112571477890015,
      "learning_rate": 0.00024636203868583765,
      "loss": 3.2025,
      "step": 128385
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7943658828735352,
      "learning_rate": 0.0002463580140412081,
      "loss": 2.9069,
      "step": 128386
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3960180282592773,
      "learning_rate": 0.0002463539894065516,
      "loss": 3.0739,
      "step": 128387
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9251525402069092,
      "learning_rate": 0.00024634996478186885,
      "loss": 3.0878,
      "step": 128388
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9831981658935547,
      "learning_rate": 0.0002463459401671607,
      "loss": 3.0824,
      "step": 128389
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.46828031539917,
      "learning_rate": 0.00024634191556242787,
      "loss": 3.0384,
      "step": 128390
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3698620796203613,
      "learning_rate": 0.0002463378909676711,
      "loss": 2.7884,
      "step": 128391
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.13972806930542,
      "learning_rate": 0.0002463338663828911,
      "loss": 3.1267,
      "step": 128392
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.375366687774658,
      "learning_rate": 0.00024632984180808867,
      "loss": 3.0207,
      "step": 128393
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.945037364959717,
      "learning_rate": 0.0002463258172432645,
      "loss": 2.7527,
      "step": 128394
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2808897495269775,
      "learning_rate": 0.0002463217926884194,
      "loss": 3.077,
      "step": 128395
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.432035207748413,
      "learning_rate": 0.0002463177681435541,
      "loss": 2.8954,
      "step": 128396
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.425459146499634,
      "learning_rate": 0.0002463137436086693,
      "loss": 2.9223,
      "step": 128397
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.52618145942688,
      "learning_rate": 0.00024630971908376584,
      "loss": 2.8354,
      "step": 128398
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.859168767929077,
      "learning_rate": 0.0002463056945688444,
      "loss": 3.1483,
      "step": 128399
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.940782308578491,
      "learning_rate": 0.0002463016700639057,
      "loss": 2.9481,
      "step": 128400
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3711655139923096,
      "learning_rate": 0.0002462976455689506,
      "loss": 3.2822,
      "step": 128401
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1710197925567627,
      "learning_rate": 0.0002462936210839798,
      "loss": 2.8912,
      "step": 128402
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5272486209869385,
      "learning_rate": 0.00024628959660899394,
      "loss": 3.114,
      "step": 128403
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1574440002441406,
      "learning_rate": 0.00024628557214399386,
      "loss": 2.9827,
      "step": 128404
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0790255069732666,
      "learning_rate": 0.0002462815476889803,
      "loss": 3.0021,
      "step": 128405
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4125237464904785,
      "learning_rate": 0.0002462775232439541,
      "loss": 2.9381,
      "step": 128406
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8173389434814453,
      "learning_rate": 0.0002462734988089159,
      "loss": 3.1981,
      "step": 128407
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2744429111480713,
      "learning_rate": 0.00024626947438386647,
      "loss": 2.8858,
      "step": 128408
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1469218730926514,
      "learning_rate": 0.00024626544996880653,
      "loss": 3.0551,
      "step": 128409
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3568899631500244,
      "learning_rate": 0.00024626142556373687,
      "loss": 2.9739,
      "step": 128410
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1644599437713623,
      "learning_rate": 0.0002462574011686582,
      "loss": 3.0391,
      "step": 128411
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1106483936309814,
      "learning_rate": 0.0002462533767835713,
      "loss": 2.9745,
      "step": 128412
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.036407232284546,
      "learning_rate": 0.00024624935240847695,
      "loss": 2.8293,
      "step": 128413
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.5002667903900146,
      "learning_rate": 0.0002462453280433759,
      "loss": 3.034,
      "step": 128414
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.052283763885498,
      "learning_rate": 0.0002462413036882688,
      "loss": 3.0861,
      "step": 128415
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9268847703933716,
      "learning_rate": 0.0002462372793431564,
      "loss": 2.6761,
      "step": 128416
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.409181594848633,
      "learning_rate": 0.00024623325500803953,
      "loss": 2.884,
      "step": 128417
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.272103786468506,
      "learning_rate": 0.000246229230682919,
      "loss": 2.9299,
      "step": 128418
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9082756042480469,
      "learning_rate": 0.00024622520636779534,
      "loss": 3.0605,
      "step": 128419
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.6419717073440552,
      "learning_rate": 0.0002462211820626696,
      "loss": 3.1385,
      "step": 128420
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.025181531906128,
      "learning_rate": 0.00024621715776754227,
      "loss": 3.0347,
      "step": 128421
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.951245903968811,
      "learning_rate": 0.00024621313348241415,
      "loss": 2.8736,
      "step": 128422
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.736391305923462,
      "learning_rate": 0.000246209109207286,
      "loss": 2.7944,
      "step": 128423
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.905328631401062,
      "learning_rate": 0.00024620508494215866,
      "loss": 2.8874,
      "step": 128424
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1597814559936523,
      "learning_rate": 0.00024620106068703275,
      "loss": 3.0369,
      "step": 128425
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9172282218933105,
      "learning_rate": 0.00024619703644190925,
      "loss": 3.1024,
      "step": 128426
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0775020122528076,
      "learning_rate": 0.00024619301220678855,
      "loss": 2.8584,
      "step": 128427
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.967705488204956,
      "learning_rate": 0.0002461889879816716,
      "loss": 2.8412,
      "step": 128428
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.670820713043213,
      "learning_rate": 0.0002461849637665592,
      "loss": 3.0916,
      "step": 128429
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7482335567474365,
      "learning_rate": 0.000246180939561452,
      "loss": 2.9183,
      "step": 128430
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0690746307373047,
      "learning_rate": 0.0002461769153663507,
      "loss": 2.8431,
      "step": 128431
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.706094741821289,
      "learning_rate": 0.0002461728911812564,
      "loss": 3.0174,
      "step": 128432
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.525282382965088,
      "learning_rate": 0.00024616886700616934,
      "loss": 3.144,
      "step": 128433
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1517906188964844,
      "learning_rate": 0.00024616484284109054,
      "loss": 2.9183,
      "step": 128434
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6076505184173584,
      "learning_rate": 0.0002461608186860207,
      "loss": 2.8817,
      "step": 128435
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.122504711151123,
      "learning_rate": 0.00024615679454096056,
      "loss": 3.0097,
      "step": 128436
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.421919107437134,
      "learning_rate": 0.0002461527704059109,
      "loss": 2.7619,
      "step": 128437
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9096983671188354,
      "learning_rate": 0.0002461487462808726,
      "loss": 3.1398,
      "step": 128438
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.354175329208374,
      "learning_rate": 0.0002461447221658462,
      "loss": 3.2503,
      "step": 128439
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.169062614440918,
      "learning_rate": 0.0002461406980608324,
      "loss": 2.8715,
      "step": 128440
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.3762705326080322,
      "learning_rate": 0.0002461366739658321,
      "loss": 3.0636,
      "step": 128441
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9330430030822754,
      "learning_rate": 0.00024613264988084606,
      "loss": 3.0519,
      "step": 128442
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.599418878555298,
      "learning_rate": 0.00024612862580587487,
      "loss": 2.9549,
      "step": 128443
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5134201049804688,
      "learning_rate": 0.0002461246017409196,
      "loss": 3.0459,
      "step": 128444
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.855625033378601,
      "learning_rate": 0.00024612057768598066,
      "loss": 3.1376,
      "step": 128445
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9354171752929688,
      "learning_rate": 0.00024611655364105883,
      "loss": 2.7614,
      "step": 128446
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7699825763702393,
      "learning_rate": 0.00024611252960615504,
      "loss": 2.9869,
      "step": 128447
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.206871271133423,
      "learning_rate": 0.0002461085055812699,
      "loss": 3.0825,
      "step": 128448
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0498249530792236,
      "learning_rate": 0.00024610448156640424,
      "loss": 2.8147,
      "step": 128449
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.769906044006348,
      "learning_rate": 0.00024610045756155876,
      "loss": 2.9349,
      "step": 128450
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.4817428588867188,
      "learning_rate": 0.0002460964335667344,
      "loss": 2.9445,
      "step": 128451
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.331472158432007,
      "learning_rate": 0.0002460924095819315,
      "loss": 3.2138,
      "step": 128452
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5595638751983643,
      "learning_rate": 0.00024608838560715113,
      "loss": 3.2171,
      "step": 128453
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7908918857574463,
      "learning_rate": 0.0002460843616423939,
      "loss": 3.209,
      "step": 128454
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.132486820220947,
      "learning_rate": 0.00024608033768766063,
      "loss": 2.8321,
      "step": 128455
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4568817615509033,
      "learning_rate": 0.000246076313742952,
      "loss": 3.0215,
      "step": 128456
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8532506227493286,
      "learning_rate": 0.000246072289808269,
      "loss": 2.9945,
      "step": 128457
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.817780017852783,
      "learning_rate": 0.00024606826588361195,
      "loss": 3.0089,
      "step": 128458
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.2407588958740234,
      "learning_rate": 0.0002460642419689819,
      "loss": 2.7611,
      "step": 128459
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.936923027038574,
      "learning_rate": 0.00024606021806437953,
      "loss": 2.8166,
      "step": 128460
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0038585662841797,
      "learning_rate": 0.00024605619416980554,
      "loss": 2.8621,
      "step": 128461
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1344571113586426,
      "learning_rate": 0.00024605217028526074,
      "loss": 2.977,
      "step": 128462
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.2751994132995605,
      "learning_rate": 0.000246048146410746,
      "loss": 2.802,
      "step": 128463
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.0929861068725586,
      "learning_rate": 0.0002460441225462618,
      "loss": 2.8977,
      "step": 128464
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.965840458869934,
      "learning_rate": 0.000246040098691809,
      "loss": 3.1613,
      "step": 128465
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3297955989837646,
      "learning_rate": 0.00024603607484738837,
      "loss": 3.2191,
      "step": 128466
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.59175181388855,
      "learning_rate": 0.0002460320510130007,
      "loss": 3.0158,
      "step": 128467
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.4607479572296143,
      "learning_rate": 0.0002460280271886466,
      "loss": 3.0498,
      "step": 128468
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6826329231262207,
      "learning_rate": 0.0002460240033743271,
      "loss": 3.0112,
      "step": 128469
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8063406944274902,
      "learning_rate": 0.0002460199795700426,
      "loss": 3.131,
      "step": 128470
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.124053716659546,
      "learning_rate": 0.00024601595577579405,
      "loss": 2.956,
      "step": 128471
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9653637409210205,
      "learning_rate": 0.00024601193199158214,
      "loss": 2.986,
      "step": 128472
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3179445266723633,
      "learning_rate": 0.0002460079082174076,
      "loss": 2.9606,
      "step": 128473
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8768410682678223,
      "learning_rate": 0.0002460038844532712,
      "loss": 2.9693,
      "step": 128474
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.214958429336548,
      "learning_rate": 0.00024599986069917377,
      "loss": 2.775,
      "step": 128475
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9276244640350342,
      "learning_rate": 0.000245995836955116,
      "loss": 2.6478,
      "step": 128476
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9897973537445068,
      "learning_rate": 0.00024599181322109857,
      "loss": 2.8478,
      "step": 128477
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8451521396636963,
      "learning_rate": 0.0002459877894971223,
      "loss": 2.8422,
      "step": 128478
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2022743225097656,
      "learning_rate": 0.00024598376578318786,
      "loss": 3.0253,
      "step": 128479
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4787118434906006,
      "learning_rate": 0.0002459797420792961,
      "loss": 3.1059,
      "step": 128480
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.822723150253296,
      "learning_rate": 0.00024597571838544776,
      "loss": 3.0735,
      "step": 128481
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8812437057495117,
      "learning_rate": 0.0002459716947016435,
      "loss": 3.0236,
      "step": 128482
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8123899698257446,
      "learning_rate": 0.0002459676710278842,
      "loss": 2.9657,
      "step": 128483
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2986490726470947,
      "learning_rate": 0.00024596364736417044,
      "loss": 2.7862,
      "step": 128484
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1060433387756348,
      "learning_rate": 0.0002459596237105031,
      "loss": 3.1235,
      "step": 128485
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0099942684173584,
      "learning_rate": 0.0002459556000668828,
      "loss": 2.9117,
      "step": 128486
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.460625410079956,
      "learning_rate": 0.00024595157643331054,
      "loss": 2.9904,
      "step": 128487
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.135960817337036,
      "learning_rate": 0.0002459475528097868,
      "loss": 2.9725,
      "step": 128488
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.648380756378174,
      "learning_rate": 0.0002459435291963124,
      "loss": 2.9641,
      "step": 128489
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9938786029815674,
      "learning_rate": 0.0002459395055928882,
      "loss": 3.0367,
      "step": 128490
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.224022388458252,
      "learning_rate": 0.0002459354819995148,
      "loss": 2.8622,
      "step": 128491
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.278542995452881,
      "learning_rate": 0.00024593145841619304,
      "loss": 3.1404,
      "step": 128492
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.906611442565918,
      "learning_rate": 0.0002459274348429237,
      "loss": 3.0358,
      "step": 128493
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9919180870056152,
      "learning_rate": 0.0002459234112797074,
      "loss": 2.9963,
      "step": 128494
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4107227325439453,
      "learning_rate": 0.0002459193877265449,
      "loss": 2.9783,
      "step": 128495
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9950840473175049,
      "learning_rate": 0.00024591536418343713,
      "loss": 3.0346,
      "step": 128496
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.884546160697937,
      "learning_rate": 0.00024591134065038466,
      "loss": 2.9711,
      "step": 128497
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4053478240966797,
      "learning_rate": 0.00024590731712738833,
      "loss": 3.192,
      "step": 128498
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8988566398620605,
      "learning_rate": 0.00024590329361444884,
      "loss": 3.1096,
      "step": 128499
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.108008623123169,
      "learning_rate": 0.0002458992701115669,
      "loss": 3.1179,
      "step": 128500
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9320387840270996,
      "learning_rate": 0.0002458952466187433,
      "loss": 2.6481,
      "step": 128501
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2448184490203857,
      "learning_rate": 0.00024589122313597886,
      "loss": 2.967,
      "step": 128502
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8582172393798828,
      "learning_rate": 0.0002458871996632742,
      "loss": 3.0356,
      "step": 128503
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8871893882751465,
      "learning_rate": 0.00024588317620063017,
      "loss": 2.9971,
      "step": 128504
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.149502992630005,
      "learning_rate": 0.00024587915274804756,
      "loss": 2.9779,
      "step": 128505
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9546645879745483,
      "learning_rate": 0.00024587512930552695,
      "loss": 2.98,
      "step": 128506
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1206295490264893,
      "learning_rate": 0.00024587110587306915,
      "loss": 2.7722,
      "step": 128507
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.238006830215454,
      "learning_rate": 0.00024586708245067496,
      "loss": 3.248,
      "step": 128508
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8760056495666504,
      "learning_rate": 0.00024586305903834504,
      "loss": 2.9187,
      "step": 128509
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.972289562225342,
      "learning_rate": 0.00024585903563608026,
      "loss": 2.7543,
      "step": 128510
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.022083282470703,
      "learning_rate": 0.00024585501224388143,
      "loss": 2.9242,
      "step": 128511
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9709166288375854,
      "learning_rate": 0.00024585098886174904,
      "loss": 3.19,
      "step": 128512
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.301920175552368,
      "learning_rate": 0.00024584696548968395,
      "loss": 2.8489,
      "step": 128513
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.018026113510132,
      "learning_rate": 0.000245842942127687,
      "loss": 3.2166,
      "step": 128514
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.276094675064087,
      "learning_rate": 0.0002458389187757588,
      "loss": 2.8508,
      "step": 128515
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9388480186462402,
      "learning_rate": 0.00024583489543390016,
      "loss": 2.9019,
      "step": 128516
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9836221933364868,
      "learning_rate": 0.0002458308721021119,
      "loss": 3.1748,
      "step": 128517
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.0222742557525635,
      "learning_rate": 0.00024582684878039475,
      "loss": 2.9823,
      "step": 128518
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.568474292755127,
      "learning_rate": 0.0002458228254687493,
      "loss": 2.9551,
      "step": 128519
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.678093433380127,
      "learning_rate": 0.00024581880216717646,
      "loss": 3.1217,
      "step": 128520
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8984520435333252,
      "learning_rate": 0.0002458147788756769,
      "loss": 3.2179,
      "step": 128521
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.02095890045166,
      "learning_rate": 0.0002458107555942514,
      "loss": 3.0107,
      "step": 128522
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.01015567779541,
      "learning_rate": 0.0002458067323229007,
      "loss": 2.9587,
      "step": 128523
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.4298882484436035,
      "learning_rate": 0.0002458027090616257,
      "loss": 3.0005,
      "step": 128524
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1308658123016357,
      "learning_rate": 0.0002457986858104269,
      "loss": 3.0151,
      "step": 128525
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.925269365310669,
      "learning_rate": 0.00024579466256930506,
      "loss": 2.9932,
      "step": 128526
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1625239849090576,
      "learning_rate": 0.000245790639338261,
      "loss": 2.8707,
      "step": 128527
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.910757064819336,
      "learning_rate": 0.0002457866161172956,
      "loss": 2.917,
      "step": 128528
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.361192464828491,
      "learning_rate": 0.0002457825929064094,
      "loss": 2.8999,
      "step": 128529
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.22617244720459,
      "learning_rate": 0.0002457785697056034,
      "loss": 2.8789,
      "step": 128530
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6832070350646973,
      "learning_rate": 0.0002457745465148781,
      "loss": 2.9202,
      "step": 128531
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.6589114665985107,
      "learning_rate": 0.0002457705233342342,
      "loss": 3.2051,
      "step": 128532
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.298354387283325,
      "learning_rate": 0.0002457665001636727,
      "loss": 3.0327,
      "step": 128533
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.272420644760132,
      "learning_rate": 0.0002457624770031942,
      "loss": 3.0915,
      "step": 128534
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.7703579664230347,
      "learning_rate": 0.00024575845385279953,
      "loss": 3.2209,
      "step": 128535
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.201092004776001,
      "learning_rate": 0.0002457544307124895,
      "loss": 3.1478,
      "step": 128536
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.120931386947632,
      "learning_rate": 0.0002457504075822646,
      "loss": 3.0057,
      "step": 128537
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8284835815429688,
      "learning_rate": 0.0002457463844621257,
      "loss": 3.0205,
      "step": 128538
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2283098697662354,
      "learning_rate": 0.00024574236135207355,
      "loss": 2.9067,
      "step": 128539
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.568422555923462,
      "learning_rate": 0.00024573833825210895,
      "loss": 3.1296,
      "step": 128540
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.090202569961548,
      "learning_rate": 0.0002457343151622326,
      "loss": 2.9997,
      "step": 128541
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9333267211914062,
      "learning_rate": 0.0002457302920824454,
      "loss": 2.9606,
      "step": 128542
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.236591339111328,
      "learning_rate": 0.0002457262690127479,
      "loss": 2.8897,
      "step": 128543
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.962159514427185,
      "learning_rate": 0.0002457222459531408,
      "loss": 2.8443,
      "step": 128544
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1349174976348877,
      "learning_rate": 0.00024571822290362505,
      "loss": 2.7774,
      "step": 128545
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.91791033744812,
      "learning_rate": 0.00024571419986420127,
      "loss": 3.0443,
      "step": 128546
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8911112546920776,
      "learning_rate": 0.00024571017683487026,
      "loss": 3.0613,
      "step": 128547
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.399296998977661,
      "learning_rate": 0.00024570615381563286,
      "loss": 3.1596,
      "step": 128548
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.161149263381958,
      "learning_rate": 0.0002457021308064896,
      "loss": 3.1896,
      "step": 128549
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.418131113052368,
      "learning_rate": 0.00024569810780744134,
      "loss": 2.933,
      "step": 128550
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2269978523254395,
      "learning_rate": 0.00024569408481848884,
      "loss": 3.0074,
      "step": 128551
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.942170262336731,
      "learning_rate": 0.00024569006183963277,
      "loss": 2.9548,
      "step": 128552
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.6971261501312256,
      "learning_rate": 0.000245686038870874,
      "loss": 2.6908,
      "step": 128553
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.18076229095459,
      "learning_rate": 0.00024568201591221333,
      "loss": 3.0318,
      "step": 128554
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.738542318344116,
      "learning_rate": 0.00024567799296365127,
      "loss": 3.0108,
      "step": 128555
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7670352458953857,
      "learning_rate": 0.0002456739700251887,
      "loss": 3.1246,
      "step": 128556
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9666069746017456,
      "learning_rate": 0.0002456699470968264,
      "loss": 2.9062,
      "step": 128557
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0106539726257324,
      "learning_rate": 0.00024566592417856506,
      "loss": 3.0864,
      "step": 128558
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.272599697113037,
      "learning_rate": 0.0002456619012704054,
      "loss": 2.9203,
      "step": 128559
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1258065700531006,
      "learning_rate": 0.0002456578783723484,
      "loss": 3.1156,
      "step": 128560
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9228464365005493,
      "learning_rate": 0.00024565385548439445,
      "loss": 3.0204,
      "step": 128561
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8467427492141724,
      "learning_rate": 0.00024564983260654455,
      "loss": 2.8757,
      "step": 128562
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.686977505683899,
      "learning_rate": 0.0002456458097387993,
      "loss": 2.9557,
      "step": 128563
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.981199264526367,
      "learning_rate": 0.00024564178688115957,
      "loss": 2.9292,
      "step": 128564
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.979170560836792,
      "learning_rate": 0.00024563776403362607,
      "loss": 2.8759,
      "step": 128565
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4705889225006104,
      "learning_rate": 0.00024563374119619956,
      "loss": 2.932,
      "step": 128566
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.366151809692383,
      "learning_rate": 0.0002456297183688807,
      "loss": 2.5927,
      "step": 128567
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1584761142730713,
      "learning_rate": 0.00024562569555167037,
      "loss": 3.2085,
      "step": 128568
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.932924270629883,
      "learning_rate": 0.00024562167274456916,
      "loss": 3.0808,
      "step": 128569
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.425882339477539,
      "learning_rate": 0.00024561764994757797,
      "loss": 2.68,
      "step": 128570
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8055671453475952,
      "learning_rate": 0.00024561362716069745,
      "loss": 3.2248,
      "step": 128571
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9764726161956787,
      "learning_rate": 0.0002456096043839284,
      "loss": 2.9056,
      "step": 128572
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.953385829925537,
      "learning_rate": 0.0002456055816172716,
      "loss": 2.5816,
      "step": 128573
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.537590265274048,
      "learning_rate": 0.0002456015588607277,
      "loss": 2.9867,
      "step": 128574
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9714417457580566,
      "learning_rate": 0.0002455975361142975,
      "loss": 2.9243,
      "step": 128575
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3750829696655273,
      "learning_rate": 0.00024559351337798173,
      "loss": 2.9263,
      "step": 128576
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5383660793304443,
      "learning_rate": 0.0002455894906517812,
      "loss": 2.8763,
      "step": 128577
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.3838415145874023,
      "learning_rate": 0.0002455854679356966,
      "loss": 2.9692,
      "step": 128578
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2150719165802,
      "learning_rate": 0.00024558144522972866,
      "loss": 3.0221,
      "step": 128579
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8531173467636108,
      "learning_rate": 0.00024557742253387814,
      "loss": 2.9422,
      "step": 128580
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.125553846359253,
      "learning_rate": 0.00024557339984814577,
      "loss": 3.0248,
      "step": 128581
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.229689121246338,
      "learning_rate": 0.00024556937717253247,
      "loss": 2.9998,
      "step": 128582
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.2534000873565674,
      "learning_rate": 0.00024556535450703874,
      "loss": 2.8657,
      "step": 128583
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.9492321014404297,
      "learning_rate": 0.00024556133185166544,
      "loss": 3.0018,
      "step": 128584
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1555325984954834,
      "learning_rate": 0.0002455573092064134,
      "loss": 3.0101,
      "step": 128585
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.418672561645508,
      "learning_rate": 0.0002455532865712832,
      "loss": 2.9391,
      "step": 128586
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.8739194869995117,
      "learning_rate": 0.0002455492639462757,
      "loss": 3.0268,
      "step": 128587
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.072234869003296,
      "learning_rate": 0.0002455452413313916,
      "loss": 2.8842,
      "step": 128588
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9214285612106323,
      "learning_rate": 0.00024554121872663175,
      "loss": 3.014,
      "step": 128589
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9480093717575073,
      "learning_rate": 0.00024553719613199676,
      "loss": 3.0247,
      "step": 128590
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9007256031036377,
      "learning_rate": 0.00024553317354748745,
      "loss": 2.8741,
      "step": 128591
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0583043098449707,
      "learning_rate": 0.0002455291509731045,
      "loss": 3.0873,
      "step": 128592
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0643622875213623,
      "learning_rate": 0.0002455251284088487,
      "loss": 3.0495,
      "step": 128593
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.886217713356018,
      "learning_rate": 0.00024552110585472083,
      "loss": 2.9232,
      "step": 128594
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.011244773864746,
      "learning_rate": 0.0002455170833107216,
      "loss": 2.9794,
      "step": 128595
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.653621196746826,
      "learning_rate": 0.0002455130607768518,
      "loss": 2.8456,
      "step": 128596
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.742695927619934,
      "learning_rate": 0.00024550903825311225,
      "loss": 3.0019,
      "step": 128597
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1254258155822754,
      "learning_rate": 0.00024550501573950353,
      "loss": 2.9301,
      "step": 128598
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5731186866760254,
      "learning_rate": 0.00024550099323602636,
      "loss": 3.0949,
      "step": 128599
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.301577091217041,
      "learning_rate": 0.00024549697074268166,
      "loss": 3.4695,
      "step": 128600
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.940885543823242,
      "learning_rate": 0.00024549294825947006,
      "loss": 2.9828,
      "step": 128601
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0149855613708496,
      "learning_rate": 0.0002454889257863924,
      "loss": 2.9815,
      "step": 128602
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.275615692138672,
      "learning_rate": 0.00024548490332344943,
      "loss": 3.2641,
      "step": 128603
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.007941722869873,
      "learning_rate": 0.0002454808808706418,
      "loss": 2.9373,
      "step": 128604
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.3430747985839844,
      "learning_rate": 0.00024547685842797026,
      "loss": 3.1457,
      "step": 128605
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0581395626068115,
      "learning_rate": 0.0002454728359954356,
      "loss": 3.1834,
      "step": 128606
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9681330919265747,
      "learning_rate": 0.00024546881357303857,
      "loss": 3.0804,
      "step": 128607
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.476536989212036,
      "learning_rate": 0.0002454647911607799,
      "loss": 2.9706,
      "step": 128608
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.2504191398620605,
      "learning_rate": 0.00024546076875866054,
      "loss": 3.076,
      "step": 128609
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.1254630088806152,
      "learning_rate": 0.0002454567463666809,
      "loss": 3.1307,
      "step": 128610
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9551318883895874,
      "learning_rate": 0.0002454527239848418,
      "loss": 3.1105,
      "step": 128611
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.567296028137207,
      "learning_rate": 0.00024544870161314415,
      "loss": 3.0183,
      "step": 128612
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.446878433227539,
      "learning_rate": 0.0002454446792515886,
      "loss": 2.7115,
      "step": 128613
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.342536926269531,
      "learning_rate": 0.0002454406569001759,
      "loss": 2.8542,
      "step": 128614
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3276407718658447,
      "learning_rate": 0.00024543663455890697,
      "loss": 2.6868,
      "step": 128615
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0200839042663574,
      "learning_rate": 0.00024543261222778227,
      "loss": 3.0797,
      "step": 128616
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.727423191070557,
      "learning_rate": 0.00024542858990680267,
      "loss": 3.0304,
      "step": 128617
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.37563419342041,
      "learning_rate": 0.0002454245675959689,
      "loss": 2.9593,
      "step": 128618
    },
    {
      "epoch": 1.67,
      "grad_norm": 4.3885178565979,
      "learning_rate": 0.00024542054529528174,
      "loss": 2.7637,
      "step": 128619
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.7244653701782227,
      "learning_rate": 0.00024541652300474193,
      "loss": 2.967,
      "step": 128620
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.5212182998657227,
      "learning_rate": 0.0002454125007243504,
      "loss": 2.9028,
      "step": 128621
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7023086547851562,
      "learning_rate": 0.00024540847845410757,
      "loss": 2.8956,
      "step": 128622
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.6638801097869873,
      "learning_rate": 0.00024540445619401427,
      "loss": 2.9213,
      "step": 128623
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.5629913806915283,
      "learning_rate": 0.0002454004339440714,
      "loss": 2.8745,
      "step": 128624
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2507424354553223,
      "learning_rate": 0.00024539641170427955,
      "loss": 3.0919,
      "step": 128625
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.1235756874084473,
      "learning_rate": 0.00024539238947463954,
      "loss": 3.2617,
      "step": 128626
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.5379416942596436,
      "learning_rate": 0.00024538836725515226,
      "loss": 2.7844,
      "step": 128627
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.8425142765045166,
      "learning_rate": 0.0002453843450458182,
      "loss": 3.061,
      "step": 128628
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3882734775543213,
      "learning_rate": 0.0002453803228466382,
      "loss": 2.6951,
      "step": 128629
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.0804860591888428,
      "learning_rate": 0.00024537630065761307,
      "loss": 2.6374,
      "step": 128630
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.198949098587036,
      "learning_rate": 0.0002453722784787435,
      "loss": 3.0517,
      "step": 128631
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.682880163192749,
      "learning_rate": 0.0002453682563100302,
      "loss": 2.9434,
      "step": 128632
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3775787353515625,
      "learning_rate": 0.0002453642341514742,
      "loss": 3.029,
      "step": 128633
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.855482578277588,
      "learning_rate": 0.0002453602120030758,
      "loss": 2.88,
      "step": 128634
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.3924124240875244,
      "learning_rate": 0.00024535618986483605,
      "loss": 2.9851,
      "step": 128635
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.3621816635131836,
      "learning_rate": 0.00024535216773675553,
      "loss": 3.0043,
      "step": 128636
    },
    {
      "epoch": 1.67,
      "grad_norm": 1.9885674715042114,
      "learning_rate": 0.00024534814561883513,
      "loss": 2.9989,
      "step": 128637
    },
    {
      "epoch": 1.67,
      "grad_norm": 3.5387165546417236,
      "learning_rate": 0.0002453441235110755,
      "loss": 2.8833,
      "step": 128638
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.4619624614715576,
      "learning_rate": 0.0002453401014134776,
      "loss": 3.1313,
      "step": 128639
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.7171733379364014,
      "learning_rate": 0.00024533607932604185,
      "loss": 3.0348,
      "step": 128640
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.2027525901794434,
      "learning_rate": 0.00024533205724876916,
      "loss": 3.067,
      "step": 128641
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8803141117095947,
      "learning_rate": 0.00024532803518166027,
      "loss": 3.1387,
      "step": 128642
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.5301597118377686,
      "learning_rate": 0.00024532401312471594,
      "loss": 2.9715,
      "step": 128643
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.508859395980835,
      "learning_rate": 0.0002453199910779369,
      "loss": 2.9742,
      "step": 128644
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2491490840911865,
      "learning_rate": 0.00024531596904132404,
      "loss": 2.9625,
      "step": 128645
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.006159543991089,
      "learning_rate": 0.0002453119470148779,
      "loss": 2.8365,
      "step": 128646
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1140904426574707,
      "learning_rate": 0.0002453079249985992,
      "loss": 2.7364,
      "step": 128647
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.232146978378296,
      "learning_rate": 0.0002453039029924888,
      "loss": 2.627,
      "step": 128648
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.260964870452881,
      "learning_rate": 0.0002452998809965475,
      "loss": 2.9982,
      "step": 128649
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.838538408279419,
      "learning_rate": 0.000245295859010776,
      "loss": 2.8455,
      "step": 128650
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.231182098388672,
      "learning_rate": 0.00024529183703517497,
      "loss": 3.2412,
      "step": 128651
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.6900105476379395,
      "learning_rate": 0.0002452878150697454,
      "loss": 3.2207,
      "step": 128652
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.3184802532196045,
      "learning_rate": 0.00024528379311448766,
      "loss": 3.016,
      "step": 128653
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.876368761062622,
      "learning_rate": 0.00024527977116940274,
      "loss": 3.1603,
      "step": 128654
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9100193977355957,
      "learning_rate": 0.00024527574923449133,
      "loss": 2.7947,
      "step": 128655
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8274558782577515,
      "learning_rate": 0.00024527172730975423,
      "loss": 3.0749,
      "step": 128656
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.219181776046753,
      "learning_rate": 0.0002452677053951921,
      "loss": 3.2107,
      "step": 128657
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.413461208343506,
      "learning_rate": 0.0002452636834908058,
      "loss": 2.7885,
      "step": 128658
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9142593145370483,
      "learning_rate": 0.000245259661596596,
      "loss": 2.8277,
      "step": 128659
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4611713886260986,
      "learning_rate": 0.00024525563971256346,
      "loss": 2.8509,
      "step": 128660
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3831238746643066,
      "learning_rate": 0.0002452516178387089,
      "loss": 3.1302,
      "step": 128661
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0463781356811523,
      "learning_rate": 0.00024524759597503314,
      "loss": 3.0925,
      "step": 128662
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9847887754440308,
      "learning_rate": 0.0002452435741215368,
      "loss": 2.9151,
      "step": 128663
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9064186811447144,
      "learning_rate": 0.00024523955227822084,
      "loss": 2.944,
      "step": 128664
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1190578937530518,
      "learning_rate": 0.0002452355304450858,
      "loss": 3.0209,
      "step": 128665
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.211989402770996,
      "learning_rate": 0.0002452315086221325,
      "loss": 3.0212,
      "step": 128666
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.396441698074341,
      "learning_rate": 0.00024522748680936174,
      "loss": 3.0823,
      "step": 128667
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0527048110961914,
      "learning_rate": 0.0002452234650067742,
      "loss": 2.8442,
      "step": 128668
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.277966022491455,
      "learning_rate": 0.00024521944321437067,
      "loss": 2.9992,
      "step": 128669
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.272615432739258,
      "learning_rate": 0.0002452154214321519,
      "loss": 3.0039,
      "step": 128670
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.035254955291748,
      "learning_rate": 0.00024521139966011854,
      "loss": 2.9722,
      "step": 128671
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3526856899261475,
      "learning_rate": 0.0002452073778982715,
      "loss": 2.8038,
      "step": 128672
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.687808036804199,
      "learning_rate": 0.0002452033561466113,
      "loss": 2.9837,
      "step": 128673
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.370248794555664,
      "learning_rate": 0.00024519933440513903,
      "loss": 3.0471,
      "step": 128674
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1372873783111572,
      "learning_rate": 0.00024519531267385515,
      "loss": 3.2195,
      "step": 128675
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.754987359046936,
      "learning_rate": 0.0002451912909527605,
      "loss": 2.7918,
      "step": 128676
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.09152889251709,
      "learning_rate": 0.0002451872692418558,
      "loss": 2.9558,
      "step": 128677
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.264878749847412,
      "learning_rate": 0.00024518324754114183,
      "loss": 2.9613,
      "step": 128678
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0937843322753906,
      "learning_rate": 0.0002451792258506193,
      "loss": 3.0759,
      "step": 128679
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.868146300315857,
      "learning_rate": 0.000245175204170289,
      "loss": 2.7214,
      "step": 128680
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1530632972717285,
      "learning_rate": 0.00024517118250015174,
      "loss": 2.9089,
      "step": 128681
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6736700534820557,
      "learning_rate": 0.00024516716084020814,
      "loss": 2.9851,
      "step": 128682
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8452799320220947,
      "learning_rate": 0.00024516313919045903,
      "loss": 2.9585,
      "step": 128683
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9361528158187866,
      "learning_rate": 0.00024515911755090505,
      "loss": 2.9999,
      "step": 128684
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.742647409439087,
      "learning_rate": 0.000245155095921547,
      "loss": 2.9921,
      "step": 128685
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0408923625946045,
      "learning_rate": 0.00024515107430238573,
      "loss": 2.9482,
      "step": 128686
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1629655361175537,
      "learning_rate": 0.0002451470526934219,
      "loss": 2.9198,
      "step": 128687
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1887989044189453,
      "learning_rate": 0.0002451430310946564,
      "loss": 3.1923,
      "step": 128688
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7756367921829224,
      "learning_rate": 0.00024513900950608967,
      "loss": 3.0246,
      "step": 128689
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8635276556015015,
      "learning_rate": 0.00024513498792772264,
      "loss": 3.1104,
      "step": 128690
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.276273012161255,
      "learning_rate": 0.00024513096635955605,
      "loss": 2.8065,
      "step": 128691
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1528689861297607,
      "learning_rate": 0.0002451269448015907,
      "loss": 3.0396,
      "step": 128692
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.867116689682007,
      "learning_rate": 0.00024512292325382725,
      "loss": 2.7468,
      "step": 128693
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.507817029953003,
      "learning_rate": 0.00024511890171626666,
      "loss": 2.9468,
      "step": 128694
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6776318550109863,
      "learning_rate": 0.00024511488018890933,
      "loss": 2.9525,
      "step": 128695
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.045747995376587,
      "learning_rate": 0.00024511085867175614,
      "loss": 2.8715,
      "step": 128696
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.903594970703125,
      "learning_rate": 0.00024510683716480794,
      "loss": 3.107,
      "step": 128697
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8693500757217407,
      "learning_rate": 0.0002451028156680654,
      "loss": 3.0869,
      "step": 128698
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.397623300552368,
      "learning_rate": 0.0002450987941815293,
      "loss": 3.0851,
      "step": 128699
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9939316511154175,
      "learning_rate": 0.00024509477270520045,
      "loss": 2.862,
      "step": 128700
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9128831624984741,
      "learning_rate": 0.0002450907512390794,
      "loss": 2.8351,
      "step": 128701
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.404952049255371,
      "learning_rate": 0.00024508672978316705,
      "loss": 2.9151,
      "step": 128702
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2164766788482666,
      "learning_rate": 0.0002450827083374641,
      "loss": 2.7321,
      "step": 128703
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2645456790924072,
      "learning_rate": 0.0002450786869019713,
      "loss": 2.8724,
      "step": 128704
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0893337726593018,
      "learning_rate": 0.0002450746654766894,
      "loss": 3.0106,
      "step": 128705
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.330293655395508,
      "learning_rate": 0.0002450706440616193,
      "loss": 2.7891,
      "step": 128706
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.951680898666382,
      "learning_rate": 0.0002450666226567615,
      "loss": 2.8537,
      "step": 128707
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.541990280151367,
      "learning_rate": 0.00024506260126211686,
      "loss": 2.9583,
      "step": 128708
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.810809850692749,
      "learning_rate": 0.00024505857987768605,
      "loss": 3.0361,
      "step": 128709
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0554511547088623,
      "learning_rate": 0.00024505455850346995,
      "loss": 2.954,
      "step": 128710
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0448875427246094,
      "learning_rate": 0.0002450505371394692,
      "loss": 2.9223,
      "step": 128711
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1281800270080566,
      "learning_rate": 0.0002450465157856848,
      "loss": 2.8739,
      "step": 128712
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.417752504348755,
      "learning_rate": 0.0002450424944421171,
      "loss": 2.7456,
      "step": 128713
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.886454463005066,
      "learning_rate": 0.000245038473108767,
      "loss": 3.1732,
      "step": 128714
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7702161073684692,
      "learning_rate": 0.0002450344517856353,
      "loss": 2.8618,
      "step": 128715
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5294268131256104,
      "learning_rate": 0.00024503043047272277,
      "loss": 3.04,
      "step": 128716
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3514509201049805,
      "learning_rate": 0.00024502640917003013,
      "loss": 3.074,
      "step": 128717
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.107374906539917,
      "learning_rate": 0.0002450223878775581,
      "loss": 2.9191,
      "step": 128718
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1658999919891357,
      "learning_rate": 0.00024501836659530755,
      "loss": 3.0333,
      "step": 128719
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2544994354248047,
      "learning_rate": 0.00024501434532327903,
      "loss": 2.8239,
      "step": 128720
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8776578903198242,
      "learning_rate": 0.0002450103240614734,
      "loss": 2.868,
      "step": 128721
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7464628219604492,
      "learning_rate": 0.0002450063028098913,
      "loss": 2.8456,
      "step": 128722
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1453518867492676,
      "learning_rate": 0.00024500228156853365,
      "loss": 3.086,
      "step": 128723
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7487214803695679,
      "learning_rate": 0.00024499826033740107,
      "loss": 2.7807,
      "step": 128724
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.318260669708252,
      "learning_rate": 0.0002449942391164945,
      "loss": 2.8283,
      "step": 128725
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.088655948638916,
      "learning_rate": 0.00024499021790581443,
      "loss": 2.9748,
      "step": 128726
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.574247360229492,
      "learning_rate": 0.0002449861967053617,
      "loss": 2.9388,
      "step": 128727
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.738382577896118,
      "learning_rate": 0.00024498217551513707,
      "loss": 3.1017,
      "step": 128728
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7570433616638184,
      "learning_rate": 0.0002449781543351413,
      "loss": 3.065,
      "step": 128729
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3985836505889893,
      "learning_rate": 0.0002449741331653751,
      "loss": 2.8208,
      "step": 128730
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2163515090942383,
      "learning_rate": 0.0002449701120058394,
      "loss": 2.9051,
      "step": 128731
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.423976182937622,
      "learning_rate": 0.00024496609085653464,
      "loss": 3.0616,
      "step": 128732
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0945048332214355,
      "learning_rate": 0.0002449620697174618,
      "loss": 2.9808,
      "step": 128733
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.437274932861328,
      "learning_rate": 0.00024495804858862144,
      "loss": 2.7223,
      "step": 128734
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8940352201461792,
      "learning_rate": 0.0002449540274700145,
      "loss": 3.0481,
      "step": 128735
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.848754644393921,
      "learning_rate": 0.00024495000636164165,
      "loss": 2.9335,
      "step": 128736
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9696671962738037,
      "learning_rate": 0.0002449459852635037,
      "loss": 2.7013,
      "step": 128737
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.130932092666626,
      "learning_rate": 0.00024494196417560127,
      "loss": 2.8139,
      "step": 128738
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5851550102233887,
      "learning_rate": 0.0002449379430979351,
      "loss": 2.8644,
      "step": 128739
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.936869502067566,
      "learning_rate": 0.000244933922030506,
      "loss": 3.103,
      "step": 128740
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8533012866973877,
      "learning_rate": 0.0002449299009733148,
      "loss": 3.0114,
      "step": 128741
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.879951238632202,
      "learning_rate": 0.00024492587992636214,
      "loss": 3.0188,
      "step": 128742
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.380389213562012,
      "learning_rate": 0.00024492185888964885,
      "loss": 2.9595,
      "step": 128743
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2922286987304688,
      "learning_rate": 0.0002449178378631756,
      "loss": 3.0732,
      "step": 128744
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0803425312042236,
      "learning_rate": 0.00024491381684694314,
      "loss": 3.0585,
      "step": 128745
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5463364124298096,
      "learning_rate": 0.00024490979584095224,
      "loss": 3.0058,
      "step": 128746
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.2969186305999756,
      "learning_rate": 0.00024490577484520364,
      "loss": 2.8667,
      "step": 128747
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4186952114105225,
      "learning_rate": 0.0002449017538596981,
      "loss": 2.9422,
      "step": 128748
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8248625993728638,
      "learning_rate": 0.00024489773288443646,
      "loss": 2.9059,
      "step": 128749
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.435964345932007,
      "learning_rate": 0.00024489371191941924,
      "loss": 2.8272,
      "step": 128750
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1318929195404053,
      "learning_rate": 0.0002448896909646474,
      "loss": 2.9541,
      "step": 128751
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.126749277114868,
      "learning_rate": 0.00024488567002012155,
      "loss": 2.9244,
      "step": 128752
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7715866565704346,
      "learning_rate": 0.00024488164908584254,
      "loss": 2.8654,
      "step": 128753
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.44123911857605,
      "learning_rate": 0.000244877628161811,
      "loss": 2.8354,
      "step": 128754
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.524571418762207,
      "learning_rate": 0.00024487360724802785,
      "loss": 2.5353,
      "step": 128755
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.9827497005462646,
      "learning_rate": 0.00024486958634449367,
      "loss": 3.147,
      "step": 128756
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5198378562927246,
      "learning_rate": 0.0002448655654512093,
      "loss": 2.9195,
      "step": 128757
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.6988403797149658,
      "learning_rate": 0.0002448615445681754,
      "loss": 3.1838,
      "step": 128758
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.585712432861328,
      "learning_rate": 0.0002448575236953929,
      "loss": 3.0381,
      "step": 128759
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.806440830230713,
      "learning_rate": 0.0002448535028328624,
      "loss": 3.0607,
      "step": 128760
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.105655193328857,
      "learning_rate": 0.0002448494819805846,
      "loss": 3.1972,
      "step": 128761
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.36649227142334,
      "learning_rate": 0.0002448454611385604,
      "loss": 2.6655,
      "step": 128762
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0610945224761963,
      "learning_rate": 0.0002448414403067904,
      "loss": 2.7698,
      "step": 128763
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.539659023284912,
      "learning_rate": 0.00024483741948527543,
      "loss": 3.1526,
      "step": 128764
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.411428451538086,
      "learning_rate": 0.00024483339867401624,
      "loss": 3.1736,
      "step": 128765
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.326000452041626,
      "learning_rate": 0.0002448293778730136,
      "loss": 3.2664,
      "step": 128766
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8335535526275635,
      "learning_rate": 0.0002448253570822682,
      "loss": 3.1044,
      "step": 128767
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.103764295578003,
      "learning_rate": 0.0002448213363017808,
      "loss": 3.0785,
      "step": 128768
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.610572099685669,
      "learning_rate": 0.00024481731553155215,
      "loss": 3.2894,
      "step": 128769
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7765017747879028,
      "learning_rate": 0.00024481329477158293,
      "loss": 2.917,
      "step": 128770
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1542177200317383,
      "learning_rate": 0.00024480927402187406,
      "loss": 2.8391,
      "step": 128771
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9936068058013916,
      "learning_rate": 0.00024480525328242613,
      "loss": 3.1443,
      "step": 128772
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.118255376815796,
      "learning_rate": 0.00024480123255324007,
      "loss": 2.8527,
      "step": 128773
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2714786529541016,
      "learning_rate": 0.0002447972118343164,
      "loss": 2.9264,
      "step": 128774
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0168707370758057,
      "learning_rate": 0.0002447931911256559,
      "loss": 3.0443,
      "step": 128775
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7692536115646362,
      "learning_rate": 0.0002447891704272595,
      "loss": 2.7741,
      "step": 128776
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.769301414489746,
      "learning_rate": 0.00024478514973912775,
      "loss": 2.878,
      "step": 128777
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.909790277481079,
      "learning_rate": 0.00024478112906126153,
      "loss": 3.0081,
      "step": 128778
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0172200202941895,
      "learning_rate": 0.00024477710839366167,
      "loss": 3.12,
      "step": 128779
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8826310634613037,
      "learning_rate": 0.0002447730877363286,
      "loss": 2.8559,
      "step": 128780
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0592775344848633,
      "learning_rate": 0.00024476906708926333,
      "loss": 3.0312,
      "step": 128781
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5448713302612305,
      "learning_rate": 0.0002447650464524665,
      "loss": 3.0526,
      "step": 128782
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.55134916305542,
      "learning_rate": 0.0002447610258259389,
      "loss": 2.9361,
      "step": 128783
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2481141090393066,
      "learning_rate": 0.0002447570052096813,
      "loss": 2.9009,
      "step": 128784
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.521152973175049,
      "learning_rate": 0.0002447529846036943,
      "loss": 2.7192,
      "step": 128785
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.210181713104248,
      "learning_rate": 0.000244748964007979,
      "loss": 2.9916,
      "step": 128786
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.336446523666382,
      "learning_rate": 0.0002447449434225357,
      "loss": 3.055,
      "step": 128787
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2557506561279297,
      "learning_rate": 0.00024474092284736544,
      "loss": 2.9666,
      "step": 128788
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.990950345993042,
      "learning_rate": 0.00024473690228246886,
      "loss": 3.3435,
      "step": 128789
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.730947494506836,
      "learning_rate": 0.0002447328817278467,
      "loss": 2.7457,
      "step": 128790
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.035545587539673,
      "learning_rate": 0.00024472886118349983,
      "loss": 2.7195,
      "step": 128791
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.533324718475342,
      "learning_rate": 0.00024472484064942896,
      "loss": 3.0775,
      "step": 128792
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.532085418701172,
      "learning_rate": 0.00024472082012563464,
      "loss": 2.8115,
      "step": 128793
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.137106418609619,
      "learning_rate": 0.0002447167996121178,
      "loss": 3.1639,
      "step": 128794
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0925180912017822,
      "learning_rate": 0.00024471277910887915,
      "loss": 3.0556,
      "step": 128795
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7847682237625122,
      "learning_rate": 0.00024470875861591943,
      "loss": 3.3039,
      "step": 128796
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9215235710144043,
      "learning_rate": 0.0002447047381332394,
      "loss": 3.0594,
      "step": 128797
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9613784551620483,
      "learning_rate": 0.0002447007176608399,
      "loss": 3.1245,
      "step": 128798
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7197792530059814,
      "learning_rate": 0.00024469669719872153,
      "loss": 2.9533,
      "step": 128799
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1674327850341797,
      "learning_rate": 0.000244692676746885,
      "loss": 2.9259,
      "step": 128800
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3375866413116455,
      "learning_rate": 0.0002446886563053312,
      "loss": 2.8625,
      "step": 128801
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.2902517318725586,
      "learning_rate": 0.00024468463587406077,
      "loss": 2.9093,
      "step": 128802
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1532421112060547,
      "learning_rate": 0.00024468061545307457,
      "loss": 2.9921,
      "step": 128803
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.753139019012451,
      "learning_rate": 0.0002446765950423734,
      "loss": 2.9422,
      "step": 128804
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1346535682678223,
      "learning_rate": 0.0002446725746419578,
      "loss": 2.709,
      "step": 128805
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.277637481689453,
      "learning_rate": 0.00024466855425182854,
      "loss": 2.8566,
      "step": 128806
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.988472580909729,
      "learning_rate": 0.00024466453387198646,
      "loss": 3.048,
      "step": 128807
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9492894411087036,
      "learning_rate": 0.0002446605135024323,
      "loss": 3.0205,
      "step": 128808
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8794690370559692,
      "learning_rate": 0.00024465649314316676,
      "loss": 3.0023,
      "step": 128809
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9225497245788574,
      "learning_rate": 0.00024465247279419083,
      "loss": 2.7615,
      "step": 128810
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9730740785598755,
      "learning_rate": 0.00024464845245550484,
      "loss": 2.8546,
      "step": 128811
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.015042304992676,
      "learning_rate": 0.0002446444321271098,
      "loss": 3.06,
      "step": 128812
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.059859275817871,
      "learning_rate": 0.0002446404118090064,
      "loss": 3.0145,
      "step": 128813
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.057995319366455,
      "learning_rate": 0.0002446363915011954,
      "loss": 2.7744,
      "step": 128814
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.357999086380005,
      "learning_rate": 0.00024463237120367755,
      "loss": 2.9071,
      "step": 128815
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.029508590698242,
      "learning_rate": 0.00024462835091645365,
      "loss": 2.8426,
      "step": 128816
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.3119921684265137,
      "learning_rate": 0.0002446243306395243,
      "loss": 2.8375,
      "step": 128817
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.3384811878204346,
      "learning_rate": 0.00024462031037289034,
      "loss": 2.8384,
      "step": 128818
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0319132804870605,
      "learning_rate": 0.0002446162901165525,
      "loss": 3.0433,
      "step": 128819
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.516085147857666,
      "learning_rate": 0.0002446122698705115,
      "loss": 3.0783,
      "step": 128820
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9958546161651611,
      "learning_rate": 0.0002446082496347682,
      "loss": 2.8251,
      "step": 128821
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.776262879371643,
      "learning_rate": 0.00024460422940932334,
      "loss": 3.1222,
      "step": 128822
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9454345703125,
      "learning_rate": 0.0002446002091941775,
      "loss": 2.8236,
      "step": 128823
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2137162685394287,
      "learning_rate": 0.00024459618898933154,
      "loss": 2.849,
      "step": 128824
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.556239128112793,
      "learning_rate": 0.00024459216879478616,
      "loss": 3.0951,
      "step": 128825
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2453205585479736,
      "learning_rate": 0.0002445881486105421,
      "loss": 2.9757,
      "step": 128826
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8535139560699463,
      "learning_rate": 0.0002445841284366002,
      "loss": 2.7695,
      "step": 128827
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0409257411956787,
      "learning_rate": 0.00024458010827296123,
      "loss": 2.9245,
      "step": 128828
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5865631103515625,
      "learning_rate": 0.0002445760881196259,
      "loss": 3.0934,
      "step": 128829
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3884928226470947,
      "learning_rate": 0.0002445720679765948,
      "loss": 2.7519,
      "step": 128830
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1465365886688232,
      "learning_rate": 0.0002445680478438688,
      "loss": 3.1986,
      "step": 128831
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.072273015975952,
      "learning_rate": 0.00024456402772144863,
      "loss": 2.9539,
      "step": 128832
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5222222805023193,
      "learning_rate": 0.0002445600076093351,
      "loss": 2.9059,
      "step": 128833
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4810376167297363,
      "learning_rate": 0.00024455598750752896,
      "loss": 2.7629,
      "step": 128834
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2733561992645264,
      "learning_rate": 0.0002445519674160308,
      "loss": 2.9523,
      "step": 128835
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1035897731781006,
      "learning_rate": 0.00024454794733484166,
      "loss": 3.0591,
      "step": 128836
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9961967468261719,
      "learning_rate": 0.00024454392726396196,
      "loss": 3.0688,
      "step": 128837
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1158649921417236,
      "learning_rate": 0.00024453990720339254,
      "loss": 2.6724,
      "step": 128838
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9152982234954834,
      "learning_rate": 0.00024453588715313426,
      "loss": 3.16,
      "step": 128839
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.862894296646118,
      "learning_rate": 0.0002445318671131879,
      "loss": 2.9618,
      "step": 128840
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.192315101623535,
      "learning_rate": 0.000244527847083554,
      "loss": 2.7702,
      "step": 128841
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.010366916656494,
      "learning_rate": 0.0002445238270642334,
      "loss": 2.9985,
      "step": 128842
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0567750930786133,
      "learning_rate": 0.000244519807055227,
      "loss": 3.0557,
      "step": 128843
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8136652708053589,
      "learning_rate": 0.0002445157870565353,
      "loss": 2.975,
      "step": 128844
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0575711727142334,
      "learning_rate": 0.00024451176706815914,
      "loss": 3.0131,
      "step": 128845
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0222325325012207,
      "learning_rate": 0.0002445077470900994,
      "loss": 2.8248,
      "step": 128846
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.753830909729004,
      "learning_rate": 0.00024450372712235666,
      "loss": 2.959,
      "step": 128847
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.97950279712677,
      "learning_rate": 0.0002444997071649317,
      "loss": 2.9571,
      "step": 128848
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3153014183044434,
      "learning_rate": 0.0002444956872178253,
      "loss": 2.8793,
      "step": 128849
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3213188648223877,
      "learning_rate": 0.0002444916672810382,
      "loss": 3.0094,
      "step": 128850
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0708248615264893,
      "learning_rate": 0.0002444876473545712,
      "loss": 3.1802,
      "step": 128851
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1621696949005127,
      "learning_rate": 0.000244483627438425,
      "loss": 2.8383,
      "step": 128852
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.07594895362854,
      "learning_rate": 0.0002444796075326003,
      "loss": 2.9083,
      "step": 128853
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.9721579551696777,
      "learning_rate": 0.00024447558763709786,
      "loss": 3.0343,
      "step": 128854
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.318051815032959,
      "learning_rate": 0.00024447156775191846,
      "loss": 3.0404,
      "step": 128855
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.810749053955078,
      "learning_rate": 0.00024446754787706284,
      "loss": 2.9583,
      "step": 128856
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8043370246887207,
      "learning_rate": 0.0002444635280125318,
      "loss": 2.9911,
      "step": 128857
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1559691429138184,
      "learning_rate": 0.00024445950815832606,
      "loss": 2.6154,
      "step": 128858
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.201503276824951,
      "learning_rate": 0.00024445548831444636,
      "loss": 2.933,
      "step": 128859
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3560469150543213,
      "learning_rate": 0.00024445146848089337,
      "loss": 3.0282,
      "step": 128860
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2315430641174316,
      "learning_rate": 0.0002444474486576679,
      "loss": 2.886,
      "step": 128861
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.517301082611084,
      "learning_rate": 0.0002444434288447706,
      "loss": 2.9771,
      "step": 128862
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1329188346862793,
      "learning_rate": 0.00024443940904220245,
      "loss": 3.0608,
      "step": 128863
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7909021377563477,
      "learning_rate": 0.000244435389249964,
      "loss": 3.1795,
      "step": 128864
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0866737365722656,
      "learning_rate": 0.0002444313694680562,
      "loss": 3.0499,
      "step": 128865
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0140702724456787,
      "learning_rate": 0.00024442734969647953,
      "loss": 2.8543,
      "step": 128866
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7634873390197754,
      "learning_rate": 0.0002444233299352349,
      "loss": 2.8782,
      "step": 128867
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2620599269866943,
      "learning_rate": 0.000244419310184323,
      "loss": 2.972,
      "step": 128868
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8510180711746216,
      "learning_rate": 0.00024441529044374457,
      "loss": 2.9043,
      "step": 128869
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1713778972625732,
      "learning_rate": 0.0002444112707135004,
      "loss": 3.0981,
      "step": 128870
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.15539813041687,
      "learning_rate": 0.00024440725099359134,
      "loss": 2.817,
      "step": 128871
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.556632041931152,
      "learning_rate": 0.00024440323128401795,
      "loss": 2.9051,
      "step": 128872
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7366106510162354,
      "learning_rate": 0.000244399211584781,
      "loss": 2.9314,
      "step": 128873
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.01981258392334,
      "learning_rate": 0.0002443951918958813,
      "loss": 2.9748,
      "step": 128874
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8795487880706787,
      "learning_rate": 0.00024439117221731956,
      "loss": 2.7956,
      "step": 128875
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.65920352935791,
      "learning_rate": 0.00024438715254909655,
      "loss": 3.1255,
      "step": 128876
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.5208816528320312,
      "learning_rate": 0.0002443831328912132,
      "loss": 3.1874,
      "step": 128877
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.064255952835083,
      "learning_rate": 0.0002443791132436699,
      "loss": 3.0995,
      "step": 128878
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7294672727584839,
      "learning_rate": 0.00024437509360646765,
      "loss": 2.9284,
      "step": 128879
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.929801106452942,
      "learning_rate": 0.000244371073979607,
      "loss": 2.9896,
      "step": 128880
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3927361965179443,
      "learning_rate": 0.0002443670543630889,
      "loss": 3.1531,
      "step": 128881
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5703623294830322,
      "learning_rate": 0.00024436303475691397,
      "loss": 2.9793,
      "step": 128882
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.115642786026001,
      "learning_rate": 0.00024435901516108315,
      "loss": 3.0604,
      "step": 128883
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9331998825073242,
      "learning_rate": 0.0002443549955755969,
      "loss": 2.9971,
      "step": 128884
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.291991949081421,
      "learning_rate": 0.00024435097600045613,
      "loss": 2.8878,
      "step": 128885
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.105623960494995,
      "learning_rate": 0.00024434695643566157,
      "loss": 2.9917,
      "step": 128886
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.3046112060546875,
      "learning_rate": 0.0002443429368812139,
      "loss": 2.911,
      "step": 128887
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9260203838348389,
      "learning_rate": 0.000244338917337114,
      "loss": 2.9434,
      "step": 128888
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.317660093307495,
      "learning_rate": 0.00024433489780336266,
      "loss": 2.7102,
      "step": 128889
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.832708716392517,
      "learning_rate": 0.00024433087827996033,
      "loss": 3.1322,
      "step": 128890
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6682422161102295,
      "learning_rate": 0.000244326858766908,
      "loss": 2.8892,
      "step": 128891
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8429244756698608,
      "learning_rate": 0.0002443228392642063,
      "loss": 3.0189,
      "step": 128892
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.070537567138672,
      "learning_rate": 0.0002443188197718561,
      "loss": 2.9586,
      "step": 128893
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.005194664001465,
      "learning_rate": 0.00024431480028985807,
      "loss": 3.0191,
      "step": 128894
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4012112617492676,
      "learning_rate": 0.0002443107808182131,
      "loss": 2.8367,
      "step": 128895
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3461062908172607,
      "learning_rate": 0.00024430676135692164,
      "loss": 3.1048,
      "step": 128896
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.822330355644226,
      "learning_rate": 0.0002443027419059846,
      "loss": 2.9528,
      "step": 128897
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1702253818511963,
      "learning_rate": 0.0002442987224654028,
      "loss": 3.035,
      "step": 128898
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.3743386268615723,
      "learning_rate": 0.0002442947030351769,
      "loss": 2.8956,
      "step": 128899
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9714826345443726,
      "learning_rate": 0.0002442906836153076,
      "loss": 2.7971,
      "step": 128900
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8641139268875122,
      "learning_rate": 0.0002442866642057959,
      "loss": 2.5501,
      "step": 128901
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6895477771759033,
      "learning_rate": 0.00024428264480664223,
      "loss": 2.6344,
      "step": 128902
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.025948524475098,
      "learning_rate": 0.00024427862541784746,
      "loss": 2.8308,
      "step": 128903
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.898846387863159,
      "learning_rate": 0.00024427460603941234,
      "loss": 3.1902,
      "step": 128904
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3916964530944824,
      "learning_rate": 0.00024427058667133764,
      "loss": 2.9306,
      "step": 128905
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8026788234710693,
      "learning_rate": 0.00024426656731362407,
      "loss": 3.2607,
      "step": 128906
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.680433988571167,
      "learning_rate": 0.00024426254796627254,
      "loss": 2.9017,
      "step": 128907
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4662299156188965,
      "learning_rate": 0.0002442585286292835,
      "loss": 3.0597,
      "step": 128908
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.782884120941162,
      "learning_rate": 0.00024425450930265784,
      "loss": 2.8719,
      "step": 128909
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1128053665161133,
      "learning_rate": 0.0002442504899863964,
      "loss": 3.0516,
      "step": 128910
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4321508407592773,
      "learning_rate": 0.0002442464706804998,
      "loss": 2.8447,
      "step": 128911
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.79798424243927,
      "learning_rate": 0.00024424245138496883,
      "loss": 3.0598,
      "step": 128912
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.043511152267456,
      "learning_rate": 0.00024423843209980426,
      "loss": 2.8249,
      "step": 128913
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0320024490356445,
      "learning_rate": 0.00024423441282500684,
      "loss": 3.0359,
      "step": 128914
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.6720775365829468,
      "learning_rate": 0.00024423039356057726,
      "loss": 3.0112,
      "step": 128915
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0947868824005127,
      "learning_rate": 0.00024422637430651634,
      "loss": 3.0671,
      "step": 128916
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.40720534324646,
      "learning_rate": 0.00024422235506282473,
      "loss": 2.8799,
      "step": 128917
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.866590142250061,
      "learning_rate": 0.00024421833582950324,
      "loss": 2.8611,
      "step": 128918
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.451512575149536,
      "learning_rate": 0.0002442143166065526,
      "loss": 2.8434,
      "step": 128919
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.720078706741333,
      "learning_rate": 0.00024421029739397367,
      "loss": 2.8959,
      "step": 128920
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.9437062740325928,
      "learning_rate": 0.0002442062781917671,
      "loss": 2.8986,
      "step": 128921
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0551037788391113,
      "learning_rate": 0.0002442022589999336,
      "loss": 3.0331,
      "step": 128922
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0726094245910645,
      "learning_rate": 0.0002441982398184739,
      "loss": 2.6925,
      "step": 128923
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.054384708404541,
      "learning_rate": 0.0002441942206473888,
      "loss": 2.8872,
      "step": 128924
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.9290080070495605,
      "learning_rate": 0.00024419020148667904,
      "loss": 2.8986,
      "step": 128925
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.792649030685425,
      "learning_rate": 0.0002441861823363455,
      "loss": 2.9213,
      "step": 128926
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9938387870788574,
      "learning_rate": 0.00024418216319638865,
      "loss": 3.0511,
      "step": 128927
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.353740692138672,
      "learning_rate": 0.00024417814406680954,
      "loss": 3.0882,
      "step": 128928
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.251112461090088,
      "learning_rate": 0.0002441741249476086,
      "loss": 2.9657,
      "step": 128929
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4999284744262695,
      "learning_rate": 0.0002441701058387869,
      "loss": 2.9625,
      "step": 128930
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.30220627784729,
      "learning_rate": 0.0002441660867403449,
      "loss": 2.8207,
      "step": 128931
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0664567947387695,
      "learning_rate": 0.0002441620676522836,
      "loss": 3.1547,
      "step": 128932
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1426329612731934,
      "learning_rate": 0.0002441580485746035,
      "loss": 2.8561,
      "step": 128933
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.260305881500244,
      "learning_rate": 0.00024415402950730557,
      "loss": 2.9135,
      "step": 128934
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4855868816375732,
      "learning_rate": 0.00024415001045039044,
      "loss": 3.1183,
      "step": 128935
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.891028642654419,
      "learning_rate": 0.00024414599140385883,
      "loss": 2.6575,
      "step": 128936
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8279143571853638,
      "learning_rate": 0.0002441419723677116,
      "loss": 2.9398,
      "step": 128937
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2690274715423584,
      "learning_rate": 0.00024413795334194942,
      "loss": 3.0865,
      "step": 128938
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8274619579315186,
      "learning_rate": 0.000244133934326573,
      "loss": 3.2053,
      "step": 128939
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0654311180114746,
      "learning_rate": 0.00024412991532158317,
      "loss": 3.0248,
      "step": 128940
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9502997398376465,
      "learning_rate": 0.0002441258963269806,
      "loss": 2.9366,
      "step": 128941
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.196946382522583,
      "learning_rate": 0.00024412187734276612,
      "loss": 3.0138,
      "step": 128942
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.818422794342041,
      "learning_rate": 0.00024411785836894048,
      "loss": 2.9849,
      "step": 128943
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9917683601379395,
      "learning_rate": 0.0002441138394055044,
      "loss": 3.0452,
      "step": 128944
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7341324090957642,
      "learning_rate": 0.00024410982045245853,
      "loss": 2.8031,
      "step": 128945
    },
    {
      "epoch": 1.68,
      "grad_norm": 5.919418811798096,
      "learning_rate": 0.0002441058015098037,
      "loss": 2.953,
      "step": 128946
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.004733085632324,
      "learning_rate": 0.00024410178257754065,
      "loss": 2.986,
      "step": 128947
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9127908945083618,
      "learning_rate": 0.00024409776365567014,
      "loss": 3.0738,
      "step": 128948
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.416893482208252,
      "learning_rate": 0.00024409374474419293,
      "loss": 3.0969,
      "step": 128949
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6757938861846924,
      "learning_rate": 0.0002440897258431099,
      "loss": 2.9048,
      "step": 128950
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7907248735427856,
      "learning_rate": 0.00024408570695242144,
      "loss": 3.2246,
      "step": 128951
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0905401706695557,
      "learning_rate": 0.00024408168807212857,
      "loss": 2.9666,
      "step": 128952
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1931405067443848,
      "learning_rate": 0.00024407766920223192,
      "loss": 3.0513,
      "step": 128953
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.896231770515442,
      "learning_rate": 0.0002440736503427323,
      "loss": 2.8129,
      "step": 128954
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6904847621917725,
      "learning_rate": 0.00024406963149363047,
      "loss": 2.9164,
      "step": 128955
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9437243938446045,
      "learning_rate": 0.00024406561265492727,
      "loss": 2.8791,
      "step": 128956
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.723410129547119,
      "learning_rate": 0.0002440615938266232,
      "loss": 3.171,
      "step": 128957
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1280651092529297,
      "learning_rate": 0.0002440575750087191,
      "loss": 2.8499,
      "step": 128958
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.077719211578369,
      "learning_rate": 0.00024405355620121577,
      "loss": 2.9505,
      "step": 128959
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9442497491836548,
      "learning_rate": 0.00024404953740411397,
      "loss": 3.1786,
      "step": 128960
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4138615131378174,
      "learning_rate": 0.0002440455186174144,
      "loss": 2.8947,
      "step": 128961
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9512182474136353,
      "learning_rate": 0.00024404149984111795,
      "loss": 3.1378,
      "step": 128962
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.243084669113159,
      "learning_rate": 0.00024403748107522513,
      "loss": 3.0023,
      "step": 128963
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9017066955566406,
      "learning_rate": 0.0002440334623197368,
      "loss": 3.1297,
      "step": 128964
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2126379013061523,
      "learning_rate": 0.00024402944357465367,
      "loss": 3.0356,
      "step": 128965
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.846564292907715,
      "learning_rate": 0.00024402542483997655,
      "loss": 2.9533,
      "step": 128966
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6841886043548584,
      "learning_rate": 0.00024402140611570614,
      "loss": 3.1867,
      "step": 128967
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8011784553527832,
      "learning_rate": 0.00024401738740184334,
      "loss": 2.9908,
      "step": 128968
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.20459246635437,
      "learning_rate": 0.00024401336869838864,
      "loss": 2.7766,
      "step": 128969
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.542870044708252,
      "learning_rate": 0.0002440093500053429,
      "loss": 2.6935,
      "step": 128970
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2427287101745605,
      "learning_rate": 0.00024400533132270692,
      "loss": 2.9047,
      "step": 128971
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1691226959228516,
      "learning_rate": 0.0002440013126504814,
      "loss": 2.7757,
      "step": 128972
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3804428577423096,
      "learning_rate": 0.00024399729398866707,
      "loss": 3.057,
      "step": 128973
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.073878765106201,
      "learning_rate": 0.00024399327533726484,
      "loss": 2.9912,
      "step": 128974
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.946919560432434,
      "learning_rate": 0.00024398925669627517,
      "loss": 2.9612,
      "step": 128975
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9931156635284424,
      "learning_rate": 0.00024398523806569894,
      "loss": 3.1101,
      "step": 128976
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4654433727264404,
      "learning_rate": 0.00024398121944553696,
      "loss": 2.9542,
      "step": 128977
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5114805698394775,
      "learning_rate": 0.0002439772008357899,
      "loss": 3.1078,
      "step": 128978
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2930870056152344,
      "learning_rate": 0.00024397318223645854,
      "loss": 2.8993,
      "step": 128979
    },
    {
      "epoch": 1.68,
      "grad_norm": 5.022522449493408,
      "learning_rate": 0.00024396916364754375,
      "loss": 3.0503,
      "step": 128980
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.7073237895965576,
      "learning_rate": 0.00024396514506904604,
      "loss": 2.8125,
      "step": 128981
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.798647165298462,
      "learning_rate": 0.00024396112650096623,
      "loss": 2.9114,
      "step": 128982
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3592171669006348,
      "learning_rate": 0.00024395710794330514,
      "loss": 2.7884,
      "step": 128983
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.403033494949341,
      "learning_rate": 0.00024395308939606346,
      "loss": 2.8071,
      "step": 128984
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1416969299316406,
      "learning_rate": 0.00024394907085924195,
      "loss": 3.0317,
      "step": 128985
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.845640182495117,
      "learning_rate": 0.00024394505233284137,
      "loss": 3.0663,
      "step": 128986
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.3412303924560547,
      "learning_rate": 0.00024394103381686263,
      "loss": 3.1439,
      "step": 128987
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8416386842727661,
      "learning_rate": 0.00024393701531130616,
      "loss": 2.7407,
      "step": 128988
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5935728549957275,
      "learning_rate": 0.00024393299681617285,
      "loss": 3.0602,
      "step": 128989
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.675114870071411,
      "learning_rate": 0.00024392897833146345,
      "loss": 2.9381,
      "step": 128990
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2503507137298584,
      "learning_rate": 0.00024392495985717873,
      "loss": 2.8398,
      "step": 128991
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8556121587753296,
      "learning_rate": 0.0002439209413933194,
      "loss": 2.9004,
      "step": 128992
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2327880859375,
      "learning_rate": 0.00024391692293988638,
      "loss": 3.0132,
      "step": 128993
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5284085273742676,
      "learning_rate": 0.0002439129044968801,
      "loss": 2.9522,
      "step": 128994
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.9930996894836426,
      "learning_rate": 0.00024390888606430148,
      "loss": 2.8429,
      "step": 128995
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7981486320495605,
      "learning_rate": 0.00024390486764215124,
      "loss": 3.0381,
      "step": 128996
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.6738483905792236,
      "learning_rate": 0.00024390084923043022,
      "loss": 2.8545,
      "step": 128997
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3933603763580322,
      "learning_rate": 0.00024389683082913903,
      "loss": 3.0672,
      "step": 128998
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.629297733306885,
      "learning_rate": 0.0002438928124382786,
      "loss": 2.9613,
      "step": 128999
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.71291184425354,
      "learning_rate": 0.00024388879405784941,
      "loss": 2.7453,
      "step": 129000
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.045290470123291,
      "learning_rate": 0.0002438847756878524,
      "loss": 3.0055,
      "step": 129001
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9102944135665894,
      "learning_rate": 0.00024388075732828825,
      "loss": 3.143,
      "step": 129002
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.034069061279297,
      "learning_rate": 0.00024387673897915773,
      "loss": 2.8221,
      "step": 129003
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.852014422416687,
      "learning_rate": 0.0002438727206404616,
      "loss": 3.0053,
      "step": 129004
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9710557460784912,
      "learning_rate": 0.00024386870231220067,
      "loss": 2.6333,
      "step": 129005
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5997045040130615,
      "learning_rate": 0.00024386468399437554,
      "loss": 3.0448,
      "step": 129006
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9743454456329346,
      "learning_rate": 0.00024386066568698697,
      "loss": 2.8813,
      "step": 129007
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.324275493621826,
      "learning_rate": 0.0002438566473900358,
      "loss": 2.9412,
      "step": 129008
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.193516492843628,
      "learning_rate": 0.00024385262910352272,
      "loss": 2.9827,
      "step": 129009
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2233407497406006,
      "learning_rate": 0.0002438486108274485,
      "loss": 2.894,
      "step": 129010
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9221773147583008,
      "learning_rate": 0.00024384459256181394,
      "loss": 2.8309,
      "step": 129011
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9484211206436157,
      "learning_rate": 0.0002438405743066197,
      "loss": 2.863,
      "step": 129012
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4134061336517334,
      "learning_rate": 0.0002438365560618666,
      "loss": 3.1845,
      "step": 129013
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.112926483154297,
      "learning_rate": 0.00024383253782755528,
      "loss": 3.1969,
      "step": 129014
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0531933307647705,
      "learning_rate": 0.00024382851960368654,
      "loss": 2.8913,
      "step": 129015
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2079896926879883,
      "learning_rate": 0.00024382450139026114,
      "loss": 3.0486,
      "step": 129016
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.564343214035034,
      "learning_rate": 0.00024382048318727988,
      "loss": 2.8458,
      "step": 129017
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.557283401489258,
      "learning_rate": 0.00024381646499474342,
      "loss": 3.2171,
      "step": 129018
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8930082321166992,
      "learning_rate": 0.00024381244681265254,
      "loss": 3.0939,
      "step": 129019
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2018792629241943,
      "learning_rate": 0.00024380842864100806,
      "loss": 2.9312,
      "step": 129020
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8637027740478516,
      "learning_rate": 0.00024380441047981054,
      "loss": 3.0436,
      "step": 129021
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2126307487487793,
      "learning_rate": 0.00024380039232906088,
      "loss": 3.0838,
      "step": 129022
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0946474075317383,
      "learning_rate": 0.00024379637418875982,
      "loss": 2.9833,
      "step": 129023
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8572075366973877,
      "learning_rate": 0.00024379235605890802,
      "loss": 3.0297,
      "step": 129024
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.584702968597412,
      "learning_rate": 0.0002437883379395063,
      "loss": 2.9687,
      "step": 129025
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2051427364349365,
      "learning_rate": 0.0002437843198305554,
      "loss": 2.6943,
      "step": 129026
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9891289472579956,
      "learning_rate": 0.0002437803017320561,
      "loss": 3.086,
      "step": 129027
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.77993106842041,
      "learning_rate": 0.00024377628364400904,
      "loss": 3.0675,
      "step": 129028
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1336891651153564,
      "learning_rate": 0.0002437722655664151,
      "loss": 2.7966,
      "step": 129029
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.493828058242798,
      "learning_rate": 0.00024376824749927487,
      "loss": 3.1562,
      "step": 129030
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.708082914352417,
      "learning_rate": 0.0002437642294425892,
      "loss": 3.0892,
      "step": 129031
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.940606951713562,
      "learning_rate": 0.00024376021139635882,
      "loss": 2.8689,
      "step": 129032
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2000415325164795,
      "learning_rate": 0.0002437561933605845,
      "loss": 2.8747,
      "step": 129033
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1544623374938965,
      "learning_rate": 0.000243752175335267,
      "loss": 3.0666,
      "step": 129034
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.006274700164795,
      "learning_rate": 0.00024374815732040704,
      "loss": 2.8916,
      "step": 129035
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9901164770126343,
      "learning_rate": 0.00024374413931600527,
      "loss": 2.9969,
      "step": 129036
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.283059597015381,
      "learning_rate": 0.00024374012132206258,
      "loss": 2.7807,
      "step": 129037
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.843546748161316,
      "learning_rate": 0.00024373610333857965,
      "loss": 3.1626,
      "step": 129038
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0275375843048096,
      "learning_rate": 0.0002437320853655572,
      "loss": 3.1956,
      "step": 129039
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0272138118743896,
      "learning_rate": 0.00024372806740299605,
      "loss": 3.0271,
      "step": 129040
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.477393388748169,
      "learning_rate": 0.00024372404945089706,
      "loss": 3.0289,
      "step": 129041
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.095182180404663,
      "learning_rate": 0.00024372003150926067,
      "loss": 2.8419,
      "step": 129042
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.787453055381775,
      "learning_rate": 0.00024371601357808781,
      "loss": 2.7575,
      "step": 129043
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2364249229431152,
      "learning_rate": 0.0002437119956573792,
      "loss": 3.0501,
      "step": 129044
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9282201528549194,
      "learning_rate": 0.00024370797774713558,
      "loss": 3.2432,
      "step": 129045
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6067166328430176,
      "learning_rate": 0.00024370395984735774,
      "loss": 3.0466,
      "step": 129046
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5880870819091797,
      "learning_rate": 0.00024369994195804647,
      "loss": 2.9773,
      "step": 129047
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8166850805282593,
      "learning_rate": 0.00024369592407920236,
      "loss": 2.6385,
      "step": 129048
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5277085304260254,
      "learning_rate": 0.00024369190621082625,
      "loss": 2.8659,
      "step": 129049
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7943545579910278,
      "learning_rate": 0.00024368788835291881,
      "loss": 2.7843,
      "step": 129050
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.991487741470337,
      "learning_rate": 0.0002436838705054809,
      "loss": 2.9981,
      "step": 129051
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.297926187515259,
      "learning_rate": 0.00024367985266851325,
      "loss": 2.9808,
      "step": 129052
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7615268230438232,
      "learning_rate": 0.00024367583484201654,
      "loss": 2.6493,
      "step": 129053
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.06347393989563,
      "learning_rate": 0.0002436718170259917,
      "loss": 2.9942,
      "step": 129054
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3033361434936523,
      "learning_rate": 0.0002436677992204392,
      "loss": 3.0216,
      "step": 129055
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.3977861404418945,
      "learning_rate": 0.00024366378142535994,
      "loss": 3.0074,
      "step": 129056
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4519057273864746,
      "learning_rate": 0.0002436597636407546,
      "loss": 3.1132,
      "step": 129057
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8014283180236816,
      "learning_rate": 0.00024365574586662398,
      "loss": 3.1312,
      "step": 129058
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.799188256263733,
      "learning_rate": 0.00024365172810296884,
      "loss": 3.1792,
      "step": 129059
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.900570034980774,
      "learning_rate": 0.00024364771034979005,
      "loss": 2.8273,
      "step": 129060
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1823911666870117,
      "learning_rate": 0.0002436436926070881,
      "loss": 3.0005,
      "step": 129061
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.05349588394165,
      "learning_rate": 0.0002436396748748638,
      "loss": 3.022,
      "step": 129062
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.682966709136963,
      "learning_rate": 0.00024363565715311794,
      "loss": 2.9651,
      "step": 129063
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9294512271881104,
      "learning_rate": 0.00024363163944185134,
      "loss": 2.9049,
      "step": 129064
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8921830654144287,
      "learning_rate": 0.00024362762174106464,
      "loss": 2.7966,
      "step": 129065
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7654974460601807,
      "learning_rate": 0.00024362360405075876,
      "loss": 2.8131,
      "step": 129066
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0437848567962646,
      "learning_rate": 0.0002436195863709342,
      "loss": 2.9469,
      "step": 129067
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.476182699203491,
      "learning_rate": 0.00024361556870159182,
      "loss": 3.0319,
      "step": 129068
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9512572288513184,
      "learning_rate": 0.00024361155104273238,
      "loss": 2.9592,
      "step": 129069
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0521907806396484,
      "learning_rate": 0.00024360753339435658,
      "loss": 2.9719,
      "step": 129070
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8329715728759766,
      "learning_rate": 0.0002436035157564652,
      "loss": 3.0764,
      "step": 129071
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9089882373809814,
      "learning_rate": 0.00024359949812905918,
      "loss": 2.9541,
      "step": 129072
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.158967971801758,
      "learning_rate": 0.0002435954805121389,
      "loss": 2.8057,
      "step": 129073
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0279343128204346,
      "learning_rate": 0.0002435914629057053,
      "loss": 2.9535,
      "step": 129074
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1420907974243164,
      "learning_rate": 0.00024358744530975912,
      "loss": 2.9008,
      "step": 129075
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.359218120574951,
      "learning_rate": 0.0002435834277243011,
      "loss": 2.8189,
      "step": 129076
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2531068325042725,
      "learning_rate": 0.00024357941014933198,
      "loss": 2.8414,
      "step": 129077
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8965835571289062,
      "learning_rate": 0.00024357539258485262,
      "loss": 2.9156,
      "step": 129078
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8655576705932617,
      "learning_rate": 0.00024357137503086356,
      "loss": 2.6619,
      "step": 129079
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8847193717956543,
      "learning_rate": 0.00024356735748736562,
      "loss": 3.1428,
      "step": 129080
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7840496301651,
      "learning_rate": 0.0002435633399543596,
      "loss": 3.0313,
      "step": 129081
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3795011043548584,
      "learning_rate": 0.00024355932243184621,
      "loss": 2.966,
      "step": 129082
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2841453552246094,
      "learning_rate": 0.0002435553049198262,
      "loss": 3.0656,
      "step": 129083
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0778331756591797,
      "learning_rate": 0.0002435512874183005,
      "loss": 2.6969,
      "step": 129084
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.246246576309204,
      "learning_rate": 0.0002435472699272695,
      "loss": 3.3154,
      "step": 129085
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7485719919204712,
      "learning_rate": 0.00024354325244673413,
      "loss": 3.2385,
      "step": 129086
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.226792097091675,
      "learning_rate": 0.00024353923497669517,
      "loss": 2.8857,
      "step": 129087
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.33963942527771,
      "learning_rate": 0.0002435352175171533,
      "loss": 3.0669,
      "step": 129088
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0257112979888916,
      "learning_rate": 0.00024353120006810928,
      "loss": 2.7942,
      "step": 129089
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8433893918991089,
      "learning_rate": 0.00024352718262956406,
      "loss": 2.9814,
      "step": 129090
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.758413076400757,
      "learning_rate": 0.000243523165201518,
      "loss": 3.0803,
      "step": 129091
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.984621286392212,
      "learning_rate": 0.00024351914778397214,
      "loss": 3.1431,
      "step": 129092
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9022585153579712,
      "learning_rate": 0.00024351513037692708,
      "loss": 3.0843,
      "step": 129093
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1527979373931885,
      "learning_rate": 0.00024351111298038361,
      "loss": 2.9352,
      "step": 129094
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9550775289535522,
      "learning_rate": 0.00024350709559434257,
      "loss": 2.9614,
      "step": 129095
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0757999420166016,
      "learning_rate": 0.0002435030782188046,
      "loss": 2.88,
      "step": 129096
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4777064323425293,
      "learning_rate": 0.00024349906085377053,
      "loss": 3.2142,
      "step": 129097
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1122426986694336,
      "learning_rate": 0.00024349504349924098,
      "loss": 2.9218,
      "step": 129098
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.604737281799316,
      "learning_rate": 0.00024349102615521675,
      "loss": 3.2187,
      "step": 129099
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.988656520843506,
      "learning_rate": 0.0002434870088216986,
      "loss": 3.0691,
      "step": 129100
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.129798889160156,
      "learning_rate": 0.0002434829914986873,
      "loss": 2.8956,
      "step": 129101
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2046802043914795,
      "learning_rate": 0.00024347897418618364,
      "loss": 2.9597,
      "step": 129102
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0792768001556396,
      "learning_rate": 0.0002434749568841882,
      "loss": 2.9166,
      "step": 129103
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.5567150115966797,
      "learning_rate": 0.00024347093959270193,
      "loss": 3.0731,
      "step": 129104
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5244407653808594,
      "learning_rate": 0.00024346692231172544,
      "loss": 2.8896,
      "step": 129105
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0479660034179688,
      "learning_rate": 0.0002434629050412595,
      "loss": 2.9172,
      "step": 129106
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.2725021839141846,
      "learning_rate": 0.00024345888778130488,
      "loss": 3.0585,
      "step": 129107
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2477896213531494,
      "learning_rate": 0.00024345487053186238,
      "loss": 2.9923,
      "step": 129108
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.998934030532837,
      "learning_rate": 0.00024345085329293258,
      "loss": 2.77,
      "step": 129109
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0402660369873047,
      "learning_rate": 0.0002434468360645164,
      "loss": 3.157,
      "step": 129110
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0944645404815674,
      "learning_rate": 0.00024344281884661447,
      "loss": 2.8803,
      "step": 129111
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.056760787963867,
      "learning_rate": 0.0002434388016392277,
      "loss": 2.9781,
      "step": 129112
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0468993186950684,
      "learning_rate": 0.00024343478444235664,
      "loss": 2.7824,
      "step": 129113
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.244258165359497,
      "learning_rate": 0.0002434307672560021,
      "loss": 3.197,
      "step": 129114
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8097800016403198,
      "learning_rate": 0.00024342675008016493,
      "loss": 2.9889,
      "step": 129115
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.627542495727539,
      "learning_rate": 0.0002434227329148457,
      "loss": 3.0832,
      "step": 129116
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6019341945648193,
      "learning_rate": 0.0002434187157600453,
      "loss": 2.8134,
      "step": 129117
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.872244834899902,
      "learning_rate": 0.00024341469861576442,
      "loss": 2.8173,
      "step": 129118
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.285418748855591,
      "learning_rate": 0.00024341068148200383,
      "loss": 2.9228,
      "step": 129119
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.717695951461792,
      "learning_rate": 0.00024340666435876426,
      "loss": 3.0787,
      "step": 129120
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2530343532562256,
      "learning_rate": 0.0002434026472460465,
      "loss": 2.8078,
      "step": 129121
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1954774856567383,
      "learning_rate": 0.0002433986301438512,
      "loss": 2.8871,
      "step": 129122
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4598228931427,
      "learning_rate": 0.00024339461305217917,
      "loss": 2.9754,
      "step": 129123
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1994564533233643,
      "learning_rate": 0.0002433905959710311,
      "loss": 2.8326,
      "step": 129124
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2928967475891113,
      "learning_rate": 0.00024338657890040784,
      "loss": 3.1598,
      "step": 129125
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.434640407562256,
      "learning_rate": 0.00024338256184031014,
      "loss": 3.223,
      "step": 129126
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7857251167297363,
      "learning_rate": 0.00024337854479073866,
      "loss": 3.1903,
      "step": 129127
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9528553485870361,
      "learning_rate": 0.00024337452775169413,
      "loss": 2.9099,
      "step": 129128
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0744471549987793,
      "learning_rate": 0.00024337051072317737,
      "loss": 2.9439,
      "step": 129129
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0914695262908936,
      "learning_rate": 0.00024336649370518905,
      "loss": 2.9382,
      "step": 129130
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.296989679336548,
      "learning_rate": 0.00024336247669773001,
      "loss": 2.8287,
      "step": 129131
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9646165370941162,
      "learning_rate": 0.00024335845970080094,
      "loss": 3.1455,
      "step": 129132
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0019843578338623,
      "learning_rate": 0.00024335444271440272,
      "loss": 2.9232,
      "step": 129133
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8797481060028076,
      "learning_rate": 0.00024335042573853588,
      "loss": 3.1108,
      "step": 129134
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8813331127166748,
      "learning_rate": 0.00024334640877320124,
      "loss": 3.1604,
      "step": 129135
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9905909299850464,
      "learning_rate": 0.00024334239181839957,
      "loss": 2.9485,
      "step": 129136
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.096106767654419,
      "learning_rate": 0.00024333837487413164,
      "loss": 2.7567,
      "step": 129137
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4314053058624268,
      "learning_rate": 0.00024333435794039819,
      "loss": 3.0722,
      "step": 129138
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8969820737838745,
      "learning_rate": 0.00024333034101720004,
      "loss": 3.0241,
      "step": 129139
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.437530279159546,
      "learning_rate": 0.00024332632410453773,
      "loss": 2.9366,
      "step": 129140
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3008859157562256,
      "learning_rate": 0.0002433223072024121,
      "loss": 2.8394,
      "step": 129141
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1588852405548096,
      "learning_rate": 0.00024331829031082398,
      "loss": 2.9318,
      "step": 129142
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8282750844955444,
      "learning_rate": 0.000243314273429774,
      "loss": 2.7911,
      "step": 129143
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2816572189331055,
      "learning_rate": 0.00024331025655926304,
      "loss": 3.3183,
      "step": 129144
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.957624912261963,
      "learning_rate": 0.00024330623969929185,
      "loss": 2.9083,
      "step": 129145
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.463833808898926,
      "learning_rate": 0.00024330222284986097,
      "loss": 2.7134,
      "step": 129146
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.773322343826294,
      "learning_rate": 0.0002432982060109713,
      "loss": 3.0549,
      "step": 129147
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0989270210266113,
      "learning_rate": 0.00024329418918262355,
      "loss": 2.9057,
      "step": 129148
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.6900112628936768,
      "learning_rate": 0.00024329017236481853,
      "loss": 3.0287,
      "step": 129149
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.5193986892700195,
      "learning_rate": 0.00024328615555755683,
      "loss": 2.8974,
      "step": 129150
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.481942892074585,
      "learning_rate": 0.00024328213876083954,
      "loss": 3.0123,
      "step": 129151
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1631195545196533,
      "learning_rate": 0.00024327812197466698,
      "loss": 2.9251,
      "step": 129152
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0925135612487793,
      "learning_rate": 0.0002432741051990401,
      "loss": 3.0518,
      "step": 129153
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.9978854656219482,
      "learning_rate": 0.00024327008843395967,
      "loss": 2.8679,
      "step": 129154
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.933748245239258,
      "learning_rate": 0.00024326607167942632,
      "loss": 3.0444,
      "step": 129155
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.412473678588867,
      "learning_rate": 0.00024326205493544096,
      "loss": 2.8624,
      "step": 129156
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2999112606048584,
      "learning_rate": 0.00024325803820200435,
      "loss": 3.0286,
      "step": 129157
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.738823413848877,
      "learning_rate": 0.000243254021479117,
      "loss": 3.1848,
      "step": 129158
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.5089170932769775,
      "learning_rate": 0.00024325000476677983,
      "loss": 3.0472,
      "step": 129159
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.146069049835205,
      "learning_rate": 0.00024324598806499355,
      "loss": 2.7605,
      "step": 129160
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2676846981048584,
      "learning_rate": 0.0002432419713737589,
      "loss": 2.7582,
      "step": 129161
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2124102115631104,
      "learning_rate": 0.00024323795469307664,
      "loss": 2.9586,
      "step": 129162
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.166942596435547,
      "learning_rate": 0.00024323393802294763,
      "loss": 2.7982,
      "step": 129163
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7889890670776367,
      "learning_rate": 0.0002432299213633724,
      "loss": 2.9753,
      "step": 129164
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.230544328689575,
      "learning_rate": 0.0002432259047143518,
      "loss": 3.0434,
      "step": 129165
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9678634405136108,
      "learning_rate": 0.00024322188807588655,
      "loss": 2.9124,
      "step": 129166
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.6867592334747314,
      "learning_rate": 0.00024321787144797747,
      "loss": 3.1059,
      "step": 129167
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.267289161682129,
      "learning_rate": 0.00024321385483062522,
      "loss": 2.8732,
      "step": 129168
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.911492943763733,
      "learning_rate": 0.00024320983822383071,
      "loss": 3.0805,
      "step": 129169
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2833545207977295,
      "learning_rate": 0.00024320582162759444,
      "loss": 3.1635,
      "step": 129170
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.1714837551116943,
      "learning_rate": 0.00024320180504191732,
      "loss": 2.9812,
      "step": 129171
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5686023235321045,
      "learning_rate": 0.0002431977884668,
      "loss": 2.9108,
      "step": 129172
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9286710023880005,
      "learning_rate": 0.00024319377190224332,
      "loss": 2.8316,
      "step": 129173
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2055857181549072,
      "learning_rate": 0.00024318975534824794,
      "loss": 2.851,
      "step": 129174
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1050214767456055,
      "learning_rate": 0.00024318573880481484,
      "loss": 3.1154,
      "step": 129175
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.574012517929077,
      "learning_rate": 0.00024318172227194443,
      "loss": 3.1375,
      "step": 129176
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.178285837173462,
      "learning_rate": 0.00024317770574963762,
      "loss": 2.7493,
      "step": 129177
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4413607120513916,
      "learning_rate": 0.00024317368923789515,
      "loss": 3.015,
      "step": 129178
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9953893423080444,
      "learning_rate": 0.00024316967273671776,
      "loss": 2.9623,
      "step": 129179
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2331128120422363,
      "learning_rate": 0.0002431656562461062,
      "loss": 3.0748,
      "step": 129180
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9276115894317627,
      "learning_rate": 0.0002431616397660612,
      "loss": 3.3255,
      "step": 129181
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.784276008605957,
      "learning_rate": 0.00024315762329658367,
      "loss": 3.0155,
      "step": 129182
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.869810938835144,
      "learning_rate": 0.0002431536068376741,
      "loss": 2.9434,
      "step": 129183
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.096770763397217,
      "learning_rate": 0.00024314959038933333,
      "loss": 2.9338,
      "step": 129184
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.125237226486206,
      "learning_rate": 0.00024314557395156212,
      "loss": 2.9453,
      "step": 129185
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.057201385498047,
      "learning_rate": 0.0002431415575243612,
      "loss": 2.6395,
      "step": 129186
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.087116003036499,
      "learning_rate": 0.0002431375411077314,
      "loss": 3.0336,
      "step": 129187
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.415741205215454,
      "learning_rate": 0.00024313352470167342,
      "loss": 3.2302,
      "step": 129188
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0193214416503906,
      "learning_rate": 0.000243129508306188,
      "loss": 3.0598,
      "step": 129189
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3540613651275635,
      "learning_rate": 0.00024312549192127578,
      "loss": 2.7587,
      "step": 129190
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.089137315750122,
      "learning_rate": 0.0002431214755469377,
      "loss": 3.0185,
      "step": 129191
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3002638816833496,
      "learning_rate": 0.0002431174591831743,
      "loss": 3.1426,
      "step": 129192
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9311000108718872,
      "learning_rate": 0.0002431134428299865,
      "loss": 2.9231,
      "step": 129193
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1845457553863525,
      "learning_rate": 0.00024310942648737507,
      "loss": 3.0126,
      "step": 129194
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8130557537078857,
      "learning_rate": 0.00024310541015534054,
      "loss": 2.8621,
      "step": 129195
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.556367874145508,
      "learning_rate": 0.0002431013938338839,
      "loss": 2.708,
      "step": 129196
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.337625741958618,
      "learning_rate": 0.00024309737752300568,
      "loss": 2.7793,
      "step": 129197
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8949459791183472,
      "learning_rate": 0.00024309336122270677,
      "loss": 3.0891,
      "step": 129198
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.284576892852783,
      "learning_rate": 0.0002430893449329879,
      "loss": 2.864,
      "step": 129199
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4096460342407227,
      "learning_rate": 0.00024308532865384983,
      "loss": 3.0376,
      "step": 129200
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6443116664886475,
      "learning_rate": 0.0002430813123852932,
      "loss": 2.8356,
      "step": 129201
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.04181170463562,
      "learning_rate": 0.00024307729612731884,
      "loss": 3.3756,
      "step": 129202
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3022098541259766,
      "learning_rate": 0.00024307327987992748,
      "loss": 3.1569,
      "step": 129203
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6939327716827393,
      "learning_rate": 0.00024306926364311992,
      "loss": 2.8921,
      "step": 129204
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.220771074295044,
      "learning_rate": 0.0002430652474168968,
      "loss": 2.9984,
      "step": 129205
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9621403217315674,
      "learning_rate": 0.000243061231201259,
      "loss": 2.8148,
      "step": 129206
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2380759716033936,
      "learning_rate": 0.00024305721499620714,
      "loss": 2.9181,
      "step": 129207
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8036226034164429,
      "learning_rate": 0.00024305319880174201,
      "loss": 3.0584,
      "step": 129208
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2118709087371826,
      "learning_rate": 0.00024304918261786434,
      "loss": 2.8743,
      "step": 129209
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8797411918640137,
      "learning_rate": 0.00024304516644457495,
      "loss": 3.1832,
      "step": 129210
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.710453510284424,
      "learning_rate": 0.00024304115028187453,
      "loss": 2.7398,
      "step": 129211
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.318403482437134,
      "learning_rate": 0.0002430371341297639,
      "loss": 3.0024,
      "step": 129212
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.019681692123413,
      "learning_rate": 0.00024303311798824366,
      "loss": 2.9684,
      "step": 129213
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2840583324432373,
      "learning_rate": 0.00024302910185731462,
      "loss": 3.2511,
      "step": 129214
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.722513198852539,
      "learning_rate": 0.00024302508573697762,
      "loss": 2.8174,
      "step": 129215
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2666587829589844,
      "learning_rate": 0.00024302106962723324,
      "loss": 3.1707,
      "step": 129216
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6437602043151855,
      "learning_rate": 0.00024301705352808235,
      "loss": 2.9537,
      "step": 129217
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8886711597442627,
      "learning_rate": 0.00024301303743952583,
      "loss": 3.1769,
      "step": 129218
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5215461254119873,
      "learning_rate": 0.00024300902136156408,
      "loss": 2.9063,
      "step": 129219
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.685580253601074,
      "learning_rate": 0.00024300500529419804,
      "loss": 2.8821,
      "step": 129220
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7612271308898926,
      "learning_rate": 0.00024300098923742846,
      "loss": 2.8559,
      "step": 129221
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.088994264602661,
      "learning_rate": 0.00024299697319125609,
      "loss": 3.4085,
      "step": 129222
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4421660900115967,
      "learning_rate": 0.00024299295715568163,
      "loss": 3.0653,
      "step": 129223
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.704396963119507,
      "learning_rate": 0.00024298894113070597,
      "loss": 2.7153,
      "step": 129224
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3881518840789795,
      "learning_rate": 0.00024298492511632964,
      "loss": 2.9367,
      "step": 129225
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.312110662460327,
      "learning_rate": 0.00024298090911255347,
      "loss": 3.1051,
      "step": 129226
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.472370147705078,
      "learning_rate": 0.00024297689311937826,
      "loss": 2.9958,
      "step": 129227
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.911434531211853,
      "learning_rate": 0.00024297287713680467,
      "loss": 2.9945,
      "step": 129228
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.265293598175049,
      "learning_rate": 0.00024296886116483357,
      "loss": 2.8581,
      "step": 129229
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5605902671813965,
      "learning_rate": 0.0002429648452034657,
      "loss": 2.9396,
      "step": 129230
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2807209491729736,
      "learning_rate": 0.00024296082925270163,
      "loss": 2.8237,
      "step": 129231
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6589269638061523,
      "learning_rate": 0.00024295681331254221,
      "loss": 2.5267,
      "step": 129232
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8866767883300781,
      "learning_rate": 0.00024295279738298822,
      "loss": 2.835,
      "step": 129233
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.303061485290527,
      "learning_rate": 0.00024294878146404036,
      "loss": 3.0348,
      "step": 129234
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.4105567932128906,
      "learning_rate": 0.00024294476555569944,
      "loss": 2.744,
      "step": 129235
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1170427799224854,
      "learning_rate": 0.00024294074965796624,
      "loss": 2.8606,
      "step": 129236
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.592839479446411,
      "learning_rate": 0.00024293673377084134,
      "loss": 2.9624,
      "step": 129237
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3740665912628174,
      "learning_rate": 0.00024293271789432554,
      "loss": 2.8763,
      "step": 129238
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.318981409072876,
      "learning_rate": 0.00024292870202841967,
      "loss": 2.9737,
      "step": 129239
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1053242683410645,
      "learning_rate": 0.00024292468617312442,
      "loss": 2.9459,
      "step": 129240
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8352179527282715,
      "learning_rate": 0.00024292067032844053,
      "loss": 3.0596,
      "step": 129241
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9980319738388062,
      "learning_rate": 0.0002429166544943689,
      "loss": 2.7979,
      "step": 129242
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4950900077819824,
      "learning_rate": 0.00024291263867091002,
      "loss": 2.8476,
      "step": 129243
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9834469556808472,
      "learning_rate": 0.00024290862285806475,
      "loss": 2.9707,
      "step": 129244
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.962770938873291,
      "learning_rate": 0.00024290460705583383,
      "loss": 3.0893,
      "step": 129245
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.009392261505127,
      "learning_rate": 0.00024290059126421806,
      "loss": 2.9748,
      "step": 129246
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7407606840133667,
      "learning_rate": 0.0002428965754832181,
      "loss": 2.887,
      "step": 129247
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4015557765960693,
      "learning_rate": 0.0002428925597128348,
      "loss": 2.9222,
      "step": 129248
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8105283975601196,
      "learning_rate": 0.00024288854395306894,
      "loss": 2.9494,
      "step": 129249
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9875028133392334,
      "learning_rate": 0.00024288452820392107,
      "loss": 2.8013,
      "step": 129250
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.257755756378174,
      "learning_rate": 0.00024288051246539204,
      "loss": 2.8892,
      "step": 129251
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2765591144561768,
      "learning_rate": 0.0002428764967374826,
      "loss": 2.9036,
      "step": 129252
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3358142375946045,
      "learning_rate": 0.00024287248102019347,
      "loss": 2.9587,
      "step": 129253
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5506784915924072,
      "learning_rate": 0.00024286846531352543,
      "loss": 2.8701,
      "step": 129254
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.162921905517578,
      "learning_rate": 0.0002428644496174794,
      "loss": 2.9863,
      "step": 129255
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0997791290283203,
      "learning_rate": 0.00024286043393205578,
      "loss": 3.0541,
      "step": 129256
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8579597473144531,
      "learning_rate": 0.00024285641825725547,
      "loss": 2.9554,
      "step": 129257
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5684683322906494,
      "learning_rate": 0.00024285240259307926,
      "loss": 2.9064,
      "step": 129258
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9214248657226562,
      "learning_rate": 0.00024284838693952785,
      "loss": 2.9975,
      "step": 129259
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.440248489379883,
      "learning_rate": 0.00024284437129660203,
      "loss": 2.9525,
      "step": 129260
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3685433864593506,
      "learning_rate": 0.00024284035566430261,
      "loss": 2.9566,
      "step": 129261
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9264978170394897,
      "learning_rate": 0.00024283634004263014,
      "loss": 3.1293,
      "step": 129262
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6529626846313477,
      "learning_rate": 0.0002428323244315855,
      "loss": 3.0187,
      "step": 129263
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.328554391860962,
      "learning_rate": 0.00024282830883116938,
      "loss": 3.0989,
      "step": 129264
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1125271320343018,
      "learning_rate": 0.00024282429324138256,
      "loss": 3.2319,
      "step": 129265
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.5189263820648193,
      "learning_rate": 0.00024282027766222577,
      "loss": 2.8832,
      "step": 129266
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0095627307891846,
      "learning_rate": 0.0002428162620936999,
      "loss": 2.9829,
      "step": 129267
    },
    {
      "epoch": 1.68,
      "grad_norm": 5.409648418426514,
      "learning_rate": 0.00024281224653580546,
      "loss": 2.9506,
      "step": 129268
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.100445032119751,
      "learning_rate": 0.0002428082309885433,
      "loss": 3.0238,
      "step": 129269
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.096529006958008,
      "learning_rate": 0.00024280421545191415,
      "loss": 3.3136,
      "step": 129270
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.7912609577178955,
      "learning_rate": 0.00024280019992591878,
      "loss": 2.7134,
      "step": 129271
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.079064130783081,
      "learning_rate": 0.00024279618441055793,
      "loss": 3.121,
      "step": 129272
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8012776374816895,
      "learning_rate": 0.0002427921689058324,
      "loss": 2.9919,
      "step": 129273
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0684304237365723,
      "learning_rate": 0.0002427881534117429,
      "loss": 2.7585,
      "step": 129274
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3083605766296387,
      "learning_rate": 0.00024278413792829014,
      "loss": 3.0009,
      "step": 129275
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4295902252197266,
      "learning_rate": 0.00024278012245547485,
      "loss": 3.0401,
      "step": 129276
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.749267816543579,
      "learning_rate": 0.00024277610699329782,
      "loss": 2.9393,
      "step": 129277
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9554517269134521,
      "learning_rate": 0.0002427720915417598,
      "loss": 2.9237,
      "step": 129278
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7827446460723877,
      "learning_rate": 0.0002427680761008616,
      "loss": 2.8403,
      "step": 129279
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8626259565353394,
      "learning_rate": 0.0002427640606706038,
      "loss": 2.7535,
      "step": 129280
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.232503652572632,
      "learning_rate": 0.00024276004525098732,
      "loss": 2.9847,
      "step": 129281
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9832526445388794,
      "learning_rate": 0.00024275602984201276,
      "loss": 3.1569,
      "step": 129282
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.241499185562134,
      "learning_rate": 0.00024275201444368095,
      "loss": 3.0736,
      "step": 129283
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.2918734550476074,
      "learning_rate": 0.0002427479990559926,
      "loss": 2.8845,
      "step": 129284
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.297898769378662,
      "learning_rate": 0.00024274398367894853,
      "loss": 3.1647,
      "step": 129285
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.728548288345337,
      "learning_rate": 0.00024273996831254943,
      "loss": 2.6676,
      "step": 129286
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.368609666824341,
      "learning_rate": 0.000242735952956796,
      "loss": 3.2807,
      "step": 129287
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2173635959625244,
      "learning_rate": 0.00024273193761168913,
      "loss": 2.8302,
      "step": 129288
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.155349016189575,
      "learning_rate": 0.00024272792227722937,
      "loss": 2.8285,
      "step": 129289
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.022775411605835,
      "learning_rate": 0.0002427239069534176,
      "loss": 2.6887,
      "step": 129290
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0565524101257324,
      "learning_rate": 0.0002427198916402546,
      "loss": 3.075,
      "step": 129291
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.083024024963379,
      "learning_rate": 0.00024271587633774096,
      "loss": 3.1101,
      "step": 129292
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1321284770965576,
      "learning_rate": 0.00024271186104587756,
      "loss": 3.0736,
      "step": 129293
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2529990673065186,
      "learning_rate": 0.00024270784576466508,
      "loss": 2.896,
      "step": 129294
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7941575050354004,
      "learning_rate": 0.0002427038304941043,
      "loss": 3.0784,
      "step": 129295
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7951089143753052,
      "learning_rate": 0.00024269981523419604,
      "loss": 2.6667,
      "step": 129296
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0691592693328857,
      "learning_rate": 0.00024269579998494094,
      "loss": 2.9481,
      "step": 129297
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.5547780990600586,
      "learning_rate": 0.00024269178474633975,
      "loss": 3.034,
      "step": 129298
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.291980743408203,
      "learning_rate": 0.00024268776951839318,
      "loss": 3.0206,
      "step": 129299
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4150912761688232,
      "learning_rate": 0.0002426837543011021,
      "loss": 3.3363,
      "step": 129300
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4675393104553223,
      "learning_rate": 0.00024267973909446712,
      "loss": 3.0971,
      "step": 129301
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9006900787353516,
      "learning_rate": 0.00024267572389848912,
      "loss": 3.0955,
      "step": 129302
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9916795492172241,
      "learning_rate": 0.0002426717087131689,
      "loss": 2.8639,
      "step": 129303
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0732133388519287,
      "learning_rate": 0.00024266769353850694,
      "loss": 3.2591,
      "step": 129304
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.020148515701294,
      "learning_rate": 0.00024266367837450414,
      "loss": 2.7258,
      "step": 129305
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.495154857635498,
      "learning_rate": 0.00024265966322116128,
      "loss": 3.1208,
      "step": 129306
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.355635404586792,
      "learning_rate": 0.00024265564807847902,
      "loss": 2.9523,
      "step": 129307
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.053201198577881,
      "learning_rate": 0.0002426516329464582,
      "loss": 2.9359,
      "step": 129308
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.043999433517456,
      "learning_rate": 0.00024264761782509962,
      "loss": 2.8267,
      "step": 129309
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9219915866851807,
      "learning_rate": 0.00024264360271440385,
      "loss": 3.0123,
      "step": 129310
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1113510131835938,
      "learning_rate": 0.00024263958761437166,
      "loss": 2.9276,
      "step": 129311
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.128507614135742,
      "learning_rate": 0.00024263557252500388,
      "loss": 2.9915,
      "step": 129312
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.050309181213379,
      "learning_rate": 0.00024263155744630124,
      "loss": 2.7105,
      "step": 129313
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7830554246902466,
      "learning_rate": 0.00024262754237826446,
      "loss": 3.0672,
      "step": 129314
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.828503131866455,
      "learning_rate": 0.0002426235273208943,
      "loss": 3.3084,
      "step": 129315
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.72046160697937,
      "learning_rate": 0.00024261951227419164,
      "loss": 2.7,
      "step": 129316
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.203500986099243,
      "learning_rate": 0.000242615497238157,
      "loss": 2.8741,
      "step": 129317
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3643393516540527,
      "learning_rate": 0.00024261148221279118,
      "loss": 3.0564,
      "step": 129318
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8814903497695923,
      "learning_rate": 0.00024260746719809495,
      "loss": 3.1794,
      "step": 129319
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0062930583953857,
      "learning_rate": 0.00024260345219406914,
      "loss": 3.0913,
      "step": 129320
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.744727373123169,
      "learning_rate": 0.0002425994372007144,
      "loss": 3.1163,
      "step": 129321
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9598698616027832,
      "learning_rate": 0.00024259542221803163,
      "loss": 2.9261,
      "step": 129322
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9884552955627441,
      "learning_rate": 0.00024259140724602137,
      "loss": 2.9504,
      "step": 129323
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.502366304397583,
      "learning_rate": 0.0002425873922846844,
      "loss": 2.9798,
      "step": 129324
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.674708127975464,
      "learning_rate": 0.00024258337733402152,
      "loss": 2.8105,
      "step": 129325
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.832997441291809,
      "learning_rate": 0.0002425793623940335,
      "loss": 3.1174,
      "step": 129326
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1392624378204346,
      "learning_rate": 0.00024257534746472103,
      "loss": 2.9468,
      "step": 129327
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.017021417617798,
      "learning_rate": 0.00024257133254608506,
      "loss": 3.1502,
      "step": 129328
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8480366468429565,
      "learning_rate": 0.000242567317638126,
      "loss": 2.9138,
      "step": 129329
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.179490804672241,
      "learning_rate": 0.00024256330274084476,
      "loss": 2.7743,
      "step": 129330
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.977027177810669,
      "learning_rate": 0.0002425592878542421,
      "loss": 3.1754,
      "step": 129331
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5568923950195312,
      "learning_rate": 0.00024255527297831873,
      "loss": 3.218,
      "step": 129332
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3028247356414795,
      "learning_rate": 0.00024255125811307543,
      "loss": 2.9626,
      "step": 129333
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.083721160888672,
      "learning_rate": 0.0002425472432585131,
      "loss": 2.7948,
      "step": 129334
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7042217254638672,
      "learning_rate": 0.00024254322841463214,
      "loss": 2.7385,
      "step": 129335
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.06915020942688,
      "learning_rate": 0.00024253921358143352,
      "loss": 3.0777,
      "step": 129336
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3434202671051025,
      "learning_rate": 0.0002425351987589179,
      "loss": 2.9782,
      "step": 129337
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9192051887512207,
      "learning_rate": 0.0002425311839470861,
      "loss": 2.9403,
      "step": 129338
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0143866539001465,
      "learning_rate": 0.00024252716914593882,
      "loss": 3.0199,
      "step": 129339
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6418874263763428,
      "learning_rate": 0.000242523154355477,
      "loss": 2.7985,
      "step": 129340
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8688273429870605,
      "learning_rate": 0.00024251913957570103,
      "loss": 3.0387,
      "step": 129341
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.607184410095215,
      "learning_rate": 0.00024251512480661185,
      "loss": 2.8757,
      "step": 129342
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.854515314102173,
      "learning_rate": 0.0002425111100482102,
      "loss": 3.0668,
      "step": 129343
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.103846549987793,
      "learning_rate": 0.0002425070953004968,
      "loss": 2.9631,
      "step": 129344
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9247132539749146,
      "learning_rate": 0.00024250308056347246,
      "loss": 2.9661,
      "step": 129345
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.8267810344696045,
      "learning_rate": 0.00024249906583713798,
      "loss": 2.798,
      "step": 129346
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.340780735015869,
      "learning_rate": 0.00024249505112149388,
      "loss": 2.9553,
      "step": 129347
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.00805926322937,
      "learning_rate": 0.00024249103641654104,
      "loss": 2.8931,
      "step": 129348
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.0720458030700684,
      "learning_rate": 0.0002424870217222802,
      "loss": 3.0238,
      "step": 129349
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0774710178375244,
      "learning_rate": 0.00024248300703871216,
      "loss": 2.8178,
      "step": 129350
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.2537076473236084,
      "learning_rate": 0.00024247899236583755,
      "loss": 3.1077,
      "step": 129351
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9776157140731812,
      "learning_rate": 0.0002424749777036573,
      "loss": 3.0102,
      "step": 129352
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0541815757751465,
      "learning_rate": 0.00024247096305217197,
      "loss": 3.2022,
      "step": 129353
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.886824131011963,
      "learning_rate": 0.00024246694841138233,
      "loss": 3.3281,
      "step": 129354
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.087048292160034,
      "learning_rate": 0.00024246293378128915,
      "loss": 2.8592,
      "step": 129355
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7507362365722656,
      "learning_rate": 0.00024245891916189325,
      "loss": 3.0562,
      "step": 129356
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7799735069274902,
      "learning_rate": 0.0002424549045531953,
      "loss": 2.9168,
      "step": 129357
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0310771465301514,
      "learning_rate": 0.00024245088995519614,
      "loss": 3.048,
      "step": 129358
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.139577865600586,
      "learning_rate": 0.00024244687536789644,
      "loss": 2.9432,
      "step": 129359
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.165942907333374,
      "learning_rate": 0.00024244286079129688,
      "loss": 2.9807,
      "step": 129360
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.058873176574707,
      "learning_rate": 0.0002424388462253983,
      "loss": 2.8403,
      "step": 129361
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2020044326782227,
      "learning_rate": 0.00024243483167020138,
      "loss": 2.9022,
      "step": 129362
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.8304388523101807,
      "learning_rate": 0.00024243081712570696,
      "loss": 3.1253,
      "step": 129363
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4485645294189453,
      "learning_rate": 0.0002424268025919158,
      "loss": 3.0269,
      "step": 129364
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0432612895965576,
      "learning_rate": 0.0002424227880688285,
      "loss": 3.1222,
      "step": 129365
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9370856285095215,
      "learning_rate": 0.00024241877355644595,
      "loss": 2.9416,
      "step": 129366
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.290701866149902,
      "learning_rate": 0.0002424147590547688,
      "loss": 3.0756,
      "step": 129367
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.548844337463379,
      "learning_rate": 0.0002424107445637978,
      "loss": 3.0517,
      "step": 129368
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0855438709259033,
      "learning_rate": 0.00024240673008353377,
      "loss": 2.9749,
      "step": 129369
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2153592109680176,
      "learning_rate": 0.00024240271561397746,
      "loss": 3.0345,
      "step": 129370
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.555177927017212,
      "learning_rate": 0.00024239870115512946,
      "loss": 3.1389,
      "step": 129371
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.300912618637085,
      "learning_rate": 0.0002423946867069907,
      "loss": 2.6907,
      "step": 129372
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1382532119750977,
      "learning_rate": 0.0002423906722695619,
      "loss": 3.0147,
      "step": 129373
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9646414518356323,
      "learning_rate": 0.0002423866578428437,
      "loss": 2.8829,
      "step": 129374
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5359930992126465,
      "learning_rate": 0.00024238264342683687,
      "loss": 2.9544,
      "step": 129375
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8208470344543457,
      "learning_rate": 0.0002423786290215423,
      "loss": 3.1148,
      "step": 129376
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9152520895004272,
      "learning_rate": 0.00024237461462696055,
      "loss": 2.9554,
      "step": 129377
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.6218719482421875,
      "learning_rate": 0.00024237060024309242,
      "loss": 3.1242,
      "step": 129378
    },
    {
      "epoch": 1.68,
      "grad_norm": 4.365382671356201,
      "learning_rate": 0.00024236658586993873,
      "loss": 3.1308,
      "step": 129379
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.7083916664123535,
      "learning_rate": 0.00024236257150750024,
      "loss": 3.1658,
      "step": 129380
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2171099185943604,
      "learning_rate": 0.00024235855715577757,
      "loss": 2.8474,
      "step": 129381
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.8341269493103027,
      "learning_rate": 0.0002423545428147715,
      "loss": 2.7771,
      "step": 129382
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.172600507736206,
      "learning_rate": 0.0002423505284844829,
      "loss": 2.8121,
      "step": 129383
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0279440879821777,
      "learning_rate": 0.00024234651416491236,
      "loss": 3.0516,
      "step": 129384
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.968580961227417,
      "learning_rate": 0.0002423424998560607,
      "loss": 3.2321,
      "step": 129385
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0149097442626953,
      "learning_rate": 0.00024233848555792862,
      "loss": 2.9812,
      "step": 129386
    },
    {
      "epoch": 1.68,
      "grad_norm": 3.2312793731689453,
      "learning_rate": 0.00024233447127051692,
      "loss": 3.1418,
      "step": 129387
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3574979305267334,
      "learning_rate": 0.00024233045699382637,
      "loss": 2.9274,
      "step": 129388
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4963419437408447,
      "learning_rate": 0.00024232644272785775,
      "loss": 3.2191,
      "step": 129389
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2284114360809326,
      "learning_rate": 0.00024232242847261161,
      "loss": 3.1622,
      "step": 129390
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.7318812608718872,
      "learning_rate": 0.00024231841422808886,
      "loss": 3.2742,
      "step": 129391
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3401598930358887,
      "learning_rate": 0.00024231439999429014,
      "loss": 2.8693,
      "step": 129392
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.284457206726074,
      "learning_rate": 0.0002423103857712163,
      "loss": 2.946,
      "step": 129393
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.0107429027557373,
      "learning_rate": 0.00024230637155886809,
      "loss": 2.8765,
      "step": 129394
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.5951268672943115,
      "learning_rate": 0.0002423023573572463,
      "loss": 3.0562,
      "step": 129395
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1816296577453613,
      "learning_rate": 0.00024229834316635146,
      "loss": 2.9162,
      "step": 129396
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.4441871643066406,
      "learning_rate": 0.00024229432898618445,
      "loss": 2.8809,
      "step": 129397
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.1720359325408936,
      "learning_rate": 0.000242290314816746,
      "loss": 2.9571,
      "step": 129398
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.3698244094848633,
      "learning_rate": 0.0002422863006580369,
      "loss": 3.0944,
      "step": 129399
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2663838863372803,
      "learning_rate": 0.00024228228651005786,
      "loss": 2.9886,
      "step": 129400
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.6631250381469727,
      "learning_rate": 0.00024227827237280976,
      "loss": 3.0928,
      "step": 129401
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.2258224487304688,
      "learning_rate": 0.0002422742582462931,
      "loss": 2.9155,
      "step": 129402
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.902923583984375,
      "learning_rate": 0.00024227024413050872,
      "loss": 2.873,
      "step": 129403
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.9601956605911255,
      "learning_rate": 0.0002422662300254574,
      "loss": 2.8575,
      "step": 129404
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.9902617931365967,
      "learning_rate": 0.00024226221593113986,
      "loss": 2.7971,
      "step": 129405
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.938759207725525,
      "learning_rate": 0.0002422582018475569,
      "loss": 3.0335,
      "step": 129406
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8429856300354004,
      "learning_rate": 0.00024225418777470934,
      "loss": 3.1291,
      "step": 129407
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.832612156867981,
      "learning_rate": 0.00024225017371259767,
      "loss": 2.9649,
      "step": 129408
    },
    {
      "epoch": 1.68,
      "grad_norm": 1.8818720579147339,
      "learning_rate": 0.00024224615966122283,
      "loss": 3.0339,
      "step": 129409
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8353914022445679,
      "learning_rate": 0.00024224214562058548,
      "loss": 2.8899,
      "step": 129410
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5891854763031006,
      "learning_rate": 0.0002422381315906864,
      "loss": 2.7528,
      "step": 129411
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.119990587234497,
      "learning_rate": 0.00024223411757152638,
      "loss": 3.0585,
      "step": 129412
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9976282119750977,
      "learning_rate": 0.00024223010356310624,
      "loss": 3.1145,
      "step": 129413
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.773434042930603,
      "learning_rate": 0.00024222608956542645,
      "loss": 2.8677,
      "step": 129414
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.865386962890625,
      "learning_rate": 0.00024222207557848797,
      "loss": 2.9584,
      "step": 129415
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0826706886291504,
      "learning_rate": 0.00024221806160229148,
      "loss": 3.0705,
      "step": 129416
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0930368900299072,
      "learning_rate": 0.00024221404763683776,
      "loss": 3.3428,
      "step": 129417
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.178795099258423,
      "learning_rate": 0.0002422100336821275,
      "loss": 2.8647,
      "step": 129418
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1789166927337646,
      "learning_rate": 0.00024220601973816162,
      "loss": 2.9831,
      "step": 129419
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0121283531188965,
      "learning_rate": 0.00024220200580494063,
      "loss": 3.0841,
      "step": 129420
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1254725456237793,
      "learning_rate": 0.00024219799188246536,
      "loss": 2.9457,
      "step": 129421
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.06659197807312,
      "learning_rate": 0.0002421939779707366,
      "loss": 2.8682,
      "step": 129422
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6872519254684448,
      "learning_rate": 0.00024218996406975504,
      "loss": 2.8186,
      "step": 129423
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.19122314453125,
      "learning_rate": 0.00024218595017952148,
      "loss": 2.9604,
      "step": 129424
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.940224289894104,
      "learning_rate": 0.00024218193630003677,
      "loss": 2.9243,
      "step": 129425
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2568628787994385,
      "learning_rate": 0.00024217792243130137,
      "loss": 2.9714,
      "step": 129426
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3605968952178955,
      "learning_rate": 0.00024217390857331623,
      "loss": 3.0028,
      "step": 129427
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.007929801940918,
      "learning_rate": 0.000242169894726082,
      "loss": 2.7956,
      "step": 129428
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4567153453826904,
      "learning_rate": 0.00024216588088959951,
      "loss": 3.1287,
      "step": 129429
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.859236478805542,
      "learning_rate": 0.00024216186706386948,
      "loss": 2.8921,
      "step": 129430
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.559779405593872,
      "learning_rate": 0.00024215785324889277,
      "loss": 3.229,
      "step": 129431
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4796195030212402,
      "learning_rate": 0.0002421538394446699,
      "loss": 3.1548,
      "step": 129432
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8888678550720215,
      "learning_rate": 0.0002421498256512017,
      "loss": 3.0655,
      "step": 129433
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2709357738494873,
      "learning_rate": 0.00024214581186848892,
      "loss": 2.9275,
      "step": 129434
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9103035926818848,
      "learning_rate": 0.00024214179809653235,
      "loss": 3.1963,
      "step": 129435
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.265007257461548,
      "learning_rate": 0.00024213778433533273,
      "loss": 3.2142,
      "step": 129436
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.151376724243164,
      "learning_rate": 0.00024213377058489088,
      "loss": 3.1577,
      "step": 129437
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9707791805267334,
      "learning_rate": 0.00024212975684520733,
      "loss": 3.0929,
      "step": 129438
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4139883518218994,
      "learning_rate": 0.00024212574311628296,
      "loss": 3.1528,
      "step": 129439
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7891316413879395,
      "learning_rate": 0.00024212172939811855,
      "loss": 3.0188,
      "step": 129440
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.4354443550109863,
      "learning_rate": 0.00024211771569071473,
      "loss": 2.9345,
      "step": 129441
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6178882122039795,
      "learning_rate": 0.00024211370199407235,
      "loss": 2.9151,
      "step": 129442
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7202786207199097,
      "learning_rate": 0.00024210968830819227,
      "loss": 3.2746,
      "step": 129443
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4337563514709473,
      "learning_rate": 0.00024210567463307494,
      "loss": 2.8443,
      "step": 129444
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6272814273834229,
      "learning_rate": 0.00024210166096872126,
      "loss": 2.9586,
      "step": 129445
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.9156079292297363,
      "learning_rate": 0.00024209764731513198,
      "loss": 2.9619,
      "step": 129446
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.855229616165161,
      "learning_rate": 0.00024209363367230785,
      "loss": 2.804,
      "step": 129447
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.323082685470581,
      "learning_rate": 0.0002420896200402496,
      "loss": 3.1162,
      "step": 129448
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.127558469772339,
      "learning_rate": 0.00024208560641895797,
      "loss": 3.2718,
      "step": 129449
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.487305164337158,
      "learning_rate": 0.00024208159280843385,
      "loss": 2.9298,
      "step": 129450
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9671847820281982,
      "learning_rate": 0.00024207757920867776,
      "loss": 3.0722,
      "step": 129451
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1294045448303223,
      "learning_rate": 0.0002420735656196905,
      "loss": 2.9984,
      "step": 129452
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9663947820663452,
      "learning_rate": 0.0002420695520414729,
      "loss": 3.0137,
      "step": 129453
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0153281688690186,
      "learning_rate": 0.0002420655384740256,
      "loss": 3.1358,
      "step": 129454
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8623554706573486,
      "learning_rate": 0.00024206152491734945,
      "loss": 2.9642,
      "step": 129455
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9656202793121338,
      "learning_rate": 0.00024205751137144525,
      "loss": 2.8695,
      "step": 129456
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9001283645629883,
      "learning_rate": 0.00024205349783631354,
      "loss": 2.875,
      "step": 129457
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1071863174438477,
      "learning_rate": 0.00024204948431195524,
      "loss": 3.1164,
      "step": 129458
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2574052810668945,
      "learning_rate": 0.000242045470798371,
      "loss": 2.9721,
      "step": 129459
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2292532920837402,
      "learning_rate": 0.0002420414572955616,
      "loss": 2.9485,
      "step": 129460
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3057587146759033,
      "learning_rate": 0.00024203744380352777,
      "loss": 2.7824,
      "step": 129461
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0909583568573,
      "learning_rate": 0.00024203343032227034,
      "loss": 2.6269,
      "step": 129462
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0168044567108154,
      "learning_rate": 0.00024202941685178993,
      "loss": 3.1471,
      "step": 129463
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.462808609008789,
      "learning_rate": 0.00024202540339208733,
      "loss": 3.1365,
      "step": 129464
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9987804889678955,
      "learning_rate": 0.0002420213899431634,
      "loss": 2.7326,
      "step": 129465
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7826868295669556,
      "learning_rate": 0.0002420173765050187,
      "loss": 3.0042,
      "step": 129466
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6688592433929443,
      "learning_rate": 0.00024201336307765406,
      "loss": 2.9992,
      "step": 129467
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.548384428024292,
      "learning_rate": 0.00024200934966107034,
      "loss": 2.821,
      "step": 129468
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9484772682189941,
      "learning_rate": 0.00024200533625526804,
      "loss": 3.1534,
      "step": 129469
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1709189414978027,
      "learning_rate": 0.00024200132286024807,
      "loss": 2.9276,
      "step": 129470
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.02651047706604,
      "learning_rate": 0.00024199730947601118,
      "loss": 3.1331,
      "step": 129471
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.342411518096924,
      "learning_rate": 0.00024199329610255813,
      "loss": 2.904,
      "step": 129472
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6207385063171387,
      "learning_rate": 0.00024198928273988954,
      "loss": 3.0852,
      "step": 129473
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0024704933166504,
      "learning_rate": 0.00024198526938800635,
      "loss": 3.1549,
      "step": 129474
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9165387153625488,
      "learning_rate": 0.00024198125604690904,
      "loss": 2.9873,
      "step": 129475
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.8087098598480225,
      "learning_rate": 0.0002419772427165986,
      "loss": 2.9092,
      "step": 129476
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.5446324348449707,
      "learning_rate": 0.00024197322939707567,
      "loss": 2.9755,
      "step": 129477
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.923681616783142,
      "learning_rate": 0.000241969216088341,
      "loss": 3.2102,
      "step": 129478
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1611669063568115,
      "learning_rate": 0.0002419652027903954,
      "loss": 3.1163,
      "step": 129479
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.015986680984497,
      "learning_rate": 0.00024196118950323955,
      "loss": 3.0821,
      "step": 129480
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.2206201553344727,
      "learning_rate": 0.00024195717622687419,
      "loss": 3.0387,
      "step": 129481
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8047395944595337,
      "learning_rate": 0.00024195316296130005,
      "loss": 2.9451,
      "step": 129482
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9000706672668457,
      "learning_rate": 0.00024194914970651792,
      "loss": 2.9204,
      "step": 129483
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8646831512451172,
      "learning_rate": 0.00024194513646252856,
      "loss": 2.9735,
      "step": 129484
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6598832607269287,
      "learning_rate": 0.00024194112322933266,
      "loss": 2.8344,
      "step": 129485
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9154750108718872,
      "learning_rate": 0.00024193711000693115,
      "loss": 3.0659,
      "step": 129486
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.827003002166748,
      "learning_rate": 0.0002419330967953245,
      "loss": 3.0216,
      "step": 129487
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.6034011840820312,
      "learning_rate": 0.00024192908359451358,
      "loss": 2.8868,
      "step": 129488
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.32186222076416,
      "learning_rate": 0.00024192507040449914,
      "loss": 2.7854,
      "step": 129489
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.343092918395996,
      "learning_rate": 0.0002419210572252819,
      "loss": 3.1564,
      "step": 129490
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.085563898086548,
      "learning_rate": 0.00024191704405686267,
      "loss": 3.0181,
      "step": 129491
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.678747653961182,
      "learning_rate": 0.0002419130308992423,
      "loss": 2.6092,
      "step": 129492
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0823068618774414,
      "learning_rate": 0.00024190901775242125,
      "loss": 3.0902,
      "step": 129493
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9338585138320923,
      "learning_rate": 0.00024190500461640038,
      "loss": 3.023,
      "step": 129494
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.345536470413208,
      "learning_rate": 0.00024190099149118047,
      "loss": 3.0777,
      "step": 129495
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1334574222564697,
      "learning_rate": 0.0002418969783767623,
      "loss": 2.9938,
      "step": 129496
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.970067024230957,
      "learning_rate": 0.00024189296527314655,
      "loss": 2.9672,
      "step": 129497
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7384746074676514,
      "learning_rate": 0.00024188895218033412,
      "loss": 3.2585,
      "step": 129498
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.013709545135498,
      "learning_rate": 0.00024188493909832551,
      "loss": 3.1299,
      "step": 129499
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.663693428039551,
      "learning_rate": 0.0002418809260271216,
      "loss": 2.6508,
      "step": 129500
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1387269496917725,
      "learning_rate": 0.0002418769129667231,
      "loss": 3.2905,
      "step": 129501
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8048361539840698,
      "learning_rate": 0.00024187289991713082,
      "loss": 3.0017,
      "step": 129502
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4889583587646484,
      "learning_rate": 0.0002418688868783454,
      "loss": 2.8323,
      "step": 129503
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.674071788787842,
      "learning_rate": 0.00024186487385036783,
      "loss": 2.8434,
      "step": 129504
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.17763352394104,
      "learning_rate": 0.00024186086083319855,
      "loss": 3.0712,
      "step": 129505
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5833652019500732,
      "learning_rate": 0.0002418568478268384,
      "loss": 2.9598,
      "step": 129506
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8144725561141968,
      "learning_rate": 0.00024185283483128817,
      "loss": 2.7966,
      "step": 129507
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1941347122192383,
      "learning_rate": 0.00024184882184654863,
      "loss": 3.0873,
      "step": 129508
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0445096492767334,
      "learning_rate": 0.00024184480887262042,
      "loss": 2.7758,
      "step": 129509
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.041130781173706,
      "learning_rate": 0.00024184079590950456,
      "loss": 3.1441,
      "step": 129510
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.528215169906616,
      "learning_rate": 0.00024183678295720143,
      "loss": 2.9151,
      "step": 129511
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6791125535964966,
      "learning_rate": 0.00024183277001571196,
      "loss": 2.9451,
      "step": 129512
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9331300258636475,
      "learning_rate": 0.00024182875708503686,
      "loss": 3.0957,
      "step": 129513
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7849743366241455,
      "learning_rate": 0.0002418247441651769,
      "loss": 3.0326,
      "step": 129514
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.029664993286133,
      "learning_rate": 0.0002418207312561328,
      "loss": 2.9457,
      "step": 129515
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3250279426574707,
      "learning_rate": 0.00024181671835790534,
      "loss": 2.9721,
      "step": 129516
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3828577995300293,
      "learning_rate": 0.00024181270547049538,
      "loss": 2.7477,
      "step": 129517
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0040500164031982,
      "learning_rate": 0.0002418086925939034,
      "loss": 2.9358,
      "step": 129518
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8284220695495605,
      "learning_rate": 0.0002418046797281303,
      "loss": 2.9909,
      "step": 129519
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.131842613220215,
      "learning_rate": 0.00024180066687317682,
      "loss": 2.7256,
      "step": 129520
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7230498790740967,
      "learning_rate": 0.0002417966540290436,
      "loss": 2.9985,
      "step": 129521
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.072791814804077,
      "learning_rate": 0.00024179264119573158,
      "loss": 2.9774,
      "step": 129522
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8813750743865967,
      "learning_rate": 0.0002417886283732415,
      "loss": 3.0223,
      "step": 129523
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.059286594390869,
      "learning_rate": 0.0002417846155615739,
      "loss": 2.9989,
      "step": 129524
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9764925241470337,
      "learning_rate": 0.00024178060276072963,
      "loss": 2.9429,
      "step": 129525
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1934173107147217,
      "learning_rate": 0.00024177658997070946,
      "loss": 2.7907,
      "step": 129526
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.168440103530884,
      "learning_rate": 0.00024177257719151408,
      "loss": 3.1361,
      "step": 129527
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.154900312423706,
      "learning_rate": 0.0002417685644231443,
      "loss": 2.943,
      "step": 129528
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4144673347473145,
      "learning_rate": 0.000241764551665601,
      "loss": 3.0455,
      "step": 129529
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.24665904045105,
      "learning_rate": 0.0002417605389188846,
      "loss": 3.2208,
      "step": 129530
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6605005264282227,
      "learning_rate": 0.00024175652618299605,
      "loss": 3.053,
      "step": 129531
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5323588848114014,
      "learning_rate": 0.00024175251345793603,
      "loss": 3.037,
      "step": 129532
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8644311428070068,
      "learning_rate": 0.0002417485007437053,
      "loss": 2.9898,
      "step": 129533
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2245357036590576,
      "learning_rate": 0.00024174448804030466,
      "loss": 3.0996,
      "step": 129534
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.227447271347046,
      "learning_rate": 0.00024174047534773497,
      "loss": 3.0035,
      "step": 129535
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2523937225341797,
      "learning_rate": 0.00024173646266599665,
      "loss": 2.8392,
      "step": 129536
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9531896114349365,
      "learning_rate": 0.00024173244999509065,
      "loss": 2.9624,
      "step": 129537
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.108074903488159,
      "learning_rate": 0.00024172843733501765,
      "loss": 3.1224,
      "step": 129538
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9299993515014648,
      "learning_rate": 0.00024172442468577848,
      "loss": 3.1155,
      "step": 129539
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.704822301864624,
      "learning_rate": 0.00024172041204737382,
      "loss": 2.9503,
      "step": 129540
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5662901401519775,
      "learning_rate": 0.00024171639941980447,
      "loss": 2.8798,
      "step": 129541
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.257566452026367,
      "learning_rate": 0.0002417123868030712,
      "loss": 2.9528,
      "step": 129542
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9820475578308105,
      "learning_rate": 0.00024170837419717456,
      "loss": 2.9891,
      "step": 129543
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.018113374710083,
      "learning_rate": 0.0002417043616021155,
      "loss": 2.8847,
      "step": 129544
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1181063652038574,
      "learning_rate": 0.00024170034901789466,
      "loss": 2.8129,
      "step": 129545
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.337831020355225,
      "learning_rate": 0.00024169633644451286,
      "loss": 2.8953,
      "step": 129546
    },
    {
      "epoch": 1.69,
      "grad_norm": 5.465143203735352,
      "learning_rate": 0.00024169232388197086,
      "loss": 3.0277,
      "step": 129547
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.5578064918518066,
      "learning_rate": 0.00024168831133026928,
      "loss": 2.757,
      "step": 129548
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.47757625579834,
      "learning_rate": 0.00024168429878940894,
      "loss": 2.8878,
      "step": 129549
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8561418056488037,
      "learning_rate": 0.0002416802862593907,
      "loss": 2.9794,
      "step": 129550
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.071126937866211,
      "learning_rate": 0.0002416762737402151,
      "loss": 3.0239,
      "step": 129551
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3263373374938965,
      "learning_rate": 0.00024167226123188295,
      "loss": 2.9282,
      "step": 129552
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9353100061416626,
      "learning_rate": 0.00024166824873439515,
      "loss": 2.9697,
      "step": 129553
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.238680601119995,
      "learning_rate": 0.00024166423624775223,
      "loss": 3.0279,
      "step": 129554
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.251549243927002,
      "learning_rate": 0.00024166022377195505,
      "loss": 3.0608,
      "step": 129555
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.480069875717163,
      "learning_rate": 0.0002416562113070043,
      "loss": 3.0964,
      "step": 129556
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1893484592437744,
      "learning_rate": 0.00024165219885290083,
      "loss": 2.9942,
      "step": 129557
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1142525672912598,
      "learning_rate": 0.00024164818640964525,
      "loss": 2.8258,
      "step": 129558
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1686508655548096,
      "learning_rate": 0.0002416441739772385,
      "loss": 3.155,
      "step": 129559
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2492942810058594,
      "learning_rate": 0.00024164016155568108,
      "loss": 3.0561,
      "step": 129560
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8968548774719238,
      "learning_rate": 0.00024163614914497387,
      "loss": 3.0703,
      "step": 129561
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0023038387298584,
      "learning_rate": 0.0002416321367451176,
      "loss": 2.9697,
      "step": 129562
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4660420417785645,
      "learning_rate": 0.000241628124356113,
      "loss": 2.7613,
      "step": 129563
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9443680047988892,
      "learning_rate": 0.0002416241119779609,
      "loss": 2.856,
      "step": 129564
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3296971321105957,
      "learning_rate": 0.000241620099610662,
      "loss": 2.9341,
      "step": 129565
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.925529956817627,
      "learning_rate": 0.00024161608725421693,
      "loss": 2.8529,
      "step": 129566
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.098646402359009,
      "learning_rate": 0.0002416120749086266,
      "loss": 2.7894,
      "step": 129567
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8620814085006714,
      "learning_rate": 0.0002416080625738916,
      "loss": 2.7997,
      "step": 129568
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7550435066223145,
      "learning_rate": 0.00024160405025001284,
      "loss": 2.8918,
      "step": 129569
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.190554141998291,
      "learning_rate": 0.00024160003793699094,
      "loss": 3.0126,
      "step": 129570
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9052300453186035,
      "learning_rate": 0.00024159602563482684,
      "loss": 3.2412,
      "step": 129571
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.105341672897339,
      "learning_rate": 0.00024159201334352103,
      "loss": 2.9642,
      "step": 129572
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.124232530593872,
      "learning_rate": 0.00024158800106307434,
      "loss": 3.028,
      "step": 129573
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2611355781555176,
      "learning_rate": 0.00024158398879348754,
      "loss": 3.0653,
      "step": 129574
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9445112943649292,
      "learning_rate": 0.0002415799765347614,
      "loss": 2.9183,
      "step": 129575
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1438705921173096,
      "learning_rate": 0.00024157596428689662,
      "loss": 3.2086,
      "step": 129576
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9881529808044434,
      "learning_rate": 0.0002415719520498941,
      "loss": 3.0569,
      "step": 129577
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4491310119628906,
      "learning_rate": 0.00024156793982375434,
      "loss": 3.0966,
      "step": 129578
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.207091808319092,
      "learning_rate": 0.00024156392760847823,
      "loss": 3.0631,
      "step": 129579
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9265310764312744,
      "learning_rate": 0.00024155991540406645,
      "loss": 2.8524,
      "step": 129580
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.366769313812256,
      "learning_rate": 0.00024155590321051982,
      "loss": 3.0706,
      "step": 129581
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2197184562683105,
      "learning_rate": 0.00024155189102783903,
      "loss": 2.934,
      "step": 129582
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9869970083236694,
      "learning_rate": 0.00024154787885602484,
      "loss": 2.8127,
      "step": 129583
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.390261173248291,
      "learning_rate": 0.00024154386669507816,
      "loss": 2.5501,
      "step": 129584
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.75904381275177,
      "learning_rate": 0.0002415398545449994,
      "loss": 2.9216,
      "step": 129585
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.2319817543029785,
      "learning_rate": 0.00024153584240578955,
      "loss": 3.1811,
      "step": 129586
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1107029914855957,
      "learning_rate": 0.0002415318302774492,
      "loss": 2.7343,
      "step": 129587
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.130094528198242,
      "learning_rate": 0.0002415278181599793,
      "loss": 2.8641,
      "step": 129588
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8401219844818115,
      "learning_rate": 0.00024152380605338043,
      "loss": 2.8801,
      "step": 129589
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6142566204071045,
      "learning_rate": 0.00024151979395765352,
      "loss": 2.8765,
      "step": 129590
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.774649620056152,
      "learning_rate": 0.00024151578187279904,
      "loss": 2.9063,
      "step": 129591
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0294623374938965,
      "learning_rate": 0.00024151176979881784,
      "loss": 2.779,
      "step": 129592
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8807021379470825,
      "learning_rate": 0.00024150775773571078,
      "loss": 2.8599,
      "step": 129593
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.6191039085388184,
      "learning_rate": 0.00024150374568347851,
      "loss": 2.8648,
      "step": 129594
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7619478702545166,
      "learning_rate": 0.00024149973364212177,
      "loss": 2.9037,
      "step": 129595
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.782397985458374,
      "learning_rate": 0.0002414957216116415,
      "loss": 2.8176,
      "step": 129596
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.051623582839966,
      "learning_rate": 0.00024149170959203813,
      "loss": 3.0054,
      "step": 129597
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9989068508148193,
      "learning_rate": 0.00024148769758331257,
      "loss": 3.1131,
      "step": 129598
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9388818740844727,
      "learning_rate": 0.00024148368558546556,
      "loss": 2.8236,
      "step": 129599
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0830483436584473,
      "learning_rate": 0.0002414796735984978,
      "loss": 2.7927,
      "step": 129600
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9767907857894897,
      "learning_rate": 0.0002414756616224101,
      "loss": 3.1131,
      "step": 129601
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1188292503356934,
      "learning_rate": 0.0002414716496572033,
      "loss": 2.97,
      "step": 129602
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.164278030395508,
      "learning_rate": 0.00024146763770287787,
      "loss": 3.1106,
      "step": 129603
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0595626831054688,
      "learning_rate": 0.00024146362575943475,
      "loss": 3.0071,
      "step": 129604
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.002431631088257,
      "learning_rate": 0.00024145961382687466,
      "loss": 3.0085,
      "step": 129605
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8988276720046997,
      "learning_rate": 0.00024145560190519828,
      "loss": 2.8274,
      "step": 129606
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9105401039123535,
      "learning_rate": 0.00024145158999440644,
      "loss": 3.0118,
      "step": 129607
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0023741722106934,
      "learning_rate": 0.00024144757809450003,
      "loss": 2.745,
      "step": 129608
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1426703929901123,
      "learning_rate": 0.00024144356620547943,
      "loss": 3.2247,
      "step": 129609
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9407198429107666,
      "learning_rate": 0.00024143955432734559,
      "loss": 2.7628,
      "step": 129610
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3791017532348633,
      "learning_rate": 0.00024143554246009921,
      "loss": 2.9999,
      "step": 129611
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0061280727386475,
      "learning_rate": 0.0002414315306037411,
      "loss": 3.0449,
      "step": 129612
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9256621599197388,
      "learning_rate": 0.000241427518758272,
      "loss": 2.9017,
      "step": 129613
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.353074550628662,
      "learning_rate": 0.00024142350692369272,
      "loss": 2.9468,
      "step": 129614
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.268625497817993,
      "learning_rate": 0.0002414194951000038,
      "loss": 3.3032,
      "step": 129615
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9545451402664185,
      "learning_rate": 0.0002414154832872061,
      "loss": 3.0962,
      "step": 129616
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.359528064727783,
      "learning_rate": 0.00024141147148530037,
      "loss": 3.0192,
      "step": 129617
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.915412425994873,
      "learning_rate": 0.00024140745969428734,
      "loss": 2.9044,
      "step": 129618
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0187621116638184,
      "learning_rate": 0.00024140344791416778,
      "loss": 2.6899,
      "step": 129619
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.80731463432312,
      "learning_rate": 0.00024139943614494256,
      "loss": 2.922,
      "step": 129620
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.414262056350708,
      "learning_rate": 0.00024139542438661216,
      "loss": 2.9714,
      "step": 129621
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.152611494064331,
      "learning_rate": 0.00024139141263917745,
      "loss": 3.0832,
      "step": 129622
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4631619453430176,
      "learning_rate": 0.00024138740090263918,
      "loss": 3.202,
      "step": 129623
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.998864769935608,
      "learning_rate": 0.00024138338917699813,
      "loss": 2.9623,
      "step": 129624
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.056032180786133,
      "learning_rate": 0.00024137937746225496,
      "loss": 2.9168,
      "step": 129625
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9668924808502197,
      "learning_rate": 0.00024137536575841056,
      "loss": 2.6546,
      "step": 129626
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8870283365249634,
      "learning_rate": 0.0002413713540654656,
      "loss": 3.0515,
      "step": 129627
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2943694591522217,
      "learning_rate": 0.00024136734238342075,
      "loss": 2.7767,
      "step": 129628
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1447389125823975,
      "learning_rate": 0.0002413633307122768,
      "loss": 3.1822,
      "step": 129629
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9861544370651245,
      "learning_rate": 0.00024135931905203454,
      "loss": 3.0481,
      "step": 129630
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9666413068771362,
      "learning_rate": 0.00024135530740269464,
      "loss": 2.9378,
      "step": 129631
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5914969444274902,
      "learning_rate": 0.000241351295764258,
      "loss": 2.6932,
      "step": 129632
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.494562864303589,
      "learning_rate": 0.00024134728413672518,
      "loss": 3.1626,
      "step": 129633
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1587226390838623,
      "learning_rate": 0.00024134327252009707,
      "loss": 3.1668,
      "step": 129634
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.439547061920166,
      "learning_rate": 0.0002413392609143743,
      "loss": 3.1004,
      "step": 129635
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0219168663024902,
      "learning_rate": 0.00024133524931955767,
      "loss": 2.7831,
      "step": 129636
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0098111629486084,
      "learning_rate": 0.0002413312377356479,
      "loss": 3.1926,
      "step": 129637
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4880642890930176,
      "learning_rate": 0.00024132722616264588,
      "loss": 2.6996,
      "step": 129638
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.129018545150757,
      "learning_rate": 0.00024132321460055214,
      "loss": 2.9476,
      "step": 129639
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4895179271698,
      "learning_rate": 0.0002413192030493675,
      "loss": 2.7469,
      "step": 129640
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.472329616546631,
      "learning_rate": 0.0002413151915090928,
      "loss": 3.0627,
      "step": 129641
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.311285972595215,
      "learning_rate": 0.00024131117997972868,
      "loss": 2.961,
      "step": 129642
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.051746129989624,
      "learning_rate": 0.0002413071684612759,
      "loss": 3.0025,
      "step": 129643
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.676250696182251,
      "learning_rate": 0.00024130315695373528,
      "loss": 3.0289,
      "step": 129644
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.129453420639038,
      "learning_rate": 0.00024129914545710748,
      "loss": 3.1539,
      "step": 129645
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.474221229553223,
      "learning_rate": 0.00024129513397139324,
      "loss": 2.8071,
      "step": 129646
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2667911052703857,
      "learning_rate": 0.00024129112249659333,
      "loss": 2.9886,
      "step": 129647
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0532710552215576,
      "learning_rate": 0.00024128711103270852,
      "loss": 2.8446,
      "step": 129648
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.498856544494629,
      "learning_rate": 0.00024128309957973965,
      "loss": 2.8337,
      "step": 129649
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.954885482788086,
      "learning_rate": 0.00024127908813768726,
      "loss": 3.2296,
      "step": 129650
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.366144895553589,
      "learning_rate": 0.00024127507670655227,
      "loss": 3.0956,
      "step": 129651
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1449334621429443,
      "learning_rate": 0.00024127106528633527,
      "loss": 3.1433,
      "step": 129652
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8005691766738892,
      "learning_rate": 0.00024126705387703714,
      "loss": 3.005,
      "step": 129653
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6063687801361084,
      "learning_rate": 0.0002412630424786585,
      "loss": 2.9065,
      "step": 129654
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.007894277572632,
      "learning_rate": 0.0002412590310912002,
      "loss": 2.9284,
      "step": 129655
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.765014410018921,
      "learning_rate": 0.00024125501971466307,
      "loss": 3.0541,
      "step": 129656
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.821709394454956,
      "learning_rate": 0.00024125100834904766,
      "loss": 3.381,
      "step": 129657
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1136629581451416,
      "learning_rate": 0.00024124699699435478,
      "loss": 2.7646,
      "step": 129658
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.469527244567871,
      "learning_rate": 0.00024124298565058515,
      "loss": 2.8848,
      "step": 129659
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.326371908187866,
      "learning_rate": 0.0002412389743177396,
      "loss": 3.0751,
      "step": 129660
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9981886148452759,
      "learning_rate": 0.00024123496299581883,
      "loss": 3.0891,
      "step": 129661
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.75481915473938,
      "learning_rate": 0.00024123095168482358,
      "loss": 2.7539,
      "step": 129662
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.844710350036621,
      "learning_rate": 0.00024122694038475472,
      "loss": 2.7581,
      "step": 129663
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3408126831054688,
      "learning_rate": 0.00024122292909561278,
      "loss": 3.0317,
      "step": 129664
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8500503301620483,
      "learning_rate": 0.0002412189178173986,
      "loss": 2.993,
      "step": 129665
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.262838840484619,
      "learning_rate": 0.00024121490655011293,
      "loss": 3.1685,
      "step": 129666
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.444143295288086,
      "learning_rate": 0.00024121089529375653,
      "loss": 2.9594,
      "step": 129667
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0646424293518066,
      "learning_rate": 0.0002412068840483301,
      "loss": 2.9005,
      "step": 129668
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.94285249710083,
      "learning_rate": 0.0002412028728138346,
      "loss": 3.125,
      "step": 129669
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5244686603546143,
      "learning_rate": 0.0002411988615902704,
      "loss": 2.8211,
      "step": 129670
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.928885817527771,
      "learning_rate": 0.0002411948503776385,
      "loss": 3.0407,
      "step": 129671
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.298236131668091,
      "learning_rate": 0.00024119083917593955,
      "loss": 3.1422,
      "step": 129672
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8249361515045166,
      "learning_rate": 0.00024118682798517435,
      "loss": 3.1067,
      "step": 129673
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.4682047367095947,
      "learning_rate": 0.00024118281680534363,
      "loss": 3.0504,
      "step": 129674
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.648012161254883,
      "learning_rate": 0.00024117880563644827,
      "loss": 3.172,
      "step": 129675
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0377814769744873,
      "learning_rate": 0.00024117479447848875,
      "loss": 3.2043,
      "step": 129676
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1358141899108887,
      "learning_rate": 0.00024117078333146593,
      "loss": 2.8891,
      "step": 129677
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.69016170501709,
      "learning_rate": 0.00024116677219538057,
      "loss": 3.101,
      "step": 129678
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.530465841293335,
      "learning_rate": 0.00024116276107023345,
      "loss": 3.02,
      "step": 129679
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.318192958831787,
      "learning_rate": 0.00024115874995602523,
      "loss": 3.0437,
      "step": 129680
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9387482404708862,
      "learning_rate": 0.00024115473885275688,
      "loss": 3.0501,
      "step": 129681
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9833369255065918,
      "learning_rate": 0.00024115072776042886,
      "loss": 2.9145,
      "step": 129682
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1133933067321777,
      "learning_rate": 0.00024114671667904198,
      "loss": 2.6863,
      "step": 129683
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1335787773132324,
      "learning_rate": 0.00024114270560859707,
      "loss": 2.8781,
      "step": 129684
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.573809862136841,
      "learning_rate": 0.0002411386945490948,
      "loss": 2.7571,
      "step": 129685
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2927091121673584,
      "learning_rate": 0.00024113468350053603,
      "loss": 2.7939,
      "step": 129686
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.92792546749115,
      "learning_rate": 0.00024113067246292152,
      "loss": 2.8943,
      "step": 129687
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.221597909927368,
      "learning_rate": 0.0002411266614362518,
      "loss": 2.8791,
      "step": 129688
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0432868003845215,
      "learning_rate": 0.00024112265042052774,
      "loss": 3.0518,
      "step": 129689
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.223318338394165,
      "learning_rate": 0.0002411186394157501,
      "loss": 3.0648,
      "step": 129690
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.717520236968994,
      "learning_rate": 0.00024111462842191963,
      "loss": 2.9988,
      "step": 129691
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8200466632843018,
      "learning_rate": 0.00024111061743903705,
      "loss": 3.1119,
      "step": 129692
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8010767698287964,
      "learning_rate": 0.00024110660646710325,
      "loss": 2.7156,
      "step": 129693
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8336561918258667,
      "learning_rate": 0.0002411025955061187,
      "loss": 2.9395,
      "step": 129694
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3505353927612305,
      "learning_rate": 0.00024109858455608428,
      "loss": 2.924,
      "step": 129695
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.557736396789551,
      "learning_rate": 0.0002410945736170008,
      "loss": 2.7538,
      "step": 129696
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8742543458938599,
      "learning_rate": 0.0002410905626888689,
      "loss": 2.9076,
      "step": 129697
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4940671920776367,
      "learning_rate": 0.00024108655177168938,
      "loss": 2.7638,
      "step": 129698
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.089855194091797,
      "learning_rate": 0.00024108254086546314,
      "loss": 2.9848,
      "step": 129699
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.352098226547241,
      "learning_rate": 0.00024107852997019063,
      "loss": 2.8902,
      "step": 129700
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9343554973602295,
      "learning_rate": 0.00024107451908587273,
      "loss": 3.0149,
      "step": 129701
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.588318347930908,
      "learning_rate": 0.00024107050821251016,
      "loss": 2.9824,
      "step": 129702
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.179565191268921,
      "learning_rate": 0.0002410664973501037,
      "loss": 3.052,
      "step": 129703
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1206459999084473,
      "learning_rate": 0.0002410624864986541,
      "loss": 2.99,
      "step": 129704
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9552785158157349,
      "learning_rate": 0.00024105847565816227,
      "loss": 3.1473,
      "step": 129705
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6255381107330322,
      "learning_rate": 0.00024105446482862862,
      "loss": 2.9218,
      "step": 129706
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.222140312194824,
      "learning_rate": 0.00024105045401005406,
      "loss": 3.2093,
      "step": 129707
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.292687177658081,
      "learning_rate": 0.00024104644320243934,
      "loss": 2.9304,
      "step": 129708
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4364047050476074,
      "learning_rate": 0.00024104243240578516,
      "loss": 2.963,
      "step": 129709
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.281040668487549,
      "learning_rate": 0.00024103842162009233,
      "loss": 2.9987,
      "step": 129710
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.047006845474243,
      "learning_rate": 0.00024103441084536163,
      "loss": 2.6256,
      "step": 129711
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9142502546310425,
      "learning_rate": 0.0002410304000815938,
      "loss": 3.0298,
      "step": 129712
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4990062713623047,
      "learning_rate": 0.0002410263893287894,
      "loss": 2.955,
      "step": 129713
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9372187852859497,
      "learning_rate": 0.0002410223785869493,
      "loss": 2.776,
      "step": 129714
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8209774494171143,
      "learning_rate": 0.0002410183678560743,
      "loss": 2.9947,
      "step": 129715
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.225090980529785,
      "learning_rate": 0.00024101435713616508,
      "loss": 2.964,
      "step": 129716
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2341580390930176,
      "learning_rate": 0.0002410103464272224,
      "loss": 3.0745,
      "step": 129717
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.345613956451416,
      "learning_rate": 0.0002410063357292471,
      "loss": 3.3376,
      "step": 129718
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1593010425567627,
      "learning_rate": 0.00024100232504223982,
      "loss": 3.0213,
      "step": 129719
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9654492139816284,
      "learning_rate": 0.00024099831436620123,
      "loss": 2.9061,
      "step": 129720
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4991860389709473,
      "learning_rate": 0.0002409943037011322,
      "loss": 2.7815,
      "step": 129721
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5948855876922607,
      "learning_rate": 0.00024099029304703343,
      "loss": 2.8244,
      "step": 129722
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.067674398422241,
      "learning_rate": 0.00024098628240390568,
      "loss": 3.1116,
      "step": 129723
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7462928295135498,
      "learning_rate": 0.00024098227177174976,
      "loss": 3.13,
      "step": 129724
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9271581172943115,
      "learning_rate": 0.00024097826115056627,
      "loss": 2.9988,
      "step": 129725
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.138735294342041,
      "learning_rate": 0.00024097425054035615,
      "loss": 3.1192,
      "step": 129726
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3702375888824463,
      "learning_rate": 0.00024097023994111992,
      "loss": 2.8625,
      "step": 129727
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0330448150634766,
      "learning_rate": 0.00024096622935285844,
      "loss": 2.9585,
      "step": 129728
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1408681869506836,
      "learning_rate": 0.00024096221877557245,
      "loss": 3.0261,
      "step": 129729
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9721183776855469,
      "learning_rate": 0.00024095820820926276,
      "loss": 2.9015,
      "step": 129730
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.870004653930664,
      "learning_rate": 0.00024095419765393,
      "loss": 2.9707,
      "step": 129731
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.195424795150757,
      "learning_rate": 0.000240950187109575,
      "loss": 3.1045,
      "step": 129732
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1295738220214844,
      "learning_rate": 0.00024094617657619848,
      "loss": 2.9734,
      "step": 129733
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9353513717651367,
      "learning_rate": 0.00024094216605380117,
      "loss": 2.9134,
      "step": 129734
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.772763729095459,
      "learning_rate": 0.00024093815554238377,
      "loss": 2.8151,
      "step": 129735
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8644235134124756,
      "learning_rate": 0.0002409341450419472,
      "loss": 3.0268,
      "step": 129736
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.581352472305298,
      "learning_rate": 0.00024093013455249202,
      "loss": 2.9497,
      "step": 129737
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.243934392929077,
      "learning_rate": 0.00024092612407401902,
      "loss": 2.9101,
      "step": 129738
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.191391706466675,
      "learning_rate": 0.00024092211360652896,
      "loss": 2.8764,
      "step": 129739
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2142105102539062,
      "learning_rate": 0.0002409181031500226,
      "loss": 2.9344,
      "step": 129740
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9536733627319336,
      "learning_rate": 0.00024091409270450076,
      "loss": 2.9805,
      "step": 129741
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.200413227081299,
      "learning_rate": 0.0002409100822699641,
      "loss": 2.9367,
      "step": 129742
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8425617218017578,
      "learning_rate": 0.00024090607184641327,
      "loss": 2.9169,
      "step": 129743
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.181818962097168,
      "learning_rate": 0.00024090206143384917,
      "loss": 3.0119,
      "step": 129744
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.846628189086914,
      "learning_rate": 0.00024089805103227248,
      "loss": 2.8215,
      "step": 129745
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9753329753875732,
      "learning_rate": 0.00024089404064168393,
      "loss": 2.9551,
      "step": 129746
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0513315200805664,
      "learning_rate": 0.00024089003026208432,
      "loss": 2.9985,
      "step": 129747
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3287410736083984,
      "learning_rate": 0.00024088601989347453,
      "loss": 3.0458,
      "step": 129748
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1344034671783447,
      "learning_rate": 0.00024088200953585495,
      "loss": 3.2913,
      "step": 129749
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.5042202472686768,
      "learning_rate": 0.00024087799918922657,
      "loss": 3.0561,
      "step": 129750
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9039788246154785,
      "learning_rate": 0.00024087398885359006,
      "loss": 2.7966,
      "step": 129751
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.152010440826416,
      "learning_rate": 0.00024086997852894619,
      "loss": 2.8519,
      "step": 129752
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.793592691421509,
      "learning_rate": 0.0002408659682152957,
      "loss": 2.7802,
      "step": 129753
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9637880325317383,
      "learning_rate": 0.00024086195791263952,
      "loss": 2.9069,
      "step": 129754
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7475656270980835,
      "learning_rate": 0.00024085794762097803,
      "loss": 3.0299,
      "step": 129755
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1356310844421387,
      "learning_rate": 0.00024085393734031219,
      "loss": 3.1084,
      "step": 129756
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.49657940864563,
      "learning_rate": 0.0002408499270706427,
      "loss": 2.6007,
      "step": 129757
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9478086233139038,
      "learning_rate": 0.00024084591681197034,
      "loss": 2.994,
      "step": 129758
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8763290643692017,
      "learning_rate": 0.00024084190656429588,
      "loss": 2.9764,
      "step": 129759
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.3770558834075928,
      "learning_rate": 0.0002408378963276201,
      "loss": 2.9929,
      "step": 129760
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1620900630950928,
      "learning_rate": 0.00024083388610194359,
      "loss": 3.0361,
      "step": 129761
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2912399768829346,
      "learning_rate": 0.00024082987588726714,
      "loss": 2.878,
      "step": 129762
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0929837226867676,
      "learning_rate": 0.00024082586568359156,
      "loss": 2.9101,
      "step": 129763
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7250046730041504,
      "learning_rate": 0.0002408218554909175,
      "loss": 2.9917,
      "step": 129764
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3023135662078857,
      "learning_rate": 0.00024081784530924585,
      "loss": 2.795,
      "step": 129765
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0690879821777344,
      "learning_rate": 0.0002408138351385774,
      "loss": 2.9619,
      "step": 129766
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9285751581192017,
      "learning_rate": 0.0002408098249789126,
      "loss": 2.892,
      "step": 129767
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.3085520267486572,
      "learning_rate": 0.00024080581483025242,
      "loss": 2.9887,
      "step": 129768
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.6093409061431885,
      "learning_rate": 0.00024080180469259754,
      "loss": 2.95,
      "step": 129769
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.070093870162964,
      "learning_rate": 0.00024079779456594869,
      "loss": 2.9733,
      "step": 129770
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.27093505859375,
      "learning_rate": 0.0002407937844503067,
      "loss": 2.8659,
      "step": 129771
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1227595806121826,
      "learning_rate": 0.00024078977434567234,
      "loss": 2.7027,
      "step": 129772
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.045948028564453,
      "learning_rate": 0.00024078576425204618,
      "loss": 3.0394,
      "step": 129773
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.079071521759033,
      "learning_rate": 0.00024078175416942908,
      "loss": 3.0418,
      "step": 129774
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3754615783691406,
      "learning_rate": 0.00024077774409782176,
      "loss": 2.9078,
      "step": 129775
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.059713840484619,
      "learning_rate": 0.00024077373403722497,
      "loss": 2.9129,
      "step": 129776
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8408219814300537,
      "learning_rate": 0.00024076972398763945,
      "loss": 2.9904,
      "step": 129777
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.747997999191284,
      "learning_rate": 0.00024076571394906606,
      "loss": 2.8894,
      "step": 129778
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.067678213119507,
      "learning_rate": 0.00024076170392150538,
      "loss": 2.9587,
      "step": 129779
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.45979905128479,
      "learning_rate": 0.00024075769390495818,
      "loss": 2.654,
      "step": 129780
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6327338218688965,
      "learning_rate": 0.00024075368389942525,
      "loss": 2.9208,
      "step": 129781
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.284390926361084,
      "learning_rate": 0.0002407496739049073,
      "loss": 3.0027,
      "step": 129782
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6868021488189697,
      "learning_rate": 0.00024074566392140513,
      "loss": 2.9714,
      "step": 129783
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3811774253845215,
      "learning_rate": 0.00024074165394891948,
      "loss": 3.1173,
      "step": 129784
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.457319498062134,
      "learning_rate": 0.00024073764398745118,
      "loss": 2.7875,
      "step": 129785
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4867217540740967,
      "learning_rate": 0.00024073363403700075,
      "loss": 2.7737,
      "step": 129786
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1256916522979736,
      "learning_rate": 0.00024072962409756905,
      "loss": 2.9829,
      "step": 129787
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8516216278076172,
      "learning_rate": 0.0002407256141691568,
      "loss": 2.7687,
      "step": 129788
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2508444786071777,
      "learning_rate": 0.00024072160425176484,
      "loss": 3.0627,
      "step": 129789
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8607265949249268,
      "learning_rate": 0.00024071759434539383,
      "loss": 2.7431,
      "step": 129790
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3253896236419678,
      "learning_rate": 0.00024071358445004466,
      "loss": 2.9848,
      "step": 129791
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0156965255737305,
      "learning_rate": 0.00024070957456571781,
      "loss": 3.1954,
      "step": 129792
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.180859088897705,
      "learning_rate": 0.0002407055646924142,
      "loss": 2.9204,
      "step": 129793
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.332505464553833,
      "learning_rate": 0.0002407015548301345,
      "loss": 3.253,
      "step": 129794
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1511287689208984,
      "learning_rate": 0.00024069754497887955,
      "loss": 3.0881,
      "step": 129795
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7254061698913574,
      "learning_rate": 0.00024069353513865002,
      "loss": 3.221,
      "step": 129796
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4078245162963867,
      "learning_rate": 0.0002406895253094468,
      "loss": 3.055,
      "step": 129797
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2743759155273438,
      "learning_rate": 0.00024068551549127041,
      "loss": 2.9992,
      "step": 129798
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9930081367492676,
      "learning_rate": 0.0002406815056841217,
      "loss": 3.0165,
      "step": 129799
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4657299518585205,
      "learning_rate": 0.00024067749588800144,
      "loss": 3.2611,
      "step": 129800
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.342428207397461,
      "learning_rate": 0.0002406734861029103,
      "loss": 2.9492,
      "step": 129801
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1872315406799316,
      "learning_rate": 0.0002406694763288491,
      "loss": 2.9601,
      "step": 129802
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.575345277786255,
      "learning_rate": 0.00024066546656581863,
      "loss": 2.9299,
      "step": 129803
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1483314037323,
      "learning_rate": 0.00024066145681381955,
      "loss": 2.8894,
      "step": 129804
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8903323411941528,
      "learning_rate": 0.00024065744707285264,
      "loss": 2.9479,
      "step": 129805
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.226210594177246,
      "learning_rate": 0.00024065343734291854,
      "loss": 2.7704,
      "step": 129806
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7125658988952637,
      "learning_rate": 0.00024064942762401815,
      "loss": 2.8622,
      "step": 129807
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.07789945602417,
      "learning_rate": 0.0002406454179161521,
      "loss": 3.0378,
      "step": 129808
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9463788270950317,
      "learning_rate": 0.00024064140821932126,
      "loss": 3.0023,
      "step": 129809
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7800474166870117,
      "learning_rate": 0.00024063739853352628,
      "loss": 2.963,
      "step": 129810
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3622875213623047,
      "learning_rate": 0.00024063338885876796,
      "loss": 2.9706,
      "step": 129811
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0095481872558594,
      "learning_rate": 0.00024062937919504693,
      "loss": 3.2759,
      "step": 129812
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.752399444580078,
      "learning_rate": 0.00024062536954236406,
      "loss": 2.8071,
      "step": 129813
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5999886989593506,
      "learning_rate": 0.00024062135990072006,
      "loss": 2.9831,
      "step": 129814
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5562305450439453,
      "learning_rate": 0.0002406173502701157,
      "loss": 2.685,
      "step": 129815
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2087342739105225,
      "learning_rate": 0.00024061334065055164,
      "loss": 3.0288,
      "step": 129816
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3094208240509033,
      "learning_rate": 0.0002406093310420287,
      "loss": 2.9223,
      "step": 129817
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0239901542663574,
      "learning_rate": 0.00024060532144454765,
      "loss": 3.0219,
      "step": 129818
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4275286197662354,
      "learning_rate": 0.00024060131185810914,
      "loss": 3.0824,
      "step": 129819
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8211382627487183,
      "learning_rate": 0.00024059730228271397,
      "loss": 2.92,
      "step": 129820
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.209522247314453,
      "learning_rate": 0.00024059329271836294,
      "loss": 3.2845,
      "step": 129821
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3409762382507324,
      "learning_rate": 0.00024058928316505665,
      "loss": 3.007,
      "step": 129822
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.953359603881836,
      "learning_rate": 0.00024058527362279594,
      "loss": 2.8508,
      "step": 129823
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.045280694961548,
      "learning_rate": 0.00024058126409158158,
      "loss": 3.2075,
      "step": 129824
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9542722702026367,
      "learning_rate": 0.00024057725457141433,
      "loss": 2.8187,
      "step": 129825
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.4279212951660156,
      "learning_rate": 0.00024057324506229484,
      "loss": 2.9461,
      "step": 129826
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.334338665008545,
      "learning_rate": 0.00024056923556422394,
      "loss": 3.0891,
      "step": 129827
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9475196599960327,
      "learning_rate": 0.00024056522607720233,
      "loss": 3.1109,
      "step": 129828
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1188933849334717,
      "learning_rate": 0.00024056121660123072,
      "loss": 2.7042,
      "step": 129829
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1233835220336914,
      "learning_rate": 0.00024055720713630992,
      "loss": 3.0493,
      "step": 129830
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4127838611602783,
      "learning_rate": 0.00024055319768244063,
      "loss": 3.0946,
      "step": 129831
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8767304420471191,
      "learning_rate": 0.00024054918823962365,
      "loss": 2.978,
      "step": 129832
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7256522178649902,
      "learning_rate": 0.00024054517880785982,
      "loss": 3.4095,
      "step": 129833
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1464319229125977,
      "learning_rate": 0.00024054116938714964,
      "loss": 3.0567,
      "step": 129834
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8185288906097412,
      "learning_rate": 0.00024053715997749396,
      "loss": 3.0532,
      "step": 129835
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.5131866931915283,
      "learning_rate": 0.00024053315057889356,
      "loss": 2.5361,
      "step": 129836
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3257579803466797,
      "learning_rate": 0.00024052914119134918,
      "loss": 2.8012,
      "step": 129837
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.937917709350586,
      "learning_rate": 0.00024052513181486155,
      "loss": 2.8522,
      "step": 129838
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.571162700653076,
      "learning_rate": 0.00024052112244943153,
      "loss": 2.7789,
      "step": 129839
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.902313232421875,
      "learning_rate": 0.00024051711309505967,
      "loss": 2.9005,
      "step": 129840
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8730840682983398,
      "learning_rate": 0.00024051310375174676,
      "loss": 2.9837,
      "step": 129841
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6124260425567627,
      "learning_rate": 0.0002405090944194936,
      "loss": 3.107,
      "step": 129842
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.376734972000122,
      "learning_rate": 0.0002405050850983009,
      "loss": 2.8709,
      "step": 129843
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.289497137069702,
      "learning_rate": 0.00024050107578816948,
      "loss": 3.015,
      "step": 129844
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1762218475341797,
      "learning_rate": 0.0002404970664891001,
      "loss": 3.0165,
      "step": 129845
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.490297555923462,
      "learning_rate": 0.00024049305720109335,
      "loss": 2.835,
      "step": 129846
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0695362091064453,
      "learning_rate": 0.00024048904792415003,
      "loss": 2.9884,
      "step": 129847
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.294651746749878,
      "learning_rate": 0.000240485038658271,
      "loss": 3.0394,
      "step": 129848
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0757601261138916,
      "learning_rate": 0.00024048102940345685,
      "loss": 2.9042,
      "step": 129849
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.029309034347534,
      "learning_rate": 0.0002404770201597084,
      "loss": 3.0942,
      "step": 129850
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2745656967163086,
      "learning_rate": 0.00024047301092702645,
      "loss": 2.7276,
      "step": 129851
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8242366313934326,
      "learning_rate": 0.0002404690017054118,
      "loss": 2.9221,
      "step": 129852
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.2776122093200684,
      "learning_rate": 0.00024046499249486493,
      "loss": 3.0825,
      "step": 129853
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6999130249023438,
      "learning_rate": 0.00024046098329538672,
      "loss": 3.1191,
      "step": 129854
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8404977321624756,
      "learning_rate": 0.000240456974106978,
      "loss": 2.9123,
      "step": 129855
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7089436054229736,
      "learning_rate": 0.0002404529649296394,
      "loss": 2.8027,
      "step": 129856
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7691082954406738,
      "learning_rate": 0.00024044895576337175,
      "loss": 2.903,
      "step": 129857
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2539145946502686,
      "learning_rate": 0.00024044494660817588,
      "loss": 2.915,
      "step": 129858
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.152681827545166,
      "learning_rate": 0.0002404409374640523,
      "loss": 2.938,
      "step": 129859
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7860420942306519,
      "learning_rate": 0.00024043692833100185,
      "loss": 2.976,
      "step": 129860
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0928328037261963,
      "learning_rate": 0.00024043291920902533,
      "loss": 3.1304,
      "step": 129861
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.874995470046997,
      "learning_rate": 0.00024042891009812346,
      "loss": 2.6587,
      "step": 129862
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.895276427268982,
      "learning_rate": 0.00024042490099829695,
      "loss": 2.9581,
      "step": 129863
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1044182777404785,
      "learning_rate": 0.00024042089190954669,
      "loss": 3.1062,
      "step": 129864
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.896383285522461,
      "learning_rate": 0.00024041688283187322,
      "loss": 3.0037,
      "step": 129865
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1651804447174072,
      "learning_rate": 0.00024041287376527735,
      "loss": 3.2341,
      "step": 129866
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.05018949508667,
      "learning_rate": 0.00024040886470975988,
      "loss": 2.7151,
      "step": 129867
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9719256162643433,
      "learning_rate": 0.0002404048556653215,
      "loss": 3.1268,
      "step": 129868
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0518946647644043,
      "learning_rate": 0.000240400846631963,
      "loss": 2.8451,
      "step": 129869
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.079730987548828,
      "learning_rate": 0.00024039683760968526,
      "loss": 2.8513,
      "step": 129870
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5323991775512695,
      "learning_rate": 0.00024039282859848873,
      "loss": 2.7879,
      "step": 129871
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0193874835968018,
      "learning_rate": 0.00024038881959837424,
      "loss": 2.9818,
      "step": 129872
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0488526821136475,
      "learning_rate": 0.00024038481060934265,
      "loss": 3.0043,
      "step": 129873
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.263674259185791,
      "learning_rate": 0.00024038080163139465,
      "loss": 2.9962,
      "step": 129874
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3083040714263916,
      "learning_rate": 0.00024037679266453098,
      "loss": 2.9027,
      "step": 129875
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.04199481010437,
      "learning_rate": 0.0002403727837087525,
      "loss": 3.0882,
      "step": 129876
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.124370574951172,
      "learning_rate": 0.0002403687747640597,
      "loss": 3.0026,
      "step": 129877
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0565576553344727,
      "learning_rate": 0.0002403647658304535,
      "loss": 3.1453,
      "step": 129878
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3501877784729004,
      "learning_rate": 0.0002403607569079346,
      "loss": 2.9418,
      "step": 129879
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.817665457725525,
      "learning_rate": 0.0002403567479965038,
      "loss": 3.0466,
      "step": 129880
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.326568126678467,
      "learning_rate": 0.00024035273909616176,
      "loss": 2.8245,
      "step": 129881
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.376302480697632,
      "learning_rate": 0.00024034873020690943,
      "loss": 3.1125,
      "step": 129882
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4639008045196533,
      "learning_rate": 0.00024034472132874725,
      "loss": 2.8651,
      "step": 129883
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2987759113311768,
      "learning_rate": 0.00024034071246167616,
      "loss": 2.9887,
      "step": 129884
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.869357943534851,
      "learning_rate": 0.0002403367036056968,
      "loss": 3.0106,
      "step": 129885
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.38110613822937,
      "learning_rate": 0.00024033269476080997,
      "loss": 2.9725,
      "step": 129886
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.256943464279175,
      "learning_rate": 0.00024032868592701646,
      "loss": 2.9064,
      "step": 129887
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8412567377090454,
      "learning_rate": 0.00024032467710431707,
      "loss": 2.814,
      "step": 129888
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6350791454315186,
      "learning_rate": 0.00024032066829271232,
      "loss": 2.8561,
      "step": 129889
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.758116602897644,
      "learning_rate": 0.0002403166594922031,
      "loss": 2.9339,
      "step": 129890
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9780625104904175,
      "learning_rate": 0.00024031265070279017,
      "loss": 3.2039,
      "step": 129891
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.159531593322754,
      "learning_rate": 0.0002403086419244742,
      "loss": 3.4045,
      "step": 129892
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0250868797302246,
      "learning_rate": 0.00024030463315725598,
      "loss": 2.7465,
      "step": 129893
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.7500550746917725,
      "learning_rate": 0.00024030062440113633,
      "loss": 2.9864,
      "step": 129894
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2764759063720703,
      "learning_rate": 0.0002402966156561159,
      "loss": 2.7689,
      "step": 129895
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0704619884490967,
      "learning_rate": 0.00024029260692219544,
      "loss": 2.975,
      "step": 129896
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8644014596939087,
      "learning_rate": 0.00024028859819937568,
      "loss": 3.0422,
      "step": 129897
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2806742191314697,
      "learning_rate": 0.0002402845894876574,
      "loss": 2.8444,
      "step": 129898
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0174784660339355,
      "learning_rate": 0.00024028058078704137,
      "loss": 3.0273,
      "step": 129899
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.29978609085083,
      "learning_rate": 0.00024027657209752833,
      "loss": 3.2178,
      "step": 129900
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9561151266098022,
      "learning_rate": 0.00024027256341911893,
      "loss": 2.7402,
      "step": 129901
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5381417274475098,
      "learning_rate": 0.00024026855475181404,
      "loss": 2.9789,
      "step": 129902
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0932018756866455,
      "learning_rate": 0.00024026454609561438,
      "loss": 3.1144,
      "step": 129903
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.807542562484741,
      "learning_rate": 0.00024026053745052062,
      "loss": 3.0033,
      "step": 129904
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6216585636138916,
      "learning_rate": 0.00024025652881653353,
      "loss": 3.0129,
      "step": 129905
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.684197187423706,
      "learning_rate": 0.00024025252019365396,
      "loss": 3.0193,
      "step": 129906
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5187127590179443,
      "learning_rate": 0.0002402485115818825,
      "loss": 2.9282,
      "step": 129907
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.940866470336914,
      "learning_rate": 0.00024024450298122,
      "loss": 2.9042,
      "step": 129908
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9062669277191162,
      "learning_rate": 0.00024024049439166716,
      "loss": 2.8748,
      "step": 129909
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.62949275970459,
      "learning_rate": 0.00024023648581322478,
      "loss": 2.8623,
      "step": 129910
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.37580943107605,
      "learning_rate": 0.00024023247724589352,
      "loss": 2.9289,
      "step": 129911
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.07741117477417,
      "learning_rate": 0.00024022846868967425,
      "loss": 2.8652,
      "step": 129912
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6953002214431763,
      "learning_rate": 0.00024022446014456753,
      "loss": 2.9941,
      "step": 129913
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.441638231277466,
      "learning_rate": 0.00024022045161057427,
      "loss": 3.0824,
      "step": 129914
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.745595932006836,
      "learning_rate": 0.0002402164430876951,
      "loss": 2.8708,
      "step": 129915
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.970841407775879,
      "learning_rate": 0.00024021243457593086,
      "loss": 2.987,
      "step": 129916
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9879512786865234,
      "learning_rate": 0.00024020842607528232,
      "loss": 2.7724,
      "step": 129917
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.298161268234253,
      "learning_rate": 0.00024020441758575007,
      "loss": 3.1789,
      "step": 129918
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.894200325012207,
      "learning_rate": 0.00024020040910733504,
      "loss": 2.9462,
      "step": 129919
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0631890296936035,
      "learning_rate": 0.00024019640064003778,
      "loss": 3.2136,
      "step": 129920
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.031298875808716,
      "learning_rate": 0.00024019239218385917,
      "loss": 2.869,
      "step": 129921
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2115464210510254,
      "learning_rate": 0.00024018838373879992,
      "loss": 2.8101,
      "step": 129922
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9533270597457886,
      "learning_rate": 0.0002401843753048608,
      "loss": 2.857,
      "step": 129923
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.011997699737549,
      "learning_rate": 0.00024018036688204256,
      "loss": 2.9606,
      "step": 129924
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9872076511383057,
      "learning_rate": 0.00024017635847034595,
      "loss": 2.9312,
      "step": 129925
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1032867431640625,
      "learning_rate": 0.0002401723500697716,
      "loss": 3.1047,
      "step": 129926
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1531190872192383,
      "learning_rate": 0.00024016834168032034,
      "loss": 2.8742,
      "step": 129927
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3088483810424805,
      "learning_rate": 0.0002401643333019929,
      "loss": 3.2323,
      "step": 129928
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0179316997528076,
      "learning_rate": 0.00024016032493479004,
      "loss": 3.0848,
      "step": 129929
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.309835433959961,
      "learning_rate": 0.00024015631657871251,
      "loss": 3.2147,
      "step": 129930
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.078280448913574,
      "learning_rate": 0.00024015230823376123,
      "loss": 3.0164,
      "step": 129931
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.993011474609375,
      "learning_rate": 0.00024014829989993657,
      "loss": 3.0025,
      "step": 129932
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.937082290649414,
      "learning_rate": 0.00024014429157723954,
      "loss": 2.8118,
      "step": 129933
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9776111841201782,
      "learning_rate": 0.00024014028326567075,
      "loss": 3.097,
      "step": 129934
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9943010807037354,
      "learning_rate": 0.00024013627496523104,
      "loss": 3.0112,
      "step": 129935
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.6661603450775146,
      "learning_rate": 0.00024013226667592114,
      "loss": 2.995,
      "step": 129936
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8854758739471436,
      "learning_rate": 0.00024012825839774186,
      "loss": 2.851,
      "step": 129937
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9524458646774292,
      "learning_rate": 0.0002401242501306938,
      "loss": 2.9999,
      "step": 129938
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.051774263381958,
      "learning_rate": 0.00024012024187477772,
      "loss": 2.7214,
      "step": 129939
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.103813409805298,
      "learning_rate": 0.00024011623362999444,
      "loss": 3.1582,
      "step": 129940
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.883684754371643,
      "learning_rate": 0.00024011222539634472,
      "loss": 3.2216,
      "step": 129941
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.181666374206543,
      "learning_rate": 0.0002401082171738292,
      "loss": 3.1672,
      "step": 129942
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.315117359161377,
      "learning_rate": 0.00024010420896244884,
      "loss": 2.9852,
      "step": 129943
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.431204319000244,
      "learning_rate": 0.00024010020076220415,
      "loss": 2.9504,
      "step": 129944
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4053943157196045,
      "learning_rate": 0.00024009619257309592,
      "loss": 2.9509,
      "step": 129945
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1848604679107666,
      "learning_rate": 0.00024009218439512497,
      "loss": 2.8592,
      "step": 129946
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2506537437438965,
      "learning_rate": 0.000240088176228292,
      "loss": 3.1443,
      "step": 129947
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.167255163192749,
      "learning_rate": 0.00024008416807259775,
      "loss": 2.9592,
      "step": 129948
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7618799209594727,
      "learning_rate": 0.00024008015992804313,
      "loss": 3.0421,
      "step": 129949
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.40925669670105,
      "learning_rate": 0.00024007615179462863,
      "loss": 3.0157,
      "step": 129950
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9058817625045776,
      "learning_rate": 0.00024007214367235507,
      "loss": 2.9295,
      "step": 129951
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0855844020843506,
      "learning_rate": 0.00024006813556122326,
      "loss": 2.6422,
      "step": 129952
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.825416922569275,
      "learning_rate": 0.00024006412746123388,
      "loss": 2.894,
      "step": 129953
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9819480180740356,
      "learning_rate": 0.00024006011937238774,
      "loss": 3.1067,
      "step": 129954
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4575252532958984,
      "learning_rate": 0.00024005611129468567,
      "loss": 3.0107,
      "step": 129955
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0100269317626953,
      "learning_rate": 0.00024005210322812818,
      "loss": 2.8695,
      "step": 129956
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8972845077514648,
      "learning_rate": 0.00024004809517271612,
      "loss": 3.0981,
      "step": 129957
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.150428056716919,
      "learning_rate": 0.00024004408712845028,
      "loss": 2.8634,
      "step": 129958
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6528136730194092,
      "learning_rate": 0.00024004007909533134,
      "loss": 3.1515,
      "step": 129959
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9954705238342285,
      "learning_rate": 0.0002400360710733601,
      "loss": 2.8439,
      "step": 129960
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7405436038970947,
      "learning_rate": 0.00024003206306253742,
      "loss": 2.804,
      "step": 129961
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.165212631225586,
      "learning_rate": 0.0002400280550628638,
      "loss": 3.1063,
      "step": 129962
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0725574493408203,
      "learning_rate": 0.00024002404707434004,
      "loss": 2.9832,
      "step": 129963
    },
    {
      "epoch": 1.69,
      "grad_norm": 5.643299579620361,
      "learning_rate": 0.00024002003909696698,
      "loss": 2.8871,
      "step": 129964
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.306177854537964,
      "learning_rate": 0.00024001603113074535,
      "loss": 2.6039,
      "step": 129965
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8953417539596558,
      "learning_rate": 0.00024001202317567582,
      "loss": 3.0169,
      "step": 129966
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.501370429992676,
      "learning_rate": 0.00024000801523175935,
      "loss": 2.8836,
      "step": 129967
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.955423593521118,
      "learning_rate": 0.00024000400729899636,
      "loss": 2.8776,
      "step": 129968
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9163100719451904,
      "learning_rate": 0.0002399999993773878,
      "loss": 2.7801,
      "step": 129969
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9578419923782349,
      "learning_rate": 0.00023999599146693436,
      "loss": 3.175,
      "step": 129970
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.872494697570801,
      "learning_rate": 0.00023999198356763676,
      "loss": 2.9847,
      "step": 129971
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9844372272491455,
      "learning_rate": 0.00023998797567949585,
      "loss": 3.0363,
      "step": 129972
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.897040605545044,
      "learning_rate": 0.00023998396780251237,
      "loss": 3.1103,
      "step": 129973
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2823808193206787,
      "learning_rate": 0.00023997995993668692,
      "loss": 2.846,
      "step": 129974
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.122884511947632,
      "learning_rate": 0.0002399759520820203,
      "loss": 2.8853,
      "step": 129975
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7613214254379272,
      "learning_rate": 0.00023997194423851332,
      "loss": 3.1139,
      "step": 129976
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.922837257385254,
      "learning_rate": 0.00023996793640616664,
      "loss": 2.9886,
      "step": 129977
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8071428537368774,
      "learning_rate": 0.0002399639285849811,
      "loss": 3.122,
      "step": 129978
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.163348913192749,
      "learning_rate": 0.00023995992077495743,
      "loss": 3.1905,
      "step": 129979
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2537782192230225,
      "learning_rate": 0.0002399559129760964,
      "loss": 2.9424,
      "step": 129980
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.37290096282959,
      "learning_rate": 0.00023995190518839857,
      "loss": 2.9431,
      "step": 129981
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2549989223480225,
      "learning_rate": 0.00023994789741186483,
      "loss": 2.9772,
      "step": 129982
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.088874340057373,
      "learning_rate": 0.00023994388964649593,
      "loss": 3.1522,
      "step": 129983
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8835688829421997,
      "learning_rate": 0.00023993988189229259,
      "loss": 2.9238,
      "step": 129984
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.987669587135315,
      "learning_rate": 0.00023993587414925554,
      "loss": 2.9145,
      "step": 129985
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.7172203063964844,
      "learning_rate": 0.00023993186641738562,
      "loss": 2.9735,
      "step": 129986
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9419432878494263,
      "learning_rate": 0.00023992785869668347,
      "loss": 3.0551,
      "step": 129987
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1312191486358643,
      "learning_rate": 0.00023992385098714985,
      "loss": 3.168,
      "step": 129988
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4027578830718994,
      "learning_rate": 0.00023991984328878549,
      "loss": 2.8728,
      "step": 129989
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5008976459503174,
      "learning_rate": 0.0002399158356015912,
      "loss": 2.6936,
      "step": 129990
    },
    {
      "epoch": 1.69,
      "grad_norm": 4.334393501281738,
      "learning_rate": 0.00023991182792556764,
      "loss": 3.0237,
      "step": 129991
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.952223300933838,
      "learning_rate": 0.0002399078202607157,
      "loss": 3.106,
      "step": 129992
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.912570834159851,
      "learning_rate": 0.000239903812607036,
      "loss": 2.8107,
      "step": 129993
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8918671607971191,
      "learning_rate": 0.00023989980496452927,
      "loss": 2.985,
      "step": 129994
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0392813682556152,
      "learning_rate": 0.00023989579733319638,
      "loss": 3.239,
      "step": 129995
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0539677143096924,
      "learning_rate": 0.0002398917897130379,
      "loss": 3.1806,
      "step": 129996
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.113494873046875,
      "learning_rate": 0.0002398877821040547,
      "loss": 3.0577,
      "step": 129997
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0350944995880127,
      "learning_rate": 0.00023988377450624755,
      "loss": 2.7982,
      "step": 129998
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9739480018615723,
      "learning_rate": 0.00023987976691961708,
      "loss": 2.9602,
      "step": 129999
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6278045177459717,
      "learning_rate": 0.00023987575934416412,
      "loss": 3.117,
      "step": 130000
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3305044174194336,
      "learning_rate": 0.00023987175177988935,
      "loss": 3.0459,
      "step": 130001
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.136422872543335,
      "learning_rate": 0.00023986774422679364,
      "loss": 3.0534,
      "step": 130002
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6582510471343994,
      "learning_rate": 0.0002398637366848776,
      "loss": 3.0463,
      "step": 130003
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.873828411102295,
      "learning_rate": 0.00023985972915414208,
      "loss": 3.0364,
      "step": 130004
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1812171936035156,
      "learning_rate": 0.0002398557216345877,
      "loss": 3.0826,
      "step": 130005
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.326488971710205,
      "learning_rate": 0.00023985171412621527,
      "loss": 3.0837,
      "step": 130006
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.342440128326416,
      "learning_rate": 0.00023984770662902554,
      "loss": 3.0587,
      "step": 130007
    },
    {
      "epoch": 1.69,
      "grad_norm": 5.482456207275391,
      "learning_rate": 0.0002398436991430193,
      "loss": 2.9304,
      "step": 130008
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.711841344833374,
      "learning_rate": 0.0002398396916681973,
      "loss": 2.9445,
      "step": 130009
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.200159788131714,
      "learning_rate": 0.00023983568420456018,
      "loss": 2.9199,
      "step": 130010
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9249975681304932,
      "learning_rate": 0.00023983167675210873,
      "loss": 3.0547,
      "step": 130011
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1726558208465576,
      "learning_rate": 0.0002398276693108437,
      "loss": 2.8902,
      "step": 130012
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9207797050476074,
      "learning_rate": 0.00023982366188076586,
      "loss": 3.0264,
      "step": 130013
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2639477252960205,
      "learning_rate": 0.0002398196544618759,
      "loss": 2.9524,
      "step": 130014
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8433232307434082,
      "learning_rate": 0.0002398156470541746,
      "loss": 3.0197,
      "step": 130015
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0504391193389893,
      "learning_rate": 0.0002398116396576629,
      "loss": 2.989,
      "step": 130016
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.340651512145996,
      "learning_rate": 0.00023980763227234118,
      "loss": 3.0045,
      "step": 130017
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9108961820602417,
      "learning_rate": 0.00023980362489821035,
      "loss": 3.289,
      "step": 130018
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2607905864715576,
      "learning_rate": 0.00023979961753527117,
      "loss": 2.9362,
      "step": 130019
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4332401752471924,
      "learning_rate": 0.00023979561018352434,
      "loss": 2.8486,
      "step": 130020
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.324445962905884,
      "learning_rate": 0.0002397916028429707,
      "loss": 2.8981,
      "step": 130021
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.108557939529419,
      "learning_rate": 0.00023978759551361107,
      "loss": 3.1383,
      "step": 130022
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.356966972351074,
      "learning_rate": 0.0002397835881954459,
      "loss": 2.9509,
      "step": 130023
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1645400524139404,
      "learning_rate": 0.0002397795808884761,
      "loss": 3.1263,
      "step": 130024
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0225601196289062,
      "learning_rate": 0.00023977557359270243,
      "loss": 3.0494,
      "step": 130025
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1927900314331055,
      "learning_rate": 0.00023977156630812562,
      "loss": 3.1557,
      "step": 130026
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.607856035232544,
      "learning_rate": 0.0002397675590347464,
      "loss": 3.3958,
      "step": 130027
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9440810680389404,
      "learning_rate": 0.00023976355177256564,
      "loss": 2.9552,
      "step": 130028
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.255464553833008,
      "learning_rate": 0.00023975954452158386,
      "loss": 3.0965,
      "step": 130029
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1630513668060303,
      "learning_rate": 0.00023975553728180195,
      "loss": 3.0696,
      "step": 130030
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.035335063934326,
      "learning_rate": 0.00023975153005322057,
      "loss": 2.6967,
      "step": 130031
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.637289524078369,
      "learning_rate": 0.00023974752283584052,
      "loss": 2.9661,
      "step": 130032
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.766188859939575,
      "learning_rate": 0.00023974351562966258,
      "loss": 2.9432,
      "step": 130033
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2611918449401855,
      "learning_rate": 0.00023973950843468757,
      "loss": 3.1546,
      "step": 130034
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5171384811401367,
      "learning_rate": 0.000239735501250916,
      "loss": 2.8009,
      "step": 130035
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.364596128463745,
      "learning_rate": 0.00023973149407834874,
      "loss": 2.9514,
      "step": 130036
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.012338638305664,
      "learning_rate": 0.0002397274869169865,
      "loss": 3.0758,
      "step": 130037
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.181713342666626,
      "learning_rate": 0.0002397234797668301,
      "loss": 3.0305,
      "step": 130038
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9140446186065674,
      "learning_rate": 0.00023971947262788022,
      "loss": 2.9393,
      "step": 130039
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1943466663360596,
      "learning_rate": 0.00023971546550013775,
      "loss": 3.0109,
      "step": 130040
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3377106189727783,
      "learning_rate": 0.0002397114583836032,
      "loss": 2.7427,
      "step": 130041
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8539549112319946,
      "learning_rate": 0.00023970745127827742,
      "loss": 2.9603,
      "step": 130042
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.209547996520996,
      "learning_rate": 0.00023970344418416115,
      "loss": 3.1805,
      "step": 130043
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1021013259887695,
      "learning_rate": 0.00023969943710125518,
      "loss": 3.0852,
      "step": 130044
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.021904706954956,
      "learning_rate": 0.00023969543002956022,
      "loss": 2.9405,
      "step": 130045
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9655135869979858,
      "learning_rate": 0.00023969142296907713,
      "loss": 3.0618,
      "step": 130046
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.917143702507019,
      "learning_rate": 0.0002396874159198064,
      "loss": 3.1928,
      "step": 130047
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2990527153015137,
      "learning_rate": 0.00023968340888174894,
      "loss": 2.9218,
      "step": 130048
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8770954608917236,
      "learning_rate": 0.00023967940185490547,
      "loss": 2.926,
      "step": 130049
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.923728346824646,
      "learning_rate": 0.00023967539483927674,
      "loss": 2.9564,
      "step": 130050
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3653907775878906,
      "learning_rate": 0.00023967138783486348,
      "loss": 2.8851,
      "step": 130051
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.19915509223938,
      "learning_rate": 0.00023966738084166647,
      "loss": 2.8721,
      "step": 130052
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2383430004119873,
      "learning_rate": 0.00023966337385968655,
      "loss": 3.2685,
      "step": 130053
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8735405206680298,
      "learning_rate": 0.0002396593668889242,
      "loss": 3.0768,
      "step": 130054
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.296137571334839,
      "learning_rate": 0.00023965535992938038,
      "loss": 2.913,
      "step": 130055
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1077959537506104,
      "learning_rate": 0.0002396513529810557,
      "loss": 2.817,
      "step": 130056
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.62426495552063,
      "learning_rate": 0.00023964734604395096,
      "loss": 2.9545,
      "step": 130057
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8880712985992432,
      "learning_rate": 0.00023964333911806696,
      "loss": 3.0425,
      "step": 130058
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.952652931213379,
      "learning_rate": 0.00023963933220340456,
      "loss": 3.0758,
      "step": 130059
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.087653636932373,
      "learning_rate": 0.00023963532529996414,
      "loss": 3.0849,
      "step": 130060
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8166794776916504,
      "learning_rate": 0.00023963131840774667,
      "loss": 2.8468,
      "step": 130061
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5389950275421143,
      "learning_rate": 0.00023962731152675295,
      "loss": 2.914,
      "step": 130062
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.241570234298706,
      "learning_rate": 0.00023962330465698357,
      "loss": 2.8708,
      "step": 130063
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.109450578689575,
      "learning_rate": 0.00023961929779843942,
      "loss": 3.1582,
      "step": 130064
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3679072856903076,
      "learning_rate": 0.00023961529095112126,
      "loss": 3.0686,
      "step": 130065
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2565436363220215,
      "learning_rate": 0.00023961128411502963,
      "loss": 2.9158,
      "step": 130066
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4094908237457275,
      "learning_rate": 0.00023960727729016544,
      "loss": 3.0588,
      "step": 130067
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5285255908966064,
      "learning_rate": 0.00023960327047652937,
      "loss": 2.9868,
      "step": 130068
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2847068309783936,
      "learning_rate": 0.0002395992636741222,
      "loss": 3.2771,
      "step": 130069
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1833832263946533,
      "learning_rate": 0.00023959525688294466,
      "loss": 2.8922,
      "step": 130070
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6674461364746094,
      "learning_rate": 0.00023959125010299755,
      "loss": 2.871,
      "step": 130071
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.537114143371582,
      "learning_rate": 0.0002395872433342816,
      "loss": 3.0479,
      "step": 130072
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.502533197402954,
      "learning_rate": 0.00023958323657679749,
      "loss": 2.849,
      "step": 130073
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9652116298675537,
      "learning_rate": 0.00023957922983054592,
      "loss": 2.8803,
      "step": 130074
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9127169847488403,
      "learning_rate": 0.00023957522309552777,
      "loss": 3.2689,
      "step": 130075
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9694058895111084,
      "learning_rate": 0.0002395712163717437,
      "loss": 3.2524,
      "step": 130076
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5456125736236572,
      "learning_rate": 0.00023956720965919454,
      "loss": 3.0057,
      "step": 130077
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.836313009262085,
      "learning_rate": 0.0002395632029578809,
      "loss": 2.9811,
      "step": 130078
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4758336544036865,
      "learning_rate": 0.00023955919626780371,
      "loss": 2.8717,
      "step": 130079
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.132439613342285,
      "learning_rate": 0.0002395551895889635,
      "loss": 3.3783,
      "step": 130080
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.51714825630188,
      "learning_rate": 0.00023955118292136115,
      "loss": 3.0639,
      "step": 130081
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0175931453704834,
      "learning_rate": 0.00023954717626499736,
      "loss": 3.0205,
      "step": 130082
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.009796142578125,
      "learning_rate": 0.00023954316961987294,
      "loss": 2.8861,
      "step": 130083
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1797935962677,
      "learning_rate": 0.00023953916298598857,
      "loss": 2.9198,
      "step": 130084
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0730321407318115,
      "learning_rate": 0.000239535156363345,
      "loss": 3.0752,
      "step": 130085
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8590848445892334,
      "learning_rate": 0.00023953114975194302,
      "loss": 2.8257,
      "step": 130086
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8031582832336426,
      "learning_rate": 0.00023952714315178327,
      "loss": 2.7489,
      "step": 130087
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6076107025146484,
      "learning_rate": 0.00023952313656286662,
      "loss": 2.7974,
      "step": 130088
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1889846324920654,
      "learning_rate": 0.00023951912998519377,
      "loss": 2.9918,
      "step": 130089
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.23134708404541,
      "learning_rate": 0.00023951512341876543,
      "loss": 2.7259,
      "step": 130090
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0074117183685303,
      "learning_rate": 0.00023951111686358232,
      "loss": 3.1213,
      "step": 130091
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8996777534484863,
      "learning_rate": 0.0002395071103196453,
      "loss": 2.905,
      "step": 130092
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8758518695831299,
      "learning_rate": 0.000239503103786955,
      "loss": 2.9933,
      "step": 130093
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.235816717147827,
      "learning_rate": 0.00023949909726551227,
      "loss": 2.981,
      "step": 130094
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.992933988571167,
      "learning_rate": 0.00023949509075531784,
      "loss": 2.8946,
      "step": 130095
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.522089958190918,
      "learning_rate": 0.00023949108425637235,
      "loss": 2.9593,
      "step": 130096
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1460518836975098,
      "learning_rate": 0.0002394870777686766,
      "loss": 3.1002,
      "step": 130097
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2545061111450195,
      "learning_rate": 0.0002394830712922313,
      "loss": 3.0716,
      "step": 130098
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.306331157684326,
      "learning_rate": 0.0002394790648270373,
      "loss": 2.6783,
      "step": 130099
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8391411304473877,
      "learning_rate": 0.00023947505837309526,
      "loss": 3.0208,
      "step": 130100
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.022956371307373,
      "learning_rate": 0.0002394710519304061,
      "loss": 2.9721,
      "step": 130101
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.3285672664642334,
      "learning_rate": 0.00023946704549897025,
      "loss": 2.8703,
      "step": 130102
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.702847957611084,
      "learning_rate": 0.00023946303907878863,
      "loss": 2.989,
      "step": 130103
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.811610698699951,
      "learning_rate": 0.00023945903266986199,
      "loss": 2.8822,
      "step": 130104
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8063920736312866,
      "learning_rate": 0.00023945502627219104,
      "loss": 3.3284,
      "step": 130105
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.721388578414917,
      "learning_rate": 0.00023945101988577655,
      "loss": 2.6408,
      "step": 130106
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.648106336593628,
      "learning_rate": 0.00023944701351061938,
      "loss": 2.9307,
      "step": 130107
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0500574111938477,
      "learning_rate": 0.00023944300714672006,
      "loss": 3.0247,
      "step": 130108
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.288372278213501,
      "learning_rate": 0.00023943900079407938,
      "loss": 3.0053,
      "step": 130109
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7484941482543945,
      "learning_rate": 0.00023943499445269818,
      "loss": 3.3025,
      "step": 130110
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.184039831161499,
      "learning_rate": 0.00023943098812257715,
      "loss": 2.7525,
      "step": 130111
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.99288272857666,
      "learning_rate": 0.00023942698180371702,
      "loss": 2.8791,
      "step": 130112
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.132476329803467,
      "learning_rate": 0.00023942297549611868,
      "loss": 2.9009,
      "step": 130113
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.134603500366211,
      "learning_rate": 0.00023941896919978264,
      "loss": 2.9177,
      "step": 130114
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.19209361076355,
      "learning_rate": 0.00023941496291470974,
      "loss": 2.9615,
      "step": 130115
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.708636999130249,
      "learning_rate": 0.00023941095664090076,
      "loss": 2.9759,
      "step": 130116
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.091219663619995,
      "learning_rate": 0.00023940695037835642,
      "loss": 2.972,
      "step": 130117
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9962143898010254,
      "learning_rate": 0.00023940294412707746,
      "loss": 3.3524,
      "step": 130118
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.6823623180389404,
      "learning_rate": 0.00023939893788706466,
      "loss": 2.9831,
      "step": 130119
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.771531581878662,
      "learning_rate": 0.00023939493165831886,
      "loss": 3.0059,
      "step": 130120
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.442060947418213,
      "learning_rate": 0.00023939092544084055,
      "loss": 3.0182,
      "step": 130121
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7626203298568726,
      "learning_rate": 0.00023938691923463062,
      "loss": 2.9022,
      "step": 130122
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9427075386047363,
      "learning_rate": 0.00023938291303968979,
      "loss": 2.8889,
      "step": 130123
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9008930921554565,
      "learning_rate": 0.00023937890685601885,
      "loss": 3.0015,
      "step": 130124
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0096702575683594,
      "learning_rate": 0.0002393749006836185,
      "loss": 3.0456,
      "step": 130125
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.1923105716705322,
      "learning_rate": 0.00023937089452248964,
      "loss": 2.9377,
      "step": 130126
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.026954412460327,
      "learning_rate": 0.00023936688837263273,
      "loss": 2.9287,
      "step": 130127
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.7657352685928345,
      "learning_rate": 0.00023936288223404866,
      "loss": 3.0797,
      "step": 130128
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.027923345565796,
      "learning_rate": 0.00023935887610673816,
      "loss": 3.0118,
      "step": 130129
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.111400842666626,
      "learning_rate": 0.00023935486999070204,
      "loss": 3.074,
      "step": 130130
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.220175266265869,
      "learning_rate": 0.00023935086388594094,
      "loss": 2.9553,
      "step": 130131
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5043554306030273,
      "learning_rate": 0.00023934685779245584,
      "loss": 2.8308,
      "step": 130132
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9064362049102783,
      "learning_rate": 0.00023934285171024714,
      "loss": 3.038,
      "step": 130133
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6623010635375977,
      "learning_rate": 0.00023933884563931575,
      "loss": 2.9977,
      "step": 130134
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1004812717437744,
      "learning_rate": 0.00023933483957966242,
      "loss": 3.0243,
      "step": 130135
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.957451581954956,
      "learning_rate": 0.00023933083353128788,
      "loss": 3.0217,
      "step": 130136
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.269026756286621,
      "learning_rate": 0.0002393268274941929,
      "loss": 2.9508,
      "step": 130137
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7325263023376465,
      "learning_rate": 0.00023932282146837833,
      "loss": 2.6959,
      "step": 130138
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0280473232269287,
      "learning_rate": 0.00023931881545384467,
      "loss": 2.9237,
      "step": 130139
    },
    {
      "epoch": 1.69,
      "grad_norm": 6.23120641708374,
      "learning_rate": 0.00023931480945059278,
      "loss": 2.7919,
      "step": 130140
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4721925258636475,
      "learning_rate": 0.0002393108034586234,
      "loss": 2.9575,
      "step": 130141
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0533969402313232,
      "learning_rate": 0.00023930679747793734,
      "loss": 3.0732,
      "step": 130142
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.510666847229004,
      "learning_rate": 0.00023930279150853522,
      "loss": 3.0458,
      "step": 130143
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8783347606658936,
      "learning_rate": 0.00023929878555041805,
      "loss": 2.8331,
      "step": 130144
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.537578582763672,
      "learning_rate": 0.00023929477960358617,
      "loss": 3.2377,
      "step": 130145
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.278008460998535,
      "learning_rate": 0.0002392907736680406,
      "loss": 3.2759,
      "step": 130146
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1470069885253906,
      "learning_rate": 0.00023928676774378203,
      "loss": 3.0644,
      "step": 130147
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.207494020462036,
      "learning_rate": 0.00023928276183081117,
      "loss": 3.0484,
      "step": 130148
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.0910444259643555,
      "learning_rate": 0.00023927875592912878,
      "loss": 3.0413,
      "step": 130149
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6838295459747314,
      "learning_rate": 0.00023927475003873575,
      "loss": 2.9206,
      "step": 130150
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.055241107940674,
      "learning_rate": 0.0002392707441596326,
      "loss": 3.1262,
      "step": 130151
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.5969581604003906,
      "learning_rate": 0.00023926673829182012,
      "loss": 2.9839,
      "step": 130152
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.599724769592285,
      "learning_rate": 0.0002392627324352991,
      "loss": 3.0658,
      "step": 130153
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.1448700428009033,
      "learning_rate": 0.00023925872659007032,
      "loss": 2.9474,
      "step": 130154
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.3562262058258057,
      "learning_rate": 0.0002392547207561345,
      "loss": 2.8241,
      "step": 130155
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.17167592048645,
      "learning_rate": 0.00023925071493349239,
      "loss": 2.7928,
      "step": 130156
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9901758432388306,
      "learning_rate": 0.00023924670912214472,
      "loss": 3.1485,
      "step": 130157
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9862874746322632,
      "learning_rate": 0.0002392427033220922,
      "loss": 2.9651,
      "step": 130158
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.2716968059539795,
      "learning_rate": 0.00023923869753333558,
      "loss": 2.9456,
      "step": 130159
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.7090296745300293,
      "learning_rate": 0.00023923469175587565,
      "loss": 3.1501,
      "step": 130160
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.9476404190063477,
      "learning_rate": 0.00023923068598971314,
      "loss": 2.8505,
      "step": 130161
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6914730072021484,
      "learning_rate": 0.00023922668023484883,
      "loss": 2.8515,
      "step": 130162
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4944589138031006,
      "learning_rate": 0.0002392226744912834,
      "loss": 3.1074,
      "step": 130163
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.083219289779663,
      "learning_rate": 0.00023921866875901767,
      "loss": 3.0215,
      "step": 130164
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.7851524353027344,
      "learning_rate": 0.00023921466303805227,
      "loss": 3.0637,
      "step": 130165
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.235438585281372,
      "learning_rate": 0.000239210657328388,
      "loss": 3.0432,
      "step": 130166
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.107757568359375,
      "learning_rate": 0.00023920665163002565,
      "loss": 2.8893,
      "step": 130167
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.9198112487792969,
      "learning_rate": 0.00023920264594296596,
      "loss": 3.3626,
      "step": 130168
    },
    {
      "epoch": 1.69,
      "grad_norm": 1.8239363431930542,
      "learning_rate": 0.00023919864026720962,
      "loss": 3.0271,
      "step": 130169
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.4170916080474854,
      "learning_rate": 0.0002391946346027574,
      "loss": 2.863,
      "step": 130170
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.09574818611145,
      "learning_rate": 0.00023919062894961008,
      "loss": 3.2034,
      "step": 130171
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.349046468734741,
      "learning_rate": 0.00023918662330776828,
      "loss": 3.2338,
      "step": 130172
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6593620777130127,
      "learning_rate": 0.0002391826176772329,
      "loss": 2.7929,
      "step": 130173
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.424227714538574,
      "learning_rate": 0.00023917861205800464,
      "loss": 2.8711,
      "step": 130174
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.226449966430664,
      "learning_rate": 0.00023917460645008417,
      "loss": 3.1247,
      "step": 130175
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.6361236572265625,
      "learning_rate": 0.00023917060085347228,
      "loss": 3.0381,
      "step": 130176
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.0296521186828613,
      "learning_rate": 0.00023916659526816974,
      "loss": 3.0814,
      "step": 130177
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0913689136505127,
      "learning_rate": 0.00023916258969417734,
      "loss": 3.1529,
      "step": 130178
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.126790761947632,
      "learning_rate": 0.0002391585841314957,
      "loss": 3.0103,
      "step": 130179
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5339956283569336,
      "learning_rate": 0.0002391545785801257,
      "loss": 2.8694,
      "step": 130180
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4320971965789795,
      "learning_rate": 0.0002391505730400679,
      "loss": 3.094,
      "step": 130181
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0979814529418945,
      "learning_rate": 0.00023914656751132323,
      "loss": 2.9795,
      "step": 130182
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2227869033813477,
      "learning_rate": 0.00023914256199389234,
      "loss": 3.114,
      "step": 130183
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.290804862976074,
      "learning_rate": 0.00023913855648777593,
      "loss": 3.1151,
      "step": 130184
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.055926561355591,
      "learning_rate": 0.00023913455099297488,
      "loss": 2.8932,
      "step": 130185
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.901700496673584,
      "learning_rate": 0.00023913054550948992,
      "loss": 3.14,
      "step": 130186
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2788548469543457,
      "learning_rate": 0.00023912654003732173,
      "loss": 3.0662,
      "step": 130187
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9540380239486694,
      "learning_rate": 0.00023912253457647099,
      "loss": 2.9647,
      "step": 130188
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.818806529045105,
      "learning_rate": 0.00023911852912693855,
      "loss": 3.0485,
      "step": 130189
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8969029188156128,
      "learning_rate": 0.0002391145236887251,
      "loss": 2.9159,
      "step": 130190
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8729358911514282,
      "learning_rate": 0.0002391105182618314,
      "loss": 2.9677,
      "step": 130191
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3546361923217773,
      "learning_rate": 0.00023910651284625827,
      "loss": 2.8984,
      "step": 130192
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0400304794311523,
      "learning_rate": 0.00023910250744200645,
      "loss": 2.9058,
      "step": 130193
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.744925856590271,
      "learning_rate": 0.00023909850204907653,
      "loss": 3.208,
      "step": 130194
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0717155933380127,
      "learning_rate": 0.0002390944966674693,
      "loss": 2.9655,
      "step": 130195
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7831401824951172,
      "learning_rate": 0.0002390904912971856,
      "loss": 3.0177,
      "step": 130196
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3838937282562256,
      "learning_rate": 0.00023908648593822613,
      "loss": 3.0332,
      "step": 130197
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.865302324295044,
      "learning_rate": 0.0002390824805905916,
      "loss": 2.9491,
      "step": 130198
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.886481285095215,
      "learning_rate": 0.00023907847525428294,
      "loss": 3.2989,
      "step": 130199
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.093940019607544,
      "learning_rate": 0.0002390744699293006,
      "loss": 2.8231,
      "step": 130200
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4082417488098145,
      "learning_rate": 0.00023907046461564546,
      "loss": 2.9013,
      "step": 130201
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.120969772338867,
      "learning_rate": 0.00023906645931331827,
      "loss": 3.0332,
      "step": 130202
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9551392793655396,
      "learning_rate": 0.00023906245402231978,
      "loss": 2.8466,
      "step": 130203
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4616689682006836,
      "learning_rate": 0.00023905844874265076,
      "loss": 3.0858,
      "step": 130204
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9535868167877197,
      "learning_rate": 0.00023905444347431203,
      "loss": 2.9899,
      "step": 130205
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7808828353881836,
      "learning_rate": 0.00023905043821730406,
      "loss": 2.9963,
      "step": 130206
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9073154926300049,
      "learning_rate": 0.00023904643297162782,
      "loss": 3.1662,
      "step": 130207
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.170207977294922,
      "learning_rate": 0.000239042427737284,
      "loss": 2.7329,
      "step": 130208
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9102134704589844,
      "learning_rate": 0.0002390384225142733,
      "loss": 3.0355,
      "step": 130209
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4486324787139893,
      "learning_rate": 0.00023903441730259653,
      "loss": 3.0636,
      "step": 130210
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.856059193611145,
      "learning_rate": 0.00023903041210225455,
      "loss": 2.8132,
      "step": 130211
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0639171600341797,
      "learning_rate": 0.00023902640691324785,
      "loss": 2.9545,
      "step": 130212
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8989590406417847,
      "learning_rate": 0.0002390224017355773,
      "loss": 3.1188,
      "step": 130213
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4703104496002197,
      "learning_rate": 0.0002390183965692436,
      "loss": 2.9539,
      "step": 130214
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1576335430145264,
      "learning_rate": 0.00023901439141424754,
      "loss": 2.9427,
      "step": 130215
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.016383647918701,
      "learning_rate": 0.00023901038627058984,
      "loss": 2.9962,
      "step": 130216
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1322970390319824,
      "learning_rate": 0.0002390063811382714,
      "loss": 2.8882,
      "step": 130217
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5860657691955566,
      "learning_rate": 0.00023900237601729273,
      "loss": 2.904,
      "step": 130218
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9493454694747925,
      "learning_rate": 0.00023899837090765464,
      "loss": 2.8728,
      "step": 130219
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8474314212799072,
      "learning_rate": 0.0002389943658093579,
      "loss": 2.8918,
      "step": 130220
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.595237970352173,
      "learning_rate": 0.00023899036072240324,
      "loss": 3.0275,
      "step": 130221
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1312055587768555,
      "learning_rate": 0.00023898635564679148,
      "loss": 3.0016,
      "step": 130222
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3424229621887207,
      "learning_rate": 0.0002389823505825234,
      "loss": 3.0433,
      "step": 130223
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.146784782409668,
      "learning_rate": 0.00023897834552959955,
      "loss": 3.0354,
      "step": 130224
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8721134662628174,
      "learning_rate": 0.00023897434048802073,
      "loss": 2.987,
      "step": 130225
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7063896656036377,
      "learning_rate": 0.0002389703354577878,
      "loss": 2.909,
      "step": 130226
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.48610258102417,
      "learning_rate": 0.00023896633043890138,
      "loss": 2.7976,
      "step": 130227
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6548454761505127,
      "learning_rate": 0.00023896232543136228,
      "loss": 2.9871,
      "step": 130228
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0714714527130127,
      "learning_rate": 0.0002389583204351714,
      "loss": 2.9736,
      "step": 130229
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9315094947814941,
      "learning_rate": 0.00023895431545032915,
      "loss": 2.8904,
      "step": 130230
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.620884418487549,
      "learning_rate": 0.0002389503104768365,
      "loss": 3.154,
      "step": 130231
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.470257043838501,
      "learning_rate": 0.00023894630551469405,
      "loss": 3.3262,
      "step": 130232
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.00480580329895,
      "learning_rate": 0.0002389423005639027,
      "loss": 3.0741,
      "step": 130233
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1309878826141357,
      "learning_rate": 0.0002389382956244631,
      "loss": 3.0199,
      "step": 130234
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.787520408630371,
      "learning_rate": 0.00023893429069637618,
      "loss": 2.933,
      "step": 130235
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.630751848220825,
      "learning_rate": 0.0002389302857796424,
      "loss": 2.8979,
      "step": 130236
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4659066200256348,
      "learning_rate": 0.0002389262808742626,
      "loss": 2.8684,
      "step": 130237
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4047610759735107,
      "learning_rate": 0.00023892227598023758,
      "loss": 2.8133,
      "step": 130238
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0823326110839844,
      "learning_rate": 0.00023891827109756805,
      "loss": 3.1433,
      "step": 130239
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6143667697906494,
      "learning_rate": 0.00023891426622625478,
      "loss": 2.962,
      "step": 130240
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.932990074157715,
      "learning_rate": 0.00023891026136629861,
      "loss": 3.0248,
      "step": 130241
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0743227005004883,
      "learning_rate": 0.00023890625651770006,
      "loss": 3.1304,
      "step": 130242
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5708017349243164,
      "learning_rate": 0.00023890225168046,
      "loss": 2.8216,
      "step": 130243
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.332571506500244,
      "learning_rate": 0.00023889824685457915,
      "loss": 2.8735,
      "step": 130244
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.168199062347412,
      "learning_rate": 0.00023889424204005826,
      "loss": 3.0767,
      "step": 130245
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.8642830848693848,
      "learning_rate": 0.0002388902372368981,
      "loss": 3.0705,
      "step": 130246
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8579697608947754,
      "learning_rate": 0.00023888623244509947,
      "loss": 2.9659,
      "step": 130247
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9848934412002563,
      "learning_rate": 0.000238882227664663,
      "loss": 3.0396,
      "step": 130248
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1220526695251465,
      "learning_rate": 0.00023887822289558948,
      "loss": 2.8796,
      "step": 130249
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.45877742767334,
      "learning_rate": 0.00023887421813787964,
      "loss": 2.9315,
      "step": 130250
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4364821910858154,
      "learning_rate": 0.0002388702133915342,
      "loss": 3.0751,
      "step": 130251
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.91306734085083,
      "learning_rate": 0.00023886620865655396,
      "loss": 2.822,
      "step": 130252
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.932767629623413,
      "learning_rate": 0.00023886220393293963,
      "loss": 2.9494,
      "step": 130253
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5520334243774414,
      "learning_rate": 0.00023885819922069205,
      "loss": 3.1398,
      "step": 130254
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.2102742195129395,
      "learning_rate": 0.00023885419451981184,
      "loss": 2.9178,
      "step": 130255
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0636515617370605,
      "learning_rate": 0.00023885018983029984,
      "loss": 2.8968,
      "step": 130256
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9120545387268066,
      "learning_rate": 0.00023884618515215662,
      "loss": 3.0842,
      "step": 130257
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8398393392562866,
      "learning_rate": 0.00023884218048538314,
      "loss": 3.2009,
      "step": 130258
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7726263999938965,
      "learning_rate": 0.00023883817582998,
      "loss": 2.9255,
      "step": 130259
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4906537532806396,
      "learning_rate": 0.0002388341711859481,
      "loss": 3.0301,
      "step": 130260
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9831011295318604,
      "learning_rate": 0.000238830166553288,
      "loss": 3.0332,
      "step": 130261
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7373943328857422,
      "learning_rate": 0.0002388261619320005,
      "loss": 2.9321,
      "step": 130262
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.645648241043091,
      "learning_rate": 0.00023882215732208645,
      "loss": 3.1892,
      "step": 130263
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6986629962921143,
      "learning_rate": 0.00023881815272354645,
      "loss": 2.785,
      "step": 130264
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.091876745223999,
      "learning_rate": 0.00023881414813638132,
      "loss": 2.8672,
      "step": 130265
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2954461574554443,
      "learning_rate": 0.00023881014356059188,
      "loss": 3.0732,
      "step": 130266
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2450037002563477,
      "learning_rate": 0.00023880613899617865,
      "loss": 3.0322,
      "step": 130267
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.79953670501709,
      "learning_rate": 0.00023880213444314258,
      "loss": 3.2495,
      "step": 130268
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.21260142326355,
      "learning_rate": 0.00023879812990148434,
      "loss": 2.9696,
      "step": 130269
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9223155975341797,
      "learning_rate": 0.00023879412537120478,
      "loss": 3.0201,
      "step": 130270
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.198591470718384,
      "learning_rate": 0.0002387901208523044,
      "loss": 3.0229,
      "step": 130271
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.107100486755371,
      "learning_rate": 0.0002387861163447842,
      "loss": 2.8917,
      "step": 130272
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0712621212005615,
      "learning_rate": 0.00023878211184864476,
      "loss": 3.0031,
      "step": 130273
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.245734930038452,
      "learning_rate": 0.0002387781073638869,
      "loss": 2.9164,
      "step": 130274
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.22440242767334,
      "learning_rate": 0.0002387741028905113,
      "loss": 3.1105,
      "step": 130275
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1757752895355225,
      "learning_rate": 0.00023877009842851877,
      "loss": 3.1875,
      "step": 130276
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.497831106185913,
      "learning_rate": 0.00023876609397791002,
      "loss": 3.0186,
      "step": 130277
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.20479416847229,
      "learning_rate": 0.00023876208953868593,
      "loss": 3.1148,
      "step": 130278
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6067545413970947,
      "learning_rate": 0.00023875808511084705,
      "loss": 2.9785,
      "step": 130279
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3358101844787598,
      "learning_rate": 0.00023875408069439416,
      "loss": 3.0958,
      "step": 130280
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5996179580688477,
      "learning_rate": 0.00023875007628932805,
      "loss": 3.1399,
      "step": 130281
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9953116178512573,
      "learning_rate": 0.00023874607189564945,
      "loss": 3.014,
      "step": 130282
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8785536289215088,
      "learning_rate": 0.00023874206751335912,
      "loss": 2.8332,
      "step": 130283
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.133371114730835,
      "learning_rate": 0.00023873806314245793,
      "loss": 2.6132,
      "step": 130284
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2555949687957764,
      "learning_rate": 0.00023873405878294633,
      "loss": 2.995,
      "step": 130285
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9919019937515259,
      "learning_rate": 0.00023873005443482526,
      "loss": 3.1399,
      "step": 130286
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.37558913230896,
      "learning_rate": 0.0002387260500980954,
      "loss": 2.8953,
      "step": 130287
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.162659168243408,
      "learning_rate": 0.00023872204577275754,
      "loss": 3.0904,
      "step": 130288
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.406716823577881,
      "learning_rate": 0.00023871804145881242,
      "loss": 2.9939,
      "step": 130289
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.107992172241211,
      "learning_rate": 0.0002387140371562609,
      "loss": 2.8412,
      "step": 130290
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.006472587585449,
      "learning_rate": 0.00023871003286510343,
      "loss": 3.199,
      "step": 130291
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3535573482513428,
      "learning_rate": 0.00023870602858534098,
      "loss": 2.8637,
      "step": 130292
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.792881727218628,
      "learning_rate": 0.0002387020243169742,
      "loss": 3.003,
      "step": 130293
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8062174320220947,
      "learning_rate": 0.00023869802006000384,
      "loss": 3.1166,
      "step": 130294
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.24375057220459,
      "learning_rate": 0.00023869401581443073,
      "loss": 2.9577,
      "step": 130295
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.361776113510132,
      "learning_rate": 0.0002386900115802557,
      "loss": 2.8645,
      "step": 130296
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9507209062576294,
      "learning_rate": 0.00023868600735747918,
      "loss": 2.9636,
      "step": 130297
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9203287363052368,
      "learning_rate": 0.0002386820031461021,
      "loss": 3.0685,
      "step": 130298
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9587560892105103,
      "learning_rate": 0.0002386779989461252,
      "loss": 3.1344,
      "step": 130299
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4511446952819824,
      "learning_rate": 0.00023867399475754924,
      "loss": 2.9717,
      "step": 130300
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4640934467315674,
      "learning_rate": 0.0002386699905803749,
      "loss": 3.1661,
      "step": 130301
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7749954462051392,
      "learning_rate": 0.0002386659864146031,
      "loss": 3.003,
      "step": 130302
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.782880425453186,
      "learning_rate": 0.00023866198226023434,
      "loss": 3.0352,
      "step": 130303
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.365447759628296,
      "learning_rate": 0.00023865797811726947,
      "loss": 3.0397,
      "step": 130304
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.102766752243042,
      "learning_rate": 0.00023865397398570924,
      "loss": 3.1796,
      "step": 130305
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3206984996795654,
      "learning_rate": 0.00023864996986555442,
      "loss": 3.0491,
      "step": 130306
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0736353397369385,
      "learning_rate": 0.00023864596575680567,
      "loss": 2.9393,
      "step": 130307
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.161806106567383,
      "learning_rate": 0.00023864196165946394,
      "loss": 2.9274,
      "step": 130308
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.012775182723999,
      "learning_rate": 0.00023863795757352973,
      "loss": 2.958,
      "step": 130309
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.23464298248291,
      "learning_rate": 0.00023863395349900388,
      "loss": 3.0177,
      "step": 130310
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2107760906219482,
      "learning_rate": 0.0002386299494358871,
      "loss": 2.9481,
      "step": 130311
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8769395351409912,
      "learning_rate": 0.00023862594538418021,
      "loss": 3.2066,
      "step": 130312
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6473751068115234,
      "learning_rate": 0.0002386219413438839,
      "loss": 2.9429,
      "step": 130313
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8959567546844482,
      "learning_rate": 0.00023861793731499905,
      "loss": 2.9081,
      "step": 130314
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.140743732452393,
      "learning_rate": 0.00023861393329752616,
      "loss": 3.2629,
      "step": 130315
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9667805433273315,
      "learning_rate": 0.00023860992929146613,
      "loss": 2.9493,
      "step": 130316
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9790414571762085,
      "learning_rate": 0.0002386059252968196,
      "loss": 3.0037,
      "step": 130317
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4778225421905518,
      "learning_rate": 0.00023860192131358745,
      "loss": 3.1425,
      "step": 130318
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.788647174835205,
      "learning_rate": 0.00023859791734177034,
      "loss": 3.0311,
      "step": 130319
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.5429279804229736,
      "learning_rate": 0.00023859391338136904,
      "loss": 2.8387,
      "step": 130320
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7718175649642944,
      "learning_rate": 0.0002385899094323844,
      "loss": 2.9074,
      "step": 130321
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5470523834228516,
      "learning_rate": 0.00023858590549481697,
      "loss": 3.1671,
      "step": 130322
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.1135478019714355,
      "learning_rate": 0.00023858190156866754,
      "loss": 2.7701,
      "step": 130323
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.227116107940674,
      "learning_rate": 0.0002385778976539369,
      "loss": 2.9151,
      "step": 130324
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4312222003936768,
      "learning_rate": 0.00023857389375062582,
      "loss": 2.7111,
      "step": 130325
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7091954946517944,
      "learning_rate": 0.00023856988985873498,
      "loss": 3.0684,
      "step": 130326
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5864064693450928,
      "learning_rate": 0.0002385658859782653,
      "loss": 2.9438,
      "step": 130327
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.497225522994995,
      "learning_rate": 0.00023856188210921722,
      "loss": 2.7278,
      "step": 130328
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0214648246765137,
      "learning_rate": 0.00023855787825159168,
      "loss": 2.9398,
      "step": 130329
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9961695671081543,
      "learning_rate": 0.0002385538744053894,
      "loss": 3.0099,
      "step": 130330
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4263858795166016,
      "learning_rate": 0.00023854987057061108,
      "loss": 2.9621,
      "step": 130331
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.9247961044311523,
      "learning_rate": 0.00023854586674725752,
      "loss": 2.7266,
      "step": 130332
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2770779132843018,
      "learning_rate": 0.00023854186293532958,
      "loss": 3.2475,
      "step": 130333
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1122775077819824,
      "learning_rate": 0.00023853785913482774,
      "loss": 3.0042,
      "step": 130334
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0952160358428955,
      "learning_rate": 0.00023853385534575286,
      "loss": 3.083,
      "step": 130335
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8018537759780884,
      "learning_rate": 0.00023852985156810568,
      "loss": 3.0766,
      "step": 130336
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.078733444213867,
      "learning_rate": 0.00023852584780188697,
      "loss": 3.3247,
      "step": 130337
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.092517852783203,
      "learning_rate": 0.0002385218440470975,
      "loss": 2.9519,
      "step": 130338
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0665664672851562,
      "learning_rate": 0.000238517840303738,
      "loss": 2.7171,
      "step": 130339
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2400197982788086,
      "learning_rate": 0.0002385138365718092,
      "loss": 2.6967,
      "step": 130340
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3331661224365234,
      "learning_rate": 0.0002385098328513118,
      "loss": 3.0618,
      "step": 130341
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7554717063903809,
      "learning_rate": 0.00023850582914224658,
      "loss": 2.863,
      "step": 130342
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.929059624671936,
      "learning_rate": 0.00023850182544461426,
      "loss": 3.1524,
      "step": 130343
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.896797776222229,
      "learning_rate": 0.00023849782175841563,
      "loss": 2.9459,
      "step": 130344
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0681827068328857,
      "learning_rate": 0.00023849381808365146,
      "loss": 2.9544,
      "step": 130345
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3566150665283203,
      "learning_rate": 0.0002384898144203224,
      "loss": 3.0817,
      "step": 130346
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1955184936523438,
      "learning_rate": 0.00023848581076842926,
      "loss": 2.8496,
      "step": 130347
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.611757516860962,
      "learning_rate": 0.00023848180712797282,
      "loss": 2.9512,
      "step": 130348
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.171746015548706,
      "learning_rate": 0.00023847780349895372,
      "loss": 2.8136,
      "step": 130349
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8721381425857544,
      "learning_rate": 0.00023847379988137276,
      "loss": 3.0103,
      "step": 130350
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7720870971679688,
      "learning_rate": 0.0002384697962752307,
      "loss": 3.2667,
      "step": 130351
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.273019790649414,
      "learning_rate": 0.00023846579268052825,
      "loss": 3.1995,
      "step": 130352
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7581732273101807,
      "learning_rate": 0.00023846178909726616,
      "loss": 2.8934,
      "step": 130353
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.6389033794403076,
      "learning_rate": 0.00023845778552544517,
      "loss": 3.0256,
      "step": 130354
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9188446998596191,
      "learning_rate": 0.00023845378196506615,
      "loss": 3.1382,
      "step": 130355
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8835506439208984,
      "learning_rate": 0.0002384497784161296,
      "loss": 2.8323,
      "step": 130356
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.4329240322113037,
      "learning_rate": 0.00023844577487863651,
      "loss": 3.0278,
      "step": 130357
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.198641300201416,
      "learning_rate": 0.00023844177135258745,
      "loss": 2.9506,
      "step": 130358
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4595282077789307,
      "learning_rate": 0.0002384377678379832,
      "loss": 3.1108,
      "step": 130359
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.6433571577072144,
      "learning_rate": 0.00023843376433482454,
      "loss": 2.9996,
      "step": 130360
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.841671943664551,
      "learning_rate": 0.00023842976084311222,
      "loss": 2.9129,
      "step": 130361
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.029233694076538,
      "learning_rate": 0.000238425757362847,
      "loss": 2.8993,
      "step": 130362
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0548999309539795,
      "learning_rate": 0.00023842175389402964,
      "loss": 3.1347,
      "step": 130363
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8396759033203125,
      "learning_rate": 0.00023841775043666072,
      "loss": 3.2225,
      "step": 130364
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9003480672836304,
      "learning_rate": 0.00023841374699074116,
      "loss": 2.7487,
      "step": 130365
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3531930446624756,
      "learning_rate": 0.00023840974355627162,
      "loss": 2.838,
      "step": 130366
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0066628456115723,
      "learning_rate": 0.00023840574013325285,
      "loss": 3.1542,
      "step": 130367
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.260059118270874,
      "learning_rate": 0.00023840173672168562,
      "loss": 3.0492,
      "step": 130368
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8648757934570312,
      "learning_rate": 0.00023839773332157084,
      "loss": 3.2311,
      "step": 130369
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.757885456085205,
      "learning_rate": 0.00023839372993290895,
      "loss": 3.1041,
      "step": 130370
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9917255640029907,
      "learning_rate": 0.00023838972655570079,
      "loss": 2.8777,
      "step": 130371
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.279571533203125,
      "learning_rate": 0.00023838572318994717,
      "loss": 2.9595,
      "step": 130372
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2155020236968994,
      "learning_rate": 0.0002383817198356488,
      "loss": 2.8283,
      "step": 130373
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0928280353546143,
      "learning_rate": 0.00023837771649280645,
      "loss": 3.2634,
      "step": 130374
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0095889568328857,
      "learning_rate": 0.0002383737131614209,
      "loss": 3.1499,
      "step": 130375
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1635069847106934,
      "learning_rate": 0.0002383697098414928,
      "loss": 2.7995,
      "step": 130376
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0147814750671387,
      "learning_rate": 0.00023836570653302288,
      "loss": 2.721,
      "step": 130377
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1458022594451904,
      "learning_rate": 0.00023836170323601196,
      "loss": 2.6883,
      "step": 130378
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.400311231613159,
      "learning_rate": 0.0002383576999504608,
      "loss": 2.9852,
      "step": 130379
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3082339763641357,
      "learning_rate": 0.00023835369667637006,
      "loss": 3.228,
      "step": 130380
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9186937808990479,
      "learning_rate": 0.00023834969341374066,
      "loss": 3.0156,
      "step": 130381
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1607837677001953,
      "learning_rate": 0.0002383456901625731,
      "loss": 3.128,
      "step": 130382
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8870933055877686,
      "learning_rate": 0.00023834168692286824,
      "loss": 2.8281,
      "step": 130383
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3594367504119873,
      "learning_rate": 0.0002383376836946268,
      "loss": 3.0194,
      "step": 130384
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.073634624481201,
      "learning_rate": 0.00023833368047784957,
      "loss": 3.0328,
      "step": 130385
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0118508338928223,
      "learning_rate": 0.0002383296772725373,
      "loss": 2.9958,
      "step": 130386
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.910058617591858,
      "learning_rate": 0.00023832567407869069,
      "loss": 3.0929,
      "step": 130387
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.139193534851074,
      "learning_rate": 0.0002383216708963106,
      "loss": 2.973,
      "step": 130388
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0580437183380127,
      "learning_rate": 0.00023831766772539756,
      "loss": 2.9305,
      "step": 130389
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3581156730651855,
      "learning_rate": 0.00023831366456595243,
      "loss": 2.8173,
      "step": 130390
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.156353712081909,
      "learning_rate": 0.00023830966141797597,
      "loss": 3.1619,
      "step": 130391
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.6645973920822144,
      "learning_rate": 0.00023830565828146887,
      "loss": 3.1216,
      "step": 130392
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3957324028015137,
      "learning_rate": 0.00023830165515643196,
      "loss": 2.8867,
      "step": 130393
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.7796177864074707,
      "learning_rate": 0.00023829765204286607,
      "loss": 2.9319,
      "step": 130394
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.467343330383301,
      "learning_rate": 0.00023829364894077166,
      "loss": 3.1552,
      "step": 130395
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.176994562149048,
      "learning_rate": 0.0002382896458501496,
      "loss": 2.9591,
      "step": 130396
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4117023944854736,
      "learning_rate": 0.0002382856427710007,
      "loss": 3.0469,
      "step": 130397
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.4584691524505615,
      "learning_rate": 0.00023828163970332562,
      "loss": 2.988,
      "step": 130398
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.362860679626465,
      "learning_rate": 0.0002382776366471252,
      "loss": 2.8493,
      "step": 130399
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.126676321029663,
      "learning_rate": 0.00023827363360240023,
      "loss": 2.9784,
      "step": 130400
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7036478519439697,
      "learning_rate": 0.00023826963056915125,
      "loss": 2.8565,
      "step": 130401
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.261371374130249,
      "learning_rate": 0.0002382656275473791,
      "loss": 2.9549,
      "step": 130402
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7126407623291016,
      "learning_rate": 0.0002382616245370845,
      "loss": 3.168,
      "step": 130403
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0394811630249023,
      "learning_rate": 0.00023825762153826827,
      "loss": 2.8957,
      "step": 130404
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.321300745010376,
      "learning_rate": 0.0002382536185509311,
      "loss": 2.8923,
      "step": 130405
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.418701171875,
      "learning_rate": 0.00023824961557507388,
      "loss": 2.9876,
      "step": 130406
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2069528102874756,
      "learning_rate": 0.00023824561261069714,
      "loss": 3.1846,
      "step": 130407
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.240363597869873,
      "learning_rate": 0.00023824160965780163,
      "loss": 3.1748,
      "step": 130408
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.440286636352539,
      "learning_rate": 0.0002382376067163882,
      "loss": 2.9854,
      "step": 130409
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.036642551422119,
      "learning_rate": 0.00023823360378645757,
      "loss": 2.9098,
      "step": 130410
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0176873207092285,
      "learning_rate": 0.00023822960086801047,
      "loss": 2.9522,
      "step": 130411
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9627443552017212,
      "learning_rate": 0.0002382255979610478,
      "loss": 2.947,
      "step": 130412
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2569198608398438,
      "learning_rate": 0.00023822159506557,
      "loss": 2.9792,
      "step": 130413
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7682188749313354,
      "learning_rate": 0.000238217592181578,
      "loss": 2.8605,
      "step": 130414
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8355040550231934,
      "learning_rate": 0.0002382135893090725,
      "loss": 2.7864,
      "step": 130415
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8920092582702637,
      "learning_rate": 0.00023820958644805426,
      "loss": 2.936,
      "step": 130416
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.158067464828491,
      "learning_rate": 0.00023820558359852404,
      "loss": 3.0109,
      "step": 130417
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4348294734954834,
      "learning_rate": 0.00023820158076048268,
      "loss": 2.6365,
      "step": 130418
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9615535736083984,
      "learning_rate": 0.0002381975779339307,
      "loss": 3.1305,
      "step": 130419
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1517064571380615,
      "learning_rate": 0.00023819357511886894,
      "loss": 2.8439,
      "step": 130420
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8982549905776978,
      "learning_rate": 0.00023818957231529816,
      "loss": 3.0501,
      "step": 130421
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8698495626449585,
      "learning_rate": 0.0002381855695232191,
      "loss": 3.1001,
      "step": 130422
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.812759518623352,
      "learning_rate": 0.00023818156674263255,
      "loss": 2.8134,
      "step": 130423
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8969987630844116,
      "learning_rate": 0.00023817756397353924,
      "loss": 2.9629,
      "step": 130424
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2616422176361084,
      "learning_rate": 0.0002381735612159399,
      "loss": 2.9197,
      "step": 130425
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.922959566116333,
      "learning_rate": 0.0002381695584698352,
      "loss": 2.8524,
      "step": 130426
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9890453815460205,
      "learning_rate": 0.00023816555573522594,
      "loss": 2.6746,
      "step": 130427
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.397739887237549,
      "learning_rate": 0.00023816155301211288,
      "loss": 3.1743,
      "step": 130428
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.427215814590454,
      "learning_rate": 0.00023815755030049676,
      "loss": 2.9953,
      "step": 130429
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9130687713623047,
      "learning_rate": 0.00023815354760037833,
      "loss": 2.9049,
      "step": 130430
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0402841567993164,
      "learning_rate": 0.0002381495449117583,
      "loss": 3.1602,
      "step": 130431
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.337388277053833,
      "learning_rate": 0.00023814554223463753,
      "loss": 2.9035,
      "step": 130432
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0225517749786377,
      "learning_rate": 0.00023814153956901654,
      "loss": 3.0855,
      "step": 130433
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0275614261627197,
      "learning_rate": 0.00023813753691489625,
      "loss": 3.0552,
      "step": 130434
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0891053676605225,
      "learning_rate": 0.00023813353427227737,
      "loss": 3.3248,
      "step": 130435
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6878108978271484,
      "learning_rate": 0.00023812953164116068,
      "loss": 3.023,
      "step": 130436
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.624629497528076,
      "learning_rate": 0.0002381255290215468,
      "loss": 2.7016,
      "step": 130437
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8424605131149292,
      "learning_rate": 0.00023812152641343657,
      "loss": 3.261,
      "step": 130438
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3935399055480957,
      "learning_rate": 0.00023811752381683071,
      "loss": 2.787,
      "step": 130439
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.1061441898345947,
      "learning_rate": 0.00023811352123173003,
      "loss": 2.9399,
      "step": 130440
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4277713298797607,
      "learning_rate": 0.00023810951865813518,
      "loss": 2.8748,
      "step": 130441
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5028462409973145,
      "learning_rate": 0.00023810551609604696,
      "loss": 2.9511,
      "step": 130442
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.169909954071045,
      "learning_rate": 0.00023810151354546606,
      "loss": 2.8464,
      "step": 130443
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.630870819091797,
      "learning_rate": 0.00023809751100639325,
      "loss": 3.2473,
      "step": 130444
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.532160997390747,
      "learning_rate": 0.00023809350847882926,
      "loss": 3.2755,
      "step": 130445
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.149385452270508,
      "learning_rate": 0.00023808950596277488,
      "loss": 3.0962,
      "step": 130446
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.448091983795166,
      "learning_rate": 0.00023808550345823088,
      "loss": 3.0319,
      "step": 130447
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8309268951416016,
      "learning_rate": 0.00023808150096519797,
      "loss": 2.9863,
      "step": 130448
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2814180850982666,
      "learning_rate": 0.00023807749848367679,
      "loss": 2.8978,
      "step": 130449
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7828915119171143,
      "learning_rate": 0.0002380734960136682,
      "loss": 3.0466,
      "step": 130450
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.108360767364502,
      "learning_rate": 0.0002380694935551729,
      "loss": 2.9756,
      "step": 130451
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7858856916427612,
      "learning_rate": 0.00023806549110819166,
      "loss": 3.2389,
      "step": 130452
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.261305570602417,
      "learning_rate": 0.0002380614886727252,
      "loss": 2.8439,
      "step": 130453
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8960387706756592,
      "learning_rate": 0.00023805748624877436,
      "loss": 2.976,
      "step": 130454
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5764849185943604,
      "learning_rate": 0.0002380534838363398,
      "loss": 3.0553,
      "step": 130455
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.740242838859558,
      "learning_rate": 0.00023804948143542215,
      "loss": 2.8937,
      "step": 130456
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3055922985076904,
      "learning_rate": 0.00023804547904602232,
      "loss": 2.9329,
      "step": 130457
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2145352363586426,
      "learning_rate": 0.000238041476668141,
      "loss": 2.9049,
      "step": 130458
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3896491527557373,
      "learning_rate": 0.00023803747430177895,
      "loss": 2.8908,
      "step": 130459
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9886398315429688,
      "learning_rate": 0.0002380334719469369,
      "loss": 2.9904,
      "step": 130460
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.947422742843628,
      "learning_rate": 0.00023802946960361574,
      "loss": 2.7177,
      "step": 130461
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.226041793823242,
      "learning_rate": 0.00023802546727181588,
      "loss": 2.8126,
      "step": 130462
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.926841139793396,
      "learning_rate": 0.00023802146495153828,
      "loss": 2.7565,
      "step": 130463
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.081185817718506,
      "learning_rate": 0.0002380174626427837,
      "loss": 2.9236,
      "step": 130464
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.3842053413391113,
      "learning_rate": 0.0002380134603455528,
      "loss": 2.9593,
      "step": 130465
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.523993492126465,
      "learning_rate": 0.0002380094580598464,
      "loss": 3.2478,
      "step": 130466
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7706130743026733,
      "learning_rate": 0.00023800545578566532,
      "loss": 2.9275,
      "step": 130467
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.267737627029419,
      "learning_rate": 0.0002380014535230101,
      "loss": 3.0005,
      "step": 130468
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.456087112426758,
      "learning_rate": 0.00023799745127188154,
      "loss": 2.9363,
      "step": 130469
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.343109130859375,
      "learning_rate": 0.00023799344903228047,
      "loss": 2.8676,
      "step": 130470
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0192112922668457,
      "learning_rate": 0.00023798944680420754,
      "loss": 3.0105,
      "step": 130471
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0248947143554688,
      "learning_rate": 0.00023798544458766352,
      "loss": 2.9015,
      "step": 130472
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.4366116523742676,
      "learning_rate": 0.00023798144238264938,
      "loss": 2.8278,
      "step": 130473
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7081689834594727,
      "learning_rate": 0.00023797744018916547,
      "loss": 3.0942,
      "step": 130474
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2462425231933594,
      "learning_rate": 0.00023797343800721277,
      "loss": 3.1145,
      "step": 130475
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.205331802368164,
      "learning_rate": 0.00023796943583679196,
      "loss": 2.8292,
      "step": 130476
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0616261959075928,
      "learning_rate": 0.0002379654336779038,
      "loss": 3.1471,
      "step": 130477
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5663771629333496,
      "learning_rate": 0.00023796143153054905,
      "loss": 3.1442,
      "step": 130478
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8349344730377197,
      "learning_rate": 0.00023795742939472858,
      "loss": 3.0486,
      "step": 130479
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.281784772872925,
      "learning_rate": 0.00023795342727044283,
      "loss": 2.9149,
      "step": 130480
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.560432434082031,
      "learning_rate": 0.00023794942515769274,
      "loss": 2.7649,
      "step": 130481
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9068516492843628,
      "learning_rate": 0.00023794542305647897,
      "loss": 2.9548,
      "step": 130482
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9991141557693481,
      "learning_rate": 0.00023794142096680238,
      "loss": 3.0248,
      "step": 130483
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.480316400527954,
      "learning_rate": 0.00023793741888866363,
      "loss": 2.9336,
      "step": 130484
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0646936893463135,
      "learning_rate": 0.00023793341682206361,
      "loss": 2.8611,
      "step": 130485
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.71732497215271,
      "learning_rate": 0.00023792941476700278,
      "loss": 2.8944,
      "step": 130486
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9507273435592651,
      "learning_rate": 0.00023792541272348207,
      "loss": 2.9725,
      "step": 130487
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.12788987159729,
      "learning_rate": 0.00023792141069150222,
      "loss": 3.3104,
      "step": 130488
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3846611976623535,
      "learning_rate": 0.00023791740867106393,
      "loss": 2.9512,
      "step": 130489
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.284977912902832,
      "learning_rate": 0.000237913406662168,
      "loss": 2.8289,
      "step": 130490
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8169630765914917,
      "learning_rate": 0.0002379094046648152,
      "loss": 3.1212,
      "step": 130491
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1371068954467773,
      "learning_rate": 0.00023790540267900614,
      "loss": 3.072,
      "step": 130492
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8730682134628296,
      "learning_rate": 0.00023790140070474163,
      "loss": 2.9875,
      "step": 130493
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9412468671798706,
      "learning_rate": 0.0002378973987420224,
      "loss": 3.0347,
      "step": 130494
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.777697205543518,
      "learning_rate": 0.00023789339679084924,
      "loss": 2.9233,
      "step": 130495
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.258350372314453,
      "learning_rate": 0.00023788939485122284,
      "loss": 3.1454,
      "step": 130496
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.898698568344116,
      "learning_rate": 0.00023788539292314413,
      "loss": 2.99,
      "step": 130497
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3421308994293213,
      "learning_rate": 0.00023788139100661354,
      "loss": 2.9938,
      "step": 130498
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7668113708496094,
      "learning_rate": 0.000237877389101632,
      "loss": 3.1101,
      "step": 130499
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.844989776611328,
      "learning_rate": 0.00023787338720820023,
      "loss": 2.9551,
      "step": 130500
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.005234479904175,
      "learning_rate": 0.00023786938532631893,
      "loss": 2.952,
      "step": 130501
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9754526615142822,
      "learning_rate": 0.0002378653834559889,
      "loss": 3.1869,
      "step": 130502
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.525961399078369,
      "learning_rate": 0.00023786138159721107,
      "loss": 2.9813,
      "step": 130503
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1983771324157715,
      "learning_rate": 0.00023785737974998575,
      "loss": 2.9328,
      "step": 130504
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.114442825317383,
      "learning_rate": 0.00023785337791431397,
      "loss": 3.2517,
      "step": 130505
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8883910179138184,
      "learning_rate": 0.0002378493760901964,
      "loss": 2.8468,
      "step": 130506
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.9362924098968506,
      "learning_rate": 0.00023784537427763383,
      "loss": 2.971,
      "step": 130507
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6889617443084717,
      "learning_rate": 0.00023784137247662694,
      "loss": 2.9515,
      "step": 130508
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2119293212890625,
      "learning_rate": 0.00023783737068717664,
      "loss": 3.0111,
      "step": 130509
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8922580480575562,
      "learning_rate": 0.00023783336890928348,
      "loss": 2.7608,
      "step": 130510
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.066079139709473,
      "learning_rate": 0.00023782936714294823,
      "loss": 2.8915,
      "step": 130511
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.3595643043518066,
      "learning_rate": 0.00023782536538817164,
      "loss": 3.11,
      "step": 130512
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.507399559020996,
      "learning_rate": 0.00023782136364495455,
      "loss": 2.9244,
      "step": 130513
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.224191904067993,
      "learning_rate": 0.00023781736191329762,
      "loss": 3.1023,
      "step": 130514
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.929002046585083,
      "learning_rate": 0.00023781336019320162,
      "loss": 3.0414,
      "step": 130515
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.020720958709717,
      "learning_rate": 0.0002378093584846673,
      "loss": 2.8856,
      "step": 130516
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.236705303192139,
      "learning_rate": 0.00023780535678769545,
      "loss": 2.8271,
      "step": 130517
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.859916925430298,
      "learning_rate": 0.0002378013551022867,
      "loss": 3.0628,
      "step": 130518
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.672208786010742,
      "learning_rate": 0.00023779735342844183,
      "loss": 2.9438,
      "step": 130519
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4354002475738525,
      "learning_rate": 0.00023779335176616161,
      "loss": 3.367,
      "step": 130520
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.997249722480774,
      "learning_rate": 0.0002377893501154468,
      "loss": 3.0965,
      "step": 130521
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.449561357498169,
      "learning_rate": 0.00023778534847629815,
      "loss": 2.9561,
      "step": 130522
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7047022581100464,
      "learning_rate": 0.0002377813468487163,
      "loss": 3.0024,
      "step": 130523
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.867863893508911,
      "learning_rate": 0.0002377773452327022,
      "loss": 3.0082,
      "step": 130524
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8981578350067139,
      "learning_rate": 0.00023777334362825637,
      "loss": 3.1786,
      "step": 130525
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.648433208465576,
      "learning_rate": 0.00023776934203537965,
      "loss": 2.8433,
      "step": 130526
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.671443223953247,
      "learning_rate": 0.0002377653404540728,
      "loss": 3.0249,
      "step": 130527
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0651817321777344,
      "learning_rate": 0.0002377613388843366,
      "loss": 2.8908,
      "step": 130528
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.217535972595215,
      "learning_rate": 0.00023775733732617167,
      "loss": 2.8449,
      "step": 130529
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.954435110092163,
      "learning_rate": 0.00023775333577957884,
      "loss": 2.8681,
      "step": 130530
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9453747272491455,
      "learning_rate": 0.0002377493342445589,
      "loss": 2.81,
      "step": 130531
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4383695125579834,
      "learning_rate": 0.00023774533272111247,
      "loss": 3.0056,
      "step": 130532
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.1517014503479004,
      "learning_rate": 0.00023774133120924034,
      "loss": 3.1445,
      "step": 130533
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0463128089904785,
      "learning_rate": 0.00023773732970894338,
      "loss": 3.0851,
      "step": 130534
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8632322549819946,
      "learning_rate": 0.0002377333282202221,
      "loss": 3.0419,
      "step": 130535
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1407268047332764,
      "learning_rate": 0.0002377293267430774,
      "loss": 2.9727,
      "step": 130536
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.166273593902588,
      "learning_rate": 0.00023772532527751,
      "loss": 2.9681,
      "step": 130537
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7997024059295654,
      "learning_rate": 0.00023772132382352066,
      "loss": 3.1669,
      "step": 130538
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.146846055984497,
      "learning_rate": 0.0002377173223811101,
      "loss": 2.8307,
      "step": 130539
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8500895500183105,
      "learning_rate": 0.00023771332095027912,
      "loss": 2.8119,
      "step": 130540
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2814040184020996,
      "learning_rate": 0.0002377093195310283,
      "loss": 2.8735,
      "step": 130541
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8684558868408203,
      "learning_rate": 0.00023770531812335852,
      "loss": 2.7431,
      "step": 130542
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8316885232925415,
      "learning_rate": 0.00023770131672727054,
      "loss": 2.8269,
      "step": 130543
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.757333755493164,
      "learning_rate": 0.000237697315342765,
      "loss": 2.8914,
      "step": 130544
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.6683056354522705,
      "learning_rate": 0.00023769331396984272,
      "loss": 3.0587,
      "step": 130545
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.117818355560303,
      "learning_rate": 0.00023768931260850456,
      "loss": 2.8132,
      "step": 130546
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3012945652008057,
      "learning_rate": 0.00023768531125875102,
      "loss": 2.8372,
      "step": 130547
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.122267723083496,
      "learning_rate": 0.0002376813099205829,
      "loss": 2.7943,
      "step": 130548
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3129000663757324,
      "learning_rate": 0.00023767730859400107,
      "loss": 2.7786,
      "step": 130549
    },
    {
      "epoch": 1.7,
      "grad_norm": 5.242281913757324,
      "learning_rate": 0.00023767330727900618,
      "loss": 2.8254,
      "step": 130550
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.4045772552490234,
      "learning_rate": 0.000237669305975599,
      "loss": 2.8808,
      "step": 130551
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.13377046585083,
      "learning_rate": 0.00023766530468378043,
      "loss": 3.113,
      "step": 130552
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.374445676803589,
      "learning_rate": 0.00023766130340355089,
      "loss": 2.8153,
      "step": 130553
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.029758930206299,
      "learning_rate": 0.00023765730213491128,
      "loss": 2.9094,
      "step": 130554
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.088303327560425,
      "learning_rate": 0.00023765330087786236,
      "loss": 3.0859,
      "step": 130555
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.816593885421753,
      "learning_rate": 0.0002376492996324049,
      "loss": 2.9996,
      "step": 130556
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9518201351165771,
      "learning_rate": 0.0002376452983985396,
      "loss": 2.9611,
      "step": 130557
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1621510982513428,
      "learning_rate": 0.00023764129717626732,
      "loss": 2.6669,
      "step": 130558
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.682399272918701,
      "learning_rate": 0.00023763729596558861,
      "loss": 2.9396,
      "step": 130559
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.505174160003662,
      "learning_rate": 0.0002376332947665043,
      "loss": 2.8929,
      "step": 130560
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1790812015533447,
      "learning_rate": 0.00023762929357901507,
      "loss": 3.3911,
      "step": 130561
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1980245113372803,
      "learning_rate": 0.0002376252924031218,
      "loss": 2.8088,
      "step": 130562
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8658583164215088,
      "learning_rate": 0.00023762129123882516,
      "loss": 2.8998,
      "step": 130563
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.807666778564453,
      "learning_rate": 0.000237617290086126,
      "loss": 2.931,
      "step": 130564
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5131049156188965,
      "learning_rate": 0.00023761328894502485,
      "loss": 2.9313,
      "step": 130565
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.997076153755188,
      "learning_rate": 0.00023760928781552256,
      "loss": 3.2387,
      "step": 130566
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.809616208076477,
      "learning_rate": 0.0002376052866976199,
      "loss": 2.8332,
      "step": 130567
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.8199234008789062,
      "learning_rate": 0.0002376012855913176,
      "loss": 2.756,
      "step": 130568
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.3008408546447754,
      "learning_rate": 0.00023759728449661638,
      "loss": 3.06,
      "step": 130569
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.601499557495117,
      "learning_rate": 0.00023759328341351713,
      "loss": 2.9526,
      "step": 130570
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1837852001190186,
      "learning_rate": 0.0002375892823420203,
      "loss": 2.8003,
      "step": 130571
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.057312488555908,
      "learning_rate": 0.00023758528128212689,
      "loss": 3.0215,
      "step": 130572
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.585925817489624,
      "learning_rate": 0.00023758128023383747,
      "loss": 2.9853,
      "step": 130573
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9583348035812378,
      "learning_rate": 0.0002375772791971529,
      "loss": 2.9608,
      "step": 130574
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.758328437805176,
      "learning_rate": 0.00023757327817207393,
      "loss": 2.7829,
      "step": 130575
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0179443359375,
      "learning_rate": 0.00023756927715860136,
      "loss": 3.0852,
      "step": 130576
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7290472984313965,
      "learning_rate": 0.0002375652761567357,
      "loss": 3.1364,
      "step": 130577
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.423781394958496,
      "learning_rate": 0.00023756127516647783,
      "loss": 3.0577,
      "step": 130578
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9987093210220337,
      "learning_rate": 0.00023755727418782856,
      "loss": 2.7748,
      "step": 130579
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.402036428451538,
      "learning_rate": 0.00023755327322078846,
      "loss": 2.9804,
      "step": 130580
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.048081159591675,
      "learning_rate": 0.0002375492722653585,
      "loss": 2.9702,
      "step": 130581
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1079726219177246,
      "learning_rate": 0.00023754527132153927,
      "loss": 3.2037,
      "step": 130582
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.149606466293335,
      "learning_rate": 0.00023754127038933166,
      "loss": 3.016,
      "step": 130583
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.85516357421875,
      "learning_rate": 0.00023753726946873616,
      "loss": 2.8914,
      "step": 130584
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8425179719924927,
      "learning_rate": 0.00023753326855975373,
      "loss": 2.974,
      "step": 130585
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5348899364471436,
      "learning_rate": 0.00023752926766238498,
      "loss": 2.789,
      "step": 130586
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0422003269195557,
      "learning_rate": 0.00023752526677663075,
      "loss": 3.07,
      "step": 130587
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4653685092926025,
      "learning_rate": 0.00023752126590249173,
      "loss": 2.9581,
      "step": 130588
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8894340991973877,
      "learning_rate": 0.00023751726503996886,
      "loss": 3.0784,
      "step": 130589
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.626314640045166,
      "learning_rate": 0.00023751326418906251,
      "loss": 2.9252,
      "step": 130590
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.345745801925659,
      "learning_rate": 0.0002375092633497737,
      "loss": 3.1193,
      "step": 130591
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.394371509552002,
      "learning_rate": 0.00023750526252210307,
      "loss": 3.041,
      "step": 130592
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3758647441864014,
      "learning_rate": 0.00023750126170605138,
      "loss": 2.7762,
      "step": 130593
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.987470030784607,
      "learning_rate": 0.00023749726090161935,
      "loss": 3.1411,
      "step": 130594
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1534533500671387,
      "learning_rate": 0.00023749326010880794,
      "loss": 3.0703,
      "step": 130595
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0195465087890625,
      "learning_rate": 0.00023748925932761758,
      "loss": 3.0876,
      "step": 130596
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2168147563934326,
      "learning_rate": 0.00023748525855804914,
      "loss": 2.8166,
      "step": 130597
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.007044553756714,
      "learning_rate": 0.00023748125780010337,
      "loss": 2.708,
      "step": 130598
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.736081600189209,
      "learning_rate": 0.000237477257053781,
      "loss": 2.9879,
      "step": 130599
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8027012348175049,
      "learning_rate": 0.00023747325631908283,
      "loss": 3.1681,
      "step": 130600
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6205990314483643,
      "learning_rate": 0.00023746925559600958,
      "loss": 2.9716,
      "step": 130601
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.825251817703247,
      "learning_rate": 0.000237465254884562,
      "loss": 2.7536,
      "step": 130602
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.012599468231201,
      "learning_rate": 0.00023746125418474073,
      "loss": 2.7963,
      "step": 130603
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.0816798210144043,
      "learning_rate": 0.00023745725349654662,
      "loss": 3.2435,
      "step": 130604
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9260960817337036,
      "learning_rate": 0.00023745325281998036,
      "loss": 3.0829,
      "step": 130605
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9384691715240479,
      "learning_rate": 0.0002374492521550427,
      "loss": 2.7551,
      "step": 130606
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.827784538269043,
      "learning_rate": 0.0002374452515017345,
      "loss": 2.9567,
      "step": 130607
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.282076835632324,
      "learning_rate": 0.00023744125086005635,
      "loss": 2.9142,
      "step": 130608
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.044306755065918,
      "learning_rate": 0.00023743725023000911,
      "loss": 2.8147,
      "step": 130609
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6850156784057617,
      "learning_rate": 0.00023743324961159341,
      "loss": 3.2113,
      "step": 130610
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2538723945617676,
      "learning_rate": 0.00023742924900481004,
      "loss": 3.0406,
      "step": 130611
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1738815307617188,
      "learning_rate": 0.00023742524840965974,
      "loss": 2.9962,
      "step": 130612
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7745158672332764,
      "learning_rate": 0.00023742124782614334,
      "loss": 3.2895,
      "step": 130613
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9779119491577148,
      "learning_rate": 0.00023741724725426147,
      "loss": 2.7615,
      "step": 130614
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0202131271362305,
      "learning_rate": 0.00023741324669401492,
      "loss": 2.8992,
      "step": 130615
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9725598096847534,
      "learning_rate": 0.00023740924614540446,
      "loss": 2.9321,
      "step": 130616
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.6625832319259644,
      "learning_rate": 0.00023740524560843075,
      "loss": 3.1556,
      "step": 130617
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.5834877490997314,
      "learning_rate": 0.00023740124508309457,
      "loss": 2.9921,
      "step": 130618
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.231461524963379,
      "learning_rate": 0.0002373972445693968,
      "loss": 2.84,
      "step": 130619
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9859319925308228,
      "learning_rate": 0.00023739324406733795,
      "loss": 2.8786,
      "step": 130620
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4138381481170654,
      "learning_rate": 0.00023738924357691888,
      "loss": 3.0895,
      "step": 130621
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9493801593780518,
      "learning_rate": 0.00023738524309814034,
      "loss": 2.9856,
      "step": 130622
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.356064558029175,
      "learning_rate": 0.00023738124263100312,
      "loss": 3.0099,
      "step": 130623
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.69619083404541,
      "learning_rate": 0.00023737724217550785,
      "loss": 3.0869,
      "step": 130624
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7634623050689697,
      "learning_rate": 0.0002373732417316554,
      "loss": 3.0362,
      "step": 130625
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5976176261901855,
      "learning_rate": 0.0002373692412994464,
      "loss": 2.8641,
      "step": 130626
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.215847969055176,
      "learning_rate": 0.00023736524087888163,
      "loss": 3.0839,
      "step": 130627
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7396029233932495,
      "learning_rate": 0.00023736124046996182,
      "loss": 3.0984,
      "step": 130628
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2380077838897705,
      "learning_rate": 0.00023735724007268775,
      "loss": 3.0435,
      "step": 130629
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8588435649871826,
      "learning_rate": 0.00023735323968706017,
      "loss": 3.0865,
      "step": 130630
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2164149284362793,
      "learning_rate": 0.00023734923931307989,
      "loss": 2.8884,
      "step": 130631
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8897825479507446,
      "learning_rate": 0.0002373452389507475,
      "loss": 3.0469,
      "step": 130632
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.635183572769165,
      "learning_rate": 0.0002373412386000638,
      "loss": 3.1875,
      "step": 130633
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8743839263916016,
      "learning_rate": 0.0002373372382610295,
      "loss": 3.1052,
      "step": 130634
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4875006675720215,
      "learning_rate": 0.00023733323793364541,
      "loss": 2.9478,
      "step": 130635
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9303914308547974,
      "learning_rate": 0.0002373292376179123,
      "loss": 2.8,
      "step": 130636
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.4833617210388184,
      "learning_rate": 0.00023732523731383095,
      "loss": 2.7911,
      "step": 130637
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7422971725463867,
      "learning_rate": 0.0002373212370214019,
      "loss": 3.1063,
      "step": 130638
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7803524732589722,
      "learning_rate": 0.00023731723674062604,
      "loss": 3.0889,
      "step": 130639
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9571247100830078,
      "learning_rate": 0.00023731323647150406,
      "loss": 3.1076,
      "step": 130640
    },
    {
      "epoch": 1.7,
      "grad_norm": 5.136011600494385,
      "learning_rate": 0.00023730923621403675,
      "loss": 2.9553,
      "step": 130641
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.41882061958313,
      "learning_rate": 0.00023730523596822482,
      "loss": 3.2476,
      "step": 130642
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1634583473205566,
      "learning_rate": 0.00023730123573406916,
      "loss": 2.9843,
      "step": 130643
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.478607177734375,
      "learning_rate": 0.00023729723551157026,
      "loss": 2.9505,
      "step": 130644
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5390336513519287,
      "learning_rate": 0.00023729323530072898,
      "loss": 2.8529,
      "step": 130645
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9373522996902466,
      "learning_rate": 0.00023728923510154607,
      "loss": 3.0709,
      "step": 130646
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.6558499336242676,
      "learning_rate": 0.0002372852349140223,
      "loss": 2.8566,
      "step": 130647
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.998356580734253,
      "learning_rate": 0.00023728123473815838,
      "loss": 3.19,
      "step": 130648
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.202333927154541,
      "learning_rate": 0.00023727723457395505,
      "loss": 3.1333,
      "step": 130649
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9316163063049316,
      "learning_rate": 0.0002372732344214132,
      "loss": 2.931,
      "step": 130650
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.861454486846924,
      "learning_rate": 0.00023726923428053334,
      "loss": 3.1562,
      "step": 130651
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.003295660018921,
      "learning_rate": 0.00023726523415131627,
      "loss": 2.9428,
      "step": 130652
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0394492149353027,
      "learning_rate": 0.0002372612340337628,
      "loss": 3.2254,
      "step": 130653
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1227972507476807,
      "learning_rate": 0.00023725723392787362,
      "loss": 2.9408,
      "step": 130654
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1042611598968506,
      "learning_rate": 0.00023725323383364953,
      "loss": 2.9949,
      "step": 130655
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.615384817123413,
      "learning_rate": 0.00023724923375109138,
      "loss": 2.8864,
      "step": 130656
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6462643146514893,
      "learning_rate": 0.00023724523368019964,
      "loss": 2.8763,
      "step": 130657
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2806460857391357,
      "learning_rate": 0.0002372412336209752,
      "loss": 2.7119,
      "step": 130658
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.1161394119262695,
      "learning_rate": 0.0002372372335734188,
      "loss": 2.7755,
      "step": 130659
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8680708408355713,
      "learning_rate": 0.0002372332335375312,
      "loss": 3.0321,
      "step": 130660
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9554426670074463,
      "learning_rate": 0.0002372292335133131,
      "loss": 2.8793,
      "step": 130661
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.177914619445801,
      "learning_rate": 0.00023722523350076541,
      "loss": 3.1317,
      "step": 130662
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7770936489105225,
      "learning_rate": 0.00023722123349988864,
      "loss": 2.8998,
      "step": 130663
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8338701725006104,
      "learning_rate": 0.00023721723351068357,
      "loss": 3.0043,
      "step": 130664
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.781001091003418,
      "learning_rate": 0.00023721323353315105,
      "loss": 3.0799,
      "step": 130665
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.496896266937256,
      "learning_rate": 0.00023720923356729173,
      "loss": 2.8587,
      "step": 130666
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8645482063293457,
      "learning_rate": 0.00023720523361310642,
      "loss": 2.9538,
      "step": 130667
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1171581745147705,
      "learning_rate": 0.00023720123367059598,
      "loss": 2.8993,
      "step": 130668
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.583064556121826,
      "learning_rate": 0.0002371972337397609,
      "loss": 2.5558,
      "step": 130669
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.291581869125366,
      "learning_rate": 0.000237193233820602,
      "loss": 3.0901,
      "step": 130670
    },
    {
      "epoch": 1.7,
      "grad_norm": 5.011423587799072,
      "learning_rate": 0.00023718923391312006,
      "loss": 2.931,
      "step": 130671
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7824722528457642,
      "learning_rate": 0.00023718523401731586,
      "loss": 2.8548,
      "step": 130672
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7832446098327637,
      "learning_rate": 0.00023718123413319012,
      "loss": 3.0807,
      "step": 130673
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.844270944595337,
      "learning_rate": 0.00023717723426074368,
      "loss": 3.0563,
      "step": 130674
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.950987458229065,
      "learning_rate": 0.00023717323439997705,
      "loss": 2.9427,
      "step": 130675
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9863512516021729,
      "learning_rate": 0.0002371692345508911,
      "loss": 2.9474,
      "step": 130676
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8757362365722656,
      "learning_rate": 0.00023716523471348659,
      "loss": 2.923,
      "step": 130677
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.1177265644073486,
      "learning_rate": 0.00023716123488776422,
      "loss": 2.8419,
      "step": 130678
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.774832010269165,
      "learning_rate": 0.00023715723507372477,
      "loss": 2.9768,
      "step": 130679
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6478235721588135,
      "learning_rate": 0.0002371532352713691,
      "loss": 2.7552,
      "step": 130680
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8185179233551025,
      "learning_rate": 0.00023714923548069774,
      "loss": 3.0849,
      "step": 130681
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.264516830444336,
      "learning_rate": 0.00023714523570171148,
      "loss": 2.8611,
      "step": 130682
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7319203615188599,
      "learning_rate": 0.00023714123593441115,
      "loss": 2.8118,
      "step": 130683
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.391296148300171,
      "learning_rate": 0.00023713723617879744,
      "loss": 2.6954,
      "step": 130684
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5574755668640137,
      "learning_rate": 0.00023713323643487108,
      "loss": 2.6573,
      "step": 130685
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.098041296005249,
      "learning_rate": 0.00023712923670263298,
      "loss": 3.0433,
      "step": 130686
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1743972301483154,
      "learning_rate": 0.00023712523698208365,
      "loss": 2.8364,
      "step": 130687
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9004658460617065,
      "learning_rate": 0.00023712123727322385,
      "loss": 2.9147,
      "step": 130688
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8580200672149658,
      "learning_rate": 0.00023711723757605446,
      "loss": 2.7978,
      "step": 130689
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9556561708450317,
      "learning_rate": 0.0002371132378905761,
      "loss": 2.9771,
      "step": 130690
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3776612281799316,
      "learning_rate": 0.00023710923821678963,
      "loss": 3.103,
      "step": 130691
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0563738346099854,
      "learning_rate": 0.00023710523855469581,
      "loss": 3.1241,
      "step": 130692
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.678969144821167,
      "learning_rate": 0.0002371012389042952,
      "loss": 2.9619,
      "step": 130693
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1505379676818848,
      "learning_rate": 0.00023709723926558876,
      "loss": 2.8864,
      "step": 130694
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7942370176315308,
      "learning_rate": 0.00023709323963857704,
      "loss": 2.9818,
      "step": 130695
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6434438228607178,
      "learning_rate": 0.0002370892400232609,
      "loss": 2.8514,
      "step": 130696
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0264158248901367,
      "learning_rate": 0.0002370852404196411,
      "loss": 3.0409,
      "step": 130697
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3783693313598633,
      "learning_rate": 0.00023708124082771832,
      "loss": 3.128,
      "step": 130698
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2910141944885254,
      "learning_rate": 0.0002370772412474933,
      "loss": 2.8115,
      "step": 130699
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7929234504699707,
      "learning_rate": 0.00023707324167896677,
      "loss": 2.9483,
      "step": 130700
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9698317050933838,
      "learning_rate": 0.00023706924212213966,
      "loss": 3.0243,
      "step": 130701
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3285765647888184,
      "learning_rate": 0.0002370652425770124,
      "loss": 3.0387,
      "step": 130702
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1092026233673096,
      "learning_rate": 0.000237061243043586,
      "loss": 3.0464,
      "step": 130703
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8632323741912842,
      "learning_rate": 0.00023705724352186111,
      "loss": 3.2483,
      "step": 130704
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2841570377349854,
      "learning_rate": 0.0002370532440118384,
      "loss": 2.7662,
      "step": 130705
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.39681077003479,
      "learning_rate": 0.00023704924451351867,
      "loss": 3.0222,
      "step": 130706
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0834600925445557,
      "learning_rate": 0.00023704524502690273,
      "loss": 3.2675,
      "step": 130707
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2215023040771484,
      "learning_rate": 0.00023704124555199127,
      "loss": 3.1887,
      "step": 130708
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.511139154434204,
      "learning_rate": 0.00023703724608878498,
      "loss": 2.7623,
      "step": 130709
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5812342166900635,
      "learning_rate": 0.00023703324663728472,
      "loss": 2.8672,
      "step": 130710
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.763362169265747,
      "learning_rate": 0.0002370292471974911,
      "loss": 2.8646,
      "step": 130711
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.34263014793396,
      "learning_rate": 0.00023702524776940498,
      "loss": 2.7172,
      "step": 130712
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9488182067871094,
      "learning_rate": 0.000237021248353027,
      "loss": 3.1457,
      "step": 130713
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9098892211914062,
      "learning_rate": 0.000237017248948358,
      "loss": 2.6874,
      "step": 130714
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.365163803100586,
      "learning_rate": 0.00023701324955539873,
      "loss": 3.0233,
      "step": 130715
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.023764133453369,
      "learning_rate": 0.0002370092501741498,
      "loss": 2.8054,
      "step": 130716
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8257880210876465,
      "learning_rate": 0.00023700525080461214,
      "loss": 2.9337,
      "step": 130717
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.571493148803711,
      "learning_rate": 0.00023700125144678633,
      "loss": 3.033,
      "step": 130718
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9826045036315918,
      "learning_rate": 0.00023699725210067315,
      "loss": 2.9286,
      "step": 130719
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8415992259979248,
      "learning_rate": 0.00023699325276627337,
      "loss": 2.9524,
      "step": 130720
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.420931816101074,
      "learning_rate": 0.00023698925344358774,
      "loss": 2.9079,
      "step": 130721
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3962175846099854,
      "learning_rate": 0.00023698525413261702,
      "loss": 3.1566,
      "step": 130722
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.053314685821533,
      "learning_rate": 0.000236981254833362,
      "loss": 3.0506,
      "step": 130723
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.111206531524658,
      "learning_rate": 0.00023697725554582328,
      "loss": 2.8063,
      "step": 130724
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6565449237823486,
      "learning_rate": 0.00023697325627000166,
      "loss": 3.1187,
      "step": 130725
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8800994157791138,
      "learning_rate": 0.0002369692570058979,
      "loss": 2.8705,
      "step": 130726
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.152038335800171,
      "learning_rate": 0.00023696525775351277,
      "loss": 3.0772,
      "step": 130727
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8917244672775269,
      "learning_rate": 0.00023696125851284692,
      "loss": 3.0212,
      "step": 130728
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9097466468811035,
      "learning_rate": 0.00023695725928390136,
      "loss": 3.037,
      "step": 130729
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9898908138275146,
      "learning_rate": 0.00023695326006667648,
      "loss": 2.9031,
      "step": 130730
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3259623050689697,
      "learning_rate": 0.00023694926086117318,
      "loss": 2.9664,
      "step": 130731
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4332191944122314,
      "learning_rate": 0.00023694526166739222,
      "loss": 2.8112,
      "step": 130732
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0325276851654053,
      "learning_rate": 0.0002369412624853343,
      "loss": 2.7919,
      "step": 130733
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.330460786819458,
      "learning_rate": 0.00023693726331500023,
      "loss": 3.0136,
      "step": 130734
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8702349662780762,
      "learning_rate": 0.0002369332641563908,
      "loss": 3.2199,
      "step": 130735
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7486276626586914,
      "learning_rate": 0.00023692926500950653,
      "loss": 2.7198,
      "step": 130736
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9464209079742432,
      "learning_rate": 0.00023692526587434833,
      "loss": 3.1745,
      "step": 130737
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0159623622894287,
      "learning_rate": 0.0002369212667509169,
      "loss": 2.8967,
      "step": 130738
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9137468338012695,
      "learning_rate": 0.000236917267639213,
      "loss": 2.8781,
      "step": 130739
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.727668523788452,
      "learning_rate": 0.0002369132685392374,
      "loss": 2.9265,
      "step": 130740
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2301855087280273,
      "learning_rate": 0.00023690926945099092,
      "loss": 2.9907,
      "step": 130741
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.948081612586975,
      "learning_rate": 0.00023690527037447404,
      "loss": 2.9852,
      "step": 130742
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.349637031555176,
      "learning_rate": 0.0002369012713096877,
      "loss": 3.0053,
      "step": 130743
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2290234565734863,
      "learning_rate": 0.0002368972722566326,
      "loss": 3.0317,
      "step": 130744
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.309981346130371,
      "learning_rate": 0.00023689327321530946,
      "loss": 3.0175,
      "step": 130745
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0855820178985596,
      "learning_rate": 0.00023688927418571905,
      "loss": 2.8535,
      "step": 130746
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.361159563064575,
      "learning_rate": 0.00023688527516786226,
      "loss": 3.1257,
      "step": 130747
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9876412153244019,
      "learning_rate": 0.0002368812761617396,
      "loss": 2.8172,
      "step": 130748
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1145923137664795,
      "learning_rate": 0.00023687727716735184,
      "loss": 2.9485,
      "step": 130749
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9779149293899536,
      "learning_rate": 0.00023687327818469982,
      "loss": 3.1793,
      "step": 130750
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.767744779586792,
      "learning_rate": 0.0002368692792137842,
      "loss": 3.031,
      "step": 130751
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.026059627532959,
      "learning_rate": 0.00023686528025460585,
      "loss": 2.8869,
      "step": 130752
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5373761653900146,
      "learning_rate": 0.00023686128130716554,
      "loss": 3.1287,
      "step": 130753
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.105790853500366,
      "learning_rate": 0.00023685728237146376,
      "loss": 3.1164,
      "step": 130754
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3053736686706543,
      "learning_rate": 0.00023685328344750142,
      "loss": 3.0582,
      "step": 130755
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.604262351989746,
      "learning_rate": 0.00023684928453527923,
      "loss": 2.9464,
      "step": 130756
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1516997814178467,
      "learning_rate": 0.000236845285634798,
      "loss": 3.261,
      "step": 130757
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9191871881484985,
      "learning_rate": 0.00023684128674605835,
      "loss": 2.9765,
      "step": 130758
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9308488368988037,
      "learning_rate": 0.00023683728786906127,
      "loss": 2.8445,
      "step": 130759
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3911972045898438,
      "learning_rate": 0.00023683328900380717,
      "loss": 2.828,
      "step": 130760
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3579518795013428,
      "learning_rate": 0.000236829290150297,
      "loss": 3.0904,
      "step": 130761
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9750299453735352,
      "learning_rate": 0.0002368252913085314,
      "loss": 3.0678,
      "step": 130762
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.133878231048584,
      "learning_rate": 0.00023682129247851123,
      "loss": 2.991,
      "step": 130763
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.045346260070801,
      "learning_rate": 0.00023681729366023714,
      "loss": 2.9128,
      "step": 130764
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9658596515655518,
      "learning_rate": 0.00023681329485371005,
      "loss": 2.7169,
      "step": 130765
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9007929563522339,
      "learning_rate": 0.00023680929605893045,
      "loss": 3.1342,
      "step": 130766
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.085590124130249,
      "learning_rate": 0.00023680529727589914,
      "loss": 2.9142,
      "step": 130767
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6992132663726807,
      "learning_rate": 0.00023680129850461694,
      "loss": 2.8551,
      "step": 130768
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0951006412506104,
      "learning_rate": 0.00023679729974508462,
      "loss": 2.7902,
      "step": 130769
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5536069869995117,
      "learning_rate": 0.00023679330099730283,
      "loss": 3.0597,
      "step": 130770
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7892558574676514,
      "learning_rate": 0.0002367893022612725,
      "loss": 2.9041,
      "step": 130771
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9794034957885742,
      "learning_rate": 0.00023678530353699406,
      "loss": 2.725,
      "step": 130772
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8972278833389282,
      "learning_rate": 0.0002367813048244685,
      "loss": 3.0506,
      "step": 130773
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.3881664276123047,
      "learning_rate": 0.00023677730612369643,
      "loss": 3.002,
      "step": 130774
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.988048791885376,
      "learning_rate": 0.00023677330743467867,
      "loss": 3.1002,
      "step": 130775
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.11452317237854,
      "learning_rate": 0.00023676930875741594,
      "loss": 3.0912,
      "step": 130776
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.457390546798706,
      "learning_rate": 0.00023676531009190907,
      "loss": 2.9053,
      "step": 130777
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.531869888305664,
      "learning_rate": 0.00023676131143815873,
      "loss": 3.0607,
      "step": 130778
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.2607421875,
      "learning_rate": 0.00023675731279616555,
      "loss": 3.1232,
      "step": 130779
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.019076347351074,
      "learning_rate": 0.00023675331416593041,
      "loss": 3.0588,
      "step": 130780
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1164894104003906,
      "learning_rate": 0.00023674931554745402,
      "loss": 3.3391,
      "step": 130781
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2595105171203613,
      "learning_rate": 0.0002367453169407371,
      "loss": 2.7259,
      "step": 130782
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.575267791748047,
      "learning_rate": 0.00023674131834578046,
      "loss": 3.1423,
      "step": 130783
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.2622146606445312,
      "learning_rate": 0.00023673731976258484,
      "loss": 2.8652,
      "step": 130784
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.886568546295166,
      "learning_rate": 0.00023673332119115097,
      "loss": 3.0229,
      "step": 130785
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9752650260925293,
      "learning_rate": 0.00023672932263147948,
      "loss": 2.9858,
      "step": 130786
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0730855464935303,
      "learning_rate": 0.00023672532408357123,
      "loss": 3.277,
      "step": 130787
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.9274981021881104,
      "learning_rate": 0.00023672132554742692,
      "loss": 2.9903,
      "step": 130788
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.655261754989624,
      "learning_rate": 0.00023671732702304732,
      "loss": 2.8601,
      "step": 130789
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5966644287109375,
      "learning_rate": 0.00023671332851043323,
      "loss": 3.1818,
      "step": 130790
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8103210926055908,
      "learning_rate": 0.00023670933000958524,
      "loss": 2.9296,
      "step": 130791
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0294349193573,
      "learning_rate": 0.00023670533152050417,
      "loss": 2.8681,
      "step": 130792
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.855103015899658,
      "learning_rate": 0.0002367013330431909,
      "loss": 2.7983,
      "step": 130793
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.182075023651123,
      "learning_rate": 0.00023669733457764593,
      "loss": 2.901,
      "step": 130794
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.125437021255493,
      "learning_rate": 0.00023669333612387012,
      "loss": 2.649,
      "step": 130795
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6604714393615723,
      "learning_rate": 0.0002366893376818643,
      "loss": 2.8879,
      "step": 130796
    },
    {
      "epoch": 1.7,
      "grad_norm": 4.1552581787109375,
      "learning_rate": 0.00023668533925162906,
      "loss": 3.1145,
      "step": 130797
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.935884475708008,
      "learning_rate": 0.0002366813408331652,
      "loss": 3.0314,
      "step": 130798
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3475522994995117,
      "learning_rate": 0.00023667734242647349,
      "loss": 2.8021,
      "step": 130799
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.209639549255371,
      "learning_rate": 0.0002366733440315547,
      "loss": 3.0294,
      "step": 130800
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8841066360473633,
      "learning_rate": 0.00023666934564840948,
      "loss": 3.0644,
      "step": 130801
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.602074146270752,
      "learning_rate": 0.0002366653472770387,
      "loss": 3.0038,
      "step": 130802
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.06703519821167,
      "learning_rate": 0.00023666134891744294,
      "loss": 3.174,
      "step": 130803
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4129369258880615,
      "learning_rate": 0.00023665735056962303,
      "loss": 3.0417,
      "step": 130804
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.552125930786133,
      "learning_rate": 0.00023665335223357976,
      "loss": 3.0852,
      "step": 130805
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.268697500228882,
      "learning_rate": 0.00023664935390931376,
      "loss": 3.0588,
      "step": 130806
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7801342010498047,
      "learning_rate": 0.00023664535559682595,
      "loss": 3.1238,
      "step": 130807
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5820460319519043,
      "learning_rate": 0.00023664135729611695,
      "loss": 3.0008,
      "step": 130808
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.040839195251465,
      "learning_rate": 0.00023663735900718747,
      "loss": 2.7358,
      "step": 130809
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.279137134552002,
      "learning_rate": 0.0002366333607300383,
      "loss": 2.9122,
      "step": 130810
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1238741874694824,
      "learning_rate": 0.00023662936246467012,
      "loss": 2.8358,
      "step": 130811
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8364949226379395,
      "learning_rate": 0.0002366253642110838,
      "loss": 3.1664,
      "step": 130812
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5406343936920166,
      "learning_rate": 0.00023662136596928,
      "loss": 2.6284,
      "step": 130813
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.28391170501709,
      "learning_rate": 0.00023661736773925964,
      "loss": 2.9778,
      "step": 130814
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2340121269226074,
      "learning_rate": 0.00023661336952102317,
      "loss": 2.792,
      "step": 130815
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.6245043277740479,
      "learning_rate": 0.00023660937131457144,
      "loss": 2.8072,
      "step": 130816
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8826334476470947,
      "learning_rate": 0.00023660537311990528,
      "loss": 2.9692,
      "step": 130817
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.115013837814331,
      "learning_rate": 0.00023660137493702532,
      "loss": 3.0564,
      "step": 130818
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1124441623687744,
      "learning_rate": 0.0002365973767659324,
      "loss": 3.1363,
      "step": 130819
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1566643714904785,
      "learning_rate": 0.00023659337860662733,
      "loss": 2.9464,
      "step": 130820
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1124651432037354,
      "learning_rate": 0.00023658938045911063,
      "loss": 2.8279,
      "step": 130821
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.268763303756714,
      "learning_rate": 0.00023658538232338314,
      "loss": 3.2177,
      "step": 130822
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3906476497650146,
      "learning_rate": 0.00023658138419944567,
      "loss": 2.9588,
      "step": 130823
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3181755542755127,
      "learning_rate": 0.00023657738608729887,
      "loss": 2.9808,
      "step": 130824
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.487623453140259,
      "learning_rate": 0.00023657338798694356,
      "loss": 2.8645,
      "step": 130825
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.5393497943878174,
      "learning_rate": 0.00023656938989838057,
      "loss": 3.2234,
      "step": 130826
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0519790649414062,
      "learning_rate": 0.00023656539182161042,
      "loss": 2.7015,
      "step": 130827
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6032660007476807,
      "learning_rate": 0.00023656139375663395,
      "loss": 2.9,
      "step": 130828
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1741585731506348,
      "learning_rate": 0.0002365573957034519,
      "loss": 2.9153,
      "step": 130829
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1853933334350586,
      "learning_rate": 0.00023655339766206506,
      "loss": 3.2796,
      "step": 130830
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.816046714782715,
      "learning_rate": 0.0002365493996324741,
      "loss": 3.0517,
      "step": 130831
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.093566656112671,
      "learning_rate": 0.00023654540161467994,
      "loss": 3.0902,
      "step": 130832
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4922261238098145,
      "learning_rate": 0.0002365414036086831,
      "loss": 2.9033,
      "step": 130833
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0696325302124023,
      "learning_rate": 0.0002365374056144844,
      "loss": 2.9954,
      "step": 130834
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1692869663238525,
      "learning_rate": 0.00023653340763208456,
      "loss": 2.9265,
      "step": 130835
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.330235719680786,
      "learning_rate": 0.00023652940966148437,
      "loss": 3.3057,
      "step": 130836
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.400161027908325,
      "learning_rate": 0.00023652541170268455,
      "loss": 3.1502,
      "step": 130837
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7819900512695312,
      "learning_rate": 0.00023652141375568602,
      "loss": 3.0226,
      "step": 130838
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.02982497215271,
      "learning_rate": 0.0002365174158204892,
      "loss": 3.1145,
      "step": 130839
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7891955375671387,
      "learning_rate": 0.00023651341789709498,
      "loss": 3.0827,
      "step": 130840
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.187786102294922,
      "learning_rate": 0.00023650941998550415,
      "loss": 3.0522,
      "step": 130841
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.073180675506592,
      "learning_rate": 0.0002365054220857174,
      "loss": 2.9505,
      "step": 130842
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6042728424072266,
      "learning_rate": 0.0002365014241977355,
      "loss": 2.8601,
      "step": 130843
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.163933038711548,
      "learning_rate": 0.00023649742632155928,
      "loss": 3.0456,
      "step": 130844
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.094297170639038,
      "learning_rate": 0.0002364934284571893,
      "loss": 2.9944,
      "step": 130845
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.4029576778411865,
      "learning_rate": 0.00023648943060462636,
      "loss": 3.0621,
      "step": 130846
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.829345464706421,
      "learning_rate": 0.00023648543276387122,
      "loss": 2.9514,
      "step": 130847
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0756638050079346,
      "learning_rate": 0.00023648143493492467,
      "loss": 3.1414,
      "step": 130848
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.1689605712890625,
      "learning_rate": 0.00023647743711778743,
      "loss": 2.7792,
      "step": 130849
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1445205211639404,
      "learning_rate": 0.0002364734393124602,
      "loss": 2.9367,
      "step": 130850
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.084970235824585,
      "learning_rate": 0.0002364694415189439,
      "loss": 2.8809,
      "step": 130851
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.112668752670288,
      "learning_rate": 0.000236465443737239,
      "loss": 2.7527,
      "step": 130852
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0342531204223633,
      "learning_rate": 0.00023646144596734637,
      "loss": 2.9015,
      "step": 130853
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.01450252532959,
      "learning_rate": 0.0002364574482092668,
      "loss": 3.0807,
      "step": 130854
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9152076244354248,
      "learning_rate": 0.00023645345046300093,
      "loss": 3.0338,
      "step": 130855
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.164013624191284,
      "learning_rate": 0.0002364494527285496,
      "loss": 2.9769,
      "step": 130856
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3284289836883545,
      "learning_rate": 0.00023644545500591362,
      "loss": 3.1646,
      "step": 130857
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.225543975830078,
      "learning_rate": 0.00023644145729509352,
      "loss": 3.0681,
      "step": 130858
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.375676393508911,
      "learning_rate": 0.00023643745959609012,
      "loss": 2.8586,
      "step": 130859
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0343353748321533,
      "learning_rate": 0.00023643346190890422,
      "loss": 3.1973,
      "step": 130860
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9702881574630737,
      "learning_rate": 0.00023642946423353654,
      "loss": 3.095,
      "step": 130861
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8777966499328613,
      "learning_rate": 0.00023642546656998782,
      "loss": 2.7211,
      "step": 130862
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.632899045944214,
      "learning_rate": 0.00023642146891825894,
      "loss": 2.8681,
      "step": 130863
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1298484802246094,
      "learning_rate": 0.00023641747127835038,
      "loss": 3.165,
      "step": 130864
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.055680751800537,
      "learning_rate": 0.00023641347365026301,
      "loss": 2.9547,
      "step": 130865
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.355074167251587,
      "learning_rate": 0.00023640947603399757,
      "loss": 2.917,
      "step": 130866
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2529687881469727,
      "learning_rate": 0.00023640547842955484,
      "loss": 3.1648,
      "step": 130867
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9427088499069214,
      "learning_rate": 0.00023640148083693547,
      "loss": 2.9867,
      "step": 130868
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.986551284790039,
      "learning_rate": 0.00023639748325614038,
      "loss": 2.845,
      "step": 130869
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.043867588043213,
      "learning_rate": 0.0002363934856871702,
      "loss": 2.6355,
      "step": 130870
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6428489685058594,
      "learning_rate": 0.0002363894881300256,
      "loss": 2.8845,
      "step": 130871
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7856979370117188,
      "learning_rate": 0.00023638549058470736,
      "loss": 2.7536,
      "step": 130872
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.1271417140960693,
      "learning_rate": 0.00023638149305121628,
      "loss": 3.1485,
      "step": 130873
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7745834589004517,
      "learning_rate": 0.0002363774955295531,
      "loss": 2.8262,
      "step": 130874
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1191258430480957,
      "learning_rate": 0.00023637349801971857,
      "loss": 2.6199,
      "step": 130875
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7979657649993896,
      "learning_rate": 0.0002363695005217134,
      "loss": 2.8987,
      "step": 130876
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8971171379089355,
      "learning_rate": 0.00023636550303553835,
      "loss": 2.6768,
      "step": 130877
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3658266067504883,
      "learning_rate": 0.0002363615055611941,
      "loss": 2.7712,
      "step": 130878
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3442347049713135,
      "learning_rate": 0.00023635750809868146,
      "loss": 3.163,
      "step": 130879
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3723487854003906,
      "learning_rate": 0.00023635351064800117,
      "loss": 2.8563,
      "step": 130880
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3239431381225586,
      "learning_rate": 0.00023634951320915403,
      "loss": 3.3501,
      "step": 130881
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9675381183624268,
      "learning_rate": 0.00023634551578214063,
      "loss": 3.181,
      "step": 130882
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8372607231140137,
      "learning_rate": 0.00023634151836696181,
      "loss": 2.9886,
      "step": 130883
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.2561233043670654,
      "learning_rate": 0.00023633752096361828,
      "loss": 2.8559,
      "step": 130884
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0959229469299316,
      "learning_rate": 0.0002363335235721109,
      "loss": 2.7894,
      "step": 130885
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9365860223770142,
      "learning_rate": 0.00023632952619244028,
      "loss": 2.8141,
      "step": 130886
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5047154426574707,
      "learning_rate": 0.00023632552882460724,
      "loss": 3.0351,
      "step": 130887
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1374921798706055,
      "learning_rate": 0.0002363215314686124,
      "loss": 3.0752,
      "step": 130888
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.198862314224243,
      "learning_rate": 0.0002363175341244566,
      "loss": 2.9148,
      "step": 130889
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9958853721618652,
      "learning_rate": 0.0002363135367921406,
      "loss": 3.0224,
      "step": 130890
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3320226669311523,
      "learning_rate": 0.0002363095394716651,
      "loss": 3.1302,
      "step": 130891
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.8749539852142334,
      "learning_rate": 0.0002363055421630309,
      "loss": 3.004,
      "step": 130892
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5609066486358643,
      "learning_rate": 0.00023630154486623873,
      "loss": 3.1223,
      "step": 130893
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1269419193267822,
      "learning_rate": 0.00023629754758128924,
      "loss": 2.8954,
      "step": 130894
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.7519214153289795,
      "learning_rate": 0.00023629355030818324,
      "loss": 3.1135,
      "step": 130895
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3223702907562256,
      "learning_rate": 0.00023628955304692145,
      "loss": 2.9018,
      "step": 130896
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8907246589660645,
      "learning_rate": 0.00023628555579750465,
      "loss": 3.2465,
      "step": 130897
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9159809350967407,
      "learning_rate": 0.00023628155855993357,
      "loss": 2.8993,
      "step": 130898
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.071967124938965,
      "learning_rate": 0.0002362775613342091,
      "loss": 2.8489,
      "step": 130899
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.329840660095215,
      "learning_rate": 0.00023627356412033167,
      "loss": 3.0175,
      "step": 130900
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.147249460220337,
      "learning_rate": 0.0002362695669183022,
      "loss": 2.8908,
      "step": 130901
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.083263874053955,
      "learning_rate": 0.0002362655697281214,
      "loss": 3.0805,
      "step": 130902
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1490437984466553,
      "learning_rate": 0.00023626157254979006,
      "loss": 2.8086,
      "step": 130903
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0061838626861572,
      "learning_rate": 0.0002362575753833089,
      "loss": 3.1471,
      "step": 130904
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3134560585021973,
      "learning_rate": 0.00023625357822867877,
      "loss": 2.922,
      "step": 130905
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9554411172866821,
      "learning_rate": 0.0002362495810859002,
      "loss": 2.9239,
      "step": 130906
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0699799060821533,
      "learning_rate": 0.00023624558395497403,
      "loss": 2.818,
      "step": 130907
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1982297897338867,
      "learning_rate": 0.00023624158683590096,
      "loss": 3.2139,
      "step": 130908
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.7397513389587402,
      "learning_rate": 0.00023623758972868183,
      "loss": 3.0209,
      "step": 130909
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8531490564346313,
      "learning_rate": 0.0002362335926333173,
      "loss": 3.1342,
      "step": 130910
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2968642711639404,
      "learning_rate": 0.00023622959554980833,
      "loss": 2.864,
      "step": 130911
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6244399547576904,
      "learning_rate": 0.00023622559847815533,
      "loss": 2.7368,
      "step": 130912
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0771491527557373,
      "learning_rate": 0.0002362216014183592,
      "loss": 3.098,
      "step": 130913
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.977473258972168,
      "learning_rate": 0.00023621760437042067,
      "loss": 2.9059,
      "step": 130914
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9747172594070435,
      "learning_rate": 0.0002362136073343405,
      "loss": 3.0054,
      "step": 130915
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0641047954559326,
      "learning_rate": 0.00023620961031011942,
      "loss": 2.84,
      "step": 130916
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1160731315612793,
      "learning_rate": 0.00023620561329775817,
      "loss": 2.7634,
      "step": 130917
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0808980464935303,
      "learning_rate": 0.00023620161629725762,
      "loss": 2.8294,
      "step": 130918
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3137409687042236,
      "learning_rate": 0.00023619761930861828,
      "loss": 3.0912,
      "step": 130919
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2607481479644775,
      "learning_rate": 0.00023619362233184103,
      "loss": 3.0815,
      "step": 130920
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.056300640106201,
      "learning_rate": 0.00023618962536692655,
      "loss": 3.0424,
      "step": 130921
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2885637283325195,
      "learning_rate": 0.00023618562841387565,
      "loss": 2.9859,
      "step": 130922
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8551716804504395,
      "learning_rate": 0.00023618163147268902,
      "loss": 2.8689,
      "step": 130923
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0051229000091553,
      "learning_rate": 0.00023617763454336757,
      "loss": 3.1753,
      "step": 130924
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.8089243173599243,
      "learning_rate": 0.00023617363762591176,
      "loss": 2.9514,
      "step": 130925
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.7593059539794922,
      "learning_rate": 0.0002361696407203225,
      "loss": 3.0653,
      "step": 130926
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0409228801727295,
      "learning_rate": 0.00023616564382660053,
      "loss": 2.8564,
      "step": 130927
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.6673853397369385,
      "learning_rate": 0.00023616164694474653,
      "loss": 2.9573,
      "step": 130928
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.3768696784973145,
      "learning_rate": 0.0002361576500747613,
      "loss": 3.0073,
      "step": 130929
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.729339122772217,
      "learning_rate": 0.00023615365321664566,
      "loss": 2.9544,
      "step": 130930
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2324135303497314,
      "learning_rate": 0.00023614965637040018,
      "loss": 2.9585,
      "step": 130931
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0225820541381836,
      "learning_rate": 0.00023614565953602567,
      "loss": 3.0501,
      "step": 130932
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5229625701904297,
      "learning_rate": 0.00023614166271352288,
      "loss": 2.7859,
      "step": 130933
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9983665943145752,
      "learning_rate": 0.0002361376659028926,
      "loss": 3.3236,
      "step": 130934
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9073829650878906,
      "learning_rate": 0.0002361336691041355,
      "loss": 2.8915,
      "step": 130935
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.345658302307129,
      "learning_rate": 0.00023612967231725248,
      "loss": 3.0834,
      "step": 130936
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2859411239624023,
      "learning_rate": 0.00023612567554224403,
      "loss": 3.1118,
      "step": 130937
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.0876808166503906,
      "learning_rate": 0.00023612167877911104,
      "loss": 2.6224,
      "step": 130938
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.5261378288269043,
      "learning_rate": 0.00023611768202785424,
      "loss": 3.0148,
      "step": 130939
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.9461677074432373,
      "learning_rate": 0.00023611368528847433,
      "loss": 2.9143,
      "step": 130940
    },
    {
      "epoch": 1.7,
      "grad_norm": 1.768739104270935,
      "learning_rate": 0.00023610968856097213,
      "loss": 3.02,
      "step": 130941
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.2379026412963867,
      "learning_rate": 0.00023610569184534847,
      "loss": 3.0484,
      "step": 130942
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.1377787590026855,
      "learning_rate": 0.00023610169514160382,
      "loss": 2.9124,
      "step": 130943
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.349698781967163,
      "learning_rate": 0.00023609769844973907,
      "loss": 3.0358,
      "step": 130944
    },
    {
      "epoch": 1.7,
      "grad_norm": 3.2364745140075684,
      "learning_rate": 0.00023609370176975498,
      "loss": 3.0561,
      "step": 130945
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2551417350769043,
      "learning_rate": 0.00023608970510165229,
      "loss": 2.6727,
      "step": 130946
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.498016357421875,
      "learning_rate": 0.0002360857084454317,
      "loss": 3.1785,
      "step": 130947
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.166827440261841,
      "learning_rate": 0.00023608171180109413,
      "loss": 2.9115,
      "step": 130948
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2476110458374023,
      "learning_rate": 0.00023607771516864006,
      "loss": 3.011,
      "step": 130949
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2416489124298096,
      "learning_rate": 0.0002360737185480703,
      "loss": 2.8268,
      "step": 130950
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.309818983078003,
      "learning_rate": 0.00023606972193938568,
      "loss": 2.6606,
      "step": 130951
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4368443489074707,
      "learning_rate": 0.00023606572534258692,
      "loss": 3.0368,
      "step": 130952
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5668094158172607,
      "learning_rate": 0.0002360617287576747,
      "loss": 2.9187,
      "step": 130953
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1564443111419678,
      "learning_rate": 0.00023605773218464993,
      "loss": 2.7945,
      "step": 130954
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2681007385253906,
      "learning_rate": 0.00023605373562351318,
      "loss": 3.0567,
      "step": 130955
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.108008623123169,
      "learning_rate": 0.0002360497390742652,
      "loss": 2.9121,
      "step": 130956
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1355488300323486,
      "learning_rate": 0.00023604574253690682,
      "loss": 2.9148,
      "step": 130957
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8946807384490967,
      "learning_rate": 0.00023604174601143873,
      "loss": 2.9629,
      "step": 130958
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.927414894104004,
      "learning_rate": 0.00023603774949786165,
      "loss": 3.0556,
      "step": 130959
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.870298385620117,
      "learning_rate": 0.00023603375299617647,
      "loss": 2.8498,
      "step": 130960
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.07405161857605,
      "learning_rate": 0.0002360297565063837,
      "loss": 3.1378,
      "step": 130961
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4815866947174072,
      "learning_rate": 0.00023602576002848433,
      "loss": 2.9279,
      "step": 130962
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.755786418914795,
      "learning_rate": 0.0002360217635624789,
      "loss": 2.924,
      "step": 130963
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.984025478363037,
      "learning_rate": 0.00023601776710836821,
      "loss": 3.0718,
      "step": 130964
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9869530200958252,
      "learning_rate": 0.00023601377066615302,
      "loss": 3.1459,
      "step": 130965
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.932753562927246,
      "learning_rate": 0.00023600977423583416,
      "loss": 2.8121,
      "step": 130966
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.370708703994751,
      "learning_rate": 0.00023600577781741222,
      "loss": 2.9485,
      "step": 130967
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8744059801101685,
      "learning_rate": 0.00023600178141088804,
      "loss": 2.9011,
      "step": 130968
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3095340728759766,
      "learning_rate": 0.0002359977850162624,
      "loss": 3.1101,
      "step": 130969
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.6061456203460693,
      "learning_rate": 0.00023599378863353585,
      "loss": 2.8007,
      "step": 130970
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.360274314880371,
      "learning_rate": 0.00023598979226270933,
      "loss": 2.9182,
      "step": 130971
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7518523931503296,
      "learning_rate": 0.00023598579590378355,
      "loss": 3.0821,
      "step": 130972
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9376189708709717,
      "learning_rate": 0.00023598179955675917,
      "loss": 2.8791,
      "step": 130973
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2251884937286377,
      "learning_rate": 0.000235977803221637,
      "loss": 2.8368,
      "step": 130974
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3119096755981445,
      "learning_rate": 0.0002359738068984177,
      "loss": 3.039,
      "step": 130975
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5982415676116943,
      "learning_rate": 0.0002359698105871022,
      "loss": 3.0423,
      "step": 130976
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8294286727905273,
      "learning_rate": 0.00023596581428769105,
      "loss": 2.8771,
      "step": 130977
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3692142963409424,
      "learning_rate": 0.0002359618180001851,
      "loss": 3.1287,
      "step": 130978
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7608916759490967,
      "learning_rate": 0.000235957821724585,
      "loss": 2.8948,
      "step": 130979
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4163947105407715,
      "learning_rate": 0.00023595382546089157,
      "loss": 3.1506,
      "step": 130980
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2400858402252197,
      "learning_rate": 0.00023594982920910554,
      "loss": 2.9431,
      "step": 130981
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2004895210266113,
      "learning_rate": 0.0002359458329692276,
      "loss": 2.9639,
      "step": 130982
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3750526905059814,
      "learning_rate": 0.0002359418367412586,
      "loss": 2.9722,
      "step": 130983
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.501918315887451,
      "learning_rate": 0.00023593784052519922,
      "loss": 2.9142,
      "step": 130984
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1512184143066406,
      "learning_rate": 0.00023593384432105027,
      "loss": 2.9542,
      "step": 130985
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.841097354888916,
      "learning_rate": 0.0002359298481288123,
      "loss": 2.8328,
      "step": 130986
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.11208438873291,
      "learning_rate": 0.00023592585194848623,
      "loss": 3.0321,
      "step": 130987
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0053961277008057,
      "learning_rate": 0.00023592185578007272,
      "loss": 3.2579,
      "step": 130988
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7457362413406372,
      "learning_rate": 0.00023591785962357259,
      "loss": 3.1252,
      "step": 130989
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9429852962493896,
      "learning_rate": 0.0002359138634789865,
      "loss": 3.1512,
      "step": 130990
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5993711948394775,
      "learning_rate": 0.00023590986734631537,
      "loss": 3.0392,
      "step": 130991
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4201273918151855,
      "learning_rate": 0.00023590587122555967,
      "loss": 3.1052,
      "step": 130992
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2160425186157227,
      "learning_rate": 0.0002359018751167203,
      "loss": 3.0837,
      "step": 130993
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1349945068359375,
      "learning_rate": 0.00023589787901979797,
      "loss": 3.0184,
      "step": 130994
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6153273582458496,
      "learning_rate": 0.0002358938829347934,
      "loss": 2.8576,
      "step": 130995
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3311779499053955,
      "learning_rate": 0.00023588988686170745,
      "loss": 3.0241,
      "step": 130996
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.136261463165283,
      "learning_rate": 0.00023588589080054086,
      "loss": 2.8605,
      "step": 130997
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8064548969268799,
      "learning_rate": 0.00023588189475129415,
      "loss": 2.8113,
      "step": 130998
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.173356056213379,
      "learning_rate": 0.0002358778987139682,
      "loss": 2.951,
      "step": 130999
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9988704919815063,
      "learning_rate": 0.00023587390268856379,
      "loss": 2.944,
      "step": 131000
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3262765407562256,
      "learning_rate": 0.00023586990667508163,
      "loss": 2.9526,
      "step": 131001
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2732656002044678,
      "learning_rate": 0.00023586591067352244,
      "loss": 3.051,
      "step": 131002
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0994319915771484,
      "learning_rate": 0.00023586191468388714,
      "loss": 2.8095,
      "step": 131003
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0701117515563965,
      "learning_rate": 0.0002358579187061762,
      "loss": 2.7562,
      "step": 131004
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6168551445007324,
      "learning_rate": 0.00023585392274039048,
      "loss": 3.1253,
      "step": 131005
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.402449369430542,
      "learning_rate": 0.0002358499267865307,
      "loss": 2.9854,
      "step": 131006
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7653989791870117,
      "learning_rate": 0.00023584593084459763,
      "loss": 3.1667,
      "step": 131007
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9784622192382812,
      "learning_rate": 0.00023584193491459204,
      "loss": 2.8643,
      "step": 131008
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.973798394203186,
      "learning_rate": 0.00023583793899651477,
      "loss": 3.2689,
      "step": 131009
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3516604900360107,
      "learning_rate": 0.00023583394309036626,
      "loss": 2.8282,
      "step": 131010
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8240857124328613,
      "learning_rate": 0.0002358299471961475,
      "loss": 2.864,
      "step": 131011
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9483487606048584,
      "learning_rate": 0.00023582595131385915,
      "loss": 3.0192,
      "step": 131012
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7657909393310547,
      "learning_rate": 0.00023582195544350195,
      "loss": 2.897,
      "step": 131013
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9617059230804443,
      "learning_rate": 0.00023581795958507664,
      "loss": 2.9514,
      "step": 131014
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3888652324676514,
      "learning_rate": 0.00023581396373858414,
      "loss": 2.8544,
      "step": 131015
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1504061222076416,
      "learning_rate": 0.0002358099679040249,
      "loss": 3.021,
      "step": 131016
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9297229051589966,
      "learning_rate": 0.0002358059720813998,
      "loss": 3.2452,
      "step": 131017
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.502279281616211,
      "learning_rate": 0.0002358019762707096,
      "loss": 2.7638,
      "step": 131018
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4788830280303955,
      "learning_rate": 0.000235797980471955,
      "loss": 3.0098,
      "step": 131019
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.055487632751465,
      "learning_rate": 0.00023579398468513677,
      "loss": 2.8886,
      "step": 131020
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7614734172821045,
      "learning_rate": 0.0002357899889102558,
      "loss": 2.7642,
      "step": 131021
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6534488201141357,
      "learning_rate": 0.0002357859931473125,
      "loss": 2.86,
      "step": 131022
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7948671579360962,
      "learning_rate": 0.00023578199739630784,
      "loss": 2.9556,
      "step": 131023
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0861010551452637,
      "learning_rate": 0.00023577800165724253,
      "loss": 2.937,
      "step": 131024
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.863536834716797,
      "learning_rate": 0.00023577400593011727,
      "loss": 3.1753,
      "step": 131025
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8528590202331543,
      "learning_rate": 0.00023577001021493285,
      "loss": 3.3281,
      "step": 131026
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0248353481292725,
      "learning_rate": 0.00023576601451169015,
      "loss": 3.2774,
      "step": 131027
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6075572967529297,
      "learning_rate": 0.0002357620188203896,
      "loss": 2.8889,
      "step": 131028
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.6142988204956055,
      "learning_rate": 0.00023575802314103212,
      "loss": 2.7311,
      "step": 131029
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0134410858154297,
      "learning_rate": 0.0002357540274736184,
      "loss": 3.1474,
      "step": 131030
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4793319702148438,
      "learning_rate": 0.00023575003181814926,
      "loss": 3.0909,
      "step": 131031
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0140798091888428,
      "learning_rate": 0.00023574603617462536,
      "loss": 2.7528,
      "step": 131032
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.450134038925171,
      "learning_rate": 0.00023574204054304767,
      "loss": 2.9708,
      "step": 131033
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.813056468963623,
      "learning_rate": 0.00023573804492341656,
      "loss": 2.8845,
      "step": 131034
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3954451084136963,
      "learning_rate": 0.00023573404931573298,
      "loss": 2.9676,
      "step": 131035
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4187307357788086,
      "learning_rate": 0.00023573005371999768,
      "loss": 2.9592,
      "step": 131036
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.832564353942871,
      "learning_rate": 0.00023572605813621135,
      "loss": 2.9989,
      "step": 131037
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.485445261001587,
      "learning_rate": 0.0002357220625643748,
      "loss": 2.9086,
      "step": 131038
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.465203046798706,
      "learning_rate": 0.0002357180670044888,
      "loss": 3.0778,
      "step": 131039
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.615999937057495,
      "learning_rate": 0.00023571407145655393,
      "loss": 3.0362,
      "step": 131040
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2016923427581787,
      "learning_rate": 0.00023571007592057102,
      "loss": 2.9559,
      "step": 131041
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8646435737609863,
      "learning_rate": 0.00023570608039654082,
      "loss": 3.1906,
      "step": 131042
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0064730644226074,
      "learning_rate": 0.00023570208488446408,
      "loss": 3.1816,
      "step": 131043
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.416297674179077,
      "learning_rate": 0.00023569808938434154,
      "loss": 2.9361,
      "step": 131044
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9688541889190674,
      "learning_rate": 0.000235694093896174,
      "loss": 2.9279,
      "step": 131045
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8148672580718994,
      "learning_rate": 0.00023569009841996208,
      "loss": 2.8548,
      "step": 131046
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.013421058654785,
      "learning_rate": 0.00023568610295570663,
      "loss": 3.2095,
      "step": 131047
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9302475452423096,
      "learning_rate": 0.0002356821075034083,
      "loss": 2.8842,
      "step": 131048
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.732856273651123,
      "learning_rate": 0.00023567811206306788,
      "loss": 3.0109,
      "step": 131049
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.156578540802002,
      "learning_rate": 0.0002356741166346861,
      "loss": 2.7447,
      "step": 131050
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.180990219116211,
      "learning_rate": 0.00023567012121826372,
      "loss": 2.7935,
      "step": 131051
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0860230922698975,
      "learning_rate": 0.00023566612581380156,
      "loss": 3.2459,
      "step": 131052
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7942440509796143,
      "learning_rate": 0.0002356621304213002,
      "loss": 3.0208,
      "step": 131053
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.926827907562256,
      "learning_rate": 0.00023565813504076053,
      "loss": 3.1599,
      "step": 131054
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.681238889694214,
      "learning_rate": 0.00023565413967218318,
      "loss": 2.8254,
      "step": 131055
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.225724935531616,
      "learning_rate": 0.0002356501443155689,
      "loss": 2.9057,
      "step": 131056
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1254494190216064,
      "learning_rate": 0.0002356461489709185,
      "loss": 2.7592,
      "step": 131057
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.42971134185791,
      "learning_rate": 0.0002356421536382328,
      "loss": 2.7892,
      "step": 131058
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.154285430908203,
      "learning_rate": 0.0002356381583175123,
      "loss": 3.0255,
      "step": 131059
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3832271099090576,
      "learning_rate": 0.00023563416300875791,
      "loss": 2.9304,
      "step": 131060
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.6982109546661377,
      "learning_rate": 0.00023563016771197042,
      "loss": 2.838,
      "step": 131061
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4626386165618896,
      "learning_rate": 0.00023562617242715045,
      "loss": 2.9424,
      "step": 131062
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.389183521270752,
      "learning_rate": 0.00023562217715429875,
      "loss": 2.8227,
      "step": 131063
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.721543788909912,
      "learning_rate": 0.0002356181818934162,
      "loss": 2.9928,
      "step": 131064
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1082491874694824,
      "learning_rate": 0.00023561418664450336,
      "loss": 2.9538,
      "step": 131065
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8183348178863525,
      "learning_rate": 0.00023561019140756105,
      "loss": 2.7269,
      "step": 131066
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.3835039138793945,
      "learning_rate": 0.00023560619618259002,
      "loss": 3.0307,
      "step": 131067
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8649595975875854,
      "learning_rate": 0.0002356022009695911,
      "loss": 3.0812,
      "step": 131068
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9445418119430542,
      "learning_rate": 0.0002355982057685649,
      "loss": 3.1438,
      "step": 131069
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9032979011535645,
      "learning_rate": 0.00023559421057951224,
      "loss": 2.971,
      "step": 131070
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9538170099258423,
      "learning_rate": 0.00023559021540243378,
      "loss": 2.9367,
      "step": 131071
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9529565572738647,
      "learning_rate": 0.0002355862202373303,
      "loss": 3.0594,
      "step": 131072
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9377003908157349,
      "learning_rate": 0.00023558222508420257,
      "loss": 3.0762,
      "step": 131073
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4618520736694336,
      "learning_rate": 0.0002355782299430513,
      "loss": 2.8494,
      "step": 131074
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2065205574035645,
      "learning_rate": 0.00023557423481387725,
      "loss": 2.9547,
      "step": 131075
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8969823122024536,
      "learning_rate": 0.0002355702396966813,
      "loss": 2.9223,
      "step": 131076
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.016322374343872,
      "learning_rate": 0.00023556624459146396,
      "loss": 3.201,
      "step": 131077
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.036935329437256,
      "learning_rate": 0.00023556224949822607,
      "loss": 2.8627,
      "step": 131078
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0272369384765625,
      "learning_rate": 0.00023555825441696837,
      "loss": 2.8273,
      "step": 131079
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.970855236053467,
      "learning_rate": 0.00023555425934769156,
      "loss": 2.9004,
      "step": 131080
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.733133316040039,
      "learning_rate": 0.00023555026429039649,
      "loss": 3.0878,
      "step": 131081
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.382812261581421,
      "learning_rate": 0.00023554626924508396,
      "loss": 2.9273,
      "step": 131082
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4924330711364746,
      "learning_rate": 0.00023554227421175442,
      "loss": 3.1025,
      "step": 131083
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8828998804092407,
      "learning_rate": 0.00023553827919040886,
      "loss": 2.7967,
      "step": 131084
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.38782000541687,
      "learning_rate": 0.00023553428418104791,
      "loss": 3.0662,
      "step": 131085
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1910736560821533,
      "learning_rate": 0.00023553028918367236,
      "loss": 2.8041,
      "step": 131086
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2362797260284424,
      "learning_rate": 0.00023552629419828293,
      "loss": 2.8656,
      "step": 131087
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2676901817321777,
      "learning_rate": 0.0002355222992248805,
      "loss": 3.0752,
      "step": 131088
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.695850133895874,
      "learning_rate": 0.0002355183042634656,
      "loss": 2.944,
      "step": 131089
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.868216872215271,
      "learning_rate": 0.00023551430931403902,
      "loss": 2.9174,
      "step": 131090
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.167745590209961,
      "learning_rate": 0.0002355103143766016,
      "loss": 2.8994,
      "step": 131091
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8911457061767578,
      "learning_rate": 0.000235506319451154,
      "loss": 3.1143,
      "step": 131092
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.296539068222046,
      "learning_rate": 0.00023550232453769702,
      "loss": 2.8229,
      "step": 131093
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3431813716888428,
      "learning_rate": 0.00023549832963623145,
      "loss": 3.049,
      "step": 131094
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.03267502784729,
      "learning_rate": 0.0002354943347467579,
      "loss": 2.6713,
      "step": 131095
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.792895793914795,
      "learning_rate": 0.0002354903398692771,
      "loss": 2.8737,
      "step": 131096
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.461840867996216,
      "learning_rate": 0.0002354863450037899,
      "loss": 3.1451,
      "step": 131097
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.212594985961914,
      "learning_rate": 0.000235482350150297,
      "loss": 3.2121,
      "step": 131098
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6626601219177246,
      "learning_rate": 0.00023547835530879916,
      "loss": 2.9316,
      "step": 131099
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9731502532958984,
      "learning_rate": 0.00023547436047929724,
      "loss": 2.9015,
      "step": 131100
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5622313022613525,
      "learning_rate": 0.0002354703656617917,
      "loss": 2.7086,
      "step": 131101
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.624825477600098,
      "learning_rate": 0.00023546637085628344,
      "loss": 2.7678,
      "step": 131102
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9882110357284546,
      "learning_rate": 0.00023546237606277324,
      "loss": 3.0018,
      "step": 131103
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0196986198425293,
      "learning_rate": 0.00023545838128126177,
      "loss": 2.9186,
      "step": 131104
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3728787899017334,
      "learning_rate": 0.00023545438651174978,
      "loss": 3.1673,
      "step": 131105
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8102587461471558,
      "learning_rate": 0.0002354503917542382,
      "loss": 2.861,
      "step": 131106
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.430450677871704,
      "learning_rate": 0.00023544639700872748,
      "loss": 2.9794,
      "step": 131107
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6153957843780518,
      "learning_rate": 0.00023544240227521852,
      "loss": 3.0475,
      "step": 131108
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3759961128234863,
      "learning_rate": 0.000235438407553712,
      "loss": 3.2514,
      "step": 131109
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.1818273067474365,
      "learning_rate": 0.0002354344128442087,
      "loss": 3.0604,
      "step": 131110
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6351773738861084,
      "learning_rate": 0.00023543041814670938,
      "loss": 3.2014,
      "step": 131111
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.471000909805298,
      "learning_rate": 0.00023542642346121488,
      "loss": 3.2625,
      "step": 131112
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5785231590270996,
      "learning_rate": 0.0002354224287877257,
      "loss": 3.1668,
      "step": 131113
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1469414234161377,
      "learning_rate": 0.0002354184341262427,
      "loss": 3.0574,
      "step": 131114
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.068794012069702,
      "learning_rate": 0.00023541443947676664,
      "loss": 3.1459,
      "step": 131115
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8904571533203125,
      "learning_rate": 0.00023541044483929826,
      "loss": 3.0068,
      "step": 131116
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8994630575180054,
      "learning_rate": 0.00023540645021383829,
      "loss": 2.9683,
      "step": 131117
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.091078758239746,
      "learning_rate": 0.0002354024556003875,
      "loss": 2.9401,
      "step": 131118
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0118179321289062,
      "learning_rate": 0.0002353984609989467,
      "loss": 2.7027,
      "step": 131119
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2411129474639893,
      "learning_rate": 0.00023539446640951647,
      "loss": 2.6977,
      "step": 131120
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9129658937454224,
      "learning_rate": 0.0002353904718320976,
      "loss": 2.9769,
      "step": 131121
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3993208408355713,
      "learning_rate": 0.00023538647726669085,
      "loss": 3.0163,
      "step": 131122
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.313772201538086,
      "learning_rate": 0.00023538248271329698,
      "loss": 3.0164,
      "step": 131123
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8822274208068848,
      "learning_rate": 0.00023537848817191674,
      "loss": 3.0667,
      "step": 131124
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.828540563583374,
      "learning_rate": 0.00023537449364255096,
      "loss": 3.0008,
      "step": 131125
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5694284439086914,
      "learning_rate": 0.0002353704991252002,
      "loss": 2.8686,
      "step": 131126
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.1206822395324707,
      "learning_rate": 0.00023536650461986524,
      "loss": 2.8944,
      "step": 131127
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3991804122924805,
      "learning_rate": 0.00023536251012654688,
      "loss": 3.2335,
      "step": 131128
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.360278367996216,
      "learning_rate": 0.00023535851564524587,
      "loss": 2.8816,
      "step": 131129
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.19070291519165,
      "learning_rate": 0.0002353545211759629,
      "loss": 2.9324,
      "step": 131130
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.9302611351013184,
      "learning_rate": 0.0002353505267186989,
      "loss": 2.8219,
      "step": 131131
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0333011150360107,
      "learning_rate": 0.00023534653227345434,
      "loss": 2.9237,
      "step": 131132
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.86049485206604,
      "learning_rate": 0.00023534253784023005,
      "loss": 3.1167,
      "step": 131133
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.694758653640747,
      "learning_rate": 0.0002353385434190268,
      "loss": 3.2454,
      "step": 131134
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8959155082702637,
      "learning_rate": 0.0002353345490098453,
      "loss": 3.2778,
      "step": 131135
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7477574348449707,
      "learning_rate": 0.0002353305546126864,
      "loss": 2.8982,
      "step": 131136
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0688517093658447,
      "learning_rate": 0.00023532656022755082,
      "loss": 3.0715,
      "step": 131137
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8871915340423584,
      "learning_rate": 0.00023532256585443917,
      "loss": 3.015,
      "step": 131138
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1522648334503174,
      "learning_rate": 0.00023531857149335233,
      "loss": 3.0307,
      "step": 131139
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1505510807037354,
      "learning_rate": 0.00023531457714429097,
      "loss": 3.0219,
      "step": 131140
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7168139219284058,
      "learning_rate": 0.00023531058280725582,
      "loss": 3.279,
      "step": 131141
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2933599948883057,
      "learning_rate": 0.00023530658848224765,
      "loss": 2.9402,
      "step": 131142
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0283312797546387,
      "learning_rate": 0.00023530259416926729,
      "loss": 2.7568,
      "step": 131143
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9906216859817505,
      "learning_rate": 0.0002352985998683153,
      "loss": 2.906,
      "step": 131144
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9898616075515747,
      "learning_rate": 0.00023529460557939258,
      "loss": 2.9812,
      "step": 131145
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7743377685546875,
      "learning_rate": 0.0002352906113024998,
      "loss": 3.1017,
      "step": 131146
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.635537624359131,
      "learning_rate": 0.0002352866170376377,
      "loss": 2.9782,
      "step": 131147
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2439374923706055,
      "learning_rate": 0.00023528262278480706,
      "loss": 2.9696,
      "step": 131148
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0385096073150635,
      "learning_rate": 0.0002352786285440086,
      "loss": 3.0751,
      "step": 131149
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8584935665130615,
      "learning_rate": 0.00023527463431524307,
      "loss": 3.0137,
      "step": 131150
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.601069450378418,
      "learning_rate": 0.00023527064009851118,
      "loss": 3.089,
      "step": 131151
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2669079303741455,
      "learning_rate": 0.0002352666458938137,
      "loss": 2.7501,
      "step": 131152
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0063984394073486,
      "learning_rate": 0.00023526265170115143,
      "loss": 2.7101,
      "step": 131153
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3359460830688477,
      "learning_rate": 0.000235258657520525,
      "loss": 2.9835,
      "step": 131154
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8778630495071411,
      "learning_rate": 0.00023525466335193527,
      "loss": 3.1597,
      "step": 131155
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4839189052581787,
      "learning_rate": 0.00023525066919538284,
      "loss": 2.7368,
      "step": 131156
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8494266271591187,
      "learning_rate": 0.00023524667505086857,
      "loss": 2.7853,
      "step": 131157
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.7559597492218018,
      "learning_rate": 0.0002352426809183931,
      "loss": 2.8458,
      "step": 131158
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3138551712036133,
      "learning_rate": 0.00023523868679795726,
      "loss": 3.191,
      "step": 131159
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0207929611206055,
      "learning_rate": 0.0002352346926895619,
      "loss": 2.7561,
      "step": 131160
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.010504722595215,
      "learning_rate": 0.00023523069859320757,
      "loss": 2.9071,
      "step": 131161
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.9236199855804443,
      "learning_rate": 0.00023522670450889504,
      "loss": 2.9976,
      "step": 131162
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0028812885284424,
      "learning_rate": 0.00023522271043662505,
      "loss": 2.9543,
      "step": 131163
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9904123544692993,
      "learning_rate": 0.00023521871637639845,
      "loss": 3.0871,
      "step": 131164
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.052389621734619,
      "learning_rate": 0.00023521472232821585,
      "loss": 3.1927,
      "step": 131165
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.1820831298828125,
      "learning_rate": 0.00023521072829207806,
      "loss": 3.0717,
      "step": 131166
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.202770709991455,
      "learning_rate": 0.00023520673426798597,
      "loss": 2.7773,
      "step": 131167
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9278035163879395,
      "learning_rate": 0.00023520274025594004,
      "loss": 3.1727,
      "step": 131168
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.6850922107696533,
      "learning_rate": 0.00023519874625594115,
      "loss": 2.9813,
      "step": 131169
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.150141477584839,
      "learning_rate": 0.00023519475226799002,
      "loss": 2.9484,
      "step": 131170
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2159717082977295,
      "learning_rate": 0.0002351907582920874,
      "loss": 2.8323,
      "step": 131171
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.453897476196289,
      "learning_rate": 0.00023518676432823408,
      "loss": 3.0136,
      "step": 131172
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.4483377933502197,
      "learning_rate": 0.00023518277037643085,
      "loss": 2.8631,
      "step": 131173
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1521871089935303,
      "learning_rate": 0.00023517877643667827,
      "loss": 3.1992,
      "step": 131174
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.338834047317505,
      "learning_rate": 0.00023517478250897718,
      "loss": 2.8339,
      "step": 131175
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6504831314086914,
      "learning_rate": 0.0002351707885933283,
      "loss": 2.8592,
      "step": 131176
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2927358150482178,
      "learning_rate": 0.00023516679468973237,
      "loss": 3.1643,
      "step": 131177
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1784818172454834,
      "learning_rate": 0.00023516280079819023,
      "loss": 3.0461,
      "step": 131178
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.517219305038452,
      "learning_rate": 0.00023515880691870263,
      "loss": 3.0405,
      "step": 131179
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1391801834106445,
      "learning_rate": 0.00023515481305127014,
      "loss": 3.2473,
      "step": 131180
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.944309711456299,
      "learning_rate": 0.00023515081919589354,
      "loss": 2.9357,
      "step": 131181
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3849892616271973,
      "learning_rate": 0.00023514682535257365,
      "loss": 3.0198,
      "step": 131182
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.572904348373413,
      "learning_rate": 0.00023514283152131118,
      "loss": 2.9845,
      "step": 131183
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.519132614135742,
      "learning_rate": 0.0002351388377021069,
      "loss": 3.0749,
      "step": 131184
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.048482894897461,
      "learning_rate": 0.00023513484389496152,
      "loss": 2.814,
      "step": 131185
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3141095638275146,
      "learning_rate": 0.00023513085009987594,
      "loss": 3.0152,
      "step": 131186
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8831149339675903,
      "learning_rate": 0.00023512685631685062,
      "loss": 2.8558,
      "step": 131187
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.5376904010772705,
      "learning_rate": 0.00023512286254588646,
      "loss": 2.9079,
      "step": 131188
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.659881830215454,
      "learning_rate": 0.00023511886878698417,
      "loss": 2.8039,
      "step": 131189
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7817649841308594,
      "learning_rate": 0.0002351148750401445,
      "loss": 3.0078,
      "step": 131190
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.163666009902954,
      "learning_rate": 0.00023511088130536822,
      "loss": 2.9277,
      "step": 131191
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7744638919830322,
      "learning_rate": 0.00023510688758265614,
      "loss": 2.8492,
      "step": 131192
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.803849458694458,
      "learning_rate": 0.00023510289387200883,
      "loss": 2.7167,
      "step": 131193
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.434027671813965,
      "learning_rate": 0.00023509890017342708,
      "loss": 2.9638,
      "step": 131194
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.353220224380493,
      "learning_rate": 0.0002350949064869117,
      "loss": 3.0148,
      "step": 131195
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1486051082611084,
      "learning_rate": 0.00023509091281246342,
      "loss": 2.9486,
      "step": 131196
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0268447399139404,
      "learning_rate": 0.00023508691915008291,
      "loss": 2.9239,
      "step": 131197
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.086313724517822,
      "learning_rate": 0.00023508292549977112,
      "loss": 3.0276,
      "step": 131198
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.74108624458313,
      "learning_rate": 0.00023507893186152854,
      "loss": 3.0678,
      "step": 131199
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9288123846054077,
      "learning_rate": 0.00023507493823535596,
      "loss": 3.1189,
      "step": 131200
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7071022987365723,
      "learning_rate": 0.00023507094462125422,
      "loss": 3.1516,
      "step": 131201
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.71580970287323,
      "learning_rate": 0.00023506695101922399,
      "loss": 2.8304,
      "step": 131202
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.198971748352051,
      "learning_rate": 0.00023506295742926602,
      "loss": 2.8289,
      "step": 131203
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8188512325286865,
      "learning_rate": 0.00023505896385138123,
      "loss": 2.9024,
      "step": 131204
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8317031860351562,
      "learning_rate": 0.0002350549702855701,
      "loss": 2.8798,
      "step": 131205
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.96165132522583,
      "learning_rate": 0.00023505097673183343,
      "loss": 2.866,
      "step": 131206
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.711039662361145,
      "learning_rate": 0.00023504698319017206,
      "loss": 2.9619,
      "step": 131207
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2566564083099365,
      "learning_rate": 0.00023504298966058664,
      "loss": 2.9995,
      "step": 131208
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.205355167388916,
      "learning_rate": 0.00023503899614307796,
      "loss": 2.8993,
      "step": 131209
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3708765506744385,
      "learning_rate": 0.00023503500263764688,
      "loss": 2.8124,
      "step": 131210
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8973108530044556,
      "learning_rate": 0.0002350310091442939,
      "loss": 3.1127,
      "step": 131211
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5480539798736572,
      "learning_rate": 0.00023502701566301986,
      "loss": 3.1881,
      "step": 131212
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2302231788635254,
      "learning_rate": 0.00023502302219382558,
      "loss": 3.072,
      "step": 131213
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.007219076156616,
      "learning_rate": 0.0002350190287367117,
      "loss": 2.7384,
      "step": 131214
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.567755699157715,
      "learning_rate": 0.00023501503529167902,
      "loss": 2.797,
      "step": 131215
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.783583164215088,
      "learning_rate": 0.00023501104185872843,
      "loss": 2.9565,
      "step": 131216
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4486498832702637,
      "learning_rate": 0.00023500704843786035,
      "loss": 2.9503,
      "step": 131217
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.677767038345337,
      "learning_rate": 0.00023500305502907568,
      "loss": 2.6563,
      "step": 131218
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.336348295211792,
      "learning_rate": 0.0002349990616323752,
      "loss": 3.1412,
      "step": 131219
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6727728843688965,
      "learning_rate": 0.00023499506824775963,
      "loss": 3.1445,
      "step": 131220
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0585832595825195,
      "learning_rate": 0.00023499107487522965,
      "loss": 2.8952,
      "step": 131221
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8669509887695312,
      "learning_rate": 0.00023498708151478614,
      "loss": 3.1439,
      "step": 131222
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8467899560928345,
      "learning_rate": 0.0002349830881664298,
      "loss": 2.8755,
      "step": 131223
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1051464080810547,
      "learning_rate": 0.00023497909483016124,
      "loss": 3.2472,
      "step": 131224
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1878111362457275,
      "learning_rate": 0.00023497510150598128,
      "loss": 2.9354,
      "step": 131225
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9810009002685547,
      "learning_rate": 0.00023497110819389067,
      "loss": 2.9095,
      "step": 131226
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0714728832244873,
      "learning_rate": 0.0002349671148938902,
      "loss": 3.1868,
      "step": 131227
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.085660934448242,
      "learning_rate": 0.00023496312160598058,
      "loss": 2.8827,
      "step": 131228
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7624074220657349,
      "learning_rate": 0.0002349591283301625,
      "loss": 2.97,
      "step": 131229
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2739417552948,
      "learning_rate": 0.0002349551350664368,
      "loss": 2.8806,
      "step": 131230
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.185302972793579,
      "learning_rate": 0.00023495114181480416,
      "loss": 2.9109,
      "step": 131231
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9304510354995728,
      "learning_rate": 0.0002349471485752653,
      "loss": 3.1512,
      "step": 131232
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.70854115486145,
      "learning_rate": 0.00023494315534782097,
      "loss": 3.01,
      "step": 131233
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1130285263061523,
      "learning_rate": 0.00023493916213247203,
      "loss": 3.029,
      "step": 131234
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1402502059936523,
      "learning_rate": 0.00023493516892921906,
      "loss": 2.6961,
      "step": 131235
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.338510274887085,
      "learning_rate": 0.00023493117573806287,
      "loss": 2.8625,
      "step": 131236
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.067699670791626,
      "learning_rate": 0.0002349271825590042,
      "loss": 3.1979,
      "step": 131237
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9156064987182617,
      "learning_rate": 0.00023492318939204382,
      "loss": 3.0601,
      "step": 131238
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0049824714660645,
      "learning_rate": 0.00023491919623718242,
      "loss": 2.9012,
      "step": 131239
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.6894594430923462,
      "learning_rate": 0.00023491520309442082,
      "loss": 2.9923,
      "step": 131240
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9632402658462524,
      "learning_rate": 0.00023491120996375968,
      "loss": 3.1537,
      "step": 131241
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.045609712600708,
      "learning_rate": 0.00023490721684519972,
      "loss": 2.9995,
      "step": 131242
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5530364513397217,
      "learning_rate": 0.00023490322373874176,
      "loss": 3.0594,
      "step": 131243
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.174384832382202,
      "learning_rate": 0.00023489923064438654,
      "loss": 3.1481,
      "step": 131244
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2080299854278564,
      "learning_rate": 0.00023489523756213486,
      "loss": 2.7895,
      "step": 131245
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3156683444976807,
      "learning_rate": 0.00023489124449198735,
      "loss": 2.9725,
      "step": 131246
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9323458671569824,
      "learning_rate": 0.00023488725143394474,
      "loss": 2.8202,
      "step": 131247
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.506603240966797,
      "learning_rate": 0.0002348832583880078,
      "loss": 3.0048,
      "step": 131248
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9094175100326538,
      "learning_rate": 0.0002348792653541773,
      "loss": 3.0666,
      "step": 131249
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9464961290359497,
      "learning_rate": 0.00023487527233245398,
      "loss": 3.0198,
      "step": 131250
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0948116779327393,
      "learning_rate": 0.00023487127932283856,
      "loss": 3.1317,
      "step": 131251
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3842055797576904,
      "learning_rate": 0.0002348672863253319,
      "loss": 3.2051,
      "step": 131252
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.225343704223633,
      "learning_rate": 0.0002348632933399346,
      "loss": 2.9053,
      "step": 131253
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2521369457244873,
      "learning_rate": 0.0002348593003666474,
      "loss": 2.9988,
      "step": 131254
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9053937196731567,
      "learning_rate": 0.00023485530740547107,
      "loss": 2.9872,
      "step": 131255
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.578176736831665,
      "learning_rate": 0.0002348513144564064,
      "loss": 2.9446,
      "step": 131256
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.568380355834961,
      "learning_rate": 0.00023484732151945407,
      "loss": 2.8525,
      "step": 131257
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.960764765739441,
      "learning_rate": 0.00023484332859461486,
      "loss": 3.1952,
      "step": 131258
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6245813369750977,
      "learning_rate": 0.00023483933568188965,
      "loss": 3.2164,
      "step": 131259
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.986918330192566,
      "learning_rate": 0.0002348353427812789,
      "loss": 3.0868,
      "step": 131260
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0872015953063965,
      "learning_rate": 0.00023483134989278344,
      "loss": 3.2633,
      "step": 131261
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1031439304351807,
      "learning_rate": 0.0002348273570164041,
      "loss": 2.9405,
      "step": 131262
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2174038887023926,
      "learning_rate": 0.00023482336415214163,
      "loss": 3.2,
      "step": 131263
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5977485179901123,
      "learning_rate": 0.00023481937129999668,
      "loss": 2.8006,
      "step": 131264
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2832272052764893,
      "learning_rate": 0.00023481537845997016,
      "loss": 3.0687,
      "step": 131265
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3002705574035645,
      "learning_rate": 0.00023481138563206258,
      "loss": 2.9943,
      "step": 131266
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0898685455322266,
      "learning_rate": 0.00023480739281627483,
      "loss": 2.9544,
      "step": 131267
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.941593050956726,
      "learning_rate": 0.00023480340001260755,
      "loss": 3.2158,
      "step": 131268
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.19527006149292,
      "learning_rate": 0.00023479940722106159,
      "loss": 2.9966,
      "step": 131269
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0477869510650635,
      "learning_rate": 0.00023479541444163763,
      "loss": 3.1244,
      "step": 131270
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8224753141403198,
      "learning_rate": 0.0002347914216743366,
      "loss": 3.0714,
      "step": 131271
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9762953519821167,
      "learning_rate": 0.0002347874289191589,
      "loss": 3.0068,
      "step": 131272
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2691521644592285,
      "learning_rate": 0.00023478343617610545,
      "loss": 3.0551,
      "step": 131273
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.284836530685425,
      "learning_rate": 0.00023477944344517702,
      "loss": 3.1284,
      "step": 131274
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4721839427948,
      "learning_rate": 0.0002347754507263743,
      "loss": 3.0408,
      "step": 131275
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.581207036972046,
      "learning_rate": 0.0002347714580196981,
      "loss": 3.1955,
      "step": 131276
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9318172931671143,
      "learning_rate": 0.00023476746532514916,
      "loss": 3.1331,
      "step": 131277
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.7172112464904785,
      "learning_rate": 0.0002347634726427281,
      "loss": 2.8714,
      "step": 131278
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.6663241386413574,
      "learning_rate": 0.00023475947997243576,
      "loss": 2.7549,
      "step": 131279
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2664554119110107,
      "learning_rate": 0.00023475548731427285,
      "loss": 2.7498,
      "step": 131280
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.536949396133423,
      "learning_rate": 0.0002347514946682401,
      "loss": 3.091,
      "step": 131281
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.493906021118164,
      "learning_rate": 0.00023474750203433833,
      "loss": 3.1229,
      "step": 131282
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4902970790863037,
      "learning_rate": 0.00023474350941256832,
      "loss": 2.9504,
      "step": 131283
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0160655975341797,
      "learning_rate": 0.0002347395168029306,
      "loss": 2.9329,
      "step": 131284
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4812910556793213,
      "learning_rate": 0.00023473552420542605,
      "loss": 2.9748,
      "step": 131285
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.235261917114258,
      "learning_rate": 0.0002347315316200554,
      "loss": 2.7343,
      "step": 131286
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.3952629566192627,
      "learning_rate": 0.00023472753904681937,
      "loss": 2.9386,
      "step": 131287
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.4390711784362793,
      "learning_rate": 0.00023472354648571873,
      "loss": 2.9936,
      "step": 131288
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.241874933242798,
      "learning_rate": 0.00023471955393675436,
      "loss": 3.1671,
      "step": 131289
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.7701480388641357,
      "learning_rate": 0.0002347155613999267,
      "loss": 2.7386,
      "step": 131290
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5873680114746094,
      "learning_rate": 0.00023471156887523665,
      "loss": 2.9813,
      "step": 131291
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2910315990448,
      "learning_rate": 0.000234707576362685,
      "loss": 3.0008,
      "step": 131292
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.157895565032959,
      "learning_rate": 0.0002347035838622724,
      "loss": 2.967,
      "step": 131293
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6985270977020264,
      "learning_rate": 0.00023469959137399965,
      "loss": 3.1534,
      "step": 131294
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8763161897659302,
      "learning_rate": 0.00023469559889786762,
      "loss": 2.8219,
      "step": 131295
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8396461009979248,
      "learning_rate": 0.00023469160643387673,
      "loss": 3.0794,
      "step": 131296
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.901005268096924,
      "learning_rate": 0.00023468761398202795,
      "loss": 3.1428,
      "step": 131297
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.945150136947632,
      "learning_rate": 0.00023468362154232197,
      "loss": 3.0444,
      "step": 131298
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.19802188873291,
      "learning_rate": 0.00023467962911475954,
      "loss": 3.2009,
      "step": 131299
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7021079063415527,
      "learning_rate": 0.0002346756366993414,
      "loss": 3.0098,
      "step": 131300
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2546823024749756,
      "learning_rate": 0.0002346716442960684,
      "loss": 2.9195,
      "step": 131301
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7175474166870117,
      "learning_rate": 0.00023466765190494106,
      "loss": 3.2362,
      "step": 131302
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0923283100128174,
      "learning_rate": 0.00023466365952596024,
      "loss": 3.1311,
      "step": 131303
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8368070125579834,
      "learning_rate": 0.00023465966715912665,
      "loss": 3.0745,
      "step": 131304
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.296712875366211,
      "learning_rate": 0.0002346556748044411,
      "loss": 3.2254,
      "step": 131305
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.05185604095459,
      "learning_rate": 0.0002346516824619043,
      "loss": 3.3601,
      "step": 131306
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9055718183517456,
      "learning_rate": 0.000234647690131517,
      "loss": 3.0889,
      "step": 131307
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.267258882522583,
      "learning_rate": 0.00023464369781327996,
      "loss": 2.948,
      "step": 131308
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.576810121536255,
      "learning_rate": 0.00023463970550719384,
      "loss": 3.0918,
      "step": 131309
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3710360527038574,
      "learning_rate": 0.0002346357132132594,
      "loss": 2.9715,
      "step": 131310
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8694266080856323,
      "learning_rate": 0.00023463172093147745,
      "loss": 2.8968,
      "step": 131311
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1674962043762207,
      "learning_rate": 0.00023462772866184866,
      "loss": 2.9034,
      "step": 131312
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2184853553771973,
      "learning_rate": 0.0002346237364043739,
      "loss": 2.9275,
      "step": 131313
    },
    {
      "epoch": 1.71,
      "grad_norm": 5.131682872772217,
      "learning_rate": 0.00023461974415905375,
      "loss": 3.1464,
      "step": 131314
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.059150457382202,
      "learning_rate": 0.00023461575192588907,
      "loss": 3.0753,
      "step": 131315
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.465662956237793,
      "learning_rate": 0.00023461175970488054,
      "loss": 3.0344,
      "step": 131316
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.48397159576416,
      "learning_rate": 0.00023460776749602887,
      "loss": 3.2872,
      "step": 131317
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.776674509048462,
      "learning_rate": 0.00023460377529933488,
      "loss": 3.0649,
      "step": 131318
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1314337253570557,
      "learning_rate": 0.00023459978311479927,
      "loss": 2.8014,
      "step": 131319
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4301726818084717,
      "learning_rate": 0.00023459579094242284,
      "loss": 2.9452,
      "step": 131320
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9833749532699585,
      "learning_rate": 0.00023459179878220624,
      "loss": 2.9135,
      "step": 131321
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8378636837005615,
      "learning_rate": 0.0002345878066341503,
      "loss": 3.0383,
      "step": 131322
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.314734697341919,
      "learning_rate": 0.00023458381449825572,
      "loss": 2.9385,
      "step": 131323
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.233818531036377,
      "learning_rate": 0.0002345798223745232,
      "loss": 3.0271,
      "step": 131324
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8668711185455322,
      "learning_rate": 0.00023457583026295355,
      "loss": 2.96,
      "step": 131325
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.116732597351074,
      "learning_rate": 0.0002345718381635475,
      "loss": 2.8302,
      "step": 131326
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4244871139526367,
      "learning_rate": 0.00023456784607630576,
      "loss": 3.2203,
      "step": 131327
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2937026023864746,
      "learning_rate": 0.00023456385400122908,
      "loss": 2.987,
      "step": 131328
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.3081443309783936,
      "learning_rate": 0.00023455986193831823,
      "loss": 2.696,
      "step": 131329
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.326399803161621,
      "learning_rate": 0.000234555869887574,
      "loss": 3.2143,
      "step": 131330
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3813602924346924,
      "learning_rate": 0.00023455187784899697,
      "loss": 3.0304,
      "step": 131331
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2853925228118896,
      "learning_rate": 0.00023454788582258807,
      "loss": 3.0785,
      "step": 131332
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1540725231170654,
      "learning_rate": 0.00023454389380834787,
      "loss": 3.05,
      "step": 131333
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.6978697776794434,
      "learning_rate": 0.00023453990180627725,
      "loss": 3.0738,
      "step": 131334
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.067511558532715,
      "learning_rate": 0.00023453590981637688,
      "loss": 2.9893,
      "step": 131335
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8952326774597168,
      "learning_rate": 0.0002345319178386475,
      "loss": 3.1031,
      "step": 131336
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.362255811691284,
      "learning_rate": 0.0002345279258730899,
      "loss": 3.2258,
      "step": 131337
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.254666566848755,
      "learning_rate": 0.00023452393391970485,
      "loss": 2.9713,
      "step": 131338
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.646646738052368,
      "learning_rate": 0.00023451994197849294,
      "loss": 3.0164,
      "step": 131339
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2649176120758057,
      "learning_rate": 0.00023451595004945504,
      "loss": 2.8473,
      "step": 131340
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8845409154891968,
      "learning_rate": 0.00023451195813259184,
      "loss": 3.048,
      "step": 131341
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3634824752807617,
      "learning_rate": 0.0002345079662279041,
      "loss": 2.8935,
      "step": 131342
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3153693675994873,
      "learning_rate": 0.0002345039743353926,
      "loss": 3.0937,
      "step": 131343
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.892634630203247,
      "learning_rate": 0.00023449998245505814,
      "loss": 3.0359,
      "step": 131344
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.052943468093872,
      "learning_rate": 0.00023449599058690123,
      "loss": 2.7522,
      "step": 131345
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.464524030685425,
      "learning_rate": 0.00023449199873092278,
      "loss": 2.9048,
      "step": 131346
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.091993808746338,
      "learning_rate": 0.0002344880068871235,
      "loss": 3.0145,
      "step": 131347
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0407557487487793,
      "learning_rate": 0.00023448401505550413,
      "loss": 3.0091,
      "step": 131348
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8016066551208496,
      "learning_rate": 0.0002344800232360654,
      "loss": 2.6893,
      "step": 131349
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.60213565826416,
      "learning_rate": 0.00023447603142880827,
      "loss": 3.2504,
      "step": 131350
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.629143476486206,
      "learning_rate": 0.0002344720396337331,
      "loss": 3.1499,
      "step": 131351
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8885138034820557,
      "learning_rate": 0.00023446804785084082,
      "loss": 2.9496,
      "step": 131352
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.063659191131592,
      "learning_rate": 0.00023446405608013214,
      "loss": 2.8713,
      "step": 131353
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1386935710906982,
      "learning_rate": 0.00023446006432160786,
      "loss": 2.939,
      "step": 131354
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.795204758644104,
      "learning_rate": 0.0002344560725752687,
      "loss": 3.0016,
      "step": 131355
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.991201400756836,
      "learning_rate": 0.0002344520808411155,
      "loss": 2.9755,
      "step": 131356
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3234376907348633,
      "learning_rate": 0.00023444808911914878,
      "loss": 3.2023,
      "step": 131357
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.212294816970825,
      "learning_rate": 0.0002344440974093694,
      "loss": 3.0265,
      "step": 131358
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.465487003326416,
      "learning_rate": 0.00023444010571177807,
      "loss": 2.9239,
      "step": 131359
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.059102773666382,
      "learning_rate": 0.00023443611402637558,
      "loss": 2.921,
      "step": 131360
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9399734735488892,
      "learning_rate": 0.0002344321223531627,
      "loss": 3.1031,
      "step": 131361
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0051116943359375,
      "learning_rate": 0.00023442813069214017,
      "loss": 3.0125,
      "step": 131362
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.481462001800537,
      "learning_rate": 0.00023442413904330864,
      "loss": 3.0429,
      "step": 131363
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.598701238632202,
      "learning_rate": 0.00023442014740666884,
      "loss": 3.1231,
      "step": 131364
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.048804998397827,
      "learning_rate": 0.0002344161557822216,
      "loss": 2.9879,
      "step": 131365
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.867483615875244,
      "learning_rate": 0.00023441216416996764,
      "loss": 2.8496,
      "step": 131366
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.546201705932617,
      "learning_rate": 0.0002344081725699077,
      "loss": 3.0037,
      "step": 131367
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.608657121658325,
      "learning_rate": 0.0002344041809820426,
      "loss": 3.1785,
      "step": 131368
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.3504035472869873,
      "learning_rate": 0.00023440018940637294,
      "loss": 2.8349,
      "step": 131369
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2398993968963623,
      "learning_rate": 0.00023439619784289945,
      "loss": 3.0498,
      "step": 131370
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.196507692337036,
      "learning_rate": 0.000234392206291623,
      "loss": 3.0061,
      "step": 131371
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9189691543579102,
      "learning_rate": 0.00023438821475254429,
      "loss": 2.9463,
      "step": 131372
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.044698476791382,
      "learning_rate": 0.000234384223225664,
      "loss": 2.6735,
      "step": 131373
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.744296073913574,
      "learning_rate": 0.00023438023171098307,
      "loss": 3.2525,
      "step": 131374
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.1472392082214355,
      "learning_rate": 0.00023437624020850197,
      "loss": 2.9755,
      "step": 131375
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9638522863388062,
      "learning_rate": 0.00023437224871822155,
      "loss": 3.1891,
      "step": 131376
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9882853031158447,
      "learning_rate": 0.0002343682572401426,
      "loss": 2.9415,
      "step": 131377
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.18583345413208,
      "learning_rate": 0.0002343642657742658,
      "loss": 3.1113,
      "step": 131378
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.030152320861816,
      "learning_rate": 0.00023436027432059193,
      "loss": 2.8661,
      "step": 131379
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4347078800201416,
      "learning_rate": 0.00023435628287912184,
      "loss": 2.8872,
      "step": 131380
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2755205631256104,
      "learning_rate": 0.00023435229144985607,
      "loss": 3.1421,
      "step": 131381
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.565518379211426,
      "learning_rate": 0.0002343483000327954,
      "loss": 3.1847,
      "step": 131382
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0615062713623047,
      "learning_rate": 0.00023434430862794066,
      "loss": 3.0545,
      "step": 131383
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.3790509700775146,
      "learning_rate": 0.00023434031723529252,
      "loss": 2.9704,
      "step": 131384
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0059449672698975,
      "learning_rate": 0.00023433632585485176,
      "loss": 3.2711,
      "step": 131385
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2106120586395264,
      "learning_rate": 0.00023433233448661914,
      "loss": 2.873,
      "step": 131386
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1416590213775635,
      "learning_rate": 0.00023432834313059548,
      "loss": 3.1511,
      "step": 131387
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9842491149902344,
      "learning_rate": 0.00023432435178678132,
      "loss": 3.1005,
      "step": 131388
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1798691749572754,
      "learning_rate": 0.00023432036045517747,
      "loss": 3.0755,
      "step": 131389
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.085695266723633,
      "learning_rate": 0.00023431636913578472,
      "loss": 2.9639,
      "step": 131390
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7323570251464844,
      "learning_rate": 0.0002343123778286038,
      "loss": 2.884,
      "step": 131391
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1973507404327393,
      "learning_rate": 0.00023430838653363544,
      "loss": 3.2232,
      "step": 131392
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7046096324920654,
      "learning_rate": 0.00023430439525088054,
      "loss": 2.9867,
      "step": 131393
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9944082498550415,
      "learning_rate": 0.00023430040398033953,
      "loss": 3.0509,
      "step": 131394
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.338945150375366,
      "learning_rate": 0.00023429641272201335,
      "loss": 3.2347,
      "step": 131395
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6051080226898193,
      "learning_rate": 0.0002342924214759027,
      "loss": 2.8493,
      "step": 131396
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9742262363433838,
      "learning_rate": 0.00023428843024200834,
      "loss": 2.8167,
      "step": 131397
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.715833902359009,
      "learning_rate": 0.00023428443902033096,
      "loss": 3.0667,
      "step": 131398
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3904757499694824,
      "learning_rate": 0.00023428044781087143,
      "loss": 3.0638,
      "step": 131399
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9265292882919312,
      "learning_rate": 0.0002342764566136304,
      "loss": 2.9358,
      "step": 131400
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.13626766204834,
      "learning_rate": 0.00023427246542860855,
      "loss": 3.2038,
      "step": 131401
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6986429691314697,
      "learning_rate": 0.0002342684742558067,
      "loss": 3.025,
      "step": 131402
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4187734127044678,
      "learning_rate": 0.0002342644830952256,
      "loss": 2.7461,
      "step": 131403
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9885547161102295,
      "learning_rate": 0.00023426049194686594,
      "loss": 2.8853,
      "step": 131404
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8783745765686035,
      "learning_rate": 0.0002342565008107286,
      "loss": 2.9048,
      "step": 131405
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1906309127807617,
      "learning_rate": 0.00023425250968681414,
      "loss": 2.9167,
      "step": 131406
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.825706958770752,
      "learning_rate": 0.00023424851857512338,
      "loss": 2.982,
      "step": 131407
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.110381603240967,
      "learning_rate": 0.00023424452747565708,
      "loss": 2.7466,
      "step": 131408
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8571640253067017,
      "learning_rate": 0.00023424053638841592,
      "loss": 2.8362,
      "step": 131409
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.011160135269165,
      "learning_rate": 0.00023423654531340072,
      "loss": 2.8432,
      "step": 131410
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.1625192165374756,
      "learning_rate": 0.00023423255425061222,
      "loss": 3.1476,
      "step": 131411
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.83618426322937,
      "learning_rate": 0.00023422856320005108,
      "loss": 2.7788,
      "step": 131412
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1386163234710693,
      "learning_rate": 0.00023422457216171807,
      "loss": 3.0559,
      "step": 131413
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.6609086990356445,
      "learning_rate": 0.00023422058113561404,
      "loss": 3.0581,
      "step": 131414
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.272031545639038,
      "learning_rate": 0.00023421659012173957,
      "loss": 2.863,
      "step": 131415
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.062775135040283,
      "learning_rate": 0.00023421259912009547,
      "loss": 2.8912,
      "step": 131416
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.039611577987671,
      "learning_rate": 0.00023420860813068258,
      "loss": 3.0016,
      "step": 131417
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4896299839019775,
      "learning_rate": 0.00023420461715350146,
      "loss": 3.076,
      "step": 131418
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0742812156677246,
      "learning_rate": 0.00023420062618855297,
      "loss": 3.0614,
      "step": 131419
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.211257219314575,
      "learning_rate": 0.0002341966352358378,
      "loss": 3.3251,
      "step": 131420
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.815277338027954,
      "learning_rate": 0.00023419264429535682,
      "loss": 2.7813,
      "step": 131421
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8879414796829224,
      "learning_rate": 0.00023418865336711058,
      "loss": 3.0208,
      "step": 131422
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.249326705932617,
      "learning_rate": 0.00023418466245109996,
      "loss": 2.9336,
      "step": 131423
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.985615611076355,
      "learning_rate": 0.00023418067154732563,
      "loss": 2.9809,
      "step": 131424
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.126260995864868,
      "learning_rate": 0.0002341766806557883,
      "loss": 2.9876,
      "step": 131425
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.290802240371704,
      "learning_rate": 0.0002341726897764888,
      "loss": 3.0394,
      "step": 131426
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.699852228164673,
      "learning_rate": 0.00023416869890942785,
      "loss": 2.8755,
      "step": 131427
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4331469535827637,
      "learning_rate": 0.0002341647080546062,
      "loss": 3.0889,
      "step": 131428
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9131420850753784,
      "learning_rate": 0.00023416071721202463,
      "loss": 2.8518,
      "step": 131429
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2148964405059814,
      "learning_rate": 0.00023415672638168374,
      "loss": 2.9571,
      "step": 131430
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9465985298156738,
      "learning_rate": 0.00023415273556358436,
      "loss": 3.0339,
      "step": 131431
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1762590408325195,
      "learning_rate": 0.00023414874475772725,
      "loss": 3.0193,
      "step": 131432
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8938987255096436,
      "learning_rate": 0.00023414475396411308,
      "loss": 2.9361,
      "step": 131433
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.039268732070923,
      "learning_rate": 0.00023414076318274267,
      "loss": 2.8028,
      "step": 131434
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8622323274612427,
      "learning_rate": 0.00023413677241361687,
      "loss": 2.9577,
      "step": 131435
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0319979190826416,
      "learning_rate": 0.00023413278165673615,
      "loss": 3.0019,
      "step": 131436
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9913382530212402,
      "learning_rate": 0.00023412879091210141,
      "loss": 3.2494,
      "step": 131437
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8100976943969727,
      "learning_rate": 0.00023412480017971337,
      "loss": 2.886,
      "step": 131438
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4308619499206543,
      "learning_rate": 0.00023412080945957274,
      "loss": 2.8167,
      "step": 131439
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.92742121219635,
      "learning_rate": 0.0002341168187516803,
      "loss": 3.0573,
      "step": 131440
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0960090160369873,
      "learning_rate": 0.00023411282805603693,
      "loss": 2.6486,
      "step": 131441
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3174166679382324,
      "learning_rate": 0.0002341088373726431,
      "loss": 2.9061,
      "step": 131442
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7239511013031006,
      "learning_rate": 0.0002341048467014997,
      "loss": 2.9931,
      "step": 131443
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.9699277877807617,
      "learning_rate": 0.0002341008560426074,
      "loss": 2.9001,
      "step": 131444
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0754144191741943,
      "learning_rate": 0.00023409686539596703,
      "loss": 2.9216,
      "step": 131445
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8940891027450562,
      "learning_rate": 0.0002340928747615793,
      "loss": 2.8819,
      "step": 131446
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2821171283721924,
      "learning_rate": 0.0002340888841394451,
      "loss": 2.8382,
      "step": 131447
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0373311042785645,
      "learning_rate": 0.00023408489352956484,
      "loss": 3.1834,
      "step": 131448
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4118552207946777,
      "learning_rate": 0.00023408090293193946,
      "loss": 2.887,
      "step": 131449
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.4761834144592285,
      "learning_rate": 0.0002340769123465697,
      "loss": 2.9526,
      "step": 131450
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0791122913360596,
      "learning_rate": 0.00023407292177345625,
      "loss": 2.9986,
      "step": 131451
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.578838348388672,
      "learning_rate": 0.00023406893121259993,
      "loss": 2.7869,
      "step": 131452
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3087399005889893,
      "learning_rate": 0.00023406494066400145,
      "loss": 3.2182,
      "step": 131453
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.50045108795166,
      "learning_rate": 0.00023406095012766163,
      "loss": 2.9399,
      "step": 131454
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.81255042552948,
      "learning_rate": 0.00023405695960358101,
      "loss": 3.2631,
      "step": 131455
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.870535135269165,
      "learning_rate": 0.00023405296909176043,
      "loss": 2.9908,
      "step": 131456
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.594099283218384,
      "learning_rate": 0.00023404897859220067,
      "loss": 2.6827,
      "step": 131457
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7567983865737915,
      "learning_rate": 0.00023404498810490244,
      "loss": 3.3186,
      "step": 131458
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0685274600982666,
      "learning_rate": 0.0002340409976298665,
      "loss": 3.0389,
      "step": 131459
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2178683280944824,
      "learning_rate": 0.00023403700716709372,
      "loss": 3.0302,
      "step": 131460
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3756959438323975,
      "learning_rate": 0.00023403301671658455,
      "loss": 3.2851,
      "step": 131461
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9419989585876465,
      "learning_rate": 0.0002340290262783399,
      "loss": 3.0311,
      "step": 131462
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9970524311065674,
      "learning_rate": 0.0002340250358523605,
      "loss": 2.8068,
      "step": 131463
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.207643985748291,
      "learning_rate": 0.0002340210454386471,
      "loss": 3.0863,
      "step": 131464
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.367647171020508,
      "learning_rate": 0.00023401705503720043,
      "loss": 2.9802,
      "step": 131465
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8522790670394897,
      "learning_rate": 0.00023401306464802135,
      "loss": 3.2692,
      "step": 131466
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0243794918060303,
      "learning_rate": 0.00023400907427111037,
      "loss": 2.5688,
      "step": 131467
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.172419309616089,
      "learning_rate": 0.00023400508390646836,
      "loss": 2.8386,
      "step": 131468
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.609957456588745,
      "learning_rate": 0.00023400109355409604,
      "loss": 2.9737,
      "step": 131469
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.218468189239502,
      "learning_rate": 0.00023399710321399416,
      "loss": 3.1387,
      "step": 131470
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1326663494110107,
      "learning_rate": 0.00023399311288616346,
      "loss": 3.0151,
      "step": 131471
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.535440683364868,
      "learning_rate": 0.0002339891225706048,
      "loss": 2.8537,
      "step": 131472
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0625526905059814,
      "learning_rate": 0.0002339851322673187,
      "loss": 3.0761,
      "step": 131473
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9665169715881348,
      "learning_rate": 0.00023398114197630603,
      "loss": 3.0152,
      "step": 131474
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.523484706878662,
      "learning_rate": 0.00023397715169756747,
      "loss": 2.7886,
      "step": 131475
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6201016902923584,
      "learning_rate": 0.00023397316143110384,
      "loss": 3.022,
      "step": 131476
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.141364812850952,
      "learning_rate": 0.00023396917117691582,
      "loss": 3.2598,
      "step": 131477
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7619731426239014,
      "learning_rate": 0.00023396518093500432,
      "loss": 2.9534,
      "step": 131478
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9886341094970703,
      "learning_rate": 0.00023396119070536982,
      "loss": 3.1744,
      "step": 131479
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0944809913635254,
      "learning_rate": 0.00023395720048801315,
      "loss": 2.877,
      "step": 131480
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.218827962875366,
      "learning_rate": 0.00023395321028293512,
      "loss": 2.9286,
      "step": 131481
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9703824520111084,
      "learning_rate": 0.00023394922009013642,
      "loss": 2.9735,
      "step": 131482
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0390398502349854,
      "learning_rate": 0.00023394522990961782,
      "loss": 2.9481,
      "step": 131483
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4905149936676025,
      "learning_rate": 0.00023394123974138014,
      "loss": 2.9183,
      "step": 131484
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1422603130340576,
      "learning_rate": 0.0002339372495854239,
      "loss": 2.8019,
      "step": 131485
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3897039890289307,
      "learning_rate": 0.00023393325944175004,
      "loss": 3.3076,
      "step": 131486
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.552112340927124,
      "learning_rate": 0.00023392926931035916,
      "loss": 2.551,
      "step": 131487
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5552866458892822,
      "learning_rate": 0.0002339252791912521,
      "loss": 3.0552,
      "step": 131488
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2295312881469727,
      "learning_rate": 0.0002339212890844296,
      "loss": 3.032,
      "step": 131489
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.244013547897339,
      "learning_rate": 0.00023391729898989243,
      "loss": 2.9712,
      "step": 131490
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8185667991638184,
      "learning_rate": 0.00023391330890764123,
      "loss": 2.8793,
      "step": 131491
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.119412422180176,
      "learning_rate": 0.00023390931883767683,
      "loss": 2.8601,
      "step": 131492
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0166008472442627,
      "learning_rate": 0.00023390532877999985,
      "loss": 3.0412,
      "step": 131493
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.85226571559906,
      "learning_rate": 0.00023390133873461117,
      "loss": 3.0486,
      "step": 131494
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.851418137550354,
      "learning_rate": 0.00023389734870151143,
      "loss": 2.7076,
      "step": 131495
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5937108993530273,
      "learning_rate": 0.00023389335868070153,
      "loss": 3.069,
      "step": 131496
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.142993927001953,
      "learning_rate": 0.00023388936867218204,
      "loss": 3.3588,
      "step": 131497
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.219198226928711,
      "learning_rate": 0.00023388537867595374,
      "loss": 2.935,
      "step": 131498
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6906559467315674,
      "learning_rate": 0.00023388138869201745,
      "loss": 2.8686,
      "step": 131499
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.239518880844116,
      "learning_rate": 0.00023387739872037384,
      "loss": 3.0775,
      "step": 131500
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1545677185058594,
      "learning_rate": 0.00023387340876102362,
      "loss": 3.0373,
      "step": 131501
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0700786113739014,
      "learning_rate": 0.00023386941881396766,
      "loss": 2.9529,
      "step": 131502
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5465261936187744,
      "learning_rate": 0.00023386542887920657,
      "loss": 2.9851,
      "step": 131503
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0866098403930664,
      "learning_rate": 0.00023386143895674113,
      "loss": 2.7722,
      "step": 131504
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5098063945770264,
      "learning_rate": 0.0002338574490465721,
      "loss": 3.0118,
      "step": 131505
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.057332992553711,
      "learning_rate": 0.00023385345914870033,
      "loss": 2.8967,
      "step": 131506
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5526578426361084,
      "learning_rate": 0.00023384946926312633,
      "loss": 2.8068,
      "step": 131507
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2272560596466064,
      "learning_rate": 0.00023384547938985105,
      "loss": 2.8021,
      "step": 131508
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2929279804229736,
      "learning_rate": 0.00023384148952887505,
      "loss": 3.0249,
      "step": 131509
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9560298919677734,
      "learning_rate": 0.0002338374996801992,
      "loss": 3.088,
      "step": 131510
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8832608461380005,
      "learning_rate": 0.0002338335098438242,
      "loss": 2.9262,
      "step": 131511
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.153517484664917,
      "learning_rate": 0.0002338295200197508,
      "loss": 3.064,
      "step": 131512
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8783103227615356,
      "learning_rate": 0.0002338255302079798,
      "loss": 2.9409,
      "step": 131513
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2684946060180664,
      "learning_rate": 0.00023382154040851188,
      "loss": 3.1475,
      "step": 131514
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3835878372192383,
      "learning_rate": 0.0002338175506213477,
      "loss": 3.0862,
      "step": 131515
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1160898208618164,
      "learning_rate": 0.00023381356084648815,
      "loss": 3.0864,
      "step": 131516
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9022012948989868,
      "learning_rate": 0.00023380957108393386,
      "loss": 3.3275,
      "step": 131517
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.969340205192566,
      "learning_rate": 0.00023380558133368563,
      "loss": 2.8517,
      "step": 131518
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5444962978363037,
      "learning_rate": 0.0002338015915957442,
      "loss": 2.9785,
      "step": 131519
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8699491024017334,
      "learning_rate": 0.00023379760187011028,
      "loss": 2.9221,
      "step": 131520
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.352069854736328,
      "learning_rate": 0.00023379361215678475,
      "loss": 2.8551,
      "step": 131521
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.254810094833374,
      "learning_rate": 0.00023378962245576817,
      "loss": 2.8012,
      "step": 131522
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.182528257369995,
      "learning_rate": 0.00023378563276706132,
      "loss": 3.237,
      "step": 131523
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0164670944213867,
      "learning_rate": 0.00023378164309066497,
      "loss": 2.9322,
      "step": 131524
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.568446159362793,
      "learning_rate": 0.00023377765342657983,
      "loss": 2.9176,
      "step": 131525
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7439539432525635,
      "learning_rate": 0.0002337736637748067,
      "loss": 2.9589,
      "step": 131526
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.276986837387085,
      "learning_rate": 0.00023376967413534644,
      "loss": 3.1197,
      "step": 131527
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.854884624481201,
      "learning_rate": 0.00023376568450819953,
      "loss": 3.0871,
      "step": 131528
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.912562370300293,
      "learning_rate": 0.0002337616948933668,
      "loss": 3.0286,
      "step": 131529
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.147320032119751,
      "learning_rate": 0.00023375770529084904,
      "loss": 3.1356,
      "step": 131530
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8137807846069336,
      "learning_rate": 0.00023375371570064695,
      "loss": 3.0957,
      "step": 131531
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9742692708969116,
      "learning_rate": 0.00023374972612276134,
      "loss": 2.8836,
      "step": 131532
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1735875606536865,
      "learning_rate": 0.000233745736557193,
      "loss": 2.8646,
      "step": 131533
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.940517783164978,
      "learning_rate": 0.00023374174700394244,
      "loss": 2.9508,
      "step": 131534
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.131748914718628,
      "learning_rate": 0.00023373775746301056,
      "loss": 3.1073,
      "step": 131535
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.029118061065674,
      "learning_rate": 0.00023373376793439803,
      "loss": 2.8173,
      "step": 131536
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.861721396446228,
      "learning_rate": 0.00023372977841810573,
      "loss": 3.0497,
      "step": 131537
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0189549922943115,
      "learning_rate": 0.00023372578891413427,
      "loss": 2.8812,
      "step": 131538
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1364328861236572,
      "learning_rate": 0.00023372179942248456,
      "loss": 3.2324,
      "step": 131539
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.95766282081604,
      "learning_rate": 0.00023371780994315707,
      "loss": 2.9247,
      "step": 131540
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.955018162727356,
      "learning_rate": 0.00023371382047615273,
      "loss": 3.154,
      "step": 131541
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.126967430114746,
      "learning_rate": 0.00023370983102147225,
      "loss": 2.9035,
      "step": 131542
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9942210912704468,
      "learning_rate": 0.0002337058415791163,
      "loss": 2.8167,
      "step": 131543
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2252352237701416,
      "learning_rate": 0.00023370185214908576,
      "loss": 2.8069,
      "step": 131544
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.548171043395996,
      "learning_rate": 0.0002336978627313814,
      "loss": 2.9528,
      "step": 131545
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.61594295501709,
      "learning_rate": 0.00023369387332600373,
      "loss": 3.0442,
      "step": 131546
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9537334442138672,
      "learning_rate": 0.0002336898839329536,
      "loss": 2.8556,
      "step": 131547
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9921488761901855,
      "learning_rate": 0.00023368589455223182,
      "loss": 3.0449,
      "step": 131548
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.863936424255371,
      "learning_rate": 0.000233681905183839,
      "loss": 3.0021,
      "step": 131549
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2409720420837402,
      "learning_rate": 0.00023367791582777606,
      "loss": 3.1658,
      "step": 131550
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2286860942840576,
      "learning_rate": 0.00023367392648404372,
      "loss": 2.8346,
      "step": 131551
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2326974868774414,
      "learning_rate": 0.00023366993715264253,
      "loss": 2.9183,
      "step": 131552
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.242150068283081,
      "learning_rate": 0.00023366594783357335,
      "loss": 2.9927,
      "step": 131553
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9207874536514282,
      "learning_rate": 0.00023366195852683692,
      "loss": 3.0416,
      "step": 131554
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.570819854736328,
      "learning_rate": 0.000233657969232434,
      "loss": 3.0635,
      "step": 131555
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1002418994903564,
      "learning_rate": 0.0002336539799503653,
      "loss": 3.0403,
      "step": 131556
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8835599422454834,
      "learning_rate": 0.0002336499906806317,
      "loss": 3.1387,
      "step": 131557
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.3323566913604736,
      "learning_rate": 0.00023364600142323374,
      "loss": 2.903,
      "step": 131558
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7951043844223022,
      "learning_rate": 0.00023364201217817218,
      "loss": 3.0269,
      "step": 131559
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.950839638710022,
      "learning_rate": 0.00023363802294544786,
      "loss": 3.0417,
      "step": 131560
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8805694580078125,
      "learning_rate": 0.00023363403372506146,
      "loss": 3.1941,
      "step": 131561
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.4932940006256104,
      "learning_rate": 0.00023363004451701377,
      "loss": 3.0071,
      "step": 131562
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0361106395721436,
      "learning_rate": 0.0002336260553213056,
      "loss": 3.037,
      "step": 131563
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0998191833496094,
      "learning_rate": 0.0002336220661379375,
      "loss": 2.9983,
      "step": 131564
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2071714401245117,
      "learning_rate": 0.00023361807696691033,
      "loss": 2.9397,
      "step": 131565
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.682255744934082,
      "learning_rate": 0.0002336140878082248,
      "loss": 3.2276,
      "step": 131566
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.7133734226226807,
      "learning_rate": 0.00023361009866188162,
      "loss": 3.0034,
      "step": 131567
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9095522165298462,
      "learning_rate": 0.00023360610952788164,
      "loss": 3.1141,
      "step": 131568
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8371306657791138,
      "learning_rate": 0.00023360212040622564,
      "loss": 2.9625,
      "step": 131569
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9251906871795654,
      "learning_rate": 0.0002335981312969141,
      "loss": 2.964,
      "step": 131570
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.367563009262085,
      "learning_rate": 0.00023359414219994794,
      "loss": 3.059,
      "step": 131571
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5213685035705566,
      "learning_rate": 0.0002335901531153279,
      "loss": 3.0277,
      "step": 131572
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7503831386566162,
      "learning_rate": 0.00023358616404305468,
      "loss": 3.2266,
      "step": 131573
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.545469284057617,
      "learning_rate": 0.00023358217498312906,
      "loss": 3.2082,
      "step": 131574
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.509073257446289,
      "learning_rate": 0.00023357818593555183,
      "loss": 3.2455,
      "step": 131575
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.000346660614014,
      "learning_rate": 0.00023357419690032367,
      "loss": 3.033,
      "step": 131576
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.072510004043579,
      "learning_rate": 0.00023357020787744527,
      "loss": 3.0827,
      "step": 131577
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0375585556030273,
      "learning_rate": 0.0002335662188669174,
      "loss": 3.0341,
      "step": 131578
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4630677700042725,
      "learning_rate": 0.00023356222986874083,
      "loss": 3.1135,
      "step": 131579
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.882973551750183,
      "learning_rate": 0.0002335582408829163,
      "loss": 3.0925,
      "step": 131580
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0661206245422363,
      "learning_rate": 0.0002335542519094446,
      "loss": 3.1591,
      "step": 131581
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8235195875167847,
      "learning_rate": 0.00023355026294832635,
      "loss": 3.0307,
      "step": 131582
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1111490726470947,
      "learning_rate": 0.00023354627399956238,
      "loss": 2.9268,
      "step": 131583
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.982469081878662,
      "learning_rate": 0.00023354228506315347,
      "loss": 2.9572,
      "step": 131584
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9469622373580933,
      "learning_rate": 0.00023353829613910023,
      "loss": 3.1722,
      "step": 131585
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0274624824523926,
      "learning_rate": 0.0002335343072274035,
      "loss": 3.1038,
      "step": 131586
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0532150268554688,
      "learning_rate": 0.000233530318328064,
      "loss": 3.0013,
      "step": 131587
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.00561785697937,
      "learning_rate": 0.0002335263294410825,
      "loss": 3.2835,
      "step": 131588
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7799650430679321,
      "learning_rate": 0.00023352234056645964,
      "loss": 3.4631,
      "step": 131589
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1282806396484375,
      "learning_rate": 0.00023351835170419625,
      "loss": 2.8321,
      "step": 131590
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4367377758026123,
      "learning_rate": 0.00023351436285429315,
      "loss": 3.0009,
      "step": 131591
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7863364219665527,
      "learning_rate": 0.00023351037401675087,
      "loss": 2.9411,
      "step": 131592
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1842870712280273,
      "learning_rate": 0.0002335063851915703,
      "loss": 2.944,
      "step": 131593
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2838659286499023,
      "learning_rate": 0.00023350239637875218,
      "loss": 3.0674,
      "step": 131594
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1394102573394775,
      "learning_rate": 0.00023349840757829718,
      "loss": 2.8647,
      "step": 131595
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4434592723846436,
      "learning_rate": 0.00023349441879020607,
      "loss": 2.7929,
      "step": 131596
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.873526096343994,
      "learning_rate": 0.0002334904300144796,
      "loss": 3.0515,
      "step": 131597
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9467943906784058,
      "learning_rate": 0.0002334864412511186,
      "loss": 2.9816,
      "step": 131598
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7374740839004517,
      "learning_rate": 0.00023348245250012366,
      "loss": 2.8511,
      "step": 131599
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1782326698303223,
      "learning_rate": 0.00023347846376149564,
      "loss": 3.0503,
      "step": 131600
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.317387104034424,
      "learning_rate": 0.00023347447503523514,
      "loss": 2.8578,
      "step": 131601
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9952147006988525,
      "learning_rate": 0.00023347048632134303,
      "loss": 2.834,
      "step": 131602
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.656752824783325,
      "learning_rate": 0.00023346649761982002,
      "loss": 3.245,
      "step": 131603
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.259069919586182,
      "learning_rate": 0.0002334625089306668,
      "loss": 2.9456,
      "step": 131604
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.51788592338562,
      "learning_rate": 0.00023345852025388424,
      "loss": 2.8453,
      "step": 131605
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.191098690032959,
      "learning_rate": 0.000233454531589473,
      "loss": 3.0141,
      "step": 131606
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.179304838180542,
      "learning_rate": 0.00023345054293743376,
      "loss": 2.9462,
      "step": 131607
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.509824514389038,
      "learning_rate": 0.0002334465542977673,
      "loss": 3.219,
      "step": 131608
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.1471707820892334,
      "learning_rate": 0.00023344256567047443,
      "loss": 3.1131,
      "step": 131609
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8202744722366333,
      "learning_rate": 0.0002334385770555558,
      "loss": 3.0963,
      "step": 131610
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2429628372192383,
      "learning_rate": 0.0002334345884530122,
      "loss": 3.039,
      "step": 131611
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.5940210819244385,
      "learning_rate": 0.0002334305998628445,
      "loss": 3.3354,
      "step": 131612
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8477903604507446,
      "learning_rate": 0.0002334266112850532,
      "loss": 2.8819,
      "step": 131613
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0086798667907715,
      "learning_rate": 0.00023342262271963912,
      "loss": 2.9644,
      "step": 131614
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.3811073303222656,
      "learning_rate": 0.00023341863416660304,
      "loss": 3.1441,
      "step": 131615
    },
    {
      "epoch": 1.71,
      "grad_norm": 4.049637317657471,
      "learning_rate": 0.00023341464562594568,
      "loss": 2.8056,
      "step": 131616
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9785680770874023,
      "learning_rate": 0.00023341065709766784,
      "loss": 3.1236,
      "step": 131617
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.231419563293457,
      "learning_rate": 0.00023340666858177032,
      "loss": 3.0593,
      "step": 131618
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.7898991107940674,
      "learning_rate": 0.00023340268007825357,
      "loss": 2.9552,
      "step": 131619
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4648044109344482,
      "learning_rate": 0.0002333986915871186,
      "loss": 2.8981,
      "step": 131620
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7301088571548462,
      "learning_rate": 0.00023339470310836604,
      "loss": 2.8112,
      "step": 131621
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.107079029083252,
      "learning_rate": 0.00023339071464199666,
      "loss": 2.9902,
      "step": 131622
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.505946636199951,
      "learning_rate": 0.00023338672618801123,
      "loss": 2.9635,
      "step": 131623
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0941832065582275,
      "learning_rate": 0.00023338273774641053,
      "loss": 2.9021,
      "step": 131624
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.182710886001587,
      "learning_rate": 0.00023337874931719517,
      "loss": 3.0733,
      "step": 131625
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3106021881103516,
      "learning_rate": 0.00023337476090036594,
      "loss": 2.973,
      "step": 131626
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7628642320632935,
      "learning_rate": 0.0002333707724959236,
      "loss": 2.9294,
      "step": 131627
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9935194253921509,
      "learning_rate": 0.00023336678410386887,
      "loss": 2.9379,
      "step": 131628
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.606388568878174,
      "learning_rate": 0.00023336279572420255,
      "loss": 2.949,
      "step": 131629
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4445278644561768,
      "learning_rate": 0.00023335880735692543,
      "loss": 2.8103,
      "step": 131630
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3629252910614014,
      "learning_rate": 0.00023335481900203808,
      "loss": 2.9276,
      "step": 131631
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1632492542266846,
      "learning_rate": 0.00023335083065954127,
      "loss": 3.1556,
      "step": 131632
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.745985984802246,
      "learning_rate": 0.00023334684232943583,
      "loss": 3.4616,
      "step": 131633
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.6737799644470215,
      "learning_rate": 0.00023334285401172247,
      "loss": 3.1375,
      "step": 131634
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9651437997817993,
      "learning_rate": 0.00023333886570640194,
      "loss": 3.0077,
      "step": 131635
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8623440265655518,
      "learning_rate": 0.0002333348774134751,
      "loss": 3.1568,
      "step": 131636
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0352463722229004,
      "learning_rate": 0.00023333088913294247,
      "loss": 3.1189,
      "step": 131637
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.111056327819824,
      "learning_rate": 0.00023332690086480483,
      "loss": 3.1364,
      "step": 131638
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8942618370056152,
      "learning_rate": 0.000233322912609063,
      "loss": 3.0742,
      "step": 131639
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9163365364074707,
      "learning_rate": 0.0002333189243657177,
      "loss": 2.8617,
      "step": 131640
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.047638177871704,
      "learning_rate": 0.00023331493613476965,
      "loss": 2.854,
      "step": 131641
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.057708978652954,
      "learning_rate": 0.00023331094791621978,
      "loss": 3.0917,
      "step": 131642
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0380380153656006,
      "learning_rate": 0.00023330695971006851,
      "loss": 3.0861,
      "step": 131643
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9147111177444458,
      "learning_rate": 0.00023330297151631673,
      "loss": 3.2998,
      "step": 131644
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1622626781463623,
      "learning_rate": 0.00023329898333496521,
      "loss": 3.1773,
      "step": 131645
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0921621322631836,
      "learning_rate": 0.00023329499516601466,
      "loss": 2.9255,
      "step": 131646
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.0255954265594482,
      "learning_rate": 0.00023329100700946582,
      "loss": 2.8764,
      "step": 131647
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.131448984146118,
      "learning_rate": 0.00023328701886531956,
      "loss": 3.1449,
      "step": 131648
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0050857067108154,
      "learning_rate": 0.00023328303073357636,
      "loss": 2.9621,
      "step": 131649
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.140327215194702,
      "learning_rate": 0.00023327904261423717,
      "loss": 3.1773,
      "step": 131650
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.07566237449646,
      "learning_rate": 0.0002332750545073026,
      "loss": 3.0227,
      "step": 131651
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4056780338287354,
      "learning_rate": 0.00023327106641277348,
      "loss": 2.9305,
      "step": 131652
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9309091567993164,
      "learning_rate": 0.00023326707833065056,
      "loss": 3.1276,
      "step": 131653
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1558339595794678,
      "learning_rate": 0.00023326309026093452,
      "loss": 3.0534,
      "step": 131654
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3242413997650146,
      "learning_rate": 0.00023325910220362627,
      "loss": 2.9672,
      "step": 131655
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.146671772003174,
      "learning_rate": 0.00023325511415872623,
      "loss": 2.8525,
      "step": 131656
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.117926597595215,
      "learning_rate": 0.0002332511261262354,
      "loss": 2.7047,
      "step": 131657
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7960782051086426,
      "learning_rate": 0.00023324713810615443,
      "loss": 3.0715,
      "step": 131658
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.548409938812256,
      "learning_rate": 0.00023324315009848405,
      "loss": 2.9538,
      "step": 131659
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0209600925445557,
      "learning_rate": 0.00023323916210322504,
      "loss": 2.8608,
      "step": 131660
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9614325761795044,
      "learning_rate": 0.00023323517412037826,
      "loss": 2.9316,
      "step": 131661
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.893057346343994,
      "learning_rate": 0.00023323118614994418,
      "loss": 3.0481,
      "step": 131662
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2749109268188477,
      "learning_rate": 0.0002332271981919237,
      "loss": 2.9165,
      "step": 131663
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.048755168914795,
      "learning_rate": 0.00023322321024631755,
      "loss": 2.501,
      "step": 131664
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.754300117492676,
      "learning_rate": 0.00023321922231312644,
      "loss": 2.9227,
      "step": 131665
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8307549953460693,
      "learning_rate": 0.00023321523439235117,
      "loss": 3.1602,
      "step": 131666
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.9663712978363037,
      "learning_rate": 0.0002332112464839925,
      "loss": 3.0329,
      "step": 131667
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9924511909484863,
      "learning_rate": 0.0002332072585880511,
      "loss": 2.9913,
      "step": 131668
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.4641032218933105,
      "learning_rate": 0.0002332032707045277,
      "loss": 2.7088,
      "step": 131669
    },
    {
      "epoch": 1.71,
      "grad_norm": 5.781528472900391,
      "learning_rate": 0.000233199282833423,
      "loss": 2.8899,
      "step": 131670
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.410801649093628,
      "learning_rate": 0.00023319529497473792,
      "loss": 3.1274,
      "step": 131671
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3646018505096436,
      "learning_rate": 0.00023319130712847301,
      "loss": 2.904,
      "step": 131672
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8645141124725342,
      "learning_rate": 0.0002331873192946292,
      "loss": 2.94,
      "step": 131673
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.720632314682007,
      "learning_rate": 0.00023318333147320707,
      "loss": 2.7434,
      "step": 131674
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9836581945419312,
      "learning_rate": 0.0002331793436642075,
      "loss": 2.9703,
      "step": 131675
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8300694227218628,
      "learning_rate": 0.00023317535586763103,
      "loss": 2.8803,
      "step": 131676
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1309807300567627,
      "learning_rate": 0.00023317136808347858,
      "loss": 3.1073,
      "step": 131677
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.522745132446289,
      "learning_rate": 0.00023316738031175079,
      "loss": 3.0919,
      "step": 131678
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.38112735748291,
      "learning_rate": 0.00023316339255244852,
      "loss": 2.9668,
      "step": 131679
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9963445663452148,
      "learning_rate": 0.0002331594048055724,
      "loss": 3.1473,
      "step": 131680
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.105936288833618,
      "learning_rate": 0.0002331554170711232,
      "loss": 3.127,
      "step": 131681
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8077747821807861,
      "learning_rate": 0.00023315142934910162,
      "loss": 3.0854,
      "step": 131682
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8203691244125366,
      "learning_rate": 0.00023314744163950856,
      "loss": 3.0996,
      "step": 131683
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.162719964981079,
      "learning_rate": 0.00023314345394234462,
      "loss": 3.0063,
      "step": 131684
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.268340826034546,
      "learning_rate": 0.00023313946625761058,
      "loss": 2.9163,
      "step": 131685
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8519026041030884,
      "learning_rate": 0.00023313547858530714,
      "loss": 2.9233,
      "step": 131686
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.9002814292907715,
      "learning_rate": 0.00023313149092543507,
      "loss": 2.8328,
      "step": 131687
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.9420663118362427,
      "learning_rate": 0.00023312750327799512,
      "loss": 2.9518,
      "step": 131688
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.660762906074524,
      "learning_rate": 0.000233123515642988,
      "loss": 3.1337,
      "step": 131689
    },
    {
      "epoch": 1.71,
      "grad_norm": 5.099096775054932,
      "learning_rate": 0.0002331195280204146,
      "loss": 3.0983,
      "step": 131690
    },
    {
      "epoch": 1.71,
      "grad_norm": 5.529872417449951,
      "learning_rate": 0.00023311554041027552,
      "loss": 2.9261,
      "step": 131691
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.2141730785369873,
      "learning_rate": 0.00023311155281257145,
      "loss": 2.9619,
      "step": 131692
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.182950735092163,
      "learning_rate": 0.0002331075652273032,
      "loss": 3.3378,
      "step": 131693
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8515206575393677,
      "learning_rate": 0.0002331035776544715,
      "loss": 2.9424,
      "step": 131694
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.8708746433258057,
      "learning_rate": 0.00023309959009407716,
      "loss": 3.1737,
      "step": 131695
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.676001787185669,
      "learning_rate": 0.00023309560254612083,
      "loss": 2.7876,
      "step": 131696
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.4412670135498047,
      "learning_rate": 0.00023309161501060345,
      "loss": 2.9923,
      "step": 131697
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.342519760131836,
      "learning_rate": 0.00023308762748752544,
      "loss": 3.1116,
      "step": 131698
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.4528141021728516,
      "learning_rate": 0.00023308363997688773,
      "loss": 3.061,
      "step": 131699
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.2398221492767334,
      "learning_rate": 0.000233079652478691,
      "loss": 2.9142,
      "step": 131700
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.6466658115386963,
      "learning_rate": 0.00023307566499293603,
      "loss": 3.1395,
      "step": 131701
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3758621215820312,
      "learning_rate": 0.0002330716775196236,
      "loss": 3.0836,
      "step": 131702
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.289933681488037,
      "learning_rate": 0.00023306769005875452,
      "loss": 2.6842,
      "step": 131703
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.4544601440429688,
      "learning_rate": 0.00023306370261032925,
      "loss": 2.852,
      "step": 131704
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.8865015506744385,
      "learning_rate": 0.00023305971517434876,
      "loss": 2.7727,
      "step": 131705
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.7770836353302002,
      "learning_rate": 0.0002330557277508137,
      "loss": 3.3952,
      "step": 131706
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.0100598335266113,
      "learning_rate": 0.00023305174033972482,
      "loss": 2.917,
      "step": 131707
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.004392147064209,
      "learning_rate": 0.00023304775294108294,
      "loss": 3.0147,
      "step": 131708
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1013827323913574,
      "learning_rate": 0.00023304376555488885,
      "loss": 2.7842,
      "step": 131709
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.1082136631011963,
      "learning_rate": 0.00023303977818114308,
      "loss": 2.7046,
      "step": 131710
    },
    {
      "epoch": 1.71,
      "grad_norm": 1.8839565515518188,
      "learning_rate": 0.00023303579081984644,
      "loss": 3.0386,
      "step": 131711
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.072559118270874,
      "learning_rate": 0.00023303180347099975,
      "loss": 2.6339,
      "step": 131712
    },
    {
      "epoch": 1.71,
      "grad_norm": 2.3750858306884766,
      "learning_rate": 0.00023302781613460368,
      "loss": 3.237,
      "step": 131713
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.029613494873047,
      "learning_rate": 0.00023302382881065903,
      "loss": 2.8303,
      "step": 131714
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3053791522979736,
      "learning_rate": 0.0002330198414991666,
      "loss": 2.8967,
      "step": 131715
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.580171823501587,
      "learning_rate": 0.00023301585420012696,
      "loss": 3.1605,
      "step": 131716
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1479737758636475,
      "learning_rate": 0.00023301186691354094,
      "loss": 3.012,
      "step": 131717
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3901686668395996,
      "learning_rate": 0.00023300787963940922,
      "loss": 3.1803,
      "step": 131718
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3670148849487305,
      "learning_rate": 0.00023300389237773263,
      "loss": 2.9099,
      "step": 131719
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.639346122741699,
      "learning_rate": 0.0002329999051285119,
      "loss": 3.0188,
      "step": 131720
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6919264793395996,
      "learning_rate": 0.00023299591789174774,
      "loss": 2.849,
      "step": 131721
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4739840030670166,
      "learning_rate": 0.00023299193066744105,
      "loss": 2.7853,
      "step": 131722
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.190680742263794,
      "learning_rate": 0.00023298794345559225,
      "loss": 2.858,
      "step": 131723
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.219095706939697,
      "learning_rate": 0.00023298395625620232,
      "loss": 2.884,
      "step": 131724
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2367734909057617,
      "learning_rate": 0.00023297996906927192,
      "loss": 3.0449,
      "step": 131725
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8963712453842163,
      "learning_rate": 0.00023297598189480178,
      "loss": 2.9168,
      "step": 131726
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4957101345062256,
      "learning_rate": 0.00023297199473279267,
      "loss": 2.9134,
      "step": 131727
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8695337772369385,
      "learning_rate": 0.00023296800758324548,
      "loss": 3.0777,
      "step": 131728
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.150303840637207,
      "learning_rate": 0.0002329640204461607,
      "loss": 3.1256,
      "step": 131729
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4491066932678223,
      "learning_rate": 0.00023296003332153912,
      "loss": 2.772,
      "step": 131730
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2760000228881836,
      "learning_rate": 0.00023295604620938157,
      "loss": 3.0794,
      "step": 131731
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.629359483718872,
      "learning_rate": 0.00023295205910968872,
      "loss": 2.8041,
      "step": 131732
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.581937074661255,
      "learning_rate": 0.0002329480720224614,
      "loss": 3.0021,
      "step": 131733
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.100504159927368,
      "learning_rate": 0.00023294408494770042,
      "loss": 2.9786,
      "step": 131734
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4888834953308105,
      "learning_rate": 0.00023294009788540622,
      "loss": 2.9683,
      "step": 131735
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.337996482849121,
      "learning_rate": 0.00023293611083557975,
      "loss": 3.0403,
      "step": 131736
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0751984119415283,
      "learning_rate": 0.0002329321237982217,
      "loss": 3.0927,
      "step": 131737
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.05757474899292,
      "learning_rate": 0.0002329281367733329,
      "loss": 3.0618,
      "step": 131738
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3720598220825195,
      "learning_rate": 0.00023292414976091398,
      "loss": 2.9732,
      "step": 131739
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.604393243789673,
      "learning_rate": 0.00023292016276096585,
      "loss": 3.0539,
      "step": 131740
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0607097148895264,
      "learning_rate": 0.000232916175773489,
      "loss": 2.9994,
      "step": 131741
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4005560874938965,
      "learning_rate": 0.00023291218879848433,
      "loss": 2.7847,
      "step": 131742
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1313767433166504,
      "learning_rate": 0.0002329082018359525,
      "loss": 2.8522,
      "step": 131743
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9956802129745483,
      "learning_rate": 0.00023290421488589435,
      "loss": 2.9534,
      "step": 131744
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0488476753234863,
      "learning_rate": 0.00023290022794831056,
      "loss": 3.1982,
      "step": 131745
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.78230881690979,
      "learning_rate": 0.000232896241023202,
      "loss": 2.6916,
      "step": 131746
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6307313442230225,
      "learning_rate": 0.00023289225411056918,
      "loss": 2.9025,
      "step": 131747
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1134564876556396,
      "learning_rate": 0.00023288826721041297,
      "loss": 2.9223,
      "step": 131748
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9983104467391968,
      "learning_rate": 0.00023288428032273406,
      "loss": 2.8397,
      "step": 131749
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.102684497833252,
      "learning_rate": 0.00023288029344753326,
      "loss": 2.9016,
      "step": 131750
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.366497039794922,
      "learning_rate": 0.0002328763065848113,
      "loss": 2.8768,
      "step": 131751
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1985697746276855,
      "learning_rate": 0.00023287231973456892,
      "loss": 3.1231,
      "step": 131752
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2662320137023926,
      "learning_rate": 0.00023286833289680686,
      "loss": 2.518,
      "step": 131753
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4001412391662598,
      "learning_rate": 0.00023286434607152576,
      "loss": 3.0039,
      "step": 131754
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.319082736968994,
      "learning_rate": 0.0002328603592587265,
      "loss": 3.1693,
      "step": 131755
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0877604484558105,
      "learning_rate": 0.0002328563724584097,
      "loss": 2.9541,
      "step": 131756
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8849763870239258,
      "learning_rate": 0.0002328523856705762,
      "loss": 3.1782,
      "step": 131757
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8254846334457397,
      "learning_rate": 0.00023284839889522676,
      "loss": 3.0497,
      "step": 131758
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3912627696990967,
      "learning_rate": 0.00023284441213236202,
      "loss": 3.1813,
      "step": 131759
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.478214979171753,
      "learning_rate": 0.0002328404253819828,
      "loss": 3.0362,
      "step": 131760
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8502317667007446,
      "learning_rate": 0.00023283643864408982,
      "loss": 3.0124,
      "step": 131761
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9533987045288086,
      "learning_rate": 0.00023283245191868377,
      "loss": 3.1592,
      "step": 131762
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4379470348358154,
      "learning_rate": 0.00023282846520576546,
      "loss": 3.0384,
      "step": 131763
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.536689519882202,
      "learning_rate": 0.00023282447850533563,
      "loss": 2.9367,
      "step": 131764
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5852255821228027,
      "learning_rate": 0.00023282049181739495,
      "loss": 2.7098,
      "step": 131765
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9557793140411377,
      "learning_rate": 0.00023281650514194423,
      "loss": 3.0376,
      "step": 131766
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.284101724624634,
      "learning_rate": 0.0002328125184789842,
      "loss": 3.1333,
      "step": 131767
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3038578033447266,
      "learning_rate": 0.0002328085318285156,
      "loss": 2.6833,
      "step": 131768
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.211327314376831,
      "learning_rate": 0.00023280454519053912,
      "loss": 2.9902,
      "step": 131769
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.270242929458618,
      "learning_rate": 0.00023280055856505558,
      "loss": 3.1813,
      "step": 131770
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8870091438293457,
      "learning_rate": 0.00023279657195206566,
      "loss": 2.9274,
      "step": 131771
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4376399517059326,
      "learning_rate": 0.00023279258535157014,
      "loss": 2.9427,
      "step": 131772
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.99618661403656,
      "learning_rate": 0.00023278859876356973,
      "loss": 3.0068,
      "step": 131773
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5632336139678955,
      "learning_rate": 0.0002327846121880652,
      "loss": 3.1358,
      "step": 131774
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.379647970199585,
      "learning_rate": 0.00023278062562505733,
      "loss": 2.9363,
      "step": 131775
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.7212672233581543,
      "learning_rate": 0.00023277663907454678,
      "loss": 2.8365,
      "step": 131776
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.290529251098633,
      "learning_rate": 0.00023277265253653431,
      "loss": 3.1629,
      "step": 131777
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1950626373291016,
      "learning_rate": 0.00023276866601102064,
      "loss": 2.7959,
      "step": 131778
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6267426013946533,
      "learning_rate": 0.00023276467949800655,
      "loss": 2.7759,
      "step": 131779
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.857349395751953,
      "learning_rate": 0.0002327606929974928,
      "loss": 3.0678,
      "step": 131780
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.534261465072632,
      "learning_rate": 0.00023275670650948008,
      "loss": 3.06,
      "step": 131781
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.782402992248535,
      "learning_rate": 0.0002327527200339693,
      "loss": 2.9104,
      "step": 131782
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0819756984710693,
      "learning_rate": 0.00023274873357096088,
      "loss": 2.8467,
      "step": 131783
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.510904312133789,
      "learning_rate": 0.00023274474712045579,
      "loss": 2.9084,
      "step": 131784
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5256247520446777,
      "learning_rate": 0.0002327407606824547,
      "loss": 3.0227,
      "step": 131785
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0674712657928467,
      "learning_rate": 0.00023273677425695842,
      "loss": 2.9268,
      "step": 131786
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5057942867279053,
      "learning_rate": 0.00023273278784396762,
      "loss": 2.7587,
      "step": 131787
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8525651693344116,
      "learning_rate": 0.00023272880144348303,
      "loss": 2.8536,
      "step": 131788
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2692668437957764,
      "learning_rate": 0.00023272481505550558,
      "loss": 3.179,
      "step": 131789
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.493366003036499,
      "learning_rate": 0.00023272082868003573,
      "loss": 3.0234,
      "step": 131790
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0350143909454346,
      "learning_rate": 0.00023271684231707432,
      "loss": 2.8439,
      "step": 131791
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.772750973701477,
      "learning_rate": 0.00023271285596662217,
      "loss": 3.123,
      "step": 131792
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0813775062561035,
      "learning_rate": 0.00023270886962867992,
      "loss": 3.0265,
      "step": 131793
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.212052583694458,
      "learning_rate": 0.0002327048833032484,
      "loss": 3.296,
      "step": 131794
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2748537063598633,
      "learning_rate": 0.00023270089699032841,
      "loss": 3.1952,
      "step": 131795
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.167759895324707,
      "learning_rate": 0.00023269691068992048,
      "loss": 3.0593,
      "step": 131796
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.142195224761963,
      "learning_rate": 0.00023269292440202547,
      "loss": 2.8463,
      "step": 131797
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9547414779663086,
      "learning_rate": 0.00023268893812664412,
      "loss": 3.1731,
      "step": 131798
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.123605489730835,
      "learning_rate": 0.00023268495186377715,
      "loss": 3.0006,
      "step": 131799
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1498067378997803,
      "learning_rate": 0.00023268096561342532,
      "loss": 2.9795,
      "step": 131800
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.418100595474243,
      "learning_rate": 0.0002326769793755895,
      "loss": 2.939,
      "step": 131801
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9343409538269043,
      "learning_rate": 0.00023267299315027018,
      "loss": 2.9923,
      "step": 131802
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3152565956115723,
      "learning_rate": 0.00023266900693746822,
      "loss": 3.0776,
      "step": 131803
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9718962907791138,
      "learning_rate": 0.00023266502073718436,
      "loss": 3.0207,
      "step": 131804
    },
    {
      "epoch": 1.72,
      "grad_norm": 5.891678333282471,
      "learning_rate": 0.00023266103454941934,
      "loss": 3.0308,
      "step": 131805
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.723045825958252,
      "learning_rate": 0.0002326570483741739,
      "loss": 3.1939,
      "step": 131806
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.639873504638672,
      "learning_rate": 0.00023265306221144895,
      "loss": 2.9427,
      "step": 131807
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.421276092529297,
      "learning_rate": 0.0002326490760612449,
      "loss": 3.1188,
      "step": 131808
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.792738437652588,
      "learning_rate": 0.00023264508992356268,
      "loss": 2.8716,
      "step": 131809
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8110291957855225,
      "learning_rate": 0.000232641103798403,
      "loss": 3.1739,
      "step": 131810
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2732553482055664,
      "learning_rate": 0.00023263711768576657,
      "loss": 3.1859,
      "step": 131811
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2255055904388428,
      "learning_rate": 0.00023263313158565423,
      "loss": 3.0851,
      "step": 131812
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9918363094329834,
      "learning_rate": 0.00023262914549806676,
      "loss": 2.9453,
      "step": 131813
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2218470573425293,
      "learning_rate": 0.0002326251594230047,
      "loss": 3.0598,
      "step": 131814
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8228967189788818,
      "learning_rate": 0.00023262117336046885,
      "loss": 3.0422,
      "step": 131815
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8429813385009766,
      "learning_rate": 0.00023261718731046007,
      "loss": 2.8008,
      "step": 131816
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9019588232040405,
      "learning_rate": 0.00023261320127297897,
      "loss": 2.9754,
      "step": 131817
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0883541107177734,
      "learning_rate": 0.00023260921524802635,
      "loss": 2.7935,
      "step": 131818
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.473902702331543,
      "learning_rate": 0.0002326052292356031,
      "loss": 2.8447,
      "step": 131819
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0710129737854004,
      "learning_rate": 0.00023260124323570965,
      "loss": 2.904,
      "step": 131820
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.038266658782959,
      "learning_rate": 0.00023259725724834694,
      "loss": 2.8283,
      "step": 131821
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1756768226623535,
      "learning_rate": 0.00023259327127351565,
      "loss": 2.7802,
      "step": 131822
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.653323769569397,
      "learning_rate": 0.00023258928531121655,
      "loss": 3.1332,
      "step": 131823
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1816203594207764,
      "learning_rate": 0.00023258529936145037,
      "loss": 2.9243,
      "step": 131824
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1555960178375244,
      "learning_rate": 0.000232581313424218,
      "loss": 3.0735,
      "step": 131825
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.714090347290039,
      "learning_rate": 0.00023257732749951986,
      "loss": 3.1304,
      "step": 131826
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6271891593933105,
      "learning_rate": 0.0002325733415873569,
      "loss": 2.6836,
      "step": 131827
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0362257957458496,
      "learning_rate": 0.00023256935568772985,
      "loss": 3.0435,
      "step": 131828
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.200270891189575,
      "learning_rate": 0.0002325653698006394,
      "loss": 2.9271,
      "step": 131829
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.102898597717285,
      "learning_rate": 0.00023256138392608634,
      "loss": 3.2054,
      "step": 131830
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9715877771377563,
      "learning_rate": 0.0002325573980640715,
      "loss": 3.1649,
      "step": 131831
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.597609043121338,
      "learning_rate": 0.0002325534122145954,
      "loss": 3.0242,
      "step": 131832
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.122265100479126,
      "learning_rate": 0.0002325494263776589,
      "loss": 2.8993,
      "step": 131833
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3697614669799805,
      "learning_rate": 0.00023254544055326272,
      "loss": 3.0535,
      "step": 131834
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.278266668319702,
      "learning_rate": 0.00023254145474140762,
      "loss": 2.9661,
      "step": 131835
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0435290336608887,
      "learning_rate": 0.00023253746894209433,
      "loss": 2.8658,
      "step": 131836
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.189310312271118,
      "learning_rate": 0.0002325334831553237,
      "loss": 3.1961,
      "step": 131837
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8911527395248413,
      "learning_rate": 0.00023252949738109628,
      "loss": 3.1192,
      "step": 131838
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8315048217773438,
      "learning_rate": 0.00023252551161941288,
      "loss": 3.0416,
      "step": 131839
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.125399589538574,
      "learning_rate": 0.0002325215258702743,
      "loss": 2.9925,
      "step": 131840
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8479242324829102,
      "learning_rate": 0.0002325175401336812,
      "loss": 2.7831,
      "step": 131841
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9750473499298096,
      "learning_rate": 0.00023251355440963436,
      "loss": 3.0247,
      "step": 131842
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.881217360496521,
      "learning_rate": 0.00023250956869813464,
      "loss": 3.0943,
      "step": 131843
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8332979679107666,
      "learning_rate": 0.00023250558299918258,
      "loss": 2.935,
      "step": 131844
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.108067274093628,
      "learning_rate": 0.00023250159731277902,
      "loss": 2.8997,
      "step": 131845
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9704186916351318,
      "learning_rate": 0.00023249761163892467,
      "loss": 2.9033,
      "step": 131846
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9345636367797852,
      "learning_rate": 0.00023249362597762028,
      "loss": 3.0482,
      "step": 131847
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.735295534133911,
      "learning_rate": 0.00023248964032886662,
      "loss": 2.9218,
      "step": 131848
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7462053298950195,
      "learning_rate": 0.00023248565469266444,
      "loss": 2.9776,
      "step": 131849
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8778088092803955,
      "learning_rate": 0.00023248166906901442,
      "loss": 2.9473,
      "step": 131850
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.4678115844726562,
      "learning_rate": 0.0002324776834579173,
      "loss": 2.9997,
      "step": 131851
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9628357887268066,
      "learning_rate": 0.00023247369785937393,
      "loss": 2.7686,
      "step": 131852
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.043555736541748,
      "learning_rate": 0.0002324697122733849,
      "loss": 2.8342,
      "step": 131853
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1423425674438477,
      "learning_rate": 0.00023246572669995103,
      "loss": 3.039,
      "step": 131854
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8581172227859497,
      "learning_rate": 0.00023246174113907307,
      "loss": 3.096,
      "step": 131855
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3913838863372803,
      "learning_rate": 0.0002324577555907518,
      "loss": 2.9449,
      "step": 131856
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.267643928527832,
      "learning_rate": 0.00023245377005498785,
      "loss": 3.1942,
      "step": 131857
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1948466300964355,
      "learning_rate": 0.00023244978453178204,
      "loss": 2.8066,
      "step": 131858
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.647212266921997,
      "learning_rate": 0.0002324457990211351,
      "loss": 2.6958,
      "step": 131859
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3713157176971436,
      "learning_rate": 0.00023244181352304773,
      "loss": 3.131,
      "step": 131860
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9309468269348145,
      "learning_rate": 0.00023243782803752068,
      "loss": 3.322,
      "step": 131861
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.261868476867676,
      "learning_rate": 0.00023243384256455482,
      "loss": 3.027,
      "step": 131862
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1243319511413574,
      "learning_rate": 0.00023242985710415068,
      "loss": 2.9859,
      "step": 131863
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.924659490585327,
      "learning_rate": 0.00023242587165630911,
      "loss": 2.9891,
      "step": 131864
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2830193042755127,
      "learning_rate": 0.00023242188622103085,
      "loss": 2.9479,
      "step": 131865
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2621805667877197,
      "learning_rate": 0.00023241790079831675,
      "loss": 2.8287,
      "step": 131866
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2097902297973633,
      "learning_rate": 0.0002324139153881673,
      "loss": 3.1194,
      "step": 131867
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9132919311523438,
      "learning_rate": 0.0002324099299905835,
      "loss": 2.957,
      "step": 131868
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.6119883060455322,
      "learning_rate": 0.0002324059446055659,
      "loss": 2.9232,
      "step": 131869
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.296947717666626,
      "learning_rate": 0.00023240195923311527,
      "loss": 2.9489,
      "step": 131870
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.8243303298950195,
      "learning_rate": 0.0002323979738732324,
      "loss": 2.7691,
      "step": 131871
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.363950729370117,
      "learning_rate": 0.00023239398852591803,
      "loss": 3.0773,
      "step": 131872
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.536736249923706,
      "learning_rate": 0.00023239000319117293,
      "loss": 2.8406,
      "step": 131873
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5362842082977295,
      "learning_rate": 0.00023238601786899787,
      "loss": 3.0982,
      "step": 131874
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9146862030029297,
      "learning_rate": 0.00023238203255939347,
      "loss": 3.0334,
      "step": 131875
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.174126625061035,
      "learning_rate": 0.00023237804726236046,
      "loss": 2.9829,
      "step": 131876
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.697291851043701,
      "learning_rate": 0.00023237406197789968,
      "loss": 2.7853,
      "step": 131877
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.309112787246704,
      "learning_rate": 0.00023237007670601185,
      "loss": 2.9194,
      "step": 131878
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.666863203048706,
      "learning_rate": 0.0002323660914466977,
      "loss": 2.9554,
      "step": 131879
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.645590305328369,
      "learning_rate": 0.00023236210619995804,
      "loss": 2.872,
      "step": 131880
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7850044965744019,
      "learning_rate": 0.00023235812096579348,
      "loss": 2.9399,
      "step": 131881
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.82592511177063,
      "learning_rate": 0.00023235413574420477,
      "loss": 2.808,
      "step": 131882
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.32368803024292,
      "learning_rate": 0.00023235015053519275,
      "loss": 2.9439,
      "step": 131883
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.840344190597534,
      "learning_rate": 0.0002323461653387581,
      "loss": 2.9701,
      "step": 131884
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.454181432723999,
      "learning_rate": 0.00023234218015490154,
      "loss": 3.0443,
      "step": 131885
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.114628553390503,
      "learning_rate": 0.000232338194983624,
      "loss": 2.8303,
      "step": 131886
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.2486166954040527,
      "learning_rate": 0.00023233420982492596,
      "loss": 2.9574,
      "step": 131887
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.1439433097839355,
      "learning_rate": 0.00023233022467880827,
      "loss": 3.147,
      "step": 131888
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.274075508117676,
      "learning_rate": 0.00023232623954527162,
      "loss": 2.8831,
      "step": 131889
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9961060285568237,
      "learning_rate": 0.00023232225442431682,
      "loss": 2.9438,
      "step": 131890
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2003417015075684,
      "learning_rate": 0.0002323182693159446,
      "loss": 3.0255,
      "step": 131891
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4854910373687744,
      "learning_rate": 0.0002323142842201558,
      "loss": 2.9804,
      "step": 131892
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.16996169090271,
      "learning_rate": 0.00023231029913695094,
      "loss": 3.1533,
      "step": 131893
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3307912349700928,
      "learning_rate": 0.00023230631406633088,
      "loss": 3.036,
      "step": 131894
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.161325216293335,
      "learning_rate": 0.00023230232900829637,
      "loss": 3.1032,
      "step": 131895
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.271859645843506,
      "learning_rate": 0.0002322983439628481,
      "loss": 2.9622,
      "step": 131896
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.145822286605835,
      "learning_rate": 0.00023229435892998686,
      "loss": 2.8974,
      "step": 131897
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.13350510597229,
      "learning_rate": 0.0002322903739097135,
      "loss": 2.8677,
      "step": 131898
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7378978729248047,
      "learning_rate": 0.00023228638890202853,
      "loss": 3.0377,
      "step": 131899
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9647948741912842,
      "learning_rate": 0.00023228240390693274,
      "loss": 2.8937,
      "step": 131900
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.109797716140747,
      "learning_rate": 0.00023227841892442698,
      "loss": 2.7313,
      "step": 131901
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.9470744132995605,
      "learning_rate": 0.00023227443395451192,
      "loss": 3.3345,
      "step": 131902
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4575071334838867,
      "learning_rate": 0.00023227044899718833,
      "loss": 2.8714,
      "step": 131903
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.646057367324829,
      "learning_rate": 0.00023226646405245712,
      "loss": 2.8488,
      "step": 131904
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3052845001220703,
      "learning_rate": 0.00023226247912031868,
      "loss": 2.989,
      "step": 131905
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.264540195465088,
      "learning_rate": 0.00023225849420077392,
      "loss": 2.8158,
      "step": 131906
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5360512733459473,
      "learning_rate": 0.00023225450929382355,
      "loss": 2.9634,
      "step": 131907
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4690632820129395,
      "learning_rate": 0.0002322505243994684,
      "loss": 3.0865,
      "step": 131908
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1674609184265137,
      "learning_rate": 0.00023224653951770913,
      "loss": 3.1598,
      "step": 131909
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.748257875442505,
      "learning_rate": 0.00023224255464854664,
      "loss": 2.9852,
      "step": 131910
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.958404779434204,
      "learning_rate": 0.00023223856979198143,
      "loss": 2.9245,
      "step": 131911
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.051311731338501,
      "learning_rate": 0.00023223458494801431,
      "loss": 3.1315,
      "step": 131912
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9753128290176392,
      "learning_rate": 0.0002322306001166461,
      "loss": 2.6914,
      "step": 131913
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8016464710235596,
      "learning_rate": 0.00023222661529787746,
      "loss": 2.8784,
      "step": 131914
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.766188383102417,
      "learning_rate": 0.00023222263049170922,
      "loss": 2.9979,
      "step": 131915
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8347859382629395,
      "learning_rate": 0.00023221864569814216,
      "loss": 3.0773,
      "step": 131916
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9582432508468628,
      "learning_rate": 0.00023221466091717682,
      "loss": 2.9336,
      "step": 131917
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.417314291000366,
      "learning_rate": 0.00023221067614881405,
      "loss": 2.9945,
      "step": 131918
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.663065195083618,
      "learning_rate": 0.0002322066913930546,
      "loss": 3.1756,
      "step": 131919
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5602617263793945,
      "learning_rate": 0.0002322027066498992,
      "loss": 3.0101,
      "step": 131920
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.126589059829712,
      "learning_rate": 0.0002321987219193486,
      "loss": 3.0526,
      "step": 131921
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.411703109741211,
      "learning_rate": 0.00023219473720140354,
      "loss": 2.8519,
      "step": 131922
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.67622447013855,
      "learning_rate": 0.00023219075249606487,
      "loss": 2.9569,
      "step": 131923
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8567147254943848,
      "learning_rate": 0.00023218676780333315,
      "loss": 2.8376,
      "step": 131924
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.912482500076294,
      "learning_rate": 0.00023218278312320912,
      "loss": 2.9885,
      "step": 131925
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6282906532287598,
      "learning_rate": 0.0002321787984556936,
      "loss": 2.8474,
      "step": 131926
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.719331979751587,
      "learning_rate": 0.00023217481380078734,
      "loss": 2.7075,
      "step": 131927
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9186583757400513,
      "learning_rate": 0.00023217082915849105,
      "loss": 2.88,
      "step": 131928
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7386540174484253,
      "learning_rate": 0.00023216684452880565,
      "loss": 2.8669,
      "step": 131929
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8191356658935547,
      "learning_rate": 0.00023216285991173156,
      "loss": 2.8523,
      "step": 131930
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2182767391204834,
      "learning_rate": 0.00023215887530726964,
      "loss": 3.0965,
      "step": 131931
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9081313610076904,
      "learning_rate": 0.0002321548907154207,
      "loss": 3.0737,
      "step": 131932
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8862615823745728,
      "learning_rate": 0.00023215090613618549,
      "loss": 2.8871,
      "step": 131933
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.6771036386489868,
      "learning_rate": 0.00023214692156956463,
      "loss": 2.7807,
      "step": 131934
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5687994956970215,
      "learning_rate": 0.00023214293701555904,
      "loss": 2.8444,
      "step": 131935
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.835186243057251,
      "learning_rate": 0.00023213895247416928,
      "loss": 2.9336,
      "step": 131936
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.975403070449829,
      "learning_rate": 0.00023213496794539625,
      "loss": 3.1296,
      "step": 131937
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1519582271575928,
      "learning_rate": 0.00023213098342924054,
      "loss": 3.0254,
      "step": 131938
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0823333263397217,
      "learning_rate": 0.00023212699892570296,
      "loss": 3.0614,
      "step": 131939
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9189172983169556,
      "learning_rate": 0.00023212301443478425,
      "loss": 2.7962,
      "step": 131940
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.416208267211914,
      "learning_rate": 0.00023211902995648521,
      "loss": 3.0423,
      "step": 131941
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3182060718536377,
      "learning_rate": 0.00023211504549080648,
      "loss": 3.3362,
      "step": 131942
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.48712420463562,
      "learning_rate": 0.00023211106103774882,
      "loss": 2.9747,
      "step": 131943
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2974863052368164,
      "learning_rate": 0.00023210707659731308,
      "loss": 2.9385,
      "step": 131944
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0340349674224854,
      "learning_rate": 0.00023210309216949984,
      "loss": 3.0615,
      "step": 131945
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.947072982788086,
      "learning_rate": 0.00023209910775430994,
      "loss": 2.9071,
      "step": 131946
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.845348596572876,
      "learning_rate": 0.0002320951233517441,
      "loss": 2.7331,
      "step": 131947
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0363762378692627,
      "learning_rate": 0.00023209113896180308,
      "loss": 3.1158,
      "step": 131948
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.701425552368164,
      "learning_rate": 0.00023208715458448756,
      "loss": 3.0712,
      "step": 131949
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3995022773742676,
      "learning_rate": 0.0002320831702197983,
      "loss": 2.9469,
      "step": 131950
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.637697219848633,
      "learning_rate": 0.00023207918586773613,
      "loss": 3.0564,
      "step": 131951
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.003713607788086,
      "learning_rate": 0.00023207520152830169,
      "loss": 2.9191,
      "step": 131952
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6628448963165283,
      "learning_rate": 0.00023207121720149578,
      "loss": 2.9867,
      "step": 131953
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9469094276428223,
      "learning_rate": 0.00023206723288731907,
      "loss": 2.8991,
      "step": 131954
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1345651149749756,
      "learning_rate": 0.00023206324858577232,
      "loss": 2.8809,
      "step": 131955
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8102591037750244,
      "learning_rate": 0.00023205926429685633,
      "loss": 3.1189,
      "step": 131956
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.17673397064209,
      "learning_rate": 0.0002320552800205718,
      "loss": 2.8495,
      "step": 131957
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3093793392181396,
      "learning_rate": 0.00023205129575691954,
      "loss": 2.9285,
      "step": 131958
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.514958381652832,
      "learning_rate": 0.00023204731150590019,
      "loss": 2.7336,
      "step": 131959
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.204054832458496,
      "learning_rate": 0.00023204332726751447,
      "loss": 2.8458,
      "step": 131960
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.891585350036621,
      "learning_rate": 0.00023203934304176323,
      "loss": 2.8971,
      "step": 131961
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.618119716644287,
      "learning_rate": 0.0002320353588286471,
      "loss": 3.114,
      "step": 131962
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.085740804672241,
      "learning_rate": 0.0002320313746281669,
      "loss": 3.0259,
      "step": 131963
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9851264953613281,
      "learning_rate": 0.00023202739044032335,
      "loss": 3.045,
      "step": 131964
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8343505859375,
      "learning_rate": 0.00023202340626511734,
      "loss": 3.1363,
      "step": 131965
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.239720106124878,
      "learning_rate": 0.0002320194221025493,
      "loss": 3.0144,
      "step": 131966
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.106170177459717,
      "learning_rate": 0.00023201543795262018,
      "loss": 3.2024,
      "step": 131967
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5625293254852295,
      "learning_rate": 0.00023201145381533062,
      "loss": 2.9191,
      "step": 131968
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.442021131515503,
      "learning_rate": 0.00023200746969068148,
      "loss": 3.0667,
      "step": 131969
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.026543140411377,
      "learning_rate": 0.00023200348557867337,
      "loss": 3.0264,
      "step": 131970
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.417346239089966,
      "learning_rate": 0.00023199950147930726,
      "loss": 2.8128,
      "step": 131971
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.971517562866211,
      "learning_rate": 0.00023199551739258356,
      "loss": 3.308,
      "step": 131972
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.288522958755493,
      "learning_rate": 0.0002319915333185032,
      "loss": 2.8684,
      "step": 131973
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1636388301849365,
      "learning_rate": 0.00023198754925706693,
      "loss": 3.0683,
      "step": 131974
    },
    {
      "epoch": 1.72,
      "grad_norm": 5.019983291625977,
      "learning_rate": 0.00023198356520827542,
      "loss": 2.8161,
      "step": 131975
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.602461576461792,
      "learning_rate": 0.00023197958117212946,
      "loss": 3.0165,
      "step": 131976
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7642006874084473,
      "learning_rate": 0.00023197559714862987,
      "loss": 3.0409,
      "step": 131977
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6674201488494873,
      "learning_rate": 0.00023197161313777724,
      "loss": 3.0965,
      "step": 131978
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0835020542144775,
      "learning_rate": 0.00023196762913957233,
      "loss": 2.9388,
      "step": 131979
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.885850191116333,
      "learning_rate": 0.00023196364515401592,
      "loss": 3.3226,
      "step": 131980
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.172912836074829,
      "learning_rate": 0.00023195966118110876,
      "loss": 3.0378,
      "step": 131981
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9437711238861084,
      "learning_rate": 0.0002319556772208516,
      "loss": 2.8379,
      "step": 131982
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3350629806518555,
      "learning_rate": 0.00023195169327324517,
      "loss": 2.9581,
      "step": 131983
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5605204105377197,
      "learning_rate": 0.0002319477093382903,
      "loss": 3.0353,
      "step": 131984
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.312654733657837,
      "learning_rate": 0.0002319437254159875,
      "loss": 3.1133,
      "step": 131985
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2626099586486816,
      "learning_rate": 0.00023193974150633768,
      "loss": 3.2409,
      "step": 131986
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6390483379364014,
      "learning_rate": 0.00023193575760934153,
      "loss": 2.8541,
      "step": 131987
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9709922075271606,
      "learning_rate": 0.0002319317737249998,
      "loss": 2.9858,
      "step": 131988
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.6615296602249146,
      "learning_rate": 0.00023192778985331328,
      "loss": 3.0879,
      "step": 131989
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.909554123878479,
      "learning_rate": 0.00023192380599428274,
      "loss": 3.057,
      "step": 131990
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1165390014648438,
      "learning_rate": 0.00023191982214790875,
      "loss": 2.7953,
      "step": 131991
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7023661136627197,
      "learning_rate": 0.00023191583831419214,
      "loss": 2.9284,
      "step": 131992
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9035866260528564,
      "learning_rate": 0.00023191185449313368,
      "loss": 2.8495,
      "step": 131993
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.009289503097534,
      "learning_rate": 0.0002319078706847341,
      "loss": 3.3316,
      "step": 131994
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1126186847686768,
      "learning_rate": 0.0002319038868889941,
      "loss": 3.1408,
      "step": 131995
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1676859855651855,
      "learning_rate": 0.0002318999031059146,
      "loss": 2.8705,
      "step": 131996
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0312294960021973,
      "learning_rate": 0.00023189591933549605,
      "loss": 2.8625,
      "step": 131997
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1268796920776367,
      "learning_rate": 0.00023189193557773935,
      "loss": 2.9741,
      "step": 131998
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.424311876296997,
      "learning_rate": 0.00023188795183264524,
      "loss": 2.8103,
      "step": 131999
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2705130577087402,
      "learning_rate": 0.0002318839681002144,
      "loss": 2.9475,
      "step": 132000
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2786080837249756,
      "learning_rate": 0.00023187998438044765,
      "loss": 2.9586,
      "step": 132001
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.834885835647583,
      "learning_rate": 0.00023187600067334582,
      "loss": 2.8967,
      "step": 132002
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2551283836364746,
      "learning_rate": 0.00023187201697890937,
      "loss": 2.9876,
      "step": 132003
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.220543384552002,
      "learning_rate": 0.00023186803329713923,
      "loss": 2.6846,
      "step": 132004
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9179823398590088,
      "learning_rate": 0.00023186404962803608,
      "loss": 2.8192,
      "step": 132005
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0515530109405518,
      "learning_rate": 0.00023186006597160073,
      "loss": 2.9589,
      "step": 132006
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.098702907562256,
      "learning_rate": 0.00023185608232783385,
      "loss": 3.1646,
      "step": 132007
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2636420726776123,
      "learning_rate": 0.00023185209869673636,
      "loss": 3.013,
      "step": 132008
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.2978107929229736,
      "learning_rate": 0.0002318481150783087,
      "loss": 2.8116,
      "step": 132009
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.833222508430481,
      "learning_rate": 0.00023184413147255177,
      "loss": 3.0235,
      "step": 132010
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.203007221221924,
      "learning_rate": 0.0002318401478794663,
      "loss": 2.9388,
      "step": 132011
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9277077913284302,
      "learning_rate": 0.00023183616429905306,
      "loss": 3.1517,
      "step": 132012
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0008444786071777,
      "learning_rate": 0.00023183218073131277,
      "loss": 3.0999,
      "step": 132013
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.358515501022339,
      "learning_rate": 0.00023182819717624625,
      "loss": 2.9784,
      "step": 132014
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.878303050994873,
      "learning_rate": 0.00023182421363385404,
      "loss": 3.1467,
      "step": 132015
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0744643211364746,
      "learning_rate": 0.00023182023010413698,
      "loss": 2.9492,
      "step": 132016
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.063297986984253,
      "learning_rate": 0.00023181624658709585,
      "loss": 3.0712,
      "step": 132017
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3819189071655273,
      "learning_rate": 0.00023181226308273138,
      "loss": 2.869,
      "step": 132018
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9157699346542358,
      "learning_rate": 0.00023180827959104426,
      "loss": 2.8697,
      "step": 132019
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8788233995437622,
      "learning_rate": 0.00023180429611203534,
      "loss": 2.835,
      "step": 132020
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.674648761749268,
      "learning_rate": 0.0002318003126457053,
      "loss": 2.8551,
      "step": 132021
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.804626941680908,
      "learning_rate": 0.00023179632919205482,
      "loss": 3.0055,
      "step": 132022
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.153902530670166,
      "learning_rate": 0.00023179234575108467,
      "loss": 3.0892,
      "step": 132023
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9472073316574097,
      "learning_rate": 0.00023178836232279562,
      "loss": 3.0291,
      "step": 132024
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.504730224609375,
      "learning_rate": 0.00023178437890718838,
      "loss": 2.9137,
      "step": 132025
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.294315814971924,
      "learning_rate": 0.0002317803955042638,
      "loss": 3.1111,
      "step": 132026
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.668121576309204,
      "learning_rate": 0.00023177641211402246,
      "loss": 2.8759,
      "step": 132027
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5281412601470947,
      "learning_rate": 0.00023177242873646518,
      "loss": 3.366,
      "step": 132028
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.421915292739868,
      "learning_rate": 0.00023176844537159275,
      "loss": 2.865,
      "step": 132029
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.331481456756592,
      "learning_rate": 0.0002317644620194058,
      "loss": 2.8656,
      "step": 132030
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4026811122894287,
      "learning_rate": 0.00023176047867990508,
      "loss": 3.0794,
      "step": 132031
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4506356716156006,
      "learning_rate": 0.00023175649535309148,
      "loss": 3.1918,
      "step": 132032
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.2537097930908203,
      "learning_rate": 0.0002317525120389656,
      "loss": 3.1593,
      "step": 132033
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8296819925308228,
      "learning_rate": 0.00023174852873752814,
      "loss": 3.3104,
      "step": 132034
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.910645604133606,
      "learning_rate": 0.00023174454544877998,
      "loss": 3.1685,
      "step": 132035
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.248012065887451,
      "learning_rate": 0.0002317405621727218,
      "loss": 2.8888,
      "step": 132036
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1012203693389893,
      "learning_rate": 0.00023173657890935434,
      "loss": 2.8326,
      "step": 132037
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.06307315826416,
      "learning_rate": 0.00023173259565867837,
      "loss": 2.8974,
      "step": 132038
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0533370971679688,
      "learning_rate": 0.00023172861242069456,
      "loss": 3.0851,
      "step": 132039
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.086622476577759,
      "learning_rate": 0.00023172462919540364,
      "loss": 3.0963,
      "step": 132040
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2471652030944824,
      "learning_rate": 0.00023172064598280646,
      "loss": 3.1188,
      "step": 132041
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.432126760482788,
      "learning_rate": 0.00023171666278290368,
      "loss": 2.9854,
      "step": 132042
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.805452585220337,
      "learning_rate": 0.00023171267959569612,
      "loss": 2.9511,
      "step": 132043
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.517547369003296,
      "learning_rate": 0.00023170869642118446,
      "loss": 2.9159,
      "step": 132044
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.1613447666168213,
      "learning_rate": 0.00023170471325936938,
      "loss": 3.0366,
      "step": 132045
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4627277851104736,
      "learning_rate": 0.00023170073011025166,
      "loss": 2.9243,
      "step": 132046
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.302755117416382,
      "learning_rate": 0.00023169674697383206,
      "loss": 3.0886,
      "step": 132047
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5417089462280273,
      "learning_rate": 0.00023169276385011134,
      "loss": 3.09,
      "step": 132048
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.255589485168457,
      "learning_rate": 0.00023168878073909026,
      "loss": 3.0094,
      "step": 132049
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.912729501724243,
      "learning_rate": 0.00023168479764076958,
      "loss": 2.7878,
      "step": 132050
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.940354585647583,
      "learning_rate": 0.00023168081455514998,
      "loss": 2.9945,
      "step": 132051
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.219034671783447,
      "learning_rate": 0.00023167683148223207,
      "loss": 3.0394,
      "step": 132052
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.2879157066345215,
      "learning_rate": 0.00023167284842201682,
      "loss": 2.8604,
      "step": 132053
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.148862361907959,
      "learning_rate": 0.00023166886537450486,
      "loss": 2.9561,
      "step": 132054
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9804327487945557,
      "learning_rate": 0.0002316648823396969,
      "loss": 3.028,
      "step": 132055
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.428224802017212,
      "learning_rate": 0.00023166089931759377,
      "loss": 3.0267,
      "step": 132056
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.718035936355591,
      "learning_rate": 0.00023165691630819626,
      "loss": 2.759,
      "step": 132057
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.207113027572632,
      "learning_rate": 0.00023165293331150493,
      "loss": 3.0404,
      "step": 132058
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1062796115875244,
      "learning_rate": 0.0002316489503275206,
      "loss": 3.1562,
      "step": 132059
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3989737033843994,
      "learning_rate": 0.00023164496735624405,
      "loss": 3.1822,
      "step": 132060
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.600062370300293,
      "learning_rate": 0.00023164098439767595,
      "loss": 2.9303,
      "step": 132061
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.234178304672241,
      "learning_rate": 0.0002316370014518171,
      "loss": 3.0714,
      "step": 132062
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.445265531539917,
      "learning_rate": 0.00023163301851866834,
      "loss": 3.1049,
      "step": 132063
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.306631326675415,
      "learning_rate": 0.00023162903559823013,
      "loss": 2.8264,
      "step": 132064
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.028975486755371,
      "learning_rate": 0.00023162505269050344,
      "loss": 2.993,
      "step": 132065
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.095287561416626,
      "learning_rate": 0.0002316210697954889,
      "loss": 2.9469,
      "step": 132066
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8967361450195312,
      "learning_rate": 0.0002316170869131873,
      "loss": 2.9636,
      "step": 132067
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.091660737991333,
      "learning_rate": 0.00023161310404359937,
      "loss": 2.8904,
      "step": 132068
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.115009069442749,
      "learning_rate": 0.000231609121186726,
      "loss": 2.7974,
      "step": 132069
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.376432418823242,
      "learning_rate": 0.00023160513834256764,
      "loss": 3.0196,
      "step": 132070
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8288075923919678,
      "learning_rate": 0.0002316011555111252,
      "loss": 2.6371,
      "step": 132071
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2536916732788086,
      "learning_rate": 0.0002315971726923994,
      "loss": 3.0345,
      "step": 132072
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1958537101745605,
      "learning_rate": 0.00023159318988639094,
      "loss": 3.0709,
      "step": 132073
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0401625633239746,
      "learning_rate": 0.00023158920709310062,
      "loss": 3.0552,
      "step": 132074
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.14872407913208,
      "learning_rate": 0.00023158522431252927,
      "loss": 3.3581,
      "step": 132075
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7260441780090332,
      "learning_rate": 0.0002315812415446774,
      "loss": 3.1571,
      "step": 132076
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6121675968170166,
      "learning_rate": 0.0002315772587895459,
      "loss": 3.0084,
      "step": 132077
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9745666980743408,
      "learning_rate": 0.00023157327604713543,
      "loss": 2.893,
      "step": 132078
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1118218898773193,
      "learning_rate": 0.0002315692933174468,
      "loss": 2.8727,
      "step": 132079
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5638132095336914,
      "learning_rate": 0.00023156531060048073,
      "loss": 2.7921,
      "step": 132080
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1414077281951904,
      "learning_rate": 0.0002315613278962381,
      "loss": 2.9078,
      "step": 132081
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.060084104537964,
      "learning_rate": 0.00023155734520471935,
      "loss": 3.1152,
      "step": 132082
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0671942234039307,
      "learning_rate": 0.00023155336252592541,
      "loss": 2.9315,
      "step": 132083
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.924814224243164,
      "learning_rate": 0.00023154937985985698,
      "loss": 2.8884,
      "step": 132084
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0219533443450928,
      "learning_rate": 0.0002315453972065148,
      "loss": 2.8752,
      "step": 132085
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8934019804000854,
      "learning_rate": 0.0002315414145658997,
      "loss": 3.169,
      "step": 132086
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6960880756378174,
      "learning_rate": 0.00023153743193801235,
      "loss": 2.8146,
      "step": 132087
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1188721656799316,
      "learning_rate": 0.00023153344932285344,
      "loss": 2.9653,
      "step": 132088
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8679782152175903,
      "learning_rate": 0.00023152946672042374,
      "loss": 3.06,
      "step": 132089
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.775038719177246,
      "learning_rate": 0.00023152548413072396,
      "loss": 2.7888,
      "step": 132090
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0551838874816895,
      "learning_rate": 0.00023152150155375493,
      "loss": 3.0843,
      "step": 132091
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1491143703460693,
      "learning_rate": 0.00023151751898951734,
      "loss": 2.8501,
      "step": 132092
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.908647894859314,
      "learning_rate": 0.00023151353643801207,
      "loss": 2.9379,
      "step": 132093
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.081305742263794,
      "learning_rate": 0.0002315095538992396,
      "loss": 2.9885,
      "step": 132094
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8194419145584106,
      "learning_rate": 0.00023150557137320077,
      "loss": 3.1269,
      "step": 132095
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1030161380767822,
      "learning_rate": 0.00023150158885989634,
      "loss": 2.9041,
      "step": 132096
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.314957618713379,
      "learning_rate": 0.00023149760635932708,
      "loss": 2.7801,
      "step": 132097
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.281020402908325,
      "learning_rate": 0.0002314936238714937,
      "loss": 2.9978,
      "step": 132098
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0861756801605225,
      "learning_rate": 0.00023148964139639713,
      "loss": 2.8842,
      "step": 132099
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8343918323516846,
      "learning_rate": 0.00023148565893403773,
      "loss": 3.231,
      "step": 132100
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0134401321411133,
      "learning_rate": 0.0002314816764844165,
      "loss": 3.0371,
      "step": 132101
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1694819927215576,
      "learning_rate": 0.00023147769404753405,
      "loss": 2.9332,
      "step": 132102
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8771262168884277,
      "learning_rate": 0.00023147371162339123,
      "loss": 2.969,
      "step": 132103
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8035609722137451,
      "learning_rate": 0.00023146972921198871,
      "loss": 3.2142,
      "step": 132104
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7854728698730469,
      "learning_rate": 0.0002314657468133274,
      "loss": 3.1205,
      "step": 132105
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5283308029174805,
      "learning_rate": 0.00023146176442740782,
      "loss": 3.3662,
      "step": 132106
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.737757682800293,
      "learning_rate": 0.00023145778205423074,
      "loss": 3.0687,
      "step": 132107
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2974863052368164,
      "learning_rate": 0.000231453799693797,
      "loss": 3.083,
      "step": 132108
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4504282474517822,
      "learning_rate": 0.00023144981734610724,
      "loss": 3.0143,
      "step": 132109
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1052756309509277,
      "learning_rate": 0.00023144583501116228,
      "loss": 3.0,
      "step": 132110
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3278310298919678,
      "learning_rate": 0.0002314418526889629,
      "loss": 2.7336,
      "step": 132111
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.792769432067871,
      "learning_rate": 0.0002314378703795097,
      "loss": 2.7295,
      "step": 132112
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5152201652526855,
      "learning_rate": 0.00023143388808280358,
      "loss": 2.9515,
      "step": 132113
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.124300003051758,
      "learning_rate": 0.0002314299057988451,
      "loss": 3.0707,
      "step": 132114
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0875422954559326,
      "learning_rate": 0.0002314259235276351,
      "loss": 3.0329,
      "step": 132115
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.687488079071045,
      "learning_rate": 0.00023142194126917436,
      "loss": 3.1115,
      "step": 132116
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2713327407836914,
      "learning_rate": 0.0002314179590234635,
      "loss": 3.0659,
      "step": 132117
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6649632453918457,
      "learning_rate": 0.00023141397679050344,
      "loss": 3.1848,
      "step": 132118
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.296339988708496,
      "learning_rate": 0.00023140999457029477,
      "loss": 2.9227,
      "step": 132119
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3362789154052734,
      "learning_rate": 0.00023140601236283835,
      "loss": 2.6661,
      "step": 132120
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.980716347694397,
      "learning_rate": 0.00023140203016813472,
      "loss": 3.0881,
      "step": 132121
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8307769298553467,
      "learning_rate": 0.00023139804798618478,
      "loss": 2.9333,
      "step": 132122
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.1943166255950928,
      "learning_rate": 0.00023139406581698923,
      "loss": 3.0207,
      "step": 132123
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.641132116317749,
      "learning_rate": 0.00023139008366054889,
      "loss": 2.9883,
      "step": 132124
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.560718536376953,
      "learning_rate": 0.00023138610151686437,
      "loss": 2.8397,
      "step": 132125
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.887439250946045,
      "learning_rate": 0.00023138211938593647,
      "loss": 2.8573,
      "step": 132126
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.682831287384033,
      "learning_rate": 0.0002313781372677659,
      "loss": 3.0259,
      "step": 132127
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0173280239105225,
      "learning_rate": 0.00023137415516235354,
      "loss": 2.8973,
      "step": 132128
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.1315126419067383,
      "learning_rate": 0.00023137017306969996,
      "loss": 3.0178,
      "step": 132129
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8503239154815674,
      "learning_rate": 0.000231366190989806,
      "loss": 3.1154,
      "step": 132130
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1475656032562256,
      "learning_rate": 0.00023136220892267225,
      "loss": 3.0602,
      "step": 132131
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.509244441986084,
      "learning_rate": 0.00023135822686829962,
      "loss": 2.9424,
      "step": 132132
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.393341541290283,
      "learning_rate": 0.00023135424482668878,
      "loss": 2.9905,
      "step": 132133
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3355250358581543,
      "learning_rate": 0.0002313502627978405,
      "loss": 3.1231,
      "step": 132134
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.13340163230896,
      "learning_rate": 0.00023134628078175554,
      "loss": 3.1337,
      "step": 132135
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6549134254455566,
      "learning_rate": 0.00023134229877843458,
      "loss": 3.0729,
      "step": 132136
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3242857456207275,
      "learning_rate": 0.00023133831678787836,
      "loss": 3.0485,
      "step": 132137
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.194706439971924,
      "learning_rate": 0.00023133433481008762,
      "loss": 3.2581,
      "step": 132138
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.597020387649536,
      "learning_rate": 0.00023133035284506314,
      "loss": 2.903,
      "step": 132139
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.50234055519104,
      "learning_rate": 0.00023132637089280566,
      "loss": 2.9451,
      "step": 132140
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.303133487701416,
      "learning_rate": 0.00023132238895331585,
      "loss": 2.87,
      "step": 132141
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.896397352218628,
      "learning_rate": 0.00023131840702659466,
      "loss": 2.8932,
      "step": 132142
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.679720401763916,
      "learning_rate": 0.00023131442511264256,
      "loss": 3.1012,
      "step": 132143
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2755870819091797,
      "learning_rate": 0.00023131044321146038,
      "loss": 2.8956,
      "step": 132144
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3606455326080322,
      "learning_rate": 0.00023130646132304893,
      "loss": 2.9891,
      "step": 132145
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7618727684020996,
      "learning_rate": 0.00023130247944740887,
      "loss": 2.8664,
      "step": 132146
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7039382457733154,
      "learning_rate": 0.000231298497584541,
      "loss": 3.0078,
      "step": 132147
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8800711631774902,
      "learning_rate": 0.00023129451573444614,
      "loss": 2.8674,
      "step": 132148
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6154568195343018,
      "learning_rate": 0.0002312905338971248,
      "loss": 3.0591,
      "step": 132149
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0641286373138428,
      "learning_rate": 0.00023128655207257785,
      "loss": 2.8231,
      "step": 132150
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9298923015594482,
      "learning_rate": 0.00023128257026080602,
      "loss": 2.8111,
      "step": 132151
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.786922574043274,
      "learning_rate": 0.00023127858846181006,
      "loss": 2.8431,
      "step": 132152
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0382332801818848,
      "learning_rate": 0.0002312746066755907,
      "loss": 2.9639,
      "step": 132153
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8541193008422852,
      "learning_rate": 0.00023127062490214883,
      "loss": 3.2349,
      "step": 132154
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3142330646514893,
      "learning_rate": 0.00023126664314148494,
      "loss": 3.1805,
      "step": 132155
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0781869888305664,
      "learning_rate": 0.00023126266139359984,
      "loss": 2.8903,
      "step": 132156
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.095428705215454,
      "learning_rate": 0.0002312586796584943,
      "loss": 2.872,
      "step": 132157
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3464395999908447,
      "learning_rate": 0.0002312546979361691,
      "loss": 3.1532,
      "step": 132158
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5725808143615723,
      "learning_rate": 0.00023125071622662497,
      "loss": 2.9849,
      "step": 132159
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1429195404052734,
      "learning_rate": 0.00023124673452986274,
      "loss": 2.8703,
      "step": 132160
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9499754905700684,
      "learning_rate": 0.00023124275284588288,
      "loss": 2.8536,
      "step": 132161
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.097846508026123,
      "learning_rate": 0.00023123877117468632,
      "loss": 2.7961,
      "step": 132162
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3359668254852295,
      "learning_rate": 0.00023123478951627373,
      "loss": 3.2397,
      "step": 132163
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9977785348892212,
      "learning_rate": 0.00023123080787064594,
      "loss": 2.8133,
      "step": 132164
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0366992950439453,
      "learning_rate": 0.00023122682623780364,
      "loss": 3.0817,
      "step": 132165
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5609569549560547,
      "learning_rate": 0.00023122284461774768,
      "loss": 2.8673,
      "step": 132166
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.981490969657898,
      "learning_rate": 0.00023121886301047856,
      "loss": 3.1343,
      "step": 132167
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.37210750579834,
      "learning_rate": 0.00023121488141599717,
      "loss": 2.8453,
      "step": 132168
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.1200318336486816,
      "learning_rate": 0.00023121089983430424,
      "loss": 2.9825,
      "step": 132169
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.387261390686035,
      "learning_rate": 0.00023120691826540045,
      "loss": 2.918,
      "step": 132170
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1232831478118896,
      "learning_rate": 0.00023120293670928665,
      "loss": 2.9213,
      "step": 132171
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5022151470184326,
      "learning_rate": 0.0002311989551659636,
      "loss": 2.8172,
      "step": 132172
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0215234756469727,
      "learning_rate": 0.00023119497363543188,
      "loss": 3.324,
      "step": 132173
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9010266065597534,
      "learning_rate": 0.0002311909921176923,
      "loss": 3.0563,
      "step": 132174
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0585501194000244,
      "learning_rate": 0.0002311870106127456,
      "loss": 2.9963,
      "step": 132175
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8589601516723633,
      "learning_rate": 0.00023118302912059253,
      "loss": 3.0789,
      "step": 132176
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.532741069793701,
      "learning_rate": 0.00023117904764123384,
      "loss": 3.1842,
      "step": 132177
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.540369749069214,
      "learning_rate": 0.00023117506617467042,
      "loss": 2.8244,
      "step": 132178
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7278990745544434,
      "learning_rate": 0.0002311710847209027,
      "loss": 3.0369,
      "step": 132179
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5787549018859863,
      "learning_rate": 0.0002311671032799316,
      "loss": 3.1152,
      "step": 132180
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1344025135040283,
      "learning_rate": 0.0002311631218517578,
      "loss": 2.8822,
      "step": 132181
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0750272274017334,
      "learning_rate": 0.00023115914043638207,
      "loss": 2.9394,
      "step": 132182
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2711615562438965,
      "learning_rate": 0.0002311551590338052,
      "loss": 3.0596,
      "step": 132183
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1314592361450195,
      "learning_rate": 0.00023115117764402788,
      "loss": 3.1334,
      "step": 132184
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8429820537567139,
      "learning_rate": 0.00023114719626705094,
      "loss": 2.9034,
      "step": 132185
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.182375431060791,
      "learning_rate": 0.0002311432149028749,
      "loss": 3.1098,
      "step": 132186
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0563931465148926,
      "learning_rate": 0.0002311392335515007,
      "loss": 2.9672,
      "step": 132187
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0511929988861084,
      "learning_rate": 0.00023113525221292897,
      "loss": 2.908,
      "step": 132188
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4131112098693848,
      "learning_rate": 0.0002311312708871605,
      "loss": 3.2703,
      "step": 132189
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8161380290985107,
      "learning_rate": 0.00023112728957419603,
      "loss": 3.0628,
      "step": 132190
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.746098756790161,
      "learning_rate": 0.00023112330827403647,
      "loss": 2.826,
      "step": 132191
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2059383392333984,
      "learning_rate": 0.0002311193269866822,
      "loss": 3.0278,
      "step": 132192
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1130857467651367,
      "learning_rate": 0.00023111534571213416,
      "loss": 3.1744,
      "step": 132193
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.2975831031799316,
      "learning_rate": 0.0002311113644503931,
      "loss": 2.8185,
      "step": 132194
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0678350925445557,
      "learning_rate": 0.00023110738320145972,
      "loss": 3.087,
      "step": 132195
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2314982414245605,
      "learning_rate": 0.00023110340196533477,
      "loss": 2.9333,
      "step": 132196
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.152313709259033,
      "learning_rate": 0.0002310994207420191,
      "loss": 3.0837,
      "step": 132197
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.105445146560669,
      "learning_rate": 0.0002310954395315133,
      "loss": 2.9824,
      "step": 132198
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0505902767181396,
      "learning_rate": 0.0002310914583338181,
      "loss": 3.0115,
      "step": 132199
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.1205029487609863,
      "learning_rate": 0.00023108747714893431,
      "loss": 2.8102,
      "step": 132200
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2511494159698486,
      "learning_rate": 0.0002310834959768627,
      "loss": 2.842,
      "step": 132201
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5049564838409424,
      "learning_rate": 0.0002310795148176039,
      "loss": 2.9375,
      "step": 132202
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3893749713897705,
      "learning_rate": 0.00023107553367115885,
      "loss": 3.205,
      "step": 132203
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.30793833732605,
      "learning_rate": 0.00023107155253752806,
      "loss": 2.977,
      "step": 132204
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.121083974838257,
      "learning_rate": 0.00023106757141671242,
      "loss": 2.9108,
      "step": 132205
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0555403232574463,
      "learning_rate": 0.00023106359030871258,
      "loss": 2.9348,
      "step": 132206
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.448810338973999,
      "learning_rate": 0.0002310596092135293,
      "loss": 3.0986,
      "step": 132207
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3028407096862793,
      "learning_rate": 0.00023105562813116335,
      "loss": 3.0245,
      "step": 132208
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.023780584335327,
      "learning_rate": 0.00023105164706161555,
      "loss": 3.0417,
      "step": 132209
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.064560890197754,
      "learning_rate": 0.00023104766600488643,
      "loss": 3.0727,
      "step": 132210
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8854870796203613,
      "learning_rate": 0.00023104368496097688,
      "loss": 2.9733,
      "step": 132211
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3784499168395996,
      "learning_rate": 0.00023103970392988768,
      "loss": 3.1482,
      "step": 132212
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7651461362838745,
      "learning_rate": 0.00023103572291161944,
      "loss": 3.0868,
      "step": 132213
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9530363082885742,
      "learning_rate": 0.00023103174190617295,
      "loss": 3.1439,
      "step": 132214
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.495694637298584,
      "learning_rate": 0.00023102776091354904,
      "loss": 2.7959,
      "step": 132215
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1042752265930176,
      "learning_rate": 0.0002310237799337483,
      "loss": 2.859,
      "step": 132216
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8892912864685059,
      "learning_rate": 0.00023101979896677152,
      "loss": 2.929,
      "step": 132217
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0513885021209717,
      "learning_rate": 0.0002310158180126195,
      "loss": 2.8198,
      "step": 132218
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7539007663726807,
      "learning_rate": 0.0002310118370712929,
      "loss": 2.9608,
      "step": 132219
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8190901279449463,
      "learning_rate": 0.0002310078561427926,
      "loss": 3.1322,
      "step": 132220
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.20269513130188,
      "learning_rate": 0.0002310038752271192,
      "loss": 2.9284,
      "step": 132221
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3056750297546387,
      "learning_rate": 0.00023099989432427348,
      "loss": 3.0285,
      "step": 132222
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.029050350189209,
      "learning_rate": 0.0002309959134342561,
      "loss": 2.8402,
      "step": 132223
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.068068265914917,
      "learning_rate": 0.00023099193255706797,
      "loss": 3.055,
      "step": 132224
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9382286071777344,
      "learning_rate": 0.0002309879516927097,
      "loss": 2.9067,
      "step": 132225
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2365009784698486,
      "learning_rate": 0.00023098397084118206,
      "loss": 2.9556,
      "step": 132226
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.149099349975586,
      "learning_rate": 0.00023097999000248597,
      "loss": 3.1055,
      "step": 132227
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9985339641571045,
      "learning_rate": 0.00023097600917662182,
      "loss": 3.1221,
      "step": 132228
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.733017086982727,
      "learning_rate": 0.00023097202836359058,
      "loss": 2.9292,
      "step": 132229
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4411206245422363,
      "learning_rate": 0.00023096804756339294,
      "loss": 2.9463,
      "step": 132230
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9521759748458862,
      "learning_rate": 0.00023096406677602962,
      "loss": 3.0168,
      "step": 132231
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.627660036087036,
      "learning_rate": 0.00023096008600150138,
      "loss": 2.8852,
      "step": 132232
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3083581924438477,
      "learning_rate": 0.0002309561052398091,
      "loss": 3.1331,
      "step": 132233
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1306934356689453,
      "learning_rate": 0.00023095212449095328,
      "loss": 2.9775,
      "step": 132234
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.944266676902771,
      "learning_rate": 0.00023094814375493474,
      "loss": 3.0448,
      "step": 132235
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0637400150299072,
      "learning_rate": 0.00023094416303175424,
      "loss": 2.7608,
      "step": 132236
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0317554473876953,
      "learning_rate": 0.00023094018232141253,
      "loss": 2.8115,
      "step": 132237
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4284558296203613,
      "learning_rate": 0.00023093620162391033,
      "loss": 3.0609,
      "step": 132238
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9735254049301147,
      "learning_rate": 0.00023093222093924852,
      "loss": 2.7362,
      "step": 132239
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9621365070343018,
      "learning_rate": 0.00023092824026742763,
      "loss": 3.028,
      "step": 132240
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0821609497070312,
      "learning_rate": 0.00023092425960844845,
      "loss": 2.9552,
      "step": 132241
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4801814556121826,
      "learning_rate": 0.00023092027896231177,
      "loss": 2.9737,
      "step": 132242
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.155231475830078,
      "learning_rate": 0.0002309162983290183,
      "loss": 3.105,
      "step": 132243
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.091919422149658,
      "learning_rate": 0.0002309123177085688,
      "loss": 2.9177,
      "step": 132244
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0029361248016357,
      "learning_rate": 0.00023090833710096413,
      "loss": 2.9385,
      "step": 132245
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9982242584228516,
      "learning_rate": 0.00023090435650620478,
      "loss": 2.87,
      "step": 132246
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4060609340667725,
      "learning_rate": 0.0002309003759242916,
      "loss": 2.9851,
      "step": 132247
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.6706615686416626,
      "learning_rate": 0.00023089639535522536,
      "loss": 2.9568,
      "step": 132248
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.248685121536255,
      "learning_rate": 0.00023089241479900676,
      "loss": 2.9149,
      "step": 132249
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8663421869277954,
      "learning_rate": 0.0002308884342556366,
      "loss": 3.0135,
      "step": 132250
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9871805906295776,
      "learning_rate": 0.00023088445372511558,
      "loss": 2.7646,
      "step": 132251
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1598188877105713,
      "learning_rate": 0.00023088047320744457,
      "loss": 2.7826,
      "step": 132252
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.857508897781372,
      "learning_rate": 0.000230876492702624,
      "loss": 2.9753,
      "step": 132253
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.195892333984375,
      "learning_rate": 0.00023087251221065488,
      "loss": 2.9146,
      "step": 132254
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5297961235046387,
      "learning_rate": 0.00023086853173153783,
      "loss": 2.8215,
      "step": 132255
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0535855293273926,
      "learning_rate": 0.0002308645512652736,
      "loss": 2.9798,
      "step": 132256
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5071496963500977,
      "learning_rate": 0.00023086057081186297,
      "loss": 2.9625,
      "step": 132257
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.431816339492798,
      "learning_rate": 0.00023085659037130681,
      "loss": 3.038,
      "step": 132258
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.421384572982788,
      "learning_rate": 0.00023085260994360558,
      "loss": 3.0647,
      "step": 132259
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.929869532585144,
      "learning_rate": 0.00023084862952876014,
      "loss": 2.8568,
      "step": 132260
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.4867775440216064,
      "learning_rate": 0.00023084464912677126,
      "loss": 2.7978,
      "step": 132261
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.257608413696289,
      "learning_rate": 0.00023084066873763968,
      "loss": 3.0412,
      "step": 132262
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1165008544921875,
      "learning_rate": 0.0002308366883613661,
      "loss": 3.0936,
      "step": 132263
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.311199426651001,
      "learning_rate": 0.0002308327079979514,
      "loss": 3.0297,
      "step": 132264
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9297922849655151,
      "learning_rate": 0.00023082872764739612,
      "loss": 2.8031,
      "step": 132265
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.593937873840332,
      "learning_rate": 0.0002308247473097011,
      "loss": 3.0638,
      "step": 132266
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.592121124267578,
      "learning_rate": 0.000230820766984867,
      "loss": 3.1084,
      "step": 132267
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.29282546043396,
      "learning_rate": 0.00023081678667289464,
      "loss": 2.8733,
      "step": 132268
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2603845596313477,
      "learning_rate": 0.00023081280637378477,
      "loss": 2.9231,
      "step": 132269
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2731504440307617,
      "learning_rate": 0.00023080882608753824,
      "loss": 2.8045,
      "step": 132270
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0514166355133057,
      "learning_rate": 0.00023080484581415548,
      "loss": 2.985,
      "step": 132271
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.274672746658325,
      "learning_rate": 0.00023080086555363744,
      "loss": 2.7828,
      "step": 132272
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0481886863708496,
      "learning_rate": 0.00023079688530598484,
      "loss": 2.8764,
      "step": 132273
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.1694891452789307,
      "learning_rate": 0.00023079290507119836,
      "loss": 2.8518,
      "step": 132274
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.303251266479492,
      "learning_rate": 0.0002307889248492788,
      "loss": 2.8806,
      "step": 132275
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.133918046951294,
      "learning_rate": 0.00023078494464022703,
      "loss": 2.9247,
      "step": 132276
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.955124020576477,
      "learning_rate": 0.00023078096444404354,
      "loss": 3.109,
      "step": 132277
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.917782783508301,
      "learning_rate": 0.00023077698426072913,
      "loss": 2.6567,
      "step": 132278
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7066214084625244,
      "learning_rate": 0.0002307730040902846,
      "loss": 2.9293,
      "step": 132279
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9551599025726318,
      "learning_rate": 0.00023076902393271068,
      "loss": 3.1312,
      "step": 132280
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3368284702301025,
      "learning_rate": 0.00023076504378800812,
      "loss": 2.742,
      "step": 132281
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6172590255737305,
      "learning_rate": 0.00023076106365617775,
      "loss": 3.1289,
      "step": 132282
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.969832181930542,
      "learning_rate": 0.00023075708353722006,
      "loss": 2.9793,
      "step": 132283
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3570778369903564,
      "learning_rate": 0.000230753103431136,
      "loss": 3.1185,
      "step": 132284
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.047118663787842,
      "learning_rate": 0.00023074912333792617,
      "loss": 2.9483,
      "step": 132285
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5545175075531006,
      "learning_rate": 0.00023074514325759145,
      "loss": 2.9446,
      "step": 132286
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5942606925964355,
      "learning_rate": 0.00023074116319013246,
      "loss": 2.9003,
      "step": 132287
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8986213207244873,
      "learning_rate": 0.00023073718313555008,
      "loss": 2.9154,
      "step": 132288
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.735518455505371,
      "learning_rate": 0.0002307332030938449,
      "loss": 2.9323,
      "step": 132289
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.067734479904175,
      "learning_rate": 0.0002307292230650178,
      "loss": 3.1214,
      "step": 132290
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4225821495056152,
      "learning_rate": 0.00023072524304906938,
      "loss": 3.0671,
      "step": 132291
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.542020797729492,
      "learning_rate": 0.00023072126304600048,
      "loss": 2.7435,
      "step": 132292
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.9351861476898193,
      "learning_rate": 0.00023071728305581172,
      "loss": 3.1945,
      "step": 132293
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.022933006286621,
      "learning_rate": 0.00023071330307850406,
      "loss": 3.147,
      "step": 132294
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.853390097618103,
      "learning_rate": 0.000230709323114078,
      "loss": 2.8618,
      "step": 132295
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9972878694534302,
      "learning_rate": 0.0002307053431625344,
      "loss": 3.0752,
      "step": 132296
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8938612937927246,
      "learning_rate": 0.00023070136322387406,
      "loss": 3.023,
      "step": 132297
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.826164245605469,
      "learning_rate": 0.00023069738329809756,
      "loss": 2.8043,
      "step": 132298
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4894602298736572,
      "learning_rate": 0.00023069340338520574,
      "loss": 2.8785,
      "step": 132299
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3456060886383057,
      "learning_rate": 0.0002306894234851994,
      "loss": 2.7028,
      "step": 132300
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.318995237350464,
      "learning_rate": 0.0002306854435980791,
      "loss": 3.2555,
      "step": 132301
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3542640209198,
      "learning_rate": 0.00023068146372384572,
      "loss": 2.7987,
      "step": 132302
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3532235622406006,
      "learning_rate": 0.00023067748386249994,
      "loss": 2.7697,
      "step": 132303
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2480835914611816,
      "learning_rate": 0.0002306735040140426,
      "loss": 3.0925,
      "step": 132304
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.874030113220215,
      "learning_rate": 0.0002306695241784743,
      "loss": 2.8412,
      "step": 132305
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.721353054046631,
      "learning_rate": 0.0002306655443557959,
      "loss": 2.9457,
      "step": 132306
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.486304998397827,
      "learning_rate": 0.00023066156454600804,
      "loss": 2.8426,
      "step": 132307
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.170705556869507,
      "learning_rate": 0.00023065758474911146,
      "loss": 2.833,
      "step": 132308
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7980539798736572,
      "learning_rate": 0.000230653604965107,
      "loss": 2.8764,
      "step": 132309
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.569291353225708,
      "learning_rate": 0.00023064962519399532,
      "loss": 3.0962,
      "step": 132310
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4380247592926025,
      "learning_rate": 0.00023064564543577725,
      "loss": 2.9956,
      "step": 132311
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9664807319641113,
      "learning_rate": 0.00023064166569045346,
      "loss": 2.9785,
      "step": 132312
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.335803747177124,
      "learning_rate": 0.00023063768595802463,
      "loss": 2.9638,
      "step": 132313
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6334619522094727,
      "learning_rate": 0.00023063370623849157,
      "loss": 3.0645,
      "step": 132314
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.5977683067321777,
      "learning_rate": 0.00023062972653185499,
      "loss": 2.8754,
      "step": 132315
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1015684604644775,
      "learning_rate": 0.00023062574683811566,
      "loss": 2.9789,
      "step": 132316
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9231326580047607,
      "learning_rate": 0.00023062176715727434,
      "loss": 2.9456,
      "step": 132317
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.891301155090332,
      "learning_rate": 0.00023061778748933176,
      "loss": 2.8145,
      "step": 132318
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.301971912384033,
      "learning_rate": 0.0002306138078342887,
      "loss": 3.0034,
      "step": 132319
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9352117776870728,
      "learning_rate": 0.00023060982819214575,
      "loss": 3.2082,
      "step": 132320
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.088719606399536,
      "learning_rate": 0.00023060584856290377,
      "loss": 3.0505,
      "step": 132321
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.731128692626953,
      "learning_rate": 0.00023060186894656345,
      "loss": 2.9158,
      "step": 132322
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7734225988388062,
      "learning_rate": 0.00023059788934312557,
      "loss": 2.8388,
      "step": 132323
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0044965744018555,
      "learning_rate": 0.0002305939097525908,
      "loss": 2.8626,
      "step": 132324
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.369471549987793,
      "learning_rate": 0.00023058993017496013,
      "loss": 3.1158,
      "step": 132325
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8700168132781982,
      "learning_rate": 0.00023058595061023392,
      "loss": 2.7492,
      "step": 132326
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.01393723487854,
      "learning_rate": 0.00023058197105841312,
      "loss": 2.9973,
      "step": 132327
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0256595611572266,
      "learning_rate": 0.0002305779915194984,
      "loss": 3.0047,
      "step": 132328
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.18312931060791,
      "learning_rate": 0.00023057401199349058,
      "loss": 2.7944,
      "step": 132329
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.06546688079834,
      "learning_rate": 0.00023057003248039035,
      "loss": 2.9961,
      "step": 132330
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9739960432052612,
      "learning_rate": 0.0002305660529801986,
      "loss": 3.1645,
      "step": 132331
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.835663080215454,
      "learning_rate": 0.0002305620734929158,
      "loss": 2.8853,
      "step": 132332
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9637460708618164,
      "learning_rate": 0.00023055809401854283,
      "loss": 3.0675,
      "step": 132333
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.6292343139648438,
      "learning_rate": 0.0002305541145570804,
      "loss": 2.9408,
      "step": 132334
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1738476753234863,
      "learning_rate": 0.0002305501351085293,
      "loss": 2.8943,
      "step": 132335
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9779167175292969,
      "learning_rate": 0.00023054615567289023,
      "loss": 3.0345,
      "step": 132336
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.258765935897827,
      "learning_rate": 0.00023054217625016404,
      "loss": 2.8908,
      "step": 132337
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8273240327835083,
      "learning_rate": 0.00023053819684035128,
      "loss": 2.8904,
      "step": 132338
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.16972017288208,
      "learning_rate": 0.00023053421744345277,
      "loss": 2.9851,
      "step": 132339
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9231934547424316,
      "learning_rate": 0.00023053023805946925,
      "loss": 2.9035,
      "step": 132340
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8450345993041992,
      "learning_rate": 0.00023052625868840145,
      "loss": 2.9061,
      "step": 132341
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.246948003768921,
      "learning_rate": 0.0002305222793302502,
      "loss": 2.8905,
      "step": 132342
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.370335578918457,
      "learning_rate": 0.00023051829998501622,
      "loss": 3.0602,
      "step": 132343
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.419698476791382,
      "learning_rate": 0.00023051432065270013,
      "loss": 2.8828,
      "step": 132344
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.517709255218506,
      "learning_rate": 0.0002305103413333027,
      "loss": 3.1223,
      "step": 132345
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4183037281036377,
      "learning_rate": 0.00023050636202682473,
      "loss": 3.1388,
      "step": 132346
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.524479866027832,
      "learning_rate": 0.00023050238273326692,
      "loss": 3.0446,
      "step": 132347
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.7366907596588135,
      "learning_rate": 0.00023049840345263004,
      "loss": 3.0341,
      "step": 132348
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7686591148376465,
      "learning_rate": 0.00023049442418491493,
      "loss": 2.8918,
      "step": 132349
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.907705545425415,
      "learning_rate": 0.00023049044493012212,
      "loss": 3.179,
      "step": 132350
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3914098739624023,
      "learning_rate": 0.00023048646568825243,
      "loss": 2.9715,
      "step": 132351
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.883955478668213,
      "learning_rate": 0.00023048248645930663,
      "loss": 3.102,
      "step": 132352
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9973825216293335,
      "learning_rate": 0.00023047850724328543,
      "loss": 3.1157,
      "step": 132353
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.236689329147339,
      "learning_rate": 0.0002304745280401896,
      "loss": 2.9626,
      "step": 132354
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.984250783920288,
      "learning_rate": 0.00023047054885002002,
      "loss": 2.9044,
      "step": 132355
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.29695463180542,
      "learning_rate": 0.0002304665696727771,
      "loss": 3.1009,
      "step": 132356
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3288991451263428,
      "learning_rate": 0.00023046259050846178,
      "loss": 2.7929,
      "step": 132357
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1369216442108154,
      "learning_rate": 0.0002304586113570748,
      "loss": 3.0155,
      "step": 132358
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9652107954025269,
      "learning_rate": 0.00023045463221861687,
      "loss": 2.9781,
      "step": 132359
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8886098861694336,
      "learning_rate": 0.00023045065309308871,
      "loss": 2.9321,
      "step": 132360
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8384028673171997,
      "learning_rate": 0.00023044667398049124,
      "loss": 2.9993,
      "step": 132361
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.874598503112793,
      "learning_rate": 0.00023044269488082492,
      "loss": 2.7942,
      "step": 132362
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9976962804794312,
      "learning_rate": 0.00023043871579409058,
      "loss": 3.0051,
      "step": 132363
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5690741539001465,
      "learning_rate": 0.000230434736720289,
      "loss": 2.8622,
      "step": 132364
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.421907663345337,
      "learning_rate": 0.00023043075765942096,
      "loss": 2.9623,
      "step": 132365
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9795498847961426,
      "learning_rate": 0.0002304267786114871,
      "loss": 3.0055,
      "step": 132366
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8442671298980713,
      "learning_rate": 0.00023042279957648842,
      "loss": 2.9747,
      "step": 132367
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0307130813598633,
      "learning_rate": 0.00023041882055442524,
      "loss": 2.9019,
      "step": 132368
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.460799217224121,
      "learning_rate": 0.00023041484154529858,
      "loss": 3.0554,
      "step": 132369
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.754042625427246,
      "learning_rate": 0.00023041086254910908,
      "loss": 2.9599,
      "step": 132370
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0903966426849365,
      "learning_rate": 0.0002304068835658575,
      "loss": 2.9804,
      "step": 132371
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.040825843811035,
      "learning_rate": 0.00023040290459554462,
      "loss": 3.1926,
      "step": 132372
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.305067777633667,
      "learning_rate": 0.0002303989256381712,
      "loss": 3.0284,
      "step": 132373
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9131271839141846,
      "learning_rate": 0.00023039494669373797,
      "loss": 3.0067,
      "step": 132374
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.173959493637085,
      "learning_rate": 0.00023039096776224555,
      "loss": 2.9511,
      "step": 132375
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2281198501586914,
      "learning_rate": 0.00023038698884369477,
      "loss": 2.776,
      "step": 132376
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3370912075042725,
      "learning_rate": 0.00023038300993808637,
      "loss": 2.5822,
      "step": 132377
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9865293502807617,
      "learning_rate": 0.00023037903104542105,
      "loss": 2.9535,
      "step": 132378
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9062590599060059,
      "learning_rate": 0.00023037505216569966,
      "loss": 2.8115,
      "step": 132379
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1152703762054443,
      "learning_rate": 0.00023037107329892278,
      "loss": 2.7874,
      "step": 132380
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.832059621810913,
      "learning_rate": 0.00023036709444509125,
      "loss": 2.7342,
      "step": 132381
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0743696689605713,
      "learning_rate": 0.00023036311560420587,
      "loss": 2.8945,
      "step": 132382
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.168630838394165,
      "learning_rate": 0.00023035913677626723,
      "loss": 2.8016,
      "step": 132383
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9421186447143555,
      "learning_rate": 0.00023035515796127615,
      "loss": 3.1163,
      "step": 132384
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1988167762756348,
      "learning_rate": 0.00023035117915923335,
      "loss": 3.1081,
      "step": 132385
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1428465843200684,
      "learning_rate": 0.0002303472003701396,
      "loss": 2.7374,
      "step": 132386
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0151851177215576,
      "learning_rate": 0.00023034322159399564,
      "loss": 2.7761,
      "step": 132387
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.014021635055542,
      "learning_rate": 0.00023033924283080213,
      "loss": 3.1212,
      "step": 132388
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9165056943893433,
      "learning_rate": 0.00023033526408055993,
      "loss": 2.8336,
      "step": 132389
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3969554901123047,
      "learning_rate": 0.00023033128534326967,
      "loss": 3.0082,
      "step": 132390
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3546712398529053,
      "learning_rate": 0.00023032730661893218,
      "loss": 2.9033,
      "step": 132391
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.122999906539917,
      "learning_rate": 0.0002303233279075482,
      "loss": 3.163,
      "step": 132392
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.4927902221679688,
      "learning_rate": 0.00023031934920911834,
      "loss": 3.0673,
      "step": 132393
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2495970726013184,
      "learning_rate": 0.00023031537052364343,
      "loss": 2.8625,
      "step": 132394
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.3976197242736816,
      "learning_rate": 0.00023031139185112422,
      "loss": 3.1884,
      "step": 132395
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9215846061706543,
      "learning_rate": 0.0002303074131915615,
      "loss": 3.0108,
      "step": 132396
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4858968257904053,
      "learning_rate": 0.0002303034345449559,
      "loss": 2.9892,
      "step": 132397
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9364515542984009,
      "learning_rate": 0.00023029945591130822,
      "loss": 3.1311,
      "step": 132398
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.670847177505493,
      "learning_rate": 0.00023029547729061918,
      "loss": 2.9017,
      "step": 132399
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1611239910125732,
      "learning_rate": 0.00023029149868288953,
      "loss": 2.9946,
      "step": 132400
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.9146008491516113,
      "learning_rate": 0.00023028752008811995,
      "loss": 3.0175,
      "step": 132401
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1125526428222656,
      "learning_rate": 0.00023028354150631127,
      "loss": 2.7545,
      "step": 132402
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8824360370635986,
      "learning_rate": 0.00023027956293746426,
      "loss": 3.0354,
      "step": 132403
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4082651138305664,
      "learning_rate": 0.00023027558438157959,
      "loss": 2.8546,
      "step": 132404
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1957221031188965,
      "learning_rate": 0.00023027160583865796,
      "loss": 3.0513,
      "step": 132405
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0954856872558594,
      "learning_rate": 0.00023026762730870014,
      "loss": 3.1792,
      "step": 132406
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0401079654693604,
      "learning_rate": 0.00023026364879170688,
      "loss": 2.9538,
      "step": 132407
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.201537609100342,
      "learning_rate": 0.00023025967028767892,
      "loss": 2.9343,
      "step": 132408
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8431808948516846,
      "learning_rate": 0.00023025569179661702,
      "loss": 3.0817,
      "step": 132409
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9710142612457275,
      "learning_rate": 0.00023025171331852204,
      "loss": 3.0608,
      "step": 132410
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.2033615112304688,
      "learning_rate": 0.0002302477348533944,
      "loss": 3.011,
      "step": 132411
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8252270221710205,
      "learning_rate": 0.00023024375640123506,
      "loss": 3.0765,
      "step": 132412
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0444750785827637,
      "learning_rate": 0.00023023977796204472,
      "loss": 3.0173,
      "step": 132413
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1110944747924805,
      "learning_rate": 0.00023023579953582412,
      "loss": 3.0079,
      "step": 132414
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.286620616912842,
      "learning_rate": 0.000230231821122574,
      "loss": 3.2577,
      "step": 132415
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.114466667175293,
      "learning_rate": 0.00023022784272229527,
      "loss": 2.8515,
      "step": 132416
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8210054636001587,
      "learning_rate": 0.00023022386433498834,
      "loss": 2.943,
      "step": 132417
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.949580430984497,
      "learning_rate": 0.0002302198859606541,
      "loss": 3.0739,
      "step": 132418
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5649101734161377,
      "learning_rate": 0.0002302159075992933,
      "loss": 2.9647,
      "step": 132419
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.568500280380249,
      "learning_rate": 0.00023021192925090671,
      "loss": 3.1829,
      "step": 132420
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4856648445129395,
      "learning_rate": 0.000230207950915495,
      "loss": 2.8746,
      "step": 132421
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.122009754180908,
      "learning_rate": 0.00023020397259305913,
      "loss": 2.813,
      "step": 132422
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.2094321250915527,
      "learning_rate": 0.00023019999428359946,
      "loss": 3.1399,
      "step": 132423
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9123427867889404,
      "learning_rate": 0.000230196015987117,
      "loss": 3.2312,
      "step": 132424
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0520615577697754,
      "learning_rate": 0.00023019203770361236,
      "loss": 2.9547,
      "step": 132425
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.924081802368164,
      "learning_rate": 0.00023018805943308634,
      "loss": 2.7775,
      "step": 132426
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.0024163722991943,
      "learning_rate": 0.00023018408117553974,
      "loss": 3.0805,
      "step": 132427
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.42119836807251,
      "learning_rate": 0.00023018010293097328,
      "loss": 3.1321,
      "step": 132428
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0742549896240234,
      "learning_rate": 0.0002301761246993876,
      "loss": 3.0236,
      "step": 132429
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0276787281036377,
      "learning_rate": 0.00023017214648078345,
      "loss": 2.8409,
      "step": 132430
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3305585384368896,
      "learning_rate": 0.00023016816827516168,
      "loss": 2.9059,
      "step": 132431
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5406274795532227,
      "learning_rate": 0.0002301641900825229,
      "loss": 2.9019,
      "step": 132432
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0054376125335693,
      "learning_rate": 0.00023016021190286793,
      "loss": 2.8185,
      "step": 132433
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0765020847320557,
      "learning_rate": 0.00023015623373619766,
      "loss": 2.9397,
      "step": 132434
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0889663696289062,
      "learning_rate": 0.00023015225558251247,
      "loss": 2.9938,
      "step": 132435
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.4751884937286377,
      "learning_rate": 0.00023014827744181334,
      "loss": 2.7667,
      "step": 132436
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5578176975250244,
      "learning_rate": 0.00023014429931410095,
      "loss": 3.1051,
      "step": 132437
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.8613513708114624,
      "learning_rate": 0.00023014032119937609,
      "loss": 2.9429,
      "step": 132438
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.02001690864563,
      "learning_rate": 0.00023013634309763942,
      "loss": 2.7995,
      "step": 132439
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.028812885284424,
      "learning_rate": 0.00023013236500889184,
      "loss": 2.8268,
      "step": 132440
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0334434509277344,
      "learning_rate": 0.00023012838693313388,
      "loss": 3.0887,
      "step": 132441
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.255526542663574,
      "learning_rate": 0.00023012440887036634,
      "loss": 3.1975,
      "step": 132442
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4692089557647705,
      "learning_rate": 0.00023012043082059003,
      "loss": 2.6958,
      "step": 132443
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9690141677856445,
      "learning_rate": 0.00023011645278380564,
      "loss": 3.0827,
      "step": 132444
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.147324562072754,
      "learning_rate": 0.0002301124747600139,
      "loss": 3.1078,
      "step": 132445
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3516671657562256,
      "learning_rate": 0.0002301084967492157,
      "loss": 2.7741,
      "step": 132446
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.041902542114258,
      "learning_rate": 0.00023010451875141153,
      "loss": 2.9825,
      "step": 132447
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.570789098739624,
      "learning_rate": 0.00023010054076660224,
      "loss": 3.1061,
      "step": 132448
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1785895824432373,
      "learning_rate": 0.0002300965627947886,
      "loss": 3.1475,
      "step": 132449
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.162843704223633,
      "learning_rate": 0.0002300925848359713,
      "loss": 2.9688,
      "step": 132450
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.457993030548096,
      "learning_rate": 0.00023008860689015113,
      "loss": 3.0977,
      "step": 132451
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.030107259750366,
      "learning_rate": 0.00023008462895732882,
      "loss": 3.2397,
      "step": 132452
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.4250123500823975,
      "learning_rate": 0.0002300806510375052,
      "loss": 3.0508,
      "step": 132453
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9077003002166748,
      "learning_rate": 0.0002300766731306808,
      "loss": 3.1432,
      "step": 132454
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.797917366027832,
      "learning_rate": 0.00023007269523685645,
      "loss": 3.0448,
      "step": 132455
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.675753116607666,
      "learning_rate": 0.0002300687173560329,
      "loss": 2.9263,
      "step": 132456
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1186883449554443,
      "learning_rate": 0.0002300647394882109,
      "loss": 3.0723,
      "step": 132457
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.098970651626587,
      "learning_rate": 0.0002300607616333912,
      "loss": 3.0527,
      "step": 132458
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.391162872314453,
      "learning_rate": 0.00023005678379157463,
      "loss": 3.1408,
      "step": 132459
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.773860216140747,
      "learning_rate": 0.00023005280596276174,
      "loss": 3.1405,
      "step": 132460
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.610304355621338,
      "learning_rate": 0.00023004882814695328,
      "loss": 2.895,
      "step": 132461
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3207333087921143,
      "learning_rate": 0.00023004485034415015,
      "loss": 2.7958,
      "step": 132462
    },
    {
      "epoch": 1.72,
      "grad_norm": 5.398081302642822,
      "learning_rate": 0.00023004087255435292,
      "loss": 3.0438,
      "step": 132463
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.173153400421143,
      "learning_rate": 0.00023003689477756249,
      "loss": 3.0657,
      "step": 132464
    },
    {
      "epoch": 1.72,
      "grad_norm": 4.371714115142822,
      "learning_rate": 0.00023003291701377955,
      "loss": 2.9739,
      "step": 132465
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1442999839782715,
      "learning_rate": 0.0002300289392630048,
      "loss": 3.1948,
      "step": 132466
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.3340179920196533,
      "learning_rate": 0.0002300249615252389,
      "loss": 3.0484,
      "step": 132467
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.5105843544006348,
      "learning_rate": 0.00023002098380048273,
      "loss": 3.1393,
      "step": 132468
    },
    {
      "epoch": 1.72,
      "grad_norm": 5.225512981414795,
      "learning_rate": 0.000230017006088737,
      "loss": 3.0702,
      "step": 132469
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.9548299312591553,
      "learning_rate": 0.00023001302839000237,
      "loss": 2.7844,
      "step": 132470
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0683701038360596,
      "learning_rate": 0.00023000905070427974,
      "loss": 2.9177,
      "step": 132471
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.8360564708709717,
      "learning_rate": 0.00023000507303156965,
      "loss": 3.0162,
      "step": 132472
    },
    {
      "epoch": 1.72,
      "grad_norm": 3.178351640701294,
      "learning_rate": 0.00023000109537187297,
      "loss": 2.933,
      "step": 132473
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9314452409744263,
      "learning_rate": 0.00022999711772519048,
      "loss": 2.9149,
      "step": 132474
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.272351026535034,
      "learning_rate": 0.00022999314009152275,
      "loss": 3.084,
      "step": 132475
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.9658756256103516,
      "learning_rate": 0.00022998916247087066,
      "loss": 3.199,
      "step": 132476
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.047736644744873,
      "learning_rate": 0.00022998518486323492,
      "loss": 3.0501,
      "step": 132477
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0968098640441895,
      "learning_rate": 0.00022998120726861623,
      "loss": 2.8932,
      "step": 132478
    },
    {
      "epoch": 1.72,
      "grad_norm": 1.7389825582504272,
      "learning_rate": 0.00022997722968701536,
      "loss": 3.1793,
      "step": 132479
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1998119354248047,
      "learning_rate": 0.00022997325211843297,
      "loss": 3.2037,
      "step": 132480
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.0186030864715576,
      "learning_rate": 0.00022996927456287003,
      "loss": 3.1023,
      "step": 132481
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8663160800933838,
      "learning_rate": 0.000229965297020327,
      "loss": 2.8285,
      "step": 132482
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0064873695373535,
      "learning_rate": 0.00022996131949080484,
      "loss": 2.7047,
      "step": 132483
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.120861530303955,
      "learning_rate": 0.00022995734197430413,
      "loss": 2.8167,
      "step": 132484
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7423250675201416,
      "learning_rate": 0.00022995336447082564,
      "loss": 3.0804,
      "step": 132485
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4321508407592773,
      "learning_rate": 0.00022994938698037018,
      "loss": 3.0588,
      "step": 132486
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.140805721282959,
      "learning_rate": 0.00022994540950293843,
      "loss": 3.0593,
      "step": 132487
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.768223762512207,
      "learning_rate": 0.00022994143203853123,
      "loss": 3.0497,
      "step": 132488
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.200632095336914,
      "learning_rate": 0.00022993745458714925,
      "loss": 3.248,
      "step": 132489
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.524261474609375,
      "learning_rate": 0.00022993347714879314,
      "loss": 2.9844,
      "step": 132490
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9828377962112427,
      "learning_rate": 0.0002299294997234637,
      "loss": 3.0586,
      "step": 132491
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.2341132164001465,
      "learning_rate": 0.00022992552231116168,
      "loss": 3.0313,
      "step": 132492
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.070604085922241,
      "learning_rate": 0.00022992154491188787,
      "loss": 2.7734,
      "step": 132493
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3029463291168213,
      "learning_rate": 0.00022991756752564294,
      "loss": 3.0284,
      "step": 132494
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2391233444213867,
      "learning_rate": 0.0002299135901524278,
      "loss": 3.0261,
      "step": 132495
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1931238174438477,
      "learning_rate": 0.0002299096127922429,
      "loss": 3.1886,
      "step": 132496
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.093029260635376,
      "learning_rate": 0.00022990563544508915,
      "loss": 3.0899,
      "step": 132497
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9738744497299194,
      "learning_rate": 0.00022990165811096725,
      "loss": 3.1392,
      "step": 132498
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8638375997543335,
      "learning_rate": 0.00022989768078987796,
      "loss": 2.9394,
      "step": 132499
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.597860813140869,
      "learning_rate": 0.00022989370348182203,
      "loss": 2.9467,
      "step": 132500
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2283713817596436,
      "learning_rate": 0.0002298897261868003,
      "loss": 2.8191,
      "step": 132501
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.411722183227539,
      "learning_rate": 0.00022988574890481323,
      "loss": 2.7442,
      "step": 132502
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.897888660430908,
      "learning_rate": 0.0002298817716358618,
      "loss": 2.8885,
      "step": 132503
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2617862224578857,
      "learning_rate": 0.00022987779437994658,
      "loss": 3.0479,
      "step": 132504
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9774808883666992,
      "learning_rate": 0.00022987381713706847,
      "loss": 3.1885,
      "step": 132505
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.682857513427734,
      "learning_rate": 0.0002298698399072281,
      "loss": 3.1877,
      "step": 132506
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.429664373397827,
      "learning_rate": 0.00022986586269042642,
      "loss": 2.7797,
      "step": 132507
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.665947198867798,
      "learning_rate": 0.00022986188548666385,
      "loss": 3.0419,
      "step": 132508
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.100724220275879,
      "learning_rate": 0.00022985790829594129,
      "loss": 3.1337,
      "step": 132509
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.386014938354492,
      "learning_rate": 0.00022985393111825942,
      "loss": 3.109,
      "step": 132510
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.561946392059326,
      "learning_rate": 0.00022984995395361908,
      "loss": 3.0908,
      "step": 132511
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.717534065246582,
      "learning_rate": 0.00022984597680202095,
      "loss": 2.8506,
      "step": 132512
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3499972820281982,
      "learning_rate": 0.00022984199966346588,
      "loss": 2.8371,
      "step": 132513
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.388530731201172,
      "learning_rate": 0.0002298380225379544,
      "loss": 2.7166,
      "step": 132514
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7939014434814453,
      "learning_rate": 0.00022983404542548737,
      "loss": 2.9816,
      "step": 132515
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.9142322540283203,
      "learning_rate": 0.0002298300683260655,
      "loss": 3.0325,
      "step": 132516
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1964449882507324,
      "learning_rate": 0.00022982609123968956,
      "loss": 2.9002,
      "step": 132517
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.18920636177063,
      "learning_rate": 0.00022982211416636023,
      "loss": 2.765,
      "step": 132518
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.141223430633545,
      "learning_rate": 0.00022981813710607832,
      "loss": 2.8956,
      "step": 132519
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1936991214752197,
      "learning_rate": 0.00022981416005884468,
      "loss": 2.9154,
      "step": 132520
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.95143461227417,
      "learning_rate": 0.00022981018302465977,
      "loss": 3.1146,
      "step": 132521
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.389906406402588,
      "learning_rate": 0.0002298062060035245,
      "loss": 2.8508,
      "step": 132522
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8189706802368164,
      "learning_rate": 0.00022980222899543953,
      "loss": 3.1442,
      "step": 132523
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5189192295074463,
      "learning_rate": 0.0002297982520004057,
      "loss": 2.9974,
      "step": 132524
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.702004075050354,
      "learning_rate": 0.00022979427501842369,
      "loss": 2.8923,
      "step": 132525
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8674931526184082,
      "learning_rate": 0.00022979029804949438,
      "loss": 3.047,
      "step": 132526
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.740549921989441,
      "learning_rate": 0.00022978632109361822,
      "loss": 2.9695,
      "step": 132527
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9500384330749512,
      "learning_rate": 0.00022978234415079612,
      "loss": 2.8879,
      "step": 132528
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.530439853668213,
      "learning_rate": 0.00022977836722102882,
      "loss": 2.8645,
      "step": 132529
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1195180416107178,
      "learning_rate": 0.00022977439030431705,
      "loss": 2.9877,
      "step": 132530
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7132937908172607,
      "learning_rate": 0.00022977041340066157,
      "loss": 3.036,
      "step": 132531
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9660438299179077,
      "learning_rate": 0.00022976643651006316,
      "loss": 2.9298,
      "step": 132532
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.3998751640319824,
      "learning_rate": 0.00022976245963252242,
      "loss": 3.1238,
      "step": 132533
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.6035664081573486,
      "learning_rate": 0.0002297584827680401,
      "loss": 3.0082,
      "step": 132534
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.155590057373047,
      "learning_rate": 0.0002297545059166171,
      "loss": 3.206,
      "step": 132535
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.050992488861084,
      "learning_rate": 0.000229750529078254,
      "loss": 2.7793,
      "step": 132536
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7830190658569336,
      "learning_rate": 0.00022974655225295159,
      "loss": 2.8441,
      "step": 132537
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7308027744293213,
      "learning_rate": 0.0002297425754407108,
      "loss": 3.1634,
      "step": 132538
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.052485227584839,
      "learning_rate": 0.00022973859864153202,
      "loss": 3.0889,
      "step": 132539
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.609715700149536,
      "learning_rate": 0.00022973462185541617,
      "loss": 3.092,
      "step": 132540
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2539541721343994,
      "learning_rate": 0.00022973064508236403,
      "loss": 2.8563,
      "step": 132541
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8763409852981567,
      "learning_rate": 0.00022972666832237623,
      "loss": 3.0119,
      "step": 132542
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0498688220977783,
      "learning_rate": 0.00022972269157545358,
      "loss": 2.796,
      "step": 132543
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0136985778808594,
      "learning_rate": 0.00022971871484159695,
      "loss": 2.8108,
      "step": 132544
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.258683919906616,
      "learning_rate": 0.00022971473812080683,
      "loss": 2.9488,
      "step": 132545
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.090428590774536,
      "learning_rate": 0.000229710761413084,
      "loss": 2.7591,
      "step": 132546
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.947816252708435,
      "learning_rate": 0.00022970678471842933,
      "loss": 3.074,
      "step": 132547
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7803977727890015,
      "learning_rate": 0.00022970280803684347,
      "loss": 3.0023,
      "step": 132548
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7914762496948242,
      "learning_rate": 0.0002296988313683272,
      "loss": 3.1145,
      "step": 132549
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.098210334777832,
      "learning_rate": 0.0002296948547128813,
      "loss": 3.008,
      "step": 132550
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.198179244995117,
      "learning_rate": 0.00022969087807050645,
      "loss": 2.8583,
      "step": 132551
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.479995012283325,
      "learning_rate": 0.0002296869014412033,
      "loss": 2.905,
      "step": 132552
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4013612270355225,
      "learning_rate": 0.00022968292482497272,
      "loss": 2.8667,
      "step": 132553
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.033515453338623,
      "learning_rate": 0.0002296789482218154,
      "loss": 2.952,
      "step": 132554
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1033737659454346,
      "learning_rate": 0.0002296749716317321,
      "loss": 3.0597,
      "step": 132555
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3595502376556396,
      "learning_rate": 0.0002296709950547236,
      "loss": 2.9522,
      "step": 132556
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9891877174377441,
      "learning_rate": 0.00022966701849079055,
      "loss": 3.3752,
      "step": 132557
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9406723976135254,
      "learning_rate": 0.00022966304193993374,
      "loss": 2.9338,
      "step": 132558
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9253051280975342,
      "learning_rate": 0.00022965906540215388,
      "loss": 3.1768,
      "step": 132559
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.409956216812134,
      "learning_rate": 0.00022965508887745173,
      "loss": 3.1555,
      "step": 132560
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.01678204536438,
      "learning_rate": 0.00022965111236582803,
      "loss": 3.0241,
      "step": 132561
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.102848529815674,
      "learning_rate": 0.00022964713586728355,
      "loss": 3.0395,
      "step": 132562
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8408660888671875,
      "learning_rate": 0.00022964315938181896,
      "loss": 3.1649,
      "step": 132563
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3209292888641357,
      "learning_rate": 0.00022963918290943498,
      "loss": 2.8256,
      "step": 132564
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.12321138381958,
      "learning_rate": 0.00022963520645013255,
      "loss": 3.0664,
      "step": 132565
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1524739265441895,
      "learning_rate": 0.00022963123000391215,
      "loss": 2.9548,
      "step": 132566
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9231607913970947,
      "learning_rate": 0.00022962725357077464,
      "loss": 2.8736,
      "step": 132567
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3943796157836914,
      "learning_rate": 0.0002296232771507208,
      "loss": 2.9611,
      "step": 132568
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5781798362731934,
      "learning_rate": 0.0002296193007437513,
      "loss": 2.8004,
      "step": 132569
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1067445278167725,
      "learning_rate": 0.00022961532434986686,
      "loss": 2.962,
      "step": 132570
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8176803588867188,
      "learning_rate": 0.00022961134796906828,
      "loss": 2.9294,
      "step": 132571
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6471757888793945,
      "learning_rate": 0.00022960737160135627,
      "loss": 2.8095,
      "step": 132572
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1375975608825684,
      "learning_rate": 0.00022960339524673163,
      "loss": 2.8363,
      "step": 132573
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.301180601119995,
      "learning_rate": 0.00022959941890519507,
      "loss": 2.8321,
      "step": 132574
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7426023483276367,
      "learning_rate": 0.00022959544257674725,
      "loss": 2.9746,
      "step": 132575
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7717485427856445,
      "learning_rate": 0.00022959146626138895,
      "loss": 3.0595,
      "step": 132576
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8839415311813354,
      "learning_rate": 0.00022958748995912092,
      "loss": 3.0677,
      "step": 132577
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6986019611358643,
      "learning_rate": 0.00022958351366994393,
      "loss": 2.9841,
      "step": 132578
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8460850715637207,
      "learning_rate": 0.00022957953739385868,
      "loss": 2.8916,
      "step": 132579
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.046102285385132,
      "learning_rate": 0.00022957556113086604,
      "loss": 3.1132,
      "step": 132580
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.070009231567383,
      "learning_rate": 0.0002295715848809665,
      "loss": 2.9413,
      "step": 132581
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1233069896698,
      "learning_rate": 0.00022956760864416097,
      "loss": 2.7659,
      "step": 132582
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9655871391296387,
      "learning_rate": 0.0002295636324204501,
      "loss": 2.981,
      "step": 132583
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5889623165130615,
      "learning_rate": 0.00022955965620983474,
      "loss": 3.0019,
      "step": 132584
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3389179706573486,
      "learning_rate": 0.0002295556800123155,
      "loss": 2.7533,
      "step": 132585
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.013392925262451,
      "learning_rate": 0.00022955170382789325,
      "loss": 3.2383,
      "step": 132586
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1573758125305176,
      "learning_rate": 0.00022954772765656876,
      "loss": 2.8895,
      "step": 132587
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8609119653701782,
      "learning_rate": 0.00022954375149834255,
      "loss": 2.909,
      "step": 132588
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.970719575881958,
      "learning_rate": 0.0002295397753532155,
      "loss": 2.902,
      "step": 132589
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2331929206848145,
      "learning_rate": 0.00022953579922118834,
      "loss": 2.9957,
      "step": 132590
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7841001749038696,
      "learning_rate": 0.0002295318231022618,
      "loss": 3.0018,
      "step": 132591
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.642489433288574,
      "learning_rate": 0.00022952784699643662,
      "loss": 3.0492,
      "step": 132592
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1342408657073975,
      "learning_rate": 0.00022952387090371368,
      "loss": 2.736,
      "step": 132593
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8616408109664917,
      "learning_rate": 0.00022951989482409343,
      "loss": 2.9087,
      "step": 132594
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.078280210494995,
      "learning_rate": 0.00022951591875757679,
      "loss": 2.8167,
      "step": 132595
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.113645553588867,
      "learning_rate": 0.00022951194270416446,
      "loss": 2.9645,
      "step": 132596
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.7708370685577393,
      "learning_rate": 0.0002295079666638572,
      "loss": 3.0264,
      "step": 132597
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8942302465438843,
      "learning_rate": 0.00022950399063665576,
      "loss": 3.2582,
      "step": 132598
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8504271507263184,
      "learning_rate": 0.00022950001462256092,
      "loss": 3.1005,
      "step": 132599
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8996853828430176,
      "learning_rate": 0.00022949603862157326,
      "loss": 2.7236,
      "step": 132600
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.271045684814453,
      "learning_rate": 0.00022949206263369362,
      "loss": 2.8857,
      "step": 132601
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1208791732788086,
      "learning_rate": 0.00022948808665892276,
      "loss": 2.9793,
      "step": 132602
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.933279037475586,
      "learning_rate": 0.00022948411069726138,
      "loss": 3.0214,
      "step": 132603
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.955147624015808,
      "learning_rate": 0.00022948013474871021,
      "loss": 3.2342,
      "step": 132604
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.14217472076416,
      "learning_rate": 0.0002294761588132702,
      "loss": 2.8768,
      "step": 132605
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3705716133117676,
      "learning_rate": 0.00022947218289094172,
      "loss": 3.0444,
      "step": 132606
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1156229972839355,
      "learning_rate": 0.0002294682069817257,
      "loss": 2.9911,
      "step": 132607
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2688777446746826,
      "learning_rate": 0.00022946423108562288,
      "loss": 2.5082,
      "step": 132608
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3271477222442627,
      "learning_rate": 0.00022946025520263402,
      "loss": 2.9154,
      "step": 132609
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.277655839920044,
      "learning_rate": 0.00022945627933275978,
      "loss": 3.1542,
      "step": 132610
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3915657997131348,
      "learning_rate": 0.00022945230347600114,
      "loss": 2.6149,
      "step": 132611
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.803936719894409,
      "learning_rate": 0.00022944832763235846,
      "loss": 3.2056,
      "step": 132612
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3251876831054688,
      "learning_rate": 0.0002294443518018327,
      "loss": 2.8248,
      "step": 132613
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1216914653778076,
      "learning_rate": 0.00022944037598442456,
      "loss": 2.8999,
      "step": 132614
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.260538101196289,
      "learning_rate": 0.00022943640018013476,
      "loss": 3.055,
      "step": 132615
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1118290424346924,
      "learning_rate": 0.0002294324243889641,
      "loss": 2.8911,
      "step": 132616
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.165090799331665,
      "learning_rate": 0.00022942844861091342,
      "loss": 3.0342,
      "step": 132617
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0707437992095947,
      "learning_rate": 0.00022942447284598314,
      "loss": 2.9927,
      "step": 132618
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.07197904586792,
      "learning_rate": 0.00022942049709417424,
      "loss": 3.0164,
      "step": 132619
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0745208263397217,
      "learning_rate": 0.00022941652135548737,
      "loss": 3.0033,
      "step": 132620
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9483625888824463,
      "learning_rate": 0.00022941254562992331,
      "loss": 3.183,
      "step": 132621
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0598199367523193,
      "learning_rate": 0.00022940856991748282,
      "loss": 3.2362,
      "step": 132622
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6230599880218506,
      "learning_rate": 0.0002294045942181667,
      "loss": 2.9486,
      "step": 132623
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.048227071762085,
      "learning_rate": 0.00022940061853197547,
      "loss": 2.9493,
      "step": 132624
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3068978786468506,
      "learning_rate": 0.00022939664285891,
      "loss": 2.8607,
      "step": 132625
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6716678142547607,
      "learning_rate": 0.00022939266719897104,
      "loss": 2.8334,
      "step": 132626
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.019834041595459,
      "learning_rate": 0.0002293886915521593,
      "loss": 3.0307,
      "step": 132627
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.017414093017578,
      "learning_rate": 0.00022938471591847556,
      "loss": 2.9607,
      "step": 132628
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0216591358184814,
      "learning_rate": 0.00022938074029792065,
      "loss": 2.8562,
      "step": 132629
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2064292430877686,
      "learning_rate": 0.00022937676469049506,
      "loss": 2.9944,
      "step": 132630
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.322760820388794,
      "learning_rate": 0.0002293727890961997,
      "loss": 2.8402,
      "step": 132631
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8109381198883057,
      "learning_rate": 0.00022936881351503523,
      "loss": 2.828,
      "step": 132632
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2043802738189697,
      "learning_rate": 0.00022936483794700244,
      "loss": 3.0824,
      "step": 132633
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6551952362060547,
      "learning_rate": 0.00022936086239210202,
      "loss": 2.6331,
      "step": 132634
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1920456886291504,
      "learning_rate": 0.00022935688685033487,
      "loss": 2.7904,
      "step": 132635
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.103750467300415,
      "learning_rate": 0.0002293529113217016,
      "loss": 2.9774,
      "step": 132636
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9359009265899658,
      "learning_rate": 0.0002293489358062029,
      "loss": 3.0918,
      "step": 132637
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7624292373657227,
      "learning_rate": 0.00022934496030383952,
      "loss": 3.1954,
      "step": 132638
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2790634632110596,
      "learning_rate": 0.0002293409848146123,
      "loss": 3.0981,
      "step": 132639
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.944653272628784,
      "learning_rate": 0.0002293370093385219,
      "loss": 2.8194,
      "step": 132640
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.19735050201416,
      "learning_rate": 0.00022933303387556915,
      "loss": 2.9076,
      "step": 132641
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.165858268737793,
      "learning_rate": 0.00022932905842575462,
      "loss": 2.9149,
      "step": 132642
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0643396377563477,
      "learning_rate": 0.0002293250829890793,
      "loss": 3.0158,
      "step": 132643
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.167757749557495,
      "learning_rate": 0.00022932110756554365,
      "loss": 3.1932,
      "step": 132644
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.243049144744873,
      "learning_rate": 0.00022931713215514855,
      "loss": 3.1779,
      "step": 132645
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1269917488098145,
      "learning_rate": 0.00022931315675789476,
      "loss": 2.8074,
      "step": 132646
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3978428840637207,
      "learning_rate": 0.000229309181373783,
      "loss": 2.7984,
      "step": 132647
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.451789617538452,
      "learning_rate": 0.00022930520600281397,
      "loss": 3.0376,
      "step": 132648
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.308485746383667,
      "learning_rate": 0.00022930123064498847,
      "loss": 3.0219,
      "step": 132649
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0251388549804688,
      "learning_rate": 0.0002292972553003072,
      "loss": 3.0152,
      "step": 132650
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8665217161178589,
      "learning_rate": 0.0002292932799687709,
      "loss": 2.9388,
      "step": 132651
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.345704555511475,
      "learning_rate": 0.00022928930465038026,
      "loss": 2.9084,
      "step": 132652
    },
    {
      "epoch": 1.73,
      "grad_norm": 5.358154296875,
      "learning_rate": 0.0002292853293451361,
      "loss": 3.0491,
      "step": 132653
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.307859420776367,
      "learning_rate": 0.0002292813540530392,
      "loss": 2.9161,
      "step": 132654
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5497941970825195,
      "learning_rate": 0.00022927737877409017,
      "loss": 2.873,
      "step": 132655
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.501390218734741,
      "learning_rate": 0.00022927340350828982,
      "loss": 2.8021,
      "step": 132656
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.539909839630127,
      "learning_rate": 0.00022926942825563892,
      "loss": 2.9723,
      "step": 132657
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.521163463592529,
      "learning_rate": 0.00022926545301613812,
      "loss": 2.9065,
      "step": 132658
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2694923877716064,
      "learning_rate": 0.00022926147778978817,
      "loss": 3.1009,
      "step": 132659
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.528726816177368,
      "learning_rate": 0.00022925750257658998,
      "loss": 3.3429,
      "step": 132660
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.526515483856201,
      "learning_rate": 0.00022925352737654403,
      "loss": 3.0501,
      "step": 132661
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.55601167678833,
      "learning_rate": 0.00022924955218965118,
      "loss": 2.8431,
      "step": 132662
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0728564262390137,
      "learning_rate": 0.0002292455770159122,
      "loss": 2.8423,
      "step": 132663
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.101327657699585,
      "learning_rate": 0.00022924160185532782,
      "loss": 2.8179,
      "step": 132664
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.031881093978882,
      "learning_rate": 0.0002292376267078988,
      "loss": 2.9598,
      "step": 132665
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2853569984436035,
      "learning_rate": 0.00022923365157362586,
      "loss": 2.6678,
      "step": 132666
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4893555641174316,
      "learning_rate": 0.00022922967645250966,
      "loss": 2.9515,
      "step": 132667
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8161981105804443,
      "learning_rate": 0.00022922570134455099,
      "loss": 2.8276,
      "step": 132668
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7995350360870361,
      "learning_rate": 0.00022922172624975057,
      "loss": 2.7963,
      "step": 132669
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.21583890914917,
      "learning_rate": 0.00022921775116810917,
      "loss": 2.906,
      "step": 132670
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.834684133529663,
      "learning_rate": 0.00022921377609962757,
      "loss": 2.8357,
      "step": 132671
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9454658031463623,
      "learning_rate": 0.00022920980104430656,
      "loss": 3.1887,
      "step": 132672
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1357266902923584,
      "learning_rate": 0.00022920582600214666,
      "loss": 3.1659,
      "step": 132673
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7451822757720947,
      "learning_rate": 0.00022920185097314875,
      "loss": 2.9543,
      "step": 132674
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.371795415878296,
      "learning_rate": 0.00022919787595731354,
      "loss": 2.9843,
      "step": 132675
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0033907890319824,
      "learning_rate": 0.00022919390095464177,
      "loss": 2.7671,
      "step": 132676
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.106494903564453,
      "learning_rate": 0.00022918992596513422,
      "loss": 3.1829,
      "step": 132677
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8571605682373047,
      "learning_rate": 0.0002291859509887917,
      "loss": 3.1656,
      "step": 132678
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0173184871673584,
      "learning_rate": 0.00022918197602561473,
      "loss": 3.0613,
      "step": 132679
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4248359203338623,
      "learning_rate": 0.00022917800107560417,
      "loss": 3.1386,
      "step": 132680
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.020232915878296,
      "learning_rate": 0.00022917402613876076,
      "loss": 2.8598,
      "step": 132681
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.222806215286255,
      "learning_rate": 0.00022917005121508524,
      "loss": 2.8866,
      "step": 132682
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9437718391418457,
      "learning_rate": 0.00022916607630457832,
      "loss": 3.0087,
      "step": 132683
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8097001314163208,
      "learning_rate": 0.0002291621014072409,
      "loss": 3.2829,
      "step": 132684
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9525195360183716,
      "learning_rate": 0.00022915812652307346,
      "loss": 3.0656,
      "step": 132685
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.028294324874878,
      "learning_rate": 0.00022915415165207688,
      "loss": 3.3193,
      "step": 132686
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4705464839935303,
      "learning_rate": 0.00022915017679425186,
      "loss": 3.088,
      "step": 132687
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0807063579559326,
      "learning_rate": 0.00022914620194959914,
      "loss": 2.9237,
      "step": 132688
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0968329906463623,
      "learning_rate": 0.00022914222711811953,
      "loss": 3.089,
      "step": 132689
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.90447998046875,
      "learning_rate": 0.0002291382522998138,
      "loss": 3.0316,
      "step": 132690
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2647504806518555,
      "learning_rate": 0.00022913427749468248,
      "loss": 2.9764,
      "step": 132691
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5512313842773438,
      "learning_rate": 0.0002291303027027265,
      "loss": 2.7372,
      "step": 132692
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.739311933517456,
      "learning_rate": 0.00022912632792394642,
      "loss": 2.8193,
      "step": 132693
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.094390392303467,
      "learning_rate": 0.00022912235315834318,
      "loss": 3.2116,
      "step": 132694
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0879604816436768,
      "learning_rate": 0.0002291183784059174,
      "loss": 2.6818,
      "step": 132695
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.046537399291992,
      "learning_rate": 0.00022911440366667,
      "loss": 3.0167,
      "step": 132696
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.140070915222168,
      "learning_rate": 0.00022911042894060144,
      "loss": 2.8923,
      "step": 132697
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.011331796646118,
      "learning_rate": 0.00022910645422771257,
      "loss": 2.7314,
      "step": 132698
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0008883476257324,
      "learning_rate": 0.00022910247952800418,
      "loss": 2.9951,
      "step": 132699
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7098991870880127,
      "learning_rate": 0.00022909850484147693,
      "loss": 3.3009,
      "step": 132700
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9983628988265991,
      "learning_rate": 0.00022909453016813164,
      "loss": 2.9828,
      "step": 132701
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9608829021453857,
      "learning_rate": 0.00022909055550796915,
      "loss": 3.0082,
      "step": 132702
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5803544521331787,
      "learning_rate": 0.00022908658086098993,
      "loss": 2.8305,
      "step": 132703
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0675222873687744,
      "learning_rate": 0.00022908260622719485,
      "loss": 3.0659,
      "step": 132704
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.369847059249878,
      "learning_rate": 0.00022907863160658462,
      "loss": 2.8205,
      "step": 132705
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4182045459747314,
      "learning_rate": 0.00022907465699916005,
      "loss": 2.9827,
      "step": 132706
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8949189186096191,
      "learning_rate": 0.00022907068240492182,
      "loss": 3.0989,
      "step": 132707
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1856820583343506,
      "learning_rate": 0.00022906670782387085,
      "loss": 2.965,
      "step": 132708
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.2196402549743652,
      "learning_rate": 0.00022906273325600754,
      "loss": 2.9159,
      "step": 132709
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3813114166259766,
      "learning_rate": 0.00022905875870133285,
      "loss": 2.9416,
      "step": 132710
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.358304262161255,
      "learning_rate": 0.00022905478415984748,
      "loss": 2.9758,
      "step": 132711
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9818251132965088,
      "learning_rate": 0.00022905080963155213,
      "loss": 3.1075,
      "step": 132712
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.790639877319336,
      "learning_rate": 0.00022904683511644757,
      "loss": 3.0521,
      "step": 132713
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.099653482437134,
      "learning_rate": 0.0002290428606145347,
      "loss": 3.3153,
      "step": 132714
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8107821941375732,
      "learning_rate": 0.000229038886125814,
      "loss": 3.151,
      "step": 132715
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.588254928588867,
      "learning_rate": 0.00022903491165028622,
      "loss": 2.8657,
      "step": 132716
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9247300624847412,
      "learning_rate": 0.00022903093718795224,
      "loss": 2.9765,
      "step": 132717
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.393306255340576,
      "learning_rate": 0.00022902696273881278,
      "loss": 2.9216,
      "step": 132718
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.906514286994934,
      "learning_rate": 0.00022902298830286853,
      "loss": 2.483,
      "step": 132719
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.348663091659546,
      "learning_rate": 0.0002290190138801202,
      "loss": 3.0686,
      "step": 132720
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.236301898956299,
      "learning_rate": 0.00022901503947056874,
      "loss": 3.1575,
      "step": 132721
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.155938148498535,
      "learning_rate": 0.00022901106507421462,
      "loss": 2.8414,
      "step": 132722
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3940436840057373,
      "learning_rate": 0.00022900709069105865,
      "loss": 3.1228,
      "step": 132723
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9932774305343628,
      "learning_rate": 0.0002290031163211016,
      "loss": 2.8843,
      "step": 132724
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.085517168045044,
      "learning_rate": 0.00022899914196434422,
      "loss": 2.8752,
      "step": 132725
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.6594016551971436,
      "learning_rate": 0.00022899516762078724,
      "loss": 2.8812,
      "step": 132726
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6735687255859375,
      "learning_rate": 0.00022899119329043154,
      "loss": 3.0838,
      "step": 132727
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.199820041656494,
      "learning_rate": 0.00022898721897327757,
      "loss": 3.2182,
      "step": 132728
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.51416015625,
      "learning_rate": 0.00022898324466932623,
      "loss": 3.1219,
      "step": 132729
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.505476474761963,
      "learning_rate": 0.00022897927037857823,
      "loss": 2.844,
      "step": 132730
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.001943826675415,
      "learning_rate": 0.00022897529610103434,
      "loss": 3.0412,
      "step": 132731
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1242003440856934,
      "learning_rate": 0.00022897132183669527,
      "loss": 2.7483,
      "step": 132732
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.581308126449585,
      "learning_rate": 0.00022896734758556185,
      "loss": 3.0669,
      "step": 132733
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1256771087646484,
      "learning_rate": 0.00022896337334763465,
      "loss": 2.968,
      "step": 132734
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0841362476348877,
      "learning_rate": 0.00022895939912291458,
      "loss": 2.8877,
      "step": 132735
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.186469793319702,
      "learning_rate": 0.00022895542491140224,
      "loss": 2.9396,
      "step": 132736
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.273906707763672,
      "learning_rate": 0.00022895145071309843,
      "loss": 2.9966,
      "step": 132737
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3200888633728027,
      "learning_rate": 0.0002289474765280039,
      "loss": 3.2997,
      "step": 132738
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.025865316390991,
      "learning_rate": 0.0002289435023561194,
      "loss": 2.7383,
      "step": 132739
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.723541021347046,
      "learning_rate": 0.00022893952819744558,
      "loss": 3.0607,
      "step": 132740
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9998996257781982,
      "learning_rate": 0.00022893555405198326,
      "loss": 3.113,
      "step": 132741
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1906261444091797,
      "learning_rate": 0.00022893157991973325,
      "loss": 2.8911,
      "step": 132742
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1138906478881836,
      "learning_rate": 0.00022892760580069615,
      "loss": 3.1191,
      "step": 132743
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2096176147460938,
      "learning_rate": 0.00022892363169487267,
      "loss": 2.9401,
      "step": 132744
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1611225605010986,
      "learning_rate": 0.00022891965760226375,
      "loss": 3.1413,
      "step": 132745
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1084845066070557,
      "learning_rate": 0.00022891568352286995,
      "loss": 2.9801,
      "step": 132746
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.3686511516571045,
      "learning_rate": 0.000228911709456692,
      "loss": 2.9963,
      "step": 132747
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.844805359840393,
      "learning_rate": 0.00022890773540373079,
      "loss": 3.1178,
      "step": 132748
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8901463747024536,
      "learning_rate": 0.000228903761363987,
      "loss": 3.0825,
      "step": 132749
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.490689516067505,
      "learning_rate": 0.00022889978733746128,
      "loss": 3.1625,
      "step": 132750
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2370879650115967,
      "learning_rate": 0.00022889581332415448,
      "loss": 2.5716,
      "step": 132751
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5486953258514404,
      "learning_rate": 0.00022889183932406728,
      "loss": 3.1014,
      "step": 132752
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3859763145446777,
      "learning_rate": 0.00022888786533720035,
      "loss": 3.204,
      "step": 132753
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3621323108673096,
      "learning_rate": 0.00022888389136355457,
      "loss": 3.0371,
      "step": 132754
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.784226655960083,
      "learning_rate": 0.0002288799174031306,
      "loss": 3.3524,
      "step": 132755
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.09130859375,
      "learning_rate": 0.00022887594345592923,
      "loss": 2.7202,
      "step": 132756
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1265435218811035,
      "learning_rate": 0.0002288719695219512,
      "loss": 3.0322,
      "step": 132757
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1054697036743164,
      "learning_rate": 0.00022886799560119716,
      "loss": 2.754,
      "step": 132758
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.815380096435547,
      "learning_rate": 0.00022886402169366787,
      "loss": 2.7478,
      "step": 132759
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.2437479496002197,
      "learning_rate": 0.00022886004779936413,
      "loss": 3.1349,
      "step": 132760
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.124610424041748,
      "learning_rate": 0.00022885607391828663,
      "loss": 2.9302,
      "step": 132761
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.236783504486084,
      "learning_rate": 0.0002288521000504361,
      "loss": 2.9949,
      "step": 132762
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3667995929718018,
      "learning_rate": 0.00022884812619581349,
      "loss": 2.896,
      "step": 132763
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4369235038757324,
      "learning_rate": 0.0002288441523544192,
      "loss": 2.8429,
      "step": 132764
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4829330444335938,
      "learning_rate": 0.00022884017852625415,
      "loss": 2.8258,
      "step": 132765
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.817907452583313,
      "learning_rate": 0.00022883620471131903,
      "loss": 2.8727,
      "step": 132766
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0231359004974365,
      "learning_rate": 0.0002288322309096146,
      "loss": 2.8995,
      "step": 132767
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8346669673919678,
      "learning_rate": 0.0002288282571211416,
      "loss": 2.767,
      "step": 132768
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.6871535778045654,
      "learning_rate": 0.00022882428334590092,
      "loss": 2.6146,
      "step": 132769
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.670832633972168,
      "learning_rate": 0.00022882030958389302,
      "loss": 3.0017,
      "step": 132770
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7147536277770996,
      "learning_rate": 0.00022881633583511876,
      "loss": 2.9773,
      "step": 132771
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0858871936798096,
      "learning_rate": 0.0002288123620995789,
      "loss": 3.321,
      "step": 132772
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.417921304702759,
      "learning_rate": 0.00022880838837727413,
      "loss": 3.1132,
      "step": 132773
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.712770938873291,
      "learning_rate": 0.00022880441466820522,
      "loss": 3.1018,
      "step": 132774
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3243372440338135,
      "learning_rate": 0.00022880044097237307,
      "loss": 2.7176,
      "step": 132775
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2672855854034424,
      "learning_rate": 0.00022879646728977813,
      "loss": 3.0699,
      "step": 132776
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.709262847900391,
      "learning_rate": 0.00022879249362042127,
      "loss": 2.8391,
      "step": 132777
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.330695390701294,
      "learning_rate": 0.00022878851996430325,
      "loss": 2.8418,
      "step": 132778
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2260913848876953,
      "learning_rate": 0.00022878454632142476,
      "loss": 2.9119,
      "step": 132779
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8408222198486328,
      "learning_rate": 0.0002287805726917866,
      "loss": 2.8516,
      "step": 132780
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7877016067504883,
      "learning_rate": 0.00022877659907538956,
      "loss": 3.0366,
      "step": 132781
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.762969970703125,
      "learning_rate": 0.00022877262547223417,
      "loss": 2.7903,
      "step": 132782
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.292734384536743,
      "learning_rate": 0.00022876865188232133,
      "loss": 2.9682,
      "step": 132783
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5438742637634277,
      "learning_rate": 0.00022876467830565173,
      "loss": 3.0083,
      "step": 132784
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1271183490753174,
      "learning_rate": 0.0002287607047422261,
      "loss": 2.944,
      "step": 132785
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.667010545730591,
      "learning_rate": 0.00022875673119204521,
      "loss": 2.7178,
      "step": 132786
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9628479480743408,
      "learning_rate": 0.0002287527576551098,
      "loss": 3.1695,
      "step": 132787
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5228121280670166,
      "learning_rate": 0.0002287487841314207,
      "loss": 2.6054,
      "step": 132788
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.4200186729431152,
      "learning_rate": 0.00022874481062097848,
      "loss": 3.0734,
      "step": 132789
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.7428979873657227,
      "learning_rate": 0.0002287408371237839,
      "loss": 2.9514,
      "step": 132790
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1899008750915527,
      "learning_rate": 0.00022873686363983773,
      "loss": 3.1118,
      "step": 132791
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.851499319076538,
      "learning_rate": 0.0002287328901691407,
      "loss": 2.8876,
      "step": 132792
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2016122341156006,
      "learning_rate": 0.00022872891671169365,
      "loss": 3.0887,
      "step": 132793
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.700212240219116,
      "learning_rate": 0.00022872494326749731,
      "loss": 3.2205,
      "step": 132794
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3564562797546387,
      "learning_rate": 0.00022872096983655223,
      "loss": 2.8006,
      "step": 132795
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.073066473007202,
      "learning_rate": 0.0002287169964188593,
      "loss": 3.15,
      "step": 132796
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1846652030944824,
      "learning_rate": 0.00022871302301441916,
      "loss": 2.926,
      "step": 132797
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6784181594848633,
      "learning_rate": 0.0002287090496232327,
      "loss": 2.8826,
      "step": 132798
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7387630939483643,
      "learning_rate": 0.00022870507624530046,
      "loss": 3.0383,
      "step": 132799
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.363715887069702,
      "learning_rate": 0.00022870110288062353,
      "loss": 3.0857,
      "step": 132800
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.155117988586426,
      "learning_rate": 0.00022869712952920226,
      "loss": 2.8735,
      "step": 132801
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.706478476524353,
      "learning_rate": 0.0002286931561910375,
      "loss": 2.9762,
      "step": 132802
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.855290174484253,
      "learning_rate": 0.00022868918286613003,
      "loss": 2.9322,
      "step": 132803
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.016195297241211,
      "learning_rate": 0.0002286852095544806,
      "loss": 3.12,
      "step": 132804
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.117561101913452,
      "learning_rate": 0.00022868123625608994,
      "loss": 3.1593,
      "step": 132805
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9927443265914917,
      "learning_rate": 0.00022867726297095894,
      "loss": 3.0642,
      "step": 132806
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.4225738048553467,
      "learning_rate": 0.00022867328969908803,
      "loss": 2.8515,
      "step": 132807
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0140182971954346,
      "learning_rate": 0.0002286693164404781,
      "loss": 2.8596,
      "step": 132808
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0394089221954346,
      "learning_rate": 0.00022866534319512986,
      "loss": 3.2395,
      "step": 132809
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7277772426605225,
      "learning_rate": 0.00022866136996304413,
      "loss": 3.0442,
      "step": 132810
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.037060022354126,
      "learning_rate": 0.00022865739674422157,
      "loss": 2.9139,
      "step": 132811
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.017127513885498,
      "learning_rate": 0.00022865342353866307,
      "loss": 2.8293,
      "step": 132812
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5106256008148193,
      "learning_rate": 0.00022864945034636912,
      "loss": 2.9659,
      "step": 132813
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1385862827301025,
      "learning_rate": 0.0002286454771673406,
      "loss": 2.9514,
      "step": 132814
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8401494026184082,
      "learning_rate": 0.00022864150400157823,
      "loss": 2.7714,
      "step": 132815
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9997789859771729,
      "learning_rate": 0.00022863753084908275,
      "loss": 2.878,
      "step": 132816
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.925154685974121,
      "learning_rate": 0.0002286335577098549,
      "loss": 3.1039,
      "step": 132817
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9266585111618042,
      "learning_rate": 0.00022862958458389547,
      "loss": 2.9821,
      "step": 132818
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2332546710968018,
      "learning_rate": 0.00022862561147120515,
      "loss": 3.0279,
      "step": 132819
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5530543327331543,
      "learning_rate": 0.0002286216383717846,
      "loss": 3.0915,
      "step": 132820
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0075838565826416,
      "learning_rate": 0.00022861766528563468,
      "loss": 2.9931,
      "step": 132821
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3608286380767822,
      "learning_rate": 0.00022861369221275604,
      "loss": 2.8435,
      "step": 132822
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.045320749282837,
      "learning_rate": 0.0002286097191531495,
      "loss": 3.1976,
      "step": 132823
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5085794925689697,
      "learning_rate": 0.0002286057461068158,
      "loss": 2.7603,
      "step": 132824
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7842729091644287,
      "learning_rate": 0.0002286017730737556,
      "loss": 3.131,
      "step": 132825
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.031461477279663,
      "learning_rate": 0.00022859780005396964,
      "loss": 3.1575,
      "step": 132826
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.230487585067749,
      "learning_rate": 0.00022859382704745878,
      "loss": 2.9564,
      "step": 132827
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0813071727752686,
      "learning_rate": 0.00022858985405422363,
      "loss": 3.0293,
      "step": 132828
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9256008863449097,
      "learning_rate": 0.00022858588107426493,
      "loss": 3.105,
      "step": 132829
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5983941555023193,
      "learning_rate": 0.00022858190810758357,
      "loss": 2.9512,
      "step": 132830
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.518270254135132,
      "learning_rate": 0.0002285779351541801,
      "loss": 3.0801,
      "step": 132831
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1819007396698,
      "learning_rate": 0.00022857396221405535,
      "loss": 3.0233,
      "step": 132832
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4549739360809326,
      "learning_rate": 0.00022856998928720999,
      "loss": 2.7642,
      "step": 132833
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.829884648323059,
      "learning_rate": 0.00022856601637364496,
      "loss": 2.9158,
      "step": 132834
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.028646469116211,
      "learning_rate": 0.00022856204347336073,
      "loss": 3.0323,
      "step": 132835
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3422484397888184,
      "learning_rate": 0.00022855807058635825,
      "loss": 2.7815,
      "step": 132836
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2392406463623047,
      "learning_rate": 0.00022855409771263817,
      "loss": 2.8078,
      "step": 132837
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0429370403289795,
      "learning_rate": 0.00022855012485220117,
      "loss": 2.8176,
      "step": 132838
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9140822887420654,
      "learning_rate": 0.00022854615200504805,
      "loss": 3.0163,
      "step": 132839
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.077406644821167,
      "learning_rate": 0.00022854217917117956,
      "loss": 2.6807,
      "step": 132840
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0449166297912598,
      "learning_rate": 0.0002285382063505965,
      "loss": 2.9182,
      "step": 132841
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.472158193588257,
      "learning_rate": 0.00022853423354329952,
      "loss": 2.8304,
      "step": 132842
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.249959707260132,
      "learning_rate": 0.00022853026074928933,
      "loss": 3.1187,
      "step": 132843
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0253255367279053,
      "learning_rate": 0.0002285262879685667,
      "loss": 3.0149,
      "step": 132844
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7682496309280396,
      "learning_rate": 0.0002285223152011324,
      "loss": 2.7912,
      "step": 132845
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.059835195541382,
      "learning_rate": 0.00022851834244698712,
      "loss": 3.0748,
      "step": 132846
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0633931159973145,
      "learning_rate": 0.00022851436970613165,
      "loss": 2.7545,
      "step": 132847
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9960002899169922,
      "learning_rate": 0.00022851039697856682,
      "loss": 2.9921,
      "step": 132848
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.997596263885498,
      "learning_rate": 0.00022850642426429314,
      "loss": 2.9559,
      "step": 132849
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5265157222747803,
      "learning_rate": 0.00022850245156331148,
      "loss": 3.0647,
      "step": 132850
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8292665481567383,
      "learning_rate": 0.00022849847887562253,
      "loss": 2.9511,
      "step": 132851
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4150149822235107,
      "learning_rate": 0.00022849450620122708,
      "loss": 2.8647,
      "step": 132852
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3304600715637207,
      "learning_rate": 0.00022849053354012587,
      "loss": 2.936,
      "step": 132853
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3119728565216064,
      "learning_rate": 0.0002284865608923196,
      "loss": 3.3129,
      "step": 132854
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.277451515197754,
      "learning_rate": 0.00022848258825780915,
      "loss": 3.1653,
      "step": 132855
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.199110507965088,
      "learning_rate": 0.000228478615636595,
      "loss": 3.1022,
      "step": 132856
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.863807201385498,
      "learning_rate": 0.00022847464302867807,
      "loss": 2.8122,
      "step": 132857
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2622621059417725,
      "learning_rate": 0.00022847067043405903,
      "loss": 2.7545,
      "step": 132858
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7641377449035645,
      "learning_rate": 0.00022846669785273863,
      "loss": 2.8547,
      "step": 132859
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.413496494293213,
      "learning_rate": 0.00022846272528471764,
      "loss": 2.9487,
      "step": 132860
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0600850582122803,
      "learning_rate": 0.0002284587527299969,
      "loss": 2.8916,
      "step": 132861
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4031670093536377,
      "learning_rate": 0.00022845478018857692,
      "loss": 2.7536,
      "step": 132862
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.848159909248352,
      "learning_rate": 0.0002284508076604585,
      "loss": 2.9655,
      "step": 132863
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.022679328918457,
      "learning_rate": 0.00022844683514564243,
      "loss": 2.9056,
      "step": 132864
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3322086334228516,
      "learning_rate": 0.00022844286264412947,
      "loss": 2.96,
      "step": 132865
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3737335205078125,
      "learning_rate": 0.00022843889015592033,
      "loss": 2.8851,
      "step": 132866
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9475256204605103,
      "learning_rate": 0.00022843491768101588,
      "loss": 3.0089,
      "step": 132867
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3051397800445557,
      "learning_rate": 0.0002284309452194166,
      "loss": 2.9306,
      "step": 132868
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.9035863876342773,
      "learning_rate": 0.00022842697277112336,
      "loss": 2.9261,
      "step": 132869
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.989891767501831,
      "learning_rate": 0.0002284230003361369,
      "loss": 3.0175,
      "step": 132870
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7495133876800537,
      "learning_rate": 0.00022841902791445793,
      "loss": 2.8928,
      "step": 132871
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.9671738147735596,
      "learning_rate": 0.00022841505550608724,
      "loss": 2.6993,
      "step": 132872
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1979012489318848,
      "learning_rate": 0.00022841108311102566,
      "loss": 2.8937,
      "step": 132873
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2213869094848633,
      "learning_rate": 0.0002284071107292737,
      "loss": 3.1117,
      "step": 132874
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.551546096801758,
      "learning_rate": 0.0002284031383608322,
      "loss": 2.9375,
      "step": 132875
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.665820360183716,
      "learning_rate": 0.00022839916600570192,
      "loss": 2.9211,
      "step": 132876
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.178600788116455,
      "learning_rate": 0.00022839519366388357,
      "loss": 2.9709,
      "step": 132877
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4953036308288574,
      "learning_rate": 0.0002283912213353779,
      "loss": 3.2416,
      "step": 132878
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3131651878356934,
      "learning_rate": 0.0002283872490201858,
      "loss": 2.8445,
      "step": 132879
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.391536235809326,
      "learning_rate": 0.0002283832767183077,
      "loss": 3.1385,
      "step": 132880
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6258652210235596,
      "learning_rate": 0.0002283793044297445,
      "loss": 2.7345,
      "step": 132881
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9817510843276978,
      "learning_rate": 0.00022837533215449698,
      "loss": 2.8839,
      "step": 132882
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.682117462158203,
      "learning_rate": 0.00022837135989256583,
      "loss": 2.881,
      "step": 132883
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0289158821105957,
      "learning_rate": 0.0002283673876439518,
      "loss": 3.0222,
      "step": 132884
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6407549381256104,
      "learning_rate": 0.0002283634154086557,
      "loss": 2.9565,
      "step": 132885
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9775887727737427,
      "learning_rate": 0.00022835944318667814,
      "loss": 3.1083,
      "step": 132886
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.350922107696533,
      "learning_rate": 0.00022835547097801987,
      "loss": 3.1446,
      "step": 132887
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1577954292297363,
      "learning_rate": 0.00022835149878268164,
      "loss": 2.8546,
      "step": 132888
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.354546546936035,
      "learning_rate": 0.00022834752660066425,
      "loss": 3.0079,
      "step": 132889
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0354957580566406,
      "learning_rate": 0.00022834355443196843,
      "loss": 2.8591,
      "step": 132890
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.138334274291992,
      "learning_rate": 0.000228339582276595,
      "loss": 2.8874,
      "step": 132891
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.165245532989502,
      "learning_rate": 0.00022833561013454448,
      "loss": 2.9862,
      "step": 132892
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.927155613899231,
      "learning_rate": 0.00022833163800581774,
      "loss": 2.8704,
      "step": 132893
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.227385997772217,
      "learning_rate": 0.00022832766589041546,
      "loss": 3.1296,
      "step": 132894
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1814260482788086,
      "learning_rate": 0.00022832369378833841,
      "loss": 2.9587,
      "step": 132895
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.5555012226104736,
      "learning_rate": 0.00022831972169958737,
      "loss": 2.8848,
      "step": 132896
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8625705242156982,
      "learning_rate": 0.00022831574962416318,
      "loss": 2.9894,
      "step": 132897
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0475049018859863,
      "learning_rate": 0.00022831177756206633,
      "loss": 2.9798,
      "step": 132898
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8978753089904785,
      "learning_rate": 0.00022830780551329763,
      "loss": 2.7697,
      "step": 132899
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.783177614212036,
      "learning_rate": 0.0002283038334778579,
      "loss": 2.8942,
      "step": 132900
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.632096529006958,
      "learning_rate": 0.0002282998614557478,
      "loss": 2.9155,
      "step": 132901
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2300267219543457,
      "learning_rate": 0.00022829588944696814,
      "loss": 3.0037,
      "step": 132902
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.569179058074951,
      "learning_rate": 0.0002282919174515197,
      "loss": 3.1235,
      "step": 132903
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8681095838546753,
      "learning_rate": 0.0002282879454694031,
      "loss": 3.0033,
      "step": 132904
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9097522497177124,
      "learning_rate": 0.0002282839735006191,
      "loss": 3.035,
      "step": 132905
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1693308353424072,
      "learning_rate": 0.00022828000154516848,
      "loss": 2.9196,
      "step": 132906
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4956698417663574,
      "learning_rate": 0.00022827602960305194,
      "loss": 2.7343,
      "step": 132907
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3445658683776855,
      "learning_rate": 0.00022827205767427018,
      "loss": 2.7085,
      "step": 132908
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9106781482696533,
      "learning_rate": 0.00022826808575882415,
      "loss": 3.1548,
      "step": 132909
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5258066654205322,
      "learning_rate": 0.00022826411385671433,
      "loss": 3.1462,
      "step": 132910
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.898179054260254,
      "learning_rate": 0.00022826014196794165,
      "loss": 2.9443,
      "step": 132911
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.072965621948242,
      "learning_rate": 0.00022825617009250668,
      "loss": 2.909,
      "step": 132912
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.481038808822632,
      "learning_rate": 0.00022825219823041025,
      "loss": 3.1101,
      "step": 132913
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.407127618789673,
      "learning_rate": 0.0002282482263816531,
      "loss": 2.9279,
      "step": 132914
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.121148109436035,
      "learning_rate": 0.000228244254546236,
      "loss": 2.9298,
      "step": 132915
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5829410552978516,
      "learning_rate": 0.00022824028272415965,
      "loss": 2.7953,
      "step": 132916
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1427087783813477,
      "learning_rate": 0.00022823631091542471,
      "loss": 3.2067,
      "step": 132917
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7089154720306396,
      "learning_rate": 0.00022823233912003203,
      "loss": 2.9492,
      "step": 132918
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.5871055126190186,
      "learning_rate": 0.00022822836733798238,
      "loss": 3.0338,
      "step": 132919
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1556246280670166,
      "learning_rate": 0.00022822439556927633,
      "loss": 3.1397,
      "step": 132920
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2839138507843018,
      "learning_rate": 0.00022822042381391474,
      "loss": 2.8908,
      "step": 132921
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.116604804992676,
      "learning_rate": 0.00022821645207189841,
      "loss": 2.879,
      "step": 132922
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.218907356262207,
      "learning_rate": 0.0002282124803432279,
      "loss": 2.9261,
      "step": 132923
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1016509532928467,
      "learning_rate": 0.00022820850862790407,
      "loss": 2.8872,
      "step": 132924
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3685128688812256,
      "learning_rate": 0.00022820453692592763,
      "loss": 2.9385,
      "step": 132925
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0704970359802246,
      "learning_rate": 0.0002282005652372994,
      "loss": 2.7951,
      "step": 132926
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.063009262084961,
      "learning_rate": 0.00022819659356201994,
      "loss": 2.9251,
      "step": 132927
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0375101566314697,
      "learning_rate": 0.0002281926219000902,
      "loss": 3.0083,
      "step": 132928
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.064511299133301,
      "learning_rate": 0.0002281886502515107,
      "loss": 3.1933,
      "step": 132929
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.18430495262146,
      "learning_rate": 0.0002281846786162823,
      "loss": 2.7982,
      "step": 132930
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8879351615905762,
      "learning_rate": 0.00022818070699440573,
      "loss": 3.1018,
      "step": 132931
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.061990737915039,
      "learning_rate": 0.00022817673538588174,
      "loss": 2.8243,
      "step": 132932
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0825939178466797,
      "learning_rate": 0.00022817276379071105,
      "loss": 2.9693,
      "step": 132933
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.483433485031128,
      "learning_rate": 0.00022816879220889443,
      "loss": 3.0993,
      "step": 132934
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.07342267036438,
      "learning_rate": 0.00022816482064043253,
      "loss": 2.8677,
      "step": 132935
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8769768476486206,
      "learning_rate": 0.00022816084908532618,
      "loss": 3.1718,
      "step": 132936
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.173424005508423,
      "learning_rate": 0.00022815687754357606,
      "loss": 2.953,
      "step": 132937
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.4784116744995117,
      "learning_rate": 0.00022815290601518293,
      "loss": 2.8789,
      "step": 132938
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8646799325942993,
      "learning_rate": 0.00022814893450014752,
      "loss": 3.095,
      "step": 132939
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.581712484359741,
      "learning_rate": 0.00022814496299847068,
      "loss": 3.1564,
      "step": 132940
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.175370216369629,
      "learning_rate": 0.00022814099151015297,
      "loss": 3.0865,
      "step": 132941
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9271137714385986,
      "learning_rate": 0.00022813702003519518,
      "loss": 2.9834,
      "step": 132942
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.377041816711426,
      "learning_rate": 0.0002281330485735981,
      "loss": 2.9609,
      "step": 132943
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9171139001846313,
      "learning_rate": 0.00022812907712536242,
      "loss": 2.9453,
      "step": 132944
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0624048709869385,
      "learning_rate": 0.00022812510569048894,
      "loss": 3.0685,
      "step": 132945
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6311168670654297,
      "learning_rate": 0.00022812113426897844,
      "loss": 3.2583,
      "step": 132946
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9226233959197998,
      "learning_rate": 0.00022811716286083147,
      "loss": 3.084,
      "step": 132947
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8301504850387573,
      "learning_rate": 0.00022811319146604888,
      "loss": 3.2014,
      "step": 132948
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.184652328491211,
      "learning_rate": 0.0002281092200846314,
      "loss": 3.0983,
      "step": 132949
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1880135536193848,
      "learning_rate": 0.00022810524871657976,
      "loss": 3.2467,
      "step": 132950
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1675455570220947,
      "learning_rate": 0.00022810127736189474,
      "loss": 2.8627,
      "step": 132951
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9172089099884033,
      "learning_rate": 0.00022809730602057717,
      "loss": 2.903,
      "step": 132952
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.47939133644104,
      "learning_rate": 0.0002280933346926275,
      "loss": 2.9482,
      "step": 132953
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5100083351135254,
      "learning_rate": 0.00022808936337804668,
      "loss": 2.9373,
      "step": 132954
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.235262393951416,
      "learning_rate": 0.00022808539207683538,
      "loss": 2.8289,
      "step": 132955
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.220655918121338,
      "learning_rate": 0.00022808142078899437,
      "loss": 2.9741,
      "step": 132956
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.226022481918335,
      "learning_rate": 0.0002280774495145244,
      "loss": 3.3287,
      "step": 132957
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9938900470733643,
      "learning_rate": 0.00022807347825342628,
      "loss": 2.9442,
      "step": 132958
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.025893449783325,
      "learning_rate": 0.00022806950700570057,
      "loss": 2.8416,
      "step": 132959
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5363824367523193,
      "learning_rate": 0.00022806553577134808,
      "loss": 2.8052,
      "step": 132960
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9800838232040405,
      "learning_rate": 0.00022806156455036956,
      "loss": 2.9269,
      "step": 132961
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0070714950561523,
      "learning_rate": 0.00022805759334276575,
      "loss": 2.946,
      "step": 132962
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0848333835601807,
      "learning_rate": 0.00022805362214853742,
      "loss": 2.997,
      "step": 132963
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.480691432952881,
      "learning_rate": 0.00022804965096768538,
      "loss": 2.8487,
      "step": 132964
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9969183206558228,
      "learning_rate": 0.00022804567980021015,
      "loss": 2.9373,
      "step": 132965
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3453657627105713,
      "learning_rate": 0.00022804170864611258,
      "loss": 3.1043,
      "step": 132966
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4246904850006104,
      "learning_rate": 0.00022803773750539342,
      "loss": 3.1525,
      "step": 132967
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.690556526184082,
      "learning_rate": 0.00022803376637805343,
      "loss": 3.0976,
      "step": 132968
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.319211959838867,
      "learning_rate": 0.00022802979526409325,
      "loss": 3.0915,
      "step": 132969
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.896589994430542,
      "learning_rate": 0.00022802582416351388,
      "loss": 2.8574,
      "step": 132970
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.92016863822937,
      "learning_rate": 0.00022802185307631571,
      "loss": 3.12,
      "step": 132971
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.856258749961853,
      "learning_rate": 0.00022801788200249966,
      "loss": 3.1251,
      "step": 132972
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2124598026275635,
      "learning_rate": 0.0002280139109420664,
      "loss": 2.8868,
      "step": 132973
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4007227420806885,
      "learning_rate": 0.00022800993989501676,
      "loss": 2.872,
      "step": 132974
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.404268503189087,
      "learning_rate": 0.00022800596886135143,
      "loss": 2.8708,
      "step": 132975
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.106398582458496,
      "learning_rate": 0.00022800199784107125,
      "loss": 3.2061,
      "step": 132976
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2887792587280273,
      "learning_rate": 0.00022799802683417676,
      "loss": 3.0682,
      "step": 132977
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0160088539123535,
      "learning_rate": 0.00022799405584066876,
      "loss": 3.0427,
      "step": 132978
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1485648155212402,
      "learning_rate": 0.00022799008486054805,
      "loss": 3.1869,
      "step": 132979
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.6930105686187744,
      "learning_rate": 0.00022798611389381534,
      "loss": 3.1093,
      "step": 132980
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.685513496398926,
      "learning_rate": 0.00022798214294047136,
      "loss": 2.8909,
      "step": 132981
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1863534450531006,
      "learning_rate": 0.000227978172000517,
      "loss": 2.8114,
      "step": 132982
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1284360885620117,
      "learning_rate": 0.0002279742010739527,
      "loss": 2.8419,
      "step": 132983
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.868162751197815,
      "learning_rate": 0.00022797023016077938,
      "loss": 2.9777,
      "step": 132984
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.931354284286499,
      "learning_rate": 0.00022796625926099775,
      "loss": 3.0045,
      "step": 132985
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3455450534820557,
      "learning_rate": 0.00022796228837460857,
      "loss": 3.0283,
      "step": 132986
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0704233646392822,
      "learning_rate": 0.00022795831750161254,
      "loss": 3.0701,
      "step": 132987
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3660056591033936,
      "learning_rate": 0.00022795434664201042,
      "loss": 3.0277,
      "step": 132988
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.500794887542725,
      "learning_rate": 0.00022795037579580307,
      "loss": 2.7054,
      "step": 132989
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.424502372741699,
      "learning_rate": 0.00022794640496299096,
      "loss": 2.9517,
      "step": 132990
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9729983806610107,
      "learning_rate": 0.00022794243414357503,
      "loss": 2.6953,
      "step": 132991
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8584975004196167,
      "learning_rate": 0.00022793846333755594,
      "loss": 2.9992,
      "step": 132992
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8085596561431885,
      "learning_rate": 0.00022793449254493442,
      "loss": 3.0479,
      "step": 132993
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.393770456314087,
      "learning_rate": 0.00022793052176571125,
      "loss": 3.0033,
      "step": 132994
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4852194786071777,
      "learning_rate": 0.00022792655099988725,
      "loss": 2.993,
      "step": 132995
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.221846342086792,
      "learning_rate": 0.000227922580247463,
      "loss": 2.8797,
      "step": 132996
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.69898521900177,
      "learning_rate": 0.0002279186095084393,
      "loss": 2.8721,
      "step": 132997
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7415165901184082,
      "learning_rate": 0.00022791463878281688,
      "loss": 2.9996,
      "step": 132998
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0630075931549072,
      "learning_rate": 0.00022791066807059652,
      "loss": 2.9656,
      "step": 132999
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.488309144973755,
      "learning_rate": 0.00022790669737177886,
      "loss": 2.8978,
      "step": 133000
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4650983810424805,
      "learning_rate": 0.0002279027266863648,
      "loss": 2.7905,
      "step": 133001
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.980684280395508,
      "learning_rate": 0.00022789875601435494,
      "loss": 2.9404,
      "step": 133002
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2440829277038574,
      "learning_rate": 0.00022789478535575007,
      "loss": 2.9361,
      "step": 133003
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.85441255569458,
      "learning_rate": 0.0002278908147105509,
      "loss": 3.0876,
      "step": 133004
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0840466022491455,
      "learning_rate": 0.0002278868440787582,
      "loss": 2.8995,
      "step": 133005
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0356459617614746,
      "learning_rate": 0.00022788287346037269,
      "loss": 3.1232,
      "step": 133006
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.042957305908203,
      "learning_rate": 0.00022787890285539517,
      "loss": 2.8826,
      "step": 133007
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.048586368560791,
      "learning_rate": 0.00022787493226382623,
      "loss": 2.8769,
      "step": 133008
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9865039587020874,
      "learning_rate": 0.00022787096168566673,
      "loss": 3.014,
      "step": 133009
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6519501209259033,
      "learning_rate": 0.00022786699112091745,
      "loss": 2.9187,
      "step": 133010
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.531290292739868,
      "learning_rate": 0.000227863020569579,
      "loss": 3.095,
      "step": 133011
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0178327560424805,
      "learning_rate": 0.00022785905003165215,
      "loss": 2.8553,
      "step": 133012
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.225148916244507,
      "learning_rate": 0.00022785507950713775,
      "loss": 3.155,
      "step": 133013
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.46516489982605,
      "learning_rate": 0.00022785110899603637,
      "loss": 2.9872,
      "step": 133014
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.188143253326416,
      "learning_rate": 0.00022784713849834885,
      "loss": 2.8625,
      "step": 133015
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3511452674865723,
      "learning_rate": 0.00022784316801407588,
      "loss": 2.8494,
      "step": 133016
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8333635330200195,
      "learning_rate": 0.00022783919754321825,
      "loss": 2.9129,
      "step": 133017
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0334253311157227,
      "learning_rate": 0.00022783522708577674,
      "loss": 2.9653,
      "step": 133018
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7690913677215576,
      "learning_rate": 0.000227831256641752,
      "loss": 2.826,
      "step": 133019
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.827323079109192,
      "learning_rate": 0.00022782728621114476,
      "loss": 2.909,
      "step": 133020
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1462934017181396,
      "learning_rate": 0.00022782331579395578,
      "loss": 3.2103,
      "step": 133021
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9955060482025146,
      "learning_rate": 0.00022781934539018578,
      "loss": 3.0344,
      "step": 133022
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3656601905822754,
      "learning_rate": 0.00022781537499983555,
      "loss": 2.9762,
      "step": 133023
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.239199638366699,
      "learning_rate": 0.00022781140462290584,
      "loss": 2.7987,
      "step": 133024
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4501380920410156,
      "learning_rate": 0.0002278074342593974,
      "loss": 2.8887,
      "step": 133025
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9755067825317383,
      "learning_rate": 0.00022780346390931087,
      "loss": 2.9953,
      "step": 133026
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.044369697570801,
      "learning_rate": 0.00022779949357264702,
      "loss": 3.04,
      "step": 133027
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.6976311206817627,
      "learning_rate": 0.00022779552324940657,
      "loss": 3.2053,
      "step": 133028
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2406325340270996,
      "learning_rate": 0.00022779155293959033,
      "loss": 3.0426,
      "step": 133029
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9767481088638306,
      "learning_rate": 0.000227787582643199,
      "loss": 2.8174,
      "step": 133030
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6659374237060547,
      "learning_rate": 0.00022778361236023341,
      "loss": 2.6796,
      "step": 133031
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0033633708953857,
      "learning_rate": 0.00022777964209069415,
      "loss": 2.9759,
      "step": 133032
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.832014322280884,
      "learning_rate": 0.00022777567183458197,
      "loss": 3.1483,
      "step": 133033
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9782005548477173,
      "learning_rate": 0.00022777170159189765,
      "loss": 2.8912,
      "step": 133034
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1997249126434326,
      "learning_rate": 0.00022776773136264197,
      "loss": 2.9668,
      "step": 133035
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.645419120788574,
      "learning_rate": 0.0002277637611468156,
      "loss": 2.9247,
      "step": 133036
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.904810667037964,
      "learning_rate": 0.00022775979094441946,
      "loss": 2.6292,
      "step": 133037
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0744917392730713,
      "learning_rate": 0.000227755820755454,
      "loss": 2.7838,
      "step": 133038
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.172412395477295,
      "learning_rate": 0.0002277518505799201,
      "loss": 2.9207,
      "step": 133039
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.785822868347168,
      "learning_rate": 0.0002277478804178185,
      "loss": 2.8066,
      "step": 133040
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9822862148284912,
      "learning_rate": 0.0002277439102691499,
      "loss": 2.7991,
      "step": 133041
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.345701217651367,
      "learning_rate": 0.00022773994013391513,
      "loss": 3.046,
      "step": 133042
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0785560607910156,
      "learning_rate": 0.00022773597001211494,
      "loss": 3.0566,
      "step": 133043
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.163147211074829,
      "learning_rate": 0.00022773199990374992,
      "loss": 3.0683,
      "step": 133044
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9131652116775513,
      "learning_rate": 0.00022772802980882084,
      "loss": 2.9464,
      "step": 133045
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9514949321746826,
      "learning_rate": 0.00022772405972732853,
      "loss": 2.7999,
      "step": 133046
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1551430225372314,
      "learning_rate": 0.00022772008965927363,
      "loss": 2.8814,
      "step": 133047
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.9689550399780273,
      "learning_rate": 0.00022771611960465698,
      "loss": 2.8654,
      "step": 133048
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.49609375,
      "learning_rate": 0.00022771214956347936,
      "loss": 2.9528,
      "step": 133049
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.77290415763855,
      "learning_rate": 0.00022770817953574128,
      "loss": 2.8461,
      "step": 133050
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.004368305206299,
      "learning_rate": 0.00022770420952144364,
      "loss": 2.9742,
      "step": 133051
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1560044288635254,
      "learning_rate": 0.00022770023952058715,
      "loss": 2.783,
      "step": 133052
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0366275310516357,
      "learning_rate": 0.00022769626953317252,
      "loss": 3.0834,
      "step": 133053
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0615901947021484,
      "learning_rate": 0.00022769229955920057,
      "loss": 3.2137,
      "step": 133054
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1019771099090576,
      "learning_rate": 0.00022768832959867196,
      "loss": 2.8668,
      "step": 133055
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0852584838867188,
      "learning_rate": 0.0002276843596515876,
      "loss": 2.9159,
      "step": 133056
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1804327964782715,
      "learning_rate": 0.00022768038971794792,
      "loss": 2.8858,
      "step": 133057
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.2073910236358643,
      "learning_rate": 0.00022767641979775383,
      "loss": 2.9445,
      "step": 133058
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0482676029205322,
      "learning_rate": 0.0002276724498910061,
      "loss": 2.8971,
      "step": 133059
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1614110469818115,
      "learning_rate": 0.00022766847999770536,
      "loss": 2.9779,
      "step": 133060
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.070218324661255,
      "learning_rate": 0.00022766451011785243,
      "loss": 2.8976,
      "step": 133061
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.794406533241272,
      "learning_rate": 0.0002276605402514482,
      "loss": 2.7223,
      "step": 133062
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.205456495285034,
      "learning_rate": 0.0002276565703984931,
      "loss": 2.84,
      "step": 133063
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3028132915496826,
      "learning_rate": 0.00022765260055898796,
      "loss": 2.7397,
      "step": 133064
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0656702518463135,
      "learning_rate": 0.00022764863073293363,
      "loss": 3.182,
      "step": 133065
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9748868942260742,
      "learning_rate": 0.00022764466092033077,
      "loss": 3.0878,
      "step": 133066
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.509880542755127,
      "learning_rate": 0.0002276406911211801,
      "loss": 3.0229,
      "step": 133067
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.616206407546997,
      "learning_rate": 0.00022763672133548258,
      "loss": 2.9169,
      "step": 133068
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9498991966247559,
      "learning_rate": 0.0002276327515632386,
      "loss": 2.8864,
      "step": 133069
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.99252188205719,
      "learning_rate": 0.00022762878180444904,
      "loss": 3.0453,
      "step": 133070
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.837393045425415,
      "learning_rate": 0.00022762481205911467,
      "loss": 3.1096,
      "step": 133071
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8522496223449707,
      "learning_rate": 0.00022762084232723622,
      "loss": 3.0522,
      "step": 133072
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1182162761688232,
      "learning_rate": 0.0002276168726088144,
      "loss": 2.7995,
      "step": 133073
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1006839275360107,
      "learning_rate": 0.00022761290290385013,
      "loss": 2.9278,
      "step": 133074
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5449025630950928,
      "learning_rate": 0.00022760893321234385,
      "loss": 2.8117,
      "step": 133075
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.287123680114746,
      "learning_rate": 0.00022760496353429644,
      "loss": 2.9864,
      "step": 133076
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8778401613235474,
      "learning_rate": 0.00022760099386970862,
      "loss": 2.9649,
      "step": 133077
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7727270126342773,
      "learning_rate": 0.00022759702421858114,
      "loss": 3.1382,
      "step": 133078
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3710885047912598,
      "learning_rate": 0.00022759305458091474,
      "loss": 3.0091,
      "step": 133079
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0715067386627197,
      "learning_rate": 0.00022758908495671022,
      "loss": 2.9141,
      "step": 133080
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2688047885894775,
      "learning_rate": 0.00022758511534596827,
      "loss": 2.9178,
      "step": 133081
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.410493850708008,
      "learning_rate": 0.00022758114574868954,
      "loss": 3.1439,
      "step": 133082
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.1800529956817627,
      "learning_rate": 0.00022757717616487484,
      "loss": 3.0457,
      "step": 133083
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.251828908920288,
      "learning_rate": 0.00022757320659452495,
      "loss": 2.8497,
      "step": 133084
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3535406589508057,
      "learning_rate": 0.0002275692370376405,
      "loss": 3.0998,
      "step": 133085
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.299572706222534,
      "learning_rate": 0.0002275652674942224,
      "loss": 2.9018,
      "step": 133086
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2011032104492188,
      "learning_rate": 0.00022756129796427122,
      "loss": 3.0575,
      "step": 133087
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.001188039779663,
      "learning_rate": 0.00022755732844778782,
      "loss": 3.0731,
      "step": 133088
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9286746978759766,
      "learning_rate": 0.00022755335894477283,
      "loss": 2.9538,
      "step": 133089
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8858962059020996,
      "learning_rate": 0.000227549389455227,
      "loss": 2.9591,
      "step": 133090
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2435526847839355,
      "learning_rate": 0.00022754541997915112,
      "loss": 2.9387,
      "step": 133091
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.121455669403076,
      "learning_rate": 0.00022754145051654602,
      "loss": 3.0478,
      "step": 133092
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.50970196723938,
      "learning_rate": 0.0002275374810674122,
      "loss": 3.2736,
      "step": 133093
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.9089584350585938,
      "learning_rate": 0.0002275335116317506,
      "loss": 3.1447,
      "step": 133094
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.860166072845459,
      "learning_rate": 0.0002275295422095619,
      "loss": 2.9901,
      "step": 133095
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9579815864562988,
      "learning_rate": 0.00022752557280084678,
      "loss": 3.0783,
      "step": 133096
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4681835174560547,
      "learning_rate": 0.00022752160340560603,
      "loss": 3.0435,
      "step": 133097
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7611327171325684,
      "learning_rate": 0.00022751763402384045,
      "loss": 3.0882,
      "step": 133098
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9857654571533203,
      "learning_rate": 0.0002275136646555506,
      "loss": 3.08,
      "step": 133099
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.912014365196228,
      "learning_rate": 0.00022750969530073738,
      "loss": 2.8749,
      "step": 133100
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2960681915283203,
      "learning_rate": 0.00022750572595940145,
      "loss": 2.9727,
      "step": 133101
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.818753480911255,
      "learning_rate": 0.00022750175663154368,
      "loss": 3.0955,
      "step": 133102
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.246645450592041,
      "learning_rate": 0.00022749778731716457,
      "loss": 2.9806,
      "step": 133103
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2052981853485107,
      "learning_rate": 0.00022749381801626513,
      "loss": 2.9148,
      "step": 133104
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.421912670135498,
      "learning_rate": 0.00022748984872884585,
      "loss": 2.7588,
      "step": 133105
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8128371238708496,
      "learning_rate": 0.00022748587945490756,
      "loss": 2.7784,
      "step": 133106
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2035021781921387,
      "learning_rate": 0.00022748191019445106,
      "loss": 2.7046,
      "step": 133107
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.951409935951233,
      "learning_rate": 0.000227477940947477,
      "loss": 3.0345,
      "step": 133108
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5235908031463623,
      "learning_rate": 0.00022747397171398617,
      "loss": 2.9988,
      "step": 133109
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0926401615142822,
      "learning_rate": 0.00022747000249397943,
      "loss": 3.2341,
      "step": 133110
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8228800296783447,
      "learning_rate": 0.00022746603328745727,
      "loss": 2.8251,
      "step": 133111
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0027198791503906,
      "learning_rate": 0.00022746206409442052,
      "loss": 3.0155,
      "step": 133112
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.129596710205078,
      "learning_rate": 0.00022745809491486995,
      "loss": 3.0793,
      "step": 133113
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0030834674835205,
      "learning_rate": 0.00022745412574880631,
      "loss": 3.1449,
      "step": 133114
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0105412006378174,
      "learning_rate": 0.00022745015659623029,
      "loss": 2.905,
      "step": 133115
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4623513221740723,
      "learning_rate": 0.0002274461874571428,
      "loss": 3.1076,
      "step": 133116
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.807974338531494,
      "learning_rate": 0.0002274422183315443,
      "loss": 2.9139,
      "step": 133117
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8937432765960693,
      "learning_rate": 0.00022743824921943564,
      "loss": 3.0045,
      "step": 133118
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.088597536087036,
      "learning_rate": 0.00022743428012081762,
      "loss": 3.0093,
      "step": 133119
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.395616054534912,
      "learning_rate": 0.00022743031103569092,
      "loss": 2.9861,
      "step": 133120
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8154226541519165,
      "learning_rate": 0.0002274263419640563,
      "loss": 2.9611,
      "step": 133121
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.879052758216858,
      "learning_rate": 0.00022742237290591447,
      "loss": 2.9864,
      "step": 133122
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.278804063796997,
      "learning_rate": 0.00022741840386126633,
      "loss": 2.9051,
      "step": 133123
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.843631625175476,
      "learning_rate": 0.00022741443483011235,
      "loss": 3.0532,
      "step": 133124
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0937047004699707,
      "learning_rate": 0.00022741046581245338,
      "loss": 2.9102,
      "step": 133125
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9944019317626953,
      "learning_rate": 0.00022740649680829024,
      "loss": 2.839,
      "step": 133126
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8156050443649292,
      "learning_rate": 0.00022740252781762354,
      "loss": 2.7612,
      "step": 133127
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9963300228118896,
      "learning_rate": 0.0002273985588404541,
      "loss": 2.8329,
      "step": 133128
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6595542430877686,
      "learning_rate": 0.00022739458987678277,
      "loss": 3.302,
      "step": 133129
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.456019401550293,
      "learning_rate": 0.00022739062092661002,
      "loss": 3.0888,
      "step": 133130
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.002018451690674,
      "learning_rate": 0.00022738665198993672,
      "loss": 2.9623,
      "step": 133131
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0631117820739746,
      "learning_rate": 0.00022738268306676362,
      "loss": 2.9361,
      "step": 133132
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3173277378082275,
      "learning_rate": 0.00022737871415709146,
      "loss": 3.1266,
      "step": 133133
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8280117511749268,
      "learning_rate": 0.00022737474526092095,
      "loss": 3.0478,
      "step": 133134
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8897879123687744,
      "learning_rate": 0.00022737077637825301,
      "loss": 2.7048,
      "step": 133135
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9640964269638062,
      "learning_rate": 0.00022736680750908803,
      "loss": 3.0952,
      "step": 133136
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.536418914794922,
      "learning_rate": 0.00022736283865342697,
      "loss": 3.2479,
      "step": 133137
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9706605672836304,
      "learning_rate": 0.00022735886981127052,
      "loss": 2.7555,
      "step": 133138
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.188368797302246,
      "learning_rate": 0.00022735490098261944,
      "loss": 2.9018,
      "step": 133139
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.093106985092163,
      "learning_rate": 0.00022735093216747442,
      "loss": 2.9382,
      "step": 133140
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9603917598724365,
      "learning_rate": 0.0002273469633658364,
      "loss": 3.0743,
      "step": 133141
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1809256076812744,
      "learning_rate": 0.0002273429945777058,
      "loss": 3.3104,
      "step": 133142
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9203277826309204,
      "learning_rate": 0.0002273390258030835,
      "loss": 2.9533,
      "step": 133143
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.841629981994629,
      "learning_rate": 0.00022733505704197027,
      "loss": 3.0681,
      "step": 133144
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.911022424697876,
      "learning_rate": 0.00022733108829436682,
      "loss": 3.1391,
      "step": 133145
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.44700026512146,
      "learning_rate": 0.00022732711956027386,
      "loss": 2.9528,
      "step": 133146
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0343432426452637,
      "learning_rate": 0.00022732315083969236,
      "loss": 2.9315,
      "step": 133147
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2362966537475586,
      "learning_rate": 0.00022731918213262265,
      "loss": 2.92,
      "step": 133148
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9096463918685913,
      "learning_rate": 0.00022731521343906573,
      "loss": 2.7201,
      "step": 133149
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.890072226524353,
      "learning_rate": 0.00022731124475902225,
      "loss": 2.8183,
      "step": 133150
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9360551834106445,
      "learning_rate": 0.000227307276092493,
      "loss": 3.2408,
      "step": 133151
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0940439701080322,
      "learning_rate": 0.00022730330743947868,
      "loss": 3.0563,
      "step": 133152
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.539989471435547,
      "learning_rate": 0.00022729933879998018,
      "loss": 2.9144,
      "step": 133153
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.286752462387085,
      "learning_rate": 0.000227295370173998,
      "loss": 2.9266,
      "step": 133154
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1913301944732666,
      "learning_rate": 0.00022729140156153298,
      "loss": 2.9085,
      "step": 133155
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.743093252182007,
      "learning_rate": 0.00022728743296258585,
      "loss": 2.9377,
      "step": 133156
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.681230068206787,
      "learning_rate": 0.00022728346437715739,
      "loss": 2.9886,
      "step": 133157
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8018691539764404,
      "learning_rate": 0.00022727949580524825,
      "loss": 3.0372,
      "step": 133158
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.817789077758789,
      "learning_rate": 0.0002272755272468594,
      "loss": 3.1333,
      "step": 133159
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.06902551651001,
      "learning_rate": 0.00022727155870199125,
      "loss": 3.1431,
      "step": 133160
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7548885345458984,
      "learning_rate": 0.00022726759017064469,
      "loss": 3.0166,
      "step": 133161
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0272130966186523,
      "learning_rate": 0.00022726362165282048,
      "loss": 3.0016,
      "step": 133162
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.0127358436584473,
      "learning_rate": 0.00022725965314851932,
      "loss": 2.9865,
      "step": 133163
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.6235527992248535,
      "learning_rate": 0.00022725568465774192,
      "loss": 3.0037,
      "step": 133164
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.027592420578003,
      "learning_rate": 0.00022725171618048924,
      "loss": 2.6311,
      "step": 133165
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.524949789047241,
      "learning_rate": 0.00022724774771676172,
      "loss": 2.8121,
      "step": 133166
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.211305856704712,
      "learning_rate": 0.0002272437792665602,
      "loss": 3.0595,
      "step": 133167
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1793999671936035,
      "learning_rate": 0.00022723981082988542,
      "loss": 3.0555,
      "step": 133168
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2227611541748047,
      "learning_rate": 0.00022723584240673817,
      "loss": 2.8492,
      "step": 133169
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.296793222427368,
      "learning_rate": 0.0002272318739971191,
      "loss": 2.8705,
      "step": 133170
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.312596082687378,
      "learning_rate": 0.0002272279056010291,
      "loss": 3.323,
      "step": 133171
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7357044219970703,
      "learning_rate": 0.00022722393721846882,
      "loss": 2.8569,
      "step": 133172
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8209246397018433,
      "learning_rate": 0.00022721996884943888,
      "loss": 2.9941,
      "step": 133173
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.4177982807159424,
      "learning_rate": 0.00022721600049394012,
      "loss": 3.1163,
      "step": 133174
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.028484582901001,
      "learning_rate": 0.00022721203215197332,
      "loss": 3.2381,
      "step": 133175
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3688435554504395,
      "learning_rate": 0.00022720806382353918,
      "loss": 3.2625,
      "step": 133176
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.1510913372039795,
      "learning_rate": 0.00022720409550863846,
      "loss": 3.0418,
      "step": 133177
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9157993793487549,
      "learning_rate": 0.0002272001272072718,
      "loss": 2.7502,
      "step": 133178
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.604663610458374,
      "learning_rate": 0.00022719615891944006,
      "loss": 3.299,
      "step": 133179
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.431051731109619,
      "learning_rate": 0.00022719219064514396,
      "loss": 2.8452,
      "step": 133180
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.7290035486221313,
      "learning_rate": 0.00022718822238438412,
      "loss": 3.1626,
      "step": 133181
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.468186378479004,
      "learning_rate": 0.00022718425413716141,
      "loss": 2.8865,
      "step": 133182
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.113762378692627,
      "learning_rate": 0.00022718028590347657,
      "loss": 2.8512,
      "step": 133183
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.43435001373291,
      "learning_rate": 0.0002271763176833302,
      "loss": 2.9435,
      "step": 133184
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2539405822753906,
      "learning_rate": 0.00022717234947672318,
      "loss": 2.8959,
      "step": 133185
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.089578866958618,
      "learning_rate": 0.00022716838128365618,
      "loss": 2.8907,
      "step": 133186
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.246098041534424,
      "learning_rate": 0.00022716441310412997,
      "loss": 2.8389,
      "step": 133187
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.307650089263916,
      "learning_rate": 0.00022716044493814523,
      "loss": 3.0096,
      "step": 133188
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.2693161964416504,
      "learning_rate": 0.00022715647678570277,
      "loss": 3.1624,
      "step": 133189
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8873674869537354,
      "learning_rate": 0.00022715250864680333,
      "loss": 3.1744,
      "step": 133190
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0142929553985596,
      "learning_rate": 0.00022714854052144758,
      "loss": 2.8995,
      "step": 133191
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0293564796447754,
      "learning_rate": 0.00022714457240963622,
      "loss": 3.0642,
      "step": 133192
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8986738920211792,
      "learning_rate": 0.00022714060431137012,
      "loss": 2.9112,
      "step": 133193
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0092344284057617,
      "learning_rate": 0.00022713663622665002,
      "loss": 2.9949,
      "step": 133194
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.584891676902771,
      "learning_rate": 0.0002271326681554765,
      "loss": 3.1496,
      "step": 133195
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0255870819091797,
      "learning_rate": 0.00022712870009785049,
      "loss": 3.0757,
      "step": 133196
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.023264169692993,
      "learning_rate": 0.00022712473205377254,
      "loss": 2.9389,
      "step": 133197
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8392858505249023,
      "learning_rate": 0.0002271207640232435,
      "loss": 3.0177,
      "step": 133198
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.960640788078308,
      "learning_rate": 0.00022711679600626405,
      "loss": 3.0898,
      "step": 133199
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.193986415863037,
      "learning_rate": 0.000227112828002835,
      "loss": 2.8478,
      "step": 133200
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.337681531906128,
      "learning_rate": 0.00022710886001295712,
      "loss": 2.9861,
      "step": 133201
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5930254459381104,
      "learning_rate": 0.00022710489203663106,
      "loss": 3.0828,
      "step": 133202
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.354212760925293,
      "learning_rate": 0.00022710092407385752,
      "loss": 2.8382,
      "step": 133203
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8946609497070312,
      "learning_rate": 0.0002270969561246373,
      "loss": 2.7675,
      "step": 133204
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.897707462310791,
      "learning_rate": 0.00022709298818897108,
      "loss": 3.0185,
      "step": 133205
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.6366052627563477,
      "learning_rate": 0.0002270890202668597,
      "loss": 3.0619,
      "step": 133206
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.179734468460083,
      "learning_rate": 0.00022708505235830383,
      "loss": 2.8335,
      "step": 133207
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5233588218688965,
      "learning_rate": 0.00022708108446330437,
      "loss": 3.0529,
      "step": 133208
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.981414794921875,
      "learning_rate": 0.00022707711658186177,
      "loss": 3.0321,
      "step": 133209
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.4687654972076416,
      "learning_rate": 0.00022707314871397695,
      "loss": 3.2794,
      "step": 133210
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.995460033416748,
      "learning_rate": 0.00022706918085965054,
      "loss": 3.022,
      "step": 133211
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9525402784347534,
      "learning_rate": 0.00022706521301888338,
      "loss": 3.2344,
      "step": 133212
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.5353596210479736,
      "learning_rate": 0.00022706124519167618,
      "loss": 2.9684,
      "step": 133213
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.5240566730499268,
      "learning_rate": 0.00022705727737802982,
      "loss": 3.1244,
      "step": 133214
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.170065402984619,
      "learning_rate": 0.00022705330957794472,
      "loss": 3.0493,
      "step": 133215
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.304199695587158,
      "learning_rate": 0.00022704934179142177,
      "loss": 2.7754,
      "step": 133216
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.814201593399048,
      "learning_rate": 0.00022704537401846178,
      "loss": 3.1113,
      "step": 133217
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.2433340549468994,
      "learning_rate": 0.0002270414062590654,
      "loss": 3.1324,
      "step": 133218
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.5921247005462646,
      "learning_rate": 0.00022703743851323343,
      "loss": 3.1651,
      "step": 133219
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8509284257888794,
      "learning_rate": 0.00022703347078096667,
      "loss": 3.2366,
      "step": 133220
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.029644012451172,
      "learning_rate": 0.00022702950306226566,
      "loss": 3.0816,
      "step": 133221
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.003121852874756,
      "learning_rate": 0.00022702553535713121,
      "loss": 3.056,
      "step": 133222
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7544937133789062,
      "learning_rate": 0.00022702156766556413,
      "loss": 2.8614,
      "step": 133223
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.028381109237671,
      "learning_rate": 0.0002270175999875651,
      "loss": 3.0005,
      "step": 133224
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3415634632110596,
      "learning_rate": 0.00022701363232313486,
      "loss": 2.9175,
      "step": 133225
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0590627193450928,
      "learning_rate": 0.0002270096646722743,
      "loss": 2.8942,
      "step": 133226
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.169954299926758,
      "learning_rate": 0.00022700569703498392,
      "loss": 3.0772,
      "step": 133227
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.5975253582000732,
      "learning_rate": 0.00022700172941126455,
      "loss": 2.9164,
      "step": 133228
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.408818244934082,
      "learning_rate": 0.00022699776180111692,
      "loss": 3.0087,
      "step": 133229
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.114901542663574,
      "learning_rate": 0.00022699379420454177,
      "loss": 2.7634,
      "step": 133230
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.146289825439453,
      "learning_rate": 0.0002269898266215399,
      "loss": 2.9988,
      "step": 133231
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.32598352432251,
      "learning_rate": 0.0002269858590521121,
      "loss": 2.8109,
      "step": 133232
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.8731781244277954,
      "learning_rate": 0.00022698189149625888,
      "loss": 3.0292,
      "step": 133233
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.345768451690674,
      "learning_rate": 0.0002269779239539811,
      "loss": 2.9082,
      "step": 133234
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.7176308631896973,
      "learning_rate": 0.0002269739564252795,
      "loss": 2.986,
      "step": 133235
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.748284101486206,
      "learning_rate": 0.00022696998891015484,
      "loss": 2.9828,
      "step": 133236
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9216572046279907,
      "learning_rate": 0.00022696602140860782,
      "loss": 3.0014,
      "step": 133237
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.440199375152588,
      "learning_rate": 0.00022696205392063934,
      "loss": 2.9059,
      "step": 133238
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.522548198699951,
      "learning_rate": 0.00022695808644624985,
      "loss": 2.7772,
      "step": 133239
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.174386978149414,
      "learning_rate": 0.00022695411898544023,
      "loss": 3.0542,
      "step": 133240
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.3045811653137207,
      "learning_rate": 0.00022695015153821122,
      "loss": 2.8617,
      "step": 133241
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.133126735687256,
      "learning_rate": 0.00022694618410456352,
      "loss": 3.0809,
      "step": 133242
    },
    {
      "epoch": 1.73,
      "grad_norm": 4.229096412658691,
      "learning_rate": 0.00022694221668449795,
      "loss": 2.787,
      "step": 133243
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.350027084350586,
      "learning_rate": 0.00022693824927801535,
      "loss": 3.0634,
      "step": 133244
    },
    {
      "epoch": 1.73,
      "grad_norm": 1.9441494941711426,
      "learning_rate": 0.00022693428188511614,
      "loss": 3.0921,
      "step": 133245
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.7570338249206543,
      "learning_rate": 0.00022693031450580123,
      "loss": 3.2065,
      "step": 133246
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.0396761894226074,
      "learning_rate": 0.00022692634714007137,
      "loss": 2.9531,
      "step": 133247
    },
    {
      "epoch": 1.73,
      "grad_norm": 3.6992130279541016,
      "learning_rate": 0.00022692237978792724,
      "loss": 3.1909,
      "step": 133248
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.717820882797241,
      "learning_rate": 0.00022691841244936969,
      "loss": 2.7351,
      "step": 133249
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1530559062957764,
      "learning_rate": 0.00022691444512439946,
      "loss": 2.88,
      "step": 133250
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3236279487609863,
      "learning_rate": 0.00022691047781301708,
      "loss": 2.9289,
      "step": 133251
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.044433355331421,
      "learning_rate": 0.00022690651051522346,
      "loss": 3.0842,
      "step": 133252
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1314709186553955,
      "learning_rate": 0.0002269025432310193,
      "loss": 3.0775,
      "step": 133253
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8167368173599243,
      "learning_rate": 0.0002268985759604053,
      "loss": 2.9518,
      "step": 133254
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.464465618133545,
      "learning_rate": 0.00022689460870338227,
      "loss": 3.0166,
      "step": 133255
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5064048767089844,
      "learning_rate": 0.0002268906414599509,
      "loss": 2.9483,
      "step": 133256
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.919824242591858,
      "learning_rate": 0.00022688667423011204,
      "loss": 3.0643,
      "step": 133257
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7210540771484375,
      "learning_rate": 0.00022688270701386624,
      "loss": 2.8415,
      "step": 133258
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3789432048797607,
      "learning_rate": 0.00022687873981121432,
      "loss": 2.8558,
      "step": 133259
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.174191474914551,
      "learning_rate": 0.000226874772622157,
      "loss": 3.0813,
      "step": 133260
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2231645584106445,
      "learning_rate": 0.00022687080544669505,
      "loss": 2.8968,
      "step": 133261
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9099935293197632,
      "learning_rate": 0.0002268668382848292,
      "loss": 2.8793,
      "step": 133262
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0772271156311035,
      "learning_rate": 0.00022686287113656025,
      "loss": 3.1547,
      "step": 133263
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0546014308929443,
      "learning_rate": 0.00022685890400188884,
      "loss": 3.0565,
      "step": 133264
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0512442588806152,
      "learning_rate": 0.00022685493688081573,
      "loss": 2.937,
      "step": 133265
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0691041946411133,
      "learning_rate": 0.00022685096977334164,
      "loss": 3.0478,
      "step": 133266
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0132579803466797,
      "learning_rate": 0.00022684700267946734,
      "loss": 2.9089,
      "step": 133267
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9911041259765625,
      "learning_rate": 0.00022684303559919355,
      "loss": 2.972,
      "step": 133268
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2991063594818115,
      "learning_rate": 0.0002268390685325211,
      "loss": 3.1269,
      "step": 133269
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9568618535995483,
      "learning_rate": 0.0002268351014794506,
      "loss": 2.986,
      "step": 133270
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2419164180755615,
      "learning_rate": 0.00022683113443998278,
      "loss": 3.1187,
      "step": 133271
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.60917067527771,
      "learning_rate": 0.00022682716741411857,
      "loss": 3.2319,
      "step": 133272
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3185195922851562,
      "learning_rate": 0.00022682320040185849,
      "loss": 2.8621,
      "step": 133273
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2418863773345947,
      "learning_rate": 0.00022681923340320332,
      "loss": 3.0276,
      "step": 133274
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0345280170440674,
      "learning_rate": 0.00022681526641815393,
      "loss": 2.9958,
      "step": 133275
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2032153606414795,
      "learning_rate": 0.00022681129944671088,
      "loss": 2.9933,
      "step": 133276
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8858364820480347,
      "learning_rate": 0.00022680733248887499,
      "loss": 2.9345,
      "step": 133277
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2852249145507812,
      "learning_rate": 0.00022680336554464702,
      "loss": 3.0241,
      "step": 133278
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.165055751800537,
      "learning_rate": 0.00022679939861402777,
      "loss": 3.0205,
      "step": 133279
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.651214599609375,
      "learning_rate": 0.00022679543169701777,
      "loss": 2.9267,
      "step": 133280
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.253615140914917,
      "learning_rate": 0.00022679146479361799,
      "loss": 2.9909,
      "step": 133281
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9388960599899292,
      "learning_rate": 0.00022678749790382897,
      "loss": 3.078,
      "step": 133282
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0537850856781006,
      "learning_rate": 0.00022678353102765153,
      "loss": 2.9482,
      "step": 133283
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7310311794281006,
      "learning_rate": 0.00022677956416508642,
      "loss": 2.8865,
      "step": 133284
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0487332344055176,
      "learning_rate": 0.0002267755973161344,
      "loss": 2.9198,
      "step": 133285
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3249094486236572,
      "learning_rate": 0.00022677163048079622,
      "loss": 2.9233,
      "step": 133286
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0579001903533936,
      "learning_rate": 0.0002267676636590726,
      "loss": 2.9711,
      "step": 133287
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.531430721282959,
      "learning_rate": 0.00022676369685096414,
      "loss": 2.9557,
      "step": 133288
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8206408023834229,
      "learning_rate": 0.00022675973005647173,
      "loss": 3.0232,
      "step": 133289
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.219435453414917,
      "learning_rate": 0.00022675576327559605,
      "loss": 3.0125,
      "step": 133290
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.35996675491333,
      "learning_rate": 0.00022675179650833788,
      "loss": 2.8857,
      "step": 133291
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3426830768585205,
      "learning_rate": 0.0002267478297546979,
      "loss": 2.9891,
      "step": 133292
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5898966789245605,
      "learning_rate": 0.00022674386301467704,
      "loss": 2.865,
      "step": 133293
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.331911563873291,
      "learning_rate": 0.00022673989628827575,
      "loss": 2.7633,
      "step": 133294
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4446616172790527,
      "learning_rate": 0.00022673592957549486,
      "loss": 3.1398,
      "step": 133295
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.392186403274536,
      "learning_rate": 0.0002267319628763352,
      "loss": 2.9553,
      "step": 133296
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9022326469421387,
      "learning_rate": 0.0002267279961907974,
      "loss": 3.2866,
      "step": 133297
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9453306198120117,
      "learning_rate": 0.00022672402951888225,
      "loss": 2.8141,
      "step": 133298
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.519735813140869,
      "learning_rate": 0.00022672006286059065,
      "loss": 2.914,
      "step": 133299
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9828771352767944,
      "learning_rate": 0.00022671609621592302,
      "loss": 2.7225,
      "step": 133300
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4378468990325928,
      "learning_rate": 0.00022671212958488027,
      "loss": 3.0296,
      "step": 133301
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8741997480392456,
      "learning_rate": 0.0002267081629674631,
      "loss": 3.0295,
      "step": 133302
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.546506404876709,
      "learning_rate": 0.00022670419636367227,
      "loss": 3.0414,
      "step": 133303
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0353310108184814,
      "learning_rate": 0.00022670022977350852,
      "loss": 3.0012,
      "step": 133304
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.941667914390564,
      "learning_rate": 0.0002266962631969727,
      "loss": 2.7978,
      "step": 133305
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9041494131088257,
      "learning_rate": 0.0002266922966340653,
      "loss": 2.8777,
      "step": 133306
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.009775161743164,
      "learning_rate": 0.00022668833008478723,
      "loss": 2.9342,
      "step": 133307
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0223309993743896,
      "learning_rate": 0.00022668436354913912,
      "loss": 3.0061,
      "step": 133308
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1260414123535156,
      "learning_rate": 0.00022668039702712178,
      "loss": 3.068,
      "step": 133309
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.269782543182373,
      "learning_rate": 0.00022667643051873596,
      "loss": 2.8999,
      "step": 133310
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.607794761657715,
      "learning_rate": 0.00022667246402398251,
      "loss": 2.9406,
      "step": 133311
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.921987533569336,
      "learning_rate": 0.00022666849754286188,
      "loss": 2.9084,
      "step": 133312
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.199165105819702,
      "learning_rate": 0.00022666453107537496,
      "loss": 3.1899,
      "step": 133313
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.74327552318573,
      "learning_rate": 0.00022666056462152253,
      "loss": 2.9467,
      "step": 133314
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0704493522644043,
      "learning_rate": 0.00022665659818130526,
      "loss": 3.1138,
      "step": 133315
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.159250497817993,
      "learning_rate": 0.00022665263175472388,
      "loss": 2.9244,
      "step": 133316
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.28389048576355,
      "learning_rate": 0.00022664866534177931,
      "loss": 2.8339,
      "step": 133317
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9954159259796143,
      "learning_rate": 0.00022664469894247206,
      "loss": 3.1883,
      "step": 133318
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.000265598297119,
      "learning_rate": 0.00022664073255680288,
      "loss": 3.3081,
      "step": 133319
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9458086490631104,
      "learning_rate": 0.0002266367661847726,
      "loss": 2.7494,
      "step": 133320
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.527768850326538,
      "learning_rate": 0.00022663279982638193,
      "loss": 3.2646,
      "step": 133321
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.989314317703247,
      "learning_rate": 0.0002266288334816316,
      "loss": 2.9517,
      "step": 133322
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.687955379486084,
      "learning_rate": 0.00022662486715052234,
      "loss": 2.912,
      "step": 133323
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7646334171295166,
      "learning_rate": 0.00022662090083305506,
      "loss": 3.2678,
      "step": 133324
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2128961086273193,
      "learning_rate": 0.00022661693452923022,
      "loss": 2.9305,
      "step": 133325
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1913795471191406,
      "learning_rate": 0.00022661296823904862,
      "loss": 3.0666,
      "step": 133326
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.986894130706787,
      "learning_rate": 0.0002266090019625111,
      "loss": 2.9795,
      "step": 133327
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.737508535385132,
      "learning_rate": 0.00022660503569961837,
      "loss": 2.8431,
      "step": 133328
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0738072395324707,
      "learning_rate": 0.0002266010694503711,
      "loss": 3.0267,
      "step": 133329
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9248414039611816,
      "learning_rate": 0.00022659710321477024,
      "loss": 3.018,
      "step": 133330
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.5564565658569336,
      "learning_rate": 0.0002265931369928162,
      "loss": 3.009,
      "step": 133331
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6455414295196533,
      "learning_rate": 0.00022658917078450993,
      "loss": 3.1161,
      "step": 133332
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8920419216156006,
      "learning_rate": 0.0002265852045898521,
      "loss": 2.8988,
      "step": 133333
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3536109924316406,
      "learning_rate": 0.00022658123840884345,
      "loss": 3.2136,
      "step": 133334
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.299994468688965,
      "learning_rate": 0.00022657727224148475,
      "loss": 3.2016,
      "step": 133335
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.507505416870117,
      "learning_rate": 0.00022657330608777683,
      "loss": 3.2612,
      "step": 133336
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9563897848129272,
      "learning_rate": 0.0002265693399477202,
      "loss": 2.9711,
      "step": 133337
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8277521133422852,
      "learning_rate": 0.00022656537382131572,
      "loss": 2.9461,
      "step": 133338
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1152889728546143,
      "learning_rate": 0.00022656140770856414,
      "loss": 2.8786,
      "step": 133339
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.039527416229248,
      "learning_rate": 0.00022655744160946615,
      "loss": 3.2188,
      "step": 133340
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8950550556182861,
      "learning_rate": 0.00022655347552402253,
      "loss": 2.8947,
      "step": 133341
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7753913402557373,
      "learning_rate": 0.00022654950945223413,
      "loss": 2.8746,
      "step": 133342
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1200461387634277,
      "learning_rate": 0.00022654554339410142,
      "loss": 2.8292,
      "step": 133343
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5954136848449707,
      "learning_rate": 0.00022654157734962533,
      "loss": 2.7589,
      "step": 133344
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0430214405059814,
      "learning_rate": 0.0002265376113188065,
      "loss": 3.1661,
      "step": 133345
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9195023775100708,
      "learning_rate": 0.0002265336453016457,
      "loss": 2.9071,
      "step": 133346
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4102864265441895,
      "learning_rate": 0.00022652967929814375,
      "loss": 2.7157,
      "step": 133347
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.379746675491333,
      "learning_rate": 0.00022652571330830133,
      "loss": 3.0189,
      "step": 133348
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9419944286346436,
      "learning_rate": 0.00022652174733211918,
      "loss": 3.0569,
      "step": 133349
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.729377269744873,
      "learning_rate": 0.00022651778136959796,
      "loss": 3.0854,
      "step": 133350
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1997859477996826,
      "learning_rate": 0.0002265138154207385,
      "loss": 3.1421,
      "step": 133351
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7376084327697754,
      "learning_rate": 0.00022650984948554146,
      "loss": 3.0738,
      "step": 133352
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.893423557281494,
      "learning_rate": 0.00022650588356400769,
      "loss": 3.0186,
      "step": 133353
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7476727962493896,
      "learning_rate": 0.00022650191765613786,
      "loss": 2.9226,
      "step": 133354
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7514005899429321,
      "learning_rate": 0.00022649795176193265,
      "loss": 3.2435,
      "step": 133355
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6727044582366943,
      "learning_rate": 0.00022649398588139296,
      "loss": 3.3018,
      "step": 133356
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.604654550552368,
      "learning_rate": 0.00022649002001451935,
      "loss": 2.9787,
      "step": 133357
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5320847034454346,
      "learning_rate": 0.00022648605416131261,
      "loss": 3.0773,
      "step": 133358
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.276075601577759,
      "learning_rate": 0.00022648208832177352,
      "loss": 2.6542,
      "step": 133359
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.647397994995117,
      "learning_rate": 0.0002264781224959029,
      "loss": 3.1496,
      "step": 133360
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.058415174484253,
      "learning_rate": 0.00022647415668370125,
      "loss": 2.8866,
      "step": 133361
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9780495166778564,
      "learning_rate": 0.00022647019088516948,
      "loss": 3.0791,
      "step": 133362
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.399777889251709,
      "learning_rate": 0.00022646622510030833,
      "loss": 2.9048,
      "step": 133363
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1092612743377686,
      "learning_rate": 0.00022646225932911848,
      "loss": 2.9902,
      "step": 133364
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.146815776824951,
      "learning_rate": 0.00022645829357160067,
      "loss": 2.9261,
      "step": 133365
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.902509927749634,
      "learning_rate": 0.00022645432782775568,
      "loss": 3.0466,
      "step": 133366
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.061753273010254,
      "learning_rate": 0.0002264503620975842,
      "loss": 3.1259,
      "step": 133367
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.005229949951172,
      "learning_rate": 0.00022644639638108694,
      "loss": 3.0867,
      "step": 133368
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5915091037750244,
      "learning_rate": 0.0002264424306782647,
      "loss": 2.9231,
      "step": 133369
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1918928623199463,
      "learning_rate": 0.00022643846498911823,
      "loss": 2.9793,
      "step": 133370
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4996235370635986,
      "learning_rate": 0.00022643449931364828,
      "loss": 2.9391,
      "step": 133371
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.941695213317871,
      "learning_rate": 0.00022643053365185552,
      "loss": 2.959,
      "step": 133372
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1466774940490723,
      "learning_rate": 0.0002264265680037407,
      "loss": 3.1688,
      "step": 133373
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5900015830993652,
      "learning_rate": 0.00022642260236930455,
      "loss": 2.9344,
      "step": 133374
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.630746364593506,
      "learning_rate": 0.0002264186367485478,
      "loss": 2.7847,
      "step": 133375
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1987318992614746,
      "learning_rate": 0.00022641467114147126,
      "loss": 2.852,
      "step": 133376
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0403285026550293,
      "learning_rate": 0.00022641070554807555,
      "loss": 2.7533,
      "step": 133377
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.374636173248291,
      "learning_rate": 0.00022640673996836164,
      "loss": 3.0901,
      "step": 133378
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7060089111328125,
      "learning_rate": 0.00022640277440233003,
      "loss": 2.8911,
      "step": 133379
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.266138792037964,
      "learning_rate": 0.00022639880884998148,
      "loss": 2.8614,
      "step": 133380
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.926363229751587,
      "learning_rate": 0.0002263948433113168,
      "loss": 2.844,
      "step": 133381
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8658279180526733,
      "learning_rate": 0.0002263908777863367,
      "loss": 2.9735,
      "step": 133382
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9943015575408936,
      "learning_rate": 0.0002263869122750419,
      "loss": 2.9327,
      "step": 133383
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7897975444793701,
      "learning_rate": 0.00022638294677743335,
      "loss": 2.9213,
      "step": 133384
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.451247215270996,
      "learning_rate": 0.00022637898129351143,
      "loss": 2.9709,
      "step": 133385
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.6118133068084717,
      "learning_rate": 0.00022637501582327706,
      "loss": 2.9228,
      "step": 133386
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8609178066253662,
      "learning_rate": 0.00022637105036673095,
      "loss": 2.9289,
      "step": 133387
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1495537757873535,
      "learning_rate": 0.00022636708492387386,
      "loss": 3.0436,
      "step": 133388
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9564762115478516,
      "learning_rate": 0.00022636311949470654,
      "loss": 3.0487,
      "step": 133389
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9362050294876099,
      "learning_rate": 0.00022635915407922966,
      "loss": 2.8199,
      "step": 133390
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.01346492767334,
      "learning_rate": 0.0002263551886774442,
      "loss": 3.0232,
      "step": 133391
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3588645458221436,
      "learning_rate": 0.0002263512232893505,
      "loss": 3.0946,
      "step": 133392
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1238694190979004,
      "learning_rate": 0.00022634725791494955,
      "loss": 2.7951,
      "step": 133393
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.793832302093506,
      "learning_rate": 0.000226343292554242,
      "loss": 2.9543,
      "step": 133394
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.088181495666504,
      "learning_rate": 0.00022633932720722864,
      "loss": 2.7754,
      "step": 133395
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.110513210296631,
      "learning_rate": 0.0002263353618739102,
      "loss": 2.7931,
      "step": 133396
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0719780921936035,
      "learning_rate": 0.0002263313965542875,
      "loss": 3.2626,
      "step": 133397
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3763070106506348,
      "learning_rate": 0.0002263274312483611,
      "loss": 2.9321,
      "step": 133398
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6246752738952637,
      "learning_rate": 0.0002263234659561318,
      "loss": 3.0632,
      "step": 133399
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1255900859832764,
      "learning_rate": 0.00022631950067760034,
      "loss": 2.9569,
      "step": 133400
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.199796199798584,
      "learning_rate": 0.0002263155354127675,
      "loss": 2.9035,
      "step": 133401
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2714452743530273,
      "learning_rate": 0.00022631157016163398,
      "loss": 2.8937,
      "step": 133402
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0447070598602295,
      "learning_rate": 0.00022630760492420068,
      "loss": 3.1597,
      "step": 133403
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.349436044692993,
      "learning_rate": 0.00022630363970046802,
      "loss": 3.3301,
      "step": 133404
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.826207160949707,
      "learning_rate": 0.00022629967449043693,
      "loss": 2.9212,
      "step": 133405
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.076880931854248,
      "learning_rate": 0.00022629570929410812,
      "loss": 3.0779,
      "step": 133406
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2060296535491943,
      "learning_rate": 0.00022629174411148233,
      "loss": 2.7335,
      "step": 133407
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9795243740081787,
      "learning_rate": 0.0002262877789425603,
      "loss": 3.0779,
      "step": 133408
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.187129020690918,
      "learning_rate": 0.0002262838137873429,
      "loss": 3.1952,
      "step": 133409
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9639390707015991,
      "learning_rate": 0.00022627984864583058,
      "loss": 2.9559,
      "step": 133410
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2525720596313477,
      "learning_rate": 0.00022627588351802422,
      "loss": 3.0534,
      "step": 133411
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1524839401245117,
      "learning_rate": 0.00022627191840392458,
      "loss": 3.0375,
      "step": 133412
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.00752329826355,
      "learning_rate": 0.0002262679533035324,
      "loss": 2.8523,
      "step": 133413
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6098642349243164,
      "learning_rate": 0.00022626398821684837,
      "loss": 2.8186,
      "step": 133414
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4623806476593018,
      "learning_rate": 0.0002262600231438734,
      "loss": 3.0496,
      "step": 133415
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1323165893554688,
      "learning_rate": 0.00022625605808460793,
      "loss": 2.9077,
      "step": 133416
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8459527492523193,
      "learning_rate": 0.0002262520930390529,
      "loss": 2.9766,
      "step": 133417
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8787550926208496,
      "learning_rate": 0.00022624812800720896,
      "loss": 2.9564,
      "step": 133418
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.270564079284668,
      "learning_rate": 0.00022624416298907692,
      "loss": 2.8829,
      "step": 133419
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.17242169380188,
      "learning_rate": 0.00022624019798465745,
      "loss": 2.9991,
      "step": 133420
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.113726854324341,
      "learning_rate": 0.00022623623299395144,
      "loss": 2.8344,
      "step": 133421
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0120623111724854,
      "learning_rate": 0.00022623226801695941,
      "loss": 3.0118,
      "step": 133422
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9952691793441772,
      "learning_rate": 0.00022622830305368217,
      "loss": 3.1052,
      "step": 133423
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.648214817047119,
      "learning_rate": 0.00022622433810412051,
      "loss": 2.9718,
      "step": 133424
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9340770244598389,
      "learning_rate": 0.0002262203731682751,
      "loss": 2.9656,
      "step": 133425
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2104110717773438,
      "learning_rate": 0.00022621640824614672,
      "loss": 3.0745,
      "step": 133426
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0407791137695312,
      "learning_rate": 0.00022621244333773625,
      "loss": 3.0277,
      "step": 133427
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.868164300918579,
      "learning_rate": 0.00022620847844304415,
      "loss": 2.8898,
      "step": 133428
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2369508743286133,
      "learning_rate": 0.00022620451356207126,
      "loss": 2.9152,
      "step": 133429
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.419348955154419,
      "learning_rate": 0.0002262005486948184,
      "loss": 2.9155,
      "step": 133430
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1008052825927734,
      "learning_rate": 0.0002261965838412862,
      "loss": 2.9986,
      "step": 133431
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9895210266113281,
      "learning_rate": 0.00022619261900147546,
      "loss": 2.8649,
      "step": 133432
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8710049390792847,
      "learning_rate": 0.00022618865417538695,
      "loss": 2.8396,
      "step": 133433
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0563626289367676,
      "learning_rate": 0.0002261846893630214,
      "loss": 2.6275,
      "step": 133434
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.928420066833496,
      "learning_rate": 0.00022618072456437945,
      "loss": 3.0119,
      "step": 133435
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8234734535217285,
      "learning_rate": 0.00022617675977946186,
      "loss": 2.6157,
      "step": 133436
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7210373878479004,
      "learning_rate": 0.00022617279500826944,
      "loss": 2.9871,
      "step": 133437
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.346330165863037,
      "learning_rate": 0.00022616883025080288,
      "loss": 2.9965,
      "step": 133438
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2382731437683105,
      "learning_rate": 0.00022616486550706296,
      "loss": 2.8491,
      "step": 133439
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8481643199920654,
      "learning_rate": 0.00022616090077705038,
      "loss": 2.9402,
      "step": 133440
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9340707063674927,
      "learning_rate": 0.00022615693606076588,
      "loss": 3.06,
      "step": 133441
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.887431621551514,
      "learning_rate": 0.0002261529713582102,
      "loss": 2.7902,
      "step": 133442
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0680267810821533,
      "learning_rate": 0.00022614900666938402,
      "loss": 3.0153,
      "step": 133443
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.122807741165161,
      "learning_rate": 0.00022614504199428817,
      "loss": 3.1514,
      "step": 133444
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9818288087844849,
      "learning_rate": 0.0002261410773329234,
      "loss": 2.7894,
      "step": 133445
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8545238971710205,
      "learning_rate": 0.00022613711268529033,
      "loss": 2.9331,
      "step": 133446
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.574834108352661,
      "learning_rate": 0.00022613314805138978,
      "loss": 3.4415,
      "step": 133447
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6801726818084717,
      "learning_rate": 0.00022612918343122254,
      "loss": 2.9837,
      "step": 133448
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8922677040100098,
      "learning_rate": 0.00022612521882478922,
      "loss": 3.0639,
      "step": 133449
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1529481410980225,
      "learning_rate": 0.00022612125423209057,
      "loss": 2.8163,
      "step": 133450
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1750502586364746,
      "learning_rate": 0.00022611728965312743,
      "loss": 2.8848,
      "step": 133451
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7990655899047852,
      "learning_rate": 0.0002261133250879005,
      "loss": 2.9667,
      "step": 133452
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0404045581817627,
      "learning_rate": 0.00022610936053641043,
      "loss": 2.8415,
      "step": 133453
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2861499786376953,
      "learning_rate": 0.0002261053959986581,
      "loss": 2.9097,
      "step": 133454
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.58251690864563,
      "learning_rate": 0.00022610143147464414,
      "loss": 2.8656,
      "step": 133455
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8878567218780518,
      "learning_rate": 0.0002260974669643693,
      "loss": 3.0541,
      "step": 133456
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.601405382156372,
      "learning_rate": 0.00022609350246783434,
      "loss": 3.0294,
      "step": 133457
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7083921432495117,
      "learning_rate": 0.00022608953798504007,
      "loss": 3.0001,
      "step": 133458
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7900164127349854,
      "learning_rate": 0.00022608557351598706,
      "loss": 3.0092,
      "step": 133459
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.083991289138794,
      "learning_rate": 0.00022608160906067615,
      "loss": 3.0174,
      "step": 133460
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.637096881866455,
      "learning_rate": 0.00022607764461910808,
      "loss": 2.957,
      "step": 133461
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7473152875900269,
      "learning_rate": 0.0002260736801912835,
      "loss": 2.9958,
      "step": 133462
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8413488864898682,
      "learning_rate": 0.00022606971577720337,
      "loss": 2.9123,
      "step": 133463
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8255199193954468,
      "learning_rate": 0.00022606575137686823,
      "loss": 3.0864,
      "step": 133464
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.875038743019104,
      "learning_rate": 0.0002260617869902788,
      "loss": 2.9496,
      "step": 133465
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3551433086395264,
      "learning_rate": 0.0002260578226174359,
      "loss": 3.1473,
      "step": 133466
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1677401065826416,
      "learning_rate": 0.00022605385825834022,
      "loss": 2.8592,
      "step": 133467
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9580438137054443,
      "learning_rate": 0.00022604989391299251,
      "loss": 2.9849,
      "step": 133468
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.787459135055542,
      "learning_rate": 0.00022604592958139357,
      "loss": 2.9473,
      "step": 133469
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.284799098968506,
      "learning_rate": 0.0002260419652635442,
      "loss": 3.0465,
      "step": 133470
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2845160961151123,
      "learning_rate": 0.00022603800095944486,
      "loss": 2.8784,
      "step": 133471
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2255876064300537,
      "learning_rate": 0.00022603403666909648,
      "loss": 3.1999,
      "step": 133472
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.904834508895874,
      "learning_rate": 0.0002260300723924998,
      "loss": 3.1547,
      "step": 133473
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1208934783935547,
      "learning_rate": 0.00022602610812965548,
      "loss": 3.165,
      "step": 133474
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.410137891769409,
      "learning_rate": 0.00022602214388056432,
      "loss": 2.8558,
      "step": 133475
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.824927806854248,
      "learning_rate": 0.00022601817964522714,
      "loss": 2.9113,
      "step": 133476
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.244786024093628,
      "learning_rate": 0.0002260142154236445,
      "loss": 2.9784,
      "step": 133477
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.195512533187866,
      "learning_rate": 0.00022601025121581718,
      "loss": 2.9062,
      "step": 133478
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1705925464630127,
      "learning_rate": 0.00022600628702174595,
      "loss": 3.0642,
      "step": 133479
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8085356950759888,
      "learning_rate": 0.00022600232284143156,
      "loss": 3.0686,
      "step": 133480
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2046263217926025,
      "learning_rate": 0.00022599835867487473,
      "loss": 3.0117,
      "step": 133481
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2541983127593994,
      "learning_rate": 0.00022599439452207634,
      "loss": 2.9688,
      "step": 133482
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2974300384521484,
      "learning_rate": 0.00022599043038303685,
      "loss": 3.1709,
      "step": 133483
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8392342329025269,
      "learning_rate": 0.00022598646625775714,
      "loss": 2.9349,
      "step": 133484
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8938099145889282,
      "learning_rate": 0.00022598250214623795,
      "loss": 2.8329,
      "step": 133485
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8199769258499146,
      "learning_rate": 0.00022597853804848,
      "loss": 3.0597,
      "step": 133486
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9317809343338013,
      "learning_rate": 0.00022597457396448403,
      "loss": 3.0251,
      "step": 133487
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.5156493186950684,
      "learning_rate": 0.00022597060989425094,
      "loss": 2.8611,
      "step": 133488
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.15315318107605,
      "learning_rate": 0.00022596664583778117,
      "loss": 3.0541,
      "step": 133489
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1914303302764893,
      "learning_rate": 0.00022596268179507557,
      "loss": 2.9517,
      "step": 133490
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3597424030303955,
      "learning_rate": 0.00022595871776613492,
      "loss": 2.5989,
      "step": 133491
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.370805263519287,
      "learning_rate": 0.00022595475375095995,
      "loss": 2.9084,
      "step": 133492
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1247177124023438,
      "learning_rate": 0.0002259507897495514,
      "loss": 3.0303,
      "step": 133493
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9196819067001343,
      "learning_rate": 0.0002259468257619101,
      "loss": 2.8508,
      "step": 133494
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2632925510406494,
      "learning_rate": 0.00022594286178803658,
      "loss": 3.0918,
      "step": 133495
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.945535659790039,
      "learning_rate": 0.00022593889782793167,
      "loss": 2.982,
      "step": 133496
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8497262001037598,
      "learning_rate": 0.00022593493388159611,
      "loss": 3.0965,
      "step": 133497
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.234891653060913,
      "learning_rate": 0.00022593096994903063,
      "loss": 3.093,
      "step": 133498
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.431772470474243,
      "learning_rate": 0.00022592700603023602,
      "loss": 2.8341,
      "step": 133499
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1291873455047607,
      "learning_rate": 0.0002259230421252131,
      "loss": 2.8416,
      "step": 133500
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.950058102607727,
      "learning_rate": 0.00022591907823396232,
      "loss": 2.9869,
      "step": 133501
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6872916221618652,
      "learning_rate": 0.0002259151143564846,
      "loss": 2.9204,
      "step": 133502
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.022486925125122,
      "learning_rate": 0.00022591115049278064,
      "loss": 3.0508,
      "step": 133503
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.200615644454956,
      "learning_rate": 0.00022590718664285123,
      "loss": 2.7894,
      "step": 133504
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.040207862854004,
      "learning_rate": 0.00022590322280669706,
      "loss": 2.9833,
      "step": 133505
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.013176679611206,
      "learning_rate": 0.00022589925898431904,
      "loss": 3.1065,
      "step": 133506
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1122758388519287,
      "learning_rate": 0.00022589529517571757,
      "loss": 3.0116,
      "step": 133507
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.288651943206787,
      "learning_rate": 0.00022589133138089357,
      "loss": 2.7657,
      "step": 133508
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.572180986404419,
      "learning_rate": 0.00022588736759984778,
      "loss": 3.0298,
      "step": 133509
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1647915840148926,
      "learning_rate": 0.00022588340383258095,
      "loss": 3.0933,
      "step": 133510
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2111563682556152,
      "learning_rate": 0.00022587944007909378,
      "loss": 2.8916,
      "step": 133511
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1215813159942627,
      "learning_rate": 0.00022587547633938713,
      "loss": 2.9498,
      "step": 133512
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.044262409210205,
      "learning_rate": 0.0002258715126134615,
      "loss": 2.8428,
      "step": 133513
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6155316829681396,
      "learning_rate": 0.00022586754890131777,
      "loss": 2.9715,
      "step": 133514
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.884843349456787,
      "learning_rate": 0.00022586358520295665,
      "loss": 2.8575,
      "step": 133515
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.89926016330719,
      "learning_rate": 0.0002258596215183789,
      "loss": 2.6251,
      "step": 133516
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.952445387840271,
      "learning_rate": 0.00022585565784758526,
      "loss": 3.0274,
      "step": 133517
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.5223870277404785,
      "learning_rate": 0.00022585169419057642,
      "loss": 3.2317,
      "step": 133518
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.422769784927368,
      "learning_rate": 0.0002258477305473533,
      "loss": 2.9136,
      "step": 133519
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2103958129882812,
      "learning_rate": 0.00022584376691791634,
      "loss": 2.9889,
      "step": 133520
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1554665565490723,
      "learning_rate": 0.00022583980330226645,
      "loss": 2.8462,
      "step": 133521
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.177046060562134,
      "learning_rate": 0.00022583583970040434,
      "loss": 3.0325,
      "step": 133522
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7069164514541626,
      "learning_rate": 0.0002258318761123307,
      "loss": 2.9337,
      "step": 133523
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0593090057373047,
      "learning_rate": 0.00022582791253804635,
      "loss": 3.087,
      "step": 133524
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1827781200408936,
      "learning_rate": 0.00022582394897755205,
      "loss": 2.9307,
      "step": 133525
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0317883491516113,
      "learning_rate": 0.0002258199854308485,
      "loss": 3.02,
      "step": 133526
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7749462127685547,
      "learning_rate": 0.00022581602189793634,
      "loss": 3.0356,
      "step": 133527
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2615654468536377,
      "learning_rate": 0.00022581205837881636,
      "loss": 3.0734,
      "step": 133528
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4566617012023926,
      "learning_rate": 0.00022580809487348933,
      "loss": 3.0506,
      "step": 133529
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1314918994903564,
      "learning_rate": 0.00022580413138195596,
      "loss": 2.9977,
      "step": 133530
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.6659668684005737,
      "learning_rate": 0.0002258001679042171,
      "loss": 3.0997,
      "step": 133531
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.103173017501831,
      "learning_rate": 0.0002257962044402733,
      "loss": 2.9847,
      "step": 133532
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2433507442474365,
      "learning_rate": 0.0002257922409901255,
      "loss": 2.838,
      "step": 133533
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1777288913726807,
      "learning_rate": 0.00022578827755377422,
      "loss": 2.8325,
      "step": 133534
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3714253902435303,
      "learning_rate": 0.00022578431413122028,
      "loss": 2.8331,
      "step": 133535
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.066220998764038,
      "learning_rate": 0.00022578035072246447,
      "loss": 3.4486,
      "step": 133536
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.52699875831604,
      "learning_rate": 0.00022577638732750754,
      "loss": 2.8849,
      "step": 133537
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8895620107650757,
      "learning_rate": 0.0002257724239463501,
      "loss": 3.1366,
      "step": 133538
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5375313758850098,
      "learning_rate": 0.000225768460578993,
      "loss": 3.1275,
      "step": 133539
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.11773681640625,
      "learning_rate": 0.000225764497225437,
      "loss": 3.0239,
      "step": 133540
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4845492839813232,
      "learning_rate": 0.00022576053388568269,
      "loss": 2.9403,
      "step": 133541
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.202857255935669,
      "learning_rate": 0.00022575657055973094,
      "loss": 3.0107,
      "step": 133542
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3423662185668945,
      "learning_rate": 0.0002257526072475825,
      "loss": 2.7554,
      "step": 133543
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.9952831268310547,
      "learning_rate": 0.000225748643949238,
      "loss": 2.8975,
      "step": 133544
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.287540912628174,
      "learning_rate": 0.0002257446806646982,
      "loss": 2.9206,
      "step": 133545
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5045597553253174,
      "learning_rate": 0.00022574071739396386,
      "loss": 2.9582,
      "step": 133546
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5754621028900146,
      "learning_rate": 0.00022573675413703579,
      "loss": 2.8264,
      "step": 133547
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.712400197982788,
      "learning_rate": 0.0002257327908939146,
      "loss": 2.8635,
      "step": 133548
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.21418833732605,
      "learning_rate": 0.00022572882766460118,
      "loss": 3.0187,
      "step": 133549
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8464620113372803,
      "learning_rate": 0.00022572486444909605,
      "loss": 2.8259,
      "step": 133550
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2034077644348145,
      "learning_rate": 0.00022572090124740014,
      "loss": 2.7278,
      "step": 133551
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.124375820159912,
      "learning_rate": 0.00022571693805951406,
      "loss": 2.9364,
      "step": 133552
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1007442474365234,
      "learning_rate": 0.00022571297488543863,
      "loss": 3.0614,
      "step": 133553
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.19909405708313,
      "learning_rate": 0.00022570901172517456,
      "loss": 3.1599,
      "step": 133554
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3131461143493652,
      "learning_rate": 0.0002257050485787227,
      "loss": 2.9204,
      "step": 133555
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.313140392303467,
      "learning_rate": 0.00022570108544608356,
      "loss": 3.126,
      "step": 133556
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0672190189361572,
      "learning_rate": 0.00022569712232725796,
      "loss": 2.9331,
      "step": 133557
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1111998558044434,
      "learning_rate": 0.00022569315922224672,
      "loss": 3.0516,
      "step": 133558
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8251752853393555,
      "learning_rate": 0.0002256891961310505,
      "loss": 2.9754,
      "step": 133559
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6582369804382324,
      "learning_rate": 0.00022568523305367002,
      "loss": 2.8003,
      "step": 133560
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.488192558288574,
      "learning_rate": 0.00022568126999010626,
      "loss": 3.0495,
      "step": 133561
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0330660343170166,
      "learning_rate": 0.00022567730694035958,
      "loss": 2.9205,
      "step": 133562
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.242715358734131,
      "learning_rate": 0.0002256733439044309,
      "loss": 2.8323,
      "step": 133563
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.943690776824951,
      "learning_rate": 0.00022566938088232097,
      "loss": 2.8521,
      "step": 133564
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1192708015441895,
      "learning_rate": 0.0002256654178740305,
      "loss": 2.8799,
      "step": 133565
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.328313112258911,
      "learning_rate": 0.0002256614548795602,
      "loss": 3.1167,
      "step": 133566
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.25772762298584,
      "learning_rate": 0.000225657491898911,
      "loss": 3.0266,
      "step": 133567
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1418745517730713,
      "learning_rate": 0.00022565352893208333,
      "loss": 2.9847,
      "step": 133568
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0682129859924316,
      "learning_rate": 0.00022564956597907808,
      "loss": 2.9535,
      "step": 133569
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1838090419769287,
      "learning_rate": 0.00022564560303989597,
      "loss": 2.7523,
      "step": 133570
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.142902374267578,
      "learning_rate": 0.00022564164011453777,
      "loss": 2.8372,
      "step": 133571
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2598235607147217,
      "learning_rate": 0.00022563767720300412,
      "loss": 3.099,
      "step": 133572
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1407132148742676,
      "learning_rate": 0.00022563371430529605,
      "loss": 3.0745,
      "step": 133573
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.844299554824829,
      "learning_rate": 0.0002256297514214139,
      "loss": 3.2864,
      "step": 133574
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.442634582519531,
      "learning_rate": 0.0002256257885513586,
      "loss": 2.8643,
      "step": 133575
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4406745433807373,
      "learning_rate": 0.00022562182569513085,
      "loss": 3.1226,
      "step": 133576
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1258676052093506,
      "learning_rate": 0.00022561786285273144,
      "loss": 2.8988,
      "step": 133577
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.8741326332092285,
      "learning_rate": 0.00022561390002416104,
      "loss": 2.962,
      "step": 133578
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2827064990997314,
      "learning_rate": 0.00022560993720942055,
      "loss": 2.9105,
      "step": 133579
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0127170085906982,
      "learning_rate": 0.00022560597440851045,
      "loss": 2.9648,
      "step": 133580
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7958142757415771,
      "learning_rate": 0.0002256020116214316,
      "loss": 2.8172,
      "step": 133581
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.189291477203369,
      "learning_rate": 0.00022559804884818475,
      "loss": 2.9182,
      "step": 133582
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.579510450363159,
      "learning_rate": 0.00022559408608877066,
      "loss": 2.9393,
      "step": 133583
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.44170880317688,
      "learning_rate": 0.00022559012334318998,
      "loss": 3.0635,
      "step": 133584
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8261197805404663,
      "learning_rate": 0.00022558616061144353,
      "loss": 3.018,
      "step": 133585
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.444467782974243,
      "learning_rate": 0.00022558219789353215,
      "loss": 3.0881,
      "step": 133586
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5595359802246094,
      "learning_rate": 0.0002255782351894563,
      "loss": 3.1267,
      "step": 133587
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.234706401824951,
      "learning_rate": 0.00022557427249921685,
      "loss": 3.1155,
      "step": 133588
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0627996921539307,
      "learning_rate": 0.0002255703098228146,
      "loss": 3.3125,
      "step": 133589
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.152153968811035,
      "learning_rate": 0.00022556634716025016,
      "loss": 2.976,
      "step": 133590
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0536134243011475,
      "learning_rate": 0.0002255623845115244,
      "loss": 2.9549,
      "step": 133591
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.34507942199707,
      "learning_rate": 0.00022555842187663814,
      "loss": 2.92,
      "step": 133592
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9065808057785034,
      "learning_rate": 0.00022555445925559177,
      "loss": 2.8825,
      "step": 133593
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.862653374671936,
      "learning_rate": 0.0002255504966483863,
      "loss": 3.0014,
      "step": 133594
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.7374801635742188,
      "learning_rate": 0.00022554653405502237,
      "loss": 2.9071,
      "step": 133595
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.350719451904297,
      "learning_rate": 0.00022554257147550073,
      "loss": 2.8859,
      "step": 133596
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.92862606048584,
      "learning_rate": 0.00022553860890982215,
      "loss": 2.8941,
      "step": 133597
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.048117160797119,
      "learning_rate": 0.00022553464635798748,
      "loss": 2.9087,
      "step": 133598
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.825915813446045,
      "learning_rate": 0.00022553068381999723,
      "loss": 3.1963,
      "step": 133599
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0975334644317627,
      "learning_rate": 0.00022552672129585213,
      "loss": 2.9577,
      "step": 133600
    },
    {
      "epoch": 1.74,
      "grad_norm": 5.298733711242676,
      "learning_rate": 0.00022552275878555313,
      "loss": 3.0128,
      "step": 133601
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2112321853637695,
      "learning_rate": 0.00022551879628910076,
      "loss": 2.8144,
      "step": 133602
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.146592617034912,
      "learning_rate": 0.00022551483380649594,
      "loss": 2.7518,
      "step": 133603
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.511603355407715,
      "learning_rate": 0.00022551087133773937,
      "loss": 2.8674,
      "step": 133604
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.624074697494507,
      "learning_rate": 0.00022550690888283167,
      "loss": 2.8516,
      "step": 133605
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0707175731658936,
      "learning_rate": 0.0002255029464417736,
      "loss": 2.7701,
      "step": 133606
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3878750801086426,
      "learning_rate": 0.00022549898401456593,
      "loss": 3.1279,
      "step": 133607
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.780261754989624,
      "learning_rate": 0.00022549502160120944,
      "loss": 2.894,
      "step": 133608
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.2130587100982666,
      "learning_rate": 0.00022549105920170484,
      "loss": 2.7702,
      "step": 133609
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.4935145378112793,
      "learning_rate": 0.00022548709681605294,
      "loss": 2.9067,
      "step": 133610
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2261643409729004,
      "learning_rate": 0.00022548313444425432,
      "loss": 2.5508,
      "step": 133611
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.031169891357422,
      "learning_rate": 0.00022547917208630975,
      "loss": 3.123,
      "step": 133612
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.966593027114868,
      "learning_rate": 0.00022547520974222003,
      "loss": 3.0078,
      "step": 133613
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.031469345092773,
      "learning_rate": 0.00022547124741198587,
      "loss": 3.1151,
      "step": 133614
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.3884286880493164,
      "learning_rate": 0.00022546728509560798,
      "loss": 2.9458,
      "step": 133615
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0025382041931152,
      "learning_rate": 0.00022546332279308726,
      "loss": 3.1742,
      "step": 133616
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.007110834121704,
      "learning_rate": 0.00022545936050442426,
      "loss": 3.0165,
      "step": 133617
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2267963886260986,
      "learning_rate": 0.00022545539822961975,
      "loss": 3.1185,
      "step": 133618
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.5038604736328125,
      "learning_rate": 0.00022545143596867443,
      "loss": 2.9139,
      "step": 133619
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.872563123703003,
      "learning_rate": 0.00022544747372158912,
      "loss": 3.0165,
      "step": 133620
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5645294189453125,
      "learning_rate": 0.00022544351148836458,
      "loss": 2.9743,
      "step": 133621
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.3187649250030518,
      "learning_rate": 0.0002254395492690015,
      "loss": 3.005,
      "step": 133622
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9964120388031006,
      "learning_rate": 0.00022543558706350058,
      "loss": 2.8454,
      "step": 133623
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.625318765640259,
      "learning_rate": 0.0002254316248718626,
      "loss": 2.9044,
      "step": 133624
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7110886573791504,
      "learning_rate": 0.00022542766269408832,
      "loss": 2.9595,
      "step": 133625
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.327538013458252,
      "learning_rate": 0.00022542370053017842,
      "loss": 2.8062,
      "step": 133626
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2724199295043945,
      "learning_rate": 0.00022541973838013362,
      "loss": 2.9939,
      "step": 133627
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.225707530975342,
      "learning_rate": 0.00022541577624395483,
      "loss": 3.1006,
      "step": 133628
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8503975868225098,
      "learning_rate": 0.0002254118141216425,
      "loss": 2.9275,
      "step": 133629
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1552858352661133,
      "learning_rate": 0.00022540785201319758,
      "loss": 3.1199,
      "step": 133630
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8500272035598755,
      "learning_rate": 0.00022540388991862075,
      "loss": 3.1183,
      "step": 133631
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3998310565948486,
      "learning_rate": 0.0002253999278379128,
      "loss": 2.9109,
      "step": 133632
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.195810556411743,
      "learning_rate": 0.00022539596577107434,
      "loss": 2.9176,
      "step": 133633
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9077873229980469,
      "learning_rate": 0.00022539200371810624,
      "loss": 2.9138,
      "step": 133634
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.041086435317993,
      "learning_rate": 0.0002253880416790091,
      "loss": 2.6907,
      "step": 133635
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.007974624633789,
      "learning_rate": 0.00022538407965378375,
      "loss": 3.043,
      "step": 133636
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.245624303817749,
      "learning_rate": 0.00022538011764243094,
      "loss": 2.7931,
      "step": 133637
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1904962062835693,
      "learning_rate": 0.0002253761556449513,
      "loss": 3.0633,
      "step": 133638
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2255828380584717,
      "learning_rate": 0.00022537219366134577,
      "loss": 2.9696,
      "step": 133639
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0050644874572754,
      "learning_rate": 0.00022536823169161496,
      "loss": 2.94,
      "step": 133640
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.171088695526123,
      "learning_rate": 0.0002253642697357595,
      "loss": 3.0583,
      "step": 133641
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9530858993530273,
      "learning_rate": 0.00022536030779378023,
      "loss": 2.8152,
      "step": 133642
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.448007583618164,
      "learning_rate": 0.0002253563458656779,
      "loss": 3.041,
      "step": 133643
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.625441312789917,
      "learning_rate": 0.00022535238395145327,
      "loss": 2.8663,
      "step": 133644
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.312634229660034,
      "learning_rate": 0.00022534842205110698,
      "loss": 2.7593,
      "step": 133645
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.321370840072632,
      "learning_rate": 0.00022534446016464,
      "loss": 2.897,
      "step": 133646
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.249804973602295,
      "learning_rate": 0.00022534049829205273,
      "loss": 2.8423,
      "step": 133647
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.741140604019165,
      "learning_rate": 0.0002253365364333461,
      "loss": 3.0104,
      "step": 133648
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1309447288513184,
      "learning_rate": 0.0002253325745885208,
      "loss": 2.8829,
      "step": 133649
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2794675827026367,
      "learning_rate": 0.0002253286127575776,
      "loss": 3.2929,
      "step": 133650
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.695305585861206,
      "learning_rate": 0.0002253246509405172,
      "loss": 3.025,
      "step": 133651
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9147101640701294,
      "learning_rate": 0.00022532068913734036,
      "loss": 2.8833,
      "step": 133652
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9741456508636475,
      "learning_rate": 0.00022531672734804798,
      "loss": 3.0985,
      "step": 133653
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1710453033447266,
      "learning_rate": 0.00022531276557264047,
      "loss": 2.7421,
      "step": 133654
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6973743438720703,
      "learning_rate": 0.00022530880381111873,
      "loss": 3.0357,
      "step": 133655
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9816685914993286,
      "learning_rate": 0.00022530484206348349,
      "loss": 2.9981,
      "step": 133656
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.694988965988159,
      "learning_rate": 0.0002253008803297355,
      "loss": 2.8796,
      "step": 133657
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0929667949676514,
      "learning_rate": 0.0002252969186098755,
      "loss": 2.8138,
      "step": 133658
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.208073616027832,
      "learning_rate": 0.00022529295690390427,
      "loss": 3.054,
      "step": 133659
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9358186721801758,
      "learning_rate": 0.00022528899521182243,
      "loss": 3.0495,
      "step": 133660
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8673962354660034,
      "learning_rate": 0.00022528503353363074,
      "loss": 2.7575,
      "step": 133661
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8070374727249146,
      "learning_rate": 0.00022528107186933,
      "loss": 3.1031,
      "step": 133662
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4373703002929688,
      "learning_rate": 0.00022527711021892092,
      "loss": 2.9026,
      "step": 133663
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0852837562561035,
      "learning_rate": 0.00022527314858240423,
      "loss": 2.8321,
      "step": 133664
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.093566656112671,
      "learning_rate": 0.0002252691869597808,
      "loss": 2.97,
      "step": 133665
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.424971342086792,
      "learning_rate": 0.0002252652253510511,
      "loss": 3.1963,
      "step": 133666
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4861154556274414,
      "learning_rate": 0.00022526126375621602,
      "loss": 2.763,
      "step": 133667
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1537582874298096,
      "learning_rate": 0.00022525730217527626,
      "loss": 3.0183,
      "step": 133668
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0765414237976074,
      "learning_rate": 0.00022525334060823258,
      "loss": 2.9371,
      "step": 133669
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.979076862335205,
      "learning_rate": 0.00022524937905508573,
      "loss": 2.9022,
      "step": 133670
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8370044231414795,
      "learning_rate": 0.00022524541751583655,
      "loss": 2.9992,
      "step": 133671
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.794729471206665,
      "learning_rate": 0.00022524145599048554,
      "loss": 3.2222,
      "step": 133672
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4456567764282227,
      "learning_rate": 0.00022523749447903355,
      "loss": 3.0935,
      "step": 133673
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9547327756881714,
      "learning_rate": 0.00022523353298148133,
      "loss": 2.8789,
      "step": 133674
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.667731523513794,
      "learning_rate": 0.0002252295714978296,
      "loss": 3.1696,
      "step": 133675
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.166403293609619,
      "learning_rate": 0.00022522561002807912,
      "loss": 2.9173,
      "step": 133676
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.229377508163452,
      "learning_rate": 0.00022522164857223073,
      "loss": 2.7421,
      "step": 133677
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.110301971435547,
      "learning_rate": 0.00022521768713028488,
      "loss": 3.0467,
      "step": 133678
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0711300373077393,
      "learning_rate": 0.0002252137257022425,
      "loss": 3.0171,
      "step": 133679
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.7788186073303223,
      "learning_rate": 0.00022520976428810433,
      "loss": 2.9648,
      "step": 133680
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.781676173210144,
      "learning_rate": 0.00022520580288787104,
      "loss": 2.8565,
      "step": 133681
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2420732975006104,
      "learning_rate": 0.0002252018415015434,
      "loss": 3.0914,
      "step": 133682
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3869729042053223,
      "learning_rate": 0.0002251978801291223,
      "loss": 2.8174,
      "step": 133683
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0109646320343018,
      "learning_rate": 0.0002251939187706082,
      "loss": 2.8866,
      "step": 133684
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2227163314819336,
      "learning_rate": 0.00022518995742600197,
      "loss": 2.9736,
      "step": 133685
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.114717483520508,
      "learning_rate": 0.0002251859960953043,
      "loss": 2.9774,
      "step": 133686
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5592076778411865,
      "learning_rate": 0.00022518203477851596,
      "loss": 2.5657,
      "step": 133687
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1081459522247314,
      "learning_rate": 0.00022517807347563774,
      "loss": 2.9977,
      "step": 133688
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5557289123535156,
      "learning_rate": 0.00022517411218667043,
      "loss": 3.0305,
      "step": 133689
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.635859251022339,
      "learning_rate": 0.00022517015091161457,
      "loss": 2.772,
      "step": 133690
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9988212585449219,
      "learning_rate": 0.00022516618965047092,
      "loss": 2.9607,
      "step": 133691
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.953937292098999,
      "learning_rate": 0.00022516222840324036,
      "loss": 2.8729,
      "step": 133692
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.45883846282959,
      "learning_rate": 0.00022515826716992354,
      "loss": 2.7819,
      "step": 133693
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3974344730377197,
      "learning_rate": 0.00022515430595052116,
      "loss": 3.2267,
      "step": 133694
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8986910581588745,
      "learning_rate": 0.00022515034474503422,
      "loss": 2.85,
      "step": 133695
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.952331304550171,
      "learning_rate": 0.00022514638355346307,
      "loss": 3.0287,
      "step": 133696
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.122663974761963,
      "learning_rate": 0.0002251424223758086,
      "loss": 3.0475,
      "step": 133697
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4408376216888428,
      "learning_rate": 0.0002251384612120716,
      "loss": 3.3258,
      "step": 133698
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8333046436309814,
      "learning_rate": 0.00022513450006225274,
      "loss": 2.9765,
      "step": 133699
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8493635654449463,
      "learning_rate": 0.0002251305389263528,
      "loss": 2.8258,
      "step": 133700
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0900192260742188,
      "learning_rate": 0.0002251265778043726,
      "loss": 3.0143,
      "step": 133701
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1395792961120605,
      "learning_rate": 0.00022512261669631274,
      "loss": 2.9725,
      "step": 133702
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.2375175952911377,
      "learning_rate": 0.00022511865560217396,
      "loss": 2.9818,
      "step": 133703
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.050276517868042,
      "learning_rate": 0.00022511469452195704,
      "loss": 2.9579,
      "step": 133704
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.065389633178711,
      "learning_rate": 0.0002251107334556627,
      "loss": 3.1864,
      "step": 133705
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.408449411392212,
      "learning_rate": 0.0002251067724032917,
      "loss": 2.8566,
      "step": 133706
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.156426429748535,
      "learning_rate": 0.00022510281136484483,
      "loss": 3.2765,
      "step": 133707
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.3278160095214844,
      "learning_rate": 0.00022509885034032267,
      "loss": 3.1128,
      "step": 133708
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5181610584259033,
      "learning_rate": 0.00022509488932972614,
      "loss": 2.8614,
      "step": 133709
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3091585636138916,
      "learning_rate": 0.0002250909283330558,
      "loss": 3.0573,
      "step": 133710
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.212341785430908,
      "learning_rate": 0.00022508696735031248,
      "loss": 2.9298,
      "step": 133711
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.799956798553467,
      "learning_rate": 0.00022508300638149692,
      "loss": 2.8309,
      "step": 133712
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0645751953125,
      "learning_rate": 0.00022507904542660987,
      "loss": 2.9291,
      "step": 133713
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6466009616851807,
      "learning_rate": 0.00022507508448565202,
      "loss": 2.976,
      "step": 133714
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8774943351745605,
      "learning_rate": 0.0002250711235586241,
      "loss": 2.8387,
      "step": 133715
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0274884700775146,
      "learning_rate": 0.00022506716264552687,
      "loss": 3.0292,
      "step": 133716
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0796542167663574,
      "learning_rate": 0.00022506320174636114,
      "loss": 2.9264,
      "step": 133717
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1629419326782227,
      "learning_rate": 0.0002250592408611275,
      "loss": 2.8791,
      "step": 133718
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7326788902282715,
      "learning_rate": 0.0002250552799898268,
      "loss": 2.9688,
      "step": 133719
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9150053262710571,
      "learning_rate": 0.00022505131913245978,
      "loss": 2.9149,
      "step": 133720
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0270421504974365,
      "learning_rate": 0.00022504735828902706,
      "loss": 3.0444,
      "step": 133721
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.477485418319702,
      "learning_rate": 0.00022504339745952945,
      "loss": 3.0103,
      "step": 133722
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.187203884124756,
      "learning_rate": 0.00022503943664396768,
      "loss": 2.9245,
      "step": 133723
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1624369621276855,
      "learning_rate": 0.0002250354758423426,
      "loss": 2.8837,
      "step": 133724
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.425549030303955,
      "learning_rate": 0.00022503151505465474,
      "loss": 3.2085,
      "step": 133725
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.926071047782898,
      "learning_rate": 0.00022502755428090499,
      "loss": 2.8753,
      "step": 133726
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.110316038131714,
      "learning_rate": 0.000225023593521094,
      "loss": 2.8886,
      "step": 133727
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.120922327041626,
      "learning_rate": 0.00022501963277522251,
      "loss": 3.1794,
      "step": 133728
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8870576620101929,
      "learning_rate": 0.00022501567204329134,
      "loss": 3.0305,
      "step": 133729
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.455085277557373,
      "learning_rate": 0.0002250117113253011,
      "loss": 2.8356,
      "step": 133730
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2119972705841064,
      "learning_rate": 0.0002250077506212527,
      "loss": 2.9994,
      "step": 133731
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.352276563644409,
      "learning_rate": 0.0002250037899311468,
      "loss": 3.0614,
      "step": 133732
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.454721450805664,
      "learning_rate": 0.000224999829254984,
      "loss": 3.0191,
      "step": 133733
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3647565841674805,
      "learning_rate": 0.0002249958685927652,
      "loss": 2.7669,
      "step": 133734
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.270146608352661,
      "learning_rate": 0.00022499190794449104,
      "loss": 3.0453,
      "step": 133735
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.298511028289795,
      "learning_rate": 0.00022498794731016229,
      "loss": 3.1439,
      "step": 133736
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.107059955596924,
      "learning_rate": 0.0002249839866897797,
      "loss": 2.9574,
      "step": 133737
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.9923019409179688,
      "learning_rate": 0.00022498002608334417,
      "loss": 2.9466,
      "step": 133738
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1870861053466797,
      "learning_rate": 0.00022497606549085611,
      "loss": 2.8857,
      "step": 133739
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.0101523399353027,
      "learning_rate": 0.00022497210491231642,
      "loss": 2.9741,
      "step": 133740
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6515724658966064,
      "learning_rate": 0.0002249681443477259,
      "loss": 3.0267,
      "step": 133741
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.055574893951416,
      "learning_rate": 0.0002249641837970851,
      "loss": 3.1353,
      "step": 133742
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7500951290130615,
      "learning_rate": 0.00022496022326039497,
      "loss": 3.1169,
      "step": 133743
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.542137861251831,
      "learning_rate": 0.00022495626273765618,
      "loss": 3.003,
      "step": 133744
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.649296283721924,
      "learning_rate": 0.0002249523022288694,
      "loss": 3.1969,
      "step": 133745
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.4167938232421875,
      "learning_rate": 0.00022494834173403537,
      "loss": 2.9305,
      "step": 133746
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0971925258636475,
      "learning_rate": 0.00022494438125315487,
      "loss": 2.8311,
      "step": 133747
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7265924215316772,
      "learning_rate": 0.0002249404207862286,
      "loss": 2.9588,
      "step": 133748
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.556720733642578,
      "learning_rate": 0.00022493646033325734,
      "loss": 2.824,
      "step": 133749
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0422403812408447,
      "learning_rate": 0.00022493249989424195,
      "loss": 3.0593,
      "step": 133750
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1658358573913574,
      "learning_rate": 0.0002249285394691829,
      "loss": 2.9179,
      "step": 133751
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.026972532272339,
      "learning_rate": 0.00022492457905808104,
      "loss": 3.0422,
      "step": 133752
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.823157548904419,
      "learning_rate": 0.00022492061866093708,
      "loss": 3.0961,
      "step": 133753
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.2711570262908936,
      "learning_rate": 0.00022491665827775183,
      "loss": 2.9865,
      "step": 133754
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.020808696746826,
      "learning_rate": 0.00022491269790852602,
      "loss": 3.0513,
      "step": 133755
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.261876344680786,
      "learning_rate": 0.00022490873755326045,
      "loss": 3.0694,
      "step": 133756
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.335278034210205,
      "learning_rate": 0.0002249047772119556,
      "loss": 2.8873,
      "step": 133757
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8972830772399902,
      "learning_rate": 0.0002249008168846124,
      "loss": 2.9926,
      "step": 133758
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3239805698394775,
      "learning_rate": 0.00022489685657123157,
      "loss": 2.8224,
      "step": 133759
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.205587863922119,
      "learning_rate": 0.00022489289627181385,
      "loss": 3.1259,
      "step": 133760
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.4685468673706055,
      "learning_rate": 0.00022488893598635994,
      "loss": 3.0585,
      "step": 133761
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.136904001235962,
      "learning_rate": 0.00022488497571487073,
      "loss": 2.7902,
      "step": 133762
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.000500202178955,
      "learning_rate": 0.00022488101545734666,
      "loss": 3.0124,
      "step": 133763
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0838422775268555,
      "learning_rate": 0.00022487705521378866,
      "loss": 3.0206,
      "step": 133764
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8751417398452759,
      "learning_rate": 0.00022487309498419743,
      "loss": 3.1391,
      "step": 133765
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9937015771865845,
      "learning_rate": 0.00022486913476857368,
      "loss": 3.275,
      "step": 133766
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.903709888458252,
      "learning_rate": 0.00022486517456691822,
      "loss": 3.0185,
      "step": 133767
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1741673946380615,
      "learning_rate": 0.00022486121437923184,
      "loss": 2.9902,
      "step": 133768
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7952661514282227,
      "learning_rate": 0.00022485725420551506,
      "loss": 2.8306,
      "step": 133769
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.684863567352295,
      "learning_rate": 0.00022485329404576873,
      "loss": 2.9168,
      "step": 133770
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2181591987609863,
      "learning_rate": 0.00022484933389999357,
      "loss": 2.9268,
      "step": 133771
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7695006132125854,
      "learning_rate": 0.00022484537376819035,
      "loss": 3.0885,
      "step": 133772
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7594528198242188,
      "learning_rate": 0.00022484141365035983,
      "loss": 2.979,
      "step": 133773
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.962014675140381,
      "learning_rate": 0.0002248374535465028,
      "loss": 3.1357,
      "step": 133774
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5178399085998535,
      "learning_rate": 0.00022483349345661982,
      "loss": 2.8567,
      "step": 133775
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.332214832305908,
      "learning_rate": 0.00022482953338071165,
      "loss": 2.691,
      "step": 133776
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.304154396057129,
      "learning_rate": 0.00022482557331877915,
      "loss": 3.0436,
      "step": 133777
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.662277698516846,
      "learning_rate": 0.00022482161327082295,
      "loss": 2.8436,
      "step": 133778
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.934805154800415,
      "learning_rate": 0.00022481765323684386,
      "loss": 2.8967,
      "step": 133779
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.176835060119629,
      "learning_rate": 0.00022481369321684268,
      "loss": 3.0655,
      "step": 133780
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8888734579086304,
      "learning_rate": 0.00022480973321082,
      "loss": 2.7919,
      "step": 133781
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3468000888824463,
      "learning_rate": 0.00022480577321877654,
      "loss": 3.0033,
      "step": 133782
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6217002868652344,
      "learning_rate": 0.0002248018132407131,
      "loss": 2.9174,
      "step": 133783
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.606614351272583,
      "learning_rate": 0.00022479785327663047,
      "loss": 2.7404,
      "step": 133784
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2382960319519043,
      "learning_rate": 0.0002247938933265293,
      "loss": 3.0069,
      "step": 133785
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6457314491271973,
      "learning_rate": 0.0002247899333904104,
      "loss": 2.9728,
      "step": 133786
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6624858379364014,
      "learning_rate": 0.00022478597346827457,
      "loss": 2.8512,
      "step": 133787
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.338070869445801,
      "learning_rate": 0.00022478201356012235,
      "loss": 2.8518,
      "step": 133788
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.897225856781006,
      "learning_rate": 0.00022477805366595454,
      "loss": 2.7968,
      "step": 133789
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7492480278015137,
      "learning_rate": 0.00022477409378577191,
      "loss": 2.9883,
      "step": 133790
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5181972980499268,
      "learning_rate": 0.00022477013391957522,
      "loss": 3.1584,
      "step": 133791
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8417376279830933,
      "learning_rate": 0.00022476617406736517,
      "loss": 3.1852,
      "step": 133792
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.204129457473755,
      "learning_rate": 0.00022476221422914258,
      "loss": 2.8391,
      "step": 133793
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.853135824203491,
      "learning_rate": 0.0002247582544049081,
      "loss": 2.8794,
      "step": 133794
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.7492785453796387,
      "learning_rate": 0.00022475429459466244,
      "loss": 2.6578,
      "step": 133795
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.043672800064087,
      "learning_rate": 0.00022475033479840635,
      "loss": 2.9717,
      "step": 133796
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.63299822807312,
      "learning_rate": 0.0002247463750161406,
      "loss": 3.0302,
      "step": 133797
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9266914129257202,
      "learning_rate": 0.0002247424152478659,
      "loss": 2.8906,
      "step": 133798
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.043198823928833,
      "learning_rate": 0.0002247384554935831,
      "loss": 2.8541,
      "step": 133799
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.549755334854126,
      "learning_rate": 0.00022473449575329278,
      "loss": 2.9473,
      "step": 133800
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8513758182525635,
      "learning_rate": 0.0002247305360269958,
      "loss": 2.7969,
      "step": 133801
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.966402292251587,
      "learning_rate": 0.0002247265763146928,
      "loss": 2.9818,
      "step": 133802
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6827707290649414,
      "learning_rate": 0.0002247226166163845,
      "loss": 3.1438,
      "step": 133803
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.323093891143799,
      "learning_rate": 0.00022471865693207167,
      "loss": 2.9456,
      "step": 133804
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1544578075408936,
      "learning_rate": 0.00022471469726175516,
      "loss": 3.2502,
      "step": 133805
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.6419529914855957,
      "learning_rate": 0.00022471073760543553,
      "loss": 2.8398,
      "step": 133806
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.143784284591675,
      "learning_rate": 0.00022470677796311366,
      "loss": 2.8599,
      "step": 133807
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.8544158935546875,
      "learning_rate": 0.00022470281833479017,
      "loss": 2.943,
      "step": 133808
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2747154235839844,
      "learning_rate": 0.0002246988587204659,
      "loss": 3.0058,
      "step": 133809
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4021854400634766,
      "learning_rate": 0.00022469489912014149,
      "loss": 3.0675,
      "step": 133810
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7788400650024414,
      "learning_rate": 0.00022469093953381776,
      "loss": 3.0295,
      "step": 133811
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3428890705108643,
      "learning_rate": 0.00022468697996149534,
      "loss": 3.2864,
      "step": 133812
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.60145902633667,
      "learning_rate": 0.00022468302040317504,
      "loss": 3.0823,
      "step": 133813
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.6552438735961914,
      "learning_rate": 0.00022467906085885762,
      "loss": 2.8743,
      "step": 133814
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4508097171783447,
      "learning_rate": 0.00022467510132854373,
      "loss": 2.9049,
      "step": 133815
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.558631658554077,
      "learning_rate": 0.00022467114181223428,
      "loss": 3.1374,
      "step": 133816
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.283449172973633,
      "learning_rate": 0.00022466718230992988,
      "loss": 2.9312,
      "step": 133817
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.042640447616577,
      "learning_rate": 0.00022466322282163118,
      "loss": 3.0925,
      "step": 133818
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.370269536972046,
      "learning_rate": 0.00022465926334733902,
      "loss": 3.0953,
      "step": 133819
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4692580699920654,
      "learning_rate": 0.0002246553038870541,
      "loss": 3.1448,
      "step": 133820
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.638334274291992,
      "learning_rate": 0.00022465134444077723,
      "loss": 2.9381,
      "step": 133821
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.09613037109375,
      "learning_rate": 0.00022464738500850906,
      "loss": 3.1116,
      "step": 133822
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.081868886947632,
      "learning_rate": 0.00022464342559025052,
      "loss": 3.0504,
      "step": 133823
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.243236780166626,
      "learning_rate": 0.00022463946618600205,
      "loss": 2.7257,
      "step": 133824
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2160801887512207,
      "learning_rate": 0.0002246355067957645,
      "loss": 2.8822,
      "step": 133825
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9633917808532715,
      "learning_rate": 0.00022463154741953867,
      "loss": 3.1077,
      "step": 133826
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3283791542053223,
      "learning_rate": 0.00022462758805732525,
      "loss": 2.8686,
      "step": 133827
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8725694417953491,
      "learning_rate": 0.00022462362870912496,
      "loss": 2.8778,
      "step": 133828
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3252155780792236,
      "learning_rate": 0.00022461966937493868,
      "loss": 2.9584,
      "step": 133829
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.11027455329895,
      "learning_rate": 0.00022461571005476697,
      "loss": 2.7861,
      "step": 133830
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2183244228363037,
      "learning_rate": 0.00022461175074861057,
      "loss": 2.8452,
      "step": 133831
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.307404041290283,
      "learning_rate": 0.00022460779145647025,
      "loss": 2.8008,
      "step": 133832
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.4133799076080322,
      "learning_rate": 0.0002246038321783468,
      "loss": 2.985,
      "step": 133833
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.976514220237732,
      "learning_rate": 0.0002245998729142409,
      "loss": 3.0405,
      "step": 133834
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.489912986755371,
      "learning_rate": 0.00022459591366415346,
      "loss": 3.0679,
      "step": 133835
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8074939250946045,
      "learning_rate": 0.00022459195442808494,
      "loss": 3.074,
      "step": 133836
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2220945358276367,
      "learning_rate": 0.00022458799520603615,
      "loss": 2.8528,
      "step": 133837
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.033447265625,
      "learning_rate": 0.00022458403599800788,
      "loss": 3.1222,
      "step": 133838
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.247929334640503,
      "learning_rate": 0.00022458007680400092,
      "loss": 3.0141,
      "step": 133839
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.5074689388275146,
      "learning_rate": 0.00022457611762401593,
      "loss": 2.8623,
      "step": 133840
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7009711265563965,
      "learning_rate": 0.00022457215845805376,
      "loss": 2.9975,
      "step": 133841
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3133175373077393,
      "learning_rate": 0.00022456819930611493,
      "loss": 2.7631,
      "step": 133842
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5079588890075684,
      "learning_rate": 0.0002245642401682003,
      "loss": 2.975,
      "step": 133843
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.596283435821533,
      "learning_rate": 0.00022456028104431058,
      "loss": 3.2179,
      "step": 133844
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.922417640686035,
      "learning_rate": 0.00022455632193444657,
      "loss": 2.7375,
      "step": 133845
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0870347023010254,
      "learning_rate": 0.00022455236283860894,
      "loss": 2.7909,
      "step": 133846
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8487350940704346,
      "learning_rate": 0.0002245484037567986,
      "loss": 2.9769,
      "step": 133847
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.51763653755188,
      "learning_rate": 0.00022454444468901597,
      "loss": 2.9763,
      "step": 133848
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.772449016571045,
      "learning_rate": 0.00022454048563526197,
      "loss": 3.0657,
      "step": 133849
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.394418954849243,
      "learning_rate": 0.0002245365265955373,
      "loss": 3.0512,
      "step": 133850
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.656254291534424,
      "learning_rate": 0.00022453256756984277,
      "loss": 3.0586,
      "step": 133851
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.565505266189575,
      "learning_rate": 0.000224528608558179,
      "loss": 2.9871,
      "step": 133852
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.489382028579712,
      "learning_rate": 0.0002245246495605468,
      "loss": 3.0392,
      "step": 133853
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3963232040405273,
      "learning_rate": 0.00022452069057694705,
      "loss": 3.0949,
      "step": 133854
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4947731494903564,
      "learning_rate": 0.00022451673160738022,
      "loss": 3.0315,
      "step": 133855
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.119813919067383,
      "learning_rate": 0.00022451277265184708,
      "loss": 2.8644,
      "step": 133856
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.6726365089416504,
      "learning_rate": 0.00022450881371034848,
      "loss": 2.8784,
      "step": 133857
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1435775756835938,
      "learning_rate": 0.00022450485478288514,
      "loss": 3.1603,
      "step": 133858
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.529350519180298,
      "learning_rate": 0.00022450089586945773,
      "loss": 3.0578,
      "step": 133859
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.3023481369018555,
      "learning_rate": 0.00022449693697006718,
      "loss": 2.9518,
      "step": 133860
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4504706859588623,
      "learning_rate": 0.00022449297808471393,
      "loss": 3.006,
      "step": 133861
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3227055072784424,
      "learning_rate": 0.00022448901921339886,
      "loss": 3.0932,
      "step": 133862
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7903598546981812,
      "learning_rate": 0.0002244850603561227,
      "loss": 3.1872,
      "step": 133863
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2621805667877197,
      "learning_rate": 0.00022448110151288624,
      "loss": 2.9561,
      "step": 133864
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8593931198120117,
      "learning_rate": 0.00022447714268369014,
      "loss": 2.8844,
      "step": 133865
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.491389513015747,
      "learning_rate": 0.0002244731838685353,
      "loss": 2.819,
      "step": 133866
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3290770053863525,
      "learning_rate": 0.00022446922506742217,
      "loss": 2.9195,
      "step": 133867
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.361985445022583,
      "learning_rate": 0.00022446526628035164,
      "loss": 2.9596,
      "step": 133868
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.877033233642578,
      "learning_rate": 0.00022446130750732445,
      "loss": 2.9511,
      "step": 133869
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0177135467529297,
      "learning_rate": 0.00022445734874834136,
      "loss": 3.2251,
      "step": 133870
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9768165349960327,
      "learning_rate": 0.00022445339000340303,
      "loss": 3.1668,
      "step": 133871
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.616394281387329,
      "learning_rate": 0.0002244494312725104,
      "loss": 2.9023,
      "step": 133872
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.852940320968628,
      "learning_rate": 0.00022444547255566388,
      "loss": 2.8948,
      "step": 133873
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9816945791244507,
      "learning_rate": 0.0002244415138528644,
      "loss": 3.1422,
      "step": 133874
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7805731296539307,
      "learning_rate": 0.00022443755516411267,
      "loss": 3.0986,
      "step": 133875
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1323211193084717,
      "learning_rate": 0.00022443359648940942,
      "loss": 3.1729,
      "step": 133876
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.245551586151123,
      "learning_rate": 0.0002244296378287554,
      "loss": 3.1095,
      "step": 133877
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.112868070602417,
      "learning_rate": 0.0002244256791821514,
      "loss": 3.0752,
      "step": 133878
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.9343740940093994,
      "learning_rate": 0.0002244217205495981,
      "loss": 3.1519,
      "step": 133879
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.557962656021118,
      "learning_rate": 0.0002244177619310961,
      "loss": 2.8799,
      "step": 133880
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1609179973602295,
      "learning_rate": 0.00022441380332664633,
      "loss": 3.0907,
      "step": 133881
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.139181613922119,
      "learning_rate": 0.00022440984473624947,
      "loss": 2.7656,
      "step": 133882
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6930747032165527,
      "learning_rate": 0.0002244058861599062,
      "loss": 3.2257,
      "step": 133883
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0469563007354736,
      "learning_rate": 0.0002244019275976174,
      "loss": 2.9402,
      "step": 133884
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.564443588256836,
      "learning_rate": 0.00022439796904938364,
      "loss": 3.1022,
      "step": 133885
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.272754430770874,
      "learning_rate": 0.00022439401051520578,
      "loss": 3.067,
      "step": 133886
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9295176267623901,
      "learning_rate": 0.0002243900519950844,
      "loss": 2.8913,
      "step": 133887
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.414238691329956,
      "learning_rate": 0.0002243860934890204,
      "loss": 2.9286,
      "step": 133888
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4362425804138184,
      "learning_rate": 0.00022438213499701443,
      "loss": 3.1051,
      "step": 133889
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0910632610321045,
      "learning_rate": 0.00022437817651906733,
      "loss": 2.7196,
      "step": 133890
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4325685501098633,
      "learning_rate": 0.00022437421805517966,
      "loss": 3.1375,
      "step": 133891
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.595776081085205,
      "learning_rate": 0.00022437025960535227,
      "loss": 2.939,
      "step": 133892
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.274268627166748,
      "learning_rate": 0.00022436630116958597,
      "loss": 2.9081,
      "step": 133893
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.091404676437378,
      "learning_rate": 0.00022436234274788127,
      "loss": 3.0718,
      "step": 133894
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0193183422088623,
      "learning_rate": 0.00022435838434023907,
      "loss": 2.8775,
      "step": 133895
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.474388837814331,
      "learning_rate": 0.00022435442594666015,
      "loss": 2.9853,
      "step": 133896
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4570164680480957,
      "learning_rate": 0.00022435046756714505,
      "loss": 2.8475,
      "step": 133897
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.484485626220703,
      "learning_rate": 0.0002243465092016947,
      "loss": 2.7647,
      "step": 133898
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6202447414398193,
      "learning_rate": 0.00022434255085030974,
      "loss": 3.0527,
      "step": 133899
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0130579471588135,
      "learning_rate": 0.000224338592512991,
      "loss": 2.9064,
      "step": 133900
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.7914539575576782,
      "learning_rate": 0.00022433463418973905,
      "loss": 2.9081,
      "step": 133901
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9125829935073853,
      "learning_rate": 0.00022433067588055477,
      "loss": 2.8645,
      "step": 133902
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4754152297973633,
      "learning_rate": 0.00022432671758543883,
      "loss": 2.9693,
      "step": 133903
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8596704006195068,
      "learning_rate": 0.00022432275930439196,
      "loss": 3.1258,
      "step": 133904
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.569469451904297,
      "learning_rate": 0.0002243188010374149,
      "loss": 2.7703,
      "step": 133905
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.693298578262329,
      "learning_rate": 0.00022431484278450842,
      "loss": 3.1007,
      "step": 133906
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.05574631690979,
      "learning_rate": 0.00022431088454567322,
      "loss": 2.9677,
      "step": 133907
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.502042531967163,
      "learning_rate": 0.00022430692632091024,
      "loss": 2.9786,
      "step": 133908
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0426993370056152,
      "learning_rate": 0.00022430296811021987,
      "loss": 2.9683,
      "step": 133909
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.07061767578125,
      "learning_rate": 0.000224299009913603,
      "loss": 2.8188,
      "step": 133910
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9873533248901367,
      "learning_rate": 0.0002242950517310604,
      "loss": 2.9279,
      "step": 133911
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.8744630813598633,
      "learning_rate": 0.00022429109356259273,
      "loss": 2.9619,
      "step": 133912
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.437772750854492,
      "learning_rate": 0.00022428713540820084,
      "loss": 2.9249,
      "step": 133913
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3482444286346436,
      "learning_rate": 0.00022428317726788544,
      "loss": 3.1466,
      "step": 133914
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.505993604660034,
      "learning_rate": 0.00022427921914164716,
      "loss": 2.741,
      "step": 133915
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.155973434448242,
      "learning_rate": 0.00022427526102948678,
      "loss": 2.7658,
      "step": 133916
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9374748468399048,
      "learning_rate": 0.0002242713029314051,
      "loss": 3.113,
      "step": 133917
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.934735655784607,
      "learning_rate": 0.00022426734484740278,
      "loss": 3.038,
      "step": 133918
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3219215869903564,
      "learning_rate": 0.00022426338677748059,
      "loss": 3.081,
      "step": 133919
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.264042377471924,
      "learning_rate": 0.00022425942872163925,
      "loss": 2.9016,
      "step": 133920
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3837692737579346,
      "learning_rate": 0.00022425547067987967,
      "loss": 2.8658,
      "step": 133921
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.136413812637329,
      "learning_rate": 0.00022425151265220228,
      "loss": 3.0077,
      "step": 133922
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1800503730773926,
      "learning_rate": 0.00022424755463860797,
      "loss": 2.9819,
      "step": 133923
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9798935651779175,
      "learning_rate": 0.00022424359663909748,
      "loss": 3.0325,
      "step": 133924
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.711107015609741,
      "learning_rate": 0.00022423963865367152,
      "loss": 2.9089,
      "step": 133925
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3895609378814697,
      "learning_rate": 0.00022423568068233088,
      "loss": 2.9859,
      "step": 133926
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.9141311645507812,
      "learning_rate": 0.00022423172272507632,
      "loss": 2.9575,
      "step": 133927
    },
    {
      "epoch": 1.74,
      "grad_norm": 5.191263675689697,
      "learning_rate": 0.00022422776478190845,
      "loss": 3.3087,
      "step": 133928
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.198179244995117,
      "learning_rate": 0.00022422380685282802,
      "loss": 2.8501,
      "step": 133929
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.086045026779175,
      "learning_rate": 0.00022421984893783583,
      "loss": 3.1654,
      "step": 133930
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2412877082824707,
      "learning_rate": 0.0002242158910369326,
      "loss": 3.0368,
      "step": 133931
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.612480401992798,
      "learning_rate": 0.0002242119331501191,
      "loss": 2.8499,
      "step": 133932
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2985763549804688,
      "learning_rate": 0.00022420797527739612,
      "loss": 2.9765,
      "step": 133933
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1182827949523926,
      "learning_rate": 0.0002242040174187642,
      "loss": 3.0349,
      "step": 133934
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9307732582092285,
      "learning_rate": 0.00022420005957422416,
      "loss": 2.9005,
      "step": 133935
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.740077257156372,
      "learning_rate": 0.00022419610174377678,
      "loss": 3.0036,
      "step": 133936
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9937536716461182,
      "learning_rate": 0.00022419214392742275,
      "loss": 3.1348,
      "step": 133937
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.579977035522461,
      "learning_rate": 0.00022418818612516286,
      "loss": 3.0255,
      "step": 133938
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1134703159332275,
      "learning_rate": 0.00022418422833699793,
      "loss": 3.0228,
      "step": 133939
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2532942295074463,
      "learning_rate": 0.00022418027056292846,
      "loss": 3.0357,
      "step": 133940
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.045785665512085,
      "learning_rate": 0.0002241763128029553,
      "loss": 3.0472,
      "step": 133941
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.553278923034668,
      "learning_rate": 0.00022417235505707922,
      "loss": 2.8988,
      "step": 133942
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.983924388885498,
      "learning_rate": 0.00022416839732530093,
      "loss": 2.9799,
      "step": 133943
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1056206226348877,
      "learning_rate": 0.00022416443960762114,
      "loss": 2.8771,
      "step": 133944
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3082082271575928,
      "learning_rate": 0.00022416048190404076,
      "loss": 2.9867,
      "step": 133945
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9215608835220337,
      "learning_rate": 0.00022415652421456023,
      "loss": 3.249,
      "step": 133946
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2238309383392334,
      "learning_rate": 0.00022415256653918046,
      "loss": 3.1329,
      "step": 133947
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.286552667617798,
      "learning_rate": 0.00022414860887790217,
      "loss": 3.2458,
      "step": 133948
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.16536021232605,
      "learning_rate": 0.00022414465123072604,
      "loss": 2.818,
      "step": 133949
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.248471260070801,
      "learning_rate": 0.00022414069359765284,
      "loss": 2.9071,
      "step": 133950
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2855610847473145,
      "learning_rate": 0.0002241367359786835,
      "loss": 2.8288,
      "step": 133951
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.238506555557251,
      "learning_rate": 0.00022413277837381843,
      "loss": 2.9268,
      "step": 133952
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1629340648651123,
      "learning_rate": 0.00022412882078305852,
      "loss": 3.0119,
      "step": 133953
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9536399841308594,
      "learning_rate": 0.00022412486320640447,
      "loss": 3.1116,
      "step": 133954
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2474775314331055,
      "learning_rate": 0.00022412090564385704,
      "loss": 2.8953,
      "step": 133955
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1762213706970215,
      "learning_rate": 0.00022411694809541696,
      "loss": 2.7761,
      "step": 133956
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.135362148284912,
      "learning_rate": 0.00022411299056108511,
      "loss": 2.9038,
      "step": 133957
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2593765258789062,
      "learning_rate": 0.000224109033040862,
      "loss": 3.1683,
      "step": 133958
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.394070863723755,
      "learning_rate": 0.00022410507553474837,
      "loss": 2.8927,
      "step": 133959
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.757319450378418,
      "learning_rate": 0.00022410111804274505,
      "loss": 3.0269,
      "step": 133960
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.9919583797454834,
      "learning_rate": 0.00022409716056485284,
      "loss": 3.1023,
      "step": 133961
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.036536693572998,
      "learning_rate": 0.00022409320310107236,
      "loss": 3.1698,
      "step": 133962
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.5470566749572754,
      "learning_rate": 0.00022408924565140448,
      "loss": 3.0621,
      "step": 133963
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0683209896087646,
      "learning_rate": 0.00022408528821584977,
      "loss": 2.8301,
      "step": 133964
    },
    {
      "epoch": 1.74,
      "grad_norm": 4.033900260925293,
      "learning_rate": 0.000224081330794409,
      "loss": 3.0418,
      "step": 133965
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.370286226272583,
      "learning_rate": 0.000224077373387083,
      "loss": 3.0818,
      "step": 133966
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.363684892654419,
      "learning_rate": 0.00022407341599387239,
      "loss": 3.011,
      "step": 133967
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6987648010253906,
      "learning_rate": 0.00022406945861477796,
      "loss": 2.9991,
      "step": 133968
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.88871693611145,
      "learning_rate": 0.00022406550124980055,
      "loss": 2.961,
      "step": 133969
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.6303293704986572,
      "learning_rate": 0.0002240615438989407,
      "loss": 2.8475,
      "step": 133970
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.756455183029175,
      "learning_rate": 0.00022405758656219934,
      "loss": 3.0733,
      "step": 133971
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2577052116394043,
      "learning_rate": 0.00022405362923957705,
      "loss": 2.9414,
      "step": 133972
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.5459296703338623,
      "learning_rate": 0.0002240496719310746,
      "loss": 2.8144,
      "step": 133973
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.6671128273010254,
      "learning_rate": 0.00022404571463669273,
      "loss": 2.9391,
      "step": 133974
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.897003173828125,
      "learning_rate": 0.00022404175735643233,
      "loss": 2.7698,
      "step": 133975
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8684885501861572,
      "learning_rate": 0.0002240378000902939,
      "loss": 2.9621,
      "step": 133976
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.773526430130005,
      "learning_rate": 0.00022403384283827826,
      "loss": 3.1983,
      "step": 133977
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.173234701156616,
      "learning_rate": 0.00022402988560038623,
      "loss": 3.1371,
      "step": 133978
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8469302654266357,
      "learning_rate": 0.0002240259283766184,
      "loss": 2.9236,
      "step": 133979
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8696768283843994,
      "learning_rate": 0.0002240219711669756,
      "loss": 2.8898,
      "step": 133980
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0060136318206787,
      "learning_rate": 0.00022401801397145863,
      "loss": 3.1205,
      "step": 133981
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1388981342315674,
      "learning_rate": 0.00022401405679006806,
      "loss": 2.8799,
      "step": 133982
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4615087509155273,
      "learning_rate": 0.00022401009962280473,
      "loss": 3.1615,
      "step": 133983
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0424089431762695,
      "learning_rate": 0.00022400614246966932,
      "loss": 2.9142,
      "step": 133984
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.17171311378479,
      "learning_rate": 0.0002240021853306627,
      "loss": 2.9895,
      "step": 133985
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.589858055114746,
      "learning_rate": 0.00022399822820578545,
      "loss": 2.815,
      "step": 133986
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0319595336914062,
      "learning_rate": 0.00022399427109503833,
      "loss": 2.8913,
      "step": 133987
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.465362310409546,
      "learning_rate": 0.0002239903139984222,
      "loss": 2.7174,
      "step": 133988
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.026867151260376,
      "learning_rate": 0.00022398635691593766,
      "loss": 3.2584,
      "step": 133989
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.226238489151001,
      "learning_rate": 0.00022398239984758547,
      "loss": 3.0511,
      "step": 133990
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.6476452350616455,
      "learning_rate": 0.00022397844279336637,
      "loss": 2.8407,
      "step": 133991
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.3882877826690674,
      "learning_rate": 0.00022397448575328118,
      "loss": 2.9127,
      "step": 133992
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.309499502182007,
      "learning_rate": 0.00022397052872733054,
      "loss": 2.8608,
      "step": 133993
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.874359130859375,
      "learning_rate": 0.00022396657171551527,
      "loss": 3.0643,
      "step": 133994
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.80037522315979,
      "learning_rate": 0.00022396261471783595,
      "loss": 2.939,
      "step": 133995
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.990017294883728,
      "learning_rate": 0.00022395865773429345,
      "loss": 2.9418,
      "step": 133996
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1113321781158447,
      "learning_rate": 0.00022395470076488845,
      "loss": 3.0589,
      "step": 133997
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.4122653007507324,
      "learning_rate": 0.0002239507438096217,
      "loss": 2.8419,
      "step": 133998
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.9988770484924316,
      "learning_rate": 0.00022394678686849396,
      "loss": 2.8609,
      "step": 133999
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.392380714416504,
      "learning_rate": 0.00022394282994150607,
      "loss": 2.9752,
      "step": 134000
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.8568027019500732,
      "learning_rate": 0.0002239388730286585,
      "loss": 3.1001,
      "step": 134001
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.2261509895324707,
      "learning_rate": 0.00022393491612995216,
      "loss": 3.1137,
      "step": 134002
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7982378005981445,
      "learning_rate": 0.00022393095924538774,
      "loss": 2.8108,
      "step": 134003
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.6203763484954834,
      "learning_rate": 0.00022392700237496596,
      "loss": 3.0807,
      "step": 134004
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9899663925170898,
      "learning_rate": 0.00022392304551868765,
      "loss": 2.9225,
      "step": 134005
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.049112558364868,
      "learning_rate": 0.00022391908867655356,
      "loss": 2.9949,
      "step": 134006
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.575221300125122,
      "learning_rate": 0.00022391513184856428,
      "loss": 2.6794,
      "step": 134007
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.1624224185943604,
      "learning_rate": 0.00022391117503472053,
      "loss": 2.8689,
      "step": 134008
    },
    {
      "epoch": 1.74,
      "grad_norm": 1.9355121850967407,
      "learning_rate": 0.00022390721823502317,
      "loss": 3.0368,
      "step": 134009
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.1844537258148193,
      "learning_rate": 0.0002239032614494729,
      "loss": 2.8963,
      "step": 134010
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.710566997528076,
      "learning_rate": 0.00022389930467807043,
      "loss": 2.9733,
      "step": 134011
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.259209632873535,
      "learning_rate": 0.00022389534792081667,
      "loss": 2.8791,
      "step": 134012
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.0532310009002686,
      "learning_rate": 0.00022389139117771206,
      "loss": 3.1464,
      "step": 134013
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.029747486114502,
      "learning_rate": 0.00022388743444875744,
      "loss": 3.1673,
      "step": 134014
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.7636988162994385,
      "learning_rate": 0.00022388347773395366,
      "loss": 2.8013,
      "step": 134015
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.4266717433929443,
      "learning_rate": 0.00022387952103330135,
      "loss": 3.3204,
      "step": 134016
    },
    {
      "epoch": 1.74,
      "grad_norm": 2.2625927925109863,
      "learning_rate": 0.00022387556434680123,
      "loss": 3.0606,
      "step": 134017
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.786264181137085,
      "learning_rate": 0.00022387160767445427,
      "loss": 2.8248,
      "step": 134018
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3807785511016846,
      "learning_rate": 0.00022386765101626085,
      "loss": 3.0552,
      "step": 134019
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9999988079071045,
      "learning_rate": 0.00022386369437222184,
      "loss": 2.7708,
      "step": 134020
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7230910062789917,
      "learning_rate": 0.00022385973774233806,
      "loss": 2.8548,
      "step": 134021
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9323328733444214,
      "learning_rate": 0.0002238557811266102,
      "loss": 3.112,
      "step": 134022
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9307196140289307,
      "learning_rate": 0.00022385182452503897,
      "loss": 2.9904,
      "step": 134023
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3909475803375244,
      "learning_rate": 0.00022384786793762523,
      "loss": 2.8008,
      "step": 134024
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1380083560943604,
      "learning_rate": 0.0002238439113643695,
      "loss": 2.572,
      "step": 134025
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9350793361663818,
      "learning_rate": 0.00022383995480527262,
      "loss": 3.0533,
      "step": 134026
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3480050563812256,
      "learning_rate": 0.00022383599826033534,
      "loss": 3.0482,
      "step": 134027
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3640899658203125,
      "learning_rate": 0.00022383204172955832,
      "loss": 2.9955,
      "step": 134028
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9705110788345337,
      "learning_rate": 0.00022382808521294247,
      "loss": 2.958,
      "step": 134029
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1144769191741943,
      "learning_rate": 0.00022382412871048847,
      "loss": 3.068,
      "step": 134030
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1050662994384766,
      "learning_rate": 0.00022382017222219688,
      "loss": 2.9732,
      "step": 134031
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0244314670562744,
      "learning_rate": 0.00022381621574806855,
      "loss": 2.8981,
      "step": 134032
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9759502410888672,
      "learning_rate": 0.00022381225928810427,
      "loss": 2.9043,
      "step": 134033
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0296404361724854,
      "learning_rate": 0.00022380830284230474,
      "loss": 3.1225,
      "step": 134034
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0405452251434326,
      "learning_rate": 0.00022380434641067063,
      "loss": 2.987,
      "step": 134035
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.368135929107666,
      "learning_rate": 0.00022380038999320287,
      "loss": 2.9406,
      "step": 134036
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1102869510650635,
      "learning_rate": 0.00022379643358990195,
      "loss": 2.8944,
      "step": 134037
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9817816019058228,
      "learning_rate": 0.0002237924772007687,
      "loss": 3.068,
      "step": 134038
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.874635934829712,
      "learning_rate": 0.00022378852082580386,
      "loss": 2.8542,
      "step": 134039
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.2251129150390625,
      "learning_rate": 0.00022378456446500817,
      "loss": 2.9313,
      "step": 134040
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3483383655548096,
      "learning_rate": 0.00022378060811838237,
      "loss": 2.7418,
      "step": 134041
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.878795862197876,
      "learning_rate": 0.00022377665178592736,
      "loss": 2.9242,
      "step": 134042
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8784501552581787,
      "learning_rate": 0.00022377269546764353,
      "loss": 2.8592,
      "step": 134043
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4494333267211914,
      "learning_rate": 0.00022376873916353182,
      "loss": 2.9412,
      "step": 134044
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.2654473781585693,
      "learning_rate": 0.00022376478287359293,
      "loss": 3.1476,
      "step": 134045
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.094393014907837,
      "learning_rate": 0.00022376082659782763,
      "loss": 3.052,
      "step": 134046
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.887926459312439,
      "learning_rate": 0.0002237568703362366,
      "loss": 3.1205,
      "step": 134047
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.100022315979004,
      "learning_rate": 0.00022375291408882077,
      "loss": 3.0786,
      "step": 134048
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2597737312316895,
      "learning_rate": 0.00022374895785558056,
      "loss": 3.161,
      "step": 134049
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.368809700012207,
      "learning_rate": 0.00022374500163651684,
      "loss": 3.1196,
      "step": 134050
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0655064582824707,
      "learning_rate": 0.0002237410454316304,
      "loss": 2.944,
      "step": 134051
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8413100242614746,
      "learning_rate": 0.0002237370892409219,
      "loss": 2.9246,
      "step": 134052
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.014979362487793,
      "learning_rate": 0.00022373313306439215,
      "loss": 3.1065,
      "step": 134053
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0818512439727783,
      "learning_rate": 0.00022372917690204183,
      "loss": 2.755,
      "step": 134054
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2411508560180664,
      "learning_rate": 0.00022372522075387184,
      "loss": 2.6186,
      "step": 134055
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0570998191833496,
      "learning_rate": 0.00022372126461988261,
      "loss": 2.9922,
      "step": 134056
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.080963611602783,
      "learning_rate": 0.00022371730850007505,
      "loss": 2.8294,
      "step": 134057
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2992537021636963,
      "learning_rate": 0.0002237133523944499,
      "loss": 3.1934,
      "step": 134058
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8367207050323486,
      "learning_rate": 0.0002237093963030079,
      "loss": 2.8574,
      "step": 134059
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.323441505432129,
      "learning_rate": 0.00022370544022574975,
      "loss": 2.8511,
      "step": 134060
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6301684379577637,
      "learning_rate": 0.0002237014841626762,
      "loss": 3.0504,
      "step": 134061
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2105658054351807,
      "learning_rate": 0.00022369752811378808,
      "loss": 2.8553,
      "step": 134062
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.002671241760254,
      "learning_rate": 0.00022369357207908591,
      "loss": 3.1974,
      "step": 134063
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.598937511444092,
      "learning_rate": 0.00022368961605857054,
      "loss": 2.9934,
      "step": 134064
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.337021589279175,
      "learning_rate": 0.00022368566005224273,
      "loss": 2.791,
      "step": 134065
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.40988826751709,
      "learning_rate": 0.00022368170406010318,
      "loss": 2.75,
      "step": 134066
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.3852884769439697,
      "learning_rate": 0.00022367774808215274,
      "loss": 3.0203,
      "step": 134067
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.654787540435791,
      "learning_rate": 0.00022367379211839195,
      "loss": 2.8787,
      "step": 134068
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4237444400787354,
      "learning_rate": 0.00022366983616882164,
      "loss": 2.943,
      "step": 134069
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.150603771209717,
      "learning_rate": 0.00022366588023344262,
      "loss": 3.2907,
      "step": 134070
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.177321672439575,
      "learning_rate": 0.0002236619243122555,
      "loss": 2.9154,
      "step": 134071
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.188896656036377,
      "learning_rate": 0.00022365796840526105,
      "loss": 2.6183,
      "step": 134072
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.632887363433838,
      "learning_rate": 0.00022365401251246009,
      "loss": 2.9545,
      "step": 134073
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3221988677978516,
      "learning_rate": 0.00022365005663385328,
      "loss": 2.895,
      "step": 134074
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4691030979156494,
      "learning_rate": 0.0002236461007694413,
      "loss": 2.9902,
      "step": 134075
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.983213186264038,
      "learning_rate": 0.00022364214491922497,
      "loss": 2.9439,
      "step": 134076
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8621641397476196,
      "learning_rate": 0.00022363818908320508,
      "loss": 2.9258,
      "step": 134077
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.2819793224334717,
      "learning_rate": 0.00022363423326138222,
      "loss": 3.0252,
      "step": 134078
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2044858932495117,
      "learning_rate": 0.0002236302774537573,
      "loss": 2.7848,
      "step": 134079
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.312896251678467,
      "learning_rate": 0.00022362632166033082,
      "loss": 2.8668,
      "step": 134080
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.200631856918335,
      "learning_rate": 0.0002236223658811037,
      "loss": 2.8993,
      "step": 134081
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9172946214675903,
      "learning_rate": 0.00022361841011607662,
      "loss": 2.9996,
      "step": 134082
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.628019094467163,
      "learning_rate": 0.00022361445436525028,
      "loss": 3.3084,
      "step": 134083
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7141451835632324,
      "learning_rate": 0.00022361049862862557,
      "loss": 2.7717,
      "step": 134084
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7340528964996338,
      "learning_rate": 0.00022360654290620307,
      "loss": 2.8854,
      "step": 134085
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1895482540130615,
      "learning_rate": 0.0002236025871979835,
      "loss": 2.9874,
      "step": 134086
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9512858390808105,
      "learning_rate": 0.00022359863150396764,
      "loss": 2.9634,
      "step": 134087
    },
    {
      "epoch": 1.75,
      "grad_norm": 5.463162422180176,
      "learning_rate": 0.00022359467582415627,
      "loss": 3.0389,
      "step": 134088
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.291800022125244,
      "learning_rate": 0.00022359072015855005,
      "loss": 2.9149,
      "step": 134089
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3008856773376465,
      "learning_rate": 0.00022358676450714982,
      "loss": 3.0274,
      "step": 134090
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5092875957489014,
      "learning_rate": 0.00022358280886995637,
      "loss": 3.0651,
      "step": 134091
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8451521396636963,
      "learning_rate": 0.00022357885324697014,
      "loss": 2.979,
      "step": 134092
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8967714309692383,
      "learning_rate": 0.0002235748976381921,
      "loss": 3.1455,
      "step": 134093
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9846813678741455,
      "learning_rate": 0.00022357094204362285,
      "loss": 3.23,
      "step": 134094
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.12343692779541,
      "learning_rate": 0.00022356698646326326,
      "loss": 2.7753,
      "step": 134095
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8308600187301636,
      "learning_rate": 0.00022356303089711402,
      "loss": 3.0246,
      "step": 134096
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.097963809967041,
      "learning_rate": 0.00022355907534517597,
      "loss": 3.2506,
      "step": 134097
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.965041160583496,
      "learning_rate": 0.0002235551198074496,
      "loss": 3.0335,
      "step": 134098
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8782521486282349,
      "learning_rate": 0.00022355116428393577,
      "loss": 2.8808,
      "step": 134099
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4594054222106934,
      "learning_rate": 0.00022354720877463524,
      "loss": 2.8455,
      "step": 134100
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6445772647857666,
      "learning_rate": 0.00022354325327954869,
      "loss": 3.0237,
      "step": 134101
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.084779977798462,
      "learning_rate": 0.00022353929779867692,
      "loss": 2.9335,
      "step": 134102
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2992193698883057,
      "learning_rate": 0.00022353534233202077,
      "loss": 2.8926,
      "step": 134103
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3039510250091553,
      "learning_rate": 0.0002235313868795807,
      "loss": 3.2388,
      "step": 134104
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1080260276794434,
      "learning_rate": 0.0002235274314413576,
      "loss": 2.9216,
      "step": 134105
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3297970294952393,
      "learning_rate": 0.0002235234760173522,
      "loss": 2.9782,
      "step": 134106
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.141338586807251,
      "learning_rate": 0.00022351952060756518,
      "loss": 2.9887,
      "step": 134107
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.4348318576812744,
      "learning_rate": 0.00022351556521199738,
      "loss": 2.8818,
      "step": 134108
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9747438430786133,
      "learning_rate": 0.0002235116098306496,
      "loss": 2.898,
      "step": 134109
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.137476921081543,
      "learning_rate": 0.00022350765446352234,
      "loss": 2.9379,
      "step": 134110
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.546868085861206,
      "learning_rate": 0.00022350369911061643,
      "loss": 3.4021,
      "step": 134111
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9541680812835693,
      "learning_rate": 0.0002234997437719326,
      "loss": 2.7707,
      "step": 134112
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0168843269348145,
      "learning_rate": 0.00022349578844747166,
      "loss": 3.0973,
      "step": 134113
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.639486789703369,
      "learning_rate": 0.00022349183313723428,
      "loss": 2.9127,
      "step": 134114
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3284571170806885,
      "learning_rate": 0.00022348787784122134,
      "loss": 2.7055,
      "step": 134115
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.223515272140503,
      "learning_rate": 0.00022348392255943332,
      "loss": 3.1962,
      "step": 134116
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9259759187698364,
      "learning_rate": 0.00022347996729187106,
      "loss": 3.0398,
      "step": 134117
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.929706335067749,
      "learning_rate": 0.00022347601203853536,
      "loss": 2.9838,
      "step": 134118
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.345351457595825,
      "learning_rate": 0.00022347205679942685,
      "loss": 3.0632,
      "step": 134119
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.4494197368621826,
      "learning_rate": 0.0002234681015745464,
      "loss": 3.0423,
      "step": 134120
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9066494703292847,
      "learning_rate": 0.00022346414636389465,
      "loss": 3.1367,
      "step": 134121
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.929683804512024,
      "learning_rate": 0.00022346019116747248,
      "loss": 2.8682,
      "step": 134122
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9550238847732544,
      "learning_rate": 0.00022345623598528039,
      "loss": 3.0287,
      "step": 134123
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7944942712783813,
      "learning_rate": 0.00022345228081731923,
      "loss": 3.0918,
      "step": 134124
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0694100856781006,
      "learning_rate": 0.0002234483256635897,
      "loss": 2.9508,
      "step": 134125
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7818163633346558,
      "learning_rate": 0.0002234443705240926,
      "loss": 3.0487,
      "step": 134126
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0446789264678955,
      "learning_rate": 0.00022344041539882866,
      "loss": 2.989,
      "step": 134127
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5448710918426514,
      "learning_rate": 0.00022343646028779866,
      "loss": 3.1376,
      "step": 134128
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.29936146736145,
      "learning_rate": 0.0002234325051910032,
      "loss": 3.1301,
      "step": 134129
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.143181800842285,
      "learning_rate": 0.00022342855010844308,
      "loss": 3.1475,
      "step": 134130
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7287254333496094,
      "learning_rate": 0.000223424595040119,
      "loss": 3.2239,
      "step": 134131
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9410008192062378,
      "learning_rate": 0.00022342063998603172,
      "loss": 2.8734,
      "step": 134132
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1884241104125977,
      "learning_rate": 0.00022341668494618204,
      "loss": 3.0968,
      "step": 134133
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1564159393310547,
      "learning_rate": 0.00022341272992057074,
      "loss": 2.9608,
      "step": 134134
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3774406909942627,
      "learning_rate": 0.00022340877490919836,
      "loss": 2.952,
      "step": 134135
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8811801671981812,
      "learning_rate": 0.0002234048199120657,
      "loss": 3.1081,
      "step": 134136
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.995229959487915,
      "learning_rate": 0.00022340086492917354,
      "loss": 2.9306,
      "step": 134137
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1180667877197266,
      "learning_rate": 0.0002233969099605226,
      "loss": 2.9648,
      "step": 134138
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1324849128723145,
      "learning_rate": 0.00022339295500611362,
      "loss": 2.8262,
      "step": 134139
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2158937454223633,
      "learning_rate": 0.0002233890000659475,
      "loss": 3.1497,
      "step": 134140
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9853838682174683,
      "learning_rate": 0.00022338504514002466,
      "loss": 3.0224,
      "step": 134141
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1715641021728516,
      "learning_rate": 0.00022338109022834596,
      "loss": 2.8882,
      "step": 134142
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4263885021209717,
      "learning_rate": 0.00022337713533091222,
      "loss": 3.0074,
      "step": 134143
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2091941833496094,
      "learning_rate": 0.00022337318044772408,
      "loss": 3.0173,
      "step": 134144
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.13301157951355,
      "learning_rate": 0.0002233692255787823,
      "loss": 2.8872,
      "step": 134145
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.3365392684936523,
      "learning_rate": 0.0002233652707240877,
      "loss": 2.7745,
      "step": 134146
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0635414123535156,
      "learning_rate": 0.00022336131588364096,
      "loss": 3.0701,
      "step": 134147
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.363523483276367,
      "learning_rate": 0.00022335736105744272,
      "loss": 2.8903,
      "step": 134148
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.615060806274414,
      "learning_rate": 0.0002233534062454938,
      "loss": 3.0753,
      "step": 134149
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0825350284576416,
      "learning_rate": 0.0002233494514477949,
      "loss": 3.0285,
      "step": 134150
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.129957914352417,
      "learning_rate": 0.00022334549666434684,
      "loss": 2.7926,
      "step": 134151
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.521674871444702,
      "learning_rate": 0.0002233415418951503,
      "loss": 3.0267,
      "step": 134152
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0293822288513184,
      "learning_rate": 0.000223337587140206,
      "loss": 3.1393,
      "step": 134153
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4440362453460693,
      "learning_rate": 0.00022333363239951474,
      "loss": 3.0031,
      "step": 134154
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9927016496658325,
      "learning_rate": 0.00022332967767307713,
      "loss": 3.1597,
      "step": 134155
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.459916353225708,
      "learning_rate": 0.00022332572296089397,
      "loss": 2.8933,
      "step": 134156
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.839023470878601,
      "learning_rate": 0.000223321768262966,
      "loss": 3.0376,
      "step": 134157
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.356844425201416,
      "learning_rate": 0.00022331781357929408,
      "loss": 2.9183,
      "step": 134158
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4947118759155273,
      "learning_rate": 0.00022331385890987873,
      "loss": 3.0459,
      "step": 134159
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5817785263061523,
      "learning_rate": 0.00022330990425472078,
      "loss": 2.7726,
      "step": 134160
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.775128126144409,
      "learning_rate": 0.00022330594961382099,
      "loss": 3.0566,
      "step": 134161
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.9035661220550537,
      "learning_rate": 0.0002233019949871801,
      "loss": 3.0438,
      "step": 134162
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.056643009185791,
      "learning_rate": 0.00022329804037479875,
      "loss": 3.0744,
      "step": 134163
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5160634517669678,
      "learning_rate": 0.00022329408577667783,
      "loss": 2.8588,
      "step": 134164
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6369683742523193,
      "learning_rate": 0.0002232901311928179,
      "loss": 3.2077,
      "step": 134165
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.480187177658081,
      "learning_rate": 0.0002232861766232198,
      "loss": 2.801,
      "step": 134166
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.244257926940918,
      "learning_rate": 0.00022328222206788426,
      "loss": 2.9086,
      "step": 134167
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.408071279525757,
      "learning_rate": 0.000223278267526812,
      "loss": 3.0508,
      "step": 134168
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.433762550354004,
      "learning_rate": 0.00022327431300000377,
      "loss": 2.9595,
      "step": 134169
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0877153873443604,
      "learning_rate": 0.00022327035848746036,
      "loss": 3.0104,
      "step": 134170
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6310503482818604,
      "learning_rate": 0.00022326640398918234,
      "loss": 2.8736,
      "step": 134171
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.406212329864502,
      "learning_rate": 0.00022326244950517053,
      "loss": 3.2317,
      "step": 134172
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.120028495788574,
      "learning_rate": 0.00022325849503542573,
      "loss": 2.8757,
      "step": 134173
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.91278076171875,
      "learning_rate": 0.0002232545405799486,
      "loss": 2.9564,
      "step": 134174
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.004974365234375,
      "learning_rate": 0.00022325058613873987,
      "loss": 2.9239,
      "step": 134175
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.568288803100586,
      "learning_rate": 0.0002232466317118005,
      "loss": 2.9585,
      "step": 134176
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0828187465667725,
      "learning_rate": 0.00022324267729913083,
      "loss": 2.8963,
      "step": 134177
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.967872977256775,
      "learning_rate": 0.00022323872290073185,
      "loss": 2.9303,
      "step": 134178
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.128557205200195,
      "learning_rate": 0.00022323476851660422,
      "loss": 3.2223,
      "step": 134179
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9135425090789795,
      "learning_rate": 0.00022323081414674865,
      "loss": 3.0963,
      "step": 134180
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3810174465179443,
      "learning_rate": 0.00022322685979116598,
      "loss": 2.992,
      "step": 134181
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.150069236755371,
      "learning_rate": 0.00022322290544985702,
      "loss": 2.8745,
      "step": 134182
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9616756439208984,
      "learning_rate": 0.0002232189511228222,
      "loss": 2.8496,
      "step": 134183
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.064708709716797,
      "learning_rate": 0.00022321499681006242,
      "loss": 2.79,
      "step": 134184
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.6540782451629639,
      "learning_rate": 0.00022321104251157847,
      "loss": 3.0758,
      "step": 134185
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2085113525390625,
      "learning_rate": 0.00022320708822737103,
      "loss": 2.8652,
      "step": 134186
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.311228036880493,
      "learning_rate": 0.0002232031339574408,
      "loss": 3.0237,
      "step": 134187
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4977893829345703,
      "learning_rate": 0.0002231991797017886,
      "loss": 3.0378,
      "step": 134188
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0689611434936523,
      "learning_rate": 0.0002231952254604152,
      "loss": 3.1282,
      "step": 134189
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2463488578796387,
      "learning_rate": 0.0002231912712333212,
      "loss": 2.8492,
      "step": 134190
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9923501014709473,
      "learning_rate": 0.00022318731702050733,
      "loss": 3.0573,
      "step": 134191
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.59434175491333,
      "learning_rate": 0.00022318336282197443,
      "loss": 2.9089,
      "step": 134192
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.573491334915161,
      "learning_rate": 0.00022317940863772316,
      "loss": 2.9435,
      "step": 134193
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7409934997558594,
      "learning_rate": 0.0002231754544677543,
      "loss": 3.0785,
      "step": 134194
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3424251079559326,
      "learning_rate": 0.00022317150031206872,
      "loss": 2.9755,
      "step": 134195
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.415227890014648,
      "learning_rate": 0.00022316754617066685,
      "loss": 2.8641,
      "step": 134196
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.7715532779693604,
      "learning_rate": 0.00022316359204354962,
      "loss": 2.8913,
      "step": 134197
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.057095527648926,
      "learning_rate": 0.0002231596379307177,
      "loss": 2.9375,
      "step": 134198
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8837709426879883,
      "learning_rate": 0.00022315568383217188,
      "loss": 3.083,
      "step": 134199
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.633185386657715,
      "learning_rate": 0.00022315172974791283,
      "loss": 3.1983,
      "step": 134200
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.2456772327423096,
      "learning_rate": 0.00022314777567794148,
      "loss": 3.0166,
      "step": 134201
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4780139923095703,
      "learning_rate": 0.00022314382162225827,
      "loss": 2.9367,
      "step": 134202
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.736860752105713,
      "learning_rate": 0.00022313986758086405,
      "loss": 2.9247,
      "step": 134203
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1229987144470215,
      "learning_rate": 0.00022313591355375967,
      "loss": 2.8212,
      "step": 134204
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8870785236358643,
      "learning_rate": 0.0002231319595409457,
      "loss": 3.1798,
      "step": 134205
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8877226114273071,
      "learning_rate": 0.00022312800554242295,
      "loss": 3.1281,
      "step": 134206
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3046438694000244,
      "learning_rate": 0.00022312405155819226,
      "loss": 3.0045,
      "step": 134207
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5677874088287354,
      "learning_rate": 0.0002231200975882542,
      "loss": 3.0265,
      "step": 134208
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0070865154266357,
      "learning_rate": 0.0002231161436326095,
      "loss": 3.0876,
      "step": 134209
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2393863201141357,
      "learning_rate": 0.00022311218969125904,
      "loss": 2.8947,
      "step": 134210
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4568846225738525,
      "learning_rate": 0.0002231082357642034,
      "loss": 2.9651,
      "step": 134211
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.154552936553955,
      "learning_rate": 0.0002231042818514434,
      "loss": 2.9655,
      "step": 134212
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3884775638580322,
      "learning_rate": 0.00022310032795297993,
      "loss": 2.764,
      "step": 134213
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.272264003753662,
      "learning_rate": 0.00022309637406881343,
      "loss": 2.933,
      "step": 134214
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7625443935394287,
      "learning_rate": 0.00022309242019894472,
      "loss": 3.0006,
      "step": 134215
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.409212827682495,
      "learning_rate": 0.00022308846634337462,
      "loss": 3.0717,
      "step": 134216
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2385551929473877,
      "learning_rate": 0.00022308451250210382,
      "loss": 2.8815,
      "step": 134217
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.444661855697632,
      "learning_rate": 0.00022308055867513305,
      "loss": 2.9351,
      "step": 134218
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9478135108947754,
      "learning_rate": 0.0002230766048624632,
      "loss": 2.773,
      "step": 134219
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5509471893310547,
      "learning_rate": 0.00022307265106409472,
      "loss": 3.16,
      "step": 134220
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.626746416091919,
      "learning_rate": 0.0002230686972800285,
      "loss": 3.2369,
      "step": 134221
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8480396270751953,
      "learning_rate": 0.00022306474351026524,
      "loss": 2.8831,
      "step": 134222
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.049901247024536,
      "learning_rate": 0.0002230607897548057,
      "loss": 2.9829,
      "step": 134223
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6754770278930664,
      "learning_rate": 0.0002230568360136506,
      "loss": 3.0674,
      "step": 134224
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.513583183288574,
      "learning_rate": 0.00022305288228680086,
      "loss": 3.2499,
      "step": 134225
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.075976610183716,
      "learning_rate": 0.00022304892857425687,
      "loss": 3.2124,
      "step": 134226
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9066429138183594,
      "learning_rate": 0.0002230449748760196,
      "loss": 2.9461,
      "step": 134227
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.351794481277466,
      "learning_rate": 0.00022304102119208966,
      "loss": 3.066,
      "step": 134228
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.1269073486328125,
      "learning_rate": 0.00022303706752246785,
      "loss": 2.8925,
      "step": 134229
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.960092544555664,
      "learning_rate": 0.0002230331138671549,
      "loss": 2.9573,
      "step": 134230
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.025625228881836,
      "learning_rate": 0.00022302916022615168,
      "loss": 3.1593,
      "step": 134231
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.610191345214844,
      "learning_rate": 0.00022302520659945872,
      "loss": 2.9974,
      "step": 134232
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1341214179992676,
      "learning_rate": 0.00022302125298707677,
      "loss": 2.7897,
      "step": 134233
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.066152572631836,
      "learning_rate": 0.00022301729938900664,
      "loss": 2.9513,
      "step": 134234
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3460984230041504,
      "learning_rate": 0.00022301334580524906,
      "loss": 3.0249,
      "step": 134235
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7865850925445557,
      "learning_rate": 0.0002230093922358047,
      "loss": 2.8183,
      "step": 134236
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.685992479324341,
      "learning_rate": 0.00022300543868067447,
      "loss": 2.8878,
      "step": 134237
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5328941345214844,
      "learning_rate": 0.00022300148513985888,
      "loss": 2.855,
      "step": 134238
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1089181900024414,
      "learning_rate": 0.00022299753161335884,
      "loss": 2.8383,
      "step": 134239
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1926631927490234,
      "learning_rate": 0.00022299357810117495,
      "loss": 2.9068,
      "step": 134240
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2318451404571533,
      "learning_rate": 0.000222989624603308,
      "loss": 2.9382,
      "step": 134241
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9011403322219849,
      "learning_rate": 0.00022298567111975873,
      "loss": 2.8714,
      "step": 134242
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0506207942962646,
      "learning_rate": 0.00022298171765052798,
      "loss": 3.2541,
      "step": 134243
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.131293296813965,
      "learning_rate": 0.0002229777641956163,
      "loss": 2.8378,
      "step": 134244
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4502758979797363,
      "learning_rate": 0.00022297381075502448,
      "loss": 2.9532,
      "step": 134245
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9600658416748047,
      "learning_rate": 0.0002229698573287534,
      "loss": 2.8313,
      "step": 134246
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9679726362228394,
      "learning_rate": 0.00022296590391680357,
      "loss": 2.8441,
      "step": 134247
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1112754344940186,
      "learning_rate": 0.00022296195051917586,
      "loss": 3.1538,
      "step": 134248
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.374559164047241,
      "learning_rate": 0.000222957997135871,
      "loss": 2.851,
      "step": 134249
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9206161499023438,
      "learning_rate": 0.00022295404376688967,
      "loss": 2.7064,
      "step": 134250
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2100720405578613,
      "learning_rate": 0.00022295009041223263,
      "loss": 3.0311,
      "step": 134251
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2841410636901855,
      "learning_rate": 0.0002229461370719006,
      "loss": 3.102,
      "step": 134252
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.427614450454712,
      "learning_rate": 0.00022294218374589438,
      "loss": 2.852,
      "step": 134253
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6452629566192627,
      "learning_rate": 0.0002229382304342147,
      "loss": 3.1242,
      "step": 134254
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3628008365631104,
      "learning_rate": 0.0002229342771368622,
      "loss": 3.0523,
      "step": 134255
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.329352617263794,
      "learning_rate": 0.00022293032385383774,
      "loss": 2.9683,
      "step": 134256
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9273712635040283,
      "learning_rate": 0.00022292637058514192,
      "loss": 2.9574,
      "step": 134257
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.4906082153320312,
      "learning_rate": 0.00022292241733077556,
      "loss": 2.7731,
      "step": 134258
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8790150880813599,
      "learning_rate": 0.00022291846409073934,
      "loss": 2.9564,
      "step": 134259
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.132859230041504,
      "learning_rate": 0.00022291451086503403,
      "loss": 2.9435,
      "step": 134260
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.918153762817383,
      "learning_rate": 0.00022291055765366046,
      "loss": 3.0896,
      "step": 134261
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.451547145843506,
      "learning_rate": 0.00022290660445661926,
      "loss": 2.9334,
      "step": 134262
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9157177209854126,
      "learning_rate": 0.00022290265127391116,
      "loss": 3.274,
      "step": 134263
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.228724956512451,
      "learning_rate": 0.00022289869810553683,
      "loss": 3.0642,
      "step": 134264
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.100158929824829,
      "learning_rate": 0.00022289474495149717,
      "loss": 2.9089,
      "step": 134265
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.4475889205932617,
      "learning_rate": 0.0002228907918117928,
      "loss": 2.9171,
      "step": 134266
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9394820928573608,
      "learning_rate": 0.0002228868386864245,
      "loss": 2.9215,
      "step": 134267
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9726109504699707,
      "learning_rate": 0.00022288288557539309,
      "loss": 3.0969,
      "step": 134268
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.5515952110290527,
      "learning_rate": 0.00022287893247869905,
      "loss": 3.0928,
      "step": 134269
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0065155029296875,
      "learning_rate": 0.00022287497939634334,
      "loss": 2.9976,
      "step": 134270
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5012612342834473,
      "learning_rate": 0.00022287102632832662,
      "loss": 3.0636,
      "step": 134271
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.199061870574951,
      "learning_rate": 0.0002228670732746496,
      "loss": 3.1282,
      "step": 134272
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.603060483932495,
      "learning_rate": 0.00022286312023531304,
      "loss": 2.9694,
      "step": 134273
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9028643369674683,
      "learning_rate": 0.00022285916721031783,
      "loss": 2.9337,
      "step": 134274
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9723984003067017,
      "learning_rate": 0.00022285521419966445,
      "loss": 2.8812,
      "step": 134275
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0793139934539795,
      "learning_rate": 0.00022285126120335372,
      "loss": 3.0208,
      "step": 134276
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9677565097808838,
      "learning_rate": 0.00022284730822138638,
      "loss": 3.0622,
      "step": 134277
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.16426157951355,
      "learning_rate": 0.00022284335525376322,
      "loss": 2.8976,
      "step": 134278
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.288978338241577,
      "learning_rate": 0.0002228394023004849,
      "loss": 2.9473,
      "step": 134279
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.044586658477783,
      "learning_rate": 0.00022283544936155237,
      "loss": 2.8686,
      "step": 134280
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.845133662223816,
      "learning_rate": 0.000222831496436966,
      "loss": 2.7849,
      "step": 134281
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.7657504081726074,
      "learning_rate": 0.00022282754352672673,
      "loss": 2.976,
      "step": 134282
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3838002681732178,
      "learning_rate": 0.0002228235906308353,
      "loss": 2.9207,
      "step": 134283
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8547301292419434,
      "learning_rate": 0.0002228196377492924,
      "loss": 3.0631,
      "step": 134284
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.396284580230713,
      "learning_rate": 0.00022281568488209875,
      "loss": 2.8774,
      "step": 134285
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0935208797454834,
      "learning_rate": 0.0002228117320292553,
      "loss": 3.0488,
      "step": 134286
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.717655897140503,
      "learning_rate": 0.00022280777919076245,
      "loss": 2.8919,
      "step": 134287
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.214463949203491,
      "learning_rate": 0.00022280382636662112,
      "loss": 2.9239,
      "step": 134288
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.456568956375122,
      "learning_rate": 0.00022279987355683202,
      "loss": 2.7554,
      "step": 134289
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.296844005584717,
      "learning_rate": 0.00022279592076139586,
      "loss": 3.1791,
      "step": 134290
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9265036582946777,
      "learning_rate": 0.0002227919679803134,
      "loss": 2.9775,
      "step": 134291
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1050243377685547,
      "learning_rate": 0.00022278801521358551,
      "loss": 2.9215,
      "step": 134292
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.127027988433838,
      "learning_rate": 0.00022278406246121265,
      "loss": 2.9946,
      "step": 134293
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.064750909805298,
      "learning_rate": 0.00022278010972319568,
      "loss": 2.7782,
      "step": 134294
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.251377582550049,
      "learning_rate": 0.00022277615699953533,
      "loss": 2.9511,
      "step": 134295
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.089233875274658,
      "learning_rate": 0.00022277220429023237,
      "loss": 2.931,
      "step": 134296
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.097939968109131,
      "learning_rate": 0.00022276825159528755,
      "loss": 2.9465,
      "step": 134297
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1790316104888916,
      "learning_rate": 0.00022276429891470164,
      "loss": 3.1515,
      "step": 134298
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9149836301803589,
      "learning_rate": 0.0002227603462484752,
      "loss": 3.013,
      "step": 134299
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7933478355407715,
      "learning_rate": 0.00022275639359660906,
      "loss": 2.8543,
      "step": 134300
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9203269481658936,
      "learning_rate": 0.000222752440959104,
      "loss": 3.0149,
      "step": 134301
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7075512409210205,
      "learning_rate": 0.0002227484883359607,
      "loss": 2.8384,
      "step": 134302
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8358439207077026,
      "learning_rate": 0.00022274453572717988,
      "loss": 3.0577,
      "step": 134303
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.439876079559326,
      "learning_rate": 0.00022274058313276248,
      "loss": 2.9659,
      "step": 134304
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9446099996566772,
      "learning_rate": 0.0002227366305527089,
      "loss": 3.0251,
      "step": 134305
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3661715984344482,
      "learning_rate": 0.0002227326779870201,
      "loss": 3.0412,
      "step": 134306
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7673529386520386,
      "learning_rate": 0.0002227287254356967,
      "loss": 2.9177,
      "step": 134307
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.323333978652954,
      "learning_rate": 0.0002227247728987395,
      "loss": 2.9099,
      "step": 134308
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.370743989944458,
      "learning_rate": 0.00022272082037614924,
      "loss": 2.9737,
      "step": 134309
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7909796237945557,
      "learning_rate": 0.00022271686786792672,
      "loss": 2.8942,
      "step": 134310
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.779947757720947,
      "learning_rate": 0.0002227129153740725,
      "loss": 2.8639,
      "step": 134311
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.638927936553955,
      "learning_rate": 0.00022270896289458742,
      "loss": 3.1205,
      "step": 134312
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.382050037384033,
      "learning_rate": 0.0002227050104294722,
      "loss": 2.8875,
      "step": 134313
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.2709076404571533,
      "learning_rate": 0.00022270105797872757,
      "loss": 2.7845,
      "step": 134314
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.667708158493042,
      "learning_rate": 0.00022269710554235426,
      "loss": 2.9354,
      "step": 134315
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.847553253173828,
      "learning_rate": 0.00022269315312035314,
      "loss": 2.6992,
      "step": 134316
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0804946422576904,
      "learning_rate": 0.00022268920071272474,
      "loss": 3.1996,
      "step": 134317
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.078964948654175,
      "learning_rate": 0.00022268524831946983,
      "loss": 3.0482,
      "step": 134318
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.756265640258789,
      "learning_rate": 0.00022268129594058922,
      "loss": 2.921,
      "step": 134319
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8571624755859375,
      "learning_rate": 0.0002226773435760836,
      "loss": 2.8873,
      "step": 134320
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.526669979095459,
      "learning_rate": 0.00022267339122595368,
      "loss": 3.0658,
      "step": 134321
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.632546901702881,
      "learning_rate": 0.00022266943889020027,
      "loss": 2.9981,
      "step": 134322
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1292214393615723,
      "learning_rate": 0.00022266548656882417,
      "loss": 3.2258,
      "step": 134323
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.103472948074341,
      "learning_rate": 0.00022266153426182602,
      "loss": 2.9286,
      "step": 134324
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.246246576309204,
      "learning_rate": 0.00022265758196920643,
      "loss": 3.1401,
      "step": 134325
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.036684989929199,
      "learning_rate": 0.0002226536296909663,
      "loss": 3.2017,
      "step": 134326
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9861618280410767,
      "learning_rate": 0.0002226496774271063,
      "loss": 3.0255,
      "step": 134327
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.595492362976074,
      "learning_rate": 0.0002226457251776272,
      "loss": 3.0111,
      "step": 134328
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0178544521331787,
      "learning_rate": 0.00022264177294252976,
      "loss": 3.0218,
      "step": 134329
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.152061700820923,
      "learning_rate": 0.00022263782072181462,
      "loss": 2.9694,
      "step": 134330
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1872570514678955,
      "learning_rate": 0.00022263386851548267,
      "loss": 2.9303,
      "step": 134331
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.175229549407959,
      "learning_rate": 0.00022262991632353442,
      "loss": 3.1345,
      "step": 134332
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8989334106445312,
      "learning_rate": 0.00022262596414597075,
      "loss": 2.837,
      "step": 134333
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.215357542037964,
      "learning_rate": 0.00022262201198279237,
      "loss": 2.9185,
      "step": 134334
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.727238178253174,
      "learning_rate": 0.00022261805983400012,
      "loss": 3.1142,
      "step": 134335
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4771955013275146,
      "learning_rate": 0.00022261410769959452,
      "loss": 3.0378,
      "step": 134336
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.422274351119995,
      "learning_rate": 0.00022261015557957647,
      "loss": 3.2174,
      "step": 134337
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8268824815750122,
      "learning_rate": 0.00022260620347394668,
      "loss": 2.9559,
      "step": 134338
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.464052677154541,
      "learning_rate": 0.00022260225138270577,
      "loss": 2.6454,
      "step": 134339
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6686959266662598,
      "learning_rate": 0.0002225982993058546,
      "loss": 2.6817,
      "step": 134340
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0588605403900146,
      "learning_rate": 0.00022259434724339395,
      "loss": 3.0638,
      "step": 134341
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8978641033172607,
      "learning_rate": 0.00022259039519532438,
      "loss": 2.9491,
      "step": 134342
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.171449661254883,
      "learning_rate": 0.00022258644316164674,
      "loss": 3.0896,
      "step": 134343
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.297706127166748,
      "learning_rate": 0.0002225824911423617,
      "loss": 3.0554,
      "step": 134344
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3726179599761963,
      "learning_rate": 0.0002225785391374701,
      "loss": 2.929,
      "step": 134345
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3242664337158203,
      "learning_rate": 0.00022257458714697258,
      "loss": 2.9886,
      "step": 134346
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.501300811767578,
      "learning_rate": 0.00022257063517086995,
      "loss": 3.1302,
      "step": 134347
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.786664605140686,
      "learning_rate": 0.00022256668320916288,
      "loss": 2.9582,
      "step": 134348
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9847502708435059,
      "learning_rate": 0.00022256273126185206,
      "loss": 2.9772,
      "step": 134349
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.377866268157959,
      "learning_rate": 0.00022255877932893833,
      "loss": 3.3391,
      "step": 134350
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9802238941192627,
      "learning_rate": 0.00022255482741042237,
      "loss": 2.905,
      "step": 134351
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1499404907226562,
      "learning_rate": 0.00022255087550630495,
      "loss": 3.0223,
      "step": 134352
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8799753189086914,
      "learning_rate": 0.0002225469236165869,
      "loss": 2.9535,
      "step": 134353
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6846556663513184,
      "learning_rate": 0.0002225429717412687,
      "loss": 2.8778,
      "step": 134354
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.049402952194214,
      "learning_rate": 0.00022253901988035122,
      "loss": 3.0828,
      "step": 134355
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.91388738155365,
      "learning_rate": 0.00022253506803383518,
      "loss": 3.0901,
      "step": 134356
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2958171367645264,
      "learning_rate": 0.00022253111620172138,
      "loss": 3.0781,
      "step": 134357
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.495309829711914,
      "learning_rate": 0.0002225271643840105,
      "loss": 2.893,
      "step": 134358
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.07484769821167,
      "learning_rate": 0.0002225232125807034,
      "loss": 3.1324,
      "step": 134359
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.264298677444458,
      "learning_rate": 0.00022251926079180058,
      "loss": 2.8261,
      "step": 134360
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8995546102523804,
      "learning_rate": 0.00022251530901730286,
      "loss": 2.9784,
      "step": 134361
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1171987056732178,
      "learning_rate": 0.00022251135725721104,
      "loss": 3.1662,
      "step": 134362
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9766979217529297,
      "learning_rate": 0.0002225074055115258,
      "loss": 2.9074,
      "step": 134363
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9899466037750244,
      "learning_rate": 0.00022250345378024792,
      "loss": 3.0358,
      "step": 134364
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3054451942443848,
      "learning_rate": 0.00022249950206337823,
      "loss": 3.091,
      "step": 134365
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.259387254714966,
      "learning_rate": 0.0002224955503609172,
      "loss": 3.047,
      "step": 134366
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.805943250656128,
      "learning_rate": 0.00022249159867286574,
      "loss": 3.1465,
      "step": 134367
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1438913345336914,
      "learning_rate": 0.0002224876469992245,
      "loss": 3.1867,
      "step": 134368
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0119919776916504,
      "learning_rate": 0.00022248369533999434,
      "loss": 2.7548,
      "step": 134369
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3338522911071777,
      "learning_rate": 0.0002224797436951759,
      "loss": 2.8119,
      "step": 134370
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5614075660705566,
      "learning_rate": 0.00022247579206477005,
      "loss": 3.0154,
      "step": 134371
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.828972578048706,
      "learning_rate": 0.0002224718404487773,
      "loss": 2.9511,
      "step": 134372
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.7107770442962646,
      "learning_rate": 0.0002224678888471985,
      "loss": 3.3028,
      "step": 134373
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8917882442474365,
      "learning_rate": 0.00022246393726003434,
      "loss": 3.0709,
      "step": 134374
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7218117713928223,
      "learning_rate": 0.00022245998568728564,
      "loss": 3.0114,
      "step": 134375
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2999653816223145,
      "learning_rate": 0.0002224560341289531,
      "loss": 2.9914,
      "step": 134376
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9944713115692139,
      "learning_rate": 0.00022245208258503754,
      "loss": 3.067,
      "step": 134377
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3334083557128906,
      "learning_rate": 0.0002224481310555395,
      "loss": 2.988,
      "step": 134378
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1391425132751465,
      "learning_rate": 0.0002224441795404598,
      "loss": 2.8751,
      "step": 134379
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1011064052581787,
      "learning_rate": 0.00022244022803979925,
      "loss": 3.0032,
      "step": 134380
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.117832899093628,
      "learning_rate": 0.0002224362765535584,
      "loss": 2.8931,
      "step": 134381
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.271569013595581,
      "learning_rate": 0.00022243232508173825,
      "loss": 3.1547,
      "step": 134382
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5830671787261963,
      "learning_rate": 0.00022242837362433944,
      "loss": 2.8299,
      "step": 134383
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.143613815307617,
      "learning_rate": 0.00022242442218136255,
      "loss": 3.1258,
      "step": 134384
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.1919023990631104,
      "learning_rate": 0.00022242047075280843,
      "loss": 3.112,
      "step": 134385
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7973995208740234,
      "learning_rate": 0.0002224165193386778,
      "loss": 2.9307,
      "step": 134386
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.038724899291992,
      "learning_rate": 0.0002224125679389714,
      "loss": 3.0655,
      "step": 134387
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7051336765289307,
      "learning_rate": 0.00022240861655369,
      "loss": 2.9474,
      "step": 134388
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.862424373626709,
      "learning_rate": 0.00022240466518283426,
      "loss": 2.9497,
      "step": 134389
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9953820705413818,
      "learning_rate": 0.00022240071382640512,
      "loss": 3.0735,
      "step": 134390
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.115945339202881,
      "learning_rate": 0.00022239676248440298,
      "loss": 3.2645,
      "step": 134391
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4345576763153076,
      "learning_rate": 0.0002223928111568288,
      "loss": 3.0936,
      "step": 134392
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1983158588409424,
      "learning_rate": 0.00022238885984368325,
      "loss": 3.0791,
      "step": 134393
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.236179828643799,
      "learning_rate": 0.00022238490854496705,
      "loss": 2.9598,
      "step": 134394
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.447436571121216,
      "learning_rate": 0.00022238095726068098,
      "loss": 2.6071,
      "step": 134395
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.114476442337036,
      "learning_rate": 0.0002223770059908259,
      "loss": 2.8901,
      "step": 134396
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8391525745391846,
      "learning_rate": 0.00022237305473540224,
      "loss": 2.972,
      "step": 134397
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7562720775604248,
      "learning_rate": 0.00022236910349441092,
      "loss": 2.9739,
      "step": 134398
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.367030620574951,
      "learning_rate": 0.0002223651522678526,
      "loss": 2.6149,
      "step": 134399
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1129376888275146,
      "learning_rate": 0.00022236120105572808,
      "loss": 3.2341,
      "step": 134400
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.158243417739868,
      "learning_rate": 0.0002223572498580381,
      "loss": 3.1739,
      "step": 134401
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.414402961730957,
      "learning_rate": 0.0002223532986747835,
      "loss": 3.0189,
      "step": 134402
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.227048635482788,
      "learning_rate": 0.00022234934750596473,
      "loss": 2.9482,
      "step": 134403
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3987786769866943,
      "learning_rate": 0.00022234539635158273,
      "loss": 2.9471,
      "step": 134404
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5903589725494385,
      "learning_rate": 0.00022234144521163818,
      "loss": 2.9576,
      "step": 134405
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.191615343093872,
      "learning_rate": 0.00022233749408613179,
      "loss": 2.8482,
      "step": 134406
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3613040447235107,
      "learning_rate": 0.00022233354297506434,
      "loss": 2.9121,
      "step": 134407
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9070889949798584,
      "learning_rate": 0.00022232959187843668,
      "loss": 2.7323,
      "step": 134408
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2467234134674072,
      "learning_rate": 0.0002223256407962493,
      "loss": 2.8855,
      "step": 134409
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.073342800140381,
      "learning_rate": 0.00022232168972850303,
      "loss": 2.9889,
      "step": 134410
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0061428546905518,
      "learning_rate": 0.00022231773867519862,
      "loss": 3.2078,
      "step": 134411
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.626612901687622,
      "learning_rate": 0.00022231378763633682,
      "loss": 2.8307,
      "step": 134412
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1056292057037354,
      "learning_rate": 0.00022230983661191835,
      "loss": 2.9939,
      "step": 134413
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.843044638633728,
      "learning_rate": 0.00022230588560194403,
      "loss": 3.1168,
      "step": 134414
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9139933586120605,
      "learning_rate": 0.00022230193460641442,
      "loss": 3.1213,
      "step": 134415
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9821255207061768,
      "learning_rate": 0.00022229798362533045,
      "loss": 3.1082,
      "step": 134416
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9194995164871216,
      "learning_rate": 0.00022229403265869267,
      "loss": 3.1471,
      "step": 134417
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6184234619140625,
      "learning_rate": 0.00022229008170650185,
      "loss": 3.1854,
      "step": 134418
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.194265604019165,
      "learning_rate": 0.0002222861307687588,
      "loss": 3.0096,
      "step": 134419
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0946807861328125,
      "learning_rate": 0.0002222821798454643,
      "loss": 3.0403,
      "step": 134420
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.479485034942627,
      "learning_rate": 0.00022227822893661892,
      "loss": 2.9578,
      "step": 134421
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.70889949798584,
      "learning_rate": 0.00022227427804222356,
      "loss": 3.1738,
      "step": 134422
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7700501680374146,
      "learning_rate": 0.00022227032716227887,
      "loss": 2.9805,
      "step": 134423
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.305116653442383,
      "learning_rate": 0.00022226637629678555,
      "loss": 2.9819,
      "step": 134424
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8300392627716064,
      "learning_rate": 0.0002222624254457444,
      "loss": 3.1733,
      "step": 134425
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7154195308685303,
      "learning_rate": 0.00022225847460915615,
      "loss": 3.0138,
      "step": 134426
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8290305137634277,
      "learning_rate": 0.00022225452378702148,
      "loss": 2.9613,
      "step": 134427
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.305515766143799,
      "learning_rate": 0.00022225057297934115,
      "loss": 2.7315,
      "step": 134428
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7488911151885986,
      "learning_rate": 0.00022224662218611591,
      "loss": 3.1131,
      "step": 134429
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6003551483154297,
      "learning_rate": 0.00022224267140734654,
      "loss": 2.9746,
      "step": 134430
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0939996242523193,
      "learning_rate": 0.00022223872064303373,
      "loss": 2.8569,
      "step": 134431
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1495280265808105,
      "learning_rate": 0.00022223476989317818,
      "loss": 3.0881,
      "step": 134432
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8937313556671143,
      "learning_rate": 0.00022223081915778066,
      "loss": 2.7834,
      "step": 134433
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2982566356658936,
      "learning_rate": 0.00022222686843684187,
      "loss": 2.9627,
      "step": 134434
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0832231044769287,
      "learning_rate": 0.00022222291773036257,
      "loss": 2.9971,
      "step": 134435
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3950459957122803,
      "learning_rate": 0.00022221896703834352,
      "loss": 2.9654,
      "step": 134436
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0771305561065674,
      "learning_rate": 0.00022221501636078545,
      "loss": 3.1032,
      "step": 134437
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1375622749328613,
      "learning_rate": 0.00022221106569768911,
      "loss": 3.1067,
      "step": 134438
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.161836862564087,
      "learning_rate": 0.00022220711504905514,
      "loss": 3.1374,
      "step": 134439
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.110089063644409,
      "learning_rate": 0.00022220316441488435,
      "loss": 3.0775,
      "step": 134440
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.099961042404175,
      "learning_rate": 0.00022219921379517744,
      "loss": 2.9736,
      "step": 134441
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.157975673675537,
      "learning_rate": 0.00022219526318993516,
      "loss": 3.0268,
      "step": 134442
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.156477928161621,
      "learning_rate": 0.00022219131259915826,
      "loss": 2.9972,
      "step": 134443
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0082015991210938,
      "learning_rate": 0.00022218736202284756,
      "loss": 3.1694,
      "step": 134444
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5480833053588867,
      "learning_rate": 0.00022218341146100363,
      "loss": 2.9589,
      "step": 134445
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9173860549926758,
      "learning_rate": 0.00022217946091362723,
      "loss": 2.9928,
      "step": 134446
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8212311267852783,
      "learning_rate": 0.00022217551038071915,
      "loss": 2.9509,
      "step": 134447
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.149395704269409,
      "learning_rate": 0.0002221715598622801,
      "loss": 2.8627,
      "step": 134448
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3491921424865723,
      "learning_rate": 0.00022216760935831083,
      "loss": 3.1429,
      "step": 134449
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0256736278533936,
      "learning_rate": 0.00022216365886881223,
      "loss": 2.8356,
      "step": 134450
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9658786058425903,
      "learning_rate": 0.00022215970839378472,
      "loss": 3.0915,
      "step": 134451
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9630039930343628,
      "learning_rate": 0.00022215575793322918,
      "loss": 3.0931,
      "step": 134452
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1061623096466064,
      "learning_rate": 0.00022215180748714634,
      "loss": 2.6806,
      "step": 134453
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5541388988494873,
      "learning_rate": 0.00022214785705553699,
      "loss": 3.1891,
      "step": 134454
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.990857720375061,
      "learning_rate": 0.0002221439066384018,
      "loss": 2.997,
      "step": 134455
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2285051345825195,
      "learning_rate": 0.00022213995623574154,
      "loss": 3.0595,
      "step": 134456
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.337477207183838,
      "learning_rate": 0.00022213600584755703,
      "loss": 3.0711,
      "step": 134457
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4333550930023193,
      "learning_rate": 0.00022213205547384882,
      "loss": 2.7371,
      "step": 134458
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9448809623718262,
      "learning_rate": 0.0002221281051146177,
      "loss": 3.012,
      "step": 134459
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.348240852355957,
      "learning_rate": 0.00022212415476986444,
      "loss": 2.9618,
      "step": 134460
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0638277530670166,
      "learning_rate": 0.0002221202044395898,
      "loss": 2.9299,
      "step": 134461
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6040618419647217,
      "learning_rate": 0.00022211625412379445,
      "loss": 2.9627,
      "step": 134462
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3319642543792725,
      "learning_rate": 0.00022211230382247928,
      "loss": 3.005,
      "step": 134463
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.477674961090088,
      "learning_rate": 0.0002221083535356448,
      "loss": 3.1265,
      "step": 134464
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3811933994293213,
      "learning_rate": 0.00022210440326329183,
      "loss": 2.9556,
      "step": 134465
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5228285789489746,
      "learning_rate": 0.00022210045300542113,
      "loss": 3.098,
      "step": 134466
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2906532287597656,
      "learning_rate": 0.00022209650276203342,
      "loss": 2.8894,
      "step": 134467
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2063722610473633,
      "learning_rate": 0.00022209255253312945,
      "loss": 2.9519,
      "step": 134468
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7878167629241943,
      "learning_rate": 0.00022208860231871005,
      "loss": 3.1023,
      "step": 134469
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.803992986679077,
      "learning_rate": 0.00022208465211877578,
      "loss": 3.114,
      "step": 134470
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2943124771118164,
      "learning_rate": 0.0002220807019333274,
      "loss": 2.9806,
      "step": 134471
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.356119394302368,
      "learning_rate": 0.0002220767517623657,
      "loss": 2.7839,
      "step": 134472
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.226771116256714,
      "learning_rate": 0.0002220728016058914,
      "loss": 2.8506,
      "step": 134473
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.49369215965271,
      "learning_rate": 0.0002220688514639052,
      "loss": 3.1624,
      "step": 134474
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.749229907989502,
      "learning_rate": 0.00022206490133640808,
      "loss": 3.2738,
      "step": 134475
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5544698238372803,
      "learning_rate": 0.0002220609512234004,
      "loss": 2.9822,
      "step": 134476
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2544267177581787,
      "learning_rate": 0.00022205700112488304,
      "loss": 2.8382,
      "step": 134477
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.247217893600464,
      "learning_rate": 0.00022205305104085673,
      "loss": 3.2124,
      "step": 134478
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.184476137161255,
      "learning_rate": 0.0002220491009713223,
      "loss": 2.7705,
      "step": 134479
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.204206943511963,
      "learning_rate": 0.00022204515091628037,
      "loss": 2.739,
      "step": 134480
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1634204387664795,
      "learning_rate": 0.00022204120087573187,
      "loss": 2.9794,
      "step": 134481
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1758689880371094,
      "learning_rate": 0.00022203725084967725,
      "loss": 2.8621,
      "step": 134482
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1083834171295166,
      "learning_rate": 0.00022203330083811734,
      "loss": 2.8663,
      "step": 134483
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1067097187042236,
      "learning_rate": 0.00022202935084105293,
      "loss": 3.2823,
      "step": 134484
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7398641109466553,
      "learning_rate": 0.00022202540085848474,
      "loss": 2.8922,
      "step": 134485
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4943809509277344,
      "learning_rate": 0.00022202145089041353,
      "loss": 2.9964,
      "step": 134486
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1104984283447266,
      "learning_rate": 0.0002220175009368401,
      "loss": 2.8142,
      "step": 134487
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.351128101348877,
      "learning_rate": 0.00022201355099776496,
      "loss": 2.9407,
      "step": 134488
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.597140312194824,
      "learning_rate": 0.00022200960107318898,
      "loss": 3.2194,
      "step": 134489
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.422478199005127,
      "learning_rate": 0.0002220056511631129,
      "loss": 3.1358,
      "step": 134490
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.773188591003418,
      "learning_rate": 0.0002220017012675374,
      "loss": 2.9653,
      "step": 134491
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2323832511901855,
      "learning_rate": 0.0002219977513864633,
      "loss": 3.1053,
      "step": 134492
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.636082410812378,
      "learning_rate": 0.0002219938015198914,
      "loss": 3.2019,
      "step": 134493
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1076910495758057,
      "learning_rate": 0.0002219898516678222,
      "loss": 2.9854,
      "step": 134494
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5620689392089844,
      "learning_rate": 0.00022198590183025656,
      "loss": 2.9547,
      "step": 134495
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8656079769134521,
      "learning_rate": 0.0002219819520071952,
      "loss": 2.9608,
      "step": 134496
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0946292877197266,
      "learning_rate": 0.0002219780021986389,
      "loss": 2.9914,
      "step": 134497
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.127105712890625,
      "learning_rate": 0.00022197405240458835,
      "loss": 3.2121,
      "step": 134498
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2965855598449707,
      "learning_rate": 0.00022197010262504433,
      "loss": 3.2331,
      "step": 134499
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2937283515930176,
      "learning_rate": 0.00022196615286000757,
      "loss": 2.9412,
      "step": 134500
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.374995470046997,
      "learning_rate": 0.0002219622031094787,
      "loss": 2.708,
      "step": 134501
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.231139659881592,
      "learning_rate": 0.00022195825337345854,
      "loss": 2.9653,
      "step": 134502
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.679643392562866,
      "learning_rate": 0.0002219543036519478,
      "loss": 3.1913,
      "step": 134503
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8596776723861694,
      "learning_rate": 0.00022195035394494727,
      "loss": 3.1873,
      "step": 134504
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2568199634552,
      "learning_rate": 0.00022194640425245767,
      "loss": 3.0491,
      "step": 134505
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.1948251724243164,
      "learning_rate": 0.00022194245457447963,
      "loss": 2.7845,
      "step": 134506
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3118255138397217,
      "learning_rate": 0.00022193850491101404,
      "loss": 3.1862,
      "step": 134507
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.090981960296631,
      "learning_rate": 0.00022193455526206148,
      "loss": 2.843,
      "step": 134508
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8218460083007812,
      "learning_rate": 0.00022193060562762277,
      "loss": 2.7651,
      "step": 134509
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1065330505371094,
      "learning_rate": 0.00022192665600769866,
      "loss": 3.3189,
      "step": 134510
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0283796787261963,
      "learning_rate": 0.0002219227064022899,
      "loss": 3.044,
      "step": 134511
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9577391147613525,
      "learning_rate": 0.00022191875681139712,
      "loss": 2.9154,
      "step": 134512
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0296237468719482,
      "learning_rate": 0.0002219148072350211,
      "loss": 3.0111,
      "step": 134513
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.688614845275879,
      "learning_rate": 0.0002219108576731626,
      "loss": 3.026,
      "step": 134514
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3484249114990234,
      "learning_rate": 0.00022190690812582244,
      "loss": 3.2859,
      "step": 134515
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9529814720153809,
      "learning_rate": 0.00022190295859300115,
      "loss": 2.9875,
      "step": 134516
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0192172527313232,
      "learning_rate": 0.00022189900907469963,
      "loss": 2.9034,
      "step": 134517
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.717731475830078,
      "learning_rate": 0.00022189505957091857,
      "loss": 3.0696,
      "step": 134518
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.475186347961426,
      "learning_rate": 0.00022189111008165865,
      "loss": 3.0745,
      "step": 134519
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.578502655029297,
      "learning_rate": 0.00022188716060692062,
      "loss": 2.8053,
      "step": 134520
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.251417875289917,
      "learning_rate": 0.00022188321114670527,
      "loss": 2.8205,
      "step": 134521
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.62555456161499,
      "learning_rate": 0.00022187926170101337,
      "loss": 2.9982,
      "step": 134522
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.252101421356201,
      "learning_rate": 0.00022187531226984555,
      "loss": 3.0834,
      "step": 134523
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0724267959594727,
      "learning_rate": 0.0002218713628532026,
      "loss": 2.9231,
      "step": 134524
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.095879554748535,
      "learning_rate": 0.00022186741345108519,
      "loss": 2.9016,
      "step": 134525
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1691107749938965,
      "learning_rate": 0.0002218634640634941,
      "loss": 2.9996,
      "step": 134526
    },
    {
      "epoch": 1.75,
      "grad_norm": 5.870037078857422,
      "learning_rate": 0.00022185951469043006,
      "loss": 2.9885,
      "step": 134527
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.497119188308716,
      "learning_rate": 0.00022185556533189382,
      "loss": 2.8632,
      "step": 134528
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7993019819259644,
      "learning_rate": 0.00022185161598788617,
      "loss": 3.0709,
      "step": 134529
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.841352939605713,
      "learning_rate": 0.0002218476666584078,
      "loss": 3.1568,
      "step": 134530
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0622363090515137,
      "learning_rate": 0.00022184371734345934,
      "loss": 3.3714,
      "step": 134531
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.619140625,
      "learning_rate": 0.00022183976804304162,
      "loss": 2.9471,
      "step": 134532
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8520805835723877,
      "learning_rate": 0.00022183581875715537,
      "loss": 3.1004,
      "step": 134533
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.449387311935425,
      "learning_rate": 0.0002218318694858013,
      "loss": 2.9749,
      "step": 134534
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1926684379577637,
      "learning_rate": 0.00022182792022898014,
      "loss": 3.1288,
      "step": 134535
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4389400482177734,
      "learning_rate": 0.0002218239709866928,
      "loss": 3.1493,
      "step": 134536
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3807361125946045,
      "learning_rate": 0.00022182002175893972,
      "loss": 2.8337,
      "step": 134537
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7983508110046387,
      "learning_rate": 0.0002218160725457218,
      "loss": 2.8429,
      "step": 134538
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1249451637268066,
      "learning_rate": 0.00022181212334703972,
      "loss": 3.0034,
      "step": 134539
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.600703239440918,
      "learning_rate": 0.00022180817416289422,
      "loss": 3.056,
      "step": 134540
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9937517642974854,
      "learning_rate": 0.0002218042249932861,
      "loss": 2.7562,
      "step": 134541
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9939775466918945,
      "learning_rate": 0.00022180027583821617,
      "loss": 3.03,
      "step": 134542
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0344831943511963,
      "learning_rate": 0.00022179632669768493,
      "loss": 3.1517,
      "step": 134543
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.647885322570801,
      "learning_rate": 0.00022179237757169318,
      "loss": 2.9818,
      "step": 134544
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9238883256912231,
      "learning_rate": 0.00022178842846024173,
      "loss": 3.0859,
      "step": 134545
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8964316844940186,
      "learning_rate": 0.00022178447936333133,
      "loss": 2.9763,
      "step": 134546
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.760840654373169,
      "learning_rate": 0.00022178053028096262,
      "loss": 3.1357,
      "step": 134547
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.629004716873169,
      "learning_rate": 0.00022177658121313654,
      "loss": 2.9252,
      "step": 134548
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1033878326416016,
      "learning_rate": 0.0002217726321598535,
      "loss": 2.9876,
      "step": 134549
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8245160579681396,
      "learning_rate": 0.00022176868312111445,
      "loss": 3.0222,
      "step": 134550
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1414506435394287,
      "learning_rate": 0.00022176473409692008,
      "loss": 2.9118,
      "step": 134551
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.123878240585327,
      "learning_rate": 0.00022176078508727106,
      "loss": 2.9664,
      "step": 134552
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6887707710266113,
      "learning_rate": 0.00022175683609216827,
      "loss": 3.1892,
      "step": 134553
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8588911294937134,
      "learning_rate": 0.00022175288711161244,
      "loss": 3.184,
      "step": 134554
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9000844955444336,
      "learning_rate": 0.00022174893814560413,
      "loss": 2.5949,
      "step": 134555
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.209911823272705,
      "learning_rate": 0.00022174498919414414,
      "loss": 2.9781,
      "step": 134556
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7131985425949097,
      "learning_rate": 0.00022174104025723322,
      "loss": 3.0048,
      "step": 134557
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8957470655441284,
      "learning_rate": 0.00022173709133487216,
      "loss": 3.1405,
      "step": 134558
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.0179991722106934,
      "learning_rate": 0.00022173314242706163,
      "loss": 3.1543,
      "step": 134559
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3612828254699707,
      "learning_rate": 0.00022172919353380253,
      "loss": 2.9807,
      "step": 134560
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.184579372406006,
      "learning_rate": 0.0002217252446550953,
      "loss": 2.983,
      "step": 134561
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.566878318786621,
      "learning_rate": 0.0002217212957909408,
      "loss": 2.9677,
      "step": 134562
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4164633750915527,
      "learning_rate": 0.00022171734694133985,
      "loss": 2.833,
      "step": 134563
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9541606903076172,
      "learning_rate": 0.00022171339810629309,
      "loss": 2.9988,
      "step": 134564
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.003898859024048,
      "learning_rate": 0.0002217094492858013,
      "loss": 2.9041,
      "step": 134565
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.42257022857666,
      "learning_rate": 0.00022170550047986533,
      "loss": 3.1004,
      "step": 134566
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4273598194122314,
      "learning_rate": 0.00022170155168848565,
      "loss": 3.0962,
      "step": 134567
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3406777381896973,
      "learning_rate": 0.00022169760291166311,
      "loss": 2.7642,
      "step": 134568
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1265156269073486,
      "learning_rate": 0.0002216936541493985,
      "loss": 2.9257,
      "step": 134569
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3020119667053223,
      "learning_rate": 0.00022168970540169247,
      "loss": 2.7713,
      "step": 134570
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.342200756072998,
      "learning_rate": 0.00022168575666854583,
      "loss": 3.1404,
      "step": 134571
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0581254959106445,
      "learning_rate": 0.00022168180794995943,
      "loss": 2.9952,
      "step": 134572
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1265480518341064,
      "learning_rate": 0.00022167785924593372,
      "loss": 3.1358,
      "step": 134573
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9883394241333008,
      "learning_rate": 0.00022167391055646956,
      "loss": 2.9653,
      "step": 134574
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1587913036346436,
      "learning_rate": 0.00022166996188156774,
      "loss": 3.0365,
      "step": 134575
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0490574836730957,
      "learning_rate": 0.0002216660132212289,
      "loss": 3.0042,
      "step": 134576
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3203020095825195,
      "learning_rate": 0.00022166206457545384,
      "loss": 2.8503,
      "step": 134577
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.741113543510437,
      "learning_rate": 0.00022165811594424342,
      "loss": 3.1006,
      "step": 134578
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.259514093399048,
      "learning_rate": 0.00022165416732759812,
      "loss": 3.0203,
      "step": 134579
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7582824230194092,
      "learning_rate": 0.00022165021872551878,
      "loss": 2.826,
      "step": 134580
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.006164789199829,
      "learning_rate": 0.00022164627013800613,
      "loss": 3.1914,
      "step": 134581
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7859541177749634,
      "learning_rate": 0.00022164232156506093,
      "loss": 3.0443,
      "step": 134582
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.165081262588501,
      "learning_rate": 0.00022163837300668388,
      "loss": 2.7922,
      "step": 134583
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8675564527511597,
      "learning_rate": 0.00022163442446287583,
      "loss": 2.6785,
      "step": 134584
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9944710731506348,
      "learning_rate": 0.0002216304759336374,
      "loss": 3.1055,
      "step": 134585
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9115573167800903,
      "learning_rate": 0.00022162652741896928,
      "loss": 2.8158,
      "step": 134586
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.244502305984497,
      "learning_rate": 0.00022162257891887227,
      "loss": 3.0016,
      "step": 134587
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0708131790161133,
      "learning_rate": 0.00022161863043334713,
      "loss": 2.7752,
      "step": 134588
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3542239665985107,
      "learning_rate": 0.00022161468196239454,
      "loss": 2.953,
      "step": 134589
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.025209665298462,
      "learning_rate": 0.00022161073350601527,
      "loss": 3.1295,
      "step": 134590
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0065572261810303,
      "learning_rate": 0.0002216067850642101,
      "loss": 2.972,
      "step": 134591
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.184760332107544,
      "learning_rate": 0.0002216028366369797,
      "loss": 2.9183,
      "step": 134592
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7756632566452026,
      "learning_rate": 0.00022159888822432475,
      "loss": 3.0656,
      "step": 134593
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.979487657546997,
      "learning_rate": 0.00022159493982624608,
      "loss": 3.052,
      "step": 134594
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.03535795211792,
      "learning_rate": 0.00022159099144274436,
      "loss": 2.9662,
      "step": 134595
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.014831066131592,
      "learning_rate": 0.0002215870430738204,
      "loss": 2.8572,
      "step": 134596
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1955924034118652,
      "learning_rate": 0.00022158309471947488,
      "loss": 2.8739,
      "step": 134597
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2386603355407715,
      "learning_rate": 0.0002215791463797085,
      "loss": 2.7759,
      "step": 134598
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0932681560516357,
      "learning_rate": 0.00022157519805452212,
      "loss": 2.7859,
      "step": 134599
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.630774974822998,
      "learning_rate": 0.0002215712497439163,
      "loss": 3.0335,
      "step": 134600
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9362856149673462,
      "learning_rate": 0.00022156730144789192,
      "loss": 3.0387,
      "step": 134601
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.862108826637268,
      "learning_rate": 0.0002215633531664496,
      "loss": 2.7803,
      "step": 134602
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9760985374450684,
      "learning_rate": 0.0002215594048995902,
      "loss": 3.246,
      "step": 134603
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.2248024940490723,
      "learning_rate": 0.00022155545664731435,
      "loss": 2.8231,
      "step": 134604
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9958293437957764,
      "learning_rate": 0.00022155150840962284,
      "loss": 2.8227,
      "step": 134605
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4103612899780273,
      "learning_rate": 0.0002215475601865164,
      "loss": 3.1114,
      "step": 134606
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8602566719055176,
      "learning_rate": 0.00022154361197799572,
      "loss": 2.8026,
      "step": 134607
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.923043131828308,
      "learning_rate": 0.00022153966378406158,
      "loss": 3.0739,
      "step": 134608
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.245434284210205,
      "learning_rate": 0.00022153571560471475,
      "loss": 3.2553,
      "step": 134609
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0423805713653564,
      "learning_rate": 0.0002215317674399558,
      "loss": 2.9103,
      "step": 134610
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0618879795074463,
      "learning_rate": 0.00022152781928978563,
      "loss": 2.821,
      "step": 134611
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.253843307495117,
      "learning_rate": 0.0002215238711542049,
      "loss": 3.1052,
      "step": 134612
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0134897232055664,
      "learning_rate": 0.00022151992303321436,
      "loss": 3.2862,
      "step": 134613
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1273398399353027,
      "learning_rate": 0.0002215159749268148,
      "loss": 2.9901,
      "step": 134614
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2696428298950195,
      "learning_rate": 0.00022151202683500692,
      "loss": 2.9853,
      "step": 134615
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.01001238822937,
      "learning_rate": 0.00022150807875779137,
      "loss": 2.5666,
      "step": 134616
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3924403190612793,
      "learning_rate": 0.00022150413069516893,
      "loss": 2.7608,
      "step": 134617
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1447582244873047,
      "learning_rate": 0.0002215001826471404,
      "loss": 2.8564,
      "step": 134618
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9726803302764893,
      "learning_rate": 0.00022149623461370648,
      "loss": 2.6178,
      "step": 134619
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.245213031768799,
      "learning_rate": 0.00022149228659486782,
      "loss": 2.9288,
      "step": 134620
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.449183940887451,
      "learning_rate": 0.0002214883385906254,
      "loss": 2.9807,
      "step": 134621
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9847984313964844,
      "learning_rate": 0.00022148439060097963,
      "loss": 2.7283,
      "step": 134622
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4189887046813965,
      "learning_rate": 0.0002214804426259314,
      "loss": 3.0614,
      "step": 134623
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.120675802230835,
      "learning_rate": 0.00022147649466548145,
      "loss": 3.122,
      "step": 134624
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.053261756896973,
      "learning_rate": 0.0002214725467196305,
      "loss": 3.0682,
      "step": 134625
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.623685121536255,
      "learning_rate": 0.00022146859878837923,
      "loss": 3.022,
      "step": 134626
    },
    {
      "epoch": 1.75,
      "grad_norm": 5.030381679534912,
      "learning_rate": 0.00022146465087172863,
      "loss": 3.1521,
      "step": 134627
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.567861318588257,
      "learning_rate": 0.00022146070296967911,
      "loss": 2.9137,
      "step": 134628
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.569441556930542,
      "learning_rate": 0.0002214567550822315,
      "loss": 2.8407,
      "step": 134629
    },
    {
      "epoch": 1.75,
      "grad_norm": 4.98086404800415,
      "learning_rate": 0.0002214528072093865,
      "loss": 3.0074,
      "step": 134630
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.792726755142212,
      "learning_rate": 0.000221448859351145,
      "loss": 2.8258,
      "step": 134631
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9054851531982422,
      "learning_rate": 0.0002214449115075076,
      "loss": 2.9558,
      "step": 134632
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8014776706695557,
      "learning_rate": 0.0002214409636784752,
      "loss": 2.9853,
      "step": 134633
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.228314161300659,
      "learning_rate": 0.0002214370158640483,
      "loss": 3.1338,
      "step": 134634
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.734170436859131,
      "learning_rate": 0.00022143306806422772,
      "loss": 2.9373,
      "step": 134635
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4299917221069336,
      "learning_rate": 0.00022142912027901424,
      "loss": 2.8955,
      "step": 134636
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1106321811676025,
      "learning_rate": 0.00022142517250840857,
      "loss": 3.0373,
      "step": 134637
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.288771152496338,
      "learning_rate": 0.0002214212247524114,
      "loss": 3.0177,
      "step": 134638
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.498410224914551,
      "learning_rate": 0.00022141727701102367,
      "loss": 2.8673,
      "step": 134639
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.903983473777771,
      "learning_rate": 0.0002214133292842458,
      "loss": 3.1054,
      "step": 134640
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5123558044433594,
      "learning_rate": 0.0002214093815720787,
      "loss": 3.0951,
      "step": 134641
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.556224822998047,
      "learning_rate": 0.00022140543387452307,
      "loss": 3.0388,
      "step": 134642
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8109651803970337,
      "learning_rate": 0.00022140148619157968,
      "loss": 3.0711,
      "step": 134643
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8969786167144775,
      "learning_rate": 0.0002213975385232492,
      "loss": 2.7578,
      "step": 134644
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.200125217437744,
      "learning_rate": 0.00022139359086953257,
      "loss": 3.0374,
      "step": 134645
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0779218673706055,
      "learning_rate": 0.0002213896432304302,
      "loss": 3.0036,
      "step": 134646
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9146385192871094,
      "learning_rate": 0.00022138569560594296,
      "loss": 2.5535,
      "step": 134647
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.582571029663086,
      "learning_rate": 0.00022138174799607164,
      "loss": 2.8782,
      "step": 134648
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0254058837890625,
      "learning_rate": 0.00022137780040081692,
      "loss": 3.225,
      "step": 134649
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.331587076187134,
      "learning_rate": 0.00022137385282017954,
      "loss": 2.836,
      "step": 134650
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7251811027526855,
      "learning_rate": 0.0002213699052541604,
      "loss": 2.7118,
      "step": 134651
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2354650497436523,
      "learning_rate": 0.0002213659577027599,
      "loss": 2.9826,
      "step": 134652
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0458383560180664,
      "learning_rate": 0.000221362010165979,
      "loss": 3.1042,
      "step": 134653
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.665999174118042,
      "learning_rate": 0.00022135806264381836,
      "loss": 2.9691,
      "step": 134654
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.49967622756958,
      "learning_rate": 0.0002213541151362788,
      "loss": 2.7553,
      "step": 134655
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2830870151519775,
      "learning_rate": 0.0002213501676433609,
      "loss": 3.1783,
      "step": 134656
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4366354942321777,
      "learning_rate": 0.00022134622016506557,
      "loss": 2.9292,
      "step": 134657
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.209996223449707,
      "learning_rate": 0.00022134227270139353,
      "loss": 2.956,
      "step": 134658
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5295183658599854,
      "learning_rate": 0.00022133832525234536,
      "loss": 3.0572,
      "step": 134659
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2605128288269043,
      "learning_rate": 0.0002213343778179219,
      "loss": 2.831,
      "step": 134660
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.799311876296997,
      "learning_rate": 0.0002213304303981238,
      "loss": 2.8842,
      "step": 134661
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.410917043685913,
      "learning_rate": 0.00022132648299295188,
      "loss": 2.9271,
      "step": 134662
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.291626214981079,
      "learning_rate": 0.0002213225356024069,
      "loss": 2.7677,
      "step": 134663
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.396103858947754,
      "learning_rate": 0.00022131858822648962,
      "loss": 2.9949,
      "step": 134664
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.819348692893982,
      "learning_rate": 0.00022131464086520058,
      "loss": 2.7652,
      "step": 134665
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.999913215637207,
      "learning_rate": 0.00022131069351854064,
      "loss": 3.0821,
      "step": 134666
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8478126525878906,
      "learning_rate": 0.00022130674618651053,
      "loss": 3.273,
      "step": 134667
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4830236434936523,
      "learning_rate": 0.00022130279886911096,
      "loss": 2.7929,
      "step": 134668
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.3370587825775146,
      "learning_rate": 0.0002212988515663427,
      "loss": 2.8642,
      "step": 134669
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.418073892593384,
      "learning_rate": 0.0002212949042782066,
      "loss": 2.8059,
      "step": 134670
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5505669116973877,
      "learning_rate": 0.00022129095700470315,
      "loss": 2.9204,
      "step": 134671
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0770926475524902,
      "learning_rate": 0.00022128700974583318,
      "loss": 3.0766,
      "step": 134672
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.158025026321411,
      "learning_rate": 0.00022128306250159745,
      "loss": 3.1452,
      "step": 134673
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1303510665893555,
      "learning_rate": 0.00022127911527199665,
      "loss": 2.9644,
      "step": 134674
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.565011501312256,
      "learning_rate": 0.0002212751680570316,
      "loss": 3.1722,
      "step": 134675
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.171692132949829,
      "learning_rate": 0.00022127122085670303,
      "loss": 2.5718,
      "step": 134676
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.48396372795105,
      "learning_rate": 0.00022126727367101161,
      "loss": 2.8861,
      "step": 134677
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1972553730010986,
      "learning_rate": 0.000221263326499958,
      "loss": 2.9751,
      "step": 134678
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.750802755355835,
      "learning_rate": 0.00022125937934354309,
      "loss": 3.0509,
      "step": 134679
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1354076862335205,
      "learning_rate": 0.00022125543220176755,
      "loss": 2.8443,
      "step": 134680
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6180953979492188,
      "learning_rate": 0.00022125148507463207,
      "loss": 2.9766,
      "step": 134681
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.174740791320801,
      "learning_rate": 0.00022124753796213747,
      "loss": 2.8273,
      "step": 134682
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.9368062019348145,
      "learning_rate": 0.0002212435908642844,
      "loss": 2.9207,
      "step": 134683
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2580478191375732,
      "learning_rate": 0.0002212396437810737,
      "loss": 2.8925,
      "step": 134684
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4841933250427246,
      "learning_rate": 0.00022123569671250598,
      "loss": 3.0529,
      "step": 134685
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.682049512863159,
      "learning_rate": 0.00022123174965858202,
      "loss": 2.9932,
      "step": 134686
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.762122392654419,
      "learning_rate": 0.0002212278026193026,
      "loss": 3.0881,
      "step": 134687
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6730170249938965,
      "learning_rate": 0.00022122385559466843,
      "loss": 3.0745,
      "step": 134688
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.235224485397339,
      "learning_rate": 0.0002212199085846802,
      "loss": 2.9148,
      "step": 134689
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.710402011871338,
      "learning_rate": 0.00022121596158933865,
      "loss": 2.9622,
      "step": 134690
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.655564546585083,
      "learning_rate": 0.00022121201460864463,
      "loss": 2.9484,
      "step": 134691
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.469494581222534,
      "learning_rate": 0.00022120806764259872,
      "loss": 3.0451,
      "step": 134692
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1196091175079346,
      "learning_rate": 0.00022120412069120168,
      "loss": 2.9325,
      "step": 134693
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0695579051971436,
      "learning_rate": 0.00022120017375445436,
      "loss": 3.0439,
      "step": 134694
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0626330375671387,
      "learning_rate": 0.0002211962268323574,
      "loss": 2.9484,
      "step": 134695
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8057283163070679,
      "learning_rate": 0.0002211922799249115,
      "loss": 3.002,
      "step": 134696
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.048063278198242,
      "learning_rate": 0.00022118833303211744,
      "loss": 2.9871,
      "step": 134697
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0235209465026855,
      "learning_rate": 0.00022118438615397596,
      "loss": 2.8467,
      "step": 134698
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0306575298309326,
      "learning_rate": 0.00022118043929048788,
      "loss": 2.8447,
      "step": 134699
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2603161334991455,
      "learning_rate": 0.00022117649244165382,
      "loss": 3.0276,
      "step": 134700
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.063209295272827,
      "learning_rate": 0.00022117254560747449,
      "loss": 2.8093,
      "step": 134701
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1543266773223877,
      "learning_rate": 0.00022116859878795067,
      "loss": 3.1302,
      "step": 134702
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9099284410476685,
      "learning_rate": 0.0002211646519830831,
      "loss": 3.0106,
      "step": 134703
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9939491748809814,
      "learning_rate": 0.0002211607051928725,
      "loss": 2.8893,
      "step": 134704
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.894341230392456,
      "learning_rate": 0.00022115675841731958,
      "loss": 3.1893,
      "step": 134705
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.258040428161621,
      "learning_rate": 0.0002211528116564253,
      "loss": 2.9824,
      "step": 134706
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.807723045349121,
      "learning_rate": 0.00022114886491019002,
      "loss": 3.0257,
      "step": 134707
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2452075481414795,
      "learning_rate": 0.0002211449181786147,
      "loss": 2.8954,
      "step": 134708
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.983915090560913,
      "learning_rate": 0.0002211409714617,
      "loss": 2.9576,
      "step": 134709
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5470693111419678,
      "learning_rate": 0.00022113702475944668,
      "loss": 3.125,
      "step": 134710
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.179769515991211,
      "learning_rate": 0.00022113307807185547,
      "loss": 2.7954,
      "step": 134711
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.11814546585083,
      "learning_rate": 0.00022112913139892728,
      "loss": 3.3669,
      "step": 134712
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8900365829467773,
      "learning_rate": 0.0002211251847406625,
      "loss": 2.9392,
      "step": 134713
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.468214988708496,
      "learning_rate": 0.00022112123809706204,
      "loss": 3.0304,
      "step": 134714
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.14725399017334,
      "learning_rate": 0.00022111729146812663,
      "loss": 3.1544,
      "step": 134715
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.8902297019958496,
      "learning_rate": 0.00022111334485385703,
      "loss": 2.9483,
      "step": 134716
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1605992317199707,
      "learning_rate": 0.00022110939825425392,
      "loss": 2.8399,
      "step": 134717
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1860544681549072,
      "learning_rate": 0.00022110545166931822,
      "loss": 3.1096,
      "step": 134718
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8141772747039795,
      "learning_rate": 0.00022110150509905036,
      "loss": 2.9657,
      "step": 134719
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8017573356628418,
      "learning_rate": 0.00022109755854345122,
      "loss": 2.924,
      "step": 134720
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.2732317447662354,
      "learning_rate": 0.00022109361200252156,
      "loss": 2.8068,
      "step": 134721
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.6416358947753906,
      "learning_rate": 0.00022108966547626204,
      "loss": 2.9631,
      "step": 134722
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.088667392730713,
      "learning_rate": 0.00022108571896467343,
      "loss": 2.9731,
      "step": 134723
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7075867652893066,
      "learning_rate": 0.00022108177246775654,
      "loss": 2.9239,
      "step": 134724
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1654281616210938,
      "learning_rate": 0.0002210778259855121,
      "loss": 2.9084,
      "step": 134725
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.435391426086426,
      "learning_rate": 0.0002210738795179407,
      "loss": 3.0014,
      "step": 134726
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1899328231811523,
      "learning_rate": 0.00022106993306504318,
      "loss": 3.1269,
      "step": 134727
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2920188903808594,
      "learning_rate": 0.0002210659866268202,
      "loss": 3.1108,
      "step": 134728
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.293849468231201,
      "learning_rate": 0.00022106204020327255,
      "loss": 2.8833,
      "step": 134729
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.133530616760254,
      "learning_rate": 0.00022105809379440098,
      "loss": 2.9442,
      "step": 134730
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.912992000579834,
      "learning_rate": 0.00022105414740020629,
      "loss": 2.9262,
      "step": 134731
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7578704357147217,
      "learning_rate": 0.00022105020102068903,
      "loss": 2.7193,
      "step": 134732
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.376796007156372,
      "learning_rate": 0.00022104625465585,
      "loss": 2.9401,
      "step": 134733
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.3870697021484375,
      "learning_rate": 0.00022104230830569002,
      "loss": 2.8007,
      "step": 134734
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.622920036315918,
      "learning_rate": 0.00022103836197020967,
      "loss": 3.0543,
      "step": 134735
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.709104537963867,
      "learning_rate": 0.00022103441564940988,
      "loss": 2.9247,
      "step": 134736
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2902491092681885,
      "learning_rate": 0.00022103046934329133,
      "loss": 2.9651,
      "step": 134737
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0251810550689697,
      "learning_rate": 0.00022102652305185463,
      "loss": 2.883,
      "step": 134738
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1161248683929443,
      "learning_rate": 0.00022102257677510055,
      "loss": 2.7361,
      "step": 134739
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9574238061904907,
      "learning_rate": 0.0002210186305130299,
      "loss": 2.799,
      "step": 134740
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.4897544384002686,
      "learning_rate": 0.00022101468426564331,
      "loss": 2.9518,
      "step": 134741
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.02549409866333,
      "learning_rate": 0.00022101073803294165,
      "loss": 2.9457,
      "step": 134742
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.570591449737549,
      "learning_rate": 0.0002210067918149257,
      "loss": 2.8597,
      "step": 134743
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.139493703842163,
      "learning_rate": 0.00022100284561159597,
      "loss": 2.8835,
      "step": 134744
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.241978406906128,
      "learning_rate": 0.00022099889942295324,
      "loss": 2.9671,
      "step": 134745
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2382619380950928,
      "learning_rate": 0.0002209949532489984,
      "loss": 3.0785,
      "step": 134746
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2001776695251465,
      "learning_rate": 0.00022099100708973204,
      "loss": 2.8145,
      "step": 134747
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8941774368286133,
      "learning_rate": 0.00022098706094515492,
      "loss": 3.0515,
      "step": 134748
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.023324489593506,
      "learning_rate": 0.00022098311481526792,
      "loss": 2.7313,
      "step": 134749
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.129127264022827,
      "learning_rate": 0.00022097916870007152,
      "loss": 2.8689,
      "step": 134750
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7069112062454224,
      "learning_rate": 0.00022097522259956665,
      "loss": 3.1024,
      "step": 134751
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.934601306915283,
      "learning_rate": 0.00022097127651375393,
      "loss": 2.9533,
      "step": 134752
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.04667329788208,
      "learning_rate": 0.00022096733044263414,
      "loss": 2.9082,
      "step": 134753
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9361903667449951,
      "learning_rate": 0.00022096338438620805,
      "loss": 3.0391,
      "step": 134754
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.933194637298584,
      "learning_rate": 0.00022095943834447645,
      "loss": 3.3,
      "step": 134755
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7867963314056396,
      "learning_rate": 0.00022095549231743988,
      "loss": 2.857,
      "step": 134756
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.077615737915039,
      "learning_rate": 0.00022095154630509913,
      "loss": 2.7058,
      "step": 134757
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.2958011627197266,
      "learning_rate": 0.00022094760030745501,
      "loss": 2.9856,
      "step": 134758
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.7879786491394043,
      "learning_rate": 0.00022094365432450823,
      "loss": 2.8955,
      "step": 134759
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.282994508743286,
      "learning_rate": 0.0002209397083562595,
      "loss": 2.8822,
      "step": 134760
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9742865562438965,
      "learning_rate": 0.0002209357624027097,
      "loss": 3.0409,
      "step": 134761
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0480034351348877,
      "learning_rate": 0.00022093181646385933,
      "loss": 2.9418,
      "step": 134762
    },
    {
      "epoch": 1.75,
      "grad_norm": 3.6376993656158447,
      "learning_rate": 0.0002209278705397092,
      "loss": 2.7721,
      "step": 134763
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.385713815689087,
      "learning_rate": 0.00022092392463026007,
      "loss": 3.0584,
      "step": 134764
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0713393688201904,
      "learning_rate": 0.00022091997873551272,
      "loss": 2.9565,
      "step": 134765
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.100931406021118,
      "learning_rate": 0.00022091603285546781,
      "loss": 3.0875,
      "step": 134766
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.817002296447754,
      "learning_rate": 0.00022091208699012615,
      "loss": 2.8278,
      "step": 134767
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.061387300491333,
      "learning_rate": 0.00022090814113948838,
      "loss": 3.063,
      "step": 134768
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7641241550445557,
      "learning_rate": 0.00022090419530355532,
      "loss": 3.0319,
      "step": 134769
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8590160608291626,
      "learning_rate": 0.00022090024948232762,
      "loss": 2.9436,
      "step": 134770
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0401482582092285,
      "learning_rate": 0.00022089630367580607,
      "loss": 2.9951,
      "step": 134771
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.462521553039551,
      "learning_rate": 0.0002208923578839914,
      "loss": 3.0203,
      "step": 134772
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8995996713638306,
      "learning_rate": 0.00022088841210688437,
      "loss": 2.8054,
      "step": 134773
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.8598757982254028,
      "learning_rate": 0.00022088446634448562,
      "loss": 2.9771,
      "step": 134774
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.1058077812194824,
      "learning_rate": 0.0002208805205967959,
      "loss": 3.2745,
      "step": 134775
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9558584690093994,
      "learning_rate": 0.00022087657486381609,
      "loss": 3.1484,
      "step": 134776
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0896332263946533,
      "learning_rate": 0.00022087262914554677,
      "loss": 2.9826,
      "step": 134777
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0875890254974365,
      "learning_rate": 0.0002208686834419887,
      "loss": 2.8529,
      "step": 134778
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.26043701171875,
      "learning_rate": 0.0002208647377531427,
      "loss": 2.8631,
      "step": 134779
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.7509956359863281,
      "learning_rate": 0.0002208607920790094,
      "loss": 2.918,
      "step": 134780
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.046152114868164,
      "learning_rate": 0.00022085684641958952,
      "loss": 2.9313,
      "step": 134781
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.90472674369812,
      "learning_rate": 0.0002208529007748839,
      "loss": 3.1386,
      "step": 134782
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9589626789093018,
      "learning_rate": 0.00022084895514489327,
      "loss": 2.8136,
      "step": 134783
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.451387643814087,
      "learning_rate": 0.00022084500952961823,
      "loss": 3.0363,
      "step": 134784
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.0453076362609863,
      "learning_rate": 0.00022084106392905966,
      "loss": 2.8968,
      "step": 134785
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4993340969085693,
      "learning_rate": 0.0002208371183432182,
      "loss": 2.7359,
      "step": 134786
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.129122257232666,
      "learning_rate": 0.00022083317277209457,
      "loss": 3.1423,
      "step": 134787
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.852471113204956,
      "learning_rate": 0.00022082922721568956,
      "loss": 2.8391,
      "step": 134788
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1704564094543457,
      "learning_rate": 0.00022082528167400393,
      "loss": 3.1445,
      "step": 134789
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.9974093437194824,
      "learning_rate": 0.00022082133614703838,
      "loss": 2.7432,
      "step": 134790
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4782800674438477,
      "learning_rate": 0.0002208173906347936,
      "loss": 2.8823,
      "step": 134791
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.172348737716675,
      "learning_rate": 0.00022081344513727043,
      "loss": 2.9023,
      "step": 134792
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2042336463928223,
      "learning_rate": 0.0002208094996544695,
      "loss": 3.0385,
      "step": 134793
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5856640338897705,
      "learning_rate": 0.00022080555418639152,
      "loss": 3.1255,
      "step": 134794
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.8546359539031982,
      "learning_rate": 0.00022080160873303733,
      "loss": 2.9269,
      "step": 134795
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9534120559692383,
      "learning_rate": 0.00022079766329440758,
      "loss": 2.9783,
      "step": 134796
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8934510946273804,
      "learning_rate": 0.00022079371787050308,
      "loss": 3.1481,
      "step": 134797
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.8725438117980957,
      "learning_rate": 0.0002207897724613246,
      "loss": 3.0284,
      "step": 134798
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.65474009513855,
      "learning_rate": 0.00022078582706687273,
      "loss": 2.8232,
      "step": 134799
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.379190444946289,
      "learning_rate": 0.0002207818816871482,
      "loss": 2.902,
      "step": 134800
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.6620254516601562,
      "learning_rate": 0.00022077793632215188,
      "loss": 2.8296,
      "step": 134801
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4052000045776367,
      "learning_rate": 0.0002207739909718844,
      "loss": 2.8498,
      "step": 134802
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2579996585845947,
      "learning_rate": 0.00022077004563634655,
      "loss": 3.0778,
      "step": 134803
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.7911217212677002,
      "learning_rate": 0.00022076610031553917,
      "loss": 2.9919,
      "step": 134804
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3709323406219482,
      "learning_rate": 0.0002207621550094627,
      "loss": 3.0888,
      "step": 134805
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.493424654006958,
      "learning_rate": 0.00022075820971811808,
      "loss": 3.017,
      "step": 134806
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5871939659118652,
      "learning_rate": 0.00022075426444150597,
      "loss": 2.874,
      "step": 134807
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0013394355773926,
      "learning_rate": 0.00022075031917962717,
      "loss": 2.8335,
      "step": 134808
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4332637786865234,
      "learning_rate": 0.0002207463739324824,
      "loss": 2.9799,
      "step": 134809
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.022338628768921,
      "learning_rate": 0.00022074242870007244,
      "loss": 3.1096,
      "step": 134810
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.6250818967819214,
      "learning_rate": 0.0002207384834823979,
      "loss": 2.8549,
      "step": 134811
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0596351623535156,
      "learning_rate": 0.00022073453827945954,
      "loss": 2.9032,
      "step": 134812
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.7996327877044678,
      "learning_rate": 0.00022073059309125808,
      "loss": 2.9963,
      "step": 134813
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2866909503936768,
      "learning_rate": 0.00022072664791779437,
      "loss": 3.0925,
      "step": 134814
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5463831424713135,
      "learning_rate": 0.00022072270275906908,
      "loss": 2.9979,
      "step": 134815
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.154902458190918,
      "learning_rate": 0.000220718757615083,
      "loss": 2.8389,
      "step": 134816
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.748605728149414,
      "learning_rate": 0.0002207148124858367,
      "loss": 2.8495,
      "step": 134817
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.123269557952881,
      "learning_rate": 0.000220710867371331,
      "loss": 2.4595,
      "step": 134818
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.408066511154175,
      "learning_rate": 0.0002207069222715667,
      "loss": 3.0195,
      "step": 134819
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9307373762130737,
      "learning_rate": 0.0002207029771865444,
      "loss": 3.109,
      "step": 134820
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.17608380317688,
      "learning_rate": 0.00022069903211626495,
      "loss": 3.0618,
      "step": 134821
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4739911556243896,
      "learning_rate": 0.0002206950870607292,
      "loss": 3.0108,
      "step": 134822
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.007009267807007,
      "learning_rate": 0.0002206911420199376,
      "loss": 2.8302,
      "step": 134823
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.234562635421753,
      "learning_rate": 0.000220687196993891,
      "loss": 2.8457,
      "step": 134824
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.802943229675293,
      "learning_rate": 0.00022068325198259015,
      "loss": 2.9983,
      "step": 134825
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.031524419784546,
      "learning_rate": 0.0002206793069860358,
      "loss": 3.02,
      "step": 134826
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1688833236694336,
      "learning_rate": 0.0002206753620042286,
      "loss": 3.1472,
      "step": 134827
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.817667007446289,
      "learning_rate": 0.00022067141703716953,
      "loss": 3.2104,
      "step": 134828
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1810996532440186,
      "learning_rate": 0.00022066747208485896,
      "loss": 2.8598,
      "step": 134829
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.149789571762085,
      "learning_rate": 0.00022066352714729788,
      "loss": 2.893,
      "step": 134830
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.5306310653686523,
      "learning_rate": 0.0002206595822244869,
      "loss": 3.1082,
      "step": 134831
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.301563024520874,
      "learning_rate": 0.00022065563731642679,
      "loss": 2.8933,
      "step": 134832
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.046206474304199,
      "learning_rate": 0.0002206516924231183,
      "loss": 2.7948,
      "step": 134833
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9525578022003174,
      "learning_rate": 0.00022064774754456236,
      "loss": 3.0211,
      "step": 134834
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.5472636222839355,
      "learning_rate": 0.00022064380268075928,
      "loss": 2.7648,
      "step": 134835
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.6078944206237793,
      "learning_rate": 0.00022063985783171006,
      "loss": 2.8581,
      "step": 134836
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.325155735015869,
      "learning_rate": 0.00022063591299741536,
      "loss": 3.1193,
      "step": 134837
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.115121364593506,
      "learning_rate": 0.000220631968177876,
      "loss": 2.8932,
      "step": 134838
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.363354206085205,
      "learning_rate": 0.00022062802337309258,
      "loss": 2.9413,
      "step": 134839
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.650891065597534,
      "learning_rate": 0.00022062407858306607,
      "loss": 2.9357,
      "step": 134840
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.082531452178955,
      "learning_rate": 0.00022062013380779694,
      "loss": 3.2409,
      "step": 134841
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.533959150314331,
      "learning_rate": 0.000220616189047286,
      "loss": 2.9498,
      "step": 134842
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4265310764312744,
      "learning_rate": 0.00022061224430153402,
      "loss": 2.9878,
      "step": 134843
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.816126823425293,
      "learning_rate": 0.0002206082995705417,
      "loss": 2.9017,
      "step": 134844
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1952173709869385,
      "learning_rate": 0.00022060435485430984,
      "loss": 3.1189,
      "step": 134845
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.351973295211792,
      "learning_rate": 0.00022060041015283923,
      "loss": 2.8998,
      "step": 134846
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9338228702545166,
      "learning_rate": 0.00022059646546613033,
      "loss": 2.9824,
      "step": 134847
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1192543506622314,
      "learning_rate": 0.00022059252079418412,
      "loss": 3.1196,
      "step": 134848
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.128152370452881,
      "learning_rate": 0.00022058857613700122,
      "loss": 2.914,
      "step": 134849
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.058494806289673,
      "learning_rate": 0.0002205846314945824,
      "loss": 3.1007,
      "step": 134850
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7066650390625,
      "learning_rate": 0.0002205806868669284,
      "loss": 3.0632,
      "step": 134851
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3179891109466553,
      "learning_rate": 0.00022057674225404,
      "loss": 3.1641,
      "step": 134852
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0430710315704346,
      "learning_rate": 0.00022057279765591792,
      "loss": 2.9748,
      "step": 134853
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4553420543670654,
      "learning_rate": 0.0002205688530725628,
      "loss": 3.0442,
      "step": 134854
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8533594608306885,
      "learning_rate": 0.0002205649085039754,
      "loss": 2.9508,
      "step": 134855
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1781516075134277,
      "learning_rate": 0.00022056096395015647,
      "loss": 3.0376,
      "step": 134856
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1445624828338623,
      "learning_rate": 0.00022055701941110678,
      "loss": 2.9348,
      "step": 134857
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2889997959136963,
      "learning_rate": 0.00022055307488682703,
      "loss": 2.9673,
      "step": 134858
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.139414072036743,
      "learning_rate": 0.000220549130377318,
      "loss": 2.9004,
      "step": 134859
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0432288646698,
      "learning_rate": 0.00022054518588258038,
      "loss": 2.8005,
      "step": 134860
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5743844509124756,
      "learning_rate": 0.00022054124140261495,
      "loss": 2.9974,
      "step": 134861
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.469881296157837,
      "learning_rate": 0.0002205372969374223,
      "loss": 3.197,
      "step": 134862
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1530356407165527,
      "learning_rate": 0.00022053335248700334,
      "loss": 2.8881,
      "step": 134863
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.928272247314453,
      "learning_rate": 0.0002205294080513587,
      "loss": 3.065,
      "step": 134864
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3913819789886475,
      "learning_rate": 0.00022052546363048918,
      "loss": 2.9668,
      "step": 134865
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0679311752319336,
      "learning_rate": 0.00022052151922439544,
      "loss": 3.2257,
      "step": 134866
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8950064182281494,
      "learning_rate": 0.00022051757483307824,
      "loss": 3.0344,
      "step": 134867
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.277604103088379,
      "learning_rate": 0.00022051363045653838,
      "loss": 3.0451,
      "step": 134868
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.697908401489258,
      "learning_rate": 0.00022050968609477645,
      "loss": 2.9383,
      "step": 134869
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2585790157318115,
      "learning_rate": 0.00022050574174779333,
      "loss": 2.979,
      "step": 134870
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1207005977630615,
      "learning_rate": 0.0002205017974155897,
      "loss": 2.932,
      "step": 134871
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.4651944637298584,
      "learning_rate": 0.0002204978530981663,
      "loss": 3.1691,
      "step": 134872
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0462427139282227,
      "learning_rate": 0.0002204939087955238,
      "loss": 3.3153,
      "step": 134873
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.036505937576294,
      "learning_rate": 0.00022048996450766293,
      "loss": 3.0326,
      "step": 134874
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.402111053466797,
      "learning_rate": 0.0002204860202345846,
      "loss": 2.991,
      "step": 134875
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7025563716888428,
      "learning_rate": 0.00022048207597628934,
      "loss": 2.8811,
      "step": 134876
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.644803762435913,
      "learning_rate": 0.00022047813173277809,
      "loss": 2.7695,
      "step": 134877
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5340521335601807,
      "learning_rate": 0.00022047418750405134,
      "loss": 2.9141,
      "step": 134878
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2774622440338135,
      "learning_rate": 0.00022047024329010994,
      "loss": 3.2649,
      "step": 134879
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3471720218658447,
      "learning_rate": 0.00022046629909095459,
      "loss": 2.6784,
      "step": 134880
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.109823226928711,
      "learning_rate": 0.00022046235490658607,
      "loss": 2.9676,
      "step": 134881
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.466721296310425,
      "learning_rate": 0.0002204584107370052,
      "loss": 3.0931,
      "step": 134882
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.883775234222412,
      "learning_rate": 0.00022045446658221256,
      "loss": 2.7807,
      "step": 134883
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1605939865112305,
      "learning_rate": 0.00022045052244220895,
      "loss": 2.9791,
      "step": 134884
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.467796564102173,
      "learning_rate": 0.00022044657831699505,
      "loss": 2.8293,
      "step": 134885
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8862214088439941,
      "learning_rate": 0.00022044263420657162,
      "loss": 2.9816,
      "step": 134886
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.039182662963867,
      "learning_rate": 0.0002204386901109394,
      "loss": 3.0482,
      "step": 134887
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.15082049369812,
      "learning_rate": 0.00022043474603009914,
      "loss": 2.897,
      "step": 134888
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.040032148361206,
      "learning_rate": 0.00022043080196405173,
      "loss": 3.1043,
      "step": 134889
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0602729320526123,
      "learning_rate": 0.00022042685791279757,
      "loss": 2.8593,
      "step": 134890
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8134024143218994,
      "learning_rate": 0.00022042291387633757,
      "loss": 2.922,
      "step": 134891
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0863778591156006,
      "learning_rate": 0.00022041896985467243,
      "loss": 3.0571,
      "step": 134892
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2190494537353516,
      "learning_rate": 0.0002204150258478029,
      "loss": 3.0112,
      "step": 134893
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.9267430305480957,
      "learning_rate": 0.00022041108185572974,
      "loss": 3.1116,
      "step": 134894
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.323185443878174,
      "learning_rate": 0.0002204071378784538,
      "loss": 2.9635,
      "step": 134895
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.282480001449585,
      "learning_rate": 0.00022040319391597552,
      "loss": 2.855,
      "step": 134896
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.314436674118042,
      "learning_rate": 0.00022039924996829582,
      "loss": 2.7668,
      "step": 134897
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.482621908187866,
      "learning_rate": 0.0002203953060354154,
      "loss": 3.2508,
      "step": 134898
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3708231449127197,
      "learning_rate": 0.000220391362117335,
      "loss": 3.033,
      "step": 134899
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4561212062835693,
      "learning_rate": 0.00022038741821405532,
      "loss": 3.1352,
      "step": 134900
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.198249101638794,
      "learning_rate": 0.00022038347432557726,
      "loss": 3.0662,
      "step": 134901
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9591790437698364,
      "learning_rate": 0.00022037953045190127,
      "loss": 2.8454,
      "step": 134902
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8428902626037598,
      "learning_rate": 0.00022037558659302825,
      "loss": 2.8554,
      "step": 134903
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9615416526794434,
      "learning_rate": 0.0002203716427489589,
      "loss": 2.7156,
      "step": 134904
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4231925010681152,
      "learning_rate": 0.000220367698919694,
      "loss": 2.9068,
      "step": 134905
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.181301593780518,
      "learning_rate": 0.00022036375510523418,
      "loss": 2.7811,
      "step": 134906
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6673688888549805,
      "learning_rate": 0.00022035981130558044,
      "loss": 2.7838,
      "step": 134907
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8913793563842773,
      "learning_rate": 0.00022035586752073313,
      "loss": 3.0819,
      "step": 134908
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.045837879180908,
      "learning_rate": 0.0002203519237506932,
      "loss": 2.9538,
      "step": 134909
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.3920273780822754,
      "learning_rate": 0.00022034797999546135,
      "loss": 2.7136,
      "step": 134910
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5315330028533936,
      "learning_rate": 0.00022034403625503828,
      "loss": 2.9302,
      "step": 134911
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.2098262310028076,
      "learning_rate": 0.00022034009252942482,
      "loss": 3.2156,
      "step": 134912
    },
    {
      "epoch": 1.76,
      "grad_norm": 5.530247688293457,
      "learning_rate": 0.00022033614881862172,
      "loss": 2.9583,
      "step": 134913
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.003887176513672,
      "learning_rate": 0.00022033220512262952,
      "loss": 2.9604,
      "step": 134914
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.2691268920898438,
      "learning_rate": 0.00022032826144144908,
      "loss": 2.6475,
      "step": 134915
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.1270008087158203,
      "learning_rate": 0.0002203243177750811,
      "loss": 2.8953,
      "step": 134916
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.0560455322265625,
      "learning_rate": 0.00022032037412352633,
      "loss": 2.7869,
      "step": 134917
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.9494194984436035,
      "learning_rate": 0.00022031643048678556,
      "loss": 2.9595,
      "step": 134918
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.9653844833374023,
      "learning_rate": 0.0002203124868648594,
      "loss": 2.9151,
      "step": 134919
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.264381170272827,
      "learning_rate": 0.0002203085432577488,
      "loss": 2.9422,
      "step": 134920
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.78550386428833,
      "learning_rate": 0.00022030459966545423,
      "loss": 2.832,
      "step": 134921
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0050830841064453,
      "learning_rate": 0.00022030065608797655,
      "loss": 3.5291,
      "step": 134922
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.343669891357422,
      "learning_rate": 0.00022029671252531645,
      "loss": 2.7846,
      "step": 134923
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2745018005371094,
      "learning_rate": 0.0002202927689774747,
      "loss": 2.9955,
      "step": 134924
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.013371706008911,
      "learning_rate": 0.00022028882544445206,
      "loss": 3.3142,
      "step": 134925
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.8237102031707764,
      "learning_rate": 0.00022028488192624935,
      "loss": 2.9884,
      "step": 134926
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.946517467498779,
      "learning_rate": 0.00022028093842286705,
      "loss": 2.9689,
      "step": 134927
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.520622730255127,
      "learning_rate": 0.00022027699493430602,
      "loss": 3.0088,
      "step": 134928
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1422178745269775,
      "learning_rate": 0.000220273051460567,
      "loss": 2.8472,
      "step": 134929
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.3109891414642334,
      "learning_rate": 0.00022026910800165071,
      "loss": 3.059,
      "step": 134930
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.633195161819458,
      "learning_rate": 0.00022026516455755796,
      "loss": 2.9872,
      "step": 134931
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2358217239379883,
      "learning_rate": 0.00022026122112828953,
      "loss": 2.6154,
      "step": 134932
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.33674693107605,
      "learning_rate": 0.00022025727771384586,
      "loss": 2.9525,
      "step": 134933
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.904158115386963,
      "learning_rate": 0.0002202533343142279,
      "loss": 2.9402,
      "step": 134934
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9066177606582642,
      "learning_rate": 0.0002202493909294364,
      "loss": 3.1285,
      "step": 134935
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8837714195251465,
      "learning_rate": 0.00022024544755947195,
      "loss": 2.9545,
      "step": 134936
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2951037883758545,
      "learning_rate": 0.00022024150420433546,
      "loss": 2.9556,
      "step": 134937
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.038365125656128,
      "learning_rate": 0.00022023756086402764,
      "loss": 3.0879,
      "step": 134938
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.214831590652466,
      "learning_rate": 0.0002202336175385491,
      "loss": 3.0489,
      "step": 134939
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1543283462524414,
      "learning_rate": 0.0002202296742279006,
      "loss": 3.2273,
      "step": 134940
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.044882297515869,
      "learning_rate": 0.0002202257309320829,
      "loss": 3.0317,
      "step": 134941
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4272618293762207,
      "learning_rate": 0.00022022178765109676,
      "loss": 2.9726,
      "step": 134942
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2014753818511963,
      "learning_rate": 0.0002202178443849429,
      "loss": 2.9104,
      "step": 134943
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.074165105819702,
      "learning_rate": 0.0002202139011336221,
      "loss": 2.9584,
      "step": 134944
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8299435377120972,
      "learning_rate": 0.00022020995789713506,
      "loss": 2.9827,
      "step": 134945
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0924153327941895,
      "learning_rate": 0.00022020601467548237,
      "loss": 3.015,
      "step": 134946
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2306153774261475,
      "learning_rate": 0.000220202071468665,
      "loss": 3.2066,
      "step": 134947
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.748415946960449,
      "learning_rate": 0.00022019812827668347,
      "loss": 2.8101,
      "step": 134948
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1163175106048584,
      "learning_rate": 0.00022019418509953864,
      "loss": 2.7166,
      "step": 134949
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.818333148956299,
      "learning_rate": 0.00022019024193723128,
      "loss": 2.8599,
      "step": 134950
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8022065162658691,
      "learning_rate": 0.000220186298789762,
      "loss": 3.0224,
      "step": 134951
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0401453971862793,
      "learning_rate": 0.00022018235565713168,
      "loss": 3.0411,
      "step": 134952
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2133026123046875,
      "learning_rate": 0.00022017841253934092,
      "loss": 3.246,
      "step": 134953
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9825834035873413,
      "learning_rate": 0.00022017446943639043,
      "loss": 2.9662,
      "step": 134954
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1046769618988037,
      "learning_rate": 0.00022017052634828107,
      "loss": 3.3252,
      "step": 134955
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0044620037078857,
      "learning_rate": 0.00022016658327501357,
      "loss": 3.0478,
      "step": 134956
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.6681573390960693,
      "learning_rate": 0.00022016264021658855,
      "loss": 3.1266,
      "step": 134957
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.400020122528076,
      "learning_rate": 0.00022015869717300677,
      "loss": 2.9732,
      "step": 134958
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.792661190032959,
      "learning_rate": 0.00022015475414426905,
      "loss": 3.0015,
      "step": 134959
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3370561599731445,
      "learning_rate": 0.00022015081113037604,
      "loss": 3.0396,
      "step": 134960
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6660218238830566,
      "learning_rate": 0.00022014686813132852,
      "loss": 2.9597,
      "step": 134961
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.059755563735962,
      "learning_rate": 0.00022014292514712725,
      "loss": 2.8662,
      "step": 134962
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9727181196212769,
      "learning_rate": 0.00022013898217777283,
      "loss": 3.1258,
      "step": 134963
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6818699836730957,
      "learning_rate": 0.00022013503922326612,
      "loss": 2.977,
      "step": 134964
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6061618328094482,
      "learning_rate": 0.00022013109628360778,
      "loss": 3.0134,
      "step": 134965
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8995428085327148,
      "learning_rate": 0.0002201271533587986,
      "loss": 2.7509,
      "step": 134966
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0053036212921143,
      "learning_rate": 0.00022012321044883934,
      "loss": 2.9836,
      "step": 134967
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.375769853591919,
      "learning_rate": 0.00022011926755373067,
      "loss": 2.9232,
      "step": 134968
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3676416873931885,
      "learning_rate": 0.0002201153246734733,
      "loss": 2.9879,
      "step": 134969
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2750942707061768,
      "learning_rate": 0.00022011138180806796,
      "loss": 2.8452,
      "step": 134970
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7173550128936768,
      "learning_rate": 0.00022010743895751548,
      "loss": 2.9492,
      "step": 134971
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0168449878692627,
      "learning_rate": 0.0002201034961218165,
      "loss": 2.8472,
      "step": 134972
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.581902265548706,
      "learning_rate": 0.0002200995533009718,
      "loss": 2.9079,
      "step": 134973
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8568382263183594,
      "learning_rate": 0.0002200956104949822,
      "loss": 2.8757,
      "step": 134974
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0954458713531494,
      "learning_rate": 0.00022009166770384822,
      "loss": 2.7007,
      "step": 134975
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8935316801071167,
      "learning_rate": 0.00022008772492757074,
      "loss": 3.2033,
      "step": 134976
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2684357166290283,
      "learning_rate": 0.0002200837821661504,
      "loss": 3.2648,
      "step": 134977
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.46370267868042,
      "learning_rate": 0.00022007983941958802,
      "loss": 2.9544,
      "step": 134978
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1126022338867188,
      "learning_rate": 0.00022007589668788434,
      "loss": 2.9244,
      "step": 134979
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.008756399154663,
      "learning_rate": 0.00022007195397104017,
      "loss": 3.07,
      "step": 134980
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.163952112197876,
      "learning_rate": 0.000220068011269056,
      "loss": 2.9495,
      "step": 134981
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9729056358337402,
      "learning_rate": 0.00022006406858193268,
      "loss": 2.733,
      "step": 134982
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.1465506553649902,
      "learning_rate": 0.00022006012590967099,
      "loss": 2.7516,
      "step": 134983
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.093545436859131,
      "learning_rate": 0.0002200561832522716,
      "loss": 3.0415,
      "step": 134984
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8365955352783203,
      "learning_rate": 0.00022005224060973527,
      "loss": 3.1117,
      "step": 134985
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.1237294673919678,
      "learning_rate": 0.00022004829798206278,
      "loss": 2.8967,
      "step": 134986
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.9818572998046875,
      "learning_rate": 0.0002200443553692549,
      "loss": 3.1442,
      "step": 134987
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9977649450302124,
      "learning_rate": 0.00022004041277131218,
      "loss": 2.8541,
      "step": 134988
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.859524965286255,
      "learning_rate": 0.00022003647018823544,
      "loss": 2.9444,
      "step": 134989
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2409329414367676,
      "learning_rate": 0.00022003252762002547,
      "loss": 2.8358,
      "step": 134990
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.208122491836548,
      "learning_rate": 0.00022002858506668294,
      "loss": 2.754,
      "step": 134991
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.861886978149414,
      "learning_rate": 0.0002200246425282086,
      "loss": 3.0011,
      "step": 134992
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0153117179870605,
      "learning_rate": 0.00022002070000460335,
      "loss": 3.2332,
      "step": 134993
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3809914588928223,
      "learning_rate": 0.0002200167574958676,
      "loss": 3.0178,
      "step": 134994
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9759117364883423,
      "learning_rate": 0.00022001281500200223,
      "loss": 3.0605,
      "step": 134995
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4790430068969727,
      "learning_rate": 0.000220008872523008,
      "loss": 3.0899,
      "step": 134996
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0361738204956055,
      "learning_rate": 0.00022000493005888564,
      "loss": 2.9842,
      "step": 134997
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3495261669158936,
      "learning_rate": 0.00022000098760963587,
      "loss": 3.0458,
      "step": 134998
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.149473190307617,
      "learning_rate": 0.00021999704517525954,
      "loss": 3.2644,
      "step": 134999
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.75639009475708,
      "learning_rate": 0.0002199931027557572,
      "loss": 2.7618,
      "step": 135000
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.038766622543335,
      "learning_rate": 0.00021998916035112956,
      "loss": 3.1159,
      "step": 135001
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.396716356277466,
      "learning_rate": 0.0002199852179613775,
      "loss": 2.9227,
      "step": 135002
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.380244731903076,
      "learning_rate": 0.0002199812755865017,
      "loss": 2.6853,
      "step": 135003
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1994993686676025,
      "learning_rate": 0.0002199773332265029,
      "loss": 2.9133,
      "step": 135004
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5359857082366943,
      "learning_rate": 0.0002199733908813819,
      "loss": 2.871,
      "step": 135005
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7071421146392822,
      "learning_rate": 0.0002199694485511393,
      "loss": 2.8556,
      "step": 135006
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1632063388824463,
      "learning_rate": 0.00021996550623577585,
      "loss": 3.0804,
      "step": 135007
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.4790072441101074,
      "learning_rate": 0.00021996156393529233,
      "loss": 3.204,
      "step": 135008
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.531909942626953,
      "learning_rate": 0.00021995762164968944,
      "loss": 3.1704,
      "step": 135009
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.054405927658081,
      "learning_rate": 0.00021995367937896799,
      "loss": 2.9417,
      "step": 135010
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9761254787445068,
      "learning_rate": 0.00021994973712312872,
      "loss": 2.8085,
      "step": 135011
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.468294143676758,
      "learning_rate": 0.00021994579488217225,
      "loss": 3.0114,
      "step": 135012
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.842768669128418,
      "learning_rate": 0.00021994185265609935,
      "loss": 2.8673,
      "step": 135013
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4240353107452393,
      "learning_rate": 0.00021993791044491075,
      "loss": 2.7753,
      "step": 135014
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1043643951416016,
      "learning_rate": 0.00021993396824860722,
      "loss": 2.7924,
      "step": 135015
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3914365768432617,
      "learning_rate": 0.00021993002606718947,
      "loss": 3.0143,
      "step": 135016
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.886535406112671,
      "learning_rate": 0.00021992608390065835,
      "loss": 3.1717,
      "step": 135017
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.169039726257324,
      "learning_rate": 0.00021992214174901438,
      "loss": 2.9464,
      "step": 135018
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.162061929702759,
      "learning_rate": 0.00021991819961225838,
      "loss": 3.0805,
      "step": 135019
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0569777488708496,
      "learning_rate": 0.0002199142574903911,
      "loss": 3.0455,
      "step": 135020
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2179226875305176,
      "learning_rate": 0.0002199103153834133,
      "loss": 2.9524,
      "step": 135021
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.727468729019165,
      "learning_rate": 0.00021990637329132563,
      "loss": 2.8308,
      "step": 135022
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.260000705718994,
      "learning_rate": 0.00021990243121412906,
      "loss": 3.0108,
      "step": 135023
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.879061222076416,
      "learning_rate": 0.00021989848915182397,
      "loss": 2.824,
      "step": 135024
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3504090309143066,
      "learning_rate": 0.00021989454710441127,
      "loss": 2.8595,
      "step": 135025
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9304741621017456,
      "learning_rate": 0.00021989060507189174,
      "loss": 3.0265,
      "step": 135026
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2413089275360107,
      "learning_rate": 0.000219886663054266,
      "loss": 3.1094,
      "step": 135027
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8054686784744263,
      "learning_rate": 0.00021988272105153482,
      "loss": 2.788,
      "step": 135028
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9340482950210571,
      "learning_rate": 0.0002198787790636991,
      "loss": 2.9505,
      "step": 135029
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9446065425872803,
      "learning_rate": 0.00021987483709075934,
      "loss": 3.0458,
      "step": 135030
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0351314544677734,
      "learning_rate": 0.00021987089513271637,
      "loss": 2.9822,
      "step": 135031
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.112586498260498,
      "learning_rate": 0.00021986695318957085,
      "loss": 2.9001,
      "step": 135032
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1441266536712646,
      "learning_rate": 0.00021986301126132365,
      "loss": 3.2261,
      "step": 135033
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.393855094909668,
      "learning_rate": 0.00021985906934797537,
      "loss": 2.9515,
      "step": 135034
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1211156845092773,
      "learning_rate": 0.00021985512744952693,
      "loss": 2.9497,
      "step": 135035
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.907489061355591,
      "learning_rate": 0.0002198511855659788,
      "loss": 2.7015,
      "step": 135036
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8689428567886353,
      "learning_rate": 0.0002198472436973319,
      "loss": 3.0193,
      "step": 135037
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3006982803344727,
      "learning_rate": 0.00021984330184358688,
      "loss": 3.1379,
      "step": 135038
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9450209140777588,
      "learning_rate": 0.00021983936000474451,
      "loss": 2.945,
      "step": 135039
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.234977960586548,
      "learning_rate": 0.00021983541818080553,
      "loss": 2.9589,
      "step": 135040
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3930344581604004,
      "learning_rate": 0.00021983147637177073,
      "loss": 2.8157,
      "step": 135041
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.024629592895508,
      "learning_rate": 0.00021982753457764064,
      "loss": 2.9636,
      "step": 135042
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.833317756652832,
      "learning_rate": 0.0002198235927984162,
      "loss": 3.1014,
      "step": 135043
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5755743980407715,
      "learning_rate": 0.00021981965103409808,
      "loss": 3.1433,
      "step": 135044
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.446516752243042,
      "learning_rate": 0.00021981570928468697,
      "loss": 3.1451,
      "step": 135045
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.043649911880493,
      "learning_rate": 0.0002198117675501836,
      "loss": 2.8111,
      "step": 135046
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.484577178955078,
      "learning_rate": 0.00021980782583058881,
      "loss": 2.9331,
      "step": 135047
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.067516803741455,
      "learning_rate": 0.0002198038841259032,
      "loss": 2.9042,
      "step": 135048
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2691025733947754,
      "learning_rate": 0.00021979994243612755,
      "loss": 2.8294,
      "step": 135049
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.5656704902648926,
      "learning_rate": 0.00021979600076126266,
      "loss": 2.9856,
      "step": 135050
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.533543586730957,
      "learning_rate": 0.00021979205910130915,
      "loss": 2.9932,
      "step": 135051
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9561491012573242,
      "learning_rate": 0.0002197881174562679,
      "loss": 3.1016,
      "step": 135052
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1022307872772217,
      "learning_rate": 0.00021978417582613945,
      "loss": 2.8095,
      "step": 135053
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.960389494895935,
      "learning_rate": 0.00021978023421092476,
      "loss": 2.9851,
      "step": 135054
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0126564502716064,
      "learning_rate": 0.00021977629261062432,
      "loss": 2.9254,
      "step": 135055
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0871896743774414,
      "learning_rate": 0.000219772351025239,
      "loss": 2.9403,
      "step": 135056
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7716171741485596,
      "learning_rate": 0.00021976840945476953,
      "loss": 2.818,
      "step": 135057
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.674551010131836,
      "learning_rate": 0.0002197644678992166,
      "loss": 3.0041,
      "step": 135058
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.3700063228607178,
      "learning_rate": 0.00021976052635858107,
      "loss": 2.9011,
      "step": 135059
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9611550569534302,
      "learning_rate": 0.00021975658483286358,
      "loss": 2.6809,
      "step": 135060
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.852039337158203,
      "learning_rate": 0.00021975264332206473,
      "loss": 2.8762,
      "step": 135061
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0914313793182373,
      "learning_rate": 0.00021974870182618544,
      "loss": 2.9073,
      "step": 135062
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9684176445007324,
      "learning_rate": 0.00021974476034522636,
      "loss": 3.035,
      "step": 135063
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9726907014846802,
      "learning_rate": 0.0002197408188791882,
      "loss": 2.8494,
      "step": 135064
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9056956768035889,
      "learning_rate": 0.0002197368774280718,
      "loss": 3.0056,
      "step": 135065
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0035314559936523,
      "learning_rate": 0.00021973293599187793,
      "loss": 3.0178,
      "step": 135066
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8098058700561523,
      "learning_rate": 0.00021972899457060709,
      "loss": 2.9963,
      "step": 135067
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9801528453826904,
      "learning_rate": 0.0002197250531642602,
      "loss": 3.0538,
      "step": 135068
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3935883045196533,
      "learning_rate": 0.00021972111177283784,
      "loss": 2.8879,
      "step": 135069
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.120887041091919,
      "learning_rate": 0.00021971717039634091,
      "loss": 2.9908,
      "step": 135070
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.946446180343628,
      "learning_rate": 0.00021971322903477006,
      "loss": 2.8956,
      "step": 135071
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0413401126861572,
      "learning_rate": 0.00021970928768812615,
      "loss": 2.9246,
      "step": 135072
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0381360054016113,
      "learning_rate": 0.0002197053463564097,
      "loss": 2.8641,
      "step": 135073
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.040867328643799,
      "learning_rate": 0.00021970140503962152,
      "loss": 2.7941,
      "step": 135074
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.894510507583618,
      "learning_rate": 0.00021969746373776234,
      "loss": 2.8154,
      "step": 135075
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.274013042449951,
      "learning_rate": 0.00021969352245083297,
      "loss": 2.9367,
      "step": 135076
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4939005374908447,
      "learning_rate": 0.00021968958117883405,
      "loss": 2.8944,
      "step": 135077
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.259854793548584,
      "learning_rate": 0.00021968563992176652,
      "loss": 2.8835,
      "step": 135078
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.384119987487793,
      "learning_rate": 0.00021968169867963078,
      "loss": 2.8148,
      "step": 135079
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1130917072296143,
      "learning_rate": 0.00021967775745242773,
      "loss": 2.9045,
      "step": 135080
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.448521852493286,
      "learning_rate": 0.00021967381624015813,
      "loss": 2.8259,
      "step": 135081
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.746162176132202,
      "learning_rate": 0.00021966987504282263,
      "loss": 2.7128,
      "step": 135082
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.552670478820801,
      "learning_rate": 0.00021966593386042206,
      "loss": 3.1559,
      "step": 135083
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.95797598361969,
      "learning_rate": 0.00021966199269295725,
      "loss": 2.8454,
      "step": 135084
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.732545852661133,
      "learning_rate": 0.00021965805154042865,
      "loss": 2.772,
      "step": 135085
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.06740403175354,
      "learning_rate": 0.00021965411040283714,
      "loss": 3.1009,
      "step": 135086
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0459089279174805,
      "learning_rate": 0.0002196501692801834,
      "loss": 2.9617,
      "step": 135087
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8517088890075684,
      "learning_rate": 0.00021964622817246827,
      "loss": 3.1468,
      "step": 135088
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3109757900238037,
      "learning_rate": 0.0002196422870796924,
      "loss": 2.8439,
      "step": 135089
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6892335414886475,
      "learning_rate": 0.00021963834600185667,
      "loss": 2.8557,
      "step": 135090
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2035953998565674,
      "learning_rate": 0.00021963440493896156,
      "loss": 2.9016,
      "step": 135091
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.581892251968384,
      "learning_rate": 0.00021963046389100792,
      "loss": 3.0218,
      "step": 135092
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3249943256378174,
      "learning_rate": 0.0002196265228579965,
      "loss": 3.0674,
      "step": 135093
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.178176164627075,
      "learning_rate": 0.00021962258183992806,
      "loss": 2.9147,
      "step": 135094
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4894654750823975,
      "learning_rate": 0.00021961864083680326,
      "loss": 3.0503,
      "step": 135095
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9000327587127686,
      "learning_rate": 0.000219614699848623,
      "loss": 3.1017,
      "step": 135096
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5100841522216797,
      "learning_rate": 0.00021961075887538782,
      "loss": 3.035,
      "step": 135097
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.958604097366333,
      "learning_rate": 0.00021960681791709845,
      "loss": 3.0674,
      "step": 135098
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1145029067993164,
      "learning_rate": 0.00021960287697375572,
      "loss": 2.852,
      "step": 135099
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1529388427734375,
      "learning_rate": 0.00021959893604536033,
      "loss": 3.0082,
      "step": 135100
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.2273123264312744,
      "learning_rate": 0.00021959499513191297,
      "loss": 3.0396,
      "step": 135101
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.238652229309082,
      "learning_rate": 0.0002195910542334146,
      "loss": 2.7673,
      "step": 135102
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4419894218444824,
      "learning_rate": 0.0002195871133498656,
      "loss": 3.0371,
      "step": 135103
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1912333965301514,
      "learning_rate": 0.00021958317248126693,
      "loss": 2.8917,
      "step": 135104
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6358935832977295,
      "learning_rate": 0.00021957923162761923,
      "loss": 2.958,
      "step": 135105
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4882607460021973,
      "learning_rate": 0.00021957529078892328,
      "loss": 2.767,
      "step": 135106
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5369842052459717,
      "learning_rate": 0.00021957134996517978,
      "loss": 2.9913,
      "step": 135107
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8252971172332764,
      "learning_rate": 0.00021956740915638962,
      "loss": 3.1453,
      "step": 135108
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5345051288604736,
      "learning_rate": 0.00021956346836255326,
      "loss": 2.9655,
      "step": 135109
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.2089920043945312,
      "learning_rate": 0.00021955952758367163,
      "loss": 2.7997,
      "step": 135110
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5605015754699707,
      "learning_rate": 0.00021955558681974535,
      "loss": 2.9785,
      "step": 135111
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8537073135375977,
      "learning_rate": 0.0002195516460707752,
      "loss": 3.0966,
      "step": 135112
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9938392639160156,
      "learning_rate": 0.00021954770533676196,
      "loss": 3.0471,
      "step": 135113
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.382704019546509,
      "learning_rate": 0.0002195437646177064,
      "loss": 3.0391,
      "step": 135114
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.251142740249634,
      "learning_rate": 0.00021953982391360904,
      "loss": 3.0653,
      "step": 135115
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.176015853881836,
      "learning_rate": 0.00021953588322447078,
      "loss": 3.0438,
      "step": 135116
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2454404830932617,
      "learning_rate": 0.00021953194255029234,
      "loss": 2.9161,
      "step": 135117
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9603145122528076,
      "learning_rate": 0.0002195280018910744,
      "loss": 3.0617,
      "step": 135118
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0864779949188232,
      "learning_rate": 0.0002195240612468177,
      "loss": 2.9841,
      "step": 135119
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.347733974456787,
      "learning_rate": 0.000219520120617523,
      "loss": 2.7681,
      "step": 135120
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.297938108444214,
      "learning_rate": 0.00021951618000319115,
      "loss": 3.1267,
      "step": 135121
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9350881576538086,
      "learning_rate": 0.0002195122394038227,
      "loss": 3.0676,
      "step": 135122
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1399829387664795,
      "learning_rate": 0.00021950829881941841,
      "loss": 3.105,
      "step": 135123
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.645453929901123,
      "learning_rate": 0.00021950435824997903,
      "loss": 2.9697,
      "step": 135124
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8721212148666382,
      "learning_rate": 0.00021950041769550535,
      "loss": 2.7066,
      "step": 135125
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9188296794891357,
      "learning_rate": 0.000219496477155998,
      "loss": 2.775,
      "step": 135126
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.083780527114868,
      "learning_rate": 0.00021949253663145786,
      "loss": 2.9503,
      "step": 135127
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.249955415725708,
      "learning_rate": 0.00021948859612188557,
      "loss": 2.923,
      "step": 135128
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.433626890182495,
      "learning_rate": 0.0002194846556272819,
      "loss": 2.8248,
      "step": 135129
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5456738471984863,
      "learning_rate": 0.00021948071514764743,
      "loss": 2.9303,
      "step": 135130
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0591700077056885,
      "learning_rate": 0.00021947677468298307,
      "loss": 2.8455,
      "step": 135131
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9962918758392334,
      "learning_rate": 0.00021947283423328948,
      "loss": 3.0498,
      "step": 135132
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.087876558303833,
      "learning_rate": 0.00021946889379856747,
      "loss": 2.8447,
      "step": 135133
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0067241191864014,
      "learning_rate": 0.00021946495337881768,
      "loss": 2.8706,
      "step": 135134
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9226877689361572,
      "learning_rate": 0.00021946101297404085,
      "loss": 3.1717,
      "step": 135135
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9477064609527588,
      "learning_rate": 0.0002194570725842378,
      "loss": 2.8325,
      "step": 135136
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8839665651321411,
      "learning_rate": 0.00021945313220940918,
      "loss": 3.2083,
      "step": 135137
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.403501272201538,
      "learning_rate": 0.0002194491918495557,
      "loss": 2.7053,
      "step": 135138
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0085692405700684,
      "learning_rate": 0.00021944525150467816,
      "loss": 2.9281,
      "step": 135139
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6646487712860107,
      "learning_rate": 0.00021944131117477729,
      "loss": 2.7018,
      "step": 135140
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8894696235656738,
      "learning_rate": 0.00021943737085985374,
      "loss": 2.986,
      "step": 135141
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1392154693603516,
      "learning_rate": 0.00021943343055990834,
      "loss": 3.157,
      "step": 135142
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9461297988891602,
      "learning_rate": 0.00021942949027494175,
      "loss": 3.0034,
      "step": 135143
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.396554946899414,
      "learning_rate": 0.00021942555000495484,
      "loss": 2.8072,
      "step": 135144
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3107402324676514,
      "learning_rate": 0.00021942160974994823,
      "loss": 2.9446,
      "step": 135145
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.9423317909240723,
      "learning_rate": 0.00021941766950992262,
      "loss": 2.6833,
      "step": 135146
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9456162452697754,
      "learning_rate": 0.00021941372928487872,
      "loss": 2.8177,
      "step": 135147
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7771711349487305,
      "learning_rate": 0.00021940978907481742,
      "loss": 3.048,
      "step": 135148
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.151191234588623,
      "learning_rate": 0.00021940584887973928,
      "loss": 2.9053,
      "step": 135149
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3152685165405273,
      "learning_rate": 0.00021940190869964513,
      "loss": 3.0332,
      "step": 135150
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.3282511234283447,
      "learning_rate": 0.00021939796853453586,
      "loss": 2.8573,
      "step": 135151
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1510608196258545,
      "learning_rate": 0.00021939402838441185,
      "loss": 2.9245,
      "step": 135152
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0279746055603027,
      "learning_rate": 0.00021939008824927402,
      "loss": 3.1492,
      "step": 135153
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.08579421043396,
      "learning_rate": 0.00021938614812912313,
      "loss": 2.9801,
      "step": 135154
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.279021978378296,
      "learning_rate": 0.00021938220802395988,
      "loss": 3.0759,
      "step": 135155
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.112619400024414,
      "learning_rate": 0.00021937826793378494,
      "loss": 2.9491,
      "step": 135156
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.461232900619507,
      "learning_rate": 0.0002193743278585993,
      "loss": 3.0882,
      "step": 135157
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.908851146697998,
      "learning_rate": 0.00021937038779840332,
      "loss": 3.2178,
      "step": 135158
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.123729705810547,
      "learning_rate": 0.00021936644775319788,
      "loss": 2.9259,
      "step": 135159
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5338375568389893,
      "learning_rate": 0.0002193625077229838,
      "loss": 3.0507,
      "step": 135160
    },
    {
      "epoch": 1.76,
      "grad_norm": 5.083040237426758,
      "learning_rate": 0.00021935856770776172,
      "loss": 2.7965,
      "step": 135161
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.673126220703125,
      "learning_rate": 0.0002193546277075324,
      "loss": 3.1492,
      "step": 135162
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.098447799682617,
      "learning_rate": 0.00021935068772229675,
      "loss": 3.2671,
      "step": 135163
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.25832462310791,
      "learning_rate": 0.00021934674775205516,
      "loss": 2.9604,
      "step": 135164
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.4858055114746094,
      "learning_rate": 0.00021934280779680853,
      "loss": 3.0811,
      "step": 135165
    },
    {
      "epoch": 1.76,
      "grad_norm": 5.041034698486328,
      "learning_rate": 0.0002193388678565576,
      "loss": 2.7425,
      "step": 135166
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7010228633880615,
      "learning_rate": 0.0002193349279313031,
      "loss": 3.1055,
      "step": 135167
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1902897357940674,
      "learning_rate": 0.00021933098802104579,
      "loss": 3.2658,
      "step": 135168
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2480900287628174,
      "learning_rate": 0.0002193270481257865,
      "loss": 2.9865,
      "step": 135169
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.855757713317871,
      "learning_rate": 0.00021932310824552563,
      "loss": 2.9005,
      "step": 135170
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.397871971130371,
      "learning_rate": 0.0002193191683802642,
      "loss": 2.7973,
      "step": 135171
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.776230812072754,
      "learning_rate": 0.00021931522853000282,
      "loss": 3.0737,
      "step": 135172
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.110612630844116,
      "learning_rate": 0.00021931128869474227,
      "loss": 3.0969,
      "step": 135173
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.531881093978882,
      "learning_rate": 0.00021930734887448325,
      "loss": 2.8684,
      "step": 135174
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.639620780944824,
      "learning_rate": 0.00021930340906922666,
      "loss": 3.0985,
      "step": 135175
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.92556095123291,
      "learning_rate": 0.000219299469278973,
      "loss": 2.996,
      "step": 135176
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0106489658355713,
      "learning_rate": 0.00021929552950372303,
      "loss": 2.9884,
      "step": 135177
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.302854061126709,
      "learning_rate": 0.00021929158974347762,
      "loss": 2.8398,
      "step": 135178
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1107518672943115,
      "learning_rate": 0.00021928764999823737,
      "loss": 3.1619,
      "step": 135179
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0302860736846924,
      "learning_rate": 0.00021928371026800304,
      "loss": 2.903,
      "step": 135180
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.123586416244507,
      "learning_rate": 0.0002192797705527756,
      "loss": 2.926,
      "step": 135181
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8567172288894653,
      "learning_rate": 0.0002192758308525554,
      "loss": 2.9836,
      "step": 135182
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.21531343460083,
      "learning_rate": 0.00021927189116734334,
      "loss": 2.9242,
      "step": 135183
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0714259147644043,
      "learning_rate": 0.0002192679514971402,
      "loss": 2.8452,
      "step": 135184
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2690913677215576,
      "learning_rate": 0.00021926401184194663,
      "loss": 2.9322,
      "step": 135185
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.655834913253784,
      "learning_rate": 0.0002192600722017634,
      "loss": 2.9333,
      "step": 135186
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0557949542999268,
      "learning_rate": 0.00021925613257659128,
      "loss": 3.0401,
      "step": 135187
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.715348958969116,
      "learning_rate": 0.0002192521929664311,
      "loss": 2.8726,
      "step": 135188
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4654619693756104,
      "learning_rate": 0.00021924825337128328,
      "loss": 2.9265,
      "step": 135189
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.492379665374756,
      "learning_rate": 0.0002192443137911488,
      "loss": 3.0853,
      "step": 135190
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3812787532806396,
      "learning_rate": 0.0002192403742260283,
      "loss": 2.9612,
      "step": 135191
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1901028156280518,
      "learning_rate": 0.00021923643467592255,
      "loss": 2.8951,
      "step": 135192
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0386147499084473,
      "learning_rate": 0.00021923249514083223,
      "loss": 2.8964,
      "step": 135193
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.934424877166748,
      "learning_rate": 0.00021922855562075827,
      "loss": 2.7892,
      "step": 135194
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.23215913772583,
      "learning_rate": 0.00021922461611570114,
      "loss": 2.7369,
      "step": 135195
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.939410924911499,
      "learning_rate": 0.00021922067662566165,
      "loss": 2.9367,
      "step": 135196
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.166111707687378,
      "learning_rate": 0.00021921673715064057,
      "loss": 2.7947,
      "step": 135197
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.057551622390747,
      "learning_rate": 0.00021921279769063863,
      "loss": 3.0041,
      "step": 135198
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4837067127227783,
      "learning_rate": 0.00021920885824565654,
      "loss": 2.8233,
      "step": 135199
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8985989093780518,
      "learning_rate": 0.00021920491881569518,
      "loss": 2.8184,
      "step": 135200
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0435075759887695,
      "learning_rate": 0.00021920097940075504,
      "loss": 2.9896,
      "step": 135201
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.524024724960327,
      "learning_rate": 0.00021919704000083695,
      "loss": 3.0428,
      "step": 135202
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7251768112182617,
      "learning_rate": 0.00021919310061594162,
      "loss": 2.9957,
      "step": 135203
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0640711784362793,
      "learning_rate": 0.0002191891612460699,
      "loss": 2.9907,
      "step": 135204
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.459787607192993,
      "learning_rate": 0.00021918522189122238,
      "loss": 2.949,
      "step": 135205
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.766732096672058,
      "learning_rate": 0.00021918128255139996,
      "loss": 3.037,
      "step": 135206
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2410595417022705,
      "learning_rate": 0.0002191773432266032,
      "loss": 3.0891,
      "step": 135207
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.012031316757202,
      "learning_rate": 0.00021917340391683287,
      "loss": 2.8017,
      "step": 135208
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.935908317565918,
      "learning_rate": 0.00021916946462208973,
      "loss": 3.2073,
      "step": 135209
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3892102241516113,
      "learning_rate": 0.00021916552534237453,
      "loss": 3.2087,
      "step": 135210
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2772576808929443,
      "learning_rate": 0.00021916158607768795,
      "loss": 2.8362,
      "step": 135211
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.788487672805786,
      "learning_rate": 0.0002191576468280308,
      "loss": 2.822,
      "step": 135212
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0483322143554688,
      "learning_rate": 0.00021915370759340377,
      "loss": 2.8864,
      "step": 135213
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5306789875030518,
      "learning_rate": 0.00021914976837380764,
      "loss": 2.9573,
      "step": 135214
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.239039659500122,
      "learning_rate": 0.000219145829169243,
      "loss": 3.004,
      "step": 135215
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.85217547416687,
      "learning_rate": 0.0002191418899797107,
      "loss": 2.9213,
      "step": 135216
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3885228633880615,
      "learning_rate": 0.00021913795080521145,
      "loss": 2.9489,
      "step": 135217
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.049201488494873,
      "learning_rate": 0.00021913401164574603,
      "loss": 2.9867,
      "step": 135218
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5607516765594482,
      "learning_rate": 0.0002191300725013151,
      "loss": 2.8667,
      "step": 135219
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.651624917984009,
      "learning_rate": 0.00021912613337191937,
      "loss": 2.9228,
      "step": 135220
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2710635662078857,
      "learning_rate": 0.00021912219425755967,
      "loss": 2.9566,
      "step": 135221
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1101019382476807,
      "learning_rate": 0.00021911825515823666,
      "loss": 2.9336,
      "step": 135222
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.596501350402832,
      "learning_rate": 0.00021911431607395107,
      "loss": 2.9925,
      "step": 135223
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1327359676361084,
      "learning_rate": 0.00021911037700470375,
      "loss": 3.0539,
      "step": 135224
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5343306064605713,
      "learning_rate": 0.00021910643795049523,
      "loss": 3.1588,
      "step": 135225
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1150100231170654,
      "learning_rate": 0.00021910249891132636,
      "loss": 3.0216,
      "step": 135226
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.3619983196258545,
      "learning_rate": 0.0002190985598871979,
      "loss": 2.9661,
      "step": 135227
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5999655723571777,
      "learning_rate": 0.0002190946208781106,
      "loss": 3.0835,
      "step": 135228
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1275930404663086,
      "learning_rate": 0.00021909068188406505,
      "loss": 2.8538,
      "step": 135229
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9981855154037476,
      "learning_rate": 0.0002190867429050621,
      "loss": 3.2324,
      "step": 135230
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.61808180809021,
      "learning_rate": 0.00021908280394110244,
      "loss": 2.8364,
      "step": 135231
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.356867790222168,
      "learning_rate": 0.0002190788649921868,
      "loss": 2.9722,
      "step": 135232
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2228100299835205,
      "learning_rate": 0.00021907492605831592,
      "loss": 3.0512,
      "step": 135233
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.389481544494629,
      "learning_rate": 0.00021907098713949054,
      "loss": 2.9429,
      "step": 135234
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5317282676696777,
      "learning_rate": 0.00021906704823571147,
      "loss": 3.2002,
      "step": 135235
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.750996470451355,
      "learning_rate": 0.00021906310934697936,
      "loss": 2.8788,
      "step": 135236
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4053869247436523,
      "learning_rate": 0.00021905917047329487,
      "loss": 2.8246,
      "step": 135237
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.216348648071289,
      "learning_rate": 0.00021905523161465883,
      "loss": 2.9889,
      "step": 135238
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.940999150276184,
      "learning_rate": 0.00021905129277107193,
      "loss": 2.8509,
      "step": 135239
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.9227705001831055,
      "learning_rate": 0.00021904735394253494,
      "loss": 2.7778,
      "step": 135240
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0371129512786865,
      "learning_rate": 0.00021904341512904852,
      "loss": 3.2882,
      "step": 135241
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.121434450149536,
      "learning_rate": 0.00021903947633061365,
      "loss": 3.0174,
      "step": 135242
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2155678272247314,
      "learning_rate": 0.00021903553754723073,
      "loss": 2.8316,
      "step": 135243
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.219809055328369,
      "learning_rate": 0.00021903159877890063,
      "loss": 2.9558,
      "step": 135244
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4299886226654053,
      "learning_rate": 0.00021902766002562403,
      "loss": 3.0793,
      "step": 135245
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.860368013381958,
      "learning_rate": 0.0002190237212874018,
      "loss": 2.8946,
      "step": 135246
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.543839693069458,
      "learning_rate": 0.00021901978256423454,
      "loss": 2.7359,
      "step": 135247
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.158854007720947,
      "learning_rate": 0.0002190158438561232,
      "loss": 3.1232,
      "step": 135248
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.107299327850342,
      "learning_rate": 0.00021901190516306814,
      "loss": 2.8448,
      "step": 135249
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0286102294921875,
      "learning_rate": 0.00021900796648507037,
      "loss": 3.064,
      "step": 135250
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.533087968826294,
      "learning_rate": 0.00021900402782213052,
      "loss": 3.0362,
      "step": 135251
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6035077571868896,
      "learning_rate": 0.00021900008917424935,
      "loss": 3.0312,
      "step": 135252
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9189106225967407,
      "learning_rate": 0.00021899615054142758,
      "loss": 3.1242,
      "step": 135253
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.7141205072402954,
      "learning_rate": 0.00021899221192366597,
      "loss": 2.9543,
      "step": 135254
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.354180097579956,
      "learning_rate": 0.00021898827332096535,
      "loss": 2.799,
      "step": 135255
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.986507773399353,
      "learning_rate": 0.00021898433473332624,
      "loss": 3.1035,
      "step": 135256
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.846723198890686,
      "learning_rate": 0.00021898039616074946,
      "loss": 3.054,
      "step": 135257
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.098198413848877,
      "learning_rate": 0.00021897645760323573,
      "loss": 3.2397,
      "step": 135258
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.940773606300354,
      "learning_rate": 0.0002189725190607858,
      "loss": 2.8486,
      "step": 135259
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.53326678276062,
      "learning_rate": 0.00021896858053340045,
      "loss": 2.9982,
      "step": 135260
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.9528231620788574,
      "learning_rate": 0.00021896464202108052,
      "loss": 3.0723,
      "step": 135261
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.7822363376617432,
      "learning_rate": 0.00021896070352382638,
      "loss": 2.8108,
      "step": 135262
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.201369285583496,
      "learning_rate": 0.00021895676504163903,
      "loss": 2.7173,
      "step": 135263
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.200279951095581,
      "learning_rate": 0.00021895282657451916,
      "loss": 3.1499,
      "step": 135264
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3361382484436035,
      "learning_rate": 0.00021894888812246745,
      "loss": 3.0205,
      "step": 135265
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9816981554031372,
      "learning_rate": 0.00021894494968548465,
      "loss": 2.9159,
      "step": 135266
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2066802978515625,
      "learning_rate": 0.00021894101126357173,
      "loss": 2.8882,
      "step": 135267
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7557082176208496,
      "learning_rate": 0.000218937072856729,
      "loss": 3.0458,
      "step": 135268
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3048174381256104,
      "learning_rate": 0.00021893313446495744,
      "loss": 2.8754,
      "step": 135269
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1926169395446777,
      "learning_rate": 0.00021892919608825775,
      "loss": 2.9005,
      "step": 135270
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.036000967025757,
      "learning_rate": 0.0002189252577266306,
      "loss": 2.8203,
      "step": 135271
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0853567123413086,
      "learning_rate": 0.00021892131938007682,
      "loss": 3.0643,
      "step": 135272
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.346341371536255,
      "learning_rate": 0.00021891738104859717,
      "loss": 3.0294,
      "step": 135273
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1984212398529053,
      "learning_rate": 0.00021891344273219224,
      "loss": 2.9618,
      "step": 135274
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1999549865722656,
      "learning_rate": 0.00021890950443086282,
      "loss": 3.1857,
      "step": 135275
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.166535377502441,
      "learning_rate": 0.00021890556614460963,
      "loss": 3.2187,
      "step": 135276
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.628542184829712,
      "learning_rate": 0.00021890162787343345,
      "loss": 3.0521,
      "step": 135277
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.495511531829834,
      "learning_rate": 0.000218897689617335,
      "loss": 2.7191,
      "step": 135278
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1687374114990234,
      "learning_rate": 0.0002188937513763151,
      "loss": 2.9776,
      "step": 135279
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1088547706604004,
      "learning_rate": 0.0002188898131503743,
      "loss": 2.7557,
      "step": 135280
    },
    {
      "epoch": 1.76,
      "grad_norm": 5.4416184425354,
      "learning_rate": 0.00021888587493951333,
      "loss": 3.1751,
      "step": 135281
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.873138427734375,
      "learning_rate": 0.00021888193674373307,
      "loss": 3.058,
      "step": 135282
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1660373210906982,
      "learning_rate": 0.00021887799856303415,
      "loss": 3.0868,
      "step": 135283
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8773868083953857,
      "learning_rate": 0.00021887406039741738,
      "loss": 3.0821,
      "step": 135284
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.93103289604187,
      "learning_rate": 0.00021887012224688357,
      "loss": 2.7963,
      "step": 135285
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.252169609069824,
      "learning_rate": 0.0002188661841114332,
      "loss": 3.0918,
      "step": 135286
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.776708722114563,
      "learning_rate": 0.00021886224599106715,
      "loss": 2.8774,
      "step": 135287
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0058274269104004,
      "learning_rate": 0.00021885830788578615,
      "loss": 2.9083,
      "step": 135288
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.057227849960327,
      "learning_rate": 0.0002188543697955909,
      "loss": 3.0726,
      "step": 135289
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.702276349067688,
      "learning_rate": 0.00021885043172048215,
      "loss": 3.0158,
      "step": 135290
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3131020069122314,
      "learning_rate": 0.0002188464936604608,
      "loss": 2.8155,
      "step": 135291
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3957746028900146,
      "learning_rate": 0.00021884255561552726,
      "loss": 2.8745,
      "step": 135292
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.7304725646972656,
      "learning_rate": 0.00021883861758568242,
      "loss": 3.0768,
      "step": 135293
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.122708559036255,
      "learning_rate": 0.000218834679570927,
      "loss": 3.2359,
      "step": 135294
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.757823944091797,
      "learning_rate": 0.0002188307415712618,
      "loss": 3.0792,
      "step": 135295
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.175344467163086,
      "learning_rate": 0.00021882680358668744,
      "loss": 2.9649,
      "step": 135296
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6032919883728027,
      "learning_rate": 0.0002188228656172048,
      "loss": 2.7709,
      "step": 135297
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5928761959075928,
      "learning_rate": 0.0002188189276628145,
      "loss": 3.0285,
      "step": 135298
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9924300909042358,
      "learning_rate": 0.00021881498972351726,
      "loss": 2.7922,
      "step": 135299
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8763973712921143,
      "learning_rate": 0.00021881105179931385,
      "loss": 2.8965,
      "step": 135300
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8815994262695312,
      "learning_rate": 0.00021880711389020497,
      "loss": 2.8829,
      "step": 135301
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2953579425811768,
      "learning_rate": 0.00021880317599619136,
      "loss": 2.9017,
      "step": 135302
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.841928005218506,
      "learning_rate": 0.00021879923811727388,
      "loss": 2.9536,
      "step": 135303
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6730847358703613,
      "learning_rate": 0.00021879530025345308,
      "loss": 2.9169,
      "step": 135304
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1643991470336914,
      "learning_rate": 0.00021879136240472976,
      "loss": 3.0733,
      "step": 135305
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.1726274490356445,
      "learning_rate": 0.00021878742457110472,
      "loss": 2.8696,
      "step": 135306
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.1072773933410645,
      "learning_rate": 0.0002187834867525786,
      "loss": 2.7928,
      "step": 135307
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.678363800048828,
      "learning_rate": 0.00021877954894915214,
      "loss": 3.0301,
      "step": 135308
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1405622959136963,
      "learning_rate": 0.00021877561116082615,
      "loss": 3.038,
      "step": 135309
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.026251792907715,
      "learning_rate": 0.00021877167338760122,
      "loss": 2.9147,
      "step": 135310
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.458682060241699,
      "learning_rate": 0.0002187677356294782,
      "loss": 3.1187,
      "step": 135311
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.676602363586426,
      "learning_rate": 0.0002187637978864578,
      "loss": 3.1876,
      "step": 135312
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9523836374282837,
      "learning_rate": 0.00021875986015854079,
      "loss": 3.175,
      "step": 135313
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2798681259155273,
      "learning_rate": 0.00021875592244572785,
      "loss": 2.8195,
      "step": 135314
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6110947132110596,
      "learning_rate": 0.00021875198474801967,
      "loss": 3.0051,
      "step": 135315
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.7490402460098267,
      "learning_rate": 0.00021874804706541705,
      "loss": 3.2804,
      "step": 135316
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0973289012908936,
      "learning_rate": 0.00021874410939792067,
      "loss": 2.9037,
      "step": 135317
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.484708786010742,
      "learning_rate": 0.0002187401717455313,
      "loss": 3.0405,
      "step": 135318
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.539783000946045,
      "learning_rate": 0.0002187362341082497,
      "loss": 2.9584,
      "step": 135319
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3714468479156494,
      "learning_rate": 0.00021873229648607657,
      "loss": 2.8812,
      "step": 135320
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0219919681549072,
      "learning_rate": 0.00021872835887901265,
      "loss": 3.2032,
      "step": 135321
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4564743041992188,
      "learning_rate": 0.00021872442128705863,
      "loss": 2.9549,
      "step": 135322
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0611891746520996,
      "learning_rate": 0.0002187204837102153,
      "loss": 3.3589,
      "step": 135323
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.7351950407028198,
      "learning_rate": 0.00021871654614848327,
      "loss": 3.0053,
      "step": 135324
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7922801971435547,
      "learning_rate": 0.00021871260860186346,
      "loss": 2.9367,
      "step": 135325
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.91546630859375,
      "learning_rate": 0.00021870867107035648,
      "loss": 2.9686,
      "step": 135326
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9388011693954468,
      "learning_rate": 0.00021870473355396314,
      "loss": 2.9224,
      "step": 135327
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.166886329650879,
      "learning_rate": 0.00021870079605268414,
      "loss": 3.0393,
      "step": 135328
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9890656471252441,
      "learning_rate": 0.00021869685856652012,
      "loss": 2.8155,
      "step": 135329
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.96146821975708,
      "learning_rate": 0.0002186929210954719,
      "loss": 3.2409,
      "step": 135330
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0378830432891846,
      "learning_rate": 0.0002186889836395402,
      "loss": 2.8709,
      "step": 135331
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1290459632873535,
      "learning_rate": 0.00021868504619872576,
      "loss": 2.9798,
      "step": 135332
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.899587392807007,
      "learning_rate": 0.0002186811087730293,
      "loss": 2.8292,
      "step": 135333
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4509694576263428,
      "learning_rate": 0.00021867717136245167,
      "loss": 2.9606,
      "step": 135334
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.6188130378723145,
      "learning_rate": 0.00021867323396699341,
      "loss": 2.9238,
      "step": 135335
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8753607273101807,
      "learning_rate": 0.00021866929658665528,
      "loss": 3.0855,
      "step": 135336
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.125838756561279,
      "learning_rate": 0.00021866535922143807,
      "loss": 3.0354,
      "step": 135337
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.495448589324951,
      "learning_rate": 0.0002186614218713425,
      "loss": 3.2408,
      "step": 135338
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8982737064361572,
      "learning_rate": 0.00021865748453636936,
      "loss": 2.996,
      "step": 135339
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.169048309326172,
      "learning_rate": 0.0002186535472165194,
      "loss": 2.8113,
      "step": 135340
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.664768934249878,
      "learning_rate": 0.00021864960991179315,
      "loss": 3.115,
      "step": 135341
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.2917864322662354,
      "learning_rate": 0.00021864567262219154,
      "loss": 2.8693,
      "step": 135342
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.478816509246826,
      "learning_rate": 0.00021864173534771514,
      "loss": 2.9133,
      "step": 135343
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9702485799789429,
      "learning_rate": 0.00021863779808836486,
      "loss": 3.1365,
      "step": 135344
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2314674854278564,
      "learning_rate": 0.00021863386084414128,
      "loss": 2.981,
      "step": 135345
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.023289442062378,
      "learning_rate": 0.0002186299236150454,
      "loss": 2.805,
      "step": 135346
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.315502166748047,
      "learning_rate": 0.00021862598640107756,
      "loss": 2.9232,
      "step": 135347
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4605822563171387,
      "learning_rate": 0.00021862204920223871,
      "loss": 3.1433,
      "step": 135348
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8883867263793945,
      "learning_rate": 0.00021861811201852958,
      "loss": 2.9195,
      "step": 135349
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.11727237701416,
      "learning_rate": 0.00021861417484995082,
      "loss": 3.0769,
      "step": 135350
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7384819984436035,
      "learning_rate": 0.0002186102376965033,
      "loss": 2.9502,
      "step": 135351
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.360849618911743,
      "learning_rate": 0.00021860630055818775,
      "loss": 2.7725,
      "step": 135352
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0503575801849365,
      "learning_rate": 0.00021860236343500473,
      "loss": 3.0059,
      "step": 135353
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.805495500564575,
      "learning_rate": 0.00021859842632695502,
      "loss": 3.0479,
      "step": 135354
    },
    {
      "epoch": 1.76,
      "grad_norm": 5.089657783508301,
      "learning_rate": 0.00021859448923403947,
      "loss": 3.0131,
      "step": 135355
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.744800567626953,
      "learning_rate": 0.00021859055215625868,
      "loss": 2.9617,
      "step": 135356
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.022779941558838,
      "learning_rate": 0.00021858661509361344,
      "loss": 2.9485,
      "step": 135357
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.242159128189087,
      "learning_rate": 0.00021858267804610463,
      "loss": 2.7585,
      "step": 135358
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8210699558258057,
      "learning_rate": 0.00021857874101373272,
      "loss": 2.8808,
      "step": 135359
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.837576389312744,
      "learning_rate": 0.00021857480399649855,
      "loss": 3.0647,
      "step": 135360
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.152876853942871,
      "learning_rate": 0.00021857086699440285,
      "loss": 3.0285,
      "step": 135361
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.775543212890625,
      "learning_rate": 0.0002185669300074464,
      "loss": 2.8341,
      "step": 135362
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1895387172698975,
      "learning_rate": 0.00021856299303562985,
      "loss": 2.8218,
      "step": 135363
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4723448753356934,
      "learning_rate": 0.00021855905607895412,
      "loss": 2.8332,
      "step": 135364
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.203958511352539,
      "learning_rate": 0.00021855511913741968,
      "loss": 2.6986,
      "step": 135365
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.08739972114563,
      "learning_rate": 0.00021855118221102737,
      "loss": 2.9733,
      "step": 135366
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.164968967437744,
      "learning_rate": 0.00021854724529977797,
      "loss": 2.8892,
      "step": 135367
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2991678714752197,
      "learning_rate": 0.00021854330840367212,
      "loss": 2.6604,
      "step": 135368
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.717072010040283,
      "learning_rate": 0.00021853937152271064,
      "loss": 2.9879,
      "step": 135369
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.065495252609253,
      "learning_rate": 0.00021853543465689433,
      "loss": 3.0663,
      "step": 135370
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.792334794998169,
      "learning_rate": 0.00021853149780622373,
      "loss": 3.0747,
      "step": 135371
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.973098874092102,
      "learning_rate": 0.00021852756097069962,
      "loss": 3.0729,
      "step": 135372
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.433472156524658,
      "learning_rate": 0.0002185236241503228,
      "loss": 2.9549,
      "step": 135373
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2993810176849365,
      "learning_rate": 0.000218519687345094,
      "loss": 3.0892,
      "step": 135374
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.977240800857544,
      "learning_rate": 0.00021851575055501386,
      "loss": 2.8166,
      "step": 135375
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.534400701522827,
      "learning_rate": 0.00021851181378008337,
      "loss": 3.1465,
      "step": 135376
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2637016773223877,
      "learning_rate": 0.00021850787702030293,
      "loss": 2.9872,
      "step": 135377
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.42268967628479,
      "learning_rate": 0.0002185039402756734,
      "loss": 2.7713,
      "step": 135378
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.580714702606201,
      "learning_rate": 0.00021850000354619554,
      "loss": 3.0762,
      "step": 135379
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.023268938064575,
      "learning_rate": 0.00021849606683187005,
      "loss": 2.908,
      "step": 135380
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.636064291000366,
      "learning_rate": 0.0002184921301326977,
      "loss": 2.94,
      "step": 135381
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5858101844787598,
      "learning_rate": 0.00021848819344867925,
      "loss": 2.9875,
      "step": 135382
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0599188804626465,
      "learning_rate": 0.0002184842567798154,
      "loss": 3.1421,
      "step": 135383
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3826262950897217,
      "learning_rate": 0.00021848032012610678,
      "loss": 2.8604,
      "step": 135384
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0257177352905273,
      "learning_rate": 0.0002184763834875542,
      "loss": 3.2514,
      "step": 135385
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.963213562965393,
      "learning_rate": 0.00021847244686415846,
      "loss": 2.7708,
      "step": 135386
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.132394790649414,
      "learning_rate": 0.00021846851025592018,
      "loss": 2.9214,
      "step": 135387
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.024517059326172,
      "learning_rate": 0.00021846457366284016,
      "loss": 3.0155,
      "step": 135388
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1741085052490234,
      "learning_rate": 0.00021846063708491915,
      "loss": 3.2004,
      "step": 135389
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9556245803833008,
      "learning_rate": 0.00021845670052215787,
      "loss": 2.9762,
      "step": 135390
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0549888610839844,
      "learning_rate": 0.00021845276397455693,
      "loss": 3.0352,
      "step": 135391
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.503462791442871,
      "learning_rate": 0.0002184488274421172,
      "loss": 3.1131,
      "step": 135392
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9365662336349487,
      "learning_rate": 0.00021844489092483937,
      "loss": 2.9674,
      "step": 135393
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.047865390777588,
      "learning_rate": 0.00021844095442272415,
      "loss": 2.8579,
      "step": 135394
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4761292934417725,
      "learning_rate": 0.00021843701793577242,
      "loss": 2.7646,
      "step": 135395
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.0280466079711914,
      "learning_rate": 0.00021843308146398467,
      "loss": 2.9205,
      "step": 135396
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.6924141645431519,
      "learning_rate": 0.00021842914500736182,
      "loss": 2.8033,
      "step": 135397
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.097074031829834,
      "learning_rate": 0.0002184252085659045,
      "loss": 2.7526,
      "step": 135398
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.15899395942688,
      "learning_rate": 0.00021842127213961342,
      "loss": 3.0292,
      "step": 135399
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3554110527038574,
      "learning_rate": 0.0002184173357284894,
      "loss": 3.0232,
      "step": 135400
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0289247035980225,
      "learning_rate": 0.0002184133993325332,
      "loss": 2.8666,
      "step": 135401
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3253047466278076,
      "learning_rate": 0.00021840946295174544,
      "loss": 2.8047,
      "step": 135402
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8543215990066528,
      "learning_rate": 0.00021840552658612686,
      "loss": 2.995,
      "step": 135403
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8945051431655884,
      "learning_rate": 0.00021840159023567824,
      "loss": 3.013,
      "step": 135404
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3405027389526367,
      "learning_rate": 0.00021839765390040038,
      "loss": 3.0866,
      "step": 135405
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.006822109222412,
      "learning_rate": 0.00021839371758029387,
      "loss": 3.1327,
      "step": 135406
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.405421495437622,
      "learning_rate": 0.00021838978127535958,
      "loss": 2.899,
      "step": 135407
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.334972381591797,
      "learning_rate": 0.0002183858449855981,
      "loss": 3.196,
      "step": 135408
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.166304588317871,
      "learning_rate": 0.00021838190871101024,
      "loss": 2.8724,
      "step": 135409
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7487587928771973,
      "learning_rate": 0.00021837797245159675,
      "loss": 2.7913,
      "step": 135410
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4424679279327393,
      "learning_rate": 0.00021837403620735829,
      "loss": 3.0366,
      "step": 135411
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.874755859375,
      "learning_rate": 0.0002183700999782957,
      "loss": 2.9886,
      "step": 135412
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2236227989196777,
      "learning_rate": 0.00021836616376440964,
      "loss": 2.86,
      "step": 135413
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.063829183578491,
      "learning_rate": 0.00021836222756570084,
      "loss": 3.2291,
      "step": 135414
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.314295530319214,
      "learning_rate": 0.00021835829138217003,
      "loss": 2.9507,
      "step": 135415
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.4227511882781982,
      "learning_rate": 0.00021835435521381794,
      "loss": 2.8005,
      "step": 135416
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.423795700073242,
      "learning_rate": 0.0002183504190606453,
      "loss": 3.1415,
      "step": 135417
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.2065157890319824,
      "learning_rate": 0.0002183464829226529,
      "loss": 3.2347,
      "step": 135418
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.409543991088867,
      "learning_rate": 0.00021834254679984152,
      "loss": 2.9358,
      "step": 135419
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5956664085388184,
      "learning_rate": 0.0002183386106922117,
      "loss": 2.971,
      "step": 135420
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.445866346359253,
      "learning_rate": 0.00021833467459976425,
      "loss": 2.9917,
      "step": 135421
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.196096181869507,
      "learning_rate": 0.00021833073852249994,
      "loss": 2.9348,
      "step": 135422
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7640089988708496,
      "learning_rate": 0.00021832680246041948,
      "loss": 2.9184,
      "step": 135423
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.225067377090454,
      "learning_rate": 0.0002183228664135236,
      "loss": 2.8227,
      "step": 135424
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8113324642181396,
      "learning_rate": 0.00021831893038181316,
      "loss": 2.809,
      "step": 135425
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.2917747497558594,
      "learning_rate": 0.00021831499436528867,
      "loss": 2.8365,
      "step": 135426
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.154822826385498,
      "learning_rate": 0.000218311058363951,
      "loss": 2.8818,
      "step": 135427
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.757322072982788,
      "learning_rate": 0.0002183071223778008,
      "loss": 3.0293,
      "step": 135428
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9804869890213013,
      "learning_rate": 0.00021830318640683884,
      "loss": 2.8983,
      "step": 135429
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.627856731414795,
      "learning_rate": 0.00021829925045106587,
      "loss": 2.9232,
      "step": 135430
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.457707643508911,
      "learning_rate": 0.00021829531451048273,
      "loss": 2.993,
      "step": 135431
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.698493719100952,
      "learning_rate": 0.00021829137858508994,
      "loss": 3.1036,
      "step": 135432
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.884474754333496,
      "learning_rate": 0.00021828744267488832,
      "loss": 2.8946,
      "step": 135433
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.93096661567688,
      "learning_rate": 0.0002182835067798786,
      "loss": 2.6233,
      "step": 135434
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.324798345565796,
      "learning_rate": 0.00021827957090006148,
      "loss": 2.9814,
      "step": 135435
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.252373218536377,
      "learning_rate": 0.0002182756350354378,
      "loss": 2.9436,
      "step": 135436
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2104830741882324,
      "learning_rate": 0.0002182716991860083,
      "loss": 3.1075,
      "step": 135437
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.84445321559906,
      "learning_rate": 0.0002182677633517735,
      "loss": 3.0958,
      "step": 135438
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2890260219573975,
      "learning_rate": 0.0002182638275327343,
      "loss": 3.1946,
      "step": 135439
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.975676417350769,
      "learning_rate": 0.00021825989172889138,
      "loss": 2.8676,
      "step": 135440
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.116276502609253,
      "learning_rate": 0.00021825595594024546,
      "loss": 3.0999,
      "step": 135441
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.978236198425293,
      "learning_rate": 0.00021825202016679735,
      "loss": 3.0864,
      "step": 135442
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1766602993011475,
      "learning_rate": 0.00021824808440854782,
      "loss": 3.0728,
      "step": 135443
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.482855796813965,
      "learning_rate": 0.0002182441486654974,
      "loss": 2.6484,
      "step": 135444
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9850788116455078,
      "learning_rate": 0.00021824021293764698,
      "loss": 3.0192,
      "step": 135445
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.133765459060669,
      "learning_rate": 0.00021823627722499718,
      "loss": 2.7304,
      "step": 135446
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.225429058074951,
      "learning_rate": 0.00021823234152754884,
      "loss": 3.0642,
      "step": 135447
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.575411558151245,
      "learning_rate": 0.00021822840584530262,
      "loss": 2.8123,
      "step": 135448
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.565279006958008,
      "learning_rate": 0.00021822447017825943,
      "loss": 2.6724,
      "step": 135449
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9731467962265015,
      "learning_rate": 0.00021822053452641974,
      "loss": 2.8758,
      "step": 135450
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5132553577423096,
      "learning_rate": 0.0002182165988897844,
      "loss": 2.8809,
      "step": 135451
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.292280673980713,
      "learning_rate": 0.0002182126632683541,
      "loss": 2.8426,
      "step": 135452
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.025393486022949,
      "learning_rate": 0.00021820872766212967,
      "loss": 2.9592,
      "step": 135453
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2266647815704346,
      "learning_rate": 0.00021820479207111172,
      "loss": 3.258,
      "step": 135454
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.061880111694336,
      "learning_rate": 0.00021820085649530108,
      "loss": 2.7731,
      "step": 135455
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0243024826049805,
      "learning_rate": 0.00021819692093469855,
      "loss": 2.959,
      "step": 135456
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.083226442337036,
      "learning_rate": 0.00021819298538930463,
      "loss": 3.075,
      "step": 135457
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.951397657394409,
      "learning_rate": 0.0002181890498591202,
      "loss": 2.8946,
      "step": 135458
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4312963485717773,
      "learning_rate": 0.00021818511434414594,
      "loss": 2.8955,
      "step": 135459
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7333908081054688,
      "learning_rate": 0.00021818117884438265,
      "loss": 3.0759,
      "step": 135460
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0549118518829346,
      "learning_rate": 0.000218177243359831,
      "loss": 3.1137,
      "step": 135461
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.165757179260254,
      "learning_rate": 0.00021817330789049186,
      "loss": 2.8445,
      "step": 135462
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2586441040039062,
      "learning_rate": 0.00021816937243636573,
      "loss": 2.8687,
      "step": 135463
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1661620140075684,
      "learning_rate": 0.00021816543699745345,
      "loss": 2.9788,
      "step": 135464
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.8301546573638916,
      "learning_rate": 0.0002181615015737558,
      "loss": 3.0299,
      "step": 135465
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.721750497817993,
      "learning_rate": 0.00021815756616527346,
      "loss": 2.8961,
      "step": 135466
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.298365592956543,
      "learning_rate": 0.00021815363077200712,
      "loss": 2.7909,
      "step": 135467
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.4776527881622314,
      "learning_rate": 0.00021814969539395775,
      "loss": 2.9751,
      "step": 135468
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.238691568374634,
      "learning_rate": 0.00021814576003112573,
      "loss": 3.0745,
      "step": 135469
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2291746139526367,
      "learning_rate": 0.000218141824683512,
      "loss": 2.8855,
      "step": 135470
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.189208507537842,
      "learning_rate": 0.0002181378893511172,
      "loss": 2.9213,
      "step": 135471
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4136345386505127,
      "learning_rate": 0.00021813395403394214,
      "loss": 2.8549,
      "step": 135472
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.7090213298797607,
      "learning_rate": 0.00021813001873198753,
      "loss": 3.2106,
      "step": 135473
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9158843755722046,
      "learning_rate": 0.00021812608344525414,
      "loss": 3.0812,
      "step": 135474
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1110661029815674,
      "learning_rate": 0.00021812214817374268,
      "loss": 2.8643,
      "step": 135475
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.25101900100708,
      "learning_rate": 0.0002181182129174538,
      "loss": 3.12,
      "step": 135476
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8020431995391846,
      "learning_rate": 0.00021811427767638825,
      "loss": 2.9208,
      "step": 135477
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.493119955062866,
      "learning_rate": 0.00021811034245054683,
      "loss": 3.1366,
      "step": 135478
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2509348392486572,
      "learning_rate": 0.00021810640723993023,
      "loss": 3.1795,
      "step": 135479
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5652103424072266,
      "learning_rate": 0.00021810247204453927,
      "loss": 3.0029,
      "step": 135480
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1367111206054688,
      "learning_rate": 0.00021809853686437456,
      "loss": 2.9661,
      "step": 135481
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9289029836654663,
      "learning_rate": 0.00021809460169943691,
      "loss": 3.0192,
      "step": 135482
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.193250894546509,
      "learning_rate": 0.00021809066654972695,
      "loss": 2.8487,
      "step": 135483
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.311760902404785,
      "learning_rate": 0.0002180867314152455,
      "loss": 2.9694,
      "step": 135484
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3381402492523193,
      "learning_rate": 0.00021808279629599323,
      "loss": 2.9931,
      "step": 135485
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.773216485977173,
      "learning_rate": 0.00021807886119197106,
      "loss": 2.7861,
      "step": 135486
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.530602216720581,
      "learning_rate": 0.00021807492610317945,
      "loss": 2.8149,
      "step": 135487
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5139267444610596,
      "learning_rate": 0.00021807099102961927,
      "loss": 2.9986,
      "step": 135488
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.04187273979187,
      "learning_rate": 0.00021806705597129133,
      "loss": 3.0863,
      "step": 135489
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.781839370727539,
      "learning_rate": 0.00021806312092819614,
      "loss": 2.8984,
      "step": 135490
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4154489040374756,
      "learning_rate": 0.00021805918590033467,
      "loss": 3.0584,
      "step": 135491
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.707747220993042,
      "learning_rate": 0.0002180552508877075,
      "loss": 3.0828,
      "step": 135492
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4336209297180176,
      "learning_rate": 0.0002180513158903154,
      "loss": 3.1517,
      "step": 135493
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5657331943511963,
      "learning_rate": 0.00021804738090815912,
      "loss": 3.2212,
      "step": 135494
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.306122303009033,
      "learning_rate": 0.00021804344594123932,
      "loss": 3.0181,
      "step": 135495
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.504319667816162,
      "learning_rate": 0.00021803951098955685,
      "loss": 2.9281,
      "step": 135496
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3334248065948486,
      "learning_rate": 0.00021803557605311243,
      "loss": 2.9234,
      "step": 135497
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8192360401153564,
      "learning_rate": 0.00021803164113190672,
      "loss": 3.1633,
      "step": 135498
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.124976873397827,
      "learning_rate": 0.00021802770622594044,
      "loss": 2.9997,
      "step": 135499
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.142479419708252,
      "learning_rate": 0.00021802377133521434,
      "loss": 2.8162,
      "step": 135500
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.38126540184021,
      "learning_rate": 0.00021801983645972916,
      "loss": 3.0778,
      "step": 135501
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4634969234466553,
      "learning_rate": 0.00021801590159948564,
      "loss": 3.023,
      "step": 135502
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8292511701583862,
      "learning_rate": 0.00021801196675448457,
      "loss": 2.8601,
      "step": 135503
    },
    {
      "epoch": 1.76,
      "grad_norm": 5.2708048820495605,
      "learning_rate": 0.00021800803192472667,
      "loss": 3.183,
      "step": 135504
    },
    {
      "epoch": 1.76,
      "grad_norm": 4.178829193115234,
      "learning_rate": 0.00021800409711021253,
      "loss": 2.8737,
      "step": 135505
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.9132652282714844,
      "learning_rate": 0.00021800016231094297,
      "loss": 2.9023,
      "step": 135506
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9592567682266235,
      "learning_rate": 0.00021799622752691878,
      "loss": 3.1306,
      "step": 135507
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.8330166339874268,
      "learning_rate": 0.00021799229275814056,
      "loss": 2.7,
      "step": 135508
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.203237533569336,
      "learning_rate": 0.00021798835800460915,
      "loss": 3.2455,
      "step": 135509
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.476635694503784,
      "learning_rate": 0.00021798442326632537,
      "loss": 2.6971,
      "step": 135510
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.286543369293213,
      "learning_rate": 0.00021798048854328975,
      "loss": 3.0871,
      "step": 135511
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5252721309661865,
      "learning_rate": 0.00021797655383550305,
      "loss": 2.9165,
      "step": 135512
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.955538034439087,
      "learning_rate": 0.0002179726191429661,
      "loss": 3.2122,
      "step": 135513
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.437805414199829,
      "learning_rate": 0.00021796868446567958,
      "loss": 3.0853,
      "step": 135514
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4849066734313965,
      "learning_rate": 0.0002179647498036442,
      "loss": 2.8984,
      "step": 135515
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9869495630264282,
      "learning_rate": 0.0002179608151568609,
      "loss": 2.868,
      "step": 135516
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.477546453475952,
      "learning_rate": 0.00021795688052533006,
      "loss": 3.1004,
      "step": 135517
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9275147914886475,
      "learning_rate": 0.0002179529459090526,
      "loss": 2.8598,
      "step": 135518
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6930320262908936,
      "learning_rate": 0.00021794901130802927,
      "loss": 3.0159,
      "step": 135519
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.750364303588867,
      "learning_rate": 0.0002179450767222607,
      "loss": 2.9985,
      "step": 135520
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.4919183254241943,
      "learning_rate": 0.00021794114215174774,
      "loss": 2.6861,
      "step": 135521
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.51631498336792,
      "learning_rate": 0.00021793720759649107,
      "loss": 3.0097,
      "step": 135522
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5072593688964844,
      "learning_rate": 0.00021793327305649152,
      "loss": 3.0188,
      "step": 135523
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.08392596244812,
      "learning_rate": 0.0002179293385317496,
      "loss": 2.9962,
      "step": 135524
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.640869617462158,
      "learning_rate": 0.00021792540402226616,
      "loss": 2.7696,
      "step": 135525
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4796090126037598,
      "learning_rate": 0.00021792146952804195,
      "loss": 3.0794,
      "step": 135526
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1624958515167236,
      "learning_rate": 0.00021791753504907772,
      "loss": 3.0692,
      "step": 135527
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.516841411590576,
      "learning_rate": 0.00021791360058537416,
      "loss": 2.9858,
      "step": 135528
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6688530445098877,
      "learning_rate": 0.00021790966613693212,
      "loss": 2.9748,
      "step": 135529
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9700274467468262,
      "learning_rate": 0.00021790573170375204,
      "loss": 3.0231,
      "step": 135530
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.073882818222046,
      "learning_rate": 0.00021790179728583493,
      "loss": 2.8645,
      "step": 135531
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.034421443939209,
      "learning_rate": 0.00021789786288318137,
      "loss": 3.0641,
      "step": 135532
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2262561321258545,
      "learning_rate": 0.00021789392849579217,
      "loss": 3.0679,
      "step": 135533
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4826512336730957,
      "learning_rate": 0.00021788999412366802,
      "loss": 2.9029,
      "step": 135534
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.413750410079956,
      "learning_rate": 0.0002178860597668098,
      "loss": 3.2998,
      "step": 135535
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1312477588653564,
      "learning_rate": 0.00021788212542521798,
      "loss": 2.9988,
      "step": 135536
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.6107022762298584,
      "learning_rate": 0.00021787819109889345,
      "loss": 2.8682,
      "step": 135537
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9604756832122803,
      "learning_rate": 0.00021787425678783688,
      "loss": 3.2112,
      "step": 135538
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.155003786087036,
      "learning_rate": 0.00021787032249204907,
      "loss": 3.102,
      "step": 135539
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.421442985534668,
      "learning_rate": 0.0002178663882115307,
      "loss": 2.6407,
      "step": 135540
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9424512386322021,
      "learning_rate": 0.00021786245394628265,
      "loss": 3.0206,
      "step": 135541
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2444608211517334,
      "learning_rate": 0.00021785851969630539,
      "loss": 3.0585,
      "step": 135542
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.6765429973602295,
      "learning_rate": 0.0002178545854615998,
      "loss": 3.0957,
      "step": 135543
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.4984662532806396,
      "learning_rate": 0.00021785065124216657,
      "loss": 3.2431,
      "step": 135544
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.2309441566467285,
      "learning_rate": 0.00021784671703800645,
      "loss": 2.853,
      "step": 135545
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.0951220989227295,
      "learning_rate": 0.00021784278284912018,
      "loss": 2.9755,
      "step": 135546
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.51471209526062,
      "learning_rate": 0.00021783884867550862,
      "loss": 3.078,
      "step": 135547
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9420394897460938,
      "learning_rate": 0.00021783491451717225,
      "loss": 3.1047,
      "step": 135548
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.1191043853759766,
      "learning_rate": 0.00021783098037411194,
      "loss": 3.005,
      "step": 135549
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3615682125091553,
      "learning_rate": 0.00021782704624632833,
      "loss": 3.1879,
      "step": 135550
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.9100016355514526,
      "learning_rate": 0.0002178231121338223,
      "loss": 2.8149,
      "step": 135551
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.5749034881591797,
      "learning_rate": 0.00021781917803659443,
      "loss": 2.9238,
      "step": 135552
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.3511874675750732,
      "learning_rate": 0.0002178152439546457,
      "loss": 2.9395,
      "step": 135553
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3398194313049316,
      "learning_rate": 0.00021781130988797657,
      "loss": 2.8193,
      "step": 135554
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0374605655670166,
      "learning_rate": 0.0002178073758365878,
      "loss": 3.1373,
      "step": 135555
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0427474975585938,
      "learning_rate": 0.00021780344180048026,
      "loss": 2.8097,
      "step": 135556
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4599695205688477,
      "learning_rate": 0.00021779950777965455,
      "loss": 3.1617,
      "step": 135557
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.893103003501892,
      "learning_rate": 0.00021779557377411149,
      "loss": 2.885,
      "step": 135558
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2357795238494873,
      "learning_rate": 0.00021779163978385196,
      "loss": 2.8757,
      "step": 135559
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0578818321228027,
      "learning_rate": 0.0002177877058088763,
      "loss": 2.7405,
      "step": 135560
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9010355472564697,
      "learning_rate": 0.0002177837718491855,
      "loss": 3.077,
      "step": 135561
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9068546295166016,
      "learning_rate": 0.00021777983790478026,
      "loss": 3.1251,
      "step": 135562
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.334357738494873,
      "learning_rate": 0.00021777590397566127,
      "loss": 2.8705,
      "step": 135563
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2635393142700195,
      "learning_rate": 0.00021777197006182933,
      "loss": 2.8965,
      "step": 135564
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.407371759414673,
      "learning_rate": 0.00021776803616328517,
      "loss": 3.0706,
      "step": 135565
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3997232913970947,
      "learning_rate": 0.00021776410228002943,
      "loss": 2.697,
      "step": 135566
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0724542140960693,
      "learning_rate": 0.00021776016841206294,
      "loss": 2.9765,
      "step": 135567
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9844439029693604,
      "learning_rate": 0.00021775623455938634,
      "loss": 3.0781,
      "step": 135568
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.004345178604126,
      "learning_rate": 0.00021775230072200043,
      "loss": 2.9477,
      "step": 135569
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5809690952301025,
      "learning_rate": 0.00021774836689990589,
      "loss": 3.0853,
      "step": 135570
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1312711238861084,
      "learning_rate": 0.00021774443309310353,
      "loss": 3.0497,
      "step": 135571
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.3975577354431152,
      "learning_rate": 0.00021774049930159398,
      "loss": 3.2929,
      "step": 135572
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.114450693130493,
      "learning_rate": 0.00021773656552537808,
      "loss": 2.929,
      "step": 135573
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.766294479370117,
      "learning_rate": 0.00021773263176445647,
      "loss": 2.8765,
      "step": 135574
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2706680297851562,
      "learning_rate": 0.00021772869801882995,
      "loss": 2.8149,
      "step": 135575
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9440598487854004,
      "learning_rate": 0.00021772476428849917,
      "loss": 3.0163,
      "step": 135576
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.928567409515381,
      "learning_rate": 0.00021772083057346498,
      "loss": 3.146,
      "step": 135577
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.031979560852051,
      "learning_rate": 0.00021771689687372797,
      "loss": 3.0063,
      "step": 135578
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4572627544403076,
      "learning_rate": 0.0002177129631892889,
      "loss": 3.0431,
      "step": 135579
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9333386421203613,
      "learning_rate": 0.00021770902952014863,
      "loss": 2.8266,
      "step": 135580
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.251634120941162,
      "learning_rate": 0.00021770509586630784,
      "loss": 2.7336,
      "step": 135581
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.358983039855957,
      "learning_rate": 0.00021770116222776716,
      "loss": 2.8841,
      "step": 135582
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2637643814086914,
      "learning_rate": 0.0002176972286045274,
      "loss": 2.7932,
      "step": 135583
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8584017753601074,
      "learning_rate": 0.0002176932949965893,
      "loss": 2.8493,
      "step": 135584
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7352421283721924,
      "learning_rate": 0.0002176893614039535,
      "loss": 3.1348,
      "step": 135585
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.263763189315796,
      "learning_rate": 0.00021768542782662082,
      "loss": 3.0232,
      "step": 135586
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3233768939971924,
      "learning_rate": 0.000217681494264592,
      "loss": 3.3325,
      "step": 135587
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.898677945137024,
      "learning_rate": 0.0002176775607178677,
      "loss": 2.9197,
      "step": 135588
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9509994983673096,
      "learning_rate": 0.00021767362718644881,
      "loss": 2.6423,
      "step": 135589
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1551904678344727,
      "learning_rate": 0.0002176696936703359,
      "loss": 3.2565,
      "step": 135590
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.0178701877593994,
      "learning_rate": 0.00021766576016952975,
      "loss": 3.058,
      "step": 135591
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2924790382385254,
      "learning_rate": 0.00021766182668403103,
      "loss": 2.9378,
      "step": 135592
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.770965099334717,
      "learning_rate": 0.00021765789321384052,
      "loss": 2.8721,
      "step": 135593
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3442022800445557,
      "learning_rate": 0.00021765395975895902,
      "loss": 3.0886,
      "step": 135594
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5308923721313477,
      "learning_rate": 0.00021765002631938716,
      "loss": 2.9486,
      "step": 135595
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9614062309265137,
      "learning_rate": 0.00021764609289512586,
      "loss": 3.1483,
      "step": 135596
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.314377784729004,
      "learning_rate": 0.00021764215948617558,
      "loss": 3.2838,
      "step": 135597
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0046706199645996,
      "learning_rate": 0.00021763822609253714,
      "loss": 2.8189,
      "step": 135598
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.496443271636963,
      "learning_rate": 0.00021763429271421135,
      "loss": 3.0699,
      "step": 135599
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.24025297164917,
      "learning_rate": 0.00021763035935119888,
      "loss": 2.7886,
      "step": 135600
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.407658100128174,
      "learning_rate": 0.00021762642600350052,
      "loss": 2.755,
      "step": 135601
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4360318183898926,
      "learning_rate": 0.00021762249267111704,
      "loss": 2.8916,
      "step": 135602
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5371503829956055,
      "learning_rate": 0.000217618559354049,
      "loss": 3.1593,
      "step": 135603
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.602314472198486,
      "learning_rate": 0.0002176146260522972,
      "loss": 2.6883,
      "step": 135604
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7366580963134766,
      "learning_rate": 0.00021761069276586242,
      "loss": 2.7483,
      "step": 135605
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0841610431671143,
      "learning_rate": 0.00021760675949474532,
      "loss": 3.0082,
      "step": 135606
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7438695430755615,
      "learning_rate": 0.00021760282623894673,
      "loss": 3.3025,
      "step": 135607
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2352912425994873,
      "learning_rate": 0.00021759889299846743,
      "loss": 3.0907,
      "step": 135608
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0369741916656494,
      "learning_rate": 0.0002175949597733079,
      "loss": 3.1085,
      "step": 135609
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.273136615753174,
      "learning_rate": 0.0002175910265634691,
      "loss": 2.8679,
      "step": 135610
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.300488233566284,
      "learning_rate": 0.0002175870933689516,
      "loss": 3.1508,
      "step": 135611
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0818850994110107,
      "learning_rate": 0.00021758316018975627,
      "loss": 3.1417,
      "step": 135612
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9702250957489014,
      "learning_rate": 0.0002175792270258838,
      "loss": 2.7824,
      "step": 135613
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9269986152648926,
      "learning_rate": 0.000217575293877335,
      "loss": 3.1967,
      "step": 135614
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6361234188079834,
      "learning_rate": 0.0002175713607441104,
      "loss": 2.7797,
      "step": 135615
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.89499568939209,
      "learning_rate": 0.00021756742762621084,
      "loss": 2.7681,
      "step": 135616
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3103432655334473,
      "learning_rate": 0.00021756349452363698,
      "loss": 3.0785,
      "step": 135617
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3062546253204346,
      "learning_rate": 0.0002175595614363897,
      "loss": 3.13,
      "step": 135618
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9913883209228516,
      "learning_rate": 0.00021755562836446961,
      "loss": 3.049,
      "step": 135619
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0317909717559814,
      "learning_rate": 0.00021755169530787767,
      "loss": 2.9951,
      "step": 135620
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1704883575439453,
      "learning_rate": 0.00021754776226661429,
      "loss": 2.8838,
      "step": 135621
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.569119930267334,
      "learning_rate": 0.00021754382924068027,
      "loss": 2.8611,
      "step": 135622
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3102171421051025,
      "learning_rate": 0.00021753989623007647,
      "loss": 2.8637,
      "step": 135623
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4078357219696045,
      "learning_rate": 0.00021753596323480356,
      "loss": 3.0504,
      "step": 135624
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7284430265426636,
      "learning_rate": 0.00021753203025486224,
      "loss": 3.0091,
      "step": 135625
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7895102500915527,
      "learning_rate": 0.0002175280972902534,
      "loss": 2.6651,
      "step": 135626
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.324741840362549,
      "learning_rate": 0.0002175241643409775,
      "loss": 2.8968,
      "step": 135627
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9826730489730835,
      "learning_rate": 0.00021752023140703548,
      "loss": 3.0656,
      "step": 135628
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5966339111328125,
      "learning_rate": 0.00021751629848842797,
      "loss": 3.062,
      "step": 135629
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.950305461883545,
      "learning_rate": 0.0002175123655851557,
      "loss": 2.7272,
      "step": 135630
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.132995843887329,
      "learning_rate": 0.00021750843269721952,
      "loss": 2.9929,
      "step": 135631
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.591000556945801,
      "learning_rate": 0.00021750449982462013,
      "loss": 2.9814,
      "step": 135632
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5613627433776855,
      "learning_rate": 0.00021750056696735813,
      "loss": 3.1082,
      "step": 135633
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5964114665985107,
      "learning_rate": 0.0002174966341254343,
      "loss": 2.859,
      "step": 135634
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0736217498779297,
      "learning_rate": 0.0002174927012988494,
      "loss": 3.0863,
      "step": 135635
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.849337577819824,
      "learning_rate": 0.0002174887684876042,
      "loss": 2.9114,
      "step": 135636
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1132874488830566,
      "learning_rate": 0.0002174848356916994,
      "loss": 3.0444,
      "step": 135637
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9106656312942505,
      "learning_rate": 0.0002174809029111358,
      "loss": 2.9965,
      "step": 135638
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.561575412750244,
      "learning_rate": 0.000217476970145914,
      "loss": 2.8657,
      "step": 135639
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.126788854598999,
      "learning_rate": 0.00021747303739603474,
      "loss": 3.0943,
      "step": 135640
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.7761054039001465,
      "learning_rate": 0.00021746910466149882,
      "loss": 2.9192,
      "step": 135641
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3400046825408936,
      "learning_rate": 0.00021746517194230693,
      "loss": 3.1371,
      "step": 135642
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9500133991241455,
      "learning_rate": 0.00021746123923845986,
      "loss": 2.9209,
      "step": 135643
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9708809852600098,
      "learning_rate": 0.0002174573065499584,
      "loss": 3.0023,
      "step": 135644
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4227588176727295,
      "learning_rate": 0.00021745337387680305,
      "loss": 2.9813,
      "step": 135645
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0944106578826904,
      "learning_rate": 0.00021744944121899467,
      "loss": 3.061,
      "step": 135646
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1167566776275635,
      "learning_rate": 0.00021744550857653408,
      "loss": 2.9133,
      "step": 135647
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6500537395477295,
      "learning_rate": 0.00021744157594942183,
      "loss": 3.0102,
      "step": 135648
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.945185661315918,
      "learning_rate": 0.00021743764333765882,
      "loss": 2.9604,
      "step": 135649
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.525890827178955,
      "learning_rate": 0.0002174337107412457,
      "loss": 2.9672,
      "step": 135650
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8926570415496826,
      "learning_rate": 0.00021742977816018326,
      "loss": 3.0438,
      "step": 135651
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4972755908966064,
      "learning_rate": 0.0002174258455944721,
      "loss": 2.7832,
      "step": 135652
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.480269908905029,
      "learning_rate": 0.00021742191304411308,
      "loss": 2.8953,
      "step": 135653
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.913662075996399,
      "learning_rate": 0.00021741798050910685,
      "loss": 2.8559,
      "step": 135654
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4026942253112793,
      "learning_rate": 0.00021741404798945416,
      "loss": 2.8578,
      "step": 135655
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.041473865509033,
      "learning_rate": 0.00021741011548515575,
      "loss": 2.4598,
      "step": 135656
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8094396591186523,
      "learning_rate": 0.00021740618299621243,
      "loss": 3.0106,
      "step": 135657
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2386467456817627,
      "learning_rate": 0.00021740225052262484,
      "loss": 3.0027,
      "step": 135658
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0084457397460938,
      "learning_rate": 0.00021739831806439377,
      "loss": 2.9829,
      "step": 135659
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3576653003692627,
      "learning_rate": 0.00021739438562151984,
      "loss": 2.903,
      "step": 135660
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4434115886688232,
      "learning_rate": 0.00021739045319400387,
      "loss": 2.7615,
      "step": 135661
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.157763719558716,
      "learning_rate": 0.00021738652078184657,
      "loss": 3.0802,
      "step": 135662
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.267282485961914,
      "learning_rate": 0.0002173825883850487,
      "loss": 2.7342,
      "step": 135663
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.169567584991455,
      "learning_rate": 0.00021737865600361095,
      "loss": 2.9215,
      "step": 135664
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9568768739700317,
      "learning_rate": 0.00021737472363753403,
      "loss": 2.9401,
      "step": 135665
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8876183032989502,
      "learning_rate": 0.00021737079128681883,
      "loss": 2.8783,
      "step": 135666
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.019390821456909,
      "learning_rate": 0.00021736685895146581,
      "loss": 3.2123,
      "step": 135667
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.454758405685425,
      "learning_rate": 0.00021736292663147592,
      "loss": 2.529,
      "step": 135668
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.469597101211548,
      "learning_rate": 0.00021735899432684988,
      "loss": 3.1144,
      "step": 135669
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.312070846557617,
      "learning_rate": 0.00021735506203758825,
      "loss": 2.8701,
      "step": 135670
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.941220998764038,
      "learning_rate": 0.00021735112976369197,
      "loss": 2.8573,
      "step": 135671
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0246026515960693,
      "learning_rate": 0.00021734719750516156,
      "loss": 2.8524,
      "step": 135672
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.0232908725738525,
      "learning_rate": 0.00021734326526199802,
      "loss": 2.8547,
      "step": 135673
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9196466207504272,
      "learning_rate": 0.00021733933303420185,
      "loss": 2.9821,
      "step": 135674
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.625308036804199,
      "learning_rate": 0.00021733540082177386,
      "loss": 2.9603,
      "step": 135675
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0715575218200684,
      "learning_rate": 0.00021733146862471474,
      "loss": 3.2303,
      "step": 135676
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.826385736465454,
      "learning_rate": 0.00021732753644302527,
      "loss": 2.8955,
      "step": 135677
    },
    {
      "epoch": 1.77,
      "grad_norm": 5.148196220397949,
      "learning_rate": 0.00021732360427670618,
      "loss": 2.8088,
      "step": 135678
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2265000343322754,
      "learning_rate": 0.0002173196721257582,
      "loss": 3.109,
      "step": 135679
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2308716773986816,
      "learning_rate": 0.00021731573999018213,
      "loss": 2.9651,
      "step": 135680
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1313657760620117,
      "learning_rate": 0.00021731180786997858,
      "loss": 2.9955,
      "step": 135681
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5711710453033447,
      "learning_rate": 0.00021730787576514828,
      "loss": 3.1772,
      "step": 135682
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9440662860870361,
      "learning_rate": 0.00021730394367569205,
      "loss": 3.0443,
      "step": 135683
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2684919834136963,
      "learning_rate": 0.0002173000116016105,
      "loss": 2.7695,
      "step": 135684
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.909335732460022,
      "learning_rate": 0.00021729607954290448,
      "loss": 3.0897,
      "step": 135685
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0495452880859375,
      "learning_rate": 0.00021729214749957465,
      "loss": 3.042,
      "step": 135686
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9154269695281982,
      "learning_rate": 0.00021728821547162196,
      "loss": 2.9698,
      "step": 135687
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.261599063873291,
      "learning_rate": 0.0002172842834590468,
      "loss": 2.8587,
      "step": 135688
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2909345626831055,
      "learning_rate": 0.00021728035146185003,
      "loss": 2.987,
      "step": 135689
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9564743041992188,
      "learning_rate": 0.0002172764194800324,
      "loss": 2.8199,
      "step": 135690
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8056846857070923,
      "learning_rate": 0.00021727248751359467,
      "loss": 3.0396,
      "step": 135691
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9051322937011719,
      "learning_rate": 0.00021726855556253752,
      "loss": 3.0408,
      "step": 135692
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5737874507904053,
      "learning_rate": 0.00021726462362686187,
      "loss": 3.0831,
      "step": 135693
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.3996026515960693,
      "learning_rate": 0.00021726069170656815,
      "loss": 3.1121,
      "step": 135694
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8972949981689453,
      "learning_rate": 0.00021725675980165723,
      "loss": 3.0475,
      "step": 135695
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9997496604919434,
      "learning_rate": 0.00021725282791212983,
      "loss": 2.8743,
      "step": 135696
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8246432542800903,
      "learning_rate": 0.0002172488960379867,
      "loss": 2.9728,
      "step": 135697
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.225031852722168,
      "learning_rate": 0.00021724496417922855,
      "loss": 3.004,
      "step": 135698
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.703335762023926,
      "learning_rate": 0.00021724103233585626,
      "loss": 2.8474,
      "step": 135699
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5125298500061035,
      "learning_rate": 0.00021723710050787032,
      "loss": 2.8567,
      "step": 135700
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0594828128814697,
      "learning_rate": 0.0002172331686952715,
      "loss": 3.0881,
      "step": 135701
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.9279212951660156,
      "learning_rate": 0.00021722923689806067,
      "loss": 2.9313,
      "step": 135702
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.209453582763672,
      "learning_rate": 0.0002172253051162384,
      "loss": 2.7965,
      "step": 135703
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.731365203857422,
      "learning_rate": 0.00021722137334980558,
      "loss": 2.9773,
      "step": 135704
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.247875690460205,
      "learning_rate": 0.00021721744159876297,
      "loss": 3.0939,
      "step": 135705
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2571208477020264,
      "learning_rate": 0.00021721350986311113,
      "loss": 2.9208,
      "step": 135706
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.396322727203369,
      "learning_rate": 0.0002172095781428508,
      "loss": 2.859,
      "step": 135707
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.3647994995117188,
      "learning_rate": 0.00021720564643798278,
      "loss": 3.0646,
      "step": 135708
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0555708408355713,
      "learning_rate": 0.00021720171474850779,
      "loss": 3.1022,
      "step": 135709
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.32964825630188,
      "learning_rate": 0.0002171977830744266,
      "loss": 2.7701,
      "step": 135710
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.941767692565918,
      "learning_rate": 0.00021719385141573996,
      "loss": 3.0389,
      "step": 135711
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.042292594909668,
      "learning_rate": 0.0002171899197724485,
      "loss": 2.6937,
      "step": 135712
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8798991441726685,
      "learning_rate": 0.00021718598814455292,
      "loss": 2.9472,
      "step": 135713
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1620230674743652,
      "learning_rate": 0.00021718205653205408,
      "loss": 2.7614,
      "step": 135714
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9799442291259766,
      "learning_rate": 0.00021717812493495268,
      "loss": 3.1052,
      "step": 135715
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8834474086761475,
      "learning_rate": 0.0002171741933532494,
      "loss": 2.9194,
      "step": 135716
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.142975330352783,
      "learning_rate": 0.0002171702617869451,
      "loss": 3.3554,
      "step": 135717
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.033555269241333,
      "learning_rate": 0.00021716633023604034,
      "loss": 2.8243,
      "step": 135718
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8542120456695557,
      "learning_rate": 0.00021716239870053585,
      "loss": 2.929,
      "step": 135719
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.371941089630127,
      "learning_rate": 0.0002171584671804325,
      "loss": 2.941,
      "step": 135720
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.155055284500122,
      "learning_rate": 0.0002171545356757309,
      "loss": 2.9595,
      "step": 135721
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9587956666946411,
      "learning_rate": 0.0002171506041864319,
      "loss": 2.8692,
      "step": 135722
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9597457647323608,
      "learning_rate": 0.00021714667271253613,
      "loss": 2.8055,
      "step": 135723
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.606999158859253,
      "learning_rate": 0.00021714274125404448,
      "loss": 3.0684,
      "step": 135724
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7886290550231934,
      "learning_rate": 0.00021713880981095746,
      "loss": 2.8058,
      "step": 135725
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.581312417984009,
      "learning_rate": 0.00021713487838327585,
      "loss": 3.061,
      "step": 135726
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1383843421936035,
      "learning_rate": 0.00021713094697100045,
      "loss": 2.8986,
      "step": 135727
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6272268295288086,
      "learning_rate": 0.000217127015574132,
      "loss": 3.0735,
      "step": 135728
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4478771686553955,
      "learning_rate": 0.00021712308419267117,
      "loss": 2.7092,
      "step": 135729
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9038805961608887,
      "learning_rate": 0.00021711915282661887,
      "loss": 3.0547,
      "step": 135730
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2327466011047363,
      "learning_rate": 0.00021711522147597556,
      "loss": 3.1806,
      "step": 135731
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0266811847686768,
      "learning_rate": 0.00021711129014074208,
      "loss": 3.0744,
      "step": 135732
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.632934808731079,
      "learning_rate": 0.0002171073588209192,
      "loss": 2.7953,
      "step": 135733
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9615366458892822,
      "learning_rate": 0.0002171034275165076,
      "loss": 2.7406,
      "step": 135734
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0704760551452637,
      "learning_rate": 0.00021709949622750803,
      "loss": 2.8509,
      "step": 135735
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.967165470123291,
      "learning_rate": 0.0002170955649539214,
      "loss": 2.897,
      "step": 135736
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1480724811553955,
      "learning_rate": 0.00021709163369574807,
      "loss": 2.9494,
      "step": 135737
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9189884662628174,
      "learning_rate": 0.00021708770245298904,
      "loss": 3.0733,
      "step": 135738
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1737728118896484,
      "learning_rate": 0.00021708377122564495,
      "loss": 3.0196,
      "step": 135739
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0986149311065674,
      "learning_rate": 0.00021707984001371653,
      "loss": 2.91,
      "step": 135740
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.149413824081421,
      "learning_rate": 0.00021707590881720455,
      "loss": 2.9729,
      "step": 135741
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.763131618499756,
      "learning_rate": 0.00021707197763610979,
      "loss": 2.9314,
      "step": 135742
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.655097484588623,
      "learning_rate": 0.00021706804647043292,
      "loss": 2.9382,
      "step": 135743
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3390846252441406,
      "learning_rate": 0.0002170641153201746,
      "loss": 3.0276,
      "step": 135744
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9975119829177856,
      "learning_rate": 0.00021706018418533562,
      "loss": 2.9809,
      "step": 135745
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.99574613571167,
      "learning_rate": 0.00021705625306591672,
      "loss": 2.7162,
      "step": 135746
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4033596515655518,
      "learning_rate": 0.0002170523219619186,
      "loss": 2.9125,
      "step": 135747
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.3777384757995605,
      "learning_rate": 0.00021704839087334214,
      "loss": 2.8749,
      "step": 135748
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4909331798553467,
      "learning_rate": 0.00021704445980018784,
      "loss": 2.7114,
      "step": 135749
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.268832206726074,
      "learning_rate": 0.00021704052874245655,
      "loss": 2.9927,
      "step": 135750
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.52301549911499,
      "learning_rate": 0.00021703659770014907,
      "loss": 2.9714,
      "step": 135751
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9235119819641113,
      "learning_rate": 0.00021703266667326597,
      "loss": 2.7902,
      "step": 135752
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.01173734664917,
      "learning_rate": 0.00021702873566180805,
      "loss": 2.8998,
      "step": 135753
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1180896759033203,
      "learning_rate": 0.00021702480466577616,
      "loss": 3.0009,
      "step": 135754
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4909818172454834,
      "learning_rate": 0.00021702087368517087,
      "loss": 3.046,
      "step": 135755
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.290822744369507,
      "learning_rate": 0.00021701694271999291,
      "loss": 3.0922,
      "step": 135756
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.903787136077881,
      "learning_rate": 0.0002170130117702431,
      "loss": 2.7861,
      "step": 135757
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1821436882019043,
      "learning_rate": 0.0002170090808359222,
      "loss": 2.9825,
      "step": 135758
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3110690116882324,
      "learning_rate": 0.00021700514991703082,
      "loss": 3.1285,
      "step": 135759
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9063732624053955,
      "learning_rate": 0.00021700121901356982,
      "loss": 3.0968,
      "step": 135760
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.369816780090332,
      "learning_rate": 0.00021699728812553973,
      "loss": 2.9283,
      "step": 135761
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.628514528274536,
      "learning_rate": 0.0002169933572529415,
      "loss": 2.9768,
      "step": 135762
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.180793523788452,
      "learning_rate": 0.00021698942639577574,
      "loss": 2.829,
      "step": 135763
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1107139587402344,
      "learning_rate": 0.00021698549555404316,
      "loss": 2.8662,
      "step": 135764
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.11179256439209,
      "learning_rate": 0.00021698156472774467,
      "loss": 3.0634,
      "step": 135765
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.12060284614563,
      "learning_rate": 0.00021697763391688085,
      "loss": 2.9701,
      "step": 135766
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9571104049682617,
      "learning_rate": 0.00021697370312145242,
      "loss": 3.0972,
      "step": 135767
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1506187915802,
      "learning_rate": 0.00021696977234146012,
      "loss": 2.9299,
      "step": 135768
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4735825061798096,
      "learning_rate": 0.00021696584157690474,
      "loss": 2.9912,
      "step": 135769
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.55908203125,
      "learning_rate": 0.00021696191082778694,
      "loss": 2.8939,
      "step": 135770
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.381504774093628,
      "learning_rate": 0.00021695798009410745,
      "loss": 2.7942,
      "step": 135771
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.163224697113037,
      "learning_rate": 0.00021695404937586722,
      "loss": 2.7843,
      "step": 135772
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4096944332122803,
      "learning_rate": 0.0002169501186730667,
      "loss": 3.0066,
      "step": 135773
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.628333330154419,
      "learning_rate": 0.00021694618798570668,
      "loss": 2.8086,
      "step": 135774
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9905056953430176,
      "learning_rate": 0.00021694225731378792,
      "loss": 2.5636,
      "step": 135775
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8844199180603027,
      "learning_rate": 0.0002169383266573112,
      "loss": 3.0195,
      "step": 135776
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.258172035217285,
      "learning_rate": 0.0002169343960162772,
      "loss": 3.0591,
      "step": 135777
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.690695285797119,
      "learning_rate": 0.0002169304653906868,
      "loss": 3.1324,
      "step": 135778
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.0694570541381836,
      "learning_rate": 0.00021692653478054042,
      "loss": 2.9727,
      "step": 135779
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8547779321670532,
      "learning_rate": 0.00021692260418583898,
      "loss": 3.2035,
      "step": 135780
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.799166440963745,
      "learning_rate": 0.00021691867360658324,
      "loss": 2.9241,
      "step": 135781
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1260738372802734,
      "learning_rate": 0.00021691474304277384,
      "loss": 2.9356,
      "step": 135782
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.090902805328369,
      "learning_rate": 0.0002169108124944116,
      "loss": 2.857,
      "step": 135783
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.839661717414856,
      "learning_rate": 0.00021690688196149727,
      "loss": 2.842,
      "step": 135784
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4771294593811035,
      "learning_rate": 0.00021690295144403143,
      "loss": 3.1995,
      "step": 135785
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.145014524459839,
      "learning_rate": 0.0002168990209420149,
      "loss": 3.1718,
      "step": 135786
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4105916023254395,
      "learning_rate": 0.00021689509045544843,
      "loss": 2.7842,
      "step": 135787
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.766448736190796,
      "learning_rate": 0.00021689115998433273,
      "loss": 2.8686,
      "step": 135788
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.048156261444092,
      "learning_rate": 0.00021688722952866853,
      "loss": 2.8582,
      "step": 135789
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6262340545654297,
      "learning_rate": 0.00021688329908845657,
      "loss": 2.9113,
      "step": 135790
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1764986515045166,
      "learning_rate": 0.00021687936866369764,
      "loss": 2.6279,
      "step": 135791
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3502397537231445,
      "learning_rate": 0.00021687543825439236,
      "loss": 2.9885,
      "step": 135792
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.039458751678467,
      "learning_rate": 0.00021687150786054143,
      "loss": 2.9148,
      "step": 135793
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0681352615356445,
      "learning_rate": 0.0002168675774821457,
      "loss": 3.0179,
      "step": 135794
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.96358323097229,
      "learning_rate": 0.00021686364711920585,
      "loss": 2.936,
      "step": 135795
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.264040231704712,
      "learning_rate": 0.00021685971677172261,
      "loss": 2.9415,
      "step": 135796
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.400299072265625,
      "learning_rate": 0.00021685578643969688,
      "loss": 2.9609,
      "step": 135797
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9147982597351074,
      "learning_rate": 0.00021685185612312907,
      "loss": 2.8554,
      "step": 135798
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2465007305145264,
      "learning_rate": 0.00021684792582202003,
      "loss": 2.9699,
      "step": 135799
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0931642055511475,
      "learning_rate": 0.00021684399553637058,
      "loss": 3.0287,
      "step": 135800
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9142719507217407,
      "learning_rate": 0.0002168400652661814,
      "loss": 2.8449,
      "step": 135801
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1566107273101807,
      "learning_rate": 0.0002168361350114532,
      "loss": 3.0548,
      "step": 135802
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0616512298583984,
      "learning_rate": 0.0002168322047721869,
      "loss": 3.0663,
      "step": 135803
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4197866916656494,
      "learning_rate": 0.0002168282745483829,
      "loss": 2.6686,
      "step": 135804
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7849783897399902,
      "learning_rate": 0.0002168243443400421,
      "loss": 2.7824,
      "step": 135805
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7154629230499268,
      "learning_rate": 0.00021682041414716524,
      "loss": 2.9198,
      "step": 135806
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.222247362136841,
      "learning_rate": 0.000216816483969753,
      "loss": 2.7985,
      "step": 135807
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.588782787322998,
      "learning_rate": 0.00021681255380780618,
      "loss": 3.1816,
      "step": 135808
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0220837593078613,
      "learning_rate": 0.00021680862366132557,
      "loss": 3.2442,
      "step": 135809
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5289745330810547,
      "learning_rate": 0.00021680469353031173,
      "loss": 2.7219,
      "step": 135810
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2369461059570312,
      "learning_rate": 0.00021680076341476542,
      "loss": 2.9794,
      "step": 135811
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.102997303009033,
      "learning_rate": 0.00021679683331468744,
      "loss": 3.1219,
      "step": 135812
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.140367031097412,
      "learning_rate": 0.0002167929032300785,
      "loss": 2.9331,
      "step": 135813
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.310528039932251,
      "learning_rate": 0.00021678897316093935,
      "loss": 2.8618,
      "step": 135814
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3310062885284424,
      "learning_rate": 0.0002167850431072708,
      "loss": 3.2313,
      "step": 135815
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0849385261535645,
      "learning_rate": 0.00021678111306907337,
      "loss": 2.7503,
      "step": 135816
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.094630002975464,
      "learning_rate": 0.00021677718304634788,
      "loss": 2.6598,
      "step": 135817
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.174262523651123,
      "learning_rate": 0.00021677325303909512,
      "loss": 2.8036,
      "step": 135818
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5558760166168213,
      "learning_rate": 0.00021676932304731572,
      "loss": 2.8713,
      "step": 135819
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9265520572662354,
      "learning_rate": 0.00021676539307101053,
      "loss": 3.0145,
      "step": 135820
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5812599658966064,
      "learning_rate": 0.00021676146311018032,
      "loss": 2.8111,
      "step": 135821
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0709431171417236,
      "learning_rate": 0.00021675753316482561,
      "loss": 3.1885,
      "step": 135822
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.475328207015991,
      "learning_rate": 0.00021675360323494724,
      "loss": 3.0989,
      "step": 135823
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.129516839981079,
      "learning_rate": 0.00021674967332054596,
      "loss": 2.8499,
      "step": 135824
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.801508665084839,
      "learning_rate": 0.00021674574342162248,
      "loss": 2.7395,
      "step": 135825
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0894949436187744,
      "learning_rate": 0.00021674181353817756,
      "loss": 3.114,
      "step": 135826
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7993488311767578,
      "learning_rate": 0.00021673788367021195,
      "loss": 3.0961,
      "step": 135827
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3980743885040283,
      "learning_rate": 0.0002167339538177263,
      "loss": 3.0338,
      "step": 135828
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3015506267547607,
      "learning_rate": 0.00021673002398072137,
      "loss": 2.8959,
      "step": 135829
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.818176507949829,
      "learning_rate": 0.0002167260941591979,
      "loss": 3.1619,
      "step": 135830
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.166740894317627,
      "learning_rate": 0.0002167221643531566,
      "loss": 2.7364,
      "step": 135831
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.110551357269287,
      "learning_rate": 0.00021671823456259823,
      "loss": 2.9759,
      "step": 135832
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2366626262664795,
      "learning_rate": 0.0002167143047875236,
      "loss": 2.9981,
      "step": 135833
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1561903953552246,
      "learning_rate": 0.00021671037502793327,
      "loss": 2.9792,
      "step": 135834
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1646416187286377,
      "learning_rate": 0.0002167064452838281,
      "loss": 2.9239,
      "step": 135835
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.082515239715576,
      "learning_rate": 0.0002167025155552087,
      "loss": 3.0413,
      "step": 135836
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.802675247192383,
      "learning_rate": 0.00021669858584207595,
      "loss": 2.92,
      "step": 135837
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.27731990814209,
      "learning_rate": 0.00021669465614443042,
      "loss": 3.0307,
      "step": 135838
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.987302541732788,
      "learning_rate": 0.00021669072646227303,
      "loss": 3.0421,
      "step": 135839
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.427067518234253,
      "learning_rate": 0.0002166867967956043,
      "loss": 2.9295,
      "step": 135840
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.101516008377075,
      "learning_rate": 0.0002166828671444251,
      "loss": 2.857,
      "step": 135841
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2108540534973145,
      "learning_rate": 0.00021667893750873619,
      "loss": 2.9665,
      "step": 135842
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0689682960510254,
      "learning_rate": 0.00021667500788853823,
      "loss": 3.0561,
      "step": 135843
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0676190853118896,
      "learning_rate": 0.00021667107828383185,
      "loss": 2.8,
      "step": 135844
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.120504140853882,
      "learning_rate": 0.00021666714869461804,
      "loss": 2.9376,
      "step": 135845
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.6428844928741455,
      "learning_rate": 0.00021666321912089729,
      "loss": 2.8848,
      "step": 135846
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9434796571731567,
      "learning_rate": 0.0002166592895626704,
      "loss": 2.9046,
      "step": 135847
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.33662486076355,
      "learning_rate": 0.00021665536001993813,
      "loss": 3.1599,
      "step": 135848
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0226590633392334,
      "learning_rate": 0.00021665143049270122,
      "loss": 3.144,
      "step": 135849
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.5126380920410156,
      "learning_rate": 0.00021664750098096045,
      "loss": 3.0503,
      "step": 135850
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9058215618133545,
      "learning_rate": 0.00021664357148471646,
      "loss": 2.9452,
      "step": 135851
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2698113918304443,
      "learning_rate": 0.00021663964200396995,
      "loss": 2.9749,
      "step": 135852
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9732831716537476,
      "learning_rate": 0.0002166357125387217,
      "loss": 2.9608,
      "step": 135853
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.444565534591675,
      "learning_rate": 0.00021663178308897244,
      "loss": 2.8647,
      "step": 135854
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5280797481536865,
      "learning_rate": 0.00021662785365472292,
      "loss": 2.8872,
      "step": 135855
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6006243228912354,
      "learning_rate": 0.00021662392423597385,
      "loss": 2.8471,
      "step": 135856
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0652670860290527,
      "learning_rate": 0.00021661999483272603,
      "loss": 2.9577,
      "step": 135857
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0143802165985107,
      "learning_rate": 0.00021661606544498012,
      "loss": 3.0846,
      "step": 135858
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.037931203842163,
      "learning_rate": 0.0002166121360727368,
      "loss": 2.8202,
      "step": 135859
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4066271781921387,
      "learning_rate": 0.00021660820671599688,
      "loss": 2.8735,
      "step": 135860
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3511335849761963,
      "learning_rate": 0.00021660427737476103,
      "loss": 2.7642,
      "step": 135861
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.320566177368164,
      "learning_rate": 0.00021660034804903003,
      "loss": 3.0266,
      "step": 135862
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0295329093933105,
      "learning_rate": 0.00021659641873880461,
      "loss": 2.9476,
      "step": 135863
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.3145883083343506,
      "learning_rate": 0.00021659248944408562,
      "loss": 2.9898,
      "step": 135864
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.732663631439209,
      "learning_rate": 0.0002165885601648735,
      "loss": 2.9596,
      "step": 135865
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6620774269104004,
      "learning_rate": 0.00021658463090116917,
      "loss": 2.9463,
      "step": 135866
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.339484214782715,
      "learning_rate": 0.00021658070165297334,
      "loss": 2.9464,
      "step": 135867
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.192765474319458,
      "learning_rate": 0.0002165767724202867,
      "loss": 3.157,
      "step": 135868
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7514630556106567,
      "learning_rate": 0.00021657284320311006,
      "loss": 2.977,
      "step": 135869
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3823964595794678,
      "learning_rate": 0.00021656891400144418,
      "loss": 3.1069,
      "step": 135870
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8157950639724731,
      "learning_rate": 0.00021656498481528963,
      "loss": 3.0669,
      "step": 135871
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.11834716796875,
      "learning_rate": 0.00021656105564464722,
      "loss": 2.7826,
      "step": 135872
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3621785640716553,
      "learning_rate": 0.00021655712648951765,
      "loss": 3.1575,
      "step": 135873
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8476762771606445,
      "learning_rate": 0.0002165531973499017,
      "loss": 2.9526,
      "step": 135874
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2183516025543213,
      "learning_rate": 0.00021654926822580007,
      "loss": 2.8282,
      "step": 135875
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7994961738586426,
      "learning_rate": 0.00021654533911721365,
      "loss": 3.0467,
      "step": 135876
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0450870990753174,
      "learning_rate": 0.00021654141002414292,
      "loss": 2.9814,
      "step": 135877
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.872005581855774,
      "learning_rate": 0.00021653748094658872,
      "loss": 2.8966,
      "step": 135878
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2169649600982666,
      "learning_rate": 0.00021653355188455174,
      "loss": 3.0538,
      "step": 135879
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.243269205093384,
      "learning_rate": 0.0002165296228380328,
      "loss": 3.0042,
      "step": 135880
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8775769472122192,
      "learning_rate": 0.00021652569380703251,
      "loss": 3.0145,
      "step": 135881
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.141761302947998,
      "learning_rate": 0.00021652176479155185,
      "loss": 2.9799,
      "step": 135882
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.9044253826141357,
      "learning_rate": 0.00021651783579159124,
      "loss": 3.0148,
      "step": 135883
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.171508312225342,
      "learning_rate": 0.0002165139068071515,
      "loss": 2.7911,
      "step": 135884
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8691246509552002,
      "learning_rate": 0.00021650997783823344,
      "loss": 3.1919,
      "step": 135885
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.2660703659057617,
      "learning_rate": 0.00021650604888483774,
      "loss": 2.7287,
      "step": 135886
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.0979294776916504,
      "learning_rate": 0.00021650211994696513,
      "loss": 3.0377,
      "step": 135887
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.500267505645752,
      "learning_rate": 0.0002164981910246165,
      "loss": 2.7348,
      "step": 135888
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7331101894378662,
      "learning_rate": 0.00021649426211779228,
      "loss": 2.9982,
      "step": 135889
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.8502190113067627,
      "learning_rate": 0.00021649033322649336,
      "loss": 2.9854,
      "step": 135890
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.481879949569702,
      "learning_rate": 0.00021648640435072047,
      "loss": 2.802,
      "step": 135891
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.391711473464966,
      "learning_rate": 0.00021648247549047432,
      "loss": 3.0694,
      "step": 135892
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8485984802246094,
      "learning_rate": 0.00021647854664575566,
      "loss": 3.0084,
      "step": 135893
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8708529472351074,
      "learning_rate": 0.00021647461781656538,
      "loss": 2.8216,
      "step": 135894
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1323037147521973,
      "learning_rate": 0.00021647068900290385,
      "loss": 3.0301,
      "step": 135895
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.729034185409546,
      "learning_rate": 0.00021646676020477205,
      "loss": 2.9011,
      "step": 135896
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8973063230514526,
      "learning_rate": 0.00021646283142217064,
      "loss": 3.1369,
      "step": 135897
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.113107442855835,
      "learning_rate": 0.00021645890265510037,
      "loss": 2.8352,
      "step": 135898
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2501134872436523,
      "learning_rate": 0.00021645497390356195,
      "loss": 2.9914,
      "step": 135899
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.748751640319824,
      "learning_rate": 0.0002164510451675563,
      "loss": 3.1089,
      "step": 135900
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8555001020431519,
      "learning_rate": 0.0002164471164470838,
      "loss": 2.8075,
      "step": 135901
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3090977668762207,
      "learning_rate": 0.00021644318774214537,
      "loss": 2.7857,
      "step": 135902
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.085433006286621,
      "learning_rate": 0.0002164392590527417,
      "loss": 3.0159,
      "step": 135903
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.021688938140869,
      "learning_rate": 0.00021643533037887363,
      "loss": 2.8571,
      "step": 135904
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.086256980895996,
      "learning_rate": 0.00021643140172054176,
      "loss": 3.1156,
      "step": 135905
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2041685581207275,
      "learning_rate": 0.00021642747307774697,
      "loss": 3.0427,
      "step": 135906
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0418448448181152,
      "learning_rate": 0.0002164235444504898,
      "loss": 3.3404,
      "step": 135907
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8299551010131836,
      "learning_rate": 0.0002164196158387711,
      "loss": 3.0038,
      "step": 135908
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.224832057952881,
      "learning_rate": 0.00021641568724259152,
      "loss": 2.7293,
      "step": 135909
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9636147022247314,
      "learning_rate": 0.00021641175866195185,
      "loss": 2.8755,
      "step": 135910
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9685823917388916,
      "learning_rate": 0.00021640783009685283,
      "loss": 2.9253,
      "step": 135911
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.130099296569824,
      "learning_rate": 0.00021640390154729523,
      "loss": 2.9703,
      "step": 135912
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0368270874023438,
      "learning_rate": 0.00021639997301327966,
      "loss": 2.9146,
      "step": 135913
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.36159348487854,
      "learning_rate": 0.00021639604449480695,
      "loss": 2.6526,
      "step": 135914
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.74021577835083,
      "learning_rate": 0.00021639211599187774,
      "loss": 2.8306,
      "step": 135915
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8054777383804321,
      "learning_rate": 0.00021638818750449284,
      "loss": 2.8384,
      "step": 135916
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1075310707092285,
      "learning_rate": 0.0002163842590326529,
      "loss": 3.1405,
      "step": 135917
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5831098556518555,
      "learning_rate": 0.0002163803305763588,
      "loss": 2.7876,
      "step": 135918
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.930304527282715,
      "learning_rate": 0.00021637640213561114,
      "loss": 3.0505,
      "step": 135919
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.075282096862793,
      "learning_rate": 0.0002163724737104107,
      "loss": 3.1726,
      "step": 135920
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.574354887008667,
      "learning_rate": 0.0002163685453007582,
      "loss": 3.0617,
      "step": 135921
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.304292917251587,
      "learning_rate": 0.00021636461690665432,
      "loss": 3.0492,
      "step": 135922
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.12032151222229,
      "learning_rate": 0.00021636068852809985,
      "loss": 3.0499,
      "step": 135923
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9769396781921387,
      "learning_rate": 0.0002163567601650955,
      "loss": 2.9807,
      "step": 135924
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9279022216796875,
      "learning_rate": 0.00021635283181764206,
      "loss": 3.0459,
      "step": 135925
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.262644052505493,
      "learning_rate": 0.00021634890348574016,
      "loss": 3.0873,
      "step": 135926
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.178784132003784,
      "learning_rate": 0.0002163449751693906,
      "loss": 3.2059,
      "step": 135927
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.979696273803711,
      "learning_rate": 0.00021634104686859407,
      "loss": 2.9478,
      "step": 135928
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0494818687438965,
      "learning_rate": 0.00021633711858335132,
      "loss": 3.1017,
      "step": 135929
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5138351917266846,
      "learning_rate": 0.00021633319031366305,
      "loss": 3.1608,
      "step": 135930
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.705857038497925,
      "learning_rate": 0.0002163292620595301,
      "loss": 3.0339,
      "step": 135931
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.984062910079956,
      "learning_rate": 0.00021632533382095304,
      "loss": 2.9465,
      "step": 135932
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9532557725906372,
      "learning_rate": 0.0002163214055979327,
      "loss": 2.9102,
      "step": 135933
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8883562088012695,
      "learning_rate": 0.00021631747739046987,
      "loss": 2.9573,
      "step": 135934
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7956831455230713,
      "learning_rate": 0.0002163135491985651,
      "loss": 2.7902,
      "step": 135935
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.423565149307251,
      "learning_rate": 0.00021630962102221925,
      "loss": 3.1442,
      "step": 135936
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.102529764175415,
      "learning_rate": 0.00021630569286143307,
      "loss": 2.9958,
      "step": 135937
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1481759548187256,
      "learning_rate": 0.00021630176471620718,
      "loss": 2.8842,
      "step": 135938
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1727516651153564,
      "learning_rate": 0.00021629783658654237,
      "loss": 2.8553,
      "step": 135939
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8966786861419678,
      "learning_rate": 0.00021629390847243936,
      "loss": 3.1274,
      "step": 135940
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1434011459350586,
      "learning_rate": 0.00021628998037389894,
      "loss": 2.9269,
      "step": 135941
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.206521987915039,
      "learning_rate": 0.0002162860522909218,
      "loss": 3.4056,
      "step": 135942
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8910343647003174,
      "learning_rate": 0.0002162821242235087,
      "loss": 3.0504,
      "step": 135943
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.009392261505127,
      "learning_rate": 0.00021627819617166025,
      "loss": 3.1463,
      "step": 135944
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.826176643371582,
      "learning_rate": 0.00021627426813537726,
      "loss": 2.9472,
      "step": 135945
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9000115394592285,
      "learning_rate": 0.0002162703401146605,
      "loss": 3.1102,
      "step": 135946
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.891895055770874,
      "learning_rate": 0.00021626641210951064,
      "loss": 3.0941,
      "step": 135947
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.296869993209839,
      "learning_rate": 0.00021626248411992841,
      "loss": 2.7594,
      "step": 135948
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.640815258026123,
      "learning_rate": 0.0002162585561459147,
      "loss": 3.1914,
      "step": 135949
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1213178634643555,
      "learning_rate": 0.00021625462818747,
      "loss": 3.0833,
      "step": 135950
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.114820957183838,
      "learning_rate": 0.0002162507002445951,
      "loss": 3.1761,
      "step": 135951
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.131230354309082,
      "learning_rate": 0.00021624677231729084,
      "loss": 2.9164,
      "step": 135952
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.504112482070923,
      "learning_rate": 0.00021624284440555788,
      "loss": 2.9747,
      "step": 135953
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7173558473587036,
      "learning_rate": 0.0002162389165093969,
      "loss": 2.9694,
      "step": 135954
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9496666193008423,
      "learning_rate": 0.00021623498862880885,
      "loss": 2.8921,
      "step": 135955
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2455904483795166,
      "learning_rate": 0.00021623106076379422,
      "loss": 2.9835,
      "step": 135956
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.115363836288452,
      "learning_rate": 0.00021622713291435375,
      "loss": 3.2143,
      "step": 135957
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.104853391647339,
      "learning_rate": 0.00021622320508048822,
      "loss": 3.076,
      "step": 135958
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.679612159729004,
      "learning_rate": 0.00021621927726219844,
      "loss": 2.9245,
      "step": 135959
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8063852787017822,
      "learning_rate": 0.00021621534945948507,
      "loss": 2.9098,
      "step": 135960
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2151060104370117,
      "learning_rate": 0.00021621142167234893,
      "loss": 2.8871,
      "step": 135961
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0288002490997314,
      "learning_rate": 0.00021620749390079057,
      "loss": 2.7016,
      "step": 135962
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.7716856002807617,
      "learning_rate": 0.0002162035661448108,
      "loss": 3.0283,
      "step": 135963
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0557641983032227,
      "learning_rate": 0.00021619963840441043,
      "loss": 3.2046,
      "step": 135964
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5963196754455566,
      "learning_rate": 0.00021619571067959002,
      "loss": 3.0244,
      "step": 135965
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5824897289276123,
      "learning_rate": 0.00021619178297035052,
      "loss": 2.9002,
      "step": 135966
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3308451175689697,
      "learning_rate": 0.0002161878552766926,
      "loss": 3.0105,
      "step": 135967
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.71517276763916,
      "learning_rate": 0.00021618392759861681,
      "loss": 2.949,
      "step": 135968
    },
    {
      "epoch": 1.77,
      "grad_norm": 8.144266128540039,
      "learning_rate": 0.00021617999993612405,
      "loss": 3.0403,
      "step": 135969
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0296497344970703,
      "learning_rate": 0.000216176072289215,
      "loss": 2.7865,
      "step": 135970
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.167905569076538,
      "learning_rate": 0.00021617214465789045,
      "loss": 3.0792,
      "step": 135971
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5377373695373535,
      "learning_rate": 0.000216168217042151,
      "loss": 2.9578,
      "step": 135972
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.506347894668579,
      "learning_rate": 0.00021616428944199763,
      "loss": 2.8973,
      "step": 135973
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.808166980743408,
      "learning_rate": 0.00021616036185743078,
      "loss": 3.139,
      "step": 135974
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.102370500564575,
      "learning_rate": 0.00021615643428845127,
      "loss": 3.1268,
      "step": 135975
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.37571382522583,
      "learning_rate": 0.0002161525067350599,
      "loss": 2.6932,
      "step": 135976
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.104064464569092,
      "learning_rate": 0.00021614857919725729,
      "loss": 3.084,
      "step": 135977
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1012027263641357,
      "learning_rate": 0.00021614465167504431,
      "loss": 2.9869,
      "step": 135978
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.464073419570923,
      "learning_rate": 0.0002161407241684217,
      "loss": 3.1636,
      "step": 135979
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.429208755493164,
      "learning_rate": 0.00021613679667739007,
      "loss": 2.8544,
      "step": 135980
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0822527408599854,
      "learning_rate": 0.00021613286920195012,
      "loss": 3.1495,
      "step": 135981
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8430086374282837,
      "learning_rate": 0.00021612894174210265,
      "loss": 2.8325,
      "step": 135982
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.024183750152588,
      "learning_rate": 0.00021612501429784842,
      "loss": 2.6155,
      "step": 135983
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.482290029525757,
      "learning_rate": 0.00021612108686918814,
      "loss": 3.0348,
      "step": 135984
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0747933387756348,
      "learning_rate": 0.00021611715945612264,
      "loss": 3.0182,
      "step": 135985
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.423459053039551,
      "learning_rate": 0.00021611323205865241,
      "loss": 2.7501,
      "step": 135986
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.262104034423828,
      "learning_rate": 0.00021610930467677833,
      "loss": 3.111,
      "step": 135987
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.775042772293091,
      "learning_rate": 0.0002161053773105011,
      "loss": 2.932,
      "step": 135988
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.271439790725708,
      "learning_rate": 0.0002161014499598215,
      "loss": 3.1059,
      "step": 135989
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9616550207138062,
      "learning_rate": 0.00021609752262474017,
      "loss": 3.0022,
      "step": 135990
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.348871231079102,
      "learning_rate": 0.00021609359530525794,
      "loss": 3.0111,
      "step": 135991
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2301952838897705,
      "learning_rate": 0.0002160896680013756,
      "loss": 2.953,
      "step": 135992
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9870673418045044,
      "learning_rate": 0.00021608574071309365,
      "loss": 2.7468,
      "step": 135993
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.235105037689209,
      "learning_rate": 0.00021608181344041292,
      "loss": 2.7179,
      "step": 135994
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8489058017730713,
      "learning_rate": 0.00021607788618333422,
      "loss": 2.7958,
      "step": 135995
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1023409366607666,
      "learning_rate": 0.00021607395894185816,
      "loss": 3.2898,
      "step": 135996
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8408735990524292,
      "learning_rate": 0.00021607003171598561,
      "loss": 3.0758,
      "step": 135997
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1707992553710938,
      "learning_rate": 0.0002160661045057173,
      "loss": 2.9796,
      "step": 135998
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.279780387878418,
      "learning_rate": 0.0002160621773110538,
      "loss": 2.9444,
      "step": 135999
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.01192045211792,
      "learning_rate": 0.00021605825013199586,
      "loss": 2.8118,
      "step": 136000
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2786097526550293,
      "learning_rate": 0.00021605432296854433,
      "loss": 3.231,
      "step": 136001
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.217778444290161,
      "learning_rate": 0.00021605039582069989,
      "loss": 2.8019,
      "step": 136002
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.98195219039917,
      "learning_rate": 0.00021604646868846324,
      "loss": 2.9153,
      "step": 136003
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.33563494682312,
      "learning_rate": 0.00021604254157183526,
      "loss": 2.8632,
      "step": 136004
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.894914150238037,
      "learning_rate": 0.00021603861447081645,
      "loss": 2.8698,
      "step": 136005
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7487518787384033,
      "learning_rate": 0.0002160346873854076,
      "loss": 2.7634,
      "step": 136006
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.158773899078369,
      "learning_rate": 0.00021603076031560957,
      "loss": 3.1888,
      "step": 136007
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1286373138427734,
      "learning_rate": 0.00021602683326142294,
      "loss": 2.9769,
      "step": 136008
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8063446283340454,
      "learning_rate": 0.00021602290622284854,
      "loss": 2.7061,
      "step": 136009
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.278022289276123,
      "learning_rate": 0.00021601897919988709,
      "loss": 3.0795,
      "step": 136010
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2829251289367676,
      "learning_rate": 0.00021601505219253926,
      "loss": 3.1826,
      "step": 136011
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.0912039279937744,
      "learning_rate": 0.0002160111252008059,
      "loss": 3.1186,
      "step": 136012
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1030678749084473,
      "learning_rate": 0.00021600719822468755,
      "loss": 2.9864,
      "step": 136013
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.606200695037842,
      "learning_rate": 0.00021600327126418512,
      "loss": 3.0735,
      "step": 136014
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.714247226715088,
      "learning_rate": 0.00021599934431929918,
      "loss": 3.1389,
      "step": 136015
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0041379928588867,
      "learning_rate": 0.00021599541739003065,
      "loss": 3.0029,
      "step": 136016
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.092164993286133,
      "learning_rate": 0.0002159914904763801,
      "loss": 3.0198,
      "step": 136017
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2077057361602783,
      "learning_rate": 0.0002159875635783483,
      "loss": 3.1189,
      "step": 136018
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1363162994384766,
      "learning_rate": 0.0002159836366959361,
      "loss": 2.8987,
      "step": 136019
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1775434017181396,
      "learning_rate": 0.00021597970982914404,
      "loss": 2.8964,
      "step": 136020
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2669501304626465,
      "learning_rate": 0.00021597578297797294,
      "loss": 3.1084,
      "step": 136021
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.167738199234009,
      "learning_rate": 0.00021597185614242353,
      "loss": 2.891,
      "step": 136022
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7469193935394287,
      "learning_rate": 0.00021596792932249658,
      "loss": 3.1448,
      "step": 136023
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2379488945007324,
      "learning_rate": 0.00021596400251819273,
      "loss": 2.8578,
      "step": 136024
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.002244472503662,
      "learning_rate": 0.00021596007572951273,
      "loss": 2.9431,
      "step": 136025
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0462021827697754,
      "learning_rate": 0.00021595614895645744,
      "loss": 3.0046,
      "step": 136026
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.287999391555786,
      "learning_rate": 0.0002159522221990274,
      "loss": 3.1466,
      "step": 136027
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.22951602935791,
      "learning_rate": 0.0002159482954572235,
      "loss": 2.7289,
      "step": 136028
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.619598865509033,
      "learning_rate": 0.00021594436873104638,
      "loss": 2.8737,
      "step": 136029
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.276057720184326,
      "learning_rate": 0.00021594044202049673,
      "loss": 3.003,
      "step": 136030
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3982534408569336,
      "learning_rate": 0.00021593651532557536,
      "loss": 2.972,
      "step": 136031
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.326777935028076,
      "learning_rate": 0.00021593258864628298,
      "loss": 2.6111,
      "step": 136032
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2029387950897217,
      "learning_rate": 0.0002159286619826203,
      "loss": 2.9088,
      "step": 136033
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.134429454803467,
      "learning_rate": 0.00021592473533458824,
      "loss": 2.9973,
      "step": 136034
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.596639633178711,
      "learning_rate": 0.0002159208087021872,
      "loss": 2.8001,
      "step": 136035
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1313579082489014,
      "learning_rate": 0.00021591688208541805,
      "loss": 3.0473,
      "step": 136036
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.367122173309326,
      "learning_rate": 0.00021591295548428155,
      "loss": 2.9462,
      "step": 136037
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1651015281677246,
      "learning_rate": 0.00021590902889877844,
      "loss": 2.9709,
      "step": 136038
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.298959255218506,
      "learning_rate": 0.00021590510232890944,
      "loss": 2.9666,
      "step": 136039
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.227600574493408,
      "learning_rate": 0.00021590117577467534,
      "loss": 3.0904,
      "step": 136040
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.477262258529663,
      "learning_rate": 0.00021589724923607673,
      "loss": 3.1997,
      "step": 136041
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0489442348480225,
      "learning_rate": 0.00021589332271311436,
      "loss": 3.1252,
      "step": 136042
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.262533187866211,
      "learning_rate": 0.00021588939620578905,
      "loss": 3.0975,
      "step": 136043
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0785722732543945,
      "learning_rate": 0.00021588546971410146,
      "loss": 3.0871,
      "step": 136044
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.321287155151367,
      "learning_rate": 0.00021588154323805238,
      "loss": 3.0287,
      "step": 136045
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.215372323989868,
      "learning_rate": 0.00021587761677764258,
      "loss": 3.0315,
      "step": 136046
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0315444469451904,
      "learning_rate": 0.00021587369033287268,
      "loss": 2.8973,
      "step": 136047
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.661803960800171,
      "learning_rate": 0.00021586976390374336,
      "loss": 2.8456,
      "step": 136048
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.084728479385376,
      "learning_rate": 0.0002158658374902555,
      "loss": 2.9457,
      "step": 136049
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2634096145629883,
      "learning_rate": 0.00021586191109240974,
      "loss": 2.7817,
      "step": 136050
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.409654140472412,
      "learning_rate": 0.00021585798471020684,
      "loss": 2.9385,
      "step": 136051
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.81555438041687,
      "learning_rate": 0.00021585405834364767,
      "loss": 2.9666,
      "step": 136052
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.2613136768341064,
      "learning_rate": 0.00021585013199273268,
      "loss": 3.0343,
      "step": 136053
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.8723065853118896,
      "learning_rate": 0.00021584620565746275,
      "loss": 3.0198,
      "step": 136054
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2641584873199463,
      "learning_rate": 0.00021584227933783858,
      "loss": 2.9928,
      "step": 136055
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3324272632598877,
      "learning_rate": 0.00021583835303386092,
      "loss": 2.9367,
      "step": 136056
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0823466777801514,
      "learning_rate": 0.00021583442674553052,
      "loss": 2.9415,
      "step": 136057
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.00011944770813,
      "learning_rate": 0.00021583050047284809,
      "loss": 2.9035,
      "step": 136058
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3237600326538086,
      "learning_rate": 0.00021582657421581445,
      "loss": 3.0413,
      "step": 136059
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1585910320281982,
      "learning_rate": 0.00021582264797443014,
      "loss": 2.9495,
      "step": 136060
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1779181957244873,
      "learning_rate": 0.000215818721748696,
      "loss": 2.8681,
      "step": 136061
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.886617660522461,
      "learning_rate": 0.00021581479553861273,
      "loss": 2.9882,
      "step": 136062
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4542922973632812,
      "learning_rate": 0.0002158108693441811,
      "loss": 2.9919,
      "step": 136063
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3016257286071777,
      "learning_rate": 0.0002158069431654018,
      "loss": 3.2214,
      "step": 136064
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.863845109939575,
      "learning_rate": 0.00021580301700227568,
      "loss": 2.8086,
      "step": 136065
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.178408622741699,
      "learning_rate": 0.00021579909085480328,
      "loss": 2.991,
      "step": 136066
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6695609092712402,
      "learning_rate": 0.0002157951647229854,
      "loss": 2.9019,
      "step": 136067
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0558035373687744,
      "learning_rate": 0.0002157912386068228,
      "loss": 3.0572,
      "step": 136068
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.297090530395508,
      "learning_rate": 0.0002157873125063162,
      "loss": 3.1878,
      "step": 136069
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2196128368377686,
      "learning_rate": 0.00021578338642146632,
      "loss": 2.9129,
      "step": 136070
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2384252548217773,
      "learning_rate": 0.00021577946035227402,
      "loss": 2.7516,
      "step": 136071
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5937936305999756,
      "learning_rate": 0.00021577553429873977,
      "loss": 3.4247,
      "step": 136072
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8022371530532837,
      "learning_rate": 0.0002157716082608645,
      "loss": 2.9882,
      "step": 136073
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.520284414291382,
      "learning_rate": 0.00021576768223864883,
      "loss": 2.9793,
      "step": 136074
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2624459266662598,
      "learning_rate": 0.0002157637562320935,
      "loss": 3.0086,
      "step": 136075
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1852986812591553,
      "learning_rate": 0.00021575983024119932,
      "loss": 2.8735,
      "step": 136076
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.253795623779297,
      "learning_rate": 0.0002157559042659671,
      "loss": 2.8296,
      "step": 136077
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8709619045257568,
      "learning_rate": 0.00021575197830639732,
      "loss": 2.9022,
      "step": 136078
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.13334584236145,
      "learning_rate": 0.00021574805236249087,
      "loss": 2.9,
      "step": 136079
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.215682029724121,
      "learning_rate": 0.0002157441264342484,
      "loss": 2.7703,
      "step": 136080
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2771928310394287,
      "learning_rate": 0.0002157402005216707,
      "loss": 3.0182,
      "step": 136081
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3124639987945557,
      "learning_rate": 0.00021573627462475852,
      "loss": 2.9827,
      "step": 136082
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.293511152267456,
      "learning_rate": 0.0002157323487435126,
      "loss": 3.0023,
      "step": 136083
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.344072103500366,
      "learning_rate": 0.0002157284228779336,
      "loss": 2.8307,
      "step": 136084
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.138833522796631,
      "learning_rate": 0.00021572449702802224,
      "loss": 3.0389,
      "step": 136085
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.6297383308410645,
      "learning_rate": 0.0002157205711937793,
      "loss": 2.8778,
      "step": 136086
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.158437490463257,
      "learning_rate": 0.00021571664537520545,
      "loss": 3.019,
      "step": 136087
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.971495270729065,
      "learning_rate": 0.0002157127195723015,
      "loss": 3.1036,
      "step": 136088
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4062857627868652,
      "learning_rate": 0.00021570879378506828,
      "loss": 2.9852,
      "step": 136089
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3360793590545654,
      "learning_rate": 0.00021570486801350623,
      "loss": 3.0658,
      "step": 136090
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.054880142211914,
      "learning_rate": 0.00021570094225761626,
      "loss": 3.1963,
      "step": 136091
    },
    {
      "epoch": 1.77,
      "grad_norm": 4.256932735443115,
      "learning_rate": 0.00021569701651739907,
      "loss": 3.0677,
      "step": 136092
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.611567258834839,
      "learning_rate": 0.0002156930907928554,
      "loss": 3.0962,
      "step": 136093
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7127795219421387,
      "learning_rate": 0.00021568916508398595,
      "loss": 2.9658,
      "step": 136094
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3438429832458496,
      "learning_rate": 0.00021568523939079155,
      "loss": 3.0832,
      "step": 136095
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.867424249649048,
      "learning_rate": 0.0002156813137132729,
      "loss": 2.8208,
      "step": 136096
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9294676780700684,
      "learning_rate": 0.0002156773880514306,
      "loss": 2.8749,
      "step": 136097
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1305477619171143,
      "learning_rate": 0.00021567346240526544,
      "loss": 3.0053,
      "step": 136098
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.764821767807007,
      "learning_rate": 0.00021566953677477817,
      "loss": 3.114,
      "step": 136099
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.94035804271698,
      "learning_rate": 0.00021566561115996953,
      "loss": 3.0737,
      "step": 136100
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.441443920135498,
      "learning_rate": 0.00021566168556084035,
      "loss": 3.1124,
      "step": 136101
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.615609884262085,
      "learning_rate": 0.00021565775997739114,
      "loss": 3.007,
      "step": 136102
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8075430393218994,
      "learning_rate": 0.00021565383440962275,
      "loss": 2.9203,
      "step": 136103
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1931047439575195,
      "learning_rate": 0.00021564990885753605,
      "loss": 2.9852,
      "step": 136104
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.611112356185913,
      "learning_rate": 0.00021564598332113146,
      "loss": 3.0977,
      "step": 136105
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9245737791061401,
      "learning_rate": 0.0002156420578004099,
      "loss": 2.9922,
      "step": 136106
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5268709659576416,
      "learning_rate": 0.00021563813229537215,
      "loss": 2.7606,
      "step": 136107
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.740032196044922,
      "learning_rate": 0.00021563420680601882,
      "loss": 2.9538,
      "step": 136108
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.062797784805298,
      "learning_rate": 0.00021563028133235064,
      "loss": 3.0159,
      "step": 136109
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8526231050491333,
      "learning_rate": 0.00021562635587436842,
      "loss": 3.0252,
      "step": 136110
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.104496479034424,
      "learning_rate": 0.00021562243043207288,
      "loss": 2.7518,
      "step": 136111
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.70940899848938,
      "learning_rate": 0.00021561850500546471,
      "loss": 2.9485,
      "step": 136112
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.780123233795166,
      "learning_rate": 0.00021561457959454468,
      "loss": 2.7282,
      "step": 136113
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8451662063598633,
      "learning_rate": 0.0002156106541993134,
      "loss": 2.6579,
      "step": 136114
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.9219086170196533,
      "learning_rate": 0.00021560672881977175,
      "loss": 2.8451,
      "step": 136115
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5226991176605225,
      "learning_rate": 0.00021560280345592038,
      "loss": 2.9134,
      "step": 136116
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.0000457763671875,
      "learning_rate": 0.00021559887810776004,
      "loss": 3.0285,
      "step": 136117
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.121142864227295,
      "learning_rate": 0.00021559495277529155,
      "loss": 3.1677,
      "step": 136118
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.727297067642212,
      "learning_rate": 0.00021559102745851553,
      "loss": 3.0048,
      "step": 136119
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.6459832191467285,
      "learning_rate": 0.00021558710215743265,
      "loss": 2.7831,
      "step": 136120
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.38698673248291,
      "learning_rate": 0.00021558317687204372,
      "loss": 2.8719,
      "step": 136121
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5357587337493896,
      "learning_rate": 0.0002155792516023495,
      "loss": 3.2629,
      "step": 136122
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0862886905670166,
      "learning_rate": 0.00021557532634835066,
      "loss": 3.0822,
      "step": 136123
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.124800443649292,
      "learning_rate": 0.000215571401110048,
      "loss": 3.0755,
      "step": 136124
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.000734567642212,
      "learning_rate": 0.00021556747588744225,
      "loss": 2.9639,
      "step": 136125
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5159401893615723,
      "learning_rate": 0.0002155635506805341,
      "loss": 3.0457,
      "step": 136126
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2254135608673096,
      "learning_rate": 0.00021555962548932422,
      "loss": 3.0521,
      "step": 136127
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0101447105407715,
      "learning_rate": 0.0002155557003138134,
      "loss": 3.2711,
      "step": 136128
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.160875082015991,
      "learning_rate": 0.00021555177515400237,
      "loss": 3.0676,
      "step": 136129
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1672260761260986,
      "learning_rate": 0.0002155478500098919,
      "loss": 3.055,
      "step": 136130
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.190615177154541,
      "learning_rate": 0.0002155439248814826,
      "loss": 2.9106,
      "step": 136131
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.88185453414917,
      "learning_rate": 0.00021553999976877544,
      "loss": 2.9774,
      "step": 136132
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9380953311920166,
      "learning_rate": 0.0002155360746717709,
      "loss": 2.9539,
      "step": 136133
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1482033729553223,
      "learning_rate": 0.00021553214959046973,
      "loss": 3.0625,
      "step": 136134
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.224405527114868,
      "learning_rate": 0.00021552822452487277,
      "loss": 3.0075,
      "step": 136135
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.073535919189453,
      "learning_rate": 0.0002155242994749807,
      "loss": 3.0008,
      "step": 136136
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2705471515655518,
      "learning_rate": 0.00021552037444079426,
      "loss": 3.3322,
      "step": 136137
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0593905448913574,
      "learning_rate": 0.0002155164494223143,
      "loss": 2.8454,
      "step": 136138
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4082741737365723,
      "learning_rate": 0.0002155125244195413,
      "loss": 2.84,
      "step": 136139
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.380885601043701,
      "learning_rate": 0.0002155085994324761,
      "loss": 3.1473,
      "step": 136140
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.504976272583008,
      "learning_rate": 0.00021550467446111948,
      "loss": 2.8226,
      "step": 136141
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1981284618377686,
      "learning_rate": 0.00021550074950547212,
      "loss": 2.7619,
      "step": 136142
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2598390579223633,
      "learning_rate": 0.00021549682456553472,
      "loss": 2.9386,
      "step": 136143
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4018194675445557,
      "learning_rate": 0.00021549289964130822,
      "loss": 2.9575,
      "step": 136144
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4485647678375244,
      "learning_rate": 0.0002154889747327931,
      "loss": 2.9197,
      "step": 136145
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9669380187988281,
      "learning_rate": 0.00021548504983999014,
      "loss": 2.9513,
      "step": 136146
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.260770559310913,
      "learning_rate": 0.0002154811249629001,
      "loss": 2.9694,
      "step": 136147
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.990912914276123,
      "learning_rate": 0.0002154772001015237,
      "loss": 3.0968,
      "step": 136148
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.035158157348633,
      "learning_rate": 0.00021547327525586168,
      "loss": 2.8479,
      "step": 136149
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5943613052368164,
      "learning_rate": 0.00021546935042591494,
      "loss": 2.831,
      "step": 136150
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4115869998931885,
      "learning_rate": 0.0002154654256116839,
      "loss": 2.9403,
      "step": 136151
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.907597541809082,
      "learning_rate": 0.00021546150081316943,
      "loss": 3.0359,
      "step": 136152
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.979290246963501,
      "learning_rate": 0.00021545757603037228,
      "loss": 3.0972,
      "step": 136153
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0509369373321533,
      "learning_rate": 0.00021545365126329316,
      "loss": 2.9833,
      "step": 136154
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.974452495574951,
      "learning_rate": 0.00021544972651193277,
      "loss": 2.9765,
      "step": 136155
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.260366916656494,
      "learning_rate": 0.00021544580177629201,
      "loss": 3.1405,
      "step": 136156
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.697326421737671,
      "learning_rate": 0.00021544187705637138,
      "loss": 3.0323,
      "step": 136157
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1955337524414062,
      "learning_rate": 0.00021543795235217167,
      "loss": 2.6683,
      "step": 136158
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.008251905441284,
      "learning_rate": 0.00021543402766369363,
      "loss": 3.1147,
      "step": 136159
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4855246543884277,
      "learning_rate": 0.000215430102990938,
      "loss": 2.9464,
      "step": 136160
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3744914531707764,
      "learning_rate": 0.0002154261783339055,
      "loss": 3.0212,
      "step": 136161
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7717013359069824,
      "learning_rate": 0.00021542225369259704,
      "loss": 3.0587,
      "step": 136162
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.168860673904419,
      "learning_rate": 0.00021541832906701308,
      "loss": 3.112,
      "step": 136163
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.541393518447876,
      "learning_rate": 0.00021541440445715435,
      "loss": 3.152,
      "step": 136164
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6153786182403564,
      "learning_rate": 0.00021541047986302177,
      "loss": 2.8213,
      "step": 136165
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1152937412261963,
      "learning_rate": 0.00021540655528461594,
      "loss": 3.0216,
      "step": 136166
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7210694551467896,
      "learning_rate": 0.00021540263072193763,
      "loss": 2.9321,
      "step": 136167
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.137805461883545,
      "learning_rate": 0.0002153987061749877,
      "loss": 2.8164,
      "step": 136168
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0474300384521484,
      "learning_rate": 0.00021539478164376658,
      "loss": 2.9262,
      "step": 136169
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7353647947311401,
      "learning_rate": 0.00021539085712827524,
      "loss": 3.0644,
      "step": 136170
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5688631534576416,
      "learning_rate": 0.0002153869326285143,
      "loss": 3.2888,
      "step": 136171
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.249586820602417,
      "learning_rate": 0.00021538300814448452,
      "loss": 2.7384,
      "step": 136172
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.713655710220337,
      "learning_rate": 0.0002153790836761866,
      "loss": 3.0407,
      "step": 136173
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7190911769866943,
      "learning_rate": 0.0002153751592236215,
      "loss": 3.0382,
      "step": 136174
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2600619792938232,
      "learning_rate": 0.0002153712347867896,
      "loss": 3.0823,
      "step": 136175
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1567389965057373,
      "learning_rate": 0.00021536731036569178,
      "loss": 2.8575,
      "step": 136176
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9568986892700195,
      "learning_rate": 0.0002153633859603288,
      "loss": 3.115,
      "step": 136177
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.3584818840026855,
      "learning_rate": 0.00021535946157070137,
      "loss": 3.0123,
      "step": 136178
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1449711322784424,
      "learning_rate": 0.00021535553719681018,
      "loss": 3.1244,
      "step": 136179
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5466623306274414,
      "learning_rate": 0.00021535161283865603,
      "loss": 2.8116,
      "step": 136180
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.030867099761963,
      "learning_rate": 0.00021534768849623966,
      "loss": 3.1079,
      "step": 136181
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.135624885559082,
      "learning_rate": 0.0002153437641695617,
      "loss": 2.8859,
      "step": 136182
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.009594202041626,
      "learning_rate": 0.00021533983985862288,
      "loss": 3.2894,
      "step": 136183
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.90348482131958,
      "learning_rate": 0.00021533591556342402,
      "loss": 3.0084,
      "step": 136184
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5344200134277344,
      "learning_rate": 0.00021533199128396583,
      "loss": 3.368,
      "step": 136185
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5908408164978027,
      "learning_rate": 0.00021532806702024906,
      "loss": 2.8276,
      "step": 136186
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3779969215393066,
      "learning_rate": 0.00021532414277227434,
      "loss": 3.064,
      "step": 136187
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.098365068435669,
      "learning_rate": 0.0002153202185400425,
      "loss": 2.7904,
      "step": 136188
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.4066689014434814,
      "learning_rate": 0.00021531629432355417,
      "loss": 2.7936,
      "step": 136189
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4674291610717773,
      "learning_rate": 0.0002153123701228102,
      "loss": 2.943,
      "step": 136190
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.957348108291626,
      "learning_rate": 0.0002153084459378112,
      "loss": 2.6342,
      "step": 136191
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2003159523010254,
      "learning_rate": 0.00021530452176855798,
      "loss": 3.2152,
      "step": 136192
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.3310048580169678,
      "learning_rate": 0.0002153005976150513,
      "loss": 2.8457,
      "step": 136193
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.528595447540283,
      "learning_rate": 0.00021529667347729175,
      "loss": 3.0333,
      "step": 136194
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3073668479919434,
      "learning_rate": 0.0002152927493552802,
      "loss": 3.1481,
      "step": 136195
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.351933717727661,
      "learning_rate": 0.00021528882524901737,
      "loss": 2.9147,
      "step": 136196
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.272822618484497,
      "learning_rate": 0.00021528490115850385,
      "loss": 2.7617,
      "step": 136197
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1689088344573975,
      "learning_rate": 0.0002152809770837405,
      "loss": 2.9781,
      "step": 136198
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.000976324081421,
      "learning_rate": 0.0002152770530247281,
      "loss": 2.9752,
      "step": 136199
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.974968671798706,
      "learning_rate": 0.00021527312898146717,
      "loss": 2.9169,
      "step": 136200
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4853546619415283,
      "learning_rate": 0.00021526920495395862,
      "loss": 3.0326,
      "step": 136201
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2884445190429688,
      "learning_rate": 0.00021526528094220312,
      "loss": 2.8835,
      "step": 136202
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0867931842803955,
      "learning_rate": 0.00021526135694620145,
      "loss": 2.7397,
      "step": 136203
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5927343368530273,
      "learning_rate": 0.00021525743296595422,
      "loss": 3.0116,
      "step": 136204
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.168835163116455,
      "learning_rate": 0.0002152535090014623,
      "loss": 2.9801,
      "step": 136205
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.730853796005249,
      "learning_rate": 0.0002152495850527263,
      "loss": 2.9366,
      "step": 136206
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9562323093414307,
      "learning_rate": 0.000215245661119747,
      "loss": 3.0239,
      "step": 136207
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8463170528411865,
      "learning_rate": 0.00021524173720252515,
      "loss": 2.8557,
      "step": 136208
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1049835681915283,
      "learning_rate": 0.00021523781330106144,
      "loss": 2.9091,
      "step": 136209
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4789798259735107,
      "learning_rate": 0.00021523388941535666,
      "loss": 2.9357,
      "step": 136210
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.060814380645752,
      "learning_rate": 0.00021522996554541155,
      "loss": 2.8621,
      "step": 136211
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.399312734603882,
      "learning_rate": 0.0002152260416912267,
      "loss": 3.0214,
      "step": 136212
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.646679162979126,
      "learning_rate": 0.00021522211785280292,
      "loss": 2.9914,
      "step": 136213
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.378405809402466,
      "learning_rate": 0.00021521819403014098,
      "loss": 3.0009,
      "step": 136214
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.302861452102661,
      "learning_rate": 0.00021521427022324155,
      "loss": 3.0706,
      "step": 136215
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.097775936126709,
      "learning_rate": 0.0002152103464321054,
      "loss": 2.8603,
      "step": 136216
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2211239337921143,
      "learning_rate": 0.00021520642265673338,
      "loss": 3.0017,
      "step": 136217
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9019676446914673,
      "learning_rate": 0.00021520249889712595,
      "loss": 2.9687,
      "step": 136218
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1510190963745117,
      "learning_rate": 0.00021519857515328394,
      "loss": 2.6118,
      "step": 136219
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.818446397781372,
      "learning_rate": 0.00021519465142520817,
      "loss": 3.0704,
      "step": 136220
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9763158559799194,
      "learning_rate": 0.00021519072771289928,
      "loss": 2.9589,
      "step": 136221
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6620166301727295,
      "learning_rate": 0.00021518680401635807,
      "loss": 3.0211,
      "step": 136222
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.871312379837036,
      "learning_rate": 0.00021518288033558534,
      "loss": 3.0062,
      "step": 136223
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0435421466827393,
      "learning_rate": 0.0002151789566705816,
      "loss": 3.0792,
      "step": 136224
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.309828281402588,
      "learning_rate": 0.00021517503302134769,
      "loss": 3.0523,
      "step": 136225
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9504927396774292,
      "learning_rate": 0.00021517110938788428,
      "loss": 3.1536,
      "step": 136226
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.067063570022583,
      "learning_rate": 0.00021516718577019225,
      "loss": 2.8572,
      "step": 136227
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0092761516571045,
      "learning_rate": 0.00021516326216827219,
      "loss": 2.8381,
      "step": 136228
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8788564205169678,
      "learning_rate": 0.00021515933858212504,
      "loss": 2.9326,
      "step": 136229
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.248814105987549,
      "learning_rate": 0.00021515541501175122,
      "loss": 2.9455,
      "step": 136230
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1552677154541016,
      "learning_rate": 0.00021515149145715165,
      "loss": 2.9252,
      "step": 136231
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6423940658569336,
      "learning_rate": 0.00021514756791832695,
      "loss": 3.042,
      "step": 136232
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.31410813331604,
      "learning_rate": 0.00021514364439527797,
      "loss": 3.0296,
      "step": 136233
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.569571018218994,
      "learning_rate": 0.00021513972088800534,
      "loss": 3.1794,
      "step": 136234
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6402430534362793,
      "learning_rate": 0.00021513579739651005,
      "loss": 2.886,
      "step": 136235
    },
    {
      "epoch": 1.77,
      "grad_norm": 5.402826309204102,
      "learning_rate": 0.00021513187392079242,
      "loss": 2.7441,
      "step": 136236
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.188997745513916,
      "learning_rate": 0.0002151279504608534,
      "loss": 3.0193,
      "step": 136237
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.965582847595215,
      "learning_rate": 0.0002151240270166937,
      "loss": 2.9561,
      "step": 136238
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.158292531967163,
      "learning_rate": 0.00021512010358831402,
      "loss": 3.1983,
      "step": 136239
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6545190811157227,
      "learning_rate": 0.00021511618017571512,
      "loss": 3.0974,
      "step": 136240
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.283906936645508,
      "learning_rate": 0.0002151122567788979,
      "loss": 2.9215,
      "step": 136241
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0764567852020264,
      "learning_rate": 0.00021510833339786274,
      "loss": 3.0334,
      "step": 136242
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.085995674133301,
      "learning_rate": 0.00021510441003261057,
      "loss": 2.9443,
      "step": 136243
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0266337394714355,
      "learning_rate": 0.00021510048668314205,
      "loss": 2.9954,
      "step": 136244
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2427167892456055,
      "learning_rate": 0.000215096563349458,
      "loss": 3.1285,
      "step": 136245
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.580096483230591,
      "learning_rate": 0.00021509264003155906,
      "loss": 2.9088,
      "step": 136246
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.09668231010437,
      "learning_rate": 0.0002150887167294462,
      "loss": 3.0506,
      "step": 136247
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.941300392150879,
      "learning_rate": 0.00021508479344311974,
      "loss": 3.0558,
      "step": 136248
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.892270803451538,
      "learning_rate": 0.00021508087017258067,
      "loss": 2.9594,
      "step": 136249
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9435912370681763,
      "learning_rate": 0.00021507694691782963,
      "loss": 3.0442,
      "step": 136250
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.6212005615234375,
      "learning_rate": 0.00021507302367886744,
      "loss": 3.1503,
      "step": 136251
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.001133918762207,
      "learning_rate": 0.00021506910045569473,
      "loss": 3.16,
      "step": 136252
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.247769355773926,
      "learning_rate": 0.0002150651772483124,
      "loss": 2.7224,
      "step": 136253
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6368558406829834,
      "learning_rate": 0.00021506125405672098,
      "loss": 3.0153,
      "step": 136254
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.065319061279297,
      "learning_rate": 0.00021505733088092127,
      "loss": 2.7911,
      "step": 136255
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.5351483821868896,
      "learning_rate": 0.00021505340772091396,
      "loss": 2.9402,
      "step": 136256
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6383213996887207,
      "learning_rate": 0.00021504948457669985,
      "loss": 2.9969,
      "step": 136257
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.5077855587005615,
      "learning_rate": 0.00021504556144827966,
      "loss": 2.7408,
      "step": 136258
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.112123727798462,
      "learning_rate": 0.0002150416383356541,
      "loss": 3.0377,
      "step": 136259
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8855165243148804,
      "learning_rate": 0.00021503771523882396,
      "loss": 3.0557,
      "step": 136260
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.9439609050750732,
      "learning_rate": 0.00021503379215778986,
      "loss": 2.9252,
      "step": 136261
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.081221580505371,
      "learning_rate": 0.00021502986909255255,
      "loss": 2.9062,
      "step": 136262
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9086358547210693,
      "learning_rate": 0.00021502594604311284,
      "loss": 3.1534,
      "step": 136263
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0005581378936768,
      "learning_rate": 0.00021502202300947135,
      "loss": 3.0483,
      "step": 136264
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.7835676670074463,
      "learning_rate": 0.00021501809999162892,
      "loss": 3.0888,
      "step": 136265
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9967999458312988,
      "learning_rate": 0.00021501417698958632,
      "loss": 2.8911,
      "step": 136266
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.015129566192627,
      "learning_rate": 0.00021501025400334412,
      "loss": 2.8247,
      "step": 136267
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2403712272644043,
      "learning_rate": 0.000215006331032903,
      "loss": 2.8602,
      "step": 136268
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2067036628723145,
      "learning_rate": 0.00021500240807826391,
      "loss": 3.225,
      "step": 136269
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.981630563735962,
      "learning_rate": 0.00021499848513942743,
      "loss": 3.1345,
      "step": 136270
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1654837131500244,
      "learning_rate": 0.00021499456221639438,
      "loss": 2.8014,
      "step": 136271
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3976223468780518,
      "learning_rate": 0.0002149906393091655,
      "loss": 3.1104,
      "step": 136272
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.328944444656372,
      "learning_rate": 0.00021498671641774142,
      "loss": 2.8823,
      "step": 136273
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.972597360610962,
      "learning_rate": 0.00021498279354212292,
      "loss": 2.8688,
      "step": 136274
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.6112725734710693,
      "learning_rate": 0.00021497887068231065,
      "loss": 3.0233,
      "step": 136275
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.089972496032715,
      "learning_rate": 0.00021497494783830542,
      "loss": 3.0788,
      "step": 136276
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.8936948776245117,
      "learning_rate": 0.00021497102501010802,
      "loss": 3.1517,
      "step": 136277
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.280367136001587,
      "learning_rate": 0.00021496710219771908,
      "loss": 2.6483,
      "step": 136278
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.481583833694458,
      "learning_rate": 0.00021496317940113938,
      "loss": 3.1806,
      "step": 136279
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9901282787323,
      "learning_rate": 0.00021495925662036967,
      "loss": 2.7586,
      "step": 136280
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.2335493564605713,
      "learning_rate": 0.00021495533385541057,
      "loss": 2.8433,
      "step": 136281
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.4140431880950928,
      "learning_rate": 0.00021495141110626287,
      "loss": 2.993,
      "step": 136282
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2234013080596924,
      "learning_rate": 0.0002149474883729273,
      "loss": 2.9664,
      "step": 136283
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.7172582149505615,
      "learning_rate": 0.0002149435656554047,
      "loss": 2.8064,
      "step": 136284
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.923748016357422,
      "learning_rate": 0.0002149396429536956,
      "loss": 2.9809,
      "step": 136285
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.023548126220703,
      "learning_rate": 0.00021493572026780086,
      "loss": 2.9857,
      "step": 136286
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3840270042419434,
      "learning_rate": 0.00021493179759772118,
      "loss": 3.0472,
      "step": 136287
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.341061592102051,
      "learning_rate": 0.0002149278749434573,
      "loss": 2.9228,
      "step": 136288
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4300971031188965,
      "learning_rate": 0.00021492395230500986,
      "loss": 3.1696,
      "step": 136289
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1497199535369873,
      "learning_rate": 0.00021492002968237975,
      "loss": 2.8668,
      "step": 136290
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0242245197296143,
      "learning_rate": 0.00021491610707556754,
      "loss": 2.941,
      "step": 136291
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.290271520614624,
      "learning_rate": 0.00021491218448457407,
      "loss": 2.7394,
      "step": 136292
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0233535766601562,
      "learning_rate": 0.0002149082619094,
      "loss": 2.8778,
      "step": 136293
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.149211883544922,
      "learning_rate": 0.0002149043393500461,
      "loss": 3.0079,
      "step": 136294
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.9013571739196777,
      "learning_rate": 0.00021490041680651312,
      "loss": 3.0176,
      "step": 136295
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3417391777038574,
      "learning_rate": 0.00021489649427880182,
      "loss": 3.1138,
      "step": 136296
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0624475479125977,
      "learning_rate": 0.0002148925717669128,
      "loss": 3.1361,
      "step": 136297
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.1628334522247314,
      "learning_rate": 0.0002148886492708468,
      "loss": 3.1069,
      "step": 136298
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4056150913238525,
      "learning_rate": 0.00021488472679060465,
      "loss": 2.9536,
      "step": 136299
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.9684394598007202,
      "learning_rate": 0.000214880804326187,
      "loss": 2.9199,
      "step": 136300
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.504808187484741,
      "learning_rate": 0.00021487688187759468,
      "loss": 2.8321,
      "step": 136301
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.661588191986084,
      "learning_rate": 0.00021487295944482842,
      "loss": 2.7461,
      "step": 136302
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.4649202823638916,
      "learning_rate": 0.00021486903702788878,
      "loss": 3.0207,
      "step": 136303
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.570484161376953,
      "learning_rate": 0.00021486511462677661,
      "loss": 2.9789,
      "step": 136304
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.0883193016052246,
      "learning_rate": 0.0002148611922414926,
      "loss": 2.9708,
      "step": 136305
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.228600025177002,
      "learning_rate": 0.0002148572698720375,
      "loss": 2.9879,
      "step": 136306
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1537654399871826,
      "learning_rate": 0.00021485334751841203,
      "loss": 3.1632,
      "step": 136307
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.2505667209625244,
      "learning_rate": 0.00021484942518061706,
      "loss": 3.0965,
      "step": 136308
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.3997180461883545,
      "learning_rate": 0.00021484550285865312,
      "loss": 2.7656,
      "step": 136309
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8298914432525635,
      "learning_rate": 0.00021484158055252097,
      "loss": 3.1028,
      "step": 136310
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.633335828781128,
      "learning_rate": 0.00021483765826222135,
      "loss": 3.1397,
      "step": 136311
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.181506872177124,
      "learning_rate": 0.00021483373598775503,
      "loss": 2.8197,
      "step": 136312
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.2904727458953857,
      "learning_rate": 0.00021482981372912273,
      "loss": 2.9132,
      "step": 136313
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.8971792459487915,
      "learning_rate": 0.0002148258914863253,
      "loss": 2.9887,
      "step": 136314
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.4057068824768066,
      "learning_rate": 0.00021482196925936322,
      "loss": 2.9774,
      "step": 136315
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.681334972381592,
      "learning_rate": 0.00021481804704823735,
      "loss": 3.1771,
      "step": 136316
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.481250762939453,
      "learning_rate": 0.00021481412485294845,
      "loss": 2.9499,
      "step": 136317
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.1111128330230713,
      "learning_rate": 0.00021481020267349716,
      "loss": 2.866,
      "step": 136318
    },
    {
      "epoch": 1.77,
      "grad_norm": 3.513671398162842,
      "learning_rate": 0.00021480628050988426,
      "loss": 3.0438,
      "step": 136319
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.6837637424468994,
      "learning_rate": 0.00021480235836211065,
      "loss": 3.1236,
      "step": 136320
    },
    {
      "epoch": 1.77,
      "grad_norm": 2.300305128097534,
      "learning_rate": 0.0002147984362301767,
      "loss": 2.8953,
      "step": 136321
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4378409385681152,
      "learning_rate": 0.00021479451411408337,
      "loss": 3.0775,
      "step": 136322
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5681204795837402,
      "learning_rate": 0.00021479059201383132,
      "loss": 3.0386,
      "step": 136323
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.099440097808838,
      "learning_rate": 0.00021478666992942136,
      "loss": 2.6846,
      "step": 136324
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.064745903015137,
      "learning_rate": 0.00021478274786085414,
      "loss": 2.755,
      "step": 136325
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.7948968410491943,
      "learning_rate": 0.0002147788258081304,
      "loss": 3.0541,
      "step": 136326
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0802035331726074,
      "learning_rate": 0.00021477490377125105,
      "loss": 2.9532,
      "step": 136327
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.062955856323242,
      "learning_rate": 0.0002147709817502165,
      "loss": 3.0632,
      "step": 136328
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8999327421188354,
      "learning_rate": 0.00021476705974502762,
      "loss": 3.0619,
      "step": 136329
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.016099691390991,
      "learning_rate": 0.00021476313775568514,
      "loss": 3.0595,
      "step": 136330
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2087438106536865,
      "learning_rate": 0.00021475921578218985,
      "loss": 3.0279,
      "step": 136331
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.244298219680786,
      "learning_rate": 0.0002147552938245424,
      "loss": 3.1895,
      "step": 136332
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3122048377990723,
      "learning_rate": 0.0002147513718827437,
      "loss": 2.8994,
      "step": 136333
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2439568042755127,
      "learning_rate": 0.00021474744995679418,
      "loss": 2.9865,
      "step": 136334
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8306925296783447,
      "learning_rate": 0.00021474352804669474,
      "loss": 2.7593,
      "step": 136335
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3451671600341797,
      "learning_rate": 0.00021473960615244609,
      "loss": 3.2272,
      "step": 136336
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.001110315322876,
      "learning_rate": 0.0002147356842740489,
      "loss": 2.9898,
      "step": 136337
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1351277828216553,
      "learning_rate": 0.000214731762411504,
      "loss": 2.9584,
      "step": 136338
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6602673530578613,
      "learning_rate": 0.0002147278405648122,
      "loss": 3.3006,
      "step": 136339
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9231778383255005,
      "learning_rate": 0.00021472391873397396,
      "loss": 3.0711,
      "step": 136340
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.362579584121704,
      "learning_rate": 0.0002147199969189902,
      "loss": 2.9003,
      "step": 136341
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.858905792236328,
      "learning_rate": 0.00021471607511986155,
      "loss": 3.0392,
      "step": 136342
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.928958535194397,
      "learning_rate": 0.00021471215333658883,
      "loss": 3.1493,
      "step": 136343
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0667800903320312,
      "learning_rate": 0.0002147082315691727,
      "loss": 2.8822,
      "step": 136344
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2334957122802734,
      "learning_rate": 0.00021470430981761408,
      "loss": 2.9426,
      "step": 136345
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.486208200454712,
      "learning_rate": 0.00021470038808191338,
      "loss": 2.9796,
      "step": 136346
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.940670371055603,
      "learning_rate": 0.00021469646636207152,
      "loss": 2.8495,
      "step": 136347
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9706122875213623,
      "learning_rate": 0.00021469254465808916,
      "loss": 2.7485,
      "step": 136348
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.608546733856201,
      "learning_rate": 0.00021468862296996708,
      "loss": 2.7564,
      "step": 136349
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4036359786987305,
      "learning_rate": 0.00021468470129770598,
      "loss": 3.0838,
      "step": 136350
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.386990547180176,
      "learning_rate": 0.00021468077964130675,
      "loss": 2.8836,
      "step": 136351
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.346322536468506,
      "learning_rate": 0.00021467685800076987,
      "loss": 2.9369,
      "step": 136352
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8481175899505615,
      "learning_rate": 0.00021467293637609616,
      "loss": 2.9681,
      "step": 136353
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9036710262298584,
      "learning_rate": 0.00021466901476728634,
      "loss": 2.7666,
      "step": 136354
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.345355272293091,
      "learning_rate": 0.00021466509317434119,
      "loss": 3.0121,
      "step": 136355
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2327704429626465,
      "learning_rate": 0.0002146611715972614,
      "loss": 2.6508,
      "step": 136356
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9530128240585327,
      "learning_rate": 0.00021465725003604776,
      "loss": 3.1254,
      "step": 136357
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9938910007476807,
      "learning_rate": 0.00021465332849070093,
      "loss": 2.9705,
      "step": 136358
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.458897590637207,
      "learning_rate": 0.00021464940696122164,
      "loss": 3.118,
      "step": 136359
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.983927011489868,
      "learning_rate": 0.0002146454854476106,
      "loss": 3.1316,
      "step": 136360
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0754435062408447,
      "learning_rate": 0.0002146415639498686,
      "loss": 3.1354,
      "step": 136361
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0485005378723145,
      "learning_rate": 0.0002146376424679963,
      "loss": 2.9388,
      "step": 136362
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.371572971343994,
      "learning_rate": 0.0002146337210019946,
      "loss": 2.848,
      "step": 136363
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.168827533721924,
      "learning_rate": 0.000214629799551864,
      "loss": 3.0197,
      "step": 136364
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.826077699661255,
      "learning_rate": 0.00021462587811760538,
      "loss": 3.1495,
      "step": 136365
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.693744421005249,
      "learning_rate": 0.0002146219566992194,
      "loss": 3.1063,
      "step": 136366
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2697532176971436,
      "learning_rate": 0.0002146180352967068,
      "loss": 3.1224,
      "step": 136367
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1637604236602783,
      "learning_rate": 0.00021461411391006832,
      "loss": 3.0633,
      "step": 136368
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4506568908691406,
      "learning_rate": 0.00021461019253930474,
      "loss": 2.9048,
      "step": 136369
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.734365940093994,
      "learning_rate": 0.0002146062711844167,
      "loss": 2.8479,
      "step": 136370
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.120574951171875,
      "learning_rate": 0.00021460234984540491,
      "loss": 2.9859,
      "step": 136371
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2487921714782715,
      "learning_rate": 0.00021459842852227025,
      "loss": 2.9777,
      "step": 136372
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.046203136444092,
      "learning_rate": 0.0002145945072150133,
      "loss": 3.1497,
      "step": 136373
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.5219509601593018,
      "learning_rate": 0.0002145905859236348,
      "loss": 3.0644,
      "step": 136374
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.440772771835327,
      "learning_rate": 0.00021458666464813564,
      "loss": 2.9636,
      "step": 136375
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3942348957061768,
      "learning_rate": 0.00021458274338851635,
      "loss": 2.6734,
      "step": 136376
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.023292064666748,
      "learning_rate": 0.00021457882214477773,
      "loss": 2.9014,
      "step": 136377
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8893988132476807,
      "learning_rate": 0.00021457490091692056,
      "loss": 2.9742,
      "step": 136378
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3873448371887207,
      "learning_rate": 0.00021457097970494553,
      "loss": 3.082,
      "step": 136379
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1266541481018066,
      "learning_rate": 0.00021456705850885334,
      "loss": 3.0489,
      "step": 136380
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.530622959136963,
      "learning_rate": 0.00021456313732864482,
      "loss": 2.7873,
      "step": 136381
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.05224609375,
      "learning_rate": 0.00021455921616432054,
      "loss": 3.1029,
      "step": 136382
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.519030809402466,
      "learning_rate": 0.00021455529501588133,
      "loss": 3.0394,
      "step": 136383
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.152911424636841,
      "learning_rate": 0.00021455137388332792,
      "loss": 2.9559,
      "step": 136384
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.018655300140381,
      "learning_rate": 0.00021454745276666093,
      "loss": 2.9158,
      "step": 136385
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.0198163986206055,
      "learning_rate": 0.00021454353166588128,
      "loss": 3.2144,
      "step": 136386
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3656816482543945,
      "learning_rate": 0.00021453961058098963,
      "loss": 3.1471,
      "step": 136387
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1863977909088135,
      "learning_rate": 0.00021453568951198667,
      "loss": 2.9779,
      "step": 136388
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0632407665252686,
      "learning_rate": 0.00021453176845887307,
      "loss": 3.1554,
      "step": 136389
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.5239689350128174,
      "learning_rate": 0.00021452784742164967,
      "loss": 2.8334,
      "step": 136390
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.657701015472412,
      "learning_rate": 0.00021452392640031715,
      "loss": 2.7172,
      "step": 136391
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.355116367340088,
      "learning_rate": 0.00021452000539487622,
      "loss": 3.1218,
      "step": 136392
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9577629566192627,
      "learning_rate": 0.00021451608440532763,
      "loss": 3.1334,
      "step": 136393
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3040945529937744,
      "learning_rate": 0.00021451216343167226,
      "loss": 3.0026,
      "step": 136394
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.426079750061035,
      "learning_rate": 0.00021450824247391055,
      "loss": 3.1479,
      "step": 136395
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9977303743362427,
      "learning_rate": 0.00021450432153204336,
      "loss": 2.9182,
      "step": 136396
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.383551836013794,
      "learning_rate": 0.00021450040060607146,
      "loss": 3.1397,
      "step": 136397
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.887437343597412,
      "learning_rate": 0.00021449647969599555,
      "loss": 3.0497,
      "step": 136398
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.974721074104309,
      "learning_rate": 0.00021449255880181633,
      "loss": 3.0366,
      "step": 136399
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1545932292938232,
      "learning_rate": 0.00021448863792353473,
      "loss": 2.9,
      "step": 136400
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1308753490448,
      "learning_rate": 0.00021448471706115115,
      "loss": 2.921,
      "step": 136401
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9816523790359497,
      "learning_rate": 0.00021448079621466648,
      "loss": 3.1883,
      "step": 136402
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2364823818206787,
      "learning_rate": 0.0002144768753840814,
      "loss": 3.0179,
      "step": 136403
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.028679370880127,
      "learning_rate": 0.00021447295456939676,
      "loss": 3.1584,
      "step": 136404
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7413744926452637,
      "learning_rate": 0.0002144690337706132,
      "loss": 3.1484,
      "step": 136405
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2131338119506836,
      "learning_rate": 0.00021446511298773156,
      "loss": 3.0347,
      "step": 136406
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.282857656478882,
      "learning_rate": 0.00021446119222075233,
      "loss": 2.8125,
      "step": 136407
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.68594491481781,
      "learning_rate": 0.0002144572714696764,
      "loss": 2.9492,
      "step": 136408
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8494349718093872,
      "learning_rate": 0.00021445335073450446,
      "loss": 2.8835,
      "step": 136409
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7990033626556396,
      "learning_rate": 0.00021444943001523728,
      "loss": 3.1013,
      "step": 136410
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0120222568511963,
      "learning_rate": 0.0002144455093118755,
      "loss": 3.0321,
      "step": 136411
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5201451778411865,
      "learning_rate": 0.00021444158862442015,
      "loss": 3.0813,
      "step": 136412
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0825629234313965,
      "learning_rate": 0.00021443766795287153,
      "loss": 3.0358,
      "step": 136413
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.822929620742798,
      "learning_rate": 0.0002144337472972306,
      "loss": 2.9163,
      "step": 136414
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1687676906585693,
      "learning_rate": 0.000214429826657498,
      "loss": 2.9316,
      "step": 136415
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9572842121124268,
      "learning_rate": 0.00021442590603367454,
      "loss": 3.1988,
      "step": 136416
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2376766204833984,
      "learning_rate": 0.00021442198542576094,
      "loss": 3.0445,
      "step": 136417
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1759777069091797,
      "learning_rate": 0.00021441806483375804,
      "loss": 3.1535,
      "step": 136418
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0995523929595947,
      "learning_rate": 0.00021441414425766626,
      "loss": 2.9868,
      "step": 136419
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.989872455596924,
      "learning_rate": 0.00021441022369748656,
      "loss": 2.9421,
      "step": 136420
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.120877265930176,
      "learning_rate": 0.00021440630315321956,
      "loss": 3.0222,
      "step": 136421
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.023728847503662,
      "learning_rate": 0.00021440238262486607,
      "loss": 2.8236,
      "step": 136422
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.041402578353882,
      "learning_rate": 0.0002143984621124268,
      "loss": 2.933,
      "step": 136423
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9114465713500977,
      "learning_rate": 0.00021439454161590257,
      "loss": 2.8317,
      "step": 136424
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.272625207901001,
      "learning_rate": 0.00021439062113529393,
      "loss": 2.9065,
      "step": 136425
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.339812994003296,
      "learning_rate": 0.00021438670067060167,
      "loss": 2.9174,
      "step": 136426
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.172412395477295,
      "learning_rate": 0.0002143827802218265,
      "loss": 2.9487,
      "step": 136427
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.16542387008667,
      "learning_rate": 0.00021437885978896923,
      "loss": 2.8559,
      "step": 136428
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.907318115234375,
      "learning_rate": 0.00021437493937203055,
      "loss": 2.9641,
      "step": 136429
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.218641996383667,
      "learning_rate": 0.0002143710189710113,
      "loss": 3.2651,
      "step": 136430
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.1963419914245605,
      "learning_rate": 0.00021436709858591198,
      "loss": 3.0216,
      "step": 136431
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7942001819610596,
      "learning_rate": 0.0002143631782167334,
      "loss": 2.9294,
      "step": 136432
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4872708320617676,
      "learning_rate": 0.00021435925786347633,
      "loss": 2.9036,
      "step": 136433
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.529007911682129,
      "learning_rate": 0.00021435533752614148,
      "loss": 3.0733,
      "step": 136434
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3808770179748535,
      "learning_rate": 0.00021435141720472966,
      "loss": 2.8666,
      "step": 136435
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1785941123962402,
      "learning_rate": 0.0002143474968992416,
      "loss": 3.0449,
      "step": 136436
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1705617904663086,
      "learning_rate": 0.00021434357660967782,
      "loss": 3.1084,
      "step": 136437
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0498857498168945,
      "learning_rate": 0.0002143396563360392,
      "loss": 3.0975,
      "step": 136438
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1415529251098633,
      "learning_rate": 0.00021433573607832648,
      "loss": 3.0027,
      "step": 136439
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8281524181365967,
      "learning_rate": 0.0002143318158365403,
      "loss": 3.2885,
      "step": 136440
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8245288133621216,
      "learning_rate": 0.00021432789561068148,
      "loss": 2.8946,
      "step": 136441
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9295746088027954,
      "learning_rate": 0.00021432397540075088,
      "loss": 2.8854,
      "step": 136442
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.53365421295166,
      "learning_rate": 0.00021432005520674894,
      "loss": 3.1552,
      "step": 136443
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9462738037109375,
      "learning_rate": 0.0002143161350286765,
      "loss": 2.9455,
      "step": 136444
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.527824640274048,
      "learning_rate": 0.00021431221486653435,
      "loss": 2.9215,
      "step": 136445
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.324028968811035,
      "learning_rate": 0.0002143082947203231,
      "loss": 2.9176,
      "step": 136446
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.16867733001709,
      "learning_rate": 0.0002143043745900436,
      "loss": 2.7694,
      "step": 136447
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.246490001678467,
      "learning_rate": 0.00021430045447569658,
      "loss": 2.8243,
      "step": 136448
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2639944553375244,
      "learning_rate": 0.00021429653437728276,
      "loss": 2.7503,
      "step": 136449
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.9145843982696533,
      "learning_rate": 0.00021429261429480273,
      "loss": 3.0956,
      "step": 136450
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8197946548461914,
      "learning_rate": 0.00021428869422825732,
      "loss": 3.1289,
      "step": 136451
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5075395107269287,
      "learning_rate": 0.00021428477417764728,
      "loss": 3.0991,
      "step": 136452
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.198070526123047,
      "learning_rate": 0.00021428085414297332,
      "loss": 2.9462,
      "step": 136453
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.130572319030762,
      "learning_rate": 0.00021427693412423617,
      "loss": 2.9818,
      "step": 136454
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3491103649139404,
      "learning_rate": 0.0002142730141214366,
      "loss": 2.7182,
      "step": 136455
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.456857442855835,
      "learning_rate": 0.00021426909413457522,
      "loss": 3.012,
      "step": 136456
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.274503707885742,
      "learning_rate": 0.00021426517416365292,
      "loss": 2.8935,
      "step": 136457
    },
    {
      "epoch": 1.78,
      "grad_norm": 5.082656383514404,
      "learning_rate": 0.0002142612542086703,
      "loss": 2.9684,
      "step": 136458
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.884948492050171,
      "learning_rate": 0.00021425733426962812,
      "loss": 2.9365,
      "step": 136459
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8623480796813965,
      "learning_rate": 0.00021425341434652708,
      "loss": 2.8564,
      "step": 136460
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.14941668510437,
      "learning_rate": 0.00021424949443936807,
      "loss": 2.918,
      "step": 136461
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.99555504322052,
      "learning_rate": 0.0002142455745481516,
      "loss": 3.0013,
      "step": 136462
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3384547233581543,
      "learning_rate": 0.0002142416546728785,
      "loss": 2.937,
      "step": 136463
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.872215747833252,
      "learning_rate": 0.00021423773481354954,
      "loss": 2.8408,
      "step": 136464
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0677590370178223,
      "learning_rate": 0.00021423381497016536,
      "loss": 2.9383,
      "step": 136465
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.7897142171859741,
      "learning_rate": 0.00021422989514272673,
      "loss": 3.0234,
      "step": 136466
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.116645336151123,
      "learning_rate": 0.00021422597533123444,
      "loss": 2.9177,
      "step": 136467
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.169738292694092,
      "learning_rate": 0.00021422205553568914,
      "loss": 3.0516,
      "step": 136468
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.490618944168091,
      "learning_rate": 0.00021421813575609152,
      "loss": 2.9537,
      "step": 136469
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8312147855758667,
      "learning_rate": 0.0002142142159924424,
      "loss": 3.103,
      "step": 136470
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0089194774627686,
      "learning_rate": 0.00021421029624474255,
      "loss": 2.9633,
      "step": 136471
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.756330728530884,
      "learning_rate": 0.00021420637651299254,
      "loss": 2.9304,
      "step": 136472
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1393179893493652,
      "learning_rate": 0.00021420245679719326,
      "loss": 3.079,
      "step": 136473
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2861666679382324,
      "learning_rate": 0.0002141985370973453,
      "loss": 3.1068,
      "step": 136474
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1195528507232666,
      "learning_rate": 0.00021419461741344943,
      "loss": 3.072,
      "step": 136475
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9446512460708618,
      "learning_rate": 0.0002141906977455064,
      "loss": 2.9794,
      "step": 136476
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.532674789428711,
      "learning_rate": 0.00021418677809351697,
      "loss": 2.8729,
      "step": 136477
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3638827800750732,
      "learning_rate": 0.00021418285845748182,
      "loss": 3.1349,
      "step": 136478
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.114535093307495,
      "learning_rate": 0.0002141789388374018,
      "loss": 2.7657,
      "step": 136479
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.682098388671875,
      "learning_rate": 0.00021417501923327745,
      "loss": 2.8189,
      "step": 136480
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.571965217590332,
      "learning_rate": 0.00021417109964510955,
      "loss": 2.8724,
      "step": 136481
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1655349731445312,
      "learning_rate": 0.0002141671800728989,
      "loss": 2.8813,
      "step": 136482
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9266921281814575,
      "learning_rate": 0.00021416326051664614,
      "loss": 2.7632,
      "step": 136483
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.039349317550659,
      "learning_rate": 0.0002141593409763521,
      "loss": 2.6693,
      "step": 136484
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1261184215545654,
      "learning_rate": 0.00021415542145201757,
      "loss": 2.8649,
      "step": 136485
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9189183712005615,
      "learning_rate": 0.00021415150194364303,
      "loss": 3.0614,
      "step": 136486
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3211421966552734,
      "learning_rate": 0.00021414758245122935,
      "loss": 3.1042,
      "step": 136487
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.6060880422592163,
      "learning_rate": 0.00021414366297477724,
      "loss": 2.9729,
      "step": 136488
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3552112579345703,
      "learning_rate": 0.00021413974351428745,
      "loss": 2.9356,
      "step": 136489
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7333054542541504,
      "learning_rate": 0.0002141358240697607,
      "loss": 2.845,
      "step": 136490
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.158550500869751,
      "learning_rate": 0.00021413190464119786,
      "loss": 3.0243,
      "step": 136491
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.75974178314209,
      "learning_rate": 0.00021412798522859943,
      "loss": 3.0199,
      "step": 136492
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.5105957984924316,
      "learning_rate": 0.00021412406583196618,
      "loss": 2.8391,
      "step": 136493
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0609796047210693,
      "learning_rate": 0.00021412014645129894,
      "loss": 3.2378,
      "step": 136494
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.124286651611328,
      "learning_rate": 0.0002141162270865983,
      "loss": 3.0455,
      "step": 136495
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.668527126312256,
      "learning_rate": 0.0002141123077378651,
      "loss": 3.0583,
      "step": 136496
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4097468852996826,
      "learning_rate": 0.00021410838840510025,
      "loss": 3.1016,
      "step": 136497
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.4985342025756836,
      "learning_rate": 0.00021410446908830407,
      "loss": 2.972,
      "step": 136498
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.989599347114563,
      "learning_rate": 0.00021410054978747753,
      "loss": 3.0137,
      "step": 136499
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3725035190582275,
      "learning_rate": 0.00021409663050262127,
      "loss": 3.0217,
      "step": 136500
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4595205783843994,
      "learning_rate": 0.00021409271123373611,
      "loss": 3.0189,
      "step": 136501
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2603402137756348,
      "learning_rate": 0.0002140887919808227,
      "loss": 2.8631,
      "step": 136502
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.4600021839141846,
      "learning_rate": 0.00021408487274388198,
      "loss": 3.189,
      "step": 136503
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.432921409606934,
      "learning_rate": 0.00021408095352291434,
      "loss": 3.0531,
      "step": 136504
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.526660919189453,
      "learning_rate": 0.00021407703431792067,
      "loss": 2.6293,
      "step": 136505
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9479949474334717,
      "learning_rate": 0.00021407311512890173,
      "loss": 2.9216,
      "step": 136506
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0796608924865723,
      "learning_rate": 0.00021406919595585817,
      "loss": 3.0804,
      "step": 136507
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3235366344451904,
      "learning_rate": 0.0002140652767987908,
      "loss": 2.995,
      "step": 136508
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9028685092926025,
      "learning_rate": 0.00021406135765770045,
      "loss": 2.9291,
      "step": 136509
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.387355089187622,
      "learning_rate": 0.0002140574385325876,
      "loss": 2.9922,
      "step": 136510
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8695344924926758,
      "learning_rate": 0.00021405351942345305,
      "loss": 2.981,
      "step": 136511
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3658864498138428,
      "learning_rate": 0.0002140496003302976,
      "loss": 2.9536,
      "step": 136512
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9787495136260986,
      "learning_rate": 0.000214045681253122,
      "loss": 2.7689,
      "step": 136513
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.298649549484253,
      "learning_rate": 0.00021404176219192686,
      "loss": 2.9793,
      "step": 136514
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7298903465270996,
      "learning_rate": 0.00021403784314671316,
      "loss": 2.8585,
      "step": 136515
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0468931198120117,
      "learning_rate": 0.00021403392411748127,
      "loss": 3.1727,
      "step": 136516
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.281479597091675,
      "learning_rate": 0.0002140300051042321,
      "loss": 3.0016,
      "step": 136517
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.669560432434082,
      "learning_rate": 0.0002140260861069664,
      "loss": 2.9695,
      "step": 136518
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1024441719055176,
      "learning_rate": 0.0002140221671256849,
      "loss": 2.9073,
      "step": 136519
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0835342407226562,
      "learning_rate": 0.00021401824816038822,
      "loss": 2.989,
      "step": 136520
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.418102979660034,
      "learning_rate": 0.0002140143292110772,
      "loss": 2.902,
      "step": 136521
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1659421920776367,
      "learning_rate": 0.00021401041027775274,
      "loss": 3.1435,
      "step": 136522
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.937215566635132,
      "learning_rate": 0.00021400649136041518,
      "loss": 3.197,
      "step": 136523
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.227856397628784,
      "learning_rate": 0.00021400257245906542,
      "loss": 3.2024,
      "step": 136524
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9890915155410767,
      "learning_rate": 0.00021399865357370424,
      "loss": 2.9599,
      "step": 136525
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3929545879364014,
      "learning_rate": 0.00021399473470433232,
      "loss": 2.8474,
      "step": 136526
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.847358226776123,
      "learning_rate": 0.00021399081585095039,
      "loss": 2.9558,
      "step": 136527
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0233209133148193,
      "learning_rate": 0.00021398689701355935,
      "loss": 2.9197,
      "step": 136528
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.443952798843384,
      "learning_rate": 0.0002139829781921596,
      "loss": 2.7955,
      "step": 136529
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2485132217407227,
      "learning_rate": 0.0002139790593867521,
      "loss": 3.2122,
      "step": 136530
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.210629940032959,
      "learning_rate": 0.00021397514059733747,
      "loss": 3.3544,
      "step": 136531
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2839443683624268,
      "learning_rate": 0.0002139712218239165,
      "loss": 3.0307,
      "step": 136532
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4184186458587646,
      "learning_rate": 0.0002139673030664899,
      "loss": 3.1113,
      "step": 136533
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0567684173583984,
      "learning_rate": 0.00021396338432505852,
      "loss": 2.9552,
      "step": 136534
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.212414264678955,
      "learning_rate": 0.00021395946559962285,
      "loss": 3.019,
      "step": 136535
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2594897747039795,
      "learning_rate": 0.0002139555468901837,
      "loss": 3.0692,
      "step": 136536
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.519826889038086,
      "learning_rate": 0.0002139516281967419,
      "loss": 2.9383,
      "step": 136537
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.5083811283111572,
      "learning_rate": 0.00021394770951929808,
      "loss": 3.0147,
      "step": 136538
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3812153339385986,
      "learning_rate": 0.00021394379085785302,
      "loss": 2.8862,
      "step": 136539
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3685078620910645,
      "learning_rate": 0.00021393987221240754,
      "loss": 2.9793,
      "step": 136540
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0876548290252686,
      "learning_rate": 0.0002139359535829622,
      "loss": 2.9368,
      "step": 136541
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.8140339851379395,
      "learning_rate": 0.00021393203496951777,
      "loss": 2.9636,
      "step": 136542
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.453826427459717,
      "learning_rate": 0.00021392811637207496,
      "loss": 3.0741,
      "step": 136543
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8418123722076416,
      "learning_rate": 0.00021392419779063456,
      "loss": 2.8371,
      "step": 136544
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1603541374206543,
      "learning_rate": 0.00021392027922519723,
      "loss": 3.2695,
      "step": 136545
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9699642658233643,
      "learning_rate": 0.0002139163606757639,
      "loss": 3.2247,
      "step": 136546
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0903446674346924,
      "learning_rate": 0.00021391244214233503,
      "loss": 3.0338,
      "step": 136547
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.032034158706665,
      "learning_rate": 0.00021390852362491143,
      "loss": 2.803,
      "step": 136548
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8114666938781738,
      "learning_rate": 0.00021390460512349393,
      "loss": 2.9808,
      "step": 136549
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4098188877105713,
      "learning_rate": 0.0002139006866380832,
      "loss": 2.7051,
      "step": 136550
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.103118896484375,
      "learning_rate": 0.0002138967681686799,
      "loss": 2.9356,
      "step": 136551
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1131339073181152,
      "learning_rate": 0.00021389284971528487,
      "loss": 2.7977,
      "step": 136552
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.135145902633667,
      "learning_rate": 0.00021388893127789873,
      "loss": 2.9861,
      "step": 136553
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.062241554260254,
      "learning_rate": 0.00021388501285652227,
      "loss": 2.8516,
      "step": 136554
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.480337381362915,
      "learning_rate": 0.00021388109445115625,
      "loss": 3.0143,
      "step": 136555
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6962876319885254,
      "learning_rate": 0.00021387717606180138,
      "loss": 2.8925,
      "step": 136556
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4956254959106445,
      "learning_rate": 0.0002138732576884583,
      "loss": 3.0526,
      "step": 136557
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3203752040863037,
      "learning_rate": 0.00021386933933112786,
      "loss": 3.1814,
      "step": 136558
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.134439706802368,
      "learning_rate": 0.00021386542098981073,
      "loss": 3.2462,
      "step": 136559
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5258700847625732,
      "learning_rate": 0.0002138615026645076,
      "loss": 2.85,
      "step": 136560
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0944488048553467,
      "learning_rate": 0.00021385758435521927,
      "loss": 2.9229,
      "step": 136561
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.119504690170288,
      "learning_rate": 0.00021385366606194642,
      "loss": 3.2632,
      "step": 136562
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4460926055908203,
      "learning_rate": 0.00021384974778468983,
      "loss": 2.8144,
      "step": 136563
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9680325984954834,
      "learning_rate": 0.00021384582952345025,
      "loss": 2.9185,
      "step": 136564
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6679818630218506,
      "learning_rate": 0.0002138419112782283,
      "loss": 3.0025,
      "step": 136565
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8858851194381714,
      "learning_rate": 0.00021383799304902474,
      "loss": 3.0016,
      "step": 136566
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1117966175079346,
      "learning_rate": 0.0002138340748358403,
      "loss": 2.7257,
      "step": 136567
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2334141731262207,
      "learning_rate": 0.00021383015663867578,
      "loss": 3.1394,
      "step": 136568
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9545663595199585,
      "learning_rate": 0.00021382623845753185,
      "loss": 2.9139,
      "step": 136569
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0896174907684326,
      "learning_rate": 0.00021382232029240936,
      "loss": 3.0426,
      "step": 136570
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2759504318237305,
      "learning_rate": 0.00021381840214330878,
      "loss": 2.9344,
      "step": 136571
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1448912620544434,
      "learning_rate": 0.000213814484010231,
      "loss": 2.8909,
      "step": 136572
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3771543502807617,
      "learning_rate": 0.00021381056589317676,
      "loss": 3.0229,
      "step": 136573
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.711350202560425,
      "learning_rate": 0.00021380664779214675,
      "loss": 3.1466,
      "step": 136574
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0303916931152344,
      "learning_rate": 0.0002138027297071417,
      "loss": 2.8756,
      "step": 136575
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.138335943222046,
      "learning_rate": 0.00021379881163816248,
      "loss": 3.0649,
      "step": 136576
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.461820125579834,
      "learning_rate": 0.0002137948935852096,
      "loss": 2.8929,
      "step": 136577
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6267664432525635,
      "learning_rate": 0.00021379097554828384,
      "loss": 2.8836,
      "step": 136578
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8519657850265503,
      "learning_rate": 0.00021378705752738596,
      "loss": 3.0961,
      "step": 136579
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5780692100524902,
      "learning_rate": 0.00021378313952251672,
      "loss": 3.0831,
      "step": 136580
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.932164192199707,
      "learning_rate": 0.00021377922153367678,
      "loss": 2.9146,
      "step": 136581
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4435603618621826,
      "learning_rate": 0.00021377530356086707,
      "loss": 3.0425,
      "step": 136582
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3069136142730713,
      "learning_rate": 0.00021377138560408802,
      "loss": 3.0917,
      "step": 136583
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5439813137054443,
      "learning_rate": 0.00021376746766334052,
      "loss": 2.7726,
      "step": 136584
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1527321338653564,
      "learning_rate": 0.00021376354973862523,
      "loss": 2.7176,
      "step": 136585
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.665292501449585,
      "learning_rate": 0.00021375963182994294,
      "loss": 3.0665,
      "step": 136586
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.997127890586853,
      "learning_rate": 0.00021375571393729442,
      "loss": 3.0211,
      "step": 136587
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.117577314376831,
      "learning_rate": 0.00021375179606068027,
      "loss": 2.8077,
      "step": 136588
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1900620460510254,
      "learning_rate": 0.00021374787820010145,
      "loss": 3.0142,
      "step": 136589
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.769514799118042,
      "learning_rate": 0.00021374396035555836,
      "loss": 2.8,
      "step": 136590
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5048601627349854,
      "learning_rate": 0.0002137400425270519,
      "loss": 2.934,
      "step": 136591
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.104513645172119,
      "learning_rate": 0.00021373612471458286,
      "loss": 3.1348,
      "step": 136592
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5542049407958984,
      "learning_rate": 0.00021373220691815186,
      "loss": 2.7887,
      "step": 136593
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.384333372116089,
      "learning_rate": 0.00021372828913775968,
      "loss": 3.2029,
      "step": 136594
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.465263605117798,
      "learning_rate": 0.00021372437137340714,
      "loss": 3.0051,
      "step": 136595
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.050220251083374,
      "learning_rate": 0.00021372045362509477,
      "loss": 3.074,
      "step": 136596
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4148313999176025,
      "learning_rate": 0.0002137165358928234,
      "loss": 2.8438,
      "step": 136597
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.145228862762451,
      "learning_rate": 0.00021371261817659373,
      "loss": 3.053,
      "step": 136598
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5438103675842285,
      "learning_rate": 0.0002137087004764065,
      "loss": 3.0759,
      "step": 136599
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2756600379943848,
      "learning_rate": 0.0002137047827922625,
      "loss": 2.7085,
      "step": 136600
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.027608633041382,
      "learning_rate": 0.0002137008651241625,
      "loss": 2.8566,
      "step": 136601
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4583523273468018,
      "learning_rate": 0.00021369694747210705,
      "loss": 3.2662,
      "step": 136602
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9791438579559326,
      "learning_rate": 0.0002136930298360969,
      "loss": 2.8828,
      "step": 136603
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.661870241165161,
      "learning_rate": 0.0002136891122161329,
      "loss": 2.9451,
      "step": 136604
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9992479085922241,
      "learning_rate": 0.00021368519461221574,
      "loss": 2.9833,
      "step": 136605
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.266472816467285,
      "learning_rate": 0.0002136812770243461,
      "loss": 3.2673,
      "step": 136606
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.135775566101074,
      "learning_rate": 0.00021367735945252483,
      "loss": 2.8437,
      "step": 136607
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.864114284515381,
      "learning_rate": 0.0002136734418967525,
      "loss": 3.0203,
      "step": 136608
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3044352531433105,
      "learning_rate": 0.0002136695243570299,
      "loss": 3.104,
      "step": 136609
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5457966327667236,
      "learning_rate": 0.00021366560683335771,
      "loss": 3.1643,
      "step": 136610
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.9148526191711426,
      "learning_rate": 0.0002136616893257368,
      "loss": 2.8906,
      "step": 136611
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.289896011352539,
      "learning_rate": 0.00021365777183416772,
      "loss": 3.189,
      "step": 136612
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.012167930603027,
      "learning_rate": 0.0002136538543586515,
      "loss": 2.9308,
      "step": 136613
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9870264530181885,
      "learning_rate": 0.00021364993689918847,
      "loss": 2.8983,
      "step": 136614
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.154664993286133,
      "learning_rate": 0.0002136460194557796,
      "loss": 2.7483,
      "step": 136615
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.128300666809082,
      "learning_rate": 0.00021364210202842547,
      "loss": 2.9713,
      "step": 136616
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0188302993774414,
      "learning_rate": 0.000213638184617127,
      "loss": 3.2067,
      "step": 136617
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9923077821731567,
      "learning_rate": 0.00021363426722188482,
      "loss": 2.7789,
      "step": 136618
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0831758975982666,
      "learning_rate": 0.00021363034984269973,
      "loss": 2.8433,
      "step": 136619
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0480551719665527,
      "learning_rate": 0.0002136264324795723,
      "loss": 3.1821,
      "step": 136620
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.568019390106201,
      "learning_rate": 0.00021362251513250334,
      "loss": 2.7693,
      "step": 136621
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.194314479827881,
      "learning_rate": 0.00021361859780149358,
      "loss": 2.9055,
      "step": 136622
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.9562978744506836,
      "learning_rate": 0.00021361468048654373,
      "loss": 2.9884,
      "step": 136623
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.028498411178589,
      "learning_rate": 0.00021361076318765458,
      "loss": 2.7107,
      "step": 136624
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9742259979248047,
      "learning_rate": 0.00021360684590482687,
      "loss": 3.0818,
      "step": 136625
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1421396732330322,
      "learning_rate": 0.0002136029286380613,
      "loss": 2.7405,
      "step": 136626
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0547878742218018,
      "learning_rate": 0.00021359901138735848,
      "loss": 2.8874,
      "step": 136627
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9988616704940796,
      "learning_rate": 0.00021359509415271922,
      "loss": 3.016,
      "step": 136628
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8242076635360718,
      "learning_rate": 0.00021359117693414432,
      "loss": 2.817,
      "step": 136629
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0929763317108154,
      "learning_rate": 0.00021358725973163442,
      "loss": 3.1059,
      "step": 136630
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.019899606704712,
      "learning_rate": 0.00021358334254519034,
      "loss": 3.086,
      "step": 136631
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.274397611618042,
      "learning_rate": 0.00021357942537481267,
      "loss": 3.0299,
      "step": 136632
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1152212619781494,
      "learning_rate": 0.00021357550822050228,
      "loss": 3.1509,
      "step": 136633
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.087900400161743,
      "learning_rate": 0.0002135715910822598,
      "loss": 2.9368,
      "step": 136634
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.868430495262146,
      "learning_rate": 0.000213567673960086,
      "loss": 2.711,
      "step": 136635
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.350794553756714,
      "learning_rate": 0.00021356375685398156,
      "loss": 2.6995,
      "step": 136636
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0896456241607666,
      "learning_rate": 0.00021355983976394734,
      "loss": 2.8511,
      "step": 136637
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8680922985076904,
      "learning_rate": 0.0002135559226899839,
      "loss": 2.705,
      "step": 136638
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5348169803619385,
      "learning_rate": 0.00021355200563209206,
      "loss": 3.0553,
      "step": 136639
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8152945041656494,
      "learning_rate": 0.00021354808859027255,
      "loss": 2.6321,
      "step": 136640
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2490062713623047,
      "learning_rate": 0.00021354417156452612,
      "loss": 3.0392,
      "step": 136641
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4249894618988037,
      "learning_rate": 0.00021354025455485337,
      "loss": 2.9398,
      "step": 136642
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.7499278783798218,
      "learning_rate": 0.00021353633756125524,
      "loss": 2.8003,
      "step": 136643
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.68869686126709,
      "learning_rate": 0.00021353242058373223,
      "loss": 3.0344,
      "step": 136644
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1906633377075195,
      "learning_rate": 0.00021352850362228517,
      "loss": 3.1881,
      "step": 136645
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.678110122680664,
      "learning_rate": 0.00021352458667691485,
      "loss": 2.8405,
      "step": 136646
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.819357991218567,
      "learning_rate": 0.00021352066974762187,
      "loss": 2.7539,
      "step": 136647
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.867351770401001,
      "learning_rate": 0.00021351675283440717,
      "loss": 3.0241,
      "step": 136648
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1779019832611084,
      "learning_rate": 0.00021351283593727127,
      "loss": 2.8758,
      "step": 136649
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2760958671569824,
      "learning_rate": 0.00021350891905621498,
      "loss": 3.0964,
      "step": 136650
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8783378601074219,
      "learning_rate": 0.00021350500219123893,
      "loss": 3.0609,
      "step": 136651
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.264171600341797,
      "learning_rate": 0.00021350108534234398,
      "loss": 2.7555,
      "step": 136652
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9351950883865356,
      "learning_rate": 0.00021349716850953078,
      "loss": 3.0138,
      "step": 136653
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9413076639175415,
      "learning_rate": 0.0002134932516928001,
      "loss": 2.9506,
      "step": 136654
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0612874031066895,
      "learning_rate": 0.00021348933489215272,
      "loss": 3.0034,
      "step": 136655
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.101264476776123,
      "learning_rate": 0.00021348541810758934,
      "loss": 3.1786,
      "step": 136656
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0338656902313232,
      "learning_rate": 0.00021348150133911054,
      "loss": 2.9119,
      "step": 136657
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.303769826889038,
      "learning_rate": 0.0002134775845867172,
      "loss": 2.9965,
      "step": 136658
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1887259483337402,
      "learning_rate": 0.00021347366785041,
      "loss": 2.7842,
      "step": 136659
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3410377502441406,
      "learning_rate": 0.0002134697511301897,
      "loss": 2.9366,
      "step": 136660
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.308091878890991,
      "learning_rate": 0.00021346583442605698,
      "loss": 3.3677,
      "step": 136661
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2353103160858154,
      "learning_rate": 0.00021346191773801272,
      "loss": 3.1788,
      "step": 136662
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3487637042999268,
      "learning_rate": 0.00021345800106605737,
      "loss": 3.0057,
      "step": 136663
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0628461837768555,
      "learning_rate": 0.00021345408441019186,
      "loss": 3.1478,
      "step": 136664
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1226003170013428,
      "learning_rate": 0.00021345016777041685,
      "loss": 3.0098,
      "step": 136665
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.4358112812042236,
      "learning_rate": 0.0002134462511467331,
      "loss": 3.0978,
      "step": 136666
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.091141939163208,
      "learning_rate": 0.00021344233453914134,
      "loss": 2.9451,
      "step": 136667
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.299191951751709,
      "learning_rate": 0.00021343841794764233,
      "loss": 2.9519,
      "step": 136668
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0804765224456787,
      "learning_rate": 0.00021343450137223667,
      "loss": 3.0344,
      "step": 136669
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.109567880630493,
      "learning_rate": 0.0002134305848129252,
      "loss": 3.0236,
      "step": 136670
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3010311126708984,
      "learning_rate": 0.0002134266682697086,
      "loss": 2.7946,
      "step": 136671
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.131287097930908,
      "learning_rate": 0.00021342275174258764,
      "loss": 3.0773,
      "step": 136672
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.130521059036255,
      "learning_rate": 0.000213418835231563,
      "loss": 2.8035,
      "step": 136673
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4865658283233643,
      "learning_rate": 0.00021341491873663556,
      "loss": 3.2202,
      "step": 136674
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9107956886291504,
      "learning_rate": 0.00021341100225780578,
      "loss": 3.1298,
      "step": 136675
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0379958152770996,
      "learning_rate": 0.00021340708579507456,
      "loss": 2.8684,
      "step": 136676
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9237653017044067,
      "learning_rate": 0.00021340316934844255,
      "loss": 2.8987,
      "step": 136677
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.035353183746338,
      "learning_rate": 0.00021339925291791057,
      "loss": 2.9347,
      "step": 136678
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.137687921524048,
      "learning_rate": 0.0002133953365034793,
      "loss": 2.8478,
      "step": 136679
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0749804973602295,
      "learning_rate": 0.0002133914201051496,
      "loss": 3.0943,
      "step": 136680
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2277369499206543,
      "learning_rate": 0.0002133875037229219,
      "loss": 3.0119,
      "step": 136681
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.212261199951172,
      "learning_rate": 0.00021338358735679714,
      "loss": 2.9701,
      "step": 136682
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.031123638153076,
      "learning_rate": 0.000213379671006776,
      "loss": 3.0815,
      "step": 136683
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7453486919403076,
      "learning_rate": 0.0002133757546728592,
      "loss": 2.8918,
      "step": 136684
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0751192569732666,
      "learning_rate": 0.0002133718383550475,
      "loss": 2.8113,
      "step": 136685
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8079779148101807,
      "learning_rate": 0.00021336792205334172,
      "loss": 3.1413,
      "step": 136686
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.324690341949463,
      "learning_rate": 0.00021336400576774236,
      "loss": 2.9311,
      "step": 136687
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.145278215408325,
      "learning_rate": 0.00021336008949825029,
      "loss": 2.9311,
      "step": 136688
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1685099601745605,
      "learning_rate": 0.00021335617324486615,
      "loss": 3.1836,
      "step": 136689
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.416374444961548,
      "learning_rate": 0.00021335225700759081,
      "loss": 2.9274,
      "step": 136690
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.082379341125488,
      "learning_rate": 0.00021334834078642485,
      "loss": 2.8125,
      "step": 136691
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5790045261383057,
      "learning_rate": 0.00021334442458136924,
      "loss": 3.1352,
      "step": 136692
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2090530395507812,
      "learning_rate": 0.00021334050839242442,
      "loss": 3.0458,
      "step": 136693
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5834290981292725,
      "learning_rate": 0.00021333659221959122,
      "loss": 2.6479,
      "step": 136694
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.453162431716919,
      "learning_rate": 0.00021333267606287037,
      "loss": 2.6971,
      "step": 136695
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.275108575820923,
      "learning_rate": 0.00021332875992226264,
      "loss": 3.0529,
      "step": 136696
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.227788209915161,
      "learning_rate": 0.00021332484379776872,
      "loss": 3.0573,
      "step": 136697
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6222636699676514,
      "learning_rate": 0.00021332092768938944,
      "loss": 2.8699,
      "step": 136698
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.231463670730591,
      "learning_rate": 0.00021331701159712536,
      "loss": 2.9182,
      "step": 136699
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.252748489379883,
      "learning_rate": 0.00021331309552097725,
      "loss": 3.0926,
      "step": 136700
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7543768882751465,
      "learning_rate": 0.0002133091794609459,
      "loss": 3.0718,
      "step": 136701
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7959156036376953,
      "learning_rate": 0.00021330526341703195,
      "loss": 2.9825,
      "step": 136702
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9647200107574463,
      "learning_rate": 0.00021330134738923624,
      "loss": 2.87,
      "step": 136703
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9544521570205688,
      "learning_rate": 0.00021329743137755958,
      "loss": 3.1454,
      "step": 136704
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2546985149383545,
      "learning_rate": 0.00021329351538200243,
      "loss": 3.0642,
      "step": 136705
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3859546184539795,
      "learning_rate": 0.00021328959940256564,
      "loss": 3.0009,
      "step": 136706
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.017006874084473,
      "learning_rate": 0.00021328568343924998,
      "loss": 2.7675,
      "step": 136707
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.184723377227783,
      "learning_rate": 0.00021328176749205608,
      "loss": 3.1364,
      "step": 136708
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5537502765655518,
      "learning_rate": 0.00021327785156098478,
      "loss": 2.8162,
      "step": 136709
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4405040740966797,
      "learning_rate": 0.00021327393564603682,
      "loss": 3.0463,
      "step": 136710
    },
    {
      "epoch": 1.78,
      "grad_norm": 5.45560884475708,
      "learning_rate": 0.00021327001974721287,
      "loss": 2.8603,
      "step": 136711
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4783265590667725,
      "learning_rate": 0.00021326610386451363,
      "loss": 3.0198,
      "step": 136712
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9880530834197998,
      "learning_rate": 0.00021326218799793984,
      "loss": 2.8925,
      "step": 136713
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0174665451049805,
      "learning_rate": 0.0002132582721474922,
      "loss": 3.1264,
      "step": 136714
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3450610637664795,
      "learning_rate": 0.00021325435631317153,
      "loss": 3.0191,
      "step": 136715
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9545202255249023,
      "learning_rate": 0.00021325044049497858,
      "loss": 2.879,
      "step": 136716
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9307206869125366,
      "learning_rate": 0.00021324652469291398,
      "loss": 2.9022,
      "step": 136717
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8565499782562256,
      "learning_rate": 0.00021324260890697846,
      "loss": 2.9373,
      "step": 136718
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9005695581436157,
      "learning_rate": 0.0002132386931371728,
      "loss": 3.1713,
      "step": 136719
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.744760751724243,
      "learning_rate": 0.00021323477738349763,
      "loss": 3.0477,
      "step": 136720
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.455523729324341,
      "learning_rate": 0.0002132308616459538,
      "loss": 2.8333,
      "step": 136721
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9918415546417236,
      "learning_rate": 0.000213226945924542,
      "loss": 3.0447,
      "step": 136722
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.178539991378784,
      "learning_rate": 0.00021322303021926298,
      "loss": 3.024,
      "step": 136723
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.227714776992798,
      "learning_rate": 0.00021321911453011738,
      "loss": 2.9016,
      "step": 136724
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2739851474761963,
      "learning_rate": 0.00021321519885710607,
      "loss": 2.7485,
      "step": 136725
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3203630447387695,
      "learning_rate": 0.0002132112832002296,
      "loss": 3.0273,
      "step": 136726
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1892824172973633,
      "learning_rate": 0.0002132073675594888,
      "loss": 3.0411,
      "step": 136727
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.321533441543579,
      "learning_rate": 0.00021320345193488442,
      "loss": 3.1784,
      "step": 136728
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4722275733947754,
      "learning_rate": 0.00021319953632641716,
      "loss": 2.749,
      "step": 136729
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3304431438446045,
      "learning_rate": 0.0002131956207340877,
      "loss": 3.048,
      "step": 136730
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5491421222686768,
      "learning_rate": 0.00021319170515789686,
      "loss": 3.1602,
      "step": 136731
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.529339075088501,
      "learning_rate": 0.00021318778959784538,
      "loss": 2.6845,
      "step": 136732
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5184824466705322,
      "learning_rate": 0.0002131838740539338,
      "loss": 3.0872,
      "step": 136733
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.370556354522705,
      "learning_rate": 0.000213179958526163,
      "loss": 2.7317,
      "step": 136734
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.392812490463257,
      "learning_rate": 0.00021317604301453378,
      "loss": 2.9359,
      "step": 136735
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1320059299468994,
      "learning_rate": 0.0002131721275190467,
      "loss": 2.9124,
      "step": 136736
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.817568063735962,
      "learning_rate": 0.00021316821203970254,
      "loss": 2.9917,
      "step": 136737
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4285428524017334,
      "learning_rate": 0.00021316429657650208,
      "loss": 2.9,
      "step": 136738
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.107448101043701,
      "learning_rate": 0.00021316038112944595,
      "loss": 2.5487,
      "step": 136739
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9459658861160278,
      "learning_rate": 0.00021315646569853506,
      "loss": 2.9546,
      "step": 136740
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1432199478149414,
      "learning_rate": 0.00021315255028377006,
      "loss": 3.1034,
      "step": 136741
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1766226291656494,
      "learning_rate": 0.00021314863488515153,
      "loss": 2.7976,
      "step": 136742
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1081504821777344,
      "learning_rate": 0.00021314471950268027,
      "loss": 3.0535,
      "step": 136743
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.387057304382324,
      "learning_rate": 0.00021314080413635712,
      "loss": 2.8682,
      "step": 136744
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3842287063598633,
      "learning_rate": 0.00021313688878618267,
      "loss": 2.8883,
      "step": 136745
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0022566318511963,
      "learning_rate": 0.00021313297345215774,
      "loss": 2.8331,
      "step": 136746
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9841831922531128,
      "learning_rate": 0.00021312905813428317,
      "loss": 2.8255,
      "step": 136747
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.1251397132873535,
      "learning_rate": 0.00021312514283255944,
      "loss": 3.0842,
      "step": 136748
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.603675365447998,
      "learning_rate": 0.00021312122754698734,
      "loss": 2.9048,
      "step": 136749
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.038883924484253,
      "learning_rate": 0.00021311731227756768,
      "loss": 2.9496,
      "step": 136750
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5966365337371826,
      "learning_rate": 0.00021311339702430112,
      "loss": 3.009,
      "step": 136751
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.1637628078460693,
      "learning_rate": 0.00021310948178718844,
      "loss": 3.0416,
      "step": 136752
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0202348232269287,
      "learning_rate": 0.00021310556656623045,
      "loss": 2.723,
      "step": 136753
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0382368564605713,
      "learning_rate": 0.00021310165136142765,
      "loss": 3.021,
      "step": 136754
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1584055423736572,
      "learning_rate": 0.00021309773617278095,
      "loss": 3.0121,
      "step": 136755
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7495932579040527,
      "learning_rate": 0.00021309382100029092,
      "loss": 3.0959,
      "step": 136756
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7298378944396973,
      "learning_rate": 0.00021308990584395846,
      "loss": 3.0073,
      "step": 136757
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2845559120178223,
      "learning_rate": 0.00021308599070378424,
      "loss": 3.1288,
      "step": 136758
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7870352268218994,
      "learning_rate": 0.00021308207557976908,
      "loss": 2.8739,
      "step": 136759
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.06563138961792,
      "learning_rate": 0.00021307816047191346,
      "loss": 2.9713,
      "step": 136760
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.823169469833374,
      "learning_rate": 0.00021307424538021826,
      "loss": 3.1019,
      "step": 136761
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.67427659034729,
      "learning_rate": 0.00021307033030468423,
      "loss": 2.9771,
      "step": 136762
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2173449993133545,
      "learning_rate": 0.000213066415245312,
      "loss": 3.0636,
      "step": 136763
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9996373653411865,
      "learning_rate": 0.00021306250020210245,
      "loss": 3.143,
      "step": 136764
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1657209396362305,
      "learning_rate": 0.0002130585851750563,
      "loss": 2.9023,
      "step": 136765
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.6531224250793457,
      "learning_rate": 0.00021305467016417406,
      "loss": 2.8548,
      "step": 136766
    },
    {
      "epoch": 1.78,
      "grad_norm": 5.154167175292969,
      "learning_rate": 0.0002130507551694566,
      "loss": 2.8894,
      "step": 136767
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.039468288421631,
      "learning_rate": 0.00021304684019090466,
      "loss": 2.7554,
      "step": 136768
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1354286670684814,
      "learning_rate": 0.00021304292522851897,
      "loss": 2.9427,
      "step": 136769
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.361783027648926,
      "learning_rate": 0.00021303901028230023,
      "loss": 2.9772,
      "step": 136770
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.882282257080078,
      "learning_rate": 0.0002130350953522493,
      "loss": 3.3849,
      "step": 136771
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.010568618774414,
      "learning_rate": 0.00021303118043836667,
      "loss": 3.0335,
      "step": 136772
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.480989694595337,
      "learning_rate": 0.00021302726554065315,
      "loss": 2.9791,
      "step": 136773
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1710989475250244,
      "learning_rate": 0.00021302335065910953,
      "loss": 2.9147,
      "step": 136774
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.427490234375,
      "learning_rate": 0.00021301943579373652,
      "loss": 2.9867,
      "step": 136775
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.8458995819091797,
      "learning_rate": 0.00021301552094453484,
      "loss": 2.6593,
      "step": 136776
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.96051025390625,
      "learning_rate": 0.00021301160611150532,
      "loss": 2.9388,
      "step": 136777
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.209134817123413,
      "learning_rate": 0.00021300769129464848,
      "loss": 3.0739,
      "step": 136778
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1019225120544434,
      "learning_rate": 0.00021300377649396512,
      "loss": 2.818,
      "step": 136779
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5421009063720703,
      "learning_rate": 0.00021299986170945603,
      "loss": 3.0734,
      "step": 136780
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.0392560958862305,
      "learning_rate": 0.00021299594694112193,
      "loss": 2.6649,
      "step": 136781
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.738581895828247,
      "learning_rate": 0.00021299203218896347,
      "loss": 2.7268,
      "step": 136782
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.962415099143982,
      "learning_rate": 0.0002129881174529816,
      "loss": 3.139,
      "step": 136783
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4482336044311523,
      "learning_rate": 0.00021298420273317676,
      "loss": 3.0913,
      "step": 136784
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.171036720275879,
      "learning_rate": 0.0002129802880295498,
      "loss": 3.0559,
      "step": 136785
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.6768581867218018,
      "learning_rate": 0.00021297637334210138,
      "loss": 2.7823,
      "step": 136786
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4975366592407227,
      "learning_rate": 0.00021297245867083237,
      "loss": 3.0796,
      "step": 136787
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.490950345993042,
      "learning_rate": 0.00021296854401574338,
      "loss": 3.2136,
      "step": 136788
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9875154495239258,
      "learning_rate": 0.00021296462937683518,
      "loss": 2.6691,
      "step": 136789
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8906309604644775,
      "learning_rate": 0.00021296071475410868,
      "loss": 3.0151,
      "step": 136790
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.311478614807129,
      "learning_rate": 0.00021295680014756427,
      "loss": 3.0778,
      "step": 136791
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0188589096069336,
      "learning_rate": 0.00021295288555720282,
      "loss": 2.9236,
      "step": 136792
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3480167388916016,
      "learning_rate": 0.00021294897098302507,
      "loss": 2.7334,
      "step": 136793
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0977752208709717,
      "learning_rate": 0.00021294505642503172,
      "loss": 3.0603,
      "step": 136794
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0775463581085205,
      "learning_rate": 0.0002129411418832236,
      "loss": 3.0054,
      "step": 136795
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.065077543258667,
      "learning_rate": 0.00021293722735760143,
      "loss": 3.0309,
      "step": 136796
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8154758214950562,
      "learning_rate": 0.00021293331284816578,
      "loss": 2.8905,
      "step": 136797
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0123908519744873,
      "learning_rate": 0.0002129293983549175,
      "loss": 3.0492,
      "step": 136798
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.592949628829956,
      "learning_rate": 0.00021292548387785723,
      "loss": 3.2233,
      "step": 136799
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.500047206878662,
      "learning_rate": 0.00021292156941698576,
      "loss": 2.9715,
      "step": 136800
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7196078300476074,
      "learning_rate": 0.00021291765497230382,
      "loss": 3.1536,
      "step": 136801
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.911378264427185,
      "learning_rate": 0.0002129137405438122,
      "loss": 3.1057,
      "step": 136802
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2335684299468994,
      "learning_rate": 0.00021290982613151157,
      "loss": 2.7046,
      "step": 136803
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.21003794670105,
      "learning_rate": 0.00021290591173540258,
      "loss": 2.9273,
      "step": 136804
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.479132652282715,
      "learning_rate": 0.00021290199735548605,
      "loss": 3.0902,
      "step": 136805
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.989655017852783,
      "learning_rate": 0.00021289808299176265,
      "loss": 3.0982,
      "step": 136806
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.6780309677124023,
      "learning_rate": 0.00021289416864423317,
      "loss": 2.7946,
      "step": 136807
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.1354284286499023,
      "learning_rate": 0.00021289025431289833,
      "loss": 3.0487,
      "step": 136808
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.168266773223877,
      "learning_rate": 0.0002128863399977588,
      "loss": 2.9287,
      "step": 136809
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8654980659484863,
      "learning_rate": 0.0002128824256988154,
      "loss": 3.1097,
      "step": 136810
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5791659355163574,
      "learning_rate": 0.00021287851141606873,
      "loss": 2.729,
      "step": 136811
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9509238004684448,
      "learning_rate": 0.0002128745971495196,
      "loss": 3.1179,
      "step": 136812
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9413416385650635,
      "learning_rate": 0.00021287068289916873,
      "loss": 2.9084,
      "step": 136813
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7744956016540527,
      "learning_rate": 0.0002128667686650169,
      "loss": 2.8954,
      "step": 136814
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.559483528137207,
      "learning_rate": 0.0002128628544470647,
      "loss": 3.1889,
      "step": 136815
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0193965435028076,
      "learning_rate": 0.000212858940245313,
      "loss": 3.0041,
      "step": 136816
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.393444776535034,
      "learning_rate": 0.00021285502605976247,
      "loss": 3.142,
      "step": 136817
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6785337924957275,
      "learning_rate": 0.0002128511118904138,
      "loss": 2.834,
      "step": 136818
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0992050170898438,
      "learning_rate": 0.00021284719773726773,
      "loss": 3.0788,
      "step": 136819
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.281524896621704,
      "learning_rate": 0.0002128432836003251,
      "loss": 3.026,
      "step": 136820
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4973649978637695,
      "learning_rate": 0.0002128393694795865,
      "loss": 2.7606,
      "step": 136821
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0987071990966797,
      "learning_rate": 0.0002128354553750527,
      "loss": 3.1288,
      "step": 136822
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2502543926239014,
      "learning_rate": 0.00021283154128672437,
      "loss": 2.9909,
      "step": 136823
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.262112140655518,
      "learning_rate": 0.00021282762721460243,
      "loss": 2.9328,
      "step": 136824
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1478726863861084,
      "learning_rate": 0.0002128237131586874,
      "loss": 2.9611,
      "step": 136825
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5795366764068604,
      "learning_rate": 0.00021281979911898016,
      "loss": 2.9623,
      "step": 136826
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.395387887954712,
      "learning_rate": 0.0002128158850954813,
      "loss": 2.8418,
      "step": 136827
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3237879276275635,
      "learning_rate": 0.00021281197108819158,
      "loss": 3.1853,
      "step": 136828
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.379763126373291,
      "learning_rate": 0.00021280805709711183,
      "loss": 3.0212,
      "step": 136829
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.795912265777588,
      "learning_rate": 0.00021280414312224263,
      "loss": 2.9153,
      "step": 136830
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.217942953109741,
      "learning_rate": 0.00021280022916358483,
      "loss": 2.9704,
      "step": 136831
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.683021068572998,
      "learning_rate": 0.00021279631522113922,
      "loss": 3.0909,
      "step": 136832
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.364150047302246,
      "learning_rate": 0.0002127924012949063,
      "loss": 2.8568,
      "step": 136833
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.174551010131836,
      "learning_rate": 0.00021278848738488692,
      "loss": 2.8714,
      "step": 136834
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0066306591033936,
      "learning_rate": 0.00021278457349108184,
      "loss": 3.2623,
      "step": 136835
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5174500942230225,
      "learning_rate": 0.00021278065961349168,
      "loss": 2.9406,
      "step": 136836
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2823126316070557,
      "learning_rate": 0.00021277674575211728,
      "loss": 3.0638,
      "step": 136837
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.407876491546631,
      "learning_rate": 0.00021277283190695949,
      "loss": 2.8559,
      "step": 136838
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.144510507583618,
      "learning_rate": 0.00021276891807801873,
      "loss": 2.8829,
      "step": 136839
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.824782609939575,
      "learning_rate": 0.00021276500426529587,
      "loss": 2.9327,
      "step": 136840
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.549729347229004,
      "learning_rate": 0.00021276109046879162,
      "loss": 2.995,
      "step": 136841
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2909462451934814,
      "learning_rate": 0.00021275717668850676,
      "loss": 3.0312,
      "step": 136842
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3387303352355957,
      "learning_rate": 0.00021275326292444197,
      "loss": 3.1225,
      "step": 136843
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9988470077514648,
      "learning_rate": 0.00021274934917659815,
      "loss": 2.9122,
      "step": 136844
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8393211364746094,
      "learning_rate": 0.0002127454354449757,
      "loss": 2.8974,
      "step": 136845
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3938522338867188,
      "learning_rate": 0.00021274152172957555,
      "loss": 3.3403,
      "step": 136846
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2252039909362793,
      "learning_rate": 0.00021273760803039842,
      "loss": 2.8928,
      "step": 136847
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9870569705963135,
      "learning_rate": 0.00021273369434744496,
      "loss": 2.8611,
      "step": 136848
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3398807048797607,
      "learning_rate": 0.000212729780680716,
      "loss": 2.7041,
      "step": 136849
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9193177223205566,
      "learning_rate": 0.00021272586703021234,
      "loss": 2.9359,
      "step": 136850
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.4697811603546143,
      "learning_rate": 0.00021272195339593444,
      "loss": 3.0654,
      "step": 136851
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6110239028930664,
      "learning_rate": 0.00021271803977788317,
      "loss": 3.0868,
      "step": 136852
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.161778688430786,
      "learning_rate": 0.0002127141261760593,
      "loss": 2.8473,
      "step": 136853
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1689369678497314,
      "learning_rate": 0.0002127102125904635,
      "loss": 2.9007,
      "step": 136854
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.451716899871826,
      "learning_rate": 0.00021270629902109652,
      "loss": 3.0355,
      "step": 136855
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.650923728942871,
      "learning_rate": 0.0002127023854679591,
      "loss": 2.9761,
      "step": 136856
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.22830867767334,
      "learning_rate": 0.0002126984719310521,
      "loss": 3.1088,
      "step": 136857
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.275946855545044,
      "learning_rate": 0.00021269455841037592,
      "loss": 3.1796,
      "step": 136858
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.594453811645508,
      "learning_rate": 0.0002126906449059315,
      "loss": 2.9007,
      "step": 136859
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1143198013305664,
      "learning_rate": 0.00021268673141771954,
      "loss": 3.1259,
      "step": 136860
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.818438172340393,
      "learning_rate": 0.00021268281794574077,
      "loss": 3.0686,
      "step": 136861
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.552384853363037,
      "learning_rate": 0.0002126789044899959,
      "loss": 2.8225,
      "step": 136862
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2292115688323975,
      "learning_rate": 0.00021267499105048578,
      "loss": 2.7783,
      "step": 136863
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9456510543823242,
      "learning_rate": 0.00021267107762721094,
      "loss": 2.9058,
      "step": 136864
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.233182907104492,
      "learning_rate": 0.00021266716422017214,
      "loss": 2.9432,
      "step": 136865
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.522214889526367,
      "learning_rate": 0.00021266325082937025,
      "loss": 2.9591,
      "step": 136866
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0853230953216553,
      "learning_rate": 0.00021265933745480582,
      "loss": 3.0045,
      "step": 136867
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.22663950920105,
      "learning_rate": 0.0002126554240964797,
      "loss": 3.0758,
      "step": 136868
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.343045949935913,
      "learning_rate": 0.00021265151075439273,
      "loss": 3.0116,
      "step": 136869
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4230005741119385,
      "learning_rate": 0.00021264759742854535,
      "loss": 2.9119,
      "step": 136870
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0078208446502686,
      "learning_rate": 0.00021264368411893844,
      "loss": 2.92,
      "step": 136871
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1249806880950928,
      "learning_rate": 0.0002126397708255727,
      "loss": 3.1399,
      "step": 136872
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3891844749450684,
      "learning_rate": 0.0002126358575484489,
      "loss": 3.1876,
      "step": 136873
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9684900045394897,
      "learning_rate": 0.00021263194428756773,
      "loss": 2.7522,
      "step": 136874
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6612350940704346,
      "learning_rate": 0.00021262803104293002,
      "loss": 3.148,
      "step": 136875
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2038211822509766,
      "learning_rate": 0.00021262411781453635,
      "loss": 3.2822,
      "step": 136876
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8293251991271973,
      "learning_rate": 0.00021262020460238748,
      "loss": 3.0589,
      "step": 136877
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0543415546417236,
      "learning_rate": 0.00021261629140648412,
      "loss": 2.9085,
      "step": 136878
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.30051851272583,
      "learning_rate": 0.00021261237822682713,
      "loss": 3.0601,
      "step": 136879
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.172619581222534,
      "learning_rate": 0.00021260846506341707,
      "loss": 2.8878,
      "step": 136880
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8828243017196655,
      "learning_rate": 0.0002126045519162549,
      "loss": 2.9872,
      "step": 136881
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0364341735839844,
      "learning_rate": 0.0002126006387853411,
      "loss": 3.0036,
      "step": 136882
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.319305181503296,
      "learning_rate": 0.00021259672567067644,
      "loss": 2.8439,
      "step": 136883
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0217621326446533,
      "learning_rate": 0.0002125928125722617,
      "loss": 2.9927,
      "step": 136884
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3439600467681885,
      "learning_rate": 0.00021258889949009763,
      "loss": 2.9386,
      "step": 136885
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5161421298980713,
      "learning_rate": 0.00021258498642418494,
      "loss": 2.8942,
      "step": 136886
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.298447847366333,
      "learning_rate": 0.0002125810733745245,
      "loss": 2.8825,
      "step": 136887
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.292668581008911,
      "learning_rate": 0.00021257716034111672,
      "loss": 3.1679,
      "step": 136888
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5141749382019043,
      "learning_rate": 0.00021257324732396247,
      "loss": 2.7884,
      "step": 136889
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1444735527038574,
      "learning_rate": 0.00021256933432306255,
      "loss": 3.0454,
      "step": 136890
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8800263404846191,
      "learning_rate": 0.00021256542133841761,
      "loss": 3.1247,
      "step": 136891
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.975120186805725,
      "learning_rate": 0.00021256150837002842,
      "loss": 2.8705,
      "step": 136892
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5008602142333984,
      "learning_rate": 0.00021255759541789578,
      "loss": 3.0222,
      "step": 136893
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.039112091064453,
      "learning_rate": 0.00021255368248202034,
      "loss": 2.6191,
      "step": 136894
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2071170806884766,
      "learning_rate": 0.0002125497695624027,
      "loss": 2.9147,
      "step": 136895
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5604615211486816,
      "learning_rate": 0.00021254585665904374,
      "loss": 2.9425,
      "step": 136896
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.67809796333313,
      "learning_rate": 0.00021254194377194418,
      "loss": 3.1464,
      "step": 136897
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5841774940490723,
      "learning_rate": 0.00021253803090110472,
      "loss": 3.0451,
      "step": 136898
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5965211391448975,
      "learning_rate": 0.00021253411804652615,
      "loss": 3.0201,
      "step": 136899
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6672587394714355,
      "learning_rate": 0.00021253020520820904,
      "loss": 3.4028,
      "step": 136900
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.27909779548645,
      "learning_rate": 0.00021252629238615426,
      "loss": 2.9868,
      "step": 136901
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.992745041847229,
      "learning_rate": 0.0002125223795803625,
      "loss": 2.9329,
      "step": 136902
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9625036716461182,
      "learning_rate": 0.00021251846679083447,
      "loss": 3.0289,
      "step": 136903
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3041021823883057,
      "learning_rate": 0.0002125145540175709,
      "loss": 3.1655,
      "step": 136904
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9607447385787964,
      "learning_rate": 0.0002125106412605726,
      "loss": 2.734,
      "step": 136905
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.38557505607605,
      "learning_rate": 0.00021250672851984013,
      "loss": 2.9005,
      "step": 136906
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.7815824747085571,
      "learning_rate": 0.00021250281579537433,
      "loss": 2.9303,
      "step": 136907
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.7244949340820312,
      "learning_rate": 0.0002124989030871759,
      "loss": 2.8523,
      "step": 136908
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8854690790176392,
      "learning_rate": 0.00021249499039524563,
      "loss": 3.2408,
      "step": 136909
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.538153648376465,
      "learning_rate": 0.00021249107771958414,
      "loss": 2.8567,
      "step": 136910
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.355374813079834,
      "learning_rate": 0.0002124871650601923,
      "loss": 3.0498,
      "step": 136911
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.474761962890625,
      "learning_rate": 0.00021248325241707064,
      "loss": 2.9032,
      "step": 136912
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1614255905151367,
      "learning_rate": 0.00021247933979022,
      "loss": 3.019,
      "step": 136913
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.868423342704773,
      "learning_rate": 0.00021247542717964114,
      "loss": 2.918,
      "step": 136914
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1109955310821533,
      "learning_rate": 0.0002124715145853347,
      "loss": 3.0361,
      "step": 136915
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.641357421875,
      "learning_rate": 0.00021246760200730155,
      "loss": 3.1204,
      "step": 136916
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8746005296707153,
      "learning_rate": 0.00021246368944554235,
      "loss": 2.8947,
      "step": 136917
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2062690258026123,
      "learning_rate": 0.0002124597769000577,
      "loss": 3.2243,
      "step": 136918
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.987048864364624,
      "learning_rate": 0.00021245586437084843,
      "loss": 3.1426,
      "step": 136919
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9670952558517456,
      "learning_rate": 0.0002124519518579153,
      "loss": 2.9873,
      "step": 136920
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.259706497192383,
      "learning_rate": 0.000212448039361259,
      "loss": 2.8948,
      "step": 136921
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8652139902114868,
      "learning_rate": 0.00021244412688088023,
      "loss": 2.8615,
      "step": 136922
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.097193479537964,
      "learning_rate": 0.00021244021441677976,
      "loss": 3.0016,
      "step": 136923
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8851815462112427,
      "learning_rate": 0.00021243630196895845,
      "loss": 2.8135,
      "step": 136924
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8789142370224,
      "learning_rate": 0.00021243238953741676,
      "loss": 2.91,
      "step": 136925
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.682310104370117,
      "learning_rate": 0.00021242847712215553,
      "loss": 3.0628,
      "step": 136926
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1154167652130127,
      "learning_rate": 0.0002124245647231755,
      "loss": 3.1934,
      "step": 136927
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8862512111663818,
      "learning_rate": 0.0002124206523404774,
      "loss": 2.6209,
      "step": 136928
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5414791107177734,
      "learning_rate": 0.00021241673997406195,
      "loss": 3.1672,
      "step": 136929
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.847177028656006,
      "learning_rate": 0.00021241282762392996,
      "loss": 3.1364,
      "step": 136930
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.150177001953125,
      "learning_rate": 0.000212408915290082,
      "loss": 2.9146,
      "step": 136931
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.116777181625366,
      "learning_rate": 0.0002124050029725189,
      "loss": 2.6632,
      "step": 136932
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.1826624870300293,
      "learning_rate": 0.00021240109067124135,
      "loss": 3.3063,
      "step": 136933
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3865549564361572,
      "learning_rate": 0.00021239717838625005,
      "loss": 2.9769,
      "step": 136934
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1505353450775146,
      "learning_rate": 0.0002123932661175458,
      "loss": 3.0403,
      "step": 136935
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4110405445098877,
      "learning_rate": 0.00021238935386512942,
      "loss": 2.9571,
      "step": 136936
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3776395320892334,
      "learning_rate": 0.0002123854416290014,
      "loss": 2.93,
      "step": 136937
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9934648275375366,
      "learning_rate": 0.00021238152940916254,
      "loss": 3.1093,
      "step": 136938
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3106577396392822,
      "learning_rate": 0.00021237761720561363,
      "loss": 3.1801,
      "step": 136939
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9522784948349,
      "learning_rate": 0.00021237370501835543,
      "loss": 3.2079,
      "step": 136940
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.7316930294036865,
      "learning_rate": 0.00021236979284738853,
      "loss": 2.9152,
      "step": 136941
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6202166080474854,
      "learning_rate": 0.0002123658806927139,
      "loss": 3.2244,
      "step": 136942
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9960074424743652,
      "learning_rate": 0.00021236196855433197,
      "loss": 2.9321,
      "step": 136943
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8693574666976929,
      "learning_rate": 0.00021235805643224365,
      "loss": 2.9638,
      "step": 136944
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1416282653808594,
      "learning_rate": 0.00021235414432644955,
      "loss": 3.0085,
      "step": 136945
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0702669620513916,
      "learning_rate": 0.00021235023223695052,
      "loss": 3.1486,
      "step": 136946
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1372270584106445,
      "learning_rate": 0.0002123463201637472,
      "loss": 3.1443,
      "step": 136947
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0108280181884766,
      "learning_rate": 0.00021234240810684052,
      "loss": 2.9117,
      "step": 136948
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.880858898162842,
      "learning_rate": 0.00021233849606623088,
      "loss": 3.0175,
      "step": 136949
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0864975452423096,
      "learning_rate": 0.0002123345840419192,
      "loss": 3.0301,
      "step": 136950
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6084017753601074,
      "learning_rate": 0.00021233067203390617,
      "loss": 2.8864,
      "step": 136951
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3575778007507324,
      "learning_rate": 0.00021232676004219247,
      "loss": 3.1124,
      "step": 136952
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1051735877990723,
      "learning_rate": 0.00021232284806677892,
      "loss": 2.7725,
      "step": 136953
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.052725315093994,
      "learning_rate": 0.00021231893610766634,
      "loss": 2.8353,
      "step": 136954
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.490654945373535,
      "learning_rate": 0.0002123150241648552,
      "loss": 2.9005,
      "step": 136955
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0539560317993164,
      "learning_rate": 0.00021231111223834632,
      "loss": 2.9986,
      "step": 136956
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7391936779022217,
      "learning_rate": 0.0002123072003281405,
      "loss": 3.0912,
      "step": 136957
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3854522705078125,
      "learning_rate": 0.00021230328843423844,
      "loss": 2.8044,
      "step": 136958
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.817023754119873,
      "learning_rate": 0.00021229937655664082,
      "loss": 2.9017,
      "step": 136959
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3616037368774414,
      "learning_rate": 0.00021229546469534854,
      "loss": 2.9562,
      "step": 136960
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6197283267974854,
      "learning_rate": 0.00021229155285036204,
      "loss": 2.7155,
      "step": 136961
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2452428340911865,
      "learning_rate": 0.00021228764102168224,
      "loss": 3.2127,
      "step": 136962
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.524710178375244,
      "learning_rate": 0.00021228372920930978,
      "loss": 2.874,
      "step": 136963
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1988372802734375,
      "learning_rate": 0.00021227981741324544,
      "loss": 2.8528,
      "step": 136964
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.230825424194336,
      "learning_rate": 0.00021227590563348996,
      "loss": 3.0329,
      "step": 136965
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.082961320877075,
      "learning_rate": 0.0002122719938700441,
      "loss": 2.7478,
      "step": 136966
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0675015449523926,
      "learning_rate": 0.0002122680821229085,
      "loss": 2.7665,
      "step": 136967
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4368221759796143,
      "learning_rate": 0.0002122641703920839,
      "loss": 2.9531,
      "step": 136968
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0310332775115967,
      "learning_rate": 0.000212260258677571,
      "loss": 3.0767,
      "step": 136969
    },
    {
      "epoch": 1.78,
      "grad_norm": 5.305634021759033,
      "learning_rate": 0.0002122563469793706,
      "loss": 2.9373,
      "step": 136970
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.2920632362365723,
      "learning_rate": 0.0002122524352974834,
      "loss": 2.62,
      "step": 136971
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3629257678985596,
      "learning_rate": 0.00021224852363191027,
      "loss": 2.9127,
      "step": 136972
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.486020803451538,
      "learning_rate": 0.00021224461198265164,
      "loss": 3.0595,
      "step": 136973
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4145216941833496,
      "learning_rate": 0.00021224070034970842,
      "loss": 3.0927,
      "step": 136974
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.830198764801025,
      "learning_rate": 0.00021223678873308132,
      "loss": 2.8125,
      "step": 136975
    },
    {
      "epoch": 1.78,
      "grad_norm": 7.581862449645996,
      "learning_rate": 0.000212232877132771,
      "loss": 2.8117,
      "step": 136976
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.583261251449585,
      "learning_rate": 0.00021222896554877832,
      "loss": 2.9728,
      "step": 136977
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8938871622085571,
      "learning_rate": 0.00021222505398110395,
      "loss": 3.0908,
      "step": 136978
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0747928619384766,
      "learning_rate": 0.0002122211424297486,
      "loss": 2.7789,
      "step": 136979
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3220431804656982,
      "learning_rate": 0.00021221723089471294,
      "loss": 3.2223,
      "step": 136980
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.4822659492492676,
      "learning_rate": 0.00021221331937599776,
      "loss": 2.96,
      "step": 136981
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.988739013671875,
      "learning_rate": 0.00021220940787360378,
      "loss": 3.0059,
      "step": 136982
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.019885778427124,
      "learning_rate": 0.0002122054963875317,
      "loss": 2.995,
      "step": 136983
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.826874256134033,
      "learning_rate": 0.00021220158491778242,
      "loss": 2.9716,
      "step": 136984
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2102890014648438,
      "learning_rate": 0.0002121976734643564,
      "loss": 2.8541,
      "step": 136985
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.623677968978882,
      "learning_rate": 0.0002121937620272545,
      "loss": 3.0939,
      "step": 136986
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.043470859527588,
      "learning_rate": 0.00021218985060647745,
      "loss": 3.2485,
      "step": 136987
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.250866174697876,
      "learning_rate": 0.00021218593920202595,
      "loss": 2.6771,
      "step": 136988
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.9940125942230225,
      "learning_rate": 0.00021218202781390073,
      "loss": 3.2861,
      "step": 136989
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0973961353302,
      "learning_rate": 0.00021217811644210253,
      "loss": 3.0358,
      "step": 136990
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0498735904693604,
      "learning_rate": 0.00021217420508663217,
      "loss": 3.1736,
      "step": 136991
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.620706796646118,
      "learning_rate": 0.0002121702937474902,
      "loss": 3.1098,
      "step": 136992
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.901454210281372,
      "learning_rate": 0.00021216638242467742,
      "loss": 2.989,
      "step": 136993
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8315480947494507,
      "learning_rate": 0.0002121624711181946,
      "loss": 3.0326,
      "step": 136994
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.092043399810791,
      "learning_rate": 0.00021215855982804243,
      "loss": 2.6606,
      "step": 136995
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9533127546310425,
      "learning_rate": 0.00021215464855422163,
      "loss": 3.0101,
      "step": 136996
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.8192520141601562,
      "learning_rate": 0.00021215073729673298,
      "loss": 3.1164,
      "step": 136997
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9890230894088745,
      "learning_rate": 0.0002121468260555771,
      "loss": 3.0247,
      "step": 136998
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.229801893234253,
      "learning_rate": 0.00021214291483075479,
      "loss": 2.8887,
      "step": 136999
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8906819820404053,
      "learning_rate": 0.00021213900362226682,
      "loss": 2.8857,
      "step": 137000
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9817408323287964,
      "learning_rate": 0.00021213509243011387,
      "loss": 2.9002,
      "step": 137001
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.133883237838745,
      "learning_rate": 0.0002121311812542966,
      "loss": 3.0011,
      "step": 137002
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.4737112522125244,
      "learning_rate": 0.0002121272700948159,
      "loss": 2.9755,
      "step": 137003
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.155733585357666,
      "learning_rate": 0.0002121233589516723,
      "loss": 3.226,
      "step": 137004
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.409752130508423,
      "learning_rate": 0.00021211944782486666,
      "loss": 3.1725,
      "step": 137005
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8936333656311035,
      "learning_rate": 0.00021211553671439965,
      "loss": 3.0963,
      "step": 137006
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0988714694976807,
      "learning_rate": 0.00021211162562027208,
      "loss": 3.0615,
      "step": 137007
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.347023010253906,
      "learning_rate": 0.0002121077145424846,
      "loss": 3.002,
      "step": 137008
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2047715187072754,
      "learning_rate": 0.000212103803481038,
      "loss": 3.0685,
      "step": 137009
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1151609420776367,
      "learning_rate": 0.0002120998924359329,
      "loss": 3.0021,
      "step": 137010
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.354999303817749,
      "learning_rate": 0.00021209598140717003,
      "loss": 2.7253,
      "step": 137011
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.598811149597168,
      "learning_rate": 0.00021209207039475024,
      "loss": 2.8987,
      "step": 137012
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.138082265853882,
      "learning_rate": 0.0002120881593986742,
      "loss": 2.9456,
      "step": 137013
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.356173038482666,
      "learning_rate": 0.0002120842484189426,
      "loss": 2.9714,
      "step": 137014
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0001535415649414,
      "learning_rate": 0.0002120803374555563,
      "loss": 3.3418,
      "step": 137015
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3894476890563965,
      "learning_rate": 0.00021207642650851582,
      "loss": 2.9011,
      "step": 137016
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.7837932109832764,
      "learning_rate": 0.00021207251557782202,
      "loss": 2.9211,
      "step": 137017
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8242321014404297,
      "learning_rate": 0.00021206860466347558,
      "loss": 3.0622,
      "step": 137018
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2256288528442383,
      "learning_rate": 0.00021206469376547723,
      "loss": 2.8748,
      "step": 137019
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.3188507556915283,
      "learning_rate": 0.00021206078288382775,
      "loss": 2.9174,
      "step": 137020
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.179708003997803,
      "learning_rate": 0.00021205687201852793,
      "loss": 2.8533,
      "step": 137021
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6006593704223633,
      "learning_rate": 0.0002120529611695783,
      "loss": 3.1193,
      "step": 137022
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.033257007598877,
      "learning_rate": 0.00021204905033697966,
      "loss": 2.7948,
      "step": 137023
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5702922344207764,
      "learning_rate": 0.00021204513952073276,
      "loss": 3.0158,
      "step": 137024
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.348325490951538,
      "learning_rate": 0.00021204122872083833,
      "loss": 3.055,
      "step": 137025
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8581798076629639,
      "learning_rate": 0.00021203731793729708,
      "loss": 2.8851,
      "step": 137026
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0099563598632812,
      "learning_rate": 0.0002120334071701099,
      "loss": 2.9429,
      "step": 137027
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4663736820220947,
      "learning_rate": 0.0002120294964192772,
      "loss": 2.9863,
      "step": 137028
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.531428098678589,
      "learning_rate": 0.00021202558568479997,
      "loss": 3.1442,
      "step": 137029
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.144768714904785,
      "learning_rate": 0.00021202167496667878,
      "loss": 3.0084,
      "step": 137030
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8887104988098145,
      "learning_rate": 0.0002120177642649144,
      "loss": 3.2524,
      "step": 137031
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.612466812133789,
      "learning_rate": 0.00021201385357950763,
      "loss": 2.904,
      "step": 137032
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3769893646240234,
      "learning_rate": 0.00021200994291045925,
      "loss": 3.007,
      "step": 137033
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1891047954559326,
      "learning_rate": 0.00021200603225776977,
      "loss": 3.2731,
      "step": 137034
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.055738687515259,
      "learning_rate": 0.00021200212162144,
      "loss": 2.7855,
      "step": 137035
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.258615493774414,
      "learning_rate": 0.00021199821100147077,
      "loss": 3.0572,
      "step": 137036
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.176070213317871,
      "learning_rate": 0.00021199430039786266,
      "loss": 2.8662,
      "step": 137037
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.933145761489868,
      "learning_rate": 0.0002119903898106165,
      "loss": 3.1196,
      "step": 137038
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.879822850227356,
      "learning_rate": 0.00021198647923973308,
      "loss": 3.0086,
      "step": 137039
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.601585626602173,
      "learning_rate": 0.00021198256868521295,
      "loss": 3.0006,
      "step": 137040
    },
    {
      "epoch": 1.78,
      "grad_norm": 4.0026044845581055,
      "learning_rate": 0.00021197865814705694,
      "loss": 3.2093,
      "step": 137041
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.584444046020508,
      "learning_rate": 0.0002119747476252657,
      "loss": 2.9589,
      "step": 137042
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9168453216552734,
      "learning_rate": 0.00021197083711984007,
      "loss": 2.8658,
      "step": 137043
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2387211322784424,
      "learning_rate": 0.00021196692663078066,
      "loss": 3.0826,
      "step": 137044
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.257138729095459,
      "learning_rate": 0.00021196301615808845,
      "loss": 3.0553,
      "step": 137045
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4846975803375244,
      "learning_rate": 0.0002119591057017638,
      "loss": 2.7805,
      "step": 137046
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1618998050689697,
      "learning_rate": 0.00021195519526180764,
      "loss": 2.8787,
      "step": 137047
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9912214279174805,
      "learning_rate": 0.00021195128483822068,
      "loss": 2.9332,
      "step": 137048
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.3118879795074463,
      "learning_rate": 0.0002119473744310036,
      "loss": 3.0337,
      "step": 137049
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.053405523300171,
      "learning_rate": 0.0002119434640401572,
      "loss": 2.8334,
      "step": 137050
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.045077323913574,
      "learning_rate": 0.00021193955366568228,
      "loss": 2.9751,
      "step": 137051
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.0887248516082764,
      "learning_rate": 0.00021193564330757934,
      "loss": 3.2638,
      "step": 137052
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6187078952789307,
      "learning_rate": 0.00021193173296584923,
      "loss": 2.9743,
      "step": 137053
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.161916494369507,
      "learning_rate": 0.00021192782264049266,
      "loss": 2.7914,
      "step": 137054
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.9442403316497803,
      "learning_rate": 0.00021192391233151037,
      "loss": 3.168,
      "step": 137055
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.560123920440674,
      "learning_rate": 0.00021192000203890313,
      "loss": 3.1688,
      "step": 137056
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.983515739440918,
      "learning_rate": 0.00021191609176267158,
      "loss": 2.9059,
      "step": 137057
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8084733486175537,
      "learning_rate": 0.00021191218150281666,
      "loss": 2.7998,
      "step": 137058
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4925293922424316,
      "learning_rate": 0.00021190827125933874,
      "loss": 3.0008,
      "step": 137059
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.5387041568756104,
      "learning_rate": 0.00021190436103223878,
      "loss": 2.7504,
      "step": 137060
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.659816265106201,
      "learning_rate": 0.0002119004508215175,
      "loss": 2.9776,
      "step": 137061
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9815173149108887,
      "learning_rate": 0.0002118965406271755,
      "loss": 3.0073,
      "step": 137062
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6508071422576904,
      "learning_rate": 0.0002118926304492136,
      "loss": 2.9216,
      "step": 137063
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.679819107055664,
      "learning_rate": 0.0002118887202876327,
      "loss": 2.9499,
      "step": 137064
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1326608657836914,
      "learning_rate": 0.00021188481014243322,
      "loss": 2.9179,
      "step": 137065
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.7065765857696533,
      "learning_rate": 0.000211880900013616,
      "loss": 3.0212,
      "step": 137066
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0022828578948975,
      "learning_rate": 0.0002118769899011818,
      "loss": 2.9164,
      "step": 137067
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9174517393112183,
      "learning_rate": 0.00021187307980513132,
      "loss": 2.7719,
      "step": 137068
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.4751992225646973,
      "learning_rate": 0.0002118691697254653,
      "loss": 2.9333,
      "step": 137069
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.081085443496704,
      "learning_rate": 0.00021186525966218454,
      "loss": 2.8181,
      "step": 137070
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1881957054138184,
      "learning_rate": 0.0002118613496152897,
      "loss": 2.882,
      "step": 137071
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.043347120285034,
      "learning_rate": 0.0002118574395847814,
      "loss": 3.0611,
      "step": 137072
    },
    {
      "epoch": 1.78,
      "grad_norm": 3.5804061889648438,
      "learning_rate": 0.00021185352957066043,
      "loss": 2.9048,
      "step": 137073
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0878117084503174,
      "learning_rate": 0.00021184961957292764,
      "loss": 2.9042,
      "step": 137074
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4360551834106445,
      "learning_rate": 0.00021184570959158363,
      "loss": 2.7884,
      "step": 137075
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8845402002334595,
      "learning_rate": 0.00021184179962662923,
      "loss": 3.0326,
      "step": 137076
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.525825023651123,
      "learning_rate": 0.00021183788967806504,
      "loss": 3.2126,
      "step": 137077
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.4858956336975098,
      "learning_rate": 0.00021183397974589192,
      "loss": 3.0213,
      "step": 137078
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.603513479232788,
      "learning_rate": 0.00021183006983011045,
      "loss": 2.9903,
      "step": 137079
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.063509941101074,
      "learning_rate": 0.00021182615993072147,
      "loss": 3.166,
      "step": 137080
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.6757724285125732,
      "learning_rate": 0.00021182225004772563,
      "loss": 2.9296,
      "step": 137081
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9468032121658325,
      "learning_rate": 0.00021181834018112376,
      "loss": 2.794,
      "step": 137082
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.0776143074035645,
      "learning_rate": 0.0002118144303309165,
      "loss": 3.1387,
      "step": 137083
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.013262987136841,
      "learning_rate": 0.00021181052049710458,
      "loss": 3.2282,
      "step": 137084
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.9196596145629883,
      "learning_rate": 0.00021180661067968877,
      "loss": 2.9771,
      "step": 137085
    },
    {
      "epoch": 1.78,
      "grad_norm": 1.8951969146728516,
      "learning_rate": 0.00021180270087866983,
      "loss": 2.9966,
      "step": 137086
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.621074676513672,
      "learning_rate": 0.00021179879109404834,
      "loss": 2.8057,
      "step": 137087
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.2223219871520996,
      "learning_rate": 0.00021179488132582518,
      "loss": 2.8527,
      "step": 137088
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.1411242485046387,
      "learning_rate": 0.000211790971574001,
      "loss": 3.1401,
      "step": 137089
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.062645196914673,
      "learning_rate": 0.0002117870618385765,
      "loss": 3.2244,
      "step": 137090
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7037603855133057,
      "learning_rate": 0.00021178315211955247,
      "loss": 3.0452,
      "step": 137091
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.858875036239624,
      "learning_rate": 0.00021177924241692962,
      "loss": 3.0208,
      "step": 137092
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.045140266418457,
      "learning_rate": 0.00021177533273070872,
      "loss": 2.9637,
      "step": 137093
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.314911365509033,
      "learning_rate": 0.0002117714230608905,
      "loss": 2.8655,
      "step": 137094
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1550023555755615,
      "learning_rate": 0.00021176751340747552,
      "loss": 3.1838,
      "step": 137095
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.249025344848633,
      "learning_rate": 0.00021176360377046465,
      "loss": 2.868,
      "step": 137096
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3573851585388184,
      "learning_rate": 0.00021175969414985858,
      "loss": 2.909,
      "step": 137097
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0362119674682617,
      "learning_rate": 0.00021175578454565803,
      "loss": 3.0194,
      "step": 137098
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.225864887237549,
      "learning_rate": 0.0002117518749578638,
      "loss": 3.0028,
      "step": 137099
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0636703968048096,
      "learning_rate": 0.00021174796538647665,
      "loss": 2.8147,
      "step": 137100
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9350461959838867,
      "learning_rate": 0.0002117440558314971,
      "loss": 2.6487,
      "step": 137101
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9236582517623901,
      "learning_rate": 0.00021174014629292602,
      "loss": 2.8841,
      "step": 137102
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9715960025787354,
      "learning_rate": 0.00021173623677076407,
      "loss": 2.9554,
      "step": 137103
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.514132261276245,
      "learning_rate": 0.00021173232726501206,
      "loss": 2.9931,
      "step": 137104
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.524275302886963,
      "learning_rate": 0.00021172841777567061,
      "loss": 3.1477,
      "step": 137105
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.247999668121338,
      "learning_rate": 0.0002117245083027407,
      "loss": 2.7716,
      "step": 137106
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3197720050811768,
      "learning_rate": 0.00021172059884622276,
      "loss": 3.0257,
      "step": 137107
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.415478229522705,
      "learning_rate": 0.00021171668940611762,
      "loss": 2.7332,
      "step": 137108
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2471678256988525,
      "learning_rate": 0.00021171277998242597,
      "loss": 3.0941,
      "step": 137109
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.091705560684204,
      "learning_rate": 0.0002117088705751486,
      "loss": 2.9921,
      "step": 137110
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3237338066101074,
      "learning_rate": 0.00021170496118428625,
      "loss": 2.9925,
      "step": 137111
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.922238826751709,
      "learning_rate": 0.00021170105180983966,
      "loss": 2.7434,
      "step": 137112
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9959444999694824,
      "learning_rate": 0.00021169714245180944,
      "loss": 2.899,
      "step": 137113
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.24542236328125,
      "learning_rate": 0.00021169323311019642,
      "loss": 3.1182,
      "step": 137114
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3880832195281982,
      "learning_rate": 0.00021168932378500126,
      "loss": 2.905,
      "step": 137115
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9792630672454834,
      "learning_rate": 0.0002116854144762247,
      "loss": 2.8273,
      "step": 137116
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4443283081054688,
      "learning_rate": 0.0002116815051838675,
      "loss": 3.0443,
      "step": 137117
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2941086292266846,
      "learning_rate": 0.0002116775959079305,
      "loss": 3.0672,
      "step": 137118
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.221001386642456,
      "learning_rate": 0.00021167368664841418,
      "loss": 2.9193,
      "step": 137119
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8614234924316406,
      "learning_rate": 0.00021166977740531942,
      "loss": 2.8158,
      "step": 137120
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9639266729354858,
      "learning_rate": 0.00021166586817864685,
      "loss": 2.8517,
      "step": 137121
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.982093095779419,
      "learning_rate": 0.00021166195896839734,
      "loss": 2.8617,
      "step": 137122
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7726709842681885,
      "learning_rate": 0.0002116580497745715,
      "loss": 2.9944,
      "step": 137123
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.790511131286621,
      "learning_rate": 0.0002116541405971701,
      "loss": 3.1963,
      "step": 137124
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2223494052886963,
      "learning_rate": 0.000211650231436194,
      "loss": 2.8084,
      "step": 137125
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4296071529388428,
      "learning_rate": 0.00021164632229164364,
      "loss": 3.22,
      "step": 137126
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.163492202758789,
      "learning_rate": 0.0002116424131635199,
      "loss": 2.9886,
      "step": 137127
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.6979169845581055,
      "learning_rate": 0.00021163850405182352,
      "loss": 3.0109,
      "step": 137128
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.181354284286499,
      "learning_rate": 0.00021163459495655517,
      "loss": 3.0167,
      "step": 137129
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.336986780166626,
      "learning_rate": 0.00021163068587771568,
      "loss": 2.6973,
      "step": 137130
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.644186019897461,
      "learning_rate": 0.00021162677681530582,
      "loss": 2.9105,
      "step": 137131
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0271286964416504,
      "learning_rate": 0.00021162286776932606,
      "loss": 3.1542,
      "step": 137132
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1087820529937744,
      "learning_rate": 0.00021161895873977727,
      "loss": 2.9796,
      "step": 137133
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0598387718200684,
      "learning_rate": 0.00021161504972666022,
      "loss": 3.0663,
      "step": 137134
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1536178588867188,
      "learning_rate": 0.00021161114072997558,
      "loss": 2.9621,
      "step": 137135
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9490073919296265,
      "learning_rate": 0.00021160723174972412,
      "loss": 3.0358,
      "step": 137136
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2168116569519043,
      "learning_rate": 0.0002116033227859066,
      "loss": 3.2432,
      "step": 137137
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4053797721862793,
      "learning_rate": 0.00021159941383852362,
      "loss": 2.8923,
      "step": 137138
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6061437129974365,
      "learning_rate": 0.000211595504907576,
      "loss": 3.0047,
      "step": 137139
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9108144044876099,
      "learning_rate": 0.00021159159599306443,
      "loss": 3.0512,
      "step": 137140
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.913996696472168,
      "learning_rate": 0.0002115876870949896,
      "loss": 2.8431,
      "step": 137141
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0443787574768066,
      "learning_rate": 0.00021158377821335237,
      "loss": 2.8601,
      "step": 137142
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.97164249420166,
      "learning_rate": 0.00021157986934815346,
      "loss": 2.7589,
      "step": 137143
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9995555877685547,
      "learning_rate": 0.00021157596049939338,
      "loss": 2.7084,
      "step": 137144
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0771994590759277,
      "learning_rate": 0.00021157205166707303,
      "loss": 2.8832,
      "step": 137145
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.321160316467285,
      "learning_rate": 0.00021156814285119312,
      "loss": 2.7513,
      "step": 137146
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8174383640289307,
      "learning_rate": 0.00021156423405175435,
      "loss": 2.8773,
      "step": 137147
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8093219995498657,
      "learning_rate": 0.0002115603252687574,
      "loss": 2.8001,
      "step": 137148
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.032423496246338,
      "learning_rate": 0.00021155641650220323,
      "loss": 3.0432,
      "step": 137149
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.103713274002075,
      "learning_rate": 0.0002115525077520923,
      "loss": 3.0841,
      "step": 137150
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0877535343170166,
      "learning_rate": 0.00021154859901842537,
      "loss": 2.7763,
      "step": 137151
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.067391872406006,
      "learning_rate": 0.00021154469030120322,
      "loss": 2.932,
      "step": 137152
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.248919725418091,
      "learning_rate": 0.00021154078160042666,
      "loss": 2.9961,
      "step": 137153
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.653965711593628,
      "learning_rate": 0.00021153687291609625,
      "loss": 2.9387,
      "step": 137154
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2811520099639893,
      "learning_rate": 0.00021153296424821289,
      "loss": 3.1465,
      "step": 137155
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1113955974578857,
      "learning_rate": 0.00021152905559677725,
      "loss": 3.0404,
      "step": 137156
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9728268384933472,
      "learning_rate": 0.00021152514696178996,
      "loss": 2.9249,
      "step": 137157
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4045796394348145,
      "learning_rate": 0.00021152123834325182,
      "loss": 2.9934,
      "step": 137158
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2172365188598633,
      "learning_rate": 0.00021151732974116353,
      "loss": 2.8931,
      "step": 137159
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9750754833221436,
      "learning_rate": 0.00021151342115552585,
      "loss": 3.0737,
      "step": 137160
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0314483642578125,
      "learning_rate": 0.00021150951258633958,
      "loss": 2.9803,
      "step": 137161
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6725680828094482,
      "learning_rate": 0.00021150560403360524,
      "loss": 2.9436,
      "step": 137162
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0524659156799316,
      "learning_rate": 0.00021150169549732377,
      "loss": 3.089,
      "step": 137163
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.257906198501587,
      "learning_rate": 0.00021149778697749576,
      "loss": 2.9749,
      "step": 137164
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.009664297103882,
      "learning_rate": 0.00021149387847412197,
      "loss": 3.1547,
      "step": 137165
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.542752504348755,
      "learning_rate": 0.00021148996998720317,
      "loss": 3.2883,
      "step": 137166
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.234355926513672,
      "learning_rate": 0.00021148606151674007,
      "loss": 3.067,
      "step": 137167
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1843628883361816,
      "learning_rate": 0.0002114821530627333,
      "loss": 2.9516,
      "step": 137168
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.038954257965088,
      "learning_rate": 0.00021147824462518374,
      "loss": 3.2084,
      "step": 137169
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9731067419052124,
      "learning_rate": 0.00021147433620409206,
      "loss": 3.0007,
      "step": 137170
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.386904001235962,
      "learning_rate": 0.00021147042779945892,
      "loss": 3.0461,
      "step": 137171
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.246927499771118,
      "learning_rate": 0.00021146651941128508,
      "loss": 2.7664,
      "step": 137172
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.02640962600708,
      "learning_rate": 0.0002114626110395714,
      "loss": 3.2359,
      "step": 137173
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.068516492843628,
      "learning_rate": 0.0002114587026843184,
      "loss": 3.2127,
      "step": 137174
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.306323766708374,
      "learning_rate": 0.00021145479434552688,
      "loss": 3.1676,
      "step": 137175
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9054815769195557,
      "learning_rate": 0.00021145088602319762,
      "loss": 2.795,
      "step": 137176
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2171218395233154,
      "learning_rate": 0.00021144697771733134,
      "loss": 2.9414,
      "step": 137177
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7776530981063843,
      "learning_rate": 0.0002114430694279287,
      "loss": 2.8494,
      "step": 137178
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1391959190368652,
      "learning_rate": 0.0002114391611549905,
      "loss": 3.0528,
      "step": 137179
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.213928461074829,
      "learning_rate": 0.00021143525289851745,
      "loss": 2.8086,
      "step": 137180
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.609546184539795,
      "learning_rate": 0.00021143134465851016,
      "loss": 3.2304,
      "step": 137181
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2717700004577637,
      "learning_rate": 0.0002114274364349695,
      "loss": 2.8039,
      "step": 137182
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.350367307662964,
      "learning_rate": 0.00021142352822789613,
      "loss": 2.7077,
      "step": 137183
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.124027729034424,
      "learning_rate": 0.00021141962003729081,
      "loss": 3.1685,
      "step": 137184
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.806346893310547,
      "learning_rate": 0.0002114157118631544,
      "loss": 2.9733,
      "step": 137185
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2222378253936768,
      "learning_rate": 0.0002114118037054873,
      "loss": 2.8852,
      "step": 137186
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.039771556854248,
      "learning_rate": 0.0002114078955642905,
      "loss": 2.7849,
      "step": 137187
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.25614333152771,
      "learning_rate": 0.00021140398743956459,
      "loss": 2.9609,
      "step": 137188
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2740495204925537,
      "learning_rate": 0.00021140007933131034,
      "loss": 3.0803,
      "step": 137189
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5649361610412598,
      "learning_rate": 0.00021139617123952853,
      "loss": 3.0044,
      "step": 137190
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8352839946746826,
      "learning_rate": 0.00021139226316421985,
      "loss": 2.9883,
      "step": 137191
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.126253843307495,
      "learning_rate": 0.0002113883551053851,
      "loss": 2.9366,
      "step": 137192
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0928964614868164,
      "learning_rate": 0.00021138444706302482,
      "loss": 3.1263,
      "step": 137193
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9509822130203247,
      "learning_rate": 0.00021138053903713985,
      "loss": 2.8453,
      "step": 137194
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.543808937072754,
      "learning_rate": 0.0002113766310277309,
      "loss": 2.761,
      "step": 137195
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.289130449295044,
      "learning_rate": 0.00021137272303479874,
      "loss": 2.8865,
      "step": 137196
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9377033710479736,
      "learning_rate": 0.00021136881505834403,
      "loss": 2.9997,
      "step": 137197
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9870425462722778,
      "learning_rate": 0.00021136490709836766,
      "loss": 2.8383,
      "step": 137198
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0646860599517822,
      "learning_rate": 0.0002113609991548701,
      "loss": 2.9542,
      "step": 137199
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.232621431350708,
      "learning_rate": 0.0002113570912278522,
      "loss": 2.731,
      "step": 137200
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.62147855758667,
      "learning_rate": 0.0002113531833173147,
      "loss": 2.9905,
      "step": 137201
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4391837120056152,
      "learning_rate": 0.0002113492754232583,
      "loss": 2.8642,
      "step": 137202
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.406681537628174,
      "learning_rate": 0.00021134536754568374,
      "loss": 2.6099,
      "step": 137203
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1187894344329834,
      "learning_rate": 0.0002113414596845919,
      "loss": 3.0108,
      "step": 137204
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0050981044769287,
      "learning_rate": 0.00021133755183998324,
      "loss": 3.098,
      "step": 137205
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7390190362930298,
      "learning_rate": 0.0002113336440118586,
      "loss": 2.7681,
      "step": 137206
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3821349143981934,
      "learning_rate": 0.00021132973620021866,
      "loss": 2.764,
      "step": 137207
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.24743390083313,
      "learning_rate": 0.00021132582840506427,
      "loss": 3.0637,
      "step": 137208
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0059814453125,
      "learning_rate": 0.00021132192062639602,
      "loss": 2.9035,
      "step": 137209
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4672954082489014,
      "learning_rate": 0.00021131801286421485,
      "loss": 2.7976,
      "step": 137210
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2875404357910156,
      "learning_rate": 0.00021131410511852122,
      "loss": 2.5322,
      "step": 137211
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.42907452583313,
      "learning_rate": 0.00021131019738931595,
      "loss": 2.8598,
      "step": 137212
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0794427394866943,
      "learning_rate": 0.00021130628967659982,
      "loss": 3.1874,
      "step": 137213
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.0496978759765625,
      "learning_rate": 0.0002113023819803735,
      "loss": 2.8914,
      "step": 137214
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9588176012039185,
      "learning_rate": 0.0002112984743006377,
      "loss": 3.1429,
      "step": 137215
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2555172443389893,
      "learning_rate": 0.0002112945666373934,
      "loss": 3.051,
      "step": 137216
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9027273654937744,
      "learning_rate": 0.00021129065899064094,
      "loss": 2.9756,
      "step": 137217
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.952305555343628,
      "learning_rate": 0.00021128675136038126,
      "loss": 3.1505,
      "step": 137218
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.223607063293457,
      "learning_rate": 0.00021128284374661498,
      "loss": 2.9033,
      "step": 137219
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8060622215270996,
      "learning_rate": 0.00021127893614934293,
      "loss": 3.0356,
      "step": 137220
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.911397099494934,
      "learning_rate": 0.0002112750285685658,
      "loss": 2.9673,
      "step": 137221
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2800445556640625,
      "learning_rate": 0.00021127112100428444,
      "loss": 2.6878,
      "step": 137222
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.850279688835144,
      "learning_rate": 0.00021126721345649934,
      "loss": 3.1799,
      "step": 137223
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7890923023223877,
      "learning_rate": 0.00021126330592521135,
      "loss": 2.8726,
      "step": 137224
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8559612035751343,
      "learning_rate": 0.00021125939841042115,
      "loss": 2.7717,
      "step": 137225
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1557366847991943,
      "learning_rate": 0.00021125549091212953,
      "loss": 2.9009,
      "step": 137226
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9475204944610596,
      "learning_rate": 0.00021125158343033714,
      "loss": 2.9526,
      "step": 137227
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9066935777664185,
      "learning_rate": 0.00021124767596504494,
      "loss": 2.9584,
      "step": 137228
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3415017127990723,
      "learning_rate": 0.00021124376851625336,
      "loss": 2.9867,
      "step": 137229
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.19765567779541,
      "learning_rate": 0.0002112398610839632,
      "loss": 2.9527,
      "step": 137230
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0299150943756104,
      "learning_rate": 0.00021123595366817524,
      "loss": 2.7677,
      "step": 137231
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9961962699890137,
      "learning_rate": 0.00021123204626889017,
      "loss": 3.2015,
      "step": 137232
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.172248125076294,
      "learning_rate": 0.00021122813888610877,
      "loss": 2.9068,
      "step": 137233
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7713572978973389,
      "learning_rate": 0.00021122423151983183,
      "loss": 2.8969,
      "step": 137234
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.04903244972229,
      "learning_rate": 0.00021122032417005986,
      "loss": 3.0449,
      "step": 137235
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8120933771133423,
      "learning_rate": 0.0002112164168367937,
      "loss": 3.1088,
      "step": 137236
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8922996520996094,
      "learning_rate": 0.0002112125095200341,
      "loss": 3.0923,
      "step": 137237
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.979264259338379,
      "learning_rate": 0.0002112086022197818,
      "loss": 2.906,
      "step": 137238
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5275754928588867,
      "learning_rate": 0.00021120469493603745,
      "loss": 2.8636,
      "step": 137239
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9713194370269775,
      "learning_rate": 0.00021120078766880197,
      "loss": 2.9813,
      "step": 137240
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8517704010009766,
      "learning_rate": 0.0002111968804180758,
      "loss": 3.0728,
      "step": 137241
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.171849489212036,
      "learning_rate": 0.00021119297318385983,
      "loss": 3.0424,
      "step": 137242
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.545173168182373,
      "learning_rate": 0.00021118906596615474,
      "loss": 3.1923,
      "step": 137243
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3676536083221436,
      "learning_rate": 0.00021118515876496132,
      "loss": 3.0554,
      "step": 137244
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0945687294006348,
      "learning_rate": 0.0002111812515802802,
      "loss": 3.0413,
      "step": 137245
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3895175457000732,
      "learning_rate": 0.00021117734441211225,
      "loss": 3.0015,
      "step": 137246
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.253913164138794,
      "learning_rate": 0.00021117343726045803,
      "loss": 2.9253,
      "step": 137247
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.052210330963135,
      "learning_rate": 0.0002111695301253184,
      "loss": 2.8621,
      "step": 137248
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0213639736175537,
      "learning_rate": 0.00021116562300669403,
      "loss": 2.9908,
      "step": 137249
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.071790933609009,
      "learning_rate": 0.0002111617159045856,
      "loss": 2.698,
      "step": 137250
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8490536212921143,
      "learning_rate": 0.00021115780881899386,
      "loss": 2.8237,
      "step": 137251
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.200751781463623,
      "learning_rate": 0.00021115390174991966,
      "loss": 3.0784,
      "step": 137252
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1139755249023438,
      "learning_rate": 0.0002111499946973636,
      "loss": 2.9002,
      "step": 137253
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8780041933059692,
      "learning_rate": 0.0002111460876613264,
      "loss": 3.0982,
      "step": 137254
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4125516414642334,
      "learning_rate": 0.00021114218064180882,
      "loss": 2.8917,
      "step": 137255
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4124176502227783,
      "learning_rate": 0.0002111382736388116,
      "loss": 2.9686,
      "step": 137256
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.137779474258423,
      "learning_rate": 0.00021113436665233542,
      "loss": 2.9886,
      "step": 137257
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.078374147415161,
      "learning_rate": 0.00021113045968238107,
      "loss": 2.9824,
      "step": 137258
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2187111377716064,
      "learning_rate": 0.0002111265527289493,
      "loss": 3.1155,
      "step": 137259
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.10087251663208,
      "learning_rate": 0.0002111226457920407,
      "loss": 2.9471,
      "step": 137260
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.294621229171753,
      "learning_rate": 0.00021111873887165605,
      "loss": 2.9955,
      "step": 137261
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.043488025665283,
      "learning_rate": 0.00021111483196779618,
      "loss": 3.0641,
      "step": 137262
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3853282928466797,
      "learning_rate": 0.00021111092508046175,
      "loss": 2.9131,
      "step": 137263
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2190775871276855,
      "learning_rate": 0.0002111070182096534,
      "loss": 2.9382,
      "step": 137264
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4885895252227783,
      "learning_rate": 0.00021110311135537203,
      "loss": 2.9299,
      "step": 137265
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4678351879119873,
      "learning_rate": 0.00021109920451761822,
      "loss": 3.0596,
      "step": 137266
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9284626245498657,
      "learning_rate": 0.0002110952976963927,
      "loss": 3.0115,
      "step": 137267
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6068520545959473,
      "learning_rate": 0.00021109139089169627,
      "loss": 2.9517,
      "step": 137268
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4194509983062744,
      "learning_rate": 0.0002110874841035297,
      "loss": 2.8626,
      "step": 137269
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3106627464294434,
      "learning_rate": 0.00021108357733189357,
      "loss": 2.9549,
      "step": 137270
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5840606689453125,
      "learning_rate": 0.00021107967057678873,
      "loss": 2.9593,
      "step": 137271
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.021848678588867,
      "learning_rate": 0.0002110757638382158,
      "loss": 2.8729,
      "step": 137272
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2257742881774902,
      "learning_rate": 0.00021107185711617555,
      "loss": 3.134,
      "step": 137273
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9316414594650269,
      "learning_rate": 0.00021106795041066876,
      "loss": 2.9053,
      "step": 137274
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4335615634918213,
      "learning_rate": 0.00021106404372169611,
      "loss": 2.9373,
      "step": 137275
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.453850269317627,
      "learning_rate": 0.00021106013704925834,
      "loss": 3.155,
      "step": 137276
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2810163497924805,
      "learning_rate": 0.00021105623039335624,
      "loss": 2.844,
      "step": 137277
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9893718957901,
      "learning_rate": 0.00021105232375399043,
      "loss": 2.8953,
      "step": 137278
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.201704025268555,
      "learning_rate": 0.0002110484171311616,
      "loss": 3.0565,
      "step": 137279
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.319056510925293,
      "learning_rate": 0.00021104451052487055,
      "loss": 2.7624,
      "step": 137280
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.061964750289917,
      "learning_rate": 0.00021104060393511805,
      "loss": 2.8677,
      "step": 137281
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9504547119140625,
      "learning_rate": 0.00021103669736190472,
      "loss": 2.7134,
      "step": 137282
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.033250570297241,
      "learning_rate": 0.0002110327908052315,
      "loss": 3.127,
      "step": 137283
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1101393699645996,
      "learning_rate": 0.00021102888426509885,
      "loss": 2.8521,
      "step": 137284
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4789059162139893,
      "learning_rate": 0.00021102497774150763,
      "loss": 2.8336,
      "step": 137285
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1316781044006348,
      "learning_rate": 0.00021102107123445853,
      "loss": 2.9853,
      "step": 137286
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2853519916534424,
      "learning_rate": 0.00021101716474395226,
      "loss": 2.9791,
      "step": 137287
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.383805274963379,
      "learning_rate": 0.00021101325826998962,
      "loss": 2.8577,
      "step": 137288
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.51768159866333,
      "learning_rate": 0.00021100935181257138,
      "loss": 3.0555,
      "step": 137289
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1291308403015137,
      "learning_rate": 0.00021100544537169807,
      "loss": 3.023,
      "step": 137290
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2437970638275146,
      "learning_rate": 0.00021100153894737053,
      "loss": 3.1177,
      "step": 137291
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.245565176010132,
      "learning_rate": 0.0002109976325395895,
      "loss": 3.1075,
      "step": 137292
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6850922107696533,
      "learning_rate": 0.0002109937261483557,
      "loss": 2.7755,
      "step": 137293
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8556185960769653,
      "learning_rate": 0.00021098981977366979,
      "loss": 2.8896,
      "step": 137294
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9854590892791748,
      "learning_rate": 0.00021098591341553273,
      "loss": 2.7707,
      "step": 137295
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9489527940750122,
      "learning_rate": 0.00021098200707394494,
      "loss": 3.0947,
      "step": 137296
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.498871088027954,
      "learning_rate": 0.00021097810074890728,
      "loss": 2.837,
      "step": 137297
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2644753456115723,
      "learning_rate": 0.00021097419444042045,
      "loss": 2.9485,
      "step": 137298
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7605286836624146,
      "learning_rate": 0.0002109702881484852,
      "loss": 2.9269,
      "step": 137299
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.050426959991455,
      "learning_rate": 0.00021096638187310224,
      "loss": 2.8629,
      "step": 137300
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.701422691345215,
      "learning_rate": 0.00021096247561427244,
      "loss": 2.9894,
      "step": 137301
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.252032995223999,
      "learning_rate": 0.0002109585693719963,
      "loss": 2.914,
      "step": 137302
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.289384603500366,
      "learning_rate": 0.00021095466314627464,
      "loss": 2.8196,
      "step": 137303
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1194489002227783,
      "learning_rate": 0.00021095075693710819,
      "loss": 2.8049,
      "step": 137304
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.899999737739563,
      "learning_rate": 0.00021094685074449763,
      "loss": 3.0074,
      "step": 137305
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9639661312103271,
      "learning_rate": 0.00021094294456844377,
      "loss": 3.0235,
      "step": 137306
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2848172187805176,
      "learning_rate": 0.0002109390384089474,
      "loss": 2.8742,
      "step": 137307
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1050190925598145,
      "learning_rate": 0.00021093513226600906,
      "loss": 2.9817,
      "step": 137308
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7846992015838623,
      "learning_rate": 0.00021093122613962954,
      "loss": 2.9857,
      "step": 137309
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8779399394989014,
      "learning_rate": 0.00021092732002980956,
      "loss": 2.9844,
      "step": 137310
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.601393222808838,
      "learning_rate": 0.0002109234139365499,
      "loss": 3.1314,
      "step": 137311
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.567842721939087,
      "learning_rate": 0.00021091950785985124,
      "loss": 2.9964,
      "step": 137312
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0616354942321777,
      "learning_rate": 0.00021091560179971447,
      "loss": 3.1138,
      "step": 137313
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8821512460708618,
      "learning_rate": 0.00021091169575614005,
      "loss": 2.9389,
      "step": 137314
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.37859845161438,
      "learning_rate": 0.0002109077897291288,
      "loss": 3.2112,
      "step": 137315
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.003638505935669,
      "learning_rate": 0.00021090388371868148,
      "loss": 3.0172,
      "step": 137316
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1138296127319336,
      "learning_rate": 0.00021089997772479882,
      "loss": 3.1061,
      "step": 137317
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.009340524673462,
      "learning_rate": 0.00021089607174748154,
      "loss": 2.7605,
      "step": 137318
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.890838623046875,
      "learning_rate": 0.0002108921657867305,
      "loss": 2.5394,
      "step": 137319
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.020516872406006,
      "learning_rate": 0.00021088825984254614,
      "loss": 3.1091,
      "step": 137320
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.213963270187378,
      "learning_rate": 0.00021088435391492931,
      "loss": 3.1339,
      "step": 137321
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5401744842529297,
      "learning_rate": 0.0002108804480038808,
      "loss": 3.0775,
      "step": 137322
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9107015132904053,
      "learning_rate": 0.0002108765421094013,
      "loss": 2.8759,
      "step": 137323
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3340258598327637,
      "learning_rate": 0.0002108726362314915,
      "loss": 2.8968,
      "step": 137324
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9946900606155396,
      "learning_rate": 0.00021086873037015223,
      "loss": 3.1622,
      "step": 137325
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.5626752376556396,
      "learning_rate": 0.0002108648245253842,
      "loss": 3.0911,
      "step": 137326
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8140857219696045,
      "learning_rate": 0.00021086091869718796,
      "loss": 2.8175,
      "step": 137327
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0455172061920166,
      "learning_rate": 0.0002108570128855644,
      "loss": 2.7626,
      "step": 137328
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2274279594421387,
      "learning_rate": 0.00021085310709051416,
      "loss": 2.8825,
      "step": 137329
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1787631511688232,
      "learning_rate": 0.00021084920131203804,
      "loss": 2.9083,
      "step": 137330
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.966508150100708,
      "learning_rate": 0.0002108452955501367,
      "loss": 2.9129,
      "step": 137331
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.937742829322815,
      "learning_rate": 0.00021084138980481104,
      "loss": 2.9169,
      "step": 137332
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.471418619155884,
      "learning_rate": 0.00021083748407606156,
      "loss": 3.026,
      "step": 137333
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.083041191101074,
      "learning_rate": 0.00021083357836388907,
      "loss": 3.1384,
      "step": 137334
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.917621612548828,
      "learning_rate": 0.00021082967266829427,
      "loss": 2.7567,
      "step": 137335
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5479400157928467,
      "learning_rate": 0.00021082576698927793,
      "loss": 2.8451,
      "step": 137336
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.7527880668640137,
      "learning_rate": 0.00021082186132684072,
      "loss": 2.9746,
      "step": 137337
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.605506658554077,
      "learning_rate": 0.00021081795568098354,
      "loss": 2.858,
      "step": 137338
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.168867588043213,
      "learning_rate": 0.00021081405005170695,
      "loss": 3.0551,
      "step": 137339
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8959789276123047,
      "learning_rate": 0.0002108101444390117,
      "loss": 3.0953,
      "step": 137340
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7740581035614014,
      "learning_rate": 0.0002108062388428985,
      "loss": 3.0197,
      "step": 137341
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9068825244903564,
      "learning_rate": 0.00021080233326336805,
      "loss": 2.9035,
      "step": 137342
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.358032703399658,
      "learning_rate": 0.0002107984277004212,
      "loss": 3.0617,
      "step": 137343
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4531967639923096,
      "learning_rate": 0.00021079452215405864,
      "loss": 3.0833,
      "step": 137344
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.115163803100586,
      "learning_rate": 0.000210790616624281,
      "loss": 3.1155,
      "step": 137345
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2293078899383545,
      "learning_rate": 0.00021078671111108906,
      "loss": 3.0936,
      "step": 137346
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8681952953338623,
      "learning_rate": 0.0002107828056144836,
      "loss": 2.8632,
      "step": 137347
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.019050359725952,
      "learning_rate": 0.00021077890013446526,
      "loss": 2.8545,
      "step": 137348
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0278730392456055,
      "learning_rate": 0.00021077499467103485,
      "loss": 2.8908,
      "step": 137349
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.674801826477051,
      "learning_rate": 0.00021077108922419306,
      "loss": 2.9697,
      "step": 137350
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9085217714309692,
      "learning_rate": 0.00021076718379394054,
      "loss": 2.9713,
      "step": 137351
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3990020751953125,
      "learning_rate": 0.00021076327838027813,
      "loss": 2.7618,
      "step": 137352
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.3708629608154297,
      "learning_rate": 0.00021075937298320647,
      "loss": 3.1109,
      "step": 137353
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.149439573287964,
      "learning_rate": 0.0002107554676027264,
      "loss": 3.264,
      "step": 137354
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2641613483428955,
      "learning_rate": 0.0002107515622388385,
      "loss": 2.9972,
      "step": 137355
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.978494644165039,
      "learning_rate": 0.0002107476568915437,
      "loss": 3.0659,
      "step": 137356
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.5164148807525635,
      "learning_rate": 0.00021074375156084248,
      "loss": 2.689,
      "step": 137357
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.733443260192871,
      "learning_rate": 0.00021073984624673563,
      "loss": 3.0711,
      "step": 137358
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3567402362823486,
      "learning_rate": 0.00021073594094922398,
      "loss": 3.0375,
      "step": 137359
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.175940752029419,
      "learning_rate": 0.00021073203566830822,
      "loss": 3.0047,
      "step": 137360
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.199951648712158,
      "learning_rate": 0.00021072813040398913,
      "loss": 2.8152,
      "step": 137361
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.10465407371521,
      "learning_rate": 0.00021072422515626735,
      "loss": 2.9456,
      "step": 137362
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.61137318611145,
      "learning_rate": 0.00021072031992514352,
      "loss": 3.0162,
      "step": 137363
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5811469554901123,
      "learning_rate": 0.0002107164147106185,
      "loss": 2.8889,
      "step": 137364
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8771275281906128,
      "learning_rate": 0.000210712509512693,
      "loss": 2.9067,
      "step": 137365
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3172943592071533,
      "learning_rate": 0.00021070860433136768,
      "loss": 2.7077,
      "step": 137366
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8701233863830566,
      "learning_rate": 0.00021070469916664337,
      "loss": 3.1301,
      "step": 137367
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.007567882537842,
      "learning_rate": 0.00021070079401852085,
      "loss": 3.1486,
      "step": 137368
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.441014051437378,
      "learning_rate": 0.00021069688888700062,
      "loss": 3.0436,
      "step": 137369
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8294748067855835,
      "learning_rate": 0.0002106929837720835,
      "loss": 2.9723,
      "step": 137370
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.223970413208008,
      "learning_rate": 0.00021068907867377026,
      "loss": 2.9385,
      "step": 137371
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2489209175109863,
      "learning_rate": 0.00021068517359206158,
      "loss": 2.9064,
      "step": 137372
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8069050312042236,
      "learning_rate": 0.00021068126852695824,
      "loss": 2.7291,
      "step": 137373
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6605751514434814,
      "learning_rate": 0.00021067736347846108,
      "loss": 3.031,
      "step": 137374
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1370649337768555,
      "learning_rate": 0.0002106734584465705,
      "loss": 2.9655,
      "step": 137375
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9514267444610596,
      "learning_rate": 0.00021066955343128745,
      "loss": 3.0124,
      "step": 137376
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.086111307144165,
      "learning_rate": 0.0002106656484326126,
      "loss": 3.0384,
      "step": 137377
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5708484649658203,
      "learning_rate": 0.00021066174345054668,
      "loss": 3.0611,
      "step": 137378
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3903143405914307,
      "learning_rate": 0.0002106578384850905,
      "loss": 3.1118,
      "step": 137379
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9562621116638184,
      "learning_rate": 0.00021065393353624474,
      "loss": 3.1561,
      "step": 137380
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3099617958068848,
      "learning_rate": 0.00021065002860401005,
      "loss": 3.1958,
      "step": 137381
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.532273530960083,
      "learning_rate": 0.0002106461236883872,
      "loss": 2.8235,
      "step": 137382
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9560564756393433,
      "learning_rate": 0.0002106422187893769,
      "loss": 3.0471,
      "step": 137383
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4300408363342285,
      "learning_rate": 0.0002106383139069799,
      "loss": 3.0183,
      "step": 137384
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9692440032958984,
      "learning_rate": 0.0002106344090411969,
      "loss": 2.9063,
      "step": 137385
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.665728807449341,
      "learning_rate": 0.00021063050419202882,
      "loss": 2.923,
      "step": 137386
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9247435331344604,
      "learning_rate": 0.00021062659935947608,
      "loss": 2.7976,
      "step": 137387
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5989928245544434,
      "learning_rate": 0.00021062269454353953,
      "loss": 3.0812,
      "step": 137388
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.159510612487793,
      "learning_rate": 0.0002106187897442199,
      "loss": 3.0546,
      "step": 137389
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.358052968978882,
      "learning_rate": 0.00021061488496151797,
      "loss": 2.8182,
      "step": 137390
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9928568601608276,
      "learning_rate": 0.00021061098019543439,
      "loss": 3.2716,
      "step": 137391
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.520846128463745,
      "learning_rate": 0.0002106070754459699,
      "loss": 2.9436,
      "step": 137392
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1595513820648193,
      "learning_rate": 0.00021060317071312535,
      "loss": 2.9664,
      "step": 137393
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.16933012008667,
      "learning_rate": 0.0002105992659969013,
      "loss": 3.0229,
      "step": 137394
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2045342922210693,
      "learning_rate": 0.00021059536129729847,
      "loss": 3.1765,
      "step": 137395
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.873138427734375,
      "learning_rate": 0.0002105914566143177,
      "loss": 2.7377,
      "step": 137396
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0459554195404053,
      "learning_rate": 0.00021058755194795962,
      "loss": 2.8261,
      "step": 137397
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9263174533843994,
      "learning_rate": 0.00021058364729822506,
      "loss": 2.8827,
      "step": 137398
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.570810079574585,
      "learning_rate": 0.00021057974266511478,
      "loss": 2.7543,
      "step": 137399
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0830905437469482,
      "learning_rate": 0.0002105758380486293,
      "loss": 2.8711,
      "step": 137400
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.301798105239868,
      "learning_rate": 0.00021057193344876943,
      "loss": 2.8581,
      "step": 137401
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.245415687561035,
      "learning_rate": 0.00021056802886553592,
      "loss": 3.0763,
      "step": 137402
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2068326473236084,
      "learning_rate": 0.00021056412429892955,
      "loss": 2.8642,
      "step": 137403
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.957794427871704,
      "learning_rate": 0.00021056021974895093,
      "loss": 2.8484,
      "step": 137404
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9093873500823975,
      "learning_rate": 0.00021055631521560103,
      "loss": 2.8189,
      "step": 137405
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.241858720779419,
      "learning_rate": 0.00021055241069888029,
      "loss": 2.973,
      "step": 137406
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.211772918701172,
      "learning_rate": 0.0002105485061987895,
      "loss": 2.7755,
      "step": 137407
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0119826793670654,
      "learning_rate": 0.00021054460171532945,
      "loss": 2.7841,
      "step": 137408
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0646770000457764,
      "learning_rate": 0.00021054069724850085,
      "loss": 3.0283,
      "step": 137409
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3951430320739746,
      "learning_rate": 0.00021053679279830443,
      "loss": 2.9609,
      "step": 137410
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3733410835266113,
      "learning_rate": 0.00021053288836474102,
      "loss": 2.7966,
      "step": 137411
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.418165445327759,
      "learning_rate": 0.00021052898394781111,
      "loss": 2.9465,
      "step": 137412
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0817177295684814,
      "learning_rate": 0.00021052507954751558,
      "loss": 3.0384,
      "step": 137413
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1117756366729736,
      "learning_rate": 0.0002105211751638551,
      "loss": 3.038,
      "step": 137414
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.006357431411743,
      "learning_rate": 0.00021051727079683043,
      "loss": 2.7517,
      "step": 137415
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6917834281921387,
      "learning_rate": 0.00021051336644644226,
      "loss": 2.9901,
      "step": 137416
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.159358501434326,
      "learning_rate": 0.0002105094621126915,
      "loss": 2.8953,
      "step": 137417
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0942931175231934,
      "learning_rate": 0.0002105055577955786,
      "loss": 3.073,
      "step": 137418
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1558940410614014,
      "learning_rate": 0.00021050165349510442,
      "loss": 2.9176,
      "step": 137419
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.005650758743286,
      "learning_rate": 0.00021049774921126963,
      "loss": 3.1289,
      "step": 137420
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7293132543563843,
      "learning_rate": 0.000210493844944075,
      "loss": 3.081,
      "step": 137421
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0498485565185547,
      "learning_rate": 0.00021048994069352124,
      "loss": 3.0077,
      "step": 137422
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.060134172439575,
      "learning_rate": 0.00021048603645960922,
      "loss": 2.9901,
      "step": 137423
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3897440433502197,
      "learning_rate": 0.00021048213224233948,
      "loss": 2.9363,
      "step": 137424
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.07759428024292,
      "learning_rate": 0.00021047822804171274,
      "loss": 2.9261,
      "step": 137425
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1564414501190186,
      "learning_rate": 0.00021047432385772985,
      "loss": 3.0726,
      "step": 137426
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.870413064956665,
      "learning_rate": 0.00021047041969039142,
      "loss": 3.1182,
      "step": 137427
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.856487512588501,
      "learning_rate": 0.00021046651553969823,
      "loss": 2.8165,
      "step": 137428
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9003926515579224,
      "learning_rate": 0.00021046261140565103,
      "loss": 3.0301,
      "step": 137429
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.173787832260132,
      "learning_rate": 0.00021045870728825055,
      "loss": 3.1442,
      "step": 137430
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9451625347137451,
      "learning_rate": 0.00021045480318749747,
      "loss": 2.9463,
      "step": 137431
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.426884412765503,
      "learning_rate": 0.00021045089910339253,
      "loss": 2.8304,
      "step": 137432
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.112928867340088,
      "learning_rate": 0.00021044699503593642,
      "loss": 3.1047,
      "step": 137433
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1476950645446777,
      "learning_rate": 0.0002104430909851299,
      "loss": 2.8299,
      "step": 137434
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3061108589172363,
      "learning_rate": 0.00021043918695097375,
      "loss": 2.9512,
      "step": 137435
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3264830112457275,
      "learning_rate": 0.00021043528293346862,
      "loss": 2.8121,
      "step": 137436
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3958981037139893,
      "learning_rate": 0.00021043137893261526,
      "loss": 2.8353,
      "step": 137437
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.3941128253936768,
      "learning_rate": 0.00021042747494841437,
      "loss": 2.5777,
      "step": 137438
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.7915573120117188,
      "learning_rate": 0.00021042357098086678,
      "loss": 3.0306,
      "step": 137439
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.201279401779175,
      "learning_rate": 0.0002104196670299731,
      "loss": 3.0205,
      "step": 137440
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.704014539718628,
      "learning_rate": 0.00021041576309573413,
      "loss": 3.1019,
      "step": 137441
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.6087660789489746,
      "learning_rate": 0.00021041185917815047,
      "loss": 2.9443,
      "step": 137442
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2057509422302246,
      "learning_rate": 0.00021040795527722299,
      "loss": 3.0355,
      "step": 137443
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.059593439102173,
      "learning_rate": 0.00021040405139295234,
      "loss": 3.0395,
      "step": 137444
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9908738136291504,
      "learning_rate": 0.00021040014752533927,
      "loss": 3.1058,
      "step": 137445
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.728989362716675,
      "learning_rate": 0.00021039624367438458,
      "loss": 3.0517,
      "step": 137446
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.901956081390381,
      "learning_rate": 0.0002103923398400889,
      "loss": 2.9168,
      "step": 137447
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2985453605651855,
      "learning_rate": 0.00021038843602245292,
      "loss": 3.0263,
      "step": 137448
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1680068969726562,
      "learning_rate": 0.00021038453222147743,
      "loss": 2.9982,
      "step": 137449
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.745931386947632,
      "learning_rate": 0.00021038062843716312,
      "loss": 2.8628,
      "step": 137450
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.152333974838257,
      "learning_rate": 0.00021037672466951075,
      "loss": 2.9704,
      "step": 137451
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.022122621536255,
      "learning_rate": 0.00021037282091852108,
      "loss": 3.1388,
      "step": 137452
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6389431953430176,
      "learning_rate": 0.00021036891718419486,
      "loss": 2.8153,
      "step": 137453
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1031477451324463,
      "learning_rate": 0.00021036501346653264,
      "loss": 2.7756,
      "step": 137454
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.064973831176758,
      "learning_rate": 0.00021036110976553528,
      "loss": 3.0313,
      "step": 137455
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5563833713531494,
      "learning_rate": 0.00021035720608120347,
      "loss": 2.9829,
      "step": 137456
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.320397138595581,
      "learning_rate": 0.00021035330241353796,
      "loss": 3.2844,
      "step": 137457
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.81551194190979,
      "learning_rate": 0.00021034939876253946,
      "loss": 2.9397,
      "step": 137458
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.464057683944702,
      "learning_rate": 0.0002103454951282087,
      "loss": 3.3954,
      "step": 137459
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9724102020263672,
      "learning_rate": 0.0002103415915105465,
      "loss": 3.0199,
      "step": 137460
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.015512228012085,
      "learning_rate": 0.0002103376879095534,
      "loss": 3.1027,
      "step": 137461
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2830588817596436,
      "learning_rate": 0.00021033378432523022,
      "loss": 3.143,
      "step": 137462
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2486202716827393,
      "learning_rate": 0.00021032988075757767,
      "loss": 2.9424,
      "step": 137463
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.373566150665283,
      "learning_rate": 0.00021032597720659646,
      "loss": 3.1295,
      "step": 137464
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9001548290252686,
      "learning_rate": 0.00021032207367228736,
      "loss": 2.9065,
      "step": 137465
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0884957313537598,
      "learning_rate": 0.00021031817015465122,
      "loss": 2.6552,
      "step": 137466
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4884867668151855,
      "learning_rate": 0.0002103142666536885,
      "loss": 3.0328,
      "step": 137467
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.24178409576416,
      "learning_rate": 0.00021031036316940003,
      "loss": 2.8894,
      "step": 137468
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.336305856704712,
      "learning_rate": 0.00021030645970178657,
      "loss": 2.9569,
      "step": 137469
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9561078548431396,
      "learning_rate": 0.00021030255625084884,
      "loss": 3.0833,
      "step": 137470
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8275525569915771,
      "learning_rate": 0.00021029865281658756,
      "loss": 2.6918,
      "step": 137471
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9884811639785767,
      "learning_rate": 0.00021029474939900355,
      "loss": 2.8179,
      "step": 137472
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.5821444988250732,
      "learning_rate": 0.0002102908459980973,
      "loss": 2.9944,
      "step": 137473
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2778713703155518,
      "learning_rate": 0.00021028694261386973,
      "loss": 3.036,
      "step": 137474
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.962290644645691,
      "learning_rate": 0.0002102830392463215,
      "loss": 2.8843,
      "step": 137475
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.482048273086548,
      "learning_rate": 0.00021027913589545335,
      "loss": 3.175,
      "step": 137476
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.787771224975586,
      "learning_rate": 0.00021027523256126596,
      "loss": 2.7486,
      "step": 137477
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3776700496673584,
      "learning_rate": 0.00021027132924376026,
      "loss": 2.8177,
      "step": 137478
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1418073177337646,
      "learning_rate": 0.00021026742594293667,
      "loss": 3.0597,
      "step": 137479
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.971832036972046,
      "learning_rate": 0.00021026352265879607,
      "loss": 3.0008,
      "step": 137480
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9952893257141113,
      "learning_rate": 0.00021025961939133918,
      "loss": 2.9313,
      "step": 137481
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.106539249420166,
      "learning_rate": 0.0002102557161405667,
      "loss": 3.1411,
      "step": 137482
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1008338928222656,
      "learning_rate": 0.0002102518129064794,
      "loss": 2.8131,
      "step": 137483
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.484008550643921,
      "learning_rate": 0.00021024790968907813,
      "loss": 3.061,
      "step": 137484
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.270498275756836,
      "learning_rate": 0.0002102440064883633,
      "loss": 3.0467,
      "step": 137485
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0651814937591553,
      "learning_rate": 0.00021024010330433578,
      "loss": 2.7999,
      "step": 137486
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.000232696533203,
      "learning_rate": 0.00021023620013699637,
      "loss": 2.7524,
      "step": 137487
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9369311332702637,
      "learning_rate": 0.00021023229698634573,
      "loss": 2.858,
      "step": 137488
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4713032245635986,
      "learning_rate": 0.00021022839385238456,
      "loss": 2.9312,
      "step": 137489
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3638224601745605,
      "learning_rate": 0.00021022449073511383,
      "loss": 3.1008,
      "step": 137490
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.395207166671753,
      "learning_rate": 0.00021022058763453386,
      "loss": 3.0577,
      "step": 137491
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1201908588409424,
      "learning_rate": 0.00021021668455064563,
      "loss": 3.1843,
      "step": 137492
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0267138481140137,
      "learning_rate": 0.00021021278148344977,
      "loss": 2.8818,
      "step": 137493
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9581356048583984,
      "learning_rate": 0.00021020887843294713,
      "loss": 2.9732,
      "step": 137494
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1046066284179688,
      "learning_rate": 0.00021020497539913827,
      "loss": 2.9197,
      "step": 137495
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7875096797943115,
      "learning_rate": 0.00021020107238202415,
      "loss": 2.8926,
      "step": 137496
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.548250913619995,
      "learning_rate": 0.00021019716938160522,
      "loss": 2.9808,
      "step": 137497
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.5203840732574463,
      "learning_rate": 0.00021019326639788236,
      "loss": 2.7582,
      "step": 137498
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.144684314727783,
      "learning_rate": 0.0002101893634308562,
      "loss": 3.0163,
      "step": 137499
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.962804079055786,
      "learning_rate": 0.00021018546048052762,
      "loss": 2.9291,
      "step": 137500
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4098801612854004,
      "learning_rate": 0.0002101815575468972,
      "loss": 3.2029,
      "step": 137501
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.001521348953247,
      "learning_rate": 0.00021017765462996585,
      "loss": 3.005,
      "step": 137502
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9891119003295898,
      "learning_rate": 0.0002101737517297341,
      "loss": 2.9724,
      "step": 137503
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8343024253845215,
      "learning_rate": 0.00021016984884620268,
      "loss": 3.0609,
      "step": 137504
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.364910840988159,
      "learning_rate": 0.00021016594597937242,
      "loss": 2.669,
      "step": 137505
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.836090326309204,
      "learning_rate": 0.00021016204312924398,
      "loss": 2.8289,
      "step": 137506
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4096758365631104,
      "learning_rate": 0.0002101581402958181,
      "loss": 3.0231,
      "step": 137507
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2720913887023926,
      "learning_rate": 0.00021015423747909563,
      "loss": 2.886,
      "step": 137508
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2307090759277344,
      "learning_rate": 0.00021015033467907713,
      "loss": 2.9856,
      "step": 137509
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.97037935256958,
      "learning_rate": 0.00021014643189576335,
      "loss": 2.7229,
      "step": 137510
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1751928329467773,
      "learning_rate": 0.00021014252912915506,
      "loss": 2.7848,
      "step": 137511
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2127044200897217,
      "learning_rate": 0.00021013862637925295,
      "loss": 2.962,
      "step": 137512
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4377939701080322,
      "learning_rate": 0.0002101347236460577,
      "loss": 2.9549,
      "step": 137513
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.051802158355713,
      "learning_rate": 0.00021013082092957028,
      "loss": 3.1274,
      "step": 137514
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.399789571762085,
      "learning_rate": 0.0002101269182297911,
      "loss": 2.9469,
      "step": 137515
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.130755662918091,
      "learning_rate": 0.00021012301554672108,
      "loss": 2.8879,
      "step": 137516
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.760205030441284,
      "learning_rate": 0.0002101191128803609,
      "loss": 2.7039,
      "step": 137517
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0225961208343506,
      "learning_rate": 0.0002101152102307112,
      "loss": 2.9723,
      "step": 137518
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9329642057418823,
      "learning_rate": 0.00021011130759777282,
      "loss": 2.7116,
      "step": 137519
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8170926570892334,
      "learning_rate": 0.00021010740498154647,
      "loss": 2.9231,
      "step": 137520
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2463629245758057,
      "learning_rate": 0.00021010350238203282,
      "loss": 2.9893,
      "step": 137521
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.213809013366699,
      "learning_rate": 0.0002100995997992326,
      "loss": 2.9948,
      "step": 137522
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2503039836883545,
      "learning_rate": 0.00021009569723314664,
      "loss": 3.0028,
      "step": 137523
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9926061630249023,
      "learning_rate": 0.00021009179468377555,
      "loss": 2.8236,
      "step": 137524
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0170671939849854,
      "learning_rate": 0.00021008789215112004,
      "loss": 3.0171,
      "step": 137525
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.914726734161377,
      "learning_rate": 0.00021008398963518092,
      "loss": 2.6779,
      "step": 137526
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8613537549972534,
      "learning_rate": 0.00021008008713595895,
      "loss": 2.8098,
      "step": 137527
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.041313409805298,
      "learning_rate": 0.0002100761846534547,
      "loss": 2.9935,
      "step": 137528
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.561389684677124,
      "learning_rate": 0.00021007228218766901,
      "loss": 2.9164,
      "step": 137529
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5648410320281982,
      "learning_rate": 0.00021006837973860256,
      "loss": 3.1094,
      "step": 137530
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0060861110687256,
      "learning_rate": 0.00021006447730625616,
      "loss": 3.0625,
      "step": 137531
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2143518924713135,
      "learning_rate": 0.00021006057489063044,
      "loss": 2.7899,
      "step": 137532
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4885613918304443,
      "learning_rate": 0.00021005667249172617,
      "loss": 2.9735,
      "step": 137533
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2120351791381836,
      "learning_rate": 0.00021005277010954405,
      "loss": 2.8323,
      "step": 137534
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9472007751464844,
      "learning_rate": 0.0002100488677440848,
      "loss": 2.9027,
      "step": 137535
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2026376724243164,
      "learning_rate": 0.0002100449653953491,
      "loss": 3.0618,
      "step": 137536
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.274153709411621,
      "learning_rate": 0.00021004106306333787,
      "loss": 3.0307,
      "step": 137537
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2566397190093994,
      "learning_rate": 0.0002100371607480517,
      "loss": 3.059,
      "step": 137538
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7266154289245605,
      "learning_rate": 0.00021003325844949126,
      "loss": 2.8989,
      "step": 137539
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1892249584198,
      "learning_rate": 0.00021002935616765733,
      "loss": 3.0213,
      "step": 137540
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.259260654449463,
      "learning_rate": 0.00021002545390255063,
      "loss": 3.0168,
      "step": 137541
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1570870876312256,
      "learning_rate": 0.00021002155165417193,
      "loss": 2.8655,
      "step": 137542
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8482775688171387,
      "learning_rate": 0.0002100176494225219,
      "loss": 3.2556,
      "step": 137543
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.706493377685547,
      "learning_rate": 0.0002100137472076013,
      "loss": 2.8891,
      "step": 137544
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9238085746765137,
      "learning_rate": 0.00021000984500941093,
      "loss": 2.7154,
      "step": 137545
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3055496215820312,
      "learning_rate": 0.00021000594282795132,
      "loss": 3.0054,
      "step": 137546
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.070436954498291,
      "learning_rate": 0.0002100020406632233,
      "loss": 2.9668,
      "step": 137547
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.423772096633911,
      "learning_rate": 0.00020999813851522764,
      "loss": 2.9243,
      "step": 137548
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.019233226776123,
      "learning_rate": 0.000209994236383965,
      "loss": 3.051,
      "step": 137549
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3547213077545166,
      "learning_rate": 0.0002099903342694361,
      "loss": 2.9718,
      "step": 137550
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6737797260284424,
      "learning_rate": 0.00020998643217164185,
      "loss": 2.7432,
      "step": 137551
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8468377590179443,
      "learning_rate": 0.00020998253009058267,
      "loss": 3.0787,
      "step": 137552
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0513596534729004,
      "learning_rate": 0.00020997862802625948,
      "loss": 3.0761,
      "step": 137553
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3852431774139404,
      "learning_rate": 0.00020997472597867294,
      "loss": 2.9688,
      "step": 137554
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5915517807006836,
      "learning_rate": 0.0002099708239478238,
      "loss": 3.0046,
      "step": 137555
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.857975721359253,
      "learning_rate": 0.0002099669219337128,
      "loss": 2.9146,
      "step": 137556
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3569509983062744,
      "learning_rate": 0.00020996301993634074,
      "loss": 2.9454,
      "step": 137557
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6046600341796875,
      "learning_rate": 0.00020995911795570812,
      "loss": 2.8416,
      "step": 137558
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.190213203430176,
      "learning_rate": 0.00020995521599181583,
      "loss": 3.1785,
      "step": 137559
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0615134239196777,
      "learning_rate": 0.0002099513140446646,
      "loss": 2.8082,
      "step": 137560
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.693406105041504,
      "learning_rate": 0.00020994741211425506,
      "loss": 2.9159,
      "step": 137561
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.543605089187622,
      "learning_rate": 0.000209943510200588,
      "loss": 3.1543,
      "step": 137562
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.256132125854492,
      "learning_rate": 0.00020993960830366427,
      "loss": 2.9233,
      "step": 137563
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.8113999366760254,
      "learning_rate": 0.0002099357064234844,
      "loss": 2.8825,
      "step": 137564
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9900827407836914,
      "learning_rate": 0.0002099318045600491,
      "loss": 3.0499,
      "step": 137565
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.001896619796753,
      "learning_rate": 0.00020992790271335922,
      "loss": 2.9474,
      "step": 137566
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0429539680480957,
      "learning_rate": 0.00020992400088341544,
      "loss": 3.0639,
      "step": 137567
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.364166259765625,
      "learning_rate": 0.00020992009907021847,
      "loss": 3.2117,
      "step": 137568
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.092420816421509,
      "learning_rate": 0.0002099161972737692,
      "loss": 2.9189,
      "step": 137569
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1941628456115723,
      "learning_rate": 0.0002099122954940681,
      "loss": 2.9468,
      "step": 137570
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9228075742721558,
      "learning_rate": 0.00020990839373111599,
      "loss": 2.8435,
      "step": 137571
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0735204219818115,
      "learning_rate": 0.00020990449198491363,
      "loss": 2.7525,
      "step": 137572
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.12522292137146,
      "learning_rate": 0.00020990059025546167,
      "loss": 3.0108,
      "step": 137573
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8924731016159058,
      "learning_rate": 0.0002098966885427609,
      "loss": 3.0755,
      "step": 137574
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.06294584274292,
      "learning_rate": 0.0002098927868468122,
      "loss": 2.9657,
      "step": 137575
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0589821338653564,
      "learning_rate": 0.00020988888516761597,
      "loss": 3.121,
      "step": 137576
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1456172466278076,
      "learning_rate": 0.00020988498350517313,
      "loss": 3.1249,
      "step": 137577
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8118598461151123,
      "learning_rate": 0.00020988108185948437,
      "loss": 2.8414,
      "step": 137578
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.777857542037964,
      "learning_rate": 0.00020987718023055037,
      "loss": 3.0884,
      "step": 137579
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.623011589050293,
      "learning_rate": 0.000209873278618372,
      "loss": 3.0211,
      "step": 137580
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8246465921401978,
      "learning_rate": 0.00020986937702294995,
      "loss": 2.6537,
      "step": 137581
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0319528579711914,
      "learning_rate": 0.00020986547544428477,
      "loss": 3.028,
      "step": 137582
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.5737366676330566,
      "learning_rate": 0.00020986157388237726,
      "loss": 2.7519,
      "step": 137583
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.370863437652588,
      "learning_rate": 0.00020985767233722822,
      "loss": 2.936,
      "step": 137584
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.301307201385498,
      "learning_rate": 0.00020985377080883836,
      "loss": 2.9508,
      "step": 137585
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5745227336883545,
      "learning_rate": 0.00020984986929720832,
      "loss": 2.9592,
      "step": 137586
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.263918161392212,
      "learning_rate": 0.0002098459678023391,
      "loss": 3.2651,
      "step": 137587
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8626461029052734,
      "learning_rate": 0.00020984206632423103,
      "loss": 2.9676,
      "step": 137588
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.329193115234375,
      "learning_rate": 0.00020983816486288504,
      "loss": 2.9939,
      "step": 137589
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2213125228881836,
      "learning_rate": 0.00020983426341830184,
      "loss": 3.0187,
      "step": 137590
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.831411361694336,
      "learning_rate": 0.00020983036199048217,
      "loss": 2.9125,
      "step": 137591
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.980984687805176,
      "learning_rate": 0.00020982646057942668,
      "loss": 2.9141,
      "step": 137592
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7237308025360107,
      "learning_rate": 0.00020982255918513624,
      "loss": 2.9914,
      "step": 137593
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3214757442474365,
      "learning_rate": 0.00020981865780761157,
      "loss": 2.8423,
      "step": 137594
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0626893043518066,
      "learning_rate": 0.00020981475644685316,
      "loss": 2.9246,
      "step": 137595
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.672783851623535,
      "learning_rate": 0.00020981085510286192,
      "loss": 3.2187,
      "step": 137596
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1099753379821777,
      "learning_rate": 0.00020980695377563856,
      "loss": 3.097,
      "step": 137597
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.462860107421875,
      "learning_rate": 0.00020980305246518375,
      "loss": 2.7936,
      "step": 137598
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.104931592941284,
      "learning_rate": 0.00020979915117149827,
      "loss": 2.9511,
      "step": 137599
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.732924222946167,
      "learning_rate": 0.00020979524989458292,
      "loss": 3.0198,
      "step": 137600
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.263284683227539,
      "learning_rate": 0.00020979134863443832,
      "loss": 3.0303,
      "step": 137601
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.688286066055298,
      "learning_rate": 0.0002097874473910651,
      "loss": 3.0686,
      "step": 137602
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4459142684936523,
      "learning_rate": 0.00020978354616446417,
      "loss": 2.7983,
      "step": 137603
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6083385944366455,
      "learning_rate": 0.00020977964495463614,
      "loss": 2.8854,
      "step": 137604
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.516840696334839,
      "learning_rate": 0.00020977574376158177,
      "loss": 3.0442,
      "step": 137605
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9879623651504517,
      "learning_rate": 0.00020977184258530186,
      "loss": 2.9596,
      "step": 137606
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3369171619415283,
      "learning_rate": 0.00020976794142579702,
      "loss": 3.011,
      "step": 137607
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.236501693725586,
      "learning_rate": 0.0002097640402830681,
      "loss": 3.2316,
      "step": 137608
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.176621913909912,
      "learning_rate": 0.0002097601391571157,
      "loss": 2.7936,
      "step": 137609
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9585423469543457,
      "learning_rate": 0.00020975623804794053,
      "loss": 2.7811,
      "step": 137610
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.169404983520508,
      "learning_rate": 0.00020975233695554342,
      "loss": 2.7958,
      "step": 137611
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2777140140533447,
      "learning_rate": 0.00020974843587992512,
      "loss": 3.0247,
      "step": 137612
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.902492642402649,
      "learning_rate": 0.0002097445348210862,
      "loss": 2.9351,
      "step": 137613
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.158066511154175,
      "learning_rate": 0.00020974063377902748,
      "loss": 3.0625,
      "step": 137614
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9818205833435059,
      "learning_rate": 0.0002097367327537498,
      "loss": 3.0698,
      "step": 137615
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.244023561477661,
      "learning_rate": 0.0002097328317452536,
      "loss": 2.8961,
      "step": 137616
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.526707887649536,
      "learning_rate": 0.00020972893075353981,
      "loss": 2.6165,
      "step": 137617
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8211839199066162,
      "learning_rate": 0.0002097250297786092,
      "loss": 3.094,
      "step": 137618
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2863688468933105,
      "learning_rate": 0.00020972112882046236,
      "loss": 2.8232,
      "step": 137619
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3713531494140625,
      "learning_rate": 0.00020971722787910003,
      "loss": 2.9619,
      "step": 137620
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.236804485321045,
      "learning_rate": 0.00020971332695452302,
      "loss": 3.0438,
      "step": 137621
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1064748764038086,
      "learning_rate": 0.00020970942604673203,
      "loss": 3.1466,
      "step": 137622
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.67414927482605,
      "learning_rate": 0.00020970552515572771,
      "loss": 3.1944,
      "step": 137623
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.30617094039917,
      "learning_rate": 0.0002097016242815109,
      "loss": 3.3534,
      "step": 137624
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.257094621658325,
      "learning_rate": 0.00020969772342408218,
      "loss": 2.9263,
      "step": 137625
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5244665145874023,
      "learning_rate": 0.0002096938225834424,
      "loss": 2.9999,
      "step": 137626
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9543647766113281,
      "learning_rate": 0.00020968992175959217,
      "loss": 3.2738,
      "step": 137627
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.319020986557007,
      "learning_rate": 0.00020968602095253235,
      "loss": 3.075,
      "step": 137628
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.057023525238037,
      "learning_rate": 0.00020968212016226362,
      "loss": 3.1127,
      "step": 137629
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7237515449523926,
      "learning_rate": 0.0002096782193887868,
      "loss": 2.8881,
      "step": 137630
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9837532043457031,
      "learning_rate": 0.00020967431863210234,
      "loss": 2.818,
      "step": 137631
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0690298080444336,
      "learning_rate": 0.00020967041789221117,
      "loss": 2.9874,
      "step": 137632
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8340494632720947,
      "learning_rate": 0.00020966651716911394,
      "loss": 2.9019,
      "step": 137633
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9846746921539307,
      "learning_rate": 0.0002096626164628114,
      "loss": 2.8441,
      "step": 137634
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.603295087814331,
      "learning_rate": 0.00020965871577330436,
      "loss": 3.0861,
      "step": 137635
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.197903633117676,
      "learning_rate": 0.00020965481510059353,
      "loss": 2.942,
      "step": 137636
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.2503409385681152,
      "learning_rate": 0.00020965091444467946,
      "loss": 2.8676,
      "step": 137637
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8730639219284058,
      "learning_rate": 0.000209647013805563,
      "loss": 3.0658,
      "step": 137638
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.399919271469116,
      "learning_rate": 0.00020964311318324487,
      "loss": 2.8583,
      "step": 137639
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9918951988220215,
      "learning_rate": 0.00020963921257772576,
      "loss": 2.9631,
      "step": 137640
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.69277286529541,
      "learning_rate": 0.00020963531198900647,
      "loss": 2.7446,
      "step": 137641
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1631217002868652,
      "learning_rate": 0.0002096314114170878,
      "loss": 3.0601,
      "step": 137642
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.560856580734253,
      "learning_rate": 0.00020962751086197018,
      "loss": 2.9717,
      "step": 137643
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.240060329437256,
      "learning_rate": 0.00020962361032365458,
      "loss": 2.8238,
      "step": 137644
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7600631713867188,
      "learning_rate": 0.0002096197098021416,
      "loss": 3.1343,
      "step": 137645
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.118063449859619,
      "learning_rate": 0.00020961580929743204,
      "loss": 2.9755,
      "step": 137646
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.020993947982788,
      "learning_rate": 0.00020961190880952664,
      "loss": 3.0849,
      "step": 137647
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0259788036346436,
      "learning_rate": 0.0002096080083384262,
      "loss": 2.8729,
      "step": 137648
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.614046335220337,
      "learning_rate": 0.0002096041078841312,
      "loss": 2.837,
      "step": 137649
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.789839506149292,
      "learning_rate": 0.00020960020744664252,
      "loss": 2.9463,
      "step": 137650
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2587225437164307,
      "learning_rate": 0.00020959630702596085,
      "loss": 3.1402,
      "step": 137651
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.662280321121216,
      "learning_rate": 0.00020959240662208692,
      "loss": 3.0469,
      "step": 137652
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1705784797668457,
      "learning_rate": 0.0002095885062350215,
      "loss": 3.0435,
      "step": 137653
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.21809983253479,
      "learning_rate": 0.00020958460586476538,
      "loss": 3.1986,
      "step": 137654
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5131750106811523,
      "learning_rate": 0.00020958070551131908,
      "loss": 2.9721,
      "step": 137655
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.813382863998413,
      "learning_rate": 0.00020957680517468344,
      "loss": 2.9947,
      "step": 137656
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.288569927215576,
      "learning_rate": 0.00020957290485485916,
      "loss": 2.8303,
      "step": 137657
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.332691669464111,
      "learning_rate": 0.00020956900455184702,
      "loss": 3.0485,
      "step": 137658
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8476598262786865,
      "learning_rate": 0.00020956510426564767,
      "loss": 3.0808,
      "step": 137659
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3623409271240234,
      "learning_rate": 0.0002095612039962619,
      "loss": 2.8631,
      "step": 137660
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.205305337905884,
      "learning_rate": 0.00020955730374369052,
      "loss": 3.0456,
      "step": 137661
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1767702102661133,
      "learning_rate": 0.00020955340350793403,
      "loss": 2.8888,
      "step": 137662
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.114316940307617,
      "learning_rate": 0.00020954950328899323,
      "loss": 3.3438,
      "step": 137663
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.32616925239563,
      "learning_rate": 0.00020954560308686894,
      "loss": 3.078,
      "step": 137664
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1040632724761963,
      "learning_rate": 0.0002095417029015618,
      "loss": 2.8683,
      "step": 137665
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.741037130355835,
      "learning_rate": 0.00020953780273307254,
      "loss": 3.0206,
      "step": 137666
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.127267599105835,
      "learning_rate": 0.0002095339025814021,
      "loss": 2.916,
      "step": 137667
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0234909057617188,
      "learning_rate": 0.00020953000244655084,
      "loss": 2.968,
      "step": 137668
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.237830400466919,
      "learning_rate": 0.00020952610232851968,
      "loss": 2.8453,
      "step": 137669
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2989978790283203,
      "learning_rate": 0.00020952220222730932,
      "loss": 2.8244,
      "step": 137670
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.729490041732788,
      "learning_rate": 0.0002095183021429205,
      "loss": 3.1691,
      "step": 137671
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.912925958633423,
      "learning_rate": 0.00020951440207535391,
      "loss": 2.89,
      "step": 137672
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1854217052459717,
      "learning_rate": 0.00020951050202461046,
      "loss": 2.8851,
      "step": 137673
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4614439010620117,
      "learning_rate": 0.00020950660199069057,
      "loss": 3.0793,
      "step": 137674
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.920124053955078,
      "learning_rate": 0.0002095027019735951,
      "loss": 2.9166,
      "step": 137675
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.366616725921631,
      "learning_rate": 0.00020949880197332482,
      "loss": 2.916,
      "step": 137676
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.4188051223754883,
      "learning_rate": 0.0002094949019898804,
      "loss": 2.873,
      "step": 137677
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.7707815170288086,
      "learning_rate": 0.0002094910020232626,
      "loss": 2.9755,
      "step": 137678
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.172938346862793,
      "learning_rate": 0.00020948710207347224,
      "loss": 2.991,
      "step": 137679
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7146053314208984,
      "learning_rate": 0.00020948320214050984,
      "loss": 2.9646,
      "step": 137680
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.009091854095459,
      "learning_rate": 0.00020947930222437619,
      "loss": 2.9729,
      "step": 137681
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.365201473236084,
      "learning_rate": 0.00020947540232507206,
      "loss": 2.8745,
      "step": 137682
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1250035762786865,
      "learning_rate": 0.00020947150244259816,
      "loss": 2.9342,
      "step": 137683
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.50280499458313,
      "learning_rate": 0.0002094676025769552,
      "loss": 3.1888,
      "step": 137684
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0853114128112793,
      "learning_rate": 0.00020946370272814405,
      "loss": 2.7715,
      "step": 137685
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1056954860687256,
      "learning_rate": 0.00020945980289616516,
      "loss": 3.1183,
      "step": 137686
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.772982597351074,
      "learning_rate": 0.00020945590308101946,
      "loss": 3.0058,
      "step": 137687
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.720248222351074,
      "learning_rate": 0.0002094520032827076,
      "loss": 2.9987,
      "step": 137688
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.939113140106201,
      "learning_rate": 0.0002094481035012303,
      "loss": 2.8837,
      "step": 137689
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.191739559173584,
      "learning_rate": 0.00020944420373658832,
      "loss": 3.2594,
      "step": 137690
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.240917682647705,
      "learning_rate": 0.00020944030398878244,
      "loss": 2.837,
      "step": 137691
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0244076251983643,
      "learning_rate": 0.00020943640425781322,
      "loss": 3.0865,
      "step": 137692
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3208205699920654,
      "learning_rate": 0.0002094325045436816,
      "loss": 3.0394,
      "step": 137693
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0917747020721436,
      "learning_rate": 0.0002094286048463881,
      "loss": 2.8309,
      "step": 137694
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5541083812713623,
      "learning_rate": 0.0002094247051659335,
      "loss": 3.0752,
      "step": 137695
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.291430950164795,
      "learning_rate": 0.0002094208055023186,
      "loss": 3.1378,
      "step": 137696
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.211642026901245,
      "learning_rate": 0.00020941690585554411,
      "loss": 2.8848,
      "step": 137697
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.3292338848114014,
      "learning_rate": 0.00020941300622561067,
      "loss": 3.0739,
      "step": 137698
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.141714096069336,
      "learning_rate": 0.0002094091066125191,
      "loss": 3.0059,
      "step": 137699
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.466552257537842,
      "learning_rate": 0.00020940520701627014,
      "loss": 3.004,
      "step": 137700
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6065704822540283,
      "learning_rate": 0.00020940130743686436,
      "loss": 3.0166,
      "step": 137701
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9582794904708862,
      "learning_rate": 0.0002093974078743026,
      "loss": 2.896,
      "step": 137702
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2746217250823975,
      "learning_rate": 0.00020939350832858568,
      "loss": 2.8733,
      "step": 137703
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8170567750930786,
      "learning_rate": 0.00020938960879971413,
      "loss": 2.8075,
      "step": 137704
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9152977466583252,
      "learning_rate": 0.00020938570928768872,
      "loss": 2.9862,
      "step": 137705
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8619422912597656,
      "learning_rate": 0.0002093818097925103,
      "loss": 2.9492,
      "step": 137706
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9189928770065308,
      "learning_rate": 0.00020937791031417952,
      "loss": 2.815,
      "step": 137707
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9323457479476929,
      "learning_rate": 0.00020937401085269701,
      "loss": 2.9916,
      "step": 137708
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.97048819065094,
      "learning_rate": 0.0002093701114080637,
      "loss": 2.8906,
      "step": 137709
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3214170932769775,
      "learning_rate": 0.00020936621198028006,
      "loss": 2.8884,
      "step": 137710
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8840174674987793,
      "learning_rate": 0.00020936231256934702,
      "loss": 2.915,
      "step": 137711
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8972456455230713,
      "learning_rate": 0.00020935841317526523,
      "loss": 3.1073,
      "step": 137712
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2910313606262207,
      "learning_rate": 0.0002093545137980354,
      "loss": 2.739,
      "step": 137713
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.271402597427368,
      "learning_rate": 0.00020935061443765835,
      "loss": 3.1849,
      "step": 137714
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.783147096633911,
      "learning_rate": 0.00020934671509413472,
      "loss": 2.9358,
      "step": 137715
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.959823489189148,
      "learning_rate": 0.00020934281576746525,
      "loss": 3.046,
      "step": 137716
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5160984992980957,
      "learning_rate": 0.0002093389164576506,
      "loss": 2.9123,
      "step": 137717
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.8840010166168213,
      "learning_rate": 0.00020933501716469154,
      "loss": 2.8931,
      "step": 137718
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9167563915252686,
      "learning_rate": 0.00020933111788858883,
      "loss": 2.9015,
      "step": 137719
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9638564586639404,
      "learning_rate": 0.00020932721862934318,
      "loss": 3.0284,
      "step": 137720
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.217088222503662,
      "learning_rate": 0.00020932331938695544,
      "loss": 2.9303,
      "step": 137721
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.892165184020996,
      "learning_rate": 0.0002093194201614261,
      "loss": 2.8805,
      "step": 137722
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9943898916244507,
      "learning_rate": 0.00020931552095275593,
      "loss": 3.2376,
      "step": 137723
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0991921424865723,
      "learning_rate": 0.0002093116217609458,
      "loss": 3.1392,
      "step": 137724
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.609163999557495,
      "learning_rate": 0.0002093077225859963,
      "loss": 3.0397,
      "step": 137725
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.40169620513916,
      "learning_rate": 0.00020930382342790823,
      "loss": 3.3542,
      "step": 137726
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0698368549346924,
      "learning_rate": 0.00020929992428668227,
      "loss": 2.9981,
      "step": 137727
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1569273471832275,
      "learning_rate": 0.00020929602516231931,
      "loss": 3.0293,
      "step": 137728
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3071093559265137,
      "learning_rate": 0.00020929212605481977,
      "loss": 3.1548,
      "step": 137729
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2697913646698,
      "learning_rate": 0.00020928822696418455,
      "loss": 3.0561,
      "step": 137730
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.5494258403778076,
      "learning_rate": 0.0002092843278904144,
      "loss": 3.0456,
      "step": 137731
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2396984100341797,
      "learning_rate": 0.00020928042883350994,
      "loss": 3.1085,
      "step": 137732
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.046506643295288,
      "learning_rate": 0.00020927652979347205,
      "loss": 2.7944,
      "step": 137733
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9229611158370972,
      "learning_rate": 0.00020927263077030143,
      "loss": 3.0216,
      "step": 137734
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9453105926513672,
      "learning_rate": 0.00020926873176399863,
      "loss": 2.9193,
      "step": 137735
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.182126522064209,
      "learning_rate": 0.00020926483277456444,
      "loss": 2.848,
      "step": 137736
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0184125900268555,
      "learning_rate": 0.0002092609338019997,
      "loss": 2.9391,
      "step": 137737
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.436171531677246,
      "learning_rate": 0.00020925703484630503,
      "loss": 2.8366,
      "step": 137738
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1115171909332275,
      "learning_rate": 0.00020925313590748122,
      "loss": 3.0014,
      "step": 137739
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9661946296691895,
      "learning_rate": 0.00020924923698552905,
      "loss": 2.9524,
      "step": 137740
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.7509806156158447,
      "learning_rate": 0.00020924533808044905,
      "loss": 3.0658,
      "step": 137741
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3343300819396973,
      "learning_rate": 0.00020924143919224202,
      "loss": 2.8795,
      "step": 137742
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1157240867614746,
      "learning_rate": 0.00020923754032090879,
      "loss": 3.2355,
      "step": 137743
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.619729518890381,
      "learning_rate": 0.00020923364146644997,
      "loss": 3.166,
      "step": 137744
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8956072330474854,
      "learning_rate": 0.00020922974262886635,
      "loss": 2.9104,
      "step": 137745
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6451146602630615,
      "learning_rate": 0.00020922584380815877,
      "loss": 2.9447,
      "step": 137746
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.6247658729553223,
      "learning_rate": 0.00020922194500432766,
      "loss": 3.0385,
      "step": 137747
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.131284475326538,
      "learning_rate": 0.0002092180462173739,
      "loss": 3.1265,
      "step": 137748
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.923779845237732,
      "learning_rate": 0.00020921414744729826,
      "loss": 3.0895,
      "step": 137749
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6159892082214355,
      "learning_rate": 0.00020921024869410138,
      "loss": 2.7664,
      "step": 137750
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9166717529296875,
      "learning_rate": 0.00020920634995778405,
      "loss": 2.9803,
      "step": 137751
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.509369373321533,
      "learning_rate": 0.00020920245123834708,
      "loss": 2.8822,
      "step": 137752
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.654587984085083,
      "learning_rate": 0.000209198552535791,
      "loss": 2.995,
      "step": 137753
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1430442333221436,
      "learning_rate": 0.0002091946538501166,
      "loss": 3.155,
      "step": 137754
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.699037551879883,
      "learning_rate": 0.00020919075518132464,
      "loss": 2.7837,
      "step": 137755
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.949159860610962,
      "learning_rate": 0.00020918685652941582,
      "loss": 2.8031,
      "step": 137756
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8638070821762085,
      "learning_rate": 0.00020918295789439087,
      "loss": 3.0535,
      "step": 137757
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5038514137268066,
      "learning_rate": 0.00020917905927625067,
      "loss": 2.9611,
      "step": 137758
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.179680585861206,
      "learning_rate": 0.00020917516067499566,
      "loss": 3.0547,
      "step": 137759
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.502624750137329,
      "learning_rate": 0.0002091712620906267,
      "loss": 2.8519,
      "step": 137760
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3132691383361816,
      "learning_rate": 0.0002091673635231445,
      "loss": 3.1546,
      "step": 137761
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.387427806854248,
      "learning_rate": 0.0002091634649725498,
      "loss": 2.8274,
      "step": 137762
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.959679126739502,
      "learning_rate": 0.0002091595664388434,
      "loss": 2.8407,
      "step": 137763
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.999872088432312,
      "learning_rate": 0.00020915566792202602,
      "loss": 2.9963,
      "step": 137764
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.609452724456787,
      "learning_rate": 0.00020915176942209817,
      "loss": 3.0541,
      "step": 137765
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.0339465141296387,
      "learning_rate": 0.00020914787093906077,
      "loss": 3.166,
      "step": 137766
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.05532169342041,
      "learning_rate": 0.0002091439724729145,
      "loss": 3.126,
      "step": 137767
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4707415103912354,
      "learning_rate": 0.00020914007402366,
      "loss": 2.8489,
      "step": 137768
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.5041322708129883,
      "learning_rate": 0.00020913617559129813,
      "loss": 2.8574,
      "step": 137769
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6896603107452393,
      "learning_rate": 0.0002091322771758297,
      "loss": 2.9024,
      "step": 137770
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.183831214904785,
      "learning_rate": 0.00020912837877725513,
      "loss": 2.7463,
      "step": 137771
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9229003190994263,
      "learning_rate": 0.00020912448039557536,
      "loss": 2.8312,
      "step": 137772
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.633577823638916,
      "learning_rate": 0.00020912058203079103,
      "loss": 2.8397,
      "step": 137773
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2097177505493164,
      "learning_rate": 0.0002091166836829029,
      "loss": 2.9434,
      "step": 137774
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1524484157562256,
      "learning_rate": 0.00020911278535191172,
      "loss": 2.873,
      "step": 137775
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.8778111934661865,
      "learning_rate": 0.00020910888703781823,
      "loss": 2.6668,
      "step": 137776
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.106849193572998,
      "learning_rate": 0.0002091049887406231,
      "loss": 2.775,
      "step": 137777
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9701454639434814,
      "learning_rate": 0.00020910109046032703,
      "loss": 3.1789,
      "step": 137778
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.861339092254639,
      "learning_rate": 0.00020909719219693075,
      "loss": 2.9139,
      "step": 137779
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.243199586868286,
      "learning_rate": 0.00020909329395043508,
      "loss": 2.958,
      "step": 137780
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6308255195617676,
      "learning_rate": 0.00020908939572084062,
      "loss": 2.8779,
      "step": 137781
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8860348463058472,
      "learning_rate": 0.00020908549750814825,
      "loss": 2.8867,
      "step": 137782
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0961952209472656,
      "learning_rate": 0.00020908159931235854,
      "loss": 3.0697,
      "step": 137783
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.701472520828247,
      "learning_rate": 0.00020907770113347233,
      "loss": 2.7234,
      "step": 137784
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4275667667388916,
      "learning_rate": 0.00020907380297149027,
      "loss": 2.6935,
      "step": 137785
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8032492399215698,
      "learning_rate": 0.00020906990482641303,
      "loss": 2.9212,
      "step": 137786
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3608286380767822,
      "learning_rate": 0.00020906600669824148,
      "loss": 2.8127,
      "step": 137787
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.211320638656616,
      "learning_rate": 0.00020906210858697628,
      "loss": 3.0882,
      "step": 137788
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3199143409729004,
      "learning_rate": 0.00020905821049261813,
      "loss": 3.0017,
      "step": 137789
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1957080364227295,
      "learning_rate": 0.00020905431241516776,
      "loss": 2.8663,
      "step": 137790
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0218818187713623,
      "learning_rate": 0.00020905041435462587,
      "loss": 3.0211,
      "step": 137791
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9556628465652466,
      "learning_rate": 0.00020904651631099337,
      "loss": 3.1309,
      "step": 137792
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.190200090408325,
      "learning_rate": 0.00020904261828427073,
      "loss": 2.9981,
      "step": 137793
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9703506231307983,
      "learning_rate": 0.0002090387202744588,
      "loss": 3.1903,
      "step": 137794
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8603965044021606,
      "learning_rate": 0.00020903482228155833,
      "loss": 2.8641,
      "step": 137795
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.871014952659607,
      "learning_rate": 0.00020903092430556996,
      "loss": 2.8909,
      "step": 137796
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9848846197128296,
      "learning_rate": 0.00020902702634649443,
      "loss": 2.9221,
      "step": 137797
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.033080816268921,
      "learning_rate": 0.00020902312840433252,
      "loss": 3.0375,
      "step": 137798
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1826720237731934,
      "learning_rate": 0.00020901923047908502,
      "loss": 3.1069,
      "step": 137799
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9706227779388428,
      "learning_rate": 0.00020901533257075243,
      "loss": 3.0594,
      "step": 137800
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9616734981536865,
      "learning_rate": 0.00020901143467933573,
      "loss": 3.1173,
      "step": 137801
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0019896030426025,
      "learning_rate": 0.00020900753680483543,
      "loss": 3.0329,
      "step": 137802
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.938611388206482,
      "learning_rate": 0.00020900363894725234,
      "loss": 3.0753,
      "step": 137803
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7634222507476807,
      "learning_rate": 0.0002089997411065872,
      "loss": 2.9599,
      "step": 137804
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1170995235443115,
      "learning_rate": 0.00020899584328284073,
      "loss": 3.0833,
      "step": 137805
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1307239532470703,
      "learning_rate": 0.0002089919454760137,
      "loss": 2.9217,
      "step": 137806
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.5451741218566895,
      "learning_rate": 0.00020898804768610678,
      "loss": 2.8644,
      "step": 137807
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1892638206481934,
      "learning_rate": 0.00020898414991312068,
      "loss": 2.864,
      "step": 137808
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8630554676055908,
      "learning_rate": 0.0002089802521570561,
      "loss": 3.1154,
      "step": 137809
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.445070266723633,
      "learning_rate": 0.00020897635441791387,
      "loss": 3.119,
      "step": 137810
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.418510913848877,
      "learning_rate": 0.00020897245669569459,
      "loss": 2.9586,
      "step": 137811
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3467772006988525,
      "learning_rate": 0.00020896855899039908,
      "loss": 2.8446,
      "step": 137812
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.438882350921631,
      "learning_rate": 0.00020896466130202815,
      "loss": 2.9619,
      "step": 137813
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2096638679504395,
      "learning_rate": 0.00020896076363058228,
      "loss": 2.8943,
      "step": 137814
    },
    {
      "epoch": 1.79,
      "grad_norm": 5.209461212158203,
      "learning_rate": 0.0002089568659760623,
      "loss": 2.8657,
      "step": 137815
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9886927604675293,
      "learning_rate": 0.000208952968338469,
      "loss": 2.7488,
      "step": 137816
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.321042537689209,
      "learning_rate": 0.00020894907071780303,
      "loss": 3.1259,
      "step": 137817
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.089024782180786,
      "learning_rate": 0.00020894517311406517,
      "loss": 3.075,
      "step": 137818
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9613406658172607,
      "learning_rate": 0.00020894127552725623,
      "loss": 3.0833,
      "step": 137819
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.6458144187927246,
      "learning_rate": 0.00020893737795737674,
      "loss": 2.9466,
      "step": 137820
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.456127166748047,
      "learning_rate": 0.00020893348040442743,
      "loss": 2.9034,
      "step": 137821
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8777778148651123,
      "learning_rate": 0.00020892958286840914,
      "loss": 2.8843,
      "step": 137822
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.006077289581299,
      "learning_rate": 0.00020892568534932262,
      "loss": 2.9983,
      "step": 137823
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.192669630050659,
      "learning_rate": 0.00020892178784716846,
      "loss": 3.1128,
      "step": 137824
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.1420695781707764,
      "learning_rate": 0.00020891789036194764,
      "loss": 3.0479,
      "step": 137825
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3446433544158936,
      "learning_rate": 0.00020891399289366054,
      "loss": 2.8589,
      "step": 137826
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.3988821506500244,
      "learning_rate": 0.00020891009544230803,
      "loss": 2.6883,
      "step": 137827
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.6123478412628174,
      "learning_rate": 0.00020890619800789087,
      "loss": 2.8872,
      "step": 137828
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9791076183319092,
      "learning_rate": 0.0002089023005904098,
      "loss": 2.9432,
      "step": 137829
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.3098294734954834,
      "learning_rate": 0.0002088984031898655,
      "loss": 2.8797,
      "step": 137830
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.042555093765259,
      "learning_rate": 0.0002088945058062588,
      "loss": 2.9413,
      "step": 137831
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.198362112045288,
      "learning_rate": 0.00020889060843959023,
      "loss": 3.0752,
      "step": 137832
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.477540969848633,
      "learning_rate": 0.00020888671108986058,
      "loss": 2.8122,
      "step": 137833
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4318923950195312,
      "learning_rate": 0.00020888281375707068,
      "loss": 2.6992,
      "step": 137834
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.982300043106079,
      "learning_rate": 0.00020887891644122113,
      "loss": 3.3188,
      "step": 137835
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.4091591835021973,
      "learning_rate": 0.00020887501914231273,
      "loss": 3.117,
      "step": 137836
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.50634503364563,
      "learning_rate": 0.00020887112186034632,
      "loss": 2.8227,
      "step": 137837
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9574315547943115,
      "learning_rate": 0.00020886722459532235,
      "loss": 2.809,
      "step": 137838
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.8615531921386719,
      "learning_rate": 0.00020886332734724165,
      "loss": 2.7896,
      "step": 137839
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.554389238357544,
      "learning_rate": 0.00020885943011610502,
      "loss": 2.9643,
      "step": 137840
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5548102855682373,
      "learning_rate": 0.00020885553290191311,
      "loss": 2.9013,
      "step": 137841
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.7508902549743652,
      "learning_rate": 0.00020885163570466675,
      "loss": 3.0518,
      "step": 137842
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.24504017829895,
      "learning_rate": 0.00020884773852436665,
      "loss": 2.7722,
      "step": 137843
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.154728412628174,
      "learning_rate": 0.00020884384136101336,
      "loss": 2.9603,
      "step": 137844
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9155839681625366,
      "learning_rate": 0.00020883994421460773,
      "loss": 2.9594,
      "step": 137845
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.392341136932373,
      "learning_rate": 0.00020883604708515045,
      "loss": 3.1744,
      "step": 137846
    },
    {
      "epoch": 1.79,
      "grad_norm": 1.9412059783935547,
      "learning_rate": 0.00020883214997264232,
      "loss": 3.0673,
      "step": 137847
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.453580856323242,
      "learning_rate": 0.00020882825287708395,
      "loss": 2.6687,
      "step": 137848
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.324655055999756,
      "learning_rate": 0.0002088243557984763,
      "loss": 3.2268,
      "step": 137849
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.0877439975738525,
      "learning_rate": 0.0002088204587368198,
      "loss": 2.8547,
      "step": 137850
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.449846029281616,
      "learning_rate": 0.00020881656169211528,
      "loss": 2.7581,
      "step": 137851
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.9680395126342773,
      "learning_rate": 0.00020881266466436348,
      "loss": 3.2264,
      "step": 137852
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2140707969665527,
      "learning_rate": 0.00020880876765356514,
      "loss": 3.218,
      "step": 137853
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.2289295196533203,
      "learning_rate": 0.00020880487065972098,
      "loss": 2.7464,
      "step": 137854
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.5193192958831787,
      "learning_rate": 0.0002088009736828317,
      "loss": 2.8073,
      "step": 137855
    },
    {
      "epoch": 1.79,
      "grad_norm": 4.364963531494141,
      "learning_rate": 0.00020879707672289818,
      "loss": 2.9541,
      "step": 137856
    },
    {
      "epoch": 1.79,
      "grad_norm": 3.1591532230377197,
      "learning_rate": 0.00020879317977992088,
      "loss": 2.841,
      "step": 137857
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.766788959503174,
      "learning_rate": 0.0002087892828539006,
      "loss": 2.8853,
      "step": 137858
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1735377311706543,
      "learning_rate": 0.00020878538594483817,
      "loss": 3.0209,
      "step": 137859
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.529143810272217,
      "learning_rate": 0.00020878148905273421,
      "loss": 3.0544,
      "step": 137860
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.4637198448181152,
      "learning_rate": 0.00020877759217758954,
      "loss": 2.911,
      "step": 137861
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.089794397354126,
      "learning_rate": 0.000208773695319405,
      "loss": 2.8112,
      "step": 137862
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8227026462554932,
      "learning_rate": 0.00020876979847818093,
      "loss": 2.8806,
      "step": 137863
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5108535289764404,
      "learning_rate": 0.00020876590165391833,
      "loss": 3.0428,
      "step": 137864
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.6928622722625732,
      "learning_rate": 0.00020876200484661782,
      "loss": 2.8835,
      "step": 137865
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.445981502532959,
      "learning_rate": 0.00020875810805628026,
      "loss": 3.0203,
      "step": 137866
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.619386672973633,
      "learning_rate": 0.00020875421128290622,
      "loss": 2.9226,
      "step": 137867
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.2794997692108154,
      "learning_rate": 0.0002087503145264966,
      "loss": 3.1124,
      "step": 137868
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9021878242492676,
      "learning_rate": 0.000208746417787052,
      "loss": 2.8834,
      "step": 137869
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7270452976226807,
      "learning_rate": 0.00020874252106457307,
      "loss": 2.8194,
      "step": 137870
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.174696207046509,
      "learning_rate": 0.0002087386243590607,
      "loss": 2.9023,
      "step": 137871
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3240082263946533,
      "learning_rate": 0.0002087347276705155,
      "loss": 3.1962,
      "step": 137872
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.421436071395874,
      "learning_rate": 0.00020873083099893824,
      "loss": 2.9377,
      "step": 137873
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9576812982559204,
      "learning_rate": 0.00020872693434432972,
      "loss": 2.9207,
      "step": 137874
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8516080379486084,
      "learning_rate": 0.00020872303770669049,
      "loss": 2.754,
      "step": 137875
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4562911987304688,
      "learning_rate": 0.00020871914108602142,
      "loss": 2.9757,
      "step": 137876
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1032156944274902,
      "learning_rate": 0.0002087152444823232,
      "loss": 2.7903,
      "step": 137877
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8980774879455566,
      "learning_rate": 0.00020871134789559646,
      "loss": 3.0245,
      "step": 137878
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.893823266029358,
      "learning_rate": 0.00020870745132584205,
      "loss": 2.9421,
      "step": 137879
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9948590993881226,
      "learning_rate": 0.00020870355477306069,
      "loss": 2.8409,
      "step": 137880
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.750324249267578,
      "learning_rate": 0.00020869965823725302,
      "loss": 2.881,
      "step": 137881
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.123169183731079,
      "learning_rate": 0.0002086957617184198,
      "loss": 2.9481,
      "step": 137882
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.028789758682251,
      "learning_rate": 0.00020869186521656172,
      "loss": 3.1283,
      "step": 137883
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3988561630249023,
      "learning_rate": 0.00020868796873167966,
      "loss": 2.7489,
      "step": 137884
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9633034467697144,
      "learning_rate": 0.00020868407226377417,
      "loss": 2.9956,
      "step": 137885
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7996952533721924,
      "learning_rate": 0.00020868017581284607,
      "loss": 3.0085,
      "step": 137886
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1988437175750732,
      "learning_rate": 0.00020867627937889597,
      "loss": 2.9351,
      "step": 137887
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9908215999603271,
      "learning_rate": 0.0002086723829619247,
      "loss": 2.8776,
      "step": 137888
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8610615730285645,
      "learning_rate": 0.00020866848656193298,
      "loss": 3.0187,
      "step": 137889
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1526482105255127,
      "learning_rate": 0.00020866459017892147,
      "loss": 2.9316,
      "step": 137890
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.193556070327759,
      "learning_rate": 0.00020866069381289103,
      "loss": 3.072,
      "step": 137891
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.981610655784607,
      "learning_rate": 0.00020865679746384224,
      "loss": 3.0374,
      "step": 137892
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.965758800506592,
      "learning_rate": 0.00020865290113177587,
      "loss": 2.8136,
      "step": 137893
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.027616024017334,
      "learning_rate": 0.00020864900481669263,
      "loss": 3.0561,
      "step": 137894
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0339362621307373,
      "learning_rate": 0.00020864510851859326,
      "loss": 3.0106,
      "step": 137895
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3645429611206055,
      "learning_rate": 0.0002086412122374785,
      "loss": 3.1151,
      "step": 137896
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.420227289199829,
      "learning_rate": 0.00020863731597334906,
      "loss": 2.9402,
      "step": 137897
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0434556007385254,
      "learning_rate": 0.0002086334197262058,
      "loss": 2.9363,
      "step": 137898
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1096479892730713,
      "learning_rate": 0.0002086295234960492,
      "loss": 2.9566,
      "step": 137899
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2398688793182373,
      "learning_rate": 0.00020862562728288006,
      "loss": 3.1057,
      "step": 137900
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8415069580078125,
      "learning_rate": 0.00020862173108669916,
      "loss": 2.7929,
      "step": 137901
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.267545461654663,
      "learning_rate": 0.0002086178349075072,
      "loss": 3.1122,
      "step": 137902
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6255877017974854,
      "learning_rate": 0.0002086139387453049,
      "loss": 2.7266,
      "step": 137903
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.641152858734131,
      "learning_rate": 0.0002086100426000931,
      "loss": 3.0381,
      "step": 137904
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9704232215881348,
      "learning_rate": 0.00020860614647187232,
      "loss": 2.9635,
      "step": 137905
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.779250383377075,
      "learning_rate": 0.0002086022503606434,
      "loss": 3.1361,
      "step": 137906
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6422886848449707,
      "learning_rate": 0.00020859835426640706,
      "loss": 3.0129,
      "step": 137907
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1018176078796387,
      "learning_rate": 0.00020859445818916395,
      "loss": 2.9821,
      "step": 137908
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.30926251411438,
      "learning_rate": 0.0002085905621289149,
      "loss": 3.0095,
      "step": 137909
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3893041610717773,
      "learning_rate": 0.00020858666608566072,
      "loss": 2.9825,
      "step": 137910
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.947230577468872,
      "learning_rate": 0.00020858277005940184,
      "loss": 3.0225,
      "step": 137911
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.3574025630950928,
      "learning_rate": 0.00020857887405013919,
      "loss": 2.9692,
      "step": 137912
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.734246015548706,
      "learning_rate": 0.00020857497805787343,
      "loss": 2.9199,
      "step": 137913
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.034080982208252,
      "learning_rate": 0.0002085710820826053,
      "loss": 2.8089,
      "step": 137914
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5155458450317383,
      "learning_rate": 0.00020856718612433556,
      "loss": 2.7671,
      "step": 137915
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.897080659866333,
      "learning_rate": 0.00020856329018306498,
      "loss": 3.0869,
      "step": 137916
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7974116802215576,
      "learning_rate": 0.00020855939425879413,
      "loss": 2.9453,
      "step": 137917
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.242630958557129,
      "learning_rate": 0.0002085554983515238,
      "loss": 3.1676,
      "step": 137918
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4148097038269043,
      "learning_rate": 0.00020855160246125476,
      "loss": 2.8465,
      "step": 137919
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.268080711364746,
      "learning_rate": 0.00020854770658798767,
      "loss": 2.8259,
      "step": 137920
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0217342376708984,
      "learning_rate": 0.00020854381073172332,
      "loss": 2.6544,
      "step": 137921
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.283869743347168,
      "learning_rate": 0.00020853991489246234,
      "loss": 2.7254,
      "step": 137922
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.026876449584961,
      "learning_rate": 0.00020853601907020567,
      "loss": 2.7012,
      "step": 137923
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.257568836212158,
      "learning_rate": 0.0002085321232649538,
      "loss": 2.9266,
      "step": 137924
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.604344129562378,
      "learning_rate": 0.00020852822747670745,
      "loss": 2.8369,
      "step": 137925
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.178344964981079,
      "learning_rate": 0.0002085243317054675,
      "loss": 2.8726,
      "step": 137926
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3412537574768066,
      "learning_rate": 0.00020852043595123457,
      "loss": 2.9654,
      "step": 137927
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7941006422042847,
      "learning_rate": 0.00020851654021400944,
      "loss": 2.9908,
      "step": 137928
    },
    {
      "epoch": 1.8,
      "grad_norm": 5.4055609703063965,
      "learning_rate": 0.00020851264449379291,
      "loss": 2.8673,
      "step": 137929
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7463042736053467,
      "learning_rate": 0.00020850874879058552,
      "loss": 2.7128,
      "step": 137930
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3917136192321777,
      "learning_rate": 0.00020850485310438803,
      "loss": 2.9875,
      "step": 137931
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9728554487228394,
      "learning_rate": 0.00020850095743520124,
      "loss": 2.9276,
      "step": 137932
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.263204574584961,
      "learning_rate": 0.00020849706178302589,
      "loss": 2.9068,
      "step": 137933
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.243962049484253,
      "learning_rate": 0.0002084931661478626,
      "loss": 2.9266,
      "step": 137934
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5015032291412354,
      "learning_rate": 0.00020848927052971228,
      "loss": 2.8251,
      "step": 137935
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9676423072814941,
      "learning_rate": 0.00020848537492857544,
      "loss": 2.9367,
      "step": 137936
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.07840633392334,
      "learning_rate": 0.0002084814793444529,
      "loss": 2.8926,
      "step": 137937
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2700436115264893,
      "learning_rate": 0.00020847758377734538,
      "loss": 2.704,
      "step": 137938
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.18603777885437,
      "learning_rate": 0.00020847368822725362,
      "loss": 3.0497,
      "step": 137939
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6354002952575684,
      "learning_rate": 0.00020846979269417828,
      "loss": 3.0773,
      "step": 137940
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.0568199157714844,
      "learning_rate": 0.0002084658971781203,
      "loss": 3.1031,
      "step": 137941
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.98849356174469,
      "learning_rate": 0.0002084620016790801,
      "loss": 2.8707,
      "step": 137942
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.385962724685669,
      "learning_rate": 0.00020845810619705854,
      "loss": 3.0188,
      "step": 137943
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.3289568424224854,
      "learning_rate": 0.0002084542107320563,
      "loss": 2.8302,
      "step": 137944
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.4197440147399902,
      "learning_rate": 0.0002084503152840742,
      "loss": 3.0021,
      "step": 137945
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0059926509857178,
      "learning_rate": 0.00020844641985311293,
      "loss": 2.8319,
      "step": 137946
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.375640869140625,
      "learning_rate": 0.0002084425244391733,
      "loss": 3.1534,
      "step": 137947
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.593047618865967,
      "learning_rate": 0.0002084386290422558,
      "loss": 2.948,
      "step": 137948
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.409566879272461,
      "learning_rate": 0.0002084347336623613,
      "loss": 2.8814,
      "step": 137949
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7848336696624756,
      "learning_rate": 0.0002084308382994905,
      "loss": 3.0323,
      "step": 137950
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4984943866729736,
      "learning_rate": 0.00020842694295364415,
      "loss": 2.8398,
      "step": 137951
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4000232219696045,
      "learning_rate": 0.00020842304762482298,
      "loss": 3.0355,
      "step": 137952
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3383278846740723,
      "learning_rate": 0.00020841915231302772,
      "loss": 2.9303,
      "step": 137953
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5359854698181152,
      "learning_rate": 0.00020841525701825904,
      "loss": 2.9723,
      "step": 137954
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9685797691345215,
      "learning_rate": 0.0002084113617405177,
      "loss": 2.7758,
      "step": 137955
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.355455160140991,
      "learning_rate": 0.00020840746647980438,
      "loss": 2.929,
      "step": 137956
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.074860095977783,
      "learning_rate": 0.00020840357123611984,
      "loss": 2.992,
      "step": 137957
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.0083115100860596,
      "learning_rate": 0.0002083996760094648,
      "loss": 3.0067,
      "step": 137958
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.623520851135254,
      "learning_rate": 0.0002083957807998401,
      "loss": 2.7393,
      "step": 137959
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.508089065551758,
      "learning_rate": 0.00020839188560724622,
      "loss": 3.1695,
      "step": 137960
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.531217336654663,
      "learning_rate": 0.00020838799043168413,
      "loss": 3.0525,
      "step": 137961
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.170832872390747,
      "learning_rate": 0.00020838409527315437,
      "loss": 2.8827,
      "step": 137962
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7678868770599365,
      "learning_rate": 0.00020838020013165773,
      "loss": 3.0057,
      "step": 137963
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.434903621673584,
      "learning_rate": 0.00020837630500719495,
      "loss": 2.9427,
      "step": 137964
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9185810089111328,
      "learning_rate": 0.0002083724098997668,
      "loss": 2.9223,
      "step": 137965
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9949510097503662,
      "learning_rate": 0.00020836851480937388,
      "loss": 2.9315,
      "step": 137966
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1025168895721436,
      "learning_rate": 0.00020836461973601698,
      "loss": 3.0406,
      "step": 137967
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3533833026885986,
      "learning_rate": 0.0002083607246796969,
      "loss": 2.8961,
      "step": 137968
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5245916843414307,
      "learning_rate": 0.0002083568296404142,
      "loss": 2.9459,
      "step": 137969
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6716525554656982,
      "learning_rate": 0.00020835293461816975,
      "loss": 2.9162,
      "step": 137970
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0521504878997803,
      "learning_rate": 0.00020834903961296426,
      "loss": 3.063,
      "step": 137971
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1695969104766846,
      "learning_rate": 0.00020834514462479833,
      "loss": 2.7063,
      "step": 137972
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7556604146957397,
      "learning_rate": 0.00020834124965367275,
      "loss": 3.1272,
      "step": 137973
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1787376403808594,
      "learning_rate": 0.0002083373546995883,
      "loss": 2.89,
      "step": 137974
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7443739175796509,
      "learning_rate": 0.00020833345976254566,
      "loss": 3.079,
      "step": 137975
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1222705841064453,
      "learning_rate": 0.00020832956484254564,
      "loss": 2.7774,
      "step": 137976
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.10018253326416,
      "learning_rate": 0.00020832566993958887,
      "loss": 2.9783,
      "step": 137977
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0794174671173096,
      "learning_rate": 0.000208321775053676,
      "loss": 2.938,
      "step": 137978
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.026237726211548,
      "learning_rate": 0.0002083178801848079,
      "loss": 3.2596,
      "step": 137979
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.967652440071106,
      "learning_rate": 0.0002083139853329852,
      "loss": 2.9428,
      "step": 137980
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7871019840240479,
      "learning_rate": 0.00020831009049820866,
      "loss": 2.9442,
      "step": 137981
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7907805442810059,
      "learning_rate": 0.000208306195680479,
      "loss": 2.9963,
      "step": 137982
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.477525234222412,
      "learning_rate": 0.00020830230087979707,
      "loss": 2.9245,
      "step": 137983
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.330195665359497,
      "learning_rate": 0.00020829840609616336,
      "loss": 2.9701,
      "step": 137984
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.622659206390381,
      "learning_rate": 0.00020829451132957873,
      "loss": 2.9579,
      "step": 137985
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8864665031433105,
      "learning_rate": 0.00020829061658004386,
      "loss": 3.0238,
      "step": 137986
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.447922706604004,
      "learning_rate": 0.00020828672184755945,
      "loss": 2.9433,
      "step": 137987
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.897429943084717,
      "learning_rate": 0.00020828282713212634,
      "loss": 2.8634,
      "step": 137988
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.417689800262451,
      "learning_rate": 0.00020827893243374515,
      "loss": 3.0808,
      "step": 137989
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.120979070663452,
      "learning_rate": 0.0002082750377524168,
      "loss": 2.9233,
      "step": 137990
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.476851463317871,
      "learning_rate": 0.00020827114308814168,
      "loss": 2.9943,
      "step": 137991
    },
    {
      "epoch": 1.8,
      "grad_norm": 6.27003812789917,
      "learning_rate": 0.0002082672484409207,
      "loss": 3.0035,
      "step": 137992
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.588369131088257,
      "learning_rate": 0.00020826335381075455,
      "loss": 2.999,
      "step": 137993
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3888704776763916,
      "learning_rate": 0.000208259459197644,
      "loss": 3.2291,
      "step": 137994
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.7449111938476562,
      "learning_rate": 0.00020825556460158973,
      "loss": 2.9219,
      "step": 137995
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9716506004333496,
      "learning_rate": 0.00020825167002259265,
      "loss": 2.9914,
      "step": 137996
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.138346195220947,
      "learning_rate": 0.00020824777546065316,
      "loss": 2.9459,
      "step": 137997
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.047769784927368,
      "learning_rate": 0.00020824388091577215,
      "loss": 2.794,
      "step": 137998
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.675642251968384,
      "learning_rate": 0.0002082399863879503,
      "loss": 2.9556,
      "step": 137999
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1803319454193115,
      "learning_rate": 0.00020823609187718838,
      "loss": 3.1712,
      "step": 138000
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8559374809265137,
      "learning_rate": 0.00020823219738348714,
      "loss": 2.9484,
      "step": 138001
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.4257309436798096,
      "learning_rate": 0.0002082283029068474,
      "loss": 2.6946,
      "step": 138002
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1622183322906494,
      "learning_rate": 0.0002082244084472696,
      "loss": 2.7922,
      "step": 138003
    },
    {
      "epoch": 1.8,
      "grad_norm": 5.330806732177734,
      "learning_rate": 0.0002082205140047546,
      "loss": 3.0127,
      "step": 138004
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.484810829162598,
      "learning_rate": 0.00020821661957930315,
      "loss": 3.0169,
      "step": 138005
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5422446727752686,
      "learning_rate": 0.00020821272517091596,
      "loss": 2.9177,
      "step": 138006
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.164944648742676,
      "learning_rate": 0.00020820883077959373,
      "loss": 2.803,
      "step": 138007
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9198495149612427,
      "learning_rate": 0.00020820493640533739,
      "loss": 2.9557,
      "step": 138008
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.68251895904541,
      "learning_rate": 0.00020820104204814733,
      "loss": 2.8506,
      "step": 138009
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.981922149658203,
      "learning_rate": 0.00020819714770802447,
      "loss": 3.1405,
      "step": 138010
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.229001760482788,
      "learning_rate": 0.00020819325338496946,
      "loss": 2.8777,
      "step": 138011
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0539422035217285,
      "learning_rate": 0.00020818935907898305,
      "loss": 2.9668,
      "step": 138012
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.7922730445861816,
      "learning_rate": 0.00020818546479006597,
      "loss": 2.9174,
      "step": 138013
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9391300678253174,
      "learning_rate": 0.00020818157051821908,
      "loss": 3.0324,
      "step": 138014
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5148074626922607,
      "learning_rate": 0.00020817767626344285,
      "loss": 2.5876,
      "step": 138015
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2126572132110596,
      "learning_rate": 0.00020817378202573809,
      "loss": 2.8691,
      "step": 138016
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.018991231918335,
      "learning_rate": 0.0002081698878051056,
      "loss": 2.9509,
      "step": 138017
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7529826164245605,
      "learning_rate": 0.00020816599360154605,
      "loss": 3.0881,
      "step": 138018
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.533111810684204,
      "learning_rate": 0.00020816209941506016,
      "loss": 3.0458,
      "step": 138019
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7886933088302612,
      "learning_rate": 0.0002081582052456488,
      "loss": 2.8814,
      "step": 138020
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9966927766799927,
      "learning_rate": 0.00020815431109331243,
      "loss": 2.7903,
      "step": 138021
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.197141647338867,
      "learning_rate": 0.00020815041695805192,
      "loss": 3.0493,
      "step": 138022
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7398838996887207,
      "learning_rate": 0.000208146522839868,
      "loss": 3.064,
      "step": 138023
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1279799938201904,
      "learning_rate": 0.00020814262873876139,
      "loss": 2.9717,
      "step": 138024
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9336520433425903,
      "learning_rate": 0.00020813873465473274,
      "loss": 2.7378,
      "step": 138025
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.697314739227295,
      "learning_rate": 0.00020813484058778298,
      "loss": 2.7817,
      "step": 138026
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9685564041137695,
      "learning_rate": 0.00020813094653791256,
      "loss": 2.9726,
      "step": 138027
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9228925704956055,
      "learning_rate": 0.00020812705250512233,
      "loss": 2.9012,
      "step": 138028
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4045166969299316,
      "learning_rate": 0.00020812315848941303,
      "loss": 3.0314,
      "step": 138029
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8887947797775269,
      "learning_rate": 0.00020811926449078538,
      "loss": 2.8988,
      "step": 138030
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9823377132415771,
      "learning_rate": 0.00020811537050924002,
      "loss": 2.9753,
      "step": 138031
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1147544384002686,
      "learning_rate": 0.00020811147654477795,
      "loss": 2.7532,
      "step": 138032
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.461634874343872,
      "learning_rate": 0.00020810758259739954,
      "loss": 2.887,
      "step": 138033
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1385726928710938,
      "learning_rate": 0.00020810368866710566,
      "loss": 2.836,
      "step": 138034
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8336312770843506,
      "learning_rate": 0.00020809979475389705,
      "loss": 2.9833,
      "step": 138035
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1587376594543457,
      "learning_rate": 0.00020809590085777446,
      "loss": 2.5692,
      "step": 138036
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.301025390625,
      "learning_rate": 0.00020809200697873852,
      "loss": 3.0215,
      "step": 138037
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9225382804870605,
      "learning_rate": 0.00020808811311679018,
      "loss": 2.9909,
      "step": 138038
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8103865385055542,
      "learning_rate": 0.00020808421927192983,
      "loss": 3.0511,
      "step": 138039
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.149622917175293,
      "learning_rate": 0.00020808032544415833,
      "loss": 2.9759,
      "step": 138040
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3568689823150635,
      "learning_rate": 0.0002080764316334765,
      "loss": 2.9636,
      "step": 138041
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.458081007003784,
      "learning_rate": 0.00020807253783988494,
      "loss": 2.9188,
      "step": 138042
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3980729579925537,
      "learning_rate": 0.00020806864406338447,
      "loss": 3.1931,
      "step": 138043
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2915139198303223,
      "learning_rate": 0.00020806475030397582,
      "loss": 2.9533,
      "step": 138044
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.882143259048462,
      "learning_rate": 0.00020806085656165964,
      "loss": 2.7489,
      "step": 138045
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.059786081314087,
      "learning_rate": 0.0002080569628364367,
      "loss": 2.9674,
      "step": 138046
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.500166416168213,
      "learning_rate": 0.0002080530691283077,
      "loss": 3.1659,
      "step": 138047
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.656135320663452,
      "learning_rate": 0.0002080491754372733,
      "loss": 2.7438,
      "step": 138048
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.420941114425659,
      "learning_rate": 0.00020804528176333434,
      "loss": 3.1026,
      "step": 138049
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9976017475128174,
      "learning_rate": 0.00020804138810649153,
      "loss": 2.935,
      "step": 138050
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3297319412231445,
      "learning_rate": 0.00020803749446674552,
      "loss": 3.1529,
      "step": 138051
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1828439235687256,
      "learning_rate": 0.00020803360084409707,
      "loss": 2.9418,
      "step": 138052
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.063129425048828,
      "learning_rate": 0.00020802970723854698,
      "loss": 3.1599,
      "step": 138053
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.470947742462158,
      "learning_rate": 0.00020802581365009581,
      "loss": 3.0007,
      "step": 138054
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.034327745437622,
      "learning_rate": 0.00020802192007874441,
      "loss": 3.115,
      "step": 138055
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3416314125061035,
      "learning_rate": 0.00020801802652449348,
      "loss": 2.8724,
      "step": 138056
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.106107234954834,
      "learning_rate": 0.00020801413298734376,
      "loss": 2.9314,
      "step": 138057
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2799112796783447,
      "learning_rate": 0.0002080102394672959,
      "loss": 2.9815,
      "step": 138058
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1226091384887695,
      "learning_rate": 0.0002080063459643507,
      "loss": 3.1139,
      "step": 138059
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.290114164352417,
      "learning_rate": 0.0002080024524785089,
      "loss": 2.7686,
      "step": 138060
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0313282012939453,
      "learning_rate": 0.00020799855900977113,
      "loss": 3.0797,
      "step": 138061
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5643718242645264,
      "learning_rate": 0.00020799466555813814,
      "loss": 2.9708,
      "step": 138062
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.891388177871704,
      "learning_rate": 0.00020799077212361073,
      "loss": 3.1108,
      "step": 138063
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0488812923431396,
      "learning_rate": 0.00020798687870618955,
      "loss": 3.025,
      "step": 138064
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2537548542022705,
      "learning_rate": 0.00020798298530587533,
      "loss": 2.8833,
      "step": 138065
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4896302223205566,
      "learning_rate": 0.00020797909192266879,
      "loss": 2.9096,
      "step": 138066
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1085045337677,
      "learning_rate": 0.0002079751985565708,
      "loss": 3.013,
      "step": 138067
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1231703758239746,
      "learning_rate": 0.0002079713052075818,
      "loss": 2.8898,
      "step": 138068
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5006771087646484,
      "learning_rate": 0.00020796741187570277,
      "loss": 2.9562,
      "step": 138069
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4577813148498535,
      "learning_rate": 0.00020796351856093435,
      "loss": 2.9772,
      "step": 138070
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.806036114692688,
      "learning_rate": 0.00020795962526327715,
      "loss": 3.0168,
      "step": 138071
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.0116755962371826,
      "learning_rate": 0.00020795573198273206,
      "loss": 3.1017,
      "step": 138072
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.905028223991394,
      "learning_rate": 0.0002079518387192997,
      "loss": 2.9684,
      "step": 138073
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5546622276306152,
      "learning_rate": 0.00020794794547298082,
      "loss": 2.9874,
      "step": 138074
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.192880392074585,
      "learning_rate": 0.00020794405224377628,
      "loss": 3.0356,
      "step": 138075
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.6521074771881104,
      "learning_rate": 0.00020794015903168657,
      "loss": 3.0647,
      "step": 138076
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.408353567123413,
      "learning_rate": 0.00020793626583671254,
      "loss": 3.0961,
      "step": 138077
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2736005783081055,
      "learning_rate": 0.00020793237265885484,
      "loss": 3.0603,
      "step": 138078
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.438349485397339,
      "learning_rate": 0.00020792847949811433,
      "loss": 3.0672,
      "step": 138079
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1569149494171143,
      "learning_rate": 0.00020792458635449158,
      "loss": 2.7227,
      "step": 138080
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.2305943965911865,
      "learning_rate": 0.00020792069322798755,
      "loss": 3.0562,
      "step": 138081
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3769757747650146,
      "learning_rate": 0.00020791680011860269,
      "loss": 3.0118,
      "step": 138082
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0702927112579346,
      "learning_rate": 0.0002079129070263378,
      "loss": 2.8788,
      "step": 138083
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4257073402404785,
      "learning_rate": 0.00020790901395119365,
      "loss": 2.8955,
      "step": 138084
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1978042125701904,
      "learning_rate": 0.00020790512089317097,
      "loss": 3.018,
      "step": 138085
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.27182674407959,
      "learning_rate": 0.00020790122785227044,
      "loss": 3.1835,
      "step": 138086
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7695653438568115,
      "learning_rate": 0.00020789733482849297,
      "loss": 2.9599,
      "step": 138087
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.948766827583313,
      "learning_rate": 0.00020789344182183897,
      "loss": 2.8016,
      "step": 138088
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3525125980377197,
      "learning_rate": 0.00020788954883230933,
      "loss": 3.0664,
      "step": 138089
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.737928867340088,
      "learning_rate": 0.00020788565585990476,
      "loss": 2.8927,
      "step": 138090
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.366549253463745,
      "learning_rate": 0.00020788176290462599,
      "loss": 3.0487,
      "step": 138091
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3887133598327637,
      "learning_rate": 0.0002078778699664737,
      "loss": 2.6485,
      "step": 138092
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.7309350967407227,
      "learning_rate": 0.00020787397704544883,
      "loss": 3.0396,
      "step": 138093
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.0280306339263916,
      "learning_rate": 0.0002078700841415518,
      "loss": 3.0287,
      "step": 138094
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1628284454345703,
      "learning_rate": 0.00020786619125478343,
      "loss": 3.1336,
      "step": 138095
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.057666778564453,
      "learning_rate": 0.00020786229838514448,
      "loss": 2.7842,
      "step": 138096
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2082955837249756,
      "learning_rate": 0.00020785840553263566,
      "loss": 3.1357,
      "step": 138097
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9363975524902344,
      "learning_rate": 0.00020785451269725774,
      "loss": 2.918,
      "step": 138098
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.120471954345703,
      "learning_rate": 0.0002078506198790115,
      "loss": 2.996,
      "step": 138099
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.148477554321289,
      "learning_rate": 0.00020784672707789743,
      "loss": 3.1095,
      "step": 138100
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.331843852996826,
      "learning_rate": 0.00020784283429391644,
      "loss": 2.9859,
      "step": 138101
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1828408241271973,
      "learning_rate": 0.00020783894152706916,
      "loss": 2.9341,
      "step": 138102
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.611098051071167,
      "learning_rate": 0.00020783504877735639,
      "loss": 3.0423,
      "step": 138103
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9544391632080078,
      "learning_rate": 0.0002078311560447788,
      "loss": 2.9179,
      "step": 138104
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.154183864593506,
      "learning_rate": 0.00020782726332933729,
      "loss": 2.9651,
      "step": 138105
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.814615249633789,
      "learning_rate": 0.00020782337063103229,
      "loss": 3.0698,
      "step": 138106
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8850423097610474,
      "learning_rate": 0.0002078194779498647,
      "loss": 2.9412,
      "step": 138107
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9383984804153442,
      "learning_rate": 0.00020781558528583518,
      "loss": 3.1473,
      "step": 138108
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9263265132904053,
      "learning_rate": 0.00020781169263894446,
      "loss": 3.1534,
      "step": 138109
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.075157880783081,
      "learning_rate": 0.0002078078000091933,
      "loss": 2.9691,
      "step": 138110
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9956341981887817,
      "learning_rate": 0.00020780390739658254,
      "loss": 3.0891,
      "step": 138111
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.493598461151123,
      "learning_rate": 0.00020780001480111266,
      "loss": 2.7969,
      "step": 138112
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1891584396362305,
      "learning_rate": 0.00020779612222278447,
      "loss": 2.7964,
      "step": 138113
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.715047597885132,
      "learning_rate": 0.00020779222966159876,
      "loss": 2.9619,
      "step": 138114
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.958805799484253,
      "learning_rate": 0.0002077883371175562,
      "loss": 2.767,
      "step": 138115
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0379390716552734,
      "learning_rate": 0.00020778444459065753,
      "loss": 3.0899,
      "step": 138116
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.490363121032715,
      "learning_rate": 0.00020778055208090358,
      "loss": 3.026,
      "step": 138117
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7443389892578125,
      "learning_rate": 0.00020777665958829483,
      "loss": 3.2838,
      "step": 138118
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.903857469558716,
      "learning_rate": 0.0002077727671128322,
      "loss": 3.068,
      "step": 138119
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.3695783615112305,
      "learning_rate": 0.0002077688746545163,
      "loss": 2.9755,
      "step": 138120
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.37750244140625,
      "learning_rate": 0.00020776498221334794,
      "loss": 2.9822,
      "step": 138121
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.46824049949646,
      "learning_rate": 0.0002077610897893278,
      "loss": 3.0051,
      "step": 138122
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.252427816390991,
      "learning_rate": 0.00020775719738245658,
      "loss": 3.0853,
      "step": 138123
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3609893321990967,
      "learning_rate": 0.0002077533049927352,
      "loss": 2.8702,
      "step": 138124
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.503802537918091,
      "learning_rate": 0.0002077494126201641,
      "loss": 2.821,
      "step": 138125
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3617494106292725,
      "learning_rate": 0.0002077455202647441,
      "loss": 2.885,
      "step": 138126
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2808780670166016,
      "learning_rate": 0.00020774162792647598,
      "loss": 3.0016,
      "step": 138127
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7455201148986816,
      "learning_rate": 0.00020773773560536042,
      "loss": 2.9765,
      "step": 138128
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.675593376159668,
      "learning_rate": 0.00020773384330139815,
      "loss": 3.0359,
      "step": 138129
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9616413116455078,
      "learning_rate": 0.00020772995101459002,
      "loss": 2.9299,
      "step": 138130
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1186399459838867,
      "learning_rate": 0.0002077260587449365,
      "loss": 3.0146,
      "step": 138131
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.384077310562134,
      "learning_rate": 0.00020772216649243846,
      "loss": 3.1652,
      "step": 138132
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9307013750076294,
      "learning_rate": 0.00020771827425709665,
      "loss": 3.0471,
      "step": 138133
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0260353088378906,
      "learning_rate": 0.0002077143820389117,
      "loss": 3.2214,
      "step": 138134
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.259495735168457,
      "learning_rate": 0.00020771048983788442,
      "loss": 3.0591,
      "step": 138135
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0350117683410645,
      "learning_rate": 0.00020770659765401554,
      "loss": 3.0031,
      "step": 138136
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.14534068107605,
      "learning_rate": 0.0002077027054873057,
      "loss": 2.9342,
      "step": 138137
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9657950401306152,
      "learning_rate": 0.00020769881333775576,
      "loss": 2.9988,
      "step": 138138
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1356382369995117,
      "learning_rate": 0.00020769492120536624,
      "loss": 2.9847,
      "step": 138139
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1107406616210938,
      "learning_rate": 0.000207691029090138,
      "loss": 3.1924,
      "step": 138140
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.239865779876709,
      "learning_rate": 0.00020768713699207173,
      "loss": 2.8348,
      "step": 138141
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2506821155548096,
      "learning_rate": 0.00020768324491116823,
      "loss": 3.0424,
      "step": 138142
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2458107471466064,
      "learning_rate": 0.0002076793528474281,
      "loss": 2.826,
      "step": 138143
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.856499195098877,
      "learning_rate": 0.00020767546080085214,
      "loss": 2.9494,
      "step": 138144
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.844555139541626,
      "learning_rate": 0.0002076715687714411,
      "loss": 2.7537,
      "step": 138145
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.525273561477661,
      "learning_rate": 0.00020766767675919556,
      "loss": 2.9131,
      "step": 138146
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8262534141540527,
      "learning_rate": 0.00020766378476411637,
      "loss": 2.4824,
      "step": 138147
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.866328477859497,
      "learning_rate": 0.00020765989278620432,
      "loss": 2.9772,
      "step": 138148
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8471494913101196,
      "learning_rate": 0.0002076560008254599,
      "loss": 2.904,
      "step": 138149
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7214317321777344,
      "learning_rate": 0.00020765210888188404,
      "loss": 2.9596,
      "step": 138150
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0911052227020264,
      "learning_rate": 0.0002076482169554774,
      "loss": 3.0807,
      "step": 138151
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0354955196380615,
      "learning_rate": 0.0002076443250462407,
      "loss": 2.9391,
      "step": 138152
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2933266162872314,
      "learning_rate": 0.00020764043315417465,
      "loss": 2.6829,
      "step": 138153
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.305427074432373,
      "learning_rate": 0.00020763654127928005,
      "loss": 3.0726,
      "step": 138154
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.509045124053955,
      "learning_rate": 0.00020763264942155749,
      "loss": 2.8481,
      "step": 138155
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.368868350982666,
      "learning_rate": 0.00020762875758100773,
      "loss": 3.0215,
      "step": 138156
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.129410982131958,
      "learning_rate": 0.00020762486575763158,
      "loss": 2.9246,
      "step": 138157
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4038262367248535,
      "learning_rate": 0.00020762097395142967,
      "loss": 3.0739,
      "step": 138158
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3582887649536133,
      "learning_rate": 0.00020761708216240286,
      "loss": 3.0087,
      "step": 138159
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2783193588256836,
      "learning_rate": 0.0002076131903905518,
      "loss": 2.7938,
      "step": 138160
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.3446717262268066,
      "learning_rate": 0.00020760929863587705,
      "loss": 2.9768,
      "step": 138161
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2051968574523926,
      "learning_rate": 0.00020760540689837952,
      "loss": 3.0207,
      "step": 138162
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9891350269317627,
      "learning_rate": 0.00020760151517805993,
      "loss": 2.8263,
      "step": 138163
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9258040189743042,
      "learning_rate": 0.0002075976234749189,
      "loss": 2.9757,
      "step": 138164
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4467103481292725,
      "learning_rate": 0.0002075937317889572,
      "loss": 2.9884,
      "step": 138165
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.482997417449951,
      "learning_rate": 0.00020758984012017576,
      "loss": 3.1761,
      "step": 138166
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8336615562438965,
      "learning_rate": 0.00020758594846857496,
      "loss": 2.9335,
      "step": 138167
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2466039657592773,
      "learning_rate": 0.0002075820568341557,
      "loss": 2.9827,
      "step": 138168
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8940125703811646,
      "learning_rate": 0.00020757816521691863,
      "loss": 2.9627,
      "step": 138169
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5124025344848633,
      "learning_rate": 0.00020757427361686457,
      "loss": 2.8298,
      "step": 138170
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.351529121398926,
      "learning_rate": 0.00020757038203399414,
      "loss": 3.0455,
      "step": 138171
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1593306064605713,
      "learning_rate": 0.0002075664904683083,
      "loss": 3.0082,
      "step": 138172
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3775877952575684,
      "learning_rate": 0.00020756259891980747,
      "loss": 3.0322,
      "step": 138173
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9628912210464478,
      "learning_rate": 0.00020755870738849248,
      "loss": 2.9477,
      "step": 138174
    },
    {
      "epoch": 1.8,
      "grad_norm": 5.022336959838867,
      "learning_rate": 0.00020755481587436407,
      "loss": 2.7633,
      "step": 138175
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9654059410095215,
      "learning_rate": 0.00020755092437742294,
      "loss": 2.9291,
      "step": 138176
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.365966320037842,
      "learning_rate": 0.00020754703289766987,
      "loss": 3.0432,
      "step": 138177
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2043864727020264,
      "learning_rate": 0.00020754314143510568,
      "loss": 2.8105,
      "step": 138178
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5973901748657227,
      "learning_rate": 0.0002075392499897308,
      "loss": 3.0206,
      "step": 138179
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.027028799057007,
      "learning_rate": 0.0002075353585615462,
      "loss": 2.8238,
      "step": 138180
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4392218589782715,
      "learning_rate": 0.00020753146715055246,
      "loss": 3.0459,
      "step": 138181
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8757052421569824,
      "learning_rate": 0.00020752757575675038,
      "loss": 3.0486,
      "step": 138182
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0770106315612793,
      "learning_rate": 0.00020752368438014065,
      "loss": 3.0068,
      "step": 138183
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.0290346145629883,
      "learning_rate": 0.00020751979302072415,
      "loss": 3.0981,
      "step": 138184
    },
    {
      "epoch": 1.8,
      "grad_norm": 6.181454181671143,
      "learning_rate": 0.00020751590167850134,
      "loss": 3.0257,
      "step": 138185
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.948577404022217,
      "learning_rate": 0.00020751201035347307,
      "loss": 3.075,
      "step": 138186
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2346031665802,
      "learning_rate": 0.00020750811904564012,
      "loss": 2.828,
      "step": 138187
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3186099529266357,
      "learning_rate": 0.00020750422775500307,
      "loss": 3.0229,
      "step": 138188
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1394500732421875,
      "learning_rate": 0.0002075003364815628,
      "loss": 3.0834,
      "step": 138189
    },
    {
      "epoch": 1.8,
      "grad_norm": 7.329949855804443,
      "learning_rate": 0.0002074964452253199,
      "loss": 2.8871,
      "step": 138190
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3530311584472656,
      "learning_rate": 0.00020749255398627532,
      "loss": 2.886,
      "step": 138191
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.406047821044922,
      "learning_rate": 0.00020748866276442947,
      "loss": 2.9601,
      "step": 138192
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6394131183624268,
      "learning_rate": 0.00020748477155978326,
      "loss": 3.003,
      "step": 138193
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.066275119781494,
      "learning_rate": 0.00020748088037233735,
      "loss": 2.6989,
      "step": 138194
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.822256088256836,
      "learning_rate": 0.0002074769892020925,
      "loss": 2.8824,
      "step": 138195
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3909659385681152,
      "learning_rate": 0.0002074730980490494,
      "loss": 2.9538,
      "step": 138196
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9586477279663086,
      "learning_rate": 0.00020746920691320895,
      "loss": 3.0303,
      "step": 138197
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.87143075466156,
      "learning_rate": 0.0002074653157945716,
      "loss": 2.8649,
      "step": 138198
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0074403285980225,
      "learning_rate": 0.00020746142469313817,
      "loss": 3.0072,
      "step": 138199
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.377573013305664,
      "learning_rate": 0.00020745753360890942,
      "loss": 2.9592,
      "step": 138200
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9012051820755005,
      "learning_rate": 0.0002074536425418861,
      "loss": 3.0619,
      "step": 138201
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.73507022857666,
      "learning_rate": 0.00020744975149206886,
      "loss": 3.2138,
      "step": 138202
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8426661491394043,
      "learning_rate": 0.00020744586045945857,
      "loss": 2.942,
      "step": 138203
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.209456443786621,
      "learning_rate": 0.00020744196944405576,
      "loss": 2.8175,
      "step": 138204
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3830599784851074,
      "learning_rate": 0.0002074380784458612,
      "loss": 3.014,
      "step": 138205
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.021991014480591,
      "learning_rate": 0.00020743418746487565,
      "loss": 3.0419,
      "step": 138206
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.050426483154297,
      "learning_rate": 0.00020743029650109978,
      "loss": 2.939,
      "step": 138207
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2545814514160156,
      "learning_rate": 0.00020742640555453446,
      "loss": 2.8441,
      "step": 138208
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3578929901123047,
      "learning_rate": 0.0002074225146251804,
      "loss": 2.9098,
      "step": 138209
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.653898239135742,
      "learning_rate": 0.00020741862371303812,
      "loss": 2.9052,
      "step": 138210
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3135201930999756,
      "learning_rate": 0.00020741473281810847,
      "loss": 2.8615,
      "step": 138211
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2656748294830322,
      "learning_rate": 0.00020741084194039215,
      "loss": 2.9536,
      "step": 138212
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.171422004699707,
      "learning_rate": 0.00020740695107988993,
      "loss": 2.9933,
      "step": 138213
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9501324892044067,
      "learning_rate": 0.0002074030602366025,
      "loss": 2.9769,
      "step": 138214
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9642378091812134,
      "learning_rate": 0.00020739916941053073,
      "loss": 3.0487,
      "step": 138215
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.4373278617858887,
      "learning_rate": 0.00020739527860167502,
      "loss": 2.9069,
      "step": 138216
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.161135673522949,
      "learning_rate": 0.0002073913878100363,
      "loss": 2.634,
      "step": 138217
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1527178287506104,
      "learning_rate": 0.00020738749703561531,
      "loss": 2.8612,
      "step": 138218
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4023985862731934,
      "learning_rate": 0.00020738360627841268,
      "loss": 3.0745,
      "step": 138219
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.495967388153076,
      "learning_rate": 0.00020737971553842923,
      "loss": 2.8953,
      "step": 138220
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.013362169265747,
      "learning_rate": 0.00020737582481566565,
      "loss": 2.6717,
      "step": 138221
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0708866119384766,
      "learning_rate": 0.00020737193411012273,
      "loss": 2.8104,
      "step": 138222
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9029736518859863,
      "learning_rate": 0.00020736804342180097,
      "loss": 2.9899,
      "step": 138223
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.784440040588379,
      "learning_rate": 0.00020736415275070128,
      "loss": 2.8172,
      "step": 138224
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7064273357391357,
      "learning_rate": 0.00020736026209682433,
      "loss": 2.7452,
      "step": 138225
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6260170936584473,
      "learning_rate": 0.00020735637146017085,
      "loss": 2.9667,
      "step": 138226
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5257177352905273,
      "learning_rate": 0.00020735248084074166,
      "loss": 2.8937,
      "step": 138227
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0376691818237305,
      "learning_rate": 0.00020734859023853732,
      "loss": 2.991,
      "step": 138228
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.042224168777466,
      "learning_rate": 0.0002073446996535587,
      "loss": 2.8844,
      "step": 138229
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0282349586486816,
      "learning_rate": 0.00020734080908580636,
      "loss": 2.9784,
      "step": 138230
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8757492303848267,
      "learning_rate": 0.00020733691853528114,
      "loss": 3.0811,
      "step": 138231
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7934529781341553,
      "learning_rate": 0.00020733302800198368,
      "loss": 2.8937,
      "step": 138232
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.003898859024048,
      "learning_rate": 0.00020732913748591487,
      "loss": 2.9313,
      "step": 138233
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.877051830291748,
      "learning_rate": 0.00020732524698707525,
      "loss": 3.0678,
      "step": 138234
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1159756183624268,
      "learning_rate": 0.0002073213565054656,
      "loss": 2.8702,
      "step": 138235
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.298975467681885,
      "learning_rate": 0.0002073174660410867,
      "loss": 3.1567,
      "step": 138236
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.94893479347229,
      "learning_rate": 0.00020731357559393925,
      "loss": 2.7582,
      "step": 138237
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7716972827911377,
      "learning_rate": 0.00020730968516402394,
      "loss": 2.6616,
      "step": 138238
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.321713924407959,
      "learning_rate": 0.00020730579475134158,
      "loss": 2.8389,
      "step": 138239
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7448620796203613,
      "learning_rate": 0.0002073019043558927,
      "loss": 2.9853,
      "step": 138240
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.141176700592041,
      "learning_rate": 0.00020729801397767817,
      "loss": 2.5728,
      "step": 138241
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1707472801208496,
      "learning_rate": 0.0002072941236166987,
      "loss": 2.9092,
      "step": 138242
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7834264039993286,
      "learning_rate": 0.000207290233272955,
      "loss": 3.2245,
      "step": 138243
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9075982570648193,
      "learning_rate": 0.00020728634294644784,
      "loss": 2.8199,
      "step": 138244
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.49773907661438,
      "learning_rate": 0.00020728245263717791,
      "loss": 3.0925,
      "step": 138245
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.9435558319091797,
      "learning_rate": 0.00020727856234514588,
      "loss": 2.8144,
      "step": 138246
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.065110206604004,
      "learning_rate": 0.00020727467207035246,
      "loss": 3.2244,
      "step": 138247
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0461277961730957,
      "learning_rate": 0.00020727078181279851,
      "loss": 2.9653,
      "step": 138248
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2313780784606934,
      "learning_rate": 0.00020726689157248463,
      "loss": 2.9265,
      "step": 138249
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.041506052017212,
      "learning_rate": 0.00020726300134941153,
      "loss": 2.8678,
      "step": 138250
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2389190196990967,
      "learning_rate": 0.0002072591111435802,
      "loss": 2.7494,
      "step": 138251
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1971166133880615,
      "learning_rate": 0.000207255220954991,
      "loss": 2.9517,
      "step": 138252
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0628786087036133,
      "learning_rate": 0.0002072513307836448,
      "loss": 2.9747,
      "step": 138253
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1299800872802734,
      "learning_rate": 0.0002072474406295423,
      "loss": 2.764,
      "step": 138254
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3630261421203613,
      "learning_rate": 0.0002072435504926843,
      "loss": 2.9423,
      "step": 138255
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.058561325073242,
      "learning_rate": 0.00020723966037307142,
      "loss": 3.1018,
      "step": 138256
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.630528450012207,
      "learning_rate": 0.00020723577027070448,
      "loss": 2.8083,
      "step": 138257
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.600085735321045,
      "learning_rate": 0.0002072318801855843,
      "loss": 2.6833,
      "step": 138258
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1253950595855713,
      "learning_rate": 0.0002072279901177113,
      "loss": 3.0427,
      "step": 138259
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.130770444869995,
      "learning_rate": 0.0002072241000670864,
      "loss": 3.0808,
      "step": 138260
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4736552238464355,
      "learning_rate": 0.00020722021003371027,
      "loss": 3.0773,
      "step": 138261
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.7747387886047363,
      "learning_rate": 0.00020721632001758367,
      "loss": 2.932,
      "step": 138262
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.021153688430786,
      "learning_rate": 0.00020721243001870727,
      "loss": 2.8924,
      "step": 138263
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6439332962036133,
      "learning_rate": 0.000207208540037082,
      "loss": 3.1302,
      "step": 138264
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9516704082489014,
      "learning_rate": 0.00020720465007270831,
      "loss": 2.7956,
      "step": 138265
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.741284132003784,
      "learning_rate": 0.00020720076012558695,
      "loss": 3.1274,
      "step": 138266
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9842884540557861,
      "learning_rate": 0.00020719687019571877,
      "loss": 3.0491,
      "step": 138267
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.911109209060669,
      "learning_rate": 0.00020719298028310445,
      "loss": 2.8723,
      "step": 138268
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.290750026702881,
      "learning_rate": 0.0002071890903877447,
      "loss": 2.9609,
      "step": 138269
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8583821058273315,
      "learning_rate": 0.00020718520050964033,
      "loss": 3.0933,
      "step": 138270
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0563273429870605,
      "learning_rate": 0.00020718131064879192,
      "loss": 3.0898,
      "step": 138271
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.737749695777893,
      "learning_rate": 0.0002071774208052002,
      "loss": 2.9003,
      "step": 138272
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.732558250427246,
      "learning_rate": 0.00020717353097886597,
      "loss": 3.0449,
      "step": 138273
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3929197788238525,
      "learning_rate": 0.00020716964116978997,
      "loss": 2.8258,
      "step": 138274
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1249959468841553,
      "learning_rate": 0.00020716575137797285,
      "loss": 2.975,
      "step": 138275
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.101738691329956,
      "learning_rate": 0.0002071618616034155,
      "loss": 2.9885,
      "step": 138276
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9253414869308472,
      "learning_rate": 0.0002071579718461184,
      "loss": 2.8342,
      "step": 138277
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0379066467285156,
      "learning_rate": 0.00020715408210608238,
      "loss": 3.1951,
      "step": 138278
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.151545524597168,
      "learning_rate": 0.00020715019238330817,
      "loss": 2.7393,
      "step": 138279
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2787299156188965,
      "learning_rate": 0.0002071463026777965,
      "loss": 3.1919,
      "step": 138280
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8645073175430298,
      "learning_rate": 0.00020714241298954805,
      "loss": 3.0098,
      "step": 138281
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.276520252227783,
      "learning_rate": 0.00020713852331856372,
      "loss": 2.9877,
      "step": 138282
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2433853149414062,
      "learning_rate": 0.00020713463366484397,
      "loss": 2.9473,
      "step": 138283
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.741184711456299,
      "learning_rate": 0.00020713074402838966,
      "loss": 2.6257,
      "step": 138284
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1503679752349854,
      "learning_rate": 0.00020712685440920148,
      "loss": 2.9924,
      "step": 138285
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.120711088180542,
      "learning_rate": 0.00020712296480728017,
      "loss": 2.816,
      "step": 138286
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9932167530059814,
      "learning_rate": 0.00020711907522262646,
      "loss": 3.0388,
      "step": 138287
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.143768072128296,
      "learning_rate": 0.0002071151856552412,
      "loss": 2.7858,
      "step": 138288
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3962552547454834,
      "learning_rate": 0.00020711129610512485,
      "loss": 2.7933,
      "step": 138289
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0576767921447754,
      "learning_rate": 0.00020710740657227825,
      "loss": 2.9509,
      "step": 138290
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0267913341522217,
      "learning_rate": 0.00020710351705670215,
      "loss": 3.057,
      "step": 138291
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8504555225372314,
      "learning_rate": 0.00020709962755839726,
      "loss": 2.9948,
      "step": 138292
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4164862632751465,
      "learning_rate": 0.00020709573807736428,
      "loss": 3.1491,
      "step": 138293
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9234832525253296,
      "learning_rate": 0.00020709184861360412,
      "loss": 3.0948,
      "step": 138294
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1503751277923584,
      "learning_rate": 0.0002070879591671172,
      "loss": 3.265,
      "step": 138295
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.537743091583252,
      "learning_rate": 0.00020708406973790437,
      "loss": 2.9849,
      "step": 138296
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2930285930633545,
      "learning_rate": 0.00020708018032596637,
      "loss": 3.036,
      "step": 138297
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.281798839569092,
      "learning_rate": 0.00020707629093130392,
      "loss": 2.985,
      "step": 138298
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.287694215774536,
      "learning_rate": 0.00020707240155391776,
      "loss": 3.008,
      "step": 138299
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1523540019989014,
      "learning_rate": 0.0002070685121938087,
      "loss": 3.1976,
      "step": 138300
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7348027229309082,
      "learning_rate": 0.00020706462285097722,
      "loss": 3.0614,
      "step": 138301
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.175246238708496,
      "learning_rate": 0.00020706073352542417,
      "loss": 2.7549,
      "step": 138302
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.412405014038086,
      "learning_rate": 0.00020705684421715032,
      "loss": 2.9752,
      "step": 138303
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.548745632171631,
      "learning_rate": 0.00020705295492615635,
      "loss": 2.9483,
      "step": 138304
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.919808268547058,
      "learning_rate": 0.00020704906565244296,
      "loss": 3.0357,
      "step": 138305
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.183443784713745,
      "learning_rate": 0.00020704517639601102,
      "loss": 2.8358,
      "step": 138306
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1970152854919434,
      "learning_rate": 0.00020704128715686113,
      "loss": 2.9481,
      "step": 138307
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1422321796417236,
      "learning_rate": 0.0002070373979349939,
      "loss": 2.8481,
      "step": 138308
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.824887752532959,
      "learning_rate": 0.00020703350873041025,
      "loss": 2.9244,
      "step": 138309
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6027402877807617,
      "learning_rate": 0.00020702961954311078,
      "loss": 2.784,
      "step": 138310
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0418131351470947,
      "learning_rate": 0.00020702573037309624,
      "loss": 2.8033,
      "step": 138311
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4219727516174316,
      "learning_rate": 0.00020702184122036748,
      "loss": 2.9176,
      "step": 138312
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.289257526397705,
      "learning_rate": 0.00020701795208492502,
      "loss": 2.7839,
      "step": 138313
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.355853796005249,
      "learning_rate": 0.00020701406296676973,
      "loss": 2.9148,
      "step": 138314
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4110829830169678,
      "learning_rate": 0.00020701017386590224,
      "loss": 3.0056,
      "step": 138315
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3559186458587646,
      "learning_rate": 0.00020700628478232332,
      "loss": 3.0738,
      "step": 138316
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9068511724472046,
      "learning_rate": 0.00020700239571603367,
      "loss": 2.9215,
      "step": 138317
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8780648708343506,
      "learning_rate": 0.00020699850666703408,
      "loss": 2.8452,
      "step": 138318
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3785481452941895,
      "learning_rate": 0.0002069946176353252,
      "loss": 2.748,
      "step": 138319
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0193986892700195,
      "learning_rate": 0.00020699072862090777,
      "loss": 3.2864,
      "step": 138320
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2770884037017822,
      "learning_rate": 0.00020698683962378257,
      "loss": 3.1105,
      "step": 138321
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8244742155075073,
      "learning_rate": 0.0002069829506439502,
      "loss": 2.6603,
      "step": 138322
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4955825805664062,
      "learning_rate": 0.00020697906168141148,
      "loss": 2.9022,
      "step": 138323
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.801954507827759,
      "learning_rate": 0.00020697517273616712,
      "loss": 2.9527,
      "step": 138324
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.311927080154419,
      "learning_rate": 0.00020697128380821783,
      "loss": 3.0775,
      "step": 138325
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3345282077789307,
      "learning_rate": 0.00020696739489756434,
      "loss": 2.9448,
      "step": 138326
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3221569061279297,
      "learning_rate": 0.00020696350600420732,
      "loss": 2.8643,
      "step": 138327
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5634238719940186,
      "learning_rate": 0.00020695961712814754,
      "loss": 2.8225,
      "step": 138328
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.457505464553833,
      "learning_rate": 0.0002069557282693858,
      "loss": 2.9378,
      "step": 138329
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8194481134414673,
      "learning_rate": 0.00020695183942792268,
      "loss": 2.7935,
      "step": 138330
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.605135917663574,
      "learning_rate": 0.00020694795060375906,
      "loss": 2.9023,
      "step": 138331
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1303646564483643,
      "learning_rate": 0.00020694406179689546,
      "loss": 2.9003,
      "step": 138332
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0738461017608643,
      "learning_rate": 0.00020694017300733276,
      "loss": 3.0803,
      "step": 138333
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.916632652282715,
      "learning_rate": 0.00020693628423507164,
      "loss": 3.0868,
      "step": 138334
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.436725616455078,
      "learning_rate": 0.00020693239548011277,
      "loss": 2.8808,
      "step": 138335
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3378775119781494,
      "learning_rate": 0.00020692850674245702,
      "loss": 2.8348,
      "step": 138336
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1356287002563477,
      "learning_rate": 0.00020692461802210502,
      "loss": 2.9108,
      "step": 138337
    },
    {
      "epoch": 1.8,
      "grad_norm": 6.553559303283691,
      "learning_rate": 0.00020692072931905746,
      "loss": 2.8717,
      "step": 138338
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.0406653881073,
      "learning_rate": 0.00020691684063331504,
      "loss": 2.8881,
      "step": 138339
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0584988594055176,
      "learning_rate": 0.00020691295196487858,
      "loss": 2.9161,
      "step": 138340
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.011612892150879,
      "learning_rate": 0.00020690906331374871,
      "loss": 3.0686,
      "step": 138341
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.878149151802063,
      "learning_rate": 0.00020690517467992624,
      "loss": 3.2096,
      "step": 138342
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9628040790557861,
      "learning_rate": 0.00020690128606341198,
      "loss": 2.956,
      "step": 138343
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.929657220840454,
      "learning_rate": 0.00020689739746420637,
      "loss": 3.0249,
      "step": 138344
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4182562828063965,
      "learning_rate": 0.00020689350888231033,
      "loss": 2.9059,
      "step": 138345
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.451785087585449,
      "learning_rate": 0.00020688962031772452,
      "loss": 2.9702,
      "step": 138346
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.172459363937378,
      "learning_rate": 0.0002068857317704497,
      "loss": 3.0724,
      "step": 138347
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3432207107543945,
      "learning_rate": 0.00020688184324048656,
      "loss": 3.0396,
      "step": 138348
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8905787467956543,
      "learning_rate": 0.00020687795472783602,
      "loss": 2.8814,
      "step": 138349
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.805084705352783,
      "learning_rate": 0.00020687406623249842,
      "loss": 2.9364,
      "step": 138350
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.848503589630127,
      "learning_rate": 0.00020687017775447474,
      "loss": 2.936,
      "step": 138351
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1587343215942383,
      "learning_rate": 0.00020686628929376564,
      "loss": 3.2083,
      "step": 138352
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7584056854248047,
      "learning_rate": 0.0002068624008503719,
      "loss": 3.2111,
      "step": 138353
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.777625322341919,
      "learning_rate": 0.00020685851242429414,
      "loss": 2.9843,
      "step": 138354
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2107841968536377,
      "learning_rate": 0.00020685462401553325,
      "loss": 3.2419,
      "step": 138355
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8627028465270996,
      "learning_rate": 0.00020685073562408976,
      "loss": 2.8002,
      "step": 138356
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.160156726837158,
      "learning_rate": 0.00020684684724996447,
      "loss": 3.0461,
      "step": 138357
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8406593799591064,
      "learning_rate": 0.00020684295889315815,
      "loss": 2.7492,
      "step": 138358
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3354508876800537,
      "learning_rate": 0.00020683907055367142,
      "loss": 2.9691,
      "step": 138359
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.431696653366089,
      "learning_rate": 0.00020683518223150508,
      "loss": 2.8227,
      "step": 138360
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.697638750076294,
      "learning_rate": 0.00020683129392665996,
      "loss": 2.941,
      "step": 138361
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8796164989471436,
      "learning_rate": 0.00020682740563913654,
      "loss": 3.0898,
      "step": 138362
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3166048526763916,
      "learning_rate": 0.0002068235173689357,
      "loss": 2.9259,
      "step": 138363
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9219461679458618,
      "learning_rate": 0.0002068196291160581,
      "loss": 2.9908,
      "step": 138364
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.3614301681518555,
      "learning_rate": 0.00020681574088050447,
      "loss": 2.7409,
      "step": 138365
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.02714204788208,
      "learning_rate": 0.00020681185266227558,
      "loss": 2.8312,
      "step": 138366
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.3332998752593994,
      "learning_rate": 0.0002068079644613722,
      "loss": 2.8637,
      "step": 138367
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.618999719619751,
      "learning_rate": 0.0002068040762777949,
      "loss": 3.0956,
      "step": 138368
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0014026165008545,
      "learning_rate": 0.0002068001881115445,
      "loss": 2.8094,
      "step": 138369
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5381522178649902,
      "learning_rate": 0.00020679629996262167,
      "loss": 3.0985,
      "step": 138370
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.079587459564209,
      "learning_rate": 0.00020679241183102715,
      "loss": 2.7577,
      "step": 138371
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.853032112121582,
      "learning_rate": 0.00020678852371676173,
      "loss": 3.2303,
      "step": 138372
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.226522207260132,
      "learning_rate": 0.0002067846356198262,
      "loss": 2.9885,
      "step": 138373
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.2982850074768066,
      "learning_rate": 0.00020678074754022102,
      "loss": 3.2896,
      "step": 138374
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0936617851257324,
      "learning_rate": 0.00020677685947794705,
      "loss": 2.9448,
      "step": 138375
    },
    {
      "epoch": 1.8,
      "grad_norm": 5.483571529388428,
      "learning_rate": 0.00020677297143300503,
      "loss": 3.0981,
      "step": 138376
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.987039566040039,
      "learning_rate": 0.00020676908340539572,
      "loss": 2.9339,
      "step": 138377
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3354413509368896,
      "learning_rate": 0.0002067651953951197,
      "loss": 2.8217,
      "step": 138378
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9646023511886597,
      "learning_rate": 0.00020676130740217798,
      "loss": 3.0864,
      "step": 138379
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2122013568878174,
      "learning_rate": 0.000206757419426571,
      "loss": 3.0847,
      "step": 138380
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.257512331008911,
      "learning_rate": 0.00020675353146829952,
      "loss": 3.0125,
      "step": 138381
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9290931224823,
      "learning_rate": 0.0002067496435273643,
      "loss": 2.8964,
      "step": 138382
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2028753757476807,
      "learning_rate": 0.00020674575560376612,
      "loss": 3.1585,
      "step": 138383
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3454854488372803,
      "learning_rate": 0.00020674186769750568,
      "loss": 2.9681,
      "step": 138384
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.272460460662842,
      "learning_rate": 0.00020673797980858376,
      "loss": 2.9745,
      "step": 138385
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2711613178253174,
      "learning_rate": 0.00020673409193700096,
      "loss": 2.8223,
      "step": 138386
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7621800899505615,
      "learning_rate": 0.00020673020408275798,
      "loss": 3.085,
      "step": 138387
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2390284538269043,
      "learning_rate": 0.00020672631624585566,
      "loss": 3.0223,
      "step": 138388
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8796985149383545,
      "learning_rate": 0.00020672242842629466,
      "loss": 3.0747,
      "step": 138389
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9549360275268555,
      "learning_rate": 0.00020671854062407572,
      "loss": 2.7712,
      "step": 138390
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3317394256591797,
      "learning_rate": 0.0002067146528391996,
      "loss": 2.7398,
      "step": 138391
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.024089813232422,
      "learning_rate": 0.00020671076507166708,
      "loss": 3.0363,
      "step": 138392
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9679033756256104,
      "learning_rate": 0.00020670687732147864,
      "loss": 3.0655,
      "step": 138393
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8931788206100464,
      "learning_rate": 0.00020670298958863517,
      "loss": 3.0123,
      "step": 138394
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9872206449508667,
      "learning_rate": 0.0002066991018731374,
      "loss": 2.9639,
      "step": 138395
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5702714920043945,
      "learning_rate": 0.000206695214174986,
      "loss": 2.8813,
      "step": 138396
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0697858333587646,
      "learning_rate": 0.00020669132649418176,
      "loss": 3.025,
      "step": 138397
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2218587398529053,
      "learning_rate": 0.00020668743883072541,
      "loss": 2.895,
      "step": 138398
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8952308893203735,
      "learning_rate": 0.00020668355118461757,
      "loss": 2.8234,
      "step": 138399
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0525026321411133,
      "learning_rate": 0.00020667966355585903,
      "loss": 3.1331,
      "step": 138400
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7527213096618652,
      "learning_rate": 0.00020667577594445045,
      "loss": 2.8304,
      "step": 138401
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.245553493499756,
      "learning_rate": 0.00020667188835039266,
      "loss": 2.9191,
      "step": 138402
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8607534170150757,
      "learning_rate": 0.0002066680007736863,
      "loss": 3.0692,
      "step": 138403
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.927448272705078,
      "learning_rate": 0.00020666411321433218,
      "loss": 2.9864,
      "step": 138404
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.044583797454834,
      "learning_rate": 0.0002066602256723309,
      "loss": 3.1161,
      "step": 138405
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.191150188446045,
      "learning_rate": 0.0002066563381476833,
      "loss": 2.9967,
      "step": 138406
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1870014667510986,
      "learning_rate": 0.00020665245064038998,
      "loss": 3.0382,
      "step": 138407
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9701027870178223,
      "learning_rate": 0.00020664856315045176,
      "loss": 3.0694,
      "step": 138408
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.097543478012085,
      "learning_rate": 0.0002066446756778693,
      "loss": 2.9878,
      "step": 138409
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0642173290252686,
      "learning_rate": 0.00020664078822264347,
      "loss": 3.0024,
      "step": 138410
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.740886926651001,
      "learning_rate": 0.0002066369007847748,
      "loss": 2.8986,
      "step": 138411
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4836676120758057,
      "learning_rate": 0.00020663301336426405,
      "loss": 3.017,
      "step": 138412
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.254795551300049,
      "learning_rate": 0.00020662912596111207,
      "loss": 3.1452,
      "step": 138413
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.691897392272949,
      "learning_rate": 0.00020662523857531946,
      "loss": 2.8033,
      "step": 138414
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.888455390930176,
      "learning_rate": 0.000206621351206887,
      "loss": 2.8724,
      "step": 138415
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.401204824447632,
      "learning_rate": 0.00020661746385581542,
      "loss": 2.8875,
      "step": 138416
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0284228324890137,
      "learning_rate": 0.0002066135765221053,
      "loss": 2.896,
      "step": 138417
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6109066009521484,
      "learning_rate": 0.00020660968920575753,
      "loss": 3.1852,
      "step": 138418
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6271722316741943,
      "learning_rate": 0.00020660580190677281,
      "loss": 3.1281,
      "step": 138419
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.7933812141418457,
      "learning_rate": 0.00020660191462515184,
      "loss": 2.9925,
      "step": 138420
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7371597290039062,
      "learning_rate": 0.00020659802736089534,
      "loss": 3.0155,
      "step": 138421
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.150242328643799,
      "learning_rate": 0.00020659414011400403,
      "loss": 2.7462,
      "step": 138422
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5490264892578125,
      "learning_rate": 0.00020659025288447864,
      "loss": 3.0655,
      "step": 138423
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.033250331878662,
      "learning_rate": 0.00020658636567231982,
      "loss": 2.8194,
      "step": 138424
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3321709632873535,
      "learning_rate": 0.0002065824784775284,
      "loss": 3.1375,
      "step": 138425
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.034806966781616,
      "learning_rate": 0.00020657859130010506,
      "loss": 2.9949,
      "step": 138426
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.092473268508911,
      "learning_rate": 0.0002065747041400505,
      "loss": 3.0513,
      "step": 138427
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2717769145965576,
      "learning_rate": 0.00020657081699736556,
      "loss": 3.0137,
      "step": 138428
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.874561071395874,
      "learning_rate": 0.00020656692987205082,
      "loss": 3.0903,
      "step": 138429
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1565775871276855,
      "learning_rate": 0.000206563042764107,
      "loss": 2.8879,
      "step": 138430
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6657094955444336,
      "learning_rate": 0.00020655915567353487,
      "loss": 3.0004,
      "step": 138431
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.266345500946045,
      "learning_rate": 0.0002065552686003352,
      "loss": 2.9712,
      "step": 138432
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.2820146083831787,
      "learning_rate": 0.0002065513815445086,
      "loss": 3.087,
      "step": 138433
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9848767518997192,
      "learning_rate": 0.00020654749450605604,
      "loss": 2.9114,
      "step": 138434
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8833703994750977,
      "learning_rate": 0.00020654360748497796,
      "loss": 2.9605,
      "step": 138435
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9040229320526123,
      "learning_rate": 0.0002065397204812751,
      "loss": 2.8362,
      "step": 138436
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7881760597229004,
      "learning_rate": 0.00020653583349494837,
      "loss": 3.0514,
      "step": 138437
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7555360794067383,
      "learning_rate": 0.00020653194652599835,
      "loss": 2.9288,
      "step": 138438
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0089452266693115,
      "learning_rate": 0.0002065280595744258,
      "loss": 3.0345,
      "step": 138439
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1567463874816895,
      "learning_rate": 0.0002065241726402316,
      "loss": 3.1057,
      "step": 138440
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8500231504440308,
      "learning_rate": 0.00020652028572341614,
      "loss": 2.8635,
      "step": 138441
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.449126720428467,
      "learning_rate": 0.00020651639882398036,
      "loss": 2.9337,
      "step": 138442
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.538168430328369,
      "learning_rate": 0.00020651251194192495,
      "loss": 3.044,
      "step": 138443
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5468037128448486,
      "learning_rate": 0.00020650862507725064,
      "loss": 3.0223,
      "step": 138444
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.530285596847534,
      "learning_rate": 0.0002065047382299581,
      "loss": 2.9189,
      "step": 138445
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.167375326156616,
      "learning_rate": 0.0002065008514000483,
      "loss": 2.9026,
      "step": 138446
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.629880905151367,
      "learning_rate": 0.0002064969645875215,
      "loss": 3.0404,
      "step": 138447
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1445934772491455,
      "learning_rate": 0.00020649307779237877,
      "loss": 2.9036,
      "step": 138448
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0756032466888428,
      "learning_rate": 0.00020648919101462073,
      "loss": 3.014,
      "step": 138449
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9234989881515503,
      "learning_rate": 0.0002064853042542481,
      "loss": 2.9359,
      "step": 138450
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7765090465545654,
      "learning_rate": 0.00020648141751126165,
      "loss": 2.9537,
      "step": 138451
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.11671781539917,
      "learning_rate": 0.00020647753078566216,
      "loss": 3.0014,
      "step": 138452
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.085421085357666,
      "learning_rate": 0.00020647364407745016,
      "loss": 2.6122,
      "step": 138453
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.725917100906372,
      "learning_rate": 0.00020646975738662646,
      "loss": 2.8451,
      "step": 138454
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1727161407470703,
      "learning_rate": 0.0002064658707131918,
      "loss": 2.9635,
      "step": 138455
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.646813154220581,
      "learning_rate": 0.00020646198405714693,
      "loss": 2.939,
      "step": 138456
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.424586296081543,
      "learning_rate": 0.00020645809741849248,
      "loss": 2.8907,
      "step": 138457
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5359253883361816,
      "learning_rate": 0.0002064542107972293,
      "loss": 3.0858,
      "step": 138458
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9071028232574463,
      "learning_rate": 0.00020645032419335812,
      "loss": 2.942,
      "step": 138459
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8878145217895508,
      "learning_rate": 0.0002064464376068795,
      "loss": 3.2426,
      "step": 138460
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.315601348876953,
      "learning_rate": 0.00020644255103779424,
      "loss": 3.0756,
      "step": 138461
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.057596445083618,
      "learning_rate": 0.00020643866448610308,
      "loss": 2.9766,
      "step": 138462
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.412707567214966,
      "learning_rate": 0.00020643477795180676,
      "loss": 2.7325,
      "step": 138463
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.556910753250122,
      "learning_rate": 0.00020643089143490591,
      "loss": 2.991,
      "step": 138464
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.092505931854248,
      "learning_rate": 0.00020642700493540153,
      "loss": 2.9672,
      "step": 138465
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9429570436477661,
      "learning_rate": 0.00020642311845329395,
      "loss": 2.7353,
      "step": 138466
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6707663536071777,
      "learning_rate": 0.00020641923198858414,
      "loss": 2.8484,
      "step": 138467
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.101405620574951,
      "learning_rate": 0.00020641534554127273,
      "loss": 2.9887,
      "step": 138468
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2812838554382324,
      "learning_rate": 0.00020641145911136046,
      "loss": 2.8542,
      "step": 138469
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0960450172424316,
      "learning_rate": 0.0002064075726988481,
      "loss": 3.0445,
      "step": 138470
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.220111131668091,
      "learning_rate": 0.00020640368630373644,
      "loss": 3.0351,
      "step": 138471
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.064134120941162,
      "learning_rate": 0.00020639979992602599,
      "loss": 3.1058,
      "step": 138472
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9991222620010376,
      "learning_rate": 0.00020639591356571755,
      "loss": 2.9213,
      "step": 138473
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6656413078308105,
      "learning_rate": 0.00020639202722281188,
      "loss": 2.9433,
      "step": 138474
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.005917549133301,
      "learning_rate": 0.00020638814089730972,
      "loss": 2.927,
      "step": 138475
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2983925342559814,
      "learning_rate": 0.00020638425458921177,
      "loss": 2.8978,
      "step": 138476
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.786285638809204,
      "learning_rate": 0.0002063803682985189,
      "loss": 2.6529,
      "step": 138477
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3224008083343506,
      "learning_rate": 0.00020637648202523153,
      "loss": 3.0189,
      "step": 138478
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.603013753890991,
      "learning_rate": 0.00020637259576935057,
      "loss": 2.8374,
      "step": 138479
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7706568241119385,
      "learning_rate": 0.00020636870953087667,
      "loss": 3.0525,
      "step": 138480
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.217250108718872,
      "learning_rate": 0.00020636482330981063,
      "loss": 3.1345,
      "step": 138481
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1034739017486572,
      "learning_rate": 0.00020636093710615312,
      "loss": 3.0377,
      "step": 138482
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8578357696533203,
      "learning_rate": 0.000206357050919905,
      "loss": 2.7095,
      "step": 138483
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8573579788208008,
      "learning_rate": 0.00020635316475106673,
      "loss": 2.8694,
      "step": 138484
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9796910285949707,
      "learning_rate": 0.00020634927859963922,
      "loss": 3.1195,
      "step": 138485
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3022520542144775,
      "learning_rate": 0.00020634539246562313,
      "loss": 2.987,
      "step": 138486
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1802823543548584,
      "learning_rate": 0.0002063415063490192,
      "loss": 3.1472,
      "step": 138487
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.4178035259246826,
      "learning_rate": 0.00020633762024982813,
      "loss": 2.9858,
      "step": 138488
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.474189281463623,
      "learning_rate": 0.00020633373416805074,
      "loss": 3.013,
      "step": 138489
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1177334785461426,
      "learning_rate": 0.00020632984810368764,
      "loss": 3.0363,
      "step": 138490
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9921987056732178,
      "learning_rate": 0.00020632596205673963,
      "loss": 3.0149,
      "step": 138491
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7326748371124268,
      "learning_rate": 0.0002063220760272073,
      "loss": 3.1532,
      "step": 138492
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.7327585220336914,
      "learning_rate": 0.00020631819001509148,
      "loss": 2.9227,
      "step": 138493
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1446967124938965,
      "learning_rate": 0.00020631430402039293,
      "loss": 2.6811,
      "step": 138494
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1421170234680176,
      "learning_rate": 0.00020631041804311233,
      "loss": 3.0239,
      "step": 138495
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6316745281219482,
      "learning_rate": 0.00020630653208325033,
      "loss": 2.9482,
      "step": 138496
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.871302604675293,
      "learning_rate": 0.00020630264614080772,
      "loss": 3.1551,
      "step": 138497
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3903443813323975,
      "learning_rate": 0.00020629876021578528,
      "loss": 2.9964,
      "step": 138498
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8900424242019653,
      "learning_rate": 0.00020629487430818365,
      "loss": 2.9344,
      "step": 138499
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.959183692932129,
      "learning_rate": 0.0002062909884180035,
      "loss": 2.7873,
      "step": 138500
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.343817949295044,
      "learning_rate": 0.00020628710254524572,
      "loss": 3.0025,
      "step": 138501
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.106306791305542,
      "learning_rate": 0.0002062832166899109,
      "loss": 3.0632,
      "step": 138502
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5906124114990234,
      "learning_rate": 0.00020627933085199976,
      "loss": 3.1064,
      "step": 138503
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5581421852111816,
      "learning_rate": 0.0002062754450315131,
      "loss": 2.9177,
      "step": 138504
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0437850952148438,
      "learning_rate": 0.00020627155922845164,
      "loss": 3.034,
      "step": 138505
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4620182514190674,
      "learning_rate": 0.000206267673442816,
      "loss": 2.9346,
      "step": 138506
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.126549243927002,
      "learning_rate": 0.000206263787674607,
      "loss": 3.0806,
      "step": 138507
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2061479091644287,
      "learning_rate": 0.00020625990192382534,
      "loss": 2.9268,
      "step": 138508
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.016908884048462,
      "learning_rate": 0.0002062560161904717,
      "loss": 2.8962,
      "step": 138509
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.002620220184326,
      "learning_rate": 0.00020625213047454683,
      "loss": 3.0635,
      "step": 138510
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6844594478607178,
      "learning_rate": 0.00020624824477605147,
      "loss": 3.0008,
      "step": 138511
    },
    {
      "epoch": 1.8,
      "grad_norm": 4.942826271057129,
      "learning_rate": 0.00020624435909498634,
      "loss": 2.8775,
      "step": 138512
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.227922201156616,
      "learning_rate": 0.00020624047343135223,
      "loss": 2.941,
      "step": 138513
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5870821475982666,
      "learning_rate": 0.0002062365877851497,
      "loss": 2.9436,
      "step": 138514
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.655319929122925,
      "learning_rate": 0.00020623270215637958,
      "loss": 2.9597,
      "step": 138515
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.146186113357544,
      "learning_rate": 0.00020622881654504252,
      "loss": 3.0431,
      "step": 138516
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6979193687438965,
      "learning_rate": 0.00020622493095113933,
      "loss": 3.1573,
      "step": 138517
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9914878606796265,
      "learning_rate": 0.00020622104537467069,
      "loss": 3.2447,
      "step": 138518
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.789813995361328,
      "learning_rate": 0.00020621715981563742,
      "loss": 2.8385,
      "step": 138519
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.921262741088867,
      "learning_rate": 0.00020621327427404004,
      "loss": 3.1193,
      "step": 138520
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9288707971572876,
      "learning_rate": 0.0002062093887498794,
      "loss": 3.0044,
      "step": 138521
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3866260051727295,
      "learning_rate": 0.0002062055032431562,
      "loss": 2.8606,
      "step": 138522
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.091644763946533,
      "learning_rate": 0.00020620161775387118,
      "loss": 3.0337,
      "step": 138523
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1082100868225098,
      "learning_rate": 0.000206197732282025,
      "loss": 2.8642,
      "step": 138524
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9264848232269287,
      "learning_rate": 0.0002061938468276185,
      "loss": 2.7955,
      "step": 138525
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4329848289489746,
      "learning_rate": 0.0002061899613906524,
      "loss": 2.7948,
      "step": 138526
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9779616594314575,
      "learning_rate": 0.00020618607597112728,
      "loss": 3.0351,
      "step": 138527
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1562302112579346,
      "learning_rate": 0.0002061821905690439,
      "loss": 3.2073,
      "step": 138528
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2434656620025635,
      "learning_rate": 0.00020617830518440306,
      "loss": 2.8107,
      "step": 138529
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9369362592697144,
      "learning_rate": 0.0002061744198172054,
      "loss": 3.0154,
      "step": 138530
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.211965560913086,
      "learning_rate": 0.00020617053446745173,
      "loss": 2.6445,
      "step": 138531
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.289263963699341,
      "learning_rate": 0.0002061666491351428,
      "loss": 3.3673,
      "step": 138532
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.018162727355957,
      "learning_rate": 0.00020616276382027918,
      "loss": 2.8853,
      "step": 138533
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.052842140197754,
      "learning_rate": 0.0002061588785228617,
      "loss": 2.8946,
      "step": 138534
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0950100421905518,
      "learning_rate": 0.00020615499324289102,
      "loss": 3.0445,
      "step": 138535
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.095442295074463,
      "learning_rate": 0.00020615110798036788,
      "loss": 2.9025,
      "step": 138536
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9798394441604614,
      "learning_rate": 0.00020614722273529305,
      "loss": 3.0435,
      "step": 138537
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.262643575668335,
      "learning_rate": 0.00020614333750766732,
      "loss": 2.8092,
      "step": 138538
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9809986352920532,
      "learning_rate": 0.00020613945229749122,
      "loss": 3.0561,
      "step": 138539
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.105475425720215,
      "learning_rate": 0.00020613556710476555,
      "loss": 3.0151,
      "step": 138540
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5809929370880127,
      "learning_rate": 0.00020613168192949103,
      "loss": 2.9149,
      "step": 138541
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.444091320037842,
      "learning_rate": 0.0002061277967716684,
      "loss": 2.9448,
      "step": 138542
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.935797691345215,
      "learning_rate": 0.00020612391163129848,
      "loss": 2.823,
      "step": 138543
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1084859371185303,
      "learning_rate": 0.00020612002650838193,
      "loss": 2.758,
      "step": 138544
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5103280544281006,
      "learning_rate": 0.00020611614140291935,
      "loss": 2.9112,
      "step": 138545
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0180938243865967,
      "learning_rate": 0.0002061122563149115,
      "loss": 2.8724,
      "step": 138546
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.255599021911621,
      "learning_rate": 0.00020610837124435921,
      "loss": 3.0442,
      "step": 138547
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.150566339492798,
      "learning_rate": 0.00020610448619126314,
      "loss": 2.87,
      "step": 138548
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6161625385284424,
      "learning_rate": 0.000206100601155624,
      "loss": 2.943,
      "step": 138549
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4660661220550537,
      "learning_rate": 0.00020609671613744265,
      "loss": 2.9212,
      "step": 138550
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.6326239109039307,
      "learning_rate": 0.0002060928311367196,
      "loss": 2.9065,
      "step": 138551
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2785236835479736,
      "learning_rate": 0.00020608894615345563,
      "loss": 2.8585,
      "step": 138552
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0383787155151367,
      "learning_rate": 0.00020608506118765152,
      "loss": 2.8741,
      "step": 138553
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.085965394973755,
      "learning_rate": 0.00020608117623930795,
      "loss": 3.097,
      "step": 138554
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.067044734954834,
      "learning_rate": 0.0002060772913084257,
      "loss": 3.0104,
      "step": 138555
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3847835063934326,
      "learning_rate": 0.00020607340639500554,
      "loss": 2.9677,
      "step": 138556
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7809736728668213,
      "learning_rate": 0.00020606952149904801,
      "loss": 2.8762,
      "step": 138557
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.371220350265503,
      "learning_rate": 0.00020606563662055392,
      "loss": 2.8696,
      "step": 138558
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9561336040496826,
      "learning_rate": 0.00020606175175952397,
      "loss": 2.8468,
      "step": 138559
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9245944023132324,
      "learning_rate": 0.00020605786691595894,
      "loss": 3.0821,
      "step": 138560
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6637864112854004,
      "learning_rate": 0.00020605398208985954,
      "loss": 3.0489,
      "step": 138561
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5851855278015137,
      "learning_rate": 0.0002060500972812266,
      "loss": 3.1323,
      "step": 138562
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.243169069290161,
      "learning_rate": 0.00020604621249006058,
      "loss": 2.9966,
      "step": 138563
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.990568161010742,
      "learning_rate": 0.00020604232771636232,
      "loss": 2.6757,
      "step": 138564
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.3213212490081787,
      "learning_rate": 0.00020603844296013263,
      "loss": 2.8496,
      "step": 138565
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.6686251163482666,
      "learning_rate": 0.00020603455822137208,
      "loss": 2.7988,
      "step": 138566
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6045048236846924,
      "learning_rate": 0.00020603067350008158,
      "loss": 3.1352,
      "step": 138567
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.4737062454223633,
      "learning_rate": 0.0002060267887962618,
      "loss": 2.9535,
      "step": 138568
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.360102653503418,
      "learning_rate": 0.00020602290410991333,
      "loss": 3.2329,
      "step": 138569
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4124906063079834,
      "learning_rate": 0.00020601901944103694,
      "loss": 3.1188,
      "step": 138570
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1741957664489746,
      "learning_rate": 0.00020601513478963343,
      "loss": 3.0123,
      "step": 138571
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.2510902881622314,
      "learning_rate": 0.00020601125015570345,
      "loss": 2.8585,
      "step": 138572
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.94963800907135,
      "learning_rate": 0.00020600736553924776,
      "loss": 3.0891,
      "step": 138573
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8730885982513428,
      "learning_rate": 0.00020600348094026714,
      "loss": 3.0406,
      "step": 138574
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0884745121002197,
      "learning_rate": 0.00020599959635876224,
      "loss": 2.9569,
      "step": 138575
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.015336036682129,
      "learning_rate": 0.00020599571179473372,
      "loss": 2.8476,
      "step": 138576
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.027317523956299,
      "learning_rate": 0.0002059918272481824,
      "loss": 2.8914,
      "step": 138577
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9637436866760254,
      "learning_rate": 0.00020598794271910897,
      "loss": 3.1315,
      "step": 138578
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.782557249069214,
      "learning_rate": 0.00020598405820751415,
      "loss": 2.9895,
      "step": 138579
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6353659629821777,
      "learning_rate": 0.0002059801737133987,
      "loss": 2.9829,
      "step": 138580
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.5697240829467773,
      "learning_rate": 0.0002059762892367633,
      "loss": 2.7844,
      "step": 138581
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.752622127532959,
      "learning_rate": 0.00020597240477760866,
      "loss": 3.1671,
      "step": 138582
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2226483821868896,
      "learning_rate": 0.00020596852033593558,
      "loss": 2.6957,
      "step": 138583
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.036235809326172,
      "learning_rate": 0.00020596463591174464,
      "loss": 3.1993,
      "step": 138584
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0405051708221436,
      "learning_rate": 0.00020596075150503668,
      "loss": 2.9998,
      "step": 138585
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.155787229537964,
      "learning_rate": 0.00020595686711581248,
      "loss": 2.8186,
      "step": 138586
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.946415901184082,
      "learning_rate": 0.00020595298274407256,
      "loss": 2.6966,
      "step": 138587
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9695159196853638,
      "learning_rate": 0.00020594909838981778,
      "loss": 3.0839,
      "step": 138588
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1003968715667725,
      "learning_rate": 0.00020594521405304888,
      "loss": 2.9716,
      "step": 138589
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.13900089263916,
      "learning_rate": 0.0002059413297337665,
      "loss": 2.8802,
      "step": 138590
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.0394790172576904,
      "learning_rate": 0.00020593744543197145,
      "loss": 2.8696,
      "step": 138591
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9959633350372314,
      "learning_rate": 0.0002059335611476643,
      "loss": 2.9523,
      "step": 138592
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1020607948303223,
      "learning_rate": 0.000205929676880846,
      "loss": 2.9703,
      "step": 138593
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.101294755935669,
      "learning_rate": 0.0002059257926315171,
      "loss": 3.1464,
      "step": 138594
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9638152122497559,
      "learning_rate": 0.00020592190839967836,
      "loss": 2.8876,
      "step": 138595
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9148659706115723,
      "learning_rate": 0.00020591802418533047,
      "loss": 3.0424,
      "step": 138596
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.7435500621795654,
      "learning_rate": 0.00020591413998847434,
      "loss": 2.9395,
      "step": 138597
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.323934555053711,
      "learning_rate": 0.00020591025580911042,
      "loss": 3.1914,
      "step": 138598
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.2757773399353027,
      "learning_rate": 0.00020590637164723965,
      "loss": 2.9208,
      "step": 138599
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.982008934020996,
      "learning_rate": 0.0002059024875028626,
      "loss": 3.0448,
      "step": 138600
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.091395139694214,
      "learning_rate": 0.00020589860337598,
      "loss": 3.0785,
      "step": 138601
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.8659160137176514,
      "learning_rate": 0.00020589471926659266,
      "loss": 2.916,
      "step": 138602
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.97910475730896,
      "learning_rate": 0.00020589083517470128,
      "loss": 2.9385,
      "step": 138603
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.4125351905822754,
      "learning_rate": 0.00020588695110030665,
      "loss": 3.0295,
      "step": 138604
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.100581645965576,
      "learning_rate": 0.00020588306704340936,
      "loss": 2.8941,
      "step": 138605
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.398958206176758,
      "learning_rate": 0.00020587918300401015,
      "loss": 2.7315,
      "step": 138606
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.9455344676971436,
      "learning_rate": 0.0002058752989821098,
      "loss": 2.8849,
      "step": 138607
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.5179452896118164,
      "learning_rate": 0.00020587141497770896,
      "loss": 3.0043,
      "step": 138608
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.329591751098633,
      "learning_rate": 0.00020586753099080846,
      "loss": 2.8588,
      "step": 138609
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.143906593322754,
      "learning_rate": 0.0002058636470214089,
      "loss": 3.0777,
      "step": 138610
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.8640122413635254,
      "learning_rate": 0.00020585976306951122,
      "loss": 2.814,
      "step": 138611
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8365063667297363,
      "learning_rate": 0.0002058558791351159,
      "loss": 2.7334,
      "step": 138612
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.3964195251464844,
      "learning_rate": 0.00020585199521822367,
      "loss": 3.2056,
      "step": 138613
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.25429105758667,
      "learning_rate": 0.0002058481113188354,
      "loss": 3.1831,
      "step": 138614
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.8784940242767334,
      "learning_rate": 0.00020584422743695173,
      "loss": 2.9482,
      "step": 138615
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.6568822860717773,
      "learning_rate": 0.00020584034357257335,
      "loss": 3.0934,
      "step": 138616
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.243760824203491,
      "learning_rate": 0.00020583645972570118,
      "loss": 3.0461,
      "step": 138617
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9407130479812622,
      "learning_rate": 0.00020583257589633567,
      "loss": 3.123,
      "step": 138618
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.1403591632843018,
      "learning_rate": 0.00020582869208447767,
      "loss": 2.9957,
      "step": 138619
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.1714367866516113,
      "learning_rate": 0.00020582480829012788,
      "loss": 2.9261,
      "step": 138620
    },
    {
      "epoch": 1.8,
      "grad_norm": 2.101294755935669,
      "learning_rate": 0.00020582092451328703,
      "loss": 3.1572,
      "step": 138621
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9212257862091064,
      "learning_rate": 0.00020581704075395589,
      "loss": 2.8304,
      "step": 138622
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.337805986404419,
      "learning_rate": 0.0002058131570121352,
      "loss": 2.9896,
      "step": 138623
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.5466136932373047,
      "learning_rate": 0.00020580927328782552,
      "loss": 3.0646,
      "step": 138624
    },
    {
      "epoch": 1.8,
      "grad_norm": 1.9081227779388428,
      "learning_rate": 0.00020580538958102771,
      "loss": 2.8453,
      "step": 138625
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1985485553741455,
      "learning_rate": 0.00020580150589174243,
      "loss": 3.0937,
      "step": 138626
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.631936550140381,
      "learning_rate": 0.00020579762221997042,
      "loss": 2.8774,
      "step": 138627
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3291096687316895,
      "learning_rate": 0.00020579373856571245,
      "loss": 2.9943,
      "step": 138628
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5724713802337646,
      "learning_rate": 0.00020578985492896928,
      "loss": 2.8862,
      "step": 138629
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0402028560638428,
      "learning_rate": 0.00020578597130974143,
      "loss": 2.9056,
      "step": 138630
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.9651601314544678,
      "learning_rate": 0.00020578208770802975,
      "loss": 2.9181,
      "step": 138631
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5007948875427246,
      "learning_rate": 0.000205778204123835,
      "loss": 2.793,
      "step": 138632
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.7055318355560303,
      "learning_rate": 0.00020577432055715782,
      "loss": 2.9378,
      "step": 138633
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8944772481918335,
      "learning_rate": 0.000205770437007999,
      "loss": 3.2422,
      "step": 138634
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.346309185028076,
      "learning_rate": 0.00020576655347635933,
      "loss": 2.9867,
      "step": 138635
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.292642593383789,
      "learning_rate": 0.00020576266996223934,
      "loss": 3.0131,
      "step": 138636
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.125094175338745,
      "learning_rate": 0.00020575878646563982,
      "loss": 2.9515,
      "step": 138637
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.551231622695923,
      "learning_rate": 0.00020575490298656152,
      "loss": 2.7857,
      "step": 138638
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9873244762420654,
      "learning_rate": 0.00020575101952500518,
      "loss": 2.7157,
      "step": 138639
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.6530182361602783,
      "learning_rate": 0.00020574713608097152,
      "loss": 3.0359,
      "step": 138640
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.382824182510376,
      "learning_rate": 0.00020574325265446136,
      "loss": 3.0412,
      "step": 138641
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.788739562034607,
      "learning_rate": 0.00020573936924547516,
      "loss": 2.7524,
      "step": 138642
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.189579963684082,
      "learning_rate": 0.00020573548585401378,
      "loss": 3.074,
      "step": 138643
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.690359592437744,
      "learning_rate": 0.000205731602480078,
      "loss": 2.7507,
      "step": 138644
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3501498699188232,
      "learning_rate": 0.00020572771912366847,
      "loss": 3.1475,
      "step": 138645
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6780829429626465,
      "learning_rate": 0.0002057238357847859,
      "loss": 2.7233,
      "step": 138646
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.011427640914917,
      "learning_rate": 0.00020571995246343122,
      "loss": 2.8539,
      "step": 138647
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.4039700031280518,
      "learning_rate": 0.00020571606915960485,
      "loss": 3.1494,
      "step": 138648
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6252639293670654,
      "learning_rate": 0.00020571218587330767,
      "loss": 2.9863,
      "step": 138649
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.348870277404785,
      "learning_rate": 0.00020570830260454033,
      "loss": 3.3075,
      "step": 138650
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.031703233718872,
      "learning_rate": 0.00020570441935330362,
      "loss": 2.834,
      "step": 138651
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2169933319091797,
      "learning_rate": 0.00020570053611959822,
      "loss": 2.8496,
      "step": 138652
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0632970333099365,
      "learning_rate": 0.000205696652903425,
      "loss": 2.8908,
      "step": 138653
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.912949323654175,
      "learning_rate": 0.00020569276970478444,
      "loss": 2.9236,
      "step": 138654
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.217115640640259,
      "learning_rate": 0.00020568888652367737,
      "loss": 3.1498,
      "step": 138655
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9217489957809448,
      "learning_rate": 0.00020568500336010453,
      "loss": 2.6995,
      "step": 138656
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.552476167678833,
      "learning_rate": 0.00020568112021406663,
      "loss": 3.0029,
      "step": 138657
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2570180892944336,
      "learning_rate": 0.00020567723708556435,
      "loss": 2.8722,
      "step": 138658
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7087509632110596,
      "learning_rate": 0.00020567335397459846,
      "loss": 2.8839,
      "step": 138659
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3560330867767334,
      "learning_rate": 0.00020566947088116982,
      "loss": 3.1004,
      "step": 138660
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2045514583587646,
      "learning_rate": 0.00020566558780527887,
      "loss": 2.9522,
      "step": 138661
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2769758701324463,
      "learning_rate": 0.00020566170474692647,
      "loss": 2.9645,
      "step": 138662
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.317164182662964,
      "learning_rate": 0.00020565782170611335,
      "loss": 2.9892,
      "step": 138663
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9871554374694824,
      "learning_rate": 0.0002056539386828402,
      "loss": 2.9149,
      "step": 138664
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.844156503677368,
      "learning_rate": 0.00020565005567710777,
      "loss": 2.9751,
      "step": 138665
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.293333053588867,
      "learning_rate": 0.00020564617268891685,
      "loss": 3.0715,
      "step": 138666
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8615264892578125,
      "learning_rate": 0.00020564228971826811,
      "loss": 2.8643,
      "step": 138667
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8609806299209595,
      "learning_rate": 0.00020563840676516212,
      "loss": 2.907,
      "step": 138668
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.7070878744125366,
      "learning_rate": 0.00020563452382959976,
      "loss": 2.831,
      "step": 138669
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.145667791366577,
      "learning_rate": 0.0002056306409115818,
      "loss": 2.4703,
      "step": 138670
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.531026840209961,
      "learning_rate": 0.00020562675801110878,
      "loss": 2.929,
      "step": 138671
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1460719108581543,
      "learning_rate": 0.00020562287512818165,
      "loss": 3.1438,
      "step": 138672
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.149646520614624,
      "learning_rate": 0.0002056189922628009,
      "loss": 2.9664,
      "step": 138673
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5888307094573975,
      "learning_rate": 0.00020561510941496746,
      "loss": 3.0338,
      "step": 138674
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7496209144592285,
      "learning_rate": 0.00020561122658468186,
      "loss": 3.1047,
      "step": 138675
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.103804111480713,
      "learning_rate": 0.00020560734377194495,
      "loss": 2.8788,
      "step": 138676
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.231337308883667,
      "learning_rate": 0.0002056034609767574,
      "loss": 3.1328,
      "step": 138677
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3440582752227783,
      "learning_rate": 0.00020559957819912003,
      "loss": 3.1237,
      "step": 138678
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.687218427658081,
      "learning_rate": 0.00020559569543903339,
      "loss": 2.9977,
      "step": 138679
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8667733669281006,
      "learning_rate": 0.0002055918126964983,
      "loss": 2.8398,
      "step": 138680
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.185335636138916,
      "learning_rate": 0.0002055879299715155,
      "loss": 2.8769,
      "step": 138681
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8947534561157227,
      "learning_rate": 0.00020558404726408571,
      "loss": 3.1558,
      "step": 138682
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2573795318603516,
      "learning_rate": 0.0002055801645742096,
      "loss": 2.9773,
      "step": 138683
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4784185886383057,
      "learning_rate": 0.00020557628190188794,
      "loss": 3.0499,
      "step": 138684
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6135799884796143,
      "learning_rate": 0.00020557239924712137,
      "loss": 3.2505,
      "step": 138685
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2858986854553223,
      "learning_rate": 0.0002055685166099107,
      "loss": 3.0693,
      "step": 138686
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.213752269744873,
      "learning_rate": 0.00020556463399025665,
      "loss": 2.7072,
      "step": 138687
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8299503326416016,
      "learning_rate": 0.0002055607513881599,
      "loss": 3.0329,
      "step": 138688
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.43975830078125,
      "learning_rate": 0.00020555686880362122,
      "loss": 2.968,
      "step": 138689
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.786191940307617,
      "learning_rate": 0.0002055529862366413,
      "loss": 3.1415,
      "step": 138690
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.625589609146118,
      "learning_rate": 0.00020554910368722083,
      "loss": 3.0045,
      "step": 138691
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9396250247955322,
      "learning_rate": 0.00020554522115536056,
      "loss": 3.0226,
      "step": 138692
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.025996208190918,
      "learning_rate": 0.00020554133864106119,
      "loss": 2.919,
      "step": 138693
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5526764392852783,
      "learning_rate": 0.00020553745614432348,
      "loss": 2.9193,
      "step": 138694
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9091209173202515,
      "learning_rate": 0.00020553357366514817,
      "loss": 2.6651,
      "step": 138695
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3896145820617676,
      "learning_rate": 0.00020552969120353606,
      "loss": 2.9976,
      "step": 138696
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6153690814971924,
      "learning_rate": 0.0002055258087594876,
      "loss": 3.0516,
      "step": 138697
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0366337299346924,
      "learning_rate": 0.00020552192633300375,
      "loss": 2.9316,
      "step": 138698
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.304516315460205,
      "learning_rate": 0.0002055180439240851,
      "loss": 2.9521,
      "step": 138699
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5892035961151123,
      "learning_rate": 0.00020551416153273246,
      "loss": 2.9868,
      "step": 138700
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2735698223114014,
      "learning_rate": 0.00020551027915894648,
      "loss": 2.8191,
      "step": 138701
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3200387954711914,
      "learning_rate": 0.00020550639680272812,
      "loss": 2.9765,
      "step": 138702
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1346685886383057,
      "learning_rate": 0.00020550251446407771,
      "loss": 2.9963,
      "step": 138703
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8292622566223145,
      "learning_rate": 0.0002054986321429962,
      "loss": 2.8534,
      "step": 138704
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.243863344192505,
      "learning_rate": 0.00020549474983948425,
      "loss": 2.6153,
      "step": 138705
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.353419780731201,
      "learning_rate": 0.00020549086755354264,
      "loss": 2.7767,
      "step": 138706
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.246170997619629,
      "learning_rate": 0.00020548698528517203,
      "loss": 2.815,
      "step": 138707
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.422863483428955,
      "learning_rate": 0.0002054831030343733,
      "loss": 3.0615,
      "step": 138708
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.619849681854248,
      "learning_rate": 0.00020547922080114695,
      "loss": 2.7722,
      "step": 138709
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.160170555114746,
      "learning_rate": 0.0002054753385854938,
      "loss": 2.9793,
      "step": 138710
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.098038911819458,
      "learning_rate": 0.00020547145638741458,
      "loss": 2.815,
      "step": 138711
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1097359657287598,
      "learning_rate": 0.00020546757420690997,
      "loss": 2.859,
      "step": 138712
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.296023368835449,
      "learning_rate": 0.00020546369204398074,
      "loss": 2.904,
      "step": 138713
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.30639910697937,
      "learning_rate": 0.0002054598098986277,
      "loss": 3.0272,
      "step": 138714
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.00009822845459,
      "learning_rate": 0.0002054559277708513,
      "loss": 3.1809,
      "step": 138715
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.142514228820801,
      "learning_rate": 0.00020545204566065252,
      "loss": 3.0212,
      "step": 138716
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9265285730361938,
      "learning_rate": 0.00020544816356803195,
      "loss": 3.1415,
      "step": 138717
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1216650009155273,
      "learning_rate": 0.00020544428149299034,
      "loss": 2.9851,
      "step": 138718
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.6744059324264526,
      "learning_rate": 0.00020544039943552842,
      "loss": 3.2161,
      "step": 138719
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1684165000915527,
      "learning_rate": 0.00020543651739564703,
      "loss": 3.0205,
      "step": 138720
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.945809006690979,
      "learning_rate": 0.00020543263537334668,
      "loss": 3.1568,
      "step": 138721
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.827183485031128,
      "learning_rate": 0.0002054287533686282,
      "loss": 2.897,
      "step": 138722
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.086522102355957,
      "learning_rate": 0.00020542487138149228,
      "loss": 2.9257,
      "step": 138723
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0582475662231445,
      "learning_rate": 0.00020542098941193966,
      "loss": 2.9805,
      "step": 138724
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.965632677078247,
      "learning_rate": 0.00020541710745997105,
      "loss": 2.8269,
      "step": 138725
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2296066284179688,
      "learning_rate": 0.0002054132255255872,
      "loss": 3.171,
      "step": 138726
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9655295610427856,
      "learning_rate": 0.00020540934360878895,
      "loss": 2.9282,
      "step": 138727
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.013788938522339,
      "learning_rate": 0.00020540546170957674,
      "loss": 2.9038,
      "step": 138728
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.023013114929199,
      "learning_rate": 0.00020540157982795146,
      "loss": 2.998,
      "step": 138729
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5573184490203857,
      "learning_rate": 0.0002053976979639138,
      "loss": 2.8115,
      "step": 138730
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7825844287872314,
      "learning_rate": 0.0002053938161174645,
      "loss": 3.0065,
      "step": 138731
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9293015003204346,
      "learning_rate": 0.0002053899342886043,
      "loss": 2.9799,
      "step": 138732
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1731643676757812,
      "learning_rate": 0.000205386052477334,
      "loss": 3.2403,
      "step": 138733
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.642392635345459,
      "learning_rate": 0.0002053821706836541,
      "loss": 2.9113,
      "step": 138734
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4035894870758057,
      "learning_rate": 0.0002053782889075654,
      "loss": 2.807,
      "step": 138735
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1729791164398193,
      "learning_rate": 0.0002053744071490687,
      "loss": 2.9428,
      "step": 138736
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.237154960632324,
      "learning_rate": 0.0002053705254081647,
      "loss": 2.816,
      "step": 138737
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.356426954269409,
      "learning_rate": 0.00020536664368485407,
      "loss": 2.7568,
      "step": 138738
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0681514739990234,
      "learning_rate": 0.00020536276197913772,
      "loss": 2.7645,
      "step": 138739
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.447768449783325,
      "learning_rate": 0.00020535888029101607,
      "loss": 2.9218,
      "step": 138740
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.705589532852173,
      "learning_rate": 0.00020535499862049,
      "loss": 3.0132,
      "step": 138741
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.172327995300293,
      "learning_rate": 0.00020535111696756022,
      "loss": 2.9954,
      "step": 138742
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1571707725524902,
      "learning_rate": 0.00020534723533222743,
      "loss": 2.9002,
      "step": 138743
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9753113985061646,
      "learning_rate": 0.00020534335371449243,
      "loss": 3.134,
      "step": 138744
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.988192081451416,
      "learning_rate": 0.00020533947211435599,
      "loss": 3.112,
      "step": 138745
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8576595783233643,
      "learning_rate": 0.0002053355905318186,
      "loss": 2.8088,
      "step": 138746
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4877302646636963,
      "learning_rate": 0.00020533170896688108,
      "loss": 2.8359,
      "step": 138747
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.349356174468994,
      "learning_rate": 0.0002053278274195442,
      "loss": 3.1154,
      "step": 138748
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.217602491378784,
      "learning_rate": 0.0002053239458898087,
      "loss": 3.1895,
      "step": 138749
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4852406978607178,
      "learning_rate": 0.00020532006437767528,
      "loss": 2.7602,
      "step": 138750
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.151322841644287,
      "learning_rate": 0.00020531618288314462,
      "loss": 2.9754,
      "step": 138751
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8304150104522705,
      "learning_rate": 0.00020531230140621756,
      "loss": 3.0215,
      "step": 138752
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.933525562286377,
      "learning_rate": 0.0002053084199468946,
      "loss": 3.0564,
      "step": 138753
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.448732852935791,
      "learning_rate": 0.00020530453850517663,
      "loss": 2.7185,
      "step": 138754
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.046875,
      "learning_rate": 0.0002053006570810643,
      "loss": 2.9235,
      "step": 138755
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.329392194747925,
      "learning_rate": 0.0002052967756745584,
      "loss": 3.1043,
      "step": 138756
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4258344173431396,
      "learning_rate": 0.0002052928942856597,
      "loss": 2.9055,
      "step": 138757
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4562201499938965,
      "learning_rate": 0.00020528901291436873,
      "loss": 2.943,
      "step": 138758
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2065269947052,
      "learning_rate": 0.0002052851315606864,
      "loss": 2.884,
      "step": 138759
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9724003076553345,
      "learning_rate": 0.0002052812502246133,
      "loss": 3.1,
      "step": 138760
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3418314456939697,
      "learning_rate": 0.00020527736890615017,
      "loss": 2.791,
      "step": 138761
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9891916513442993,
      "learning_rate": 0.00020527348760529778,
      "loss": 3.0419,
      "step": 138762
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.053546667098999,
      "learning_rate": 0.0002052696063220569,
      "loss": 3.3009,
      "step": 138763
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.585064172744751,
      "learning_rate": 0.00020526572505642817,
      "loss": 2.7726,
      "step": 138764
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.573988199234009,
      "learning_rate": 0.00020526184380841226,
      "loss": 3.1247,
      "step": 138765
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.0482370853424072,
      "learning_rate": 0.00020525796257801006,
      "loss": 3.0407,
      "step": 138766
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.947828769683838,
      "learning_rate": 0.00020525408136522213,
      "loss": 2.8959,
      "step": 138767
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.278839349746704,
      "learning_rate": 0.00020525020017004927,
      "loss": 2.9599,
      "step": 138768
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.873977780342102,
      "learning_rate": 0.00020524631899249222,
      "loss": 3.3244,
      "step": 138769
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.19296407699585,
      "learning_rate": 0.0002052424378325516,
      "loss": 2.7755,
      "step": 138770
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.095946788787842,
      "learning_rate": 0.00020523855669022823,
      "loss": 3.0832,
      "step": 138771
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9159995317459106,
      "learning_rate": 0.0002052346755655228,
      "loss": 3.0583,
      "step": 138772
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0815012454986572,
      "learning_rate": 0.000205230794458436,
      "loss": 3.0988,
      "step": 138773
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2727608680725098,
      "learning_rate": 0.00020522691336896865,
      "loss": 2.975,
      "step": 138774
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2721548080444336,
      "learning_rate": 0.00020522303229712142,
      "loss": 2.9619,
      "step": 138775
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3148422241210938,
      "learning_rate": 0.00020521915124289497,
      "loss": 2.7975,
      "step": 138776
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.294529676437378,
      "learning_rate": 0.00020521527020629006,
      "loss": 2.8338,
      "step": 138777
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0492522716522217,
      "learning_rate": 0.00020521138918730742,
      "loss": 2.9593,
      "step": 138778
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.810920000076294,
      "learning_rate": 0.00020520750818594778,
      "loss": 2.8006,
      "step": 138779
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0387189388275146,
      "learning_rate": 0.00020520362720221182,
      "loss": 2.8765,
      "step": 138780
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.15910005569458,
      "learning_rate": 0.00020519974623610043,
      "loss": 2.9679,
      "step": 138781
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.600980281829834,
      "learning_rate": 0.00020519586528761408,
      "loss": 3.101,
      "step": 138782
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7216410636901855,
      "learning_rate": 0.0002051919843567536,
      "loss": 3.2297,
      "step": 138783
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3998162746429443,
      "learning_rate": 0.00020518810344351976,
      "loss": 2.9832,
      "step": 138784
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9382634162902832,
      "learning_rate": 0.00020518422254791322,
      "loss": 3.107,
      "step": 138785
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1237401962280273,
      "learning_rate": 0.0002051803416699347,
      "loss": 2.8886,
      "step": 138786
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.7666308879852295,
      "learning_rate": 0.0002051764608095851,
      "loss": 2.7576,
      "step": 138787
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5988574028015137,
      "learning_rate": 0.00020517257996686484,
      "loss": 3.0744,
      "step": 138788
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0487263202667236,
      "learning_rate": 0.00020516869914177476,
      "loss": 3.094,
      "step": 138789
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6728675365448,
      "learning_rate": 0.00020516481833431565,
      "loss": 2.9103,
      "step": 138790
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4975647926330566,
      "learning_rate": 0.00020516093754448822,
      "loss": 3.0655,
      "step": 138791
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.60528302192688,
      "learning_rate": 0.00020515705677229309,
      "loss": 2.987,
      "step": 138792
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.389265298843384,
      "learning_rate": 0.00020515317601773112,
      "loss": 3.1893,
      "step": 138793
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9871814250946045,
      "learning_rate": 0.00020514929528080305,
      "loss": 3.1439,
      "step": 138794
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.047893047332764,
      "learning_rate": 0.0002051454145615094,
      "loss": 3.0839,
      "step": 138795
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.518463373184204,
      "learning_rate": 0.00020514153385985096,
      "loss": 2.7619,
      "step": 138796
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6187963485717773,
      "learning_rate": 0.00020513765317582857,
      "loss": 3.1248,
      "step": 138797
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.1552579402923584,
      "learning_rate": 0.00020513377250944286,
      "loss": 2.7542,
      "step": 138798
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.683098316192627,
      "learning_rate": 0.00020512989186069456,
      "loss": 2.8447,
      "step": 138799
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.057246685028076,
      "learning_rate": 0.00020512601122958454,
      "loss": 2.7927,
      "step": 138800
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7694714069366455,
      "learning_rate": 0.00020512213061611324,
      "loss": 2.9005,
      "step": 138801
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0335633754730225,
      "learning_rate": 0.00020511825002028156,
      "loss": 3.125,
      "step": 138802
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.099254608154297,
      "learning_rate": 0.00020511436944209018,
      "loss": 2.9206,
      "step": 138803
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4534668922424316,
      "learning_rate": 0.00020511048888153982,
      "loss": 2.8765,
      "step": 138804
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1544911861419678,
      "learning_rate": 0.00020510660833863122,
      "loss": 2.881,
      "step": 138805
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.527411460876465,
      "learning_rate": 0.00020510272781336524,
      "loss": 2.9016,
      "step": 138806
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.16973876953125,
      "learning_rate": 0.00020509884730574227,
      "loss": 3.0725,
      "step": 138807
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.240779399871826,
      "learning_rate": 0.00020509496681576326,
      "loss": 2.7953,
      "step": 138808
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1184701919555664,
      "learning_rate": 0.00020509108634342884,
      "loss": 3.2284,
      "step": 138809
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2572903633117676,
      "learning_rate": 0.00020508720588873985,
      "loss": 3.2493,
      "step": 138810
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0183932781219482,
      "learning_rate": 0.0002050833254516969,
      "loss": 2.7413,
      "step": 138811
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2908709049224854,
      "learning_rate": 0.00020507944503230088,
      "loss": 3.136,
      "step": 138812
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.207014560699463,
      "learning_rate": 0.00020507556463055226,
      "loss": 2.9511,
      "step": 138813
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.383873462677002,
      "learning_rate": 0.00020507168424645187,
      "loss": 2.9519,
      "step": 138814
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0392560958862305,
      "learning_rate": 0.00020506780388000046,
      "loss": 2.7887,
      "step": 138815
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.02061128616333,
      "learning_rate": 0.0002050639235311987,
      "loss": 2.9958,
      "step": 138816
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8607938289642334,
      "learning_rate": 0.00020506004320004743,
      "loss": 3.0448,
      "step": 138817
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.493075370788574,
      "learning_rate": 0.00020505616288654733,
      "loss": 2.9965,
      "step": 138818
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.968076467514038,
      "learning_rate": 0.00020505228259069902,
      "loss": 2.9996,
      "step": 138819
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5587213039398193,
      "learning_rate": 0.0002050484023125033,
      "loss": 2.9747,
      "step": 138820
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.09631085395813,
      "learning_rate": 0.0002050445220519608,
      "loss": 2.8664,
      "step": 138821
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4257986545562744,
      "learning_rate": 0.00020504064180907233,
      "loss": 3.1264,
      "step": 138822
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.023024797439575,
      "learning_rate": 0.0002050367615838386,
      "loss": 3.0669,
      "step": 138823
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.783134698867798,
      "learning_rate": 0.0002050328813762605,
      "loss": 2.9712,
      "step": 138824
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.438528060913086,
      "learning_rate": 0.0002050290011863384,
      "loss": 2.8025,
      "step": 138825
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3072752952575684,
      "learning_rate": 0.00020502512101407326,
      "loss": 2.9698,
      "step": 138826
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.959444522857666,
      "learning_rate": 0.0002050212408594657,
      "loss": 2.9735,
      "step": 138827
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1416971683502197,
      "learning_rate": 0.0002050173607225165,
      "loss": 3.1914,
      "step": 138828
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8908350467681885,
      "learning_rate": 0.00020501348060322636,
      "loss": 2.7874,
      "step": 138829
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.320089340209961,
      "learning_rate": 0.0002050096005015961,
      "loss": 2.9952,
      "step": 138830
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7865149974823,
      "learning_rate": 0.00020500572041762626,
      "loss": 3.1563,
      "step": 138831
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.881569504737854,
      "learning_rate": 0.00020500184035131765,
      "loss": 2.9534,
      "step": 138832
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.297179698944092,
      "learning_rate": 0.000204997960302671,
      "loss": 2.7016,
      "step": 138833
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2948660850524902,
      "learning_rate": 0.00020499408027168698,
      "loss": 2.7302,
      "step": 138834
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5591893196105957,
      "learning_rate": 0.00020499020025836642,
      "loss": 2.8966,
      "step": 138835
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.31791615486145,
      "learning_rate": 0.00020498632026270998,
      "loss": 3.1048,
      "step": 138836
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4557762145996094,
      "learning_rate": 0.0002049824402847184,
      "loss": 3.16,
      "step": 138837
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.608110189437866,
      "learning_rate": 0.0002049785603243923,
      "loss": 3.0016,
      "step": 138838
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.1348700523376465,
      "learning_rate": 0.0002049746803817325,
      "loss": 2.7146,
      "step": 138839
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.564107894897461,
      "learning_rate": 0.0002049708004567397,
      "loss": 3.0646,
      "step": 138840
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2254087924957275,
      "learning_rate": 0.00020496692054941458,
      "loss": 2.9789,
      "step": 138841
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1317074298858643,
      "learning_rate": 0.000204963040659758,
      "loss": 2.8288,
      "step": 138842
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.282114028930664,
      "learning_rate": 0.00020495916078777055,
      "loss": 2.9636,
      "step": 138843
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0037930011749268,
      "learning_rate": 0.00020495528093345298,
      "loss": 3.0916,
      "step": 138844
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.671172857284546,
      "learning_rate": 0.00020495140109680603,
      "loss": 2.9652,
      "step": 138845
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4472239017486572,
      "learning_rate": 0.00020494752127783034,
      "loss": 3.0222,
      "step": 138846
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7632100582122803,
      "learning_rate": 0.00020494364147652676,
      "loss": 3.0078,
      "step": 138847
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.833434104919434,
      "learning_rate": 0.00020493976169289597,
      "loss": 2.9312,
      "step": 138848
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.001262664794922,
      "learning_rate": 0.00020493588192693863,
      "loss": 2.938,
      "step": 138849
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2310428619384766,
      "learning_rate": 0.00020493200217865549,
      "loss": 2.9937,
      "step": 138850
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4795756340026855,
      "learning_rate": 0.00020492812244804736,
      "loss": 2.8191,
      "step": 138851
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.6870334148406982,
      "learning_rate": 0.00020492424273511482,
      "loss": 3.1032,
      "step": 138852
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8000198602676392,
      "learning_rate": 0.00020492036303985868,
      "loss": 3.1424,
      "step": 138853
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.81131911277771,
      "learning_rate": 0.00020491648336227967,
      "loss": 3.005,
      "step": 138854
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9977144002914429,
      "learning_rate": 0.00020491260370237842,
      "loss": 3.0001,
      "step": 138855
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9918835163116455,
      "learning_rate": 0.00020490872406015576,
      "loss": 3.1199,
      "step": 138856
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4724783897399902,
      "learning_rate": 0.00020490484443561234,
      "loss": 2.7137,
      "step": 138857
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1948461532592773,
      "learning_rate": 0.0002049009648287489,
      "loss": 3.0362,
      "step": 138858
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3436293601989746,
      "learning_rate": 0.0002048970852395662,
      "loss": 3.0165,
      "step": 138859
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.518371105194092,
      "learning_rate": 0.00020489320566806487,
      "loss": 2.7892,
      "step": 138860
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9626840353012085,
      "learning_rate": 0.00020488932611424572,
      "loss": 3.0946,
      "step": 138861
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.090786933898926,
      "learning_rate": 0.00020488544657810945,
      "loss": 2.9719,
      "step": 138862
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.664780855178833,
      "learning_rate": 0.00020488156705965674,
      "loss": 3.0906,
      "step": 138863
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3048665523529053,
      "learning_rate": 0.00020487768755888833,
      "loss": 2.7856,
      "step": 138864
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.485804319381714,
      "learning_rate": 0.000204873808075805,
      "loss": 2.685,
      "step": 138865
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3447296619415283,
      "learning_rate": 0.00020486992861040742,
      "loss": 3.0297,
      "step": 138866
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2902073860168457,
      "learning_rate": 0.00020486604916269635,
      "loss": 2.662,
      "step": 138867
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6358425617218018,
      "learning_rate": 0.00020486216973267243,
      "loss": 3.0634,
      "step": 138868
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8495720624923706,
      "learning_rate": 0.00020485829032033644,
      "loss": 2.7943,
      "step": 138869
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.184926748275757,
      "learning_rate": 0.00020485441092568904,
      "loss": 3.0584,
      "step": 138870
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.062941074371338,
      "learning_rate": 0.00020485053154873102,
      "loss": 3.1701,
      "step": 138871
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.807718515396118,
      "learning_rate": 0.0002048466521894631,
      "loss": 2.6798,
      "step": 138872
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9775123596191406,
      "learning_rate": 0.0002048427728478861,
      "loss": 3.1985,
      "step": 138873
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.082335948944092,
      "learning_rate": 0.00020483889352400047,
      "loss": 2.9419,
      "step": 138874
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3382277488708496,
      "learning_rate": 0.0002048350142178071,
      "loss": 3.0716,
      "step": 138875
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0441291332244873,
      "learning_rate": 0.00020483113492930673,
      "loss": 3.133,
      "step": 138876
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.829264521598816,
      "learning_rate": 0.00020482725565850002,
      "loss": 2.8363,
      "step": 138877
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9545005559921265,
      "learning_rate": 0.00020482337640538776,
      "loss": 2.7866,
      "step": 138878
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.422539710998535,
      "learning_rate": 0.00020481949716997068,
      "loss": 2.9426,
      "step": 138879
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0929014682769775,
      "learning_rate": 0.0002048156179522494,
      "loss": 2.9043,
      "step": 138880
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.011504650115967,
      "learning_rate": 0.00020481173875222468,
      "loss": 2.9848,
      "step": 138881
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8694252967834473,
      "learning_rate": 0.0002048078595698972,
      "loss": 3.1295,
      "step": 138882
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.263077974319458,
      "learning_rate": 0.0002048039804052678,
      "loss": 3.2132,
      "step": 138883
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.449147939682007,
      "learning_rate": 0.0002048001012583371,
      "loss": 3.072,
      "step": 138884
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5325584411621094,
      "learning_rate": 0.000204796222129106,
      "loss": 3.0121,
      "step": 138885
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1133430004119873,
      "learning_rate": 0.00020479234301757493,
      "loss": 3.1304,
      "step": 138886
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.276524543762207,
      "learning_rate": 0.0002047884639237448,
      "loss": 3.0537,
      "step": 138887
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9787788391113281,
      "learning_rate": 0.00020478458484761623,
      "loss": 2.8745,
      "step": 138888
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4899678230285645,
      "learning_rate": 0.00020478070578919006,
      "loss": 2.9223,
      "step": 138889
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3561511039733887,
      "learning_rate": 0.00020477682674846693,
      "loss": 2.8108,
      "step": 138890
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9242992401123047,
      "learning_rate": 0.00020477294772544775,
      "loss": 2.9606,
      "step": 138891
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6393229961395264,
      "learning_rate": 0.00020476906872013293,
      "loss": 2.912,
      "step": 138892
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.500513792037964,
      "learning_rate": 0.0002047651897325233,
      "loss": 2.9809,
      "step": 138893
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0306830406188965,
      "learning_rate": 0.00020476131076261966,
      "loss": 3.0713,
      "step": 138894
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2336490154266357,
      "learning_rate": 0.00020475743181042267,
      "loss": 2.8791,
      "step": 138895
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9752343893051147,
      "learning_rate": 0.00020475355287593306,
      "loss": 3.086,
      "step": 138896
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.067014455795288,
      "learning_rate": 0.0002047496739591517,
      "loss": 3.0307,
      "step": 138897
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.231708526611328,
      "learning_rate": 0.0002047457950600791,
      "loss": 2.7323,
      "step": 138898
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.302931070327759,
      "learning_rate": 0.000204741916178716,
      "loss": 2.8131,
      "step": 138899
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5462446212768555,
      "learning_rate": 0.00020473803731506317,
      "loss": 3.0841,
      "step": 138900
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.5838072299957275,
      "learning_rate": 0.00020473415846912132,
      "loss": 3.0541,
      "step": 138901
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.652297019958496,
      "learning_rate": 0.00020473027964089125,
      "loss": 2.9551,
      "step": 138902
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4571990966796875,
      "learning_rate": 0.0002047264008303737,
      "loss": 2.9539,
      "step": 138903
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9063273668289185,
      "learning_rate": 0.00020472252203756922,
      "loss": 2.9373,
      "step": 138904
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.197298049926758,
      "learning_rate": 0.00020471864326247858,
      "loss": 3.1045,
      "step": 138905
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5995559692382812,
      "learning_rate": 0.00020471476450510258,
      "loss": 2.7789,
      "step": 138906
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3718929290771484,
      "learning_rate": 0.00020471088576544188,
      "loss": 2.9671,
      "step": 138907
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.180922746658325,
      "learning_rate": 0.00020470700704349726,
      "loss": 2.8679,
      "step": 138908
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3977839946746826,
      "learning_rate": 0.00020470312833926948,
      "loss": 2.9999,
      "step": 138909
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0507731437683105,
      "learning_rate": 0.00020469924965275908,
      "loss": 2.7732,
      "step": 138910
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8977503776550293,
      "learning_rate": 0.0002046953709839669,
      "loss": 3.2061,
      "step": 138911
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0257720947265625,
      "learning_rate": 0.00020469149233289365,
      "loss": 3.0023,
      "step": 138912
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4248387813568115,
      "learning_rate": 0.00020468761369954005,
      "loss": 3.1256,
      "step": 138913
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1796727180480957,
      "learning_rate": 0.0002046837350839068,
      "loss": 3.1434,
      "step": 138914
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1205530166625977,
      "learning_rate": 0.00020467985648599482,
      "loss": 3.0593,
      "step": 138915
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9673718214035034,
      "learning_rate": 0.00020467597790580446,
      "loss": 3.2377,
      "step": 138916
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.880117177963257,
      "learning_rate": 0.00020467209934333665,
      "loss": 2.9893,
      "step": 138917
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.005258798599243,
      "learning_rate": 0.00020466822079859216,
      "loss": 3.2051,
      "step": 138918
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1955342292785645,
      "learning_rate": 0.0002046643422715716,
      "loss": 2.7561,
      "step": 138919
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.532003879547119,
      "learning_rate": 0.00020466046376227574,
      "loss": 2.9474,
      "step": 138920
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.109924554824829,
      "learning_rate": 0.00020465658527070544,
      "loss": 3.186,
      "step": 138921
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.305335521697998,
      "learning_rate": 0.0002046527067968611,
      "loss": 2.8175,
      "step": 138922
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.4285600185394287,
      "learning_rate": 0.00020464882834074366,
      "loss": 3.0258,
      "step": 138923
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3698580265045166,
      "learning_rate": 0.00020464494990235382,
      "loss": 2.9642,
      "step": 138924
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4935481548309326,
      "learning_rate": 0.00020464107148169226,
      "loss": 3.1562,
      "step": 138925
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8960628509521484,
      "learning_rate": 0.0002046371930787597,
      "loss": 2.8747,
      "step": 138926
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1235620975494385,
      "learning_rate": 0.00020463331469355696,
      "loss": 3.0628,
      "step": 138927
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4375813007354736,
      "learning_rate": 0.00020462943632608476,
      "loss": 3.1209,
      "step": 138928
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.183035373687744,
      "learning_rate": 0.00020462555797634365,
      "loss": 2.9513,
      "step": 138929
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.28495192527771,
      "learning_rate": 0.00020462167964433446,
      "loss": 2.775,
      "step": 138930
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.224876642227173,
      "learning_rate": 0.00020461780133005785,
      "loss": 2.9745,
      "step": 138931
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6209919452667236,
      "learning_rate": 0.0002046139230335146,
      "loss": 2.6268,
      "step": 138932
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.436460494995117,
      "learning_rate": 0.00020461004475470543,
      "loss": 3.1087,
      "step": 138933
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.582876205444336,
      "learning_rate": 0.00020460616649363116,
      "loss": 2.9905,
      "step": 138934
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3985440731048584,
      "learning_rate": 0.00020460228825029228,
      "loss": 2.8507,
      "step": 138935
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0264151096343994,
      "learning_rate": 0.00020459841002468972,
      "loss": 2.9325,
      "step": 138936
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0409038066864014,
      "learning_rate": 0.00020459453181682406,
      "loss": 3.2579,
      "step": 138937
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8560757637023926,
      "learning_rate": 0.00020459065362669605,
      "loss": 2.8311,
      "step": 138938
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3309972286224365,
      "learning_rate": 0.00020458677545430652,
      "loss": 3.0099,
      "step": 138939
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.242497682571411,
      "learning_rate": 0.0002045828972996561,
      "loss": 2.8758,
      "step": 138940
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1581804752349854,
      "learning_rate": 0.00020457901916274545,
      "loss": 3.1341,
      "step": 138941
    },
    {
      "epoch": 1.81,
      "grad_norm": 6.4060211181640625,
      "learning_rate": 0.0002045751410435754,
      "loss": 3.1214,
      "step": 138942
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.8715953826904297,
      "learning_rate": 0.00020457126294214667,
      "loss": 2.9951,
      "step": 138943
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.425076723098755,
      "learning_rate": 0.00020456738485845992,
      "loss": 3.0323,
      "step": 138944
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4056549072265625,
      "learning_rate": 0.00020456350679251583,
      "loss": 3.0325,
      "step": 138945
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5582594871520996,
      "learning_rate": 0.0002045596287443153,
      "loss": 3.0143,
      "step": 138946
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.027376890182495,
      "learning_rate": 0.00020455575071385884,
      "loss": 2.7889,
      "step": 138947
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4524269104003906,
      "learning_rate": 0.0002045518727011473,
      "loss": 3.0204,
      "step": 138948
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7854959964752197,
      "learning_rate": 0.00020454799470618135,
      "loss": 3.1681,
      "step": 138949
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4424500465393066,
      "learning_rate": 0.00020454411672896178,
      "loss": 3.0652,
      "step": 138950
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.9913625717163086,
      "learning_rate": 0.00020454023876948922,
      "loss": 3.0619,
      "step": 138951
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8689996004104614,
      "learning_rate": 0.00020453636082776449,
      "loss": 2.9826,
      "step": 138952
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.275179386138916,
      "learning_rate": 0.00020453248290378822,
      "loss": 2.9173,
      "step": 138953
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.753682851791382,
      "learning_rate": 0.0002045286049975611,
      "loss": 2.8826,
      "step": 138954
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.253673791885376,
      "learning_rate": 0.00020452472710908394,
      "loss": 3.0636,
      "step": 138955
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3264832496643066,
      "learning_rate": 0.00020452084923835745,
      "loss": 3.1184,
      "step": 138956
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3619096279144287,
      "learning_rate": 0.00020451697138538241,
      "loss": 3.2242,
      "step": 138957
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.36188006401062,
      "learning_rate": 0.00020451309355015943,
      "loss": 2.9954,
      "step": 138958
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1147351264953613,
      "learning_rate": 0.00020450921573268922,
      "loss": 2.7934,
      "step": 138959
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.159757375717163,
      "learning_rate": 0.00020450533793297255,
      "loss": 3.0381,
      "step": 138960
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5664591789245605,
      "learning_rate": 0.00020450146015101015,
      "loss": 2.9152,
      "step": 138961
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2869372367858887,
      "learning_rate": 0.00020449758238680273,
      "loss": 3.1344,
      "step": 138962
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.366565227508545,
      "learning_rate": 0.000204493704640351,
      "loss": 3.1389,
      "step": 138963
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8183149099349976,
      "learning_rate": 0.00020448982691165582,
      "loss": 2.9463,
      "step": 138964
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0299770832061768,
      "learning_rate": 0.00020448594920071766,
      "loss": 2.9121,
      "step": 138965
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5544545650482178,
      "learning_rate": 0.00020448207150753736,
      "loss": 2.8166,
      "step": 138966
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2161548137664795,
      "learning_rate": 0.00020447819383211566,
      "loss": 2.9373,
      "step": 138967
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9393970966339111,
      "learning_rate": 0.0002044743161744533,
      "loss": 2.9155,
      "step": 138968
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3079113960266113,
      "learning_rate": 0.00020447043853455093,
      "loss": 2.7221,
      "step": 138969
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9146522283554077,
      "learning_rate": 0.0002044665609124094,
      "loss": 2.9622,
      "step": 138970
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0726916790008545,
      "learning_rate": 0.00020446268330802925,
      "loss": 3.0821,
      "step": 138971
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.165541887283325,
      "learning_rate": 0.0002044588057214113,
      "loss": 2.9434,
      "step": 138972
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3214104175567627,
      "learning_rate": 0.00020445492815255621,
      "loss": 2.8042,
      "step": 138973
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.690619707107544,
      "learning_rate": 0.0002044510506014648,
      "loss": 2.9651,
      "step": 138974
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.900639533996582,
      "learning_rate": 0.00020444717306813775,
      "loss": 2.8697,
      "step": 138975
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.780210018157959,
      "learning_rate": 0.00020444329555257588,
      "loss": 2.9547,
      "step": 138976
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.362638235092163,
      "learning_rate": 0.0002044394180547797,
      "loss": 2.997,
      "step": 138977
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.099714756011963,
      "learning_rate": 0.00020443554057475002,
      "loss": 3.1213,
      "step": 138978
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.944441556930542,
      "learning_rate": 0.00020443166311248756,
      "loss": 2.85,
      "step": 138979
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.760227918624878,
      "learning_rate": 0.0002044277856679931,
      "loss": 3.2097,
      "step": 138980
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3211095333099365,
      "learning_rate": 0.00020442390824126726,
      "loss": 2.9459,
      "step": 138981
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.341808795928955,
      "learning_rate": 0.000204420030832311,
      "loss": 2.9583,
      "step": 138982
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.386378288269043,
      "learning_rate": 0.0002044161534411247,
      "loss": 2.806,
      "step": 138983
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.536105155944824,
      "learning_rate": 0.00020441227606770928,
      "loss": 2.8793,
      "step": 138984
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.681589126586914,
      "learning_rate": 0.0002044083987120654,
      "loss": 3.2052,
      "step": 138985
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.932312250137329,
      "learning_rate": 0.00020440452137419375,
      "loss": 3.1108,
      "step": 138986
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9896090030670166,
      "learning_rate": 0.00020440064405409517,
      "loss": 2.8281,
      "step": 138987
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1962358951568604,
      "learning_rate": 0.00020439676675177045,
      "loss": 3.2324,
      "step": 138988
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9668035507202148,
      "learning_rate": 0.00020439288946722,
      "loss": 2.9174,
      "step": 138989
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.047903299331665,
      "learning_rate": 0.00020438901220044478,
      "loss": 3.1604,
      "step": 138990
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.062055826187134,
      "learning_rate": 0.00020438513495144538,
      "loss": 2.9928,
      "step": 138991
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0719637870788574,
      "learning_rate": 0.0002043812577202226,
      "loss": 2.9236,
      "step": 138992
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.892486810684204,
      "learning_rate": 0.00020437738050677718,
      "loss": 2.7868,
      "step": 138993
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9860762357711792,
      "learning_rate": 0.0002043735033111098,
      "loss": 2.8618,
      "step": 138994
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.953872561454773,
      "learning_rate": 0.00020436962613322132,
      "loss": 2.888,
      "step": 138995
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0776851177215576,
      "learning_rate": 0.0002043657489731122,
      "loss": 2.9675,
      "step": 138996
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6742210388183594,
      "learning_rate": 0.00020436187183078328,
      "loss": 2.9833,
      "step": 138997
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1349711418151855,
      "learning_rate": 0.00020435799470623536,
      "loss": 2.9033,
      "step": 138998
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3603296279907227,
      "learning_rate": 0.00020435411759946897,
      "loss": 3.1889,
      "step": 138999
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9734702110290527,
      "learning_rate": 0.00020435024051048504,
      "loss": 2.8817,
      "step": 139000
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8475316762924194,
      "learning_rate": 0.00020434636343928434,
      "loss": 3.2329,
      "step": 139001
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3412277698516846,
      "learning_rate": 0.0002043424863858673,
      "loss": 3.1079,
      "step": 139002
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0474300384521484,
      "learning_rate": 0.00020433860935023477,
      "loss": 2.8816,
      "step": 139003
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.250715970993042,
      "learning_rate": 0.00020433473233238755,
      "loss": 2.915,
      "step": 139004
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.899023413658142,
      "learning_rate": 0.0002043308553323263,
      "loss": 3.2398,
      "step": 139005
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.7484447956085205,
      "learning_rate": 0.00020432697835005175,
      "loss": 2.9512,
      "step": 139006
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.271278142929077,
      "learning_rate": 0.00020432310138556475,
      "loss": 3.0417,
      "step": 139007
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.7331377267837524,
      "learning_rate": 0.00020431922443886575,
      "loss": 2.8984,
      "step": 139008
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.040985107421875,
      "learning_rate": 0.0002043153475099556,
      "loss": 3.0155,
      "step": 139009
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4546308517456055,
      "learning_rate": 0.00020431147059883504,
      "loss": 2.8406,
      "step": 139010
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5679891109466553,
      "learning_rate": 0.0002043075937055048,
      "loss": 2.9508,
      "step": 139011
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.216846942901611,
      "learning_rate": 0.0002043037168299656,
      "loss": 2.6187,
      "step": 139012
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.961771011352539,
      "learning_rate": 0.00020429983997221824,
      "loss": 2.9146,
      "step": 139013
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.870561957359314,
      "learning_rate": 0.00020429596313226324,
      "loss": 2.7562,
      "step": 139014
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0488059520721436,
      "learning_rate": 0.00020429208631010142,
      "loss": 2.8201,
      "step": 139015
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.105416774749756,
      "learning_rate": 0.0002042882095057335,
      "loss": 3.1358,
      "step": 139016
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.604907274246216,
      "learning_rate": 0.0002042843327191602,
      "loss": 2.9751,
      "step": 139017
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9063258171081543,
      "learning_rate": 0.00020428045595038226,
      "loss": 2.9517,
      "step": 139018
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9312820434570312,
      "learning_rate": 0.00020427657919940047,
      "loss": 3.0772,
      "step": 139019
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8664486408233643,
      "learning_rate": 0.00020427270246621543,
      "loss": 2.9304,
      "step": 139020
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3440685272216797,
      "learning_rate": 0.0002042688257508279,
      "loss": 2.9846,
      "step": 139021
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.423771858215332,
      "learning_rate": 0.00020426494905323857,
      "loss": 2.9534,
      "step": 139022
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.212724208831787,
      "learning_rate": 0.00020426107237344818,
      "loss": 3.1785,
      "step": 139023
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.417236089706421,
      "learning_rate": 0.00020425719571145746,
      "loss": 3.039,
      "step": 139024
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.35063099861145,
      "learning_rate": 0.00020425331906726723,
      "loss": 2.8997,
      "step": 139025
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.79825758934021,
      "learning_rate": 0.000204249442440878,
      "loss": 3.2452,
      "step": 139026
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.112205743789673,
      "learning_rate": 0.00020424556583229067,
      "loss": 2.9836,
      "step": 139027
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5235791206359863,
      "learning_rate": 0.0002042416892415059,
      "loss": 2.9851,
      "step": 139028
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6456291675567627,
      "learning_rate": 0.0002042378126685244,
      "loss": 2.9703,
      "step": 139029
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7556467056274414,
      "learning_rate": 0.00020423393611334687,
      "loss": 3.1899,
      "step": 139030
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1364505290985107,
      "learning_rate": 0.00020423005957597408,
      "loss": 3.0106,
      "step": 139031
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5268306732177734,
      "learning_rate": 0.00020422618305640674,
      "loss": 3.0087,
      "step": 139032
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3847403526306152,
      "learning_rate": 0.00020422230655464554,
      "loss": 3.3956,
      "step": 139033
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.435270309448242,
      "learning_rate": 0.00020421843007069118,
      "loss": 2.808,
      "step": 139034
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4816906452178955,
      "learning_rate": 0.00020421455360454454,
      "loss": 2.9869,
      "step": 139035
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1498448848724365,
      "learning_rate": 0.00020421067715620616,
      "loss": 2.9838,
      "step": 139036
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.408945322036743,
      "learning_rate": 0.00020420680072567683,
      "loss": 2.9152,
      "step": 139037
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.261220693588257,
      "learning_rate": 0.00020420292431295726,
      "loss": 3.1113,
      "step": 139038
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.5158896446228027,
      "learning_rate": 0.00020419904791804812,
      "loss": 3.0903,
      "step": 139039
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4873032569885254,
      "learning_rate": 0.00020419517154095023,
      "loss": 2.9124,
      "step": 139040
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9057419300079346,
      "learning_rate": 0.00020419129518166426,
      "loss": 2.7915,
      "step": 139041
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.377295732498169,
      "learning_rate": 0.00020418741884019098,
      "loss": 2.7961,
      "step": 139042
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1176321506500244,
      "learning_rate": 0.00020418354251653105,
      "loss": 2.9456,
      "step": 139043
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9517290592193604,
      "learning_rate": 0.00020417966621068522,
      "loss": 3.1456,
      "step": 139044
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.325075387954712,
      "learning_rate": 0.00020417578992265413,
      "loss": 3.2785,
      "step": 139045
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9168541431427,
      "learning_rate": 0.0002041719136524386,
      "loss": 3.0433,
      "step": 139046
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.091524124145508,
      "learning_rate": 0.00020416803740003932,
      "loss": 3.1611,
      "step": 139047
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9644575119018555,
      "learning_rate": 0.000204164161165457,
      "loss": 3.0972,
      "step": 139048
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4507389068603516,
      "learning_rate": 0.0002041602849486925,
      "loss": 3.0827,
      "step": 139049
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9726907014846802,
      "learning_rate": 0.0002041564087497463,
      "loss": 3.0024,
      "step": 139050
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2339742183685303,
      "learning_rate": 0.0002041525325686192,
      "loss": 2.9449,
      "step": 139051
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5496883392333984,
      "learning_rate": 0.00020414865640531197,
      "loss": 2.8596,
      "step": 139052
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2183175086975098,
      "learning_rate": 0.00020414478025982533,
      "loss": 2.7936,
      "step": 139053
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.184136152267456,
      "learning_rate": 0.00020414090413215996,
      "loss": 3.1034,
      "step": 139054
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.480631113052368,
      "learning_rate": 0.00020413702802231674,
      "loss": 2.879,
      "step": 139055
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.199903964996338,
      "learning_rate": 0.00020413315193029612,
      "loss": 2.8987,
      "step": 139056
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.694316864013672,
      "learning_rate": 0.00020412927585609897,
      "loss": 2.9684,
      "step": 139057
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9427738189697266,
      "learning_rate": 0.000204125399799726,
      "loss": 2.7493,
      "step": 139058
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3228366374969482,
      "learning_rate": 0.0002041215237611779,
      "loss": 2.6576,
      "step": 139059
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0129919052124023,
      "learning_rate": 0.00020411764774045548,
      "loss": 2.7285,
      "step": 139060
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0987279415130615,
      "learning_rate": 0.00020411377173755937,
      "loss": 2.8794,
      "step": 139061
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.036703586578369,
      "learning_rate": 0.00020410989575249042,
      "loss": 3.0394,
      "step": 139062
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.912703037261963,
      "learning_rate": 0.00020410601978524916,
      "loss": 2.8222,
      "step": 139063
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.42295503616333,
      "learning_rate": 0.00020410214383583637,
      "loss": 3.0196,
      "step": 139064
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.096494436264038,
      "learning_rate": 0.00020409826790425281,
      "loss": 3.0719,
      "step": 139065
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.274385690689087,
      "learning_rate": 0.0002040943919904992,
      "loss": 3.0658,
      "step": 139066
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2510576248168945,
      "learning_rate": 0.00020409051609457623,
      "loss": 3.0422,
      "step": 139067
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.882798671722412,
      "learning_rate": 0.00020408664021648483,
      "loss": 2.7425,
      "step": 139068
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5592143535614014,
      "learning_rate": 0.00020408276435622536,
      "loss": 2.8635,
      "step": 139069
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.050816297531128,
      "learning_rate": 0.0002040788885137987,
      "loss": 2.9593,
      "step": 139070
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9595115184783936,
      "learning_rate": 0.0002040750126892056,
      "loss": 2.9836,
      "step": 139071
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8655526638031006,
      "learning_rate": 0.00020407113688244682,
      "loss": 2.8515,
      "step": 139072
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9102303981781006,
      "learning_rate": 0.00020406726109352294,
      "loss": 3.042,
      "step": 139073
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.481358289718628,
      "learning_rate": 0.00020406338532243494,
      "loss": 2.8819,
      "step": 139074
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.352879524230957,
      "learning_rate": 0.00020405950956918324,
      "loss": 3.2006,
      "step": 139075
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.105949878692627,
      "learning_rate": 0.00020405563383376867,
      "loss": 3.0212,
      "step": 139076
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2497339248657227,
      "learning_rate": 0.000204051758116192,
      "loss": 3.0661,
      "step": 139077
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9382017850875854,
      "learning_rate": 0.0002040478824164539,
      "loss": 2.9815,
      "step": 139078
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6477622985839844,
      "learning_rate": 0.0002040440067345551,
      "loss": 2.9938,
      "step": 139079
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5891993045806885,
      "learning_rate": 0.00020404013107049647,
      "loss": 3.0535,
      "step": 139080
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0234622955322266,
      "learning_rate": 0.00020403625542427846,
      "loss": 2.7813,
      "step": 139081
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9083974361419678,
      "learning_rate": 0.00020403237979590194,
      "loss": 3.1093,
      "step": 139082
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.641653537750244,
      "learning_rate": 0.00020402850418536761,
      "loss": 3.2558,
      "step": 139083
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.238691806793213,
      "learning_rate": 0.00020402462859267617,
      "loss": 2.9426,
      "step": 139084
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.18843936920166,
      "learning_rate": 0.00020402075301782836,
      "loss": 3.0042,
      "step": 139085
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9418110847473145,
      "learning_rate": 0.00020401687746082503,
      "loss": 3.0747,
      "step": 139086
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.567842721939087,
      "learning_rate": 0.00020401300192166667,
      "loss": 3.0922,
      "step": 139087
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.984675407409668,
      "learning_rate": 0.00020400912640035408,
      "loss": 2.6291,
      "step": 139088
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0600268840789795,
      "learning_rate": 0.00020400525089688803,
      "loss": 2.9834,
      "step": 139089
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8643403053283691,
      "learning_rate": 0.0002040013754112692,
      "loss": 2.9638,
      "step": 139090
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.260498285293579,
      "learning_rate": 0.00020399749994349833,
      "loss": 2.8218,
      "step": 139091
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7835640907287598,
      "learning_rate": 0.00020399362449357631,
      "loss": 2.9472,
      "step": 139092
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1264352798461914,
      "learning_rate": 0.00020398974906150349,
      "loss": 2.8635,
      "step": 139093
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.84694504737854,
      "learning_rate": 0.00020398587364728083,
      "loss": 3.0668,
      "step": 139094
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2953948974609375,
      "learning_rate": 0.00020398199825090896,
      "loss": 2.7181,
      "step": 139095
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5158164501190186,
      "learning_rate": 0.00020397812287238867,
      "loss": 3.1729,
      "step": 139096
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.624202013015747,
      "learning_rate": 0.00020397424751172072,
      "loss": 3.045,
      "step": 139097
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9651871919631958,
      "learning_rate": 0.00020397037216890581,
      "loss": 3.1051,
      "step": 139098
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.080059051513672,
      "learning_rate": 0.00020396649684394452,
      "loss": 3.1306,
      "step": 139099
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9874169826507568,
      "learning_rate": 0.0002039626215368377,
      "loss": 3.0084,
      "step": 139100
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.7904843091964722,
      "learning_rate": 0.000203958746247586,
      "loss": 3.1178,
      "step": 139101
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.22943115234375,
      "learning_rate": 0.00020395487097619024,
      "loss": 2.8801,
      "step": 139102
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2344257831573486,
      "learning_rate": 0.00020395099572265103,
      "loss": 2.9858,
      "step": 139103
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.561727285385132,
      "learning_rate": 0.00020394712048696926,
      "loss": 3.0,
      "step": 139104
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.261814832687378,
      "learning_rate": 0.00020394324526914548,
      "loss": 2.8898,
      "step": 139105
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.032759189605713,
      "learning_rate": 0.00020393937006918042,
      "loss": 3.1453,
      "step": 139106
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.876934289932251,
      "learning_rate": 0.00020393549488707485,
      "loss": 3.2273,
      "step": 139107
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.385040760040283,
      "learning_rate": 0.0002039316197228295,
      "loss": 2.8136,
      "step": 139108
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.864013910293579,
      "learning_rate": 0.00020392774457644506,
      "loss": 2.8821,
      "step": 139109
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.031672954559326,
      "learning_rate": 0.00020392386944792233,
      "loss": 2.7878,
      "step": 139110
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8306474685668945,
      "learning_rate": 0.00020391999433726186,
      "loss": 3.1818,
      "step": 139111
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.243065357208252,
      "learning_rate": 0.0002039161192444646,
      "loss": 3.0205,
      "step": 139112
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.152005195617676,
      "learning_rate": 0.00020391224416953105,
      "loss": 2.8949,
      "step": 139113
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.479487657546997,
      "learning_rate": 0.00020390836911246202,
      "loss": 3.0917,
      "step": 139114
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.840065956115723,
      "learning_rate": 0.00020390449407325828,
      "loss": 3.2281,
      "step": 139115
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.6245124340057373,
      "learning_rate": 0.00020390061905192053,
      "loss": 2.714,
      "step": 139116
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2906579971313477,
      "learning_rate": 0.00020389674404844944,
      "loss": 3.1287,
      "step": 139117
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7883095741271973,
      "learning_rate": 0.00020389286906284575,
      "loss": 2.8956,
      "step": 139118
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.418839931488037,
      "learning_rate": 0.00020388899409511024,
      "loss": 2.9358,
      "step": 139119
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5224204063415527,
      "learning_rate": 0.0002038851191452435,
      "loss": 2.9377,
      "step": 139120
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.0966436862945557,
      "learning_rate": 0.00020388124421324635,
      "loss": 2.8853,
      "step": 139121
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8653032779693604,
      "learning_rate": 0.00020387736929911956,
      "loss": 2.729,
      "step": 139122
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.726860761642456,
      "learning_rate": 0.0002038734944028637,
      "loss": 2.997,
      "step": 139123
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2586896419525146,
      "learning_rate": 0.00020386961952447962,
      "loss": 2.8166,
      "step": 139124
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4636054039001465,
      "learning_rate": 0.00020386574466396795,
      "loss": 2.9981,
      "step": 139125
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9619375467300415,
      "learning_rate": 0.00020386186982132947,
      "loss": 3.169,
      "step": 139126
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.039266586303711,
      "learning_rate": 0.00020385799499656493,
      "loss": 2.9086,
      "step": 139127
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.6767516136169434,
      "learning_rate": 0.00020385412018967495,
      "loss": 2.9807,
      "step": 139128
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.106055736541748,
      "learning_rate": 0.00020385024540066037,
      "loss": 2.7282,
      "step": 139129
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2181572914123535,
      "learning_rate": 0.00020384637062952177,
      "loss": 2.9522,
      "step": 139130
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.272508144378662,
      "learning_rate": 0.00020384249587625998,
      "loss": 3.0159,
      "step": 139131
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3637638092041016,
      "learning_rate": 0.00020383862114087563,
      "loss": 2.9071,
      "step": 139132
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.558295726776123,
      "learning_rate": 0.00020383474642336952,
      "loss": 3.0672,
      "step": 139133
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.451648235321045,
      "learning_rate": 0.00020383087172374243,
      "loss": 3.0157,
      "step": 139134
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8472837209701538,
      "learning_rate": 0.000203826997041995,
      "loss": 2.8133,
      "step": 139135
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1823627948760986,
      "learning_rate": 0.00020382312237812784,
      "loss": 2.8387,
      "step": 139136
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.76239812374115,
      "learning_rate": 0.0002038192477321418,
      "loss": 2.9694,
      "step": 139137
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.444827079772949,
      "learning_rate": 0.00020381537310403756,
      "loss": 2.7448,
      "step": 139138
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.251631021499634,
      "learning_rate": 0.0002038114984938159,
      "loss": 3.1284,
      "step": 139139
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.233689308166504,
      "learning_rate": 0.00020380762390147743,
      "loss": 3.3266,
      "step": 139140
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9588913917541504,
      "learning_rate": 0.00020380374932702313,
      "loss": 2.8688,
      "step": 139141
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3950273990631104,
      "learning_rate": 0.0002037998747704534,
      "loss": 3.0548,
      "step": 139142
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9938442707061768,
      "learning_rate": 0.00020379600023176904,
      "loss": 2.9984,
      "step": 139143
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1751596927642822,
      "learning_rate": 0.00020379212571097088,
      "loss": 2.9818,
      "step": 139144
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2188961505889893,
      "learning_rate": 0.00020378825120805953,
      "loss": 2.8756,
      "step": 139145
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0514492988586426,
      "learning_rate": 0.00020378437672303578,
      "loss": 3.1917,
      "step": 139146
    },
    {
      "epoch": 1.81,
      "grad_norm": 5.904061317443848,
      "learning_rate": 0.00020378050225590045,
      "loss": 3.04,
      "step": 139147
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7632997035980225,
      "learning_rate": 0.00020377662780665402,
      "loss": 2.7945,
      "step": 139148
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6065075397491455,
      "learning_rate": 0.00020377275337529735,
      "loss": 3.0013,
      "step": 139149
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.267059803009033,
      "learning_rate": 0.00020376887896183109,
      "loss": 2.9248,
      "step": 139150
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.645843982696533,
      "learning_rate": 0.00020376500456625605,
      "loss": 2.9273,
      "step": 139151
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.8916115760803223,
      "learning_rate": 0.00020376113018857292,
      "loss": 3.0207,
      "step": 139152
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.873915672302246,
      "learning_rate": 0.00020375725582878252,
      "loss": 3.1013,
      "step": 139153
    },
    {
      "epoch": 1.81,
      "grad_norm": 5.961907386779785,
      "learning_rate": 0.00020375338148688536,
      "loss": 2.9777,
      "step": 139154
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2144787311553955,
      "learning_rate": 0.00020374950716288225,
      "loss": 3.0423,
      "step": 139155
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7385294437408447,
      "learning_rate": 0.0002037456328567739,
      "loss": 3.1408,
      "step": 139156
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.513820648193359,
      "learning_rate": 0.0002037417585685611,
      "loss": 3.1079,
      "step": 139157
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.779109477996826,
      "learning_rate": 0.00020373788429824447,
      "loss": 2.7974,
      "step": 139158
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.0710153579711914,
      "learning_rate": 0.00020373401004582491,
      "loss": 2.7853,
      "step": 139159
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3139054775238037,
      "learning_rate": 0.00020373013581130293,
      "loss": 3.1021,
      "step": 139160
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.360237121582031,
      "learning_rate": 0.00020372626159467934,
      "loss": 2.859,
      "step": 139161
    },
    {
      "epoch": 1.81,
      "grad_norm": 6.513521671295166,
      "learning_rate": 0.00020372238739595485,
      "loss": 2.936,
      "step": 139162
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9028306007385254,
      "learning_rate": 0.00020371851321513017,
      "loss": 3.2274,
      "step": 139163
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.4515509605407715,
      "learning_rate": 0.00020371463905220603,
      "loss": 2.9678,
      "step": 139164
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.239194869995117,
      "learning_rate": 0.0002037107649071833,
      "loss": 2.8941,
      "step": 139165
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3128321170806885,
      "learning_rate": 0.00020370689078006243,
      "loss": 2.9386,
      "step": 139166
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.8702986240386963,
      "learning_rate": 0.00020370301667084427,
      "loss": 3.162,
      "step": 139167
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.0146095752716064,
      "learning_rate": 0.0002036991425795295,
      "loss": 3.2495,
      "step": 139168
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6135692596435547,
      "learning_rate": 0.0002036952685061189,
      "loss": 2.8048,
      "step": 139169
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.865844488143921,
      "learning_rate": 0.0002036913944506132,
      "loss": 2.9969,
      "step": 139170
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.410710334777832,
      "learning_rate": 0.00020368752041301311,
      "loss": 2.9882,
      "step": 139171
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8498082160949707,
      "learning_rate": 0.0002036836463933193,
      "loss": 2.9342,
      "step": 139172
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8910329341888428,
      "learning_rate": 0.0002036797723915325,
      "loss": 2.8808,
      "step": 139173
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4949464797973633,
      "learning_rate": 0.00020367589840765342,
      "loss": 2.847,
      "step": 139174
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8152358531951904,
      "learning_rate": 0.00020367202444168285,
      "loss": 3.0118,
      "step": 139175
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.227961778640747,
      "learning_rate": 0.00020366815049362145,
      "loss": 2.8203,
      "step": 139176
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.186279535293579,
      "learning_rate": 0.0002036642765634701,
      "loss": 3.1031,
      "step": 139177
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.307096004486084,
      "learning_rate": 0.00020366040265122924,
      "loss": 2.7736,
      "step": 139178
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.238671064376831,
      "learning_rate": 0.0002036565287568997,
      "loss": 3.0491,
      "step": 139179
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.173741579055786,
      "learning_rate": 0.00020365265488048225,
      "loss": 3.0403,
      "step": 139180
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9148141145706177,
      "learning_rate": 0.00020364878102197763,
      "loss": 2.9147,
      "step": 139181
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.537778377532959,
      "learning_rate": 0.00020364490718138647,
      "loss": 2.9802,
      "step": 139182
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.377561330795288,
      "learning_rate": 0.00020364103335870966,
      "loss": 3.1949,
      "step": 139183
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9447402954101562,
      "learning_rate": 0.0002036371595539477,
      "loss": 3.1007,
      "step": 139184
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.50068998336792,
      "learning_rate": 0.00020363328576710144,
      "loss": 2.9987,
      "step": 139185
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1214001178741455,
      "learning_rate": 0.00020362941199817154,
      "loss": 2.7972,
      "step": 139186
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.398487091064453,
      "learning_rate": 0.00020362553824715876,
      "loss": 3.0421,
      "step": 139187
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9033236503601074,
      "learning_rate": 0.00020362166451406378,
      "loss": 2.6734,
      "step": 139188
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3449010848999023,
      "learning_rate": 0.00020361779079888746,
      "loss": 3.029,
      "step": 139189
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.326681613922119,
      "learning_rate": 0.0002036139171016304,
      "loss": 3.0375,
      "step": 139190
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2111263275146484,
      "learning_rate": 0.00020361004342229322,
      "loss": 2.9113,
      "step": 139191
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3026134967803955,
      "learning_rate": 0.0002036061697608768,
      "loss": 3.1088,
      "step": 139192
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.177072525024414,
      "learning_rate": 0.00020360229611738183,
      "loss": 3.0281,
      "step": 139193
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4164772033691406,
      "learning_rate": 0.000203598422491809,
      "loss": 2.8721,
      "step": 139194
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3423912525177,
      "learning_rate": 0.00020359454888415902,
      "loss": 2.8536,
      "step": 139195
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.114495277404785,
      "learning_rate": 0.00020359067529443273,
      "loss": 2.8817,
      "step": 139196
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9202566146850586,
      "learning_rate": 0.0002035868017226307,
      "loss": 3.1069,
      "step": 139197
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2694928646087646,
      "learning_rate": 0.0002035829281687537,
      "loss": 3.0187,
      "step": 139198
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1081323623657227,
      "learning_rate": 0.00020357905463280242,
      "loss": 3.0704,
      "step": 139199
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9834067821502686,
      "learning_rate": 0.0002035751811147776,
      "loss": 3.082,
      "step": 139200
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2431700229644775,
      "learning_rate": 0.00020357130761468,
      "loss": 2.8296,
      "step": 139201
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9717084169387817,
      "learning_rate": 0.00020356743413251036,
      "loss": 2.9022,
      "step": 139202
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8988109827041626,
      "learning_rate": 0.0002035635606682693,
      "loss": 2.9868,
      "step": 139203
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1374690532684326,
      "learning_rate": 0.0002035596872219577,
      "loss": 3.0715,
      "step": 139204
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3533811569213867,
      "learning_rate": 0.00020355581379357605,
      "loss": 2.9205,
      "step": 139205
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.290961265563965,
      "learning_rate": 0.0002035519403831252,
      "loss": 2.9245,
      "step": 139206
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3137199878692627,
      "learning_rate": 0.00020354806699060586,
      "loss": 2.979,
      "step": 139207
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1475863456726074,
      "learning_rate": 0.00020354419361601886,
      "loss": 3.0563,
      "step": 139208
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9133819341659546,
      "learning_rate": 0.0002035403202593647,
      "loss": 2.854,
      "step": 139209
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8736352920532227,
      "learning_rate": 0.00020353644692064428,
      "loss": 3.2137,
      "step": 139210
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.535904407501221,
      "learning_rate": 0.00020353257359985826,
      "loss": 2.858,
      "step": 139211
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.047173500061035,
      "learning_rate": 0.0002035287002970073,
      "loss": 2.9337,
      "step": 139212
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8586328029632568,
      "learning_rate": 0.00020352482701209223,
      "loss": 2.8463,
      "step": 139213
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6258511543273926,
      "learning_rate": 0.00020352095374511373,
      "loss": 2.9507,
      "step": 139214
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9047863483428955,
      "learning_rate": 0.00020351708049607246,
      "loss": 2.8897,
      "step": 139215
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8482376337051392,
      "learning_rate": 0.00020351320726496916,
      "loss": 2.7903,
      "step": 139216
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8858864307403564,
      "learning_rate": 0.00020350933405180463,
      "loss": 3.1452,
      "step": 139217
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4282584190368652,
      "learning_rate": 0.00020350546085657947,
      "loss": 2.9906,
      "step": 139218
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5642406940460205,
      "learning_rate": 0.00020350158767929458,
      "loss": 2.9055,
      "step": 139219
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9789891242980957,
      "learning_rate": 0.00020349771451995055,
      "loss": 3.0984,
      "step": 139220
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7452938556671143,
      "learning_rate": 0.00020349384137854806,
      "loss": 3.0412,
      "step": 139221
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.555758476257324,
      "learning_rate": 0.00020348996825508785,
      "loss": 3.0784,
      "step": 139222
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.9674360752105713,
      "learning_rate": 0.00020348609514957071,
      "loss": 2.9158,
      "step": 139223
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.261387825012207,
      "learning_rate": 0.00020348222206199732,
      "loss": 2.9929,
      "step": 139224
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9535683393478394,
      "learning_rate": 0.00020347834899236842,
      "loss": 2.9228,
      "step": 139225
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7953391075134277,
      "learning_rate": 0.00020347447594068485,
      "loss": 2.8018,
      "step": 139226
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.989468574523926,
      "learning_rate": 0.00020347060290694705,
      "loss": 2.9074,
      "step": 139227
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0175390243530273,
      "learning_rate": 0.0002034667298911559,
      "loss": 3.0012,
      "step": 139228
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.063758134841919,
      "learning_rate": 0.00020346285689331207,
      "loss": 2.8849,
      "step": 139229
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.7733193635940552,
      "learning_rate": 0.00020345898391341635,
      "loss": 2.8424,
      "step": 139230
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.165559768676758,
      "learning_rate": 0.0002034551109514694,
      "loss": 2.9547,
      "step": 139231
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.243945837020874,
      "learning_rate": 0.00020345123800747213,
      "loss": 3.1292,
      "step": 139232
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0311009883880615,
      "learning_rate": 0.00020344736508142496,
      "loss": 2.9617,
      "step": 139233
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.180400848388672,
      "learning_rate": 0.00020344349217332874,
      "loss": 3.1292,
      "step": 139234
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.0234923362731934,
      "learning_rate": 0.00020343961928318422,
      "loss": 3.0855,
      "step": 139235
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.221572160720825,
      "learning_rate": 0.00020343574641099206,
      "loss": 2.9521,
      "step": 139236
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.887014627456665,
      "learning_rate": 0.00020343187355675304,
      "loss": 2.9803,
      "step": 139237
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2760000228881836,
      "learning_rate": 0.00020342800072046797,
      "loss": 3.1575,
      "step": 139238
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6299688816070557,
      "learning_rate": 0.00020342412790213732,
      "loss": 2.8536,
      "step": 139239
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8278377056121826,
      "learning_rate": 0.000203420255101762,
      "loss": 3.0202,
      "step": 139240
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.167489767074585,
      "learning_rate": 0.00020341638231934265,
      "loss": 2.5829,
      "step": 139241
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.588060140609741,
      "learning_rate": 0.00020341250955488,
      "loss": 2.9843,
      "step": 139242
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3535268306732178,
      "learning_rate": 0.00020340863680837478,
      "loss": 2.7149,
      "step": 139243
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5270209312438965,
      "learning_rate": 0.00020340476407982786,
      "loss": 2.914,
      "step": 139244
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4413716793060303,
      "learning_rate": 0.00020340089136923972,
      "loss": 3.0775,
      "step": 139245
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3575024604797363,
      "learning_rate": 0.00020339701867661114,
      "loss": 3.1444,
      "step": 139246
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.521620988845825,
      "learning_rate": 0.0002033931460019429,
      "loss": 2.9993,
      "step": 139247
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1252100467681885,
      "learning_rate": 0.00020338927334523566,
      "loss": 2.8013,
      "step": 139248
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.264512538909912,
      "learning_rate": 0.00020338540070649025,
      "loss": 3.1416,
      "step": 139249
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.783383846282959,
      "learning_rate": 0.0002033815280857074,
      "loss": 3.029,
      "step": 139250
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.682579278945923,
      "learning_rate": 0.00020337765548288761,
      "loss": 2.6379,
      "step": 139251
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.577831983566284,
      "learning_rate": 0.00020337378289803177,
      "loss": 2.9067,
      "step": 139252
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.090118408203125,
      "learning_rate": 0.00020336991033114053,
      "loss": 3.0812,
      "step": 139253
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2913849353790283,
      "learning_rate": 0.00020336603778221466,
      "loss": 2.9587,
      "step": 139254
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.520378589630127,
      "learning_rate": 0.00020336216525125485,
      "loss": 3.095,
      "step": 139255
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.196902275085449,
      "learning_rate": 0.00020335829273826202,
      "loss": 3.1419,
      "step": 139256
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6572062969207764,
      "learning_rate": 0.00020335442024323655,
      "loss": 2.8979,
      "step": 139257
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2777624130249023,
      "learning_rate": 0.00020335054776617932,
      "loss": 3.0469,
      "step": 139258
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6123785972595215,
      "learning_rate": 0.00020334667530709106,
      "loss": 3.1476,
      "step": 139259
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4653666019439697,
      "learning_rate": 0.00020334280286597243,
      "loss": 3.159,
      "step": 139260
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.5738842487335205,
      "learning_rate": 0.00020333893044282425,
      "loss": 3.043,
      "step": 139261
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.5559682846069336,
      "learning_rate": 0.00020333505803764716,
      "loss": 2.9444,
      "step": 139262
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.363260507583618,
      "learning_rate": 0.00020333118565044202,
      "loss": 3.0547,
      "step": 139263
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.79573655128479,
      "learning_rate": 0.00020332731328120932,
      "loss": 2.8938,
      "step": 139264
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.009140968322754,
      "learning_rate": 0.00020332344092994993,
      "loss": 3.0402,
      "step": 139265
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4385814666748047,
      "learning_rate": 0.00020331956859666453,
      "loss": 2.8821,
      "step": 139266
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9895083904266357,
      "learning_rate": 0.00020331569628135384,
      "loss": 2.7659,
      "step": 139267
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.7213807106018066,
      "learning_rate": 0.00020331182398401857,
      "loss": 2.9496,
      "step": 139268
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.249577760696411,
      "learning_rate": 0.00020330795170465956,
      "loss": 3.0333,
      "step": 139269
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1833810806274414,
      "learning_rate": 0.00020330407944327736,
      "loss": 2.7793,
      "step": 139270
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3347418308258057,
      "learning_rate": 0.00020330020719987275,
      "loss": 2.817,
      "step": 139271
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3355329036712646,
      "learning_rate": 0.00020329633497444643,
      "loss": 2.9395,
      "step": 139272
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.79600191116333,
      "learning_rate": 0.00020329246276699914,
      "loss": 3.0476,
      "step": 139273
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.044717788696289,
      "learning_rate": 0.00020328859057753163,
      "loss": 2.9782,
      "step": 139274
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9397289752960205,
      "learning_rate": 0.00020328471840604472,
      "loss": 3.0,
      "step": 139275
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.584812641143799,
      "learning_rate": 0.00020328084625253887,
      "loss": 2.7523,
      "step": 139276
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1436898708343506,
      "learning_rate": 0.00020327697411701493,
      "loss": 3.1641,
      "step": 139277
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.264633893966675,
      "learning_rate": 0.00020327310199947366,
      "loss": 2.9881,
      "step": 139278
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6709790229797363,
      "learning_rate": 0.0002032692298999157,
      "loss": 3.1367,
      "step": 139279
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.724635362625122,
      "learning_rate": 0.00020326535781834182,
      "loss": 2.7937,
      "step": 139280
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9627025127410889,
      "learning_rate": 0.00020326148575475287,
      "loss": 2.7917,
      "step": 139281
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9401264190673828,
      "learning_rate": 0.00020325761370914935,
      "loss": 3.2918,
      "step": 139282
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.992510437965393,
      "learning_rate": 0.00020325374168153205,
      "loss": 2.6634,
      "step": 139283
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4768660068511963,
      "learning_rate": 0.0002032498696719017,
      "loss": 3.1877,
      "step": 139284
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2944445610046387,
      "learning_rate": 0.00020324599768025903,
      "loss": 2.8786,
      "step": 139285
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4431748390197754,
      "learning_rate": 0.00020324212570660473,
      "loss": 3.1809,
      "step": 139286
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3354744911193848,
      "learning_rate": 0.00020323825375093962,
      "loss": 2.9954,
      "step": 139287
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5995092391967773,
      "learning_rate": 0.00020323438181326427,
      "loss": 2.9438,
      "step": 139288
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6342718601226807,
      "learning_rate": 0.00020323050989357955,
      "loss": 3.1598,
      "step": 139289
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.423119068145752,
      "learning_rate": 0.00020322663799188608,
      "loss": 2.8642,
      "step": 139290
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.051567316055298,
      "learning_rate": 0.00020322276610818454,
      "loss": 3.0612,
      "step": 139291
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1327264308929443,
      "learning_rate": 0.00020321889424247577,
      "loss": 2.99,
      "step": 139292
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4736835956573486,
      "learning_rate": 0.00020321502239476047,
      "loss": 3.065,
      "step": 139293
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5810964107513428,
      "learning_rate": 0.00020321115056503924,
      "loss": 2.7646,
      "step": 139294
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4211878776550293,
      "learning_rate": 0.00020320727875331292,
      "loss": 2.9026,
      "step": 139295
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2317497730255127,
      "learning_rate": 0.00020320340695958224,
      "loss": 3.0052,
      "step": 139296
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8884599208831787,
      "learning_rate": 0.0002031995351838478,
      "loss": 2.7499,
      "step": 139297
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.363583564758301,
      "learning_rate": 0.0002031956634261104,
      "loss": 2.9243,
      "step": 139298
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.599738597869873,
      "learning_rate": 0.00020319179168637087,
      "loss": 2.7824,
      "step": 139299
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.739112138748169,
      "learning_rate": 0.00020318791996462967,
      "loss": 2.8584,
      "step": 139300
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3751347064971924,
      "learning_rate": 0.0002031840482608877,
      "loss": 3.0989,
      "step": 139301
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.095485210418701,
      "learning_rate": 0.00020318017657514563,
      "loss": 2.8813,
      "step": 139302
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4291200637817383,
      "learning_rate": 0.0002031763049074043,
      "loss": 3.0478,
      "step": 139303
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9388442039489746,
      "learning_rate": 0.0002031724332576642,
      "loss": 2.8175,
      "step": 139304
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.077300786972046,
      "learning_rate": 0.00020316856162592628,
      "loss": 3.0951,
      "step": 139305
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9883021116256714,
      "learning_rate": 0.00020316469001219106,
      "loss": 3.0946,
      "step": 139306
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2398879528045654,
      "learning_rate": 0.00020316081841645936,
      "loss": 3.0283,
      "step": 139307
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9073166847229004,
      "learning_rate": 0.00020315694683873192,
      "loss": 3.0957,
      "step": 139308
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.491666793823242,
      "learning_rate": 0.00020315307527900937,
      "loss": 2.9252,
      "step": 139309
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8634133338928223,
      "learning_rate": 0.00020314920373729254,
      "loss": 3.137,
      "step": 139310
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.7783102989196777,
      "learning_rate": 0.0002031453322135822,
      "loss": 2.9118,
      "step": 139311
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9590414762496948,
      "learning_rate": 0.00020314146070787888,
      "loss": 3.052,
      "step": 139312
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8646647930145264,
      "learning_rate": 0.00020313758922018335,
      "loss": 3.2022,
      "step": 139313
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0467922687530518,
      "learning_rate": 0.0002031337177504964,
      "loss": 3.0749,
      "step": 139314
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.535172700881958,
      "learning_rate": 0.00020312984629881876,
      "loss": 3.1396,
      "step": 139315
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.028207302093506,
      "learning_rate": 0.00020312597486515102,
      "loss": 3.1212,
      "step": 139316
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3064992427825928,
      "learning_rate": 0.00020312210344949417,
      "loss": 2.7843,
      "step": 139317
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9239962100982666,
      "learning_rate": 0.0002031182320518486,
      "loss": 2.8647,
      "step": 139318
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0661423206329346,
      "learning_rate": 0.0002031143606722152,
      "loss": 2.9282,
      "step": 139319
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.170488119125366,
      "learning_rate": 0.00020311048931059465,
      "loss": 2.7762,
      "step": 139320
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1000871658325195,
      "learning_rate": 0.00020310661796698768,
      "loss": 2.718,
      "step": 139321
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.825279712677002,
      "learning_rate": 0.00020310274664139504,
      "loss": 2.93,
      "step": 139322
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3724324703216553,
      "learning_rate": 0.00020309887533381745,
      "loss": 3.2546,
      "step": 139323
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1399757862091064,
      "learning_rate": 0.00020309500404425569,
      "loss": 2.6689,
      "step": 139324
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.8672791719436646,
      "learning_rate": 0.0002030911327727103,
      "loss": 2.8421,
      "step": 139325
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2346551418304443,
      "learning_rate": 0.00020308726151918209,
      "loss": 3.2375,
      "step": 139326
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.116054058074951,
      "learning_rate": 0.0002030833902836718,
      "loss": 2.8783,
      "step": 139327
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6740951538085938,
      "learning_rate": 0.0002030795190661801,
      "loss": 2.9426,
      "step": 139328
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9981132745742798,
      "learning_rate": 0.00020307564786670778,
      "loss": 3.2179,
      "step": 139329
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1196036338806152,
      "learning_rate": 0.00020307177668525564,
      "loss": 3.0725,
      "step": 139330
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.83304762840271,
      "learning_rate": 0.00020306790552182416,
      "loss": 2.857,
      "step": 139331
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.422518730163574,
      "learning_rate": 0.0002030640343764142,
      "loss": 2.8804,
      "step": 139332
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9982924461364746,
      "learning_rate": 0.00020306016324902646,
      "loss": 2.8618,
      "step": 139333
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.375530481338501,
      "learning_rate": 0.00020305629213966168,
      "loss": 3.2023,
      "step": 139334
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.332167387008667,
      "learning_rate": 0.00020305242104832056,
      "loss": 3.0634,
      "step": 139335
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6562559604644775,
      "learning_rate": 0.00020304854997500393,
      "loss": 3.0216,
      "step": 139336
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0753631591796875,
      "learning_rate": 0.00020304467891971227,
      "loss": 2.9839,
      "step": 139337
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.177297830581665,
      "learning_rate": 0.00020304080788244647,
      "loss": 2.8817,
      "step": 139338
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.194612979888916,
      "learning_rate": 0.00020303693686320718,
      "loss": 3.0106,
      "step": 139339
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.785726070404053,
      "learning_rate": 0.00020303306586199514,
      "loss": 3.0127,
      "step": 139340
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9439172744750977,
      "learning_rate": 0.00020302919487881116,
      "loss": 3.1688,
      "step": 139341
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3293981552124023,
      "learning_rate": 0.00020302532391365592,
      "loss": 2.644,
      "step": 139342
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1373538970947266,
      "learning_rate": 0.00020302145296653004,
      "loss": 2.9395,
      "step": 139343
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2032976150512695,
      "learning_rate": 0.0002030175820374343,
      "loss": 2.8896,
      "step": 139344
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.9993042945861816,
      "learning_rate": 0.0002030137111263694,
      "loss": 2.9283,
      "step": 139345
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.52764630317688,
      "learning_rate": 0.00020300984023333607,
      "loss": 3.1053,
      "step": 139346
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.8027946949005127,
      "learning_rate": 0.00020300596935833503,
      "loss": 2.8615,
      "step": 139347
    },
    {
      "epoch": 1.81,
      "grad_norm": 4.18364953994751,
      "learning_rate": 0.00020300209850136722,
      "loss": 3.0206,
      "step": 139348
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0054988861083984,
      "learning_rate": 0.00020299822766243296,
      "loss": 3.0827,
      "step": 139349
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9524933099746704,
      "learning_rate": 0.00020299435684153314,
      "loss": 3.0764,
      "step": 139350
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.3249058723449707,
      "learning_rate": 0.00020299048603866852,
      "loss": 2.9114,
      "step": 139351
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.8412587642669678,
      "learning_rate": 0.00020298661525383982,
      "loss": 2.9002,
      "step": 139352
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3354148864746094,
      "learning_rate": 0.00020298274448704774,
      "loss": 3.1355,
      "step": 139353
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2590794563293457,
      "learning_rate": 0.00020297887373829312,
      "loss": 2.9571,
      "step": 139354
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.606890916824341,
      "learning_rate": 0.00020297500300757643,
      "loss": 2.8121,
      "step": 139355
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.6976423263549805,
      "learning_rate": 0.0002029711322948985,
      "loss": 2.9618,
      "step": 139356
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1788718700408936,
      "learning_rate": 0.00020296726160026011,
      "loss": 2.8055,
      "step": 139357
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9366786479949951,
      "learning_rate": 0.00020296339092366186,
      "loss": 2.9419,
      "step": 139358
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.19755482673645,
      "learning_rate": 0.00020295952026510463,
      "loss": 2.9033,
      "step": 139359
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.2780203819274902,
      "learning_rate": 0.00020295564962458917,
      "loss": 2.9376,
      "step": 139360
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.340890884399414,
      "learning_rate": 0.0002029517790021159,
      "loss": 3.0333,
      "step": 139361
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3999712467193604,
      "learning_rate": 0.0002029479083976858,
      "loss": 3.0427,
      "step": 139362
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.196532726287842,
      "learning_rate": 0.00020294403781129946,
      "loss": 2.9578,
      "step": 139363
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4266433715820312,
      "learning_rate": 0.0002029401672429577,
      "loss": 3.0552,
      "step": 139364
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4504315853118896,
      "learning_rate": 0.00020293629669266116,
      "loss": 3.0938,
      "step": 139365
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.746544599533081,
      "learning_rate": 0.0002029324261604107,
      "loss": 2.8012,
      "step": 139366
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.5591979026794434,
      "learning_rate": 0.00020292855564620687,
      "loss": 2.8503,
      "step": 139367
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5377731323242188,
      "learning_rate": 0.00020292468515005043,
      "loss": 2.9067,
      "step": 139368
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.779102087020874,
      "learning_rate": 0.00020292081467194212,
      "loss": 3.0331,
      "step": 139369
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.4287028312683105,
      "learning_rate": 0.00020291694421188268,
      "loss": 2.8982,
      "step": 139370
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.512939929962158,
      "learning_rate": 0.0002029130737698728,
      "loss": 2.9817,
      "step": 139371
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.534528970718384,
      "learning_rate": 0.00020290920334591325,
      "loss": 2.9525,
      "step": 139372
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2619035243988037,
      "learning_rate": 0.00020290533294000474,
      "loss": 2.7026,
      "step": 139373
    },
    {
      "epoch": 1.81,
      "grad_norm": 5.504823684692383,
      "learning_rate": 0.0002029014625521479,
      "loss": 2.8084,
      "step": 139374
    },
    {
      "epoch": 1.81,
      "grad_norm": 5.310528755187988,
      "learning_rate": 0.0002028975921823435,
      "loss": 2.9673,
      "step": 139375
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3508472442626953,
      "learning_rate": 0.00020289372183059226,
      "loss": 2.9932,
      "step": 139376
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.5364739894866943,
      "learning_rate": 0.00020288985149689493,
      "loss": 2.984,
      "step": 139377
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.4240176677703857,
      "learning_rate": 0.0002028859811812523,
      "loss": 2.9744,
      "step": 139378
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1488029956817627,
      "learning_rate": 0.0002028821108836649,
      "loss": 3.0323,
      "step": 139379
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.944535255432129,
      "learning_rate": 0.00020287824060413352,
      "loss": 2.9105,
      "step": 139380
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.2668206691741943,
      "learning_rate": 0.00020287437034265901,
      "loss": 2.8732,
      "step": 139381
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.9207046031951904,
      "learning_rate": 0.0002028705000992419,
      "loss": 3.2822,
      "step": 139382
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.20477294921875,
      "learning_rate": 0.00020286662987388304,
      "loss": 3.0623,
      "step": 139383
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.897184371948242,
      "learning_rate": 0.00020286275966658313,
      "loss": 2.998,
      "step": 139384
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.0381531715393066,
      "learning_rate": 0.00020285888947734282,
      "loss": 3.0089,
      "step": 139385
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.1189041137695312,
      "learning_rate": 0.0002028550193061629,
      "loss": 3.1388,
      "step": 139386
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.383700370788574,
      "learning_rate": 0.00020285114915304401,
      "loss": 3.1189,
      "step": 139387
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.143812656402588,
      "learning_rate": 0.00020284727901798705,
      "loss": 3.0631,
      "step": 139388
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.03234601020813,
      "learning_rate": 0.00020284340890099252,
      "loss": 2.8632,
      "step": 139389
    },
    {
      "epoch": 1.81,
      "grad_norm": 1.95330011844635,
      "learning_rate": 0.00020283953880206122,
      "loss": 2.9345,
      "step": 139390
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.3101789951324463,
      "learning_rate": 0.00020283566872119396,
      "loss": 3.1845,
      "step": 139391
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.136265754699707,
      "learning_rate": 0.00020283179865839132,
      "loss": 2.922,
      "step": 139392
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.284449815750122,
      "learning_rate": 0.0002028279286136541,
      "loss": 2.8657,
      "step": 139393
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.70582914352417,
      "learning_rate": 0.000202824058586983,
      "loss": 3.0244,
      "step": 139394
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0795836448669434,
      "learning_rate": 0.00020282018857837878,
      "loss": 2.8006,
      "step": 139395
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0235307216644287,
      "learning_rate": 0.00020281631858784204,
      "loss": 2.6764,
      "step": 139396
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.596450090408325,
      "learning_rate": 0.00020281244861537369,
      "loss": 3.1685,
      "step": 139397
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.097341537475586,
      "learning_rate": 0.0002028085786609743,
      "loss": 3.2591,
      "step": 139398
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2414839267730713,
      "learning_rate": 0.00020280470872464457,
      "loss": 3.0285,
      "step": 139399
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9754189252853394,
      "learning_rate": 0.00020280083880638527,
      "loss": 2.9277,
      "step": 139400
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.059551954269409,
      "learning_rate": 0.00020279696890619718,
      "loss": 3.0903,
      "step": 139401
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2061095237731934,
      "learning_rate": 0.00020279309902408098,
      "loss": 3.1086,
      "step": 139402
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1897661685943604,
      "learning_rate": 0.00020278922916003745,
      "loss": 2.7998,
      "step": 139403
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7911760807037354,
      "learning_rate": 0.0002027853593140671,
      "loss": 3.1135,
      "step": 139404
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9088973999023438,
      "learning_rate": 0.00020278148948617083,
      "loss": 3.0407,
      "step": 139405
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8136250972747803,
      "learning_rate": 0.0002027776196763493,
      "loss": 2.936,
      "step": 139406
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0522966384887695,
      "learning_rate": 0.00020277374988460326,
      "loss": 2.8206,
      "step": 139407
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5846915245056152,
      "learning_rate": 0.00020276988011093343,
      "loss": 3.0825,
      "step": 139408
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.365666627883911,
      "learning_rate": 0.00020276601035534058,
      "loss": 3.1475,
      "step": 139409
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.195690631866455,
      "learning_rate": 0.0002027621406178253,
      "loss": 2.982,
      "step": 139410
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.846142292022705,
      "learning_rate": 0.00020275827089838834,
      "loss": 2.6259,
      "step": 139411
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9499177932739258,
      "learning_rate": 0.00020275440119703046,
      "loss": 3.0615,
      "step": 139412
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.063077926635742,
      "learning_rate": 0.00020275053151375239,
      "loss": 3.1003,
      "step": 139413
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.986386775970459,
      "learning_rate": 0.00020274666184855482,
      "loss": 3.0472,
      "step": 139414
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8840707540512085,
      "learning_rate": 0.0002027427922014386,
      "loss": 2.8759,
      "step": 139415
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3421218395233154,
      "learning_rate": 0.00020273892257240418,
      "loss": 2.8725,
      "step": 139416
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9719802141189575,
      "learning_rate": 0.0002027350529614525,
      "loss": 2.9955,
      "step": 139417
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9900256395339966,
      "learning_rate": 0.00020273118336858417,
      "loss": 2.871,
      "step": 139418
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8646199703216553,
      "learning_rate": 0.00020272731379379993,
      "loss": 2.9053,
      "step": 139419
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.009781837463379,
      "learning_rate": 0.00020272344423710053,
      "loss": 2.9378,
      "step": 139420
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.708822011947632,
      "learning_rate": 0.00020271957469848682,
      "loss": 2.9431,
      "step": 139421
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.7065579891204834,
      "learning_rate": 0.00020271570517795922,
      "loss": 2.8626,
      "step": 139422
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6327784061431885,
      "learning_rate": 0.00020271183567551864,
      "loss": 3.0413,
      "step": 139423
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.360872268676758,
      "learning_rate": 0.00020270796619116575,
      "loss": 2.8132,
      "step": 139424
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.599592208862305,
      "learning_rate": 0.00020270409672490128,
      "loss": 3.0302,
      "step": 139425
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.729220390319824,
      "learning_rate": 0.00020270022727672597,
      "loss": 2.9507,
      "step": 139426
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.062772274017334,
      "learning_rate": 0.00020269635784664068,
      "loss": 2.7958,
      "step": 139427
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1435866355895996,
      "learning_rate": 0.0002026924884346458,
      "loss": 2.7359,
      "step": 139428
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1317789554595947,
      "learning_rate": 0.00020268861904074222,
      "loss": 2.8806,
      "step": 139429
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.4078853130340576,
      "learning_rate": 0.00020268474966493068,
      "loss": 2.8851,
      "step": 139430
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.45866322517395,
      "learning_rate": 0.0002026808803072119,
      "loss": 3.1189,
      "step": 139431
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8715755939483643,
      "learning_rate": 0.00020267701096758652,
      "loss": 3.1106,
      "step": 139432
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1782455444335938,
      "learning_rate": 0.0002026731416460555,
      "loss": 2.8231,
      "step": 139433
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.91328501701355,
      "learning_rate": 0.00020266927234261926,
      "loss": 2.8523,
      "step": 139434
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8483834266662598,
      "learning_rate": 0.00020266540305727862,
      "loss": 2.7256,
      "step": 139435
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.948793649673462,
      "learning_rate": 0.0002026615337900343,
      "loss": 2.9247,
      "step": 139436
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7000505924224854,
      "learning_rate": 0.00020265766454088708,
      "loss": 3.0143,
      "step": 139437
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1171345710754395,
      "learning_rate": 0.00020265379530983764,
      "loss": 3.0392,
      "step": 139438
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0903842449188232,
      "learning_rate": 0.0002026499260968868,
      "loss": 3.1441,
      "step": 139439
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.54885196685791,
      "learning_rate": 0.00020264605690203504,
      "loss": 2.5728,
      "step": 139440
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7414557933807373,
      "learning_rate": 0.00020264218772528324,
      "loss": 2.9329,
      "step": 139441
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.501572608947754,
      "learning_rate": 0.00020263831856663208,
      "loss": 2.9171,
      "step": 139442
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.103908061981201,
      "learning_rate": 0.0002026344494260823,
      "loss": 2.7026,
      "step": 139443
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.594675302505493,
      "learning_rate": 0.0002026305803036346,
      "loss": 2.8455,
      "step": 139444
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2623658180236816,
      "learning_rate": 0.00020262671119928986,
      "loss": 2.9388,
      "step": 139445
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2437245845794678,
      "learning_rate": 0.0002026228421130485,
      "loss": 2.8769,
      "step": 139446
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.002963542938232,
      "learning_rate": 0.0002026189730449114,
      "loss": 2.8342,
      "step": 139447
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2239315509796143,
      "learning_rate": 0.0002026151039948793,
      "loss": 3.0872,
      "step": 139448
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1631619930267334,
      "learning_rate": 0.00020261123496295285,
      "loss": 3.0455,
      "step": 139449
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.092893123626709,
      "learning_rate": 0.00020260736594913283,
      "loss": 2.9386,
      "step": 139450
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0328214168548584,
      "learning_rate": 0.00020260349695342008,
      "loss": 2.8831,
      "step": 139451
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8348734378814697,
      "learning_rate": 0.00020259962797581501,
      "loss": 3.0166,
      "step": 139452
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0257701873779297,
      "learning_rate": 0.00020259575901631854,
      "loss": 2.9076,
      "step": 139453
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.645123243331909,
      "learning_rate": 0.00020259189007493136,
      "loss": 2.7381,
      "step": 139454
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9817618131637573,
      "learning_rate": 0.0002025880211516542,
      "loss": 3.0757,
      "step": 139455
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9476146697998047,
      "learning_rate": 0.00020258415224648773,
      "loss": 3.0628,
      "step": 139456
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0329670906066895,
      "learning_rate": 0.0002025802833594327,
      "loss": 2.661,
      "step": 139457
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.00410532951355,
      "learning_rate": 0.00020257641449049,
      "loss": 3.181,
      "step": 139458
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.369220018386841,
      "learning_rate": 0.00020257254563966,
      "loss": 3.103,
      "step": 139459
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2433440685272217,
      "learning_rate": 0.00020256867680694366,
      "loss": 2.8626,
      "step": 139460
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.7862693071365356,
      "learning_rate": 0.00020256480799234165,
      "loss": 3.1119,
      "step": 139461
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.533712148666382,
      "learning_rate": 0.00020256093919585462,
      "loss": 2.818,
      "step": 139462
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.049791097640991,
      "learning_rate": 0.00020255707041748341,
      "loss": 2.7654,
      "step": 139463
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9005318880081177,
      "learning_rate": 0.00020255320165722869,
      "loss": 2.7656,
      "step": 139464
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5433778762817383,
      "learning_rate": 0.00020254933291509122,
      "loss": 2.9878,
      "step": 139465
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1879830360412598,
      "learning_rate": 0.00020254546419107156,
      "loss": 3.0087,
      "step": 139466
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.301295042037964,
      "learning_rate": 0.00020254159548517057,
      "loss": 3.0419,
      "step": 139467
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4839601516723633,
      "learning_rate": 0.0002025377267973889,
      "loss": 3.3707,
      "step": 139468
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4803946018218994,
      "learning_rate": 0.00020253385812772734,
      "loss": 3.0409,
      "step": 139469
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.269817352294922,
      "learning_rate": 0.00020252998947618663,
      "loss": 3.0986,
      "step": 139470
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3884294033050537,
      "learning_rate": 0.0002025261208427674,
      "loss": 2.7896,
      "step": 139471
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5628859996795654,
      "learning_rate": 0.0002025222522274704,
      "loss": 3.0087,
      "step": 139472
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.7091469764709473,
      "learning_rate": 0.00020251838363029637,
      "loss": 3.1136,
      "step": 139473
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.62725830078125,
      "learning_rate": 0.00020251451505124595,
      "loss": 2.7629,
      "step": 139474
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4567081928253174,
      "learning_rate": 0.00020251064649031995,
      "loss": 3.0671,
      "step": 139475
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.794588327407837,
      "learning_rate": 0.0002025067779475191,
      "loss": 3.0343,
      "step": 139476
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9202497005462646,
      "learning_rate": 0.00020250290942284404,
      "loss": 3.0251,
      "step": 139477
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6252400875091553,
      "learning_rate": 0.00020249904091629555,
      "loss": 2.9271,
      "step": 139478
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.586606979370117,
      "learning_rate": 0.00020249517242787427,
      "loss": 2.9456,
      "step": 139479
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0682930946350098,
      "learning_rate": 0.00020249130395758112,
      "loss": 2.9467,
      "step": 139480
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4011402130126953,
      "learning_rate": 0.00020248743550541656,
      "loss": 3.1549,
      "step": 139481
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0761735439300537,
      "learning_rate": 0.00020248356707138146,
      "loss": 3.0423,
      "step": 139482
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1557867527008057,
      "learning_rate": 0.00020247969865547653,
      "loss": 2.8841,
      "step": 139483
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.892271876335144,
      "learning_rate": 0.0002024758302577024,
      "loss": 3.3211,
      "step": 139484
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2862637042999268,
      "learning_rate": 0.00020247196187805986,
      "loss": 2.7845,
      "step": 139485
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0886716842651367,
      "learning_rate": 0.00020246809351654962,
      "loss": 2.93,
      "step": 139486
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.151343584060669,
      "learning_rate": 0.0002024642251731725,
      "loss": 2.8348,
      "step": 139487
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.6049962043762207,
      "learning_rate": 0.00020246035684792912,
      "loss": 2.915,
      "step": 139488
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3327105045318604,
      "learning_rate": 0.0002024564885408201,
      "loss": 2.9851,
      "step": 139489
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.292780876159668,
      "learning_rate": 0.0002024526202518463,
      "loss": 3.0532,
      "step": 139490
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.548281192779541,
      "learning_rate": 0.00020244875198100837,
      "loss": 2.9761,
      "step": 139491
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5592942237854004,
      "learning_rate": 0.00020244488372830706,
      "loss": 3.0636,
      "step": 139492
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.396524667739868,
      "learning_rate": 0.0002024410154937431,
      "loss": 2.8038,
      "step": 139493
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.877176523208618,
      "learning_rate": 0.00020243714727731732,
      "loss": 2.8776,
      "step": 139494
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.323796510696411,
      "learning_rate": 0.00020243327907903017,
      "loss": 2.6723,
      "step": 139495
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.093989849090576,
      "learning_rate": 0.00020242941089888253,
      "loss": 3.0806,
      "step": 139496
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5233616828918457,
      "learning_rate": 0.0002024255427368751,
      "loss": 2.8958,
      "step": 139497
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9771442413330078,
      "learning_rate": 0.0002024216745930086,
      "loss": 3.1254,
      "step": 139498
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5398943424224854,
      "learning_rate": 0.0002024178064672838,
      "loss": 3.1618,
      "step": 139499
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.198460340499878,
      "learning_rate": 0.00020241393835970144,
      "loss": 2.9777,
      "step": 139500
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2196638584136963,
      "learning_rate": 0.00020241007027026208,
      "loss": 3.022,
      "step": 139501
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0817575454711914,
      "learning_rate": 0.00020240620219896647,
      "loss": 2.7176,
      "step": 139502
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9477916955947876,
      "learning_rate": 0.00020240233414581547,
      "loss": 3.1588,
      "step": 139503
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7922260761260986,
      "learning_rate": 0.00020239846611080968,
      "loss": 3.0668,
      "step": 139504
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.3799808025360107,
      "learning_rate": 0.00020239459809394986,
      "loss": 3.0672,
      "step": 139505
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.475735664367676,
      "learning_rate": 0.00020239073009523686,
      "loss": 3.0817,
      "step": 139506
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9619193077087402,
      "learning_rate": 0.00020238686211467114,
      "loss": 2.7688,
      "step": 139507
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9233942031860352,
      "learning_rate": 0.00020238299415225355,
      "loss": 2.8322,
      "step": 139508
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.054532289505005,
      "learning_rate": 0.00020237912620798477,
      "loss": 3.1881,
      "step": 139509
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9346832036972046,
      "learning_rate": 0.00020237525828186557,
      "loss": 2.8824,
      "step": 139510
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9775012731552124,
      "learning_rate": 0.00020237139037389666,
      "loss": 2.8179,
      "step": 139511
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.749518394470215,
      "learning_rate": 0.00020236752248407885,
      "loss": 3.1154,
      "step": 139512
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4096882343292236,
      "learning_rate": 0.0002023636546124127,
      "loss": 3.1029,
      "step": 139513
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.240222215652466,
      "learning_rate": 0.00020235978675889893,
      "loss": 2.9327,
      "step": 139514
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5280818939208984,
      "learning_rate": 0.00020235591892353835,
      "loss": 3.0988,
      "step": 139515
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9801161289215088,
      "learning_rate": 0.0002023520511063316,
      "loss": 3.2089,
      "step": 139516
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.091597318649292,
      "learning_rate": 0.0002023481833072795,
      "loss": 3.0573,
      "step": 139517
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0660510063171387,
      "learning_rate": 0.00020234431552638283,
      "loss": 2.9493,
      "step": 139518
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.856739044189453,
      "learning_rate": 0.00020234044776364205,
      "loss": 3.0938,
      "step": 139519
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.285822629928589,
      "learning_rate": 0.00020233658001905805,
      "loss": 2.9344,
      "step": 139520
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1551060676574707,
      "learning_rate": 0.0002023327122926315,
      "loss": 3.1632,
      "step": 139521
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6318109035491943,
      "learning_rate": 0.00020232884458436319,
      "loss": 2.8636,
      "step": 139522
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.431325674057007,
      "learning_rate": 0.00020232497689425373,
      "loss": 3.0451,
      "step": 139523
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.646366834640503,
      "learning_rate": 0.00020232110922230393,
      "loss": 3.0809,
      "step": 139524
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.188136577606201,
      "learning_rate": 0.00020231724156851458,
      "loss": 2.9379,
      "step": 139525
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.064603090286255,
      "learning_rate": 0.00020231337393288623,
      "loss": 2.9264,
      "step": 139526
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.044875383377075,
      "learning_rate": 0.0002023095063154196,
      "loss": 2.9404,
      "step": 139527
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.378746747970581,
      "learning_rate": 0.00020230563871611552,
      "loss": 2.9795,
      "step": 139528
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1054043769836426,
      "learning_rate": 0.00020230177113497464,
      "loss": 3.0404,
      "step": 139529
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.759817361831665,
      "learning_rate": 0.00020229790357199776,
      "loss": 2.9265,
      "step": 139530
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.742008686065674,
      "learning_rate": 0.0002022940360271856,
      "loss": 3.1542,
      "step": 139531
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0566952228546143,
      "learning_rate": 0.00020229016850053873,
      "loss": 3.0296,
      "step": 139532
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.395932674407959,
      "learning_rate": 0.00020228630099205795,
      "loss": 2.7233,
      "step": 139533
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6727657318115234,
      "learning_rate": 0.000202282433501744,
      "loss": 2.9583,
      "step": 139534
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6158499717712402,
      "learning_rate": 0.00020227856602959759,
      "loss": 2.7262,
      "step": 139535
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8597707748413086,
      "learning_rate": 0.00020227469857561945,
      "loss": 2.8246,
      "step": 139536
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.180927038192749,
      "learning_rate": 0.0002022708311398104,
      "loss": 3.2666,
      "step": 139537
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2765512466430664,
      "learning_rate": 0.00020226696372217093,
      "loss": 3.1134,
      "step": 139538
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.313821315765381,
      "learning_rate": 0.00020226309632270185,
      "loss": 3.1156,
      "step": 139539
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.423696994781494,
      "learning_rate": 0.0002022592289414039,
      "loss": 2.9909,
      "step": 139540
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.050126791000366,
      "learning_rate": 0.00020225536157827788,
      "loss": 2.9619,
      "step": 139541
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1917431354522705,
      "learning_rate": 0.00020225149423332435,
      "loss": 3.103,
      "step": 139542
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.355891466140747,
      "learning_rate": 0.00020224762690654427,
      "loss": 2.972,
      "step": 139543
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.961488962173462,
      "learning_rate": 0.00020224375959793813,
      "loss": 2.7984,
      "step": 139544
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.266388177871704,
      "learning_rate": 0.00020223989230750664,
      "loss": 2.8963,
      "step": 139545
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.982117176055908,
      "learning_rate": 0.00020223602503525063,
      "loss": 2.9648,
      "step": 139546
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.011476755142212,
      "learning_rate": 0.00020223215778117078,
      "loss": 3.0787,
      "step": 139547
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1507089138031006,
      "learning_rate": 0.00020222829054526782,
      "loss": 3.2545,
      "step": 139548
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.082573890686035,
      "learning_rate": 0.00020222442332754254,
      "loss": 3.0258,
      "step": 139549
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.5980935096740723,
      "learning_rate": 0.00020222055612799557,
      "loss": 2.8575,
      "step": 139550
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7406203746795654,
      "learning_rate": 0.0002022166889466276,
      "loss": 3.0116,
      "step": 139551
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.338979482650757,
      "learning_rate": 0.00020221282178343937,
      "loss": 3.164,
      "step": 139552
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.329014539718628,
      "learning_rate": 0.00020220895463843165,
      "loss": 3.0417,
      "step": 139553
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8357152938842773,
      "learning_rate": 0.0002022050875116051,
      "loss": 2.9974,
      "step": 139554
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1554343700408936,
      "learning_rate": 0.00020220122040296056,
      "loss": 3.1494,
      "step": 139555
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9763578176498413,
      "learning_rate": 0.0002021973533124986,
      "loss": 2.6171,
      "step": 139556
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9142061471939087,
      "learning_rate": 0.00020219348624022002,
      "loss": 2.8495,
      "step": 139557
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6936635971069336,
      "learning_rate": 0.0002021896191861255,
      "loss": 2.9679,
      "step": 139558
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2716662883758545,
      "learning_rate": 0.00020218575215021574,
      "loss": 3.119,
      "step": 139559
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0351357460021973,
      "learning_rate": 0.00020218188513249153,
      "loss": 3.0183,
      "step": 139560
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.176596164703369,
      "learning_rate": 0.0002021780181329536,
      "loss": 2.9719,
      "step": 139561
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.509694814682007,
      "learning_rate": 0.00020217415115160255,
      "loss": 2.8145,
      "step": 139562
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.702399969100952,
      "learning_rate": 0.0002021702841884392,
      "loss": 2.9559,
      "step": 139563
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.402961015701294,
      "learning_rate": 0.00020216641724346424,
      "loss": 2.9399,
      "step": 139564
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.445132255554199,
      "learning_rate": 0.0002021625503166784,
      "loss": 3.0308,
      "step": 139565
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1795706748962402,
      "learning_rate": 0.00020215868340808235,
      "loss": 2.8576,
      "step": 139566
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8075722455978394,
      "learning_rate": 0.00020215481651767692,
      "loss": 2.8365,
      "step": 139567
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.965450644493103,
      "learning_rate": 0.0002021509496454627,
      "loss": 2.973,
      "step": 139568
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.335350513458252,
      "learning_rate": 0.00020214708279144044,
      "loss": 2.9868,
      "step": 139569
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8966329097747803,
      "learning_rate": 0.0002021432159556109,
      "loss": 2.8579,
      "step": 139570
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.08378005027771,
      "learning_rate": 0.00020213934913797482,
      "loss": 2.8421,
      "step": 139571
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2438313961029053,
      "learning_rate": 0.00020213548233853293,
      "loss": 3.1183,
      "step": 139572
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.703845024108887,
      "learning_rate": 0.00020213161555728584,
      "loss": 2.9053,
      "step": 139573
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1288037300109863,
      "learning_rate": 0.00020212774879423432,
      "loss": 3.2524,
      "step": 139574
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1877596378326416,
      "learning_rate": 0.00020212388204937908,
      "loss": 3.0036,
      "step": 139575
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.883521795272827,
      "learning_rate": 0.0002021200153227209,
      "loss": 2.9548,
      "step": 139576
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.258871555328369,
      "learning_rate": 0.0002021161486142604,
      "loss": 2.9231,
      "step": 139577
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7130370140075684,
      "learning_rate": 0.0002021122819239984,
      "loss": 3.2448,
      "step": 139578
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8460404872894287,
      "learning_rate": 0.00020210841525193567,
      "loss": 3.0361,
      "step": 139579
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2054951190948486,
      "learning_rate": 0.0002021045485980727,
      "loss": 3.1205,
      "step": 139580
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2354419231414795,
      "learning_rate": 0.00020210068196241038,
      "loss": 3.0922,
      "step": 139581
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1345839500427246,
      "learning_rate": 0.00020209681534494938,
      "loss": 3.186,
      "step": 139582
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.173539876937866,
      "learning_rate": 0.0002020929487456904,
      "loss": 2.9921,
      "step": 139583
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.20904278755188,
      "learning_rate": 0.00020208908216463422,
      "loss": 3.004,
      "step": 139584
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7118582725524902,
      "learning_rate": 0.00020208521560178164,
      "loss": 2.8062,
      "step": 139585
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8866690397262573,
      "learning_rate": 0.0002020813490571332,
      "loss": 2.7467,
      "step": 139586
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.750859260559082,
      "learning_rate": 0.00020207748253068958,
      "loss": 2.7545,
      "step": 139587
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8509764671325684,
      "learning_rate": 0.00020207361602245165,
      "loss": 2.9306,
      "step": 139588
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0894906520843506,
      "learning_rate": 0.0002020697495324201,
      "loss": 2.8609,
      "step": 139589
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0765907764434814,
      "learning_rate": 0.00020206588306059562,
      "loss": 2.8418,
      "step": 139590
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0090835094451904,
      "learning_rate": 0.00020206201660697895,
      "loss": 2.8804,
      "step": 139591
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.186906099319458,
      "learning_rate": 0.00020205815017157092,
      "loss": 3.1566,
      "step": 139592
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.6678311824798584,
      "learning_rate": 0.000202054283754372,
      "loss": 2.9851,
      "step": 139593
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.116921901702881,
      "learning_rate": 0.00020205041735538303,
      "loss": 2.8935,
      "step": 139594
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7145214080810547,
      "learning_rate": 0.00020204655097460475,
      "loss": 2.9324,
      "step": 139595
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0468761920928955,
      "learning_rate": 0.00020204268461203786,
      "loss": 3.0956,
      "step": 139596
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2189340591430664,
      "learning_rate": 0.00020203881826768308,
      "loss": 3.0974,
      "step": 139597
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1781082153320312,
      "learning_rate": 0.00020203495194154127,
      "loss": 3.1215,
      "step": 139598
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.990840196609497,
      "learning_rate": 0.00020203108563361286,
      "loss": 2.9181,
      "step": 139599
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.025783061981201,
      "learning_rate": 0.00020202721934389873,
      "loss": 3.2167,
      "step": 139600
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0696651935577393,
      "learning_rate": 0.00020202335307239962,
      "loss": 2.7694,
      "step": 139601
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.542271375656128,
      "learning_rate": 0.0002020194868191162,
      "loss": 3.1453,
      "step": 139602
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0233001708984375,
      "learning_rate": 0.0002020156205840492,
      "loss": 3.0237,
      "step": 139603
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.738717794418335,
      "learning_rate": 0.0002020117543671995,
      "loss": 2.8892,
      "step": 139604
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.243399143218994,
      "learning_rate": 0.00020200788816856748,
      "loss": 2.8445,
      "step": 139605
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.109769344329834,
      "learning_rate": 0.0002020040219881541,
      "loss": 3.0921,
      "step": 139606
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3523709774017334,
      "learning_rate": 0.00020200015582596,
      "loss": 2.8125,
      "step": 139607
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.364069700241089,
      "learning_rate": 0.0002019962896819859,
      "loss": 2.7563,
      "step": 139608
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5165019035339355,
      "learning_rate": 0.00020199242355623255,
      "loss": 3.03,
      "step": 139609
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0648465156555176,
      "learning_rate": 0.0002019885574487008,
      "loss": 2.8113,
      "step": 139610
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0076277256011963,
      "learning_rate": 0.0002019846913593911,
      "loss": 2.9778,
      "step": 139611
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.978872537612915,
      "learning_rate": 0.0002019808252883043,
      "loss": 2.8179,
      "step": 139612
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7458128929138184,
      "learning_rate": 0.0002019769592354411,
      "loss": 3.126,
      "step": 139613
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4172089099884033,
      "learning_rate": 0.0002019730932008022,
      "loss": 2.7986,
      "step": 139614
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.242518663406372,
      "learning_rate": 0.0002019692271843884,
      "loss": 2.8398,
      "step": 139615
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4129409790039062,
      "learning_rate": 0.0002019653611862005,
      "loss": 2.9795,
      "step": 139616
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0319368839263916,
      "learning_rate": 0.00020196149520623898,
      "loss": 2.8791,
      "step": 139617
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0445501804351807,
      "learning_rate": 0.00020195762924450462,
      "loss": 2.8341,
      "step": 139618
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8765735626220703,
      "learning_rate": 0.00020195376330099822,
      "loss": 2.8175,
      "step": 139619
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9896411895751953,
      "learning_rate": 0.00020194989737572046,
      "loss": 2.974,
      "step": 139620
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.580496311187744,
      "learning_rate": 0.00020194603146867206,
      "loss": 2.8233,
      "step": 139621
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8703680038452148,
      "learning_rate": 0.00020194216557985388,
      "loss": 3.028,
      "step": 139622
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2519383430480957,
      "learning_rate": 0.00020193829970926636,
      "loss": 3.2166,
      "step": 139623
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.496500015258789,
      "learning_rate": 0.00020193443385691035,
      "loss": 3.2573,
      "step": 139624
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2554948329925537,
      "learning_rate": 0.0002019305680227866,
      "loss": 3.2524,
      "step": 139625
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2189347743988037,
      "learning_rate": 0.0002019267022068958,
      "loss": 3.0606,
      "step": 139626
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.26163387298584,
      "learning_rate": 0.00020192283640923869,
      "loss": 3.0892,
      "step": 139627
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.123572587966919,
      "learning_rate": 0.0002019189706298161,
      "loss": 3.0935,
      "step": 139628
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.22605037689209,
      "learning_rate": 0.00020191510486862848,
      "loss": 2.8855,
      "step": 139629
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9367036819458008,
      "learning_rate": 0.00020191123912567674,
      "loss": 2.9769,
      "step": 139630
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.478118896484375,
      "learning_rate": 0.00020190737340096152,
      "loss": 2.7895,
      "step": 139631
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.49780011177063,
      "learning_rate": 0.00020190350769448353,
      "loss": 2.8816,
      "step": 139632
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5308220386505127,
      "learning_rate": 0.0002018996420062436,
      "loss": 2.9071,
      "step": 139633
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.772559404373169,
      "learning_rate": 0.0002018957763362424,
      "loss": 2.8494,
      "step": 139634
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.8861911296844482,
      "learning_rate": 0.00020189191068448062,
      "loss": 2.9527,
      "step": 139635
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.1234331130981445,
      "learning_rate": 0.00020188804505095895,
      "loss": 3.0382,
      "step": 139636
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6285455226898193,
      "learning_rate": 0.00020188417943567814,
      "loss": 3.0425,
      "step": 139637
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.35121488571167,
      "learning_rate": 0.00020188031383863895,
      "loss": 2.9263,
      "step": 139638
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0281031131744385,
      "learning_rate": 0.000201876448259842,
      "loss": 2.9762,
      "step": 139639
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1062917709350586,
      "learning_rate": 0.00020187258269928817,
      "loss": 2.9604,
      "step": 139640
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.88369083404541,
      "learning_rate": 0.000201868717156978,
      "loss": 2.8895,
      "step": 139641
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.400541067123413,
      "learning_rate": 0.00020186485163291236,
      "loss": 2.7695,
      "step": 139642
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.150576591491699,
      "learning_rate": 0.0002018609861270918,
      "loss": 3.0477,
      "step": 139643
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.079070568084717,
      "learning_rate": 0.00020185712063951715,
      "loss": 3.3236,
      "step": 139644
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.3943302631378174,
      "learning_rate": 0.00020185325517018914,
      "loss": 2.8477,
      "step": 139645
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1867165565490723,
      "learning_rate": 0.0002018493897191085,
      "loss": 2.7847,
      "step": 139646
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5109703540802,
      "learning_rate": 0.00020184552428627585,
      "loss": 2.9382,
      "step": 139647
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2522220611572266,
      "learning_rate": 0.00020184165887169198,
      "loss": 2.7041,
      "step": 139648
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4790499210357666,
      "learning_rate": 0.00020183779347535767,
      "loss": 2.8233,
      "step": 139649
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0100607872009277,
      "learning_rate": 0.0002018339280972735,
      "loss": 2.9235,
      "step": 139650
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.830508708953857,
      "learning_rate": 0.00020183006273744025,
      "loss": 2.9001,
      "step": 139651
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9001471996307373,
      "learning_rate": 0.0002018261973958587,
      "loss": 2.9437,
      "step": 139652
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.690592050552368,
      "learning_rate": 0.00020182233207252943,
      "loss": 2.8739,
      "step": 139653
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.4091060161590576,
      "learning_rate": 0.00020181846676745329,
      "loss": 2.8447,
      "step": 139654
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3239879608154297,
      "learning_rate": 0.00020181460148063093,
      "loss": 3.16,
      "step": 139655
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.116262674331665,
      "learning_rate": 0.00020181073621206315,
      "loss": 2.9029,
      "step": 139656
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5641567707061768,
      "learning_rate": 0.00020180687096175054,
      "loss": 2.7001,
      "step": 139657
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.135551691055298,
      "learning_rate": 0.00020180300572969387,
      "loss": 2.7553,
      "step": 139658
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.871882915496826,
      "learning_rate": 0.000201799140515894,
      "loss": 2.9365,
      "step": 139659
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1280722618103027,
      "learning_rate": 0.00020179527532035143,
      "loss": 2.9137,
      "step": 139660
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.594491004943848,
      "learning_rate": 0.00020179141014306694,
      "loss": 3.0766,
      "step": 139661
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2314231395721436,
      "learning_rate": 0.00020178754498404128,
      "loss": 2.912,
      "step": 139662
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9671080112457275,
      "learning_rate": 0.0002017836798432752,
      "loss": 3.0476,
      "step": 139663
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0088133811950684,
      "learning_rate": 0.00020177981472076944,
      "loss": 2.9312,
      "step": 139664
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9738693237304688,
      "learning_rate": 0.00020177594961652467,
      "loss": 2.9484,
      "step": 139665
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.118764877319336,
      "learning_rate": 0.00020177208453054155,
      "loss": 3.0161,
      "step": 139666
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.389249086380005,
      "learning_rate": 0.00020176821946282084,
      "loss": 2.9832,
      "step": 139667
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.111375093460083,
      "learning_rate": 0.0002017643544133633,
      "loss": 2.8083,
      "step": 139668
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8891870975494385,
      "learning_rate": 0.0002017604893821696,
      "loss": 2.9441,
      "step": 139669
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9769259691238403,
      "learning_rate": 0.00020175662436924046,
      "loss": 3.0836,
      "step": 139670
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1125736236572266,
      "learning_rate": 0.00020175275937457679,
      "loss": 3.0894,
      "step": 139671
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.020252227783203,
      "learning_rate": 0.00020174889439817894,
      "loss": 3.0552,
      "step": 139672
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.7338825464248657,
      "learning_rate": 0.00020174502944004787,
      "loss": 2.9409,
      "step": 139673
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.048070192337036,
      "learning_rate": 0.00020174116450018427,
      "loss": 3.073,
      "step": 139674
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9809045791625977,
      "learning_rate": 0.0002017372995785888,
      "loss": 3.403,
      "step": 139675
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.22983980178833,
      "learning_rate": 0.00020173343467526226,
      "loss": 2.7646,
      "step": 139676
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.744460105895996,
      "learning_rate": 0.00020172956979020543,
      "loss": 2.8299,
      "step": 139677
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0248725414276123,
      "learning_rate": 0.00020172570492341883,
      "loss": 2.9343,
      "step": 139678
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8897355794906616,
      "learning_rate": 0.00020172184007490325,
      "loss": 2.9964,
      "step": 139679
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3518717288970947,
      "learning_rate": 0.00020171797524465944,
      "loss": 3.1422,
      "step": 139680
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9041895866394043,
      "learning_rate": 0.00020171411043268811,
      "loss": 3.0118,
      "step": 139681
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2338244915008545,
      "learning_rate": 0.00020171024563899,
      "loss": 2.887,
      "step": 139682
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3656134605407715,
      "learning_rate": 0.00020170638086356593,
      "loss": 2.9739,
      "step": 139683
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.07236909866333,
      "learning_rate": 0.00020170251610641637,
      "loss": 3.1298,
      "step": 139684
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.594996213912964,
      "learning_rate": 0.00020169865136754217,
      "loss": 3.1387,
      "step": 139685
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.047255516052246,
      "learning_rate": 0.00020169478664694402,
      "loss": 2.6584,
      "step": 139686
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.047973871231079,
      "learning_rate": 0.0002016909219446227,
      "loss": 3.0722,
      "step": 139687
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.336371660232544,
      "learning_rate": 0.00020168705726057888,
      "loss": 3.031,
      "step": 139688
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.922520160675049,
      "learning_rate": 0.00020168319259481342,
      "loss": 3.0366,
      "step": 139689
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.292099714279175,
      "learning_rate": 0.00020167932794732678,
      "loss": 2.7689,
      "step": 139690
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4672601222991943,
      "learning_rate": 0.0002016754633181198,
      "loss": 3.1875,
      "step": 139691
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.245054244995117,
      "learning_rate": 0.00020167159870719325,
      "loss": 3.0208,
      "step": 139692
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1503584384918213,
      "learning_rate": 0.00020166773411454776,
      "loss": 3.0785,
      "step": 139693
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.705230474472046,
      "learning_rate": 0.0002016638695401841,
      "loss": 3.0096,
      "step": 139694
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8533270359039307,
      "learning_rate": 0.00020166000498410313,
      "loss": 3.2285,
      "step": 139695
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.258293867111206,
      "learning_rate": 0.00020165614044630528,
      "loss": 2.9041,
      "step": 139696
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.904874324798584,
      "learning_rate": 0.00020165227592679143,
      "loss": 3.242,
      "step": 139697
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.6110241413116455,
      "learning_rate": 0.00020164841142556223,
      "loss": 3.0915,
      "step": 139698
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3810791969299316,
      "learning_rate": 0.0002016445469426185,
      "loss": 2.9312,
      "step": 139699
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1757872104644775,
      "learning_rate": 0.00020164068247796086,
      "loss": 3.0307,
      "step": 139700
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8911426067352295,
      "learning_rate": 0.00020163681803159027,
      "loss": 2.9162,
      "step": 139701
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9904662370681763,
      "learning_rate": 0.00020163295360350707,
      "loss": 3.0627,
      "step": 139702
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3953914642333984,
      "learning_rate": 0.00020162908919371217,
      "loss": 3.0282,
      "step": 139703
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5813000202178955,
      "learning_rate": 0.00020162522480220624,
      "loss": 3.0021,
      "step": 139704
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6002113819122314,
      "learning_rate": 0.0002016213604289901,
      "loss": 3.1835,
      "step": 139705
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1999692916870117,
      "learning_rate": 0.00020161749607406436,
      "loss": 2.9079,
      "step": 139706
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.185185432434082,
      "learning_rate": 0.00020161363173742996,
      "loss": 2.8219,
      "step": 139707
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9531009197235107,
      "learning_rate": 0.0002016097674190873,
      "loss": 2.9209,
      "step": 139708
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9883923530578613,
      "learning_rate": 0.00020160590311903723,
      "loss": 2.8618,
      "step": 139709
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.525761842727661,
      "learning_rate": 0.0002016020388372805,
      "loss": 2.7448,
      "step": 139710
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.830244779586792,
      "learning_rate": 0.00020159817457381774,
      "loss": 3.0937,
      "step": 139711
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.24324893951416,
      "learning_rate": 0.00020159431032864983,
      "loss": 2.7891,
      "step": 139712
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.684994697570801,
      "learning_rate": 0.00020159044610177748,
      "loss": 3.1053,
      "step": 139713
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0197975635528564,
      "learning_rate": 0.0002015865818932012,
      "loss": 3.0507,
      "step": 139714
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.012488842010498,
      "learning_rate": 0.00020158271770292181,
      "loss": 2.9974,
      "step": 139715
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2931313514709473,
      "learning_rate": 0.00020157885353094007,
      "loss": 2.9118,
      "step": 139716
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0441436767578125,
      "learning_rate": 0.00020157498937725668,
      "loss": 2.9,
      "step": 139717
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.4662935733795166,
      "learning_rate": 0.00020157112524187237,
      "loss": 3.1472,
      "step": 139718
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2178895473480225,
      "learning_rate": 0.00020156726112478793,
      "loss": 3.0916,
      "step": 139719
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2376413345336914,
      "learning_rate": 0.00020156339702600393,
      "loss": 2.8861,
      "step": 139720
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7531535625457764,
      "learning_rate": 0.0002015595329455211,
      "loss": 2.8469,
      "step": 139721
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.444784164428711,
      "learning_rate": 0.00020155566888334025,
      "loss": 2.9051,
      "step": 139722
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3468427658081055,
      "learning_rate": 0.00020155180483946204,
      "loss": 2.9983,
      "step": 139723
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4953908920288086,
      "learning_rate": 0.00020154794081388722,
      "loss": 2.8708,
      "step": 139724
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.312253713607788,
      "learning_rate": 0.00020154407680661647,
      "loss": 2.871,
      "step": 139725
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.4679088592529297,
      "learning_rate": 0.0002015402128176506,
      "loss": 2.8936,
      "step": 139726
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.118516445159912,
      "learning_rate": 0.00020153634884699027,
      "loss": 2.8029,
      "step": 139727
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6996042728424072,
      "learning_rate": 0.00020153248489463617,
      "loss": 2.819,
      "step": 139728
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8792150020599365,
      "learning_rate": 0.00020152862096058896,
      "loss": 2.6965,
      "step": 139729
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.155104160308838,
      "learning_rate": 0.00020152475704484946,
      "loss": 3.0978,
      "step": 139730
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.687270402908325,
      "learning_rate": 0.0002015208931474184,
      "loss": 3.1638,
      "step": 139731
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.292994022369385,
      "learning_rate": 0.00020151702926829654,
      "loss": 2.8457,
      "step": 139732
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.47499418258667,
      "learning_rate": 0.00020151316540748443,
      "loss": 3.0916,
      "step": 139733
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1185622215270996,
      "learning_rate": 0.00020150930156498294,
      "loss": 2.89,
      "step": 139734
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0687038898468018,
      "learning_rate": 0.00020150543774079269,
      "loss": 2.8887,
      "step": 139735
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.611185073852539,
      "learning_rate": 0.00020150157393491444,
      "loss": 2.8981,
      "step": 139736
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.25374174118042,
      "learning_rate": 0.00020149771014734888,
      "loss": 2.9914,
      "step": 139737
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.090278148651123,
      "learning_rate": 0.0002014938463780968,
      "loss": 2.8148,
      "step": 139738
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9133095741271973,
      "learning_rate": 0.00020148998262715888,
      "loss": 3.0781,
      "step": 139739
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.562030076980591,
      "learning_rate": 0.00020148611889453575,
      "loss": 2.8322,
      "step": 139740
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.9709179401397705,
      "learning_rate": 0.00020148225518022833,
      "loss": 2.9948,
      "step": 139741
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9324122667312622,
      "learning_rate": 0.00020147839148423714,
      "loss": 3.042,
      "step": 139742
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4016225337982178,
      "learning_rate": 0.00020147452780656298,
      "loss": 3.1839,
      "step": 139743
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.505763292312622,
      "learning_rate": 0.00020147066414720664,
      "loss": 3.1483,
      "step": 139744
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.468010663986206,
      "learning_rate": 0.0002014668005061687,
      "loss": 2.9134,
      "step": 139745
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3800392150878906,
      "learning_rate": 0.0002014629368834499,
      "loss": 3.0197,
      "step": 139746
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0025129318237305,
      "learning_rate": 0.00020145907327905104,
      "loss": 3.0565,
      "step": 139747
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1152613162994385,
      "learning_rate": 0.00020145520969297285,
      "loss": 2.7797,
      "step": 139748
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0530614852905273,
      "learning_rate": 0.00020145134612521595,
      "loss": 2.9885,
      "step": 139749
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.197335720062256,
      "learning_rate": 0.00020144748257578112,
      "loss": 3.0583,
      "step": 139750
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.507655620574951,
      "learning_rate": 0.00020144361904466906,
      "loss": 3.0438,
      "step": 139751
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.205101728439331,
      "learning_rate": 0.00020143975553188042,
      "loss": 2.8019,
      "step": 139752
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.470581531524658,
      "learning_rate": 0.00020143589203741607,
      "loss": 2.9282,
      "step": 139753
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1284592151641846,
      "learning_rate": 0.0002014320285612766,
      "loss": 3.1355,
      "step": 139754
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.318956136703491,
      "learning_rate": 0.0002014281651034628,
      "loss": 2.9144,
      "step": 139755
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6392662525177,
      "learning_rate": 0.00020142430166397547,
      "loss": 2.9637,
      "step": 139756
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9146728515625,
      "learning_rate": 0.0002014204382428151,
      "loss": 2.7579,
      "step": 139757
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.754483699798584,
      "learning_rate": 0.00020141657483998252,
      "loss": 2.8625,
      "step": 139758
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.371849775314331,
      "learning_rate": 0.0002014127114554785,
      "loss": 2.8472,
      "step": 139759
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4145450592041016,
      "learning_rate": 0.00020140884808930368,
      "loss": 3.0577,
      "step": 139760
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2481741905212402,
      "learning_rate": 0.00020140498474145887,
      "loss": 2.9395,
      "step": 139761
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0927822589874268,
      "learning_rate": 0.0002014011214119448,
      "loss": 3.2131,
      "step": 139762
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5588393211364746,
      "learning_rate": 0.000201397258100762,
      "loss": 3.1274,
      "step": 139763
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9212993383407593,
      "learning_rate": 0.00020139339480791134,
      "loss": 2.5634,
      "step": 139764
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.34588885307312,
      "learning_rate": 0.00020138953153339348,
      "loss": 2.8275,
      "step": 139765
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9234559535980225,
      "learning_rate": 0.0002013856682772092,
      "loss": 2.8673,
      "step": 139766
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.26608943939209,
      "learning_rate": 0.00020138180503935917,
      "loss": 2.8372,
      "step": 139767
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.714907646179199,
      "learning_rate": 0.00020137794181984423,
      "loss": 3.0871,
      "step": 139768
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.307985544204712,
      "learning_rate": 0.00020137407861866492,
      "loss": 2.913,
      "step": 139769
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6261723041534424,
      "learning_rate": 0.00020137021543582196,
      "loss": 2.86,
      "step": 139770
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.450997829437256,
      "learning_rate": 0.0002013663522713162,
      "loss": 2.8375,
      "step": 139771
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.411840915679932,
      "learning_rate": 0.00020136248912514828,
      "loss": 2.7537,
      "step": 139772
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.144622325897217,
      "learning_rate": 0.00020135862599731893,
      "loss": 2.726,
      "step": 139773
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.096576452255249,
      "learning_rate": 0.000201354762887829,
      "loss": 2.9976,
      "step": 139774
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.395402431488037,
      "learning_rate": 0.00020135089979667897,
      "loss": 2.9371,
      "step": 139775
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0770649909973145,
      "learning_rate": 0.00020134703672386965,
      "loss": 2.9514,
      "step": 139776
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2640581130981445,
      "learning_rate": 0.00020134317366940176,
      "loss": 3.0395,
      "step": 139777
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1505119800567627,
      "learning_rate": 0.00020133931063327608,
      "loss": 2.9765,
      "step": 139778
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.952304720878601,
      "learning_rate": 0.00020133544761549322,
      "loss": 3.122,
      "step": 139779
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9055765867233276,
      "learning_rate": 0.00020133158461605418,
      "loss": 2.9361,
      "step": 139780
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1757426261901855,
      "learning_rate": 0.00020132772163495926,
      "loss": 2.8814,
      "step": 139781
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.7402915954589844,
      "learning_rate": 0.00020132385867220942,
      "loss": 3.1106,
      "step": 139782
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1365034580230713,
      "learning_rate": 0.00020131999572780532,
      "loss": 2.795,
      "step": 139783
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2200276851654053,
      "learning_rate": 0.0002013161328017477,
      "loss": 2.8708,
      "step": 139784
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.7762597799301147,
      "learning_rate": 0.00020131226989403725,
      "loss": 2.9578,
      "step": 139785
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1228461265563965,
      "learning_rate": 0.00020130840700467487,
      "loss": 3.1669,
      "step": 139786
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.403445243835449,
      "learning_rate": 0.00020130454413366098,
      "loss": 2.9825,
      "step": 139787
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.594881534576416,
      "learning_rate": 0.00020130068128099645,
      "loss": 2.9715,
      "step": 139788
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.253835439682007,
      "learning_rate": 0.000201296818446682,
      "loss": 2.703,
      "step": 139789
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.679971218109131,
      "learning_rate": 0.0002012929556307183,
      "loss": 2.8028,
      "step": 139790
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7300655841827393,
      "learning_rate": 0.00020128909283310611,
      "loss": 3.0593,
      "step": 139791
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.107410430908203,
      "learning_rate": 0.00020128523005384614,
      "loss": 3.0421,
      "step": 139792
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.03240704536438,
      "learning_rate": 0.00020128136729293927,
      "loss": 2.9583,
      "step": 139793
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0435874462127686,
      "learning_rate": 0.00020127750455038588,
      "loss": 2.7756,
      "step": 139794
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.037708044052124,
      "learning_rate": 0.0002012736418261869,
      "loss": 2.9065,
      "step": 139795
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.278149366378784,
      "learning_rate": 0.000201269779120343,
      "loss": 3.153,
      "step": 139796
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.7918235063552856,
      "learning_rate": 0.0002012659164328549,
      "loss": 2.7274,
      "step": 139797
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1123781204223633,
      "learning_rate": 0.00020126205376372336,
      "loss": 2.9529,
      "step": 139798
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7765872478485107,
      "learning_rate": 0.00020125819111294917,
      "loss": 3.1883,
      "step": 139799
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.9688949584960938,
      "learning_rate": 0.00020125432848053283,
      "loss": 3.0567,
      "step": 139800
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1267263889312744,
      "learning_rate": 0.00020125046586647516,
      "loss": 3.096,
      "step": 139801
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.906562566757202,
      "learning_rate": 0.0002012466032707769,
      "loss": 3.0908,
      "step": 139802
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.080540657043457,
      "learning_rate": 0.0002012427406934388,
      "loss": 3.1391,
      "step": 139803
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0614430904388428,
      "learning_rate": 0.00020123887813446145,
      "loss": 2.9272,
      "step": 139804
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1136608123779297,
      "learning_rate": 0.00020123501559384584,
      "loss": 2.8816,
      "step": 139805
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4952361583709717,
      "learning_rate": 0.00020123115307159241,
      "loss": 2.9596,
      "step": 139806
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.458527088165283,
      "learning_rate": 0.00020122729056770196,
      "loss": 2.7705,
      "step": 139807
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.268493175506592,
      "learning_rate": 0.0002012234280821752,
      "loss": 2.9531,
      "step": 139808
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1736583709716797,
      "learning_rate": 0.00020121956561501282,
      "loss": 3.0995,
      "step": 139809
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2551679611206055,
      "learning_rate": 0.00020121570316621566,
      "loss": 3.0055,
      "step": 139810
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.808852195739746,
      "learning_rate": 0.00020121184073578448,
      "loss": 3.1897,
      "step": 139811
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5646936893463135,
      "learning_rate": 0.00020120797832371977,
      "loss": 3.0356,
      "step": 139812
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1004815101623535,
      "learning_rate": 0.0002012041159300223,
      "loss": 2.8037,
      "step": 139813
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4869847297668457,
      "learning_rate": 0.0002012002535546929,
      "loss": 3.0707,
      "step": 139814
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.8011839389801025,
      "learning_rate": 0.00020119639119773222,
      "loss": 2.7455,
      "step": 139815
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2012243270874023,
      "learning_rate": 0.000201192528859141,
      "loss": 2.9885,
      "step": 139816
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.059277296066284,
      "learning_rate": 0.00020118866653892002,
      "loss": 3.0362,
      "step": 139817
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.764673948287964,
      "learning_rate": 0.0002011848042370699,
      "loss": 2.9697,
      "step": 139818
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9359164237976074,
      "learning_rate": 0.0002011809419535914,
      "loss": 2.9499,
      "step": 139819
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1731014251708984,
      "learning_rate": 0.0002011770796884852,
      "loss": 2.8483,
      "step": 139820
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.017524242401123,
      "learning_rate": 0.000201173217441752,
      "loss": 2.9366,
      "step": 139821
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9722671508789062,
      "learning_rate": 0.0002011693552133926,
      "loss": 2.9496,
      "step": 139822
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6820642948150635,
      "learning_rate": 0.00020116549300340774,
      "loss": 2.9656,
      "step": 139823
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.611633539199829,
      "learning_rate": 0.000201161630811798,
      "loss": 2.8666,
      "step": 139824
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3229868412017822,
      "learning_rate": 0.0002011577686385642,
      "loss": 3.2156,
      "step": 139825
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.38071870803833,
      "learning_rate": 0.00020115390648370714,
      "loss": 3.1013,
      "step": 139826
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9909114837646484,
      "learning_rate": 0.0002011500443472273,
      "loss": 2.9097,
      "step": 139827
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1250252723693848,
      "learning_rate": 0.00020114618222912556,
      "loss": 2.9954,
      "step": 139828
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.258718729019165,
      "learning_rate": 0.0002011423201294027,
      "loss": 3.0805,
      "step": 139829
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.211413860321045,
      "learning_rate": 0.00020113845804805924,
      "loss": 3.0496,
      "step": 139830
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8972944021224976,
      "learning_rate": 0.00020113459598509605,
      "loss": 2.7728,
      "step": 139831
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.019585371017456,
      "learning_rate": 0.00020113073394051377,
      "loss": 2.6508,
      "step": 139832
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.344261884689331,
      "learning_rate": 0.0002011268719143132,
      "loss": 3.1445,
      "step": 139833
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.925198554992676,
      "learning_rate": 0.000201123009906495,
      "loss": 2.7926,
      "step": 139834
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7097277641296387,
      "learning_rate": 0.00020111914791705993,
      "loss": 2.7731,
      "step": 139835
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2891252040863037,
      "learning_rate": 0.00020111528594600865,
      "loss": 2.7833,
      "step": 139836
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.79015851020813,
      "learning_rate": 0.00020111142399334187,
      "loss": 2.9464,
      "step": 139837
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4623494148254395,
      "learning_rate": 0.00020110756205906036,
      "loss": 3.152,
      "step": 139838
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.647953510284424,
      "learning_rate": 0.0002011037001431648,
      "loss": 3.0434,
      "step": 139839
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5237863063812256,
      "learning_rate": 0.00020109983824565599,
      "loss": 3.0356,
      "step": 139840
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.101897716522217,
      "learning_rate": 0.00020109597636653462,
      "loss": 2.852,
      "step": 139841
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.4829745292663574,
      "learning_rate": 0.00020109211450580126,
      "loss": 3.2335,
      "step": 139842
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9128928184509277,
      "learning_rate": 0.0002010882526634568,
      "loss": 3.1107,
      "step": 139843
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.383021831512451,
      "learning_rate": 0.00020108439083950188,
      "loss": 2.7409,
      "step": 139844
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5597591400146484,
      "learning_rate": 0.0002010805290339372,
      "loss": 2.9514,
      "step": 139845
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.928546190261841,
      "learning_rate": 0.00020107666724676358,
      "loss": 3.0671,
      "step": 139846
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5604770183563232,
      "learning_rate": 0.0002010728054779818,
      "loss": 2.7873,
      "step": 139847
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.382591962814331,
      "learning_rate": 0.00020106894372759227,
      "loss": 2.968,
      "step": 139848
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0045437812805176,
      "learning_rate": 0.0002010650819955959,
      "loss": 3.1375,
      "step": 139849
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.950766086578369,
      "learning_rate": 0.00020106122028199343,
      "loss": 3.0075,
      "step": 139850
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.609558343887329,
      "learning_rate": 0.00020105735858678555,
      "loss": 2.9563,
      "step": 139851
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1536636352539062,
      "learning_rate": 0.00020105349690997294,
      "loss": 2.9057,
      "step": 139852
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4961998462677,
      "learning_rate": 0.0002010496352515565,
      "loss": 3.0075,
      "step": 139853
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.3060526847839355,
      "learning_rate": 0.00020104577361153668,
      "loss": 3.1837,
      "step": 139854
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2279114723205566,
      "learning_rate": 0.00020104191198991436,
      "loss": 2.7922,
      "step": 139855
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8572434186935425,
      "learning_rate": 0.00020103805038669014,
      "loss": 2.9182,
      "step": 139856
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.850740432739258,
      "learning_rate": 0.00020103418880186485,
      "loss": 2.9905,
      "step": 139857
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.866264820098877,
      "learning_rate": 0.00020103032723543918,
      "loss": 2.9714,
      "step": 139858
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5108444690704346,
      "learning_rate": 0.00020102646568741382,
      "loss": 3.0012,
      "step": 139859
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1361770629882812,
      "learning_rate": 0.00020102260415778963,
      "loss": 3.103,
      "step": 139860
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0409634113311768,
      "learning_rate": 0.0002010187426465671,
      "loss": 3.0371,
      "step": 139861
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.962531566619873,
      "learning_rate": 0.000201014881153747,
      "loss": 3.0384,
      "step": 139862
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0608983039855957,
      "learning_rate": 0.0002010110196793302,
      "loss": 2.8981,
      "step": 139863
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9746015071868896,
      "learning_rate": 0.00020100715822331724,
      "loss": 2.9929,
      "step": 139864
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.610652446746826,
      "learning_rate": 0.00020100329678570896,
      "loss": 2.7833,
      "step": 139865
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7546982765197754,
      "learning_rate": 0.00020099943536650613,
      "loss": 2.5981,
      "step": 139866
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8793821334838867,
      "learning_rate": 0.00020099557396570928,
      "loss": 3.2323,
      "step": 139867
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5313029289245605,
      "learning_rate": 0.00020099171258331917,
      "loss": 2.9245,
      "step": 139868
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6027886867523193,
      "learning_rate": 0.00020098785121933663,
      "loss": 3.0772,
      "step": 139869
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0225424766540527,
      "learning_rate": 0.00020098398987376231,
      "loss": 3.3954,
      "step": 139870
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7915046215057373,
      "learning_rate": 0.00020098012854659694,
      "loss": 2.9007,
      "step": 139871
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.327714443206787,
      "learning_rate": 0.00020097626723784132,
      "loss": 2.772,
      "step": 139872
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.626039981842041,
      "learning_rate": 0.00020097240594749597,
      "loss": 3.0463,
      "step": 139873
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5366404056549072,
      "learning_rate": 0.00020096854467556175,
      "loss": 3.0651,
      "step": 139874
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4103891849517822,
      "learning_rate": 0.0002009646834220393,
      "loss": 2.8488,
      "step": 139875
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6539995670318604,
      "learning_rate": 0.00020096082218692938,
      "loss": 3.1895,
      "step": 139876
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.125175714492798,
      "learning_rate": 0.00020095696097023271,
      "loss": 3.1362,
      "step": 139877
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.706139326095581,
      "learning_rate": 0.00020095309977195022,
      "loss": 2.8396,
      "step": 139878
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2964112758636475,
      "learning_rate": 0.00020094923859208222,
      "loss": 2.8774,
      "step": 139879
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.982707977294922,
      "learning_rate": 0.00020094537743062962,
      "loss": 2.7822,
      "step": 139880
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3691446781158447,
      "learning_rate": 0.0002009415162875932,
      "loss": 3.0873,
      "step": 139881
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.111177444458008,
      "learning_rate": 0.00020093765516297356,
      "loss": 3.0688,
      "step": 139882
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.499145269393921,
      "learning_rate": 0.00020093379405677152,
      "loss": 2.8168,
      "step": 139883
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3894360065460205,
      "learning_rate": 0.00020092993296898787,
      "loss": 2.8216,
      "step": 139884
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.975381851196289,
      "learning_rate": 0.0002009260718996231,
      "loss": 3.0678,
      "step": 139885
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2007462978363037,
      "learning_rate": 0.00020092221084867803,
      "loss": 2.7526,
      "step": 139886
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.182812213897705,
      "learning_rate": 0.0002009183498161534,
      "loss": 3.0119,
      "step": 139887
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9086625576019287,
      "learning_rate": 0.0002009144888020499,
      "loss": 3.0935,
      "step": 139888
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.338263511657715,
      "learning_rate": 0.00020091062780636825,
      "loss": 3.0706,
      "step": 139889
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.279250144958496,
      "learning_rate": 0.00020090676682910934,
      "loss": 3.2437,
      "step": 139890
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0985846519470215,
      "learning_rate": 0.00020090290587027363,
      "loss": 2.9772,
      "step": 139891
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1155619621276855,
      "learning_rate": 0.00020089904492986193,
      "loss": 2.7628,
      "step": 139892
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2436580657958984,
      "learning_rate": 0.00020089518400787494,
      "loss": 3.0045,
      "step": 139893
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.664139986038208,
      "learning_rate": 0.00020089132310431346,
      "loss": 3.0728,
      "step": 139894
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4326040744781494,
      "learning_rate": 0.0002008874622191781,
      "loss": 2.95,
      "step": 139895
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2455577850341797,
      "learning_rate": 0.00020088360135246978,
      "loss": 2.8877,
      "step": 139896
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5699808597564697,
      "learning_rate": 0.00020087974050418895,
      "loss": 2.9589,
      "step": 139897
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.5372865200042725,
      "learning_rate": 0.00020087587967433646,
      "loss": 2.8085,
      "step": 139898
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5386080741882324,
      "learning_rate": 0.00020087201886291296,
      "loss": 2.8802,
      "step": 139899
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4595189094543457,
      "learning_rate": 0.00020086815806991926,
      "loss": 2.8531,
      "step": 139900
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2698323726654053,
      "learning_rate": 0.00020086429729535604,
      "loss": 2.97,
      "step": 139901
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.73835825920105,
      "learning_rate": 0.0002008604365392241,
      "loss": 2.9913,
      "step": 139902
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.101810932159424,
      "learning_rate": 0.00020085657580152402,
      "loss": 3.0517,
      "step": 139903
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2712621688842773,
      "learning_rate": 0.00020085271508225655,
      "loss": 2.7626,
      "step": 139904
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.412604808807373,
      "learning_rate": 0.0002008488543814224,
      "loss": 3.1113,
      "step": 139905
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.04258394241333,
      "learning_rate": 0.00020084499369902234,
      "loss": 2.7894,
      "step": 139906
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.217216730117798,
      "learning_rate": 0.0002008411330350571,
      "loss": 2.9713,
      "step": 139907
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0345914363861084,
      "learning_rate": 0.0002008372723895274,
      "loss": 2.875,
      "step": 139908
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1322476863861084,
      "learning_rate": 0.00020083341176243386,
      "loss": 2.9609,
      "step": 139909
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.438034772872925,
      "learning_rate": 0.00020082955115377732,
      "loss": 3.0886,
      "step": 139910
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9845759868621826,
      "learning_rate": 0.0002008256905635584,
      "loss": 2.8366,
      "step": 139911
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4010303020477295,
      "learning_rate": 0.00020082182999177781,
      "loss": 3.0199,
      "step": 139912
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6745500564575195,
      "learning_rate": 0.00020081796943843634,
      "loss": 3.0485,
      "step": 139913
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.229013204574585,
      "learning_rate": 0.00020081410890353474,
      "loss": 3.0184,
      "step": 139914
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0993845462799072,
      "learning_rate": 0.00020081024838707363,
      "loss": 3.0149,
      "step": 139915
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6803700923919678,
      "learning_rate": 0.00020080638788905374,
      "loss": 2.8651,
      "step": 139916
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4287455081939697,
      "learning_rate": 0.00020080252740947583,
      "loss": 3.014,
      "step": 139917
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8128654956817627,
      "learning_rate": 0.00020079866694834066,
      "loss": 2.9412,
      "step": 139918
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4128921031951904,
      "learning_rate": 0.00020079480650564882,
      "loss": 2.9065,
      "step": 139919
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.322164535522461,
      "learning_rate": 0.00020079094608140113,
      "loss": 2.8411,
      "step": 139920
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.297757625579834,
      "learning_rate": 0.00020078708567559828,
      "loss": 2.974,
      "step": 139921
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.9066312313079834,
      "learning_rate": 0.00020078322528824092,
      "loss": 2.968,
      "step": 139922
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1848225593566895,
      "learning_rate": 0.0002007793649193299,
      "loss": 3.1132,
      "step": 139923
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.145758867263794,
      "learning_rate": 0.00020077550456886584,
      "loss": 2.9258,
      "step": 139924
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1038155555725098,
      "learning_rate": 0.00020077164423684953,
      "loss": 3.0955,
      "step": 139925
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.300304651260376,
      "learning_rate": 0.0002007677839232816,
      "loss": 3.058,
      "step": 139926
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2723710536956787,
      "learning_rate": 0.00020076392362816285,
      "loss": 3.1071,
      "step": 139927
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.018941879272461,
      "learning_rate": 0.00020076006335149394,
      "loss": 3.1333,
      "step": 139928
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9046229124069214,
      "learning_rate": 0.00020075620309327556,
      "loss": 2.9344,
      "step": 139929
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.110252618789673,
      "learning_rate": 0.00020075234285350853,
      "loss": 3.0113,
      "step": 139930
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1462597846984863,
      "learning_rate": 0.00020074848263219346,
      "loss": 3.2214,
      "step": 139931
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.504091501235962,
      "learning_rate": 0.00020074462242933124,
      "loss": 2.9748,
      "step": 139932
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7262964248657227,
      "learning_rate": 0.0002007407622449224,
      "loss": 2.9496,
      "step": 139933
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4916324615478516,
      "learning_rate": 0.0002007369020789677,
      "loss": 2.9738,
      "step": 139934
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.9018702507019043,
      "learning_rate": 0.0002007330419314679,
      "loss": 2.9817,
      "step": 139935
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.3292627334594727,
      "learning_rate": 0.00020072918180242367,
      "loss": 2.761,
      "step": 139936
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.852466106414795,
      "learning_rate": 0.00020072532169183578,
      "loss": 3.0504,
      "step": 139937
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5330402851104736,
      "learning_rate": 0.00020072146159970492,
      "loss": 3.0673,
      "step": 139938
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.288304328918457,
      "learning_rate": 0.00020071760152603192,
      "loss": 3.1419,
      "step": 139939
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.350947856903076,
      "learning_rate": 0.0002007137414708173,
      "loss": 3.0248,
      "step": 139940
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4847519397735596,
      "learning_rate": 0.00020070988143406187,
      "loss": 3.1432,
      "step": 139941
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7127373218536377,
      "learning_rate": 0.00020070602141576634,
      "loss": 2.723,
      "step": 139942
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.085824966430664,
      "learning_rate": 0.0002007021614159314,
      "loss": 2.9798,
      "step": 139943
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.09352970123291,
      "learning_rate": 0.00020069830143455784,
      "loss": 3.2247,
      "step": 139944
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.016428232192993,
      "learning_rate": 0.00020069444147164642,
      "loss": 2.7574,
      "step": 139945
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2333953380584717,
      "learning_rate": 0.00020069058152719768,
      "loss": 3.0387,
      "step": 139946
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.568493366241455,
      "learning_rate": 0.00020068672160121248,
      "loss": 2.8311,
      "step": 139947
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1776092052459717,
      "learning_rate": 0.00020068286169369143,
      "loss": 2.9366,
      "step": 139948
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2464964389801025,
      "learning_rate": 0.00020067900180463537,
      "loss": 2.8878,
      "step": 139949
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.365468740463257,
      "learning_rate": 0.00020067514193404488,
      "loss": 3.0964,
      "step": 139950
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.349334239959717,
      "learning_rate": 0.00020067128208192095,
      "loss": 2.8932,
      "step": 139951
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.659507989883423,
      "learning_rate": 0.00020066742224826394,
      "loss": 2.8617,
      "step": 139952
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0091285705566406,
      "learning_rate": 0.00020066356243307474,
      "loss": 2.9603,
      "step": 139953
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.309433937072754,
      "learning_rate": 0.00020065970263635408,
      "loss": 3.2215,
      "step": 139954
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6090352535247803,
      "learning_rate": 0.00020065584285810264,
      "loss": 2.8929,
      "step": 139955
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1888129711151123,
      "learning_rate": 0.00020065198309832116,
      "loss": 2.9384,
      "step": 139956
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.373617649078369,
      "learning_rate": 0.00020064812335701047,
      "loss": 2.9709,
      "step": 139957
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.396411657333374,
      "learning_rate": 0.00020064426363417107,
      "loss": 3.0194,
      "step": 139958
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.028473138809204,
      "learning_rate": 0.00020064040392980375,
      "loss": 3.06,
      "step": 139959
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5616557598114014,
      "learning_rate": 0.00020063654424390924,
      "loss": 3.0578,
      "step": 139960
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8910291194915771,
      "learning_rate": 0.00020063268457648828,
      "loss": 2.9626,
      "step": 139961
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.000472068786621,
      "learning_rate": 0.0002006288249275416,
      "loss": 2.8469,
      "step": 139962
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.9924299716949463,
      "learning_rate": 0.00020062496529707,
      "loss": 3.0644,
      "step": 139963
    },
    {
      "epoch": 1.82,
      "grad_norm": 5.924160003662109,
      "learning_rate": 0.00020062110568507398,
      "loss": 2.8766,
      "step": 139964
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.987797498703003,
      "learning_rate": 0.00020061724609155438,
      "loss": 3.1101,
      "step": 139965
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.874924421310425,
      "learning_rate": 0.0002006133865165119,
      "loss": 3.2692,
      "step": 139966
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.588994264602661,
      "learning_rate": 0.00020060952695994728,
      "loss": 2.9409,
      "step": 139967
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.26693058013916,
      "learning_rate": 0.0002006056674218612,
      "loss": 2.7779,
      "step": 139968
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2016983032226562,
      "learning_rate": 0.00020060180790225455,
      "loss": 2.8312,
      "step": 139969
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.939396381378174,
      "learning_rate": 0.00020059794840112776,
      "loss": 3.0353,
      "step": 139970
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.115961790084839,
      "learning_rate": 0.00020059408891848167,
      "loss": 2.9246,
      "step": 139971
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8263370990753174,
      "learning_rate": 0.0002005902294543171,
      "loss": 2.9934,
      "step": 139972
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3676388263702393,
      "learning_rate": 0.00020058637000863456,
      "loss": 2.865,
      "step": 139973
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4609131813049316,
      "learning_rate": 0.00020058251058143497,
      "loss": 3.1142,
      "step": 139974
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9536503553390503,
      "learning_rate": 0.0002005786511727191,
      "loss": 3.0561,
      "step": 139975
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4494831562042236,
      "learning_rate": 0.00020057479178248742,
      "loss": 3.0929,
      "step": 139976
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3500125408172607,
      "learning_rate": 0.0002005709324107407,
      "loss": 3.115,
      "step": 139977
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9739964008331299,
      "learning_rate": 0.00020056707305747976,
      "loss": 3.1841,
      "step": 139978
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8564600944519043,
      "learning_rate": 0.00020056321372270526,
      "loss": 3.0375,
      "step": 139979
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1795876026153564,
      "learning_rate": 0.00020055935440641795,
      "loss": 2.9061,
      "step": 139980
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.821196436882019,
      "learning_rate": 0.00020055549510861865,
      "loss": 2.9339,
      "step": 139981
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.693321943283081,
      "learning_rate": 0.0002005516358293078,
      "loss": 2.8666,
      "step": 139982
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9494580030441284,
      "learning_rate": 0.00020054777656848638,
      "loss": 3.2753,
      "step": 139983
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2789950370788574,
      "learning_rate": 0.00020054391732615494,
      "loss": 2.9139,
      "step": 139984
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.58835768699646,
      "learning_rate": 0.00020054005810231425,
      "loss": 2.5918,
      "step": 139985
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0910043716430664,
      "learning_rate": 0.00020053619889696505,
      "loss": 3.2354,
      "step": 139986
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5238404273986816,
      "learning_rate": 0.00020053233971010814,
      "loss": 2.7991,
      "step": 139987
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2378432750701904,
      "learning_rate": 0.0002005284805417441,
      "loss": 2.8323,
      "step": 139988
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.923396110534668,
      "learning_rate": 0.00020052462139187366,
      "loss": 2.8385,
      "step": 139989
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.739234685897827,
      "learning_rate": 0.00020052076226049754,
      "loss": 3.2089,
      "step": 139990
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1319849491119385,
      "learning_rate": 0.00020051690314761654,
      "loss": 2.9653,
      "step": 139991
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.48152494430542,
      "learning_rate": 0.0002005130440532313,
      "loss": 2.9107,
      "step": 139992
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1788246631622314,
      "learning_rate": 0.00020050918497734255,
      "loss": 3.1664,
      "step": 139993
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.588190793991089,
      "learning_rate": 0.00020050532591995107,
      "loss": 3.0971,
      "step": 139994
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.124307870864868,
      "learning_rate": 0.00020050146688105756,
      "loss": 2.7962,
      "step": 139995
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.010737180709839,
      "learning_rate": 0.00020049760786066266,
      "loss": 2.8191,
      "step": 139996
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.461829423904419,
      "learning_rate": 0.0002004937488587671,
      "loss": 3.0407,
      "step": 139997
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.002302646636963,
      "learning_rate": 0.00020048988987537164,
      "loss": 2.9279,
      "step": 139998
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3633291721343994,
      "learning_rate": 0.00020048603091047697,
      "loss": 2.9191,
      "step": 139999
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.103769302368164,
      "learning_rate": 0.00020048217196408393,
      "loss": 2.9729,
      "step": 140000
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.7281668186187744,
      "learning_rate": 0.00020047831303619305,
      "loss": 3.2382,
      "step": 140001
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4532201290130615,
      "learning_rate": 0.00020047445412680516,
      "loss": 3.1075,
      "step": 140002
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9407931566238403,
      "learning_rate": 0.0002004705952359209,
      "loss": 2.8724,
      "step": 140003
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8888602256774902,
      "learning_rate": 0.00020046673636354108,
      "loss": 2.9348,
      "step": 140004
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5488569736480713,
      "learning_rate": 0.00020046287750966632,
      "loss": 2.9736,
      "step": 140005
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.039602756500244,
      "learning_rate": 0.0002004590186742975,
      "loss": 3.0218,
      "step": 140006
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.59779691696167,
      "learning_rate": 0.00020045515985743514,
      "loss": 2.7145,
      "step": 140007
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.940797209739685,
      "learning_rate": 0.00020045130105908002,
      "loss": 2.8007,
      "step": 140008
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.745252847671509,
      "learning_rate": 0.00020044744227923299,
      "loss": 2.9629,
      "step": 140009
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.750584125518799,
      "learning_rate": 0.00020044358351789456,
      "loss": 3.0993,
      "step": 140010
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4739396572113037,
      "learning_rate": 0.00020043972477506558,
      "loss": 2.946,
      "step": 140011
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.208862543106079,
      "learning_rate": 0.00020043586605074677,
      "loss": 3.0684,
      "step": 140012
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3149003982543945,
      "learning_rate": 0.00020043200734493877,
      "loss": 3.1361,
      "step": 140013
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.9813692569732666,
      "learning_rate": 0.0002004281486576423,
      "loss": 2.9006,
      "step": 140014
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9636824131011963,
      "learning_rate": 0.00020042428998885814,
      "loss": 3.3223,
      "step": 140015
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.219738245010376,
      "learning_rate": 0.000200420431338587,
      "loss": 3.1549,
      "step": 140016
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1937265396118164,
      "learning_rate": 0.00020041657270682963,
      "loss": 2.8923,
      "step": 140017
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.152153253555298,
      "learning_rate": 0.0002004127140935867,
      "loss": 3.0401,
      "step": 140018
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4632346630096436,
      "learning_rate": 0.00020040885549885883,
      "loss": 2.8679,
      "step": 140019
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0941808223724365,
      "learning_rate": 0.0002004049969226469,
      "loss": 3.2736,
      "step": 140020
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.82833194732666,
      "learning_rate": 0.00020040113836495153,
      "loss": 2.9211,
      "step": 140021
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.987463355064392,
      "learning_rate": 0.00020039727982577347,
      "loss": 2.948,
      "step": 140022
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.233879804611206,
      "learning_rate": 0.00020039342130511345,
      "loss": 3.0611,
      "step": 140023
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.099057197570801,
      "learning_rate": 0.00020038956280297226,
      "loss": 2.8468,
      "step": 140024
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8064560890197754,
      "learning_rate": 0.00020038570431935043,
      "loss": 3.1132,
      "step": 140025
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9821308851242065,
      "learning_rate": 0.00020038184585424878,
      "loss": 2.9812,
      "step": 140026
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0148942470550537,
      "learning_rate": 0.000200377987407668,
      "loss": 2.8754,
      "step": 140027
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.733367919921875,
      "learning_rate": 0.00020037412897960886,
      "loss": 2.7861,
      "step": 140028
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.915594220161438,
      "learning_rate": 0.00020037027057007205,
      "loss": 2.9178,
      "step": 140029
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0580458641052246,
      "learning_rate": 0.00020036641217905843,
      "loss": 3.0861,
      "step": 140030
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.008898973464966,
      "learning_rate": 0.00020036255380656838,
      "loss": 3.0448,
      "step": 140031
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.019272804260254,
      "learning_rate": 0.00020035869545260285,
      "loss": 2.7952,
      "step": 140032
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5063393115997314,
      "learning_rate": 0.00020035483711716254,
      "loss": 2.9154,
      "step": 140033
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8925913572311401,
      "learning_rate": 0.00020035097880024815,
      "loss": 3.1937,
      "step": 140034
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2296624183654785,
      "learning_rate": 0.00020034712050186036,
      "loss": 2.7466,
      "step": 140035
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7581777572631836,
      "learning_rate": 0.0002003432622220001,
      "loss": 2.8217,
      "step": 140036
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0025393962860107,
      "learning_rate": 0.00020033940396066774,
      "loss": 3.0809,
      "step": 140037
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.278701066970825,
      "learning_rate": 0.00020033554571786415,
      "loss": 2.9984,
      "step": 140038
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.749667167663574,
      "learning_rate": 0.0002003316874935901,
      "loss": 2.8302,
      "step": 140039
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0923991203308105,
      "learning_rate": 0.00020032782928784627,
      "loss": 3.1767,
      "step": 140040
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.038208484649658,
      "learning_rate": 0.00020032397110063336,
      "loss": 3.1127,
      "step": 140041
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1789586544036865,
      "learning_rate": 0.00020032011293195223,
      "loss": 3.0549,
      "step": 140042
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.377243995666504,
      "learning_rate": 0.00020031625478180332,
      "loss": 2.9326,
      "step": 140043
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.5287485122680664,
      "learning_rate": 0.00020031239665018754,
      "loss": 2.7383,
      "step": 140044
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2153422832489014,
      "learning_rate": 0.0002003085385371056,
      "loss": 2.9466,
      "step": 140045
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9259471893310547,
      "learning_rate": 0.00020030468044255812,
      "loss": 3.0547,
      "step": 140046
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7649147510528564,
      "learning_rate": 0.0002003008223665459,
      "loss": 3.0023,
      "step": 140047
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.160649538040161,
      "learning_rate": 0.0002002969643090698,
      "loss": 2.9828,
      "step": 140048
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.273979902267456,
      "learning_rate": 0.00020029310627013022,
      "loss": 2.9915,
      "step": 140049
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.377664804458618,
      "learning_rate": 0.00020028924824972802,
      "loss": 3.0807,
      "step": 140050
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.370633602142334,
      "learning_rate": 0.00020028539024786397,
      "loss": 2.7608,
      "step": 140051
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.306150436401367,
      "learning_rate": 0.0002002815322645387,
      "loss": 3.1263,
      "step": 140052
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2011730670928955,
      "learning_rate": 0.00020027767429975298,
      "loss": 2.9911,
      "step": 140053
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0585219860076904,
      "learning_rate": 0.00020027381635350771,
      "loss": 2.8659,
      "step": 140054
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4338762760162354,
      "learning_rate": 0.00020026995842580323,
      "loss": 2.8848,
      "step": 140055
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5534136295318604,
      "learning_rate": 0.0002002661005166405,
      "loss": 3.0979,
      "step": 140056
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2498209476470947,
      "learning_rate": 0.00020026224262602012,
      "loss": 2.9812,
      "step": 140057
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.055570363998413,
      "learning_rate": 0.00020025838475394293,
      "loss": 3.1254,
      "step": 140058
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2102439403533936,
      "learning_rate": 0.00020025452690040959,
      "loss": 2.9705,
      "step": 140059
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3082823753356934,
      "learning_rate": 0.00020025066906542078,
      "loss": 3.0743,
      "step": 140060
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.337468385696411,
      "learning_rate": 0.00020024681124897737,
      "loss": 2.8639,
      "step": 140061
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4869308471679688,
      "learning_rate": 0.00020024295345107989,
      "loss": 2.7095,
      "step": 140062
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.277229070663452,
      "learning_rate": 0.0002002390956717291,
      "loss": 3.2155,
      "step": 140063
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9267332553863525,
      "learning_rate": 0.0002002352379109257,
      "loss": 2.8803,
      "step": 140064
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8579457998275757,
      "learning_rate": 0.00020023138016867048,
      "loss": 2.6233,
      "step": 140065
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2080562114715576,
      "learning_rate": 0.0002002275224449642,
      "loss": 2.8179,
      "step": 140066
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4470901489257812,
      "learning_rate": 0.00020022366473980752,
      "loss": 2.96,
      "step": 140067
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2881526947021484,
      "learning_rate": 0.0002002198070532011,
      "loss": 2.9525,
      "step": 140068
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9756501913070679,
      "learning_rate": 0.00020021594938514565,
      "loss": 2.9732,
      "step": 140069
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6032626628875732,
      "learning_rate": 0.00020021209173564196,
      "loss": 2.7456,
      "step": 140070
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.499398708343506,
      "learning_rate": 0.00020020823410469074,
      "loss": 2.9004,
      "step": 140071
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.901068687438965,
      "learning_rate": 0.0002002043764922927,
      "loss": 2.7607,
      "step": 140072
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.498009204864502,
      "learning_rate": 0.00020020051889844866,
      "loss": 3.0224,
      "step": 140073
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0955259799957275,
      "learning_rate": 0.00020019666132315908,
      "loss": 2.9435,
      "step": 140074
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.3042924404144287,
      "learning_rate": 0.00020019280376642486,
      "loss": 3.0563,
      "step": 140075
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1785566806793213,
      "learning_rate": 0.00020018894622824665,
      "loss": 2.8147,
      "step": 140076
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8893778324127197,
      "learning_rate": 0.00020018508870862524,
      "loss": 2.8078,
      "step": 140077
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.174044609069824,
      "learning_rate": 0.00020018123120756128,
      "loss": 3.1021,
      "step": 140078
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.178025007247925,
      "learning_rate": 0.00020017737372505557,
      "loss": 2.8744,
      "step": 140079
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2518224716186523,
      "learning_rate": 0.00020017351626110876,
      "loss": 2.6842,
      "step": 140080
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.39037823677063,
      "learning_rate": 0.00020016965881572156,
      "loss": 2.9491,
      "step": 140081
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8558423519134521,
      "learning_rate": 0.00020016580138889468,
      "loss": 2.9942,
      "step": 140082
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9132972955703735,
      "learning_rate": 0.00020016194398062885,
      "loss": 3.0428,
      "step": 140083
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1421258449554443,
      "learning_rate": 0.0002001580865909248,
      "loss": 2.9955,
      "step": 140084
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5461995601654053,
      "learning_rate": 0.0002001542292197833,
      "loss": 3.0042,
      "step": 140085
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9026340246200562,
      "learning_rate": 0.00020015037186720498,
      "loss": 3.0672,
      "step": 140086
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.141582727432251,
      "learning_rate": 0.00020014651453319066,
      "loss": 3.0666,
      "step": 140087
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5232653617858887,
      "learning_rate": 0.0002001426572177409,
      "loss": 3.0952,
      "step": 140088
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0278587341308594,
      "learning_rate": 0.00020013879992085654,
      "loss": 3.1781,
      "step": 140089
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.1773033142089844,
      "learning_rate": 0.0002001349426425382,
      "loss": 2.7958,
      "step": 140090
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6522085666656494,
      "learning_rate": 0.00020013108538278676,
      "loss": 3.1381,
      "step": 140091
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9003785848617554,
      "learning_rate": 0.00020012722814160278,
      "loss": 3.066,
      "step": 140092
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.982254981994629,
      "learning_rate": 0.000200123370918987,
      "loss": 3.0114,
      "step": 140093
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5697450637817383,
      "learning_rate": 0.00020011951371494028,
      "loss": 2.8884,
      "step": 140094
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.782196283340454,
      "learning_rate": 0.00020011565652946315,
      "loss": 2.9733,
      "step": 140095
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.064574956893921,
      "learning_rate": 0.00020011179936255637,
      "loss": 2.9353,
      "step": 140096
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.2955055236816406,
      "learning_rate": 0.00020010794221422078,
      "loss": 2.9121,
      "step": 140097
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1554980278015137,
      "learning_rate": 0.00020010408508445697,
      "loss": 2.9686,
      "step": 140098
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6508312225341797,
      "learning_rate": 0.00020010022797326566,
      "loss": 2.9884,
      "step": 140099
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1466281414031982,
      "learning_rate": 0.00020009637088064762,
      "loss": 3.0342,
      "step": 140100
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9939419031143188,
      "learning_rate": 0.0002000925138066036,
      "loss": 2.8325,
      "step": 140101
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.742973566055298,
      "learning_rate": 0.00020008865675113423,
      "loss": 3.0731,
      "step": 140102
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7792856693267822,
      "learning_rate": 0.0002000847997142403,
      "loss": 2.91,
      "step": 140103
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4689767360687256,
      "learning_rate": 0.00020008094269592246,
      "loss": 2.9541,
      "step": 140104
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.785300612449646,
      "learning_rate": 0.00020007708569618144,
      "loss": 2.8022,
      "step": 140105
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0794103145599365,
      "learning_rate": 0.00020007322871501797,
      "loss": 2.9886,
      "step": 140106
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.7532877922058105,
      "learning_rate": 0.00020006937175243275,
      "loss": 2.9731,
      "step": 140107
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7558493614196777,
      "learning_rate": 0.00020006551480842658,
      "loss": 2.9751,
      "step": 140108
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1916098594665527,
      "learning_rate": 0.00020006165788300021,
      "loss": 2.8969,
      "step": 140109
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.85552179813385,
      "learning_rate": 0.00020005780097615415,
      "loss": 3.4013,
      "step": 140110
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.0230603218078613,
      "learning_rate": 0.00020005394408788922,
      "loss": 2.9838,
      "step": 140111
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.843848705291748,
      "learning_rate": 0.00020005008721820617,
      "loss": 2.8023,
      "step": 140112
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2653136253356934,
      "learning_rate": 0.00020004623036710566,
      "loss": 2.9002,
      "step": 140113
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.451056718826294,
      "learning_rate": 0.00020004237353458848,
      "loss": 2.9206,
      "step": 140114
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8210413455963135,
      "learning_rate": 0.0002000385167206554,
      "loss": 3.0558,
      "step": 140115
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.524223566055298,
      "learning_rate": 0.00020003465992530692,
      "loss": 3.1963,
      "step": 140116
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.4330432415008545,
      "learning_rate": 0.0002000308031485439,
      "loss": 2.8383,
      "step": 140117
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3913826942443848,
      "learning_rate": 0.00020002694639036705,
      "loss": 2.9326,
      "step": 140118
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.6522059440612793,
      "learning_rate": 0.00020002308965077708,
      "loss": 2.7044,
      "step": 140119
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.352907180786133,
      "learning_rate": 0.00020001923292977465,
      "loss": 3.0816,
      "step": 140120
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.745833158493042,
      "learning_rate": 0.00020001537622736074,
      "loss": 3.0348,
      "step": 140121
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.2745773792266846,
      "learning_rate": 0.00020001151954353568,
      "loss": 3.1894,
      "step": 140122
    },
    {
      "epoch": 1.82,
      "grad_norm": 5.042169570922852,
      "learning_rate": 0.00020000766287830036,
      "loss": 2.8064,
      "step": 140123
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.250986337661743,
      "learning_rate": 0.00020000380623165552,
      "loss": 2.93,
      "step": 140124
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9360274076461792,
      "learning_rate": 0.00019999994960360188,
      "loss": 3.0834,
      "step": 140125
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.173083543777466,
      "learning_rate": 0.00019999609299414014,
      "loss": 2.9707,
      "step": 140126
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3911190032958984,
      "learning_rate": 0.00019999223640327097,
      "loss": 2.8784,
      "step": 140127
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.637241840362549,
      "learning_rate": 0.0001999883798309953,
      "loss": 2.884,
      "step": 140128
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.7483978271484375,
      "learning_rate": 0.00019998452327731352,
      "loss": 2.7182,
      "step": 140129
    },
    {
      "epoch": 1.82,
      "grad_norm": 4.653371810913086,
      "learning_rate": 0.0001999806667422265,
      "loss": 3.2624,
      "step": 140130
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0474050045013428,
      "learning_rate": 0.000199976810225735,
      "loss": 2.8624,
      "step": 140131
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.390408515930176,
      "learning_rate": 0.00019997295372783968,
      "loss": 3.0554,
      "step": 140132
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8267407417297363,
      "learning_rate": 0.0001999690972485413,
      "loss": 3.0828,
      "step": 140133
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.8072690963745117,
      "learning_rate": 0.00019996524078784065,
      "loss": 3.0038,
      "step": 140134
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0139458179473877,
      "learning_rate": 0.00019996138434573822,
      "loss": 2.9474,
      "step": 140135
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.9780967235565186,
      "learning_rate": 0.00019995752792223485,
      "loss": 2.9098,
      "step": 140136
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0660743713378906,
      "learning_rate": 0.0001999536715173313,
      "loss": 3.2493,
      "step": 140137
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0469305515289307,
      "learning_rate": 0.0001999498151310282,
      "loss": 3.0372,
      "step": 140138
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.434959650039673,
      "learning_rate": 0.00019994595876332636,
      "loss": 3.249,
      "step": 140139
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.976578712463379,
      "learning_rate": 0.00019994210241422659,
      "loss": 2.8822,
      "step": 140140
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1947197914123535,
      "learning_rate": 0.00019993824608372933,
      "loss": 2.9613,
      "step": 140141
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.6995866298675537,
      "learning_rate": 0.0001999343897718354,
      "loss": 3.2208,
      "step": 140142
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.529481887817383,
      "learning_rate": 0.00019993053347854562,
      "loss": 2.8064,
      "step": 140143
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.990669846534729,
      "learning_rate": 0.00019992667720386057,
      "loss": 3.0922,
      "step": 140144
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3334429264068604,
      "learning_rate": 0.0001999228209477811,
      "loss": 3.008,
      "step": 140145
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.9621145725250244,
      "learning_rate": 0.00019991896471030797,
      "loss": 2.7444,
      "step": 140146
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0794942378997803,
      "learning_rate": 0.0001999151084914417,
      "loss": 2.9454,
      "step": 140147
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5510988235473633,
      "learning_rate": 0.00019991125229118303,
      "loss": 3.0575,
      "step": 140148
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1497554779052734,
      "learning_rate": 0.00019990739610953282,
      "loss": 2.9754,
      "step": 140149
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.636770725250244,
      "learning_rate": 0.00019990353994649168,
      "loss": 2.9925,
      "step": 140150
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0577714443206787,
      "learning_rate": 0.00019989968380206038,
      "loss": 3.0666,
      "step": 140151
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5491607189178467,
      "learning_rate": 0.00019989582767623973,
      "loss": 3.1568,
      "step": 140152
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.1127243041992188,
      "learning_rate": 0.00019989197156903017,
      "loss": 2.9819,
      "step": 140153
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.188502788543701,
      "learning_rate": 0.00019988811548043265,
      "loss": 2.8175,
      "step": 140154
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.3444981575012207,
      "learning_rate": 0.00019988425941044777,
      "loss": 2.7164,
      "step": 140155
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.4820072650909424,
      "learning_rate": 0.00019988040335907635,
      "loss": 2.8888,
      "step": 140156
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.5230579376220703,
      "learning_rate": 0.000199876547326319,
      "loss": 3.0788,
      "step": 140157
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.186690330505371,
      "learning_rate": 0.00019987269131217664,
      "loss": 2.9866,
      "step": 140158
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.938224196434021,
      "learning_rate": 0.00019986883531664972,
      "loss": 3.3205,
      "step": 140159
    },
    {
      "epoch": 1.82,
      "grad_norm": 1.8846933841705322,
      "learning_rate": 0.00019986497933973906,
      "loss": 3.2594,
      "step": 140160
    },
    {
      "epoch": 1.82,
      "grad_norm": 2.0071280002593994,
      "learning_rate": 0.00019986112338144538,
      "loss": 2.8504,
      "step": 140161
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6123948097229004,
      "learning_rate": 0.0001998572674417694,
      "loss": 3.2561,
      "step": 140162
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9910064935684204,
      "learning_rate": 0.00019985341152071187,
      "loss": 2.849,
      "step": 140163
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6546008586883545,
      "learning_rate": 0.0001998495556182736,
      "loss": 2.7731,
      "step": 140164
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.04972505569458,
      "learning_rate": 0.00019984569973445507,
      "loss": 2.8025,
      "step": 140165
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.247002124786377,
      "learning_rate": 0.0001998418438692571,
      "loss": 2.9217,
      "step": 140166
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4751954078674316,
      "learning_rate": 0.00019983798802268045,
      "loss": 2.8866,
      "step": 140167
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3782873153686523,
      "learning_rate": 0.00019983413219472579,
      "loss": 2.9624,
      "step": 140168
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8711472749710083,
      "learning_rate": 0.00019983027638539386,
      "loss": 2.9379,
      "step": 140169
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9067304134368896,
      "learning_rate": 0.00019982642059468542,
      "loss": 3.0636,
      "step": 140170
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.643893241882324,
      "learning_rate": 0.00019982256482260107,
      "loss": 2.6865,
      "step": 140171
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.5868213176727295,
      "learning_rate": 0.0001998187090691417,
      "loss": 2.8999,
      "step": 140172
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3133161067962646,
      "learning_rate": 0.0001998148533343078,
      "loss": 3.0509,
      "step": 140173
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.293067216873169,
      "learning_rate": 0.00019981099761810024,
      "loss": 3.0581,
      "step": 140174
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0924289226531982,
      "learning_rate": 0.00019980714192051975,
      "loss": 2.7733,
      "step": 140175
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.152402639389038,
      "learning_rate": 0.00019980328624156704,
      "loss": 3.2765,
      "step": 140176
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.199129819869995,
      "learning_rate": 0.0001997994305812427,
      "loss": 3.0531,
      "step": 140177
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4978387355804443,
      "learning_rate": 0.00019979557493954753,
      "loss": 2.7703,
      "step": 140178
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.138215065002441,
      "learning_rate": 0.00019979171931648235,
      "loss": 3.1644,
      "step": 140179
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6484572887420654,
      "learning_rate": 0.0001997878637120477,
      "loss": 3.0412,
      "step": 140180
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2142457962036133,
      "learning_rate": 0.0001997840081262444,
      "loss": 3.0276,
      "step": 140181
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.135504961013794,
      "learning_rate": 0.00019978015255907316,
      "loss": 2.8216,
      "step": 140182
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.575608015060425,
      "learning_rate": 0.00019977629701053466,
      "loss": 2.6834,
      "step": 140183
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0482914447784424,
      "learning_rate": 0.00019977244148062964,
      "loss": 2.8796,
      "step": 140184
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4524872303009033,
      "learning_rate": 0.0001997685859693588,
      "loss": 2.9331,
      "step": 140185
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.996711015701294,
      "learning_rate": 0.00019976473047672293,
      "loss": 2.9883,
      "step": 140186
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3954548835754395,
      "learning_rate": 0.00019976087500272265,
      "loss": 2.9047,
      "step": 140187
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.090768575668335,
      "learning_rate": 0.00019975701954735875,
      "loss": 2.8495,
      "step": 140188
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.086973190307617,
      "learning_rate": 0.00019975316411063187,
      "loss": 2.9887,
      "step": 140189
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0468127727508545,
      "learning_rate": 0.00019974930869254276,
      "loss": 2.8729,
      "step": 140190
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.365797758102417,
      "learning_rate": 0.00019974545329309214,
      "loss": 2.9823,
      "step": 140191
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.0712385177612305,
      "learning_rate": 0.00019974159791228072,
      "loss": 2.6398,
      "step": 140192
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3031442165374756,
      "learning_rate": 0.00019973774255010936,
      "loss": 3.1618,
      "step": 140193
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.4150068759918213,
      "learning_rate": 0.00019973388720657853,
      "loss": 2.9669,
      "step": 140194
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9262712001800537,
      "learning_rate": 0.00019973003188168912,
      "loss": 2.9039,
      "step": 140195
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4230945110321045,
      "learning_rate": 0.00019972617657544175,
      "loss": 2.8924,
      "step": 140196
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.691894292831421,
      "learning_rate": 0.00019972232128783715,
      "loss": 2.8611,
      "step": 140197
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.217928171157837,
      "learning_rate": 0.0001997184660188761,
      "loss": 2.9227,
      "step": 140198
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.4770772457122803,
      "learning_rate": 0.00019971461076855927,
      "loss": 3.1754,
      "step": 140199
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.310746431350708,
      "learning_rate": 0.00019971075553688736,
      "loss": 2.9005,
      "step": 140200
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.899562358856201,
      "learning_rate": 0.00019970690032386124,
      "loss": 3.0684,
      "step": 140201
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.223252058029175,
      "learning_rate": 0.00019970304512948142,
      "loss": 3.0928,
      "step": 140202
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.402845621109009,
      "learning_rate": 0.00019969918995374863,
      "loss": 3.1331,
      "step": 140203
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.071226119995117,
      "learning_rate": 0.0001996953347966637,
      "loss": 3.0264,
      "step": 140204
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.071462154388428,
      "learning_rate": 0.0001996914796582273,
      "loss": 2.8246,
      "step": 140205
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8140511512756348,
      "learning_rate": 0.00019968762453844013,
      "loss": 2.9564,
      "step": 140206
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5545146465301514,
      "learning_rate": 0.00019968376943730308,
      "loss": 3.06,
      "step": 140207
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.268263578414917,
      "learning_rate": 0.00019967991435481655,
      "loss": 3.0698,
      "step": 140208
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.954313278198242,
      "learning_rate": 0.00019967605929098144,
      "loss": 3.0615,
      "step": 140209
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.122531414031982,
      "learning_rate": 0.00019967220424579844,
      "loss": 3.0331,
      "step": 140210
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9653971195220947,
      "learning_rate": 0.00019966834921926826,
      "loss": 2.8338,
      "step": 140211
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4503073692321777,
      "learning_rate": 0.00019966449421139162,
      "loss": 3.3252,
      "step": 140212
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.749018430709839,
      "learning_rate": 0.00019966063922216936,
      "loss": 2.7136,
      "step": 140213
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.0202300548553467,
      "learning_rate": 0.00019965678425160201,
      "loss": 2.9921,
      "step": 140214
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.518399715423584,
      "learning_rate": 0.00019965292929969032,
      "loss": 3.1274,
      "step": 140215
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1742138862609863,
      "learning_rate": 0.00019964907436643506,
      "loss": 2.7754,
      "step": 140216
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.141615390777588,
      "learning_rate": 0.00019964521945183691,
      "loss": 3.1435,
      "step": 140217
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.6427485942840576,
      "learning_rate": 0.00019964136455589664,
      "loss": 2.86,
      "step": 140218
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.356886386871338,
      "learning_rate": 0.00019963750967861508,
      "loss": 2.9103,
      "step": 140219
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.870851993560791,
      "learning_rate": 0.00019963365481999264,
      "loss": 3.0791,
      "step": 140220
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.020589351654053,
      "learning_rate": 0.00019962979998003016,
      "loss": 2.805,
      "step": 140221
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5384464263916016,
      "learning_rate": 0.00019962594515872842,
      "loss": 2.9558,
      "step": 140222
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.42111349105835,
      "learning_rate": 0.00019962209035608814,
      "loss": 3.0338,
      "step": 140223
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.4174535274505615,
      "learning_rate": 0.00019961823557210998,
      "loss": 3.0684,
      "step": 140224
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.348047971725464,
      "learning_rate": 0.00019961438080679485,
      "loss": 3.1053,
      "step": 140225
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.051072597503662,
      "learning_rate": 0.00019961052606014314,
      "loss": 2.8914,
      "step": 140226
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.519973039627075,
      "learning_rate": 0.00019960667133215573,
      "loss": 2.8163,
      "step": 140227
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.9299635887146,
      "learning_rate": 0.00019960281662283335,
      "loss": 2.7179,
      "step": 140228
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.2182235717773438,
      "learning_rate": 0.00019959896193217668,
      "loss": 2.6619,
      "step": 140229
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1471784114837646,
      "learning_rate": 0.0001995951072601865,
      "loss": 2.9058,
      "step": 140230
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.6624979972839355,
      "learning_rate": 0.00019959125260686356,
      "loss": 2.8262,
      "step": 140231
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.222092628479004,
      "learning_rate": 0.0001995873979722084,
      "loss": 3.0678,
      "step": 140232
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.6241910457611084,
      "learning_rate": 0.0001995835433562218,
      "loss": 2.6262,
      "step": 140233
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8051937818527222,
      "learning_rate": 0.00019957968875890456,
      "loss": 2.9251,
      "step": 140234
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8013381958007812,
      "learning_rate": 0.00019957583418025735,
      "loss": 2.9597,
      "step": 140235
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.40498685836792,
      "learning_rate": 0.0001995719796202809,
      "loss": 2.962,
      "step": 140236
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7570431232452393,
      "learning_rate": 0.00019956812507897602,
      "loss": 3.083,
      "step": 140237
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.129476547241211,
      "learning_rate": 0.00019956427055634317,
      "loss": 3.0119,
      "step": 140238
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0885424613952637,
      "learning_rate": 0.00019956041605238327,
      "loss": 3.1695,
      "step": 140239
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9169714450836182,
      "learning_rate": 0.00019955656156709695,
      "loss": 2.9665,
      "step": 140240
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.146902084350586,
      "learning_rate": 0.00019955270710048497,
      "loss": 3.0322,
      "step": 140241
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.28418231010437,
      "learning_rate": 0.000199548852652548,
      "loss": 3.0178,
      "step": 140242
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.139249563217163,
      "learning_rate": 0.00019954499822328697,
      "loss": 2.8359,
      "step": 140243
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1237010955810547,
      "learning_rate": 0.00019954114381270227,
      "loss": 2.9983,
      "step": 140244
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2945363521575928,
      "learning_rate": 0.00019953728942079482,
      "loss": 3.0278,
      "step": 140245
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.8142945766448975,
      "learning_rate": 0.00019953343504756525,
      "loss": 2.789,
      "step": 140246
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3026535511016846,
      "learning_rate": 0.00019952958069301428,
      "loss": 2.9104,
      "step": 140247
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.024600028991699,
      "learning_rate": 0.00019952572635714268,
      "loss": 2.9142,
      "step": 140248
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.772012948989868,
      "learning_rate": 0.0001995218720399513,
      "loss": 3.0934,
      "step": 140249
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.13175368309021,
      "learning_rate": 0.00019951801774144052,
      "loss": 3.0026,
      "step": 140250
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.187242031097412,
      "learning_rate": 0.00019951416346161127,
      "loss": 3.1213,
      "step": 140251
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.942674160003662,
      "learning_rate": 0.00019951030920046422,
      "loss": 3.2113,
      "step": 140252
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.080641508102417,
      "learning_rate": 0.0001995064549580001,
      "loss": 2.925,
      "step": 140253
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.428614616394043,
      "learning_rate": 0.00019950260073421964,
      "loss": 3.0537,
      "step": 140254
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.06837797164917,
      "learning_rate": 0.00019949874652912364,
      "loss": 3.0662,
      "step": 140255
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.405184268951416,
      "learning_rate": 0.00019949489234271267,
      "loss": 3.1945,
      "step": 140256
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3907854557037354,
      "learning_rate": 0.00019949103817498744,
      "loss": 2.7861,
      "step": 140257
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.286050796508789,
      "learning_rate": 0.00019948718402594873,
      "loss": 2.8745,
      "step": 140258
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9061193466186523,
      "learning_rate": 0.00019948332989559725,
      "loss": 2.7834,
      "step": 140259
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1892006397247314,
      "learning_rate": 0.0001994794757839337,
      "loss": 3.1064,
      "step": 140260
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1797924041748047,
      "learning_rate": 0.00019947562169095887,
      "loss": 2.8906,
      "step": 140261
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.5287113189697266,
      "learning_rate": 0.00019947176761667343,
      "loss": 2.8679,
      "step": 140262
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.141822338104248,
      "learning_rate": 0.0001994679135610781,
      "loss": 2.9014,
      "step": 140263
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.165736675262451,
      "learning_rate": 0.0001994640595241735,
      "loss": 3.0186,
      "step": 140264
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6093342304229736,
      "learning_rate": 0.00019946020550596044,
      "loss": 2.8836,
      "step": 140265
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5748207569122314,
      "learning_rate": 0.00019945635150643966,
      "loss": 2.9503,
      "step": 140266
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.530245780944824,
      "learning_rate": 0.00019945249752561177,
      "loss": 2.8237,
      "step": 140267
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2386231422424316,
      "learning_rate": 0.0001994486435634777,
      "loss": 3.1496,
      "step": 140268
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0242574214935303,
      "learning_rate": 0.0001994447896200379,
      "loss": 2.9586,
      "step": 140269
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8092856407165527,
      "learning_rate": 0.00019944093569529325,
      "loss": 2.953,
      "step": 140270
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.0904808044433594,
      "learning_rate": 0.0001994370817892445,
      "loss": 3.1139,
      "step": 140271
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4154131412506104,
      "learning_rate": 0.0001994332279018922,
      "loss": 2.9487,
      "step": 140272
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1488022804260254,
      "learning_rate": 0.00019942937403323718,
      "loss": 2.81,
      "step": 140273
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8234121799468994,
      "learning_rate": 0.0001994255201832802,
      "loss": 2.9793,
      "step": 140274
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.656947612762451,
      "learning_rate": 0.00019942166635202185,
      "loss": 2.7894,
      "step": 140275
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7501461505889893,
      "learning_rate": 0.0001994178125394629,
      "loss": 3.0459,
      "step": 140276
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2270543575286865,
      "learning_rate": 0.00019941395874560408,
      "loss": 3.3666,
      "step": 140277
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4844298362731934,
      "learning_rate": 0.00019941010497044617,
      "loss": 2.9409,
      "step": 140278
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.1045939922332764,
      "learning_rate": 0.00019940625121398975,
      "loss": 2.9116,
      "step": 140279
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2823402881622314,
      "learning_rate": 0.00019940239747623568,
      "loss": 2.9221,
      "step": 140280
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.021517038345337,
      "learning_rate": 0.00019939854375718453,
      "loss": 2.9695,
      "step": 140281
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.558881998062134,
      "learning_rate": 0.0001993946900568371,
      "loss": 3.0005,
      "step": 140282
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.013634443283081,
      "learning_rate": 0.00019939083637519412,
      "loss": 2.8921,
      "step": 140283
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.7572977542877197,
      "learning_rate": 0.00019938698271225626,
      "loss": 2.8517,
      "step": 140284
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9529842138290405,
      "learning_rate": 0.0001993831290680243,
      "loss": 2.9855,
      "step": 140285
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9375507831573486,
      "learning_rate": 0.00019937927544249893,
      "loss": 2.8081,
      "step": 140286
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.225611448287964,
      "learning_rate": 0.0001993754218356808,
      "loss": 2.9533,
      "step": 140287
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.606562852859497,
      "learning_rate": 0.0001993715682475707,
      "loss": 2.7847,
      "step": 140288
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.261406898498535,
      "learning_rate": 0.00019936771467816926,
      "loss": 2.9373,
      "step": 140289
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.210165023803711,
      "learning_rate": 0.00019936386112747728,
      "loss": 2.9201,
      "step": 140290
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7301712036132812,
      "learning_rate": 0.0001993600075954955,
      "loss": 2.9742,
      "step": 140291
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0639359951019287,
      "learning_rate": 0.0001993561540822247,
      "loss": 3.37,
      "step": 140292
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1784908771514893,
      "learning_rate": 0.00019935230058766538,
      "loss": 3.0448,
      "step": 140293
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1103322505950928,
      "learning_rate": 0.00019934844711181834,
      "loss": 3.2491,
      "step": 140294
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2037391662597656,
      "learning_rate": 0.0001993445936546843,
      "loss": 2.8146,
      "step": 140295
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0495026111602783,
      "learning_rate": 0.00019934074021626407,
      "loss": 2.9025,
      "step": 140296
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.524183511734009,
      "learning_rate": 0.00019933688679655824,
      "loss": 3.0639,
      "step": 140297
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.45003080368042,
      "learning_rate": 0.00019933303339556775,
      "loss": 2.9432,
      "step": 140298
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1865036487579346,
      "learning_rate": 0.00019932918001329297,
      "loss": 2.9837,
      "step": 140299
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2243175506591797,
      "learning_rate": 0.00019932532664973483,
      "loss": 2.7513,
      "step": 140300
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9638383388519287,
      "learning_rate": 0.00019932147330489403,
      "loss": 3.0286,
      "step": 140301
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.372697353363037,
      "learning_rate": 0.00019931761997877125,
      "loss": 3.1042,
      "step": 140302
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9921751022338867,
      "learning_rate": 0.00019931376667136721,
      "loss": 2.9122,
      "step": 140303
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9561492204666138,
      "learning_rate": 0.00019930991338268277,
      "loss": 2.972,
      "step": 140304
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9147346019744873,
      "learning_rate": 0.00019930606011271838,
      "loss": 2.7446,
      "step": 140305
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.501889944076538,
      "learning_rate": 0.00019930220686147494,
      "loss": 2.6747,
      "step": 140306
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3739311695098877,
      "learning_rate": 0.00019929835362895312,
      "loss": 2.9034,
      "step": 140307
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3112354278564453,
      "learning_rate": 0.0001992945004151536,
      "loss": 2.7654,
      "step": 140308
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.5350921154022217,
      "learning_rate": 0.00019929064722007714,
      "loss": 2.8191,
      "step": 140309
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.682685613632202,
      "learning_rate": 0.00019928679404372456,
      "loss": 3.0906,
      "step": 140310
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.0102765560150146,
      "learning_rate": 0.0001992829408860964,
      "loss": 2.9592,
      "step": 140311
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.600213050842285,
      "learning_rate": 0.0001992790877471934,
      "loss": 2.7615,
      "step": 140312
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0481019020080566,
      "learning_rate": 0.00019927523462701633,
      "loss": 2.6846,
      "step": 140313
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0312724113464355,
      "learning_rate": 0.00019927138152556586,
      "loss": 3.0552,
      "step": 140314
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.477151393890381,
      "learning_rate": 0.00019926752844284278,
      "loss": 3.0196,
      "step": 140315
    },
    {
      "epoch": 1.83,
      "grad_norm": 6.1838483810424805,
      "learning_rate": 0.00019926367537884788,
      "loss": 2.8567,
      "step": 140316
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.4272894859313965,
      "learning_rate": 0.00019925982233358165,
      "loss": 2.9025,
      "step": 140317
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3906171321868896,
      "learning_rate": 0.0001992559693070449,
      "loss": 2.9734,
      "step": 140318
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.7771687507629395,
      "learning_rate": 0.0001992521162992384,
      "loss": 3.1693,
      "step": 140319
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.5161356925964355,
      "learning_rate": 0.0001992482633101628,
      "loss": 2.8064,
      "step": 140320
    },
    {
      "epoch": 1.83,
      "grad_norm": 6.991918087005615,
      "learning_rate": 0.00019924441033981888,
      "loss": 3.0397,
      "step": 140321
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9719555377960205,
      "learning_rate": 0.00019924055738820742,
      "loss": 3.1224,
      "step": 140322
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.106333017349243,
      "learning_rate": 0.00019923670445532894,
      "loss": 2.9881,
      "step": 140323
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.4837474822998047,
      "learning_rate": 0.00019923285154118422,
      "loss": 2.8761,
      "step": 140324
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.020177841186523,
      "learning_rate": 0.0001992289986457741,
      "loss": 3.0036,
      "step": 140325
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9780336618423462,
      "learning_rate": 0.0001992251457690991,
      "loss": 2.8601,
      "step": 140326
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.969709038734436,
      "learning_rate": 0.0001992212929111601,
      "loss": 3.0026,
      "step": 140327
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.188204288482666,
      "learning_rate": 0.0001992174400719578,
      "loss": 2.9806,
      "step": 140328
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0200235843658447,
      "learning_rate": 0.000199213587251493,
      "loss": 2.7548,
      "step": 140329
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.216465950012207,
      "learning_rate": 0.0001992097344497661,
      "loss": 3.2506,
      "step": 140330
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1147491931915283,
      "learning_rate": 0.00019920588166677807,
      "loss": 3.036,
      "step": 140331
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.882920742034912,
      "learning_rate": 0.00019920202890252953,
      "loss": 2.9903,
      "step": 140332
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.053464412689209,
      "learning_rate": 0.00019919817615702133,
      "loss": 3.0637,
      "step": 140333
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9838836193084717,
      "learning_rate": 0.000199194323430254,
      "loss": 3.0309,
      "step": 140334
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1407337188720703,
      "learning_rate": 0.0001991904707222285,
      "loss": 2.9607,
      "step": 140335
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5148258209228516,
      "learning_rate": 0.00019918661803294526,
      "loss": 3.0265,
      "step": 140336
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.196629524230957,
      "learning_rate": 0.00019918276536240513,
      "loss": 3.3643,
      "step": 140337
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2860591411590576,
      "learning_rate": 0.00019917891271060883,
      "loss": 2.959,
      "step": 140338
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1271209716796875,
      "learning_rate": 0.00019917506007755708,
      "loss": 2.9394,
      "step": 140339
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3202216625213623,
      "learning_rate": 0.00019917120746325062,
      "loss": 3.0087,
      "step": 140340
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0682551860809326,
      "learning_rate": 0.0001991673548676902,
      "loss": 3.2353,
      "step": 140341
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.822267532348633,
      "learning_rate": 0.00019916350229087638,
      "loss": 3.0169,
      "step": 140342
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2017982006073,
      "learning_rate": 0.00019915964973280998,
      "loss": 2.8851,
      "step": 140343
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.948869228363037,
      "learning_rate": 0.00019915579719349168,
      "loss": 3.0186,
      "step": 140344
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3897452354431152,
      "learning_rate": 0.00019915194467292222,
      "loss": 3.2277,
      "step": 140345
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.398258686065674,
      "learning_rate": 0.00019914809217110233,
      "loss": 2.9098,
      "step": 140346
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0102779865264893,
      "learning_rate": 0.00019914423968803277,
      "loss": 2.9691,
      "step": 140347
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.015659809112549,
      "learning_rate": 0.0001991403872237142,
      "loss": 2.8618,
      "step": 140348
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0793697834014893,
      "learning_rate": 0.00019913653477814731,
      "loss": 3.0432,
      "step": 140349
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9265247583389282,
      "learning_rate": 0.00019913268235133278,
      "loss": 3.2662,
      "step": 140350
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.174363374710083,
      "learning_rate": 0.00019912882994327138,
      "loss": 2.9409,
      "step": 140351
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.375025987625122,
      "learning_rate": 0.0001991249775539639,
      "loss": 2.8978,
      "step": 140352
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9772154092788696,
      "learning_rate": 0.00019912112518341102,
      "loss": 2.9936,
      "step": 140353
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1427228450775146,
      "learning_rate": 0.00019911727283161338,
      "loss": 2.864,
      "step": 140354
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9243497848510742,
      "learning_rate": 0.0001991134204985718,
      "loss": 2.9626,
      "step": 140355
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.319298028945923,
      "learning_rate": 0.00019910956818428686,
      "loss": 3.0207,
      "step": 140356
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6798501014709473,
      "learning_rate": 0.00019910571588875936,
      "loss": 3.1646,
      "step": 140357
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3425841331481934,
      "learning_rate": 0.00019910186361199003,
      "loss": 3.1426,
      "step": 140358
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1129281520843506,
      "learning_rate": 0.00019909801135397964,
      "loss": 3.0772,
      "step": 140359
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9394694566726685,
      "learning_rate": 0.00019909415911472872,
      "loss": 2.9877,
      "step": 140360
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.76827073097229,
      "learning_rate": 0.00019909030689423812,
      "loss": 3.1839,
      "step": 140361
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.263913154602051,
      "learning_rate": 0.0001990864546925086,
      "loss": 2.9267,
      "step": 140362
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.949283480644226,
      "learning_rate": 0.00019908260250954082,
      "loss": 2.8325,
      "step": 140363
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.07629132270813,
      "learning_rate": 0.00019907875034533542,
      "loss": 3.0399,
      "step": 140364
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.320096731185913,
      "learning_rate": 0.00019907489819989327,
      "loss": 3.2594,
      "step": 140365
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2979140281677246,
      "learning_rate": 0.0001990710460732149,
      "loss": 2.6864,
      "step": 140366
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.295516014099121,
      "learning_rate": 0.0001990671939653012,
      "loss": 3.3527,
      "step": 140367
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.636791467666626,
      "learning_rate": 0.00019906334187615271,
      "loss": 2.9099,
      "step": 140368
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.126537561416626,
      "learning_rate": 0.00019905948980577034,
      "loss": 2.8072,
      "step": 140369
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.347910165786743,
      "learning_rate": 0.00019905563775415475,
      "loss": 2.9925,
      "step": 140370
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.155003547668457,
      "learning_rate": 0.0001990517857213066,
      "loss": 3.0109,
      "step": 140371
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7860264778137207,
      "learning_rate": 0.00019904793370722663,
      "loss": 3.0234,
      "step": 140372
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.629756450653076,
      "learning_rate": 0.0001990440817119155,
      "loss": 3.0951,
      "step": 140373
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3468017578125,
      "learning_rate": 0.00019904022973537397,
      "loss": 3.0968,
      "step": 140374
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2894349098205566,
      "learning_rate": 0.00019903637777760283,
      "loss": 3.2376,
      "step": 140375
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.944668769836426,
      "learning_rate": 0.00019903252583860264,
      "loss": 3.0806,
      "step": 140376
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.294455051422119,
      "learning_rate": 0.0001990286739183744,
      "loss": 3.0205,
      "step": 140377
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.115746259689331,
      "learning_rate": 0.0001990248220169185,
      "loss": 3.0598,
      "step": 140378
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8570921421051025,
      "learning_rate": 0.00019902097013423578,
      "loss": 2.7525,
      "step": 140379
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6113698482513428,
      "learning_rate": 0.000199017118270327,
      "loss": 2.9086,
      "step": 140380
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8846595287322998,
      "learning_rate": 0.00019901326642519283,
      "loss": 3.0738,
      "step": 140381
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9615600109100342,
      "learning_rate": 0.00019900941459883396,
      "loss": 3.061,
      "step": 140382
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0001957416534424,
      "learning_rate": 0.00019900556279125133,
      "loss": 2.8827,
      "step": 140383
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7701053619384766,
      "learning_rate": 0.00019900171100244532,
      "loss": 2.9445,
      "step": 140384
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9150216579437256,
      "learning_rate": 0.0001989978592324168,
      "loss": 3.2324,
      "step": 140385
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.577214479446411,
      "learning_rate": 0.00019899400748116648,
      "loss": 2.9349,
      "step": 140386
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1769156455993652,
      "learning_rate": 0.00019899015574869505,
      "loss": 2.8213,
      "step": 140387
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7581777572631836,
      "learning_rate": 0.00019898630403500332,
      "loss": 2.7768,
      "step": 140388
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.982333779335022,
      "learning_rate": 0.00019898245234009206,
      "loss": 2.989,
      "step": 140389
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.38567852973938,
      "learning_rate": 0.00019897860066396168,
      "loss": 2.9565,
      "step": 140390
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.275336980819702,
      "learning_rate": 0.00019897474900661314,
      "loss": 3.066,
      "step": 140391
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.688364028930664,
      "learning_rate": 0.00019897089736804709,
      "loss": 2.9371,
      "step": 140392
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8436970710754395,
      "learning_rate": 0.0001989670457482643,
      "loss": 2.7121,
      "step": 140393
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.688023805618286,
      "learning_rate": 0.00019896319414726535,
      "loss": 2.6997,
      "step": 140394
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.08125901222229,
      "learning_rate": 0.0001989593425650511,
      "loss": 2.9046,
      "step": 140395
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9769501686096191,
      "learning_rate": 0.00019895549100162234,
      "loss": 2.7751,
      "step": 140396
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.113243341445923,
      "learning_rate": 0.00019895163945697955,
      "loss": 3.1358,
      "step": 140397
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2335920333862305,
      "learning_rate": 0.00019894778793112352,
      "loss": 2.868,
      "step": 140398
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8792853355407715,
      "learning_rate": 0.00019894393642405503,
      "loss": 3.1166,
      "step": 140399
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.484509229660034,
      "learning_rate": 0.00019894008493577477,
      "loss": 2.9946,
      "step": 140400
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0048465728759766,
      "learning_rate": 0.00019893623346628348,
      "loss": 2.8886,
      "step": 140401
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3682754039764404,
      "learning_rate": 0.0001989323820155819,
      "loss": 3.033,
      "step": 140402
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.193125009536743,
      "learning_rate": 0.00019892853058367063,
      "loss": 2.9152,
      "step": 140403
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7842211723327637,
      "learning_rate": 0.00019892467917055048,
      "loss": 3.0582,
      "step": 140404
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9654895067214966,
      "learning_rate": 0.00019892082777622208,
      "loss": 2.8982,
      "step": 140405
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0261428356170654,
      "learning_rate": 0.00019891697640068623,
      "loss": 2.7445,
      "step": 140406
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1482479572296143,
      "learning_rate": 0.00019891312504394362,
      "loss": 3.2263,
      "step": 140407
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.179669141769409,
      "learning_rate": 0.0001989092737059951,
      "loss": 3.0686,
      "step": 140408
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9723620414733887,
      "learning_rate": 0.00019890542238684114,
      "loss": 2.9902,
      "step": 140409
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5595662593841553,
      "learning_rate": 0.00019890157108648257,
      "loss": 2.9483,
      "step": 140410
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3154749870300293,
      "learning_rate": 0.00019889771980492009,
      "loss": 3.1826,
      "step": 140411
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9648109674453735,
      "learning_rate": 0.0001988938685421544,
      "loss": 2.9475,
      "step": 140412
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4133870601654053,
      "learning_rate": 0.0001988900172981863,
      "loss": 3.0501,
      "step": 140413
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5482020378112793,
      "learning_rate": 0.00019888616607301656,
      "loss": 2.9998,
      "step": 140414
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.905834197998047,
      "learning_rate": 0.0001988823148666457,
      "loss": 2.9706,
      "step": 140415
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2073521614074707,
      "learning_rate": 0.00019887846367907448,
      "loss": 2.9096,
      "step": 140416
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8625173568725586,
      "learning_rate": 0.0001988746125103037,
      "loss": 2.8002,
      "step": 140417
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.352665424346924,
      "learning_rate": 0.00019887076136033396,
      "loss": 2.9506,
      "step": 140418
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.476712465286255,
      "learning_rate": 0.00019886691022916615,
      "loss": 3.1187,
      "step": 140419
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4307048320770264,
      "learning_rate": 0.00019886305911680097,
      "loss": 3.0883,
      "step": 140420
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3506555557250977,
      "learning_rate": 0.00019885920802323895,
      "loss": 2.9016,
      "step": 140421
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.26245379447937,
      "learning_rate": 0.00019885535694848087,
      "loss": 2.7116,
      "step": 140422
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9738290309906006,
      "learning_rate": 0.00019885150589252755,
      "loss": 2.8989,
      "step": 140423
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.189948558807373,
      "learning_rate": 0.0001988476548553796,
      "loss": 2.9178,
      "step": 140424
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.667600393295288,
      "learning_rate": 0.00019884380383703782,
      "loss": 2.9876,
      "step": 140425
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6605184078216553,
      "learning_rate": 0.00019883995283750296,
      "loss": 3.2849,
      "step": 140426
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9156075716018677,
      "learning_rate": 0.0001988361018567756,
      "loss": 3.0566,
      "step": 140427
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8743984699249268,
      "learning_rate": 0.00019883225089485643,
      "loss": 2.7972,
      "step": 140428
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0552077293395996,
      "learning_rate": 0.0001988283999517463,
      "loss": 3.3453,
      "step": 140429
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1387882232666016,
      "learning_rate": 0.0001988245490274459,
      "loss": 3.0316,
      "step": 140430
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8724486827850342,
      "learning_rate": 0.0001988206981219559,
      "loss": 3.0463,
      "step": 140431
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2350504398345947,
      "learning_rate": 0.00019881684723527712,
      "loss": 2.9017,
      "step": 140432
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2844436168670654,
      "learning_rate": 0.0001988129963674102,
      "loss": 3.0718,
      "step": 140433
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2851955890655518,
      "learning_rate": 0.0001988091455183558,
      "loss": 2.992,
      "step": 140434
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1193127632141113,
      "learning_rate": 0.00019880529468811468,
      "loss": 2.8901,
      "step": 140435
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4452474117279053,
      "learning_rate": 0.00019880144387668755,
      "loss": 2.959,
      "step": 140436
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.388380765914917,
      "learning_rate": 0.00019879759308407517,
      "loss": 3.1094,
      "step": 140437
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.268782377243042,
      "learning_rate": 0.00019879374231027827,
      "loss": 3.0559,
      "step": 140438
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.749197006225586,
      "learning_rate": 0.00019878989155529744,
      "loss": 2.8991,
      "step": 140439
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9068052768707275,
      "learning_rate": 0.00019878604081913359,
      "loss": 3.0367,
      "step": 140440
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6372339725494385,
      "learning_rate": 0.0001987821901017872,
      "loss": 2.9225,
      "step": 140441
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9272208213806152,
      "learning_rate": 0.00019877833940325915,
      "loss": 3.0956,
      "step": 140442
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1091160774230957,
      "learning_rate": 0.00019877448872355013,
      "loss": 2.9529,
      "step": 140443
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.553199052810669,
      "learning_rate": 0.00019877063806266092,
      "loss": 3.1231,
      "step": 140444
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.064263105392456,
      "learning_rate": 0.00019876678742059204,
      "loss": 2.8265,
      "step": 140445
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.176215648651123,
      "learning_rate": 0.00019876293679734437,
      "loss": 2.8105,
      "step": 140446
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2954442501068115,
      "learning_rate": 0.00019875908619291865,
      "loss": 2.9691,
      "step": 140447
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.2586216926574707,
      "learning_rate": 0.00019875523560731543,
      "loss": 3.2578,
      "step": 140448
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9832185506820679,
      "learning_rate": 0.00019875138504053553,
      "loss": 2.7116,
      "step": 140449
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4580891132354736,
      "learning_rate": 0.0001987475344925797,
      "loss": 3.2326,
      "step": 140450
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4734675884246826,
      "learning_rate": 0.0001987436839634486,
      "loss": 2.9188,
      "step": 140451
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3892266750335693,
      "learning_rate": 0.00019873983345314298,
      "loss": 3.2064,
      "step": 140452
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.391911268234253,
      "learning_rate": 0.00019873598296166344,
      "loss": 2.9369,
      "step": 140453
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0439021587371826,
      "learning_rate": 0.0001987321324890109,
      "loss": 3.0084,
      "step": 140454
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5899851322174072,
      "learning_rate": 0.00019872828203518594,
      "loss": 2.9662,
      "step": 140455
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8360393047332764,
      "learning_rate": 0.00019872443160018935,
      "loss": 3.0525,
      "step": 140456
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.207207441329956,
      "learning_rate": 0.00019872058118402172,
      "loss": 2.8945,
      "step": 140457
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1151928901672363,
      "learning_rate": 0.0001987167307866839,
      "loss": 2.8742,
      "step": 140458
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2601699829101562,
      "learning_rate": 0.0001987128804081765,
      "loss": 3.0537,
      "step": 140459
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8115592002868652,
      "learning_rate": 0.0001987090300485003,
      "loss": 3.0129,
      "step": 140460
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3159682750701904,
      "learning_rate": 0.000198705179707656,
      "loss": 3.0983,
      "step": 140461
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.633620262145996,
      "learning_rate": 0.00019870132938564438,
      "loss": 2.8539,
      "step": 140462
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2411367893218994,
      "learning_rate": 0.0001986974790824661,
      "loss": 3.1149,
      "step": 140463
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5455775260925293,
      "learning_rate": 0.0001986936287981218,
      "loss": 3.0883,
      "step": 140464
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6694087982177734,
      "learning_rate": 0.00019868977853261226,
      "loss": 2.8951,
      "step": 140465
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2376437187194824,
      "learning_rate": 0.00019868592828593823,
      "loss": 2.8226,
      "step": 140466
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.200990676879883,
      "learning_rate": 0.00019868207805810034,
      "loss": 2.9762,
      "step": 140467
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.8559398651123047,
      "learning_rate": 0.00019867822784909944,
      "loss": 2.8411,
      "step": 140468
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.353905200958252,
      "learning_rate": 0.00019867437765893624,
      "loss": 2.7944,
      "step": 140469
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.293783187866211,
      "learning_rate": 0.00019867052748761128,
      "loss": 3.1796,
      "step": 140470
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2962570190429688,
      "learning_rate": 0.0001986666773351254,
      "loss": 2.9148,
      "step": 140471
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.2402729988098145,
      "learning_rate": 0.00019866282720147927,
      "loss": 2.7209,
      "step": 140472
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.6618869304656982,
      "learning_rate": 0.00019865897708667364,
      "loss": 2.7655,
      "step": 140473
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8278840780258179,
      "learning_rate": 0.00019865512699070924,
      "loss": 2.7578,
      "step": 140474
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.120526075363159,
      "learning_rate": 0.0001986512769135869,
      "loss": 3.0311,
      "step": 140475
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.045058250427246,
      "learning_rate": 0.00019864742685530703,
      "loss": 2.9766,
      "step": 140476
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2390670776367188,
      "learning_rate": 0.00019864357681587051,
      "loss": 3.0467,
      "step": 140477
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8787437677383423,
      "learning_rate": 0.00019863972679527812,
      "loss": 3.0953,
      "step": 140478
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3740456104278564,
      "learning_rate": 0.0001986358767935305,
      "loss": 3.0334,
      "step": 140479
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.150249481201172,
      "learning_rate": 0.00019863202681062836,
      "loss": 3.2343,
      "step": 140480
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4375901222229004,
      "learning_rate": 0.00019862817684657255,
      "loss": 3.002,
      "step": 140481
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9094951152801514,
      "learning_rate": 0.0001986243269013636,
      "loss": 2.892,
      "step": 140482
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8115209341049194,
      "learning_rate": 0.00019862047697500224,
      "loss": 2.8705,
      "step": 140483
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.679854154586792,
      "learning_rate": 0.00019861662706748933,
      "loss": 2.8008,
      "step": 140484
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.551420211791992,
      "learning_rate": 0.00019861277717882542,
      "loss": 2.9379,
      "step": 140485
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2303316593170166,
      "learning_rate": 0.00019860892730901136,
      "loss": 2.8812,
      "step": 140486
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.597982168197632,
      "learning_rate": 0.00019860507745804792,
      "loss": 2.9254,
      "step": 140487
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6415140628814697,
      "learning_rate": 0.0001986012276259356,
      "loss": 2.8271,
      "step": 140488
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.318969964981079,
      "learning_rate": 0.00019859737781267522,
      "loss": 2.8982,
      "step": 140489
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4194998741149902,
      "learning_rate": 0.00019859352801826748,
      "loss": 3.0298,
      "step": 140490
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7980329990386963,
      "learning_rate": 0.00019858967824271315,
      "loss": 2.998,
      "step": 140491
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.553439140319824,
      "learning_rate": 0.00019858582848601288,
      "loss": 2.9676,
      "step": 140492
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4276413917541504,
      "learning_rate": 0.00019858197874816758,
      "loss": 3.0165,
      "step": 140493
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6991822719573975,
      "learning_rate": 0.00019857812902917765,
      "loss": 2.9067,
      "step": 140494
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.213792562484741,
      "learning_rate": 0.00019857427932904397,
      "loss": 3.0786,
      "step": 140495
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.77004075050354,
      "learning_rate": 0.00019857042964776725,
      "loss": 3.103,
      "step": 140496
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.634404182434082,
      "learning_rate": 0.00019856657998534822,
      "loss": 2.7368,
      "step": 140497
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0239970684051514,
      "learning_rate": 0.00019856273034178756,
      "loss": 3.1866,
      "step": 140498
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1135547161102295,
      "learning_rate": 0.0001985588807170861,
      "loss": 3.0078,
      "step": 140499
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.716888904571533,
      "learning_rate": 0.00019855503111124438,
      "loss": 3.0099,
      "step": 140500
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.762077569961548,
      "learning_rate": 0.00019855118152426317,
      "loss": 2.6574,
      "step": 140501
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.335672616958618,
      "learning_rate": 0.0001985473319561432,
      "loss": 2.9499,
      "step": 140502
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8244071006774902,
      "learning_rate": 0.00019854348240688522,
      "loss": 2.8688,
      "step": 140503
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2256715297698975,
      "learning_rate": 0.0001985396328764899,
      "loss": 3.2317,
      "step": 140504
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.422151803970337,
      "learning_rate": 0.0001985357833649581,
      "loss": 3.0371,
      "step": 140505
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9739692211151123,
      "learning_rate": 0.00019853193387229034,
      "loss": 3.1129,
      "step": 140506
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.7151622772216797,
      "learning_rate": 0.00019852808439848737,
      "loss": 2.9411,
      "step": 140507
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4642064571380615,
      "learning_rate": 0.00019852423494354994,
      "loss": 3.0168,
      "step": 140508
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.903102159500122,
      "learning_rate": 0.00019852038550747877,
      "loss": 2.8748,
      "step": 140509
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.7405409812927246,
      "learning_rate": 0.00019851653609027458,
      "loss": 2.9525,
      "step": 140510
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.641954183578491,
      "learning_rate": 0.0001985126866919382,
      "loss": 2.9844,
      "step": 140511
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.310300588607788,
      "learning_rate": 0.0001985088373124701,
      "loss": 3.0487,
      "step": 140512
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1528921127319336,
      "learning_rate": 0.00019850498795187115,
      "loss": 2.7322,
      "step": 140513
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9539237022399902,
      "learning_rate": 0.00019850113861014202,
      "loss": 3.0703,
      "step": 140514
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0514156818389893,
      "learning_rate": 0.00019849728928728345,
      "loss": 2.8895,
      "step": 140515
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.347893476486206,
      "learning_rate": 0.00019849343998329613,
      "loss": 3.0057,
      "step": 140516
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.949613094329834,
      "learning_rate": 0.00019848959069818092,
      "loss": 3.1784,
      "step": 140517
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.969573974609375,
      "learning_rate": 0.00019848574143193831,
      "loss": 2.9983,
      "step": 140518
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7951815128326416,
      "learning_rate": 0.00019848189218456914,
      "loss": 2.9498,
      "step": 140519
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.944086790084839,
      "learning_rate": 0.00019847804295607406,
      "loss": 2.8437,
      "step": 140520
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1037042140960693,
      "learning_rate": 0.00019847419374645386,
      "loss": 2.9261,
      "step": 140521
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.262502908706665,
      "learning_rate": 0.0001984703445557092,
      "loss": 3.2328,
      "step": 140522
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.274059772491455,
      "learning_rate": 0.0001984664953838409,
      "loss": 2.9952,
      "step": 140523
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2054224014282227,
      "learning_rate": 0.00019846264623084948,
      "loss": 2.9363,
      "step": 140524
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.590625762939453,
      "learning_rate": 0.0001984587970967359,
      "loss": 2.8428,
      "step": 140525
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1705434322357178,
      "learning_rate": 0.00019845494798150063,
      "loss": 2.7566,
      "step": 140526
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7056407928466797,
      "learning_rate": 0.00019845109888514453,
      "loss": 2.9671,
      "step": 140527
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.912391185760498,
      "learning_rate": 0.00019844724980766832,
      "loss": 3.1311,
      "step": 140528
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1484460830688477,
      "learning_rate": 0.00019844340074907262,
      "loss": 2.8813,
      "step": 140529
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6597108840942383,
      "learning_rate": 0.0001984395517093583,
      "loss": 2.9758,
      "step": 140530
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0843241214752197,
      "learning_rate": 0.00019843570268852592,
      "loss": 2.9439,
      "step": 140531
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.7437803745269775,
      "learning_rate": 0.00019843185368657634,
      "loss": 3.1243,
      "step": 140532
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.6300454139709473,
      "learning_rate": 0.0001984280047035101,
      "loss": 2.8774,
      "step": 140533
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.1230297088623047,
      "learning_rate": 0.00019842415573932803,
      "loss": 2.8735,
      "step": 140534
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.203664779663086,
      "learning_rate": 0.00019842030679403081,
      "loss": 2.9198,
      "step": 140535
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.122222661972046,
      "learning_rate": 0.00019841645786761926,
      "loss": 3.0137,
      "step": 140536
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.9058594703674316,
      "learning_rate": 0.00019841260896009394,
      "loss": 2.7654,
      "step": 140537
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.39835786819458,
      "learning_rate": 0.00019840876007145563,
      "loss": 3.2553,
      "step": 140538
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2424283027648926,
      "learning_rate": 0.00019840491120170513,
      "loss": 3.0314,
      "step": 140539
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9010305404663086,
      "learning_rate": 0.000198401062350843,
      "loss": 2.9639,
      "step": 140540
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.1628849506378174,
      "learning_rate": 0.00019839721351887002,
      "loss": 3.1773,
      "step": 140541
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.2388927936553955,
      "learning_rate": 0.00019839336470578697,
      "loss": 3.0509,
      "step": 140542
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.996439456939697,
      "learning_rate": 0.0001983895159115945,
      "loss": 3.1241,
      "step": 140543
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3407881259918213,
      "learning_rate": 0.0001983856671362933,
      "loss": 3.2407,
      "step": 140544
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4005932807922363,
      "learning_rate": 0.00019838181837988412,
      "loss": 2.938,
      "step": 140545
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.305779457092285,
      "learning_rate": 0.0001983779696423677,
      "loss": 2.7363,
      "step": 140546
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.641044855117798,
      "learning_rate": 0.00019837412092374473,
      "loss": 3.115,
      "step": 140547
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3415141105651855,
      "learning_rate": 0.000198370272224016,
      "loss": 2.9515,
      "step": 140548
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0393528938293457,
      "learning_rate": 0.00019836642354318203,
      "loss": 3.0929,
      "step": 140549
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.234879493713379,
      "learning_rate": 0.0001983625748812437,
      "loss": 2.8476,
      "step": 140550
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7671961784362793,
      "learning_rate": 0.00019835872623820167,
      "loss": 3.0916,
      "step": 140551
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.613931655883789,
      "learning_rate": 0.0001983548776140567,
      "loss": 2.6152,
      "step": 140552
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8469505310058594,
      "learning_rate": 0.00019835102900880947,
      "loss": 2.7414,
      "step": 140553
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.023007869720459,
      "learning_rate": 0.0001983471804224608,
      "loss": 3.0112,
      "step": 140554
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.241473436355591,
      "learning_rate": 0.0001983433318550112,
      "loss": 2.7945,
      "step": 140555
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8041646480560303,
      "learning_rate": 0.0001983394833064615,
      "loss": 3.088,
      "step": 140556
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8460264205932617,
      "learning_rate": 0.0001983356347768124,
      "loss": 2.8189,
      "step": 140557
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.7913905382156372,
      "learning_rate": 0.00019833178626606465,
      "loss": 2.7464,
      "step": 140558
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2633960247039795,
      "learning_rate": 0.00019832793777421892,
      "loss": 2.928,
      "step": 140559
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.5369338989257812,
      "learning_rate": 0.00019832408930127606,
      "loss": 2.8794,
      "step": 140560
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.0186924934387207,
      "learning_rate": 0.00019832024084723654,
      "loss": 2.8147,
      "step": 140561
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.1670947074890137,
      "learning_rate": 0.00019831639241210124,
      "loss": 3.0919,
      "step": 140562
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0606882572174072,
      "learning_rate": 0.00019831254399587083,
      "loss": 3.1622,
      "step": 140563
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0834109783172607,
      "learning_rate": 0.00019830869559854603,
      "loss": 2.7483,
      "step": 140564
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.9033267498016357,
      "learning_rate": 0.00019830484722012758,
      "loss": 3.1044,
      "step": 140565
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7091405391693115,
      "learning_rate": 0.00019830099886061628,
      "loss": 2.8194,
      "step": 140566
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.4359962940216064,
      "learning_rate": 0.00019829715052001263,
      "loss": 2.9003,
      "step": 140567
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8961284160614014,
      "learning_rate": 0.00019829330219831744,
      "loss": 2.9707,
      "step": 140568
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.593817710876465,
      "learning_rate": 0.00019828945389553147,
      "loss": 3.0449,
      "step": 140569
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.337581157684326,
      "learning_rate": 0.00019828560561165542,
      "loss": 2.7255,
      "step": 140570
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0253090858459473,
      "learning_rate": 0.00019828175734668997,
      "loss": 2.7619,
      "step": 140571
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3237764835357666,
      "learning_rate": 0.000198277909100636,
      "loss": 2.8557,
      "step": 140572
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.099729061126709,
      "learning_rate": 0.00019827406087349393,
      "loss": 3.1468,
      "step": 140573
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.500182867050171,
      "learning_rate": 0.0001982702126652647,
      "loss": 3.0265,
      "step": 140574
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.693172454833984,
      "learning_rate": 0.00019826636447594894,
      "loss": 2.864,
      "step": 140575
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.786987066268921,
      "learning_rate": 0.00019826251630554733,
      "loss": 3.1265,
      "step": 140576
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.107729911804199,
      "learning_rate": 0.00019825866815406066,
      "loss": 3.1676,
      "step": 140577
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.825032114982605,
      "learning_rate": 0.00019825482002148976,
      "loss": 3.0912,
      "step": 140578
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2475879192352295,
      "learning_rate": 0.00019825097190783508,
      "loss": 2.8221,
      "step": 140579
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.376826763153076,
      "learning_rate": 0.0001982471238130975,
      "loss": 2.8883,
      "step": 140580
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2352521419525146,
      "learning_rate": 0.00019824327573727762,
      "loss": 3.1737,
      "step": 140581
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6911587715148926,
      "learning_rate": 0.0001982394276803763,
      "loss": 2.9525,
      "step": 140582
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.276444911956787,
      "learning_rate": 0.00019823557964239417,
      "loss": 2.9737,
      "step": 140583
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6105315685272217,
      "learning_rate": 0.00019823173162333211,
      "loss": 3.0784,
      "step": 140584
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9627034664154053,
      "learning_rate": 0.00019822788362319053,
      "loss": 3.065,
      "step": 140585
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.138089656829834,
      "learning_rate": 0.00019822403564197032,
      "loss": 2.9403,
      "step": 140586
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4302680492401123,
      "learning_rate": 0.00019822018767967224,
      "loss": 3.1248,
      "step": 140587
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3097572326660156,
      "learning_rate": 0.00019821633973629688,
      "loss": 2.9447,
      "step": 140588
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.112269401550293,
      "learning_rate": 0.00019821249181184507,
      "loss": 2.9808,
      "step": 140589
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.158114194869995,
      "learning_rate": 0.0001982086439063176,
      "loss": 3.1365,
      "step": 140590
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4073379039764404,
      "learning_rate": 0.0001982047960197149,
      "loss": 2.9174,
      "step": 140591
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.795527458190918,
      "learning_rate": 0.0001982009481520379,
      "loss": 2.7645,
      "step": 140592
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8989102840423584,
      "learning_rate": 0.0001981971003032872,
      "loss": 2.9784,
      "step": 140593
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0478358268737793,
      "learning_rate": 0.00019819325247346362,
      "loss": 3.1805,
      "step": 140594
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7984731197357178,
      "learning_rate": 0.00019818940466256784,
      "loss": 3.0666,
      "step": 140595
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3963522911071777,
      "learning_rate": 0.00019818555687060055,
      "loss": 3.0127,
      "step": 140596
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.4600648880004883,
      "learning_rate": 0.00019818170909756262,
      "loss": 3.002,
      "step": 140597
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8502871990203857,
      "learning_rate": 0.00019817786134345453,
      "loss": 2.8312,
      "step": 140598
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4412648677825928,
      "learning_rate": 0.00019817401360827707,
      "loss": 3.0672,
      "step": 140599
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2999494075775146,
      "learning_rate": 0.000198170165892031,
      "loss": 2.8086,
      "step": 140600
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9801902770996094,
      "learning_rate": 0.00019816631819471702,
      "loss": 2.7017,
      "step": 140601
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4438998699188232,
      "learning_rate": 0.00019816247051633585,
      "loss": 3.0024,
      "step": 140602
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2094216346740723,
      "learning_rate": 0.00019815862285688825,
      "loss": 3.0266,
      "step": 140603
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.025326728820801,
      "learning_rate": 0.00019815477521637487,
      "loss": 2.977,
      "step": 140604
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.552657127380371,
      "learning_rate": 0.00019815092759479638,
      "loss": 3.1273,
      "step": 140605
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9859592914581299,
      "learning_rate": 0.00019814707999215355,
      "loss": 2.974,
      "step": 140606
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4585556983947754,
      "learning_rate": 0.0001981432324084471,
      "loss": 2.9525,
      "step": 140607
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.811556100845337,
      "learning_rate": 0.0001981393848436778,
      "loss": 3.0207,
      "step": 140608
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9086406230926514,
      "learning_rate": 0.0001981355372978464,
      "loss": 2.7497,
      "step": 140609
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3198904991149902,
      "learning_rate": 0.00019813168977095337,
      "loss": 3.0868,
      "step": 140610
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.10402250289917,
      "learning_rate": 0.00019812784226299962,
      "loss": 2.9942,
      "step": 140611
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.5084850788116455,
      "learning_rate": 0.00019812399477398581,
      "loss": 3.1705,
      "step": 140612
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.05994176864624,
      "learning_rate": 0.00019812014730391267,
      "loss": 3.0041,
      "step": 140613
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6268484592437744,
      "learning_rate": 0.00019811629985278092,
      "loss": 3.0228,
      "step": 140614
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0982871055603027,
      "learning_rate": 0.00019811245242059135,
      "loss": 2.9781,
      "step": 140615
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.393212080001831,
      "learning_rate": 0.00019810860500734455,
      "loss": 3.2028,
      "step": 140616
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.614978790283203,
      "learning_rate": 0.00019810475761304134,
      "loss": 3.0885,
      "step": 140617
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9855411052703857,
      "learning_rate": 0.00019810091023768228,
      "loss": 3.1851,
      "step": 140618
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.230733871459961,
      "learning_rate": 0.0001980970628812682,
      "loss": 2.8259,
      "step": 140619
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8155570030212402,
      "learning_rate": 0.0001980932155437998,
      "loss": 3.2879,
      "step": 140620
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9261393547058105,
      "learning_rate": 0.00019808936822527789,
      "loss": 2.8454,
      "step": 140621
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.743410110473633,
      "learning_rate": 0.000198085520925703,
      "loss": 2.9494,
      "step": 140622
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3959357738494873,
      "learning_rate": 0.00019808167364507594,
      "loss": 3.1323,
      "step": 140623
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0805602073669434,
      "learning_rate": 0.00019807782638339743,
      "loss": 2.8856,
      "step": 140624
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.882063388824463,
      "learning_rate": 0.00019807397914066817,
      "loss": 2.9633,
      "step": 140625
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8744051456451416,
      "learning_rate": 0.0001980701319168889,
      "loss": 2.8761,
      "step": 140626
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2761919498443604,
      "learning_rate": 0.00019806628471206035,
      "loss": 2.9285,
      "step": 140627
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9710321426391602,
      "learning_rate": 0.0001980624375261831,
      "loss": 3.0441,
      "step": 140628
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0956246852874756,
      "learning_rate": 0.00019805859035925804,
      "loss": 2.6195,
      "step": 140629
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.713405132293701,
      "learning_rate": 0.0001980547432112858,
      "loss": 3.0431,
      "step": 140630
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.300701856613159,
      "learning_rate": 0.00019805089608226713,
      "loss": 2.7834,
      "step": 140631
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6940793991088867,
      "learning_rate": 0.0001980470489722027,
      "loss": 3.1002,
      "step": 140632
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8955031633377075,
      "learning_rate": 0.0001980432018810933,
      "loss": 2.9948,
      "step": 140633
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.284743785858154,
      "learning_rate": 0.0001980393548089395,
      "loss": 2.8706,
      "step": 140634
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.37455677986145,
      "learning_rate": 0.00019803550775574215,
      "loss": 3.0092,
      "step": 140635
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4546377658843994,
      "learning_rate": 0.0001980316607215019,
      "loss": 2.846,
      "step": 140636
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.911332130432129,
      "learning_rate": 0.00019802781370621946,
      "loss": 2.6704,
      "step": 140637
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.876441240310669,
      "learning_rate": 0.0001980239667098957,
      "loss": 2.9807,
      "step": 140638
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.0611109733581543,
      "learning_rate": 0.00019802011973253119,
      "loss": 2.8679,
      "step": 140639
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2477681636810303,
      "learning_rate": 0.00019801627277412658,
      "loss": 3.1475,
      "step": 140640
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.7641167640686035,
      "learning_rate": 0.0001980124258346827,
      "loss": 2.6778,
      "step": 140641
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7625653743743896,
      "learning_rate": 0.0001980085789142002,
      "loss": 2.8576,
      "step": 140642
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1049115657806396,
      "learning_rate": 0.00019800473201267985,
      "loss": 3.1552,
      "step": 140643
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3700156211853027,
      "learning_rate": 0.00019800088513012234,
      "loss": 2.6124,
      "step": 140644
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4818713665008545,
      "learning_rate": 0.00019799703826652856,
      "loss": 2.5956,
      "step": 140645
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.284278631210327,
      "learning_rate": 0.00019799319142189885,
      "loss": 3.052,
      "step": 140646
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.22550630569458,
      "learning_rate": 0.00019798934459623417,
      "loss": 3.1331,
      "step": 140647
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.095785140991211,
      "learning_rate": 0.00019798549778953523,
      "loss": 2.844,
      "step": 140648
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1243693828582764,
      "learning_rate": 0.00019798165100180266,
      "loss": 2.9717,
      "step": 140649
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9233232736587524,
      "learning_rate": 0.00019797780423303724,
      "loss": 2.9333,
      "step": 140650
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.043992042541504,
      "learning_rate": 0.0001979739574832398,
      "loss": 3.0028,
      "step": 140651
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8835607767105103,
      "learning_rate": 0.00019797011075241083,
      "loss": 2.9141,
      "step": 140652
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.090027332305908,
      "learning_rate": 0.00019796626404055114,
      "loss": 3.0106,
      "step": 140653
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.498816728591919,
      "learning_rate": 0.0001979624173476614,
      "loss": 2.9719,
      "step": 140654
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.754425048828125,
      "learning_rate": 0.00019795857067374242,
      "loss": 2.6975,
      "step": 140655
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.062988042831421,
      "learning_rate": 0.00019795472401879484,
      "loss": 2.7293,
      "step": 140656
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.802428722381592,
      "learning_rate": 0.00019795087738281951,
      "loss": 3.0338,
      "step": 140657
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.155595541000366,
      "learning_rate": 0.00019794703076581697,
      "loss": 2.7933,
      "step": 140658
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.034430742263794,
      "learning_rate": 0.00019794318416778794,
      "loss": 3.1666,
      "step": 140659
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9388947486877441,
      "learning_rate": 0.00019793933758873326,
      "loss": 2.9853,
      "step": 140660
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.036710023880005,
      "learning_rate": 0.00019793549102865352,
      "loss": 2.7215,
      "step": 140661
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.286160469055176,
      "learning_rate": 0.00019793164448754958,
      "loss": 2.7717,
      "step": 140662
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2635514736175537,
      "learning_rate": 0.00019792779796542201,
      "loss": 3.055,
      "step": 140663
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.051591634750366,
      "learning_rate": 0.00019792395146227173,
      "loss": 3.0066,
      "step": 140664
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4897170066833496,
      "learning_rate": 0.0001979201049780992,
      "loss": 2.9282,
      "step": 140665
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.3745291233062744,
      "learning_rate": 0.00019791625851290522,
      "loss": 2.9435,
      "step": 140666
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.493359327316284,
      "learning_rate": 0.00019791241206669054,
      "loss": 3.2651,
      "step": 140667
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.0391650199890137,
      "learning_rate": 0.0001979085656394559,
      "loss": 2.5,
      "step": 140668
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.853263020515442,
      "learning_rate": 0.000197904719231202,
      "loss": 3.0181,
      "step": 140669
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9891295433044434,
      "learning_rate": 0.0001979008728419296,
      "loss": 2.9257,
      "step": 140670
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0832860469818115,
      "learning_rate": 0.0001978970264716393,
      "loss": 2.8514,
      "step": 140671
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.114292621612549,
      "learning_rate": 0.0001978931801203318,
      "loss": 2.8249,
      "step": 140672
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.432013511657715,
      "learning_rate": 0.00019788933378800792,
      "loss": 2.9103,
      "step": 140673
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2281763553619385,
      "learning_rate": 0.00019788548747466837,
      "loss": 2.9569,
      "step": 140674
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3748483657836914,
      "learning_rate": 0.0001978816411803138,
      "loss": 2.8604,
      "step": 140675
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6128897666931152,
      "learning_rate": 0.0001978777949049451,
      "loss": 2.9946,
      "step": 140676
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1883561611175537,
      "learning_rate": 0.0001978739486485627,
      "loss": 3.2219,
      "step": 140677
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.323951005935669,
      "learning_rate": 0.00019787010241116747,
      "loss": 2.9091,
      "step": 140678
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6063754558563232,
      "learning_rate": 0.0001978662561927601,
      "loss": 2.6921,
      "step": 140679
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5975704193115234,
      "learning_rate": 0.00019786240999334134,
      "loss": 2.9444,
      "step": 140680
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.181893825531006,
      "learning_rate": 0.0001978585638129119,
      "loss": 2.9165,
      "step": 140681
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2013466358184814,
      "learning_rate": 0.00019785471765147258,
      "loss": 2.8464,
      "step": 140682
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8883860111236572,
      "learning_rate": 0.00019785087150902387,
      "loss": 2.9578,
      "step": 140683
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9667073488235474,
      "learning_rate": 0.00019784702538556663,
      "loss": 3.1232,
      "step": 140684
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9944978952407837,
      "learning_rate": 0.00019784317928110156,
      "loss": 2.9264,
      "step": 140685
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.306041717529297,
      "learning_rate": 0.00019783933319562935,
      "loss": 3.1982,
      "step": 140686
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.053161144256592,
      "learning_rate": 0.00019783548712915076,
      "loss": 3.1485,
      "step": 140687
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.443650007247925,
      "learning_rate": 0.0001978316410816666,
      "loss": 3.0596,
      "step": 140688
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.02213716506958,
      "learning_rate": 0.00019782779505317732,
      "loss": 2.6921,
      "step": 140689
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5033416748046875,
      "learning_rate": 0.00019782394904368378,
      "loss": 2.9187,
      "step": 140690
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4919795989990234,
      "learning_rate": 0.00019782010305318673,
      "loss": 2.8326,
      "step": 140691
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.629615068435669,
      "learning_rate": 0.00019781625708168682,
      "loss": 3.0725,
      "step": 140692
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.602057933807373,
      "learning_rate": 0.00019781241112918482,
      "loss": 3.0058,
      "step": 140693
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.174830436706543,
      "learning_rate": 0.00019780856519568154,
      "loss": 2.8919,
      "step": 140694
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5824878215789795,
      "learning_rate": 0.00019780471928117742,
      "loss": 2.9354,
      "step": 140695
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5348355770111084,
      "learning_rate": 0.0001978008733856734,
      "loss": 2.8427,
      "step": 140696
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5318565368652344,
      "learning_rate": 0.0001977970275091701,
      "loss": 3.2984,
      "step": 140697
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.160998821258545,
      "learning_rate": 0.00019779318165166824,
      "loss": 2.9418,
      "step": 140698
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.190772533416748,
      "learning_rate": 0.00019778933581316856,
      "loss": 3.119,
      "step": 140699
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.387749433517456,
      "learning_rate": 0.00019778548999367185,
      "loss": 3.0449,
      "step": 140700
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5863349437713623,
      "learning_rate": 0.00019778164419317874,
      "loss": 2.8098,
      "step": 140701
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.296903133392334,
      "learning_rate": 0.00019777779841168987,
      "loss": 3.1074,
      "step": 140702
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.21975040435791,
      "learning_rate": 0.00019777395264920607,
      "loss": 3.2463,
      "step": 140703
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.790719747543335,
      "learning_rate": 0.00019777010690572798,
      "loss": 2.914,
      "step": 140704
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.893938064575195,
      "learning_rate": 0.00019776626118125638,
      "loss": 2.9536,
      "step": 140705
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.8453266620635986,
      "learning_rate": 0.00019776241547579204,
      "loss": 2.7994,
      "step": 140706
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5272185802459717,
      "learning_rate": 0.00019775856978933553,
      "loss": 3.0416,
      "step": 140707
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8161141872406006,
      "learning_rate": 0.00019775472412188766,
      "loss": 3.0159,
      "step": 140708
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.132052421569824,
      "learning_rate": 0.0001977508784734491,
      "loss": 2.9423,
      "step": 140709
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8317677974700928,
      "learning_rate": 0.00019774703284402057,
      "loss": 2.7493,
      "step": 140710
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.575881242752075,
      "learning_rate": 0.00019774318723360276,
      "loss": 2.951,
      "step": 140711
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.118436098098755,
      "learning_rate": 0.00019773934164219652,
      "loss": 3.0298,
      "step": 140712
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9080440998077393,
      "learning_rate": 0.00019773549606980239,
      "loss": 2.987,
      "step": 140713
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.17404842376709,
      "learning_rate": 0.0001977316505164212,
      "loss": 2.9913,
      "step": 140714
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8403644561767578,
      "learning_rate": 0.0001977278049820536,
      "loss": 2.726,
      "step": 140715
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.141350269317627,
      "learning_rate": 0.00019772395946670036,
      "loss": 3.0831,
      "step": 140716
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0837349891662598,
      "learning_rate": 0.00019772011397036214,
      "loss": 3.0444,
      "step": 140717
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.870391368865967,
      "learning_rate": 0.00019771626849303977,
      "loss": 2.9413,
      "step": 140718
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.86694073677063,
      "learning_rate": 0.00019771242303473377,
      "loss": 2.9677,
      "step": 140719
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2260632514953613,
      "learning_rate": 0.000197708577595445,
      "loss": 3.2341,
      "step": 140720
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.164911985397339,
      "learning_rate": 0.00019770473217517408,
      "loss": 2.8016,
      "step": 140721
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.206043243408203,
      "learning_rate": 0.00019770088677392183,
      "loss": 2.9397,
      "step": 140722
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.425023078918457,
      "learning_rate": 0.00019769704139168892,
      "loss": 2.9122,
      "step": 140723
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5681304931640625,
      "learning_rate": 0.00019769319602847606,
      "loss": 2.8475,
      "step": 140724
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.57915997505188,
      "learning_rate": 0.000197689350684284,
      "loss": 3.1427,
      "step": 140725
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.397517681121826,
      "learning_rate": 0.00019768550535911338,
      "loss": 2.8991,
      "step": 140726
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.501513957977295,
      "learning_rate": 0.00019768166005296496,
      "loss": 3.02,
      "step": 140727
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.670724630355835,
      "learning_rate": 0.0001976778147658394,
      "loss": 2.9807,
      "step": 140728
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1789398193359375,
      "learning_rate": 0.0001976739694977375,
      "loss": 3.1701,
      "step": 140729
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.625098705291748,
      "learning_rate": 0.00019767012424866002,
      "loss": 2.8233,
      "step": 140730
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.2566912174224854,
      "learning_rate": 0.00019766627901860755,
      "loss": 3.0217,
      "step": 140731
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6169254779815674,
      "learning_rate": 0.0001976624338075808,
      "loss": 2.8868,
      "step": 140732
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3954789638519287,
      "learning_rate": 0.0001976585886155806,
      "loss": 2.9172,
      "step": 140733
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9827067852020264,
      "learning_rate": 0.00019765474344260749,
      "loss": 2.9218,
      "step": 140734
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0743789672851562,
      "learning_rate": 0.00019765089828866237,
      "loss": 2.8805,
      "step": 140735
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.710491418838501,
      "learning_rate": 0.00019764705315374588,
      "loss": 3.0347,
      "step": 140736
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3173563480377197,
      "learning_rate": 0.00019764320803785884,
      "loss": 2.7421,
      "step": 140737
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.915803909301758,
      "learning_rate": 0.00019763936294100174,
      "loss": 3.0867,
      "step": 140738
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.9187655448913574,
      "learning_rate": 0.0001976355178631754,
      "loss": 3.0059,
      "step": 140739
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7430083751678467,
      "learning_rate": 0.00019763167280438057,
      "loss": 2.9685,
      "step": 140740
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1227729320526123,
      "learning_rate": 0.00019762782776461797,
      "loss": 3.0763,
      "step": 140741
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.509127378463745,
      "learning_rate": 0.0001976239827438882,
      "loss": 2.7361,
      "step": 140742
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9550893306732178,
      "learning_rate": 0.0001976201377421923,
      "loss": 2.8729,
      "step": 140743
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3139777183532715,
      "learning_rate": 0.00019761629275953055,
      "loss": 2.8399,
      "step": 140744
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9327129125595093,
      "learning_rate": 0.0001976124477959039,
      "loss": 2.8424,
      "step": 140745
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3373894691467285,
      "learning_rate": 0.000197608602851313,
      "loss": 3.1672,
      "step": 140746
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.689654588699341,
      "learning_rate": 0.00019760475792575857,
      "loss": 2.8121,
      "step": 140747
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.0949184894561768,
      "learning_rate": 0.0001976009130192414,
      "loss": 2.8459,
      "step": 140748
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.441246271133423,
      "learning_rate": 0.0001975970681317623,
      "loss": 2.952,
      "step": 140749
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.262937068939209,
      "learning_rate": 0.00019759322326332165,
      "loss": 3.1354,
      "step": 140750
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8850278854370117,
      "learning_rate": 0.0001975893784139204,
      "loss": 3.0387,
      "step": 140751
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.886657238006592,
      "learning_rate": 0.00019758553358355919,
      "loss": 2.8901,
      "step": 140752
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.436737537384033,
      "learning_rate": 0.00019758168877223877,
      "loss": 3.3076,
      "step": 140753
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6124448776245117,
      "learning_rate": 0.00019757784397995984,
      "loss": 3.1202,
      "step": 140754
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.128758668899536,
      "learning_rate": 0.00019757399920672326,
      "loss": 2.7969,
      "step": 140755
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.799802780151367,
      "learning_rate": 0.00019757015445252948,
      "loss": 3.0078,
      "step": 140756
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9354610443115234,
      "learning_rate": 0.00019756630971737933,
      "loss": 2.9789,
      "step": 140757
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.255533695220947,
      "learning_rate": 0.00019756246500127355,
      "loss": 3.1118,
      "step": 140758
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.590388059616089,
      "learning_rate": 0.00019755862030421283,
      "loss": 2.8406,
      "step": 140759
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.292117118835449,
      "learning_rate": 0.0001975547756261979,
      "loss": 2.9697,
      "step": 140760
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.628661632537842,
      "learning_rate": 0.00019755093096722958,
      "loss": 2.9373,
      "step": 140761
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.725518226623535,
      "learning_rate": 0.00019754708632730838,
      "loss": 3.0514,
      "step": 140762
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.846679925918579,
      "learning_rate": 0.0001975432417064351,
      "loss": 3.0426,
      "step": 140763
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0105044841766357,
      "learning_rate": 0.00019753939710461047,
      "loss": 3.0277,
      "step": 140764
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.281140089035034,
      "learning_rate": 0.00019753555252183518,
      "loss": 3.1877,
      "step": 140765
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9596681594848633,
      "learning_rate": 0.00019753170795811,
      "loss": 2.9877,
      "step": 140766
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3254520893096924,
      "learning_rate": 0.0001975278634134357,
      "loss": 2.8981,
      "step": 140767
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5039899349212646,
      "learning_rate": 0.0001975240188878128,
      "loss": 2.8209,
      "step": 140768
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0651464462280273,
      "learning_rate": 0.00019752017438124213,
      "loss": 2.8931,
      "step": 140769
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3905789852142334,
      "learning_rate": 0.00019751632989372436,
      "loss": 3.1932,
      "step": 140770
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3580377101898193,
      "learning_rate": 0.00019751248542526024,
      "loss": 2.7713,
      "step": 140771
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.679809808731079,
      "learning_rate": 0.00019750864097585054,
      "loss": 2.9656,
      "step": 140772
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.1189417839050293,
      "learning_rate": 0.00019750479654549597,
      "loss": 3.2206,
      "step": 140773
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.34796404838562,
      "learning_rate": 0.0001975009521341971,
      "loss": 2.9653,
      "step": 140774
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.585386037826538,
      "learning_rate": 0.00019749710774195475,
      "loss": 3.2727,
      "step": 140775
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.486435651779175,
      "learning_rate": 0.00019749326336876964,
      "loss": 2.7534,
      "step": 140776
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.269026041030884,
      "learning_rate": 0.0001974894190146424,
      "loss": 2.8972,
      "step": 140777
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5997304916381836,
      "learning_rate": 0.00019748557467957385,
      "loss": 2.9928,
      "step": 140778
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1998066902160645,
      "learning_rate": 0.0001974817303635648,
      "loss": 3.2764,
      "step": 140779
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.1104583740234375,
      "learning_rate": 0.0001974778860666157,
      "loss": 3.0501,
      "step": 140780
    },
    {
      "epoch": 1.83,
      "grad_norm": 5.6099700927734375,
      "learning_rate": 0.0001974740417887274,
      "loss": 2.7887,
      "step": 140781
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.6298675537109375,
      "learning_rate": 0.00019747019752990058,
      "loss": 2.7577,
      "step": 140782
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.6194918155670166,
      "learning_rate": 0.00019746635329013603,
      "loss": 2.9008,
      "step": 140783
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.114593267440796,
      "learning_rate": 0.0001974625090694344,
      "loss": 2.9759,
      "step": 140784
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7803406715393066,
      "learning_rate": 0.0001974586648677965,
      "loss": 3.0236,
      "step": 140785
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.766203880310059,
      "learning_rate": 0.00019745482068522297,
      "loss": 2.6185,
      "step": 140786
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.35206413269043,
      "learning_rate": 0.00019745097652171443,
      "loss": 2.9276,
      "step": 140787
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.112004280090332,
      "learning_rate": 0.00019744713237727172,
      "loss": 2.8914,
      "step": 140788
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.059319496154785,
      "learning_rate": 0.00019744328825189552,
      "loss": 3.1042,
      "step": 140789
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.28566837310791,
      "learning_rate": 0.0001974394441455865,
      "loss": 2.6755,
      "step": 140790
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.264516592025757,
      "learning_rate": 0.0001974356000583455,
      "loss": 3.0093,
      "step": 140791
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.3217320442199707,
      "learning_rate": 0.0001974317559901732,
      "loss": 2.9281,
      "step": 140792
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0951428413391113,
      "learning_rate": 0.00019742791194107026,
      "loss": 2.979,
      "step": 140793
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3336148262023926,
      "learning_rate": 0.0001974240679110373,
      "loss": 3.0484,
      "step": 140794
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.264126777648926,
      "learning_rate": 0.00019742022390007523,
      "loss": 3.0936,
      "step": 140795
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3186872005462646,
      "learning_rate": 0.00019741637990818465,
      "loss": 2.9036,
      "step": 140796
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.09165620803833,
      "learning_rate": 0.00019741253593536626,
      "loss": 3.08,
      "step": 140797
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.15731143951416,
      "learning_rate": 0.0001974086919816209,
      "loss": 3.1748,
      "step": 140798
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.537161111831665,
      "learning_rate": 0.0001974048480469491,
      "loss": 2.7291,
      "step": 140799
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7828948497772217,
      "learning_rate": 0.00019740100413135183,
      "loss": 2.873,
      "step": 140800
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8856438398361206,
      "learning_rate": 0.00019739716023482954,
      "loss": 2.8023,
      "step": 140801
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8555318117141724,
      "learning_rate": 0.00019739331635738302,
      "loss": 2.9546,
      "step": 140802
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9745351076126099,
      "learning_rate": 0.00019738947249901308,
      "loss": 2.9249,
      "step": 140803
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.331623077392578,
      "learning_rate": 0.0001973856286597204,
      "loss": 2.8855,
      "step": 140804
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0070199966430664,
      "learning_rate": 0.00019738178483950561,
      "loss": 3.1981,
      "step": 140805
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6323037147521973,
      "learning_rate": 0.0001973779410383695,
      "loss": 3.054,
      "step": 140806
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9473583698272705,
      "learning_rate": 0.00019737409725631273,
      "loss": 2.9181,
      "step": 140807
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7290141582489014,
      "learning_rate": 0.00019737025349333615,
      "loss": 3.0633,
      "step": 140808
    },
    {
      "epoch": 1.83,
      "grad_norm": 4.70356559753418,
      "learning_rate": 0.0001973664097494403,
      "loss": 3.0717,
      "step": 140809
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.7634074687957764,
      "learning_rate": 0.00019736256602462604,
      "loss": 2.9236,
      "step": 140810
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5378737449645996,
      "learning_rate": 0.00019735872231889398,
      "loss": 3.1302,
      "step": 140811
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.294550895690918,
      "learning_rate": 0.00019735487863224482,
      "loss": 3.0604,
      "step": 140812
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5251269340515137,
      "learning_rate": 0.00019735103496467936,
      "loss": 3.1834,
      "step": 140813
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6659319400787354,
      "learning_rate": 0.00019734719131619827,
      "loss": 3.0878,
      "step": 140814
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1157665252685547,
      "learning_rate": 0.00019734334768680233,
      "loss": 2.7906,
      "step": 140815
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.980496883392334,
      "learning_rate": 0.0001973395040764922,
      "loss": 2.8904,
      "step": 140816
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.664074659347534,
      "learning_rate": 0.00019733566048526854,
      "loss": 2.8781,
      "step": 140817
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.038285970687866,
      "learning_rate": 0.0001973318169131321,
      "loss": 3.1953,
      "step": 140818
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.019841194152832,
      "learning_rate": 0.00019732797336008366,
      "loss": 2.9527,
      "step": 140819
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9047808647155762,
      "learning_rate": 0.00019732412982612383,
      "loss": 2.7391,
      "step": 140820
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4414992332458496,
      "learning_rate": 0.0001973202863112534,
      "loss": 2.9886,
      "step": 140821
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.228241443634033,
      "learning_rate": 0.00019731644281547323,
      "loss": 2.9112,
      "step": 140822
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8667378425598145,
      "learning_rate": 0.0001973125993387837,
      "loss": 2.7582,
      "step": 140823
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.180656909942627,
      "learning_rate": 0.00019730875588118573,
      "loss": 2.8491,
      "step": 140824
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.139631986618042,
      "learning_rate": 0.00019730491244268,
      "loss": 2.8788,
      "step": 140825
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2828636169433594,
      "learning_rate": 0.0001973010690232672,
      "loss": 2.8106,
      "step": 140826
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2397408485412598,
      "learning_rate": 0.00019729722562294806,
      "loss": 3.0662,
      "step": 140827
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.070373773574829,
      "learning_rate": 0.00019729338224172346,
      "loss": 3.0345,
      "step": 140828
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.3185222148895264,
      "learning_rate": 0.00019728953887959385,
      "loss": 3.1586,
      "step": 140829
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9281671047210693,
      "learning_rate": 0.00019728569553656002,
      "loss": 3.1353,
      "step": 140830
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9143500328063965,
      "learning_rate": 0.0001972818522126227,
      "loss": 3.1279,
      "step": 140831
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1196377277374268,
      "learning_rate": 0.00019727800890778266,
      "loss": 3.0216,
      "step": 140832
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.195535898208618,
      "learning_rate": 0.00019727416562204057,
      "loss": 2.9925,
      "step": 140833
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.331479549407959,
      "learning_rate": 0.00019727032235539727,
      "loss": 3.1263,
      "step": 140834
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2574353218078613,
      "learning_rate": 0.00019726647910785325,
      "loss": 3.1029,
      "step": 140835
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0531513690948486,
      "learning_rate": 0.00019726263587940932,
      "loss": 2.7343,
      "step": 140836
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.480128288269043,
      "learning_rate": 0.00019725879267006618,
      "loss": 2.9211,
      "step": 140837
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1721343994140625,
      "learning_rate": 0.0001972549494798246,
      "loss": 2.8198,
      "step": 140838
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.404301404953003,
      "learning_rate": 0.00019725110630868523,
      "loss": 2.8539,
      "step": 140839
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.221863269805908,
      "learning_rate": 0.00019724726315664896,
      "loss": 2.9725,
      "step": 140840
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9786089658737183,
      "learning_rate": 0.00019724342002371627,
      "loss": 2.9204,
      "step": 140841
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.874331474304199,
      "learning_rate": 0.00019723957690988796,
      "loss": 3.1567,
      "step": 140842
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.900214433670044,
      "learning_rate": 0.0001972357338151647,
      "loss": 3.0382,
      "step": 140843
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9689905643463135,
      "learning_rate": 0.0001972318907395473,
      "loss": 2.9521,
      "step": 140844
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1682841777801514,
      "learning_rate": 0.00019722804768303642,
      "loss": 2.9386,
      "step": 140845
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9232704639434814,
      "learning_rate": 0.0001972242046456329,
      "loss": 2.9799,
      "step": 140846
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.922349691390991,
      "learning_rate": 0.00019722036162733725,
      "loss": 2.878,
      "step": 140847
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.947539210319519,
      "learning_rate": 0.00019721651862815028,
      "loss": 3.0454,
      "step": 140848
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.099050521850586,
      "learning_rate": 0.0001972126756480727,
      "loss": 3.1791,
      "step": 140849
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1242129802703857,
      "learning_rate": 0.00019720883268710518,
      "loss": 2.9449,
      "step": 140850
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.630507230758667,
      "learning_rate": 0.0001972049897452485,
      "loss": 3.1671,
      "step": 140851
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.094716787338257,
      "learning_rate": 0.00019720114682250347,
      "loss": 2.8559,
      "step": 140852
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6243083477020264,
      "learning_rate": 0.00019719730391887057,
      "loss": 3.1072,
      "step": 140853
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.492661952972412,
      "learning_rate": 0.00019719346103435064,
      "loss": 2.857,
      "step": 140854
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4072444438934326,
      "learning_rate": 0.00019718961816894435,
      "loss": 3.1277,
      "step": 140855
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8847872018814087,
      "learning_rate": 0.0001971857753226525,
      "loss": 3.1969,
      "step": 140856
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6270663738250732,
      "learning_rate": 0.0001971819324954757,
      "loss": 2.9734,
      "step": 140857
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7267396450042725,
      "learning_rate": 0.00019717808968741478,
      "loss": 2.954,
      "step": 140858
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.5212979316711426,
      "learning_rate": 0.00019717424689847047,
      "loss": 2.815,
      "step": 140859
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9849424362182617,
      "learning_rate": 0.0001971704041286433,
      "loss": 2.8807,
      "step": 140860
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.929572582244873,
      "learning_rate": 0.0001971665613779341,
      "loss": 2.9202,
      "step": 140861
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1975929737091064,
      "learning_rate": 0.00019716271864634357,
      "loss": 2.9827,
      "step": 140862
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.5920801162719727,
      "learning_rate": 0.00019715887593387243,
      "loss": 3.1033,
      "step": 140863
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8372912406921387,
      "learning_rate": 0.0001971550332405214,
      "loss": 3.1642,
      "step": 140864
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.042135000228882,
      "learning_rate": 0.00019715119056629133,
      "loss": 2.85,
      "step": 140865
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.074126720428467,
      "learning_rate": 0.00019714734791118267,
      "loss": 2.9862,
      "step": 140866
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9396592378616333,
      "learning_rate": 0.00019714350527519621,
      "loss": 3.0527,
      "step": 140867
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.266483783721924,
      "learning_rate": 0.00019713966265833279,
      "loss": 2.9136,
      "step": 140868
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.326892852783203,
      "learning_rate": 0.00019713582006059297,
      "loss": 3.0758,
      "step": 140869
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4230833053588867,
      "learning_rate": 0.00019713197748197762,
      "loss": 2.8292,
      "step": 140870
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4711296558380127,
      "learning_rate": 0.00019712813492248746,
      "loss": 2.913,
      "step": 140871
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2108404636383057,
      "learning_rate": 0.000197124292382123,
      "loss": 3.0458,
      "step": 140872
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8645586967468262,
      "learning_rate": 0.00019712044986088507,
      "loss": 2.9347,
      "step": 140873
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1658034324645996,
      "learning_rate": 0.0001971166073587744,
      "loss": 2.8083,
      "step": 140874
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1731317043304443,
      "learning_rate": 0.0001971127648757917,
      "loss": 2.8657,
      "step": 140875
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.710117816925049,
      "learning_rate": 0.00019710892241193767,
      "loss": 3.0873,
      "step": 140876
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.239518642425537,
      "learning_rate": 0.00019710507996721307,
      "loss": 2.7804,
      "step": 140877
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8610341548919678,
      "learning_rate": 0.00019710123754161863,
      "loss": 3.0291,
      "step": 140878
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2926886081695557,
      "learning_rate": 0.0001970973951351549,
      "loss": 3.1226,
      "step": 140879
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.809685230255127,
      "learning_rate": 0.00019709355274782274,
      "loss": 2.946,
      "step": 140880
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.192284107208252,
      "learning_rate": 0.00019708971037962284,
      "loss": 3.187,
      "step": 140881
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.1854031085968018,
      "learning_rate": 0.00019708586803055586,
      "loss": 2.9459,
      "step": 140882
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9883882999420166,
      "learning_rate": 0.00019708202570062268,
      "loss": 3.1568,
      "step": 140883
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9389132261276245,
      "learning_rate": 0.00019707818338982378,
      "loss": 3.0778,
      "step": 140884
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3450872898101807,
      "learning_rate": 0.00019707434109816007,
      "loss": 2.9162,
      "step": 140885
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.93246328830719,
      "learning_rate": 0.00019707049882563212,
      "loss": 3.1747,
      "step": 140886
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.020489454269409,
      "learning_rate": 0.00019706665657224075,
      "loss": 3.0829,
      "step": 140887
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9909214973449707,
      "learning_rate": 0.00019706281433798657,
      "loss": 3.0505,
      "step": 140888
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4422709941864014,
      "learning_rate": 0.00019705897212287044,
      "loss": 2.7513,
      "step": 140889
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0555927753448486,
      "learning_rate": 0.00019705512992689294,
      "loss": 2.9946,
      "step": 140890
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4434146881103516,
      "learning_rate": 0.00019705128775005483,
      "loss": 2.7931,
      "step": 140891
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2777421474456787,
      "learning_rate": 0.00019704744559235685,
      "loss": 2.983,
      "step": 140892
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8675318956375122,
      "learning_rate": 0.00019704360345379967,
      "loss": 3.0111,
      "step": 140893
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.7198641300201416,
      "learning_rate": 0.00019703976133438403,
      "loss": 2.9613,
      "step": 140894
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6241648197174072,
      "learning_rate": 0.0001970359192341107,
      "loss": 3.0565,
      "step": 140895
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.505868911743164,
      "learning_rate": 0.00019703207715298027,
      "loss": 3.0606,
      "step": 140896
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8738597631454468,
      "learning_rate": 0.00019702823509099352,
      "loss": 2.759,
      "step": 140897
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.870562791824341,
      "learning_rate": 0.0001970243930481512,
      "loss": 2.9396,
      "step": 140898
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.8705310821533203,
      "learning_rate": 0.000197020551024454,
      "loss": 3.0601,
      "step": 140899
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8746049404144287,
      "learning_rate": 0.00019701670901990258,
      "loss": 2.9789,
      "step": 140900
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.28655743598938,
      "learning_rate": 0.00019701286703449774,
      "loss": 3.0568,
      "step": 140901
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.7513985633850098,
      "learning_rate": 0.00019700902506824012,
      "loss": 2.8136,
      "step": 140902
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9385502338409424,
      "learning_rate": 0.00019700518312113042,
      "loss": 2.9527,
      "step": 140903
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.208449602127075,
      "learning_rate": 0.00019700134119316945,
      "loss": 3.0338,
      "step": 140904
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.3443808555603027,
      "learning_rate": 0.00019699749928435786,
      "loss": 3.053,
      "step": 140905
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8096340894699097,
      "learning_rate": 0.0001969936573946964,
      "loss": 3.0884,
      "step": 140906
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9584364891052246,
      "learning_rate": 0.00019698981552418584,
      "loss": 2.8868,
      "step": 140907
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9456186294555664,
      "learning_rate": 0.00019698597367282674,
      "loss": 2.968,
      "step": 140908
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.218536615371704,
      "learning_rate": 0.00019698213184061986,
      "loss": 3.058,
      "step": 140909
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9998003244400024,
      "learning_rate": 0.00019697829002756595,
      "loss": 3.0728,
      "step": 140910
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.9283010959625244,
      "learning_rate": 0.00019697444823366575,
      "loss": 2.8567,
      "step": 140911
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.863368511199951,
      "learning_rate": 0.00019697060645891995,
      "loss": 2.8053,
      "step": 140912
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.947967052459717,
      "learning_rate": 0.00019696676470332933,
      "loss": 2.7697,
      "step": 140913
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2893636226654053,
      "learning_rate": 0.00019696292296689447,
      "loss": 2.8749,
      "step": 140914
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.374319553375244,
      "learning_rate": 0.0001969590812496161,
      "loss": 2.9315,
      "step": 140915
    },
    {
      "epoch": 1.83,
      "grad_norm": 3.348935604095459,
      "learning_rate": 0.00019695523955149497,
      "loss": 3.2564,
      "step": 140916
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.8969954252243042,
      "learning_rate": 0.00019695139787253186,
      "loss": 3.066,
      "step": 140917
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0403754711151123,
      "learning_rate": 0.0001969475562127274,
      "loss": 2.9992,
      "step": 140918
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.168813467025757,
      "learning_rate": 0.00019694371457208246,
      "loss": 2.9528,
      "step": 140919
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2635886669158936,
      "learning_rate": 0.0001969398729505975,
      "loss": 3.079,
      "step": 140920
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4007747173309326,
      "learning_rate": 0.00019693603134827336,
      "loss": 3.0141,
      "step": 140921
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.4595513343811035,
      "learning_rate": 0.00019693218976511077,
      "loss": 3.0142,
      "step": 140922
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0794148445129395,
      "learning_rate": 0.00019692834820111044,
      "loss": 3.0376,
      "step": 140923
    },
    {
      "epoch": 1.83,
      "grad_norm": 1.9452948570251465,
      "learning_rate": 0.00019692450665627305,
      "loss": 2.9891,
      "step": 140924
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.2196295261383057,
      "learning_rate": 0.00019692066513059934,
      "loss": 3.0553,
      "step": 140925
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.6138033866882324,
      "learning_rate": 0.00019691682362409018,
      "loss": 2.8894,
      "step": 140926
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.0822184085845947,
      "learning_rate": 0.000196912982136746,
      "loss": 3.1686,
      "step": 140927
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.139291286468506,
      "learning_rate": 0.0001969091406685676,
      "loss": 2.8781,
      "step": 140928
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.09649395942688,
      "learning_rate": 0.00019690529921955578,
      "loss": 2.9123,
      "step": 140929
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.5489931106567383,
      "learning_rate": 0.00019690145778971123,
      "loss": 2.9179,
      "step": 140930
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.720484972000122,
      "learning_rate": 0.0001968976163790346,
      "loss": 2.9075,
      "step": 140931
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3705742359161377,
      "learning_rate": 0.00019689377498752678,
      "loss": 3.0383,
      "step": 140932
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4111971855163574,
      "learning_rate": 0.00019688993361518823,
      "loss": 2.9971,
      "step": 140933
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.839951992034912,
      "learning_rate": 0.00019688609226201979,
      "loss": 3.0462,
      "step": 140934
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8859895467758179,
      "learning_rate": 0.00019688225092802218,
      "loss": 2.7434,
      "step": 140935
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.057979106903076,
      "learning_rate": 0.00019687840961319609,
      "loss": 2.9977,
      "step": 140936
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2886059284210205,
      "learning_rate": 0.00019687456831754226,
      "loss": 3.1126,
      "step": 140937
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5453178882598877,
      "learning_rate": 0.0001968707270410615,
      "loss": 2.8334,
      "step": 140938
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.254423141479492,
      "learning_rate": 0.00019686688578375434,
      "loss": 2.9262,
      "step": 140939
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.468928575515747,
      "learning_rate": 0.00019686304454562156,
      "loss": 2.8475,
      "step": 140940
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9499659538269043,
      "learning_rate": 0.00019685920332666385,
      "loss": 3.0852,
      "step": 140941
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1206610202789307,
      "learning_rate": 0.000196855362126882,
      "loss": 2.9452,
      "step": 140942
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.607360601425171,
      "learning_rate": 0.00019685152094627668,
      "loss": 3.0845,
      "step": 140943
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.029404640197754,
      "learning_rate": 0.00019684767978484869,
      "loss": 2.8729,
      "step": 140944
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.779657006263733,
      "learning_rate": 0.00019684383864259858,
      "loss": 2.8058,
      "step": 140945
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.7831358909606934,
      "learning_rate": 0.00019683999751952713,
      "loss": 2.8363,
      "step": 140946
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2939186096191406,
      "learning_rate": 0.0001968361564156351,
      "loss": 2.9759,
      "step": 140947
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3921542167663574,
      "learning_rate": 0.00019683231533092313,
      "loss": 2.9894,
      "step": 140948
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.062225341796875,
      "learning_rate": 0.00019682847426539203,
      "loss": 3.0776,
      "step": 140949
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.130474805831909,
      "learning_rate": 0.0001968246332190426,
      "loss": 3.1687,
      "step": 140950
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3328216075897217,
      "learning_rate": 0.00019682079219187523,
      "loss": 3.0093,
      "step": 140951
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5889194011688232,
      "learning_rate": 0.00019681695118389084,
      "loss": 2.9923,
      "step": 140952
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.068098783493042,
      "learning_rate": 0.00019681311019509017,
      "loss": 2.8892,
      "step": 140953
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.504335641860962,
      "learning_rate": 0.00019680926922547383,
      "loss": 3.0106,
      "step": 140954
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.285236120223999,
      "learning_rate": 0.0001968054282750426,
      "loss": 3.1027,
      "step": 140955
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.384495258331299,
      "learning_rate": 0.00019680158734379735,
      "loss": 2.9112,
      "step": 140956
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4072773456573486,
      "learning_rate": 0.0001967977464317385,
      "loss": 3.1445,
      "step": 140957
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.164597749710083,
      "learning_rate": 0.0001967939055388669,
      "loss": 3.1478,
      "step": 140958
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.251603364944458,
      "learning_rate": 0.00019679006466518325,
      "loss": 2.9026,
      "step": 140959
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3217248916625977,
      "learning_rate": 0.00019678622381068828,
      "loss": 3.0862,
      "step": 140960
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9614402055740356,
      "learning_rate": 0.00019678238297538272,
      "loss": 2.8385,
      "step": 140961
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.314364194869995,
      "learning_rate": 0.00019677854215926737,
      "loss": 3.1463,
      "step": 140962
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0963594913482666,
      "learning_rate": 0.0001967747013623427,
      "loss": 3.0781,
      "step": 140963
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.251521110534668,
      "learning_rate": 0.00019677086058460954,
      "loss": 2.7136,
      "step": 140964
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.986133098602295,
      "learning_rate": 0.00019676701982606867,
      "loss": 2.8893,
      "step": 140965
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.237440347671509,
      "learning_rate": 0.00019676317908672077,
      "loss": 3.312,
      "step": 140966
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8405399322509766,
      "learning_rate": 0.0001967593383665665,
      "loss": 3.0377,
      "step": 140967
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7031164169311523,
      "learning_rate": 0.00019675549766560673,
      "loss": 2.9265,
      "step": 140968
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.454275131225586,
      "learning_rate": 0.00019675165698384192,
      "loss": 2.982,
      "step": 140969
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.634951591491699,
      "learning_rate": 0.00019674781632127305,
      "loss": 2.857,
      "step": 140970
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.140042543411255,
      "learning_rate": 0.00019674397567790065,
      "loss": 3.2315,
      "step": 140971
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.740434408187866,
      "learning_rate": 0.00019674013505372546,
      "loss": 3.063,
      "step": 140972
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7887392044067383,
      "learning_rate": 0.00019673629444874824,
      "loss": 2.8978,
      "step": 140973
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1673734188079834,
      "learning_rate": 0.00019673245386296978,
      "loss": 2.9368,
      "step": 140974
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.314861297607422,
      "learning_rate": 0.00019672861329639063,
      "loss": 2.7791,
      "step": 140975
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5918004512786865,
      "learning_rate": 0.00019672477274901157,
      "loss": 3.047,
      "step": 140976
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.324369192123413,
      "learning_rate": 0.0001967209322208334,
      "loss": 2.9723,
      "step": 140977
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.902105689048767,
      "learning_rate": 0.00019671709171185665,
      "loss": 2.9256,
      "step": 140978
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.7079148292541504,
      "learning_rate": 0.0001967132512220822,
      "loss": 2.8312,
      "step": 140979
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.895955801010132,
      "learning_rate": 0.00019670941075151076,
      "loss": 3.0168,
      "step": 140980
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0243959426879883,
      "learning_rate": 0.00019670557030014292,
      "loss": 2.8926,
      "step": 140981
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1082165241241455,
      "learning_rate": 0.00019670172986797948,
      "loss": 2.9054,
      "step": 140982
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2865402698516846,
      "learning_rate": 0.00019669788945502113,
      "loss": 2.9563,
      "step": 140983
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9091999530792236,
      "learning_rate": 0.0001966940490612686,
      "loss": 2.7529,
      "step": 140984
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8863495588302612,
      "learning_rate": 0.0001966902086867226,
      "loss": 3.0686,
      "step": 140985
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.910656213760376,
      "learning_rate": 0.00019668636833138387,
      "loss": 3.1158,
      "step": 140986
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6537013053894043,
      "learning_rate": 0.00019668252799525304,
      "loss": 2.7211,
      "step": 140987
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7429585456848145,
      "learning_rate": 0.0001966786876783309,
      "loss": 2.9209,
      "step": 140988
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.425387382507324,
      "learning_rate": 0.0001966748473806181,
      "loss": 2.9207,
      "step": 140989
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6049857139587402,
      "learning_rate": 0.0001966710071021154,
      "loss": 2.7677,
      "step": 140990
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.251352310180664,
      "learning_rate": 0.00019666716684282363,
      "loss": 2.7807,
      "step": 140991
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.167121410369873,
      "learning_rate": 0.00019666332660274327,
      "loss": 3.2434,
      "step": 140992
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0069661140441895,
      "learning_rate": 0.00019665948638187522,
      "loss": 2.9258,
      "step": 140993
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.921167254447937,
      "learning_rate": 0.00019665564618022007,
      "loss": 2.9966,
      "step": 140994
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.423977851867676,
      "learning_rate": 0.00019665180599777855,
      "loss": 3.1173,
      "step": 140995
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.248710870742798,
      "learning_rate": 0.00019664796583455149,
      "loss": 2.866,
      "step": 140996
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.461963176727295,
      "learning_rate": 0.00019664412569053943,
      "loss": 2.9324,
      "step": 140997
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.389148473739624,
      "learning_rate": 0.00019664028556574327,
      "loss": 2.923,
      "step": 140998
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0651795864105225,
      "learning_rate": 0.00019663644546016372,
      "loss": 3.1103,
      "step": 140999
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3001317977905273,
      "learning_rate": 0.00019663260537380124,
      "loss": 3.0111,
      "step": 141000
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.199531078338623,
      "learning_rate": 0.00019662876530665674,
      "loss": 2.9574,
      "step": 141001
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.097318649291992,
      "learning_rate": 0.0001966249252587309,
      "loss": 2.8013,
      "step": 141002
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.323737621307373,
      "learning_rate": 0.00019662108523002447,
      "loss": 3.0925,
      "step": 141003
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.201797962188721,
      "learning_rate": 0.0001966172452205381,
      "loss": 3.1025,
      "step": 141004
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3930444717407227,
      "learning_rate": 0.00019661340523027267,
      "loss": 3.0952,
      "step": 141005
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.173004627227783,
      "learning_rate": 0.0001966095652592286,
      "loss": 3.012,
      "step": 141006
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.448202133178711,
      "learning_rate": 0.0001966057253074068,
      "loss": 3.0755,
      "step": 141007
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9054614305496216,
      "learning_rate": 0.00019660188537480794,
      "loss": 3.1675,
      "step": 141008
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.196026563644409,
      "learning_rate": 0.00019659804546143275,
      "loss": 2.7556,
      "step": 141009
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9113832712173462,
      "learning_rate": 0.00019659420556728194,
      "loss": 3.0045,
      "step": 141010
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0975871086120605,
      "learning_rate": 0.00019659036569235632,
      "loss": 2.7426,
      "step": 141011
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.7299792766571045,
      "learning_rate": 0.00019658652583665637,
      "loss": 2.9736,
      "step": 141012
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.037860870361328,
      "learning_rate": 0.00019658268600018298,
      "loss": 2.8163,
      "step": 141013
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.045029640197754,
      "learning_rate": 0.00019657884618293678,
      "loss": 3.0376,
      "step": 141014
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.347027540206909,
      "learning_rate": 0.00019657500638491855,
      "loss": 3.0152,
      "step": 141015
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.111416816711426,
      "learning_rate": 0.00019657116660612894,
      "loss": 3.0233,
      "step": 141016
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1695501804351807,
      "learning_rate": 0.0001965673268465689,
      "loss": 2.9043,
      "step": 141017
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9326627254486084,
      "learning_rate": 0.00019656348710623875,
      "loss": 3.1607,
      "step": 141018
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.172900915145874,
      "learning_rate": 0.00019655964738513943,
      "loss": 2.9926,
      "step": 141019
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5481300354003906,
      "learning_rate": 0.00019655580768327161,
      "loss": 3.042,
      "step": 141020
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5231332778930664,
      "learning_rate": 0.00019655196800063605,
      "loss": 3.0476,
      "step": 141021
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0394797325134277,
      "learning_rate": 0.00019654812833723342,
      "loss": 2.8469,
      "step": 141022
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.766880989074707,
      "learning_rate": 0.00019654428869306457,
      "loss": 3.0584,
      "step": 141023
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6221845149993896,
      "learning_rate": 0.00019654044906812994,
      "loss": 3.0047,
      "step": 141024
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1163885593414307,
      "learning_rate": 0.00019653660946243042,
      "loss": 2.9554,
      "step": 141025
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1576061248779297,
      "learning_rate": 0.0001965327698759667,
      "loss": 3.1295,
      "step": 141026
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.471907377243042,
      "learning_rate": 0.00019652893030873948,
      "loss": 3.2535,
      "step": 141027
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.097905158996582,
      "learning_rate": 0.00019652509076074947,
      "loss": 2.7547,
      "step": 141028
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.025564670562744,
      "learning_rate": 0.00019652125123199754,
      "loss": 3.0484,
      "step": 141029
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9214264154434204,
      "learning_rate": 0.00019651741172248415,
      "loss": 2.9929,
      "step": 141030
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9964901208877563,
      "learning_rate": 0.0001965135722322101,
      "loss": 3.2398,
      "step": 141031
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1170942783355713,
      "learning_rate": 0.00019650973276117616,
      "loss": 2.9801,
      "step": 141032
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4112539291381836,
      "learning_rate": 0.00019650589330938298,
      "loss": 3.1137,
      "step": 141033
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7681100368499756,
      "learning_rate": 0.00019650205387683134,
      "loss": 2.9208,
      "step": 141034
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3138949871063232,
      "learning_rate": 0.00019649821446352206,
      "loss": 2.8644,
      "step": 141035
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.206836700439453,
      "learning_rate": 0.00019649437506945552,
      "loss": 3.0273,
      "step": 141036
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.2044365406036377,
      "learning_rate": 0.0001964905356946327,
      "loss": 2.9889,
      "step": 141037
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.2310144901275635,
      "learning_rate": 0.00019648669633905418,
      "loss": 2.9851,
      "step": 141038
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.400470495223999,
      "learning_rate": 0.0001964828570027208,
      "loss": 3.2206,
      "step": 141039
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.640550374984741,
      "learning_rate": 0.00019647901768563318,
      "loss": 3.1953,
      "step": 141040
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7400054931640625,
      "learning_rate": 0.0001964751783877922,
      "loss": 2.9755,
      "step": 141041
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3433470726013184,
      "learning_rate": 0.00019647133910919833,
      "loss": 2.9574,
      "step": 141042
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.685468912124634,
      "learning_rate": 0.00019646749984985235,
      "loss": 2.9887,
      "step": 141043
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.073460817337036,
      "learning_rate": 0.00019646366060975504,
      "loss": 2.8477,
      "step": 141044
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.060229539871216,
      "learning_rate": 0.00019645982138890707,
      "loss": 2.98,
      "step": 141045
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3174831867218018,
      "learning_rate": 0.0001964559821873092,
      "loss": 2.8626,
      "step": 141046
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.5476303100585938,
      "learning_rate": 0.0001964521430049622,
      "loss": 2.9896,
      "step": 141047
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.4453125,
      "learning_rate": 0.00019644830384186665,
      "loss": 3.0273,
      "step": 141048
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3043627738952637,
      "learning_rate": 0.0001964444646980233,
      "loss": 2.7896,
      "step": 141049
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.366853713989258,
      "learning_rate": 0.00019644062557343283,
      "loss": 3.0178,
      "step": 141050
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2033634185791016,
      "learning_rate": 0.00019643678646809603,
      "loss": 2.7526,
      "step": 141051
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.233492612838745,
      "learning_rate": 0.0001964329473820136,
      "loss": 2.9091,
      "step": 141052
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9659178256988525,
      "learning_rate": 0.00019642910831518625,
      "loss": 2.8295,
      "step": 141053
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9443601369857788,
      "learning_rate": 0.0001964252692676147,
      "loss": 2.9322,
      "step": 141054
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8678699731826782,
      "learning_rate": 0.0001964214302392996,
      "loss": 2.8006,
      "step": 141055
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4858102798461914,
      "learning_rate": 0.00019641759123024174,
      "loss": 2.9675,
      "step": 141056
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.072782039642334,
      "learning_rate": 0.00019641375224044175,
      "loss": 3.1889,
      "step": 141057
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.282731533050537,
      "learning_rate": 0.00019640991326990044,
      "loss": 2.9673,
      "step": 141058
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.06219744682312,
      "learning_rate": 0.0001964060743186185,
      "loss": 3.2808,
      "step": 141059
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.638713836669922,
      "learning_rate": 0.00019640223538659664,
      "loss": 2.9093,
      "step": 141060
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2013099193573,
      "learning_rate": 0.0001963983964738355,
      "loss": 2.9716,
      "step": 141061
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3096044063568115,
      "learning_rate": 0.00019639455758033592,
      "loss": 2.9083,
      "step": 141062
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.168097496032715,
      "learning_rate": 0.0001963907187060985,
      "loss": 3.1248,
      "step": 141063
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5699541568756104,
      "learning_rate": 0.000196386879851124,
      "loss": 2.888,
      "step": 141064
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.181060314178467,
      "learning_rate": 0.00019638304101541316,
      "loss": 2.9641,
      "step": 141065
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2524054050445557,
      "learning_rate": 0.0001963792021989667,
      "loss": 2.9862,
      "step": 141066
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9892525672912598,
      "learning_rate": 0.00019637536340178524,
      "loss": 3.0408,
      "step": 141067
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3285815715789795,
      "learning_rate": 0.00019637152462386955,
      "loss": 3.1015,
      "step": 141068
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.404740571975708,
      "learning_rate": 0.00019636768586522044,
      "loss": 3.1193,
      "step": 141069
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.111600160598755,
      "learning_rate": 0.00019636384712583843,
      "loss": 2.87,
      "step": 141070
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1201112270355225,
      "learning_rate": 0.00019636000840572438,
      "loss": 3.1156,
      "step": 141071
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.543222665786743,
      "learning_rate": 0.000196356169704879,
      "loss": 2.5612,
      "step": 141072
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.0722339153289795,
      "learning_rate": 0.00019635233102330293,
      "loss": 2.8258,
      "step": 141073
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8702393770217896,
      "learning_rate": 0.0001963484923609969,
      "loss": 2.7923,
      "step": 141074
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9433445930480957,
      "learning_rate": 0.0001963446537179616,
      "loss": 2.9709,
      "step": 141075
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.81024694442749,
      "learning_rate": 0.00019634081509419787,
      "loss": 2.9893,
      "step": 141076
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6536476612091064,
      "learning_rate": 0.00019633697648970634,
      "loss": 3.2626,
      "step": 141077
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9298150539398193,
      "learning_rate": 0.00019633313790448772,
      "loss": 2.9598,
      "step": 141078
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.112220287322998,
      "learning_rate": 0.0001963292993385427,
      "loss": 2.8411,
      "step": 141079
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.651200532913208,
      "learning_rate": 0.00019632546079187196,
      "loss": 2.9868,
      "step": 141080
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.759129762649536,
      "learning_rate": 0.00019632162226447634,
      "loss": 2.9381,
      "step": 141081
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.886080026626587,
      "learning_rate": 0.00019631778375635647,
      "loss": 2.9864,
      "step": 141082
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4767706394195557,
      "learning_rate": 0.0001963139452675131,
      "loss": 3.1153,
      "step": 141083
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1359899044036865,
      "learning_rate": 0.00019631010679794697,
      "loss": 2.8418,
      "step": 141084
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8773746490478516,
      "learning_rate": 0.00019630626834765867,
      "loss": 3.042,
      "step": 141085
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8143301010131836,
      "learning_rate": 0.000196302429916649,
      "loss": 2.9047,
      "step": 141086
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5355722904205322,
      "learning_rate": 0.0001962985915049187,
      "loss": 3.0844,
      "step": 141087
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0559072494506836,
      "learning_rate": 0.0001962947531124684,
      "loss": 2.7762,
      "step": 141088
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.199479103088379,
      "learning_rate": 0.00019629091473929885,
      "loss": 3.0075,
      "step": 141089
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4654765129089355,
      "learning_rate": 0.00019628707638541092,
      "loss": 2.8114,
      "step": 141090
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7876620292663574,
      "learning_rate": 0.0001962832380508051,
      "loss": 3.0791,
      "step": 141091
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.160604953765869,
      "learning_rate": 0.00019627939973548212,
      "loss": 2.9765,
      "step": 141092
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3052313327789307,
      "learning_rate": 0.00019627556143944278,
      "loss": 3.3495,
      "step": 141093
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.133417129516602,
      "learning_rate": 0.00019627172316268778,
      "loss": 2.9925,
      "step": 141094
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1307356357574463,
      "learning_rate": 0.00019626788490521784,
      "loss": 2.9604,
      "step": 141095
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7112576961517334,
      "learning_rate": 0.00019626404666703374,
      "loss": 2.6769,
      "step": 141096
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.043543338775635,
      "learning_rate": 0.00019626020844813603,
      "loss": 2.9919,
      "step": 141097
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.176914691925049,
      "learning_rate": 0.0001962563702485255,
      "loss": 3.0283,
      "step": 141098
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.626952886581421,
      "learning_rate": 0.00019625253206820287,
      "loss": 3.0351,
      "step": 141099
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.2660725116729736,
      "learning_rate": 0.0001962486939071688,
      "loss": 2.9171,
      "step": 141100
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2027246952056885,
      "learning_rate": 0.00019624485576542411,
      "loss": 3.1829,
      "step": 141101
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.491881847381592,
      "learning_rate": 0.00019624101764296957,
      "loss": 3.0844,
      "step": 141102
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.602069616317749,
      "learning_rate": 0.00019623717953980567,
      "loss": 3.2061,
      "step": 141103
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.3310635089874268,
      "learning_rate": 0.0001962333414559332,
      "loss": 2.8908,
      "step": 141104
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.067279815673828,
      "learning_rate": 0.00019622950339135296,
      "loss": 3.0476,
      "step": 141105
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1519784927368164,
      "learning_rate": 0.00019622566534606555,
      "loss": 2.7964,
      "step": 141106
    },
    {
      "epoch": 1.84,
      "grad_norm": 5.694123268127441,
      "learning_rate": 0.0001962218273200718,
      "loss": 2.8934,
      "step": 141107
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.299760818481445,
      "learning_rate": 0.00019621798931337248,
      "loss": 3.0355,
      "step": 141108
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.680050849914551,
      "learning_rate": 0.00019621415132596806,
      "loss": 2.7799,
      "step": 141109
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.10261869430542,
      "learning_rate": 0.00019621031335785943,
      "loss": 2.7546,
      "step": 141110
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.30972957611084,
      "learning_rate": 0.00019620647540904724,
      "loss": 3.1298,
      "step": 141111
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1876463890075684,
      "learning_rate": 0.0001962026374795322,
      "loss": 2.8231,
      "step": 141112
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.604176044464111,
      "learning_rate": 0.00019619879956931508,
      "loss": 2.655,
      "step": 141113
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.135622501373291,
      "learning_rate": 0.00019619496167839668,
      "loss": 3.2022,
      "step": 141114
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7088186740875244,
      "learning_rate": 0.00019619112380677746,
      "loss": 2.8055,
      "step": 141115
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.431338310241699,
      "learning_rate": 0.00019618728595445826,
      "loss": 2.8085,
      "step": 141116
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.168070316314697,
      "learning_rate": 0.0001961834481214398,
      "loss": 3.1394,
      "step": 141117
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.2370848655700684,
      "learning_rate": 0.00019617961030772284,
      "loss": 2.6021,
      "step": 141118
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.590561389923096,
      "learning_rate": 0.00019617577251330805,
      "loss": 3.1097,
      "step": 141119
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.457252264022827,
      "learning_rate": 0.00019617193473819622,
      "loss": 2.9067,
      "step": 141120
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.9832987785339355,
      "learning_rate": 0.00019616809698238787,
      "loss": 3.0888,
      "step": 141121
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.633010387420654,
      "learning_rate": 0.00019616425924588388,
      "loss": 2.8462,
      "step": 141122
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9705116748809814,
      "learning_rate": 0.00019616042152868486,
      "loss": 3.2355,
      "step": 141123
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2233519554138184,
      "learning_rate": 0.0001961565838307916,
      "loss": 2.7106,
      "step": 141124
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.006814956665039,
      "learning_rate": 0.0001961527461522048,
      "loss": 2.9306,
      "step": 141125
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1786084175109863,
      "learning_rate": 0.00019614890849292517,
      "loss": 3.1162,
      "step": 141126
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.191429853439331,
      "learning_rate": 0.0001961450708529535,
      "loss": 3.0535,
      "step": 141127
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2287607192993164,
      "learning_rate": 0.00019614123323229032,
      "loss": 3.0096,
      "step": 141128
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.225339651107788,
      "learning_rate": 0.0001961373956309365,
      "loss": 3.0742,
      "step": 141129
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4618380069732666,
      "learning_rate": 0.0001961335580488926,
      "loss": 2.9424,
      "step": 141130
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4809134006500244,
      "learning_rate": 0.0001961297204861595,
      "loss": 3.0043,
      "step": 141131
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.188539743423462,
      "learning_rate": 0.00019612588294273781,
      "loss": 2.8701,
      "step": 141132
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8694489002227783,
      "learning_rate": 0.00019612204541862843,
      "loss": 2.7165,
      "step": 141133
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.515726327896118,
      "learning_rate": 0.0001961182079138318,
      "loss": 3.0103,
      "step": 141134
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4288012981414795,
      "learning_rate": 0.00019611437042834872,
      "loss": 3.0328,
      "step": 141135
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9046391248703003,
      "learning_rate": 0.00019611053296218,
      "loss": 2.8191,
      "step": 141136
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2386507987976074,
      "learning_rate": 0.00019610669551532625,
      "loss": 2.8633,
      "step": 141137
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9382696151733398,
      "learning_rate": 0.00019610285808778822,
      "loss": 2.7309,
      "step": 141138
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.027961492538452,
      "learning_rate": 0.0001960990206795668,
      "loss": 2.8189,
      "step": 141139
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.216456890106201,
      "learning_rate": 0.00019609518329066235,
      "loss": 2.7869,
      "step": 141140
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0134236812591553,
      "learning_rate": 0.0001960913459210758,
      "loss": 2.973,
      "step": 141141
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6180036067962646,
      "learning_rate": 0.00019608750857080782,
      "loss": 2.9492,
      "step": 141142
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0147294998168945,
      "learning_rate": 0.00019608367123985916,
      "loss": 2.9959,
      "step": 141143
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1317906379699707,
      "learning_rate": 0.00019607983392823047,
      "loss": 3.1709,
      "step": 141144
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9574241638183594,
      "learning_rate": 0.0001960759966359226,
      "loss": 3.0958,
      "step": 141145
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0765798091888428,
      "learning_rate": 0.0001960721593629361,
      "loss": 2.7836,
      "step": 141146
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.848670244216919,
      "learning_rate": 0.00019606832210927174,
      "loss": 3.0625,
      "step": 141147
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.083775043487549,
      "learning_rate": 0.00019606448487493024,
      "loss": 3.129,
      "step": 141148
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.307722330093384,
      "learning_rate": 0.00019606064765991226,
      "loss": 3.1422,
      "step": 141149
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.027022361755371,
      "learning_rate": 0.00019605681046421864,
      "loss": 2.9232,
      "step": 141150
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1246979236602783,
      "learning_rate": 0.00019605297328785003,
      "loss": 3.097,
      "step": 141151
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8902522325515747,
      "learning_rate": 0.00019604913613080713,
      "loss": 2.9871,
      "step": 141152
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.202260971069336,
      "learning_rate": 0.0001960452989930907,
      "loss": 2.9696,
      "step": 141153
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7379448413848877,
      "learning_rate": 0.00019604146187470134,
      "loss": 2.9434,
      "step": 141154
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.352077007293701,
      "learning_rate": 0.0001960376247756398,
      "loss": 2.805,
      "step": 141155
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4337806701660156,
      "learning_rate": 0.00019603378769590687,
      "loss": 2.9227,
      "step": 141156
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.791921854019165,
      "learning_rate": 0.00019602995063550325,
      "loss": 2.7468,
      "step": 141157
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.949637770652771,
      "learning_rate": 0.00019602611359442962,
      "loss": 3.2192,
      "step": 141158
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4234721660614014,
      "learning_rate": 0.00019602227657268662,
      "loss": 2.8717,
      "step": 141159
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.437788248062134,
      "learning_rate": 0.00019601843957027507,
      "loss": 3.0267,
      "step": 141160
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.585279941558838,
      "learning_rate": 0.00019601460258719574,
      "loss": 3.112,
      "step": 141161
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.282761573791504,
      "learning_rate": 0.00019601076562344923,
      "loss": 2.859,
      "step": 141162
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.179596185684204,
      "learning_rate": 0.0001960069286790363,
      "loss": 3.026,
      "step": 141163
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.329446792602539,
      "learning_rate": 0.00019600309175395755,
      "loss": 2.8898,
      "step": 141164
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.298098564147949,
      "learning_rate": 0.00019599925484821383,
      "loss": 3.0284,
      "step": 141165
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5231330394744873,
      "learning_rate": 0.0001959954179618058,
      "loss": 2.9349,
      "step": 141166
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.555170774459839,
      "learning_rate": 0.0001959915810947342,
      "loss": 3.0598,
      "step": 141167
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.466445207595825,
      "learning_rate": 0.00019598774424699976,
      "loss": 2.8092,
      "step": 141168
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3145508766174316,
      "learning_rate": 0.0001959839074186032,
      "loss": 3.174,
      "step": 141169
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9100178480148315,
      "learning_rate": 0.0001959800706095451,
      "loss": 3.066,
      "step": 141170
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3596303462982178,
      "learning_rate": 0.00019597623381982626,
      "loss": 2.9613,
      "step": 141171
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.24811053276062,
      "learning_rate": 0.00019597239704944745,
      "loss": 3.0431,
      "step": 141172
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2096753120422363,
      "learning_rate": 0.00019596856029840932,
      "loss": 2.905,
      "step": 141173
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.805244207382202,
      "learning_rate": 0.00019596472356671256,
      "loss": 3.0068,
      "step": 141174
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.520552635192871,
      "learning_rate": 0.00019596088685435806,
      "loss": 2.9495,
      "step": 141175
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.034954071044922,
      "learning_rate": 0.00019595705016134632,
      "loss": 2.8317,
      "step": 141176
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5818681716918945,
      "learning_rate": 0.00019595321348767807,
      "loss": 3.1378,
      "step": 141177
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.066953420639038,
      "learning_rate": 0.0001959493768333541,
      "loss": 2.9847,
      "step": 141178
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4042694568634033,
      "learning_rate": 0.00019594554019837512,
      "loss": 3.0718,
      "step": 141179
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7813198566436768,
      "learning_rate": 0.0001959417035827418,
      "loss": 2.8697,
      "step": 141180
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9369226694107056,
      "learning_rate": 0.00019593786698645502,
      "loss": 2.7877,
      "step": 141181
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9423235654830933,
      "learning_rate": 0.00019593403040951525,
      "loss": 3.1291,
      "step": 141182
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.66803240776062,
      "learning_rate": 0.0001959301938519233,
      "loss": 2.8306,
      "step": 141183
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9781426191329956,
      "learning_rate": 0.00019592635731367986,
      "loss": 3.0331,
      "step": 141184
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.146414279937744,
      "learning_rate": 0.0001959225207947857,
      "loss": 2.8494,
      "step": 141185
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.201849937438965,
      "learning_rate": 0.00019591868429524151,
      "loss": 2.9766,
      "step": 141186
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4969968795776367,
      "learning_rate": 0.00019591484781504811,
      "loss": 2.8701,
      "step": 141187
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.255823850631714,
      "learning_rate": 0.00019591101135420596,
      "loss": 2.685,
      "step": 141188
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9730477333068848,
      "learning_rate": 0.00019590717491271598,
      "loss": 3.2318,
      "step": 141189
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3338894844055176,
      "learning_rate": 0.00019590333849057879,
      "loss": 2.9095,
      "step": 141190
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.0736865997314453,
      "learning_rate": 0.00019589950208779512,
      "loss": 2.8085,
      "step": 141191
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.848749876022339,
      "learning_rate": 0.00019589566570436573,
      "loss": 2.8872,
      "step": 141192
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3674721717834473,
      "learning_rate": 0.00019589182934029128,
      "loss": 3.143,
      "step": 141193
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6184613704681396,
      "learning_rate": 0.0001958879929955726,
      "loss": 2.8955,
      "step": 141194
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1813817024230957,
      "learning_rate": 0.00019588415667021024,
      "loss": 2.9202,
      "step": 141195
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1823270320892334,
      "learning_rate": 0.00019588032036420495,
      "loss": 3.0135,
      "step": 141196
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.041090488433838,
      "learning_rate": 0.00019587648407755746,
      "loss": 2.8285,
      "step": 141197
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.365943431854248,
      "learning_rate": 0.00019587264781026855,
      "loss": 3.0699,
      "step": 141198
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.415889024734497,
      "learning_rate": 0.00019586881156233885,
      "loss": 2.9799,
      "step": 141199
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.2601606845855713,
      "learning_rate": 0.0001958649753337692,
      "loss": 3.0234,
      "step": 141200
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0598392486572266,
      "learning_rate": 0.00019586113912456015,
      "loss": 3.0505,
      "step": 141201
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6397035121917725,
      "learning_rate": 0.00019585730293471244,
      "loss": 2.8814,
      "step": 141202
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7487404346466064,
      "learning_rate": 0.0001958534667642268,
      "loss": 2.8761,
      "step": 141203
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.6116387844085693,
      "learning_rate": 0.000195849630613104,
      "loss": 3.0147,
      "step": 141204
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.891763687133789,
      "learning_rate": 0.00019584579448134475,
      "loss": 2.9174,
      "step": 141205
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4044578075408936,
      "learning_rate": 0.0001958419583689498,
      "loss": 3.2132,
      "step": 141206
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.001098871231079,
      "learning_rate": 0.0001958381222759197,
      "loss": 2.7847,
      "step": 141207
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.249022960662842,
      "learning_rate": 0.00019583428620225526,
      "loss": 3.0578,
      "step": 141208
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.512450933456421,
      "learning_rate": 0.00019583045014795719,
      "loss": 3.0518,
      "step": 141209
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.954597234725952,
      "learning_rate": 0.0001958266141130262,
      "loss": 2.8398,
      "step": 141210
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.483680009841919,
      "learning_rate": 0.00019582277809746302,
      "loss": 2.9753,
      "step": 141211
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.171160936355591,
      "learning_rate": 0.00019581894210126844,
      "loss": 2.9802,
      "step": 141212
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7958595752716064,
      "learning_rate": 0.000195815106124443,
      "loss": 2.8985,
      "step": 141213
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7601304054260254,
      "learning_rate": 0.00019581127016698747,
      "loss": 2.9578,
      "step": 141214
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.3651328086853027,
      "learning_rate": 0.0001958074342289026,
      "loss": 2.8919,
      "step": 141215
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4392526149749756,
      "learning_rate": 0.0001958035983101891,
      "loss": 2.9754,
      "step": 141216
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.001101016998291,
      "learning_rate": 0.00019579976241084773,
      "loss": 2.9642,
      "step": 141217
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5957694053649902,
      "learning_rate": 0.0001957959265308792,
      "loss": 3.0035,
      "step": 141218
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.177203416824341,
      "learning_rate": 0.00019579209067028405,
      "loss": 2.9397,
      "step": 141219
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4647064208984375,
      "learning_rate": 0.00019578825482906317,
      "loss": 2.8519,
      "step": 141220
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.612056255340576,
      "learning_rate": 0.0001957844190072172,
      "loss": 3.1295,
      "step": 141221
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.206425666809082,
      "learning_rate": 0.00019578058320474684,
      "loss": 2.955,
      "step": 141222
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.652874708175659,
      "learning_rate": 0.00019577674742165288,
      "loss": 2.8741,
      "step": 141223
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.075007915496826,
      "learning_rate": 0.00019577291165793608,
      "loss": 3.0516,
      "step": 141224
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0710296630859375,
      "learning_rate": 0.00019576907591359698,
      "loss": 2.8978,
      "step": 141225
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.385373115539551,
      "learning_rate": 0.00019576524018863635,
      "loss": 3.0538,
      "step": 141226
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9854423999786377,
      "learning_rate": 0.00019576140448305493,
      "loss": 3.0447,
      "step": 141227
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1985526084899902,
      "learning_rate": 0.00019575756879685344,
      "loss": 3.0685,
      "step": 141228
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2672276496887207,
      "learning_rate": 0.00019575373313003257,
      "loss": 3.3819,
      "step": 141229
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.229832649230957,
      "learning_rate": 0.0001957498974825931,
      "loss": 2.9703,
      "step": 141230
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4511947631835938,
      "learning_rate": 0.00019574606185453574,
      "loss": 2.8274,
      "step": 141231
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7053117752075195,
      "learning_rate": 0.00019574222624586107,
      "loss": 2.8113,
      "step": 141232
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.7217681407928467,
      "learning_rate": 0.00019573839065656988,
      "loss": 3.1232,
      "step": 141233
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.337029457092285,
      "learning_rate": 0.0001957345550866629,
      "loss": 2.8824,
      "step": 141234
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0884687900543213,
      "learning_rate": 0.00019573071953614085,
      "loss": 3.0176,
      "step": 141235
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2999231815338135,
      "learning_rate": 0.00019572688400500444,
      "loss": 2.9126,
      "step": 141236
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0955698490142822,
      "learning_rate": 0.00019572304849325438,
      "loss": 3.0346,
      "step": 141237
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2868635654449463,
      "learning_rate": 0.0001957192130008914,
      "loss": 2.7473,
      "step": 141238
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5734915733337402,
      "learning_rate": 0.0001957153775279161,
      "loss": 2.9009,
      "step": 141239
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8264923095703125,
      "learning_rate": 0.0001957115420743293,
      "loss": 3.0665,
      "step": 141240
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0354325771331787,
      "learning_rate": 0.00019570770664013168,
      "loss": 3.1627,
      "step": 141241
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9324393272399902,
      "learning_rate": 0.00019570387122532404,
      "loss": 2.9998,
      "step": 141242
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0128300189971924,
      "learning_rate": 0.00019570003582990698,
      "loss": 3.0059,
      "step": 141243
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9714879989624023,
      "learning_rate": 0.0001956962004538812,
      "loss": 2.7512,
      "step": 141244
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0545711517333984,
      "learning_rate": 0.0001956923650972476,
      "loss": 2.918,
      "step": 141245
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4133286476135254,
      "learning_rate": 0.00019568852976000663,
      "loss": 3.2593,
      "step": 141246
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.151824474334717,
      "learning_rate": 0.00019568469444215916,
      "loss": 2.8507,
      "step": 141247
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0607495307922363,
      "learning_rate": 0.00019568085914370594,
      "loss": 3.0696,
      "step": 141248
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4399075508117676,
      "learning_rate": 0.0001956770238646475,
      "loss": 2.8982,
      "step": 141249
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4598920345306396,
      "learning_rate": 0.00019567318860498473,
      "loss": 2.9674,
      "step": 141250
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2294368743896484,
      "learning_rate": 0.0001956693533647183,
      "loss": 2.8426,
      "step": 141251
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5424885749816895,
      "learning_rate": 0.00019566551814384888,
      "loss": 3.1154,
      "step": 141252
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3742029666900635,
      "learning_rate": 0.00019566168294237726,
      "loss": 3.1222,
      "step": 141253
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3677947521209717,
      "learning_rate": 0.00019565784776030408,
      "loss": 2.7875,
      "step": 141254
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0998737812042236,
      "learning_rate": 0.00019565401259763005,
      "loss": 3.0092,
      "step": 141255
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2522709369659424,
      "learning_rate": 0.00019565017745435594,
      "loss": 2.9891,
      "step": 141256
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7419421672821045,
      "learning_rate": 0.00019564634233048235,
      "loss": 2.9583,
      "step": 141257
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4067320823669434,
      "learning_rate": 0.00019564250722601016,
      "loss": 2.8823,
      "step": 141258
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.219139575958252,
      "learning_rate": 0.00019563867214093995,
      "loss": 3.1919,
      "step": 141259
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.043635845184326,
      "learning_rate": 0.00019563483707527255,
      "loss": 3.0798,
      "step": 141260
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0366618633270264,
      "learning_rate": 0.0001956310020290086,
      "loss": 3.1139,
      "step": 141261
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2241785526275635,
      "learning_rate": 0.00019562716700214877,
      "loss": 3.1835,
      "step": 141262
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3337440490722656,
      "learning_rate": 0.00019562333199469377,
      "loss": 3.1701,
      "step": 141263
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.268125295639038,
      "learning_rate": 0.00019561949700664446,
      "loss": 2.8398,
      "step": 141264
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.626920223236084,
      "learning_rate": 0.00019561566203800138,
      "loss": 3.1355,
      "step": 141265
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.2949206829071045,
      "learning_rate": 0.0001956118270887653,
      "loss": 2.8576,
      "step": 141266
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.150409698486328,
      "learning_rate": 0.00019560799215893713,
      "loss": 2.9507,
      "step": 141267
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.268212080001831,
      "learning_rate": 0.00019560415724851726,
      "loss": 2.8486,
      "step": 141268
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.305068016052246,
      "learning_rate": 0.00019560032235750658,
      "loss": 3.136,
      "step": 141269
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.025602102279663,
      "learning_rate": 0.0001955964874859057,
      "loss": 2.9545,
      "step": 141270
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9894927740097046,
      "learning_rate": 0.00019559265263371548,
      "loss": 2.7956,
      "step": 141271
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0479633808135986,
      "learning_rate": 0.00019558881780093652,
      "loss": 2.8823,
      "step": 141272
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2808549404144287,
      "learning_rate": 0.00019558498298756968,
      "loss": 3.0395,
      "step": 141273
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8237587213516235,
      "learning_rate": 0.00019558114819361548,
      "loss": 2.9146,
      "step": 141274
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.601865530014038,
      "learning_rate": 0.0001955773134190747,
      "loss": 3.0087,
      "step": 141275
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.748708963394165,
      "learning_rate": 0.00019557347866394807,
      "loss": 3.2555,
      "step": 141276
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.5587899684906006,
      "learning_rate": 0.0001955696439282363,
      "loss": 2.9327,
      "step": 141277
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.36686372756958,
      "learning_rate": 0.00019556580921194005,
      "loss": 2.739,
      "step": 141278
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3794074058532715,
      "learning_rate": 0.00019556197451506028,
      "loss": 2.7514,
      "step": 141279
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.6321487426757812,
      "learning_rate": 0.00019555813983759734,
      "loss": 3.1573,
      "step": 141280
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.6738429069519043,
      "learning_rate": 0.00019555430517955214,
      "loss": 2.8762,
      "step": 141281
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.009870767593384,
      "learning_rate": 0.00019555047054092538,
      "loss": 2.872,
      "step": 141282
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0731215476989746,
      "learning_rate": 0.0001955466359217177,
      "loss": 2.8921,
      "step": 141283
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.956274151802063,
      "learning_rate": 0.00019554280132192992,
      "loss": 2.8931,
      "step": 141284
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0485517978668213,
      "learning_rate": 0.00019553896674156284,
      "loss": 2.9771,
      "step": 141285
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1400458812713623,
      "learning_rate": 0.00019553513218061692,
      "loss": 2.7641,
      "step": 141286
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9801377058029175,
      "learning_rate": 0.00019553129763909293,
      "loss": 3.0834,
      "step": 141287
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.472933292388916,
      "learning_rate": 0.0001955274631169917,
      "loss": 2.8161,
      "step": 141288
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2861385345458984,
      "learning_rate": 0.00019552362861431386,
      "loss": 3.0335,
      "step": 141289
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1298601627349854,
      "learning_rate": 0.00019551979413106018,
      "loss": 3.008,
      "step": 141290
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.613905668258667,
      "learning_rate": 0.00019551595966723142,
      "loss": 2.9342,
      "step": 141291
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.145327568054199,
      "learning_rate": 0.00019551212522282813,
      "loss": 2.9922,
      "step": 141292
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1990268230438232,
      "learning_rate": 0.00019550829079785108,
      "loss": 2.8404,
      "step": 141293
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4488141536712646,
      "learning_rate": 0.000195504456392301,
      "loss": 3.1766,
      "step": 141294
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7977983951568604,
      "learning_rate": 0.00019550062200617867,
      "loss": 2.9483,
      "step": 141295
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.122007131576538,
      "learning_rate": 0.00019549678763948472,
      "loss": 2.986,
      "step": 141296
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.209162712097168,
      "learning_rate": 0.00019549295329222,
      "loss": 2.8873,
      "step": 141297
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.86661958694458,
      "learning_rate": 0.000195489118964385,
      "loss": 2.8659,
      "step": 141298
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.996180295944214,
      "learning_rate": 0.00019548528465598055,
      "loss": 2.7798,
      "step": 141299
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6053900718688965,
      "learning_rate": 0.00019548145036700737,
      "loss": 2.9761,
      "step": 141300
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.0177788734436035,
      "learning_rate": 0.00019547761609746612,
      "loss": 2.9607,
      "step": 141301
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.641939878463745,
      "learning_rate": 0.00019547378184735757,
      "loss": 3.1171,
      "step": 141302
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1639931201934814,
      "learning_rate": 0.00019546994761668256,
      "loss": 3.0079,
      "step": 141303
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.085890054702759,
      "learning_rate": 0.00019546611340544152,
      "loss": 2.6814,
      "step": 141304
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.151360273361206,
      "learning_rate": 0.00019546227921363532,
      "loss": 3.0551,
      "step": 141305
    },
    {
      "epoch": 1.84,
      "grad_norm": 5.116128921508789,
      "learning_rate": 0.00019545844504126466,
      "loss": 3.0794,
      "step": 141306
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.685901403427124,
      "learning_rate": 0.00019545461088833023,
      "loss": 2.6736,
      "step": 141307
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3711371421813965,
      "learning_rate": 0.00019545077675483277,
      "loss": 2.8176,
      "step": 141308
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.13543963432312,
      "learning_rate": 0.00019544694264077308,
      "loss": 3.0543,
      "step": 141309
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.783718585968018,
      "learning_rate": 0.00019544310854615167,
      "loss": 2.9443,
      "step": 141310
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.058701515197754,
      "learning_rate": 0.0001954392744709694,
      "loss": 3.0966,
      "step": 141311
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.321798801422119,
      "learning_rate": 0.0001954354404152269,
      "loss": 3.1166,
      "step": 141312
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.668142795562744,
      "learning_rate": 0.00019543160637892492,
      "loss": 2.8608,
      "step": 141313
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5051164627075195,
      "learning_rate": 0.0001954277723620642,
      "loss": 2.8964,
      "step": 141314
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.525407552719116,
      "learning_rate": 0.00019542393836464552,
      "loss": 2.9208,
      "step": 141315
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.547534227371216,
      "learning_rate": 0.00019542010438666942,
      "loss": 2.9368,
      "step": 141316
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.128626585006714,
      "learning_rate": 0.00019541627042813666,
      "loss": 2.8002,
      "step": 141317
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.507904052734375,
      "learning_rate": 0.00019541243648904804,
      "loss": 3.1296,
      "step": 141318
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0140750408172607,
      "learning_rate": 0.00019540860256940417,
      "loss": 3.2324,
      "step": 141319
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.045226573944092,
      "learning_rate": 0.00019540476866920583,
      "loss": 2.9682,
      "step": 141320
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.104306697845459,
      "learning_rate": 0.00019540093478845382,
      "loss": 2.9771,
      "step": 141321
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9437167644500732,
      "learning_rate": 0.00019539710092714868,
      "loss": 2.9541,
      "step": 141322
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.295989990234375,
      "learning_rate": 0.00019539326708529123,
      "loss": 2.6753,
      "step": 141323
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.40144681930542,
      "learning_rate": 0.00019538943326288206,
      "loss": 2.982,
      "step": 141324
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2486391067504883,
      "learning_rate": 0.00019538559945992198,
      "loss": 2.901,
      "step": 141325
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0634047985076904,
      "learning_rate": 0.00019538176567641174,
      "loss": 3.064,
      "step": 141326
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.793409585952759,
      "learning_rate": 0.00019537793191235194,
      "loss": 2.8307,
      "step": 141327
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.197263717651367,
      "learning_rate": 0.00019537409816774347,
      "loss": 3.1152,
      "step": 141328
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0378589630126953,
      "learning_rate": 0.00019537026444258685,
      "loss": 3.0247,
      "step": 141329
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4569942951202393,
      "learning_rate": 0.00019536643073688291,
      "loss": 3.2331,
      "step": 141330
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0434186458587646,
      "learning_rate": 0.0001953625970506323,
      "loss": 3.2044,
      "step": 141331
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4390804767608643,
      "learning_rate": 0.00019535876338383574,
      "loss": 3.0925,
      "step": 141332
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3627352714538574,
      "learning_rate": 0.000195354929736494,
      "loss": 2.9492,
      "step": 141333
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7178666591644287,
      "learning_rate": 0.00019535109610860777,
      "loss": 3.1118,
      "step": 141334
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.6615569591522217,
      "learning_rate": 0.0001953472625001777,
      "loss": 3.0567,
      "step": 141335
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.5867769718170166,
      "learning_rate": 0.00019534342891120457,
      "loss": 2.7439,
      "step": 141336
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.874783992767334,
      "learning_rate": 0.00019533959534168905,
      "loss": 3.0879,
      "step": 141337
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.018853187561035,
      "learning_rate": 0.00019533576179163193,
      "loss": 3.0416,
      "step": 141338
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1077022552490234,
      "learning_rate": 0.00019533192826103377,
      "loss": 2.9949,
      "step": 141339
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.155861854553223,
      "learning_rate": 0.0001953280947498955,
      "loss": 3.0516,
      "step": 141340
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.245412588119507,
      "learning_rate": 0.00019532426125821764,
      "loss": 3.0174,
      "step": 141341
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.037872076034546,
      "learning_rate": 0.00019532042778600098,
      "loss": 3.1311,
      "step": 141342
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5266873836517334,
      "learning_rate": 0.0001953165943332462,
      "loss": 3.0842,
      "step": 141343
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.147918224334717,
      "learning_rate": 0.0001953127608999541,
      "loss": 2.8222,
      "step": 141344
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.533268690109253,
      "learning_rate": 0.00019530892748612533,
      "loss": 3.0287,
      "step": 141345
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.286740779876709,
      "learning_rate": 0.00019530509409176064,
      "loss": 3.1411,
      "step": 141346
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9542648792266846,
      "learning_rate": 0.00019530126071686064,
      "loss": 3.1764,
      "step": 141347
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4475042819976807,
      "learning_rate": 0.0001952974273614261,
      "loss": 3.047,
      "step": 141348
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.418870449066162,
      "learning_rate": 0.00019529359402545776,
      "loss": 2.7578,
      "step": 141349
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0743141174316406,
      "learning_rate": 0.00019528976070895635,
      "loss": 2.8345,
      "step": 141350
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5360512733459473,
      "learning_rate": 0.00019528592741192247,
      "loss": 2.9198,
      "step": 141351
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2292227745056152,
      "learning_rate": 0.00019528209413435712,
      "loss": 2.9916,
      "step": 141352
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.220210552215576,
      "learning_rate": 0.00019527826087626063,
      "loss": 2.9834,
      "step": 141353
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.952761173248291,
      "learning_rate": 0.00019527442763763388,
      "loss": 3.0964,
      "step": 141354
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7094345092773438,
      "learning_rate": 0.00019527059441847762,
      "loss": 3.1024,
      "step": 141355
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.326075553894043,
      "learning_rate": 0.00019526676121879255,
      "loss": 2.8759,
      "step": 141356
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4089608192443848,
      "learning_rate": 0.00019526292803857934,
      "loss": 3.0306,
      "step": 141357
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6297526359558105,
      "learning_rate": 0.00019525909487783883,
      "loss": 2.9809,
      "step": 141358
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.199749231338501,
      "learning_rate": 0.00019525526173657154,
      "loss": 3.0453,
      "step": 141359
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0768229961395264,
      "learning_rate": 0.00019525142861477826,
      "loss": 3.1973,
      "step": 141360
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.267277240753174,
      "learning_rate": 0.00019524759551245972,
      "loss": 3.0573,
      "step": 141361
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6670925617218018,
      "learning_rate": 0.00019524376242961665,
      "loss": 2.8903,
      "step": 141362
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.210148572921753,
      "learning_rate": 0.00019523992936624972,
      "loss": 3.1134,
      "step": 141363
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6604020595550537,
      "learning_rate": 0.00019523609632235978,
      "loss": 3.2481,
      "step": 141364
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1799678802490234,
      "learning_rate": 0.00019523226329794732,
      "loss": 2.8604,
      "step": 141365
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.752122640609741,
      "learning_rate": 0.00019522843029301314,
      "loss": 2.9037,
      "step": 141366
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.224130153656006,
      "learning_rate": 0.00019522459730755802,
      "loss": 2.9563,
      "step": 141367
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3671319484710693,
      "learning_rate": 0.00019522076434158256,
      "loss": 2.76,
      "step": 141368
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5538792610168457,
      "learning_rate": 0.00019521693139508756,
      "loss": 2.848,
      "step": 141369
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1206037998199463,
      "learning_rate": 0.00019521309846807385,
      "loss": 3.0286,
      "step": 141370
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.032970428466797,
      "learning_rate": 0.00019520926556054188,
      "loss": 2.913,
      "step": 141371
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9472519159317017,
      "learning_rate": 0.0001952054326724925,
      "loss": 2.8303,
      "step": 141372
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1852681636810303,
      "learning_rate": 0.0001952015998039264,
      "loss": 2.9708,
      "step": 141373
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.093313694000244,
      "learning_rate": 0.0001951977669548443,
      "loss": 2.9006,
      "step": 141374
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4739813804626465,
      "learning_rate": 0.00019519393412524688,
      "loss": 2.7524,
      "step": 141375
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.251335620880127,
      "learning_rate": 0.00019519010131513503,
      "loss": 2.9052,
      "step": 141376
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9325577020645142,
      "learning_rate": 0.0001951862685245092,
      "loss": 2.8476,
      "step": 141377
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7562708854675293,
      "learning_rate": 0.0001951824357533702,
      "loss": 2.9127,
      "step": 141378
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.300419807434082,
      "learning_rate": 0.00019517860300171882,
      "loss": 3.1673,
      "step": 141379
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1893508434295654,
      "learning_rate": 0.00019517477026955563,
      "loss": 3.0784,
      "step": 141380
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2221810817718506,
      "learning_rate": 0.0001951709375568815,
      "loss": 2.8982,
      "step": 141381
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3166663646698,
      "learning_rate": 0.00019516710486369718,
      "loss": 2.8713,
      "step": 141382
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2554855346679688,
      "learning_rate": 0.00019516327219000314,
      "loss": 3.1268,
      "step": 141383
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9899019002914429,
      "learning_rate": 0.00019515943953580023,
      "loss": 3.0656,
      "step": 141384
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1556339263916016,
      "learning_rate": 0.00019515560690108917,
      "loss": 3.0647,
      "step": 141385
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2053847312927246,
      "learning_rate": 0.00019515177428587062,
      "loss": 3.0183,
      "step": 141386
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8398146629333496,
      "learning_rate": 0.00019514794169014538,
      "loss": 2.9565,
      "step": 141387
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.4283251762390137,
      "learning_rate": 0.0001951441091139142,
      "loss": 2.9935,
      "step": 141388
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2374558448791504,
      "learning_rate": 0.00019514027655717764,
      "loss": 2.9499,
      "step": 141389
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9034249782562256,
      "learning_rate": 0.00019513644401993646,
      "loss": 2.8152,
      "step": 141390
    },
    {
      "epoch": 1.84,
      "grad_norm": 5.231801986694336,
      "learning_rate": 0.00019513261150219137,
      "loss": 3.0877,
      "step": 141391
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3885247707366943,
      "learning_rate": 0.00019512877900394315,
      "loss": 2.861,
      "step": 141392
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.316007614135742,
      "learning_rate": 0.00019512494652519242,
      "loss": 2.9618,
      "step": 141393
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.399580478668213,
      "learning_rate": 0.00019512111406593997,
      "loss": 2.8834,
      "step": 141394
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4768290519714355,
      "learning_rate": 0.00019511728162618664,
      "loss": 2.8497,
      "step": 141395
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5052578449249268,
      "learning_rate": 0.00019511344920593283,
      "loss": 2.9825,
      "step": 141396
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.222168445587158,
      "learning_rate": 0.00019510961680517937,
      "loss": 2.7954,
      "step": 141397
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.07702898979187,
      "learning_rate": 0.00019510578442392707,
      "loss": 2.6581,
      "step": 141398
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3762192726135254,
      "learning_rate": 0.00019510195206217656,
      "loss": 3.025,
      "step": 141399
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.283012866973877,
      "learning_rate": 0.00019509811971992857,
      "loss": 2.8009,
      "step": 141400
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2757558822631836,
      "learning_rate": 0.00019509428739718394,
      "loss": 3.0953,
      "step": 141401
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5021562576293945,
      "learning_rate": 0.00019509045509394317,
      "loss": 2.8173,
      "step": 141402
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.49074125289917,
      "learning_rate": 0.00019508662281020705,
      "loss": 2.8104,
      "step": 141403
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.08449649810791,
      "learning_rate": 0.0001950827905459763,
      "loss": 2.7576,
      "step": 141404
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.328636646270752,
      "learning_rate": 0.00019507895830125164,
      "loss": 2.9396,
      "step": 141405
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.3134658336639404,
      "learning_rate": 0.0001950751260760338,
      "loss": 2.9984,
      "step": 141406
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.102919578552246,
      "learning_rate": 0.00019507129387032356,
      "loss": 2.9478,
      "step": 141407
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.891890525817871,
      "learning_rate": 0.00019506746168412146,
      "loss": 2.682,
      "step": 141408
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8039517402648926,
      "learning_rate": 0.0001950636295174283,
      "loss": 2.9454,
      "step": 141409
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0999810695648193,
      "learning_rate": 0.0001950597973702448,
      "loss": 2.9044,
      "step": 141410
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.8052897453308105,
      "learning_rate": 0.00019505596524257163,
      "loss": 3.2261,
      "step": 141411
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.8155322074890137,
      "learning_rate": 0.00019505213313440953,
      "loss": 2.8914,
      "step": 141412
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4553542137145996,
      "learning_rate": 0.00019504830104575933,
      "loss": 2.824,
      "step": 141413
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3656444549560547,
      "learning_rate": 0.00019504446897662152,
      "loss": 2.916,
      "step": 141414
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0195531845092773,
      "learning_rate": 0.00019504063692699704,
      "loss": 2.9288,
      "step": 141415
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.634888172149658,
      "learning_rate": 0.0001950368048968864,
      "loss": 2.6393,
      "step": 141416
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.7905781269073486,
      "learning_rate": 0.00019503297288629037,
      "loss": 2.8546,
      "step": 141417
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0708537101745605,
      "learning_rate": 0.00019502914089520972,
      "loss": 2.907,
      "step": 141418
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.268296241760254,
      "learning_rate": 0.0001950253089236452,
      "loss": 2.9592,
      "step": 141419
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0066471099853516,
      "learning_rate": 0.00019502147697159737,
      "loss": 2.8682,
      "step": 141420
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.3162155151367188,
      "learning_rate": 0.00019501764503906705,
      "loss": 2.8373,
      "step": 141421
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.896354675292969,
      "learning_rate": 0.000195013813126055,
      "loss": 2.623,
      "step": 141422
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.232581853866577,
      "learning_rate": 0.0001950099812325618,
      "loss": 3.0293,
      "step": 141423
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3626863956451416,
      "learning_rate": 0.00019500614935858822,
      "loss": 3.1425,
      "step": 141424
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.416520833969116,
      "learning_rate": 0.00019500231750413502,
      "loss": 2.8047,
      "step": 141425
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.966221332550049,
      "learning_rate": 0.00019499848566920287,
      "loss": 2.847,
      "step": 141426
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.703552484512329,
      "learning_rate": 0.00019499465385379244,
      "loss": 3.0143,
      "step": 141427
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.418858289718628,
      "learning_rate": 0.00019499082205790444,
      "loss": 2.8075,
      "step": 141428
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1083452701568604,
      "learning_rate": 0.00019498699028153976,
      "loss": 3.0299,
      "step": 141429
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3764376640319824,
      "learning_rate": 0.00019498315852469887,
      "loss": 2.9041,
      "step": 141430
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.272754192352295,
      "learning_rate": 0.0001949793267873827,
      "loss": 2.8446,
      "step": 141431
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.186779737472534,
      "learning_rate": 0.00019497549506959178,
      "loss": 2.7859,
      "step": 141432
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9196046590805054,
      "learning_rate": 0.0001949716633713269,
      "loss": 2.9158,
      "step": 141433
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7644550800323486,
      "learning_rate": 0.00019496783169258873,
      "loss": 2.8056,
      "step": 141434
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0660102367401123,
      "learning_rate": 0.00019496400003337808,
      "loss": 2.9829,
      "step": 141435
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.338047504425049,
      "learning_rate": 0.00019496016839369565,
      "loss": 3.1096,
      "step": 141436
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1435341835021973,
      "learning_rate": 0.0001949563367735421,
      "loss": 2.8614,
      "step": 141437
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.854189395904541,
      "learning_rate": 0.00019495250517291806,
      "loss": 3.0397,
      "step": 141438
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.913731336593628,
      "learning_rate": 0.0001949486735918244,
      "loss": 2.9491,
      "step": 141439
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.499173164367676,
      "learning_rate": 0.00019494484203026171,
      "loss": 2.9502,
      "step": 141440
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.560473442077637,
      "learning_rate": 0.0001949410104882308,
      "loss": 2.6075,
      "step": 141441
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4289000034332275,
      "learning_rate": 0.00019493717896573232,
      "loss": 2.9612,
      "step": 141442
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5483574867248535,
      "learning_rate": 0.00019493334746276708,
      "loss": 2.9403,
      "step": 141443
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4760901927948,
      "learning_rate": 0.0001949295159793356,
      "loss": 2.8445,
      "step": 141444
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5171170234680176,
      "learning_rate": 0.0001949256845154388,
      "loss": 2.9387,
      "step": 141445
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.297339677810669,
      "learning_rate": 0.0001949218530710772,
      "loss": 2.8829,
      "step": 141446
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2018938064575195,
      "learning_rate": 0.00019491802164625164,
      "loss": 2.8936,
      "step": 141447
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.600816249847412,
      "learning_rate": 0.00019491419024096282,
      "loss": 2.9834,
      "step": 141448
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4024198055267334,
      "learning_rate": 0.00019491035885521154,
      "loss": 3.0063,
      "step": 141449
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.071079969406128,
      "learning_rate": 0.0001949065274889983,
      "loss": 2.8541,
      "step": 141450
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5034170150756836,
      "learning_rate": 0.0001949026961423239,
      "loss": 2.8549,
      "step": 141451
    },
    {
      "epoch": 1.84,
      "grad_norm": 5.053578853607178,
      "learning_rate": 0.00019489886481518907,
      "loss": 2.8707,
      "step": 141452
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.107861042022705,
      "learning_rate": 0.00019489503350759456,
      "loss": 3.0518,
      "step": 141453
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8197758197784424,
      "learning_rate": 0.000194891202219541,
      "loss": 2.9323,
      "step": 141454
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.670555591583252,
      "learning_rate": 0.0001948873709510293,
      "loss": 2.7714,
      "step": 141455
    },
    {
      "epoch": 1.84,
      "grad_norm": 5.288483142852783,
      "learning_rate": 0.0001948835397020599,
      "loss": 2.958,
      "step": 141456
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.028173446655273,
      "learning_rate": 0.00019487970847263366,
      "loss": 2.9063,
      "step": 141457
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.592461347579956,
      "learning_rate": 0.00019487587726275125,
      "loss": 3.0812,
      "step": 141458
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0858824253082275,
      "learning_rate": 0.00019487204607241337,
      "loss": 3.2519,
      "step": 141459
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.654643535614014,
      "learning_rate": 0.00019486821490162078,
      "loss": 3.1536,
      "step": 141460
    },
    {
      "epoch": 1.84,
      "grad_norm": 7.003575325012207,
      "learning_rate": 0.00019486438375037414,
      "loss": 3.04,
      "step": 141461
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.5995728969573975,
      "learning_rate": 0.0001948605526186744,
      "loss": 3.1029,
      "step": 141462
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.281519651412964,
      "learning_rate": 0.00019485672150652188,
      "loss": 3.0657,
      "step": 141463
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.436809539794922,
      "learning_rate": 0.00019485289041391748,
      "loss": 3.1386,
      "step": 141464
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.905471086502075,
      "learning_rate": 0.00019484905934086193,
      "loss": 2.8981,
      "step": 141465
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.4901983737945557,
      "learning_rate": 0.00019484522828735592,
      "loss": 2.9304,
      "step": 141466
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.304198980331421,
      "learning_rate": 0.00019484139725340016,
      "loss": 3.024,
      "step": 141467
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.221985340118408,
      "learning_rate": 0.0001948375662389955,
      "loss": 2.9048,
      "step": 141468
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.203435182571411,
      "learning_rate": 0.00019483373524414242,
      "loss": 2.9271,
      "step": 141469
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.057966709136963,
      "learning_rate": 0.0001948299042688417,
      "loss": 3.1398,
      "step": 141470
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9584712982177734,
      "learning_rate": 0.00019482607331309412,
      "loss": 3.1594,
      "step": 141471
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.373629331588745,
      "learning_rate": 0.00019482224237690033,
      "loss": 2.8002,
      "step": 141472
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.0469181537628174,
      "learning_rate": 0.0001948184114602611,
      "loss": 2.8669,
      "step": 141473
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.590075731277466,
      "learning_rate": 0.0001948145805631772,
      "loss": 2.7622,
      "step": 141474
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.452892541885376,
      "learning_rate": 0.00019481074968564913,
      "loss": 2.968,
      "step": 141475
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8359822034835815,
      "learning_rate": 0.00019480691882767777,
      "loss": 3.0413,
      "step": 141476
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9229093790054321,
      "learning_rate": 0.00019480308798926373,
      "loss": 3.0625,
      "step": 141477
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1041452884674072,
      "learning_rate": 0.00019479925717040783,
      "loss": 2.9788,
      "step": 141478
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3093669414520264,
      "learning_rate": 0.00019479542637111072,
      "loss": 2.9785,
      "step": 141479
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1734325885772705,
      "learning_rate": 0.00019479159559137327,
      "loss": 3.0622,
      "step": 141480
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0280027389526367,
      "learning_rate": 0.00019478776483119586,
      "loss": 3.096,
      "step": 141481
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.100757598876953,
      "learning_rate": 0.00019478393409057946,
      "loss": 2.9828,
      "step": 141482
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2386512756347656,
      "learning_rate": 0.0001947801033695247,
      "loss": 2.6792,
      "step": 141483
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.229715585708618,
      "learning_rate": 0.00019477627266803227,
      "loss": 3.205,
      "step": 141484
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.255842685699463,
      "learning_rate": 0.00019477244198610295,
      "loss": 2.5935,
      "step": 141485
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.317479372024536,
      "learning_rate": 0.00019476861132373754,
      "loss": 3.1179,
      "step": 141486
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.467371940612793,
      "learning_rate": 0.0001947647806809365,
      "loss": 2.9511,
      "step": 141487
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.454637050628662,
      "learning_rate": 0.00019476095005770065,
      "loss": 2.883,
      "step": 141488
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.38454270362854,
      "learning_rate": 0.00019475711945403075,
      "loss": 2.9753,
      "step": 141489
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0042614936828613,
      "learning_rate": 0.00019475328886992753,
      "loss": 3.0452,
      "step": 141490
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.327071189880371,
      "learning_rate": 0.00019474945830539158,
      "loss": 3.208,
      "step": 141491
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.169194221496582,
      "learning_rate": 0.00019474562776042385,
      "loss": 2.9945,
      "step": 141492
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.002925395965576,
      "learning_rate": 0.00019474179723502478,
      "loss": 3.0076,
      "step": 141493
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.7558443546295166,
      "learning_rate": 0.0001947379667291952,
      "loss": 2.9061,
      "step": 141494
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.504023551940918,
      "learning_rate": 0.0001947341362429358,
      "loss": 3.1625,
      "step": 141495
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4758052825927734,
      "learning_rate": 0.00019473030577624731,
      "loss": 3.2109,
      "step": 141496
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9138903617858887,
      "learning_rate": 0.00019472647532913049,
      "loss": 2.909,
      "step": 141497
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.184134006500244,
      "learning_rate": 0.000194722644901586,
      "loss": 3.054,
      "step": 141498
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.557981014251709,
      "learning_rate": 0.0001947188144936146,
      "loss": 2.8564,
      "step": 141499
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0597686767578125,
      "learning_rate": 0.0001947149841052169,
      "loss": 2.9567,
      "step": 141500
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3456013202667236,
      "learning_rate": 0.00019471115373639368,
      "loss": 2.9862,
      "step": 141501
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9783378839492798,
      "learning_rate": 0.00019470732338714558,
      "loss": 2.8935,
      "step": 141502
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.365616798400879,
      "learning_rate": 0.00019470349305747347,
      "loss": 2.971,
      "step": 141503
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4094791412353516,
      "learning_rate": 0.00019469966274737795,
      "loss": 2.9409,
      "step": 141504
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9622151851654053,
      "learning_rate": 0.00019469583245685973,
      "loss": 2.9168,
      "step": 141505
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2336208820343018,
      "learning_rate": 0.00019469200218591952,
      "loss": 3.2155,
      "step": 141506
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.791745662689209,
      "learning_rate": 0.0001946881719345581,
      "loss": 2.8305,
      "step": 141507
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.129937171936035,
      "learning_rate": 0.00019468434170277614,
      "loss": 3.1235,
      "step": 141508
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9985806941986084,
      "learning_rate": 0.0001946805114905743,
      "loss": 3.0496,
      "step": 141509
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.555824041366577,
      "learning_rate": 0.00019467668129795343,
      "loss": 2.8433,
      "step": 141510
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4010729789733887,
      "learning_rate": 0.00019467285112491405,
      "loss": 3.2228,
      "step": 141511
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4544966220855713,
      "learning_rate": 0.00019466902097145704,
      "loss": 2.8487,
      "step": 141512
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3589296340942383,
      "learning_rate": 0.00019466519083758297,
      "loss": 3.0528,
      "step": 141513
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.815988302230835,
      "learning_rate": 0.00019466136072329273,
      "loss": 2.8648,
      "step": 141514
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8525443077087402,
      "learning_rate": 0.00019465753062858688,
      "loss": 2.8105,
      "step": 141515
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.356149196624756,
      "learning_rate": 0.0001946537005534662,
      "loss": 3.0768,
      "step": 141516
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4257073402404785,
      "learning_rate": 0.0001946498704979314,
      "loss": 2.9183,
      "step": 141517
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4339263439178467,
      "learning_rate": 0.00019464604046198314,
      "loss": 3.0162,
      "step": 141518
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1028618812561035,
      "learning_rate": 0.00019464221044562213,
      "loss": 2.9531,
      "step": 141519
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.6680564880371094,
      "learning_rate": 0.00019463838044884917,
      "loss": 3.0192,
      "step": 141520
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9787794351577759,
      "learning_rate": 0.00019463455047166498,
      "loss": 3.0335,
      "step": 141521
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3783175945281982,
      "learning_rate": 0.0001946307205140702,
      "loss": 2.928,
      "step": 141522
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9479763507843018,
      "learning_rate": 0.00019462689057606552,
      "loss": 2.8944,
      "step": 141523
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4831480979919434,
      "learning_rate": 0.00019462306065765164,
      "loss": 2.898,
      "step": 141524
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8575496673583984,
      "learning_rate": 0.00019461923075882938,
      "loss": 2.8585,
      "step": 141525
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.324622392654419,
      "learning_rate": 0.00019461540087959935,
      "loss": 3.0788,
      "step": 141526
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.576740026473999,
      "learning_rate": 0.00019461157101996235,
      "loss": 2.9305,
      "step": 141527
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4595096111297607,
      "learning_rate": 0.00019460774117991908,
      "loss": 3.0718,
      "step": 141528
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6423068046569824,
      "learning_rate": 0.00019460391135947024,
      "loss": 3.0677,
      "step": 141529
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9009613990783691,
      "learning_rate": 0.00019460008155861646,
      "loss": 2.9277,
      "step": 141530
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4602482318878174,
      "learning_rate": 0.00019459625177735846,
      "loss": 2.8471,
      "step": 141531
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.618170738220215,
      "learning_rate": 0.00019459242201569707,
      "loss": 3.0224,
      "step": 141532
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.3011207580566406,
      "learning_rate": 0.00019458859227363293,
      "loss": 2.903,
      "step": 141533
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.378329277038574,
      "learning_rate": 0.00019458476255116675,
      "loss": 3.21,
      "step": 141534
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.723249912261963,
      "learning_rate": 0.00019458093284829936,
      "loss": 3.0035,
      "step": 141535
    },
    {
      "epoch": 1.84,
      "grad_norm": 4.026178359985352,
      "learning_rate": 0.00019457710316503126,
      "loss": 2.974,
      "step": 141536
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.3766672611236572,
      "learning_rate": 0.00019457327350136324,
      "loss": 2.9598,
      "step": 141537
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.074711322784424,
      "learning_rate": 0.0001945694438572961,
      "loss": 2.942,
      "step": 141538
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0472590923309326,
      "learning_rate": 0.00019456561423283043,
      "loss": 3.1775,
      "step": 141539
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.016331672668457,
      "learning_rate": 0.000194561784627967,
      "loss": 2.9799,
      "step": 141540
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.200136423110962,
      "learning_rate": 0.00019455795504270672,
      "loss": 2.9563,
      "step": 141541
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.156583786010742,
      "learning_rate": 0.00019455412547704991,
      "loss": 2.8454,
      "step": 141542
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6661789417266846,
      "learning_rate": 0.00019455029593099752,
      "loss": 2.6193,
      "step": 141543
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5380337238311768,
      "learning_rate": 0.00019454646640455018,
      "loss": 2.6346,
      "step": 141544
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2197940349578857,
      "learning_rate": 0.00019454263689770867,
      "loss": 2.7433,
      "step": 141545
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3487656116485596,
      "learning_rate": 0.00019453880741047367,
      "loss": 2.9751,
      "step": 141546
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.426020383834839,
      "learning_rate": 0.00019453497794284604,
      "loss": 3.0138,
      "step": 141547
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2198946475982666,
      "learning_rate": 0.00019453114849482617,
      "loss": 2.9391,
      "step": 141548
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.443732976913452,
      "learning_rate": 0.000194527319066415,
      "loss": 3.0001,
      "step": 141549
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.735427737236023,
      "learning_rate": 0.00019452348965761322,
      "loss": 2.8163,
      "step": 141550
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.127511739730835,
      "learning_rate": 0.00019451966026842147,
      "loss": 2.8694,
      "step": 141551
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.205583333969116,
      "learning_rate": 0.0001945158308988405,
      "loss": 2.9489,
      "step": 141552
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4345998764038086,
      "learning_rate": 0.00019451200154887115,
      "loss": 3.0422,
      "step": 141553
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5093233585357666,
      "learning_rate": 0.00019450817221851392,
      "loss": 2.9062,
      "step": 141554
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0566225051879883,
      "learning_rate": 0.00019450434290776956,
      "loss": 2.8577,
      "step": 141555
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9422147274017334,
      "learning_rate": 0.00019450051361663887,
      "loss": 3.1989,
      "step": 141556
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5509347915649414,
      "learning_rate": 0.0001944966843451225,
      "loss": 2.9818,
      "step": 141557
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6346466541290283,
      "learning_rate": 0.0001944928550932212,
      "loss": 3.0043,
      "step": 141558
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5765135288238525,
      "learning_rate": 0.00019448902586093584,
      "loss": 2.7028,
      "step": 141559
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0026094913482666,
      "learning_rate": 0.00019448519664826678,
      "loss": 2.9359,
      "step": 141560
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3868801593780518,
      "learning_rate": 0.00019448136745521495,
      "loss": 3.0508,
      "step": 141561
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.557236909866333,
      "learning_rate": 0.00019447753828178102,
      "loss": 2.8291,
      "step": 141562
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2990405559539795,
      "learning_rate": 0.00019447370912796565,
      "loss": 2.9945,
      "step": 141563
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1357314586639404,
      "learning_rate": 0.00019446987999376965,
      "loss": 3.1213,
      "step": 141564
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7272560596466064,
      "learning_rate": 0.00019446605087919383,
      "loss": 2.9927,
      "step": 141565
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0543787479400635,
      "learning_rate": 0.00019446222178423866,
      "loss": 3.029,
      "step": 141566
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.848702311515808,
      "learning_rate": 0.00019445839270890492,
      "loss": 2.9228,
      "step": 141567
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.142632484436035,
      "learning_rate": 0.00019445456365319337,
      "loss": 2.9058,
      "step": 141568
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.452242136001587,
      "learning_rate": 0.0001944507346171047,
      "loss": 3.2638,
      "step": 141569
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.4629831314086914,
      "learning_rate": 0.0001944469056006396,
      "loss": 2.8333,
      "step": 141570
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1518895626068115,
      "learning_rate": 0.000194443076603799,
      "loss": 3.1072,
      "step": 141571
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.302485466003418,
      "learning_rate": 0.00019443924762658325,
      "loss": 2.9359,
      "step": 141572
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9720640182495117,
      "learning_rate": 0.00019443541866899326,
      "loss": 3.1806,
      "step": 141573
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9150078296661377,
      "learning_rate": 0.0001944315897310297,
      "loss": 2.9011,
      "step": 141574
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1953418254852295,
      "learning_rate": 0.0001944277608126933,
      "loss": 3.0735,
      "step": 141575
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1928300857543945,
      "learning_rate": 0.00019442393191398483,
      "loss": 2.8939,
      "step": 141576
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.289611339569092,
      "learning_rate": 0.00019442010303490502,
      "loss": 2.6296,
      "step": 141577
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5379040241241455,
      "learning_rate": 0.0001944162741754544,
      "loss": 2.8397,
      "step": 141578
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.708747386932373,
      "learning_rate": 0.00019441244533563375,
      "loss": 2.9571,
      "step": 141579
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4541208744049072,
      "learning_rate": 0.00019440861651544384,
      "loss": 3.0877,
      "step": 141580
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.376417636871338,
      "learning_rate": 0.00019440478771488536,
      "loss": 2.7712,
      "step": 141581
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.7671265602111816,
      "learning_rate": 0.00019440095893395902,
      "loss": 2.7739,
      "step": 141582
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4161202907562256,
      "learning_rate": 0.0001943971301726656,
      "loss": 2.8329,
      "step": 141583
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.343888998031616,
      "learning_rate": 0.00019439330143100576,
      "loss": 2.7412,
      "step": 141584
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5078070163726807,
      "learning_rate": 0.00019438947270898014,
      "loss": 2.9285,
      "step": 141585
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2770180702209473,
      "learning_rate": 0.0001943856440065895,
      "loss": 3.0896,
      "step": 141586
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2001070976257324,
      "learning_rate": 0.00019438181532383455,
      "loss": 3.0333,
      "step": 141587
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4244494438171387,
      "learning_rate": 0.00019437798666071605,
      "loss": 3.0063,
      "step": 141588
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1725854873657227,
      "learning_rate": 0.0001943741580172347,
      "loss": 3.0515,
      "step": 141589
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2152769565582275,
      "learning_rate": 0.00019437032939339111,
      "loss": 3.1858,
      "step": 141590
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.734000325202942,
      "learning_rate": 0.00019436650078918618,
      "loss": 2.9232,
      "step": 141591
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3351993560791016,
      "learning_rate": 0.00019436267220462045,
      "loss": 2.9494,
      "step": 141592
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.572755813598633,
      "learning_rate": 0.00019435884363969464,
      "loss": 2.9827,
      "step": 141593
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.621854782104492,
      "learning_rate": 0.0001943550150944096,
      "loss": 3.221,
      "step": 141594
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9979097843170166,
      "learning_rate": 0.00019435118656876586,
      "loss": 2.9581,
      "step": 141595
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.390472650527954,
      "learning_rate": 0.00019434735806276438,
      "loss": 2.9557,
      "step": 141596
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0586819648742676,
      "learning_rate": 0.00019434352957640557,
      "loss": 3.0598,
      "step": 141597
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8893907070159912,
      "learning_rate": 0.00019433970110969043,
      "loss": 2.6997,
      "step": 141598
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2998251914978027,
      "learning_rate": 0.00019433587266261943,
      "loss": 3.0545,
      "step": 141599
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9790195226669312,
      "learning_rate": 0.0001943320442351934,
      "loss": 3.1379,
      "step": 141600
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.7225794792175293,
      "learning_rate": 0.00019432821582741303,
      "loss": 2.9641,
      "step": 141601
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4626097679138184,
      "learning_rate": 0.00019432438743927912,
      "loss": 2.8303,
      "step": 141602
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.063167095184326,
      "learning_rate": 0.00019432055907079223,
      "loss": 3.2407,
      "step": 141603
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.522343397140503,
      "learning_rate": 0.00019431673072195315,
      "loss": 3.0407,
      "step": 141604
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.804749011993408,
      "learning_rate": 0.00019431290239276257,
      "loss": 3.045,
      "step": 141605
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.132096767425537,
      "learning_rate": 0.00019430907408322126,
      "loss": 3.0397,
      "step": 141606
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1204583644866943,
      "learning_rate": 0.00019430524579332987,
      "loss": 3.0286,
      "step": 141607
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3052563667297363,
      "learning_rate": 0.00019430141752308915,
      "loss": 3.0571,
      "step": 141608
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.335655689239502,
      "learning_rate": 0.00019429758927249973,
      "loss": 3.0036,
      "step": 141609
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.232414484024048,
      "learning_rate": 0.00019429376104156243,
      "loss": 3.0222,
      "step": 141610
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1163904666900635,
      "learning_rate": 0.0001942899328302779,
      "loss": 3.1851,
      "step": 141611
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9523072242736816,
      "learning_rate": 0.00019428610463864682,
      "loss": 2.7397,
      "step": 141612
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9607475996017456,
      "learning_rate": 0.00019428227646667008,
      "loss": 3.0028,
      "step": 141613
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3738691806793213,
      "learning_rate": 0.0001942784483143482,
      "loss": 2.9764,
      "step": 141614
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8914616107940674,
      "learning_rate": 0.00019427462018168195,
      "loss": 3.1488,
      "step": 141615
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0524258613586426,
      "learning_rate": 0.00019427079206867198,
      "loss": 2.9192,
      "step": 141616
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.473670244216919,
      "learning_rate": 0.00019426696397531912,
      "loss": 3.0179,
      "step": 141617
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.007258415222168,
      "learning_rate": 0.00019426313590162399,
      "loss": 3.0565,
      "step": 141618
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.047464370727539,
      "learning_rate": 0.00019425930784758733,
      "loss": 3.0228,
      "step": 141619
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0832319259643555,
      "learning_rate": 0.00019425547981321003,
      "loss": 2.787,
      "step": 141620
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3239622116088867,
      "learning_rate": 0.00019425165179849248,
      "loss": 2.7677,
      "step": 141621
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.132915496826172,
      "learning_rate": 0.00019424782380343556,
      "loss": 3.1036,
      "step": 141622
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4509987831115723,
      "learning_rate": 0.00019424399582803996,
      "loss": 2.9881,
      "step": 141623
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0423429012298584,
      "learning_rate": 0.00019424016787230637,
      "loss": 2.8185,
      "step": 141624
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.123246192932129,
      "learning_rate": 0.00019423633993623558,
      "loss": 3.2852,
      "step": 141625
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.020514488220215,
      "learning_rate": 0.00019423251201982833,
      "loss": 3.0173,
      "step": 141626
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9047458171844482,
      "learning_rate": 0.00019422868412308515,
      "loss": 3.0027,
      "step": 141627
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4194774627685547,
      "learning_rate": 0.00019422485624600683,
      "loss": 3.1379,
      "step": 141628
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0158040523529053,
      "learning_rate": 0.0001942210283885941,
      "loss": 2.9624,
      "step": 141629
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4688546657562256,
      "learning_rate": 0.00019421720055084774,
      "loss": 3.1497,
      "step": 141630
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0197863578796387,
      "learning_rate": 0.00019421337273276833,
      "loss": 3.1423,
      "step": 141631
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.384704828262329,
      "learning_rate": 0.00019420954493435682,
      "loss": 3.1846,
      "step": 141632
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9448814392089844,
      "learning_rate": 0.00019420571715561365,
      "loss": 3.1373,
      "step": 141633
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5263586044311523,
      "learning_rate": 0.00019420188939653956,
      "loss": 3.0051,
      "step": 141634
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.843216896057129,
      "learning_rate": 0.0001941980616571354,
      "loss": 2.917,
      "step": 141635
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6880338191986084,
      "learning_rate": 0.00019419423393740178,
      "loss": 2.8742,
      "step": 141636
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.130385398864746,
      "learning_rate": 0.00019419040623733949,
      "loss": 2.7501,
      "step": 141637
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.011716365814209,
      "learning_rate": 0.00019418657855694926,
      "loss": 3.1826,
      "step": 141638
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8256735801696777,
      "learning_rate": 0.00019418275089623166,
      "loss": 3.1272,
      "step": 141639
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3981802463531494,
      "learning_rate": 0.00019417892325518748,
      "loss": 3.0728,
      "step": 141640
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.8624393939971924,
      "learning_rate": 0.00019417509563381745,
      "loss": 3.0504,
      "step": 141641
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2340407371520996,
      "learning_rate": 0.00019417126803212222,
      "loss": 2.8495,
      "step": 141642
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9359290599822998,
      "learning_rate": 0.0001941674404501026,
      "loss": 2.9549,
      "step": 141643
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5656816959381104,
      "learning_rate": 0.00019416361288775933,
      "loss": 2.9114,
      "step": 141644
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3499600887298584,
      "learning_rate": 0.00019415978534509298,
      "loss": 2.8683,
      "step": 141645
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9580481052398682,
      "learning_rate": 0.0001941559578221043,
      "loss": 2.6963,
      "step": 141646
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.16815185546875,
      "learning_rate": 0.00019415213031879398,
      "loss": 3.2424,
      "step": 141647
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2653696537017822,
      "learning_rate": 0.00019414830283516282,
      "loss": 3.0006,
      "step": 141648
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5328025817871094,
      "learning_rate": 0.00019414447537121146,
      "loss": 2.772,
      "step": 141649
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.13541579246521,
      "learning_rate": 0.00019414064792694081,
      "loss": 2.991,
      "step": 141650
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4826180934906006,
      "learning_rate": 0.00019413682050235126,
      "loss": 2.8131,
      "step": 141651
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.103963613510132,
      "learning_rate": 0.00019413299309744368,
      "loss": 2.9249,
      "step": 141652
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.094888687133789,
      "learning_rate": 0.00019412916571221877,
      "loss": 2.6597,
      "step": 141653
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8645825386047363,
      "learning_rate": 0.00019412533834667722,
      "loss": 3.332,
      "step": 141654
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2641143798828125,
      "learning_rate": 0.0001941215110008198,
      "loss": 2.8971,
      "step": 141655
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.182446241378784,
      "learning_rate": 0.00019411768367464733,
      "loss": 2.855,
      "step": 141656
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9346730709075928,
      "learning_rate": 0.0001941138563681602,
      "loss": 2.8969,
      "step": 141657
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.3400051593780518,
      "learning_rate": 0.0001941100290813594,
      "loss": 2.9338,
      "step": 141658
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.036799907684326,
      "learning_rate": 0.00019410620181424545,
      "loss": 2.9398,
      "step": 141659
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.766821026802063,
      "learning_rate": 0.00019410237456681916,
      "loss": 3.1093,
      "step": 141660
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9994264841079712,
      "learning_rate": 0.0001940985473390813,
      "loss": 3.1747,
      "step": 141661
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.516108274459839,
      "learning_rate": 0.00019409472013103249,
      "loss": 3.0198,
      "step": 141662
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.708980083465576,
      "learning_rate": 0.00019409089294267356,
      "loss": 2.9358,
      "step": 141663
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5680785179138184,
      "learning_rate": 0.00019408706577400504,
      "loss": 2.9253,
      "step": 141664
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.4106457233428955,
      "learning_rate": 0.00019408323862502776,
      "loss": 3.0538,
      "step": 141665
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.382899284362793,
      "learning_rate": 0.00019407941149574234,
      "loss": 2.9809,
      "step": 141666
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1547653675079346,
      "learning_rate": 0.0001940755843861496,
      "loss": 2.891,
      "step": 141667
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.4789772033691406,
      "learning_rate": 0.0001940717572962502,
      "loss": 2.9741,
      "step": 141668
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.177811622619629,
      "learning_rate": 0.00019406793022604495,
      "loss": 3.0597,
      "step": 141669
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9901013374328613,
      "learning_rate": 0.0001940641031755344,
      "loss": 2.9816,
      "step": 141670
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.242971658706665,
      "learning_rate": 0.00019406027614471936,
      "loss": 2.96,
      "step": 141671
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.512291669845581,
      "learning_rate": 0.00019405644913360044,
      "loss": 2.9363,
      "step": 141672
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.55509352684021,
      "learning_rate": 0.00019405262214217844,
      "loss": 3.0155,
      "step": 141673
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.161742687225342,
      "learning_rate": 0.0001940487951704541,
      "loss": 2.9459,
      "step": 141674
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.210883617401123,
      "learning_rate": 0.0001940449682184281,
      "loss": 2.9497,
      "step": 141675
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0538926124572754,
      "learning_rate": 0.0001940411412861012,
      "loss": 3.1158,
      "step": 141676
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6676621437072754,
      "learning_rate": 0.00019403731437347396,
      "loss": 3.0791,
      "step": 141677
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.25978422164917,
      "learning_rate": 0.00019403348748054714,
      "loss": 3.2603,
      "step": 141678
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2780771255493164,
      "learning_rate": 0.00019402966060732158,
      "loss": 2.8777,
      "step": 141679
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.2183125019073486,
      "learning_rate": 0.00019402583375379784,
      "loss": 2.936,
      "step": 141680
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.9057834148406982,
      "learning_rate": 0.00019402200691997676,
      "loss": 2.8083,
      "step": 141681
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9875096082687378,
      "learning_rate": 0.00019401818010585895,
      "loss": 2.9297,
      "step": 141682
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.6138713359832764,
      "learning_rate": 0.00019401435331144523,
      "loss": 2.9399,
      "step": 141683
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.279269218444824,
      "learning_rate": 0.0001940105265367362,
      "loss": 3.016,
      "step": 141684
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.560826063156128,
      "learning_rate": 0.00019400669978173255,
      "loss": 3.0711,
      "step": 141685
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.8904972076416016,
      "learning_rate": 0.0001940028730464351,
      "loss": 2.999,
      "step": 141686
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9304370880126953,
      "learning_rate": 0.00019399904633084455,
      "loss": 2.9521,
      "step": 141687
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.712751865386963,
      "learning_rate": 0.00019399521963496156,
      "loss": 2.9926,
      "step": 141688
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9952369928359985,
      "learning_rate": 0.00019399139295878681,
      "loss": 3.075,
      "step": 141689
    },
    {
      "epoch": 1.84,
      "grad_norm": 1.9445539712905884,
      "learning_rate": 0.00019398756630232116,
      "loss": 2.8894,
      "step": 141690
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.396789312362671,
      "learning_rate": 0.00019398373966556515,
      "loss": 2.7833,
      "step": 141691
    },
    {
      "epoch": 1.84,
      "grad_norm": 3.1252567768096924,
      "learning_rate": 0.00019397991304851954,
      "loss": 3.0209,
      "step": 141692
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.5234408378601074,
      "learning_rate": 0.0001939760864511852,
      "loss": 3.0095,
      "step": 141693
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.303029775619507,
      "learning_rate": 0.00019397225987356255,
      "loss": 2.9534,
      "step": 141694
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.1778781414031982,
      "learning_rate": 0.00019396843331565252,
      "loss": 2.9197,
      "step": 141695
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.140010356903076,
      "learning_rate": 0.0001939646067774557,
      "loss": 2.8397,
      "step": 141696
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.0935566425323486,
      "learning_rate": 0.00019396078025897293,
      "loss": 3.023,
      "step": 141697
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.027620792388916,
      "learning_rate": 0.00019395695376020489,
      "loss": 3.1395,
      "step": 141698
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9912184476852417,
      "learning_rate": 0.00019395312728115224,
      "loss": 3.0733,
      "step": 141699
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1457784175872803,
      "learning_rate": 0.00019394930082181565,
      "loss": 3.0027,
      "step": 141700
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7743031978607178,
      "learning_rate": 0.0001939454743821959,
      "loss": 3.2706,
      "step": 141701
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1457479000091553,
      "learning_rate": 0.0001939416479622937,
      "loss": 3.0095,
      "step": 141702
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3755712509155273,
      "learning_rate": 0.00019393782156210972,
      "loss": 3.0695,
      "step": 141703
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9724400043487549,
      "learning_rate": 0.00019393399518164472,
      "loss": 2.9545,
      "step": 141704
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1536190509796143,
      "learning_rate": 0.0001939301688208995,
      "loss": 3.1112,
      "step": 141705
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5954973697662354,
      "learning_rate": 0.00019392634247987456,
      "loss": 3.0296,
      "step": 141706
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.297883987426758,
      "learning_rate": 0.0001939225161585707,
      "loss": 2.7834,
      "step": 141707
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9683855772018433,
      "learning_rate": 0.00019391868985698865,
      "loss": 2.7255,
      "step": 141708
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.154980182647705,
      "learning_rate": 0.0001939148635751291,
      "loss": 2.9313,
      "step": 141709
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.468939781188965,
      "learning_rate": 0.00019391103731299282,
      "loss": 2.9784,
      "step": 141710
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.238415002822876,
      "learning_rate": 0.00019390721107058058,
      "loss": 2.7903,
      "step": 141711
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.143524646759033,
      "learning_rate": 0.00019390338484789286,
      "loss": 2.8757,
      "step": 141712
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.090071678161621,
      "learning_rate": 0.0001938995586449305,
      "loss": 3.1874,
      "step": 141713
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.941400408744812,
      "learning_rate": 0.00019389573246169424,
      "loss": 3.03,
      "step": 141714
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2615435123443604,
      "learning_rate": 0.00019389190629818477,
      "loss": 3.0427,
      "step": 141715
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.011399507522583,
      "learning_rate": 0.00019388808015440278,
      "loss": 3.0576,
      "step": 141716
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3150312900543213,
      "learning_rate": 0.00019388425403034913,
      "loss": 2.7831,
      "step": 141717
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.384063482284546,
      "learning_rate": 0.00019388042792602424,
      "loss": 3.0798,
      "step": 141718
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0847251415252686,
      "learning_rate": 0.00019387660184142901,
      "loss": 3.2175,
      "step": 141719
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4303712844848633,
      "learning_rate": 0.00019387277577656415,
      "loss": 3.0972,
      "step": 141720
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8806419372558594,
      "learning_rate": 0.0001938689497314303,
      "loss": 2.6399,
      "step": 141721
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1676273345947266,
      "learning_rate": 0.00019386512370602826,
      "loss": 3.008,
      "step": 141722
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.451512098312378,
      "learning_rate": 0.00019386129770035878,
      "loss": 3.1566,
      "step": 141723
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3470218181610107,
      "learning_rate": 0.00019385747171442238,
      "loss": 3.0855,
      "step": 141724
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2286555767059326,
      "learning_rate": 0.00019385364574821986,
      "loss": 2.8408,
      "step": 141725
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.175813913345337,
      "learning_rate": 0.000193849819801752,
      "loss": 3.0128,
      "step": 141726
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0179412364959717,
      "learning_rate": 0.00019384599387501945,
      "loss": 2.9499,
      "step": 141727
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3506627082824707,
      "learning_rate": 0.00019384216796802288,
      "loss": 2.9994,
      "step": 141728
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4106667041778564,
      "learning_rate": 0.00019383834208076313,
      "loss": 2.9441,
      "step": 141729
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2283642292022705,
      "learning_rate": 0.0001938345162132409,
      "loss": 3.034,
      "step": 141730
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1647608280181885,
      "learning_rate": 0.00019383069036545676,
      "loss": 3.3101,
      "step": 141731
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.104314088821411,
      "learning_rate": 0.00019382686453741147,
      "loss": 3.1058,
      "step": 141732
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.7791106700897217,
      "learning_rate": 0.00019382303872910576,
      "loss": 2.9171,
      "step": 141733
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1634089946746826,
      "learning_rate": 0.00019381921294054036,
      "loss": 2.8647,
      "step": 141734
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.450956344604492,
      "learning_rate": 0.00019381538717171598,
      "loss": 3.0562,
      "step": 141735
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9234681129455566,
      "learning_rate": 0.00019381156142263343,
      "loss": 3.221,
      "step": 141736
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2092862129211426,
      "learning_rate": 0.00019380773569329323,
      "loss": 2.9462,
      "step": 141737
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0168650150299072,
      "learning_rate": 0.00019380390998369613,
      "loss": 3.0709,
      "step": 141738
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0694737434387207,
      "learning_rate": 0.00019380008429384292,
      "loss": 3.0399,
      "step": 141739
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5255725383758545,
      "learning_rate": 0.00019379625862373428,
      "loss": 2.776,
      "step": 141740
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4142727851867676,
      "learning_rate": 0.00019379243297337092,
      "loss": 2.95,
      "step": 141741
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.769923686981201,
      "learning_rate": 0.00019378860734275364,
      "loss": 2.9331,
      "step": 141742
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6144464015960693,
      "learning_rate": 0.000193784781731883,
      "loss": 3.0247,
      "step": 141743
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0394487380981445,
      "learning_rate": 0.00019378095614075972,
      "loss": 3.0072,
      "step": 141744
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.209195613861084,
      "learning_rate": 0.00019377713056938458,
      "loss": 2.7618,
      "step": 141745
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.268648862838745,
      "learning_rate": 0.0001937733050177583,
      "loss": 3.0441,
      "step": 141746
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0360798835754395,
      "learning_rate": 0.00019376947948588155,
      "loss": 2.7247,
      "step": 141747
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.949330449104309,
      "learning_rate": 0.00019376565397375515,
      "loss": 2.9132,
      "step": 141748
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.119384527206421,
      "learning_rate": 0.00019376182848137964,
      "loss": 2.7493,
      "step": 141749
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1065776348114014,
      "learning_rate": 0.0001937580030087558,
      "loss": 3.0393,
      "step": 141750
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1511402130126953,
      "learning_rate": 0.00019375417755588436,
      "loss": 2.9956,
      "step": 141751
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9316402673721313,
      "learning_rate": 0.000193750352122766,
      "loss": 3.0409,
      "step": 141752
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.192110538482666,
      "learning_rate": 0.0001937465267094015,
      "loss": 2.9003,
      "step": 141753
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.38657283782959,
      "learning_rate": 0.0001937427013157916,
      "loss": 2.9458,
      "step": 141754
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2753396034240723,
      "learning_rate": 0.00019373887594193684,
      "loss": 3.0839,
      "step": 141755
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1036858558654785,
      "learning_rate": 0.000193735050587838,
      "loss": 2.7536,
      "step": 141756
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1534740924835205,
      "learning_rate": 0.00019373122525349586,
      "loss": 3.0068,
      "step": 141757
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9077582359313965,
      "learning_rate": 0.00019372739993891107,
      "loss": 2.8468,
      "step": 141758
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0960426330566406,
      "learning_rate": 0.00019372357464408438,
      "loss": 3.0409,
      "step": 141759
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.399923324584961,
      "learning_rate": 0.0001937197493690166,
      "loss": 2.9006,
      "step": 141760
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.033167600631714,
      "learning_rate": 0.00019371592411370817,
      "loss": 2.987,
      "step": 141761
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4223506450653076,
      "learning_rate": 0.00019371209887816,
      "loss": 3.0297,
      "step": 141762
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8960026502609253,
      "learning_rate": 0.00019370827366237272,
      "loss": 2.8802,
      "step": 141763
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4194514751434326,
      "learning_rate": 0.00019370444846634712,
      "loss": 2.778,
      "step": 141764
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4553472995758057,
      "learning_rate": 0.00019370062329008384,
      "loss": 3.0765,
      "step": 141765
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9722875356674194,
      "learning_rate": 0.0001936967981335837,
      "loss": 2.8938,
      "step": 141766
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2537829875946045,
      "learning_rate": 0.00019369297299684727,
      "loss": 2.7421,
      "step": 141767
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1289803981781006,
      "learning_rate": 0.00019368914787987537,
      "loss": 3.1726,
      "step": 141768
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.037307024002075,
      "learning_rate": 0.0001936853227826686,
      "loss": 3.0336,
      "step": 141769
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.006002187728882,
      "learning_rate": 0.00019368149770522774,
      "loss": 2.9529,
      "step": 141770
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.501674175262451,
      "learning_rate": 0.00019367767264755347,
      "loss": 2.894,
      "step": 141771
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.364419460296631,
      "learning_rate": 0.00019367384760964667,
      "loss": 2.7163,
      "step": 141772
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6144778728485107,
      "learning_rate": 0.00019367002259150777,
      "loss": 2.9738,
      "step": 141773
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1711301803588867,
      "learning_rate": 0.00019366619759313765,
      "loss": 2.8633,
      "step": 141774
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4866881370544434,
      "learning_rate": 0.00019366237261453703,
      "loss": 2.809,
      "step": 141775
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.632143259048462,
      "learning_rate": 0.00019365854765570655,
      "loss": 3.1091,
      "step": 141776
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.502077579498291,
      "learning_rate": 0.00019365472271664692,
      "loss": 2.9491,
      "step": 141777
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3658344745635986,
      "learning_rate": 0.00019365089779735896,
      "loss": 2.6772,
      "step": 141778
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.680021047592163,
      "learning_rate": 0.00019364707289784328,
      "loss": 2.974,
      "step": 141779
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.353416681289673,
      "learning_rate": 0.0001936432480181006,
      "loss": 3.0876,
      "step": 141780
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.789904832839966,
      "learning_rate": 0.00019363942315813164,
      "loss": 2.9984,
      "step": 141781
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.5557994842529297,
      "learning_rate": 0.00019363559831793713,
      "loss": 2.9116,
      "step": 141782
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2874703407287598,
      "learning_rate": 0.0001936317734975178,
      "loss": 3.3542,
      "step": 141783
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.500304937362671,
      "learning_rate": 0.00019362794869687435,
      "loss": 2.9508,
      "step": 141784
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.585160970687866,
      "learning_rate": 0.00019362412391600738,
      "loss": 2.8237,
      "step": 141785
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.7259747982025146,
      "learning_rate": 0.00019362029915491775,
      "loss": 2.9599,
      "step": 141786
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.031404972076416,
      "learning_rate": 0.00019361647441360608,
      "loss": 3.0203,
      "step": 141787
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5353927612304688,
      "learning_rate": 0.00019361264969207313,
      "loss": 3.0308,
      "step": 141788
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1902530193328857,
      "learning_rate": 0.0001936088249903196,
      "loss": 2.6995,
      "step": 141789
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8958539962768555,
      "learning_rate": 0.0001936050003083463,
      "loss": 2.7717,
      "step": 141790
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.843334197998047,
      "learning_rate": 0.00019360117564615371,
      "loss": 3.1439,
      "step": 141791
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.726853847503662,
      "learning_rate": 0.0001935973510037427,
      "loss": 2.7357,
      "step": 141792
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.064594030380249,
      "learning_rate": 0.0001935935263811139,
      "loss": 3.0205,
      "step": 141793
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0201449394226074,
      "learning_rate": 0.00019358970177826813,
      "loss": 2.7514,
      "step": 141794
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.576514720916748,
      "learning_rate": 0.000193585877195206,
      "loss": 2.9342,
      "step": 141795
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.750648021697998,
      "learning_rate": 0.0001935820526319283,
      "loss": 3.0567,
      "step": 141796
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9337105751037598,
      "learning_rate": 0.00019357822808843583,
      "loss": 3.1207,
      "step": 141797
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.616151809692383,
      "learning_rate": 0.00019357440356472904,
      "loss": 2.9961,
      "step": 141798
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0878663063049316,
      "learning_rate": 0.00019357057906080875,
      "loss": 2.8919,
      "step": 141799
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.726975917816162,
      "learning_rate": 0.00019356675457667574,
      "loss": 3.0219,
      "step": 141800
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.817194700241089,
      "learning_rate": 0.00019356293011233065,
      "loss": 3.0119,
      "step": 141801
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.003736972808838,
      "learning_rate": 0.00019355910566777425,
      "loss": 3.0168,
      "step": 141802
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1404271125793457,
      "learning_rate": 0.0001935552812430073,
      "loss": 2.9422,
      "step": 141803
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4347639083862305,
      "learning_rate": 0.00019355145683803033,
      "loss": 2.8262,
      "step": 141804
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.649624824523926,
      "learning_rate": 0.0001935476324528442,
      "loss": 2.724,
      "step": 141805
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6593868732452393,
      "learning_rate": 0.00019354380808744948,
      "loss": 2.9904,
      "step": 141806
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.225050449371338,
      "learning_rate": 0.00019353998374184705,
      "loss": 2.9311,
      "step": 141807
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.26938796043396,
      "learning_rate": 0.00019353615941603754,
      "loss": 2.9325,
      "step": 141808
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1400766372680664,
      "learning_rate": 0.0001935323351100218,
      "loss": 2.8969,
      "step": 141809
    },
    {
      "epoch": 1.85,
      "grad_norm": 5.05060338973999,
      "learning_rate": 0.00019352851082380022,
      "loss": 2.9332,
      "step": 141810
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1200685501098633,
      "learning_rate": 0.00019352468655737376,
      "loss": 2.6336,
      "step": 141811
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.173523187637329,
      "learning_rate": 0.00019352086231074305,
      "loss": 2.8482,
      "step": 141812
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.8123860359191895,
      "learning_rate": 0.0001935170380839088,
      "loss": 3.0096,
      "step": 141813
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.5340657234191895,
      "learning_rate": 0.0001935132138768718,
      "loss": 2.8766,
      "step": 141814
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.08500599861145,
      "learning_rate": 0.0001935093896896328,
      "loss": 3.0308,
      "step": 141815
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.565446615219116,
      "learning_rate": 0.0001935055655221923,
      "loss": 3.0651,
      "step": 141816
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.4699063301086426,
      "learning_rate": 0.00019350174137455108,
      "loss": 3.0954,
      "step": 141817
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.4892985820770264,
      "learning_rate": 0.00019349791724670995,
      "loss": 2.7515,
      "step": 141818
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3158211708068848,
      "learning_rate": 0.00019349409313866952,
      "loss": 2.8895,
      "step": 141819
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5446958541870117,
      "learning_rate": 0.00019349026905043058,
      "loss": 3.0515,
      "step": 141820
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.504936695098877,
      "learning_rate": 0.00019348644498199394,
      "loss": 2.9382,
      "step": 141821
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4959654808044434,
      "learning_rate": 0.00019348262093336004,
      "loss": 2.8312,
      "step": 141822
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.7045392990112305,
      "learning_rate": 0.00019347879690452974,
      "loss": 3.1058,
      "step": 141823
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.1239824295043945,
      "learning_rate": 0.0001934749728955037,
      "loss": 3.0695,
      "step": 141824
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8108855485916138,
      "learning_rate": 0.00019347114890628275,
      "loss": 3.0164,
      "step": 141825
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.267338991165161,
      "learning_rate": 0.0001934673249368675,
      "loss": 3.0214,
      "step": 141826
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.5297560691833496,
      "learning_rate": 0.00019346350098725876,
      "loss": 3.2602,
      "step": 141827
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8115906715393066,
      "learning_rate": 0.00019345967705745707,
      "loss": 3.0482,
      "step": 141828
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5554261207580566,
      "learning_rate": 0.00019345585314746328,
      "loss": 2.9744,
      "step": 141829
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.28572678565979,
      "learning_rate": 0.00019345202925727801,
      "loss": 3.1646,
      "step": 141830
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.620753288269043,
      "learning_rate": 0.00019344820538690202,
      "loss": 2.9808,
      "step": 141831
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0136544704437256,
      "learning_rate": 0.00019344438153633603,
      "loss": 3.0063,
      "step": 141832
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3081600666046143,
      "learning_rate": 0.00019344055770558087,
      "loss": 2.9141,
      "step": 141833
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8868823051452637,
      "learning_rate": 0.000193436733894637,
      "loss": 3.0699,
      "step": 141834
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.793193817138672,
      "learning_rate": 0.00019343291010350523,
      "loss": 2.8456,
      "step": 141835
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2155067920684814,
      "learning_rate": 0.00019342908633218633,
      "loss": 3.033,
      "step": 141836
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5704824924468994,
      "learning_rate": 0.00019342526258068097,
      "loss": 2.8688,
      "step": 141837
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.778810977935791,
      "learning_rate": 0.00019342143884898983,
      "loss": 2.9216,
      "step": 141838
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.233530044555664,
      "learning_rate": 0.00019341761513711378,
      "loss": 3.0232,
      "step": 141839
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.9456839561462402,
      "learning_rate": 0.00019341379144505332,
      "loss": 2.7179,
      "step": 141840
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.164064407348633,
      "learning_rate": 0.00019340996777280923,
      "loss": 2.9533,
      "step": 141841
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.697458267211914,
      "learning_rate": 0.00019340614412038227,
      "loss": 3.0674,
      "step": 141842
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5363929271698,
      "learning_rate": 0.00019340232048777305,
      "loss": 3.0399,
      "step": 141843
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.908934235572815,
      "learning_rate": 0.0001933984968749824,
      "loss": 2.9257,
      "step": 141844
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.965059518814087,
      "learning_rate": 0.00019339467328201112,
      "loss": 2.7962,
      "step": 141845
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.949489593505859,
      "learning_rate": 0.0001933908497088596,
      "loss": 2.9433,
      "step": 141846
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.797541379928589,
      "learning_rate": 0.0001933870261555288,
      "loss": 2.7778,
      "step": 141847
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.869018316268921,
      "learning_rate": 0.00019338320262201933,
      "loss": 2.9296,
      "step": 141848
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.327892541885376,
      "learning_rate": 0.00019337937910833192,
      "loss": 3.0011,
      "step": 141849
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.477426052093506,
      "learning_rate": 0.00019337555561446733,
      "loss": 3.1007,
      "step": 141850
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9867658615112305,
      "learning_rate": 0.0001933717321404263,
      "loss": 3.0351,
      "step": 141851
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.307133674621582,
      "learning_rate": 0.00019336790868620946,
      "loss": 2.8941,
      "step": 141852
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0635650157928467,
      "learning_rate": 0.00019336408525181745,
      "loss": 2.9988,
      "step": 141853
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.906313419342041,
      "learning_rate": 0.00019336026183725108,
      "loss": 2.8188,
      "step": 141854
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0998342037200928,
      "learning_rate": 0.00019335643844251108,
      "loss": 3.0569,
      "step": 141855
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1847472190856934,
      "learning_rate": 0.00019335261506759813,
      "loss": 3.2558,
      "step": 141856
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.037753105163574,
      "learning_rate": 0.00019334879171251302,
      "loss": 2.9502,
      "step": 141857
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.8353161811828613,
      "learning_rate": 0.00019334496837725628,
      "loss": 2.9093,
      "step": 141858
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.348459720611572,
      "learning_rate": 0.00019334114506182874,
      "loss": 3.0093,
      "step": 141859
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0428836345672607,
      "learning_rate": 0.00019333732176623114,
      "loss": 3.055,
      "step": 141860
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1765336990356445,
      "learning_rate": 0.0001933334984904641,
      "loss": 2.9097,
      "step": 141861
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6869685649871826,
      "learning_rate": 0.00019332967523452837,
      "loss": 3.2388,
      "step": 141862
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.2939350605010986,
      "learning_rate": 0.00019332585199842466,
      "loss": 3.1254,
      "step": 141863
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5371885299682617,
      "learning_rate": 0.00019332202878215375,
      "loss": 2.9405,
      "step": 141864
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9322534799575806,
      "learning_rate": 0.00019331820558571625,
      "loss": 3.3236,
      "step": 141865
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2763965129852295,
      "learning_rate": 0.0001933143824091129,
      "loss": 2.874,
      "step": 141866
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.232455730438232,
      "learning_rate": 0.00019331055925234447,
      "loss": 2.6065,
      "step": 141867
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.84194278717041,
      "learning_rate": 0.00019330673611541157,
      "loss": 3.0979,
      "step": 141868
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0410149097442627,
      "learning_rate": 0.00019330291299831498,
      "loss": 2.755,
      "step": 141869
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1695613861083984,
      "learning_rate": 0.0001932990899010554,
      "loss": 2.9757,
      "step": 141870
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4038240909576416,
      "learning_rate": 0.00019329526682363354,
      "loss": 2.8756,
      "step": 141871
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.465404987335205,
      "learning_rate": 0.00019329144376605006,
      "loss": 3.0738,
      "step": 141872
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1771185398101807,
      "learning_rate": 0.00019328762072830572,
      "loss": 2.9003,
      "step": 141873
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.5921945571899414,
      "learning_rate": 0.0001932837977104013,
      "loss": 2.9349,
      "step": 141874
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3380167484283447,
      "learning_rate": 0.00019327997471233735,
      "loss": 2.7053,
      "step": 141875
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6905202865600586,
      "learning_rate": 0.00019327615173411473,
      "loss": 2.907,
      "step": 141876
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.627413511276245,
      "learning_rate": 0.00019327232877573407,
      "loss": 2.9617,
      "step": 141877
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9578408002853394,
      "learning_rate": 0.00019326850583719607,
      "loss": 3.0359,
      "step": 141878
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9746811389923096,
      "learning_rate": 0.00019326468291850146,
      "loss": 2.9695,
      "step": 141879
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.7898674011230469,
      "learning_rate": 0.00019326086001965098,
      "loss": 2.9531,
      "step": 141880
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1091721057891846,
      "learning_rate": 0.00019325703714064536,
      "loss": 2.8668,
      "step": 141881
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.209683656692505,
      "learning_rate": 0.0001932532142814853,
      "loss": 2.8024,
      "step": 141882
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8938790559768677,
      "learning_rate": 0.0001932493914421714,
      "loss": 2.7015,
      "step": 141883
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9210036993026733,
      "learning_rate": 0.00019324556862270443,
      "loss": 3.3805,
      "step": 141884
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.001438617706299,
      "learning_rate": 0.00019324174582308518,
      "loss": 2.9784,
      "step": 141885
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1346828937530518,
      "learning_rate": 0.00019323792304331425,
      "loss": 3.0986,
      "step": 141886
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3005449771881104,
      "learning_rate": 0.00019323410028339243,
      "loss": 3.0197,
      "step": 141887
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.404466152191162,
      "learning_rate": 0.00019323027754332054,
      "loss": 3.1337,
      "step": 141888
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.329591751098633,
      "learning_rate": 0.00019322645482309903,
      "loss": 2.8273,
      "step": 141889
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1213319301605225,
      "learning_rate": 0.00019322263212272873,
      "loss": 3.0022,
      "step": 141890
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2170774936676025,
      "learning_rate": 0.00019321880944221035,
      "loss": 3.1271,
      "step": 141891
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6501102447509766,
      "learning_rate": 0.00019321498678154464,
      "loss": 3.0455,
      "step": 141892
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.308579683303833,
      "learning_rate": 0.00019321116414073225,
      "loss": 3.1833,
      "step": 141893
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.342772960662842,
      "learning_rate": 0.00019320734151977406,
      "loss": 3.0584,
      "step": 141894
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3073620796203613,
      "learning_rate": 0.0001932035189186705,
      "loss": 2.8596,
      "step": 141895
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.965824604034424,
      "learning_rate": 0.00019319969633742245,
      "loss": 3.3081,
      "step": 141896
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.237614393234253,
      "learning_rate": 0.0001931958737760306,
      "loss": 3.1328,
      "step": 141897
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1309268474578857,
      "learning_rate": 0.0001931920512344956,
      "loss": 2.7537,
      "step": 141898
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.187988519668579,
      "learning_rate": 0.00019318822871281822,
      "loss": 2.9852,
      "step": 141899
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7553963661193848,
      "learning_rate": 0.00019318440621099933,
      "loss": 2.7714,
      "step": 141900
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5605087280273438,
      "learning_rate": 0.00019318058372903932,
      "loss": 2.9651,
      "step": 141901
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5149199962615967,
      "learning_rate": 0.0001931767612669391,
      "loss": 2.8861,
      "step": 141902
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3890788555145264,
      "learning_rate": 0.0001931729388246993,
      "loss": 2.9044,
      "step": 141903
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.660043716430664,
      "learning_rate": 0.00019316911640232068,
      "loss": 3.07,
      "step": 141904
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.398466110229492,
      "learning_rate": 0.00019316529399980388,
      "loss": 3.0516,
      "step": 141905
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.012913942337036,
      "learning_rate": 0.00019316147161714986,
      "loss": 3.1343,
      "step": 141906
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5065746307373047,
      "learning_rate": 0.00019315764925435902,
      "loss": 2.9109,
      "step": 141907
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.774641752243042,
      "learning_rate": 0.00019315382691143215,
      "loss": 3.1837,
      "step": 141908
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5654871463775635,
      "learning_rate": 0.00019315000458837,
      "loss": 3.0382,
      "step": 141909
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5697219371795654,
      "learning_rate": 0.00019314618228517328,
      "loss": 3.088,
      "step": 141910
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4211606979370117,
      "learning_rate": 0.00019314236000184275,
      "loss": 3.0478,
      "step": 141911
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2616958618164062,
      "learning_rate": 0.00019313853773837917,
      "loss": 3.0332,
      "step": 141912
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.23722505569458,
      "learning_rate": 0.00019313471549478302,
      "loss": 3.0265,
      "step": 141913
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9818223714828491,
      "learning_rate": 0.00019313089327105515,
      "loss": 3.0395,
      "step": 141914
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3026785850524902,
      "learning_rate": 0.00019312707106719626,
      "loss": 2.8261,
      "step": 141915
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.206212043762207,
      "learning_rate": 0.00019312324888320705,
      "loss": 2.7368,
      "step": 141916
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3361058235168457,
      "learning_rate": 0.0001931194267190883,
      "loss": 3.0205,
      "step": 141917
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2190845012664795,
      "learning_rate": 0.0001931156045748407,
      "loss": 2.8138,
      "step": 141918
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2731876373291016,
      "learning_rate": 0.0001931117824504649,
      "loss": 2.9869,
      "step": 141919
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.262495756149292,
      "learning_rate": 0.00019310796034596156,
      "loss": 3.1347,
      "step": 141920
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0825486183166504,
      "learning_rate": 0.00019310413826133151,
      "loss": 2.8481,
      "step": 141921
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3218865394592285,
      "learning_rate": 0.0001931003161965754,
      "loss": 3.0245,
      "step": 141922
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2313525676727295,
      "learning_rate": 0.00019309649415169398,
      "loss": 2.8202,
      "step": 141923
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0184152126312256,
      "learning_rate": 0.0001930926721266881,
      "loss": 3.0171,
      "step": 141924
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.588071584701538,
      "learning_rate": 0.00019308885012155812,
      "loss": 3.0112,
      "step": 141925
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.183661699295044,
      "learning_rate": 0.00019308502813630496,
      "loss": 2.7682,
      "step": 141926
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9846843481063843,
      "learning_rate": 0.0001930812061709293,
      "loss": 2.6429,
      "step": 141927
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2132930755615234,
      "learning_rate": 0.00019307738422543187,
      "loss": 3.1131,
      "step": 141928
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4434454441070557,
      "learning_rate": 0.00019307356229981336,
      "loss": 2.9445,
      "step": 141929
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0552539825439453,
      "learning_rate": 0.0001930697403940745,
      "loss": 2.9271,
      "step": 141930
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9033266305923462,
      "learning_rate": 0.00019306591850821615,
      "loss": 2.9755,
      "step": 141931
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8392254114151,
      "learning_rate": 0.00019306209664223873,
      "loss": 2.9428,
      "step": 141932
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.193148374557495,
      "learning_rate": 0.00019305827479614306,
      "loss": 3.0089,
      "step": 141933
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.282311201095581,
      "learning_rate": 0.0001930544529699299,
      "loss": 2.8276,
      "step": 141934
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.281160593032837,
      "learning_rate": 0.00019305063116359988,
      "loss": 3.3259,
      "step": 141935
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.104957342147827,
      "learning_rate": 0.00019304680937715383,
      "loss": 3.0428,
      "step": 141936
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7664403915405273,
      "learning_rate": 0.0001930429876105925,
      "loss": 2.8789,
      "step": 141937
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5557103157043457,
      "learning_rate": 0.00019303916586391637,
      "loss": 3.119,
      "step": 141938
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.6916279792785645,
      "learning_rate": 0.00019303534413712628,
      "loss": 2.6855,
      "step": 141939
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.620626926422119,
      "learning_rate": 0.00019303152243022297,
      "loss": 2.7716,
      "step": 141940
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2953872680664062,
      "learning_rate": 0.00019302770074320708,
      "loss": 2.8843,
      "step": 141941
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1469805240631104,
      "learning_rate": 0.00019302387907607936,
      "loss": 2.8538,
      "step": 141942
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.920232892036438,
      "learning_rate": 0.0001930200574288406,
      "loss": 2.8053,
      "step": 141943
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2299487590789795,
      "learning_rate": 0.0001930162358014914,
      "loss": 3.2954,
      "step": 141944
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.942256212234497,
      "learning_rate": 0.00019301241419403245,
      "loss": 2.8723,
      "step": 141945
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.053546905517578,
      "learning_rate": 0.00019300859260646457,
      "loss": 3.1856,
      "step": 141946
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0998926162719727,
      "learning_rate": 0.00019300477103878836,
      "loss": 3.1284,
      "step": 141947
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.9680891036987305,
      "learning_rate": 0.0001930009494910046,
      "loss": 2.7149,
      "step": 141948
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.374889373779297,
      "learning_rate": 0.000192997127963114,
      "loss": 3.0267,
      "step": 141949
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.394908905029297,
      "learning_rate": 0.00019299330645511725,
      "loss": 3.1184,
      "step": 141950
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3785412311553955,
      "learning_rate": 0.00019298948496701505,
      "loss": 3.1073,
      "step": 141951
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.955313205718994,
      "learning_rate": 0.00019298566349880818,
      "loss": 2.9908,
      "step": 141952
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.15220308303833,
      "learning_rate": 0.00019298184205049717,
      "loss": 2.8653,
      "step": 141953
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.706503391265869,
      "learning_rate": 0.00019297802062208293,
      "loss": 2.857,
      "step": 141954
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.064436197280884,
      "learning_rate": 0.0001929741992135662,
      "loss": 3.0182,
      "step": 141955
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.968836784362793,
      "learning_rate": 0.00019297037782494745,
      "loss": 3.091,
      "step": 141956
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5251498222351074,
      "learning_rate": 0.0001929665564562275,
      "loss": 2.7367,
      "step": 141957
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9465210437774658,
      "learning_rate": 0.00019296273510740713,
      "loss": 2.9133,
      "step": 141958
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5938141345977783,
      "learning_rate": 0.0001929589137784871,
      "loss": 3.0296,
      "step": 141959
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.106393337249756,
      "learning_rate": 0.00019295509246946795,
      "loss": 3.1312,
      "step": 141960
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.059159755706787,
      "learning_rate": 0.0001929512711803505,
      "loss": 3.26,
      "step": 141961
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0607821941375732,
      "learning_rate": 0.0001929474499111354,
      "loss": 2.9902,
      "step": 141962
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.159276008605957,
      "learning_rate": 0.0001929436286618234,
      "loss": 2.7245,
      "step": 141963
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.959960699081421,
      "learning_rate": 0.00019293980743241516,
      "loss": 2.9498,
      "step": 141964
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1877691745758057,
      "learning_rate": 0.00019293598622291145,
      "loss": 2.786,
      "step": 141965
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.66906476020813,
      "learning_rate": 0.000192932165033313,
      "loss": 2.939,
      "step": 141966
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.833167791366577,
      "learning_rate": 0.00019292834386362053,
      "loss": 2.8814,
      "step": 141967
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.2068729400634766,
      "learning_rate": 0.00019292452271383462,
      "loss": 2.7957,
      "step": 141968
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.226256847381592,
      "learning_rate": 0.00019292070158395604,
      "loss": 3.0045,
      "step": 141969
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9648144245147705,
      "learning_rate": 0.00019291688047398556,
      "loss": 2.7611,
      "step": 141970
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9872926473617554,
      "learning_rate": 0.00019291305938392382,
      "loss": 2.999,
      "step": 141971
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.217205286026001,
      "learning_rate": 0.00019290923831377155,
      "loss": 2.8258,
      "step": 141972
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.199334144592285,
      "learning_rate": 0.00019290541726352966,
      "loss": 3.1392,
      "step": 141973
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.397249221801758,
      "learning_rate": 0.00019290159623319846,
      "loss": 3.0876,
      "step": 141974
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1729953289031982,
      "learning_rate": 0.00019289777522277896,
      "loss": 2.9975,
      "step": 141975
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.204803228378296,
      "learning_rate": 0.00019289395423227173,
      "loss": 2.9957,
      "step": 141976
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5012848377227783,
      "learning_rate": 0.0001928901332616776,
      "loss": 2.8284,
      "step": 141977
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.446042060852051,
      "learning_rate": 0.00019288631231099716,
      "loss": 3.0065,
      "step": 141978
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.498234987258911,
      "learning_rate": 0.0001928824913802313,
      "loss": 2.9497,
      "step": 141979
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.684565782546997,
      "learning_rate": 0.00019287867046938051,
      "loss": 3.1356,
      "step": 141980
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3739476203918457,
      "learning_rate": 0.00019287484957844562,
      "loss": 2.938,
      "step": 141981
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.084329128265381,
      "learning_rate": 0.00019287102870742725,
      "loss": 2.8667,
      "step": 141982
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7977728843688965,
      "learning_rate": 0.00019286720785632622,
      "loss": 2.7503,
      "step": 141983
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2067806720733643,
      "learning_rate": 0.0001928633870251432,
      "loss": 2.9431,
      "step": 141984
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.17763614654541,
      "learning_rate": 0.00019285956621387904,
      "loss": 2.8947,
      "step": 141985
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0788462162017822,
      "learning_rate": 0.00019285574542253413,
      "loss": 3.0239,
      "step": 141986
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2414474487304688,
      "learning_rate": 0.0001928519246511094,
      "loss": 2.913,
      "step": 141987
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.942330241203308,
      "learning_rate": 0.0001928481038996055,
      "loss": 2.9111,
      "step": 141988
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.22049880027771,
      "learning_rate": 0.0001928442831680232,
      "loss": 2.934,
      "step": 141989
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.086906909942627,
      "learning_rate": 0.0001928404624563631,
      "loss": 2.8355,
      "step": 141990
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1203131675720215,
      "learning_rate": 0.00019283664176462614,
      "loss": 3.1809,
      "step": 141991
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6712536811828613,
      "learning_rate": 0.00019283282109281276,
      "loss": 2.8075,
      "step": 141992
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1394660472869873,
      "learning_rate": 0.00019282900044092377,
      "loss": 3.0325,
      "step": 141993
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4986321926116943,
      "learning_rate": 0.0001928251798089599,
      "loss": 2.765,
      "step": 141994
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2061245441436768,
      "learning_rate": 0.00019282135919692186,
      "loss": 3.1623,
      "step": 141995
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.466158628463745,
      "learning_rate": 0.00019281753860481032,
      "loss": 2.5754,
      "step": 141996
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0087969303131104,
      "learning_rate": 0.00019281371803262604,
      "loss": 2.9751,
      "step": 141997
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.168025016784668,
      "learning_rate": 0.0001928098974803698,
      "loss": 3.2253,
      "step": 141998
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.115997314453125,
      "learning_rate": 0.0001928060769480422,
      "loss": 2.963,
      "step": 141999
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4988558292388916,
      "learning_rate": 0.00019280225643564385,
      "loss": 3.0206,
      "step": 142000
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1711292266845703,
      "learning_rate": 0.00019279843594317568,
      "loss": 3.1278,
      "step": 142001
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.796573281288147,
      "learning_rate": 0.00019279461547063828,
      "loss": 2.8875,
      "step": 142002
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.124389886856079,
      "learning_rate": 0.00019279079501803237,
      "loss": 3.0342,
      "step": 142003
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3003456592559814,
      "learning_rate": 0.00019278697458535878,
      "loss": 3.2587,
      "step": 142004
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.251081705093384,
      "learning_rate": 0.00019278315417261803,
      "loss": 2.9893,
      "step": 142005
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.901534914970398,
      "learning_rate": 0.0001927793337798109,
      "loss": 2.7836,
      "step": 142006
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9284002780914307,
      "learning_rate": 0.00019277551340693812,
      "loss": 2.9173,
      "step": 142007
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6244568824768066,
      "learning_rate": 0.0001927716930540004,
      "loss": 3.0008,
      "step": 142008
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.095200777053833,
      "learning_rate": 0.00019276787272099846,
      "loss": 3.0996,
      "step": 142009
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.015770196914673,
      "learning_rate": 0.00019276405240793308,
      "loss": 2.9984,
      "step": 142010
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3654823303222656,
      "learning_rate": 0.00019276023211480477,
      "loss": 3.1335,
      "step": 142011
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.7724199295043945,
      "learning_rate": 0.0001927564118416144,
      "loss": 2.6947,
      "step": 142012
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.05977725982666,
      "learning_rate": 0.00019275259158836258,
      "loss": 2.9952,
      "step": 142013
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9747377634048462,
      "learning_rate": 0.00019274877135505008,
      "loss": 2.8791,
      "step": 142014
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9923433065414429,
      "learning_rate": 0.00019274495114167766,
      "loss": 2.9269,
      "step": 142015
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9451082944869995,
      "learning_rate": 0.00019274113094824605,
      "loss": 3.0241,
      "step": 142016
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7204997539520264,
      "learning_rate": 0.0001927373107747558,
      "loss": 3.1556,
      "step": 142017
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.332988739013672,
      "learning_rate": 0.0001927334906212077,
      "loss": 2.8107,
      "step": 142018
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7661988735198975,
      "learning_rate": 0.00019272967048760247,
      "loss": 2.9854,
      "step": 142019
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1678357124328613,
      "learning_rate": 0.00019272585037394084,
      "loss": 2.9982,
      "step": 142020
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6662211418151855,
      "learning_rate": 0.00019272203028022344,
      "loss": 2.9526,
      "step": 142021
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0587363243103027,
      "learning_rate": 0.0001927182102064512,
      "loss": 3.2382,
      "step": 142022
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7089076042175293,
      "learning_rate": 0.0001927143901526246,
      "loss": 2.8837,
      "step": 142023
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4569180011749268,
      "learning_rate": 0.00019271057011874433,
      "loss": 3.1047,
      "step": 142024
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0361266136169434,
      "learning_rate": 0.00019270675010481123,
      "loss": 2.9258,
      "step": 142025
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4731433391571045,
      "learning_rate": 0.00019270293011082595,
      "loss": 2.9311,
      "step": 142026
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.237321615219116,
      "learning_rate": 0.00019269911013678923,
      "loss": 3.1242,
      "step": 142027
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1773059368133545,
      "learning_rate": 0.00019269529018270184,
      "loss": 2.9278,
      "step": 142028
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9890093803405762,
      "learning_rate": 0.00019269147024856444,
      "loss": 2.9939,
      "step": 142029
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.616028070449829,
      "learning_rate": 0.0001926876503343776,
      "loss": 2.9039,
      "step": 142030
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0979928970336914,
      "learning_rate": 0.00019268383044014221,
      "loss": 2.9826,
      "step": 142031
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.271641254425049,
      "learning_rate": 0.0001926800105658589,
      "loss": 2.8796,
      "step": 142032
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.091783285140991,
      "learning_rate": 0.00019267619071152843,
      "loss": 3.0742,
      "step": 142033
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.107949733734131,
      "learning_rate": 0.0001926723708771515,
      "loss": 3.0857,
      "step": 142034
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0617973804473877,
      "learning_rate": 0.00019266855106272874,
      "loss": 3.0552,
      "step": 142035
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.463002920150757,
      "learning_rate": 0.00019266473126826103,
      "loss": 3.0225,
      "step": 142036
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.975171446800232,
      "learning_rate": 0.00019266091149374885,
      "loss": 2.943,
      "step": 142037
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.293931722640991,
      "learning_rate": 0.00019265709173919311,
      "loss": 2.862,
      "step": 142038
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2041070461273193,
      "learning_rate": 0.00019265327200459434,
      "loss": 2.9196,
      "step": 142039
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2024998664855957,
      "learning_rate": 0.0001926494522899535,
      "loss": 2.9105,
      "step": 142040
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3828868865966797,
      "learning_rate": 0.00019264563259527108,
      "loss": 2.9481,
      "step": 142041
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.081822395324707,
      "learning_rate": 0.00019264181292054784,
      "loss": 3.0058,
      "step": 142042
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.291370153427124,
      "learning_rate": 0.0001926379932657846,
      "loss": 2.9949,
      "step": 142043
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.291825532913208,
      "learning_rate": 0.0001926341736309819,
      "loss": 3.1622,
      "step": 142044
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9970641136169434,
      "learning_rate": 0.00019263035401614052,
      "loss": 2.8484,
      "step": 142045
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9683316946029663,
      "learning_rate": 0.00019262653442126128,
      "loss": 2.687,
      "step": 142046
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.537766456604004,
      "learning_rate": 0.00019262271484634473,
      "loss": 2.9316,
      "step": 142047
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.393145799636841,
      "learning_rate": 0.00019261889529139162,
      "loss": 3.024,
      "step": 142048
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2516746520996094,
      "learning_rate": 0.00019261507575640268,
      "loss": 2.9332,
      "step": 142049
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2032973766326904,
      "learning_rate": 0.00019261125624137865,
      "loss": 2.9921,
      "step": 142050
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7695271968841553,
      "learning_rate": 0.00019260743674632026,
      "loss": 2.9355,
      "step": 142051
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4541726112365723,
      "learning_rate": 0.00019260361727122818,
      "loss": 2.883,
      "step": 142052
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.312786102294922,
      "learning_rate": 0.00019259979781610306,
      "loss": 3.1759,
      "step": 142053
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3713953495025635,
      "learning_rate": 0.00019259597838094564,
      "loss": 2.7472,
      "step": 142054
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3831493854522705,
      "learning_rate": 0.00019259215896575668,
      "loss": 3.0419,
      "step": 142055
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.680776357650757,
      "learning_rate": 0.0001925883395705369,
      "loss": 2.89,
      "step": 142056
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4318957328796387,
      "learning_rate": 0.0001925845201952869,
      "loss": 2.9188,
      "step": 142057
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3081541061401367,
      "learning_rate": 0.00019258070084000764,
      "loss": 3.076,
      "step": 142058
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.401188611984253,
      "learning_rate": 0.0001925768815046995,
      "loss": 2.8032,
      "step": 142059
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3417704105377197,
      "learning_rate": 0.00019257306218936336,
      "loss": 2.9371,
      "step": 142060
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3070640563964844,
      "learning_rate": 0.00019256924289399994,
      "loss": 3.0361,
      "step": 142061
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2516958713531494,
      "learning_rate": 0.00019256542361860987,
      "loss": 3.0182,
      "step": 142062
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.389802932739258,
      "learning_rate": 0.00019256160436319399,
      "loss": 3.1245,
      "step": 142063
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.915501594543457,
      "learning_rate": 0.00019255778512775287,
      "loss": 3.008,
      "step": 142064
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3372769355773926,
      "learning_rate": 0.00019255396591228742,
      "loss": 3.2653,
      "step": 142065
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2158734798431396,
      "learning_rate": 0.00019255014671679813,
      "loss": 2.8032,
      "step": 142066
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6968834400177,
      "learning_rate": 0.00019254632754128577,
      "loss": 2.9401,
      "step": 142067
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6044692993164062,
      "learning_rate": 0.00019254250838575106,
      "loss": 3.0889,
      "step": 142068
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6374268531799316,
      "learning_rate": 0.00019253868925019473,
      "loss": 2.9713,
      "step": 142069
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2490651607513428,
      "learning_rate": 0.00019253487013461755,
      "loss": 3.0508,
      "step": 142070
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8231589794158936,
      "learning_rate": 0.00019253105103902022,
      "loss": 2.8141,
      "step": 142071
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.505291223526001,
      "learning_rate": 0.0001925272319634033,
      "loss": 3.069,
      "step": 142072
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7165141105651855,
      "learning_rate": 0.00019252341290776755,
      "loss": 2.8174,
      "step": 142073
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.8643758296966553,
      "learning_rate": 0.00019251959387211378,
      "loss": 2.9279,
      "step": 142074
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.886760950088501,
      "learning_rate": 0.00019251577485644264,
      "loss": 2.8979,
      "step": 142075
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0567219257354736,
      "learning_rate": 0.00019251195586075486,
      "loss": 2.8663,
      "step": 142076
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1686408519744873,
      "learning_rate": 0.0001925081368850512,
      "loss": 2.981,
      "step": 142077
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0012857913970947,
      "learning_rate": 0.00019250431792933225,
      "loss": 3.0898,
      "step": 142078
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9405618906021118,
      "learning_rate": 0.00019250049899359874,
      "loss": 2.879,
      "step": 142079
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.201063871383667,
      "learning_rate": 0.00019249668007785144,
      "loss": 2.9836,
      "step": 142080
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3984577655792236,
      "learning_rate": 0.00019249286118209102,
      "loss": 3.14,
      "step": 142081
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2940971851348877,
      "learning_rate": 0.00019248904230631818,
      "loss": 2.9808,
      "step": 142082
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.917875051498413,
      "learning_rate": 0.00019248522345053382,
      "loss": 2.9408,
      "step": 142083
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.2709245681762695,
      "learning_rate": 0.00019248140461473835,
      "loss": 3.1789,
      "step": 142084
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.563199758529663,
      "learning_rate": 0.00019247758579893262,
      "loss": 3.001,
      "step": 142085
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.306631326675415,
      "learning_rate": 0.00019247376700311735,
      "loss": 2.8806,
      "step": 142086
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7645177841186523,
      "learning_rate": 0.00019246994822729317,
      "loss": 3.0904,
      "step": 142087
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.950768947601318,
      "learning_rate": 0.00019246612947146092,
      "loss": 2.9663,
      "step": 142088
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.321206092834473,
      "learning_rate": 0.00019246231073562136,
      "loss": 3.0935,
      "step": 142089
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.355074405670166,
      "learning_rate": 0.00019245849201977495,
      "loss": 2.8976,
      "step": 142090
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1872715950012207,
      "learning_rate": 0.00019245467332392255,
      "loss": 2.9486,
      "step": 142091
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.1635613441467285,
      "learning_rate": 0.00019245085464806485,
      "loss": 3.1326,
      "step": 142092
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.371550559997559,
      "learning_rate": 0.00019244703599220257,
      "loss": 2.7437,
      "step": 142093
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3015925884246826,
      "learning_rate": 0.0001924432173563364,
      "loss": 3.1232,
      "step": 142094
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.60257887840271,
      "learning_rate": 0.00019243939874046722,
      "loss": 2.8811,
      "step": 142095
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1993093490600586,
      "learning_rate": 0.00019243558014459545,
      "loss": 2.8618,
      "step": 142096
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.651886463165283,
      "learning_rate": 0.00019243176156872192,
      "loss": 2.888,
      "step": 142097
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.9781270027160645,
      "learning_rate": 0.00019242794301284737,
      "loss": 2.9708,
      "step": 142098
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.961989641189575,
      "learning_rate": 0.0001924241244769725,
      "loss": 2.8639,
      "step": 142099
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.337114095687866,
      "learning_rate": 0.000192420305961098,
      "loss": 3.175,
      "step": 142100
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.792396306991577,
      "learning_rate": 0.00019241648746522473,
      "loss": 3.1031,
      "step": 142101
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.5262227058410645,
      "learning_rate": 0.00019241266898935313,
      "loss": 2.9462,
      "step": 142102
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0001096725463867,
      "learning_rate": 0.00019240885053348406,
      "loss": 3.1102,
      "step": 142103
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1443188190460205,
      "learning_rate": 0.00019240503209761825,
      "loss": 2.9614,
      "step": 142104
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7035975456237793,
      "learning_rate": 0.0001924012136817563,
      "loss": 2.8512,
      "step": 142105
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1572656631469727,
      "learning_rate": 0.00019239739528589903,
      "loss": 3.0892,
      "step": 142106
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.8289642333984375,
      "learning_rate": 0.00019239357691004725,
      "loss": 3.0254,
      "step": 142107
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.655943870544434,
      "learning_rate": 0.00019238975855420142,
      "loss": 3.2032,
      "step": 142108
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5591237545013428,
      "learning_rate": 0.0001923859402183623,
      "loss": 2.9477,
      "step": 142109
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.045164108276367,
      "learning_rate": 0.00019238212190253072,
      "loss": 3.0791,
      "step": 142110
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.163015604019165,
      "learning_rate": 0.00019237830360670734,
      "loss": 2.9815,
      "step": 142111
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.940843105316162,
      "learning_rate": 0.0001923744853308929,
      "loss": 2.8834,
      "step": 142112
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.508676052093506,
      "learning_rate": 0.00019237066707508807,
      "loss": 2.9619,
      "step": 142113
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.01985502243042,
      "learning_rate": 0.00019236684883929358,
      "loss": 2.9749,
      "step": 142114
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.016443967819214,
      "learning_rate": 0.00019236303062351005,
      "loss": 2.952,
      "step": 142115
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2491190433502197,
      "learning_rate": 0.00019235921242773832,
      "loss": 2.9177,
      "step": 142116
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.181440591812134,
      "learning_rate": 0.00019235539425197897,
      "loss": 2.9045,
      "step": 142117
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2544960975646973,
      "learning_rate": 0.00019235157609623284,
      "loss": 3.1011,
      "step": 142118
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.316847324371338,
      "learning_rate": 0.00019234775796050062,
      "loss": 2.9298,
      "step": 142119
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1680703163146973,
      "learning_rate": 0.00019234393984478293,
      "loss": 2.8241,
      "step": 142120
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6208555698394775,
      "learning_rate": 0.0001923401217490806,
      "loss": 3.0062,
      "step": 142121
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1880083084106445,
      "learning_rate": 0.0001923363036733942,
      "loss": 2.7449,
      "step": 142122
    },
    {
      "epoch": 1.85,
      "grad_norm": 5.369137763977051,
      "learning_rate": 0.0001923324856177245,
      "loss": 3.1851,
      "step": 142123
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.816325664520264,
      "learning_rate": 0.00019232866758207227,
      "loss": 2.8657,
      "step": 142124
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3423526287078857,
      "learning_rate": 0.0001923248495664382,
      "loss": 3.0585,
      "step": 142125
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4123711585998535,
      "learning_rate": 0.0001923210315708229,
      "loss": 3.1916,
      "step": 142126
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.355205059051514,
      "learning_rate": 0.0001923172135952272,
      "loss": 2.8641,
      "step": 142127
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.25050687789917,
      "learning_rate": 0.0001923133956396518,
      "loss": 3.1019,
      "step": 142128
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2264039516448975,
      "learning_rate": 0.00019230957770409728,
      "loss": 3.2137,
      "step": 142129
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9176632165908813,
      "learning_rate": 0.00019230575978856448,
      "loss": 3.1585,
      "step": 142130
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.455885648727417,
      "learning_rate": 0.00019230194189305409,
      "loss": 3.1306,
      "step": 142131
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3633954524993896,
      "learning_rate": 0.00019229812401756679,
      "loss": 2.7577,
      "step": 142132
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.202415704727173,
      "learning_rate": 0.0001922943061621033,
      "loss": 2.9003,
      "step": 142133
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0769879817962646,
      "learning_rate": 0.00019229048832666433,
      "loss": 2.9974,
      "step": 142134
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9895952939987183,
      "learning_rate": 0.00019228667051125062,
      "loss": 2.9819,
      "step": 142135
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.053647041320801,
      "learning_rate": 0.00019228285271586282,
      "loss": 2.9544,
      "step": 142136
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.964218258857727,
      "learning_rate": 0.00019227903494050167,
      "loss": 2.9848,
      "step": 142137
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4175307750701904,
      "learning_rate": 0.00019227521718516793,
      "loss": 2.9539,
      "step": 142138
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6452033519744873,
      "learning_rate": 0.00019227139944986217,
      "loss": 3.0434,
      "step": 142139
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0165555477142334,
      "learning_rate": 0.00019226758173458523,
      "loss": 2.9295,
      "step": 142140
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0510945320129395,
      "learning_rate": 0.00019226376403933782,
      "loss": 2.8499,
      "step": 142141
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.431471824645996,
      "learning_rate": 0.00019225994636412052,
      "loss": 2.9773,
      "step": 142142
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.701932668685913,
      "learning_rate": 0.00019225612870893424,
      "loss": 3.1002,
      "step": 142143
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.204425096511841,
      "learning_rate": 0.00019225231107377957,
      "loss": 2.9281,
      "step": 142144
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8029026985168457,
      "learning_rate": 0.0001922484934586572,
      "loss": 3.0242,
      "step": 142145
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8567579984664917,
      "learning_rate": 0.0001922446758635678,
      "loss": 3.0475,
      "step": 142146
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.8675341606140137,
      "learning_rate": 0.0001922408582885122,
      "loss": 2.9912,
      "step": 142147
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.693507671356201,
      "learning_rate": 0.00019223704073349107,
      "loss": 3.1274,
      "step": 142148
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.199559211730957,
      "learning_rate": 0.0001922332231985051,
      "loss": 3.0461,
      "step": 142149
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7052507400512695,
      "learning_rate": 0.00019222940568355506,
      "loss": 3.1793,
      "step": 142150
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3604846000671387,
      "learning_rate": 0.00019222558818864156,
      "loss": 3.3315,
      "step": 142151
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.004182815551758,
      "learning_rate": 0.00019222177071376534,
      "loss": 2.7144,
      "step": 142152
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4960546493530273,
      "learning_rate": 0.00019221795325892715,
      "loss": 2.8722,
      "step": 142153
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9601085186004639,
      "learning_rate": 0.00019221413582412765,
      "loss": 3.0405,
      "step": 142154
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6691787242889404,
      "learning_rate": 0.00019221031840936756,
      "loss": 2.9038,
      "step": 142155
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.185533046722412,
      "learning_rate": 0.00019220650101464773,
      "loss": 3.096,
      "step": 142156
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.196483612060547,
      "learning_rate": 0.00019220268363996865,
      "loss": 3.0858,
      "step": 142157
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.342860460281372,
      "learning_rate": 0.00019219886628533113,
      "loss": 2.8178,
      "step": 142158
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.338517427444458,
      "learning_rate": 0.0001921950489507359,
      "loss": 3.1998,
      "step": 142159
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1452343463897705,
      "learning_rate": 0.00019219123163618357,
      "loss": 3.1691,
      "step": 142160
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.492283344268799,
      "learning_rate": 0.00019218741434167498,
      "loss": 2.989,
      "step": 142161
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8785784244537354,
      "learning_rate": 0.0001921835970672109,
      "loss": 2.7487,
      "step": 142162
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2883968353271484,
      "learning_rate": 0.0001921797798127918,
      "loss": 3.0399,
      "step": 142163
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.033038854598999,
      "learning_rate": 0.00019217596257841847,
      "loss": 2.8532,
      "step": 142164
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.795868158340454,
      "learning_rate": 0.00019217214536409172,
      "loss": 3.0463,
      "step": 142165
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1023504734039307,
      "learning_rate": 0.00019216832816981217,
      "loss": 2.7785,
      "step": 142166
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.646261215209961,
      "learning_rate": 0.0001921645109955806,
      "loss": 2.9621,
      "step": 142167
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2047970294952393,
      "learning_rate": 0.0001921606938413978,
      "loss": 2.7323,
      "step": 142168
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0468690395355225,
      "learning_rate": 0.00019215687670726423,
      "loss": 2.8473,
      "step": 142169
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.922217607498169,
      "learning_rate": 0.0001921530595931807,
      "loss": 2.998,
      "step": 142170
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.548886299133301,
      "learning_rate": 0.000192149242499148,
      "loss": 2.8986,
      "step": 142171
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.6296274662017822,
      "learning_rate": 0.00019214542542516673,
      "loss": 2.8888,
      "step": 142172
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.610358238220215,
      "learning_rate": 0.00019214160837123772,
      "loss": 3.0066,
      "step": 142173
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.460069179534912,
      "learning_rate": 0.00019213779133736176,
      "loss": 2.9449,
      "step": 142174
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2931160926818848,
      "learning_rate": 0.00019213397432353923,
      "loss": 2.9294,
      "step": 142175
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.8614418506622314,
      "learning_rate": 0.00019213015732977108,
      "loss": 2.6888,
      "step": 142176
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.260827541351318,
      "learning_rate": 0.00019212634035605796,
      "loss": 2.8941,
      "step": 142177
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.002692222595215,
      "learning_rate": 0.00019212252340240058,
      "loss": 2.8455,
      "step": 142178
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3327324390411377,
      "learning_rate": 0.00019211870646879966,
      "loss": 3.1863,
      "step": 142179
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.366636037826538,
      "learning_rate": 0.00019211488955525602,
      "loss": 3.1519,
      "step": 142180
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.915968894958496,
      "learning_rate": 0.00019211107266177015,
      "loss": 3.0064,
      "step": 142181
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.96561336517334,
      "learning_rate": 0.0001921072557883429,
      "loss": 2.762,
      "step": 142182
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.954678535461426,
      "learning_rate": 0.0001921034389349749,
      "loss": 2.8384,
      "step": 142183
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.322723865509033,
      "learning_rate": 0.0001920996221016669,
      "loss": 3.146,
      "step": 142184
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.412130117416382,
      "learning_rate": 0.00019209580528841967,
      "loss": 2.9332,
      "step": 142185
    },
    {
      "epoch": 1.85,
      "grad_norm": 5.2865753173828125,
      "learning_rate": 0.00019209198849523394,
      "loss": 3.0382,
      "step": 142186
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.159654140472412,
      "learning_rate": 0.00019208817172211025,
      "loss": 3.1231,
      "step": 142187
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8388087749481201,
      "learning_rate": 0.00019208435496904938,
      "loss": 3.0562,
      "step": 142188
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2489476203918457,
      "learning_rate": 0.0001920805382360521,
      "loss": 3.0175,
      "step": 142189
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6607465744018555,
      "learning_rate": 0.00019207672152311907,
      "loss": 2.7367,
      "step": 142190
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.950870990753174,
      "learning_rate": 0.000192072904830251,
      "loss": 3.0536,
      "step": 142191
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.229196548461914,
      "learning_rate": 0.00019206908815744866,
      "loss": 2.9863,
      "step": 142192
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8360819816589355,
      "learning_rate": 0.00019206527150471278,
      "loss": 2.9487,
      "step": 142193
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0912890434265137,
      "learning_rate": 0.0001920614548720439,
      "loss": 2.9771,
      "step": 142194
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2627899646759033,
      "learning_rate": 0.00019205763825944286,
      "loss": 3.0381,
      "step": 142195
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5555012226104736,
      "learning_rate": 0.00019205382166691032,
      "loss": 2.9507,
      "step": 142196
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.771244525909424,
      "learning_rate": 0.000192050005094447,
      "loss": 2.9812,
      "step": 142197
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.044889211654663,
      "learning_rate": 0.00019204618854205363,
      "loss": 3.189,
      "step": 142198
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.250476837158203,
      "learning_rate": 0.00019204237200973106,
      "loss": 3.0069,
      "step": 142199
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.314643621444702,
      "learning_rate": 0.00019203855549747967,
      "loss": 3.1818,
      "step": 142200
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3648202419281006,
      "learning_rate": 0.00019203473900530042,
      "loss": 2.9692,
      "step": 142201
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1974453926086426,
      "learning_rate": 0.00019203092253319392,
      "loss": 3.0372,
      "step": 142202
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.240772247314453,
      "learning_rate": 0.00019202710608116091,
      "loss": 3.0252,
      "step": 142203
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.029547691345215,
      "learning_rate": 0.0001920232896492021,
      "loss": 2.9267,
      "step": 142204
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0445401668548584,
      "learning_rate": 0.00019201947323731832,
      "loss": 3.1663,
      "step": 142205
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.734955310821533,
      "learning_rate": 0.00019201565684551003,
      "loss": 2.6293,
      "step": 142206
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3785674571990967,
      "learning_rate": 0.00019201184047377807,
      "loss": 2.8724,
      "step": 142207
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.377109527587891,
      "learning_rate": 0.00019200802412212319,
      "loss": 3.0125,
      "step": 142208
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7634241580963135,
      "learning_rate": 0.000192004207790546,
      "loss": 3.045,
      "step": 142209
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.645817279815674,
      "learning_rate": 0.00019200039147904726,
      "loss": 3.1105,
      "step": 142210
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9983364343643188,
      "learning_rate": 0.00019199657518762777,
      "loss": 2.9186,
      "step": 142211
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.558692216873169,
      "learning_rate": 0.00019199275891628812,
      "loss": 2.9061,
      "step": 142212
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.229503154754639,
      "learning_rate": 0.00019198894266502905,
      "loss": 3.0007,
      "step": 142213
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1596198081970215,
      "learning_rate": 0.00019198512643385125,
      "loss": 2.9661,
      "step": 142214
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.57027268409729,
      "learning_rate": 0.00019198131022275545,
      "loss": 2.9148,
      "step": 142215
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.321437358856201,
      "learning_rate": 0.00019197749403174238,
      "loss": 2.9297,
      "step": 142216
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.528973340988159,
      "learning_rate": 0.0001919736778608128,
      "loss": 2.9386,
      "step": 142217
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.021595001220703,
      "learning_rate": 0.00019196986170996727,
      "loss": 2.6828,
      "step": 142218
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.187007188796997,
      "learning_rate": 0.0001919660455792066,
      "loss": 3.0663,
      "step": 142219
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9362151622772217,
      "learning_rate": 0.00019196222946853153,
      "loss": 2.7623,
      "step": 142220
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.151909828186035,
      "learning_rate": 0.00019195841337794266,
      "loss": 3.2346,
      "step": 142221
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.276139736175537,
      "learning_rate": 0.00019195459730744077,
      "loss": 3.0071,
      "step": 142222
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.156933069229126,
      "learning_rate": 0.0001919507812570266,
      "loss": 3.0792,
      "step": 142223
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1413049697875977,
      "learning_rate": 0.00019194696522670075,
      "loss": 3.149,
      "step": 142224
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.442075490951538,
      "learning_rate": 0.00019194314921646403,
      "loss": 2.7685,
      "step": 142225
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.792219638824463,
      "learning_rate": 0.00019193933322631708,
      "loss": 2.9123,
      "step": 142226
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.152981996536255,
      "learning_rate": 0.00019193551725626076,
      "loss": 3.1039,
      "step": 142227
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2014143466949463,
      "learning_rate": 0.0001919317013062956,
      "loss": 2.9004,
      "step": 142228
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.2817835807800293,
      "learning_rate": 0.0001919278853764224,
      "loss": 2.9485,
      "step": 142229
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.397547721862793,
      "learning_rate": 0.00019192406946664183,
      "loss": 2.8631,
      "step": 142230
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.486485004425049,
      "learning_rate": 0.00019192025357695457,
      "loss": 3.0376,
      "step": 142231
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9353041648864746,
      "learning_rate": 0.0001919164377073614,
      "loss": 2.7939,
      "step": 142232
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.605536699295044,
      "learning_rate": 0.00019191262185786298,
      "loss": 3.2548,
      "step": 142233
    },
    {
      "epoch": 1.85,
      "grad_norm": 5.917660236358643,
      "learning_rate": 0.00019190880602846012,
      "loss": 2.9527,
      "step": 142234
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.707002639770508,
      "learning_rate": 0.00019190499021915346,
      "loss": 2.8305,
      "step": 142235
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9541748762130737,
      "learning_rate": 0.00019190117442994365,
      "loss": 3.0732,
      "step": 142236
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.518404722213745,
      "learning_rate": 0.00019189735866083146,
      "loss": 2.9165,
      "step": 142237
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0934150218963623,
      "learning_rate": 0.00019189354291181755,
      "loss": 2.9109,
      "step": 142238
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.6028048992156982,
      "learning_rate": 0.00019188972718290273,
      "loss": 2.9562,
      "step": 142239
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.582151174545288,
      "learning_rate": 0.0001918859114740876,
      "loss": 3.0047,
      "step": 142240
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.27885103225708,
      "learning_rate": 0.00019188209578537306,
      "loss": 2.7245,
      "step": 142241
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.824535608291626,
      "learning_rate": 0.00019187828011675955,
      "loss": 3.052,
      "step": 142242
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.050856351852417,
      "learning_rate": 0.00019187446446824793,
      "loss": 3.1286,
      "step": 142243
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.16532039642334,
      "learning_rate": 0.00019187064883983884,
      "loss": 2.7904,
      "step": 142244
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.011537551879883,
      "learning_rate": 0.00019186683323153304,
      "loss": 3.0806,
      "step": 142245
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.387218952178955,
      "learning_rate": 0.00019186301764333128,
      "loss": 2.9557,
      "step": 142246
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.867250919342041,
      "learning_rate": 0.00019185920207523435,
      "loss": 3.1013,
      "step": 142247
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.446532726287842,
      "learning_rate": 0.0001918553865272427,
      "loss": 3.1139,
      "step": 142248
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.269442319869995,
      "learning_rate": 0.00019185157099935716,
      "loss": 3.0963,
      "step": 142249
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.399188280105591,
      "learning_rate": 0.00019184775549157848,
      "loss": 3.0423,
      "step": 142250
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.008124351501465,
      "learning_rate": 0.0001918439400039073,
      "loss": 3.1046,
      "step": 142251
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8561779260635376,
      "learning_rate": 0.0001918401245363444,
      "loss": 3.2155,
      "step": 142252
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.372462272644043,
      "learning_rate": 0.00019183630908889063,
      "loss": 3.1299,
      "step": 142253
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8041465282440186,
      "learning_rate": 0.00019183249366154636,
      "loss": 2.9618,
      "step": 142254
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.318164348602295,
      "learning_rate": 0.00019182867825431248,
      "loss": 2.9691,
      "step": 142255
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.4744229316711426,
      "learning_rate": 0.0001918248628671897,
      "loss": 2.7397,
      "step": 142256
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.476179599761963,
      "learning_rate": 0.0001918210475001787,
      "loss": 2.9129,
      "step": 142257
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.311622381210327,
      "learning_rate": 0.0001918172321532802,
      "loss": 2.917,
      "step": 142258
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9091572761535645,
      "learning_rate": 0.00019181341682649494,
      "loss": 2.8653,
      "step": 142259
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.451577663421631,
      "learning_rate": 0.00019180960151982373,
      "loss": 3.2487,
      "step": 142260
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4890127182006836,
      "learning_rate": 0.00019180578623326705,
      "loss": 2.8732,
      "step": 142261
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.934451699256897,
      "learning_rate": 0.0001918019709668257,
      "loss": 3.0111,
      "step": 142262
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5073533058166504,
      "learning_rate": 0.0001917981557205004,
      "loss": 2.7755,
      "step": 142263
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.009582996368408,
      "learning_rate": 0.00019179434049429187,
      "loss": 3.0536,
      "step": 142264
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.793210506439209,
      "learning_rate": 0.00019179052528820083,
      "loss": 3.3704,
      "step": 142265
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8732733726501465,
      "learning_rate": 0.00019178671010222807,
      "loss": 2.9605,
      "step": 142266
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.540552854537964,
      "learning_rate": 0.00019178289493637407,
      "loss": 2.8782,
      "step": 142267
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4962069988250732,
      "learning_rate": 0.00019177907979063972,
      "loss": 2.8987,
      "step": 142268
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3529601097106934,
      "learning_rate": 0.00019177526466502564,
      "loss": 3.2658,
      "step": 142269
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.794215440750122,
      "learning_rate": 0.0001917714495595326,
      "loss": 2.9767,
      "step": 142270
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.031104564666748,
      "learning_rate": 0.0001917676344741613,
      "loss": 2.9066,
      "step": 142271
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.339693307876587,
      "learning_rate": 0.00019176381940891254,
      "loss": 3.0641,
      "step": 142272
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.659365177154541,
      "learning_rate": 0.00019176000436378683,
      "loss": 2.8873,
      "step": 142273
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.378631591796875,
      "learning_rate": 0.00019175618933878495,
      "loss": 3.1465,
      "step": 142274
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.6124045848846436,
      "learning_rate": 0.00019175237433390768,
      "loss": 2.8839,
      "step": 142275
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1859004497528076,
      "learning_rate": 0.00019174855934915564,
      "loss": 2.9658,
      "step": 142276
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.055666923522949,
      "learning_rate": 0.00019174474438452961,
      "loss": 3.0642,
      "step": 142277
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.359373092651367,
      "learning_rate": 0.0001917409294400304,
      "loss": 3.0131,
      "step": 142278
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3928487300872803,
      "learning_rate": 0.00019173711451565844,
      "loss": 2.8384,
      "step": 142279
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3912296295166016,
      "learning_rate": 0.00019173329961141465,
      "loss": 3.0455,
      "step": 142280
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1204898357391357,
      "learning_rate": 0.00019172948472729963,
      "loss": 2.7678,
      "step": 142281
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0354514122009277,
      "learning_rate": 0.00019172566986331416,
      "loss": 3.257,
      "step": 142282
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4674558639526367,
      "learning_rate": 0.00019172185501945896,
      "loss": 3.1054,
      "step": 142283
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6269264221191406,
      "learning_rate": 0.00019171804019573483,
      "loss": 2.7986,
      "step": 142284
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.389331579208374,
      "learning_rate": 0.00019171422539214223,
      "loss": 3.0595,
      "step": 142285
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.034184694290161,
      "learning_rate": 0.00019171041060868198,
      "loss": 2.6437,
      "step": 142286
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4242348670959473,
      "learning_rate": 0.0001917065958453548,
      "loss": 2.9993,
      "step": 142287
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.502516031265259,
      "learning_rate": 0.00019170278110216141,
      "loss": 2.6213,
      "step": 142288
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0645711421966553,
      "learning_rate": 0.00019169896637910257,
      "loss": 3.0376,
      "step": 142289
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.350358009338379,
      "learning_rate": 0.000191695151676179,
      "loss": 2.8103,
      "step": 142290
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.411325454711914,
      "learning_rate": 0.00019169133699339125,
      "loss": 2.9008,
      "step": 142291
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1785223484039307,
      "learning_rate": 0.00019168752233074014,
      "loss": 2.8064,
      "step": 142292
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3668832778930664,
      "learning_rate": 0.00019168370768822633,
      "loss": 2.9166,
      "step": 142293
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3730485439300537,
      "learning_rate": 0.0001916798930658506,
      "loss": 2.9086,
      "step": 142294
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.229771375656128,
      "learning_rate": 0.00019167607846361356,
      "loss": 2.8884,
      "step": 142295
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.767009735107422,
      "learning_rate": 0.0001916722638815161,
      "loss": 3.0289,
      "step": 142296
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.309415102005005,
      "learning_rate": 0.0001916684493195588,
      "loss": 2.8999,
      "step": 142297
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.917891502380371,
      "learning_rate": 0.00019166463477774233,
      "loss": 2.7375,
      "step": 142298
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6574020385742188,
      "learning_rate": 0.00019166082025606743,
      "loss": 2.8065,
      "step": 142299
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.633535861968994,
      "learning_rate": 0.0001916570057545348,
      "loss": 2.8975,
      "step": 142300
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0285892486572266,
      "learning_rate": 0.00019165319127314523,
      "loss": 2.9272,
      "step": 142301
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9468672275543213,
      "learning_rate": 0.0001916493768118994,
      "loss": 2.8061,
      "step": 142302
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.035008430480957,
      "learning_rate": 0.00019164556237079796,
      "loss": 2.9684,
      "step": 142303
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.823035478591919,
      "learning_rate": 0.00019164174794984167,
      "loss": 3.2439,
      "step": 142304
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9531300067901611,
      "learning_rate": 0.00019163793354903125,
      "loss": 2.9826,
      "step": 142305
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4006240367889404,
      "learning_rate": 0.00019163411916836733,
      "loss": 2.998,
      "step": 142306
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.104106903076172,
      "learning_rate": 0.00019163030480785068,
      "loss": 3.1193,
      "step": 142307
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.252948522567749,
      "learning_rate": 0.00019162649046748206,
      "loss": 2.9457,
      "step": 142308
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1914761066436768,
      "learning_rate": 0.00019162267614726206,
      "loss": 3.0048,
      "step": 142309
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9500771760940552,
      "learning_rate": 0.00019161886184719147,
      "loss": 2.9637,
      "step": 142310
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2053043842315674,
      "learning_rate": 0.00019161504756727096,
      "loss": 2.7022,
      "step": 142311
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1844029426574707,
      "learning_rate": 0.00019161123330750134,
      "loss": 2.9802,
      "step": 142312
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1255321502685547,
      "learning_rate": 0.00019160741906788315,
      "loss": 2.735,
      "step": 142313
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7913143634796143,
      "learning_rate": 0.00019160360484841724,
      "loss": 2.9086,
      "step": 142314
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.875176191329956,
      "learning_rate": 0.00019159979064910425,
      "loss": 2.9101,
      "step": 142315
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0736069679260254,
      "learning_rate": 0.00019159597646994486,
      "loss": 2.8006,
      "step": 142316
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8723127841949463,
      "learning_rate": 0.00019159216231093984,
      "loss": 2.9013,
      "step": 142317
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.263414144515991,
      "learning_rate": 0.00019158834817208987,
      "loss": 2.9929,
      "step": 142318
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2612898349761963,
      "learning_rate": 0.00019158453405339573,
      "loss": 2.8636,
      "step": 142319
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.522928237915039,
      "learning_rate": 0.00019158071995485813,
      "loss": 3.0199,
      "step": 142320
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.080144166946411,
      "learning_rate": 0.00019157690587647761,
      "loss": 2.8621,
      "step": 142321
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.251631259918213,
      "learning_rate": 0.000191573091818255,
      "loss": 2.804,
      "step": 142322
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1327598094940186,
      "learning_rate": 0.000191569277780191,
      "loss": 2.957,
      "step": 142323
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.392526865005493,
      "learning_rate": 0.00019156546376228632,
      "loss": 2.7652,
      "step": 142324
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6104776859283447,
      "learning_rate": 0.00019156164976454166,
      "loss": 2.9762,
      "step": 142325
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.169959306716919,
      "learning_rate": 0.00019155783578695779,
      "loss": 2.948,
      "step": 142326
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.496488571166992,
      "learning_rate": 0.0001915540218295354,
      "loss": 2.8066,
      "step": 142327
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.144489049911499,
      "learning_rate": 0.00019155020789227506,
      "loss": 2.6308,
      "step": 142328
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.333991527557373,
      "learning_rate": 0.00019154639397517758,
      "loss": 2.9398,
      "step": 142329
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4480843544006348,
      "learning_rate": 0.00019154258007824372,
      "loss": 2.8759,
      "step": 142330
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.943545341491699,
      "learning_rate": 0.00019153876620147412,
      "loss": 2.9063,
      "step": 142331
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0658586025238037,
      "learning_rate": 0.0001915349523448695,
      "loss": 2.7332,
      "step": 142332
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.6766936779022217,
      "learning_rate": 0.0001915311385084307,
      "loss": 2.9459,
      "step": 142333
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.05737566947937,
      "learning_rate": 0.0001915273246921582,
      "loss": 2.6635,
      "step": 142334
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.402501106262207,
      "learning_rate": 0.00019152351089605281,
      "loss": 2.8899,
      "step": 142335
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.791201591491699,
      "learning_rate": 0.00019151969712011524,
      "loss": 2.9497,
      "step": 142336
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.520279884338379,
      "learning_rate": 0.00019151588336434626,
      "loss": 2.9445,
      "step": 142337
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3180696964263916,
      "learning_rate": 0.00019151206962874645,
      "loss": 2.9329,
      "step": 142338
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7314114570617676,
      "learning_rate": 0.00019150825591331673,
      "loss": 3.0396,
      "step": 142339
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.030054807662964,
      "learning_rate": 0.0001915044422180576,
      "loss": 2.8584,
      "step": 142340
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.488480567932129,
      "learning_rate": 0.0001915006285429698,
      "loss": 2.8422,
      "step": 142341
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.0519702434539795,
      "learning_rate": 0.00019149681488805407,
      "loss": 2.8441,
      "step": 142342
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.18408203125,
      "learning_rate": 0.00019149300125331114,
      "loss": 3.2043,
      "step": 142343
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.120351791381836,
      "learning_rate": 0.00019148918763874174,
      "loss": 2.9061,
      "step": 142344
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.199908494949341,
      "learning_rate": 0.00019148537404434668,
      "loss": 2.9486,
      "step": 142345
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6989669799804688,
      "learning_rate": 0.00019148156047012638,
      "loss": 3.1087,
      "step": 142346
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.347768783569336,
      "learning_rate": 0.0001914777469160817,
      "loss": 2.9445,
      "step": 142347
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.372802972793579,
      "learning_rate": 0.00019147393338221337,
      "loss": 2.9928,
      "step": 142348
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.9037483930587769,
      "learning_rate": 0.00019147011986852213,
      "loss": 2.795,
      "step": 142349
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1696817874908447,
      "learning_rate": 0.00019146630637500856,
      "loss": 2.9774,
      "step": 142350
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5403807163238525,
      "learning_rate": 0.00019146249290167362,
      "loss": 2.9959,
      "step": 142351
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1457204818725586,
      "learning_rate": 0.00019145867944851772,
      "loss": 3.3237,
      "step": 142352
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1236400604248047,
      "learning_rate": 0.00019145486601554172,
      "loss": 3.2016,
      "step": 142353
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.873842477798462,
      "learning_rate": 0.00019145105260274625,
      "loss": 2.9038,
      "step": 142354
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1513116359710693,
      "learning_rate": 0.00019144723921013214,
      "loss": 3.0024,
      "step": 142355
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5537595748901367,
      "learning_rate": 0.0001914434258377,
      "loss": 2.9544,
      "step": 142356
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.4517416954040527,
      "learning_rate": 0.00019143961248545074,
      "loss": 2.5998,
      "step": 142357
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5399670600891113,
      "learning_rate": 0.00019143579915338474,
      "loss": 2.8466,
      "step": 142358
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9416139125823975,
      "learning_rate": 0.00019143198584150293,
      "loss": 2.9724,
      "step": 142359
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.4804110527038574,
      "learning_rate": 0.0001914281725498059,
      "loss": 2.8658,
      "step": 142360
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.055936336517334,
      "learning_rate": 0.00019142435927829441,
      "loss": 3.1246,
      "step": 142361
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.018634557723999,
      "learning_rate": 0.00019142054602696925,
      "loss": 3.0793,
      "step": 142362
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3992068767547607,
      "learning_rate": 0.00019141673279583115,
      "loss": 3.1335,
      "step": 142363
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7198100090026855,
      "learning_rate": 0.00019141291958488062,
      "loss": 2.8039,
      "step": 142364
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0489227771759033,
      "learning_rate": 0.00019140910639411847,
      "loss": 3.0699,
      "step": 142365
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1546571254730225,
      "learning_rate": 0.0001914052932235454,
      "loss": 3.0792,
      "step": 142366
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.120321035385132,
      "learning_rate": 0.00019140148007316214,
      "loss": 3.1876,
      "step": 142367
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2398431301116943,
      "learning_rate": 0.0001913976669429694,
      "loss": 2.9565,
      "step": 142368
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.700604438781738,
      "learning_rate": 0.00019139385383296805,
      "loss": 2.7815,
      "step": 142369
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.525092840194702,
      "learning_rate": 0.00019139004074315844,
      "loss": 2.7598,
      "step": 142370
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.158621072769165,
      "learning_rate": 0.0001913862276735415,
      "loss": 2.94,
      "step": 142371
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4432947635650635,
      "learning_rate": 0.0001913824146241179,
      "loss": 2.8602,
      "step": 142372
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.11555552482605,
      "learning_rate": 0.00019137860159488838,
      "loss": 3.0777,
      "step": 142373
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.300706148147583,
      "learning_rate": 0.00019137478858585364,
      "loss": 3.0223,
      "step": 142374
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3062596321105957,
      "learning_rate": 0.00019137097559701448,
      "loss": 3.0731,
      "step": 142375
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.927701711654663,
      "learning_rate": 0.00019136716262837137,
      "loss": 3.3409,
      "step": 142376
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.28414249420166,
      "learning_rate": 0.00019136334967992518,
      "loss": 3.0425,
      "step": 142377
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4274563789367676,
      "learning_rate": 0.0001913595367516766,
      "loss": 2.907,
      "step": 142378
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5561258792877197,
      "learning_rate": 0.0001913557238436263,
      "loss": 2.8656,
      "step": 142379
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.6171207427978516,
      "learning_rate": 0.00019135191095577504,
      "loss": 2.789,
      "step": 142380
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.3244943618774414,
      "learning_rate": 0.00019134809808812357,
      "loss": 2.926,
      "step": 142381
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2759761810302734,
      "learning_rate": 0.00019134428524067253,
      "loss": 2.8482,
      "step": 142382
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.372875213623047,
      "learning_rate": 0.00019134047241342258,
      "loss": 2.8309,
      "step": 142383
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2098169326782227,
      "learning_rate": 0.0001913366596063745,
      "loss": 3.0539,
      "step": 142384
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.247990608215332,
      "learning_rate": 0.00019133284681952897,
      "loss": 2.9369,
      "step": 142385
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0266709327697754,
      "learning_rate": 0.00019132903405288674,
      "loss": 3.2519,
      "step": 142386
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3499832153320312,
      "learning_rate": 0.00019132522130644854,
      "loss": 2.8687,
      "step": 142387
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.089303970336914,
      "learning_rate": 0.00019132140858021496,
      "loss": 3.0179,
      "step": 142388
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2258875370025635,
      "learning_rate": 0.00019131759587418686,
      "loss": 2.9655,
      "step": 142389
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.027324914932251,
      "learning_rate": 0.0001913137831883648,
      "loss": 3.0892,
      "step": 142390
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1527018547058105,
      "learning_rate": 0.00019130997052274956,
      "loss": 3.051,
      "step": 142391
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1645851135253906,
      "learning_rate": 0.00019130615787734184,
      "loss": 2.9058,
      "step": 142392
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.077232837677002,
      "learning_rate": 0.00019130234525214238,
      "loss": 2.9663,
      "step": 142393
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.1786391735076904,
      "learning_rate": 0.0001912985326471519,
      "loss": 2.8802,
      "step": 142394
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2156753540039062,
      "learning_rate": 0.00019129472006237103,
      "loss": 3.1489,
      "step": 142395
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7514634132385254,
      "learning_rate": 0.00019129090749780053,
      "loss": 2.6842,
      "step": 142396
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.041724681854248,
      "learning_rate": 0.00019128709495344113,
      "loss": 2.9672,
      "step": 142397
    },
    {
      "epoch": 1.85,
      "grad_norm": 1.8890358209609985,
      "learning_rate": 0.00019128328242929349,
      "loss": 2.9131,
      "step": 142398
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.18229341506958,
      "learning_rate": 0.00019127946992535832,
      "loss": 3.193,
      "step": 142399
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3701179027557373,
      "learning_rate": 0.0001912756574416364,
      "loss": 2.8749,
      "step": 142400
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.511289358139038,
      "learning_rate": 0.00019127184497812837,
      "loss": 3.1911,
      "step": 142401
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.474428653717041,
      "learning_rate": 0.0001912680325348349,
      "loss": 2.963,
      "step": 142402
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1575911045074463,
      "learning_rate": 0.00019126422011175683,
      "loss": 2.9768,
      "step": 142403
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4250714778900146,
      "learning_rate": 0.00019126040770889478,
      "loss": 2.9634,
      "step": 142404
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2724802494049072,
      "learning_rate": 0.00019125659532624945,
      "loss": 2.8606,
      "step": 142405
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.201141357421875,
      "learning_rate": 0.00019125278296382162,
      "loss": 3.0934,
      "step": 142406
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4763314723968506,
      "learning_rate": 0.00019124897062161188,
      "loss": 3.0403,
      "step": 142407
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3241002559661865,
      "learning_rate": 0.000191245158299621,
      "loss": 3.1774,
      "step": 142408
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.926514148712158,
      "learning_rate": 0.00019124134599784974,
      "loss": 2.9351,
      "step": 142409
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5688135623931885,
      "learning_rate": 0.00019123753371629875,
      "loss": 3.0099,
      "step": 142410
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0583746433258057,
      "learning_rate": 0.00019123372145496884,
      "loss": 2.8384,
      "step": 142411
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.412329912185669,
      "learning_rate": 0.00019122990921386057,
      "loss": 2.9472,
      "step": 142412
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.317873477935791,
      "learning_rate": 0.0001912260969929747,
      "loss": 3.024,
      "step": 142413
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8474793434143066,
      "learning_rate": 0.00019122228479231192,
      "loss": 2.7839,
      "step": 142414
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2103519439697266,
      "learning_rate": 0.000191218472611873,
      "loss": 3.0616,
      "step": 142415
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.4017558097839355,
      "learning_rate": 0.00019121466045165863,
      "loss": 2.9625,
      "step": 142416
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3242762088775635,
      "learning_rate": 0.00019121084831166948,
      "loss": 2.9623,
      "step": 142417
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.344862699508667,
      "learning_rate": 0.00019120703619190642,
      "loss": 2.9383,
      "step": 142418
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.246286392211914,
      "learning_rate": 0.00019120322409236988,
      "loss": 2.889,
      "step": 142419
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8872721195220947,
      "learning_rate": 0.00019119941201306076,
      "loss": 2.9604,
      "step": 142420
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.036184787750244,
      "learning_rate": 0.0001911955999539797,
      "loss": 3.1531,
      "step": 142421
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2668590545654297,
      "learning_rate": 0.00019119178791512738,
      "loss": 2.9107,
      "step": 142422
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.5802035331726074,
      "learning_rate": 0.00019118797589650464,
      "loss": 2.8001,
      "step": 142423
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6517012119293213,
      "learning_rate": 0.0001911841638981122,
      "loss": 2.8625,
      "step": 142424
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.650043487548828,
      "learning_rate": 0.00019118035191995056,
      "loss": 2.8934,
      "step": 142425
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8862624168395996,
      "learning_rate": 0.00019117653996202056,
      "loss": 2.9514,
      "step": 142426
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.3227527141571045,
      "learning_rate": 0.00019117272802432288,
      "loss": 2.9009,
      "step": 142427
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6388449668884277,
      "learning_rate": 0.00019116891610685823,
      "loss": 2.8938,
      "step": 142428
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0334019660949707,
      "learning_rate": 0.00019116510420962734,
      "loss": 2.9274,
      "step": 142429
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.938387393951416,
      "learning_rate": 0.00019116129233263105,
      "loss": 2.8016,
      "step": 142430
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8013880252838135,
      "learning_rate": 0.0001911574804758698,
      "loss": 2.9643,
      "step": 142431
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.083735942840576,
      "learning_rate": 0.00019115366863934442,
      "loss": 2.7634,
      "step": 142432
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.103675365447998,
      "learning_rate": 0.00019114985682305564,
      "loss": 2.7401,
      "step": 142433
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7900307178497314,
      "learning_rate": 0.00019114604502700415,
      "loss": 3.1254,
      "step": 142434
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0430188179016113,
      "learning_rate": 0.00019114223325119068,
      "loss": 2.818,
      "step": 142435
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.351961851119995,
      "learning_rate": 0.00019113842149561604,
      "loss": 2.8645,
      "step": 142436
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2970290184020996,
      "learning_rate": 0.00019113460976028068,
      "loss": 3.1303,
      "step": 142437
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4972927570343018,
      "learning_rate": 0.00019113079804518546,
      "loss": 2.8144,
      "step": 142438
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.9600019454956055,
      "learning_rate": 0.0001911269863503311,
      "loss": 2.9444,
      "step": 142439
    },
    {
      "epoch": 1.85,
      "grad_norm": 4.160566806793213,
      "learning_rate": 0.00019112317467571827,
      "loss": 2.8118,
      "step": 142440
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9432060718536377,
      "learning_rate": 0.0001911193630213477,
      "loss": 2.8077,
      "step": 142441
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.805161714553833,
      "learning_rate": 0.0001911155513872202,
      "loss": 3.1322,
      "step": 142442
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2350597381591797,
      "learning_rate": 0.00019111173977333628,
      "loss": 2.8884,
      "step": 142443
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.7864415645599365,
      "learning_rate": 0.00019110792817969674,
      "loss": 2.7039,
      "step": 142444
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.890122413635254,
      "learning_rate": 0.00019110411660630227,
      "loss": 2.8954,
      "step": 142445
    },
    {
      "epoch": 1.85,
      "grad_norm": 3.695978879928589,
      "learning_rate": 0.0001911003050531536,
      "loss": 2.6998,
      "step": 142446
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.6285877227783203,
      "learning_rate": 0.00019109649352025147,
      "loss": 3.1613,
      "step": 142447
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8901307582855225,
      "learning_rate": 0.00019109268200759666,
      "loss": 2.9966,
      "step": 142448
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.506847858428955,
      "learning_rate": 0.00019108887051518965,
      "loss": 3.0155,
      "step": 142449
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1144614219665527,
      "learning_rate": 0.00019108505904303128,
      "loss": 3.0656,
      "step": 142450
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0082781314849854,
      "learning_rate": 0.00019108124759112226,
      "loss": 3.2073,
      "step": 142451
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4114997386932373,
      "learning_rate": 0.00019107743615946328,
      "loss": 3.0712,
      "step": 142452
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1586360931396484,
      "learning_rate": 0.0001910736247480551,
      "loss": 3.0709,
      "step": 142453
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8068490028381348,
      "learning_rate": 0.00019106981335689844,
      "loss": 2.8853,
      "step": 142454
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.1560025215148926,
      "learning_rate": 0.00019106600198599388,
      "loss": 2.912,
      "step": 142455
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2247350215911865,
      "learning_rate": 0.0001910621906353422,
      "loss": 3.1014,
      "step": 142456
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.0425963401794434,
      "learning_rate": 0.00019105837930494414,
      "loss": 2.9151,
      "step": 142457
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.210719585418701,
      "learning_rate": 0.00019105456799480035,
      "loss": 2.817,
      "step": 142458
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.145230293273926,
      "learning_rate": 0.00019105075670491158,
      "loss": 2.8813,
      "step": 142459
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.284306764602661,
      "learning_rate": 0.00019104694543527854,
      "loss": 3.014,
      "step": 142460
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.4421944618225098,
      "learning_rate": 0.00019104313418590203,
      "loss": 2.7767,
      "step": 142461
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.020667314529419,
      "learning_rate": 0.00019103932295678257,
      "loss": 2.9048,
      "step": 142462
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.2721261978149414,
      "learning_rate": 0.00019103551174792094,
      "loss": 3.1561,
      "step": 142463
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.8110713958740234,
      "learning_rate": 0.00019103170055931787,
      "loss": 2.8958,
      "step": 142464
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.242621898651123,
      "learning_rate": 0.00019102788939097403,
      "loss": 3.1885,
      "step": 142465
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.811981201171875,
      "learning_rate": 0.0001910240782428902,
      "loss": 3.212,
      "step": 142466
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.856112003326416,
      "learning_rate": 0.00019102026711506715,
      "loss": 2.9822,
      "step": 142467
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.89081871509552,
      "learning_rate": 0.0001910164560075054,
      "loss": 2.9724,
      "step": 142468
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0535776615142822,
      "learning_rate": 0.0001910126449202057,
      "loss": 2.9508,
      "step": 142469
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.261901617050171,
      "learning_rate": 0.00019100883385316883,
      "loss": 3.0624,
      "step": 142470
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8687336444854736,
      "learning_rate": 0.0001910050228063955,
      "loss": 2.8149,
      "step": 142471
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.169566631317139,
      "learning_rate": 0.00019100121177988638,
      "loss": 2.9012,
      "step": 142472
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.066279649734497,
      "learning_rate": 0.00019099740077364226,
      "loss": 3.0735,
      "step": 142473
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.545475959777832,
      "learning_rate": 0.00019099358978766373,
      "loss": 3.0027,
      "step": 142474
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9322460889816284,
      "learning_rate": 0.00019098977882195153,
      "loss": 2.8652,
      "step": 142475
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1052563190460205,
      "learning_rate": 0.0001909859678765064,
      "loss": 3.1031,
      "step": 142476
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4325079917907715,
      "learning_rate": 0.00019098215695132902,
      "loss": 2.8435,
      "step": 142477
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0993993282318115,
      "learning_rate": 0.0001909783460464201,
      "loss": 2.9717,
      "step": 142478
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3354861736297607,
      "learning_rate": 0.00019097453516178046,
      "loss": 2.993,
      "step": 142479
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2327804565429688,
      "learning_rate": 0.00019097072429741062,
      "loss": 2.8816,
      "step": 142480
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.393998384475708,
      "learning_rate": 0.00019096691345331143,
      "loss": 3.0292,
      "step": 142481
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.768874406814575,
      "learning_rate": 0.00019096310262948355,
      "loss": 2.82,
      "step": 142482
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8772850036621094,
      "learning_rate": 0.0001909592918259276,
      "loss": 2.9239,
      "step": 142483
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1642611026763916,
      "learning_rate": 0.00019095548104264442,
      "loss": 3.3443,
      "step": 142484
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9246838092803955,
      "learning_rate": 0.00019095167027963474,
      "loss": 3.2106,
      "step": 142485
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5258185863494873,
      "learning_rate": 0.00019094785953689916,
      "loss": 3.1073,
      "step": 142486
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2922275066375732,
      "learning_rate": 0.00019094404881443843,
      "loss": 3.2225,
      "step": 142487
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.825726270675659,
      "learning_rate": 0.0001909402381122533,
      "loss": 2.8868,
      "step": 142488
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.49993634223938,
      "learning_rate": 0.00019093642743034438,
      "loss": 3.0889,
      "step": 142489
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9617664813995361,
      "learning_rate": 0.00019093261676871243,
      "loss": 3.0107,
      "step": 142490
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0250532627105713,
      "learning_rate": 0.00019092880612735824,
      "loss": 2.811,
      "step": 142491
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.140544891357422,
      "learning_rate": 0.00019092499550628237,
      "loss": 2.9431,
      "step": 142492
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1884491443634033,
      "learning_rate": 0.0001909211849054856,
      "loss": 2.8601,
      "step": 142493
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.831296920776367,
      "learning_rate": 0.00019091737432496865,
      "loss": 2.7448,
      "step": 142494
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.264643430709839,
      "learning_rate": 0.00019091356376473218,
      "loss": 3.1125,
      "step": 142495
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.111786127090454,
      "learning_rate": 0.00019090975322477708,
      "loss": 2.9116,
      "step": 142496
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5918407440185547,
      "learning_rate": 0.0001909059427051039,
      "loss": 2.7435,
      "step": 142497
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6553518772125244,
      "learning_rate": 0.00019090213220571326,
      "loss": 3.0587,
      "step": 142498
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0130105018615723,
      "learning_rate": 0.00019089832172660594,
      "loss": 2.8551,
      "step": 142499
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8138130903244019,
      "learning_rate": 0.00019089451126778274,
      "loss": 2.8744,
      "step": 142500
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.138857364654541,
      "learning_rate": 0.0001908907008292443,
      "loss": 3.1401,
      "step": 142501
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.551471471786499,
      "learning_rate": 0.00019088689041099136,
      "loss": 3.2774,
      "step": 142502
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.293435573577881,
      "learning_rate": 0.0001908830800130247,
      "loss": 2.9257,
      "step": 142503
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0887441635131836,
      "learning_rate": 0.00019087926963534487,
      "loss": 2.8752,
      "step": 142504
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0187857151031494,
      "learning_rate": 0.00019087545927795255,
      "loss": 2.8171,
      "step": 142505
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.015881299972534,
      "learning_rate": 0.0001908716489408486,
      "loss": 2.9809,
      "step": 142506
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2826218605041504,
      "learning_rate": 0.00019086783862403363,
      "loss": 2.9312,
      "step": 142507
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.30641508102417,
      "learning_rate": 0.00019086402832750845,
      "loss": 2.822,
      "step": 142508
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7453808784484863,
      "learning_rate": 0.00019086021805127379,
      "loss": 3.051,
      "step": 142509
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4302618503570557,
      "learning_rate": 0.00019085640779533015,
      "loss": 3.1458,
      "step": 142510
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1296141147613525,
      "learning_rate": 0.00019085259755967837,
      "loss": 2.8812,
      "step": 142511
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.228506088256836,
      "learning_rate": 0.00019084878734431914,
      "loss": 2.8755,
      "step": 142512
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.977186679840088,
      "learning_rate": 0.0001908449771492532,
      "loss": 3.0362,
      "step": 142513
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9839792251586914,
      "learning_rate": 0.00019084116697448125,
      "loss": 3.1196,
      "step": 142514
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.923139810562134,
      "learning_rate": 0.00019083735682000406,
      "loss": 3.0679,
      "step": 142515
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.711007833480835,
      "learning_rate": 0.00019083354668582222,
      "loss": 2.9158,
      "step": 142516
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.867242455482483,
      "learning_rate": 0.00019082973657193644,
      "loss": 2.8488,
      "step": 142517
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2881033420562744,
      "learning_rate": 0.00019082592647834748,
      "loss": 2.8997,
      "step": 142518
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.296287775039673,
      "learning_rate": 0.00019082211640505603,
      "loss": 3.0636,
      "step": 142519
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6841657161712646,
      "learning_rate": 0.0001908183063520628,
      "loss": 2.8176,
      "step": 142520
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.062490224838257,
      "learning_rate": 0.00019081449631936864,
      "loss": 2.9931,
      "step": 142521
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.104518175125122,
      "learning_rate": 0.00019081068630697403,
      "loss": 2.6619,
      "step": 142522
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7719905376434326,
      "learning_rate": 0.00019080687631487975,
      "loss": 3.0664,
      "step": 142523
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4267418384552,
      "learning_rate": 0.00019080306634308654,
      "loss": 3.0627,
      "step": 142524
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.007150888442993,
      "learning_rate": 0.0001907992563915951,
      "loss": 3.0115,
      "step": 142525
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7421247959136963,
      "learning_rate": 0.00019079544646040614,
      "loss": 2.894,
      "step": 142526
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.187575101852417,
      "learning_rate": 0.0001907916365495204,
      "loss": 3.2441,
      "step": 142527
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7973618507385254,
      "learning_rate": 0.0001907878266589386,
      "loss": 2.9122,
      "step": 142528
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1935651302337646,
      "learning_rate": 0.00019078401678866133,
      "loss": 2.727,
      "step": 142529
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.890878677368164,
      "learning_rate": 0.00019078020693868937,
      "loss": 2.9387,
      "step": 142530
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0767195224761963,
      "learning_rate": 0.00019077639710902346,
      "loss": 2.9415,
      "step": 142531
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.149156093597412,
      "learning_rate": 0.0001907725872996642,
      "loss": 2.8834,
      "step": 142532
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.082578659057617,
      "learning_rate": 0.00019076877751061244,
      "loss": 2.989,
      "step": 142533
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.026998996734619,
      "learning_rate": 0.00019076496774186896,
      "loss": 2.7597,
      "step": 142534
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4128077030181885,
      "learning_rate": 0.00019076115799343416,
      "loss": 2.9461,
      "step": 142535
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.124626398086548,
      "learning_rate": 0.00019075734826530898,
      "loss": 2.8192,
      "step": 142536
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.6134493350982666,
      "learning_rate": 0.00019075353855749403,
      "loss": 2.9915,
      "step": 142537
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8839455842971802,
      "learning_rate": 0.0001907497288699901,
      "loss": 2.6194,
      "step": 142538
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9146692752838135,
      "learning_rate": 0.0001907459192027978,
      "loss": 3.0206,
      "step": 142539
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4052894115448,
      "learning_rate": 0.00019074210955591803,
      "loss": 2.827,
      "step": 142540
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.515429973602295,
      "learning_rate": 0.0001907382999293513,
      "loss": 3.0389,
      "step": 142541
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2530455589294434,
      "learning_rate": 0.0001907344903230983,
      "loss": 3.0552,
      "step": 142542
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0937979221343994,
      "learning_rate": 0.0001907306807371599,
      "loss": 3.1755,
      "step": 142543
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5469748973846436,
      "learning_rate": 0.00019072687117153671,
      "loss": 2.9413,
      "step": 142544
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9550893306732178,
      "learning_rate": 0.00019072306162622944,
      "loss": 2.7653,
      "step": 142545
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.965800404548645,
      "learning_rate": 0.0001907192521012389,
      "loss": 3.09,
      "step": 142546
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.999704122543335,
      "learning_rate": 0.00019071544259656567,
      "loss": 2.9738,
      "step": 142547
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.537447929382324,
      "learning_rate": 0.00019071163311221043,
      "loss": 3.0357,
      "step": 142548
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.189039707183838,
      "learning_rate": 0.000190707823648174,
      "loss": 3.133,
      "step": 142549
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.060885190963745,
      "learning_rate": 0.00019070401420445704,
      "loss": 3.0277,
      "step": 142550
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6870956420898438,
      "learning_rate": 0.00019070020478106027,
      "loss": 3.2479,
      "step": 142551
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0310518741607666,
      "learning_rate": 0.0001906963953779845,
      "loss": 2.698,
      "step": 142552
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3202452659606934,
      "learning_rate": 0.00019069258599523024,
      "loss": 3.1309,
      "step": 142553
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1668598651885986,
      "learning_rate": 0.0001906887766327983,
      "loss": 2.8388,
      "step": 142554
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8836190700531006,
      "learning_rate": 0.00019068496729068935,
      "loss": 3.0961,
      "step": 142555
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.822230339050293,
      "learning_rate": 0.00019068115796890412,
      "loss": 3.095,
      "step": 142556
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.4213719367980957,
      "learning_rate": 0.00019067734866744332,
      "loss": 2.885,
      "step": 142557
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.804072856903076,
      "learning_rate": 0.00019067353938630776,
      "loss": 2.9808,
      "step": 142558
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0102014541625977,
      "learning_rate": 0.00019066973012549807,
      "loss": 2.8698,
      "step": 142559
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.9765520095825195,
      "learning_rate": 0.00019066592088501485,
      "loss": 2.7674,
      "step": 142560
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0455071926116943,
      "learning_rate": 0.00019066211166485886,
      "loss": 3.056,
      "step": 142561
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.9776828289031982,
      "learning_rate": 0.0001906583024650309,
      "loss": 3.1319,
      "step": 142562
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9670791625976562,
      "learning_rate": 0.00019065449328553162,
      "loss": 2.7339,
      "step": 142563
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9803553819656372,
      "learning_rate": 0.0001906506841263618,
      "loss": 2.9087,
      "step": 142564
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1682591438293457,
      "learning_rate": 0.000190646874987522,
      "loss": 2.9179,
      "step": 142565
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.168980360031128,
      "learning_rate": 0.00019064306586901307,
      "loss": 3.2185,
      "step": 142566
    },
    {
      "epoch": 1.86,
      "grad_norm": 5.427933692932129,
      "learning_rate": 0.00019063925677083562,
      "loss": 2.8277,
      "step": 142567
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.83801007270813,
      "learning_rate": 0.00019063544769299035,
      "loss": 2.8744,
      "step": 142568
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2394399642944336,
      "learning_rate": 0.00019063163863547808,
      "loss": 3.0851,
      "step": 142569
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4850196838378906,
      "learning_rate": 0.00019062782959829947,
      "loss": 2.9351,
      "step": 142570
    },
    {
      "epoch": 1.86,
      "grad_norm": 5.672062397003174,
      "learning_rate": 0.00019062402058145513,
      "loss": 2.6784,
      "step": 142571
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.866070032119751,
      "learning_rate": 0.0001906202115849459,
      "loss": 2.7501,
      "step": 142572
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.458102703094482,
      "learning_rate": 0.00019061640260877247,
      "loss": 2.8478,
      "step": 142573
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9275238513946533,
      "learning_rate": 0.00019061259365293546,
      "loss": 2.9662,
      "step": 142574
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.714010715484619,
      "learning_rate": 0.00019060878471743566,
      "loss": 2.8611,
      "step": 142575
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4855239391326904,
      "learning_rate": 0.0001906049758022738,
      "loss": 2.9563,
      "step": 142576
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1572978496551514,
      "learning_rate": 0.00019060116690745044,
      "loss": 3.0608,
      "step": 142577
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1664247512817383,
      "learning_rate": 0.00019059735803296642,
      "loss": 3.0173,
      "step": 142578
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.159752607345581,
      "learning_rate": 0.00019059354917882242,
      "loss": 2.91,
      "step": 142579
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2684075832366943,
      "learning_rate": 0.0001905897403450192,
      "loss": 3.1698,
      "step": 142580
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.374830722808838,
      "learning_rate": 0.00019058593153155735,
      "loss": 3.0663,
      "step": 142581
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.486100196838379,
      "learning_rate": 0.0001905821227384377,
      "loss": 2.8218,
      "step": 142582
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.138620376586914,
      "learning_rate": 0.00019057831396566085,
      "loss": 3.0138,
      "step": 142583
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9596349000930786,
      "learning_rate": 0.00019057450521322753,
      "loss": 2.8045,
      "step": 142584
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9798413515090942,
      "learning_rate": 0.0001905706964811385,
      "loss": 3.1171,
      "step": 142585
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3436248302459717,
      "learning_rate": 0.0001905668877693944,
      "loss": 3.1225,
      "step": 142586
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.339141607284546,
      "learning_rate": 0.00019056307907799606,
      "loss": 2.8443,
      "step": 142587
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.783259868621826,
      "learning_rate": 0.00019055927040694418,
      "loss": 2.8968,
      "step": 142588
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0943024158477783,
      "learning_rate": 0.0001905554617562393,
      "loss": 3.0676,
      "step": 142589
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0386805534362793,
      "learning_rate": 0.0001905516531258822,
      "loss": 3.0902,
      "step": 142590
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.089686393737793,
      "learning_rate": 0.0001905478445158736,
      "loss": 2.9381,
      "step": 142591
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5047061443328857,
      "learning_rate": 0.00019054403592621427,
      "loss": 3.0629,
      "step": 142592
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5508430004119873,
      "learning_rate": 0.00019054022735690482,
      "loss": 3.0538,
      "step": 142593
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.725815534591675,
      "learning_rate": 0.00019053641880794605,
      "loss": 2.708,
      "step": 142594
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.706202268600464,
      "learning_rate": 0.00019053261027933874,
      "loss": 2.8349,
      "step": 142595
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.326533317565918,
      "learning_rate": 0.00019052880177108337,
      "loss": 3.0595,
      "step": 142596
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.82147479057312,
      "learning_rate": 0.00019052499328318072,
      "loss": 3.0946,
      "step": 142597
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5013980865478516,
      "learning_rate": 0.00019052118481563155,
      "loss": 2.8669,
      "step": 142598
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.054793357849121,
      "learning_rate": 0.00019051737636843657,
      "loss": 2.9155,
      "step": 142599
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4733970165252686,
      "learning_rate": 0.00019051356794159649,
      "loss": 3.119,
      "step": 142600
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.990070104598999,
      "learning_rate": 0.0001905097595351121,
      "loss": 3.151,
      "step": 142601
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.856806755065918,
      "learning_rate": 0.00019050595114898392,
      "loss": 2.9426,
      "step": 142602
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.077816963195801,
      "learning_rate": 0.00019050214278321272,
      "loss": 3.1214,
      "step": 142603
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0324137210845947,
      "learning_rate": 0.0001904983344377993,
      "loss": 2.8234,
      "step": 142604
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5043418407440186,
      "learning_rate": 0.00019049452611274425,
      "loss": 3.2961,
      "step": 142605
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.150933265686035,
      "learning_rate": 0.00019049071780804833,
      "loss": 3.1325,
      "step": 142606
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.532661199569702,
      "learning_rate": 0.00019048690952371236,
      "loss": 2.9457,
      "step": 142607
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.039795398712158,
      "learning_rate": 0.00019048310125973687,
      "loss": 3.0254,
      "step": 142608
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.396649122238159,
      "learning_rate": 0.0001904792930161226,
      "loss": 3.0105,
      "step": 142609
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9811903238296509,
      "learning_rate": 0.00019047548479287033,
      "loss": 3.0368,
      "step": 142610
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0985159873962402,
      "learning_rate": 0.00019047167658998074,
      "loss": 2.8677,
      "step": 142611
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0959835052490234,
      "learning_rate": 0.00019046786840745453,
      "loss": 3.0115,
      "step": 142612
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1985859870910645,
      "learning_rate": 0.0001904640602452925,
      "loss": 2.9225,
      "step": 142613
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4513931274414062,
      "learning_rate": 0.00019046025210349513,
      "loss": 2.9276,
      "step": 142614
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.2884950637817383,
      "learning_rate": 0.00019045644398206333,
      "loss": 2.9047,
      "step": 142615
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3955883979797363,
      "learning_rate": 0.00019045263588099773,
      "loss": 3.0078,
      "step": 142616
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3813514709472656,
      "learning_rate": 0.00019044882780029903,
      "loss": 3.0262,
      "step": 142617
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.344094753265381,
      "learning_rate": 0.00019044501973996796,
      "loss": 3.0579,
      "step": 142618
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.811394691467285,
      "learning_rate": 0.00019044121170000536,
      "loss": 3.1548,
      "step": 142619
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0074307918548584,
      "learning_rate": 0.00019043740368041167,
      "loss": 2.904,
      "step": 142620
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4529569149017334,
      "learning_rate": 0.00019043359568118775,
      "loss": 3.0064,
      "step": 142621
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0970120429992676,
      "learning_rate": 0.00019042978770233428,
      "loss": 2.7384,
      "step": 142622
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.7906384468078613,
      "learning_rate": 0.000190425979743852,
      "loss": 3.0174,
      "step": 142623
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.8919804096221924,
      "learning_rate": 0.0001904221718057416,
      "loss": 2.9112,
      "step": 142624
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.116598606109619,
      "learning_rate": 0.0001904183638880039,
      "loss": 2.9491,
      "step": 142625
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0119001865386963,
      "learning_rate": 0.00019041455599063938,
      "loss": 2.6764,
      "step": 142626
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.9410417079925537,
      "learning_rate": 0.00019041074811364887,
      "loss": 2.872,
      "step": 142627
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.4406840801239014,
      "learning_rate": 0.00019040694025703305,
      "loss": 3.0153,
      "step": 142628
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.718834400177002,
      "learning_rate": 0.00019040313242079266,
      "loss": 2.7803,
      "step": 142629
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.45668625831604,
      "learning_rate": 0.0001903993246049284,
      "loss": 2.6497,
      "step": 142630
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1868643760681152,
      "learning_rate": 0.00019039551680944108,
      "loss": 3.0889,
      "step": 142631
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.203280448913574,
      "learning_rate": 0.00019039170903433118,
      "loss": 2.9644,
      "step": 142632
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.056447744369507,
      "learning_rate": 0.00019038790127959954,
      "loss": 3.0714,
      "step": 142633
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7400460243225098,
      "learning_rate": 0.00019038409354524684,
      "loss": 2.9001,
      "step": 142634
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1121671199798584,
      "learning_rate": 0.00019038028583127383,
      "loss": 2.8533,
      "step": 142635
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9216828346252441,
      "learning_rate": 0.00019037647813768116,
      "loss": 3.0442,
      "step": 142636
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.76348876953125,
      "learning_rate": 0.00019037267046446976,
      "loss": 2.9631,
      "step": 142637
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.9820797443389893,
      "learning_rate": 0.00019036886281163998,
      "loss": 2.9479,
      "step": 142638
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7086050510406494,
      "learning_rate": 0.00019036505517919271,
      "loss": 3.2786,
      "step": 142639
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5699944496154785,
      "learning_rate": 0.0001903612475671286,
      "loss": 2.9826,
      "step": 142640
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1616647243499756,
      "learning_rate": 0.00019035743997544846,
      "loss": 3.0167,
      "step": 142641
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9123075008392334,
      "learning_rate": 0.00019035363240415293,
      "loss": 3.1325,
      "step": 142642
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.2306900024414062,
      "learning_rate": 0.00019034982485324284,
      "loss": 2.9363,
      "step": 142643
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.482145071029663,
      "learning_rate": 0.0001903460173227187,
      "loss": 3.0166,
      "step": 142644
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.213560104370117,
      "learning_rate": 0.00019034220981258126,
      "loss": 3.0143,
      "step": 142645
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.73282527923584,
      "learning_rate": 0.00019033840232283132,
      "loss": 2.8715,
      "step": 142646
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.129629373550415,
      "learning_rate": 0.00019033459485346953,
      "loss": 2.9524,
      "step": 142647
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7953779697418213,
      "learning_rate": 0.00019033078740449653,
      "loss": 3.2626,
      "step": 142648
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.073875665664673,
      "learning_rate": 0.00019032697997591326,
      "loss": 3.0214,
      "step": 142649
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.260280132293701,
      "learning_rate": 0.00019032317256772023,
      "loss": 2.8412,
      "step": 142650
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.227931261062622,
      "learning_rate": 0.00019031936517991817,
      "loss": 3.1534,
      "step": 142651
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.374706268310547,
      "learning_rate": 0.00019031555781250776,
      "loss": 3.1125,
      "step": 142652
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1430306434631348,
      "learning_rate": 0.0001903117504654898,
      "loss": 3.1229,
      "step": 142653
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4949376583099365,
      "learning_rate": 0.00019030794313886493,
      "loss": 2.9979,
      "step": 142654
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.761272668838501,
      "learning_rate": 0.00019030413583263399,
      "loss": 2.8176,
      "step": 142655
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.220885753631592,
      "learning_rate": 0.00019030032854679745,
      "loss": 3.0925,
      "step": 142656
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4557042121887207,
      "learning_rate": 0.0001902965212813562,
      "loss": 2.8588,
      "step": 142657
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.619568109512329,
      "learning_rate": 0.00019029271403631095,
      "loss": 3.0289,
      "step": 142658
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1425769329071045,
      "learning_rate": 0.0001902889068116623,
      "loss": 2.9115,
      "step": 142659
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.773045063018799,
      "learning_rate": 0.00019028509960741097,
      "loss": 3.1085,
      "step": 142660
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6953752040863037,
      "learning_rate": 0.00019028129242355774,
      "loss": 3.138,
      "step": 142661
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.497116804122925,
      "learning_rate": 0.00019027748526010335,
      "loss": 3.0391,
      "step": 142662
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0779247283935547,
      "learning_rate": 0.0001902736781170484,
      "loss": 3.0295,
      "step": 142663
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0499908924102783,
      "learning_rate": 0.00019026987099439365,
      "loss": 3.1319,
      "step": 142664
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.469237804412842,
      "learning_rate": 0.00019026606389213984,
      "loss": 2.9724,
      "step": 142665
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.006533145904541,
      "learning_rate": 0.0001902622568102876,
      "loss": 3.0482,
      "step": 142666
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1604628562927246,
      "learning_rate": 0.00019025844974883763,
      "loss": 3.0466,
      "step": 142667
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2533836364746094,
      "learning_rate": 0.0001902546427077908,
      "loss": 2.79,
      "step": 142668
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8990039825439453,
      "learning_rate": 0.00019025083568714762,
      "loss": 3.2529,
      "step": 142669
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4013099670410156,
      "learning_rate": 0.0001902470286869089,
      "loss": 2.9993,
      "step": 142670
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5048835277557373,
      "learning_rate": 0.0001902432217070753,
      "loss": 3.0128,
      "step": 142671
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.482201337814331,
      "learning_rate": 0.00019023941474764764,
      "loss": 3.0675,
      "step": 142672
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.030170440673828,
      "learning_rate": 0.00019023560780862648,
      "loss": 2.966,
      "step": 142673
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.13464617729187,
      "learning_rate": 0.00019023180089001264,
      "loss": 2.8105,
      "step": 142674
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1589252948760986,
      "learning_rate": 0.00019022799399180673,
      "loss": 3.2387,
      "step": 142675
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2372806072235107,
      "learning_rate": 0.0001902241871140095,
      "loss": 3.0772,
      "step": 142676
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.257423400878906,
      "learning_rate": 0.0001902203802566217,
      "loss": 2.8837,
      "step": 142677
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1295089721679688,
      "learning_rate": 0.00019021657341964396,
      "loss": 3.0063,
      "step": 142678
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0181281566619873,
      "learning_rate": 0.00019021276660307712,
      "loss": 3.094,
      "step": 142679
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0566678047180176,
      "learning_rate": 0.00019020895980692175,
      "loss": 2.9729,
      "step": 142680
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.619448184967041,
      "learning_rate": 0.0001902051530311786,
      "loss": 2.829,
      "step": 142681
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.813880205154419,
      "learning_rate": 0.00019020134627584836,
      "loss": 3.0325,
      "step": 142682
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.013491153717041,
      "learning_rate": 0.00019019753954093176,
      "loss": 3.3327,
      "step": 142683
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8475587368011475,
      "learning_rate": 0.00019019373282642952,
      "loss": 2.7709,
      "step": 142684
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4616830348968506,
      "learning_rate": 0.00019018992613234237,
      "loss": 3.0007,
      "step": 142685
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.408601760864258,
      "learning_rate": 0.00019018611945867105,
      "loss": 2.9384,
      "step": 142686
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0476417541503906,
      "learning_rate": 0.0001901823128054161,
      "loss": 3.0462,
      "step": 142687
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.977331519126892,
      "learning_rate": 0.00019017850617257832,
      "loss": 2.8636,
      "step": 142688
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.247682809829712,
      "learning_rate": 0.00019017469956015846,
      "loss": 2.943,
      "step": 142689
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.402003526687622,
      "learning_rate": 0.00019017089296815719,
      "loss": 3.1066,
      "step": 142690
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5716168880462646,
      "learning_rate": 0.00019016708639657523,
      "loss": 2.9952,
      "step": 142691
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.204230308532715,
      "learning_rate": 0.0001901632798454134,
      "loss": 2.7927,
      "step": 142692
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3313496112823486,
      "learning_rate": 0.00019015947331467214,
      "loss": 3.0198,
      "step": 142693
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.274116039276123,
      "learning_rate": 0.00019015566680435233,
      "loss": 2.9093,
      "step": 142694
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.255013942718506,
      "learning_rate": 0.00019015186031445463,
      "loss": 3.0229,
      "step": 142695
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5837907791137695,
      "learning_rate": 0.0001901480538449798,
      "loss": 2.7749,
      "step": 142696
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7971601486206055,
      "learning_rate": 0.00019014424739592853,
      "loss": 2.8725,
      "step": 142697
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.743276596069336,
      "learning_rate": 0.00019014044096730162,
      "loss": 2.9212,
      "step": 142698
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1867778301239014,
      "learning_rate": 0.00019013663455909953,
      "loss": 3.113,
      "step": 142699
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2027862071990967,
      "learning_rate": 0.00019013282817132316,
      "loss": 2.98,
      "step": 142700
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9176000356674194,
      "learning_rate": 0.00019012902180397316,
      "loss": 2.9014,
      "step": 142701
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9485130310058594,
      "learning_rate": 0.00019012521545705023,
      "loss": 2.6945,
      "step": 142702
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.973008632659912,
      "learning_rate": 0.00019012140913055514,
      "loss": 3.0034,
      "step": 142703
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4252028465270996,
      "learning_rate": 0.0001901176028244886,
      "loss": 2.9254,
      "step": 142704
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.59302020072937,
      "learning_rate": 0.00019011379653885118,
      "loss": 2.9692,
      "step": 142705
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.316790819168091,
      "learning_rate": 0.0001901099902736437,
      "loss": 3.2297,
      "step": 142706
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.094517707824707,
      "learning_rate": 0.00019010618402886688,
      "loss": 2.7322,
      "step": 142707
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.249253034591675,
      "learning_rate": 0.0001901023778045213,
      "loss": 2.814,
      "step": 142708
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.165250301361084,
      "learning_rate": 0.00019009857160060786,
      "loss": 3.0182,
      "step": 142709
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7065155506134033,
      "learning_rate": 0.00019009476541712725,
      "loss": 2.9823,
      "step": 142710
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.442993402481079,
      "learning_rate": 0.00019009095925407997,
      "loss": 2.8841,
      "step": 142711
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.3880720138549805,
      "learning_rate": 0.00019008715311146686,
      "loss": 3.0801,
      "step": 142712
    },
    {
      "epoch": 1.86,
      "grad_norm": 5.545668125152588,
      "learning_rate": 0.0001900833469892886,
      "loss": 2.7285,
      "step": 142713
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.398939847946167,
      "learning_rate": 0.00019007954088754597,
      "loss": 2.9841,
      "step": 142714
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0060391426086426,
      "learning_rate": 0.0001900757348062396,
      "loss": 3.1185,
      "step": 142715
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.524613857269287,
      "learning_rate": 0.00019007192874537034,
      "loss": 2.9847,
      "step": 142716
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.271158456802368,
      "learning_rate": 0.0001900681227049387,
      "loss": 2.9502,
      "step": 142717
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9397022724151611,
      "learning_rate": 0.00019006431668494543,
      "loss": 3.0808,
      "step": 142718
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.790719985961914,
      "learning_rate": 0.00019006051068539128,
      "loss": 2.8347,
      "step": 142719
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1270289421081543,
      "learning_rate": 0.00019005670470627697,
      "loss": 2.8982,
      "step": 142720
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0372965335845947,
      "learning_rate": 0.00019005289874760318,
      "loss": 2.9891,
      "step": 142721
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9072315692901611,
      "learning_rate": 0.00019004909280937078,
      "loss": 3.1051,
      "step": 142722
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1693243980407715,
      "learning_rate": 0.0001900452868915802,
      "loss": 3.1397,
      "step": 142723
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1006641387939453,
      "learning_rate": 0.00019004148099423224,
      "loss": 2.9298,
      "step": 142724
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.291609525680542,
      "learning_rate": 0.00019003767511732768,
      "loss": 3.0223,
      "step": 142725
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.827827215194702,
      "learning_rate": 0.0001900338692608672,
      "loss": 2.9505,
      "step": 142726
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1061649322509766,
      "learning_rate": 0.00019003006342485147,
      "loss": 3.1043,
      "step": 142727
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.579237222671509,
      "learning_rate": 0.00019002625760928125,
      "loss": 2.822,
      "step": 142728
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5879974365234375,
      "learning_rate": 0.00019002245181415733,
      "loss": 2.9778,
      "step": 142729
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.046997547149658,
      "learning_rate": 0.0001900186460394802,
      "loss": 2.9588,
      "step": 142730
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3814754486083984,
      "learning_rate": 0.0001900148402852507,
      "loss": 2.9246,
      "step": 142731
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2228810787200928,
      "learning_rate": 0.00019001103455146945,
      "loss": 2.952,
      "step": 142732
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0016841888427734,
      "learning_rate": 0.00019000722883813734,
      "loss": 2.9041,
      "step": 142733
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.925263524055481,
      "learning_rate": 0.00019000342314525487,
      "loss": 2.8836,
      "step": 142734
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.100621461868286,
      "learning_rate": 0.00018999961747282298,
      "loss": 2.9294,
      "step": 142735
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.693631649017334,
      "learning_rate": 0.0001899958118208421,
      "loss": 2.7144,
      "step": 142736
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2648351192474365,
      "learning_rate": 0.0001899920061893131,
      "loss": 2.8856,
      "step": 142737
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9786792993545532,
      "learning_rate": 0.00018998820057823665,
      "loss": 2.9923,
      "step": 142738
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.467825412750244,
      "learning_rate": 0.00018998439498761347,
      "loss": 2.8372,
      "step": 142739
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9912832975387573,
      "learning_rate": 0.0001899805894174443,
      "loss": 2.9631,
      "step": 142740
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1200780868530273,
      "learning_rate": 0.00018997678386772988,
      "loss": 2.9332,
      "step": 142741
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2066619396209717,
      "learning_rate": 0.00018997297833847079,
      "loss": 3.1649,
      "step": 142742
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5487523078918457,
      "learning_rate": 0.00018996917282966777,
      "loss": 2.7212,
      "step": 142743
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3590734004974365,
      "learning_rate": 0.00018996536734132156,
      "loss": 3.016,
      "step": 142744
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8908865451812744,
      "learning_rate": 0.0001899615618734329,
      "loss": 2.8635,
      "step": 142745
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1062111854553223,
      "learning_rate": 0.00018995775642600239,
      "loss": 3.1433,
      "step": 142746
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7202210426330566,
      "learning_rate": 0.00018995395099903094,
      "loss": 3.1775,
      "step": 142747
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5554816722869873,
      "learning_rate": 0.00018995014559251903,
      "loss": 2.9583,
      "step": 142748
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3195767402648926,
      "learning_rate": 0.0001899463402064675,
      "loss": 3.0104,
      "step": 142749
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.18900465965271,
      "learning_rate": 0.00018994253484087704,
      "loss": 2.8895,
      "step": 142750
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.211629867553711,
      "learning_rate": 0.0001899387294957483,
      "loss": 3.2573,
      "step": 142751
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1515138149261475,
      "learning_rate": 0.000189934924171082,
      "loss": 2.9904,
      "step": 142752
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.049783706665039,
      "learning_rate": 0.000189931118866879,
      "loss": 3.2341,
      "step": 142753
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1387360095977783,
      "learning_rate": 0.00018992731358313976,
      "loss": 2.9096,
      "step": 142754
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.891234874725342,
      "learning_rate": 0.00018992350831986514,
      "loss": 3.2643,
      "step": 142755
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.6065359115600586,
      "learning_rate": 0.0001899197030770558,
      "loss": 3.0158,
      "step": 142756
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9755806922912598,
      "learning_rate": 0.00018991589785471253,
      "loss": 3.1053,
      "step": 142757
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0686159133911133,
      "learning_rate": 0.00018991209265283593,
      "loss": 2.9243,
      "step": 142758
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2234768867492676,
      "learning_rate": 0.00018990828747142676,
      "loss": 2.7329,
      "step": 142759
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0803725719451904,
      "learning_rate": 0.00018990448231048573,
      "loss": 2.9203,
      "step": 142760
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2916033267974854,
      "learning_rate": 0.0001899006771700135,
      "loss": 3.0294,
      "step": 142761
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0680696964263916,
      "learning_rate": 0.00018989687205001082,
      "loss": 2.99,
      "step": 142762
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8929773569107056,
      "learning_rate": 0.00018989306695047837,
      "loss": 3.0844,
      "step": 142763
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.04548716545105,
      "learning_rate": 0.00018988926187141694,
      "loss": 2.9611,
      "step": 142764
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.04514217376709,
      "learning_rate": 0.0001898854568128272,
      "loss": 3.088,
      "step": 142765
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.216282367706299,
      "learning_rate": 0.00018988165177470973,
      "loss": 3.0151,
      "step": 142766
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.404447317123413,
      "learning_rate": 0.00018987784675706537,
      "loss": 2.8748,
      "step": 142767
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9262357950210571,
      "learning_rate": 0.00018987404175989484,
      "loss": 2.9242,
      "step": 142768
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.065776824951172,
      "learning_rate": 0.00018987023678319873,
      "loss": 3.122,
      "step": 142769
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3848748207092285,
      "learning_rate": 0.00018986643182697786,
      "loss": 2.6927,
      "step": 142770
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8982336521148682,
      "learning_rate": 0.000189862626891233,
      "loss": 3.0061,
      "step": 142771
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4291200637817383,
      "learning_rate": 0.00018985882197596467,
      "loss": 3.3016,
      "step": 142772
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.502305746078491,
      "learning_rate": 0.00018985501708117364,
      "loss": 3.1804,
      "step": 142773
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.391880989074707,
      "learning_rate": 0.0001898512122068606,
      "loss": 2.8527,
      "step": 142774
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2160773277282715,
      "learning_rate": 0.00018984740735302635,
      "loss": 3.244,
      "step": 142775
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.172299861907959,
      "learning_rate": 0.00018984360251967153,
      "loss": 3.1336,
      "step": 142776
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.259317636489868,
      "learning_rate": 0.000189839797706797,
      "loss": 2.9735,
      "step": 142777
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.115357875823975,
      "learning_rate": 0.0001898359929144032,
      "loss": 2.7368,
      "step": 142778
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6310415267944336,
      "learning_rate": 0.000189832188142491,
      "loss": 2.8096,
      "step": 142779
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.353590488433838,
      "learning_rate": 0.00018982838339106102,
      "loss": 3.1484,
      "step": 142780
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.371384620666504,
      "learning_rate": 0.00018982457866011405,
      "loss": 2.9005,
      "step": 142781
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9919615983963013,
      "learning_rate": 0.00018982077394965075,
      "loss": 2.9396,
      "step": 142782
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.459000587463379,
      "learning_rate": 0.00018981696925967202,
      "loss": 3.0231,
      "step": 142783
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.236783266067505,
      "learning_rate": 0.00018981316459017823,
      "loss": 2.7194,
      "step": 142784
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9790360927581787,
      "learning_rate": 0.00018980935994117025,
      "loss": 2.9196,
      "step": 142785
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.7097136974334717,
      "learning_rate": 0.00018980555531264884,
      "loss": 2.9368,
      "step": 142786
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4990344047546387,
      "learning_rate": 0.0001898017507046146,
      "loss": 3.1139,
      "step": 142787
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.704702615737915,
      "learning_rate": 0.00018979794611706833,
      "loss": 2.8796,
      "step": 142788
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.88969349861145,
      "learning_rate": 0.0001897941415500108,
      "loss": 3.052,
      "step": 142789
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.730947494506836,
      "learning_rate": 0.00018979033700344247,
      "loss": 3.0832,
      "step": 142790
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3315584659576416,
      "learning_rate": 0.00018978653247736426,
      "loss": 2.8891,
      "step": 142791
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.776299476623535,
      "learning_rate": 0.0001897827279717768,
      "loss": 3.1765,
      "step": 142792
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9305617809295654,
      "learning_rate": 0.00018977892348668077,
      "loss": 3.022,
      "step": 142793
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4892830848693848,
      "learning_rate": 0.00018977511902207696,
      "loss": 2.7915,
      "step": 142794
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3991904258728027,
      "learning_rate": 0.000189771314577966,
      "loss": 2.9398,
      "step": 142795
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.260594129562378,
      "learning_rate": 0.0001897675101543488,
      "loss": 2.8556,
      "step": 142796
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.400670051574707,
      "learning_rate": 0.0001897637057512258,
      "loss": 2.8701,
      "step": 142797
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.21203875541687,
      "learning_rate": 0.00018975990136859775,
      "loss": 3.0632,
      "step": 142798
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9754379987716675,
      "learning_rate": 0.00018975609700646543,
      "loss": 2.9466,
      "step": 142799
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.212512731552124,
      "learning_rate": 0.00018975229266482953,
      "loss": 3.0673,
      "step": 142800
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.358327865600586,
      "learning_rate": 0.00018974848834369074,
      "loss": 3.1207,
      "step": 142801
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7184128761291504,
      "learning_rate": 0.00018974468404304998,
      "loss": 3.1108,
      "step": 142802
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.093406915664673,
      "learning_rate": 0.0001897408797629076,
      "loss": 3.2736,
      "step": 142803
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0382955074310303,
      "learning_rate": 0.00018973707550326444,
      "loss": 3.0799,
      "step": 142804
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3666441440582275,
      "learning_rate": 0.00018973327126412125,
      "loss": 2.9352,
      "step": 142805
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9111863374710083,
      "learning_rate": 0.00018972946704547876,
      "loss": 2.8235,
      "step": 142806
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.681483507156372,
      "learning_rate": 0.0001897256628473376,
      "loss": 3.1481,
      "step": 142807
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.6317601203918457,
      "learning_rate": 0.0001897218586696987,
      "loss": 2.8508,
      "step": 142808
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3162055015563965,
      "learning_rate": 0.00018971805451256245,
      "loss": 3.1351,
      "step": 142809
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.584413528442383,
      "learning_rate": 0.00018971425037592969,
      "loss": 2.9797,
      "step": 142810
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9999570846557617,
      "learning_rate": 0.0001897104462598011,
      "loss": 3.2614,
      "step": 142811
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.839962959289551,
      "learning_rate": 0.00018970664216417745,
      "loss": 3.1892,
      "step": 142812
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.29453444480896,
      "learning_rate": 0.0001897028380890594,
      "loss": 2.9201,
      "step": 142813
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.378342628479004,
      "learning_rate": 0.0001896990340344478,
      "loss": 3.0243,
      "step": 142814
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.072767734527588,
      "learning_rate": 0.00018969523000034316,
      "loss": 2.9923,
      "step": 142815
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9914884567260742,
      "learning_rate": 0.0001896914259867462,
      "loss": 3.0588,
      "step": 142816
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.083726167678833,
      "learning_rate": 0.0001896876219936577,
      "loss": 2.6452,
      "step": 142817
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0625,
      "learning_rate": 0.00018968381802107834,
      "loss": 2.9811,
      "step": 142818
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6454617977142334,
      "learning_rate": 0.00018968001406900886,
      "loss": 3.1676,
      "step": 142819
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.47969126701355,
      "learning_rate": 0.0001896762101374501,
      "loss": 2.9819,
      "step": 142820
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.542008876800537,
      "learning_rate": 0.00018967240622640242,
      "loss": 2.95,
      "step": 142821
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9069205522537231,
      "learning_rate": 0.00018966860233586677,
      "loss": 2.9676,
      "step": 142822
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6907193660736084,
      "learning_rate": 0.00018966479846584382,
      "loss": 2.8934,
      "step": 142823
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0049726963043213,
      "learning_rate": 0.00018966099461633425,
      "loss": 3.0255,
      "step": 142824
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0683634281158447,
      "learning_rate": 0.00018965719078733876,
      "loss": 2.9654,
      "step": 142825
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9926369190216064,
      "learning_rate": 0.00018965338697885816,
      "loss": 3.2584,
      "step": 142826
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.014620065689087,
      "learning_rate": 0.00018964958319089308,
      "loss": 3.2067,
      "step": 142827
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.98842453956604,
      "learning_rate": 0.00018964577942344417,
      "loss": 2.7083,
      "step": 142828
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.204442262649536,
      "learning_rate": 0.0001896419756765122,
      "loss": 3.0953,
      "step": 142829
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1525161266326904,
      "learning_rate": 0.00018963817195009783,
      "loss": 2.9838,
      "step": 142830
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1120550632476807,
      "learning_rate": 0.00018963436824420184,
      "loss": 2.835,
      "step": 142831
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.08858585357666,
      "learning_rate": 0.00018963056455882497,
      "loss": 2.9186,
      "step": 142832
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.072218179702759,
      "learning_rate": 0.00018962676089396782,
      "loss": 3.1035,
      "step": 142833
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2319400310516357,
      "learning_rate": 0.00018962295724963115,
      "loss": 3.0852,
      "step": 142834
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.323087692260742,
      "learning_rate": 0.0001896191536258156,
      "loss": 3.0938,
      "step": 142835
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.727079153060913,
      "learning_rate": 0.00018961535002252195,
      "loss": 3.079,
      "step": 142836
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4557273387908936,
      "learning_rate": 0.0001896115464397509,
      "loss": 2.9249,
      "step": 142837
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.988016963005066,
      "learning_rate": 0.00018960774287750317,
      "loss": 3.1435,
      "step": 142838
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2734696865081787,
      "learning_rate": 0.00018960393933577946,
      "loss": 2.9006,
      "step": 142839
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0244228839874268,
      "learning_rate": 0.0001896001358145804,
      "loss": 2.9553,
      "step": 142840
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4714648723602295,
      "learning_rate": 0.00018959633231390677,
      "loss": 2.7702,
      "step": 142841
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.419637680053711,
      "learning_rate": 0.00018959252883375932,
      "loss": 2.7294,
      "step": 142842
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.985715627670288,
      "learning_rate": 0.00018958872537413866,
      "loss": 2.8961,
      "step": 142843
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2272231578826904,
      "learning_rate": 0.00018958492193504558,
      "loss": 2.7364,
      "step": 142844
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.313430070877075,
      "learning_rate": 0.00018958111851648073,
      "loss": 2.9635,
      "step": 142845
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9087295532226562,
      "learning_rate": 0.00018957731511844478,
      "loss": 2.8375,
      "step": 142846
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0525312423706055,
      "learning_rate": 0.0001895735117409385,
      "loss": 2.9678,
      "step": 142847
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0376274585723877,
      "learning_rate": 0.0001895697083839626,
      "loss": 2.707,
      "step": 142848
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.553532600402832,
      "learning_rate": 0.00018956590504751785,
      "loss": 2.8916,
      "step": 142849
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.197122573852539,
      "learning_rate": 0.00018956210173160488,
      "loss": 2.9602,
      "step": 142850
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.7904841899871826,
      "learning_rate": 0.00018955829843622434,
      "loss": 2.8898,
      "step": 142851
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.478306293487549,
      "learning_rate": 0.00018955449516137694,
      "loss": 2.6451,
      "step": 142852
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.682769536972046,
      "learning_rate": 0.00018955069190706354,
      "loss": 3.2992,
      "step": 142853
    },
    {
      "epoch": 1.86,
      "grad_norm": 5.138718605041504,
      "learning_rate": 0.0001895468886732847,
      "loss": 3.1705,
      "step": 142854
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5113797187805176,
      "learning_rate": 0.00018954308546004117,
      "loss": 3.042,
      "step": 142855
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.3468141555786133,
      "learning_rate": 0.0001895392822673338,
      "loss": 2.8448,
      "step": 142856
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2098498344421387,
      "learning_rate": 0.00018953547909516304,
      "loss": 2.7042,
      "step": 142857
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.4904210567474365,
      "learning_rate": 0.00018953167594352972,
      "loss": 2.6189,
      "step": 142858
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1874566078186035,
      "learning_rate": 0.00018952787281243454,
      "loss": 2.9985,
      "step": 142859
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6105682849884033,
      "learning_rate": 0.0001895240697018782,
      "loss": 2.8119,
      "step": 142860
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0924904346466064,
      "learning_rate": 0.00018952026661186146,
      "loss": 2.9538,
      "step": 142861
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.717398166656494,
      "learning_rate": 0.00018951646354238496,
      "loss": 3.1733,
      "step": 142862
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9562580585479736,
      "learning_rate": 0.00018951266049344956,
      "loss": 2.9519,
      "step": 142863
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.058509349822998,
      "learning_rate": 0.00018950885746505573,
      "loss": 2.9821,
      "step": 142864
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6447980403900146,
      "learning_rate": 0.00018950505445720429,
      "loss": 3.0086,
      "step": 142865
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2714781761169434,
      "learning_rate": 0.00018950125146989595,
      "loss": 2.9015,
      "step": 142866
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.412637233734131,
      "learning_rate": 0.00018949744850313137,
      "loss": 3.0563,
      "step": 142867
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.573362112045288,
      "learning_rate": 0.0001894936455569114,
      "loss": 2.8475,
      "step": 142868
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9066250324249268,
      "learning_rate": 0.00018948984263123667,
      "loss": 2.9719,
      "step": 142869
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.107102155685425,
      "learning_rate": 0.00018948603972610773,
      "loss": 2.9908,
      "step": 142870
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.243462562561035,
      "learning_rate": 0.0001894822368415255,
      "loss": 3.0917,
      "step": 142871
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8665199279785156,
      "learning_rate": 0.00018947843397749056,
      "loss": 2.8126,
      "step": 142872
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7005279064178467,
      "learning_rate": 0.00018947463113400372,
      "loss": 3.027,
      "step": 142873
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.007871389389038,
      "learning_rate": 0.00018947082831106556,
      "loss": 2.7499,
      "step": 142874
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.376621723175049,
      "learning_rate": 0.000189467025508677,
      "loss": 2.9876,
      "step": 142875
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.144039154052734,
      "learning_rate": 0.00018946322272683853,
      "loss": 2.8541,
      "step": 142876
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.830509662628174,
      "learning_rate": 0.0001894594199655509,
      "loss": 3.0575,
      "step": 142877
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2522778511047363,
      "learning_rate": 0.00018945561722481487,
      "loss": 2.9693,
      "step": 142878
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8988736867904663,
      "learning_rate": 0.00018945181450463107,
      "loss": 2.7272,
      "step": 142879
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1387345790863037,
      "learning_rate": 0.00018944801180500033,
      "loss": 2.7531,
      "step": 142880
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.700458526611328,
      "learning_rate": 0.00018944420912592338,
      "loss": 2.9021,
      "step": 142881
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.2911593914031982,
      "learning_rate": 0.0001894404064674007,
      "loss": 2.9842,
      "step": 142882
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.589606761932373,
      "learning_rate": 0.0001894366038294332,
      "loss": 2.9519,
      "step": 142883
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0310490131378174,
      "learning_rate": 0.00018943280121202144,
      "loss": 2.8868,
      "step": 142884
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1076128482818604,
      "learning_rate": 0.00018942899861516626,
      "loss": 3.0638,
      "step": 142885
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.100536823272705,
      "learning_rate": 0.00018942519603886834,
      "loss": 2.786,
      "step": 142886
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0534322261810303,
      "learning_rate": 0.00018942139348312846,
      "loss": 2.8745,
      "step": 142887
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5438575744628906,
      "learning_rate": 0.00018941759094794706,
      "loss": 2.9598,
      "step": 142888
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1132922172546387,
      "learning_rate": 0.0001894137884333251,
      "loss": 3.1538,
      "step": 142889
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2235848903656006,
      "learning_rate": 0.00018940998593926312,
      "loss": 2.7437,
      "step": 142890
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.498988389968872,
      "learning_rate": 0.000189406183465762,
      "loss": 2.8794,
      "step": 142891
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.472010850906372,
      "learning_rate": 0.00018940238101282229,
      "loss": 3.0617,
      "step": 142892
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.935795307159424,
      "learning_rate": 0.0001893985785804449,
      "loss": 2.9498,
      "step": 142893
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7969188690185547,
      "learning_rate": 0.0001893947761686303,
      "loss": 3.0069,
      "step": 142894
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1772522926330566,
      "learning_rate": 0.00018939097377737926,
      "loss": 2.9253,
      "step": 142895
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.010010719299316,
      "learning_rate": 0.00018938717140669259,
      "loss": 2.8978,
      "step": 142896
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1559929847717285,
      "learning_rate": 0.0001893833690565709,
      "loss": 3.0987,
      "step": 142897
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.274904251098633,
      "learning_rate": 0.0001893795667270149,
      "loss": 3.0337,
      "step": 142898
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.363525390625,
      "learning_rate": 0.0001893757644180255,
      "loss": 3.0173,
      "step": 142899
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4772353172302246,
      "learning_rate": 0.00018937196212960307,
      "loss": 3.0394,
      "step": 142900
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4330642223358154,
      "learning_rate": 0.0001893681598617485,
      "loss": 2.9613,
      "step": 142901
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.223388195037842,
      "learning_rate": 0.00018936435761446246,
      "loss": 3.0208,
      "step": 142902
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.889992117881775,
      "learning_rate": 0.0001893605553877457,
      "loss": 2.8791,
      "step": 142903
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.296459674835205,
      "learning_rate": 0.0001893567531815989,
      "loss": 3.0325,
      "step": 142904
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.183825969696045,
      "learning_rate": 0.00018935295099602287,
      "loss": 2.9696,
      "step": 142905
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.290037155151367,
      "learning_rate": 0.00018934914883101807,
      "loss": 2.897,
      "step": 142906
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6822452545166016,
      "learning_rate": 0.00018934534668658542,
      "loss": 2.7921,
      "step": 142907
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2771196365356445,
      "learning_rate": 0.0001893415445627255,
      "loss": 3.1849,
      "step": 142908
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5579147338867188,
      "learning_rate": 0.00018933774245943912,
      "loss": 3.138,
      "step": 142909
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4917385578155518,
      "learning_rate": 0.00018933394037672693,
      "loss": 3.0696,
      "step": 142910
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9879248142242432,
      "learning_rate": 0.00018933013831458969,
      "loss": 2.8716,
      "step": 142911
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3555822372436523,
      "learning_rate": 0.0001893263362730281,
      "loss": 2.9333,
      "step": 142912
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.756598711013794,
      "learning_rate": 0.00018932253425204274,
      "loss": 2.9211,
      "step": 142913
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3668253421783447,
      "learning_rate": 0.00018931873225163444,
      "loss": 2.9178,
      "step": 142914
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0940258502960205,
      "learning_rate": 0.00018931493027180384,
      "loss": 2.9147,
      "step": 142915
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0894675254821777,
      "learning_rate": 0.0001893111283125517,
      "loss": 3.0088,
      "step": 142916
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.377596855163574,
      "learning_rate": 0.00018930732637387876,
      "loss": 2.6991,
      "step": 142917
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3369529247283936,
      "learning_rate": 0.00018930352445578567,
      "loss": 2.7534,
      "step": 142918
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.46905517578125,
      "learning_rate": 0.00018929972255827312,
      "loss": 3.239,
      "step": 142919
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8864827156066895,
      "learning_rate": 0.00018929592068134184,
      "loss": 2.8507,
      "step": 142920
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3147292137145996,
      "learning_rate": 0.00018929211882499252,
      "loss": 2.7942,
      "step": 142921
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2840099334716797,
      "learning_rate": 0.0001892883169892259,
      "loss": 2.9285,
      "step": 142922
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2661502361297607,
      "learning_rate": 0.0001892845151740427,
      "loss": 2.8825,
      "step": 142923
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9810643196105957,
      "learning_rate": 0.00018928071337944357,
      "loss": 2.8029,
      "step": 142924
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3001420497894287,
      "learning_rate": 0.00018927691160542924,
      "loss": 2.6075,
      "step": 142925
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.190415143966675,
      "learning_rate": 0.0001892731098520005,
      "loss": 3.0496,
      "step": 142926
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.70436429977417,
      "learning_rate": 0.0001892693081191579,
      "loss": 2.9695,
      "step": 142927
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.757312059402466,
      "learning_rate": 0.0001892655064069022,
      "loss": 3.1096,
      "step": 142928
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2440903186798096,
      "learning_rate": 0.00018926170471523417,
      "loss": 2.8566,
      "step": 142929
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6008312702178955,
      "learning_rate": 0.0001892579030441545,
      "loss": 2.879,
      "step": 142930
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9155311584472656,
      "learning_rate": 0.00018925410139366384,
      "loss": 2.8982,
      "step": 142931
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2939233779907227,
      "learning_rate": 0.00018925029976376293,
      "loss": 3.0039,
      "step": 142932
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1237549781799316,
      "learning_rate": 0.00018924649815445254,
      "loss": 3.0052,
      "step": 142933
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5624449253082275,
      "learning_rate": 0.0001892426965657333,
      "loss": 2.6924,
      "step": 142934
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2016425132751465,
      "learning_rate": 0.00018923889499760586,
      "loss": 2.9028,
      "step": 142935
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.081535816192627,
      "learning_rate": 0.0001892350934500711,
      "loss": 2.9256,
      "step": 142936
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6500003337860107,
      "learning_rate": 0.00018923129192312957,
      "loss": 2.8062,
      "step": 142937
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1842854022979736,
      "learning_rate": 0.00018922749041678204,
      "loss": 2.9518,
      "step": 142938
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.903468132019043,
      "learning_rate": 0.00018922368893102917,
      "loss": 3.0255,
      "step": 142939
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.251617908477783,
      "learning_rate": 0.00018921988746587172,
      "loss": 2.9329,
      "step": 142940
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.597763776779175,
      "learning_rate": 0.00018921608602131046,
      "loss": 2.8493,
      "step": 142941
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.358445882797241,
      "learning_rate": 0.00018921228459734603,
      "loss": 3.0302,
      "step": 142942
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.152989625930786,
      "learning_rate": 0.00018920848319397909,
      "loss": 3.1876,
      "step": 142943
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3436012268066406,
      "learning_rate": 0.0001892046818112103,
      "loss": 3.0376,
      "step": 142944
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1953110694885254,
      "learning_rate": 0.00018920088044904052,
      "loss": 2.7834,
      "step": 142945
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2457847595214844,
      "learning_rate": 0.00018919707910747036,
      "loss": 2.7565,
      "step": 142946
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.433661699295044,
      "learning_rate": 0.00018919327778650055,
      "loss": 3.1382,
      "step": 142947
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.418553590774536,
      "learning_rate": 0.00018918947648613194,
      "loss": 3.0985,
      "step": 142948
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3096671104431152,
      "learning_rate": 0.000189185675206365,
      "loss": 3.2199,
      "step": 142949
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7987921237945557,
      "learning_rate": 0.0001891818739472005,
      "loss": 2.9914,
      "step": 142950
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1174581050872803,
      "learning_rate": 0.00018917807270863916,
      "loss": 3.0706,
      "step": 142951
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2841436862945557,
      "learning_rate": 0.00018917427149068176,
      "loss": 2.9436,
      "step": 142952
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.170252799987793,
      "learning_rate": 0.00018917047029332892,
      "loss": 2.9949,
      "step": 142953
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1983888149261475,
      "learning_rate": 0.0001891666691165815,
      "loss": 2.7853,
      "step": 142954
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.015932083129883,
      "learning_rate": 0.00018916286796044,
      "loss": 3.0566,
      "step": 142955
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.292402982711792,
      "learning_rate": 0.0001891590668249052,
      "loss": 3.0285,
      "step": 142956
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7528817653656006,
      "learning_rate": 0.0001891552657099778,
      "loss": 3.0761,
      "step": 142957
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.097078323364258,
      "learning_rate": 0.00018915146461565856,
      "loss": 2.9381,
      "step": 142958
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.369373083114624,
      "learning_rate": 0.00018914766354194814,
      "loss": 2.9878,
      "step": 142959
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5586371421813965,
      "learning_rate": 0.00018914386248884737,
      "loss": 3.0499,
      "step": 142960
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4661083221435547,
      "learning_rate": 0.00018914006145635674,
      "loss": 2.9033,
      "step": 142961
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.513673782348633,
      "learning_rate": 0.0001891362604444771,
      "loss": 3.0078,
      "step": 142962
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8988546133041382,
      "learning_rate": 0.00018913245945320907,
      "loss": 3.0373,
      "step": 142963
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.261887073516846,
      "learning_rate": 0.00018912865848255342,
      "loss": 2.7689,
      "step": 142964
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.64337420463562,
      "learning_rate": 0.00018912485753251084,
      "loss": 2.8264,
      "step": 142965
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.485530376434326,
      "learning_rate": 0.00018912105660308216,
      "loss": 2.7276,
      "step": 142966
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.492440700531006,
      "learning_rate": 0.00018911725569426788,
      "loss": 3.091,
      "step": 142967
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8288218975067139,
      "learning_rate": 0.00018911345480606876,
      "loss": 2.9625,
      "step": 142968
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0302236080169678,
      "learning_rate": 0.0001891096539384856,
      "loss": 3.1018,
      "step": 142969
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.109959363937378,
      "learning_rate": 0.000189105853091519,
      "loss": 3.0182,
      "step": 142970
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5609419345855713,
      "learning_rate": 0.00018910205226516972,
      "loss": 3.1029,
      "step": 142971
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.419306993484497,
      "learning_rate": 0.0001890982514594386,
      "loss": 2.9837,
      "step": 142972
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.967937707901001,
      "learning_rate": 0.00018909445067432607,
      "loss": 2.9457,
      "step": 142973
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8201982975006104,
      "learning_rate": 0.00018909064990983295,
      "loss": 2.9272,
      "step": 142974
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.242598056793213,
      "learning_rate": 0.00018908684916596003,
      "loss": 3.04,
      "step": 142975
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.068664312362671,
      "learning_rate": 0.00018908304844270795,
      "loss": 3.1666,
      "step": 142976
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.149535894393921,
      "learning_rate": 0.00018907924774007738,
      "loss": 3.1038,
      "step": 142977
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9668776988983154,
      "learning_rate": 0.0001890754470580692,
      "loss": 3.167,
      "step": 142978
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4390294551849365,
      "learning_rate": 0.0001890716463966839,
      "loss": 2.7692,
      "step": 142979
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3642923831939697,
      "learning_rate": 0.00018906784575592226,
      "loss": 3.1247,
      "step": 142980
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9058213233947754,
      "learning_rate": 0.00018906404513578502,
      "loss": 2.841,
      "step": 142981
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7616357803344727,
      "learning_rate": 0.00018906024453627283,
      "loss": 3.0814,
      "step": 142982
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.67704176902771,
      "learning_rate": 0.00018905644395738645,
      "loss": 3.0077,
      "step": 142983
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.867229461669922,
      "learning_rate": 0.0001890526433991267,
      "loss": 2.8594,
      "step": 142984
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.00337815284729,
      "learning_rate": 0.00018904884286149404,
      "loss": 2.9406,
      "step": 142985
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7073724269866943,
      "learning_rate": 0.0001890450423444893,
      "loss": 2.9022,
      "step": 142986
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.4032375812530518,
      "learning_rate": 0.00018904124184811316,
      "loss": 2.7091,
      "step": 142987
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.347245693206787,
      "learning_rate": 0.0001890374413723663,
      "loss": 3.0643,
      "step": 142988
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8217687606811523,
      "learning_rate": 0.00018903364091724957,
      "loss": 3.1353,
      "step": 142989
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0355217456817627,
      "learning_rate": 0.00018902984048276366,
      "loss": 3.1533,
      "step": 142990
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0056724548339844,
      "learning_rate": 0.0001890260400689091,
      "loss": 3.2232,
      "step": 142991
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.139293670654297,
      "learning_rate": 0.00018902223967568667,
      "loss": 3.0346,
      "step": 142992
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.668647050857544,
      "learning_rate": 0.0001890184393030971,
      "loss": 2.87,
      "step": 142993
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.708495616912842,
      "learning_rate": 0.00018901463895114112,
      "loss": 3.1513,
      "step": 142994
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.741149425506592,
      "learning_rate": 0.00018901083861981935,
      "loss": 3.072,
      "step": 142995
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.787109375,
      "learning_rate": 0.0001890070383091326,
      "loss": 3.0148,
      "step": 142996
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.343186140060425,
      "learning_rate": 0.0001890032380190817,
      "loss": 3.0638,
      "step": 142997
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1701927185058594,
      "learning_rate": 0.000188999437749667,
      "loss": 2.8985,
      "step": 142998
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.444664239883423,
      "learning_rate": 0.00018899563750088947,
      "loss": 2.9019,
      "step": 142999
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.3673415184020996,
      "learning_rate": 0.0001889918372727497,
      "loss": 2.917,
      "step": 143000
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.945634603500366,
      "learning_rate": 0.0001889880370652484,
      "loss": 2.8575,
      "step": 143001
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3165082931518555,
      "learning_rate": 0.00018898423687838643,
      "loss": 3.2573,
      "step": 143002
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.292678117752075,
      "learning_rate": 0.0001889804367121644,
      "loss": 3.1156,
      "step": 143003
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4301798343658447,
      "learning_rate": 0.00018897663656658296,
      "loss": 2.6794,
      "step": 143004
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.2590456008911133,
      "learning_rate": 0.00018897283644164282,
      "loss": 2.8455,
      "step": 143005
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.11846923828125,
      "learning_rate": 0.00018896903633734474,
      "loss": 2.8916,
      "step": 143006
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3396174907684326,
      "learning_rate": 0.0001889652362536894,
      "loss": 3.0319,
      "step": 143007
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.978792428970337,
      "learning_rate": 0.00018896143619067754,
      "loss": 2.942,
      "step": 143008
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2947497367858887,
      "learning_rate": 0.0001889576361483099,
      "loss": 2.8931,
      "step": 143009
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.507866144180298,
      "learning_rate": 0.00018895383612658705,
      "loss": 2.7427,
      "step": 143010
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4916508197784424,
      "learning_rate": 0.00018895003612550985,
      "loss": 3.1168,
      "step": 143011
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7892370223999023,
      "learning_rate": 0.0001889462361450789,
      "loss": 3.0386,
      "step": 143012
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4584474563598633,
      "learning_rate": 0.00018894243618529488,
      "loss": 3.1121,
      "step": 143013
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.328251361846924,
      "learning_rate": 0.0001889386362461586,
      "loss": 2.8746,
      "step": 143014
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9736560583114624,
      "learning_rate": 0.00018893483632767078,
      "loss": 3.0625,
      "step": 143015
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.1223859786987305,
      "learning_rate": 0.00018893103642983198,
      "loss": 2.9244,
      "step": 143016
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.318568706512451,
      "learning_rate": 0.00018892723655264304,
      "loss": 2.9979,
      "step": 143017
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7025351524353027,
      "learning_rate": 0.00018892343669610465,
      "loss": 2.9941,
      "step": 143018
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3716611862182617,
      "learning_rate": 0.0001889196368602175,
      "loss": 3.0424,
      "step": 143019
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.6735808849334717,
      "learning_rate": 0.00018891583704498219,
      "loss": 2.9697,
      "step": 143020
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.103590250015259,
      "learning_rate": 0.00018891203725039963,
      "loss": 3.0236,
      "step": 143021
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8133435249328613,
      "learning_rate": 0.00018890823747647037,
      "loss": 2.8397,
      "step": 143022
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.38875675201416,
      "learning_rate": 0.00018890443772319514,
      "loss": 3.1658,
      "step": 143023
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.4319868087768555,
      "learning_rate": 0.0001889006379905747,
      "loss": 3.0839,
      "step": 143024
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9123528003692627,
      "learning_rate": 0.00018889683827860975,
      "loss": 3.0343,
      "step": 143025
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.374530076980591,
      "learning_rate": 0.00018889303858730093,
      "loss": 3.1438,
      "step": 143026
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2377490997314453,
      "learning_rate": 0.00018888923891664908,
      "loss": 3.0217,
      "step": 143027
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6616411209106445,
      "learning_rate": 0.00018888543926665475,
      "loss": 2.8285,
      "step": 143028
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.067598819732666,
      "learning_rate": 0.00018888163963731868,
      "loss": 3.1326,
      "step": 143029
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.321866273880005,
      "learning_rate": 0.00018887784002864165,
      "loss": 2.8857,
      "step": 143030
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0022776126861572,
      "learning_rate": 0.0001888740404406243,
      "loss": 2.7672,
      "step": 143031
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4842262268066406,
      "learning_rate": 0.00018887024087326738,
      "loss": 3.0625,
      "step": 143032
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1889891624450684,
      "learning_rate": 0.00018886644132657169,
      "loss": 2.7301,
      "step": 143033
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4886021614074707,
      "learning_rate": 0.0001888626418005377,
      "loss": 2.9996,
      "step": 143034
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2555294036865234,
      "learning_rate": 0.00018885884229516626,
      "loss": 2.9263,
      "step": 143035
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3502628803253174,
      "learning_rate": 0.00018885504281045804,
      "loss": 2.978,
      "step": 143036
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3439176082611084,
      "learning_rate": 0.0001888512433464138,
      "loss": 2.7488,
      "step": 143037
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.822751045227051,
      "learning_rate": 0.00018884744390303417,
      "loss": 2.803,
      "step": 143038
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1156115531921387,
      "learning_rate": 0.00018884364448032003,
      "loss": 3.0385,
      "step": 143039
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2909061908721924,
      "learning_rate": 0.00018883984507827185,
      "loss": 2.858,
      "step": 143040
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.252927780151367,
      "learning_rate": 0.00018883604569689042,
      "loss": 3.0028,
      "step": 143041
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0826244354248047,
      "learning_rate": 0.0001888322463361765,
      "loss": 2.9317,
      "step": 143042
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3385164737701416,
      "learning_rate": 0.00018882844699613074,
      "loss": 2.9522,
      "step": 143043
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2592380046844482,
      "learning_rate": 0.0001888246476767539,
      "loss": 3.033,
      "step": 143044
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.149468421936035,
      "learning_rate": 0.0001888208483780467,
      "loss": 2.9553,
      "step": 143045
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2637598514556885,
      "learning_rate": 0.00018881704910000973,
      "loss": 3.139,
      "step": 143046
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.565833330154419,
      "learning_rate": 0.0001888132498426438,
      "loss": 2.92,
      "step": 143047
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1325759887695312,
      "learning_rate": 0.00018880945060594954,
      "loss": 3.0381,
      "step": 143048
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6541521549224854,
      "learning_rate": 0.00018880565138992773,
      "loss": 2.9965,
      "step": 143049
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.103888511657715,
      "learning_rate": 0.00018880185219457906,
      "loss": 3.0881,
      "step": 143050
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1825180053710938,
      "learning_rate": 0.0001887980530199043,
      "loss": 2.9848,
      "step": 143051
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.776033878326416,
      "learning_rate": 0.000188794253865904,
      "loss": 3.0804,
      "step": 143052
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.618333101272583,
      "learning_rate": 0.00018879045473257894,
      "loss": 2.8503,
      "step": 143053
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7374815940856934,
      "learning_rate": 0.00018878665561992982,
      "loss": 2.8962,
      "step": 143054
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.6736855506896973,
      "learning_rate": 0.00018878285652795736,
      "loss": 3.0206,
      "step": 143055
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.040816068649292,
      "learning_rate": 0.00018877905745666226,
      "loss": 2.9552,
      "step": 143056
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8781330585479736,
      "learning_rate": 0.00018877525840604536,
      "loss": 3.167,
      "step": 143057
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.702649116516113,
      "learning_rate": 0.00018877145937610713,
      "loss": 2.9736,
      "step": 143058
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.624440908432007,
      "learning_rate": 0.0001887676603668484,
      "loss": 2.7903,
      "step": 143059
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.474862575531006,
      "learning_rate": 0.00018876386137826984,
      "loss": 2.939,
      "step": 143060
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4301598072052,
      "learning_rate": 0.00018876006241037217,
      "loss": 2.9421,
      "step": 143061
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2719244956970215,
      "learning_rate": 0.0001887562634631561,
      "loss": 2.9817,
      "step": 143062
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9809634685516357,
      "learning_rate": 0.00018875246453662237,
      "loss": 3.0413,
      "step": 143063
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.453202962875366,
      "learning_rate": 0.00018874866563077178,
      "loss": 2.9889,
      "step": 143064
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.399806976318359,
      "learning_rate": 0.00018874486674560475,
      "loss": 3.0862,
      "step": 143065
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.149595260620117,
      "learning_rate": 0.00018874106788112217,
      "loss": 2.9162,
      "step": 143066
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.720816135406494,
      "learning_rate": 0.00018873726903732472,
      "loss": 2.7904,
      "step": 143067
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.702467918395996,
      "learning_rate": 0.00018873347021421314,
      "loss": 2.802,
      "step": 143068
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.565298557281494,
      "learning_rate": 0.00018872967141178807,
      "loss": 2.9723,
      "step": 143069
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.5908591747283936,
      "learning_rate": 0.0001887258726300504,
      "loss": 2.8344,
      "step": 143070
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7336997985839844,
      "learning_rate": 0.00018872207386900057,
      "loss": 2.6363,
      "step": 143071
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.8828961849212646,
      "learning_rate": 0.0001887182751286394,
      "loss": 3.0329,
      "step": 143072
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.444641590118408,
      "learning_rate": 0.00018871447640896762,
      "loss": 2.9381,
      "step": 143073
    },
    {
      "epoch": 1.86,
      "grad_norm": 5.48537540435791,
      "learning_rate": 0.00018871067770998587,
      "loss": 3.0245,
      "step": 143074
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.700690746307373,
      "learning_rate": 0.00018870687903169495,
      "loss": 3.0366,
      "step": 143075
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0904829502105713,
      "learning_rate": 0.00018870308037409564,
      "loss": 3.0563,
      "step": 143076
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7008960247039795,
      "learning_rate": 0.0001886992817371884,
      "loss": 3.0569,
      "step": 143077
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3157618045806885,
      "learning_rate": 0.00018869548312097403,
      "loss": 3.0344,
      "step": 143078
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.3502044677734375,
      "learning_rate": 0.0001886916845254533,
      "loss": 2.9455,
      "step": 143079
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1202189922332764,
      "learning_rate": 0.00018868788595062693,
      "loss": 3.1822,
      "step": 143080
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.219245433807373,
      "learning_rate": 0.00018868408739649553,
      "loss": 3.0873,
      "step": 143081
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6824584007263184,
      "learning_rate": 0.00018868028886305998,
      "loss": 2.962,
      "step": 143082
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.105607032775879,
      "learning_rate": 0.00018867649035032077,
      "loss": 3.1688,
      "step": 143083
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6603195667266846,
      "learning_rate": 0.0001886726918582787,
      "loss": 2.9892,
      "step": 143084
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.153710126876831,
      "learning_rate": 0.00018866889338693446,
      "loss": 2.9344,
      "step": 143085
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1384894847869873,
      "learning_rate": 0.00018866509493628878,
      "loss": 2.8061,
      "step": 143086
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0947608947753906,
      "learning_rate": 0.00018866129650634242,
      "loss": 2.9722,
      "step": 143087
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3642990589141846,
      "learning_rate": 0.00018865749809709607,
      "loss": 2.9214,
      "step": 143088
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.275693655014038,
      "learning_rate": 0.00018865369970855027,
      "loss": 2.983,
      "step": 143089
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2284889221191406,
      "learning_rate": 0.0001886499013407059,
      "loss": 2.9676,
      "step": 143090
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1094541549682617,
      "learning_rate": 0.00018864610299356357,
      "loss": 2.9548,
      "step": 143091
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.154327154159546,
      "learning_rate": 0.00018864230466712406,
      "loss": 3.1456,
      "step": 143092
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3050222396850586,
      "learning_rate": 0.00018863850636138802,
      "loss": 3.0453,
      "step": 143093
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1694626808166504,
      "learning_rate": 0.00018863470807635628,
      "loss": 3.0771,
      "step": 143094
    },
    {
      "epoch": 1.86,
      "grad_norm": 5.04877233505249,
      "learning_rate": 0.00018863090981202944,
      "loss": 2.9692,
      "step": 143095
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.967999219894409,
      "learning_rate": 0.00018862711156840815,
      "loss": 3.0407,
      "step": 143096
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.456906318664551,
      "learning_rate": 0.00018862331334549317,
      "loss": 2.9552,
      "step": 143097
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2298123836517334,
      "learning_rate": 0.0001886195151432852,
      "loss": 2.8691,
      "step": 143098
    },
    {
      "epoch": 1.86,
      "grad_norm": 6.092550277709961,
      "learning_rate": 0.000188615716961785,
      "loss": 2.9011,
      "step": 143099
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.51924467086792,
      "learning_rate": 0.00018861191880099328,
      "loss": 3.052,
      "step": 143100
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.07647705078125,
      "learning_rate": 0.00018860812066091067,
      "loss": 2.9784,
      "step": 143101
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.02683424949646,
      "learning_rate": 0.0001886043225415379,
      "loss": 3.0405,
      "step": 143102
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5026066303253174,
      "learning_rate": 0.0001886005244428758,
      "loss": 2.8795,
      "step": 143103
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.367115497589111,
      "learning_rate": 0.00018859672636492485,
      "loss": 3.0361,
      "step": 143104
    },
    {
      "epoch": 1.86,
      "grad_norm": 6.266107559204102,
      "learning_rate": 0.00018859292830768586,
      "loss": 2.9481,
      "step": 143105
    },
    {
      "epoch": 1.86,
      "grad_norm": 6.162874698638916,
      "learning_rate": 0.00018858913027115963,
      "loss": 3.0292,
      "step": 143106
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.986901044845581,
      "learning_rate": 0.00018858533225534673,
      "loss": 2.8762,
      "step": 143107
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.402804136276245,
      "learning_rate": 0.00018858153426024792,
      "loss": 2.8409,
      "step": 143108
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.562551259994507,
      "learning_rate": 0.00018857773628586387,
      "loss": 2.9515,
      "step": 143109
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.030644416809082,
      "learning_rate": 0.00018857393833219543,
      "loss": 3.4068,
      "step": 143110
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.7975146770477295,
      "learning_rate": 0.00018857014039924309,
      "loss": 3.1334,
      "step": 143111
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.4432971477508545,
      "learning_rate": 0.00018856634248700776,
      "loss": 2.9374,
      "step": 143112
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7332892417907715,
      "learning_rate": 0.00018856254459549,
      "loss": 2.7437,
      "step": 143113
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.777750253677368,
      "learning_rate": 0.00018855874672469054,
      "loss": 2.9458,
      "step": 143114
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.74462628364563,
      "learning_rate": 0.0001885549488746101,
      "loss": 2.8159,
      "step": 143115
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.1536624431610107,
      "learning_rate": 0.0001885511510452494,
      "loss": 2.9635,
      "step": 143116
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9404808282852173,
      "learning_rate": 0.00018854735323660924,
      "loss": 2.8499,
      "step": 143117
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2394022941589355,
      "learning_rate": 0.0001885435554486902,
      "loss": 2.975,
      "step": 143118
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5833561420440674,
      "learning_rate": 0.000188539757681493,
      "loss": 2.9865,
      "step": 143119
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.358585834503174,
      "learning_rate": 0.0001885359599350183,
      "loss": 2.7638,
      "step": 143120
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0726141929626465,
      "learning_rate": 0.00018853216220926692,
      "loss": 2.9874,
      "step": 143121
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.065037727355957,
      "learning_rate": 0.00018852836450423952,
      "loss": 3.0871,
      "step": 143122
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1622424125671387,
      "learning_rate": 0.00018852456681993674,
      "loss": 3.0687,
      "step": 143123
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2291946411132812,
      "learning_rate": 0.00018852076915635955,
      "loss": 3.1597,
      "step": 143124
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5985827445983887,
      "learning_rate": 0.0001885169715135083,
      "loss": 2.767,
      "step": 143125
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9566508531570435,
      "learning_rate": 0.0001885131738913838,
      "loss": 3.1618,
      "step": 143126
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2218170166015625,
      "learning_rate": 0.00018850937628998686,
      "loss": 3.1003,
      "step": 143127
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1398744583129883,
      "learning_rate": 0.0001885055787093181,
      "loss": 2.8552,
      "step": 143128
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1526296138763428,
      "learning_rate": 0.0001885017811493783,
      "loss": 3.1059,
      "step": 143129
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3086252212524414,
      "learning_rate": 0.0001884979836101681,
      "loss": 2.9914,
      "step": 143130
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0782318115234375,
      "learning_rate": 0.00018849418609168835,
      "loss": 3.2603,
      "step": 143131
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9312856197357178,
      "learning_rate": 0.0001884903885939395,
      "loss": 2.7114,
      "step": 143132
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.35952091217041,
      "learning_rate": 0.0001884865911169224,
      "loss": 3.0381,
      "step": 143133
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4471206665039062,
      "learning_rate": 0.00018848279366063777,
      "loss": 2.8253,
      "step": 143134
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.488722085952759,
      "learning_rate": 0.0001884789962250863,
      "loss": 2.831,
      "step": 143135
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9054830074310303,
      "learning_rate": 0.00018847519881026864,
      "loss": 2.7533,
      "step": 143136
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.491328239440918,
      "learning_rate": 0.0001884714014161857,
      "loss": 2.9213,
      "step": 143137
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1801044940948486,
      "learning_rate": 0.00018846760404283788,
      "loss": 3.0925,
      "step": 143138
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.173715114593506,
      "learning_rate": 0.00018846380669022605,
      "loss": 2.8344,
      "step": 143139
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1178760528564453,
      "learning_rate": 0.00018846000935835095,
      "loss": 2.8597,
      "step": 143140
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1464009284973145,
      "learning_rate": 0.00018845621204721316,
      "loss": 2.9768,
      "step": 143141
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.5986456871032715,
      "learning_rate": 0.0001884524147568135,
      "loss": 2.9685,
      "step": 143142
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.432271957397461,
      "learning_rate": 0.0001884486174871528,
      "loss": 2.7525,
      "step": 143143
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9628148078918457,
      "learning_rate": 0.00018844482023823146,
      "loss": 3.205,
      "step": 143144
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1370139122009277,
      "learning_rate": 0.00018844102301005035,
      "loss": 3.0931,
      "step": 143145
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0817599296569824,
      "learning_rate": 0.00018843722580261013,
      "loss": 2.7844,
      "step": 143146
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.6372838020324707,
      "learning_rate": 0.00018843342861591157,
      "loss": 3.1962,
      "step": 143147
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1541192531585693,
      "learning_rate": 0.00018842963144995533,
      "loss": 2.8452,
      "step": 143148
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.822814702987671,
      "learning_rate": 0.0001884258343047422,
      "loss": 3.0499,
      "step": 143149
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9843995571136475,
      "learning_rate": 0.00018842203718027277,
      "loss": 2.9629,
      "step": 143150
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2934765815734863,
      "learning_rate": 0.00018841824007654774,
      "loss": 2.8938,
      "step": 143151
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.306962251663208,
      "learning_rate": 0.00018841444299356788,
      "loss": 2.9076,
      "step": 143152
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2426934242248535,
      "learning_rate": 0.00018841064593133386,
      "loss": 3.0042,
      "step": 143153
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2857120037078857,
      "learning_rate": 0.00018840684888984645,
      "loss": 2.955,
      "step": 143154
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9557894468307495,
      "learning_rate": 0.0001884030518691064,
      "loss": 3.1059,
      "step": 143155
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.474621057510376,
      "learning_rate": 0.0001883992548691142,
      "loss": 3.0034,
      "step": 143156
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.284015417098999,
      "learning_rate": 0.00018839545788987074,
      "loss": 2.8539,
      "step": 143157
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1379218101501465,
      "learning_rate": 0.0001883916609313766,
      "loss": 2.919,
      "step": 143158
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2423207759857178,
      "learning_rate": 0.00018838786399363262,
      "loss": 2.9008,
      "step": 143159
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4526119232177734,
      "learning_rate": 0.0001883840670766394,
      "loss": 2.9053,
      "step": 143160
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2193493843078613,
      "learning_rate": 0.0001883802701803978,
      "loss": 2.7485,
      "step": 143161
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.393869161605835,
      "learning_rate": 0.00018837647330490831,
      "loss": 3.1039,
      "step": 143162
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.11232852935791,
      "learning_rate": 0.00018837267645017177,
      "loss": 2.8918,
      "step": 143163
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7717514038085938,
      "learning_rate": 0.00018836887961618882,
      "loss": 3.0422,
      "step": 143164
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8915183544158936,
      "learning_rate": 0.00018836508280296025,
      "loss": 2.8572,
      "step": 143165
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3035640716552734,
      "learning_rate": 0.00018836128601048665,
      "loss": 3.0812,
      "step": 143166
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.436491012573242,
      "learning_rate": 0.00018835748923876896,
      "loss": 3.0238,
      "step": 143167
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0784285068511963,
      "learning_rate": 0.0001883536924878076,
      "loss": 2.7852,
      "step": 143168
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8900775909423828,
      "learning_rate": 0.00018834989575760338,
      "loss": 3.111,
      "step": 143169
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.36983060836792,
      "learning_rate": 0.00018834609904815703,
      "loss": 2.6666,
      "step": 143170
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.187619686126709,
      "learning_rate": 0.00018834230235946923,
      "loss": 3.0624,
      "step": 143171
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.04823899269104,
      "learning_rate": 0.00018833850569154072,
      "loss": 3.0977,
      "step": 143172
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.771398663520813,
      "learning_rate": 0.00018833470904437233,
      "loss": 3.0355,
      "step": 143173
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.824432611465454,
      "learning_rate": 0.0001883309124179645,
      "loss": 2.896,
      "step": 143174
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.8315391540527344,
      "learning_rate": 0.00018832711581231803,
      "loss": 2.8531,
      "step": 143175
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3153676986694336,
      "learning_rate": 0.00018832331922743366,
      "loss": 2.9631,
      "step": 143176
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.0849781036376953,
      "learning_rate": 0.00018831952266331212,
      "loss": 3.2538,
      "step": 143177
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.358057737350464,
      "learning_rate": 0.00018831572611995408,
      "loss": 2.8728,
      "step": 143178
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.083616018295288,
      "learning_rate": 0.00018831192959736033,
      "loss": 3.1612,
      "step": 143179
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.282296657562256,
      "learning_rate": 0.0001883081330955315,
      "loss": 2.891,
      "step": 143180
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2216591835021973,
      "learning_rate": 0.00018830433661446816,
      "loss": 3.1411,
      "step": 143181
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.798313617706299,
      "learning_rate": 0.00018830054015417122,
      "loss": 2.8025,
      "step": 143182
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8013502359390259,
      "learning_rate": 0.00018829674371464132,
      "loss": 2.8866,
      "step": 143183
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2672410011291504,
      "learning_rate": 0.00018829294729587914,
      "loss": 2.8721,
      "step": 143184
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.425419330596924,
      "learning_rate": 0.00018828915089788548,
      "loss": 2.9568,
      "step": 143185
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4484012126922607,
      "learning_rate": 0.00018828535452066093,
      "loss": 3.1445,
      "step": 143186
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3028619289398193,
      "learning_rate": 0.00018828155816420628,
      "loss": 2.9693,
      "step": 143187
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.3567967414855957,
      "learning_rate": 0.00018827776182852216,
      "loss": 3.2198,
      "step": 143188
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.740116596221924,
      "learning_rate": 0.00018827396551360928,
      "loss": 3.1812,
      "step": 143189
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.4239661693573,
      "learning_rate": 0.0001882701692194684,
      "loss": 3.1029,
      "step": 143190
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.030022621154785,
      "learning_rate": 0.00018826637294610027,
      "loss": 3.0746,
      "step": 143191
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.106825351715088,
      "learning_rate": 0.00018826257669350547,
      "loss": 2.8954,
      "step": 143192
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.904184103012085,
      "learning_rate": 0.00018825878046168474,
      "loss": 2.832,
      "step": 143193
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.616037130355835,
      "learning_rate": 0.00018825498425063884,
      "loss": 3.045,
      "step": 143194
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.016059160232544,
      "learning_rate": 0.00018825118806036848,
      "loss": 2.9485,
      "step": 143195
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.552720069885254,
      "learning_rate": 0.0001882473918908743,
      "loss": 2.7658,
      "step": 143196
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6342885494232178,
      "learning_rate": 0.00018824359574215701,
      "loss": 3.2575,
      "step": 143197
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.182511329650879,
      "learning_rate": 0.00018823979961421746,
      "loss": 3.2677,
      "step": 143198
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2097601890563965,
      "learning_rate": 0.00018823600350705613,
      "loss": 3.0528,
      "step": 143199
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.8903740644454956,
      "learning_rate": 0.00018823220742067385,
      "loss": 2.6659,
      "step": 143200
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.07255220413208,
      "learning_rate": 0.0001882284113550713,
      "loss": 2.9363,
      "step": 143201
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2543580532073975,
      "learning_rate": 0.00018822461531024928,
      "loss": 2.6681,
      "step": 143202
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1006710529327393,
      "learning_rate": 0.00018822081928620838,
      "loss": 2.6981,
      "step": 143203
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.262277126312256,
      "learning_rate": 0.00018821702328294934,
      "loss": 3.1748,
      "step": 143204
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1367783546447754,
      "learning_rate": 0.00018821322730047283,
      "loss": 3.0747,
      "step": 143205
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.1557137966156006,
      "learning_rate": 0.00018820943133877958,
      "loss": 2.9986,
      "step": 143206
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9880291223526,
      "learning_rate": 0.00018820563539787032,
      "loss": 2.8925,
      "step": 143207
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.6866369247436523,
      "learning_rate": 0.00018820183947774576,
      "loss": 3.0632,
      "step": 143208
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.000131130218506,
      "learning_rate": 0.00018819804357840663,
      "loss": 3.0495,
      "step": 143209
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.748148202896118,
      "learning_rate": 0.00018819424769985364,
      "loss": 2.9688,
      "step": 143210
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.413996458053589,
      "learning_rate": 0.00018819045184208732,
      "loss": 2.7253,
      "step": 143211
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0795233249664307,
      "learning_rate": 0.00018818665600510851,
      "loss": 3.0646,
      "step": 143212
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9776911735534668,
      "learning_rate": 0.00018818286018891796,
      "loss": 3.2195,
      "step": 143213
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2643790245056152,
      "learning_rate": 0.00018817906439351628,
      "loss": 3.0169,
      "step": 143214
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.693363904953003,
      "learning_rate": 0.00018817526861890425,
      "loss": 3.0136,
      "step": 143215
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5935492515563965,
      "learning_rate": 0.00018817147286508267,
      "loss": 3.127,
      "step": 143216
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.0634829998016357,
      "learning_rate": 0.000188167677132052,
      "loss": 3.0139,
      "step": 143217
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.022144079208374,
      "learning_rate": 0.00018816388141981307,
      "loss": 2.9681,
      "step": 143218
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.039123058319092,
      "learning_rate": 0.00018816008572836659,
      "loss": 3.2119,
      "step": 143219
    },
    {
      "epoch": 1.86,
      "grad_norm": 3.436605453491211,
      "learning_rate": 0.0001881562900577132,
      "loss": 3.2697,
      "step": 143220
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.9512717723846436,
      "learning_rate": 0.00018815249440785374,
      "loss": 2.8941,
      "step": 143221
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.585050582885742,
      "learning_rate": 0.00018814869877878894,
      "loss": 3.0831,
      "step": 143222
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4529566764831543,
      "learning_rate": 0.0001881449031705193,
      "loss": 3.0805,
      "step": 143223
    },
    {
      "epoch": 1.86,
      "grad_norm": 4.548404693603516,
      "learning_rate": 0.00018814110758304562,
      "loss": 3.1287,
      "step": 143224
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.5476295948028564,
      "learning_rate": 0.00018813731201636864,
      "loss": 3.194,
      "step": 143225
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.046353816986084,
      "learning_rate": 0.000188133516470489,
      "loss": 3.0164,
      "step": 143226
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7015373706817627,
      "learning_rate": 0.00018812972094540747,
      "loss": 3.172,
      "step": 143227
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.490664482116699,
      "learning_rate": 0.0001881259254411249,
      "loss": 2.996,
      "step": 143228
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.4935829639434814,
      "learning_rate": 0.00018812212995764164,
      "loss": 2.8651,
      "step": 143229
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.429724931716919,
      "learning_rate": 0.00018811833449495864,
      "loss": 2.8993,
      "step": 143230
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2579233646392822,
      "learning_rate": 0.00018811453905307656,
      "loss": 2.9908,
      "step": 143231
    },
    {
      "epoch": 1.86,
      "grad_norm": 1.9774309396743774,
      "learning_rate": 0.00018811074363199605,
      "loss": 3.0427,
      "step": 143232
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.2260899543762207,
      "learning_rate": 0.0001881069482317179,
      "loss": 2.9136,
      "step": 143233
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4496374130249023,
      "learning_rate": 0.0001881031528522429,
      "loss": 2.6789,
      "step": 143234
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2084033489227295,
      "learning_rate": 0.00018809935749357153,
      "loss": 2.7186,
      "step": 143235
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.215557336807251,
      "learning_rate": 0.00018809556215570458,
      "loss": 3.0422,
      "step": 143236
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6455330848693848,
      "learning_rate": 0.00018809176683864278,
      "loss": 2.8441,
      "step": 143237
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0672600269317627,
      "learning_rate": 0.00018808797154238683,
      "loss": 3.1583,
      "step": 143238
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.5848026275634766,
      "learning_rate": 0.00018808417626693742,
      "loss": 2.864,
      "step": 143239
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.420171022415161,
      "learning_rate": 0.00018808038101229545,
      "loss": 2.9356,
      "step": 143240
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3233039379119873,
      "learning_rate": 0.00018807658577846127,
      "loss": 2.9399,
      "step": 143241
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.430873394012451,
      "learning_rate": 0.00018807279056543577,
      "loss": 2.8456,
      "step": 143242
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.004049777984619,
      "learning_rate": 0.0001880689953732197,
      "loss": 2.8682,
      "step": 143243
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1615042686462402,
      "learning_rate": 0.0001880652002018137,
      "loss": 3.1113,
      "step": 143244
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.561042070388794,
      "learning_rate": 0.00018806140505121845,
      "loss": 2.869,
      "step": 143245
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.124521017074585,
      "learning_rate": 0.00018805760992143486,
      "loss": 2.9247,
      "step": 143246
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.122633218765259,
      "learning_rate": 0.0001880538148124633,
      "loss": 3.0129,
      "step": 143247
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1101267337799072,
      "learning_rate": 0.0001880500197243047,
      "loss": 2.814,
      "step": 143248
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.452341318130493,
      "learning_rate": 0.00018804622465695974,
      "loss": 2.7393,
      "step": 143249
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2880945205688477,
      "learning_rate": 0.00018804242961042905,
      "loss": 2.7274,
      "step": 143250
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5640218257904053,
      "learning_rate": 0.0001880386345847134,
      "loss": 2.7514,
      "step": 143251
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3650825023651123,
      "learning_rate": 0.0001880348395798136,
      "loss": 3.2787,
      "step": 143252
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0932810306549072,
      "learning_rate": 0.00018803104459573012,
      "loss": 3.0129,
      "step": 143253
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1903696060180664,
      "learning_rate": 0.0001880272496324638,
      "loss": 3.0066,
      "step": 143254
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5719213485717773,
      "learning_rate": 0.00018802345469001532,
      "loss": 3.0715,
      "step": 143255
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0916755199432373,
      "learning_rate": 0.00018801965976838537,
      "loss": 3.1468,
      "step": 143256
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.094395160675049,
      "learning_rate": 0.00018801586486757467,
      "loss": 2.7451,
      "step": 143257
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.176659345626831,
      "learning_rate": 0.00018801206998758408,
      "loss": 2.9629,
      "step": 143258
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.145911931991577,
      "learning_rate": 0.00018800827512841404,
      "loss": 2.869,
      "step": 143259
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.211162805557251,
      "learning_rate": 0.00018800448029006537,
      "loss": 2.9839,
      "step": 143260
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.385056495666504,
      "learning_rate": 0.0001880006854725388,
      "loss": 2.8826,
      "step": 143261
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2093353271484375,
      "learning_rate": 0.000187996890675835,
      "loss": 3.1204,
      "step": 143262
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3362877368927,
      "learning_rate": 0.00018799309589995466,
      "loss": 2.6673,
      "step": 143263
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9994711875915527,
      "learning_rate": 0.00018798930114489855,
      "loss": 3.1671,
      "step": 143264
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1007509231567383,
      "learning_rate": 0.0001879855064106675,
      "loss": 3.1383,
      "step": 143265
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7058961391448975,
      "learning_rate": 0.00018798171169726185,
      "loss": 2.7249,
      "step": 143266
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.066453695297241,
      "learning_rate": 0.0001879779170046826,
      "loss": 3.0049,
      "step": 143267
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.865898609161377,
      "learning_rate": 0.00018797412233293034,
      "loss": 2.9655,
      "step": 143268
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.3140532970428467,
      "learning_rate": 0.0001879703276820058,
      "loss": 2.8908,
      "step": 143269
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2590272426605225,
      "learning_rate": 0.00018796653305190968,
      "loss": 3.0208,
      "step": 143270
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0708906650543213,
      "learning_rate": 0.00018796273844264277,
      "loss": 3.311,
      "step": 143271
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.978196382522583,
      "learning_rate": 0.0001879589438542057,
      "loss": 3.1287,
      "step": 143272
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.294130563735962,
      "learning_rate": 0.00018795514928659913,
      "loss": 2.7511,
      "step": 143273
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8723297119140625,
      "learning_rate": 0.00018795135473982378,
      "loss": 3.2312,
      "step": 143274
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5205955505371094,
      "learning_rate": 0.0001879475602138804,
      "loss": 3.1052,
      "step": 143275
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.126178741455078,
      "learning_rate": 0.0001879437657087697,
      "loss": 3.1706,
      "step": 143276
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6989452838897705,
      "learning_rate": 0.00018793997122449244,
      "loss": 2.9629,
      "step": 143277
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.849193572998047,
      "learning_rate": 0.00018793617676104917,
      "loss": 2.9935,
      "step": 143278
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0226643085479736,
      "learning_rate": 0.00018793238231844074,
      "loss": 2.7542,
      "step": 143279
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7163736820220947,
      "learning_rate": 0.00018792858789666773,
      "loss": 2.8447,
      "step": 143280
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.136125087738037,
      "learning_rate": 0.00018792479349573094,
      "loss": 3.0877,
      "step": 143281
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4837255477905273,
      "learning_rate": 0.00018792099911563102,
      "loss": 2.9188,
      "step": 143282
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.16461443901062,
      "learning_rate": 0.00018791720475636877,
      "loss": 2.873,
      "step": 143283
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.8412516117095947,
      "learning_rate": 0.00018791341041794478,
      "loss": 2.6695,
      "step": 143284
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.716740608215332,
      "learning_rate": 0.0001879096161003598,
      "loss": 2.8609,
      "step": 143285
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8707956075668335,
      "learning_rate": 0.00018790582180361454,
      "loss": 3.0832,
      "step": 143286
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7722725868225098,
      "learning_rate": 0.00018790202752770973,
      "loss": 2.7597,
      "step": 143287
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.124662160873413,
      "learning_rate": 0.000187898233272646,
      "loss": 2.8848,
      "step": 143288
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.372716188430786,
      "learning_rate": 0.0001878944390384242,
      "loss": 2.9469,
      "step": 143289
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1329925060272217,
      "learning_rate": 0.00018789064482504483,
      "loss": 2.7891,
      "step": 143290
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9628721475601196,
      "learning_rate": 0.00018788685063250875,
      "loss": 3.0003,
      "step": 143291
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0351200103759766,
      "learning_rate": 0.0001878830564608166,
      "loss": 2.8579,
      "step": 143292
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.004268646240234,
      "learning_rate": 0.0001878792623099691,
      "loss": 2.8479,
      "step": 143293
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7798349857330322,
      "learning_rate": 0.00018787546817996704,
      "loss": 2.8028,
      "step": 143294
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9707624912261963,
      "learning_rate": 0.00018787167407081104,
      "loss": 3.1359,
      "step": 143295
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.094808340072632,
      "learning_rate": 0.00018786787998250175,
      "loss": 2.8438,
      "step": 143296
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0517578125,
      "learning_rate": 0.0001878640859150399,
      "loss": 2.9945,
      "step": 143297
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.119598150253296,
      "learning_rate": 0.0001878602918684263,
      "loss": 2.6713,
      "step": 143298
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.62855863571167,
      "learning_rate": 0.00018785649784266154,
      "loss": 3.0348,
      "step": 143299
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.975469708442688,
      "learning_rate": 0.00018785270383774643,
      "loss": 2.7779,
      "step": 143300
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.079685688018799,
      "learning_rate": 0.0001878489098536817,
      "loss": 3.0202,
      "step": 143301
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.414071559906006,
      "learning_rate": 0.00018784511589046786,
      "loss": 2.7265,
      "step": 143302
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2247848510742188,
      "learning_rate": 0.00018784132194810574,
      "loss": 3.0874,
      "step": 143303
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5354740619659424,
      "learning_rate": 0.000187837528026596,
      "loss": 2.8887,
      "step": 143304
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.128265857696533,
      "learning_rate": 0.00018783373412593942,
      "loss": 2.8986,
      "step": 143305
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0838749408721924,
      "learning_rate": 0.00018782994024613663,
      "loss": 3.0599,
      "step": 143306
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1112420558929443,
      "learning_rate": 0.00018782614638718852,
      "loss": 2.9741,
      "step": 143307
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.56187105178833,
      "learning_rate": 0.0001878223525490955,
      "loss": 2.9574,
      "step": 143308
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1806066036224365,
      "learning_rate": 0.00018781855873185844,
      "loss": 3.1282,
      "step": 143309
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4764161109924316,
      "learning_rate": 0.00018781476493547804,
      "loss": 2.8113,
      "step": 143310
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.990530252456665,
      "learning_rate": 0.000187810971159955,
      "loss": 3.0086,
      "step": 143311
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9503796100616455,
      "learning_rate": 0.00018780717740529,
      "loss": 3.1491,
      "step": 143312
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.820434808731079,
      "learning_rate": 0.00018780338367148388,
      "loss": 2.8754,
      "step": 143313
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.071899652481079,
      "learning_rate": 0.0001877995899585371,
      "loss": 2.8743,
      "step": 143314
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4323861598968506,
      "learning_rate": 0.0001877957962664505,
      "loss": 2.8357,
      "step": 143315
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1077301502227783,
      "learning_rate": 0.0001877920025952248,
      "loss": 2.7704,
      "step": 143316
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1989612579345703,
      "learning_rate": 0.00018778820894486064,
      "loss": 2.829,
      "step": 143317
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9196500778198242,
      "learning_rate": 0.00018778441531535882,
      "loss": 2.7761,
      "step": 143318
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1652026176452637,
      "learning_rate": 0.0001877806217067201,
      "loss": 2.9689,
      "step": 143319
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7029929161071777,
      "learning_rate": 0.00018777682811894493,
      "loss": 3.098,
      "step": 143320
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.307572841644287,
      "learning_rate": 0.00018777303455203421,
      "loss": 2.8075,
      "step": 143321
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.961320400238037,
      "learning_rate": 0.0001877692410059886,
      "loss": 3.0402,
      "step": 143322
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.900497317314148,
      "learning_rate": 0.00018776544748080876,
      "loss": 2.9673,
      "step": 143323
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.183419704437256,
      "learning_rate": 0.00018776165397649548,
      "loss": 2.6845,
      "step": 143324
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.215543031692505,
      "learning_rate": 0.00018775786049304955,
      "loss": 3.0087,
      "step": 143325
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.424311399459839,
      "learning_rate": 0.00018775406703047142,
      "loss": 3.0907,
      "step": 143326
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2331459522247314,
      "learning_rate": 0.00018775027358876193,
      "loss": 2.9608,
      "step": 143327
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4006478786468506,
      "learning_rate": 0.00018774648016792176,
      "loss": 2.9667,
      "step": 143328
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0256619453430176,
      "learning_rate": 0.0001877426867679517,
      "loss": 3.0433,
      "step": 143329
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2080368995666504,
      "learning_rate": 0.0001877388933888523,
      "loss": 3.0815,
      "step": 143330
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.983894109725952,
      "learning_rate": 0.00018773510003062445,
      "loss": 2.6304,
      "step": 143331
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2946865558624268,
      "learning_rate": 0.00018773130669326885,
      "loss": 3.0278,
      "step": 143332
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.20857572555542,
      "learning_rate": 0.000187727513376786,
      "loss": 3.158,
      "step": 143333
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6636199951171875,
      "learning_rate": 0.0001877237200811767,
      "loss": 3.1453,
      "step": 143334
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.310727834701538,
      "learning_rate": 0.00018771992680644171,
      "loss": 3.1236,
      "step": 143335
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.326967477798462,
      "learning_rate": 0.0001877161335525817,
      "loss": 2.7974,
      "step": 143336
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9780081510543823,
      "learning_rate": 0.00018771234031959735,
      "loss": 3.0437,
      "step": 143337
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.147587537765503,
      "learning_rate": 0.00018770854710748955,
      "loss": 2.996,
      "step": 143338
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.41017746925354,
      "learning_rate": 0.00018770475391625873,
      "loss": 2.776,
      "step": 143339
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.607180595397949,
      "learning_rate": 0.00018770096074590572,
      "loss": 2.9808,
      "step": 143340
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1588423252105713,
      "learning_rate": 0.00018769716759643122,
      "loss": 2.659,
      "step": 143341
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0921120643615723,
      "learning_rate": 0.00018769337446783592,
      "loss": 2.7594,
      "step": 143342
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.731834650039673,
      "learning_rate": 0.00018768958136012052,
      "loss": 2.9406,
      "step": 143343
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.434727907180786,
      "learning_rate": 0.0001876857882732859,
      "loss": 2.8975,
      "step": 143344
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.438746929168701,
      "learning_rate": 0.0001876819952073325,
      "loss": 2.8321,
      "step": 143345
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4559342861175537,
      "learning_rate": 0.00018767820216226112,
      "loss": 2.7341,
      "step": 143346
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.993762731552124,
      "learning_rate": 0.00018767440913807247,
      "loss": 2.8697,
      "step": 143347
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.73274564743042,
      "learning_rate": 0.0001876706161347673,
      "loss": 2.679,
      "step": 143348
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7828714847564697,
      "learning_rate": 0.00018766682315234627,
      "loss": 2.5579,
      "step": 143349
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8403661251068115,
      "learning_rate": 0.00018766303019081022,
      "loss": 2.9499,
      "step": 143350
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7154643535614014,
      "learning_rate": 0.0001876592372501596,
      "loss": 2.9004,
      "step": 143351
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7167391777038574,
      "learning_rate": 0.00018765544433039524,
      "loss": 3.1888,
      "step": 143352
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.71628999710083,
      "learning_rate": 0.0001876516514315179,
      "loss": 2.7199,
      "step": 143353
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.5385327339172363,
      "learning_rate": 0.00018764785855352817,
      "loss": 2.8363,
      "step": 143354
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1138999462127686,
      "learning_rate": 0.0001876440656964269,
      "loss": 3.103,
      "step": 143355
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.711618423461914,
      "learning_rate": 0.00018764027286021471,
      "loss": 2.8146,
      "step": 143356
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4427337646484375,
      "learning_rate": 0.00018763648004489238,
      "loss": 2.4508,
      "step": 143357
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4140045642852783,
      "learning_rate": 0.00018763268725046045,
      "loss": 3.0412,
      "step": 143358
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7690954208374023,
      "learning_rate": 0.00018762889447691968,
      "loss": 2.9422,
      "step": 143359
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.3995590209960938,
      "learning_rate": 0.00018762510172427087,
      "loss": 2.956,
      "step": 143360
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.3535830974578857,
      "learning_rate": 0.0001876213089925147,
      "loss": 2.7556,
      "step": 143361
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.594569683074951,
      "learning_rate": 0.00018761751628165193,
      "loss": 2.9167,
      "step": 143362
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4454753398895264,
      "learning_rate": 0.00018761372359168304,
      "loss": 3.0598,
      "step": 143363
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.544055461883545,
      "learning_rate": 0.00018760993092260898,
      "loss": 2.9351,
      "step": 143364
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4049742221832275,
      "learning_rate": 0.0001876061382744303,
      "loss": 2.9114,
      "step": 143365
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3605313301086426,
      "learning_rate": 0.00018760234564714777,
      "loss": 2.9486,
      "step": 143366
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.771329641342163,
      "learning_rate": 0.00018759855304076203,
      "loss": 2.8583,
      "step": 143367
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.478764533996582,
      "learning_rate": 0.00018759476045527394,
      "loss": 3.2878,
      "step": 143368
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3721518516540527,
      "learning_rate": 0.00018759096789068406,
      "loss": 2.9581,
      "step": 143369
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9295952320098877,
      "learning_rate": 0.00018758717534699316,
      "loss": 2.6651,
      "step": 143370
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8323795795440674,
      "learning_rate": 0.00018758338282420193,
      "loss": 2.9434,
      "step": 143371
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.845329999923706,
      "learning_rate": 0.00018757959032231103,
      "loss": 2.9265,
      "step": 143372
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1365573406219482,
      "learning_rate": 0.0001875757978413212,
      "loss": 2.8191,
      "step": 143373
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2073891162872314,
      "learning_rate": 0.00018757200538123325,
      "loss": 3.1518,
      "step": 143374
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.321092367172241,
      "learning_rate": 0.0001875682129420477,
      "loss": 2.8937,
      "step": 143375
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0131032466888428,
      "learning_rate": 0.00018756442052376534,
      "loss": 2.7696,
      "step": 143376
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.32246470451355,
      "learning_rate": 0.00018756062812638687,
      "loss": 3.023,
      "step": 143377
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.307478427886963,
      "learning_rate": 0.0001875568357499131,
      "loss": 3.0918,
      "step": 143378
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.135226249694824,
      "learning_rate": 0.0001875530433943445,
      "loss": 2.8969,
      "step": 143379
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0970993041992188,
      "learning_rate": 0.000187549251059682,
      "loss": 2.8022,
      "step": 143380
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1288344860076904,
      "learning_rate": 0.00018754545874592618,
      "loss": 3.0273,
      "step": 143381
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.305521249771118,
      "learning_rate": 0.0001875416664530778,
      "loss": 3.0715,
      "step": 143382
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1630680561065674,
      "learning_rate": 0.00018753787418113754,
      "loss": 3.0437,
      "step": 143383
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1081738471984863,
      "learning_rate": 0.00018753408193010612,
      "loss": 2.8818,
      "step": 143384
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1290252208709717,
      "learning_rate": 0.00018753028969998418,
      "loss": 2.8189,
      "step": 143385
    },
    {
      "epoch": 1.87,
      "grad_norm": 5.396340370178223,
      "learning_rate": 0.00018752649749077262,
      "loss": 3.0062,
      "step": 143386
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.3997702598571777,
      "learning_rate": 0.00018752270530247191,
      "loss": 2.9633,
      "step": 143387
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.515855550765991,
      "learning_rate": 0.00018751891313508284,
      "loss": 3.1049,
      "step": 143388
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.412594795227051,
      "learning_rate": 0.00018751512098860615,
      "loss": 3.0335,
      "step": 143389
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.863957405090332,
      "learning_rate": 0.00018751132886304253,
      "loss": 3.0421,
      "step": 143390
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.6336045265197754,
      "learning_rate": 0.00018750753675839262,
      "loss": 2.6892,
      "step": 143391
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0705559253692627,
      "learning_rate": 0.00018750374467465733,
      "loss": 2.9505,
      "step": 143392
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.30876088142395,
      "learning_rate": 0.0001874999526118371,
      "loss": 2.8653,
      "step": 143393
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5043864250183105,
      "learning_rate": 0.00018749616056993278,
      "loss": 3.0975,
      "step": 143394
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.9756760597229,
      "learning_rate": 0.00018749236854894504,
      "loss": 3.0888,
      "step": 143395
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.271320343017578,
      "learning_rate": 0.00018748857654887454,
      "loss": 3.0025,
      "step": 143396
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.55918550491333,
      "learning_rate": 0.00018748478456972207,
      "loss": 2.7346,
      "step": 143397
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4151418209075928,
      "learning_rate": 0.00018748099261148833,
      "loss": 2.8757,
      "step": 143398
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1922099590301514,
      "learning_rate": 0.00018747720067417405,
      "loss": 3.2067,
      "step": 143399
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4259378910064697,
      "learning_rate": 0.00018747340875777983,
      "loss": 3.2871,
      "step": 143400
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7113378047943115,
      "learning_rate": 0.00018746961686230638,
      "loss": 3.0894,
      "step": 143401
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2970314025878906,
      "learning_rate": 0.00018746582498775446,
      "loss": 3.1692,
      "step": 143402
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.127082586288452,
      "learning_rate": 0.00018746203313412479,
      "loss": 3.0787,
      "step": 143403
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.264754295349121,
      "learning_rate": 0.00018745824130141805,
      "loss": 3.0639,
      "step": 143404
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.059276580810547,
      "learning_rate": 0.00018745444948963506,
      "loss": 2.9492,
      "step": 143405
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1006429195404053,
      "learning_rate": 0.0001874506576987763,
      "loss": 3.094,
      "step": 143406
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.265638828277588,
      "learning_rate": 0.0001874468659288426,
      "loss": 3.0449,
      "step": 143407
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.584174871444702,
      "learning_rate": 0.0001874430741798346,
      "loss": 2.6314,
      "step": 143408
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6564576625823975,
      "learning_rate": 0.0001874392824517531,
      "loss": 3.0587,
      "step": 143409
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3406972885131836,
      "learning_rate": 0.00018743549074459875,
      "loss": 3.1139,
      "step": 143410
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7567026615142822,
      "learning_rate": 0.0001874316990583724,
      "loss": 3.1246,
      "step": 143411
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.167015552520752,
      "learning_rate": 0.0001874279073930745,
      "loss": 2.8918,
      "step": 143412
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6697821617126465,
      "learning_rate": 0.00018742411574870584,
      "loss": 2.7405,
      "step": 143413
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7991867065429688,
      "learning_rate": 0.00018742032412526723,
      "loss": 3.0732,
      "step": 143414
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6410768032073975,
      "learning_rate": 0.00018741653252275924,
      "loss": 3.0809,
      "step": 143415
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7736871242523193,
      "learning_rate": 0.00018741274094118268,
      "loss": 3.2088,
      "step": 143416
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.916308879852295,
      "learning_rate": 0.0001874089493805383,
      "loss": 3.1008,
      "step": 143417
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.357989549636841,
      "learning_rate": 0.00018740515784082664,
      "loss": 2.9987,
      "step": 143418
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.503228187561035,
      "learning_rate": 0.00018740136632204845,
      "loss": 3.005,
      "step": 143419
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.88900089263916,
      "learning_rate": 0.00018739757482420452,
      "loss": 2.8515,
      "step": 143420
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.045243263244629,
      "learning_rate": 0.00018739378334729548,
      "loss": 3.0794,
      "step": 143421
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.869542121887207,
      "learning_rate": 0.00018738999189132208,
      "loss": 2.8598,
      "step": 143422
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2163586616516113,
      "learning_rate": 0.0001873862004562851,
      "loss": 2.831,
      "step": 143423
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3012256622314453,
      "learning_rate": 0.00018738240904218504,
      "loss": 2.7264,
      "step": 143424
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0314347743988037,
      "learning_rate": 0.0001873786176490227,
      "loss": 2.8697,
      "step": 143425
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0217573642730713,
      "learning_rate": 0.0001873748262767988,
      "loss": 2.8635,
      "step": 143426
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.896926164627075,
      "learning_rate": 0.0001873710349255141,
      "loss": 2.9874,
      "step": 143427
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7975659370422363,
      "learning_rate": 0.0001873672435951692,
      "loss": 3.0098,
      "step": 143428
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5445785522460938,
      "learning_rate": 0.00018736345228576498,
      "loss": 2.9415,
      "step": 143429
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8397560119628906,
      "learning_rate": 0.00018735966099730188,
      "loss": 3.0132,
      "step": 143430
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2585225105285645,
      "learning_rate": 0.00018735586972978084,
      "loss": 3.1933,
      "step": 143431
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4191744327545166,
      "learning_rate": 0.00018735207848320236,
      "loss": 3.0043,
      "step": 143432
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9575748443603516,
      "learning_rate": 0.0001873482872575673,
      "loss": 3.0384,
      "step": 143433
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.065746307373047,
      "learning_rate": 0.00018734449605287633,
      "loss": 2.7683,
      "step": 143434
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2782633304595947,
      "learning_rate": 0.00018734070486913027,
      "loss": 2.9963,
      "step": 143435
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.396202325820923,
      "learning_rate": 0.00018733691370632953,
      "loss": 2.9859,
      "step": 143436
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.018369197845459,
      "learning_rate": 0.000187333122564475,
      "loss": 2.6715,
      "step": 143437
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5530989170074463,
      "learning_rate": 0.0001873293314435674,
      "loss": 2.6498,
      "step": 143438
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6401612758636475,
      "learning_rate": 0.00018732554034360744,
      "loss": 3.0779,
      "step": 143439
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5934834480285645,
      "learning_rate": 0.0001873217492645957,
      "loss": 3.1889,
      "step": 143440
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.865928292274475,
      "learning_rate": 0.00018731795820653313,
      "loss": 2.9431,
      "step": 143441
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6705262660980225,
      "learning_rate": 0.00018731416716942018,
      "loss": 2.9667,
      "step": 143442
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.346085548400879,
      "learning_rate": 0.00018731037615325762,
      "loss": 3.003,
      "step": 143443
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4324660301208496,
      "learning_rate": 0.0001873065851580462,
      "loss": 2.853,
      "step": 143444
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9754663705825806,
      "learning_rate": 0.0001873027941837866,
      "loss": 2.9426,
      "step": 143445
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8180227279663086,
      "learning_rate": 0.0001872990032304796,
      "loss": 2.957,
      "step": 143446
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.2278003692626953,
      "learning_rate": 0.00018729521229812586,
      "loss": 3.0188,
      "step": 143447
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.133932113647461,
      "learning_rate": 0.000187291421386726,
      "loss": 2.8616,
      "step": 143448
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2951343059539795,
      "learning_rate": 0.00018728763049628085,
      "loss": 2.7197,
      "step": 143449
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5240447521209717,
      "learning_rate": 0.00018728383962679097,
      "loss": 2.7964,
      "step": 143450
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.514719247817993,
      "learning_rate": 0.00018728004877825724,
      "loss": 3.17,
      "step": 143451
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.115629196166992,
      "learning_rate": 0.00018727625795068018,
      "loss": 2.8022,
      "step": 143452
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.389662742614746,
      "learning_rate": 0.00018727246714406073,
      "loss": 3.2413,
      "step": 143453
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.687201738357544,
      "learning_rate": 0.00018726867635839934,
      "loss": 2.9806,
      "step": 143454
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.723078727722168,
      "learning_rate": 0.00018726488559369687,
      "loss": 2.8009,
      "step": 143455
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.741016149520874,
      "learning_rate": 0.00018726109484995404,
      "loss": 3.0478,
      "step": 143456
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4381184577941895,
      "learning_rate": 0.00018725730412717142,
      "loss": 3.0361,
      "step": 143457
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5372419357299805,
      "learning_rate": 0.0001872535134253498,
      "loss": 2.9131,
      "step": 143458
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7140345573425293,
      "learning_rate": 0.00018724972274448992,
      "loss": 2.9083,
      "step": 143459
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0681543350219727,
      "learning_rate": 0.00018724593208459242,
      "loss": 2.9784,
      "step": 143460
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.986213207244873,
      "learning_rate": 0.000187242141445658,
      "loss": 2.9017,
      "step": 143461
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0599212646484375,
      "learning_rate": 0.00018723835082768743,
      "loss": 2.6986,
      "step": 143462
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.234968423843384,
      "learning_rate": 0.00018723456023068142,
      "loss": 2.8662,
      "step": 143463
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.509812593460083,
      "learning_rate": 0.00018723076965464057,
      "loss": 2.7494,
      "step": 143464
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.357557535171509,
      "learning_rate": 0.00018722697909956566,
      "loss": 2.9434,
      "step": 143465
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8820831775665283,
      "learning_rate": 0.00018722318856545744,
      "loss": 2.978,
      "step": 143466
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1232059001922607,
      "learning_rate": 0.00018721939805231653,
      "loss": 2.884,
      "step": 143467
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.393010139465332,
      "learning_rate": 0.00018721560756014357,
      "loss": 3.1949,
      "step": 143468
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1080336570739746,
      "learning_rate": 0.00018721181708893946,
      "loss": 2.912,
      "step": 143469
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0785272121429443,
      "learning_rate": 0.00018720802663870477,
      "loss": 2.9845,
      "step": 143470
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7381575107574463,
      "learning_rate": 0.00018720423620944025,
      "loss": 2.9595,
      "step": 143471
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7815675735473633,
      "learning_rate": 0.00018720044580114662,
      "loss": 3.2046,
      "step": 143472
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7514069080352783,
      "learning_rate": 0.00018719665541382455,
      "loss": 2.9558,
      "step": 143473
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6815459728240967,
      "learning_rate": 0.00018719286504747468,
      "loss": 3.0579,
      "step": 143474
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.467951774597168,
      "learning_rate": 0.0001871890747020978,
      "loss": 2.5889,
      "step": 143475
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.8077809810638428,
      "learning_rate": 0.0001871852843776946,
      "loss": 2.757,
      "step": 143476
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.50642204284668,
      "learning_rate": 0.0001871814940742658,
      "loss": 3.0586,
      "step": 143477
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.076542615890503,
      "learning_rate": 0.00018717770379181222,
      "loss": 2.9392,
      "step": 143478
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.886894941329956,
      "learning_rate": 0.00018717391353033428,
      "loss": 3.2951,
      "step": 143479
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.526960372924805,
      "learning_rate": 0.0001871701232898329,
      "loss": 2.8219,
      "step": 143480
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.08636212348938,
      "learning_rate": 0.00018716633307030867,
      "loss": 3.024,
      "step": 143481
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.481140375137329,
      "learning_rate": 0.00018716254287176235,
      "loss": 3.0684,
      "step": 143482
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.448212146759033,
      "learning_rate": 0.00018715875269419464,
      "loss": 2.9723,
      "step": 143483
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.21978759765625,
      "learning_rate": 0.0001871549625376064,
      "loss": 3.0804,
      "step": 143484
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.8350894451141357,
      "learning_rate": 0.00018715117240199804,
      "loss": 2.8089,
      "step": 143485
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.8634932041168213,
      "learning_rate": 0.00018714738228737044,
      "loss": 2.7593,
      "step": 143486
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.002255439758301,
      "learning_rate": 0.00018714359219372423,
      "loss": 2.8844,
      "step": 143487
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2435975074768066,
      "learning_rate": 0.00018713980212106015,
      "loss": 2.9272,
      "step": 143488
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.902015209197998,
      "learning_rate": 0.00018713601206937896,
      "loss": 3.1411,
      "step": 143489
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.292309522628784,
      "learning_rate": 0.00018713222203868142,
      "loss": 2.793,
      "step": 143490
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.078633785247803,
      "learning_rate": 0.00018712843202896797,
      "loss": 3.1698,
      "step": 143491
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.678056478500366,
      "learning_rate": 0.0001871246420402395,
      "loss": 3.0159,
      "step": 143492
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8018579483032227,
      "learning_rate": 0.00018712085207249672,
      "loss": 2.9348,
      "step": 143493
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9233860969543457,
      "learning_rate": 0.00018711706212574025,
      "loss": 3.1093,
      "step": 143494
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8380343914031982,
      "learning_rate": 0.00018711327219997088,
      "loss": 2.9359,
      "step": 143495
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.313145637512207,
      "learning_rate": 0.0001871094822951894,
      "loss": 2.8105,
      "step": 143496
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2811203002929688,
      "learning_rate": 0.00018710569241139628,
      "loss": 2.8942,
      "step": 143497
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.192643880844116,
      "learning_rate": 0.00018710190254859235,
      "loss": 2.6155,
      "step": 143498
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.278106451034546,
      "learning_rate": 0.00018709811270677828,
      "loss": 2.7638,
      "step": 143499
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.733361005783081,
      "learning_rate": 0.00018709432288595482,
      "loss": 2.9413,
      "step": 143500
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.694797992706299,
      "learning_rate": 0.00018709053308612266,
      "loss": 3.0157,
      "step": 143501
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2409515380859375,
      "learning_rate": 0.00018708674330728263,
      "loss": 2.8914,
      "step": 143502
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2455239295959473,
      "learning_rate": 0.0001870829535494352,
      "loss": 2.9843,
      "step": 143503
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.973494529724121,
      "learning_rate": 0.0001870791638125811,
      "loss": 3.0327,
      "step": 143504
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6930270195007324,
      "learning_rate": 0.0001870753740967212,
      "loss": 2.9886,
      "step": 143505
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7984554767608643,
      "learning_rate": 0.0001870715844018561,
      "loss": 3.3104,
      "step": 143506
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.858213186264038,
      "learning_rate": 0.0001870677947279865,
      "loss": 2.9854,
      "step": 143507
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6318717002868652,
      "learning_rate": 0.00018706400507511326,
      "loss": 3.0087,
      "step": 143508
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1521434783935547,
      "learning_rate": 0.00018706021544323686,
      "loss": 2.8833,
      "step": 143509
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.189732313156128,
      "learning_rate": 0.00018705642583235804,
      "loss": 2.8137,
      "step": 143510
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9808201789855957,
      "learning_rate": 0.00018705263624247762,
      "loss": 2.788,
      "step": 143511
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1550052165985107,
      "learning_rate": 0.0001870488466735962,
      "loss": 2.9227,
      "step": 143512
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4862844944000244,
      "learning_rate": 0.00018704505712571455,
      "loss": 2.8648,
      "step": 143513
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3206112384796143,
      "learning_rate": 0.0001870412675988335,
      "loss": 3.1554,
      "step": 143514
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9606246948242188,
      "learning_rate": 0.00018703747809295347,
      "loss": 2.7427,
      "step": 143515
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5616021156311035,
      "learning_rate": 0.00018703368860807532,
      "loss": 3.0623,
      "step": 143516
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0219247341156006,
      "learning_rate": 0.00018702989914419973,
      "loss": 3.0324,
      "step": 143517
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1225051879882812,
      "learning_rate": 0.0001870261097013274,
      "loss": 3.0168,
      "step": 143518
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.242486000061035,
      "learning_rate": 0.00018702232027945908,
      "loss": 2.8751,
      "step": 143519
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2140936851501465,
      "learning_rate": 0.0001870185308785955,
      "loss": 3.0564,
      "step": 143520
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.63570237159729,
      "learning_rate": 0.00018701474149873727,
      "loss": 2.8192,
      "step": 143521
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.391360282897949,
      "learning_rate": 0.00018701095213988507,
      "loss": 2.8332,
      "step": 143522
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6420950889587402,
      "learning_rate": 0.0001870071628020397,
      "loss": 3.0436,
      "step": 143523
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4351537227630615,
      "learning_rate": 0.00018700337348520182,
      "loss": 3.0742,
      "step": 143524
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3281002044677734,
      "learning_rate": 0.00018699958418937215,
      "loss": 3.0133,
      "step": 143525
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.2531204223632812,
      "learning_rate": 0.00018699579491455152,
      "loss": 2.7777,
      "step": 143526
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.880948305130005,
      "learning_rate": 0.00018699200566074032,
      "loss": 2.92,
      "step": 143527
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1914045810699463,
      "learning_rate": 0.00018698821642793953,
      "loss": 3.0145,
      "step": 143528
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.3340542316436768,
      "learning_rate": 0.00018698442721614973,
      "loss": 2.9919,
      "step": 143529
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.314737796783447,
      "learning_rate": 0.00018698063802537163,
      "loss": 2.8143,
      "step": 143530
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.758240222930908,
      "learning_rate": 0.000186976848855606,
      "loss": 2.6376,
      "step": 143531
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2780230045318604,
      "learning_rate": 0.0001869730597068535,
      "loss": 3.1039,
      "step": 143532
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.118473768234253,
      "learning_rate": 0.00018696927057911496,
      "loss": 2.9791,
      "step": 143533
    },
    {
      "epoch": 1.87,
      "grad_norm": 5.6571831703186035,
      "learning_rate": 0.00018696548147239083,
      "loss": 2.7631,
      "step": 143534
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.723301649093628,
      "learning_rate": 0.00018696169238668198,
      "loss": 3.0494,
      "step": 143535
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.300128221511841,
      "learning_rate": 0.00018695790332198903,
      "loss": 2.9446,
      "step": 143536
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.419738531112671,
      "learning_rate": 0.0001869541142783128,
      "loss": 2.9213,
      "step": 143537
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0108397006988525,
      "learning_rate": 0.00018695032525565392,
      "loss": 2.9253,
      "step": 143538
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.395123243331909,
      "learning_rate": 0.00018694653625401315,
      "loss": 2.8858,
      "step": 143539
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.3565025329589844,
      "learning_rate": 0.00018694274727339117,
      "loss": 2.9108,
      "step": 143540
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1410131454467773,
      "learning_rate": 0.00018693895831378859,
      "loss": 3.0633,
      "step": 143541
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1446373462677,
      "learning_rate": 0.00018693516937520621,
      "loss": 3.2658,
      "step": 143542
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.007960319519043,
      "learning_rate": 0.0001869313804576447,
      "loss": 2.7693,
      "step": 143543
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3881924152374268,
      "learning_rate": 0.0001869275915611048,
      "loss": 2.9726,
      "step": 143544
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.604213237762451,
      "learning_rate": 0.00018692380268558724,
      "loss": 2.9754,
      "step": 143545
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1868441104888916,
      "learning_rate": 0.00018692001383109258,
      "loss": 2.9819,
      "step": 143546
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.428103446960449,
      "learning_rate": 0.0001869162249976217,
      "loss": 3.1782,
      "step": 143547
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2324249744415283,
      "learning_rate": 0.00018691243618517523,
      "loss": 3.0031,
      "step": 143548
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.439326763153076,
      "learning_rate": 0.00018690864739375385,
      "loss": 3.0358,
      "step": 143549
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.487053155899048,
      "learning_rate": 0.00018690485862335828,
      "loss": 2.9863,
      "step": 143550
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.307830333709717,
      "learning_rate": 0.00018690106987398933,
      "loss": 2.9396,
      "step": 143551
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.655052661895752,
      "learning_rate": 0.00018689728114564745,
      "loss": 3.0131,
      "step": 143552
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4038519859313965,
      "learning_rate": 0.0001868934924383336,
      "loss": 2.9672,
      "step": 143553
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5978922843933105,
      "learning_rate": 0.00018688970375204831,
      "loss": 2.8735,
      "step": 143554
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3865368366241455,
      "learning_rate": 0.00018688591508679242,
      "loss": 2.9453,
      "step": 143555
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5214433670043945,
      "learning_rate": 0.00018688212644256654,
      "loss": 2.9127,
      "step": 143556
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3200316429138184,
      "learning_rate": 0.0001868783378193715,
      "loss": 2.6516,
      "step": 143557
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3384108543395996,
      "learning_rate": 0.00018687454921720783,
      "loss": 3.0415,
      "step": 143558
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.111780881881714,
      "learning_rate": 0.00018687076063607632,
      "loss": 2.8899,
      "step": 143559
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2999677658081055,
      "learning_rate": 0.00018686697207597765,
      "loss": 2.9989,
      "step": 143560
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.146744728088379,
      "learning_rate": 0.00018686318353691253,
      "loss": 2.871,
      "step": 143561
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0384085178375244,
      "learning_rate": 0.00018685939501888178,
      "loss": 3.0565,
      "step": 143562
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.283381462097168,
      "learning_rate": 0.000186855606521886,
      "loss": 3.2981,
      "step": 143563
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3500571250915527,
      "learning_rate": 0.0001868518180459258,
      "loss": 3.0184,
      "step": 143564
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5962741374969482,
      "learning_rate": 0.00018684802959100202,
      "loss": 3.2147,
      "step": 143565
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.534341812133789,
      "learning_rate": 0.00018684424115711533,
      "loss": 2.9657,
      "step": 143566
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2474558353424072,
      "learning_rate": 0.0001868404527442664,
      "loss": 2.7593,
      "step": 143567
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2146668434143066,
      "learning_rate": 0.000186836664352456,
      "loss": 3.0974,
      "step": 143568
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.327977180480957,
      "learning_rate": 0.0001868328759816849,
      "loss": 2.9644,
      "step": 143569
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1946604251861572,
      "learning_rate": 0.00018682908763195362,
      "loss": 2.9186,
      "step": 143570
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1502842903137207,
      "learning_rate": 0.00018682529930326287,
      "loss": 2.8552,
      "step": 143571
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.156341314315796,
      "learning_rate": 0.0001868215109956135,
      "loss": 2.9152,
      "step": 143572
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9852927923202515,
      "learning_rate": 0.00018681772270900615,
      "loss": 2.8797,
      "step": 143573
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1085214614868164,
      "learning_rate": 0.00018681393444344146,
      "loss": 3.1024,
      "step": 143574
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3887062072753906,
      "learning_rate": 0.00018681014619892036,
      "loss": 2.8109,
      "step": 143575
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2787046432495117,
      "learning_rate": 0.0001868063579754433,
      "loss": 3.1628,
      "step": 143576
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1883349418640137,
      "learning_rate": 0.00018680256977301104,
      "loss": 3.2614,
      "step": 143577
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.189166784286499,
      "learning_rate": 0.0001867987815916243,
      "loss": 2.8477,
      "step": 143578
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.187028646469116,
      "learning_rate": 0.00018679499343128382,
      "loss": 2.7785,
      "step": 143579
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1495466232299805,
      "learning_rate": 0.0001867912052919903,
      "loss": 2.9852,
      "step": 143580
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8641036748886108,
      "learning_rate": 0.00018678741717374455,
      "loss": 3.0771,
      "step": 143581
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.82835054397583,
      "learning_rate": 0.00018678362907654703,
      "loss": 2.9568,
      "step": 143582
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.324395179748535,
      "learning_rate": 0.0001867798410003986,
      "loss": 3.2391,
      "step": 143583
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0995442867279053,
      "learning_rate": 0.00018677605294529988,
      "loss": 3.0409,
      "step": 143584
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5830066204071045,
      "learning_rate": 0.00018677226491125163,
      "loss": 3.1001,
      "step": 143585
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9630281925201416,
      "learning_rate": 0.0001867684768982546,
      "loss": 2.8148,
      "step": 143586
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.170412540435791,
      "learning_rate": 0.00018676468890630956,
      "loss": 3.2719,
      "step": 143587
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3618152141571045,
      "learning_rate": 0.00018676090093541693,
      "loss": 3.0113,
      "step": 143588
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.201082468032837,
      "learning_rate": 0.00018675711298557764,
      "loss": 2.8234,
      "step": 143589
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.527759552001953,
      "learning_rate": 0.00018675332505679233,
      "loss": 2.9619,
      "step": 143590
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2654659748077393,
      "learning_rate": 0.0001867495371490617,
      "loss": 2.9744,
      "step": 143591
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.699418544769287,
      "learning_rate": 0.0001867457492623865,
      "loss": 3.0501,
      "step": 143592
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.128425359725952,
      "learning_rate": 0.00018674196139676746,
      "loss": 2.8204,
      "step": 143593
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0613553524017334,
      "learning_rate": 0.0001867381735522052,
      "loss": 3.1203,
      "step": 143594
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5944254398345947,
      "learning_rate": 0.00018673438572870037,
      "loss": 2.7653,
      "step": 143595
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6601250171661377,
      "learning_rate": 0.00018673059792625378,
      "loss": 3.0157,
      "step": 143596
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8011265993118286,
      "learning_rate": 0.0001867268101448661,
      "loss": 2.9138,
      "step": 143597
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2480971813201904,
      "learning_rate": 0.0001867230223845381,
      "loss": 3.0649,
      "step": 143598
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.65820050239563,
      "learning_rate": 0.00018671923464527035,
      "loss": 2.8454,
      "step": 143599
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2195804119110107,
      "learning_rate": 0.0001867154469270638,
      "loss": 2.8372,
      "step": 143600
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.995794653892517,
      "learning_rate": 0.00018671165922991886,
      "loss": 3.1308,
      "step": 143601
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.218348503112793,
      "learning_rate": 0.00018670787155383636,
      "loss": 3.0889,
      "step": 143602
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0191643238067627,
      "learning_rate": 0.000186704083898817,
      "loss": 2.7826,
      "step": 143603
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.200899839401245,
      "learning_rate": 0.0001867002962648615,
      "loss": 2.9108,
      "step": 143604
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2125751972198486,
      "learning_rate": 0.00018669650865197054,
      "loss": 2.9854,
      "step": 143605
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1010334491729736,
      "learning_rate": 0.00018669272106014494,
      "loss": 2.9678,
      "step": 143606
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.723581075668335,
      "learning_rate": 0.00018668893348938525,
      "loss": 3.0043,
      "step": 143607
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4038827419281006,
      "learning_rate": 0.00018668514593969218,
      "loss": 3.0383,
      "step": 143608
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.527280807495117,
      "learning_rate": 0.00018668135841106646,
      "loss": 2.9754,
      "step": 143609
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2793381214141846,
      "learning_rate": 0.00018667757090350885,
      "loss": 2.9031,
      "step": 143610
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2474937438964844,
      "learning_rate": 0.00018667378341702002,
      "loss": 3.1757,
      "step": 143611
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.495093584060669,
      "learning_rate": 0.00018666999595160077,
      "loss": 3.0654,
      "step": 143612
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.167128086090088,
      "learning_rate": 0.00018666620850725158,
      "loss": 2.9297,
      "step": 143613
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8258917331695557,
      "learning_rate": 0.0001866624210839733,
      "loss": 2.6979,
      "step": 143614
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3064684867858887,
      "learning_rate": 0.0001866586336817666,
      "loss": 2.9277,
      "step": 143615
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.187708854675293,
      "learning_rate": 0.0001866548463006322,
      "loss": 2.9607,
      "step": 143616
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.222177505493164,
      "learning_rate": 0.00018665105894057085,
      "loss": 2.8569,
      "step": 143617
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.544522762298584,
      "learning_rate": 0.00018664727160158328,
      "loss": 2.9624,
      "step": 143618
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.014188051223755,
      "learning_rate": 0.00018664348428367,
      "loss": 2.9405,
      "step": 143619
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0229575634002686,
      "learning_rate": 0.00018663969698683186,
      "loss": 2.982,
      "step": 143620
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.145956516265869,
      "learning_rate": 0.0001866359097110695,
      "loss": 3.1351,
      "step": 143621
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.238520383834839,
      "learning_rate": 0.00018663212245638374,
      "loss": 2.9301,
      "step": 143622
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.254347085952759,
      "learning_rate": 0.00018662833522277515,
      "loss": 2.8744,
      "step": 143623
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2100844383239746,
      "learning_rate": 0.00018662454801024456,
      "loss": 2.8575,
      "step": 143624
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.058349370956421,
      "learning_rate": 0.00018662076081879265,
      "loss": 3.1068,
      "step": 143625
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.543475151062012,
      "learning_rate": 0.00018661697364841994,
      "loss": 2.8617,
      "step": 143626
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4703257083892822,
      "learning_rate": 0.00018661318649912734,
      "loss": 2.9976,
      "step": 143627
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.138622522354126,
      "learning_rate": 0.00018660939937091548,
      "loss": 3.1145,
      "step": 143628
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0000033378601074,
      "learning_rate": 0.00018660561226378506,
      "loss": 2.7271,
      "step": 143629
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9201529026031494,
      "learning_rate": 0.00018660182517773682,
      "loss": 2.8605,
      "step": 143630
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.988925576210022,
      "learning_rate": 0.00018659803811277143,
      "loss": 2.9316,
      "step": 143631
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.872496485710144,
      "learning_rate": 0.00018659425106888966,
      "loss": 3.1633,
      "step": 143632
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0815014839172363,
      "learning_rate": 0.0001865904640460921,
      "loss": 3.0188,
      "step": 143633
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8035085201263428,
      "learning_rate": 0.00018658667704437952,
      "loss": 3.0333,
      "step": 143634
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2249929904937744,
      "learning_rate": 0.0001865828900637526,
      "loss": 3.1258,
      "step": 143635
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0576231479644775,
      "learning_rate": 0.0001865791031042121,
      "loss": 2.8578,
      "step": 143636
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.982264757156372,
      "learning_rate": 0.0001865753161657587,
      "loss": 2.921,
      "step": 143637
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.898007869720459,
      "learning_rate": 0.000186571529248393,
      "loss": 3.0821,
      "step": 143638
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0542569160461426,
      "learning_rate": 0.00018656774235211585,
      "loss": 2.8253,
      "step": 143639
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8513073921203613,
      "learning_rate": 0.00018656395547692796,
      "loss": 2.783,
      "step": 143640
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3882460594177246,
      "learning_rate": 0.0001865601686228299,
      "loss": 3.0695,
      "step": 143641
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9085558652877808,
      "learning_rate": 0.0001865563817898225,
      "loss": 2.914,
      "step": 143642
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.076246500015259,
      "learning_rate": 0.0001865525949779063,
      "loss": 2.8533,
      "step": 143643
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2056620121002197,
      "learning_rate": 0.0001865488081870822,
      "loss": 2.881,
      "step": 143644
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0996744632720947,
      "learning_rate": 0.00018654502141735079,
      "loss": 2.9424,
      "step": 143645
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0060338973999023,
      "learning_rate": 0.0001865412346687128,
      "loss": 2.818,
      "step": 143646
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1793150901794434,
      "learning_rate": 0.00018653744794116896,
      "loss": 3.175,
      "step": 143647
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0183680057525635,
      "learning_rate": 0.00018653366123472,
      "loss": 2.7817,
      "step": 143648
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.027852773666382,
      "learning_rate": 0.00018652987454936652,
      "loss": 3.1903,
      "step": 143649
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3402676582336426,
      "learning_rate": 0.00018652608788510925,
      "loss": 2.8901,
      "step": 143650
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8115555047988892,
      "learning_rate": 0.0001865223012419489,
      "loss": 2.7226,
      "step": 143651
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0781960487365723,
      "learning_rate": 0.00018651851461988625,
      "loss": 2.6448,
      "step": 143652
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0045576095581055,
      "learning_rate": 0.00018651472801892193,
      "loss": 3.0115,
      "step": 143653
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9573094844818115,
      "learning_rate": 0.00018651094143905673,
      "loss": 2.9917,
      "step": 143654
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.011223793029785,
      "learning_rate": 0.00018650715488029127,
      "loss": 2.8134,
      "step": 143655
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7708537578582764,
      "learning_rate": 0.00018650336834262618,
      "loss": 2.9306,
      "step": 143656
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.966025471687317,
      "learning_rate": 0.00018649958182606228,
      "loss": 3.0161,
      "step": 143657
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.576446294784546,
      "learning_rate": 0.0001864957953306003,
      "loss": 3.1487,
      "step": 143658
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8850557804107666,
      "learning_rate": 0.00018649200885624084,
      "loss": 2.862,
      "step": 143659
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9736114740371704,
      "learning_rate": 0.0001864882224029847,
      "loss": 2.8996,
      "step": 143660
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1305294036865234,
      "learning_rate": 0.00018648443597083262,
      "loss": 2.8617,
      "step": 143661
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6066336631774902,
      "learning_rate": 0.00018648064955978513,
      "loss": 3.0927,
      "step": 143662
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1890709400177,
      "learning_rate": 0.00018647686316984304,
      "loss": 3.0702,
      "step": 143663
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7337608337402344,
      "learning_rate": 0.000186473076801007,
      "loss": 2.9986,
      "step": 143664
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.264880418777466,
      "learning_rate": 0.00018646929045327778,
      "loss": 3.0102,
      "step": 143665
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.143042802810669,
      "learning_rate": 0.00018646550412665608,
      "loss": 3.2684,
      "step": 143666
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5955967903137207,
      "learning_rate": 0.00018646171782114268,
      "loss": 3.0298,
      "step": 143667
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0051348209381104,
      "learning_rate": 0.00018645793153673813,
      "loss": 2.9892,
      "step": 143668
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4654653072357178,
      "learning_rate": 0.00018645414527344314,
      "loss": 3.0978,
      "step": 143669
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0029845237731934,
      "learning_rate": 0.00018645035903125848,
      "loss": 2.9927,
      "step": 143670
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2744176387786865,
      "learning_rate": 0.00018644657281018483,
      "loss": 3.2793,
      "step": 143671
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9884774684906006,
      "learning_rate": 0.00018644278661022294,
      "loss": 3.2665,
      "step": 143672
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.17931604385376,
      "learning_rate": 0.00018643900043137357,
      "loss": 2.8246,
      "step": 143673
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.48026180267334,
      "learning_rate": 0.0001864352142736372,
      "loss": 2.9637,
      "step": 143674
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2862019538879395,
      "learning_rate": 0.00018643142813701467,
      "loss": 2.8064,
      "step": 143675
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3904945850372314,
      "learning_rate": 0.0001864276420215067,
      "loss": 2.8795,
      "step": 143676
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.89294695854187,
      "learning_rate": 0.00018642385592711398,
      "loss": 2.846,
      "step": 143677
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.663398504257202,
      "learning_rate": 0.00018642006985383723,
      "loss": 3.1206,
      "step": 143678
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4683587551116943,
      "learning_rate": 0.0001864162838016772,
      "loss": 2.9716,
      "step": 143679
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.013869524002075,
      "learning_rate": 0.00018641249777063442,
      "loss": 3.0348,
      "step": 143680
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7774882316589355,
      "learning_rate": 0.00018640871176070972,
      "loss": 2.9815,
      "step": 143681
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1242425441741943,
      "learning_rate": 0.00018640492577190376,
      "loss": 3.006,
      "step": 143682
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.127028226852417,
      "learning_rate": 0.00018640113980421732,
      "loss": 3.0372,
      "step": 143683
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2859671115875244,
      "learning_rate": 0.000186397353857651,
      "loss": 3.011,
      "step": 143684
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6057348251342773,
      "learning_rate": 0.0001863935679322057,
      "loss": 3.0824,
      "step": 143685
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1248557567596436,
      "learning_rate": 0.00018638978202788182,
      "loss": 2.9455,
      "step": 143686
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.786559581756592,
      "learning_rate": 0.0001863859961446803,
      "loss": 3.1784,
      "step": 143687
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.14021635055542,
      "learning_rate": 0.0001863822102826017,
      "loss": 3.0702,
      "step": 143688
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1147301197052,
      "learning_rate": 0.00018637842444164682,
      "loss": 2.9772,
      "step": 143689
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.059419870376587,
      "learning_rate": 0.00018637463862181632,
      "loss": 3.0685,
      "step": 143690
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1628870964050293,
      "learning_rate": 0.00018637085282311103,
      "loss": 2.8878,
      "step": 143691
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3636631965637207,
      "learning_rate": 0.00018636706704553148,
      "loss": 3.1372,
      "step": 143692
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.810410261154175,
      "learning_rate": 0.0001863632812890784,
      "loss": 2.8794,
      "step": 143693
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1709628105163574,
      "learning_rate": 0.00018635949555375253,
      "loss": 2.783,
      "step": 143694
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0626378059387207,
      "learning_rate": 0.00018635570983955457,
      "loss": 3.1605,
      "step": 143695
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.053990364074707,
      "learning_rate": 0.00018635192414648527,
      "loss": 2.9626,
      "step": 143696
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2553229331970215,
      "learning_rate": 0.00018634813847454536,
      "loss": 3.14,
      "step": 143697
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1452951431274414,
      "learning_rate": 0.00018634435282373536,
      "loss": 2.7859,
      "step": 143698
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.090292453765869,
      "learning_rate": 0.0001863405671940561,
      "loss": 2.8351,
      "step": 143699
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6316428184509277,
      "learning_rate": 0.0001863367815855083,
      "loss": 3.0121,
      "step": 143700
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.686370849609375,
      "learning_rate": 0.00018633299599809264,
      "loss": 2.9553,
      "step": 143701
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1114964485168457,
      "learning_rate": 0.00018632921043180978,
      "loss": 3.2247,
      "step": 143702
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1071600914001465,
      "learning_rate": 0.0001863254248866606,
      "loss": 3.0159,
      "step": 143703
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1335136890411377,
      "learning_rate": 0.00018632163936264554,
      "loss": 3.0058,
      "step": 143704
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.518512487411499,
      "learning_rate": 0.00018631785385976544,
      "loss": 3.1046,
      "step": 143705
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5466666221618652,
      "learning_rate": 0.00018631406837802105,
      "loss": 2.7797,
      "step": 143706
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.794101357460022,
      "learning_rate": 0.00018631028291741294,
      "loss": 2.7968,
      "step": 143707
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.5845143795013428,
      "learning_rate": 0.00018630649747794194,
      "loss": 2.9766,
      "step": 143708
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.214259624481201,
      "learning_rate": 0.00018630271205960877,
      "loss": 2.7249,
      "step": 143709
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1611032485961914,
      "learning_rate": 0.00018629892666241404,
      "loss": 3.047,
      "step": 143710
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.6319053173065186,
      "learning_rate": 0.00018629514128635846,
      "loss": 2.9074,
      "step": 143711
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5028700828552246,
      "learning_rate": 0.0001862913559314428,
      "loss": 3.0707,
      "step": 143712
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.808253049850464,
      "learning_rate": 0.0001862875705976676,
      "loss": 2.9013,
      "step": 143713
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.657585620880127,
      "learning_rate": 0.00018628378528503378,
      "loss": 2.9148,
      "step": 143714
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0185277462005615,
      "learning_rate": 0.000186279999993542,
      "loss": 2.8485,
      "step": 143715
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.182366371154785,
      "learning_rate": 0.00018627621472319284,
      "loss": 2.9086,
      "step": 143716
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2591617107391357,
      "learning_rate": 0.0001862724294739871,
      "loss": 2.9389,
      "step": 143717
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9242804050445557,
      "learning_rate": 0.00018626864424592545,
      "loss": 2.9785,
      "step": 143718
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8945624828338623,
      "learning_rate": 0.0001862648590390086,
      "loss": 3.2185,
      "step": 143719
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1950409412384033,
      "learning_rate": 0.00018626107385323726,
      "loss": 2.9152,
      "step": 143720
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0102150440216064,
      "learning_rate": 0.00018625728868861222,
      "loss": 2.7986,
      "step": 143721
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.622622013092041,
      "learning_rate": 0.000186253503545134,
      "loss": 3.0054,
      "step": 143722
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4238691329956055,
      "learning_rate": 0.00018624971842280344,
      "loss": 3.0197,
      "step": 143723
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7589728832244873,
      "learning_rate": 0.00018624593332162122,
      "loss": 2.946,
      "step": 143724
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1288492679595947,
      "learning_rate": 0.00018624214824158793,
      "loss": 2.7942,
      "step": 143725
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3135666847229004,
      "learning_rate": 0.00018623836318270445,
      "loss": 2.9006,
      "step": 143726
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.7664918899536133,
      "learning_rate": 0.00018623457814497137,
      "loss": 2.9639,
      "step": 143727
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.153911590576172,
      "learning_rate": 0.00018623079312838953,
      "loss": 3.0136,
      "step": 143728
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.247340440750122,
      "learning_rate": 0.0001862270081329594,
      "loss": 3.0025,
      "step": 143729
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.681291103363037,
      "learning_rate": 0.0001862232231586819,
      "loss": 3.0641,
      "step": 143730
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.869670033454895,
      "learning_rate": 0.0001862194382055576,
      "loss": 3.2282,
      "step": 143731
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2674567699432373,
      "learning_rate": 0.00018621565327358732,
      "loss": 2.7227,
      "step": 143732
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6390562057495117,
      "learning_rate": 0.00018621186836277162,
      "loss": 3.0713,
      "step": 143733
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1916403770446777,
      "learning_rate": 0.00018620808347311142,
      "loss": 2.8679,
      "step": 143734
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1726512908935547,
      "learning_rate": 0.00018620429860460714,
      "loss": 2.8729,
      "step": 143735
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9705958366394043,
      "learning_rate": 0.00018620051375725966,
      "loss": 2.9374,
      "step": 143736
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1508705615997314,
      "learning_rate": 0.00018619672893106963,
      "loss": 2.829,
      "step": 143737
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.917271614074707,
      "learning_rate": 0.00018619294412603783,
      "loss": 2.6923,
      "step": 143738
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2522830963134766,
      "learning_rate": 0.0001861891593421649,
      "loss": 3.0945,
      "step": 143739
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.139429807662964,
      "learning_rate": 0.0001861853745794516,
      "loss": 2.9407,
      "step": 143740
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0090291500091553,
      "learning_rate": 0.00018618158983789852,
      "loss": 3.0071,
      "step": 143741
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.118316173553467,
      "learning_rate": 0.00018617780511750645,
      "loss": 3.0721,
      "step": 143742
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.481884717941284,
      "learning_rate": 0.00018617402041827603,
      "loss": 3.097,
      "step": 143743
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.198843002319336,
      "learning_rate": 0.00018617023574020802,
      "loss": 2.8149,
      "step": 143744
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9979400634765625,
      "learning_rate": 0.00018616645108330315,
      "loss": 2.8267,
      "step": 143745
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3306403160095215,
      "learning_rate": 0.00018616266644756215,
      "loss": 2.916,
      "step": 143746
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4373409748077393,
      "learning_rate": 0.00018615888183298558,
      "loss": 3.0831,
      "step": 143747
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.635845899581909,
      "learning_rate": 0.00018615509723957422,
      "loss": 3.1486,
      "step": 143748
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5907742977142334,
      "learning_rate": 0.00018615131266732876,
      "loss": 2.8384,
      "step": 143749
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5450689792633057,
      "learning_rate": 0.0001861475281162499,
      "loss": 3.0018,
      "step": 143750
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.410000801086426,
      "learning_rate": 0.0001861437435863384,
      "loss": 3.0151,
      "step": 143751
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.328169584274292,
      "learning_rate": 0.00018613995907759508,
      "loss": 2.8188,
      "step": 143752
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.939995527267456,
      "learning_rate": 0.00018613617459002028,
      "loss": 2.984,
      "step": 143753
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.183948516845703,
      "learning_rate": 0.00018613239012361496,
      "loss": 2.8534,
      "step": 143754
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.76674747467041,
      "learning_rate": 0.0001861286056783798,
      "loss": 3.111,
      "step": 143755
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.796379327774048,
      "learning_rate": 0.00018612482125431544,
      "loss": 2.6852,
      "step": 143756
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.147231340408325,
      "learning_rate": 0.00018612103685142266,
      "loss": 2.8821,
      "step": 143757
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.080848693847656,
      "learning_rate": 0.00018611725246970226,
      "loss": 2.8163,
      "step": 143758
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.233734607696533,
      "learning_rate": 0.00018611346810915467,
      "loss": 2.9106,
      "step": 143759
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8514372110366821,
      "learning_rate": 0.00018610968376978074,
      "loss": 2.7062,
      "step": 143760
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5974276065826416,
      "learning_rate": 0.00018610589945158116,
      "loss": 3.1159,
      "step": 143761
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.541412353515625,
      "learning_rate": 0.00018610211515455665,
      "loss": 2.9782,
      "step": 143762
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9782156944274902,
      "learning_rate": 0.0001860983308787079,
      "loss": 2.9057,
      "step": 143763
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.806957483291626,
      "learning_rate": 0.00018609454662403573,
      "loss": 2.9626,
      "step": 143764
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.100994825363159,
      "learning_rate": 0.00018609076239054065,
      "loss": 2.9931,
      "step": 143765
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1106879711151123,
      "learning_rate": 0.0001860869781782234,
      "loss": 2.9513,
      "step": 143766
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6682231426239014,
      "learning_rate": 0.00018608319398708475,
      "loss": 2.8723,
      "step": 143767
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.977147102355957,
      "learning_rate": 0.00018607940981712543,
      "loss": 2.8695,
      "step": 143768
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.16994571685791,
      "learning_rate": 0.00018607562566834603,
      "loss": 2.8875,
      "step": 143769
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.091686248779297,
      "learning_rate": 0.0001860718415407475,
      "loss": 3.1309,
      "step": 143770
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0463640689849854,
      "learning_rate": 0.00018606805743433022,
      "loss": 2.9187,
      "step": 143771
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1153273582458496,
      "learning_rate": 0.00018606427334909504,
      "loss": 3.0706,
      "step": 143772
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1105434894561768,
      "learning_rate": 0.00018606048928504262,
      "loss": 3.1747,
      "step": 143773
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.290266513824463,
      "learning_rate": 0.00018605670524217378,
      "loss": 3.1387,
      "step": 143774
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2157552242279053,
      "learning_rate": 0.0001860529212204891,
      "loss": 3.0464,
      "step": 143775
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6795668601989746,
      "learning_rate": 0.00018604913721998947,
      "loss": 2.8085,
      "step": 143776
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3797409534454346,
      "learning_rate": 0.00018604535324067534,
      "loss": 2.9374,
      "step": 143777
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9701834917068481,
      "learning_rate": 0.00018604156928254752,
      "loss": 3.1536,
      "step": 143778
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1777279376983643,
      "learning_rate": 0.00018603778534560675,
      "loss": 2.8709,
      "step": 143779
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.304164171218872,
      "learning_rate": 0.00018603400142985367,
      "loss": 3.0731,
      "step": 143780
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.277660846710205,
      "learning_rate": 0.000186030217535289,
      "loss": 2.9518,
      "step": 143781
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9773669242858887,
      "learning_rate": 0.00018602643366191364,
      "loss": 3.17,
      "step": 143782
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.466160535812378,
      "learning_rate": 0.000186022649809728,
      "loss": 2.8515,
      "step": 143783
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2681353092193604,
      "learning_rate": 0.00018601886597873285,
      "loss": 2.8792,
      "step": 143784
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.668346643447876,
      "learning_rate": 0.00018601508216892898,
      "loss": 2.9219,
      "step": 143785
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.703216552734375,
      "learning_rate": 0.00018601129838031701,
      "loss": 2.8574,
      "step": 143786
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.177877187728882,
      "learning_rate": 0.00018600751461289776,
      "loss": 3.0973,
      "step": 143787
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3056321144104004,
      "learning_rate": 0.00018600373086667194,
      "loss": 2.8361,
      "step": 143788
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5927321910858154,
      "learning_rate": 0.00018599994714164005,
      "loss": 3.0017,
      "step": 143789
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1941652297973633,
      "learning_rate": 0.000185996163437803,
      "loss": 3.0025,
      "step": 143790
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.667541742324829,
      "learning_rate": 0.00018599237975516135,
      "loss": 2.9832,
      "step": 143791
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3677356243133545,
      "learning_rate": 0.00018598859609371586,
      "loss": 2.7845,
      "step": 143792
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9243817329406738,
      "learning_rate": 0.00018598481245346728,
      "loss": 3.0549,
      "step": 143793
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6127705574035645,
      "learning_rate": 0.0001859810288344162,
      "loss": 2.9888,
      "step": 143794
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.096104860305786,
      "learning_rate": 0.0001859772452365636,
      "loss": 2.9875,
      "step": 143795
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5219175815582275,
      "learning_rate": 0.00018597346165990988,
      "loss": 3.2515,
      "step": 143796
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.8775064945220947,
      "learning_rate": 0.00018596967810445577,
      "loss": 2.7902,
      "step": 143797
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9588311910629272,
      "learning_rate": 0.0001859658945702021,
      "loss": 3.1587,
      "step": 143798
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6355783939361572,
      "learning_rate": 0.0001859621110571495,
      "loss": 2.9409,
      "step": 143799
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4283812046051025,
      "learning_rate": 0.00018595832756529868,
      "loss": 2.8311,
      "step": 143800
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2989702224731445,
      "learning_rate": 0.00018595454409465044,
      "loss": 2.7171,
      "step": 143801
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.282407522201538,
      "learning_rate": 0.0001859507606452054,
      "loss": 3.0772,
      "step": 143802
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4584240913391113,
      "learning_rate": 0.0001859469772169642,
      "loss": 2.9643,
      "step": 143803
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.484480619430542,
      "learning_rate": 0.00018594319380992765,
      "loss": 2.6033,
      "step": 143804
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.505086660385132,
      "learning_rate": 0.00018593941042409638,
      "loss": 3.1783,
      "step": 143805
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2613234519958496,
      "learning_rate": 0.00018593562705947113,
      "loss": 2.8811,
      "step": 143806
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.609849452972412,
      "learning_rate": 0.00018593184371605266,
      "loss": 3.1606,
      "step": 143807
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.00311017036438,
      "learning_rate": 0.00018592806039384154,
      "loss": 3.1127,
      "step": 143808
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8119093179702759,
      "learning_rate": 0.00018592427709283865,
      "loss": 2.9677,
      "step": 143809
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5386581420898438,
      "learning_rate": 0.00018592049381304447,
      "loss": 3.0113,
      "step": 143810
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.696315288543701,
      "learning_rate": 0.00018591671055445986,
      "loss": 3.0427,
      "step": 143811
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6214888095855713,
      "learning_rate": 0.0001859129273170855,
      "loss": 2.9114,
      "step": 143812
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0154662132263184,
      "learning_rate": 0.0001859091441009221,
      "loss": 3.0383,
      "step": 143813
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4397644996643066,
      "learning_rate": 0.0001859053609059703,
      "loss": 2.9466,
      "step": 143814
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0947611331939697,
      "learning_rate": 0.00018590157773223088,
      "loss": 2.8418,
      "step": 143815
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1205642223358154,
      "learning_rate": 0.00018589779457970455,
      "loss": 2.9428,
      "step": 143816
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2463603019714355,
      "learning_rate": 0.00018589401144839188,
      "loss": 3.0689,
      "step": 143817
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2035062313079834,
      "learning_rate": 0.0001858902283382937,
      "loss": 2.9033,
      "step": 143818
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.657709836959839,
      "learning_rate": 0.00018588644524941073,
      "loss": 3.1953,
      "step": 143819
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.297243595123291,
      "learning_rate": 0.00018588266218174358,
      "loss": 2.9949,
      "step": 143820
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5087389945983887,
      "learning_rate": 0.000185878879135293,
      "loss": 2.9039,
      "step": 143821
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3874025344848633,
      "learning_rate": 0.00018587509611005964,
      "loss": 3.231,
      "step": 143822
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.406549453735352,
      "learning_rate": 0.00018587131310604437,
      "loss": 2.8763,
      "step": 143823
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.71246075630188,
      "learning_rate": 0.00018586753012324772,
      "loss": 2.7804,
      "step": 143824
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.188520908355713,
      "learning_rate": 0.00018586374716167046,
      "loss": 2.9459,
      "step": 143825
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0690956115722656,
      "learning_rate": 0.00018585996422131325,
      "loss": 3.0782,
      "step": 143826
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8644704818725586,
      "learning_rate": 0.00018585618130217683,
      "loss": 3.3465,
      "step": 143827
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.156346321105957,
      "learning_rate": 0.0001858523984042619,
      "loss": 2.874,
      "step": 143828
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.169783592224121,
      "learning_rate": 0.00018584861552756917,
      "loss": 2.9458,
      "step": 143829
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1249918937683105,
      "learning_rate": 0.00018584483267209933,
      "loss": 3.1766,
      "step": 143830
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9873284101486206,
      "learning_rate": 0.00018584104983785323,
      "loss": 3.1099,
      "step": 143831
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0541343688964844,
      "learning_rate": 0.00018583726702483126,
      "loss": 2.7704,
      "step": 143832
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.687586307525635,
      "learning_rate": 0.00018583348423303433,
      "loss": 3.1851,
      "step": 143833
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7167022228240967,
      "learning_rate": 0.00018582970146246313,
      "loss": 3.0744,
      "step": 143834
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1409263610839844,
      "learning_rate": 0.0001858259187131183,
      "loss": 3.0713,
      "step": 143835
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8107590675354004,
      "learning_rate": 0.00018582213598500064,
      "loss": 2.8889,
      "step": 143836
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8375349044799805,
      "learning_rate": 0.0001858183532781109,
      "loss": 2.9144,
      "step": 143837
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2129948139190674,
      "learning_rate": 0.00018581457059244956,
      "loss": 2.9815,
      "step": 143838
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3821778297424316,
      "learning_rate": 0.00018581078792801744,
      "loss": 2.9821,
      "step": 143839
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.363049268722534,
      "learning_rate": 0.00018580700528481527,
      "loss": 3.0575,
      "step": 143840
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.172578811645508,
      "learning_rate": 0.00018580322266284368,
      "loss": 2.8076,
      "step": 143841
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.342798948287964,
      "learning_rate": 0.00018579944006210346,
      "loss": 2.98,
      "step": 143842
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.967907190322876,
      "learning_rate": 0.00018579565748259543,
      "loss": 2.8569,
      "step": 143843
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8438329696655273,
      "learning_rate": 0.00018579187492431996,
      "loss": 3.089,
      "step": 143844
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1828508377075195,
      "learning_rate": 0.000185788092387278,
      "loss": 2.9606,
      "step": 143845
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8909777402877808,
      "learning_rate": 0.00018578430987147015,
      "loss": 2.9644,
      "step": 143846
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.504347324371338,
      "learning_rate": 0.0001857805273768972,
      "loss": 3.117,
      "step": 143847
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2210962772369385,
      "learning_rate": 0.00018577674490355975,
      "loss": 2.9027,
      "step": 143848
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0421390533447266,
      "learning_rate": 0.0001857729624514587,
      "loss": 3.0277,
      "step": 143849
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1404519081115723,
      "learning_rate": 0.00018576918002059448,
      "loss": 3.1235,
      "step": 143850
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.412729263305664,
      "learning_rate": 0.00018576539761096795,
      "loss": 2.8905,
      "step": 143851
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9683030843734741,
      "learning_rate": 0.00018576161522257978,
      "loss": 3.0933,
      "step": 143852
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.04644513130188,
      "learning_rate": 0.00018575783285543063,
      "loss": 3.1818,
      "step": 143853
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1539573669433594,
      "learning_rate": 0.00018575405050952134,
      "loss": 2.9082,
      "step": 143854
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.169050931930542,
      "learning_rate": 0.00018575026818485262,
      "loss": 3.0775,
      "step": 143855
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.002265691757202,
      "learning_rate": 0.00018574648588142494,
      "loss": 3.0673,
      "step": 143856
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3945367336273193,
      "learning_rate": 0.0001857427035992392,
      "loss": 2.8648,
      "step": 143857
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1003332138061523,
      "learning_rate": 0.00018573892133829596,
      "loss": 2.9575,
      "step": 143858
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2652134895324707,
      "learning_rate": 0.00018573513909859605,
      "loss": 3.035,
      "step": 143859
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.337548017501831,
      "learning_rate": 0.00018573135688014015,
      "loss": 2.8622,
      "step": 143860
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.617424249649048,
      "learning_rate": 0.00018572757468292894,
      "loss": 2.7469,
      "step": 143861
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4113621711730957,
      "learning_rate": 0.00018572379250696326,
      "loss": 2.9379,
      "step": 143862
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.207972764968872,
      "learning_rate": 0.0001857200103522436,
      "loss": 2.9834,
      "step": 143863
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.898205280303955,
      "learning_rate": 0.00018571622821877067,
      "loss": 3.2313,
      "step": 143864
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.9416325092315674,
      "learning_rate": 0.0001857124461065453,
      "loss": 3.092,
      "step": 143865
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0193698406219482,
      "learning_rate": 0.0001857086640155681,
      "loss": 3.0538,
      "step": 143866
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9331084489822388,
      "learning_rate": 0.00018570488194583987,
      "loss": 3.0626,
      "step": 143867
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5001769065856934,
      "learning_rate": 0.00018570109989736138,
      "loss": 2.9947,
      "step": 143868
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.122602939605713,
      "learning_rate": 0.00018569731787013304,
      "loss": 3.104,
      "step": 143869
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.7576590776443481,
      "learning_rate": 0.00018569353586415574,
      "loss": 2.8844,
      "step": 143870
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6749420166015625,
      "learning_rate": 0.00018568975387943019,
      "loss": 3.139,
      "step": 143871
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1090617179870605,
      "learning_rate": 0.00018568597191595705,
      "loss": 3.0466,
      "step": 143872
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4999959468841553,
      "learning_rate": 0.00018568218997373707,
      "loss": 3.0132,
      "step": 143873
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.542314291000366,
      "learning_rate": 0.0001856784080527711,
      "loss": 3.023,
      "step": 143874
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.276677131652832,
      "learning_rate": 0.00018567462615305945,
      "loss": 3.141,
      "step": 143875
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4902873039245605,
      "learning_rate": 0.00018567084427460309,
      "loss": 2.8169,
      "step": 143876
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9892092943191528,
      "learning_rate": 0.0001856670624174027,
      "loss": 2.9448,
      "step": 143877
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.490736961364746,
      "learning_rate": 0.00018566328058145892,
      "loss": 2.9392,
      "step": 143878
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9422204494476318,
      "learning_rate": 0.0001856594987667725,
      "loss": 2.8704,
      "step": 143879
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4034066200256348,
      "learning_rate": 0.00018565571697334426,
      "loss": 2.8585,
      "step": 143880
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.032351493835449,
      "learning_rate": 0.0001856519352011747,
      "loss": 3.1076,
      "step": 143881
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.837425708770752,
      "learning_rate": 0.00018564815345026455,
      "loss": 3.2345,
      "step": 143882
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5695037841796875,
      "learning_rate": 0.00018564437172061458,
      "loss": 2.7965,
      "step": 143883
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4165947437286377,
      "learning_rate": 0.0001856405900122255,
      "loss": 2.9724,
      "step": 143884
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.938471555709839,
      "learning_rate": 0.00018563680832509797,
      "loss": 3.0116,
      "step": 143885
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8368723392486572,
      "learning_rate": 0.00018563302665923286,
      "loss": 3.0874,
      "step": 143886
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7113306522369385,
      "learning_rate": 0.0001856292450146306,
      "loss": 3.0721,
      "step": 143887
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2861874103546143,
      "learning_rate": 0.00018562546339129203,
      "loss": 2.5813,
      "step": 143888
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0639896392822266,
      "learning_rate": 0.00018562168178921782,
      "loss": 2.9578,
      "step": 143889
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1496105194091797,
      "learning_rate": 0.00018561790020840872,
      "loss": 3.0459,
      "step": 143890
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.365731716156006,
      "learning_rate": 0.0001856141186488654,
      "loss": 3.24,
      "step": 143891
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.8251049518585205,
      "learning_rate": 0.00018561033711058864,
      "loss": 2.9902,
      "step": 143892
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4402105808258057,
      "learning_rate": 0.00018560655559357904,
      "loss": 3.1469,
      "step": 143893
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4074723720550537,
      "learning_rate": 0.00018560277409783736,
      "loss": 3.2558,
      "step": 143894
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.11871600151062,
      "learning_rate": 0.0001855989926233642,
      "loss": 3.1231,
      "step": 143895
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.368910074234009,
      "learning_rate": 0.00018559521117016045,
      "loss": 2.9664,
      "step": 143896
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.2142629623413086,
      "learning_rate": 0.00018559142973822662,
      "loss": 2.9339,
      "step": 143897
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.463144063949585,
      "learning_rate": 0.00018558764832756362,
      "loss": 2.8773,
      "step": 143898
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3451733589172363,
      "learning_rate": 0.00018558386693817193,
      "loss": 2.9268,
      "step": 143899
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7892091274261475,
      "learning_rate": 0.00018558008557005237,
      "loss": 3.1494,
      "step": 143900
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.756047487258911,
      "learning_rate": 0.0001855763042232057,
      "loss": 3.089,
      "step": 143901
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2674176692962646,
      "learning_rate": 0.0001855725228976325,
      "loss": 2.9983,
      "step": 143902
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.609170436859131,
      "learning_rate": 0.00018556874159333353,
      "loss": 3.1109,
      "step": 143903
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.994666576385498,
      "learning_rate": 0.00018556496031030953,
      "loss": 3.2619,
      "step": 143904
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0750858783721924,
      "learning_rate": 0.00018556117904856113,
      "loss": 2.9926,
      "step": 143905
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.02899169921875,
      "learning_rate": 0.00018555739780808905,
      "loss": 3.0474,
      "step": 143906
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4694581031799316,
      "learning_rate": 0.000185553616588894,
      "loss": 3.1244,
      "step": 143907
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0632779598236084,
      "learning_rate": 0.0001855498353909768,
      "loss": 2.7812,
      "step": 143908
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9645614624023438,
      "learning_rate": 0.00018554605421433794,
      "loss": 2.9088,
      "step": 143909
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.711240530014038,
      "learning_rate": 0.00018554227305897828,
      "loss": 3.2773,
      "step": 143910
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.2065773010253906,
      "learning_rate": 0.00018553849192489842,
      "loss": 3.0534,
      "step": 143911
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.185918092727661,
      "learning_rate": 0.00018553471081209915,
      "loss": 2.9117,
      "step": 143912
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.13297176361084,
      "learning_rate": 0.00018553092972058111,
      "loss": 2.9686,
      "step": 143913
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.4705848693847656,
      "learning_rate": 0.00018552714865034502,
      "loss": 3.0726,
      "step": 143914
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.2771377563476562,
      "learning_rate": 0.00018552336760139167,
      "loss": 3.1233,
      "step": 143915
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.024906635284424,
      "learning_rate": 0.00018551958657372173,
      "loss": 2.8868,
      "step": 143916
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.533663272857666,
      "learning_rate": 0.00018551580556733574,
      "loss": 3.128,
      "step": 143917
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.6303226947784424,
      "learning_rate": 0.00018551202458223453,
      "loss": 3.0508,
      "step": 143918
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.177690029144287,
      "learning_rate": 0.00018550824361841884,
      "loss": 3.0122,
      "step": 143919
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.963853120803833,
      "learning_rate": 0.00018550446267588926,
      "loss": 2.8808,
      "step": 143920
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.7039341926574707,
      "learning_rate": 0.00018550068175464662,
      "loss": 3.1047,
      "step": 143921
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.261867046356201,
      "learning_rate": 0.00018549690085469163,
      "loss": 3.1349,
      "step": 143922
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.564141273498535,
      "learning_rate": 0.00018549311997602486,
      "loss": 2.7794,
      "step": 143923
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.668689250946045,
      "learning_rate": 0.00018548933911864704,
      "loss": 2.8326,
      "step": 143924
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.611860990524292,
      "learning_rate": 0.00018548555828255895,
      "loss": 3.1909,
      "step": 143925
    },
    {
      "epoch": 1.87,
      "grad_norm": 5.728092670440674,
      "learning_rate": 0.00018548177746776124,
      "loss": 3.0054,
      "step": 143926
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4202747344970703,
      "learning_rate": 0.0001854779966742546,
      "loss": 2.7927,
      "step": 143927
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0546910762786865,
      "learning_rate": 0.00018547421590203982,
      "loss": 3.0441,
      "step": 143928
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5371549129486084,
      "learning_rate": 0.0001854704351511176,
      "loss": 2.7853,
      "step": 143929
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.007582187652588,
      "learning_rate": 0.0001854666544214885,
      "loss": 3.0991,
      "step": 143930
    },
    {
      "epoch": 1.87,
      "grad_norm": 4.985220432281494,
      "learning_rate": 0.00018546287371315335,
      "loss": 2.761,
      "step": 143931
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9765520095825195,
      "learning_rate": 0.0001854590930261128,
      "loss": 2.9785,
      "step": 143932
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.004120349884033,
      "learning_rate": 0.0001854553123603675,
      "loss": 3.0675,
      "step": 143933
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3738062381744385,
      "learning_rate": 0.00018545153171591828,
      "loss": 2.9161,
      "step": 143934
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5707294940948486,
      "learning_rate": 0.0001854477510927659,
      "loss": 2.9576,
      "step": 143935
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.683192729949951,
      "learning_rate": 0.00018544397049091079,
      "loss": 2.6659,
      "step": 143936
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.270564317703247,
      "learning_rate": 0.00018544018991035384,
      "loss": 2.9712,
      "step": 143937
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.719843626022339,
      "learning_rate": 0.00018543640935109575,
      "loss": 2.9066,
      "step": 143938
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.142469644546509,
      "learning_rate": 0.0001854326288131371,
      "loss": 3.1149,
      "step": 143939
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.090542793273926,
      "learning_rate": 0.00018542884829647876,
      "loss": 2.8776,
      "step": 143940
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9212267398834229,
      "learning_rate": 0.0001854250678011215,
      "loss": 2.7553,
      "step": 143941
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1976139545440674,
      "learning_rate": 0.0001854212873270657,
      "loss": 2.8137,
      "step": 143942
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.661367893218994,
      "learning_rate": 0.00018541750687431225,
      "loss": 2.7495,
      "step": 143943
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.389478921890259,
      "learning_rate": 0.0001854137264428619,
      "loss": 3.1537,
      "step": 143944
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.276909828186035,
      "learning_rate": 0.00018540994603271523,
      "loss": 2.9429,
      "step": 143945
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.145871877670288,
      "learning_rate": 0.00018540616564387307,
      "loss": 2.9024,
      "step": 143946
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8549189567565918,
      "learning_rate": 0.0001854023852763362,
      "loss": 2.7007,
      "step": 143947
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.037956714630127,
      "learning_rate": 0.000185398604930105,
      "loss": 2.6791,
      "step": 143948
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.339367151260376,
      "learning_rate": 0.00018539482460518044,
      "loss": 3.0079,
      "step": 143949
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9838833808898926,
      "learning_rate": 0.0001853910443015631,
      "loss": 2.99,
      "step": 143950
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8642598390579224,
      "learning_rate": 0.00018538726401925372,
      "loss": 2.991,
      "step": 143951
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.291050910949707,
      "learning_rate": 0.00018538348375825303,
      "loss": 2.7543,
      "step": 143952
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1207525730133057,
      "learning_rate": 0.00018537970351856185,
      "loss": 2.8053,
      "step": 143953
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.061058759689331,
      "learning_rate": 0.00018537592330018063,
      "loss": 2.9583,
      "step": 143954
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.075127124786377,
      "learning_rate": 0.00018537214310311017,
      "loss": 3.1308,
      "step": 143955
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.300816535949707,
      "learning_rate": 0.00018536836292735117,
      "loss": 2.8455,
      "step": 143956
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.312434196472168,
      "learning_rate": 0.00018536458277290438,
      "loss": 3.1947,
      "step": 143957
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9892098903656006,
      "learning_rate": 0.00018536080263977046,
      "loss": 2.793,
      "step": 143958
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7410197257995605,
      "learning_rate": 0.0001853570225279503,
      "loss": 3.3797,
      "step": 143959
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.166416645050049,
      "learning_rate": 0.00018535324243744425,
      "loss": 3.0707,
      "step": 143960
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0128605365753174,
      "learning_rate": 0.00018534946236825324,
      "loss": 2.9496,
      "step": 143961
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0255722999572754,
      "learning_rate": 0.00018534568232037791,
      "loss": 3.0,
      "step": 143962
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0760390758514404,
      "learning_rate": 0.00018534190229381898,
      "loss": 3.0229,
      "step": 143963
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.124772548675537,
      "learning_rate": 0.00018533812228857717,
      "loss": 2.5864,
      "step": 143964
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1632285118103027,
      "learning_rate": 0.00018533434230465326,
      "loss": 2.8858,
      "step": 143965
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.050013542175293,
      "learning_rate": 0.00018533056234204778,
      "loss": 3.1232,
      "step": 143966
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.1197993755340576,
      "learning_rate": 0.0001853267824007615,
      "loss": 2.9837,
      "step": 143967
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.80665922164917,
      "learning_rate": 0.00018532300248079512,
      "loss": 3.0249,
      "step": 143968
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.0101256370544434,
      "learning_rate": 0.00018531922258214936,
      "loss": 2.9805,
      "step": 143969
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1833670139312744,
      "learning_rate": 0.00018531544270482492,
      "loss": 2.8289,
      "step": 143970
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.4091172218322754,
      "learning_rate": 0.00018531166284882267,
      "loss": 2.9555,
      "step": 143971
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9338657855987549,
      "learning_rate": 0.00018530788301414293,
      "loss": 3.2351,
      "step": 143972
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.818472146987915,
      "learning_rate": 0.00018530410320078672,
      "loss": 3.095,
      "step": 143973
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1812198162078857,
      "learning_rate": 0.00018530032340875462,
      "loss": 2.6042,
      "step": 143974
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.169302225112915,
      "learning_rate": 0.0001852965436380473,
      "loss": 2.8798,
      "step": 143975
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0991930961608887,
      "learning_rate": 0.00018529276388866555,
      "loss": 2.7443,
      "step": 143976
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.8697190284729004,
      "learning_rate": 0.00018528898416061008,
      "loss": 2.7831,
      "step": 143977
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.241205930709839,
      "learning_rate": 0.00018528520445388163,
      "loss": 3.0371,
      "step": 143978
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3830552101135254,
      "learning_rate": 0.0001852814247684807,
      "loss": 2.9247,
      "step": 143979
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.0607857704162598,
      "learning_rate": 0.0001852776451044081,
      "loss": 2.9872,
      "step": 143980
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.020235776901245,
      "learning_rate": 0.0001852738654616646,
      "loss": 3.0275,
      "step": 143981
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3882837295532227,
      "learning_rate": 0.00018527008584025083,
      "loss": 3.0133,
      "step": 143982
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.307446241378784,
      "learning_rate": 0.0001852663062401676,
      "loss": 2.8593,
      "step": 143983
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.155363082885742,
      "learning_rate": 0.00018526252666141546,
      "loss": 2.9687,
      "step": 143984
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2943241596221924,
      "learning_rate": 0.00018525874710399523,
      "loss": 2.9427,
      "step": 143985
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9535272121429443,
      "learning_rate": 0.0001852549675679075,
      "loss": 2.9313,
      "step": 143986
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.5154011249542236,
      "learning_rate": 0.00018525118805315307,
      "loss": 2.7046,
      "step": 143987
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.162662982940674,
      "learning_rate": 0.00018524740855973255,
      "loss": 2.7962,
      "step": 143988
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1771628856658936,
      "learning_rate": 0.00018524362908764683,
      "loss": 2.9564,
      "step": 143989
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.51033353805542,
      "learning_rate": 0.0001852398496368964,
      "loss": 2.8206,
      "step": 143990
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.362565279006958,
      "learning_rate": 0.00018523607020748203,
      "loss": 2.8383,
      "step": 143991
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.1967365741729736,
      "learning_rate": 0.00018523229079940446,
      "loss": 3.3559,
      "step": 143992
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.086911201477051,
      "learning_rate": 0.00018522851141266445,
      "loss": 2.8884,
      "step": 143993
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.3799283504486084,
      "learning_rate": 0.0001852247320472625,
      "loss": 2.8553,
      "step": 143994
    },
    {
      "epoch": 1.87,
      "grad_norm": 1.9709830284118652,
      "learning_rate": 0.0001852209527031995,
      "loss": 3.0549,
      "step": 143995
    },
    {
      "epoch": 1.87,
      "grad_norm": 3.028005599975586,
      "learning_rate": 0.00018521717338047613,
      "loss": 3.0066,
      "step": 143996
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.7743451595306396,
      "learning_rate": 0.00018521339407909302,
      "loss": 3.0569,
      "step": 143997
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.810140371322632,
      "learning_rate": 0.00018520961479905087,
      "loss": 2.5851,
      "step": 143998
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.309818744659424,
      "learning_rate": 0.00018520583554035045,
      "loss": 2.9204,
      "step": 143999
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.882216691970825,
      "learning_rate": 0.00018520205630299244,
      "loss": 3.3838,
      "step": 144000
    },
    {
      "epoch": 1.87,
      "grad_norm": 2.2108347415924072,
      "learning_rate": 0.00018519827708697752,
      "loss": 2.7147,
      "step": 144001
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1805598735809326,
      "learning_rate": 0.00018519449789230645,
      "loss": 3.0938,
      "step": 144002
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3441970348358154,
      "learning_rate": 0.00018519071871897985,
      "loss": 2.8394,
      "step": 144003
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6486010551452637,
      "learning_rate": 0.00018518693956699848,
      "loss": 2.7472,
      "step": 144004
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.376183032989502,
      "learning_rate": 0.00018518316043636299,
      "loss": 3.0057,
      "step": 144005
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2946360111236572,
      "learning_rate": 0.0001851793813270741,
      "loss": 3.2179,
      "step": 144006
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2791402339935303,
      "learning_rate": 0.00018517560223913263,
      "loss": 2.8068,
      "step": 144007
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.312988758087158,
      "learning_rate": 0.00018517182317253917,
      "loss": 3.1009,
      "step": 144008
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1067163944244385,
      "learning_rate": 0.0001851680441272944,
      "loss": 2.7282,
      "step": 144009
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5016560554504395,
      "learning_rate": 0.000185164265103399,
      "loss": 3.0531,
      "step": 144010
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.5591249465942383,
      "learning_rate": 0.00018516048610085374,
      "loss": 3.0813,
      "step": 144011
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.103255033493042,
      "learning_rate": 0.00018515670711965936,
      "loss": 2.9133,
      "step": 144012
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3279032707214355,
      "learning_rate": 0.0001851529281598165,
      "loss": 2.8469,
      "step": 144013
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3996965885162354,
      "learning_rate": 0.000185149149221326,
      "loss": 3.0615,
      "step": 144014
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.593977928161621,
      "learning_rate": 0.00018514537030418833,
      "loss": 2.906,
      "step": 144015
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9394253492355347,
      "learning_rate": 0.00018514159140840423,
      "loss": 2.9355,
      "step": 144016
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2810940742492676,
      "learning_rate": 0.00018513781253397455,
      "loss": 2.7428,
      "step": 144017
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8201701641082764,
      "learning_rate": 0.0001851340336808999,
      "loss": 2.8843,
      "step": 144018
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.574948310852051,
      "learning_rate": 0.000185130254849181,
      "loss": 3.301,
      "step": 144019
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.6663174629211426,
      "learning_rate": 0.0001851264760388187,
      "loss": 2.6711,
      "step": 144020
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1947181224823,
      "learning_rate": 0.0001851226972498134,
      "loss": 3.0483,
      "step": 144021
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.331054449081421,
      "learning_rate": 0.00018511891848216596,
      "loss": 3.0329,
      "step": 144022
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2582149505615234,
      "learning_rate": 0.00018511513973587708,
      "loss": 2.9476,
      "step": 144023
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.22206974029541,
      "learning_rate": 0.00018511136101094752,
      "loss": 2.7248,
      "step": 144024
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9608250856399536,
      "learning_rate": 0.00018510758230737788,
      "loss": 2.7917,
      "step": 144025
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0300800800323486,
      "learning_rate": 0.000185103803625169,
      "loss": 2.9041,
      "step": 144026
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.352062463760376,
      "learning_rate": 0.00018510002496432144,
      "loss": 3.2181,
      "step": 144027
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2359182834625244,
      "learning_rate": 0.00018509624632483587,
      "loss": 2.8851,
      "step": 144028
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.980682373046875,
      "learning_rate": 0.00018509246770671316,
      "loss": 2.9214,
      "step": 144029
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4280881881713867,
      "learning_rate": 0.00018508868910995391,
      "loss": 3.0887,
      "step": 144030
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8185458183288574,
      "learning_rate": 0.00018508491053455882,
      "loss": 3.1536,
      "step": 144031
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.145354986190796,
      "learning_rate": 0.00018508113198052877,
      "loss": 2.7071,
      "step": 144032
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8866474628448486,
      "learning_rate": 0.00018507735344786415,
      "loss": 3.0177,
      "step": 144033
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.550525188446045,
      "learning_rate": 0.00018507357493656584,
      "loss": 2.9872,
      "step": 144034
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.94960880279541,
      "learning_rate": 0.0001850697964466345,
      "loss": 2.8657,
      "step": 144035
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0536763668060303,
      "learning_rate": 0.00018506601797807086,
      "loss": 3.1025,
      "step": 144036
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.499286413192749,
      "learning_rate": 0.0001850622395308756,
      "loss": 2.806,
      "step": 144037
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.15344500541687,
      "learning_rate": 0.00018505846110504959,
      "loss": 2.9828,
      "step": 144038
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9563850164413452,
      "learning_rate": 0.00018505468270059325,
      "loss": 3.0844,
      "step": 144039
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.271023988723755,
      "learning_rate": 0.0001850509043175074,
      "loss": 2.9945,
      "step": 144040
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8490452766418457,
      "learning_rate": 0.00018504712595579276,
      "loss": 3.1431,
      "step": 144041
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.1985654830932617,
      "learning_rate": 0.00018504334761545007,
      "loss": 2.9675,
      "step": 144042
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4490015506744385,
      "learning_rate": 0.00018503956929647993,
      "loss": 2.8222,
      "step": 144043
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.544084310531616,
      "learning_rate": 0.0001850357909988833,
      "loss": 2.9639,
      "step": 144044
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2991178035736084,
      "learning_rate": 0.00018503201272266053,
      "loss": 2.8606,
      "step": 144045
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.69998836517334,
      "learning_rate": 0.00018502823446781247,
      "loss": 3.0302,
      "step": 144046
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.523221492767334,
      "learning_rate": 0.00018502445623433983,
      "loss": 2.728,
      "step": 144047
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.251260757446289,
      "learning_rate": 0.00018502067802224332,
      "loss": 2.8105,
      "step": 144048
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.972029209136963,
      "learning_rate": 0.00018501689983152362,
      "loss": 2.8251,
      "step": 144049
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8725674152374268,
      "learning_rate": 0.00018501312166218162,
      "loss": 3.0065,
      "step": 144050
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9749914407730103,
      "learning_rate": 0.00018500934351421764,
      "loss": 2.9863,
      "step": 144051
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1904067993164062,
      "learning_rate": 0.00018500556538763268,
      "loss": 3.0352,
      "step": 144052
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.5015108585357666,
      "learning_rate": 0.0001850017872824273,
      "loss": 2.8369,
      "step": 144053
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2836387157440186,
      "learning_rate": 0.0001849980091986023,
      "loss": 3.018,
      "step": 144054
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.908832311630249,
      "learning_rate": 0.0001849942311361583,
      "loss": 2.8887,
      "step": 144055
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9026652574539185,
      "learning_rate": 0.00018499045309509625,
      "loss": 3.0606,
      "step": 144056
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0688629150390625,
      "learning_rate": 0.00018498667507541643,
      "loss": 2.8601,
      "step": 144057
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8324801921844482,
      "learning_rate": 0.0001849828970771198,
      "loss": 2.711,
      "step": 144058
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5234179496765137,
      "learning_rate": 0.000184979119100207,
      "loss": 2.9994,
      "step": 144059
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.579045534133911,
      "learning_rate": 0.00018497534114467876,
      "loss": 2.9522,
      "step": 144060
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9939780235290527,
      "learning_rate": 0.00018497156321053577,
      "loss": 2.9538,
      "step": 144061
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1651201248168945,
      "learning_rate": 0.00018496778529777878,
      "loss": 2.7368,
      "step": 144062
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1195144653320312,
      "learning_rate": 0.00018496400740640852,
      "loss": 3.1017,
      "step": 144063
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.129964828491211,
      "learning_rate": 0.00018496022953642553,
      "loss": 2.6612,
      "step": 144064
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.136798858642578,
      "learning_rate": 0.00018495645168783057,
      "loss": 2.8485,
      "step": 144065
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.401658535003662,
      "learning_rate": 0.00018495267386062445,
      "loss": 2.8875,
      "step": 144066
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0141170024871826,
      "learning_rate": 0.00018494889605480774,
      "loss": 2.8996,
      "step": 144067
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2984325885772705,
      "learning_rate": 0.0001849451182703812,
      "loss": 2.8237,
      "step": 144068
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1865341663360596,
      "learning_rate": 0.0001849413405073456,
      "loss": 3.0448,
      "step": 144069
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.106609344482422,
      "learning_rate": 0.00018493756276570161,
      "loss": 2.7431,
      "step": 144070
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8247082233428955,
      "learning_rate": 0.0001849337850454498,
      "loss": 2.929,
      "step": 144071
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0825355052948,
      "learning_rate": 0.000184930007346591,
      "loss": 3.0474,
      "step": 144072
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.3449440002441406,
      "learning_rate": 0.00018492622966912586,
      "loss": 3.0064,
      "step": 144073
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8297669887542725,
      "learning_rate": 0.00018492245201305512,
      "loss": 3.1272,
      "step": 144074
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.996673345565796,
      "learning_rate": 0.00018491867437837954,
      "loss": 2.8864,
      "step": 144075
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.58449649810791,
      "learning_rate": 0.00018491489676509968,
      "loss": 3.054,
      "step": 144076
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3052783012390137,
      "learning_rate": 0.00018491111917321633,
      "loss": 2.758,
      "step": 144077
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4156079292297363,
      "learning_rate": 0.00018490734160273015,
      "loss": 3.2026,
      "step": 144078
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9005894660949707,
      "learning_rate": 0.0001849035640536419,
      "loss": 3.1248,
      "step": 144079
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1533701419830322,
      "learning_rate": 0.0001848997865259522,
      "loss": 2.818,
      "step": 144080
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8344216346740723,
      "learning_rate": 0.0001848960090196619,
      "loss": 3.0026,
      "step": 144081
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.502483367919922,
      "learning_rate": 0.0001848922315347715,
      "loss": 2.8005,
      "step": 144082
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.483450412750244,
      "learning_rate": 0.00018488845407128185,
      "loss": 2.9862,
      "step": 144083
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.437488317489624,
      "learning_rate": 0.00018488467662919358,
      "loss": 3.0679,
      "step": 144084
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.179593086242676,
      "learning_rate": 0.00018488089920850752,
      "loss": 2.8725,
      "step": 144085
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2872512340545654,
      "learning_rate": 0.00018487712180922415,
      "loss": 3.0026,
      "step": 144086
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2045156955718994,
      "learning_rate": 0.0001848733444313444,
      "loss": 2.8693,
      "step": 144087
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.304006338119507,
      "learning_rate": 0.00018486956707486883,
      "loss": 2.8406,
      "step": 144088
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.673200845718384,
      "learning_rate": 0.00018486578973979817,
      "loss": 3.0837,
      "step": 144089
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.071335792541504,
      "learning_rate": 0.0001848620124261331,
      "loss": 3.3062,
      "step": 144090
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2087035179138184,
      "learning_rate": 0.00018485823513387438,
      "loss": 2.8347,
      "step": 144091
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.199772596359253,
      "learning_rate": 0.00018485445786302273,
      "loss": 2.9029,
      "step": 144092
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.055558443069458,
      "learning_rate": 0.00018485068061357884,
      "loss": 2.9193,
      "step": 144093
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.565192222595215,
      "learning_rate": 0.00018484690338554328,
      "loss": 2.9285,
      "step": 144094
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2710506916046143,
      "learning_rate": 0.0001848431261789169,
      "loss": 3.0459,
      "step": 144095
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4226038455963135,
      "learning_rate": 0.00018483934899370035,
      "loss": 2.7178,
      "step": 144096
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9021010398864746,
      "learning_rate": 0.00018483557182989428,
      "loss": 2.7755,
      "step": 144097
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1809146404266357,
      "learning_rate": 0.00018483179468749953,
      "loss": 3.1481,
      "step": 144098
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.028430700302124,
      "learning_rate": 0.00018482801756651676,
      "loss": 3.0638,
      "step": 144099
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8384830951690674,
      "learning_rate": 0.00018482424046694658,
      "loss": 3.0366,
      "step": 144100
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.144479990005493,
      "learning_rate": 0.00018482046338878975,
      "loss": 2.8838,
      "step": 144101
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.348175525665283,
      "learning_rate": 0.00018481668633204694,
      "loss": 3.1076,
      "step": 144102
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.424229860305786,
      "learning_rate": 0.00018481290929671889,
      "loss": 2.8947,
      "step": 144103
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.358123302459717,
      "learning_rate": 0.00018480913228280632,
      "loss": 3.0598,
      "step": 144104
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0371813774108887,
      "learning_rate": 0.00018480535529031002,
      "loss": 3.1721,
      "step": 144105
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.213597297668457,
      "learning_rate": 0.00018480157831923043,
      "loss": 2.9522,
      "step": 144106
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2289633750915527,
      "learning_rate": 0.0001847978013695684,
      "loss": 3.0423,
      "step": 144107
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.502903699874878,
      "learning_rate": 0.00018479402444132468,
      "loss": 2.8519,
      "step": 144108
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0511248111724854,
      "learning_rate": 0.00018479024753449984,
      "loss": 3.1052,
      "step": 144109
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9859187602996826,
      "learning_rate": 0.00018478647064909475,
      "loss": 2.981,
      "step": 144110
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.276268482208252,
      "learning_rate": 0.00018478269378511013,
      "loss": 3.1251,
      "step": 144111
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.265476703643799,
      "learning_rate": 0.00018477891694254644,
      "loss": 2.9955,
      "step": 144112
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.150015115737915,
      "learning_rate": 0.00018477514012140453,
      "loss": 3.1555,
      "step": 144113
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4671425819396973,
      "learning_rate": 0.0001847713633216851,
      "loss": 2.8546,
      "step": 144114
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3432281017303467,
      "learning_rate": 0.00018476758654338885,
      "loss": 2.8168,
      "step": 144115
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0367112159729004,
      "learning_rate": 0.0001847638097865165,
      "loss": 2.9836,
      "step": 144116
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2742137908935547,
      "learning_rate": 0.00018476003305106884,
      "loss": 2.6794,
      "step": 144117
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.87044620513916,
      "learning_rate": 0.00018475625633704632,
      "loss": 2.8743,
      "step": 144118
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.500488519668579,
      "learning_rate": 0.00018475247964444983,
      "loss": 3.0779,
      "step": 144119
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9724066257476807,
      "learning_rate": 0.00018474870297328,
      "loss": 2.926,
      "step": 144120
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.398869514465332,
      "learning_rate": 0.0001847449263235376,
      "loss": 2.8132,
      "step": 144121
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4632558822631836,
      "learning_rate": 0.00018474114969522324,
      "loss": 3.0982,
      "step": 144122
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1003663539886475,
      "learning_rate": 0.00018473737308833782,
      "loss": 3.0027,
      "step": 144123
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9405211210250854,
      "learning_rate": 0.0001847335965028818,
      "loss": 3.0695,
      "step": 144124
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5306384563446045,
      "learning_rate": 0.00018472981993885592,
      "loss": 3.0387,
      "step": 144125
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0397861003875732,
      "learning_rate": 0.00018472604339626097,
      "loss": 2.6807,
      "step": 144126
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.508495807647705,
      "learning_rate": 0.00018472226687509766,
      "loss": 2.9515,
      "step": 144127
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.585559606552124,
      "learning_rate": 0.00018471849037536662,
      "loss": 2.9102,
      "step": 144128
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.443542003631592,
      "learning_rate": 0.00018471471389706862,
      "loss": 2.8934,
      "step": 144129
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9999313354492188,
      "learning_rate": 0.0001847109374402044,
      "loss": 2.819,
      "step": 144130
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1459617614746094,
      "learning_rate": 0.00018470716100477453,
      "loss": 2.9887,
      "step": 144131
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9646505117416382,
      "learning_rate": 0.00018470338459077972,
      "loss": 2.9336,
      "step": 144132
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0795629024505615,
      "learning_rate": 0.00018469960819822082,
      "loss": 2.9013,
      "step": 144133
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.964231252670288,
      "learning_rate": 0.00018469583182709832,
      "loss": 2.7825,
      "step": 144134
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.216115713119507,
      "learning_rate": 0.0001846920554774131,
      "loss": 2.9536,
      "step": 144135
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0251166820526123,
      "learning_rate": 0.00018468827914916593,
      "loss": 2.9079,
      "step": 144136
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8306686878204346,
      "learning_rate": 0.00018468450284235728,
      "loss": 3.0192,
      "step": 144137
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1003215312957764,
      "learning_rate": 0.00018468072655698794,
      "loss": 2.9686,
      "step": 144138
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.767038583755493,
      "learning_rate": 0.0001846769502930586,
      "loss": 2.9029,
      "step": 144139
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9637757539749146,
      "learning_rate": 0.00018467317405057002,
      "loss": 3.1092,
      "step": 144140
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.473292589187622,
      "learning_rate": 0.00018466939782952287,
      "loss": 2.889,
      "step": 144141
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.081212043762207,
      "learning_rate": 0.00018466562162991796,
      "loss": 2.7656,
      "step": 144142
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.489562511444092,
      "learning_rate": 0.00018466184545175578,
      "loss": 3.1625,
      "step": 144143
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2105863094329834,
      "learning_rate": 0.00018465806929503716,
      "loss": 2.9714,
      "step": 144144
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7233123779296875,
      "learning_rate": 0.00018465429315976277,
      "loss": 3.0226,
      "step": 144145
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2383978366851807,
      "learning_rate": 0.0001846505170459333,
      "loss": 2.7535,
      "step": 144146
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7140138149261475,
      "learning_rate": 0.00018464674095354953,
      "loss": 3.0927,
      "step": 144147
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0093603134155273,
      "learning_rate": 0.00018464296488261218,
      "loss": 2.9638,
      "step": 144148
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.426720142364502,
      "learning_rate": 0.00018463918883312175,
      "loss": 3.0972,
      "step": 144149
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.3745410442352295,
      "learning_rate": 0.00018463541280507912,
      "loss": 3.1343,
      "step": 144150
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.189586877822876,
      "learning_rate": 0.00018463163679848492,
      "loss": 2.9479,
      "step": 144151
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.285975217819214,
      "learning_rate": 0.00018462786081333989,
      "loss": 3.134,
      "step": 144152
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.7147676944732666,
      "learning_rate": 0.0001846240848496447,
      "loss": 2.7899,
      "step": 144153
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.540102958679199,
      "learning_rate": 0.00018462030890740012,
      "loss": 3.092,
      "step": 144154
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9569745063781738,
      "learning_rate": 0.00018461653298660683,
      "loss": 2.8985,
      "step": 144155
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.864722967147827,
      "learning_rate": 0.00018461275708726542,
      "loss": 2.8572,
      "step": 144156
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1818950176239014,
      "learning_rate": 0.0001846089812093767,
      "loss": 2.7614,
      "step": 144157
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2532958984375,
      "learning_rate": 0.00018460520535294132,
      "loss": 2.9186,
      "step": 144158
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.3895835876464844,
      "learning_rate": 0.00018460142951796005,
      "loss": 2.9442,
      "step": 144159
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.8267500400543213,
      "learning_rate": 0.0001845976537044336,
      "loss": 2.9009,
      "step": 144160
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0360360145568848,
      "learning_rate": 0.00018459387791236253,
      "loss": 2.8951,
      "step": 144161
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0286829471588135,
      "learning_rate": 0.00018459010214174777,
      "loss": 3.0202,
      "step": 144162
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.706563949584961,
      "learning_rate": 0.00018458632639258978,
      "loss": 3.083,
      "step": 144163
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.2181077003479,
      "learning_rate": 0.00018458255066488934,
      "loss": 2.9459,
      "step": 144164
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.520102024078369,
      "learning_rate": 0.00018457877495864721,
      "loss": 2.8672,
      "step": 144165
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.398465871810913,
      "learning_rate": 0.00018457499927386416,
      "loss": 3.0355,
      "step": 144166
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.167914390563965,
      "learning_rate": 0.0001845712236105407,
      "loss": 2.8288,
      "step": 144167
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.788658380508423,
      "learning_rate": 0.00018456744796867765,
      "loss": 3.0669,
      "step": 144168
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.160223960876465,
      "learning_rate": 0.00018456367234827572,
      "loss": 2.8409,
      "step": 144169
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.263681173324585,
      "learning_rate": 0.00018455989674933553,
      "loss": 2.7802,
      "step": 144170
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8053195476531982,
      "learning_rate": 0.00018455612117185784,
      "loss": 3.0727,
      "step": 144171
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6351373195648193,
      "learning_rate": 0.0001845523456158434,
      "loss": 2.8968,
      "step": 144172
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.147750377655029,
      "learning_rate": 0.0001845485700812928,
      "loss": 2.8574,
      "step": 144173
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8566935062408447,
      "learning_rate": 0.00018454479456820683,
      "loss": 3.4267,
      "step": 144174
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2983922958374023,
      "learning_rate": 0.00018454101907658613,
      "loss": 2.9949,
      "step": 144175
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.492408037185669,
      "learning_rate": 0.00018453724360643144,
      "loss": 3.1827,
      "step": 144176
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.960507869720459,
      "learning_rate": 0.0001845334681577435,
      "loss": 2.8882,
      "step": 144177
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.389700412750244,
      "learning_rate": 0.00018452969273052302,
      "loss": 2.6096,
      "step": 144178
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.8762595653533936,
      "learning_rate": 0.00018452591732477056,
      "loss": 2.9643,
      "step": 144179
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.69891619682312,
      "learning_rate": 0.00018452214194048688,
      "loss": 2.9907,
      "step": 144180
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9390647411346436,
      "learning_rate": 0.00018451836657767276,
      "loss": 2.9679,
      "step": 144181
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.693864583969116,
      "learning_rate": 0.00018451459123632885,
      "loss": 2.8635,
      "step": 144182
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.390488624572754,
      "learning_rate": 0.00018451081591645585,
      "loss": 3.0505,
      "step": 144183
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.43070650100708,
      "learning_rate": 0.00018450704061805453,
      "loss": 3.1905,
      "step": 144184
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8502235412597656,
      "learning_rate": 0.00018450326534112546,
      "loss": 2.8096,
      "step": 144185
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8423244953155518,
      "learning_rate": 0.00018449949008566944,
      "loss": 2.9802,
      "step": 144186
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7862305641174316,
      "learning_rate": 0.00018449571485168714,
      "loss": 2.9437,
      "step": 144187
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.482447385787964,
      "learning_rate": 0.00018449193963917924,
      "loss": 2.8392,
      "step": 144188
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2066173553466797,
      "learning_rate": 0.00018448816444814647,
      "loss": 2.9835,
      "step": 144189
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6571404933929443,
      "learning_rate": 0.00018448438927858967,
      "loss": 2.9373,
      "step": 144190
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.8151371479034424,
      "learning_rate": 0.0001844806141305093,
      "loss": 2.7062,
      "step": 144191
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0113816261291504,
      "learning_rate": 0.0001844768390039061,
      "loss": 2.9546,
      "step": 144192
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3878750801086426,
      "learning_rate": 0.00018447306389878087,
      "loss": 2.9311,
      "step": 144193
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7706563472747803,
      "learning_rate": 0.00018446928881513428,
      "loss": 3.2028,
      "step": 144194
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5858585834503174,
      "learning_rate": 0.00018446551375296707,
      "loss": 3.0193,
      "step": 144195
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7232673168182373,
      "learning_rate": 0.00018446173871227984,
      "loss": 3.149,
      "step": 144196
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3624014854431152,
      "learning_rate": 0.00018445796369307351,
      "loss": 2.6551,
      "step": 144197
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1488304138183594,
      "learning_rate": 0.00018445418869534848,
      "loss": 3.1343,
      "step": 144198
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.332103729248047,
      "learning_rate": 0.00018445041371910562,
      "loss": 3.3273,
      "step": 144199
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.30865478515625,
      "learning_rate": 0.00018444663876434556,
      "loss": 3.1277,
      "step": 144200
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5132715702056885,
      "learning_rate": 0.00018444286383106911,
      "loss": 3.0204,
      "step": 144201
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1652145385742188,
      "learning_rate": 0.0001844390889192769,
      "loss": 3.2441,
      "step": 144202
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9646230936050415,
      "learning_rate": 0.00018443531402896976,
      "loss": 3.1726,
      "step": 144203
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1642603874206543,
      "learning_rate": 0.00018443153916014812,
      "loss": 2.93,
      "step": 144204
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.508113384246826,
      "learning_rate": 0.00018442776431281288,
      "loss": 2.9213,
      "step": 144205
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2509031295776367,
      "learning_rate": 0.00018442398948696471,
      "loss": 2.7245,
      "step": 144206
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5038821697235107,
      "learning_rate": 0.0001844202146826043,
      "loss": 3.0301,
      "step": 144207
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.279451847076416,
      "learning_rate": 0.00018441643989973233,
      "loss": 2.7692,
      "step": 144208
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2332775592803955,
      "learning_rate": 0.00018441266513834968,
      "loss": 3.0018,
      "step": 144209
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9603865146636963,
      "learning_rate": 0.00018440889039845673,
      "loss": 2.8555,
      "step": 144210
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.67155122756958,
      "learning_rate": 0.00018440511568005436,
      "loss": 2.9902,
      "step": 144211
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.085508346557617,
      "learning_rate": 0.00018440134098314325,
      "loss": 2.9977,
      "step": 144212
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.260870933532715,
      "learning_rate": 0.00018439756630772415,
      "loss": 2.9022,
      "step": 144213
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.023535966873169,
      "learning_rate": 0.00018439379165379773,
      "loss": 3.0435,
      "step": 144214
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.135139226913452,
      "learning_rate": 0.00018439001702136477,
      "loss": 2.9173,
      "step": 144215
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9830381870269775,
      "learning_rate": 0.00018438624241042578,
      "loss": 3.0116,
      "step": 144216
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.299774169921875,
      "learning_rate": 0.00018438246782098155,
      "loss": 2.982,
      "step": 144217
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0289392471313477,
      "learning_rate": 0.00018437869325303285,
      "loss": 2.9611,
      "step": 144218
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5727553367614746,
      "learning_rate": 0.0001843749187065803,
      "loss": 2.8751,
      "step": 144219
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.336643934249878,
      "learning_rate": 0.00018437114418162463,
      "loss": 3.1428,
      "step": 144220
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9475798606872559,
      "learning_rate": 0.00018436736967816672,
      "loss": 2.997,
      "step": 144221
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.047340154647827,
      "learning_rate": 0.00018436359519620693,
      "loss": 2.8348,
      "step": 144222
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6699624061584473,
      "learning_rate": 0.00018435982073574614,
      "loss": 2.9847,
      "step": 144223
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.03656268119812,
      "learning_rate": 0.00018435604629678503,
      "loss": 2.8315,
      "step": 144224
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3161678314208984,
      "learning_rate": 0.00018435227187932432,
      "loss": 2.8455,
      "step": 144225
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.208148002624512,
      "learning_rate": 0.00018434849748336473,
      "loss": 2.9199,
      "step": 144226
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9706311225891113,
      "learning_rate": 0.00018434472310890703,
      "loss": 2.8627,
      "step": 144227
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.324617624282837,
      "learning_rate": 0.00018434094875595175,
      "loss": 3.0053,
      "step": 144228
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.036184787750244,
      "learning_rate": 0.00018433717442449963,
      "loss": 2.9621,
      "step": 144229
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.181688070297241,
      "learning_rate": 0.00018433340011455142,
      "loss": 3.1485,
      "step": 144230
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.184622049331665,
      "learning_rate": 0.0001843296258261078,
      "loss": 3.1339,
      "step": 144231
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.431835651397705,
      "learning_rate": 0.0001843258515591695,
      "loss": 3.0678,
      "step": 144232
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1831748485565186,
      "learning_rate": 0.00018432207731373736,
      "loss": 3.2186,
      "step": 144233
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.09529185295105,
      "learning_rate": 0.00018431830308981176,
      "loss": 3.0086,
      "step": 144234
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3547465801239014,
      "learning_rate": 0.00018431452888739358,
      "loss": 2.9924,
      "step": 144235
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1164302825927734,
      "learning_rate": 0.0001843107547064835,
      "loss": 2.9764,
      "step": 144236
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1412649154663086,
      "learning_rate": 0.0001843069805470823,
      "loss": 3.0791,
      "step": 144237
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.13018798828125,
      "learning_rate": 0.00018430320640919058,
      "loss": 2.9189,
      "step": 144238
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3303327560424805,
      "learning_rate": 0.00018429943229280918,
      "loss": 2.6631,
      "step": 144239
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.177828311920166,
      "learning_rate": 0.0001842956581979386,
      "loss": 2.8764,
      "step": 144240
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.30531907081604,
      "learning_rate": 0.00018429188412457962,
      "loss": 2.8557,
      "step": 144241
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5718703269958496,
      "learning_rate": 0.00018428811007273297,
      "loss": 3.0024,
      "step": 144242
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.492051839828491,
      "learning_rate": 0.00018428433604239934,
      "loss": 3.0699,
      "step": 144243
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.071284055709839,
      "learning_rate": 0.00018428056203357947,
      "loss": 3.1753,
      "step": 144244
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.707545757293701,
      "learning_rate": 0.00018427678804627405,
      "loss": 2.5754,
      "step": 144245
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.197370767593384,
      "learning_rate": 0.00018427301408048373,
      "loss": 3.1205,
      "step": 144246
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.709836006164551,
      "learning_rate": 0.00018426924013620924,
      "loss": 2.8944,
      "step": 144247
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.050973415374756,
      "learning_rate": 0.00018426546621345128,
      "loss": 3.1349,
      "step": 144248
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.5813522338867188,
      "learning_rate": 0.00018426169231221056,
      "loss": 3.0851,
      "step": 144249
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.619006872177124,
      "learning_rate": 0.00018425791843248778,
      "loss": 2.9729,
      "step": 144250
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6379804611206055,
      "learning_rate": 0.00018425414457428366,
      "loss": 2.645,
      "step": 144251
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.185546875,
      "learning_rate": 0.00018425037073759883,
      "loss": 2.9269,
      "step": 144252
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9834247827529907,
      "learning_rate": 0.00018424659692243406,
      "loss": 3.0046,
      "step": 144253
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1814193725585938,
      "learning_rate": 0.00018424282312879008,
      "loss": 2.8643,
      "step": 144254
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.217237949371338,
      "learning_rate": 0.00018423904935666748,
      "loss": 3.1999,
      "step": 144255
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1065526008605957,
      "learning_rate": 0.000184235275606067,
      "loss": 2.9823,
      "step": 144256
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1676039695739746,
      "learning_rate": 0.0001842315018769895,
      "loss": 2.8637,
      "step": 144257
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.825597047805786,
      "learning_rate": 0.00018422772816943542,
      "loss": 3.2003,
      "step": 144258
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8396847248077393,
      "learning_rate": 0.0001842239544834056,
      "loss": 3.5185,
      "step": 144259
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.37400484085083,
      "learning_rate": 0.00018422018081890075,
      "loss": 2.7089,
      "step": 144260
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.276038408279419,
      "learning_rate": 0.00018421640717592163,
      "loss": 3.027,
      "step": 144261
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0388355255126953,
      "learning_rate": 0.0001842126335544688,
      "loss": 2.5572,
      "step": 144262
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6359899044036865,
      "learning_rate": 0.000184208859954543,
      "loss": 3.0654,
      "step": 144263
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.841474533081055,
      "learning_rate": 0.00018420508637614506,
      "loss": 3.1126,
      "step": 144264
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6624999046325684,
      "learning_rate": 0.00018420131281927547,
      "loss": 2.9696,
      "step": 144265
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.34108829498291,
      "learning_rate": 0.00018419753928393507,
      "loss": 2.7784,
      "step": 144266
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0075807571411133,
      "learning_rate": 0.00018419376577012453,
      "loss": 2.9157,
      "step": 144267
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6972339153289795,
      "learning_rate": 0.00018418999227784463,
      "loss": 2.8773,
      "step": 144268
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.29215145111084,
      "learning_rate": 0.0001841862188070959,
      "loss": 3.1626,
      "step": 144269
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4928693771362305,
      "learning_rate": 0.0001841824453578792,
      "loss": 3.0902,
      "step": 144270
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3415307998657227,
      "learning_rate": 0.00018417867193019514,
      "loss": 3.064,
      "step": 144271
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0788450241088867,
      "learning_rate": 0.00018417489852404448,
      "loss": 3.168,
      "step": 144272
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.1237571239471436,
      "learning_rate": 0.00018417112513942785,
      "loss": 3.2471,
      "step": 144273
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.38061785697937,
      "learning_rate": 0.00018416735177634598,
      "loss": 3.1105,
      "step": 144274
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.733548879623413,
      "learning_rate": 0.0001841635784347996,
      "loss": 3.0905,
      "step": 144275
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.568504571914673,
      "learning_rate": 0.00018415980511478952,
      "loss": 3.1111,
      "step": 144276
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.272796630859375,
      "learning_rate": 0.00018415603181631617,
      "loss": 2.8899,
      "step": 144277
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2218446731567383,
      "learning_rate": 0.00018415225853938048,
      "loss": 2.8865,
      "step": 144278
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2607216835021973,
      "learning_rate": 0.000184148485283983,
      "loss": 2.7791,
      "step": 144279
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4076015949249268,
      "learning_rate": 0.0001841447120501245,
      "loss": 2.8989,
      "step": 144280
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9301344156265259,
      "learning_rate": 0.0001841409388378057,
      "loss": 3.1501,
      "step": 144281
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.965404510498047,
      "learning_rate": 0.00018413716564702744,
      "loss": 2.9914,
      "step": 144282
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.652949094772339,
      "learning_rate": 0.0001841333924777901,
      "loss": 2.729,
      "step": 144283
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9064581394195557,
      "learning_rate": 0.0001841296193300946,
      "loss": 2.7879,
      "step": 144284
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.452749729156494,
      "learning_rate": 0.00018412584620394157,
      "loss": 3.0658,
      "step": 144285
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.199448585510254,
      "learning_rate": 0.00018412207309933173,
      "loss": 2.7693,
      "step": 144286
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.016233444213867,
      "learning_rate": 0.00018411830001626578,
      "loss": 3.0068,
      "step": 144287
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9334404468536377,
      "learning_rate": 0.00018411452695474455,
      "loss": 2.9591,
      "step": 144288
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.597325563430786,
      "learning_rate": 0.0001841107539147685,
      "loss": 2.9021,
      "step": 144289
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.9499199390411377,
      "learning_rate": 0.00018410698089633843,
      "loss": 2.9604,
      "step": 144290
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.252460479736328,
      "learning_rate": 0.0001841032078994551,
      "loss": 2.6937,
      "step": 144291
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.037045955657959,
      "learning_rate": 0.00018409943492411914,
      "loss": 3.0078,
      "step": 144292
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.143138885498047,
      "learning_rate": 0.00018409566197033128,
      "loss": 2.9738,
      "step": 144293
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.419980049133301,
      "learning_rate": 0.0001840918890380923,
      "loss": 3.0423,
      "step": 144294
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.1725051403045654,
      "learning_rate": 0.00018408811612740278,
      "loss": 3.0072,
      "step": 144295
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.980731248855591,
      "learning_rate": 0.00018408434323826346,
      "loss": 2.6763,
      "step": 144296
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0576345920562744,
      "learning_rate": 0.00018408057037067502,
      "loss": 2.8843,
      "step": 144297
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.114521026611328,
      "learning_rate": 0.00018407679752463819,
      "loss": 2.924,
      "step": 144298
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.887937068939209,
      "learning_rate": 0.00018407302470015372,
      "loss": 2.8751,
      "step": 144299
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.162410020828247,
      "learning_rate": 0.00018406925189722235,
      "loss": 3.0933,
      "step": 144300
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.183823823928833,
      "learning_rate": 0.00018406547911584454,
      "loss": 3.043,
      "step": 144301
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.324448823928833,
      "learning_rate": 0.00018406170635602118,
      "loss": 2.9333,
      "step": 144302
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9879437685012817,
      "learning_rate": 0.0001840579336177529,
      "loss": 2.7926,
      "step": 144303
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.038872718811035,
      "learning_rate": 0.00018405416090104052,
      "loss": 2.8839,
      "step": 144304
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.085057497024536,
      "learning_rate": 0.0001840503882058846,
      "loss": 3.1651,
      "step": 144305
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4261529445648193,
      "learning_rate": 0.00018404661553228605,
      "loss": 3.1086,
      "step": 144306
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.118673801422119,
      "learning_rate": 0.00018404284288024527,
      "loss": 3.0327,
      "step": 144307
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.966955542564392,
      "learning_rate": 0.00018403907024976316,
      "loss": 3.0459,
      "step": 144308
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9852418899536133,
      "learning_rate": 0.00018403529764084031,
      "loss": 3.2648,
      "step": 144309
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0952839851379395,
      "learning_rate": 0.00018403152505347753,
      "loss": 2.7709,
      "step": 144310
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6528379917144775,
      "learning_rate": 0.0001840277524876755,
      "loss": 3.2498,
      "step": 144311
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.473085403442383,
      "learning_rate": 0.00018402397994343497,
      "loss": 3.0413,
      "step": 144312
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.938005208969116,
      "learning_rate": 0.00018402020742075648,
      "loss": 2.9211,
      "step": 144313
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6413419246673584,
      "learning_rate": 0.00018401643491964084,
      "loss": 3.1773,
      "step": 144314
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3706021308898926,
      "learning_rate": 0.0001840126624400887,
      "loss": 3.2872,
      "step": 144315
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3300371170043945,
      "learning_rate": 0.0001840088899821008,
      "loss": 2.9065,
      "step": 144316
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.420229196548462,
      "learning_rate": 0.00018400511754567786,
      "loss": 2.8593,
      "step": 144317
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.520153045654297,
      "learning_rate": 0.00018400134513082064,
      "loss": 2.9325,
      "step": 144318
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.925297737121582,
      "learning_rate": 0.0001839975727375297,
      "loss": 3.1605,
      "step": 144319
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2453620433807373,
      "learning_rate": 0.00018399380036580575,
      "loss": 2.8427,
      "step": 144320
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3511197566986084,
      "learning_rate": 0.00018399002801564954,
      "loss": 2.8964,
      "step": 144321
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4344167709350586,
      "learning_rate": 0.0001839862556870618,
      "loss": 2.9824,
      "step": 144322
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3121044635772705,
      "learning_rate": 0.00018398248338004317,
      "loss": 2.8197,
      "step": 144323
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.052548408508301,
      "learning_rate": 0.00018397871109459456,
      "loss": 3.1219,
      "step": 144324
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.106393814086914,
      "learning_rate": 0.00018397493883071634,
      "loss": 3.1316,
      "step": 144325
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8858522176742554,
      "learning_rate": 0.00018397116658840939,
      "loss": 2.6113,
      "step": 144326
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.53088641166687,
      "learning_rate": 0.00018396739436767434,
      "loss": 3.0509,
      "step": 144327
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8768012523651123,
      "learning_rate": 0.00018396362216851198,
      "loss": 3.006,
      "step": 144328
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4586880207061768,
      "learning_rate": 0.00018395984999092297,
      "loss": 2.87,
      "step": 144329
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4162943363189697,
      "learning_rate": 0.00018395607783490805,
      "loss": 3.0232,
      "step": 144330
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4879612922668457,
      "learning_rate": 0.00018395230570046793,
      "loss": 3.0383,
      "step": 144331
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.567230701446533,
      "learning_rate": 0.0001839485335876032,
      "loss": 3.098,
      "step": 144332
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8715882301330566,
      "learning_rate": 0.0001839447614963146,
      "loss": 2.6773,
      "step": 144333
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.614104986190796,
      "learning_rate": 0.00018394098942660288,
      "loss": 2.9467,
      "step": 144334
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.327286720275879,
      "learning_rate": 0.00018393721737846872,
      "loss": 2.89,
      "step": 144335
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9561386108398438,
      "learning_rate": 0.00018393344535191285,
      "loss": 2.9399,
      "step": 144336
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.138122320175171,
      "learning_rate": 0.00018392967334693598,
      "loss": 2.8213,
      "step": 144337
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.340360164642334,
      "learning_rate": 0.0001839259013635387,
      "loss": 2.9337,
      "step": 144338
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.213308334350586,
      "learning_rate": 0.00018392212940172186,
      "loss": 3.0927,
      "step": 144339
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.086019277572632,
      "learning_rate": 0.000183918357461486,
      "loss": 2.9652,
      "step": 144340
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.373279094696045,
      "learning_rate": 0.00018391458554283191,
      "loss": 2.8127,
      "step": 144341
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1975576877593994,
      "learning_rate": 0.00018391081364576035,
      "loss": 2.7206,
      "step": 144342
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.620737314224243,
      "learning_rate": 0.00018390704177027197,
      "loss": 2.9279,
      "step": 144343
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.767210483551025,
      "learning_rate": 0.00018390326991636738,
      "loss": 3.0083,
      "step": 144344
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.625265121459961,
      "learning_rate": 0.00018389949808404746,
      "loss": 2.9718,
      "step": 144345
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.138160228729248,
      "learning_rate": 0.00018389572627331278,
      "loss": 3.1141,
      "step": 144346
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9329177141189575,
      "learning_rate": 0.00018389195448416403,
      "loss": 3.0716,
      "step": 144347
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.217149019241333,
      "learning_rate": 0.000183888182716602,
      "loss": 3.0071,
      "step": 144348
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8324623107910156,
      "learning_rate": 0.00018388441097062736,
      "loss": 2.8217,
      "step": 144349
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4666500091552734,
      "learning_rate": 0.0001838806392462408,
      "loss": 2.7624,
      "step": 144350
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.201076030731201,
      "learning_rate": 0.00018387686754344298,
      "loss": 2.8167,
      "step": 144351
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2862446308135986,
      "learning_rate": 0.00018387309586223463,
      "loss": 2.9191,
      "step": 144352
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.160684823989868,
      "learning_rate": 0.00018386932420261652,
      "loss": 2.9761,
      "step": 144353
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.122227668762207,
      "learning_rate": 0.00018386555256458926,
      "loss": 2.9095,
      "step": 144354
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8804770708084106,
      "learning_rate": 0.00018386178094815367,
      "loss": 2.9196,
      "step": 144355
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.44488525390625,
      "learning_rate": 0.00018385800935331027,
      "loss": 2.9691,
      "step": 144356
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0990824699401855,
      "learning_rate": 0.00018385423778005986,
      "loss": 2.8199,
      "step": 144357
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5828912258148193,
      "learning_rate": 0.00018385046622840313,
      "loss": 3.0016,
      "step": 144358
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.746692657470703,
      "learning_rate": 0.0001838466946983408,
      "loss": 3.1315,
      "step": 144359
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3182387351989746,
      "learning_rate": 0.00018384292318987365,
      "loss": 3.1017,
      "step": 144360
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.702059030532837,
      "learning_rate": 0.00018383915170300228,
      "loss": 3.1308,
      "step": 144361
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.031829357147217,
      "learning_rate": 0.00018383538023772732,
      "loss": 3.1308,
      "step": 144362
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5092761516571045,
      "learning_rate": 0.00018383160879404956,
      "loss": 3.0517,
      "step": 144363
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0239226818084717,
      "learning_rate": 0.00018382783737196974,
      "loss": 2.9844,
      "step": 144364
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.205904722213745,
      "learning_rate": 0.0001838240659714885,
      "loss": 2.8007,
      "step": 144365
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9654338359832764,
      "learning_rate": 0.0001838202945926065,
      "loss": 2.8561,
      "step": 144366
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1172118186950684,
      "learning_rate": 0.0001838165232353247,
      "loss": 2.9712,
      "step": 144367
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3191006183624268,
      "learning_rate": 0.00018381275189964343,
      "loss": 3.1602,
      "step": 144368
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.206465005874634,
      "learning_rate": 0.00018380898058556358,
      "loss": 2.9899,
      "step": 144369
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.132150173187256,
      "learning_rate": 0.0001838052092930858,
      "loss": 3.0377,
      "step": 144370
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2301900386810303,
      "learning_rate": 0.0001838014380222109,
      "loss": 2.9439,
      "step": 144371
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2053327560424805,
      "learning_rate": 0.00018379766677293946,
      "loss": 2.9822,
      "step": 144372
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.973018169403076,
      "learning_rate": 0.00018379389554527235,
      "loss": 3.113,
      "step": 144373
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3955953121185303,
      "learning_rate": 0.00018379012433921004,
      "loss": 2.7393,
      "step": 144374
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1223881244659424,
      "learning_rate": 0.00018378635315475334,
      "loss": 3.1061,
      "step": 144375
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8651068210601807,
      "learning_rate": 0.00018378258199190295,
      "loss": 2.8554,
      "step": 144376
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.8577678203582764,
      "learning_rate": 0.00018377881085065954,
      "loss": 2.6826,
      "step": 144377
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2808408737182617,
      "learning_rate": 0.00018377503973102387,
      "loss": 3.0187,
      "step": 144378
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5492095947265625,
      "learning_rate": 0.0001837712686329967,
      "loss": 2.9213,
      "step": 144379
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6392438411712646,
      "learning_rate": 0.00018376749755657858,
      "loss": 3.079,
      "step": 144380
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7409636974334717,
      "learning_rate": 0.00018376372650177026,
      "loss": 2.6747,
      "step": 144381
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9573894739151,
      "learning_rate": 0.00018375995546857245,
      "loss": 3.0128,
      "step": 144382
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0908899307250977,
      "learning_rate": 0.00018375618445698585,
      "loss": 2.9051,
      "step": 144383
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4832420349121094,
      "learning_rate": 0.00018375241346701117,
      "loss": 2.9567,
      "step": 144384
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.307424545288086,
      "learning_rate": 0.00018374864249864924,
      "loss": 3.0181,
      "step": 144385
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.247201681137085,
      "learning_rate": 0.0001837448715519005,
      "loss": 3.0144,
      "step": 144386
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.39742374420166,
      "learning_rate": 0.00018374110062676578,
      "loss": 2.7622,
      "step": 144387
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9767450094223022,
      "learning_rate": 0.0001837373297232458,
      "loss": 3.0333,
      "step": 144388
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.414215564727783,
      "learning_rate": 0.00018373355884134126,
      "loss": 3.0943,
      "step": 144389
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.312950611114502,
      "learning_rate": 0.0001837297879810528,
      "loss": 3.1233,
      "step": 144390
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2960638999938965,
      "learning_rate": 0.00018372601714238134,
      "loss": 2.8587,
      "step": 144391
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.05912709236145,
      "learning_rate": 0.00018372224632532728,
      "loss": 2.9777,
      "step": 144392
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.894076108932495,
      "learning_rate": 0.00018371847552989143,
      "loss": 3.0092,
      "step": 144393
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.538387298583984,
      "learning_rate": 0.00018371470475607449,
      "loss": 2.7725,
      "step": 144394
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4101336002349854,
      "learning_rate": 0.0001837109340038772,
      "loss": 2.8827,
      "step": 144395
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.265133857727051,
      "learning_rate": 0.00018370716327330026,
      "loss": 2.8999,
      "step": 144396
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4502854347229004,
      "learning_rate": 0.00018370339256434433,
      "loss": 2.8918,
      "step": 144397
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.6911113262176514,
      "learning_rate": 0.00018369962187701029,
      "loss": 2.8318,
      "step": 144398
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.207029342651367,
      "learning_rate": 0.00018369585121129852,
      "loss": 2.8642,
      "step": 144399
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.889782190322876,
      "learning_rate": 0.00018369208056720992,
      "loss": 3.0734,
      "step": 144400
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3881075382232666,
      "learning_rate": 0.00018368830994474513,
      "loss": 3.1529,
      "step": 144401
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1456105709075928,
      "learning_rate": 0.0001836845393439049,
      "loss": 2.8038,
      "step": 144402
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.934978485107422,
      "learning_rate": 0.0001836807687646899,
      "loss": 2.9048,
      "step": 144403
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.012686014175415,
      "learning_rate": 0.00018367699820710096,
      "loss": 3.101,
      "step": 144404
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.1252939701080322,
      "learning_rate": 0.00018367322767113856,
      "loss": 3.0578,
      "step": 144405
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1750242710113525,
      "learning_rate": 0.00018366945715680348,
      "loss": 2.849,
      "step": 144406
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.059793710708618,
      "learning_rate": 0.00018366568666409642,
      "loss": 3.0751,
      "step": 144407
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.6437225341796875,
      "learning_rate": 0.00018366191619301816,
      "loss": 2.8225,
      "step": 144408
    },
    {
      "epoch": 1.88,
      "grad_norm": 7.91348123550415,
      "learning_rate": 0.00018365814574356935,
      "loss": 2.9004,
      "step": 144409
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.7866764068603516,
      "learning_rate": 0.00018365437531575075,
      "loss": 2.8316,
      "step": 144410
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.379638195037842,
      "learning_rate": 0.00018365060490956292,
      "loss": 2.9691,
      "step": 144411
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1567580699920654,
      "learning_rate": 0.00018364683452500662,
      "loss": 2.853,
      "step": 144412
    },
    {
      "epoch": 1.88,
      "grad_norm": 7.372668743133545,
      "learning_rate": 0.00018364306416208256,
      "loss": 2.8775,
      "step": 144413
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.377887487411499,
      "learning_rate": 0.0001836392938207915,
      "loss": 2.6372,
      "step": 144414
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.398844242095947,
      "learning_rate": 0.00018363552350113402,
      "loss": 2.8364,
      "step": 144415
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8970298767089844,
      "learning_rate": 0.00018363175320311106,
      "loss": 3.1702,
      "step": 144416
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.798928141593933,
      "learning_rate": 0.00018362798292672304,
      "loss": 3.2126,
      "step": 144417
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.57222056388855,
      "learning_rate": 0.00018362421267197073,
      "loss": 2.9024,
      "step": 144418
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.090930461883545,
      "learning_rate": 0.00018362044243885493,
      "loss": 2.7268,
      "step": 144419
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.34023380279541,
      "learning_rate": 0.00018361667222737627,
      "loss": 3.0934,
      "step": 144420
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.781096935272217,
      "learning_rate": 0.00018361290203753542,
      "loss": 3.0524,
      "step": 144421
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3876709938049316,
      "learning_rate": 0.00018360913186933327,
      "loss": 2.7214,
      "step": 144422
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.35040020942688,
      "learning_rate": 0.00018360536172277032,
      "loss": 3.031,
      "step": 144423
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.7096269130706787,
      "learning_rate": 0.0001836015915978473,
      "loss": 3.0689,
      "step": 144424
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.316614627838135,
      "learning_rate": 0.0001835978214945649,
      "loss": 2.8307,
      "step": 144425
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8639752864837646,
      "learning_rate": 0.0001835940514129239,
      "loss": 3.1348,
      "step": 144426
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.096320867538452,
      "learning_rate": 0.00018359028135292498,
      "loss": 2.9412,
      "step": 144427
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.4682366847991943,
      "learning_rate": 0.00018358651131456885,
      "loss": 2.8756,
      "step": 144428
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.226076364517212,
      "learning_rate": 0.00018358274129785614,
      "loss": 2.8468,
      "step": 144429
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2445154190063477,
      "learning_rate": 0.00018357897130278763,
      "loss": 2.952,
      "step": 144430
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.379873752593994,
      "learning_rate": 0.00018357520132936396,
      "loss": 2.9432,
      "step": 144431
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.218292474746704,
      "learning_rate": 0.00018357143137758585,
      "loss": 2.8222,
      "step": 144432
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.5512161254882812,
      "learning_rate": 0.000183567661447454,
      "loss": 2.984,
      "step": 144433
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.164968967437744,
      "learning_rate": 0.00018356389153896915,
      "loss": 3.1165,
      "step": 144434
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.7697553634643555,
      "learning_rate": 0.00018356012165213198,
      "loss": 3.0783,
      "step": 144435
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.196573495864868,
      "learning_rate": 0.00018355635178694314,
      "loss": 2.9971,
      "step": 144436
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4229626655578613,
      "learning_rate": 0.00018355258194340337,
      "loss": 3.0973,
      "step": 144437
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7331316471099854,
      "learning_rate": 0.00018354881212151344,
      "loss": 3.0688,
      "step": 144438
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.6344666481018066,
      "learning_rate": 0.00018354504232127395,
      "loss": 2.8248,
      "step": 144439
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.057950973510742,
      "learning_rate": 0.00018354127254268564,
      "loss": 3.001,
      "step": 144440
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.430283308029175,
      "learning_rate": 0.00018353750278574918,
      "loss": 3.0294,
      "step": 144441
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.252427101135254,
      "learning_rate": 0.00018353373305046527,
      "loss": 2.8817,
      "step": 144442
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0828890800476074,
      "learning_rate": 0.00018352996333683466,
      "loss": 2.8683,
      "step": 144443
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.298874616622925,
      "learning_rate": 0.000183526193644858,
      "loss": 2.9878,
      "step": 144444
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.6021382808685303,
      "learning_rate": 0.00018352242397453613,
      "loss": 2.8855,
      "step": 144445
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.258427143096924,
      "learning_rate": 0.00018351865432586964,
      "loss": 2.9512,
      "step": 144446
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.355614185333252,
      "learning_rate": 0.00018351488469885917,
      "loss": 2.973,
      "step": 144447
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7024285793304443,
      "learning_rate": 0.00018351111509350542,
      "loss": 2.7798,
      "step": 144448
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.6549813747406006,
      "learning_rate": 0.00018350734550980918,
      "loss": 2.9899,
      "step": 144449
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.1953816413879395,
      "learning_rate": 0.00018350357594777112,
      "loss": 2.9821,
      "step": 144450
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.99920654296875,
      "learning_rate": 0.00018349980640739198,
      "loss": 2.8166,
      "step": 144451
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.432417631149292,
      "learning_rate": 0.00018349603688867252,
      "loss": 3.0134,
      "step": 144452
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.355642080307007,
      "learning_rate": 0.00018349226739161322,
      "loss": 2.8092,
      "step": 144453
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7388532161712646,
      "learning_rate": 0.00018348849791621495,
      "loss": 3.0241,
      "step": 144454
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2803525924682617,
      "learning_rate": 0.00018348472846247833,
      "loss": 3.0513,
      "step": 144455
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.271550416946411,
      "learning_rate": 0.00018348095903040408,
      "loss": 3.0987,
      "step": 144456
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7722580432891846,
      "learning_rate": 0.00018347718961999294,
      "loss": 2.8783,
      "step": 144457
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2997660636901855,
      "learning_rate": 0.0001834734202312457,
      "loss": 2.8664,
      "step": 144458
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.395620107650757,
      "learning_rate": 0.00018346965086416285,
      "loss": 2.8647,
      "step": 144459
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9734514951705933,
      "learning_rate": 0.00018346588151874519,
      "loss": 3.0347,
      "step": 144460
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2241132259368896,
      "learning_rate": 0.0001834621121949934,
      "loss": 3.0897,
      "step": 144461
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0096747875213623,
      "learning_rate": 0.00018345834289290824,
      "loss": 2.8697,
      "step": 144462
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9225246906280518,
      "learning_rate": 0.00018345457361249037,
      "loss": 2.7825,
      "step": 144463
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.207688570022583,
      "learning_rate": 0.00018345080435374045,
      "loss": 2.9051,
      "step": 144464
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7704355716705322,
      "learning_rate": 0.00018344703511665938,
      "loss": 2.8537,
      "step": 144465
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5482566356658936,
      "learning_rate": 0.0001834432659012476,
      "loss": 3.1673,
      "step": 144466
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.15315842628479,
      "learning_rate": 0.00018343949670750589,
      "loss": 2.9597,
      "step": 144467
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.440613269805908,
      "learning_rate": 0.00018343572753543497,
      "loss": 2.8858,
      "step": 144468
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.241882085800171,
      "learning_rate": 0.00018343195838503557,
      "loss": 2.8006,
      "step": 144469
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.115830183029175,
      "learning_rate": 0.00018342818925630837,
      "loss": 2.944,
      "step": 144470
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0698649883270264,
      "learning_rate": 0.00018342442014925418,
      "loss": 2.8057,
      "step": 144471
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1590888500213623,
      "learning_rate": 0.00018342065106387347,
      "loss": 3.0199,
      "step": 144472
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.442946434020996,
      "learning_rate": 0.00018341688200016707,
      "loss": 3.1491,
      "step": 144473
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9816616773605347,
      "learning_rate": 0.00018341311295813566,
      "loss": 2.9973,
      "step": 144474
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.033817768096924,
      "learning_rate": 0.00018340934393777994,
      "loss": 2.9365,
      "step": 144475
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.310661554336548,
      "learning_rate": 0.00018340557493910065,
      "loss": 2.914,
      "step": 144476
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0149946212768555,
      "learning_rate": 0.0001834018059620986,
      "loss": 3.0399,
      "step": 144477
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8564075231552124,
      "learning_rate": 0.0001833980370067742,
      "loss": 2.9624,
      "step": 144478
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.31074595451355,
      "learning_rate": 0.00018339426807312837,
      "loss": 3.01,
      "step": 144479
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.600984811782837,
      "learning_rate": 0.00018339049916116164,
      "loss": 2.9898,
      "step": 144480
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9093403816223145,
      "learning_rate": 0.00018338673027087492,
      "loss": 3.0704,
      "step": 144481
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2114038467407227,
      "learning_rate": 0.00018338296140226875,
      "loss": 2.862,
      "step": 144482
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.935549020767212,
      "learning_rate": 0.00018337919255534405,
      "loss": 2.9184,
      "step": 144483
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3286311626434326,
      "learning_rate": 0.0001833754237301012,
      "loss": 2.9117,
      "step": 144484
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.65576171875,
      "learning_rate": 0.00018337165492654105,
      "loss": 2.9753,
      "step": 144485
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7864327430725098,
      "learning_rate": 0.00018336788614466433,
      "loss": 2.7583,
      "step": 144486
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.739774227142334,
      "learning_rate": 0.00018336411738447173,
      "loss": 2.7688,
      "step": 144487
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.215068817138672,
      "learning_rate": 0.00018336034864596394,
      "loss": 2.9717,
      "step": 144488
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.1750407218933105,
      "learning_rate": 0.00018335657992914176,
      "loss": 2.9503,
      "step": 144489
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.449719429016113,
      "learning_rate": 0.0001833528112340057,
      "loss": 2.828,
      "step": 144490
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.727891445159912,
      "learning_rate": 0.00018334904256055656,
      "loss": 2.9692,
      "step": 144491
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1826939582824707,
      "learning_rate": 0.000183345273908795,
      "loss": 3.2276,
      "step": 144492
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.134395122528076,
      "learning_rate": 0.00018334150527872178,
      "loss": 2.9732,
      "step": 144493
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.27132248878479,
      "learning_rate": 0.00018333773667033755,
      "loss": 3.0124,
      "step": 144494
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.2951555252075195,
      "learning_rate": 0.00018333396808364318,
      "loss": 2.9215,
      "step": 144495
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0106871128082275,
      "learning_rate": 0.00018333019951863913,
      "loss": 2.9052,
      "step": 144496
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1804347038269043,
      "learning_rate": 0.00018332643097532619,
      "loss": 3.0416,
      "step": 144497
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.274772882461548,
      "learning_rate": 0.00018332266245370501,
      "loss": 3.1357,
      "step": 144498
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3765676021575928,
      "learning_rate": 0.00018331889395377642,
      "loss": 2.8678,
      "step": 144499
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.586500883102417,
      "learning_rate": 0.00018331512547554104,
      "loss": 2.8658,
      "step": 144500
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4994354248046875,
      "learning_rate": 0.00018331135701899967,
      "loss": 2.9215,
      "step": 144501
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5704638957977295,
      "learning_rate": 0.00018330758858415284,
      "loss": 2.8744,
      "step": 144502
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1386497020721436,
      "learning_rate": 0.00018330382017100127,
      "loss": 2.9763,
      "step": 144503
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.407933473587036,
      "learning_rate": 0.00018330005177954578,
      "loss": 3.0943,
      "step": 144504
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8247694969177246,
      "learning_rate": 0.000183296283409787,
      "loss": 3.0395,
      "step": 144505
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6646668910980225,
      "learning_rate": 0.00018329251506172563,
      "loss": 3.203,
      "step": 144506
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0390889644622803,
      "learning_rate": 0.00018328874673536244,
      "loss": 3.0056,
      "step": 144507
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.566316604614258,
      "learning_rate": 0.0001832849784306981,
      "loss": 2.9465,
      "step": 144508
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1509506702423096,
      "learning_rate": 0.0001832812101477332,
      "loss": 3.1867,
      "step": 144509
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6202967166900635,
      "learning_rate": 0.00018327744188646858,
      "loss": 3.0949,
      "step": 144510
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.430682897567749,
      "learning_rate": 0.00018327367364690482,
      "loss": 2.8841,
      "step": 144511
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3981096744537354,
      "learning_rate": 0.0001832699054290427,
      "loss": 3.0558,
      "step": 144512
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0944998264312744,
      "learning_rate": 0.000183266137232883,
      "loss": 3.1435,
      "step": 144513
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3336806297302246,
      "learning_rate": 0.00018326236905842624,
      "loss": 3.1662,
      "step": 144514
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0271334648132324,
      "learning_rate": 0.00018325860090567328,
      "loss": 2.8873,
      "step": 144515
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8170056343078613,
      "learning_rate": 0.0001832548327746247,
      "loss": 2.7351,
      "step": 144516
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3863415718078613,
      "learning_rate": 0.00018325106466528127,
      "loss": 2.9657,
      "step": 144517
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5492758750915527,
      "learning_rate": 0.00018324729657764362,
      "loss": 2.9447,
      "step": 144518
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.3038833141326904,
      "learning_rate": 0.00018324352851171256,
      "loss": 2.8508,
      "step": 144519
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2356834411621094,
      "learning_rate": 0.0001832397604674887,
      "loss": 2.9686,
      "step": 144520
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.613752841949463,
      "learning_rate": 0.00018323599244497276,
      "loss": 2.9189,
      "step": 144521
    },
    {
      "epoch": 1.88,
      "grad_norm": 9.20887279510498,
      "learning_rate": 0.00018323222444416553,
      "loss": 2.6212,
      "step": 144522
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.236178159713745,
      "learning_rate": 0.00018322845646506758,
      "loss": 3.0083,
      "step": 144523
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.330310106277466,
      "learning_rate": 0.0001832246885076796,
      "loss": 3.0156,
      "step": 144524
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.750379800796509,
      "learning_rate": 0.0001832209205720025,
      "loss": 2.8887,
      "step": 144525
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.762610673904419,
      "learning_rate": 0.00018321715265803672,
      "loss": 2.7533,
      "step": 144526
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.925628185272217,
      "learning_rate": 0.00018321338476578311,
      "loss": 2.9253,
      "step": 144527
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.464338541030884,
      "learning_rate": 0.00018320961689524234,
      "loss": 3.2106,
      "step": 144528
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2467639446258545,
      "learning_rate": 0.00018320584904641508,
      "loss": 2.8827,
      "step": 144529
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1686811447143555,
      "learning_rate": 0.00018320208121930208,
      "loss": 3.1705,
      "step": 144530
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2370553016662598,
      "learning_rate": 0.00018319831341390402,
      "loss": 2.9287,
      "step": 144531
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.297024726867676,
      "learning_rate": 0.00018319454563022166,
      "loss": 2.6921,
      "step": 144532
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.114322662353516,
      "learning_rate": 0.00018319077786825553,
      "loss": 3.157,
      "step": 144533
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.384321689605713,
      "learning_rate": 0.00018318701012800647,
      "loss": 3.0816,
      "step": 144534
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6422507762908936,
      "learning_rate": 0.00018318324240947518,
      "loss": 2.8895,
      "step": 144535
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.786865234375,
      "learning_rate": 0.0001831794747126623,
      "loss": 3.1362,
      "step": 144536
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.097578763961792,
      "learning_rate": 0.0001831757070375686,
      "loss": 3.1121,
      "step": 144537
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.3650126457214355,
      "learning_rate": 0.00018317193938419477,
      "loss": 2.7666,
      "step": 144538
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.640937566757202,
      "learning_rate": 0.0001831681717525414,
      "loss": 3.0209,
      "step": 144539
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.201035499572754,
      "learning_rate": 0.00018316440414260925,
      "loss": 2.7181,
      "step": 144540
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.39193868637085,
      "learning_rate": 0.00018316063655439912,
      "loss": 3.0786,
      "step": 144541
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.159201622009277,
      "learning_rate": 0.0001831568689879116,
      "loss": 2.8491,
      "step": 144542
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5966885089874268,
      "learning_rate": 0.0001831531014431474,
      "loss": 2.9252,
      "step": 144543
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8171982765197754,
      "learning_rate": 0.00018314933392010735,
      "loss": 3.1242,
      "step": 144544
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.478424310684204,
      "learning_rate": 0.00018314556641879198,
      "loss": 3.1353,
      "step": 144545
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.140751838684082,
      "learning_rate": 0.00018314179893920204,
      "loss": 2.947,
      "step": 144546
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.5806944370269775,
      "learning_rate": 0.00018313803148133821,
      "loss": 2.8952,
      "step": 144547
    },
    {
      "epoch": 1.88,
      "grad_norm": 6.176846027374268,
      "learning_rate": 0.00018313426404520126,
      "loss": 3.023,
      "step": 144548
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.080025672912598,
      "learning_rate": 0.00018313049663079182,
      "loss": 2.8065,
      "step": 144549
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9125521183013916,
      "learning_rate": 0.0001831267292381108,
      "loss": 2.8372,
      "step": 144550
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.632437229156494,
      "learning_rate": 0.0001831229618671586,
      "loss": 2.8229,
      "step": 144551
    },
    {
      "epoch": 1.88,
      "grad_norm": 5.418876647949219,
      "learning_rate": 0.00018311919451793603,
      "loss": 2.9822,
      "step": 144552
    },
    {
      "epoch": 1.88,
      "grad_norm": 6.060988903045654,
      "learning_rate": 0.0001831154271904438,
      "loss": 3.1234,
      "step": 144553
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.760153293609619,
      "learning_rate": 0.0001831116598846826,
      "loss": 2.791,
      "step": 144554
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2691879272460938,
      "learning_rate": 0.00018310789260065317,
      "loss": 3.2261,
      "step": 144555
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.041978597640991,
      "learning_rate": 0.00018310412533835633,
      "loss": 3.0601,
      "step": 144556
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7345945835113525,
      "learning_rate": 0.0001831003580977925,
      "loss": 2.815,
      "step": 144557
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.169689178466797,
      "learning_rate": 0.0001830965908789625,
      "loss": 2.9558,
      "step": 144558
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.725973606109619,
      "learning_rate": 0.00018309282368186706,
      "loss": 2.9746,
      "step": 144559
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9895167350769043,
      "learning_rate": 0.0001830890565065069,
      "loss": 2.86,
      "step": 144560
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3128302097320557,
      "learning_rate": 0.0001830852893528827,
      "loss": 3.0398,
      "step": 144561
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0145809650421143,
      "learning_rate": 0.0001830815222209952,
      "loss": 2.8634,
      "step": 144562
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.901637554168701,
      "learning_rate": 0.000183077755110845,
      "loss": 2.8148,
      "step": 144563
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5759592056274414,
      "learning_rate": 0.00018307398802243282,
      "loss": 3.2082,
      "step": 144564
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2600719928741455,
      "learning_rate": 0.00018307022095575943,
      "loss": 2.9614,
      "step": 144565
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5734686851501465,
      "learning_rate": 0.00018306645391082544,
      "loss": 2.9512,
      "step": 144566
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9531888961791992,
      "learning_rate": 0.00018306268688763166,
      "loss": 3.0578,
      "step": 144567
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3262720108032227,
      "learning_rate": 0.00018305891988617883,
      "loss": 2.727,
      "step": 144568
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4442548751831055,
      "learning_rate": 0.00018305515290646738,
      "loss": 2.9056,
      "step": 144569
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.074244737625122,
      "learning_rate": 0.00018305138594849825,
      "loss": 3.1375,
      "step": 144570
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6686503887176514,
      "learning_rate": 0.00018304761901227206,
      "loss": 3.059,
      "step": 144571
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.606428861618042,
      "learning_rate": 0.00018304385209778952,
      "loss": 3.0748,
      "step": 144572
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.569880485534668,
      "learning_rate": 0.00018304008520505136,
      "loss": 2.7983,
      "step": 144573
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4085581302642822,
      "learning_rate": 0.00018303631833405833,
      "loss": 2.9714,
      "step": 144574
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.249282121658325,
      "learning_rate": 0.00018303255148481096,
      "loss": 2.9619,
      "step": 144575
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.364635467529297,
      "learning_rate": 0.00018302878465731006,
      "loss": 3.0962,
      "step": 144576
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.098283290863037,
      "learning_rate": 0.00018302501785155627,
      "loss": 2.9577,
      "step": 144577
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.180492401123047,
      "learning_rate": 0.0001830212510675504,
      "loss": 3.1025,
      "step": 144578
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9415268898010254,
      "learning_rate": 0.00018301748430529305,
      "loss": 2.8696,
      "step": 144579
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3968420028686523,
      "learning_rate": 0.0001830137175647851,
      "loss": 2.9363,
      "step": 144580
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.935739517211914,
      "learning_rate": 0.00018300995084602698,
      "loss": 2.9256,
      "step": 144581
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9510433673858643,
      "learning_rate": 0.00018300618414901948,
      "loss": 3.1242,
      "step": 144582
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7149341106414795,
      "learning_rate": 0.00018300241747376338,
      "loss": 3.2056,
      "step": 144583
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.427061080932617,
      "learning_rate": 0.00018299865082025936,
      "loss": 2.8489,
      "step": 144584
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.112842559814453,
      "learning_rate": 0.00018299488418850804,
      "loss": 3.0201,
      "step": 144585
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.223029136657715,
      "learning_rate": 0.00018299111757851035,
      "loss": 3.0292,
      "step": 144586
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0812904834747314,
      "learning_rate": 0.00018298735099026665,
      "loss": 3.0369,
      "step": 144587
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2518527507781982,
      "learning_rate": 0.00018298358442377788,
      "loss": 2.9874,
      "step": 144588
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.307739496231079,
      "learning_rate": 0.00018297981787904463,
      "loss": 3.0748,
      "step": 144589
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3910484313964844,
      "learning_rate": 0.00018297605135606762,
      "loss": 2.8291,
      "step": 144590
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1454110145568848,
      "learning_rate": 0.0001829722848548476,
      "loss": 3.0984,
      "step": 144591
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2184674739837646,
      "learning_rate": 0.00018296851837538536,
      "loss": 2.9462,
      "step": 144592
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4795565605163574,
      "learning_rate": 0.00018296475191768136,
      "loss": 2.942,
      "step": 144593
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.394244909286499,
      "learning_rate": 0.0001829609854817364,
      "loss": 2.807,
      "step": 144594
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3899037837982178,
      "learning_rate": 0.00018295721906755124,
      "loss": 2.8301,
      "step": 144595
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0607311725616455,
      "learning_rate": 0.0001829534526751265,
      "loss": 2.5583,
      "step": 144596
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2177608013153076,
      "learning_rate": 0.00018294968630446293,
      "loss": 2.8268,
      "step": 144597
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1644506454467773,
      "learning_rate": 0.00018294591995556125,
      "loss": 3.0579,
      "step": 144598
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2920374870300293,
      "learning_rate": 0.0001829421536284222,
      "loss": 2.9035,
      "step": 144599
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5976979732513428,
      "learning_rate": 0.0001829383873230464,
      "loss": 2.8401,
      "step": 144600
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2490882873535156,
      "learning_rate": 0.00018293462103943447,
      "loss": 3.1452,
      "step": 144601
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0856094360351562,
      "learning_rate": 0.00018293085477758724,
      "loss": 3.0572,
      "step": 144602
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8822989463806152,
      "learning_rate": 0.00018292708853750536,
      "loss": 3.1635,
      "step": 144603
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0075583457946777,
      "learning_rate": 0.00018292332231918955,
      "loss": 3.071,
      "step": 144604
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6756415367126465,
      "learning_rate": 0.00018291955612264056,
      "loss": 3.1121,
      "step": 144605
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.263916492462158,
      "learning_rate": 0.000182915789947859,
      "loss": 2.9158,
      "step": 144606
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.8893418312072754,
      "learning_rate": 0.0001829120237948456,
      "loss": 3.2171,
      "step": 144607
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8470373153686523,
      "learning_rate": 0.00018290825766360106,
      "loss": 2.8256,
      "step": 144608
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.286618947982788,
      "learning_rate": 0.00018290449155412608,
      "loss": 2.9052,
      "step": 144609
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.534467935562134,
      "learning_rate": 0.00018290072546642137,
      "loss": 2.782,
      "step": 144610
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.013226270675659,
      "learning_rate": 0.00018289695940048765,
      "loss": 3.0216,
      "step": 144611
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2793989181518555,
      "learning_rate": 0.0001828931933563256,
      "loss": 3.004,
      "step": 144612
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.986680030822754,
      "learning_rate": 0.00018288942733393586,
      "loss": 2.9082,
      "step": 144613
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.745887517929077,
      "learning_rate": 0.00018288566133331922,
      "loss": 2.7925,
      "step": 144614
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0099658966064453,
      "learning_rate": 0.00018288189535447635,
      "loss": 3.0196,
      "step": 144615
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5579497814178467,
      "learning_rate": 0.00018287812939740794,
      "loss": 2.8415,
      "step": 144616
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7659988403320312,
      "learning_rate": 0.00018287436346211473,
      "loss": 2.8333,
      "step": 144617
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2987380027770996,
      "learning_rate": 0.00018287059754859734,
      "loss": 3.0069,
      "step": 144618
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.380204677581787,
      "learning_rate": 0.0001828668316568565,
      "loss": 3.1442,
      "step": 144619
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.362020254135132,
      "learning_rate": 0.00018286306578689296,
      "loss": 2.9556,
      "step": 144620
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.437148332595825,
      "learning_rate": 0.00018285929993870737,
      "loss": 2.7678,
      "step": 144621
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.28045654296875,
      "learning_rate": 0.00018285553411230053,
      "loss": 3.0872,
      "step": 144622
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0079824924468994,
      "learning_rate": 0.00018285176830767298,
      "loss": 3.0044,
      "step": 144623
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7690670490264893,
      "learning_rate": 0.0001828480025248255,
      "loss": 2.6822,
      "step": 144624
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.091506004333496,
      "learning_rate": 0.00018284423676375879,
      "loss": 3.2407,
      "step": 144625
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6492879390716553,
      "learning_rate": 0.00018284047102447353,
      "loss": 2.8784,
      "step": 144626
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1264564990997314,
      "learning_rate": 0.00018283670530697043,
      "loss": 2.7693,
      "step": 144627
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.986423373222351,
      "learning_rate": 0.00018283293961125026,
      "loss": 2.7395,
      "step": 144628
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7885894775390625,
      "learning_rate": 0.0001828291739373137,
      "loss": 2.9207,
      "step": 144629
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5551235675811768,
      "learning_rate": 0.00018282540828516133,
      "loss": 3.0308,
      "step": 144630
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.263901948928833,
      "learning_rate": 0.00018282164265479389,
      "loss": 2.9137,
      "step": 144631
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2118303775787354,
      "learning_rate": 0.00018281787704621218,
      "loss": 2.9145,
      "step": 144632
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8500275611877441,
      "learning_rate": 0.00018281411145941678,
      "loss": 3.1132,
      "step": 144633
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.556861162185669,
      "learning_rate": 0.00018281034589440849,
      "loss": 3.0279,
      "step": 144634
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2506511211395264,
      "learning_rate": 0.00018280658035118804,
      "loss": 3.0561,
      "step": 144635
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6123600006103516,
      "learning_rate": 0.00018280281482975597,
      "loss": 2.6501,
      "step": 144636
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4850964546203613,
      "learning_rate": 0.00018279904933011308,
      "loss": 2.7181,
      "step": 144637
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.12477707862854,
      "learning_rate": 0.00018279528385226006,
      "loss": 2.7104,
      "step": 144638
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8159642219543457,
      "learning_rate": 0.00018279151839619762,
      "loss": 2.9887,
      "step": 144639
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.283579111099243,
      "learning_rate": 0.00018278775296192643,
      "loss": 3.0318,
      "step": 144640
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.299041748046875,
      "learning_rate": 0.0001827839875494473,
      "loss": 2.8731,
      "step": 144641
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.392350912094116,
      "learning_rate": 0.00018278022215876077,
      "loss": 2.8238,
      "step": 144642
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.950913429260254,
      "learning_rate": 0.00018277645678986757,
      "loss": 3.1077,
      "step": 144643
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8440849781036377,
      "learning_rate": 0.00018277269144276846,
      "loss": 3.1007,
      "step": 144644
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6135361194610596,
      "learning_rate": 0.0001827689261174641,
      "loss": 3.244,
      "step": 144645
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1956417560577393,
      "learning_rate": 0.0001827651608139552,
      "loss": 2.9092,
      "step": 144646
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.423943281173706,
      "learning_rate": 0.00018276139553224268,
      "loss": 2.795,
      "step": 144647
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9471036195755005,
      "learning_rate": 0.00018275763027232681,
      "loss": 3.0067,
      "step": 144648
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3001091480255127,
      "learning_rate": 0.00018275386503420856,
      "loss": 3.0206,
      "step": 144649
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9982208013534546,
      "learning_rate": 0.00018275009981788852,
      "loss": 2.828,
      "step": 144650
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1375725269317627,
      "learning_rate": 0.00018274633462336752,
      "loss": 3.0041,
      "step": 144651
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1246094703674316,
      "learning_rate": 0.00018274256945064615,
      "loss": 3.0686,
      "step": 144652
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3082685470581055,
      "learning_rate": 0.00018273880429972532,
      "loss": 2.7714,
      "step": 144653
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4584133625030518,
      "learning_rate": 0.00018273503917060538,
      "loss": 2.8815,
      "step": 144654
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.9064536094665527,
      "learning_rate": 0.00018273127406328725,
      "loss": 3.0664,
      "step": 144655
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.420677900314331,
      "learning_rate": 0.00018272750897777156,
      "loss": 2.9809,
      "step": 144656
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2946460247039795,
      "learning_rate": 0.00018272374391405905,
      "loss": 2.7186,
      "step": 144657
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5914053916931152,
      "learning_rate": 0.0001827199788721504,
      "loss": 2.8553,
      "step": 144658
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.020324945449829,
      "learning_rate": 0.00018271621385204645,
      "loss": 2.9187,
      "step": 144659
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.131814479827881,
      "learning_rate": 0.00018271244885374765,
      "loss": 2.9499,
      "step": 144660
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3005483150482178,
      "learning_rate": 0.00018270868387725483,
      "loss": 3.0563,
      "step": 144661
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0126636028289795,
      "learning_rate": 0.00018270491892256863,
      "loss": 3.1261,
      "step": 144662
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.597036600112915,
      "learning_rate": 0.00018270115398968985,
      "loss": 2.9022,
      "step": 144663
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.308009386062622,
      "learning_rate": 0.00018269738907861915,
      "loss": 3.061,
      "step": 144664
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1986114978790283,
      "learning_rate": 0.0001826936241893572,
      "loss": 3.0206,
      "step": 144665
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2030880451202393,
      "learning_rate": 0.00018268985932190485,
      "loss": 2.7989,
      "step": 144666
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8532230854034424,
      "learning_rate": 0.00018268609447626253,
      "loss": 2.7963,
      "step": 144667
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.0827813148498535,
      "learning_rate": 0.00018268232965243107,
      "loss": 2.8855,
      "step": 144668
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6300394535064697,
      "learning_rate": 0.0001826785648504112,
      "loss": 2.902,
      "step": 144669
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0257530212402344,
      "learning_rate": 0.00018267480007020363,
      "loss": 3.2658,
      "step": 144670
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.3398537635803223,
      "learning_rate": 0.00018267103531180898,
      "loss": 3.1328,
      "step": 144671
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.559458017349243,
      "learning_rate": 0.00018266727057522812,
      "loss": 3.1149,
      "step": 144672
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3709371089935303,
      "learning_rate": 0.00018266350586046152,
      "loss": 3.0101,
      "step": 144673
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6012632846832275,
      "learning_rate": 0.00018265974116751002,
      "loss": 2.8772,
      "step": 144674
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.39459228515625,
      "learning_rate": 0.00018265597649637426,
      "loss": 3.0692,
      "step": 144675
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.7426910400390625,
      "learning_rate": 0.00018265221184705495,
      "loss": 2.8448,
      "step": 144676
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.194671392440796,
      "learning_rate": 0.00018264844721955283,
      "loss": 2.804,
      "step": 144677
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0574612617492676,
      "learning_rate": 0.0001826446826138687,
      "loss": 3.0191,
      "step": 144678
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.38914155960083,
      "learning_rate": 0.00018264091803000304,
      "loss": 2.9865,
      "step": 144679
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.327471971511841,
      "learning_rate": 0.0001826371534679566,
      "loss": 2.8838,
      "step": 144680
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8270015716552734,
      "learning_rate": 0.00018263338892773016,
      "loss": 2.8189,
      "step": 144681
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2841906547546387,
      "learning_rate": 0.0001826296244093244,
      "loss": 3.0212,
      "step": 144682
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.128384590148926,
      "learning_rate": 0.00018262585991273998,
      "loss": 2.9079,
      "step": 144683
    },
    {
      "epoch": 1.88,
      "grad_norm": 6.492472171783447,
      "learning_rate": 0.0001826220954379778,
      "loss": 2.9579,
      "step": 144684
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8905701637268066,
      "learning_rate": 0.00018261833098503818,
      "loss": 3.0171,
      "step": 144685
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.448320150375366,
      "learning_rate": 0.0001826145665539221,
      "loss": 2.8184,
      "step": 144686
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.440439224243164,
      "learning_rate": 0.00018261080214463017,
      "loss": 3.1216,
      "step": 144687
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.841278553009033,
      "learning_rate": 0.0001826070377571631,
      "loss": 2.884,
      "step": 144688
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.9940683841705322,
      "learning_rate": 0.00018260327339152157,
      "loss": 3.0691,
      "step": 144689
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.230527639389038,
      "learning_rate": 0.00018259950904770642,
      "loss": 2.8225,
      "step": 144690
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.089545488357544,
      "learning_rate": 0.00018259574472571816,
      "loss": 2.9777,
      "step": 144691
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.752685785293579,
      "learning_rate": 0.0001825919804255576,
      "loss": 2.8461,
      "step": 144692
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.915698766708374,
      "learning_rate": 0.00018258821614722536,
      "loss": 3.0718,
      "step": 144693
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.650834560394287,
      "learning_rate": 0.0001825844518907222,
      "loss": 3.0915,
      "step": 144694
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.382805109024048,
      "learning_rate": 0.00018258068765604883,
      "loss": 3.0743,
      "step": 144695
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0173771381378174,
      "learning_rate": 0.00018257692344320593,
      "loss": 2.9583,
      "step": 144696
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0334997177124023,
      "learning_rate": 0.00018257315925219417,
      "loss": 3.1572,
      "step": 144697
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6505982875823975,
      "learning_rate": 0.00018256939508301428,
      "loss": 3.1552,
      "step": 144698
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0641167163848877,
      "learning_rate": 0.000182565630935667,
      "loss": 3.0688,
      "step": 144699
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.6695899963378906,
      "learning_rate": 0.00018256186681015293,
      "loss": 2.7837,
      "step": 144700
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.164142608642578,
      "learning_rate": 0.00018255810270647284,
      "loss": 3.1147,
      "step": 144701
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.175715208053589,
      "learning_rate": 0.0001825543386246275,
      "loss": 3.168,
      "step": 144702
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.619016408920288,
      "learning_rate": 0.00018255057456461742,
      "loss": 2.9353,
      "step": 144703
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4086570739746094,
      "learning_rate": 0.0001825468105264434,
      "loss": 3.003,
      "step": 144704
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.35833740234375,
      "learning_rate": 0.00018254304651010614,
      "loss": 2.8733,
      "step": 144705
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.197251558303833,
      "learning_rate": 0.00018253928251560643,
      "loss": 2.9361,
      "step": 144706
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4991114139556885,
      "learning_rate": 0.0001825355185429448,
      "loss": 2.9203,
      "step": 144707
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2272891998291016,
      "learning_rate": 0.00018253175459212212,
      "loss": 3.2737,
      "step": 144708
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5582902431488037,
      "learning_rate": 0.00018252799066313896,
      "loss": 2.8532,
      "step": 144709
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1600308418273926,
      "learning_rate": 0.00018252422675599608,
      "loss": 2.9229,
      "step": 144710
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1604912281036377,
      "learning_rate": 0.00018252046287069412,
      "loss": 2.908,
      "step": 144711
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.655555486679077,
      "learning_rate": 0.0001825166990072338,
      "loss": 3.0149,
      "step": 144712
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5384650230407715,
      "learning_rate": 0.00018251293516561597,
      "loss": 2.789,
      "step": 144713
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.139749050140381,
      "learning_rate": 0.00018250917134584113,
      "loss": 2.9221,
      "step": 144714
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.068990707397461,
      "learning_rate": 0.00018250540754791007,
      "loss": 3.2566,
      "step": 144715
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.190572738647461,
      "learning_rate": 0.0001825016437718234,
      "loss": 2.9826,
      "step": 144716
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0053699016571045,
      "learning_rate": 0.00018249788001758192,
      "loss": 2.9908,
      "step": 144717
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4445853233337402,
      "learning_rate": 0.00018249411628518632,
      "loss": 2.9651,
      "step": 144718
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.249011278152466,
      "learning_rate": 0.00018249035257463726,
      "loss": 2.9765,
      "step": 144719
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.312248468399048,
      "learning_rate": 0.00018248658888593558,
      "loss": 3.0173,
      "step": 144720
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.063775539398193,
      "learning_rate": 0.00018248282521908174,
      "loss": 2.9074,
      "step": 144721
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3980939388275146,
      "learning_rate": 0.0001824790615740766,
      "loss": 2.7136,
      "step": 144722
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0982420444488525,
      "learning_rate": 0.00018247529795092078,
      "loss": 2.7203,
      "step": 144723
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.174410343170166,
      "learning_rate": 0.00018247153434961503,
      "loss": 3.2223,
      "step": 144724
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.554267168045044,
      "learning_rate": 0.00018246777077016006,
      "loss": 2.925,
      "step": 144725
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.253657341003418,
      "learning_rate": 0.00018246400721255666,
      "loss": 3.0721,
      "step": 144726
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8245422840118408,
      "learning_rate": 0.00018246024367680528,
      "loss": 3.0096,
      "step": 144727
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3822765350341797,
      "learning_rate": 0.00018245648016290678,
      "loss": 2.9119,
      "step": 144728
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6612377166748047,
      "learning_rate": 0.0001824527166708618,
      "loss": 3.0693,
      "step": 144729
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.1804094314575195,
      "learning_rate": 0.00018244895320067113,
      "loss": 3.0181,
      "step": 144730
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4072108268737793,
      "learning_rate": 0.00018244518975233542,
      "loss": 2.9092,
      "step": 144731
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5745620727539062,
      "learning_rate": 0.00018244142632585533,
      "loss": 2.964,
      "step": 144732
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.677985906600952,
      "learning_rate": 0.00018243766292123177,
      "loss": 3.0545,
      "step": 144733
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7922866344451904,
      "learning_rate": 0.00018243389953846508,
      "loss": 2.8448,
      "step": 144734
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.2334351539611816,
      "learning_rate": 0.00018243013617755618,
      "loss": 2.9601,
      "step": 144735
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5037291049957275,
      "learning_rate": 0.00018242637283850575,
      "loss": 2.9913,
      "step": 144736
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.346410274505615,
      "learning_rate": 0.00018242260952131445,
      "loss": 2.7249,
      "step": 144737
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.260463237762451,
      "learning_rate": 0.000182418846225983,
      "loss": 2.8587,
      "step": 144738
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1785776615142822,
      "learning_rate": 0.0001824150829525123,
      "loss": 2.7792,
      "step": 144739
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0208683013916016,
      "learning_rate": 0.00018241131970090266,
      "loss": 2.893,
      "step": 144740
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.6363656520843506,
      "learning_rate": 0.000182407556471155,
      "loss": 3.0178,
      "step": 144741
    },
    {
      "epoch": 1.88,
      "grad_norm": 4.734258651733398,
      "learning_rate": 0.00018240379326327,
      "loss": 2.6402,
      "step": 144742
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.081256866455078,
      "learning_rate": 0.00018240003007724838,
      "loss": 3.0727,
      "step": 144743
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.8500685691833496,
      "learning_rate": 0.00018239626691309076,
      "loss": 3.1593,
      "step": 144744
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.741856336593628,
      "learning_rate": 0.00018239250377079802,
      "loss": 2.7233,
      "step": 144745
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4705445766448975,
      "learning_rate": 0.00018238874065037064,
      "loss": 3.0244,
      "step": 144746
    },
    {
      "epoch": 1.88,
      "grad_norm": 1.963124394416809,
      "learning_rate": 0.00018238497755180942,
      "loss": 2.7324,
      "step": 144747
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.287627935409546,
      "learning_rate": 0.000182381214475115,
      "loss": 2.8892,
      "step": 144748
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0415186882019043,
      "learning_rate": 0.00018237745142028817,
      "loss": 3.06,
      "step": 144749
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.5040481090545654,
      "learning_rate": 0.0001823736883873296,
      "loss": 2.8078,
      "step": 144750
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.3413665294647217,
      "learning_rate": 0.00018236992537624011,
      "loss": 2.9231,
      "step": 144751
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.8896608352661133,
      "learning_rate": 0.0001823661623870201,
      "loss": 3.0427,
      "step": 144752
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.025498390197754,
      "learning_rate": 0.0001823623994196705,
      "loss": 2.8993,
      "step": 144753
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.1201648712158203,
      "learning_rate": 0.00018235863647419193,
      "loss": 3.005,
      "step": 144754
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0760657787323,
      "learning_rate": 0.0001823548735505851,
      "loss": 2.904,
      "step": 144755
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.8717610836029053,
      "learning_rate": 0.0001823511106488507,
      "loss": 2.8524,
      "step": 144756
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.226421594619751,
      "learning_rate": 0.0001823473477689896,
      "loss": 2.8856,
      "step": 144757
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.7405741214752197,
      "learning_rate": 0.00018234358491100224,
      "loss": 2.8758,
      "step": 144758
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.4067091941833496,
      "learning_rate": 0.00018233982207488942,
      "loss": 3.0517,
      "step": 144759
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.897705078125,
      "learning_rate": 0.00018233605926065184,
      "loss": 3.0546,
      "step": 144760
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.672750949859619,
      "learning_rate": 0.00018233229646829019,
      "loss": 2.9327,
      "step": 144761
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.875551700592041,
      "learning_rate": 0.00018232853369780524,
      "loss": 3.1812,
      "step": 144762
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.125992774963379,
      "learning_rate": 0.00018232477094919767,
      "loss": 3.1423,
      "step": 144763
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.897430896759033,
      "learning_rate": 0.0001823210082224681,
      "loss": 2.8182,
      "step": 144764
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.2195522785186768,
      "learning_rate": 0.00018231724551761723,
      "loss": 3.0457,
      "step": 144765
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.250192880630493,
      "learning_rate": 0.00018231348283464583,
      "loss": 2.8138,
      "step": 144766
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0860891342163086,
      "learning_rate": 0.0001823097201735546,
      "loss": 3.0098,
      "step": 144767
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.362548351287842,
      "learning_rate": 0.00018230595753434417,
      "loss": 3.2321,
      "step": 144768
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.161215305328369,
      "learning_rate": 0.00018230219491701543,
      "loss": 3.0594,
      "step": 144769
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.066964864730835,
      "learning_rate": 0.00018229843232156883,
      "loss": 2.8298,
      "step": 144770
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.345696210861206,
      "learning_rate": 0.0001822946697480051,
      "loss": 3.0497,
      "step": 144771
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5575478076934814,
      "learning_rate": 0.00018229090719632508,
      "loss": 2.9387,
      "step": 144772
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.280319929122925,
      "learning_rate": 0.0001822871446665294,
      "loss": 2.8208,
      "step": 144773
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3353111743927,
      "learning_rate": 0.00018228338215861873,
      "loss": 3.1718,
      "step": 144774
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1925153732299805,
      "learning_rate": 0.0001822796196725939,
      "loss": 2.84,
      "step": 144775
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.60461163520813,
      "learning_rate": 0.00018227585720845548,
      "loss": 2.6122,
      "step": 144776
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.538069009780884,
      "learning_rate": 0.0001822720947662041,
      "loss": 2.7755,
      "step": 144777
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.110900402069092,
      "learning_rate": 0.00018226833234584065,
      "loss": 2.7438,
      "step": 144778
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6546051502227783,
      "learning_rate": 0.00018226456994736569,
      "loss": 2.8252,
      "step": 144779
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.155092477798462,
      "learning_rate": 0.00018226080757078,
      "loss": 2.8501,
      "step": 144780
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8278875350952148,
      "learning_rate": 0.00018225704521608428,
      "loss": 2.873,
      "step": 144781
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.29190731048584,
      "learning_rate": 0.00018225328288327912,
      "loss": 3.1541,
      "step": 144782
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5715432167053223,
      "learning_rate": 0.00018224952057236535,
      "loss": 2.82,
      "step": 144783
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.455528974533081,
      "learning_rate": 0.00018224575828334366,
      "loss": 2.8325,
      "step": 144784
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.457850694656372,
      "learning_rate": 0.0001822419960162146,
      "loss": 2.9017,
      "step": 144785
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6882545948028564,
      "learning_rate": 0.00018223823377097901,
      "loss": 3.212,
      "step": 144786
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2505359649658203,
      "learning_rate": 0.00018223447154763764,
      "loss": 2.8912,
      "step": 144787
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1282100677490234,
      "learning_rate": 0.00018223070934619102,
      "loss": 3.034,
      "step": 144788
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0603959560394287,
      "learning_rate": 0.00018222694716663992,
      "loss": 3.0659,
      "step": 144789
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2517364025115967,
      "learning_rate": 0.00018222318500898504,
      "loss": 2.8468,
      "step": 144790
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4362332820892334,
      "learning_rate": 0.0001822194228732272,
      "loss": 2.8402,
      "step": 144791
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.560915470123291,
      "learning_rate": 0.00018221566075936693,
      "loss": 2.9247,
      "step": 144792
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.287625312805176,
      "learning_rate": 0.00018221189866740503,
      "loss": 3.1271,
      "step": 144793
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2810864448547363,
      "learning_rate": 0.00018220813659734214,
      "loss": 2.8265,
      "step": 144794
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.364037036895752,
      "learning_rate": 0.0001822043745491789,
      "loss": 2.7401,
      "step": 144795
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.4426615238189697,
      "learning_rate": 0.00018220061252291616,
      "loss": 3.1383,
      "step": 144796
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1381893157958984,
      "learning_rate": 0.00018219685051855454,
      "loss": 3.0004,
      "step": 144797
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.553736448287964,
      "learning_rate": 0.00018219308853609478,
      "loss": 2.9474,
      "step": 144798
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.794156312942505,
      "learning_rate": 0.0001821893265755375,
      "loss": 2.9145,
      "step": 144799
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1422252655029297,
      "learning_rate": 0.00018218556463688352,
      "loss": 3.0614,
      "step": 144800
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.632028341293335,
      "learning_rate": 0.0001821818027201334,
      "loss": 3.1082,
      "step": 144801
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.95695960521698,
      "learning_rate": 0.0001821780408252879,
      "loss": 3.3748,
      "step": 144802
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2343099117279053,
      "learning_rate": 0.00018217427895234775,
      "loss": 2.8494,
      "step": 144803
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0105299949645996,
      "learning_rate": 0.00018217051710131362,
      "loss": 3.0371,
      "step": 144804
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8154938220977783,
      "learning_rate": 0.00018216675527218623,
      "loss": 2.7901,
      "step": 144805
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9576444625854492,
      "learning_rate": 0.0001821629934649663,
      "loss": 2.8173,
      "step": 144806
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.188779592514038,
      "learning_rate": 0.00018215923167965443,
      "loss": 2.9369,
      "step": 144807
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.880446195602417,
      "learning_rate": 0.00018215546991625137,
      "loss": 2.83,
      "step": 144808
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2005999088287354,
      "learning_rate": 0.00018215170817475787,
      "loss": 2.9866,
      "step": 144809
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.060810089111328,
      "learning_rate": 0.00018214794645517458,
      "loss": 2.9,
      "step": 144810
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3096303939819336,
      "learning_rate": 0.0001821441847575022,
      "loss": 3.0798,
      "step": 144811
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.341447114944458,
      "learning_rate": 0.00018214042308174156,
      "loss": 3.0367,
      "step": 144812
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6473886966705322,
      "learning_rate": 0.00018213666142789315,
      "loss": 3.153,
      "step": 144813
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.295377254486084,
      "learning_rate": 0.0001821328997959577,
      "loss": 3.2067,
      "step": 144814
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.027409791946411,
      "learning_rate": 0.000182129138185936,
      "loss": 2.8803,
      "step": 144815
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3561840057373047,
      "learning_rate": 0.00018212537659782874,
      "loss": 2.9429,
      "step": 144816
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.502485513687134,
      "learning_rate": 0.00018212161503163658,
      "loss": 2.9056,
      "step": 144817
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4515063762664795,
      "learning_rate": 0.00018211785348736035,
      "loss": 2.6888,
      "step": 144818
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.77179217338562,
      "learning_rate": 0.0001821140919650005,
      "loss": 3.0749,
      "step": 144819
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.551650285720825,
      "learning_rate": 0.0001821103304645579,
      "loss": 3.0053,
      "step": 144820
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5162994861602783,
      "learning_rate": 0.00018210656898603317,
      "loss": 2.7828,
      "step": 144821
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6616008281707764,
      "learning_rate": 0.00018210280752942713,
      "loss": 2.9884,
      "step": 144822
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0515763759613037,
      "learning_rate": 0.00018209904609474032,
      "loss": 3.0279,
      "step": 144823
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8473567962646484,
      "learning_rate": 0.0001820952846819737,
      "loss": 3.0798,
      "step": 144824
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.046637535095215,
      "learning_rate": 0.00018209152329112766,
      "loss": 2.8181,
      "step": 144825
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.485273838043213,
      "learning_rate": 0.00018208776192220303,
      "loss": 3.1536,
      "step": 144826
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.087169885635376,
      "learning_rate": 0.0001820840005752005,
      "loss": 2.8629,
      "step": 144827
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.59586501121521,
      "learning_rate": 0.00018208023925012076,
      "loss": 2.9545,
      "step": 144828
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.147111177444458,
      "learning_rate": 0.00018207647794696457,
      "loss": 2.87,
      "step": 144829
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4563896656036377,
      "learning_rate": 0.0001820727166657327,
      "loss": 2.9501,
      "step": 144830
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1274073123931885,
      "learning_rate": 0.0001820689554064256,
      "loss": 3.1364,
      "step": 144831
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.246094226837158,
      "learning_rate": 0.00018206519416904412,
      "loss": 2.7903,
      "step": 144832
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4327852725982666,
      "learning_rate": 0.00018206143295358896,
      "loss": 2.9578,
      "step": 144833
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5935940742492676,
      "learning_rate": 0.0001820576717600608,
      "loss": 2.7427,
      "step": 144834
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.715787410736084,
      "learning_rate": 0.00018205391058846035,
      "loss": 3.0046,
      "step": 144835
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0391221046447754,
      "learning_rate": 0.0001820501494387884,
      "loss": 2.9879,
      "step": 144836
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.736325263977051,
      "learning_rate": 0.00018204638831104545,
      "loss": 2.8475,
      "step": 144837
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3126330375671387,
      "learning_rate": 0.0001820426272052323,
      "loss": 2.7372,
      "step": 144838
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3829493522644043,
      "learning_rate": 0.00018203886612134964,
      "loss": 2.9317,
      "step": 144839
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1434895992279053,
      "learning_rate": 0.0001820351050593982,
      "loss": 2.878,
      "step": 144840
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.631354570388794,
      "learning_rate": 0.00018203134401937864,
      "loss": 2.9961,
      "step": 144841
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7005176544189453,
      "learning_rate": 0.0001820275830012918,
      "loss": 2.9093,
      "step": 144842
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.664074659347534,
      "learning_rate": 0.00018202382200513815,
      "loss": 2.9954,
      "step": 144843
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.648510694503784,
      "learning_rate": 0.00018202006103091851,
      "loss": 3.0225,
      "step": 144844
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.8134543895721436,
      "learning_rate": 0.00018201630007863356,
      "loss": 2.5767,
      "step": 144845
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0251412391662598,
      "learning_rate": 0.00018201253914828402,
      "loss": 2.9895,
      "step": 144846
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3808138370513916,
      "learning_rate": 0.00018200877823987055,
      "loss": 2.9259,
      "step": 144847
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.010643482208252,
      "learning_rate": 0.000182005017353394,
      "loss": 3.0039,
      "step": 144848
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.513667583465576,
      "learning_rate": 0.0001820012564888548,
      "loss": 3.0696,
      "step": 144849
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0405683517456055,
      "learning_rate": 0.00018199749564625387,
      "loss": 3.0943,
      "step": 144850
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.891751766204834,
      "learning_rate": 0.0001819937348255918,
      "loss": 3.0385,
      "step": 144851
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.963748455047607,
      "learning_rate": 0.00018198997402686927,
      "loss": 2.7059,
      "step": 144852
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.4106922149658203,
      "learning_rate": 0.00018198621325008708,
      "loss": 2.9723,
      "step": 144853
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6741278171539307,
      "learning_rate": 0.00018198245249524602,
      "loss": 2.7654,
      "step": 144854
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.16860818862915,
      "learning_rate": 0.00018197869176234653,
      "loss": 2.8648,
      "step": 144855
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.54744291305542,
      "learning_rate": 0.00018197493105138936,
      "loss": 2.6841,
      "step": 144856
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.0422563552856445,
      "learning_rate": 0.00018197117036237534,
      "loss": 2.8916,
      "step": 144857
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7020084857940674,
      "learning_rate": 0.00018196740969530505,
      "loss": 2.9167,
      "step": 144858
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4525246620178223,
      "learning_rate": 0.0001819636490501793,
      "loss": 2.9901,
      "step": 144859
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.70468807220459,
      "learning_rate": 0.00018195988842699878,
      "loss": 2.8556,
      "step": 144860
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.728278875350952,
      "learning_rate": 0.00018195612782576415,
      "loss": 3.1531,
      "step": 144861
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.274381399154663,
      "learning_rate": 0.00018195236724647605,
      "loss": 3.0404,
      "step": 144862
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.81630802154541,
      "learning_rate": 0.0001819486066891352,
      "loss": 3.028,
      "step": 144863
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.483477592468262,
      "learning_rate": 0.00018194484615374236,
      "loss": 2.8266,
      "step": 144864
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.940783739089966,
      "learning_rate": 0.00018194108564029818,
      "loss": 3.0207,
      "step": 144865
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.222104787826538,
      "learning_rate": 0.00018193732514880338,
      "loss": 2.8726,
      "step": 144866
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.029177665710449,
      "learning_rate": 0.00018193356467925876,
      "loss": 2.8691,
      "step": 144867
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.091336965560913,
      "learning_rate": 0.00018192980423166493,
      "loss": 3.2718,
      "step": 144868
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.860532522201538,
      "learning_rate": 0.00018192604380602245,
      "loss": 2.9482,
      "step": 144869
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.566817283630371,
      "learning_rate": 0.00018192228340233217,
      "loss": 2.8473,
      "step": 144870
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9365938901901245,
      "learning_rate": 0.0001819185230205948,
      "loss": 3.0271,
      "step": 144871
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5518388748168945,
      "learning_rate": 0.000181914762660811,
      "loss": 2.9284,
      "step": 144872
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.252749443054199,
      "learning_rate": 0.00018191100232298147,
      "loss": 2.9722,
      "step": 144873
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0694737434387207,
      "learning_rate": 0.00018190724200710693,
      "loss": 2.7493,
      "step": 144874
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.475677251815796,
      "learning_rate": 0.00018190348171318808,
      "loss": 3.2033,
      "step": 144875
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6078097820281982,
      "learning_rate": 0.00018189972144122557,
      "loss": 2.9699,
      "step": 144876
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3423309326171875,
      "learning_rate": 0.00018189596119122013,
      "loss": 2.9331,
      "step": 144877
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1652560234069824,
      "learning_rate": 0.00018189220096317244,
      "loss": 3.0749,
      "step": 144878
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.118443489074707,
      "learning_rate": 0.0001818884407570833,
      "loss": 3.2633,
      "step": 144879
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.6768553256988525,
      "learning_rate": 0.0001818846805729532,
      "loss": 2.7314,
      "step": 144880
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4921677112579346,
      "learning_rate": 0.00018188092041078306,
      "loss": 3.1402,
      "step": 144881
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.072772979736328,
      "learning_rate": 0.00018187716027057347,
      "loss": 2.9729,
      "step": 144882
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4645702838897705,
      "learning_rate": 0.00018187340015232516,
      "loss": 2.8548,
      "step": 144883
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.912090301513672,
      "learning_rate": 0.00018186964005603877,
      "loss": 2.8006,
      "step": 144884
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.6770544052124023,
      "learning_rate": 0.00018186587998171512,
      "loss": 3.1009,
      "step": 144885
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0274720191955566,
      "learning_rate": 0.00018186211992935478,
      "loss": 2.8078,
      "step": 144886
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.355013608932495,
      "learning_rate": 0.0001818583598989585,
      "loss": 3.0866,
      "step": 144887
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.464815139770508,
      "learning_rate": 0.00018185459989052694,
      "loss": 2.9374,
      "step": 144888
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4351251125335693,
      "learning_rate": 0.00018185083990406088,
      "loss": 2.8998,
      "step": 144889
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8891146183013916,
      "learning_rate": 0.00018184707993956103,
      "loss": 2.8259,
      "step": 144890
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.399472951889038,
      "learning_rate": 0.00018184331999702803,
      "loss": 3.07,
      "step": 144891
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3248066902160645,
      "learning_rate": 0.00018183956007646255,
      "loss": 3.258,
      "step": 144892
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.550802230834961,
      "learning_rate": 0.00018183580017786528,
      "loss": 3.0503,
      "step": 144893
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1717941761016846,
      "learning_rate": 0.000181832040301237,
      "loss": 3.0483,
      "step": 144894
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2718770503997803,
      "learning_rate": 0.00018182828044657835,
      "loss": 2.9587,
      "step": 144895
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.088848829269409,
      "learning_rate": 0.0001818245206138901,
      "loss": 3.0006,
      "step": 144896
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.160835027694702,
      "learning_rate": 0.000181820760803173,
      "loss": 2.8839,
      "step": 144897
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.546685218811035,
      "learning_rate": 0.00018181700101442753,
      "loss": 2.9051,
      "step": 144898
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.555521249771118,
      "learning_rate": 0.0001818132412476545,
      "loss": 2.7776,
      "step": 144899
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3905739784240723,
      "learning_rate": 0.0001818094815028546,
      "loss": 2.9429,
      "step": 144900
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.869159698486328,
      "learning_rate": 0.0001818057217800286,
      "loss": 3.0055,
      "step": 144901
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2207465171813965,
      "learning_rate": 0.0001818019620791771,
      "loss": 2.9049,
      "step": 144902
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.454838514328003,
      "learning_rate": 0.000181798202400301,
      "loss": 2.9541,
      "step": 144903
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.8540117740631104,
      "learning_rate": 0.0001817944427434007,
      "loss": 2.8901,
      "step": 144904
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3772428035736084,
      "learning_rate": 0.00018179068310847707,
      "loss": 2.9075,
      "step": 144905
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.782294750213623,
      "learning_rate": 0.00018178692349553078,
      "loss": 2.8447,
      "step": 144906
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3259122371673584,
      "learning_rate": 0.00018178316390456253,
      "loss": 3.1131,
      "step": 144907
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1957058906555176,
      "learning_rate": 0.000181779404335573,
      "loss": 2.9236,
      "step": 144908
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5716071128845215,
      "learning_rate": 0.00018177564478856305,
      "loss": 3.072,
      "step": 144909
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0331058502197266,
      "learning_rate": 0.0001817718852635331,
      "loss": 3.2866,
      "step": 144910
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1523704528808594,
      "learning_rate": 0.00018176812576048402,
      "loss": 3.0003,
      "step": 144911
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.126142978668213,
      "learning_rate": 0.00018176436627941644,
      "loss": 3.0853,
      "step": 144912
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.242335557937622,
      "learning_rate": 0.00018176060682033115,
      "loss": 2.7424,
      "step": 144913
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.188840866088867,
      "learning_rate": 0.00018175684738322873,
      "loss": 2.8582,
      "step": 144914
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.361982822418213,
      "learning_rate": 0.0001817530879681101,
      "loss": 2.9663,
      "step": 144915
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0087523460388184,
      "learning_rate": 0.0001817493285749757,
      "loss": 3.0686,
      "step": 144916
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6735386848449707,
      "learning_rate": 0.0001817455692038263,
      "loss": 3.0255,
      "step": 144917
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0518417358398438,
      "learning_rate": 0.00018174180985466263,
      "loss": 2.9875,
      "step": 144918
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.256521463394165,
      "learning_rate": 0.0001817380505274854,
      "loss": 3.1887,
      "step": 144919
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.144940137863159,
      "learning_rate": 0.00018173429122229532,
      "loss": 2.9658,
      "step": 144920
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1447203159332275,
      "learning_rate": 0.00018173053193909315,
      "loss": 3.2076,
      "step": 144921
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0852344036102295,
      "learning_rate": 0.00018172677267787942,
      "loss": 3.2472,
      "step": 144922
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.702564239501953,
      "learning_rate": 0.0001817230134386549,
      "loss": 3.1225,
      "step": 144923
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3947670459747314,
      "learning_rate": 0.0001817192542214203,
      "loss": 2.9931,
      "step": 144924
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3063254356384277,
      "learning_rate": 0.00018171549502617632,
      "loss": 2.9966,
      "step": 144925
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8800758123397827,
      "learning_rate": 0.0001817117358529237,
      "loss": 2.867,
      "step": 144926
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0175185203552246,
      "learning_rate": 0.00018170797670166315,
      "loss": 3.1958,
      "step": 144927
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9487638473510742,
      "learning_rate": 0.00018170421757239525,
      "loss": 3.0334,
      "step": 144928
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.337104082107544,
      "learning_rate": 0.00018170045846512076,
      "loss": 2.8863,
      "step": 144929
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5803709030151367,
      "learning_rate": 0.00018169669937984038,
      "loss": 3.1678,
      "step": 144930
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.256941080093384,
      "learning_rate": 0.00018169294031655485,
      "loss": 2.9221,
      "step": 144931
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3830013275146484,
      "learning_rate": 0.0001816891812752648,
      "loss": 2.9704,
      "step": 144932
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3464195728302,
      "learning_rate": 0.00018168542225597097,
      "loss": 2.7114,
      "step": 144933
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5278570652008057,
      "learning_rate": 0.0001816816632586742,
      "loss": 3.1556,
      "step": 144934
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2138242721557617,
      "learning_rate": 0.0001816779042833749,
      "loss": 2.8916,
      "step": 144935
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.4386537075042725,
      "learning_rate": 0.0001816741453300739,
      "loss": 2.9686,
      "step": 144936
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0991270542144775,
      "learning_rate": 0.00018167038639877193,
      "loss": 3.0315,
      "step": 144937
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9587368965148926,
      "learning_rate": 0.00018166662748946965,
      "loss": 2.9597,
      "step": 144938
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.391031265258789,
      "learning_rate": 0.00018166286860216775,
      "loss": 2.9051,
      "step": 144939
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.783261775970459,
      "learning_rate": 0.00018165910973686714,
      "loss": 2.8342,
      "step": 144940
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7165963649749756,
      "learning_rate": 0.00018165535089356818,
      "loss": 2.8398,
      "step": 144941
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3968310356140137,
      "learning_rate": 0.00018165159207227176,
      "loss": 2.7877,
      "step": 144942
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9555578231811523,
      "learning_rate": 0.00018164783327297848,
      "loss": 2.9977,
      "step": 144943
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.878201484680176,
      "learning_rate": 0.00018164407449568917,
      "loss": 2.7214,
      "step": 144944
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3903777599334717,
      "learning_rate": 0.00018164031574040442,
      "loss": 3.093,
      "step": 144945
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.009387969970703,
      "learning_rate": 0.0001816365570071251,
      "loss": 2.9349,
      "step": 144946
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.975593090057373,
      "learning_rate": 0.00018163279829585166,
      "loss": 2.741,
      "step": 144947
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2035164833068848,
      "learning_rate": 0.0001816290396065849,
      "loss": 3.1551,
      "step": 144948
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2430496215820312,
      "learning_rate": 0.00018162528093932558,
      "loss": 2.8779,
      "step": 144949
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.380795955657959,
      "learning_rate": 0.00018162152229407428,
      "loss": 3.1706,
      "step": 144950
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4383962154388428,
      "learning_rate": 0.00018161776367083186,
      "loss": 2.9022,
      "step": 144951
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9925845861434937,
      "learning_rate": 0.00018161400506959893,
      "loss": 3.0093,
      "step": 144952
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.735870599746704,
      "learning_rate": 0.0001816102464903762,
      "loss": 2.9195,
      "step": 144953
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2667856216430664,
      "learning_rate": 0.00018160648793316434,
      "loss": 3.1419,
      "step": 144954
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0426108837127686,
      "learning_rate": 0.00018160272939796404,
      "loss": 3.1769,
      "step": 144955
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5826754570007324,
      "learning_rate": 0.00018159897088477602,
      "loss": 3.0869,
      "step": 144956
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.369751214981079,
      "learning_rate": 0.000181595212393601,
      "loss": 2.7235,
      "step": 144957
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.156484842300415,
      "learning_rate": 0.00018159145392443975,
      "loss": 2.7584,
      "step": 144958
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.70443058013916,
      "learning_rate": 0.00018158769547729276,
      "loss": 2.6746,
      "step": 144959
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.022045850753784,
      "learning_rate": 0.00018158393705216096,
      "loss": 3.0407,
      "step": 144960
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.232999801635742,
      "learning_rate": 0.00018158017864904486,
      "loss": 2.9654,
      "step": 144961
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.301304817199707,
      "learning_rate": 0.00018157642026794525,
      "loss": 2.8637,
      "step": 144962
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.401581287384033,
      "learning_rate": 0.00018157266190886283,
      "loss": 2.9685,
      "step": 144963
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2239832878112793,
      "learning_rate": 0.00018156890357179834,
      "loss": 2.9961,
      "step": 144964
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.168009042739868,
      "learning_rate": 0.00018156514525675236,
      "loss": 3.1668,
      "step": 144965
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6368353366851807,
      "learning_rate": 0.00018156138696372564,
      "loss": 2.7885,
      "step": 144966
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.177011728286743,
      "learning_rate": 0.00018155762869271898,
      "loss": 3.051,
      "step": 144967
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3011906147003174,
      "learning_rate": 0.00018155387044373292,
      "loss": 2.6941,
      "step": 144968
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5662460327148438,
      "learning_rate": 0.0001815501122167682,
      "loss": 2.6776,
      "step": 144969
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5847837924957275,
      "learning_rate": 0.00018154635401182567,
      "loss": 3.0058,
      "step": 144970
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.728102922439575,
      "learning_rate": 0.0001815425958289058,
      "loss": 3.0194,
      "step": 144971
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.5278518199920654,
      "learning_rate": 0.00018153883766800944,
      "loss": 3.0866,
      "step": 144972
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9468389749526978,
      "learning_rate": 0.0001815350795291372,
      "loss": 2.8405,
      "step": 144973
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9498302936553955,
      "learning_rate": 0.0001815313214122899,
      "loss": 2.9834,
      "step": 144974
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8543009757995605,
      "learning_rate": 0.00018152756331746815,
      "loss": 2.9113,
      "step": 144975
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0068113803863525,
      "learning_rate": 0.00018152380524467268,
      "loss": 2.9611,
      "step": 144976
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9813661575317383,
      "learning_rate": 0.00018152004719390412,
      "loss": 3.0727,
      "step": 144977
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8792511224746704,
      "learning_rate": 0.00018151628916516322,
      "loss": 2.9254,
      "step": 144978
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0565614700317383,
      "learning_rate": 0.00018151253115845068,
      "loss": 2.7855,
      "step": 144979
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.091661214828491,
      "learning_rate": 0.00018150877317376718,
      "loss": 2.8579,
      "step": 144980
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.740321159362793,
      "learning_rate": 0.00018150501521111345,
      "loss": 2.7367,
      "step": 144981
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.804015636444092,
      "learning_rate": 0.0001815012572704903,
      "loss": 3.0448,
      "step": 144982
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.47733736038208,
      "learning_rate": 0.00018149749935189818,
      "loss": 2.8982,
      "step": 144983
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.390723943710327,
      "learning_rate": 0.00018149374145533793,
      "loss": 2.9803,
      "step": 144984
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0837483406066895,
      "learning_rate": 0.00018148998358081018,
      "loss": 3.204,
      "step": 144985
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1843416690826416,
      "learning_rate": 0.00018148622572831572,
      "loss": 2.9874,
      "step": 144986
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4652493000030518,
      "learning_rate": 0.00018148246789785518,
      "loss": 3.0672,
      "step": 144987
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.716684579849243,
      "learning_rate": 0.00018147871008942942,
      "loss": 2.8908,
      "step": 144988
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.963810682296753,
      "learning_rate": 0.0001814749523030389,
      "loss": 3.0389,
      "step": 144989
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2905075550079346,
      "learning_rate": 0.00018147119453868442,
      "loss": 3.1097,
      "step": 144990
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3359296321868896,
      "learning_rate": 0.00018146743679636667,
      "loss": 3.2422,
      "step": 144991
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8389265537261963,
      "learning_rate": 0.00018146367907608636,
      "loss": 2.9929,
      "step": 144992
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4104037284851074,
      "learning_rate": 0.0001814599213778442,
      "loss": 2.8972,
      "step": 144993
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8800519704818726,
      "learning_rate": 0.000181456163701641,
      "loss": 2.9849,
      "step": 144994
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6087024211883545,
      "learning_rate": 0.00018145240604747721,
      "loss": 2.9016,
      "step": 144995
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3361282348632812,
      "learning_rate": 0.00018144864841535365,
      "loss": 2.8983,
      "step": 144996
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.4434144496917725,
      "learning_rate": 0.00018144489080527103,
      "loss": 3.0926,
      "step": 144997
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.183215379714966,
      "learning_rate": 0.00018144113321723004,
      "loss": 3.1492,
      "step": 144998
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2733170986175537,
      "learning_rate": 0.0001814373756512314,
      "loss": 3.0633,
      "step": 144999
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0933756828308105,
      "learning_rate": 0.00018143361810727578,
      "loss": 3.1023,
      "step": 145000
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0531351566314697,
      "learning_rate": 0.00018142986058536394,
      "loss": 2.9714,
      "step": 145001
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0223171710968018,
      "learning_rate": 0.00018142610308549649,
      "loss": 3.2863,
      "step": 145002
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.377882242202759,
      "learning_rate": 0.00018142234560767414,
      "loss": 3.0931,
      "step": 145003
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9788461923599243,
      "learning_rate": 0.00018141858815189763,
      "loss": 3.0731,
      "step": 145004
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.049569845199585,
      "learning_rate": 0.00018141483071816764,
      "loss": 3.0113,
      "step": 145005
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.228285789489746,
      "learning_rate": 0.00018141107330648488,
      "loss": 3.0923,
      "step": 145006
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0744290351867676,
      "learning_rate": 0.00018140731591685013,
      "loss": 2.6322,
      "step": 145007
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.658527135848999,
      "learning_rate": 0.00018140355854926387,
      "loss": 2.9504,
      "step": 145008
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.293229341506958,
      "learning_rate": 0.00018139980120372696,
      "loss": 2.9776,
      "step": 145009
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.285226821899414,
      "learning_rate": 0.00018139604388024008,
      "loss": 3.0876,
      "step": 145010
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.186180591583252,
      "learning_rate": 0.00018139228657880385,
      "loss": 2.7535,
      "step": 145011
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9909398555755615,
      "learning_rate": 0.00018138852929941906,
      "loss": 3.0704,
      "step": 145012
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.14139986038208,
      "learning_rate": 0.00018138477204208655,
      "loss": 2.8734,
      "step": 145013
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2565340995788574,
      "learning_rate": 0.00018138101480680668,
      "loss": 2.7442,
      "step": 145014
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7598626613616943,
      "learning_rate": 0.00018137725759358032,
      "loss": 2.8379,
      "step": 145015
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8903071880340576,
      "learning_rate": 0.00018137350040240818,
      "loss": 3.1415,
      "step": 145016
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3593649864196777,
      "learning_rate": 0.00018136974323329096,
      "loss": 3.0772,
      "step": 145017
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2905561923980713,
      "learning_rate": 0.0001813659860862293,
      "loss": 2.8589,
      "step": 145018
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4692931175231934,
      "learning_rate": 0.00018136222896122413,
      "loss": 2.9119,
      "step": 145019
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1954457759857178,
      "learning_rate": 0.00018135847185827577,
      "loss": 3.037,
      "step": 145020
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3421785831451416,
      "learning_rate": 0.00018135471477738517,
      "loss": 3.1194,
      "step": 145021
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.143651247024536,
      "learning_rate": 0.00018135095771855292,
      "loss": 2.9503,
      "step": 145022
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8118157386779785,
      "learning_rate": 0.00018134720068177976,
      "loss": 2.9504,
      "step": 145023
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3826560974121094,
      "learning_rate": 0.00018134344366706643,
      "loss": 2.9131,
      "step": 145024
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0886902809143066,
      "learning_rate": 0.0001813396866744137,
      "loss": 2.736,
      "step": 145025
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0192039012908936,
      "learning_rate": 0.00018133592970382206,
      "loss": 3.0831,
      "step": 145026
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.699162244796753,
      "learning_rate": 0.00018133217275529228,
      "loss": 2.8608,
      "step": 145027
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6606900691986084,
      "learning_rate": 0.00018132841582882512,
      "loss": 2.7707,
      "step": 145028
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.291747570037842,
      "learning_rate": 0.00018132465892442125,
      "loss": 3.0355,
      "step": 145029
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.718032121658325,
      "learning_rate": 0.00018132090204208133,
      "loss": 2.8264,
      "step": 145030
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2687253952026367,
      "learning_rate": 0.00018131714518180623,
      "loss": 3.2655,
      "step": 145031
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.189269781112671,
      "learning_rate": 0.00018131338834359643,
      "loss": 3.0417,
      "step": 145032
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.123380184173584,
      "learning_rate": 0.00018130963152745267,
      "loss": 2.9944,
      "step": 145033
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3813986778259277,
      "learning_rate": 0.0001813058747333757,
      "loss": 2.9004,
      "step": 145034
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.698526620864868,
      "learning_rate": 0.0001813021179613662,
      "loss": 3.2733,
      "step": 145035
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.199638843536377,
      "learning_rate": 0.0001812983612114249,
      "loss": 2.9132,
      "step": 145036
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6177852153778076,
      "learning_rate": 0.00018129460448355257,
      "loss": 2.93,
      "step": 145037
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.212392568588257,
      "learning_rate": 0.00018129084777774966,
      "loss": 2.8534,
      "step": 145038
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3600943088531494,
      "learning_rate": 0.00018128709109401712,
      "loss": 2.9548,
      "step": 145039
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.482170343399048,
      "learning_rate": 0.00018128333443235547,
      "loss": 3.0143,
      "step": 145040
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.764371871948242,
      "learning_rate": 0.0001812795777927655,
      "loss": 2.974,
      "step": 145041
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1706645488739014,
      "learning_rate": 0.0001812758211752479,
      "loss": 2.8166,
      "step": 145042
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8755539655685425,
      "learning_rate": 0.00018127206457980345,
      "loss": 2.8724,
      "step": 145043
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.952813982963562,
      "learning_rate": 0.0001812683080064327,
      "loss": 2.642,
      "step": 145044
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.045799970626831,
      "learning_rate": 0.00018126455145513646,
      "loss": 2.7272,
      "step": 145045
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.544311046600342,
      "learning_rate": 0.00018126079492591533,
      "loss": 3.1474,
      "step": 145046
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.7738211154937744,
      "learning_rate": 0.00018125703841877008,
      "loss": 3.0751,
      "step": 145047
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8486218452453613,
      "learning_rate": 0.00018125328193370135,
      "loss": 2.8989,
      "step": 145048
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.167618989944458,
      "learning_rate": 0.00018124952547070994,
      "loss": 3.0017,
      "step": 145049
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5038390159606934,
      "learning_rate": 0.00018124576902979646,
      "loss": 3.0306,
      "step": 145050
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.348041296005249,
      "learning_rate": 0.0001812420126109616,
      "loss": 2.9618,
      "step": 145051
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1894338130950928,
      "learning_rate": 0.00018123825621420617,
      "loss": 2.8245,
      "step": 145052
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7657840251922607,
      "learning_rate": 0.0001812344998395307,
      "loss": 3.1826,
      "step": 145053
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2436206340789795,
      "learning_rate": 0.00018123074348693602,
      "loss": 3.0199,
      "step": 145054
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3640058040618896,
      "learning_rate": 0.00018122698715642278,
      "loss": 3.12,
      "step": 145055
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.199208974838257,
      "learning_rate": 0.00018122323084799168,
      "loss": 2.7114,
      "step": 145056
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9169833660125732,
      "learning_rate": 0.00018121947456164344,
      "loss": 3.0063,
      "step": 145057
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6911442279815674,
      "learning_rate": 0.0001812157182973787,
      "loss": 2.8701,
      "step": 145058
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.232663869857788,
      "learning_rate": 0.00018121196205519828,
      "loss": 2.9378,
      "step": 145059
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.414937973022461,
      "learning_rate": 0.00018120820583510276,
      "loss": 2.839,
      "step": 145060
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.232938051223755,
      "learning_rate": 0.0001812044496370929,
      "loss": 3.0018,
      "step": 145061
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1956675052642822,
      "learning_rate": 0.0001812006934611693,
      "loss": 3.0218,
      "step": 145062
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4763097763061523,
      "learning_rate": 0.0001811969373073328,
      "loss": 2.9142,
      "step": 145063
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.548452138900757,
      "learning_rate": 0.00018119318117558396,
      "loss": 3.1464,
      "step": 145064
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.387127161026001,
      "learning_rate": 0.0001811894250659236,
      "loss": 2.9295,
      "step": 145065
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5816428661346436,
      "learning_rate": 0.00018118566897835242,
      "loss": 3.1335,
      "step": 145066
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8703010082244873,
      "learning_rate": 0.000181181912912871,
      "loss": 2.9285,
      "step": 145067
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6712963581085205,
      "learning_rate": 0.00018117815686948015,
      "loss": 2.9703,
      "step": 145068
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.312361240386963,
      "learning_rate": 0.00018117440084818048,
      "loss": 2.9992,
      "step": 145069
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.016244888305664,
      "learning_rate": 0.0001811706448489727,
      "loss": 2.8431,
      "step": 145070
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.623868227005005,
      "learning_rate": 0.00018116688887185757,
      "loss": 2.9184,
      "step": 145071
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.047431230545044,
      "learning_rate": 0.00018116313291683577,
      "loss": 3.1117,
      "step": 145072
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.363485336303711,
      "learning_rate": 0.00018115937698390798,
      "loss": 2.9505,
      "step": 145073
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.054347991943359,
      "learning_rate": 0.00018115562107307503,
      "loss": 2.9745,
      "step": 145074
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.646845817565918,
      "learning_rate": 0.00018115186518433734,
      "loss": 3.0062,
      "step": 145075
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.743983745574951,
      "learning_rate": 0.0001811481093176958,
      "loss": 2.8879,
      "step": 145076
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8397185802459717,
      "learning_rate": 0.00018114435347315106,
      "loss": 2.9103,
      "step": 145077
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1140663623809814,
      "learning_rate": 0.00018114059765070382,
      "loss": 3.0042,
      "step": 145078
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.188538074493408,
      "learning_rate": 0.0001811368418503548,
      "loss": 2.8281,
      "step": 145079
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.851191520690918,
      "learning_rate": 0.00018113308607210478,
      "loss": 2.8015,
      "step": 145080
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0950675010681152,
      "learning_rate": 0.00018112933031595426,
      "loss": 3.0439,
      "step": 145081
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2038044929504395,
      "learning_rate": 0.00018112557458190405,
      "loss": 2.77,
      "step": 145082
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.211519479751587,
      "learning_rate": 0.0001811218188699548,
      "loss": 3.0035,
      "step": 145083
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.817145586013794,
      "learning_rate": 0.00018111806318010727,
      "loss": 2.9528,
      "step": 145084
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2854793071746826,
      "learning_rate": 0.00018111430751236216,
      "loss": 2.8218,
      "step": 145085
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9901937246322632,
      "learning_rate": 0.00018111055186672024,
      "loss": 3.2922,
      "step": 145086
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1648120880126953,
      "learning_rate": 0.00018110679624318198,
      "loss": 3.0917,
      "step": 145087
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8769588470458984,
      "learning_rate": 0.00018110304064174827,
      "loss": 2.9306,
      "step": 145088
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.527269124984741,
      "learning_rate": 0.00018109928506241968,
      "loss": 2.885,
      "step": 145089
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.340113401412964,
      "learning_rate": 0.000181095529505197,
      "loss": 2.8249,
      "step": 145090
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1570520401000977,
      "learning_rate": 0.00018109177397008093,
      "loss": 2.7423,
      "step": 145091
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.371185064315796,
      "learning_rate": 0.00018108801845707225,
      "loss": 2.9427,
      "step": 145092
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1161718368530273,
      "learning_rate": 0.0001810842629661714,
      "loss": 2.6379,
      "step": 145093
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6873278617858887,
      "learning_rate": 0.00018108050749737927,
      "loss": 2.8405,
      "step": 145094
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.248770236968994,
      "learning_rate": 0.00018107675205069652,
      "loss": 2.8772,
      "step": 145095
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.057576894760132,
      "learning_rate": 0.00018107299662612384,
      "loss": 2.9835,
      "step": 145096
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3880326747894287,
      "learning_rate": 0.00018106924122366193,
      "loss": 3.048,
      "step": 145097
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0796778202056885,
      "learning_rate": 0.0001810654858433116,
      "loss": 3.0539,
      "step": 145098
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.79997181892395,
      "learning_rate": 0.00018106173048507333,
      "loss": 2.8809,
      "step": 145099
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3907411098480225,
      "learning_rate": 0.00018105797514894793,
      "loss": 2.9919,
      "step": 145100
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.613065004348755,
      "learning_rate": 0.0001810542198349361,
      "loss": 2.9905,
      "step": 145101
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.121238946914673,
      "learning_rate": 0.00018105046454303854,
      "loss": 2.9736,
      "step": 145102
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8882232904434204,
      "learning_rate": 0.00018104670927325593,
      "loss": 3.0931,
      "step": 145103
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.386651039123535,
      "learning_rate": 0.0001810429540255891,
      "loss": 3.0005,
      "step": 145104
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.015366077423096,
      "learning_rate": 0.00018103919880003852,
      "loss": 2.9194,
      "step": 145105
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6355814933776855,
      "learning_rate": 0.00018103544359660502,
      "loss": 2.9894,
      "step": 145106
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0614283084869385,
      "learning_rate": 0.00018103168841528927,
      "loss": 3.0411,
      "step": 145107
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0449087619781494,
      "learning_rate": 0.00018102793325609195,
      "loss": 3.0976,
      "step": 145108
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7881908416748047,
      "learning_rate": 0.0001810241781190138,
      "loss": 3.0108,
      "step": 145109
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.320305109024048,
      "learning_rate": 0.0001810204230040556,
      "loss": 2.9052,
      "step": 145110
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9511600732803345,
      "learning_rate": 0.00018101666791121786,
      "loss": 3.0545,
      "step": 145111
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0724501609802246,
      "learning_rate": 0.00018101291284050136,
      "loss": 2.8995,
      "step": 145112
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.107252359390259,
      "learning_rate": 0.00018100915779190678,
      "loss": 3.057,
      "step": 145113
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.166400671005249,
      "learning_rate": 0.00018100540276543486,
      "loss": 2.7214,
      "step": 145114
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2531187534332275,
      "learning_rate": 0.00018100164776108628,
      "loss": 3.1251,
      "step": 145115
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6701154708862305,
      "learning_rate": 0.00018099789277886184,
      "loss": 2.7901,
      "step": 145116
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6416049003601074,
      "learning_rate": 0.00018099413781876207,
      "loss": 3.2809,
      "step": 145117
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.199634552001953,
      "learning_rate": 0.00018099038288078772,
      "loss": 2.8175,
      "step": 145118
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0362367630004883,
      "learning_rate": 0.0001809866279649395,
      "loss": 3.0257,
      "step": 145119
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2933664321899414,
      "learning_rate": 0.0001809828730712181,
      "loss": 3.0618,
      "step": 145120
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2990260124206543,
      "learning_rate": 0.00018097911819962422,
      "loss": 2.7731,
      "step": 145121
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1052908897399902,
      "learning_rate": 0.00018097536335015868,
      "loss": 2.9789,
      "step": 145122
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5884501934051514,
      "learning_rate": 0.000180971608522822,
      "loss": 2.84,
      "step": 145123
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2250051498413086,
      "learning_rate": 0.0001809678537176149,
      "loss": 3.0041,
      "step": 145124
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.20932936668396,
      "learning_rate": 0.00018096409893453817,
      "loss": 3.0817,
      "step": 145125
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2969167232513428,
      "learning_rate": 0.0001809603441735924,
      "loss": 2.8428,
      "step": 145126
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5037293434143066,
      "learning_rate": 0.0001809565894347784,
      "loss": 2.883,
      "step": 145127
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.532435178756714,
      "learning_rate": 0.00018095283471809676,
      "loss": 2.7922,
      "step": 145128
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.160207509994507,
      "learning_rate": 0.0001809490800235484,
      "loss": 3.0812,
      "step": 145129
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.748603582382202,
      "learning_rate": 0.00018094532535113374,
      "loss": 3.1197,
      "step": 145130
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.7987165451049805,
      "learning_rate": 0.00018094157070085355,
      "loss": 3.1474,
      "step": 145131
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.47086501121521,
      "learning_rate": 0.00018093781607270864,
      "loss": 3.1908,
      "step": 145132
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.094088554382324,
      "learning_rate": 0.00018093406146669957,
      "loss": 3.0189,
      "step": 145133
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5057170391082764,
      "learning_rate": 0.00018093030688282714,
      "loss": 3.1434,
      "step": 145134
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1904215812683105,
      "learning_rate": 0.00018092655232109206,
      "loss": 2.8679,
      "step": 145135
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2959797382354736,
      "learning_rate": 0.00018092279778149492,
      "loss": 3.2045,
      "step": 145136
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4298040866851807,
      "learning_rate": 0.0001809190432640365,
      "loss": 2.8403,
      "step": 145137
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.279909133911133,
      "learning_rate": 0.0001809152887687175,
      "loss": 2.8071,
      "step": 145138
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.28794264793396,
      "learning_rate": 0.00018091153429553852,
      "loss": 3.0823,
      "step": 145139
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.578730583190918,
      "learning_rate": 0.0001809077798445004,
      "loss": 3.0245,
      "step": 145140
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.424938440322876,
      "learning_rate": 0.0001809040254156038,
      "loss": 2.7819,
      "step": 145141
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5049021244049072,
      "learning_rate": 0.00018090027100884935,
      "loss": 2.8459,
      "step": 145142
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9764654636383057,
      "learning_rate": 0.00018089651662423774,
      "loss": 2.853,
      "step": 145143
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.312697172164917,
      "learning_rate": 0.00018089276226176983,
      "loss": 3.0857,
      "step": 145144
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.148231267929077,
      "learning_rate": 0.00018088900792144607,
      "loss": 2.8765,
      "step": 145145
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.964160919189453,
      "learning_rate": 0.00018088525360326736,
      "loss": 2.6853,
      "step": 145146
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.696080207824707,
      "learning_rate": 0.00018088149930723437,
      "loss": 3.2469,
      "step": 145147
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0940492153167725,
      "learning_rate": 0.0001808777450333477,
      "loss": 2.9349,
      "step": 145148
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1227827072143555,
      "learning_rate": 0.00018087399078160808,
      "loss": 3.0061,
      "step": 145149
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.286921262741089,
      "learning_rate": 0.00018087023655201628,
      "loss": 2.9794,
      "step": 145150
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.94242262840271,
      "learning_rate": 0.00018086648234457296,
      "loss": 2.7669,
      "step": 145151
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.5353472232818604,
      "learning_rate": 0.0001808627281592788,
      "loss": 3.1232,
      "step": 145152
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1570677757263184,
      "learning_rate": 0.00018085897399613455,
      "loss": 3.065,
      "step": 145153
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.873380422592163,
      "learning_rate": 0.00018085521985514082,
      "loss": 3.0778,
      "step": 145154
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2423322200775146,
      "learning_rate": 0.00018085146573629832,
      "loss": 2.8682,
      "step": 145155
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.171488046646118,
      "learning_rate": 0.0001808477116396078,
      "loss": 2.9436,
      "step": 145156
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.898815870285034,
      "learning_rate": 0.00018084395756506994,
      "loss": 2.8362,
      "step": 145157
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8796342611312866,
      "learning_rate": 0.0001808402035126855,
      "loss": 3.2403,
      "step": 145158
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7061145305633545,
      "learning_rate": 0.00018083644948245508,
      "loss": 2.7549,
      "step": 145159
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.243760108947754,
      "learning_rate": 0.0001808326954743794,
      "loss": 3.0591,
      "step": 145160
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0799906253814697,
      "learning_rate": 0.00018082894148845917,
      "loss": 2.7845,
      "step": 145161
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.565682888031006,
      "learning_rate": 0.00018082518752469506,
      "loss": 2.9154,
      "step": 145162
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.278369188308716,
      "learning_rate": 0.00018082143358308782,
      "loss": 2.9382,
      "step": 145163
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1692967414855957,
      "learning_rate": 0.00018081767966363814,
      "loss": 2.9824,
      "step": 145164
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.17704176902771,
      "learning_rate": 0.0001808139257663468,
      "loss": 2.9108,
      "step": 145165
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2434499263763428,
      "learning_rate": 0.00018081017189121426,
      "loss": 2.7554,
      "step": 145166
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0066113471984863,
      "learning_rate": 0.00018080641803824138,
      "loss": 2.9089,
      "step": 145167
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.305917978286743,
      "learning_rate": 0.00018080266420742884,
      "loss": 2.7992,
      "step": 145168
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.5129077434539795,
      "learning_rate": 0.00018079891039877733,
      "loss": 3.0956,
      "step": 145169
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.597254514694214,
      "learning_rate": 0.00018079515661228757,
      "loss": 3.2453,
      "step": 145170
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9316463470458984,
      "learning_rate": 0.0001807914028479603,
      "loss": 3.074,
      "step": 145171
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2693467140197754,
      "learning_rate": 0.0001807876491057961,
      "loss": 2.8962,
      "step": 145172
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.051914930343628,
      "learning_rate": 0.0001807838953857957,
      "loss": 2.9789,
      "step": 145173
    },
    {
      "epoch": 1.89,
      "grad_norm": 5.2598676681518555,
      "learning_rate": 0.00018078014168795983,
      "loss": 2.5744,
      "step": 145174
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.242506742477417,
      "learning_rate": 0.0001807763880122892,
      "loss": 3.0487,
      "step": 145175
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4825329780578613,
      "learning_rate": 0.00018077263435878446,
      "loss": 2.9551,
      "step": 145176
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7771077156066895,
      "learning_rate": 0.00018076888072744647,
      "loss": 2.8935,
      "step": 145177
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1947922706604004,
      "learning_rate": 0.0001807651271182757,
      "loss": 3.2202,
      "step": 145178
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.774831771850586,
      "learning_rate": 0.0001807613735312729,
      "loss": 2.965,
      "step": 145179
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7551512718200684,
      "learning_rate": 0.00018075761996643887,
      "loss": 2.8486,
      "step": 145180
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1889615058898926,
      "learning_rate": 0.00018075386642377422,
      "loss": 2.9278,
      "step": 145181
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.138606548309326,
      "learning_rate": 0.00018075011290327966,
      "loss": 2.9603,
      "step": 145182
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1945528984069824,
      "learning_rate": 0.00018074635940495604,
      "loss": 2.957,
      "step": 145183
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.276236057281494,
      "learning_rate": 0.0001807426059288038,
      "loss": 3.223,
      "step": 145184
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2638132572174072,
      "learning_rate": 0.00018073885247482375,
      "loss": 2.8533,
      "step": 145185
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1493911743164062,
      "learning_rate": 0.0001807350990430166,
      "loss": 3.0372,
      "step": 145186
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.463064193725586,
      "learning_rate": 0.00018073134563338305,
      "loss": 2.8258,
      "step": 145187
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2723000049591064,
      "learning_rate": 0.00018072759224592382,
      "loss": 3.1361,
      "step": 145188
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.341134548187256,
      "learning_rate": 0.00018072383888063966,
      "loss": 3.1248,
      "step": 145189
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2452597618103027,
      "learning_rate": 0.0001807200855375311,
      "loss": 3.0269,
      "step": 145190
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6964340209960938,
      "learning_rate": 0.00018071633221659894,
      "loss": 2.8307,
      "step": 145191
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3005385398864746,
      "learning_rate": 0.0001807125789178438,
      "loss": 3.0793,
      "step": 145192
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0707168579101562,
      "learning_rate": 0.0001807088256412665,
      "loss": 2.9463,
      "step": 145193
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3114848136901855,
      "learning_rate": 0.00018070507238686766,
      "loss": 3.131,
      "step": 145194
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.339726448059082,
      "learning_rate": 0.000180701319154648,
      "loss": 3.0902,
      "step": 145195
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.241117000579834,
      "learning_rate": 0.00018069756594460835,
      "loss": 3.0617,
      "step": 145196
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.180756092071533,
      "learning_rate": 0.00018069381275674916,
      "loss": 3.0099,
      "step": 145197
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9791998863220215,
      "learning_rate": 0.0001806900595910712,
      "loss": 3.0745,
      "step": 145198
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3060522079467773,
      "learning_rate": 0.0001806863064475753,
      "loss": 3.121,
      "step": 145199
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3760952949523926,
      "learning_rate": 0.00018068255332626197,
      "loss": 2.8414,
      "step": 145200
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2911393642425537,
      "learning_rate": 0.00018067880022713204,
      "loss": 2.8997,
      "step": 145201
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9115973711013794,
      "learning_rate": 0.00018067504715018633,
      "loss": 3.2523,
      "step": 145202
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.053382635116577,
      "learning_rate": 0.0001806712940954252,
      "loss": 2.948,
      "step": 145203
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.217966318130493,
      "learning_rate": 0.00018066754106284958,
      "loss": 2.9863,
      "step": 145204
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8895263671875,
      "learning_rate": 0.00018066378805246008,
      "loss": 3.0396,
      "step": 145205
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3635780811309814,
      "learning_rate": 0.00018066003506425743,
      "loss": 3.2833,
      "step": 145206
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1472392082214355,
      "learning_rate": 0.00018065628209824235,
      "loss": 2.9538,
      "step": 145207
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9500834941864014,
      "learning_rate": 0.00018065252915441563,
      "loss": 2.9868,
      "step": 145208
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.219381093978882,
      "learning_rate": 0.00018064877623277778,
      "loss": 2.9003,
      "step": 145209
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.291010856628418,
      "learning_rate": 0.00018064502333332955,
      "loss": 3.087,
      "step": 145210
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8857825994491577,
      "learning_rate": 0.00018064127045607165,
      "loss": 2.9145,
      "step": 145211
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3935916423797607,
      "learning_rate": 0.00018063751760100477,
      "loss": 2.8471,
      "step": 145212
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2985544204711914,
      "learning_rate": 0.0001806337647681297,
      "loss": 2.769,
      "step": 145213
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1931891441345215,
      "learning_rate": 0.00018063001195744711,
      "loss": 2.9081,
      "step": 145214
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8130422830581665,
      "learning_rate": 0.00018062625916895756,
      "loss": 2.9379,
      "step": 145215
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0795373916625977,
      "learning_rate": 0.0001806225064026619,
      "loss": 2.971,
      "step": 145216
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9706320762634277,
      "learning_rate": 0.0001806187536585607,
      "loss": 2.8041,
      "step": 145217
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.220839023590088,
      "learning_rate": 0.00018061500093665474,
      "loss": 2.9341,
      "step": 145218
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2342233657836914,
      "learning_rate": 0.00018061124823694468,
      "loss": 2.9125,
      "step": 145219
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9357249736785889,
      "learning_rate": 0.00018060749555943135,
      "loss": 2.9382,
      "step": 145220
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.413187026977539,
      "learning_rate": 0.00018060374290411533,
      "loss": 2.882,
      "step": 145221
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8408117294311523,
      "learning_rate": 0.00018059999027099727,
      "loss": 2.7169,
      "step": 145222
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.11403751373291,
      "learning_rate": 0.00018059623766007791,
      "loss": 3.0205,
      "step": 145223
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.178196668624878,
      "learning_rate": 0.00018059248507135798,
      "loss": 2.9331,
      "step": 145224
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.304791212081909,
      "learning_rate": 0.00018058873250483812,
      "loss": 3.0322,
      "step": 145225
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.726144790649414,
      "learning_rate": 0.00018058497996051916,
      "loss": 2.5767,
      "step": 145226
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.916247606277466,
      "learning_rate": 0.00018058122743840166,
      "loss": 3.0333,
      "step": 145227
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.201735019683838,
      "learning_rate": 0.00018057747493848634,
      "loss": 3.1651,
      "step": 145228
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0973823070526123,
      "learning_rate": 0.00018057372246077402,
      "loss": 2.8562,
      "step": 145229
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.7778494358062744,
      "learning_rate": 0.00018056997000526518,
      "loss": 2.8708,
      "step": 145230
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9270853996276855,
      "learning_rate": 0.0001805662175719607,
      "loss": 2.85,
      "step": 145231
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1620655059814453,
      "learning_rate": 0.00018056246516086122,
      "loss": 2.9924,
      "step": 145232
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0525171756744385,
      "learning_rate": 0.00018055871277196739,
      "loss": 3.0007,
      "step": 145233
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9492074251174927,
      "learning_rate": 0.00018055496040527994,
      "loss": 2.9805,
      "step": 145234
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4313788414001465,
      "learning_rate": 0.00018055120806079961,
      "loss": 3.3398,
      "step": 145235
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.289750099182129,
      "learning_rate": 0.0001805474557385271,
      "loss": 2.9544,
      "step": 145236
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.171792984008789,
      "learning_rate": 0.00018054370343846305,
      "loss": 2.782,
      "step": 145237
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5192079544067383,
      "learning_rate": 0.00018053995116060822,
      "loss": 2.9903,
      "step": 145238
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1665191650390625,
      "learning_rate": 0.00018053619890496316,
      "loss": 2.8929,
      "step": 145239
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3024420738220215,
      "learning_rate": 0.00018053244667152874,
      "loss": 2.897,
      "step": 145240
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9133943319320679,
      "learning_rate": 0.00018052869446030556,
      "loss": 2.9453,
      "step": 145241
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6106576919555664,
      "learning_rate": 0.00018052494227129438,
      "loss": 3.0701,
      "step": 145242
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.38468599319458,
      "learning_rate": 0.0001805211901044959,
      "loss": 2.7354,
      "step": 145243
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.924064874649048,
      "learning_rate": 0.00018051743795991082,
      "loss": 3.172,
      "step": 145244
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2918777465820312,
      "learning_rate": 0.00018051368583753966,
      "loss": 2.9393,
      "step": 145245
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.818859100341797,
      "learning_rate": 0.00018050993373738337,
      "loss": 2.7795,
      "step": 145246
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.006523370742798,
      "learning_rate": 0.00018050618165944246,
      "loss": 2.7944,
      "step": 145247
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0306060314178467,
      "learning_rate": 0.00018050242960371773,
      "loss": 2.9191,
      "step": 145248
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9736769199371338,
      "learning_rate": 0.0001804986775702099,
      "loss": 2.8614,
      "step": 145249
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.012572765350342,
      "learning_rate": 0.00018049492555891966,
      "loss": 3.0368,
      "step": 145250
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3373336791992188,
      "learning_rate": 0.00018049117356984759,
      "loss": 2.9618,
      "step": 145251
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.787266969680786,
      "learning_rate": 0.00018048742160299444,
      "loss": 3.0535,
      "step": 145252
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3503682613372803,
      "learning_rate": 0.00018048366965836096,
      "loss": 2.9666,
      "step": 145253
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.118424654006958,
      "learning_rate": 0.00018047991773594786,
      "loss": 2.6069,
      "step": 145254
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.6082284450531006,
      "learning_rate": 0.00018047616583575572,
      "loss": 3.0182,
      "step": 145255
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.8769965171813965,
      "learning_rate": 0.0001804724139577855,
      "loss": 3.0266,
      "step": 145256
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2275373935699463,
      "learning_rate": 0.00018046866210203756,
      "loss": 3.2795,
      "step": 145257
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9998106956481934,
      "learning_rate": 0.00018046491026851277,
      "loss": 2.9738,
      "step": 145258
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1182706356048584,
      "learning_rate": 0.00018046115845721182,
      "loss": 2.8494,
      "step": 145259
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.701498508453369,
      "learning_rate": 0.00018045740666813536,
      "loss": 2.8967,
      "step": 145260
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.5161774158477783,
      "learning_rate": 0.00018045365490128418,
      "loss": 2.8168,
      "step": 145261
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2261757850646973,
      "learning_rate": 0.00018044990315665887,
      "loss": 3.0113,
      "step": 145262
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.299354076385498,
      "learning_rate": 0.00018044615143426033,
      "loss": 3.0599,
      "step": 145263
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1972639560699463,
      "learning_rate": 0.00018044239973408893,
      "loss": 2.8951,
      "step": 145264
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8613440990447998,
      "learning_rate": 0.00018043864805614564,
      "loss": 2.8178,
      "step": 145265
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1455419063568115,
      "learning_rate": 0.000180434896400431,
      "loss": 2.8353,
      "step": 145266
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3709311485290527,
      "learning_rate": 0.00018043114476694577,
      "loss": 2.7877,
      "step": 145267
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2258756160736084,
      "learning_rate": 0.0001804273931556907,
      "loss": 3.0765,
      "step": 145268
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.523648500442505,
      "learning_rate": 0.00018042364156666647,
      "loss": 3.0993,
      "step": 145269
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4030890464782715,
      "learning_rate": 0.00018041988999987368,
      "loss": 2.6787,
      "step": 145270
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1288013458251953,
      "learning_rate": 0.00018041613845531308,
      "loss": 3.1747,
      "step": 145271
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.515603542327881,
      "learning_rate": 0.00018041238693298537,
      "loss": 2.8555,
      "step": 145272
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1838104724884033,
      "learning_rate": 0.00018040863543289127,
      "loss": 3.0736,
      "step": 145273
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.626265048980713,
      "learning_rate": 0.00018040488395503144,
      "loss": 2.8274,
      "step": 145274
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.361743211746216,
      "learning_rate": 0.00018040113249940672,
      "loss": 2.9848,
      "step": 145275
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2371368408203125,
      "learning_rate": 0.00018039738106601755,
      "loss": 3.1189,
      "step": 145276
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0731916427612305,
      "learning_rate": 0.00018039362965486483,
      "loss": 2.9401,
      "step": 145277
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5979464054107666,
      "learning_rate": 0.00018038987826594913,
      "loss": 3.0164,
      "step": 145278
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8892476558685303,
      "learning_rate": 0.00018038612689927122,
      "loss": 2.9527,
      "step": 145279
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.018216133117676,
      "learning_rate": 0.00018038237555483182,
      "loss": 2.7983,
      "step": 145280
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.4872589111328125,
      "learning_rate": 0.00018037862423263168,
      "loss": 2.7159,
      "step": 145281
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3273746967315674,
      "learning_rate": 0.00018037487293267132,
      "loss": 2.8833,
      "step": 145282
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.540170192718506,
      "learning_rate": 0.0001803711216549515,
      "loss": 2.7554,
      "step": 145283
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.25116229057312,
      "learning_rate": 0.00018036737039947296,
      "loss": 2.9037,
      "step": 145284
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4033830165863037,
      "learning_rate": 0.0001803636191662364,
      "loss": 3.1035,
      "step": 145285
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0010783672332764,
      "learning_rate": 0.00018035986795524248,
      "loss": 2.9683,
      "step": 145286
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.362366199493408,
      "learning_rate": 0.00018035611676649202,
      "loss": 3.0695,
      "step": 145287
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.326807737350464,
      "learning_rate": 0.00018035236559998553,
      "loss": 2.8557,
      "step": 145288
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.106637954711914,
      "learning_rate": 0.00018034861445572378,
      "loss": 3.1364,
      "step": 145289
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.6686837673187256,
      "learning_rate": 0.00018034486333370754,
      "loss": 2.8248,
      "step": 145290
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0887813568115234,
      "learning_rate": 0.00018034111223393735,
      "loss": 2.7834,
      "step": 145291
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4014930725097656,
      "learning_rate": 0.00018033736115641405,
      "loss": 3.0324,
      "step": 145292
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.014108419418335,
      "learning_rate": 0.00018033361010113844,
      "loss": 2.8093,
      "step": 145293
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3748059272766113,
      "learning_rate": 0.0001803298590681109,
      "loss": 2.97,
      "step": 145294
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0440948009490967,
      "learning_rate": 0.00018032610805733234,
      "loss": 3.1956,
      "step": 145295
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1296346187591553,
      "learning_rate": 0.00018032235706880338,
      "loss": 2.9617,
      "step": 145296
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.550431251525879,
      "learning_rate": 0.0001803186061025248,
      "loss": 2.976,
      "step": 145297
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.522406816482544,
      "learning_rate": 0.0001803148551584972,
      "loss": 2.9408,
      "step": 145298
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.622924566268921,
      "learning_rate": 0.0001803111042367215,
      "loss": 2.9714,
      "step": 145299
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.090620517730713,
      "learning_rate": 0.00018030735333719806,
      "loss": 2.939,
      "step": 145300
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.786302328109741,
      "learning_rate": 0.00018030360245992774,
      "loss": 2.8648,
      "step": 145301
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7917473316192627,
      "learning_rate": 0.0001802998516049113,
      "loss": 2.9212,
      "step": 145302
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3752641677856445,
      "learning_rate": 0.00018029610077214932,
      "loss": 2.9407,
      "step": 145303
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.175102472305298,
      "learning_rate": 0.00018029234996164257,
      "loss": 3.2363,
      "step": 145304
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.382206678390503,
      "learning_rate": 0.00018028859917339178,
      "loss": 2.9781,
      "step": 145305
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.645005702972412,
      "learning_rate": 0.0001802848484073976,
      "loss": 3.0006,
      "step": 145306
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.625051498413086,
      "learning_rate": 0.00018028109766366068,
      "loss": 2.8794,
      "step": 145307
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.158632755279541,
      "learning_rate": 0.00018027734694218175,
      "loss": 3.2499,
      "step": 145308
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0723490715026855,
      "learning_rate": 0.00018027359624296152,
      "loss": 2.7831,
      "step": 145309
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1336965560913086,
      "learning_rate": 0.00018026984556600073,
      "loss": 3.0291,
      "step": 145310
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2891032695770264,
      "learning_rate": 0.00018026609491130004,
      "loss": 3.0241,
      "step": 145311
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.557377338409424,
      "learning_rate": 0.0001802623442788601,
      "loss": 3.0748,
      "step": 145312
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1241934299468994,
      "learning_rate": 0.0001802585936686817,
      "loss": 3.1657,
      "step": 145313
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3142249584198,
      "learning_rate": 0.00018025484308076546,
      "loss": 3.0419,
      "step": 145314
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.117840051651001,
      "learning_rate": 0.0001802510925151121,
      "loss": 2.8351,
      "step": 145315
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.33237361907959,
      "learning_rate": 0.00018024734197172234,
      "loss": 3.0771,
      "step": 145316
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3224294185638428,
      "learning_rate": 0.0001802435914505969,
      "loss": 3.0795,
      "step": 145317
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.260303020477295,
      "learning_rate": 0.00018023984095173637,
      "loss": 2.9972,
      "step": 145318
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.108933210372925,
      "learning_rate": 0.0001802360904751415,
      "loss": 2.9895,
      "step": 145319
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.953219175338745,
      "learning_rate": 0.00018023234002081308,
      "loss": 2.6332,
      "step": 145320
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.456404685974121,
      "learning_rate": 0.00018022858958875165,
      "loss": 3.2085,
      "step": 145321
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4474058151245117,
      "learning_rate": 0.000180224839178958,
      "loss": 2.9707,
      "step": 145322
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2434701919555664,
      "learning_rate": 0.00018022108879143286,
      "loss": 2.9947,
      "step": 145323
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5258796215057373,
      "learning_rate": 0.00018021733842617685,
      "loss": 3.0045,
      "step": 145324
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7283525466918945,
      "learning_rate": 0.00018021358808319068,
      "loss": 3.0923,
      "step": 145325
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.455112934112549,
      "learning_rate": 0.00018020983776247508,
      "loss": 3.0565,
      "step": 145326
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.471693754196167,
      "learning_rate": 0.0001802060874640307,
      "loss": 3.3535,
      "step": 145327
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.570348024368286,
      "learning_rate": 0.00018020233718785836,
      "loss": 2.8276,
      "step": 145328
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4700589179992676,
      "learning_rate": 0.00018019858693395861,
      "loss": 3.0332,
      "step": 145329
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1211793422698975,
      "learning_rate": 0.00018019483670233223,
      "loss": 2.584,
      "step": 145330
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8685922622680664,
      "learning_rate": 0.00018019108649297986,
      "loss": 3.0574,
      "step": 145331
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0710484981536865,
      "learning_rate": 0.00018018733630590224,
      "loss": 2.9709,
      "step": 145332
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9923584461212158,
      "learning_rate": 0.00018018358614110003,
      "loss": 2.9606,
      "step": 145333
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8165371417999268,
      "learning_rate": 0.00018017983599857395,
      "loss": 2.8786,
      "step": 145334
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8281173706054688,
      "learning_rate": 0.0001801760858783248,
      "loss": 2.6325,
      "step": 145335
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0881361961364746,
      "learning_rate": 0.00018017233578035313,
      "loss": 3.1295,
      "step": 145336
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5345096588134766,
      "learning_rate": 0.00018016858570465963,
      "loss": 3.3113,
      "step": 145337
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8281867504119873,
      "learning_rate": 0.00018016483565124508,
      "loss": 3.1443,
      "step": 145338
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.195772409439087,
      "learning_rate": 0.00018016108562011015,
      "loss": 3.045,
      "step": 145339
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.528843879699707,
      "learning_rate": 0.0001801573356112555,
      "loss": 3.0324,
      "step": 145340
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.334627389907837,
      "learning_rate": 0.0001801535856246819,
      "loss": 2.8518,
      "step": 145341
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0385355949401855,
      "learning_rate": 0.00018014983566039006,
      "loss": 3.2383,
      "step": 145342
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9142473936080933,
      "learning_rate": 0.0001801460857183806,
      "loss": 3.0118,
      "step": 145343
    },
    {
      "epoch": 1.89,
      "grad_norm": 5.237140655517578,
      "learning_rate": 0.00018014233579865418,
      "loss": 2.7152,
      "step": 145344
    },
    {
      "epoch": 1.89,
      "grad_norm": 5.037614345550537,
      "learning_rate": 0.0001801385859012116,
      "loss": 2.7428,
      "step": 145345
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.199523687362671,
      "learning_rate": 0.00018013483602605351,
      "loss": 3.0006,
      "step": 145346
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.296417474746704,
      "learning_rate": 0.00018013108617318064,
      "loss": 3.0727,
      "step": 145347
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3175430297851562,
      "learning_rate": 0.00018012733634259376,
      "loss": 2.7495,
      "step": 145348
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9253443479537964,
      "learning_rate": 0.00018012358653429335,
      "loss": 3.1207,
      "step": 145349
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2336959838867188,
      "learning_rate": 0.0001801198367482802,
      "loss": 2.8014,
      "step": 145350
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1174659729003906,
      "learning_rate": 0.00018011608698455505,
      "loss": 3.0471,
      "step": 145351
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.761920690536499,
      "learning_rate": 0.00018011233724311862,
      "loss": 2.9544,
      "step": 145352
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6783862113952637,
      "learning_rate": 0.00018010858752397154,
      "loss": 3.0546,
      "step": 145353
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.353898525238037,
      "learning_rate": 0.00018010483782711465,
      "loss": 2.9216,
      "step": 145354
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6168341636657715,
      "learning_rate": 0.00018010108815254844,
      "loss": 2.8409,
      "step": 145355
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0511832237243652,
      "learning_rate": 0.0001800973385002737,
      "loss": 2.9481,
      "step": 145356
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.478618621826172,
      "learning_rate": 0.00018009358887029108,
      "loss": 2.9439,
      "step": 145357
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3249387741088867,
      "learning_rate": 0.00018008983926260137,
      "loss": 3.1169,
      "step": 145358
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2707934379577637,
      "learning_rate": 0.00018008608967720523,
      "loss": 2.7062,
      "step": 145359
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.92522931098938,
      "learning_rate": 0.00018008234011410344,
      "loss": 2.9931,
      "step": 145360
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.356865167617798,
      "learning_rate": 0.00018007859057329653,
      "loss": 2.9217,
      "step": 145361
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.929004669189453,
      "learning_rate": 0.0001800748410547852,
      "loss": 2.9354,
      "step": 145362
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.066056966781616,
      "learning_rate": 0.00018007109155857026,
      "loss": 3.0054,
      "step": 145363
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9568448066711426,
      "learning_rate": 0.0001800673420846524,
      "loss": 3.0137,
      "step": 145364
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6816718578338623,
      "learning_rate": 0.00018006359263303225,
      "loss": 2.9569,
      "step": 145365
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3565289974212646,
      "learning_rate": 0.00018005984320371065,
      "loss": 3.0894,
      "step": 145366
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.248650550842285,
      "learning_rate": 0.0001800560937966881,
      "loss": 2.8307,
      "step": 145367
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1931424140930176,
      "learning_rate": 0.00018005234441196535,
      "loss": 2.9708,
      "step": 145368
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0262584686279297,
      "learning_rate": 0.00018004859504954318,
      "loss": 2.7356,
      "step": 145369
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.328734874725342,
      "learning_rate": 0.0001800448457094222,
      "loss": 3.0159,
      "step": 145370
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.221473217010498,
      "learning_rate": 0.00018004109639160317,
      "loss": 3.1085,
      "step": 145371
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.533377170562744,
      "learning_rate": 0.0001800373470960869,
      "loss": 3.07,
      "step": 145372
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3545310497283936,
      "learning_rate": 0.0001800335978228738,
      "loss": 2.7243,
      "step": 145373
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.687403440475464,
      "learning_rate": 0.00018002984857196474,
      "loss": 2.8592,
      "step": 145374
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5221898555755615,
      "learning_rate": 0.00018002609934336038,
      "loss": 3.0321,
      "step": 145375
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3176589012145996,
      "learning_rate": 0.00018002235013706144,
      "loss": 3.0567,
      "step": 145376
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.2517783641815186,
      "learning_rate": 0.0001800186009530686,
      "loss": 2.8786,
      "step": 145377
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.776660680770874,
      "learning_rate": 0.0001800148517913827,
      "loss": 3.0674,
      "step": 145378
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.150486946105957,
      "learning_rate": 0.0001800111026520042,
      "loss": 2.7639,
      "step": 145379
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.387705087661743,
      "learning_rate": 0.00018000735353493384,
      "loss": 3.116,
      "step": 145380
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.174907922744751,
      "learning_rate": 0.00018000360444017242,
      "loss": 3.1177,
      "step": 145381
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.238185167312622,
      "learning_rate": 0.00017999985536772058,
      "loss": 3.1438,
      "step": 145382
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.551347255706787,
      "learning_rate": 0.00017999610631757906,
      "loss": 3.0628,
      "step": 145383
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1154158115386963,
      "learning_rate": 0.00017999235728974864,
      "loss": 2.9241,
      "step": 145384
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1677088737487793,
      "learning_rate": 0.00017998860828422977,
      "loss": 2.6526,
      "step": 145385
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.743180751800537,
      "learning_rate": 0.0001799848593010233,
      "loss": 2.8108,
      "step": 145386
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5351040363311768,
      "learning_rate": 0.00017998111034012992,
      "loss": 3.229,
      "step": 145387
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.124899387359619,
      "learning_rate": 0.0001799773614015503,
      "loss": 3.0686,
      "step": 145388
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.78691029548645,
      "learning_rate": 0.0001799736124852851,
      "loss": 2.8821,
      "step": 145389
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2676429748535156,
      "learning_rate": 0.00017996986359133525,
      "loss": 3.1073,
      "step": 145390
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0431578159332275,
      "learning_rate": 0.0001799661147197012,
      "loss": 3.0525,
      "step": 145391
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.29997181892395,
      "learning_rate": 0.00017996236587038364,
      "loss": 2.9662,
      "step": 145392
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.492323875427246,
      "learning_rate": 0.0001799586170433834,
      "loss": 2.6197,
      "step": 145393
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.307525873184204,
      "learning_rate": 0.00017995486823870103,
      "loss": 3.1866,
      "step": 145394
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9300410747528076,
      "learning_rate": 0.00017995111945633737,
      "loss": 2.9977,
      "step": 145395
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.619718551635742,
      "learning_rate": 0.00017994737069629307,
      "loss": 2.9879,
      "step": 145396
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.906688928604126,
      "learning_rate": 0.00017994362195856888,
      "loss": 3.157,
      "step": 145397
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5003550052642822,
      "learning_rate": 0.0001799398732431654,
      "loss": 2.7984,
      "step": 145398
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.523829460144043,
      "learning_rate": 0.00017993612455008336,
      "loss": 3.103,
      "step": 145399
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2587084770202637,
      "learning_rate": 0.00017993237587932343,
      "loss": 3.038,
      "step": 145400
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.802582025527954,
      "learning_rate": 0.0001799286272308863,
      "loss": 3.089,
      "step": 145401
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3146791458129883,
      "learning_rate": 0.0001799248786047728,
      "loss": 2.976,
      "step": 145402
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2388510704040527,
      "learning_rate": 0.00017992113000098352,
      "loss": 3.0667,
      "step": 145403
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3829143047332764,
      "learning_rate": 0.0001799173814195191,
      "loss": 3.0491,
      "step": 145404
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.131157636642456,
      "learning_rate": 0.00017991363286038042,
      "loss": 2.8681,
      "step": 145405
    },
    {
      "epoch": 1.89,
      "grad_norm": 5.244410991668701,
      "learning_rate": 0.00017990988432356795,
      "loss": 2.9968,
      "step": 145406
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9798943996429443,
      "learning_rate": 0.0001799061358090825,
      "loss": 2.8451,
      "step": 145407
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0516340732574463,
      "learning_rate": 0.0001799023873169248,
      "loss": 2.8745,
      "step": 145408
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2541122436523438,
      "learning_rate": 0.00017989863884709556,
      "loss": 3.1246,
      "step": 145409
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8346383571624756,
      "learning_rate": 0.00017989489039959536,
      "loss": 2.9807,
      "step": 145410
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.065671682357788,
      "learning_rate": 0.00017989114197442498,
      "loss": 2.9383,
      "step": 145411
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0380406379699707,
      "learning_rate": 0.0001798873935715852,
      "loss": 2.7313,
      "step": 145412
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9369421005249023,
      "learning_rate": 0.00017988364519107647,
      "loss": 2.8933,
      "step": 145413
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4163098335266113,
      "learning_rate": 0.0001798798968328997,
      "loss": 3.013,
      "step": 145414
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1042795181274414,
      "learning_rate": 0.0001798761484970556,
      "loss": 2.8009,
      "step": 145415
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1701135635375977,
      "learning_rate": 0.00017987240018354467,
      "loss": 3.0263,
      "step": 145416
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.530534267425537,
      "learning_rate": 0.0001798686518923678,
      "loss": 2.8483,
      "step": 145417
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.245321750640869,
      "learning_rate": 0.0001798649036235256,
      "loss": 2.8419,
      "step": 145418
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.301656723022461,
      "learning_rate": 0.00017986115537701875,
      "loss": 2.868,
      "step": 145419
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6389498710632324,
      "learning_rate": 0.00017985740715284804,
      "loss": 2.8259,
      "step": 145420
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.077584743499756,
      "learning_rate": 0.00017985365895101414,
      "loss": 2.8362,
      "step": 145421
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.90668785572052,
      "learning_rate": 0.00017984991077151763,
      "loss": 2.9793,
      "step": 145422
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7342607975006104,
      "learning_rate": 0.00017984616261435923,
      "loss": 2.8815,
      "step": 145423
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3492164611816406,
      "learning_rate": 0.0001798424144795398,
      "loss": 2.7519,
      "step": 145424
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8367737531661987,
      "learning_rate": 0.00017983866636705987,
      "loss": 2.8695,
      "step": 145425
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.6086180210113525,
      "learning_rate": 0.00017983491827692021,
      "loss": 2.9236,
      "step": 145426
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5142195224761963,
      "learning_rate": 0.00017983117020912162,
      "loss": 3.1066,
      "step": 145427
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5764262676239014,
      "learning_rate": 0.0001798274221636646,
      "loss": 3.0857,
      "step": 145428
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.485487222671509,
      "learning_rate": 0.0001798236741405499,
      "loss": 2.9521,
      "step": 145429
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1149003505706787,
      "learning_rate": 0.0001798199261397783,
      "loss": 2.93,
      "step": 145430
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.5902650356292725,
      "learning_rate": 0.00017981617816135038,
      "loss": 2.8835,
      "step": 145431
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3156447410583496,
      "learning_rate": 0.00017981243020526692,
      "loss": 2.9992,
      "step": 145432
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.507781982421875,
      "learning_rate": 0.0001798086822715287,
      "loss": 2.9946,
      "step": 145433
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4695146083831787,
      "learning_rate": 0.00017980493436013626,
      "loss": 3.0446,
      "step": 145434
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3746774196624756,
      "learning_rate": 0.00017980118647109027,
      "loss": 3.0623,
      "step": 145435
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1025516986846924,
      "learning_rate": 0.00017979743860439156,
      "loss": 2.9669,
      "step": 145436
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.8669540882110596,
      "learning_rate": 0.00017979369076004075,
      "loss": 3.1828,
      "step": 145437
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.739640235900879,
      "learning_rate": 0.0001797899429380386,
      "loss": 3.1124,
      "step": 145438
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8187992572784424,
      "learning_rate": 0.00017978619513838587,
      "loss": 2.7997,
      "step": 145439
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.3387601375579834,
      "learning_rate": 0.00017978244736108301,
      "loss": 3.0871,
      "step": 145440
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.149489641189575,
      "learning_rate": 0.00017977869960613088,
      "loss": 2.9208,
      "step": 145441
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2636187076568604,
      "learning_rate": 0.00017977495187353017,
      "loss": 3.046,
      "step": 145442
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6558573246002197,
      "learning_rate": 0.00017977120416328153,
      "loss": 3.1182,
      "step": 145443
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3712098598480225,
      "learning_rate": 0.0001797674564753857,
      "loss": 2.9592,
      "step": 145444
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5198326110839844,
      "learning_rate": 0.00017976370880984355,
      "loss": 2.9623,
      "step": 145445
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1439571380615234,
      "learning_rate": 0.00017975996116665542,
      "loss": 2.9108,
      "step": 145446
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2924234867095947,
      "learning_rate": 0.00017975621354582216,
      "loss": 2.9389,
      "step": 145447
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.245893716812134,
      "learning_rate": 0.00017975246594734455,
      "loss": 3.0692,
      "step": 145448
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2123665809631348,
      "learning_rate": 0.0001797487183712232,
      "loss": 3.0761,
      "step": 145449
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.428027391433716,
      "learning_rate": 0.00017974497081745883,
      "loss": 3.2036,
      "step": 145450
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.407437801361084,
      "learning_rate": 0.0001797412232860523,
      "loss": 3.0814,
      "step": 145451
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.213959217071533,
      "learning_rate": 0.000179737475777004,
      "loss": 3.0613,
      "step": 145452
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.597568988800049,
      "learning_rate": 0.00017973372829031473,
      "loss": 2.7925,
      "step": 145453
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4907214641571045,
      "learning_rate": 0.00017972998082598526,
      "loss": 2.887,
      "step": 145454
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3882534503936768,
      "learning_rate": 0.00017972623338401625,
      "loss": 3.0906,
      "step": 145455
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.407902717590332,
      "learning_rate": 0.00017972248596440844,
      "loss": 2.9661,
      "step": 145456
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.8629367351531982,
      "learning_rate": 0.00017971873856716258,
      "loss": 3.3002,
      "step": 145457
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.38173246383667,
      "learning_rate": 0.0001797149911922792,
      "loss": 3.0628,
      "step": 145458
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.263061761856079,
      "learning_rate": 0.000179711243839759,
      "loss": 2.881,
      "step": 145459
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8795233964920044,
      "learning_rate": 0.00017970749650960278,
      "loss": 3.1927,
      "step": 145460
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9711605310440063,
      "learning_rate": 0.0001797037492018112,
      "loss": 2.9215,
      "step": 145461
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.7015771865844727,
      "learning_rate": 0.000179700001916385,
      "loss": 3.0705,
      "step": 145462
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.611109972000122,
      "learning_rate": 0.00017969625465332484,
      "loss": 3.0818,
      "step": 145463
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.314872980117798,
      "learning_rate": 0.0001796925074126315,
      "loss": 2.9392,
      "step": 145464
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6988093852996826,
      "learning_rate": 0.00017968876019430546,
      "loss": 2.8789,
      "step": 145465
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1309449672698975,
      "learning_rate": 0.0001796850129983476,
      "loss": 2.774,
      "step": 145466
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.953016519546509,
      "learning_rate": 0.00017968126582475856,
      "loss": 2.9491,
      "step": 145467
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.444221258163452,
      "learning_rate": 0.000179677518673539,
      "loss": 2.8537,
      "step": 145468
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.370985746383667,
      "learning_rate": 0.00017967377154468975,
      "loss": 2.6928,
      "step": 145469
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4316890239715576,
      "learning_rate": 0.00017967002443821143,
      "loss": 3.0249,
      "step": 145470
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.130788803100586,
      "learning_rate": 0.00017966627735410467,
      "loss": 2.9477,
      "step": 145471
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.582841634750366,
      "learning_rate": 0.0001796625302923702,
      "loss": 3.1709,
      "step": 145472
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2892184257507324,
      "learning_rate": 0.00017965878325300875,
      "loss": 3.1181,
      "step": 145473
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.316819906234741,
      "learning_rate": 0.000179655036236021,
      "loss": 3.0303,
      "step": 145474
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.5262064933776855,
      "learning_rate": 0.00017965128924140762,
      "loss": 2.8969,
      "step": 145475
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.322723150253296,
      "learning_rate": 0.0001796475422691695,
      "loss": 3.0693,
      "step": 145476
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.130009412765503,
      "learning_rate": 0.00017964379531930702,
      "loss": 3.1132,
      "step": 145477
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3520331382751465,
      "learning_rate": 0.00017964004839182108,
      "loss": 2.7857,
      "step": 145478
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.226846933364868,
      "learning_rate": 0.00017963630148671228,
      "loss": 2.7885,
      "step": 145479
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6884284019470215,
      "learning_rate": 0.00017963255460398133,
      "loss": 3.0889,
      "step": 145480
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9933974742889404,
      "learning_rate": 0.00017962880774362902,
      "loss": 2.8766,
      "step": 145481
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.523329257965088,
      "learning_rate": 0.0001796250609056561,
      "loss": 2.9237,
      "step": 145482
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.420143127441406,
      "learning_rate": 0.00017962131409006302,
      "loss": 2.8175,
      "step": 145483
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0902211666107178,
      "learning_rate": 0.00017961756729685063,
      "loss": 2.8881,
      "step": 145484
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1698362827301025,
      "learning_rate": 0.0001796138205260196,
      "loss": 2.8477,
      "step": 145485
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.278010845184326,
      "learning_rate": 0.00017961007377757064,
      "loss": 2.8425,
      "step": 145486
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.750624656677246,
      "learning_rate": 0.0001796063270515044,
      "loss": 3.1393,
      "step": 145487
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.8109476566314697,
      "learning_rate": 0.00017960258034782173,
      "loss": 2.9755,
      "step": 145488
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.089118242263794,
      "learning_rate": 0.00017959883366652313,
      "loss": 3.2437,
      "step": 145489
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1265764236450195,
      "learning_rate": 0.0001795950870076094,
      "loss": 2.8437,
      "step": 145490
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.171537160873413,
      "learning_rate": 0.0001795913403710812,
      "loss": 3.1685,
      "step": 145491
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.551435947418213,
      "learning_rate": 0.00017958759375693925,
      "loss": 2.706,
      "step": 145492
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.884591817855835,
      "learning_rate": 0.00017958384716518423,
      "loss": 2.5255,
      "step": 145493
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.32086181640625,
      "learning_rate": 0.00017958010059581687,
      "loss": 3.0318,
      "step": 145494
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0442264080047607,
      "learning_rate": 0.0001795763540488378,
      "loss": 3.0016,
      "step": 145495
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9129654169082642,
      "learning_rate": 0.00017957260752424778,
      "loss": 2.9511,
      "step": 145496
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.9867613315582275,
      "learning_rate": 0.00017956886102204756,
      "loss": 2.5184,
      "step": 145497
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0940439701080322,
      "learning_rate": 0.00017956511454223767,
      "loss": 3.145,
      "step": 145498
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.488327741622925,
      "learning_rate": 0.0001795613680848189,
      "loss": 2.9953,
      "step": 145499
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4286575317382812,
      "learning_rate": 0.00017955762164979203,
      "loss": 2.9833,
      "step": 145500
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.714024066925049,
      "learning_rate": 0.00017955387523715756,
      "loss": 2.8237,
      "step": 145501
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.114642381668091,
      "learning_rate": 0.00017955012884691632,
      "loss": 2.7982,
      "step": 145502
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.53022837638855,
      "learning_rate": 0.00017954638247906905,
      "loss": 2.7959,
      "step": 145503
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9811203479766846,
      "learning_rate": 0.00017954263613361636,
      "loss": 2.9703,
      "step": 145504
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.440129280090332,
      "learning_rate": 0.00017953888981055895,
      "loss": 3.0869,
      "step": 145505
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1470041275024414,
      "learning_rate": 0.0001795351435098976,
      "loss": 3.0958,
      "step": 145506
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4233386516571045,
      "learning_rate": 0.0001795313972316328,
      "loss": 2.7798,
      "step": 145507
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0866687297821045,
      "learning_rate": 0.00017952765097576547,
      "loss": 2.8182,
      "step": 145508
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.9036149978637695,
      "learning_rate": 0.00017952390474229623,
      "loss": 3.1095,
      "step": 145509
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.4751737117767334,
      "learning_rate": 0.00017952015853122573,
      "loss": 2.8948,
      "step": 145510
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.959742784500122,
      "learning_rate": 0.00017951641234255477,
      "loss": 3.0173,
      "step": 145511
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3744189739227295,
      "learning_rate": 0.000179512666176284,
      "loss": 3.0113,
      "step": 145512
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.3768608570098877,
      "learning_rate": 0.00017950892003241404,
      "loss": 3.1138,
      "step": 145513
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.33981990814209,
      "learning_rate": 0.00017950517391094562,
      "loss": 2.9257,
      "step": 145514
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4812839031219482,
      "learning_rate": 0.00017950142781187947,
      "loss": 3.1155,
      "step": 145515
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.280470848083496,
      "learning_rate": 0.0001794976817352163,
      "loss": 2.9414,
      "step": 145516
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1297643184661865,
      "learning_rate": 0.00017949393568095676,
      "loss": 3.005,
      "step": 145517
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.241988182067871,
      "learning_rate": 0.0001794901896491017,
      "loss": 3.1093,
      "step": 145518
    },
    {
      "epoch": 1.89,
      "grad_norm": 4.001119136810303,
      "learning_rate": 0.0001794864436396516,
      "loss": 2.7468,
      "step": 145519
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.6951236724853516,
      "learning_rate": 0.00017948269765260722,
      "loss": 2.7523,
      "step": 145520
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2915663719177246,
      "learning_rate": 0.0001794789516879693,
      "loss": 2.8475,
      "step": 145521
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1895434856414795,
      "learning_rate": 0.0001794752057457385,
      "loss": 2.9604,
      "step": 145522
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.217053174972534,
      "learning_rate": 0.00017947145982591552,
      "loss": 3.0239,
      "step": 145523
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4778270721435547,
      "learning_rate": 0.0001794677139285012,
      "loss": 3.0395,
      "step": 145524
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.41331148147583,
      "learning_rate": 0.000179463968053496,
      "loss": 2.9194,
      "step": 145525
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.1019530296325684,
      "learning_rate": 0.0001794602222009007,
      "loss": 2.7095,
      "step": 145526
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1274075508117676,
      "learning_rate": 0.00017945647637071605,
      "loss": 2.7501,
      "step": 145527
    },
    {
      "epoch": 1.89,
      "grad_norm": 3.0312416553497314,
      "learning_rate": 0.0001794527305629427,
      "loss": 2.7539,
      "step": 145528
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.2223703861236572,
      "learning_rate": 0.00017944898477758136,
      "loss": 2.9118,
      "step": 145529
    },
    {
      "epoch": 1.89,
      "grad_norm": 1.8254334926605225,
      "learning_rate": 0.0001794452390146327,
      "loss": 3.127,
      "step": 145530
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.494910717010498,
      "learning_rate": 0.00017944149327409764,
      "loss": 2.7633,
      "step": 145531
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0307676792144775,
      "learning_rate": 0.0001794377475559765,
      "loss": 2.8483,
      "step": 145532
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.1818976402282715,
      "learning_rate": 0.0001794340018602702,
      "loss": 2.8709,
      "step": 145533
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4493517875671387,
      "learning_rate": 0.00017943025618697938,
      "loss": 2.6555,
      "step": 145534
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.0847818851470947,
      "learning_rate": 0.00017942651053610474,
      "loss": 3.2578,
      "step": 145535
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.4225735664367676,
      "learning_rate": 0.000179422764907647,
      "loss": 2.7699,
      "step": 145536
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.113010883331299,
      "learning_rate": 0.00017941901930160697,
      "loss": 2.9669,
      "step": 145537
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0910072326660156,
      "learning_rate": 0.0001794152737179851,
      "loss": 3.0637,
      "step": 145538
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4779961109161377,
      "learning_rate": 0.00017941152815678218,
      "loss": 3.2139,
      "step": 145539
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.723470449447632,
      "learning_rate": 0.00017940778261799894,
      "loss": 2.7625,
      "step": 145540
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3600382804870605,
      "learning_rate": 0.00017940403710163606,
      "loss": 2.8853,
      "step": 145541
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6915719509124756,
      "learning_rate": 0.00017940029160769426,
      "loss": 2.6988,
      "step": 145542
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.473362684249878,
      "learning_rate": 0.0001793965461361744,
      "loss": 2.8219,
      "step": 145543
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5752029418945312,
      "learning_rate": 0.0001793928006870768,
      "loss": 3.1109,
      "step": 145544
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.723466157913208,
      "learning_rate": 0.0001793890552604024,
      "loss": 3.0713,
      "step": 145545
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0645382404327393,
      "learning_rate": 0.0001793853098561518,
      "loss": 2.8124,
      "step": 145546
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0483553409576416,
      "learning_rate": 0.0001793815644743258,
      "loss": 3.029,
      "step": 145547
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3116636276245117,
      "learning_rate": 0.00017937781911492502,
      "loss": 2.8465,
      "step": 145548
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.422719717025757,
      "learning_rate": 0.00017937407377795032,
      "loss": 3.2401,
      "step": 145549
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.300808906555176,
      "learning_rate": 0.00017937032846340208,
      "loss": 2.8468,
      "step": 145550
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1471407413482666,
      "learning_rate": 0.00017936658317128126,
      "loss": 3.0059,
      "step": 145551
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.188737392425537,
      "learning_rate": 0.0001793628379015884,
      "loss": 2.7767,
      "step": 145552
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.802095413208008,
      "learning_rate": 0.0001793590926543243,
      "loss": 2.9236,
      "step": 145553
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3896517753601074,
      "learning_rate": 0.00017935534742948963,
      "loss": 2.6662,
      "step": 145554
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0724964141845703,
      "learning_rate": 0.00017935160222708516,
      "loss": 2.7403,
      "step": 145555
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.915724754333496,
      "learning_rate": 0.0001793478570471114,
      "loss": 2.7361,
      "step": 145556
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.381629228591919,
      "learning_rate": 0.00017934411188956915,
      "loss": 2.9384,
      "step": 145557
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1423420906066895,
      "learning_rate": 0.00017934036675445913,
      "loss": 3.0874,
      "step": 145558
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.924255132675171,
      "learning_rate": 0.000179336621641782,
      "loss": 2.9941,
      "step": 145559
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6088926792144775,
      "learning_rate": 0.00017933287655153845,
      "loss": 2.8627,
      "step": 145560
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2561240196228027,
      "learning_rate": 0.0001793291314837293,
      "loss": 2.8578,
      "step": 145561
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.21636700630188,
      "learning_rate": 0.0001793253864383551,
      "loss": 2.6277,
      "step": 145562
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6692376136779785,
      "learning_rate": 0.00017932164141541655,
      "loss": 2.7701,
      "step": 145563
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.926817774772644,
      "learning_rate": 0.00017931789641491438,
      "loss": 3.0917,
      "step": 145564
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.7826242446899414,
      "learning_rate": 0.00017931415143684925,
      "loss": 2.765,
      "step": 145565
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9680533409118652,
      "learning_rate": 0.00017931040648122193,
      "loss": 2.8181,
      "step": 145566
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.81897234916687,
      "learning_rate": 0.0001793066615480332,
      "loss": 2.7916,
      "step": 145567
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9624370336532593,
      "learning_rate": 0.00017930291663728355,
      "loss": 2.9821,
      "step": 145568
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.061939001083374,
      "learning_rate": 0.00017929917174897372,
      "loss": 2.5698,
      "step": 145569
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.7787549495697021,
      "learning_rate": 0.00017929542688310452,
      "loss": 3.2602,
      "step": 145570
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3009769916534424,
      "learning_rate": 0.0001792916820396765,
      "loss": 2.9524,
      "step": 145571
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5838449001312256,
      "learning_rate": 0.0001792879372186905,
      "loss": 3.1154,
      "step": 145572
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2675328254699707,
      "learning_rate": 0.00017928419242014716,
      "loss": 3.1412,
      "step": 145573
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.859940528869629,
      "learning_rate": 0.0001792804476440472,
      "loss": 3.038,
      "step": 145574
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3963088989257812,
      "learning_rate": 0.00017927670289039123,
      "loss": 3.035,
      "step": 145575
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3826568126678467,
      "learning_rate": 0.00017927295815917997,
      "loss": 2.9329,
      "step": 145576
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.658566474914551,
      "learning_rate": 0.00017926921345041416,
      "loss": 3.086,
      "step": 145577
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.869792938232422,
      "learning_rate": 0.00017926546876409448,
      "loss": 2.9093,
      "step": 145578
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3076839447021484,
      "learning_rate": 0.00017926172410022168,
      "loss": 3.1518,
      "step": 145579
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8109488487243652,
      "learning_rate": 0.00017925797945879634,
      "loss": 2.9207,
      "step": 145580
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.450132846832275,
      "learning_rate": 0.00017925423483981925,
      "loss": 2.8108,
      "step": 145581
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.62958025932312,
      "learning_rate": 0.00017925049024329113,
      "loss": 3.0687,
      "step": 145582
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4354166984558105,
      "learning_rate": 0.00017924674566921253,
      "loss": 2.7983,
      "step": 145583
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4753236770629883,
      "learning_rate": 0.00017924300111758426,
      "loss": 2.8089,
      "step": 145584
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6744930744171143,
      "learning_rate": 0.00017923925658840708,
      "loss": 3.1503,
      "step": 145585
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.63854718208313,
      "learning_rate": 0.00017923551208168153,
      "loss": 2.8256,
      "step": 145586
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.839466094970703,
      "learning_rate": 0.00017923176759740834,
      "loss": 3.1671,
      "step": 145587
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.20743989944458,
      "learning_rate": 0.0001792280231355883,
      "loss": 2.6988,
      "step": 145588
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3351213932037354,
      "learning_rate": 0.0001792242786962221,
      "loss": 2.9116,
      "step": 145589
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0916664600372314,
      "learning_rate": 0.00017922053427931028,
      "loss": 2.8853,
      "step": 145590
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.570565938949585,
      "learning_rate": 0.00017921678988485373,
      "loss": 3.0356,
      "step": 145591
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2774689197540283,
      "learning_rate": 0.00017921304551285301,
      "loss": 2.8389,
      "step": 145592
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4589192867279053,
      "learning_rate": 0.00017920930116330886,
      "loss": 2.7754,
      "step": 145593
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.744701862335205,
      "learning_rate": 0.000179205556836222,
      "loss": 2.9791,
      "step": 145594
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0162129402160645,
      "learning_rate": 0.00017920181253159304,
      "loss": 3.043,
      "step": 145595
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7684476375579834,
      "learning_rate": 0.00017919806824942287,
      "loss": 2.9571,
      "step": 145596
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.222205638885498,
      "learning_rate": 0.000179194323989712,
      "loss": 2.8884,
      "step": 145597
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.348860025405884,
      "learning_rate": 0.0001791905797524612,
      "loss": 3.0512,
      "step": 145598
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0281248092651367,
      "learning_rate": 0.00017918683553767113,
      "loss": 2.8644,
      "step": 145599
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.425630807876587,
      "learning_rate": 0.00017918309134534247,
      "loss": 2.7246,
      "step": 145600
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0741798877716064,
      "learning_rate": 0.000179179347175476,
      "loss": 2.8389,
      "step": 145601
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.430696725845337,
      "learning_rate": 0.00017917560302807233,
      "loss": 2.8404,
      "step": 145602
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9209470748901367,
      "learning_rate": 0.00017917185890313226,
      "loss": 2.8953,
      "step": 145603
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.620783805847168,
      "learning_rate": 0.00017916811480065642,
      "loss": 2.5888,
      "step": 145604
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8503904342651367,
      "learning_rate": 0.00017916437072064547,
      "loss": 2.8875,
      "step": 145605
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.843339443206787,
      "learning_rate": 0.00017916062666310014,
      "loss": 3.2071,
      "step": 145606
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.301931142807007,
      "learning_rate": 0.00017915688262802116,
      "loss": 3.1076,
      "step": 145607
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4461374282836914,
      "learning_rate": 0.00017915313861540915,
      "loss": 2.8146,
      "step": 145608
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6022987365722656,
      "learning_rate": 0.00017914939462526487,
      "loss": 2.8832,
      "step": 145609
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8393068313598633,
      "learning_rate": 0.0001791456506575891,
      "loss": 2.9408,
      "step": 145610
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.7512168884277344,
      "learning_rate": 0.00017914190671238235,
      "loss": 3.0584,
      "step": 145611
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.345731735229492,
      "learning_rate": 0.0001791381627896454,
      "loss": 2.983,
      "step": 145612
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.5144124031066895,
      "learning_rate": 0.00017913441888937892,
      "loss": 2.7842,
      "step": 145613
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2953150272369385,
      "learning_rate": 0.0001791306750115836,
      "loss": 2.7301,
      "step": 145614
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.870007038116455,
      "learning_rate": 0.00017912693115626024,
      "loss": 3.0742,
      "step": 145615
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.389098882675171,
      "learning_rate": 0.00017912318732340955,
      "loss": 2.8576,
      "step": 145616
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2212276458740234,
      "learning_rate": 0.000179119443513032,
      "loss": 3.1099,
      "step": 145617
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.7731666564941406,
      "learning_rate": 0.0001791156997251285,
      "loss": 2.9088,
      "step": 145618
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.120579719543457,
      "learning_rate": 0.00017911195595969958,
      "loss": 3.009,
      "step": 145619
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.020161151885986,
      "learning_rate": 0.0001791082122167461,
      "loss": 3.1218,
      "step": 145620
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.102069139480591,
      "learning_rate": 0.00017910446849626866,
      "loss": 3.0187,
      "step": 145621
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.396913528442383,
      "learning_rate": 0.0001791007247982681,
      "loss": 2.8903,
      "step": 145622
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.9744863510131836,
      "learning_rate": 0.00017909698112274488,
      "loss": 3.1426,
      "step": 145623
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.720395565032959,
      "learning_rate": 0.00017909323746969984,
      "loss": 2.9534,
      "step": 145624
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.8865760564804077,
      "learning_rate": 0.00017908949383913363,
      "loss": 2.7072,
      "step": 145625
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9098577499389648,
      "learning_rate": 0.00017908575023104693,
      "loss": 3.0678,
      "step": 145626
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.865858316421509,
      "learning_rate": 0.00017908200664544052,
      "loss": 2.7515,
      "step": 145627
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.540400981903076,
      "learning_rate": 0.00017907826308231512,
      "loss": 2.856,
      "step": 145628
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.14243745803833,
      "learning_rate": 0.00017907451954167128,
      "loss": 2.7346,
      "step": 145629
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9035197496414185,
      "learning_rate": 0.00017907077602350975,
      "loss": 2.9924,
      "step": 145630
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3350164890289307,
      "learning_rate": 0.00017906703252783124,
      "loss": 3.0021,
      "step": 145631
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1555235385894775,
      "learning_rate": 0.00017906328905463646,
      "loss": 3.0347,
      "step": 145632
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.648837089538574,
      "learning_rate": 0.00017905954560392607,
      "loss": 3.1036,
      "step": 145633
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.40885591506958,
      "learning_rate": 0.00017905580217570096,
      "loss": 3.2604,
      "step": 145634
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.153092622756958,
      "learning_rate": 0.00017905205876996152,
      "loss": 3.232,
      "step": 145635
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.038022994995117,
      "learning_rate": 0.0001790483153867086,
      "loss": 3.1507,
      "step": 145636
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.394320011138916,
      "learning_rate": 0.00017904457202594286,
      "loss": 2.8373,
      "step": 145637
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2169437408447266,
      "learning_rate": 0.000179040828687665,
      "loss": 3.0333,
      "step": 145638
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5279343128204346,
      "learning_rate": 0.00017903708537187577,
      "loss": 3.1528,
      "step": 145639
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.382168769836426,
      "learning_rate": 0.00017903334207857593,
      "loss": 3.2213,
      "step": 145640
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2707359790802,
      "learning_rate": 0.00017902959880776594,
      "loss": 2.8382,
      "step": 145641
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.0501604080200195,
      "learning_rate": 0.00017902585555944667,
      "loss": 2.7959,
      "step": 145642
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7826955318450928,
      "learning_rate": 0.00017902211233361873,
      "loss": 2.9851,
      "step": 145643
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.515584707260132,
      "learning_rate": 0.00017901836913028293,
      "loss": 3.008,
      "step": 145644
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.847703695297241,
      "learning_rate": 0.00017901462594943981,
      "loss": 3.0002,
      "step": 145645
    },
    {
      "epoch": 1.9,
      "grad_norm": 6.418214321136475,
      "learning_rate": 0.00017901088279109035,
      "loss": 2.9475,
      "step": 145646
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.353216648101807,
      "learning_rate": 0.00017900713965523496,
      "loss": 2.9765,
      "step": 145647
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2441821098327637,
      "learning_rate": 0.00017900339654187435,
      "loss": 3.0234,
      "step": 145648
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.874504566192627,
      "learning_rate": 0.00017899965345100928,
      "loss": 3.0409,
      "step": 145649
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.503305673599243,
      "learning_rate": 0.0001789959103826405,
      "loss": 2.9719,
      "step": 145650
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.361506462097168,
      "learning_rate": 0.00017899216733676865,
      "loss": 3.0256,
      "step": 145651
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2899107933044434,
      "learning_rate": 0.0001789884243133946,
      "loss": 2.6476,
      "step": 145652
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2189273834228516,
      "learning_rate": 0.00017898468131251872,
      "loss": 3.1406,
      "step": 145653
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0716867446899414,
      "learning_rate": 0.00017898093833414188,
      "loss": 3.1602,
      "step": 145654
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3047914505004883,
      "learning_rate": 0.00017897719537826476,
      "loss": 2.975,
      "step": 145655
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0358388423919678,
      "learning_rate": 0.0001789734524448881,
      "loss": 2.8587,
      "step": 145656
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4623870849609375,
      "learning_rate": 0.00017896970953401253,
      "loss": 3.2108,
      "step": 145657
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.059650182723999,
      "learning_rate": 0.00017896596664563884,
      "loss": 3.0837,
      "step": 145658
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.054763078689575,
      "learning_rate": 0.00017896222377976765,
      "loss": 2.8703,
      "step": 145659
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.411555767059326,
      "learning_rate": 0.00017895848093639962,
      "loss": 2.9809,
      "step": 145660
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1161112785339355,
      "learning_rate": 0.0001789547381155355,
      "loss": 2.7927,
      "step": 145661
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.239295721054077,
      "learning_rate": 0.000178950995317176,
      "loss": 2.7713,
      "step": 145662
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.004708766937256,
      "learning_rate": 0.00017894725254132175,
      "loss": 2.9108,
      "step": 145663
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.107147455215454,
      "learning_rate": 0.0001789435097879735,
      "loss": 2.9963,
      "step": 145664
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8257272243499756,
      "learning_rate": 0.00017893976705713203,
      "loss": 2.9037,
      "step": 145665
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7897441387176514,
      "learning_rate": 0.00017893602434879792,
      "loss": 2.6972,
      "step": 145666
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.155198335647583,
      "learning_rate": 0.0001789322816629718,
      "loss": 2.9694,
      "step": 145667
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.434640884399414,
      "learning_rate": 0.0001789285389996545,
      "loss": 3.0931,
      "step": 145668
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.944815158843994,
      "learning_rate": 0.00017892479635884666,
      "loss": 2.8884,
      "step": 145669
    },
    {
      "epoch": 1.9,
      "grad_norm": 6.206679821014404,
      "learning_rate": 0.00017892105374054895,
      "loss": 2.9918,
      "step": 145670
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.3226141929626465,
      "learning_rate": 0.0001789173111447622,
      "loss": 2.7578,
      "step": 145671
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2501628398895264,
      "learning_rate": 0.00017891356857148693,
      "loss": 3.0911,
      "step": 145672
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5094616413116455,
      "learning_rate": 0.00017890982602072395,
      "loss": 3.0466,
      "step": 145673
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.823105812072754,
      "learning_rate": 0.00017890608349247395,
      "loss": 2.9562,
      "step": 145674
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.897895097732544,
      "learning_rate": 0.00017890234098673753,
      "loss": 2.9729,
      "step": 145675
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2476205825805664,
      "learning_rate": 0.00017889859850351547,
      "loss": 2.8351,
      "step": 145676
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.119048833847046,
      "learning_rate": 0.00017889485604280847,
      "loss": 3.0974,
      "step": 145677
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.00050687789917,
      "learning_rate": 0.00017889111360461715,
      "loss": 2.9059,
      "step": 145678
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.481151580810547,
      "learning_rate": 0.0001788873711889423,
      "loss": 2.8068,
      "step": 145679
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0378036499023438,
      "learning_rate": 0.00017888362879578453,
      "loss": 3.032,
      "step": 145680
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1249661445617676,
      "learning_rate": 0.00017887988642514464,
      "loss": 2.8633,
      "step": 145681
    },
    {
      "epoch": 1.9,
      "grad_norm": 5.008240222930908,
      "learning_rate": 0.00017887614407702322,
      "loss": 3.0457,
      "step": 145682
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.450400352478027,
      "learning_rate": 0.0001788724017514211,
      "loss": 3.0966,
      "step": 145683
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0378565788269043,
      "learning_rate": 0.00017886865944833879,
      "loss": 3.0545,
      "step": 145684
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9905399084091187,
      "learning_rate": 0.0001788649171677771,
      "loss": 2.9093,
      "step": 145685
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9437845945358276,
      "learning_rate": 0.0001788611749097367,
      "loss": 2.9966,
      "step": 145686
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.7770434617996216,
      "learning_rate": 0.00017885743267421827,
      "loss": 3.0889,
      "step": 145687
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.263132333755493,
      "learning_rate": 0.00017885369046122262,
      "loss": 3.0786,
      "step": 145688
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.9937853813171387,
      "learning_rate": 0.00017884994827075036,
      "loss": 2.8204,
      "step": 145689
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5126218795776367,
      "learning_rate": 0.0001788462061028021,
      "loss": 3.098,
      "step": 145690
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.145275354385376,
      "learning_rate": 0.00017884246395737864,
      "loss": 2.8809,
      "step": 145691
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.921764373779297,
      "learning_rate": 0.00017883872183448065,
      "loss": 2.7988,
      "step": 145692
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.599729299545288,
      "learning_rate": 0.0001788349797341088,
      "loss": 3.019,
      "step": 145693
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2152926921844482,
      "learning_rate": 0.00017883123765626382,
      "loss": 2.9065,
      "step": 145694
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0629851818084717,
      "learning_rate": 0.00017882749560094654,
      "loss": 2.9143,
      "step": 145695
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1521549224853516,
      "learning_rate": 0.0001788237535681574,
      "loss": 2.9818,
      "step": 145696
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.509485244750977,
      "learning_rate": 0.0001788200115578972,
      "loss": 2.8593,
      "step": 145697
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.324037790298462,
      "learning_rate": 0.00017881626957016663,
      "loss": 2.8498,
      "step": 145698
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3090105056762695,
      "learning_rate": 0.00017881252760496642,
      "loss": 2.9445,
      "step": 145699
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.419971227645874,
      "learning_rate": 0.0001788087856622973,
      "loss": 3.0692,
      "step": 145700
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.09159255027771,
      "learning_rate": 0.00017880504374215997,
      "loss": 3.0525,
      "step": 145701
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.430184841156006,
      "learning_rate": 0.00017880130184455496,
      "loss": 2.9175,
      "step": 145702
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.364957094192505,
      "learning_rate": 0.00017879755996948307,
      "loss": 2.965,
      "step": 145703
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.421189785003662,
      "learning_rate": 0.00017879381811694502,
      "loss": 2.6361,
      "step": 145704
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7989678382873535,
      "learning_rate": 0.0001787900762869415,
      "loss": 3.1767,
      "step": 145705
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2541589736938477,
      "learning_rate": 0.00017878633447947318,
      "loss": 2.8898,
      "step": 145706
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1593170166015625,
      "learning_rate": 0.00017878259269454088,
      "loss": 3.1752,
      "step": 145707
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.98861563205719,
      "learning_rate": 0.00017877885093214505,
      "loss": 3.0488,
      "step": 145708
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4862425327301025,
      "learning_rate": 0.00017877510919228653,
      "loss": 3.1018,
      "step": 145709
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.464406728744507,
      "learning_rate": 0.000178771367474966,
      "loss": 3.0637,
      "step": 145710
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.366485595703125,
      "learning_rate": 0.0001787676257801842,
      "loss": 3.0542,
      "step": 145711
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3501014709472656,
      "learning_rate": 0.00017876388410794175,
      "loss": 2.799,
      "step": 145712
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.4502224922180176,
      "learning_rate": 0.0001787601424582395,
      "loss": 3.0201,
      "step": 145713
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.185906410217285,
      "learning_rate": 0.0001787564008310779,
      "loss": 2.7614,
      "step": 145714
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.67818546295166,
      "learning_rate": 0.0001787526592264578,
      "loss": 3.1668,
      "step": 145715
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4284651279449463,
      "learning_rate": 0.0001787489176443799,
      "loss": 3.1412,
      "step": 145716
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6024954319000244,
      "learning_rate": 0.00017874517608484482,
      "loss": 3.0292,
      "step": 145717
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.7590155601501465,
      "learning_rate": 0.0001787414345478533,
      "loss": 3.224,
      "step": 145718
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7269866466522217,
      "learning_rate": 0.00017873769303340616,
      "loss": 3.202,
      "step": 145719
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.446664333343506,
      "learning_rate": 0.00017873395154150386,
      "loss": 2.8905,
      "step": 145720
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1753787994384766,
      "learning_rate": 0.0001787302100721472,
      "loss": 3.1353,
      "step": 145721
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9060002565383911,
      "learning_rate": 0.0001787264686253369,
      "loss": 3.0306,
      "step": 145722
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1445462703704834,
      "learning_rate": 0.00017872272720107358,
      "loss": 3.0444,
      "step": 145723
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1561625003814697,
      "learning_rate": 0.00017871898579935807,
      "loss": 2.8912,
      "step": 145724
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.408751964569092,
      "learning_rate": 0.00017871524442019106,
      "loss": 2.925,
      "step": 145725
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4228668212890625,
      "learning_rate": 0.0001787115030635731,
      "loss": 2.8807,
      "step": 145726
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4478795528411865,
      "learning_rate": 0.00017870776172950495,
      "loss": 2.6773,
      "step": 145727
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2514126300811768,
      "learning_rate": 0.00017870402041798728,
      "loss": 3.1549,
      "step": 145728
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.316875696182251,
      "learning_rate": 0.00017870027912902085,
      "loss": 2.8607,
      "step": 145729
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.544715404510498,
      "learning_rate": 0.0001786965378626063,
      "loss": 2.7318,
      "step": 145730
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0393199920654297,
      "learning_rate": 0.0001786927966187444,
      "loss": 3.0687,
      "step": 145731
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.562155246734619,
      "learning_rate": 0.00017868905539743593,
      "loss": 3.0423,
      "step": 145732
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.329209089279175,
      "learning_rate": 0.00017868531419868127,
      "loss": 2.9912,
      "step": 145733
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.073291063308716,
      "learning_rate": 0.00017868157302248133,
      "loss": 2.8044,
      "step": 145734
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5839200019836426,
      "learning_rate": 0.00017867783186883675,
      "loss": 2.9208,
      "step": 145735
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.001707077026367,
      "learning_rate": 0.0001786740907377483,
      "loss": 2.8832,
      "step": 145736
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.315178871154785,
      "learning_rate": 0.0001786703496292166,
      "loss": 2.9986,
      "step": 145737
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.996398687362671,
      "learning_rate": 0.0001786666085432425,
      "loss": 3.0991,
      "step": 145738
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.008098840713501,
      "learning_rate": 0.00017866286747982645,
      "loss": 2.945,
      "step": 145739
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.161888360977173,
      "learning_rate": 0.00017865912643896925,
      "loss": 2.8121,
      "step": 145740
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.111863613128662,
      "learning_rate": 0.00017865538542067165,
      "loss": 2.9571,
      "step": 145741
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0934112071990967,
      "learning_rate": 0.00017865164442493425,
      "loss": 2.8048,
      "step": 145742
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2023658752441406,
      "learning_rate": 0.0001786479034517578,
      "loss": 3.0354,
      "step": 145743
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.89274001121521,
      "learning_rate": 0.00017864416250114312,
      "loss": 2.73,
      "step": 145744
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4793670177459717,
      "learning_rate": 0.0001786404215730907,
      "loss": 2.9954,
      "step": 145745
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6660077571868896,
      "learning_rate": 0.00017863668066760128,
      "loss": 3.2139,
      "step": 145746
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.696714401245117,
      "learning_rate": 0.0001786329397846756,
      "loss": 2.7898,
      "step": 145747
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.754704236984253,
      "learning_rate": 0.00017862919892431439,
      "loss": 3.0084,
      "step": 145748
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.018885850906372,
      "learning_rate": 0.00017862545808651825,
      "loss": 2.882,
      "step": 145749
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.5681166648864746,
      "learning_rate": 0.000178621717271288,
      "loss": 3.0217,
      "step": 145750
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.398908853530884,
      "learning_rate": 0.00017861797647862428,
      "loss": 3.1734,
      "step": 145751
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8961639404296875,
      "learning_rate": 0.00017861423570852768,
      "loss": 2.8328,
      "step": 145752
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3644323348999023,
      "learning_rate": 0.00017861049496099903,
      "loss": 3.1687,
      "step": 145753
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0036733150482178,
      "learning_rate": 0.00017860675423603893,
      "loss": 3.028,
      "step": 145754
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4529435634613037,
      "learning_rate": 0.00017860301353364816,
      "loss": 3.1514,
      "step": 145755
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2529373168945312,
      "learning_rate": 0.0001785992728538274,
      "loss": 2.9801,
      "step": 145756
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.998971700668335,
      "learning_rate": 0.00017859553219657734,
      "loss": 3.0132,
      "step": 145757
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.242326259613037,
      "learning_rate": 0.00017859179156189865,
      "loss": 2.977,
      "step": 145758
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.800276041030884,
      "learning_rate": 0.000178588050949792,
      "loss": 3.0582,
      "step": 145759
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9024622440338135,
      "learning_rate": 0.00017858431036025816,
      "loss": 2.9485,
      "step": 145760
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.214303493499756,
      "learning_rate": 0.00017858056979329774,
      "loss": 2.9457,
      "step": 145761
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.470646381378174,
      "learning_rate": 0.00017857682924891154,
      "loss": 2.9256,
      "step": 145762
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.002507209777832,
      "learning_rate": 0.0001785730887271002,
      "loss": 3.0284,
      "step": 145763
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3480958938598633,
      "learning_rate": 0.00017856934822786438,
      "loss": 2.8645,
      "step": 145764
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3226985931396484,
      "learning_rate": 0.00017856560775120483,
      "loss": 2.8512,
      "step": 145765
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.159620523452759,
      "learning_rate": 0.00017856186729712224,
      "loss": 2.5796,
      "step": 145766
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.544039011001587,
      "learning_rate": 0.00017855812686561723,
      "loss": 2.841,
      "step": 145767
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7168829441070557,
      "learning_rate": 0.00017855438645669064,
      "loss": 3.1156,
      "step": 145768
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1338002681732178,
      "learning_rate": 0.00017855064607034303,
      "loss": 2.8274,
      "step": 145769
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.500943422317505,
      "learning_rate": 0.00017854690570657516,
      "loss": 2.8893,
      "step": 145770
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.7734789848327637,
      "learning_rate": 0.00017854316536538767,
      "loss": 2.9943,
      "step": 145771
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7702059745788574,
      "learning_rate": 0.00017853942504678135,
      "loss": 2.9641,
      "step": 145772
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2768983840942383,
      "learning_rate": 0.00017853568475075684,
      "loss": 3.126,
      "step": 145773
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.896909713745117,
      "learning_rate": 0.00017853194447731488,
      "loss": 2.9246,
      "step": 145774
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2671406269073486,
      "learning_rate": 0.00017852820422645604,
      "loss": 2.8559,
      "step": 145775
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0489449501037598,
      "learning_rate": 0.00017852446399818113,
      "loss": 3.1251,
      "step": 145776
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9972083568572998,
      "learning_rate": 0.0001785207237924908,
      "loss": 2.8441,
      "step": 145777
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.191075086593628,
      "learning_rate": 0.00017851698360938576,
      "loss": 2.8916,
      "step": 145778
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9676839113235474,
      "learning_rate": 0.00017851324344886671,
      "loss": 3.0176,
      "step": 145779
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9141507148742676,
      "learning_rate": 0.0001785095033109345,
      "loss": 2.8775,
      "step": 145780
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.433462381362915,
      "learning_rate": 0.00017850576319558946,
      "loss": 3.2123,
      "step": 145781
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.225374221801758,
      "learning_rate": 0.00017850202310283256,
      "loss": 2.9063,
      "step": 145782
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.473226308822632,
      "learning_rate": 0.00017849828303266441,
      "loss": 2.8838,
      "step": 145783
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.177730083465576,
      "learning_rate": 0.00017849454298508573,
      "loss": 2.9568,
      "step": 145784
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3077993392944336,
      "learning_rate": 0.00017849080296009718,
      "loss": 3.0637,
      "step": 145785
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1409382820129395,
      "learning_rate": 0.00017848706295769961,
      "loss": 3.0469,
      "step": 145786
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1206679344177246,
      "learning_rate": 0.00017848332297789349,
      "loss": 2.7252,
      "step": 145787
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6208722591400146,
      "learning_rate": 0.00017847958302067964,
      "loss": 3.1876,
      "step": 145788
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.598548650741577,
      "learning_rate": 0.0001784758430860587,
      "loss": 2.9519,
      "step": 145789
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.147791624069214,
      "learning_rate": 0.00017847210317403138,
      "loss": 3.0721,
      "step": 145790
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.457120656967163,
      "learning_rate": 0.00017846836328459838,
      "loss": 3.0258,
      "step": 145791
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8132145404815674,
      "learning_rate": 0.00017846462341776056,
      "loss": 3.2562,
      "step": 145792
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3294081687927246,
      "learning_rate": 0.00017846088357351833,
      "loss": 2.7627,
      "step": 145793
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.716158151626587,
      "learning_rate": 0.0001784571437518725,
      "loss": 2.8221,
      "step": 145794
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0781311988830566,
      "learning_rate": 0.0001784534039528238,
      "loss": 2.9767,
      "step": 145795
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6261093616485596,
      "learning_rate": 0.00017844966417637287,
      "loss": 3.0403,
      "step": 145796
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.186619281768799,
      "learning_rate": 0.00017844592442252047,
      "loss": 2.8632,
      "step": 145797
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7395663261413574,
      "learning_rate": 0.00017844218469126728,
      "loss": 3.0472,
      "step": 145798
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5002269744873047,
      "learning_rate": 0.00017843844498261407,
      "loss": 2.7491,
      "step": 145799
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1726605892181396,
      "learning_rate": 0.00017843470529656135,
      "loss": 2.9255,
      "step": 145800
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.523430585861206,
      "learning_rate": 0.00017843096563310992,
      "loss": 2.9406,
      "step": 145801
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2008790969848633,
      "learning_rate": 0.00017842722599226046,
      "loss": 3.0889,
      "step": 145802
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4768764972686768,
      "learning_rate": 0.00017842348637401366,
      "loss": 3.0203,
      "step": 145803
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.443120241165161,
      "learning_rate": 0.00017841974677837026,
      "loss": 3.0069,
      "step": 145804
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.060382604598999,
      "learning_rate": 0.000178416007205331,
      "loss": 3.0367,
      "step": 145805
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8971848487854004,
      "learning_rate": 0.00017841226765489645,
      "loss": 3.1201,
      "step": 145806
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0455400943756104,
      "learning_rate": 0.0001784085281270673,
      "loss": 2.9405,
      "step": 145807
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.005791187286377,
      "learning_rate": 0.0001784047886218443,
      "loss": 3.1117,
      "step": 145808
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.080456018447876,
      "learning_rate": 0.00017840104913922816,
      "loss": 2.9801,
      "step": 145809
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8119137287139893,
      "learning_rate": 0.00017839730967921953,
      "loss": 2.9731,
      "step": 145810
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2181410789489746,
      "learning_rate": 0.0001783935702418193,
      "loss": 2.886,
      "step": 145811
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0296356678009033,
      "learning_rate": 0.00017838983082702782,
      "loss": 3.0918,
      "step": 145812
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.131856918334961,
      "learning_rate": 0.00017838609143484605,
      "loss": 3.0149,
      "step": 145813
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9546338319778442,
      "learning_rate": 0.00017838235206527454,
      "loss": 2.9435,
      "step": 145814
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7304747104644775,
      "learning_rate": 0.00017837861271831405,
      "loss": 2.994,
      "step": 145815
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3415417671203613,
      "learning_rate": 0.0001783748733939653,
      "loss": 2.6803,
      "step": 145816
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3258285522460938,
      "learning_rate": 0.00017837113409222905,
      "loss": 3.0999,
      "step": 145817
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7397654056549072,
      "learning_rate": 0.00017836739481310581,
      "loss": 2.8565,
      "step": 145818
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1119749546051025,
      "learning_rate": 0.00017836365555659635,
      "loss": 3.0742,
      "step": 145819
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.4641854763031006,
      "learning_rate": 0.00017835991632270137,
      "loss": 3.0953,
      "step": 145820
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2194597721099854,
      "learning_rate": 0.00017835617711142162,
      "loss": 2.9314,
      "step": 145821
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1110281944274902,
      "learning_rate": 0.00017835243792275774,
      "loss": 2.7609,
      "step": 145822
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1863534450531006,
      "learning_rate": 0.00017834869875671053,
      "loss": 3.0411,
      "step": 145823
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.521756172180176,
      "learning_rate": 0.00017834495961328045,
      "loss": 2.9492,
      "step": 145824
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1543631553649902,
      "learning_rate": 0.0001783412204924684,
      "loss": 3.0362,
      "step": 145825
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.712724208831787,
      "learning_rate": 0.00017833748139427498,
      "loss": 3.0403,
      "step": 145826
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3021674156188965,
      "learning_rate": 0.00017833374231870092,
      "loss": 2.9727,
      "step": 145827
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.145965814590454,
      "learning_rate": 0.00017833000326574695,
      "loss": 2.7604,
      "step": 145828
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.627739191055298,
      "learning_rate": 0.0001783262642354138,
      "loss": 3.2006,
      "step": 145829
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.7970558404922485,
      "learning_rate": 0.000178322525227702,
      "loss": 2.9427,
      "step": 145830
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.4991214275360107,
      "learning_rate": 0.00017831878624261234,
      "loss": 2.6755,
      "step": 145831
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8923776149749756,
      "learning_rate": 0.0001783150472801455,
      "loss": 2.782,
      "step": 145832
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3348748683929443,
      "learning_rate": 0.00017831130834030222,
      "loss": 3.2759,
      "step": 145833
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8528096675872803,
      "learning_rate": 0.00017830756942308312,
      "loss": 3.16,
      "step": 145834
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0118114948272705,
      "learning_rate": 0.00017830383052848904,
      "loss": 2.9202,
      "step": 145835
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.538879156112671,
      "learning_rate": 0.00017830009165652056,
      "loss": 3.0635,
      "step": 145836
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.234351396560669,
      "learning_rate": 0.00017829635280717832,
      "loss": 3.1553,
      "step": 145837
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3293449878692627,
      "learning_rate": 0.00017829261398046308,
      "loss": 2.9391,
      "step": 145838
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.949625253677368,
      "learning_rate": 0.00017828887517637557,
      "loss": 3.0595,
      "step": 145839
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.236149311065674,
      "learning_rate": 0.00017828513639491642,
      "loss": 2.9707,
      "step": 145840
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.203956365585327,
      "learning_rate": 0.00017828139763608646,
      "loss": 3.1891,
      "step": 145841
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.352660655975342,
      "learning_rate": 0.0001782776588998862,
      "loss": 3.0155,
      "step": 145842
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1781864166259766,
      "learning_rate": 0.0001782739201863165,
      "loss": 2.9977,
      "step": 145843
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1555240154266357,
      "learning_rate": 0.0001782701814953779,
      "loss": 3.0963,
      "step": 145844
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.011781692504883,
      "learning_rate": 0.00017826644282707119,
      "loss": 3.0458,
      "step": 145845
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2820940017700195,
      "learning_rate": 0.000178262704181397,
      "loss": 2.8999,
      "step": 145846
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6542794704437256,
      "learning_rate": 0.00017825896555835618,
      "loss": 3.0832,
      "step": 145847
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.513573408126831,
      "learning_rate": 0.0001782552269579492,
      "loss": 2.739,
      "step": 145848
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0008747577667236,
      "learning_rate": 0.00017825148838017694,
      "loss": 3.0181,
      "step": 145849
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4264135360717773,
      "learning_rate": 0.00017824774982504003,
      "loss": 3.0657,
      "step": 145850
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0497851371765137,
      "learning_rate": 0.00017824401129253913,
      "loss": 3.0148,
      "step": 145851
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0219342708587646,
      "learning_rate": 0.00017824027278267498,
      "loss": 2.9725,
      "step": 145852
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.276078701019287,
      "learning_rate": 0.00017823653429544832,
      "loss": 3.049,
      "step": 145853
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.044220447540283,
      "learning_rate": 0.0001782327958308597,
      "loss": 3.0926,
      "step": 145854
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6008386611938477,
      "learning_rate": 0.0001782290573889099,
      "loss": 2.8076,
      "step": 145855
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.100843906402588,
      "learning_rate": 0.00017822531896959963,
      "loss": 2.8834,
      "step": 145856
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.03995418548584,
      "learning_rate": 0.00017822158057292966,
      "loss": 2.9711,
      "step": 145857
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.748263120651245,
      "learning_rate": 0.0001782178421989005,
      "loss": 2.8377,
      "step": 145858
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.63932466506958,
      "learning_rate": 0.00017821410384751303,
      "loss": 2.9518,
      "step": 145859
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1344656944274902,
      "learning_rate": 0.00017821036551876775,
      "loss": 3.1123,
      "step": 145860
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1476707458496094,
      "learning_rate": 0.00017820662721266552,
      "loss": 2.8066,
      "step": 145861
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.680968761444092,
      "learning_rate": 0.00017820288892920694,
      "loss": 2.9801,
      "step": 145862
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.832956314086914,
      "learning_rate": 0.00017819915066839273,
      "loss": 2.9798,
      "step": 145863
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4816231727600098,
      "learning_rate": 0.00017819541243022362,
      "loss": 3.2848,
      "step": 145864
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0247745513916016,
      "learning_rate": 0.00017819167421470036,
      "loss": 2.9341,
      "step": 145865
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6949923038482666,
      "learning_rate": 0.00017818793602182353,
      "loss": 2.9641,
      "step": 145866
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.74359393119812,
      "learning_rate": 0.00017818419785159382,
      "loss": 3.3012,
      "step": 145867
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.346475839614868,
      "learning_rate": 0.00017818045970401198,
      "loss": 2.738,
      "step": 145868
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.162398338317871,
      "learning_rate": 0.00017817672157907868,
      "loss": 3.0691,
      "step": 145869
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8170435428619385,
      "learning_rate": 0.00017817298347679462,
      "loss": 2.8659,
      "step": 145870
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0628225803375244,
      "learning_rate": 0.00017816924539716049,
      "loss": 3.014,
      "step": 145871
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0968987941741943,
      "learning_rate": 0.00017816550734017713,
      "loss": 2.865,
      "step": 145872
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.341754198074341,
      "learning_rate": 0.00017816176930584497,
      "loss": 3.0293,
      "step": 145873
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.168131113052368,
      "learning_rate": 0.00017815803129416486,
      "loss": 2.8703,
      "step": 145874
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1503117084503174,
      "learning_rate": 0.00017815429330513748,
      "loss": 2.899,
      "step": 145875
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5097861289978027,
      "learning_rate": 0.00017815055533876352,
      "loss": 3.0013,
      "step": 145876
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4520344734191895,
      "learning_rate": 0.00017814681739504364,
      "loss": 2.7886,
      "step": 145877
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1182267665863037,
      "learning_rate": 0.0001781430794739787,
      "loss": 2.9553,
      "step": 145878
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2752246856689453,
      "learning_rate": 0.00017813934157556914,
      "loss": 2.9889,
      "step": 145879
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1633400917053223,
      "learning_rate": 0.00017813560369981578,
      "loss": 3.0318,
      "step": 145880
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2131569385528564,
      "learning_rate": 0.00017813186584671927,
      "loss": 3.1645,
      "step": 145881
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3109824657440186,
      "learning_rate": 0.0001781281280162804,
      "loss": 2.8217,
      "step": 145882
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1729698181152344,
      "learning_rate": 0.00017812439020849983,
      "loss": 3.0337,
      "step": 145883
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.889495849609375,
      "learning_rate": 0.0001781206524233783,
      "loss": 2.9064,
      "step": 145884
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.085395097732544,
      "learning_rate": 0.00017811691466091637,
      "loss": 2.9396,
      "step": 145885
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8210368156433105,
      "learning_rate": 0.00017811317692111476,
      "loss": 2.9824,
      "step": 145886
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1296355724334717,
      "learning_rate": 0.00017810943920397427,
      "loss": 2.9929,
      "step": 145887
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.584362745285034,
      "learning_rate": 0.00017810570150949548,
      "loss": 3.0846,
      "step": 145888
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1811511516571045,
      "learning_rate": 0.00017810196383767916,
      "loss": 3.2185,
      "step": 145889
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0741055011749268,
      "learning_rate": 0.00017809822618852612,
      "loss": 2.9128,
      "step": 145890
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8493456840515137,
      "learning_rate": 0.00017809448856203678,
      "loss": 2.9804,
      "step": 145891
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.027672290802002,
      "learning_rate": 0.00017809075095821198,
      "loss": 2.8611,
      "step": 145892
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.362382173538208,
      "learning_rate": 0.0001780870133770524,
      "loss": 2.8384,
      "step": 145893
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3239243030548096,
      "learning_rate": 0.00017808327581855877,
      "loss": 3.2122,
      "step": 145894
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7773475646972656,
      "learning_rate": 0.00017807953828273174,
      "loss": 2.816,
      "step": 145895
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.218361854553223,
      "learning_rate": 0.00017807580076957218,
      "loss": 2.9521,
      "step": 145896
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8628013134002686,
      "learning_rate": 0.00017807206327908047,
      "loss": 2.887,
      "step": 145897
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7595221996307373,
      "learning_rate": 0.00017806832581125745,
      "loss": 3.1279,
      "step": 145898
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.996161937713623,
      "learning_rate": 0.00017806458836610388,
      "loss": 2.9796,
      "step": 145899
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.979717254638672,
      "learning_rate": 0.0001780608509436204,
      "loss": 2.933,
      "step": 145900
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.513509750366211,
      "learning_rate": 0.0001780571135438077,
      "loss": 2.9226,
      "step": 145901
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.165001392364502,
      "learning_rate": 0.0001780533761666666,
      "loss": 2.9595,
      "step": 145902
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1469080448150635,
      "learning_rate": 0.00017804963881219756,
      "loss": 3.0321,
      "step": 145903
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.370413064956665,
      "learning_rate": 0.0001780459014804014,
      "loss": 2.9245,
      "step": 145904
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1344034671783447,
      "learning_rate": 0.00017804216417127879,
      "loss": 2.8584,
      "step": 145905
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7108802795410156,
      "learning_rate": 0.00017803842688483043,
      "loss": 2.9,
      "step": 145906
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.428845167160034,
      "learning_rate": 0.00017803468962105705,
      "loss": 2.8241,
      "step": 145907
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.524434804916382,
      "learning_rate": 0.00017803095237995946,
      "loss": 3.0144,
      "step": 145908
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3562467098236084,
      "learning_rate": 0.00017802721516153806,
      "loss": 3.0944,
      "step": 145909
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1699507236480713,
      "learning_rate": 0.00017802347796579373,
      "loss": 3.0223,
      "step": 145910
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.496799945831299,
      "learning_rate": 0.00017801974079272715,
      "loss": 2.7417,
      "step": 145911
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.355985641479492,
      "learning_rate": 0.000178016003642339,
      "loss": 2.8003,
      "step": 145912
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9750475883483887,
      "learning_rate": 0.00017801226651462993,
      "loss": 3.1106,
      "step": 145913
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0973854064941406,
      "learning_rate": 0.00017800852940960088,
      "loss": 3.1191,
      "step": 145914
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9439396858215332,
      "learning_rate": 0.0001780047923272522,
      "loss": 3.0351,
      "step": 145915
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.269660234451294,
      "learning_rate": 0.00017800105526758468,
      "loss": 3.0398,
      "step": 145916
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.832130193710327,
      "learning_rate": 0.00017799731823059912,
      "loss": 3.2381,
      "step": 145917
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7208690643310547,
      "learning_rate": 0.00017799358121629614,
      "loss": 3.0428,
      "step": 145918
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2817957401275635,
      "learning_rate": 0.00017798984422467645,
      "loss": 2.804,
      "step": 145919
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.177612066268921,
      "learning_rate": 0.0001779861072557409,
      "loss": 3.0758,
      "step": 145920
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9645427465438843,
      "learning_rate": 0.00017798237030948992,
      "loss": 2.811,
      "step": 145921
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7998688220977783,
      "learning_rate": 0.00017797863338592427,
      "loss": 2.9372,
      "step": 145922
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0685958862304688,
      "learning_rate": 0.00017797489648504471,
      "loss": 3.1005,
      "step": 145923
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.141289234161377,
      "learning_rate": 0.00017797115960685198,
      "loss": 3.009,
      "step": 145924
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.073643922805786,
      "learning_rate": 0.00017796742275134664,
      "loss": 3.2604,
      "step": 145925
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7996788024902344,
      "learning_rate": 0.00017796368591852955,
      "loss": 3.0197,
      "step": 145926
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.107954263687134,
      "learning_rate": 0.00017795994910840132,
      "loss": 3.1693,
      "step": 145927
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1940314769744873,
      "learning_rate": 0.0001779562123209626,
      "loss": 2.8893,
      "step": 145928
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3195741176605225,
      "learning_rate": 0.00017795247555621406,
      "loss": 3.1078,
      "step": 145929
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.60952091217041,
      "learning_rate": 0.0001779487388141565,
      "loss": 2.9257,
      "step": 145930
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4760632514953613,
      "learning_rate": 0.0001779450020947906,
      "loss": 2.7648,
      "step": 145931
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.679192066192627,
      "learning_rate": 0.00017794126539811699,
      "loss": 3.0716,
      "step": 145932
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4090278148651123,
      "learning_rate": 0.00017793752872413647,
      "loss": 3.2054,
      "step": 145933
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9123940467834473,
      "learning_rate": 0.0001779337920728496,
      "loss": 2.9902,
      "step": 145934
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7141549587249756,
      "learning_rate": 0.00017793005544425723,
      "loss": 3.0559,
      "step": 145935
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8030660152435303,
      "learning_rate": 0.00017792631883835987,
      "loss": 3.1632,
      "step": 145936
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.439194440841675,
      "learning_rate": 0.00017792258225515835,
      "loss": 3.0701,
      "step": 145937
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.101674795150757,
      "learning_rate": 0.00017791884569465328,
      "loss": 3.1793,
      "step": 145938
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.420764207839966,
      "learning_rate": 0.00017791510915684552,
      "loss": 2.9612,
      "step": 145939
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.859567165374756,
      "learning_rate": 0.00017791137264173553,
      "loss": 2.81,
      "step": 145940
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.9743340015411377,
      "learning_rate": 0.00017790763614932415,
      "loss": 2.9222,
      "step": 145941
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.629394054412842,
      "learning_rate": 0.00017790389967961207,
      "loss": 2.9653,
      "step": 145942
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.222566843032837,
      "learning_rate": 0.00017790016323259995,
      "loss": 2.8083,
      "step": 145943
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9228463172912598,
      "learning_rate": 0.00017789642680828845,
      "loss": 2.8725,
      "step": 145944
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.433608293533325,
      "learning_rate": 0.0001778926904066784,
      "loss": 2.9382,
      "step": 145945
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7035763263702393,
      "learning_rate": 0.00017788895402777035,
      "loss": 2.9079,
      "step": 145946
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.276854991912842,
      "learning_rate": 0.000177885217671565,
      "loss": 3.1599,
      "step": 145947
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6040239334106445,
      "learning_rate": 0.00017788148133806312,
      "loss": 2.9535,
      "step": 145948
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.798349142074585,
      "learning_rate": 0.00017787774502726547,
      "loss": 2.9383,
      "step": 145949
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2167532444000244,
      "learning_rate": 0.00017787400873917256,
      "loss": 2.6325,
      "step": 145950
    },
    {
      "epoch": 1.9,
      "grad_norm": 5.887591361999512,
      "learning_rate": 0.00017787027247378524,
      "loss": 2.8555,
      "step": 145951
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.609180450439453,
      "learning_rate": 0.00017786653623110408,
      "loss": 2.9602,
      "step": 145952
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2705278396606445,
      "learning_rate": 0.00017786280001112986,
      "loss": 3.2593,
      "step": 145953
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.774235248565674,
      "learning_rate": 0.00017785906381386323,
      "loss": 2.9103,
      "step": 145954
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6418256759643555,
      "learning_rate": 0.0001778553276393049,
      "loss": 3.0283,
      "step": 145955
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.199756622314453,
      "learning_rate": 0.00017785159148745564,
      "loss": 2.8779,
      "step": 145956
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.078341007232666,
      "learning_rate": 0.00017784785535831607,
      "loss": 2.9549,
      "step": 145957
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3394558429718018,
      "learning_rate": 0.00017784411925188685,
      "loss": 3.0181,
      "step": 145958
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1478652954101562,
      "learning_rate": 0.0001778403831681687,
      "loss": 2.9554,
      "step": 145959
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.886073589324951,
      "learning_rate": 0.0001778366471071623,
      "loss": 3.2339,
      "step": 145960
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2446603775024414,
      "learning_rate": 0.0001778329110688684,
      "loss": 2.8323,
      "step": 145961
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.393442392349243,
      "learning_rate": 0.00017782917505328768,
      "loss": 3.065,
      "step": 145962
    },
    {
      "epoch": 1.9,
      "grad_norm": 5.077086448669434,
      "learning_rate": 0.0001778254390604209,
      "loss": 2.816,
      "step": 145963
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0740790367126465,
      "learning_rate": 0.00017782170309026859,
      "loss": 2.9408,
      "step": 145964
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3197195529937744,
      "learning_rate": 0.00017781796714283154,
      "loss": 2.9843,
      "step": 145965
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1537201404571533,
      "learning_rate": 0.00017781423121811043,
      "loss": 2.6804,
      "step": 145966
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.463003396987915,
      "learning_rate": 0.00017781049531610598,
      "loss": 3.1519,
      "step": 145967
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.9066030979156494,
      "learning_rate": 0.00017780675943681882,
      "loss": 3.0795,
      "step": 145968
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5910959243774414,
      "learning_rate": 0.00017780302358024982,
      "loss": 3.0246,
      "step": 145969
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.273010730743408,
      "learning_rate": 0.00017779928774639946,
      "loss": 2.7495,
      "step": 145970
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.90934157371521,
      "learning_rate": 0.00017779555193526847,
      "loss": 3.0712,
      "step": 145971
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1830923557281494,
      "learning_rate": 0.00017779181614685762,
      "loss": 2.9845,
      "step": 145972
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0885884761810303,
      "learning_rate": 0.00017778808038116756,
      "loss": 2.8954,
      "step": 145973
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1310455799102783,
      "learning_rate": 0.00017778434463819906,
      "loss": 3.06,
      "step": 145974
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5903971195220947,
      "learning_rate": 0.0001777806089179528,
      "loss": 2.9884,
      "step": 145975
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5177948474884033,
      "learning_rate": 0.00017777687322042938,
      "loss": 2.9469,
      "step": 145976
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.741161584854126,
      "learning_rate": 0.0001777731375456295,
      "loss": 2.9672,
      "step": 145977
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.638193130493164,
      "learning_rate": 0.00017776940189355393,
      "loss": 2.8629,
      "step": 145978
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4740569591522217,
      "learning_rate": 0.0001777656662642033,
      "loss": 2.9215,
      "step": 145979
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2911994457244873,
      "learning_rate": 0.00017776193065757837,
      "loss": 2.8683,
      "step": 145980
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2115063667297363,
      "learning_rate": 0.00017775819507367992,
      "loss": 3.0598,
      "step": 145981
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.57537841796875,
      "learning_rate": 0.00017775445951250839,
      "loss": 2.82,
      "step": 145982
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9844739437103271,
      "learning_rate": 0.00017775072397406463,
      "loss": 3.026,
      "step": 145983
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3919870853424072,
      "learning_rate": 0.00017774698845834933,
      "loss": 3.0583,
      "step": 145984
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3749327659606934,
      "learning_rate": 0.00017774325296536313,
      "loss": 3.0188,
      "step": 145985
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2817277908325195,
      "learning_rate": 0.00017773951749510683,
      "loss": 3.0724,
      "step": 145986
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.5815138816833496,
      "learning_rate": 0.00017773578204758112,
      "loss": 3.1911,
      "step": 145987
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.491544246673584,
      "learning_rate": 0.00017773204662278658,
      "loss": 3.0444,
      "step": 145988
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.149808406829834,
      "learning_rate": 0.0001777283112207239,
      "loss": 3.1058,
      "step": 145989
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1812491416931152,
      "learning_rate": 0.00017772457584139386,
      "loss": 3.1738,
      "step": 145990
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.88433039188385,
      "learning_rate": 0.00017772084048479716,
      "loss": 3.0294,
      "step": 145991
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.521561622619629,
      "learning_rate": 0.0001777171051509344,
      "loss": 2.8752,
      "step": 145992
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0900747776031494,
      "learning_rate": 0.0001777133698398065,
      "loss": 2.92,
      "step": 145993
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.056835412979126,
      "learning_rate": 0.00017770963455141387,
      "loss": 2.9088,
      "step": 145994
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.176832437515259,
      "learning_rate": 0.0001777058992857573,
      "loss": 2.9093,
      "step": 145995
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4263744354248047,
      "learning_rate": 0.00017770216404283751,
      "loss": 2.8621,
      "step": 145996
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1911940574645996,
      "learning_rate": 0.00017769842882265523,
      "loss": 2.8408,
      "step": 145997
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.322901725769043,
      "learning_rate": 0.00017769469362521117,
      "loss": 3.0998,
      "step": 145998
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.79925799369812,
      "learning_rate": 0.0001776909584505059,
      "loss": 3.1401,
      "step": 145999
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.691965341567993,
      "learning_rate": 0.0001776872232985403,
      "loss": 3.0927,
      "step": 146000
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9677658081054688,
      "learning_rate": 0.00017768348816931485,
      "loss": 2.9619,
      "step": 146001
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.035062313079834,
      "learning_rate": 0.00017767975306283036,
      "loss": 3.1806,
      "step": 146002
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.54744291305542,
      "learning_rate": 0.00017767601797908752,
      "loss": 2.7664,
      "step": 146003
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6830480098724365,
      "learning_rate": 0.00017767228291808702,
      "loss": 3.0088,
      "step": 146004
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6447958946228027,
      "learning_rate": 0.00017766854787982952,
      "loss": 3.0431,
      "step": 146005
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3936712741851807,
      "learning_rate": 0.00017766481286431588,
      "loss": 2.7631,
      "step": 146006
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.134938716888428,
      "learning_rate": 0.00017766107787154655,
      "loss": 2.9536,
      "step": 146007
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3421950340270996,
      "learning_rate": 0.00017765734290152237,
      "loss": 2.8178,
      "step": 146008
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.049026966094971,
      "learning_rate": 0.00017765360795424392,
      "loss": 2.8532,
      "step": 146009
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9553282260894775,
      "learning_rate": 0.00017764987302971202,
      "loss": 3.1258,
      "step": 146010
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.276381254196167,
      "learning_rate": 0.0001776461381279273,
      "loss": 2.8008,
      "step": 146011
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.8148787021636963,
      "learning_rate": 0.00017764240324889062,
      "loss": 3.0594,
      "step": 146012
    },
    {
      "epoch": 1.9,
      "grad_norm": 5.333707809448242,
      "learning_rate": 0.0001776386683926024,
      "loss": 3.0279,
      "step": 146013
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6465156078338623,
      "learning_rate": 0.00017763493355906341,
      "loss": 2.8154,
      "step": 146014
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.953456997871399,
      "learning_rate": 0.00017763119874827444,
      "loss": 2.964,
      "step": 146015
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1494357585906982,
      "learning_rate": 0.00017762746396023614,
      "loss": 3.063,
      "step": 146016
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.713268756866455,
      "learning_rate": 0.00017762372919494926,
      "loss": 2.8791,
      "step": 146017
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.4566593170166016,
      "learning_rate": 0.0001776199944524144,
      "loss": 2.9229,
      "step": 146018
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0117580890655518,
      "learning_rate": 0.00017761625973263236,
      "loss": 3.0191,
      "step": 146019
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.683384656906128,
      "learning_rate": 0.00017761252503560368,
      "loss": 2.9277,
      "step": 146020
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5528342723846436,
      "learning_rate": 0.00017760879036132912,
      "loss": 3.1549,
      "step": 146021
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8624680042266846,
      "learning_rate": 0.00017760505570980944,
      "loss": 2.7779,
      "step": 146022
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3315651416778564,
      "learning_rate": 0.00017760132108104527,
      "loss": 2.6576,
      "step": 146023
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2199466228485107,
      "learning_rate": 0.00017759758647503739,
      "loss": 2.9455,
      "step": 146024
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3518240451812744,
      "learning_rate": 0.00017759385189178634,
      "loss": 3.1211,
      "step": 146025
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2633213996887207,
      "learning_rate": 0.00017759011733129297,
      "loss": 2.9353,
      "step": 146026
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.335444927215576,
      "learning_rate": 0.0001775863827935579,
      "loss": 2.9397,
      "step": 146027
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4631290435791016,
      "learning_rate": 0.00017758264827858177,
      "loss": 2.8258,
      "step": 146028
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.192157030105591,
      "learning_rate": 0.00017757891378636538,
      "loss": 3.175,
      "step": 146029
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.8633017539978027,
      "learning_rate": 0.00017757517931690943,
      "loss": 2.7337,
      "step": 146030
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.930253267288208,
      "learning_rate": 0.0001775714448702145,
      "loss": 2.6849,
      "step": 146031
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1034324169158936,
      "learning_rate": 0.00017756771044628137,
      "loss": 2.8741,
      "step": 146032
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.265998125076294,
      "learning_rate": 0.0001775639760451107,
      "loss": 3.1158,
      "step": 146033
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3855831623077393,
      "learning_rate": 0.00017756024166670322,
      "loss": 2.8651,
      "step": 146034
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2170114517211914,
      "learning_rate": 0.00017755650731105958,
      "loss": 2.7026,
      "step": 146035
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.947597026824951,
      "learning_rate": 0.00017755277297818055,
      "loss": 2.8504,
      "step": 146036
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.375729560852051,
      "learning_rate": 0.00017754903866806673,
      "loss": 2.9938,
      "step": 146037
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6530301570892334,
      "learning_rate": 0.0001775453043807188,
      "loss": 2.8616,
      "step": 146038
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1913928985595703,
      "learning_rate": 0.0001775415701161376,
      "loss": 2.7813,
      "step": 146039
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8009073734283447,
      "learning_rate": 0.00017753783587432366,
      "loss": 3.0115,
      "step": 146040
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.112224817276001,
      "learning_rate": 0.00017753410165527783,
      "loss": 2.7538,
      "step": 146041
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2106423377990723,
      "learning_rate": 0.00017753036745900072,
      "loss": 2.9654,
      "step": 146042
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.511111259460449,
      "learning_rate": 0.00017752663328549298,
      "loss": 2.8834,
      "step": 146043
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.628443717956543,
      "learning_rate": 0.0001775228991347553,
      "loss": 2.9796,
      "step": 146044
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4384403228759766,
      "learning_rate": 0.00017751916500678848,
      "loss": 3.068,
      "step": 146045
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9943777322769165,
      "learning_rate": 0.00017751543090159313,
      "loss": 3.0147,
      "step": 146046
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.313674211502075,
      "learning_rate": 0.00017751169681916998,
      "loss": 3.2601,
      "step": 146047
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.841538906097412,
      "learning_rate": 0.00017750796275951985,
      "loss": 3.1191,
      "step": 146048
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5336172580718994,
      "learning_rate": 0.00017750422872264316,
      "loss": 3.0677,
      "step": 146049
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3248095512390137,
      "learning_rate": 0.0001775004947085408,
      "loss": 2.9093,
      "step": 146050
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0339012145996094,
      "learning_rate": 0.00017749676071721334,
      "loss": 2.5898,
      "step": 146051
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1280574798583984,
      "learning_rate": 0.00017749302674866158,
      "loss": 2.7935,
      "step": 146052
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5722146034240723,
      "learning_rate": 0.00017748929280288618,
      "loss": 2.6934,
      "step": 146053
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.213834762573242,
      "learning_rate": 0.00017748555887988796,
      "loss": 3.1231,
      "step": 146054
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4261505603790283,
      "learning_rate": 0.00017748182497966735,
      "loss": 2.8183,
      "step": 146055
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.222749948501587,
      "learning_rate": 0.00017747809110222517,
      "loss": 2.9699,
      "step": 146056
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9871013164520264,
      "learning_rate": 0.00017747435724756216,
      "loss": 2.9923,
      "step": 146057
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.311044692993164,
      "learning_rate": 0.00017747062341567896,
      "loss": 2.9062,
      "step": 146058
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.515826463699341,
      "learning_rate": 0.00017746688960657627,
      "loss": 2.8074,
      "step": 146059
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.442211866378784,
      "learning_rate": 0.00017746315582025494,
      "loss": 2.8409,
      "step": 146060
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2718124389648438,
      "learning_rate": 0.0001774594220567154,
      "loss": 3.0381,
      "step": 146061
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.516810178756714,
      "learning_rate": 0.00017745568831595846,
      "loss": 2.8993,
      "step": 146062
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.224846839904785,
      "learning_rate": 0.00017745195459798483,
      "loss": 3.1619,
      "step": 146063
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.653326988220215,
      "learning_rate": 0.00017744822090279521,
      "loss": 2.8085,
      "step": 146064
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.622270345687866,
      "learning_rate": 0.00017744448723039022,
      "loss": 3.0444,
      "step": 146065
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.354418992996216,
      "learning_rate": 0.00017744075358077068,
      "loss": 2.8453,
      "step": 146066
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5858232975006104,
      "learning_rate": 0.0001774370199539373,
      "loss": 2.8711,
      "step": 146067
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.063514232635498,
      "learning_rate": 0.00017743328634989058,
      "loss": 3.145,
      "step": 146068
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.510373592376709,
      "learning_rate": 0.00017742955276863132,
      "loss": 2.8101,
      "step": 146069
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3211541175842285,
      "learning_rate": 0.00017742581921016023,
      "loss": 3.0717,
      "step": 146070
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.253984212875366,
      "learning_rate": 0.00017742208567447798,
      "loss": 2.8791,
      "step": 146071
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.5191149711608887,
      "learning_rate": 0.0001774183521615853,
      "loss": 2.8115,
      "step": 146072
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.237544059753418,
      "learning_rate": 0.00017741461867148297,
      "loss": 3.0955,
      "step": 146073
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.522519826889038,
      "learning_rate": 0.00017741088520417146,
      "loss": 2.8541,
      "step": 146074
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3327620029449463,
      "learning_rate": 0.00017740715175965156,
      "loss": 2.9948,
      "step": 146075
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.8577160835266113,
      "learning_rate": 0.00017740341833792403,
      "loss": 2.9674,
      "step": 146076
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.598231077194214,
      "learning_rate": 0.0001773996849389895,
      "loss": 3.0573,
      "step": 146077
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0822372436523438,
      "learning_rate": 0.00017739595156284865,
      "loss": 2.7918,
      "step": 146078
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9758981466293335,
      "learning_rate": 0.00017739221820950232,
      "loss": 2.9985,
      "step": 146079
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.076324224472046,
      "learning_rate": 0.000177388484878951,
      "loss": 2.8699,
      "step": 146080
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.857078790664673,
      "learning_rate": 0.00017738475157119546,
      "loss": 2.9407,
      "step": 146081
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.245913505554199,
      "learning_rate": 0.00017738101828623643,
      "loss": 2.8235,
      "step": 146082
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.122570037841797,
      "learning_rate": 0.00017737728502407462,
      "loss": 2.9526,
      "step": 146083
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9676014184951782,
      "learning_rate": 0.00017737355178471065,
      "loss": 2.8397,
      "step": 146084
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.132155418395996,
      "learning_rate": 0.00017736981856814537,
      "loss": 3.1554,
      "step": 146085
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.182800531387329,
      "learning_rate": 0.0001773660853743792,
      "loss": 2.937,
      "step": 146086
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.610581636428833,
      "learning_rate": 0.00017736235220341303,
      "loss": 2.8908,
      "step": 146087
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3635711669921875,
      "learning_rate": 0.00017735861905524749,
      "loss": 3.0236,
      "step": 146088
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.183999538421631,
      "learning_rate": 0.0001773548859298833,
      "loss": 2.9191,
      "step": 146089
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2022180557250977,
      "learning_rate": 0.00017735115282732118,
      "loss": 3.0533,
      "step": 146090
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0746099948883057,
      "learning_rate": 0.0001773474197475619,
      "loss": 2.8389,
      "step": 146091
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8784148693084717,
      "learning_rate": 0.00017734368669060596,
      "loss": 3.1177,
      "step": 146092
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5093166828155518,
      "learning_rate": 0.00017733995365645412,
      "loss": 2.9894,
      "step": 146093
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3223299980163574,
      "learning_rate": 0.00017733622064510705,
      "loss": 2.9422,
      "step": 146094
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2660274505615234,
      "learning_rate": 0.0001773324876565656,
      "loss": 2.9215,
      "step": 146095
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.69112229347229,
      "learning_rate": 0.00017732875469083026,
      "loss": 2.6854,
      "step": 146096
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9294121265411377,
      "learning_rate": 0.00017732502174790194,
      "loss": 2.971,
      "step": 146097
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8086578845977783,
      "learning_rate": 0.00017732128882778114,
      "loss": 2.8099,
      "step": 146098
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0584042072296143,
      "learning_rate": 0.00017731755593046862,
      "loss": 2.9119,
      "step": 146099
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1195855140686035,
      "learning_rate": 0.00017731382305596509,
      "loss": 2.9331,
      "step": 146100
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3705785274505615,
      "learning_rate": 0.00017731009020427124,
      "loss": 3.0364,
      "step": 146101
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.337653160095215,
      "learning_rate": 0.0001773063573753877,
      "loss": 2.6756,
      "step": 146102
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.930684804916382,
      "learning_rate": 0.00017730262456931532,
      "loss": 2.9983,
      "step": 146103
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9611159563064575,
      "learning_rate": 0.00017729889178605472,
      "loss": 2.8983,
      "step": 146104
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.273793935775757,
      "learning_rate": 0.0001772951590256065,
      "loss": 2.8716,
      "step": 146105
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.096318244934082,
      "learning_rate": 0.00017729142628797144,
      "loss": 3.1207,
      "step": 146106
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1986260414123535,
      "learning_rate": 0.0001772876935731502,
      "loss": 3.1283,
      "step": 146107
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.146899700164795,
      "learning_rate": 0.00017728396088114348,
      "loss": 2.8937,
      "step": 146108
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1753034591674805,
      "learning_rate": 0.00017728022821195209,
      "loss": 2.9365,
      "step": 146109
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5168004035949707,
      "learning_rate": 0.00017727649556557653,
      "loss": 2.7358,
      "step": 146110
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.073025941848755,
      "learning_rate": 0.00017727276294201766,
      "loss": 3.1491,
      "step": 146111
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.095433235168457,
      "learning_rate": 0.000177269030341276,
      "loss": 2.9956,
      "step": 146112
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.403167724609375,
      "learning_rate": 0.00017726529776335238,
      "loss": 3.0365,
      "step": 146113
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.321937084197998,
      "learning_rate": 0.00017726156520824746,
      "loss": 2.9296,
      "step": 146114
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.158579111099243,
      "learning_rate": 0.000177257832675962,
      "loss": 2.9441,
      "step": 146115
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.980799913406372,
      "learning_rate": 0.00017725410016649658,
      "loss": 2.7962,
      "step": 146116
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5194623470306396,
      "learning_rate": 0.00017725036767985187,
      "loss": 2.9319,
      "step": 146117
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7908272743225098,
      "learning_rate": 0.0001772466352160287,
      "loss": 2.8681,
      "step": 146118
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.319007396697998,
      "learning_rate": 0.00017724290277502773,
      "loss": 2.8883,
      "step": 146119
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1909685134887695,
      "learning_rate": 0.00017723917035684957,
      "loss": 3.2349,
      "step": 146120
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.286975145339966,
      "learning_rate": 0.000177235437961495,
      "loss": 2.8134,
      "step": 146121
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5704593658447266,
      "learning_rate": 0.00017723170558896464,
      "loss": 3.0576,
      "step": 146122
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1868343353271484,
      "learning_rate": 0.00017722797323925925,
      "loss": 2.8488,
      "step": 146123
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1245901584625244,
      "learning_rate": 0.00017722424091237945,
      "loss": 2.9391,
      "step": 146124
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.154071092605591,
      "learning_rate": 0.00017722050860832604,
      "loss": 3.0829,
      "step": 146125
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.585890769958496,
      "learning_rate": 0.00017721677632709967,
      "loss": 2.8952,
      "step": 146126
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.579099178314209,
      "learning_rate": 0.00017721304406870104,
      "loss": 3.0045,
      "step": 146127
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2483978271484375,
      "learning_rate": 0.00017720931183313072,
      "loss": 3.0145,
      "step": 146128
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6763713359832764,
      "learning_rate": 0.00017720557962038955,
      "loss": 2.8906,
      "step": 146129
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0340726375579834,
      "learning_rate": 0.00017720184743047815,
      "loss": 3.0072,
      "step": 146130
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.705460548400879,
      "learning_rate": 0.00017719811526339726,
      "loss": 3.0097,
      "step": 146131
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.994525671005249,
      "learning_rate": 0.00017719438311914756,
      "loss": 3.1088,
      "step": 146132
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0677127838134766,
      "learning_rate": 0.00017719065099772982,
      "loss": 2.978,
      "step": 146133
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.345050573348999,
      "learning_rate": 0.00017718691889914465,
      "loss": 2.8859,
      "step": 146134
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.390155076980591,
      "learning_rate": 0.00017718318682339267,
      "loss": 3.1187,
      "step": 146135
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2234103679656982,
      "learning_rate": 0.00017717945477047466,
      "loss": 3.1047,
      "step": 146136
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.7859671115875244,
      "learning_rate": 0.00017717572274039128,
      "loss": 2.9546,
      "step": 146137
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5647432804107666,
      "learning_rate": 0.0001771719907331433,
      "loss": 2.9075,
      "step": 146138
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.972491979598999,
      "learning_rate": 0.00017716825874873135,
      "loss": 2.8482,
      "step": 146139
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.8047304153442383,
      "learning_rate": 0.00017716452678715624,
      "loss": 3.1511,
      "step": 146140
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2999277114868164,
      "learning_rate": 0.00017716079484841847,
      "loss": 3.1269,
      "step": 146141
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.005690336227417,
      "learning_rate": 0.00017715706293251884,
      "loss": 2.952,
      "step": 146142
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.22282075881958,
      "learning_rate": 0.000177153331039458,
      "loss": 2.8951,
      "step": 146143
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.5577211380004883,
      "learning_rate": 0.00017714959916923667,
      "loss": 2.9771,
      "step": 146144
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.893155813217163,
      "learning_rate": 0.00017714586732185556,
      "loss": 2.8817,
      "step": 146145
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2807118892669678,
      "learning_rate": 0.0001771421354973155,
      "loss": 3.0855,
      "step": 146146
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.1228766441345215,
      "learning_rate": 0.0001771384036956169,
      "loss": 3.0945,
      "step": 146147
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.883311748504639,
      "learning_rate": 0.00017713467191676055,
      "loss": 3.0135,
      "step": 146148
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.296437740325928,
      "learning_rate": 0.00017713094016074722,
      "loss": 2.7811,
      "step": 146149
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3198132514953613,
      "learning_rate": 0.00017712720842757757,
      "loss": 2.9536,
      "step": 146150
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4569051265716553,
      "learning_rate": 0.0001771234767172523,
      "loss": 3.0312,
      "step": 146151
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2610325813293457,
      "learning_rate": 0.00017711974502977217,
      "loss": 2.9852,
      "step": 146152
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.958506107330322,
      "learning_rate": 0.0001771160133651377,
      "loss": 2.8674,
      "step": 146153
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.42280912399292,
      "learning_rate": 0.0001771122817233497,
      "loss": 3.2622,
      "step": 146154
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5372395515441895,
      "learning_rate": 0.00017710855010440882,
      "loss": 2.7738,
      "step": 146155
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2819340229034424,
      "learning_rate": 0.00017710481850831582,
      "loss": 2.7958,
      "step": 146156
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6861751079559326,
      "learning_rate": 0.00017710108693507132,
      "loss": 2.9961,
      "step": 146157
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7139205932617188,
      "learning_rate": 0.0001770973553846762,
      "loss": 2.8238,
      "step": 146158
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.8655147552490234,
      "learning_rate": 0.00017709362385713085,
      "loss": 3.2295,
      "step": 146159
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.522127389907837,
      "learning_rate": 0.0001770898923524361,
      "loss": 2.8116,
      "step": 146160
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.7539993524551392,
      "learning_rate": 0.0001770861608705927,
      "loss": 2.7848,
      "step": 146161
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8866727352142334,
      "learning_rate": 0.00017708242941160128,
      "loss": 2.806,
      "step": 146162
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.76104998588562,
      "learning_rate": 0.00017707869797546256,
      "loss": 2.7832,
      "step": 146163
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.748372793197632,
      "learning_rate": 0.00017707496656217735,
      "loss": 3.0988,
      "step": 146164
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.20650315284729,
      "learning_rate": 0.00017707123517174612,
      "loss": 3.1603,
      "step": 146165
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.628932476043701,
      "learning_rate": 0.00017706750380416962,
      "loss": 2.7146,
      "step": 146166
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.291097402572632,
      "learning_rate": 0.00017706377245944866,
      "loss": 2.9695,
      "step": 146167
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4662137031555176,
      "learning_rate": 0.0001770600411375838,
      "loss": 2.8595,
      "step": 146168
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.089953660964966,
      "learning_rate": 0.00017705630983857584,
      "loss": 3.0083,
      "step": 146169
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2665343284606934,
      "learning_rate": 0.0001770525785624256,
      "loss": 3.0594,
      "step": 146170
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.992789626121521,
      "learning_rate": 0.00017704884730913338,
      "loss": 3.0228,
      "step": 146171
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.01149320602417,
      "learning_rate": 0.00017704511607870014,
      "loss": 2.8634,
      "step": 146172
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3314199447631836,
      "learning_rate": 0.00017704138487112657,
      "loss": 2.9557,
      "step": 146173
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.119868278503418,
      "learning_rate": 0.00017703765368641333,
      "loss": 2.9479,
      "step": 146174
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.217982530593872,
      "learning_rate": 0.00017703392252456103,
      "loss": 3.077,
      "step": 146175
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2953083515167236,
      "learning_rate": 0.00017703019138557067,
      "loss": 3.0481,
      "step": 146176
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.296823263168335,
      "learning_rate": 0.00017702646026944254,
      "loss": 2.9559,
      "step": 146177
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.197410821914673,
      "learning_rate": 0.00017702272917617751,
      "loss": 2.7436,
      "step": 146178
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2959706783294678,
      "learning_rate": 0.00017701899810577628,
      "loss": 3.0208,
      "step": 146179
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.425025224685669,
      "learning_rate": 0.00017701526705823958,
      "loss": 2.9534,
      "step": 146180
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2825944423675537,
      "learning_rate": 0.000177011536033568,
      "loss": 3.0789,
      "step": 146181
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.206702709197998,
      "learning_rate": 0.00017700780503176244,
      "loss": 2.9953,
      "step": 146182
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.685948610305786,
      "learning_rate": 0.00017700407405282334,
      "loss": 2.9202,
      "step": 146183
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8133206367492676,
      "learning_rate": 0.0001770003430967515,
      "loss": 2.9931,
      "step": 146184
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0832066535949707,
      "learning_rate": 0.00017699661216354765,
      "loss": 2.8791,
      "step": 146185
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2275259494781494,
      "learning_rate": 0.0001769928812532124,
      "loss": 3.0158,
      "step": 146186
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1466662883758545,
      "learning_rate": 0.00017698915036574653,
      "loss": 2.8058,
      "step": 146187
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2177746295928955,
      "learning_rate": 0.00017698541950115072,
      "loss": 2.7981,
      "step": 146188
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3526031970977783,
      "learning_rate": 0.00017698168865942567,
      "loss": 3.261,
      "step": 146189
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2882847785949707,
      "learning_rate": 0.00017697795784057198,
      "loss": 3.097,
      "step": 146190
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9438303709030151,
      "learning_rate": 0.00017697422704459042,
      "loss": 2.8709,
      "step": 146191
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2517292499542236,
      "learning_rate": 0.00017697049627148165,
      "loss": 3.0732,
      "step": 146192
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3828177452087402,
      "learning_rate": 0.0001769667655212464,
      "loss": 2.8174,
      "step": 146193
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0644631385803223,
      "learning_rate": 0.00017696303479388542,
      "loss": 3.2194,
      "step": 146194
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3496756553649902,
      "learning_rate": 0.00017695930408939928,
      "loss": 2.785,
      "step": 146195
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.064387321472168,
      "learning_rate": 0.00017695557340778877,
      "loss": 3.0704,
      "step": 146196
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2202906608581543,
      "learning_rate": 0.0001769518427490545,
      "loss": 3.1214,
      "step": 146197
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.985257863998413,
      "learning_rate": 0.00017694811211319719,
      "loss": 3.0773,
      "step": 146198
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1675209999084473,
      "learning_rate": 0.00017694438150021752,
      "loss": 2.8217,
      "step": 146199
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.648592233657837,
      "learning_rate": 0.00017694065091011628,
      "loss": 2.9657,
      "step": 146200
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1582658290863037,
      "learning_rate": 0.00017693692034289411,
      "loss": 3.0127,
      "step": 146201
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0492947101593018,
      "learning_rate": 0.00017693318979855165,
      "loss": 2.8875,
      "step": 146202
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7521471977233887,
      "learning_rate": 0.00017692945927708968,
      "loss": 2.7759,
      "step": 146203
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2723236083984375,
      "learning_rate": 0.0001769257287785088,
      "loss": 3.0086,
      "step": 146204
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2727930545806885,
      "learning_rate": 0.00017692199830280972,
      "loss": 3.21,
      "step": 146205
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9429823160171509,
      "learning_rate": 0.0001769182678499932,
      "loss": 2.848,
      "step": 146206
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2575855255126953,
      "learning_rate": 0.00017691453742005995,
      "loss": 3.1426,
      "step": 146207
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2134084701538086,
      "learning_rate": 0.0001769108070130105,
      "loss": 3.0931,
      "step": 146208
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9487199783325195,
      "learning_rate": 0.00017690707662884573,
      "loss": 2.9824,
      "step": 146209
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0200366973876953,
      "learning_rate": 0.00017690334626756625,
      "loss": 3.0806,
      "step": 146210
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.658491849899292,
      "learning_rate": 0.00017689961592917274,
      "loss": 2.8043,
      "step": 146211
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.797179698944092,
      "learning_rate": 0.00017689588561366592,
      "loss": 3.02,
      "step": 146212
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2127482891082764,
      "learning_rate": 0.00017689215532104652,
      "loss": 2.8821,
      "step": 146213
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.230374574661255,
      "learning_rate": 0.00017688842505131515,
      "loss": 2.979,
      "step": 146214
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.252819776535034,
      "learning_rate": 0.00017688469480447254,
      "loss": 3.0406,
      "step": 146215
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.033045768737793,
      "learning_rate": 0.0001768809645805194,
      "loss": 3.0133,
      "step": 146216
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4006221294403076,
      "learning_rate": 0.0001768772343794564,
      "loss": 3.001,
      "step": 146217
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.729620933532715,
      "learning_rate": 0.0001768735042012843,
      "loss": 2.9338,
      "step": 146218
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7093749046325684,
      "learning_rate": 0.00017686977404600377,
      "loss": 2.802,
      "step": 146219
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3385300636291504,
      "learning_rate": 0.00017686604391361536,
      "loss": 2.9545,
      "step": 146220
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6192383766174316,
      "learning_rate": 0.00017686231380411994,
      "loss": 3.0174,
      "step": 146221
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.410151720046997,
      "learning_rate": 0.00017685858371751807,
      "loss": 3.1486,
      "step": 146222
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0721960067749023,
      "learning_rate": 0.00017685485365381056,
      "loss": 2.9278,
      "step": 146223
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.26702880859375,
      "learning_rate": 0.00017685112361299807,
      "loss": 2.9278,
      "step": 146224
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.299111843109131,
      "learning_rate": 0.00017684739359508135,
      "loss": 2.645,
      "step": 146225
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.8062516450881958,
      "learning_rate": 0.00017684366360006096,
      "loss": 2.9086,
      "step": 146226
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.056861639022827,
      "learning_rate": 0.0001768399336279376,
      "loss": 3.0489,
      "step": 146227
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2067272663116455,
      "learning_rate": 0.00017683620367871207,
      "loss": 2.8962,
      "step": 146228
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.6787939071655273,
      "learning_rate": 0.000176832473752385,
      "loss": 2.9128,
      "step": 146229
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.905267357826233,
      "learning_rate": 0.0001768287438489571,
      "loss": 3.0671,
      "step": 146230
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.15004301071167,
      "learning_rate": 0.00017682501396842914,
      "loss": 2.8943,
      "step": 146231
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0033462047576904,
      "learning_rate": 0.00017682128411080166,
      "loss": 2.9797,
      "step": 146232
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0023391246795654,
      "learning_rate": 0.00017681755427607543,
      "loss": 2.986,
      "step": 146233
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.065908432006836,
      "learning_rate": 0.00017681382446425113,
      "loss": 3.1295,
      "step": 146234
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2757620811462402,
      "learning_rate": 0.00017681009467532946,
      "loss": 3.0962,
      "step": 146235
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0244204998016357,
      "learning_rate": 0.0001768063649093111,
      "loss": 2.9745,
      "step": 146236
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.8395555019378662,
      "learning_rate": 0.00017680263516619695,
      "loss": 2.8566,
      "step": 146237
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.482858180999756,
      "learning_rate": 0.00017679890544598735,
      "loss": 2.9633,
      "step": 146238
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8795230388641357,
      "learning_rate": 0.00017679517574868315,
      "loss": 3.0816,
      "step": 146239
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7932186126708984,
      "learning_rate": 0.00017679144607428508,
      "loss": 3.0024,
      "step": 146240
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.394739866256714,
      "learning_rate": 0.00017678771642279376,
      "loss": 2.7783,
      "step": 146241
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3628320693969727,
      "learning_rate": 0.00017678398679421,
      "loss": 3.0487,
      "step": 146242
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.478220224380493,
      "learning_rate": 0.00017678025718853446,
      "loss": 2.9295,
      "step": 146243
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0186424255371094,
      "learning_rate": 0.00017677652760576774,
      "loss": 3.3182,
      "step": 146244
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.423386573791504,
      "learning_rate": 0.00017677279804591056,
      "loss": 2.7792,
      "step": 146245
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5216424465179443,
      "learning_rate": 0.00017676906850896365,
      "loss": 3.1219,
      "step": 146246
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.092538356781006,
      "learning_rate": 0.0001767653389949277,
      "loss": 2.9548,
      "step": 146247
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5491416454315186,
      "learning_rate": 0.0001767616095038034,
      "loss": 3.0848,
      "step": 146248
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1261086463928223,
      "learning_rate": 0.00017675788003559156,
      "loss": 2.8535,
      "step": 146249
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9289494752883911,
      "learning_rate": 0.00017675415059029262,
      "loss": 3.0675,
      "step": 146250
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.503065824508667,
      "learning_rate": 0.00017675042116790745,
      "loss": 2.935,
      "step": 146251
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.111987829208374,
      "learning_rate": 0.00017674669176843667,
      "loss": 3.2516,
      "step": 146252
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7622246742248535,
      "learning_rate": 0.00017674296239188103,
      "loss": 2.8218,
      "step": 146253
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5459370613098145,
      "learning_rate": 0.00017673923303824116,
      "loss": 3.0624,
      "step": 146254
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9671212434768677,
      "learning_rate": 0.00017673550370751796,
      "loss": 3.2637,
      "step": 146255
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.311216115951538,
      "learning_rate": 0.0001767317743997118,
      "loss": 3.058,
      "step": 146256
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.9693994522094727,
      "learning_rate": 0.00017672804511482357,
      "loss": 2.9959,
      "step": 146257
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.1331734657287598,
      "learning_rate": 0.00017672431585285387,
      "loss": 2.9755,
      "step": 146258
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.172969102859497,
      "learning_rate": 0.00017672058661380351,
      "loss": 3.009,
      "step": 146259
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.66050386428833,
      "learning_rate": 0.00017671685739767306,
      "loss": 3.0176,
      "step": 146260
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2316336631774902,
      "learning_rate": 0.00017671312820446343,
      "loss": 2.9045,
      "step": 146261
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1736936569213867,
      "learning_rate": 0.000176709399034175,
      "loss": 3.2726,
      "step": 146262
    },
    {
      "epoch": 1.9,
      "grad_norm": 1.9025471210479736,
      "learning_rate": 0.00017670566988680866,
      "loss": 3.143,
      "step": 146263
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3914101123809814,
      "learning_rate": 0.00017670194076236506,
      "loss": 2.9578,
      "step": 146264
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.756803512573242,
      "learning_rate": 0.0001766982116608449,
      "loss": 3.0022,
      "step": 146265
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.742156744003296,
      "learning_rate": 0.00017669448258224882,
      "loss": 2.8325,
      "step": 146266
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3811094760894775,
      "learning_rate": 0.0001766907535265776,
      "loss": 2.895,
      "step": 146267
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.326469898223877,
      "learning_rate": 0.00017668702449383206,
      "loss": 3.137,
      "step": 146268
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8863422870635986,
      "learning_rate": 0.00017668329548401252,
      "loss": 2.9369,
      "step": 146269
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3698723316192627,
      "learning_rate": 0.00017667956649711995,
      "loss": 2.9615,
      "step": 146270
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.127045154571533,
      "learning_rate": 0.00017667583753315497,
      "loss": 2.7642,
      "step": 146271
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1993651390075684,
      "learning_rate": 0.00017667210859211827,
      "loss": 3.0092,
      "step": 146272
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7182958126068115,
      "learning_rate": 0.00017666837967401052,
      "loss": 3.2374,
      "step": 146273
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.6037957668304443,
      "learning_rate": 0.00017666465077883258,
      "loss": 2.768,
      "step": 146274
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.19500470161438,
      "learning_rate": 0.0001766609219065849,
      "loss": 3.083,
      "step": 146275
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.2303054332733154,
      "learning_rate": 0.00017665719305726832,
      "loss": 3.1113,
      "step": 146276
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.0376176834106445,
      "learning_rate": 0.00017665346423088348,
      "loss": 2.8547,
      "step": 146277
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.357088088989258,
      "learning_rate": 0.00017664973542743105,
      "loss": 3.0915,
      "step": 146278
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.7969274520874023,
      "learning_rate": 0.00017664600664691178,
      "loss": 2.7382,
      "step": 146279
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3031318187713623,
      "learning_rate": 0.0001766422778893264,
      "loss": 2.9325,
      "step": 146280
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.797884941101074,
      "learning_rate": 0.00017663854915467558,
      "loss": 2.977,
      "step": 146281
    },
    {
      "epoch": 1.9,
      "grad_norm": 4.2054762840271,
      "learning_rate": 0.0001766348204429599,
      "loss": 2.8084,
      "step": 146282
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.07357120513916,
      "learning_rate": 0.0001766310917541801,
      "loss": 3.2955,
      "step": 146283
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.825227737426758,
      "learning_rate": 0.00017662736308833696,
      "loss": 2.8217,
      "step": 146284
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.059375047683716,
      "learning_rate": 0.00017662363444543113,
      "loss": 3.015,
      "step": 146285
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.0691287517547607,
      "learning_rate": 0.00017661990582546334,
      "loss": 2.9343,
      "step": 146286
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.797821521759033,
      "learning_rate": 0.00017661617722843415,
      "loss": 3.0847,
      "step": 146287
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.003725528717041,
      "learning_rate": 0.00017661244865434445,
      "loss": 2.9874,
      "step": 146288
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.00612211227417,
      "learning_rate": 0.0001766087201031947,
      "loss": 2.7405,
      "step": 146289
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8926684856414795,
      "learning_rate": 0.0001766049915749858,
      "loss": 2.9625,
      "step": 146290
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8836307525634766,
      "learning_rate": 0.0001766012630697183,
      "loss": 2.8709,
      "step": 146291
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.9476354122161865,
      "learning_rate": 0.00017659753458739306,
      "loss": 3.1879,
      "step": 146292
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4282259941101074,
      "learning_rate": 0.00017659380612801054,
      "loss": 2.9178,
      "step": 146293
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.5326802730560303,
      "learning_rate": 0.00017659007769157164,
      "loss": 2.9004,
      "step": 146294
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.409884214401245,
      "learning_rate": 0.00017658634927807695,
      "loss": 2.8627,
      "step": 146295
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.8362486362457275,
      "learning_rate": 0.00017658262088752718,
      "loss": 2.8623,
      "step": 146296
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.1804091930389404,
      "learning_rate": 0.00017657889251992306,
      "loss": 3.2194,
      "step": 146297
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.0783257484436035,
      "learning_rate": 0.00017657516417526528,
      "loss": 2.7993,
      "step": 146298
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.979367733001709,
      "learning_rate": 0.00017657143585355443,
      "loss": 3.0604,
      "step": 146299
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.021188497543335,
      "learning_rate": 0.00017656770755479127,
      "loss": 3.168,
      "step": 146300
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4887168407440186,
      "learning_rate": 0.0001765639792789765,
      "loss": 3.0082,
      "step": 146301
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.3483073711395264,
      "learning_rate": 0.00017656025102611095,
      "loss": 2.9209,
      "step": 146302
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.2992324829101562,
      "learning_rate": 0.00017655652279619506,
      "loss": 3.2292,
      "step": 146303
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.4077818393707275,
      "learning_rate": 0.0001765527945892297,
      "loss": 2.7667,
      "step": 146304
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.162815570831299,
      "learning_rate": 0.00017654906640521547,
      "loss": 2.9324,
      "step": 146305
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0412473678588867,
      "learning_rate": 0.0001765453382441531,
      "loss": 3.0511,
      "step": 146306
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3486194610595703,
      "learning_rate": 0.0001765416101060433,
      "loss": 3.0231,
      "step": 146307
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.34512996673584,
      "learning_rate": 0.0001765378819908867,
      "loss": 3.0991,
      "step": 146308
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.735503673553467,
      "learning_rate": 0.00017653415389868405,
      "loss": 3.2489,
      "step": 146309
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5429697036743164,
      "learning_rate": 0.00017653042582943618,
      "loss": 2.7872,
      "step": 146310
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.481606960296631,
      "learning_rate": 0.00017652669778314348,
      "loss": 2.9922,
      "step": 146311
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.161173105239868,
      "learning_rate": 0.0001765229697598068,
      "loss": 2.7548,
      "step": 146312
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6413979530334473,
      "learning_rate": 0.00017651924175942683,
      "loss": 2.6836,
      "step": 146313
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0031044483184814,
      "learning_rate": 0.00017651551378200427,
      "loss": 2.9137,
      "step": 146314
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.101515531539917,
      "learning_rate": 0.0001765117858275398,
      "loss": 2.8315,
      "step": 146315
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4843809604644775,
      "learning_rate": 0.0001765080578960343,
      "loss": 2.9322,
      "step": 146316
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3892149925231934,
      "learning_rate": 0.0001765043299874881,
      "loss": 2.7273,
      "step": 146317
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.4546523094177246,
      "learning_rate": 0.00017650060210190206,
      "loss": 3.0893,
      "step": 146318
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1794729232788086,
      "learning_rate": 0.00017649687423927694,
      "loss": 2.8813,
      "step": 146319
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.2465648651123047,
      "learning_rate": 0.00017649314639961336,
      "loss": 3.0781,
      "step": 146320
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.76043963432312,
      "learning_rate": 0.00017648941858291204,
      "loss": 2.9847,
      "step": 146321
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4986207485198975,
      "learning_rate": 0.00017648569078917376,
      "loss": 3.0914,
      "step": 146322
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4233837127685547,
      "learning_rate": 0.00017648196301839903,
      "loss": 2.9906,
      "step": 146323
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6233596801757812,
      "learning_rate": 0.00017647823527058867,
      "loss": 2.8672,
      "step": 146324
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.903351306915283,
      "learning_rate": 0.00017647450754574326,
      "loss": 2.8606,
      "step": 146325
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9814016819000244,
      "learning_rate": 0.0001764707798438636,
      "loss": 2.83,
      "step": 146326
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3868727684020996,
      "learning_rate": 0.0001764670521649504,
      "loss": 3.1003,
      "step": 146327
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0111234188079834,
      "learning_rate": 0.00017646332450900435,
      "loss": 2.8607,
      "step": 146328
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.505689859390259,
      "learning_rate": 0.00017645959687602607,
      "loss": 2.9938,
      "step": 146329
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.5566275119781494,
      "learning_rate": 0.00017645586926601621,
      "loss": 2.7305,
      "step": 146330
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0215344429016113,
      "learning_rate": 0.00017645214167897556,
      "loss": 2.8586,
      "step": 146331
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8972114324569702,
      "learning_rate": 0.00017644841411490478,
      "loss": 2.9347,
      "step": 146332
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9043538570404053,
      "learning_rate": 0.00017644468657380458,
      "loss": 2.7799,
      "step": 146333
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.960446357727051,
      "learning_rate": 0.00017644095905567565,
      "loss": 3.0499,
      "step": 146334
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.502866506576538,
      "learning_rate": 0.0001764372315605188,
      "loss": 2.7059,
      "step": 146335
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8922728300094604,
      "learning_rate": 0.00017643350408833445,
      "loss": 2.9897,
      "step": 146336
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.202853202819824,
      "learning_rate": 0.00017642977663912348,
      "loss": 2.8816,
      "step": 146337
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.562514543533325,
      "learning_rate": 0.00017642604921288654,
      "loss": 3.0509,
      "step": 146338
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.128471612930298,
      "learning_rate": 0.00017642232180962433,
      "loss": 3.0493,
      "step": 146339
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.051440954208374,
      "learning_rate": 0.00017641859442933756,
      "loss": 2.993,
      "step": 146340
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.704329490661621,
      "learning_rate": 0.000176414867072027,
      "loss": 2.9483,
      "step": 146341
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1787776947021484,
      "learning_rate": 0.00017641113973769309,
      "loss": 3.0202,
      "step": 146342
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2439188957214355,
      "learning_rate": 0.00017640741242633674,
      "loss": 3.1143,
      "step": 146343
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.491312265396118,
      "learning_rate": 0.0001764036851379586,
      "loss": 2.8764,
      "step": 146344
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1739931106567383,
      "learning_rate": 0.0001763999578725593,
      "loss": 3.0263,
      "step": 146345
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1477761268615723,
      "learning_rate": 0.0001763962306301396,
      "loss": 2.8277,
      "step": 146346
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1875057220458984,
      "learning_rate": 0.0001763925034107003,
      "loss": 2.9511,
      "step": 146347
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.027256965637207,
      "learning_rate": 0.00017638877621424184,
      "loss": 2.9944,
      "step": 146348
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4070115089416504,
      "learning_rate": 0.00017638504904076503,
      "loss": 2.8069,
      "step": 146349
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2548482418060303,
      "learning_rate": 0.0001763813218902706,
      "loss": 2.9048,
      "step": 146350
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3672211170196533,
      "learning_rate": 0.0001763775947627592,
      "loss": 2.9078,
      "step": 146351
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.888943672180176,
      "learning_rate": 0.00017637386765823157,
      "loss": 2.8993,
      "step": 146352
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.461397171020508,
      "learning_rate": 0.00017637014057668844,
      "loss": 3.0689,
      "step": 146353
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.924627423286438,
      "learning_rate": 0.0001763664135181303,
      "loss": 2.9755,
      "step": 146354
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0512099266052246,
      "learning_rate": 0.00017636268648255803,
      "loss": 3.1892,
      "step": 146355
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.109769821166992,
      "learning_rate": 0.00017635895946997227,
      "loss": 3.0203,
      "step": 146356
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0876994132995605,
      "learning_rate": 0.00017635523248037366,
      "loss": 2.9869,
      "step": 146357
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6966392993927,
      "learning_rate": 0.000176351505513763,
      "loss": 3.1168,
      "step": 146358
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.412308931350708,
      "learning_rate": 0.000176347778570141,
      "loss": 2.8265,
      "step": 146359
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.861903429031372,
      "learning_rate": 0.00017634405164950826,
      "loss": 2.9096,
      "step": 146360
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6585781574249268,
      "learning_rate": 0.0001763403247518654,
      "loss": 2.9801,
      "step": 146361
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.841642379760742,
      "learning_rate": 0.00017633659787721323,
      "loss": 2.906,
      "step": 146362
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2339890003204346,
      "learning_rate": 0.00017633287102555246,
      "loss": 2.9367,
      "step": 146363
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3901240825653076,
      "learning_rate": 0.00017632914419688368,
      "loss": 2.9946,
      "step": 146364
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.095388889312744,
      "learning_rate": 0.00017632541739120777,
      "loss": 2.9183,
      "step": 146365
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2134199142456055,
      "learning_rate": 0.00017632169060852524,
      "loss": 3.0003,
      "step": 146366
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.070382833480835,
      "learning_rate": 0.00017631796384883682,
      "loss": 3.0172,
      "step": 146367
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4108474254608154,
      "learning_rate": 0.00017631423711214319,
      "loss": 2.8198,
      "step": 146368
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.227015495300293,
      "learning_rate": 0.00017631051039844513,
      "loss": 2.9878,
      "step": 146369
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.276810646057129,
      "learning_rate": 0.00017630678370774322,
      "loss": 3.0085,
      "step": 146370
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1897761821746826,
      "learning_rate": 0.00017630305704003832,
      "loss": 2.7976,
      "step": 146371
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.547064781188965,
      "learning_rate": 0.000176299330395331,
      "loss": 3.0612,
      "step": 146372
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2973203659057617,
      "learning_rate": 0.00017629560377362192,
      "loss": 3.0241,
      "step": 146373
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2312958240509033,
      "learning_rate": 0.0001762918771749118,
      "loss": 3.0946,
      "step": 146374
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1458842754364014,
      "learning_rate": 0.00017628815059920138,
      "loss": 2.8471,
      "step": 146375
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.321772575378418,
      "learning_rate": 0.0001762844240464913,
      "loss": 2.88,
      "step": 146376
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1038661003112793,
      "learning_rate": 0.00017628069751678236,
      "loss": 3.0613,
      "step": 146377
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2559337615966797,
      "learning_rate": 0.00017627697101007514,
      "loss": 2.9876,
      "step": 146378
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1206037998199463,
      "learning_rate": 0.00017627324452637033,
      "loss": 2.8188,
      "step": 146379
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.298872947692871,
      "learning_rate": 0.0001762695180656687,
      "loss": 2.9055,
      "step": 146380
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.211477041244507,
      "learning_rate": 0.0001762657916279709,
      "loss": 3.1287,
      "step": 146381
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0086541175842285,
      "learning_rate": 0.00017626206521327762,
      "loss": 2.9857,
      "step": 146382
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9302656650543213,
      "learning_rate": 0.00017625833882158956,
      "loss": 3.051,
      "step": 146383
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6673357486724854,
      "learning_rate": 0.00017625461245290738,
      "loss": 3.0726,
      "step": 146384
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.4412925243377686,
      "learning_rate": 0.0001762508861072318,
      "loss": 2.6389,
      "step": 146385
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.269441843032837,
      "learning_rate": 0.00017624715978456355,
      "loss": 2.9781,
      "step": 146386
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.607886791229248,
      "learning_rate": 0.00017624343348490333,
      "loss": 2.9879,
      "step": 146387
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1724586486816406,
      "learning_rate": 0.00017623970720825172,
      "loss": 2.9299,
      "step": 146388
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.513779640197754,
      "learning_rate": 0.00017623598095460954,
      "loss": 2.7329,
      "step": 146389
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.287059783935547,
      "learning_rate": 0.0001762322547239774,
      "loss": 3.1155,
      "step": 146390
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.699453592300415,
      "learning_rate": 0.00017622852851635596,
      "loss": 2.7949,
      "step": 146391
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1804280281066895,
      "learning_rate": 0.00017622480233174602,
      "loss": 2.9847,
      "step": 146392
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.548546314239502,
      "learning_rate": 0.0001762210761701482,
      "loss": 3.0293,
      "step": 146393
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.082077503204346,
      "learning_rate": 0.00017621735003156336,
      "loss": 3.133,
      "step": 146394
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.7215895652770996,
      "learning_rate": 0.00017621362391599197,
      "loss": 3.313,
      "step": 146395
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3747029304504395,
      "learning_rate": 0.00017620989782343474,
      "loss": 2.949,
      "step": 146396
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.414470911026001,
      "learning_rate": 0.00017620617175389245,
      "loss": 2.8493,
      "step": 146397
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5735583305358887,
      "learning_rate": 0.00017620244570736577,
      "loss": 3.0118,
      "step": 146398
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.898333787918091,
      "learning_rate": 0.0001761987196838554,
      "loss": 3.0549,
      "step": 146399
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9917750358581543,
      "learning_rate": 0.00017619499368336198,
      "loss": 2.9435,
      "step": 146400
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2995355129241943,
      "learning_rate": 0.00017619126770588637,
      "loss": 3.0593,
      "step": 146401
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2494089603424072,
      "learning_rate": 0.0001761875417514291,
      "loss": 3.0181,
      "step": 146402
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.422786235809326,
      "learning_rate": 0.00017618381581999086,
      "loss": 2.9595,
      "step": 146403
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4859142303466797,
      "learning_rate": 0.00017618008991157237,
      "loss": 2.8942,
      "step": 146404
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.219679832458496,
      "learning_rate": 0.00017617636402617433,
      "loss": 2.8991,
      "step": 146405
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.972423553466797,
      "learning_rate": 0.00017617263816379746,
      "loss": 3.163,
      "step": 146406
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.672452211380005,
      "learning_rate": 0.00017616891232444246,
      "loss": 2.881,
      "step": 146407
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.421571969985962,
      "learning_rate": 0.00017616518650811008,
      "loss": 2.9479,
      "step": 146408
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5719451904296875,
      "learning_rate": 0.00017616146071480082,
      "loss": 2.8693,
      "step": 146409
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.215106725692749,
      "learning_rate": 0.00017615773494451546,
      "loss": 3.0489,
      "step": 146410
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.8359224796295166,
      "learning_rate": 0.00017615400919725475,
      "loss": 3.4222,
      "step": 146411
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5325498580932617,
      "learning_rate": 0.00017615028347301936,
      "loss": 3.0159,
      "step": 146412
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4160830974578857,
      "learning_rate": 0.0001761465577718099,
      "loss": 3.1513,
      "step": 146413
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5753731727600098,
      "learning_rate": 0.00017614283209362733,
      "loss": 2.9247,
      "step": 146414
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8864624500274658,
      "learning_rate": 0.000176139106438472,
      "loss": 3.0598,
      "step": 146415
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5356640815734863,
      "learning_rate": 0.00017613538080634472,
      "loss": 2.9796,
      "step": 146416
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4932994842529297,
      "learning_rate": 0.00017613165519724623,
      "loss": 3.1445,
      "step": 146417
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9968714714050293,
      "learning_rate": 0.0001761279296111772,
      "loss": 2.963,
      "step": 146418
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.411386251449585,
      "learning_rate": 0.00017612420404813833,
      "loss": 3.0815,
      "step": 146419
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5258378982543945,
      "learning_rate": 0.00017612047850813043,
      "loss": 3.0456,
      "step": 146420
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1521036624908447,
      "learning_rate": 0.00017611675299115397,
      "loss": 2.9618,
      "step": 146421
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8252092599868774,
      "learning_rate": 0.0001761130274972097,
      "loss": 2.9999,
      "step": 146422
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1378681659698486,
      "learning_rate": 0.0001761093020262984,
      "loss": 2.6034,
      "step": 146423
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.470991849899292,
      "learning_rate": 0.0001761055765784207,
      "loss": 2.92,
      "step": 146424
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.450666904449463,
      "learning_rate": 0.00017610185115357732,
      "loss": 3.1426,
      "step": 146425
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7525851726531982,
      "learning_rate": 0.00017609812575176907,
      "loss": 2.7596,
      "step": 146426
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.345125675201416,
      "learning_rate": 0.00017609440037299635,
      "loss": 2.9639,
      "step": 146427
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.513648509979248,
      "learning_rate": 0.00017609067501726005,
      "loss": 2.936,
      "step": 146428
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.812957525253296,
      "learning_rate": 0.00017608694968456088,
      "loss": 2.8187,
      "step": 146429
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0630345344543457,
      "learning_rate": 0.0001760832243748994,
      "loss": 2.9852,
      "step": 146430
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.256923198699951,
      "learning_rate": 0.00017607949908827648,
      "loss": 3.195,
      "step": 146431
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.475663661956787,
      "learning_rate": 0.00017607577382469276,
      "loss": 3.1028,
      "step": 146432
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4469873905181885,
      "learning_rate": 0.00017607204858414882,
      "loss": 2.8675,
      "step": 146433
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1997218132019043,
      "learning_rate": 0.0001760683233666454,
      "loss": 2.9381,
      "step": 146434
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.209589719772339,
      "learning_rate": 0.00017606459817218323,
      "loss": 2.8092,
      "step": 146435
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2976880073547363,
      "learning_rate": 0.00017606087300076298,
      "loss": 2.9071,
      "step": 146436
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1853911876678467,
      "learning_rate": 0.00017605714785238536,
      "loss": 2.9697,
      "step": 146437
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1905176639556885,
      "learning_rate": 0.00017605342272705116,
      "loss": 2.9841,
      "step": 146438
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1055285930633545,
      "learning_rate": 0.00017604969762476086,
      "loss": 2.9118,
      "step": 146439
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1602895259857178,
      "learning_rate": 0.0001760459725455153,
      "loss": 3.0267,
      "step": 146440
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0077812671661377,
      "learning_rate": 0.0001760422474893151,
      "loss": 3.2199,
      "step": 146441
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.516047954559326,
      "learning_rate": 0.00017603852245616096,
      "loss": 2.9688,
      "step": 146442
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5493178367614746,
      "learning_rate": 0.00017603479744605368,
      "loss": 3.014,
      "step": 146443
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.420074224472046,
      "learning_rate": 0.00017603107245899393,
      "loss": 2.8579,
      "step": 146444
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1526010036468506,
      "learning_rate": 0.0001760273474949822,
      "loss": 2.7262,
      "step": 146445
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5574522018432617,
      "learning_rate": 0.00017602362255401933,
      "loss": 2.9619,
      "step": 146446
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.595529317855835,
      "learning_rate": 0.00017601989763610607,
      "loss": 3.042,
      "step": 146447
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.021379232406616,
      "learning_rate": 0.000176016172741243,
      "loss": 3.2669,
      "step": 146448
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.049337863922119,
      "learning_rate": 0.00017601244786943088,
      "loss": 3.0806,
      "step": 146449
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.125953435897827,
      "learning_rate": 0.0001760087230206705,
      "loss": 2.5538,
      "step": 146450
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.212416410446167,
      "learning_rate": 0.00017600499819496232,
      "loss": 2.9904,
      "step": 146451
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1608290672302246,
      "learning_rate": 0.00017600127339230718,
      "loss": 2.8024,
      "step": 146452
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2400333881378174,
      "learning_rate": 0.0001759975486127057,
      "loss": 2.7862,
      "step": 146453
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.2531940937042236,
      "learning_rate": 0.00017599382385615863,
      "loss": 2.9232,
      "step": 146454
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0544443130493164,
      "learning_rate": 0.00017599009912266667,
      "loss": 2.9728,
      "step": 146455
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3381590843200684,
      "learning_rate": 0.00017598637441223054,
      "loss": 2.9767,
      "step": 146456
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2728021144866943,
      "learning_rate": 0.0001759826497248509,
      "loss": 2.9419,
      "step": 146457
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.249497413635254,
      "learning_rate": 0.00017597892506052834,
      "loss": 3.0117,
      "step": 146458
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.518648386001587,
      "learning_rate": 0.00017597520041926363,
      "loss": 3.0273,
      "step": 146459
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.263850688934326,
      "learning_rate": 0.0001759714758010575,
      "loss": 2.9963,
      "step": 146460
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.3727269172668457,
      "learning_rate": 0.0001759677512059106,
      "loss": 2.9041,
      "step": 146461
    },
    {
      "epoch": 1.91,
      "grad_norm": 6.448642253875732,
      "learning_rate": 0.0001759640266338237,
      "loss": 2.7947,
      "step": 146462
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.556851863861084,
      "learning_rate": 0.00017596030208479738,
      "loss": 3.1156,
      "step": 146463
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6813294887542725,
      "learning_rate": 0.00017595657755883242,
      "loss": 2.8609,
      "step": 146464
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0798420906066895,
      "learning_rate": 0.00017595285305592944,
      "loss": 2.8857,
      "step": 146465
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.2513790130615234,
      "learning_rate": 0.00017594912857608913,
      "loss": 2.8303,
      "step": 146466
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6354005336761475,
      "learning_rate": 0.00017594540411931223,
      "loss": 2.9537,
      "step": 146467
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.551185131072998,
      "learning_rate": 0.00017594167968559943,
      "loss": 3.0884,
      "step": 146468
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.234825849533081,
      "learning_rate": 0.00017593795527495146,
      "loss": 3.2214,
      "step": 146469
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4672861099243164,
      "learning_rate": 0.00017593423088736892,
      "loss": 2.8585,
      "step": 146470
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2276575565338135,
      "learning_rate": 0.00017593050652285256,
      "loss": 3.127,
      "step": 146471
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.2930212020874023,
      "learning_rate": 0.00017592678218140308,
      "loss": 2.9234,
      "step": 146472
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9809398651123047,
      "learning_rate": 0.00017592305786302113,
      "loss": 2.9455,
      "step": 146473
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2556638717651367,
      "learning_rate": 0.00017591933356770744,
      "loss": 2.9462,
      "step": 146474
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4821817874908447,
      "learning_rate": 0.0001759156092954627,
      "loss": 2.9791,
      "step": 146475
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.7774875164031982,
      "learning_rate": 0.00017591188504628756,
      "loss": 2.9981,
      "step": 146476
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.129748821258545,
      "learning_rate": 0.00017590816082018276,
      "loss": 2.9564,
      "step": 146477
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0008513927459717,
      "learning_rate": 0.00017590443661714893,
      "loss": 2.914,
      "step": 146478
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.161104679107666,
      "learning_rate": 0.0001759007124371869,
      "loss": 3.0324,
      "step": 146479
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3918704986572266,
      "learning_rate": 0.0001758969882802972,
      "loss": 2.9167,
      "step": 146480
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.862024307250977,
      "learning_rate": 0.00017589326414648064,
      "loss": 2.9914,
      "step": 146481
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.321194887161255,
      "learning_rate": 0.00017588954003573782,
      "loss": 3.0403,
      "step": 146482
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3488619327545166,
      "learning_rate": 0.00017588581594806947,
      "loss": 2.99,
      "step": 146483
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1332693099975586,
      "learning_rate": 0.0001758820918834763,
      "loss": 3.0345,
      "step": 146484
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8329834938049316,
      "learning_rate": 0.000175878367841959,
      "loss": 3.0047,
      "step": 146485
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8696789741516113,
      "learning_rate": 0.00017587464382351828,
      "loss": 2.772,
      "step": 146486
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.134620189666748,
      "learning_rate": 0.00017587091982815484,
      "loss": 3.1306,
      "step": 146487
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.903862476348877,
      "learning_rate": 0.00017586719585586927,
      "loss": 2.6183,
      "step": 146488
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.298038959503174,
      "learning_rate": 0.00017586347190666233,
      "loss": 2.8183,
      "step": 146489
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.577796459197998,
      "learning_rate": 0.0001758597479805347,
      "loss": 3.1042,
      "step": 146490
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7811291217803955,
      "learning_rate": 0.0001758560240774871,
      "loss": 3.0687,
      "step": 146491
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.329237937927246,
      "learning_rate": 0.00017585230019752019,
      "loss": 3.0651,
      "step": 146492
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.71604061126709,
      "learning_rate": 0.00017584857634063485,
      "loss": 2.9488,
      "step": 146493
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.407269239425659,
      "learning_rate": 0.00017584485250683144,
      "loss": 2.8857,
      "step": 146494
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.121347427368164,
      "learning_rate": 0.00017584112869611086,
      "loss": 3.068,
      "step": 146495
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.385526180267334,
      "learning_rate": 0.0001758374049084737,
      "loss": 2.8636,
      "step": 146496
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.814502716064453,
      "learning_rate": 0.00017583368114392072,
      "loss": 2.9621,
      "step": 146497
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2841415405273438,
      "learning_rate": 0.00017582995740245266,
      "loss": 2.9387,
      "step": 146498
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.217681884765625,
      "learning_rate": 0.00017582623368407022,
      "loss": 3.0102,
      "step": 146499
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.245607614517212,
      "learning_rate": 0.0001758225099887739,
      "loss": 3.0708,
      "step": 146500
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3903000354766846,
      "learning_rate": 0.00017581878631656452,
      "loss": 2.669,
      "step": 146501
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3966064453125,
      "learning_rate": 0.0001758150626674428,
      "loss": 2.8897,
      "step": 146502
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0514023303985596,
      "learning_rate": 0.0001758113390414094,
      "loss": 3.1467,
      "step": 146503
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.732041597366333,
      "learning_rate": 0.00017580761543846502,
      "loss": 2.914,
      "step": 146504
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.782602310180664,
      "learning_rate": 0.00017580389185861047,
      "loss": 2.8208,
      "step": 146505
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1700546741485596,
      "learning_rate": 0.00017580016830184616,
      "loss": 2.8988,
      "step": 146506
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1738688945770264,
      "learning_rate": 0.000175796444768173,
      "loss": 2.8611,
      "step": 146507
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.636888265609741,
      "learning_rate": 0.00017579272125759157,
      "loss": 2.7722,
      "step": 146508
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.944188117980957,
      "learning_rate": 0.00017578899777010267,
      "loss": 3.0106,
      "step": 146509
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.246192455291748,
      "learning_rate": 0.00017578527430570692,
      "loss": 2.8069,
      "step": 146510
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.472806453704834,
      "learning_rate": 0.0001757815508644051,
      "loss": 3.0154,
      "step": 146511
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.695744752883911,
      "learning_rate": 0.0001757778274461978,
      "loss": 2.7952,
      "step": 146512
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.876786470413208,
      "learning_rate": 0.00017577410405108571,
      "loss": 2.996,
      "step": 146513
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.013686180114746,
      "learning_rate": 0.00017577038067906954,
      "loss": 3.032,
      "step": 146514
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2628064155578613,
      "learning_rate": 0.00017576665733015003,
      "loss": 3.2173,
      "step": 146515
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7373087406158447,
      "learning_rate": 0.00017576293400432783,
      "loss": 3.1265,
      "step": 146516
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3448004722595215,
      "learning_rate": 0.00017575921070160376,
      "loss": 3.0719,
      "step": 146517
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.164020299911499,
      "learning_rate": 0.00017575548742197828,
      "loss": 2.9042,
      "step": 146518
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1742606163024902,
      "learning_rate": 0.00017575176416545225,
      "loss": 2.781,
      "step": 146519
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.288707733154297,
      "learning_rate": 0.00017574804093202623,
      "loss": 2.8781,
      "step": 146520
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6011428833007812,
      "learning_rate": 0.00017574431772170106,
      "loss": 2.9928,
      "step": 146521
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0309245586395264,
      "learning_rate": 0.00017574059453447734,
      "loss": 2.8437,
      "step": 146522
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2981481552124023,
      "learning_rate": 0.00017573687137035593,
      "loss": 2.8901,
      "step": 146523
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.707352876663208,
      "learning_rate": 0.00017573314822933724,
      "loss": 3.0024,
      "step": 146524
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.735002279281616,
      "learning_rate": 0.00017572942511142207,
      "loss": 2.7998,
      "step": 146525
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.921524524688721,
      "learning_rate": 0.00017572570201661119,
      "loss": 2.7182,
      "step": 146526
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7922847270965576,
      "learning_rate": 0.00017572197894490522,
      "loss": 2.9102,
      "step": 146527
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.3122968673706055,
      "learning_rate": 0.0001757182558963049,
      "loss": 3.0499,
      "step": 146528
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.94765043258667,
      "learning_rate": 0.00017571453287081105,
      "loss": 3.2531,
      "step": 146529
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.728413105010986,
      "learning_rate": 0.00017571080986842407,
      "loss": 2.8988,
      "step": 146530
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.271975040435791,
      "learning_rate": 0.00017570708688914477,
      "loss": 2.8977,
      "step": 146531
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.997164726257324,
      "learning_rate": 0.0001757033639329739,
      "loss": 2.8645,
      "step": 146532
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0143001079559326,
      "learning_rate": 0.00017569964099991208,
      "loss": 2.9189,
      "step": 146533
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.367661714553833,
      "learning_rate": 0.0001756959180899601,
      "loss": 3.2086,
      "step": 146534
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8408281803131104,
      "learning_rate": 0.00017569219520311856,
      "loss": 2.8747,
      "step": 146535
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8504887819290161,
      "learning_rate": 0.00017568847233938835,
      "loss": 2.9501,
      "step": 146536
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.152423143386841,
      "learning_rate": 0.00017568474949876983,
      "loss": 3.1995,
      "step": 146537
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9698880910873413,
      "learning_rate": 0.0001756810266812639,
      "loss": 2.9878,
      "step": 146538
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.142759323120117,
      "learning_rate": 0.00017567730388687116,
      "loss": 3.0829,
      "step": 146539
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5231494903564453,
      "learning_rate": 0.0001756735811155924,
      "loss": 2.8289,
      "step": 146540
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4424030780792236,
      "learning_rate": 0.00017566985836742826,
      "loss": 3.1132,
      "step": 146541
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.522056818008423,
      "learning_rate": 0.00017566613564237957,
      "loss": 2.9575,
      "step": 146542
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.18806529045105,
      "learning_rate": 0.00017566241294044674,
      "loss": 3.21,
      "step": 146543
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4513094425201416,
      "learning_rate": 0.00017565869026163063,
      "loss": 3.1059,
      "step": 146544
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0187489986419678,
      "learning_rate": 0.00017565496760593192,
      "loss": 2.8904,
      "step": 146545
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.189908981323242,
      "learning_rate": 0.0001756512449733513,
      "loss": 3.0155,
      "step": 146546
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.15871262550354,
      "learning_rate": 0.00017564752236388947,
      "loss": 2.9794,
      "step": 146547
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.583906888961792,
      "learning_rate": 0.00017564379977754718,
      "loss": 2.9121,
      "step": 146548
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0450682640075684,
      "learning_rate": 0.00017564007721432501,
      "loss": 3.0263,
      "step": 146549
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2712442874908447,
      "learning_rate": 0.00017563635467422365,
      "loss": 2.9024,
      "step": 146550
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1738805770874023,
      "learning_rate": 0.00017563263215724387,
      "loss": 3.0757,
      "step": 146551
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6800811290740967,
      "learning_rate": 0.0001756289096633863,
      "loss": 2.8408,
      "step": 146552
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.3201136589050293,
      "learning_rate": 0.0001756251871926517,
      "loss": 3.2069,
      "step": 146553
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1963002681732178,
      "learning_rate": 0.00017562146474504076,
      "loss": 2.8048,
      "step": 146554
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.089874744415283,
      "learning_rate": 0.0001756177423205541,
      "loss": 3.147,
      "step": 146555
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.7282333374023438,
      "learning_rate": 0.00017561401991919248,
      "loss": 3.0689,
      "step": 146556
    },
    {
      "epoch": 1.91,
      "grad_norm": 6.479975700378418,
      "learning_rate": 0.0001756102975409565,
      "loss": 2.9812,
      "step": 146557
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9924166202545166,
      "learning_rate": 0.00017560657518584695,
      "loss": 2.9209,
      "step": 146558
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1428232192993164,
      "learning_rate": 0.00017560285285386445,
      "loss": 2.9273,
      "step": 146559
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.4250760078430176,
      "learning_rate": 0.0001755991305450098,
      "loss": 3.1317,
      "step": 146560
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.128689765930176,
      "learning_rate": 0.00017559540825928357,
      "loss": 2.8657,
      "step": 146561
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.327179193496704,
      "learning_rate": 0.0001755916859966865,
      "loss": 2.8713,
      "step": 146562
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9200515747070312,
      "learning_rate": 0.00017558796375721924,
      "loss": 2.9929,
      "step": 146563
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1620161533355713,
      "learning_rate": 0.00017558424154088262,
      "loss": 3.0191,
      "step": 146564
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0387790203094482,
      "learning_rate": 0.0001755805193476772,
      "loss": 2.6217,
      "step": 146565
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6518008708953857,
      "learning_rate": 0.00017557679717760373,
      "loss": 2.9357,
      "step": 146566
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.353891134262085,
      "learning_rate": 0.00017557307503066286,
      "loss": 3.1445,
      "step": 146567
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.065929412841797,
      "learning_rate": 0.0001755693529068553,
      "loss": 2.9519,
      "step": 146568
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3739800453186035,
      "learning_rate": 0.00017556563080618168,
      "loss": 3.1731,
      "step": 146569
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.356788396835327,
      "learning_rate": 0.00017556190872864284,
      "loss": 3.1777,
      "step": 146570
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3028082847595215,
      "learning_rate": 0.0001755581866742394,
      "loss": 3.1078,
      "step": 146571
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3030121326446533,
      "learning_rate": 0.0001755544646429721,
      "loss": 3.163,
      "step": 146572
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.198904275894165,
      "learning_rate": 0.00017555074263484146,
      "loss": 3.0952,
      "step": 146573
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.6517271995544434,
      "learning_rate": 0.0001755470206498483,
      "loss": 2.9646,
      "step": 146574
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.323396921157837,
      "learning_rate": 0.00017554329868799332,
      "loss": 2.7554,
      "step": 146575
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2084481716156006,
      "learning_rate": 0.00017553957674927716,
      "loss": 3.1172,
      "step": 146576
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.253138780593872,
      "learning_rate": 0.00017553585483370056,
      "loss": 3.0698,
      "step": 146577
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1552789211273193,
      "learning_rate": 0.0001755321329412643,
      "loss": 2.9593,
      "step": 146578
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9789760112762451,
      "learning_rate": 0.00017552841107196885,
      "loss": 2.8201,
      "step": 146579
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1566712856292725,
      "learning_rate": 0.00017552468922581502,
      "loss": 2.8799,
      "step": 146580
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5176007747650146,
      "learning_rate": 0.0001755209674028035,
      "loss": 3.0746,
      "step": 146581
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9873082637786865,
      "learning_rate": 0.000175517245602935,
      "loss": 3.1055,
      "step": 146582
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.607215166091919,
      "learning_rate": 0.00017551352382621016,
      "loss": 3.1123,
      "step": 146583
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5848002433776855,
      "learning_rate": 0.00017550980207262985,
      "loss": 2.6882,
      "step": 146584
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0116257667541504,
      "learning_rate": 0.0001755060803421945,
      "loss": 2.8034,
      "step": 146585
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4593327045440674,
      "learning_rate": 0.00017550235863490488,
      "loss": 2.8655,
      "step": 146586
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1543540954589844,
      "learning_rate": 0.00017549863695076175,
      "loss": 3.0078,
      "step": 146587
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.5174453258514404,
      "learning_rate": 0.00017549491528976577,
      "loss": 3.009,
      "step": 146588
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.338141679763794,
      "learning_rate": 0.0001754911936519177,
      "loss": 2.9772,
      "step": 146589
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.810560703277588,
      "learning_rate": 0.0001754874720372182,
      "loss": 2.941,
      "step": 146590
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2126035690307617,
      "learning_rate": 0.0001754837504456678,
      "loss": 3.1076,
      "step": 146591
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.124800682067871,
      "learning_rate": 0.00017548002887726738,
      "loss": 3.0233,
      "step": 146592
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.624889850616455,
      "learning_rate": 0.00017547630733201754,
      "loss": 3.048,
      "step": 146593
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.978318929672241,
      "learning_rate": 0.00017547258580991902,
      "loss": 3.0405,
      "step": 146594
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.5031075477600098,
      "learning_rate": 0.00017546886431097248,
      "loss": 2.8219,
      "step": 146595
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0550689697265625,
      "learning_rate": 0.00017546514283517866,
      "loss": 2.6988,
      "step": 146596
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.636995553970337,
      "learning_rate": 0.0001754614213825383,
      "loss": 2.7982,
      "step": 146597
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.238664150238037,
      "learning_rate": 0.00017545769995305194,
      "loss": 3.139,
      "step": 146598
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.899878978729248,
      "learning_rate": 0.00017545397854672033,
      "loss": 3.0076,
      "step": 146599
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5022401809692383,
      "learning_rate": 0.00017545025716354419,
      "loss": 2.9093,
      "step": 146600
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.139827013015747,
      "learning_rate": 0.00017544653580352416,
      "loss": 2.8792,
      "step": 146601
    },
    {
      "epoch": 1.91,
      "grad_norm": 6.825987815856934,
      "learning_rate": 0.00017544281446666096,
      "loss": 2.973,
      "step": 146602
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1155970096588135,
      "learning_rate": 0.00017543909315295545,
      "loss": 2.852,
      "step": 146603
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9990384578704834,
      "learning_rate": 0.00017543537186240802,
      "loss": 2.8459,
      "step": 146604
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.110730171203613,
      "learning_rate": 0.00017543165059501952,
      "loss": 2.9128,
      "step": 146605
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.187453508377075,
      "learning_rate": 0.00017542792935079063,
      "loss": 2.996,
      "step": 146606
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.971726417541504,
      "learning_rate": 0.00017542420812972204,
      "loss": 3.0361,
      "step": 146607
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.8446686267852783,
      "learning_rate": 0.00017542048693181445,
      "loss": 2.7051,
      "step": 146608
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.8054065704345703,
      "learning_rate": 0.00017541676575706864,
      "loss": 3.238,
      "step": 146609
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.442586660385132,
      "learning_rate": 0.00017541304460548507,
      "loss": 3.135,
      "step": 146610
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0199785232543945,
      "learning_rate": 0.00017540932347706457,
      "loss": 3.0978,
      "step": 146611
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.087334394454956,
      "learning_rate": 0.00017540560237180788,
      "loss": 3.1114,
      "step": 146612
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.680896520614624,
      "learning_rate": 0.0001754018812897156,
      "loss": 2.9469,
      "step": 146613
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.8242619037628174,
      "learning_rate": 0.00017539816023078847,
      "loss": 2.8831,
      "step": 146614
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.888380527496338,
      "learning_rate": 0.0001753944391950273,
      "loss": 2.9392,
      "step": 146615
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.3630459308624268,
      "learning_rate": 0.0001753907181824325,
      "loss": 3.0136,
      "step": 146616
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.383798599243164,
      "learning_rate": 0.000175386997193005,
      "loss": 3.0096,
      "step": 146617
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8862192630767822,
      "learning_rate": 0.00017538327622674535,
      "loss": 3.0151,
      "step": 146618
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4193193912506104,
      "learning_rate": 0.0001753795552836543,
      "loss": 2.8354,
      "step": 146619
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2769076824188232,
      "learning_rate": 0.00017537583436373256,
      "loss": 2.846,
      "step": 146620
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.845423698425293,
      "learning_rate": 0.0001753721134669809,
      "loss": 3.0183,
      "step": 146621
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.125925302505493,
      "learning_rate": 0.00017536839259339984,
      "loss": 2.8323,
      "step": 146622
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1251373291015625,
      "learning_rate": 0.00017536467174299013,
      "loss": 3.0717,
      "step": 146623
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3565165996551514,
      "learning_rate": 0.00017536095091575247,
      "loss": 2.8718,
      "step": 146624
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2814126014709473,
      "learning_rate": 0.00017535723011168755,
      "loss": 3.0276,
      "step": 146625
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3956339359283447,
      "learning_rate": 0.00017535350933079607,
      "loss": 2.9815,
      "step": 146626
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.273005962371826,
      "learning_rate": 0.0001753497885730789,
      "loss": 2.9385,
      "step": 146627
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6144421100616455,
      "learning_rate": 0.00017534606783853635,
      "loss": 2.8558,
      "step": 146628
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8449915647506714,
      "learning_rate": 0.00017534234712716942,
      "loss": 2.994,
      "step": 146629
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.125898599624634,
      "learning_rate": 0.0001753386264389786,
      "loss": 2.9547,
      "step": 146630
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2507190704345703,
      "learning_rate": 0.00017533490577396476,
      "loss": 2.8412,
      "step": 146631
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1405675411224365,
      "learning_rate": 0.0001753311851321285,
      "loss": 3.0955,
      "step": 146632
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.439101457595825,
      "learning_rate": 0.00017532746451347057,
      "loss": 2.8153,
      "step": 146633
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8501946926116943,
      "learning_rate": 0.00017532374391799162,
      "loss": 2.7392,
      "step": 146634
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.267857313156128,
      "learning_rate": 0.00017532002334569226,
      "loss": 2.9653,
      "step": 146635
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.949540615081787,
      "learning_rate": 0.00017531630279657328,
      "loss": 2.9208,
      "step": 146636
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6312828063964844,
      "learning_rate": 0.00017531258227063539,
      "loss": 2.9833,
      "step": 146637
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.311379909515381,
      "learning_rate": 0.00017530886176787917,
      "loss": 3.2076,
      "step": 146638
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5469210147857666,
      "learning_rate": 0.0001753051412883055,
      "loss": 3.1503,
      "step": 146639
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0541844367980957,
      "learning_rate": 0.00017530142083191488,
      "loss": 2.9911,
      "step": 146640
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6144723892211914,
      "learning_rate": 0.00017529770039870815,
      "loss": 2.9889,
      "step": 146641
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.004838466644287,
      "learning_rate": 0.00017529397998868588,
      "loss": 2.8488,
      "step": 146642
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.5508687496185303,
      "learning_rate": 0.0001752902596018488,
      "loss": 3.0263,
      "step": 146643
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1246337890625,
      "learning_rate": 0.0001752865392381976,
      "loss": 2.996,
      "step": 146644
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.362050771713257,
      "learning_rate": 0.00017528281889773304,
      "loss": 2.9415,
      "step": 146645
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3914263248443604,
      "learning_rate": 0.00017527909858045573,
      "loss": 3.1038,
      "step": 146646
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.546438694000244,
      "learning_rate": 0.0001752753782863664,
      "loss": 2.9872,
      "step": 146647
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1932384967803955,
      "learning_rate": 0.00017527165801546572,
      "loss": 2.8751,
      "step": 146648
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3800368309020996,
      "learning_rate": 0.00017526793776775443,
      "loss": 2.9299,
      "step": 146649
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.179136276245117,
      "learning_rate": 0.0001752642175432331,
      "loss": 3.1584,
      "step": 146650
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9730584621429443,
      "learning_rate": 0.0001752604973419026,
      "loss": 2.9124,
      "step": 146651
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.911752462387085,
      "learning_rate": 0.00017525677716376348,
      "loss": 2.7642,
      "step": 146652
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.41516375541687,
      "learning_rate": 0.0001752530570088165,
      "loss": 2.993,
      "step": 146653
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2582411766052246,
      "learning_rate": 0.00017524933687706229,
      "loss": 2.8259,
      "step": 146654
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.202305316925049,
      "learning_rate": 0.00017524561676850165,
      "loss": 2.8653,
      "step": 146655
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.29577898979187,
      "learning_rate": 0.00017524189668313516,
      "loss": 2.8296,
      "step": 146656
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.209959030151367,
      "learning_rate": 0.00017523817662096357,
      "loss": 2.8806,
      "step": 146657
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.393625497817993,
      "learning_rate": 0.00017523445658198757,
      "loss": 2.7945,
      "step": 146658
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.013554334640503,
      "learning_rate": 0.0001752307365662078,
      "loss": 2.8069,
      "step": 146659
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.155336856842041,
      "learning_rate": 0.00017522701657362498,
      "loss": 3.0473,
      "step": 146660
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3232126235961914,
      "learning_rate": 0.00017522329660423985,
      "loss": 3.053,
      "step": 146661
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.633802890777588,
      "learning_rate": 0.00017521957665805305,
      "loss": 2.9662,
      "step": 146662
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.7130954265594482,
      "learning_rate": 0.00017521585673506532,
      "loss": 3.0118,
      "step": 146663
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7060532569885254,
      "learning_rate": 0.00017521213683527732,
      "loss": 2.9906,
      "step": 146664
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.894864320755005,
      "learning_rate": 0.0001752084169586897,
      "loss": 3.1422,
      "step": 146665
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.344486951828003,
      "learning_rate": 0.00017520469710530318,
      "loss": 3.0726,
      "step": 146666
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.800990343093872,
      "learning_rate": 0.0001752009772751185,
      "loss": 3.1064,
      "step": 146667
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5335519313812256,
      "learning_rate": 0.00017519725746813626,
      "loss": 2.8949,
      "step": 146668
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4106967449188232,
      "learning_rate": 0.00017519353768435727,
      "loss": 3.1046,
      "step": 146669
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5873680114746094,
      "learning_rate": 0.00017518981792378224,
      "loss": 2.9127,
      "step": 146670
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.706613302230835,
      "learning_rate": 0.00017518609818641164,
      "loss": 2.9187,
      "step": 146671
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.129009246826172,
      "learning_rate": 0.0001751823784722463,
      "loss": 3.0285,
      "step": 146672
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.426528215408325,
      "learning_rate": 0.0001751786587812869,
      "loss": 2.8669,
      "step": 146673
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5772624015808105,
      "learning_rate": 0.0001751749391135342,
      "loss": 2.9191,
      "step": 146674
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.293111562728882,
      "learning_rate": 0.0001751712194689888,
      "loss": 3.1811,
      "step": 146675
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2484099864959717,
      "learning_rate": 0.00017516749984765155,
      "loss": 3.1752,
      "step": 146676
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.72125244140625,
      "learning_rate": 0.0001751637802495229,
      "loss": 3.1256,
      "step": 146677
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.060939073562622,
      "learning_rate": 0.00017516006067460368,
      "loss": 2.881,
      "step": 146678
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.634241104125977,
      "learning_rate": 0.00017515634112289454,
      "loss": 3.1353,
      "step": 146679
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4213168621063232,
      "learning_rate": 0.0001751526215943962,
      "loss": 2.7984,
      "step": 146680
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0048582553863525,
      "learning_rate": 0.0001751489020891094,
      "loss": 2.8228,
      "step": 146681
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.046722173690796,
      "learning_rate": 0.00017514518260703478,
      "loss": 2.8989,
      "step": 146682
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.4063258171081543,
      "learning_rate": 0.00017514146314817302,
      "loss": 3.0126,
      "step": 146683
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.64755916595459,
      "learning_rate": 0.00017513774371252468,
      "loss": 2.8322,
      "step": 146684
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.109936237335205,
      "learning_rate": 0.00017513402430009074,
      "loss": 2.9558,
      "step": 146685
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4842519760131836,
      "learning_rate": 0.00017513030491087163,
      "loss": 2.9743,
      "step": 146686
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6540729999542236,
      "learning_rate": 0.00017512658554486824,
      "loss": 2.9226,
      "step": 146687
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1443564891815186,
      "learning_rate": 0.00017512286620208122,
      "loss": 3.2345,
      "step": 146688
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6695666313171387,
      "learning_rate": 0.00017511914688251104,
      "loss": 2.9734,
      "step": 146689
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3262813091278076,
      "learning_rate": 0.00017511542758615872,
      "loss": 2.9463,
      "step": 146690
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.372232437133789,
      "learning_rate": 0.00017511170831302478,
      "loss": 2.8757,
      "step": 146691
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.797933340072632,
      "learning_rate": 0.00017510798906310987,
      "loss": 2.9622,
      "step": 146692
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5515875816345215,
      "learning_rate": 0.00017510426983641478,
      "loss": 3.0268,
      "step": 146693
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.806465983390808,
      "learning_rate": 0.00017510055063294023,
      "loss": 2.8984,
      "step": 146694
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.151217222213745,
      "learning_rate": 0.0001750968314526867,
      "loss": 3.2795,
      "step": 146695
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.110297203063965,
      "learning_rate": 0.00017509311229565514,
      "loss": 2.82,
      "step": 146696
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0447587966918945,
      "learning_rate": 0.00017508939316184603,
      "loss": 3.258,
      "step": 146697
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.62512469291687,
      "learning_rate": 0.00017508567405126026,
      "loss": 3.0287,
      "step": 146698
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8992080688476562,
      "learning_rate": 0.00017508195496389838,
      "loss": 3.1757,
      "step": 146699
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1588730812072754,
      "learning_rate": 0.0001750782358997612,
      "loss": 2.9342,
      "step": 146700
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.245887279510498,
      "learning_rate": 0.00017507451685884917,
      "loss": 2.9193,
      "step": 146701
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.811333656311035,
      "learning_rate": 0.0001750707978411633,
      "loss": 3.0284,
      "step": 146702
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.274629592895508,
      "learning_rate": 0.00017506707884670396,
      "loss": 3.1629,
      "step": 146703
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5436065196990967,
      "learning_rate": 0.00017506335987547216,
      "loss": 2.7876,
      "step": 146704
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.144944190979004,
      "learning_rate": 0.00017505964092746844,
      "loss": 2.9212,
      "step": 146705
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.35837721824646,
      "learning_rate": 0.00017505592200269346,
      "loss": 2.9976,
      "step": 146706
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1845474243164062,
      "learning_rate": 0.00017505220310114788,
      "loss": 2.795,
      "step": 146707
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0001919269561768,
      "learning_rate": 0.0001750484842228325,
      "loss": 2.7337,
      "step": 146708
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1511173248291016,
      "learning_rate": 0.0001750447653677479,
      "loss": 3.0355,
      "step": 146709
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0423669815063477,
      "learning_rate": 0.00017504104653589492,
      "loss": 2.8505,
      "step": 146710
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.089320421218872,
      "learning_rate": 0.0001750373277272741,
      "loss": 2.6104,
      "step": 146711
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9867736101150513,
      "learning_rate": 0.0001750336089418864,
      "loss": 2.7936,
      "step": 146712
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.998537302017212,
      "learning_rate": 0.00017502989017973209,
      "loss": 2.988,
      "step": 146713
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.929075241088867,
      "learning_rate": 0.00017502617144081218,
      "loss": 2.9558,
      "step": 146714
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.042558431625366,
      "learning_rate": 0.00017502245272512715,
      "loss": 2.7991,
      "step": 146715
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.026806116104126,
      "learning_rate": 0.0001750187340326779,
      "loss": 2.9029,
      "step": 146716
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.338151454925537,
      "learning_rate": 0.00017501501536346497,
      "loss": 3.336,
      "step": 146717
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.7934215068817139,
      "learning_rate": 0.00017501129671748935,
      "loss": 2.9182,
      "step": 146718
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6402299404144287,
      "learning_rate": 0.00017500757809475121,
      "loss": 2.9384,
      "step": 146719
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.477332592010498,
      "learning_rate": 0.00017500385949525163,
      "loss": 3.2043,
      "step": 146720
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4922401905059814,
      "learning_rate": 0.0001750001409189911,
      "loss": 2.9965,
      "step": 146721
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0483508110046387,
      "learning_rate": 0.0001749964223659705,
      "loss": 3.1332,
      "step": 146722
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.737992525100708,
      "learning_rate": 0.00017499270383619032,
      "loss": 2.8864,
      "step": 146723
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.025322198867798,
      "learning_rate": 0.00017498898532965158,
      "loss": 2.7839,
      "step": 146724
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2793374061584473,
      "learning_rate": 0.00017498526684635454,
      "loss": 3.055,
      "step": 146725
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1645939350128174,
      "learning_rate": 0.0001749815483863002,
      "loss": 3.2059,
      "step": 146726
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9968178272247314,
      "learning_rate": 0.000174977829949489,
      "loss": 3.1538,
      "step": 146727
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.255963087081909,
      "learning_rate": 0.00017497411153592196,
      "loss": 2.8974,
      "step": 146728
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.266073703765869,
      "learning_rate": 0.00017497039314559945,
      "loss": 2.9389,
      "step": 146729
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2788164615631104,
      "learning_rate": 0.0001749666747785224,
      "loss": 3.0736,
      "step": 146730
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3333425521850586,
      "learning_rate": 0.00017496295643469142,
      "loss": 2.9477,
      "step": 146731
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.607219696044922,
      "learning_rate": 0.00017495923811410718,
      "loss": 2.8896,
      "step": 146732
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3228676319122314,
      "learning_rate": 0.00017495551981677027,
      "loss": 3.123,
      "step": 146733
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3720133304595947,
      "learning_rate": 0.0001749518015426816,
      "loss": 3.0672,
      "step": 146734
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0966432094573975,
      "learning_rate": 0.00017494808329184165,
      "loss": 2.7224,
      "step": 146735
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2442705631256104,
      "learning_rate": 0.0001749443650642513,
      "loss": 2.8974,
      "step": 146736
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8912277221679688,
      "learning_rate": 0.00017494064685991118,
      "loss": 2.9038,
      "step": 146737
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.24469256401062,
      "learning_rate": 0.00017493692867882196,
      "loss": 2.8674,
      "step": 146738
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.160619020462036,
      "learning_rate": 0.00017493321052098416,
      "loss": 3.0954,
      "step": 146739
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.082101821899414,
      "learning_rate": 0.00017492949238639882,
      "loss": 3.0226,
      "step": 146740
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6411848068237305,
      "learning_rate": 0.00017492577427506627,
      "loss": 3.1039,
      "step": 146741
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7318172454833984,
      "learning_rate": 0.0001749220561869875,
      "loss": 2.961,
      "step": 146742
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.431270122528076,
      "learning_rate": 0.00017491833812216317,
      "loss": 3.0459,
      "step": 146743
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0232460498809814,
      "learning_rate": 0.0001749146200805938,
      "loss": 2.8262,
      "step": 146744
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.744939088821411,
      "learning_rate": 0.00017491090206228003,
      "loss": 2.7284,
      "step": 146745
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.182128667831421,
      "learning_rate": 0.0001749071840672229,
      "loss": 2.7564,
      "step": 146746
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4210009574890137,
      "learning_rate": 0.00017490346609542266,
      "loss": 2.9048,
      "step": 146747
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5189208984375,
      "learning_rate": 0.00017489974814688042,
      "loss": 3.2036,
      "step": 146748
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8289735317230225,
      "learning_rate": 0.00017489603022159664,
      "loss": 2.8416,
      "step": 146749
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.226691484451294,
      "learning_rate": 0.00017489231231957207,
      "loss": 2.9159,
      "step": 146750
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0066282749176025,
      "learning_rate": 0.0001748885944408072,
      "loss": 2.835,
      "step": 146751
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.565992832183838,
      "learning_rate": 0.00017488487658530308,
      "loss": 2.82,
      "step": 146752
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.222628355026245,
      "learning_rate": 0.0001748811587530601,
      "loss": 2.8715,
      "step": 146753
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.327462911605835,
      "learning_rate": 0.0001748774409440792,
      "loss": 2.8288,
      "step": 146754
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4039416313171387,
      "learning_rate": 0.00017487372315836092,
      "loss": 3.1151,
      "step": 146755
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.913008213043213,
      "learning_rate": 0.000174870005395906,
      "loss": 2.9332,
      "step": 146756
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.4910519123077393,
      "learning_rate": 0.00017486628765671497,
      "loss": 2.7661,
      "step": 146757
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.612666130065918,
      "learning_rate": 0.00017486256994078879,
      "loss": 2.8942,
      "step": 146758
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.578181505203247,
      "learning_rate": 0.0001748588522481279,
      "loss": 2.9763,
      "step": 146759
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0672616958618164,
      "learning_rate": 0.00017485513457873324,
      "loss": 3.0732,
      "step": 146760
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6174869537353516,
      "learning_rate": 0.00017485141693260537,
      "loss": 2.8532,
      "step": 146761
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.620558977127075,
      "learning_rate": 0.00017484769930974493,
      "loss": 2.8707,
      "step": 146762
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.333374261856079,
      "learning_rate": 0.00017484398171015264,
      "loss": 2.942,
      "step": 146763
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0375092029571533,
      "learning_rate": 0.00017484026413382926,
      "loss": 2.9258,
      "step": 146764
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.160621166229248,
      "learning_rate": 0.00017483654658077531,
      "loss": 3.1362,
      "step": 146765
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.143275737762451,
      "learning_rate": 0.0001748328290509918,
      "loss": 2.676,
      "step": 146766
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.867810010910034,
      "learning_rate": 0.00017482911154447918,
      "loss": 2.9165,
      "step": 146767
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0284295082092285,
      "learning_rate": 0.0001748253940612382,
      "loss": 2.7289,
      "step": 146768
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.30342960357666,
      "learning_rate": 0.00017482167660126948,
      "loss": 2.8082,
      "step": 146769
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.656555414199829,
      "learning_rate": 0.0001748179591645738,
      "loss": 3.1967,
      "step": 146770
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.864102840423584,
      "learning_rate": 0.00017481424175115173,
      "loss": 3.0019,
      "step": 146771
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.369626760482788,
      "learning_rate": 0.0001748105243610042,
      "loss": 3.0981,
      "step": 146772
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.724586009979248,
      "learning_rate": 0.00017480680699413174,
      "loss": 3.0373,
      "step": 146773
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.317627191543579,
      "learning_rate": 0.00017480308965053492,
      "loss": 2.9603,
      "step": 146774
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.135857105255127,
      "learning_rate": 0.00017479937233021476,
      "loss": 2.9341,
      "step": 146775
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.3675551414489746,
      "learning_rate": 0.00017479565503317172,
      "loss": 2.942,
      "step": 146776
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.279897689819336,
      "learning_rate": 0.00017479193775940638,
      "loss": 2.7957,
      "step": 146777
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.7614914178848267,
      "learning_rate": 0.00017478822050891974,
      "loss": 2.8267,
      "step": 146778
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.415426254272461,
      "learning_rate": 0.00017478450328171232,
      "loss": 2.5692,
      "step": 146779
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.014871835708618,
      "learning_rate": 0.0001747807860777847,
      "loss": 2.9134,
      "step": 146780
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2260608673095703,
      "learning_rate": 0.00017477706889713782,
      "loss": 2.8815,
      "step": 146781
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2497212886810303,
      "learning_rate": 0.00017477335173977224,
      "loss": 2.7368,
      "step": 146782
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.999556303024292,
      "learning_rate": 0.00017476963460568855,
      "loss": 3.2316,
      "step": 146783
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2918546199798584,
      "learning_rate": 0.00017476591749488767,
      "loss": 2.889,
      "step": 146784
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5180399417877197,
      "learning_rate": 0.00017476220040737016,
      "loss": 3.0539,
      "step": 146785
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.561791181564331,
      "learning_rate": 0.0001747584833431366,
      "loss": 2.9122,
      "step": 146786
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0300915241241455,
      "learning_rate": 0.0001747547663021879,
      "loss": 2.9176,
      "step": 146787
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.56831431388855,
      "learning_rate": 0.0001747510492845246,
      "loss": 2.9871,
      "step": 146788
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.305690050125122,
      "learning_rate": 0.0001747473322901475,
      "loss": 2.7928,
      "step": 146789
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.45001220703125,
      "learning_rate": 0.00017474361531905725,
      "loss": 2.8126,
      "step": 146790
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.422374963760376,
      "learning_rate": 0.00017473989837125457,
      "loss": 2.9488,
      "step": 146791
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9956544637680054,
      "learning_rate": 0.00017473618144673992,
      "loss": 2.966,
      "step": 146792
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1551365852355957,
      "learning_rate": 0.00017473246454551434,
      "loss": 2.8484,
      "step": 146793
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9605331420898438,
      "learning_rate": 0.00017472874766757824,
      "loss": 2.9917,
      "step": 146794
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.708041191101074,
      "learning_rate": 0.00017472503081293253,
      "loss": 2.5604,
      "step": 146795
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.7478322982788086,
      "learning_rate": 0.00017472131398157766,
      "loss": 3.0501,
      "step": 146796
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.571906328201294,
      "learning_rate": 0.00017471759717351465,
      "loss": 3.0118,
      "step": 146797
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1709213256835938,
      "learning_rate": 0.00017471388038874398,
      "loss": 3.0183,
      "step": 146798
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2345809936523438,
      "learning_rate": 0.00017471016362726632,
      "loss": 2.9779,
      "step": 146799
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1659903526306152,
      "learning_rate": 0.00017470644688908233,
      "loss": 2.9228,
      "step": 146800
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9255521297454834,
      "learning_rate": 0.0001747027301741929,
      "loss": 2.8486,
      "step": 146801
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.65132212638855,
      "learning_rate": 0.00017469901348259845,
      "loss": 2.8492,
      "step": 146802
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2436130046844482,
      "learning_rate": 0.00017469529681429997,
      "loss": 3.1445,
      "step": 146803
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7905821800231934,
      "learning_rate": 0.00017469158016929798,
      "loss": 2.8943,
      "step": 146804
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.163860321044922,
      "learning_rate": 0.00017468786354759316,
      "loss": 2.708,
      "step": 146805
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3519866466522217,
      "learning_rate": 0.00017468414694918617,
      "loss": 2.7929,
      "step": 146806
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3199682235717773,
      "learning_rate": 0.00017468043037407785,
      "loss": 2.7908,
      "step": 146807
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2635035514831543,
      "learning_rate": 0.00017467671382226867,
      "loss": 2.7896,
      "step": 146808
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2944741249084473,
      "learning_rate": 0.00017467299729375958,
      "loss": 2.8648,
      "step": 146809
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.728236675262451,
      "learning_rate": 0.0001746692807885512,
      "loss": 3.1814,
      "step": 146810
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.12257719039917,
      "learning_rate": 0.00017466556430664412,
      "loss": 2.9357,
      "step": 146811
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0084071159362793,
      "learning_rate": 0.00017466184784803896,
      "loss": 2.9188,
      "step": 146812
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5319864749908447,
      "learning_rate": 0.00017465813141273666,
      "loss": 3.2682,
      "step": 146813
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0025620460510254,
      "learning_rate": 0.00017465441500073763,
      "loss": 3.1399,
      "step": 146814
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.8949054479599,
      "learning_rate": 0.00017465069861204288,
      "loss": 2.8294,
      "step": 146815
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5852601528167725,
      "learning_rate": 0.0001746469822466529,
      "loss": 3.0677,
      "step": 146816
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1305840015411377,
      "learning_rate": 0.0001746432659045684,
      "loss": 2.8469,
      "step": 146817
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2535829544067383,
      "learning_rate": 0.00017463954958578997,
      "loss": 2.9973,
      "step": 146818
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.2436771392822266,
      "learning_rate": 0.00017463583329031855,
      "loss": 2.9841,
      "step": 146819
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.093493700027466,
      "learning_rate": 0.00017463211701815458,
      "loss": 3.128,
      "step": 146820
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.389491319656372,
      "learning_rate": 0.00017462840076929893,
      "loss": 2.744,
      "step": 146821
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.158337354660034,
      "learning_rate": 0.00017462468454375232,
      "loss": 2.892,
      "step": 146822
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2868568897247314,
      "learning_rate": 0.00017462096834151527,
      "loss": 3.0396,
      "step": 146823
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4690194129943848,
      "learning_rate": 0.00017461725216258846,
      "loss": 2.9102,
      "step": 146824
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0764644145965576,
      "learning_rate": 0.0001746135360069728,
      "loss": 3.2245,
      "step": 146825
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.255478620529175,
      "learning_rate": 0.0001746098198746687,
      "loss": 3.0117,
      "step": 146826
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1138675212860107,
      "learning_rate": 0.00017460610376567715,
      "loss": 2.9525,
      "step": 146827
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.359778881072998,
      "learning_rate": 0.00017460238767999866,
      "loss": 2.9394,
      "step": 146828
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5823628902435303,
      "learning_rate": 0.00017459867161763397,
      "loss": 2.869,
      "step": 146829
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8536856174468994,
      "learning_rate": 0.00017459495557858366,
      "loss": 2.8515,
      "step": 146830
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.181309938430786,
      "learning_rate": 0.0001745912395628486,
      "loss": 3.0747,
      "step": 146831
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.510564088821411,
      "learning_rate": 0.0001745875235704293,
      "loss": 2.9132,
      "step": 146832
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4669175148010254,
      "learning_rate": 0.00017458380760132665,
      "loss": 2.7135,
      "step": 146833
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.043328046798706,
      "learning_rate": 0.00017458009165554123,
      "loss": 2.9823,
      "step": 146834
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7166941165924072,
      "learning_rate": 0.00017457637573307377,
      "loss": 2.8355,
      "step": 146835
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1588428020477295,
      "learning_rate": 0.00017457265983392478,
      "loss": 3.1254,
      "step": 146836
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.600527763366699,
      "learning_rate": 0.00017456894395809523,
      "loss": 2.9038,
      "step": 146837
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.080812454223633,
      "learning_rate": 0.00017456522810558556,
      "loss": 3.1399,
      "step": 146838
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.140958547592163,
      "learning_rate": 0.00017456151227639673,
      "loss": 3.2318,
      "step": 146839
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.497271776199341,
      "learning_rate": 0.00017455779647052925,
      "loss": 2.6977,
      "step": 146840
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.895994782447815,
      "learning_rate": 0.00017455408068798382,
      "loss": 2.9632,
      "step": 146841
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9658353328704834,
      "learning_rate": 0.00017455036492876108,
      "loss": 3.0877,
      "step": 146842
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.065549850463867,
      "learning_rate": 0.0001745466491928619,
      "loss": 2.8869,
      "step": 146843
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.604586601257324,
      "learning_rate": 0.00017454293348028677,
      "loss": 2.9545,
      "step": 146844
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7048144340515137,
      "learning_rate": 0.00017453921779103654,
      "loss": 3.0508,
      "step": 146845
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.596102476119995,
      "learning_rate": 0.00017453550212511186,
      "loss": 3.231,
      "step": 146846
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.330482244491577,
      "learning_rate": 0.00017453178648251343,
      "loss": 3.0808,
      "step": 146847
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1221208572387695,
      "learning_rate": 0.00017452807086324175,
      "loss": 2.9932,
      "step": 146848
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3797736167907715,
      "learning_rate": 0.00017452435526729782,
      "loss": 2.8418,
      "step": 146849
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.471458673477173,
      "learning_rate": 0.00017452063969468205,
      "loss": 3.0272,
      "step": 146850
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.122391939163208,
      "learning_rate": 0.0001745169241453954,
      "loss": 3.0352,
      "step": 146851
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9230470657348633,
      "learning_rate": 0.0001745132086194384,
      "loss": 3.0376,
      "step": 146852
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4872164726257324,
      "learning_rate": 0.00017450949311681175,
      "loss": 2.9879,
      "step": 146853
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.143296241760254,
      "learning_rate": 0.00017450577763751602,
      "loss": 2.7883,
      "step": 146854
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.448780059814453,
      "learning_rate": 0.00017450206218155222,
      "loss": 2.8873,
      "step": 146855
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.361262798309326,
      "learning_rate": 0.0001744983467489207,
      "loss": 3.2613,
      "step": 146856
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.662971019744873,
      "learning_rate": 0.00017449463133962243,
      "loss": 2.9076,
      "step": 146857
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9236689805984497,
      "learning_rate": 0.00017449091595365796,
      "loss": 2.8207,
      "step": 146858
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.68351674079895,
      "learning_rate": 0.00017448720059102792,
      "loss": 3.3037,
      "step": 146859
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.080820083618164,
      "learning_rate": 0.00017448348525173318,
      "loss": 2.6519,
      "step": 146860
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1268885135650635,
      "learning_rate": 0.00017447976993577427,
      "loss": 2.9609,
      "step": 146861
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3848209381103516,
      "learning_rate": 0.0001744760546431519,
      "loss": 3.215,
      "step": 146862
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2697219848632812,
      "learning_rate": 0.00017447233937386688,
      "loss": 2.9569,
      "step": 146863
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2503490447998047,
      "learning_rate": 0.00017446862412791973,
      "loss": 2.9773,
      "step": 146864
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.669968843460083,
      "learning_rate": 0.00017446490890531133,
      "loss": 3.074,
      "step": 146865
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4139761924743652,
      "learning_rate": 0.00017446119370604228,
      "loss": 2.9331,
      "step": 146866
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9517247676849365,
      "learning_rate": 0.00017445747853011328,
      "loss": 3.1401,
      "step": 146867
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.106942653656006,
      "learning_rate": 0.00017445376337752483,
      "loss": 2.9453,
      "step": 146868
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.242732286453247,
      "learning_rate": 0.00017445004824827797,
      "loss": 3.1954,
      "step": 146869
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4340264797210693,
      "learning_rate": 0.0001744463331423731,
      "loss": 2.7599,
      "step": 146870
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.322951078414917,
      "learning_rate": 0.0001744426180598111,
      "loss": 2.8074,
      "step": 146871
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.6705479621887207,
      "learning_rate": 0.00017443890300059258,
      "loss": 2.908,
      "step": 146872
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.102214813232422,
      "learning_rate": 0.0001744351879647182,
      "loss": 2.8368,
      "step": 146873
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0270276069641113,
      "learning_rate": 0.0001744314729521887,
      "loss": 3.0641,
      "step": 146874
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3320930004119873,
      "learning_rate": 0.00017442775796300484,
      "loss": 3.1442,
      "step": 146875
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5990991592407227,
      "learning_rate": 0.0001744240429971671,
      "loss": 2.9668,
      "step": 146876
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0133447647094727,
      "learning_rate": 0.0001744203280546764,
      "loss": 2.9653,
      "step": 146877
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3898746967315674,
      "learning_rate": 0.00017441661313553336,
      "loss": 3.1393,
      "step": 146878
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4950368404388428,
      "learning_rate": 0.00017441289823973849,
      "loss": 2.8762,
      "step": 146879
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.310880422592163,
      "learning_rate": 0.00017440918336729278,
      "loss": 3.1506,
      "step": 146880
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.227454662322998,
      "learning_rate": 0.00017440546851819663,
      "loss": 2.8674,
      "step": 146881
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.31856369972229,
      "learning_rate": 0.000174401753692451,
      "loss": 3.2143,
      "step": 146882
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.790712594985962,
      "learning_rate": 0.00017439803889005646,
      "loss": 2.9846,
      "step": 146883
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.70719838142395,
      "learning_rate": 0.0001743943241110137,
      "loss": 2.9601,
      "step": 146884
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2781262397766113,
      "learning_rate": 0.00017439060935532327,
      "loss": 2.9987,
      "step": 146885
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0176289081573486,
      "learning_rate": 0.00017438689462298613,
      "loss": 2.9689,
      "step": 146886
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.542646646499634,
      "learning_rate": 0.0001743831799140027,
      "loss": 2.9835,
      "step": 146887
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4614508152008057,
      "learning_rate": 0.00017437946522837395,
      "loss": 2.865,
      "step": 146888
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.628976821899414,
      "learning_rate": 0.0001743757505661004,
      "loss": 2.8677,
      "step": 146889
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2758219242095947,
      "learning_rate": 0.0001743720359271828,
      "loss": 2.6233,
      "step": 146890
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.252599000930786,
      "learning_rate": 0.0001743683213116217,
      "loss": 3.1191,
      "step": 146891
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.581718921661377,
      "learning_rate": 0.000174364606719418,
      "loss": 2.9373,
      "step": 146892
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.2914955615997314,
      "learning_rate": 0.00017436089215057217,
      "loss": 2.9605,
      "step": 146893
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1799557209014893,
      "learning_rate": 0.0001743571776050851,
      "loss": 3.1539,
      "step": 146894
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.113215923309326,
      "learning_rate": 0.0001743534630829575,
      "loss": 2.9612,
      "step": 146895
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.807229995727539,
      "learning_rate": 0.0001743497485841899,
      "loss": 2.9966,
      "step": 146896
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.501774787902832,
      "learning_rate": 0.00017434603410878294,
      "loss": 2.7628,
      "step": 146897
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9166858196258545,
      "learning_rate": 0.00017434231965673754,
      "loss": 3.0397,
      "step": 146898
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4545722007751465,
      "learning_rate": 0.00017433860522805415,
      "loss": 3.0322,
      "step": 146899
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.631493091583252,
      "learning_rate": 0.0001743348908227337,
      "loss": 2.75,
      "step": 146900
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4227006435394287,
      "learning_rate": 0.00017433117644077682,
      "loss": 3.1414,
      "step": 146901
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9784018993377686,
      "learning_rate": 0.0001743274620821841,
      "loss": 3.0383,
      "step": 146902
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6285338401794434,
      "learning_rate": 0.00017432374774695614,
      "loss": 3.1153,
      "step": 146903
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1206870079040527,
      "learning_rate": 0.00017432003343509394,
      "loss": 2.9199,
      "step": 146904
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.595519781112671,
      "learning_rate": 0.0001743163191465979,
      "loss": 3.1164,
      "step": 146905
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9635188579559326,
      "learning_rate": 0.0001743126048814689,
      "loss": 2.9612,
      "step": 146906
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.159655809402466,
      "learning_rate": 0.00017430889063970757,
      "loss": 3.0523,
      "step": 146907
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1753411293029785,
      "learning_rate": 0.0001743051764213146,
      "loss": 2.8619,
      "step": 146908
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1870758533477783,
      "learning_rate": 0.00017430146222629055,
      "loss": 2.9395,
      "step": 146909
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1185574531555176,
      "learning_rate": 0.00017429774805463634,
      "loss": 2.6596,
      "step": 146910
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0640735626220703,
      "learning_rate": 0.00017429403390635247,
      "loss": 2.8623,
      "step": 146911
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4996557235717773,
      "learning_rate": 0.00017429031978143976,
      "loss": 2.9652,
      "step": 146912
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1664280891418457,
      "learning_rate": 0.00017428660567989893,
      "loss": 2.7332,
      "step": 146913
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.853459358215332,
      "learning_rate": 0.00017428289160173057,
      "loss": 2.9326,
      "step": 146914
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.288820505142212,
      "learning_rate": 0.00017427917754693525,
      "loss": 2.884,
      "step": 146915
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.59047269821167,
      "learning_rate": 0.00017427546351551394,
      "loss": 2.8641,
      "step": 146916
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5133004188537598,
      "learning_rate": 0.00017427174950746706,
      "loss": 3.1165,
      "step": 146917
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7647390365600586,
      "learning_rate": 0.0001742680355227956,
      "loss": 2.9544,
      "step": 146918
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.427802562713623,
      "learning_rate": 0.00017426432156150004,
      "loss": 2.969,
      "step": 146919
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.178595781326294,
      "learning_rate": 0.00017426060762358112,
      "loss": 2.9155,
      "step": 146920
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3036696910858154,
      "learning_rate": 0.00017425689370903938,
      "loss": 3.1444,
      "step": 146921
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9962923526763916,
      "learning_rate": 0.00017425317981787587,
      "loss": 3.0355,
      "step": 146922
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.080843925476074,
      "learning_rate": 0.0001742494659500909,
      "loss": 2.8028,
      "step": 146923
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.256690263748169,
      "learning_rate": 0.0001742457521056854,
      "loss": 2.9024,
      "step": 146924
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.077275037765503,
      "learning_rate": 0.00017424203828466005,
      "loss": 3.0681,
      "step": 146925
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6108131408691406,
      "learning_rate": 0.00017423832448701542,
      "loss": 2.8569,
      "step": 146926
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0841221809387207,
      "learning_rate": 0.00017423461071275218,
      "loss": 2.9297,
      "step": 146927
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1082825660705566,
      "learning_rate": 0.00017423089696187122,
      "loss": 3.0674,
      "step": 146928
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.350884437561035,
      "learning_rate": 0.00017422718323437296,
      "loss": 3.0507,
      "step": 146929
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.993468761444092,
      "learning_rate": 0.0001742234695302584,
      "loss": 2.9846,
      "step": 146930
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4911649227142334,
      "learning_rate": 0.00017421975584952794,
      "loss": 2.7515,
      "step": 146931
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.309885263442993,
      "learning_rate": 0.00017421604219218265,
      "loss": 2.9295,
      "step": 146932
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9487974643707275,
      "learning_rate": 0.00017421232855822273,
      "loss": 3.1658,
      "step": 146933
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.842888116836548,
      "learning_rate": 0.00017420861494764925,
      "loss": 2.7962,
      "step": 146934
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.6900827884674072,
      "learning_rate": 0.00017420490136046257,
      "loss": 2.9042,
      "step": 146935
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4836883544921875,
      "learning_rate": 0.00017420118779666374,
      "loss": 3.0601,
      "step": 146936
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7510409355163574,
      "learning_rate": 0.00017419747425625318,
      "loss": 2.9561,
      "step": 146937
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9059298038482666,
      "learning_rate": 0.00017419376073923192,
      "loss": 2.735,
      "step": 146938
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.087515830993652,
      "learning_rate": 0.00017419004724560016,
      "loss": 2.8935,
      "step": 146939
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1478657722473145,
      "learning_rate": 0.00017418633377535897,
      "loss": 3.0106,
      "step": 146940
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.653778553009033,
      "learning_rate": 0.00017418262032850877,
      "loss": 2.8355,
      "step": 146941
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5281364917755127,
      "learning_rate": 0.00017417890690505055,
      "loss": 2.9895,
      "step": 146942
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.527923583984375,
      "learning_rate": 0.00017417519350498473,
      "loss": 2.7843,
      "step": 146943
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.968198299407959,
      "learning_rate": 0.0001741714801283123,
      "loss": 3.1262,
      "step": 146944
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.697991132736206,
      "learning_rate": 0.00017416776677503358,
      "loss": 2.9487,
      "step": 146945
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.694899082183838,
      "learning_rate": 0.0001741640534451496,
      "loss": 2.8407,
      "step": 146946
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.155498504638672,
      "learning_rate": 0.0001741603401386607,
      "loss": 2.9251,
      "step": 146947
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.383364677429199,
      "learning_rate": 0.00017415662685556796,
      "loss": 3.0097,
      "step": 146948
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6426050662994385,
      "learning_rate": 0.00017415291359587168,
      "loss": 3.216,
      "step": 146949
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9849528074264526,
      "learning_rate": 0.00017414920035957292,
      "loss": 2.9333,
      "step": 146950
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2012484073638916,
      "learning_rate": 0.00017414548714667218,
      "loss": 3.0098,
      "step": 146951
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5048844814300537,
      "learning_rate": 0.0001741417739571702,
      "loss": 2.9166,
      "step": 146952
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7832658290863037,
      "learning_rate": 0.0001741380607910675,
      "loss": 2.6605,
      "step": 146953
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.246788501739502,
      "learning_rate": 0.000174134347648365,
      "loss": 3.2265,
      "step": 146954
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2598841190338135,
      "learning_rate": 0.0001741306345290632,
      "loss": 2.9997,
      "step": 146955
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6415622234344482,
      "learning_rate": 0.00017412692143316304,
      "loss": 2.9385,
      "step": 146956
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8495125770568848,
      "learning_rate": 0.00017412320836066502,
      "loss": 3.1811,
      "step": 146957
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3266334533691406,
      "learning_rate": 0.0001741194953115698,
      "loss": 2.8579,
      "step": 146958
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5010738372802734,
      "learning_rate": 0.00017411578228587824,
      "loss": 2.9877,
      "step": 146959
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3437376022338867,
      "learning_rate": 0.00017411206928359094,
      "loss": 3.0123,
      "step": 146960
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.555572032928467,
      "learning_rate": 0.00017410835630470844,
      "loss": 2.925,
      "step": 146961
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4739911556243896,
      "learning_rate": 0.0001741046433492317,
      "loss": 2.773,
      "step": 146962
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7707371711730957,
      "learning_rate": 0.0001741009304171613,
      "loss": 2.9554,
      "step": 146963
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.389983892440796,
      "learning_rate": 0.00017409721750849777,
      "loss": 3.0554,
      "step": 146964
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2067320346832275,
      "learning_rate": 0.00017409350462324207,
      "loss": 3.1162,
      "step": 146965
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.749774694442749,
      "learning_rate": 0.00017408979176139478,
      "loss": 2.9603,
      "step": 146966
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.271101713180542,
      "learning_rate": 0.0001740860789229564,
      "loss": 2.9467,
      "step": 146967
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5463650226593018,
      "learning_rate": 0.00017408236610792798,
      "loss": 3.208,
      "step": 146968
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9719417095184326,
      "learning_rate": 0.00017407865331631,
      "loss": 3.1888,
      "step": 146969
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.094717502593994,
      "learning_rate": 0.00017407494054810305,
      "loss": 2.8208,
      "step": 146970
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.197113990783691,
      "learning_rate": 0.0001740712278033081,
      "loss": 2.7206,
      "step": 146971
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8459465503692627,
      "learning_rate": 0.00017406751508192552,
      "loss": 2.8602,
      "step": 146972
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.502723217010498,
      "learning_rate": 0.0001740638023839563,
      "loss": 3.0727,
      "step": 146973
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3524932861328125,
      "learning_rate": 0.000174060089709401,
      "loss": 3.1715,
      "step": 146974
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6851181983947754,
      "learning_rate": 0.0001740563770582603,
      "loss": 2.9917,
      "step": 146975
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.608867645263672,
      "learning_rate": 0.0001740526644305348,
      "loss": 3.0397,
      "step": 146976
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0011277198791504,
      "learning_rate": 0.0001740489518262254,
      "loss": 2.8938,
      "step": 146977
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0788750648498535,
      "learning_rate": 0.00017404523924533253,
      "loss": 3.0131,
      "step": 146978
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3632090091705322,
      "learning_rate": 0.0001740415266878572,
      "loss": 2.9693,
      "step": 146979
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2679412364959717,
      "learning_rate": 0.0001740378141537999,
      "loss": 2.7808,
      "step": 146980
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5649640560150146,
      "learning_rate": 0.00017403410164316133,
      "loss": 2.8471,
      "step": 146981
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.217503786087036,
      "learning_rate": 0.00017403038915594208,
      "loss": 3.16,
      "step": 146982
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.871833086013794,
      "learning_rate": 0.0001740266766921431,
      "loss": 2.8958,
      "step": 146983
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1039388179779053,
      "learning_rate": 0.00017402296425176477,
      "loss": 2.8311,
      "step": 146984
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8338632583618164,
      "learning_rate": 0.00017401925183480814,
      "loss": 2.977,
      "step": 146985
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.655630588531494,
      "learning_rate": 0.00017401553944127368,
      "loss": 2.7264,
      "step": 146986
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8536570072174072,
      "learning_rate": 0.0001740118270711621,
      "loss": 2.8831,
      "step": 146987
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.138187885284424,
      "learning_rate": 0.00017400811472447397,
      "loss": 3.097,
      "step": 146988
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9727072715759277,
      "learning_rate": 0.00017400440240121026,
      "loss": 3.0475,
      "step": 146989
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2682580947875977,
      "learning_rate": 0.0001740006901013714,
      "loss": 2.9917,
      "step": 146990
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.7160778045654297,
      "learning_rate": 0.00017399697782495826,
      "loss": 3.0127,
      "step": 146991
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6783218383789062,
      "learning_rate": 0.0001739932655719715,
      "loss": 2.921,
      "step": 146992
    },
    {
      "epoch": 1.91,
      "grad_norm": 8.01781940460205,
      "learning_rate": 0.00017398955334241176,
      "loss": 2.9205,
      "step": 146993
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1977906227111816,
      "learning_rate": 0.00017398584113627961,
      "loss": 3.0388,
      "step": 146994
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3710274696350098,
      "learning_rate": 0.00017398212895357597,
      "loss": 2.9861,
      "step": 146995
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1768009662628174,
      "learning_rate": 0.00017397841679430136,
      "loss": 2.8494,
      "step": 146996
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.082961082458496,
      "learning_rate": 0.0001739747046584566,
      "loss": 2.9979,
      "step": 146997
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.096633195877075,
      "learning_rate": 0.0001739709925460423,
      "loss": 2.9581,
      "step": 146998
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.296233654022217,
      "learning_rate": 0.00017396728045705932,
      "loss": 3.0082,
      "step": 146999
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.061892032623291,
      "learning_rate": 0.00017396356839150803,
      "loss": 2.9614,
      "step": 147000
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5266683101654053,
      "learning_rate": 0.00017395985634938933,
      "loss": 3.1193,
      "step": 147001
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0840306282043457,
      "learning_rate": 0.00017395614433070384,
      "loss": 3.1334,
      "step": 147002
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.25236177444458,
      "learning_rate": 0.00017395243233545233,
      "loss": 2.9899,
      "step": 147003
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.1363484859466553,
      "learning_rate": 0.00017394872036363538,
      "loss": 2.9292,
      "step": 147004
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.618867874145508,
      "learning_rate": 0.00017394500841525393,
      "loss": 2.6189,
      "step": 147005
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6990041732788086,
      "learning_rate": 0.00017394129649030826,
      "loss": 2.8094,
      "step": 147006
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.783785104751587,
      "learning_rate": 0.00017393758458879942,
      "loss": 2.9101,
      "step": 147007
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.8408360481262207,
      "learning_rate": 0.00017393387271072785,
      "loss": 2.9168,
      "step": 147008
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.403454542160034,
      "learning_rate": 0.00017393016085609446,
      "loss": 2.9284,
      "step": 147009
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.476712465286255,
      "learning_rate": 0.00017392644902489973,
      "loss": 2.8348,
      "step": 147010
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.023577690124512,
      "learning_rate": 0.00017392273721714464,
      "loss": 2.8975,
      "step": 147011
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7547056674957275,
      "learning_rate": 0.00017391902543282948,
      "loss": 2.8684,
      "step": 147012
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7104947566986084,
      "learning_rate": 0.00017391531367195526,
      "loss": 2.7972,
      "step": 147013
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.169320583343506,
      "learning_rate": 0.00017391160193452247,
      "loss": 3.2492,
      "step": 147014
    },
    {
      "epoch": 1.91,
      "grad_norm": 4.090485095977783,
      "learning_rate": 0.00017390789022053203,
      "loss": 2.9891,
      "step": 147015
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.358361005783081,
      "learning_rate": 0.00017390417852998431,
      "loss": 2.9226,
      "step": 147016
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6380810737609863,
      "learning_rate": 0.00017390046686288047,
      "loss": 3.0889,
      "step": 147017
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9759714603424072,
      "learning_rate": 0.00017389675521922067,
      "loss": 3.0902,
      "step": 147018
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.945342540740967,
      "learning_rate": 0.00017389304359900597,
      "loss": 2.9622,
      "step": 147019
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.921222686767578,
      "learning_rate": 0.00017388933200223678,
      "loss": 2.8084,
      "step": 147020
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.3346381187438965,
      "learning_rate": 0.0001738856204289141,
      "loss": 2.8185,
      "step": 147021
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9009642601013184,
      "learning_rate": 0.00017388190887903833,
      "loss": 3.0734,
      "step": 147022
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.70242977142334,
      "learning_rate": 0.00017387819735261052,
      "loss": 3.0235,
      "step": 147023
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5370376110076904,
      "learning_rate": 0.0001738744858496309,
      "loss": 3.1044,
      "step": 147024
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0306613445281982,
      "learning_rate": 0.00017387077437010053,
      "loss": 2.766,
      "step": 147025
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.2271041870117188,
      "learning_rate": 0.00017386706291401983,
      "loss": 2.8287,
      "step": 147026
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.80428409576416,
      "learning_rate": 0.00017386335148138977,
      "loss": 2.8183,
      "step": 147027
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.345066547393799,
      "learning_rate": 0.0001738596400722107,
      "loss": 2.8349,
      "step": 147028
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.4128756523132324,
      "learning_rate": 0.00017385592868648383,
      "loss": 2.8588,
      "step": 147029
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.525585412979126,
      "learning_rate": 0.00017385221732420928,
      "loss": 2.8872,
      "step": 147030
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4907889366149902,
      "learning_rate": 0.00017384850598538808,
      "loss": 3.2502,
      "step": 147031
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.5013670921325684,
      "learning_rate": 0.00017384479467002064,
      "loss": 2.919,
      "step": 147032
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3903210163116455,
      "learning_rate": 0.00017384108337810804,
      "loss": 3.0596,
      "step": 147033
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.801690101623535,
      "learning_rate": 0.00017383737210965064,
      "loss": 2.8507,
      "step": 147034
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6634104251861572,
      "learning_rate": 0.00017383366086464933,
      "loss": 2.8267,
      "step": 147035
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.944117546081543,
      "learning_rate": 0.00017382994964310473,
      "loss": 2.9799,
      "step": 147036
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.1912007331848145,
      "learning_rate": 0.00017382623844501753,
      "loss": 2.8723,
      "step": 147037
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6427061557769775,
      "learning_rate": 0.0001738225272703883,
      "loss": 2.8961,
      "step": 147038
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.6541481018066406,
      "learning_rate": 0.00017381881611921798,
      "loss": 2.8815,
      "step": 147039
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.651092529296875,
      "learning_rate": 0.00017381510499150699,
      "loss": 3.0619,
      "step": 147040
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.20589017868042,
      "learning_rate": 0.0001738113938872563,
      "loss": 3.0285,
      "step": 147041
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3198256492614746,
      "learning_rate": 0.0001738076828064664,
      "loss": 3.1683,
      "step": 147042
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.780561685562134,
      "learning_rate": 0.00017380397174913793,
      "loss": 2.9938,
      "step": 147043
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9540178775787354,
      "learning_rate": 0.00017380026071527182,
      "loss": 2.9816,
      "step": 147044
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.9166111946105957,
      "learning_rate": 0.00017379654970486863,
      "loss": 2.8761,
      "step": 147045
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.245605230331421,
      "learning_rate": 0.0001737928387179289,
      "loss": 2.9831,
      "step": 147046
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.3919057846069336,
      "learning_rate": 0.0001737891277544536,
      "loss": 3.0568,
      "step": 147047
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.84344220161438,
      "learning_rate": 0.0001737854168144433,
      "loss": 3.0353,
      "step": 147048
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7859153747558594,
      "learning_rate": 0.0001737817058978985,
      "loss": 3.187,
      "step": 147049
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.0069353580474854,
      "learning_rate": 0.00017377799500482022,
      "loss": 2.8231,
      "step": 147050
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.150033712387085,
      "learning_rate": 0.000173774284135209,
      "loss": 2.7694,
      "step": 147051
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.215837001800537,
      "learning_rate": 0.00017377057328906537,
      "loss": 2.9819,
      "step": 147052
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.8300538063049316,
      "learning_rate": 0.00017376686246639035,
      "loss": 2.8152,
      "step": 147053
    },
    {
      "epoch": 1.91,
      "grad_norm": 5.153323173522949,
      "learning_rate": 0.0001737631516671844,
      "loss": 2.7947,
      "step": 147054
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.261240243911743,
      "learning_rate": 0.0001737594408914482,
      "loss": 3.0564,
      "step": 147055
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0105297565460205,
      "learning_rate": 0.00017375573013918259,
      "loss": 3.0652,
      "step": 147056
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.5210084915161133,
      "learning_rate": 0.000173752019410388,
      "loss": 2.9486,
      "step": 147057
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7672479152679443,
      "learning_rate": 0.00017374830870506546,
      "loss": 2.7959,
      "step": 147058
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.9726897478103638,
      "learning_rate": 0.00017374459802321554,
      "loss": 2.6219,
      "step": 147059
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.0294363498687744,
      "learning_rate": 0.0001737408873648388,
      "loss": 2.8787,
      "step": 147060
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.56624174118042,
      "learning_rate": 0.00017373717672993598,
      "loss": 2.7728,
      "step": 147061
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.4104676246643066,
      "learning_rate": 0.00017373346611850786,
      "loss": 2.8059,
      "step": 147062
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.7689399719238281,
      "learning_rate": 0.00017372975553055494,
      "loss": 2.7597,
      "step": 147063
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.319791316986084,
      "learning_rate": 0.0001737260449660782,
      "loss": 3.0476,
      "step": 147064
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.127566337585449,
      "learning_rate": 0.00017372233442507814,
      "loss": 2.9409,
      "step": 147065
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.305619716644287,
      "learning_rate": 0.0001737186239075555,
      "loss": 2.9256,
      "step": 147066
    },
    {
      "epoch": 1.91,
      "grad_norm": 1.873677372932434,
      "learning_rate": 0.00017371491341351081,
      "loss": 3.0272,
      "step": 147067
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.965376138687134,
      "learning_rate": 0.00017371120294294506,
      "loss": 2.9499,
      "step": 147068
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.704802989959717,
      "learning_rate": 0.00017370749249585865,
      "loss": 2.9838,
      "step": 147069
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.606679677963257,
      "learning_rate": 0.00017370378207225249,
      "loss": 2.9864,
      "step": 147070
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.557091474533081,
      "learning_rate": 0.0001737000716721271,
      "loss": 2.9899,
      "step": 147071
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.7241318225860596,
      "learning_rate": 0.00017369636129548348,
      "loss": 2.9771,
      "step": 147072
    },
    {
      "epoch": 1.91,
      "grad_norm": 2.053067684173584,
      "learning_rate": 0.00017369265094232183,
      "loss": 2.9005,
      "step": 147073
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.253568172454834,
      "learning_rate": 0.00017368894061264327,
      "loss": 3.0001,
      "step": 147074
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.325037956237793,
      "learning_rate": 0.00017368523030644815,
      "loss": 3.1203,
      "step": 147075
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.701552152633667,
      "learning_rate": 0.00017368152002373744,
      "loss": 3.0399,
      "step": 147076
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0815577507019043,
      "learning_rate": 0.00017367780976451167,
      "loss": 2.9223,
      "step": 147077
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.964456558227539,
      "learning_rate": 0.00017367409952877177,
      "loss": 3.0665,
      "step": 147078
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.8837883472442627,
      "learning_rate": 0.000173670389316518,
      "loss": 3.127,
      "step": 147079
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4945144653320312,
      "learning_rate": 0.00017366667912775138,
      "loss": 3.1987,
      "step": 147080
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2621889114379883,
      "learning_rate": 0.00017366296896247245,
      "loss": 2.7651,
      "step": 147081
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.424001693725586,
      "learning_rate": 0.00017365925882068206,
      "loss": 2.7425,
      "step": 147082
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.013873815536499,
      "learning_rate": 0.00017365554870238067,
      "loss": 3.2817,
      "step": 147083
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9525103569030762,
      "learning_rate": 0.00017365183860756933,
      "loss": 3.2113,
      "step": 147084
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.208327293395996,
      "learning_rate": 0.00017364812853624828,
      "loss": 2.7959,
      "step": 147085
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4489409923553467,
      "learning_rate": 0.0001736444184884185,
      "loss": 2.9606,
      "step": 147086
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.294363021850586,
      "learning_rate": 0.00017364070846408056,
      "loss": 2.9515,
      "step": 147087
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0340020656585693,
      "learning_rate": 0.00017363699846323528,
      "loss": 2.9468,
      "step": 147088
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7658190727233887,
      "learning_rate": 0.00017363328848588315,
      "loss": 3.0096,
      "step": 147089
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.168055772781372,
      "learning_rate": 0.0001736295785320252,
      "loss": 3.0269,
      "step": 147090
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3311450481414795,
      "learning_rate": 0.00017362586860166168,
      "loss": 2.9222,
      "step": 147091
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4437105655670166,
      "learning_rate": 0.00017362215869479365,
      "loss": 3.2309,
      "step": 147092
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2279746532440186,
      "learning_rate": 0.00017361844881142146,
      "loss": 2.6282,
      "step": 147093
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6517627239227295,
      "learning_rate": 0.00017361473895154613,
      "loss": 2.8645,
      "step": 147094
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.921318531036377,
      "learning_rate": 0.00017361102911516807,
      "loss": 3.1386,
      "step": 147095
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3840901851654053,
      "learning_rate": 0.00017360731930228838,
      "loss": 2.9333,
      "step": 147096
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.973250150680542,
      "learning_rate": 0.00017360360951290723,
      "loss": 3.1333,
      "step": 147097
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.548306703567505,
      "learning_rate": 0.00017359989974702567,
      "loss": 2.9578,
      "step": 147098
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0906176567077637,
      "learning_rate": 0.0001735961900046441,
      "loss": 2.934,
      "step": 147099
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.09380841255188,
      "learning_rate": 0.00017359248028576355,
      "loss": 2.9138,
      "step": 147100
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1445798873901367,
      "learning_rate": 0.00017358877059038444,
      "loss": 2.8168,
      "step": 147101
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.523139715194702,
      "learning_rate": 0.00017358506091850779,
      "loss": 2.7618,
      "step": 147102
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.235758066177368,
      "learning_rate": 0.00017358135127013385,
      "loss": 2.8555,
      "step": 147103
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.96638822555542,
      "learning_rate": 0.00017357764164526362,
      "loss": 2.8921,
      "step": 147104
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4484705924987793,
      "learning_rate": 0.00017357393204389755,
      "loss": 3.0746,
      "step": 147105
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.646486759185791,
      "learning_rate": 0.00017357022246603662,
      "loss": 3.1497,
      "step": 147106
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.321713447570801,
      "learning_rate": 0.00017356651291168122,
      "loss": 2.9263,
      "step": 147107
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.985971450805664,
      "learning_rate": 0.00017356280338083247,
      "loss": 2.919,
      "step": 147108
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1954259872436523,
      "learning_rate": 0.00017355909387349052,
      "loss": 2.9244,
      "step": 147109
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1748430728912354,
      "learning_rate": 0.00017355538438965646,
      "loss": 3.0862,
      "step": 147110
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1374475955963135,
      "learning_rate": 0.0001735516749293307,
      "loss": 2.943,
      "step": 147111
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5200443267822266,
      "learning_rate": 0.0001735479654925142,
      "loss": 3.0338,
      "step": 147112
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6774444580078125,
      "learning_rate": 0.00017354425607920738,
      "loss": 3.0332,
      "step": 147113
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.291412591934204,
      "learning_rate": 0.00017354054668941132,
      "loss": 3.2415,
      "step": 147114
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.67938232421875,
      "learning_rate": 0.00017353683732312627,
      "loss": 2.9841,
      "step": 147115
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2260355949401855,
      "learning_rate": 0.0001735331279803532,
      "loss": 2.9353,
      "step": 147116
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.541109561920166,
      "learning_rate": 0.00017352941866109257,
      "loss": 2.906,
      "step": 147117
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5369677543640137,
      "learning_rate": 0.00017352570936534533,
      "loss": 3.1427,
      "step": 147118
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5701816082000732,
      "learning_rate": 0.00017352200009311192,
      "loss": 2.6714,
      "step": 147119
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3561465740203857,
      "learning_rate": 0.00017351829084439328,
      "loss": 2.8857,
      "step": 147120
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9693430662155151,
      "learning_rate": 0.00017351458161919,
      "loss": 2.8035,
      "step": 147121
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9783403873443604,
      "learning_rate": 0.00017351087241750273,
      "loss": 3.0838,
      "step": 147122
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.82368540763855,
      "learning_rate": 0.0001735071632393321,
      "loss": 2.9785,
      "step": 147123
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9871902465820312,
      "learning_rate": 0.00017350345408467894,
      "loss": 2.8371,
      "step": 147124
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.335068941116333,
      "learning_rate": 0.00017349974495354378,
      "loss": 2.9806,
      "step": 147125
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9441137313842773,
      "learning_rate": 0.0001734960358459275,
      "loss": 2.9676,
      "step": 147126
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.097595691680908,
      "learning_rate": 0.00017349232676183073,
      "loss": 3.2054,
      "step": 147127
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0539135932922363,
      "learning_rate": 0.0001734886177012541,
      "loss": 2.7773,
      "step": 147128
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.3814635276794434,
      "learning_rate": 0.0001734849086641982,
      "loss": 2.8929,
      "step": 147129
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.601532459259033,
      "learning_rate": 0.00017348119965066402,
      "loss": 2.8746,
      "step": 147130
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6876204013824463,
      "learning_rate": 0.0001734774906606519,
      "loss": 3.0078,
      "step": 147131
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.539660692214966,
      "learning_rate": 0.00017347378169416282,
      "loss": 3.134,
      "step": 147132
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6500980854034424,
      "learning_rate": 0.00017347007275119732,
      "loss": 2.9179,
      "step": 147133
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.777409315109253,
      "learning_rate": 0.0001734663638317561,
      "loss": 2.8364,
      "step": 147134
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4421749114990234,
      "learning_rate": 0.0001734626549358399,
      "loss": 3.0452,
      "step": 147135
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.355212688446045,
      "learning_rate": 0.00017345894606344943,
      "loss": 3.0658,
      "step": 147136
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.264976739883423,
      "learning_rate": 0.0001734552372145852,
      "loss": 2.9956,
      "step": 147137
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9917021989822388,
      "learning_rate": 0.00017345152838924816,
      "loss": 3.0893,
      "step": 147138
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.613265514373779,
      "learning_rate": 0.00017344781958743882,
      "loss": 2.8843,
      "step": 147139
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1557345390319824,
      "learning_rate": 0.00017344411080915789,
      "loss": 3.1428,
      "step": 147140
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.38730525970459,
      "learning_rate": 0.00017344040205440615,
      "loss": 2.9597,
      "step": 147141
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0531458854675293,
      "learning_rate": 0.0001734366933231841,
      "loss": 2.9485,
      "step": 147142
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.3387813568115234,
      "learning_rate": 0.00017343298461549268,
      "loss": 2.9673,
      "step": 147143
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3000588417053223,
      "learning_rate": 0.00017342927593133248,
      "loss": 2.9064,
      "step": 147144
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8910460472106934,
      "learning_rate": 0.00017342556727070418,
      "loss": 3.0587,
      "step": 147145
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.297997236251831,
      "learning_rate": 0.0001734218586336083,
      "loss": 2.8035,
      "step": 147146
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.019428014755249,
      "learning_rate": 0.0001734181500200458,
      "loss": 3.0787,
      "step": 147147
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5268661975860596,
      "learning_rate": 0.00017341444143001718,
      "loss": 2.7323,
      "step": 147148
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5484979152679443,
      "learning_rate": 0.00017341073286352327,
      "loss": 3.0178,
      "step": 147149
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1119964122772217,
      "learning_rate": 0.00017340702432056476,
      "loss": 2.6568,
      "step": 147150
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9981266260147095,
      "learning_rate": 0.0001734033158011422,
      "loss": 3.0313,
      "step": 147151
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2509686946868896,
      "learning_rate": 0.0001733996073052563,
      "loss": 3.0497,
      "step": 147152
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.361635208129883,
      "learning_rate": 0.00017339589883290789,
      "loss": 2.9448,
      "step": 147153
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.73386549949646,
      "learning_rate": 0.00017339219038409745,
      "loss": 2.8813,
      "step": 147154
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7663064002990723,
      "learning_rate": 0.00017338848195882592,
      "loss": 3.0634,
      "step": 147155
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3102333545684814,
      "learning_rate": 0.00017338477355709374,
      "loss": 3.1917,
      "step": 147156
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.853347063064575,
      "learning_rate": 0.00017338106517890195,
      "loss": 2.8816,
      "step": 147157
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1421470642089844,
      "learning_rate": 0.00017337735682425081,
      "loss": 3.0434,
      "step": 147158
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.093010902404785,
      "learning_rate": 0.0001733736484931413,
      "loss": 3.0038,
      "step": 147159
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7735884189605713,
      "learning_rate": 0.00017336994018557386,
      "loss": 2.9438,
      "step": 147160
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2522854804992676,
      "learning_rate": 0.00017336623190154947,
      "loss": 3.0785,
      "step": 147161
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.650641918182373,
      "learning_rate": 0.00017336252364106864,
      "loss": 3.0466,
      "step": 147162
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.668050765991211,
      "learning_rate": 0.00017335881540413227,
      "loss": 2.8371,
      "step": 147163
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7908451557159424,
      "learning_rate": 0.0001733551071907407,
      "loss": 2.8751,
      "step": 147164
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.16088604927063,
      "learning_rate": 0.00017335139900089488,
      "loss": 2.809,
      "step": 147165
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.073394536972046,
      "learning_rate": 0.00017334769083459536,
      "loss": 2.8557,
      "step": 147166
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.444981575012207,
      "learning_rate": 0.000173343982691843,
      "loss": 3.0598,
      "step": 147167
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.440279722213745,
      "learning_rate": 0.00017334027457263824,
      "loss": 2.9574,
      "step": 147168
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.410093069076538,
      "learning_rate": 0.00017333656647698214,
      "loss": 2.7212,
      "step": 147169
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7532358169555664,
      "learning_rate": 0.00017333285840487493,
      "loss": 2.9062,
      "step": 147170
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3461241722106934,
      "learning_rate": 0.00017332915035631768,
      "loss": 2.9766,
      "step": 147171
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6599581241607666,
      "learning_rate": 0.00017332544233131082,
      "loss": 3.4114,
      "step": 147172
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7570486068725586,
      "learning_rate": 0.00017332173432985524,
      "loss": 3.0112,
      "step": 147173
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.021366596221924,
      "learning_rate": 0.00017331802635195143,
      "loss": 3.2097,
      "step": 147174
    },
    {
      "epoch": 1.92,
      "grad_norm": 6.214012622833252,
      "learning_rate": 0.00017331431839760045,
      "loss": 2.9421,
      "step": 147175
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9364328384399414,
      "learning_rate": 0.00017331061046680247,
      "loss": 2.9771,
      "step": 147176
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6373181343078613,
      "learning_rate": 0.00017330690255955856,
      "loss": 2.8964,
      "step": 147177
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.915378451347351,
      "learning_rate": 0.0001733031946758692,
      "loss": 3.1196,
      "step": 147178
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.092015266418457,
      "learning_rate": 0.00017329948681573523,
      "loss": 3.0498,
      "step": 147179
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9751033782958984,
      "learning_rate": 0.00017329577897915715,
      "loss": 2.9481,
      "step": 147180
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.472421407699585,
      "learning_rate": 0.00017329207116613607,
      "loss": 2.9532,
      "step": 147181
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1429364681243896,
      "learning_rate": 0.00017328836337667216,
      "loss": 3.0142,
      "step": 147182
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4326012134552,
      "learning_rate": 0.0001732846556107664,
      "loss": 2.8487,
      "step": 147183
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.7821080684661865,
      "learning_rate": 0.0001732809478684193,
      "loss": 2.8061,
      "step": 147184
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0923008918762207,
      "learning_rate": 0.00017327724014963184,
      "loss": 2.9916,
      "step": 147185
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7428982257843018,
      "learning_rate": 0.00017327353245440434,
      "loss": 2.9776,
      "step": 147186
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.321985960006714,
      "learning_rate": 0.000173269824782738,
      "loss": 2.8629,
      "step": 147187
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1548492908477783,
      "learning_rate": 0.00017326611713463288,
      "loss": 3.0651,
      "step": 147188
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.148474931716919,
      "learning_rate": 0.00017326240951009013,
      "loss": 2.9759,
      "step": 147189
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.121039390563965,
      "learning_rate": 0.00017325870190911018,
      "loss": 2.8785,
      "step": 147190
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.949380874633789,
      "learning_rate": 0.00017325499433169393,
      "loss": 3.3234,
      "step": 147191
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1378695964813232,
      "learning_rate": 0.00017325128677784187,
      "loss": 2.9349,
      "step": 147192
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.127288341522217,
      "learning_rate": 0.000173247579247555,
      "loss": 2.9445,
      "step": 147193
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0506443977355957,
      "learning_rate": 0.00017324387174083355,
      "loss": 2.8037,
      "step": 147194
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1365268230438232,
      "learning_rate": 0.00017324016425767863,
      "loss": 2.9805,
      "step": 147195
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.194827079772949,
      "learning_rate": 0.00017323645679809062,
      "loss": 2.9132,
      "step": 147196
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4417195320129395,
      "learning_rate": 0.00017323274936207044,
      "loss": 2.881,
      "step": 147197
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3461644649505615,
      "learning_rate": 0.0001732290419496186,
      "loss": 2.7912,
      "step": 147198
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.338686227798462,
      "learning_rate": 0.00017322533456073596,
      "loss": 3.0866,
      "step": 147199
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.622520923614502,
      "learning_rate": 0.00017322162719542313,
      "loss": 3.2324,
      "step": 147200
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.641357660293579,
      "learning_rate": 0.0001732179198536808,
      "loss": 3.1165,
      "step": 147201
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.245274782180786,
      "learning_rate": 0.00017321421253550952,
      "loss": 2.9164,
      "step": 147202
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4376068115234375,
      "learning_rate": 0.00017321050524091026,
      "loss": 3.142,
      "step": 147203
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5246036052703857,
      "learning_rate": 0.00017320679796988344,
      "loss": 2.8937,
      "step": 147204
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2066140174865723,
      "learning_rate": 0.00017320309072242995,
      "loss": 3.0385,
      "step": 147205
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.986936092376709,
      "learning_rate": 0.0001731993834985504,
      "loss": 3.0401,
      "step": 147206
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3103201389312744,
      "learning_rate": 0.00017319567629824553,
      "loss": 3.0113,
      "step": 147207
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9087809324264526,
      "learning_rate": 0.00017319196912151581,
      "loss": 3.268,
      "step": 147208
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9589166641235352,
      "learning_rate": 0.00017318826196836221,
      "loss": 3.0461,
      "step": 147209
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7254040241241455,
      "learning_rate": 0.00017318455483878524,
      "loss": 3.0062,
      "step": 147210
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.704258918762207,
      "learning_rate": 0.00017318084773278574,
      "loss": 2.9567,
      "step": 147211
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0916099548339844,
      "learning_rate": 0.0001731771406503643,
      "loss": 3.0611,
      "step": 147212
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.486902952194214,
      "learning_rate": 0.00017317343359152167,
      "loss": 2.7669,
      "step": 147213
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.633514642715454,
      "learning_rate": 0.0001731697265562583,
      "loss": 2.7746,
      "step": 147214
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.896430015563965,
      "learning_rate": 0.00017316601954457526,
      "loss": 2.9648,
      "step": 147215
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.307513952255249,
      "learning_rate": 0.00017316231255647292,
      "loss": 3.0037,
      "step": 147216
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0572500228881836,
      "learning_rate": 0.0001731586055919522,
      "loss": 2.9757,
      "step": 147217
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.84659481048584,
      "learning_rate": 0.0001731548986510137,
      "loss": 2.9491,
      "step": 147218
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.924365997314453,
      "learning_rate": 0.000173151191733658,
      "loss": 3.1508,
      "step": 147219
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2061591148376465,
      "learning_rate": 0.00017314748483988595,
      "loss": 2.9132,
      "step": 147220
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.854653000831604,
      "learning_rate": 0.00017314377796969822,
      "loss": 2.9259,
      "step": 147221
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3524837493896484,
      "learning_rate": 0.00017314007112309533,
      "loss": 2.8756,
      "step": 147222
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.262341260910034,
      "learning_rate": 0.0001731363643000782,
      "loss": 2.9662,
      "step": 147223
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2198500633239746,
      "learning_rate": 0.00017313265750064746,
      "loss": 2.9544,
      "step": 147224
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.026120662689209,
      "learning_rate": 0.00017312895072480358,
      "loss": 3.0139,
      "step": 147225
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9178824424743652,
      "learning_rate": 0.00017312524397254756,
      "loss": 3.0541,
      "step": 147226
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3121752738952637,
      "learning_rate": 0.00017312153724387996,
      "loss": 2.8775,
      "step": 147227
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.698530673980713,
      "learning_rate": 0.00017311783053880134,
      "loss": 2.5552,
      "step": 147228
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2329323291778564,
      "learning_rate": 0.0001731141238573126,
      "loss": 2.7918,
      "step": 147229
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.5285604000091553,
      "learning_rate": 0.0001731104171994144,
      "loss": 2.9815,
      "step": 147230
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3934292793273926,
      "learning_rate": 0.0001731067105651072,
      "loss": 2.9318,
      "step": 147231
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4092020988464355,
      "learning_rate": 0.00017310300395439202,
      "loss": 3.1884,
      "step": 147232
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.292996406555176,
      "learning_rate": 0.00017309929736726925,
      "loss": 3.0851,
      "step": 147233
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2051095962524414,
      "learning_rate": 0.00017309559080373982,
      "loss": 2.8765,
      "step": 147234
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1653072834014893,
      "learning_rate": 0.0001730918842638043,
      "loss": 3.0997,
      "step": 147235
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.281054973602295,
      "learning_rate": 0.0001730881777474634,
      "loss": 3.1914,
      "step": 147236
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4271774291992188,
      "learning_rate": 0.00017308447125471773,
      "loss": 3.159,
      "step": 147237
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7752554416656494,
      "learning_rate": 0.00017308076478556812,
      "loss": 3.1132,
      "step": 147238
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.500422954559326,
      "learning_rate": 0.0001730770583400151,
      "loss": 2.7266,
      "step": 147239
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3771891593933105,
      "learning_rate": 0.00017307335191805957,
      "loss": 2.9474,
      "step": 147240
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.241722345352173,
      "learning_rate": 0.00017306964551970196,
      "loss": 2.8943,
      "step": 147241
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.133397102355957,
      "learning_rate": 0.00017306593914494334,
      "loss": 3.1249,
      "step": 147242
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.070565700531006,
      "learning_rate": 0.00017306223279378398,
      "loss": 2.9498,
      "step": 147243
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3661742210388184,
      "learning_rate": 0.00017305852646622478,
      "loss": 2.9634,
      "step": 147244
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.336353302001953,
      "learning_rate": 0.0001730548201622663,
      "loss": 3.0736,
      "step": 147245
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5863449573516846,
      "learning_rate": 0.00017305111388190948,
      "loss": 2.7477,
      "step": 147246
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.382547616958618,
      "learning_rate": 0.00017304740762515473,
      "loss": 3.0417,
      "step": 147247
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5492334365844727,
      "learning_rate": 0.00017304370139200307,
      "loss": 2.8225,
      "step": 147248
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.053288221359253,
      "learning_rate": 0.00017303999518245475,
      "loss": 3.0247,
      "step": 147249
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8278088569641113,
      "learning_rate": 0.00017303628899651083,
      "loss": 3.0292,
      "step": 147250
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0847158432006836,
      "learning_rate": 0.0001730325828341717,
      "loss": 2.8393,
      "step": 147251
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3380675315856934,
      "learning_rate": 0.00017302887669543842,
      "loss": 2.9398,
      "step": 147252
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3106250762939453,
      "learning_rate": 0.00017302517058031126,
      "loss": 2.9791,
      "step": 147253
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2810676097869873,
      "learning_rate": 0.00017302146448879138,
      "loss": 3.1338,
      "step": 147254
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.176928997039795,
      "learning_rate": 0.000173017758420879,
      "loss": 2.7961,
      "step": 147255
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1058268547058105,
      "learning_rate": 0.0001730140523765751,
      "loss": 2.9701,
      "step": 147256
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.488924264907837,
      "learning_rate": 0.00017301034635588015,
      "loss": 2.7732,
      "step": 147257
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9627645015716553,
      "learning_rate": 0.00017300664035879514,
      "loss": 2.7714,
      "step": 147258
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.097605228424072,
      "learning_rate": 0.00017300293438532045,
      "loss": 2.9978,
      "step": 147259
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.896533250808716,
      "learning_rate": 0.00017299922843545712,
      "loss": 2.8368,
      "step": 147260
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9761637449264526,
      "learning_rate": 0.00017299552250920545,
      "loss": 3.051,
      "step": 147261
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2250754833221436,
      "learning_rate": 0.00017299181660656644,
      "loss": 2.9658,
      "step": 147262
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.670572519302368,
      "learning_rate": 0.00017298811072754045,
      "loss": 2.8025,
      "step": 147263
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.848491668701172,
      "learning_rate": 0.00017298440487212852,
      "loss": 2.9906,
      "step": 147264
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.027247667312622,
      "learning_rate": 0.00017298069904033106,
      "loss": 3.0029,
      "step": 147265
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0570311546325684,
      "learning_rate": 0.00017297699323214907,
      "loss": 2.8267,
      "step": 147266
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7535784244537354,
      "learning_rate": 0.000172973287447583,
      "loss": 3.0512,
      "step": 147267
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.635054588317871,
      "learning_rate": 0.00017296958168663357,
      "loss": 2.8878,
      "step": 147268
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1294798851013184,
      "learning_rate": 0.00017296587594930145,
      "loss": 3.015,
      "step": 147269
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.145146131515503,
      "learning_rate": 0.00017296217023558743,
      "loss": 2.9676,
      "step": 147270
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1625308990478516,
      "learning_rate": 0.00017295846454549202,
      "loss": 2.8766,
      "step": 147271
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2018020153045654,
      "learning_rate": 0.00017295475887901617,
      "loss": 3.0993,
      "step": 147272
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.336303234100342,
      "learning_rate": 0.0001729510532361604,
      "loss": 2.89,
      "step": 147273
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7634754180908203,
      "learning_rate": 0.0001729473476169255,
      "loss": 2.767,
      "step": 147274
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8402621746063232,
      "learning_rate": 0.0001729436420213119,
      "loss": 2.9827,
      "step": 147275
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7524797916412354,
      "learning_rate": 0.0001729399364493206,
      "loss": 2.9032,
      "step": 147276
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9638763666152954,
      "learning_rate": 0.00017293623090095206,
      "loss": 3.0906,
      "step": 147277
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0681614875793457,
      "learning_rate": 0.0001729325253762072,
      "loss": 3.0085,
      "step": 147278
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5572941303253174,
      "learning_rate": 0.00017292881987508656,
      "loss": 3.0772,
      "step": 147279
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.163024425506592,
      "learning_rate": 0.00017292511439759086,
      "loss": 2.8109,
      "step": 147280
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.10323166847229,
      "learning_rate": 0.0001729214089437207,
      "loss": 2.8881,
      "step": 147281
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.32663893699646,
      "learning_rate": 0.0001729177035134769,
      "loss": 3.0007,
      "step": 147282
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2793540954589844,
      "learning_rate": 0.00017291399810686,
      "loss": 3.4493,
      "step": 147283
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3799362182617188,
      "learning_rate": 0.00017291029272387094,
      "loss": 2.8657,
      "step": 147284
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.4703891277313232,
      "learning_rate": 0.0001729065873645102,
      "loss": 3.0228,
      "step": 147285
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.6015565395355225,
      "learning_rate": 0.00017290288202877855,
      "loss": 2.9409,
      "step": 147286
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8036553859710693,
      "learning_rate": 0.00017289917671667654,
      "loss": 2.9594,
      "step": 147287
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.45706844329834,
      "learning_rate": 0.0001728954714282051,
      "loss": 2.7942,
      "step": 147288
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.684391975402832,
      "learning_rate": 0.00017289176616336468,
      "loss": 2.9838,
      "step": 147289
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5730299949645996,
      "learning_rate": 0.00017288806092215616,
      "loss": 2.9328,
      "step": 147290
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.978595018386841,
      "learning_rate": 0.0001728843557045802,
      "loss": 2.7972,
      "step": 147291
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.571554183959961,
      "learning_rate": 0.00017288065051063742,
      "loss": 2.9302,
      "step": 147292
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.877549648284912,
      "learning_rate": 0.0001728769453403284,
      "loss": 2.9904,
      "step": 147293
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8906726837158203,
      "learning_rate": 0.00017287324019365404,
      "loss": 2.8789,
      "step": 147294
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.968994617462158,
      "learning_rate": 0.00017286953507061488,
      "loss": 2.8871,
      "step": 147295
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0685677528381348,
      "learning_rate": 0.00017286582997121176,
      "loss": 2.9355,
      "step": 147296
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.089708089828491,
      "learning_rate": 0.00017286212489544528,
      "loss": 2.8445,
      "step": 147297
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.492396116256714,
      "learning_rate": 0.00017285841984331616,
      "loss": 2.9331,
      "step": 147298
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3918704986572266,
      "learning_rate": 0.00017285471481482488,
      "loss": 3.0255,
      "step": 147299
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7991299629211426,
      "learning_rate": 0.0001728510098099725,
      "loss": 3.0029,
      "step": 147300
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.4028713703155518,
      "learning_rate": 0.00017284730482875936,
      "loss": 3.1294,
      "step": 147301
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0100858211517334,
      "learning_rate": 0.00017284359987118644,
      "loss": 2.9389,
      "step": 147302
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7432894706726074,
      "learning_rate": 0.00017283989493725428,
      "loss": 2.8553,
      "step": 147303
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.026211977005005,
      "learning_rate": 0.00017283619002696348,
      "loss": 2.904,
      "step": 147304
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7229812145233154,
      "learning_rate": 0.00017283248514031493,
      "loss": 3.1079,
      "step": 147305
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3529186248779297,
      "learning_rate": 0.00017282878027730924,
      "loss": 3.0809,
      "step": 147306
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.650684356689453,
      "learning_rate": 0.000172825075437947,
      "loss": 2.9592,
      "step": 147307
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1180412769317627,
      "learning_rate": 0.00017282137062222904,
      "loss": 3.1445,
      "step": 147308
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6750149726867676,
      "learning_rate": 0.000172817665830156,
      "loss": 2.6521,
      "step": 147309
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5064072608947754,
      "learning_rate": 0.00017281396106172842,
      "loss": 2.7772,
      "step": 147310
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9778214693069458,
      "learning_rate": 0.00017281025631694726,
      "loss": 3.2137,
      "step": 147311
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3757688999176025,
      "learning_rate": 0.0001728065515958131,
      "loss": 2.8755,
      "step": 147312
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4134621620178223,
      "learning_rate": 0.00017280284689832648,
      "loss": 3.0509,
      "step": 147313
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.507935047149658,
      "learning_rate": 0.00017279914222448833,
      "loss": 2.9822,
      "step": 147314
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6086695194244385,
      "learning_rate": 0.00017279543757429917,
      "loss": 2.8764,
      "step": 147315
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.603142023086548,
      "learning_rate": 0.00017279173294775966,
      "loss": 3.0204,
      "step": 147316
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.188765287399292,
      "learning_rate": 0.00017278802834487072,
      "loss": 3.0394,
      "step": 147317
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9690496921539307,
      "learning_rate": 0.0001727843237656327,
      "loss": 2.9413,
      "step": 147318
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9837777614593506,
      "learning_rate": 0.00017278061921004664,
      "loss": 2.9874,
      "step": 147319
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.804092526435852,
      "learning_rate": 0.00017277691467811304,
      "loss": 2.6823,
      "step": 147320
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7535133361816406,
      "learning_rate": 0.00017277321016983264,
      "loss": 2.9848,
      "step": 147321
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.933290958404541,
      "learning_rate": 0.00017276950568520595,
      "loss": 2.8142,
      "step": 147322
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7266900539398193,
      "learning_rate": 0.00017276580122423392,
      "loss": 3.0835,
      "step": 147323
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2314999103546143,
      "learning_rate": 0.00017276209678691705,
      "loss": 2.8186,
      "step": 147324
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.343662977218628,
      "learning_rate": 0.00017275839237325622,
      "loss": 2.9883,
      "step": 147325
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.924656629562378,
      "learning_rate": 0.00017275468798325186,
      "loss": 3.0262,
      "step": 147326
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1120848655700684,
      "learning_rate": 0.00017275098361690507,
      "loss": 2.8582,
      "step": 147327
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0028958320617676,
      "learning_rate": 0.00017274727927421604,
      "loss": 3.1755,
      "step": 147328
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1295413970947266,
      "learning_rate": 0.00017274357495518583,
      "loss": 2.8524,
      "step": 147329
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2053186893463135,
      "learning_rate": 0.00017273987065981482,
      "loss": 2.7193,
      "step": 147330
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1171956062316895,
      "learning_rate": 0.00017273616638810403,
      "loss": 2.8118,
      "step": 147331
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.648876667022705,
      "learning_rate": 0.00017273246214005386,
      "loss": 3.0739,
      "step": 147332
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9789484739303589,
      "learning_rate": 0.00017272875791566524,
      "loss": 2.9257,
      "step": 147333
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.348936080932617,
      "learning_rate": 0.00017272505371493873,
      "loss": 2.5458,
      "step": 147334
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7282559871673584,
      "learning_rate": 0.00017272134953787507,
      "loss": 3.1424,
      "step": 147335
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5797922611236572,
      "learning_rate": 0.00017271764538447475,
      "loss": 3.0242,
      "step": 147336
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0716099739074707,
      "learning_rate": 0.0001727139412547388,
      "loss": 2.9897,
      "step": 147337
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2042646408081055,
      "learning_rate": 0.00017271023714866756,
      "loss": 2.9762,
      "step": 147338
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9345837831497192,
      "learning_rate": 0.000172706533066262,
      "loss": 2.9364,
      "step": 147339
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8212053775787354,
      "learning_rate": 0.00017270282900752273,
      "loss": 3.0893,
      "step": 147340
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.460639476776123,
      "learning_rate": 0.0001726991249724504,
      "loss": 3.293,
      "step": 147341
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.608149290084839,
      "learning_rate": 0.00017269542096104562,
      "loss": 2.9259,
      "step": 147342
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9391727447509766,
      "learning_rate": 0.00017269171697330924,
      "loss": 3.0721,
      "step": 147343
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0268568992614746,
      "learning_rate": 0.00017268801300924177,
      "loss": 2.8385,
      "step": 147344
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6395959854125977,
      "learning_rate": 0.00017268430906884413,
      "loss": 2.863,
      "step": 147345
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7197623252868652,
      "learning_rate": 0.00017268060515211687,
      "loss": 2.9393,
      "step": 147346
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5209147930145264,
      "learning_rate": 0.00017267690125906068,
      "loss": 2.6596,
      "step": 147347
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0305943489074707,
      "learning_rate": 0.00017267319738967614,
      "loss": 3.1049,
      "step": 147348
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.357903003692627,
      "learning_rate": 0.00017266949354396418,
      "loss": 2.7552,
      "step": 147349
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.819453477859497,
      "learning_rate": 0.00017266578972192524,
      "loss": 3.008,
      "step": 147350
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6829638481140137,
      "learning_rate": 0.00017266208592356028,
      "loss": 2.7544,
      "step": 147351
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3202335834503174,
      "learning_rate": 0.00017265838214886983,
      "loss": 2.7031,
      "step": 147352
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0947160720825195,
      "learning_rate": 0.00017265467839785455,
      "loss": 2.8269,
      "step": 147353
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.164137125015259,
      "learning_rate": 0.0001726509746705151,
      "loss": 3.0588,
      "step": 147354
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.3690035343170166,
      "learning_rate": 0.00017264727096685237,
      "loss": 2.9401,
      "step": 147355
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.608553886413574,
      "learning_rate": 0.00017264356728686673,
      "loss": 2.7954,
      "step": 147356
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4746954441070557,
      "learning_rate": 0.00017263986363055922,
      "loss": 2.9947,
      "step": 147357
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.909923791885376,
      "learning_rate": 0.00017263615999793034,
      "loss": 3.0215,
      "step": 147358
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.832235097885132,
      "learning_rate": 0.00017263245638898082,
      "loss": 2.8079,
      "step": 147359
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9001657962799072,
      "learning_rate": 0.0001726287528037112,
      "loss": 2.9832,
      "step": 147360
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2399563789367676,
      "learning_rate": 0.00017262504924212243,
      "loss": 3.1098,
      "step": 147361
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2026116847991943,
      "learning_rate": 0.00017262134570421496,
      "loss": 2.6856,
      "step": 147362
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.005007743835449,
      "learning_rate": 0.00017261764218998965,
      "loss": 2.9416,
      "step": 147363
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3500237464904785,
      "learning_rate": 0.00017261393869944717,
      "loss": 3.0674,
      "step": 147364
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.990668773651123,
      "learning_rate": 0.00017261023523258817,
      "loss": 2.9031,
      "step": 147365
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.888216733932495,
      "learning_rate": 0.0001726065317894132,
      "loss": 2.9872,
      "step": 147366
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8050477504730225,
      "learning_rate": 0.0001726028283699232,
      "loss": 2.8638,
      "step": 147367
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.5485684871673584,
      "learning_rate": 0.0001725991249741186,
      "loss": 3.063,
      "step": 147368
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.8366880416870117,
      "learning_rate": 0.00017259542160200038,
      "loss": 3.1055,
      "step": 147369
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2619500160217285,
      "learning_rate": 0.00017259171825356905,
      "loss": 3.15,
      "step": 147370
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3551106452941895,
      "learning_rate": 0.00017258801492882537,
      "loss": 3.0804,
      "step": 147371
    },
    {
      "epoch": 1.92,
      "grad_norm": 6.804391384124756,
      "learning_rate": 0.0001725843116277698,
      "loss": 2.9533,
      "step": 147372
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.784585952758789,
      "learning_rate": 0.00017258060835040337,
      "loss": 3.0089,
      "step": 147373
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.927671432495117,
      "learning_rate": 0.0001725769050967265,
      "loss": 2.8273,
      "step": 147374
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3238723278045654,
      "learning_rate": 0.00017257320186674007,
      "loss": 3.0781,
      "step": 147375
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.019282102584839,
      "learning_rate": 0.00017256949866044472,
      "loss": 2.9564,
      "step": 147376
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1988534927368164,
      "learning_rate": 0.0001725657954778411,
      "loss": 3.0265,
      "step": 147377
    },
    {
      "epoch": 1.92,
      "grad_norm": 5.058777332305908,
      "learning_rate": 0.00017256209231892976,
      "loss": 3.0161,
      "step": 147378
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.8308677673339844,
      "learning_rate": 0.0001725583891837117,
      "loss": 2.985,
      "step": 147379
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.186934232711792,
      "learning_rate": 0.00017255468607218726,
      "loss": 3.1488,
      "step": 147380
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.197275161743164,
      "learning_rate": 0.00017255098298435748,
      "loss": 3.1065,
      "step": 147381
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.421173095703125,
      "learning_rate": 0.00017254727992022285,
      "loss": 2.9285,
      "step": 147382
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.620615482330322,
      "learning_rate": 0.00017254357687978409,
      "loss": 2.7974,
      "step": 147383
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.626909017562866,
      "learning_rate": 0.00017253987386304175,
      "loss": 3.1459,
      "step": 147384
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.248631477355957,
      "learning_rate": 0.00017253617086999682,
      "loss": 2.9449,
      "step": 147385
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0793280601501465,
      "learning_rate": 0.00017253246790064968,
      "loss": 2.8495,
      "step": 147386
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.208451271057129,
      "learning_rate": 0.00017252876495500125,
      "loss": 2.9305,
      "step": 147387
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.416627883911133,
      "learning_rate": 0.00017252506203305218,
      "loss": 2.9653,
      "step": 147388
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3299379348754883,
      "learning_rate": 0.00017252135913480308,
      "loss": 2.8044,
      "step": 147389
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1673529148101807,
      "learning_rate": 0.00017251765626025454,
      "loss": 3.0504,
      "step": 147390
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3292641639709473,
      "learning_rate": 0.0001725139534094075,
      "loss": 2.8162,
      "step": 147391
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3759515285491943,
      "learning_rate": 0.00017251025058226242,
      "loss": 3.1352,
      "step": 147392
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.201084852218628,
      "learning_rate": 0.0001725065477788202,
      "loss": 2.7373,
      "step": 147393
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0114779472351074,
      "learning_rate": 0.00017250284499908142,
      "loss": 2.9253,
      "step": 147394
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6727733612060547,
      "learning_rate": 0.00017249914224304664,
      "loss": 2.913,
      "step": 147395
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1350080966949463,
      "learning_rate": 0.00017249543951071677,
      "loss": 2.7704,
      "step": 147396
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4123311042785645,
      "learning_rate": 0.00017249173680209244,
      "loss": 2.8761,
      "step": 147397
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9911043643951416,
      "learning_rate": 0.00017248803411717417,
      "loss": 3.0419,
      "step": 147398
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.210536003112793,
      "learning_rate": 0.00017248433145596295,
      "loss": 2.5629,
      "step": 147399
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2477424144744873,
      "learning_rate": 0.0001724806288184591,
      "loss": 2.6817,
      "step": 147400
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1609654426574707,
      "learning_rate": 0.00017247692620466369,
      "loss": 3.0931,
      "step": 147401
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.30991792678833,
      "learning_rate": 0.00017247322361457719,
      "loss": 3.0431,
      "step": 147402
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9304709434509277,
      "learning_rate": 0.0001724695210482002,
      "loss": 3.061,
      "step": 147403
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1663029193878174,
      "learning_rate": 0.0001724658185055337,
      "loss": 3.0042,
      "step": 147404
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9078800678253174,
      "learning_rate": 0.00017246211598657816,
      "loss": 2.8648,
      "step": 147405
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.417147159576416,
      "learning_rate": 0.00017245841349133423,
      "loss": 3.0835,
      "step": 147406
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.194344997406006,
      "learning_rate": 0.0001724547110198028,
      "loss": 2.8018,
      "step": 147407
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.8762534856796265,
      "learning_rate": 0.00017245100857198444,
      "loss": 2.9153,
      "step": 147408
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.566908836364746,
      "learning_rate": 0.00017244730614787975,
      "loss": 2.608,
      "step": 147409
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.21128511428833,
      "learning_rate": 0.0001724436037474896,
      "loss": 2.5349,
      "step": 147410
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.184487819671631,
      "learning_rate": 0.00017243990137081464,
      "loss": 2.9576,
      "step": 147411
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3812108039855957,
      "learning_rate": 0.00017243619901785535,
      "loss": 2.7083,
      "step": 147412
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0474226474761963,
      "learning_rate": 0.0001724324966886127,
      "loss": 2.9406,
      "step": 147413
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.158601999282837,
      "learning_rate": 0.00017242879438308727,
      "loss": 2.8908,
      "step": 147414
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2148945331573486,
      "learning_rate": 0.00017242509210127962,
      "loss": 2.9362,
      "step": 147415
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1632983684539795,
      "learning_rate": 0.00017242138984319066,
      "loss": 3.0532,
      "step": 147416
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.974231719970703,
      "learning_rate": 0.00017241768760882083,
      "loss": 3.1393,
      "step": 147417
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1545796394348145,
      "learning_rate": 0.00017241398539817113,
      "loss": 3.047,
      "step": 147418
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.012101411819458,
      "learning_rate": 0.00017241028321124204,
      "loss": 2.8771,
      "step": 147419
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6951138973236084,
      "learning_rate": 0.00017240658104803432,
      "loss": 2.9294,
      "step": 147420
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0311472415924072,
      "learning_rate": 0.00017240287890854847,
      "loss": 2.8426,
      "step": 147421
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.250638246536255,
      "learning_rate": 0.00017239917679278546,
      "loss": 2.8913,
      "step": 147422
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.091832160949707,
      "learning_rate": 0.00017239547470074572,
      "loss": 3.1934,
      "step": 147423
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6212151050567627,
      "learning_rate": 0.0001723917726324302,
      "loss": 2.9897,
      "step": 147424
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5797102451324463,
      "learning_rate": 0.00017238807058783946,
      "loss": 2.6553,
      "step": 147425
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9107019901275635,
      "learning_rate": 0.00017238436856697423,
      "loss": 2.8203,
      "step": 147426
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0935287475585938,
      "learning_rate": 0.000172380666569835,
      "loss": 3.0206,
      "step": 147427
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.652984619140625,
      "learning_rate": 0.00017237696459642277,
      "loss": 3.0145,
      "step": 147428
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1993653774261475,
      "learning_rate": 0.0001723732626467379,
      "loss": 3.0305,
      "step": 147429
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0700581073760986,
      "learning_rate": 0.00017236956072078137,
      "loss": 2.8987,
      "step": 147430
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5013084411621094,
      "learning_rate": 0.00017236585881855377,
      "loss": 3.0889,
      "step": 147431
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2282557487487793,
      "learning_rate": 0.00017236215694005575,
      "loss": 2.951,
      "step": 147432
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2534873485565186,
      "learning_rate": 0.0001723584550852879,
      "loss": 2.8225,
      "step": 147433
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2817978858947754,
      "learning_rate": 0.00017235475325425113,
      "loss": 3.0051,
      "step": 147434
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.19569730758667,
      "learning_rate": 0.00017235105144694594,
      "loss": 2.997,
      "step": 147435
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0829970836639404,
      "learning_rate": 0.0001723473496633732,
      "loss": 2.9215,
      "step": 147436
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.321589708328247,
      "learning_rate": 0.0001723436479035335,
      "loss": 3.0493,
      "step": 147437
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1195027828216553,
      "learning_rate": 0.00017233994616742748,
      "loss": 3.0303,
      "step": 147438
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0252833366394043,
      "learning_rate": 0.00017233624445505582,
      "loss": 3.0403,
      "step": 147439
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.337918519973755,
      "learning_rate": 0.00017233254276641935,
      "loss": 2.9308,
      "step": 147440
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.651855707168579,
      "learning_rate": 0.00017232884110151853,
      "loss": 2.9643,
      "step": 147441
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.207319498062134,
      "learning_rate": 0.00017232513946035434,
      "loss": 3.1023,
      "step": 147442
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1096251010894775,
      "learning_rate": 0.00017232143784292733,
      "loss": 3.2309,
      "step": 147443
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.004758596420288,
      "learning_rate": 0.00017231773624923815,
      "loss": 2.8828,
      "step": 147444
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.149818181991577,
      "learning_rate": 0.0001723140346792874,
      "loss": 2.8464,
      "step": 147445
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.564005374908447,
      "learning_rate": 0.00017231033313307598,
      "loss": 2.8255,
      "step": 147446
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.294445514678955,
      "learning_rate": 0.0001723066316106044,
      "loss": 2.7807,
      "step": 147447
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.663702964782715,
      "learning_rate": 0.00017230293011187353,
      "loss": 2.85,
      "step": 147448
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.465583562850952,
      "learning_rate": 0.00017229922863688395,
      "loss": 2.8191,
      "step": 147449
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3840982913970947,
      "learning_rate": 0.00017229552718563633,
      "loss": 3.0906,
      "step": 147450
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2179322242736816,
      "learning_rate": 0.0001722918257581313,
      "loss": 2.905,
      "step": 147451
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.115323305130005,
      "learning_rate": 0.00017228812435436973,
      "loss": 3.0254,
      "step": 147452
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3994812965393066,
      "learning_rate": 0.0001722844229743521,
      "loss": 3.0147,
      "step": 147453
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4056618213653564,
      "learning_rate": 0.0001722807216180793,
      "loss": 2.8537,
      "step": 147454
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1503477096557617,
      "learning_rate": 0.00017227702028555194,
      "loss": 2.9068,
      "step": 147455
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2992587089538574,
      "learning_rate": 0.00017227331897677075,
      "loss": 2.9304,
      "step": 147456
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.198300361633301,
      "learning_rate": 0.00017226961769173615,
      "loss": 2.8674,
      "step": 147457
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.154557704925537,
      "learning_rate": 0.00017226591643044922,
      "loss": 3.0898,
      "step": 147458
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.144724130630493,
      "learning_rate": 0.0001722622151929103,
      "loss": 3.1972,
      "step": 147459
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.793056011199951,
      "learning_rate": 0.00017225851397912042,
      "loss": 2.7545,
      "step": 147460
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.795210123062134,
      "learning_rate": 0.00017225481278908007,
      "loss": 3.1317,
      "step": 147461
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7834348678588867,
      "learning_rate": 0.00017225111162278995,
      "loss": 3.1646,
      "step": 147462
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.282382011413574,
      "learning_rate": 0.00017224741048025064,
      "loss": 3.0864,
      "step": 147463
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0481486320495605,
      "learning_rate": 0.00017224370936146305,
      "loss": 3.0049,
      "step": 147464
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6828856468200684,
      "learning_rate": 0.0001722400082664277,
      "loss": 3.0566,
      "step": 147465
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.6589243412017822,
      "learning_rate": 0.00017223630719514542,
      "loss": 2.9368,
      "step": 147466
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.180457353591919,
      "learning_rate": 0.00017223260614761672,
      "loss": 2.9961,
      "step": 147467
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.624073028564453,
      "learning_rate": 0.00017222890512384268,
      "loss": 3.047,
      "step": 147468
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5680465698242188,
      "learning_rate": 0.00017222520412382334,
      "loss": 3.0596,
      "step": 147469
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7782504558563232,
      "learning_rate": 0.00017222150314755996,
      "loss": 3.2117,
      "step": 147470
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.455134153366089,
      "learning_rate": 0.00017221780219505288,
      "loss": 2.9798,
      "step": 147471
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.082394599914551,
      "learning_rate": 0.00017221410126630303,
      "loss": 2.8498,
      "step": 147472
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0712502002716064,
      "learning_rate": 0.00017221040036131084,
      "loss": 2.9039,
      "step": 147473
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3480989933013916,
      "learning_rate": 0.00017220669948007743,
      "loss": 2.9081,
      "step": 147474
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0456535816192627,
      "learning_rate": 0.00017220299862260297,
      "loss": 2.8295,
      "step": 147475
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.4135453701019287,
      "learning_rate": 0.0001721992977888885,
      "loss": 2.7804,
      "step": 147476
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4488821029663086,
      "learning_rate": 0.00017219559697893445,
      "loss": 3.0101,
      "step": 147477
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.11169171333313,
      "learning_rate": 0.0001721918961927418,
      "loss": 2.8312,
      "step": 147478
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6579647064208984,
      "learning_rate": 0.00017218819543031092,
      "loss": 3.064,
      "step": 147479
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1623623371124268,
      "learning_rate": 0.00017218449469164285,
      "loss": 2.9877,
      "step": 147480
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.214094400405884,
      "learning_rate": 0.00017218079397673806,
      "loss": 2.7209,
      "step": 147481
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3830149173736572,
      "learning_rate": 0.0001721770932855973,
      "loss": 3.0427,
      "step": 147482
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9036086797714233,
      "learning_rate": 0.0001721733926182211,
      "loss": 3.0357,
      "step": 147483
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6011126041412354,
      "learning_rate": 0.00017216969197461035,
      "loss": 2.8016,
      "step": 147484
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1259708404541016,
      "learning_rate": 0.0001721659913547656,
      "loss": 3.0687,
      "step": 147485
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.687654733657837,
      "learning_rate": 0.00017216229075868773,
      "loss": 2.7818,
      "step": 147486
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4033353328704834,
      "learning_rate": 0.0001721585901863773,
      "loss": 3.158,
      "step": 147487
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.632615327835083,
      "learning_rate": 0.00017215488963783483,
      "loss": 3.1061,
      "step": 147488
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3442227840423584,
      "learning_rate": 0.00017215118911306136,
      "loss": 2.7611,
      "step": 147489
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.970881223678589,
      "learning_rate": 0.00017214748861205738,
      "loss": 2.7883,
      "step": 147490
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.273324489593506,
      "learning_rate": 0.00017214378813482344,
      "loss": 2.8782,
      "step": 147491
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.5566139221191406,
      "learning_rate": 0.00017214008768136056,
      "loss": 3.1439,
      "step": 147492
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3393757343292236,
      "learning_rate": 0.00017213638725166921,
      "loss": 2.9524,
      "step": 147493
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1989479064941406,
      "learning_rate": 0.00017213268684575003,
      "loss": 3.0747,
      "step": 147494
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4416897296905518,
      "learning_rate": 0.00017212898646360388,
      "loss": 3.1889,
      "step": 147495
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1470539569854736,
      "learning_rate": 0.00017212528610523138,
      "loss": 3.0559,
      "step": 147496
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.609797477722168,
      "learning_rate": 0.00017212158577063308,
      "loss": 2.7477,
      "step": 147497
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.299347162246704,
      "learning_rate": 0.00017211788545980994,
      "loss": 3.0189,
      "step": 147498
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9658269882202148,
      "learning_rate": 0.0001721141851727625,
      "loss": 2.8276,
      "step": 147499
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2257423400878906,
      "learning_rate": 0.00017211048490949128,
      "loss": 3.1961,
      "step": 147500
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0968480110168457,
      "learning_rate": 0.0001721067846699973,
      "loss": 3.0971,
      "step": 147501
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1977217197418213,
      "learning_rate": 0.00017210308445428095,
      "loss": 2.9776,
      "step": 147502
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7281806468963623,
      "learning_rate": 0.00017209938426234316,
      "loss": 2.6374,
      "step": 147503
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1344056129455566,
      "learning_rate": 0.0001720956840941845,
      "loss": 2.9313,
      "step": 147504
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.249990463256836,
      "learning_rate": 0.00017209198394980567,
      "loss": 3.0175,
      "step": 147505
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2700304985046387,
      "learning_rate": 0.00017208828382920724,
      "loss": 2.911,
      "step": 147506
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4642131328582764,
      "learning_rate": 0.00017208458373239014,
      "loss": 2.9541,
      "step": 147507
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.266361951828003,
      "learning_rate": 0.0001720808836593548,
      "loss": 2.8527,
      "step": 147508
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1994681358337402,
      "learning_rate": 0.0001720771836101022,
      "loss": 2.8787,
      "step": 147509
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.673301935195923,
      "learning_rate": 0.0001720734835846328,
      "loss": 3.0431,
      "step": 147510
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.238698959350586,
      "learning_rate": 0.0001720697835829474,
      "loss": 2.8463,
      "step": 147511
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4400033950805664,
      "learning_rate": 0.0001720660836050465,
      "loss": 3.132,
      "step": 147512
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.14973521232605,
      "learning_rate": 0.00017206238365093105,
      "loss": 3.137,
      "step": 147513
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9605169296264648,
      "learning_rate": 0.00017205868372060154,
      "loss": 2.9029,
      "step": 147514
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5373029708862305,
      "learning_rate": 0.0001720549838140588,
      "loss": 2.7662,
      "step": 147515
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.533926486968994,
      "learning_rate": 0.0001720512839313035,
      "loss": 2.8065,
      "step": 147516
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3885912895202637,
      "learning_rate": 0.00017204758407233622,
      "loss": 3.0574,
      "step": 147517
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.428372621536255,
      "learning_rate": 0.00017204388423715768,
      "loss": 3.0415,
      "step": 147518
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.288489580154419,
      "learning_rate": 0.00017204018442576862,
      "loss": 3.0776,
      "step": 147519
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1516788005828857,
      "learning_rate": 0.00017203648463816966,
      "loss": 2.7198,
      "step": 147520
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5168616771698,
      "learning_rate": 0.0001720327848743616,
      "loss": 2.9968,
      "step": 147521
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.045741081237793,
      "learning_rate": 0.00017202908513434508,
      "loss": 3.103,
      "step": 147522
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2438104152679443,
      "learning_rate": 0.00017202538541812078,
      "loss": 2.9916,
      "step": 147523
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4831063747406006,
      "learning_rate": 0.00017202168572568927,
      "loss": 3.0217,
      "step": 147524
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5072343349456787,
      "learning_rate": 0.00017201798605705142,
      "loss": 2.8962,
      "step": 147525
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3213655948638916,
      "learning_rate": 0.00017201428641220774,
      "loss": 3.1558,
      "step": 147526
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.272374391555786,
      "learning_rate": 0.00017201058679115915,
      "loss": 3.2024,
      "step": 147527
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3255655765533447,
      "learning_rate": 0.0001720068871939062,
      "loss": 2.9735,
      "step": 147528
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0963633060455322,
      "learning_rate": 0.00017200318762044959,
      "loss": 2.9205,
      "step": 147529
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9297035932540894,
      "learning_rate": 0.00017199948807078988,
      "loss": 3.1666,
      "step": 147530
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.790036916732788,
      "learning_rate": 0.000171995788544928,
      "loss": 2.9526,
      "step": 147531
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.206907272338867,
      "learning_rate": 0.00017199208904286438,
      "loss": 3.0438,
      "step": 147532
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.778015613555908,
      "learning_rate": 0.00017198838956460001,
      "loss": 3.0657,
      "step": 147533
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.880821466445923,
      "learning_rate": 0.0001719846901101353,
      "loss": 2.7149,
      "step": 147534
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7497048377990723,
      "learning_rate": 0.00017198099067947125,
      "loss": 3.1682,
      "step": 147535
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.470463752746582,
      "learning_rate": 0.0001719772912726081,
      "loss": 2.7209,
      "step": 147536
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.922029733657837,
      "learning_rate": 0.00017197359188954697,
      "loss": 2.7439,
      "step": 147537
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.815894365310669,
      "learning_rate": 0.0001719698925302882,
      "loss": 3.0149,
      "step": 147538
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.243154764175415,
      "learning_rate": 0.0001719661931948328,
      "loss": 3.1544,
      "step": 147539
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.783893585205078,
      "learning_rate": 0.00017196249388318113,
      "loss": 2.7882,
      "step": 147540
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.213768243789673,
      "learning_rate": 0.00017195879459533435,
      "loss": 2.9214,
      "step": 147541
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.847578287124634,
      "learning_rate": 0.00017195509533129253,
      "loss": 2.9158,
      "step": 147542
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7525811195373535,
      "learning_rate": 0.00017195139609105687,
      "loss": 3.0475,
      "step": 147543
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4793405532836914,
      "learning_rate": 0.00017194769687462768,
      "loss": 2.8734,
      "step": 147544
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.4318623542785645,
      "learning_rate": 0.000171943997682006,
      "loss": 2.953,
      "step": 147545
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3964757919311523,
      "learning_rate": 0.0001719402985131922,
      "loss": 2.8849,
      "step": 147546
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.195174694061279,
      "learning_rate": 0.00017193659936818735,
      "loss": 2.9621,
      "step": 147547
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.074759006500244,
      "learning_rate": 0.0001719329002469917,
      "loss": 2.9517,
      "step": 147548
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5377001762390137,
      "learning_rate": 0.00017192920114960628,
      "loss": 2.9895,
      "step": 147549
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.900820016860962,
      "learning_rate": 0.00017192550207603143,
      "loss": 2.8999,
      "step": 147550
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1765472888946533,
      "learning_rate": 0.0001719218030262682,
      "loss": 2.8194,
      "step": 147551
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.108884572982788,
      "learning_rate": 0.00017191810400031706,
      "loss": 2.9986,
      "step": 147552
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1430447101593018,
      "learning_rate": 0.00017191440499817896,
      "loss": 2.7843,
      "step": 147553
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1032845973968506,
      "learning_rate": 0.00017191070601985415,
      "loss": 3.0394,
      "step": 147554
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.021029233932495,
      "learning_rate": 0.00017190700706534365,
      "loss": 2.8689,
      "step": 147555
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0185670852661133,
      "learning_rate": 0.00017190330813464798,
      "loss": 2.8339,
      "step": 147556
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1476027965545654,
      "learning_rate": 0.000171899609227768,
      "loss": 3.0008,
      "step": 147557
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1921918392181396,
      "learning_rate": 0.00017189591034470422,
      "loss": 3.058,
      "step": 147558
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.923983097076416,
      "learning_rate": 0.0001718922114854576,
      "loss": 3.061,
      "step": 147559
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2301270961761475,
      "learning_rate": 0.00017188851265002842,
      "loss": 3.071,
      "step": 147560
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.690063714981079,
      "learning_rate": 0.0001718848138384177,
      "loss": 2.7967,
      "step": 147561
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4897561073303223,
      "learning_rate": 0.00017188111505062588,
      "loss": 3.0418,
      "step": 147562
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1503076553344727,
      "learning_rate": 0.0001718774162866539,
      "loss": 3.282,
      "step": 147563
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0350019931793213,
      "learning_rate": 0.0001718737175465022,
      "loss": 3.103,
      "step": 147564
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.698755264282227,
      "learning_rate": 0.00017187001883017174,
      "loss": 3.01,
      "step": 147565
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0468854904174805,
      "learning_rate": 0.00017186632013766305,
      "loss": 2.8819,
      "step": 147566
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.496432304382324,
      "learning_rate": 0.00017186262146897684,
      "loss": 3.0635,
      "step": 147567
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.450066089630127,
      "learning_rate": 0.00017185892282411364,
      "loss": 2.8471,
      "step": 147568
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0496811866760254,
      "learning_rate": 0.00017185522420307445,
      "loss": 3.0105,
      "step": 147569
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9996178150177002,
      "learning_rate": 0.0001718515256058596,
      "loss": 3.121,
      "step": 147570
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3635165691375732,
      "learning_rate": 0.0001718478270324702,
      "loss": 2.8977,
      "step": 147571
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3647117614746094,
      "learning_rate": 0.00017184412848290662,
      "loss": 2.8524,
      "step": 147572
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2923200130462646,
      "learning_rate": 0.00017184042995716965,
      "loss": 3.1216,
      "step": 147573
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3115217685699463,
      "learning_rate": 0.00017183673145525986,
      "loss": 2.9997,
      "step": 147574
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9438060522079468,
      "learning_rate": 0.00017183303297717815,
      "loss": 2.9611,
      "step": 147575
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.472841262817383,
      "learning_rate": 0.000171829334522925,
      "loss": 2.9434,
      "step": 147576
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.895432949066162,
      "learning_rate": 0.00017182563609250128,
      "loss": 2.9735,
      "step": 147577
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2510530948638916,
      "learning_rate": 0.00017182193768590763,
      "loss": 2.8896,
      "step": 147578
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7134459018707275,
      "learning_rate": 0.00017181823930314456,
      "loss": 2.8548,
      "step": 147579
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.219104528427124,
      "learning_rate": 0.00017181454094421304,
      "loss": 3.2111,
      "step": 147580
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0016868114471436,
      "learning_rate": 0.0001718108426091136,
      "loss": 2.9177,
      "step": 147581
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6196277141571045,
      "learning_rate": 0.0001718071442978468,
      "loss": 2.929,
      "step": 147582
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1249539852142334,
      "learning_rate": 0.0001718034460104136,
      "loss": 3.1423,
      "step": 147583
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2575466632843018,
      "learning_rate": 0.00017179974774681457,
      "loss": 2.9881,
      "step": 147584
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0505027770996094,
      "learning_rate": 0.00017179604950705028,
      "loss": 3.0512,
      "step": 147585
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.060166358947754,
      "learning_rate": 0.00017179235129112166,
      "loss": 3.0142,
      "step": 147586
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.761624336242676,
      "learning_rate": 0.0001717886530990291,
      "loss": 2.9174,
      "step": 147587
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.536590099334717,
      "learning_rate": 0.0001717849549307736,
      "loss": 2.8064,
      "step": 147588
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2347888946533203,
      "learning_rate": 0.00017178125678635572,
      "loss": 2.9728,
      "step": 147589
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9598934650421143,
      "learning_rate": 0.00017177755866577607,
      "loss": 2.8685,
      "step": 147590
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.216153860092163,
      "learning_rate": 0.00017177386056903526,
      "loss": 3.0077,
      "step": 147591
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0606679916381836,
      "learning_rate": 0.00017177016249613427,
      "loss": 2.9368,
      "step": 147592
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5635626316070557,
      "learning_rate": 0.0001717664644470735,
      "loss": 3.0118,
      "step": 147593
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9252161979675293,
      "learning_rate": 0.0001717627664218539,
      "loss": 2.9439,
      "step": 147594
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.232267379760742,
      "learning_rate": 0.000171759068420476,
      "loss": 2.9493,
      "step": 147595
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9961313009262085,
      "learning_rate": 0.0001717553704429405,
      "loss": 3.0273,
      "step": 147596
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.407297372817993,
      "learning_rate": 0.000171751672489248,
      "loss": 3.0624,
      "step": 147597
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.216068744659424,
      "learning_rate": 0.00017174797455939942,
      "loss": 2.8782,
      "step": 147598
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2295873165130615,
      "learning_rate": 0.00017174427665339514,
      "loss": 2.9289,
      "step": 147599
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.219534397125244,
      "learning_rate": 0.00017174057877123618,
      "loss": 3.0569,
      "step": 147600
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.237257957458496,
      "learning_rate": 0.00017173688091292293,
      "loss": 2.9846,
      "step": 147601
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9084677696228027,
      "learning_rate": 0.00017173318307845648,
      "loss": 2.8099,
      "step": 147602
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0688350200653076,
      "learning_rate": 0.000171729485267837,
      "loss": 3.3117,
      "step": 147603
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.046220541000366,
      "learning_rate": 0.00017172578748106556,
      "loss": 2.7955,
      "step": 147604
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.471759557723999,
      "learning_rate": 0.00017172208971814258,
      "loss": 2.9057,
      "step": 147605
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4547011852264404,
      "learning_rate": 0.00017171839197906898,
      "loss": 2.9296,
      "step": 147606
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.3574748039245605,
      "learning_rate": 0.00017171469426384526,
      "loss": 2.9004,
      "step": 147607
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.032029151916504,
      "learning_rate": 0.00017171099657247246,
      "loss": 2.9692,
      "step": 147608
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.491180896759033,
      "learning_rate": 0.00017170729890495076,
      "loss": 3.0376,
      "step": 147609
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2662274837493896,
      "learning_rate": 0.00017170360126128118,
      "loss": 2.9193,
      "step": 147610
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.841430902481079,
      "learning_rate": 0.00017169990364146424,
      "loss": 3.0093,
      "step": 147611
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0611376762390137,
      "learning_rate": 0.0001716962060455008,
      "loss": 2.9411,
      "step": 147612
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1996090412139893,
      "learning_rate": 0.00017169250847339133,
      "loss": 2.9195,
      "step": 147613
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.3784284591674805,
      "learning_rate": 0.00017168881092513696,
      "loss": 3.0806,
      "step": 147614
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.933720350265503,
      "learning_rate": 0.00017168511340073775,
      "loss": 2.8968,
      "step": 147615
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.4802470207214355,
      "learning_rate": 0.00017168141590019482,
      "loss": 2.9188,
      "step": 147616
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3388054370880127,
      "learning_rate": 0.00017167771842350866,
      "loss": 3.0064,
      "step": 147617
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.507830619812012,
      "learning_rate": 0.0001716740209706801,
      "loss": 3.1813,
      "step": 147618
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.915966033935547,
      "learning_rate": 0.0001716703235417097,
      "loss": 2.8559,
      "step": 147619
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.293013095855713,
      "learning_rate": 0.0001716666261365984,
      "loss": 2.9293,
      "step": 147620
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.538220167160034,
      "learning_rate": 0.0001716629287553465,
      "loss": 3.2368,
      "step": 147621
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0949172973632812,
      "learning_rate": 0.000171659231397955,
      "loss": 2.9566,
      "step": 147622
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8345067501068115,
      "learning_rate": 0.00017165553406442433,
      "loss": 2.7785,
      "step": 147623
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.4665825366973877,
      "learning_rate": 0.00017165183675475542,
      "loss": 3.0179,
      "step": 147624
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4803929328918457,
      "learning_rate": 0.00017164813946894875,
      "loss": 2.9956,
      "step": 147625
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.595432758331299,
      "learning_rate": 0.00017164444220700537,
      "loss": 2.9119,
      "step": 147626
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.30932879447937,
      "learning_rate": 0.00017164074496892545,
      "loss": 3.0505,
      "step": 147627
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6972837448120117,
      "learning_rate": 0.0001716370477547101,
      "loss": 3.1607,
      "step": 147628
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.816959857940674,
      "learning_rate": 0.0001716333505643597,
      "loss": 2.9007,
      "step": 147629
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4704198837280273,
      "learning_rate": 0.00017162965339787516,
      "loss": 2.9081,
      "step": 147630
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4884932041168213,
      "learning_rate": 0.000171625956255257,
      "loss": 2.8733,
      "step": 147631
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4140846729278564,
      "learning_rate": 0.00017162225913650627,
      "loss": 3.1884,
      "step": 147632
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.237868547439575,
      "learning_rate": 0.00017161856204162313,
      "loss": 2.8038,
      "step": 147633
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1582324504852295,
      "learning_rate": 0.00017161486497060862,
      "loss": 3.1007,
      "step": 147634
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4857776165008545,
      "learning_rate": 0.00017161116792346326,
      "loss": 2.7646,
      "step": 147635
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.040041208267212,
      "learning_rate": 0.0001716074709001879,
      "loss": 3.0407,
      "step": 147636
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.6616976261138916,
      "learning_rate": 0.000171603773900783,
      "loss": 2.9052,
      "step": 147637
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4786994457244873,
      "learning_rate": 0.00017160007692524964,
      "loss": 2.9066,
      "step": 147638
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0780322551727295,
      "learning_rate": 0.00017159637997358798,
      "loss": 3.0385,
      "step": 147639
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9933760166168213,
      "learning_rate": 0.00017159268304579913,
      "loss": 2.9156,
      "step": 147640
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.575178861618042,
      "learning_rate": 0.00017158898614188348,
      "loss": 2.6045,
      "step": 147641
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.282588481903076,
      "learning_rate": 0.000171585289261842,
      "loss": 3.2559,
      "step": 147642
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.499638319015503,
      "learning_rate": 0.00017158159240567513,
      "loss": 2.6688,
      "step": 147643
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.243043899536133,
      "learning_rate": 0.00017157789557338389,
      "loss": 3.0411,
      "step": 147644
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0986745357513428,
      "learning_rate": 0.00017157419876496845,
      "loss": 3.0279,
      "step": 147645
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1984500885009766,
      "learning_rate": 0.00017157050198042997,
      "loss": 3.0662,
      "step": 147646
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.058351039886475,
      "learning_rate": 0.00017156680521976885,
      "loss": 3.0141,
      "step": 147647
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2089548110961914,
      "learning_rate": 0.00017156310848298598,
      "loss": 3.0547,
      "step": 147648
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.154115676879883,
      "learning_rate": 0.00017155941177008185,
      "loss": 3.2597,
      "step": 147649
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1576123237609863,
      "learning_rate": 0.00017155571508105735,
      "loss": 3.1158,
      "step": 147650
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.929274082183838,
      "learning_rate": 0.00017155201841591308,
      "loss": 2.9942,
      "step": 147651
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.994767189025879,
      "learning_rate": 0.00017154832177464972,
      "loss": 3.0146,
      "step": 147652
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.738219738006592,
      "learning_rate": 0.0001715446251572678,
      "loss": 2.7433,
      "step": 147653
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4451749324798584,
      "learning_rate": 0.00017154092856376828,
      "loss": 2.796,
      "step": 147654
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.481222152709961,
      "learning_rate": 0.00017153723199415165,
      "loss": 3.2182,
      "step": 147655
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6110332012176514,
      "learning_rate": 0.00017153353544841876,
      "loss": 2.9954,
      "step": 147656
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.638594150543213,
      "learning_rate": 0.0001715298389265702,
      "loss": 2.9554,
      "step": 147657
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.000349521636963,
      "learning_rate": 0.00017152614242860672,
      "loss": 2.988,
      "step": 147658
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.155385971069336,
      "learning_rate": 0.0001715224459545288,
      "loss": 3.0458,
      "step": 147659
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.395754337310791,
      "learning_rate": 0.0001715187495043374,
      "loss": 3.0913,
      "step": 147660
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.316458225250244,
      "learning_rate": 0.00017151505307803298,
      "loss": 3.061,
      "step": 147661
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1457014083862305,
      "learning_rate": 0.00017151135667561646,
      "loss": 2.9588,
      "step": 147662
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3322980403900146,
      "learning_rate": 0.0001715076602970884,
      "loss": 2.8885,
      "step": 147663
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.24990177154541,
      "learning_rate": 0.00017150396394244937,
      "loss": 2.7322,
      "step": 147664
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.204096794128418,
      "learning_rate": 0.0001715002676117003,
      "loss": 2.8654,
      "step": 147665
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3491837978363037,
      "learning_rate": 0.00017149657130484174,
      "loss": 2.6775,
      "step": 147666
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4030938148498535,
      "learning_rate": 0.0001714928750218743,
      "loss": 3.0894,
      "step": 147667
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.446078300476074,
      "learning_rate": 0.00017148917876279885,
      "loss": 2.9994,
      "step": 147668
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2357265949249268,
      "learning_rate": 0.000171485482527616,
      "loss": 3.0205,
      "step": 147669
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2488882541656494,
      "learning_rate": 0.00017148178631632633,
      "loss": 3.0599,
      "step": 147670
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.341205596923828,
      "learning_rate": 0.0001714780901289307,
      "loss": 2.9481,
      "step": 147671
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5857365131378174,
      "learning_rate": 0.00017147439396542977,
      "loss": 2.9537,
      "step": 147672
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.427987813949585,
      "learning_rate": 0.00017147069782582402,
      "loss": 2.8601,
      "step": 147673
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4978225231170654,
      "learning_rate": 0.0001714670017101144,
      "loss": 3.1141,
      "step": 147674
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.032313108444214,
      "learning_rate": 0.0001714633056183015,
      "loss": 2.7113,
      "step": 147675
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5146100521087646,
      "learning_rate": 0.00017145960955038591,
      "loss": 3.1564,
      "step": 147676
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.8478015661239624,
      "learning_rate": 0.0001714559135063685,
      "loss": 2.8661,
      "step": 147677
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.385941505432129,
      "learning_rate": 0.00017145221748624974,
      "loss": 2.682,
      "step": 147678
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4855618476867676,
      "learning_rate": 0.00017144852149003056,
      "loss": 2.8592,
      "step": 147679
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4627304077148438,
      "learning_rate": 0.00017144482551771154,
      "loss": 2.8379,
      "step": 147680
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2520952224731445,
      "learning_rate": 0.00017144112956929332,
      "loss": 2.8899,
      "step": 147681
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9677191972732544,
      "learning_rate": 0.00017143743364477656,
      "loss": 2.8692,
      "step": 147682
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7345869541168213,
      "learning_rate": 0.00017143373774416206,
      "loss": 2.8348,
      "step": 147683
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.280825138092041,
      "learning_rate": 0.0001714300418674504,
      "loss": 2.9376,
      "step": 147684
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1263959407806396,
      "learning_rate": 0.0001714263460146424,
      "loss": 2.8277,
      "step": 147685
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.7637815475463867,
      "learning_rate": 0.00017142265018573854,
      "loss": 2.8827,
      "step": 147686
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1265223026275635,
      "learning_rate": 0.00017141895438073991,
      "loss": 2.929,
      "step": 147687
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.223095178604126,
      "learning_rate": 0.0001714152585996467,
      "loss": 3.0953,
      "step": 147688
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1206371784210205,
      "learning_rate": 0.00017141156284245987,
      "loss": 3.0749,
      "step": 147689
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6164512634277344,
      "learning_rate": 0.00017140786710918,
      "loss": 2.92,
      "step": 147690
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.486741542816162,
      "learning_rate": 0.00017140417139980792,
      "loss": 3.0246,
      "step": 147691
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1156678199768066,
      "learning_rate": 0.00017140047571434413,
      "loss": 3.1914,
      "step": 147692
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.08089280128479,
      "learning_rate": 0.00017139678005278965,
      "loss": 2.951,
      "step": 147693
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5976829528808594,
      "learning_rate": 0.00017139308441514473,
      "loss": 3.0606,
      "step": 147694
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3376779556274414,
      "learning_rate": 0.00017138938880141032,
      "loss": 2.966,
      "step": 147695
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.686807870864868,
      "learning_rate": 0.00017138569321158698,
      "loss": 2.7406,
      "step": 147696
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.45318341255188,
      "learning_rate": 0.00017138199764567556,
      "loss": 2.9945,
      "step": 147697
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0554354190826416,
      "learning_rate": 0.00017137830210367655,
      "loss": 2.8873,
      "step": 147698
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9792873859405518,
      "learning_rate": 0.000171374606585591,
      "loss": 2.7826,
      "step": 147699
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.571274757385254,
      "learning_rate": 0.00017137091109141906,
      "loss": 3.072,
      "step": 147700
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2310335636138916,
      "learning_rate": 0.0001713672156211618,
      "loss": 2.8809,
      "step": 147701
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4013023376464844,
      "learning_rate": 0.00017136352017481973,
      "loss": 2.8288,
      "step": 147702
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.420077323913574,
      "learning_rate": 0.00017135982475239374,
      "loss": 2.7471,
      "step": 147703
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0706143379211426,
      "learning_rate": 0.00017135612935388422,
      "loss": 2.7384,
      "step": 147704
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.021470785140991,
      "learning_rate": 0.00017135243397929228,
      "loss": 2.8111,
      "step": 147705
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.867870330810547,
      "learning_rate": 0.0001713487386286181,
      "loss": 2.9176,
      "step": 147706
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.251984119415283,
      "learning_rate": 0.00017134504330186277,
      "loss": 3.0491,
      "step": 147707
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7442028522491455,
      "learning_rate": 0.0001713413479990267,
      "loss": 2.7362,
      "step": 147708
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2461955547332764,
      "learning_rate": 0.00017133765272011082,
      "loss": 3.0375,
      "step": 147709
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.212334394454956,
      "learning_rate": 0.00017133395746511554,
      "loss": 2.7045,
      "step": 147710
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.162388801574707,
      "learning_rate": 0.000171330262234042,
      "loss": 2.692,
      "step": 147711
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9657928943634033,
      "learning_rate": 0.0001713265670268903,
      "loss": 3.2766,
      "step": 147712
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0475451946258545,
      "learning_rate": 0.0001713228718436616,
      "loss": 3.1257,
      "step": 147713
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1321606636047363,
      "learning_rate": 0.00017131917668435624,
      "loss": 2.7532,
      "step": 147714
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.276062250137329,
      "learning_rate": 0.00017131548154897524,
      "loss": 3.0766,
      "step": 147715
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.8406455516815186,
      "learning_rate": 0.00017131178643751895,
      "loss": 3.0197,
      "step": 147716
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2332277297973633,
      "learning_rate": 0.00017130809134998845,
      "loss": 3.1754,
      "step": 147717
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2650105953216553,
      "learning_rate": 0.00017130439628638405,
      "loss": 3.0938,
      "step": 147718
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.63761305809021,
      "learning_rate": 0.00017130070124670666,
      "loss": 2.5942,
      "step": 147719
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1584224700927734,
      "learning_rate": 0.00017129700623095677,
      "loss": 3.0665,
      "step": 147720
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.882086992263794,
      "learning_rate": 0.0001712933112391353,
      "loss": 3.1005,
      "step": 147721
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.382110595703125,
      "learning_rate": 0.0001712896162712428,
      "loss": 2.8224,
      "step": 147722
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.891519069671631,
      "learning_rate": 0.00017128592132728014,
      "loss": 2.8351,
      "step": 147723
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.237546920776367,
      "learning_rate": 0.00017128222640724766,
      "loss": 3.2157,
      "step": 147724
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4181504249572754,
      "learning_rate": 0.0001712785315111464,
      "loss": 3.0074,
      "step": 147725
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1910300254821777,
      "learning_rate": 0.0001712748366389767,
      "loss": 3.1383,
      "step": 147726
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.169578790664673,
      "learning_rate": 0.0001712711417907396,
      "loss": 2.9551,
      "step": 147727
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0361599922180176,
      "learning_rate": 0.00017126744696643547,
      "loss": 3.0297,
      "step": 147728
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.080817461013794,
      "learning_rate": 0.00017126375216606544,
      "loss": 2.8288,
      "step": 147729
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7975733280181885,
      "learning_rate": 0.00017126005738962963,
      "loss": 2.8189,
      "step": 147730
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.33028507232666,
      "learning_rate": 0.0001712563626371291,
      "loss": 3.4327,
      "step": 147731
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5890276432037354,
      "learning_rate": 0.00017125266790856439,
      "loss": 2.9259,
      "step": 147732
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.8478052616119385,
      "learning_rate": 0.00017124897320393631,
      "loss": 3.0801,
      "step": 147733
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.091172218322754,
      "learning_rate": 0.00017124527852324534,
      "loss": 3.0176,
      "step": 147734
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.122235059738159,
      "learning_rate": 0.00017124158386649246,
      "loss": 2.8509,
      "step": 147735
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.688964605331421,
      "learning_rate": 0.00017123788923367823,
      "loss": 2.9433,
      "step": 147736
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4612178802490234,
      "learning_rate": 0.00017123419462480323,
      "loss": 2.7412,
      "step": 147737
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9342734813690186,
      "learning_rate": 0.00017123050003986815,
      "loss": 3.2753,
      "step": 147738
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.269538164138794,
      "learning_rate": 0.00017122680547887388,
      "loss": 2.9033,
      "step": 147739
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.0265727043151855,
      "learning_rate": 0.00017122311094182086,
      "loss": 3.161,
      "step": 147740
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.304973840713501,
      "learning_rate": 0.00017121941642871,
      "loss": 2.9413,
      "step": 147741
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.849055528640747,
      "learning_rate": 0.00017121572193954187,
      "loss": 2.9486,
      "step": 147742
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8508944511413574,
      "learning_rate": 0.0001712120274743172,
      "loss": 3.066,
      "step": 147743
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5598785877227783,
      "learning_rate": 0.00017120833303303648,
      "loss": 2.9097,
      "step": 147744
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0186548233032227,
      "learning_rate": 0.00017120463861570074,
      "loss": 2.893,
      "step": 147745
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8344335556030273,
      "learning_rate": 0.00017120094422231033,
      "loss": 2.8997,
      "step": 147746
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.5339794158935547,
      "learning_rate": 0.0001711972498528662,
      "loss": 2.8583,
      "step": 147747
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.518098831176758,
      "learning_rate": 0.00017119355550736893,
      "loss": 3.0849,
      "step": 147748
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.698047161102295,
      "learning_rate": 0.0001711898611858191,
      "loss": 2.8959,
      "step": 147749
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.087674617767334,
      "learning_rate": 0.00017118616688821767,
      "loss": 2.9493,
      "step": 147750
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.684570789337158,
      "learning_rate": 0.00017118247261456513,
      "loss": 3.0062,
      "step": 147751
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9256184101104736,
      "learning_rate": 0.00017117877836486206,
      "loss": 2.9458,
      "step": 147752
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.2415316104888916,
      "learning_rate": 0.00017117508413910944,
      "loss": 3.1198,
      "step": 147753
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1064627170562744,
      "learning_rate": 0.0001711713899373078,
      "loss": 3.1782,
      "step": 147754
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2157299518585205,
      "learning_rate": 0.00017116769575945768,
      "loss": 2.8086,
      "step": 147755
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.546462297439575,
      "learning_rate": 0.0001711640016055601,
      "loss": 2.9843,
      "step": 147756
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9994356632232666,
      "learning_rate": 0.00017116030747561548,
      "loss": 2.8821,
      "step": 147757
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6837151050567627,
      "learning_rate": 0.0001711566133696245,
      "loss": 2.918,
      "step": 147758
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.6953699588775635,
      "learning_rate": 0.00017115291928758813,
      "loss": 3.0573,
      "step": 147759
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.280055522918701,
      "learning_rate": 0.00017114922522950678,
      "loss": 2.948,
      "step": 147760
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.852815628051758,
      "learning_rate": 0.00017114553119538113,
      "loss": 2.7336,
      "step": 147761
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9259226322174072,
      "learning_rate": 0.00017114183718521209,
      "loss": 2.7311,
      "step": 147762
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4292237758636475,
      "learning_rate": 0.00017113814319900006,
      "loss": 3.0272,
      "step": 147763
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.8259965181350708,
      "learning_rate": 0.000171134449236746,
      "loss": 3.0035,
      "step": 147764
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.411529302597046,
      "learning_rate": 0.00017113075529845054,
      "loss": 2.9997,
      "step": 147765
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3040454387664795,
      "learning_rate": 0.00017112706138411427,
      "loss": 2.9647,
      "step": 147766
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0653469562530518,
      "learning_rate": 0.00017112336749373777,
      "loss": 3.0501,
      "step": 147767
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.49589467048645,
      "learning_rate": 0.00017111967362732204,
      "loss": 3.0132,
      "step": 147768
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.5803232192993164,
      "learning_rate": 0.0001711159797848674,
      "loss": 2.8828,
      "step": 147769
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.870685577392578,
      "learning_rate": 0.0001711122859663749,
      "loss": 2.868,
      "step": 147770
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1671640872955322,
      "learning_rate": 0.00017110859217184493,
      "loss": 2.7882,
      "step": 147771
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4086787700653076,
      "learning_rate": 0.0001711048984012786,
      "loss": 3.0558,
      "step": 147772
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.8773744106292725,
      "learning_rate": 0.000171101204654676,
      "loss": 3.2216,
      "step": 147773
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.3281137943267822,
      "learning_rate": 0.00017109751093203826,
      "loss": 2.7014,
      "step": 147774
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.091676950454712,
      "learning_rate": 0.0001710938172333658,
      "loss": 2.8253,
      "step": 147775
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5866470336914062,
      "learning_rate": 0.0001710901235586595,
      "loss": 2.8573,
      "step": 147776
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.229414224624634,
      "learning_rate": 0.00017108642990791993,
      "loss": 3.0416,
      "step": 147777
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.286832094192505,
      "learning_rate": 0.00017108273628114806,
      "loss": 3.0174,
      "step": 147778
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5238609313964844,
      "learning_rate": 0.00017107904267834405,
      "loss": 2.9344,
      "step": 147779
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.010353088378906,
      "learning_rate": 0.00017107534909950906,
      "loss": 3.0009,
      "step": 147780
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0203216075897217,
      "learning_rate": 0.00017107165554464343,
      "loss": 3.1651,
      "step": 147781
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9717479944229126,
      "learning_rate": 0.00017106796201374818,
      "loss": 3.2043,
      "step": 147782
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0833067893981934,
      "learning_rate": 0.00017106426850682367,
      "loss": 2.9207,
      "step": 147783
    },
    {
      "epoch": 1.92,
      "grad_norm": 5.3215484619140625,
      "learning_rate": 0.00017106057502387103,
      "loss": 2.7062,
      "step": 147784
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.846835613250732,
      "learning_rate": 0.0001710568815648904,
      "loss": 2.9531,
      "step": 147785
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.287213087081909,
      "learning_rate": 0.00017105318812988282,
      "loss": 2.7635,
      "step": 147786
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3810412883758545,
      "learning_rate": 0.0001710494947188488,
      "loss": 2.775,
      "step": 147787
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.829904079437256,
      "learning_rate": 0.0001710458013317892,
      "loss": 3.1359,
      "step": 147788
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.4135470390319824,
      "learning_rate": 0.00017104210796870455,
      "loss": 2.9539,
      "step": 147789
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.385737419128418,
      "learning_rate": 0.00017103841462959583,
      "loss": 2.9646,
      "step": 147790
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1324896812438965,
      "learning_rate": 0.00017103472131446324,
      "loss": 2.8572,
      "step": 147791
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.405761241912842,
      "learning_rate": 0.00017103102802330789,
      "loss": 2.9186,
      "step": 147792
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7909348011016846,
      "learning_rate": 0.00017102733475613012,
      "loss": 2.7994,
      "step": 147793
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.434593915939331,
      "learning_rate": 0.00017102364151293098,
      "loss": 2.8322,
      "step": 147794
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7857189178466797,
      "learning_rate": 0.00017101994829371085,
      "loss": 2.9311,
      "step": 147795
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.379059314727783,
      "learning_rate": 0.0001710162550984708,
      "loss": 2.9229,
      "step": 147796
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.052504539489746,
      "learning_rate": 0.000171012561927211,
      "loss": 2.8554,
      "step": 147797
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4517982006073,
      "learning_rate": 0.00017100886877993252,
      "loss": 2.9818,
      "step": 147798
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.9881646633148193,
      "learning_rate": 0.0001710051756566358,
      "loss": 3.0667,
      "step": 147799
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.3547773361206055,
      "learning_rate": 0.0001710014825573218,
      "loss": 2.9374,
      "step": 147800
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.707017660140991,
      "learning_rate": 0.0001709977894819909,
      "loss": 2.9898,
      "step": 147801
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2242844104766846,
      "learning_rate": 0.00017099409643064407,
      "loss": 2.9926,
      "step": 147802
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1309030055999756,
      "learning_rate": 0.0001709904034032819,
      "loss": 2.8804,
      "step": 147803
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1278512477874756,
      "learning_rate": 0.000170986710399905,
      "loss": 3.1071,
      "step": 147804
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.1541810035705566,
      "learning_rate": 0.00017098301742051405,
      "loss": 3.1035,
      "step": 147805
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.134153366088867,
      "learning_rate": 0.00017097932446510992,
      "loss": 2.8742,
      "step": 147806
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7293953895568848,
      "learning_rate": 0.00017097563153369298,
      "loss": 3.0203,
      "step": 147807
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.5007030963897705,
      "learning_rate": 0.00017097193862626426,
      "loss": 3.0787,
      "step": 147808
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.005641460418701,
      "learning_rate": 0.00017096824574282428,
      "loss": 2.9791,
      "step": 147809
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.0091753005981445,
      "learning_rate": 0.00017096455288337378,
      "loss": 2.8942,
      "step": 147810
    },
    {
      "epoch": 1.92,
      "grad_norm": 5.348028659820557,
      "learning_rate": 0.00017096086004791323,
      "loss": 2.7052,
      "step": 147811
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.6885669231414795,
      "learning_rate": 0.00017095716723644367,
      "loss": 2.8823,
      "step": 147812
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.853472948074341,
      "learning_rate": 0.00017095347444896544,
      "loss": 3.0928,
      "step": 147813
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.559152364730835,
      "learning_rate": 0.00017094978168547955,
      "loss": 2.857,
      "step": 147814
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.945566177368164,
      "learning_rate": 0.00017094608894598648,
      "loss": 2.6326,
      "step": 147815
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.9447267055511475,
      "learning_rate": 0.00017094239623048705,
      "loss": 3.0931,
      "step": 147816
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.636021614074707,
      "learning_rate": 0.00017093870353898167,
      "loss": 2.9368,
      "step": 147817
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.8545233011245728,
      "learning_rate": 0.00017093501087147136,
      "loss": 2.8894,
      "step": 147818
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.252889633178711,
      "learning_rate": 0.00017093131822795658,
      "loss": 2.8584,
      "step": 147819
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.0824368000030518,
      "learning_rate": 0.00017092762560843818,
      "loss": 3.156,
      "step": 147820
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.16819167137146,
      "learning_rate": 0.00017092393301291683,
      "loss": 2.9852,
      "step": 147821
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.7388646602630615,
      "learning_rate": 0.00017092024044139308,
      "loss": 2.8011,
      "step": 147822
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2412173748016357,
      "learning_rate": 0.00017091654789386763,
      "loss": 3.2395,
      "step": 147823
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.169787883758545,
      "learning_rate": 0.00017091285537034132,
      "loss": 2.9136,
      "step": 147824
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2979938983917236,
      "learning_rate": 0.00017090916287081464,
      "loss": 2.9658,
      "step": 147825
    },
    {
      "epoch": 1.92,
      "grad_norm": 3.1212470531463623,
      "learning_rate": 0.0001709054703952885,
      "loss": 2.928,
      "step": 147826
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4772708415985107,
      "learning_rate": 0.0001709017779437635,
      "loss": 2.8802,
      "step": 147827
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.8003740310668945,
      "learning_rate": 0.00017089808551624025,
      "loss": 2.9625,
      "step": 147828
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.296358585357666,
      "learning_rate": 0.00017089439311271937,
      "loss": 3.0625,
      "step": 147829
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9531859159469604,
      "learning_rate": 0.0001708907007332018,
      "loss": 2.8802,
      "step": 147830
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.8314861059188843,
      "learning_rate": 0.00017088700837768794,
      "loss": 2.9003,
      "step": 147831
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.272486448287964,
      "learning_rate": 0.0001708833160461788,
      "loss": 3.2427,
      "step": 147832
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.236442804336548,
      "learning_rate": 0.00017087962373867486,
      "loss": 3.0229,
      "step": 147833
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.4922878742218018,
      "learning_rate": 0.00017087593145517676,
      "loss": 2.9214,
      "step": 147834
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9520461559295654,
      "learning_rate": 0.00017087223919568523,
      "loss": 2.5597,
      "step": 147835
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.563987970352173,
      "learning_rate": 0.0001708685469602011,
      "loss": 2.928,
      "step": 147836
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.248727798461914,
      "learning_rate": 0.0001708648547487248,
      "loss": 3.0274,
      "step": 147837
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.2946410179138184,
      "learning_rate": 0.00017086116256125728,
      "loss": 2.956,
      "step": 147838
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.9171634912490845,
      "learning_rate": 0.00017085747039779912,
      "loss": 3.041,
      "step": 147839
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.297991991043091,
      "learning_rate": 0.00017085377825835088,
      "loss": 3.0829,
      "step": 147840
    },
    {
      "epoch": 1.92,
      "grad_norm": 2.811098575592041,
      "learning_rate": 0.0001708500861429135,
      "loss": 3.163,
      "step": 147841
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.207213878631592,
      "learning_rate": 0.00017084639405148745,
      "loss": 3.1262,
      "step": 147842
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6968984603881836,
      "learning_rate": 0.00017084270198407342,
      "loss": 3.1353,
      "step": 147843
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.340848207473755,
      "learning_rate": 0.0001708390099406723,
      "loss": 2.9187,
      "step": 147844
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5346896648406982,
      "learning_rate": 0.00017083531792128466,
      "loss": 3.285,
      "step": 147845
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1178982257843018,
      "learning_rate": 0.00017083162592591104,
      "loss": 2.9729,
      "step": 147846
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.921799659729004,
      "learning_rate": 0.00017082793395455236,
      "loss": 3.0,
      "step": 147847
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2647011280059814,
      "learning_rate": 0.0001708242420072091,
      "loss": 2.6912,
      "step": 147848
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.796416759490967,
      "learning_rate": 0.00017082055008388214,
      "loss": 2.9191,
      "step": 147849
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.098597049713135,
      "learning_rate": 0.00017081685818457214,
      "loss": 2.9456,
      "step": 147850
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1543123722076416,
      "learning_rate": 0.00017081316630927965,
      "loss": 2.9942,
      "step": 147851
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.167046070098877,
      "learning_rate": 0.00017080947445800537,
      "loss": 2.8984,
      "step": 147852
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9699902534484863,
      "learning_rate": 0.00017080578263075014,
      "loss": 2.8605,
      "step": 147853
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7846226692199707,
      "learning_rate": 0.00017080209082751444,
      "loss": 3.046,
      "step": 147854
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.200106382369995,
      "learning_rate": 0.0001707983990482992,
      "loss": 3.325,
      "step": 147855
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.049915075302124,
      "learning_rate": 0.00017079470729310496,
      "loss": 2.9113,
      "step": 147856
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.096898078918457,
      "learning_rate": 0.00017079101556193244,
      "loss": 3.1851,
      "step": 147857
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0195064544677734,
      "learning_rate": 0.0001707873238547822,
      "loss": 2.9477,
      "step": 147858
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0192434787750244,
      "learning_rate": 0.00017078363217165515,
      "loss": 3.0896,
      "step": 147859
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.541948080062866,
      "learning_rate": 0.00017077994051255168,
      "loss": 2.7906,
      "step": 147860
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.401463270187378,
      "learning_rate": 0.00017077624887747284,
      "loss": 2.7781,
      "step": 147861
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.469482660293579,
      "learning_rate": 0.000170772557266419,
      "loss": 2.9594,
      "step": 147862
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.52896785736084,
      "learning_rate": 0.00017076886567939122,
      "loss": 2.9035,
      "step": 147863
    },
    {
      "epoch": 1.93,
      "grad_norm": 6.137858867645264,
      "learning_rate": 0.0001707651741163897,
      "loss": 2.6347,
      "step": 147864
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.409559488296509,
      "learning_rate": 0.00017076148257741554,
      "loss": 3.2187,
      "step": 147865
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2894442081451416,
      "learning_rate": 0.00017075779106246906,
      "loss": 2.9484,
      "step": 147866
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2905449867248535,
      "learning_rate": 0.00017075409957155132,
      "loss": 3.1356,
      "step": 147867
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.670065402984619,
      "learning_rate": 0.0001707504081046627,
      "loss": 2.9859,
      "step": 147868
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.897143840789795,
      "learning_rate": 0.00017074671666180414,
      "loss": 3.0013,
      "step": 147869
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4888923168182373,
      "learning_rate": 0.00017074302524297622,
      "loss": 2.7627,
      "step": 147870
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.224414825439453,
      "learning_rate": 0.0001707393338481796,
      "loss": 3.0679,
      "step": 147871
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.914780855178833,
      "learning_rate": 0.00017073564247741484,
      "loss": 3.0375,
      "step": 147872
    },
    {
      "epoch": 1.93,
      "grad_norm": 5.3890275955200195,
      "learning_rate": 0.00017073195113068292,
      "loss": 2.9728,
      "step": 147873
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6328258514404297,
      "learning_rate": 0.00017072825980798422,
      "loss": 2.9127,
      "step": 147874
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.878843069076538,
      "learning_rate": 0.00017072456850931967,
      "loss": 2.8822,
      "step": 147875
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2589073181152344,
      "learning_rate": 0.0001707208772346899,
      "loss": 3.0382,
      "step": 147876
    },
    {
      "epoch": 1.93,
      "grad_norm": 5.692193031311035,
      "learning_rate": 0.0001707171859840956,
      "loss": 2.9076,
      "step": 147877
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.9696784019470215,
      "learning_rate": 0.00017071349475753726,
      "loss": 3.0965,
      "step": 147878
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.601320266723633,
      "learning_rate": 0.00017070980355501583,
      "loss": 2.9505,
      "step": 147879
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3014769554138184,
      "learning_rate": 0.00017070611237653176,
      "loss": 2.8362,
      "step": 147880
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.330212354660034,
      "learning_rate": 0.000170702421222086,
      "loss": 2.9964,
      "step": 147881
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.496340036392212,
      "learning_rate": 0.00017069873009167906,
      "loss": 3.0051,
      "step": 147882
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9177191257476807,
      "learning_rate": 0.00017069503898531173,
      "loss": 2.9113,
      "step": 147883
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6090869903564453,
      "learning_rate": 0.0001706913479029845,
      "loss": 2.8726,
      "step": 147884
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.396606683731079,
      "learning_rate": 0.00017068765684469833,
      "loss": 3.0807,
      "step": 147885
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.025642156600952,
      "learning_rate": 0.00017068396581045356,
      "loss": 2.8859,
      "step": 147886
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0724170207977295,
      "learning_rate": 0.00017068027480025125,
      "loss": 3.1822,
      "step": 147887
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.391038179397583,
      "learning_rate": 0.00017067658381409194,
      "loss": 2.6457,
      "step": 147888
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.314532518386841,
      "learning_rate": 0.00017067289285197627,
      "loss": 2.905,
      "step": 147889
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5180459022521973,
      "learning_rate": 0.00017066920191390484,
      "loss": 3.1531,
      "step": 147890
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.271406412124634,
      "learning_rate": 0.00017066551099987855,
      "loss": 2.9532,
      "step": 147891
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4808173179626465,
      "learning_rate": 0.00017066182010989792,
      "loss": 3.127,
      "step": 147892
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.075021743774414,
      "learning_rate": 0.00017065812924396375,
      "loss": 2.9585,
      "step": 147893
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.152517080307007,
      "learning_rate": 0.00017065443840207672,
      "loss": 2.9618,
      "step": 147894
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0241053104400635,
      "learning_rate": 0.00017065074758423744,
      "loss": 3.0632,
      "step": 147895
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.390882968902588,
      "learning_rate": 0.0001706470567904465,
      "loss": 3.0289,
      "step": 147896
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2143301963806152,
      "learning_rate": 0.00017064336602070487,
      "loss": 2.9282,
      "step": 147897
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.272495985031128,
      "learning_rate": 0.00017063967527501295,
      "loss": 3.1119,
      "step": 147898
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3994996547698975,
      "learning_rate": 0.00017063598455337167,
      "loss": 2.8651,
      "step": 147899
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9443832635879517,
      "learning_rate": 0.00017063229385578163,
      "loss": 2.932,
      "step": 147900
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9729461669921875,
      "learning_rate": 0.00017062860318224344,
      "loss": 2.8453,
      "step": 147901
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.928661584854126,
      "learning_rate": 0.00017062491253275777,
      "loss": 2.9981,
      "step": 147902
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.411118507385254,
      "learning_rate": 0.00017062122190732545,
      "loss": 2.9269,
      "step": 147903
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6172149181365967,
      "learning_rate": 0.00017061753130594693,
      "loss": 2.7972,
      "step": 147904
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.102804660797119,
      "learning_rate": 0.00017061384072862326,
      "loss": 2.6744,
      "step": 147905
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.344069242477417,
      "learning_rate": 0.00017061015017535487,
      "loss": 2.8668,
      "step": 147906
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.966193199157715,
      "learning_rate": 0.0001706064596461425,
      "loss": 3.2673,
      "step": 147907
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.216916799545288,
      "learning_rate": 0.0001706027691409867,
      "loss": 3.1688,
      "step": 147908
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8507537841796875,
      "learning_rate": 0.00017059907865988846,
      "loss": 3.019,
      "step": 147909
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.197972297668457,
      "learning_rate": 0.00017059538820284807,
      "loss": 3.076,
      "step": 147910
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7741763591766357,
      "learning_rate": 0.00017059169776986667,
      "loss": 2.9688,
      "step": 147911
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.681837797164917,
      "learning_rate": 0.00017058800736094465,
      "loss": 3.1103,
      "step": 147912
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.255436897277832,
      "learning_rate": 0.0001705843169760828,
      "loss": 2.8557,
      "step": 147913
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.555364608764648,
      "learning_rate": 0.00017058062661528165,
      "loss": 3.1332,
      "step": 147914
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3212223052978516,
      "learning_rate": 0.00017057693627854208,
      "loss": 3.1723,
      "step": 147915
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.691591501235962,
      "learning_rate": 0.0001705732459658646,
      "loss": 3.2904,
      "step": 147916
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.378587007522583,
      "learning_rate": 0.00017056955567725012,
      "loss": 2.7886,
      "step": 147917
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2433362007141113,
      "learning_rate": 0.00017056586541269923,
      "loss": 3.1477,
      "step": 147918
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.624490976333618,
      "learning_rate": 0.00017056217517221256,
      "loss": 3.0187,
      "step": 147919
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.297851800918579,
      "learning_rate": 0.0001705584849557907,
      "loss": 2.9355,
      "step": 147920
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0797598361968994,
      "learning_rate": 0.00017055479476343462,
      "loss": 2.955,
      "step": 147921
    },
    {
      "epoch": 1.93,
      "grad_norm": 5.012960433959961,
      "learning_rate": 0.00017055110459514468,
      "loss": 2.7045,
      "step": 147922
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.648064136505127,
      "learning_rate": 0.00017054741445092188,
      "loss": 2.8249,
      "step": 147923
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5761499404907227,
      "learning_rate": 0.00017054372433076678,
      "loss": 2.9234,
      "step": 147924
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3856773376464844,
      "learning_rate": 0.00017054003423467987,
      "loss": 3.0847,
      "step": 147925
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.292652130126953,
      "learning_rate": 0.00017053634416266212,
      "loss": 2.864,
      "step": 147926
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9783971309661865,
      "learning_rate": 0.0001705326541147142,
      "loss": 2.9728,
      "step": 147927
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.264336585998535,
      "learning_rate": 0.00017052896409083652,
      "loss": 3.2121,
      "step": 147928
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5455269813537598,
      "learning_rate": 0.00017052527409103005,
      "loss": 3.0517,
      "step": 147929
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.997722864151001,
      "learning_rate": 0.00017052158411529544,
      "loss": 3.1394,
      "step": 147930
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.326338291168213,
      "learning_rate": 0.00017051789416363313,
      "loss": 2.765,
      "step": 147931
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2630412578582764,
      "learning_rate": 0.00017051420423604415,
      "loss": 2.8788,
      "step": 147932
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4624876976013184,
      "learning_rate": 0.0001705105143325289,
      "loss": 3.0411,
      "step": 147933
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1506757736206055,
      "learning_rate": 0.0001705068244530883,
      "loss": 2.9804,
      "step": 147934
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.064319133758545,
      "learning_rate": 0.0001705031345977229,
      "loss": 3.1067,
      "step": 147935
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.244018077850342,
      "learning_rate": 0.00017049944476643333,
      "loss": 3.3012,
      "step": 147936
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.9431943893432617,
      "learning_rate": 0.00017049575495922044,
      "loss": 2.9986,
      "step": 147937
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.403975963592529,
      "learning_rate": 0.00017049206517608486,
      "loss": 2.9307,
      "step": 147938
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0530340671539307,
      "learning_rate": 0.0001704883754170271,
      "loss": 2.8422,
      "step": 147939
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.300248622894287,
      "learning_rate": 0.00017048468568204816,
      "loss": 3.0005,
      "step": 147940
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6434326171875,
      "learning_rate": 0.00017048099597114855,
      "loss": 3.045,
      "step": 147941
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4035003185272217,
      "learning_rate": 0.00017047730628432883,
      "loss": 2.8003,
      "step": 147942
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0438528060913086,
      "learning_rate": 0.00017047361662158998,
      "loss": 3.1026,
      "step": 147943
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3987975120544434,
      "learning_rate": 0.0001704699269829325,
      "loss": 2.8323,
      "step": 147944
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.987497091293335,
      "learning_rate": 0.00017046623736835702,
      "loss": 2.9215,
      "step": 147945
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1924898624420166,
      "learning_rate": 0.00017046254777786435,
      "loss": 2.9618,
      "step": 147946
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.52643084526062,
      "learning_rate": 0.0001704588582114551,
      "loss": 3.3879,
      "step": 147947
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2281248569488525,
      "learning_rate": 0.0001704551686691301,
      "loss": 3.1075,
      "step": 147948
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2045772075653076,
      "learning_rate": 0.0001704514791508899,
      "loss": 3.0185,
      "step": 147949
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1869657039642334,
      "learning_rate": 0.0001704477896567352,
      "loss": 2.98,
      "step": 147950
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.481991767883301,
      "learning_rate": 0.00017044410018666662,
      "loss": 3.0542,
      "step": 147951
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2450270652770996,
      "learning_rate": 0.00017044041074068504,
      "loss": 2.9193,
      "step": 147952
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.109750270843506,
      "learning_rate": 0.0001704367213187909,
      "loss": 2.9492,
      "step": 147953
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.381488561630249,
      "learning_rate": 0.00017043303192098514,
      "loss": 2.9765,
      "step": 147954
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.2977612018585205,
      "learning_rate": 0.00017042934254726835,
      "loss": 2.8066,
      "step": 147955
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4786863327026367,
      "learning_rate": 0.00017042565319764117,
      "loss": 2.7267,
      "step": 147956
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.651372194290161,
      "learning_rate": 0.0001704219638721042,
      "loss": 2.8781,
      "step": 147957
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.240318536758423,
      "learning_rate": 0.00017041827457065832,
      "loss": 3.0664,
      "step": 147958
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1780455112457275,
      "learning_rate": 0.00017041458529330406,
      "loss": 2.9496,
      "step": 147959
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4995603561401367,
      "learning_rate": 0.00017041089604004226,
      "loss": 3.0181,
      "step": 147960
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.931675672531128,
      "learning_rate": 0.00017040720681087354,
      "loss": 2.8193,
      "step": 147961
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0261032581329346,
      "learning_rate": 0.00017040351760579854,
      "loss": 3.0962,
      "step": 147962
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1472418308258057,
      "learning_rate": 0.00017039982842481787,
      "loss": 2.9834,
      "step": 147963
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.107987642288208,
      "learning_rate": 0.00017039613926793243,
      "loss": 2.9537,
      "step": 147964
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.104482412338257,
      "learning_rate": 0.0001703924501351427,
      "loss": 2.9385,
      "step": 147965
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2400271892547607,
      "learning_rate": 0.00017038876102644954,
      "loss": 3.0442,
      "step": 147966
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2488300800323486,
      "learning_rate": 0.00017038507194185357,
      "loss": 3.3997,
      "step": 147967
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.63529896736145,
      "learning_rate": 0.0001703813828813555,
      "loss": 2.9066,
      "step": 147968
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5273847579956055,
      "learning_rate": 0.0001703776938449558,
      "loss": 3.0871,
      "step": 147969
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3134071826934814,
      "learning_rate": 0.0001703740048326555,
      "loss": 2.9346,
      "step": 147970
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4542815685272217,
      "learning_rate": 0.00017037031584445498,
      "loss": 2.8267,
      "step": 147971
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.512357473373413,
      "learning_rate": 0.00017036662688035516,
      "loss": 2.9889,
      "step": 147972
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8631789684295654,
      "learning_rate": 0.00017036293794035667,
      "loss": 2.9031,
      "step": 147973
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6114933490753174,
      "learning_rate": 0.00017035924902446012,
      "loss": 2.985,
      "step": 147974
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1656649112701416,
      "learning_rate": 0.00017035556013266615,
      "loss": 2.925,
      "step": 147975
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.391779661178589,
      "learning_rate": 0.00017035187126497565,
      "loss": 2.8977,
      "step": 147976
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.677252769470215,
      "learning_rate": 0.00017034818242138905,
      "loss": 3.1289,
      "step": 147977
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6883997917175293,
      "learning_rate": 0.0001703444936019073,
      "loss": 2.8433,
      "step": 147978
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.041510820388794,
      "learning_rate": 0.00017034080480653093,
      "loss": 3.0626,
      "step": 147979
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9836814403533936,
      "learning_rate": 0.00017033711603526067,
      "loss": 3.0656,
      "step": 147980
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2862915992736816,
      "learning_rate": 0.00017033342728809707,
      "loss": 2.8964,
      "step": 147981
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9903955459594727,
      "learning_rate": 0.00017032973856504104,
      "loss": 3.0609,
      "step": 147982
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4312543869018555,
      "learning_rate": 0.00017032604986609305,
      "loss": 3.0352,
      "step": 147983
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0316665172576904,
      "learning_rate": 0.000170322361191254,
      "loss": 2.9454,
      "step": 147984
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5276482105255127,
      "learning_rate": 0.00017031867254052447,
      "loss": 2.6356,
      "step": 147985
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6261067390441895,
      "learning_rate": 0.00017031498391390518,
      "loss": 2.9377,
      "step": 147986
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6938741207122803,
      "learning_rate": 0.00017031129531139662,
      "loss": 2.9888,
      "step": 147987
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.097123146057129,
      "learning_rate": 0.00017030760673299977,
      "loss": 2.8068,
      "step": 147988
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8847386837005615,
      "learning_rate": 0.0001703039181787151,
      "loss": 2.9034,
      "step": 147989
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1020097732543945,
      "learning_rate": 0.00017030022964854347,
      "loss": 3.0284,
      "step": 147990
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.720762014389038,
      "learning_rate": 0.0001702965411424855,
      "loss": 2.9685,
      "step": 147991
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9009323120117188,
      "learning_rate": 0.0001702928526605418,
      "loss": 2.9373,
      "step": 147992
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.998778820037842,
      "learning_rate": 0.00017028916420271304,
      "loss": 2.8801,
      "step": 147993
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1571905612945557,
      "learning_rate": 0.00017028547576900004,
      "loss": 3.036,
      "step": 147994
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.586285352706909,
      "learning_rate": 0.00017028178735940336,
      "loss": 3.1057,
      "step": 147995
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.875364303588867,
      "learning_rate": 0.00017027809897392384,
      "loss": 3.3308,
      "step": 147996
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.0174078941345215,
      "learning_rate": 0.00017027441061256206,
      "loss": 3.0343,
      "step": 147997
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.900468349456787,
      "learning_rate": 0.00017027072227531872,
      "loss": 3.0656,
      "step": 147998
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0566799640655518,
      "learning_rate": 0.0001702670339621944,
      "loss": 2.8695,
      "step": 147999
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1462349891662598,
      "learning_rate": 0.00017026334567318998,
      "loss": 2.9676,
      "step": 148000
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.136931896209717,
      "learning_rate": 0.00017025965740830593,
      "loss": 2.798,
      "step": 148001
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2151033878326416,
      "learning_rate": 0.00017025596916754316,
      "loss": 2.9943,
      "step": 148002
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.546877145767212,
      "learning_rate": 0.0001702522809509022,
      "loss": 3.0293,
      "step": 148003
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.064601421356201,
      "learning_rate": 0.00017024859275838398,
      "loss": 2.9482,
      "step": 148004
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9149136543273926,
      "learning_rate": 0.00017024490458998875,
      "loss": 3.197,
      "step": 148005
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.628574371337891,
      "learning_rate": 0.0001702412164457176,
      "loss": 2.8264,
      "step": 148006
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1654577255249023,
      "learning_rate": 0.00017023752832557093,
      "loss": 2.7753,
      "step": 148007
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7885098457336426,
      "learning_rate": 0.00017023384022954964,
      "loss": 3.0638,
      "step": 148008
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4344065189361572,
      "learning_rate": 0.00017023015215765423,
      "loss": 3.1026,
      "step": 148009
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.452458381652832,
      "learning_rate": 0.0001702264641098856,
      "loss": 2.8765,
      "step": 148010
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.285309314727783,
      "learning_rate": 0.00017022277608624433,
      "loss": 2.8234,
      "step": 148011
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1517491340637207,
      "learning_rate": 0.00017021908808673112,
      "loss": 2.8293,
      "step": 148012
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2907378673553467,
      "learning_rate": 0.00017021540011134643,
      "loss": 3.1614,
      "step": 148013
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4069745540618896,
      "learning_rate": 0.00017021171216009137,
      "loss": 2.7336,
      "step": 148014
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.239753484725952,
      "learning_rate": 0.0001702080242329662,
      "loss": 2.746,
      "step": 148015
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4616129398345947,
      "learning_rate": 0.00017020433632997198,
      "loss": 2.9938,
      "step": 148016
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8459362983703613,
      "learning_rate": 0.0001702006484511092,
      "loss": 2.9534,
      "step": 148017
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.038957118988037,
      "learning_rate": 0.00017019696059637857,
      "loss": 3.1038,
      "step": 148018
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.366799354553223,
      "learning_rate": 0.00017019327276578068,
      "loss": 2.6454,
      "step": 148019
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.8571815490722656,
      "learning_rate": 0.0001701895849593164,
      "loss": 3.0322,
      "step": 148020
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0346531867980957,
      "learning_rate": 0.00017018589717698624,
      "loss": 3.032,
      "step": 148021
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4380292892456055,
      "learning_rate": 0.00017018220941879104,
      "loss": 2.9413,
      "step": 148022
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5782463550567627,
      "learning_rate": 0.00017017852168473143,
      "loss": 2.9397,
      "step": 148023
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.802426815032959,
      "learning_rate": 0.000170174833974808,
      "loss": 3.1612,
      "step": 148024
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.121737480163574,
      "learning_rate": 0.0001701711462890216,
      "loss": 2.9829,
      "step": 148025
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.090376377105713,
      "learning_rate": 0.00017016745862737288,
      "loss": 2.7726,
      "step": 148026
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2896368503570557,
      "learning_rate": 0.00017016377098986234,
      "loss": 2.7734,
      "step": 148027
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.005617618560791,
      "learning_rate": 0.00017016008337649092,
      "loss": 3.188,
      "step": 148028
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2361483573913574,
      "learning_rate": 0.00017015639578725917,
      "loss": 2.701,
      "step": 148029
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5413944721221924,
      "learning_rate": 0.0001701527082221677,
      "loss": 2.9553,
      "step": 148030
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.147559404373169,
      "learning_rate": 0.00017014902068121743,
      "loss": 2.8438,
      "step": 148031
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8581619262695312,
      "learning_rate": 0.00017014533316440876,
      "loss": 2.7605,
      "step": 148032
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.236466407775879,
      "learning_rate": 0.00017014164567174264,
      "loss": 2.836,
      "step": 148033
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.004567861557007,
      "learning_rate": 0.00017013795820321965,
      "loss": 2.8221,
      "step": 148034
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.412931442260742,
      "learning_rate": 0.00017013427075884049,
      "loss": 2.9274,
      "step": 148035
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.318430185317993,
      "learning_rate": 0.00017013058333860566,
      "loss": 2.7689,
      "step": 148036
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.080530881881714,
      "learning_rate": 0.00017012689594251616,
      "loss": 2.8971,
      "step": 148037
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7773983478546143,
      "learning_rate": 0.00017012320857057238,
      "loss": 2.8369,
      "step": 148038
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.3830881118774414,
      "learning_rate": 0.00017011952122277524,
      "loss": 3.1119,
      "step": 148039
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1435298919677734,
      "learning_rate": 0.00017011583389912538,
      "loss": 3.1036,
      "step": 148040
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5731804370880127,
      "learning_rate": 0.00017011214659962343,
      "loss": 2.8774,
      "step": 148041
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9981878995895386,
      "learning_rate": 0.00017010845932426998,
      "loss": 2.8086,
      "step": 148042
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9652260541915894,
      "learning_rate": 0.00017010477207306588,
      "loss": 3.0249,
      "step": 148043
    },
    {
      "epoch": 1.93,
      "grad_norm": 5.347107410430908,
      "learning_rate": 0.00017010108484601165,
      "loss": 2.7396,
      "step": 148044
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.229642868041992,
      "learning_rate": 0.00017009739764310827,
      "loss": 2.7859,
      "step": 148045
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6029136180877686,
      "learning_rate": 0.00017009371046435617,
      "loss": 2.6854,
      "step": 148046
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.734022855758667,
      "learning_rate": 0.0001700900233097561,
      "loss": 3.1222,
      "step": 148047
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.8320903778076172,
      "learning_rate": 0.00017008633617930866,
      "loss": 3.0829,
      "step": 148048
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4284398555755615,
      "learning_rate": 0.0001700826490730147,
      "loss": 3.0165,
      "step": 148049
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6363179683685303,
      "learning_rate": 0.00017007896199087478,
      "loss": 2.8027,
      "step": 148050
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5300397872924805,
      "learning_rate": 0.0001700752749328897,
      "loss": 2.9155,
      "step": 148051
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5111985206604004,
      "learning_rate": 0.00017007158789906007,
      "loss": 3.1407,
      "step": 148052
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.213623046875,
      "learning_rate": 0.00017006790088938662,
      "loss": 2.8424,
      "step": 148053
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.414379835128784,
      "learning_rate": 0.00017006421390386984,
      "loss": 2.8425,
      "step": 148054
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.099039316177368,
      "learning_rate": 0.00017006052694251074,
      "loss": 2.8811,
      "step": 148055
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.6617836952209473,
      "learning_rate": 0.0001700568400053097,
      "loss": 2.8975,
      "step": 148056
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4038782119750977,
      "learning_rate": 0.00017005315309226766,
      "loss": 3.0969,
      "step": 148057
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6598732471466064,
      "learning_rate": 0.0001700494662033852,
      "loss": 3.0036,
      "step": 148058
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.979360580444336,
      "learning_rate": 0.000170045779338663,
      "loss": 3.0401,
      "step": 148059
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.541273593902588,
      "learning_rate": 0.0001700420924981016,
      "loss": 3.039,
      "step": 148060
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.139777660369873,
      "learning_rate": 0.00017003840568170195,
      "loss": 3.1725,
      "step": 148061
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.220545291900635,
      "learning_rate": 0.00017003471888946454,
      "loss": 2.9454,
      "step": 148062
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8895368576049805,
      "learning_rate": 0.0001700310321213902,
      "loss": 2.7589,
      "step": 148063
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1888058185577393,
      "learning_rate": 0.00017002734537747944,
      "loss": 3.0405,
      "step": 148064
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.003420829772949,
      "learning_rate": 0.00017002365865773328,
      "loss": 3.1172,
      "step": 148065
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7764577865600586,
      "learning_rate": 0.00017001997196215197,
      "loss": 3.0009,
      "step": 148066
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.634446382522583,
      "learning_rate": 0.00017001628529073648,
      "loss": 2.9916,
      "step": 148067
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.58281946182251,
      "learning_rate": 0.0001700125986434873,
      "loss": 3.0154,
      "step": 148068
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2974319458007812,
      "learning_rate": 0.0001700089120204054,
      "loss": 2.83,
      "step": 148069
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.9352850914001465,
      "learning_rate": 0.00017000522542149116,
      "loss": 2.7267,
      "step": 148070
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.172458171844482,
      "learning_rate": 0.0001700015388467456,
      "loss": 3.1384,
      "step": 148071
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.228889465332031,
      "learning_rate": 0.000169997852296169,
      "loss": 2.8856,
      "step": 148072
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.699531078338623,
      "learning_rate": 0.00016999416576976236,
      "loss": 3.2489,
      "step": 148073
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1481528282165527,
      "learning_rate": 0.00016999047926752615,
      "loss": 3.0806,
      "step": 148074
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4504120349884033,
      "learning_rate": 0.00016998679278946126,
      "loss": 2.7936,
      "step": 148075
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.8147666454315186,
      "learning_rate": 0.0001699831063355682,
      "loss": 2.7491,
      "step": 148076
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.712397575378418,
      "learning_rate": 0.00016997941990584795,
      "loss": 3.1692,
      "step": 148077
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.376844882965088,
      "learning_rate": 0.00016997573350030073,
      "loss": 2.8884,
      "step": 148078
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2014667987823486,
      "learning_rate": 0.00016997204711892763,
      "loss": 2.9702,
      "step": 148079
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7957346439361572,
      "learning_rate": 0.00016996836076172903,
      "loss": 2.9696,
      "step": 148080
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.045610189437866,
      "learning_rate": 0.0001699646744287059,
      "loss": 2.8857,
      "step": 148081
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1968941688537598,
      "learning_rate": 0.00016996098811985865,
      "loss": 2.7682,
      "step": 148082
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3452022075653076,
      "learning_rate": 0.00016995730183518837,
      "loss": 2.8896,
      "step": 148083
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.75703763961792,
      "learning_rate": 0.00016995361557469523,
      "loss": 2.7935,
      "step": 148084
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4681644439697266,
      "learning_rate": 0.0001699499293383803,
      "loss": 2.9987,
      "step": 148085
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4571874141693115,
      "learning_rate": 0.00016994624312624402,
      "loss": 2.7714,
      "step": 148086
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1684746742248535,
      "learning_rate": 0.00016994255693828728,
      "loss": 3.0624,
      "step": 148087
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.835265636444092,
      "learning_rate": 0.00016993887077451056,
      "loss": 3.2148,
      "step": 148088
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.531214952468872,
      "learning_rate": 0.0001699351846349149,
      "loss": 3.0874,
      "step": 148089
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.395394802093506,
      "learning_rate": 0.00016993149851950054,
      "loss": 2.9808,
      "step": 148090
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.674940347671509,
      "learning_rate": 0.00016992781242826846,
      "loss": 3.028,
      "step": 148091
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.010429859161377,
      "learning_rate": 0.00016992412636121913,
      "loss": 3.12,
      "step": 148092
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.116349220275879,
      "learning_rate": 0.0001699204403183535,
      "loss": 3.14,
      "step": 148093
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.415647029876709,
      "learning_rate": 0.00016991675429967198,
      "loss": 3.2038,
      "step": 148094
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4835188388824463,
      "learning_rate": 0.00016991306830517553,
      "loss": 3.0871,
      "step": 148095
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2109320163726807,
      "learning_rate": 0.00016990938233486468,
      "loss": 3.108,
      "step": 148096
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.215919256210327,
      "learning_rate": 0.0001699056963887401,
      "loss": 2.9063,
      "step": 148097
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8772833347320557,
      "learning_rate": 0.00016990201046680243,
      "loss": 2.6218,
      "step": 148098
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2427496910095215,
      "learning_rate": 0.00016989832456905255,
      "loss": 2.7865,
      "step": 148099
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2269680500030518,
      "learning_rate": 0.00016989463869549091,
      "loss": 3.0422,
      "step": 148100
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.2260820865631104,
      "learning_rate": 0.0001698909528461184,
      "loss": 2.9197,
      "step": 148101
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1589784622192383,
      "learning_rate": 0.00016988726702093566,
      "loss": 3.0461,
      "step": 148102
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.534970760345459,
      "learning_rate": 0.0001698835812199433,
      "loss": 3.0535,
      "step": 148103
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.32560396194458,
      "learning_rate": 0.00016987989544314192,
      "loss": 2.9215,
      "step": 148104
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2031610012054443,
      "learning_rate": 0.00016987620969053245,
      "loss": 3.0089,
      "step": 148105
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4837234020233154,
      "learning_rate": 0.00016987252396211534,
      "loss": 2.769,
      "step": 148106
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0515804290771484,
      "learning_rate": 0.00016986883825789148,
      "loss": 2.9249,
      "step": 148107
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2259459495544434,
      "learning_rate": 0.0001698651525778615,
      "loss": 2.8827,
      "step": 148108
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.629539966583252,
      "learning_rate": 0.0001698614669220259,
      "loss": 2.7735,
      "step": 148109
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0769476890563965,
      "learning_rate": 0.00016985778129038564,
      "loss": 3.0945,
      "step": 148110
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.652928352355957,
      "learning_rate": 0.00016985409568294127,
      "loss": 2.7211,
      "step": 148111
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6054086685180664,
      "learning_rate": 0.00016985041009969334,
      "loss": 3.0087,
      "step": 148112
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0543220043182373,
      "learning_rate": 0.0001698467245406428,
      "loss": 2.9982,
      "step": 148113
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.35638689994812,
      "learning_rate": 0.00016984303900579022,
      "loss": 2.8753,
      "step": 148114
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0534896850585938,
      "learning_rate": 0.00016983935349513616,
      "loss": 2.8177,
      "step": 148115
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.580026149749756,
      "learning_rate": 0.00016983566800868154,
      "loss": 3.1598,
      "step": 148116
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.016449451446533,
      "learning_rate": 0.00016983198254642696,
      "loss": 3.2311,
      "step": 148117
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6536052227020264,
      "learning_rate": 0.00016982829710837289,
      "loss": 2.8941,
      "step": 148118
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9610826969146729,
      "learning_rate": 0.00016982461169452032,
      "loss": 2.9893,
      "step": 148119
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.504819631576538,
      "learning_rate": 0.00016982092630486988,
      "loss": 3.0972,
      "step": 148120
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.8897351026535034,
      "learning_rate": 0.00016981724093942198,
      "loss": 2.9294,
      "step": 148121
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3493001461029053,
      "learning_rate": 0.00016981355559817768,
      "loss": 3.1254,
      "step": 148122
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1805365085601807,
      "learning_rate": 0.00016980987028113742,
      "loss": 3.0246,
      "step": 148123
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6421194076538086,
      "learning_rate": 0.00016980618498830201,
      "loss": 3.0081,
      "step": 148124
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1048343181610107,
      "learning_rate": 0.00016980249971967212,
      "loss": 3.02,
      "step": 148125
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.8313924074172974,
      "learning_rate": 0.00016979881447524837,
      "loss": 2.8336,
      "step": 148126
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3658597469329834,
      "learning_rate": 0.00016979512925503138,
      "loss": 2.9422,
      "step": 148127
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.048520565032959,
      "learning_rate": 0.00016979144405902208,
      "loss": 3.0442,
      "step": 148128
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.259303331375122,
      "learning_rate": 0.00016978775888722085,
      "loss": 3.0563,
      "step": 148129
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.423381805419922,
      "learning_rate": 0.0001697840737396287,
      "loss": 3.1506,
      "step": 148130
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4367763996124268,
      "learning_rate": 0.000169780388616246,
      "loss": 2.7512,
      "step": 148131
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.527031660079956,
      "learning_rate": 0.00016977670351707382,
      "loss": 2.9143,
      "step": 148132
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.701197624206543,
      "learning_rate": 0.00016977301844211236,
      "loss": 3.0408,
      "step": 148133
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.964073896408081,
      "learning_rate": 0.00016976933339136267,
      "loss": 2.9539,
      "step": 148134
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3740005493164062,
      "learning_rate": 0.00016976564836482523,
      "loss": 2.9195,
      "step": 148135
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2832114696502686,
      "learning_rate": 0.00016976196336250093,
      "loss": 2.8613,
      "step": 148136
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7828354835510254,
      "learning_rate": 0.0001697582783843902,
      "loss": 2.9563,
      "step": 148137
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.090484619140625,
      "learning_rate": 0.0001697545934304941,
      "loss": 2.8828,
      "step": 148138
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.283792972564697,
      "learning_rate": 0.00016975090850081286,
      "loss": 2.7003,
      "step": 148139
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2663002014160156,
      "learning_rate": 0.0001697472235953475,
      "loss": 2.7185,
      "step": 148140
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0366687774658203,
      "learning_rate": 0.00016974353871409845,
      "loss": 2.7724,
      "step": 148141
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.512054920196533,
      "learning_rate": 0.0001697398538570667,
      "loss": 2.8738,
      "step": 148142
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.9989378452301025,
      "learning_rate": 0.0001697361690242526,
      "loss": 2.8649,
      "step": 148143
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.9181222915649414,
      "learning_rate": 0.00016973248421565726,
      "loss": 3.0192,
      "step": 148144
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.271420478820801,
      "learning_rate": 0.0001697287994312809,
      "loss": 2.9262,
      "step": 148145
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.358731269836426,
      "learning_rate": 0.0001697251146711245,
      "loss": 3.2162,
      "step": 148146
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.06974983215332,
      "learning_rate": 0.00016972142993518856,
      "loss": 2.8792,
      "step": 148147
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.43877649307251,
      "learning_rate": 0.000169717745223474,
      "loss": 2.9643,
      "step": 148148
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1988489627838135,
      "learning_rate": 0.0001697140605359812,
      "loss": 2.94,
      "step": 148149
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4392035007476807,
      "learning_rate": 0.0001697103758727113,
      "loss": 2.8121,
      "step": 148150
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3337488174438477,
      "learning_rate": 0.00016970669123366443,
      "loss": 2.9237,
      "step": 148151
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0598435401916504,
      "learning_rate": 0.0001697030066188417,
      "loss": 3.1511,
      "step": 148152
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.226726770401001,
      "learning_rate": 0.00016969932202824347,
      "loss": 2.7867,
      "step": 148153
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.132432460784912,
      "learning_rate": 0.00016969563746187076,
      "loss": 2.7484,
      "step": 148154
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.287853479385376,
      "learning_rate": 0.00016969195291972395,
      "loss": 2.9936,
      "step": 148155
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5682294368743896,
      "learning_rate": 0.00016968826840180412,
      "loss": 2.847,
      "step": 148156
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.96545672416687,
      "learning_rate": 0.00016968458390811145,
      "loss": 3.0747,
      "step": 148157
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9200258255004883,
      "learning_rate": 0.000169680899438647,
      "loss": 2.8804,
      "step": 148158
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.957205057144165,
      "learning_rate": 0.0001696772149934112,
      "loss": 3.103,
      "step": 148159
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.134371519088745,
      "learning_rate": 0.000169673530572405,
      "loss": 3.0274,
      "step": 148160
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2988269329071045,
      "learning_rate": 0.00016966984617562884,
      "loss": 2.9168,
      "step": 148161
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3779914379119873,
      "learning_rate": 0.00016966616180308372,
      "loss": 2.7979,
      "step": 148162
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.024521827697754,
      "learning_rate": 0.00016966247745476992,
      "loss": 2.6737,
      "step": 148163
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.305955171585083,
      "learning_rate": 0.00016965879313068843,
      "loss": 2.7494,
      "step": 148164
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5054614543914795,
      "learning_rate": 0.0001696551088308397,
      "loss": 3.1381,
      "step": 148165
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.0287625789642334,
      "learning_rate": 0.00016965142455522468,
      "loss": 3.0949,
      "step": 148166
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.23594331741333,
      "learning_rate": 0.00016964774030384382,
      "loss": 2.8728,
      "step": 148167
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6415886878967285,
      "learning_rate": 0.00016964405607669813,
      "loss": 3.1312,
      "step": 148168
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.049194574356079,
      "learning_rate": 0.0001696403718737878,
      "loss": 3.114,
      "step": 148169
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1418371200561523,
      "learning_rate": 0.0001696366876951139,
      "loss": 3.0405,
      "step": 148170
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1513073444366455,
      "learning_rate": 0.00016963300354067693,
      "loss": 2.7502,
      "step": 148171
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4362540245056152,
      "learning_rate": 0.00016962931941047775,
      "loss": 2.8224,
      "step": 148172
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4033942222595215,
      "learning_rate": 0.00016962563530451683,
      "loss": 2.7894,
      "step": 148173
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.891201138496399,
      "learning_rate": 0.00016962195122279517,
      "loss": 2.7431,
      "step": 148174
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.750155448913574,
      "learning_rate": 0.00016961826716531303,
      "loss": 2.9554,
      "step": 148175
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3213260173797607,
      "learning_rate": 0.00016961458313207144,
      "loss": 3.0598,
      "step": 148176
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.726787567138672,
      "learning_rate": 0.0001696108991230708,
      "loss": 2.8254,
      "step": 148177
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.231382369995117,
      "learning_rate": 0.00016960721513831217,
      "loss": 2.8954,
      "step": 148178
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.121781826019287,
      "learning_rate": 0.00016960353117779584,
      "loss": 3.0266,
      "step": 148179
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4935238361358643,
      "learning_rate": 0.00016959984724152288,
      "loss": 2.8268,
      "step": 148180
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3256452083587646,
      "learning_rate": 0.0001695961633294936,
      "loss": 2.9112,
      "step": 148181
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.412968158721924,
      "learning_rate": 0.0001695924794417089,
      "loss": 3.0572,
      "step": 148182
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.193997383117676,
      "learning_rate": 0.00016958879557816935,
      "loss": 3.0289,
      "step": 148183
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5766451358795166,
      "learning_rate": 0.0001695851117388758,
      "loss": 3.1427,
      "step": 148184
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6565780639648438,
      "learning_rate": 0.00016958142792382875,
      "loss": 2.9971,
      "step": 148185
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.6806201934814453,
      "learning_rate": 0.00016957774413302906,
      "loss": 2.9252,
      "step": 148186
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3400566577911377,
      "learning_rate": 0.00016957406036647736,
      "loss": 2.8026,
      "step": 148187
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3203492164611816,
      "learning_rate": 0.0001695703766241743,
      "loss": 3.0345,
      "step": 148188
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.156618118286133,
      "learning_rate": 0.00016956669290612046,
      "loss": 2.8689,
      "step": 148189
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2773730754852295,
      "learning_rate": 0.00016956300921231672,
      "loss": 2.8268,
      "step": 148190
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.6114001274108887,
      "learning_rate": 0.00016955932554276357,
      "loss": 3.0897,
      "step": 148191
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2516090869903564,
      "learning_rate": 0.00016955564189746194,
      "loss": 3.1268,
      "step": 148192
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7139766216278076,
      "learning_rate": 0.00016955195827641237,
      "loss": 2.9784,
      "step": 148193
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7605130672454834,
      "learning_rate": 0.0001695482746796154,
      "loss": 2.9706,
      "step": 148194
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3370859622955322,
      "learning_rate": 0.00016954459110707205,
      "loss": 3.1,
      "step": 148195
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8091578483581543,
      "learning_rate": 0.0001695409075587828,
      "loss": 2.8124,
      "step": 148196
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1348071098327637,
      "learning_rate": 0.00016953722403474823,
      "loss": 2.7635,
      "step": 148197
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2845425605773926,
      "learning_rate": 0.00016953354053496925,
      "loss": 2.6602,
      "step": 148198
    },
    {
      "epoch": 1.93,
      "grad_norm": 6.211649417877197,
      "learning_rate": 0.00016952985705944647,
      "loss": 2.9357,
      "step": 148199
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.311368703842163,
      "learning_rate": 0.00016952617360818043,
      "loss": 2.9842,
      "step": 148200
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.143329620361328,
      "learning_rate": 0.00016952249018117207,
      "loss": 3.2322,
      "step": 148201
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.788248538970947,
      "learning_rate": 0.0001695188067784219,
      "loss": 2.905,
      "step": 148202
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0464718341827393,
      "learning_rate": 0.00016951512339993057,
      "loss": 3.1958,
      "step": 148203
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2087652683258057,
      "learning_rate": 0.00016951144004569895,
      "loss": 3.1334,
      "step": 148204
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8482916355133057,
      "learning_rate": 0.00016950775671572762,
      "loss": 3.1204,
      "step": 148205
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3157289028167725,
      "learning_rate": 0.00016950407341001711,
      "loss": 3.1125,
      "step": 148206
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0953497886657715,
      "learning_rate": 0.00016950039012856842,
      "loss": 2.8248,
      "step": 148207
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8725030422210693,
      "learning_rate": 0.00016949670687138198,
      "loss": 3.0357,
      "step": 148208
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.253995656967163,
      "learning_rate": 0.00016949302363845862,
      "loss": 2.9918,
      "step": 148209
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1460821628570557,
      "learning_rate": 0.00016948934042979898,
      "loss": 3.1075,
      "step": 148210
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.279541015625,
      "learning_rate": 0.00016948565724540373,
      "loss": 3.03,
      "step": 148211
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.422624349594116,
      "learning_rate": 0.0001694819740852735,
      "loss": 2.8959,
      "step": 148212
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.974043607711792,
      "learning_rate": 0.00016947829094940905,
      "loss": 2.8343,
      "step": 148213
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9841773509979248,
      "learning_rate": 0.000169474607837811,
      "loss": 2.8746,
      "step": 148214
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.388557195663452,
      "learning_rate": 0.00016947092475048023,
      "loss": 3.0699,
      "step": 148215
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.528855323791504,
      "learning_rate": 0.0001694672416874171,
      "loss": 2.8648,
      "step": 148216
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0753097534179688,
      "learning_rate": 0.00016946355864862276,
      "loss": 2.9734,
      "step": 148217
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.382035732269287,
      "learning_rate": 0.00016945987563409733,
      "loss": 2.8352,
      "step": 148218
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.925743341445923,
      "learning_rate": 0.0001694561926438419,
      "loss": 3.1717,
      "step": 148219
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2552754878997803,
      "learning_rate": 0.00016945250967785692,
      "loss": 2.9915,
      "step": 148220
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6264121532440186,
      "learning_rate": 0.0001694488267361433,
      "loss": 2.9341,
      "step": 148221
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.045762300491333,
      "learning_rate": 0.0001694451438187015,
      "loss": 2.8839,
      "step": 148222
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1727304458618164,
      "learning_rate": 0.00016944146092553252,
      "loss": 3.2347,
      "step": 148223
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.405306577682495,
      "learning_rate": 0.0001694377780566366,
      "loss": 3.2335,
      "step": 148224
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.582472801208496,
      "learning_rate": 0.0001694340952120148,
      "loss": 2.8801,
      "step": 148225
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.196336507797241,
      "learning_rate": 0.00016943041239166756,
      "loss": 2.8576,
      "step": 148226
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9025663137435913,
      "learning_rate": 0.00016942672959559574,
      "loss": 3.1703,
      "step": 148227
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4432547092437744,
      "learning_rate": 0.0001694230468237999,
      "loss": 3.1903,
      "step": 148228
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9744020700454712,
      "learning_rate": 0.000169419364076281,
      "loss": 3.2438,
      "step": 148229
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8122365474700928,
      "learning_rate": 0.00016941568135303927,
      "loss": 2.96,
      "step": 148230
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1938581466674805,
      "learning_rate": 0.00016941199865407568,
      "loss": 2.9222,
      "step": 148231
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2114744186401367,
      "learning_rate": 0.0001694083159793908,
      "loss": 2.8922,
      "step": 148232
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3991711139678955,
      "learning_rate": 0.00016940463332898553,
      "loss": 3.0369,
      "step": 148233
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3553595542907715,
      "learning_rate": 0.00016940095070286024,
      "loss": 2.9438,
      "step": 148234
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.5094454288482666,
      "learning_rate": 0.00016939726810101606,
      "loss": 3.0118,
      "step": 148235
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.1931843757629395,
      "learning_rate": 0.00016939358552345312,
      "loss": 2.6565,
      "step": 148236
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.559357166290283,
      "learning_rate": 0.00016938990297017247,
      "loss": 2.8253,
      "step": 148237
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.523615598678589,
      "learning_rate": 0.00016938622044117464,
      "loss": 3.0837,
      "step": 148238
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6475987434387207,
      "learning_rate": 0.00016938253793646047,
      "loss": 2.9441,
      "step": 148239
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.093064785003662,
      "learning_rate": 0.00016937885545603045,
      "loss": 3.035,
      "step": 148240
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.0166778564453125,
      "learning_rate": 0.00016937517299988556,
      "loss": 2.9014,
      "step": 148241
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.331861734390259,
      "learning_rate": 0.0001693714905680261,
      "loss": 2.9859,
      "step": 148242
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5936360359191895,
      "learning_rate": 0.00016936780816045305,
      "loss": 2.9993,
      "step": 148243
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.969413995742798,
      "learning_rate": 0.00016936412577716687,
      "loss": 2.8204,
      "step": 148244
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4368269443511963,
      "learning_rate": 0.00016936044341816847,
      "loss": 3.206,
      "step": 148245
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3625144958496094,
      "learning_rate": 0.0001693567610834583,
      "loss": 3.0555,
      "step": 148246
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3604092597961426,
      "learning_rate": 0.00016935307877303743,
      "loss": 3.1269,
      "step": 148247
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.468648910522461,
      "learning_rate": 0.00016934939648690609,
      "loss": 2.8843,
      "step": 148248
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.608036994934082,
      "learning_rate": 0.00016934571422506524,
      "loss": 3.2332,
      "step": 148249
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9038290977478027,
      "learning_rate": 0.00016934203198751537,
      "loss": 3.0475,
      "step": 148250
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.272108554840088,
      "learning_rate": 0.0001693383497742574,
      "loss": 2.99,
      "step": 148251
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.734393835067749,
      "learning_rate": 0.0001693346675852918,
      "loss": 2.8492,
      "step": 148252
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.164745569229126,
      "learning_rate": 0.00016933098542061957,
      "loss": 3.2047,
      "step": 148253
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0572686195373535,
      "learning_rate": 0.00016932730328024097,
      "loss": 3.3716,
      "step": 148254
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1895487308502197,
      "learning_rate": 0.00016932362116415698,
      "loss": 2.8701,
      "step": 148255
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1678266525268555,
      "learning_rate": 0.00016931993907236804,
      "loss": 3.0825,
      "step": 148256
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.098569393157959,
      "learning_rate": 0.00016931625700487518,
      "loss": 2.7234,
      "step": 148257
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3628573417663574,
      "learning_rate": 0.00016931257496167874,
      "loss": 2.8519,
      "step": 148258
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4338185787200928,
      "learning_rate": 0.00016930889294277982,
      "loss": 2.7674,
      "step": 148259
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.108731269836426,
      "learning_rate": 0.0001693052109481786,
      "loss": 2.7972,
      "step": 148260
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1929051876068115,
      "learning_rate": 0.0001693015289778761,
      "loss": 2.9365,
      "step": 148261
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2275147438049316,
      "learning_rate": 0.0001692978470318728,
      "loss": 3.0886,
      "step": 148262
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2526047229766846,
      "learning_rate": 0.00016929416511016963,
      "loss": 3.0871,
      "step": 148263
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.241140842437744,
      "learning_rate": 0.00016929048321276703,
      "loss": 3.0157,
      "step": 148264
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.353949546813965,
      "learning_rate": 0.0001692868013396659,
      "loss": 2.844,
      "step": 148265
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7902655601501465,
      "learning_rate": 0.00016928311949086683,
      "loss": 3.006,
      "step": 148266
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.269994020462036,
      "learning_rate": 0.00016927943766637047,
      "loss": 2.748,
      "step": 148267
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.143052816390991,
      "learning_rate": 0.00016927575586617744,
      "loss": 3.0564,
      "step": 148268
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5417754650115967,
      "learning_rate": 0.00016927207409028863,
      "loss": 2.9737,
      "step": 148269
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.176503896713257,
      "learning_rate": 0.0001692683923387045,
      "loss": 2.8603,
      "step": 148270
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.261611223220825,
      "learning_rate": 0.00016926471061142593,
      "loss": 3.1057,
      "step": 148271
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.034729242324829,
      "learning_rate": 0.00016926102890845357,
      "loss": 2.8542,
      "step": 148272
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1732876300811768,
      "learning_rate": 0.000169257347229788,
      "loss": 3.0845,
      "step": 148273
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1123597621917725,
      "learning_rate": 0.00016925366557542985,
      "loss": 2.9578,
      "step": 148274
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.663658380508423,
      "learning_rate": 0.00016924998394538004,
      "loss": 2.7264,
      "step": 148275
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.074493646621704,
      "learning_rate": 0.00016924630233963904,
      "loss": 2.9674,
      "step": 148276
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.411816358566284,
      "learning_rate": 0.00016924262075820767,
      "loss": 2.9555,
      "step": 148277
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.169210433959961,
      "learning_rate": 0.0001692389392010866,
      "loss": 2.8865,
      "step": 148278
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.2500243186950684,
      "learning_rate": 0.00016923525766827647,
      "loss": 2.9693,
      "step": 148279
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.344607353210449,
      "learning_rate": 0.0001692315761597779,
      "loss": 3.0118,
      "step": 148280
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1761209964752197,
      "learning_rate": 0.00016922789467559173,
      "loss": 2.7789,
      "step": 148281
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.50777006149292,
      "learning_rate": 0.00016922421321571843,
      "loss": 3.0092,
      "step": 148282
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.335434675216675,
      "learning_rate": 0.00016922053178015898,
      "loss": 2.9133,
      "step": 148283
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5842487812042236,
      "learning_rate": 0.00016921685036891385,
      "loss": 2.9791,
      "step": 148284
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4325785636901855,
      "learning_rate": 0.0001692131689819837,
      "loss": 3.0341,
      "step": 148285
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4347457885742188,
      "learning_rate": 0.00016920948761936933,
      "loss": 2.9625,
      "step": 148286
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2239534854888916,
      "learning_rate": 0.00016920580628107147,
      "loss": 2.9967,
      "step": 148287
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9019345045089722,
      "learning_rate": 0.0001692021249670906,
      "loss": 3.1397,
      "step": 148288
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2522501945495605,
      "learning_rate": 0.00016919844367742758,
      "loss": 2.9467,
      "step": 148289
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.089306592941284,
      "learning_rate": 0.0001691947624120831,
      "loss": 3.2457,
      "step": 148290
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.741758108139038,
      "learning_rate": 0.00016919108117105765,
      "loss": 2.9404,
      "step": 148291
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.832803964614868,
      "learning_rate": 0.00016918739995435217,
      "loss": 2.6785,
      "step": 148292
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6203904151916504,
      "learning_rate": 0.0001691837187619671,
      "loss": 2.8313,
      "step": 148293
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.160728931427002,
      "learning_rate": 0.00016918003759390334,
      "loss": 2.9475,
      "step": 148294
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.395683765411377,
      "learning_rate": 0.0001691763564501615,
      "loss": 3.0717,
      "step": 148295
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.162590742111206,
      "learning_rate": 0.00016917267533074221,
      "loss": 2.8714,
      "step": 148296
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.0241892337799072,
      "learning_rate": 0.00016916899423564613,
      "loss": 2.9105,
      "step": 148297
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3548426628112793,
      "learning_rate": 0.00016916531316487406,
      "loss": 3.1204,
      "step": 148298
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3655214309692383,
      "learning_rate": 0.00016916163211842656,
      "loss": 2.9041,
      "step": 148299
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.425999402999878,
      "learning_rate": 0.0001691579510963045,
      "loss": 2.8664,
      "step": 148300
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0379533767700195,
      "learning_rate": 0.00016915427009850842,
      "loss": 3.0176,
      "step": 148301
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.195385217666626,
      "learning_rate": 0.00016915058912503903,
      "loss": 3.1232,
      "step": 148302
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2818474769592285,
      "learning_rate": 0.00016914690817589694,
      "loss": 3.0836,
      "step": 148303
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.7675057649612427,
      "learning_rate": 0.00016914322725108299,
      "loss": 2.8618,
      "step": 148304
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.017620801925659,
      "learning_rate": 0.00016913954635059767,
      "loss": 3.0122,
      "step": 148305
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.183478832244873,
      "learning_rate": 0.0001691358654744419,
      "loss": 3.1511,
      "step": 148306
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2561726570129395,
      "learning_rate": 0.0001691321846226161,
      "loss": 3.1634,
      "step": 148307
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0841732025146484,
      "learning_rate": 0.0001691285037951214,
      "loss": 2.808,
      "step": 148308
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.297637701034546,
      "learning_rate": 0.00016912482299195788,
      "loss": 3.0184,
      "step": 148309
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5573933124542236,
      "learning_rate": 0.0001691211422131267,
      "loss": 2.9492,
      "step": 148310
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.104863166809082,
      "learning_rate": 0.00016911746145862816,
      "loss": 3.2634,
      "step": 148311
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3371853828430176,
      "learning_rate": 0.00016911378072846338,
      "loss": 2.7138,
      "step": 148312
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0826575756073,
      "learning_rate": 0.00016911010002263266,
      "loss": 2.8729,
      "step": 148313
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.255312919616699,
      "learning_rate": 0.00016910641934113707,
      "loss": 3.0832,
      "step": 148314
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4968974590301514,
      "learning_rate": 0.0001691027386839768,
      "loss": 2.9293,
      "step": 148315
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.519339084625244,
      "learning_rate": 0.00016909905805115297,
      "loss": 2.8198,
      "step": 148316
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.8877004384994507,
      "learning_rate": 0.00016909537744266598,
      "loss": 2.712,
      "step": 148317
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.151634693145752,
      "learning_rate": 0.0001690916968585167,
      "loss": 2.991,
      "step": 148318
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.275838851928711,
      "learning_rate": 0.00016908801629870565,
      "loss": 2.8456,
      "step": 148319
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.057067632675171,
      "learning_rate": 0.00016908433576323385,
      "loss": 2.9133,
      "step": 148320
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.055938243865967,
      "learning_rate": 0.00016908065525210152,
      "loss": 2.7943,
      "step": 148321
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3603951930999756,
      "learning_rate": 0.00016907697476530967,
      "loss": 2.7413,
      "step": 148322
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6676104068756104,
      "learning_rate": 0.00016907329430285877,
      "loss": 2.993,
      "step": 148323
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.074787139892578,
      "learning_rate": 0.0001690696138647497,
      "loss": 3.0897,
      "step": 148324
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4522242546081543,
      "learning_rate": 0.00016906593345098302,
      "loss": 3.1295,
      "step": 148325
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0411365032196045,
      "learning_rate": 0.00016906225306155965,
      "loss": 2.9817,
      "step": 148326
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.240734577178955,
      "learning_rate": 0.00016905857269647982,
      "loss": 2.864,
      "step": 148327
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.979548215866089,
      "learning_rate": 0.0001690548923557446,
      "loss": 2.875,
      "step": 148328
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.079474925994873,
      "learning_rate": 0.00016905121203935442,
      "loss": 2.8159,
      "step": 148329
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9988460540771484,
      "learning_rate": 0.00016904753174731024,
      "loss": 3.0009,
      "step": 148330
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0307118892669678,
      "learning_rate": 0.00016904385147961245,
      "loss": 3.2337,
      "step": 148331
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9125782251358032,
      "learning_rate": 0.00016904017123626206,
      "loss": 2.8575,
      "step": 148332
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.2791128158569336,
      "learning_rate": 0.00016903649101725953,
      "loss": 2.8178,
      "step": 148333
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0312814712524414,
      "learning_rate": 0.00016903281082260557,
      "loss": 2.9278,
      "step": 148334
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.020617961883545,
      "learning_rate": 0.00016902913065230078,
      "loss": 3.0195,
      "step": 148335
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2680985927581787,
      "learning_rate": 0.00016902545050634604,
      "loss": 2.9557,
      "step": 148336
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1763317584991455,
      "learning_rate": 0.00016902177038474186,
      "loss": 3.0593,
      "step": 148337
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.907252073287964,
      "learning_rate": 0.0001690180902874891,
      "loss": 2.9318,
      "step": 148338
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.126716136932373,
      "learning_rate": 0.0001690144102145884,
      "loss": 2.9786,
      "step": 148339
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7606680393218994,
      "learning_rate": 0.00016901073016604032,
      "loss": 2.8972,
      "step": 148340
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.384192705154419,
      "learning_rate": 0.00016900705014184555,
      "loss": 2.9985,
      "step": 148341
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.604267597198486,
      "learning_rate": 0.00016900337014200494,
      "loss": 2.7757,
      "step": 148342
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.480783224105835,
      "learning_rate": 0.00016899969016651896,
      "loss": 2.9277,
      "step": 148343
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.542728900909424,
      "learning_rate": 0.00016899601021538855,
      "loss": 2.996,
      "step": 148344
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3898468017578125,
      "learning_rate": 0.00016899233028861422,
      "loss": 2.8881,
      "step": 148345
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.411614179611206,
      "learning_rate": 0.0001689886503861967,
      "loss": 2.7853,
      "step": 148346
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5128774642944336,
      "learning_rate": 0.00016898497050813656,
      "loss": 2.9724,
      "step": 148347
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5449163913726807,
      "learning_rate": 0.0001689812906544347,
      "loss": 2.8854,
      "step": 148348
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.340623617172241,
      "learning_rate": 0.00016897761082509158,
      "loss": 3.017,
      "step": 148349
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.0037460327148438,
      "learning_rate": 0.00016897393102010806,
      "loss": 2.9866,
      "step": 148350
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.382167339324951,
      "learning_rate": 0.0001689702512394848,
      "loss": 3.2266,
      "step": 148351
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1784377098083496,
      "learning_rate": 0.00016896657148322247,
      "loss": 3.1495,
      "step": 148352
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.314761161804199,
      "learning_rate": 0.00016896289175132156,
      "loss": 2.8456,
      "step": 148353
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.198798656463623,
      "learning_rate": 0.00016895921204378308,
      "loss": 2.9748,
      "step": 148354
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3385348320007324,
      "learning_rate": 0.00016895553236060743,
      "loss": 3.0551,
      "step": 148355
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5067150592803955,
      "learning_rate": 0.00016895185270179554,
      "loss": 2.8038,
      "step": 148356
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.056130886077881,
      "learning_rate": 0.00016894817306734792,
      "loss": 2.8682,
      "step": 148357
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.464967727661133,
      "learning_rate": 0.00016894449345726537,
      "loss": 2.9659,
      "step": 148358
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2659285068511963,
      "learning_rate": 0.00016894081387154838,
      "loss": 3.0197,
      "step": 148359
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.715301036834717,
      "learning_rate": 0.00016893713431019792,
      "loss": 3.1836,
      "step": 148360
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.282235622406006,
      "learning_rate": 0.00016893345477321433,
      "loss": 3.2486,
      "step": 148361
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.0602688789367676,
      "learning_rate": 0.00016892977526059865,
      "loss": 2.8294,
      "step": 148362
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4343109130859375,
      "learning_rate": 0.00016892609577235138,
      "loss": 2.9718,
      "step": 148363
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.076223373413086,
      "learning_rate": 0.00016892241630847322,
      "loss": 3.0461,
      "step": 148364
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.7228267192840576,
      "learning_rate": 0.00016891873686896477,
      "loss": 3.0222,
      "step": 148365
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.051873207092285,
      "learning_rate": 0.00016891505745382688,
      "loss": 3.2066,
      "step": 148366
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.526054859161377,
      "learning_rate": 0.00016891137806306007,
      "loss": 3.0717,
      "step": 148367
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.440439462661743,
      "learning_rate": 0.0001689076986966652,
      "loss": 2.974,
      "step": 148368
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4501595497131348,
      "learning_rate": 0.00016890401935464288,
      "loss": 2.8167,
      "step": 148369
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.209062099456787,
      "learning_rate": 0.00016890034003699367,
      "loss": 3.2357,
      "step": 148370
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0462334156036377,
      "learning_rate": 0.00016889666074371845,
      "loss": 3.2721,
      "step": 148371
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.80928635597229,
      "learning_rate": 0.00016889298147481784,
      "loss": 3.0492,
      "step": 148372
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.5170905590057373,
      "learning_rate": 0.00016888930223029236,
      "loss": 2.6476,
      "step": 148373
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.408252477645874,
      "learning_rate": 0.00016888562301014295,
      "loss": 2.6945,
      "step": 148374
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.910080909729004,
      "learning_rate": 0.0001688819438143702,
      "loss": 3.163,
      "step": 148375
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1969590187072754,
      "learning_rate": 0.00016887826464297465,
      "loss": 2.9177,
      "step": 148376
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4550843238830566,
      "learning_rate": 0.00016887458549595723,
      "loss": 2.8839,
      "step": 148377
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.8328075408935547,
      "learning_rate": 0.00016887090637331836,
      "loss": 2.9931,
      "step": 148378
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.278104543685913,
      "learning_rate": 0.000168867227275059,
      "loss": 3.1465,
      "step": 148379
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4595937728881836,
      "learning_rate": 0.00016886354820117968,
      "loss": 2.8488,
      "step": 148380
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.6185569763183594,
      "learning_rate": 0.00016885986915168112,
      "loss": 2.9783,
      "step": 148381
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.387671709060669,
      "learning_rate": 0.00016885619012656385,
      "loss": 2.9631,
      "step": 148382
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.148815155029297,
      "learning_rate": 0.00016885251112582882,
      "loss": 3.27,
      "step": 148383
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.340459108352661,
      "learning_rate": 0.00016884883214947648,
      "loss": 3.1685,
      "step": 148384
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.506570816040039,
      "learning_rate": 0.0001688451531975077,
      "loss": 2.9558,
      "step": 148385
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2370729446411133,
      "learning_rate": 0.00016884147426992312,
      "loss": 2.8164,
      "step": 148386
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3852040767669678,
      "learning_rate": 0.00016883779536672338,
      "loss": 3.1819,
      "step": 148387
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9737215042114258,
      "learning_rate": 0.00016883411648790898,
      "loss": 3.034,
      "step": 148388
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.367572784423828,
      "learning_rate": 0.000168830437633481,
      "loss": 2.9316,
      "step": 148389
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.173130512237549,
      "learning_rate": 0.00016882675880343977,
      "loss": 2.9882,
      "step": 148390
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6067962646484375,
      "learning_rate": 0.00016882307999778625,
      "loss": 2.9982,
      "step": 148391
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4041237831115723,
      "learning_rate": 0.00016881940121652085,
      "loss": 2.9072,
      "step": 148392
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0837974548339844,
      "learning_rate": 0.00016881572245964466,
      "loss": 3.1495,
      "step": 148393
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4946138858795166,
      "learning_rate": 0.00016881204372715786,
      "loss": 3.0901,
      "step": 148394
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.790369987487793,
      "learning_rate": 0.00016880836501906147,
      "loss": 3.0195,
      "step": 148395
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.8677899837493896,
      "learning_rate": 0.00016880468633535603,
      "loss": 3.1177,
      "step": 148396
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.2475152015686035,
      "learning_rate": 0.0001688010076760423,
      "loss": 2.9837,
      "step": 148397
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.119767904281616,
      "learning_rate": 0.00016879732904112092,
      "loss": 3.1655,
      "step": 148398
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4468255043029785,
      "learning_rate": 0.00016879365043059264,
      "loss": 3.0267,
      "step": 148399
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.3589608669281006,
      "learning_rate": 0.00016878997184445815,
      "loss": 2.9396,
      "step": 148400
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.299607515335083,
      "learning_rate": 0.00016878629328271802,
      "loss": 2.8831,
      "step": 148401
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5027594566345215,
      "learning_rate": 0.00016878261474537293,
      "loss": 3.0134,
      "step": 148402
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.491799831390381,
      "learning_rate": 0.00016877893623242372,
      "loss": 2.6489,
      "step": 148403
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.592887878417969,
      "learning_rate": 0.00016877525774387087,
      "loss": 3.0929,
      "step": 148404
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3157386779785156,
      "learning_rate": 0.00016877157927971527,
      "loss": 2.8263,
      "step": 148405
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2579500675201416,
      "learning_rate": 0.00016876790083995757,
      "loss": 3.0575,
      "step": 148406
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4592466354370117,
      "learning_rate": 0.00016876422242459834,
      "loss": 2.9751,
      "step": 148407
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.657301664352417,
      "learning_rate": 0.0001687605440336382,
      "loss": 2.7677,
      "step": 148408
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.826814651489258,
      "learning_rate": 0.0001687568656670781,
      "loss": 2.9539,
      "step": 148409
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.869389295578003,
      "learning_rate": 0.00016875318732491844,
      "loss": 2.9713,
      "step": 148410
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7185544967651367,
      "learning_rate": 0.0001687495090071602,
      "loss": 2.8841,
      "step": 148411
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.524350881576538,
      "learning_rate": 0.00016874583071380386,
      "loss": 3.0656,
      "step": 148412
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.458277940750122,
      "learning_rate": 0.00016874215244485016,
      "loss": 2.9508,
      "step": 148413
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.172968626022339,
      "learning_rate": 0.00016873847420029967,
      "loss": 2.7234,
      "step": 148414
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.012346029281616,
      "learning_rate": 0.00016873479598015326,
      "loss": 3.0438,
      "step": 148415
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1097967624664307,
      "learning_rate": 0.0001687311177844114,
      "loss": 2.9207,
      "step": 148416
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2836508750915527,
      "learning_rate": 0.00016872743961307508,
      "loss": 3.0353,
      "step": 148417
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1606526374816895,
      "learning_rate": 0.00016872376146614477,
      "loss": 3.0855,
      "step": 148418
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1975557804107666,
      "learning_rate": 0.0001687200833436212,
      "loss": 3.2223,
      "step": 148419
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5865330696105957,
      "learning_rate": 0.00016871640524550492,
      "loss": 2.8239,
      "step": 148420
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.8536134958267212,
      "learning_rate": 0.00016871272717179685,
      "loss": 3.0394,
      "step": 148421
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2578766345977783,
      "learning_rate": 0.00016870904912249745,
      "loss": 2.9678,
      "step": 148422
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.091695785522461,
      "learning_rate": 0.00016870537109760767,
      "loss": 2.7506,
      "step": 148423
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0977697372436523,
      "learning_rate": 0.000168701693097128,
      "loss": 2.8288,
      "step": 148424
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.07144832611084,
      "learning_rate": 0.0001686980151210592,
      "loss": 3.007,
      "step": 148425
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.204589605331421,
      "learning_rate": 0.00016869433716940174,
      "loss": 2.9432,
      "step": 148426
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1635618209838867,
      "learning_rate": 0.00016869065924215662,
      "loss": 2.9879,
      "step": 148427
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.241140365600586,
      "learning_rate": 0.00016868698133932426,
      "loss": 3.0124,
      "step": 148428
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.390562057495117,
      "learning_rate": 0.0001686833034609056,
      "loss": 2.787,
      "step": 148429
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.392674207687378,
      "learning_rate": 0.0001686796256069012,
      "loss": 2.9294,
      "step": 148430
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6369383335113525,
      "learning_rate": 0.00016867594777731173,
      "loss": 2.914,
      "step": 148431
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6691367626190186,
      "learning_rate": 0.0001686722699721378,
      "loss": 3.2808,
      "step": 148432
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.3067288398742676,
      "learning_rate": 0.00016866859219138023,
      "loss": 3.1825,
      "step": 148433
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.344834089279175,
      "learning_rate": 0.00016866491443503958,
      "loss": 2.8057,
      "step": 148434
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4712412357330322,
      "learning_rate": 0.00016866123670311668,
      "loss": 3.174,
      "step": 148435
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.211289405822754,
      "learning_rate": 0.00016865755899561218,
      "loss": 3.0915,
      "step": 148436
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6320693492889404,
      "learning_rate": 0.00016865388131252665,
      "loss": 3.139,
      "step": 148437
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9325987100601196,
      "learning_rate": 0.00016865020365386076,
      "loss": 3.0585,
      "step": 148438
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6366660594940186,
      "learning_rate": 0.00016864652601961542,
      "loss": 2.7914,
      "step": 148439
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2034850120544434,
      "learning_rate": 0.00016864284840979103,
      "loss": 2.6988,
      "step": 148440
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2473297119140625,
      "learning_rate": 0.0001686391708243885,
      "loss": 3.1811,
      "step": 148441
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4723682403564453,
      "learning_rate": 0.00016863549326340849,
      "loss": 3.1351,
      "step": 148442
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.15400767326355,
      "learning_rate": 0.0001686318157268516,
      "loss": 2.7826,
      "step": 148443
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4591434001922607,
      "learning_rate": 0.00016862813821471838,
      "loss": 3.1035,
      "step": 148444
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5045557022094727,
      "learning_rate": 0.00016862446072700982,
      "loss": 2.9456,
      "step": 148445
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.5430779457092285,
      "learning_rate": 0.00016862078326372633,
      "loss": 2.8085,
      "step": 148446
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5832481384277344,
      "learning_rate": 0.00016861710582486884,
      "loss": 2.9618,
      "step": 148447
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2826364040374756,
      "learning_rate": 0.00016861342841043795,
      "loss": 2.9007,
      "step": 148448
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2357125282287598,
      "learning_rate": 0.00016860975102043425,
      "loss": 2.9072,
      "step": 148449
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.459826946258545,
      "learning_rate": 0.00016860607365485835,
      "loss": 3.0485,
      "step": 148450
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4707226753234863,
      "learning_rate": 0.0001686023963137112,
      "loss": 2.9936,
      "step": 148451
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.560640573501587,
      "learning_rate": 0.00016859871899699324,
      "loss": 2.8478,
      "step": 148452
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9907147884368896,
      "learning_rate": 0.00016859504170470537,
      "loss": 3.0537,
      "step": 148453
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.106872797012329,
      "learning_rate": 0.00016859136443684813,
      "loss": 3.1189,
      "step": 148454
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.15492844581604,
      "learning_rate": 0.00016858768719342214,
      "loss": 2.8096,
      "step": 148455
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.015819549560547,
      "learning_rate": 0.00016858400997442832,
      "loss": 2.865,
      "step": 148456
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.72806715965271,
      "learning_rate": 0.0001685803327798672,
      "loss": 2.9502,
      "step": 148457
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0480289459228516,
      "learning_rate": 0.00016857665560973933,
      "loss": 3.0119,
      "step": 148458
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5798113346099854,
      "learning_rate": 0.00016857297846404568,
      "loss": 2.8427,
      "step": 148459
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7623722553253174,
      "learning_rate": 0.0001685693013427868,
      "loss": 3.167,
      "step": 148460
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5382039546966553,
      "learning_rate": 0.00016856562424596324,
      "loss": 2.7643,
      "step": 148461
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0700936317443848,
      "learning_rate": 0.00016856194717357594,
      "loss": 2.813,
      "step": 148462
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.880176305770874,
      "learning_rate": 0.00016855827012562545,
      "loss": 2.8981,
      "step": 148463
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4902074337005615,
      "learning_rate": 0.00016855459310211233,
      "loss": 3.0992,
      "step": 148464
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8303189277648926,
      "learning_rate": 0.00016855091610303754,
      "loss": 2.7898,
      "step": 148465
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.412431001663208,
      "learning_rate": 0.00016854723912840147,
      "loss": 2.9628,
      "step": 148466
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7530288696289062,
      "learning_rate": 0.00016854356217820509,
      "loss": 3.063,
      "step": 148467
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.349947214126587,
      "learning_rate": 0.00016853988525244892,
      "loss": 2.9411,
      "step": 148468
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.5768399238586426,
      "learning_rate": 0.00016853620835113357,
      "loss": 3.0792,
      "step": 148469
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3163044452667236,
      "learning_rate": 0.00016853253147425988,
      "loss": 2.9063,
      "step": 148470
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4536216259002686,
      "learning_rate": 0.00016852885462182856,
      "loss": 2.955,
      "step": 148471
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7475452423095703,
      "learning_rate": 0.00016852517779384002,
      "loss": 2.8913,
      "step": 148472
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.38179087638855,
      "learning_rate": 0.0001685215009902953,
      "loss": 2.9637,
      "step": 148473
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.852433204650879,
      "learning_rate": 0.00016851782421119487,
      "loss": 3.0833,
      "step": 148474
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.226926565170288,
      "learning_rate": 0.0001685141474565394,
      "loss": 2.9226,
      "step": 148475
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.27178955078125,
      "learning_rate": 0.00016851047072632974,
      "loss": 2.9045,
      "step": 148476
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5083112716674805,
      "learning_rate": 0.00016850679402056627,
      "loss": 2.7176,
      "step": 148477
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.154534339904785,
      "learning_rate": 0.00016850311733925005,
      "loss": 3.0096,
      "step": 148478
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9553873538970947,
      "learning_rate": 0.0001684994406823816,
      "loss": 2.9187,
      "step": 148479
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2482106685638428,
      "learning_rate": 0.00016849576404996154,
      "loss": 3.1654,
      "step": 148480
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.515474319458008,
      "learning_rate": 0.00016849208744199048,
      "loss": 2.7048,
      "step": 148481
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8153769969940186,
      "learning_rate": 0.00016848841085846937,
      "loss": 2.7455,
      "step": 148482
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1714141368865967,
      "learning_rate": 0.00016848473429939862,
      "loss": 3.0072,
      "step": 148483
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.403763771057129,
      "learning_rate": 0.00016848105776477918,
      "loss": 2.8849,
      "step": 148484
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0845377445220947,
      "learning_rate": 0.00016847738125461153,
      "loss": 2.8024,
      "step": 148485
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.5251920223236084,
      "learning_rate": 0.00016847370476889647,
      "loss": 2.7373,
      "step": 148486
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9959700107574463,
      "learning_rate": 0.00016847002830763452,
      "loss": 2.896,
      "step": 148487
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.52117657661438,
      "learning_rate": 0.0001684663518708266,
      "loss": 3.148,
      "step": 148488
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2747673988342285,
      "learning_rate": 0.00016846267545847312,
      "loss": 2.7089,
      "step": 148489
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.626551389694214,
      "learning_rate": 0.00016845899907057501,
      "loss": 3.1787,
      "step": 148490
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5827114582061768,
      "learning_rate": 0.00016845532270713288,
      "loss": 3.0358,
      "step": 148491
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4802920818328857,
      "learning_rate": 0.0001684516463681474,
      "loss": 2.7781,
      "step": 148492
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1421494483947754,
      "learning_rate": 0.0001684479700536191,
      "loss": 3.1202,
      "step": 148493
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.446108102798462,
      "learning_rate": 0.00016844429376354893,
      "loss": 2.8083,
      "step": 148494
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0239291191101074,
      "learning_rate": 0.00016844061749793734,
      "loss": 2.9404,
      "step": 148495
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.314950942993164,
      "learning_rate": 0.00016843694125678524,
      "loss": 2.7302,
      "step": 148496
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.3919272422790527,
      "learning_rate": 0.0001684332650400932,
      "loss": 3.0867,
      "step": 148497
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.078648567199707,
      "learning_rate": 0.00016842958884786188,
      "loss": 3.1402,
      "step": 148498
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1698033809661865,
      "learning_rate": 0.0001684259126800919,
      "loss": 2.8086,
      "step": 148499
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6503865718841553,
      "learning_rate": 0.00016842223653678413,
      "loss": 3.0628,
      "step": 148500
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.147214889526367,
      "learning_rate": 0.000168418560417939,
      "loss": 2.8544,
      "step": 148501
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4554882049560547,
      "learning_rate": 0.0001684148843235575,
      "loss": 2.8396,
      "step": 148502
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7717161178588867,
      "learning_rate": 0.00016841120825364017,
      "loss": 2.9947,
      "step": 148503
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7607789039611816,
      "learning_rate": 0.00016840753220818765,
      "loss": 3.0279,
      "step": 148504
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.856480598449707,
      "learning_rate": 0.0001684038561872005,
      "loss": 2.9632,
      "step": 148505
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2088723182678223,
      "learning_rate": 0.00016840018019067973,
      "loss": 2.9866,
      "step": 148506
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6328346729278564,
      "learning_rate": 0.00016839650421862572,
      "loss": 3.0432,
      "step": 148507
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4575412273406982,
      "learning_rate": 0.00016839282827103938,
      "loss": 3.0821,
      "step": 148508
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.538771629333496,
      "learning_rate": 0.00016838915234792135,
      "loss": 3.0762,
      "step": 148509
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5888006687164307,
      "learning_rate": 0.00016838547644927221,
      "loss": 2.953,
      "step": 148510
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5884463787078857,
      "learning_rate": 0.00016838180057509257,
      "loss": 3.0688,
      "step": 148511
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.8041250705718994,
      "learning_rate": 0.00016837812472538343,
      "loss": 2.9392,
      "step": 148512
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.133056879043579,
      "learning_rate": 0.0001683744489001451,
      "loss": 3.0142,
      "step": 148513
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.509148597717285,
      "learning_rate": 0.00016837077309937858,
      "loss": 2.8501,
      "step": 148514
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.561103343963623,
      "learning_rate": 0.0001683670973230844,
      "loss": 3.2878,
      "step": 148515
    },
    {
      "epoch": 1.93,
      "grad_norm": 5.160199165344238,
      "learning_rate": 0.0001683634215712633,
      "loss": 2.7685,
      "step": 148516
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.4930646419525146,
      "learning_rate": 0.0001683597458439158,
      "loss": 2.9735,
      "step": 148517
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.048301935195923,
      "learning_rate": 0.00016835607014104278,
      "loss": 3.205,
      "step": 148518
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.574254035949707,
      "learning_rate": 0.00016835239446264476,
      "loss": 2.9461,
      "step": 148519
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.2665817737579346,
      "learning_rate": 0.00016834871880872264,
      "loss": 2.9953,
      "step": 148520
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.8309500217437744,
      "learning_rate": 0.00016834504317927694,
      "loss": 3.0324,
      "step": 148521
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.2078604698181152,
      "learning_rate": 0.00016834136757430844,
      "loss": 3.0002,
      "step": 148522
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3650765419006348,
      "learning_rate": 0.00016833769199381762,
      "loss": 2.7481,
      "step": 148523
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.785374879837036,
      "learning_rate": 0.00016833401643780545,
      "loss": 2.9525,
      "step": 148524
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.580786943435669,
      "learning_rate": 0.0001683303409062723,
      "loss": 2.9717,
      "step": 148525
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.32747220993042,
      "learning_rate": 0.0001683266653992192,
      "loss": 2.9645,
      "step": 148526
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.331101655960083,
      "learning_rate": 0.00016832298991664664,
      "loss": 3.1091,
      "step": 148527
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.044095993041992,
      "learning_rate": 0.00016831931445855528,
      "loss": 2.8578,
      "step": 148528
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.58001446723938,
      "learning_rate": 0.00016831563902494577,
      "loss": 3.2099,
      "step": 148529
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3513734340667725,
      "learning_rate": 0.00016831196361581898,
      "loss": 2.6834,
      "step": 148530
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.848947048187256,
      "learning_rate": 0.00016830828823117535,
      "loss": 2.9828,
      "step": 148531
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3555352687835693,
      "learning_rate": 0.00016830461287101585,
      "loss": 3.0278,
      "step": 148532
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.704566478729248,
      "learning_rate": 0.00016830093753534086,
      "loss": 3.0502,
      "step": 148533
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.915665626525879,
      "learning_rate": 0.00016829726222415147,
      "loss": 2.836,
      "step": 148534
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.49341082572937,
      "learning_rate": 0.00016829358693744783,
      "loss": 3.2262,
      "step": 148535
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1755189895629883,
      "learning_rate": 0.00016828991167523104,
      "loss": 2.8621,
      "step": 148536
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4107003211975098,
      "learning_rate": 0.00016828623643750152,
      "loss": 2.9076,
      "step": 148537
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2280945777893066,
      "learning_rate": 0.0001682825612242602,
      "loss": 3.0308,
      "step": 148538
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.947234630584717,
      "learning_rate": 0.00016827888603550753,
      "loss": 3.0298,
      "step": 148539
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1141178607940674,
      "learning_rate": 0.00016827521087124441,
      "loss": 2.9828,
      "step": 148540
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.6105263233184814,
      "learning_rate": 0.0001682715357314714,
      "loss": 3.2022,
      "step": 148541
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.765803813934326,
      "learning_rate": 0.0001682678606161892,
      "loss": 2.9479,
      "step": 148542
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.576498508453369,
      "learning_rate": 0.00016826418552539838,
      "loss": 2.8743,
      "step": 148543
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.276473045349121,
      "learning_rate": 0.00016826051045909987,
      "loss": 2.9392,
      "step": 148544
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.986403226852417,
      "learning_rate": 0.00016825683541729408,
      "loss": 2.9426,
      "step": 148545
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0705814361572266,
      "learning_rate": 0.00016825316039998197,
      "loss": 3.0248,
      "step": 148546
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1169114112854004,
      "learning_rate": 0.00016824948540716404,
      "loss": 2.819,
      "step": 148547
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1168060302734375,
      "learning_rate": 0.00016824581043884104,
      "loss": 2.8888,
      "step": 148548
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2238736152648926,
      "learning_rate": 0.00016824213549501352,
      "loss": 2.7201,
      "step": 148549
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.655123472213745,
      "learning_rate": 0.00016823846057568238,
      "loss": 2.9632,
      "step": 148550
    },
    {
      "epoch": 1.93,
      "grad_norm": 4.567354202270508,
      "learning_rate": 0.00016823478568084812,
      "loss": 3.0772,
      "step": 148551
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9882268905639648,
      "learning_rate": 0.00016823111081051156,
      "loss": 2.7918,
      "step": 148552
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.234553575515747,
      "learning_rate": 0.0001682274359646734,
      "loss": 2.8957,
      "step": 148553
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.640301465988159,
      "learning_rate": 0.00016822376114333402,
      "loss": 2.8103,
      "step": 148554
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.964569330215454,
      "learning_rate": 0.00016822008634649453,
      "loss": 3.1805,
      "step": 148555
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.100360870361328,
      "learning_rate": 0.00016821641157415538,
      "loss": 2.7546,
      "step": 148556
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.108004570007324,
      "learning_rate": 0.0001682127368263172,
      "loss": 3.0285,
      "step": 148557
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2944586277008057,
      "learning_rate": 0.00016820906210298085,
      "loss": 2.9198,
      "step": 148558
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.716606855392456,
      "learning_rate": 0.00016820538740414695,
      "loss": 3.2034,
      "step": 148559
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0577328205108643,
      "learning_rate": 0.00016820171272981601,
      "loss": 3.08,
      "step": 148560
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.694908380508423,
      "learning_rate": 0.000168198038079989,
      "loss": 2.9302,
      "step": 148561
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4530234336853027,
      "learning_rate": 0.00016819436345466644,
      "loss": 2.9127,
      "step": 148562
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.177208185195923,
      "learning_rate": 0.00016819068885384894,
      "loss": 2.7711,
      "step": 148563
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.619574546813965,
      "learning_rate": 0.00016818701427753738,
      "loss": 2.9024,
      "step": 148564
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6417019367218018,
      "learning_rate": 0.00016818333972573235,
      "loss": 2.8264,
      "step": 148565
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3110036849975586,
      "learning_rate": 0.00016817966519843438,
      "loss": 2.8255,
      "step": 148566
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4626944065093994,
      "learning_rate": 0.00016817599069564448,
      "loss": 3.0547,
      "step": 148567
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.041447401046753,
      "learning_rate": 0.00016817231621736297,
      "loss": 3.0477,
      "step": 148568
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.451754331588745,
      "learning_rate": 0.00016816864176359087,
      "loss": 2.9947,
      "step": 148569
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.52542781829834,
      "learning_rate": 0.0001681649673343287,
      "loss": 3.064,
      "step": 148570
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.9765549898147583,
      "learning_rate": 0.0001681612929295771,
      "loss": 3.0095,
      "step": 148571
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.758596420288086,
      "learning_rate": 0.00016815761854933677,
      "loss": 2.9684,
      "step": 148572
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.098017930984497,
      "learning_rate": 0.0001681539441936085,
      "loss": 3.2245,
      "step": 148573
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0759196281433105,
      "learning_rate": 0.00016815026986239282,
      "loss": 2.9917,
      "step": 148574
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.7613420486450195,
      "learning_rate": 0.00016814659555569058,
      "loss": 2.8278,
      "step": 148575
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.118837356567383,
      "learning_rate": 0.00016814292127350234,
      "loss": 3.158,
      "step": 148576
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0414533615112305,
      "learning_rate": 0.0001681392470158289,
      "loss": 2.9187,
      "step": 148577
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.56819224357605,
      "learning_rate": 0.00016813557278267072,
      "loss": 2.9704,
      "step": 148578
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.5010874271392822,
      "learning_rate": 0.0001681318985740287,
      "loss": 2.9374,
      "step": 148579
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.521982431411743,
      "learning_rate": 0.00016812822438990336,
      "loss": 2.7455,
      "step": 148580
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0397074222564697,
      "learning_rate": 0.00016812455023029562,
      "loss": 2.7307,
      "step": 148581
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4086451530456543,
      "learning_rate": 0.00016812087609520597,
      "loss": 2.892,
      "step": 148582
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.243354320526123,
      "learning_rate": 0.00016811720198463514,
      "loss": 3.0148,
      "step": 148583
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3634047508239746,
      "learning_rate": 0.00016811352789858371,
      "loss": 3.1191,
      "step": 148584
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6975111961364746,
      "learning_rate": 0.00016810985383705257,
      "loss": 3.2323,
      "step": 148585
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.6379635334014893,
      "learning_rate": 0.0001681061798000422,
      "loss": 2.7367,
      "step": 148586
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.318955183029175,
      "learning_rate": 0.00016810250578755352,
      "loss": 2.9586,
      "step": 148587
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2349302768707275,
      "learning_rate": 0.00016809883179958702,
      "loss": 2.8526,
      "step": 148588
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.6999876499176025,
      "learning_rate": 0.00016809515783614346,
      "loss": 3.061,
      "step": 148589
    },
    {
      "epoch": 1.93,
      "grad_norm": 1.904775619506836,
      "learning_rate": 0.00016809148389722338,
      "loss": 3.0576,
      "step": 148590
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.790782928466797,
      "learning_rate": 0.00016808780998282772,
      "loss": 2.7439,
      "step": 148591
    },
    {
      "epoch": 1.93,
      "grad_norm": 3.1578972339630127,
      "learning_rate": 0.00016808413609295688,
      "loss": 2.9782,
      "step": 148592
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4145560264587402,
      "learning_rate": 0.00016808046222761185,
      "loss": 3.1821,
      "step": 148593
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9157745838165283,
      "learning_rate": 0.00016807678838679307,
      "loss": 2.9434,
      "step": 148594
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.063889503479004,
      "learning_rate": 0.00016807311457050136,
      "loss": 3.0453,
      "step": 148595
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4094178676605225,
      "learning_rate": 0.00016806944077873723,
      "loss": 2.9655,
      "step": 148596
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0913808345794678,
      "learning_rate": 0.0001680657670115016,
      "loss": 2.9169,
      "step": 148597
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1693921089172363,
      "learning_rate": 0.00016806209326879494,
      "loss": 3.0048,
      "step": 148598
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.0825109481811523,
      "learning_rate": 0.0001680584195506181,
      "loss": 3.0897,
      "step": 148599
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1183643341064453,
      "learning_rate": 0.00016805474585697162,
      "loss": 3.1235,
      "step": 148600
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.1097571849823,
      "learning_rate": 0.00016805107218785642,
      "loss": 2.9384,
      "step": 148601
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.3667478561401367,
      "learning_rate": 0.00016804739854327282,
      "loss": 3.1612,
      "step": 148602
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.768332004547119,
      "learning_rate": 0.00016804372492322186,
      "loss": 3.064,
      "step": 148603
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.299846649169922,
      "learning_rate": 0.00016804005132770388,
      "loss": 2.8302,
      "step": 148604
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.5337913036346436,
      "learning_rate": 0.00016803637775671988,
      "loss": 3.071,
      "step": 148605
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4636542797088623,
      "learning_rate": 0.00016803270421027032,
      "loss": 2.8932,
      "step": 148606
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.2788960933685303,
      "learning_rate": 0.0001680290306883562,
      "loss": 3.118,
      "step": 148607
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.741399049758911,
      "learning_rate": 0.0001680253571909777,
      "loss": 3.0574,
      "step": 148608
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.4272587299346924,
      "learning_rate": 0.00016802168371813592,
      "loss": 2.8397,
      "step": 148609
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.083712339401245,
      "learning_rate": 0.00016801801026983127,
      "loss": 2.8468,
      "step": 148610
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.679806709289551,
      "learning_rate": 0.00016801433684606468,
      "loss": 2.8585,
      "step": 148611
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4834320545196533,
      "learning_rate": 0.0001680106634468366,
      "loss": 2.7786,
      "step": 148612
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.172720193862915,
      "learning_rate": 0.0001680069900721481,
      "loss": 2.9107,
      "step": 148613
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7350194454193115,
      "learning_rate": 0.0001680033167219993,
      "loss": 3.0801,
      "step": 148614
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2604010105133057,
      "learning_rate": 0.00016799964339639137,
      "loss": 3.2084,
      "step": 148615
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3629262447357178,
      "learning_rate": 0.0001679959700953246,
      "loss": 3.0686,
      "step": 148616
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1608216762542725,
      "learning_rate": 0.0001679922968188,
      "loss": 2.8943,
      "step": 148617
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8845003843307495,
      "learning_rate": 0.00016798862356681805,
      "loss": 3.0035,
      "step": 148618
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4689595699310303,
      "learning_rate": 0.00016798495033937976,
      "loss": 3.2887,
      "step": 148619
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.227527618408203,
      "learning_rate": 0.00016798127713648525,
      "loss": 2.6362,
      "step": 148620
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.056264877319336,
      "learning_rate": 0.00016797760395813568,
      "loss": 2.8222,
      "step": 148621
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.672245740890503,
      "learning_rate": 0.00016797393080433142,
      "loss": 3.1938,
      "step": 148622
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3038785457611084,
      "learning_rate": 0.00016797025767507342,
      "loss": 2.9623,
      "step": 148623
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.823969602584839,
      "learning_rate": 0.00016796658457036217,
      "loss": 2.9859,
      "step": 148624
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.870370626449585,
      "learning_rate": 0.0001679629114901986,
      "loss": 2.8491,
      "step": 148625
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9390462636947632,
      "learning_rate": 0.000167959238434583,
      "loss": 3.1865,
      "step": 148626
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.516162395477295,
      "learning_rate": 0.0001679555654035164,
      "loss": 2.9991,
      "step": 148627
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.295727491378784,
      "learning_rate": 0.00016795189239699924,
      "loss": 3.1073,
      "step": 148628
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.211289405822754,
      "learning_rate": 0.0001679482194150324,
      "loss": 2.9882,
      "step": 148629
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1286215782165527,
      "learning_rate": 0.00016794454645761638,
      "loss": 2.8135,
      "step": 148630
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3700177669525146,
      "learning_rate": 0.00016794087352475207,
      "loss": 3.0897,
      "step": 148631
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.336366891860962,
      "learning_rate": 0.00016793720061644007,
      "loss": 3.1049,
      "step": 148632
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.689603567123413,
      "learning_rate": 0.00016793352773268106,
      "loss": 3.0264,
      "step": 148633
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2562599182128906,
      "learning_rate": 0.00016792985487347551,
      "loss": 3.0081,
      "step": 148634
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.172149181365967,
      "learning_rate": 0.00016792618203882446,
      "loss": 2.8784,
      "step": 148635
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7303647994995117,
      "learning_rate": 0.00016792250922872829,
      "loss": 2.8994,
      "step": 148636
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.691480875015259,
      "learning_rate": 0.0001679188364431879,
      "loss": 3.2367,
      "step": 148637
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.22961163520813,
      "learning_rate": 0.00016791516368220397,
      "loss": 2.9791,
      "step": 148638
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.786228656768799,
      "learning_rate": 0.00016791149094577695,
      "loss": 2.9626,
      "step": 148639
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9287219047546387,
      "learning_rate": 0.00016790781823390778,
      "loss": 2.8318,
      "step": 148640
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4984099864959717,
      "learning_rate": 0.00016790414554659707,
      "loss": 3.1355,
      "step": 148641
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0899524688720703,
      "learning_rate": 0.00016790047288384532,
      "loss": 2.8368,
      "step": 148642
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.201969861984253,
      "learning_rate": 0.00016789680024565352,
      "loss": 3.2055,
      "step": 148643
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3055927753448486,
      "learning_rate": 0.00016789312763202218,
      "loss": 3.2368,
      "step": 148644
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.669065475463867,
      "learning_rate": 0.00016788945504295186,
      "loss": 2.6582,
      "step": 148645
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7829935550689697,
      "learning_rate": 0.00016788578247844355,
      "loss": 2.9562,
      "step": 148646
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3051490783691406,
      "learning_rate": 0.00016788210993849773,
      "loss": 2.9385,
      "step": 148647
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2962162494659424,
      "learning_rate": 0.000167878437423115,
      "loss": 2.9537,
      "step": 148648
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2731733322143555,
      "learning_rate": 0.0001678747649322963,
      "loss": 3.1866,
      "step": 148649
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.048060417175293,
      "learning_rate": 0.00016787109246604217,
      "loss": 2.8677,
      "step": 148650
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1790168285369873,
      "learning_rate": 0.0001678674200243532,
      "loss": 2.8432,
      "step": 148651
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.658494472503662,
      "learning_rate": 0.00016786374760723025,
      "loss": 2.7825,
      "step": 148652
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.359426736831665,
      "learning_rate": 0.00016786007521467383,
      "loss": 3.1366,
      "step": 148653
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.292067527770996,
      "learning_rate": 0.00016785640284668483,
      "loss": 2.8696,
      "step": 148654
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9108104705810547,
      "learning_rate": 0.00016785273050326385,
      "loss": 3.1279,
      "step": 148655
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.940441608428955,
      "learning_rate": 0.0001678490581844115,
      "loss": 2.9615,
      "step": 148656
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.350599527359009,
      "learning_rate": 0.00016784538589012838,
      "loss": 2.9979,
      "step": 148657
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.533533811569214,
      "learning_rate": 0.00016784171362041545,
      "loss": 2.8803,
      "step": 148658
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.54290509223938,
      "learning_rate": 0.00016783804137527307,
      "loss": 3.0605,
      "step": 148659
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.389146089553833,
      "learning_rate": 0.00016783436915470226,
      "loss": 2.9895,
      "step": 148660
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.181535243988037,
      "learning_rate": 0.0001678306969587035,
      "loss": 3.1611,
      "step": 148661
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.432345151901245,
      "learning_rate": 0.00016782702478727757,
      "loss": 2.9865,
      "step": 148662
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0916616916656494,
      "learning_rate": 0.0001678233526404249,
      "loss": 2.9501,
      "step": 148663
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.874011993408203,
      "learning_rate": 0.00016781968051814651,
      "loss": 3.028,
      "step": 148664
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1067144870758057,
      "learning_rate": 0.00016781600842044283,
      "loss": 2.807,
      "step": 148665
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.350407123565674,
      "learning_rate": 0.00016781233634731476,
      "loss": 2.9097,
      "step": 148666
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.011852741241455,
      "learning_rate": 0.00016780866429876273,
      "loss": 3.0631,
      "step": 148667
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4206173419952393,
      "learning_rate": 0.00016780499227478786,
      "loss": 3.0225,
      "step": 148668
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4865031242370605,
      "learning_rate": 0.00016780132027539025,
      "loss": 2.8168,
      "step": 148669
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3764114379882812,
      "learning_rate": 0.00016779764830057104,
      "loss": 3.0896,
      "step": 148670
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3360512256622314,
      "learning_rate": 0.00016779397635033056,
      "loss": 2.8349,
      "step": 148671
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.516094923019409,
      "learning_rate": 0.00016779030442466982,
      "loss": 2.9336,
      "step": 148672
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.167335271835327,
      "learning_rate": 0.00016778663252358923,
      "loss": 3.2099,
      "step": 148673
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.365713357925415,
      "learning_rate": 0.00016778296064708984,
      "loss": 3.0152,
      "step": 148674
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.28004789352417,
      "learning_rate": 0.0001677792887951719,
      "loss": 3.1656,
      "step": 148675
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2041845321655273,
      "learning_rate": 0.00016777561696783636,
      "loss": 2.947,
      "step": 148676
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.430326461791992,
      "learning_rate": 0.0001677719451650837,
      "loss": 3.0673,
      "step": 148677
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.548041820526123,
      "learning_rate": 0.00016776827338691488,
      "loss": 2.9366,
      "step": 148678
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4690659046173096,
      "learning_rate": 0.0001677646016333303,
      "loss": 2.9517,
      "step": 148679
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.688474655151367,
      "learning_rate": 0.00016776092990433103,
      "loss": 2.9783,
      "step": 148680
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.355104446411133,
      "learning_rate": 0.00016775725819991724,
      "loss": 2.9683,
      "step": 148681
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.509737491607666,
      "learning_rate": 0.00016775358652009001,
      "loss": 2.9735,
      "step": 148682
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0377941131591797,
      "learning_rate": 0.00016774991486484975,
      "loss": 2.7563,
      "step": 148683
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4807658195495605,
      "learning_rate": 0.0001677462432341974,
      "loss": 3.0544,
      "step": 148684
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0718164443969727,
      "learning_rate": 0.0001677425716281334,
      "loss": 3.1861,
      "step": 148685
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2204878330230713,
      "learning_rate": 0.00016773890004665882,
      "loss": 3.0917,
      "step": 148686
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.810551404953003,
      "learning_rate": 0.00016773522848977383,
      "loss": 3.0361,
      "step": 148687
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.006162405014038,
      "learning_rate": 0.00016773155695747944,
      "loss": 3.0816,
      "step": 148688
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7269105911254883,
      "learning_rate": 0.00016772788544977614,
      "loss": 2.8994,
      "step": 148689
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4592833518981934,
      "learning_rate": 0.00016772421396666484,
      "loss": 2.9079,
      "step": 148690
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.613372564315796,
      "learning_rate": 0.00016772054250814605,
      "loss": 2.8771,
      "step": 148691
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3804471492767334,
      "learning_rate": 0.00016771687107422067,
      "loss": 3.1617,
      "step": 148692
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.692490577697754,
      "learning_rate": 0.000167713199664889,
      "loss": 2.9001,
      "step": 148693
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.260669708251953,
      "learning_rate": 0.0001677095282801521,
      "loss": 2.6486,
      "step": 148694
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1919095516204834,
      "learning_rate": 0.00016770585692001033,
      "loss": 2.9353,
      "step": 148695
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2521491050720215,
      "learning_rate": 0.00016770218558446464,
      "loss": 3.0635,
      "step": 148696
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.88457989692688,
      "learning_rate": 0.00016769851427351552,
      "loss": 2.9414,
      "step": 148697
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7406153678894043,
      "learning_rate": 0.000167694842987164,
      "loss": 3.0285,
      "step": 148698
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1203391551971436,
      "learning_rate": 0.00016769117172541026,
      "loss": 3.0282,
      "step": 148699
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2260286808013916,
      "learning_rate": 0.00016768750048825535,
      "loss": 2.8511,
      "step": 148700
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9807310104370117,
      "learning_rate": 0.0001676838292756997,
      "loss": 2.8849,
      "step": 148701
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9597914218902588,
      "learning_rate": 0.00016768015808774428,
      "loss": 2.9851,
      "step": 148702
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5340700149536133,
      "learning_rate": 0.00016767648692438947,
      "loss": 3.0794,
      "step": 148703
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3254711627960205,
      "learning_rate": 0.00016767281578563629,
      "loss": 3.0098,
      "step": 148704
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9394426345825195,
      "learning_rate": 0.00016766914467148506,
      "loss": 2.8956,
      "step": 148705
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.994781255722046,
      "learning_rate": 0.00016766547358193674,
      "loss": 2.9526,
      "step": 148706
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1591343879699707,
      "learning_rate": 0.00016766180251699176,
      "loss": 3.0682,
      "step": 148707
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4396822452545166,
      "learning_rate": 0.0001676581314766511,
      "loss": 3.0626,
      "step": 148708
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1489527225494385,
      "learning_rate": 0.00016765446046091513,
      "loss": 2.8721,
      "step": 148709
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2056808471679688,
      "learning_rate": 0.000167650789469785,
      "loss": 2.8589,
      "step": 148710
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.112685680389404,
      "learning_rate": 0.0001676471185032608,
      "loss": 2.962,
      "step": 148711
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1365480422973633,
      "learning_rate": 0.00016764344756134363,
      "loss": 3.2356,
      "step": 148712
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.269245147705078,
      "learning_rate": 0.0001676397766440339,
      "loss": 2.9821,
      "step": 148713
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.791954755783081,
      "learning_rate": 0.00016763610575133258,
      "loss": 3.0344,
      "step": 148714
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.871148109436035,
      "learning_rate": 0.00016763243488324007,
      "loss": 2.7404,
      "step": 148715
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.305910348892212,
      "learning_rate": 0.0001676287640397573,
      "loss": 2.8927,
      "step": 148716
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1466012001037598,
      "learning_rate": 0.0001676250932208849,
      "loss": 2.792,
      "step": 148717
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.281183958053589,
      "learning_rate": 0.00016762142242662343,
      "loss": 3.1203,
      "step": 148718
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.644902467727661,
      "learning_rate": 0.00016761775165697357,
      "loss": 2.8635,
      "step": 148719
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.500748634338379,
      "learning_rate": 0.00016761408091193613,
      "loss": 2.8584,
      "step": 148720
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.258283853530884,
      "learning_rate": 0.00016761041019151165,
      "loss": 2.746,
      "step": 148721
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7318501472473145,
      "learning_rate": 0.000167606739495701,
      "loss": 2.8223,
      "step": 148722
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.014460325241089,
      "learning_rate": 0.00016760306882450477,
      "loss": 3.2229,
      "step": 148723
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3685879707336426,
      "learning_rate": 0.0001675993981779236,
      "loss": 3.0029,
      "step": 148724
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.525773286819458,
      "learning_rate": 0.0001675957275559581,
      "loss": 3.178,
      "step": 148725
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.918303966522217,
      "learning_rate": 0.00016759205695860915,
      "loss": 2.9685,
      "step": 148726
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1543171405792236,
      "learning_rate": 0.00016758838638587727,
      "loss": 2.9139,
      "step": 148727
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.617316961288452,
      "learning_rate": 0.00016758471583776328,
      "loss": 2.6704,
      "step": 148728
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9302093982696533,
      "learning_rate": 0.00016758104531426778,
      "loss": 2.8288,
      "step": 148729
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9285552501678467,
      "learning_rate": 0.00016757737481539142,
      "loss": 2.9002,
      "step": 148730
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0844647884368896,
      "learning_rate": 0.000167573704341135,
      "loss": 2.8378,
      "step": 148731
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0213687419891357,
      "learning_rate": 0.00016757003389149911,
      "loss": 2.7479,
      "step": 148732
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1307432651519775,
      "learning_rate": 0.00016756636346648433,
      "loss": 3.0291,
      "step": 148733
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.846673011779785,
      "learning_rate": 0.0001675626930660916,
      "loss": 2.8821,
      "step": 148734
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9451282024383545,
      "learning_rate": 0.00016755902269032148,
      "loss": 3.0503,
      "step": 148735
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1791911125183105,
      "learning_rate": 0.0001675553523391745,
      "loss": 2.9095,
      "step": 148736
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.204937219619751,
      "learning_rate": 0.00016755168201265162,
      "loss": 2.7789,
      "step": 148737
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9803571701049805,
      "learning_rate": 0.0001675480117107532,
      "loss": 2.7896,
      "step": 148738
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.354651689529419,
      "learning_rate": 0.0001675443414334803,
      "loss": 2.8994,
      "step": 148739
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1220650672912598,
      "learning_rate": 0.00016754067118083336,
      "loss": 2.957,
      "step": 148740
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3520619869232178,
      "learning_rate": 0.00016753700095281312,
      "loss": 3.0767,
      "step": 148741
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.454806089401245,
      "learning_rate": 0.00016753333074942013,
      "loss": 2.774,
      "step": 148742
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6520068645477295,
      "learning_rate": 0.00016752966057065536,
      "loss": 2.8421,
      "step": 148743
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5316600799560547,
      "learning_rate": 0.00016752599041651916,
      "loss": 2.9383,
      "step": 148744
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8944576978683472,
      "learning_rate": 0.00016752232028701251,
      "loss": 2.8931,
      "step": 148745
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.002131700515747,
      "learning_rate": 0.00016751865018213593,
      "loss": 2.6345,
      "step": 148746
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9951229095458984,
      "learning_rate": 0.0001675149801018902,
      "loss": 2.8259,
      "step": 148747
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3116984367370605,
      "learning_rate": 0.00016751131004627578,
      "loss": 2.7339,
      "step": 148748
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1906540393829346,
      "learning_rate": 0.0001675076400152936,
      "loss": 2.9136,
      "step": 148749
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.139458417892456,
      "learning_rate": 0.00016750397000894418,
      "loss": 2.7831,
      "step": 148750
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2646641731262207,
      "learning_rate": 0.00016750030002722835,
      "loss": 2.9254,
      "step": 148751
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6406261920928955,
      "learning_rate": 0.0001674966300701466,
      "loss": 3.3069,
      "step": 148752
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3373382091522217,
      "learning_rate": 0.00016749296013770002,
      "loss": 3.0012,
      "step": 148753
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.278059959411621,
      "learning_rate": 0.0001674892902298887,
      "loss": 3.0508,
      "step": 148754
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.173933744430542,
      "learning_rate": 0.00016748562034671376,
      "loss": 2.9029,
      "step": 148755
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6288905143737793,
      "learning_rate": 0.00016748195048817564,
      "loss": 2.8151,
      "step": 148756
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.500295877456665,
      "learning_rate": 0.00016747828065427528,
      "loss": 3.0467,
      "step": 148757
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.367901086807251,
      "learning_rate": 0.000167474610845013,
      "loss": 3.1377,
      "step": 148758
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3409156799316406,
      "learning_rate": 0.00016747094106039,
      "loss": 2.9129,
      "step": 148759
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3010363578796387,
      "learning_rate": 0.00016746727130040638,
      "loss": 2.8525,
      "step": 148760
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.559488534927368,
      "learning_rate": 0.00016746360156506324,
      "loss": 2.881,
      "step": 148761
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3054466247558594,
      "learning_rate": 0.000167459931854361,
      "loss": 2.9689,
      "step": 148762
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.40073823928833,
      "learning_rate": 0.00016745626216830054,
      "loss": 2.7739,
      "step": 148763
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.117079496383667,
      "learning_rate": 0.00016745259250688238,
      "loss": 2.9708,
      "step": 148764
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.669442653656006,
      "learning_rate": 0.0001674489228701075,
      "loss": 2.9147,
      "step": 148765
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.106204032897949,
      "learning_rate": 0.00016744525325797615,
      "loss": 2.796,
      "step": 148766
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.560798168182373,
      "learning_rate": 0.00016744158367048935,
      "loss": 2.9481,
      "step": 148767
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.200516700744629,
      "learning_rate": 0.00016743791410764753,
      "loss": 2.9335,
      "step": 148768
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.431628704071045,
      "learning_rate": 0.0001674342445694516,
      "loss": 2.7807,
      "step": 148769
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.178966999053955,
      "learning_rate": 0.00016743057505590208,
      "loss": 3.0384,
      "step": 148770
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4674508571624756,
      "learning_rate": 0.00016742690556699992,
      "loss": 2.8368,
      "step": 148771
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3685147762298584,
      "learning_rate": 0.00016742323610274539,
      "loss": 2.8955,
      "step": 148772
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8986173868179321,
      "learning_rate": 0.00016741956666313946,
      "loss": 2.8455,
      "step": 148773
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.103698492050171,
      "learning_rate": 0.00016741589724818265,
      "loss": 3.0961,
      "step": 148774
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2630021572113037,
      "learning_rate": 0.00016741222785787585,
      "loss": 3.1325,
      "step": 148775
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.424036979675293,
      "learning_rate": 0.00016740855849221948,
      "loss": 3.1308,
      "step": 148776
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.714632511138916,
      "learning_rate": 0.0001674048891512146,
      "loss": 3.0181,
      "step": 148777
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2434043884277344,
      "learning_rate": 0.00016740121983486146,
      "loss": 2.9668,
      "step": 148778
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2977771759033203,
      "learning_rate": 0.000167397550543161,
      "loss": 3.1839,
      "step": 148779
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.8069908618927,
      "learning_rate": 0.00016739388127611374,
      "loss": 2.7291,
      "step": 148780
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.805911064147949,
      "learning_rate": 0.0001673902120337206,
      "loss": 2.7625,
      "step": 148781
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.375753164291382,
      "learning_rate": 0.00016738654281598194,
      "loss": 2.9046,
      "step": 148782
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2159078121185303,
      "learning_rate": 0.0001673828736228989,
      "loss": 3.1333,
      "step": 148783
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2120361328125,
      "learning_rate": 0.0001673792044544717,
      "loss": 2.9032,
      "step": 148784
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7696359157562256,
      "learning_rate": 0.00016737553531070127,
      "loss": 2.9318,
      "step": 148785
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4086434841156006,
      "learning_rate": 0.00016737186619158814,
      "loss": 2.9294,
      "step": 148786
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7027626037597656,
      "learning_rate": 0.00016736819709713317,
      "loss": 3.0309,
      "step": 148787
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4385008811950684,
      "learning_rate": 0.00016736452802733684,
      "loss": 3.0037,
      "step": 148788
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2935123443603516,
      "learning_rate": 0.00016736085898220025,
      "loss": 2.9869,
      "step": 148789
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7765684127807617,
      "learning_rate": 0.00016735718996172347,
      "loss": 2.7825,
      "step": 148790
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.36565899848938,
      "learning_rate": 0.00016735352096590764,
      "loss": 2.9461,
      "step": 148791
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.810980796813965,
      "learning_rate": 0.00016734985199475314,
      "loss": 3.0896,
      "step": 148792
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3851864337921143,
      "learning_rate": 0.00016734618304826095,
      "loss": 2.9736,
      "step": 148793
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.316293239593506,
      "learning_rate": 0.00016734251412643152,
      "loss": 2.9275,
      "step": 148794
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.165966749191284,
      "learning_rate": 0.00016733884522926584,
      "loss": 3.0835,
      "step": 148795
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2819223403930664,
      "learning_rate": 0.0001673351763567641,
      "loss": 3.0528,
      "step": 148796
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.704462766647339,
      "learning_rate": 0.00016733150750892744,
      "loss": 3.093,
      "step": 148797
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.290583848953247,
      "learning_rate": 0.00016732783868575617,
      "loss": 2.9371,
      "step": 148798
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2737603187561035,
      "learning_rate": 0.0001673241698872513,
      "loss": 2.7907,
      "step": 148799
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.054849624633789,
      "learning_rate": 0.0001673205011134133,
      "loss": 3.0685,
      "step": 148800
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4135968685150146,
      "learning_rate": 0.00016731683236424297,
      "loss": 2.8472,
      "step": 148801
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6949377059936523,
      "learning_rate": 0.000167313163639741,
      "loss": 2.8879,
      "step": 148802
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6945788860321045,
      "learning_rate": 0.000167309494939908,
      "loss": 2.6791,
      "step": 148803
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0143682956695557,
      "learning_rate": 0.00016730582626474453,
      "loss": 2.7314,
      "step": 148804
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.82944917678833,
      "learning_rate": 0.00016730215761425154,
      "loss": 2.863,
      "step": 148805
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.093672513961792,
      "learning_rate": 0.00016729848898842947,
      "loss": 3.082,
      "step": 148806
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.159558057785034,
      "learning_rate": 0.00016729482038727926,
      "loss": 3.0553,
      "step": 148807
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9929289817810059,
      "learning_rate": 0.00016729115181080142,
      "loss": 2.8153,
      "step": 148808
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.477313756942749,
      "learning_rate": 0.0001672874832589967,
      "loss": 2.9731,
      "step": 148809
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6600544452667236,
      "learning_rate": 0.00016728381473186553,
      "loss": 2.6951,
      "step": 148810
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.019727945327759,
      "learning_rate": 0.000167280146229409,
      "loss": 3.0774,
      "step": 148811
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8801958560943604,
      "learning_rate": 0.00016727647775162746,
      "loss": 2.9942,
      "step": 148812
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0229721069335938,
      "learning_rate": 0.00016727280929852183,
      "loss": 3.2462,
      "step": 148813
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.323956251144409,
      "learning_rate": 0.00016726914087009275,
      "loss": 2.9751,
      "step": 148814
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2248356342315674,
      "learning_rate": 0.00016726547246634065,
      "loss": 2.927,
      "step": 148815
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3962225914001465,
      "learning_rate": 0.00016726180408726656,
      "loss": 3.0342,
      "step": 148816
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4775006771087646,
      "learning_rate": 0.000167258135732871,
      "loss": 3.0842,
      "step": 148817
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4469335079193115,
      "learning_rate": 0.00016725446740315452,
      "loss": 3.1602,
      "step": 148818
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6092140674591064,
      "learning_rate": 0.0001672507990981181,
      "loss": 2.7114,
      "step": 148819
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2542498111724854,
      "learning_rate": 0.00016724713081776223,
      "loss": 2.9164,
      "step": 148820
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2669718265533447,
      "learning_rate": 0.0001672434625620875,
      "loss": 2.7775,
      "step": 148821
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.377424716949463,
      "learning_rate": 0.00016723979433109484,
      "loss": 2.8836,
      "step": 148822
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0622284412384033,
      "learning_rate": 0.0001672361261247847,
      "loss": 2.8554,
      "step": 148823
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.332697629928589,
      "learning_rate": 0.00016723245794315802,
      "loss": 3.0753,
      "step": 148824
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.45019268989563,
      "learning_rate": 0.0001672287897862153,
      "loss": 2.9354,
      "step": 148825
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.865992546081543,
      "learning_rate": 0.00016722512165395724,
      "loss": 3.2954,
      "step": 148826
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7599236965179443,
      "learning_rate": 0.0001672214535463844,
      "loss": 2.9869,
      "step": 148827
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.096900463104248,
      "learning_rate": 0.0001672177854634978,
      "loss": 2.8319,
      "step": 148828
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.918537139892578,
      "learning_rate": 0.00016721411740529773,
      "loss": 3.0503,
      "step": 148829
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5521304607391357,
      "learning_rate": 0.0001672104493717852,
      "loss": 3.0452,
      "step": 148830
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.415417194366455,
      "learning_rate": 0.00016720678136296075,
      "loss": 3.2099,
      "step": 148831
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8421790599823,
      "learning_rate": 0.00016720311337882506,
      "loss": 2.9819,
      "step": 148832
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2345409393310547,
      "learning_rate": 0.00016719944541937872,
      "loss": 2.9581,
      "step": 148833
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6945345401763916,
      "learning_rate": 0.0001671957774846226,
      "loss": 3.0042,
      "step": 148834
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8085057735443115,
      "learning_rate": 0.0001671921095745572,
      "loss": 2.8507,
      "step": 148835
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3938255310058594,
      "learning_rate": 0.00016718844168918344,
      "loss": 2.8136,
      "step": 148836
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.218505620956421,
      "learning_rate": 0.00016718477382850167,
      "loss": 3.1524,
      "step": 148837
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.616873264312744,
      "learning_rate": 0.00016718110599251308,
      "loss": 2.9527,
      "step": 148838
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.43727970123291,
      "learning_rate": 0.00016717743818121773,
      "loss": 2.9497,
      "step": 148839
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0489442348480225,
      "learning_rate": 0.00016717377039461668,
      "loss": 2.9488,
      "step": 148840
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.6950747966766357,
      "learning_rate": 0.00016717010263271048,
      "loss": 3.0188,
      "step": 148841
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0798585414886475,
      "learning_rate": 0.0001671664348955,
      "loss": 3.0204,
      "step": 148842
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.777289628982544,
      "learning_rate": 0.00016716276718298562,
      "loss": 3.2083,
      "step": 148843
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.360819101333618,
      "learning_rate": 0.00016715909949516843,
      "loss": 3.218,
      "step": 148844
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.5503838062286377,
      "learning_rate": 0.00016715543183204866,
      "loss": 2.96,
      "step": 148845
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.271045446395874,
      "learning_rate": 0.00016715176419362728,
      "loss": 2.7995,
      "step": 148846
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7072699069976807,
      "learning_rate": 0.00016714809657990482,
      "loss": 3.1367,
      "step": 148847
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0724618434906006,
      "learning_rate": 0.00016714442899088214,
      "loss": 2.7049,
      "step": 148848
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.096398115158081,
      "learning_rate": 0.0001671407614265597,
      "loss": 3.0027,
      "step": 148849
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.947071075439453,
      "learning_rate": 0.00016713709388693854,
      "loss": 2.8993,
      "step": 148850
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.681379556655884,
      "learning_rate": 0.00016713342637201888,
      "loss": 3.0059,
      "step": 148851
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.3634109497070312,
      "learning_rate": 0.00016712975888180168,
      "loss": 2.8383,
      "step": 148852
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0544471740722656,
      "learning_rate": 0.00016712609141628752,
      "loss": 3.3499,
      "step": 148853
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2953343391418457,
      "learning_rate": 0.0001671224239754772,
      "loss": 3.0331,
      "step": 148854
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6151793003082275,
      "learning_rate": 0.00016711875655937123,
      "loss": 2.8049,
      "step": 148855
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4758756160736084,
      "learning_rate": 0.00016711508916797063,
      "loss": 3.0407,
      "step": 148856
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.375574588775635,
      "learning_rate": 0.0001671114218012756,
      "loss": 3.0653,
      "step": 148857
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.447857856750488,
      "learning_rate": 0.0001671077544592872,
      "loss": 2.7885,
      "step": 148858
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6647908687591553,
      "learning_rate": 0.00016710408714200583,
      "loss": 2.9263,
      "step": 148859
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8409926891326904,
      "learning_rate": 0.00016710041984943245,
      "loss": 3.042,
      "step": 148860
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9808309078216553,
      "learning_rate": 0.0001670967525815675,
      "loss": 3.089,
      "step": 148861
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.797685384750366,
      "learning_rate": 0.00016709308533841203,
      "loss": 3.1411,
      "step": 148862
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4683902263641357,
      "learning_rate": 0.0001670894181199662,
      "loss": 2.7971,
      "step": 148863
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.345596790313721,
      "learning_rate": 0.00016708575092623106,
      "loss": 2.9533,
      "step": 148864
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.054731607437134,
      "learning_rate": 0.00016708208375720707,
      "loss": 3.0301,
      "step": 148865
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.7526628971099854,
      "learning_rate": 0.0001670784166128952,
      "loss": 3.0738,
      "step": 148866
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.6947953701019287,
      "learning_rate": 0.0001670747494932958,
      "loss": 2.9624,
      "step": 148867
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.996777296066284,
      "learning_rate": 0.00016707108239840985,
      "loss": 3.0805,
      "step": 148868
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6823363304138184,
      "learning_rate": 0.00016706741532823788,
      "loss": 2.8616,
      "step": 148869
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.26491641998291,
      "learning_rate": 0.00016706374828278058,
      "loss": 3.302,
      "step": 148870
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4746203422546387,
      "learning_rate": 0.00016706008126203854,
      "loss": 3.1378,
      "step": 148871
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1038570404052734,
      "learning_rate": 0.00016705641426601264,
      "loss": 2.8431,
      "step": 148872
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.2841362953186035,
      "learning_rate": 0.00016705274729470336,
      "loss": 3.0871,
      "step": 148873
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0186667442321777,
      "learning_rate": 0.0001670490803481116,
      "loss": 2.9186,
      "step": 148874
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0945987701416016,
      "learning_rate": 0.00016704541342623793,
      "loss": 3.0661,
      "step": 148875
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2264277935028076,
      "learning_rate": 0.000167041746529083,
      "loss": 2.9598,
      "step": 148876
    },
    {
      "epoch": 1.94,
      "grad_norm": 5.313977241516113,
      "learning_rate": 0.0001670380796566474,
      "loss": 2.8196,
      "step": 148877
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2629058361053467,
      "learning_rate": 0.0001670344128089321,
      "loss": 3.1795,
      "step": 148878
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3392038345336914,
      "learning_rate": 0.00016703074598593747,
      "loss": 3.051,
      "step": 148879
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.142400026321411,
      "learning_rate": 0.0001670270791876644,
      "loss": 3.0174,
      "step": 148880
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.030137062072754,
      "learning_rate": 0.00016702341241411358,
      "loss": 2.9491,
      "step": 148881
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.296201705932617,
      "learning_rate": 0.0001670197456652856,
      "loss": 2.8067,
      "step": 148882
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.001206874847412,
      "learning_rate": 0.000167016078941181,
      "loss": 2.9768,
      "step": 148883
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4650847911834717,
      "learning_rate": 0.00016701241224180077,
      "loss": 3.0015,
      "step": 148884
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1571733951568604,
      "learning_rate": 0.00016700874556714533,
      "loss": 2.958,
      "step": 148885
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7049479484558105,
      "learning_rate": 0.00016700507891721558,
      "loss": 3.0183,
      "step": 148886
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0186421871185303,
      "learning_rate": 0.00016700141229201212,
      "loss": 3.1989,
      "step": 148887
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2632153034210205,
      "learning_rate": 0.00016699774569153562,
      "loss": 3.0296,
      "step": 148888
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2016327381134033,
      "learning_rate": 0.00016699407911578655,
      "loss": 2.9272,
      "step": 148889
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0325796604156494,
      "learning_rate": 0.00016699041256476599,
      "loss": 2.7112,
      "step": 148890
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1831676959991455,
      "learning_rate": 0.0001669867460384743,
      "loss": 2.8704,
      "step": 148891
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5555906295776367,
      "learning_rate": 0.00016698307953691235,
      "loss": 2.8635,
      "step": 148892
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.817777395248413,
      "learning_rate": 0.00016697941306008078,
      "loss": 2.8672,
      "step": 148893
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4584126472473145,
      "learning_rate": 0.00016697574660798025,
      "loss": 2.8031,
      "step": 148894
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1753289699554443,
      "learning_rate": 0.00016697208018061135,
      "loss": 3.1818,
      "step": 148895
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0242292881011963,
      "learning_rate": 0.00016696841377797491,
      "loss": 2.7426,
      "step": 148896
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2012979984283447,
      "learning_rate": 0.00016696474740007152,
      "loss": 3.0674,
      "step": 148897
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1066598892211914,
      "learning_rate": 0.00016696108104690194,
      "loss": 3.1144,
      "step": 148898
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.342444896697998,
      "learning_rate": 0.00016695741471846684,
      "loss": 3.0406,
      "step": 148899
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.389904260635376,
      "learning_rate": 0.00016695374841476672,
      "loss": 2.9695,
      "step": 148900
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.530198812484741,
      "learning_rate": 0.00016695008213580255,
      "loss": 3.0355,
      "step": 148901
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.128450632095337,
      "learning_rate": 0.00016694641588157488,
      "loss": 2.8136,
      "step": 148902
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.156740665435791,
      "learning_rate": 0.00016694274965208425,
      "loss": 2.9726,
      "step": 148903
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6374545097351074,
      "learning_rate": 0.0001669390834473316,
      "loss": 2.9966,
      "step": 148904
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.138789176940918,
      "learning_rate": 0.0001669354172673175,
      "loss": 2.9155,
      "step": 148905
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.386078357696533,
      "learning_rate": 0.00016693175111204245,
      "loss": 2.8231,
      "step": 148906
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3217358589172363,
      "learning_rate": 0.00016692808498150748,
      "loss": 3.1517,
      "step": 148907
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5541300773620605,
      "learning_rate": 0.00016692441887571305,
      "loss": 2.933,
      "step": 148908
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4876086711883545,
      "learning_rate": 0.00016692075279465976,
      "loss": 3.1334,
      "step": 148909
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.153461217880249,
      "learning_rate": 0.00016691708673834853,
      "loss": 2.9893,
      "step": 148910
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4664111137390137,
      "learning_rate": 0.00016691342070677997,
      "loss": 2.9796,
      "step": 148911
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4770877361297607,
      "learning_rate": 0.00016690975469995457,
      "loss": 3.0091,
      "step": 148912
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2304601669311523,
      "learning_rate": 0.00016690608871787328,
      "loss": 2.7954,
      "step": 148913
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4777915477752686,
      "learning_rate": 0.0001669024227605365,
      "loss": 2.9646,
      "step": 148914
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2970669269561768,
      "learning_rate": 0.00016689875682794524,
      "loss": 3.0112,
      "step": 148915
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.5387778282165527,
      "learning_rate": 0.0001668950909201,
      "loss": 2.9013,
      "step": 148916
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5076067447662354,
      "learning_rate": 0.00016689142503700154,
      "loss": 2.9877,
      "step": 148917
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5339293479919434,
      "learning_rate": 0.00016688775917865026,
      "loss": 2.8853,
      "step": 148918
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.728114604949951,
      "learning_rate": 0.00016688409334504725,
      "loss": 2.9859,
      "step": 148919
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.391240119934082,
      "learning_rate": 0.00016688042753619284,
      "loss": 2.7686,
      "step": 148920
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3195576667785645,
      "learning_rate": 0.00016687676175208797,
      "loss": 2.7924,
      "step": 148921
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.329882860183716,
      "learning_rate": 0.00016687309599273313,
      "loss": 2.9634,
      "step": 148922
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.027378559112549,
      "learning_rate": 0.00016686943025812935,
      "loss": 2.7106,
      "step": 148923
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0577452182769775,
      "learning_rate": 0.00016686576454827678,
      "loss": 2.9765,
      "step": 148924
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1028261184692383,
      "learning_rate": 0.00016686209886317653,
      "loss": 3.0008,
      "step": 148925
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.137218713760376,
      "learning_rate": 0.00016685843320282898,
      "loss": 2.7771,
      "step": 148926
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1165125370025635,
      "learning_rate": 0.00016685476756723514,
      "loss": 2.7131,
      "step": 148927
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.602128505706787,
      "learning_rate": 0.00016685110195639532,
      "loss": 2.8238,
      "step": 148928
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2408626079559326,
      "learning_rate": 0.00016684743637031065,
      "loss": 2.9809,
      "step": 148929
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.564791440963745,
      "learning_rate": 0.00016684377080898133,
      "loss": 2.9206,
      "step": 148930
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.487961530685425,
      "learning_rate": 0.00016684010527240838,
      "loss": 2.9833,
      "step": 148931
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.657806396484375,
      "learning_rate": 0.00016683643976059224,
      "loss": 2.8498,
      "step": 148932
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.380068302154541,
      "learning_rate": 0.0001668327742735339,
      "loss": 3.2092,
      "step": 148933
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.380265235900879,
      "learning_rate": 0.0001668291088112337,
      "loss": 2.9655,
      "step": 148934
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.281102180480957,
      "learning_rate": 0.00016682544337369255,
      "loss": 3.0541,
      "step": 148935
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8347373008728027,
      "learning_rate": 0.0001668217779609111,
      "loss": 3.1125,
      "step": 148936
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0447769165039062,
      "learning_rate": 0.00016681811257289003,
      "loss": 3.0643,
      "step": 148937
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3620011806488037,
      "learning_rate": 0.0001668144472096298,
      "loss": 2.7839,
      "step": 148938
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5972206592559814,
      "learning_rate": 0.00016681078187113146,
      "loss": 3.0729,
      "step": 148939
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.674736738204956,
      "learning_rate": 0.00016680711655739537,
      "loss": 2.791,
      "step": 148940
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2798092365264893,
      "learning_rate": 0.0001668034512684225,
      "loss": 2.7417,
      "step": 148941
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7410428524017334,
      "learning_rate": 0.00016679978600421335,
      "loss": 3.216,
      "step": 148942
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8118393421173096,
      "learning_rate": 0.00016679612076476863,
      "loss": 2.9464,
      "step": 148943
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2978689670562744,
      "learning_rate": 0.00016679245555008894,
      "loss": 2.9553,
      "step": 148944
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.151494026184082,
      "learning_rate": 0.00016678879036017508,
      "loss": 3.0811,
      "step": 148945
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.861088514328003,
      "learning_rate": 0.00016678512519502765,
      "loss": 2.9701,
      "step": 148946
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3061978816986084,
      "learning_rate": 0.00016678146005464752,
      "loss": 3.1027,
      "step": 148947
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9469900131225586,
      "learning_rate": 0.00016677779493903518,
      "loss": 3.1781,
      "step": 148948
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8169732093811035,
      "learning_rate": 0.0001667741298481914,
      "loss": 3.0401,
      "step": 148949
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4914567470550537,
      "learning_rate": 0.00016677046478211666,
      "loss": 2.9653,
      "step": 148950
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1783437728881836,
      "learning_rate": 0.00016676679974081195,
      "loss": 3.0032,
      "step": 148951
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2765064239501953,
      "learning_rate": 0.0001667631347242777,
      "loss": 2.9825,
      "step": 148952
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.215003252029419,
      "learning_rate": 0.00016675946973251481,
      "loss": 3.1082,
      "step": 148953
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7601795196533203,
      "learning_rate": 0.00016675580476552387,
      "loss": 3.0228,
      "step": 148954
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.500972270965576,
      "learning_rate": 0.0001667521398233055,
      "loss": 2.9716,
      "step": 148955
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6225950717926025,
      "learning_rate": 0.0001667484749058603,
      "loss": 2.87,
      "step": 148956
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.047389030456543,
      "learning_rate": 0.00016674481001318923,
      "loss": 2.9564,
      "step": 148957
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1018471717834473,
      "learning_rate": 0.00016674114514529266,
      "loss": 2.9287,
      "step": 148958
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.389591693878174,
      "learning_rate": 0.0001667374803021716,
      "loss": 3.1242,
      "step": 148959
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0553171634674072,
      "learning_rate": 0.0001667338154838265,
      "loss": 2.9556,
      "step": 148960
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.324904203414917,
      "learning_rate": 0.00016673015069025815,
      "loss": 3.0001,
      "step": 148961
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.6510064601898193,
      "learning_rate": 0.000166726485921467,
      "loss": 2.4892,
      "step": 148962
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.147191047668457,
      "learning_rate": 0.00016672282117745406,
      "loss": 2.9528,
      "step": 148963
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.284111261367798,
      "learning_rate": 0.0001667191564582198,
      "loss": 3.0471,
      "step": 148964
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3565073013305664,
      "learning_rate": 0.00016671549176376502,
      "loss": 2.888,
      "step": 148965
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3232336044311523,
      "learning_rate": 0.00016671182709409037,
      "loss": 2.9962,
      "step": 148966
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.725884199142456,
      "learning_rate": 0.0001667081624491965,
      "loss": 2.7209,
      "step": 148967
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0224366188049316,
      "learning_rate": 0.00016670449782908391,
      "loss": 2.9449,
      "step": 148968
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0536558628082275,
      "learning_rate": 0.00016670083323375368,
      "loss": 3.0718,
      "step": 148969
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.3262109756469727,
      "learning_rate": 0.00016669716866320612,
      "loss": 2.9873,
      "step": 148970
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9709970951080322,
      "learning_rate": 0.00016669350411744222,
      "loss": 2.7985,
      "step": 148971
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2613296508789062,
      "learning_rate": 0.00016668983959646253,
      "loss": 3.0421,
      "step": 148972
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8535722494125366,
      "learning_rate": 0.00016668617510026768,
      "loss": 2.8813,
      "step": 148973
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.248662233352661,
      "learning_rate": 0.0001666825106288583,
      "loss": 2.8607,
      "step": 148974
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.930954694747925,
      "learning_rate": 0.00016667884618223522,
      "loss": 3.2221,
      "step": 148975
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.411461353302002,
      "learning_rate": 0.00016667518176039897,
      "loss": 3.0549,
      "step": 148976
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0928244590759277,
      "learning_rate": 0.00016667151736335045,
      "loss": 2.9916,
      "step": 148977
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2802021503448486,
      "learning_rate": 0.00016666785299109023,
      "loss": 2.7159,
      "step": 148978
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.42236328125,
      "learning_rate": 0.00016666418864361894,
      "loss": 3.1547,
      "step": 148979
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1407859325408936,
      "learning_rate": 0.00016666052432093715,
      "loss": 2.9803,
      "step": 148980
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3571672439575195,
      "learning_rate": 0.00016665686002304583,
      "loss": 2.976,
      "step": 148981
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0293750762939453,
      "learning_rate": 0.00016665319574994542,
      "loss": 2.7971,
      "step": 148982
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1966567039489746,
      "learning_rate": 0.0001666495315016368,
      "loss": 3.0126,
      "step": 148983
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7306222915649414,
      "learning_rate": 0.00016664586727812056,
      "loss": 2.7382,
      "step": 148984
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0914182662963867,
      "learning_rate": 0.00016664220307939726,
      "loss": 3.0248,
      "step": 148985
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.30523943901062,
      "learning_rate": 0.0001666385389054678,
      "loss": 2.9444,
      "step": 148986
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2370445728302,
      "learning_rate": 0.00016663487475633275,
      "loss": 2.9965,
      "step": 148987
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3766908645629883,
      "learning_rate": 0.00016663121063199265,
      "loss": 2.9281,
      "step": 148988
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.081404209136963,
      "learning_rate": 0.00016662754653244845,
      "loss": 2.9293,
      "step": 148989
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.435474157333374,
      "learning_rate": 0.00016662388245770076,
      "loss": 2.8802,
      "step": 148990
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1668384075164795,
      "learning_rate": 0.00016662021840775003,
      "loss": 2.9052,
      "step": 148991
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.295315742492676,
      "learning_rate": 0.00016661655438259725,
      "loss": 3.021,
      "step": 148992
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1220738887786865,
      "learning_rate": 0.00016661289038224296,
      "loss": 2.8947,
      "step": 148993
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2203986644744873,
      "learning_rate": 0.00016660922640668777,
      "loss": 2.9734,
      "step": 148994
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9893193244934082,
      "learning_rate": 0.00016660556245593256,
      "loss": 3.0026,
      "step": 148995
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3320789337158203,
      "learning_rate": 0.00016660189852997785,
      "loss": 2.8283,
      "step": 148996
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0783493518829346,
      "learning_rate": 0.00016659823462882432,
      "loss": 2.8598,
      "step": 148997
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.271186113357544,
      "learning_rate": 0.00016659457075247273,
      "loss": 2.8813,
      "step": 148998
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.357663154602051,
      "learning_rate": 0.00016659090690092365,
      "loss": 3.1507,
      "step": 148999
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.871476650238037,
      "learning_rate": 0.000166587243074178,
      "loss": 3.0389,
      "step": 149000
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9711813926696777,
      "learning_rate": 0.00016658357927223623,
      "loss": 3.0793,
      "step": 149001
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.195368766784668,
      "learning_rate": 0.000166579915495099,
      "loss": 3.1197,
      "step": 149002
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.15925931930542,
      "learning_rate": 0.0001665762517427672,
      "loss": 2.7824,
      "step": 149003
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3399083614349365,
      "learning_rate": 0.0001665725880152414,
      "loss": 2.8787,
      "step": 149004
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.3068156242370605,
      "learning_rate": 0.00016656892431252213,
      "loss": 2.8327,
      "step": 149005
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1249711513519287,
      "learning_rate": 0.00016656526063461036,
      "loss": 2.9955,
      "step": 149006
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.305194139480591,
      "learning_rate": 0.0001665615969815066,
      "loss": 2.9969,
      "step": 149007
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4186675548553467,
      "learning_rate": 0.00016655793335321146,
      "loss": 2.9516,
      "step": 149008
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.602155923843384,
      "learning_rate": 0.00016655426974972588,
      "loss": 3.0066,
      "step": 149009
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1115057468414307,
      "learning_rate": 0.00016655060617105032,
      "loss": 2.7765,
      "step": 149010
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0561959743499756,
      "learning_rate": 0.0001665469426171854,
      "loss": 2.9509,
      "step": 149011
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6581838130950928,
      "learning_rate": 0.00016654327908813206,
      "loss": 3.2508,
      "step": 149012
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3238022327423096,
      "learning_rate": 0.00016653961558389075,
      "loss": 2.9309,
      "step": 149013
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.084751129150391,
      "learning_rate": 0.00016653595210446235,
      "loss": 2.9484,
      "step": 149014
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1823902130126953,
      "learning_rate": 0.0001665322886498474,
      "loss": 3.1201,
      "step": 149015
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.452065944671631,
      "learning_rate": 0.00016652862522004662,
      "loss": 3.006,
      "step": 149016
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.7852783203125,
      "learning_rate": 0.00016652496181506063,
      "loss": 2.9101,
      "step": 149017
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.693000078201294,
      "learning_rate": 0.00016652129843489023,
      "loss": 2.997,
      "step": 149018
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.067394733428955,
      "learning_rate": 0.00016651763507953595,
      "loss": 3.1726,
      "step": 149019
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4652254581451416,
      "learning_rate": 0.00016651397174899865,
      "loss": 2.8308,
      "step": 149020
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.091383934020996,
      "learning_rate": 0.00016651030844327895,
      "loss": 3.0028,
      "step": 149021
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.784501075744629,
      "learning_rate": 0.00016650664516237748,
      "loss": 2.8774,
      "step": 149022
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.858429193496704,
      "learning_rate": 0.00016650298190629486,
      "loss": 3.0696,
      "step": 149023
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2304275035858154,
      "learning_rate": 0.00016649931867503197,
      "loss": 2.9604,
      "step": 149024
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.998018264770508,
      "learning_rate": 0.00016649565546858925,
      "loss": 2.8066,
      "step": 149025
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9348196983337402,
      "learning_rate": 0.0001664919922869676,
      "loss": 2.9166,
      "step": 149026
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6362428665161133,
      "learning_rate": 0.00016648832913016763,
      "loss": 2.8082,
      "step": 149027
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4098517894744873,
      "learning_rate": 0.00016648466599819,
      "loss": 2.9574,
      "step": 149028
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.152933120727539,
      "learning_rate": 0.00016648100289103526,
      "loss": 3.0061,
      "step": 149029
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.938270092010498,
      "learning_rate": 0.0001664773398087044,
      "loss": 2.9657,
      "step": 149030
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.8766965866088867,
      "learning_rate": 0.00016647367675119774,
      "loss": 2.8848,
      "step": 149031
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7481648921966553,
      "learning_rate": 0.00016647001371851626,
      "loss": 3.2752,
      "step": 149032
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9267390966415405,
      "learning_rate": 0.00016646635071066056,
      "loss": 2.8813,
      "step": 149033
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8415932655334473,
      "learning_rate": 0.00016646268772763125,
      "loss": 3.0191,
      "step": 149034
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3966455459594727,
      "learning_rate": 0.00016645902476942893,
      "loss": 2.9844,
      "step": 149035
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6066722869873047,
      "learning_rate": 0.00016645536183605456,
      "loss": 2.7914,
      "step": 149036
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3458447456359863,
      "learning_rate": 0.0001664516989275085,
      "loss": 2.7865,
      "step": 149037
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.657622814178467,
      "learning_rate": 0.00016644803604379173,
      "loss": 3.2924,
      "step": 149038
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.857144832611084,
      "learning_rate": 0.00016644437318490477,
      "loss": 2.7773,
      "step": 149039
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4871294498443604,
      "learning_rate": 0.0001664407103508484,
      "loss": 3.1894,
      "step": 149040
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.065164804458618,
      "learning_rate": 0.000166437047541623,
      "loss": 2.7912,
      "step": 149041
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8253105878829956,
      "learning_rate": 0.00016643338475722964,
      "loss": 3.1301,
      "step": 149042
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.664597749710083,
      "learning_rate": 0.0001664297219976687,
      "loss": 2.92,
      "step": 149043
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4282212257385254,
      "learning_rate": 0.00016642605926294113,
      "loss": 2.9312,
      "step": 149044
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.7630623579025269,
      "learning_rate": 0.00016642239655304748,
      "loss": 3.0141,
      "step": 149045
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2514488697052,
      "learning_rate": 0.0001664187338679884,
      "loss": 2.9689,
      "step": 149046
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4902150630950928,
      "learning_rate": 0.00016641507120776454,
      "loss": 3.1244,
      "step": 149047
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.3904712200164795,
      "learning_rate": 0.0001664114085723767,
      "loss": 2.9555,
      "step": 149048
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.458218574523926,
      "learning_rate": 0.0001664077459618254,
      "loss": 2.7927,
      "step": 149049
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.517050266265869,
      "learning_rate": 0.00016640408337611156,
      "loss": 3.0721,
      "step": 149050
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8895184993743896,
      "learning_rate": 0.0001664004208152357,
      "loss": 2.98,
      "step": 149051
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4187769889831543,
      "learning_rate": 0.00016639675827919852,
      "loss": 2.849,
      "step": 149052
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6165714263916016,
      "learning_rate": 0.0001663930957680006,
      "loss": 2.912,
      "step": 149053
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4874300956726074,
      "learning_rate": 0.00016638943328164285,
      "loss": 3.1751,
      "step": 149054
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.398581027984619,
      "learning_rate": 0.00016638577082012568,
      "loss": 2.9136,
      "step": 149055
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.873410224914551,
      "learning_rate": 0.00016638210838345008,
      "loss": 3.0208,
      "step": 149056
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.383657932281494,
      "learning_rate": 0.00016637844597161653,
      "loss": 3.0017,
      "step": 149057
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.665116310119629,
      "learning_rate": 0.00016637478358462574,
      "loss": 2.9656,
      "step": 149058
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.11895489692688,
      "learning_rate": 0.0001663711212224783,
      "loss": 2.9932,
      "step": 149059
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4129605293273926,
      "learning_rate": 0.0001663674588851751,
      "loss": 2.9732,
      "step": 149060
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3096678256988525,
      "learning_rate": 0.00016636379657271663,
      "loss": 2.8651,
      "step": 149061
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6263935565948486,
      "learning_rate": 0.00016636013428510376,
      "loss": 3.0524,
      "step": 149062
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2744603157043457,
      "learning_rate": 0.00016635647202233703,
      "loss": 2.9858,
      "step": 149063
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4576730728149414,
      "learning_rate": 0.00016635280978441718,
      "loss": 2.9935,
      "step": 149064
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.079320907592773,
      "learning_rate": 0.00016634914757134473,
      "loss": 2.8477,
      "step": 149065
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2777297496795654,
      "learning_rate": 0.00016634548538312064,
      "loss": 2.7729,
      "step": 149066
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7832119464874268,
      "learning_rate": 0.0001663418232197453,
      "loss": 2.9132,
      "step": 149067
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.558873414993286,
      "learning_rate": 0.00016633816108121966,
      "loss": 2.8104,
      "step": 149068
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.791816473007202,
      "learning_rate": 0.0001663344989675442,
      "loss": 2.8186,
      "step": 149069
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0530214309692383,
      "learning_rate": 0.00016633083687871988,
      "loss": 3.1,
      "step": 149070
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2358765602111816,
      "learning_rate": 0.00016632717481474696,
      "loss": 2.9606,
      "step": 149071
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.149730920791626,
      "learning_rate": 0.00016632351277562644,
      "loss": 2.8781,
      "step": 149072
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6486833095550537,
      "learning_rate": 0.0001663198507613588,
      "loss": 2.9495,
      "step": 149073
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0872364044189453,
      "learning_rate": 0.00016631618877194495,
      "loss": 2.8653,
      "step": 149074
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.573779344558716,
      "learning_rate": 0.00016631252680738532,
      "loss": 3.0046,
      "step": 149075
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.743682622909546,
      "learning_rate": 0.0001663088648676808,
      "loss": 3.1744,
      "step": 149076
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2137458324432373,
      "learning_rate": 0.00016630520295283203,
      "loss": 2.7802,
      "step": 149077
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.086379289627075,
      "learning_rate": 0.0001663015410628396,
      "loss": 3.0323,
      "step": 149078
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5158462524414062,
      "learning_rate": 0.00016629787919770415,
      "loss": 2.8696,
      "step": 149079
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.444159984588623,
      "learning_rate": 0.00016629421735742656,
      "loss": 2.973,
      "step": 149080
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9522440433502197,
      "learning_rate": 0.00016629055554200732,
      "loss": 3.1671,
      "step": 149081
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9333596229553223,
      "learning_rate": 0.00016628689375144725,
      "loss": 2.8035,
      "step": 149082
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3493125438690186,
      "learning_rate": 0.000166283231985747,
      "loss": 3.0995,
      "step": 149083
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9575718641281128,
      "learning_rate": 0.0001662795702449071,
      "loss": 3.0984,
      "step": 149084
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.021580219268799,
      "learning_rate": 0.00016627590852892846,
      "loss": 2.9968,
      "step": 149085
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.580695867538452,
      "learning_rate": 0.00016627224683781166,
      "loss": 2.9804,
      "step": 149086
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1839675903320312,
      "learning_rate": 0.00016626858517155725,
      "loss": 3.0796,
      "step": 149087
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1735281944274902,
      "learning_rate": 0.00016626492353016617,
      "loss": 3.027,
      "step": 149088
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5321898460388184,
      "learning_rate": 0.00016626126191363896,
      "loss": 3.0352,
      "step": 149089
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9659870862960815,
      "learning_rate": 0.00016625760032197616,
      "loss": 3.0637,
      "step": 149090
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3895998001098633,
      "learning_rate": 0.00016625393875517873,
      "loss": 2.7909,
      "step": 149091
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9235737323760986,
      "learning_rate": 0.0001662502772132472,
      "loss": 2.9857,
      "step": 149092
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6047234535217285,
      "learning_rate": 0.0001662466156961822,
      "loss": 2.8137,
      "step": 149093
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9488539695739746,
      "learning_rate": 0.00016624295420398456,
      "loss": 2.9936,
      "step": 149094
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.723248243331909,
      "learning_rate": 0.0001662392927366549,
      "loss": 3.0574,
      "step": 149095
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4074947834014893,
      "learning_rate": 0.00016623563129419374,
      "loss": 2.943,
      "step": 149096
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5038414001464844,
      "learning_rate": 0.00016623196987660205,
      "loss": 2.9185,
      "step": 149097
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9436955451965332,
      "learning_rate": 0.0001662283084838802,
      "loss": 3.226,
      "step": 149098
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7489700317382812,
      "learning_rate": 0.0001662246471160292,
      "loss": 3.0993,
      "step": 149099
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.130075216293335,
      "learning_rate": 0.00016622098577304952,
      "loss": 2.9476,
      "step": 149100
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2708051204681396,
      "learning_rate": 0.0001662173244549419,
      "loss": 2.958,
      "step": 149101
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5141196250915527,
      "learning_rate": 0.0001662136631617069,
      "loss": 3.0828,
      "step": 149102
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6507863998413086,
      "learning_rate": 0.00016621000189334542,
      "loss": 2.8378,
      "step": 149103
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0043208599090576,
      "learning_rate": 0.0001662063406498579,
      "loss": 2.9831,
      "step": 149104
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9757291078567505,
      "learning_rate": 0.00016620267943124524,
      "loss": 3.0322,
      "step": 149105
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.040673017501831,
      "learning_rate": 0.00016619901823750806,
      "loss": 3.1311,
      "step": 149106
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8578879833221436,
      "learning_rate": 0.000166195357068647,
      "loss": 3.2199,
      "step": 149107
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6795461177825928,
      "learning_rate": 0.00016619169592466265,
      "loss": 2.9307,
      "step": 149108
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9425379037857056,
      "learning_rate": 0.0001661880348055559,
      "loss": 2.9103,
      "step": 149109
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0317864418029785,
      "learning_rate": 0.00016618437371132718,
      "loss": 2.9885,
      "step": 149110
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.397023916244507,
      "learning_rate": 0.00016618071264197742,
      "loss": 3.1339,
      "step": 149111
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.603708028793335,
      "learning_rate": 0.0001661770515975072,
      "loss": 2.948,
      "step": 149112
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.430053949356079,
      "learning_rate": 0.00016617339057791724,
      "loss": 2.7643,
      "step": 149113
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7305686473846436,
      "learning_rate": 0.000166169729583208,
      "loss": 3.1532,
      "step": 149114
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.040050745010376,
      "learning_rate": 0.00016616606861338047,
      "loss": 2.7927,
      "step": 149115
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4191153049468994,
      "learning_rate": 0.0001661624076684351,
      "loss": 2.9593,
      "step": 149116
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.539482831954956,
      "learning_rate": 0.00016615874674837274,
      "loss": 2.8625,
      "step": 149117
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.929741144180298,
      "learning_rate": 0.000166155085853194,
      "loss": 2.9745,
      "step": 149118
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.141308546066284,
      "learning_rate": 0.00016615142498289958,
      "loss": 3.0472,
      "step": 149119
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.575789451599121,
      "learning_rate": 0.00016614776413749,
      "loss": 3.1551,
      "step": 149120
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.505297899246216,
      "learning_rate": 0.00016614410331696622,
      "loss": 2.9051,
      "step": 149121
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6038739681243896,
      "learning_rate": 0.00016614044252132864,
      "loss": 2.7606,
      "step": 149122
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2973570823669434,
      "learning_rate": 0.0001661367817505782,
      "loss": 3.1158,
      "step": 149123
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1759285926818848,
      "learning_rate": 0.00016613312100471547,
      "loss": 2.708,
      "step": 149124
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2114310264587402,
      "learning_rate": 0.00016612946028374113,
      "loss": 3.0807,
      "step": 149125
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6914689540863037,
      "learning_rate": 0.0001661257995876557,
      "loss": 3.0493,
      "step": 149126
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4037559032440186,
      "learning_rate": 0.00016612213891646017,
      "loss": 2.8628,
      "step": 149127
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.401334047317505,
      "learning_rate": 0.00016611847827015488,
      "loss": 2.7198,
      "step": 149128
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.145777463912964,
      "learning_rate": 0.00016611481764874084,
      "loss": 2.9031,
      "step": 149129
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.311056137084961,
      "learning_rate": 0.00016611115705221857,
      "loss": 2.8821,
      "step": 149130
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.048499822616577,
      "learning_rate": 0.0001661074964805888,
      "loss": 2.9378,
      "step": 149131
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3960628509521484,
      "learning_rate": 0.00016610383593385202,
      "loss": 3.1637,
      "step": 149132
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.706752300262451,
      "learning_rate": 0.0001661001754120092,
      "loss": 2.8042,
      "step": 149133
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.084998607635498,
      "learning_rate": 0.00016609651491506077,
      "loss": 2.8225,
      "step": 149134
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5408995151519775,
      "learning_rate": 0.00016609285444300765,
      "loss": 2.8701,
      "step": 149135
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6657304763793945,
      "learning_rate": 0.00016608919399585027,
      "loss": 2.6987,
      "step": 149136
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.704792022705078,
      "learning_rate": 0.0001660855335735897,
      "loss": 2.8028,
      "step": 149137
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1055002212524414,
      "learning_rate": 0.00016608187317622606,
      "loss": 3.1542,
      "step": 149138
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9287054538726807,
      "learning_rate": 0.00016607821280376048,
      "loss": 3.0274,
      "step": 149139
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.384260416030884,
      "learning_rate": 0.00016607455245619336,
      "loss": 2.8696,
      "step": 149140
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.128904342651367,
      "learning_rate": 0.00016607089213352562,
      "loss": 2.9681,
      "step": 149141
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.165757179260254,
      "learning_rate": 0.00016606723183575775,
      "loss": 2.926,
      "step": 149142
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8796274662017822,
      "learning_rate": 0.00016606357156289072,
      "loss": 3.1429,
      "step": 149143
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.978671669960022,
      "learning_rate": 0.00016605991131492476,
      "loss": 3.0487,
      "step": 149144
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.760571002960205,
      "learning_rate": 0.00016605625109186094,
      "loss": 3.1012,
      "step": 149145
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.632896423339844,
      "learning_rate": 0.00016605259089369964,
      "loss": 3.1565,
      "step": 149146
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.214750051498413,
      "learning_rate": 0.0001660489307204418,
      "loss": 2.8994,
      "step": 149147
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4747684001922607,
      "learning_rate": 0.00016604527057208793,
      "loss": 3.3994,
      "step": 149148
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9938371181488037,
      "learning_rate": 0.00016604161044863892,
      "loss": 2.9839,
      "step": 149149
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.395263433456421,
      "learning_rate": 0.00016603795035009515,
      "loss": 2.9719,
      "step": 149150
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7947447299957275,
      "learning_rate": 0.0001660342902764575,
      "loss": 2.9548,
      "step": 149151
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4302921295166016,
      "learning_rate": 0.00016603063022772657,
      "loss": 2.8132,
      "step": 149152
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.1883864402771,
      "learning_rate": 0.00016602697020390314,
      "loss": 2.8944,
      "step": 149153
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.211029291152954,
      "learning_rate": 0.0001660233102049877,
      "loss": 2.8376,
      "step": 149154
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.950392246246338,
      "learning_rate": 0.0001660196502309813,
      "loss": 3.0181,
      "step": 149155
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.155524492263794,
      "learning_rate": 0.00016601599028188415,
      "loss": 2.9436,
      "step": 149156
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7297134399414062,
      "learning_rate": 0.0001660123303576973,
      "loss": 2.9377,
      "step": 149157
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.07464337348938,
      "learning_rate": 0.00016600867045842114,
      "loss": 2.8527,
      "step": 149158
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4457485675811768,
      "learning_rate": 0.00016600501058405663,
      "loss": 2.9962,
      "step": 149159
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.143378496170044,
      "learning_rate": 0.00016600135073460416,
      "loss": 3.1808,
      "step": 149160
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.21164608001709,
      "learning_rate": 0.0001659976909100647,
      "loss": 3.0247,
      "step": 149161
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4809327125549316,
      "learning_rate": 0.0001659940311104388,
      "loss": 2.6985,
      "step": 149162
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9113519191741943,
      "learning_rate": 0.0001659903713357272,
      "loss": 2.9193,
      "step": 149163
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.133026123046875,
      "learning_rate": 0.0001659867115859303,
      "loss": 2.9869,
      "step": 149164
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9393067359924316,
      "learning_rate": 0.0001659830518610492,
      "loss": 2.9368,
      "step": 149165
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5122969150543213,
      "learning_rate": 0.00016597939216108422,
      "loss": 2.8791,
      "step": 149166
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3077735900878906,
      "learning_rate": 0.00016597573248603635,
      "loss": 2.9898,
      "step": 149167
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.025878429412842,
      "learning_rate": 0.00016597207283590608,
      "loss": 2.8695,
      "step": 149168
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5924935340881348,
      "learning_rate": 0.00016596841321069413,
      "loss": 2.831,
      "step": 149169
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.056076765060425,
      "learning_rate": 0.00016596475361040113,
      "loss": 3.2036,
      "step": 149170
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9682037830352783,
      "learning_rate": 0.00016596109403502787,
      "loss": 2.9549,
      "step": 149171
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1299991607666016,
      "learning_rate": 0.00016595743448457486,
      "loss": 3.0321,
      "step": 149172
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9935357570648193,
      "learning_rate": 0.00016595377495904305,
      "loss": 2.9625,
      "step": 149173
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.79278564453125,
      "learning_rate": 0.00016595011545843293,
      "loss": 2.9637,
      "step": 149174
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.689819574356079,
      "learning_rate": 0.0001659464559827451,
      "loss": 2.9167,
      "step": 149175
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.887573719024658,
      "learning_rate": 0.0001659427965319805,
      "loss": 3.052,
      "step": 149176
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.123103618621826,
      "learning_rate": 0.00016593913710613963,
      "loss": 2.9805,
      "step": 149177
    },
    {
      "epoch": 1.94,
      "grad_norm": 5.271327018737793,
      "learning_rate": 0.00016593547770522312,
      "loss": 3.02,
      "step": 149178
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1173596382141113,
      "learning_rate": 0.00016593181832923182,
      "loss": 2.8054,
      "step": 149179
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.61206316947937,
      "learning_rate": 0.00016592815897816633,
      "loss": 3.0504,
      "step": 149180
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2923288345336914,
      "learning_rate": 0.0001659244996520272,
      "loss": 3.0921,
      "step": 149181
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1952319145202637,
      "learning_rate": 0.00016592084035081537,
      "loss": 2.7351,
      "step": 149182
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0315988063812256,
      "learning_rate": 0.00016591718107453128,
      "loss": 3.0819,
      "step": 149183
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.435593366622925,
      "learning_rate": 0.00016591352182317582,
      "loss": 2.933,
      "step": 149184
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.259279251098633,
      "learning_rate": 0.0001659098625967496,
      "loss": 2.6934,
      "step": 149185
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.798384666442871,
      "learning_rate": 0.0001659062033952532,
      "loss": 2.6591,
      "step": 149186
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.559612989425659,
      "learning_rate": 0.00016590254421868728,
      "loss": 3.0924,
      "step": 149187
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8622641563415527,
      "learning_rate": 0.00016589888506705273,
      "loss": 2.9099,
      "step": 149188
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4986660480499268,
      "learning_rate": 0.00016589522594035003,
      "loss": 2.7342,
      "step": 149189
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2397263050079346,
      "learning_rate": 0.00016589156683858002,
      "loss": 2.7932,
      "step": 149190
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.124992609024048,
      "learning_rate": 0.00016588790776174328,
      "loss": 2.6446,
      "step": 149191
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1036112308502197,
      "learning_rate": 0.00016588424870984055,
      "loss": 2.9558,
      "step": 149192
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.163947105407715,
      "learning_rate": 0.00016588058968287233,
      "loss": 2.9209,
      "step": 149193
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.234351873397827,
      "learning_rate": 0.00016587693068083955,
      "loss": 2.8928,
      "step": 149194
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.304084539413452,
      "learning_rate": 0.0001658732717037427,
      "loss": 2.9199,
      "step": 149195
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1277778148651123,
      "learning_rate": 0.0001658696127515826,
      "loss": 3.2928,
      "step": 149196
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8454501628875732,
      "learning_rate": 0.0001658659538243599,
      "loss": 3.0218,
      "step": 149197
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.393123149871826,
      "learning_rate": 0.00016586229492207528,
      "loss": 2.7054,
      "step": 149198
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1623151302337646,
      "learning_rate": 0.00016585863604472924,
      "loss": 2.9673,
      "step": 149199
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4554848670959473,
      "learning_rate": 0.00016585497719232272,
      "loss": 3.1396,
      "step": 149200
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2491800785064697,
      "learning_rate": 0.0001658513183648562,
      "loss": 2.966,
      "step": 149201
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1457598209381104,
      "learning_rate": 0.00016584765956233056,
      "loss": 3.0492,
      "step": 149202
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.24661922454834,
      "learning_rate": 0.00016584400078474627,
      "loss": 3.1172,
      "step": 149203
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6341333389282227,
      "learning_rate": 0.00016584034203210435,
      "loss": 3.016,
      "step": 149204
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9777674674987793,
      "learning_rate": 0.00016583668330440496,
      "loss": 3.0454,
      "step": 149205
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9952142238616943,
      "learning_rate": 0.00016583302460164922,
      "loss": 2.9792,
      "step": 149206
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.40610671043396,
      "learning_rate": 0.00016582936592383752,
      "loss": 2.9851,
      "step": 149207
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9825985431671143,
      "learning_rate": 0.0001658257072709708,
      "loss": 3.1575,
      "step": 149208
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.543997287750244,
      "learning_rate": 0.0001658220486430495,
      "loss": 2.8013,
      "step": 149209
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6886003017425537,
      "learning_rate": 0.00016581839004007468,
      "loss": 2.7792,
      "step": 149210
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2834980487823486,
      "learning_rate": 0.00016581473146204647,
      "loss": 3.0813,
      "step": 149211
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5604450702667236,
      "learning_rate": 0.00016581107290896595,
      "loss": 2.9176,
      "step": 149212
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0712995529174805,
      "learning_rate": 0.00016580741438083357,
      "loss": 3.0405,
      "step": 149213
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0747265815734863,
      "learning_rate": 0.00016580375587765028,
      "loss": 2.9711,
      "step": 149214
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9959497451782227,
      "learning_rate": 0.00016580009739941643,
      "loss": 2.7961,
      "step": 149215
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4333126544952393,
      "learning_rate": 0.00016579643894613317,
      "loss": 2.8624,
      "step": 149216
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.595804452896118,
      "learning_rate": 0.0001657927805178006,
      "loss": 3.1021,
      "step": 149217
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.119591474533081,
      "learning_rate": 0.00016578912211441982,
      "loss": 3.0647,
      "step": 149218
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.260409355163574,
      "learning_rate": 0.00016578546373599128,
      "loss": 3.0192,
      "step": 149219
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2223081588745117,
      "learning_rate": 0.00016578180538251588,
      "loss": 2.5644,
      "step": 149220
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.266087770462036,
      "learning_rate": 0.000165778147053994,
      "loss": 2.9197,
      "step": 149221
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.3128740787506104,
      "learning_rate": 0.00016577448875042678,
      "loss": 3.0249,
      "step": 149222
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6369802951812744,
      "learning_rate": 0.00016577083047181437,
      "loss": 3.0695,
      "step": 149223
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.654585599899292,
      "learning_rate": 0.0001657671722181578,
      "loss": 2.9708,
      "step": 149224
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0732784271240234,
      "learning_rate": 0.00016576351398945753,
      "loss": 3.0208,
      "step": 149225
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.749809980392456,
      "learning_rate": 0.0001657598557857145,
      "loss": 2.9699,
      "step": 149226
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.338498115539551,
      "learning_rate": 0.00016575619760692911,
      "loss": 3.1182,
      "step": 149227
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4696199893951416,
      "learning_rate": 0.0001657525394531024,
      "loss": 2.9177,
      "step": 149228
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2074928283691406,
      "learning_rate": 0.00016574888132423463,
      "loss": 2.9843,
      "step": 149229
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1917288303375244,
      "learning_rate": 0.00016574522322032676,
      "loss": 3.1054,
      "step": 149230
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.203200101852417,
      "learning_rate": 0.0001657415651413793,
      "loss": 3.1548,
      "step": 149231
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2854902744293213,
      "learning_rate": 0.0001657379070873931,
      "loss": 2.9484,
      "step": 149232
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6762332916259766,
      "learning_rate": 0.00016573424905836866,
      "loss": 3.0381,
      "step": 149233
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.477585792541504,
      "learning_rate": 0.00016573059105430698,
      "loss": 3.0353,
      "step": 149234
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.219076156616211,
      "learning_rate": 0.00016572693307520832,
      "loss": 2.9865,
      "step": 149235
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0948867797851562,
      "learning_rate": 0.00016572327512107363,
      "loss": 3.0085,
      "step": 149236
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.8582022190093994,
      "learning_rate": 0.00016571961719190342,
      "loss": 2.7793,
      "step": 149237
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.380669355392456,
      "learning_rate": 0.00016571595928769857,
      "loss": 2.9636,
      "step": 149238
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.098402500152588,
      "learning_rate": 0.00016571230140845957,
      "loss": 2.9196,
      "step": 149239
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.221254825592041,
      "learning_rate": 0.00016570864355418738,
      "loss": 2.8006,
      "step": 149240
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.728530168533325,
      "learning_rate": 0.0001657049857248823,
      "loss": 3.0275,
      "step": 149241
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.672157049179077,
      "learning_rate": 0.00016570132792054532,
      "loss": 3.2458,
      "step": 149242
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4416403770446777,
      "learning_rate": 0.00016569767014117684,
      "loss": 3.0097,
      "step": 149243
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5337867736816406,
      "learning_rate": 0.00016569401238677783,
      "loss": 3.0108,
      "step": 149244
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2635936737060547,
      "learning_rate": 0.0001656903546573487,
      "loss": 3.0623,
      "step": 149245
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.6999127864837646,
      "learning_rate": 0.00016568669695289043,
      "loss": 3.0287,
      "step": 149246
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7298967838287354,
      "learning_rate": 0.0001656830392734035,
      "loss": 3.1823,
      "step": 149247
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.482367992401123,
      "learning_rate": 0.00016567938161888868,
      "loss": 3.1277,
      "step": 149248
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4748120307922363,
      "learning_rate": 0.00016567572398934642,
      "loss": 2.7224,
      "step": 149249
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.028749942779541,
      "learning_rate": 0.00016567206638477776,
      "loss": 2.6572,
      "step": 149250
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.507233142852783,
      "learning_rate": 0.000165668408805183,
      "loss": 2.9922,
      "step": 149251
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.359027862548828,
      "learning_rate": 0.0001656647512505632,
      "loss": 2.7989,
      "step": 149252
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.822661876678467,
      "learning_rate": 0.00016566109372091883,
      "loss": 2.7928,
      "step": 149253
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6184029579162598,
      "learning_rate": 0.00016565743621625058,
      "loss": 2.9999,
      "step": 149254
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5247387886047363,
      "learning_rate": 0.00016565377873655908,
      "loss": 2.8656,
      "step": 149255
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.070108652114868,
      "learning_rate": 0.00016565012128184515,
      "loss": 2.8959,
      "step": 149256
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0369367599487305,
      "learning_rate": 0.00016564646385210932,
      "loss": 3.0923,
      "step": 149257
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.868964195251465,
      "learning_rate": 0.00016564280644735244,
      "loss": 2.9856,
      "step": 149258
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.220444917678833,
      "learning_rate": 0.00016563914906757513,
      "loss": 2.9919,
      "step": 149259
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9206972122192383,
      "learning_rate": 0.00016563549171277789,
      "loss": 2.9597,
      "step": 149260
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9096426963806152,
      "learning_rate": 0.00016563183438296166,
      "loss": 2.8834,
      "step": 149261
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.7478456497192383,
      "learning_rate": 0.00016562817707812706,
      "loss": 3.1363,
      "step": 149262
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.730726957321167,
      "learning_rate": 0.00016562451979827456,
      "loss": 2.9849,
      "step": 149263
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.089362859725952,
      "learning_rate": 0.00016562086254340508,
      "loss": 3.0206,
      "step": 149264
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.548645257949829,
      "learning_rate": 0.0001656172053135193,
      "loss": 2.9629,
      "step": 149265
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.399166107177734,
      "learning_rate": 0.00016561354810861763,
      "loss": 2.9664,
      "step": 149266
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.154603958129883,
      "learning_rate": 0.00016560989092870111,
      "loss": 3.1826,
      "step": 149267
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.339888334274292,
      "learning_rate": 0.00016560623377377012,
      "loss": 2.9726,
      "step": 149268
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1058225631713867,
      "learning_rate": 0.00016560257664382556,
      "loss": 2.9661,
      "step": 149269
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.364445686340332,
      "learning_rate": 0.00016559891953886805,
      "loss": 2.9479,
      "step": 149270
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9578588008880615,
      "learning_rate": 0.0001655952624588982,
      "loss": 3.0457,
      "step": 149271
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.771661281585693,
      "learning_rate": 0.0001655916054039166,
      "loss": 2.9509,
      "step": 149272
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.6111562252044678,
      "learning_rate": 0.00016558794837392423,
      "loss": 2.8813,
      "step": 149273
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.789895534515381,
      "learning_rate": 0.00016558429136892143,
      "loss": 2.766,
      "step": 149274
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9778891801834106,
      "learning_rate": 0.0001655806343889092,
      "loss": 3.0155,
      "step": 149275
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.5496418476104736,
      "learning_rate": 0.00016557697743388807,
      "loss": 3.1715,
      "step": 149276
    },
    {
      "epoch": 1.94,
      "grad_norm": 5.437932968139648,
      "learning_rate": 0.00016557332050385866,
      "loss": 2.9152,
      "step": 149277
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.677074432373047,
      "learning_rate": 0.00016556966359882166,
      "loss": 3.0049,
      "step": 149278
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8929696083068848,
      "learning_rate": 0.0001655660067187779,
      "loss": 3.0945,
      "step": 149279
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6331472396850586,
      "learning_rate": 0.00016556234986372782,
      "loss": 2.9552,
      "step": 149280
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3155717849731445,
      "learning_rate": 0.00016555869303367236,
      "loss": 2.684,
      "step": 149281
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.885648727416992,
      "learning_rate": 0.00016555503622861194,
      "loss": 3.001,
      "step": 149282
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.9223313331604004,
      "learning_rate": 0.00016555137944854767,
      "loss": 3.1465,
      "step": 149283
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.211716413497925,
      "learning_rate": 0.00016554772269347965,
      "loss": 2.8163,
      "step": 149284
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1220409870147705,
      "learning_rate": 0.00016554406596340897,
      "loss": 2.8397,
      "step": 149285
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.805723190307617,
      "learning_rate": 0.0001655404092583361,
      "loss": 3.0662,
      "step": 149286
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1527159214019775,
      "learning_rate": 0.0001655367525782619,
      "loss": 2.9831,
      "step": 149287
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2045705318450928,
      "learning_rate": 0.0001655330959231868,
      "loss": 2.9401,
      "step": 149288
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.33016037940979,
      "learning_rate": 0.00016552943929311193,
      "loss": 2.8834,
      "step": 149289
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.561856746673584,
      "learning_rate": 0.00016552578268803743,
      "loss": 3.1065,
      "step": 149290
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3400661945343018,
      "learning_rate": 0.0001655221261079643,
      "loss": 2.8521,
      "step": 149291
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.427457332611084,
      "learning_rate": 0.00016551846955289306,
      "loss": 2.879,
      "step": 149292
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8444600105285645,
      "learning_rate": 0.0001655148130228246,
      "loss": 2.8829,
      "step": 149293
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2195346355438232,
      "learning_rate": 0.00016551115651775936,
      "loss": 2.7775,
      "step": 149294
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9258058071136475,
      "learning_rate": 0.00016550750003769838,
      "loss": 2.9899,
      "step": 149295
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8784687519073486,
      "learning_rate": 0.00016550384358264181,
      "loss": 2.9603,
      "step": 149296
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.250149965286255,
      "learning_rate": 0.00016550018715259077,
      "loss": 2.9972,
      "step": 149297
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.312940835952759,
      "learning_rate": 0.00016549653074754563,
      "loss": 3.0155,
      "step": 149298
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4503424167633057,
      "learning_rate": 0.0001654928743675074,
      "loss": 3.0408,
      "step": 149299
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.7634449005126953,
      "learning_rate": 0.0001654892180124764,
      "loss": 2.9897,
      "step": 149300
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2691047191619873,
      "learning_rate": 0.0001654855616824538,
      "loss": 3.1003,
      "step": 149301
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9948408603668213,
      "learning_rate": 0.0001654819053774397,
      "loss": 3.0131,
      "step": 149302
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4311656951904297,
      "learning_rate": 0.00016547824909743513,
      "loss": 3.1799,
      "step": 149303
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.200124502182007,
      "learning_rate": 0.00016547459284244065,
      "loss": 2.8068,
      "step": 149304
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.072777271270752,
      "learning_rate": 0.00016547093661245708,
      "loss": 2.7731,
      "step": 149305
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5755670070648193,
      "learning_rate": 0.00016546728040748484,
      "loss": 3.0015,
      "step": 149306
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.345485210418701,
      "learning_rate": 0.00016546362422752503,
      "loss": 2.8216,
      "step": 149307
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.788858652114868,
      "learning_rate": 0.00016545996807257785,
      "loss": 2.7996,
      "step": 149308
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.334909439086914,
      "learning_rate": 0.00016545631194264435,
      "loss": 2.9654,
      "step": 149309
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.265730142593384,
      "learning_rate": 0.00016545265583772488,
      "loss": 3.0777,
      "step": 149310
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5952327251434326,
      "learning_rate": 0.00016544899975782045,
      "loss": 3.0912,
      "step": 149311
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.363302230834961,
      "learning_rate": 0.0001654453437029315,
      "loss": 2.7418,
      "step": 149312
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.182131052017212,
      "learning_rate": 0.000165441687673059,
      "loss": 2.7873,
      "step": 149313
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9144145250320435,
      "learning_rate": 0.0001654380316682032,
      "loss": 3.1451,
      "step": 149314
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6491987705230713,
      "learning_rate": 0.00016543437568836513,
      "loss": 2.68,
      "step": 149315
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.322817802429199,
      "learning_rate": 0.00016543071973354522,
      "loss": 2.895,
      "step": 149316
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1592342853546143,
      "learning_rate": 0.0001654270638037444,
      "loss": 2.9317,
      "step": 149317
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.899295449256897,
      "learning_rate": 0.0001654234078989631,
      "loss": 2.9542,
      "step": 149318
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3755834102630615,
      "learning_rate": 0.0001654197520192024,
      "loss": 3.0303,
      "step": 149319
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1959376335144043,
      "learning_rate": 0.00016541609616446245,
      "loss": 2.9377,
      "step": 149320
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5037548542022705,
      "learning_rate": 0.0001654124403347443,
      "loss": 2.8687,
      "step": 149321
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0765488147735596,
      "learning_rate": 0.00016540878453004843,
      "loss": 2.8814,
      "step": 149322
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.42132568359375,
      "learning_rate": 0.0001654051287503757,
      "loss": 3.0023,
      "step": 149323
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0943374633789062,
      "learning_rate": 0.00016540147299572662,
      "loss": 2.7674,
      "step": 149324
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.051330327987671,
      "learning_rate": 0.00016539781726610214,
      "loss": 2.8746,
      "step": 149325
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0889968872070312,
      "learning_rate": 0.00016539416156150248,
      "loss": 3.04,
      "step": 149326
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4532556533813477,
      "learning_rate": 0.00016539050588192878,
      "loss": 3.0091,
      "step": 149327
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1189582347869873,
      "learning_rate": 0.00016538685022738137,
      "loss": 3.1738,
      "step": 149328
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0353081226348877,
      "learning_rate": 0.0001653831945978612,
      "loss": 2.9863,
      "step": 149329
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1376936435699463,
      "learning_rate": 0.00016537953899336877,
      "loss": 2.8028,
      "step": 149330
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.2867326736450195,
      "learning_rate": 0.00016537588341390496,
      "loss": 2.7714,
      "step": 149331
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6214675903320312,
      "learning_rate": 0.00016537222785947012,
      "loss": 2.7875,
      "step": 149332
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.233396053314209,
      "learning_rate": 0.00016536857233006528,
      "loss": 2.9027,
      "step": 149333
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.138761281967163,
      "learning_rate": 0.00016536491682569082,
      "loss": 3.0129,
      "step": 149334
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.376915454864502,
      "learning_rate": 0.00016536126134634768,
      "loss": 3.0003,
      "step": 149335
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.3507940769195557,
      "learning_rate": 0.0001653576058920363,
      "loss": 2.7383,
      "step": 149336
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4468889236450195,
      "learning_rate": 0.0001653539504627576,
      "loss": 3.2185,
      "step": 149337
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2019858360290527,
      "learning_rate": 0.0001653502950585122,
      "loss": 3.2397,
      "step": 149338
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2739663124084473,
      "learning_rate": 0.00016534663967930066,
      "loss": 2.9704,
      "step": 149339
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4911234378814697,
      "learning_rate": 0.0001653429843251236,
      "loss": 2.8249,
      "step": 149340
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.308053731918335,
      "learning_rate": 0.00016533932899598196,
      "loss": 3.0408,
      "step": 149341
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.815673589706421,
      "learning_rate": 0.00016533567369187614,
      "loss": 2.8256,
      "step": 149342
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.416264533996582,
      "learning_rate": 0.0001653320184128071,
      "loss": 2.8344,
      "step": 149343
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2071797847747803,
      "learning_rate": 0.0001653283631587754,
      "loss": 2.8419,
      "step": 149344
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6057188510894775,
      "learning_rate": 0.00016532470792978153,
      "loss": 3.0489,
      "step": 149345
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0377542972564697,
      "learning_rate": 0.00016532105272582648,
      "loss": 3.1122,
      "step": 149346
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4794387817382812,
      "learning_rate": 0.0001653173975469108,
      "loss": 3.0259,
      "step": 149347
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.5575990676879883,
      "learning_rate": 0.00016531374239303503,
      "loss": 3.0222,
      "step": 149348
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.107064962387085,
      "learning_rate": 0.0001653100872642001,
      "loss": 3.0584,
      "step": 149349
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.0092713832855225,
      "learning_rate": 0.0001653064321604066,
      "loss": 3.0738,
      "step": 149350
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.6916847229003906,
      "learning_rate": 0.000165302777081655,
      "loss": 2.9587,
      "step": 149351
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.9329960346221924,
      "learning_rate": 0.0001652991220279463,
      "loss": 3.0542,
      "step": 149352
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.8338749408721924,
      "learning_rate": 0.00016529546699928105,
      "loss": 3.0227,
      "step": 149353
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0355124473571777,
      "learning_rate": 0.00016529181199565976,
      "loss": 2.8578,
      "step": 149354
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.644822120666504,
      "learning_rate": 0.00016528815701708342,
      "loss": 2.9761,
      "step": 149355
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.1375694274902344,
      "learning_rate": 0.00016528450206355251,
      "loss": 2.9945,
      "step": 149356
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.434753894805908,
      "learning_rate": 0.00016528084713506765,
      "loss": 3.2486,
      "step": 149357
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.3420469760894775,
      "learning_rate": 0.0001652771922316298,
      "loss": 3.0134,
      "step": 149358
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.452033042907715,
      "learning_rate": 0.00016527353735323929,
      "loss": 2.9544,
      "step": 149359
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.803056240081787,
      "learning_rate": 0.0001652698824998971,
      "loss": 2.8658,
      "step": 149360
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.082327127456665,
      "learning_rate": 0.00016526622767160378,
      "loss": 2.9179,
      "step": 149361
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.4132235050201416,
      "learning_rate": 0.00016526257286836004,
      "loss": 3.1554,
      "step": 149362
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.737095355987549,
      "learning_rate": 0.00016525891809016639,
      "loss": 3.0617,
      "step": 149363
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.056652545928955,
      "learning_rate": 0.00016525526333702377,
      "loss": 2.8943,
      "step": 149364
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2670211791992188,
      "learning_rate": 0.00016525160860893259,
      "loss": 3.0255,
      "step": 149365
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.8452725410461426,
      "learning_rate": 0.00016524795390589382,
      "loss": 2.7304,
      "step": 149366
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.754643201828003,
      "learning_rate": 0.0001652442992279079,
      "loss": 3.1302,
      "step": 149367
    },
    {
      "epoch": 1.94,
      "grad_norm": 1.9742047786712646,
      "learning_rate": 0.00016524064457497584,
      "loss": 2.9399,
      "step": 149368
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.096212148666382,
      "learning_rate": 0.00016523698994709785,
      "loss": 2.976,
      "step": 149369
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.751499891281128,
      "learning_rate": 0.00016523333534427495,
      "loss": 2.9495,
      "step": 149370
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.146071672439575,
      "learning_rate": 0.0001652296807665076,
      "loss": 2.8818,
      "step": 149371
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.647217035293579,
      "learning_rate": 0.00016522602621379674,
      "loss": 3.1141,
      "step": 149372
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.4776344299316406,
      "learning_rate": 0.00016522237168614277,
      "loss": 3.2284,
      "step": 149373
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.0585663318634033,
      "learning_rate": 0.00016521871718354675,
      "loss": 3.0803,
      "step": 149374
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.777371644973755,
      "learning_rate": 0.00016521506270600887,
      "loss": 3.2166,
      "step": 149375
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.648111581802368,
      "learning_rate": 0.00016521140825353017,
      "loss": 3.0609,
      "step": 149376
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.2952005863189697,
      "learning_rate": 0.00016520775382611108,
      "loss": 2.8812,
      "step": 149377
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2411770820617676,
      "learning_rate": 0.00016520409942375256,
      "loss": 3.045,
      "step": 149378
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3248538970947266,
      "learning_rate": 0.000165200445046455,
      "loss": 2.9576,
      "step": 149379
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.454913854598999,
      "learning_rate": 0.0001651967906942195,
      "loss": 2.9943,
      "step": 149380
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0369672775268555,
      "learning_rate": 0.00016519313636704618,
      "loss": 3.124,
      "step": 149381
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.232975721359253,
      "learning_rate": 0.00016518948206493614,
      "loss": 2.9334,
      "step": 149382
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.296233892440796,
      "learning_rate": 0.0001651858277878898,
      "loss": 2.9321,
      "step": 149383
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.340914487838745,
      "learning_rate": 0.0001651821735359081,
      "loss": 3.0548,
      "step": 149384
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3549160957336426,
      "learning_rate": 0.0001651785193089914,
      "loss": 2.979,
      "step": 149385
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.613900661468506,
      "learning_rate": 0.00016517486510714087,
      "loss": 2.859,
      "step": 149386
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.335181713104248,
      "learning_rate": 0.00016517121093035658,
      "loss": 2.9954,
      "step": 149387
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1757843494415283,
      "learning_rate": 0.00016516755677863963,
      "loss": 2.4609,
      "step": 149388
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.323233127593994,
      "learning_rate": 0.0001651639026519905,
      "loss": 2.7855,
      "step": 149389
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0426971912384033,
      "learning_rate": 0.00016516024855041004,
      "loss": 2.8472,
      "step": 149390
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7846109867095947,
      "learning_rate": 0.0001651565944738987,
      "loss": 3.0963,
      "step": 149391
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2379796504974365,
      "learning_rate": 0.00016515294042245758,
      "loss": 2.9681,
      "step": 149392
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.459496259689331,
      "learning_rate": 0.00016514928639608676,
      "loss": 2.9761,
      "step": 149393
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3369717597961426,
      "learning_rate": 0.00016514563239478742,
      "loss": 2.9891,
      "step": 149394
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.193028211593628,
      "learning_rate": 0.0001651419784185599,
      "loss": 3.0396,
      "step": 149395
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.073422908782959,
      "learning_rate": 0.00016513832446740518,
      "loss": 2.9581,
      "step": 149396
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.463813304901123,
      "learning_rate": 0.0001651346705413236,
      "loss": 2.9251,
      "step": 149397
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0356931686401367,
      "learning_rate": 0.00016513101664031635,
      "loss": 2.7224,
      "step": 149398
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4625132083892822,
      "learning_rate": 0.00016512736276438352,
      "loss": 3.0993,
      "step": 149399
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.187816619873047,
      "learning_rate": 0.0001651237089135262,
      "loss": 2.9785,
      "step": 149400
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0629806518554688,
      "learning_rate": 0.0001651200550877448,
      "loss": 3.1203,
      "step": 149401
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.269503355026245,
      "learning_rate": 0.00016511640128704023,
      "loss": 2.9349,
      "step": 149402
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.146142959594727,
      "learning_rate": 0.00016511274751141293,
      "loss": 2.9303,
      "step": 149403
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5801310539245605,
      "learning_rate": 0.00016510909376086385,
      "loss": 2.8881,
      "step": 149404
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9983772039413452,
      "learning_rate": 0.00016510544003539354,
      "loss": 2.9908,
      "step": 149405
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.433441400527954,
      "learning_rate": 0.00016510178633500272,
      "loss": 2.7422,
      "step": 149406
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3411037921905518,
      "learning_rate": 0.00016509813265969188,
      "loss": 3.1907,
      "step": 149407
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1148440837860107,
      "learning_rate": 0.0001650944790094619,
      "loss": 2.9898,
      "step": 149408
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.441303014755249,
      "learning_rate": 0.00016509082538431334,
      "loss": 3.1477,
      "step": 149409
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2145190238952637,
      "learning_rate": 0.00016508717178424705,
      "loss": 3.1202,
      "step": 149410
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6779706478118896,
      "learning_rate": 0.00016508351820926365,
      "loss": 2.8441,
      "step": 149411
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1299729347229004,
      "learning_rate": 0.0001650798646593637,
      "loss": 3.1885,
      "step": 149412
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.621384859085083,
      "learning_rate": 0.00016507621113454786,
      "loss": 2.8228,
      "step": 149413
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.164369583129883,
      "learning_rate": 0.00016507255763481703,
      "loss": 2.8464,
      "step": 149414
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7724738121032715,
      "learning_rate": 0.0001650689041601716,
      "loss": 3.0175,
      "step": 149415
    },
    {
      "epoch": 1.95,
      "grad_norm": 5.120076656341553,
      "learning_rate": 0.00016506525071061255,
      "loss": 2.8576,
      "step": 149416
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.787233591079712,
      "learning_rate": 0.00016506159728614043,
      "loss": 3.281,
      "step": 149417
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.535040855407715,
      "learning_rate": 0.0001650579438867559,
      "loss": 3.1239,
      "step": 149418
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1666224002838135,
      "learning_rate": 0.0001650542905124595,
      "loss": 2.9026,
      "step": 149419
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.813929796218872,
      "learning_rate": 0.00016505063716325223,
      "loss": 3.0143,
      "step": 149420
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2482495307922363,
      "learning_rate": 0.00016504698383913442,
      "loss": 3.17,
      "step": 149421
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.70703125,
      "learning_rate": 0.00016504333054010706,
      "loss": 3.1119,
      "step": 149422
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7355892658233643,
      "learning_rate": 0.0001650396772661707,
      "loss": 2.9909,
      "step": 149423
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7120015621185303,
      "learning_rate": 0.00016503602401732602,
      "loss": 2.8924,
      "step": 149424
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.9909451007843018,
      "learning_rate": 0.00016503237079357354,
      "loss": 2.9206,
      "step": 149425
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.286294460296631,
      "learning_rate": 0.00016502871759491424,
      "loss": 3.184,
      "step": 149426
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2772982120513916,
      "learning_rate": 0.00016502506442134853,
      "loss": 2.7844,
      "step": 149427
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.17549729347229,
      "learning_rate": 0.0001650214112728773,
      "loss": 2.7772,
      "step": 149428
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.991509437561035,
      "learning_rate": 0.00016501775814950116,
      "loss": 2.9997,
      "step": 149429
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4958860874176025,
      "learning_rate": 0.0001650141050512206,
      "loss": 3.1745,
      "step": 149430
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6171276569366455,
      "learning_rate": 0.00016501045197803667,
      "loss": 2.7449,
      "step": 149431
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3394148349761963,
      "learning_rate": 0.00016500679892994977,
      "loss": 2.7995,
      "step": 149432
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.9122748374938965,
      "learning_rate": 0.0001650031459069606,
      "loss": 2.8979,
      "step": 149433
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.121638536453247,
      "learning_rate": 0.00016499949290906996,
      "loss": 2.9106,
      "step": 149434
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.674915075302124,
      "learning_rate": 0.0001649958399362785,
      "loss": 2.8178,
      "step": 149435
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4320926666259766,
      "learning_rate": 0.0001649921869885867,
      "loss": 2.8684,
      "step": 149436
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.975243330001831,
      "learning_rate": 0.00016498853406599556,
      "loss": 2.9082,
      "step": 149437
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1719677448272705,
      "learning_rate": 0.00016498488116850558,
      "loss": 2.8338,
      "step": 149438
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3573639392852783,
      "learning_rate": 0.00016498122829611736,
      "loss": 2.9489,
      "step": 149439
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.015902519226074,
      "learning_rate": 0.00016497757544883178,
      "loss": 2.8489,
      "step": 149440
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9110051393508911,
      "learning_rate": 0.0001649739226266494,
      "loss": 2.9581,
      "step": 149441
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.714238166809082,
      "learning_rate": 0.0001649702698295708,
      "loss": 2.7475,
      "step": 149442
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2479710578918457,
      "learning_rate": 0.00016496661705759692,
      "loss": 3.0004,
      "step": 149443
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2827765941619873,
      "learning_rate": 0.00016496296431072816,
      "loss": 2.9485,
      "step": 149444
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.353767156600952,
      "learning_rate": 0.00016495931158896546,
      "loss": 3.0128,
      "step": 149445
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2250192165374756,
      "learning_rate": 0.0001649556588923094,
      "loss": 2.9795,
      "step": 149446
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4327409267425537,
      "learning_rate": 0.0001649520062207606,
      "loss": 2.9056,
      "step": 149447
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.335184335708618,
      "learning_rate": 0.00016494835357431964,
      "loss": 3.0935,
      "step": 149448
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.766079902648926,
      "learning_rate": 0.00016494470095298748,
      "loss": 2.8064,
      "step": 149449
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2687299251556396,
      "learning_rate": 0.0001649410483567645,
      "loss": 2.8462,
      "step": 149450
    },
    {
      "epoch": 1.95,
      "grad_norm": 5.701719760894775,
      "learning_rate": 0.00016493739578565166,
      "loss": 2.6833,
      "step": 149451
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4842476844787598,
      "learning_rate": 0.00016493374323964951,
      "loss": 2.8539,
      "step": 149452
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4059014320373535,
      "learning_rate": 0.0001649300907187587,
      "loss": 3.0339,
      "step": 149453
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.170442581176758,
      "learning_rate": 0.00016492643822297988,
      "loss": 3.2265,
      "step": 149454
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3669745922088623,
      "learning_rate": 0.00016492278575231387,
      "loss": 2.8306,
      "step": 149455
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.305732250213623,
      "learning_rate": 0.00016491913330676116,
      "loss": 3.2269,
      "step": 149456
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.671110153198242,
      "learning_rate": 0.0001649154808863226,
      "loss": 3.0259,
      "step": 149457
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.207320213317871,
      "learning_rate": 0.00016491182849099877,
      "loss": 2.9272,
      "step": 149458
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.6629045009613037,
      "learning_rate": 0.00016490817612079055,
      "loss": 2.8117,
      "step": 149459
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.361909866333008,
      "learning_rate": 0.00016490452377569822,
      "loss": 3.0861,
      "step": 149460
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2752370834350586,
      "learning_rate": 0.0001649008714557228,
      "loss": 2.9027,
      "step": 149461
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.316387176513672,
      "learning_rate": 0.00016489721916086478,
      "loss": 3.1239,
      "step": 149462
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8093602657318115,
      "learning_rate": 0.000164893566891125,
      "loss": 3.0402,
      "step": 149463
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.565011739730835,
      "learning_rate": 0.00016488991464650395,
      "loss": 3.0553,
      "step": 149464
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4438655376434326,
      "learning_rate": 0.00016488626242700265,
      "loss": 2.9715,
      "step": 149465
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9805569648742676,
      "learning_rate": 0.00016488261023262134,
      "loss": 3.0255,
      "step": 149466
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9833441972732544,
      "learning_rate": 0.000164878958063361,
      "loss": 3.0008,
      "step": 149467
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0972070693969727,
      "learning_rate": 0.00016487530591922207,
      "loss": 3.0423,
      "step": 149468
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.522123098373413,
      "learning_rate": 0.0001648716538002055,
      "loss": 2.9056,
      "step": 149469
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4408493041992188,
      "learning_rate": 0.00016486800170631172,
      "loss": 2.9337,
      "step": 149470
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7104263305664062,
      "learning_rate": 0.00016486434963754166,
      "loss": 3.2334,
      "step": 149471
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1983089447021484,
      "learning_rate": 0.00016486069759389586,
      "loss": 2.9032,
      "step": 149472
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3211488723754883,
      "learning_rate": 0.00016485704557537501,
      "loss": 2.9986,
      "step": 149473
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2953765392303467,
      "learning_rate": 0.00016485339358197969,
      "loss": 3.0442,
      "step": 149474
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.260287284851074,
      "learning_rate": 0.0001648497416137108,
      "loss": 2.9355,
      "step": 149475
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.619682550430298,
      "learning_rate": 0.0001648460896705687,
      "loss": 3.0158,
      "step": 149476
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3328628540039062,
      "learning_rate": 0.00016484243775255443,
      "loss": 3.0544,
      "step": 149477
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2708165645599365,
      "learning_rate": 0.00016483878585966851,
      "loss": 2.8475,
      "step": 149478
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5287630558013916,
      "learning_rate": 0.00016483513399191162,
      "loss": 2.9226,
      "step": 149479
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2718515396118164,
      "learning_rate": 0.00016483148214928428,
      "loss": 2.8398,
      "step": 149480
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6876628398895264,
      "learning_rate": 0.00016482783033178747,
      "loss": 3.0693,
      "step": 149481
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.60042142868042,
      "learning_rate": 0.00016482417853942155,
      "loss": 2.9305,
      "step": 149482
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.471719980239868,
      "learning_rate": 0.00016482052677218757,
      "loss": 3.0197,
      "step": 149483
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.936506986618042,
      "learning_rate": 0.0001648168750300859,
      "loss": 2.7733,
      "step": 149484
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0999860763549805,
      "learning_rate": 0.00016481322331311738,
      "loss": 2.9125,
      "step": 149485
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2275431156158447,
      "learning_rate": 0.0001648095716212825,
      "loss": 2.729,
      "step": 149486
    },
    {
      "epoch": 1.95,
      "grad_norm": 6.124821662902832,
      "learning_rate": 0.00016480591995458223,
      "loss": 2.849,
      "step": 149487
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.260756015777588,
      "learning_rate": 0.00016480226831301693,
      "loss": 2.9506,
      "step": 149488
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.955385446548462,
      "learning_rate": 0.0001647986166965876,
      "loss": 3.2095,
      "step": 149489
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.410618782043457,
      "learning_rate": 0.00016479496510529473,
      "loss": 3.0124,
      "step": 149490
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.745339870452881,
      "learning_rate": 0.000164791313539139,
      "loss": 3.0668,
      "step": 149491
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.209144353866577,
      "learning_rate": 0.00016478766199812107,
      "loss": 2.9171,
      "step": 149492
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.301316261291504,
      "learning_rate": 0.00016478401048224172,
      "loss": 3.0641,
      "step": 149493
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4062161445617676,
      "learning_rate": 0.0001647803589915015,
      "loss": 3.1635,
      "step": 149494
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.040785551071167,
      "learning_rate": 0.00016477670752590125,
      "loss": 2.8771,
      "step": 149495
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.814333438873291,
      "learning_rate": 0.0001647730560854416,
      "loss": 2.9794,
      "step": 149496
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.448586940765381,
      "learning_rate": 0.00016476940467012317,
      "loss": 2.8213,
      "step": 149497
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1737966537475586,
      "learning_rate": 0.00016476575327994654,
      "loss": 2.8063,
      "step": 149498
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0078749656677246,
      "learning_rate": 0.00016476210191491262,
      "loss": 2.7228,
      "step": 149499
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.124310255050659,
      "learning_rate": 0.00016475845057502188,
      "loss": 3.2019,
      "step": 149500
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.286824941635132,
      "learning_rate": 0.0001647547992602752,
      "loss": 2.9151,
      "step": 149501
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.798252820968628,
      "learning_rate": 0.00016475114797067316,
      "loss": 3.0785,
      "step": 149502
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.6394290924072266,
      "learning_rate": 0.00016474749670621644,
      "loss": 3.0668,
      "step": 149503
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7362008094787598,
      "learning_rate": 0.0001647438454669056,
      "loss": 3.0451,
      "step": 149504
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3595175743103027,
      "learning_rate": 0.00016474019425274152,
      "loss": 2.7415,
      "step": 149505
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.991523027420044,
      "learning_rate": 0.0001647365430637247,
      "loss": 2.8445,
      "step": 149506
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2318813800811768,
      "learning_rate": 0.000164732891899856,
      "loss": 3.027,
      "step": 149507
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.364014148712158,
      "learning_rate": 0.00016472924076113606,
      "loss": 2.9997,
      "step": 149508
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.673413038253784,
      "learning_rate": 0.00016472558964756543,
      "loss": 3.0088,
      "step": 149509
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.120986223220825,
      "learning_rate": 0.00016472193855914484,
      "loss": 3.0685,
      "step": 149510
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1562137603759766,
      "learning_rate": 0.00016471828749587507,
      "loss": 3.0433,
      "step": 149511
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6209774017333984,
      "learning_rate": 0.00016471463645775658,
      "loss": 2.8606,
      "step": 149512
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.775421619415283,
      "learning_rate": 0.00016471098544479034,
      "loss": 2.888,
      "step": 149513
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2279326915740967,
      "learning_rate": 0.00016470733445697686,
      "loss": 2.7914,
      "step": 149514
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2721850872039795,
      "learning_rate": 0.00016470368349431687,
      "loss": 3.0525,
      "step": 149515
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3734798431396484,
      "learning_rate": 0.0001647000325568109,
      "loss": 2.688,
      "step": 149516
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.680899143218994,
      "learning_rate": 0.00016469638164445983,
      "loss": 3.0042,
      "step": 149517
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.033975601196289,
      "learning_rate": 0.0001646927307572642,
      "loss": 2.9169,
      "step": 149518
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.092087745666504,
      "learning_rate": 0.0001646890798952248,
      "loss": 2.9691,
      "step": 149519
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.089883804321289,
      "learning_rate": 0.0001646854290583423,
      "loss": 2.9354,
      "step": 149520
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.948264718055725,
      "learning_rate": 0.00016468177824661718,
      "loss": 2.9303,
      "step": 149521
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.593719720840454,
      "learning_rate": 0.0001646781274600504,
      "loss": 2.9717,
      "step": 149522
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.109691619873047,
      "learning_rate": 0.00016467447669864252,
      "loss": 3.2336,
      "step": 149523
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0413668155670166,
      "learning_rate": 0.00016467082596239408,
      "loss": 2.8101,
      "step": 149524
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.190079689025879,
      "learning_rate": 0.00016466717525130602,
      "loss": 2.9705,
      "step": 149525
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2041237354278564,
      "learning_rate": 0.00016466352456537885,
      "loss": 2.9446,
      "step": 149526
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7790424823760986,
      "learning_rate": 0.00016465987390461325,
      "loss": 2.968,
      "step": 149527
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1947407722473145,
      "learning_rate": 0.00016465622326900995,
      "loss": 2.9945,
      "step": 149528
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3420474529266357,
      "learning_rate": 0.00016465257265856954,
      "loss": 2.7199,
      "step": 149529
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.625406265258789,
      "learning_rate": 0.00016464892207329295,
      "loss": 2.7644,
      "step": 149530
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.015403985977173,
      "learning_rate": 0.0001646452715131806,
      "loss": 2.983,
      "step": 149531
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.264773368835449,
      "learning_rate": 0.00016464162097823315,
      "loss": 3.2329,
      "step": 149532
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4661197662353516,
      "learning_rate": 0.00016463797046845148,
      "loss": 3.0906,
      "step": 149533
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1471107006073,
      "learning_rate": 0.00016463431998383623,
      "loss": 2.9931,
      "step": 149534
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.331434726715088,
      "learning_rate": 0.00016463066952438784,
      "loss": 3.0323,
      "step": 149535
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.596186399459839,
      "learning_rate": 0.00016462701909010732,
      "loss": 3.1317,
      "step": 149536
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.572378158569336,
      "learning_rate": 0.00016462336868099513,
      "loss": 2.9299,
      "step": 149537
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1377105712890625,
      "learning_rate": 0.00016461971829705193,
      "loss": 2.8823,
      "step": 149538
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.085359573364258,
      "learning_rate": 0.00016461606793827862,
      "loss": 2.7772,
      "step": 149539
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.9984192848205566,
      "learning_rate": 0.00016461241760467568,
      "loss": 3.1357,
      "step": 149540
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4735829830169678,
      "learning_rate": 0.0001646087672962438,
      "loss": 2.9097,
      "step": 149541
    },
    {
      "epoch": 1.95,
      "grad_norm": 6.776220321655273,
      "learning_rate": 0.0001646051170129838,
      "loss": 2.8562,
      "step": 149542
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.241260528564453,
      "learning_rate": 0.0001646014667548961,
      "loss": 2.9065,
      "step": 149543
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.801137685775757,
      "learning_rate": 0.00016459781652198172,
      "loss": 2.7891,
      "step": 149544
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.812621831893921,
      "learning_rate": 0.00016459416631424114,
      "loss": 2.6418,
      "step": 149545
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1603314876556396,
      "learning_rate": 0.00016459051613167507,
      "loss": 3.1802,
      "step": 149546
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.414719581604004,
      "learning_rate": 0.00016458686597428402,
      "loss": 2.9263,
      "step": 149547
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9226114749908447,
      "learning_rate": 0.000164583215842069,
      "loss": 2.6949,
      "step": 149548
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.372225046157837,
      "learning_rate": 0.0001645795657350304,
      "loss": 2.8503,
      "step": 149549
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.124030828475952,
      "learning_rate": 0.0001645759156531691,
      "loss": 3.0152,
      "step": 149550
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9011597633361816,
      "learning_rate": 0.00016457226559648573,
      "loss": 2.9766,
      "step": 149551
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.861942768096924,
      "learning_rate": 0.00016456861556498088,
      "loss": 2.8946,
      "step": 149552
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.286851644515991,
      "learning_rate": 0.00016456496555865516,
      "loss": 2.9962,
      "step": 149553
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.50502872467041,
      "learning_rate": 0.00016456131557750954,
      "loss": 3.0226,
      "step": 149554
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.281754732131958,
      "learning_rate": 0.0001645576656215444,
      "loss": 2.9901,
      "step": 149555
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.046861410140991,
      "learning_rate": 0.00016455401569076065,
      "loss": 3.0586,
      "step": 149556
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.814943790435791,
      "learning_rate": 0.00016455036578515888,
      "loss": 2.9987,
      "step": 149557
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.169178009033203,
      "learning_rate": 0.0001645467159047397,
      "loss": 2.7461,
      "step": 149558
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7316315174102783,
      "learning_rate": 0.0001645430660495038,
      "loss": 2.8679,
      "step": 149559
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3994927406311035,
      "learning_rate": 0.00016453941621945195,
      "loss": 3.1115,
      "step": 149560
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2844722270965576,
      "learning_rate": 0.0001645357664145847,
      "loss": 2.8991,
      "step": 149561
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.568485736846924,
      "learning_rate": 0.00016453211663490294,
      "loss": 3.0649,
      "step": 149562
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7726898193359375,
      "learning_rate": 0.00016452846688040719,
      "loss": 3.1355,
      "step": 149563
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3863298892974854,
      "learning_rate": 0.00016452481715109816,
      "loss": 2.7982,
      "step": 149564
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7265381813049316,
      "learning_rate": 0.00016452116744697642,
      "loss": 3.0894,
      "step": 149565
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3330371379852295,
      "learning_rate": 0.00016451751776804286,
      "loss": 2.9005,
      "step": 149566
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.135478973388672,
      "learning_rate": 0.00016451386811429792,
      "loss": 2.9564,
      "step": 149567
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4275546073913574,
      "learning_rate": 0.00016451021848574255,
      "loss": 2.9888,
      "step": 149568
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.896733045578003,
      "learning_rate": 0.00016450656888237727,
      "loss": 3.1382,
      "step": 149569
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9890285730361938,
      "learning_rate": 0.00016450291930420277,
      "loss": 3.2218,
      "step": 149570
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.840409278869629,
      "learning_rate": 0.00016449926975121965,
      "loss": 2.8801,
      "step": 149571
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.247912406921387,
      "learning_rate": 0.00016449562022342876,
      "loss": 3.0437,
      "step": 149572
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.117581605911255,
      "learning_rate": 0.00016449197072083058,
      "loss": 2.8909,
      "step": 149573
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.511957883834839,
      "learning_rate": 0.00016448832124342603,
      "loss": 2.9422,
      "step": 149574
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.024925947189331,
      "learning_rate": 0.00016448467179121568,
      "loss": 2.6102,
      "step": 149575
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5284547805786133,
      "learning_rate": 0.00016448102236420016,
      "loss": 3.0112,
      "step": 149576
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2344136238098145,
      "learning_rate": 0.00016447737296238,
      "loss": 2.8233,
      "step": 149577
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4599618911743164,
      "learning_rate": 0.00016447372358575625,
      "loss": 3.0802,
      "step": 149578
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.145451784133911,
      "learning_rate": 0.00016447007423432927,
      "loss": 3.1988,
      "step": 149579
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.743912696838379,
      "learning_rate": 0.00016446642490809996,
      "loss": 3.0152,
      "step": 149580
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.887407660484314,
      "learning_rate": 0.0001644627756070689,
      "loss": 3.153,
      "step": 149581
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.181100368499756,
      "learning_rate": 0.00016445912633123678,
      "loss": 3.3059,
      "step": 149582
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1375062465667725,
      "learning_rate": 0.00016445547708060417,
      "loss": 2.9067,
      "step": 149583
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.331861972808838,
      "learning_rate": 0.00016445182785517195,
      "loss": 2.9406,
      "step": 149584
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4736671447753906,
      "learning_rate": 0.00016444817865494057,
      "loss": 2.9675,
      "step": 149585
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9556910991668701,
      "learning_rate": 0.0001644445294799109,
      "loss": 2.917,
      "step": 149586
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.64971661567688,
      "learning_rate": 0.00016444088033008361,
      "loss": 2.9774,
      "step": 149587
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1757359504699707,
      "learning_rate": 0.0001644372312054593,
      "loss": 2.9086,
      "step": 149588
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3572282791137695,
      "learning_rate": 0.00016443358210603854,
      "loss": 3.0156,
      "step": 149589
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.256286859512329,
      "learning_rate": 0.00016442993303182225,
      "loss": 3.086,
      "step": 149590
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.780134916305542,
      "learning_rate": 0.00016442628398281091,
      "loss": 3.106,
      "step": 149591
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.649663209915161,
      "learning_rate": 0.00016442263495900538,
      "loss": 2.9588,
      "step": 149592
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1227447986602783,
      "learning_rate": 0.00016441898596040628,
      "loss": 2.6315,
      "step": 149593
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2843968868255615,
      "learning_rate": 0.0001644153369870142,
      "loss": 2.868,
      "step": 149594
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.8529409170150757,
      "learning_rate": 0.0001644116880388298,
      "loss": 2.9719,
      "step": 149595
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4497456550598145,
      "learning_rate": 0.00016440803911585387,
      "loss": 2.9052,
      "step": 149596
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.336672067642212,
      "learning_rate": 0.00016440439021808698,
      "loss": 2.9549,
      "step": 149597
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.630896806716919,
      "learning_rate": 0.00016440074134552995,
      "loss": 3.0395,
      "step": 149598
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6321799755096436,
      "learning_rate": 0.00016439709249818329,
      "loss": 3.0286,
      "step": 149599
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4129552841186523,
      "learning_rate": 0.00016439344367604806,
      "loss": 2.9197,
      "step": 149600
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2858965396881104,
      "learning_rate": 0.0001643897948791243,
      "loss": 3.0793,
      "step": 149601
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9675241708755493,
      "learning_rate": 0.00016438614610741329,
      "loss": 3.0885,
      "step": 149602
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3258585929870605,
      "learning_rate": 0.00016438249736091523,
      "loss": 2.8765,
      "step": 149603
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2367899417877197,
      "learning_rate": 0.00016437884863963116,
      "loss": 2.8867,
      "step": 149604
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.029944896697998,
      "learning_rate": 0.00016437519994356154,
      "loss": 3.0212,
      "step": 149605
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4283347129821777,
      "learning_rate": 0.00016437155127270721,
      "loss": 2.8388,
      "step": 149606
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5349924564361572,
      "learning_rate": 0.0001643679026270688,
      "loss": 2.7975,
      "step": 149607
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0706729888916016,
      "learning_rate": 0.00016436425400664695,
      "loss": 2.9252,
      "step": 149608
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.243695020675659,
      "learning_rate": 0.0001643606054114422,
      "loss": 2.9548,
      "step": 149609
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.168699264526367,
      "learning_rate": 0.0001643569568414555,
      "loss": 3.1647,
      "step": 149610
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.166037082672119,
      "learning_rate": 0.00016435330829668734,
      "loss": 3.2751,
      "step": 149611
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.542146921157837,
      "learning_rate": 0.00016434965977713852,
      "loss": 2.8033,
      "step": 149612
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3919265270233154,
      "learning_rate": 0.00016434601128280968,
      "loss": 2.9806,
      "step": 149613
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.124927520751953,
      "learning_rate": 0.00016434236281370147,
      "loss": 3.0716,
      "step": 149614
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.186448812484741,
      "learning_rate": 0.00016433871436981445,
      "loss": 3.0288,
      "step": 149615
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7880859375,
      "learning_rate": 0.0001643350659511496,
      "loss": 2.8382,
      "step": 149616
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.589005470275879,
      "learning_rate": 0.00016433141755770722,
      "loss": 2.8947,
      "step": 149617
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5116355419158936,
      "learning_rate": 0.00016432776918948836,
      "loss": 2.8286,
      "step": 149618
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1681931018829346,
      "learning_rate": 0.00016432412084649352,
      "loss": 2.9449,
      "step": 149619
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.386608362197876,
      "learning_rate": 0.00016432047252872323,
      "loss": 2.8313,
      "step": 149620
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5521600246429443,
      "learning_rate": 0.00016431682423617849,
      "loss": 2.873,
      "step": 149621
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4745593070983887,
      "learning_rate": 0.0001643131759688598,
      "loss": 2.835,
      "step": 149622
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.496464729309082,
      "learning_rate": 0.0001643095277267677,
      "loss": 2.9489,
      "step": 149623
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.33350133895874,
      "learning_rate": 0.00016430587950990323,
      "loss": 2.9732,
      "step": 149624
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.259056806564331,
      "learning_rate": 0.00016430223131826677,
      "loss": 2.8058,
      "step": 149625
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.244853973388672,
      "learning_rate": 0.000164298583151859,
      "loss": 3.2494,
      "step": 149626
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8117496967315674,
      "learning_rate": 0.00016429493501068077,
      "loss": 2.9988,
      "step": 149627
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8798413276672363,
      "learning_rate": 0.00016429128689473258,
      "loss": 2.7182,
      "step": 149628
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0898358821868896,
      "learning_rate": 0.00016428763880401535,
      "loss": 3.0933,
      "step": 149629
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.578383207321167,
      "learning_rate": 0.00016428399073852959,
      "loss": 3.0562,
      "step": 149630
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4906861782073975,
      "learning_rate": 0.00016428034269827597,
      "loss": 2.9942,
      "step": 149631
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9767662286758423,
      "learning_rate": 0.0001642766946832551,
      "loss": 2.8998,
      "step": 149632
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1627252101898193,
      "learning_rate": 0.0001642730466934679,
      "loss": 3.0873,
      "step": 149633
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0835840702056885,
      "learning_rate": 0.00016426939872891475,
      "loss": 2.9184,
      "step": 149634
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6128430366516113,
      "learning_rate": 0.00016426575078959662,
      "loss": 2.8217,
      "step": 149635
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5050723552703857,
      "learning_rate": 0.00016426210287551402,
      "loss": 2.7972,
      "step": 149636
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2619926929473877,
      "learning_rate": 0.00016425845498666767,
      "loss": 2.9856,
      "step": 149637
    },
    {
      "epoch": 1.95,
      "grad_norm": 5.2203240394592285,
      "learning_rate": 0.00016425480712305813,
      "loss": 2.8655,
      "step": 149638
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4362854957580566,
      "learning_rate": 0.0001642511592846863,
      "loss": 2.7105,
      "step": 149639
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.354759454727173,
      "learning_rate": 0.00016424751147155265,
      "loss": 2.9538,
      "step": 149640
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.527280569076538,
      "learning_rate": 0.000164243863683658,
      "loss": 3.0266,
      "step": 149641
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.235267400741577,
      "learning_rate": 0.00016424021592100301,
      "loss": 3.066,
      "step": 149642
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.8423469066619873,
      "learning_rate": 0.00016423656818358834,
      "loss": 2.719,
      "step": 149643
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1943118572235107,
      "learning_rate": 0.00016423292047141453,
      "loss": 2.9849,
      "step": 149644
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.439560651779175,
      "learning_rate": 0.00016422927278448248,
      "loss": 2.9976,
      "step": 149645
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.128904342651367,
      "learning_rate": 0.0001642256251227927,
      "loss": 3.0081,
      "step": 149646
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.287404775619507,
      "learning_rate": 0.00016422197748634603,
      "loss": 2.9902,
      "step": 149647
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.231379747390747,
      "learning_rate": 0.00016421832987514304,
      "loss": 2.9738,
      "step": 149648
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2621123790740967,
      "learning_rate": 0.00016421468228918442,
      "loss": 2.9538,
      "step": 149649
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.647512912750244,
      "learning_rate": 0.0001642110347284708,
      "loss": 2.9968,
      "step": 149650
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1170554161071777,
      "learning_rate": 0.00016420738719300296,
      "loss": 3.0575,
      "step": 149651
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7063915729522705,
      "learning_rate": 0.00016420373968278144,
      "loss": 2.771,
      "step": 149652
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.618403196334839,
      "learning_rate": 0.00016420009219780715,
      "loss": 2.9333,
      "step": 149653
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.283855676651001,
      "learning_rate": 0.00016419644473808056,
      "loss": 2.9067,
      "step": 149654
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.27042818069458,
      "learning_rate": 0.00016419279730360247,
      "loss": 3.0717,
      "step": 149655
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.255358695983887,
      "learning_rate": 0.00016418914989437336,
      "loss": 3.0605,
      "step": 149656
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.750598669052124,
      "learning_rate": 0.00016418550251039418,
      "loss": 3.1211,
      "step": 149657
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8993589878082275,
      "learning_rate": 0.0001641818551516654,
      "loss": 3.0715,
      "step": 149658
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0010428428649902,
      "learning_rate": 0.0001641782078181878,
      "loss": 2.8004,
      "step": 149659
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2312748432159424,
      "learning_rate": 0.0001641745605099621,
      "loss": 2.9686,
      "step": 149660
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5803864002227783,
      "learning_rate": 0.00016417091322698893,
      "loss": 2.5626,
      "step": 149661
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.648730516433716,
      "learning_rate": 0.00016416726596926877,
      "loss": 2.8697,
      "step": 149662
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.379948616027832,
      "learning_rate": 0.00016416361873680265,
      "loss": 2.9903,
      "step": 149663
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2772936820983887,
      "learning_rate": 0.00016415997152959092,
      "loss": 2.9832,
      "step": 149664
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1759071350097656,
      "learning_rate": 0.00016415632434763457,
      "loss": 3.2258,
      "step": 149665
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2345800399780273,
      "learning_rate": 0.000164152677190934,
      "loss": 2.9502,
      "step": 149666
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.808387279510498,
      "learning_rate": 0.00016414903005949026,
      "loss": 2.9738,
      "step": 149667
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.095822334289551,
      "learning_rate": 0.0001641453829533035,
      "loss": 3.002,
      "step": 149668
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2619640827178955,
      "learning_rate": 0.0001641417358723748,
      "loss": 2.9796,
      "step": 149669
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0945894718170166,
      "learning_rate": 0.00016413808881670465,
      "loss": 3.2071,
      "step": 149670
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5525834560394287,
      "learning_rate": 0.00016413444178629388,
      "loss": 3.0001,
      "step": 149671
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2333548069000244,
      "learning_rate": 0.00016413079478114293,
      "loss": 3.0802,
      "step": 149672
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.606989622116089,
      "learning_rate": 0.00016412714780125292,
      "loss": 2.8326,
      "step": 149673
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1651365756988525,
      "learning_rate": 0.00016412350084662395,
      "loss": 2.8529,
      "step": 149674
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3896684646606445,
      "learning_rate": 0.00016411985391725717,
      "loss": 2.8782,
      "step": 149675
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.058361530303955,
      "learning_rate": 0.0001641162070131529,
      "loss": 3.0887,
      "step": 149676
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9266923666000366,
      "learning_rate": 0.00016411256013431215,
      "loss": 2.9672,
      "step": 149677
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.819485664367676,
      "learning_rate": 0.0001641089132807353,
      "loss": 2.8896,
      "step": 149678
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9072184562683105,
      "learning_rate": 0.0001641052664524234,
      "loss": 2.8404,
      "step": 149679
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2112796306610107,
      "learning_rate": 0.00016410161964937664,
      "loss": 2.971,
      "step": 149680
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.247297763824463,
      "learning_rate": 0.00016409797287159612,
      "loss": 3.1607,
      "step": 149681
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3216118812561035,
      "learning_rate": 0.0001640943261190822,
      "loss": 2.6633,
      "step": 149682
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.907029628753662,
      "learning_rate": 0.00016409067939183585,
      "loss": 3.4255,
      "step": 149683
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.133424997329712,
      "learning_rate": 0.0001640870326898575,
      "loss": 2.617,
      "step": 149684
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0552804470062256,
      "learning_rate": 0.00016408338601314816,
      "loss": 3.1696,
      "step": 149685
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8330235481262207,
      "learning_rate": 0.000164079739361708,
      "loss": 3.1207,
      "step": 149686
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.309328317642212,
      "learning_rate": 0.0001640760927355382,
      "loss": 2.9018,
      "step": 149687
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0014820098876953,
      "learning_rate": 0.00016407244613463902,
      "loss": 3.0471,
      "step": 149688
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.368985414505005,
      "learning_rate": 0.0001640687995590115,
      "loss": 3.1391,
      "step": 149689
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6661155223846436,
      "learning_rate": 0.00016406515300865598,
      "loss": 2.8292,
      "step": 149690
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.307955741882324,
      "learning_rate": 0.00016406150648357347,
      "loss": 2.9532,
      "step": 149691
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3685476779937744,
      "learning_rate": 0.0001640578599837645,
      "loss": 2.8506,
      "step": 149692
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4591305255889893,
      "learning_rate": 0.00016405421350922973,
      "loss": 3.0927,
      "step": 149693
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.822279930114746,
      "learning_rate": 0.00016405056705996976,
      "loss": 3.145,
      "step": 149694
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1051976680755615,
      "learning_rate": 0.00016404692063598544,
      "loss": 3.2092,
      "step": 149695
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.113938093185425,
      "learning_rate": 0.0001640432742372772,
      "loss": 3.0449,
      "step": 149696
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.144986629486084,
      "learning_rate": 0.00016403962786384608,
      "loss": 2.866,
      "step": 149697
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.259181022644043,
      "learning_rate": 0.00016403598151569252,
      "loss": 2.9963,
      "step": 149698
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.304802417755127,
      "learning_rate": 0.0001640323351928173,
      "loss": 3.0268,
      "step": 149699
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.439765453338623,
      "learning_rate": 0.00016402868889522083,
      "loss": 2.9101,
      "step": 149700
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3550052642822266,
      "learning_rate": 0.00016402504262290414,
      "loss": 3.0598,
      "step": 149701
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.115617036819458,
      "learning_rate": 0.00016402139637586766,
      "loss": 3.141,
      "step": 149702
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9761698246002197,
      "learning_rate": 0.00016401775015411227,
      "loss": 2.788,
      "step": 149703
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.876028060913086,
      "learning_rate": 0.00016401410395763856,
      "loss": 2.9659,
      "step": 149704
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3522233963012695,
      "learning_rate": 0.00016401045778644707,
      "loss": 2.9706,
      "step": 149705
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3655734062194824,
      "learning_rate": 0.00016400681164053873,
      "loss": 2.8636,
      "step": 149706
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2346444129943848,
      "learning_rate": 0.0001640031655199141,
      "loss": 2.8533,
      "step": 149707
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.328026533126831,
      "learning_rate": 0.00016399951942457368,
      "loss": 3.1697,
      "step": 149708
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2524805068969727,
      "learning_rate": 0.00016399587335451848,
      "loss": 3.1885,
      "step": 149709
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.795260190963745,
      "learning_rate": 0.00016399222730974902,
      "loss": 3.151,
      "step": 149710
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.694620132446289,
      "learning_rate": 0.00016398858129026582,
      "loss": 2.8353,
      "step": 149711
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.944514274597168,
      "learning_rate": 0.00016398493529606986,
      "loss": 2.822,
      "step": 149712
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1450634002685547,
      "learning_rate": 0.00016398128932716154,
      "loss": 3.269,
      "step": 149713
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.070924758911133,
      "learning_rate": 0.00016397764338354177,
      "loss": 3.1999,
      "step": 149714
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.8893539905548096,
      "learning_rate": 0.00016397399746521113,
      "loss": 2.9616,
      "step": 149715
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.269409656524658,
      "learning_rate": 0.0001639703515721703,
      "loss": 2.8551,
      "step": 149716
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9107557535171509,
      "learning_rate": 0.00016396670570441982,
      "loss": 3.182,
      "step": 149717
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.074228286743164,
      "learning_rate": 0.00016396305986196062,
      "loss": 3.0519,
      "step": 149718
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0444703102111816,
      "learning_rate": 0.00016395941404479317,
      "loss": 2.9592,
      "step": 149719
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3553390502929688,
      "learning_rate": 0.00016395576825291828,
      "loss": 3.0347,
      "step": 149720
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.290506362915039,
      "learning_rate": 0.00016395212248633664,
      "loss": 2.8957,
      "step": 149721
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.258880376815796,
      "learning_rate": 0.00016394847674504884,
      "loss": 2.8262,
      "step": 149722
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6007466316223145,
      "learning_rate": 0.00016394483102905548,
      "loss": 3.2917,
      "step": 149723
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.932164669036865,
      "learning_rate": 0.00016394118533835745,
      "loss": 2.7063,
      "step": 149724
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.293320417404175,
      "learning_rate": 0.0001639375396729552,
      "loss": 2.9526,
      "step": 149725
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6378390789031982,
      "learning_rate": 0.00016393389403284968,
      "loss": 2.8161,
      "step": 149726
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6290674209594727,
      "learning_rate": 0.00016393024841804138,
      "loss": 3.0196,
      "step": 149727
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.317087173461914,
      "learning_rate": 0.00016392660282853105,
      "loss": 2.8549,
      "step": 149728
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.305797815322876,
      "learning_rate": 0.0001639229572643192,
      "loss": 3.0621,
      "step": 149729
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.6223809719085693,
      "learning_rate": 0.00016391931172540677,
      "loss": 2.9727,
      "step": 149730
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.39418625831604,
      "learning_rate": 0.00016391566621179414,
      "loss": 2.9981,
      "step": 149731
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2926690578460693,
      "learning_rate": 0.00016391202072348234,
      "loss": 3.0648,
      "step": 149732
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2349467277526855,
      "learning_rate": 0.00016390837526047175,
      "loss": 2.9809,
      "step": 149733
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4454033374786377,
      "learning_rate": 0.00016390472982276335,
      "loss": 2.911,
      "step": 149734
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.6335856914520264,
      "learning_rate": 0.0001639010844103574,
      "loss": 2.8742,
      "step": 149735
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.795834541320801,
      "learning_rate": 0.0001638974390232549,
      "loss": 3.2114,
      "step": 149736
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3177437782287598,
      "learning_rate": 0.0001638937936614564,
      "loss": 2.9617,
      "step": 149737
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1648924350738525,
      "learning_rate": 0.00016389014832496267,
      "loss": 2.8879,
      "step": 149738
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4451282024383545,
      "learning_rate": 0.00016388650301377424,
      "loss": 3.1548,
      "step": 149739
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.473395347595215,
      "learning_rate": 0.00016388285772789212,
      "loss": 3.0412,
      "step": 149740
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.251983880996704,
      "learning_rate": 0.00016387921246731647,
      "loss": 2.9466,
      "step": 149741
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.761500358581543,
      "learning_rate": 0.00016387556723204835,
      "loss": 2.976,
      "step": 149742
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.092104911804199,
      "learning_rate": 0.00016387192202208827,
      "loss": 2.9138,
      "step": 149743
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.346020221710205,
      "learning_rate": 0.00016386827683743707,
      "loss": 3.199,
      "step": 149744
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.305149793624878,
      "learning_rate": 0.0001638646316780952,
      "loss": 2.8434,
      "step": 149745
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.671411991119385,
      "learning_rate": 0.0001638609865440637,
      "loss": 2.9698,
      "step": 149746
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.595510721206665,
      "learning_rate": 0.00016385734143534278,
      "loss": 2.8539,
      "step": 149747
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.8676364421844482,
      "learning_rate": 0.00016385369635193346,
      "loss": 2.9867,
      "step": 149748
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.14103364944458,
      "learning_rate": 0.00016385005129383623,
      "loss": 2.916,
      "step": 149749
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9826769828796387,
      "learning_rate": 0.0001638464062610519,
      "loss": 2.988,
      "step": 149750
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5264618396759033,
      "learning_rate": 0.00016384276125358104,
      "loss": 3.066,
      "step": 149751
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.232536554336548,
      "learning_rate": 0.00016383911627142458,
      "loss": 2.9288,
      "step": 149752
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6510586738586426,
      "learning_rate": 0.00016383547131458275,
      "loss": 2.8847,
      "step": 149753
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0614242553710938,
      "learning_rate": 0.00016383182638305663,
      "loss": 2.9415,
      "step": 149754
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1995346546173096,
      "learning_rate": 0.0001638281814768466,
      "loss": 3.1523,
      "step": 149755
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7383060455322266,
      "learning_rate": 0.00016382453659595362,
      "loss": 2.7904,
      "step": 149756
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.187967300415039,
      "learning_rate": 0.00016382089174037806,
      "loss": 3.0763,
      "step": 149757
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0811285972595215,
      "learning_rate": 0.00016381724691012106,
      "loss": 3.0779,
      "step": 149758
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.751868963241577,
      "learning_rate": 0.00016381360210518273,
      "loss": 2.9591,
      "step": 149759
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1292293071746826,
      "learning_rate": 0.00016380995732556418,
      "loss": 2.8549,
      "step": 149760
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0220086574554443,
      "learning_rate": 0.00016380631257126578,
      "loss": 2.8875,
      "step": 149761
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1115782260894775,
      "learning_rate": 0.00016380266784228847,
      "loss": 3.0148,
      "step": 149762
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3860387802124023,
      "learning_rate": 0.00016379902313863273,
      "loss": 2.8065,
      "step": 149763
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.192458152770996,
      "learning_rate": 0.00016379537846029953,
      "loss": 3.138,
      "step": 149764
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9736577272415161,
      "learning_rate": 0.00016379173380728915,
      "loss": 2.8154,
      "step": 149765
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.001265287399292,
      "learning_rate": 0.00016378808917960253,
      "loss": 2.9254,
      "step": 149766
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2663278579711914,
      "learning_rate": 0.0001637844445772402,
      "loss": 3.026,
      "step": 149767
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.204376459121704,
      "learning_rate": 0.00016378080000020294,
      "loss": 2.894,
      "step": 149768
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3363630771636963,
      "learning_rate": 0.00016377715544849136,
      "loss": 2.9886,
      "step": 149769
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.759373664855957,
      "learning_rate": 0.0001637735109221064,
      "loss": 2.8704,
      "step": 149770
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.994614839553833,
      "learning_rate": 0.0001637698664210483,
      "loss": 3.0115,
      "step": 149771
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3044395446777344,
      "learning_rate": 0.00016376622194531803,
      "loss": 2.8933,
      "step": 149772
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.322345495223999,
      "learning_rate": 0.0001637625774949161,
      "loss": 2.8655,
      "step": 149773
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.65071964263916,
      "learning_rate": 0.00016375893306984338,
      "loss": 2.7495,
      "step": 149774
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6991360187530518,
      "learning_rate": 0.00016375528867010032,
      "loss": 3.1002,
      "step": 149775
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.281545639038086,
      "learning_rate": 0.00016375164429568802,
      "loss": 3.014,
      "step": 149776
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.643932580947876,
      "learning_rate": 0.00016374799994660656,
      "loss": 2.824,
      "step": 149777
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2021636962890625,
      "learning_rate": 0.00016374435562285707,
      "loss": 3.1137,
      "step": 149778
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2311723232269287,
      "learning_rate": 0.00016374071132443992,
      "loss": 3.0276,
      "step": 149779
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.197519302368164,
      "learning_rate": 0.0001637370670513561,
      "loss": 2.8253,
      "step": 149780
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.340163469314575,
      "learning_rate": 0.00016373342280360598,
      "loss": 3.1032,
      "step": 149781
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.090543508529663,
      "learning_rate": 0.00016372977858119053,
      "loss": 2.827,
      "step": 149782
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3498682975769043,
      "learning_rate": 0.00016372613438411028,
      "loss": 2.9968,
      "step": 149783
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.320192575454712,
      "learning_rate": 0.00016372249021236591,
      "loss": 2.9933,
      "step": 149784
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.438370704650879,
      "learning_rate": 0.00016371884606595802,
      "loss": 2.8554,
      "step": 149785
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5440070629119873,
      "learning_rate": 0.00016371520194488745,
      "loss": 2.9415,
      "step": 149786
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.100831985473633,
      "learning_rate": 0.00016371155784915468,
      "loss": 2.7388,
      "step": 149787
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4124419689178467,
      "learning_rate": 0.00016370791377876064,
      "loss": 2.848,
      "step": 149788
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0843799114227295,
      "learning_rate": 0.00016370426973370586,
      "loss": 3.1182,
      "step": 149789
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.804844856262207,
      "learning_rate": 0.0001637006257139909,
      "loss": 3.0693,
      "step": 149790
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.091034412384033,
      "learning_rate": 0.00016369698171961672,
      "loss": 2.8907,
      "step": 149791
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2937839031219482,
      "learning_rate": 0.0001636933377505838,
      "loss": 3.158,
      "step": 149792
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1845593452453613,
      "learning_rate": 0.00016368969380689278,
      "loss": 2.7394,
      "step": 149793
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9845025539398193,
      "learning_rate": 0.0001636860498885445,
      "loss": 2.8132,
      "step": 149794
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.460127830505371,
      "learning_rate": 0.00016368240599553958,
      "loss": 3.0064,
      "step": 149795
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.590193271636963,
      "learning_rate": 0.00016367876212787855,
      "loss": 2.8765,
      "step": 149796
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9731993675231934,
      "learning_rate": 0.00016367511828556231,
      "loss": 2.7417,
      "step": 149797
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.344487190246582,
      "learning_rate": 0.0001636714744685915,
      "loss": 3.24,
      "step": 149798
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.154989242553711,
      "learning_rate": 0.00016366783067696657,
      "loss": 3.2419,
      "step": 149799
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2477400302886963,
      "learning_rate": 0.0001636641869106885,
      "loss": 2.8716,
      "step": 149800
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6589314937591553,
      "learning_rate": 0.00016366054316975783,
      "loss": 2.8713,
      "step": 149801
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6114919185638428,
      "learning_rate": 0.0001636568994541751,
      "loss": 2.9925,
      "step": 149802
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.54818058013916,
      "learning_rate": 0.00016365325576394126,
      "loss": 2.9759,
      "step": 149803
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.053415060043335,
      "learning_rate": 0.00016364961209905672,
      "loss": 2.7386,
      "step": 149804
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7872424125671387,
      "learning_rate": 0.00016364596845952243,
      "loss": 2.913,
      "step": 149805
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.071733236312866,
      "learning_rate": 0.00016364232484533892,
      "loss": 2.9156,
      "step": 149806
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.096576452255249,
      "learning_rate": 0.00016363868125650688,
      "loss": 2.9266,
      "step": 149807
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.621049404144287,
      "learning_rate": 0.00016363503769302687,
      "loss": 2.8609,
      "step": 149808
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3479249477386475,
      "learning_rate": 0.00016363139415489976,
      "loss": 3.1269,
      "step": 149809
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1933679580688477,
      "learning_rate": 0.00016362775064212605,
      "loss": 3.2286,
      "step": 149810
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.17651629447937,
      "learning_rate": 0.00016362410715470667,
      "loss": 2.9642,
      "step": 149811
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6380207538604736,
      "learning_rate": 0.000163620463692642,
      "loss": 2.9606,
      "step": 149812
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7141287326812744,
      "learning_rate": 0.0001636168202559331,
      "loss": 3.1803,
      "step": 149813
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6370604038238525,
      "learning_rate": 0.00016361317684458014,
      "loss": 3.0646,
      "step": 149814
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4860682487487793,
      "learning_rate": 0.00016360953345858422,
      "loss": 2.8032,
      "step": 149815
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2370991706848145,
      "learning_rate": 0.00016360589009794572,
      "loss": 3.1015,
      "step": 149816
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1164779663085938,
      "learning_rate": 0.0001636022467626656,
      "loss": 2.9995,
      "step": 149817
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.26533579826355,
      "learning_rate": 0.00016359860345274428,
      "loss": 3.2404,
      "step": 149818
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0433900356292725,
      "learning_rate": 0.0001635949601681828,
      "loss": 3.1043,
      "step": 149819
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.573605537414551,
      "learning_rate": 0.00016359131690898133,
      "loss": 3.0137,
      "step": 149820
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.972226142883301,
      "learning_rate": 0.00016358767367514093,
      "loss": 2.93,
      "step": 149821
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0741617679595947,
      "learning_rate": 0.00016358403046666205,
      "loss": 2.9977,
      "step": 149822
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1114208698272705,
      "learning_rate": 0.00016358038728354557,
      "loss": 2.9332,
      "step": 149823
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5938472747802734,
      "learning_rate": 0.00016357674412579196,
      "loss": 2.944,
      "step": 149824
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3582417964935303,
      "learning_rate": 0.00016357310099340225,
      "loss": 2.9193,
      "step": 149825
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2450859546661377,
      "learning_rate": 0.00016356945788637662,
      "loss": 2.9513,
      "step": 149826
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1280133724212646,
      "learning_rate": 0.00016356581480471617,
      "loss": 2.8313,
      "step": 149827
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.733449935913086,
      "learning_rate": 0.00016356217174842125,
      "loss": 2.9185,
      "step": 149828
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.355072021484375,
      "learning_rate": 0.00016355852871749284,
      "loss": 2.9671,
      "step": 149829
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4574718475341797,
      "learning_rate": 0.00016355488571193131,
      "loss": 2.9569,
      "step": 149830
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.597745895385742,
      "learning_rate": 0.00016355124273173782,
      "loss": 3.0089,
      "step": 149831
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.038220167160034,
      "learning_rate": 0.0001635475997769124,
      "loss": 2.878,
      "step": 149832
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.430778980255127,
      "learning_rate": 0.00016354395684745624,
      "loss": 2.8575,
      "step": 149833
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9088175296783447,
      "learning_rate": 0.00016354031394336972,
      "loss": 2.8819,
      "step": 149834
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.39967679977417,
      "learning_rate": 0.0001635366710646537,
      "loss": 2.9464,
      "step": 149835
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4237539768218994,
      "learning_rate": 0.00016353302821130874,
      "loss": 2.9172,
      "step": 149836
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.7249467372894287,
      "learning_rate": 0.00016352938538333574,
      "loss": 2.9372,
      "step": 149837
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2737789154052734,
      "learning_rate": 0.000163525742580735,
      "loss": 3.1653,
      "step": 149838
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9883904457092285,
      "learning_rate": 0.0001635220998035075,
      "loss": 2.8669,
      "step": 149839
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9683481454849243,
      "learning_rate": 0.00016351845705165371,
      "loss": 2.9851,
      "step": 149840
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.5020105838775635,
      "learning_rate": 0.00016351481432517456,
      "loss": 2.8089,
      "step": 149841
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7330703735351562,
      "learning_rate": 0.0001635111716240704,
      "loss": 2.9105,
      "step": 149842
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2067739963531494,
      "learning_rate": 0.00016350752894834237,
      "loss": 3.0598,
      "step": 149843
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.410513162612915,
      "learning_rate": 0.00016350388629799063,
      "loss": 3.0814,
      "step": 149844
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.8180198669433594,
      "learning_rate": 0.0001635002436730162,
      "loss": 3.142,
      "step": 149845
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0123565196990967,
      "learning_rate": 0.0001634966010734195,
      "loss": 2.864,
      "step": 149846
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0863399505615234,
      "learning_rate": 0.00016349295849920153,
      "loss": 2.8154,
      "step": 149847
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4324169158935547,
      "learning_rate": 0.0001634893159503627,
      "loss": 2.7517,
      "step": 149848
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.81329083442688,
      "learning_rate": 0.00016348567342690397,
      "loss": 3.0606,
      "step": 149849
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2536113262176514,
      "learning_rate": 0.0001634820309288256,
      "loss": 2.919,
      "step": 149850
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8999903202056885,
      "learning_rate": 0.00016347838845612861,
      "loss": 2.9898,
      "step": 149851
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5551064014434814,
      "learning_rate": 0.00016347474600881344,
      "loss": 2.8641,
      "step": 149852
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3700332641601562,
      "learning_rate": 0.00016347110358688104,
      "loss": 2.9107,
      "step": 149853
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.420658588409424,
      "learning_rate": 0.0001634674611903318,
      "loss": 2.9204,
      "step": 149854
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2887330055236816,
      "learning_rate": 0.0001634638188191668,
      "loss": 3.0108,
      "step": 149855
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.537595748901367,
      "learning_rate": 0.0001634601764733862,
      "loss": 2.8738,
      "step": 149856
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.204803943634033,
      "learning_rate": 0.00016345653415299105,
      "loss": 3.1858,
      "step": 149857
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9669506549835205,
      "learning_rate": 0.00016345289185798177,
      "loss": 2.9397,
      "step": 149858
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.531444549560547,
      "learning_rate": 0.0001634492495883593,
      "loss": 3.054,
      "step": 149859
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5482239723205566,
      "learning_rate": 0.0001634456073441241,
      "loss": 3.0526,
      "step": 149860
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1983189582824707,
      "learning_rate": 0.00016344196512527715,
      "loss": 3.0232,
      "step": 149861
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9652910232543945,
      "learning_rate": 0.0001634383229318187,
      "loss": 2.6945,
      "step": 149862
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.29215931892395,
      "learning_rate": 0.00016343468076374974,
      "loss": 3.0883,
      "step": 149863
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.377713918685913,
      "learning_rate": 0.0001634310386210707,
      "loss": 2.9527,
      "step": 149864
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5600788593292236,
      "learning_rate": 0.00016342739650378264,
      "loss": 3.0252,
      "step": 149865
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7855498790740967,
      "learning_rate": 0.0001634237544118858,
      "loss": 2.8478,
      "step": 149866
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4390180110931396,
      "learning_rate": 0.00016342011234538118,
      "loss": 2.9004,
      "step": 149867
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.545658826828003,
      "learning_rate": 0.00016341647030426935,
      "loss": 2.7171,
      "step": 149868
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0695836544036865,
      "learning_rate": 0.000163412828288551,
      "loss": 3.1398,
      "step": 149869
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.381829023361206,
      "learning_rate": 0.00016340918629822662,
      "loss": 2.9893,
      "step": 149870
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.569209575653076,
      "learning_rate": 0.0001634055443332972,
      "loss": 2.9735,
      "step": 149871
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4773337841033936,
      "learning_rate": 0.00016340190239376315,
      "loss": 2.9233,
      "step": 149872
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1935677528381348,
      "learning_rate": 0.00016339826047962537,
      "loss": 2.9375,
      "step": 149873
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1859073638916016,
      "learning_rate": 0.00016339461859088446,
      "loss": 3.1367,
      "step": 149874
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4495933055877686,
      "learning_rate": 0.00016339097672754093,
      "loss": 2.9171,
      "step": 149875
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2506966590881348,
      "learning_rate": 0.0001633873348895957,
      "loss": 3.0402,
      "step": 149876
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.914274215698242,
      "learning_rate": 0.00016338369307704936,
      "loss": 3.0724,
      "step": 149877
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3833870887756348,
      "learning_rate": 0.00016338005128990244,
      "loss": 3.0399,
      "step": 149878
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.272757053375244,
      "learning_rate": 0.00016337640952815587,
      "loss": 2.8162,
      "step": 149879
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1559085845947266,
      "learning_rate": 0.00016337276779181025,
      "loss": 2.7652,
      "step": 149880
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.17354416847229,
      "learning_rate": 0.00016336912608086606,
      "loss": 3.0328,
      "step": 149881
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1725378036499023,
      "learning_rate": 0.0001633654843953242,
      "loss": 3.0579,
      "step": 149882
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3917689323425293,
      "learning_rate": 0.00016336184273518534,
      "loss": 3.0179,
      "step": 149883
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.5515754222869873,
      "learning_rate": 0.00016335820110044994,
      "loss": 3.1765,
      "step": 149884
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.569200038909912,
      "learning_rate": 0.00016335455949111898,
      "loss": 2.8624,
      "step": 149885
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.096068859100342,
      "learning_rate": 0.00016335091790719298,
      "loss": 2.8227,
      "step": 149886
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.301157236099243,
      "learning_rate": 0.00016334727634867243,
      "loss": 3.0791,
      "step": 149887
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.120387554168701,
      "learning_rate": 0.0001633436348155584,
      "loss": 2.9418,
      "step": 149888
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9612834453582764,
      "learning_rate": 0.00016333999330785126,
      "loss": 3.1007,
      "step": 149889
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7779903411865234,
      "learning_rate": 0.00016333635182555186,
      "loss": 3.0056,
      "step": 149890
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.163454055786133,
      "learning_rate": 0.00016333271036866081,
      "loss": 2.9712,
      "step": 149891
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.501023530960083,
      "learning_rate": 0.00016332906893717884,
      "loss": 2.9125,
      "step": 149892
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.519915819168091,
      "learning_rate": 0.00016332542753110644,
      "loss": 2.8455,
      "step": 149893
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8688392639160156,
      "learning_rate": 0.00016332178615044454,
      "loss": 2.8382,
      "step": 149894
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.335308313369751,
      "learning_rate": 0.00016331814479519355,
      "loss": 2.9366,
      "step": 149895
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.292656898498535,
      "learning_rate": 0.00016331450346535446,
      "loss": 2.9464,
      "step": 149896
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9532642364501953,
      "learning_rate": 0.00016331086216092778,
      "loss": 2.9749,
      "step": 149897
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3986711502075195,
      "learning_rate": 0.00016330722088191412,
      "loss": 2.9513,
      "step": 149898
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0589914321899414,
      "learning_rate": 0.00016330357962831418,
      "loss": 3.0267,
      "step": 149899
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2943222522735596,
      "learning_rate": 0.00016329993840012878,
      "loss": 2.9467,
      "step": 149900
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.559558629989624,
      "learning_rate": 0.00016329629719735841,
      "loss": 2.845,
      "step": 149901
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1025030612945557,
      "learning_rate": 0.0001632926560200039,
      "loss": 3.0336,
      "step": 149902
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.839124917984009,
      "learning_rate": 0.00016328901486806582,
      "loss": 2.8081,
      "step": 149903
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.153522491455078,
      "learning_rate": 0.0001632853737415451,
      "loss": 2.8319,
      "step": 149904
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.398331880569458,
      "learning_rate": 0.00016328173264044198,
      "loss": 3.1174,
      "step": 149905
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4675731658935547,
      "learning_rate": 0.00016327809156475745,
      "loss": 2.8622,
      "step": 149906
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.5341904163360596,
      "learning_rate": 0.00016327445051449199,
      "loss": 2.8193,
      "step": 149907
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1439716815948486,
      "learning_rate": 0.00016327080948964657,
      "loss": 3.0903,
      "step": 149908
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2023696899414062,
      "learning_rate": 0.0001632671684902215,
      "loss": 3.1294,
      "step": 149909
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.820078134536743,
      "learning_rate": 0.0001632635275162179,
      "loss": 3.0047,
      "step": 149910
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3368468284606934,
      "learning_rate": 0.00016325988656763595,
      "loss": 2.9327,
      "step": 149911
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.210864543914795,
      "learning_rate": 0.00016325624564447673,
      "loss": 3.2481,
      "step": 149912
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5530128479003906,
      "learning_rate": 0.00016325260474674063,
      "loss": 2.9603,
      "step": 149913
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3008456230163574,
      "learning_rate": 0.00016324896387442852,
      "loss": 2.9108,
      "step": 149914
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9139156341552734,
      "learning_rate": 0.0001632453230275409,
      "loss": 3.1084,
      "step": 149915
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.139857769012451,
      "learning_rate": 0.00016324168220607882,
      "loss": 2.7292,
      "step": 149916
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.424109935760498,
      "learning_rate": 0.00016323804141004245,
      "loss": 3.0444,
      "step": 149917
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5295820236206055,
      "learning_rate": 0.00016323440063943282,
      "loss": 3.0644,
      "step": 149918
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0751256942749023,
      "learning_rate": 0.00016323075989425038,
      "loss": 2.9442,
      "step": 149919
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.260932445526123,
      "learning_rate": 0.00016322711917449604,
      "loss": 2.8835,
      "step": 149920
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1387617588043213,
      "learning_rate": 0.00016322347848017025,
      "loss": 2.7896,
      "step": 149921
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6792232990264893,
      "learning_rate": 0.000163219837811274,
      "loss": 2.8102,
      "step": 149922
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.282790422439575,
      "learning_rate": 0.00016321619716780757,
      "loss": 3.0171,
      "step": 149923
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0495944023132324,
      "learning_rate": 0.00016321255654977193,
      "loss": 2.9334,
      "step": 149924
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.706958293914795,
      "learning_rate": 0.00016320891595716756,
      "loss": 3.1007,
      "step": 149925
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.8522260189056396,
      "learning_rate": 0.00016320527538999535,
      "loss": 3.0154,
      "step": 149926
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.263157844543457,
      "learning_rate": 0.0001632016348482557,
      "loss": 2.7643,
      "step": 149927
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.681727886199951,
      "learning_rate": 0.00016319799433194975,
      "loss": 2.8061,
      "step": 149928
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.832017660140991,
      "learning_rate": 0.0001631943538410776,
      "loss": 2.6867,
      "step": 149929
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3415379524230957,
      "learning_rate": 0.0001631907133756403,
      "loss": 3.0919,
      "step": 149930
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1719484329223633,
      "learning_rate": 0.00016318707293563837,
      "loss": 2.9059,
      "step": 149931
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7867074012756348,
      "learning_rate": 0.0001631834325210726,
      "loss": 3.1891,
      "step": 149932
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5912585258483887,
      "learning_rate": 0.00016317979213194354,
      "loss": 2.8791,
      "step": 149933
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.227327346801758,
      "learning_rate": 0.00016317615176825203,
      "loss": 3.28,
      "step": 149934
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9102541208267212,
      "learning_rate": 0.0001631725114299987,
      "loss": 2.8666,
      "step": 149935
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6686389446258545,
      "learning_rate": 0.0001631688711171841,
      "loss": 3.0492,
      "step": 149936
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.325040817260742,
      "learning_rate": 0.00016316523082980896,
      "loss": 2.9413,
      "step": 149937
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0643310546875,
      "learning_rate": 0.00016316159056787408,
      "loss": 3.0332,
      "step": 149938
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1169934272766113,
      "learning_rate": 0.0001631579503313799,
      "loss": 3.0414,
      "step": 149939
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.194772481918335,
      "learning_rate": 0.00016315431012032738,
      "loss": 2.9224,
      "step": 149940
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.092890501022339,
      "learning_rate": 0.00016315066993471703,
      "loss": 2.8026,
      "step": 149941
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.8513555526733398,
      "learning_rate": 0.00016314702977454957,
      "loss": 2.9227,
      "step": 149942
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.316598415374756,
      "learning_rate": 0.00016314338963982556,
      "loss": 2.954,
      "step": 149943
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2682583332061768,
      "learning_rate": 0.00016313974953054586,
      "loss": 3.0626,
      "step": 149944
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5626299381256104,
      "learning_rate": 0.00016313610944671098,
      "loss": 2.9278,
      "step": 149945
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.240812063217163,
      "learning_rate": 0.00016313246938832177,
      "loss": 3.0186,
      "step": 149946
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4347074031829834,
      "learning_rate": 0.00016312882935537882,
      "loss": 2.9086,
      "step": 149947
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1832809448242188,
      "learning_rate": 0.0001631251893478828,
      "loss": 2.9479,
      "step": 149948
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0523664951324463,
      "learning_rate": 0.0001631215493658343,
      "loss": 2.9087,
      "step": 149949
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1262333393096924,
      "learning_rate": 0.0001631179094092342,
      "loss": 2.9921,
      "step": 149950
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.539562702178955,
      "learning_rate": 0.00016311426947808295,
      "loss": 3.0056,
      "step": 149951
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3528478145599365,
      "learning_rate": 0.00016311062957238143,
      "loss": 3.0704,
      "step": 149952
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.098916530609131,
      "learning_rate": 0.00016310698969213027,
      "loss": 3.1053,
      "step": 149953
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5012316703796387,
      "learning_rate": 0.00016310334983733006,
      "loss": 3.031,
      "step": 149954
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3660266399383545,
      "learning_rate": 0.00016309971000798144,
      "loss": 3.0811,
      "step": 149955
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.124276638031006,
      "learning_rate": 0.00016309607020408527,
      "loss": 3.1968,
      "step": 149956
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3435401916503906,
      "learning_rate": 0.00016309243042564203,
      "loss": 2.9621,
      "step": 149957
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6890976428985596,
      "learning_rate": 0.00016308879067265255,
      "loss": 2.9958,
      "step": 149958
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1441938877105713,
      "learning_rate": 0.0001630851509451175,
      "loss": 2.8289,
      "step": 149959
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1043241024017334,
      "learning_rate": 0.00016308151124303752,
      "loss": 2.9633,
      "step": 149960
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4758875370025635,
      "learning_rate": 0.0001630778715664131,
      "loss": 2.8664,
      "step": 149961
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7478291988372803,
      "learning_rate": 0.00016307423191524526,
      "loss": 2.9833,
      "step": 149962
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4167864322662354,
      "learning_rate": 0.00016307059228953434,
      "loss": 2.8754,
      "step": 149963
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1979615688323975,
      "learning_rate": 0.00016306695268928132,
      "loss": 2.9889,
      "step": 149964
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.209536552429199,
      "learning_rate": 0.0001630633131144867,
      "loss": 3.0621,
      "step": 149965
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.713576316833496,
      "learning_rate": 0.00016305967356515112,
      "loss": 2.7382,
      "step": 149966
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.856701612472534,
      "learning_rate": 0.00016305603404127546,
      "loss": 3.0674,
      "step": 149967
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.272272825241089,
      "learning_rate": 0.00016305239454286025,
      "loss": 2.7259,
      "step": 149968
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3217363357543945,
      "learning_rate": 0.00016304875506990607,
      "loss": 2.9976,
      "step": 149969
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7604761123657227,
      "learning_rate": 0.00016304511562241383,
      "loss": 2.9365,
      "step": 149970
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5799267292022705,
      "learning_rate": 0.00016304147620038408,
      "loss": 2.9056,
      "step": 149971
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.531424045562744,
      "learning_rate": 0.0001630378368038174,
      "loss": 3.0463,
      "step": 149972
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.186114549636841,
      "learning_rate": 0.00016303419743271468,
      "loss": 2.9931,
      "step": 149973
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.404054641723633,
      "learning_rate": 0.00016303055808707638,
      "loss": 3.0079,
      "step": 149974
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0093612670898438,
      "learning_rate": 0.00016302691876690339,
      "loss": 2.8423,
      "step": 149975
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.1393511295318604,
      "learning_rate": 0.0001630232794721963,
      "loss": 2.9525,
      "step": 149976
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3121585845947266,
      "learning_rate": 0.00016301964020295573,
      "loss": 3.0283,
      "step": 149977
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.783010959625244,
      "learning_rate": 0.00016301600095918232,
      "loss": 2.9349,
      "step": 149978
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.215810775756836,
      "learning_rate": 0.0001630123617408769,
      "loss": 2.8149,
      "step": 149979
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.512770175933838,
      "learning_rate": 0.00016300872254803998,
      "loss": 2.8732,
      "step": 149980
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7015178203582764,
      "learning_rate": 0.00016300508338067243,
      "loss": 2.8478,
      "step": 149981
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.072901725769043,
      "learning_rate": 0.00016300144423877483,
      "loss": 2.9757,
      "step": 149982
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3523449897766113,
      "learning_rate": 0.00016299780512234782,
      "loss": 3.0446,
      "step": 149983
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2562923431396484,
      "learning_rate": 0.00016299416603139202,
      "loss": 2.9295,
      "step": 149984
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.024346113204956,
      "learning_rate": 0.00016299052696590828,
      "loss": 2.9784,
      "step": 149985
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.10800838470459,
      "learning_rate": 0.00016298688792589712,
      "loss": 2.9273,
      "step": 149986
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.998705267906189,
      "learning_rate": 0.00016298324891135936,
      "loss": 2.8553,
      "step": 149987
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4376370906829834,
      "learning_rate": 0.00016297960992229548,
      "loss": 3.0518,
      "step": 149988
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7029964923858643,
      "learning_rate": 0.0001629759709587065,
      "loss": 3.2582,
      "step": 149989
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.017218828201294,
      "learning_rate": 0.00016297233202059266,
      "loss": 3.1432,
      "step": 149990
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.565403461456299,
      "learning_rate": 0.00016296869310795497,
      "loss": 2.9798,
      "step": 149991
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5764060020446777,
      "learning_rate": 0.00016296505422079387,
      "loss": 3.0448,
      "step": 149992
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.6788434982299805,
      "learning_rate": 0.00016296141535911027,
      "loss": 2.9752,
      "step": 149993
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3863768577575684,
      "learning_rate": 0.0001629577765229046,
      "loss": 2.8588,
      "step": 149994
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9114131927490234,
      "learning_rate": 0.0001629541377121779,
      "loss": 2.8103,
      "step": 149995
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.142054080963135,
      "learning_rate": 0.0001629504989269304,
      "loss": 2.8352,
      "step": 149996
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.652644395828247,
      "learning_rate": 0.0001629468601671631,
      "loss": 2.8805,
      "step": 149997
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2363038063049316,
      "learning_rate": 0.00016294322143287646,
      "loss": 3.003,
      "step": 149998
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4247658252716064,
      "learning_rate": 0.00016293958272407135,
      "loss": 2.9269,
      "step": 149999
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4562199115753174,
      "learning_rate": 0.00016293594404074823,
      "loss": 2.7271,
      "step": 150000
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7735466957092285,
      "learning_rate": 0.00016293230538290808,
      "loss": 2.9608,
      "step": 150001
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.771761894226074,
      "learning_rate": 0.00016292866675055137,
      "loss": 3.1564,
      "step": 150002
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4326701164245605,
      "learning_rate": 0.00016292502814367878,
      "loss": 2.7499,
      "step": 150003
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7738959789276123,
      "learning_rate": 0.00016292138956229093,
      "loss": 2.8234,
      "step": 150004
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.79238224029541,
      "learning_rate": 0.0001629177510063887,
      "loss": 2.5518,
      "step": 150005
    },
    {
      "epoch": 1.95,
      "grad_norm": 5.068351745605469,
      "learning_rate": 0.00016291411247597252,
      "loss": 2.8524,
      "step": 150006
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.585684061050415,
      "learning_rate": 0.00016291047397104332,
      "loss": 3.0623,
      "step": 150007
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1518280506134033,
      "learning_rate": 0.00016290683549160165,
      "loss": 3.0942,
      "step": 150008
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.6481685638427734,
      "learning_rate": 0.00016290319703764818,
      "loss": 3.1839,
      "step": 150009
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.066131114959717,
      "learning_rate": 0.00016289955860918347,
      "loss": 2.8914,
      "step": 150010
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.27609920501709,
      "learning_rate": 0.00016289592020620846,
      "loss": 2.7573,
      "step": 150011
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3219997882843018,
      "learning_rate": 0.00016289228182872354,
      "loss": 3.0744,
      "step": 150012
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.336362600326538,
      "learning_rate": 0.0001628886434767297,
      "loss": 2.8192,
      "step": 150013
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2924437522888184,
      "learning_rate": 0.00016288500515022737,
      "loss": 2.8161,
      "step": 150014
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.307982921600342,
      "learning_rate": 0.00016288136684921737,
      "loss": 3.0551,
      "step": 150015
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.795896530151367,
      "learning_rate": 0.0001628777285737002,
      "loss": 2.8036,
      "step": 150016
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4547808170318604,
      "learning_rate": 0.00016287409032367673,
      "loss": 2.772,
      "step": 150017
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.98636794090271,
      "learning_rate": 0.00016287045209914744,
      "loss": 2.9346,
      "step": 150018
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3533942699432373,
      "learning_rate": 0.00016286681390011325,
      "loss": 2.8935,
      "step": 150019
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.293511152267456,
      "learning_rate": 0.00016286317572657472,
      "loss": 2.9605,
      "step": 150020
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2326009273529053,
      "learning_rate": 0.0001628595375785325,
      "loss": 2.8,
      "step": 150021
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4731204509735107,
      "learning_rate": 0.00016285589945598715,
      "loss": 2.8824,
      "step": 150022
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4141547679901123,
      "learning_rate": 0.0001628522613589396,
      "loss": 2.9622,
      "step": 150023
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.532819986343384,
      "learning_rate": 0.0001628486232873903,
      "loss": 2.9737,
      "step": 150024
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0233519077301025,
      "learning_rate": 0.0001628449852413402,
      "loss": 2.8975,
      "step": 150025
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2390382289886475,
      "learning_rate": 0.00016284134722078969,
      "loss": 3.1089,
      "step": 150026
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2388103008270264,
      "learning_rate": 0.00016283770922573962,
      "loss": 3.1715,
      "step": 150027
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.47379994392395,
      "learning_rate": 0.0001628340712561905,
      "loss": 2.9538,
      "step": 150028
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.509953022003174,
      "learning_rate": 0.00016283043331214323,
      "loss": 2.823,
      "step": 150029
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.128732442855835,
      "learning_rate": 0.00016282679539359826,
      "loss": 2.7813,
      "step": 150030
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.691347599029541,
      "learning_rate": 0.00016282315750055644,
      "loss": 2.814,
      "step": 150031
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9771993160247803,
      "learning_rate": 0.00016281951963301845,
      "loss": 2.9049,
      "step": 150032
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.430636405944824,
      "learning_rate": 0.0001628158817909849,
      "loss": 2.9154,
      "step": 150033
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.135883331298828,
      "learning_rate": 0.00016281224397445633,
      "loss": 2.9445,
      "step": 150034
    },
    {
      "epoch": 1.95,
      "grad_norm": 6.302051544189453,
      "learning_rate": 0.00016280860618343368,
      "loss": 2.8505,
      "step": 150035
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0023341178894043,
      "learning_rate": 0.00016280496841791735,
      "loss": 3.1476,
      "step": 150036
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.080256700515747,
      "learning_rate": 0.0001628013306779083,
      "loss": 2.9312,
      "step": 150037
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9215264320373535,
      "learning_rate": 0.00016279769296340707,
      "loss": 3.0564,
      "step": 150038
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.812910795211792,
      "learning_rate": 0.00016279405527441437,
      "loss": 3.0457,
      "step": 150039
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.550420045852661,
      "learning_rate": 0.0001627904176109307,
      "loss": 3.0278,
      "step": 150040
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.9364047050476074,
      "learning_rate": 0.000162786779972957,
      "loss": 3.0534,
      "step": 150041
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.0960288047790527,
      "learning_rate": 0.00016278314236049373,
      "loss": 3.021,
      "step": 150042
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.055746078491211,
      "learning_rate": 0.00016277950477354173,
      "loss": 3.0214,
      "step": 150043
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.789518356323242,
      "learning_rate": 0.00016277586721210168,
      "loss": 3.0722,
      "step": 150044
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6094298362731934,
      "learning_rate": 0.00016277222967617415,
      "loss": 3.109,
      "step": 150045
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.470339298248291,
      "learning_rate": 0.00016276859216575972,
      "loss": 2.832,
      "step": 150046
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0400376319885254,
      "learning_rate": 0.00016276495468085938,
      "loss": 2.8638,
      "step": 150047
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.575575113296509,
      "learning_rate": 0.00016276131722147344,
      "loss": 2.8937,
      "step": 150048
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8797144889831543,
      "learning_rate": 0.0001627576797876029,
      "loss": 2.8897,
      "step": 150049
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.733503818511963,
      "learning_rate": 0.0001627540423792483,
      "loss": 2.9129,
      "step": 150050
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.971017599105835,
      "learning_rate": 0.00016275040499641019,
      "loss": 2.8854,
      "step": 150051
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7252390384674072,
      "learning_rate": 0.00016274676763908952,
      "loss": 3.1501,
      "step": 150052
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.024526834487915,
      "learning_rate": 0.00016274313030728677,
      "loss": 2.9846,
      "step": 150053
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3854572772979736,
      "learning_rate": 0.00016273949300100256,
      "loss": 3.2408,
      "step": 150054
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.226297616958618,
      "learning_rate": 0.0001627358557202378,
      "loss": 2.9715,
      "step": 150055
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8121368885040283,
      "learning_rate": 0.00016273221846499302,
      "loss": 2.9281,
      "step": 150056
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.424928903579712,
      "learning_rate": 0.00016272858123526882,
      "loss": 2.6135,
      "step": 150057
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.368185043334961,
      "learning_rate": 0.00016272494403106607,
      "loss": 2.889,
      "step": 150058
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0470011234283447,
      "learning_rate": 0.00016272130685238536,
      "loss": 3.0032,
      "step": 150059
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3092331886291504,
      "learning_rate": 0.00016271766969922717,
      "loss": 2.9369,
      "step": 150060
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.98307466506958,
      "learning_rate": 0.00016271403257159256,
      "loss": 2.9097,
      "step": 150061
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2123148441314697,
      "learning_rate": 0.00016271039546948194,
      "loss": 2.9793,
      "step": 150062
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3540446758270264,
      "learning_rate": 0.00016270675839289597,
      "loss": 3.0742,
      "step": 150063
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.960062026977539,
      "learning_rate": 0.00016270312134183552,
      "loss": 3.0502,
      "step": 150064
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.176391124725342,
      "learning_rate": 0.00016269948431630097,
      "loss": 2.9506,
      "step": 150065
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.141201972961426,
      "learning_rate": 0.00016269584731629338,
      "loss": 2.8923,
      "step": 150066
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.254721164703369,
      "learning_rate": 0.00016269221034181317,
      "loss": 2.8553,
      "step": 150067
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2307040691375732,
      "learning_rate": 0.00016268857339286097,
      "loss": 2.9316,
      "step": 150068
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.503530502319336,
      "learning_rate": 0.00016268493646943768,
      "loss": 2.8602,
      "step": 150069
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.128610849380493,
      "learning_rate": 0.00016268129957154386,
      "loss": 2.805,
      "step": 150070
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2733428478240967,
      "learning_rate": 0.00016267766269918002,
      "loss": 3.1094,
      "step": 150071
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.954430103302002,
      "learning_rate": 0.00016267402585234717,
      "loss": 2.9191,
      "step": 150072
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.8472158908843994,
      "learning_rate": 0.00016267038903104567,
      "loss": 3.267,
      "step": 150073
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.956360340118408,
      "learning_rate": 0.00016266675223527643,
      "loss": 3.096,
      "step": 150074
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.725759506225586,
      "learning_rate": 0.00016266311546504007,
      "loss": 2.8459,
      "step": 150075
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.179020881652832,
      "learning_rate": 0.00016265947872033722,
      "loss": 2.7136,
      "step": 150076
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7983012199401855,
      "learning_rate": 0.0001626558420011684,
      "loss": 2.9021,
      "step": 150077
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.519059896469116,
      "learning_rate": 0.00016265220530753465,
      "loss": 3.0805,
      "step": 150078
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.036647319793701,
      "learning_rate": 0.00016264856863943628,
      "loss": 2.8427,
      "step": 150079
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6328043937683105,
      "learning_rate": 0.00016264493199687427,
      "loss": 3.1042,
      "step": 150080
    },
    {
      "epoch": 1.95,
      "grad_norm": 1.9435709714889526,
      "learning_rate": 0.00016264129537984915,
      "loss": 3.2335,
      "step": 150081
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3966729640960693,
      "learning_rate": 0.0001626376587883616,
      "loss": 2.984,
      "step": 150082
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.521123170852661,
      "learning_rate": 0.00016263402222241219,
      "loss": 3.089,
      "step": 150083
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.677281141281128,
      "learning_rate": 0.00016263038568200183,
      "loss": 2.9013,
      "step": 150084
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0212795734405518,
      "learning_rate": 0.00016262674916713097,
      "loss": 2.9799,
      "step": 150085
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3511173725128174,
      "learning_rate": 0.00016262311267780047,
      "loss": 2.8985,
      "step": 150086
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.9443209171295166,
      "learning_rate": 0.00016261947621401096,
      "loss": 2.734,
      "step": 150087
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.4692063331604004,
      "learning_rate": 0.00016261583977576305,
      "loss": 2.9251,
      "step": 150088
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.118675708770752,
      "learning_rate": 0.00016261220336305735,
      "loss": 2.7987,
      "step": 150089
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.5419552326202393,
      "learning_rate": 0.00016260856697589474,
      "loss": 2.8352,
      "step": 150090
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.3265180587768555,
      "learning_rate": 0.00016260493061427572,
      "loss": 2.9292,
      "step": 150091
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.184896469116211,
      "learning_rate": 0.0001626012942782011,
      "loss": 2.815,
      "step": 150092
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3817343711853027,
      "learning_rate": 0.00016259765796767153,
      "loss": 2.7865,
      "step": 150093
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2905046939849854,
      "learning_rate": 0.00016259402168268764,
      "loss": 2.9674,
      "step": 150094
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7485389709472656,
      "learning_rate": 0.00016259038542324998,
      "loss": 3.2392,
      "step": 150095
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.646148681640625,
      "learning_rate": 0.0001625867491893595,
      "loss": 3.0041,
      "step": 150096
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.532769203186035,
      "learning_rate": 0.0001625831129810166,
      "loss": 2.8934,
      "step": 150097
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.740321636199951,
      "learning_rate": 0.00016257947679822223,
      "loss": 2.7155,
      "step": 150098
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.749587297439575,
      "learning_rate": 0.00016257584064097692,
      "loss": 2.7852,
      "step": 150099
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5654680728912354,
      "learning_rate": 0.00016257220450928132,
      "loss": 2.8597,
      "step": 150100
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5851941108703613,
      "learning_rate": 0.0001625685684031361,
      "loss": 2.8893,
      "step": 150101
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2879655361175537,
      "learning_rate": 0.00016256493232254206,
      "loss": 3.0642,
      "step": 150102
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3240227699279785,
      "learning_rate": 0.0001625612962674997,
      "loss": 2.767,
      "step": 150103
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.3183910846710205,
      "learning_rate": 0.00016255766023800985,
      "loss": 3.0968,
      "step": 150104
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6825172901153564,
      "learning_rate": 0.0001625540242340731,
      "loss": 3.1708,
      "step": 150105
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.166184902191162,
      "learning_rate": 0.00016255038825569023,
      "loss": 3.026,
      "step": 150106
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3964195251464844,
      "learning_rate": 0.00016254675230286173,
      "loss": 2.6305,
      "step": 150107
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5428943634033203,
      "learning_rate": 0.0001625431163755884,
      "loss": 3.1,
      "step": 150108
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7926833629608154,
      "learning_rate": 0.00016253948047387088,
      "loss": 3.1619,
      "step": 150109
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.699737548828125,
      "learning_rate": 0.00016253584459770994,
      "loss": 3.0059,
      "step": 150110
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6577982902526855,
      "learning_rate": 0.00016253220874710622,
      "loss": 3.0149,
      "step": 150111
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.180706024169922,
      "learning_rate": 0.00016252857292206032,
      "loss": 2.8226,
      "step": 150112
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.460407257080078,
      "learning_rate": 0.00016252493712257284,
      "loss": 3.0044,
      "step": 150113
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.245598316192627,
      "learning_rate": 0.00016252130134864468,
      "loss": 3.2209,
      "step": 150114
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.685297727584839,
      "learning_rate": 0.00016251766560027627,
      "loss": 3.0195,
      "step": 150115
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4674344062805176,
      "learning_rate": 0.0001625140298774686,
      "loss": 2.543,
      "step": 150116
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3146002292633057,
      "learning_rate": 0.00016251039418022214,
      "loss": 3.102,
      "step": 150117
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4782395362854004,
      "learning_rate": 0.00016250675850853756,
      "loss": 3.086,
      "step": 150118
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.103407621383667,
      "learning_rate": 0.00016250312286241546,
      "loss": 3.0815,
      "step": 150119
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.949518918991089,
      "learning_rate": 0.00016249948724185678,
      "loss": 2.8145,
      "step": 150120
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1156015396118164,
      "learning_rate": 0.00016249585164686191,
      "loss": 2.6699,
      "step": 150121
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.399085760116577,
      "learning_rate": 0.00016249221607743177,
      "loss": 3.1746,
      "step": 150122
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.11865496635437,
      "learning_rate": 0.0001624885805335669,
      "loss": 2.8678,
      "step": 150123
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2540581226348877,
      "learning_rate": 0.00016248494501526804,
      "loss": 3.0238,
      "step": 150124
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.948584794998169,
      "learning_rate": 0.00016248130952253569,
      "loss": 3.0979,
      "step": 150125
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.928955078125,
      "learning_rate": 0.00016247767405537077,
      "loss": 3.0528,
      "step": 150126
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.7273075580596924,
      "learning_rate": 0.00016247403861377376,
      "loss": 2.8284,
      "step": 150127
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.3851749897003174,
      "learning_rate": 0.00016247040319774546,
      "loss": 3.1109,
      "step": 150128
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.5625734329223633,
      "learning_rate": 0.0001624667678072866,
      "loss": 3.2012,
      "step": 150129
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1324679851531982,
      "learning_rate": 0.0001624631324423977,
      "loss": 2.868,
      "step": 150130
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.106876850128174,
      "learning_rate": 0.00016245949710307943,
      "loss": 3.0586,
      "step": 150131
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.022305965423584,
      "learning_rate": 0.00016245586178933262,
      "loss": 2.7295,
      "step": 150132
    },
    {
      "epoch": 1.95,
      "grad_norm": 4.024069309234619,
      "learning_rate": 0.00016245222650115775,
      "loss": 2.9996,
      "step": 150133
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.405550003051758,
      "learning_rate": 0.00016244859123855571,
      "loss": 3.1469,
      "step": 150134
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.382112503051758,
      "learning_rate": 0.00016244495600152697,
      "loss": 3.1606,
      "step": 150135
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.2645931243896484,
      "learning_rate": 0.00016244132079007247,
      "loss": 3.0287,
      "step": 150136
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.0500755310058594,
      "learning_rate": 0.00016243768560419267,
      "loss": 2.7292,
      "step": 150137
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.050239086151123,
      "learning_rate": 0.0001624340504438883,
      "loss": 3.0963,
      "step": 150138
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.433133840560913,
      "learning_rate": 0.00016243041530915995,
      "loss": 2.7468,
      "step": 150139
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.2695677280426025,
      "learning_rate": 0.00016242678020000851,
      "loss": 2.9362,
      "step": 150140
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.990565776824951,
      "learning_rate": 0.00016242314511643439,
      "loss": 3.0711,
      "step": 150141
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.193626642227173,
      "learning_rate": 0.00016241951005843848,
      "loss": 3.0335,
      "step": 150142
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.4961183071136475,
      "learning_rate": 0.00016241587502602146,
      "loss": 3.0035,
      "step": 150143
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.1855881214141846,
      "learning_rate": 0.00016241224001918388,
      "loss": 3.1146,
      "step": 150144
    },
    {
      "epoch": 1.95,
      "grad_norm": 3.5408382415771484,
      "learning_rate": 0.00016240860503792637,
      "loss": 3.1995,
      "step": 150145
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.234342575073242,
      "learning_rate": 0.00016240497008224977,
      "loss": 3.0012,
      "step": 150146
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.098761796951294,
      "learning_rate": 0.0001624013351521546,
      "loss": 2.9978,
      "step": 150147
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0457491874694824,
      "learning_rate": 0.00016239770024764174,
      "loss": 2.9089,
      "step": 150148
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.595578193664551,
      "learning_rate": 0.00016239406536871172,
      "loss": 3.2033,
      "step": 150149
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1649537086486816,
      "learning_rate": 0.00016239043051536513,
      "loss": 3.3011,
      "step": 150150
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.296720504760742,
      "learning_rate": 0.00016238679568760287,
      "loss": 2.8729,
      "step": 150151
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.456719160079956,
      "learning_rate": 0.00016238316088542553,
      "loss": 3.2764,
      "step": 150152
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.667645215988159,
      "learning_rate": 0.00016237952610883365,
      "loss": 3.1078,
      "step": 150153
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5254271030426025,
      "learning_rate": 0.00016237589135782807,
      "loss": 3.183,
      "step": 150154
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7311601638793945,
      "learning_rate": 0.00016237225663240945,
      "loss": 3.0437,
      "step": 150155
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.595010280609131,
      "learning_rate": 0.0001623686219325783,
      "loss": 2.8889,
      "step": 150156
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7756271362304688,
      "learning_rate": 0.00016236498725833554,
      "loss": 2.8036,
      "step": 150157
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2198634147644043,
      "learning_rate": 0.0001623613526096816,
      "loss": 2.7197,
      "step": 150158
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.4106366634368896,
      "learning_rate": 0.00016235771798661744,
      "loss": 2.8366,
      "step": 150159
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5570058822631836,
      "learning_rate": 0.00016235408338914354,
      "loss": 3.0523,
      "step": 150160
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.304989814758301,
      "learning_rate": 0.0001623504488172606,
      "loss": 2.9071,
      "step": 150161
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.104353904724121,
      "learning_rate": 0.00016234681427096926,
      "loss": 2.9377,
      "step": 150162
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.366769313812256,
      "learning_rate": 0.00016234317975027032,
      "loss": 2.9579,
      "step": 150163
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6792805194854736,
      "learning_rate": 0.00016233954525516424,
      "loss": 2.921,
      "step": 150164
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1863389015197754,
      "learning_rate": 0.00016233591078565195,
      "loss": 3.1007,
      "step": 150165
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5626578330993652,
      "learning_rate": 0.00016233227634173406,
      "loss": 2.6899,
      "step": 150166
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.625331401824951,
      "learning_rate": 0.00016232864192341118,
      "loss": 2.8409,
      "step": 150167
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.613328695297241,
      "learning_rate": 0.00016232500753068387,
      "loss": 2.8875,
      "step": 150168
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1400115489959717,
      "learning_rate": 0.00016232137316355308,
      "loss": 2.9392,
      "step": 150169
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3009397983551025,
      "learning_rate": 0.00016231773882201923,
      "loss": 2.924,
      "step": 150170
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.501231670379639,
      "learning_rate": 0.00016231410450608325,
      "loss": 3.0587,
      "step": 150171
    },
    {
      "epoch": 1.96,
      "grad_norm": 5.200913906097412,
      "learning_rate": 0.00016231047021574564,
      "loss": 2.8101,
      "step": 150172
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2877798080444336,
      "learning_rate": 0.0001623068359510071,
      "loss": 2.8402,
      "step": 150173
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.700428009033203,
      "learning_rate": 0.00016230320171186822,
      "loss": 2.8532,
      "step": 150174
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.589268445968628,
      "learning_rate": 0.00016229956749832988,
      "loss": 2.9153,
      "step": 150175
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.16178035736084,
      "learning_rate": 0.00016229593331039256,
      "loss": 2.9387,
      "step": 150176
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.8278613090515137,
      "learning_rate": 0.0001622922991480571,
      "loss": 3.1851,
      "step": 150177
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.375619888305664,
      "learning_rate": 0.00016228866501132415,
      "loss": 2.5625,
      "step": 150178
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.491103172302246,
      "learning_rate": 0.00016228503090019435,
      "loss": 2.8636,
      "step": 150179
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4847373962402344,
      "learning_rate": 0.00016228139681466816,
      "loss": 2.9116,
      "step": 150180
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6234283447265625,
      "learning_rate": 0.00016227776275474663,
      "loss": 2.8405,
      "step": 150181
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6395108699798584,
      "learning_rate": 0.0001622741287204302,
      "loss": 3.0764,
      "step": 150182
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6491544246673584,
      "learning_rate": 0.00016227049471171962,
      "loss": 2.9777,
      "step": 150183
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6959941387176514,
      "learning_rate": 0.0001622668607286156,
      "loss": 2.9442,
      "step": 150184
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2947847843170166,
      "learning_rate": 0.00016226322677111876,
      "loss": 2.9873,
      "step": 150185
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2818522453308105,
      "learning_rate": 0.0001622595928392297,
      "loss": 2.9581,
      "step": 150186
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4352428913116455,
      "learning_rate": 0.00016225595893294928,
      "loss": 2.7751,
      "step": 150187
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.8691117763519287,
      "learning_rate": 0.00016225232505227795,
      "loss": 2.8865,
      "step": 150188
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.201854705810547,
      "learning_rate": 0.00016224869119721666,
      "loss": 2.9004,
      "step": 150189
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3684933185577393,
      "learning_rate": 0.0001622450573677659,
      "loss": 3.008,
      "step": 150190
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7019572257995605,
      "learning_rate": 0.0001622414235639264,
      "loss": 2.8915,
      "step": 150191
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3937339782714844,
      "learning_rate": 0.00016223778978569873,
      "loss": 3.0036,
      "step": 150192
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.109792947769165,
      "learning_rate": 0.00016223415603308373,
      "loss": 2.9326,
      "step": 150193
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5542075634002686,
      "learning_rate": 0.0001622305223060819,
      "loss": 3.0934,
      "step": 150194
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.300875186920166,
      "learning_rate": 0.00016222688860469418,
      "loss": 3.1519,
      "step": 150195
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.164182186126709,
      "learning_rate": 0.00016222325492892104,
      "loss": 3.0725,
      "step": 150196
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.319575309753418,
      "learning_rate": 0.00016221962127876318,
      "loss": 3.0326,
      "step": 150197
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1215405464172363,
      "learning_rate": 0.0001622159876542212,
      "loss": 2.8293,
      "step": 150198
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.249669313430786,
      "learning_rate": 0.00016221235405529594,
      "loss": 2.9032,
      "step": 150199
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.146099090576172,
      "learning_rate": 0.00016220872048198793,
      "loss": 3.0669,
      "step": 150200
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1570796966552734,
      "learning_rate": 0.00016220508693429804,
      "loss": 2.8118,
      "step": 150201
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9233520030975342,
      "learning_rate": 0.00016220145341222668,
      "loss": 3.0619,
      "step": 150202
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.430654048919678,
      "learning_rate": 0.00016219781991577497,
      "loss": 2.96,
      "step": 150203
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2510344982147217,
      "learning_rate": 0.00016219418644494298,
      "loss": 2.8743,
      "step": 150204
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2201757431030273,
      "learning_rate": 0.00016219055299973185,
      "loss": 3.0392,
      "step": 150205
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5538578033447266,
      "learning_rate": 0.000162186919580142,
      "loss": 2.9246,
      "step": 150206
    },
    {
      "epoch": 1.96,
      "grad_norm": 5.320285797119141,
      "learning_rate": 0.00016218328618617425,
      "loss": 2.9812,
      "step": 150207
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.446348190307617,
      "learning_rate": 0.00016217965281782915,
      "loss": 2.8566,
      "step": 150208
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9430149793624878,
      "learning_rate": 0.00016217601947510768,
      "loss": 3.0997,
      "step": 150209
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.974045753479004,
      "learning_rate": 0.0001621723861580101,
      "loss": 2.9959,
      "step": 150210
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.315181732177734,
      "learning_rate": 0.00016216875286653734,
      "loss": 3.0582,
      "step": 150211
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.731027603149414,
      "learning_rate": 0.0001621651196006899,
      "loss": 2.5791,
      "step": 150212
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.010845184326172,
      "learning_rate": 0.00016216148636046868,
      "loss": 2.7504,
      "step": 150213
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7347500324249268,
      "learning_rate": 0.00016215785314587414,
      "loss": 2.9728,
      "step": 150214
    },
    {
      "epoch": 1.96,
      "grad_norm": 5.562656402587891,
      "learning_rate": 0.00016215421995690728,
      "loss": 2.8846,
      "step": 150215
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.321547508239746,
      "learning_rate": 0.0001621505867935683,
      "loss": 2.8381,
      "step": 150216
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7946531772613525,
      "learning_rate": 0.0001621469536558583,
      "loss": 3.1367,
      "step": 150217
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9652135372161865,
      "learning_rate": 0.00016214332054377765,
      "loss": 3.1245,
      "step": 150218
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.8245480060577393,
      "learning_rate": 0.00016213968745732723,
      "loss": 2.8659,
      "step": 150219
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.150546550750732,
      "learning_rate": 0.0001621360543965076,
      "loss": 2.9252,
      "step": 150220
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.461374759674072,
      "learning_rate": 0.00016213242136131967,
      "loss": 2.9913,
      "step": 150221
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1308679580688477,
      "learning_rate": 0.00016212878835176372,
      "loss": 3.0657,
      "step": 150222
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5506699085235596,
      "learning_rate": 0.0001621251553678407,
      "loss": 2.9873,
      "step": 150223
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2584445476531982,
      "learning_rate": 0.0001621215224095511,
      "loss": 2.721,
      "step": 150224
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.523123025894165,
      "learning_rate": 0.00016211788947689587,
      "loss": 2.7587,
      "step": 150225
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.853285551071167,
      "learning_rate": 0.00016211425656987538,
      "loss": 2.8734,
      "step": 150226
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.53265118598938,
      "learning_rate": 0.00016211062368849056,
      "loss": 3.074,
      "step": 150227
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8064568042755127,
      "learning_rate": 0.00016210699083274202,
      "loss": 3.0986,
      "step": 150228
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.800896406173706,
      "learning_rate": 0.0001621033580026304,
      "loss": 2.7484,
      "step": 150229
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3155786991119385,
      "learning_rate": 0.0001620997251981562,
      "loss": 3.0563,
      "step": 150230
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9646872282028198,
      "learning_rate": 0.00016209609241932043,
      "loss": 2.9874,
      "step": 150231
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9747791290283203,
      "learning_rate": 0.00016209245966612343,
      "loss": 2.9804,
      "step": 150232
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.123284101486206,
      "learning_rate": 0.0001620888269385662,
      "loss": 3.1522,
      "step": 150233
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7414400577545166,
      "learning_rate": 0.00016208519423664924,
      "loss": 2.8046,
      "step": 150234
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.220465660095215,
      "learning_rate": 0.00016208156156037314,
      "loss": 3.0967,
      "step": 150235
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9419833421707153,
      "learning_rate": 0.00016207792890973884,
      "loss": 2.8391,
      "step": 150236
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2794137001037598,
      "learning_rate": 0.0001620742962847468,
      "loss": 2.9563,
      "step": 150237
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.375037908554077,
      "learning_rate": 0.00016207066368539763,
      "loss": 2.85,
      "step": 150238
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.622066020965576,
      "learning_rate": 0.00016206703111169225,
      "loss": 2.9555,
      "step": 150239
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.00824236869812,
      "learning_rate": 0.00016206339856363123,
      "loss": 3.1087,
      "step": 150240
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.203092575073242,
      "learning_rate": 0.00016205976604121512,
      "loss": 2.7231,
      "step": 150241
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.078458547592163,
      "learning_rate": 0.0001620561335444448,
      "loss": 2.998,
      "step": 150242
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.606215238571167,
      "learning_rate": 0.00016205250107332084,
      "loss": 3.1147,
      "step": 150243
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7468788623809814,
      "learning_rate": 0.00016204886862784384,
      "loss": 3.2101,
      "step": 150244
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9642115831375122,
      "learning_rate": 0.00016204523620801464,
      "loss": 3.1835,
      "step": 150245
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5737380981445312,
      "learning_rate": 0.00016204160381383387,
      "loss": 2.7859,
      "step": 150246
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3643784523010254,
      "learning_rate": 0.00016203797144530204,
      "loss": 3.2146,
      "step": 150247
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.791891574859619,
      "learning_rate": 0.00016203433910242005,
      "loss": 2.8193,
      "step": 150248
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2753021717071533,
      "learning_rate": 0.0001620307067851884,
      "loss": 2.924,
      "step": 150249
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.5382890701293945,
      "learning_rate": 0.00016202707449360792,
      "loss": 3.0191,
      "step": 150250
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.3288655281066895,
      "learning_rate": 0.00016202344222767924,
      "loss": 2.8733,
      "step": 150251
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3660950660705566,
      "learning_rate": 0.000162019809987403,
      "loss": 3.1648,
      "step": 150252
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.323354482650757,
      "learning_rate": 0.0001620161777727798,
      "loss": 3.0434,
      "step": 150253
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.203843832015991,
      "learning_rate": 0.00016201254558381048,
      "loss": 3.1307,
      "step": 150254
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.453449249267578,
      "learning_rate": 0.0001620089134204955,
      "loss": 3.0154,
      "step": 150255
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3718228340148926,
      "learning_rate": 0.00016200528128283581,
      "loss": 3.0901,
      "step": 150256
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2722036838531494,
      "learning_rate": 0.00016200164917083197,
      "loss": 2.7418,
      "step": 150257
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.286245822906494,
      "learning_rate": 0.00016199801708448455,
      "loss": 3.0896,
      "step": 150258
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4209601879119873,
      "learning_rate": 0.00016199438502379428,
      "loss": 3.1956,
      "step": 150259
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0478739738464355,
      "learning_rate": 0.00016199075298876188,
      "loss": 2.9475,
      "step": 150260
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.216505765914917,
      "learning_rate": 0.00016198712097938796,
      "loss": 3.0564,
      "step": 150261
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2113282680511475,
      "learning_rate": 0.0001619834889956733,
      "loss": 2.9854,
      "step": 150262
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5108840465545654,
      "learning_rate": 0.00016197985703761856,
      "loss": 2.967,
      "step": 150263
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.260148525238037,
      "learning_rate": 0.00016197622510522437,
      "loss": 2.7872,
      "step": 150264
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.817964553833008,
      "learning_rate": 0.00016197259319849128,
      "loss": 2.8344,
      "step": 150265
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.478562831878662,
      "learning_rate": 0.0001619689613174202,
      "loss": 2.8857,
      "step": 150266
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.450406789779663,
      "learning_rate": 0.00016196532946201157,
      "loss": 2.8614,
      "step": 150267
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.62727427482605,
      "learning_rate": 0.0001619616976322663,
      "loss": 2.969,
      "step": 150268
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2849338054656982,
      "learning_rate": 0.00016195806582818486,
      "loss": 2.7681,
      "step": 150269
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4628727436065674,
      "learning_rate": 0.00016195443404976826,
      "loss": 2.9126,
      "step": 150270
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.230243682861328,
      "learning_rate": 0.0001619508022970167,
      "loss": 2.8901,
      "step": 150271
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.689117670059204,
      "learning_rate": 0.00016194717056993119,
      "loss": 3.1087,
      "step": 150272
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.101707935333252,
      "learning_rate": 0.0001619435388685122,
      "loss": 2.8987,
      "step": 150273
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5055906772613525,
      "learning_rate": 0.00016193990719276063,
      "loss": 2.9671,
      "step": 150274
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1375856399536133,
      "learning_rate": 0.00016193627554267692,
      "loss": 3.0844,
      "step": 150275
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.263782262802124,
      "learning_rate": 0.00016193264391826213,
      "loss": 2.6402,
      "step": 150276
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.566357135772705,
      "learning_rate": 0.00016192901231951637,
      "loss": 3.0576,
      "step": 150277
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4894349575042725,
      "learning_rate": 0.00016192538074644074,
      "loss": 2.8778,
      "step": 150278
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9533699750900269,
      "learning_rate": 0.00016192174919903572,
      "loss": 2.942,
      "step": 150279
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0014078617095947,
      "learning_rate": 0.00016191811767730216,
      "loss": 2.8789,
      "step": 150280
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.039663314819336,
      "learning_rate": 0.0001619144861812405,
      "loss": 2.8754,
      "step": 150281
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1756560802459717,
      "learning_rate": 0.00016191085471085176,
      "loss": 2.7874,
      "step": 150282
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9847612380981445,
      "learning_rate": 0.00016190722326613616,
      "loss": 2.8545,
      "step": 150283
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6311159133911133,
      "learning_rate": 0.00016190359184709477,
      "loss": 3.0389,
      "step": 150284
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3040542602539062,
      "learning_rate": 0.00016189996045372796,
      "loss": 2.8812,
      "step": 150285
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.8114625215530396,
      "learning_rate": 0.00016189632908603666,
      "loss": 3.0427,
      "step": 150286
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3543341159820557,
      "learning_rate": 0.00016189269774402138,
      "loss": 2.9791,
      "step": 150287
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.133817672729492,
      "learning_rate": 0.000161889066427683,
      "loss": 2.8575,
      "step": 150288
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2307932376861572,
      "learning_rate": 0.00016188543513702191,
      "loss": 2.8887,
      "step": 150289
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4387967586517334,
      "learning_rate": 0.00016188180387203898,
      "loss": 2.967,
      "step": 150290
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6808276176452637,
      "learning_rate": 0.00016187817263273472,
      "loss": 2.9527,
      "step": 150291
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7905595302581787,
      "learning_rate": 0.00016187454141911004,
      "loss": 3.1094,
      "step": 150292
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3873677253723145,
      "learning_rate": 0.0001618709102311654,
      "loss": 3.0335,
      "step": 150293
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5255048274993896,
      "learning_rate": 0.00016186727906890178,
      "loss": 2.9583,
      "step": 150294
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6633737087249756,
      "learning_rate": 0.0001618636479323194,
      "loss": 2.8805,
      "step": 150295
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4490320682525635,
      "learning_rate": 0.0001618600168214193,
      "loss": 3.1067,
      "step": 150296
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1842875480651855,
      "learning_rate": 0.0001618563857362019,
      "loss": 3.0971,
      "step": 150297
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.399678945541382,
      "learning_rate": 0.00016185275467666812,
      "loss": 3.0661,
      "step": 150298
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.093329429626465,
      "learning_rate": 0.00016184912364281847,
      "loss": 2.6667,
      "step": 150299
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7671730518341064,
      "learning_rate": 0.0001618454926346538,
      "loss": 2.9214,
      "step": 150300
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0334582328796387,
      "learning_rate": 0.0001618418616521745,
      "loss": 3.118,
      "step": 150301
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2625391483306885,
      "learning_rate": 0.0001618382306953815,
      "loss": 2.9073,
      "step": 150302
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.018498659133911,
      "learning_rate": 0.0001618345997642753,
      "loss": 2.9774,
      "step": 150303
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.085430860519409,
      "learning_rate": 0.00016183096885885668,
      "loss": 2.7875,
      "step": 150304
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3383476734161377,
      "learning_rate": 0.0001618273379791263,
      "loss": 3.0515,
      "step": 150305
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4468908309936523,
      "learning_rate": 0.00016182370712508495,
      "loss": 3.0758,
      "step": 150306
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.612455368041992,
      "learning_rate": 0.000161820076296733,
      "loss": 3.2535,
      "step": 150307
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6480953693389893,
      "learning_rate": 0.0001618164454940714,
      "loss": 2.8471,
      "step": 150308
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2812998294830322,
      "learning_rate": 0.00016181281471710064,
      "loss": 3.0462,
      "step": 150309
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.297528028488159,
      "learning_rate": 0.0001618091839658216,
      "loss": 2.8723,
      "step": 150310
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0138418674468994,
      "learning_rate": 0.00016180555324023474,
      "loss": 2.5241,
      "step": 150311
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6526708602905273,
      "learning_rate": 0.0001618019225403409,
      "loss": 3.269,
      "step": 150312
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4373254776000977,
      "learning_rate": 0.00016179829186614074,
      "loss": 2.8747,
      "step": 150313
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0316007137298584,
      "learning_rate": 0.00016179466121763486,
      "loss": 2.8666,
      "step": 150314
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.945013999938965,
      "learning_rate": 0.00016179103059482384,
      "loss": 3.2634,
      "step": 150315
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.36608624458313,
      "learning_rate": 0.00016178739999770864,
      "loss": 3.0596,
      "step": 150316
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6079518795013428,
      "learning_rate": 0.0001617837694262896,
      "loss": 3.2249,
      "step": 150317
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5472142696380615,
      "learning_rate": 0.0001617801388805677,
      "loss": 2.8624,
      "step": 150318
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2782773971557617,
      "learning_rate": 0.0001617765083605435,
      "loss": 2.9418,
      "step": 150319
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.13616943359375,
      "learning_rate": 0.00016177287786621749,
      "loss": 2.9603,
      "step": 150320
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.584902048110962,
      "learning_rate": 0.0001617692473975907,
      "loss": 3.248,
      "step": 150321
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.164658308029175,
      "learning_rate": 0.00016176561695466354,
      "loss": 2.9387,
      "step": 150322
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5887539386749268,
      "learning_rate": 0.00016176198653743672,
      "loss": 2.8366,
      "step": 150323
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4457552433013916,
      "learning_rate": 0.00016175835614591104,
      "loss": 3.2672,
      "step": 150324
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4129674434661865,
      "learning_rate": 0.00016175472578008708,
      "loss": 3.1505,
      "step": 150325
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0763792991638184,
      "learning_rate": 0.00016175109543996538,
      "loss": 2.9613,
      "step": 150326
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.373598337173462,
      "learning_rate": 0.00016174746512554695,
      "loss": 3.0481,
      "step": 150327
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1761155128479004,
      "learning_rate": 0.00016174383483683223,
      "loss": 3.013,
      "step": 150328
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.90897798538208,
      "learning_rate": 0.00016174020457382184,
      "loss": 2.927,
      "step": 150329
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4414892196655273,
      "learning_rate": 0.00016173657433651666,
      "loss": 2.9985,
      "step": 150330
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5131630897521973,
      "learning_rate": 0.00016173294412491726,
      "loss": 3.2257,
      "step": 150331
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4939684867858887,
      "learning_rate": 0.0001617293139390242,
      "loss": 2.9631,
      "step": 150332
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.389885902404785,
      "learning_rate": 0.00016172568377883842,
      "loss": 2.8383,
      "step": 150333
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.405872344970703,
      "learning_rate": 0.00016172205364436027,
      "loss": 2.8657,
      "step": 150334
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2436139583587646,
      "learning_rate": 0.00016171842353559075,
      "loss": 2.9152,
      "step": 150335
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1214241981506348,
      "learning_rate": 0.0001617147934525304,
      "loss": 2.9746,
      "step": 150336
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.143944025039673,
      "learning_rate": 0.00016171116339517986,
      "loss": 3.0798,
      "step": 150337
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9128594398498535,
      "learning_rate": 0.0001617075333635397,
      "loss": 2.9494,
      "step": 150338
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.46264910697937,
      "learning_rate": 0.00016170390335761088,
      "loss": 2.8585,
      "step": 150339
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4647693634033203,
      "learning_rate": 0.00016170027337739375,
      "loss": 2.796,
      "step": 150340
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.380948305130005,
      "learning_rate": 0.00016169664342288931,
      "loss": 3.0541,
      "step": 150341
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1909284591674805,
      "learning_rate": 0.00016169301349409804,
      "loss": 2.8559,
      "step": 150342
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.340576171875,
      "learning_rate": 0.00016168938359102064,
      "loss": 3.1551,
      "step": 150343
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2370054721832275,
      "learning_rate": 0.00016168575371365768,
      "loss": 2.809,
      "step": 150344
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.297067880630493,
      "learning_rate": 0.0001616821238620101,
      "loss": 2.8981,
      "step": 150345
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.192556381225586,
      "learning_rate": 0.00016167849403607827,
      "loss": 2.9507,
      "step": 150346
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2791895866394043,
      "learning_rate": 0.00016167486423586316,
      "loss": 2.8465,
      "step": 150347
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.011007785797119,
      "learning_rate": 0.0001616712344613652,
      "loss": 3.02,
      "step": 150348
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.39361834526062,
      "learning_rate": 0.0001616676047125854,
      "loss": 2.9122,
      "step": 150349
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.509366273880005,
      "learning_rate": 0.00016166397498952394,
      "loss": 2.9852,
      "step": 150350
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0570483207702637,
      "learning_rate": 0.00016166034529218189,
      "loss": 2.9745,
      "step": 150351
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.087394952774048,
      "learning_rate": 0.00016165671562055968,
      "loss": 3.0199,
      "step": 150352
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.807313442230225,
      "learning_rate": 0.00016165308597465818,
      "loss": 3.1357,
      "step": 150353
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.28861403465271,
      "learning_rate": 0.0001616494563544779,
      "loss": 2.9671,
      "step": 150354
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3589134216308594,
      "learning_rate": 0.00016164582676001985,
      "loss": 3.0037,
      "step": 150355
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.824906826019287,
      "learning_rate": 0.00016164219719128422,
      "loss": 2.9269,
      "step": 150356
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.661766290664673,
      "learning_rate": 0.000161638567648272,
      "loss": 3.1276,
      "step": 150357
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.621047019958496,
      "learning_rate": 0.00016163493813098367,
      "loss": 2.7952,
      "step": 150358
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6194162368774414,
      "learning_rate": 0.00016163130863942014,
      "loss": 3.0167,
      "step": 150359
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.062797784805298,
      "learning_rate": 0.00016162767917358187,
      "loss": 2.9189,
      "step": 150360
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.6727609634399414,
      "learning_rate": 0.0001616240497334698,
      "loss": 2.9025,
      "step": 150361
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6355478763580322,
      "learning_rate": 0.00016162042031908424,
      "loss": 2.7088,
      "step": 150362
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.088214159011841,
      "learning_rate": 0.00016161679093042617,
      "loss": 2.9583,
      "step": 150363
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1238491535186768,
      "learning_rate": 0.00016161316156749605,
      "loss": 3.1404,
      "step": 150364
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1235439777374268,
      "learning_rate": 0.00016160953223029473,
      "loss": 3.0328,
      "step": 150365
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5626142024993896,
      "learning_rate": 0.0001616059029188227,
      "loss": 2.6924,
      "step": 150366
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2246499061584473,
      "learning_rate": 0.00016160227363308099,
      "loss": 2.8538,
      "step": 150367
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.429609775543213,
      "learning_rate": 0.0001615986443730698,
      "loss": 2.6664,
      "step": 150368
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2776856422424316,
      "learning_rate": 0.00016159501513879015,
      "loss": 2.8654,
      "step": 150369
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4644458293914795,
      "learning_rate": 0.00016159138593024248,
      "loss": 2.9907,
      "step": 150370
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.262392520904541,
      "learning_rate": 0.00016158775674742775,
      "loss": 3.0262,
      "step": 150371
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2907745838165283,
      "learning_rate": 0.0001615841275903463,
      "loss": 2.7412,
      "step": 150372
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7082977294921875,
      "learning_rate": 0.00016158049845899918,
      "loss": 3.0405,
      "step": 150373
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9977633953094482,
      "learning_rate": 0.00016157686935338667,
      "loss": 3.1096,
      "step": 150374
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.944345474243164,
      "learning_rate": 0.00016157324027350975,
      "loss": 2.5337,
      "step": 150375
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4444773197174072,
      "learning_rate": 0.00016156961121936882,
      "loss": 2.8481,
      "step": 150376
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.701735258102417,
      "learning_rate": 0.00016156598219096486,
      "loss": 2.9161,
      "step": 150377
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0740795135498047,
      "learning_rate": 0.0001615623531882983,
      "loss": 2.8781,
      "step": 150378
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1998589038848877,
      "learning_rate": 0.00016155872421137011,
      "loss": 3.0531,
      "step": 150379
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.161770820617676,
      "learning_rate": 0.00016155509526018054,
      "loss": 2.9323,
      "step": 150380
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6611998081207275,
      "learning_rate": 0.0001615514663347306,
      "loss": 2.9499,
      "step": 150381
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.780459403991699,
      "learning_rate": 0.0001615478374350208,
      "loss": 3.038,
      "step": 150382
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8215067386627197,
      "learning_rate": 0.0001615442085610519,
      "loss": 3.0388,
      "step": 150383
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.472505807876587,
      "learning_rate": 0.00016154057971282449,
      "loss": 3.1525,
      "step": 150384
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.981285333633423,
      "learning_rate": 0.0001615369508903395,
      "loss": 3.1156,
      "step": 150385
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.6329307556152344,
      "learning_rate": 0.00016153332209359718,
      "loss": 3.2002,
      "step": 150386
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.740138292312622,
      "learning_rate": 0.00016152969332259857,
      "loss": 2.8176,
      "step": 150387
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3982994556427,
      "learning_rate": 0.00016152606457734407,
      "loss": 2.7339,
      "step": 150388
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2746105194091797,
      "learning_rate": 0.0001615224358578346,
      "loss": 2.8769,
      "step": 150389
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.7324016094207764,
      "learning_rate": 0.00016151880716407062,
      "loss": 2.8769,
      "step": 150390
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.421041965484619,
      "learning_rate": 0.00016151517849605308,
      "loss": 2.8413,
      "step": 150391
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7510054111480713,
      "learning_rate": 0.00016151154985378233,
      "loss": 3.045,
      "step": 150392
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.486902952194214,
      "learning_rate": 0.00016150792123725927,
      "loss": 3.19,
      "step": 150393
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.8806004524230957,
      "learning_rate": 0.0001615042926464844,
      "loss": 2.8099,
      "step": 150394
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.783876895904541,
      "learning_rate": 0.0001615006640814586,
      "loss": 2.922,
      "step": 150395
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.999023914337158,
      "learning_rate": 0.00016149703554218237,
      "loss": 2.9459,
      "step": 150396
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7337863445281982,
      "learning_rate": 0.00016149340702865651,
      "loss": 3.0677,
      "step": 150397
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.026834487915039,
      "learning_rate": 0.00016148977854088168,
      "loss": 2.8279,
      "step": 150398
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.9824717044830322,
      "learning_rate": 0.0001614861500788585,
      "loss": 3.1454,
      "step": 150399
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.751682996749878,
      "learning_rate": 0.00016148252164258753,
      "loss": 3.0522,
      "step": 150400
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3649449348449707,
      "learning_rate": 0.00016147889323206972,
      "loss": 3.0852,
      "step": 150401
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6875784397125244,
      "learning_rate": 0.00016147526484730544,
      "loss": 2.8876,
      "step": 150402
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.656724214553833,
      "learning_rate": 0.0001614716364882957,
      "loss": 3.0256,
      "step": 150403
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0170841217041016,
      "learning_rate": 0.00016146800815504094,
      "loss": 2.9745,
      "step": 150404
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.001539707183838,
      "learning_rate": 0.00016146437984754193,
      "loss": 2.8819,
      "step": 150405
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4476983547210693,
      "learning_rate": 0.0001614607515657992,
      "loss": 3.0412,
      "step": 150406
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.036672830581665,
      "learning_rate": 0.0001614571233098136,
      "loss": 2.9579,
      "step": 150407
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6542413234710693,
      "learning_rate": 0.00016145349507958563,
      "loss": 2.9881,
      "step": 150408
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4819486141204834,
      "learning_rate": 0.0001614498668751162,
      "loss": 2.8705,
      "step": 150409
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5724806785583496,
      "learning_rate": 0.00016144623869640586,
      "loss": 2.975,
      "step": 150410
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.8607680797576904,
      "learning_rate": 0.00016144261054345515,
      "loss": 2.8975,
      "step": 150411
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1014256477355957,
      "learning_rate": 0.000161438982416265,
      "loss": 3.1703,
      "step": 150412
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0162508487701416,
      "learning_rate": 0.00016143535431483596,
      "loss": 2.9596,
      "step": 150413
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0559329986572266,
      "learning_rate": 0.0001614317262391686,
      "loss": 2.9846,
      "step": 150414
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4355199337005615,
      "learning_rate": 0.0001614280981892638,
      "loss": 3.1665,
      "step": 150415
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.696183919906616,
      "learning_rate": 0.00016142447016512213,
      "loss": 3.0734,
      "step": 150416
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0648770332336426,
      "learning_rate": 0.00016142084216674417,
      "loss": 2.9485,
      "step": 150417
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5935239791870117,
      "learning_rate": 0.00016141721419413076,
      "loss": 2.8887,
      "step": 150418
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.8594890832901,
      "learning_rate": 0.00016141358624728244,
      "loss": 2.7581,
      "step": 150419
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1525211334228516,
      "learning_rate": 0.00016140995832620005,
      "loss": 3.0056,
      "step": 150420
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.608290433883667,
      "learning_rate": 0.00016140633043088416,
      "loss": 2.9595,
      "step": 150421
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.4135351181030273,
      "learning_rate": 0.00016140270256133544,
      "loss": 3.05,
      "step": 150422
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1811563968658447,
      "learning_rate": 0.00016139907471755447,
      "loss": 3.0205,
      "step": 150423
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2098851203918457,
      "learning_rate": 0.00016139544689954213,
      "loss": 3.0699,
      "step": 150424
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.979904890060425,
      "learning_rate": 0.00016139181910729893,
      "loss": 2.7055,
      "step": 150425
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3271965980529785,
      "learning_rate": 0.00016138819134082566,
      "loss": 2.9625,
      "step": 150426
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8755152225494385,
      "learning_rate": 0.00016138456360012296,
      "loss": 2.6938,
      "step": 150427
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.23486328125,
      "learning_rate": 0.00016138093588519148,
      "loss": 2.853,
      "step": 150428
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.281437873840332,
      "learning_rate": 0.0001613773081960318,
      "loss": 3.0882,
      "step": 150429
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.98051381111145,
      "learning_rate": 0.00016137368053264483,
      "loss": 2.9255,
      "step": 150430
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2482335567474365,
      "learning_rate": 0.00016137005289503095,
      "loss": 2.9932,
      "step": 150431
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5525906085968018,
      "learning_rate": 0.00016136642528319112,
      "loss": 2.8631,
      "step": 150432
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.372305154800415,
      "learning_rate": 0.0001613627976971258,
      "loss": 3.1276,
      "step": 150433
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.6160411834716797,
      "learning_rate": 0.00016135917013683596,
      "loss": 3.0146,
      "step": 150434
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.722393035888672,
      "learning_rate": 0.00016135554260232184,
      "loss": 2.7626,
      "step": 150435
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6731760501861572,
      "learning_rate": 0.00016135191509358444,
      "loss": 3.155,
      "step": 150436
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9209400415420532,
      "learning_rate": 0.00016134828761062427,
      "loss": 3.0292,
      "step": 150437
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.629145622253418,
      "learning_rate": 0.00016134466015344214,
      "loss": 2.8489,
      "step": 150438
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0729293823242188,
      "learning_rate": 0.00016134103272203855,
      "loss": 2.9131,
      "step": 150439
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.322499990463257,
      "learning_rate": 0.00016133740531641455,
      "loss": 3.0483,
      "step": 150440
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.189235210418701,
      "learning_rate": 0.00016133377793657025,
      "loss": 2.9744,
      "step": 150441
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.7789433002471924,
      "learning_rate": 0.0001613301505825068,
      "loss": 2.9378,
      "step": 150442
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3890817165374756,
      "learning_rate": 0.00016132652325422453,
      "loss": 3.0044,
      "step": 150443
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.931159496307373,
      "learning_rate": 0.0001613228959517244,
      "loss": 3.0322,
      "step": 150444
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.425398588180542,
      "learning_rate": 0.00016131926867500685,
      "loss": 3.1428,
      "step": 150445
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.077410936355591,
      "learning_rate": 0.00016131564142407288,
      "loss": 2.682,
      "step": 150446
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6781351566314697,
      "learning_rate": 0.00016131201419892273,
      "loss": 3.2291,
      "step": 150447
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.556513786315918,
      "learning_rate": 0.00016130838699955744,
      "loss": 2.9678,
      "step": 150448
    },
    {
      "epoch": 1.96,
      "grad_norm": 5.128357887268066,
      "learning_rate": 0.00016130475982597736,
      "loss": 2.837,
      "step": 150449
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.3043980598449707,
      "learning_rate": 0.00016130113267818352,
      "loss": 2.8225,
      "step": 150450
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.433455467224121,
      "learning_rate": 0.00016129750555617627,
      "loss": 2.7526,
      "step": 150451
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1197454929351807,
      "learning_rate": 0.00016129387845995668,
      "loss": 3.0525,
      "step": 150452
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2743639945983887,
      "learning_rate": 0.00016129025138952494,
      "loss": 3.0388,
      "step": 150453
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9776275157928467,
      "learning_rate": 0.00016128662434488207,
      "loss": 2.7104,
      "step": 150454
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.528790235519409,
      "learning_rate": 0.00016128299732602852,
      "loss": 3.0468,
      "step": 150455
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0429186820983887,
      "learning_rate": 0.0001612793703329652,
      "loss": 3.1861,
      "step": 150456
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.44225811958313,
      "learning_rate": 0.00016127574336569255,
      "loss": 3.0856,
      "step": 150457
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5075430870056152,
      "learning_rate": 0.00016127211642421156,
      "loss": 3.0834,
      "step": 150458
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.278684616088867,
      "learning_rate": 0.0001612684895085225,
      "loss": 2.9071,
      "step": 150459
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9501898288726807,
      "learning_rate": 0.00016126486261862637,
      "loss": 2.7298,
      "step": 150460
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.24391770362854,
      "learning_rate": 0.0001612612357545236,
      "loss": 2.9096,
      "step": 150461
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.24428391456604,
      "learning_rate": 0.0001612576089162151,
      "loss": 3.1229,
      "step": 150462
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3049862384796143,
      "learning_rate": 0.0001612539821037013,
      "loss": 3.0189,
      "step": 150463
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.556485176086426,
      "learning_rate": 0.00016125035531698324,
      "loss": 2.905,
      "step": 150464
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.76880145072937,
      "learning_rate": 0.00016124672855606114,
      "loss": 2.882,
      "step": 150465
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.327345609664917,
      "learning_rate": 0.00016124310182093597,
      "loss": 2.8982,
      "step": 150466
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6478328704833984,
      "learning_rate": 0.00016123947511160826,
      "loss": 3.0762,
      "step": 150467
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.65665340423584,
      "learning_rate": 0.00016123584842807885,
      "loss": 3.0222,
      "step": 150468
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1692867279052734,
      "learning_rate": 0.00016123222177034818,
      "loss": 2.702,
      "step": 150469
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5994653701782227,
      "learning_rate": 0.00016122859513841722,
      "loss": 3.0191,
      "step": 150470
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.490487813949585,
      "learning_rate": 0.00016122496853228642,
      "loss": 2.8555,
      "step": 150471
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0519330501556396,
      "learning_rate": 0.00016122134195195657,
      "loss": 2.9409,
      "step": 150472
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2760372161865234,
      "learning_rate": 0.00016121771539742816,
      "loss": 2.9907,
      "step": 150473
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.263136148452759,
      "learning_rate": 0.0001612140888687021,
      "loss": 3.1002,
      "step": 150474
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.8326480388641357,
      "learning_rate": 0.00016121046236577884,
      "loss": 2.9231,
      "step": 150475
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3142032623291016,
      "learning_rate": 0.00016120683588865932,
      "loss": 2.9846,
      "step": 150476
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1998450756073,
      "learning_rate": 0.00016120320943734405,
      "loss": 2.9219,
      "step": 150477
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3077785968780518,
      "learning_rate": 0.00016119958301183372,
      "loss": 2.7335,
      "step": 150478
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9387972354888916,
      "learning_rate": 0.00016119595661212888,
      "loss": 2.9693,
      "step": 150479
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.6454548835754395,
      "learning_rate": 0.0001611923302382305,
      "loss": 2.7677,
      "step": 150480
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.68872332572937,
      "learning_rate": 0.00016118870389013893,
      "loss": 3.0601,
      "step": 150481
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8980982303619385,
      "learning_rate": 0.00016118507756785513,
      "loss": 2.9933,
      "step": 150482
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2565057277679443,
      "learning_rate": 0.00016118145127137963,
      "loss": 2.891,
      "step": 150483
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.505620002746582,
      "learning_rate": 0.00016117782500071315,
      "loss": 2.8675,
      "step": 150484
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5711684226989746,
      "learning_rate": 0.00016117419875585616,
      "loss": 3.0534,
      "step": 150485
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3874688148498535,
      "learning_rate": 0.00016117057253680966,
      "loss": 2.9047,
      "step": 150486
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4336516857147217,
      "learning_rate": 0.00016116694634357406,
      "loss": 3.1819,
      "step": 150487
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4868152141571045,
      "learning_rate": 0.00016116332017615024,
      "loss": 3.0722,
      "step": 150488
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8362667560577393,
      "learning_rate": 0.0001611596940345388,
      "loss": 2.6808,
      "step": 150489
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.102747678756714,
      "learning_rate": 0.0001611560679187404,
      "loss": 3.124,
      "step": 150490
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9159462451934814,
      "learning_rate": 0.00016115244182875554,
      "loss": 2.9323,
      "step": 150491
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.464964389801025,
      "learning_rate": 0.00016114881576458523,
      "loss": 2.907,
      "step": 150492
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4292995929718018,
      "learning_rate": 0.00016114518972622986,
      "loss": 2.9315,
      "step": 150493
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4359419345855713,
      "learning_rate": 0.00016114156371369032,
      "loss": 2.9588,
      "step": 150494
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.252598285675049,
      "learning_rate": 0.00016113793772696717,
      "loss": 2.726,
      "step": 150495
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3056182861328125,
      "learning_rate": 0.00016113431176606103,
      "loss": 3.0106,
      "step": 150496
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0401628017425537,
      "learning_rate": 0.0001611306858309727,
      "loss": 2.8055,
      "step": 150497
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.203378438949585,
      "learning_rate": 0.00016112705992170283,
      "loss": 3.3128,
      "step": 150498
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1462674140930176,
      "learning_rate": 0.00016112343403825195,
      "loss": 2.7471,
      "step": 150499
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.624260663986206,
      "learning_rate": 0.00016111980818062094,
      "loss": 2.9085,
      "step": 150500
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2069571018218994,
      "learning_rate": 0.0001611161823488104,
      "loss": 2.9771,
      "step": 150501
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7355940341949463,
      "learning_rate": 0.00016111255654282087,
      "loss": 2.9172,
      "step": 150502
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2525885105133057,
      "learning_rate": 0.00016110893076265323,
      "loss": 3.0822,
      "step": 150503
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.508073568344116,
      "learning_rate": 0.00016110530500830806,
      "loss": 2.7237,
      "step": 150504
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3972513675689697,
      "learning_rate": 0.00016110167927978593,
      "loss": 3.0187,
      "step": 150505
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2607569694519043,
      "learning_rate": 0.00016109805357708776,
      "loss": 2.8695,
      "step": 150506
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.193784236907959,
      "learning_rate": 0.00016109442790021404,
      "loss": 3.1117,
      "step": 150507
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1694729328155518,
      "learning_rate": 0.0001610908022491654,
      "loss": 2.689,
      "step": 150508
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2078769207000732,
      "learning_rate": 0.0001610871766239427,
      "loss": 3.018,
      "step": 150509
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3932225704193115,
      "learning_rate": 0.00016108355102454646,
      "loss": 2.8361,
      "step": 150510
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.063554525375366,
      "learning_rate": 0.00016107992545097744,
      "loss": 3.083,
      "step": 150511
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.3568289279937744,
      "learning_rate": 0.00016107629990323632,
      "loss": 3.2532,
      "step": 150512
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.723633050918579,
      "learning_rate": 0.00016107267438132377,
      "loss": 3.1024,
      "step": 150513
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0334534645080566,
      "learning_rate": 0.00016106904888524027,
      "loss": 3.001,
      "step": 150514
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.840310573577881,
      "learning_rate": 0.0001610654234149868,
      "loss": 2.95,
      "step": 150515
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.287789821624756,
      "learning_rate": 0.00016106179797056373,
      "loss": 2.899,
      "step": 150516
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.7380011081695557,
      "learning_rate": 0.00016105817255197206,
      "loss": 2.9531,
      "step": 150517
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0674092769622803,
      "learning_rate": 0.00016105454715921216,
      "loss": 3.2156,
      "step": 150518
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.045948028564453,
      "learning_rate": 0.00016105092179228506,
      "loss": 2.7877,
      "step": 150519
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.481327533721924,
      "learning_rate": 0.000161047296451191,
      "loss": 2.9988,
      "step": 150520
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7309937477111816,
      "learning_rate": 0.00016104367113593102,
      "loss": 2.9584,
      "step": 150521
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.947002410888672,
      "learning_rate": 0.00016104004584650548,
      "loss": 2.6055,
      "step": 150522
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.170267105102539,
      "learning_rate": 0.00016103642058291537,
      "loss": 2.9507,
      "step": 150523
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.785681962966919,
      "learning_rate": 0.00016103279534516112,
      "loss": 3.0835,
      "step": 150524
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.331427812576294,
      "learning_rate": 0.00016102917013324365,
      "loss": 3.0027,
      "step": 150525
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5382516384124756,
      "learning_rate": 0.0001610255449471633,
      "loss": 3.1065,
      "step": 150526
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.197221517562866,
      "learning_rate": 0.00016102191978692104,
      "loss": 3.0022,
      "step": 150527
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.6719043254852295,
      "learning_rate": 0.00016101829465251728,
      "loss": 3.0061,
      "step": 150528
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8171944618225098,
      "learning_rate": 0.000161014669543953,
      "loss": 2.6976,
      "step": 150529
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4196102619171143,
      "learning_rate": 0.00016101104446122858,
      "loss": 2.955,
      "step": 150530
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.407365322113037,
      "learning_rate": 0.00016100741940434506,
      "loss": 2.8376,
      "step": 150531
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.151677370071411,
      "learning_rate": 0.00016100379437330265,
      "loss": 3.1203,
      "step": 150532
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4590723514556885,
      "learning_rate": 0.00016100016936810237,
      "loss": 3.0104,
      "step": 150533
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4771833419799805,
      "learning_rate": 0.0001609965443887447,
      "loss": 2.8758,
      "step": 150534
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.25368595123291,
      "learning_rate": 0.00016099291943523047,
      "loss": 2.8169,
      "step": 150535
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7929818630218506,
      "learning_rate": 0.00016098929450756018,
      "loss": 2.8567,
      "step": 150536
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1840949058532715,
      "learning_rate": 0.00016098566960573473,
      "loss": 3.234,
      "step": 150537
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.217076063156128,
      "learning_rate": 0.00016098204472975465,
      "loss": 2.9526,
      "step": 150538
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.806863307952881,
      "learning_rate": 0.00016097841987962064,
      "loss": 3.0868,
      "step": 150539
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.649547815322876,
      "learning_rate": 0.00016097479505533326,
      "loss": 2.7315,
      "step": 150540
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.216658592224121,
      "learning_rate": 0.0001609711702568934,
      "loss": 3.0609,
      "step": 150541
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2113754749298096,
      "learning_rate": 0.00016096754548430148,
      "loss": 3.0039,
      "step": 150542
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.285421848297119,
      "learning_rate": 0.00016096392073755849,
      "loss": 2.8406,
      "step": 150543
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.343921422958374,
      "learning_rate": 0.00016096029601666488,
      "loss": 2.9362,
      "step": 150544
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2060322761535645,
      "learning_rate": 0.00016095667132162144,
      "loss": 2.8613,
      "step": 150545
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.280944585800171,
      "learning_rate": 0.00016095304665242859,
      "loss": 2.9623,
      "step": 150546
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1758081912994385,
      "learning_rate": 0.00016094942200908734,
      "loss": 3.0222,
      "step": 150547
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.716200828552246,
      "learning_rate": 0.0001609457973915981,
      "loss": 2.8687,
      "step": 150548
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.53071665763855,
      "learning_rate": 0.0001609421727999618,
      "loss": 3.032,
      "step": 150549
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0011157989501953,
      "learning_rate": 0.00016093854823417896,
      "loss": 2.9803,
      "step": 150550
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.125823736190796,
      "learning_rate": 0.00016093492369425024,
      "loss": 2.8046,
      "step": 150551
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9109854698181152,
      "learning_rate": 0.00016093129918017625,
      "loss": 3.1325,
      "step": 150552
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8020143508911133,
      "learning_rate": 0.00016092767469195787,
      "loss": 2.9426,
      "step": 150553
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.4764976501464844,
      "learning_rate": 0.00016092405022959555,
      "loss": 2.9954,
      "step": 150554
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8557722568511963,
      "learning_rate": 0.00016092042579309016,
      "loss": 2.7467,
      "step": 150555
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.231569528579712,
      "learning_rate": 0.00016091680138244236,
      "loss": 2.8989,
      "step": 150556
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8584980964660645,
      "learning_rate": 0.00016091317699765267,
      "loss": 2.965,
      "step": 150557
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0690274238586426,
      "learning_rate": 0.0001609095526387218,
      "loss": 2.7362,
      "step": 150558
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.612863779067993,
      "learning_rate": 0.00016090592830565056,
      "loss": 3.0185,
      "step": 150559
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2300591468811035,
      "learning_rate": 0.0001609023039984394,
      "loss": 3.1239,
      "step": 150560
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3131814002990723,
      "learning_rate": 0.00016089867971708926,
      "loss": 3.0686,
      "step": 150561
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.694910764694214,
      "learning_rate": 0.00016089505546160065,
      "loss": 2.6943,
      "step": 150562
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.91152286529541,
      "learning_rate": 0.0001608914312319743,
      "loss": 2.8026,
      "step": 150563
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3952648639678955,
      "learning_rate": 0.00016088780702821074,
      "loss": 3.0374,
      "step": 150564
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.124693870544434,
      "learning_rate": 0.0001608841828503109,
      "loss": 2.6534,
      "step": 150565
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.119492769241333,
      "learning_rate": 0.00016088055869827516,
      "loss": 2.8853,
      "step": 150566
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.589308977127075,
      "learning_rate": 0.0001608769345721045,
      "loss": 2.9025,
      "step": 150567
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.278473377227783,
      "learning_rate": 0.0001608733104717994,
      "loss": 2.9955,
      "step": 150568
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2261595726013184,
      "learning_rate": 0.00016086968639736067,
      "loss": 2.7612,
      "step": 150569
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.125593900680542,
      "learning_rate": 0.0001608660623487887,
      "loss": 3.0031,
      "step": 150570
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6721489429473877,
      "learning_rate": 0.0001608624383260845,
      "loss": 2.6303,
      "step": 150571
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1351184844970703,
      "learning_rate": 0.00016085881432924845,
      "loss": 2.8173,
      "step": 150572
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6444144248962402,
      "learning_rate": 0.0001608551903582815,
      "loss": 3.1128,
      "step": 150573
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.657447338104248,
      "learning_rate": 0.00016085156641318423,
      "loss": 3.267,
      "step": 150574
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5753560066223145,
      "learning_rate": 0.00016084794249395724,
      "loss": 2.9507,
      "step": 150575
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0277490615844727,
      "learning_rate": 0.0001608443186006011,
      "loss": 2.8753,
      "step": 150576
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5135691165924072,
      "learning_rate": 0.00016084069473311683,
      "loss": 2.814,
      "step": 150577
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9684778451919556,
      "learning_rate": 0.00016083707089150474,
      "loss": 2.9704,
      "step": 150578
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.426487445831299,
      "learning_rate": 0.0001608334470757658,
      "loss": 3.0067,
      "step": 150579
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.284672498703003,
      "learning_rate": 0.0001608298232859005,
      "loss": 2.9673,
      "step": 150580
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5319926738739014,
      "learning_rate": 0.0001608261995219095,
      "loss": 2.9515,
      "step": 150581
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2813527584075928,
      "learning_rate": 0.00016082257578379366,
      "loss": 2.9674,
      "step": 150582
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.850954294204712,
      "learning_rate": 0.00016081895207155348,
      "loss": 3.2185,
      "step": 150583
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.024535894393921,
      "learning_rate": 0.0001608153283851896,
      "loss": 2.7379,
      "step": 150584
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6773054599761963,
      "learning_rate": 0.00016081170472470287,
      "loss": 2.8724,
      "step": 150585
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.019749641418457,
      "learning_rate": 0.0001608080810900939,
      "loss": 3.0015,
      "step": 150586
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7474873065948486,
      "learning_rate": 0.00016080445748136322,
      "loss": 3.0843,
      "step": 150587
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1946301460266113,
      "learning_rate": 0.00016080083389851172,
      "loss": 3.2784,
      "step": 150588
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3798394203186035,
      "learning_rate": 0.00016079721034153996,
      "loss": 2.7064,
      "step": 150589
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.549328088760376,
      "learning_rate": 0.00016079358681044852,
      "loss": 2.6793,
      "step": 150590
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.740349531173706,
      "learning_rate": 0.0001607899633052383,
      "loss": 2.9424,
      "step": 150591
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.251101016998291,
      "learning_rate": 0.00016078633982590985,
      "loss": 2.8081,
      "step": 150592
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.533513069152832,
      "learning_rate": 0.00016078271637246373,
      "loss": 3.1382,
      "step": 150593
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.096994161605835,
      "learning_rate": 0.00016077909294490083,
      "loss": 2.8681,
      "step": 150594
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0801844596862793,
      "learning_rate": 0.00016077546954322168,
      "loss": 2.967,
      "step": 150595
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7120604515075684,
      "learning_rate": 0.00016077184616742704,
      "loss": 3.1434,
      "step": 150596
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.056563377380371,
      "learning_rate": 0.00016076822281751755,
      "loss": 2.9317,
      "step": 150597
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.193427085876465,
      "learning_rate": 0.00016076459949349392,
      "loss": 3.1035,
      "step": 150598
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7557759284973145,
      "learning_rate": 0.00016076097619535662,
      "loss": 2.7402,
      "step": 150599
    },
    {
      "epoch": 1.96,
      "grad_norm": 5.236400604248047,
      "learning_rate": 0.00016075735292310663,
      "loss": 3.0065,
      "step": 150600
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2404561042785645,
      "learning_rate": 0.00016075372967674432,
      "loss": 2.7977,
      "step": 150601
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5240232944488525,
      "learning_rate": 0.00016075010645627067,
      "loss": 2.7827,
      "step": 150602
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1851003170013428,
      "learning_rate": 0.0001607464832616861,
      "loss": 2.9752,
      "step": 150603
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5510201454162598,
      "learning_rate": 0.00016074286009299144,
      "loss": 3.0079,
      "step": 150604
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.811225414276123,
      "learning_rate": 0.00016073923695018736,
      "loss": 2.6847,
      "step": 150605
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.783766269683838,
      "learning_rate": 0.0001607356138332745,
      "loss": 3.1394,
      "step": 150606
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.776334285736084,
      "learning_rate": 0.00016073199074225338,
      "loss": 2.8827,
      "step": 150607
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1621408462524414,
      "learning_rate": 0.00016072836767712495,
      "loss": 2.8446,
      "step": 150608
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0142059326171875,
      "learning_rate": 0.00016072474463788957,
      "loss": 2.9711,
      "step": 150609
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.878540515899658,
      "learning_rate": 0.00016072112162454825,
      "loss": 3.1184,
      "step": 150610
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.3647031784057617,
      "learning_rate": 0.0001607174986371015,
      "loss": 3.0145,
      "step": 150611
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.908315896987915,
      "learning_rate": 0.00016071387567554997,
      "loss": 2.8448,
      "step": 150612
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.69870924949646,
      "learning_rate": 0.00016071025273989428,
      "loss": 3.2291,
      "step": 150613
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1353931427001953,
      "learning_rate": 0.0001607066298301353,
      "loss": 3.066,
      "step": 150614
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0477781295776367,
      "learning_rate": 0.00016070300694627344,
      "loss": 2.987,
      "step": 150615
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.77493143081665,
      "learning_rate": 0.00016069938408830962,
      "loss": 3.1018,
      "step": 150616
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1222763061523438,
      "learning_rate": 0.0001606957612562445,
      "loss": 2.9706,
      "step": 150617
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.44747257232666,
      "learning_rate": 0.0001606921384500786,
      "loss": 2.8659,
      "step": 150618
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.312365770339966,
      "learning_rate": 0.00016068851566981255,
      "loss": 3.0917,
      "step": 150619
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5050556659698486,
      "learning_rate": 0.00016068489291544725,
      "loss": 2.6691,
      "step": 150620
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.936941146850586,
      "learning_rate": 0.00016068127018698314,
      "loss": 2.9014,
      "step": 150621
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.9408884048461914,
      "learning_rate": 0.00016067764748442113,
      "loss": 2.9155,
      "step": 150622
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8232181072235107,
      "learning_rate": 0.0001606740248077618,
      "loss": 2.6932,
      "step": 150623
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.649120807647705,
      "learning_rate": 0.00016067040215700577,
      "loss": 2.5546,
      "step": 150624
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6890976428985596,
      "learning_rate": 0.00016066677953215368,
      "loss": 2.9178,
      "step": 150625
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.269160509109497,
      "learning_rate": 0.00016066315693320633,
      "loss": 2.9584,
      "step": 150626
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.291452407836914,
      "learning_rate": 0.0001606595343601642,
      "loss": 2.9679,
      "step": 150627
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.797569751739502,
      "learning_rate": 0.00016065591181302827,
      "loss": 3.0464,
      "step": 150628
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.916116237640381,
      "learning_rate": 0.00016065228929179896,
      "loss": 2.6186,
      "step": 150629
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0833818912506104,
      "learning_rate": 0.00016064866679647705,
      "loss": 3.1505,
      "step": 150630
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.071295738220215,
      "learning_rate": 0.0001606450443270631,
      "loss": 3.1618,
      "step": 150631
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3293967247009277,
      "learning_rate": 0.00016064142188355798,
      "loss": 2.9779,
      "step": 150632
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3832008838653564,
      "learning_rate": 0.0001606377994659621,
      "loss": 2.9902,
      "step": 150633
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.457184076309204,
      "learning_rate": 0.0001606341770742764,
      "loss": 3.0055,
      "step": 150634
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6798510551452637,
      "learning_rate": 0.00016063055470850145,
      "loss": 3.0291,
      "step": 150635
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.150056838989258,
      "learning_rate": 0.00016062693236863792,
      "loss": 2.969,
      "step": 150636
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.59019136428833,
      "learning_rate": 0.00016062331005468636,
      "loss": 3.0382,
      "step": 150637
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6342101097106934,
      "learning_rate": 0.00016061968776664764,
      "loss": 3.2009,
      "step": 150638
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.273149251937866,
      "learning_rate": 0.00016061606550452225,
      "loss": 3.07,
      "step": 150639
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.335181713104248,
      "learning_rate": 0.00016061244326831108,
      "loss": 2.9932,
      "step": 150640
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6474008560180664,
      "learning_rate": 0.00016060882105801473,
      "loss": 2.8701,
      "step": 150641
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.332620620727539,
      "learning_rate": 0.00016060519887363379,
      "loss": 3.0602,
      "step": 150642
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.09515380859375,
      "learning_rate": 0.00016060157671516886,
      "loss": 2.88,
      "step": 150643
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1952500343322754,
      "learning_rate": 0.00016059795458262086,
      "loss": 2.9394,
      "step": 150644
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1310207843780518,
      "learning_rate": 0.0001605943324759902,
      "loss": 3.2314,
      "step": 150645
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.284916877746582,
      "learning_rate": 0.00016059071039527782,
      "loss": 3.1267,
      "step": 150646
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8349056243896484,
      "learning_rate": 0.00016058708834048422,
      "loss": 3.015,
      "step": 150647
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.442471504211426,
      "learning_rate": 0.00016058346631161015,
      "loss": 2.7188,
      "step": 150648
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0978622436523438,
      "learning_rate": 0.0001605798443086561,
      "loss": 2.8888,
      "step": 150649
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.237560987472534,
      "learning_rate": 0.00016057622233162303,
      "loss": 3.0961,
      "step": 150650
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5818207263946533,
      "learning_rate": 0.00016057260038051133,
      "loss": 2.8612,
      "step": 150651
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.480114221572876,
      "learning_rate": 0.00016056897845532198,
      "loss": 2.6333,
      "step": 150652
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2140591144561768,
      "learning_rate": 0.00016056535655605542,
      "loss": 2.9327,
      "step": 150653
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.216984748840332,
      "learning_rate": 0.00016056173468271246,
      "loss": 2.9316,
      "step": 150654
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1161580085754395,
      "learning_rate": 0.00016055811283529356,
      "loss": 2.9728,
      "step": 150655
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1917474269866943,
      "learning_rate": 0.00016055449101379967,
      "loss": 2.9049,
      "step": 150656
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2033774852752686,
      "learning_rate": 0.0001605508692182312,
      "loss": 2.9932,
      "step": 150657
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.589073419570923,
      "learning_rate": 0.00016054724744858905,
      "loss": 3.0122,
      "step": 150658
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.257246732711792,
      "learning_rate": 0.00016054362570487382,
      "loss": 3.1208,
      "step": 150659
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.688185691833496,
      "learning_rate": 0.00016054000398708616,
      "loss": 2.77,
      "step": 150660
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.432061195373535,
      "learning_rate": 0.00016053638229522664,
      "loss": 3.004,
      "step": 150661
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.185340404510498,
      "learning_rate": 0.00016053276062929616,
      "loss": 2.9686,
      "step": 150662
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1730313301086426,
      "learning_rate": 0.00016052913898929516,
      "loss": 3.019,
      "step": 150663
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.8764877319335938,
      "learning_rate": 0.00016052551737522452,
      "loss": 2.9312,
      "step": 150664
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2520363330841064,
      "learning_rate": 0.0001605218957870848,
      "loss": 2.9462,
      "step": 150665
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.441009998321533,
      "learning_rate": 0.00016051827422487676,
      "loss": 2.9738,
      "step": 150666
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2251486778259277,
      "learning_rate": 0.0001605146526886008,
      "loss": 3.1131,
      "step": 150667
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.42085337638855,
      "learning_rate": 0.00016051103117825798,
      "loss": 2.88,
      "step": 150668
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0452613830566406,
      "learning_rate": 0.00016050740969384864,
      "loss": 2.7255,
      "step": 150669
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.3093557357788086,
      "learning_rate": 0.00016050378823537375,
      "loss": 2.969,
      "step": 150670
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9790855646133423,
      "learning_rate": 0.0001605001668028337,
      "loss": 2.7187,
      "step": 150671
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3487977981567383,
      "learning_rate": 0.00016049654539622944,
      "loss": 2.9827,
      "step": 150672
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2329349517822266,
      "learning_rate": 0.0001604929240155615,
      "loss": 2.8883,
      "step": 150673
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.312047004699707,
      "learning_rate": 0.00016048930266083055,
      "loss": 2.8697,
      "step": 150674
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.641516923904419,
      "learning_rate": 0.00016048568133203718,
      "loss": 2.9396,
      "step": 150675
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.064870834350586,
      "learning_rate": 0.00016048206002918227,
      "loss": 3.344,
      "step": 150676
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8512673377990723,
      "learning_rate": 0.00016047843875226624,
      "loss": 2.8405,
      "step": 150677
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3882617950439453,
      "learning_rate": 0.00016047481750129002,
      "loss": 2.899,
      "step": 150678
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4661026000976562,
      "learning_rate": 0.00016047119627625415,
      "loss": 2.8763,
      "step": 150679
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.148472309112549,
      "learning_rate": 0.00016046757507715923,
      "loss": 2.9628,
      "step": 150680
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0075736045837402,
      "learning_rate": 0.00016046395390400617,
      "loss": 3.0067,
      "step": 150681
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3631844520568848,
      "learning_rate": 0.00016046033275679542,
      "loss": 3.0145,
      "step": 150682
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0810201168060303,
      "learning_rate": 0.00016045671163552764,
      "loss": 2.9107,
      "step": 150683
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.288587808609009,
      "learning_rate": 0.00016045309054020372,
      "loss": 3.0191,
      "step": 150684
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6403849124908447,
      "learning_rate": 0.00016044946947082423,
      "loss": 2.8217,
      "step": 150685
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.33320689201355,
      "learning_rate": 0.00016044584842738968,
      "loss": 2.9953,
      "step": 150686
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2987446784973145,
      "learning_rate": 0.00016044222740990095,
      "loss": 3.0611,
      "step": 150687
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.238237142562866,
      "learning_rate": 0.0001604386064183587,
      "loss": 3.0554,
      "step": 150688
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.641812801361084,
      "learning_rate": 0.00016043498545276343,
      "loss": 2.9647,
      "step": 150689
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.58880615234375,
      "learning_rate": 0.000160431364513116,
      "loss": 3.0666,
      "step": 150690
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.7452635765075684,
      "learning_rate": 0.00016042774359941705,
      "loss": 3.0782,
      "step": 150691
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6854605674743652,
      "learning_rate": 0.0001604241227116671,
      "loss": 2.7264,
      "step": 150692
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.09769868850708,
      "learning_rate": 0.00016042050184986707,
      "loss": 2.8921,
      "step": 150693
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3464651107788086,
      "learning_rate": 0.00016041688101401736,
      "loss": 2.8778,
      "step": 150694
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.8893892765045166,
      "learning_rate": 0.00016041326020411892,
      "loss": 2.8964,
      "step": 150695
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1721267700195312,
      "learning_rate": 0.00016040963942017228,
      "loss": 3.0619,
      "step": 150696
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.010599613189697,
      "learning_rate": 0.00016040601866217815,
      "loss": 2.6358,
      "step": 150697
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.405571460723877,
      "learning_rate": 0.00016040239793013703,
      "loss": 2.8619,
      "step": 150698
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7459182739257812,
      "learning_rate": 0.00016039877722404984,
      "loss": 2.7823,
      "step": 150699
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1561100482940674,
      "learning_rate": 0.00016039515654391704,
      "loss": 2.9777,
      "step": 150700
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.2668354511260986,
      "learning_rate": 0.00016039153588973956,
      "loss": 2.9134,
      "step": 150701
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9694758653640747,
      "learning_rate": 0.00016038791526151792,
      "loss": 3.1316,
      "step": 150702
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.104624271392822,
      "learning_rate": 0.0001603842946592528,
      "loss": 2.7996,
      "step": 150703
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.005934000015259,
      "learning_rate": 0.00016038067408294477,
      "loss": 3.2535,
      "step": 150704
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1304643154144287,
      "learning_rate": 0.0001603770535325947,
      "loss": 2.9964,
      "step": 150705
    },
    {
      "epoch": 1.96,
      "grad_norm": 5.073506832122803,
      "learning_rate": 0.00016037343300820308,
      "loss": 2.7739,
      "step": 150706
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.979356050491333,
      "learning_rate": 0.00016036981250977078,
      "loss": 3.0979,
      "step": 150707
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.867567300796509,
      "learning_rate": 0.00016036619203729834,
      "loss": 2.938,
      "step": 150708
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1674249172210693,
      "learning_rate": 0.0001603625715907865,
      "loss": 2.9232,
      "step": 150709
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1011743545532227,
      "learning_rate": 0.00016035895117023577,
      "loss": 3.0665,
      "step": 150710
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.514854907989502,
      "learning_rate": 0.00016035533077564704,
      "loss": 2.883,
      "step": 150711
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9706971645355225,
      "learning_rate": 0.0001603517104070208,
      "loss": 3.1293,
      "step": 150712
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9651284217834473,
      "learning_rate": 0.00016034809006435793,
      "loss": 2.9282,
      "step": 150713
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3108527660369873,
      "learning_rate": 0.00016034446974765897,
      "loss": 2.7996,
      "step": 150714
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.0296735763549805,
      "learning_rate": 0.00016034084945692464,
      "loss": 3.018,
      "step": 150715
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.506944417953491,
      "learning_rate": 0.00016033722919215541,
      "loss": 2.8535,
      "step": 150716
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1267426013946533,
      "learning_rate": 0.0001603336089533523,
      "loss": 2.8905,
      "step": 150717
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2725255489349365,
      "learning_rate": 0.00016032998874051564,
      "loss": 2.7721,
      "step": 150718
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9872032403945923,
      "learning_rate": 0.00016032636855364642,
      "loss": 2.9856,
      "step": 150719
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.197822093963623,
      "learning_rate": 0.00016032274839274517,
      "loss": 2.959,
      "step": 150720
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5291082859039307,
      "learning_rate": 0.00016031912825781254,
      "loss": 2.7132,
      "step": 150721
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.5051331520080566,
      "learning_rate": 0.0001603155081488491,
      "loss": 3.1081,
      "step": 150722
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6268973350524902,
      "learning_rate": 0.00016031188806585577,
      "loss": 2.5622,
      "step": 150723
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3343098163604736,
      "learning_rate": 0.000160308268008833,
      "loss": 3.0792,
      "step": 150724
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.7350988388061523,
      "learning_rate": 0.00016030464797778165,
      "loss": 2.9405,
      "step": 150725
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.353152275085449,
      "learning_rate": 0.00016030102797270234,
      "loss": 2.8431,
      "step": 150726
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.156632423400879,
      "learning_rate": 0.00016029740799359567,
      "loss": 3.0194,
      "step": 150727
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2915098667144775,
      "learning_rate": 0.00016029378804046222,
      "loss": 2.6885,
      "step": 150728
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7137293815612793,
      "learning_rate": 0.00016029016811330293,
      "loss": 2.8871,
      "step": 150729
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.941272258758545,
      "learning_rate": 0.00016028654821211821,
      "loss": 3.0601,
      "step": 150730
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2569830417633057,
      "learning_rate": 0.000160282928336909,
      "loss": 2.7434,
      "step": 150731
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.30237078666687,
      "learning_rate": 0.0001602793084876758,
      "loss": 3.165,
      "step": 150732
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2224557399749756,
      "learning_rate": 0.0001602756886644193,
      "loss": 2.8667,
      "step": 150733
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8527493476867676,
      "learning_rate": 0.0001602720688671401,
      "loss": 3.0758,
      "step": 150734
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.169046640396118,
      "learning_rate": 0.00016026844909583908,
      "loss": 2.841,
      "step": 150735
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8632969856262207,
      "learning_rate": 0.00016026482935051667,
      "loss": 2.9292,
      "step": 150736
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.112445592880249,
      "learning_rate": 0.0001602612096311738,
      "loss": 2.808,
      "step": 150737
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.277813196182251,
      "learning_rate": 0.00016025758993781086,
      "loss": 2.7637,
      "step": 150738
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8514938354492188,
      "learning_rate": 0.00016025397027042896,
      "loss": 2.9951,
      "step": 150739
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.6379730701446533,
      "learning_rate": 0.0001602503506290282,
      "loss": 2.7564,
      "step": 150740
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5233230590820312,
      "learning_rate": 0.00016024673101360965,
      "loss": 3.144,
      "step": 150741
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3361058235168457,
      "learning_rate": 0.00016024311142417375,
      "loss": 3.0251,
      "step": 150742
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.273399591445923,
      "learning_rate": 0.00016023949186072143,
      "loss": 2.9431,
      "step": 150743
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.132356643676758,
      "learning_rate": 0.00016023587232325313,
      "loss": 3.1675,
      "step": 150744
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.655249834060669,
      "learning_rate": 0.00016023225281176982,
      "loss": 2.8504,
      "step": 150745
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.357729196548462,
      "learning_rate": 0.00016022863332627174,
      "loss": 3.0947,
      "step": 150746
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.992724895477295,
      "learning_rate": 0.00016022501386675993,
      "loss": 2.9476,
      "step": 150747
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0201098918914795,
      "learning_rate": 0.0001602213944332348,
      "loss": 2.9802,
      "step": 150748
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.324885606765747,
      "learning_rate": 0.00016021777502569728,
      "loss": 2.8455,
      "step": 150749
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.352292537689209,
      "learning_rate": 0.0001602141556441478,
      "loss": 3.0321,
      "step": 150750
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.517666816711426,
      "learning_rate": 0.00016021053628858736,
      "loss": 3.0265,
      "step": 150751
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.426008701324463,
      "learning_rate": 0.0001602069169590162,
      "loss": 2.7766,
      "step": 150752
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2723288536071777,
      "learning_rate": 0.0001602032976554353,
      "loss": 3.0442,
      "step": 150753
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7426748275756836,
      "learning_rate": 0.0001601996783778452,
      "loss": 3.1485,
      "step": 150754
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7862045764923096,
      "learning_rate": 0.00016019605912624666,
      "loss": 2.7589,
      "step": 150755
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.310009479522705,
      "learning_rate": 0.00016019243990064025,
      "loss": 2.944,
      "step": 150756
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1299822330474854,
      "learning_rate": 0.0001601888207010268,
      "loss": 3.1598,
      "step": 150757
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.145357608795166,
      "learning_rate": 0.00016018520152740685,
      "loss": 2.86,
      "step": 150758
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.14853572845459,
      "learning_rate": 0.00016018158237978122,
      "loss": 2.837,
      "step": 150759
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3712642192840576,
      "learning_rate": 0.00016017796325815023,
      "loss": 2.8795,
      "step": 150760
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.207972288131714,
      "learning_rate": 0.000160174344162515,
      "loss": 3.2932,
      "step": 150761
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4216411113739014,
      "learning_rate": 0.0001601707250928758,
      "loss": 2.7909,
      "step": 150762
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2408368587493896,
      "learning_rate": 0.0001601671060492337,
      "loss": 3.0236,
      "step": 150763
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.48732328414917,
      "learning_rate": 0.00016016348703158913,
      "loss": 2.9019,
      "step": 150764
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.647705078125,
      "learning_rate": 0.0001601598680399427,
      "loss": 2.8882,
      "step": 150765
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2851080894470215,
      "learning_rate": 0.0001601562490742953,
      "loss": 3.0197,
      "step": 150766
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5648210048675537,
      "learning_rate": 0.0001601526301346475,
      "loss": 2.933,
      "step": 150767
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.322840929031372,
      "learning_rate": 0.00016014901122099985,
      "loss": 3.0027,
      "step": 150768
    },
    {
      "epoch": 1.96,
      "grad_norm": 7.112026214599609,
      "learning_rate": 0.00016014539233335324,
      "loss": 3.0582,
      "step": 150769
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.189440965652466,
      "learning_rate": 0.0001601417734717083,
      "loss": 3.0965,
      "step": 150770
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.114082098007202,
      "learning_rate": 0.00016013815463606547,
      "loss": 2.8038,
      "step": 150771
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.247286319732666,
      "learning_rate": 0.00016013453582642572,
      "loss": 2.8323,
      "step": 150772
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2450616359710693,
      "learning_rate": 0.0001601309170427896,
      "loss": 3.0878,
      "step": 150773
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3388192653656006,
      "learning_rate": 0.0001601272982851577,
      "loss": 2.9296,
      "step": 150774
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.031705141067505,
      "learning_rate": 0.00016012367955353088,
      "loss": 3.1261,
      "step": 150775
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3838727474212646,
      "learning_rate": 0.00016012006084790964,
      "loss": 2.8118,
      "step": 150776
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0905652046203613,
      "learning_rate": 0.00016011644216829468,
      "loss": 3.0181,
      "step": 150777
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2202796936035156,
      "learning_rate": 0.0001601128235146868,
      "loss": 3.0266,
      "step": 150778
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.090813159942627,
      "learning_rate": 0.00016010920488708649,
      "loss": 2.8816,
      "step": 150779
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.825660467147827,
      "learning_rate": 0.0001601055862854946,
      "loss": 2.7766,
      "step": 150780
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3564157485961914,
      "learning_rate": 0.00016010196770991177,
      "loss": 2.9072,
      "step": 150781
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0848143100738525,
      "learning_rate": 0.00016009834916033865,
      "loss": 2.8929,
      "step": 150782
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3331174850463867,
      "learning_rate": 0.0001600947306367757,
      "loss": 2.9867,
      "step": 150783
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.025731086730957,
      "learning_rate": 0.00016009111213922391,
      "loss": 2.9871,
      "step": 150784
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1495931148529053,
      "learning_rate": 0.00016008749366768372,
      "loss": 2.8627,
      "step": 150785
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5302133560180664,
      "learning_rate": 0.000160083875222156,
      "loss": 3.0549,
      "step": 150786
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.188490390777588,
      "learning_rate": 0.00016008025680264135,
      "loss": 3.053,
      "step": 150787
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0856070518493652,
      "learning_rate": 0.00016007663840914043,
      "loss": 2.8952,
      "step": 150788
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.060702323913574,
      "learning_rate": 0.00016007302004165377,
      "loss": 3.1168,
      "step": 150789
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.371227502822876,
      "learning_rate": 0.00016006940170018233,
      "loss": 2.9775,
      "step": 150790
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.3786652088165283,
      "learning_rate": 0.00016006578338472645,
      "loss": 3.0235,
      "step": 150791
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3547890186309814,
      "learning_rate": 0.00016006216509528712,
      "loss": 2.9011,
      "step": 150792
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.322200059890747,
      "learning_rate": 0.00016005854683186492,
      "loss": 2.749,
      "step": 150793
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9769864082336426,
      "learning_rate": 0.00016005492859446044,
      "loss": 2.9083,
      "step": 150794
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.689014196395874,
      "learning_rate": 0.0001600513103830743,
      "loss": 3.1557,
      "step": 150795
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.432746171951294,
      "learning_rate": 0.00016004769219770735,
      "loss": 2.9023,
      "step": 150796
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.570063352584839,
      "learning_rate": 0.00016004407403836008,
      "loss": 3.1511,
      "step": 150797
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.834923267364502,
      "learning_rate": 0.00016004045590503334,
      "loss": 3.0765,
      "step": 150798
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.550149917602539,
      "learning_rate": 0.00016003683779772777,
      "loss": 3.0261,
      "step": 150799
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1366021633148193,
      "learning_rate": 0.00016003321971644395,
      "loss": 3.0144,
      "step": 150800
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.8557097911834717,
      "learning_rate": 0.00016002960166118253,
      "loss": 3.2008,
      "step": 150801
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.730062961578369,
      "learning_rate": 0.00016002598363194431,
      "loss": 2.9372,
      "step": 150802
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.400735378265381,
      "learning_rate": 0.00016002236562872983,
      "loss": 2.8518,
      "step": 150803
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.381049633026123,
      "learning_rate": 0.0001600187476515399,
      "loss": 3.0376,
      "step": 150804
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5325570106506348,
      "learning_rate": 0.00016001512970037507,
      "loss": 2.8454,
      "step": 150805
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.328819513320923,
      "learning_rate": 0.00016001151177523628,
      "loss": 2.8312,
      "step": 150806
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.098351001739502,
      "learning_rate": 0.00016000789387612372,
      "loss": 2.8716,
      "step": 150807
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.163987159729004,
      "learning_rate": 0.0001600042760030385,
      "loss": 2.815,
      "step": 150808
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6624643802642822,
      "learning_rate": 0.000160000658155981,
      "loss": 2.9145,
      "step": 150809
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1454358100891113,
      "learning_rate": 0.00015999704033495214,
      "loss": 2.9846,
      "step": 150810
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1262331008911133,
      "learning_rate": 0.00015999342253995234,
      "loss": 2.6002,
      "step": 150811
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5303544998168945,
      "learning_rate": 0.00015998980477098266,
      "loss": 2.8396,
      "step": 150812
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5645835399627686,
      "learning_rate": 0.00015998618702804324,
      "loss": 2.818,
      "step": 150813
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6198782920837402,
      "learning_rate": 0.0001599825693111352,
      "loss": 2.8553,
      "step": 150814
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.027564525604248,
      "learning_rate": 0.00015997895162025893,
      "loss": 2.8823,
      "step": 150815
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.162611246109009,
      "learning_rate": 0.0001599753339554153,
      "loss": 2.8466,
      "step": 150816
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.419290781021118,
      "learning_rate": 0.0001599717163166048,
      "loss": 3.1037,
      "step": 150817
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.4298133850097656,
      "learning_rate": 0.00015996809870382842,
      "loss": 3.0242,
      "step": 150818
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.154930830001831,
      "learning_rate": 0.00015996448111708637,
      "loss": 2.8545,
      "step": 150819
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.807758092880249,
      "learning_rate": 0.00015996086355637973,
      "loss": 3.1404,
      "step": 150820
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3542490005493164,
      "learning_rate": 0.00015995724602170888,
      "loss": 2.9789,
      "step": 150821
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3212273120880127,
      "learning_rate": 0.0001599536285130747,
      "loss": 3.0324,
      "step": 150822
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1355338096618652,
      "learning_rate": 0.00015995001103047768,
      "loss": 3.2604,
      "step": 150823
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.135028839111328,
      "learning_rate": 0.00015994639357391885,
      "loss": 3.1657,
      "step": 150824
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.8098137378692627,
      "learning_rate": 0.00015994277614339838,
      "loss": 2.8097,
      "step": 150825
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.293379783630371,
      "learning_rate": 0.0001599391587389173,
      "loss": 3.0622,
      "step": 150826
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.218038320541382,
      "learning_rate": 0.00015993554136047608,
      "loss": 2.9487,
      "step": 150827
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.492835283279419,
      "learning_rate": 0.00015993192400807558,
      "loss": 3.1362,
      "step": 150828
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.04113507270813,
      "learning_rate": 0.0001599283066817163,
      "loss": 3.2213,
      "step": 150829
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.211939573287964,
      "learning_rate": 0.0001599246893813992,
      "loss": 2.858,
      "step": 150830
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2092010974884033,
      "learning_rate": 0.00015992107210712451,
      "loss": 2.8938,
      "step": 150831
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.502171516418457,
      "learning_rate": 0.00015991745485889326,
      "loss": 2.8123,
      "step": 150832
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3107900619506836,
      "learning_rate": 0.00015991383763670587,
      "loss": 3.1005,
      "step": 150833
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0685386657714844,
      "learning_rate": 0.00015991022044056327,
      "loss": 3.1317,
      "step": 150834
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5631814002990723,
      "learning_rate": 0.0001599066032704659,
      "loss": 3.0158,
      "step": 150835
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0733866691589355,
      "learning_rate": 0.00015990298612641473,
      "loss": 2.9701,
      "step": 150836
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7437632083892822,
      "learning_rate": 0.00015989936900841006,
      "loss": 2.8793,
      "step": 150837
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5098958015441895,
      "learning_rate": 0.00015989575191645283,
      "loss": 2.8793,
      "step": 150838
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1015474796295166,
      "learning_rate": 0.0001598921348505435,
      "loss": 2.8206,
      "step": 150839
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.009427309036255,
      "learning_rate": 0.000159888517810683,
      "loss": 2.8977,
      "step": 150840
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3441686630249023,
      "learning_rate": 0.00015988490079687172,
      "loss": 3.1824,
      "step": 150841
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9872021675109863,
      "learning_rate": 0.00015988128380911066,
      "loss": 3.1071,
      "step": 150842
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0278754234313965,
      "learning_rate": 0.00015987766684740024,
      "loss": 2.9488,
      "step": 150843
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.413499593734741,
      "learning_rate": 0.00015987404991174125,
      "loss": 3.1338,
      "step": 150844
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4729442596435547,
      "learning_rate": 0.00015987043300213417,
      "loss": 2.9094,
      "step": 150845
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7083067893981934,
      "learning_rate": 0.00015986681611858,
      "loss": 3.1544,
      "step": 150846
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.1466853618621826,
      "learning_rate": 0.0001598631992610791,
      "loss": 3.0188,
      "step": 150847
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7183966636657715,
      "learning_rate": 0.00015985958242963234,
      "loss": 2.7968,
      "step": 150848
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6402087211608887,
      "learning_rate": 0.00015985596562424038,
      "loss": 2.9429,
      "step": 150849
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.547995090484619,
      "learning_rate": 0.00015985234884490382,
      "loss": 2.852,
      "step": 150850
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6169025897979736,
      "learning_rate": 0.00015984873209162325,
      "loss": 3.0827,
      "step": 150851
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4370524883270264,
      "learning_rate": 0.00015984511536439958,
      "loss": 2.9323,
      "step": 150852
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.181004047393799,
      "learning_rate": 0.00015984149866323316,
      "loss": 2.9552,
      "step": 150853
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0070457458496094,
      "learning_rate": 0.00015983788198812503,
      "loss": 2.8872,
      "step": 150854
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.7968316078186035,
      "learning_rate": 0.00015983426533907566,
      "loss": 3.0792,
      "step": 150855
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.601963520050049,
      "learning_rate": 0.00015983064871608562,
      "loss": 2.9734,
      "step": 150856
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4420089721679688,
      "learning_rate": 0.00015982703211915589,
      "loss": 2.9233,
      "step": 150857
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.040543079376221,
      "learning_rate": 0.0001598234155482869,
      "loss": 3.0854,
      "step": 150858
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6727871894836426,
      "learning_rate": 0.00015981979900347927,
      "loss": 3.061,
      "step": 150859
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9731616973876953,
      "learning_rate": 0.00015981618248473394,
      "loss": 3.0498,
      "step": 150860
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3088905811309814,
      "learning_rate": 0.00015981256599205138,
      "loss": 3.018,
      "step": 150861
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.7716166973114014,
      "learning_rate": 0.0001598089495254322,
      "loss": 3.0716,
      "step": 150862
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.512779474258423,
      "learning_rate": 0.0001598053330848773,
      "loss": 3.151,
      "step": 150863
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.602526903152466,
      "learning_rate": 0.00015980171667038715,
      "loss": 2.949,
      "step": 150864
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2451229095458984,
      "learning_rate": 0.00015979810028196262,
      "loss": 2.9799,
      "step": 150865
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3235480785369873,
      "learning_rate": 0.00015979448391960425,
      "loss": 2.8351,
      "step": 150866
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0050039291381836,
      "learning_rate": 0.00015979086758331274,
      "loss": 3.0545,
      "step": 150867
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.967973232269287,
      "learning_rate": 0.00015978725127308862,
      "loss": 2.8852,
      "step": 150868
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.256925106048584,
      "learning_rate": 0.00015978363498893284,
      "loss": 2.7421,
      "step": 150869
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.408986806869507,
      "learning_rate": 0.00015978001873084578,
      "loss": 2.9875,
      "step": 150870
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.19350266456604,
      "learning_rate": 0.0001597764024988284,
      "loss": 2.9587,
      "step": 150871
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.277679681777954,
      "learning_rate": 0.00015977278629288126,
      "loss": 2.9041,
      "step": 150872
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9908994436264038,
      "learning_rate": 0.000159769170113005,
      "loss": 3.1919,
      "step": 150873
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9089391231536865,
      "learning_rate": 0.00015976555395920013,
      "loss": 2.9452,
      "step": 150874
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4486210346221924,
      "learning_rate": 0.00015976193783146764,
      "loss": 2.7065,
      "step": 150875
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1832706928253174,
      "learning_rate": 0.0001597583217298079,
      "loss": 3.2217,
      "step": 150876
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.4765267372131348,
      "learning_rate": 0.00015975470565422192,
      "loss": 3.0534,
      "step": 150877
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.980731964111328,
      "learning_rate": 0.00015975108960471004,
      "loss": 3.1491,
      "step": 150878
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.349623680114746,
      "learning_rate": 0.00015974747358127328,
      "loss": 2.958,
      "step": 150879
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.131741523742676,
      "learning_rate": 0.00015974385758391193,
      "loss": 2.699,
      "step": 150880
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0766119956970215,
      "learning_rate": 0.00015974024161262694,
      "loss": 2.963,
      "step": 150881
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.473790168762207,
      "learning_rate": 0.00015973662566741876,
      "loss": 3.1223,
      "step": 150882
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.812004804611206,
      "learning_rate": 0.00015973300974828831,
      "loss": 3.014,
      "step": 150883
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.172699451446533,
      "learning_rate": 0.00015972939385523602,
      "loss": 2.9357,
      "step": 150884
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.242913007736206,
      "learning_rate": 0.00015972577798826297,
      "loss": 3.0528,
      "step": 150885
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.201737403869629,
      "learning_rate": 0.00015972216214736926,
      "loss": 2.8719,
      "step": 150886
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.9038047790527344,
      "learning_rate": 0.00015971854633255595,
      "loss": 2.7736,
      "step": 150887
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.960654616355896,
      "learning_rate": 0.00015971493054382354,
      "loss": 2.7581,
      "step": 150888
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.534661293029785,
      "learning_rate": 0.00015971131478117285,
      "loss": 2.8524,
      "step": 150889
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.797930955886841,
      "learning_rate": 0.00015970769904460435,
      "loss": 3.0214,
      "step": 150890
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9948670864105225,
      "learning_rate": 0.0001597040833341191,
      "loss": 2.8206,
      "step": 150891
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0218617916107178,
      "learning_rate": 0.00015970046764971727,
      "loss": 2.9287,
      "step": 150892
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9903087615966797,
      "learning_rate": 0.00015969685199139993,
      "loss": 3.1172,
      "step": 150893
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.3703434467315674,
      "learning_rate": 0.0001596932363591674,
      "loss": 2.7739,
      "step": 150894
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2438220977783203,
      "learning_rate": 0.0001596896207530207,
      "loss": 3.1457,
      "step": 150895
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.31596040725708,
      "learning_rate": 0.0001596860051729602,
      "loss": 2.7977,
      "step": 150896
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5774457454681396,
      "learning_rate": 0.000159682389618987,
      "loss": 2.8298,
      "step": 150897
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.66192626953125,
      "learning_rate": 0.00015967877409110126,
      "loss": 2.9484,
      "step": 150898
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3360416889190674,
      "learning_rate": 0.00015967515858930398,
      "loss": 3.022,
      "step": 150899
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.296771287918091,
      "learning_rate": 0.0001596715431135956,
      "loss": 2.9356,
      "step": 150900
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.034663200378418,
      "learning_rate": 0.0001596679276639771,
      "loss": 2.769,
      "step": 150901
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.0772979259490967,
      "learning_rate": 0.00015966431224044884,
      "loss": 2.9586,
      "step": 150902
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.5295841693878174,
      "learning_rate": 0.00015966069684301187,
      "loss": 3.1456,
      "step": 150903
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.7550477981567383,
      "learning_rate": 0.0001596570814716664,
      "loss": 2.8098,
      "step": 150904
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3554370403289795,
      "learning_rate": 0.0001596534661264134,
      "loss": 2.9174,
      "step": 150905
    },
    {
      "epoch": 1.96,
      "grad_norm": 1.9693323373794556,
      "learning_rate": 0.00015964985080725336,
      "loss": 3.207,
      "step": 150906
    },
    {
      "epoch": 1.96,
      "grad_norm": 3.467935085296631,
      "learning_rate": 0.00015964623551418722,
      "loss": 2.9812,
      "step": 150907
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.1578497886657715,
      "learning_rate": 0.00015964262024721536,
      "loss": 2.8656,
      "step": 150908
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.3671023845672607,
      "learning_rate": 0.0001596390050063388,
      "loss": 3.011,
      "step": 150909
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2661397457122803,
      "learning_rate": 0.0001596353897915578,
      "loss": 2.8967,
      "step": 150910
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.587360382080078,
      "learning_rate": 0.0001596317746028733,
      "loss": 3.1017,
      "step": 150911
    },
    {
      "epoch": 1.96,
      "grad_norm": 4.191646099090576,
      "learning_rate": 0.00015962815944028583,
      "loss": 2.7104,
      "step": 150912
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.2609944343566895,
      "learning_rate": 0.00015962454430379626,
      "loss": 3.0321,
      "step": 150913
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.093437433242798,
      "learning_rate": 0.00015962092919340497,
      "loss": 2.8675,
      "step": 150914
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.107294797897339,
      "learning_rate": 0.0001596173141091131,
      "loss": 3.1162,
      "step": 150915
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.162733793258667,
      "learning_rate": 0.00015961369905092068,
      "loss": 2.8917,
      "step": 150916
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.503225326538086,
      "learning_rate": 0.00015961008401882895,
      "loss": 2.8961,
      "step": 150917
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.3455066680908203,
      "learning_rate": 0.0001596064690128382,
      "loss": 2.7613,
      "step": 150918
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3677303791046143,
      "learning_rate": 0.00015960285403294938,
      "loss": 3.0377,
      "step": 150919
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.421609401702881,
      "learning_rate": 0.00015959923907916292,
      "loss": 2.9584,
      "step": 150920
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.124526262283325,
      "learning_rate": 0.0001595956241514798,
      "loss": 2.894,
      "step": 150921
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4055674076080322,
      "learning_rate": 0.0001595920092499003,
      "loss": 2.9965,
      "step": 150922
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.531862258911133,
      "learning_rate": 0.00015958839437442543,
      "loss": 3.0528,
      "step": 150923
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.720433473587036,
      "learning_rate": 0.00015958477952505558,
      "loss": 3.1432,
      "step": 150924
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2475879192352295,
      "learning_rate": 0.00015958116470179173,
      "loss": 3.1254,
      "step": 150925
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.61624813079834,
      "learning_rate": 0.00015957754990463427,
      "loss": 2.8715,
      "step": 150926
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.50338077545166,
      "learning_rate": 0.0001595739351335841,
      "loss": 3.116,
      "step": 150927
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2381091117858887,
      "learning_rate": 0.00015957032038864177,
      "loss": 3.3304,
      "step": 150928
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.505181074142456,
      "learning_rate": 0.000159566705669808,
      "loss": 2.7292,
      "step": 150929
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.593214988708496,
      "learning_rate": 0.00015956309097708328,
      "loss": 3.133,
      "step": 150930
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5581576824188232,
      "learning_rate": 0.0001595594763104686,
      "loss": 3.0052,
      "step": 150931
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2637600898742676,
      "learning_rate": 0.00015955586166996428,
      "loss": 2.8965,
      "step": 150932
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.171757221221924,
      "learning_rate": 0.00015955224705557132,
      "loss": 2.5149,
      "step": 150933
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.485476016998291,
      "learning_rate": 0.00015954863246729026,
      "loss": 2.9949,
      "step": 150934
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1528091430664062,
      "learning_rate": 0.00015954501790512175,
      "loss": 2.9516,
      "step": 150935
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.099489688873291,
      "learning_rate": 0.00015954140336906636,
      "loss": 3.0215,
      "step": 150936
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.392476797103882,
      "learning_rate": 0.000159537788859125,
      "loss": 3.2644,
      "step": 150937
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3101792335510254,
      "learning_rate": 0.0001595341743752981,
      "loss": 2.9063,
      "step": 150938
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4292609691619873,
      "learning_rate": 0.00015953055991758654,
      "loss": 3.0678,
      "step": 150939
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.166139841079712,
      "learning_rate": 0.0001595269454859909,
      "loss": 2.8836,
      "step": 150940
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0878872871398926,
      "learning_rate": 0.00015952333108051176,
      "loss": 3.16,
      "step": 150941
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.358755350112915,
      "learning_rate": 0.00015951971670114998,
      "loss": 2.7448,
      "step": 150942
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.950796604156494,
      "learning_rate": 0.00015951610234790617,
      "loss": 3.0795,
      "step": 150943
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.560236930847168,
      "learning_rate": 0.0001595124880207808,
      "loss": 2.7246,
      "step": 150944
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.833314895629883,
      "learning_rate": 0.00015950887371977485,
      "loss": 2.6932,
      "step": 150945
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.13333797454834,
      "learning_rate": 0.00015950525944488888,
      "loss": 3.122,
      "step": 150946
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4892122745513916,
      "learning_rate": 0.0001595016451961234,
      "loss": 3.0255,
      "step": 150947
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0259315967559814,
      "learning_rate": 0.0001594980309734793,
      "loss": 2.8919,
      "step": 150948
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2847468852996826,
      "learning_rate": 0.00015949441677695725,
      "loss": 2.9389,
      "step": 150949
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9675939083099365,
      "learning_rate": 0.0001594908026065576,
      "loss": 2.8474,
      "step": 150950
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8820719718933105,
      "learning_rate": 0.0001594871884622815,
      "loss": 2.9763,
      "step": 150951
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3536975383758545,
      "learning_rate": 0.00015948357434412936,
      "loss": 2.8438,
      "step": 150952
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7705905437469482,
      "learning_rate": 0.0001594799602521017,
      "loss": 2.9546,
      "step": 150953
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.341845750808716,
      "learning_rate": 0.00015947634618619953,
      "loss": 2.9384,
      "step": 150954
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2696352005004883,
      "learning_rate": 0.00015947273214642323,
      "loss": 2.82,
      "step": 150955
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.9550840854644775,
      "learning_rate": 0.0001594691181327737,
      "loss": 3.0132,
      "step": 150956
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.155770778656006,
      "learning_rate": 0.00015946550414525155,
      "loss": 2.974,
      "step": 150957
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5231189727783203,
      "learning_rate": 0.00015946189018385738,
      "loss": 2.7945,
      "step": 150958
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.54807186126709,
      "learning_rate": 0.0001594582762485918,
      "loss": 2.9179,
      "step": 150959
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8271100521087646,
      "learning_rate": 0.00015945466233945572,
      "loss": 2.8995,
      "step": 150960
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3309295177459717,
      "learning_rate": 0.00015945104845644953,
      "loss": 3.1563,
      "step": 150961
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3381595611572266,
      "learning_rate": 0.00015944743459957416,
      "loss": 3.0506,
      "step": 150962
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.382164478302002,
      "learning_rate": 0.00015944382076883007,
      "loss": 2.9411,
      "step": 150963
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0565195083618164,
      "learning_rate": 0.0001594402069642182,
      "loss": 3.0566,
      "step": 150964
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.349677801132202,
      "learning_rate": 0.00015943659318573886,
      "loss": 3.0514,
      "step": 150965
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.009341239929199,
      "learning_rate": 0.00015943297943339302,
      "loss": 3.0537,
      "step": 150966
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2693049907684326,
      "learning_rate": 0.00015942936570718114,
      "loss": 2.8246,
      "step": 150967
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.443204879760742,
      "learning_rate": 0.0001594257520071041,
      "loss": 2.7673,
      "step": 150968
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.659499406814575,
      "learning_rate": 0.00015942213833316235,
      "loss": 2.8105,
      "step": 150969
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.6962594985961914,
      "learning_rate": 0.00015941852468535689,
      "loss": 3.1746,
      "step": 150970
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2862842082977295,
      "learning_rate": 0.000159414911063688,
      "loss": 2.703,
      "step": 150971
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1956844329833984,
      "learning_rate": 0.0001594112974681566,
      "loss": 3.045,
      "step": 150972
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.327676296234131,
      "learning_rate": 0.0001594076838987632,
      "loss": 2.7263,
      "step": 150973
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0643310546875,
      "learning_rate": 0.00015940407035550867,
      "loss": 2.898,
      "step": 150974
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.22637677192688,
      "learning_rate": 0.00015940045683839347,
      "loss": 2.8944,
      "step": 150975
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9151523113250732,
      "learning_rate": 0.00015939684334741858,
      "loss": 2.878,
      "step": 150976
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9839553833007812,
      "learning_rate": 0.00015939322988258426,
      "loss": 2.8836,
      "step": 150977
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8448715209960938,
      "learning_rate": 0.00015938961644389155,
      "loss": 3.0875,
      "step": 150978
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2466843128204346,
      "learning_rate": 0.00015938600303134077,
      "loss": 3.0839,
      "step": 150979
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9035301208496094,
      "learning_rate": 0.00015938238964493295,
      "loss": 2.9432,
      "step": 150980
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.703705310821533,
      "learning_rate": 0.0001593787762846685,
      "loss": 2.8355,
      "step": 150981
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.185236930847168,
      "learning_rate": 0.00015937516295054838,
      "loss": 2.9903,
      "step": 150982
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.369697332382202,
      "learning_rate": 0.00015937154964257289,
      "loss": 2.8957,
      "step": 150983
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.200052738189697,
      "learning_rate": 0.00015936793636074295,
      "loss": 2.8814,
      "step": 150984
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.622411727905273,
      "learning_rate": 0.0001593643231050591,
      "loss": 2.8635,
      "step": 150985
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4767110347747803,
      "learning_rate": 0.00015936070987552213,
      "loss": 2.8415,
      "step": 150986
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.482363700866699,
      "learning_rate": 0.00015935709667213256,
      "loss": 2.8677,
      "step": 150987
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.3896665573120117,
      "learning_rate": 0.00015935348349489144,
      "loss": 2.9197,
      "step": 150988
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.916114330291748,
      "learning_rate": 0.00015934987034379886,
      "loss": 2.9002,
      "step": 150989
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9926695823669434,
      "learning_rate": 0.00015934625721885593,
      "loss": 2.8204,
      "step": 150990
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.33536696434021,
      "learning_rate": 0.00015934264412006312,
      "loss": 2.8982,
      "step": 150991
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4408986568450928,
      "learning_rate": 0.00015933903104742124,
      "loss": 2.7317,
      "step": 150992
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0848617553710938,
      "learning_rate": 0.0001593354180009308,
      "loss": 2.8507,
      "step": 150993
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4798457622528076,
      "learning_rate": 0.00015933180498059276,
      "loss": 2.8255,
      "step": 150994
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1149985790252686,
      "learning_rate": 0.00015932819198640738,
      "loss": 2.7721,
      "step": 150995
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3633580207824707,
      "learning_rate": 0.00015932457901837564,
      "loss": 2.9021,
      "step": 150996
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1902661323547363,
      "learning_rate": 0.00015932096607649803,
      "loss": 3.1462,
      "step": 150997
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4118683338165283,
      "learning_rate": 0.00015931735316077544,
      "loss": 2.9704,
      "step": 150998
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3835599422454834,
      "learning_rate": 0.00015931374027120825,
      "loss": 3.0821,
      "step": 150999
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0425844192504883,
      "learning_rate": 0.0001593101274077974,
      "loss": 2.9121,
      "step": 151000
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5826096534729004,
      "learning_rate": 0.00015930651457054347,
      "loss": 3.2077,
      "step": 151001
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.3159942626953125,
      "learning_rate": 0.00015930290175944714,
      "loss": 2.8614,
      "step": 151002
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.886582374572754,
      "learning_rate": 0.0001592992889745089,
      "loss": 3.015,
      "step": 151003
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.064880847930908,
      "learning_rate": 0.00015929567621572973,
      "loss": 2.8532,
      "step": 151004
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1644983291625977,
      "learning_rate": 0.00015929206348311002,
      "loss": 2.9601,
      "step": 151005
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1950080394744873,
      "learning_rate": 0.00015928845077665068,
      "loss": 2.9603,
      "step": 151006
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7243459224700928,
      "learning_rate": 0.00015928483809635226,
      "loss": 3.0389,
      "step": 151007
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.5595381259918213,
      "learning_rate": 0.00015928122544221546,
      "loss": 2.9619,
      "step": 151008
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.08083438873291,
      "learning_rate": 0.00015927761281424083,
      "loss": 2.8576,
      "step": 151009
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.748751163482666,
      "learning_rate": 0.00015927400021242927,
      "loss": 3.0363,
      "step": 151010
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0095465183258057,
      "learning_rate": 0.0001592703876367812,
      "loss": 3.2468,
      "step": 151011
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1000139713287354,
      "learning_rate": 0.00015926677508729754,
      "loss": 3.1834,
      "step": 151012
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9578800201416016,
      "learning_rate": 0.00015926316256397886,
      "loss": 2.9269,
      "step": 151013
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.27874231338501,
      "learning_rate": 0.00015925955006682578,
      "loss": 2.8944,
      "step": 151014
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8054964542388916,
      "learning_rate": 0.0001592559375958389,
      "loss": 3.0051,
      "step": 151015
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.138700485229492,
      "learning_rate": 0.00015925232515101915,
      "loss": 2.8319,
      "step": 151016
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.788067579269409,
      "learning_rate": 0.00015924871273236688,
      "loss": 2.8081,
      "step": 151017
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.243180990219116,
      "learning_rate": 0.00015924510033988307,
      "loss": 2.8924,
      "step": 151018
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6216325759887695,
      "learning_rate": 0.00015924148797356825,
      "loss": 2.7565,
      "step": 151019
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6275105476379395,
      "learning_rate": 0.00015923787563342312,
      "loss": 2.8677,
      "step": 151020
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.329373836517334,
      "learning_rate": 0.00015923426331944818,
      "loss": 3.0789,
      "step": 151021
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.26780104637146,
      "learning_rate": 0.00015923065103164437,
      "loss": 2.9595,
      "step": 151022
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.226677894592285,
      "learning_rate": 0.00015922703877001214,
      "loss": 2.7363,
      "step": 151023
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.984504461288452,
      "learning_rate": 0.00015922342653455234,
      "loss": 3.0135,
      "step": 151024
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1510488986968994,
      "learning_rate": 0.00015921981432526558,
      "loss": 2.8513,
      "step": 151025
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.331474542617798,
      "learning_rate": 0.00015921620214215244,
      "loss": 3.1228,
      "step": 151026
    },
    {
      "epoch": 1.97,
      "grad_norm": 6.238731861114502,
      "learning_rate": 0.00015921258998521373,
      "loss": 2.9502,
      "step": 151027
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.052525997161865,
      "learning_rate": 0.00015920897785445007,
      "loss": 3.052,
      "step": 151028
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.025783061981201,
      "learning_rate": 0.000159205365749862,
      "loss": 2.8591,
      "step": 151029
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.657413959503174,
      "learning_rate": 0.00015920175367145043,
      "loss": 2.7607,
      "step": 151030
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2512872219085693,
      "learning_rate": 0.0001591981416192159,
      "loss": 2.928,
      "step": 151031
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0053014755249023,
      "learning_rate": 0.000159194529593159,
      "loss": 2.9776,
      "step": 151032
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.745133638381958,
      "learning_rate": 0.0001591909175932806,
      "loss": 2.9833,
      "step": 151033
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5670714378356934,
      "learning_rate": 0.00015918730561958127,
      "loss": 2.9176,
      "step": 151034
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.374046564102173,
      "learning_rate": 0.00015918369367206157,
      "loss": 2.9633,
      "step": 151035
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.189713954925537,
      "learning_rate": 0.00015918008175072235,
      "loss": 2.7665,
      "step": 151036
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2216711044311523,
      "learning_rate": 0.00015917646985556428,
      "loss": 2.8311,
      "step": 151037
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7184841632843018,
      "learning_rate": 0.0001591728579865878,
      "loss": 3.0195,
      "step": 151038
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.559950113296509,
      "learning_rate": 0.00015916924614379387,
      "loss": 3.0871,
      "step": 151039
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7029433250427246,
      "learning_rate": 0.00015916563432718291,
      "loss": 2.6875,
      "step": 151040
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.891272783279419,
      "learning_rate": 0.00015916202253675585,
      "loss": 3.0288,
      "step": 151041
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.544992208480835,
      "learning_rate": 0.00015915841077251324,
      "loss": 3.0454,
      "step": 151042
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9720094203948975,
      "learning_rate": 0.00015915479903445574,
      "loss": 2.6906,
      "step": 151043
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2892189025878906,
      "learning_rate": 0.00015915118732258386,
      "loss": 2.9629,
      "step": 151044
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.254938840866089,
      "learning_rate": 0.00015914757563689858,
      "loss": 2.8165,
      "step": 151045
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.291618824005127,
      "learning_rate": 0.0001591439639774003,
      "loss": 2.8413,
      "step": 151046
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.439295530319214,
      "learning_rate": 0.00015914035234408994,
      "loss": 2.9889,
      "step": 151047
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.798236608505249,
      "learning_rate": 0.00015913674073696793,
      "loss": 2.9162,
      "step": 151048
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.121206283569336,
      "learning_rate": 0.0001591331291560353,
      "loss": 3.1009,
      "step": 151049
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6404738426208496,
      "learning_rate": 0.00015912951760129224,
      "loss": 3.1272,
      "step": 151050
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.004023551940918,
      "learning_rate": 0.0001591259060727398,
      "loss": 2.8704,
      "step": 151051
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.152216911315918,
      "learning_rate": 0.00015912229457037836,
      "loss": 3.0704,
      "step": 151052
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.0413713455200195,
      "learning_rate": 0.00015911868309420886,
      "loss": 3.0132,
      "step": 151053
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.625685691833496,
      "learning_rate": 0.00015911507164423176,
      "loss": 2.7464,
      "step": 151054
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.819460391998291,
      "learning_rate": 0.00015911146022044807,
      "loss": 2.7482,
      "step": 151055
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4320638179779053,
      "learning_rate": 0.00015910784882285797,
      "loss": 2.9083,
      "step": 151056
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.381284475326538,
      "learning_rate": 0.0001591042374514625,
      "loss": 2.8885,
      "step": 151057
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9825439453125,
      "learning_rate": 0.0001591006261062621,
      "loss": 3.0763,
      "step": 151058
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4728775024414062,
      "learning_rate": 0.00015909701478725765,
      "loss": 2.9523,
      "step": 151059
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.394707441329956,
      "learning_rate": 0.0001590934034944496,
      "loss": 3.2265,
      "step": 151060
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2965283393859863,
      "learning_rate": 0.000159089792227839,
      "loss": 2.9673,
      "step": 151061
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.350961446762085,
      "learning_rate": 0.00015908618098742604,
      "loss": 3.139,
      "step": 151062
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.026901721954346,
      "learning_rate": 0.00015908256977321174,
      "loss": 3.0098,
      "step": 151063
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.156844139099121,
      "learning_rate": 0.00015907895858519651,
      "loss": 2.9009,
      "step": 151064
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.34604549407959,
      "learning_rate": 0.00015907534742338127,
      "loss": 3.0031,
      "step": 151065
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.735532760620117,
      "learning_rate": 0.0001590717362877665,
      "loss": 2.8326,
      "step": 151066
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7443294525146484,
      "learning_rate": 0.00015906812517835306,
      "loss": 2.8882,
      "step": 151067
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0144898891448975,
      "learning_rate": 0.00015906451409514153,
      "loss": 2.9382,
      "step": 151068
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0520358085632324,
      "learning_rate": 0.00015906090303813256,
      "loss": 2.9387,
      "step": 151069
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5065410137176514,
      "learning_rate": 0.00015905729200732668,
      "loss": 2.9659,
      "step": 151070
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.213764190673828,
      "learning_rate": 0.0001590536810027249,
      "loss": 2.9386,
      "step": 151071
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.111971616744995,
      "learning_rate": 0.00015905007002432753,
      "loss": 2.7942,
      "step": 151072
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0044093132019043,
      "learning_rate": 0.00015904645907213554,
      "loss": 2.9955,
      "step": 151073
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.288970947265625,
      "learning_rate": 0.00015904284814614945,
      "loss": 2.9376,
      "step": 151074
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6424248218536377,
      "learning_rate": 0.00015903923724636996,
      "loss": 2.9814,
      "step": 151075
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4769554138183594,
      "learning_rate": 0.00015903562637279765,
      "loss": 2.9519,
      "step": 151076
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.304670572280884,
      "learning_rate": 0.00015903201552543337,
      "loss": 2.8501,
      "step": 151077
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4693009853363037,
      "learning_rate": 0.0001590284047042776,
      "loss": 2.7803,
      "step": 151078
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.704545259475708,
      "learning_rate": 0.00015902479390933122,
      "loss": 2.8721,
      "step": 151079
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4501147270202637,
      "learning_rate": 0.00015902118314059478,
      "loss": 2.7389,
      "step": 151080
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.28755521774292,
      "learning_rate": 0.00015901757239806898,
      "loss": 2.8013,
      "step": 151081
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0123631954193115,
      "learning_rate": 0.00015901396168175433,
      "loss": 2.9686,
      "step": 151082
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.991664171218872,
      "learning_rate": 0.0001590103509916518,
      "loss": 2.9527,
      "step": 151083
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9890849590301514,
      "learning_rate": 0.0001590067403277618,
      "loss": 3.1336,
      "step": 151084
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3253896236419678,
      "learning_rate": 0.00015900312969008518,
      "loss": 3.3383,
      "step": 151085
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.986213445663452,
      "learning_rate": 0.00015899951907862253,
      "loss": 2.9614,
      "step": 151086
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1829943656921387,
      "learning_rate": 0.0001589959084933746,
      "loss": 2.8579,
      "step": 151087
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.233647108078003,
      "learning_rate": 0.0001589922979343418,
      "loss": 3.0347,
      "step": 151088
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4132139682769775,
      "learning_rate": 0.00015898868740152515,
      "loss": 2.9748,
      "step": 151089
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3303284645080566,
      "learning_rate": 0.00015898507689492505,
      "loss": 2.9842,
      "step": 151090
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.6855380535125732,
      "learning_rate": 0.00015898146641454242,
      "loss": 3.0573,
      "step": 151091
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.166268348693848,
      "learning_rate": 0.00015897785596037775,
      "loss": 2.6599,
      "step": 151092
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.41155743598938,
      "learning_rate": 0.0001589742455324318,
      "loss": 3.1068,
      "step": 151093
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.371128797531128,
      "learning_rate": 0.00015897063513070509,
      "loss": 2.9495,
      "step": 151094
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9564104080200195,
      "learning_rate": 0.00015896702475519847,
      "loss": 2.8381,
      "step": 151095
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.3111326694488525,
      "learning_rate": 0.00015896341440591247,
      "loss": 2.9058,
      "step": 151096
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.658158302307129,
      "learning_rate": 0.0001589598040828479,
      "loss": 3.0018,
      "step": 151097
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.14762544631958,
      "learning_rate": 0.00015895619378600542,
      "loss": 3.0174,
      "step": 151098
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.354408025741577,
      "learning_rate": 0.00015895258351538563,
      "loss": 2.887,
      "step": 151099
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.388204574584961,
      "learning_rate": 0.00015894897327098907,
      "loss": 2.9403,
      "step": 151100
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9898366928100586,
      "learning_rate": 0.00015894536305281672,
      "loss": 2.9294,
      "step": 151101
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.38955020904541,
      "learning_rate": 0.00015894175286086892,
      "loss": 2.8735,
      "step": 151102
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0589754581451416,
      "learning_rate": 0.00015893814269514668,
      "loss": 2.9962,
      "step": 151103
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5679032802581787,
      "learning_rate": 0.00015893453255565046,
      "loss": 3.021,
      "step": 151104
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5630431175231934,
      "learning_rate": 0.00015893092244238102,
      "loss": 2.8509,
      "step": 151105
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.474123001098633,
      "learning_rate": 0.0001589273123553388,
      "loss": 2.7822,
      "step": 151106
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1997270584106445,
      "learning_rate": 0.00015892370229452483,
      "loss": 2.8778,
      "step": 151107
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.65126895904541,
      "learning_rate": 0.0001589200922599395,
      "loss": 2.9849,
      "step": 151108
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5156264305114746,
      "learning_rate": 0.00015891648225158363,
      "loss": 2.8165,
      "step": 151109
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4264137744903564,
      "learning_rate": 0.0001589128722694579,
      "loss": 3.0512,
      "step": 151110
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4336724281311035,
      "learning_rate": 0.00015890926231356293,
      "loss": 3.09,
      "step": 151111
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0711207389831543,
      "learning_rate": 0.00015890565238389927,
      "loss": 2.7759,
      "step": 151112
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.9118576049804688,
      "learning_rate": 0.00015890204248046784,
      "loss": 2.7618,
      "step": 151113
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3968498706817627,
      "learning_rate": 0.00015889843260326905,
      "loss": 2.9274,
      "step": 151114
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.163618564605713,
      "learning_rate": 0.00015889482275230384,
      "loss": 2.8478,
      "step": 151115
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3806722164154053,
      "learning_rate": 0.00015889121292757272,
      "loss": 2.9148,
      "step": 151116
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.159357786178589,
      "learning_rate": 0.0001588876031290763,
      "loss": 2.9744,
      "step": 151117
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4928290843963623,
      "learning_rate": 0.00015888399335681544,
      "loss": 2.9438,
      "step": 151118
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2542355060577393,
      "learning_rate": 0.0001588803836107907,
      "loss": 3.0108,
      "step": 151119
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.673238754272461,
      "learning_rate": 0.00015887677389100267,
      "loss": 2.9317,
      "step": 151120
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.59657621383667,
      "learning_rate": 0.0001588731641974522,
      "loss": 3.075,
      "step": 151121
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4127328395843506,
      "learning_rate": 0.00015886955453013986,
      "loss": 2.995,
      "step": 151122
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.62751841545105,
      "learning_rate": 0.00015886594488906627,
      "loss": 3.0367,
      "step": 151123
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.726410150527954,
      "learning_rate": 0.00015886233527423227,
      "loss": 2.7619,
      "step": 151124
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.374098539352417,
      "learning_rate": 0.00015885872568563827,
      "loss": 3.0489,
      "step": 151125
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1038851737976074,
      "learning_rate": 0.00015885511612328523,
      "loss": 2.8584,
      "step": 151126
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0394773483276367,
      "learning_rate": 0.00015885150658717369,
      "loss": 3.2702,
      "step": 151127
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9442095756530762,
      "learning_rate": 0.00015884789707730436,
      "loss": 3.0919,
      "step": 151128
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.514397144317627,
      "learning_rate": 0.0001588442875936777,
      "loss": 2.8735,
      "step": 151129
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5143613815307617,
      "learning_rate": 0.00015884067813629467,
      "loss": 2.8933,
      "step": 151130
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3308818340301514,
      "learning_rate": 0.00015883706870515577,
      "loss": 3.0217,
      "step": 151131
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7095279693603516,
      "learning_rate": 0.00015883345930026177,
      "loss": 2.9664,
      "step": 151132
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3265926837921143,
      "learning_rate": 0.0001588298499216133,
      "loss": 3.1267,
      "step": 151133
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3052403926849365,
      "learning_rate": 0.00015882624056921094,
      "loss": 3.1781,
      "step": 151134
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.547528028488159,
      "learning_rate": 0.00015882263124305553,
      "loss": 2.8615,
      "step": 151135
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.501054048538208,
      "learning_rate": 0.0001588190219431477,
      "loss": 3.0531,
      "step": 151136
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2370941638946533,
      "learning_rate": 0.00015881541266948794,
      "loss": 3.1496,
      "step": 151137
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3440802097320557,
      "learning_rate": 0.00015881180342207715,
      "loss": 3.1589,
      "step": 151138
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.254580020904541,
      "learning_rate": 0.00015880819420091582,
      "loss": 2.8621,
      "step": 151139
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4400694370269775,
      "learning_rate": 0.00015880458500600484,
      "loss": 2.923,
      "step": 151140
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5017788410186768,
      "learning_rate": 0.00015880097583734475,
      "loss": 2.974,
      "step": 151141
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.725905179977417,
      "learning_rate": 0.0001587973666949362,
      "loss": 2.9576,
      "step": 151142
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.071990489959717,
      "learning_rate": 0.0001587937575787798,
      "loss": 2.8205,
      "step": 151143
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0461816787719727,
      "learning_rate": 0.0001587901484888764,
      "loss": 2.8527,
      "step": 151144
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2967946529388428,
      "learning_rate": 0.00015878653942522647,
      "loss": 2.9024,
      "step": 151145
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2710492610931396,
      "learning_rate": 0.00015878293038783088,
      "loss": 3.0236,
      "step": 151146
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.830157518386841,
      "learning_rate": 0.00015877932137669024,
      "loss": 2.8959,
      "step": 151147
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2691380977630615,
      "learning_rate": 0.0001587757123918052,
      "loss": 2.9206,
      "step": 151148
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.166443109512329,
      "learning_rate": 0.00015877210343317625,
      "loss": 2.848,
      "step": 151149
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4678561687469482,
      "learning_rate": 0.00015876849450080437,
      "loss": 2.9361,
      "step": 151150
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4852631092071533,
      "learning_rate": 0.00015876488559469002,
      "loss": 2.8041,
      "step": 151151
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.633254051208496,
      "learning_rate": 0.000158761276714834,
      "loss": 2.7602,
      "step": 151152
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3107545375823975,
      "learning_rate": 0.00015875766786123697,
      "loss": 3.2741,
      "step": 151153
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.432447671890259,
      "learning_rate": 0.00015875405903389956,
      "loss": 2.7001,
      "step": 151154
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.45509672164917,
      "learning_rate": 0.0001587504502328223,
      "loss": 2.7106,
      "step": 151155
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1357192993164062,
      "learning_rate": 0.0001587468414580061,
      "loss": 2.9359,
      "step": 151156
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.246039390563965,
      "learning_rate": 0.0001587432327094514,
      "loss": 3.3081,
      "step": 151157
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.520374298095703,
      "learning_rate": 0.00015873962398715917,
      "loss": 3.0436,
      "step": 151158
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.353870153427124,
      "learning_rate": 0.0001587360152911299,
      "loss": 2.9655,
      "step": 151159
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.221935272216797,
      "learning_rate": 0.0001587324066213642,
      "loss": 2.944,
      "step": 151160
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.180737257003784,
      "learning_rate": 0.00015872879797786279,
      "loss": 2.8676,
      "step": 151161
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.263007640838623,
      "learning_rate": 0.00015872518936062643,
      "loss": 3.1703,
      "step": 151162
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0669777393341064,
      "learning_rate": 0.0001587215807696556,
      "loss": 2.731,
      "step": 151163
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.469111442565918,
      "learning_rate": 0.00015871797220495126,
      "loss": 2.8593,
      "step": 151164
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.976715564727783,
      "learning_rate": 0.00015871436366651388,
      "loss": 3.0765,
      "step": 151165
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5552115440368652,
      "learning_rate": 0.00015871075515434414,
      "loss": 3.0311,
      "step": 151166
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.64906644821167,
      "learning_rate": 0.00015870714666844267,
      "loss": 2.9393,
      "step": 151167
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6702635288238525,
      "learning_rate": 0.0001587035382088103,
      "loss": 3.0856,
      "step": 151168
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9425411224365234,
      "learning_rate": 0.00015869992977544752,
      "loss": 2.5943,
      "step": 151169
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8222992420196533,
      "learning_rate": 0.00015869632136835519,
      "loss": 2.9137,
      "step": 151170
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3115930557250977,
      "learning_rate": 0.00015869271298753386,
      "loss": 2.9258,
      "step": 151171
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2617833614349365,
      "learning_rate": 0.00015868910463298424,
      "loss": 3.0393,
      "step": 151172
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.372161388397217,
      "learning_rate": 0.00015868549630470685,
      "loss": 3.049,
      "step": 151173
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.13679838180542,
      "learning_rate": 0.00015868188800270268,
      "loss": 3.0989,
      "step": 151174
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.098362445831299,
      "learning_rate": 0.00015867827972697201,
      "loss": 3.1608,
      "step": 151175
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.351522207260132,
      "learning_rate": 0.00015867467147751585,
      "loss": 3.0362,
      "step": 151176
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.69083833694458,
      "learning_rate": 0.00015867106325433478,
      "loss": 2.8193,
      "step": 151177
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.707793951034546,
      "learning_rate": 0.0001586674550574294,
      "loss": 2.7436,
      "step": 151178
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0791945457458496,
      "learning_rate": 0.00015866384688680027,
      "loss": 3.0405,
      "step": 151179
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.12717342376709,
      "learning_rate": 0.00015866023874244834,
      "loss": 2.9133,
      "step": 151180
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4778835773468018,
      "learning_rate": 0.00015865663062437403,
      "loss": 3.1353,
      "step": 151181
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3265504837036133,
      "learning_rate": 0.00015865302253257823,
      "loss": 2.81,
      "step": 151182
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5341296195983887,
      "learning_rate": 0.0001586494144670615,
      "loss": 2.9342,
      "step": 151183
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.108654499053955,
      "learning_rate": 0.0001586458064278245,
      "loss": 3.0584,
      "step": 151184
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7111947536468506,
      "learning_rate": 0.0001586421984148678,
      "loss": 2.8027,
      "step": 151185
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8662123680114746,
      "learning_rate": 0.00015863859042819232,
      "loss": 3.1428,
      "step": 151186
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5867316722869873,
      "learning_rate": 0.00015863498246779844,
      "loss": 2.6894,
      "step": 151187
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1477303504943848,
      "learning_rate": 0.00015863137453368717,
      "loss": 3.2156,
      "step": 151188
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.169229030609131,
      "learning_rate": 0.00015862776662585896,
      "loss": 2.9255,
      "step": 151189
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.254957675933838,
      "learning_rate": 0.0001586241587443145,
      "loss": 3.0256,
      "step": 151190
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2996034622192383,
      "learning_rate": 0.00015862055088905434,
      "loss": 2.9866,
      "step": 151191
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7168376445770264,
      "learning_rate": 0.00015861694306007945,
      "loss": 2.868,
      "step": 151192
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5070383548736572,
      "learning_rate": 0.0001586133352573902,
      "loss": 2.8855,
      "step": 151193
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.136277675628662,
      "learning_rate": 0.00015860972748098754,
      "loss": 3.0846,
      "step": 151194
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.878821611404419,
      "learning_rate": 0.00015860611973087196,
      "loss": 2.8742,
      "step": 151195
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2819178104400635,
      "learning_rate": 0.00015860251200704422,
      "loss": 2.8505,
      "step": 151196
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.307003974914551,
      "learning_rate": 0.00015859890430950478,
      "loss": 3.138,
      "step": 151197
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0990617275238037,
      "learning_rate": 0.00015859529663825463,
      "loss": 3.0622,
      "step": 151198
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.78528094291687,
      "learning_rate": 0.00015859168899329414,
      "loss": 2.8505,
      "step": 151199
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4789037704467773,
      "learning_rate": 0.00015858808137462428,
      "loss": 3.1387,
      "step": 151200
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1787166595458984,
      "learning_rate": 0.00015858447378224537,
      "loss": 2.8785,
      "step": 151201
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9326783418655396,
      "learning_rate": 0.00015858086621615844,
      "loss": 2.9877,
      "step": 151202
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2530875205993652,
      "learning_rate": 0.00015857725867636402,
      "loss": 2.8316,
      "step": 151203
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4585235118865967,
      "learning_rate": 0.0001585736511628627,
      "loss": 2.9371,
      "step": 151204
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.334799289703369,
      "learning_rate": 0.00015857004367565513,
      "loss": 2.8551,
      "step": 151205
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1894402503967285,
      "learning_rate": 0.00015856643621474218,
      "loss": 3.0041,
      "step": 151206
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9447892904281616,
      "learning_rate": 0.00015856282878012426,
      "loss": 3.1962,
      "step": 151207
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.5212996006011963,
      "learning_rate": 0.00015855922137180233,
      "loss": 2.9503,
      "step": 151208
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.067755222320557,
      "learning_rate": 0.0001585556139897769,
      "loss": 2.9855,
      "step": 151209
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3651959896087646,
      "learning_rate": 0.0001585520066340485,
      "loss": 2.9723,
      "step": 151210
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.277054786682129,
      "learning_rate": 0.0001585483993046181,
      "loss": 3.0065,
      "step": 151211
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.36386775970459,
      "learning_rate": 0.0001585447920014862,
      "loss": 3.0981,
      "step": 151212
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3800714015960693,
      "learning_rate": 0.00015854118472465343,
      "loss": 3.1645,
      "step": 151213
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.708728790283203,
      "learning_rate": 0.00015853757747412057,
      "loss": 2.9312,
      "step": 151214
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5913431644439697,
      "learning_rate": 0.0001585339702498883,
      "loss": 3.0647,
      "step": 151215
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3183369636535645,
      "learning_rate": 0.00015853036305195708,
      "loss": 2.8977,
      "step": 151216
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.336531162261963,
      "learning_rate": 0.00015852675588032787,
      "loss": 3.1722,
      "step": 151217
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.987879514694214,
      "learning_rate": 0.0001585231487350012,
      "loss": 2.992,
      "step": 151218
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.21213436126709,
      "learning_rate": 0.00015851954161597763,
      "loss": 3.0111,
      "step": 151219
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3968780040740967,
      "learning_rate": 0.00015851593452325806,
      "loss": 3.0697,
      "step": 151220
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7193593978881836,
      "learning_rate": 0.0001585123274568431,
      "loss": 2.9487,
      "step": 151221
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4496095180511475,
      "learning_rate": 0.00015850872041673318,
      "loss": 2.8508,
      "step": 151222
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2413837909698486,
      "learning_rate": 0.00015850511340292933,
      "loss": 2.9942,
      "step": 151223
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2900822162628174,
      "learning_rate": 0.0001585015064154319,
      "loss": 2.9768,
      "step": 151224
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2698936462402344,
      "learning_rate": 0.00015849789945424184,
      "loss": 2.9737,
      "step": 151225
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0743634700775146,
      "learning_rate": 0.0001584942925193597,
      "loss": 2.8913,
      "step": 151226
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2258875370025635,
      "learning_rate": 0.00015849068561078612,
      "loss": 3.1984,
      "step": 151227
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4539151191711426,
      "learning_rate": 0.0001584870787285217,
      "loss": 2.9789,
      "step": 151228
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0986928939819336,
      "learning_rate": 0.00015848347187256729,
      "loss": 3.0845,
      "step": 151229
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2606616020202637,
      "learning_rate": 0.0001584798650429234,
      "loss": 3.0509,
      "step": 151230
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3477721214294434,
      "learning_rate": 0.00015847625823959088,
      "loss": 2.8418,
      "step": 151231
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9741499423980713,
      "learning_rate": 0.00015847265146257025,
      "loss": 2.8262,
      "step": 151232
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5180351734161377,
      "learning_rate": 0.00015846904471186225,
      "loss": 3.0715,
      "step": 151233
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.363863706588745,
      "learning_rate": 0.00015846543798746744,
      "loss": 3.0159,
      "step": 151234
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5471818447113037,
      "learning_rate": 0.00015846183128938665,
      "loss": 3.09,
      "step": 151235
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.732008457183838,
      "learning_rate": 0.00015845822461762042,
      "loss": 3.0394,
      "step": 151236
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8496363162994385,
      "learning_rate": 0.00015845461797216951,
      "loss": 2.8546,
      "step": 151237
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7342448234558105,
      "learning_rate": 0.00015845101135303463,
      "loss": 3.0052,
      "step": 151238
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4437637329101562,
      "learning_rate": 0.00015844740476021635,
      "loss": 2.8695,
      "step": 151239
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.17885684967041,
      "learning_rate": 0.00015844379819371525,
      "loss": 3.1241,
      "step": 151240
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0014541149139404,
      "learning_rate": 0.00015844019165353224,
      "loss": 2.9049,
      "step": 151241
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.646174430847168,
      "learning_rate": 0.00015843658513966776,
      "loss": 2.9494,
      "step": 151242
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6646008491516113,
      "learning_rate": 0.0001584329786521227,
      "loss": 2.8524,
      "step": 151243
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.243191957473755,
      "learning_rate": 0.00015842937219089762,
      "loss": 3.0784,
      "step": 151244
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.121244430541992,
      "learning_rate": 0.00015842576575599322,
      "loss": 3.0916,
      "step": 151245
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2933168411254883,
      "learning_rate": 0.00015842215934741002,
      "loss": 2.9634,
      "step": 151246
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9060231447219849,
      "learning_rate": 0.0001584185529651489,
      "loss": 3.0783,
      "step": 151247
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.267258405685425,
      "learning_rate": 0.0001584149466092103,
      "loss": 3.1325,
      "step": 151248
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.258939504623413,
      "learning_rate": 0.00015841134027959523,
      "loss": 2.6922,
      "step": 151249
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3805556297302246,
      "learning_rate": 0.0001584077339763041,
      "loss": 3.0396,
      "step": 151250
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.377424478530884,
      "learning_rate": 0.0001584041276993377,
      "loss": 3.147,
      "step": 151251
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4081315994262695,
      "learning_rate": 0.00015840052144869648,
      "loss": 3.1457,
      "step": 151252
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3964993953704834,
      "learning_rate": 0.0001583969152243814,
      "loss": 2.8895,
      "step": 151253
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4016504287719727,
      "learning_rate": 0.00015839330902639293,
      "loss": 2.8954,
      "step": 151254
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1064560413360596,
      "learning_rate": 0.00015838970285473187,
      "loss": 3.0583,
      "step": 151255
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.584869146347046,
      "learning_rate": 0.0001583860967093989,
      "loss": 3.0155,
      "step": 151256
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2220146656036377,
      "learning_rate": 0.00015838249059039458,
      "loss": 2.9494,
      "step": 151257
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3565008640289307,
      "learning_rate": 0.00015837888449771954,
      "loss": 2.8806,
      "step": 151258
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.42680025100708,
      "learning_rate": 0.00015837527843137465,
      "loss": 2.9822,
      "step": 151259
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.418449878692627,
      "learning_rate": 0.00015837167239136037,
      "loss": 3.2967,
      "step": 151260
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.712141752243042,
      "learning_rate": 0.00015836806637767754,
      "loss": 2.8484,
      "step": 151261
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5386087894439697,
      "learning_rate": 0.00015836446039032683,
      "loss": 2.9705,
      "step": 151262
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.324904680252075,
      "learning_rate": 0.00015836085442930877,
      "loss": 3.1661,
      "step": 151263
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.960050344467163,
      "learning_rate": 0.000158357248494624,
      "loss": 2.8421,
      "step": 151264
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.886075258255005,
      "learning_rate": 0.0001583536425862734,
      "loss": 3.062,
      "step": 151265
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5434443950653076,
      "learning_rate": 0.00015835003670425744,
      "loss": 2.6223,
      "step": 151266
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.389768362045288,
      "learning_rate": 0.000158346430848577,
      "loss": 2.81,
      "step": 151267
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.792597770690918,
      "learning_rate": 0.00015834282501923252,
      "loss": 3.0974,
      "step": 151268
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.095170021057129,
      "learning_rate": 0.000158339219216225,
      "loss": 2.865,
      "step": 151269
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.825180768966675,
      "learning_rate": 0.00015833561343955464,
      "loss": 2.9096,
      "step": 151270
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.760850667953491,
      "learning_rate": 0.00015833200768922254,
      "loss": 3.075,
      "step": 151271
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8135342597961426,
      "learning_rate": 0.000158328401965229,
      "loss": 2.896,
      "step": 151272
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.570590019226074,
      "learning_rate": 0.00015832479626757507,
      "loss": 2.7088,
      "step": 151273
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.877929449081421,
      "learning_rate": 0.0001583211905962611,
      "loss": 2.7393,
      "step": 151274
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.457648992538452,
      "learning_rate": 0.0001583175849512881,
      "loss": 3.1828,
      "step": 151275
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.444845676422119,
      "learning_rate": 0.00015831397933265633,
      "loss": 2.7319,
      "step": 151276
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3941805362701416,
      "learning_rate": 0.0001583103737403668,
      "loss": 2.8891,
      "step": 151277
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2883851528167725,
      "learning_rate": 0.0001583067681744199,
      "loss": 2.9655,
      "step": 151278
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.787210464477539,
      "learning_rate": 0.00015830316263481655,
      "loss": 3.0508,
      "step": 151279
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.189674139022827,
      "learning_rate": 0.00015829955712155723,
      "loss": 2.8673,
      "step": 151280
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1496360301971436,
      "learning_rate": 0.0001582959516346429,
      "loss": 2.9823,
      "step": 151281
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2282891273498535,
      "learning_rate": 0.00015829234617407382,
      "loss": 2.9143,
      "step": 151282
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.111149311065674,
      "learning_rate": 0.000158288740739851,
      "loss": 3.0988,
      "step": 151283
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.269047260284424,
      "learning_rate": 0.0001582851353319748,
      "loss": 2.745,
      "step": 151284
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0973706245422363,
      "learning_rate": 0.0001582815299504462,
      "loss": 3.0705,
      "step": 151285
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5929813385009766,
      "learning_rate": 0.00015827792459526568,
      "loss": 3.0895,
      "step": 151286
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.898857593536377,
      "learning_rate": 0.00015827431926643406,
      "loss": 2.9169,
      "step": 151287
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.603182077407837,
      "learning_rate": 0.0001582707139639519,
      "loss": 2.841,
      "step": 151288
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4478440284729004,
      "learning_rate": 0.00015826710868781986,
      "loss": 2.9772,
      "step": 151289
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.324760675430298,
      "learning_rate": 0.00015826350343803858,
      "loss": 3.0542,
      "step": 151290
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3346848487854004,
      "learning_rate": 0.0001582598982146089,
      "loss": 2.9922,
      "step": 151291
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1282718181610107,
      "learning_rate": 0.00015825629301753123,
      "loss": 3.1365,
      "step": 151292
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4600586891174316,
      "learning_rate": 0.00015825268784680653,
      "loss": 2.9718,
      "step": 151293
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.443023681640625,
      "learning_rate": 0.00015824908270243538,
      "loss": 2.9313,
      "step": 151294
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.8904732465744019,
      "learning_rate": 0.00015824547758441833,
      "loss": 2.7473,
      "step": 151295
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.326692581176758,
      "learning_rate": 0.00015824187249275599,
      "loss": 3.0136,
      "step": 151296
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.282557249069214,
      "learning_rate": 0.00015823826742744932,
      "loss": 2.9682,
      "step": 151297
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5715465545654297,
      "learning_rate": 0.00015823466238849872,
      "loss": 2.8151,
      "step": 151298
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.093803882598877,
      "learning_rate": 0.0001582310573759051,
      "loss": 3.0414,
      "step": 151299
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.401139974594116,
      "learning_rate": 0.00015822745238966898,
      "loss": 3.035,
      "step": 151300
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.097215175628662,
      "learning_rate": 0.0001582238474297909,
      "loss": 2.9472,
      "step": 151301
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.215330123901367,
      "learning_rate": 0.00015822024249627188,
      "loss": 2.9374,
      "step": 151302
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7246127128601074,
      "learning_rate": 0.00015821663758911237,
      "loss": 3.2097,
      "step": 151303
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1187121868133545,
      "learning_rate": 0.0001582130327083129,
      "loss": 2.8414,
      "step": 151304
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4408915042877197,
      "learning_rate": 0.00015820942785387443,
      "loss": 2.9901,
      "step": 151305
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.253544569015503,
      "learning_rate": 0.00015820582302579757,
      "loss": 2.7844,
      "step": 151306
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.410768985748291,
      "learning_rate": 0.00015820221822408274,
      "loss": 2.8807,
      "step": 151307
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.346505880355835,
      "learning_rate": 0.0001581986134487309,
      "loss": 2.868,
      "step": 151308
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3995697498321533,
      "learning_rate": 0.00015819500869974253,
      "loss": 2.9566,
      "step": 151309
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.729076385498047,
      "learning_rate": 0.0001581914039771185,
      "loss": 3.0042,
      "step": 151310
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.964799165725708,
      "learning_rate": 0.00015818779928085936,
      "loss": 2.9449,
      "step": 151311
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4811346530914307,
      "learning_rate": 0.00015818419461096579,
      "loss": 2.9584,
      "step": 151312
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.941291093826294,
      "learning_rate": 0.00015818058996743835,
      "loss": 3.0077,
      "step": 151313
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.14771032333374,
      "learning_rate": 0.00015817698535027793,
      "loss": 3.1073,
      "step": 151314
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5684518814086914,
      "learning_rate": 0.00015817338075948498,
      "loss": 2.9554,
      "step": 151315
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5830368995666504,
      "learning_rate": 0.00015816977619506037,
      "loss": 2.9615,
      "step": 151316
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.10866379737854,
      "learning_rate": 0.00015816617165700467,
      "loss": 2.8899,
      "step": 151317
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.038846969604492,
      "learning_rate": 0.00015816256714531855,
      "loss": 2.9894,
      "step": 151318
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4135000705718994,
      "learning_rate": 0.0001581589626600026,
      "loss": 3.017,
      "step": 151319
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3991386890411377,
      "learning_rate": 0.00015815535820105768,
      "loss": 3.183,
      "step": 151320
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9494850635528564,
      "learning_rate": 0.0001581517537684842,
      "loss": 3.1313,
      "step": 151321
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.695021390914917,
      "learning_rate": 0.00015814814936228314,
      "loss": 3.0369,
      "step": 151322
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.8316919803619385,
      "learning_rate": 0.000158144544982455,
      "loss": 3.036,
      "step": 151323
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.9313888549804688,
      "learning_rate": 0.0001581409406290005,
      "loss": 3.0987,
      "step": 151324
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.566394329071045,
      "learning_rate": 0.00015813733630192016,
      "loss": 2.8697,
      "step": 151325
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6237800121307373,
      "learning_rate": 0.00015813373200121485,
      "loss": 3.0607,
      "step": 151326
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.9088566303253174,
      "learning_rate": 0.00015813012772688506,
      "loss": 2.8752,
      "step": 151327
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.216242790222168,
      "learning_rate": 0.00015812652347893167,
      "loss": 3.0862,
      "step": 151328
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.139993667602539,
      "learning_rate": 0.00015812291925735523,
      "loss": 2.9947,
      "step": 151329
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9690897464752197,
      "learning_rate": 0.00015811931506215645,
      "loss": 3.1782,
      "step": 151330
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.253777265548706,
      "learning_rate": 0.0001581157108933358,
      "loss": 2.8469,
      "step": 151331
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2098660469055176,
      "learning_rate": 0.0001581121067508943,
      "loss": 3.1029,
      "step": 151332
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.823148012161255,
      "learning_rate": 0.00015810850263483223,
      "loss": 3.0095,
      "step": 151333
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2061190605163574,
      "learning_rate": 0.00015810489854515067,
      "loss": 2.7382,
      "step": 151334
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.57112193107605,
      "learning_rate": 0.00015810129448184994,
      "loss": 3.1969,
      "step": 151335
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2413086891174316,
      "learning_rate": 0.0001580976904449311,
      "loss": 2.9769,
      "step": 151336
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.595472812652588,
      "learning_rate": 0.0001580940864343943,
      "loss": 3.1676,
      "step": 151337
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2427711486816406,
      "learning_rate": 0.00015809048245024065,
      "loss": 3.0416,
      "step": 151338
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.162879228591919,
      "learning_rate": 0.0001580868784924705,
      "loss": 2.9526,
      "step": 151339
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.289520502090454,
      "learning_rate": 0.00015808327456108486,
      "loss": 3.1153,
      "step": 151340
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.086634874343872,
      "learning_rate": 0.00015807967065608407,
      "loss": 2.991,
      "step": 151341
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1754229068756104,
      "learning_rate": 0.00015807606677746916,
      "loss": 2.9144,
      "step": 151342
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.199638605117798,
      "learning_rate": 0.0001580724629252404,
      "loss": 2.8678,
      "step": 151343
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4771461486816406,
      "learning_rate": 0.0001580688590993987,
      "loss": 2.8907,
      "step": 151344
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9768500328063965,
      "learning_rate": 0.0001580652552999446,
      "loss": 2.9315,
      "step": 151345
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.126117706298828,
      "learning_rate": 0.00015806165152687895,
      "loss": 3.0594,
      "step": 151346
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8533549308776855,
      "learning_rate": 0.00015805804778020222,
      "loss": 2.8475,
      "step": 151347
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3404948711395264,
      "learning_rate": 0.00015805444405991544,
      "loss": 3.0832,
      "step": 151348
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4889862537384033,
      "learning_rate": 0.00015805084036601874,
      "loss": 3.0573,
      "step": 151349
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.290907382965088,
      "learning_rate": 0.0001580472366985132,
      "loss": 2.9601,
      "step": 151350
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4887170791625977,
      "learning_rate": 0.00015804363305739922,
      "loss": 3.0745,
      "step": 151351
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.3134469985961914,
      "learning_rate": 0.00015804002944267773,
      "loss": 2.9697,
      "step": 151352
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.499354124069214,
      "learning_rate": 0.0001580364258543492,
      "loss": 3.0533,
      "step": 151353
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1895575523376465,
      "learning_rate": 0.00015803282229241461,
      "loss": 2.8573,
      "step": 151354
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.426375150680542,
      "learning_rate": 0.00015802921875687412,
      "loss": 3.0059,
      "step": 151355
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1053121089935303,
      "learning_rate": 0.00015802561524772883,
      "loss": 3.0964,
      "step": 151356
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0097758769989014,
      "learning_rate": 0.0001580220117649791,
      "loss": 3.122,
      "step": 151357
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1489200592041016,
      "learning_rate": 0.0001580184083086259,
      "loss": 3.0039,
      "step": 151358
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.668617010116577,
      "learning_rate": 0.00015801480487866965,
      "loss": 2.8472,
      "step": 151359
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4282801151275635,
      "learning_rate": 0.00015801120147511136,
      "loss": 2.9712,
      "step": 151360
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1709115505218506,
      "learning_rate": 0.0001580075980979512,
      "loss": 2.8943,
      "step": 151361
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4453258514404297,
      "learning_rate": 0.00015800399474719028,
      "loss": 2.7813,
      "step": 151362
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.89143705368042,
      "learning_rate": 0.00015800039142282896,
      "loss": 2.9635,
      "step": 151363
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.300663709640503,
      "learning_rate": 0.00015799678812486814,
      "loss": 2.8883,
      "step": 151364
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7945594787597656,
      "learning_rate": 0.00015799318485330832,
      "loss": 3.0359,
      "step": 151365
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9418954849243164,
      "learning_rate": 0.00015798958160815048,
      "loss": 2.7167,
      "step": 151366
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.074610471725464,
      "learning_rate": 0.00015798597838939478,
      "loss": 2.8396,
      "step": 151367
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1399896144866943,
      "learning_rate": 0.00015798237519704228,
      "loss": 2.979,
      "step": 151368
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.003415822982788,
      "learning_rate": 0.00015797877203109348,
      "loss": 2.8171,
      "step": 151369
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.590360403060913,
      "learning_rate": 0.0001579751688915492,
      "loss": 2.9727,
      "step": 151370
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1786768436431885,
      "learning_rate": 0.00015797156577840988,
      "loss": 3.2254,
      "step": 151371
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0926055908203125,
      "learning_rate": 0.00015796796269167647,
      "loss": 3.1095,
      "step": 151372
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9379775524139404,
      "learning_rate": 0.00015796435963134946,
      "loss": 2.6156,
      "step": 151373
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5834062099456787,
      "learning_rate": 0.00015796075659742958,
      "loss": 3.1109,
      "step": 151374
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.108991861343384,
      "learning_rate": 0.00015795715358991736,
      "loss": 2.9783,
      "step": 151375
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3714959621429443,
      "learning_rate": 0.0001579535506088137,
      "loss": 3.0359,
      "step": 151376
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.526271104812622,
      "learning_rate": 0.00015794994765411908,
      "loss": 3.1917,
      "step": 151377
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5142931938171387,
      "learning_rate": 0.00015794634472583429,
      "loss": 2.8762,
      "step": 151378
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4987285137176514,
      "learning_rate": 0.00015794274182395998,
      "loss": 2.9531,
      "step": 151379
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.073359251022339,
      "learning_rate": 0.0001579391389484968,
      "loss": 2.8433,
      "step": 151380
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2963852882385254,
      "learning_rate": 0.00015793553609944532,
      "loss": 3.1171,
      "step": 151381
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.117518424987793,
      "learning_rate": 0.0001579319332768064,
      "loss": 2.9285,
      "step": 151382
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4063847064971924,
      "learning_rate": 0.00015792833048058053,
      "loss": 2.8664,
      "step": 151383
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9087886810302734,
      "learning_rate": 0.00015792472771076859,
      "loss": 2.9851,
      "step": 151384
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6360976696014404,
      "learning_rate": 0.00015792112496737108,
      "loss": 3.0772,
      "step": 151385
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.160800457000732,
      "learning_rate": 0.0001579175222503886,
      "loss": 3.0153,
      "step": 151386
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.354097366333008,
      "learning_rate": 0.0001579139195598221,
      "loss": 3.0332,
      "step": 151387
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.496767282485962,
      "learning_rate": 0.00015791031689567201,
      "loss": 3.0454,
      "step": 151388
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.4371633529663086,
      "learning_rate": 0.000157906714257939,
      "loss": 2.7416,
      "step": 151389
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.219963550567627,
      "learning_rate": 0.00015790311164662395,
      "loss": 2.9855,
      "step": 151390
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9260149002075195,
      "learning_rate": 0.00015789950906172734,
      "loss": 2.9686,
      "step": 151391
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3963706493377686,
      "learning_rate": 0.00015789590650324982,
      "loss": 2.9826,
      "step": 151392
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4633307456970215,
      "learning_rate": 0.0001578923039711922,
      "loss": 2.7496,
      "step": 151393
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.869816541671753,
      "learning_rate": 0.00015788870146555515,
      "loss": 3.2483,
      "step": 151394
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.156075954437256,
      "learning_rate": 0.00015788509898633914,
      "loss": 3.0791,
      "step": 151395
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3439218997955322,
      "learning_rate": 0.00015788149653354506,
      "loss": 3.0997,
      "step": 151396
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0304980278015137,
      "learning_rate": 0.00015787789410717353,
      "loss": 3.0137,
      "step": 151397
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.054652452468872,
      "learning_rate": 0.000157874291707225,
      "loss": 2.901,
      "step": 151398
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9269607067108154,
      "learning_rate": 0.0001578706893337005,
      "loss": 2.8562,
      "step": 151399
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6898694038391113,
      "learning_rate": 0.0001578670869866004,
      "loss": 3.0875,
      "step": 151400
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4704675674438477,
      "learning_rate": 0.0001578634846659256,
      "loss": 2.7633,
      "step": 151401
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1402523517608643,
      "learning_rate": 0.00015785988237167664,
      "loss": 2.9399,
      "step": 151402
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2758255004882812,
      "learning_rate": 0.00015785628010385423,
      "loss": 2.9136,
      "step": 151403
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.029715061187744,
      "learning_rate": 0.0001578526778624589,
      "loss": 3.0428,
      "step": 151404
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.069512367248535,
      "learning_rate": 0.00015784907564749158,
      "loss": 3.1014,
      "step": 151405
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.60593581199646,
      "learning_rate": 0.00015784547345895267,
      "loss": 3.0249,
      "step": 151406
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.286058187484741,
      "learning_rate": 0.00015784187129684306,
      "loss": 2.8925,
      "step": 151407
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3780899047851562,
      "learning_rate": 0.00015783826916116324,
      "loss": 3.0912,
      "step": 151408
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.319692373275757,
      "learning_rate": 0.0001578346670519142,
      "loss": 2.9147,
      "step": 151409
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3944923877716064,
      "learning_rate": 0.00015783106496909614,
      "loss": 2.981,
      "step": 151410
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.546967029571533,
      "learning_rate": 0.00015782746291271007,
      "loss": 2.8208,
      "step": 151411
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.019540786743164,
      "learning_rate": 0.00015782386088275646,
      "loss": 2.6323,
      "step": 151412
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.055917739868164,
      "learning_rate": 0.0001578202588792362,
      "loss": 3.0791,
      "step": 151413
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0586931705474854,
      "learning_rate": 0.0001578166569021497,
      "loss": 2.7515,
      "step": 151414
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9800816774368286,
      "learning_rate": 0.00015781305495149802,
      "loss": 2.8568,
      "step": 151415
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5266666412353516,
      "learning_rate": 0.00015780945302728134,
      "loss": 3.1883,
      "step": 151416
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.447361469268799,
      "learning_rate": 0.0001578058511295007,
      "loss": 3.0264,
      "step": 151417
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.262312173843384,
      "learning_rate": 0.00015780224925815648,
      "loss": 2.7428,
      "step": 151418
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1555111408233643,
      "learning_rate": 0.00015779864741324965,
      "loss": 2.8113,
      "step": 151419
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.126077890396118,
      "learning_rate": 0.0001577950455947806,
      "loss": 2.995,
      "step": 151420
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.080909252166748,
      "learning_rate": 0.00015779144380275042,
      "loss": 2.9415,
      "step": 151421
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7477710247039795,
      "learning_rate": 0.00015778784203715918,
      "loss": 3.0264,
      "step": 151422
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.168445348739624,
      "learning_rate": 0.00015778424029800806,
      "loss": 2.8416,
      "step": 151423
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4772322177886963,
      "learning_rate": 0.00015778063858529738,
      "loss": 2.8919,
      "step": 151424
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3686017990112305,
      "learning_rate": 0.00015777703689902808,
      "loss": 2.8385,
      "step": 151425
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.130514621734619,
      "learning_rate": 0.00015777343523920055,
      "loss": 3.0021,
      "step": 151426
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4601552486419678,
      "learning_rate": 0.00015776983360581595,
      "loss": 2.9871,
      "step": 151427
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.36385440826416,
      "learning_rate": 0.00015776623199887434,
      "loss": 3.0485,
      "step": 151428
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.202972888946533,
      "learning_rate": 0.00015776263041837677,
      "loss": 2.8622,
      "step": 151429
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0891175270080566,
      "learning_rate": 0.0001577590288643237,
      "loss": 2.9199,
      "step": 151430
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8447067737579346,
      "learning_rate": 0.0001577554273367161,
      "loss": 3.1583,
      "step": 151431
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6856648921966553,
      "learning_rate": 0.00015775182583555427,
      "loss": 2.9151,
      "step": 151432
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.959552049636841,
      "learning_rate": 0.0001577482243608393,
      "loss": 2.913,
      "step": 151433
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2739827632904053,
      "learning_rate": 0.0001577446229125714,
      "loss": 3.1926,
      "step": 151434
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.000476598739624,
      "learning_rate": 0.0001577410214907516,
      "loss": 3.161,
      "step": 151435
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3581531047821045,
      "learning_rate": 0.00015773742009538027,
      "loss": 2.9429,
      "step": 151436
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.083892345428467,
      "learning_rate": 0.00015773381872645836,
      "loss": 3.0683,
      "step": 151437
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.37980318069458,
      "learning_rate": 0.00015773021738398627,
      "loss": 2.9289,
      "step": 151438
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.051002264022827,
      "learning_rate": 0.00015772661606796513,
      "loss": 3.0441,
      "step": 151439
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.358175277709961,
      "learning_rate": 0.000157723014778395,
      "loss": 2.9193,
      "step": 151440
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6068365573883057,
      "learning_rate": 0.00015771941351527702,
      "loss": 3.0903,
      "step": 151441
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.634294033050537,
      "learning_rate": 0.00015771581227861154,
      "loss": 2.9947,
      "step": 151442
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5859713554382324,
      "learning_rate": 0.00015771221106839948,
      "loss": 3.1545,
      "step": 151443
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.844212532043457,
      "learning_rate": 0.0001577086098846413,
      "loss": 2.9713,
      "step": 151444
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.415837526321411,
      "learning_rate": 0.00015770500872733803,
      "loss": 3.3076,
      "step": 151445
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6922991275787354,
      "learning_rate": 0.00015770140759648984,
      "loss": 3.0159,
      "step": 151446
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.3401904106140137,
      "learning_rate": 0.00015769780649209776,
      "loss": 2.7823,
      "step": 151447
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.287905693054199,
      "learning_rate": 0.00015769420541416219,
      "loss": 2.8818,
      "step": 151448
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4418020248413086,
      "learning_rate": 0.0001576906043626841,
      "loss": 2.759,
      "step": 151449
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9416698217391968,
      "learning_rate": 0.0001576870033376639,
      "loss": 2.8312,
      "step": 151450
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.459792137145996,
      "learning_rate": 0.0001576834023391026,
      "loss": 2.8992,
      "step": 151451
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.805588960647583,
      "learning_rate": 0.00015767980136700043,
      "loss": 2.7303,
      "step": 151452
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1894707679748535,
      "learning_rate": 0.00015767620042135837,
      "loss": 3.1331,
      "step": 151453
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7241649627685547,
      "learning_rate": 0.00015767259950217686,
      "loss": 3.0965,
      "step": 151454
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4000000953674316,
      "learning_rate": 0.00015766899860945684,
      "loss": 3.2011,
      "step": 151455
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5553524494171143,
      "learning_rate": 0.00015766539774319868,
      "loss": 2.9057,
      "step": 151456
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8733081817626953,
      "learning_rate": 0.00015766179690340348,
      "loss": 2.9424,
      "step": 151457
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4530458450317383,
      "learning_rate": 0.00015765819609007145,
      "loss": 2.8446,
      "step": 151458
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2929415702819824,
      "learning_rate": 0.0001576545953032035,
      "loss": 3.1016,
      "step": 151459
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.085676670074463,
      "learning_rate": 0.00015765099454280012,
      "loss": 3.0015,
      "step": 151460
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.881899118423462,
      "learning_rate": 0.00015764739380886227,
      "loss": 2.8918,
      "step": 151461
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.540856122970581,
      "learning_rate": 0.00015764379310139032,
      "loss": 2.9463,
      "step": 151462
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5022146701812744,
      "learning_rate": 0.0001576401924203852,
      "loss": 3.0998,
      "step": 151463
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3208272457122803,
      "learning_rate": 0.00015763659176584743,
      "loss": 2.971,
      "step": 151464
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1244893074035645,
      "learning_rate": 0.00015763299113777775,
      "loss": 2.9427,
      "step": 151465
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4246129989624023,
      "learning_rate": 0.00015762939053617664,
      "loss": 2.9057,
      "step": 151466
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0953752994537354,
      "learning_rate": 0.00015762578996104503,
      "loss": 3.0598,
      "step": 151467
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.824174165725708,
      "learning_rate": 0.00015762218941238335,
      "loss": 3.053,
      "step": 151468
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.497769355773926,
      "learning_rate": 0.00015761858889019254,
      "loss": 2.6755,
      "step": 151469
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.846339702606201,
      "learning_rate": 0.0001576149883944731,
      "loss": 3.3786,
      "step": 151470
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5008649826049805,
      "learning_rate": 0.0001576113879252256,
      "loss": 2.9573,
      "step": 151471
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4367306232452393,
      "learning_rate": 0.00015760778748245095,
      "loss": 2.9516,
      "step": 151472
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2830142974853516,
      "learning_rate": 0.00015760418706614967,
      "loss": 2.9617,
      "step": 151473
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.135777711868286,
      "learning_rate": 0.0001576005866763224,
      "loss": 3.0713,
      "step": 151474
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9859960079193115,
      "learning_rate": 0.00015759698631296996,
      "loss": 2.7862,
      "step": 151475
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.433237314224243,
      "learning_rate": 0.00015759338597609292,
      "loss": 2.8695,
      "step": 151476
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2660112380981445,
      "learning_rate": 0.00015758978566569185,
      "loss": 3.0474,
      "step": 151477
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6450464725494385,
      "learning_rate": 0.00015758618538176767,
      "loss": 2.8495,
      "step": 151478
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.102055311203003,
      "learning_rate": 0.0001575825851243209,
      "loss": 2.9945,
      "step": 151479
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.839003801345825,
      "learning_rate": 0.00015757898489335208,
      "loss": 2.9951,
      "step": 151480
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8953025341033936,
      "learning_rate": 0.0001575753846888621,
      "loss": 2.8938,
      "step": 151481
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.374004602432251,
      "learning_rate": 0.00015757178451085159,
      "loss": 2.7248,
      "step": 151482
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2815918922424316,
      "learning_rate": 0.00015756818435932102,
      "loss": 2.9011,
      "step": 151483
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.316918849945068,
      "learning_rate": 0.00015756458423427138,
      "loss": 3.0946,
      "step": 151484
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3147268295288086,
      "learning_rate": 0.00015756098413570303,
      "loss": 2.9041,
      "step": 151485
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.604266405105591,
      "learning_rate": 0.0001575573840636169,
      "loss": 3.1079,
      "step": 151486
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1621005535125732,
      "learning_rate": 0.00015755378401801354,
      "loss": 3.0833,
      "step": 151487
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.160494089126587,
      "learning_rate": 0.0001575501839988936,
      "loss": 2.5468,
      "step": 151488
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.751742362976074,
      "learning_rate": 0.00015754658400625769,
      "loss": 2.9678,
      "step": 151489
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.765926361083984,
      "learning_rate": 0.00015754298404010663,
      "loss": 3.1083,
      "step": 151490
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5063576698303223,
      "learning_rate": 0.00015753938410044093,
      "loss": 2.851,
      "step": 151491
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.857618570327759,
      "learning_rate": 0.00015753578418726147,
      "loss": 3.0145,
      "step": 151492
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4823081493377686,
      "learning_rate": 0.0001575321843005687,
      "loss": 2.9861,
      "step": 151493
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.784499406814575,
      "learning_rate": 0.00015752858444036358,
      "loss": 3.1073,
      "step": 151494
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.292069911956787,
      "learning_rate": 0.00015752498460664636,
      "loss": 3.0146,
      "step": 151495
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4038758277893066,
      "learning_rate": 0.00015752138479941806,
      "loss": 2.8209,
      "step": 151496
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.40118145942688,
      "learning_rate": 0.00015751778501867913,
      "loss": 2.7309,
      "step": 151497
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4601149559020996,
      "learning_rate": 0.00015751418526443038,
      "loss": 2.8346,
      "step": 151498
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6776297092437744,
      "learning_rate": 0.00015751058553667237,
      "loss": 3.1918,
      "step": 151499
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0752112865448,
      "learning_rate": 0.00015750698583540609,
      "loss": 3.059,
      "step": 151500
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.7399661540985107,
      "learning_rate": 0.00015750338616063166,
      "loss": 2.8172,
      "step": 151501
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.45119309425354,
      "learning_rate": 0.0001574997865123502,
      "loss": 2.9239,
      "step": 151502
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.8592075109481812,
      "learning_rate": 0.0001574961868905621,
      "loss": 2.9929,
      "step": 151503
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6648852825164795,
      "learning_rate": 0.00015749258729526823,
      "loss": 3.0016,
      "step": 151504
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.40828800201416,
      "learning_rate": 0.00015748898772646906,
      "loss": 2.9642,
      "step": 151505
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2860217094421387,
      "learning_rate": 0.00015748538818416565,
      "loss": 2.7127,
      "step": 151506
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1176679134368896,
      "learning_rate": 0.0001574817886683581,
      "loss": 2.9755,
      "step": 151507
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.378464698791504,
      "learning_rate": 0.00015747818917904758,
      "loss": 3.0162,
      "step": 151508
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.084251880645752,
      "learning_rate": 0.00015747458971623437,
      "loss": 2.6646,
      "step": 151509
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.511052131652832,
      "learning_rate": 0.0001574709902799195,
      "loss": 2.9276,
      "step": 151510
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.624570846557617,
      "learning_rate": 0.0001574673908701033,
      "loss": 2.8187,
      "step": 151511
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4799392223358154,
      "learning_rate": 0.00015746379148678683,
      "loss": 3.1624,
      "step": 151512
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.08772873878479,
      "learning_rate": 0.00015746019212997033,
      "loss": 3.1176,
      "step": 151513
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0865089893341064,
      "learning_rate": 0.00015745659279965476,
      "loss": 3.034,
      "step": 151514
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.027203321456909,
      "learning_rate": 0.00015745299349584062,
      "loss": 2.9968,
      "step": 151515
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7787280082702637,
      "learning_rate": 0.00015744939421852874,
      "loss": 2.8724,
      "step": 151516
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0771164894104004,
      "learning_rate": 0.00015744579496771962,
      "loss": 3.1375,
      "step": 151517
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5426876544952393,
      "learning_rate": 0.00015744219574341422,
      "loss": 3.0976,
      "step": 151518
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4132120609283447,
      "learning_rate": 0.0001574385965456128,
      "loss": 3.027,
      "step": 151519
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.615012168884277,
      "learning_rate": 0.00015743499737431633,
      "loss": 2.6825,
      "step": 151520
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4193997383117676,
      "learning_rate": 0.0001574313982295253,
      "loss": 3.1249,
      "step": 151521
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.06626558303833,
      "learning_rate": 0.00015742779911124055,
      "loss": 3.1311,
      "step": 151522
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.338995933532715,
      "learning_rate": 0.00015742420001946258,
      "loss": 2.9451,
      "step": 151523
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.250065565109253,
      "learning_rate": 0.00015742060095419235,
      "loss": 2.8864,
      "step": 151524
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.257113456726074,
      "learning_rate": 0.0001574170019154301,
      "loss": 2.8196,
      "step": 151525
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1576461791992188,
      "learning_rate": 0.00015741340290317682,
      "loss": 2.8399,
      "step": 151526
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5224592685699463,
      "learning_rate": 0.00015740980391743299,
      "loss": 3.1464,
      "step": 151527
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.7545089721679688,
      "learning_rate": 0.0001574062049581994,
      "loss": 2.8305,
      "step": 151528
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.314568042755127,
      "learning_rate": 0.0001574026060254767,
      "loss": 3.073,
      "step": 151529
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.007080554962158,
      "learning_rate": 0.0001573990071192657,
      "loss": 3.1172,
      "step": 151530
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1442697048187256,
      "learning_rate": 0.00015739540823956672,
      "loss": 2.6904,
      "step": 151531
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.907470703125,
      "learning_rate": 0.00015739180938638072,
      "loss": 3.1438,
      "step": 151532
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.721961259841919,
      "learning_rate": 0.00015738821055970816,
      "loss": 2.9202,
      "step": 151533
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6228408813476562,
      "learning_rate": 0.00015738461175954994,
      "loss": 3.0588,
      "step": 151534
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8696751594543457,
      "learning_rate": 0.00015738101298590654,
      "loss": 3.1347,
      "step": 151535
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.901193380355835,
      "learning_rate": 0.00015737741423877877,
      "loss": 3.1766,
      "step": 151536
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6729745864868164,
      "learning_rate": 0.00015737381551816722,
      "loss": 2.9208,
      "step": 151537
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3200228214263916,
      "learning_rate": 0.0001573702168240726,
      "loss": 2.9145,
      "step": 151538
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.45680570602417,
      "learning_rate": 0.0001573666181564954,
      "loss": 2.8109,
      "step": 151539
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.754485845565796,
      "learning_rate": 0.0001573630195154366,
      "loss": 2.8219,
      "step": 151540
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.4578070640563965,
      "learning_rate": 0.00015735942090089658,
      "loss": 2.8085,
      "step": 151541
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.8594119548797607,
      "learning_rate": 0.0001573558223128762,
      "loss": 2.8595,
      "step": 151542
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.061084747314453,
      "learning_rate": 0.00015735222375137616,
      "loss": 2.9475,
      "step": 151543
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.6314597129821777,
      "learning_rate": 0.00015734862521639696,
      "loss": 2.8401,
      "step": 151544
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.551961898803711,
      "learning_rate": 0.00015734502670793922,
      "loss": 2.9853,
      "step": 151545
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4197981357574463,
      "learning_rate": 0.00015734142822600392,
      "loss": 3.0466,
      "step": 151546
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.8859550952911377,
      "learning_rate": 0.00015733782977059136,
      "loss": 2.8538,
      "step": 151547
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.433943748474121,
      "learning_rate": 0.00015733423134170254,
      "loss": 2.7719,
      "step": 151548
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.158808469772339,
      "learning_rate": 0.00015733063293933794,
      "loss": 2.8869,
      "step": 151549
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.9403111934661865,
      "learning_rate": 0.0001573270345634983,
      "loss": 2.9273,
      "step": 151550
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.350248336791992,
      "learning_rate": 0.00015732343621418414,
      "loss": 2.7084,
      "step": 151551
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.7881503105163574,
      "learning_rate": 0.00015731983789139638,
      "loss": 3.0253,
      "step": 151552
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.212099313735962,
      "learning_rate": 0.0001573162395951354,
      "loss": 2.8375,
      "step": 151553
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1529793739318848,
      "learning_rate": 0.00015731264132540218,
      "loss": 2.8214,
      "step": 151554
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.9024903774261475,
      "learning_rate": 0.0001573090430821972,
      "loss": 2.7593,
      "step": 151555
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.075532913208008,
      "learning_rate": 0.00015730544486552116,
      "loss": 2.7804,
      "step": 151556
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.189340114593506,
      "learning_rate": 0.00015730184667537463,
      "loss": 2.8118,
      "step": 151557
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7825634479522705,
      "learning_rate": 0.0001572982485117585,
      "loss": 2.73,
      "step": 151558
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7541840076446533,
      "learning_rate": 0.00015729465037467319,
      "loss": 2.9387,
      "step": 151559
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0790538787841797,
      "learning_rate": 0.00015729105226411962,
      "loss": 2.7638,
      "step": 151560
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6749868392944336,
      "learning_rate": 0.00015728745418009836,
      "loss": 3.0382,
      "step": 151561
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4670045375823975,
      "learning_rate": 0.00015728385612260993,
      "loss": 3.1396,
      "step": 151562
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3426930904388428,
      "learning_rate": 0.00015728025809165524,
      "loss": 3.0378,
      "step": 151563
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.580460071563721,
      "learning_rate": 0.00015727666008723482,
      "loss": 3.1989,
      "step": 151564
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9869911670684814,
      "learning_rate": 0.00015727306210934928,
      "loss": 2.5308,
      "step": 151565
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.486095428466797,
      "learning_rate": 0.00015726946415799947,
      "loss": 2.9595,
      "step": 151566
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0440993309020996,
      "learning_rate": 0.00015726586623318592,
      "loss": 3.0756,
      "step": 151567
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4702985286712646,
      "learning_rate": 0.00015726226833490928,
      "loss": 2.7222,
      "step": 151568
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.439300537109375,
      "learning_rate": 0.00015725867046317043,
      "loss": 2.87,
      "step": 151569
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.476335048675537,
      "learning_rate": 0.0001572550726179697,
      "loss": 2.858,
      "step": 151570
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1472725868225098,
      "learning_rate": 0.00015725147479930807,
      "loss": 2.9144,
      "step": 151571
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0954983234405518,
      "learning_rate": 0.00015724787700718612,
      "loss": 2.8432,
      "step": 151572
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5418896675109863,
      "learning_rate": 0.00015724427924160446,
      "loss": 2.9814,
      "step": 151573
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.504948377609253,
      "learning_rate": 0.00015724068150256367,
      "loss": 2.9237,
      "step": 151574
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.745591878890991,
      "learning_rate": 0.00015723708379006466,
      "loss": 3.2941,
      "step": 151575
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.488816738128662,
      "learning_rate": 0.00015723348610410784,
      "loss": 3.1476,
      "step": 151576
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.659444808959961,
      "learning_rate": 0.00015722988844469414,
      "loss": 2.769,
      "step": 151577
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6005494594573975,
      "learning_rate": 0.00015722629081182406,
      "loss": 3.0422,
      "step": 151578
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.448047399520874,
      "learning_rate": 0.00015722269320549834,
      "loss": 2.732,
      "step": 151579
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2975692749023438,
      "learning_rate": 0.00015721909562571752,
      "loss": 3.104,
      "step": 151580
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0112695693969727,
      "learning_rate": 0.00015721549807248245,
      "loss": 2.8066,
      "step": 151581
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.293886184692383,
      "learning_rate": 0.0001572119005457936,
      "loss": 3.214,
      "step": 151582
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.936857223510742,
      "learning_rate": 0.00015720830304565188,
      "loss": 2.8487,
      "step": 151583
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.175612211227417,
      "learning_rate": 0.00015720470557205774,
      "loss": 2.9312,
      "step": 151584
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.208207845687866,
      "learning_rate": 0.00015720110812501213,
      "loss": 2.9645,
      "step": 151585
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3889055252075195,
      "learning_rate": 0.0001571975107045153,
      "loss": 3.013,
      "step": 151586
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.8773778676986694,
      "learning_rate": 0.00015719391331056828,
      "loss": 3.0339,
      "step": 151587
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9140853881835938,
      "learning_rate": 0.00015719031594317148,
      "loss": 3.0031,
      "step": 151588
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.161714792251587,
      "learning_rate": 0.0001571867186023258,
      "loss": 2.894,
      "step": 151589
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0388591289520264,
      "learning_rate": 0.00015718312128803173,
      "loss": 2.8723,
      "step": 151590
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3460583686828613,
      "learning_rate": 0.0001571795240002902,
      "loss": 2.9744,
      "step": 151591
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4658315181732178,
      "learning_rate": 0.00015717592673910151,
      "loss": 2.8151,
      "step": 151592
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.28055477142334,
      "learning_rate": 0.0001571723295044666,
      "loss": 3.1329,
      "step": 151593
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.357210636138916,
      "learning_rate": 0.00015716873229638595,
      "loss": 3.0713,
      "step": 151594
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.333866834640503,
      "learning_rate": 0.00015716513511486043,
      "loss": 3.0629,
      "step": 151595
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7185187339782715,
      "learning_rate": 0.0001571615379598905,
      "loss": 2.9971,
      "step": 151596
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2443904876708984,
      "learning_rate": 0.00015715794083147718,
      "loss": 2.937,
      "step": 151597
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0988688468933105,
      "learning_rate": 0.00015715434372962062,
      "loss": 2.9048,
      "step": 151598
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.864307165145874,
      "learning_rate": 0.0001571507466543219,
      "loss": 2.9594,
      "step": 151599
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.044129371643066,
      "learning_rate": 0.00015714714960558143,
      "loss": 2.9421,
      "step": 151600
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1702122688293457,
      "learning_rate": 0.00015714355258340012,
      "loss": 2.7629,
      "step": 151601
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.141115188598633,
      "learning_rate": 0.00015713995558777844,
      "loss": 2.8102,
      "step": 151602
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1396100521087646,
      "learning_rate": 0.00015713635861871724,
      "loss": 3.1906,
      "step": 151603
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.144120216369629,
      "learning_rate": 0.00015713276167621704,
      "loss": 2.95,
      "step": 151604
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2141971588134766,
      "learning_rate": 0.0001571291647602786,
      "loss": 3.0011,
      "step": 151605
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.902217149734497,
      "learning_rate": 0.00015712556787090244,
      "loss": 2.8727,
      "step": 151606
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.864278554916382,
      "learning_rate": 0.00015712197100808942,
      "loss": 2.8687,
      "step": 151607
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.16395902633667,
      "learning_rate": 0.00015711837417184003,
      "loss": 3.0931,
      "step": 151608
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.15655517578125,
      "learning_rate": 0.00015711477736215514,
      "loss": 3.086,
      "step": 151609
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.0270073413848877,
      "learning_rate": 0.00015711118057903527,
      "loss": 3.0305,
      "step": 151610
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.559901237487793,
      "learning_rate": 0.0001571075838224812,
      "loss": 2.9812,
      "step": 151611
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.221226215362549,
      "learning_rate": 0.00015710398709249337,
      "loss": 3.1482,
      "step": 151612
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4121792316436768,
      "learning_rate": 0.0001571003903890727,
      "loss": 2.8584,
      "step": 151613
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1179158687591553,
      "learning_rate": 0.0001570967937122197,
      "loss": 3.1319,
      "step": 151614
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0943779945373535,
      "learning_rate": 0.0001570931970619352,
      "loss": 2.9232,
      "step": 151615
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.968977689743042,
      "learning_rate": 0.00015708960043821975,
      "loss": 2.6982,
      "step": 151616
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.090506076812744,
      "learning_rate": 0.00015708600384107404,
      "loss": 3.0872,
      "step": 151617
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.193061351776123,
      "learning_rate": 0.00015708240727049867,
      "loss": 2.7014,
      "step": 151618
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.304647207260132,
      "learning_rate": 0.00015707881072649446,
      "loss": 2.9371,
      "step": 151619
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.399238348007202,
      "learning_rate": 0.00015707521420906185,
      "loss": 3.1232,
      "step": 151620
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.521308660507202,
      "learning_rate": 0.00015707161771820187,
      "loss": 2.835,
      "step": 151621
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6273975372314453,
      "learning_rate": 0.0001570680212539149,
      "loss": 2.6627,
      "step": 151622
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6417338848114014,
      "learning_rate": 0.0001570644248162017,
      "loss": 3.0287,
      "step": 151623
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.6461589336395264,
      "learning_rate": 0.0001570608284050628,
      "loss": 3.0662,
      "step": 151624
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.112675189971924,
      "learning_rate": 0.0001570572320204991,
      "loss": 2.9637,
      "step": 151625
    },
    {
      "epoch": 1.97,
      "grad_norm": 5.016942501068115,
      "learning_rate": 0.00015705363566251106,
      "loss": 2.8405,
      "step": 151626
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1390275955200195,
      "learning_rate": 0.00015705003933109952,
      "loss": 2.8456,
      "step": 151627
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.976305603981018,
      "learning_rate": 0.00015704644302626513,
      "loss": 3.0276,
      "step": 151628
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1409215927124023,
      "learning_rate": 0.00015704284674800847,
      "loss": 3.0949,
      "step": 151629
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.58316969871521,
      "learning_rate": 0.00015703925049633015,
      "loss": 2.9148,
      "step": 151630
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.663114547729492,
      "learning_rate": 0.00015703565427123103,
      "loss": 3.0538,
      "step": 151631
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.042660713195801,
      "learning_rate": 0.0001570320580727116,
      "loss": 3.1848,
      "step": 151632
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2386741638183594,
      "learning_rate": 0.0001570284619007727,
      "loss": 3.1067,
      "step": 151633
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.061530113220215,
      "learning_rate": 0.0001570248657554149,
      "loss": 2.7907,
      "step": 151634
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1952080726623535,
      "learning_rate": 0.0001570212696366389,
      "loss": 2.874,
      "step": 151635
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5592586994171143,
      "learning_rate": 0.0001570176735444452,
      "loss": 2.7197,
      "step": 151636
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1738836765289307,
      "learning_rate": 0.00015701407747883473,
      "loss": 3.0769,
      "step": 151637
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.498331308364868,
      "learning_rate": 0.00015701048143980796,
      "loss": 3.096,
      "step": 151638
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.331329107284546,
      "learning_rate": 0.00015700688542736574,
      "loss": 3.0103,
      "step": 151639
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.8411213159561157,
      "learning_rate": 0.0001570032894415086,
      "loss": 2.8583,
      "step": 151640
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.576991081237793,
      "learning_rate": 0.0001569996934822373,
      "loss": 3.1377,
      "step": 151641
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.535811424255371,
      "learning_rate": 0.00015699609754955232,
      "loss": 3.2764,
      "step": 151642
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.2507503032684326,
      "learning_rate": 0.00015699250164345462,
      "loss": 3.0143,
      "step": 151643
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.492249011993408,
      "learning_rate": 0.00015698890576394453,
      "loss": 3.1046,
      "step": 151644
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1159732341766357,
      "learning_rate": 0.00015698530991102303,
      "loss": 3.1079,
      "step": 151645
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.1343586444854736,
      "learning_rate": 0.0001569817140846907,
      "loss": 3.1158,
      "step": 151646
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.9888744354248047,
      "learning_rate": 0.00015697811828494805,
      "loss": 3.1508,
      "step": 151647
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1343796253204346,
      "learning_rate": 0.00015697452251179595,
      "loss": 3.0931,
      "step": 151648
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.051344156265259,
      "learning_rate": 0.00015697092676523499,
      "loss": 2.8701,
      "step": 151649
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.686554431915283,
      "learning_rate": 0.00015696733104526573,
      "loss": 3.0771,
      "step": 151650
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6588120460510254,
      "learning_rate": 0.0001569637353518891,
      "loss": 3.0606,
      "step": 151651
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.201951742172241,
      "learning_rate": 0.00015696013968510554,
      "loss": 3.0588,
      "step": 151652
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.4618887901306152,
      "learning_rate": 0.0001569565440449157,
      "loss": 3.1274,
      "step": 151653
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.7409965991973877,
      "learning_rate": 0.00015695294843132046,
      "loss": 3.0112,
      "step": 151654
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2940051555633545,
      "learning_rate": 0.00015694935284432025,
      "loss": 2.7768,
      "step": 151655
    },
    {
      "epoch": 1.97,
      "grad_norm": 6.545478343963623,
      "learning_rate": 0.00015694575728391596,
      "loss": 2.9458,
      "step": 151656
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.2605133056640625,
      "learning_rate": 0.00015694216175010818,
      "loss": 3.1305,
      "step": 151657
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.068338394165039,
      "learning_rate": 0.00015693856624289754,
      "loss": 2.8842,
      "step": 151658
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.1607537269592285,
      "learning_rate": 0.00015693497076228462,
      "loss": 2.877,
      "step": 151659
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.800529956817627,
      "learning_rate": 0.0001569313753082703,
      "loss": 2.9967,
      "step": 151660
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.5917534828186035,
      "learning_rate": 0.00015692777988085497,
      "loss": 3.0433,
      "step": 151661
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3775100708007812,
      "learning_rate": 0.00015692418448003963,
      "loss": 2.7539,
      "step": 151662
    },
    {
      "epoch": 1.97,
      "grad_norm": 1.9337724447250366,
      "learning_rate": 0.0001569205891058248,
      "loss": 3.1153,
      "step": 151663
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.432323455810547,
      "learning_rate": 0.0001569169937582111,
      "loss": 3.2886,
      "step": 151664
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.6401405334472656,
      "learning_rate": 0.00015691339843719915,
      "loss": 3.1514,
      "step": 151665
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.3944482803344727,
      "learning_rate": 0.00015690980314278977,
      "loss": 2.6887,
      "step": 151666
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1868786811828613,
      "learning_rate": 0.00015690620787498348,
      "loss": 3.0621,
      "step": 151667
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.301607131958008,
      "learning_rate": 0.00015690261263378113,
      "loss": 2.9521,
      "step": 151668
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1615612506866455,
      "learning_rate": 0.00015689901741918318,
      "loss": 2.8416,
      "step": 151669
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.065883159637451,
      "learning_rate": 0.00015689542223119046,
      "loss": 2.9733,
      "step": 151670
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.877431869506836,
      "learning_rate": 0.0001568918270698037,
      "loss": 3.1954,
      "step": 151671
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1410510540008545,
      "learning_rate": 0.00015688823193502334,
      "loss": 3.0395,
      "step": 151672
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.5945582389831543,
      "learning_rate": 0.0001568846368268501,
      "loss": 2.9598,
      "step": 151673
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.2833762168884277,
      "learning_rate": 0.0001568810417452848,
      "loss": 2.8765,
      "step": 151674
    },
    {
      "epoch": 1.97,
      "grad_norm": 3.519977569580078,
      "learning_rate": 0.0001568774466903279,
      "loss": 2.7796,
      "step": 151675
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.53598952293396,
      "learning_rate": 0.00015687385166198033,
      "loss": 3.2253,
      "step": 151676
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.1585404872894287,
      "learning_rate": 0.00015687025666024255,
      "loss": 2.7773,
      "step": 151677
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.3787994384765625,
      "learning_rate": 0.00015686666168511536,
      "loss": 3.0562,
      "step": 151678
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.0220787525177,
      "learning_rate": 0.00015686306673659922,
      "loss": 3.0778,
      "step": 151679
    },
    {
      "epoch": 1.97,
      "grad_norm": 4.224400520324707,
      "learning_rate": 0.00015685947181469506,
      "loss": 2.8908,
      "step": 151680
    },
    {
      "epoch": 1.97,
      "grad_norm": 2.136056900024414,
      "learning_rate": 0.0001568558769194033,
      "loss": 2.8437,
      "step": 151681
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.455909490585327,
      "learning_rate": 0.0001568522820507248,
      "loss": 2.9435,
      "step": 151682
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2796406745910645,
      "learning_rate": 0.00015684868720866027,
      "loss": 3.0727,
      "step": 151683
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.1590094566345215,
      "learning_rate": 0.0001568450923932102,
      "loss": 3.1887,
      "step": 151684
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.203997611999512,
      "learning_rate": 0.0001568414976043752,
      "loss": 3.0787,
      "step": 151685
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.045661926269531,
      "learning_rate": 0.00015683790284215624,
      "loss": 2.7949,
      "step": 151686
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7799453735351562,
      "learning_rate": 0.00015683430810655365,
      "loss": 2.6814,
      "step": 151687
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7841410636901855,
      "learning_rate": 0.00015683071339756842,
      "loss": 2.6731,
      "step": 151688
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.886411666870117,
      "learning_rate": 0.00015682711871520106,
      "loss": 3.0681,
      "step": 151689
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.780228614807129,
      "learning_rate": 0.00015682352405945222,
      "loss": 3.2989,
      "step": 151690
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.516627550125122,
      "learning_rate": 0.00015681992943032246,
      "loss": 3.0727,
      "step": 151691
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1691834926605225,
      "learning_rate": 0.00015681633482781274,
      "loss": 3.0596,
      "step": 151692
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3090944290161133,
      "learning_rate": 0.00015681274025192341,
      "loss": 3.1095,
      "step": 151693
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.087441921234131,
      "learning_rate": 0.00015680914570265545,
      "loss": 2.8649,
      "step": 151694
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8949034214019775,
      "learning_rate": 0.00015680555118000934,
      "loss": 3.0541,
      "step": 151695
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4296274185180664,
      "learning_rate": 0.0001568019566839858,
      "loss": 2.9928,
      "step": 151696
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.871534824371338,
      "learning_rate": 0.00015679836221458535,
      "loss": 2.9742,
      "step": 151697
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2914493083953857,
      "learning_rate": 0.0001567947677718089,
      "loss": 2.9591,
      "step": 151698
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.672330856323242,
      "learning_rate": 0.00015679117335565688,
      "loss": 3.1406,
      "step": 151699
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.802910804748535,
      "learning_rate": 0.00015678757896613022,
      "loss": 3.0581,
      "step": 151700
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1009902954101562,
      "learning_rate": 0.00015678398460322948,
      "loss": 2.8154,
      "step": 151701
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.455505847930908,
      "learning_rate": 0.00015678039026695525,
      "loss": 2.7033,
      "step": 151702
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5074000358581543,
      "learning_rate": 0.00015677679595730814,
      "loss": 3.037,
      "step": 151703
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.22690749168396,
      "learning_rate": 0.00015677320167428906,
      "loss": 2.9432,
      "step": 151704
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1969313621520996,
      "learning_rate": 0.0001567696074178984,
      "loss": 2.8574,
      "step": 151705
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1800432205200195,
      "learning_rate": 0.00015676601318813713,
      "loss": 2.8117,
      "step": 151706
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.076253890991211,
      "learning_rate": 0.00015676241898500575,
      "loss": 3.009,
      "step": 151707
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.302159547805786,
      "learning_rate": 0.00015675882480850492,
      "loss": 2.8189,
      "step": 151708
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.8865923881530762,
      "learning_rate": 0.00015675523065863527,
      "loss": 3.0412,
      "step": 151709
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.891958236694336,
      "learning_rate": 0.00015675163653539758,
      "loss": 3.0823,
      "step": 151710
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9511611461639404,
      "learning_rate": 0.00015674804243879237,
      "loss": 2.955,
      "step": 151711
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2228505611419678,
      "learning_rate": 0.00015674444836882054,
      "loss": 3.3432,
      "step": 151712
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.827584743499756,
      "learning_rate": 0.0001567408543254826,
      "loss": 2.9107,
      "step": 151713
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9482779502868652,
      "learning_rate": 0.00015673726030877923,
      "loss": 2.753,
      "step": 151714
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1978580951690674,
      "learning_rate": 0.000156733666318711,
      "loss": 2.8298,
      "step": 151715
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.359006643295288,
      "learning_rate": 0.0001567300723552788,
      "loss": 2.816,
      "step": 151716
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.904008150100708,
      "learning_rate": 0.00015672647841848304,
      "loss": 3.1556,
      "step": 151717
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9571948051452637,
      "learning_rate": 0.00015672288450832472,
      "loss": 3.118,
      "step": 151718
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4612255096435547,
      "learning_rate": 0.00015671929062480427,
      "loss": 3.0834,
      "step": 151719
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0752649307250977,
      "learning_rate": 0.00015671569676792245,
      "loss": 3.0673,
      "step": 151720
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.3026387691497803,
      "learning_rate": 0.00015671210293767972,
      "loss": 3.0708,
      "step": 151721
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.548427581787109,
      "learning_rate": 0.00015670850913407705,
      "loss": 3.0124,
      "step": 151722
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.726111888885498,
      "learning_rate": 0.00015670491535711484,
      "loss": 3.1536,
      "step": 151723
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8879306316375732,
      "learning_rate": 0.00015670132160679404,
      "loss": 3.1029,
      "step": 151724
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3603386878967285,
      "learning_rate": 0.00015669772788311513,
      "loss": 3.0989,
      "step": 151725
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.2525734901428223,
      "learning_rate": 0.00015669413418607886,
      "loss": 3.0903,
      "step": 151726
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.3890490531921387,
      "learning_rate": 0.00015669054051568572,
      "loss": 2.9236,
      "step": 151727
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3668084144592285,
      "learning_rate": 0.0001566869468719366,
      "loss": 2.9668,
      "step": 151728
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6555423736572266,
      "learning_rate": 0.00015668335325483203,
      "loss": 3.0032,
      "step": 151729
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.301244020462036,
      "learning_rate": 0.00015667975966437278,
      "loss": 2.978,
      "step": 151730
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.601219892501831,
      "learning_rate": 0.00015667616610055954,
      "loss": 3.0433,
      "step": 151731
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.959378242492676,
      "learning_rate": 0.00015667257256339273,
      "loss": 3.0182,
      "step": 151732
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.742898464202881,
      "learning_rate": 0.00015666897905287336,
      "loss": 2.7865,
      "step": 151733
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6082394123077393,
      "learning_rate": 0.0001566653855690019,
      "loss": 3.0732,
      "step": 151734
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.474222421646118,
      "learning_rate": 0.0001566617921117789,
      "loss": 2.9305,
      "step": 151735
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2250571250915527,
      "learning_rate": 0.00015665819868120533,
      "loss": 3.1919,
      "step": 151736
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5158069133758545,
      "learning_rate": 0.0001566546052772816,
      "loss": 2.8207,
      "step": 151737
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.5265021324157715,
      "learning_rate": 0.0001566510119000086,
      "loss": 2.9594,
      "step": 151738
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.617323398590088,
      "learning_rate": 0.00015664741854938687,
      "loss": 3.1099,
      "step": 151739
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.6672797203063965,
      "learning_rate": 0.00015664382522541712,
      "loss": 2.9882,
      "step": 151740
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4614334106445312,
      "learning_rate": 0.00015664023192809984,
      "loss": 2.9844,
      "step": 151741
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.65773868560791,
      "learning_rate": 0.00015663663865743597,
      "loss": 2.729,
      "step": 151742
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.307847499847412,
      "learning_rate": 0.00015663304541342594,
      "loss": 2.885,
      "step": 151743
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.674395799636841,
      "learning_rate": 0.00015662945219607063,
      "loss": 3.0002,
      "step": 151744
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.983203887939453,
      "learning_rate": 0.00015662585900537066,
      "loss": 2.9008,
      "step": 151745
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.529339551925659,
      "learning_rate": 0.0001566222658413265,
      "loss": 3.1788,
      "step": 151746
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.6892411708831787,
      "learning_rate": 0.00015661867270393907,
      "loss": 2.8797,
      "step": 151747
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.6896073818206787,
      "learning_rate": 0.00015661507959320898,
      "loss": 2.964,
      "step": 151748
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.3113389015197754,
      "learning_rate": 0.0001566114865091367,
      "loss": 2.8879,
      "step": 151749
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.024111270904541,
      "learning_rate": 0.00015660789345172318,
      "loss": 2.9516,
      "step": 151750
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8835699558258057,
      "learning_rate": 0.00015660430042096897,
      "loss": 2.9336,
      "step": 151751
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0647621154785156,
      "learning_rate": 0.0001566007074168746,
      "loss": 3.09,
      "step": 151752
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.575862169265747,
      "learning_rate": 0.00015659711443944096,
      "loss": 3.0561,
      "step": 151753
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.999703884124756,
      "learning_rate": 0.00015659352148866853,
      "loss": 3.1115,
      "step": 151754
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8678646087646484,
      "learning_rate": 0.00015658992856455817,
      "loss": 3.0312,
      "step": 151755
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3048012256622314,
      "learning_rate": 0.00015658633566711046,
      "loss": 2.975,
      "step": 151756
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.637955665588379,
      "learning_rate": 0.0001565827427963261,
      "loss": 2.9874,
      "step": 151757
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0695362091064453,
      "learning_rate": 0.00015657914995220555,
      "loss": 3.0922,
      "step": 151758
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8023996353149414,
      "learning_rate": 0.00015657555713474976,
      "loss": 2.9683,
      "step": 151759
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0675835609436035,
      "learning_rate": 0.00015657196434395917,
      "loss": 2.8447,
      "step": 151760
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.138606309890747,
      "learning_rate": 0.0001565683715798347,
      "loss": 2.8027,
      "step": 151761
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.372851848602295,
      "learning_rate": 0.00015656477884237684,
      "loss": 2.7905,
      "step": 151762
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4214916229248047,
      "learning_rate": 0.00015656118613158635,
      "loss": 2.7786,
      "step": 151763
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.977294683456421,
      "learning_rate": 0.00015655759344746368,
      "loss": 3.0037,
      "step": 151764
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4331140518188477,
      "learning_rate": 0.00015655400079000976,
      "loss": 2.9074,
      "step": 151765
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.289785146713257,
      "learning_rate": 0.00015655040815922507,
      "loss": 2.9549,
      "step": 151766
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9881343841552734,
      "learning_rate": 0.00015654681555511048,
      "loss": 3.0114,
      "step": 151767
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4132156372070312,
      "learning_rate": 0.00015654322297766656,
      "loss": 3.0427,
      "step": 151768
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.612410068511963,
      "learning_rate": 0.00015653963042689396,
      "loss": 3.1076,
      "step": 151769
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0762181282043457,
      "learning_rate": 0.00015653603790279322,
      "loss": 3.0121,
      "step": 151770
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3969204425811768,
      "learning_rate": 0.00015653244540536526,
      "loss": 3.2284,
      "step": 151771
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.228139638900757,
      "learning_rate": 0.00015652885293461052,
      "loss": 3.0631,
      "step": 151772
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0128729343414307,
      "learning_rate": 0.00015652526049052989,
      "loss": 2.9212,
      "step": 151773
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.199822187423706,
      "learning_rate": 0.0001565216680731239,
      "loss": 2.7423,
      "step": 151774
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.009573221206665,
      "learning_rate": 0.00015651807568239326,
      "loss": 2.738,
      "step": 151775
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4038591384887695,
      "learning_rate": 0.00015651448331833852,
      "loss": 3.2045,
      "step": 151776
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.242943286895752,
      "learning_rate": 0.0001565108909809605,
      "loss": 2.8723,
      "step": 151777
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1983492374420166,
      "learning_rate": 0.00015650729867025973,
      "loss": 2.6402,
      "step": 151778
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3344666957855225,
      "learning_rate": 0.0001565037063862371,
      "loss": 2.9051,
      "step": 151779
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9685006141662598,
      "learning_rate": 0.00015650011412889314,
      "loss": 2.7187,
      "step": 151780
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.067068099975586,
      "learning_rate": 0.00015649652189822846,
      "loss": 3.0767,
      "step": 151781
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.012171983718872,
      "learning_rate": 0.00015649292969424373,
      "loss": 3.1079,
      "step": 151782
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.983704924583435,
      "learning_rate": 0.0001564893375169398,
      "loss": 2.8998,
      "step": 151783
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5206727981567383,
      "learning_rate": 0.00015648574536631705,
      "loss": 2.7192,
      "step": 151784
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7796688079833984,
      "learning_rate": 0.0001564821532423764,
      "loss": 2.8991,
      "step": 151785
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.477379083633423,
      "learning_rate": 0.0001564785611451185,
      "loss": 3.1133,
      "step": 151786
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4967105388641357,
      "learning_rate": 0.0001564749690745439,
      "loss": 2.901,
      "step": 151787
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3301286697387695,
      "learning_rate": 0.0001564713770306532,
      "loss": 3.1959,
      "step": 151788
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.083282232284546,
      "learning_rate": 0.0001564677850134473,
      "loss": 2.7951,
      "step": 151789
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.3156251907348633,
      "learning_rate": 0.00015646419302292667,
      "loss": 2.7912,
      "step": 151790
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.284452438354492,
      "learning_rate": 0.0001564606010590921,
      "loss": 2.8062,
      "step": 151791
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3093879222869873,
      "learning_rate": 0.00015645700912194426,
      "loss": 3.2097,
      "step": 151792
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8096957206726074,
      "learning_rate": 0.00015645341721148379,
      "loss": 3.0305,
      "step": 151793
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1553118228912354,
      "learning_rate": 0.00015644982532771117,
      "loss": 3.0889,
      "step": 151794
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2821993827819824,
      "learning_rate": 0.0001564462334706274,
      "loss": 2.8911,
      "step": 151795
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.190683126449585,
      "learning_rate": 0.00015644264164023283,
      "loss": 2.83,
      "step": 151796
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5612435340881348,
      "learning_rate": 0.00015643904983652843,
      "loss": 3.0379,
      "step": 151797
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.664281129837036,
      "learning_rate": 0.0001564354580595147,
      "loss": 2.85,
      "step": 151798
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.495462417602539,
      "learning_rate": 0.00015643186630919233,
      "loss": 3.2344,
      "step": 151799
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4442381858825684,
      "learning_rate": 0.00015642827458556188,
      "loss": 3.1041,
      "step": 151800
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1298139095306396,
      "learning_rate": 0.00015642468288862426,
      "loss": 2.902,
      "step": 151801
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.325148820877075,
      "learning_rate": 0.00015642109121837987,
      "loss": 3.0309,
      "step": 151802
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2383430004119873,
      "learning_rate": 0.00015641749957482964,
      "loss": 3.058,
      "step": 151803
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3803672790527344,
      "learning_rate": 0.00015641390795797394,
      "loss": 2.86,
      "step": 151804
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3239662647247314,
      "learning_rate": 0.00015641031636781387,
      "loss": 2.967,
      "step": 151805
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7280426025390625,
      "learning_rate": 0.0001564067248043496,
      "loss": 3.0125,
      "step": 151806
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.515273332595825,
      "learning_rate": 0.00015640313326758214,
      "loss": 3.0143,
      "step": 151807
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2095835208892822,
      "learning_rate": 0.00015639954175751192,
      "loss": 2.7051,
      "step": 151808
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6574089527130127,
      "learning_rate": 0.0001563959502741399,
      "loss": 2.9959,
      "step": 151809
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2398979663848877,
      "learning_rate": 0.00015639235881746642,
      "loss": 3.0363,
      "step": 151810
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.259575128555298,
      "learning_rate": 0.00015638876738749256,
      "loss": 3.1099,
      "step": 151811
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7894206047058105,
      "learning_rate": 0.0001563851759842185,
      "loss": 2.8415,
      "step": 151812
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1923046112060547,
      "learning_rate": 0.00015638158460764526,
      "loss": 2.8545,
      "step": 151813
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1016876697540283,
      "learning_rate": 0.00015637799325777328,
      "loss": 3.0858,
      "step": 151814
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2222883701324463,
      "learning_rate": 0.00015637440193460347,
      "loss": 3.1088,
      "step": 151815
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.861827373504639,
      "learning_rate": 0.00015637081063813623,
      "loss": 2.9942,
      "step": 151816
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.387808322906494,
      "learning_rate": 0.00015636721936837254,
      "loss": 3.0915,
      "step": 151817
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.074918270111084,
      "learning_rate": 0.00015636362812531288,
      "loss": 2.954,
      "step": 151818
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6006765365600586,
      "learning_rate": 0.00015636003690895788,
      "loss": 2.9941,
      "step": 151819
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1552319526672363,
      "learning_rate": 0.0001563564457193082,
      "loss": 2.9904,
      "step": 151820
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3284456729888916,
      "learning_rate": 0.00015635285455636465,
      "loss": 3.0596,
      "step": 151821
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.337388753890991,
      "learning_rate": 0.0001563492634201277,
      "loss": 2.7812,
      "step": 151822
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1960442066192627,
      "learning_rate": 0.00015634567231059823,
      "loss": 3.077,
      "step": 151823
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5376482009887695,
      "learning_rate": 0.00015634208122777688,
      "loss": 2.7657,
      "step": 151824
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.522411584854126,
      "learning_rate": 0.0001563384901716642,
      "loss": 3.1947,
      "step": 151825
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.15299391746521,
      "learning_rate": 0.00015633489914226075,
      "loss": 3.0752,
      "step": 151826
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2983059883117676,
      "learning_rate": 0.00015633130813956755,
      "loss": 2.8582,
      "step": 151827
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.051272392272949,
      "learning_rate": 0.00015632771716358492,
      "loss": 2.8956,
      "step": 151828
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.914099931716919,
      "learning_rate": 0.00015632412621431377,
      "loss": 2.9186,
      "step": 151829
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1426727771759033,
      "learning_rate": 0.00015632053529175474,
      "loss": 3.1319,
      "step": 151830
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.281061887741089,
      "learning_rate": 0.00015631694439590828,
      "loss": 3.142,
      "step": 151831
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.036255359649658,
      "learning_rate": 0.00015631335352677534,
      "loss": 2.7606,
      "step": 151832
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.461848020553589,
      "learning_rate": 0.00015630976268435646,
      "loss": 3.2766,
      "step": 151833
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0777318477630615,
      "learning_rate": 0.00015630617186865217,
      "loss": 3.0581,
      "step": 151834
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1050362586975098,
      "learning_rate": 0.00015630258107966343,
      "loss": 3.0845,
      "step": 151835
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2122743129730225,
      "learning_rate": 0.0001562989903173907,
      "loss": 2.9174,
      "step": 151836
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2124810218811035,
      "learning_rate": 0.00015629539958183461,
      "loss": 3.1686,
      "step": 151837
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9657301902770996,
      "learning_rate": 0.00015629180887299606,
      "loss": 2.9733,
      "step": 151838
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0782015323638916,
      "learning_rate": 0.00015628821819087556,
      "loss": 2.9333,
      "step": 151839
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.21612286567688,
      "learning_rate": 0.00015628462753547364,
      "loss": 3.1103,
      "step": 151840
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.337329864501953,
      "learning_rate": 0.0001562810369067913,
      "loss": 2.8022,
      "step": 151841
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2354650497436523,
      "learning_rate": 0.00015627744630482897,
      "loss": 3.0381,
      "step": 151842
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3987419605255127,
      "learning_rate": 0.0001562738557295873,
      "loss": 2.7264,
      "step": 151843
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.901421308517456,
      "learning_rate": 0.00015627026518106712,
      "loss": 3.0562,
      "step": 151844
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.877694845199585,
      "learning_rate": 0.00015626667465926892,
      "loss": 2.9829,
      "step": 151845
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0310935974121094,
      "learning_rate": 0.00015626308416419354,
      "loss": 3.0303,
      "step": 151846
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.665030002593994,
      "learning_rate": 0.0001562594936958416,
      "loss": 3.1004,
      "step": 151847
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.227325916290283,
      "learning_rate": 0.00015625590325421372,
      "loss": 2.9991,
      "step": 151848
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1341700553894043,
      "learning_rate": 0.00015625231283931045,
      "loss": 3.0082,
      "step": 151849
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.1179513931274414,
      "learning_rate": 0.00015624872245113274,
      "loss": 2.8934,
      "step": 151850
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4961283206939697,
      "learning_rate": 0.000156245132089681,
      "loss": 2.8292,
      "step": 151851
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0034587383270264,
      "learning_rate": 0.00015624154175495608,
      "loss": 2.9579,
      "step": 151852
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.370072841644287,
      "learning_rate": 0.00015623795144695856,
      "loss": 3.0304,
      "step": 151853
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.361553430557251,
      "learning_rate": 0.00015623436116568915,
      "loss": 2.9355,
      "step": 151854
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.413191795349121,
      "learning_rate": 0.00015623077091114835,
      "loss": 3.0261,
      "step": 151855
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.226668357849121,
      "learning_rate": 0.0001562271806833371,
      "loss": 2.9384,
      "step": 151856
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.330324411392212,
      "learning_rate": 0.00015622359048225582,
      "loss": 3.1042,
      "step": 151857
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.413257360458374,
      "learning_rate": 0.00015622000030790536,
      "loss": 3.3197,
      "step": 151858
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.599003791809082,
      "learning_rate": 0.00015621641016028633,
      "loss": 2.9615,
      "step": 151859
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2249326705932617,
      "learning_rate": 0.00015621282003939942,
      "loss": 3.0512,
      "step": 151860
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.242954969406128,
      "learning_rate": 0.00015620922994524514,
      "loss": 3.0245,
      "step": 151861
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0663397312164307,
      "learning_rate": 0.00015620563987782436,
      "loss": 2.8587,
      "step": 151862
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1137118339538574,
      "learning_rate": 0.0001562020498371376,
      "loss": 3.0347,
      "step": 151863
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0535526275634766,
      "learning_rate": 0.00015619845982318566,
      "loss": 3.1666,
      "step": 151864
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.262742757797241,
      "learning_rate": 0.00015619486983596914,
      "loss": 2.8167,
      "step": 151865
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2238287925720215,
      "learning_rate": 0.00015619127987548877,
      "loss": 3.0485,
      "step": 151866
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.636967420578003,
      "learning_rate": 0.00015618768994174494,
      "loss": 2.9135,
      "step": 151867
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8589634895324707,
      "learning_rate": 0.00015618410003473876,
      "loss": 3.2053,
      "step": 151868
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3780298233032227,
      "learning_rate": 0.00015618051015447047,
      "loss": 3.0003,
      "step": 151869
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.537804126739502,
      "learning_rate": 0.00015617692030094106,
      "loss": 2.9449,
      "step": 151870
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.146552085876465,
      "learning_rate": 0.000156173330474151,
      "loss": 2.9525,
      "step": 151871
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1024134159088135,
      "learning_rate": 0.00015616974067410126,
      "loss": 2.9959,
      "step": 151872
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.08294677734375,
      "learning_rate": 0.00015616615090079203,
      "loss": 2.952,
      "step": 151873
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.4696600437164307,
      "learning_rate": 0.00015616256115422435,
      "loss": 3.1083,
      "step": 151874
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0126302242279053,
      "learning_rate": 0.00015615897143439865,
      "loss": 3.0764,
      "step": 151875
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3533833026885986,
      "learning_rate": 0.00015615538174131583,
      "loss": 2.973,
      "step": 151876
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.967147946357727,
      "learning_rate": 0.00015615179207497632,
      "loss": 2.7582,
      "step": 151877
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.615060806274414,
      "learning_rate": 0.00015614820243538114,
      "loss": 2.9138,
      "step": 151878
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.236830472946167,
      "learning_rate": 0.0001561446128225305,
      "loss": 2.771,
      "step": 151879
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4635515213012695,
      "learning_rate": 0.0001561410232364254,
      "loss": 2.7769,
      "step": 151880
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.565173864364624,
      "learning_rate": 0.00015613743367706628,
      "loss": 3.0312,
      "step": 151881
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.160980701446533,
      "learning_rate": 0.00015613384414445408,
      "loss": 3.0199,
      "step": 151882
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3374006748199463,
      "learning_rate": 0.00015613025463858912,
      "loss": 3.0261,
      "step": 151883
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3539299964904785,
      "learning_rate": 0.00015612666515947256,
      "loss": 2.8718,
      "step": 151884
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.771408796310425,
      "learning_rate": 0.00015612307570710452,
      "loss": 2.9007,
      "step": 151885
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8018767833709717,
      "learning_rate": 0.00015611948628148603,
      "loss": 2.8045,
      "step": 151886
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7897846698760986,
      "learning_rate": 0.0001561158968826175,
      "loss": 2.975,
      "step": 151887
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.266555070877075,
      "learning_rate": 0.0001561123075104999,
      "loss": 3.1597,
      "step": 151888
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1978306770324707,
      "learning_rate": 0.0001561087181651336,
      "loss": 2.8551,
      "step": 151889
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.8205130100250244,
      "learning_rate": 0.00015610512884651963,
      "loss": 2.8826,
      "step": 151890
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3645405769348145,
      "learning_rate": 0.0001561015395546582,
      "loss": 2.9577,
      "step": 151891
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8073441982269287,
      "learning_rate": 0.00015609795028955034,
      "loss": 2.881,
      "step": 151892
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1236650943756104,
      "learning_rate": 0.0001560943610511965,
      "loss": 3.2757,
      "step": 151893
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.8605619668960571,
      "learning_rate": 0.00015609077183959748,
      "loss": 2.9693,
      "step": 151894
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.988722801208496,
      "learning_rate": 0.00015608718265475386,
      "loss": 2.8961,
      "step": 151895
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9372828006744385,
      "learning_rate": 0.00015608359349666656,
      "loss": 2.6981,
      "step": 151896
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2742910385131836,
      "learning_rate": 0.00015608000436533577,
      "loss": 2.7867,
      "step": 151897
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5576658248901367,
      "learning_rate": 0.00015607641526076257,
      "loss": 2.9417,
      "step": 151898
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3701975345611572,
      "learning_rate": 0.00015607282618294735,
      "loss": 2.6737,
      "step": 151899
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3782663345336914,
      "learning_rate": 0.00015606923713189103,
      "loss": 3.1949,
      "step": 151900
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1573853492736816,
      "learning_rate": 0.00015606564810759403,
      "loss": 2.9965,
      "step": 151901
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1565604209899902,
      "learning_rate": 0.00015606205911005737,
      "loss": 3.1991,
      "step": 151902
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3825807571411133,
      "learning_rate": 0.00015605847013928128,
      "loss": 2.6327,
      "step": 151903
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.914700984954834,
      "learning_rate": 0.00015605488119526677,
      "loss": 2.8156,
      "step": 151904
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.629920482635498,
      "learning_rate": 0.00015605129227801425,
      "loss": 2.8518,
      "step": 151905
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.300934076309204,
      "learning_rate": 0.00015604770338752458,
      "loss": 3.0337,
      "step": 151906
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7201120853424072,
      "learning_rate": 0.00015604411452379828,
      "loss": 3.128,
      "step": 151907
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.103231430053711,
      "learning_rate": 0.00015604052568683624,
      "loss": 2.9088,
      "step": 151908
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4934399127960205,
      "learning_rate": 0.00015603693687663895,
      "loss": 3.1065,
      "step": 151909
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1911966800689697,
      "learning_rate": 0.0001560333480932071,
      "loss": 3.1473,
      "step": 151910
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.8813891410827637,
      "learning_rate": 0.00015602975933654128,
      "loss": 3.1359,
      "step": 151911
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.446915626525879,
      "learning_rate": 0.00015602617060664234,
      "loss": 2.8394,
      "step": 151912
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0855979919433594,
      "learning_rate": 0.00015602258190351075,
      "loss": 2.7676,
      "step": 151913
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.550421714782715,
      "learning_rate": 0.00015601899322714737,
      "loss": 2.973,
      "step": 151914
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0280823707580566,
      "learning_rate": 0.00015601540457755282,
      "loss": 3.0505,
      "step": 151915
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3101439476013184,
      "learning_rate": 0.0001560118159547276,
      "loss": 2.9185,
      "step": 151916
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4302515983581543,
      "learning_rate": 0.0001560082273586726,
      "loss": 2.648,
      "step": 151917
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.102886199951172,
      "learning_rate": 0.00015600463878938842,
      "loss": 2.809,
      "step": 151918
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.879279136657715,
      "learning_rate": 0.00015600105024687552,
      "loss": 2.8998,
      "step": 151919
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4682376384735107,
      "learning_rate": 0.0001559974617311349,
      "loss": 2.9908,
      "step": 151920
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9462026357650757,
      "learning_rate": 0.00015599387324216705,
      "loss": 3.1389,
      "step": 151921
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9850666522979736,
      "learning_rate": 0.00015599028477997253,
      "loss": 2.8184,
      "step": 151922
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5319864749908447,
      "learning_rate": 0.0001559866963445523,
      "loss": 3.3937,
      "step": 151923
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5530571937561035,
      "learning_rate": 0.00015598310793590682,
      "loss": 3.2685,
      "step": 151924
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.572230339050293,
      "learning_rate": 0.00015597951955403672,
      "loss": 3.0081,
      "step": 151925
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4113924503326416,
      "learning_rate": 0.0001559759311989428,
      "loss": 3.2817,
      "step": 151926
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.108592987060547,
      "learning_rate": 0.00015597234287062573,
      "loss": 3.0167,
      "step": 151927
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6223208904266357,
      "learning_rate": 0.000155968754569086,
      "loss": 2.9518,
      "step": 151928
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4789113998413086,
      "learning_rate": 0.00015596516629432446,
      "loss": 2.9562,
      "step": 151929
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.02822208404541,
      "learning_rate": 0.00015596157804634167,
      "loss": 3.1243,
      "step": 151930
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.8407890796661377,
      "learning_rate": 0.00015595798982513844,
      "loss": 3.2243,
      "step": 151931
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.034170627593994,
      "learning_rate": 0.00015595440163071532,
      "loss": 2.9294,
      "step": 151932
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.148709297180176,
      "learning_rate": 0.00015595081346307297,
      "loss": 2.8425,
      "step": 151933
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4052698612213135,
      "learning_rate": 0.000155947225322212,
      "loss": 2.8521,
      "step": 151934
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3771111965179443,
      "learning_rate": 0.00015594363720813328,
      "loss": 2.6918,
      "step": 151935
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.150399684906006,
      "learning_rate": 0.00015594004912083725,
      "loss": 2.6522,
      "step": 151936
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9588209390640259,
      "learning_rate": 0.00015593646106032476,
      "loss": 3.0736,
      "step": 151937
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1604583263397217,
      "learning_rate": 0.00015593287302659634,
      "loss": 2.89,
      "step": 151938
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.662289619445801,
      "learning_rate": 0.00015592928501965292,
      "loss": 2.9448,
      "step": 151939
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2696692943573,
      "learning_rate": 0.00015592569703949474,
      "loss": 3.0809,
      "step": 151940
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.471513271331787,
      "learning_rate": 0.00015592210908612277,
      "loss": 3.0166,
      "step": 151941
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4286837577819824,
      "learning_rate": 0.00015591852115953752,
      "loss": 3.122,
      "step": 151942
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.636777877807617,
      "learning_rate": 0.00015591493325973986,
      "loss": 3.0314,
      "step": 151943
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5061697959899902,
      "learning_rate": 0.00015591134538673023,
      "loss": 3.0236,
      "step": 151944
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3234012126922607,
      "learning_rate": 0.00015590775754050957,
      "loss": 2.9582,
      "step": 151945
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5139126777648926,
      "learning_rate": 0.00015590416972107823,
      "loss": 3.1156,
      "step": 151946
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.023064374923706,
      "learning_rate": 0.00015590058192843705,
      "loss": 3.0673,
      "step": 151947
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0492191314697266,
      "learning_rate": 0.00015589699416258663,
      "loss": 3.0403,
      "step": 151948
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.117579936981201,
      "learning_rate": 0.00015589340642352775,
      "loss": 3.0295,
      "step": 151949
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.168391704559326,
      "learning_rate": 0.0001558898187112609,
      "loss": 2.9115,
      "step": 151950
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.206873655319214,
      "learning_rate": 0.0001558862310257871,
      "loss": 2.9561,
      "step": 151951
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0529401302337646,
      "learning_rate": 0.00015588264336710653,
      "loss": 2.9398,
      "step": 151952
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.722635507583618,
      "learning_rate": 0.0001558790557352202,
      "loss": 2.712,
      "step": 151953
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.900653600692749,
      "learning_rate": 0.00015587546813012855,
      "loss": 2.9214,
      "step": 151954
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.1279945373535156,
      "learning_rate": 0.00015587188055183254,
      "loss": 3.2539,
      "step": 151955
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.163607358932495,
      "learning_rate": 0.00015586829300033253,
      "loss": 2.9827,
      "step": 151956
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1434953212738037,
      "learning_rate": 0.00015586470547562954,
      "loss": 3.1661,
      "step": 151957
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.50471830368042,
      "learning_rate": 0.0001558611179777238,
      "loss": 2.7258,
      "step": 151958
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.137721300125122,
      "learning_rate": 0.00015585753050661628,
      "loss": 3.1686,
      "step": 151959
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1529111862182617,
      "learning_rate": 0.0001558539430623075,
      "loss": 2.9145,
      "step": 151960
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3786399364471436,
      "learning_rate": 0.00015585035564479827,
      "loss": 2.9836,
      "step": 151961
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.189512252807617,
      "learning_rate": 0.00015584676825408913,
      "loss": 2.8393,
      "step": 151962
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1800177097320557,
      "learning_rate": 0.00015584318089018097,
      "loss": 3.0403,
      "step": 151963
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3135595321655273,
      "learning_rate": 0.00015583959355307405,
      "loss": 2.7172,
      "step": 151964
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0719001293182373,
      "learning_rate": 0.00015583600624276942,
      "loss": 2.957,
      "step": 151965
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8845109939575195,
      "learning_rate": 0.00015583241895926743,
      "loss": 3.0816,
      "step": 151966
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4965083599090576,
      "learning_rate": 0.0001558288317025691,
      "loss": 2.8538,
      "step": 151967
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.44386625289917,
      "learning_rate": 0.00015582524447267475,
      "loss": 2.8816,
      "step": 151968
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.195711612701416,
      "learning_rate": 0.00015582165726958544,
      "loss": 2.8093,
      "step": 151969
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5787453651428223,
      "learning_rate": 0.0001558180700933014,
      "loss": 2.7896,
      "step": 151970
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.542541980743408,
      "learning_rate": 0.0001558144829438236,
      "loss": 2.9252,
      "step": 151971
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.929353713989258,
      "learning_rate": 0.00015581089582115246,
      "loss": 2.7771,
      "step": 151972
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.992480754852295,
      "learning_rate": 0.00015580730872528898,
      "loss": 2.8977,
      "step": 151973
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5191562175750732,
      "learning_rate": 0.0001558037216562335,
      "loss": 3.2443,
      "step": 151974
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2771894931793213,
      "learning_rate": 0.00015580013461398705,
      "loss": 2.8554,
      "step": 151975
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9582809209823608,
      "learning_rate": 0.00015579654759854985,
      "loss": 3.1401,
      "step": 151976
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.481590509414673,
      "learning_rate": 0.0001557929606099229,
      "loss": 3.0871,
      "step": 151977
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5746657848358154,
      "learning_rate": 0.00015578937364810666,
      "loss": 2.6872,
      "step": 151978
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.956515073776245,
      "learning_rate": 0.00015578578671310205,
      "loss": 2.9126,
      "step": 151979
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3610706329345703,
      "learning_rate": 0.0001557821998049094,
      "loss": 2.9588,
      "step": 151980
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.272165060043335,
      "learning_rate": 0.00015577861292352983,
      "loss": 3.3743,
      "step": 151981
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1133034229278564,
      "learning_rate": 0.0001557750260689635,
      "loss": 3.0737,
      "step": 151982
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7486302852630615,
      "learning_rate": 0.00015577143924121143,
      "loss": 2.6594,
      "step": 151983
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.057279586791992,
      "learning_rate": 0.00015576785244027404,
      "loss": 2.8959,
      "step": 151984
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2975480556488037,
      "learning_rate": 0.00015576426566615227,
      "loss": 3.0304,
      "step": 151985
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.339855670928955,
      "learning_rate": 0.00015576067891884653,
      "loss": 2.8018,
      "step": 151986
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3688595294952393,
      "learning_rate": 0.0001557570921983578,
      "loss": 2.7835,
      "step": 151987
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.2223057746887207,
      "learning_rate": 0.00015575350550468635,
      "loss": 3.0478,
      "step": 151988
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.056619644165039,
      "learning_rate": 0.00015574991883783316,
      "loss": 3.1214,
      "step": 151989
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.207432746887207,
      "learning_rate": 0.00015574633219779866,
      "loss": 3.0365,
      "step": 151990
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2048680782318115,
      "learning_rate": 0.00015574274558458376,
      "loss": 3.1195,
      "step": 151991
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4461510181427,
      "learning_rate": 0.0001557391589981889,
      "loss": 3.027,
      "step": 151992
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1705679893493652,
      "learning_rate": 0.00015573557243861495,
      "loss": 3.0399,
      "step": 151993
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0586559772491455,
      "learning_rate": 0.00015573198590586247,
      "loss": 2.845,
      "step": 151994
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0577147006988525,
      "learning_rate": 0.00015572839939993218,
      "loss": 2.8725,
      "step": 151995
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.144852638244629,
      "learning_rate": 0.00015572481292082458,
      "loss": 2.9498,
      "step": 151996
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.729613780975342,
      "learning_rate": 0.00015572122646854056,
      "loss": 2.8562,
      "step": 151997
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3020317554473877,
      "learning_rate": 0.00015571764004308053,
      "loss": 2.9247,
      "step": 151998
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3451554775238037,
      "learning_rate": 0.00015571405364444546,
      "loss": 3.0876,
      "step": 151999
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0209548473358154,
      "learning_rate": 0.0001557104672726359,
      "loss": 2.6876,
      "step": 152000
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.057586431503296,
      "learning_rate": 0.0001557068809276525,
      "loss": 3.1785,
      "step": 152001
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.4701716899871826,
      "learning_rate": 0.00015570329460949574,
      "loss": 2.9175,
      "step": 152002
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1554839611053467,
      "learning_rate": 0.0001556997083181666,
      "loss": 3.0961,
      "step": 152003
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4241018295288086,
      "learning_rate": 0.0001556961220536655,
      "loss": 2.8023,
      "step": 152004
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8829429149627686,
      "learning_rate": 0.00015569253581599335,
      "loss": 2.9494,
      "step": 152005
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0617964267730713,
      "learning_rate": 0.00015568894960515066,
      "loss": 2.7763,
      "step": 152006
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3917393684387207,
      "learning_rate": 0.000155685363421138,
      "loss": 3.0826,
      "step": 152007
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.789560556411743,
      "learning_rate": 0.00015568177726395626,
      "loss": 3.0399,
      "step": 152008
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.070330858230591,
      "learning_rate": 0.00015567819113360604,
      "loss": 3.0299,
      "step": 152009
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2313392162323,
      "learning_rate": 0.0001556746050300878,
      "loss": 3.1924,
      "step": 152010
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7549779415130615,
      "learning_rate": 0.00015567101895340256,
      "loss": 2.8181,
      "step": 152011
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4562888145446777,
      "learning_rate": 0.00015566743290355075,
      "loss": 3.1317,
      "step": 152012
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9365735054016113,
      "learning_rate": 0.00015566384688053296,
      "loss": 3.4109,
      "step": 152013
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.570133924484253,
      "learning_rate": 0.00015566026088435013,
      "loss": 3.0933,
      "step": 152014
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.60748028755188,
      "learning_rate": 0.00015565667491500266,
      "loss": 3.0558,
      "step": 152015
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.238006114959717,
      "learning_rate": 0.00015565308897249147,
      "loss": 2.8004,
      "step": 152016
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.041030168533325,
      "learning_rate": 0.00015564950305681707,
      "loss": 3.0318,
      "step": 152017
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2736592292785645,
      "learning_rate": 0.00015564591716798017,
      "loss": 2.9207,
      "step": 152018
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.609342575073242,
      "learning_rate": 0.0001556423313059813,
      "loss": 3.0552,
      "step": 152019
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.851093292236328,
      "learning_rate": 0.00015563874547082135,
      "loss": 3.0262,
      "step": 152020
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.215261936187744,
      "learning_rate": 0.00015563515966250075,
      "loss": 2.8061,
      "step": 152021
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3851616382598877,
      "learning_rate": 0.00015563157388102046,
      "loss": 3.0343,
      "step": 152022
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.457700252532959,
      "learning_rate": 0.00015562798812638096,
      "loss": 2.7835,
      "step": 152023
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3881375789642334,
      "learning_rate": 0.00015562440239858295,
      "loss": 2.9809,
      "step": 152024
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.219024181365967,
      "learning_rate": 0.00015562081669762696,
      "loss": 2.7468,
      "step": 152025
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4516372680664062,
      "learning_rate": 0.00015561723102351394,
      "loss": 3.1752,
      "step": 152026
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.342576265335083,
      "learning_rate": 0.00015561364537624423,
      "loss": 2.9737,
      "step": 152027
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1293420791625977,
      "learning_rate": 0.00015561005975581882,
      "loss": 2.8669,
      "step": 152028
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7053427696228027,
      "learning_rate": 0.00015560647416223812,
      "loss": 2.9137,
      "step": 152029
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.769193649291992,
      "learning_rate": 0.0001556028885955031,
      "loss": 3.0856,
      "step": 152030
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2157444953918457,
      "learning_rate": 0.00015559930305561402,
      "loss": 3.0891,
      "step": 152031
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.271084785461426,
      "learning_rate": 0.00015559571754257187,
      "loss": 2.8192,
      "step": 152032
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8785746097564697,
      "learning_rate": 0.0001555921320563771,
      "loss": 3.0659,
      "step": 152033
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1864750385284424,
      "learning_rate": 0.00015558854659703054,
      "loss": 3.1886,
      "step": 152034
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.316396474838257,
      "learning_rate": 0.00015558496116453274,
      "loss": 3.0286,
      "step": 152035
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5205516815185547,
      "learning_rate": 0.0001555813757588846,
      "loss": 3.1028,
      "step": 152036
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.768566370010376,
      "learning_rate": 0.0001555777903800864,
      "loss": 3.1203,
      "step": 152037
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.359147548675537,
      "learning_rate": 0.00015557420502813915,
      "loss": 2.9186,
      "step": 152038
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.441087245941162,
      "learning_rate": 0.00015557061970304325,
      "loss": 3.0262,
      "step": 152039
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7429733276367188,
      "learning_rate": 0.00015556703440479963,
      "loss": 2.8179,
      "step": 152040
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3414180278778076,
      "learning_rate": 0.0001555634491334087,
      "loss": 2.9062,
      "step": 152041
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1896448135375977,
      "learning_rate": 0.00015555986388887148,
      "loss": 2.9604,
      "step": 152042
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.454073190689087,
      "learning_rate": 0.00015555627867118815,
      "loss": 2.8616,
      "step": 152043
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.451688051223755,
      "learning_rate": 0.0001555526934803598,
      "loss": 2.8513,
      "step": 152044
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9745067358016968,
      "learning_rate": 0.00015554910831638675,
      "loss": 2.9477,
      "step": 152045
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.830955743789673,
      "learning_rate": 0.00015554552317927002,
      "loss": 2.8641,
      "step": 152046
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3507628440856934,
      "learning_rate": 0.00015554193806900998,
      "loss": 2.7973,
      "step": 152047
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1985368728637695,
      "learning_rate": 0.00015553835298560764,
      "loss": 3.0862,
      "step": 152048
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5589568614959717,
      "learning_rate": 0.0001555347679290632,
      "loss": 2.7587,
      "step": 152049
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4036686420440674,
      "learning_rate": 0.0001555311828993777,
      "loss": 2.892,
      "step": 152050
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.423833131790161,
      "learning_rate": 0.00015552759789655158,
      "loss": 2.8797,
      "step": 152051
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2631425857543945,
      "learning_rate": 0.00015552401292058573,
      "loss": 3.1537,
      "step": 152052
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.248253345489502,
      "learning_rate": 0.00015552042797148058,
      "loss": 2.8319,
      "step": 152053
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3548271656036377,
      "learning_rate": 0.0001555168430492371,
      "loss": 2.9993,
      "step": 152054
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6648495197296143,
      "learning_rate": 0.00015551325815385555,
      "loss": 3.0794,
      "step": 152055
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.198227882385254,
      "learning_rate": 0.00015550967328533693,
      "loss": 3.0649,
      "step": 152056
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0068767070770264,
      "learning_rate": 0.00015550608844368166,
      "loss": 2.95,
      "step": 152057
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.705552577972412,
      "learning_rate": 0.00015550250362889072,
      "loss": 2.6928,
      "step": 152058
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.671050786972046,
      "learning_rate": 0.00015549891884096442,
      "loss": 3.1192,
      "step": 152059
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.574355602264404,
      "learning_rate": 0.0001554953340799038,
      "loss": 2.7965,
      "step": 152060
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.810739517211914,
      "learning_rate": 0.00015549174934570913,
      "loss": 2.9433,
      "step": 152061
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.314992666244507,
      "learning_rate": 0.00015548816463838142,
      "loss": 2.8483,
      "step": 152062
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4508097171783447,
      "learning_rate": 0.00015548457995792102,
      "loss": 3.1746,
      "step": 152063
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.3493685722351074,
      "learning_rate": 0.0001554809953043289,
      "loss": 2.8995,
      "step": 152064
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0405609607696533,
      "learning_rate": 0.00015547741067760548,
      "loss": 3.0406,
      "step": 152065
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8261666297912598,
      "learning_rate": 0.00015547382607775177,
      "loss": 3.0785,
      "step": 152066
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.289982318878174,
      "learning_rate": 0.00015547024150476798,
      "loss": 2.9426,
      "step": 152067
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.095020294189453,
      "learning_rate": 0.0001554666569586551,
      "loss": 2.7134,
      "step": 152068
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4631428718566895,
      "learning_rate": 0.00015546307243941357,
      "loss": 3.2908,
      "step": 152069
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.140411615371704,
      "learning_rate": 0.0001554594879470443,
      "loss": 2.7323,
      "step": 152070
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4150216579437256,
      "learning_rate": 0.00015545590348154775,
      "loss": 2.9753,
      "step": 152071
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3907604217529297,
      "learning_rate": 0.00015545231904292477,
      "loss": 3.0826,
      "step": 152072
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.55849289894104,
      "learning_rate": 0.00015544873463117594,
      "loss": 3.0162,
      "step": 152073
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4087371826171875,
      "learning_rate": 0.00015544515024630196,
      "loss": 2.8999,
      "step": 152074
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.226905345916748,
      "learning_rate": 0.0001554415658883033,
      "loss": 2.8736,
      "step": 152075
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.463484525680542,
      "learning_rate": 0.00015543798155718088,
      "loss": 2.9578,
      "step": 152076
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3160464763641357,
      "learning_rate": 0.00015543439725293517,
      "loss": 2.9484,
      "step": 152077
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.598815441131592,
      "learning_rate": 0.00015543081297556704,
      "loss": 3.0017,
      "step": 152078
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0911173820495605,
      "learning_rate": 0.00015542722872507705,
      "loss": 2.8035,
      "step": 152079
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.250089406967163,
      "learning_rate": 0.00015542364450146586,
      "loss": 2.9375,
      "step": 152080
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.113867998123169,
      "learning_rate": 0.00015542006030473405,
      "loss": 2.9374,
      "step": 152081
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5250120162963867,
      "learning_rate": 0.00015541647613488252,
      "loss": 3.0648,
      "step": 152082
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0591299533843994,
      "learning_rate": 0.00015541289199191163,
      "loss": 2.7944,
      "step": 152083
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.135748863220215,
      "learning_rate": 0.00015540930787582235,
      "loss": 3.0105,
      "step": 152084
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2919373512268066,
      "learning_rate": 0.0001554057237866152,
      "loss": 3.0869,
      "step": 152085
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.721872568130493,
      "learning_rate": 0.00015540213972429088,
      "loss": 2.8059,
      "step": 152086
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.365535259246826,
      "learning_rate": 0.00015539855568884992,
      "loss": 3.1448,
      "step": 152087
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3795673847198486,
      "learning_rate": 0.0001553949716802932,
      "loss": 2.946,
      "step": 152088
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7823681831359863,
      "learning_rate": 0.00015539138769862115,
      "loss": 2.9807,
      "step": 152089
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0667057037353516,
      "learning_rate": 0.0001553878037438347,
      "loss": 3.0335,
      "step": 152090
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.347358465194702,
      "learning_rate": 0.00015538421981593436,
      "loss": 2.991,
      "step": 152091
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9377310276031494,
      "learning_rate": 0.00015538063591492076,
      "loss": 2.8493,
      "step": 152092
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.619903087615967,
      "learning_rate": 0.00015537705204079471,
      "loss": 2.9963,
      "step": 152093
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.882366895675659,
      "learning_rate": 0.0001553734681935568,
      "loss": 2.9675,
      "step": 152094
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.87624192237854,
      "learning_rate": 0.0001553698843732076,
      "loss": 2.952,
      "step": 152095
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.9807534217834473,
      "learning_rate": 0.00015536630057974796,
      "loss": 2.7651,
      "step": 152096
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5243098735809326,
      "learning_rate": 0.00015536271681317846,
      "loss": 2.9157,
      "step": 152097
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.229033946990967,
      "learning_rate": 0.00015535913307349964,
      "loss": 2.8808,
      "step": 152098
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3792848587036133,
      "learning_rate": 0.00015535554936071242,
      "loss": 2.9195,
      "step": 152099
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.422056198120117,
      "learning_rate": 0.00015535196567481718,
      "loss": 2.871,
      "step": 152100
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3243424892425537,
      "learning_rate": 0.0001553483820158149,
      "loss": 3.1227,
      "step": 152101
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0751953125,
      "learning_rate": 0.00015534479838370607,
      "loss": 3.1763,
      "step": 152102
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0428268909454346,
      "learning_rate": 0.0001553412147784914,
      "loss": 3.2609,
      "step": 152103
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.540438652038574,
      "learning_rate": 0.0001553376312001714,
      "loss": 3.0276,
      "step": 152104
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5641138553619385,
      "learning_rate": 0.0001553340476487469,
      "loss": 3.1658,
      "step": 152105
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5307910442352295,
      "learning_rate": 0.00015533046412421851,
      "loss": 2.8524,
      "step": 152106
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7404568195343018,
      "learning_rate": 0.00015532688062658702,
      "loss": 2.9199,
      "step": 152107
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.1035685539245605,
      "learning_rate": 0.00015532329715585295,
      "loss": 3.2425,
      "step": 152108
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.667311668395996,
      "learning_rate": 0.00015531971371201707,
      "loss": 2.8987,
      "step": 152109
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3075740337371826,
      "learning_rate": 0.00015531613029507984,
      "loss": 3.0698,
      "step": 152110
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1302006244659424,
      "learning_rate": 0.00015531254690504217,
      "loss": 2.6585,
      "step": 152111
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9858989715576172,
      "learning_rate": 0.00015530896354190452,
      "loss": 3.1057,
      "step": 152112
    },
    {
      "epoch": 1.98,
      "grad_norm": 5.538239479064941,
      "learning_rate": 0.0001553053802056678,
      "loss": 3.1125,
      "step": 152113
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.651858806610107,
      "learning_rate": 0.0001553017968963324,
      "loss": 2.8862,
      "step": 152114
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0172765254974365,
      "learning_rate": 0.00015529821361389939,
      "loss": 2.8618,
      "step": 152115
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9110326766967773,
      "learning_rate": 0.00015529463035836893,
      "loss": 2.8944,
      "step": 152116
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.264127254486084,
      "learning_rate": 0.00015529104712974206,
      "loss": 2.9234,
      "step": 152117
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.211164712905884,
      "learning_rate": 0.00015528746392801918,
      "loss": 2.9523,
      "step": 152118
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.325342893600464,
      "learning_rate": 0.0001552838807532012,
      "loss": 3.228,
      "step": 152119
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7553579807281494,
      "learning_rate": 0.00015528029760528857,
      "loss": 3.1052,
      "step": 152120
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.265505075454712,
      "learning_rate": 0.00015527671448428232,
      "loss": 2.8336,
      "step": 152121
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3978776931762695,
      "learning_rate": 0.0001552731313901826,
      "loss": 3.062,
      "step": 152122
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7496302127838135,
      "learning_rate": 0.00015526954832299048,
      "loss": 2.9858,
      "step": 152123
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.2495107650756836,
      "learning_rate": 0.00015526596528270633,
      "loss": 2.8219,
      "step": 152124
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3984591960906982,
      "learning_rate": 0.0001552623822693311,
      "loss": 2.8765,
      "step": 152125
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.126277208328247,
      "learning_rate": 0.00015525879928286523,
      "loss": 2.9261,
      "step": 152126
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5594441890716553,
      "learning_rate": 0.00015525521632330972,
      "loss": 2.8737,
      "step": 152127
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.473146915435791,
      "learning_rate": 0.0001552516333906647,
      "loss": 2.9265,
      "step": 152128
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4804532527923584,
      "learning_rate": 0.0001552480504849313,
      "loss": 3.2051,
      "step": 152129
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3039731979370117,
      "learning_rate": 0.00015524446760610986,
      "loss": 2.7772,
      "step": 152130
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0312066078186035,
      "learning_rate": 0.00015524088475420136,
      "loss": 2.9137,
      "step": 152131
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1272077560424805,
      "learning_rate": 0.00015523730192920624,
      "loss": 3.1151,
      "step": 152132
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.595834732055664,
      "learning_rate": 0.0001552337191311254,
      "loss": 3.0062,
      "step": 152133
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.177635908126831,
      "learning_rate": 0.00015523013635995912,
      "loss": 2.9754,
      "step": 152134
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7053580284118652,
      "learning_rate": 0.00015522655361570846,
      "loss": 3.0333,
      "step": 152135
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5018298625946045,
      "learning_rate": 0.00015522297089837371,
      "loss": 2.7903,
      "step": 152136
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.4213998317718506,
      "learning_rate": 0.00015521938820795594,
      "loss": 2.9038,
      "step": 152137
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7561593055725098,
      "learning_rate": 0.0001552158055444555,
      "loss": 3.0012,
      "step": 152138
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5514955520629883,
      "learning_rate": 0.00015521222290787326,
      "loss": 2.902,
      "step": 152139
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.806040048599243,
      "learning_rate": 0.0001552086402982098,
      "loss": 2.9037,
      "step": 152140
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.086183547973633,
      "learning_rate": 0.0001552050577154658,
      "loss": 3.1049,
      "step": 152141
    },
    {
      "epoch": 1.98,
      "grad_norm": 5.210095405578613,
      "learning_rate": 0.0001552014751596418,
      "loss": 2.8726,
      "step": 152142
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6796605587005615,
      "learning_rate": 0.00015519789263073865,
      "loss": 2.8828,
      "step": 152143
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5633373260498047,
      "learning_rate": 0.00015519431012875684,
      "loss": 2.8941,
      "step": 152144
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0273189544677734,
      "learning_rate": 0.00015519072765369732,
      "loss": 2.9865,
      "step": 152145
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4513487815856934,
      "learning_rate": 0.00015518714520556054,
      "loss": 2.9644,
      "step": 152146
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.701528787612915,
      "learning_rate": 0.00015518356278434718,
      "loss": 2.8267,
      "step": 152147
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.199772596359253,
      "learning_rate": 0.00015517998039005785,
      "loss": 2.9123,
      "step": 152148
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.158432722091675,
      "learning_rate": 0.0001551763980226934,
      "loss": 3.0764,
      "step": 152149
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.6555235385894775,
      "learning_rate": 0.00015517281568225427,
      "loss": 2.9884,
      "step": 152150
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.129153251647949,
      "learning_rate": 0.00015516923336874133,
      "loss": 2.8919,
      "step": 152151
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.170175790786743,
      "learning_rate": 0.00015516565108215525,
      "loss": 2.9213,
      "step": 152152
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3256545066833496,
      "learning_rate": 0.00015516206882249653,
      "loss": 2.7526,
      "step": 152153
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3902554512023926,
      "learning_rate": 0.00015515848658976582,
      "loss": 2.8788,
      "step": 152154
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.221022367477417,
      "learning_rate": 0.00015515490438396402,
      "loss": 2.7316,
      "step": 152155
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.250317335128784,
      "learning_rate": 0.0001551513222050915,
      "loss": 3.0296,
      "step": 152156
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1929852962493896,
      "learning_rate": 0.00015514774005314922,
      "loss": 3.023,
      "step": 152157
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.212118148803711,
      "learning_rate": 0.00015514415792813773,
      "loss": 2.8487,
      "step": 152158
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1893160343170166,
      "learning_rate": 0.00015514057583005766,
      "loss": 3.1403,
      "step": 152159
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.057213306427002,
      "learning_rate": 0.00015513699375890958,
      "loss": 2.972,
      "step": 152160
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5901293754577637,
      "learning_rate": 0.00015513341171469438,
      "loss": 2.9662,
      "step": 152161
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0490591526031494,
      "learning_rate": 0.00015512982969741252,
      "loss": 3.1201,
      "step": 152162
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1089789867401123,
      "learning_rate": 0.00015512624770706485,
      "loss": 2.6526,
      "step": 152163
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4479990005493164,
      "learning_rate": 0.00015512266574365193,
      "loss": 2.9871,
      "step": 152164
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.4769577980041504,
      "learning_rate": 0.00015511908380717447,
      "loss": 3.1243,
      "step": 152165
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.2556233406066895,
      "learning_rate": 0.00015511550189763299,
      "loss": 2.6088,
      "step": 152166
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2911195755004883,
      "learning_rate": 0.00015511192001502837,
      "loss": 2.9302,
      "step": 152167
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5211198329925537,
      "learning_rate": 0.00015510833815936105,
      "loss": 2.9176,
      "step": 152168
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6935720443725586,
      "learning_rate": 0.00015510475633063198,
      "loss": 2.8398,
      "step": 152169
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0302553176879883,
      "learning_rate": 0.00015510117452884165,
      "loss": 3.1237,
      "step": 152170
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7722597122192383,
      "learning_rate": 0.00015509759275399076,
      "loss": 2.9787,
      "step": 152171
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7862510681152344,
      "learning_rate": 0.00015509401100607983,
      "loss": 3.1399,
      "step": 152172
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.824047803878784,
      "learning_rate": 0.00015509042928510975,
      "loss": 3.0277,
      "step": 152173
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.5463309288024902,
      "learning_rate": 0.00015508684759108106,
      "loss": 3.024,
      "step": 152174
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5505759716033936,
      "learning_rate": 0.0001550832659239945,
      "loss": 2.8686,
      "step": 152175
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3329899311065674,
      "learning_rate": 0.00015507968428385074,
      "loss": 2.9829,
      "step": 152176
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6154847145080566,
      "learning_rate": 0.00015507610267065025,
      "loss": 2.9569,
      "step": 152177
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7758426666259766,
      "learning_rate": 0.000155072521084394,
      "loss": 3.1069,
      "step": 152178
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3212478160858154,
      "learning_rate": 0.00015506893952508248,
      "loss": 2.9649,
      "step": 152179
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2610251903533936,
      "learning_rate": 0.00015506535799271628,
      "loss": 2.9657,
      "step": 152180
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6287577152252197,
      "learning_rate": 0.00015506177648729627,
      "loss": 3.2849,
      "step": 152181
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9430899620056152,
      "learning_rate": 0.000155058195008823,
      "loss": 2.8546,
      "step": 152182
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.311992645263672,
      "learning_rate": 0.00015505461355729702,
      "loss": 3.1721,
      "step": 152183
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.551924705505371,
      "learning_rate": 0.0001550510321327193,
      "loss": 3.2584,
      "step": 152184
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.190619468688965,
      "learning_rate": 0.0001550474507350903,
      "loss": 3.0727,
      "step": 152185
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.77341365814209,
      "learning_rate": 0.00015504386936441057,
      "loss": 2.9017,
      "step": 152186
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.907794713973999,
      "learning_rate": 0.00015504028802068107,
      "loss": 2.9468,
      "step": 152187
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5992839336395264,
      "learning_rate": 0.00015503670670390235,
      "loss": 2.8254,
      "step": 152188
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5187854766845703,
      "learning_rate": 0.00015503312541407485,
      "loss": 3.0739,
      "step": 152189
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.4955008029937744,
      "learning_rate": 0.0001550295441511996,
      "loss": 3.1775,
      "step": 152190
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8158273696899414,
      "learning_rate": 0.0001550259629152769,
      "loss": 2.9929,
      "step": 152191
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6843020915985107,
      "learning_rate": 0.0001550223817063078,
      "loss": 2.9518,
      "step": 152192
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.6320714950561523,
      "learning_rate": 0.00015501880052429278,
      "loss": 2.8805,
      "step": 152193
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.227900743484497,
      "learning_rate": 0.0001550152193692325,
      "loss": 2.9863,
      "step": 152194
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0109312534332275,
      "learning_rate": 0.00015501163824112747,
      "loss": 3.3125,
      "step": 152195
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5305771827697754,
      "learning_rate": 0.00015500805713997864,
      "loss": 2.877,
      "step": 152196
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.328007698059082,
      "learning_rate": 0.00015500447606578646,
      "loss": 2.7519,
      "step": 152197
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.826259136199951,
      "learning_rate": 0.00015500089501855178,
      "loss": 2.9947,
      "step": 152198
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.1592302322387695,
      "learning_rate": 0.00015499731399827506,
      "loss": 2.9518,
      "step": 152199
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7607812881469727,
      "learning_rate": 0.00015499373300495732,
      "loss": 2.8586,
      "step": 152200
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.2762770652770996,
      "learning_rate": 0.00015499015203859876,
      "loss": 3.0212,
      "step": 152201
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.06529426574707,
      "learning_rate": 0.00015498657109920034,
      "loss": 2.8119,
      "step": 152202
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.067730665206909,
      "learning_rate": 0.00015498299018676257,
      "loss": 3.0759,
      "step": 152203
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3411107063293457,
      "learning_rate": 0.0001549794093012863,
      "loss": 2.9298,
      "step": 152204
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.876434803009033,
      "learning_rate": 0.000154975828442772,
      "loss": 3.1483,
      "step": 152205
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9345920085906982,
      "learning_rate": 0.0001549722476112205,
      "loss": 2.9171,
      "step": 152206
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9050793647766113,
      "learning_rate": 0.00015496866680663244,
      "loss": 3.1061,
      "step": 152207
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1068387031555176,
      "learning_rate": 0.00015496508602900843,
      "loss": 2.9779,
      "step": 152208
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5342018604278564,
      "learning_rate": 0.00015496150527834904,
      "loss": 2.9433,
      "step": 152209
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8038671016693115,
      "learning_rate": 0.00015495792455465517,
      "loss": 3.0756,
      "step": 152210
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.443112373352051,
      "learning_rate": 0.0001549543438579272,
      "loss": 2.9256,
      "step": 152211
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6053273677825928,
      "learning_rate": 0.0001549507631881661,
      "loss": 2.734,
      "step": 152212
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1635820865631104,
      "learning_rate": 0.00015494718254537238,
      "loss": 2.8759,
      "step": 152213
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1093780994415283,
      "learning_rate": 0.00015494360192954673,
      "loss": 2.9291,
      "step": 152214
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2316744327545166,
      "learning_rate": 0.0001549400213406897,
      "loss": 2.8421,
      "step": 152215
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.817552328109741,
      "learning_rate": 0.00015493644077880215,
      "loss": 2.9468,
      "step": 152216
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2949650287628174,
      "learning_rate": 0.00015493286024388455,
      "loss": 3.0924,
      "step": 152217
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0440618991851807,
      "learning_rate": 0.00015492927973593778,
      "loss": 2.9376,
      "step": 152218
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7698371410369873,
      "learning_rate": 0.0001549256992549624,
      "loss": 3.0908,
      "step": 152219
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.663969039916992,
      "learning_rate": 0.00015492211880095905,
      "loss": 3.0732,
      "step": 152220
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3522534370422363,
      "learning_rate": 0.00015491853837392835,
      "loss": 2.9171,
      "step": 152221
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2134592533111572,
      "learning_rate": 0.0001549149579738711,
      "loss": 3.0027,
      "step": 152222
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.270308494567871,
      "learning_rate": 0.00015491137760078783,
      "loss": 2.9271,
      "step": 152223
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.789061069488525,
      "learning_rate": 0.00015490779725467932,
      "loss": 2.9109,
      "step": 152224
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2101590633392334,
      "learning_rate": 0.00015490421693554622,
      "loss": 2.7766,
      "step": 152225
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.291745662689209,
      "learning_rate": 0.00015490063664338918,
      "loss": 3.0125,
      "step": 152226
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.650073528289795,
      "learning_rate": 0.00015489705637820872,
      "loss": 3.2097,
      "step": 152227
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.1354644298553467,
      "learning_rate": 0.00015489347614000576,
      "loss": 3.008,
      "step": 152228
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.399111747741699,
      "learning_rate": 0.00015488989592878075,
      "loss": 2.9639,
      "step": 152229
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7079391479492188,
      "learning_rate": 0.00015488631574453454,
      "loss": 2.9113,
      "step": 152230
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7307589054107666,
      "learning_rate": 0.0001548827355872677,
      "loss": 2.9819,
      "step": 152231
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1034998893737793,
      "learning_rate": 0.00015487915545698088,
      "loss": 3.0048,
      "step": 152232
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.8697465658187866,
      "learning_rate": 0.00015487557535367468,
      "loss": 3.1112,
      "step": 152233
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.35868239402771,
      "learning_rate": 0.00015487199527734996,
      "loss": 2.8618,
      "step": 152234
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.157719612121582,
      "learning_rate": 0.00015486841522800715,
      "loss": 3.027,
      "step": 152235
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2915875911712646,
      "learning_rate": 0.00015486483520564718,
      "loss": 2.8011,
      "step": 152236
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.068009614944458,
      "learning_rate": 0.00015486125521027057,
      "loss": 2.8419,
      "step": 152237
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.028109312057495,
      "learning_rate": 0.00015485767524187798,
      "loss": 3.1686,
      "step": 152238
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.947644591331482,
      "learning_rate": 0.00015485409530046997,
      "loss": 3.0451,
      "step": 152239
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.771364450454712,
      "learning_rate": 0.00015485051538604742,
      "loss": 3.0996,
      "step": 152240
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.087190628051758,
      "learning_rate": 0.0001548469354986108,
      "loss": 3.0446,
      "step": 152241
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.104645252227783,
      "learning_rate": 0.000154843355638161,
      "loss": 3.1232,
      "step": 152242
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9307690858840942,
      "learning_rate": 0.00015483977580469857,
      "loss": 3.0741,
      "step": 152243
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.183209180831909,
      "learning_rate": 0.00015483619599822415,
      "loss": 2.9383,
      "step": 152244
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.665329694747925,
      "learning_rate": 0.0001548326162187383,
      "loss": 2.8086,
      "step": 152245
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6157774925231934,
      "learning_rate": 0.00015482903646624194,
      "loss": 2.9797,
      "step": 152246
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3489859104156494,
      "learning_rate": 0.00015482545674073547,
      "loss": 2.7872,
      "step": 152247
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1590287685394287,
      "learning_rate": 0.00015482187704221986,
      "loss": 2.9853,
      "step": 152248
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9582862854003906,
      "learning_rate": 0.0001548182973706955,
      "loss": 3.1118,
      "step": 152249
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.945504665374756,
      "learning_rate": 0.00015481471772616325,
      "loss": 2.9936,
      "step": 152250
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.313521146774292,
      "learning_rate": 0.00015481113810862353,
      "loss": 2.7011,
      "step": 152251
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0864739418029785,
      "learning_rate": 0.00015480755851807727,
      "loss": 2.9214,
      "step": 152252
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4506900310516357,
      "learning_rate": 0.0001548039789545249,
      "loss": 2.9638,
      "step": 152253
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.235633611679077,
      "learning_rate": 0.00015480039941796736,
      "loss": 2.8907,
      "step": 152254
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.1701130867004395,
      "learning_rate": 0.00015479681990840513,
      "loss": 2.8302,
      "step": 152255
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.526895523071289,
      "learning_rate": 0.00015479324042583895,
      "loss": 2.9431,
      "step": 152256
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1288366317749023,
      "learning_rate": 0.0001547896609702693,
      "loss": 3.0787,
      "step": 152257
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2192225456237793,
      "learning_rate": 0.00015478608154169716,
      "loss": 2.7821,
      "step": 152258
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.409796953201294,
      "learning_rate": 0.00015478250214012287,
      "loss": 3.1393,
      "step": 152259
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.004232168197632,
      "learning_rate": 0.00015477892276554735,
      "loss": 2.7023,
      "step": 152260
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5593421459198,
      "learning_rate": 0.00015477534341797123,
      "loss": 2.6855,
      "step": 152261
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.177220106124878,
      "learning_rate": 0.00015477176409739495,
      "loss": 2.8835,
      "step": 152262
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9106454849243164,
      "learning_rate": 0.00015476818480381947,
      "loss": 2.7575,
      "step": 152263
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0115063190460205,
      "learning_rate": 0.0001547646055372453,
      "loss": 3.0784,
      "step": 152264
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6212399005889893,
      "learning_rate": 0.00015476102629767308,
      "loss": 2.9286,
      "step": 152265
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5992255210876465,
      "learning_rate": 0.00015475744708510358,
      "loss": 3.2139,
      "step": 152266
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0488243103027344,
      "learning_rate": 0.00015475386789953746,
      "loss": 2.857,
      "step": 152267
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.109687566757202,
      "learning_rate": 0.0001547502887409752,
      "loss": 3.0124,
      "step": 152268
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.60135555267334,
      "learning_rate": 0.00015474670960941773,
      "loss": 3.088,
      "step": 152269
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4770636558532715,
      "learning_rate": 0.00015474313050486556,
      "loss": 2.957,
      "step": 152270
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8486826419830322,
      "learning_rate": 0.00015473955142731926,
      "loss": 3.0089,
      "step": 152271
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1509411334991455,
      "learning_rate": 0.0001547359723767798,
      "loss": 2.7548,
      "step": 152272
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9017817974090576,
      "learning_rate": 0.00015473239335324753,
      "loss": 2.9688,
      "step": 152273
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4646785259246826,
      "learning_rate": 0.0001547288143567233,
      "loss": 2.9478,
      "step": 152274
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6286463737487793,
      "learning_rate": 0.00015472523538720778,
      "loss": 3.0364,
      "step": 152275
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.484191656112671,
      "learning_rate": 0.00015472165644470146,
      "loss": 3.137,
      "step": 152276
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.441340446472168,
      "learning_rate": 0.00015471807752920523,
      "loss": 3.046,
      "step": 152277
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1649010181427,
      "learning_rate": 0.00015471449864071967,
      "loss": 2.8427,
      "step": 152278
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5612308979034424,
      "learning_rate": 0.0001547109197792453,
      "loss": 3.0923,
      "step": 152279
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.758190155029297,
      "learning_rate": 0.00015470734094478306,
      "loss": 3.1577,
      "step": 152280
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.414177417755127,
      "learning_rate": 0.0001547037621373334,
      "loss": 3.1943,
      "step": 152281
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.5903217792510986,
      "learning_rate": 0.00015470018335689701,
      "loss": 2.9696,
      "step": 152282
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4297895431518555,
      "learning_rate": 0.00015469660460347466,
      "loss": 3.1159,
      "step": 152283
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.070768356323242,
      "learning_rate": 0.00015469302587706697,
      "loss": 3.1352,
      "step": 152284
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3815948963165283,
      "learning_rate": 0.00015468944717767448,
      "loss": 3.0038,
      "step": 152285
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0010242462158203,
      "learning_rate": 0.0001546858685052981,
      "loss": 3.2722,
      "step": 152286
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3129358291625977,
      "learning_rate": 0.00015468228985993833,
      "loss": 2.8032,
      "step": 152287
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9599666595458984,
      "learning_rate": 0.00015467871124159576,
      "loss": 2.8183,
      "step": 152288
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.1882734298706055,
      "learning_rate": 0.00015467513265027128,
      "loss": 2.78,
      "step": 152289
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.395644187927246,
      "learning_rate": 0.0001546715540859653,
      "loss": 2.832,
      "step": 152290
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8202247619628906,
      "learning_rate": 0.00015466797554867875,
      "loss": 3.1909,
      "step": 152291
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3830177783966064,
      "learning_rate": 0.00015466439703841218,
      "loss": 3.1392,
      "step": 152292
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1513829231262207,
      "learning_rate": 0.00015466081855516623,
      "loss": 2.961,
      "step": 152293
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.418726444244385,
      "learning_rate": 0.00015465724009894145,
      "loss": 3.1226,
      "step": 152294
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.628222942352295,
      "learning_rate": 0.00015465366166973876,
      "loss": 3.1755,
      "step": 152295
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2401883602142334,
      "learning_rate": 0.00015465008326755858,
      "loss": 2.9504,
      "step": 152296
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.16129994392395,
      "learning_rate": 0.0001546465048924018,
      "loss": 2.9649,
      "step": 152297
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5286989212036133,
      "learning_rate": 0.000154642926544269,
      "loss": 2.8928,
      "step": 152298
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.641655921936035,
      "learning_rate": 0.00015463934822316085,
      "loss": 3.0485,
      "step": 152299
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2262046337127686,
      "learning_rate": 0.0001546357699290778,
      "loss": 2.7567,
      "step": 152300
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0090067386627197,
      "learning_rate": 0.00015463219166202087,
      "loss": 3.3015,
      "step": 152301
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1765565872192383,
      "learning_rate": 0.00015462861342199042,
      "loss": 3.0251,
      "step": 152302
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.961636543273926,
      "learning_rate": 0.0001546250352089874,
      "loss": 2.8288,
      "step": 152303
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0330705642700195,
      "learning_rate": 0.0001546214570230123,
      "loss": 2.7506,
      "step": 152304
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.666400671005249,
      "learning_rate": 0.00015461787886406585,
      "loss": 3.1886,
      "step": 152305
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.8245716094970703,
      "learning_rate": 0.00015461430073214851,
      "loss": 3.1317,
      "step": 152306
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6594855785369873,
      "learning_rate": 0.00015461072262726132,
      "loss": 3.1244,
      "step": 152307
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.255251169204712,
      "learning_rate": 0.00015460714454940453,
      "loss": 2.8616,
      "step": 152308
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2215116024017334,
      "learning_rate": 0.00015460356649857918,
      "loss": 2.823,
      "step": 152309
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3116917610168457,
      "learning_rate": 0.00015459998847478578,
      "loss": 2.8765,
      "step": 152310
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.879775047302246,
      "learning_rate": 0.00015459641047802495,
      "loss": 2.8926,
      "step": 152311
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5430169105529785,
      "learning_rate": 0.0001545928325082973,
      "loss": 2.9491,
      "step": 152312
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.114790678024292,
      "learning_rate": 0.00015458925456560368,
      "loss": 3.1669,
      "step": 152313
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3712410926818848,
      "learning_rate": 0.0001545856766499446,
      "loss": 2.9735,
      "step": 152314
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9581005573272705,
      "learning_rate": 0.00015458209876132085,
      "loss": 3.102,
      "step": 152315
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9365506172180176,
      "learning_rate": 0.00015457852089973304,
      "loss": 3.1549,
      "step": 152316
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3666012287139893,
      "learning_rate": 0.00015457494306518184,
      "loss": 2.9144,
      "step": 152317
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0252532958984375,
      "learning_rate": 0.0001545713652576678,
      "loss": 3.0524,
      "step": 152318
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.802286148071289,
      "learning_rate": 0.00015456778747719178,
      "loss": 2.85,
      "step": 152319
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.211963176727295,
      "learning_rate": 0.00015456420972375428,
      "loss": 3.1442,
      "step": 152320
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.219348192214966,
      "learning_rate": 0.0001545606319973561,
      "loss": 2.8183,
      "step": 152321
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.146967649459839,
      "learning_rate": 0.00015455705429799787,
      "loss": 2.9304,
      "step": 152322
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2280094623565674,
      "learning_rate": 0.00015455347662568023,
      "loss": 3.1178,
      "step": 152323
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.888628959655762,
      "learning_rate": 0.00015454989898040374,
      "loss": 3.0444,
      "step": 152324
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.38958740234375,
      "learning_rate": 0.00015454632136216928,
      "loss": 2.9784,
      "step": 152325
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.33842134475708,
      "learning_rate": 0.00015454274377097723,
      "loss": 3.0013,
      "step": 152326
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3119113445281982,
      "learning_rate": 0.0001545391662068286,
      "loss": 2.8698,
      "step": 152327
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.181738376617432,
      "learning_rate": 0.00015453558866972388,
      "loss": 2.9052,
      "step": 152328
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.916772842407227,
      "learning_rate": 0.00015453201115966377,
      "loss": 2.8988,
      "step": 152329
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.615039348602295,
      "learning_rate": 0.00015452843367664873,
      "loss": 3.0589,
      "step": 152330
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.0479726791381836,
      "learning_rate": 0.00015452485622067974,
      "loss": 2.7989,
      "step": 152331
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7875328063964844,
      "learning_rate": 0.00015452127879175725,
      "loss": 2.9094,
      "step": 152332
    },
    {
      "epoch": 1.98,
      "grad_norm": 5.099259376525879,
      "learning_rate": 0.00015451770138988203,
      "loss": 2.7614,
      "step": 152333
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4133267402648926,
      "learning_rate": 0.0001545141240150548,
      "loss": 2.9701,
      "step": 152334
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5386531352996826,
      "learning_rate": 0.0001545105466672761,
      "loss": 2.8281,
      "step": 152335
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.203244924545288,
      "learning_rate": 0.00015450696934654653,
      "loss": 3.0675,
      "step": 152336
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9122722148895264,
      "learning_rate": 0.00015450339205286699,
      "loss": 3.1915,
      "step": 152337
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.489527702331543,
      "learning_rate": 0.00015449981478623785,
      "loss": 2.6581,
      "step": 152338
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.346972703933716,
      "learning_rate": 0.0001544962375466601,
      "loss": 2.7786,
      "step": 152339
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1648952960968018,
      "learning_rate": 0.00015449266033413413,
      "loss": 3.0104,
      "step": 152340
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.227010726928711,
      "learning_rate": 0.00015448908314866095,
      "loss": 2.9122,
      "step": 152341
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.915649890899658,
      "learning_rate": 0.00015448550599024078,
      "loss": 3.1611,
      "step": 152342
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0460596084594727,
      "learning_rate": 0.00015448192885887457,
      "loss": 2.9914,
      "step": 152343
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1802279949188232,
      "learning_rate": 0.00015447835175456284,
      "loss": 3.2095,
      "step": 152344
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.4643282890319824,
      "learning_rate": 0.00015447477467730642,
      "loss": 2.9343,
      "step": 152345
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0110929012298584,
      "learning_rate": 0.00015447119762710578,
      "loss": 3.0101,
      "step": 152346
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.131045341491699,
      "learning_rate": 0.0001544676206039619,
      "loss": 2.9848,
      "step": 152347
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6489503383636475,
      "learning_rate": 0.00015446404360787503,
      "loss": 2.8423,
      "step": 152348
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.499849319458008,
      "learning_rate": 0.0001544604666388462,
      "loss": 2.9205,
      "step": 152349
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.728828430175781,
      "learning_rate": 0.00015445688969687572,
      "loss": 2.9756,
      "step": 152350
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.518277645111084,
      "learning_rate": 0.00015445331278196461,
      "loss": 3.3023,
      "step": 152351
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4417057037353516,
      "learning_rate": 0.00015444973589411328,
      "loss": 3.0255,
      "step": 152352
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.00584077835083,
      "learning_rate": 0.00015444615903332257,
      "loss": 2.9711,
      "step": 152353
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.1317267417907715,
      "learning_rate": 0.00015444258219959308,
      "loss": 3.0063,
      "step": 152354
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.746799945831299,
      "learning_rate": 0.00015443900539292548,
      "loss": 2.8292,
      "step": 152355
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.063176155090332,
      "learning_rate": 0.00015443542861332026,
      "loss": 2.9247,
      "step": 152356
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3112659454345703,
      "learning_rate": 0.00015443185186077838,
      "loss": 2.8341,
      "step": 152357
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2070271968841553,
      "learning_rate": 0.00015442827513530022,
      "loss": 3.1456,
      "step": 152358
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1629559993743896,
      "learning_rate": 0.0001544246984368868,
      "loss": 3.0489,
      "step": 152359
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6735987663269043,
      "learning_rate": 0.00015442112176553848,
      "loss": 3.0279,
      "step": 152360
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4744842052459717,
      "learning_rate": 0.00015441754512125595,
      "loss": 2.9322,
      "step": 152361
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1000237464904785,
      "learning_rate": 0.00015441396850404004,
      "loss": 2.7977,
      "step": 152362
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5156500339508057,
      "learning_rate": 0.00015441039191389135,
      "loss": 2.9201,
      "step": 152363
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6707231998443604,
      "learning_rate": 0.00015440681535081039,
      "loss": 3.179,
      "step": 152364
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6006014347076416,
      "learning_rate": 0.00015440323881479805,
      "loss": 2.6007,
      "step": 152365
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.189018487930298,
      "learning_rate": 0.00015439966230585493,
      "loss": 2.9286,
      "step": 152366
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.148425340652466,
      "learning_rate": 0.0001543960858239815,
      "loss": 3.0922,
      "step": 152367
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.977622628211975,
      "learning_rate": 0.00015439250936917872,
      "loss": 3.099,
      "step": 152368
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.387835741043091,
      "learning_rate": 0.00015438893294144712,
      "loss": 3.0953,
      "step": 152369
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6954517364501953,
      "learning_rate": 0.00015438535654078726,
      "loss": 2.9815,
      "step": 152370
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5208027362823486,
      "learning_rate": 0.00015438178016720003,
      "loss": 2.9465,
      "step": 152371
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3297855854034424,
      "learning_rate": 0.00015437820382068596,
      "loss": 2.9206,
      "step": 152372
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5182979106903076,
      "learning_rate": 0.00015437462750124564,
      "loss": 2.9491,
      "step": 152373
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.680949687957764,
      "learning_rate": 0.00015437105120887993,
      "loss": 2.9115,
      "step": 152374
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0521769523620605,
      "learning_rate": 0.00015436747494358927,
      "loss": 2.8928,
      "step": 152375
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.310814142227173,
      "learning_rate": 0.00015436389870537455,
      "loss": 2.787,
      "step": 152376
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.22179913520813,
      "learning_rate": 0.00015436032249423635,
      "loss": 2.9797,
      "step": 152377
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.721179485321045,
      "learning_rate": 0.0001543567463101753,
      "loss": 2.9016,
      "step": 152378
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.514937162399292,
      "learning_rate": 0.00015435317015319197,
      "loss": 3.0205,
      "step": 152379
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.2558891773223877,
      "learning_rate": 0.00015434959402328726,
      "loss": 2.825,
      "step": 152380
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.431490182876587,
      "learning_rate": 0.00015434601792046159,
      "loss": 3.0522,
      "step": 152381
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.3118979930877686,
      "learning_rate": 0.00015434244184471587,
      "loss": 2.7137,
      "step": 152382
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.525399684906006,
      "learning_rate": 0.00015433886579605061,
      "loss": 2.8013,
      "step": 152383
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5595693588256836,
      "learning_rate": 0.00015433528977446649,
      "loss": 3.044,
      "step": 152384
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.271463394165039,
      "learning_rate": 0.00015433171377996413,
      "loss": 2.918,
      "step": 152385
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9259885549545288,
      "learning_rate": 0.00015432813781254435,
      "loss": 3.0234,
      "step": 152386
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.069222927093506,
      "learning_rate": 0.00015432456187220757,
      "loss": 3.0179,
      "step": 152387
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2380900382995605,
      "learning_rate": 0.00015432098595895475,
      "loss": 3.0738,
      "step": 152388
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.516542911529541,
      "learning_rate": 0.00015431741007278642,
      "loss": 2.9799,
      "step": 152389
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6608517169952393,
      "learning_rate": 0.0001543138342137032,
      "loss": 2.9933,
      "step": 152390
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.9691938161849976,
      "learning_rate": 0.0001543102583817057,
      "loss": 3.0306,
      "step": 152391
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7626688480377197,
      "learning_rate": 0.00015430668257679478,
      "loss": 3.0345,
      "step": 152392
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5640366077423096,
      "learning_rate": 0.00015430310679897087,
      "loss": 3.1424,
      "step": 152393
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.687598466873169,
      "learning_rate": 0.0001542995310482349,
      "loss": 2.8162,
      "step": 152394
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.139019250869751,
      "learning_rate": 0.00015429595532458738,
      "loss": 3.0196,
      "step": 152395
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.279937267303467,
      "learning_rate": 0.000154292379628029,
      "loss": 3.0157,
      "step": 152396
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.648709297180176,
      "learning_rate": 0.00015428880395856033,
      "loss": 2.9132,
      "step": 152397
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2964229583740234,
      "learning_rate": 0.0001542852283161822,
      "loss": 2.8221,
      "step": 152398
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.425252914428711,
      "learning_rate": 0.00015428165270089508,
      "loss": 2.9763,
      "step": 152399
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.135927677154541,
      "learning_rate": 0.0001542780771126999,
      "loss": 2.9999,
      "step": 152400
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.052316188812256,
      "learning_rate": 0.00015427450155159703,
      "loss": 2.9001,
      "step": 152401
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5499684810638428,
      "learning_rate": 0.00015427092601758753,
      "loss": 3.1705,
      "step": 152402
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5708107948303223,
      "learning_rate": 0.00015426735051067157,
      "loss": 2.9357,
      "step": 152403
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.409839630126953,
      "learning_rate": 0.00015426377503085018,
      "loss": 2.8607,
      "step": 152404
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5440943241119385,
      "learning_rate": 0.00015426019957812378,
      "loss": 2.7455,
      "step": 152405
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.59952712059021,
      "learning_rate": 0.00015425662415249327,
      "loss": 3.0492,
      "step": 152406
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4918363094329834,
      "learning_rate": 0.00015425304875395915,
      "loss": 3.0054,
      "step": 152407
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.931530237197876,
      "learning_rate": 0.0001542494733825223,
      "loss": 2.8673,
      "step": 152408
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8362276554107666,
      "learning_rate": 0.00015424589803818303,
      "loss": 2.7636,
      "step": 152409
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4241247177124023,
      "learning_rate": 0.0001542423227209423,
      "loss": 3.0644,
      "step": 152410
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8283474445343018,
      "learning_rate": 0.00015423874743080056,
      "loss": 3.1736,
      "step": 152411
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.5894556045532227,
      "learning_rate": 0.00015423517216775866,
      "loss": 2.8754,
      "step": 152412
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5374507904052734,
      "learning_rate": 0.00015423159693181716,
      "loss": 3.135,
      "step": 152413
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.33764386177063,
      "learning_rate": 0.00015422802172297692,
      "loss": 2.8552,
      "step": 152414
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7350919246673584,
      "learning_rate": 0.00015422444654123826,
      "loss": 2.7061,
      "step": 152415
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.388887643814087,
      "learning_rate": 0.00015422087138660214,
      "loss": 2.8108,
      "step": 152416
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.2638440132141113,
      "learning_rate": 0.00015421729625906898,
      "loss": 2.8998,
      "step": 152417
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.150380849838257,
      "learning_rate": 0.0001542137211586397,
      "loss": 2.7663,
      "step": 152418
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6547744274139404,
      "learning_rate": 0.0001542101460853147,
      "loss": 3.2319,
      "step": 152419
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.378075122833252,
      "learning_rate": 0.00015420657103909505,
      "loss": 2.9256,
      "step": 152420
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4409470558166504,
      "learning_rate": 0.0001542029960199809,
      "loss": 2.7094,
      "step": 152421
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.028857707977295,
      "learning_rate": 0.00015419942102797327,
      "loss": 2.8344,
      "step": 152422
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1793527603149414,
      "learning_rate": 0.00015419584606307264,
      "loss": 2.8536,
      "step": 152423
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6910881996154785,
      "learning_rate": 0.00015419227112527982,
      "loss": 3.0749,
      "step": 152424
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.565962314605713,
      "learning_rate": 0.00015418869621459534,
      "loss": 3.0482,
      "step": 152425
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.9238855838775635,
      "learning_rate": 0.00015418512133102014,
      "loss": 3.3356,
      "step": 152426
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.45882511138916,
      "learning_rate": 0.00015418154647455447,
      "loss": 3.0722,
      "step": 152427
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3942530155181885,
      "learning_rate": 0.0001541779716451993,
      "loss": 2.9531,
      "step": 152428
    },
    {
      "epoch": 1.98,
      "grad_norm": 4.761782646179199,
      "learning_rate": 0.00015417439684295512,
      "loss": 3.0895,
      "step": 152429
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.7417192459106445,
      "learning_rate": 0.00015417082206782275,
      "loss": 2.8724,
      "step": 152430
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.2373509407043457,
      "learning_rate": 0.00015416724731980263,
      "loss": 2.9183,
      "step": 152431
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.708176612854004,
      "learning_rate": 0.00015416367259889587,
      "loss": 2.902,
      "step": 152432
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.155449151992798,
      "learning_rate": 0.00015416009790510256,
      "loss": 2.8771,
      "step": 152433
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.3741672039031982,
      "learning_rate": 0.00015415652323842375,
      "loss": 3.0186,
      "step": 152434
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.332267999649048,
      "learning_rate": 0.00015415294859885993,
      "loss": 3.1644,
      "step": 152435
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.321384906768799,
      "learning_rate": 0.0001541493739864119,
      "loss": 3.0479,
      "step": 152436
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.42777419090271,
      "learning_rate": 0.00015414579940108017,
      "loss": 2.8081,
      "step": 152437
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.5450797080993652,
      "learning_rate": 0.00015414222484286556,
      "loss": 2.9729,
      "step": 152438
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.6209189891815186,
      "learning_rate": 0.0001541386503117687,
      "loss": 2.9534,
      "step": 152439
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.3609654903411865,
      "learning_rate": 0.0001541350758077902,
      "loss": 2.7853,
      "step": 152440
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.8083231449127197,
      "learning_rate": 0.00015413150133093063,
      "loss": 2.7739,
      "step": 152441
    },
    {
      "epoch": 1.98,
      "grad_norm": 1.984551191329956,
      "learning_rate": 0.0001541279268811909,
      "loss": 3.154,
      "step": 152442
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.855583906173706,
      "learning_rate": 0.00015412435245857142,
      "loss": 3.0028,
      "step": 152443
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.0519394874572754,
      "learning_rate": 0.0001541207780630731,
      "loss": 2.8732,
      "step": 152444
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.64961838722229,
      "learning_rate": 0.0001541172036946965,
      "loss": 3.0014,
      "step": 152445
    },
    {
      "epoch": 1.98,
      "grad_norm": 3.172400712966919,
      "learning_rate": 0.0001541136293534422,
      "loss": 3.0521,
      "step": 152446
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.1708881855010986,
      "learning_rate": 0.00015411005503931084,
      "loss": 3.2582,
      "step": 152447
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.4278366565704346,
      "learning_rate": 0.00015410648075230333,
      "loss": 2.6866,
      "step": 152448
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.269296407699585,
      "learning_rate": 0.00015410290649242,
      "loss": 3.2474,
      "step": 152449
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2100119590759277,
      "learning_rate": 0.00015409933225966188,
      "loss": 2.9085,
      "step": 152450
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.544342041015625,
      "learning_rate": 0.0001540957580540294,
      "loss": 3.0325,
      "step": 152451
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8281116485595703,
      "learning_rate": 0.00015409218387552317,
      "loss": 3.0071,
      "step": 152452
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2950799465179443,
      "learning_rate": 0.0001540886097241441,
      "loss": 3.3067,
      "step": 152453
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9609392881393433,
      "learning_rate": 0.00015408503559989268,
      "loss": 2.9948,
      "step": 152454
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2714741230010986,
      "learning_rate": 0.0001540814615027695,
      "loss": 2.9274,
      "step": 152455
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1732959747314453,
      "learning_rate": 0.00015407788743277543,
      "loss": 3.0605,
      "step": 152456
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.468066692352295,
      "learning_rate": 0.00015407431338991103,
      "loss": 2.8777,
      "step": 152457
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.544431686401367,
      "learning_rate": 0.0001540707393741769,
      "loss": 2.8348,
      "step": 152458
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.095344305038452,
      "learning_rate": 0.0001540671653855738,
      "loss": 2.9409,
      "step": 152459
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5070290565490723,
      "learning_rate": 0.0001540635914241023,
      "loss": 2.8534,
      "step": 152460
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1882829666137695,
      "learning_rate": 0.0001540600174897633,
      "loss": 3.2119,
      "step": 152461
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1355159282684326,
      "learning_rate": 0.00015405644358255726,
      "loss": 3.1179,
      "step": 152462
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1385345458984375,
      "learning_rate": 0.00015405286970248487,
      "loss": 2.8384,
      "step": 152463
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.115792751312256,
      "learning_rate": 0.00015404929584954673,
      "loss": 2.8455,
      "step": 152464
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7504992485046387,
      "learning_rate": 0.00015404572202374364,
      "loss": 3.026,
      "step": 152465
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9965572357177734,
      "learning_rate": 0.00015404214822507613,
      "loss": 3.1136,
      "step": 152466
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.037526845932007,
      "learning_rate": 0.00015403857445354507,
      "loss": 3.0749,
      "step": 152467
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.620896816253662,
      "learning_rate": 0.00015403500070915096,
      "loss": 2.901,
      "step": 152468
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4303526878356934,
      "learning_rate": 0.00015403142699189448,
      "loss": 2.8455,
      "step": 152469
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3376505374908447,
      "learning_rate": 0.00015402785330177624,
      "loss": 2.9061,
      "step": 152470
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0940723419189453,
      "learning_rate": 0.0001540242796387971,
      "loss": 3.0428,
      "step": 152471
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3394618034362793,
      "learning_rate": 0.00015402070600295745,
      "loss": 3.025,
      "step": 152472
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.529971599578857,
      "learning_rate": 0.0001540171323942582,
      "loss": 3.0308,
      "step": 152473
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.9531497955322266,
      "learning_rate": 0.00015401355881269988,
      "loss": 2.8495,
      "step": 152474
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.734002113342285,
      "learning_rate": 0.00015400998525828336,
      "loss": 3.1119,
      "step": 152475
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6071698665618896,
      "learning_rate": 0.00015400641173100895,
      "loss": 2.9179,
      "step": 152476
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.977965235710144,
      "learning_rate": 0.0001540028382308776,
      "loss": 3.0178,
      "step": 152477
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0022432804107666,
      "learning_rate": 0.00015399926475788975,
      "loss": 2.9105,
      "step": 152478
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.73423433303833,
      "learning_rate": 0.00015399569131204632,
      "loss": 2.7367,
      "step": 152479
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.0500054359436035,
      "learning_rate": 0.00015399211789334777,
      "loss": 2.7001,
      "step": 152480
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.370716094970703,
      "learning_rate": 0.00015398854450179503,
      "loss": 2.7097,
      "step": 152481
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2015867233276367,
      "learning_rate": 0.00015398497113738836,
      "loss": 3.022,
      "step": 152482
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.776512384414673,
      "learning_rate": 0.00015398139780012872,
      "loss": 3.2051,
      "step": 152483
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3729703426361084,
      "learning_rate": 0.00015397782449001662,
      "loss": 2.75,
      "step": 152484
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6462292671203613,
      "learning_rate": 0.00015397425120705293,
      "loss": 3.2402,
      "step": 152485
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5912370681762695,
      "learning_rate": 0.00015397067795123803,
      "loss": 3.036,
      "step": 152486
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7633514404296875,
      "learning_rate": 0.00015396710472257298,
      "loss": 2.8483,
      "step": 152487
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.442662239074707,
      "learning_rate": 0.00015396353152105798,
      "loss": 2.8231,
      "step": 152488
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2361481189727783,
      "learning_rate": 0.000153959958346694,
      "loss": 2.9566,
      "step": 152489
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.383902072906494,
      "learning_rate": 0.00015395638519948154,
      "loss": 2.9536,
      "step": 152490
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.083702564239502,
      "learning_rate": 0.00015395281207942143,
      "loss": 2.8302,
      "step": 152491
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.125443458557129,
      "learning_rate": 0.00015394923898651416,
      "loss": 2.9621,
      "step": 152492
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2678728103637695,
      "learning_rate": 0.00015394566592076073,
      "loss": 2.8584,
      "step": 152493
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.083266496658325,
      "learning_rate": 0.00015394209288216126,
      "loss": 2.9397,
      "step": 152494
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4383950233459473,
      "learning_rate": 0.00015393851987071687,
      "loss": 3.0715,
      "step": 152495
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5877957344055176,
      "learning_rate": 0.00015393494688642798,
      "loss": 2.9202,
      "step": 152496
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.8327877521514893,
      "learning_rate": 0.00015393137392929543,
      "loss": 2.789,
      "step": 152497
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.326585531234741,
      "learning_rate": 0.00015392780099931968,
      "loss": 2.7943,
      "step": 152498
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1805357933044434,
      "learning_rate": 0.00015392422809650173,
      "loss": 2.7502,
      "step": 152499
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4071292877197266,
      "learning_rate": 0.00015392065522084177,
      "loss": 3.1169,
      "step": 152500
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6264469623565674,
      "learning_rate": 0.00015391708237234088,
      "loss": 2.7853,
      "step": 152501
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.9075021743774414,
      "learning_rate": 0.00015391350955099945,
      "loss": 2.8824,
      "step": 152502
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2505364418029785,
      "learning_rate": 0.00015390993675681833,
      "loss": 2.9614,
      "step": 152503
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.34455943107605,
      "learning_rate": 0.000153906363989798,
      "loss": 2.9565,
      "step": 152504
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9976023435592651,
      "learning_rate": 0.00015390279124993946,
      "loss": 3.1392,
      "step": 152505
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.320251226425171,
      "learning_rate": 0.00015389921853724295,
      "loss": 2.8678,
      "step": 152506
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.033372640609741,
      "learning_rate": 0.00015389564585170944,
      "loss": 2.9114,
      "step": 152507
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9633898735046387,
      "learning_rate": 0.00015389207319333937,
      "loss": 2.6951,
      "step": 152508
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7389369010925293,
      "learning_rate": 0.00015388850056213363,
      "loss": 2.9791,
      "step": 152509
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7640552520751953,
      "learning_rate": 0.00015388492795809268,
      "loss": 2.9635,
      "step": 152510
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.437912940979004,
      "learning_rate": 0.00015388135538121748,
      "loss": 2.8491,
      "step": 152511
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2152624130249023,
      "learning_rate": 0.0001538777828315083,
      "loss": 2.871,
      "step": 152512
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0873770713806152,
      "learning_rate": 0.0001538742103089661,
      "loss": 2.8345,
      "step": 152513
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0460922718048096,
      "learning_rate": 0.0001538706378135913,
      "loss": 2.9917,
      "step": 152514
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.32310152053833,
      "learning_rate": 0.00015386706534538483,
      "loss": 3.045,
      "step": 152515
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.590639114379883,
      "learning_rate": 0.00015386349290434715,
      "loss": 3.0228,
      "step": 152516
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0554449558258057,
      "learning_rate": 0.0001538599204904792,
      "loss": 3.0125,
      "step": 152517
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.710822820663452,
      "learning_rate": 0.00015385634810378127,
      "loss": 2.978,
      "step": 152518
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.257763147354126,
      "learning_rate": 0.00015385277574425426,
      "loss": 2.863,
      "step": 152519
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1652956008911133,
      "learning_rate": 0.0001538492034118987,
      "loss": 3.1216,
      "step": 152520
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.460176706314087,
      "learning_rate": 0.00015384563110671542,
      "loss": 2.8697,
      "step": 152521
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1710216999053955,
      "learning_rate": 0.00015384205882870488,
      "loss": 3.1835,
      "step": 152522
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2225608825683594,
      "learning_rate": 0.000153838486577868,
      "loss": 2.8724,
      "step": 152523
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.116783618927002,
      "learning_rate": 0.00015383491435420527,
      "loss": 3.0347,
      "step": 152524
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.154104471206665,
      "learning_rate": 0.00015383134215771744,
      "loss": 2.8461,
      "step": 152525
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5777969360351562,
      "learning_rate": 0.00015382776998840498,
      "loss": 2.913,
      "step": 152526
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.960420846939087,
      "learning_rate": 0.0001538241978462688,
      "loss": 2.9025,
      "step": 152527
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1180226802825928,
      "learning_rate": 0.00015382062573130934,
      "loss": 3.0936,
      "step": 152528
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3505661487579346,
      "learning_rate": 0.00015381705364352752,
      "loss": 2.7164,
      "step": 152529
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.01255464553833,
      "learning_rate": 0.00015381348158292386,
      "loss": 3.0592,
      "step": 152530
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.519636869430542,
      "learning_rate": 0.0001538099095494991,
      "loss": 2.7086,
      "step": 152531
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.055851697921753,
      "learning_rate": 0.0001538063375432536,
      "loss": 2.9694,
      "step": 152532
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.283393383026123,
      "learning_rate": 0.00015380276556418846,
      "loss": 3.0308,
      "step": 152533
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8225908279418945,
      "learning_rate": 0.00015379919361230401,
      "loss": 2.9457,
      "step": 152534
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9125239849090576,
      "learning_rate": 0.00015379562168760114,
      "loss": 2.8621,
      "step": 152535
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0837738513946533,
      "learning_rate": 0.00015379204979008043,
      "loss": 2.9378,
      "step": 152536
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.828150510787964,
      "learning_rate": 0.00015378847791974246,
      "loss": 3.0309,
      "step": 152537
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.424755811691284,
      "learning_rate": 0.0001537849060765881,
      "loss": 3.0102,
      "step": 152538
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.134171962738037,
      "learning_rate": 0.00015378133426061783,
      "loss": 3.1149,
      "step": 152539
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3034114837646484,
      "learning_rate": 0.0001537777624718323,
      "loss": 2.9116,
      "step": 152540
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0861399173736572,
      "learning_rate": 0.0001537741907102323,
      "loss": 3.0869,
      "step": 152541
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1058030128479004,
      "learning_rate": 0.0001537706189758185,
      "loss": 2.987,
      "step": 152542
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.760462522506714,
      "learning_rate": 0.00015376704726859132,
      "loss": 2.7995,
      "step": 152543
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.897540330886841,
      "learning_rate": 0.0001537634755885518,
      "loss": 2.8014,
      "step": 152544
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6715469360351562,
      "learning_rate": 0.00015375990393570025,
      "loss": 2.9958,
      "step": 152545
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.165563583374023,
      "learning_rate": 0.00015375633231003762,
      "loss": 2.8442,
      "step": 152546
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.103914260864258,
      "learning_rate": 0.00015375276071156446,
      "loss": 3.2046,
      "step": 152547
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.353088140487671,
      "learning_rate": 0.0001537491891402814,
      "loss": 2.8377,
      "step": 152548
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2762045860290527,
      "learning_rate": 0.000153745617596189,
      "loss": 3.02,
      "step": 152549
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4268407821655273,
      "learning_rate": 0.0001537420460792882,
      "loss": 3.034,
      "step": 152550
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.704873561859131,
      "learning_rate": 0.00015373847458957938,
      "loss": 3.1619,
      "step": 152551
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1904594898223877,
      "learning_rate": 0.00015373490312706347,
      "loss": 2.8693,
      "step": 152552
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9900269508361816,
      "learning_rate": 0.000153731331691741,
      "loss": 2.9618,
      "step": 152553
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4468929767608643,
      "learning_rate": 0.0001537277602836126,
      "loss": 3.1069,
      "step": 152554
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.724994659423828,
      "learning_rate": 0.0001537241889026789,
      "loss": 2.8807,
      "step": 152555
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9259109497070312,
      "learning_rate": 0.00015372061754894073,
      "loss": 2.8554,
      "step": 152556
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.923896074295044,
      "learning_rate": 0.00015371704622239854,
      "loss": 2.861,
      "step": 152557
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5295867919921875,
      "learning_rate": 0.0001537134749230532,
      "loss": 2.8852,
      "step": 152558
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3984689712524414,
      "learning_rate": 0.00015370990365090525,
      "loss": 2.8786,
      "step": 152559
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8306264877319336,
      "learning_rate": 0.00015370633240595558,
      "loss": 3.1027,
      "step": 152560
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.175027370452881,
      "learning_rate": 0.0001537027611882044,
      "loss": 3.1549,
      "step": 152561
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.483713150024414,
      "learning_rate": 0.0001536991899976528,
      "loss": 3.1254,
      "step": 152562
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.23960018157959,
      "learning_rate": 0.00015369561883430116,
      "loss": 2.9563,
      "step": 152563
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.583122968673706,
      "learning_rate": 0.00015369204769815037,
      "loss": 2.7668,
      "step": 152564
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.914085865020752,
      "learning_rate": 0.00015368847658920088,
      "loss": 2.8046,
      "step": 152565
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.322066307067871,
      "learning_rate": 0.0001536849055074537,
      "loss": 2.8653,
      "step": 152566
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.053805112838745,
      "learning_rate": 0.000153681334452909,
      "loss": 2.8125,
      "step": 152567
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.199671983718872,
      "learning_rate": 0.0001536777634255678,
      "loss": 3.0471,
      "step": 152568
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.09328293800354,
      "learning_rate": 0.0001536741924254306,
      "loss": 3.0,
      "step": 152569
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1744890213012695,
      "learning_rate": 0.00015367062145249825,
      "loss": 2.7766,
      "step": 152570
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4120752811431885,
      "learning_rate": 0.00015366705050677112,
      "loss": 2.776,
      "step": 152571
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2424802780151367,
      "learning_rate": 0.00015366347958825036,
      "loss": 2.9836,
      "step": 152572
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3111801147460938,
      "learning_rate": 0.00015365990869693603,
      "loss": 2.8661,
      "step": 152573
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.282223701477051,
      "learning_rate": 0.0001536563378328292,
      "loss": 2.9963,
      "step": 152574
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8785018920898438,
      "learning_rate": 0.00015365276699593036,
      "loss": 2.9965,
      "step": 152575
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.879603862762451,
      "learning_rate": 0.0001536491961862403,
      "loss": 3.1984,
      "step": 152576
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1964287757873535,
      "learning_rate": 0.00015364562540375954,
      "loss": 2.9291,
      "step": 152577
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.19574236869812,
      "learning_rate": 0.000153642054648489,
      "loss": 2.7615,
      "step": 152578
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6937077045440674,
      "learning_rate": 0.00015363848392042898,
      "loss": 3.11,
      "step": 152579
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.305915117263794,
      "learning_rate": 0.00015363491321958037,
      "loss": 2.9122,
      "step": 152580
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.438904047012329,
      "learning_rate": 0.00015363134254594372,
      "loss": 3.165,
      "step": 152581
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.216581106185913,
      "learning_rate": 0.00015362777189951993,
      "loss": 3.1248,
      "step": 152582
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5585198402404785,
      "learning_rate": 0.00015362420128030933,
      "loss": 3.0475,
      "step": 152583
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.328474760055542,
      "learning_rate": 0.00015362063068831303,
      "loss": 3.0793,
      "step": 152584
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2317922115325928,
      "learning_rate": 0.00015361706012353112,
      "loss": 3.0935,
      "step": 152585
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1265103816986084,
      "learning_rate": 0.0001536134895859647,
      "loss": 2.8368,
      "step": 152586
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3524038791656494,
      "learning_rate": 0.00015360991907561422,
      "loss": 3.191,
      "step": 152587
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.936321496963501,
      "learning_rate": 0.00015360634859248052,
      "loss": 3.0058,
      "step": 152588
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.29793381690979,
      "learning_rate": 0.00015360277813656405,
      "loss": 3.138,
      "step": 152589
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4199066162109375,
      "learning_rate": 0.00015359920770786578,
      "loss": 2.8503,
      "step": 152590
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7522244453430176,
      "learning_rate": 0.000153595637306386,
      "loss": 3.0652,
      "step": 152591
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.629302740097046,
      "learning_rate": 0.00015359206693212564,
      "loss": 2.9786,
      "step": 152592
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.423846483230591,
      "learning_rate": 0.00015358849658508517,
      "loss": 3.1137,
      "step": 152593
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9544782638549805,
      "learning_rate": 0.00015358492626526548,
      "loss": 3.2597,
      "step": 152594
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8310487270355225,
      "learning_rate": 0.00015358135597266697,
      "loss": 2.9511,
      "step": 152595
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1347851753234863,
      "learning_rate": 0.00015357778570729074,
      "loss": 2.8978,
      "step": 152596
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.991142749786377,
      "learning_rate": 0.00015357421546913688,
      "loss": 3.225,
      "step": 152597
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.196023941040039,
      "learning_rate": 0.00015357064525820647,
      "loss": 3.0153,
      "step": 152598
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8249216079711914,
      "learning_rate": 0.00015356707507449994,
      "loss": 2.9693,
      "step": 152599
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2681877613067627,
      "learning_rate": 0.00015356350491801816,
      "loss": 2.8305,
      "step": 152600
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6827189922332764,
      "learning_rate": 0.00015355993478876159,
      "loss": 2.9266,
      "step": 152601
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.692157745361328,
      "learning_rate": 0.00015355636468673104,
      "loss": 3.0977,
      "step": 152602
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0613245964050293,
      "learning_rate": 0.0001535527946119272,
      "loss": 2.79,
      "step": 152603
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3033082485198975,
      "learning_rate": 0.00015354922456435063,
      "loss": 2.9851,
      "step": 152604
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.731980323791504,
      "learning_rate": 0.0001535456545440019,
      "loss": 3.2405,
      "step": 152605
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.42956280708313,
      "learning_rate": 0.00015354208455088195,
      "loss": 2.6096,
      "step": 152606
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0156664848327637,
      "learning_rate": 0.00015353851458499115,
      "loss": 3.0263,
      "step": 152607
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.748304843902588,
      "learning_rate": 0.0001535349446463304,
      "loss": 2.8977,
      "step": 152608
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0726218223571777,
      "learning_rate": 0.00015353137473490033,
      "loss": 3.1025,
      "step": 152609
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.594287633895874,
      "learning_rate": 0.00015352780485070148,
      "loss": 2.8703,
      "step": 152610
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5968029499053955,
      "learning_rate": 0.0001535242349937345,
      "loss": 2.9178,
      "step": 152611
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0416767597198486,
      "learning_rate": 0.00015352066516400022,
      "loss": 2.9498,
      "step": 152612
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8475475311279297,
      "learning_rate": 0.00015351709536149912,
      "loss": 2.8594,
      "step": 152613
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5608110427856445,
      "learning_rate": 0.000153513525586232,
      "loss": 3.0636,
      "step": 152614
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.534892559051514,
      "learning_rate": 0.00015350995583819958,
      "loss": 3.1945,
      "step": 152615
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7731423377990723,
      "learning_rate": 0.00015350638611740238,
      "loss": 3.0349,
      "step": 152616
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.436150550842285,
      "learning_rate": 0.00015350281642384102,
      "loss": 3.0037,
      "step": 152617
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4994730949401855,
      "learning_rate": 0.00015349924675751636,
      "loss": 3.0993,
      "step": 152618
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5516488552093506,
      "learning_rate": 0.00015349567711842878,
      "loss": 3.1605,
      "step": 152619
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0102696418762207,
      "learning_rate": 0.0001534921075065793,
      "loss": 2.822,
      "step": 152620
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3888111114501953,
      "learning_rate": 0.00015348853792196835,
      "loss": 2.9532,
      "step": 152621
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.038637638092041,
      "learning_rate": 0.0001534849683645966,
      "loss": 2.8855,
      "step": 152622
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3268065452575684,
      "learning_rate": 0.00015348139883446482,
      "loss": 2.929,
      "step": 152623
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.6467859745025635,
      "learning_rate": 0.00015347782933157367,
      "loss": 3.0437,
      "step": 152624
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.00736141204834,
      "learning_rate": 0.00015347425985592358,
      "loss": 2.968,
      "step": 152625
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.395448207855225,
      "learning_rate": 0.00015347069040751553,
      "loss": 2.9783,
      "step": 152626
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4345650672912598,
      "learning_rate": 0.00015346712098635005,
      "loss": 2.9759,
      "step": 152627
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.576014280319214,
      "learning_rate": 0.0001534635515924277,
      "loss": 2.9876,
      "step": 152628
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4112579822540283,
      "learning_rate": 0.0001534599822257493,
      "loss": 2.9186,
      "step": 152629
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.741060495376587,
      "learning_rate": 0.00015345641288631553,
      "loss": 3.2004,
      "step": 152630
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.846979856491089,
      "learning_rate": 0.00015345284357412678,
      "loss": 2.9071,
      "step": 152631
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.907440185546875,
      "learning_rate": 0.00015344927428918409,
      "loss": 3.1581,
      "step": 152632
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0031135082244873,
      "learning_rate": 0.0001534457050314879,
      "loss": 2.8226,
      "step": 152633
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.05678391456604,
      "learning_rate": 0.00015344213580103884,
      "loss": 3.0921,
      "step": 152634
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.215463161468506,
      "learning_rate": 0.00015343856659783778,
      "loss": 2.9893,
      "step": 152635
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.24631404876709,
      "learning_rate": 0.0001534349974218851,
      "loss": 2.8877,
      "step": 152636
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.558638095855713,
      "learning_rate": 0.0001534314282731818,
      "loss": 2.74,
      "step": 152637
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.201990842819214,
      "learning_rate": 0.00015342785915172828,
      "loss": 2.684,
      "step": 152638
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.002997875213623,
      "learning_rate": 0.00015342429005752538,
      "loss": 2.9985,
      "step": 152639
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.025223731994629,
      "learning_rate": 0.00015342072099057346,
      "loss": 2.9484,
      "step": 152640
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.125743865966797,
      "learning_rate": 0.00015341715195087358,
      "loss": 3.1976,
      "step": 152641
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1332931518554688,
      "learning_rate": 0.0001534135829384261,
      "loss": 2.7676,
      "step": 152642
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.01965069770813,
      "learning_rate": 0.0001534100139532319,
      "loss": 2.8112,
      "step": 152643
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0226893424987793,
      "learning_rate": 0.0001534064449952914,
      "loss": 2.8337,
      "step": 152644
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.153172254562378,
      "learning_rate": 0.00015340287606460567,
      "loss": 3.0305,
      "step": 152645
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.728806972503662,
      "learning_rate": 0.0001533993071611749,
      "loss": 2.9658,
      "step": 152646
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.26039457321167,
      "learning_rate": 0.00015339573828500005,
      "loss": 2.9581,
      "step": 152647
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5455522537231445,
      "learning_rate": 0.0001533921694360816,
      "loss": 3.0011,
      "step": 152648
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2341930866241455,
      "learning_rate": 0.0001533886006144204,
      "loss": 3.0667,
      "step": 152649
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3955187797546387,
      "learning_rate": 0.00015338503182001694,
      "loss": 2.7967,
      "step": 152650
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4367427825927734,
      "learning_rate": 0.0001533814630528722,
      "loss": 2.9437,
      "step": 152651
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5145161151885986,
      "learning_rate": 0.00015337789431298638,
      "loss": 2.8323,
      "step": 152652
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9120094776153564,
      "learning_rate": 0.00015337432560036052,
      "loss": 2.8807,
      "step": 152653
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1167500019073486,
      "learning_rate": 0.00015337075691499496,
      "loss": 2.8151,
      "step": 152654
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5728304386138916,
      "learning_rate": 0.00015336718825689072,
      "loss": 3.1128,
      "step": 152655
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.643860340118408,
      "learning_rate": 0.00015336361962604815,
      "loss": 2.6585,
      "step": 152656
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.502427339553833,
      "learning_rate": 0.00015336005102246827,
      "loss": 2.908,
      "step": 152657
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5550875663757324,
      "learning_rate": 0.00015335648244615128,
      "loss": 2.915,
      "step": 152658
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8345582485198975,
      "learning_rate": 0.00015335291389709823,
      "loss": 3.0151,
      "step": 152659
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6119182109832764,
      "learning_rate": 0.00015334934537530957,
      "loss": 2.7974,
      "step": 152660
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8267486095428467,
      "learning_rate": 0.0001533457768807861,
      "loss": 2.8569,
      "step": 152661
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6791841983795166,
      "learning_rate": 0.00015334220841352827,
      "loss": 2.5941,
      "step": 152662
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1433067321777344,
      "learning_rate": 0.00015333863997353718,
      "loss": 3.1873,
      "step": 152663
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.8819698095321655,
      "learning_rate": 0.00015333507156081293,
      "loss": 2.9784,
      "step": 152664
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9751360416412354,
      "learning_rate": 0.00015333150317535662,
      "loss": 2.9703,
      "step": 152665
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9843220710754395,
      "learning_rate": 0.0001533279348171686,
      "loss": 2.9952,
      "step": 152666
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1027843952178955,
      "learning_rate": 0.00015332436648624983,
      "loss": 2.8996,
      "step": 152667
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0224084854125977,
      "learning_rate": 0.0001533207981826007,
      "loss": 2.7466,
      "step": 152668
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.693178415298462,
      "learning_rate": 0.00015331722990622213,
      "loss": 2.8664,
      "step": 152669
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.522883415222168,
      "learning_rate": 0.0001533136616571146,
      "loss": 2.9584,
      "step": 152670
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.550539255142212,
      "learning_rate": 0.0001533100934352789,
      "loss": 3.1335,
      "step": 152671
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5186634063720703,
      "learning_rate": 0.00015330652524071548,
      "loss": 2.9844,
      "step": 152672
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.666149139404297,
      "learning_rate": 0.00015330295707342528,
      "loss": 3.0045,
      "step": 152673
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1959753036499023,
      "learning_rate": 0.0001532993889334087,
      "loss": 2.8202,
      "step": 152674
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2061705589294434,
      "learning_rate": 0.00015329582082066663,
      "loss": 3.1921,
      "step": 152675
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4435625076293945,
      "learning_rate": 0.00015329225273519963,
      "loss": 2.9862,
      "step": 152676
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9509527683258057,
      "learning_rate": 0.0001532886846770084,
      "loss": 2.9131,
      "step": 152677
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.386309862136841,
      "learning_rate": 0.00015328511664609344,
      "loss": 2.9963,
      "step": 152678
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3747713565826416,
      "learning_rate": 0.00015328154864245567,
      "loss": 2.9279,
      "step": 152679
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.179981231689453,
      "learning_rate": 0.00015327798066609553,
      "loss": 2.9395,
      "step": 152680
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5721659660339355,
      "learning_rate": 0.0001532744127170139,
      "loss": 2.9399,
      "step": 152681
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.795018196105957,
      "learning_rate": 0.00015327084479521127,
      "loss": 2.8771,
      "step": 152682
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1502270698547363,
      "learning_rate": 0.0001532672769006884,
      "loss": 2.7849,
      "step": 152683
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.087951898574829,
      "learning_rate": 0.0001532637090334458,
      "loss": 3.0981,
      "step": 152684
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.387051105499268,
      "learning_rate": 0.00015326014119348432,
      "loss": 2.8656,
      "step": 152685
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.20062255859375,
      "learning_rate": 0.0001532565733808045,
      "loss": 3.2398,
      "step": 152686
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2740964889526367,
      "learning_rate": 0.00015325300559540712,
      "loss": 2.9666,
      "step": 152687
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.175353527069092,
      "learning_rate": 0.0001532494378372928,
      "loss": 3.0283,
      "step": 152688
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.257075786590576,
      "learning_rate": 0.0001532458701064622,
      "loss": 3.108,
      "step": 152689
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.53578782081604,
      "learning_rate": 0.00015324230240291583,
      "loss": 2.9118,
      "step": 152690
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2748255729675293,
      "learning_rate": 0.00015323873472665456,
      "loss": 3.0191,
      "step": 152691
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7193517684936523,
      "learning_rate": 0.0001532351670776789,
      "loss": 2.8629,
      "step": 152692
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.818932056427002,
      "learning_rate": 0.0001532315994559897,
      "loss": 2.9186,
      "step": 152693
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8440818786621094,
      "learning_rate": 0.00015322803186158752,
      "loss": 2.8169,
      "step": 152694
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5137548446655273,
      "learning_rate": 0.00015322446429447305,
      "loss": 3.1694,
      "step": 152695
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.4055097103118896,
      "learning_rate": 0.00015322089675464678,
      "loss": 2.9548,
      "step": 152696
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.354870080947876,
      "learning_rate": 0.00015321732924210963,
      "loss": 3.178,
      "step": 152697
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.501255512237549,
      "learning_rate": 0.00015321376175686204,
      "loss": 3.0497,
      "step": 152698
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1896495819091797,
      "learning_rate": 0.00015321019429890485,
      "loss": 2.9755,
      "step": 152699
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2072391510009766,
      "learning_rate": 0.00015320662686823874,
      "loss": 3.202,
      "step": 152700
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.632293939590454,
      "learning_rate": 0.00015320305946486422,
      "loss": 3.1135,
      "step": 152701
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.54659104347229,
      "learning_rate": 0.00015319949208878196,
      "loss": 2.854,
      "step": 152702
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1656813621520996,
      "learning_rate": 0.0001531959247399928,
      "loss": 2.8951,
      "step": 152703
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9188408851623535,
      "learning_rate": 0.0001531923574184971,
      "loss": 2.9454,
      "step": 152704
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.924962282180786,
      "learning_rate": 0.0001531887901242959,
      "loss": 2.9629,
      "step": 152705
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.64967942237854,
      "learning_rate": 0.00015318522285738965,
      "loss": 2.7647,
      "step": 152706
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.29268741607666,
      "learning_rate": 0.0001531816556177789,
      "loss": 3.066,
      "step": 152707
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3400800228118896,
      "learning_rate": 0.0001531780884054646,
      "loss": 2.6693,
      "step": 152708
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5069775581359863,
      "learning_rate": 0.00015317452122044726,
      "loss": 2.9413,
      "step": 152709
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.215141534805298,
      "learning_rate": 0.00015317095406272742,
      "loss": 2.8121,
      "step": 152710
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.108323812484741,
      "learning_rate": 0.00015316738693230603,
      "loss": 2.9826,
      "step": 152711
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.566260814666748,
      "learning_rate": 0.00015316381982918354,
      "loss": 2.9425,
      "step": 152712
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2959537506103516,
      "learning_rate": 0.00015316025275336054,
      "loss": 2.7977,
      "step": 152713
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7521607875823975,
      "learning_rate": 0.00015315668570483798,
      "loss": 3.1148,
      "step": 152714
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.524662971496582,
      "learning_rate": 0.00015315311868361635,
      "loss": 2.8831,
      "step": 152715
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4555299282073975,
      "learning_rate": 0.00015314955168969623,
      "loss": 2.8423,
      "step": 152716
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1428933143615723,
      "learning_rate": 0.00015314598472307848,
      "loss": 2.8811,
      "step": 152717
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3386690616607666,
      "learning_rate": 0.00015314241778376363,
      "loss": 3.0377,
      "step": 152718
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.724414587020874,
      "learning_rate": 0.00015313885087175232,
      "loss": 2.8657,
      "step": 152719
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.895466685295105,
      "learning_rate": 0.00015313528398704535,
      "loss": 3.1237,
      "step": 152720
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.6845555305480957,
      "learning_rate": 0.0001531317171296432,
      "loss": 2.9576,
      "step": 152721
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.407763719558716,
      "learning_rate": 0.00015312815029954678,
      "loss": 3.1207,
      "step": 152722
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2014307975769043,
      "learning_rate": 0.00015312458349675656,
      "loss": 2.9791,
      "step": 152723
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3775181770324707,
      "learning_rate": 0.00015312101672127328,
      "loss": 2.7183,
      "step": 152724
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.760215997695923,
      "learning_rate": 0.00015311744997309744,
      "loss": 2.9516,
      "step": 152725
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0146446228027344,
      "learning_rate": 0.00015311388325222995,
      "loss": 3.0368,
      "step": 152726
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.568418025970459,
      "learning_rate": 0.00015311031655867126,
      "loss": 3.1931,
      "step": 152727
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.492805004119873,
      "learning_rate": 0.00015310674989242224,
      "loss": 2.8707,
      "step": 152728
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.786344528198242,
      "learning_rate": 0.0001531031832534835,
      "loss": 2.8855,
      "step": 152729
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8861827850341797,
      "learning_rate": 0.00015309961664185557,
      "loss": 3.0998,
      "step": 152730
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.257979393005371,
      "learning_rate": 0.00015309605005753914,
      "loss": 3.1393,
      "step": 152731
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.766542911529541,
      "learning_rate": 0.000153092483500535,
      "loss": 2.9032,
      "step": 152732
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7271342277526855,
      "learning_rate": 0.0001530889169708437,
      "loss": 2.8739,
      "step": 152733
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3158133029937744,
      "learning_rate": 0.000153085350468466,
      "loss": 2.8324,
      "step": 152734
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5931382179260254,
      "learning_rate": 0.00015308178399340238,
      "loss": 2.8843,
      "step": 152735
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9965453147888184,
      "learning_rate": 0.00015307821754565376,
      "loss": 2.9365,
      "step": 152736
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.6358485221862793,
      "learning_rate": 0.0001530746511252207,
      "loss": 2.7795,
      "step": 152737
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1228208541870117,
      "learning_rate": 0.00015307108473210378,
      "loss": 2.9141,
      "step": 152738
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.792017936706543,
      "learning_rate": 0.00015306751836630363,
      "loss": 2.8749,
      "step": 152739
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.598655939102173,
      "learning_rate": 0.0001530639520278211,
      "loss": 2.8904,
      "step": 152740
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1996705532073975,
      "learning_rate": 0.00015306038571665667,
      "loss": 3.0188,
      "step": 152741
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7362513542175293,
      "learning_rate": 0.00015305681943281123,
      "loss": 3.0133,
      "step": 152742
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8622753620147705,
      "learning_rate": 0.00015305325317628525,
      "loss": 3.0803,
      "step": 152743
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.018559455871582,
      "learning_rate": 0.00015304968694707945,
      "loss": 2.9865,
      "step": 152744
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.652733325958252,
      "learning_rate": 0.0001530461207451944,
      "loss": 2.9911,
      "step": 152745
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2322678565979004,
      "learning_rate": 0.00015304255457063094,
      "loss": 2.8594,
      "step": 152746
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.701821804046631,
      "learning_rate": 0.0001530389884233895,
      "loss": 2.7521,
      "step": 152747
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.852869749069214,
      "learning_rate": 0.00015303542230347107,
      "loss": 2.8198,
      "step": 152748
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.651959180831909,
      "learning_rate": 0.0001530318562108761,
      "loss": 3.1241,
      "step": 152749
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.374915599822998,
      "learning_rate": 0.00015302829014560525,
      "loss": 2.8721,
      "step": 152750
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7418620586395264,
      "learning_rate": 0.00015302472410765912,
      "loss": 3.1131,
      "step": 152751
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1250152587890625,
      "learning_rate": 0.0001530211580970386,
      "loss": 3.1249,
      "step": 152752
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1280035972595215,
      "learning_rate": 0.0001530175921137441,
      "loss": 2.917,
      "step": 152753
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4455220699310303,
      "learning_rate": 0.00015301402615777653,
      "loss": 2.716,
      "step": 152754
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1598358154296875,
      "learning_rate": 0.00015301046022913638,
      "loss": 2.9116,
      "step": 152755
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2044179439544678,
      "learning_rate": 0.00015300689432782443,
      "loss": 2.948,
      "step": 152756
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7094879150390625,
      "learning_rate": 0.0001530033284538411,
      "loss": 3.1557,
      "step": 152757
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9986765384674072,
      "learning_rate": 0.00015299976260718738,
      "loss": 3.0022,
      "step": 152758
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1387171745300293,
      "learning_rate": 0.00015299619678786364,
      "loss": 2.6089,
      "step": 152759
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.300523519515991,
      "learning_rate": 0.0001529926309958708,
      "loss": 3.0791,
      "step": 152760
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.807481050491333,
      "learning_rate": 0.00015298906523120943,
      "loss": 2.8045,
      "step": 152761
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9985765218734741,
      "learning_rate": 0.0001529854994938801,
      "loss": 2.8672,
      "step": 152762
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.247217893600464,
      "learning_rate": 0.00015298193378388347,
      "loss": 2.9034,
      "step": 152763
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3684637546539307,
      "learning_rate": 0.00015297836810122041,
      "loss": 3.1237,
      "step": 152764
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.300609827041626,
      "learning_rate": 0.00015297480244589129,
      "loss": 2.7332,
      "step": 152765
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3958725929260254,
      "learning_rate": 0.0001529712368178971,
      "loss": 3.1363,
      "step": 152766
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2702579498291016,
      "learning_rate": 0.00015296767121723829,
      "loss": 2.9727,
      "step": 152767
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2283213138580322,
      "learning_rate": 0.00015296410564391558,
      "loss": 2.997,
      "step": 152768
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.8576762676239014,
      "learning_rate": 0.00015296054009792951,
      "loss": 3.078,
      "step": 152769
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4458587169647217,
      "learning_rate": 0.0001529569745792809,
      "loss": 2.9903,
      "step": 152770
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8764989376068115,
      "learning_rate": 0.0001529534090879703,
      "loss": 2.9267,
      "step": 152771
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.401987075805664,
      "learning_rate": 0.00015294984362399858,
      "loss": 2.8753,
      "step": 152772
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4231958389282227,
      "learning_rate": 0.00015294627818736623,
      "loss": 3.1044,
      "step": 152773
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0944950580596924,
      "learning_rate": 0.00015294271277807396,
      "loss": 2.9015,
      "step": 152774
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2714014053344727,
      "learning_rate": 0.0001529391473961223,
      "loss": 2.9312,
      "step": 152775
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.353029727935791,
      "learning_rate": 0.0001529355820415121,
      "loss": 2.9819,
      "step": 152776
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4517838954925537,
      "learning_rate": 0.00015293201671424386,
      "loss": 2.8875,
      "step": 152777
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1038897037506104,
      "learning_rate": 0.00015292845141431848,
      "loss": 3.1654,
      "step": 152778
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5102386474609375,
      "learning_rate": 0.0001529248861417365,
      "loss": 2.9312,
      "step": 152779
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8169965744018555,
      "learning_rate": 0.00015292132089649848,
      "loss": 2.9839,
      "step": 152780
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.083049774169922,
      "learning_rate": 0.00015291775567860506,
      "loss": 3.0432,
      "step": 152781
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.372711181640625,
      "learning_rate": 0.00015291419048805717,
      "loss": 2.8765,
      "step": 152782
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3576290607452393,
      "learning_rate": 0.00015291062532485515,
      "loss": 2.777,
      "step": 152783
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.260596752166748,
      "learning_rate": 0.00015290706018899997,
      "loss": 2.8236,
      "step": 152784
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.107387065887451,
      "learning_rate": 0.0001529034950804921,
      "loss": 3.0977,
      "step": 152785
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.310549259185791,
      "learning_rate": 0.00015289992999933228,
      "loss": 2.8889,
      "step": 152786
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.267667055130005,
      "learning_rate": 0.000152896364945521,
      "loss": 3.1421,
      "step": 152787
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6086812019348145,
      "learning_rate": 0.0001528927999190592,
      "loss": 2.9561,
      "step": 152788
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1734399795532227,
      "learning_rate": 0.00015288923491994728,
      "loss": 2.9602,
      "step": 152789
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6390366554260254,
      "learning_rate": 0.00015288566994818616,
      "loss": 2.9522,
      "step": 152790
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.602205753326416,
      "learning_rate": 0.00015288210500377636,
      "loss": 2.8698,
      "step": 152791
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.047260046005249,
      "learning_rate": 0.00015287854008671855,
      "loss": 3.3267,
      "step": 152792
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.362600088119507,
      "learning_rate": 0.0001528749751970133,
      "loss": 3.0012,
      "step": 152793
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2010796070098877,
      "learning_rate": 0.00015287141033466147,
      "loss": 2.9719,
      "step": 152794
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.214237689971924,
      "learning_rate": 0.0001528678454996635,
      "loss": 2.8779,
      "step": 152795
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.817081928253174,
      "learning_rate": 0.00015286428069202028,
      "loss": 3.0722,
      "step": 152796
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.210820436477661,
      "learning_rate": 0.0001528607159117324,
      "loss": 2.842,
      "step": 152797
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6328461170196533,
      "learning_rate": 0.00015285715115880035,
      "loss": 3.1351,
      "step": 152798
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1488163471221924,
      "learning_rate": 0.00015285358643322508,
      "loss": 3.0273,
      "step": 152799
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2179741859436035,
      "learning_rate": 0.00015285002173500708,
      "loss": 2.9327,
      "step": 152800
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.323805093765259,
      "learning_rate": 0.0001528464570641469,
      "loss": 3.06,
      "step": 152801
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.400120973587036,
      "learning_rate": 0.0001528428924206455,
      "loss": 3.0497,
      "step": 152802
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.6547436714172363,
      "learning_rate": 0.00015283932780450323,
      "loss": 3.1519,
      "step": 152803
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6419949531555176,
      "learning_rate": 0.00015283576321572107,
      "loss": 2.9623,
      "step": 152804
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.185063123703003,
      "learning_rate": 0.0001528321986542995,
      "loss": 2.9402,
      "step": 152805
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5637640953063965,
      "learning_rate": 0.00015282863412023905,
      "loss": 2.9718,
      "step": 152806
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8124539852142334,
      "learning_rate": 0.0001528250696135407,
      "loss": 2.8601,
      "step": 152807
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1378116607666016,
      "learning_rate": 0.00015282150513420495,
      "loss": 3.009,
      "step": 152808
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2757630348205566,
      "learning_rate": 0.0001528179406822323,
      "loss": 3.2095,
      "step": 152809
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4735288619995117,
      "learning_rate": 0.0001528143762576237,
      "loss": 3.1637,
      "step": 152810
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.625256299972534,
      "learning_rate": 0.00015281081186037973,
      "loss": 3.127,
      "step": 152811
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.311774253845215,
      "learning_rate": 0.00015280724749050086,
      "loss": 2.9096,
      "step": 152812
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.155522346496582,
      "learning_rate": 0.00015280368314798802,
      "loss": 3.0796,
      "step": 152813
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2413065433502197,
      "learning_rate": 0.0001528001188328417,
      "loss": 2.9892,
      "step": 152814
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6063427925109863,
      "learning_rate": 0.00015279655454506259,
      "loss": 2.8027,
      "step": 152815
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.327343463897705,
      "learning_rate": 0.00015279299028465144,
      "loss": 2.9105,
      "step": 152816
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.028165817260742,
      "learning_rate": 0.00015278942605160886,
      "loss": 2.9607,
      "step": 152817
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2738609313964844,
      "learning_rate": 0.0001527858618459354,
      "loss": 2.9559,
      "step": 152818
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.144448757171631,
      "learning_rate": 0.00015278229766763196,
      "loss": 3.0992,
      "step": 152819
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.299533367156982,
      "learning_rate": 0.0001527787335166989,
      "loss": 2.7376,
      "step": 152820
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8354969024658203,
      "learning_rate": 0.00015277516939313723,
      "loss": 3.1623,
      "step": 152821
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3361289501190186,
      "learning_rate": 0.00015277160529694742,
      "loss": 2.693,
      "step": 152822
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4829726219177246,
      "learning_rate": 0.0001527680412281301,
      "loss": 2.9486,
      "step": 152823
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5096940994262695,
      "learning_rate": 0.00015276447718668595,
      "loss": 2.9416,
      "step": 152824
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2128918170928955,
      "learning_rate": 0.0001527609131726157,
      "loss": 2.7781,
      "step": 152825
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.510991334915161,
      "learning_rate": 0.0001527573491859199,
      "loss": 2.9402,
      "step": 152826
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0731990337371826,
      "learning_rate": 0.00015275378522659937,
      "loss": 2.9599,
      "step": 152827
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.449972152709961,
      "learning_rate": 0.00015275022129465475,
      "loss": 2.9433,
      "step": 152828
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.972203016281128,
      "learning_rate": 0.00015274665739008659,
      "loss": 2.8645,
      "step": 152829
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.21108341217041,
      "learning_rate": 0.00015274309351289553,
      "loss": 2.7867,
      "step": 152830
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5662646293640137,
      "learning_rate": 0.0001527395296630824,
      "loss": 2.8418,
      "step": 152831
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.547635555267334,
      "learning_rate": 0.00015273596584064765,
      "loss": 2.8284,
      "step": 152832
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5259768962860107,
      "learning_rate": 0.00015273240204559217,
      "loss": 2.9804,
      "step": 152833
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3104124069213867,
      "learning_rate": 0.00015272883827791654,
      "loss": 2.6851,
      "step": 152834
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.488487720489502,
      "learning_rate": 0.00015272527453762138,
      "loss": 2.9681,
      "step": 152835
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1572015285491943,
      "learning_rate": 0.00015272171082470729,
      "loss": 3.1442,
      "step": 152836
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3920388221740723,
      "learning_rate": 0.00015271814713917512,
      "loss": 3.0365,
      "step": 152837
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.198340892791748,
      "learning_rate": 0.00015271458348102527,
      "loss": 2.993,
      "step": 152838
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.649764060974121,
      "learning_rate": 0.00015271101985025873,
      "loss": 3.003,
      "step": 152839
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6211061477661133,
      "learning_rate": 0.00015270745624687595,
      "loss": 2.9707,
      "step": 152840
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2931482791900635,
      "learning_rate": 0.00015270389267087766,
      "loss": 2.9965,
      "step": 152841
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3430540561676025,
      "learning_rate": 0.00015270032912226435,
      "loss": 2.8804,
      "step": 152842
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.682276487350464,
      "learning_rate": 0.00015269676560103696,
      "loss": 2.9371,
      "step": 152843
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3176991939544678,
      "learning_rate": 0.0001526932021071959,
      "loss": 2.8334,
      "step": 152844
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2440249919891357,
      "learning_rate": 0.0001526896386407421,
      "loss": 2.5422,
      "step": 152845
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.088569402694702,
      "learning_rate": 0.00015268607520167607,
      "loss": 3.0482,
      "step": 152846
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.260235071182251,
      "learning_rate": 0.0001526825117899985,
      "loss": 2.9491,
      "step": 152847
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.314324378967285,
      "learning_rate": 0.00015267894840570985,
      "loss": 3.0959,
      "step": 152848
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3474860191345215,
      "learning_rate": 0.0001526753850488111,
      "loss": 3.2559,
      "step": 152849
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.411395311355591,
      "learning_rate": 0.0001526718217193027,
      "loss": 2.9595,
      "step": 152850
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.385770797729492,
      "learning_rate": 0.00015266825841718547,
      "loss": 2.8089,
      "step": 152851
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.878607749938965,
      "learning_rate": 0.00015266469514245998,
      "loss": 2.9112,
      "step": 152852
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.181466817855835,
      "learning_rate": 0.00015266113189512696,
      "loss": 2.8884,
      "step": 152853
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4612767696380615,
      "learning_rate": 0.00015265756867518685,
      "loss": 2.9373,
      "step": 152854
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3453662395477295,
      "learning_rate": 0.0001526540054826406,
      "loss": 2.9807,
      "step": 152855
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3771305084228516,
      "learning_rate": 0.00015265044231748868,
      "loss": 2.8409,
      "step": 152856
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.459357976913452,
      "learning_rate": 0.00015264687917973188,
      "loss": 3.0896,
      "step": 152857
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4307620525360107,
      "learning_rate": 0.00015264331606937087,
      "loss": 2.8542,
      "step": 152858
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5624284744262695,
      "learning_rate": 0.00015263975298640618,
      "loss": 2.9995,
      "step": 152859
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2238523960113525,
      "learning_rate": 0.00015263618993083847,
      "loss": 2.7232,
      "step": 152860
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.389819383621216,
      "learning_rate": 0.0001526326269026686,
      "loss": 2.9843,
      "step": 152861
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1557281017303467,
      "learning_rate": 0.00015262906390189698,
      "loss": 3.0096,
      "step": 152862
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2418277263641357,
      "learning_rate": 0.00015262550092852445,
      "loss": 3.083,
      "step": 152863
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.967066526412964,
      "learning_rate": 0.00015262193798255174,
      "loss": 2.8782,
      "step": 152864
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4052295684814453,
      "learning_rate": 0.00015261837506397929,
      "loss": 3.0934,
      "step": 152865
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5154480934143066,
      "learning_rate": 0.0001526148121728078,
      "loss": 2.9352,
      "step": 152866
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2389285564422607,
      "learning_rate": 0.00015261124930903813,
      "loss": 2.8973,
      "step": 152867
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1844470500946045,
      "learning_rate": 0.00015260768647267064,
      "loss": 2.7285,
      "step": 152868
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.624117851257324,
      "learning_rate": 0.00015260412366370634,
      "loss": 3.1971,
      "step": 152869
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.132427930831909,
      "learning_rate": 0.00015260056088214557,
      "loss": 2.8091,
      "step": 152870
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0917882919311523,
      "learning_rate": 0.00015259699812798935,
      "loss": 2.8815,
      "step": 152871
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.504774332046509,
      "learning_rate": 0.00015259343540123793,
      "loss": 2.8804,
      "step": 152872
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6142842769622803,
      "learning_rate": 0.00015258987270189224,
      "loss": 2.9895,
      "step": 152873
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.000992774963379,
      "learning_rate": 0.00015258631002995282,
      "loss": 2.7395,
      "step": 152874
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.252216100692749,
      "learning_rate": 0.00015258274738542046,
      "loss": 2.9495,
      "step": 152875
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9843953847885132,
      "learning_rate": 0.00015257918476829565,
      "loss": 2.9575,
      "step": 152876
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.093886375427246,
      "learning_rate": 0.0001525756221785794,
      "loss": 2.89,
      "step": 152877
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0478603839874268,
      "learning_rate": 0.0001525720596162719,
      "loss": 3.0289,
      "step": 152878
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3018674850463867,
      "learning_rate": 0.00015256849708137412,
      "loss": 2.8995,
      "step": 152879
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9910343885421753,
      "learning_rate": 0.00015256493457388654,
      "loss": 2.7501,
      "step": 152880
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.26019024848938,
      "learning_rate": 0.00015256137209381,
      "loss": 2.5814,
      "step": 152881
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.10344934463501,
      "learning_rate": 0.000152557809641145,
      "loss": 2.9044,
      "step": 152882
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2632055282592773,
      "learning_rate": 0.00015255424721589243,
      "loss": 3.0041,
      "step": 152883
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2922751903533936,
      "learning_rate": 0.00015255068481805276,
      "loss": 3.0414,
      "step": 152884
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9651858806610107,
      "learning_rate": 0.00015254712244762675,
      "loss": 2.8184,
      "step": 152885
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.069248676300049,
      "learning_rate": 0.0001525435601046149,
      "loss": 2.7859,
      "step": 152886
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2300055027008057,
      "learning_rate": 0.00015253999778901803,
      "loss": 2.9976,
      "step": 152887
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9776899814605713,
      "learning_rate": 0.00015253643550083672,
      "loss": 2.9149,
      "step": 152888
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4126675128936768,
      "learning_rate": 0.00015253287324007173,
      "loss": 2.735,
      "step": 152889
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.805931568145752,
      "learning_rate": 0.00015252931100672367,
      "loss": 3.0333,
      "step": 152890
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7723209857940674,
      "learning_rate": 0.00015252574880079324,
      "loss": 3.1306,
      "step": 152891
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6129322052001953,
      "learning_rate": 0.00015252218662228088,
      "loss": 2.7349,
      "step": 152892
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0981764793395996,
      "learning_rate": 0.0001525186244711876,
      "loss": 2.8788,
      "step": 152893
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9712027311325073,
      "learning_rate": 0.00015251506234751375,
      "loss": 2.935,
      "step": 152894
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4261765480041504,
      "learning_rate": 0.00015251150025126026,
      "loss": 2.9086,
      "step": 152895
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.855764627456665,
      "learning_rate": 0.00015250793818242763,
      "loss": 3.1657,
      "step": 152896
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.539611339569092,
      "learning_rate": 0.00015250437614101648,
      "loss": 2.8848,
      "step": 152897
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2800076007843018,
      "learning_rate": 0.00015250081412702767,
      "loss": 2.7684,
      "step": 152898
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.892177104949951,
      "learning_rate": 0.00015249725214046172,
      "loss": 2.8762,
      "step": 152899
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.187469005584717,
      "learning_rate": 0.00015249369018131923,
      "loss": 3.0241,
      "step": 152900
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2862870693206787,
      "learning_rate": 0.00015249012824960101,
      "loss": 3.0269,
      "step": 152901
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5566318035125732,
      "learning_rate": 0.0001524865663453077,
      "loss": 3.1106,
      "step": 152902
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4580934047698975,
      "learning_rate": 0.00015248300446843983,
      "loss": 3.0655,
      "step": 152903
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.417522668838501,
      "learning_rate": 0.00015247944261899822,
      "loss": 2.9251,
      "step": 152904
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.888094425201416,
      "learning_rate": 0.00015247588079698337,
      "loss": 3.062,
      "step": 152905
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9368436336517334,
      "learning_rate": 0.00015247231900239615,
      "loss": 2.7916,
      "step": 152906
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7178561687469482,
      "learning_rate": 0.00015246875723523708,
      "loss": 3.2428,
      "step": 152907
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8016531467437744,
      "learning_rate": 0.00015246519549550692,
      "loss": 2.8608,
      "step": 152908
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.055372714996338,
      "learning_rate": 0.0001524616337832061,
      "loss": 3.0772,
      "step": 152909
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2300660610198975,
      "learning_rate": 0.0001524580720983356,
      "loss": 3.1643,
      "step": 152910
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1551995277404785,
      "learning_rate": 0.00015245451044089577,
      "loss": 2.8552,
      "step": 152911
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.348350763320923,
      "learning_rate": 0.00015245094881088757,
      "loss": 2.9431,
      "step": 152912
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4783411026000977,
      "learning_rate": 0.0001524473872083115,
      "loss": 3.1103,
      "step": 152913
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5227959156036377,
      "learning_rate": 0.0001524438256331683,
      "loss": 3.2434,
      "step": 152914
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1919517517089844,
      "learning_rate": 0.00015244026408545841,
      "loss": 2.9234,
      "step": 152915
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.17832088470459,
      "learning_rate": 0.00015243670256518283,
      "loss": 2.9035,
      "step": 152916
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.505312919616699,
      "learning_rate": 0.00015243314107234187,
      "loss": 2.74,
      "step": 152917
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.329235792160034,
      "learning_rate": 0.00015242957960693657,
      "loss": 3.0968,
      "step": 152918
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9223127365112305,
      "learning_rate": 0.00015242601816896733,
      "loss": 3.113,
      "step": 152919
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6352336406707764,
      "learning_rate": 0.00015242245675843488,
      "loss": 3.1434,
      "step": 152920
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.298668622970581,
      "learning_rate": 0.00015241889537533977,
      "loss": 3.1483,
      "step": 152921
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9354963302612305,
      "learning_rate": 0.00015241533401968294,
      "loss": 2.8836,
      "step": 152922
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.19095516204834,
      "learning_rate": 0.00015241177269146474,
      "loss": 3.0392,
      "step": 152923
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.274091958999634,
      "learning_rate": 0.00015240821139068605,
      "loss": 2.9419,
      "step": 152924
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.169512987136841,
      "learning_rate": 0.00015240465011734752,
      "loss": 3.2977,
      "step": 152925
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.407733917236328,
      "learning_rate": 0.00015240108887144973,
      "loss": 3.1526,
      "step": 152926
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7460689544677734,
      "learning_rate": 0.00015239752765299326,
      "loss": 3.1787,
      "step": 152927
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3662967681884766,
      "learning_rate": 0.00015239396646197898,
      "loss": 2.9822,
      "step": 152928
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.234837055206299,
      "learning_rate": 0.0001523904052984073,
      "loss": 2.7485,
      "step": 152929
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8861522674560547,
      "learning_rate": 0.00015238684416227918,
      "loss": 3.0741,
      "step": 152930
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.354738235473633,
      "learning_rate": 0.00015238328305359515,
      "loss": 2.9848,
      "step": 152931
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3064537048339844,
      "learning_rate": 0.00015237972197235583,
      "loss": 2.8208,
      "step": 152932
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.36303448677063,
      "learning_rate": 0.00015237616091856178,
      "loss": 3.0304,
      "step": 152933
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0060665607452393,
      "learning_rate": 0.0001523725998922139,
      "loss": 2.9433,
      "step": 152934
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9339723587036133,
      "learning_rate": 0.00015236903889331266,
      "loss": 2.7507,
      "step": 152935
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.380347490310669,
      "learning_rate": 0.00015236547792185892,
      "loss": 2.9183,
      "step": 152936
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5615782737731934,
      "learning_rate": 0.00015236191697785305,
      "loss": 2.9765,
      "step": 152937
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7572641372680664,
      "learning_rate": 0.00015235835606129615,
      "loss": 2.8632,
      "step": 152938
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.076162815093994,
      "learning_rate": 0.00015235479517218843,
      "loss": 3.0849,
      "step": 152939
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4787375926971436,
      "learning_rate": 0.0001523512343105308,
      "loss": 2.7379,
      "step": 152940
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3653056621551514,
      "learning_rate": 0.00015234767347632375,
      "loss": 2.8861,
      "step": 152941
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.520676612854004,
      "learning_rate": 0.00015234411266956823,
      "loss": 2.9142,
      "step": 152942
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.534977674484253,
      "learning_rate": 0.00015234055189026455,
      "loss": 2.894,
      "step": 152943
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3028225898742676,
      "learning_rate": 0.0001523369911384138,
      "loss": 2.9828,
      "step": 152944
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8558216094970703,
      "learning_rate": 0.00015233343041401613,
      "loss": 2.967,
      "step": 152945
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.24855899810791,
      "learning_rate": 0.00015232986971707262,
      "loss": 2.8052,
      "step": 152946
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8471055030822754,
      "learning_rate": 0.00015232630904758363,
      "loss": 2.8138,
      "step": 152947
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.4904110431671143,
      "learning_rate": 0.00015232274840555013,
      "loss": 2.8975,
      "step": 152948
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.44840669631958,
      "learning_rate": 0.00015231918779097246,
      "loss": 3.0919,
      "step": 152949
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.894423961639404,
      "learning_rate": 0.0001523156272038517,
      "loss": 2.971,
      "step": 152950
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.339987277984619,
      "learning_rate": 0.00015231206664418799,
      "loss": 3.1473,
      "step": 152951
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6706132888793945,
      "learning_rate": 0.00015230850611198238,
      "loss": 2.941,
      "step": 152952
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0000414848327637,
      "learning_rate": 0.00015230494560723528,
      "loss": 3.0849,
      "step": 152953
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5819804668426514,
      "learning_rate": 0.0001523013851299476,
      "loss": 2.8611,
      "step": 152954
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3998680114746094,
      "learning_rate": 0.00015229782468011976,
      "loss": 3.194,
      "step": 152955
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.609473705291748,
      "learning_rate": 0.00015229426425775284,
      "loss": 2.7305,
      "step": 152956
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5587759017944336,
      "learning_rate": 0.0001522907038628469,
      "loss": 2.8658,
      "step": 152957
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.180027484893799,
      "learning_rate": 0.00015228714349540303,
      "loss": 2.867,
      "step": 152958
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5861032009124756,
      "learning_rate": 0.00015228358315542168,
      "loss": 3.1691,
      "step": 152959
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1028993129730225,
      "learning_rate": 0.0001522800228429037,
      "loss": 2.9274,
      "step": 152960
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.12563419342041,
      "learning_rate": 0.00015227646255784954,
      "loss": 3.074,
      "step": 152961
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0410070419311523,
      "learning_rate": 0.00015227290230026018,
      "loss": 2.8304,
      "step": 152962
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.813553810119629,
      "learning_rate": 0.00015226934207013588,
      "loss": 3.1262,
      "step": 152963
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4484925270080566,
      "learning_rate": 0.0001522657818674776,
      "loss": 2.9719,
      "step": 152964
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.18843412399292,
      "learning_rate": 0.00015226222169228583,
      "loss": 3.1108,
      "step": 152965
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4207186698913574,
      "learning_rate": 0.00015225866154456132,
      "loss": 2.9652,
      "step": 152966
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.4919540882110596,
      "learning_rate": 0.00015225510142430465,
      "loss": 2.8839,
      "step": 152967
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7108378410339355,
      "learning_rate": 0.0001522515413315167,
      "loss": 2.9507,
      "step": 152968
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2720603942871094,
      "learning_rate": 0.0001522479812661979,
      "loss": 3.0477,
      "step": 152969
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2771308422088623,
      "learning_rate": 0.00015224442122834902,
      "loss": 3.1555,
      "step": 152970
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.56974196434021,
      "learning_rate": 0.0001522408612179706,
      "loss": 3.0194,
      "step": 152971
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.377397060394287,
      "learning_rate": 0.0001522373012350635,
      "loss": 2.9444,
      "step": 152972
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9075615406036377,
      "learning_rate": 0.00015223374127962817,
      "loss": 3.1469,
      "step": 152973
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.714914321899414,
      "learning_rate": 0.00015223018135166545,
      "loss": 3.0464,
      "step": 152974
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.204235792160034,
      "learning_rate": 0.00015222662145117596,
      "loss": 3.0142,
      "step": 152975
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4901938438415527,
      "learning_rate": 0.00015222306157816035,
      "loss": 2.9084,
      "step": 152976
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.975623369216919,
      "learning_rate": 0.00015221950173261912,
      "loss": 3.1293,
      "step": 152977
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5992140769958496,
      "learning_rate": 0.0001522159419145532,
      "loss": 2.9538,
      "step": 152978
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.623659372329712,
      "learning_rate": 0.00015221238212396296,
      "loss": 2.9861,
      "step": 152979
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6800851821899414,
      "learning_rate": 0.00015220882236084942,
      "loss": 2.9415,
      "step": 152980
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.133061170578003,
      "learning_rate": 0.00015220526262521298,
      "loss": 3.0475,
      "step": 152981
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5164506435394287,
      "learning_rate": 0.00015220170291705432,
      "loss": 3.181,
      "step": 152982
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.654704570770264,
      "learning_rate": 0.0001521981432363742,
      "loss": 2.6715,
      "step": 152983
    },
    {
      "epoch": 1.99,
      "grad_norm": 7.226897239685059,
      "learning_rate": 0.0001521945835831733,
      "loss": 3.0153,
      "step": 152984
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5646209716796875,
      "learning_rate": 0.00015219102395745208,
      "loss": 2.8665,
      "step": 152985
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.086414098739624,
      "learning_rate": 0.00015218746435921145,
      "loss": 2.8333,
      "step": 152986
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.576481342315674,
      "learning_rate": 0.00015218390478845197,
      "loss": 2.8991,
      "step": 152987
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8939151763916016,
      "learning_rate": 0.00015218034524517413,
      "loss": 3.0527,
      "step": 152988
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.323908805847168,
      "learning_rate": 0.00015217678572937895,
      "loss": 2.9878,
      "step": 152989
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.932326555252075,
      "learning_rate": 0.0001521732262410667,
      "loss": 3.0627,
      "step": 152990
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1480302810668945,
      "learning_rate": 0.00015216966678023843,
      "loss": 3.0063,
      "step": 152991
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3454439640045166,
      "learning_rate": 0.00015216610734689455,
      "loss": 2.8,
      "step": 152992
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1023218631744385,
      "learning_rate": 0.00015216254794103576,
      "loss": 3.1057,
      "step": 152993
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.9260003566741943,
      "learning_rate": 0.0001521589885626627,
      "loss": 3.0308,
      "step": 152994
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.878343343734741,
      "learning_rate": 0.00015215542921177618,
      "loss": 3.0733,
      "step": 152995
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4874587059020996,
      "learning_rate": 0.0001521518698883766,
      "loss": 3.0182,
      "step": 152996
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2959702014923096,
      "learning_rate": 0.00015214831059246492,
      "loss": 3.0385,
      "step": 152997
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6215174198150635,
      "learning_rate": 0.00015214475132404166,
      "loss": 3.0159,
      "step": 152998
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3760688304901123,
      "learning_rate": 0.00015214119208310746,
      "loss": 2.8138,
      "step": 152999
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.042208433151245,
      "learning_rate": 0.0001521376328696629,
      "loss": 2.9572,
      "step": 153000
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1917128562927246,
      "learning_rate": 0.00015213407368370888,
      "loss": 2.9596,
      "step": 153001
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2799103260040283,
      "learning_rate": 0.00015213051452524578,
      "loss": 3.0931,
      "step": 153002
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.078439235687256,
      "learning_rate": 0.00015212695539427456,
      "loss": 2.8864,
      "step": 153003
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.229829788208008,
      "learning_rate": 0.00015212339629079558,
      "loss": 2.7979,
      "step": 153004
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2430672645568848,
      "learning_rate": 0.0001521198372148099,
      "loss": 3.0369,
      "step": 153005
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0792367458343506,
      "learning_rate": 0.00015211627816631772,
      "loss": 3.0455,
      "step": 153006
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.593048572540283,
      "learning_rate": 0.00015211271914532,
      "loss": 2.6637,
      "step": 153007
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7253565788269043,
      "learning_rate": 0.00015210916015181723,
      "loss": 2.7995,
      "step": 153008
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.901153087615967,
      "learning_rate": 0.00015210560118581026,
      "loss": 3.0139,
      "step": 153009
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.386904716491699,
      "learning_rate": 0.0001521020422472995,
      "loss": 2.8961,
      "step": 153010
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5551578998565674,
      "learning_rate": 0.00015209848333628604,
      "loss": 3.1045,
      "step": 153011
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4463019371032715,
      "learning_rate": 0.00015209492445276997,
      "loss": 2.8929,
      "step": 153012
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2589223384857178,
      "learning_rate": 0.00015209136559675245,
      "loss": 3.0254,
      "step": 153013
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.488574504852295,
      "learning_rate": 0.00015208780676823376,
      "loss": 2.95,
      "step": 153014
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6555306911468506,
      "learning_rate": 0.00015208424796721488,
      "loss": 2.9577,
      "step": 153015
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.423748254776001,
      "learning_rate": 0.0001520806891936962,
      "loss": 2.7667,
      "step": 153016
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.132848024368286,
      "learning_rate": 0.0001520771304476788,
      "loss": 3.2659,
      "step": 153017
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.318999767303467,
      "learning_rate": 0.00015207357172916273,
      "loss": 2.9338,
      "step": 153018
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.280146598815918,
      "learning_rate": 0.00015207001303814915,
      "loss": 2.8095,
      "step": 153019
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.229661703109741,
      "learning_rate": 0.0001520664543746384,
      "loss": 3.1504,
      "step": 153020
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9909839630126953,
      "learning_rate": 0.00015206289573863144,
      "loss": 2.9467,
      "step": 153021
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2847580909729004,
      "learning_rate": 0.00015205933713012863,
      "loss": 2.9478,
      "step": 153022
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3355603218078613,
      "learning_rate": 0.00015205577854913101,
      "loss": 2.6318,
      "step": 153023
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9613921642303467,
      "learning_rate": 0.00015205221999563885,
      "loss": 3.0177,
      "step": 153024
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.568070650100708,
      "learning_rate": 0.00015204866146965304,
      "loss": 2.7924,
      "step": 153025
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1207258701324463,
      "learning_rate": 0.00015204510297117405,
      "loss": 3.0266,
      "step": 153026
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1951746940612793,
      "learning_rate": 0.0001520415445002028,
      "loss": 3.0878,
      "step": 153027
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2314293384552,
      "learning_rate": 0.0001520379860567397,
      "loss": 3.2492,
      "step": 153028
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5731403827667236,
      "learning_rate": 0.00015203442764078577,
      "loss": 2.9123,
      "step": 153029
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1613821983337402,
      "learning_rate": 0.00015203086925234122,
      "loss": 3.0423,
      "step": 153030
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.133746862411499,
      "learning_rate": 0.000152027310891407,
      "loss": 2.8691,
      "step": 153031
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.067359685897827,
      "learning_rate": 0.00015202375255798359,
      "loss": 2.9103,
      "step": 153032
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.048553705215454,
      "learning_rate": 0.00015202019425207186,
      "loss": 2.9728,
      "step": 153033
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.916248321533203,
      "learning_rate": 0.0001520166359736723,
      "loss": 3.0969,
      "step": 153034
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6265881061553955,
      "learning_rate": 0.00015201307772278583,
      "loss": 3.1413,
      "step": 153035
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3163065910339355,
      "learning_rate": 0.0001520095194994127,
      "loss": 2.923,
      "step": 153036
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.263500213623047,
      "learning_rate": 0.00015200596130355394,
      "loss": 3.1372,
      "step": 153037
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5518200397491455,
      "learning_rate": 0.00015200240313520993,
      "loss": 3.0859,
      "step": 153038
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.387559413909912,
      "learning_rate": 0.00015199884499438154,
      "loss": 3.0055,
      "step": 153039
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4038517475128174,
      "learning_rate": 0.0001519952868810693,
      "loss": 2.947,
      "step": 153040
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7200796604156494,
      "learning_rate": 0.00015199172879527413,
      "loss": 3.0815,
      "step": 153041
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6187524795532227,
      "learning_rate": 0.0001519881707369963,
      "loss": 3.1174,
      "step": 153042
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2898952960968018,
      "learning_rate": 0.00015198461270623674,
      "loss": 3.0556,
      "step": 153043
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3490772247314453,
      "learning_rate": 0.0001519810547029959,
      "loss": 2.8153,
      "step": 153044
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0819947719573975,
      "learning_rate": 0.00015197749672727474,
      "loss": 3.0838,
      "step": 153045
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5321786403656006,
      "learning_rate": 0.00015197393877907365,
      "loss": 2.7721,
      "step": 153046
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1517744064331055,
      "learning_rate": 0.0001519703808583936,
      "loss": 2.7657,
      "step": 153047
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.019428730010986,
      "learning_rate": 0.0001519668229652348,
      "loss": 3.1633,
      "step": 153048
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7410573959350586,
      "learning_rate": 0.00015196326509959833,
      "loss": 3.0158,
      "step": 153049
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.17002010345459,
      "learning_rate": 0.0001519597072614845,
      "loss": 2.999,
      "step": 153050
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.309032440185547,
      "learning_rate": 0.00015195614945089433,
      "loss": 3.013,
      "step": 153051
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.279405355453491,
      "learning_rate": 0.0001519525916678282,
      "loss": 2.9956,
      "step": 153052
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6139650344848633,
      "learning_rate": 0.00015194903391228695,
      "loss": 3.2713,
      "step": 153053
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.912816047668457,
      "learning_rate": 0.00015194547618427114,
      "loss": 3.0074,
      "step": 153054
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3781864643096924,
      "learning_rate": 0.00015194191848378155,
      "loss": 3.0224,
      "step": 153055
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9801907539367676,
      "learning_rate": 0.0001519383608108186,
      "loss": 2.7236,
      "step": 153056
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.203720808029175,
      "learning_rate": 0.00015193480316538316,
      "loss": 2.8778,
      "step": 153057
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1663918495178223,
      "learning_rate": 0.0001519312455474758,
      "loss": 3.0207,
      "step": 153058
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6690006256103516,
      "learning_rate": 0.00015192768795709727,
      "loss": 3.3143,
      "step": 153059
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.907275915145874,
      "learning_rate": 0.0001519241303942482,
      "loss": 2.9171,
      "step": 153060
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.401028871536255,
      "learning_rate": 0.00015192057285892926,
      "loss": 3.0223,
      "step": 153061
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.028290033340454,
      "learning_rate": 0.0001519170153511409,
      "loss": 2.9685,
      "step": 153062
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4544858932495117,
      "learning_rate": 0.00015191345787088413,
      "loss": 2.8079,
      "step": 153063
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.405627727508545,
      "learning_rate": 0.00015190990041815934,
      "loss": 2.7038,
      "step": 153064
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.737373113632202,
      "learning_rate": 0.00015190634299296738,
      "loss": 3.0524,
      "step": 153065
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1223721504211426,
      "learning_rate": 0.00015190278559530887,
      "loss": 2.7528,
      "step": 153066
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0496180057525635,
      "learning_rate": 0.00015189922822518427,
      "loss": 2.7979,
      "step": 153067
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3746440410614014,
      "learning_rate": 0.0001518956708825945,
      "loss": 2.9865,
      "step": 153068
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5456058979034424,
      "learning_rate": 0.00015189211356754015,
      "loss": 2.9334,
      "step": 153069
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4488816261291504,
      "learning_rate": 0.00015188855628002175,
      "loss": 3.103,
      "step": 153070
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.612490177154541,
      "learning_rate": 0.00015188499902004014,
      "loss": 2.9629,
      "step": 153071
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.398078680038452,
      "learning_rate": 0.00015188144178759593,
      "loss": 2.9924,
      "step": 153072
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.577714681625366,
      "learning_rate": 0.00015187788458268962,
      "loss": 3.0246,
      "step": 153073
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.667764663696289,
      "learning_rate": 0.00015187432740532212,
      "loss": 3.035,
      "step": 153074
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9899678230285645,
      "learning_rate": 0.000151870770255494,
      "loss": 2.9345,
      "step": 153075
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4117839336395264,
      "learning_rate": 0.00015186721313320577,
      "loss": 2.885,
      "step": 153076
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5723094940185547,
      "learning_rate": 0.00015186365603845836,
      "loss": 2.9494,
      "step": 153077
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.989772796630859,
      "learning_rate": 0.00015186009897125228,
      "loss": 3.0008,
      "step": 153078
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.4254000186920166,
      "learning_rate": 0.00015185654193158808,
      "loss": 3.141,
      "step": 153079
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.106184959411621,
      "learning_rate": 0.00015185298491946666,
      "loss": 3.0611,
      "step": 153080
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6355204582214355,
      "learning_rate": 0.00015184942793488847,
      "loss": 2.764,
      "step": 153081
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.632720947265625,
      "learning_rate": 0.00015184587097785438,
      "loss": 2.8251,
      "step": 153082
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8051624298095703,
      "learning_rate": 0.0001518423140483649,
      "loss": 3.1421,
      "step": 153083
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.783876895904541,
      "learning_rate": 0.0001518387571464208,
      "loss": 3.1785,
      "step": 153084
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.274003505706787,
      "learning_rate": 0.00015183520027202253,
      "loss": 3.0015,
      "step": 153085
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8969686031341553,
      "learning_rate": 0.00015183164342517098,
      "loss": 2.9801,
      "step": 153086
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0474061965942383,
      "learning_rate": 0.0001518280866058666,
      "loss": 2.8854,
      "step": 153087
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.795990467071533,
      "learning_rate": 0.00015182452981411034,
      "loss": 3.0482,
      "step": 153088
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.365138530731201,
      "learning_rate": 0.00015182097304990253,
      "loss": 2.9719,
      "step": 153089
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6119935512542725,
      "learning_rate": 0.00015181741631324427,
      "loss": 2.8753,
      "step": 153090
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.133122444152832,
      "learning_rate": 0.00015181385960413572,
      "loss": 3.1505,
      "step": 153091
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2442073822021484,
      "learning_rate": 0.00015181030292257786,
      "loss": 2.908,
      "step": 153092
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.3703107833862305,
      "learning_rate": 0.00015180674626857118,
      "loss": 2.6661,
      "step": 153093
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.338019609451294,
      "learning_rate": 0.0001518031896421165,
      "loss": 3.0305,
      "step": 153094
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8905551433563232,
      "learning_rate": 0.0001517996330432143,
      "loss": 3.016,
      "step": 153095
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1995365619659424,
      "learning_rate": 0.0001517960764718656,
      "loss": 2.9545,
      "step": 153096
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.287761688232422,
      "learning_rate": 0.00015179251992807053,
      "loss": 3.0207,
      "step": 153097
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.307655096054077,
      "learning_rate": 0.00015178896341183016,
      "loss": 3.0362,
      "step": 153098
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.131821870803833,
      "learning_rate": 0.00015178540692314493,
      "loss": 3.0532,
      "step": 153099
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9355485439300537,
      "learning_rate": 0.00015178185046201567,
      "loss": 2.8666,
      "step": 153100
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.387117862701416,
      "learning_rate": 0.0001517782940284429,
      "loss": 2.9994,
      "step": 153101
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.232245445251465,
      "learning_rate": 0.00015177473762242755,
      "loss": 2.9908,
      "step": 153102
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0765645503997803,
      "learning_rate": 0.00015177118124396982,
      "loss": 2.9484,
      "step": 153103
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2889404296875,
      "learning_rate": 0.00015176762489307077,
      "loss": 2.9702,
      "step": 153104
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0356478691101074,
      "learning_rate": 0.0001517640685697308,
      "loss": 3.164,
      "step": 153105
    },
    {
      "epoch": 1.99,
      "grad_norm": 5.811327934265137,
      "learning_rate": 0.0001517605122739508,
      "loss": 2.8146,
      "step": 153106
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.540712356567383,
      "learning_rate": 0.00015175695600573114,
      "loss": 2.8191,
      "step": 153107
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.8179280757904053,
      "learning_rate": 0.00015175339976507298,
      "loss": 2.8237,
      "step": 153108
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.320701837539673,
      "learning_rate": 0.00015174984355197639,
      "loss": 3.0684,
      "step": 153109
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9992713928222656,
      "learning_rate": 0.0001517462873664424,
      "loss": 2.8454,
      "step": 153110
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.386101007461548,
      "learning_rate": 0.00015174273120847152,
      "loss": 2.8673,
      "step": 153111
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7461326122283936,
      "learning_rate": 0.0001517391750780645,
      "loss": 2.9622,
      "step": 153112
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5510342121124268,
      "learning_rate": 0.00015173561897522192,
      "loss": 3.3078,
      "step": 153113
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.288902759552002,
      "learning_rate": 0.00015173206289994468,
      "loss": 3.0038,
      "step": 153114
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.790703058242798,
      "learning_rate": 0.00015172850685223304,
      "loss": 3.0407,
      "step": 153115
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5508203506469727,
      "learning_rate": 0.00015172495083208795,
      "loss": 2.8807,
      "step": 153116
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8720836639404297,
      "learning_rate": 0.00015172139483950992,
      "loss": 3.0304,
      "step": 153117
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.276413917541504,
      "learning_rate": 0.00015171783887449978,
      "loss": 3.0921,
      "step": 153118
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.94603967666626,
      "learning_rate": 0.00015171428293705793,
      "loss": 3.1387,
      "step": 153119
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.2033374309539795,
      "learning_rate": 0.00015171072702718552,
      "loss": 2.8174,
      "step": 153120
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.184045314788818,
      "learning_rate": 0.00015170717114488258,
      "loss": 2.9222,
      "step": 153121
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1298270225524902,
      "learning_rate": 0.0001517036152901502,
      "loss": 3.0349,
      "step": 153122
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.003269672393799,
      "learning_rate": 0.00015170005946298884,
      "loss": 2.948,
      "step": 153123
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.753065586090088,
      "learning_rate": 0.00015169650366339934,
      "loss": 2.9234,
      "step": 153124
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0777554512023926,
      "learning_rate": 0.00015169294789138212,
      "loss": 3.2969,
      "step": 153125
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.071359872817993,
      "learning_rate": 0.0001516893921469382,
      "loss": 2.9042,
      "step": 153126
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.062162399291992,
      "learning_rate": 0.00015168583643006785,
      "loss": 3.0426,
      "step": 153127
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2669036388397217,
      "learning_rate": 0.00015168228074077198,
      "loss": 3.0181,
      "step": 153128
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5535449981689453,
      "learning_rate": 0.00015167872507905106,
      "loss": 3.0137,
      "step": 153129
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1964752674102783,
      "learning_rate": 0.00015167516944490598,
      "loss": 2.7804,
      "step": 153130
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.488619804382324,
      "learning_rate": 0.00015167161383833718,
      "loss": 3.0134,
      "step": 153131
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4964494705200195,
      "learning_rate": 0.00015166805825934563,
      "loss": 3.1208,
      "step": 153132
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5630099773406982,
      "learning_rate": 0.00015166450270793157,
      "loss": 2.9181,
      "step": 153133
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9236130714416504,
      "learning_rate": 0.00015166094718409602,
      "loss": 3.0122,
      "step": 153134
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.545452356338501,
      "learning_rate": 0.00015165739168783934,
      "loss": 2.9775,
      "step": 153135
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.382974147796631,
      "learning_rate": 0.0001516538362191625,
      "loss": 2.9103,
      "step": 153136
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3141658306121826,
      "learning_rate": 0.0001516502807780659,
      "loss": 3.0368,
      "step": 153137
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3657984733581543,
      "learning_rate": 0.0001516467253645504,
      "loss": 3.0638,
      "step": 153138
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4270973205566406,
      "learning_rate": 0.0001516431699786166,
      "loss": 2.7287,
      "step": 153139
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.1811575889587402,
      "learning_rate": 0.00015163961462026515,
      "loss": 2.9182,
      "step": 153140
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2777750492095947,
      "learning_rate": 0.00015163605928949657,
      "loss": 3.1944,
      "step": 153141
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3656325340270996,
      "learning_rate": 0.0001516325039863117,
      "loss": 2.8859,
      "step": 153142
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.756284713745117,
      "learning_rate": 0.00015162894871071112,
      "loss": 2.9902,
      "step": 153143
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3355791568756104,
      "learning_rate": 0.0001516253934626956,
      "loss": 2.8194,
      "step": 153144
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.000901222229004,
      "learning_rate": 0.00015162183824226567,
      "loss": 2.9835,
      "step": 153145
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.274488687515259,
      "learning_rate": 0.0001516182830494221,
      "loss": 2.8933,
      "step": 153146
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.24662184715271,
      "learning_rate": 0.00015161472788416538,
      "loss": 3.253,
      "step": 153147
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4455554485321045,
      "learning_rate": 0.0001516111727464964,
      "loss": 3.0348,
      "step": 153148
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.473090410232544,
      "learning_rate": 0.00015160761763641556,
      "loss": 2.8476,
      "step": 153149
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0771191120147705,
      "learning_rate": 0.0001516040625539238,
      "loss": 2.7247,
      "step": 153150
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.882021188735962,
      "learning_rate": 0.00015160050749902162,
      "loss": 2.9242,
      "step": 153151
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.360689878463745,
      "learning_rate": 0.00015159695247170962,
      "loss": 3.1131,
      "step": 153152
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8116137981414795,
      "learning_rate": 0.00015159339747198864,
      "loss": 2.8818,
      "step": 153153
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.554654359817505,
      "learning_rate": 0.00015158984249985928,
      "loss": 3.2547,
      "step": 153154
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7953948974609375,
      "learning_rate": 0.00015158628755532205,
      "loss": 3.0538,
      "step": 153155
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.511561632156372,
      "learning_rate": 0.00015158273263837782,
      "loss": 2.7988,
      "step": 153156
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8930680751800537,
      "learning_rate": 0.00015157917774902716,
      "loss": 2.9597,
      "step": 153157
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4693901538848877,
      "learning_rate": 0.00015157562288727064,
      "loss": 2.6242,
      "step": 153158
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9217324256896973,
      "learning_rate": 0.00015157206805310912,
      "loss": 3.0004,
      "step": 153159
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.334446668624878,
      "learning_rate": 0.00015156851324654318,
      "loss": 2.8822,
      "step": 153160
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.46205735206604,
      "learning_rate": 0.0001515649584675733,
      "loss": 3.0283,
      "step": 153161
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.326148271560669,
      "learning_rate": 0.00015156140371620042,
      "loss": 3.1912,
      "step": 153162
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1425838470458984,
      "learning_rate": 0.00015155784899242508,
      "loss": 2.909,
      "step": 153163
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.578411817550659,
      "learning_rate": 0.0001515542942962478,
      "loss": 2.99,
      "step": 153164
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6924245357513428,
      "learning_rate": 0.00015155073962766955,
      "loss": 3.1161,
      "step": 153165
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.0212583541870117,
      "learning_rate": 0.00015154718498669068,
      "loss": 2.9561,
      "step": 153166
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.13104510307312,
      "learning_rate": 0.0001515436303733121,
      "loss": 2.9106,
      "step": 153167
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3665482997894287,
      "learning_rate": 0.00015154007578753437,
      "loss": 2.9716,
      "step": 153168
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.537914276123047,
      "learning_rate": 0.00015153652122935814,
      "loss": 2.9293,
      "step": 153169
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.097538709640503,
      "learning_rate": 0.00015153296669878391,
      "loss": 3.0525,
      "step": 153170
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.232785701751709,
      "learning_rate": 0.00015152941219581268,
      "loss": 2.9248,
      "step": 153171
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5751216411590576,
      "learning_rate": 0.00015152585772044478,
      "loss": 2.9025,
      "step": 153172
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3709373474121094,
      "learning_rate": 0.0001515223032726812,
      "loss": 3.2709,
      "step": 153173
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.439938545227051,
      "learning_rate": 0.0001515187488525224,
      "loss": 3.0525,
      "step": 153174
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.263843297958374,
      "learning_rate": 0.00015151519445996906,
      "loss": 3.0007,
      "step": 153175
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.441706895828247,
      "learning_rate": 0.00015151164009502167,
      "loss": 3.293,
      "step": 153176
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8007547855377197,
      "learning_rate": 0.0001515080857576813,
      "loss": 2.8202,
      "step": 153177
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.7047433853149414,
      "learning_rate": 0.00015150453144794815,
      "loss": 3.0594,
      "step": 153178
    },
    {
      "epoch": 1.99,
      "grad_norm": 1.9881733655929565,
      "learning_rate": 0.00015150097716582332,
      "loss": 2.9331,
      "step": 153179
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5790517330169678,
      "learning_rate": 0.0001514974229113071,
      "loss": 3.0363,
      "step": 153180
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.671353340148926,
      "learning_rate": 0.00015149386868440056,
      "loss": 2.6795,
      "step": 153181
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.071998119354248,
      "learning_rate": 0.0001514903144851039,
      "loss": 3.09,
      "step": 153182
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8053245544433594,
      "learning_rate": 0.00015148676031341807,
      "loss": 3.0377,
      "step": 153183
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2375738620758057,
      "learning_rate": 0.00015148320616934353,
      "loss": 2.5016,
      "step": 153184
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5921144485473633,
      "learning_rate": 0.00015147965205288124,
      "loss": 3.0812,
      "step": 153185
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.097679376602173,
      "learning_rate": 0.00015147609796403152,
      "loss": 2.8631,
      "step": 153186
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.986147880554199,
      "learning_rate": 0.00015147254390279543,
      "loss": 3.1607,
      "step": 153187
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6671323776245117,
      "learning_rate": 0.00015146898986917318,
      "loss": 2.8916,
      "step": 153188
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.250779628753662,
      "learning_rate": 0.00015146543586316575,
      "loss": 2.9585,
      "step": 153189
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.397754669189453,
      "learning_rate": 0.00015146188188477363,
      "loss": 3.0148,
      "step": 153190
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9554953575134277,
      "learning_rate": 0.00015145832793399762,
      "loss": 2.897,
      "step": 153191
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.209268569946289,
      "learning_rate": 0.0001514547740108382,
      "loss": 3.0361,
      "step": 153192
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.961995840072632,
      "learning_rate": 0.0001514512201152964,
      "loss": 3.0541,
      "step": 153193
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.626798152923584,
      "learning_rate": 0.00015144766624737235,
      "loss": 3.1987,
      "step": 153194
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.3704733848571777,
      "learning_rate": 0.00015144411240706712,
      "loss": 3.0685,
      "step": 153195
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.234795570373535,
      "learning_rate": 0.00015144055859438112,
      "loss": 3.0495,
      "step": 153196
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.161919355392456,
      "learning_rate": 0.00015143700480931525,
      "loss": 3.1043,
      "step": 153197
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.1415789127349854,
      "learning_rate": 0.0001514334510518699,
      "loss": 2.9386,
      "step": 153198
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.114999532699585,
      "learning_rate": 0.00015142989732204617,
      "loss": 3.2909,
      "step": 153199
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.5773355960845947,
      "learning_rate": 0.0001514263436198441,
      "loss": 3.0725,
      "step": 153200
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.636817693710327,
      "learning_rate": 0.00015142278994526484,
      "loss": 3.1636,
      "step": 153201
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2878577709198,
      "learning_rate": 0.00015141923629830882,
      "loss": 2.9016,
      "step": 153202
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.032334804534912,
      "learning_rate": 0.0001514156826789768,
      "loss": 2.9205,
      "step": 153203
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.2943756580352783,
      "learning_rate": 0.00015141212908726935,
      "loss": 3.0519,
      "step": 153204
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7728960514068604,
      "learning_rate": 0.0001514085755231873,
      "loss": 2.9359,
      "step": 153205
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.5264267921447754,
      "learning_rate": 0.00015140502198673117,
      "loss": 3.138,
      "step": 153206
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.8107311725616455,
      "learning_rate": 0.00015140146847790172,
      "loss": 2.7073,
      "step": 153207
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.6112489700317383,
      "learning_rate": 0.00015139791499669937,
      "loss": 3.1702,
      "step": 153208
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.773660659790039,
      "learning_rate": 0.00015139436154312504,
      "loss": 2.9274,
      "step": 153209
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.949284076690674,
      "learning_rate": 0.00015139080811717924,
      "loss": 2.8796,
      "step": 153210
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.0508437156677246,
      "learning_rate": 0.00015138725471886282,
      "loss": 2.9305,
      "step": 153211
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.61521053314209,
      "learning_rate": 0.00015138370134817626,
      "loss": 3.0594,
      "step": 153212
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.185673952102661,
      "learning_rate": 0.00015138014800512032,
      "loss": 2.8312,
      "step": 153213
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.9582483768463135,
      "learning_rate": 0.00015137659468969546,
      "loss": 3.0159,
      "step": 153214
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.863978147506714,
      "learning_rate": 0.00015137304140190263,
      "loss": 3.0664,
      "step": 153215
    },
    {
      "epoch": 1.99,
      "grad_norm": 4.253006935119629,
      "learning_rate": 0.00015136948814174224,
      "loss": 2.9018,
      "step": 153216
    },
    {
      "epoch": 1.99,
      "grad_norm": 2.4692156314849854,
      "learning_rate": 0.00015136593490921521,
      "loss": 2.8174,
      "step": 153217
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3521244525909424,
      "learning_rate": 0.00015136238170432206,
      "loss": 2.8434,
      "step": 153218
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.003753185272217,
      "learning_rate": 0.0001513588285270634,
      "loss": 2.9087,
      "step": 153219
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3796393871307373,
      "learning_rate": 0.00015135527537743986,
      "loss": 2.7794,
      "step": 153220
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.700955867767334,
      "learning_rate": 0.00015135172225545229,
      "loss": 3.0397,
      "step": 153221
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7557501792907715,
      "learning_rate": 0.00015134816916110116,
      "loss": 3.0836,
      "step": 153222
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.742140054702759,
      "learning_rate": 0.00015134461609438724,
      "loss": 2.7768,
      "step": 153223
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.186694383621216,
      "learning_rate": 0.00015134106305531123,
      "loss": 3.0448,
      "step": 153224
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.809131145477295,
      "learning_rate": 0.00015133751004387368,
      "loss": 3.0828,
      "step": 153225
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.337482452392578,
      "learning_rate": 0.00015133395706007526,
      "loss": 2.6452,
      "step": 153226
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.954026222229004,
      "learning_rate": 0.0001513304041039167,
      "loss": 2.9686,
      "step": 153227
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.8687827587127686,
      "learning_rate": 0.00015132685117539852,
      "loss": 2.8184,
      "step": 153228
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0270838737487793,
      "learning_rate": 0.00015132329827452162,
      "loss": 2.9746,
      "step": 153229
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7672877311706543,
      "learning_rate": 0.00015131974540128653,
      "loss": 2.9877,
      "step": 153230
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.4291205406188965,
      "learning_rate": 0.00015131619255569388,
      "loss": 2.6985,
      "step": 153231
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.99962854385376,
      "learning_rate": 0.00015131263973774424,
      "loss": 2.7685,
      "step": 153232
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.545182228088379,
      "learning_rate": 0.00015130908694743853,
      "loss": 2.9465,
      "step": 153233
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5812225341796875,
      "learning_rate": 0.00015130553418477713,
      "loss": 3.304,
      "step": 153234
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4309237003326416,
      "learning_rate": 0.000151301981449761,
      "loss": 3.0946,
      "step": 153235
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.679325819015503,
      "learning_rate": 0.00015129842874239056,
      "loss": 2.855,
      "step": 153236
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.734017848968506,
      "learning_rate": 0.0001512948760626666,
      "loss": 2.8037,
      "step": 153237
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9972987174987793,
      "learning_rate": 0.00015129132341058958,
      "loss": 3.2634,
      "step": 153238
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9598071575164795,
      "learning_rate": 0.00015128777078616046,
      "loss": 3.1019,
      "step": 153239
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.701768398284912,
      "learning_rate": 0.00015128421818937963,
      "loss": 2.8768,
      "step": 153240
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2997329235076904,
      "learning_rate": 0.00015128066562024797,
      "loss": 2.8547,
      "step": 153241
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0188260078430176,
      "learning_rate": 0.00015127711307876603,
      "loss": 2.9204,
      "step": 153242
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.245228290557861,
      "learning_rate": 0.0001512735605649344,
      "loss": 2.8231,
      "step": 153243
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.8904614448547363,
      "learning_rate": 0.0001512700080787539,
      "loss": 2.8688,
      "step": 153244
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1017374992370605,
      "learning_rate": 0.00015126645562022517,
      "loss": 2.9832,
      "step": 153245
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7916853427886963,
      "learning_rate": 0.00015126290318934866,
      "loss": 3.0417,
      "step": 153246
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2536699771881104,
      "learning_rate": 0.00015125935078612532,
      "loss": 2.8617,
      "step": 153247
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.947871685028076,
      "learning_rate": 0.00015125579841055565,
      "loss": 2.6676,
      "step": 153248
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1369872093200684,
      "learning_rate": 0.00015125224606264023,
      "loss": 3.0401,
      "step": 153249
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1071622371673584,
      "learning_rate": 0.00015124869374237993,
      "loss": 2.9174,
      "step": 153250
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.628847599029541,
      "learning_rate": 0.0001512451414497752,
      "loss": 2.9225,
      "step": 153251
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3874003887176514,
      "learning_rate": 0.00015124158918482691,
      "loss": 3.0973,
      "step": 153252
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.7617709636688232,
      "learning_rate": 0.00015123803694753568,
      "loss": 3.0416,
      "step": 153253
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.27704119682312,
      "learning_rate": 0.00015123448473790204,
      "loss": 2.7509,
      "step": 153254
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.284235954284668,
      "learning_rate": 0.0001512309325559266,
      "loss": 3.2169,
      "step": 153255
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.237443447113037,
      "learning_rate": 0.00015122738040161028,
      "loss": 3.1511,
      "step": 153256
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.695249557495117,
      "learning_rate": 0.0001512238282749535,
      "loss": 3.1041,
      "step": 153257
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1185548305511475,
      "learning_rate": 0.00015122027617595708,
      "loss": 3.0736,
      "step": 153258
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1651196479797363,
      "learning_rate": 0.00015121672410462166,
      "loss": 3.066,
      "step": 153259
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.466878652572632,
      "learning_rate": 0.00015121317206094782,
      "loss": 3.0979,
      "step": 153260
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.143097400665283,
      "learning_rate": 0.00015120962004493618,
      "loss": 2.9315,
      "step": 153261
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2352564334869385,
      "learning_rate": 0.0001512060680565876,
      "loss": 2.7044,
      "step": 153262
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.786771535873413,
      "learning_rate": 0.00015120251609590247,
      "loss": 2.7823,
      "step": 153263
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5245437622070312,
      "learning_rate": 0.0001511989641628818,
      "loss": 2.9149,
      "step": 153264
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0983476638793945,
      "learning_rate": 0.00015119541225752585,
      "loss": 3.1588,
      "step": 153265
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.385930061340332,
      "learning_rate": 0.00015119186037983572,
      "loss": 3.1782,
      "step": 153266
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.690155506134033,
      "learning_rate": 0.0001511883085298116,
      "loss": 2.891,
      "step": 153267
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.414296865463257,
      "learning_rate": 0.00015118475670745454,
      "loss": 2.8474,
      "step": 153268
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.6309850215911865,
      "learning_rate": 0.00015118120491276489,
      "loss": 2.9164,
      "step": 153269
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.336190700531006,
      "learning_rate": 0.0001511776531457436,
      "loss": 2.862,
      "step": 153270
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.285954475402832,
      "learning_rate": 0.00015117410140639106,
      "loss": 3.0111,
      "step": 153271
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4706149101257324,
      "learning_rate": 0.0001511705496947082,
      "loss": 2.6466,
      "step": 153272
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4855334758758545,
      "learning_rate": 0.0001511669980106955,
      "loss": 3.0145,
      "step": 153273
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2359650135040283,
      "learning_rate": 0.00015116344635435376,
      "loss": 2.8296,
      "step": 153274
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6417946815490723,
      "learning_rate": 0.00015115989472568332,
      "loss": 3.1958,
      "step": 153275
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9005823135375977,
      "learning_rate": 0.00015115634312468527,
      "loss": 3.0139,
      "step": 153276
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.254093885421753,
      "learning_rate": 0.0001511527915513599,
      "loss": 3.1823,
      "step": 153277
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5858020782470703,
      "learning_rate": 0.00015114924000570815,
      "loss": 2.838,
      "step": 153278
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1924688816070557,
      "learning_rate": 0.00015114568848773058,
      "loss": 2.8215,
      "step": 153279
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2261874675750732,
      "learning_rate": 0.0001511421369974278,
      "loss": 2.9343,
      "step": 153280
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9051716327667236,
      "learning_rate": 0.00015113858553480046,
      "loss": 2.9813,
      "step": 153281
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0299339294433594,
      "learning_rate": 0.00015113503409984935,
      "loss": 2.9346,
      "step": 153282
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1981847286224365,
      "learning_rate": 0.00015113148269257492,
      "loss": 2.8228,
      "step": 153283
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.368757486343384,
      "learning_rate": 0.00015112793131297808,
      "loss": 3.1631,
      "step": 153284
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.978919506072998,
      "learning_rate": 0.00015112437996105937,
      "loss": 2.8453,
      "step": 153285
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2267067432403564,
      "learning_rate": 0.00015112082863681943,
      "loss": 3.1581,
      "step": 153286
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.361579418182373,
      "learning_rate": 0.00015111727734025887,
      "loss": 2.8581,
      "step": 153287
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.283707618713379,
      "learning_rate": 0.0001511137260713785,
      "loss": 2.8533,
      "step": 153288
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4031524658203125,
      "learning_rate": 0.0001511101748301788,
      "loss": 2.8734,
      "step": 153289
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5183753967285156,
      "learning_rate": 0.00015110662361666062,
      "loss": 3.009,
      "step": 153290
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.49025821685791,
      "learning_rate": 0.00015110307243082453,
      "loss": 3.1217,
      "step": 153291
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.327122926712036,
      "learning_rate": 0.0001510995212726712,
      "loss": 3.1024,
      "step": 153292
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.682767152786255,
      "learning_rate": 0.00015109597014220113,
      "loss": 2.9327,
      "step": 153293
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0921804904937744,
      "learning_rate": 0.0001510924190394153,
      "loss": 2.975,
      "step": 153294
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8861358165740967,
      "learning_rate": 0.00015108886796431407,
      "loss": 3.1757,
      "step": 153295
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.210894823074341,
      "learning_rate": 0.00015108531691689834,
      "loss": 2.8892,
      "step": 153296
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2450344562530518,
      "learning_rate": 0.00015108176589716867,
      "loss": 2.9164,
      "step": 153297
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2750704288482666,
      "learning_rate": 0.00015107821490512567,
      "loss": 3.1332,
      "step": 153298
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8822641372680664,
      "learning_rate": 0.00015107466394077,
      "loss": 2.9663,
      "step": 153299
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3585360050201416,
      "learning_rate": 0.0001510711130041024,
      "loss": 3.1813,
      "step": 153300
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.996450424194336,
      "learning_rate": 0.00015106756209512344,
      "loss": 2.9433,
      "step": 153301
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8488516807556152,
      "learning_rate": 0.0001510640112138339,
      "loss": 2.9047,
      "step": 153302
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.362098217010498,
      "learning_rate": 0.0001510604603602344,
      "loss": 2.7358,
      "step": 153303
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0508291721343994,
      "learning_rate": 0.00015105690953432557,
      "loss": 3.1179,
      "step": 153304
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6780054569244385,
      "learning_rate": 0.0001510533587361079,
      "loss": 2.8095,
      "step": 153305
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.43156099319458,
      "learning_rate": 0.0001510498079655824,
      "loss": 3.0022,
      "step": 153306
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0247724056243896,
      "learning_rate": 0.00015104625722274942,
      "loss": 3.0162,
      "step": 153307
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5451791286468506,
      "learning_rate": 0.0001510427065076099,
      "loss": 3.123,
      "step": 153308
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3557469844818115,
      "learning_rate": 0.0001510391558201643,
      "loss": 2.9347,
      "step": 153309
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0981762409210205,
      "learning_rate": 0.00015103560516041336,
      "loss": 2.8015,
      "step": 153310
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.241729497909546,
      "learning_rate": 0.00015103205452835758,
      "loss": 2.8399,
      "step": 153311
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.354384422302246,
      "learning_rate": 0.0001510285039239979,
      "loss": 3.0707,
      "step": 153312
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0847325325012207,
      "learning_rate": 0.0001510249533473347,
      "loss": 2.8981,
      "step": 153313
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.12536883354187,
      "learning_rate": 0.00015102140279836887,
      "loss": 3.0441,
      "step": 153314
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.268293619155884,
      "learning_rate": 0.000151017852277101,
      "loss": 2.9799,
      "step": 153315
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2144269943237305,
      "learning_rate": 0.0001510143017835317,
      "loss": 3.0253,
      "step": 153316
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0723516941070557,
      "learning_rate": 0.00015101075131766155,
      "loss": 2.7346,
      "step": 153317
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.292053699493408,
      "learning_rate": 0.00015100720087949144,
      "loss": 3.0603,
      "step": 153318
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.154283285140991,
      "learning_rate": 0.00015100365046902178,
      "loss": 2.9659,
      "step": 153319
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2697293758392334,
      "learning_rate": 0.00015100010008625345,
      "loss": 2.7585,
      "step": 153320
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.506312131881714,
      "learning_rate": 0.00015099654973118706,
      "loss": 2.9373,
      "step": 153321
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.297813892364502,
      "learning_rate": 0.0001509929994038232,
      "loss": 3.1191,
      "step": 153322
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7148008346557617,
      "learning_rate": 0.0001509894491041624,
      "loss": 2.7792,
      "step": 153323
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3436529636383057,
      "learning_rate": 0.00015098589883220565,
      "loss": 3.0591,
      "step": 153324
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.149679183959961,
      "learning_rate": 0.0001509823485879533,
      "loss": 3.0065,
      "step": 153325
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.239063262939453,
      "learning_rate": 0.00015097879837140625,
      "loss": 3.0326,
      "step": 153326
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.225321054458618,
      "learning_rate": 0.00015097524818256505,
      "loss": 2.6859,
      "step": 153327
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.284255266189575,
      "learning_rate": 0.00015097169802143028,
      "loss": 2.9263,
      "step": 153328
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.979235887527466,
      "learning_rate": 0.00015096814788800278,
      "loss": 3.1238,
      "step": 153329
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9847545623779297,
      "learning_rate": 0.00015096459778228308,
      "loss": 2.9796,
      "step": 153330
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1753385066986084,
      "learning_rate": 0.00015096104770427181,
      "loss": 3.0826,
      "step": 153331
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.404005527496338,
      "learning_rate": 0.00015095749765396975,
      "loss": 3.0143,
      "step": 153332
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5574944019317627,
      "learning_rate": 0.00015095394763137756,
      "loss": 2.9423,
      "step": 153333
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5050809383392334,
      "learning_rate": 0.00015095039763649573,
      "loss": 3.1928,
      "step": 153334
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3933234214782715,
      "learning_rate": 0.00015094684766932511,
      "loss": 3.0482,
      "step": 153335
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4396355152130127,
      "learning_rate": 0.00015094329772986632,
      "loss": 3.0908,
      "step": 153336
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.357931137084961,
      "learning_rate": 0.00015093974781811987,
      "loss": 2.7457,
      "step": 153337
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.493523120880127,
      "learning_rate": 0.00015093619793408663,
      "loss": 2.8879,
      "step": 153338
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.511190176010132,
      "learning_rate": 0.0001509326480777671,
      "loss": 2.9815,
      "step": 153339
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.080733299255371,
      "learning_rate": 0.000150929098249162,
      "loss": 3.0331,
      "step": 153340
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.589592695236206,
      "learning_rate": 0.00015092554844827212,
      "loss": 3.1125,
      "step": 153341
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2493460178375244,
      "learning_rate": 0.00015092199867509785,
      "loss": 3.0933,
      "step": 153342
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2870378494262695,
      "learning_rate": 0.00015091844892964007,
      "loss": 2.9601,
      "step": 153343
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.044170618057251,
      "learning_rate": 0.0001509148992118994,
      "loss": 2.7805,
      "step": 153344
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6114132404327393,
      "learning_rate": 0.00015091134952187633,
      "loss": 3.0735,
      "step": 153345
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.707266330718994,
      "learning_rate": 0.0001509077998595718,
      "loss": 3.0709,
      "step": 153346
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0116779804229736,
      "learning_rate": 0.00015090425022498626,
      "loss": 2.8756,
      "step": 153347
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.646376371383667,
      "learning_rate": 0.00015090070061812033,
      "loss": 3.1128,
      "step": 153348
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.817859649658203,
      "learning_rate": 0.00015089715103897494,
      "loss": 2.8883,
      "step": 153349
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3846638202667236,
      "learning_rate": 0.00015089360148755045,
      "loss": 2.8296,
      "step": 153350
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.337500810623169,
      "learning_rate": 0.0001508900519638478,
      "loss": 2.801,
      "step": 153351
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.644775390625,
      "learning_rate": 0.00015088650246786745,
      "loss": 2.8276,
      "step": 153352
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3636085987091064,
      "learning_rate": 0.00015088295299961013,
      "loss": 2.9077,
      "step": 153353
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3010635375976562,
      "learning_rate": 0.00015087940355907638,
      "loss": 2.9509,
      "step": 153354
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8449866771698,
      "learning_rate": 0.00015087585414626703,
      "loss": 2.7995,
      "step": 153355
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3968889713287354,
      "learning_rate": 0.0001508723047611826,
      "loss": 3.0877,
      "step": 153356
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4126904010772705,
      "learning_rate": 0.00015086875540382396,
      "loss": 2.8609,
      "step": 153357
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9803818464279175,
      "learning_rate": 0.0001508652060741916,
      "loss": 2.9806,
      "step": 153358
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.028406858444214,
      "learning_rate": 0.00015086165677228626,
      "loss": 3.0125,
      "step": 153359
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.129640579223633,
      "learning_rate": 0.00015085810749810837,
      "loss": 2.803,
      "step": 153360
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7844254970550537,
      "learning_rate": 0.00015085455825165892,
      "loss": 2.9643,
      "step": 153361
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7051162719726562,
      "learning_rate": 0.0001508510090329383,
      "loss": 2.8774,
      "step": 153362
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.285391330718994,
      "learning_rate": 0.00015084745984194744,
      "loss": 2.8949,
      "step": 153363
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.991546869277954,
      "learning_rate": 0.00015084391067868684,
      "loss": 2.9415,
      "step": 153364
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.260387659072876,
      "learning_rate": 0.00015084036154315713,
      "loss": 2.8753,
      "step": 153365
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.441391944885254,
      "learning_rate": 0.00015083681243535893,
      "loss": 2.7736,
      "step": 153366
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.640913724899292,
      "learning_rate": 0.0001508332633552931,
      "loss": 3.1146,
      "step": 153367
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7847416400909424,
      "learning_rate": 0.00015082971430296007,
      "loss": 2.8649,
      "step": 153368
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.436872720718384,
      "learning_rate": 0.00015082616527836072,
      "loss": 2.9297,
      "step": 153369
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.981523036956787,
      "learning_rate": 0.00015082261628149562,
      "loss": 3.3788,
      "step": 153370
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.353428602218628,
      "learning_rate": 0.00015081906731236538,
      "loss": 2.7086,
      "step": 153371
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.09243106842041,
      "learning_rate": 0.0001508155183709706,
      "loss": 2.9562,
      "step": 153372
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3914284706115723,
      "learning_rate": 0.00015081196945731213,
      "loss": 3.0782,
      "step": 153373
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1255078315734863,
      "learning_rate": 0.0001508084205713904,
      "loss": 2.8164,
      "step": 153374
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9707131385803223,
      "learning_rate": 0.00015080487171320635,
      "loss": 2.9578,
      "step": 153375
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1036007404327393,
      "learning_rate": 0.0001508013228827605,
      "loss": 3.1391,
      "step": 153376
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9793826341629028,
      "learning_rate": 0.00015079777408005346,
      "loss": 2.9015,
      "step": 153377
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0081021785736084,
      "learning_rate": 0.00015079422530508586,
      "loss": 3.0017,
      "step": 153378
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.789485216140747,
      "learning_rate": 0.0001507906765578585,
      "loss": 3.024,
      "step": 153379
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2828409671783447,
      "learning_rate": 0.00015078712783837188,
      "loss": 2.9236,
      "step": 153380
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.16584849357605,
      "learning_rate": 0.00015078357914662685,
      "loss": 2.9271,
      "step": 153381
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8909249305725098,
      "learning_rate": 0.00015078003048262398,
      "loss": 2.883,
      "step": 153382
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3342127799987793,
      "learning_rate": 0.00015077648184636392,
      "loss": 3.0047,
      "step": 153383
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4487390518188477,
      "learning_rate": 0.00015077293323784722,
      "loss": 2.9118,
      "step": 153384
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4737703800201416,
      "learning_rate": 0.00015076938465707475,
      "loss": 2.8471,
      "step": 153385
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9040590524673462,
      "learning_rate": 0.00015076583610404694,
      "loss": 2.8313,
      "step": 153386
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.429111957550049,
      "learning_rate": 0.0001507622875787647,
      "loss": 3.1071,
      "step": 153387
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3179357051849365,
      "learning_rate": 0.00015075873908122861,
      "loss": 2.8634,
      "step": 153388
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3661160469055176,
      "learning_rate": 0.00015075519061143923,
      "loss": 2.8837,
      "step": 153389
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2018165588378906,
      "learning_rate": 0.00015075164216939721,
      "loss": 2.9319,
      "step": 153390
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.397578716278076,
      "learning_rate": 0.00015074809375510337,
      "loss": 3.052,
      "step": 153391
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3882861137390137,
      "learning_rate": 0.00015074454536855817,
      "loss": 2.8773,
      "step": 153392
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0885093212127686,
      "learning_rate": 0.00015074099700976247,
      "loss": 2.9876,
      "step": 153393
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2465903759002686,
      "learning_rate": 0.00015073744867871686,
      "loss": 2.8875,
      "step": 153394
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1262929439544678,
      "learning_rate": 0.00015073390037542193,
      "loss": 2.9053,
      "step": 153395
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3216917514801025,
      "learning_rate": 0.00015073035209987835,
      "loss": 2.9825,
      "step": 153396
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.645015239715576,
      "learning_rate": 0.00015072680385208687,
      "loss": 3.0733,
      "step": 153397
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7177937030792236,
      "learning_rate": 0.000150723255632048,
      "loss": 3.2193,
      "step": 153398
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.684760093688965,
      "learning_rate": 0.0001507197074397626,
      "loss": 3.0409,
      "step": 153399
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2392609119415283,
      "learning_rate": 0.00015071615927523123,
      "loss": 2.6856,
      "step": 153400
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3427622318267822,
      "learning_rate": 0.00015071261113845457,
      "loss": 2.8777,
      "step": 153401
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.361760139465332,
      "learning_rate": 0.0001507090630294331,
      "loss": 2.9899,
      "step": 153402
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2118895053863525,
      "learning_rate": 0.0001507055149481678,
      "loss": 2.8624,
      "step": 153403
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.172194242477417,
      "learning_rate": 0.00015070196689465898,
      "loss": 3.0851,
      "step": 153404
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7719836235046387,
      "learning_rate": 0.00015069841886890763,
      "loss": 3.0702,
      "step": 153405
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2723329067230225,
      "learning_rate": 0.0001506948708709141,
      "loss": 3.1676,
      "step": 153406
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4124794006347656,
      "learning_rate": 0.0001506913229006795,
      "loss": 2.889,
      "step": 153407
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2670135498046875,
      "learning_rate": 0.00015068777495820396,
      "loss": 3.0035,
      "step": 153408
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8232219219207764,
      "learning_rate": 0.00015068422704348847,
      "loss": 3.2464,
      "step": 153409
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.220195770263672,
      "learning_rate": 0.00015068067915653352,
      "loss": 3.1244,
      "step": 153410
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1088993549346924,
      "learning_rate": 0.00015067713129733996,
      "loss": 3.0095,
      "step": 153411
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7290990352630615,
      "learning_rate": 0.00015067358346590825,
      "loss": 2.9209,
      "step": 153412
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2847421169281006,
      "learning_rate": 0.0001506700356622392,
      "loss": 2.9759,
      "step": 153413
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1327121257781982,
      "learning_rate": 0.00015066648788633345,
      "loss": 2.8468,
      "step": 153414
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2444071769714355,
      "learning_rate": 0.00015066294013819164,
      "loss": 2.9378,
      "step": 153415
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.168133497238159,
      "learning_rate": 0.0001506593924178142,
      "loss": 2.8532,
      "step": 153416
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5870742797851562,
      "learning_rate": 0.0001506558447252022,
      "loss": 3.125,
      "step": 153417
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3125853538513184,
      "learning_rate": 0.00015065229706035592,
      "loss": 3.0847,
      "step": 153418
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.280423641204834,
      "learning_rate": 0.00015064874942327634,
      "loss": 2.8821,
      "step": 153419
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8350846767425537,
      "learning_rate": 0.00015064520181396398,
      "loss": 2.8875,
      "step": 153420
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2945873737335205,
      "learning_rate": 0.00015064165423241948,
      "loss": 2.8828,
      "step": 153421
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5440433025360107,
      "learning_rate": 0.00015063810667864346,
      "loss": 3.0837,
      "step": 153422
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2504866123199463,
      "learning_rate": 0.0001506345591526367,
      "loss": 2.8914,
      "step": 153423
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.421891212463379,
      "learning_rate": 0.0001506310116543997,
      "loss": 3.0452,
      "step": 153424
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3927791118621826,
      "learning_rate": 0.0001506274641839333,
      "loss": 2.8933,
      "step": 153425
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.8643827438354492,
      "learning_rate": 0.00015062391674123808,
      "loss": 3.0702,
      "step": 153426
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.725749969482422,
      "learning_rate": 0.00015062036932631458,
      "loss": 3.3085,
      "step": 153427
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2295939922332764,
      "learning_rate": 0.0001506168219391637,
      "loss": 2.8223,
      "step": 153428
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4744315147399902,
      "learning_rate": 0.00015061327457978598,
      "loss": 2.8687,
      "step": 153429
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4872477054595947,
      "learning_rate": 0.00015060972724818196,
      "loss": 2.9283,
      "step": 153430
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.313077211380005,
      "learning_rate": 0.0001506061799443525,
      "loss": 2.9448,
      "step": 153431
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.757840156555176,
      "learning_rate": 0.0001506026326682982,
      "loss": 2.6685,
      "step": 153432
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.339578628540039,
      "learning_rate": 0.00015059908542001957,
      "loss": 3.0247,
      "step": 153433
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1184780597686768,
      "learning_rate": 0.0001505955381995175,
      "loss": 3.1209,
      "step": 153434
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.794340133666992,
      "learning_rate": 0.0001505919910067924,
      "loss": 2.9451,
      "step": 153435
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4096298217773438,
      "learning_rate": 0.0001505884438418452,
      "loss": 2.8491,
      "step": 153436
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5214970111846924,
      "learning_rate": 0.00015058489670467646,
      "loss": 2.8243,
      "step": 153437
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4739809036254883,
      "learning_rate": 0.00015058134959528677,
      "loss": 2.6724,
      "step": 153438
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.244884967803955,
      "learning_rate": 0.0001505778025136767,
      "loss": 2.7888,
      "step": 153439
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4717328548431396,
      "learning_rate": 0.0001505742554598472,
      "loss": 2.891,
      "step": 153440
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.194833278656006,
      "learning_rate": 0.00015057070843379862,
      "loss": 2.9046,
      "step": 153441
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.686657428741455,
      "learning_rate": 0.0001505671614355319,
      "loss": 2.9453,
      "step": 153442
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4276657104492188,
      "learning_rate": 0.00015056361446504752,
      "loss": 2.8526,
      "step": 153443
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.10090970993042,
      "learning_rate": 0.00015056006752234624,
      "loss": 2.9014,
      "step": 153444
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.333115577697754,
      "learning_rate": 0.00015055652060742855,
      "loss": 2.8854,
      "step": 153445
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3239715099334717,
      "learning_rate": 0.0001505529737202953,
      "loss": 2.8528,
      "step": 153446
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4028220176696777,
      "learning_rate": 0.00015054942686094696,
      "loss": 2.8671,
      "step": 153447
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.9744298458099365,
      "learning_rate": 0.00015054588002938446,
      "loss": 2.8881,
      "step": 153448
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.494593620300293,
      "learning_rate": 0.00015054233322560825,
      "loss": 3.108,
      "step": 153449
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.383885383605957,
      "learning_rate": 0.00015053878644961908,
      "loss": 2.95,
      "step": 153450
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.024456739425659,
      "learning_rate": 0.00015053523970141744,
      "loss": 2.856,
      "step": 153451
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.330524444580078,
      "learning_rate": 0.00015053169298100426,
      "loss": 2.6396,
      "step": 153452
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0916759967803955,
      "learning_rate": 0.00015052814628837993,
      "loss": 2.7285,
      "step": 153453
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7948763370513916,
      "learning_rate": 0.0001505245996235453,
      "loss": 2.9862,
      "step": 153454
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5403764247894287,
      "learning_rate": 0.00015052105298650107,
      "loss": 2.911,
      "step": 153455
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.975539207458496,
      "learning_rate": 0.00015051750637724772,
      "loss": 2.7536,
      "step": 153456
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4660325050354004,
      "learning_rate": 0.00015051395979578586,
      "loss": 2.9943,
      "step": 153457
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.396230459213257,
      "learning_rate": 0.00015051041324211646,
      "loss": 2.8883,
      "step": 153458
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.785799503326416,
      "learning_rate": 0.00015050686671623986,
      "loss": 2.8325,
      "step": 153459
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0692567825317383,
      "learning_rate": 0.00015050332021815694,
      "loss": 2.8599,
      "step": 153460
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5682332515716553,
      "learning_rate": 0.0001504997737478683,
      "loss": 3.1329,
      "step": 153461
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.75313401222229,
      "learning_rate": 0.00015049622730537457,
      "loss": 2.8887,
      "step": 153462
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.445819854736328,
      "learning_rate": 0.0001504926808906763,
      "loss": 3.2179,
      "step": 153463
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.495249032974243,
      "learning_rate": 0.00015048913450377437,
      "loss": 2.9354,
      "step": 153464
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5492372512817383,
      "learning_rate": 0.0001504855881446692,
      "loss": 2.6965,
      "step": 153465
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6317574977874756,
      "learning_rate": 0.00015048204181336173,
      "loss": 2.9603,
      "step": 153466
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1506171226501465,
      "learning_rate": 0.00015047849550985247,
      "loss": 3.1848,
      "step": 153467
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7736876010894775,
      "learning_rate": 0.00015047494923414205,
      "loss": 2.9834,
      "step": 153468
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.857858419418335,
      "learning_rate": 0.00015047140298623106,
      "loss": 2.5794,
      "step": 153469
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5436809062957764,
      "learning_rate": 0.00015046785676612036,
      "loss": 2.8207,
      "step": 153470
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.332902193069458,
      "learning_rate": 0.00015046431057381043,
      "loss": 2.9363,
      "step": 153471
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.803255796432495,
      "learning_rate": 0.00015046076440930213,
      "loss": 3.1293,
      "step": 153472
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.860100746154785,
      "learning_rate": 0.00015045721827259585,
      "loss": 3.1301,
      "step": 153473
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0112497806549072,
      "learning_rate": 0.0001504536721636926,
      "loss": 3.2912,
      "step": 153474
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2699034214019775,
      "learning_rate": 0.00015045012608259263,
      "loss": 3.0528,
      "step": 153475
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1152100563049316,
      "learning_rate": 0.00015044658002929693,
      "loss": 2.9964,
      "step": 153476
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.897981643676758,
      "learning_rate": 0.00015044303400380592,
      "loss": 2.7451,
      "step": 153477
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0849997997283936,
      "learning_rate": 0.00015043948800612047,
      "loss": 2.8743,
      "step": 153478
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.205268621444702,
      "learning_rate": 0.00015043594203624107,
      "loss": 2.7152,
      "step": 153479
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.326970338821411,
      "learning_rate": 0.00015043239609416863,
      "loss": 3.0333,
      "step": 153480
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1273162364959717,
      "learning_rate": 0.0001504288501799034,
      "loss": 2.9259,
      "step": 153481
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.6469926834106445,
      "learning_rate": 0.0001504253042934464,
      "loss": 3.0162,
      "step": 153482
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3748364448547363,
      "learning_rate": 0.00015042175843479807,
      "loss": 2.7503,
      "step": 153483
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0466108322143555,
      "learning_rate": 0.00015041821260395927,
      "loss": 2.7524,
      "step": 153484
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4693708419799805,
      "learning_rate": 0.00015041466680093042,
      "loss": 3.2602,
      "step": 153485
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7615230083465576,
      "learning_rate": 0.00015041112102571254,
      "loss": 3.0803,
      "step": 153486
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9935665130615234,
      "learning_rate": 0.00015040757527830578,
      "loss": 2.8464,
      "step": 153487
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1322286128997803,
      "learning_rate": 0.00015040402955871125,
      "loss": 2.8258,
      "step": 153488
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7244958877563477,
      "learning_rate": 0.0001504004838669293,
      "loss": 3.0421,
      "step": 153489
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.670971393585205,
      "learning_rate": 0.00015039693820296084,
      "loss": 3.0117,
      "step": 153490
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.982330799102783,
      "learning_rate": 0.0001503933925668063,
      "loss": 3.0023,
      "step": 153491
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.4337973594665527,
      "learning_rate": 0.0001503898469584667,
      "loss": 3.1488,
      "step": 153492
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2893083095550537,
      "learning_rate": 0.00015038630137794218,
      "loss": 3.1211,
      "step": 153493
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2686867713928223,
      "learning_rate": 0.00015038275582523382,
      "loss": 3.1157,
      "step": 153494
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5807127952575684,
      "learning_rate": 0.000150379210300342,
      "loss": 2.9076,
      "step": 153495
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.9097750186920166,
      "learning_rate": 0.00015037566480326762,
      "loss": 2.8661,
      "step": 153496
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.322507381439209,
      "learning_rate": 0.0001503721193340111,
      "loss": 2.9064,
      "step": 153497
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4521868228912354,
      "learning_rate": 0.0001503685738925734,
      "loss": 2.8813,
      "step": 153498
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1723105907440186,
      "learning_rate": 0.00015036502847895494,
      "loss": 2.9413,
      "step": 153499
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0954222679138184,
      "learning_rate": 0.0001503614830931565,
      "loss": 2.9921,
      "step": 153500
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.278982639312744,
      "learning_rate": 0.0001503579377351785,
      "loss": 3.1674,
      "step": 153501
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.593451499938965,
      "learning_rate": 0.00015035439240502197,
      "loss": 3.1901,
      "step": 153502
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5508272647857666,
      "learning_rate": 0.00015035084710268725,
      "loss": 2.9948,
      "step": 153503
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.618481159210205,
      "learning_rate": 0.00015034730182817525,
      "loss": 3.0038,
      "step": 153504
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.685822010040283,
      "learning_rate": 0.0001503437565814865,
      "loss": 2.9182,
      "step": 153505
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2626335620880127,
      "learning_rate": 0.00015034021136262166,
      "loss": 2.787,
      "step": 153506
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.717898845672607,
      "learning_rate": 0.0001503366661715813,
      "loss": 3.0347,
      "step": 153507
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.605600357055664,
      "learning_rate": 0.0001503331210083663,
      "loss": 2.8971,
      "step": 153508
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.577469825744629,
      "learning_rate": 0.00015032957587297706,
      "loss": 2.7263,
      "step": 153509
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.140202522277832,
      "learning_rate": 0.00015032603076541446,
      "loss": 3.1958,
      "step": 153510
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2269465923309326,
      "learning_rate": 0.00015032248568567916,
      "loss": 3.0168,
      "step": 153511
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0038669109344482,
      "learning_rate": 0.0001503189406337716,
      "loss": 2.9987,
      "step": 153512
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.361398220062256,
      "learning_rate": 0.00015031539560969267,
      "loss": 3.0614,
      "step": 153513
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.6945245265960693,
      "learning_rate": 0.00015031185061344291,
      "loss": 3.0246,
      "step": 153514
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.186350107192993,
      "learning_rate": 0.0001503083056450229,
      "loss": 3.1633,
      "step": 153515
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2262370586395264,
      "learning_rate": 0.00015030476070443356,
      "loss": 2.9624,
      "step": 153516
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.379322052001953,
      "learning_rate": 0.00015030121579167536,
      "loss": 2.9235,
      "step": 153517
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1237056255340576,
      "learning_rate": 0.00015029767090674886,
      "loss": 2.9623,
      "step": 153518
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.6266884803771973,
      "learning_rate": 0.00015029412604965496,
      "loss": 2.8537,
      "step": 153519
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0871524810791016,
      "learning_rate": 0.00015029058122039424,
      "loss": 3.1178,
      "step": 153520
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.485304117202759,
      "learning_rate": 0.00015028703641896719,
      "loss": 3.0121,
      "step": 153521
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2049400806427,
      "learning_rate": 0.00015028349164537472,
      "loss": 3.0746,
      "step": 153522
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7106428146362305,
      "learning_rate": 0.0001502799468996174,
      "loss": 2.8056,
      "step": 153523
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.341611862182617,
      "learning_rate": 0.00015027640218169574,
      "loss": 2.9468,
      "step": 153524
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7123281955718994,
      "learning_rate": 0.0001502728574916106,
      "loss": 3.0474,
      "step": 153525
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2465713024139404,
      "learning_rate": 0.0001502693128293625,
      "loss": 3.0438,
      "step": 153526
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.438553810119629,
      "learning_rate": 0.00015026576819495224,
      "loss": 2.7776,
      "step": 153527
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3259518146514893,
      "learning_rate": 0.00015026222358838044,
      "loss": 2.9941,
      "step": 153528
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.4403679370880127,
      "learning_rate": 0.0001502586790096477,
      "loss": 3.2677,
      "step": 153529
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.283243179321289,
      "learning_rate": 0.00015025513445875456,
      "loss": 2.8752,
      "step": 153530
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.101396322250366,
      "learning_rate": 0.00015025158993570191,
      "loss": 3.0274,
      "step": 153531
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7858481407165527,
      "learning_rate": 0.00015024804544049025,
      "loss": 3.1204,
      "step": 153532
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6480112075805664,
      "learning_rate": 0.00015024450097312044,
      "loss": 3.1375,
      "step": 153533
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.203443765640259,
      "learning_rate": 0.00015024095653359297,
      "loss": 2.8614,
      "step": 153534
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.087207317352295,
      "learning_rate": 0.00015023741212190854,
      "loss": 3.2008,
      "step": 153535
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.081472873687744,
      "learning_rate": 0.00015023386773806768,
      "loss": 2.7572,
      "step": 153536
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.756190299987793,
      "learning_rate": 0.0001502303233820713,
      "loss": 2.9618,
      "step": 153537
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.625103235244751,
      "learning_rate": 0.00015022677905391979,
      "loss": 2.9994,
      "step": 153538
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0131280422210693,
      "learning_rate": 0.00015022323475361406,
      "loss": 2.8424,
      "step": 153539
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.589143991470337,
      "learning_rate": 0.0001502196904811546,
      "loss": 3.2294,
      "step": 153540
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.345747947692871,
      "learning_rate": 0.00015021614623654233,
      "loss": 3.1571,
      "step": 153541
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.45390248298645,
      "learning_rate": 0.00015021260201977744,
      "loss": 2.8962,
      "step": 153542
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.538731813430786,
      "learning_rate": 0.000150209057830861,
      "loss": 2.9895,
      "step": 153543
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.040846824645996,
      "learning_rate": 0.0001502055136697934,
      "loss": 2.839,
      "step": 153544
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5818638801574707,
      "learning_rate": 0.00015020196953657552,
      "loss": 2.7749,
      "step": 153545
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3990955352783203,
      "learning_rate": 0.00015019842543120785,
      "loss": 3.0173,
      "step": 153546
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.441904306411743,
      "learning_rate": 0.00015019488135369131,
      "loss": 2.8394,
      "step": 153547
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4900996685028076,
      "learning_rate": 0.00015019133730402614,
      "loss": 2.8854,
      "step": 153548
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3577613830566406,
      "learning_rate": 0.0001501877932822134,
      "loss": 3.2068,
      "step": 153549
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2213869094848633,
      "learning_rate": 0.00015018424928825344,
      "loss": 2.8338,
      "step": 153550
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.203746795654297,
      "learning_rate": 0.00015018070532214713,
      "loss": 3.0186,
      "step": 153551
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.797285556793213,
      "learning_rate": 0.00015017716138389497,
      "loss": 2.6834,
      "step": 153552
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.7966673374176025,
      "learning_rate": 0.00015017361747349792,
      "loss": 2.9895,
      "step": 153553
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4535956382751465,
      "learning_rate": 0.0001501700735909562,
      "loss": 2.8197,
      "step": 153554
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5984084606170654,
      "learning_rate": 0.0001501665297362708,
      "loss": 2.9325,
      "step": 153555
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.232062816619873,
      "learning_rate": 0.00015016298590944216,
      "loss": 2.8303,
      "step": 153556
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.458110332489014,
      "learning_rate": 0.0001501594421104712,
      "loss": 3.0588,
      "step": 153557
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.033995151519775,
      "learning_rate": 0.00015015589833935824,
      "loss": 2.8566,
      "step": 153558
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.415771961212158,
      "learning_rate": 0.0001501523545961044,
      "loss": 2.9207,
      "step": 153559
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3675153255462646,
      "learning_rate": 0.00015014881088070984,
      "loss": 2.995,
      "step": 153560
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.186725616455078,
      "learning_rate": 0.00015014526719317556,
      "loss": 3.0319,
      "step": 153561
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2610843181610107,
      "learning_rate": 0.00015014172353350195,
      "loss": 2.9829,
      "step": 153562
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.487881660461426,
      "learning_rate": 0.00015013817990168997,
      "loss": 2.8198,
      "step": 153563
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2263522148132324,
      "learning_rate": 0.00015013463629774004,
      "loss": 2.8775,
      "step": 153564
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5650339126586914,
      "learning_rate": 0.00015013109272165313,
      "loss": 3.0323,
      "step": 153565
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1832637786865234,
      "learning_rate": 0.00015012754917342942,
      "loss": 3.0089,
      "step": 153566
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.481724977493286,
      "learning_rate": 0.00015012400565306993,
      "loss": 2.9302,
      "step": 153567
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.435950517654419,
      "learning_rate": 0.00015012046216057514,
      "loss": 3.1848,
      "step": 153568
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.480618715286255,
      "learning_rate": 0.0001501169186959459,
      "loss": 2.962,
      "step": 153569
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.114506483078003,
      "learning_rate": 0.00015011337525918262,
      "loss": 2.9151,
      "step": 153570
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6632306575775146,
      "learning_rate": 0.00015010983185028635,
      "loss": 3.1991,
      "step": 153571
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.54081392288208,
      "learning_rate": 0.00015010628846925722,
      "loss": 2.7545,
      "step": 153572
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0705628395080566,
      "learning_rate": 0.00015010274511609629,
      "loss": 2.7614,
      "step": 153573
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.722641706466675,
      "learning_rate": 0.00015009920179080396,
      "loss": 2.8048,
      "step": 153574
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4477782249450684,
      "learning_rate": 0.00015009565849338115,
      "loss": 2.7983,
      "step": 153575
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.433776378631592,
      "learning_rate": 0.0001500921152238283,
      "loss": 2.6534,
      "step": 153576
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.621924638748169,
      "learning_rate": 0.00015008857198214637,
      "loss": 2.966,
      "step": 153577
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4574332237243652,
      "learning_rate": 0.00015008502876833556,
      "loss": 2.7895,
      "step": 153578
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.262545108795166,
      "learning_rate": 0.00015008148558239687,
      "loss": 3.0165,
      "step": 153579
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2406487464904785,
      "learning_rate": 0.0001500779424243308,
      "loss": 3.0349,
      "step": 153580
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.332435131072998,
      "learning_rate": 0.00015007439929413816,
      "loss": 2.9816,
      "step": 153581
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2380917072296143,
      "learning_rate": 0.00015007085619181938,
      "loss": 2.6017,
      "step": 153582
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.493908166885376,
      "learning_rate": 0.00015006731311737552,
      "loss": 2.9062,
      "step": 153583
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.356990098953247,
      "learning_rate": 0.0001500637700708067,
      "loss": 2.8867,
      "step": 153584
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.517458915710449,
      "learning_rate": 0.00015006022705211402,
      "loss": 3.3005,
      "step": 153585
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3112194538116455,
      "learning_rate": 0.00015005668406129787,
      "loss": 3.0808,
      "step": 153586
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4259867668151855,
      "learning_rate": 0.0001500531410983591,
      "loss": 2.8945,
      "step": 153587
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.428662061691284,
      "learning_rate": 0.00015004959816329816,
      "loss": 2.9709,
      "step": 153588
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1443350315093994,
      "learning_rate": 0.00015004605525611596,
      "loss": 3.0215,
      "step": 153589
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2482497692108154,
      "learning_rate": 0.00015004251237681304,
      "loss": 2.761,
      "step": 153590
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.7266087532043457,
      "learning_rate": 0.00015003896952539,
      "loss": 2.6838,
      "step": 153591
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5070676803588867,
      "learning_rate": 0.00015003542670184746,
      "loss": 2.8892,
      "step": 153592
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4992196559906006,
      "learning_rate": 0.0001500318839061863,
      "loss": 2.7678,
      "step": 153593
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.455965042114258,
      "learning_rate": 0.0001500283411384069,
      "loss": 3.0997,
      "step": 153594
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1200921535491943,
      "learning_rate": 0.00015002479839851019,
      "loss": 3.1032,
      "step": 153595
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.309593677520752,
      "learning_rate": 0.0001500212556864967,
      "loss": 3.0799,
      "step": 153596
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6538949012756348,
      "learning_rate": 0.00015001771300236698,
      "loss": 2.9323,
      "step": 153597
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.145353078842163,
      "learning_rate": 0.00015001417034612188,
      "loss": 2.8419,
      "step": 153598
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4218788146972656,
      "learning_rate": 0.00015001062771776203,
      "loss": 2.8204,
      "step": 153599
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.270828723907471,
      "learning_rate": 0.00015000708511728788,
      "loss": 2.6955,
      "step": 153600
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.533149480819702,
      "learning_rate": 0.00015000354254470036,
      "loss": 3.0082,
      "step": 153601
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.6298038959503174,
      "learning_rate": 0.00015000000000000004,
      "loss": 2.5313,
      "step": 153602
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1613364219665527,
      "learning_rate": 0.00014999645748318743,
      "loss": 2.952,
      "step": 153603
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.222414255142212,
      "learning_rate": 0.00014999291499426343,
      "loss": 2.9519,
      "step": 153604
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.322732448577881,
      "learning_rate": 0.00014998937253322855,
      "loss": 2.9619,
      "step": 153605
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.526658773422241,
      "learning_rate": 0.0001499858301000834,
      "loss": 2.9969,
      "step": 153606
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5617835521698,
      "learning_rate": 0.0001499822876948288,
      "loss": 2.9273,
      "step": 153607
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3768820762634277,
      "learning_rate": 0.00014997874531746532,
      "loss": 2.945,
      "step": 153608
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.43166446685791,
      "learning_rate": 0.00014997520296799354,
      "loss": 2.7626,
      "step": 153609
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.265106678009033,
      "learning_rate": 0.00014997166064641427,
      "loss": 2.8636,
      "step": 153610
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2552273273468018,
      "learning_rate": 0.00014996811835272803,
      "loss": 2.9957,
      "step": 153611
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4588119983673096,
      "learning_rate": 0.00014996457608693563,
      "loss": 2.8445,
      "step": 153612
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3588979244232178,
      "learning_rate": 0.00014996103384903766,
      "loss": 2.9024,
      "step": 153613
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.918441653251648,
      "learning_rate": 0.00014995749163903478,
      "loss": 3.0616,
      "step": 153614
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2773876190185547,
      "learning_rate": 0.00014995394945692753,
      "loss": 3.2966,
      "step": 153615
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3223559856414795,
      "learning_rate": 0.00014995040730271677,
      "loss": 2.9314,
      "step": 153616
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3502912521362305,
      "learning_rate": 0.00014994686517640296,
      "loss": 2.8127,
      "step": 153617
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8092103004455566,
      "learning_rate": 0.00014994332307798695,
      "loss": 3.0522,
      "step": 153618
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2160518169403076,
      "learning_rate": 0.0001499397810074693,
      "loss": 2.844,
      "step": 153619
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.577904224395752,
      "learning_rate": 0.0001499362389648507,
      "loss": 2.943,
      "step": 153620
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.228977680206299,
      "learning_rate": 0.0001499326969501317,
      "loss": 3.055,
      "step": 153621
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3205580711364746,
      "learning_rate": 0.0001499291549633131,
      "loss": 2.9288,
      "step": 153622
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5689704418182373,
      "learning_rate": 0.0001499256130043954,
      "loss": 3.1028,
      "step": 153623
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.637803554534912,
      "learning_rate": 0.00014992207107337953,
      "loss": 2.9035,
      "step": 153624
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.326117753982544,
      "learning_rate": 0.00014991852917026582,
      "loss": 2.8544,
      "step": 153625
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.189612865447998,
      "learning_rate": 0.0001499149872950553,
      "loss": 2.8446,
      "step": 153626
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.467393159866333,
      "learning_rate": 0.00014991144544774824,
      "loss": 3.0856,
      "step": 153627
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.137092113494873,
      "learning_rate": 0.00014990790362834556,
      "loss": 2.7403,
      "step": 153628
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0199410915374756,
      "learning_rate": 0.0001499043618368477,
      "loss": 2.8689,
      "step": 153629
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.563048839569092,
      "learning_rate": 0.0001499008200732556,
      "loss": 2.8153,
      "step": 153630
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5407207012176514,
      "learning_rate": 0.00014989727833756965,
      "loss": 2.978,
      "step": 153631
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2739531993865967,
      "learning_rate": 0.00014989373662979084,
      "loss": 2.8847,
      "step": 153632
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5403740406036377,
      "learning_rate": 0.0001498901949499194,
      "loss": 2.9937,
      "step": 153633
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.372135877609253,
      "learning_rate": 0.00014988665329795635,
      "loss": 3.0098,
      "step": 153634
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0877439975738525,
      "learning_rate": 0.00014988311167390205,
      "loss": 3.2477,
      "step": 153635
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.98441743850708,
      "learning_rate": 0.00014987957007775744,
      "loss": 2.606,
      "step": 153636
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.128026247024536,
      "learning_rate": 0.0001498760285095229,
      "loss": 2.8407,
      "step": 153637
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9893593788146973,
      "learning_rate": 0.00014987248696919953,
      "loss": 3.0259,
      "step": 153638
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.155045747756958,
      "learning_rate": 0.00014986894545678745,
      "loss": 2.8596,
      "step": 153639
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3533737659454346,
      "learning_rate": 0.0001498654039722877,
      "loss": 2.9521,
      "step": 153640
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.196948528289795,
      "learning_rate": 0.00014986186251570065,
      "loss": 2.9098,
      "step": 153641
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3579931259155273,
      "learning_rate": 0.00014985832108702725,
      "loss": 2.8101,
      "step": 153642
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.874545097351074,
      "learning_rate": 0.0001498547796862679,
      "loss": 2.9385,
      "step": 153643
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.674408197402954,
      "learning_rate": 0.0001498512383134236,
      "loss": 2.818,
      "step": 153644
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8392038345336914,
      "learning_rate": 0.0001498476969684946,
      "loss": 3.0803,
      "step": 153645
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.078028917312622,
      "learning_rate": 0.00014984415565148183,
      "loss": 2.8607,
      "step": 153646
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2930424213409424,
      "learning_rate": 0.00014984061436238577,
      "loss": 2.9703,
      "step": 153647
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.739656448364258,
      "learning_rate": 0.00014983707310120727,
      "loss": 2.9616,
      "step": 153648
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5525455474853516,
      "learning_rate": 0.00014983353186794678,
      "loss": 3.0409,
      "step": 153649
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.985199213027954,
      "learning_rate": 0.0001498299906626053,
      "loss": 2.7846,
      "step": 153650
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0247867107391357,
      "learning_rate": 0.00014982644948518305,
      "loss": 3.1534,
      "step": 153651
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.485769033432007,
      "learning_rate": 0.00014982290833568098,
      "loss": 3.0001,
      "step": 153652
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5965957641601562,
      "learning_rate": 0.0001498193672140996,
      "loss": 2.9004,
      "step": 153653
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.300814628601074,
      "learning_rate": 0.0001498158261204397,
      "loss": 3.2058,
      "step": 153654
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.822521924972534,
      "learning_rate": 0.00014981228505470178,
      "loss": 2.9306,
      "step": 153655
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.809117317199707,
      "learning_rate": 0.00014980874401688684,
      "loss": 3.0509,
      "step": 153656
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.244462728500366,
      "learning_rate": 0.000149805203006995,
      "loss": 3.0271,
      "step": 153657
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.820641279220581,
      "learning_rate": 0.00014980166202502738,
      "loss": 2.7139,
      "step": 153658
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2592318058013916,
      "learning_rate": 0.00014979812107098432,
      "loss": 3.037,
      "step": 153659
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.703828811645508,
      "learning_rate": 0.00014979458014486674,
      "loss": 3.0751,
      "step": 153660
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.978931188583374,
      "learning_rate": 0.00014979103924667509,
      "loss": 2.8528,
      "step": 153661
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5363175868988037,
      "learning_rate": 0.00014978749837641034,
      "loss": 2.9539,
      "step": 153662
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.323974609375,
      "learning_rate": 0.0001497839575340727,
      "loss": 2.95,
      "step": 153663
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.509571075439453,
      "learning_rate": 0.0001497804167196632,
      "loss": 2.8345,
      "step": 153664
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1957454681396484,
      "learning_rate": 0.0001497768759331822,
      "loss": 2.9133,
      "step": 153665
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9579143524169922,
      "learning_rate": 0.00014977333517463066,
      "loss": 3.0641,
      "step": 153666
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9176712036132812,
      "learning_rate": 0.0001497697944440089,
      "loss": 2.8728,
      "step": 153667
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3031115531921387,
      "learning_rate": 0.00014976625374131812,
      "loss": 3.0229,
      "step": 153668
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7234506607055664,
      "learning_rate": 0.00014976271306655834,
      "loss": 2.9751,
      "step": 153669
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.620370626449585,
      "learning_rate": 0.0001497591724197306,
      "loss": 3.1073,
      "step": 153670
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.277822494506836,
      "learning_rate": 0.00014975563180083537,
      "loss": 2.9374,
      "step": 153671
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1867380142211914,
      "learning_rate": 0.00014975209120987351,
      "loss": 2.94,
      "step": 153672
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2727839946746826,
      "learning_rate": 0.00014974855064684548,
      "loss": 2.9098,
      "step": 153673
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9554917812347412,
      "learning_rate": 0.00014974501011175216,
      "loss": 2.9628,
      "step": 153674
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7378928661346436,
      "learning_rate": 0.00014974146960459404,
      "loss": 2.9078,
      "step": 153675
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.478860855102539,
      "learning_rate": 0.00014973792912537185,
      "loss": 2.8226,
      "step": 153676
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.368560552597046,
      "learning_rate": 0.00014973438867408605,
      "loss": 3.0737,
      "step": 153677
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.478846549987793,
      "learning_rate": 0.00014973084825073763,
      "loss": 2.8639,
      "step": 153678
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5758755207061768,
      "learning_rate": 0.00014972730785532695,
      "loss": 2.8195,
      "step": 153679
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.472837209701538,
      "learning_rate": 0.00014972376748785492,
      "loss": 2.7791,
      "step": 153680
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1274337768554688,
      "learning_rate": 0.00014972022714832207,
      "loss": 2.7867,
      "step": 153681
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.081674814224243,
      "learning_rate": 0.00014971668683672905,
      "loss": 2.619,
      "step": 153682
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.691312074661255,
      "learning_rate": 0.00014971314655307645,
      "loss": 2.9334,
      "step": 153683
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1449010372161865,
      "learning_rate": 0.00014970960629736514,
      "loss": 2.6146,
      "step": 153684
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1581666469573975,
      "learning_rate": 0.00014970606606959548,
      "loss": 2.871,
      "step": 153685
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.722499132156372,
      "learning_rate": 0.00014970252586976845,
      "loss": 2.9859,
      "step": 153686
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8723957538604736,
      "learning_rate": 0.00014969898569788456,
      "loss": 2.9755,
      "step": 153687
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5499887466430664,
      "learning_rate": 0.00014969544555394436,
      "loss": 2.5983,
      "step": 153688
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3266472816467285,
      "learning_rate": 0.0001496919054379487,
      "loss": 3.092,
      "step": 153689
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1249074935913086,
      "learning_rate": 0.00014968836534989818,
      "loss": 2.9721,
      "step": 153690
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8927254676818848,
      "learning_rate": 0.0001496848252897933,
      "loss": 3.0417,
      "step": 153691
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.8406429290771484,
      "learning_rate": 0.00014968128525763493,
      "loss": 2.9276,
      "step": 153692
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1669836044311523,
      "learning_rate": 0.00014967774525342372,
      "loss": 3.2008,
      "step": 153693
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1217286586761475,
      "learning_rate": 0.00014967420527716007,
      "loss": 2.8706,
      "step": 153694
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.904309034347534,
      "learning_rate": 0.00014967066532884496,
      "loss": 2.7983,
      "step": 153695
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1040682792663574,
      "learning_rate": 0.00014966712540847885,
      "loss": 3.0344,
      "step": 153696
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1053671836853027,
      "learning_rate": 0.00014966358551606252,
      "loss": 3.045,
      "step": 153697
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.286818504333496,
      "learning_rate": 0.00014966004565159657,
      "loss": 3.0159,
      "step": 153698
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2735984325408936,
      "learning_rate": 0.00014965650581508165,
      "loss": 2.7451,
      "step": 153699
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.369945764541626,
      "learning_rate": 0.0001496529660065183,
      "loss": 2.8098,
      "step": 153700
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8115060329437256,
      "learning_rate": 0.00014964942622590745,
      "loss": 2.6766,
      "step": 153701
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.283114433288574,
      "learning_rate": 0.0001496458864732495,
      "loss": 3.0917,
      "step": 153702
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9825947284698486,
      "learning_rate": 0.0001496423467485453,
      "loss": 3.0016,
      "step": 153703
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.526858329772949,
      "learning_rate": 0.00014963880705179543,
      "loss": 2.9596,
      "step": 153704
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9900016784667969,
      "learning_rate": 0.00014963526738300057,
      "loss": 2.8961,
      "step": 153705
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9969402551651,
      "learning_rate": 0.00014963172774216124,
      "loss": 2.7276,
      "step": 153706
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3070411682128906,
      "learning_rate": 0.0001496281881292783,
      "loss": 2.982,
      "step": 153707
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2376396656036377,
      "learning_rate": 0.0001496246485443522,
      "loss": 2.6667,
      "step": 153708
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2176971435546875,
      "learning_rate": 0.00014962110898738385,
      "loss": 3.064,
      "step": 153709
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.067741632461548,
      "learning_rate": 0.00014961756945837366,
      "loss": 2.9818,
      "step": 153710
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5934982299804688,
      "learning_rate": 0.0001496140299573226,
      "loss": 2.6955,
      "step": 153711
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4634475708007812,
      "learning_rate": 0.00014961049048423092,
      "loss": 2.9992,
      "step": 153712
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1579623222351074,
      "learning_rate": 0.00014960695103909962,
      "loss": 3.1558,
      "step": 153713
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.818467617034912,
      "learning_rate": 0.00014960341162192908,
      "loss": 3.0694,
      "step": 153714
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3127477169036865,
      "learning_rate": 0.00014959987223272028,
      "loss": 3.1449,
      "step": 153715
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2196872234344482,
      "learning_rate": 0.00014959633287147352,
      "loss": 2.943,
      "step": 153716
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6362228393554688,
      "learning_rate": 0.0001495927935381899,
      "loss": 3.079,
      "step": 153717
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2953062057495117,
      "learning_rate": 0.00014958925423286959,
      "loss": 3.1161,
      "step": 153718
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5341763496398926,
      "learning_rate": 0.0001495857149555136,
      "loss": 3.0472,
      "step": 153719
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5467071533203125,
      "learning_rate": 0.00014958217570612233,
      "loss": 2.95,
      "step": 153720
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.508436918258667,
      "learning_rate": 0.0001495786364846967,
      "loss": 2.7782,
      "step": 153721
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2282822132110596,
      "learning_rate": 0.00014957509729123712,
      "loss": 2.837,
      "step": 153722
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6971025466918945,
      "learning_rate": 0.0001495715581257446,
      "loss": 3.2266,
      "step": 153723
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5647149085998535,
      "learning_rate": 0.00014956801898821935,
      "loss": 2.8049,
      "step": 153724
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6147825717926025,
      "learning_rate": 0.00014956447987866234,
      "loss": 2.8793,
      "step": 153725
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2219722270965576,
      "learning_rate": 0.000149560940797074,
      "loss": 2.9487,
      "step": 153726
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4414947032928467,
      "learning_rate": 0.0001495574017434553,
      "loss": 2.8817,
      "step": 153727
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1493592262268066,
      "learning_rate": 0.00014955386271780656,
      "loss": 2.967,
      "step": 153728
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.707326650619507,
      "learning_rate": 0.0001495503237201288,
      "loss": 2.9214,
      "step": 153729
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.101078510284424,
      "learning_rate": 0.0001495467847504223,
      "loss": 2.9852,
      "step": 153730
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2151432037353516,
      "learning_rate": 0.000149543245808688,
      "loss": 2.9483,
      "step": 153731
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3227717876434326,
      "learning_rate": 0.0001495397068949263,
      "loss": 2.914,
      "step": 153732
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.369269371032715,
      "learning_rate": 0.00014953616800913815,
      "loss": 3.0709,
      "step": 153733
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.486849308013916,
      "learning_rate": 0.00014953262915132397,
      "loss": 3.0642,
      "step": 153734
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3761940002441406,
      "learning_rate": 0.0001495290903214847,
      "loss": 3.1444,
      "step": 153735
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9083404541015625,
      "learning_rate": 0.00014952555151962062,
      "loss": 3.0,
      "step": 153736
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2555768489837646,
      "learning_rate": 0.00014952201274573266,
      "loss": 3.0925,
      "step": 153737
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.4315268993377686,
      "learning_rate": 0.0001495184739998213,
      "loss": 2.9246,
      "step": 153738
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.513962745666504,
      "learning_rate": 0.0001495149352818874,
      "loss": 3.0043,
      "step": 153739
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1171085834503174,
      "learning_rate": 0.0001495113965919314,
      "loss": 3.2029,
      "step": 153740
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2868752479553223,
      "learning_rate": 0.00014950785792995424,
      "loss": 2.8203,
      "step": 153741
    },
    {
      "epoch": 2.0,
      "grad_norm": 7.185043811798096,
      "learning_rate": 0.00014950431929595633,
      "loss": 2.9311,
      "step": 153742
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9542572498321533,
      "learning_rate": 0.00014950078068993848,
      "loss": 2.8938,
      "step": 153743
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.426187753677368,
      "learning_rate": 0.00014949724211190112,
      "loss": 2.7793,
      "step": 153744
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.421253204345703,
      "learning_rate": 0.00014949370356184518,
      "loss": 2.6733,
      "step": 153745
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9948434829711914,
      "learning_rate": 0.00014949016503977108,
      "loss": 2.9817,
      "step": 153746
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.296086549758911,
      "learning_rate": 0.00014948662654567973,
      "loss": 2.9695,
      "step": 153747
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.737410068511963,
      "learning_rate": 0.00014948308807957166,
      "loss": 3.1952,
      "step": 153748
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6306817531585693,
      "learning_rate": 0.00014947954964144753,
      "loss": 2.719,
      "step": 153749
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3005635738372803,
      "learning_rate": 0.00014947601123130788,
      "loss": 2.9446,
      "step": 153750
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.495969533920288,
      "learning_rate": 0.00014947247284915354,
      "loss": 3.0901,
      "step": 153751
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.342802047729492,
      "learning_rate": 0.00014946893449498507,
      "loss": 2.9239,
      "step": 153752
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.627105712890625,
      "learning_rate": 0.00014946539616880324,
      "loss": 2.945,
      "step": 153753
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3420443534851074,
      "learning_rate": 0.00014946185787060863,
      "loss": 3.0357,
      "step": 153754
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.224102020263672,
      "learning_rate": 0.00014945831960040193,
      "loss": 2.9639,
      "step": 153755
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.355656623840332,
      "learning_rate": 0.00014945478135818362,
      "loss": 2.8288,
      "step": 153756
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.062992572784424,
      "learning_rate": 0.00014945124314395467,
      "loss": 2.9488,
      "step": 153757
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0924901962280273,
      "learning_rate": 0.00014944770495771542,
      "loss": 3.2796,
      "step": 153758
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.110884666442871,
      "learning_rate": 0.0001494441667994668,
      "loss": 2.8752,
      "step": 153759
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3685433864593506,
      "learning_rate": 0.00014944062866920936,
      "loss": 2.7485,
      "step": 153760
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.31966495513916,
      "learning_rate": 0.00014943709056694377,
      "loss": 3.0928,
      "step": 153761
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.479578733444214,
      "learning_rate": 0.00014943355249267054,
      "loss": 3.0367,
      "step": 153762
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9187902212142944,
      "learning_rate": 0.00014943001444639057,
      "loss": 3.3063,
      "step": 153763
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2218048572540283,
      "learning_rate": 0.0001494264764281043,
      "loss": 2.9885,
      "step": 153764
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.4870660305023193,
      "learning_rate": 0.00014942293843781256,
      "loss": 2.8446,
      "step": 153765
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9214980602264404,
      "learning_rate": 0.00014941940047551595,
      "loss": 2.7942,
      "step": 153766
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1562538146972656,
      "learning_rate": 0.00014941586254121515,
      "loss": 2.9256,
      "step": 153767
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2723169326782227,
      "learning_rate": 0.00014941232463491064,
      "loss": 2.7342,
      "step": 153768
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.127894401550293,
      "learning_rate": 0.00014940878675660338,
      "loss": 2.8819,
      "step": 153769
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4542922973632812,
      "learning_rate": 0.00014940524890629368,
      "loss": 2.9732,
      "step": 153770
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.208000421524048,
      "learning_rate": 0.00014940171108398256,
      "loss": 3.1265,
      "step": 153771
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.060297727584839,
      "learning_rate": 0.00014939817328967048,
      "loss": 2.8935,
      "step": 153772
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.286447763442993,
      "learning_rate": 0.00014939463552335802,
      "loss": 2.7313,
      "step": 153773
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6066739559173584,
      "learning_rate": 0.00014939109778504606,
      "loss": 2.7445,
      "step": 153774
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.09496808052063,
      "learning_rate": 0.00014938756007473514,
      "loss": 3.0535,
      "step": 153775
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.176285982131958,
      "learning_rate": 0.00014938402239242577,
      "loss": 2.9337,
      "step": 153776
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.263533115386963,
      "learning_rate": 0.0001493804847381189,
      "loss": 2.8259,
      "step": 153777
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.403412342071533,
      "learning_rate": 0.000149376947111815,
      "loss": 2.9995,
      "step": 153778
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3601932525634766,
      "learning_rate": 0.00014937340951351471,
      "loss": 3.2337,
      "step": 153779
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2412123680114746,
      "learning_rate": 0.0001493698719432188,
      "loss": 2.8073,
      "step": 153780
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0979976654052734,
      "learning_rate": 0.00014936633440092793,
      "loss": 3.0173,
      "step": 153781
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.167095422744751,
      "learning_rate": 0.00014936279688664258,
      "loss": 3.1376,
      "step": 153782
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.791635274887085,
      "learning_rate": 0.0001493592594003636,
      "loss": 3.0899,
      "step": 153783
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7888004779815674,
      "learning_rate": 0.00014935572194209163,
      "loss": 3.0165,
      "step": 153784
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.11863374710083,
      "learning_rate": 0.00014935218451182712,
      "loss": 2.9856,
      "step": 153785
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3948581218719482,
      "learning_rate": 0.000149348647109571,
      "loss": 2.9481,
      "step": 153786
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2675070762634277,
      "learning_rate": 0.00014934510973532372,
      "loss": 3.1534,
      "step": 153787
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.299321174621582,
      "learning_rate": 0.00014934157238908613,
      "loss": 3.1081,
      "step": 153788
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3277854919433594,
      "learning_rate": 0.0001493380350708588,
      "loss": 3.0045,
      "step": 153789
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4847068786621094,
      "learning_rate": 0.00014933449778064235,
      "loss": 2.9704,
      "step": 153790
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1558444499969482,
      "learning_rate": 0.00014933096051843734,
      "loss": 3.1366,
      "step": 153791
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4932785034179688,
      "learning_rate": 0.0001493274232842447,
      "loss": 3.084,
      "step": 153792
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1775827407836914,
      "learning_rate": 0.0001493238860780648,
      "loss": 3.1081,
      "step": 153793
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6263651847839355,
      "learning_rate": 0.00014932034889989857,
      "loss": 3.004,
      "step": 153794
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2015717029571533,
      "learning_rate": 0.0001493168117497464,
      "loss": 3.0857,
      "step": 153795
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.597635507583618,
      "learning_rate": 0.00014931327462760927,
      "loss": 2.4546,
      "step": 153796
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.478739023208618,
      "learning_rate": 0.00014930973753348747,
      "loss": 2.9937,
      "step": 153797
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5856034755706787,
      "learning_rate": 0.00014930620046738194,
      "loss": 2.8809,
      "step": 153798
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.009390115737915,
      "learning_rate": 0.0001493026634292931,
      "loss": 2.9761,
      "step": 153799
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.013859272003174,
      "learning_rate": 0.00014929912641922186,
      "loss": 3.0605,
      "step": 153800
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.7879626750946045,
      "learning_rate": 0.00014929558943716864,
      "loss": 3.1266,
      "step": 153801
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0679733753204346,
      "learning_rate": 0.00014929205248313446,
      "loss": 2.9348,
      "step": 153802
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.734870672225952,
      "learning_rate": 0.00014928851555711948,
      "loss": 2.9364,
      "step": 153803
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.545635461807251,
      "learning_rate": 0.00014928497865912473,
      "loss": 3.0721,
      "step": 153804
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6094682216644287,
      "learning_rate": 0.00014928144178915067,
      "loss": 3.0571,
      "step": 153805
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4465548992156982,
      "learning_rate": 0.00014927790494719808,
      "loss": 2.8934,
      "step": 153806
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0958404541015625,
      "learning_rate": 0.00014927436813326754,
      "loss": 2.9142,
      "step": 153807
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.335090160369873,
      "learning_rate": 0.00014927083134735978,
      "loss": 2.8458,
      "step": 153808
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0340843200683594,
      "learning_rate": 0.00014926729458947547,
      "loss": 2.8342,
      "step": 153809
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3426361083984375,
      "learning_rate": 0.0001492637578596152,
      "loss": 2.8609,
      "step": 153810
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.027919054031372,
      "learning_rate": 0.0001492602211577795,
      "loss": 2.9408,
      "step": 153811
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.186075687408447,
      "learning_rate": 0.00014925668448396932,
      "loss": 2.944,
      "step": 153812
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.998349189758301,
      "learning_rate": 0.00014925314783818504,
      "loss": 2.9756,
      "step": 153813
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4359242916107178,
      "learning_rate": 0.00014924961122042758,
      "loss": 3.0615,
      "step": 153814
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9363181591033936,
      "learning_rate": 0.00014924607463069744,
      "loss": 3.008,
      "step": 153815
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.485097885131836,
      "learning_rate": 0.0001492425380689953,
      "loss": 2.8602,
      "step": 153816
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.726306438446045,
      "learning_rate": 0.00014923900153532171,
      "loss": 3.0899,
      "step": 153817
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.4444005489349365,
      "learning_rate": 0.00014923546502967753,
      "loss": 3.0273,
      "step": 153818
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.599597454071045,
      "learning_rate": 0.00014923192855206322,
      "loss": 3.0754,
      "step": 153819
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.260205030441284,
      "learning_rate": 0.00014922839210247966,
      "loss": 2.8563,
      "step": 153820
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5941648483276367,
      "learning_rate": 0.00014922485568092742,
      "loss": 3.4184,
      "step": 153821
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.7341108322143555,
      "learning_rate": 0.00014922131928740706,
      "loss": 2.856,
      "step": 153822
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.973413944244385,
      "learning_rate": 0.00014921778292191924,
      "loss": 3.002,
      "step": 153823
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.969007968902588,
      "learning_rate": 0.00014921424658446479,
      "loss": 2.8036,
      "step": 153824
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.178903102874756,
      "learning_rate": 0.00014921071027504412,
      "loss": 2.9906,
      "step": 153825
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.442362070083618,
      "learning_rate": 0.00014920717399365813,
      "loss": 2.6773,
      "step": 153826
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.044288158416748,
      "learning_rate": 0.00014920363774030744,
      "loss": 2.7104,
      "step": 153827
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.9075822830200195,
      "learning_rate": 0.00014920010151499256,
      "loss": 2.7829,
      "step": 153828
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.244845151901245,
      "learning_rate": 0.00014919656531771415,
      "loss": 3.1171,
      "step": 153829
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.345684289932251,
      "learning_rate": 0.00014919302914847307,
      "loss": 3.1414,
      "step": 153830
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7620675563812256,
      "learning_rate": 0.0001491894930072697,
      "loss": 2.7401,
      "step": 153831
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.384916305541992,
      "learning_rate": 0.000149185956894105,
      "loss": 3.0089,
      "step": 153832
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3315651416778564,
      "learning_rate": 0.00014918242080897944,
      "loss": 2.8095,
      "step": 153833
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7291579246520996,
      "learning_rate": 0.0001491788847518938,
      "loss": 2.8577,
      "step": 153834
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.20527720451355,
      "learning_rate": 0.00014917534872284843,
      "loss": 2.9599,
      "step": 153835
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3766257762908936,
      "learning_rate": 0.0001491718127218444,
      "loss": 2.73,
      "step": 153836
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.740111827850342,
      "learning_rate": 0.00014916827674888204,
      "loss": 2.9592,
      "step": 153837
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3349130153656006,
      "learning_rate": 0.00014916474080396225,
      "loss": 2.8235,
      "step": 153838
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.86152982711792,
      "learning_rate": 0.00014916120488708557,
      "loss": 2.8815,
      "step": 153839
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9389963150024414,
      "learning_rate": 0.00014915766899825268,
      "loss": 2.8824,
      "step": 153840
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0833935737609863,
      "learning_rate": 0.00014915413313746414,
      "loss": 3.1504,
      "step": 153841
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4758307933807373,
      "learning_rate": 0.0001491505973047208,
      "loss": 3.1969,
      "step": 153842
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.317321538925171,
      "learning_rate": 0.00014914706150002306,
      "loss": 2.9642,
      "step": 153843
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.140235185623169,
      "learning_rate": 0.00014914352572337186,
      "loss": 2.9756,
      "step": 153844
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1907923221588135,
      "learning_rate": 0.00014913998997476773,
      "loss": 2.9235,
      "step": 153845
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2653331756591797,
      "learning_rate": 0.00014913645425421133,
      "loss": 2.8912,
      "step": 153846
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5920987129211426,
      "learning_rate": 0.00014913291856170322,
      "loss": 2.9873,
      "step": 153847
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0420889854431152,
      "learning_rate": 0.00014912938289724423,
      "loss": 2.9365,
      "step": 153848
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5739049911499023,
      "learning_rate": 0.00014912584726083485,
      "loss": 2.7197,
      "step": 153849
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.03652286529541,
      "learning_rate": 0.0001491223116524759,
      "loss": 3.0098,
      "step": 153850
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.26824688911438,
      "learning_rate": 0.00014911877607216796,
      "loss": 2.9781,
      "step": 153851
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4299964904785156,
      "learning_rate": 0.0001491152405199117,
      "loss": 2.836,
      "step": 153852
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.909791946411133,
      "learning_rate": 0.00014911170499570768,
      "loss": 2.9302,
      "step": 153853
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3530800342559814,
      "learning_rate": 0.00014910816949955673,
      "loss": 3.1781,
      "step": 153854
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8577234745025635,
      "learning_rate": 0.00014910463403145928,
      "loss": 2.866,
      "step": 153855
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3775854110717773,
      "learning_rate": 0.0001491010985914163,
      "loss": 3.1521,
      "step": 153856
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2396657466888428,
      "learning_rate": 0.00014909756317942822,
      "loss": 2.8716,
      "step": 153857
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.641669511795044,
      "learning_rate": 0.00014909402779549565,
      "loss": 2.8213,
      "step": 153858
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.315084934234619,
      "learning_rate": 0.00014909049243961946,
      "loss": 2.6993,
      "step": 153859
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.13279390335083,
      "learning_rate": 0.00014908695711180023,
      "loss": 3.0302,
      "step": 153860
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.524170398712158,
      "learning_rate": 0.00014908342181203845,
      "loss": 3.1088,
      "step": 153861
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9961687326431274,
      "learning_rate": 0.00014907988654033503,
      "loss": 2.8994,
      "step": 153862
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2488491535186768,
      "learning_rate": 0.0001490763512966905,
      "loss": 3.1846,
      "step": 153863
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2958874702453613,
      "learning_rate": 0.0001490728160811054,
      "loss": 2.936,
      "step": 153864
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6882150173187256,
      "learning_rate": 0.00014906928089358066,
      "loss": 3.104,
      "step": 153865
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.188711404800415,
      "learning_rate": 0.00014906574573411675,
      "loss": 2.7906,
      "step": 153866
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2464449405670166,
      "learning_rate": 0.00014906221060271427,
      "loss": 3.0362,
      "step": 153867
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.448359489440918,
      "learning_rate": 0.00014905867549937408,
      "loss": 3.0308,
      "step": 153868
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5358049869537354,
      "learning_rate": 0.0001490551404240966,
      "loss": 2.7638,
      "step": 153869
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1091203689575195,
      "learning_rate": 0.0001490516053768828,
      "loss": 3.0721,
      "step": 153870
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.130087375640869,
      "learning_rate": 0.00014904807035773308,
      "loss": 3.1478,
      "step": 153871
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.529285430908203,
      "learning_rate": 0.0001490445353666481,
      "loss": 3.0536,
      "step": 153872
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3038365840911865,
      "learning_rate": 0.0001490410004036287,
      "loss": 2.9373,
      "step": 153873
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.749077796936035,
      "learning_rate": 0.00014903746546867543,
      "loss": 2.534,
      "step": 153874
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9184373617172241,
      "learning_rate": 0.00014903393056178883,
      "loss": 3.1687,
      "step": 153875
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2515645027160645,
      "learning_rate": 0.0001490303956829698,
      "loss": 3.0169,
      "step": 153876
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9756929874420166,
      "learning_rate": 0.00014902686083221884,
      "loss": 2.9006,
      "step": 153877
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3462002277374268,
      "learning_rate": 0.0001490233260095365,
      "loss": 3.0801,
      "step": 153878
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.05663800239563,
      "learning_rate": 0.00014901979121492373,
      "loss": 3.0695,
      "step": 153879
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.291417360305786,
      "learning_rate": 0.00014901625644838092,
      "loss": 2.7904,
      "step": 153880
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2583017349243164,
      "learning_rate": 0.00014901272170990895,
      "loss": 2.7967,
      "step": 153881
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.6196999549865723,
      "learning_rate": 0.00014900918699950832,
      "loss": 2.9343,
      "step": 153882
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.503159761428833,
      "learning_rate": 0.0001490056523171798,
      "loss": 3.0221,
      "step": 153883
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1781845092773438,
      "learning_rate": 0.0001490021176629238,
      "loss": 2.7358,
      "step": 153884
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5272607803344727,
      "learning_rate": 0.00014899858303674135,
      "loss": 2.8119,
      "step": 153885
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.258401870727539,
      "learning_rate": 0.00014899504843863274,
      "loss": 2.8515,
      "step": 153886
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4953958988189697,
      "learning_rate": 0.00014899151386859891,
      "loss": 2.9293,
      "step": 153887
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.635892152786255,
      "learning_rate": 0.00014898797932664043,
      "loss": 2.9512,
      "step": 153888
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1790878772735596,
      "learning_rate": 0.00014898444481275795,
      "loss": 2.885,
      "step": 153889
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3889825344085693,
      "learning_rate": 0.00014898091032695198,
      "loss": 2.9431,
      "step": 153890
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3787240982055664,
      "learning_rate": 0.00014897737586922342,
      "loss": 3.2567,
      "step": 153891
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1006417274475098,
      "learning_rate": 0.0001489738414395727,
      "loss": 2.935,
      "step": 153892
    },
    {
      "epoch": 2.0,
      "grad_norm": 6.158875942230225,
      "learning_rate": 0.00014897030703800072,
      "loss": 3.0106,
      "step": 153893
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.231562852859497,
      "learning_rate": 0.00014896677266450797,
      "loss": 2.9666,
      "step": 153894
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1377954483032227,
      "learning_rate": 0.0001489632383190952,
      "loss": 2.9317,
      "step": 153895
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.047602891921997,
      "learning_rate": 0.00014895970400176289,
      "loss": 2.8567,
      "step": 153896
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.503955841064453,
      "learning_rate": 0.00014895616971251192,
      "loss": 3.0504,
      "step": 153897
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8121728897094727,
      "learning_rate": 0.00014895263545134277,
      "loss": 3.238,
      "step": 153898
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3583741188049316,
      "learning_rate": 0.00014894910121825624,
      "loss": 3.1262,
      "step": 153899
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2885851860046387,
      "learning_rate": 0.00014894556701325296,
      "loss": 3.0869,
      "step": 153900
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.268765926361084,
      "learning_rate": 0.00014894203283633353,
      "loss": 2.8334,
      "step": 153901
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.599735975265503,
      "learning_rate": 0.0001489384986874985,
      "loss": 2.7454,
      "step": 153902
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3292815685272217,
      "learning_rate": 0.00014893496456674883,
      "loss": 3.216,
      "step": 153903
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.417759418487549,
      "learning_rate": 0.00014893143047408486,
      "loss": 3.0768,
      "step": 153904
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0651392936706543,
      "learning_rate": 0.00014892789640950747,
      "loss": 2.9687,
      "step": 153905
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9397573471069336,
      "learning_rate": 0.00014892436237301727,
      "loss": 3.0795,
      "step": 153906
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3922433853149414,
      "learning_rate": 0.00014892082836461486,
      "loss": 3.0177,
      "step": 153907
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.160414934158325,
      "learning_rate": 0.00014891729438430083,
      "loss": 3.0507,
      "step": 153908
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.131882429122925,
      "learning_rate": 0.000148913760432076,
      "loss": 2.9749,
      "step": 153909
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.210604190826416,
      "learning_rate": 0.00014891022650794089,
      "loss": 2.7466,
      "step": 153910
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1129062175750732,
      "learning_rate": 0.00014890669261189633,
      "loss": 2.6343,
      "step": 153911
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0529234409332275,
      "learning_rate": 0.00014890315874394283,
      "loss": 2.8577,
      "step": 153912
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3225739002227783,
      "learning_rate": 0.0001488996249040811,
      "loss": 3.0986,
      "step": 153913
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.342111349105835,
      "learning_rate": 0.00014889609109231168,
      "loss": 3.2611,
      "step": 153914
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3412439823150635,
      "learning_rate": 0.00014889255730863544,
      "loss": 2.9083,
      "step": 153915
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.238595485687256,
      "learning_rate": 0.00014888902355305277,
      "loss": 3.1419,
      "step": 153916
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.934644937515259,
      "learning_rate": 0.00014888548982556465,
      "loss": 2.9693,
      "step": 153917
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3902053833007812,
      "learning_rate": 0.00014888195612617155,
      "loss": 2.9646,
      "step": 153918
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0839121341705322,
      "learning_rate": 0.0001488784224548742,
      "loss": 2.9275,
      "step": 153919
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4395627975463867,
      "learning_rate": 0.000148874888811673,
      "loss": 2.9148,
      "step": 153920
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4888346195220947,
      "learning_rate": 0.00014887135519656898,
      "loss": 3.0571,
      "step": 153921
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.254458427429199,
      "learning_rate": 0.00014886782160956249,
      "loss": 3.0335,
      "step": 153922
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8277955055236816,
      "learning_rate": 0.00014886428805065444,
      "loss": 3.0533,
      "step": 153923
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3724334239959717,
      "learning_rate": 0.00014886075451984537,
      "loss": 2.9609,
      "step": 153924
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7378427982330322,
      "learning_rate": 0.00014885722101713598,
      "loss": 2.9671,
      "step": 153925
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.335547924041748,
      "learning_rate": 0.00014885368754252674,
      "loss": 3.1465,
      "step": 153926
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.294981002807617,
      "learning_rate": 0.00014885015409601856,
      "loss": 3.1818,
      "step": 153927
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.1783158779144287,
      "learning_rate": 0.0001488466206776119,
      "loss": 2.9158,
      "step": 153928
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.440462112426758,
      "learning_rate": 0.00014884308728730757,
      "loss": 2.9511,
      "step": 153929
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.28629207611084,
      "learning_rate": 0.00014883955392510623,
      "loss": 3.0489,
      "step": 153930
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3448517322540283,
      "learning_rate": 0.00014883602059100843,
      "loss": 2.8432,
      "step": 153931
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1046431064605713,
      "learning_rate": 0.00014883248728501472,
      "loss": 2.8964,
      "step": 153932
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9790666103363037,
      "learning_rate": 0.0001488289540071261,
      "loss": 2.8649,
      "step": 153933
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.532979726791382,
      "learning_rate": 0.00014882542075734284,
      "loss": 2.8961,
      "step": 153934
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2356748580932617,
      "learning_rate": 0.00014882188753566596,
      "loss": 2.985,
      "step": 153935
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.558410882949829,
      "learning_rate": 0.0001488183543420958,
      "loss": 2.8935,
      "step": 153936
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.9061992168426514,
      "learning_rate": 0.0001488148211766334,
      "loss": 2.744,
      "step": 153937
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0201408863067627,
      "learning_rate": 0.00014881128803927895,
      "loss": 2.8612,
      "step": 153938
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.940661668777466,
      "learning_rate": 0.00014880775493003346,
      "loss": 2.9395,
      "step": 153939
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.351444721221924,
      "learning_rate": 0.00014880422184889732,
      "loss": 2.9361,
      "step": 153940
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.0365517139434814,
      "learning_rate": 0.00014880068879587145,
      "loss": 2.8236,
      "step": 153941
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.784797430038452,
      "learning_rate": 0.0001487971557709563,
      "loss": 2.8688,
      "step": 153942
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.312798261642456,
      "learning_rate": 0.00014879362277415273,
      "loss": 2.9259,
      "step": 153943
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3107492923736572,
      "learning_rate": 0.00014879008980546123,
      "loss": 3.0049,
      "step": 153944
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3287646770477295,
      "learning_rate": 0.00014878655686488253,
      "loss": 2.7724,
      "step": 153945
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.626491069793701,
      "learning_rate": 0.00014878302395241716,
      "loss": 2.8631,
      "step": 153946
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.147716522216797,
      "learning_rate": 0.000148779491068066,
      "loss": 2.8523,
      "step": 153947
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.377695083618164,
      "learning_rate": 0.00014877595821182946,
      "loss": 2.9091,
      "step": 153948
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.31559419631958,
      "learning_rate": 0.00014877242538370843,
      "loss": 3.0918,
      "step": 153949
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2749152183532715,
      "learning_rate": 0.0001487688925837035,
      "loss": 2.9877,
      "step": 153950
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.9745501279830933,
      "learning_rate": 0.00014876535981181522,
      "loss": 2.9948,
      "step": 153951
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.169405937194824,
      "learning_rate": 0.00014876182706804425,
      "loss": 2.8372,
      "step": 153952
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.3164494037628174,
      "learning_rate": 0.0001487582943523914,
      "loss": 3.1021,
      "step": 153953
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.146493673324585,
      "learning_rate": 0.00014875476166485713,
      "loss": 2.8568,
      "step": 153954
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.108046054840088,
      "learning_rate": 0.00014875122900544235,
      "loss": 2.9334,
      "step": 153955
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.8122286796569824,
      "learning_rate": 0.00014874769637414755,
      "loss": 2.7031,
      "step": 153956
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.208028554916382,
      "learning_rate": 0.0001487441637709733,
      "loss": 2.8847,
      "step": 153957
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.7777504920959473,
      "learning_rate": 0.00014874063119592044,
      "loss": 2.872,
      "step": 153958
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.0372607707977295,
      "learning_rate": 0.0001487370986489896,
      "loss": 2.8726,
      "step": 153959
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.945419192314148,
      "learning_rate": 0.00014873356613018124,
      "loss": 2.8794,
      "step": 153960
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.924185276031494,
      "learning_rate": 0.00014873003363949632,
      "loss": 2.9168,
      "step": 153961
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.722280263900757,
      "learning_rate": 0.00014872650117693533,
      "loss": 2.9929,
      "step": 153962
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4935696125030518,
      "learning_rate": 0.0001487229687424988,
      "loss": 2.8668,
      "step": 153963
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.800422430038452,
      "learning_rate": 0.00014871943633618768,
      "loss": 2.8777,
      "step": 153964
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2625906467437744,
      "learning_rate": 0.00014871590395800242,
      "loss": 2.8712,
      "step": 153965
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2167882919311523,
      "learning_rate": 0.00014871237160794367,
      "loss": 3.056,
      "step": 153966
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.052539587020874,
      "learning_rate": 0.00014870883928601222,
      "loss": 3.1111,
      "step": 153967
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.5517592430114746,
      "learning_rate": 0.00014870530699220865,
      "loss": 2.767,
      "step": 153968
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.716002941131592,
      "learning_rate": 0.00014870177472653352,
      "loss": 2.978,
      "step": 153969
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.424551010131836,
      "learning_rate": 0.00014869824248898772,
      "loss": 2.9175,
      "step": 153970
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.638408899307251,
      "learning_rate": 0.00014869471027957162,
      "loss": 3.055,
      "step": 153971
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.227149486541748,
      "learning_rate": 0.00014869117809828615,
      "loss": 2.9871,
      "step": 153972
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.317838191986084,
      "learning_rate": 0.00014868764594513185,
      "loss": 2.7969,
      "step": 153973
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4661941528320312,
      "learning_rate": 0.0001486841138201094,
      "loss": 3.0286,
      "step": 153974
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3009116649627686,
      "learning_rate": 0.00014868058172321928,
      "loss": 2.9263,
      "step": 153975
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.447375535964966,
      "learning_rate": 0.00014867704965446245,
      "loss": 2.7951,
      "step": 153976
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.913952350616455,
      "learning_rate": 0.0001486735176138393,
      "loss": 3.1069,
      "step": 153977
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.810657024383545,
      "learning_rate": 0.00014866998560135065,
      "loss": 2.933,
      "step": 153978
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5053470134735107,
      "learning_rate": 0.00014866645361699714,
      "loss": 2.6095,
      "step": 153979
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.2084851264953613,
      "learning_rate": 0.00014866292166077941,
      "loss": 2.7644,
      "step": 153980
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1170554161071777,
      "learning_rate": 0.00014865938973269798,
      "loss": 2.9878,
      "step": 153981
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.1179943084716797,
      "learning_rate": 0.00014865585783275373,
      "loss": 2.8686,
      "step": 153982
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.2970192432403564,
      "learning_rate": 0.0001486523259609471,
      "loss": 3.1398,
      "step": 153983
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.4217476844787598,
      "learning_rate": 0.000148648794117279,
      "loss": 2.7599,
      "step": 153984
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3536126613616943,
      "learning_rate": 0.00014864526230174988,
      "loss": 2.8589,
      "step": 153985
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.684044122695923,
      "learning_rate": 0.00014864173051436054,
      "loss": 2.9562,
      "step": 153986
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.209555149078369,
      "learning_rate": 0.0001486381987551114,
      "loss": 3.0077,
      "step": 153987
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3093032836914062,
      "learning_rate": 0.0001486346670240034,
      "loss": 2.911,
      "step": 153988
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.907109498977661,
      "learning_rate": 0.00014863113532103696,
      "loss": 3.0491,
      "step": 153989
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.192885398864746,
      "learning_rate": 0.00014862760364621296,
      "loss": 3.0642,
      "step": 153990
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.510490655899048,
      "learning_rate": 0.00014862407199953197,
      "loss": 3.179,
      "step": 153991
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.045488357543945,
      "learning_rate": 0.0001486205403809946,
      "loss": 2.7413,
      "step": 153992
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.912759304046631,
      "learning_rate": 0.0001486170087906014,
      "loss": 2.9765,
      "step": 153993
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6386351585388184,
      "learning_rate": 0.0001486134772283533,
      "loss": 3.1142,
      "step": 153994
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9571501016616821,
      "learning_rate": 0.00014860994569425068,
      "loss": 3.1052,
      "step": 153995
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.30244517326355,
      "learning_rate": 0.00014860641418829444,
      "loss": 3.0578,
      "step": 153996
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.997145175933838,
      "learning_rate": 0.00014860288271048512,
      "loss": 2.8772,
      "step": 153997
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.165447950363159,
      "learning_rate": 0.0001485993512608234,
      "loss": 3.1348,
      "step": 153998
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.032919406890869,
      "learning_rate": 0.0001485958198393098,
      "loss": 2.9605,
      "step": 153999
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9892678260803223,
      "learning_rate": 0.0001485922884459452,
      "loss": 2.8763,
      "step": 154000
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6344802379608154,
      "learning_rate": 0.00014858875708073006,
      "loss": 3.2371,
      "step": 154001
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.328611135482788,
      "learning_rate": 0.00014858522574366523,
      "loss": 3.0134,
      "step": 154002
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.227964162826538,
      "learning_rate": 0.00014858169443475116,
      "loss": 2.9459,
      "step": 154003
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.476940393447876,
      "learning_rate": 0.0001485781631539888,
      "loss": 2.7738,
      "step": 154004
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3160600662231445,
      "learning_rate": 0.00014857463190137842,
      "loss": 3.044,
      "step": 154005
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2944657802581787,
      "learning_rate": 0.000148571100676921,
      "loss": 2.854,
      "step": 154006
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7786526679992676,
      "learning_rate": 0.0001485675694806169,
      "loss": 3.1635,
      "step": 154007
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.661386489868164,
      "learning_rate": 0.00014856403831246714,
      "loss": 2.6876,
      "step": 154008
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.234585762023926,
      "learning_rate": 0.00014856050717247204,
      "loss": 2.9142,
      "step": 154009
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.528017044067383,
      "learning_rate": 0.0001485569760606326,
      "loss": 2.797,
      "step": 154010
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4414355754852295,
      "learning_rate": 0.00014855344497694909,
      "loss": 3.0981,
      "step": 154011
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.07800555229187,
      "learning_rate": 0.00014854991392142244,
      "loss": 3.0602,
      "step": 154012
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8643579483032227,
      "learning_rate": 0.00014854638289405313,
      "loss": 3.0644,
      "step": 154013
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1275219917297363,
      "learning_rate": 0.00014854285189484198,
      "loss": 3.1101,
      "step": 154014
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4592878818511963,
      "learning_rate": 0.0001485393209237895,
      "loss": 3.2881,
      "step": 154015
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3228037357330322,
      "learning_rate": 0.0001485357899808966,
      "loss": 2.9333,
      "step": 154016
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2421789169311523,
      "learning_rate": 0.0001485322590661636,
      "loss": 2.8501,
      "step": 154017
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.9428915977478027,
      "learning_rate": 0.00014852872817959137,
      "loss": 2.9749,
      "step": 154018
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2265353202819824,
      "learning_rate": 0.0001485251973211804,
      "loss": 3.07,
      "step": 154019
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4179248809814453,
      "learning_rate": 0.00014852166649093157,
      "loss": 3.015,
      "step": 154020
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.150547504425049,
      "learning_rate": 0.00014851813568884532,
      "loss": 3.0444,
      "step": 154021
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.803983211517334,
      "learning_rate": 0.00014851460491492263,
      "loss": 2.8842,
      "step": 154022
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.593923807144165,
      "learning_rate": 0.00014851107416916375,
      "loss": 2.7445,
      "step": 154023
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0938198566436768,
      "learning_rate": 0.0001485075434515696,
      "loss": 3.0569,
      "step": 154024
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0271358489990234,
      "learning_rate": 0.0001485040127621406,
      "loss": 2.938,
      "step": 154025
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.528024435043335,
      "learning_rate": 0.0001485004821008777,
      "loss": 3.1251,
      "step": 154026
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.325258731842041,
      "learning_rate": 0.00014849695146778132,
      "loss": 2.9261,
      "step": 154027
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8549792766571045,
      "learning_rate": 0.00014849342086285246,
      "loss": 2.8961,
      "step": 154028
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.15692400932312,
      "learning_rate": 0.0001484898902860913,
      "loss": 2.9752,
      "step": 154029
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3590269088745117,
      "learning_rate": 0.00014848635973749882,
      "loss": 2.9616,
      "step": 154030
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.742241144180298,
      "learning_rate": 0.00014848282921707548,
      "loss": 2.6856,
      "step": 154031
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.6134631633758545,
      "learning_rate": 0.00014847929872482217,
      "loss": 2.9631,
      "step": 154032
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.87825083732605,
      "learning_rate": 0.00014847576826073928,
      "loss": 2.6375,
      "step": 154033
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.721471071243286,
      "learning_rate": 0.00014847223782482773,
      "loss": 2.7798,
      "step": 154034
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5579135417938232,
      "learning_rate": 0.00014846870741708809,
      "loss": 3.1806,
      "step": 154035
    },
    {
      "epoch": 2.01,
      "grad_norm": 5.255890369415283,
      "learning_rate": 0.00014846517703752092,
      "loss": 2.8581,
      "step": 154036
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.6729695796966553,
      "learning_rate": 0.00014846164668612684,
      "loss": 2.9753,
      "step": 154037
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1486387252807617,
      "learning_rate": 0.00014845811636290672,
      "loss": 2.9762,
      "step": 154038
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.398698091506958,
      "learning_rate": 0.000148454586067861,
      "loss": 2.8271,
      "step": 154039
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.952632427215576,
      "learning_rate": 0.00014845105580099053,
      "loss": 3.1557,
      "step": 154040
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4080119132995605,
      "learning_rate": 0.00014844752556229584,
      "loss": 2.4063,
      "step": 154041
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.751054048538208,
      "learning_rate": 0.00014844399535177754,
      "loss": 2.9271,
      "step": 154042
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.889292001724243,
      "learning_rate": 0.00014844046516943645,
      "loss": 2.9268,
      "step": 154043
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.099696636199951,
      "learning_rate": 0.00014843693501527316,
      "loss": 3.1049,
      "step": 154044
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0628392696380615,
      "learning_rate": 0.0001484334048892882,
      "loss": 2.9908,
      "step": 154045
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.251377820968628,
      "learning_rate": 0.00014842987479148244,
      "loss": 2.925,
      "step": 154046
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7952234745025635,
      "learning_rate": 0.0001484263447218564,
      "loss": 3.1018,
      "step": 154047
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.055419921875,
      "learning_rate": 0.00014842281468041067,
      "loss": 2.8705,
      "step": 154048
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.470893144607544,
      "learning_rate": 0.00014841928466714611,
      "loss": 2.7347,
      "step": 154049
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.794160842895508,
      "learning_rate": 0.00014841575468206328,
      "loss": 2.9934,
      "step": 154050
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.555816411972046,
      "learning_rate": 0.0001484122247251627,
      "loss": 3.0457,
      "step": 154051
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.134390354156494,
      "learning_rate": 0.00014840869479644522,
      "loss": 3.2556,
      "step": 154052
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.414093494415283,
      "learning_rate": 0.0001484051648959115,
      "loss": 2.921,
      "step": 154053
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5926735401153564,
      "learning_rate": 0.00014840163502356194,
      "loss": 3.1589,
      "step": 154054
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.30224347114563,
      "learning_rate": 0.00014839810517939755,
      "loss": 2.8502,
      "step": 154055
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0462734699249268,
      "learning_rate": 0.00014839457536341865,
      "loss": 2.9889,
      "step": 154056
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.4041686058044434,
      "learning_rate": 0.00014839104557562622,
      "loss": 2.9806,
      "step": 154057
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1784422397613525,
      "learning_rate": 0.00014838751581602072,
      "loss": 2.9603,
      "step": 154058
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.098473072052002,
      "learning_rate": 0.00014838398608460287,
      "loss": 2.899,
      "step": 154059
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.28796124458313,
      "learning_rate": 0.00014838045638137316,
      "loss": 2.8805,
      "step": 154060
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2486088275909424,
      "learning_rate": 0.0001483769267063325,
      "loss": 2.9418,
      "step": 154061
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3350327014923096,
      "learning_rate": 0.00014837339705948136,
      "loss": 2.9567,
      "step": 154062
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9565236568450928,
      "learning_rate": 0.00014836986744082054,
      "loss": 3.0604,
      "step": 154063
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6199145317077637,
      "learning_rate": 0.0001483663378503506,
      "loss": 2.9233,
      "step": 154064
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7008562088012695,
      "learning_rate": 0.00014836280828807224,
      "loss": 3.0127,
      "step": 154065
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7603988647460938,
      "learning_rate": 0.00014835927875398602,
      "loss": 2.9365,
      "step": 154066
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2554399967193604,
      "learning_rate": 0.00014835574924809277,
      "loss": 3.0041,
      "step": 154067
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2261178493499756,
      "learning_rate": 0.00014835221977039294,
      "loss": 3.0616,
      "step": 154068
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2733216285705566,
      "learning_rate": 0.00014834869032088736,
      "loss": 2.9884,
      "step": 154069
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.556102991104126,
      "learning_rate": 0.00014834516089957657,
      "loss": 2.9229,
      "step": 154070
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1912484169006348,
      "learning_rate": 0.00014834163150646146,
      "loss": 2.8647,
      "step": 154071
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7395873069763184,
      "learning_rate": 0.00014833810214154232,
      "loss": 3.0212,
      "step": 154072
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.429504871368408,
      "learning_rate": 0.00014833457280482006,
      "loss": 3.2469,
      "step": 154073
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.101036787033081,
      "learning_rate": 0.0001483310434962952,
      "loss": 3.0281,
      "step": 154074
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.339972734451294,
      "learning_rate": 0.00014832751421596852,
      "loss": 2.9822,
      "step": 154075
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.511002779006958,
      "learning_rate": 0.0001483239849638405,
      "loss": 2.9742,
      "step": 154076
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.50211238861084,
      "learning_rate": 0.0001483204557399122,
      "loss": 3.1644,
      "step": 154077
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4277589321136475,
      "learning_rate": 0.00014831692654418372,
      "loss": 3.1348,
      "step": 154078
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.405068874359131,
      "learning_rate": 0.00014831339737665612,
      "loss": 3.0238,
      "step": 154079
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2330322265625,
      "learning_rate": 0.0001483098682373298,
      "loss": 2.8095,
      "step": 154080
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.373748779296875,
      "learning_rate": 0.00014830633912620567,
      "loss": 2.8905,
      "step": 154081
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3841359615325928,
      "learning_rate": 0.0001483028100432841,
      "loss": 3.0226,
      "step": 154082
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2661986351013184,
      "learning_rate": 0.0001482992809885662,
      "loss": 3.1215,
      "step": 154083
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3084537982940674,
      "learning_rate": 0.00014829575196205204,
      "loss": 2.9088,
      "step": 154084
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6660890579223633,
      "learning_rate": 0.0001482922229637427,
      "loss": 2.8898,
      "step": 154085
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.306593894958496,
      "learning_rate": 0.00014828869399363854,
      "loss": 2.7518,
      "step": 154086
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.286317825317383,
      "learning_rate": 0.00014828516505174054,
      "loss": 2.8782,
      "step": 154087
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.437082290649414,
      "learning_rate": 0.00014828163613804908,
      "loss": 2.7757,
      "step": 154088
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7223613262176514,
      "learning_rate": 0.00014827810725256512,
      "loss": 3.0248,
      "step": 154089
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.874180316925049,
      "learning_rate": 0.00014827457839528894,
      "loss": 3.1581,
      "step": 154090
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7201502323150635,
      "learning_rate": 0.00014827104956622144,
      "loss": 3.0105,
      "step": 154091
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0881776809692383,
      "learning_rate": 0.00014826752076536315,
      "loss": 2.9242,
      "step": 154092
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8029251098632812,
      "learning_rate": 0.0001482639919927149,
      "loss": 2.915,
      "step": 154093
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2401294708251953,
      "learning_rate": 0.0001482604632482771,
      "loss": 3.1893,
      "step": 154094
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.431403636932373,
      "learning_rate": 0.0001482569345320508,
      "loss": 3.03,
      "step": 154095
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3958230018615723,
      "learning_rate": 0.00014825340584403613,
      "loss": 3.2201,
      "step": 154096
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2233235836029053,
      "learning_rate": 0.0001482498771842342,
      "loss": 2.8636,
      "step": 154097
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.570218324661255,
      "learning_rate": 0.0001482463485526453,
      "loss": 3.0665,
      "step": 154098
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.159001350402832,
      "learning_rate": 0.0001482428199492704,
      "loss": 3.0226,
      "step": 154099
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9953645467758179,
      "learning_rate": 0.0001482392913741099,
      "loss": 3.018,
      "step": 154100
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7763023376464844,
      "learning_rate": 0.00014823576282716486,
      "loss": 2.7165,
      "step": 154101
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5598225593566895,
      "learning_rate": 0.00014823223430843538,
      "loss": 2.8963,
      "step": 154102
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.023397207260132,
      "learning_rate": 0.00014822870581792253,
      "loss": 2.7545,
      "step": 154103
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1076462268829346,
      "learning_rate": 0.0001482251773556267,
      "loss": 3.0376,
      "step": 154104
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3261756896972656,
      "learning_rate": 0.00014822164892154882,
      "loss": 2.9944,
      "step": 154105
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0426485538482666,
      "learning_rate": 0.00014821812051568923,
      "loss": 3.0967,
      "step": 154106
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.421419620513916,
      "learning_rate": 0.00014821459213804904,
      "loss": 2.8307,
      "step": 154107
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1520416736602783,
      "learning_rate": 0.00014821106378862836,
      "loss": 3.1889,
      "step": 154108
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.100203037261963,
      "learning_rate": 0.00014820753546742824,
      "loss": 2.8976,
      "step": 154109
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5510387420654297,
      "learning_rate": 0.00014820400717444908,
      "loss": 2.7964,
      "step": 154110
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.085977077484131,
      "learning_rate": 0.0001482004789096918,
      "loss": 2.8034,
      "step": 154111
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0932252407073975,
      "learning_rate": 0.00014819695067315675,
      "loss": 3.1146,
      "step": 154112
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.157695770263672,
      "learning_rate": 0.000148193422464845,
      "loss": 3.116,
      "step": 154113
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3309836387634277,
      "learning_rate": 0.00014818989428475675,
      "loss": 2.9329,
      "step": 154114
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.612091541290283,
      "learning_rate": 0.00014818636613289299,
      "loss": 2.897,
      "step": 154115
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.349301815032959,
      "learning_rate": 0.0001481828380092541,
      "loss": 2.9157,
      "step": 154116
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.523568630218506,
      "learning_rate": 0.00014817930991384103,
      "loss": 3.0382,
      "step": 154117
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7185916900634766,
      "learning_rate": 0.00014817578184665413,
      "loss": 2.916,
      "step": 154118
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.974330425262451,
      "learning_rate": 0.0001481722538076944,
      "loss": 3.0733,
      "step": 154119
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.59515118598938,
      "learning_rate": 0.00014816872579696228,
      "loss": 2.9652,
      "step": 154120
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6424875259399414,
      "learning_rate": 0.00014816519781445846,
      "loss": 2.9331,
      "step": 154121
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.499277353286743,
      "learning_rate": 0.0001481616698601835,
      "loss": 3.0178,
      "step": 154122
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2513277530670166,
      "learning_rate": 0.00014815814193413823,
      "loss": 2.9135,
      "step": 154123
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.784059762954712,
      "learning_rate": 0.00014815461403632315,
      "loss": 2.897,
      "step": 154124
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1362030506134033,
      "learning_rate": 0.00014815108616673907,
      "loss": 3.0125,
      "step": 154125
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.699927806854248,
      "learning_rate": 0.0001481475583253866,
      "loss": 3.1821,
      "step": 154126
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0371460914611816,
      "learning_rate": 0.00014814403051226633,
      "loss": 2.9119,
      "step": 154127
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.653918743133545,
      "learning_rate": 0.00014814050272737885,
      "loss": 2.8642,
      "step": 154128
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.900388717651367,
      "learning_rate": 0.00014813697497072505,
      "loss": 3.1572,
      "step": 154129
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0246429443359375,
      "learning_rate": 0.00014813344724230534,
      "loss": 2.8935,
      "step": 154130
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7091927528381348,
      "learning_rate": 0.0001481299195421206,
      "loss": 2.9342,
      "step": 154131
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3778021335601807,
      "learning_rate": 0.00014812639187017135,
      "loss": 3.1199,
      "step": 154132
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.368605136871338,
      "learning_rate": 0.00014812286422645818,
      "loss": 2.9835,
      "step": 154133
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5592899322509766,
      "learning_rate": 0.00014811933661098195,
      "loss": 3.0389,
      "step": 154134
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.553548812866211,
      "learning_rate": 0.0001481158090237432,
      "loss": 3.043,
      "step": 154135
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.142941474914551,
      "learning_rate": 0.00014811228146474246,
      "loss": 2.7216,
      "step": 154136
    },
    {
      "epoch": 2.01,
      "grad_norm": 5.314342021942139,
      "learning_rate": 0.00014810875393398066,
      "loss": 2.8453,
      "step": 154137
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6907286643981934,
      "learning_rate": 0.0001481052264314583,
      "loss": 3.1542,
      "step": 154138
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9852921962738037,
      "learning_rate": 0.0001481016989571759,
      "loss": 3.2353,
      "step": 154139
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.942664384841919,
      "learning_rate": 0.00014809817151113441,
      "loss": 2.6007,
      "step": 154140
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1924984455108643,
      "learning_rate": 0.0001480946440933342,
      "loss": 2.9562,
      "step": 154141
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3914988040924072,
      "learning_rate": 0.0001480911167037762,
      "loss": 2.7998,
      "step": 154142
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7689127922058105,
      "learning_rate": 0.0001480875893424609,
      "loss": 2.6835,
      "step": 154143
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.007174015045166,
      "learning_rate": 0.000148084062009389,
      "loss": 2.9428,
      "step": 154144
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9313315153121948,
      "learning_rate": 0.00014808053470456102,
      "loss": 2.9281,
      "step": 154145
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1705617904663086,
      "learning_rate": 0.00014807700742797787,
      "loss": 2.8959,
      "step": 154146
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.147552728652954,
      "learning_rate": 0.00014807348017963992,
      "loss": 3.1247,
      "step": 154147
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.338104009628296,
      "learning_rate": 0.00014806995295954813,
      "loss": 2.9825,
      "step": 154148
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.9250104427337646,
      "learning_rate": 0.000148066425767703,
      "loss": 2.9675,
      "step": 154149
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.11875057220459,
      "learning_rate": 0.00014806289860410518,
      "loss": 2.8261,
      "step": 154150
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.220486879348755,
      "learning_rate": 0.00014805937146875522,
      "loss": 2.9095,
      "step": 154151
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2978758811950684,
      "learning_rate": 0.000148055844361654,
      "loss": 3.2126,
      "step": 154152
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1683764457702637,
      "learning_rate": 0.00014805231728280197,
      "loss": 2.8571,
      "step": 154153
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7999985218048096,
      "learning_rate": 0.0001480487902322,
      "loss": 2.8992,
      "step": 154154
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3356666564941406,
      "learning_rate": 0.0001480452632098485,
      "loss": 2.9972,
      "step": 154155
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.32664155960083,
      "learning_rate": 0.00014804173621574848,
      "loss": 2.8222,
      "step": 154156
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3828203678131104,
      "learning_rate": 0.00014803820924990014,
      "loss": 2.9472,
      "step": 154157
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.645933151245117,
      "learning_rate": 0.0001480346823123045,
      "loss": 2.9545,
      "step": 154158
    },
    {
      "epoch": 2.01,
      "grad_norm": 7.558299541473389,
      "learning_rate": 0.00014803115540296195,
      "loss": 2.9767,
      "step": 154159
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.566661834716797,
      "learning_rate": 0.00014802762852187337,
      "loss": 2.925,
      "step": 154160
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.416001319885254,
      "learning_rate": 0.00014802410166903923,
      "loss": 2.9348,
      "step": 154161
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3610117435455322,
      "learning_rate": 0.0001480205748444605,
      "loss": 2.8362,
      "step": 154162
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.154502868652344,
      "learning_rate": 0.0001480170480481374,
      "loss": 3.1286,
      "step": 154163
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4466447830200195,
      "learning_rate": 0.0001480135212800709,
      "loss": 2.833,
      "step": 154164
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.271134853363037,
      "learning_rate": 0.00014800999454026145,
      "loss": 3.0942,
      "step": 154165
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.437924385070801,
      "learning_rate": 0.0001480064678287099,
      "loss": 2.8435,
      "step": 154166
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.8841912746429443,
      "learning_rate": 0.00014800294114541675,
      "loss": 3.0068,
      "step": 154167
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.174705743789673,
      "learning_rate": 0.0001479994144903829,
      "loss": 2.9229,
      "step": 154168
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1476383209228516,
      "learning_rate": 0.00014799588786360864,
      "loss": 2.7368,
      "step": 154169
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7363781929016113,
      "learning_rate": 0.00014799236126509494,
      "loss": 2.6873,
      "step": 154170
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9325757026672363,
      "learning_rate": 0.00014798883469484218,
      "loss": 2.7739,
      "step": 154171
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4159393310546875,
      "learning_rate": 0.00014798530815285127,
      "loss": 2.9183,
      "step": 154172
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.364079236984253,
      "learning_rate": 0.00014798178163912267,
      "loss": 3.1214,
      "step": 154173
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0450093746185303,
      "learning_rate": 0.00014797825515365734,
      "loss": 2.8595,
      "step": 154174
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.629364490509033,
      "learning_rate": 0.00014797472869645552,
      "loss": 3.1183,
      "step": 154175
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9993934631347656,
      "learning_rate": 0.00014797120226751816,
      "loss": 2.8266,
      "step": 154176
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.285074234008789,
      "learning_rate": 0.0001479676758668457,
      "loss": 2.9196,
      "step": 154177
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.267212152481079,
      "learning_rate": 0.00014796414949443907,
      "loss": 3.0515,
      "step": 154178
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1652846336364746,
      "learning_rate": 0.0001479606231502986,
      "loss": 3.1581,
      "step": 154179
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5109622478485107,
      "learning_rate": 0.0001479570968344254,
      "loss": 2.9789,
      "step": 154180
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.879966974258423,
      "learning_rate": 0.00014795357054681963,
      "loss": 2.9708,
      "step": 154181
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.612241268157959,
      "learning_rate": 0.00014795004428748223,
      "loss": 3.1069,
      "step": 154182
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.576809883117676,
      "learning_rate": 0.0001479465180564137,
      "loss": 2.8882,
      "step": 154183
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.122401475906372,
      "learning_rate": 0.00014794299185361487,
      "loss": 2.9251,
      "step": 154184
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3484675884246826,
      "learning_rate": 0.0001479394656790862,
      "loss": 2.9659,
      "step": 154185
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1849918365478516,
      "learning_rate": 0.0001479359395328287,
      "loss": 3.1173,
      "step": 154186
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.237497091293335,
      "learning_rate": 0.00014793241341484257,
      "loss": 2.8262,
      "step": 154187
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2744803428649902,
      "learning_rate": 0.00014792888732512874,
      "loss": 3.3802,
      "step": 154188
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3119349479675293,
      "learning_rate": 0.00014792536126368772,
      "loss": 2.8609,
      "step": 154189
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6811490058898926,
      "learning_rate": 0.00014792183523052035,
      "loss": 3.2163,
      "step": 154190
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.4354844093322754,
      "learning_rate": 0.00014791830922562707,
      "loss": 3.0325,
      "step": 154191
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.221575975418091,
      "learning_rate": 0.00014791478324900886,
      "loss": 2.7578,
      "step": 154192
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1759519577026367,
      "learning_rate": 0.00014791125730066596,
      "loss": 2.8275,
      "step": 154193
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.71305775642395,
      "learning_rate": 0.0001479077313805993,
      "loss": 2.7331,
      "step": 154194
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6261868476867676,
      "learning_rate": 0.00014790420548880943,
      "loss": 3.1938,
      "step": 154195
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2892305850982666,
      "learning_rate": 0.00014790067962529706,
      "loss": 3.2852,
      "step": 154196
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.077449083328247,
      "learning_rate": 0.00014789715379006274,
      "loss": 2.781,
      "step": 154197
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2542500495910645,
      "learning_rate": 0.00014789362798310748,
      "loss": 2.9424,
      "step": 154198
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.122293710708618,
      "learning_rate": 0.0001478901022044314,
      "loss": 2.8845,
      "step": 154199
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6202809810638428,
      "learning_rate": 0.00014788657645403556,
      "loss": 2.6486,
      "step": 154200
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.053632736206055,
      "learning_rate": 0.00014788305073192034,
      "loss": 2.9716,
      "step": 154201
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8245933055877686,
      "learning_rate": 0.00014787952503808667,
      "loss": 2.931,
      "step": 154202
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3738162517547607,
      "learning_rate": 0.00014787599937253492,
      "loss": 3.1658,
      "step": 154203
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.145907402038574,
      "learning_rate": 0.000147872473735266,
      "loss": 3.0572,
      "step": 154204
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.398178815841675,
      "learning_rate": 0.0001478689481262805,
      "loss": 3.0561,
      "step": 154205
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.132478713989258,
      "learning_rate": 0.00014786542254557902,
      "loss": 2.8645,
      "step": 154206
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.918165683746338,
      "learning_rate": 0.00014786189699316212,
      "loss": 2.8311,
      "step": 154207
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4683995246887207,
      "learning_rate": 0.00014785837146903067,
      "loss": 3.074,
      "step": 154208
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.274566650390625,
      "learning_rate": 0.0001478548459731851,
      "loss": 2.8301,
      "step": 154209
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.731078624725342,
      "learning_rate": 0.0001478513205056263,
      "loss": 2.9064,
      "step": 154210
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.728146553039551,
      "learning_rate": 0.00014784779506635482,
      "loss": 3.1854,
      "step": 154211
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.611372470855713,
      "learning_rate": 0.0001478442696553713,
      "loss": 2.7071,
      "step": 154212
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2160511016845703,
      "learning_rate": 0.0001478407442726763,
      "loss": 2.7861,
      "step": 154213
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5552799701690674,
      "learning_rate": 0.00014783721891827072,
      "loss": 2.9639,
      "step": 154214
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.431000232696533,
      "learning_rate": 0.00014783369359215492,
      "loss": 2.9655,
      "step": 154215
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.491028308868408,
      "learning_rate": 0.00014783016829432983,
      "loss": 3.0142,
      "step": 154216
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3356573581695557,
      "learning_rate": 0.00014782664302479597,
      "loss": 3.0645,
      "step": 154217
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3292219638824463,
      "learning_rate": 0.00014782311778355393,
      "loss": 2.8697,
      "step": 154218
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2429306507110596,
      "learning_rate": 0.00014781959257060453,
      "loss": 2.9369,
      "step": 154219
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.09981369972229,
      "learning_rate": 0.00014781606738594835,
      "loss": 3.1135,
      "step": 154220
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.509608745574951,
      "learning_rate": 0.0001478125422295859,
      "loss": 2.8606,
      "step": 154221
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.904714584350586,
      "learning_rate": 0.0001478090171015181,
      "loss": 2.6072,
      "step": 154222
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5146543979644775,
      "learning_rate": 0.0001478054920017455,
      "loss": 2.9284,
      "step": 154223
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3518764972686768,
      "learning_rate": 0.00014780196693026858,
      "loss": 3.0447,
      "step": 154224
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.867539644241333,
      "learning_rate": 0.0001477984418870883,
      "loss": 3.1787,
      "step": 154225
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7359397411346436,
      "learning_rate": 0.00014779491687220514,
      "loss": 2.9076,
      "step": 154226
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.269101858139038,
      "learning_rate": 0.0001477913918856197,
      "loss": 2.9264,
      "step": 154227
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2345664501190186,
      "learning_rate": 0.00014778786692733276,
      "loss": 2.8737,
      "step": 154228
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.275843858718872,
      "learning_rate": 0.00014778434199734497,
      "loss": 3.0718,
      "step": 154229
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2550864219665527,
      "learning_rate": 0.00014778081709565684,
      "loss": 3.0024,
      "step": 154230
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.356766939163208,
      "learning_rate": 0.00014777729222226923,
      "loss": 2.8854,
      "step": 154231
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.401754379272461,
      "learning_rate": 0.00014777376737718257,
      "loss": 2.9195,
      "step": 154232
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1708157062530518,
      "learning_rate": 0.00014777024256039777,
      "loss": 2.9014,
      "step": 154233
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.117396116256714,
      "learning_rate": 0.00014776671777191533,
      "loss": 2.8893,
      "step": 154234
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1775033473968506,
      "learning_rate": 0.00014776319301173594,
      "loss": 2.8841,
      "step": 154235
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4323570728302,
      "learning_rate": 0.00014775966827986018,
      "loss": 2.7962,
      "step": 154236
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.07755446434021,
      "learning_rate": 0.00014775614357628882,
      "loss": 2.793,
      "step": 154237
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.49753737449646,
      "learning_rate": 0.00014775261890102238,
      "loss": 3.0597,
      "step": 154238
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7692129611968994,
      "learning_rate": 0.00014774909425406172,
      "loss": 3.1225,
      "step": 154239
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4394450187683105,
      "learning_rate": 0.00014774556963540725,
      "loss": 3.0445,
      "step": 154240
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2393157482147217,
      "learning_rate": 0.00014774204504506,
      "loss": 3.0533,
      "step": 154241
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4861552715301514,
      "learning_rate": 0.00014773852048302014,
      "loss": 3.0604,
      "step": 154242
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2600483894348145,
      "learning_rate": 0.00014773499594928868,
      "loss": 2.8852,
      "step": 154243
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5941882133483887,
      "learning_rate": 0.00014773147144386603,
      "loss": 3.0214,
      "step": 154244
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.191394805908203,
      "learning_rate": 0.0001477279469667531,
      "loss": 3.1109,
      "step": 154245
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9279894828796387,
      "learning_rate": 0.00014772442251795033,
      "loss": 3.0794,
      "step": 154246
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2573914527893066,
      "learning_rate": 0.00014772089809745863,
      "loss": 2.94,
      "step": 154247
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4246973991394043,
      "learning_rate": 0.00014771737370527832,
      "loss": 3.0306,
      "step": 154248
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9569358825683594,
      "learning_rate": 0.00014771384934141033,
      "loss": 2.9057,
      "step": 154249
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8758833408355713,
      "learning_rate": 0.00014771032500585508,
      "loss": 3.0054,
      "step": 154250
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6377973556518555,
      "learning_rate": 0.00014770680069861345,
      "loss": 2.8695,
      "step": 154251
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.470548152923584,
      "learning_rate": 0.0001477032764196859,
      "loss": 2.8762,
      "step": 154252
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1975176334381104,
      "learning_rate": 0.00014769975216907341,
      "loss": 2.8424,
      "step": 154253
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2331995964050293,
      "learning_rate": 0.00014769622794677623,
      "loss": 2.7866,
      "step": 154254
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.403959274291992,
      "learning_rate": 0.00014769270375279526,
      "loss": 2.8551,
      "step": 154255
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7657227516174316,
      "learning_rate": 0.000147689179587131,
      "loss": 2.9175,
      "step": 154256
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.054327964782715,
      "learning_rate": 0.00014768565544978428,
      "loss": 2.9239,
      "step": 154257
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.720916748046875,
      "learning_rate": 0.0001476821313407556,
      "loss": 3.0289,
      "step": 154258
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.452570676803589,
      "learning_rate": 0.00014767860726004592,
      "loss": 2.9098,
      "step": 154259
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7727925777435303,
      "learning_rate": 0.00014767508320765536,
      "loss": 2.9216,
      "step": 154260
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1111538410186768,
      "learning_rate": 0.00014767155918358509,
      "loss": 3.0838,
      "step": 154261
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3206071853637695,
      "learning_rate": 0.0001476680351878354,
      "loss": 2.9933,
      "step": 154262
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.131831169128418,
      "learning_rate": 0.00014766451122040717,
      "loss": 3.1861,
      "step": 154263
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5599613189697266,
      "learning_rate": 0.00014766098728130088,
      "loss": 2.8001,
      "step": 154264
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4130964279174805,
      "learning_rate": 0.00014765746337051756,
      "loss": 2.8643,
      "step": 154265
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1679952144622803,
      "learning_rate": 0.00014765393948805734,
      "loss": 2.8562,
      "step": 154266
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.612006664276123,
      "learning_rate": 0.00014765041563392125,
      "loss": 2.922,
      "step": 154267
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.349792718887329,
      "learning_rate": 0.0001476468918081097,
      "loss": 2.9504,
      "step": 154268
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.8967211246490479,
      "learning_rate": 0.00014764336801062358,
      "loss": 2.9833,
      "step": 154269
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1584720611572266,
      "learning_rate": 0.00014763984424146333,
      "loss": 2.8485,
      "step": 154270
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6251213550567627,
      "learning_rate": 0.00014763632050062981,
      "loss": 2.8468,
      "step": 154271
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.964318037033081,
      "learning_rate": 0.0001476327967881236,
      "loss": 3.115,
      "step": 154272
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.322903633117676,
      "learning_rate": 0.0001476292731039453,
      "loss": 2.9803,
      "step": 154273
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.565276861190796,
      "learning_rate": 0.00014762574944809549,
      "loss": 2.8674,
      "step": 154274
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.732247829437256,
      "learning_rate": 0.00014762222582057508,
      "loss": 3.0204,
      "step": 154275
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7663934230804443,
      "learning_rate": 0.00014761870222138442,
      "loss": 3.1385,
      "step": 154276
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.858064889907837,
      "learning_rate": 0.00014761517865052442,
      "loss": 3.0331,
      "step": 154277
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.38193416595459,
      "learning_rate": 0.00014761165510799568,
      "loss": 2.6018,
      "step": 154278
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7660529613494873,
      "learning_rate": 0.00014760813159379876,
      "loss": 3.0875,
      "step": 154279
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.324936628341675,
      "learning_rate": 0.00014760460810793428,
      "loss": 2.9611,
      "step": 154280
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2359158992767334,
      "learning_rate": 0.0001476010846504031,
      "loss": 3.1222,
      "step": 154281
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.291600227355957,
      "learning_rate": 0.00014759756122120565,
      "loss": 2.851,
      "step": 154282
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.218303680419922,
      "learning_rate": 0.00014759403782034277,
      "loss": 2.8867,
      "step": 154283
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7633588314056396,
      "learning_rate": 0.00014759051444781504,
      "loss": 2.89,
      "step": 154284
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6643409729003906,
      "learning_rate": 0.00014758699110362314,
      "loss": 3.0618,
      "step": 154285
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.037790298461914,
      "learning_rate": 0.00014758346778776757,
      "loss": 3.0881,
      "step": 154286
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4655544757843018,
      "learning_rate": 0.0001475799445002492,
      "loss": 2.9109,
      "step": 154287
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1384150981903076,
      "learning_rate": 0.0001475764212410685,
      "loss": 2.6705,
      "step": 154288
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.294771432876587,
      "learning_rate": 0.00014757289801022635,
      "loss": 3.0528,
      "step": 154289
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3372254371643066,
      "learning_rate": 0.00014756937480772325,
      "loss": 3.0635,
      "step": 154290
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.216262102127075,
      "learning_rate": 0.0001475658516335599,
      "loss": 3.1174,
      "step": 154291
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.107957363128662,
      "learning_rate": 0.00014756232848773678,
      "loss": 2.7897,
      "step": 154292
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1608753204345703,
      "learning_rate": 0.00014755880537025486,
      "loss": 2.9107,
      "step": 154293
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4743590354919434,
      "learning_rate": 0.0001475552822811145,
      "loss": 2.9938,
      "step": 154294
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.024325132369995,
      "learning_rate": 0.0001475517592203166,
      "loss": 2.7488,
      "step": 154295
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.561988592147827,
      "learning_rate": 0.00014754823618786175,
      "loss": 2.7214,
      "step": 154296
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2838141918182373,
      "learning_rate": 0.0001475447131837505,
      "loss": 2.9095,
      "step": 154297
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6026766300201416,
      "learning_rate": 0.0001475411902079835,
      "loss": 3.1348,
      "step": 154298
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4764838218688965,
      "learning_rate": 0.00014753766726056155,
      "loss": 2.9413,
      "step": 154299
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6095213890075684,
      "learning_rate": 0.0001475341443414851,
      "loss": 2.9208,
      "step": 154300
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5820226669311523,
      "learning_rate": 0.0001475306214507551,
      "loss": 3.134,
      "step": 154301
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7929985523223877,
      "learning_rate": 0.000147527098588372,
      "loss": 2.8998,
      "step": 154302
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.207873582839966,
      "learning_rate": 0.0001475235757543364,
      "loss": 3.0404,
      "step": 154303
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4083950519561768,
      "learning_rate": 0.00014752005294864912,
      "loss": 2.9492,
      "step": 154304
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1588964462280273,
      "learning_rate": 0.00014751653017131079,
      "loss": 2.9594,
      "step": 154305
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.0249924659729,
      "learning_rate": 0.00014751300742232187,
      "loss": 2.8806,
      "step": 154306
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.198193073272705,
      "learning_rate": 0.0001475094847016833,
      "loss": 3.0181,
      "step": 154307
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.31119441986084,
      "learning_rate": 0.00014750596200939554,
      "loss": 2.9403,
      "step": 154308
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3416738510131836,
      "learning_rate": 0.00014750243934545922,
      "loss": 3.0802,
      "step": 154309
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.8437535762786865,
      "learning_rate": 0.0001474989167098752,
      "loss": 3.0163,
      "step": 154310
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5202436447143555,
      "learning_rate": 0.00014749539410264398,
      "loss": 2.9273,
      "step": 154311
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4595015048980713,
      "learning_rate": 0.00014749187152376616,
      "loss": 2.9805,
      "step": 154312
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4869513511657715,
      "learning_rate": 0.0001474883489732426,
      "loss": 3.0809,
      "step": 154313
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.480470895767212,
      "learning_rate": 0.0001474848264510738,
      "loss": 3.0334,
      "step": 154314
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9128639698028564,
      "learning_rate": 0.00014748130395726036,
      "loss": 3.1806,
      "step": 154315
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.485260486602783,
      "learning_rate": 0.00014747778149180313,
      "loss": 3.0426,
      "step": 154316
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0450189113616943,
      "learning_rate": 0.00014747425905470255,
      "loss": 2.9665,
      "step": 154317
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.118307590484619,
      "learning_rate": 0.0001474707366459595,
      "loss": 2.8379,
      "step": 154318
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.563049554824829,
      "learning_rate": 0.00014746721426557452,
      "loss": 3.1052,
      "step": 154319
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.212387800216675,
      "learning_rate": 0.00014746369191354823,
      "loss": 3.0956,
      "step": 154320
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4944329261779785,
      "learning_rate": 0.00014746016958988128,
      "loss": 3.312,
      "step": 154321
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7102749347686768,
      "learning_rate": 0.0001474566472945744,
      "loss": 2.769,
      "step": 154322
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.325594902038574,
      "learning_rate": 0.00014745312502762814,
      "loss": 2.8886,
      "step": 154323
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6963272094726562,
      "learning_rate": 0.00014744960278904333,
      "loss": 3.0545,
      "step": 154324
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.4719340801239014,
      "learning_rate": 0.0001474460805788204,
      "loss": 2.8362,
      "step": 154325
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.4335334300994873,
      "learning_rate": 0.00014744255839696036,
      "loss": 2.7969,
      "step": 154326
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0130887031555176,
      "learning_rate": 0.00014743903624346339,
      "loss": 2.9247,
      "step": 154327
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0553340911865234,
      "learning_rate": 0.00014743551411833047,
      "loss": 2.8521,
      "step": 154328
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.651388645172119,
      "learning_rate": 0.0001474319920215621,
      "loss": 2.7401,
      "step": 154329
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.757641077041626,
      "learning_rate": 0.0001474284699531591,
      "loss": 2.8771,
      "step": 154330
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5889365673065186,
      "learning_rate": 0.0001474249479131219,
      "loss": 2.958,
      "step": 154331
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.319554090499878,
      "learning_rate": 0.0001474214259014515,
      "loss": 2.9878,
      "step": 154332
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.912294626235962,
      "learning_rate": 0.00014741790391814815,
      "loss": 2.808,
      "step": 154333
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.221686840057373,
      "learning_rate": 0.0001474143819632128,
      "loss": 3.0392,
      "step": 154334
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9608986377716064,
      "learning_rate": 0.00014741086003664588,
      "loss": 2.9931,
      "step": 154335
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3622841835021973,
      "learning_rate": 0.00014740733813844826,
      "loss": 2.9848,
      "step": 154336
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8863027095794678,
      "learning_rate": 0.0001474038162686204,
      "loss": 2.8056,
      "step": 154337
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1052000522613525,
      "learning_rate": 0.00014740029442716313,
      "loss": 2.9941,
      "step": 154338
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1568260192871094,
      "learning_rate": 0.00014739677261407707,
      "loss": 3.006,
      "step": 154339
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8701634407043457,
      "learning_rate": 0.00014739325082936282,
      "loss": 2.8017,
      "step": 154340
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5169126987457275,
      "learning_rate": 0.0001473897290730209,
      "loss": 2.9967,
      "step": 154341
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1491308212280273,
      "learning_rate": 0.00014738620734505223,
      "loss": 3.0225,
      "step": 154342
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1894679069519043,
      "learning_rate": 0.00014738268564545726,
      "loss": 3.0776,
      "step": 154343
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.984804630279541,
      "learning_rate": 0.00014737916397423683,
      "loss": 2.7379,
      "step": 154344
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9834206104278564,
      "learning_rate": 0.0001473756423313915,
      "loss": 2.7697,
      "step": 154345
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.340742588043213,
      "learning_rate": 0.00014737212071692192,
      "loss": 3.005,
      "step": 154346
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.485222816467285,
      "learning_rate": 0.00014736859913082862,
      "loss": 3.1728,
      "step": 154347
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.692772150039673,
      "learning_rate": 0.00014736507757311248,
      "loss": 2.7583,
      "step": 154348
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.483130693435669,
      "learning_rate": 0.00014736155604377395,
      "loss": 2.8588,
      "step": 154349
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.598987579345703,
      "learning_rate": 0.00014735803454281392,
      "loss": 2.8887,
      "step": 154350
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1481497287750244,
      "learning_rate": 0.0001473545130702329,
      "loss": 2.9593,
      "step": 154351
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.230165481567383,
      "learning_rate": 0.00014735099162603154,
      "loss": 3.2036,
      "step": 154352
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.6971497535705566,
      "learning_rate": 0.00014734747021021043,
      "loss": 3.0346,
      "step": 154353
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3909080028533936,
      "learning_rate": 0.00014734394882277038,
      "loss": 3.0044,
      "step": 154354
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3813486099243164,
      "learning_rate": 0.0001473404274637119,
      "loss": 2.9245,
      "step": 154355
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9894005060195923,
      "learning_rate": 0.00014733690613303582,
      "loss": 2.9843,
      "step": 154356
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3166375160217285,
      "learning_rate": 0.00014733338483074266,
      "loss": 2.9628,
      "step": 154357
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2143590450286865,
      "learning_rate": 0.00014732986355683314,
      "loss": 3.1426,
      "step": 154358
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5199801921844482,
      "learning_rate": 0.00014732634231130775,
      "loss": 3.2196,
      "step": 154359
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4491450786590576,
      "learning_rate": 0.00014732282109416743,
      "loss": 2.8851,
      "step": 154360
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.082047939300537,
      "learning_rate": 0.00014731929990541252,
      "loss": 2.956,
      "step": 154361
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1880974769592285,
      "learning_rate": 0.00014731577874504392,
      "loss": 2.7568,
      "step": 154362
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.101362943649292,
      "learning_rate": 0.00014731225761306229,
      "loss": 2.9702,
      "step": 154363
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.717142343521118,
      "learning_rate": 0.00014730873650946813,
      "loss": 3.0215,
      "step": 154364
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.473086357116699,
      "learning_rate": 0.00014730521543426204,
      "loss": 3.0034,
      "step": 154365
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5203826427459717,
      "learning_rate": 0.00014730169438744494,
      "loss": 2.9765,
      "step": 154366
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.957381248474121,
      "learning_rate": 0.0001472981733690172,
      "loss": 2.9968,
      "step": 154367
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.346179246902466,
      "learning_rate": 0.0001472946523789797,
      "loss": 2.5619,
      "step": 154368
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2932677268981934,
      "learning_rate": 0.00014729113141733307,
      "loss": 3.0561,
      "step": 154369
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.366391181945801,
      "learning_rate": 0.00014728761048407786,
      "loss": 3.0502,
      "step": 154370
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4335508346557617,
      "learning_rate": 0.00014728408957921465,
      "loss": 3.0414,
      "step": 154371
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.403850793838501,
      "learning_rate": 0.00014728056870274436,
      "loss": 3.231,
      "step": 154372
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.099858522415161,
      "learning_rate": 0.00014727704785466732,
      "loss": 2.9833,
      "step": 154373
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8373303413391113,
      "learning_rate": 0.00014727352703498454,
      "loss": 2.9049,
      "step": 154374
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.028514862060547,
      "learning_rate": 0.00014727000624369644,
      "loss": 2.8256,
      "step": 154375
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0367798805236816,
      "learning_rate": 0.0001472664854808038,
      "loss": 3.0047,
      "step": 154376
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5472662448883057,
      "learning_rate": 0.000147262964746307,
      "loss": 3.1512,
      "step": 154377
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.675690650939941,
      "learning_rate": 0.00014725944404020706,
      "loss": 3.1453,
      "step": 154378
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.706421852111816,
      "learning_rate": 0.00014725592336250435,
      "loss": 3.0381,
      "step": 154379
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.471498966217041,
      "learning_rate": 0.0001472524027131998,
      "loss": 3.0612,
      "step": 154380
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3642237186431885,
      "learning_rate": 0.00014724888209229384,
      "loss": 3.0754,
      "step": 154381
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.899620532989502,
      "learning_rate": 0.00014724536149978728,
      "loss": 3.0031,
      "step": 154382
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.299347400665283,
      "learning_rate": 0.0001472418409356805,
      "loss": 3.0567,
      "step": 154383
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.563526153564453,
      "learning_rate": 0.00014723832039997448,
      "loss": 2.9855,
      "step": 154384
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.478876829147339,
      "learning_rate": 0.00014723479989266964,
      "loss": 2.8593,
      "step": 154385
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0391228199005127,
      "learning_rate": 0.00014723127941376686,
      "loss": 3.2071,
      "step": 154386
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6345605850219727,
      "learning_rate": 0.00014722775896326663,
      "loss": 2.9325,
      "step": 154387
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.308476686477661,
      "learning_rate": 0.00014722423854116954,
      "loss": 2.8563,
      "step": 154388
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0434045791625977,
      "learning_rate": 0.00014722071814747648,
      "loss": 2.9221,
      "step": 154389
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6726560592651367,
      "learning_rate": 0.00014721719778218798,
      "loss": 2.9648,
      "step": 154390
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2796270847320557,
      "learning_rate": 0.00014721367744530454,
      "loss": 2.884,
      "step": 154391
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9006831645965576,
      "learning_rate": 0.00014721015713682712,
      "loss": 2.7851,
      "step": 154392
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.926527976989746,
      "learning_rate": 0.00014720663685675617,
      "loss": 2.9057,
      "step": 154393
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3353710174560547,
      "learning_rate": 0.0001472031166050923,
      "loss": 2.9766,
      "step": 154394
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.889873743057251,
      "learning_rate": 0.00014719959638183637,
      "loss": 2.7405,
      "step": 154395
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8126156330108643,
      "learning_rate": 0.00014719607618698892,
      "loss": 2.8883,
      "step": 154396
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9089534282684326,
      "learning_rate": 0.00014719255602055047,
      "loss": 3.2396,
      "step": 154397
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3932907581329346,
      "learning_rate": 0.00014718903588252193,
      "loss": 3.0962,
      "step": 154398
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2650861740112305,
      "learning_rate": 0.00014718551577290386,
      "loss": 3.0347,
      "step": 154399
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.804102897644043,
      "learning_rate": 0.00014718199569169673,
      "loss": 3.0068,
      "step": 154400
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7055652141571045,
      "learning_rate": 0.0001471784756389015,
      "loss": 3.1777,
      "step": 154401
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6312568187713623,
      "learning_rate": 0.00014717495561451853,
      "loss": 2.9695,
      "step": 154402
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5712029933929443,
      "learning_rate": 0.00014717143561854878,
      "loss": 2.8786,
      "step": 154403
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9199349880218506,
      "learning_rate": 0.0001471679156509927,
      "loss": 3.1564,
      "step": 154404
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.715946674346924,
      "learning_rate": 0.0001471643957118509,
      "loss": 2.9951,
      "step": 154405
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.8365142345428467,
      "learning_rate": 0.00014716087580112424,
      "loss": 2.7539,
      "step": 154406
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.466017961502075,
      "learning_rate": 0.00014715735591881328,
      "loss": 2.967,
      "step": 154407
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.880944013595581,
      "learning_rate": 0.0001471538360649185,
      "loss": 2.9067,
      "step": 154408
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2591207027435303,
      "learning_rate": 0.0001471503162394408,
      "loss": 2.8306,
      "step": 154409
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.284567356109619,
      "learning_rate": 0.00014714679644238079,
      "loss": 2.8089,
      "step": 154410
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2764179706573486,
      "learning_rate": 0.00014714327667373893,
      "loss": 2.8239,
      "step": 154411
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.896465301513672,
      "learning_rate": 0.00014713975693351615,
      "loss": 2.8559,
      "step": 154412
    },
    {
      "epoch": 2.01,
      "grad_norm": 5.2754435539245605,
      "learning_rate": 0.00014713623722171297,
      "loss": 2.726,
      "step": 154413
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5173261165618896,
      "learning_rate": 0.00014713271753832993,
      "loss": 2.8788,
      "step": 154414
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4959633350372314,
      "learning_rate": 0.00014712919788336794,
      "loss": 3.05,
      "step": 154415
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.645268201828003,
      "learning_rate": 0.0001471256782568274,
      "loss": 2.9746,
      "step": 154416
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.513167381286621,
      "learning_rate": 0.00014712215865870918,
      "loss": 3.0293,
      "step": 154417
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.017030715942383,
      "learning_rate": 0.00014711863908901384,
      "loss": 2.9976,
      "step": 154418
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.601567506790161,
      "learning_rate": 0.00014711511954774206,
      "loss": 2.8152,
      "step": 154419
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.482548952102661,
      "learning_rate": 0.0001471116000348943,
      "loss": 3.1678,
      "step": 154420
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.914168357849121,
      "learning_rate": 0.00014710808055047153,
      "loss": 3.1871,
      "step": 154421
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.768444061279297,
      "learning_rate": 0.0001471045610944741,
      "loss": 3.1855,
      "step": 154422
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.252068519592285,
      "learning_rate": 0.000147101041666903,
      "loss": 3.1025,
      "step": 154423
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2085931301116943,
      "learning_rate": 0.00014709752226775864,
      "loss": 2.7515,
      "step": 154424
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6317336559295654,
      "learning_rate": 0.00014709400289704178,
      "loss": 2.9844,
      "step": 154425
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.053551197052002,
      "learning_rate": 0.00014709048355475288,
      "loss": 2.7811,
      "step": 154426
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.323455333709717,
      "learning_rate": 0.00014708696424089288,
      "loss": 2.8994,
      "step": 154427
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9007562398910522,
      "learning_rate": 0.00014708344495546218,
      "loss": 2.9791,
      "step": 154428
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2878873348236084,
      "learning_rate": 0.00014707992569846164,
      "loss": 2.7325,
      "step": 154429
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.423442840576172,
      "learning_rate": 0.0001470764064698919,
      "loss": 3.1031,
      "step": 154430
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.673100233078003,
      "learning_rate": 0.00014707288726975348,
      "loss": 2.9977,
      "step": 154431
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.716789960861206,
      "learning_rate": 0.00014706936809804702,
      "loss": 3.1155,
      "step": 154432
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1503453254699707,
      "learning_rate": 0.00014706584895477333,
      "loss": 2.8232,
      "step": 154433
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5898754596710205,
      "learning_rate": 0.0001470623298399329,
      "loss": 3.1165,
      "step": 154434
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.00748348236084,
      "learning_rate": 0.00014705881075352658,
      "loss": 2.8949,
      "step": 154435
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3948400020599365,
      "learning_rate": 0.00014705529169555488,
      "loss": 2.9163,
      "step": 154436
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.459941864013672,
      "learning_rate": 0.00014705177266601853,
      "loss": 2.8931,
      "step": 154437
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.303086519241333,
      "learning_rate": 0.00014704825366491803,
      "loss": 2.9093,
      "step": 154438
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1878502368927,
      "learning_rate": 0.00014704473469225423,
      "loss": 2.9576,
      "step": 154439
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4301412105560303,
      "learning_rate": 0.0001470412157480276,
      "loss": 2.945,
      "step": 154440
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9969720840454102,
      "learning_rate": 0.00014703769683223903,
      "loss": 3.2069,
      "step": 154441
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.288804531097412,
      "learning_rate": 0.00014703417794488904,
      "loss": 3.0542,
      "step": 154442
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3913285732269287,
      "learning_rate": 0.00014703065908597824,
      "loss": 2.8809,
      "step": 154443
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.715554714202881,
      "learning_rate": 0.00014702714025550723,
      "loss": 2.9355,
      "step": 154444
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4712092876434326,
      "learning_rate": 0.00014702362145347687,
      "loss": 2.9788,
      "step": 154445
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.029675245285034,
      "learning_rate": 0.00014702010267988764,
      "loss": 3.0055,
      "step": 154446
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2496426105499268,
      "learning_rate": 0.00014701658393474035,
      "loss": 3.1035,
      "step": 154447
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.207245111465454,
      "learning_rate": 0.00014701306521803554,
      "loss": 3.0122,
      "step": 154448
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9700021743774414,
      "learning_rate": 0.0001470095465297739,
      "loss": 3.2044,
      "step": 154449
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.138598680496216,
      "learning_rate": 0.00014700602786995597,
      "loss": 3.2574,
      "step": 154450
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5584664344787598,
      "learning_rate": 0.00014700250923858264,
      "loss": 2.8763,
      "step": 154451
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.249459743499756,
      "learning_rate": 0.00014699899063565428,
      "loss": 3.3287,
      "step": 154452
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3455426692962646,
      "learning_rate": 0.00014699547206117183,
      "loss": 2.9785,
      "step": 154453
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.423581600189209,
      "learning_rate": 0.00014699195351513582,
      "loss": 3.0877,
      "step": 154454
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8370590209960938,
      "learning_rate": 0.00014698843499754685,
      "loss": 3.0596,
      "step": 154455
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4634318351745605,
      "learning_rate": 0.00014698491650840556,
      "loss": 2.7629,
      "step": 154456
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.235407829284668,
      "learning_rate": 0.00014698139804771276,
      "loss": 2.9461,
      "step": 154457
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.259821653366089,
      "learning_rate": 0.00014697787961546885,
      "loss": 2.9641,
      "step": 154458
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.207105875015259,
      "learning_rate": 0.0001469743612116748,
      "loss": 2.8004,
      "step": 154459
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1828577518463135,
      "learning_rate": 0.0001469708428363311,
      "loss": 3.0872,
      "step": 154460
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.878390312194824,
      "learning_rate": 0.00014696732448943838,
      "loss": 2.9648,
      "step": 154461
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2823843955993652,
      "learning_rate": 0.00014696380617099724,
      "loss": 3.1236,
      "step": 154462
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8103573322296143,
      "learning_rate": 0.00014696028788100852,
      "loss": 2.9706,
      "step": 154463
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.814004898071289,
      "learning_rate": 0.00014695676961947265,
      "loss": 2.7028,
      "step": 154464
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1433732509613037,
      "learning_rate": 0.00014695325138639056,
      "loss": 2.7052,
      "step": 154465
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.344180107116699,
      "learning_rate": 0.00014694973318176268,
      "loss": 2.7855,
      "step": 154466
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0492770671844482,
      "learning_rate": 0.0001469462150055898,
      "loss": 3.0837,
      "step": 154467
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.21537184715271,
      "learning_rate": 0.00014694269685787235,
      "loss": 2.9843,
      "step": 154468
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2146408557891846,
      "learning_rate": 0.00014693917873861127,
      "loss": 2.7427,
      "step": 154469
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4176957607269287,
      "learning_rate": 0.000146935660647807,
      "loss": 3.157,
      "step": 154470
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8586764335632324,
      "learning_rate": 0.00014693214258546035,
      "loss": 3.1915,
      "step": 154471
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4851608276367188,
      "learning_rate": 0.0001469286245515718,
      "loss": 2.98,
      "step": 154472
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0723910331726074,
      "learning_rate": 0.00014692510654614233,
      "loss": 2.7704,
      "step": 154473
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7947049140930176,
      "learning_rate": 0.00014692158856917216,
      "loss": 2.9791,
      "step": 154474
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.8644416332244873,
      "learning_rate": 0.00014691807062066226,
      "loss": 2.7706,
      "step": 154475
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.427983283996582,
      "learning_rate": 0.0001469145527006131,
      "loss": 2.9197,
      "step": 154476
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.822887420654297,
      "learning_rate": 0.00014691103480902548,
      "loss": 2.7179,
      "step": 154477
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7083218097686768,
      "learning_rate": 0.0001469075169458999,
      "loss": 3.0096,
      "step": 154478
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3518035411834717,
      "learning_rate": 0.00014690399911123722,
      "loss": 2.6983,
      "step": 154479
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8430943489074707,
      "learning_rate": 0.00014690048130503795,
      "loss": 2.9943,
      "step": 154480
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3838605880737305,
      "learning_rate": 0.00014689696352730282,
      "loss": 2.9453,
      "step": 154481
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6985080242156982,
      "learning_rate": 0.00014689344577803226,
      "loss": 2.9804,
      "step": 154482
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2203245162963867,
      "learning_rate": 0.00014688992805722727,
      "loss": 3.0419,
      "step": 154483
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.480515480041504,
      "learning_rate": 0.00014688641036488816,
      "loss": 2.8224,
      "step": 154484
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.248805522918701,
      "learning_rate": 0.00014688289270101593,
      "loss": 2.937,
      "step": 154485
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2752480506896973,
      "learning_rate": 0.00014687937506561102,
      "loss": 2.9588,
      "step": 154486
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.669232130050659,
      "learning_rate": 0.00014687585745867403,
      "loss": 2.985,
      "step": 154487
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7931599617004395,
      "learning_rate": 0.00014687233988020583,
      "loss": 2.7885,
      "step": 154488
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0042383670806885,
      "learning_rate": 0.00014686882233020692,
      "loss": 3.0125,
      "step": 154489
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1534831523895264,
      "learning_rate": 0.0001468653048086779,
      "loss": 2.885,
      "step": 154490
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6974027156829834,
      "learning_rate": 0.0001468617873156196,
      "loss": 2.7811,
      "step": 154491
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.279644727706909,
      "learning_rate": 0.00014685826985103261,
      "loss": 3.1247,
      "step": 154492
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.4283645153045654,
      "learning_rate": 0.00014685475241491743,
      "loss": 2.9623,
      "step": 154493
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0851547718048096,
      "learning_rate": 0.00014685123500727496,
      "loss": 2.9866,
      "step": 154494
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1445553302764893,
      "learning_rate": 0.00014684771762810573,
      "loss": 3.1854,
      "step": 154495
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.428046941757202,
      "learning_rate": 0.00014684420027741028,
      "loss": 2.9191,
      "step": 154496
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9510575532913208,
      "learning_rate": 0.00014684068295518955,
      "loss": 3.015,
      "step": 154497
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.198244333267212,
      "learning_rate": 0.00014683716566144398,
      "loss": 2.8372,
      "step": 154498
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4167826175689697,
      "learning_rate": 0.00014683364839617415,
      "loss": 2.9675,
      "step": 154499
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.451653480529785,
      "learning_rate": 0.00014683013115938097,
      "loss": 2.8691,
      "step": 154500
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.542902708053589,
      "learning_rate": 0.00014682661395106484,
      "loss": 3.0808,
      "step": 154501
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.686495780944824,
      "learning_rate": 0.00014682309677122667,
      "loss": 2.8746,
      "step": 154502
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2184267044067383,
      "learning_rate": 0.00014681957961986693,
      "loss": 3.0416,
      "step": 154503
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5161874294281006,
      "learning_rate": 0.00014681606249698637,
      "loss": 2.8295,
      "step": 154504
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.059480667114258,
      "learning_rate": 0.00014681254540258544,
      "loss": 3.1516,
      "step": 154505
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2488670349121094,
      "learning_rate": 0.00014680902833666512,
      "loss": 2.8323,
      "step": 154506
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2704577445983887,
      "learning_rate": 0.00014680551129922574,
      "loss": 2.9716,
      "step": 154507
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2274959087371826,
      "learning_rate": 0.00014680199429026824,
      "loss": 2.8515,
      "step": 154508
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3118131160736084,
      "learning_rate": 0.00014679847730979313,
      "loss": 2.9368,
      "step": 154509
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9442194700241089,
      "learning_rate": 0.00014679496035780105,
      "loss": 3.0584,
      "step": 154510
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.301119327545166,
      "learning_rate": 0.00014679144343429258,
      "loss": 3.0254,
      "step": 154511
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1632115840911865,
      "learning_rate": 0.0001467879265392686,
      "loss": 3.2161,
      "step": 154512
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.865330219268799,
      "learning_rate": 0.0001467844096727295,
      "loss": 3.0569,
      "step": 154513
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.446028709411621,
      "learning_rate": 0.0001467808928346762,
      "loss": 2.7605,
      "step": 154514
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5715601444244385,
      "learning_rate": 0.00014677737602510925,
      "loss": 2.6854,
      "step": 154515
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.326866388320923,
      "learning_rate": 0.00014677385924402925,
      "loss": 3.0077,
      "step": 154516
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.464829683303833,
      "learning_rate": 0.00014677034249143675,
      "loss": 2.8987,
      "step": 154517
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6093578338623047,
      "learning_rate": 0.00014676682576733268,
      "loss": 3.0625,
      "step": 154518
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3151283264160156,
      "learning_rate": 0.00014676330907171741,
      "loss": 2.8941,
      "step": 154519
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0153493881225586,
      "learning_rate": 0.0001467597924045919,
      "loss": 2.8625,
      "step": 154520
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.22463059425354,
      "learning_rate": 0.0001467562757659566,
      "loss": 3.002,
      "step": 154521
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.61334490776062,
      "learning_rate": 0.00014675275915581218,
      "loss": 2.9499,
      "step": 154522
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.351783514022827,
      "learning_rate": 0.00014674924257415922,
      "loss": 2.7867,
      "step": 154523
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.646086692810059,
      "learning_rate": 0.0001467457260209986,
      "loss": 3.0658,
      "step": 154524
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5199522972106934,
      "learning_rate": 0.0001467422094963307,
      "loss": 3.0085,
      "step": 154525
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.16351056098938,
      "learning_rate": 0.00014673869300015646,
      "loss": 3.139,
      "step": 154526
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.188507556915283,
      "learning_rate": 0.00014673517653247635,
      "loss": 3.0755,
      "step": 154527
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.233691692352295,
      "learning_rate": 0.0001467316600932911,
      "loss": 2.9982,
      "step": 154528
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4839906692504883,
      "learning_rate": 0.0001467281436826012,
      "loss": 2.9462,
      "step": 154529
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3769168853759766,
      "learning_rate": 0.00014672462730040757,
      "loss": 3.1919,
      "step": 154530
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2274954319000244,
      "learning_rate": 0.00014672111094671058,
      "loss": 3.2105,
      "step": 154531
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1833269596099854,
      "learning_rate": 0.0001467175946215111,
      "loss": 2.9274,
      "step": 154532
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.755244493484497,
      "learning_rate": 0.00014671407832480982,
      "loss": 2.6002,
      "step": 154533
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.853161573410034,
      "learning_rate": 0.0001467105620566072,
      "loss": 3.0285,
      "step": 154534
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.041057825088501,
      "learning_rate": 0.0001467070458169039,
      "loss": 2.9677,
      "step": 154535
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.498680591583252,
      "learning_rate": 0.00014670352960570077,
      "loss": 2.7437,
      "step": 154536
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.128068447113037,
      "learning_rate": 0.00014670001342299824,
      "loss": 2.957,
      "step": 154537
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.223430871963501,
      "learning_rate": 0.00014669649726879716,
      "loss": 3.0687,
      "step": 154538
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.215519666671753,
      "learning_rate": 0.000146692981143098,
      "loss": 2.9144,
      "step": 154539
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.346306800842285,
      "learning_rate": 0.00014668946504590172,
      "loss": 2.8233,
      "step": 154540
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4509308338165283,
      "learning_rate": 0.00014668594897720852,
      "loss": 2.8177,
      "step": 154541
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.667746067047119,
      "learning_rate": 0.00014668243293701946,
      "loss": 2.9689,
      "step": 154542
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.361149311065674,
      "learning_rate": 0.0001466789169253349,
      "loss": 2.7575,
      "step": 154543
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.644742250442505,
      "learning_rate": 0.00014667540094215572,
      "loss": 3.1384,
      "step": 154544
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1862215995788574,
      "learning_rate": 0.00014667188498748235,
      "loss": 2.7817,
      "step": 154545
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1247925758361816,
      "learning_rate": 0.00014666836906131585,
      "loss": 3.0892,
      "step": 154546
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.317370891571045,
      "learning_rate": 0.00014666485316365633,
      "loss": 3.006,
      "step": 154547
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.441594123840332,
      "learning_rate": 0.00014666133729450482,
      "loss": 2.7695,
      "step": 154548
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1669719219207764,
      "learning_rate": 0.00014665782145386176,
      "loss": 3.0321,
      "step": 154549
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.072519063949585,
      "learning_rate": 0.00014665430564172801,
      "loss": 2.7607,
      "step": 154550
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.409396171569824,
      "learning_rate": 0.000146650789858104,
      "loss": 2.8817,
      "step": 154551
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.227956533432007,
      "learning_rate": 0.0001466472741029908,
      "loss": 2.911,
      "step": 154552
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9076991081237793,
      "learning_rate": 0.00014664375837638842,
      "loss": 2.68,
      "step": 154553
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5776171684265137,
      "learning_rate": 0.00014664024267829805,
      "loss": 2.7577,
      "step": 154554
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3731300830841064,
      "learning_rate": 0.00014663672700872003,
      "loss": 3.0793,
      "step": 154555
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2020907402038574,
      "learning_rate": 0.00014663321136765525,
      "loss": 3.0537,
      "step": 154556
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.856445074081421,
      "learning_rate": 0.00014662969575510412,
      "loss": 2.6961,
      "step": 154557
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.844346761703491,
      "learning_rate": 0.00014662618017106767,
      "loss": 2.9473,
      "step": 154558
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0705840587615967,
      "learning_rate": 0.00014662266461554606,
      "loss": 2.9649,
      "step": 154559
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3995649814605713,
      "learning_rate": 0.00014661914908854033,
      "loss": 2.6289,
      "step": 154560
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.517486572265625,
      "learning_rate": 0.00014661563359005087,
      "loss": 3.254,
      "step": 154561
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5623233318328857,
      "learning_rate": 0.0001466121181200786,
      "loss": 2.8632,
      "step": 154562
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2663612365722656,
      "learning_rate": 0.0001466086026786239,
      "loss": 2.9658,
      "step": 154563
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1487414836883545,
      "learning_rate": 0.00014660508726568763,
      "loss": 3.0005,
      "step": 154564
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4726407527923584,
      "learning_rate": 0.00014660157188127045,
      "loss": 2.8114,
      "step": 154565
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6512677669525146,
      "learning_rate": 0.00014659805652537288,
      "loss": 2.9402,
      "step": 154566
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.60756778717041,
      "learning_rate": 0.00014659454119799552,
      "loss": 2.9114,
      "step": 154567
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.451378107070923,
      "learning_rate": 0.00014659102589913923,
      "loss": 3.1146,
      "step": 154568
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.93881893157959,
      "learning_rate": 0.00014658751062880448,
      "loss": 3.0159,
      "step": 154569
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.140028476715088,
      "learning_rate": 0.00014658399538699209,
      "loss": 3.0811,
      "step": 154570
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7282986640930176,
      "learning_rate": 0.00014658048017370266,
      "loss": 2.9212,
      "step": 154571
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6430461406707764,
      "learning_rate": 0.0001465769649889368,
      "loss": 2.9788,
      "step": 154572
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0720179080963135,
      "learning_rate": 0.00014657344983269507,
      "loss": 3.0716,
      "step": 154573
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.824697971343994,
      "learning_rate": 0.00014656993470497833,
      "loss": 2.5264,
      "step": 154574
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.385101795196533,
      "learning_rate": 0.00014656641960578707,
      "loss": 3.0548,
      "step": 154575
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2872776985168457,
      "learning_rate": 0.00014656290453512205,
      "loss": 2.9545,
      "step": 154576
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.039536476135254,
      "learning_rate": 0.00014655938949298392,
      "loss": 3.0457,
      "step": 154577
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2653164863586426,
      "learning_rate": 0.00014655587447937317,
      "loss": 2.7444,
      "step": 154578
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.437061309814453,
      "learning_rate": 0.00014655235949429072,
      "loss": 2.85,
      "step": 154579
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.292738437652588,
      "learning_rate": 0.00014654884453773706,
      "loss": 2.9042,
      "step": 154580
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.601534366607666,
      "learning_rate": 0.0001465453296097128,
      "loss": 3.0694,
      "step": 154581
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3245012760162354,
      "learning_rate": 0.00014654181471021873,
      "loss": 3.1147,
      "step": 154582
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4943337440490723,
      "learning_rate": 0.0001465382998392554,
      "loss": 2.9215,
      "step": 154583
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5128743648529053,
      "learning_rate": 0.00014653478499682343,
      "loss": 3.1174,
      "step": 154584
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4208884239196777,
      "learning_rate": 0.00014653127018292368,
      "loss": 2.866,
      "step": 154585
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.211338043212891,
      "learning_rate": 0.0001465277553975565,
      "loss": 3.1429,
      "step": 154586
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4230616092681885,
      "learning_rate": 0.00014652424064072285,
      "loss": 3.3109,
      "step": 154587
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3156352043151855,
      "learning_rate": 0.00014652072591242322,
      "loss": 2.7278,
      "step": 154588
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.973160743713379,
      "learning_rate": 0.0001465172112126583,
      "loss": 3.0764,
      "step": 154589
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6774299144744873,
      "learning_rate": 0.0001465136965414286,
      "loss": 2.9461,
      "step": 154590
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3608837127685547,
      "learning_rate": 0.000146510181898735,
      "loss": 2.6992,
      "step": 154591
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7280027866363525,
      "learning_rate": 0.00014650666728457799,
      "loss": 3.1826,
      "step": 154592
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6833646297454834,
      "learning_rate": 0.00014650315269895833,
      "loss": 3.0491,
      "step": 154593
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8087732791900635,
      "learning_rate": 0.0001464996381418767,
      "loss": 2.743,
      "step": 154594
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2622058391571045,
      "learning_rate": 0.00014649612361333365,
      "loss": 2.7165,
      "step": 154595
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3006558418273926,
      "learning_rate": 0.00014649260911332975,
      "loss": 2.8736,
      "step": 154596
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0083301067352295,
      "learning_rate": 0.0001464890946418659,
      "loss": 2.9525,
      "step": 154597
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4240434169769287,
      "learning_rate": 0.00014648558019894254,
      "loss": 2.872,
      "step": 154598
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2806336879730225,
      "learning_rate": 0.0001464820657845605,
      "loss": 3.0506,
      "step": 154599
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.32352876663208,
      "learning_rate": 0.00014647855139872032,
      "loss": 3.1048,
      "step": 154600
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4221251010894775,
      "learning_rate": 0.00014647503704142266,
      "loss": 2.9451,
      "step": 154601
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.028113603591919,
      "learning_rate": 0.00014647152271266814,
      "loss": 2.8296,
      "step": 154602
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7187469005584717,
      "learning_rate": 0.00014646800841245752,
      "loss": 2.8864,
      "step": 154603
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2936744689941406,
      "learning_rate": 0.0001464644941407913,
      "loss": 2.9014,
      "step": 154604
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.4519519805908203,
      "learning_rate": 0.00014646097989767036,
      "loss": 2.8495,
      "step": 154605
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4037561416625977,
      "learning_rate": 0.00014645746568309507,
      "loss": 3.0373,
      "step": 154606
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.192805051803589,
      "learning_rate": 0.0001464539514970665,
      "loss": 2.9952,
      "step": 154607
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.422377824783325,
      "learning_rate": 0.0001464504373395848,
      "loss": 2.9301,
      "step": 154608
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9506428241729736,
      "learning_rate": 0.00014644692321065097,
      "loss": 2.8944,
      "step": 154609
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.493472099304199,
      "learning_rate": 0.00014644340911026542,
      "loss": 2.9086,
      "step": 154610
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1128785610198975,
      "learning_rate": 0.0001464398950384291,
      "loss": 2.9241,
      "step": 154611
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.115110397338867,
      "learning_rate": 0.00014643638099514234,
      "loss": 2.9143,
      "step": 154612
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.358703374862671,
      "learning_rate": 0.00014643286698040622,
      "loss": 2.8065,
      "step": 154613
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.154085874557495,
      "learning_rate": 0.0001464293529942209,
      "loss": 3.0727,
      "step": 154614
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6044654846191406,
      "learning_rate": 0.00014642583903658735,
      "loss": 3.1026,
      "step": 154615
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3232533931732178,
      "learning_rate": 0.00014642232510750605,
      "loss": 2.7988,
      "step": 154616
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4160711765289307,
      "learning_rate": 0.00014641881120697786,
      "loss": 2.7934,
      "step": 154617
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0867109298706055,
      "learning_rate": 0.00014641529733500316,
      "loss": 3.1571,
      "step": 154618
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7658896446228027,
      "learning_rate": 0.000146411783491583,
      "loss": 2.7395,
      "step": 154619
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.6955344676971436,
      "learning_rate": 0.00014640826967671756,
      "loss": 2.8284,
      "step": 154620
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4510648250579834,
      "learning_rate": 0.00014640475589040785,
      "loss": 2.7394,
      "step": 154621
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4060919284820557,
      "learning_rate": 0.00014640124213265428,
      "loss": 2.8029,
      "step": 154622
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0861740112304688,
      "learning_rate": 0.0001463977284034577,
      "loss": 2.7875,
      "step": 154623
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.106806516647339,
      "learning_rate": 0.00014639421470281862,
      "loss": 3.0524,
      "step": 154624
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.095012664794922,
      "learning_rate": 0.00014639070103073795,
      "loss": 2.9129,
      "step": 154625
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.204866886138916,
      "learning_rate": 0.0001463871873872159,
      "loss": 2.8719,
      "step": 154626
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.885371685028076,
      "learning_rate": 0.00014638367377225352,
      "loss": 2.8234,
      "step": 154627
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2510390281677246,
      "learning_rate": 0.00014638016018585117,
      "loss": 2.8619,
      "step": 154628
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.4422497749328613,
      "learning_rate": 0.0001463766466280098,
      "loss": 3.041,
      "step": 154629
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1189913749694824,
      "learning_rate": 0.00014637313309872978,
      "loss": 3.1243,
      "step": 154630
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2253119945526123,
      "learning_rate": 0.00014636961959801212,
      "loss": 2.7874,
      "step": 154631
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.742152690887451,
      "learning_rate": 0.00014636610612585703,
      "loss": 2.9436,
      "step": 154632
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.803405523300171,
      "learning_rate": 0.00014636259268226547,
      "loss": 3.1064,
      "step": 154633
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7468276023864746,
      "learning_rate": 0.0001463590792672379,
      "loss": 2.9612,
      "step": 154634
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7918262481689453,
      "learning_rate": 0.00014635556588077519,
      "loss": 2.9996,
      "step": 154635
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4559056758880615,
      "learning_rate": 0.0001463520525228778,
      "loss": 2.6995,
      "step": 154636
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4781222343444824,
      "learning_rate": 0.00014634853919354667,
      "loss": 2.9853,
      "step": 154637
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.9994285106658936,
      "learning_rate": 0.00014634502589278201,
      "loss": 2.7565,
      "step": 154638
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2383058071136475,
      "learning_rate": 0.00014634151262058482,
      "loss": 3.0061,
      "step": 154639
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1302244663238525,
      "learning_rate": 0.00014633799937695553,
      "loss": 2.9518,
      "step": 154640
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4813621044158936,
      "learning_rate": 0.00014633448616189503,
      "loss": 3.0651,
      "step": 154641
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1776232719421387,
      "learning_rate": 0.00014633097297540374,
      "loss": 2.8762,
      "step": 154642
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1593971252441406,
      "learning_rate": 0.00014632745981748262,
      "loss": 2.9204,
      "step": 154643
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2608633041381836,
      "learning_rate": 0.00014632394668813193,
      "loss": 3.1558,
      "step": 154644
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.945845365524292,
      "learning_rate": 0.00014632043358735265,
      "loss": 2.6318,
      "step": 154645
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.20133638381958,
      "learning_rate": 0.00014631692051514513,
      "loss": 2.8807,
      "step": 154646
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4551007747650146,
      "learning_rate": 0.00014631340747151038,
      "loss": 2.8354,
      "step": 154647
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2570955753326416,
      "learning_rate": 0.0001463098944564487,
      "loss": 2.9649,
      "step": 154648
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4951012134552,
      "learning_rate": 0.00014630638146996105,
      "loss": 3.053,
      "step": 154649
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0417706966400146,
      "learning_rate": 0.00014630286851204796,
      "loss": 3.049,
      "step": 154650
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8333868980407715,
      "learning_rate": 0.00014629935558271002,
      "loss": 3.1144,
      "step": 154651
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4671828746795654,
      "learning_rate": 0.00014629584268194786,
      "loss": 3.0267,
      "step": 154652
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2530946731567383,
      "learning_rate": 0.00014629232980976228,
      "loss": 3.1278,
      "step": 154653
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5767738819122314,
      "learning_rate": 0.00014628881696615378,
      "loss": 3.0453,
      "step": 154654
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3730266094207764,
      "learning_rate": 0.0001462853041511232,
      "loss": 2.8857,
      "step": 154655
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8422744274139404,
      "learning_rate": 0.00014628179136467106,
      "loss": 2.8545,
      "step": 154656
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7845473289489746,
      "learning_rate": 0.00014627827860679804,
      "loss": 2.8951,
      "step": 154657
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.381560802459717,
      "learning_rate": 0.0001462747658775047,
      "loss": 3.0885,
      "step": 154658
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.9739022254943848,
      "learning_rate": 0.00014627125317679188,
      "loss": 2.9353,
      "step": 154659
    },
    {
      "epoch": 2.01,
      "grad_norm": 5.596831321716309,
      "learning_rate": 0.00014626774050466003,
      "loss": 3.0815,
      "step": 154660
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4314141273498535,
      "learning_rate": 0.00014626422786111,
      "loss": 2.9958,
      "step": 154661
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2299435138702393,
      "learning_rate": 0.00014626071524614237,
      "loss": 2.9482,
      "step": 154662
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3717353343963623,
      "learning_rate": 0.0001462572026597577,
      "loss": 2.739,
      "step": 154663
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7484140396118164,
      "learning_rate": 0.00014625369010195678,
      "loss": 3.0318,
      "step": 154664
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2190287113189697,
      "learning_rate": 0.0001462501775727402,
      "loss": 2.9891,
      "step": 154665
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9178773164749146,
      "learning_rate": 0.00014624666507210852,
      "loss": 2.7616,
      "step": 154666
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4753594398498535,
      "learning_rate": 0.0001462431526000626,
      "loss": 3.2467,
      "step": 154667
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.731628894805908,
      "learning_rate": 0.00014623964015660296,
      "loss": 2.8719,
      "step": 154668
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.99640154838562,
      "learning_rate": 0.00014623612774173017,
      "loss": 3.1016,
      "step": 154669
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8228774070739746,
      "learning_rate": 0.0001462326153554451,
      "loss": 3.0835,
      "step": 154670
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.232184648513794,
      "learning_rate": 0.00014622910299774832,
      "loss": 2.8209,
      "step": 154671
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7908849716186523,
      "learning_rate": 0.0001462255906686403,
      "loss": 2.8199,
      "step": 154672
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6321120262145996,
      "learning_rate": 0.00014622207836812197,
      "loss": 3.0481,
      "step": 154673
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.091923236846924,
      "learning_rate": 0.0001462185660961939,
      "loss": 3.1759,
      "step": 154674
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.140228271484375,
      "learning_rate": 0.00014621505385285652,
      "loss": 2.7881,
      "step": 154675
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.666309118270874,
      "learning_rate": 0.00014621154163811081,
      "loss": 2.9706,
      "step": 154676
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.6731457710266113,
      "learning_rate": 0.00014620802945195717,
      "loss": 2.9431,
      "step": 154677
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.963402271270752,
      "learning_rate": 0.00014620451729439646,
      "loss": 2.8642,
      "step": 154678
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4091861248016357,
      "learning_rate": 0.00014620100516542922,
      "loss": 2.8618,
      "step": 154679
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.153557062149048,
      "learning_rate": 0.00014619749306505613,
      "loss": 2.7797,
      "step": 154680
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.644568681716919,
      "learning_rate": 0.00014619398099327774,
      "loss": 3.0218,
      "step": 154681
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5858654975891113,
      "learning_rate": 0.0001461904689500949,
      "loss": 3.0993,
      "step": 154682
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.06411600112915,
      "learning_rate": 0.00014618695693550802,
      "loss": 2.7572,
      "step": 154683
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8449556827545166,
      "learning_rate": 0.000146183444949518,
      "loss": 3.0413,
      "step": 154684
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4017484188079834,
      "learning_rate": 0.0001461799329921253,
      "loss": 2.8967,
      "step": 154685
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.176599979400635,
      "learning_rate": 0.00014617642106333085,
      "loss": 3.0651,
      "step": 154686
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.845236778259277,
      "learning_rate": 0.00014617290916313485,
      "loss": 3.0199,
      "step": 154687
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.189420223236084,
      "learning_rate": 0.00014616939729153837,
      "loss": 3.1839,
      "step": 154688
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.0311081409454346,
      "learning_rate": 0.0001461658854485418,
      "loss": 3.0668,
      "step": 154689
    },
    {
      "epoch": 2.01,
      "grad_norm": 1.9769641160964966,
      "learning_rate": 0.00014616237363414598,
      "loss": 3.1687,
      "step": 154690
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.570446014404297,
      "learning_rate": 0.00014615886184835139,
      "loss": 2.8583,
      "step": 154691
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3887317180633545,
      "learning_rate": 0.00014615535009115897,
      "loss": 2.9936,
      "step": 154692
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.879720449447632,
      "learning_rate": 0.00014615183836256895,
      "loss": 2.8826,
      "step": 154693
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7157907485961914,
      "learning_rate": 0.00014614832666258234,
      "loss": 3.0807,
      "step": 154694
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.511838436126709,
      "learning_rate": 0.00014614481499119954,
      "loss": 2.9097,
      "step": 154695
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.956536293029785,
      "learning_rate": 0.00014614130334842144,
      "loss": 3.1631,
      "step": 154696
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.521801471710205,
      "learning_rate": 0.00014613779173424846,
      "loss": 2.619,
      "step": 154697
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4754528999328613,
      "learning_rate": 0.0001461342801486816,
      "loss": 2.8279,
      "step": 154698
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.0579686164855957,
      "learning_rate": 0.00014613076859172104,
      "loss": 3.2291,
      "step": 154699
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1596570014953613,
      "learning_rate": 0.0001461272570633678,
      "loss": 3.1282,
      "step": 154700
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6153922080993652,
      "learning_rate": 0.0001461237455636223,
      "loss": 2.7761,
      "step": 154701
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.020113706588745,
      "learning_rate": 0.0001461202340924854,
      "loss": 2.9435,
      "step": 154702
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3096134662628174,
      "learning_rate": 0.00014611672264995754,
      "loss": 2.7895,
      "step": 154703
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8698225021362305,
      "learning_rate": 0.00014611321123603972,
      "loss": 2.9494,
      "step": 154704
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4269356727600098,
      "learning_rate": 0.00014610969985073214,
      "loss": 2.8963,
      "step": 154705
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3097262382507324,
      "learning_rate": 0.00014610618849403577,
      "loss": 2.8267,
      "step": 154706
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.188537836074829,
      "learning_rate": 0.00014610267716595103,
      "loss": 2.8317,
      "step": 154707
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6803274154663086,
      "learning_rate": 0.00014609916586647888,
      "loss": 3.0041,
      "step": 154708
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5287528038024902,
      "learning_rate": 0.00014609565459561963,
      "loss": 2.6293,
      "step": 154709
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.3934569358825684,
      "learning_rate": 0.00014609214335337437,
      "loss": 2.9807,
      "step": 154710
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2794430255889893,
      "learning_rate": 0.00014608863213974322,
      "loss": 3.0442,
      "step": 154711
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.588284969329834,
      "learning_rate": 0.00014608512095472725,
      "loss": 3.1367,
      "step": 154712
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2158119678497314,
      "learning_rate": 0.00014608160979832682,
      "loss": 2.9129,
      "step": 154713
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.301001787185669,
      "learning_rate": 0.00014607809867054285,
      "loss": 2.9091,
      "step": 154714
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.008259296417236,
      "learning_rate": 0.00014607458757137577,
      "loss": 2.7861,
      "step": 154715
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.7642807960510254,
      "learning_rate": 0.00014607107650082654,
      "loss": 3.0369,
      "step": 154716
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1048171520233154,
      "learning_rate": 0.0001460675654588954,
      "loss": 2.8648,
      "step": 154717
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5481324195861816,
      "learning_rate": 0.00014606405444558324,
      "loss": 3.065,
      "step": 154718
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.7167704105377197,
      "learning_rate": 0.00014606054346089065,
      "loss": 2.9377,
      "step": 154719
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.1818671226501465,
      "learning_rate": 0.00014605703250481836,
      "loss": 2.742,
      "step": 154720
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.6106667518615723,
      "learning_rate": 0.0001460535215773669,
      "loss": 2.8892,
      "step": 154721
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.394500732421875,
      "learning_rate": 0.0001460500106785372,
      "loss": 3.0575,
      "step": 154722
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.969318151473999,
      "learning_rate": 0.0001460464998083295,
      "loss": 3.0637,
      "step": 154723
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2906789779663086,
      "learning_rate": 0.0001460429889667448,
      "loss": 3.0012,
      "step": 154724
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4825711250305176,
      "learning_rate": 0.00014603947815378348,
      "loss": 3.0756,
      "step": 154725
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.1121153831481934,
      "learning_rate": 0.00014603596736944644,
      "loss": 2.7766,
      "step": 154726
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.197042226791382,
      "learning_rate": 0.0001460324566137341,
      "loss": 2.8879,
      "step": 154727
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2407219409942627,
      "learning_rate": 0.00014602894588664744,
      "loss": 2.7144,
      "step": 154728
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8529608249664307,
      "learning_rate": 0.00014602543518818671,
      "loss": 2.9394,
      "step": 154729
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8807578086853027,
      "learning_rate": 0.00014602192451835288,
      "loss": 3.1087,
      "step": 154730
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.3975625038146973,
      "learning_rate": 0.00014601841387714632,
      "loss": 2.8815,
      "step": 154731
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.330274820327759,
      "learning_rate": 0.000146014903264568,
      "loss": 2.9645,
      "step": 154732
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.241445541381836,
      "learning_rate": 0.00014601139268061828,
      "loss": 2.8677,
      "step": 154733
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4570798873901367,
      "learning_rate": 0.00014600788212529818,
      "loss": 2.9717,
      "step": 154734
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.748807430267334,
      "learning_rate": 0.0001460043715986079,
      "loss": 2.7276,
      "step": 154735
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.259661912918091,
      "learning_rate": 0.00014600086110054842,
      "loss": 2.7598,
      "step": 154736
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.5723209381103516,
      "learning_rate": 0.00014599735063112017,
      "loss": 3.0011,
      "step": 154737
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.179278612136841,
      "learning_rate": 0.00014599384019032408,
      "loss": 3.0491,
      "step": 154738
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.180664300918579,
      "learning_rate": 0.00014599032977816048,
      "loss": 2.889,
      "step": 154739
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.2368721961975098,
      "learning_rate": 0.00014598681939463032,
      "loss": 2.8703,
      "step": 154740
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.601879835128784,
      "learning_rate": 0.0001459833090397341,
      "loss": 3.1631,
      "step": 154741
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.440761089324951,
      "learning_rate": 0.0001459797987134725,
      "loss": 2.9496,
      "step": 154742
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.206418037414551,
      "learning_rate": 0.00014597628841584606,
      "loss": 2.9923,
      "step": 154743
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.995903253555298,
      "learning_rate": 0.0001459727781468556,
      "loss": 3.0832,
      "step": 154744
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.684868335723877,
      "learning_rate": 0.00014596926790650164,
      "loss": 3.0467,
      "step": 154745
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4636754989624023,
      "learning_rate": 0.00014596575769478502,
      "loss": 2.9707,
      "step": 154746
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.807934284210205,
      "learning_rate": 0.00014596224751170625,
      "loss": 2.6897,
      "step": 154747
    },
    {
      "epoch": 2.01,
      "grad_norm": 4.042628288269043,
      "learning_rate": 0.0001459587373572659,
      "loss": 3.0068,
      "step": 154748
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.2467041015625,
      "learning_rate": 0.00014595522723146484,
      "loss": 2.9019,
      "step": 154749
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.5259270668029785,
      "learning_rate": 0.0001459517171343036,
      "loss": 3.1611,
      "step": 154750
    },
    {
      "epoch": 2.01,
      "grad_norm": 3.056072473526001,
      "learning_rate": 0.00014594820706578275,
      "loss": 2.683,
      "step": 154751
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.8263635635375977,
      "learning_rate": 0.00014594469702590315,
      "loss": 2.979,
      "step": 154752
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.4265308380126953,
      "learning_rate": 0.00014594118701466533,
      "loss": 2.7906,
      "step": 154753
    },
    {
      "epoch": 2.01,
      "grad_norm": 2.449984073638916,
      "learning_rate": 0.00014593767703206982,
      "loss": 2.8732,
      "step": 154754
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.365659713745117,
      "learning_rate": 0.00014593416707811752,
      "loss": 2.8496,
      "step": 154755
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.212623119354248,
      "learning_rate": 0.000145930657152809,
      "loss": 3.0603,
      "step": 154756
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4479987621307373,
      "learning_rate": 0.00014592714725614477,
      "loss": 2.7855,
      "step": 154757
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3685576915740967,
      "learning_rate": 0.00014592363738812566,
      "loss": 3.0127,
      "step": 154758
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.165574789047241,
      "learning_rate": 0.0001459201275487523,
      "loss": 3.0415,
      "step": 154759
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.404529094696045,
      "learning_rate": 0.00014591661773802515,
      "loss": 2.5153,
      "step": 154760
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.343579053878784,
      "learning_rate": 0.00014591310795594514,
      "loss": 3.026,
      "step": 154761
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.3856875896453857,
      "learning_rate": 0.0001459095982025127,
      "loss": 2.8869,
      "step": 154762
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.234875440597534,
      "learning_rate": 0.00014590608847772862,
      "loss": 3.0042,
      "step": 154763
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4892494678497314,
      "learning_rate": 0.00014590257878159356,
      "loss": 3.2642,
      "step": 154764
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9499175548553467,
      "learning_rate": 0.0001458990691141081,
      "loss": 2.7325,
      "step": 154765
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.142277479171753,
      "learning_rate": 0.0001458955594752728,
      "loss": 2.9129,
      "step": 154766
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2260262966156006,
      "learning_rate": 0.00014589204986508854,
      "loss": 3.1432,
      "step": 154767
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.755373954772949,
      "learning_rate": 0.00014588854028355577,
      "loss": 2.7778,
      "step": 154768
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1568989753723145,
      "learning_rate": 0.0001458850307306753,
      "loss": 3.1808,
      "step": 154769
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.600701332092285,
      "learning_rate": 0.00014588152120644765,
      "loss": 3.0589,
      "step": 154770
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.315551996231079,
      "learning_rate": 0.00014587801171087373,
      "loss": 2.609,
      "step": 154771
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.780489206314087,
      "learning_rate": 0.00014587450224395375,
      "loss": 3.1688,
      "step": 154772
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.268524408340454,
      "learning_rate": 0.00014587099280568875,
      "loss": 3.1092,
      "step": 154773
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2410635948181152,
      "learning_rate": 0.00014586748339607912,
      "loss": 2.8254,
      "step": 154774
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.966591238975525,
      "learning_rate": 0.00014586397401512575,
      "loss": 2.8191,
      "step": 154775
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.631244659423828,
      "learning_rate": 0.00014586046466282906,
      "loss": 3.1037,
      "step": 154776
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.57346510887146,
      "learning_rate": 0.00014585695533919003,
      "loss": 3.0504,
      "step": 154777
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.728461265563965,
      "learning_rate": 0.0001458534460442089,
      "loss": 3.0841,
      "step": 154778
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2760894298553467,
      "learning_rate": 0.00014584993677788663,
      "loss": 3.0574,
      "step": 154779
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.786550521850586,
      "learning_rate": 0.00014584642754022367,
      "loss": 2.8596,
      "step": 154780
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6081621646881104,
      "learning_rate": 0.00014584291833122086,
      "loss": 2.9574,
      "step": 154781
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.119461536407471,
      "learning_rate": 0.00014583940915087866,
      "loss": 2.9332,
      "step": 154782
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.258565902709961,
      "learning_rate": 0.00014583589999919805,
      "loss": 2.9886,
      "step": 154783
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.563140869140625,
      "learning_rate": 0.00014583239087617922,
      "loss": 2.9285,
      "step": 154784
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.17690110206604,
      "learning_rate": 0.00014582888178182315,
      "loss": 2.9642,
      "step": 154785
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6553590297698975,
      "learning_rate": 0.00014582537271613035,
      "loss": 2.8836,
      "step": 154786
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.023831367492676,
      "learning_rate": 0.0001458218636791016,
      "loss": 3.122,
      "step": 154787
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.813521385192871,
      "learning_rate": 0.00014581835467073734,
      "loss": 2.9764,
      "step": 154788
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.066470146179199,
      "learning_rate": 0.0001458148456910386,
      "loss": 2.9564,
      "step": 154789
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9794540405273438,
      "learning_rate": 0.0001458113367400056,
      "loss": 2.6776,
      "step": 154790
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.280585527420044,
      "learning_rate": 0.00014580782781763927,
      "loss": 2.7982,
      "step": 154791
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7110424041748047,
      "learning_rate": 0.00014580431892394002,
      "loss": 2.6329,
      "step": 154792
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.763962984085083,
      "learning_rate": 0.00014580081005890883,
      "loss": 2.7011,
      "step": 154793
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8601768016815186,
      "learning_rate": 0.00014579730122254607,
      "loss": 3.1539,
      "step": 154794
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.879298686981201,
      "learning_rate": 0.0001457937924148527,
      "loss": 2.9287,
      "step": 154795
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.278862237930298,
      "learning_rate": 0.00014579028363582893,
      "loss": 2.8033,
      "step": 154796
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.196068048477173,
      "learning_rate": 0.00014578677488547582,
      "loss": 3.1808,
      "step": 154797
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1111466884613037,
      "learning_rate": 0.00014578326616379372,
      "loss": 3.155,
      "step": 154798
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0010430812835693,
      "learning_rate": 0.00014577975747078354,
      "loss": 2.9757,
      "step": 154799
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.5678281784057617,
      "learning_rate": 0.0001457762488064457,
      "loss": 2.9586,
      "step": 154800
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.83754301071167,
      "learning_rate": 0.0001457727401707812,
      "loss": 2.9379,
      "step": 154801
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6045219898223877,
      "learning_rate": 0.00014576923156379022,
      "loss": 3.0142,
      "step": 154802
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7685129642486572,
      "learning_rate": 0.00014576572298547377,
      "loss": 2.9414,
      "step": 154803
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9049179553985596,
      "learning_rate": 0.00014576221443583225,
      "loss": 3.0122,
      "step": 154804
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.172701597213745,
      "learning_rate": 0.0001457587059148666,
      "loss": 2.9465,
      "step": 154805
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.230402946472168,
      "learning_rate": 0.00014575519742257716,
      "loss": 3.1497,
      "step": 154806
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.13850736618042,
      "learning_rate": 0.00014575168895896492,
      "loss": 3.1107,
      "step": 154807
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3857452869415283,
      "learning_rate": 0.00014574818052403026,
      "loss": 2.9441,
      "step": 154808
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.755283832550049,
      "learning_rate": 0.00014574467211777399,
      "loss": 2.7693,
      "step": 154809
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0445261001586914,
      "learning_rate": 0.00014574116374019657,
      "loss": 2.883,
      "step": 154810
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5356431007385254,
      "learning_rate": 0.0001457376553912989,
      "loss": 2.8785,
      "step": 154811
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.934713840484619,
      "learning_rate": 0.0001457341470710814,
      "loss": 2.9538,
      "step": 154812
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.378462314605713,
      "learning_rate": 0.00014573063877954491,
      "loss": 2.9669,
      "step": 154813
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.323681354522705,
      "learning_rate": 0.00014572713051669005,
      "loss": 2.9581,
      "step": 154814
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.658899784088135,
      "learning_rate": 0.0001457236222825174,
      "loss": 2.7647,
      "step": 154815
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4147064685821533,
      "learning_rate": 0.00014572011407702756,
      "loss": 2.9295,
      "step": 154816
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3525655269622803,
      "learning_rate": 0.00014571660590022132,
      "loss": 2.7126,
      "step": 154817
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.54516863822937,
      "learning_rate": 0.00014571309775209923,
      "loss": 3.0233,
      "step": 154818
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.921356678009033,
      "learning_rate": 0.00014570958963266206,
      "loss": 2.8367,
      "step": 154819
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5333738327026367,
      "learning_rate": 0.00014570608154191038,
      "loss": 3.0458,
      "step": 154820
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4125285148620605,
      "learning_rate": 0.00014570257347984488,
      "loss": 2.8947,
      "step": 154821
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4403467178344727,
      "learning_rate": 0.00014569906544646607,
      "loss": 2.9684,
      "step": 154822
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9411776065826416,
      "learning_rate": 0.00014569555744177484,
      "loss": 3.031,
      "step": 154823
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7717714309692383,
      "learning_rate": 0.00014569204946577157,
      "loss": 2.9219,
      "step": 154824
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3885366916656494,
      "learning_rate": 0.00014568854151845718,
      "loss": 3.0029,
      "step": 154825
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.679609537124634,
      "learning_rate": 0.00014568503359983222,
      "loss": 2.8912,
      "step": 154826
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2095754146575928,
      "learning_rate": 0.00014568152570989732,
      "loss": 2.9223,
      "step": 154827
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6790919303894043,
      "learning_rate": 0.000145678017848653,
      "loss": 3.0613,
      "step": 154828
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.829481363296509,
      "learning_rate": 0.00014567451001610022,
      "loss": 3.0243,
      "step": 154829
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.5288796424865723,
      "learning_rate": 0.00014567100221223928,
      "loss": 2.7686,
      "step": 154830
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8736166954040527,
      "learning_rate": 0.00014566749443707116,
      "loss": 2.7668,
      "step": 154831
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.453073501586914,
      "learning_rate": 0.0001456639866905964,
      "loss": 2.8594,
      "step": 154832
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4551525115966797,
      "learning_rate": 0.00014566047897281544,
      "loss": 2.9227,
      "step": 154833
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6052048206329346,
      "learning_rate": 0.00014565697128372928,
      "loss": 2.9957,
      "step": 154834
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.436009407043457,
      "learning_rate": 0.00014565346362333838,
      "loss": 2.9461,
      "step": 154835
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0913264751434326,
      "learning_rate": 0.00014564995599164327,
      "loss": 2.8855,
      "step": 154836
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.8887336254119873,
      "learning_rate": 0.00014564644838864493,
      "loss": 3.063,
      "step": 154837
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2174718379974365,
      "learning_rate": 0.00014564294081434377,
      "loss": 2.9878,
      "step": 154838
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.64764404296875,
      "learning_rate": 0.00014563943326874042,
      "loss": 2.916,
      "step": 154839
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2148003578186035,
      "learning_rate": 0.0001456359257518357,
      "loss": 2.8607,
      "step": 154840
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5191378593444824,
      "learning_rate": 0.00014563241826363017,
      "loss": 2.9304,
      "step": 154841
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.273970603942871,
      "learning_rate": 0.00014562891080412442,
      "loss": 3.016,
      "step": 154842
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.422473669052124,
      "learning_rate": 0.00014562540337331926,
      "loss": 3.2028,
      "step": 154843
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.550269842147827,
      "learning_rate": 0.00014562189597121524,
      "loss": 3.1318,
      "step": 154844
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.649210214614868,
      "learning_rate": 0.0001456183885978129,
      "loss": 2.8016,
      "step": 154845
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.620698928833008,
      "learning_rate": 0.00014561488125311315,
      "loss": 2.8332,
      "step": 154846
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.41416335105896,
      "learning_rate": 0.00014561137393711638,
      "loss": 2.807,
      "step": 154847
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1385087966918945,
      "learning_rate": 0.00014560786664982348,
      "loss": 2.8567,
      "step": 154848
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.8201193809509277,
      "learning_rate": 0.00014560435939123503,
      "loss": 3.0138,
      "step": 154849
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.256436586380005,
      "learning_rate": 0.00014560085216135162,
      "loss": 3.262,
      "step": 154850
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7520995140075684,
      "learning_rate": 0.0001455973449601738,
      "loss": 3.181,
      "step": 154851
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2841532230377197,
      "learning_rate": 0.00014559383778770248,
      "loss": 3.0465,
      "step": 154852
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.982880115509033,
      "learning_rate": 0.00014559033064393806,
      "loss": 3.1321,
      "step": 154853
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1546478271484375,
      "learning_rate": 0.00014558682352888144,
      "loss": 2.9132,
      "step": 154854
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1201295852661133,
      "learning_rate": 0.0001455833164425331,
      "loss": 3.1507,
      "step": 154855
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7887206077575684,
      "learning_rate": 0.00014557980938489378,
      "loss": 2.7689,
      "step": 154856
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.092500686645508,
      "learning_rate": 0.00014557630235596396,
      "loss": 2.8637,
      "step": 154857
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8359947204589844,
      "learning_rate": 0.00014557279535574452,
      "loss": 3.0458,
      "step": 154858
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2811203002929688,
      "learning_rate": 0.00014556928838423594,
      "loss": 2.6995,
      "step": 154859
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1195735931396484,
      "learning_rate": 0.00014556578144143904,
      "loss": 2.8106,
      "step": 154860
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.68839168548584,
      "learning_rate": 0.00014556227452735423,
      "loss": 3.0907,
      "step": 154861
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6736905574798584,
      "learning_rate": 0.00014555876764198256,
      "loss": 2.8309,
      "step": 154862
    },
    {
      "epoch": 2.02,
      "grad_norm": 5.196897029876709,
      "learning_rate": 0.0001455552607853242,
      "loss": 3.0682,
      "step": 154863
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7990689277648926,
      "learning_rate": 0.00014555175395738015,
      "loss": 2.8809,
      "step": 154864
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.07069730758667,
      "learning_rate": 0.00014554824715815083,
      "loss": 2.9219,
      "step": 154865
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1703109741210938,
      "learning_rate": 0.00014554474038763713,
      "loss": 2.895,
      "step": 154866
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.095677614212036,
      "learning_rate": 0.00014554123364583947,
      "loss": 3.0039,
      "step": 154867
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.417771339416504,
      "learning_rate": 0.0001455377269327588,
      "loss": 2.9472,
      "step": 154868
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.5630178451538086,
      "learning_rate": 0.0001455342202483954,
      "loss": 3.0101,
      "step": 154869
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.4208035469055176,
      "learning_rate": 0.00014553071359275018,
      "loss": 2.872,
      "step": 154870
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0972654819488525,
      "learning_rate": 0.0001455272069658236,
      "loss": 2.9395,
      "step": 154871
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.931072473526001,
      "learning_rate": 0.00014552370036761657,
      "loss": 2.9939,
      "step": 154872
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.721975564956665,
      "learning_rate": 0.00014552019379812948,
      "loss": 2.822,
      "step": 154873
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6114227771759033,
      "learning_rate": 0.0001455166872573632,
      "loss": 2.8662,
      "step": 154874
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.580871343612671,
      "learning_rate": 0.00014551318074531832,
      "loss": 2.991,
      "step": 154875
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.390998601913452,
      "learning_rate": 0.0001455096742619954,
      "loss": 3.0736,
      "step": 154876
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0587575435638428,
      "learning_rate": 0.0001455061678073951,
      "loss": 3.0044,
      "step": 154877
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.7619574069976807,
      "learning_rate": 0.00014550266138151817,
      "loss": 2.9677,
      "step": 154878
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4445011615753174,
      "learning_rate": 0.00014549915498436516,
      "loss": 2.8721,
      "step": 154879
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8742971420288086,
      "learning_rate": 0.00014549564861593685,
      "loss": 3.0589,
      "step": 154880
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4084293842315674,
      "learning_rate": 0.00014549214227623379,
      "loss": 3.051,
      "step": 154881
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.144787549972534,
      "learning_rate": 0.0001454886359652567,
      "loss": 3.1522,
      "step": 154882
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.749340057373047,
      "learning_rate": 0.00014548512968300605,
      "loss": 2.989,
      "step": 154883
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.38019061088562,
      "learning_rate": 0.00014548162342948277,
      "loss": 3.0308,
      "step": 154884
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5412490367889404,
      "learning_rate": 0.00014547811720468727,
      "loss": 2.958,
      "step": 154885
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2029170989990234,
      "learning_rate": 0.0001454746110086204,
      "loss": 2.9909,
      "step": 154886
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7565407752990723,
      "learning_rate": 0.00014547110484128267,
      "loss": 3.0404,
      "step": 154887
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.104248285293579,
      "learning_rate": 0.00014546759870267485,
      "loss": 2.9485,
      "step": 154888
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1967406272888184,
      "learning_rate": 0.00014546409259279738,
      "loss": 3.3188,
      "step": 154889
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.6876296997070312,
      "learning_rate": 0.00014546058651165113,
      "loss": 2.7554,
      "step": 154890
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9808199405670166,
      "learning_rate": 0.00014545708045923662,
      "loss": 3.0841,
      "step": 154891
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.352569580078125,
      "learning_rate": 0.00014545357443555463,
      "loss": 2.8958,
      "step": 154892
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.305980920791626,
      "learning_rate": 0.00014545006844060573,
      "loss": 2.9759,
      "step": 154893
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3418097496032715,
      "learning_rate": 0.00014544656247439063,
      "loss": 3.0425,
      "step": 154894
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.481947898864746,
      "learning_rate": 0.0001454430565369098,
      "loss": 2.9577,
      "step": 154895
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9104676246643066,
      "learning_rate": 0.0001454395506281641,
      "loss": 3.0796,
      "step": 154896
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.83979868888855,
      "learning_rate": 0.000145436044748154,
      "loss": 2.9281,
      "step": 154897
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8651058673858643,
      "learning_rate": 0.00014543253889688043,
      "loss": 2.9763,
      "step": 154898
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2862305641174316,
      "learning_rate": 0.0001454290330743438,
      "loss": 2.7109,
      "step": 154899
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.437765598297119,
      "learning_rate": 0.00014542552728054485,
      "loss": 3.0439,
      "step": 154900
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7241597175598145,
      "learning_rate": 0.00014542202151548414,
      "loss": 2.9831,
      "step": 154901
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6274240016937256,
      "learning_rate": 0.00014541851577916246,
      "loss": 2.9488,
      "step": 154902
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.34993314743042,
      "learning_rate": 0.0001454150100715803,
      "loss": 2.9506,
      "step": 154903
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5983707904815674,
      "learning_rate": 0.0001454115043927385,
      "loss": 2.8472,
      "step": 154904
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.461803674697876,
      "learning_rate": 0.00014540799874263762,
      "loss": 2.9572,
      "step": 154905
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5673580169677734,
      "learning_rate": 0.00014540449312127834,
      "loss": 2.7098,
      "step": 154906
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5846993923187256,
      "learning_rate": 0.00014540098752866118,
      "loss": 2.937,
      "step": 154907
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0313963890075684,
      "learning_rate": 0.00014539748196478695,
      "loss": 2.8822,
      "step": 154908
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.7091853618621826,
      "learning_rate": 0.0001453939764296562,
      "loss": 2.8695,
      "step": 154909
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.977698564529419,
      "learning_rate": 0.0001453904709232697,
      "loss": 2.6486,
      "step": 154910
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5307841300964355,
      "learning_rate": 0.00014538696544562805,
      "loss": 2.9499,
      "step": 154911
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.9539897441864014,
      "learning_rate": 0.00014538345999673185,
      "loss": 3.0069,
      "step": 154912
    },
    {
      "epoch": 2.02,
      "grad_norm": 8.804487228393555,
      "learning_rate": 0.0001453799545765817,
      "loss": 2.7811,
      "step": 154913
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.196960687637329,
      "learning_rate": 0.00014537644918517843,
      "loss": 2.8638,
      "step": 154914
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.17509126663208,
      "learning_rate": 0.00014537294382252247,
      "loss": 3.0935,
      "step": 154915
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.329909086227417,
      "learning_rate": 0.00014536943848861473,
      "loss": 2.7433,
      "step": 154916
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.7416834831237793,
      "learning_rate": 0.00014536593318345578,
      "loss": 2.8668,
      "step": 154917
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3481788635253906,
      "learning_rate": 0.00014536242790704618,
      "loss": 2.9725,
      "step": 154918
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4727530479431152,
      "learning_rate": 0.00014535892265938647,
      "loss": 3.0764,
      "step": 154919
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.551685333251953,
      "learning_rate": 0.00014535541744047762,
      "loss": 2.9875,
      "step": 154920
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.710331916809082,
      "learning_rate": 0.00014535191225032,
      "loss": 2.962,
      "step": 154921
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.438502550125122,
      "learning_rate": 0.00014534840708891448,
      "loss": 2.9272,
      "step": 154922
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.2482218742370605,
      "learning_rate": 0.0001453449019562616,
      "loss": 3.0641,
      "step": 154923
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.052530288696289,
      "learning_rate": 0.0001453413968523619,
      "loss": 3.2714,
      "step": 154924
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4555771350860596,
      "learning_rate": 0.00014533789177721633,
      "loss": 2.766,
      "step": 154925
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8808765411376953,
      "learning_rate": 0.0001453343867308253,
      "loss": 2.8589,
      "step": 154926
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.028954029083252,
      "learning_rate": 0.00014533088171318943,
      "loss": 3.0761,
      "step": 154927
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9465951919555664,
      "learning_rate": 0.0001453273767243096,
      "loss": 2.6648,
      "step": 154928
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9166407585144043,
      "learning_rate": 0.00014532387176418633,
      "loss": 2.8376,
      "step": 154929
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0004754066467285,
      "learning_rate": 0.0001453203668328201,
      "loss": 2.9241,
      "step": 154930
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.133543014526367,
      "learning_rate": 0.00014531686193021195,
      "loss": 2.9723,
      "step": 154931
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3521010875701904,
      "learning_rate": 0.00014531335705636215,
      "loss": 2.7536,
      "step": 154932
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.530041456222534,
      "learning_rate": 0.00014530985221127162,
      "loss": 3.0944,
      "step": 154933
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.507967948913574,
      "learning_rate": 0.00014530634739494088,
      "loss": 3.018,
      "step": 154934
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.023184299468994,
      "learning_rate": 0.00014530284260737066,
      "loss": 2.8402,
      "step": 154935
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5901153087615967,
      "learning_rate": 0.00014529933784856143,
      "loss": 2.8959,
      "step": 154936
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.386155366897583,
      "learning_rate": 0.00014529583311851413,
      "loss": 2.9571,
      "step": 154937
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5405972003936768,
      "learning_rate": 0.00014529232841722906,
      "loss": 3.137,
      "step": 154938
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9678955078125,
      "learning_rate": 0.00014528882374470725,
      "loss": 2.8321,
      "step": 154939
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.868242025375366,
      "learning_rate": 0.00014528531910094913,
      "loss": 2.9863,
      "step": 154940
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4042351245880127,
      "learning_rate": 0.0001452818144859553,
      "loss": 3.0196,
      "step": 154941
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1460580825805664,
      "learning_rate": 0.00014527830989972658,
      "loss": 3.0966,
      "step": 154942
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.548506736755371,
      "learning_rate": 0.00014527480534226359,
      "loss": 3.1218,
      "step": 154943
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.010420799255371,
      "learning_rate": 0.00014527130081356678,
      "loss": 2.753,
      "step": 154944
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9562127590179443,
      "learning_rate": 0.00014526779631363708,
      "loss": 2.8922,
      "step": 154945
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3650670051574707,
      "learning_rate": 0.0001452642918424749,
      "loss": 2.8468,
      "step": 154946
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1996989250183105,
      "learning_rate": 0.00014526078740008115,
      "loss": 2.9912,
      "step": 154947
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2226569652557373,
      "learning_rate": 0.0001452572829864563,
      "loss": 2.9902,
      "step": 154948
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2344412803649902,
      "learning_rate": 0.00014525377860160105,
      "loss": 3.2548,
      "step": 154949
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4950878620147705,
      "learning_rate": 0.00014525027424551598,
      "loss": 2.781,
      "step": 154950
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0732879638671875,
      "learning_rate": 0.0001452467699182019,
      "loss": 3.0122,
      "step": 154951
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3345530033111572,
      "learning_rate": 0.0001452432656196592,
      "loss": 2.9984,
      "step": 154952
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.241098165512085,
      "learning_rate": 0.00014523976134988884,
      "loss": 3.263,
      "step": 154953
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6841821670532227,
      "learning_rate": 0.00014523625710889134,
      "loss": 3.0406,
      "step": 154954
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6849496364593506,
      "learning_rate": 0.00014523275289666734,
      "loss": 3.0499,
      "step": 154955
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.070662498474121,
      "learning_rate": 0.00014522924871321737,
      "loss": 3.0536,
      "step": 154956
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9511046409606934,
      "learning_rate": 0.00014522574455854234,
      "loss": 2.8069,
      "step": 154957
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.639760732650757,
      "learning_rate": 0.00014522224043264263,
      "loss": 2.905,
      "step": 154958
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.9913861751556396,
      "learning_rate": 0.00014521873633551913,
      "loss": 2.8989,
      "step": 154959
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0911264419555664,
      "learning_rate": 0.0001452152322671724,
      "loss": 3.0763,
      "step": 154960
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0814435482025146,
      "learning_rate": 0.0001452117282276031,
      "loss": 3.0295,
      "step": 154961
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2267773151397705,
      "learning_rate": 0.00014520822421681175,
      "loss": 3.0791,
      "step": 154962
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1983330249786377,
      "learning_rate": 0.0001452047202347992,
      "loss": 2.8944,
      "step": 154963
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0444729328155518,
      "learning_rate": 0.00014520121628156592,
      "loss": 2.9129,
      "step": 154964
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.121260166168213,
      "learning_rate": 0.00014519771235711276,
      "loss": 3.0491,
      "step": 154965
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.843397617340088,
      "learning_rate": 0.00014519420846144024,
      "loss": 3.0347,
      "step": 154966
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.529677391052246,
      "learning_rate": 0.00014519070459454908,
      "loss": 2.8128,
      "step": 154967
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.230201482772827,
      "learning_rate": 0.00014518720075643978,
      "loss": 2.7881,
      "step": 154968
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.009425163269043,
      "learning_rate": 0.0001451836969471132,
      "loss": 3.0123,
      "step": 154969
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9651093482971191,
      "learning_rate": 0.00014518019316656975,
      "loss": 2.9725,
      "step": 154970
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3376216888427734,
      "learning_rate": 0.0001451766894148104,
      "loss": 2.6296,
      "step": 154971
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9961121082305908,
      "learning_rate": 0.0001451731856918356,
      "loss": 2.8344,
      "step": 154972
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7563693523406982,
      "learning_rate": 0.000145169681997646,
      "loss": 2.7844,
      "step": 154973
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8959412574768066,
      "learning_rate": 0.0001451661783322422,
      "loss": 3.0439,
      "step": 154974
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.01177716255188,
      "learning_rate": 0.000145162674695625,
      "loss": 2.9453,
      "step": 154975
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6804728507995605,
      "learning_rate": 0.0001451591710877949,
      "loss": 2.8956,
      "step": 154976
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6178736686706543,
      "learning_rate": 0.00014515566750875276,
      "loss": 3.0731,
      "step": 154977
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.4862475395202637,
      "learning_rate": 0.00014515216395849908,
      "loss": 2.7003,
      "step": 154978
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8331029415130615,
      "learning_rate": 0.00014514866043703454,
      "loss": 3.1638,
      "step": 154979
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8343076705932617,
      "learning_rate": 0.0001451451569443597,
      "loss": 3.0376,
      "step": 154980
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2990756034851074,
      "learning_rate": 0.00014514165348047535,
      "loss": 3.2019,
      "step": 154981
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2029547691345215,
      "learning_rate": 0.00014513815004538202,
      "loss": 3.0147,
      "step": 154982
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3059661388397217,
      "learning_rate": 0.00014513464663908058,
      "loss": 3.0788,
      "step": 154983
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.330911159515381,
      "learning_rate": 0.00014513114326157147,
      "loss": 2.9614,
      "step": 154984
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5231990814208984,
      "learning_rate": 0.00014512763991285545,
      "loss": 2.8079,
      "step": 154985
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3819739818573,
      "learning_rate": 0.00014512413659293296,
      "loss": 3.1461,
      "step": 154986
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.131765604019165,
      "learning_rate": 0.00014512063330180495,
      "loss": 2.8581,
      "step": 154987
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2237894535064697,
      "learning_rate": 0.00014511713003947184,
      "loss": 2.9997,
      "step": 154988
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.00314998626709,
      "learning_rate": 0.0001451136268059345,
      "loss": 3.0704,
      "step": 154989
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3767876625061035,
      "learning_rate": 0.00014511012360119343,
      "loss": 3.0318,
      "step": 154990
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8228588104248047,
      "learning_rate": 0.00014510662042524933,
      "loss": 3.0644,
      "step": 154991
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5722405910491943,
      "learning_rate": 0.0001451031172781027,
      "loss": 2.7837,
      "step": 154992
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5277140140533447,
      "learning_rate": 0.00014509961415975448,
      "loss": 2.8426,
      "step": 154993
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6124658584594727,
      "learning_rate": 0.00014509611107020503,
      "loss": 3.0388,
      "step": 154994
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.40324068069458,
      "learning_rate": 0.00014509260800945521,
      "loss": 2.9409,
      "step": 154995
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9689879417419434,
      "learning_rate": 0.00014508910497750568,
      "loss": 2.955,
      "step": 154996
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5950722694396973,
      "learning_rate": 0.00014508560197435696,
      "loss": 2.8196,
      "step": 154997
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.137714385986328,
      "learning_rate": 0.00014508209900000962,
      "loss": 3.0547,
      "step": 154998
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.428847074508667,
      "learning_rate": 0.0001450785960544646,
      "loss": 2.8958,
      "step": 154999
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9948058128356934,
      "learning_rate": 0.00014507509313772227,
      "loss": 2.6052,
      "step": 155000
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2315356731414795,
      "learning_rate": 0.0001450715902497835,
      "loss": 3.0916,
      "step": 155001
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.024783134460449,
      "learning_rate": 0.00014506808739064882,
      "loss": 2.9797,
      "step": 155002
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8139712810516357,
      "learning_rate": 0.00014506458456031894,
      "loss": 2.9421,
      "step": 155003
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.92264986038208,
      "learning_rate": 0.00014506108175879438,
      "loss": 3.1307,
      "step": 155004
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.460517406463623,
      "learning_rate": 0.000145057578986076,
      "loss": 2.6528,
      "step": 155005
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4572060108184814,
      "learning_rate": 0.0001450540762421642,
      "loss": 2.92,
      "step": 155006
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.887171745300293,
      "learning_rate": 0.0001450505735270599,
      "loss": 3.0285,
      "step": 155007
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7468795776367188,
      "learning_rate": 0.0001450470708407635,
      "loss": 2.9993,
      "step": 155008
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.496098518371582,
      "learning_rate": 0.00014504356818327593,
      "loss": 2.8933,
      "step": 155009
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.193535089492798,
      "learning_rate": 0.00014504006555459765,
      "loss": 2.8087,
      "step": 155010
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.742546558380127,
      "learning_rate": 0.00014503656295472935,
      "loss": 2.9928,
      "step": 155011
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.723149299621582,
      "learning_rate": 0.00014503306038367154,
      "loss": 2.7502,
      "step": 155012
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.7240841388702393,
      "learning_rate": 0.00014502955784142515,
      "loss": 3.0215,
      "step": 155013
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.100003480911255,
      "learning_rate": 0.0001450260553279906,
      "loss": 3.1184,
      "step": 155014
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2597904205322266,
      "learning_rate": 0.0001450225528433687,
      "loss": 2.9264,
      "step": 155015
    },
    {
      "epoch": 2.02,
      "grad_norm": 5.564370155334473,
      "learning_rate": 0.00014501905038756007,
      "loss": 2.8122,
      "step": 155016
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7728371620178223,
      "learning_rate": 0.0001450155479605653,
      "loss": 2.7526,
      "step": 155017
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.3432774543762207,
      "learning_rate": 0.00014501204556238495,
      "loss": 2.8937,
      "step": 155018
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4105641841888428,
      "learning_rate": 0.00014500854319301993,
      "loss": 2.7825,
      "step": 155019
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.4537367820739746,
      "learning_rate": 0.00014500504085247057,
      "loss": 2.9426,
      "step": 155020
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2655136585235596,
      "learning_rate": 0.00014500153854073787,
      "loss": 3.0447,
      "step": 155021
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5860297679901123,
      "learning_rate": 0.00014499803625782232,
      "loss": 2.7951,
      "step": 155022
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.629488229751587,
      "learning_rate": 0.0001449945340037244,
      "loss": 2.9643,
      "step": 155023
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0317656993865967,
      "learning_rate": 0.00014499103177844505,
      "loss": 3.1443,
      "step": 155024
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2685062885284424,
      "learning_rate": 0.0001449875295819848,
      "loss": 2.7122,
      "step": 155025
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3284218311309814,
      "learning_rate": 0.0001449840274143442,
      "loss": 3.0357,
      "step": 155026
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2287611961364746,
      "learning_rate": 0.0001449805252755241,
      "loss": 2.9686,
      "step": 155027
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3993139266967773,
      "learning_rate": 0.00014497702316552502,
      "loss": 2.8623,
      "step": 155028
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6143364906311035,
      "learning_rate": 0.00014497352108434753,
      "loss": 2.9728,
      "step": 155029
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7554702758789062,
      "learning_rate": 0.00014497001903199252,
      "loss": 3.1346,
      "step": 155030
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0213794708251953,
      "learning_rate": 0.00014496651700846038,
      "loss": 2.9658,
      "step": 155031
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1393306255340576,
      "learning_rate": 0.00014496301501375203,
      "loss": 3.1108,
      "step": 155032
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3372738361358643,
      "learning_rate": 0.00014495951304786795,
      "loss": 3.0069,
      "step": 155033
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8100531101226807,
      "learning_rate": 0.00014495601111080885,
      "loss": 2.8998,
      "step": 155034
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8140597343444824,
      "learning_rate": 0.00014495250920257524,
      "loss": 2.9873,
      "step": 155035
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9514317512512207,
      "learning_rate": 0.000144949007323168,
      "loss": 2.6467,
      "step": 155036
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.640281915664673,
      "learning_rate": 0.00014494550547258754,
      "loss": 3.0925,
      "step": 155037
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6472373008728027,
      "learning_rate": 0.00014494200365083472,
      "loss": 2.9388,
      "step": 155038
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3590340614318848,
      "learning_rate": 0.00014493850185791016,
      "loss": 2.8524,
      "step": 155039
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.739962100982666,
      "learning_rate": 0.00014493500009381442,
      "loss": 3.0408,
      "step": 155040
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3900058269500732,
      "learning_rate": 0.0001449314983585481,
      "loss": 3.0722,
      "step": 155041
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.644099712371826,
      "learning_rate": 0.00014492799665211207,
      "loss": 2.9868,
      "step": 155042
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8142311573028564,
      "learning_rate": 0.00014492449497450672,
      "loss": 2.8274,
      "step": 155043
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.354429006576538,
      "learning_rate": 0.00014492099332573293,
      "loss": 3.0394,
      "step": 155044
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4675159454345703,
      "learning_rate": 0.0001449174917057913,
      "loss": 2.7719,
      "step": 155045
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.282259702682495,
      "learning_rate": 0.00014491399011468242,
      "loss": 3.0678,
      "step": 155046
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.303619146347046,
      "learning_rate": 0.0001449104885524068,
      "loss": 2.8117,
      "step": 155047
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.557612895965576,
      "learning_rate": 0.0001449069870189654,
      "loss": 2.9044,
      "step": 155048
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.647862434387207,
      "learning_rate": 0.0001449034855143586,
      "loss": 2.7006,
      "step": 155049
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2603185176849365,
      "learning_rate": 0.00014489998403858728,
      "loss": 3.0661,
      "step": 155050
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4975497722625732,
      "learning_rate": 0.000144896482591652,
      "loss": 3.2545,
      "step": 155051
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4797656536102295,
      "learning_rate": 0.00014489298117355336,
      "loss": 2.8614,
      "step": 155052
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4219577312469482,
      "learning_rate": 0.00014488947978429198,
      "loss": 3.2284,
      "step": 155053
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.483722448348999,
      "learning_rate": 0.00014488597842386865,
      "loss": 2.9032,
      "step": 155054
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.51432204246521,
      "learning_rate": 0.00014488247709228382,
      "loss": 2.8653,
      "step": 155055
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.253431797027588,
      "learning_rate": 0.00014487897578953837,
      "loss": 2.9046,
      "step": 155056
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.519879341125488,
      "learning_rate": 0.00014487547451563288,
      "loss": 3.0125,
      "step": 155057
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.285883665084839,
      "learning_rate": 0.00014487197327056796,
      "loss": 3.0059,
      "step": 155058
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3788983821868896,
      "learning_rate": 0.00014486847205434416,
      "loss": 2.9106,
      "step": 155059
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.210212230682373,
      "learning_rate": 0.00014486497086696235,
      "loss": 2.9443,
      "step": 155060
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.609503984451294,
      "learning_rate": 0.0001448614697084229,
      "loss": 3.0381,
      "step": 155061
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.330559730529785,
      "learning_rate": 0.00014485796857872683,
      "loss": 2.7166,
      "step": 155062
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.371493339538574,
      "learning_rate": 0.00014485446747787456,
      "loss": 3.0974,
      "step": 155063
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0665674209594727,
      "learning_rate": 0.00014485096640586676,
      "loss": 3.0301,
      "step": 155064
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0507187843322754,
      "learning_rate": 0.000144847465362704,
      "loss": 2.9593,
      "step": 155065
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0622377395629883,
      "learning_rate": 0.0001448439643483871,
      "loss": 3.2469,
      "step": 155066
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.149998426437378,
      "learning_rate": 0.0001448404633629166,
      "loss": 2.8173,
      "step": 155067
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.255782127380371,
      "learning_rate": 0.00014483696240629322,
      "loss": 2.8168,
      "step": 155068
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.240696430206299,
      "learning_rate": 0.00014483346147851762,
      "loss": 3.0155,
      "step": 155069
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0765633583068848,
      "learning_rate": 0.0001448299605795904,
      "loss": 2.9246,
      "step": 155070
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.328953981399536,
      "learning_rate": 0.0001448264597095121,
      "loss": 2.9802,
      "step": 155071
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6870484352111816,
      "learning_rate": 0.00014482295886828362,
      "loss": 2.8304,
      "step": 155072
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.8124021291732788,
      "learning_rate": 0.00014481945805590535,
      "loss": 2.9672,
      "step": 155073
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.160914421081543,
      "learning_rate": 0.0001448159572723782,
      "loss": 2.8476,
      "step": 155074
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.363823652267456,
      "learning_rate": 0.00014481245651770254,
      "loss": 2.9601,
      "step": 155075
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6021900177001953,
      "learning_rate": 0.00014480895579187947,
      "loss": 3.0429,
      "step": 155076
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.347304582595825,
      "learning_rate": 0.00014480545509490904,
      "loss": 3.2701,
      "step": 155077
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.150808572769165,
      "learning_rate": 0.00014480195442679236,
      "loss": 2.8793,
      "step": 155078
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1249167919158936,
      "learning_rate": 0.00014479845378752984,
      "loss": 3.0945,
      "step": 155079
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1326823234558105,
      "learning_rate": 0.00014479495317712228,
      "loss": 2.8759,
      "step": 155080
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.010401964187622,
      "learning_rate": 0.00014479145259557017,
      "loss": 3.0214,
      "step": 155081
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2106871604919434,
      "learning_rate": 0.0001447879520428745,
      "loss": 2.8484,
      "step": 155082
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.340151309967041,
      "learning_rate": 0.00014478445151903545,
      "loss": 2.9852,
      "step": 155083
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6916136741638184,
      "learning_rate": 0.000144780951024054,
      "loss": 3.0115,
      "step": 155084
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.166748046875,
      "learning_rate": 0.00014477745055793058,
      "loss": 2.9125,
      "step": 155085
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.314436197280884,
      "learning_rate": 0.0001447739501206661,
      "loss": 2.9526,
      "step": 155086
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1199045181274414,
      "learning_rate": 0.00014477044971226096,
      "loss": 2.7841,
      "step": 155087
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.180797576904297,
      "learning_rate": 0.00014476694933271615,
      "loss": 2.9322,
      "step": 155088
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.486571788787842,
      "learning_rate": 0.00014476344898203185,
      "loss": 3.008,
      "step": 155089
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2306251525878906,
      "learning_rate": 0.0001447599486602091,
      "loss": 3.1317,
      "step": 155090
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5479846000671387,
      "learning_rate": 0.00014475644836724823,
      "loss": 3.031,
      "step": 155091
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0861446857452393,
      "learning_rate": 0.00014475294810315024,
      "loss": 2.7705,
      "step": 155092
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3255481719970703,
      "learning_rate": 0.00014474944786791545,
      "loss": 3.0179,
      "step": 155093
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.196122646331787,
      "learning_rate": 0.0001447459476615448,
      "loss": 2.7284,
      "step": 155094
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.570223569869995,
      "learning_rate": 0.00014474244748403884,
      "loss": 3.1271,
      "step": 155095
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.233940839767456,
      "learning_rate": 0.00014473894733539814,
      "loss": 3.1514,
      "step": 155096
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1653335094451904,
      "learning_rate": 0.00014473544721562331,
      "loss": 3.1161,
      "step": 155097
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3620519638061523,
      "learning_rate": 0.00014473194712471518,
      "loss": 2.8766,
      "step": 155098
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2561142444610596,
      "learning_rate": 0.00014472844706267426,
      "loss": 3.0084,
      "step": 155099
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.382636785507202,
      "learning_rate": 0.0001447249470295013,
      "loss": 2.9347,
      "step": 155100
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.494786500930786,
      "learning_rate": 0.00014472144702519697,
      "loss": 2.8881,
      "step": 155101
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6465532779693604,
      "learning_rate": 0.0001447179470497618,
      "loss": 2.9157,
      "step": 155102
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2507681846618652,
      "learning_rate": 0.0001447144471031964,
      "loss": 3.1198,
      "step": 155103
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4425971508026123,
      "learning_rate": 0.00014471094718550162,
      "loss": 2.96,
      "step": 155104
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3099517822265625,
      "learning_rate": 0.0001447074472966779,
      "loss": 3.0381,
      "step": 155105
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0822055339813232,
      "learning_rate": 0.00014470394743672612,
      "loss": 3.133,
      "step": 155106
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3221263885498047,
      "learning_rate": 0.0001447004476056468,
      "loss": 2.8924,
      "step": 155107
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.163761615753174,
      "learning_rate": 0.00014469694780344048,
      "loss": 3.0066,
      "step": 155108
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2905051708221436,
      "learning_rate": 0.00014469344803010804,
      "loss": 3.0303,
      "step": 155109
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.011658668518066,
      "learning_rate": 0.00014468994828565005,
      "loss": 2.9816,
      "step": 155110
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7103750705718994,
      "learning_rate": 0.00014468644857006696,
      "loss": 2.7987,
      "step": 155111
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3958559036254883,
      "learning_rate": 0.00014468294888335975,
      "loss": 2.7655,
      "step": 155112
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.460726737976074,
      "learning_rate": 0.00014467944922552892,
      "loss": 3.0382,
      "step": 155113
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6763970851898193,
      "learning_rate": 0.00014467594959657494,
      "loss": 2.9275,
      "step": 155114
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.253490924835205,
      "learning_rate": 0.0001446724499964988,
      "loss": 2.721,
      "step": 155115
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3200321197509766,
      "learning_rate": 0.00014466895042530097,
      "loss": 2.9797,
      "step": 155116
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.535360813140869,
      "learning_rate": 0.000144665450882982,
      "loss": 2.6949,
      "step": 155117
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6481356620788574,
      "learning_rate": 0.00014466195136954277,
      "loss": 2.7687,
      "step": 155118
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.068000078201294,
      "learning_rate": 0.0001446584518849838,
      "loss": 3.2283,
      "step": 155119
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1871907711029053,
      "learning_rate": 0.00014465495242930567,
      "loss": 2.7986,
      "step": 155120
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.526777505874634,
      "learning_rate": 0.00014465145300250918,
      "loss": 2.6612,
      "step": 155121
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.210186243057251,
      "learning_rate": 0.00014464795360459484,
      "loss": 2.9363,
      "step": 155122
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2931857109069824,
      "learning_rate": 0.0001446444542355635,
      "loss": 2.7053,
      "step": 155123
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.373548984527588,
      "learning_rate": 0.0001446409548954157,
      "loss": 2.5653,
      "step": 155124
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.460559844970703,
      "learning_rate": 0.00014463745558415203,
      "loss": 2.9163,
      "step": 155125
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.442204475402832,
      "learning_rate": 0.0001446339563017731,
      "loss": 2.7106,
      "step": 155126
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.312753438949585,
      "learning_rate": 0.00014463045704827976,
      "loss": 2.9171,
      "step": 155127
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.403923749923706,
      "learning_rate": 0.00014462695782367248,
      "loss": 2.9385,
      "step": 155128
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0477585792541504,
      "learning_rate": 0.00014462345862795205,
      "loss": 2.9327,
      "step": 155129
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7408289909362793,
      "learning_rate": 0.00014461995946111905,
      "loss": 2.8954,
      "step": 155130
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.525493860244751,
      "learning_rate": 0.00014461646032317414,
      "loss": 2.9439,
      "step": 155131
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.274754524230957,
      "learning_rate": 0.00014461296121411787,
      "loss": 2.9054,
      "step": 155132
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.228471279144287,
      "learning_rate": 0.00014460946213395109,
      "loss": 3.0399,
      "step": 155133
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3378751277923584,
      "learning_rate": 0.0001446059630826742,
      "loss": 2.9064,
      "step": 155134
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.419358253479004,
      "learning_rate": 0.00014460246406028813,
      "loss": 2.8894,
      "step": 155135
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.203399419784546,
      "learning_rate": 0.0001445989650667934,
      "loss": 3.0867,
      "step": 155136
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.995023012161255,
      "learning_rate": 0.00014459546610219066,
      "loss": 2.8705,
      "step": 155137
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2926597595214844,
      "learning_rate": 0.00014459196716648043,
      "loss": 2.956,
      "step": 155138
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3360683917999268,
      "learning_rate": 0.0001445884682596636,
      "loss": 2.8077,
      "step": 155139
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6856915950775146,
      "learning_rate": 0.00014458496938174058,
      "loss": 2.9745,
      "step": 155140
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5898749828338623,
      "learning_rate": 0.00014458147053271228,
      "loss": 2.8608,
      "step": 155141
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9080941677093506,
      "learning_rate": 0.0001445779717125791,
      "loss": 3.203,
      "step": 155142
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.78216814994812,
      "learning_rate": 0.00014457447292134208,
      "loss": 2.8716,
      "step": 155143
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.404813051223755,
      "learning_rate": 0.00014457097415900133,
      "loss": 2.7732,
      "step": 155144
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9156787395477295,
      "learning_rate": 0.00014456747542555785,
      "loss": 2.817,
      "step": 155145
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3906655311584473,
      "learning_rate": 0.00014456397672101215,
      "loss": 3.1386,
      "step": 155146
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6137537956237793,
      "learning_rate": 0.00014456047804536505,
      "loss": 3.1847,
      "step": 155147
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5456533432006836,
      "learning_rate": 0.00014455697939861692,
      "loss": 3.0417,
      "step": 155148
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.053605556488037,
      "learning_rate": 0.00014455348078076886,
      "loss": 2.9093,
      "step": 155149
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3887391090393066,
      "learning_rate": 0.000144549982191821,
      "loss": 2.6614,
      "step": 155150
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3754348754882812,
      "learning_rate": 0.0001445464836317744,
      "loss": 3.1019,
      "step": 155151
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.4049770832061768,
      "learning_rate": 0.00014454298510062938,
      "loss": 3.0009,
      "step": 155152
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3555173873901367,
      "learning_rate": 0.00014453948659838686,
      "loss": 2.8003,
      "step": 155153
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.245908737182617,
      "learning_rate": 0.00014453598812504728,
      "loss": 3.169,
      "step": 155154
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.822493553161621,
      "learning_rate": 0.00014453248968061163,
      "loss": 2.9116,
      "step": 155155
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.38588285446167,
      "learning_rate": 0.0001445289912650801,
      "loss": 2.9914,
      "step": 155156
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9499351978302002,
      "learning_rate": 0.00014452549287845363,
      "loss": 2.7518,
      "step": 155157
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5543277263641357,
      "learning_rate": 0.00014452199452073277,
      "loss": 2.6608,
      "step": 155158
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4102284908294678,
      "learning_rate": 0.00014451849619191826,
      "loss": 3.0122,
      "step": 155159
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1688902378082275,
      "learning_rate": 0.0001445149978920106,
      "loss": 3.0292,
      "step": 155160
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0310611724853516,
      "learning_rate": 0.0001445114996210108,
      "loss": 3.0286,
      "step": 155161
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.420431613922119,
      "learning_rate": 0.00014450800137891896,
      "loss": 3.1677,
      "step": 155162
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0664222240448,
      "learning_rate": 0.00014450450316573615,
      "loss": 2.9172,
      "step": 155163
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.147258996963501,
      "learning_rate": 0.0001445010049814628,
      "loss": 2.9362,
      "step": 155164
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1285808086395264,
      "learning_rate": 0.00014449750682609977,
      "loss": 2.8775,
      "step": 155165
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.091370105743408,
      "learning_rate": 0.00014449400869964742,
      "loss": 2.9404,
      "step": 155166
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1541833877563477,
      "learning_rate": 0.00014449051060210687,
      "loss": 2.7516,
      "step": 155167
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7865498065948486,
      "learning_rate": 0.00014448701253347816,
      "loss": 2.6742,
      "step": 155168
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1485071182250977,
      "learning_rate": 0.00014448351449376242,
      "loss": 2.8111,
      "step": 155169
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.417560577392578,
      "learning_rate": 0.00014448001648296,
      "loss": 2.9772,
      "step": 155170
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.3883156776428223,
      "learning_rate": 0.0001444765185010718,
      "loss": 2.6789,
      "step": 155171
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8361215591430664,
      "learning_rate": 0.00014447302054809824,
      "loss": 2.8851,
      "step": 155172
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.370807409286499,
      "learning_rate": 0.00014446952262404028,
      "loss": 2.8962,
      "step": 155173
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2291553020477295,
      "learning_rate": 0.00014446602472889818,
      "loss": 2.8261,
      "step": 155174
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.723536491394043,
      "learning_rate": 0.00014446252686267286,
      "loss": 2.9255,
      "step": 155175
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0601630210876465,
      "learning_rate": 0.00014445902902536481,
      "loss": 2.963,
      "step": 155176
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.351677656173706,
      "learning_rate": 0.00014445553121697488,
      "loss": 2.7816,
      "step": 155177
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3107807636260986,
      "learning_rate": 0.00014445203343750344,
      "loss": 2.9219,
      "step": 155178
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.154284954071045,
      "learning_rate": 0.0001444485356869516,
      "loss": 2.9833,
      "step": 155179
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3114521503448486,
      "learning_rate": 0.0001444450379653194,
      "loss": 2.9898,
      "step": 155180
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2944302558898926,
      "learning_rate": 0.000144441540272608,
      "loss": 2.9566,
      "step": 155181
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.742802381515503,
      "learning_rate": 0.00014443804260881772,
      "loss": 2.907,
      "step": 155182
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0212297439575195,
      "learning_rate": 0.00014443454497394944,
      "loss": 3.0025,
      "step": 155183
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7354090213775635,
      "learning_rate": 0.0001444310473680036,
      "loss": 3.0484,
      "step": 155184
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.090102195739746,
      "learning_rate": 0.00014442754979098108,
      "loss": 2.7677,
      "step": 155185
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7245259284973145,
      "learning_rate": 0.00014442405224288245,
      "loss": 3.1613,
      "step": 155186
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0632550716400146,
      "learning_rate": 0.00014442055472370827,
      "loss": 2.9247,
      "step": 155187
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3370866775512695,
      "learning_rate": 0.0001444170572334592,
      "loss": 2.8097,
      "step": 155188
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.80543851852417,
      "learning_rate": 0.00014441355977213598,
      "loss": 2.9928,
      "step": 155189
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.8269667625427246,
      "learning_rate": 0.00014441006233973915,
      "loss": 2.9502,
      "step": 155190
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.978596210479736,
      "learning_rate": 0.0001444065649362695,
      "loss": 3.0492,
      "step": 155191
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3671748638153076,
      "learning_rate": 0.00014440306756172765,
      "loss": 2.845,
      "step": 155192
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.250335454940796,
      "learning_rate": 0.00014439957021611407,
      "loss": 2.7612,
      "step": 155193
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.012844562530518,
      "learning_rate": 0.00014439607289942963,
      "loss": 2.6373,
      "step": 155194
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7565391063690186,
      "learning_rate": 0.00014439257561167494,
      "loss": 2.7728,
      "step": 155195
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5554776191711426,
      "learning_rate": 0.00014438907835285047,
      "loss": 3.0557,
      "step": 155196
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7241833209991455,
      "learning_rate": 0.00014438558112295715,
      "loss": 2.9219,
      "step": 155197
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9868742227554321,
      "learning_rate": 0.00014438208392199547,
      "loss": 3.1737,
      "step": 155198
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.610588788986206,
      "learning_rate": 0.00014437858674996596,
      "loss": 3.1624,
      "step": 155199
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2607147693634033,
      "learning_rate": 0.00014437508960686956,
      "loss": 3.1088,
      "step": 155200
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.160529613494873,
      "learning_rate": 0.00014437159249270677,
      "loss": 2.9349,
      "step": 155201
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.425981283187866,
      "learning_rate": 0.0001443680954074781,
      "loss": 2.8541,
      "step": 155202
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2121686935424805,
      "learning_rate": 0.00014436459835118448,
      "loss": 3.1493,
      "step": 155203
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.321000099182129,
      "learning_rate": 0.0001443611013238264,
      "loss": 2.9516,
      "step": 155204
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0297446250915527,
      "learning_rate": 0.00014435760432540443,
      "loss": 3.3042,
      "step": 155205
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1115641593933105,
      "learning_rate": 0.00014435410735591942,
      "loss": 2.8923,
      "step": 155206
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.168470621109009,
      "learning_rate": 0.0001443506104153718,
      "loss": 2.9192,
      "step": 155207
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.262202262878418,
      "learning_rate": 0.00014434711350376248,
      "loss": 2.8695,
      "step": 155208
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2853879928588867,
      "learning_rate": 0.00014434361662109195,
      "loss": 2.956,
      "step": 155209
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0012452602386475,
      "learning_rate": 0.00014434011976736087,
      "loss": 2.7777,
      "step": 155210
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5118114948272705,
      "learning_rate": 0.00014433662294256982,
      "loss": 3.1447,
      "step": 155211
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.620992660522461,
      "learning_rate": 0.0001443331261467196,
      "loss": 3.1126,
      "step": 155212
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0510475635528564,
      "learning_rate": 0.0001443296293798107,
      "loss": 3.0099,
      "step": 155213
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.811344623565674,
      "learning_rate": 0.00014432613264184397,
      "loss": 2.9165,
      "step": 155214
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1397030353546143,
      "learning_rate": 0.00014432263593281984,
      "loss": 3.1522,
      "step": 155215
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.298429250717163,
      "learning_rate": 0.0001443191392527393,
      "loss": 2.8584,
      "step": 155216
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.452446460723877,
      "learning_rate": 0.00014431564260160255,
      "loss": 2.9323,
      "step": 155217
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.860081911087036,
      "learning_rate": 0.00014431214597941056,
      "loss": 2.8219,
      "step": 155218
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9161829948425293,
      "learning_rate": 0.00014430864938616377,
      "loss": 3.1675,
      "step": 155219
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3216521739959717,
      "learning_rate": 0.00014430515282186306,
      "loss": 2.8004,
      "step": 155220
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4247186183929443,
      "learning_rate": 0.00014430165628650883,
      "loss": 3.253,
      "step": 155221
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5017073154449463,
      "learning_rate": 0.0001442981597801021,
      "loss": 2.9426,
      "step": 155222
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4452879428863525,
      "learning_rate": 0.00014429466330264306,
      "loss": 3.0875,
      "step": 155223
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4085705280303955,
      "learning_rate": 0.0001442911668541327,
      "loss": 2.7575,
      "step": 155224
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.25972318649292,
      "learning_rate": 0.00014428767043457143,
      "loss": 2.9598,
      "step": 155225
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3126351833343506,
      "learning_rate": 0.00014428417404396013,
      "loss": 2.743,
      "step": 155226
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9921185970306396,
      "learning_rate": 0.00014428067768229924,
      "loss": 2.8323,
      "step": 155227
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.464414119720459,
      "learning_rate": 0.0001442771813495897,
      "loss": 3.0105,
      "step": 155228
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.769467353820801,
      "learning_rate": 0.0001442736850458318,
      "loss": 2.8481,
      "step": 155229
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.447824478149414,
      "learning_rate": 0.00014427018877102642,
      "loss": 2.7727,
      "step": 155230
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9175148010253906,
      "learning_rate": 0.0001442666925251741,
      "loss": 2.8117,
      "step": 155231
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.425513505935669,
      "learning_rate": 0.0001442631963082756,
      "loss": 2.9461,
      "step": 155232
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3687949180603027,
      "learning_rate": 0.00014425970012033144,
      "loss": 2.8111,
      "step": 155233
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.32572603225708,
      "learning_rate": 0.00014425620396134255,
      "loss": 2.9587,
      "step": 155234
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.356597423553467,
      "learning_rate": 0.00014425270783130912,
      "loss": 2.9689,
      "step": 155235
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2239487171173096,
      "learning_rate": 0.0001442492117302322,
      "loss": 2.8213,
      "step": 155236
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.235856533050537,
      "learning_rate": 0.00014424571565811216,
      "loss": 3.0428,
      "step": 155237
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.445251941680908,
      "learning_rate": 0.00014424221961494992,
      "loss": 2.9888,
      "step": 155238
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4304513931274414,
      "learning_rate": 0.00014423872360074587,
      "loss": 3.1447,
      "step": 155239
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.618597984313965,
      "learning_rate": 0.00014423522761550098,
      "loss": 2.8527,
      "step": 155240
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.198716163635254,
      "learning_rate": 0.0001442317316592155,
      "loss": 3.029,
      "step": 155241
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8706002235412598,
      "learning_rate": 0.00014422823573189038,
      "loss": 2.8081,
      "step": 155242
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0849111080169678,
      "learning_rate": 0.00014422473983352605,
      "loss": 3.0042,
      "step": 155243
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.187483072280884,
      "learning_rate": 0.00014422124396412344,
      "loss": 3.0697,
      "step": 155244
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.193981409072876,
      "learning_rate": 0.00014421774812368288,
      "loss": 3.1529,
      "step": 155245
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.029829263687134,
      "learning_rate": 0.00014421425231220546,
      "loss": 2.8743,
      "step": 155246
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.276942729949951,
      "learning_rate": 0.00014421075652969125,
      "loss": 3.1668,
      "step": 155247
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9471435546875,
      "learning_rate": 0.0001442072607761414,
      "loss": 2.7704,
      "step": 155248
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6141185760498047,
      "learning_rate": 0.00014420376505155618,
      "loss": 2.8509,
      "step": 155249
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1675713062286377,
      "learning_rate": 0.00014420026935593654,
      "loss": 2.9438,
      "step": 155250
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2692503929138184,
      "learning_rate": 0.00014419677368928294,
      "loss": 3.0298,
      "step": 155251
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.403425931930542,
      "learning_rate": 0.0001441932780515963,
      "loss": 2.9546,
      "step": 155252
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.673213005065918,
      "learning_rate": 0.00014418978244287685,
      "loss": 2.9149,
      "step": 155253
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.129406690597534,
      "learning_rate": 0.00014418628686312554,
      "loss": 2.9035,
      "step": 155254
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2452242374420166,
      "learning_rate": 0.00014418279131234285,
      "loss": 2.793,
      "step": 155255
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.114806890487671,
      "learning_rate": 0.00014417929579052966,
      "loss": 2.9143,
      "step": 155256
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.827404022216797,
      "learning_rate": 0.00014417580029768635,
      "loss": 3.1277,
      "step": 155257
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9602088928222656,
      "learning_rate": 0.00014417230483381393,
      "loss": 2.8464,
      "step": 155258
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.636124849319458,
      "learning_rate": 0.00014416880939891258,
      "loss": 2.9693,
      "step": 155259
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9762911796569824,
      "learning_rate": 0.0001441653139929833,
      "loss": 3.087,
      "step": 155260
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4641220569610596,
      "learning_rate": 0.00014416181861602655,
      "loss": 3.2748,
      "step": 155261
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0823349952697754,
      "learning_rate": 0.00014415832326804316,
      "loss": 2.8877,
      "step": 155262
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1762006282806396,
      "learning_rate": 0.00014415482794903357,
      "loss": 2.9914,
      "step": 155263
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.3956387042999268,
      "learning_rate": 0.00014415133265899873,
      "loss": 2.8356,
      "step": 155264
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.106684684753418,
      "learning_rate": 0.0001441478373979389,
      "loss": 3.0635,
      "step": 155265
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7036149501800537,
      "learning_rate": 0.00014414434216585505,
      "loss": 2.9551,
      "step": 155266
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3195595741271973,
      "learning_rate": 0.00014414084696274756,
      "loss": 2.9893,
      "step": 155267
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3042120933532715,
      "learning_rate": 0.00014413735178861738,
      "loss": 2.9628,
      "step": 155268
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6685070991516113,
      "learning_rate": 0.0001441338566434649,
      "loss": 2.9008,
      "step": 155269
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.678344964981079,
      "learning_rate": 0.00014413036152729095,
      "loss": 2.936,
      "step": 155270
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1781909465789795,
      "learning_rate": 0.0001441268664400961,
      "loss": 2.8867,
      "step": 155271
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.327881097793579,
      "learning_rate": 0.00014412337138188107,
      "loss": 3.0295,
      "step": 155272
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.29872989654541,
      "learning_rate": 0.00014411987635264627,
      "loss": 2.8662,
      "step": 155273
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.391892671585083,
      "learning_rate": 0.0001441163813523927,
      "loss": 3.1107,
      "step": 155274
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.529092311859131,
      "learning_rate": 0.0001441128863811207,
      "loss": 3.0762,
      "step": 155275
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0365378856658936,
      "learning_rate": 0.00014410939143883115,
      "loss": 3.0092,
      "step": 155276
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2599451541900635,
      "learning_rate": 0.0001441058965255246,
      "loss": 2.9538,
      "step": 155277
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8163902759552,
      "learning_rate": 0.00014410240164120164,
      "loss": 3.0747,
      "step": 155278
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.6338372230529785,
      "learning_rate": 0.0001440989067858631,
      "loss": 2.5955,
      "step": 155279
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5232741832733154,
      "learning_rate": 0.00014409541195950948,
      "loss": 2.6837,
      "step": 155280
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.691127061843872,
      "learning_rate": 0.00014409191716214137,
      "loss": 2.8319,
      "step": 155281
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.354556083679199,
      "learning_rate": 0.00014408842239375962,
      "loss": 2.8553,
      "step": 155282
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.914620876312256,
      "learning_rate": 0.00014408492765436478,
      "loss": 2.7904,
      "step": 155283
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.396958351135254,
      "learning_rate": 0.0001440814329439574,
      "loss": 3.0514,
      "step": 155284
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.919471025466919,
      "learning_rate": 0.00014407793826253833,
      "loss": 3.1799,
      "step": 155285
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.155223846435547,
      "learning_rate": 0.00014407444361010807,
      "loss": 2.8903,
      "step": 155286
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.901623725891113,
      "learning_rate": 0.00014407094898666726,
      "loss": 2.8569,
      "step": 155287
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.850001335144043,
      "learning_rate": 0.00014406745439221667,
      "loss": 2.9835,
      "step": 155288
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.605534076690674,
      "learning_rate": 0.0001440639598267569,
      "loss": 3.1898,
      "step": 155289
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4587411880493164,
      "learning_rate": 0.00014406046529028848,
      "loss": 2.972,
      "step": 155290
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2863876819610596,
      "learning_rate": 0.00014405697078281228,
      "loss": 2.7995,
      "step": 155291
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7603840827941895,
      "learning_rate": 0.0001440534763043287,
      "loss": 3.0461,
      "step": 155292
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.77878475189209,
      "learning_rate": 0.00014404998185483866,
      "loss": 2.6502,
      "step": 155293
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3278326988220215,
      "learning_rate": 0.00014404648743434267,
      "loss": 2.9008,
      "step": 155294
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.535576105117798,
      "learning_rate": 0.00014404299304284136,
      "loss": 3.0674,
      "step": 155295
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5111873149871826,
      "learning_rate": 0.0001440394986803353,
      "loss": 2.7226,
      "step": 155296
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.341614007949829,
      "learning_rate": 0.00014403600434682536,
      "loss": 3.2002,
      "step": 155297
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7138023376464844,
      "learning_rate": 0.00014403251004231195,
      "loss": 3.0126,
      "step": 155298
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7999026775360107,
      "learning_rate": 0.00014402901576679594,
      "loss": 2.9315,
      "step": 155299
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8056602478027344,
      "learning_rate": 0.0001440255215202779,
      "loss": 2.8449,
      "step": 155300
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7514357566833496,
      "learning_rate": 0.00014402202730275844,
      "loss": 2.8327,
      "step": 155301
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.03739070892334,
      "learning_rate": 0.00014401853311423815,
      "loss": 2.8651,
      "step": 155302
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7365834712982178,
      "learning_rate": 0.00014401503895471787,
      "loss": 3.0781,
      "step": 155303
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4219014644622803,
      "learning_rate": 0.000144011544824198,
      "loss": 3.0811,
      "step": 155304
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.38537335395813,
      "learning_rate": 0.00014400805072267947,
      "loss": 3.1853,
      "step": 155305
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4988179206848145,
      "learning_rate": 0.00014400455665016264,
      "loss": 2.8378,
      "step": 155306
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5262022018432617,
      "learning_rate": 0.00014400106260664856,
      "loss": 2.8855,
      "step": 155307
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.204353094100952,
      "learning_rate": 0.00014399756859213736,
      "loss": 3.0061,
      "step": 155308
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4493510723114014,
      "learning_rate": 0.0001439940746066301,
      "loss": 2.7901,
      "step": 155309
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3690614700317383,
      "learning_rate": 0.0001439905806501272,
      "loss": 2.9785,
      "step": 155310
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0771260261535645,
      "learning_rate": 0.00014398708672262947,
      "loss": 3.0369,
      "step": 155311
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.067553758621216,
      "learning_rate": 0.0001439835928241374,
      "loss": 2.758,
      "step": 155312
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5385396480560303,
      "learning_rate": 0.00014398009895465191,
      "loss": 2.9405,
      "step": 155313
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.217374563217163,
      "learning_rate": 0.0001439766051141733,
      "loss": 3.0638,
      "step": 155314
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.429525852203369,
      "learning_rate": 0.00014397311130270247,
      "loss": 2.8934,
      "step": 155315
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.124695301055908,
      "learning_rate": 0.00014396961752023983,
      "loss": 2.9521,
      "step": 155316
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9294259548187256,
      "learning_rate": 0.00014396612376678635,
      "loss": 2.9988,
      "step": 155317
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0811030864715576,
      "learning_rate": 0.00014396263004234236,
      "loss": 2.7544,
      "step": 155318
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0441696643829346,
      "learning_rate": 0.0001439591363469089,
      "loss": 2.8,
      "step": 155319
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.251368761062622,
      "learning_rate": 0.0001439556426804862,
      "loss": 2.8575,
      "step": 155320
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.305889844894409,
      "learning_rate": 0.00014395214904307514,
      "loss": 3.0841,
      "step": 155321
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2668941020965576,
      "learning_rate": 0.00014394865543467623,
      "loss": 2.6916,
      "step": 155322
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.986990213394165,
      "learning_rate": 0.00014394516185529035,
      "loss": 2.8049,
      "step": 155323
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5340819358825684,
      "learning_rate": 0.00014394166830491786,
      "loss": 2.9828,
      "step": 155324
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2659904956817627,
      "learning_rate": 0.00014393817478355983,
      "loss": 2.7541,
      "step": 155325
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.220445156097412,
      "learning_rate": 0.0001439346812912164,
      "loss": 2.9704,
      "step": 155326
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.221137285232544,
      "learning_rate": 0.00014393118782788855,
      "loss": 3.0584,
      "step": 155327
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.023325204849243,
      "learning_rate": 0.0001439276943935767,
      "loss": 2.858,
      "step": 155328
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5769224166870117,
      "learning_rate": 0.0001439242009882818,
      "loss": 3.1679,
      "step": 155329
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3319098949432373,
      "learning_rate": 0.00014392070761200422,
      "loss": 2.9177,
      "step": 155330
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.756225824356079,
      "learning_rate": 0.00014391721426474491,
      "loss": 3.2326,
      "step": 155331
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.40834379196167,
      "learning_rate": 0.00014391372094650417,
      "loss": 2.8683,
      "step": 155332
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.434302806854248,
      "learning_rate": 0.0001439102276572829,
      "loss": 2.9532,
      "step": 155333
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.475902795791626,
      "learning_rate": 0.00014390673439708155,
      "loss": 2.9953,
      "step": 155334
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4936444759368896,
      "learning_rate": 0.000143903241165901,
      "loss": 2.9259,
      "step": 155335
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0812478065490723,
      "learning_rate": 0.00014389974796374166,
      "loss": 2.9368,
      "step": 155336
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.785334825515747,
      "learning_rate": 0.00014389625479060446,
      "loss": 2.9127,
      "step": 155337
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.315192222595215,
      "learning_rate": 0.0001438927616464898,
      "loss": 3.1387,
      "step": 155338
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2044553756713867,
      "learning_rate": 0.0001438892685313985,
      "loss": 3.0806,
      "step": 155339
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5902509689331055,
      "learning_rate": 0.000143885775445331,
      "loss": 2.7764,
      "step": 155340
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.667423963546753,
      "learning_rate": 0.00014388228238828822,
      "loss": 2.9511,
      "step": 155341
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1908907890319824,
      "learning_rate": 0.00014387878936027054,
      "loss": 3.0116,
      "step": 155342
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.31229567527771,
      "learning_rate": 0.00014387529636127885,
      "loss": 2.9252,
      "step": 155343
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.787653923034668,
      "learning_rate": 0.00014387180339131369,
      "loss": 2.8541,
      "step": 155344
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0478663444519043,
      "learning_rate": 0.00014386831045037568,
      "loss": 2.9711,
      "step": 155345
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.161200761795044,
      "learning_rate": 0.00014386481753846542,
      "loss": 2.9778,
      "step": 155346
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9451346397399902,
      "learning_rate": 0.00014386132465558373,
      "loss": 3.0263,
      "step": 155347
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4131522178649902,
      "learning_rate": 0.00014385783180173107,
      "loss": 3.0746,
      "step": 155348
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8260114192962646,
      "learning_rate": 0.0001438543389769083,
      "loss": 2.7299,
      "step": 155349
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2139532566070557,
      "learning_rate": 0.00014385084618111595,
      "loss": 2.8374,
      "step": 155350
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.772099733352661,
      "learning_rate": 0.00014384735341435467,
      "loss": 2.9203,
      "step": 155351
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0828518867492676,
      "learning_rate": 0.000143843860676625,
      "loss": 2.9275,
      "step": 155352
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.5065414905548096,
      "learning_rate": 0.00014384036796792783,
      "loss": 3.1922,
      "step": 155353
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2733938694000244,
      "learning_rate": 0.00014383687528826358,
      "loss": 2.9816,
      "step": 155354
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.378445625305176,
      "learning_rate": 0.0001438333826376331,
      "loss": 2.9443,
      "step": 155355
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2976694107055664,
      "learning_rate": 0.00014382989001603694,
      "loss": 2.5866,
      "step": 155356
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.400618076324463,
      "learning_rate": 0.00014382639742347575,
      "loss": 3.0916,
      "step": 155357
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4249298572540283,
      "learning_rate": 0.00014382290485995006,
      "loss": 2.9979,
      "step": 155358
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.3051846027374268,
      "learning_rate": 0.00014381941232546078,
      "loss": 2.9575,
      "step": 155359
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4010016918182373,
      "learning_rate": 0.00014381591982000826,
      "loss": 2.9468,
      "step": 155360
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3559377193450928,
      "learning_rate": 0.00014381242734359345,
      "loss": 2.8791,
      "step": 155361
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5627057552337646,
      "learning_rate": 0.00014380893489621685,
      "loss": 2.9671,
      "step": 155362
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8305585384368896,
      "learning_rate": 0.00014380544247787915,
      "loss": 3.0063,
      "step": 155363
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.472529172897339,
      "learning_rate": 0.00014380195008858085,
      "loss": 2.5394,
      "step": 155364
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.603397846221924,
      "learning_rate": 0.0001437984577283228,
      "loss": 3.0718,
      "step": 155365
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.040670156478882,
      "learning_rate": 0.00014379496539710545,
      "loss": 3.1025,
      "step": 155366
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.458301305770874,
      "learning_rate": 0.0001437914730949297,
      "loss": 3.0158,
      "step": 155367
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3475375175476074,
      "learning_rate": 0.00014378798082179608,
      "loss": 2.9783,
      "step": 155368
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.403870105743408,
      "learning_rate": 0.00014378448857770506,
      "loss": 2.852,
      "step": 155369
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2138705253601074,
      "learning_rate": 0.0001437809963626576,
      "loss": 3.1993,
      "step": 155370
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.133946657180786,
      "learning_rate": 0.0001437775041766542,
      "loss": 3.0277,
      "step": 155371
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3339321613311768,
      "learning_rate": 0.00014377401201969537,
      "loss": 3.0543,
      "step": 155372
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.630539655685425,
      "learning_rate": 0.00014377051989178204,
      "loss": 3.0321,
      "step": 155373
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6731574535369873,
      "learning_rate": 0.0001437670277929147,
      "loss": 2.9398,
      "step": 155374
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8101441860198975,
      "learning_rate": 0.0001437635357230939,
      "loss": 3.0646,
      "step": 155375
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.298535108566284,
      "learning_rate": 0.00014376004368232056,
      "loss": 2.9196,
      "step": 155376
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.18778395652771,
      "learning_rate": 0.000143756551670595,
      "loss": 3.0919,
      "step": 155377
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.724534034729004,
      "learning_rate": 0.00014375305968791823,
      "loss": 2.8445,
      "step": 155378
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0223162174224854,
      "learning_rate": 0.0001437495677342907,
      "loss": 3.0084,
      "step": 155379
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3781394958496094,
      "learning_rate": 0.00014374607580971307,
      "loss": 2.9038,
      "step": 155380
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.305994749069214,
      "learning_rate": 0.00014374258391418586,
      "loss": 3.1024,
      "step": 155381
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6660847663879395,
      "learning_rate": 0.00014373909204771,
      "loss": 3.203,
      "step": 155382
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.152434349060059,
      "learning_rate": 0.00014373560021028586,
      "loss": 2.9404,
      "step": 155383
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6677443981170654,
      "learning_rate": 0.0001437321084019143,
      "loss": 2.8996,
      "step": 155384
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4456491470336914,
      "learning_rate": 0.00014372861662259595,
      "loss": 2.9898,
      "step": 155385
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.694601535797119,
      "learning_rate": 0.00014372512487233134,
      "loss": 2.9826,
      "step": 155386
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.280697822570801,
      "learning_rate": 0.0001437216331511211,
      "loss": 2.8776,
      "step": 155387
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7611241340637207,
      "learning_rate": 0.00014371814145896608,
      "loss": 3.1905,
      "step": 155388
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4762117862701416,
      "learning_rate": 0.0001437146497958667,
      "loss": 2.5472,
      "step": 155389
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.487391233444214,
      "learning_rate": 0.00014371115816182381,
      "loss": 2.9109,
      "step": 155390
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.659940719604492,
      "learning_rate": 0.00014370766655683785,
      "loss": 2.7632,
      "step": 155391
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0247926712036133,
      "learning_rate": 0.00014370417498090984,
      "loss": 2.9816,
      "step": 155392
    },
    {
      "epoch": 2.02,
      "grad_norm": 5.230328559875488,
      "learning_rate": 0.0001437006834340399,
      "loss": 2.9861,
      "step": 155393
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.156000852584839,
      "learning_rate": 0.00014369719191622911,
      "loss": 3.0862,
      "step": 155394
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.304433584213257,
      "learning_rate": 0.0001436937004274778,
      "loss": 2.8267,
      "step": 155395
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6434412002563477,
      "learning_rate": 0.00014369020896778695,
      "loss": 2.8525,
      "step": 155396
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5495283603668213,
      "learning_rate": 0.0001436867175371569,
      "loss": 3.1119,
      "step": 155397
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.376854419708252,
      "learning_rate": 0.0001436832261355887,
      "loss": 2.9901,
      "step": 155398
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2218501567840576,
      "learning_rate": 0.00014367973476308248,
      "loss": 2.9386,
      "step": 155399
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.27382230758667,
      "learning_rate": 0.00014367624341963926,
      "loss": 2.9997,
      "step": 155400
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3447771072387695,
      "learning_rate": 0.00014367275210525946,
      "loss": 2.9177,
      "step": 155401
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.900243043899536,
      "learning_rate": 0.00014366926081994398,
      "loss": 2.9763,
      "step": 155402
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.185117721557617,
      "learning_rate": 0.00014366576956369322,
      "loss": 3.0519,
      "step": 155403
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4892404079437256,
      "learning_rate": 0.00014366227833650805,
      "loss": 2.9701,
      "step": 155404
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0629780292510986,
      "learning_rate": 0.00014365878713838901,
      "loss": 3.0799,
      "step": 155405
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1707465648651123,
      "learning_rate": 0.00014365529596933676,
      "loss": 2.9302,
      "step": 155406
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1500980854034424,
      "learning_rate": 0.00014365180482935183,
      "loss": 3.0425,
      "step": 155407
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.828253746032715,
      "learning_rate": 0.00014364831371843506,
      "loss": 2.7151,
      "step": 155408
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7652554512023926,
      "learning_rate": 0.00014364482263658695,
      "loss": 2.8251,
      "step": 155409
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.444279670715332,
      "learning_rate": 0.0001436413315838083,
      "loss": 2.9728,
      "step": 155410
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.8982157707214355,
      "learning_rate": 0.00014363784056009972,
      "loss": 2.8583,
      "step": 155411
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.9141356945037842,
      "learning_rate": 0.0001436343495654618,
      "loss": 2.7807,
      "step": 155412
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0449345111846924,
      "learning_rate": 0.00014363085859989507,
      "loss": 2.9662,
      "step": 155413
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.432916164398193,
      "learning_rate": 0.00014362736766340042,
      "loss": 3.0068,
      "step": 155414
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.011324167251587,
      "learning_rate": 0.00014362387675597834,
      "loss": 3.1336,
      "step": 155415
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.427628755569458,
      "learning_rate": 0.0001436203858776296,
      "loss": 2.8333,
      "step": 155416
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4563753604888916,
      "learning_rate": 0.0001436168950283548,
      "loss": 2.6471,
      "step": 155417
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1703643798828125,
      "learning_rate": 0.00014361340420815456,
      "loss": 2.927,
      "step": 155418
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5781633853912354,
      "learning_rate": 0.00014360991341702943,
      "loss": 3.0146,
      "step": 155419
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.864372968673706,
      "learning_rate": 0.00014360642265498028,
      "loss": 2.9669,
      "step": 155420
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.283243179321289,
      "learning_rate": 0.00014360293192200752,
      "loss": 3.0451,
      "step": 155421
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4204320907592773,
      "learning_rate": 0.00014359944121811207,
      "loss": 3.2571,
      "step": 155422
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4829907417297363,
      "learning_rate": 0.00014359595054329444,
      "loss": 2.8886,
      "step": 155423
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0764427185058594,
      "learning_rate": 0.00014359245989755524,
      "loss": 3.011,
      "step": 155424
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.819362163543701,
      "learning_rate": 0.00014358896928089507,
      "loss": 3.0302,
      "step": 155425
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8446898460388184,
      "learning_rate": 0.00014358547869331475,
      "loss": 2.7684,
      "step": 155426
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7426540851593018,
      "learning_rate": 0.00014358198813481477,
      "loss": 2.8947,
      "step": 155427
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1702804565429688,
      "learning_rate": 0.00014357849760539593,
      "loss": 2.8864,
      "step": 155428
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7054996490478516,
      "learning_rate": 0.00014357500710505882,
      "loss": 3.1971,
      "step": 155429
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.08705472946167,
      "learning_rate": 0.00014357151663380404,
      "loss": 2.937,
      "step": 155430
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.582029104232788,
      "learning_rate": 0.00014356802619163217,
      "loss": 2.9197,
      "step": 155431
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6189756393432617,
      "learning_rate": 0.00014356453577854409,
      "loss": 2.994,
      "step": 155432
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.150674819946289,
      "learning_rate": 0.00014356104539454015,
      "loss": 2.8998,
      "step": 155433
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.425746440887451,
      "learning_rate": 0.00014355755503962135,
      "loss": 2.7647,
      "step": 155434
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5623395442962646,
      "learning_rate": 0.00014355406471378812,
      "loss": 3.0685,
      "step": 155435
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.939499616622925,
      "learning_rate": 0.00014355057441704113,
      "loss": 2.7523,
      "step": 155436
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2795989513397217,
      "learning_rate": 0.00014354708414938093,
      "loss": 3.1406,
      "step": 155437
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4257829189300537,
      "learning_rate": 0.0001435435939108084,
      "loss": 3.0613,
      "step": 155438
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.537182569503784,
      "learning_rate": 0.00014354010370132395,
      "loss": 2.7278,
      "step": 155439
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4705564975738525,
      "learning_rate": 0.0001435366135209285,
      "loss": 2.7067,
      "step": 155440
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.8764731884002686,
      "learning_rate": 0.0001435331233696225,
      "loss": 2.7153,
      "step": 155441
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.286325454711914,
      "learning_rate": 0.00014352963324740667,
      "loss": 2.6756,
      "step": 155442
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4578423500061035,
      "learning_rate": 0.0001435261431542815,
      "loss": 2.857,
      "step": 155443
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.383403778076172,
      "learning_rate": 0.0001435226530902479,
      "loss": 3.0033,
      "step": 155444
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4201250076293945,
      "learning_rate": 0.00014351916305530628,
      "loss": 2.8369,
      "step": 155445
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.650869131088257,
      "learning_rate": 0.00014351567304945754,
      "loss": 3.0634,
      "step": 155446
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6826465129852295,
      "learning_rate": 0.00014351218307270214,
      "loss": 2.7317,
      "step": 155447
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.070932626724243,
      "learning_rate": 0.00014350869312504082,
      "loss": 2.9549,
      "step": 155448
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2454371452331543,
      "learning_rate": 0.00014350520320647404,
      "loss": 2.9047,
      "step": 155449
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2526562213897705,
      "learning_rate": 0.00014350171331700275,
      "loss": 2.9496,
      "step": 155450
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.443065643310547,
      "learning_rate": 0.0001434982234566273,
      "loss": 2.7903,
      "step": 155451
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.466219425201416,
      "learning_rate": 0.0001434947336253486,
      "loss": 2.676,
      "step": 155452
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5399293899536133,
      "learning_rate": 0.0001434912438231672,
      "loss": 3.0692,
      "step": 155453
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6999316215515137,
      "learning_rate": 0.00014348775405008363,
      "loss": 2.9286,
      "step": 155454
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1275463104248047,
      "learning_rate": 0.00014348426430609872,
      "loss": 3.0055,
      "step": 155455
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.473492383956909,
      "learning_rate": 0.00014348077459121303,
      "loss": 2.8715,
      "step": 155456
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6304666996002197,
      "learning_rate": 0.00014347728490542714,
      "loss": 3.172,
      "step": 155457
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9314024448394775,
      "learning_rate": 0.00014347379524874189,
      "loss": 2.9887,
      "step": 155458
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4672701358795166,
      "learning_rate": 0.0001434703056211578,
      "loss": 2.7468,
      "step": 155459
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0323495864868164,
      "learning_rate": 0.00014346681602267546,
      "loss": 2.9028,
      "step": 155460
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.559687376022339,
      "learning_rate": 0.00014346332645329566,
      "loss": 2.7286,
      "step": 155461
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1530888080596924,
      "learning_rate": 0.000143459836913019,
      "loss": 2.888,
      "step": 155462
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.572683811187744,
      "learning_rate": 0.00014345634740184598,
      "loss": 2.9122,
      "step": 155463
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2962265014648438,
      "learning_rate": 0.0001434528579197775,
      "loss": 2.8506,
      "step": 155464
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.92662513256073,
      "learning_rate": 0.00014344936846681414,
      "loss": 3.1465,
      "step": 155465
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3613595962524414,
      "learning_rate": 0.0001434458790429563,
      "loss": 2.9079,
      "step": 155466
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.694643497467041,
      "learning_rate": 0.000143442389648205,
      "loss": 3.2059,
      "step": 155467
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1189627647399902,
      "learning_rate": 0.00014343890028256057,
      "loss": 3.0252,
      "step": 155468
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1451120376586914,
      "learning_rate": 0.00014343541094602395,
      "loss": 3.073,
      "step": 155469
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7120561599731445,
      "learning_rate": 0.00014343192163859562,
      "loss": 2.9068,
      "step": 155470
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7536368370056152,
      "learning_rate": 0.00014342843236027616,
      "loss": 2.997,
      "step": 155471
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1493120193481445,
      "learning_rate": 0.0001434249431110664,
      "loss": 2.8459,
      "step": 155472
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.956450939178467,
      "learning_rate": 0.0001434214538909669,
      "loss": 2.9307,
      "step": 155473
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0200555324554443,
      "learning_rate": 0.0001434179646999782,
      "loss": 3.0551,
      "step": 155474
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6599996089935303,
      "learning_rate": 0.00014341447553810113,
      "loss": 2.9454,
      "step": 155475
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.002394199371338,
      "learning_rate": 0.0001434109864053362,
      "loss": 2.939,
      "step": 155476
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2163503170013428,
      "learning_rate": 0.00014340749730168423,
      "loss": 3.1389,
      "step": 155477
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3265771865844727,
      "learning_rate": 0.00014340400822714574,
      "loss": 2.9952,
      "step": 155478
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4901177883148193,
      "learning_rate": 0.00014340051918172142,
      "loss": 2.8255,
      "step": 155479
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.519138813018799,
      "learning_rate": 0.0001433970301654118,
      "loss": 3.1338,
      "step": 155480
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9449238777160645,
      "learning_rate": 0.0001433935411782177,
      "loss": 2.9057,
      "step": 155481
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.1505260467529297,
      "learning_rate": 0.00014339005222013964,
      "loss": 2.7078,
      "step": 155482
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5320470333099365,
      "learning_rate": 0.0001433865632911784,
      "loss": 3.1129,
      "step": 155483
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.5461690425872803,
      "learning_rate": 0.00014338307439133456,
      "loss": 2.8101,
      "step": 155484
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.1094465255737305,
      "learning_rate": 0.0001433795855206088,
      "loss": 3.1857,
      "step": 155485
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.437224864959717,
      "learning_rate": 0.00014337609667900152,
      "loss": 2.8517,
      "step": 155486
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0002381801605225,
      "learning_rate": 0.00014337260786651378,
      "loss": 2.8623,
      "step": 155487
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.6813805103302,
      "learning_rate": 0.0001433691190831459,
      "loss": 2.7176,
      "step": 155488
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9457225799560547,
      "learning_rate": 0.00014336563032889876,
      "loss": 2.8624,
      "step": 155489
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.50138521194458,
      "learning_rate": 0.0001433621416037729,
      "loss": 2.9283,
      "step": 155490
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5405476093292236,
      "learning_rate": 0.00014335865290776898,
      "loss": 2.9551,
      "step": 155491
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.2738587856292725,
      "learning_rate": 0.00014335516424088753,
      "loss": 3.0848,
      "step": 155492
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0241098403930664,
      "learning_rate": 0.00014335167560312943,
      "loss": 2.8902,
      "step": 155493
    },
    {
      "epoch": 2.02,
      "grad_norm": 4.175429821014404,
      "learning_rate": 0.0001433481869944951,
      "loss": 2.9368,
      "step": 155494
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.0514211654663086,
      "learning_rate": 0.00014334469841498543,
      "loss": 2.7837,
      "step": 155495
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9230082035064697,
      "learning_rate": 0.00014334120986460089,
      "loss": 2.9795,
      "step": 155496
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.874368667602539,
      "learning_rate": 0.00014333772134334218,
      "loss": 2.8852,
      "step": 155497
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.136401414871216,
      "learning_rate": 0.0001433342328512099,
      "loss": 3.0223,
      "step": 155498
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.35159969329834,
      "learning_rate": 0.0001433307443882048,
      "loss": 2.7276,
      "step": 155499
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.23630952835083,
      "learning_rate": 0.00014332725595432737,
      "loss": 3.0548,
      "step": 155500
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6290693283081055,
      "learning_rate": 0.00014332376754957847,
      "loss": 2.9528,
      "step": 155501
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.667543411254883,
      "learning_rate": 0.00014332027917395864,
      "loss": 2.9097,
      "step": 155502
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.1757490634918213,
      "learning_rate": 0.0001433167908274685,
      "loss": 3.0341,
      "step": 155503
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.099421977996826,
      "learning_rate": 0.00014331330251010868,
      "loss": 2.9791,
      "step": 155504
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.0162951946258545,
      "learning_rate": 0.00014330981422187993,
      "loss": 3.0505,
      "step": 155505
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7007572650909424,
      "learning_rate": 0.00014330632596278275,
      "loss": 2.9244,
      "step": 155506
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5695667266845703,
      "learning_rate": 0.00014330283773281798,
      "loss": 2.8211,
      "step": 155507
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3859856128692627,
      "learning_rate": 0.0001432993495319862,
      "loss": 2.888,
      "step": 155508
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.228466272354126,
      "learning_rate": 0.00014329586136028802,
      "loss": 2.761,
      "step": 155509
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4550986289978027,
      "learning_rate": 0.00014329237321772397,
      "loss": 3.0696,
      "step": 155510
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.7658448219299316,
      "learning_rate": 0.00014328888510429497,
      "loss": 2.952,
      "step": 155511
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.301278829574585,
      "learning_rate": 0.00014328539702000137,
      "loss": 2.9837,
      "step": 155512
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.564556121826172,
      "learning_rate": 0.0001432819089648441,
      "loss": 2.8198,
      "step": 155513
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.5916409492492676,
      "learning_rate": 0.00014327842093882367,
      "loss": 2.9557,
      "step": 155514
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.870727300643921,
      "learning_rate": 0.00014327493294194078,
      "loss": 2.9375,
      "step": 155515
    },
    {
      "epoch": 2.02,
      "grad_norm": 3.2687013149261475,
      "learning_rate": 0.00014327144497419588,
      "loss": 3.1628,
      "step": 155516
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.235780715942383,
      "learning_rate": 0.00014326795703558992,
      "loss": 2.9453,
      "step": 155517
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.3090221881866455,
      "learning_rate": 0.0001432644691261233,
      "loss": 2.938,
      "step": 155518
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.6357204914093018,
      "learning_rate": 0.0001432609812457969,
      "loss": 3.0915,
      "step": 155519
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.4259848594665527,
      "learning_rate": 0.00014325749339461118,
      "loss": 2.8829,
      "step": 155520
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.867067813873291,
      "learning_rate": 0.0001432540055725669,
      "loss": 2.9723,
      "step": 155521
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.9033284187316895,
      "learning_rate": 0.0001432505177796645,
      "loss": 3.0416,
      "step": 155522
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.239163398742676,
      "learning_rate": 0.00014324703001590493,
      "loss": 2.9768,
      "step": 155523
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.222264528274536,
      "learning_rate": 0.0001432435422812886,
      "loss": 3.0707,
      "step": 155524
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.348327398300171,
      "learning_rate": 0.00014324005457581638,
      "loss": 2.9188,
      "step": 155525
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5640366077423096,
      "learning_rate": 0.00014323656689948873,
      "loss": 3.2048,
      "step": 155526
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8286490440368652,
      "learning_rate": 0.0001432330792523064,
      "loss": 3.1341,
      "step": 155527
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5346386432647705,
      "learning_rate": 0.00014322959163426988,
      "loss": 2.7401,
      "step": 155528
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0496902465820312,
      "learning_rate": 0.00014322610404538005,
      "loss": 3.0114,
      "step": 155529
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9744054079055786,
      "learning_rate": 0.00014322261648563737,
      "loss": 2.8858,
      "step": 155530
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.228793144226074,
      "learning_rate": 0.0001432191289550426,
      "loss": 2.9562,
      "step": 155531
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9653024673461914,
      "learning_rate": 0.0001432156414535964,
      "loss": 2.927,
      "step": 155532
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4826819896698,
      "learning_rate": 0.0001432121539812994,
      "loss": 3.0948,
      "step": 155533
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.863140106201172,
      "learning_rate": 0.00014320866653815207,
      "loss": 3.0292,
      "step": 155534
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3453121185302734,
      "learning_rate": 0.00014320517912415532,
      "loss": 3.0093,
      "step": 155535
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.785691499710083,
      "learning_rate": 0.00014320169173930958,
      "loss": 2.9831,
      "step": 155536
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.49211049079895,
      "learning_rate": 0.00014319820438361572,
      "loss": 2.9419,
      "step": 155537
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.271514654159546,
      "learning_rate": 0.00014319471705707414,
      "loss": 2.9751,
      "step": 155538
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3207740783691406,
      "learning_rate": 0.00014319122975968578,
      "loss": 3.005,
      "step": 155539
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.392237901687622,
      "learning_rate": 0.0001431877424914511,
      "loss": 2.9034,
      "step": 155540
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8471732139587402,
      "learning_rate": 0.0001431842552523708,
      "loss": 2.6936,
      "step": 155541
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.7877752780914307,
      "learning_rate": 0.0001431807680424454,
      "loss": 2.8905,
      "step": 155542
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.6850757598876953,
      "learning_rate": 0.00014317728086167574,
      "loss": 2.9801,
      "step": 155543
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.524348497390747,
      "learning_rate": 0.00014317379371006227,
      "loss": 2.9799,
      "step": 155544
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0758376121520996,
      "learning_rate": 0.0001431703065876059,
      "loss": 3.1806,
      "step": 155545
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.315783977508545,
      "learning_rate": 0.00014316681949430707,
      "loss": 2.7877,
      "step": 155546
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.888129234313965,
      "learning_rate": 0.0001431633324301665,
      "loss": 2.9757,
      "step": 155547
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.7391855716705322,
      "learning_rate": 0.00014315984539518475,
      "loss": 3.0714,
      "step": 155548
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1258363723754883,
      "learning_rate": 0.00014315635838936267,
      "loss": 2.8337,
      "step": 155549
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.030839443206787,
      "learning_rate": 0.0001431528714127006,
      "loss": 3.2236,
      "step": 155550
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9511003494262695,
      "learning_rate": 0.00014314938446519955,
      "loss": 3.0205,
      "step": 155551
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.8514699935913086,
      "learning_rate": 0.00014314589754685998,
      "loss": 3.0901,
      "step": 155552
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7426209449768066,
      "learning_rate": 0.0001431424106576824,
      "loss": 3.0979,
      "step": 155553
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.47223162651062,
      "learning_rate": 0.0001431389237976678,
      "loss": 3.0866,
      "step": 155554
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.805219888687134,
      "learning_rate": 0.00014313543696681656,
      "loss": 3.0484,
      "step": 155555
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.529078722000122,
      "learning_rate": 0.00014313195016512928,
      "loss": 3.0338,
      "step": 155556
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.350238084793091,
      "learning_rate": 0.0001431284633926069,
      "loss": 2.9541,
      "step": 155557
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2275941371917725,
      "learning_rate": 0.00014312497664924987,
      "loss": 2.8652,
      "step": 155558
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3501944541931152,
      "learning_rate": 0.00014312148993505876,
      "loss": 3.0058,
      "step": 155559
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3101110458374023,
      "learning_rate": 0.00014311800325003443,
      "loss": 2.9634,
      "step": 155560
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8455960750579834,
      "learning_rate": 0.00014311451659417747,
      "loss": 2.9508,
      "step": 155561
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0779671669006348,
      "learning_rate": 0.0001431110299674883,
      "loss": 2.6878,
      "step": 155562
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8292040824890137,
      "learning_rate": 0.00014310754336996793,
      "loss": 2.8638,
      "step": 155563
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.997312307357788,
      "learning_rate": 0.00014310405680161679,
      "loss": 2.9322,
      "step": 155564
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8693976402282715,
      "learning_rate": 0.00014310057026243545,
      "loss": 2.9048,
      "step": 155565
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.6454477310180664,
      "learning_rate": 0.00014309708375242482,
      "loss": 3.0802,
      "step": 155566
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.593799352645874,
      "learning_rate": 0.00014309359727158525,
      "loss": 2.9705,
      "step": 155567
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3270909786224365,
      "learning_rate": 0.00014309011081991768,
      "loss": 3.1883,
      "step": 155568
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.606884717941284,
      "learning_rate": 0.00014308662439742263,
      "loss": 2.8631,
      "step": 155569
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.572774887084961,
      "learning_rate": 0.00014308313800410073,
      "loss": 2.9074,
      "step": 155570
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.665323495864868,
      "learning_rate": 0.0001430796516399525,
      "loss": 3.0266,
      "step": 155571
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.019134283065796,
      "learning_rate": 0.0001430761653049789,
      "loss": 2.8006,
      "step": 155572
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4295406341552734,
      "learning_rate": 0.0001430726789991802,
      "loss": 2.7715,
      "step": 155573
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0112144947052,
      "learning_rate": 0.00014306919272255743,
      "loss": 3.1453,
      "step": 155574
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.126776695251465,
      "learning_rate": 0.00014306570647511105,
      "loss": 3.017,
      "step": 155575
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2997913360595703,
      "learning_rate": 0.0001430622202568417,
      "loss": 3.0185,
      "step": 155576
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.114607572555542,
      "learning_rate": 0.00014305873406774997,
      "loss": 2.897,
      "step": 155577
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3626441955566406,
      "learning_rate": 0.00014305524790783665,
      "loss": 2.9335,
      "step": 155578
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.519580841064453,
      "learning_rate": 0.00014305176177710222,
      "loss": 3.1924,
      "step": 155579
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.181136131286621,
      "learning_rate": 0.00014304827567554755,
      "loss": 3.0652,
      "step": 155580
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1092429161071777,
      "learning_rate": 0.0001430447896031732,
      "loss": 2.9402,
      "step": 155581
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.970585346221924,
      "learning_rate": 0.00014304130355997975,
      "loss": 3.0401,
      "step": 155582
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.747027635574341,
      "learning_rate": 0.00014303781754596777,
      "loss": 2.8324,
      "step": 155583
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.670346975326538,
      "learning_rate": 0.00014303433156113814,
      "loss": 2.8046,
      "step": 155584
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6855194568634033,
      "learning_rate": 0.00014303084560549127,
      "loss": 3.0299,
      "step": 155585
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6528172492980957,
      "learning_rate": 0.00014302735967902807,
      "loss": 2.9058,
      "step": 155586
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.446237325668335,
      "learning_rate": 0.000143023873781749,
      "loss": 2.9267,
      "step": 155587
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.765514612197876,
      "learning_rate": 0.00014302038791365478,
      "loss": 3.1716,
      "step": 155588
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.434814453125,
      "learning_rate": 0.00014301690207474594,
      "loss": 3.1657,
      "step": 155589
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.501544237136841,
      "learning_rate": 0.00014301341626502334,
      "loss": 2.8135,
      "step": 155590
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1168861389160156,
      "learning_rate": 0.00014300993048448735,
      "loss": 2.8476,
      "step": 155591
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5090506076812744,
      "learning_rate": 0.00014300644473313893,
      "loss": 2.8051,
      "step": 155592
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6537532806396484,
      "learning_rate": 0.00014300295901097852,
      "loss": 3.0248,
      "step": 155593
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.647000789642334,
      "learning_rate": 0.0001429994733180069,
      "loss": 2.7542,
      "step": 155594
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2262990474700928,
      "learning_rate": 0.0001429959876542245,
      "loss": 3.0161,
      "step": 155595
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.291654109954834,
      "learning_rate": 0.0001429925020196322,
      "loss": 2.837,
      "step": 155596
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.487424373626709,
      "learning_rate": 0.00014298901641423046,
      "loss": 2.938,
      "step": 155597
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2693488597869873,
      "learning_rate": 0.00014298553083802015,
      "loss": 3.0665,
      "step": 155598
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.317856788635254,
      "learning_rate": 0.00014298204529100176,
      "loss": 3.1312,
      "step": 155599
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2571802139282227,
      "learning_rate": 0.00014297855977317599,
      "loss": 3.0771,
      "step": 155600
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.789135217666626,
      "learning_rate": 0.00014297507428454338,
      "loss": 3.1668,
      "step": 155601
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6333725452423096,
      "learning_rate": 0.00014297158882510473,
      "loss": 3.035,
      "step": 155602
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.336927890777588,
      "learning_rate": 0.00014296810339486056,
      "loss": 3.1013,
      "step": 155603
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2168874740600586,
      "learning_rate": 0.00014296461799381168,
      "loss": 2.9245,
      "step": 155604
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7664241790771484,
      "learning_rate": 0.00014296113262195854,
      "loss": 2.9667,
      "step": 155605
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2804508209228516,
      "learning_rate": 0.0001429576472793021,
      "loss": 2.8138,
      "step": 155606
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4254021644592285,
      "learning_rate": 0.00014295416196584255,
      "loss": 2.8791,
      "step": 155607
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.968052864074707,
      "learning_rate": 0.00014295067668158097,
      "loss": 2.7639,
      "step": 155608
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.290372133255005,
      "learning_rate": 0.00014294719142651766,
      "loss": 2.8838,
      "step": 155609
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.638427257537842,
      "learning_rate": 0.00014294370620065354,
      "loss": 2.8325,
      "step": 155610
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1687910556793213,
      "learning_rate": 0.00014294022100398905,
      "loss": 2.9111,
      "step": 155611
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2172183990478516,
      "learning_rate": 0.00014293673583652515,
      "loss": 2.9268,
      "step": 155612
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2162082195281982,
      "learning_rate": 0.00014293325069826207,
      "loss": 3.0929,
      "step": 155613
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.662405014038086,
      "learning_rate": 0.00014292976558920076,
      "loss": 2.9018,
      "step": 155614
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4391188621520996,
      "learning_rate": 0.00014292628050934167,
      "loss": 2.9595,
      "step": 155615
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4524567127227783,
      "learning_rate": 0.00014292279545868568,
      "loss": 2.941,
      "step": 155616
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.26953387260437,
      "learning_rate": 0.00014291931043723314,
      "loss": 2.9156,
      "step": 155617
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.624415636062622,
      "learning_rate": 0.00014291582544498512,
      "loss": 3.0085,
      "step": 155618
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1266167163848877,
      "learning_rate": 0.00014291234048194182,
      "loss": 3.0347,
      "step": 155619
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.760139226913452,
      "learning_rate": 0.00014290885554810414,
      "loss": 3.016,
      "step": 155620
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.117976188659668,
      "learning_rate": 0.0001429053706434726,
      "loss": 2.9896,
      "step": 155621
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9754855632781982,
      "learning_rate": 0.000142901885768048,
      "loss": 3.0041,
      "step": 155622
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.389477252960205,
      "learning_rate": 0.00014289840092183084,
      "loss": 2.7342,
      "step": 155623
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4003794193267822,
      "learning_rate": 0.000142894916104822,
      "loss": 2.9088,
      "step": 155624
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1780216693878174,
      "learning_rate": 0.00014289143131702176,
      "loss": 3.059,
      "step": 155625
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.375422477722168,
      "learning_rate": 0.0001428879465584311,
      "loss": 2.9775,
      "step": 155626
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.47761869430542,
      "learning_rate": 0.00014288446182905037,
      "loss": 2.9347,
      "step": 155627
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.46305251121521,
      "learning_rate": 0.00014288097712888054,
      "loss": 3.0956,
      "step": 155628
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7566940784454346,
      "learning_rate": 0.00014287749245792196,
      "loss": 2.9016,
      "step": 155629
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.0077033042907715,
      "learning_rate": 0.00014287400781617558,
      "loss": 2.8392,
      "step": 155630
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.779667615890503,
      "learning_rate": 0.00014287052320364186,
      "loss": 2.9919,
      "step": 155631
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.032949447631836,
      "learning_rate": 0.00014286703862032146,
      "loss": 2.9754,
      "step": 155632
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5290474891662598,
      "learning_rate": 0.00014286355406621494,
      "loss": 2.9008,
      "step": 155633
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3782711029052734,
      "learning_rate": 0.00014286006954132316,
      "loss": 2.8681,
      "step": 155634
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.241206407546997,
      "learning_rate": 0.00014285658504564657,
      "loss": 2.8632,
      "step": 155635
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.49582839012146,
      "learning_rate": 0.000142853100579186,
      "loss": 2.9156,
      "step": 155636
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.550762891769409,
      "learning_rate": 0.00014284961614194203,
      "loss": 2.9794,
      "step": 155637
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2910289764404297,
      "learning_rate": 0.00014284613173391517,
      "loss": 2.949,
      "step": 155638
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4722845554351807,
      "learning_rate": 0.00014284264735510625,
      "loss": 3.2115,
      "step": 155639
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1743216514587402,
      "learning_rate": 0.0001428391630055159,
      "loss": 2.9625,
      "step": 155640
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4211463928222656,
      "learning_rate": 0.00014283567868514455,
      "loss": 2.7473,
      "step": 155641
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.268186330795288,
      "learning_rate": 0.00014283219439399316,
      "loss": 2.9311,
      "step": 155642
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8522729873657227,
      "learning_rate": 0.0001428287101320622,
      "loss": 3.2214,
      "step": 155643
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4721598625183105,
      "learning_rate": 0.00014282522589935227,
      "loss": 3.1278,
      "step": 155644
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.434786319732666,
      "learning_rate": 0.0001428217416958642,
      "loss": 2.9324,
      "step": 155645
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9379448890686035,
      "learning_rate": 0.00014281825752159854,
      "loss": 2.8161,
      "step": 155646
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3519952297210693,
      "learning_rate": 0.00014281477337655584,
      "loss": 2.8828,
      "step": 155647
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5295913219451904,
      "learning_rate": 0.0001428112892607369,
      "loss": 2.7688,
      "step": 155648
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.563983917236328,
      "learning_rate": 0.00014280780517414234,
      "loss": 2.905,
      "step": 155649
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4000320434570312,
      "learning_rate": 0.00014280432111677267,
      "loss": 2.8773,
      "step": 155650
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6055967807769775,
      "learning_rate": 0.00014280083708862875,
      "loss": 3.0262,
      "step": 155651
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.89749813079834,
      "learning_rate": 0.00014279735308971104,
      "loss": 3.2526,
      "step": 155652
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.509653091430664,
      "learning_rate": 0.00014279386912002036,
      "loss": 3.0406,
      "step": 155653
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.516620397567749,
      "learning_rate": 0.00014279038517955723,
      "loss": 3.0021,
      "step": 155654
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6688320636749268,
      "learning_rate": 0.00014278690126832237,
      "loss": 2.8783,
      "step": 155655
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9056522846221924,
      "learning_rate": 0.00014278341738631627,
      "loss": 2.9772,
      "step": 155656
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.528874397277832,
      "learning_rate": 0.0001427799335335398,
      "loss": 3.2113,
      "step": 155657
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9383246898651123,
      "learning_rate": 0.00014277644970999342,
      "loss": 3.0986,
      "step": 155658
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.240246295928955,
      "learning_rate": 0.00014277296591567798,
      "loss": 2.9043,
      "step": 155659
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.8900091648101807,
      "learning_rate": 0.00014276948215059403,
      "loss": 3.1712,
      "step": 155660
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.6713037490844727,
      "learning_rate": 0.00014276599841474218,
      "loss": 2.7774,
      "step": 155661
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7216598987579346,
      "learning_rate": 0.00014276251470812296,
      "loss": 2.849,
      "step": 155662
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4736554622650146,
      "learning_rate": 0.00014275903103073729,
      "loss": 3.265,
      "step": 155663
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.852501153945923,
      "learning_rate": 0.00014275554738258557,
      "loss": 3.0354,
      "step": 155664
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2297773361206055,
      "learning_rate": 0.00014275206376366867,
      "loss": 2.7269,
      "step": 155665
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9326183795928955,
      "learning_rate": 0.00014274858017398716,
      "loss": 2.9305,
      "step": 155666
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.112507343292236,
      "learning_rate": 0.00014274509661354163,
      "loss": 2.8929,
      "step": 155667
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.041287899017334,
      "learning_rate": 0.00014274161308233262,
      "loss": 2.9746,
      "step": 155668
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3726344108581543,
      "learning_rate": 0.0001427381295803611,
      "loss": 3.015,
      "step": 155669
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3853814601898193,
      "learning_rate": 0.00014273464610762735,
      "loss": 3.0036,
      "step": 155670
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.497675895690918,
      "learning_rate": 0.00014273116266413233,
      "loss": 2.8857,
      "step": 155671
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.305250883102417,
      "learning_rate": 0.00014272767924987644,
      "loss": 3.1793,
      "step": 155672
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.172708034515381,
      "learning_rate": 0.00014272419586486068,
      "loss": 2.9217,
      "step": 155673
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.9981865882873535,
      "learning_rate": 0.00014272071250908522,
      "loss": 2.9147,
      "step": 155674
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1211469173431396,
      "learning_rate": 0.00014271722918255108,
      "loss": 2.8645,
      "step": 155675
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2636094093322754,
      "learning_rate": 0.00014271374588525863,
      "loss": 3.1404,
      "step": 155676
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.631364107131958,
      "learning_rate": 0.00014271026261720882,
      "loss": 2.9843,
      "step": 155677
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.234827756881714,
      "learning_rate": 0.00014270677937840197,
      "loss": 3.0064,
      "step": 155678
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2138872146606445,
      "learning_rate": 0.00014270329616883917,
      "loss": 2.9033,
      "step": 155679
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.402468681335449,
      "learning_rate": 0.00014269981298852056,
      "loss": 3.0265,
      "step": 155680
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.361936092376709,
      "learning_rate": 0.00014269632983744717,
      "loss": 2.893,
      "step": 155681
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.395252227783203,
      "learning_rate": 0.0001426928467156193,
      "loss": 2.7635,
      "step": 155682
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4430153369903564,
      "learning_rate": 0.000142689363623038,
      "loss": 2.7913,
      "step": 155683
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.419356107711792,
      "learning_rate": 0.00014268588055970358,
      "loss": 3.1268,
      "step": 155684
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.376427173614502,
      "learning_rate": 0.00014268239752561704,
      "loss": 2.7583,
      "step": 155685
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.555713653564453,
      "learning_rate": 0.00014267891452077858,
      "loss": 2.9771,
      "step": 155686
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3870108127593994,
      "learning_rate": 0.00014267543154518922,
      "loss": 2.8695,
      "step": 155687
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.55295991897583,
      "learning_rate": 0.00014267194859884935,
      "loss": 3.0305,
      "step": 155688
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.920517921447754,
      "learning_rate": 0.0001426684656817598,
      "loss": 2.7879,
      "step": 155689
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.742330074310303,
      "learning_rate": 0.00014266498279392106,
      "loss": 2.9676,
      "step": 155690
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.972083568572998,
      "learning_rate": 0.00014266149993533413,
      "loss": 3.0694,
      "step": 155691
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1685991287231445,
      "learning_rate": 0.00014265801710599914,
      "loss": 2.8245,
      "step": 155692
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.735281467437744,
      "learning_rate": 0.00014265453430591714,
      "loss": 3.1755,
      "step": 155693
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.053895950317383,
      "learning_rate": 0.00014265105153508847,
      "loss": 2.8357,
      "step": 155694
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.697035074234009,
      "learning_rate": 0.0001426475687935141,
      "loss": 3.0543,
      "step": 155695
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.188365936279297,
      "learning_rate": 0.00014264408608119438,
      "loss": 3.1014,
      "step": 155696
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.748945474624634,
      "learning_rate": 0.00014264060339813032,
      "loss": 3.0913,
      "step": 155697
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0357825756073,
      "learning_rate": 0.00014263712074432213,
      "loss": 2.983,
      "step": 155698
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4771018028259277,
      "learning_rate": 0.00014263363811977076,
      "loss": 3.0028,
      "step": 155699
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4777474403381348,
      "learning_rate": 0.00014263015552447666,
      "loss": 2.8348,
      "step": 155700
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.744126319885254,
      "learning_rate": 0.0001426266729584407,
      "loss": 2.9812,
      "step": 155701
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.879966974258423,
      "learning_rate": 0.00014262319042166333,
      "loss": 3.1167,
      "step": 155702
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.282139301300049,
      "learning_rate": 0.00014261970791414546,
      "loss": 3.0098,
      "step": 155703
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1417081356048584,
      "learning_rate": 0.00014261622543588736,
      "loss": 3.1277,
      "step": 155704
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.285045862197876,
      "learning_rate": 0.00014261274298688997,
      "loss": 3.0234,
      "step": 155705
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.822118043899536,
      "learning_rate": 0.00014260926056715374,
      "loss": 2.9181,
      "step": 155706
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.024820327758789,
      "learning_rate": 0.00014260577817667953,
      "loss": 3.019,
      "step": 155707
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5742435455322266,
      "learning_rate": 0.00014260229581546777,
      "loss": 2.8608,
      "step": 155708
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.268156051635742,
      "learning_rate": 0.00014259881348351944,
      "loss": 2.8262,
      "step": 155709
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.27608585357666,
      "learning_rate": 0.00014259533118083474,
      "loss": 2.656,
      "step": 155710
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9822113513946533,
      "learning_rate": 0.0001425918489074146,
      "loss": 2.6416,
      "step": 155711
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.487447500228882,
      "learning_rate": 0.00014258836666325954,
      "loss": 2.998,
      "step": 155712
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.260084867477417,
      "learning_rate": 0.00014258488444837038,
      "loss": 3.1122,
      "step": 155713
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4520397186279297,
      "learning_rate": 0.00014258140226274755,
      "loss": 2.9899,
      "step": 155714
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.102081298828125,
      "learning_rate": 0.00014257792010639194,
      "loss": 2.873,
      "step": 155715
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.050112247467041,
      "learning_rate": 0.00014257443797930405,
      "loss": 2.8676,
      "step": 155716
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1008260250091553,
      "learning_rate": 0.00014257095588148455,
      "loss": 3.175,
      "step": 155717
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9056828022003174,
      "learning_rate": 0.00014256747381293395,
      "loss": 2.8779,
      "step": 155718
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3126187324523926,
      "learning_rate": 0.00014256399177365312,
      "loss": 2.8975,
      "step": 155719
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3539297580718994,
      "learning_rate": 0.00014256050976364258,
      "loss": 2.8918,
      "step": 155720
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3234312534332275,
      "learning_rate": 0.00014255702778290306,
      "loss": 2.9857,
      "step": 155721
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4069652557373047,
      "learning_rate": 0.0001425535458314352,
      "loss": 2.9011,
      "step": 155722
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.377883195877075,
      "learning_rate": 0.00014255006390923944,
      "loss": 2.8835,
      "step": 155723
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0244364738464355,
      "learning_rate": 0.00014254658201631675,
      "loss": 2.819,
      "step": 155724
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8469271659851074,
      "learning_rate": 0.00014254310015266762,
      "loss": 3.1652,
      "step": 155725
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.553776741027832,
      "learning_rate": 0.0001425396183182926,
      "loss": 2.9817,
      "step": 155726
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5638718605041504,
      "learning_rate": 0.00014253613651319253,
      "loss": 2.9475,
      "step": 155727
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.119312047958374,
      "learning_rate": 0.00014253265473736796,
      "loss": 2.9415,
      "step": 155728
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4994637966156006,
      "learning_rate": 0.00014252917299081944,
      "loss": 3.0127,
      "step": 155729
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.309786319732666,
      "learning_rate": 0.00014252569127354783,
      "loss": 3.0342,
      "step": 155730
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4755642414093018,
      "learning_rate": 0.00014252220958555368,
      "loss": 3.1818,
      "step": 155731
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.897311210632324,
      "learning_rate": 0.00014251872792683747,
      "loss": 3.1659,
      "step": 155732
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.169767379760742,
      "learning_rate": 0.00014251524629740016,
      "loss": 3.0957,
      "step": 155733
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.447869300842285,
      "learning_rate": 0.00014251176469724222,
      "loss": 2.9267,
      "step": 155734
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.685976266860962,
      "learning_rate": 0.0001425082831263642,
      "loss": 2.8701,
      "step": 155735
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.402996301651001,
      "learning_rate": 0.00014250480158476696,
      "loss": 2.9096,
      "step": 155736
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.196714162826538,
      "learning_rate": 0.00014250132007245098,
      "loss": 2.8295,
      "step": 155737
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5060489177703857,
      "learning_rate": 0.00014249783858941707,
      "loss": 2.9205,
      "step": 155738
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.216801404953003,
      "learning_rate": 0.00014249435713566575,
      "loss": 2.9694,
      "step": 155739
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.03131103515625,
      "learning_rate": 0.00014249087571119773,
      "loss": 2.8695,
      "step": 155740
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.445809841156006,
      "learning_rate": 0.00014248739431601355,
      "loss": 2.9631,
      "step": 155741
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.101325273513794,
      "learning_rate": 0.000142483912950114,
      "loss": 3.0794,
      "step": 155742
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6296885013580322,
      "learning_rate": 0.00014248043161349958,
      "loss": 3.0603,
      "step": 155743
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3031139373779297,
      "learning_rate": 0.0001424769503061711,
      "loss": 3.1014,
      "step": 155744
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1407644748687744,
      "learning_rate": 0.00014247346902812916,
      "loss": 2.9997,
      "step": 155745
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3713386058807373,
      "learning_rate": 0.00014246998777937438,
      "loss": 2.7413,
      "step": 155746
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.153028726577759,
      "learning_rate": 0.00014246650655990728,
      "loss": 3.0657,
      "step": 155747
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7998085021972656,
      "learning_rate": 0.00014246302536972876,
      "loss": 2.8131,
      "step": 155748
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.403297185897827,
      "learning_rate": 0.0001424595442088392,
      "loss": 2.7329,
      "step": 155749
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.7114217281341553,
      "learning_rate": 0.00014245606307723948,
      "loss": 2.8782,
      "step": 155750
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.6644673347473145,
      "learning_rate": 0.00014245258197493008,
      "loss": 2.834,
      "step": 155751
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.215737819671631,
      "learning_rate": 0.0001424491009019119,
      "loss": 3.0023,
      "step": 155752
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.501427412033081,
      "learning_rate": 0.00014244561985818523,
      "loss": 2.9723,
      "step": 155753
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.038726806640625,
      "learning_rate": 0.000142442138843751,
      "loss": 3.0596,
      "step": 155754
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.054985523223877,
      "learning_rate": 0.0001424386578586096,
      "loss": 2.9623,
      "step": 155755
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9914255142211914,
      "learning_rate": 0.00014243517690276197,
      "loss": 3.0265,
      "step": 155756
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.51035213470459,
      "learning_rate": 0.0001424316959762085,
      "loss": 2.8603,
      "step": 155757
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.211606979370117,
      "learning_rate": 0.00014242821507895015,
      "loss": 3.2039,
      "step": 155758
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.637195348739624,
      "learning_rate": 0.0001424247342109872,
      "loss": 2.8118,
      "step": 155759
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9460196495056152,
      "learning_rate": 0.00014242125337232052,
      "loss": 2.9707,
      "step": 155760
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.7023041248321533,
      "learning_rate": 0.0001424177725629506,
      "loss": 3.1756,
      "step": 155761
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3244235515594482,
      "learning_rate": 0.00014241429178287834,
      "loss": 3.0024,
      "step": 155762
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.367003917694092,
      "learning_rate": 0.0001424108110321041,
      "loss": 3.1047,
      "step": 155763
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3662238121032715,
      "learning_rate": 0.00014240733031062889,
      "loss": 3.2811,
      "step": 155764
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.657695770263672,
      "learning_rate": 0.00014240384961845291,
      "loss": 3.0535,
      "step": 155765
    },
    {
      "epoch": 2.03,
      "grad_norm": 6.55508279800415,
      "learning_rate": 0.00014240036895557716,
      "loss": 2.7063,
      "step": 155766
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.427783966064453,
      "learning_rate": 0.000142396888322002,
      "loss": 3.0286,
      "step": 155767
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7646775245666504,
      "learning_rate": 0.00014239340771772838,
      "loss": 2.818,
      "step": 155768
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.128361940383911,
      "learning_rate": 0.0001423899271427567,
      "loss": 2.8345,
      "step": 155769
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2572529315948486,
      "learning_rate": 0.0001423864465970879,
      "loss": 3.0735,
      "step": 155770
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.9728450775146484,
      "learning_rate": 0.00014238296608072224,
      "loss": 3.0557,
      "step": 155771
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.840023994445801,
      "learning_rate": 0.00014237948559366065,
      "loss": 2.9848,
      "step": 155772
    },
    {
      "epoch": 2.03,
      "grad_norm": 5.29092264175415,
      "learning_rate": 0.00014237600513590358,
      "loss": 2.9136,
      "step": 155773
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5545010566711426,
      "learning_rate": 0.0001423725247074519,
      "loss": 3.1959,
      "step": 155774
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.076798439025879,
      "learning_rate": 0.000142369044308306,
      "loss": 3.1535,
      "step": 155775
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.622774124145508,
      "learning_rate": 0.00014236556393846695,
      "loss": 3.0399,
      "step": 155776
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.656187057495117,
      "learning_rate": 0.00014236208359793488,
      "loss": 2.6247,
      "step": 155777
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7241668701171875,
      "learning_rate": 0.00014235860328671077,
      "loss": 3.1276,
      "step": 155778
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2919423580169678,
      "learning_rate": 0.00014235512300479509,
      "loss": 2.7753,
      "step": 155779
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2741358280181885,
      "learning_rate": 0.00014235164275218864,
      "loss": 2.9526,
      "step": 155780
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.248063802719116,
      "learning_rate": 0.0001423481625288919,
      "loss": 2.8744,
      "step": 155781
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.764083385467529,
      "learning_rate": 0.00014234468233490584,
      "loss": 2.7973,
      "step": 155782
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4129788875579834,
      "learning_rate": 0.00014234120217023063,
      "loss": 2.8624,
      "step": 155783
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5613467693328857,
      "learning_rate": 0.0001423377220348673,
      "loss": 3.0627,
      "step": 155784
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.649941921234131,
      "learning_rate": 0.00014233424192881626,
      "loss": 3.0116,
      "step": 155785
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6149115562438965,
      "learning_rate": 0.00014233076185207836,
      "loss": 3.2266,
      "step": 155786
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.211411237716675,
      "learning_rate": 0.00014232728180465406,
      "loss": 2.8918,
      "step": 155787
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9757665395736694,
      "learning_rate": 0.00014232380178654428,
      "loss": 2.922,
      "step": 155788
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.405757427215576,
      "learning_rate": 0.0001423203217977493,
      "loss": 2.9104,
      "step": 155789
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3123676776885986,
      "learning_rate": 0.00014231684183827004,
      "loss": 3.1201,
      "step": 155790
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3049728870391846,
      "learning_rate": 0.0001423133619081069,
      "loss": 3.1222,
      "step": 155791
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5233755111694336,
      "learning_rate": 0.00014230988200726084,
      "loss": 2.7785,
      "step": 155792
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5131378173828125,
      "learning_rate": 0.00014230640213573227,
      "loss": 2.96,
      "step": 155793
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.041895866394043,
      "learning_rate": 0.00014230292229352207,
      "loss": 3.1286,
      "step": 155794
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.455084800720215,
      "learning_rate": 0.00014229944248063056,
      "loss": 2.7325,
      "step": 155795
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.328267812728882,
      "learning_rate": 0.00014229596269705865,
      "loss": 3.0501,
      "step": 155796
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5173795223236084,
      "learning_rate": 0.00014229248294280678,
      "loss": 2.7384,
      "step": 155797
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.504627227783203,
      "learning_rate": 0.00014228900321787587,
      "loss": 2.8508,
      "step": 155798
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7330117225646973,
      "learning_rate": 0.00014228552352226625,
      "loss": 3.0851,
      "step": 155799
    },
    {
      "epoch": 2.03,
      "grad_norm": 5.107998847961426,
      "learning_rate": 0.00014228204385597883,
      "loss": 2.6854,
      "step": 155800
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.4843099117279053,
      "learning_rate": 0.0001422785642190142,
      "loss": 3.1084,
      "step": 155801
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.260584592819214,
      "learning_rate": 0.00014227508461137295,
      "loss": 2.8953,
      "step": 155802
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6723830699920654,
      "learning_rate": 0.0001422716050330556,
      "loss": 2.9508,
      "step": 155803
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.599032402038574,
      "learning_rate": 0.0001422681254840631,
      "loss": 3.055,
      "step": 155804
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5328941345214844,
      "learning_rate": 0.00014226464596439576,
      "loss": 2.8682,
      "step": 155805
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4721884727478027,
      "learning_rate": 0.00014226116647405457,
      "loss": 2.7697,
      "step": 155806
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3352630138397217,
      "learning_rate": 0.00014225768701303998,
      "loss": 2.8975,
      "step": 155807
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.207043170928955,
      "learning_rate": 0.00014225420758135263,
      "loss": 2.751,
      "step": 155808
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.319060802459717,
      "learning_rate": 0.00014225072817899314,
      "loss": 3.0593,
      "step": 155809
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9618372917175293,
      "learning_rate": 0.00014224724880596232,
      "loss": 2.7665,
      "step": 155810
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9922971725463867,
      "learning_rate": 0.00014224376946226058,
      "loss": 2.7651,
      "step": 155811
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0645973682403564,
      "learning_rate": 0.00014224029014788884,
      "loss": 3.265,
      "step": 155812
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.758641004562378,
      "learning_rate": 0.00014223681086284758,
      "loss": 3.1345,
      "step": 155813
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1866097450256348,
      "learning_rate": 0.00014223333160713738,
      "loss": 2.9124,
      "step": 155814
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6781203746795654,
      "learning_rate": 0.00014222985238075908,
      "loss": 2.8558,
      "step": 155815
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7016005516052246,
      "learning_rate": 0.00014222637318371325,
      "loss": 2.7682,
      "step": 155816
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.547508478164673,
      "learning_rate": 0.0001422228940160004,
      "loss": 2.9199,
      "step": 155817
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.241072654724121,
      "learning_rate": 0.0001422194148776214,
      "loss": 2.9799,
      "step": 155818
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4460363388061523,
      "learning_rate": 0.00014221593576857678,
      "loss": 2.9415,
      "step": 155819
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2459452152252197,
      "learning_rate": 0.00014221245668886708,
      "loss": 2.9318,
      "step": 155820
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8729703426361084,
      "learning_rate": 0.0001422089776384932,
      "loss": 3.1562,
      "step": 155821
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.875579833984375,
      "learning_rate": 0.0001422054986174555,
      "loss": 2.6915,
      "step": 155822
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.919621467590332,
      "learning_rate": 0.00014220201962575492,
      "loss": 2.9993,
      "step": 155823
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0897130966186523,
      "learning_rate": 0.00014219854066339197,
      "loss": 2.8507,
      "step": 155824
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.089284896850586,
      "learning_rate": 0.00014219506173036725,
      "loss": 3.0733,
      "step": 155825
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2731051445007324,
      "learning_rate": 0.00014219158282668137,
      "loss": 2.9558,
      "step": 155826
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.111414670944214,
      "learning_rate": 0.00014218810395233514,
      "loss": 2.7202,
      "step": 155827
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.076382637023926,
      "learning_rate": 0.000142184625107329,
      "loss": 2.9258,
      "step": 155828
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.9537363052368164,
      "learning_rate": 0.00014218114629166388,
      "loss": 2.7565,
      "step": 155829
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8330392837524414,
      "learning_rate": 0.00014217766750534023,
      "loss": 3.1312,
      "step": 155830
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.284914016723633,
      "learning_rate": 0.00014217418874835875,
      "loss": 3.0018,
      "step": 155831
    },
    {
      "epoch": 2.03,
      "grad_norm": 6.667600154876709,
      "learning_rate": 0.00014217071002071993,
      "loss": 2.8727,
      "step": 155832
    },
    {
      "epoch": 2.03,
      "grad_norm": 5.052587985992432,
      "learning_rate": 0.00014216723132242469,
      "loss": 2.9756,
      "step": 155833
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.0034613609313965,
      "learning_rate": 0.00014216375265347343,
      "loss": 2.7414,
      "step": 155834
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8331408500671387,
      "learning_rate": 0.00014216027401386697,
      "loss": 2.819,
      "step": 155835
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7295796871185303,
      "learning_rate": 0.00014215679540360583,
      "loss": 2.8013,
      "step": 155836
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.1318817138671875,
      "learning_rate": 0.00014215331682269092,
      "loss": 2.8206,
      "step": 155837
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.263411283493042,
      "learning_rate": 0.0001421498382711225,
      "loss": 2.8497,
      "step": 155838
    },
    {
      "epoch": 2.03,
      "grad_norm": 5.443755626678467,
      "learning_rate": 0.0001421463597489015,
      "loss": 2.94,
      "step": 155839
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.769162178039551,
      "learning_rate": 0.00014214288125602835,
      "loss": 2.8499,
      "step": 155840
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5997092723846436,
      "learning_rate": 0.00014213940279250394,
      "loss": 3.0619,
      "step": 155841
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.025457382202148,
      "learning_rate": 0.00014213592435832867,
      "loss": 3.0033,
      "step": 155842
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.443696975708008,
      "learning_rate": 0.00014213244595350354,
      "loss": 2.9771,
      "step": 155843
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.049651145935059,
      "learning_rate": 0.00014212896757802873,
      "loss": 2.9487,
      "step": 155844
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.548924446105957,
      "learning_rate": 0.00014212548923190528,
      "loss": 3.0601,
      "step": 155845
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2272863388061523,
      "learning_rate": 0.00014212201091513352,
      "loss": 3.2294,
      "step": 155846
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.338192939758301,
      "learning_rate": 0.00014211853262771442,
      "loss": 2.9294,
      "step": 155847
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.000359535217285,
      "learning_rate": 0.0001421150543696483,
      "loss": 2.8347,
      "step": 155848
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.928395748138428,
      "learning_rate": 0.00014211157614093625,
      "loss": 2.9006,
      "step": 155849
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.750551223754883,
      "learning_rate": 0.00014210809794157838,
      "loss": 2.7908,
      "step": 155850
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3395614624023438,
      "learning_rate": 0.00014210461977157572,
      "loss": 3.112,
      "step": 155851
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.084839105606079,
      "learning_rate": 0.0001421011416309287,
      "loss": 3.0181,
      "step": 155852
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.866971492767334,
      "learning_rate": 0.0001420976635196381,
      "loss": 2.9599,
      "step": 155853
    },
    {
      "epoch": 2.03,
      "grad_norm": 6.90555477142334,
      "learning_rate": 0.00014209418543770447,
      "loss": 2.9804,
      "step": 155854
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5290515422821045,
      "learning_rate": 0.00014209070738512873,
      "loss": 3.0692,
      "step": 155855
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5987155437469482,
      "learning_rate": 0.00014208722936191108,
      "loss": 2.9982,
      "step": 155856
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7629940509796143,
      "learning_rate": 0.00014208375136805248,
      "loss": 3.2226,
      "step": 155857
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3382725715637207,
      "learning_rate": 0.00014208027340355343,
      "loss": 3.0409,
      "step": 155858
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.360869884490967,
      "learning_rate": 0.0001420767954684147,
      "loss": 2.833,
      "step": 155859
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.192747592926025,
      "learning_rate": 0.0001420733175626368,
      "loss": 2.6889,
      "step": 155860
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.402121067047119,
      "learning_rate": 0.00014206983968622069,
      "loss": 3.2084,
      "step": 155861
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5597152709960938,
      "learning_rate": 0.0001420663618391665,
      "loss": 2.9443,
      "step": 155862
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1003379821777344,
      "learning_rate": 0.0001420628840214753,
      "loss": 2.9227,
      "step": 155863
    },
    {
      "epoch": 2.03,
      "grad_norm": 5.521399974822998,
      "learning_rate": 0.00014205940623314748,
      "loss": 2.9705,
      "step": 155864
    },
    {
      "epoch": 2.03,
      "grad_norm": 5.908773422241211,
      "learning_rate": 0.0001420559284741839,
      "loss": 2.8105,
      "step": 155865
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.699113130569458,
      "learning_rate": 0.00014205245074458495,
      "loss": 2.8582,
      "step": 155866
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.483806610107422,
      "learning_rate": 0.00014204897304435175,
      "loss": 3.0204,
      "step": 155867
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.95796537399292,
      "learning_rate": 0.00014204549537348432,
      "loss": 2.9498,
      "step": 155868
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.830892562866211,
      "learning_rate": 0.00014204201773198376,
      "loss": 2.7908,
      "step": 155869
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.486292600631714,
      "learning_rate": 0.00014203854011985047,
      "loss": 2.8935,
      "step": 155870
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.181363582611084,
      "learning_rate": 0.00014203506253708528,
      "loss": 3.0221,
      "step": 155871
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.920097827911377,
      "learning_rate": 0.00014203158498368867,
      "loss": 2.8155,
      "step": 155872
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5213563442230225,
      "learning_rate": 0.00014202810745966147,
      "loss": 2.9121,
      "step": 155873
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7021610736846924,
      "learning_rate": 0.00014202462996500421,
      "loss": 2.9461,
      "step": 155874
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5742197036743164,
      "learning_rate": 0.0001420211524997176,
      "loss": 2.8405,
      "step": 155875
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.217772960662842,
      "learning_rate": 0.00014201767506380206,
      "loss": 2.9172,
      "step": 155876
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.248739004135132,
      "learning_rate": 0.0001420141976572586,
      "loss": 3.2503,
      "step": 155877
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.095693588256836,
      "learning_rate": 0.00014201072028008753,
      "loss": 2.9476,
      "step": 155878
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6998515129089355,
      "learning_rate": 0.00014200724293228977,
      "loss": 2.7206,
      "step": 155879
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2687885761260986,
      "learning_rate": 0.00014200376561386582,
      "loss": 3.0015,
      "step": 155880
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.242657423019409,
      "learning_rate": 0.0001420002883248164,
      "loss": 2.9486,
      "step": 155881
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8418004512786865,
      "learning_rate": 0.00014199681106514197,
      "loss": 2.6717,
      "step": 155882
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3408820629119873,
      "learning_rate": 0.00014199333383484343,
      "loss": 2.8963,
      "step": 155883
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.452862024307251,
      "learning_rate": 0.00014198985663392119,
      "loss": 3.1206,
      "step": 155884
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7152528762817383,
      "learning_rate": 0.00014198637946237616,
      "loss": 2.8295,
      "step": 155885
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.405029296875,
      "learning_rate": 0.00014198290232020884,
      "loss": 2.926,
      "step": 155886
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1934473514556885,
      "learning_rate": 0.00014197942520741984,
      "loss": 2.8368,
      "step": 155887
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.75193190574646,
      "learning_rate": 0.00014197594812400977,
      "loss": 2.844,
      "step": 155888
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1740808486938477,
      "learning_rate": 0.00014197247106997944,
      "loss": 2.9602,
      "step": 155889
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5567679405212402,
      "learning_rate": 0.00014196899404532933,
      "loss": 3.1033,
      "step": 155890
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.372166872024536,
      "learning_rate": 0.00014196551705006027,
      "loss": 2.7733,
      "step": 155891
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2515459060668945,
      "learning_rate": 0.0001419620400841728,
      "loss": 2.963,
      "step": 155892
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.736060619354248,
      "learning_rate": 0.00014195856314766755,
      "loss": 3.1332,
      "step": 155893
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7248780727386475,
      "learning_rate": 0.00014195508624054512,
      "loss": 2.8207,
      "step": 155894
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1873459815979004,
      "learning_rate": 0.0001419516093628063,
      "loss": 3.1071,
      "step": 155895
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.8689690828323364,
      "learning_rate": 0.00014194813251445156,
      "loss": 3.0513,
      "step": 155896
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.217729330062866,
      "learning_rate": 0.00014194465569548174,
      "loss": 2.7818,
      "step": 155897
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.198516607284546,
      "learning_rate": 0.00014194117890589742,
      "loss": 2.9726,
      "step": 155898
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7476894855499268,
      "learning_rate": 0.0001419377021456991,
      "loss": 2.8872,
      "step": 155899
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3949451446533203,
      "learning_rate": 0.00014193422541488766,
      "loss": 2.9498,
      "step": 155900
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7035999298095703,
      "learning_rate": 0.00014193074871346361,
      "loss": 3.0692,
      "step": 155901
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.149857759475708,
      "learning_rate": 0.0001419272720414275,
      "loss": 2.9135,
      "step": 155902
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.6162075996398926,
      "learning_rate": 0.00014192379539878025,
      "loss": 3.2155,
      "step": 155903
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.360724687576294,
      "learning_rate": 0.00014192031878552234,
      "loss": 2.9058,
      "step": 155904
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7863540649414062,
      "learning_rate": 0.00014191684220165427,
      "loss": 3.1458,
      "step": 155905
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.568514823913574,
      "learning_rate": 0.00014191336564717697,
      "loss": 2.7965,
      "step": 155906
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.524930238723755,
      "learning_rate": 0.000141909889122091,
      "loss": 2.9661,
      "step": 155907
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.611724853515625,
      "learning_rate": 0.0001419064126263968,
      "loss": 2.7786,
      "step": 155908
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1202197074890137,
      "learning_rate": 0.00014190293616009534,
      "loss": 3.1822,
      "step": 155909
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8320538997650146,
      "learning_rate": 0.00014189945972318708,
      "loss": 3.0133,
      "step": 155910
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.792053699493408,
      "learning_rate": 0.0001418959833156726,
      "loss": 2.887,
      "step": 155911
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.43137264251709,
      "learning_rate": 0.00014189250693755274,
      "loss": 2.7944,
      "step": 155912
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.507181167602539,
      "learning_rate": 0.00014188903058882793,
      "loss": 3.0075,
      "step": 155913
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.44992733001709,
      "learning_rate": 0.00014188555426949905,
      "loss": 2.7953,
      "step": 155914
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7773053646087646,
      "learning_rate": 0.00014188207797956665,
      "loss": 3.0865,
      "step": 155915
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.663126230239868,
      "learning_rate": 0.00014187860171903133,
      "loss": 2.8775,
      "step": 155916
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5025413036346436,
      "learning_rate": 0.00014187512548789367,
      "loss": 3.0824,
      "step": 155917
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4232943058013916,
      "learning_rate": 0.0001418716492861545,
      "loss": 3.0088,
      "step": 155918
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4322879314422607,
      "learning_rate": 0.0001418681731138143,
      "loss": 3.0009,
      "step": 155919
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.2684502601623535,
      "learning_rate": 0.00014186469697087385,
      "loss": 2.5889,
      "step": 155920
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.569953203201294,
      "learning_rate": 0.00014186122085733369,
      "loss": 2.9174,
      "step": 155921
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6221764087677,
      "learning_rate": 0.00014185774477319468,
      "loss": 2.7375,
      "step": 155922
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4653942584991455,
      "learning_rate": 0.0001418542687184571,
      "loss": 3.2042,
      "step": 155923
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5046377182006836,
      "learning_rate": 0.00014185079269312192,
      "loss": 3.0077,
      "step": 155924
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9234349727630615,
      "learning_rate": 0.00014184731669718954,
      "loss": 2.8741,
      "step": 155925
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.894001007080078,
      "learning_rate": 0.00014184384073066082,
      "loss": 2.8927,
      "step": 155926
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6765339374542236,
      "learning_rate": 0.00014184036479353626,
      "loss": 2.9658,
      "step": 155927
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6542248725891113,
      "learning_rate": 0.00014183688888581673,
      "loss": 2.7496,
      "step": 155928
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.989823818206787,
      "learning_rate": 0.0001418334130075025,
      "loss": 2.9062,
      "step": 155929
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3173227310180664,
      "learning_rate": 0.00014182993715859454,
      "loss": 2.8364,
      "step": 155930
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3367605209350586,
      "learning_rate": 0.0001418264613390933,
      "loss": 2.8994,
      "step": 155931
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.353879928588867,
      "learning_rate": 0.00014182298554899959,
      "loss": 3.0114,
      "step": 155932
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.041753053665161,
      "learning_rate": 0.00014181950978831384,
      "loss": 2.9871,
      "step": 155933
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3934361934661865,
      "learning_rate": 0.0001418160340570371,
      "loss": 2.9176,
      "step": 155934
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5950210094451904,
      "learning_rate": 0.00014181255835516948,
      "loss": 3.0665,
      "step": 155935
    },
    {
      "epoch": 2.03,
      "grad_norm": 6.166100978851318,
      "learning_rate": 0.000141809082682712,
      "loss": 2.6231,
      "step": 155936
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.1168341636657715,
      "learning_rate": 0.0001418056070396651,
      "loss": 2.8801,
      "step": 155937
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.85825514793396,
      "learning_rate": 0.00014180213142602963,
      "loss": 2.8377,
      "step": 155938
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0539722442626953,
      "learning_rate": 0.00014179865584180605,
      "loss": 2.8581,
      "step": 155939
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5977611541748047,
      "learning_rate": 0.00014179518028699517,
      "loss": 2.8287,
      "step": 155940
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.145053386688232,
      "learning_rate": 0.00014179170476159755,
      "loss": 2.8528,
      "step": 155941
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.731483221054077,
      "learning_rate": 0.00014178822926561385,
      "loss": 2.8699,
      "step": 155942
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.230104446411133,
      "learning_rate": 0.00014178475379904458,
      "loss": 3.0308,
      "step": 155943
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5831429958343506,
      "learning_rate": 0.00014178127836189064,
      "loss": 2.8898,
      "step": 155944
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2985589504241943,
      "learning_rate": 0.0001417778029541524,
      "loss": 2.8976,
      "step": 155945
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.957247495651245,
      "learning_rate": 0.0001417743275758308,
      "loss": 2.8638,
      "step": 155946
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.55875301361084,
      "learning_rate": 0.00014177085222692636,
      "loss": 2.7233,
      "step": 155947
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4838480949401855,
      "learning_rate": 0.00014176737690743967,
      "loss": 2.9997,
      "step": 155948
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2201900482177734,
      "learning_rate": 0.00014176390161737128,
      "loss": 3.0609,
      "step": 155949
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5456349849700928,
      "learning_rate": 0.00014176042635672212,
      "loss": 2.8064,
      "step": 155950
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4462196826934814,
      "learning_rate": 0.00014175695112549254,
      "loss": 3.0842,
      "step": 155951
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.357264995574951,
      "learning_rate": 0.0001417534759236835,
      "loss": 2.5639,
      "step": 155952
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2369394302368164,
      "learning_rate": 0.00014175000075129538,
      "loss": 2.8135,
      "step": 155953
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.646790027618408,
      "learning_rate": 0.000141746525608329,
      "loss": 2.8375,
      "step": 155954
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2588212490081787,
      "learning_rate": 0.00014174305049478481,
      "loss": 3.0476,
      "step": 155955
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.015294313430786,
      "learning_rate": 0.00014173957541066364,
      "loss": 3.0466,
      "step": 155956
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.7273759841918945,
      "learning_rate": 0.000141736100355966,
      "loss": 2.8251,
      "step": 155957
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.309854745864868,
      "learning_rate": 0.0001417326253306927,
      "loss": 3.3695,
      "step": 155958
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4071755409240723,
      "learning_rate": 0.0001417291503348443,
      "loss": 3.0919,
      "step": 155959
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9930967092514038,
      "learning_rate": 0.00014172567536842141,
      "loss": 3.139,
      "step": 155960
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.494784355163574,
      "learning_rate": 0.00014172220043142463,
      "loss": 3.1091,
      "step": 155961
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1929097175598145,
      "learning_rate": 0.0001417187255238548,
      "loss": 2.8718,
      "step": 155962
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.364114761352539,
      "learning_rate": 0.00014171525064571232,
      "loss": 3.0576,
      "step": 155963
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2889299392700195,
      "learning_rate": 0.00014171177579699805,
      "loss": 2.8446,
      "step": 155964
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.474334716796875,
      "learning_rate": 0.00014170830097771258,
      "loss": 2.9149,
      "step": 155965
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.336148977279663,
      "learning_rate": 0.00014170482618785653,
      "loss": 2.9823,
      "step": 155966
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.529438018798828,
      "learning_rate": 0.0001417013514274304,
      "loss": 3.1416,
      "step": 155967
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.485072135925293,
      "learning_rate": 0.0001416978766964351,
      "loss": 2.9569,
      "step": 155968
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6177749633789062,
      "learning_rate": 0.00014169440199487105,
      "loss": 2.7334,
      "step": 155969
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.584207773208618,
      "learning_rate": 0.0001416909273227391,
      "loss": 3.0608,
      "step": 155970
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.080970525741577,
      "learning_rate": 0.00014168745268003982,
      "loss": 2.9037,
      "step": 155971
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.10087251663208,
      "learning_rate": 0.0001416839780667738,
      "loss": 2.7394,
      "step": 155972
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.4043006896972656,
      "learning_rate": 0.0001416805034829416,
      "loss": 2.8704,
      "step": 155973
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.682771921157837,
      "learning_rate": 0.00014167702892854412,
      "loss": 2.8326,
      "step": 155974
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.451174020767212,
      "learning_rate": 0.00014167355440358173,
      "loss": 2.6979,
      "step": 155975
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7337286472320557,
      "learning_rate": 0.00014167007990805534,
      "loss": 2.8644,
      "step": 155976
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.453671455383301,
      "learning_rate": 0.00014166660544196548,
      "loss": 3.1401,
      "step": 155977
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6255218982696533,
      "learning_rate": 0.0001416631310053128,
      "loss": 3.1326,
      "step": 155978
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.054378032684326,
      "learning_rate": 0.00014165965659809779,
      "loss": 2.8539,
      "step": 155979
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0982069969177246,
      "learning_rate": 0.0001416561822203214,
      "loss": 2.9761,
      "step": 155980
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.430023670196533,
      "learning_rate": 0.0001416527078719839,
      "loss": 3.1838,
      "step": 155981
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.125420093536377,
      "learning_rate": 0.00014164923355308636,
      "loss": 2.959,
      "step": 155982
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2565770149230957,
      "learning_rate": 0.00014164575926362919,
      "loss": 2.9035,
      "step": 155983
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4769179821014404,
      "learning_rate": 0.00014164228500361292,
      "loss": 2.8442,
      "step": 155984
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.453127145767212,
      "learning_rate": 0.0001416388107730385,
      "loss": 3.1467,
      "step": 155985
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8344359397888184,
      "learning_rate": 0.0001416353365719064,
      "loss": 3.0304,
      "step": 155986
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.227721691131592,
      "learning_rate": 0.00014163186240021716,
      "loss": 3.1951,
      "step": 155987
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.268934488296509,
      "learning_rate": 0.00014162838825797162,
      "loss": 3.2052,
      "step": 155988
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6231722831726074,
      "learning_rate": 0.0001416249141451704,
      "loss": 3.0461,
      "step": 155989
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.530205726623535,
      "learning_rate": 0.00014162144006181398,
      "loss": 2.9666,
      "step": 155990
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3808610439300537,
      "learning_rate": 0.00014161796600790322,
      "loss": 3.1258,
      "step": 155991
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2482898235321045,
      "learning_rate": 0.0001416144919834387,
      "loss": 3.0509,
      "step": 155992
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1128110885620117,
      "learning_rate": 0.0001416110179884209,
      "loss": 3.1808,
      "step": 155993
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.086164712905884,
      "learning_rate": 0.00014160754402285072,
      "loss": 2.9106,
      "step": 155994
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4161412715911865,
      "learning_rate": 0.0001416040700867287,
      "loss": 2.6542,
      "step": 155995
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4148428440093994,
      "learning_rate": 0.00014160059618005533,
      "loss": 2.8172,
      "step": 155996
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0210416316986084,
      "learning_rate": 0.00014159712230283152,
      "loss": 2.9883,
      "step": 155997
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.650123119354248,
      "learning_rate": 0.0001415936484550577,
      "loss": 3.1277,
      "step": 155998
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.143083095550537,
      "learning_rate": 0.0001415901746367347,
      "loss": 2.9023,
      "step": 155999
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.338470697402954,
      "learning_rate": 0.0001415867008478631,
      "loss": 3.2501,
      "step": 156000
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.094944000244141,
      "learning_rate": 0.0001415832270884435,
      "loss": 2.9279,
      "step": 156001
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.400184392929077,
      "learning_rate": 0.00014157975335847648,
      "loss": 2.918,
      "step": 156002
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.365187883377075,
      "learning_rate": 0.00014157627965796289,
      "loss": 2.9488,
      "step": 156003
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5664072036743164,
      "learning_rate": 0.0001415728059869031,
      "loss": 3.1237,
      "step": 156004
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.262307643890381,
      "learning_rate": 0.0001415693323452981,
      "loss": 3.0673,
      "step": 156005
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.757356882095337,
      "learning_rate": 0.0001415658587331483,
      "loss": 3.0941,
      "step": 156006
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4816415309906006,
      "learning_rate": 0.0001415623851504543,
      "loss": 2.871,
      "step": 156007
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4002575874328613,
      "learning_rate": 0.00014155891159721695,
      "loss": 3.1591,
      "step": 156008
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3415627479553223,
      "learning_rate": 0.0001415554380734368,
      "loss": 2.9134,
      "step": 156009
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.259307622909546,
      "learning_rate": 0.00014155196457911435,
      "loss": 2.9681,
      "step": 156010
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.495000123977661,
      "learning_rate": 0.0001415484911142505,
      "loss": 2.8579,
      "step": 156011
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7785043716430664,
      "learning_rate": 0.00014154501767884567,
      "loss": 3.0371,
      "step": 156012
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3554577827453613,
      "learning_rate": 0.00014154154427290075,
      "loss": 2.8372,
      "step": 156013
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.564950466156006,
      "learning_rate": 0.0001415380708964162,
      "loss": 3.0262,
      "step": 156014
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.898655414581299,
      "learning_rate": 0.00014153459754939274,
      "loss": 2.9413,
      "step": 156015
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9432032108306885,
      "learning_rate": 0.00014153112423183087,
      "loss": 3.0689,
      "step": 156016
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.20246958732605,
      "learning_rate": 0.0001415276509437315,
      "loss": 2.9889,
      "step": 156017
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.988471746444702,
      "learning_rate": 0.00014152417768509499,
      "loss": 3.0775,
      "step": 156018
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.410142183303833,
      "learning_rate": 0.00014152070445592224,
      "loss": 3.1407,
      "step": 156019
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.487424373626709,
      "learning_rate": 0.00014151723125621377,
      "loss": 2.9229,
      "step": 156020
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8102903366088867,
      "learning_rate": 0.00014151375808597029,
      "loss": 2.6509,
      "step": 156021
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8051950931549072,
      "learning_rate": 0.00014151028494519224,
      "loss": 2.8643,
      "step": 156022
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6596503257751465,
      "learning_rate": 0.0001415068118338805,
      "loss": 2.72,
      "step": 156023
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.219960927963257,
      "learning_rate": 0.0001415033387520356,
      "loss": 2.924,
      "step": 156024
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6440863609313965,
      "learning_rate": 0.0001414998656996583,
      "loss": 2.9311,
      "step": 156025
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4595983028411865,
      "learning_rate": 0.00014149639267674915,
      "loss": 2.707,
      "step": 156026
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2331180572509766,
      "learning_rate": 0.00014149291968330882,
      "loss": 3.1125,
      "step": 156027
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1408579349517822,
      "learning_rate": 0.00014148944671933783,
      "loss": 3.0786,
      "step": 156028
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.185736656188965,
      "learning_rate": 0.00014148597378483706,
      "loss": 3.0303,
      "step": 156029
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.94460129737854,
      "learning_rate": 0.00014148250087980693,
      "loss": 2.8791,
      "step": 156030
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.729149580001831,
      "learning_rate": 0.00014147902800424835,
      "loss": 3.2842,
      "step": 156031
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9884488582611084,
      "learning_rate": 0.00014147555515816175,
      "loss": 2.8025,
      "step": 156032
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3783321380615234,
      "learning_rate": 0.00014147208234154785,
      "loss": 3.0158,
      "step": 156033
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8798937797546387,
      "learning_rate": 0.0001414686095544072,
      "loss": 3.2667,
      "step": 156034
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.241943359375,
      "learning_rate": 0.0001414651367967406,
      "loss": 3.0869,
      "step": 156035
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3575780391693115,
      "learning_rate": 0.00014146166406854853,
      "loss": 2.9268,
      "step": 156036
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.869293689727783,
      "learning_rate": 0.00014145819136983183,
      "loss": 2.9285,
      "step": 156037
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1354715824127197,
      "learning_rate": 0.00014145471870059107,
      "loss": 2.7578,
      "step": 156038
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.855997085571289,
      "learning_rate": 0.00014145124606082686,
      "loss": 2.9815,
      "step": 156039
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3368401527404785,
      "learning_rate": 0.00014144777345053973,
      "loss": 2.9494,
      "step": 156040
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6986794471740723,
      "learning_rate": 0.0001414443008697306,
      "loss": 2.9759,
      "step": 156041
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3147175312042236,
      "learning_rate": 0.0001414408283183998,
      "loss": 2.9076,
      "step": 156042
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.467834234237671,
      "learning_rate": 0.0001414373557965483,
      "loss": 2.7208,
      "step": 156043
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7253637313842773,
      "learning_rate": 0.0001414338833041766,
      "loss": 3.1895,
      "step": 156044
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.143336534500122,
      "learning_rate": 0.0001414304108412853,
      "loss": 3.1238,
      "step": 156045
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3976175785064697,
      "learning_rate": 0.00014142693840787495,
      "loss": 2.95,
      "step": 156046
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9019968509674072,
      "learning_rate": 0.00014142346600394648,
      "loss": 3.0405,
      "step": 156047
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9206552505493164,
      "learning_rate": 0.00014141999362950023,
      "loss": 3.0373,
      "step": 156048
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4836297035217285,
      "learning_rate": 0.00014141652128453713,
      "loss": 2.9973,
      "step": 156049
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.778764247894287,
      "learning_rate": 0.0001414130489690577,
      "loss": 3.045,
      "step": 156050
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.907299280166626,
      "learning_rate": 0.0001414095766830626,
      "loss": 3.0427,
      "step": 156051
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.39424729347229,
      "learning_rate": 0.0001414061044265523,
      "loss": 2.9018,
      "step": 156052
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.236569404602051,
      "learning_rate": 0.0001414026321995277,
      "loss": 3.1313,
      "step": 156053
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6683754920959473,
      "learning_rate": 0.00014139916000198928,
      "loss": 2.9409,
      "step": 156054
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5202763080596924,
      "learning_rate": 0.00014139568783393783,
      "loss": 2.8829,
      "step": 156055
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.146644115447998,
      "learning_rate": 0.00014139221569537392,
      "loss": 3.0267,
      "step": 156056
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.472442388534546,
      "learning_rate": 0.0001413887435862982,
      "loss": 2.9045,
      "step": 156057
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2337613105773926,
      "learning_rate": 0.00014138527150671119,
      "loss": 3.2648,
      "step": 156058
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6006593704223633,
      "learning_rate": 0.00014138179945661375,
      "loss": 3.0302,
      "step": 156059
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1686503887176514,
      "learning_rate": 0.00014137832743600634,
      "loss": 2.9082,
      "step": 156060
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.705192804336548,
      "learning_rate": 0.0001413748554448898,
      "loss": 3.1056,
      "step": 156061
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1901466846466064,
      "learning_rate": 0.00014137138348326465,
      "loss": 2.9516,
      "step": 156062
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.315009355545044,
      "learning_rate": 0.00014136791155113155,
      "loss": 3.0437,
      "step": 156063
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3802478313446045,
      "learning_rate": 0.00014136443964849109,
      "loss": 3.1193,
      "step": 156064
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.378763437271118,
      "learning_rate": 0.00014136096777534404,
      "loss": 2.8537,
      "step": 156065
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.162889003753662,
      "learning_rate": 0.00014135749593169086,
      "loss": 3.068,
      "step": 156066
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1992194652557373,
      "learning_rate": 0.00014135402411753246,
      "loss": 2.8952,
      "step": 156067
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0434465408325195,
      "learning_rate": 0.00014135055233286935,
      "loss": 3.0707,
      "step": 156068
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.214452028274536,
      "learning_rate": 0.00014134708057770214,
      "loss": 3.0956,
      "step": 156069
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.4760937690734863,
      "learning_rate": 0.0001413436088520314,
      "loss": 2.9243,
      "step": 156070
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0584933757781982,
      "learning_rate": 0.00014134013715585799,
      "loss": 3.0549,
      "step": 156071
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3373167514801025,
      "learning_rate": 0.0001413366654891823,
      "loss": 2.7413,
      "step": 156072
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9772891998291016,
      "learning_rate": 0.00014133319385200527,
      "loss": 3.0237,
      "step": 156073
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1794445514678955,
      "learning_rate": 0.0001413297222443273,
      "loss": 2.9524,
      "step": 156074
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2364742755889893,
      "learning_rate": 0.00014132625066614918,
      "loss": 2.8879,
      "step": 156075
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.601891279220581,
      "learning_rate": 0.00014132277911747156,
      "loss": 3.0959,
      "step": 156076
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.734314203262329,
      "learning_rate": 0.000141319307598295,
      "loss": 2.8514,
      "step": 156077
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.674522638320923,
      "learning_rate": 0.0001413158361086201,
      "loss": 2.8055,
      "step": 156078
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2182869911193848,
      "learning_rate": 0.00014131236464844763,
      "loss": 2.9346,
      "step": 156079
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.597353935241699,
      "learning_rate": 0.00014130889321777815,
      "loss": 2.8007,
      "step": 156080
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9653675556182861,
      "learning_rate": 0.0001413054218166124,
      "loss": 3.0632,
      "step": 156081
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5429840087890625,
      "learning_rate": 0.00014130195044495097,
      "loss": 3.0673,
      "step": 156082
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2262797355651855,
      "learning_rate": 0.0001412984791027944,
      "loss": 2.7706,
      "step": 156083
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.787236452102661,
      "learning_rate": 0.00014129500779014358,
      "loss": 2.9982,
      "step": 156084
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.892894744873047,
      "learning_rate": 0.00014129153650699897,
      "loss": 3.2035,
      "step": 156085
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.169175386428833,
      "learning_rate": 0.0001412880652533612,
      "loss": 2.814,
      "step": 156086
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.302645444869995,
      "learning_rate": 0.000141284594029231,
      "loss": 3.0055,
      "step": 156087
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5019547939300537,
      "learning_rate": 0.00014128112283460907,
      "loss": 2.9341,
      "step": 156088
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.538599729537964,
      "learning_rate": 0.00014127765166949583,
      "loss": 3.1892,
      "step": 156089
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3965742588043213,
      "learning_rate": 0.00014127418053389221,
      "loss": 3.0134,
      "step": 156090
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7941298484802246,
      "learning_rate": 0.00014127070942779869,
      "loss": 3.012,
      "step": 156091
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.361143112182617,
      "learning_rate": 0.00014126723835121584,
      "loss": 2.9517,
      "step": 156092
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7871856689453125,
      "learning_rate": 0.0001412637673041445,
      "loss": 3.0601,
      "step": 156093
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.195457696914673,
      "learning_rate": 0.00014126029628658526,
      "loss": 2.6508,
      "step": 156094
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6095690727233887,
      "learning_rate": 0.00014125682529853858,
      "loss": 2.8992,
      "step": 156095
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1407198905944824,
      "learning_rate": 0.00014125335434000537,
      "loss": 3.027,
      "step": 156096
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5306754112243652,
      "learning_rate": 0.00014124988341098605,
      "loss": 2.9552,
      "step": 156097
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.629913568496704,
      "learning_rate": 0.00014124641251148147,
      "loss": 2.9062,
      "step": 156098
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3291845321655273,
      "learning_rate": 0.0001412429416414922,
      "loss": 3.0621,
      "step": 156099
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7145395278930664,
      "learning_rate": 0.00014123947080101884,
      "loss": 3.0467,
      "step": 156100
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.435257911682129,
      "learning_rate": 0.00014123599999006197,
      "loss": 3.0769,
      "step": 156101
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7366225719451904,
      "learning_rate": 0.00014123252920862246,
      "loss": 2.952,
      "step": 156102
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8791451454162598,
      "learning_rate": 0.00014122905845670067,
      "loss": 3.105,
      "step": 156103
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2840042114257812,
      "learning_rate": 0.0001412255877342975,
      "loss": 2.9373,
      "step": 156104
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.203359842300415,
      "learning_rate": 0.00014122211704141352,
      "loss": 3.0662,
      "step": 156105
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0327703952789307,
      "learning_rate": 0.00014121864637804934,
      "loss": 3.0482,
      "step": 156106
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3281824588775635,
      "learning_rate": 0.00014121517574420552,
      "loss": 3.1116,
      "step": 156107
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0989558696746826,
      "learning_rate": 0.00014121170513988284,
      "loss": 3.0275,
      "step": 156108
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6504766941070557,
      "learning_rate": 0.00014120823456508183,
      "loss": 2.7112,
      "step": 156109
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.318103551864624,
      "learning_rate": 0.00014120476401980334,
      "loss": 3.0698,
      "step": 156110
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2556393146514893,
      "learning_rate": 0.00014120129350404787,
      "loss": 3.0837,
      "step": 156111
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.178314208984375,
      "learning_rate": 0.00014119782301781606,
      "loss": 2.9187,
      "step": 156112
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6111395359039307,
      "learning_rate": 0.00014119435256110846,
      "loss": 2.9632,
      "step": 156113
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6020853519439697,
      "learning_rate": 0.00014119088213392596,
      "loss": 2.924,
      "step": 156114
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.517045259475708,
      "learning_rate": 0.00014118741173626894,
      "loss": 2.8041,
      "step": 156115
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.804216146469116,
      "learning_rate": 0.00014118394136813827,
      "loss": 2.8823,
      "step": 156116
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0551750659942627,
      "learning_rate": 0.00014118047102953458,
      "loss": 2.8917,
      "step": 156117
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3802969455718994,
      "learning_rate": 0.00014117700072045837,
      "loss": 3.0117,
      "step": 156118
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2708301544189453,
      "learning_rate": 0.00014117353044091024,
      "loss": 2.7586,
      "step": 156119
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.749451160430908,
      "learning_rate": 0.0001411700601908911,
      "loss": 2.8504,
      "step": 156120
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.192307472229004,
      "learning_rate": 0.0001411665899704013,
      "loss": 3.0419,
      "step": 156121
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.072402238845825,
      "learning_rate": 0.00014116311977944177,
      "loss": 2.9563,
      "step": 156122
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7728803157806396,
      "learning_rate": 0.000141159649618013,
      "loss": 2.6621,
      "step": 156123
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7865052223205566,
      "learning_rate": 0.00014115617948611564,
      "loss": 2.8547,
      "step": 156124
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.924131393432617,
      "learning_rate": 0.00014115270938375025,
      "loss": 2.918,
      "step": 156125
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7889370918273926,
      "learning_rate": 0.00014114923931091768,
      "loss": 2.8606,
      "step": 156126
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.061640977859497,
      "learning_rate": 0.00014114576926761835,
      "loss": 2.7779,
      "step": 156127
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0365347862243652,
      "learning_rate": 0.00014114229925385311,
      "loss": 2.9928,
      "step": 156128
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4398937225341797,
      "learning_rate": 0.00014113882926962255,
      "loss": 2.9108,
      "step": 156129
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9953008890151978,
      "learning_rate": 0.00014113535931492725,
      "loss": 3.078,
      "step": 156130
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8639612197875977,
      "learning_rate": 0.0001411318893897678,
      "loss": 2.9051,
      "step": 156131
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7231605052948,
      "learning_rate": 0.00014112841949414504,
      "loss": 2.9511,
      "step": 156132
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.398277759552002,
      "learning_rate": 0.0001411249496280594,
      "loss": 2.9551,
      "step": 156133
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.81769061088562,
      "learning_rate": 0.0001411214797915117,
      "loss": 2.881,
      "step": 156134
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.572368621826172,
      "learning_rate": 0.00014111800998450254,
      "loss": 2.9559,
      "step": 156135
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.918039321899414,
      "learning_rate": 0.00014111454020703253,
      "loss": 2.8096,
      "step": 156136
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2172274589538574,
      "learning_rate": 0.00014111107045910223,
      "loss": 2.8354,
      "step": 156137
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.224374771118164,
      "learning_rate": 0.00014110760074071248,
      "loss": 3.0426,
      "step": 156138
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5145959854125977,
      "learning_rate": 0.00014110413105186375,
      "loss": 3.0376,
      "step": 156139
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.710001230239868,
      "learning_rate": 0.00014110066139255687,
      "loss": 2.9348,
      "step": 156140
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.759765625,
      "learning_rate": 0.00014109719176279222,
      "loss": 3.189,
      "step": 156141
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.244825601577759,
      "learning_rate": 0.00014109372216257086,
      "loss": 3.0643,
      "step": 156142
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3249542713165283,
      "learning_rate": 0.00014109025259189295,
      "loss": 2.9091,
      "step": 156143
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1369569301605225,
      "learning_rate": 0.00014108678305075944,
      "loss": 2.8198,
      "step": 156144
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9810922145843506,
      "learning_rate": 0.00014108331353917083,
      "loss": 3.0069,
      "step": 156145
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.271521806716919,
      "learning_rate": 0.0001410798440571279,
      "loss": 2.7973,
      "step": 156146
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6570615768432617,
      "learning_rate": 0.00014107637460463116,
      "loss": 2.9868,
      "step": 156147
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9221761226654053,
      "learning_rate": 0.00014107290518168152,
      "loss": 2.5901,
      "step": 156148
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.781315326690674,
      "learning_rate": 0.0001410694357882792,
      "loss": 3.2118,
      "step": 156149
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8141465187072754,
      "learning_rate": 0.0001410659664244252,
      "loss": 2.9547,
      "step": 156150
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.476316452026367,
      "learning_rate": 0.00014106249709011992,
      "loss": 2.9106,
      "step": 156151
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6059253215789795,
      "learning_rate": 0.0001410590277853642,
      "loss": 2.9901,
      "step": 156152
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.159036636352539,
      "learning_rate": 0.00014105555851015857,
      "loss": 2.7873,
      "step": 156153
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.777435779571533,
      "learning_rate": 0.00014105208926450387,
      "loss": 2.9436,
      "step": 156154
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6159422397613525,
      "learning_rate": 0.00014104862004840038,
      "loss": 2.9619,
      "step": 156155
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4161553382873535,
      "learning_rate": 0.0001410451508618491,
      "loss": 2.9229,
      "step": 156156
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.200610876083374,
      "learning_rate": 0.00014104168170485035,
      "loss": 3.0743,
      "step": 156157
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.86702561378479,
      "learning_rate": 0.0001410382125774051,
      "loss": 3.0411,
      "step": 156158
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2469582557678223,
      "learning_rate": 0.00014103474347951372,
      "loss": 2.7828,
      "step": 156159
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.686211347579956,
      "learning_rate": 0.00014103127441117714,
      "loss": 2.9791,
      "step": 156160
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7044832706451416,
      "learning_rate": 0.00014102780537239578,
      "loss": 3.0607,
      "step": 156161
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.347604513168335,
      "learning_rate": 0.0001410243363631704,
      "loss": 2.994,
      "step": 156162
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4064626693725586,
      "learning_rate": 0.00014102086738350148,
      "loss": 3.2046,
      "step": 156163
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.118617534637451,
      "learning_rate": 0.00014101739843338986,
      "loss": 2.8568,
      "step": 156164
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3412296772003174,
      "learning_rate": 0.00014101392951283603,
      "loss": 3.0258,
      "step": 156165
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.711308240890503,
      "learning_rate": 0.0001410104606218408,
      "loss": 2.9507,
      "step": 156166
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1893019676208496,
      "learning_rate": 0.00014100699176040476,
      "loss": 2.9705,
      "step": 156167
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8968238830566406,
      "learning_rate": 0.00014100352292852847,
      "loss": 2.7465,
      "step": 156168
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1087052822113037,
      "learning_rate": 0.00014100005412621256,
      "loss": 2.8287,
      "step": 156169
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.522191286087036,
      "learning_rate": 0.00014099658535345785,
      "loss": 3.0641,
      "step": 156170
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7534539699554443,
      "learning_rate": 0.00014099311661026475,
      "loss": 2.9916,
      "step": 156171
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9424033164978027,
      "learning_rate": 0.00014098964789663416,
      "loss": 3.0728,
      "step": 156172
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.175079345703125,
      "learning_rate": 0.00014098617921256656,
      "loss": 3.0087,
      "step": 156173
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.359906196594238,
      "learning_rate": 0.00014098271055806256,
      "loss": 2.749,
      "step": 156174
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4876763820648193,
      "learning_rate": 0.00014097924193312296,
      "loss": 2.885,
      "step": 156175
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4797399044036865,
      "learning_rate": 0.00014097577333774833,
      "loss": 3.1458,
      "step": 156176
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3276920318603516,
      "learning_rate": 0.0001409723047719392,
      "loss": 2.9425,
      "step": 156177
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.262246131896973,
      "learning_rate": 0.00014096883623569638,
      "loss": 3.0415,
      "step": 156178
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.021164894104004,
      "learning_rate": 0.00014096536772902052,
      "loss": 2.8042,
      "step": 156179
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5675182342529297,
      "learning_rate": 0.00014096189925191205,
      "loss": 3.0341,
      "step": 156180
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.119720458984375,
      "learning_rate": 0.0001409584308043719,
      "loss": 3.0362,
      "step": 156181
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.4251949787139893,
      "learning_rate": 0.0001409549623864005,
      "loss": 2.8161,
      "step": 156182
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0569539070129395,
      "learning_rate": 0.00014095149399799862,
      "loss": 2.8365,
      "step": 156183
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4229867458343506,
      "learning_rate": 0.0001409480256391669,
      "loss": 3.0304,
      "step": 156184
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.325068712234497,
      "learning_rate": 0.00014094455730990593,
      "loss": 2.7383,
      "step": 156185
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2558155059814453,
      "learning_rate": 0.00014094108901021625,
      "loss": 2.929,
      "step": 156186
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.4865643978118896,
      "learning_rate": 0.00014093762074009875,
      "loss": 2.7967,
      "step": 156187
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.310291051864624,
      "learning_rate": 0.00014093415249955384,
      "loss": 2.8877,
      "step": 156188
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1776998043060303,
      "learning_rate": 0.0001409306842885824,
      "loss": 2.6636,
      "step": 156189
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.616373300552368,
      "learning_rate": 0.0001409272161071849,
      "loss": 2.885,
      "step": 156190
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.271063804626465,
      "learning_rate": 0.00014092374795536205,
      "loss": 2.931,
      "step": 156191
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0845847129821777,
      "learning_rate": 0.0001409202798331144,
      "loss": 3.065,
      "step": 156192
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.893122673034668,
      "learning_rate": 0.00014091681174044276,
      "loss": 2.8764,
      "step": 156193
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9303072690963745,
      "learning_rate": 0.00014091334367734756,
      "loss": 3.1034,
      "step": 156194
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.438220500946045,
      "learning_rate": 0.0001409098756438297,
      "loss": 3.0903,
      "step": 156195
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.276763677597046,
      "learning_rate": 0.0001409064076398897,
      "loss": 2.8501,
      "step": 156196
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8752248287200928,
      "learning_rate": 0.00014090293966552818,
      "loss": 3.0185,
      "step": 156197
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1781673431396484,
      "learning_rate": 0.00014089947172074574,
      "loss": 3.0278,
      "step": 156198
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.64215087890625,
      "learning_rate": 0.00014089600380554318,
      "loss": 2.9725,
      "step": 156199
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.112222909927368,
      "learning_rate": 0.00014089253591992095,
      "loss": 3.1902,
      "step": 156200
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3758504390716553,
      "learning_rate": 0.0001408890680638799,
      "loss": 2.9963,
      "step": 156201
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1888222694396973,
      "learning_rate": 0.0001408856002374206,
      "loss": 2.86,
      "step": 156202
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4674320220947266,
      "learning_rate": 0.00014088213244054364,
      "loss": 2.9894,
      "step": 156203
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.908050060272217,
      "learning_rate": 0.0001408786646732496,
      "loss": 2.9687,
      "step": 156204
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9833672046661377,
      "learning_rate": 0.0001408751969355393,
      "loss": 3.1353,
      "step": 156205
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8438022136688232,
      "learning_rate": 0.00014087172922741322,
      "loss": 3.1912,
      "step": 156206
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.826655864715576,
      "learning_rate": 0.0001408682615488722,
      "loss": 2.9759,
      "step": 156207
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.032688617706299,
      "learning_rate": 0.00014086479389991666,
      "loss": 3.0414,
      "step": 156208
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2788503170013428,
      "learning_rate": 0.0001408613262805476,
      "loss": 2.8114,
      "step": 156209
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5213918685913086,
      "learning_rate": 0.00014085785869076518,
      "loss": 3.0747,
      "step": 156210
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3181536197662354,
      "learning_rate": 0.0001408543911305704,
      "loss": 2.7445,
      "step": 156211
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2631213665008545,
      "learning_rate": 0.00014085092359996372,
      "loss": 2.7111,
      "step": 156212
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.4516096115112305,
      "learning_rate": 0.00014084745609894593,
      "loss": 3.1448,
      "step": 156213
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1686782836914062,
      "learning_rate": 0.00014084398862751748,
      "loss": 2.7561,
      "step": 156214
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.10797119140625,
      "learning_rate": 0.00014084052118567937,
      "loss": 2.9875,
      "step": 156215
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.0587422847747803,
      "learning_rate": 0.00014083705377343177,
      "loss": 3.0825,
      "step": 156216
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.24424409866333,
      "learning_rate": 0.0001408335863907757,
      "loss": 2.8598,
      "step": 156217
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2247154712677,
      "learning_rate": 0.0001408301190377116,
      "loss": 2.8794,
      "step": 156218
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.3247451782226562,
      "learning_rate": 0.00014082665171424026,
      "loss": 3.0104,
      "step": 156219
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3762080669403076,
      "learning_rate": 0.00014082318442036214,
      "loss": 2.9128,
      "step": 156220
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2478363513946533,
      "learning_rate": 0.0001408197171560782,
      "loss": 3.1557,
      "step": 156221
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1094861030578613,
      "learning_rate": 0.00014081624992138867,
      "loss": 3.1431,
      "step": 156222
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9725394248962402,
      "learning_rate": 0.00014081278271629454,
      "loss": 3.1058,
      "step": 156223
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3389952182769775,
      "learning_rate": 0.00014080931554079614,
      "loss": 3.1219,
      "step": 156224
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.619699239730835,
      "learning_rate": 0.00014080584839489446,
      "loss": 2.9646,
      "step": 156225
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5485777854919434,
      "learning_rate": 0.00014080238127858985,
      "loss": 2.7534,
      "step": 156226
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3321003913879395,
      "learning_rate": 0.0001407989141918833,
      "loss": 2.7597,
      "step": 156227
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.783703565597534,
      "learning_rate": 0.00014079544713477504,
      "loss": 3.0188,
      "step": 156228
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.108668565750122,
      "learning_rate": 0.00014079198010726602,
      "loss": 2.899,
      "step": 156229
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6226537227630615,
      "learning_rate": 0.00014078851310935663,
      "loss": 2.9782,
      "step": 156230
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.921922206878662,
      "learning_rate": 0.0001407850461410478,
      "loss": 2.8973,
      "step": 156231
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3377490043640137,
      "learning_rate": 0.00014078157920233993,
      "loss": 2.9877,
      "step": 156232
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8202364444732666,
      "learning_rate": 0.00014077811229323397,
      "loss": 2.9718,
      "step": 156233
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.459134578704834,
      "learning_rate": 0.00014077464541373017,
      "loss": 2.8886,
      "step": 156234
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.501317024230957,
      "learning_rate": 0.00014077117856382942,
      "loss": 2.9831,
      "step": 156235
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5296528339385986,
      "learning_rate": 0.00014076771174353225,
      "loss": 2.8518,
      "step": 156236
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1686081886291504,
      "learning_rate": 0.00014076424495283952,
      "loss": 2.889,
      "step": 156237
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.309041738510132,
      "learning_rate": 0.00014076077819175154,
      "loss": 3.0365,
      "step": 156238
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3408758640289307,
      "learning_rate": 0.0001407573114602694,
      "loss": 2.9869,
      "step": 156239
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6003198623657227,
      "learning_rate": 0.00014075384475839323,
      "loss": 2.7477,
      "step": 156240
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.379075765609741,
      "learning_rate": 0.00014075037808612405,
      "loss": 2.6639,
      "step": 156241
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.4978816509246826,
      "learning_rate": 0.00014074691144346229,
      "loss": 2.9115,
      "step": 156242
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3007423877716064,
      "learning_rate": 0.00014074344483040876,
      "loss": 3.1657,
      "step": 156243
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.638566493988037,
      "learning_rate": 0.00014073997824696393,
      "loss": 2.8188,
      "step": 156244
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.179091453552246,
      "learning_rate": 0.00014073651169312868,
      "loss": 3.0699,
      "step": 156245
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.992932677268982,
      "learning_rate": 0.00014073304516890348,
      "loss": 2.7576,
      "step": 156246
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.440335512161255,
      "learning_rate": 0.00014072957867428904,
      "loss": 2.9517,
      "step": 156247
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2061054706573486,
      "learning_rate": 0.00014072611220928587,
      "loss": 2.9392,
      "step": 156248
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.6539065837860107,
      "learning_rate": 0.0001407226457738948,
      "loss": 2.9714,
      "step": 156249
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.700855016708374,
      "learning_rate": 0.00014071917936811634,
      "loss": 2.7748,
      "step": 156250
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.595092535018921,
      "learning_rate": 0.00014071571299195126,
      "loss": 3.1931,
      "step": 156251
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5571975708007812,
      "learning_rate": 0.00014071224664540015,
      "loss": 2.8575,
      "step": 156252
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.937871217727661,
      "learning_rate": 0.00014070878032846365,
      "loss": 2.9179,
      "step": 156253
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9325203895568848,
      "learning_rate": 0.00014070531404114227,
      "loss": 2.843,
      "step": 156254
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.418830156326294,
      "learning_rate": 0.00014070184778343687,
      "loss": 2.8089,
      "step": 156255
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.1026370525360107,
      "learning_rate": 0.00014069838155534792,
      "loss": 3.1868,
      "step": 156256
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.5122814178466797,
      "learning_rate": 0.00014069491535687628,
      "loss": 3.0858,
      "step": 156257
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.797372817993164,
      "learning_rate": 0.0001406914491880224,
      "loss": 2.9442,
      "step": 156258
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2980823516845703,
      "learning_rate": 0.00014068798304878692,
      "loss": 2.7034,
      "step": 156259
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6356585025787354,
      "learning_rate": 0.00014068451693917064,
      "loss": 2.9736,
      "step": 156260
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.066967725753784,
      "learning_rate": 0.00014068105085917416,
      "loss": 3.2126,
      "step": 156261
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.533759593963623,
      "learning_rate": 0.00014067758480879793,
      "loss": 2.7959,
      "step": 156262
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.8761956691741943,
      "learning_rate": 0.00014067411878804285,
      "loss": 3.0048,
      "step": 156263
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7818379402160645,
      "learning_rate": 0.00014067065279690948,
      "loss": 2.9244,
      "step": 156264
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.714745283126831,
      "learning_rate": 0.0001406671868353983,
      "loss": 2.9815,
      "step": 156265
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.227992534637451,
      "learning_rate": 0.00014066372090351025,
      "loss": 3.0033,
      "step": 156266
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.3579471111297607,
      "learning_rate": 0.0001406602550012457,
      "loss": 2.9717,
      "step": 156267
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6141512393951416,
      "learning_rate": 0.00014065678912860552,
      "loss": 3.0964,
      "step": 156268
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2089474201202393,
      "learning_rate": 0.00014065332328559027,
      "loss": 2.8925,
      "step": 156269
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.103283405303955,
      "learning_rate": 0.00014064985747220055,
      "loss": 3.0061,
      "step": 156270
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.2393929958343506,
      "learning_rate": 0.00014064639168843692,
      "loss": 3.2263,
      "step": 156271
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6513023376464844,
      "learning_rate": 0.00014064292593430022,
      "loss": 2.9266,
      "step": 156272
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.293375015258789,
      "learning_rate": 0.00014063946020979095,
      "loss": 2.908,
      "step": 156273
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.5185813903808594,
      "learning_rate": 0.00014063599451490992,
      "loss": 2.9976,
      "step": 156274
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7150425910949707,
      "learning_rate": 0.00014063252884965763,
      "loss": 3.1295,
      "step": 156275
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6201090812683105,
      "learning_rate": 0.00014062906321403478,
      "loss": 3.1927,
      "step": 156276
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.077892541885376,
      "learning_rate": 0.00014062559760804187,
      "loss": 2.8549,
      "step": 156277
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.2949416637420654,
      "learning_rate": 0.00014062213203167983,
      "loss": 2.8923,
      "step": 156278
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.258436918258667,
      "learning_rate": 0.00014061866648494898,
      "loss": 2.8401,
      "step": 156279
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.0374691486358643,
      "learning_rate": 0.00014061520096785024,
      "loss": 2.9852,
      "step": 156280
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.9865241050720215,
      "learning_rate": 0.00014061173548038405,
      "loss": 2.8229,
      "step": 156281
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.1040472984313965,
      "learning_rate": 0.0001406082700225514,
      "loss": 2.8726,
      "step": 156282
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.7229530811309814,
      "learning_rate": 0.0001406048045943524,
      "loss": 3.0251,
      "step": 156283
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.6127986907958984,
      "learning_rate": 0.00014060133919578812,
      "loss": 3.0918,
      "step": 156284
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.464932441711426,
      "learning_rate": 0.00014059787382685894,
      "loss": 2.6083,
      "step": 156285
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.342725992202759,
      "learning_rate": 0.00014059440848756576,
      "loss": 3.0588,
      "step": 156286
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.9671587944030762,
      "learning_rate": 0.00014059094317790894,
      "loss": 2.8913,
      "step": 156287
    },
    {
      "epoch": 2.03,
      "grad_norm": 1.8180954456329346,
      "learning_rate": 0.0001405874778978895,
      "loss": 3.0564,
      "step": 156288
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.625129461288452,
      "learning_rate": 0.00014058401264750766,
      "loss": 3.2239,
      "step": 156289
    },
    {
      "epoch": 2.03,
      "grad_norm": 4.478658199310303,
      "learning_rate": 0.00014058054742676433,
      "loss": 2.7854,
      "step": 156290
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8027496337890625,
      "learning_rate": 0.00014057708223566003,
      "loss": 3.0082,
      "step": 156291
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1310462951660156,
      "learning_rate": 0.00014057361707419552,
      "loss": 2.8315,
      "step": 156292
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.463916063308716,
      "learning_rate": 0.0001405701519423713,
      "loss": 3.1409,
      "step": 156293
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9069230556488037,
      "learning_rate": 0.00014056668684018833,
      "loss": 3.1901,
      "step": 156294
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.647645950317383,
      "learning_rate": 0.0001405632217676468,
      "loss": 2.9214,
      "step": 156295
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.657195806503296,
      "learning_rate": 0.00014055975672474765,
      "loss": 2.7627,
      "step": 156296
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.273728609085083,
      "learning_rate": 0.00014055629171149138,
      "loss": 3.0137,
      "step": 156297
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9641013145446777,
      "learning_rate": 0.0001405528267278788,
      "loss": 2.8854,
      "step": 156298
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.985395908355713,
      "learning_rate": 0.00014054936177391035,
      "loss": 3.0512,
      "step": 156299
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.1870598793029785,
      "learning_rate": 0.000140545896849587,
      "loss": 2.7543,
      "step": 156300
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0312395095825195,
      "learning_rate": 0.00014054243195490895,
      "loss": 3.1923,
      "step": 156301
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.226548910140991,
      "learning_rate": 0.0001405389670898772,
      "loss": 3.0641,
      "step": 156302
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.647738456726074,
      "learning_rate": 0.00014053550225449216,
      "loss": 3.0318,
      "step": 156303
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.652869939804077,
      "learning_rate": 0.00014053203744875467,
      "loss": 2.6564,
      "step": 156304
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.240859031677246,
      "learning_rate": 0.00014052857267266518,
      "loss": 3.0319,
      "step": 156305
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.754974126815796,
      "learning_rate": 0.00014052510792622467,
      "loss": 2.8575,
      "step": 156306
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.616253614425659,
      "learning_rate": 0.0001405216432094333,
      "loss": 2.8631,
      "step": 156307
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.746821880340576,
      "learning_rate": 0.0001405181785222921,
      "loss": 3.0437,
      "step": 156308
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.409748077392578,
      "learning_rate": 0.0001405147138648015,
      "loss": 2.9919,
      "step": 156309
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.577334403991699,
      "learning_rate": 0.0001405112492369623,
      "loss": 2.8714,
      "step": 156310
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.906254768371582,
      "learning_rate": 0.00014050778463877496,
      "loss": 2.9472,
      "step": 156311
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.062311887741089,
      "learning_rate": 0.00014050432007024048,
      "loss": 2.8974,
      "step": 156312
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.382117748260498,
      "learning_rate": 0.000140500855531359,
      "loss": 2.9103,
      "step": 156313
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.086129903793335,
      "learning_rate": 0.00014049739102213156,
      "loss": 2.8085,
      "step": 156314
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2235100269317627,
      "learning_rate": 0.00014049392654255852,
      "loss": 2.7802,
      "step": 156315
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.591108798980713,
      "learning_rate": 0.00014049046209264082,
      "loss": 2.9625,
      "step": 156316
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.236330986022949,
      "learning_rate": 0.00014048699767237882,
      "loss": 2.9373,
      "step": 156317
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9875173568725586,
      "learning_rate": 0.00014048353328177353,
      "loss": 2.9295,
      "step": 156318
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5734336376190186,
      "learning_rate": 0.00014048006892082514,
      "loss": 2.9868,
      "step": 156319
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.7240099906921387,
      "learning_rate": 0.00014047660458953463,
      "loss": 2.8111,
      "step": 156320
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.450531482696533,
      "learning_rate": 0.00014047314028790242,
      "loss": 3.0439,
      "step": 156321
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.676691055297852,
      "learning_rate": 0.00014046967601592938,
      "loss": 2.9314,
      "step": 156322
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2869210243225098,
      "learning_rate": 0.00014046621177361593,
      "loss": 3.0134,
      "step": 156323
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2636377811431885,
      "learning_rate": 0.00014046274756096303,
      "loss": 2.9826,
      "step": 156324
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.531313180923462,
      "learning_rate": 0.0001404592833779709,
      "loss": 3.1046,
      "step": 156325
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.4928438663482666,
      "learning_rate": 0.00014045581922464049,
      "loss": 3.2059,
      "step": 156326
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2075560092926025,
      "learning_rate": 0.00014045235510097228,
      "loss": 3.0984,
      "step": 156327
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.004044532775879,
      "learning_rate": 0.00014044889100696707,
      "loss": 2.9259,
      "step": 156328
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5666961669921875,
      "learning_rate": 0.00014044542694262532,
      "loss": 3.0598,
      "step": 156329
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.607036590576172,
      "learning_rate": 0.0001404419629079479,
      "loss": 2.8449,
      "step": 156330
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1387088298797607,
      "learning_rate": 0.00014043849890293532,
      "loss": 2.9977,
      "step": 156331
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.214647054672241,
      "learning_rate": 0.00014043503492758825,
      "loss": 3.0519,
      "step": 156332
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.339853286743164,
      "learning_rate": 0.0001404315709819072,
      "loss": 2.8599,
      "step": 156333
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.87385892868042,
      "learning_rate": 0.00014042810706589305,
      "loss": 3.2016,
      "step": 156334
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7769358158111572,
      "learning_rate": 0.00014042464317954619,
      "loss": 2.8661,
      "step": 156335
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3641860485076904,
      "learning_rate": 0.00014042117932286754,
      "loss": 2.8667,
      "step": 156336
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.239422082901001,
      "learning_rate": 0.0001404177154958576,
      "loss": 2.9582,
      "step": 156337
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.9965189695358276,
      "learning_rate": 0.000140414251698517,
      "loss": 3.2016,
      "step": 156338
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2185306549072266,
      "learning_rate": 0.00014041078793084632,
      "loss": 3.0034,
      "step": 156339
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3764944076538086,
      "learning_rate": 0.0001404073241928464,
      "loss": 3.0357,
      "step": 156340
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.208420753479004,
      "learning_rate": 0.00014040386048451764,
      "loss": 2.9934,
      "step": 156341
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.858790397644043,
      "learning_rate": 0.00014040039680586093,
      "loss": 2.8243,
      "step": 156342
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9452567100524902,
      "learning_rate": 0.0001403969331568768,
      "loss": 2.8215,
      "step": 156343
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.242650032043457,
      "learning_rate": 0.00014039346953756575,
      "loss": 3.0573,
      "step": 156344
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3613994121551514,
      "learning_rate": 0.00014039000594792873,
      "loss": 2.8646,
      "step": 156345
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.144315004348755,
      "learning_rate": 0.00014038654238796619,
      "loss": 2.9152,
      "step": 156346
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2875583171844482,
      "learning_rate": 0.0001403830788576787,
      "loss": 2.8874,
      "step": 156347
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.392024040222168,
      "learning_rate": 0.00014037961535706713,
      "loss": 2.994,
      "step": 156348
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6643476486206055,
      "learning_rate": 0.00014037615188613198,
      "loss": 3.0002,
      "step": 156349
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0488181114196777,
      "learning_rate": 0.00014037268844487383,
      "loss": 2.7083,
      "step": 156350
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.377049446105957,
      "learning_rate": 0.0001403692250332935,
      "loss": 3.0596,
      "step": 156351
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0097033977508545,
      "learning_rate": 0.00014036576165139158,
      "loss": 3.0331,
      "step": 156352
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4442331790924072,
      "learning_rate": 0.00014036229829916854,
      "loss": 3.1532,
      "step": 156353
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5824685096740723,
      "learning_rate": 0.0001403588349766253,
      "loss": 3.2058,
      "step": 156354
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.4294016361236572,
      "learning_rate": 0.00014035537168376232,
      "loss": 3.326,
      "step": 156355
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.397827625274658,
      "learning_rate": 0.00014035190842058017,
      "loss": 2.8903,
      "step": 156356
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.413339614868164,
      "learning_rate": 0.00014034844518707977,
      "loss": 2.9609,
      "step": 156357
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2378149032592773,
      "learning_rate": 0.00014034498198326148,
      "loss": 2.9681,
      "step": 156358
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.465214967727661,
      "learning_rate": 0.0001403415188091262,
      "loss": 2.7177,
      "step": 156359
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.091099500656128,
      "learning_rate": 0.00014033805566467445,
      "loss": 2.998,
      "step": 156360
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7902910709381104,
      "learning_rate": 0.00014033459254990686,
      "loss": 2.6487,
      "step": 156361
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6303160190582275,
      "learning_rate": 0.00014033112946482395,
      "loss": 2.9538,
      "step": 156362
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.292754650115967,
      "learning_rate": 0.0001403276664094266,
      "loss": 3.1591,
      "step": 156363
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.8098018169403076,
      "learning_rate": 0.00014032420338371527,
      "loss": 2.6822,
      "step": 156364
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1739766597747803,
      "learning_rate": 0.00014032074038769078,
      "loss": 3.0466,
      "step": 156365
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.325739860534668,
      "learning_rate": 0.0001403172774213536,
      "loss": 2.8783,
      "step": 156366
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.559213399887085,
      "learning_rate": 0.00014031381448470463,
      "loss": 2.9851,
      "step": 156367
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3790876865386963,
      "learning_rate": 0.00014031035157774415,
      "loss": 3.3903,
      "step": 156368
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.553473949432373,
      "learning_rate": 0.00014030688870047308,
      "loss": 2.9382,
      "step": 156369
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0203888416290283,
      "learning_rate": 0.00014030342585289186,
      "loss": 3.0799,
      "step": 156370
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0765602588653564,
      "learning_rate": 0.00014029996303500136,
      "loss": 2.8826,
      "step": 156371
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.339169502258301,
      "learning_rate": 0.000140296500246802,
      "loss": 2.9324,
      "step": 156372
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.899648427963257,
      "learning_rate": 0.00014029303748829477,
      "loss": 2.9853,
      "step": 156373
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5086963176727295,
      "learning_rate": 0.00014028957475947985,
      "loss": 2.964,
      "step": 156374
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0771212577819824,
      "learning_rate": 0.00014028611206035822,
      "loss": 2.8349,
      "step": 156375
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6800928115844727,
      "learning_rate": 0.0001402826493909303,
      "loss": 2.7103,
      "step": 156376
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7290284633636475,
      "learning_rate": 0.000140279186751197,
      "loss": 2.9084,
      "step": 156377
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.270490646362305,
      "learning_rate": 0.00014027572414115867,
      "loss": 2.9051,
      "step": 156378
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3711280822753906,
      "learning_rate": 0.00014027226156081632,
      "loss": 3.1229,
      "step": 156379
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1167309284210205,
      "learning_rate": 0.00014026879901017015,
      "loss": 2.8166,
      "step": 156380
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4093358516693115,
      "learning_rate": 0.00014026533648922116,
      "loss": 2.7387,
      "step": 156381
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3211562633514404,
      "learning_rate": 0.0001402618739979697,
      "loss": 3.0694,
      "step": 156382
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.097825050354004,
      "learning_rate": 0.00014025841153641672,
      "loss": 3.1517,
      "step": 156383
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9477109909057617,
      "learning_rate": 0.0001402549491045626,
      "loss": 3.0568,
      "step": 156384
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.670341730117798,
      "learning_rate": 0.0001402514867024083,
      "loss": 3.0125,
      "step": 156385
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2910594940185547,
      "learning_rate": 0.00014024802432995406,
      "loss": 3.0052,
      "step": 156386
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1843655109405518,
      "learning_rate": 0.00014024456198720087,
      "loss": 2.7298,
      "step": 156387
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.442814826965332,
      "learning_rate": 0.00014024109967414908,
      "loss": 2.7301,
      "step": 156388
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1152167320251465,
      "learning_rate": 0.00014023763739079962,
      "loss": 2.7101,
      "step": 156389
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.66231107711792,
      "learning_rate": 0.00014023417513715287,
      "loss": 3.0321,
      "step": 156390
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.261385679244995,
      "learning_rate": 0.00014023071291320983,
      "loss": 2.8516,
      "step": 156391
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1078901290893555,
      "learning_rate": 0.0001402272507189707,
      "loss": 3.0704,
      "step": 156392
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5919101238250732,
      "learning_rate": 0.00014022378855443643,
      "loss": 2.7343,
      "step": 156393
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.166887044906616,
      "learning_rate": 0.00014022032641960747,
      "loss": 2.912,
      "step": 156394
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5531954765319824,
      "learning_rate": 0.0001402168643144847,
      "loss": 2.8527,
      "step": 156395
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.365792751312256,
      "learning_rate": 0.0001402134022390685,
      "loss": 2.6566,
      "step": 156396
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7120232582092285,
      "learning_rate": 0.0001402099401933599,
      "loss": 2.9864,
      "step": 156397
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6385724544525146,
      "learning_rate": 0.00014020647817735902,
      "loss": 2.9754,
      "step": 156398
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0247583389282227,
      "learning_rate": 0.00014020301619106692,
      "loss": 2.9707,
      "step": 156399
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3660037517547607,
      "learning_rate": 0.00014019955423448394,
      "loss": 2.7918,
      "step": 156400
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7845609188079834,
      "learning_rate": 0.00014019609230761103,
      "loss": 2.7814,
      "step": 156401
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.316631555557251,
      "learning_rate": 0.00014019263041044856,
      "loss": 3.1257,
      "step": 156402
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4533700942993164,
      "learning_rate": 0.00014018916854299752,
      "loss": 3.0829,
      "step": 156403
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3864662647247314,
      "learning_rate": 0.00014018570670525812,
      "loss": 2.8952,
      "step": 156404
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.489899158477783,
      "learning_rate": 0.0001401822448972313,
      "loss": 3.1664,
      "step": 156405
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.238567352294922,
      "learning_rate": 0.0001401787831189175,
      "loss": 2.5433,
      "step": 156406
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.243250608444214,
      "learning_rate": 0.00014017532137031762,
      "loss": 2.8066,
      "step": 156407
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.257084846496582,
      "learning_rate": 0.00014017185965143204,
      "loss": 3.0598,
      "step": 156408
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8836112022399902,
      "learning_rate": 0.00014016839796226167,
      "loss": 2.7054,
      "step": 156409
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1294684410095215,
      "learning_rate": 0.00014016493630280698,
      "loss": 3.1033,
      "step": 156410
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4450573921203613,
      "learning_rate": 0.00014016147467306862,
      "loss": 3.1608,
      "step": 156411
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.184131383895874,
      "learning_rate": 0.0001401580130730472,
      "loss": 3.1455,
      "step": 156412
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5780251026153564,
      "learning_rate": 0.0001401545515027435,
      "loss": 3.3124,
      "step": 156413
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6394972801208496,
      "learning_rate": 0.000140151089962158,
      "loss": 3.1176,
      "step": 156414
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5971696376800537,
      "learning_rate": 0.00014014762845129152,
      "loss": 2.6879,
      "step": 156415
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9154770374298096,
      "learning_rate": 0.0001401441669701446,
      "loss": 2.9318,
      "step": 156416
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0383710861206055,
      "learning_rate": 0.00014014070551871796,
      "loss": 2.666,
      "step": 156417
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4846608638763428,
      "learning_rate": 0.00014013724409701203,
      "loss": 3.189,
      "step": 156418
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2115042209625244,
      "learning_rate": 0.00014013378270502768,
      "loss": 2.925,
      "step": 156419
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.588405132293701,
      "learning_rate": 0.00014013032134276541,
      "loss": 2.8917,
      "step": 156420
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5811610221862793,
      "learning_rate": 0.00014012686001022606,
      "loss": 2.827,
      "step": 156421
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.972140073776245,
      "learning_rate": 0.00014012339870741008,
      "loss": 2.8533,
      "step": 156422
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4984941482543945,
      "learning_rate": 0.00014011993743431823,
      "loss": 2.8663,
      "step": 156423
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.981882929801941,
      "learning_rate": 0.00014011647619095098,
      "loss": 3.0135,
      "step": 156424
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.180734872817993,
      "learning_rate": 0.0001401130149773092,
      "loss": 2.754,
      "step": 156425
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.493499994277954,
      "learning_rate": 0.0001401095537933933,
      "loss": 3.1975,
      "step": 156426
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8872978687286377,
      "learning_rate": 0.0001401060926392042,
      "loss": 2.8457,
      "step": 156427
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9844460487365723,
      "learning_rate": 0.00014010263151474235,
      "loss": 3.1991,
      "step": 156428
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0677168369293213,
      "learning_rate": 0.00014009917042000837,
      "loss": 3.065,
      "step": 156429
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.009036540985107,
      "learning_rate": 0.0001400957093550031,
      "loss": 2.7745,
      "step": 156430
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9011292457580566,
      "learning_rate": 0.000140092248319727,
      "loss": 2.758,
      "step": 156431
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1238415241241455,
      "learning_rate": 0.0001400887873141807,
      "loss": 3.1748,
      "step": 156432
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.388878583908081,
      "learning_rate": 0.00014008532633836497,
      "loss": 2.9977,
      "step": 156433
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.7570643424987793,
      "learning_rate": 0.00014008186539228045,
      "loss": 2.8245,
      "step": 156434
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.273761034011841,
      "learning_rate": 0.00014007840447592763,
      "loss": 2.7199,
      "step": 156435
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.368621587753296,
      "learning_rate": 0.0001400749435893073,
      "loss": 2.9076,
      "step": 156436
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9809372425079346,
      "learning_rate": 0.00014007148273242009,
      "loss": 2.7332,
      "step": 156437
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6054019927978516,
      "learning_rate": 0.0001400680219052665,
      "loss": 3.0368,
      "step": 156438
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.50099778175354,
      "learning_rate": 0.00014006456110784743,
      "loss": 2.8126,
      "step": 156439
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.442286729812622,
      "learning_rate": 0.00014006110034016334,
      "loss": 2.8496,
      "step": 156440
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.656909704208374,
      "learning_rate": 0.0001400576396022148,
      "loss": 2.9178,
      "step": 156441
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.623904228210449,
      "learning_rate": 0.0001400541788940027,
      "loss": 3.0804,
      "step": 156442
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0225629806518555,
      "learning_rate": 0.0001400507182155274,
      "loss": 2.9575,
      "step": 156443
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1848325729370117,
      "learning_rate": 0.0001400472575667898,
      "loss": 2.8563,
      "step": 156444
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.327712059020996,
      "learning_rate": 0.00014004379694779046,
      "loss": 3.1239,
      "step": 156445
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2990243434906006,
      "learning_rate": 0.00014004033635852997,
      "loss": 2.9835,
      "step": 156446
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.284925699234009,
      "learning_rate": 0.00014003687579900893,
      "loss": 3.1195,
      "step": 156447
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.590080738067627,
      "learning_rate": 0.00014003341526922815,
      "loss": 2.8938,
      "step": 156448
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0865108966827393,
      "learning_rate": 0.00014002995476918806,
      "loss": 2.9019,
      "step": 156449
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7155814170837402,
      "learning_rate": 0.00014002649429888952,
      "loss": 3.0912,
      "step": 156450
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3332347869873047,
      "learning_rate": 0.0001400230338583331,
      "loss": 2.8113,
      "step": 156451
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.145694732666016,
      "learning_rate": 0.0001400195734475194,
      "loss": 2.9076,
      "step": 156452
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1888601779937744,
      "learning_rate": 0.00014001611306644897,
      "loss": 3.0103,
      "step": 156453
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0014243125915527,
      "learning_rate": 0.0001400126527151227,
      "loss": 2.8755,
      "step": 156454
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4117133617401123,
      "learning_rate": 0.00014000919239354095,
      "loss": 3.3098,
      "step": 156455
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2309696674346924,
      "learning_rate": 0.00014000573210170466,
      "loss": 2.8973,
      "step": 156456
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6382405757904053,
      "learning_rate": 0.00014000227183961418,
      "loss": 2.9535,
      "step": 156457
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.769714593887329,
      "learning_rate": 0.00013999881160727052,
      "loss": 3.0028,
      "step": 156458
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.653024911880493,
      "learning_rate": 0.0001399953514046739,
      "loss": 2.7163,
      "step": 156459
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.926539421081543,
      "learning_rate": 0.00013999189123182527,
      "loss": 3.1274,
      "step": 156460
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2309908866882324,
      "learning_rate": 0.00013998843108872506,
      "loss": 2.9615,
      "step": 156461
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4939632415771484,
      "learning_rate": 0.0001399849709753741,
      "loss": 2.8977,
      "step": 156462
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.081022262573242,
      "learning_rate": 0.00013998151089177288,
      "loss": 3.1046,
      "step": 156463
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.934131145477295,
      "learning_rate": 0.00013997805083792235,
      "loss": 2.823,
      "step": 156464
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.233703851699829,
      "learning_rate": 0.00013997459081382268,
      "loss": 2.7248,
      "step": 156465
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.542635440826416,
      "learning_rate": 0.00013997113081947485,
      "loss": 3.188,
      "step": 156466
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.954838275909424,
      "learning_rate": 0.00013996767085487933,
      "loss": 2.9775,
      "step": 156467
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6636674404144287,
      "learning_rate": 0.00013996421092003693,
      "loss": 3.0536,
      "step": 156468
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.554741859436035,
      "learning_rate": 0.0001399607510149481,
      "loss": 3.2528,
      "step": 156469
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1616761684417725,
      "learning_rate": 0.0001399572911396138,
      "loss": 3.053,
      "step": 156470
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2664577960968018,
      "learning_rate": 0.00013995383129403427,
      "loss": 2.9982,
      "step": 156471
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.665907859802246,
      "learning_rate": 0.00013995037147821038,
      "loss": 2.6701,
      "step": 156472
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0919487476348877,
      "learning_rate": 0.00013994691169214268,
      "loss": 3.0416,
      "step": 156473
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.312649965286255,
      "learning_rate": 0.000139943451935832,
      "loss": 2.9744,
      "step": 156474
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7950570583343506,
      "learning_rate": 0.0001399399922092787,
      "loss": 2.9604,
      "step": 156475
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7648367881774902,
      "learning_rate": 0.00013993653251248372,
      "loss": 2.9126,
      "step": 156476
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5632283687591553,
      "learning_rate": 0.00013993307284544755,
      "loss": 2.9927,
      "step": 156477
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3392200469970703,
      "learning_rate": 0.00013992961320817083,
      "loss": 3.049,
      "step": 156478
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.215090274810791,
      "learning_rate": 0.00013992615360065413,
      "loss": 2.9016,
      "step": 156479
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.622453451156616,
      "learning_rate": 0.00013992269402289827,
      "loss": 2.8452,
      "step": 156480
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7303712368011475,
      "learning_rate": 0.00013991923447490367,
      "loss": 2.9371,
      "step": 156481
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6893773078918457,
      "learning_rate": 0.00013991577495667127,
      "loss": 2.7375,
      "step": 156482
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3515470027923584,
      "learning_rate": 0.00013991231546820154,
      "loss": 2.7629,
      "step": 156483
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.461259365081787,
      "learning_rate": 0.00013990885600949512,
      "loss": 2.6621,
      "step": 156484
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.481935739517212,
      "learning_rate": 0.00013990539658055253,
      "loss": 2.7334,
      "step": 156485
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2864506244659424,
      "learning_rate": 0.0001399019371813747,
      "loss": 3.0175,
      "step": 156486
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4865853786468506,
      "learning_rate": 0.000139898477811962,
      "loss": 3.2643,
      "step": 156487
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6113803386688232,
      "learning_rate": 0.00013989501847231527,
      "loss": 3.041,
      "step": 156488
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.111912250518799,
      "learning_rate": 0.00013989155916243508,
      "loss": 3.0622,
      "step": 156489
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.778642416000366,
      "learning_rate": 0.00013988809988232213,
      "loss": 2.9651,
      "step": 156490
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.659336805343628,
      "learning_rate": 0.00013988464063197686,
      "loss": 2.9623,
      "step": 156491
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8059115409851074,
      "learning_rate": 0.00013988118141140018,
      "loss": 3.1428,
      "step": 156492
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5252609252929688,
      "learning_rate": 0.00013987772222059247,
      "loss": 2.9099,
      "step": 156493
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4144809246063232,
      "learning_rate": 0.00013987426305955467,
      "loss": 3.0201,
      "step": 156494
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.04464054107666,
      "learning_rate": 0.00013987080392828725,
      "loss": 3.0045,
      "step": 156495
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7189579010009766,
      "learning_rate": 0.00013986734482679083,
      "loss": 2.874,
      "step": 156496
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5895490646362305,
      "learning_rate": 0.00013986388575506603,
      "loss": 2.953,
      "step": 156497
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9392993450164795,
      "learning_rate": 0.00013986042671311366,
      "loss": 3.0725,
      "step": 156498
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.485018014907837,
      "learning_rate": 0.00013985696770093414,
      "loss": 2.8196,
      "step": 156499
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.678025960922241,
      "learning_rate": 0.00013985350871852833,
      "loss": 2.8602,
      "step": 156500
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1754631996154785,
      "learning_rate": 0.00013985004976589682,
      "loss": 2.9749,
      "step": 156501
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6376845836639404,
      "learning_rate": 0.00013984659084304013,
      "loss": 2.9276,
      "step": 156502
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.985338568687439,
      "learning_rate": 0.00013984313194995894,
      "loss": 2.7825,
      "step": 156503
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7728796005249023,
      "learning_rate": 0.00013983967308665404,
      "loss": 3.2339,
      "step": 156504
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.293045997619629,
      "learning_rate": 0.00013983621425312583,
      "loss": 2.8525,
      "step": 156505
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5162830352783203,
      "learning_rate": 0.0001398327554493752,
      "loss": 2.9596,
      "step": 156506
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.64862060546875,
      "learning_rate": 0.0001398292966754027,
      "loss": 2.8951,
      "step": 156507
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.886439085006714,
      "learning_rate": 0.00013982583793120895,
      "loss": 2.7575,
      "step": 156508
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1336934566497803,
      "learning_rate": 0.00013982237921679453,
      "loss": 3.0843,
      "step": 156509
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.472163677215576,
      "learning_rate": 0.00013981892053216023,
      "loss": 2.9081,
      "step": 156510
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.674372434616089,
      "learning_rate": 0.00013981546187730648,
      "loss": 2.9126,
      "step": 156511
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.083533525466919,
      "learning_rate": 0.0001398120032522342,
      "loss": 2.9131,
      "step": 156512
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.414039373397827,
      "learning_rate": 0.00013980854465694392,
      "loss": 3.0412,
      "step": 156513
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.650038242340088,
      "learning_rate": 0.00013980508609143618,
      "loss": 3.1602,
      "step": 156514
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2786943912506104,
      "learning_rate": 0.00013980162755571166,
      "loss": 2.8922,
      "step": 156515
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.603714942932129,
      "learning_rate": 0.00013979816904977112,
      "loss": 2.8873,
      "step": 156516
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1554126739501953,
      "learning_rate": 0.00013979471057361503,
      "loss": 2.9205,
      "step": 156517
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6548573970794678,
      "learning_rate": 0.0001397912521272442,
      "loss": 3.1184,
      "step": 156518
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2374842166900635,
      "learning_rate": 0.00013978779371065923,
      "loss": 2.8799,
      "step": 156519
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.350428342819214,
      "learning_rate": 0.0001397843353238606,
      "loss": 2.962,
      "step": 156520
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.366784572601318,
      "learning_rate": 0.00013978087696684923,
      "loss": 2.8294,
      "step": 156521
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3746418952941895,
      "learning_rate": 0.00013977741863962562,
      "loss": 2.9519,
      "step": 156522
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.120729923248291,
      "learning_rate": 0.00013977396034219028,
      "loss": 2.8031,
      "step": 156523
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3793892860412598,
      "learning_rate": 0.0001397705020745441,
      "loss": 3.0612,
      "step": 156524
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.515778064727783,
      "learning_rate": 0.00013976704383668762,
      "loss": 2.8883,
      "step": 156525
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9306039810180664,
      "learning_rate": 0.00013976358562862133,
      "loss": 3.1401,
      "step": 156526
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.94762921333313,
      "learning_rate": 0.00013976012745034615,
      "loss": 3.1606,
      "step": 156527
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.571282148361206,
      "learning_rate": 0.0001397566693018625,
      "loss": 2.9293,
      "step": 156528
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4064691066741943,
      "learning_rate": 0.00013975321118317116,
      "loss": 2.9858,
      "step": 156529
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.31542706489563,
      "learning_rate": 0.00013974975309427275,
      "loss": 2.8977,
      "step": 156530
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3107352256774902,
      "learning_rate": 0.00013974629503516793,
      "loss": 2.7425,
      "step": 156531
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.343743085861206,
      "learning_rate": 0.0001397428370058571,
      "loss": 2.8317,
      "step": 156532
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.504133462905884,
      "learning_rate": 0.00013973937900634128,
      "loss": 2.7888,
      "step": 156533
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3592147827148438,
      "learning_rate": 0.00013973592103662082,
      "loss": 2.9078,
      "step": 156534
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.369826316833496,
      "learning_rate": 0.00013973246309669657,
      "loss": 3.199,
      "step": 156535
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4195189476013184,
      "learning_rate": 0.0001397290051865691,
      "loss": 2.9741,
      "step": 156536
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3474318981170654,
      "learning_rate": 0.000139725547306239,
      "loss": 2.838,
      "step": 156537
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.165469169616699,
      "learning_rate": 0.00013972208945570684,
      "loss": 2.7995,
      "step": 156538
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.002930164337158,
      "learning_rate": 0.0001397186316349735,
      "loss": 3.115,
      "step": 156539
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3139777183532715,
      "learning_rate": 0.0001397151738440394,
      "loss": 2.6772,
      "step": 156540
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.249222755432129,
      "learning_rate": 0.00013971171608290534,
      "loss": 2.91,
      "step": 156541
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.335982084274292,
      "learning_rate": 0.00013970825835157182,
      "loss": 2.9651,
      "step": 156542
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.816810131072998,
      "learning_rate": 0.00013970480065003964,
      "loss": 3.1094,
      "step": 156543
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2644166946411133,
      "learning_rate": 0.00013970134297830938,
      "loss": 2.8864,
      "step": 156544
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4995415210723877,
      "learning_rate": 0.00013969788533638167,
      "loss": 2.9293,
      "step": 156545
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4573488235473633,
      "learning_rate": 0.00013969442772425702,
      "loss": 3.2068,
      "step": 156546
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3807830810546875,
      "learning_rate": 0.00013969097014193633,
      "loss": 2.7225,
      "step": 156547
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.222338914871216,
      "learning_rate": 0.00013968751258942,
      "loss": 3.1642,
      "step": 156548
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.107175588607788,
      "learning_rate": 0.00013968405506670892,
      "loss": 2.9319,
      "step": 156549
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.263761520385742,
      "learning_rate": 0.00013968059757380355,
      "loss": 3.0225,
      "step": 156550
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2971298694610596,
      "learning_rate": 0.00013967714011070464,
      "loss": 3.0642,
      "step": 156551
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.565486192703247,
      "learning_rate": 0.00013967368267741264,
      "loss": 3.0365,
      "step": 156552
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2014572620391846,
      "learning_rate": 0.00013967022527392844,
      "loss": 3.1954,
      "step": 156553
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.601673126220703,
      "learning_rate": 0.00013966676790025246,
      "loss": 2.9835,
      "step": 156554
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.638610363006592,
      "learning_rate": 0.00013966331055638556,
      "loss": 2.928,
      "step": 156555
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3097901344299316,
      "learning_rate": 0.00013965985324232829,
      "loss": 2.9966,
      "step": 156556
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.450340747833252,
      "learning_rate": 0.00013965639595808126,
      "loss": 3.0881,
      "step": 156557
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.66156005859375,
      "learning_rate": 0.00013965293870364504,
      "loss": 2.818,
      "step": 156558
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.165388584136963,
      "learning_rate": 0.00013964948147902046,
      "loss": 3.1279,
      "step": 156559
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4913456439971924,
      "learning_rate": 0.00013964602428420797,
      "loss": 3.0456,
      "step": 156560
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.446200132369995,
      "learning_rate": 0.00013964256711920844,
      "loss": 2.8403,
      "step": 156561
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8108608722686768,
      "learning_rate": 0.00013963910998402235,
      "loss": 3.0726,
      "step": 156562
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.141780376434326,
      "learning_rate": 0.0001396356528786504,
      "loss": 3.0107,
      "step": 156563
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.765169858932495,
      "learning_rate": 0.00013963219580309308,
      "loss": 2.8869,
      "step": 156564
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9025661945343018,
      "learning_rate": 0.00013962873875735129,
      "loss": 2.8767,
      "step": 156565
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.637084484100342,
      "learning_rate": 0.00013962528174142543,
      "loss": 2.8516,
      "step": 156566
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.476393938064575,
      "learning_rate": 0.00013962182475531635,
      "loss": 2.9968,
      "step": 156567
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.363452434539795,
      "learning_rate": 0.00013961836779902463,
      "loss": 2.9816,
      "step": 156568
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5458152294158936,
      "learning_rate": 0.00013961491087255084,
      "loss": 2.8978,
      "step": 156569
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.182812452316284,
      "learning_rate": 0.00013961145397589557,
      "loss": 3.1913,
      "step": 156570
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.271064281463623,
      "learning_rate": 0.0001396079971090597,
      "loss": 2.5927,
      "step": 156571
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.669459819793701,
      "learning_rate": 0.0001396045402720436,
      "loss": 3.0871,
      "step": 156572
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.219317674636841,
      "learning_rate": 0.00013960108346484815,
      "loss": 2.9512,
      "step": 156573
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6376781463623047,
      "learning_rate": 0.0001395976266874739,
      "loss": 3.1803,
      "step": 156574
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4920456409454346,
      "learning_rate": 0.00013959416993992147,
      "loss": 2.9659,
      "step": 156575
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.328073263168335,
      "learning_rate": 0.0001395907132221914,
      "loss": 2.7834,
      "step": 156576
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1136085987091064,
      "learning_rate": 0.00013958725653428456,
      "loss": 2.9959,
      "step": 156577
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6563289165496826,
      "learning_rate": 0.00013958379987620136,
      "loss": 2.8662,
      "step": 156578
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6726016998291016,
      "learning_rate": 0.00013958034324794267,
      "loss": 2.9455,
      "step": 156579
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.950974464416504,
      "learning_rate": 0.00013957688664950903,
      "loss": 3.0627,
      "step": 156580
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3383662700653076,
      "learning_rate": 0.00013957343008090106,
      "loss": 2.9995,
      "step": 156581
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.731818914413452,
      "learning_rate": 0.00013956997354211932,
      "loss": 2.7923,
      "step": 156582
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.539703845977783,
      "learning_rate": 0.00013956651703316467,
      "loss": 2.9509,
      "step": 156583
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.48781156539917,
      "learning_rate": 0.0001395630605540375,
      "loss": 3.0125,
      "step": 156584
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2506868839263916,
      "learning_rate": 0.0001395596041047387,
      "loss": 3.0044,
      "step": 156585
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6844849586486816,
      "learning_rate": 0.0001395561476852688,
      "loss": 3.2111,
      "step": 156586
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5076348781585693,
      "learning_rate": 0.00013955269129562847,
      "loss": 2.8545,
      "step": 156587
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1993415355682373,
      "learning_rate": 0.00013954923493581816,
      "loss": 2.9377,
      "step": 156588
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3172035217285156,
      "learning_rate": 0.0001395457786058388,
      "loss": 2.766,
      "step": 156589
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6481690406799316,
      "learning_rate": 0.00013954232230569083,
      "loss": 2.9546,
      "step": 156590
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.440021276473999,
      "learning_rate": 0.00013953886603537506,
      "loss": 2.8728,
      "step": 156591
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8501970767974854,
      "learning_rate": 0.00013953540979489207,
      "loss": 2.8187,
      "step": 156592
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5006017684936523,
      "learning_rate": 0.00013953195358424244,
      "loss": 2.788,
      "step": 156593
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9050040245056152,
      "learning_rate": 0.00013952849740342675,
      "loss": 3.1201,
      "step": 156594
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1807193756103516,
      "learning_rate": 0.00013952504125244587,
      "loss": 3.1215,
      "step": 156595
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4087584018707275,
      "learning_rate": 0.0001395215851313002,
      "loss": 3.0652,
      "step": 156596
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.234001874923706,
      "learning_rate": 0.0001395181290399906,
      "loss": 3.2934,
      "step": 156597
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.809175729751587,
      "learning_rate": 0.0001395146729785176,
      "loss": 2.8124,
      "step": 156598
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1106371879577637,
      "learning_rate": 0.00013951121694688183,
      "loss": 3.0655,
      "step": 156599
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.291961193084717,
      "learning_rate": 0.00013950776094508386,
      "loss": 2.959,
      "step": 156600
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.186149835586548,
      "learning_rate": 0.00013950430497312457,
      "loss": 3.2878,
      "step": 156601
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.076693296432495,
      "learning_rate": 0.0001395008490310043,
      "loss": 2.9169,
      "step": 156602
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.09126877784729,
      "learning_rate": 0.000139497393118724,
      "loss": 2.8889,
      "step": 156603
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.4772839546203613,
      "learning_rate": 0.00013949393723628416,
      "loss": 2.6971,
      "step": 156604
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.232780694961548,
      "learning_rate": 0.00013949048138368528,
      "loss": 2.7922,
      "step": 156605
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9062693119049072,
      "learning_rate": 0.0001394870255609283,
      "loss": 2.7724,
      "step": 156606
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.150125741958618,
      "learning_rate": 0.0001394835697680137,
      "loss": 2.9684,
      "step": 156607
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.204874038696289,
      "learning_rate": 0.00013948011400494203,
      "loss": 2.8339,
      "step": 156608
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.21513032913208,
      "learning_rate": 0.00013947665827171416,
      "loss": 3.0435,
      "step": 156609
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.5868427753448486,
      "learning_rate": 0.00013947320256833047,
      "loss": 2.9243,
      "step": 156610
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4638261795043945,
      "learning_rate": 0.00013946974689479187,
      "loss": 3.1381,
      "step": 156611
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2423324584960938,
      "learning_rate": 0.00013946629125109885,
      "loss": 2.9812,
      "step": 156612
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8350846767425537,
      "learning_rate": 0.00013946283563725213,
      "loss": 2.9952,
      "step": 156613
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.207578420639038,
      "learning_rate": 0.00013945938005325214,
      "loss": 2.9013,
      "step": 156614
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.595798492431641,
      "learning_rate": 0.00013945592449909978,
      "loss": 2.9819,
      "step": 156615
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8501813411712646,
      "learning_rate": 0.00013945246897479553,
      "loss": 2.7912,
      "step": 156616
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.9520195722579956,
      "learning_rate": 0.00013944901348034018,
      "loss": 3.1636,
      "step": 156617
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5613019466400146,
      "learning_rate": 0.0001394455580157343,
      "loss": 3.0588,
      "step": 156618
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4344050884246826,
      "learning_rate": 0.00013944210258097842,
      "loss": 2.8411,
      "step": 156619
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.6806864738464355,
      "learning_rate": 0.00013943864717607337,
      "loss": 3.0174,
      "step": 156620
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3259639739990234,
      "learning_rate": 0.0001394351918010197,
      "loss": 3.0678,
      "step": 156621
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.604555606842041,
      "learning_rate": 0.00013943173645581796,
      "loss": 2.8198,
      "step": 156622
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1415226459503174,
      "learning_rate": 0.000139428281140469,
      "loss": 3.0982,
      "step": 156623
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.171041965484619,
      "learning_rate": 0.00013942482585497338,
      "loss": 2.695,
      "step": 156624
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.028704643249512,
      "learning_rate": 0.00013942137059933158,
      "loss": 2.8362,
      "step": 156625
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4236371517181396,
      "learning_rate": 0.00013941791537354448,
      "loss": 2.9576,
      "step": 156626
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.616629123687744,
      "learning_rate": 0.00013941446017761252,
      "loss": 3.1257,
      "step": 156627
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9250831604003906,
      "learning_rate": 0.00013941100501153658,
      "loss": 2.8881,
      "step": 156628
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.161123752593994,
      "learning_rate": 0.00013940754987531716,
      "loss": 3.1013,
      "step": 156629
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.558896541595459,
      "learning_rate": 0.00013940409476895487,
      "loss": 3.0337,
      "step": 156630
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.142594337463379,
      "learning_rate": 0.0001394006396924503,
      "loss": 2.8661,
      "step": 156631
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.365018367767334,
      "learning_rate": 0.0001393971846458043,
      "loss": 3.0029,
      "step": 156632
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3208062648773193,
      "learning_rate": 0.00013939372962901728,
      "loss": 2.8084,
      "step": 156633
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.496981620788574,
      "learning_rate": 0.00013939027464209008,
      "loss": 3.0051,
      "step": 156634
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.099059820175171,
      "learning_rate": 0.00013938681968502333,
      "loss": 2.8645,
      "step": 156635
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.568868398666382,
      "learning_rate": 0.00013938336475781754,
      "loss": 2.8622,
      "step": 156636
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3946115970611572,
      "learning_rate": 0.0001393799098604733,
      "loss": 2.8361,
      "step": 156637
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8919906616210938,
      "learning_rate": 0.00013937645499299153,
      "loss": 2.8249,
      "step": 156638
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.527771234512329,
      "learning_rate": 0.00013937300015537255,
      "loss": 2.7638,
      "step": 156639
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.182539701461792,
      "learning_rate": 0.00013936954534761727,
      "loss": 2.8396,
      "step": 156640
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.21557879447937,
      "learning_rate": 0.00013936609056972629,
      "loss": 2.8992,
      "step": 156641
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.334629774093628,
      "learning_rate": 0.00013936263582170014,
      "loss": 3.0249,
      "step": 156642
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3949618339538574,
      "learning_rate": 0.0001393591811035394,
      "loss": 2.9125,
      "step": 156643
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.320678472518921,
      "learning_rate": 0.00013935572641524493,
      "loss": 2.7055,
      "step": 156644
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.5329666137695312,
      "learning_rate": 0.0001393522717568171,
      "loss": 2.7113,
      "step": 156645
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.073183298110962,
      "learning_rate": 0.0001393488171282569,
      "loss": 3.0437,
      "step": 156646
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1548118591308594,
      "learning_rate": 0.00013934536252956477,
      "loss": 2.7406,
      "step": 156647
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1316139698028564,
      "learning_rate": 0.00013934190796074136,
      "loss": 3.0891,
      "step": 156648
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2056884765625,
      "learning_rate": 0.0001393384534217872,
      "loss": 2.747,
      "step": 156649
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.158853530883789,
      "learning_rate": 0.00013933499891270322,
      "loss": 3.1369,
      "step": 156650
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.160292625427246,
      "learning_rate": 0.00013933154443348972,
      "loss": 2.8639,
      "step": 156651
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3563296794891357,
      "learning_rate": 0.00013932808998414766,
      "loss": 2.7451,
      "step": 156652
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4582924842834473,
      "learning_rate": 0.00013932463556467754,
      "loss": 3.0195,
      "step": 156653
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3753435611724854,
      "learning_rate": 0.00013932118117508,
      "loss": 2.7113,
      "step": 156654
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.9977887868881226,
      "learning_rate": 0.0001393177268153556,
      "loss": 3.0134,
      "step": 156655
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.484835624694824,
      "learning_rate": 0.0001393142724855051,
      "loss": 2.955,
      "step": 156656
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6363251209259033,
      "learning_rate": 0.00013931081818552906,
      "loss": 3.1308,
      "step": 156657
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1933112144470215,
      "learning_rate": 0.00013930736391542828,
      "loss": 3.0697,
      "step": 156658
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.408470630645752,
      "learning_rate": 0.0001393039096752033,
      "loss": 3.0898,
      "step": 156659
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.57737135887146,
      "learning_rate": 0.00013930045546485474,
      "loss": 3.221,
      "step": 156660
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2168281078338623,
      "learning_rate": 0.00013929700128438314,
      "loss": 2.8764,
      "step": 156661
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.877981185913086,
      "learning_rate": 0.0001392935471337894,
      "loss": 2.6135,
      "step": 156662
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.082617998123169,
      "learning_rate": 0.0001392900930130739,
      "loss": 3.1473,
      "step": 156663
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3459322452545166,
      "learning_rate": 0.00013928663892223749,
      "loss": 2.8137,
      "step": 156664
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.5996570587158203,
      "learning_rate": 0.00013928318486128077,
      "loss": 2.9062,
      "step": 156665
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.623216152191162,
      "learning_rate": 0.00013927973083020432,
      "loss": 2.6592,
      "step": 156666
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3440182209014893,
      "learning_rate": 0.0001392762768290087,
      "loss": 2.9089,
      "step": 156667
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.827008008956909,
      "learning_rate": 0.00013927282285769473,
      "loss": 3.1926,
      "step": 156668
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.08634090423584,
      "learning_rate": 0.0001392693689162629,
      "loss": 3.0676,
      "step": 156669
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.205491065979004,
      "learning_rate": 0.00013926591500471402,
      "loss": 3.1787,
      "step": 156670
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8074169158935547,
      "learning_rate": 0.00013926246112304868,
      "loss": 2.7085,
      "step": 156671
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.498385429382324,
      "learning_rate": 0.00013925900727126744,
      "loss": 2.9425,
      "step": 156672
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4281601905822754,
      "learning_rate": 0.0001392555534493709,
      "loss": 2.9922,
      "step": 156673
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.9156880378723145,
      "learning_rate": 0.0001392520996573599,
      "loss": 2.9071,
      "step": 156674
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7821271419525146,
      "learning_rate": 0.00013924864589523483,
      "loss": 2.8443,
      "step": 156675
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.645287275314331,
      "learning_rate": 0.0001392451921629966,
      "loss": 3.0942,
      "step": 156676
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.226392984390259,
      "learning_rate": 0.00013924173846064557,
      "loss": 3.0671,
      "step": 156677
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.729980945587158,
      "learning_rate": 0.00013923828478818283,
      "loss": 2.9861,
      "step": 156678
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.9428479671478271,
      "learning_rate": 0.00013923483114560846,
      "loss": 2.932,
      "step": 156679
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.7449097633361816,
      "learning_rate": 0.0001392313775329235,
      "loss": 2.8486,
      "step": 156680
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6422488689422607,
      "learning_rate": 0.00013922792395012833,
      "loss": 2.894,
      "step": 156681
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.795180082321167,
      "learning_rate": 0.00013922447039722385,
      "loss": 2.9694,
      "step": 156682
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4641058444976807,
      "learning_rate": 0.00013922101687421047,
      "loss": 3.174,
      "step": 156683
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.591261386871338,
      "learning_rate": 0.00013921756338108914,
      "loss": 3.0714,
      "step": 156684
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1128344535827637,
      "learning_rate": 0.00013921410991786008,
      "loss": 3.0821,
      "step": 156685
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4968550205230713,
      "learning_rate": 0.00013921065648452428,
      "loss": 2.854,
      "step": 156686
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4023141860961914,
      "learning_rate": 0.00013920720308108214,
      "loss": 2.9564,
      "step": 156687
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4562840461730957,
      "learning_rate": 0.0001392037497075345,
      "loss": 3.0356,
      "step": 156688
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5108587741851807,
      "learning_rate": 0.00013920029636388184,
      "loss": 3.1478,
      "step": 156689
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.729978084564209,
      "learning_rate": 0.000139196843050125,
      "loss": 3.0675,
      "step": 156690
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.014911651611328,
      "learning_rate": 0.0001391933897662645,
      "loss": 2.9742,
      "step": 156691
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5342652797698975,
      "learning_rate": 0.00013918993651230093,
      "loss": 2.9736,
      "step": 156692
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.70890474319458,
      "learning_rate": 0.00013918648328823494,
      "loss": 2.9448,
      "step": 156693
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3183634281158447,
      "learning_rate": 0.0001391830300940673,
      "loss": 3.1047,
      "step": 156694
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9413321018218994,
      "learning_rate": 0.00013917957692979847,
      "loss": 3.0507,
      "step": 156695
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.213906288146973,
      "learning_rate": 0.0001391761237954293,
      "loss": 2.8926,
      "step": 156696
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3976552486419678,
      "learning_rate": 0.0001391726706909603,
      "loss": 3.1904,
      "step": 156697
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.141667366027832,
      "learning_rate": 0.0001391692176163922,
      "loss": 3.0152,
      "step": 156698
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.137335777282715,
      "learning_rate": 0.0001391657645717254,
      "loss": 3.0404,
      "step": 156699
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.71941876411438,
      "learning_rate": 0.00013916231155696086,
      "loss": 2.8309,
      "step": 156700
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.5320115089416504,
      "learning_rate": 0.00013915885857209895,
      "loss": 3.0969,
      "step": 156701
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.433114767074585,
      "learning_rate": 0.00013915540561714056,
      "loss": 2.9186,
      "step": 156702
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8590312004089355,
      "learning_rate": 0.00013915195269208625,
      "loss": 3.1136,
      "step": 156703
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.389575958251953,
      "learning_rate": 0.00013914849979693647,
      "loss": 3.0117,
      "step": 156704
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.940911054611206,
      "learning_rate": 0.00013914504693169217,
      "loss": 3.1068,
      "step": 156705
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.115649461746216,
      "learning_rate": 0.00013914159409635383,
      "loss": 2.9962,
      "step": 156706
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.266220808029175,
      "learning_rate": 0.000139138141290922,
      "loss": 3.0689,
      "step": 156707
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.547358751296997,
      "learning_rate": 0.00013913468851539751,
      "loss": 3.0098,
      "step": 156708
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5024733543395996,
      "learning_rate": 0.00013913123576978097,
      "loss": 2.6376,
      "step": 156709
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.371926784515381,
      "learning_rate": 0.00013912778305407282,
      "loss": 3.2053,
      "step": 156710
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0168001651763916,
      "learning_rate": 0.00013912433036827396,
      "loss": 2.9237,
      "step": 156711
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4469878673553467,
      "learning_rate": 0.00013912087771238482,
      "loss": 3.2077,
      "step": 156712
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2547218799591064,
      "learning_rate": 0.00013911742508640625,
      "loss": 2.6812,
      "step": 156713
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6972928047180176,
      "learning_rate": 0.0001391139724903388,
      "loss": 3.226,
      "step": 156714
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4577126502990723,
      "learning_rate": 0.0001391105199241831,
      "loss": 2.4187,
      "step": 156715
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.331645965576172,
      "learning_rate": 0.00013910706738793965,
      "loss": 3.1425,
      "step": 156716
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.366157293319702,
      "learning_rate": 0.00013910361488160937,
      "loss": 2.7749,
      "step": 156717
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6108553409576416,
      "learning_rate": 0.0001391001624051927,
      "loss": 2.7703,
      "step": 156718
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4640963077545166,
      "learning_rate": 0.00013909670995869037,
      "loss": 2.9545,
      "step": 156719
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.462616205215454,
      "learning_rate": 0.00013909325754210306,
      "loss": 3.0107,
      "step": 156720
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.808779239654541,
      "learning_rate": 0.00013908980515543132,
      "loss": 2.8896,
      "step": 156721
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.078193426132202,
      "learning_rate": 0.00013908635279867573,
      "loss": 3.0193,
      "step": 156722
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4188177585601807,
      "learning_rate": 0.00013908290047183713,
      "loss": 3.0263,
      "step": 156723
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.333878755569458,
      "learning_rate": 0.00013907944817491592,
      "loss": 3.1357,
      "step": 156724
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.299447536468506,
      "learning_rate": 0.00013907599590791303,
      "loss": 2.9413,
      "step": 156725
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.4753477573394775,
      "learning_rate": 0.00013907254367082894,
      "loss": 2.6139,
      "step": 156726
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.310689926147461,
      "learning_rate": 0.0001390690914636643,
      "loss": 2.8868,
      "step": 156727
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.426223039627075,
      "learning_rate": 0.00013906563928641963,
      "loss": 3.0214,
      "step": 156728
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0676767826080322,
      "learning_rate": 0.00013906218713909582,
      "loss": 3.1128,
      "step": 156729
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.433203935623169,
      "learning_rate": 0.00013905873502169326,
      "loss": 3.1801,
      "step": 156730
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.832885503768921,
      "learning_rate": 0.00013905528293421286,
      "loss": 2.8187,
      "step": 156731
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4026668071746826,
      "learning_rate": 0.00013905183087665508,
      "loss": 2.8826,
      "step": 156732
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.245134115219116,
      "learning_rate": 0.00013904837884902064,
      "loss": 2.9204,
      "step": 156733
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.912165641784668,
      "learning_rate": 0.00013904492685130998,
      "loss": 3.1354,
      "step": 156734
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0185863971710205,
      "learning_rate": 0.00013904147488352406,
      "loss": 2.7322,
      "step": 156735
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.216728925704956,
      "learning_rate": 0.0001390380229456633,
      "loss": 3.2304,
      "step": 156736
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.932363033294678,
      "learning_rate": 0.00013903457103772845,
      "loss": 2.8519,
      "step": 156737
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9354279041290283,
      "learning_rate": 0.00013903111915972014,
      "loss": 2.9281,
      "step": 156738
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1630358695983887,
      "learning_rate": 0.00013902766731163895,
      "loss": 3.1555,
      "step": 156739
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4055144786834717,
      "learning_rate": 0.00013902421549348549,
      "loss": 2.9659,
      "step": 156740
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.008467435836792,
      "learning_rate": 0.00013902076370526055,
      "loss": 2.8805,
      "step": 156741
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.6584720611572266,
      "learning_rate": 0.0001390173119469646,
      "loss": 2.8951,
      "step": 156742
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4604554176330566,
      "learning_rate": 0.00013901386021859848,
      "loss": 2.9056,
      "step": 156743
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.158128023147583,
      "learning_rate": 0.0001390104085201626,
      "loss": 3.0424,
      "step": 156744
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6488242149353027,
      "learning_rate": 0.00013900695685165794,
      "loss": 2.8101,
      "step": 156745
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.119751214981079,
      "learning_rate": 0.00013900350521308473,
      "loss": 2.8271,
      "step": 156746
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.5967702865600586,
      "learning_rate": 0.00013900005360444387,
      "loss": 2.8313,
      "step": 156747
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.352255344390869,
      "learning_rate": 0.00013899660202573586,
      "loss": 2.9173,
      "step": 156748
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4430692195892334,
      "learning_rate": 0.00013899315047696155,
      "loss": 2.9202,
      "step": 156749
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.139115333557129,
      "learning_rate": 0.00013898969895812131,
      "loss": 3.0272,
      "step": 156750
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.543396472930908,
      "learning_rate": 0.00013898624746921617,
      "loss": 3.1102,
      "step": 156751
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.079216718673706,
      "learning_rate": 0.00013898279601024628,
      "loss": 3.0364,
      "step": 156752
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5857789516448975,
      "learning_rate": 0.00013897934458121267,
      "loss": 2.8096,
      "step": 156753
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1850697994232178,
      "learning_rate": 0.00013897589318211567,
      "loss": 2.8286,
      "step": 156754
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3064379692077637,
      "learning_rate": 0.00013897244181295627,
      "loss": 2.8014,
      "step": 156755
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0901877880096436,
      "learning_rate": 0.0001389689904737348,
      "loss": 3.1727,
      "step": 156756
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.871347665786743,
      "learning_rate": 0.00013896553916445222,
      "loss": 3.1022,
      "step": 156757
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2458715438842773,
      "learning_rate": 0.00013896208788510882,
      "loss": 3.0909,
      "step": 156758
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1393494606018066,
      "learning_rate": 0.00013895863663570548,
      "loss": 3.071,
      "step": 156759
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0753486156463623,
      "learning_rate": 0.00013895518541624268,
      "loss": 2.8659,
      "step": 156760
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.773756265640259,
      "learning_rate": 0.00013895173422672123,
      "loss": 3.1434,
      "step": 156761
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6846561431884766,
      "learning_rate": 0.0001389482830671416,
      "loss": 2.9086,
      "step": 156762
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.227811813354492,
      "learning_rate": 0.00013894483193750476,
      "loss": 2.8852,
      "step": 156763
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2795474529266357,
      "learning_rate": 0.00013894138083781087,
      "loss": 3.0201,
      "step": 156764
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3179614543914795,
      "learning_rate": 0.00013893792976806096,
      "loss": 3.0422,
      "step": 156765
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.05293607711792,
      "learning_rate": 0.00013893447872825537,
      "loss": 2.8085,
      "step": 156766
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5794501304626465,
      "learning_rate": 0.00013893102771839506,
      "loss": 2.7929,
      "step": 156767
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4894044399261475,
      "learning_rate": 0.00013892757673848039,
      "loss": 3.1848,
      "step": 156768
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9916067123413086,
      "learning_rate": 0.00013892412578851235,
      "loss": 2.9211,
      "step": 156769
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2235376834869385,
      "learning_rate": 0.0001389206748684911,
      "loss": 3.0937,
      "step": 156770
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5781445503234863,
      "learning_rate": 0.00013891722397841767,
      "loss": 2.8046,
      "step": 156771
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.389523983001709,
      "learning_rate": 0.00013891377311829245,
      "loss": 2.8721,
      "step": 156772
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3662047386169434,
      "learning_rate": 0.00013891032228811634,
      "loss": 2.9843,
      "step": 156773
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0672056674957275,
      "learning_rate": 0.0001389068714878897,
      "loss": 2.7773,
      "step": 156774
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.409919500350952,
      "learning_rate": 0.00013890342071761345,
      "loss": 3.0323,
      "step": 156775
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6419336795806885,
      "learning_rate": 0.00013889996997728805,
      "loss": 2.9006,
      "step": 156776
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2286715507507324,
      "learning_rate": 0.00013889651926691422,
      "loss": 2.935,
      "step": 156777
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5874741077423096,
      "learning_rate": 0.00013889306858649246,
      "loss": 2.8747,
      "step": 156778
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.443890333175659,
      "learning_rate": 0.00013888961793602362,
      "loss": 2.9526,
      "step": 156779
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0118587017059326,
      "learning_rate": 0.00013888616731550814,
      "loss": 2.9709,
      "step": 156780
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.877687692642212,
      "learning_rate": 0.0001388827167249469,
      "loss": 3.2533,
      "step": 156781
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.088916778564453,
      "learning_rate": 0.00013887926616434033,
      "loss": 3.1025,
      "step": 156782
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.321488380432129,
      "learning_rate": 0.0001388758156336892,
      "loss": 2.8294,
      "step": 156783
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6772403717041016,
      "learning_rate": 0.00013887236513299397,
      "loss": 2.9422,
      "step": 156784
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2950644493103027,
      "learning_rate": 0.00013886891466225555,
      "loss": 3.0758,
      "step": 156785
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4679462909698486,
      "learning_rate": 0.00013886546422147428,
      "loss": 2.8938,
      "step": 156786
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.047761917114258,
      "learning_rate": 0.00013886201381065112,
      "loss": 2.9511,
      "step": 156787
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5185346603393555,
      "learning_rate": 0.00013885856342978653,
      "loss": 2.6524,
      "step": 156788
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7121102809906006,
      "learning_rate": 0.00013885511307888104,
      "loss": 2.8469,
      "step": 156789
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1214182376861572,
      "learning_rate": 0.00013885166275793556,
      "loss": 2.798,
      "step": 156790
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.500366449356079,
      "learning_rate": 0.0001388482124669506,
      "loss": 3.304,
      "step": 156791
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6735687255859375,
      "learning_rate": 0.00013884476220592668,
      "loss": 2.9956,
      "step": 156792
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7510533332824707,
      "learning_rate": 0.0001388413119748647,
      "loss": 2.7597,
      "step": 156793
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.244094133377075,
      "learning_rate": 0.00013883786177376514,
      "loss": 3.1806,
      "step": 156794
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.042384147644043,
      "learning_rate": 0.00013883441160262856,
      "loss": 2.954,
      "step": 156795
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6876044273376465,
      "learning_rate": 0.0001388309614614558,
      "loss": 3.038,
      "step": 156796
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8187544345855713,
      "learning_rate": 0.00013882751135024741,
      "loss": 2.9047,
      "step": 156797
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.478645086288452,
      "learning_rate": 0.0001388240612690039,
      "loss": 3.0257,
      "step": 156798
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4484386444091797,
      "learning_rate": 0.00013882061121772623,
      "loss": 3.2142,
      "step": 156799
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6747090816497803,
      "learning_rate": 0.0001388171611964148,
      "loss": 2.9706,
      "step": 156800
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8295955657958984,
      "learning_rate": 0.00013881371120507015,
      "loss": 2.6922,
      "step": 156801
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1246020793914795,
      "learning_rate": 0.00013881026124369324,
      "loss": 2.9306,
      "step": 156802
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.659001111984253,
      "learning_rate": 0.0001388068113122844,
      "loss": 3.064,
      "step": 156803
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7984976768493652,
      "learning_rate": 0.00013880336141084454,
      "loss": 2.764,
      "step": 156804
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1272389888763428,
      "learning_rate": 0.0001387999115393742,
      "loss": 2.8558,
      "step": 156805
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.329481363296509,
      "learning_rate": 0.00013879646169787397,
      "loss": 3.0407,
      "step": 156806
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.013333559036255,
      "learning_rate": 0.00013879301188634443,
      "loss": 2.9958,
      "step": 156807
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3477466106414795,
      "learning_rate": 0.0001387895621047864,
      "loss": 3.1785,
      "step": 156808
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.1117143630981445,
      "learning_rate": 0.00013878611235320035,
      "loss": 2.9659,
      "step": 156809
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7931718826293945,
      "learning_rate": 0.00013878266263158708,
      "loss": 3.0301,
      "step": 156810
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1006276607513428,
      "learning_rate": 0.00013877921293994707,
      "loss": 2.8914,
      "step": 156811
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6980230808258057,
      "learning_rate": 0.00013877576327828126,
      "loss": 2.7586,
      "step": 156812
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.404162645339966,
      "learning_rate": 0.00013877231364658988,
      "loss": 2.9595,
      "step": 156813
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.025449752807617,
      "learning_rate": 0.00013876886404487387,
      "loss": 2.9425,
      "step": 156814
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.41165828704834,
      "learning_rate": 0.00013876541447313363,
      "loss": 2.8863,
      "step": 156815
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.946613311767578,
      "learning_rate": 0.00013876196493137007,
      "loss": 3.0312,
      "step": 156816
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2381751537323,
      "learning_rate": 0.00013875851541958362,
      "loss": 2.9034,
      "step": 156817
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.29516339302063,
      "learning_rate": 0.0001387550659377752,
      "loss": 3.2023,
      "step": 156818
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.655487298965454,
      "learning_rate": 0.00013875161648594506,
      "loss": 2.8318,
      "step": 156819
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.720458030700684,
      "learning_rate": 0.00013874816706409415,
      "loss": 2.9508,
      "step": 156820
    },
    {
      "epoch": 2.04,
      "grad_norm": 5.134896755218506,
      "learning_rate": 0.00013874471767222284,
      "loss": 2.8772,
      "step": 156821
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7815239429473877,
      "learning_rate": 0.0001387412683103321,
      "loss": 3.1203,
      "step": 156822
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.008638620376587,
      "learning_rate": 0.00013873781897842227,
      "loss": 3.0287,
      "step": 156823
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5656957626342773,
      "learning_rate": 0.0001387343696764943,
      "loss": 3.1047,
      "step": 156824
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3379571437835693,
      "learning_rate": 0.0001387309204045485,
      "loss": 2.8979,
      "step": 156825
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7166476249694824,
      "learning_rate": 0.00013872747116258576,
      "loss": 2.6633,
      "step": 156826
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.162790298461914,
      "learning_rate": 0.0001387240219506065,
      "loss": 3.02,
      "step": 156827
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3042819499969482,
      "learning_rate": 0.0001387205727686116,
      "loss": 2.9945,
      "step": 156828
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.383441209793091,
      "learning_rate": 0.0001387171236166015,
      "loss": 3.1612,
      "step": 156829
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.8128464221954346,
      "learning_rate": 0.0001387136744945771,
      "loss": 3.0214,
      "step": 156830
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.227174758911133,
      "learning_rate": 0.0001387102254025387,
      "loss": 3.1312,
      "step": 156831
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2794086933135986,
      "learning_rate": 0.00013870677634048723,
      "loss": 2.7905,
      "step": 156832
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0456480979919434,
      "learning_rate": 0.00013870332730842307,
      "loss": 2.8012,
      "step": 156833
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.841172933578491,
      "learning_rate": 0.00013869987830634713,
      "loss": 3.0665,
      "step": 156834
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.637988567352295,
      "learning_rate": 0.00013869642933425985,
      "loss": 3.007,
      "step": 156835
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5992095470428467,
      "learning_rate": 0.00013869298039216214,
      "loss": 2.6475,
      "step": 156836
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.024705171585083,
      "learning_rate": 0.00013868953148005422,
      "loss": 2.986,
      "step": 156837
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.408250331878662,
      "learning_rate": 0.0001386860825979371,
      "loss": 2.8918,
      "step": 156838
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.677077293395996,
      "learning_rate": 0.00013868263374581114,
      "loss": 2.938,
      "step": 156839
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4986789226531982,
      "learning_rate": 0.00013867918492367726,
      "loss": 2.9509,
      "step": 156840
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.926426649093628,
      "learning_rate": 0.00013867573613153582,
      "loss": 3.0251,
      "step": 156841
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2461118698120117,
      "learning_rate": 0.00013867228736938786,
      "loss": 3.0419,
      "step": 156842
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.8753273487091064,
      "learning_rate": 0.00013866883863723355,
      "loss": 2.981,
      "step": 156843
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.598883628845215,
      "learning_rate": 0.00013866538993507384,
      "loss": 2.8564,
      "step": 156844
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.398911714553833,
      "learning_rate": 0.00013866194126290916,
      "loss": 3.0316,
      "step": 156845
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1156699657440186,
      "learning_rate": 0.0001386584926207404,
      "loss": 2.8881,
      "step": 156846
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0520246028900146,
      "learning_rate": 0.00013865504400856796,
      "loss": 3.137,
      "step": 156847
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2423501014709473,
      "learning_rate": 0.0001386515954263928,
      "loss": 3.104,
      "step": 156848
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.52337908744812,
      "learning_rate": 0.00013864814687421516,
      "loss": 2.8944,
      "step": 156849
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3480172157287598,
      "learning_rate": 0.00013864469835203597,
      "loss": 2.9375,
      "step": 156850
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.71921443939209,
      "learning_rate": 0.00013864124985985565,
      "loss": 3.095,
      "step": 156851
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.610910415649414,
      "learning_rate": 0.00013863780139767508,
      "loss": 2.9777,
      "step": 156852
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1140670776367188,
      "learning_rate": 0.0001386343529654947,
      "loss": 2.627,
      "step": 156853
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7046828269958496,
      "learning_rate": 0.00013863090456331547,
      "loss": 3.036,
      "step": 156854
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.285647392272949,
      "learning_rate": 0.00013862745619113755,
      "loss": 2.8708,
      "step": 156855
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7333483695983887,
      "learning_rate": 0.00013862400784896194,
      "loss": 2.9385,
      "step": 156856
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.019587516784668,
      "learning_rate": 0.00013862055953678908,
      "loss": 2.9081,
      "step": 156857
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2762937545776367,
      "learning_rate": 0.0001386171112546198,
      "loss": 2.9815,
      "step": 156858
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.597166061401367,
      "learning_rate": 0.00013861366300245456,
      "loss": 2.8396,
      "step": 156859
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4269449710845947,
      "learning_rate": 0.0001386102147802943,
      "loss": 3.1068,
      "step": 156860
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1790266036987305,
      "learning_rate": 0.0001386067665881392,
      "loss": 2.9489,
      "step": 156861
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.691758632659912,
      "learning_rate": 0.00013860331842599027,
      "loss": 3.3027,
      "step": 156862
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.401174545288086,
      "learning_rate": 0.00013859987029384797,
      "loss": 3.0105,
      "step": 156863
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.510864496231079,
      "learning_rate": 0.00013859642219171304,
      "loss": 2.7266,
      "step": 156864
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.349850654602051,
      "learning_rate": 0.000138592974119586,
      "loss": 2.9377,
      "step": 156865
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5028231143951416,
      "learning_rate": 0.0001385895260774677,
      "loss": 2.9115,
      "step": 156866
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4036715030670166,
      "learning_rate": 0.00013858607806535863,
      "loss": 3.0597,
      "step": 156867
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5971567630767822,
      "learning_rate": 0.00013858263008325946,
      "loss": 2.9232,
      "step": 156868
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7610857486724854,
      "learning_rate": 0.00013857918213117072,
      "loss": 2.796,
      "step": 156869
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.58017897605896,
      "learning_rate": 0.00013857573420909328,
      "loss": 3.1003,
      "step": 156870
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.299997091293335,
      "learning_rate": 0.00013857228631702751,
      "loss": 3.1426,
      "step": 156871
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.177824974060059,
      "learning_rate": 0.00013856883845497436,
      "loss": 2.7969,
      "step": 156872
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.269644021987915,
      "learning_rate": 0.00013856539062293428,
      "loss": 2.9306,
      "step": 156873
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.358612537384033,
      "learning_rate": 0.0001385619428209078,
      "loss": 3.0607,
      "step": 156874
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.419687032699585,
      "learning_rate": 0.0001385584950488959,
      "loss": 2.8824,
      "step": 156875
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2395100593566895,
      "learning_rate": 0.00013855504730689894,
      "loss": 3.0487,
      "step": 156876
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4925081729888916,
      "learning_rate": 0.00013855159959491758,
      "loss": 2.7871,
      "step": 156877
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.333327054977417,
      "learning_rate": 0.0001385481519129526,
      "loss": 2.9438,
      "step": 156878
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.489511013031006,
      "learning_rate": 0.0001385447042610046,
      "loss": 2.844,
      "step": 156879
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2468321323394775,
      "learning_rate": 0.00013854125663907406,
      "loss": 3.1199,
      "step": 156880
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8854565620422363,
      "learning_rate": 0.00013853780904716186,
      "loss": 2.7232,
      "step": 156881
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8983819484710693,
      "learning_rate": 0.00013853436148526853,
      "loss": 2.8342,
      "step": 156882
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.631077766418457,
      "learning_rate": 0.0001385309139533946,
      "loss": 3.0807,
      "step": 156883
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.538198947906494,
      "learning_rate": 0.00013852746645154094,
      "loss": 3.0796,
      "step": 156884
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1825637817382812,
      "learning_rate": 0.00013852401897970805,
      "loss": 2.8085,
      "step": 156885
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.660766124725342,
      "learning_rate": 0.00013852057153789649,
      "loss": 3.1228,
      "step": 156886
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.713909864425659,
      "learning_rate": 0.00013851712412610713,
      "loss": 2.908,
      "step": 156887
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.5721399784088135,
      "learning_rate": 0.00013851367674434033,
      "loss": 2.9929,
      "step": 156888
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1768972873687744,
      "learning_rate": 0.00013851022939259704,
      "loss": 3.012,
      "step": 156889
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.313901901245117,
      "learning_rate": 0.00013850678207087775,
      "loss": 2.9368,
      "step": 156890
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1457982063293457,
      "learning_rate": 0.00013850333477918308,
      "loss": 3.1498,
      "step": 156891
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1127946376800537,
      "learning_rate": 0.0001384998875175136,
      "loss": 2.9782,
      "step": 156892
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.567763328552246,
      "learning_rate": 0.00013849644028587012,
      "loss": 2.7923,
      "step": 156893
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2495269775390625,
      "learning_rate": 0.0001384929930842531,
      "loss": 3.0545,
      "step": 156894
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.320526599884033,
      "learning_rate": 0.0001384895459126634,
      "loss": 3.0222,
      "step": 156895
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.584524154663086,
      "learning_rate": 0.00013848609877110152,
      "loss": 3.104,
      "step": 156896
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.087197780609131,
      "learning_rate": 0.00013848265165956815,
      "loss": 2.973,
      "step": 156897
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7183079719543457,
      "learning_rate": 0.0001384792045780638,
      "loss": 3.2614,
      "step": 156898
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3606956005096436,
      "learning_rate": 0.00013847575752658933,
      "loss": 2.6823,
      "step": 156899
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.9993799924850464,
      "learning_rate": 0.0001384723105051451,
      "loss": 3.0013,
      "step": 156900
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3760390281677246,
      "learning_rate": 0.00013846886351373205,
      "loss": 2.8047,
      "step": 156901
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2243010997772217,
      "learning_rate": 0.0001384654165523506,
      "loss": 2.8924,
      "step": 156902
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.229377031326294,
      "learning_rate": 0.0001384619696210017,
      "loss": 2.9382,
      "step": 156903
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.8018603324890137,
      "learning_rate": 0.0001384585227196855,
      "loss": 2.7512,
      "step": 156904
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.736353635787964,
      "learning_rate": 0.00013845507584840308,
      "loss": 3.1479,
      "step": 156905
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.082559108734131,
      "learning_rate": 0.00013845162900715476,
      "loss": 2.8546,
      "step": 156906
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.802510738372803,
      "learning_rate": 0.0001384481821959415,
      "loss": 2.9152,
      "step": 156907
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.491652727127075,
      "learning_rate": 0.0001384447354147636,
      "loss": 2.9047,
      "step": 156908
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.136610984802246,
      "learning_rate": 0.0001384412886636221,
      "loss": 2.7562,
      "step": 156909
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.532750129699707,
      "learning_rate": 0.0001384378419425172,
      "loss": 3.0781,
      "step": 156910
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.387744903564453,
      "learning_rate": 0.0001384343952514499,
      "loss": 2.9502,
      "step": 156911
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1809351444244385,
      "learning_rate": 0.00013843094859042056,
      "loss": 2.7886,
      "step": 156912
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.0341057777404785,
      "learning_rate": 0.00013842750195943007,
      "loss": 2.9787,
      "step": 156913
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6051137447357178,
      "learning_rate": 0.00013842405535847882,
      "loss": 2.9141,
      "step": 156914
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.752032518386841,
      "learning_rate": 0.00013842060878756785,
      "loss": 3.0692,
      "step": 156915
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3929357528686523,
      "learning_rate": 0.00013841716224669728,
      "loss": 2.8396,
      "step": 156916
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3742127418518066,
      "learning_rate": 0.00013841371573586816,
      "loss": 2.864,
      "step": 156917
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0387752056121826,
      "learning_rate": 0.00013841026925508082,
      "loss": 2.92,
      "step": 156918
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4096171855926514,
      "learning_rate": 0.0001384068228043362,
      "loss": 3.0864,
      "step": 156919
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2633132934570312,
      "learning_rate": 0.0001384033763836347,
      "loss": 2.8868,
      "step": 156920
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.355024576187134,
      "learning_rate": 0.00013839992999297728,
      "loss": 2.8048,
      "step": 156921
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1054623126983643,
      "learning_rate": 0.00013839648363236413,
      "loss": 2.9161,
      "step": 156922
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8052423000335693,
      "learning_rate": 0.00013839303730179628,
      "loss": 3.0555,
      "step": 156923
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.7365005016326904,
      "learning_rate": 0.00013838959100127404,
      "loss": 2.8924,
      "step": 156924
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.67301869392395,
      "learning_rate": 0.00013838614473079836,
      "loss": 2.5803,
      "step": 156925
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7904839515686035,
      "learning_rate": 0.00013838269849036965,
      "loss": 2.8341,
      "step": 156926
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.7787182331085205,
      "learning_rate": 0.00013837925227998886,
      "loss": 2.8227,
      "step": 156927
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2199056148529053,
      "learning_rate": 0.0001383758060996562,
      "loss": 3.199,
      "step": 156928
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.420506715774536,
      "learning_rate": 0.00013837235994937263,
      "loss": 2.7879,
      "step": 156929
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.314568042755127,
      "learning_rate": 0.0001383689138291386,
      "loss": 2.8609,
      "step": 156930
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9635825157165527,
      "learning_rate": 0.0001383654677389549,
      "loss": 2.9347,
      "step": 156931
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.628941297531128,
      "learning_rate": 0.00013836202167882204,
      "loss": 3.0823,
      "step": 156932
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9444010257720947,
      "learning_rate": 0.00013835857564874093,
      "loss": 2.918,
      "step": 156933
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.523775339126587,
      "learning_rate": 0.00013835512964871185,
      "loss": 3.1889,
      "step": 156934
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4264323711395264,
      "learning_rate": 0.00013835168367873563,
      "loss": 3.0291,
      "step": 156935
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3013670444488525,
      "learning_rate": 0.00013834823773881282,
      "loss": 2.993,
      "step": 156936
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.57232928276062,
      "learning_rate": 0.00013834479182894425,
      "loss": 2.9047,
      "step": 156937
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5439300537109375,
      "learning_rate": 0.0001383413459491303,
      "loss": 2.846,
      "step": 156938
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.116255521774292,
      "learning_rate": 0.00013833790009937181,
      "loss": 3.2289,
      "step": 156939
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1287992000579834,
      "learning_rate": 0.0001383344542796694,
      "loss": 3.0067,
      "step": 156940
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.6430160999298096,
      "learning_rate": 0.0001383310084900237,
      "loss": 2.7837,
      "step": 156941
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.856008291244507,
      "learning_rate": 0.00013832756273043516,
      "loss": 2.8623,
      "step": 156942
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.438049554824829,
      "learning_rate": 0.00013832411700090473,
      "loss": 3.132,
      "step": 156943
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1703858375549316,
      "learning_rate": 0.00013832067130143275,
      "loss": 3.0622,
      "step": 156944
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1537444591522217,
      "learning_rate": 0.00013831722563202012,
      "loss": 3.0946,
      "step": 156945
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.8968775272369385,
      "learning_rate": 0.0001383137799926674,
      "loss": 3.2691,
      "step": 156946
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2824037075042725,
      "learning_rate": 0.00013831033438337518,
      "loss": 2.768,
      "step": 156947
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.302149534225464,
      "learning_rate": 0.000138306888804144,
      "loss": 2.7304,
      "step": 156948
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3650717735290527,
      "learning_rate": 0.00013830344325497476,
      "loss": 2.9913,
      "step": 156949
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1441922187805176,
      "learning_rate": 0.00013829999773586784,
      "loss": 3.0898,
      "step": 156950
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.181617021560669,
      "learning_rate": 0.00013829655224682408,
      "loss": 2.9724,
      "step": 156951
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.773927927017212,
      "learning_rate": 0.0001382931067878441,
      "loss": 2.7821,
      "step": 156952
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6882848739624023,
      "learning_rate": 0.00013828966135892847,
      "loss": 2.9782,
      "step": 156953
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.164531707763672,
      "learning_rate": 0.0001382862159600777,
      "loss": 2.8361,
      "step": 156954
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4749374389648438,
      "learning_rate": 0.0001382827705912927,
      "loss": 3.1058,
      "step": 156955
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7825217247009277,
      "learning_rate": 0.0001382793252525739,
      "loss": 2.7434,
      "step": 156956
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8021767139434814,
      "learning_rate": 0.0001382758799439221,
      "loss": 2.8028,
      "step": 156957
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.1912529468536377,
      "learning_rate": 0.0001382724346653379,
      "loss": 2.7052,
      "step": 156958
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5475049018859863,
      "learning_rate": 0.00013826898941682187,
      "loss": 3.0152,
      "step": 156959
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.341215133666992,
      "learning_rate": 0.00013826554419837461,
      "loss": 3.1581,
      "step": 156960
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.794822931289673,
      "learning_rate": 0.00013826209900999692,
      "loss": 2.7647,
      "step": 156961
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6461877822875977,
      "learning_rate": 0.0001382586538516893,
      "loss": 2.9924,
      "step": 156962
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.313267230987549,
      "learning_rate": 0.0001382552087234525,
      "loss": 3.1245,
      "step": 156963
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0595290660858154,
      "learning_rate": 0.00013825176362528715,
      "loss": 2.7528,
      "step": 156964
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.740415096282959,
      "learning_rate": 0.00013824831855719372,
      "loss": 2.8433,
      "step": 156965
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.349851608276367,
      "learning_rate": 0.0001382448735191731,
      "loss": 2.8841,
      "step": 156966
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0488381385803223,
      "learning_rate": 0.00013824142851122582,
      "loss": 2.798,
      "step": 156967
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.343748092651367,
      "learning_rate": 0.0001382379835333524,
      "loss": 2.9733,
      "step": 156968
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.605468988418579,
      "learning_rate": 0.00013823453858555367,
      "loss": 2.7475,
      "step": 156969
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.283393383026123,
      "learning_rate": 0.00013823109366783022,
      "loss": 2.738,
      "step": 156970
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.548142433166504,
      "learning_rate": 0.00013822764878018253,
      "loss": 2.7871,
      "step": 156971
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.3531501293182373,
      "learning_rate": 0.00013822420392261154,
      "loss": 2.9374,
      "step": 156972
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.356477975845337,
      "learning_rate": 0.00013822075909511756,
      "loss": 2.9941,
      "step": 156973
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4676668643951416,
      "learning_rate": 0.00013821731429770152,
      "loss": 3.0021,
      "step": 156974
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.487640619277954,
      "learning_rate": 0.00013821386953036396,
      "loss": 3.1259,
      "step": 156975
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3899078369140625,
      "learning_rate": 0.00013821042479310544,
      "loss": 3.0132,
      "step": 156976
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2059178352355957,
      "learning_rate": 0.00013820698008592657,
      "loss": 2.9972,
      "step": 156977
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.4587836265563965,
      "learning_rate": 0.0001382035354088282,
      "loss": 3.0142,
      "step": 156978
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3880443572998047,
      "learning_rate": 0.00013820009076181074,
      "loss": 3.1629,
      "step": 156979
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5673086643218994,
      "learning_rate": 0.00013819664614487503,
      "loss": 2.8551,
      "step": 156980
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.176530122756958,
      "learning_rate": 0.00013819320155802165,
      "loss": 2.9618,
      "step": 156981
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.975492477416992,
      "learning_rate": 0.00013818975700125117,
      "loss": 2.6384,
      "step": 156982
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.395077705383301,
      "learning_rate": 0.00013818631247456418,
      "loss": 2.8214,
      "step": 156983
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.861356019973755,
      "learning_rate": 0.00013818286797796152,
      "loss": 2.7145,
      "step": 156984
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.725155830383301,
      "learning_rate": 0.0001381794235114436,
      "loss": 3.056,
      "step": 156985
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.801942825317383,
      "learning_rate": 0.0001381759790750113,
      "loss": 3.0557,
      "step": 156986
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.168344020843506,
      "learning_rate": 0.00013817253466866502,
      "loss": 3.2027,
      "step": 156987
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.897573709487915,
      "learning_rate": 0.00013816909029240574,
      "loss": 3.0833,
      "step": 156988
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7995810508728027,
      "learning_rate": 0.00013816564594623367,
      "loss": 2.5841,
      "step": 156989
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.69148325920105,
      "learning_rate": 0.00013816220163014975,
      "loss": 3.0561,
      "step": 156990
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7474727630615234,
      "learning_rate": 0.00013815875734415445,
      "loss": 3.1019,
      "step": 156991
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.5540919303894043,
      "learning_rate": 0.0001381553130882486,
      "loss": 2.9733,
      "step": 156992
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.0920841693878174,
      "learning_rate": 0.0001381518688624326,
      "loss": 3.1935,
      "step": 156993
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.8908543586730957,
      "learning_rate": 0.00013814842466670745,
      "loss": 3.0179,
      "step": 156994
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.4665379524230957,
      "learning_rate": 0.00013814498050107336,
      "loss": 2.8375,
      "step": 156995
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.085634231567383,
      "learning_rate": 0.00013814153636553124,
      "loss": 2.7259,
      "step": 156996
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2781143188476562,
      "learning_rate": 0.0001381380922600816,
      "loss": 2.8881,
      "step": 156997
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5768706798553467,
      "learning_rate": 0.00013813464818472523,
      "loss": 2.8507,
      "step": 156998
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8547933101654053,
      "learning_rate": 0.00013813120413946257,
      "loss": 2.7412,
      "step": 156999
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.755311965942383,
      "learning_rate": 0.00013812776012429463,
      "loss": 3.1826,
      "step": 157000
    },
    {
      "epoch": 2.04,
      "grad_norm": 1.9856412410736084,
      "learning_rate": 0.00013812431613922157,
      "loss": 2.8832,
      "step": 157001
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3721654415130615,
      "learning_rate": 0.00013812087218424433,
      "loss": 2.9674,
      "step": 157002
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.0302677154541016,
      "learning_rate": 0.00013811742825936338,
      "loss": 3.225,
      "step": 157003
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5603115558624268,
      "learning_rate": 0.00013811398436457958,
      "loss": 2.6776,
      "step": 157004
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.118651390075684,
      "learning_rate": 0.00013811054049989331,
      "loss": 2.9712,
      "step": 157005
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7156896591186523,
      "learning_rate": 0.00013810709666530548,
      "loss": 2.8898,
      "step": 157006
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4777650833129883,
      "learning_rate": 0.00013810365286081656,
      "loss": 2.9631,
      "step": 157007
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.4129724502563477,
      "learning_rate": 0.00013810020908642726,
      "loss": 3.0004,
      "step": 157008
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1491875648498535,
      "learning_rate": 0.00013809676534213807,
      "loss": 3.1622,
      "step": 157009
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1612775325775146,
      "learning_rate": 0.00013809332162794985,
      "loss": 3.1081,
      "step": 157010
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8091628551483154,
      "learning_rate": 0.00013808987794386307,
      "loss": 3.118,
      "step": 157011
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.4475202560424805,
      "learning_rate": 0.0001380864342898785,
      "loss": 2.98,
      "step": 157012
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7807159423828125,
      "learning_rate": 0.00013808299066599676,
      "loss": 2.9341,
      "step": 157013
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5285797119140625,
      "learning_rate": 0.00013807954707221838,
      "loss": 2.6733,
      "step": 157014
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.741063356399536,
      "learning_rate": 0.00013807610350854402,
      "loss": 3.1002,
      "step": 157015
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.112938642501831,
      "learning_rate": 0.00013807265997497445,
      "loss": 2.9713,
      "step": 157016
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.27409291267395,
      "learning_rate": 0.00013806921647151015,
      "loss": 2.8497,
      "step": 157017
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.9520716667175293,
      "learning_rate": 0.0001380657729981519,
      "loss": 3.0215,
      "step": 157018
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.171881675720215,
      "learning_rate": 0.00013806232955490032,
      "loss": 3.2384,
      "step": 157019
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.941218137741089,
      "learning_rate": 0.000138058886141756,
      "loss": 3.0492,
      "step": 157020
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7494101524353027,
      "learning_rate": 0.00013805544275871946,
      "loss": 2.9405,
      "step": 157021
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.3984594345092773,
      "learning_rate": 0.00013805199940579161,
      "loss": 2.8428,
      "step": 157022
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.106871604919434,
      "learning_rate": 0.00013804855608297282,
      "loss": 2.8553,
      "step": 157023
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.346014499664307,
      "learning_rate": 0.000138045112790264,
      "loss": 2.7583,
      "step": 157024
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.403640031814575,
      "learning_rate": 0.0001380416695276656,
      "loss": 2.7023,
      "step": 157025
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.159276008605957,
      "learning_rate": 0.00013803822629517837,
      "loss": 2.8822,
      "step": 157026
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7581772804260254,
      "learning_rate": 0.00013803478309280275,
      "loss": 3.1308,
      "step": 157027
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2019059658050537,
      "learning_rate": 0.00013803133992053963,
      "loss": 3.0769,
      "step": 157028
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.20355486869812,
      "learning_rate": 0.0001380278967783894,
      "loss": 2.7945,
      "step": 157029
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7857563495635986,
      "learning_rate": 0.00013802445366635298,
      "loss": 2.9912,
      "step": 157030
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.727034568786621,
      "learning_rate": 0.0001380210105844309,
      "loss": 3.1121,
      "step": 157031
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.321682929992676,
      "learning_rate": 0.00013801756753262375,
      "loss": 3.3101,
      "step": 157032
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.844717264175415,
      "learning_rate": 0.00013801412451093208,
      "loss": 2.9457,
      "step": 157033
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.672593593597412,
      "learning_rate": 0.00013801068151935675,
      "loss": 2.9133,
      "step": 157034
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.361450433731079,
      "learning_rate": 0.0001380072385578982,
      "loss": 2.9584,
      "step": 157035
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6061694622039795,
      "learning_rate": 0.00013800379562655725,
      "loss": 2.7243,
      "step": 157036
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.439321994781494,
      "learning_rate": 0.00013800035272533445,
      "loss": 3.1083,
      "step": 157037
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.83919358253479,
      "learning_rate": 0.00013799690985423043,
      "loss": 2.7759,
      "step": 157038
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6441712379455566,
      "learning_rate": 0.00013799346701324578,
      "loss": 2.8108,
      "step": 157039
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.5616984367370605,
      "learning_rate": 0.00013799002420238128,
      "loss": 3.0132,
      "step": 157040
    },
    {
      "epoch": 2.04,
      "grad_norm": 4.987490177154541,
      "learning_rate": 0.0001379865814216374,
      "loss": 2.9135,
      "step": 157041
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7130894660949707,
      "learning_rate": 0.00013798313867101493,
      "loss": 3.1004,
      "step": 157042
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.341277837753296,
      "learning_rate": 0.00013797969595051452,
      "loss": 2.9703,
      "step": 157043
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6756250858306885,
      "learning_rate": 0.0001379762532601367,
      "loss": 2.8677,
      "step": 157044
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.070495128631592,
      "learning_rate": 0.00013797281059988207,
      "loss": 2.9748,
      "step": 157045
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.7381598949432373,
      "learning_rate": 0.00013796936796975145,
      "loss": 3.0406,
      "step": 157046
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8379342555999756,
      "learning_rate": 0.00013796592536974528,
      "loss": 2.6529,
      "step": 157047
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.2376439571380615,
      "learning_rate": 0.00013796248279986442,
      "loss": 3.0707,
      "step": 157048
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.250929832458496,
      "learning_rate": 0.0001379590402601094,
      "loss": 2.7077,
      "step": 157049
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.6677603721618652,
      "learning_rate": 0.0001379555977504807,
      "loss": 2.9166,
      "step": 157050
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.2279300689697266,
      "learning_rate": 0.00013795215527097923,
      "loss": 2.7189,
      "step": 157051
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.278585433959961,
      "learning_rate": 0.00013794871282160555,
      "loss": 3.0624,
      "step": 157052
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.724121570587158,
      "learning_rate": 0.0001379452704023601,
      "loss": 2.8348,
      "step": 157053
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.338928699493408,
      "learning_rate": 0.00013794182801324387,
      "loss": 3.0462,
      "step": 157054
    },
    {
      "epoch": 2.04,
      "grad_norm": 3.1930739879608154,
      "learning_rate": 0.00013793838565425727,
      "loss": 2.8349,
      "step": 157055
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.55983829498291,
      "learning_rate": 0.00013793494332540088,
      "loss": 3.0287,
      "step": 157056
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.591198682785034,
      "learning_rate": 0.00013793150102667554,
      "loss": 2.92,
      "step": 157057
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.8664870262145996,
      "learning_rate": 0.0001379280587580818,
      "loss": 2.9486,
      "step": 157058
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5579988956451416,
      "learning_rate": 0.00013792461651962018,
      "loss": 2.9303,
      "step": 157059
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7689013481140137,
      "learning_rate": 0.00013792117431129155,
      "loss": 2.6883,
      "step": 157060
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.1442646980285645,
      "learning_rate": 0.0001379177321330964,
      "loss": 2.9208,
      "step": 157061
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.963688373565674,
      "learning_rate": 0.00013791428998503535,
      "loss": 2.8232,
      "step": 157062
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4011340141296387,
      "learning_rate": 0.00013791084786710916,
      "loss": 3.1826,
      "step": 157063
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.7457826137542725,
      "learning_rate": 0.00013790740577931832,
      "loss": 2.8245,
      "step": 157064
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.237119674682617,
      "learning_rate": 0.00013790396372166365,
      "loss": 2.9076,
      "step": 157065
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2341814041137695,
      "learning_rate": 0.0001379005216941457,
      "loss": 2.7196,
      "step": 157066
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.550818920135498,
      "learning_rate": 0.00013789707969676508,
      "loss": 3.0633,
      "step": 157067
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.499952793121338,
      "learning_rate": 0.0001378936377295224,
      "loss": 3.1787,
      "step": 157068
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.513868808746338,
      "learning_rate": 0.0001378901957924184,
      "loss": 3.0235,
      "step": 157069
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.4318642616271973,
      "learning_rate": 0.0001378867538854536,
      "loss": 3.0254,
      "step": 157070
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.581975221633911,
      "learning_rate": 0.0001378833120086288,
      "loss": 3.0901,
      "step": 157071
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.241481065750122,
      "learning_rate": 0.00013787987016194444,
      "loss": 2.7708,
      "step": 157072
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.679873466491699,
      "learning_rate": 0.0001378764283454014,
      "loss": 2.8504,
      "step": 157073
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1080102920532227,
      "learning_rate": 0.0001378729865590002,
      "loss": 3.1221,
      "step": 157074
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.024731159210205,
      "learning_rate": 0.00013786954480274148,
      "loss": 3.1483,
      "step": 157075
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3333041667938232,
      "learning_rate": 0.00013786610307662574,
      "loss": 2.8046,
      "step": 157076
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.372490644454956,
      "learning_rate": 0.00013786266138065385,
      "loss": 3.0286,
      "step": 157077
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.525022506713867,
      "learning_rate": 0.00013785921971482624,
      "loss": 2.9797,
      "step": 157078
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.450068712234497,
      "learning_rate": 0.0001378557780791438,
      "loss": 2.8801,
      "step": 157079
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4025051593780518,
      "learning_rate": 0.00013785233647360703,
      "loss": 2.9476,
      "step": 157080
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2727274894714355,
      "learning_rate": 0.00013784889489821655,
      "loss": 3.0558,
      "step": 157081
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6465063095092773,
      "learning_rate": 0.0001378454533529729,
      "loss": 3.0339,
      "step": 157082
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6173512935638428,
      "learning_rate": 0.00013784201183787698,
      "loss": 2.7714,
      "step": 157083
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.950512647628784,
      "learning_rate": 0.00013783857035292916,
      "loss": 3.0423,
      "step": 157084
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.73415470123291,
      "learning_rate": 0.00013783512889813037,
      "loss": 2.9178,
      "step": 157085
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.395395517349243,
      "learning_rate": 0.00013783168747348104,
      "loss": 2.8464,
      "step": 157086
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1935794353485107,
      "learning_rate": 0.00013782824607898187,
      "loss": 3.0636,
      "step": 157087
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3281192779541016,
      "learning_rate": 0.00013782480471463338,
      "loss": 3.1467,
      "step": 157088
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2488114833831787,
      "learning_rate": 0.00013782136338043644,
      "loss": 2.7896,
      "step": 157089
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2526748180389404,
      "learning_rate": 0.00013781792207639146,
      "loss": 2.8187,
      "step": 157090
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1254000663757324,
      "learning_rate": 0.00013781448080249926,
      "loss": 2.9589,
      "step": 157091
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.4425578117370605,
      "learning_rate": 0.00013781103955876048,
      "loss": 2.9158,
      "step": 157092
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.902719736099243,
      "learning_rate": 0.00013780759834517564,
      "loss": 3.0146,
      "step": 157093
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.562546491622925,
      "learning_rate": 0.00013780415716174534,
      "loss": 2.8484,
      "step": 157094
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.244478940963745,
      "learning_rate": 0.0001378007160084704,
      "loss": 3.1456,
      "step": 157095
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.716247081756592,
      "learning_rate": 0.0001377972748853513,
      "loss": 2.8934,
      "step": 157096
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.312568664550781,
      "learning_rate": 0.00013779383379238882,
      "loss": 2.7977,
      "step": 157097
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.314076900482178,
      "learning_rate": 0.00013779039272958352,
      "loss": 3.0481,
      "step": 157098
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.4997127056121826,
      "learning_rate": 0.00013778695169693606,
      "loss": 3.044,
      "step": 157099
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.698662757873535,
      "learning_rate": 0.00013778351069444698,
      "loss": 3.1324,
      "step": 157100
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9509873390197754,
      "learning_rate": 0.00013778006972211708,
      "loss": 3.0369,
      "step": 157101
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.722166061401367,
      "learning_rate": 0.00013777662877994686,
      "loss": 3.0311,
      "step": 157102
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.92002010345459,
      "learning_rate": 0.0001377731878679371,
      "loss": 3.2649,
      "step": 157103
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.876619815826416,
      "learning_rate": 0.00013776974698608837,
      "loss": 3.1529,
      "step": 157104
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2211451530456543,
      "learning_rate": 0.00013776630613440133,
      "loss": 2.9083,
      "step": 157105
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4038987159729004,
      "learning_rate": 0.00013776286531287646,
      "loss": 2.9547,
      "step": 157106
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5646774768829346,
      "learning_rate": 0.00013775942452151467,
      "loss": 2.8933,
      "step": 157107
    },
    {
      "epoch": 2.05,
      "grad_norm": 6.223160266876221,
      "learning_rate": 0.00013775598376031636,
      "loss": 3.02,
      "step": 157108
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.29569149017334,
      "learning_rate": 0.00013775254302928237,
      "loss": 3.0335,
      "step": 157109
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.882885217666626,
      "learning_rate": 0.00013774910232841324,
      "loss": 3.0492,
      "step": 157110
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.330129384994507,
      "learning_rate": 0.0001377456616577096,
      "loss": 3.2236,
      "step": 157111
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.961212158203125,
      "learning_rate": 0.000137742221017172,
      "loss": 2.9814,
      "step": 157112
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.141773223876953,
      "learning_rate": 0.00013773878040680134,
      "loss": 2.7512,
      "step": 157113
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.059708595275879,
      "learning_rate": 0.00013773533982659793,
      "loss": 2.7359,
      "step": 157114
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.965603351593018,
      "learning_rate": 0.00013773189927656273,
      "loss": 3.1148,
      "step": 157115
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1225616931915283,
      "learning_rate": 0.00013772845875669623,
      "loss": 2.899,
      "step": 157116
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1903343200683594,
      "learning_rate": 0.00013772501826699908,
      "loss": 3.0367,
      "step": 157117
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.259174823760986,
      "learning_rate": 0.00013772157780747178,
      "loss": 2.8604,
      "step": 157118
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.545369863510132,
      "learning_rate": 0.00013771813737811526,
      "loss": 2.7882,
      "step": 157119
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6389691829681396,
      "learning_rate": 0.00013771469697892983,
      "loss": 3.1324,
      "step": 157120
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0686163902282715,
      "learning_rate": 0.00013771125660991645,
      "loss": 3.0618,
      "step": 157121
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.521920680999756,
      "learning_rate": 0.0001377078162710756,
      "loss": 3.1254,
      "step": 157122
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.893446445465088,
      "learning_rate": 0.0001377043759624079,
      "loss": 2.921,
      "step": 157123
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.383176326751709,
      "learning_rate": 0.00013770093568391396,
      "loss": 2.8869,
      "step": 157124
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.486743688583374,
      "learning_rate": 0.00013769749543559456,
      "loss": 2.896,
      "step": 157125
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.914501667022705,
      "learning_rate": 0.00013769405521745016,
      "loss": 3.0206,
      "step": 157126
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8624820709228516,
      "learning_rate": 0.0001376906150294816,
      "loss": 3.1115,
      "step": 157127
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6320042610168457,
      "learning_rate": 0.00013768717487168942,
      "loss": 3.0007,
      "step": 157128
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.431577682495117,
      "learning_rate": 0.00013768373474407427,
      "loss": 2.8203,
      "step": 157129
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.698662042617798,
      "learning_rate": 0.00013768029464663668,
      "loss": 3.0058,
      "step": 157130
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.405709743499756,
      "learning_rate": 0.00013767685457937744,
      "loss": 2.999,
      "step": 157131
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.865419387817383,
      "learning_rate": 0.00013767341454229708,
      "loss": 2.9217,
      "step": 157132
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.391079902648926,
      "learning_rate": 0.00013766997453539638,
      "loss": 2.8034,
      "step": 157133
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0022356510162354,
      "learning_rate": 0.0001376665345586759,
      "loss": 2.9705,
      "step": 157134
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2781360149383545,
      "learning_rate": 0.00013766309461213619,
      "loss": 3.0966,
      "step": 157135
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2420873641967773,
      "learning_rate": 0.00013765965469577808,
      "loss": 2.7669,
      "step": 157136
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1661288738250732,
      "learning_rate": 0.0001376562148096021,
      "loss": 2.8,
      "step": 157137
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1217100620269775,
      "learning_rate": 0.00013765277495360873,
      "loss": 2.8444,
      "step": 157138
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.208185911178589,
      "learning_rate": 0.00013764933512779896,
      "loss": 3.0345,
      "step": 157139
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5441176891326904,
      "learning_rate": 0.00013764589533217308,
      "loss": 2.9151,
      "step": 157140
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.182004690170288,
      "learning_rate": 0.00013764245556673204,
      "loss": 2.886,
      "step": 157141
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.323781728744507,
      "learning_rate": 0.0001376390158314763,
      "loss": 2.8855,
      "step": 157142
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2053191661834717,
      "learning_rate": 0.00013763557612640656,
      "loss": 2.81,
      "step": 157143
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0945215225219727,
      "learning_rate": 0.0001376321364515233,
      "loss": 3.0452,
      "step": 157144
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.597386121749878,
      "learning_rate": 0.00013762869680682745,
      "loss": 2.9946,
      "step": 157145
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6393773555755615,
      "learning_rate": 0.00013762525719231933,
      "loss": 3.0447,
      "step": 157146
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.330639362335205,
      "learning_rate": 0.00013762181760799987,
      "loss": 2.8451,
      "step": 157147
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8212389945983887,
      "learning_rate": 0.00013761837805386954,
      "loss": 2.8864,
      "step": 157148
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.500605344772339,
      "learning_rate": 0.00013761493852992894,
      "loss": 2.9122,
      "step": 157149
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.96675968170166,
      "learning_rate": 0.0001376114990361789,
      "loss": 2.8491,
      "step": 157150
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.033369541168213,
      "learning_rate": 0.00013760805957261998,
      "loss": 2.9339,
      "step": 157151
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3693623542785645,
      "learning_rate": 0.00013760462013925263,
      "loss": 3.0048,
      "step": 157152
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3923587799072266,
      "learning_rate": 0.00013760118073607777,
      "loss": 2.8809,
      "step": 157153
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.34108829498291,
      "learning_rate": 0.0001375977413630959,
      "loss": 3.0,
      "step": 157154
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1975901126861572,
      "learning_rate": 0.00013759430202030758,
      "loss": 3.1465,
      "step": 157155
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8950507640838623,
      "learning_rate": 0.00013759086270771366,
      "loss": 2.8915,
      "step": 157156
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.30055046081543,
      "learning_rate": 0.00013758742342531455,
      "loss": 2.8399,
      "step": 157157
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.272343158721924,
      "learning_rate": 0.0001375839841731111,
      "loss": 2.9421,
      "step": 157158
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0127112865448,
      "learning_rate": 0.00013758054495110388,
      "loss": 2.8848,
      "step": 157159
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.375922679901123,
      "learning_rate": 0.0001375771057592935,
      "loss": 2.7633,
      "step": 157160
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.443683385848999,
      "learning_rate": 0.00013757366659768048,
      "loss": 2.7552,
      "step": 157161
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.168224811553955,
      "learning_rate": 0.00013757022746626568,
      "loss": 3.0077,
      "step": 157162
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.949716091156006,
      "learning_rate": 0.00013756678836504953,
      "loss": 2.9222,
      "step": 157163
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.025867462158203,
      "learning_rate": 0.0001375633492940329,
      "loss": 2.9526,
      "step": 157164
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8560454845428467,
      "learning_rate": 0.00013755991025321632,
      "loss": 3.0129,
      "step": 157165
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.027534008026123,
      "learning_rate": 0.00013755647124260044,
      "loss": 3.0473,
      "step": 157166
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.9505271911621094,
      "learning_rate": 0.00013755303226218574,
      "loss": 2.9261,
      "step": 157167
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2860169410705566,
      "learning_rate": 0.0001375495933119731,
      "loss": 3.031,
      "step": 157168
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3278841972351074,
      "learning_rate": 0.00013754615439196295,
      "loss": 3.0071,
      "step": 157169
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.230005741119385,
      "learning_rate": 0.00013754271550215616,
      "loss": 2.8819,
      "step": 157170
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2575435638427734,
      "learning_rate": 0.0001375392766425532,
      "loss": 2.8976,
      "step": 157171
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3388161659240723,
      "learning_rate": 0.0001375358378131548,
      "loss": 2.9281,
      "step": 157172
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.169769525527954,
      "learning_rate": 0.00013753239901396144,
      "loss": 3.0581,
      "step": 157173
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6501455307006836,
      "learning_rate": 0.00013752896024497402,
      "loss": 3.0266,
      "step": 157174
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5374014377593994,
      "learning_rate": 0.00013752552150619285,
      "loss": 3.1557,
      "step": 157175
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.169501781463623,
      "learning_rate": 0.00013752208279761892,
      "loss": 3.0711,
      "step": 157176
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.093217372894287,
      "learning_rate": 0.0001375186441192527,
      "loss": 3.0702,
      "step": 157177
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4088199138641357,
      "learning_rate": 0.00013751520547109476,
      "loss": 2.9794,
      "step": 157178
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.163825035095215,
      "learning_rate": 0.00013751176685314578,
      "loss": 3.0064,
      "step": 157179
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1885974407196045,
      "learning_rate": 0.0001375083282654065,
      "loss": 3.0194,
      "step": 157180
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2574307918548584,
      "learning_rate": 0.0001375048897078774,
      "loss": 3.0082,
      "step": 157181
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.6354947090148926,
      "learning_rate": 0.00013750145118055927,
      "loss": 2.9256,
      "step": 157182
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1632165908813477,
      "learning_rate": 0.00013749801268345273,
      "loss": 2.8762,
      "step": 157183
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.097367763519287,
      "learning_rate": 0.00013749457421655838,
      "loss": 2.8645,
      "step": 157184
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0827829837799072,
      "learning_rate": 0.00013749113577987674,
      "loss": 3.0744,
      "step": 157185
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4565365314483643,
      "learning_rate": 0.0001374876973734087,
      "loss": 2.8925,
      "step": 157186
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.641339063644409,
      "learning_rate": 0.00013748425899715462,
      "loss": 2.7304,
      "step": 157187
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.318756103515625,
      "learning_rate": 0.0001374808206511154,
      "loss": 3.0728,
      "step": 157188
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1501693725585938,
      "learning_rate": 0.00013747738233529157,
      "loss": 2.7423,
      "step": 157189
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2160940170288086,
      "learning_rate": 0.00013747394404968374,
      "loss": 2.9824,
      "step": 157190
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2890372276306152,
      "learning_rate": 0.00013747050579429249,
      "loss": 2.7592,
      "step": 157191
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7103660106658936,
      "learning_rate": 0.00013746706756911865,
      "loss": 2.88,
      "step": 157192
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1752405166625977,
      "learning_rate": 0.00013746362937416262,
      "loss": 2.8606,
      "step": 157193
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9400572776794434,
      "learning_rate": 0.0001374601912094253,
      "loss": 3.0199,
      "step": 157194
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.5609488487243652,
      "learning_rate": 0.00013745675307490717,
      "loss": 2.9851,
      "step": 157195
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1313130855560303,
      "learning_rate": 0.00013745331497060892,
      "loss": 3.2093,
      "step": 157196
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6695735454559326,
      "learning_rate": 0.00013744987689653105,
      "loss": 3.0472,
      "step": 157197
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.8681347370147705,
      "learning_rate": 0.00013744643885267444,
      "loss": 3.1123,
      "step": 157198
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.722792387008667,
      "learning_rate": 0.0001374430008390395,
      "loss": 2.9709,
      "step": 157199
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4975697994232178,
      "learning_rate": 0.00013743956285562707,
      "loss": 2.8552,
      "step": 157200
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.259653091430664,
      "learning_rate": 0.00013743612490243768,
      "loss": 2.8256,
      "step": 157201
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4184458255767822,
      "learning_rate": 0.000137432686979472,
      "loss": 3.138,
      "step": 157202
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5186710357666016,
      "learning_rate": 0.00013742924908673055,
      "loss": 2.7504,
      "step": 157203
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2345218658447266,
      "learning_rate": 0.00013742581122421415,
      "loss": 2.9411,
      "step": 157204
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4400064945220947,
      "learning_rate": 0.00013742237339192326,
      "loss": 2.8494,
      "step": 157205
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4699044227600098,
      "learning_rate": 0.00013741893558985875,
      "loss": 3.2639,
      "step": 157206
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.390828847885132,
      "learning_rate": 0.00013741549781802102,
      "loss": 2.8227,
      "step": 157207
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1704699993133545,
      "learning_rate": 0.00013741206007641103,
      "loss": 3.0361,
      "step": 157208
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4485723972320557,
      "learning_rate": 0.00013740862236502898,
      "loss": 3.0442,
      "step": 157209
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6000478267669678,
      "learning_rate": 0.00013740518468387586,
      "loss": 3.0836,
      "step": 157210
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0631401538848877,
      "learning_rate": 0.00013740174703295204,
      "loss": 2.8967,
      "step": 157211
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2121074199676514,
      "learning_rate": 0.00013739830941225846,
      "loss": 2.9383,
      "step": 157212
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3359246253967285,
      "learning_rate": 0.00013739487182179547,
      "loss": 2.8448,
      "step": 157213
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.624248743057251,
      "learning_rate": 0.00013739143426156407,
      "loss": 3.1411,
      "step": 157214
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.099973201751709,
      "learning_rate": 0.00013738799673156444,
      "loss": 3.0687,
      "step": 157215
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.805635929107666,
      "learning_rate": 0.0001373845592317976,
      "loss": 2.8876,
      "step": 157216
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.169041156768799,
      "learning_rate": 0.00013738112176226388,
      "loss": 2.974,
      "step": 157217
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3368570804595947,
      "learning_rate": 0.00013737768432296423,
      "loss": 3.1182,
      "step": 157218
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.470686197280884,
      "learning_rate": 0.000137374246913899,
      "loss": 2.8576,
      "step": 157219
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2226593494415283,
      "learning_rate": 0.00013737080953506905,
      "loss": 2.8215,
      "step": 157220
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.189723014831543,
      "learning_rate": 0.000137367372186475,
      "loss": 2.9466,
      "step": 157221
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.437110662460327,
      "learning_rate": 0.00013736393486811737,
      "loss": 2.8341,
      "step": 157222
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4947855472564697,
      "learning_rate": 0.0001373604975799968,
      "loss": 2.8075,
      "step": 157223
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.708098888397217,
      "learning_rate": 0.00013735706032211406,
      "loss": 2.8085,
      "step": 157224
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.370901584625244,
      "learning_rate": 0.0001373536230944696,
      "loss": 2.9307,
      "step": 157225
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.615724563598633,
      "learning_rate": 0.0001373501858970643,
      "loss": 2.8831,
      "step": 157226
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.238466501235962,
      "learning_rate": 0.00013734674872989863,
      "loss": 3.0108,
      "step": 157227
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.286536455154419,
      "learning_rate": 0.00013734331159297332,
      "loss": 3.0743,
      "step": 157228
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.40910267829895,
      "learning_rate": 0.00013733987448628882,
      "loss": 3.0488,
      "step": 157229
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.788649797439575,
      "learning_rate": 0.00013733643740984603,
      "loss": 3.0747,
      "step": 157230
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1445164680480957,
      "learning_rate": 0.00013733300036364535,
      "loss": 2.7667,
      "step": 157231
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7642667293548584,
      "learning_rate": 0.00013732956334768764,
      "loss": 2.7963,
      "step": 157232
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4403297901153564,
      "learning_rate": 0.00013732612636197344,
      "loss": 2.8266,
      "step": 157233
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6339831352233887,
      "learning_rate": 0.0001373226894065033,
      "loss": 2.9088,
      "step": 157234
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.170520782470703,
      "learning_rate": 0.00013731925248127802,
      "loss": 2.9096,
      "step": 157235
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.086587905883789,
      "learning_rate": 0.00013731581558629817,
      "loss": 2.8456,
      "step": 157236
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.191329002380371,
      "learning_rate": 0.00013731237872156428,
      "loss": 3.026,
      "step": 157237
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3401663303375244,
      "learning_rate": 0.00013730894188707717,
      "loss": 2.914,
      "step": 157238
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.8506431579589844,
      "learning_rate": 0.00013730550508283745,
      "loss": 2.8903,
      "step": 157239
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.6655890941619873,
      "learning_rate": 0.00013730206830884554,
      "loss": 2.8581,
      "step": 157240
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3716607093811035,
      "learning_rate": 0.00013729863156510238,
      "loss": 2.9098,
      "step": 157241
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.331641674041748,
      "learning_rate": 0.00013729519485160851,
      "loss": 3.1046,
      "step": 157242
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.329096555709839,
      "learning_rate": 0.00013729175816836438,
      "loss": 3.0856,
      "step": 157243
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.569147825241089,
      "learning_rate": 0.0001372883215153709,
      "loss": 2.8446,
      "step": 157244
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.069347381591797,
      "learning_rate": 0.00013728488489262862,
      "loss": 3.2055,
      "step": 157245
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.459493398666382,
      "learning_rate": 0.000137281448300138,
      "loss": 3.1422,
      "step": 157246
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2191007137298584,
      "learning_rate": 0.0001372780117379,
      "loss": 3.2297,
      "step": 157247
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.518749237060547,
      "learning_rate": 0.0001372745752059149,
      "loss": 3.1475,
      "step": 157248
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.097416877746582,
      "learning_rate": 0.0001372711387041837,
      "loss": 2.8191,
      "step": 157249
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8618385791778564,
      "learning_rate": 0.00013726770223270685,
      "loss": 2.999,
      "step": 157250
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3236751556396484,
      "learning_rate": 0.00013726426579148502,
      "loss": 2.9383,
      "step": 157251
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.521761178970337,
      "learning_rate": 0.0001372608293805187,
      "loss": 3.018,
      "step": 157252
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.551100254058838,
      "learning_rate": 0.00013725739299980878,
      "loss": 3.0559,
      "step": 157253
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.967606544494629,
      "learning_rate": 0.00013725395664935567,
      "loss": 2.818,
      "step": 157254
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3637642860412598,
      "learning_rate": 0.00013725052032916024,
      "loss": 3.0828,
      "step": 157255
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1638505458831787,
      "learning_rate": 0.00013724708403922302,
      "loss": 3.0275,
      "step": 157256
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.073354482650757,
      "learning_rate": 0.00013724364777954463,
      "loss": 2.9382,
      "step": 157257
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.132045030593872,
      "learning_rate": 0.00013724021155012557,
      "loss": 2.8613,
      "step": 157258
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.73020076751709,
      "learning_rate": 0.0001372367753509668,
      "loss": 3.0166,
      "step": 157259
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7193763256073,
      "learning_rate": 0.00013723333918206864,
      "loss": 3.0546,
      "step": 157260
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4228131771087646,
      "learning_rate": 0.000137229903043432,
      "loss": 3.0579,
      "step": 157261
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.266650438308716,
      "learning_rate": 0.00013722646693505735,
      "loss": 2.9796,
      "step": 157262
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4341981410980225,
      "learning_rate": 0.00013722303085694543,
      "loss": 2.9954,
      "step": 157263
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5640149116516113,
      "learning_rate": 0.0001372195948090967,
      "loss": 2.7388,
      "step": 157264
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5313220024108887,
      "learning_rate": 0.000137216158791512,
      "loss": 3.0911,
      "step": 157265
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.484994649887085,
      "learning_rate": 0.0001372127228041918,
      "loss": 3.1288,
      "step": 157266
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.120929718017578,
      "learning_rate": 0.00013720928684713693,
      "loss": 3.1555,
      "step": 157267
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7591607570648193,
      "learning_rate": 0.00013720585092034794,
      "loss": 3.0677,
      "step": 157268
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7207448482513428,
      "learning_rate": 0.00013720241502382545,
      "loss": 3.1186,
      "step": 157269
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7804012298583984,
      "learning_rate": 0.00013719897915757,
      "loss": 2.9247,
      "step": 157270
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.277118444442749,
      "learning_rate": 0.00013719554332158242,
      "loss": 2.8852,
      "step": 157271
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.737335681915283,
      "learning_rate": 0.00013719210751586317,
      "loss": 2.9478,
      "step": 157272
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.4679059982299805,
      "learning_rate": 0.00013718867174041308,
      "loss": 2.8188,
      "step": 157273
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.612919807434082,
      "learning_rate": 0.0001371852359952326,
      "loss": 2.8226,
      "step": 157274
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8625223636627197,
      "learning_rate": 0.00013718180028032264,
      "loss": 3.0288,
      "step": 157275
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.242901086807251,
      "learning_rate": 0.00013717836459568346,
      "loss": 2.9704,
      "step": 157276
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.564480304718018,
      "learning_rate": 0.00013717492894131604,
      "loss": 2.9537,
      "step": 157277
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4144222736358643,
      "learning_rate": 0.0001371714933172207,
      "loss": 2.9042,
      "step": 157278
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.319533109664917,
      "learning_rate": 0.00013716805772339842,
      "loss": 2.9204,
      "step": 157279
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9282474517822266,
      "learning_rate": 0.0001371646221598495,
      "loss": 2.9314,
      "step": 157280
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.439603328704834,
      "learning_rate": 0.00013716118662657504,
      "loss": 2.9204,
      "step": 157281
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5732216835021973,
      "learning_rate": 0.00013715775112357512,
      "loss": 2.8291,
      "step": 157282
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0453875064849854,
      "learning_rate": 0.00013715431565085076,
      "loss": 2.7728,
      "step": 157283
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.9286742210388184,
      "learning_rate": 0.0001371508802084024,
      "loss": 2.8937,
      "step": 157284
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.490210056304932,
      "learning_rate": 0.00013714744479623086,
      "loss": 2.903,
      "step": 157285
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5016655921936035,
      "learning_rate": 0.00013714400941433657,
      "loss": 3.022,
      "step": 157286
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.604635000228882,
      "learning_rate": 0.00013714057406272054,
      "loss": 3.0223,
      "step": 157287
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.355760335922241,
      "learning_rate": 0.0001371371387413829,
      "loss": 2.9372,
      "step": 157288
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.6862025260925293,
      "learning_rate": 0.00013713370345032466,
      "loss": 3.1025,
      "step": 157289
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.743669271469116,
      "learning_rate": 0.0001371302681895462,
      "loss": 3.1195,
      "step": 157290
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.049642562866211,
      "learning_rate": 0.00013712683295904842,
      "loss": 3.1003,
      "step": 157291
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2518184185028076,
      "learning_rate": 0.00013712339775883174,
      "loss": 2.9477,
      "step": 157292
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.624791145324707,
      "learning_rate": 0.00013711996258889713,
      "loss": 2.7842,
      "step": 157293
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7489211559295654,
      "learning_rate": 0.00013711652744924474,
      "loss": 2.9578,
      "step": 157294
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.800469160079956,
      "learning_rate": 0.0001371130923398756,
      "loss": 3.0296,
      "step": 157295
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.6787683963775635,
      "learning_rate": 0.00013710965726079007,
      "loss": 2.9208,
      "step": 157296
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9737207889556885,
      "learning_rate": 0.00013710622221198906,
      "loss": 2.648,
      "step": 157297
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.582106590270996,
      "learning_rate": 0.00013710278719347295,
      "loss": 2.7758,
      "step": 157298
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.56252121925354,
      "learning_rate": 0.00013709935220524276,
      "loss": 3.083,
      "step": 157299
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.311955213546753,
      "learning_rate": 0.00013709591724729862,
      "loss": 3.1176,
      "step": 157300
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4267868995666504,
      "learning_rate": 0.00013709248231964154,
      "loss": 2.8578,
      "step": 157301
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1819043159484863,
      "learning_rate": 0.000137089047422272,
      "loss": 3.0671,
      "step": 157302
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0136921405792236,
      "learning_rate": 0.00013708561255519071,
      "loss": 2.9161,
      "step": 157303
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.600616216659546,
      "learning_rate": 0.0001370821777183982,
      "loss": 2.8506,
      "step": 157304
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4857659339904785,
      "learning_rate": 0.0001370787429118954,
      "loss": 2.856,
      "step": 157305
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3235464096069336,
      "learning_rate": 0.0001370753081356825,
      "loss": 2.704,
      "step": 157306
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5151538848876953,
      "learning_rate": 0.00013707187338976053,
      "loss": 2.9371,
      "step": 157307
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.215273857116699,
      "learning_rate": 0.00013706843867412983,
      "loss": 2.9688,
      "step": 157308
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4830949306488037,
      "learning_rate": 0.0001370650039887913,
      "loss": 2.9651,
      "step": 157309
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.699000835418701,
      "learning_rate": 0.00013706156933374536,
      "loss": 2.8135,
      "step": 157310
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1295523643493652,
      "learning_rate": 0.00013705813470899286,
      "loss": 3.0912,
      "step": 157311
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.553027391433716,
      "learning_rate": 0.00013705470011453433,
      "loss": 3.0726,
      "step": 157312
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1402316093444824,
      "learning_rate": 0.0001370512655503704,
      "loss": 2.9751,
      "step": 157313
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2442471981048584,
      "learning_rate": 0.00013704783101650157,
      "loss": 2.8485,
      "step": 157314
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.401176929473877,
      "learning_rate": 0.0001370443965129288,
      "loss": 2.9336,
      "step": 157315
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1335906982421875,
      "learning_rate": 0.0001370409620396524,
      "loss": 3.304,
      "step": 157316
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5550448894500732,
      "learning_rate": 0.00013703752759667328,
      "loss": 2.9687,
      "step": 157317
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2687509059906006,
      "learning_rate": 0.00013703409318399198,
      "loss": 2.6505,
      "step": 157318
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.198220729827881,
      "learning_rate": 0.00013703065880160898,
      "loss": 2.7303,
      "step": 157319
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9882736206054688,
      "learning_rate": 0.00013702722444952517,
      "loss": 2.7925,
      "step": 157320
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2635293006896973,
      "learning_rate": 0.0001370237901277411,
      "loss": 3.0043,
      "step": 157321
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.4650752544403076,
      "learning_rate": 0.00013702035583625726,
      "loss": 3.0019,
      "step": 157322
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.901857376098633,
      "learning_rate": 0.0001370169215750745,
      "loss": 3.0593,
      "step": 157323
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.955209493637085,
      "learning_rate": 0.0001370134873441934,
      "loss": 3.0318,
      "step": 157324
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7892274856567383,
      "learning_rate": 0.00013701005314361447,
      "loss": 3.1045,
      "step": 157325
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3634817600250244,
      "learning_rate": 0.00013700661897333852,
      "loss": 2.9864,
      "step": 157326
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.947697639465332,
      "learning_rate": 0.00013700318483336613,
      "loss": 3.0478,
      "step": 157327
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.344297409057617,
      "learning_rate": 0.00013699975072369781,
      "loss": 2.9232,
      "step": 157328
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3526928424835205,
      "learning_rate": 0.00013699631664433444,
      "loss": 3.0361,
      "step": 157329
    },
    {
      "epoch": 2.05,
      "grad_norm": 1.8895293474197388,
      "learning_rate": 0.00013699288259527652,
      "loss": 3.0956,
      "step": 157330
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.467917442321777,
      "learning_rate": 0.0001369894485765246,
      "loss": 3.1185,
      "step": 157331
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.590524911880493,
      "learning_rate": 0.00013698601458807954,
      "loss": 2.8168,
      "step": 157332
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.098914861679077,
      "learning_rate": 0.00013698258062994173,
      "loss": 2.9948,
      "step": 157333
    },
    {
      "epoch": 2.05,
      "grad_norm": 1.8416190147399902,
      "learning_rate": 0.00013697914670211204,
      "loss": 3.2185,
      "step": 157334
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.422455310821533,
      "learning_rate": 0.000136975712804591,
      "loss": 2.9394,
      "step": 157335
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9564733505249023,
      "learning_rate": 0.00013697227893737926,
      "loss": 3.2464,
      "step": 157336
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8676133155822754,
      "learning_rate": 0.00013696884510047734,
      "loss": 2.8498,
      "step": 157337
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.05289888381958,
      "learning_rate": 0.00013696541129388612,
      "loss": 2.8389,
      "step": 157338
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1401500701904297,
      "learning_rate": 0.00013696197751760596,
      "loss": 2.8431,
      "step": 157339
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7139415740966797,
      "learning_rate": 0.00013695854377163776,
      "loss": 2.94,
      "step": 157340
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0766615867614746,
      "learning_rate": 0.0001369551100559821,
      "loss": 2.7692,
      "step": 157341
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.324463367462158,
      "learning_rate": 0.0001369516763706395,
      "loss": 2.9067,
      "step": 157342
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5553362369537354,
      "learning_rate": 0.0001369482427156106,
      "loss": 3.1983,
      "step": 157343
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4231958389282227,
      "learning_rate": 0.00013694480909089618,
      "loss": 3.0544,
      "step": 157344
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7618093490600586,
      "learning_rate": 0.00013694137549649665,
      "loss": 2.9125,
      "step": 157345
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.732520580291748,
      "learning_rate": 0.000136937941932413,
      "loss": 3.0772,
      "step": 157346
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2120373249053955,
      "learning_rate": 0.00013693450839864546,
      "loss": 2.7891,
      "step": 157347
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6245009899139404,
      "learning_rate": 0.00013693107489519516,
      "loss": 2.7737,
      "step": 157348
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2360379695892334,
      "learning_rate": 0.00013692764142206218,
      "loss": 3.0915,
      "step": 157349
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.121236562728882,
      "learning_rate": 0.00013692420797924759,
      "loss": 3.0487,
      "step": 157350
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7646474838256836,
      "learning_rate": 0.0001369207745667517,
      "loss": 2.7513,
      "step": 157351
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4460301399230957,
      "learning_rate": 0.00013691734118457546,
      "loss": 3.1696,
      "step": 157352
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.446514129638672,
      "learning_rate": 0.00013691390783271927,
      "loss": 2.8716,
      "step": 157353
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.64827036857605,
      "learning_rate": 0.00013691047451118408,
      "loss": 2.8036,
      "step": 157354
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3573005199432373,
      "learning_rate": 0.00013690704121997006,
      "loss": 2.8605,
      "step": 157355
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6137168407440186,
      "learning_rate": 0.00013690360795907822,
      "loss": 3.067,
      "step": 157356
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6587576866149902,
      "learning_rate": 0.000136900174728509,
      "loss": 2.9312,
      "step": 157357
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.326380968093872,
      "learning_rate": 0.0001368967415282632,
      "loss": 2.9135,
      "step": 157358
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0539793968200684,
      "learning_rate": 0.00013689330835834124,
      "loss": 3.0685,
      "step": 157359
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.456296920776367,
      "learning_rate": 0.00013688987521874417,
      "loss": 2.7164,
      "step": 157360
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.078183650970459,
      "learning_rate": 0.0001368864421094721,
      "loss": 2.8595,
      "step": 157361
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2577321529388428,
      "learning_rate": 0.00013688300903052603,
      "loss": 3.1378,
      "step": 157362
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.05747127532959,
      "learning_rate": 0.00013687957598190636,
      "loss": 2.8095,
      "step": 157363
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4136106967926025,
      "learning_rate": 0.00013687614296361398,
      "loss": 2.9942,
      "step": 157364
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.080071210861206,
      "learning_rate": 0.00013687270997564933,
      "loss": 2.7522,
      "step": 157365
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.784315824508667,
      "learning_rate": 0.00013686927701801329,
      "loss": 2.6741,
      "step": 157366
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6490869522094727,
      "learning_rate": 0.00013686584409070614,
      "loss": 3.132,
      "step": 157367
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6977169513702393,
      "learning_rate": 0.00013686241119372882,
      "loss": 3.0977,
      "step": 157368
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.432129383087158,
      "learning_rate": 0.00013685897832708174,
      "loss": 3.1855,
      "step": 157369
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4372713565826416,
      "learning_rate": 0.00013685554549076577,
      "loss": 3.0013,
      "step": 157370
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.274559736251831,
      "learning_rate": 0.0001368521126847813,
      "loss": 2.8051,
      "step": 157371
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.318310260772705,
      "learning_rate": 0.0001368486799091293,
      "loss": 2.933,
      "step": 157372
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1165599822998047,
      "learning_rate": 0.00013684524716381006,
      "loss": 3.098,
      "step": 157373
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.211609125137329,
      "learning_rate": 0.00013684181444882445,
      "loss": 2.7781,
      "step": 157374
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.377351999282837,
      "learning_rate": 0.0001368383817641729,
      "loss": 2.6994,
      "step": 157375
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7223825454711914,
      "learning_rate": 0.0001368349491098563,
      "loss": 2.9871,
      "step": 157376
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.598419427871704,
      "learning_rate": 0.00013683151648587506,
      "loss": 2.8758,
      "step": 157377
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.040031909942627,
      "learning_rate": 0.00013682808389223013,
      "loss": 3.2931,
      "step": 157378
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1805379390716553,
      "learning_rate": 0.00013682465132892172,
      "loss": 2.7622,
      "step": 157379
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.360508918762207,
      "learning_rate": 0.0001368212187959508,
      "loss": 2.9587,
      "step": 157380
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.310733795166016,
      "learning_rate": 0.00013681778629331777,
      "loss": 2.9018,
      "step": 157381
    },
    {
      "epoch": 2.05,
      "grad_norm": 1.9500850439071655,
      "learning_rate": 0.00013681435382102355,
      "loss": 2.9892,
      "step": 157382
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.495196580886841,
      "learning_rate": 0.0001368109213790685,
      "loss": 2.9614,
      "step": 157383
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5782837867736816,
      "learning_rate": 0.00013680748896745354,
      "loss": 2.9449,
      "step": 157384
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0004806518554688,
      "learning_rate": 0.00013680405658617897,
      "loss": 2.9957,
      "step": 157385
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.434483289718628,
      "learning_rate": 0.00013680062423524573,
      "loss": 2.771,
      "step": 157386
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.15384840965271,
      "learning_rate": 0.00013679719191465422,
      "loss": 2.8267,
      "step": 157387
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9545814990997314,
      "learning_rate": 0.00013679375962440528,
      "loss": 2.9224,
      "step": 157388
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2912793159484863,
      "learning_rate": 0.00013679032736449936,
      "loss": 3.0329,
      "step": 157389
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.807825803756714,
      "learning_rate": 0.00013678689513493743,
      "loss": 2.9314,
      "step": 157390
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.694308042526245,
      "learning_rate": 0.00013678346293571965,
      "loss": 2.9492,
      "step": 157391
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4064319133758545,
      "learning_rate": 0.00013678003076684705,
      "loss": 2.8397,
      "step": 157392
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.360044479370117,
      "learning_rate": 0.00013677659862832,
      "loss": 2.831,
      "step": 157393
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.457441806793213,
      "learning_rate": 0.00013677316652013937,
      "loss": 2.8885,
      "step": 157394
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3631582260131836,
      "learning_rate": 0.00013676973444230557,
      "loss": 2.985,
      "step": 157395
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1809723377227783,
      "learning_rate": 0.0001367663023948195,
      "loss": 3.0431,
      "step": 157396
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0111546516418457,
      "learning_rate": 0.00013676287037768162,
      "loss": 2.8695,
      "step": 157397
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.292524576187134,
      "learning_rate": 0.00013675943839089262,
      "loss": 2.9227,
      "step": 157398
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.599935531616211,
      "learning_rate": 0.00013675600643445302,
      "loss": 3.0297,
      "step": 157399
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.65289568901062,
      "learning_rate": 0.00013675257450836365,
      "loss": 2.8506,
      "step": 157400
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.190319061279297,
      "learning_rate": 0.00013674914261262494,
      "loss": 3.0163,
      "step": 157401
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1834769248962402,
      "learning_rate": 0.00013674571074723778,
      "loss": 3.0688,
      "step": 157402
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6914145946502686,
      "learning_rate": 0.00013674227891220266,
      "loss": 3.135,
      "step": 157403
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.311128854751587,
      "learning_rate": 0.00013673884710752027,
      "loss": 3.1488,
      "step": 157404
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3947854042053223,
      "learning_rate": 0.00013673541533319107,
      "loss": 2.9806,
      "step": 157405
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2183427810668945,
      "learning_rate": 0.00013673198358921592,
      "loss": 3.1471,
      "step": 157406
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.272310972213745,
      "learning_rate": 0.0001367285518755953,
      "loss": 2.927,
      "step": 157407
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.99700927734375,
      "learning_rate": 0.00013672512019233005,
      "loss": 3.1681,
      "step": 157408
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4308574199676514,
      "learning_rate": 0.00013672168853942065,
      "loss": 2.9775,
      "step": 157409
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4699528217315674,
      "learning_rate": 0.00013671825691686766,
      "loss": 3.0436,
      "step": 157410
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.501317024230957,
      "learning_rate": 0.00013671482532467196,
      "loss": 2.8369,
      "step": 157411
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7557108402252197,
      "learning_rate": 0.00013671139376283402,
      "loss": 2.8711,
      "step": 157412
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4485933780670166,
      "learning_rate": 0.00013670796223135443,
      "loss": 3.2643,
      "step": 157413
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.773736000061035,
      "learning_rate": 0.000136704530730234,
      "loss": 2.8562,
      "step": 157414
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.158090591430664,
      "learning_rate": 0.00013670109925947334,
      "loss": 2.9667,
      "step": 157415
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2637827396392822,
      "learning_rate": 0.00013669766781907288,
      "loss": 3.0536,
      "step": 157416
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.585081577301025,
      "learning_rate": 0.0001366942364090335,
      "loss": 2.8278,
      "step": 157417
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2859854698181152,
      "learning_rate": 0.00013669080502935564,
      "loss": 2.7484,
      "step": 157418
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.813000440597534,
      "learning_rate": 0.00013668737368004017,
      "loss": 3.2335,
      "step": 157419
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7826664447784424,
      "learning_rate": 0.00013668394236108757,
      "loss": 2.8921,
      "step": 157420
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.726481914520264,
      "learning_rate": 0.00013668051107249856,
      "loss": 2.8014,
      "step": 157421
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6044342517852783,
      "learning_rate": 0.00013667707981427355,
      "loss": 2.9948,
      "step": 157422
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.620260000228882,
      "learning_rate": 0.00013667364858641352,
      "loss": 2.8936,
      "step": 157423
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.677229404449463,
      "learning_rate": 0.00013667021738891883,
      "loss": 3.2489,
      "step": 157424
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.74428129196167,
      "learning_rate": 0.0001366667862217903,
      "loss": 2.7671,
      "step": 157425
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.734631538391113,
      "learning_rate": 0.00013666335508502852,
      "loss": 2.9754,
      "step": 157426
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.8960304260253906,
      "learning_rate": 0.00013665992397863408,
      "loss": 2.8474,
      "step": 157427
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.171428680419922,
      "learning_rate": 0.00013665649290260756,
      "loss": 3.0014,
      "step": 157428
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.77133846282959,
      "learning_rate": 0.0001366530618569498,
      "loss": 3.2067,
      "step": 157429
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.7075486183166504,
      "learning_rate": 0.0001366496308416612,
      "loss": 2.8932,
      "step": 157430
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.862701177597046,
      "learning_rate": 0.00013664619985674263,
      "loss": 2.8575,
      "step": 157431
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.535940647125244,
      "learning_rate": 0.0001366427689021945,
      "loss": 3.1085,
      "step": 157432
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1820900440216064,
      "learning_rate": 0.00013663933797801776,
      "loss": 3.2466,
      "step": 157433
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.196974277496338,
      "learning_rate": 0.00013663590708421267,
      "loss": 3.0337,
      "step": 157434
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.104004144668579,
      "learning_rate": 0.00013663247622078015,
      "loss": 2.9395,
      "step": 157435
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.36307430267334,
      "learning_rate": 0.0001366290453877206,
      "loss": 3.0665,
      "step": 157436
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.614717960357666,
      "learning_rate": 0.00013662561458503494,
      "loss": 2.9155,
      "step": 157437
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.937776565551758,
      "learning_rate": 0.0001366221838127235,
      "loss": 2.8308,
      "step": 157438
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5763540267944336,
      "learning_rate": 0.00013661875307078734,
      "loss": 3.0038,
      "step": 157439
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.141066551208496,
      "learning_rate": 0.00013661532235922664,
      "loss": 2.9731,
      "step": 157440
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.450155019760132,
      "learning_rate": 0.0001366118916780423,
      "loss": 3.0006,
      "step": 157441
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.294753074645996,
      "learning_rate": 0.0001366084610272348,
      "loss": 2.747,
      "step": 157442
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1032917499542236,
      "learning_rate": 0.00013660503040680496,
      "loss": 3.1717,
      "step": 157443
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.166526794433594,
      "learning_rate": 0.00013660159981675326,
      "loss": 3.051,
      "step": 157444
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.652291774749756,
      "learning_rate": 0.00013659816925708064,
      "loss": 3.0331,
      "step": 157445
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0769293308258057,
      "learning_rate": 0.00013659473872778722,
      "loss": 2.741,
      "step": 157446
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.434582233428955,
      "learning_rate": 0.00013659130822887407,
      "loss": 3.0232,
      "step": 157447
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0744218826293945,
      "learning_rate": 0.0001365878777603416,
      "loss": 3.0091,
      "step": 157448
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3105108737945557,
      "learning_rate": 0.00013658444732219058,
      "loss": 3.1269,
      "step": 157449
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1006031036376953,
      "learning_rate": 0.0001365810169144215,
      "loss": 2.8005,
      "step": 157450
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3434412479400635,
      "learning_rate": 0.00013657758653703533,
      "loss": 3.0553,
      "step": 157451
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.480849027633667,
      "learning_rate": 0.00013657415619003225,
      "loss": 3.1857,
      "step": 157452
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5468909740448,
      "learning_rate": 0.00013657072587341322,
      "loss": 3.1235,
      "step": 157453
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.523921251296997,
      "learning_rate": 0.00013656729558717868,
      "loss": 2.7898,
      "step": 157454
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1612322330474854,
      "learning_rate": 0.0001365638653313295,
      "loss": 2.9645,
      "step": 157455
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.753975868225098,
      "learning_rate": 0.000136560435105866,
      "loss": 2.7223,
      "step": 157456
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.129917621612549,
      "learning_rate": 0.00013655700491078926,
      "loss": 2.7591,
      "step": 157457
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.5484299659729,
      "learning_rate": 0.00013655357474609943,
      "loss": 2.9115,
      "step": 157458
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5643796920776367,
      "learning_rate": 0.0001365501446117975,
      "loss": 3.1135,
      "step": 157459
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2715415954589844,
      "learning_rate": 0.00013654671450788388,
      "loss": 2.7491,
      "step": 157460
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.000990629196167,
      "learning_rate": 0.00013654328443435938,
      "loss": 2.9174,
      "step": 157461
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.742018222808838,
      "learning_rate": 0.0001365398543912245,
      "loss": 3.028,
      "step": 157462
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.102391719818115,
      "learning_rate": 0.00013653642437848017,
      "loss": 2.9182,
      "step": 157463
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.884967565536499,
      "learning_rate": 0.00013653299439612656,
      "loss": 2.8246,
      "step": 157464
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.725966215133667,
      "learning_rate": 0.00013652956444416467,
      "loss": 3.0363,
      "step": 157465
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.613969326019287,
      "learning_rate": 0.0001365261345225949,
      "loss": 2.8008,
      "step": 157466
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1635477542877197,
      "learning_rate": 0.00013652270463141813,
      "loss": 3.1946,
      "step": 157467
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.036904811859131,
      "learning_rate": 0.00013651927477063478,
      "loss": 2.7469,
      "step": 157468
    },
    {
      "epoch": 2.05,
      "grad_norm": 8.15967845916748,
      "learning_rate": 0.00013651584494024578,
      "loss": 2.7442,
      "step": 157469
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.552493095397949,
      "learning_rate": 0.00013651241514025137,
      "loss": 2.7917,
      "step": 157470
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.593398094177246,
      "learning_rate": 0.00013650898537065248,
      "loss": 2.7606,
      "step": 157471
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3333146572113037,
      "learning_rate": 0.00013650555563144952,
      "loss": 3.0441,
      "step": 157472
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.290517807006836,
      "learning_rate": 0.0001365021259226434,
      "loss": 2.9426,
      "step": 157473
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.920679807662964,
      "learning_rate": 0.00013649869624423453,
      "loss": 3.0517,
      "step": 157474
    },
    {
      "epoch": 2.05,
      "grad_norm": 6.378746032714844,
      "learning_rate": 0.00013649526659622373,
      "loss": 2.9544,
      "step": 157475
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.515165328979492,
      "learning_rate": 0.00013649183697861153,
      "loss": 2.992,
      "step": 157476
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.178591251373291,
      "learning_rate": 0.00013648840739139862,
      "loss": 2.9959,
      "step": 157477
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6290979385375977,
      "learning_rate": 0.00013648497783458547,
      "loss": 2.8303,
      "step": 157478
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.487648010253906,
      "learning_rate": 0.000136481548308173,
      "loss": 3.3485,
      "step": 157479
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.6467928886413574,
      "learning_rate": 0.00013647811881216152,
      "loss": 2.9404,
      "step": 157480
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.8531012535095215,
      "learning_rate": 0.000136474689346552,
      "loss": 2.8432,
      "step": 157481
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3048391342163086,
      "learning_rate": 0.0001364712599113449,
      "loss": 2.8513,
      "step": 157482
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5286693572998047,
      "learning_rate": 0.00013646783050654088,
      "loss": 2.9574,
      "step": 157483
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.331735610961914,
      "learning_rate": 0.00013646440113214048,
      "loss": 3.0484,
      "step": 157484
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.310291290283203,
      "learning_rate": 0.00013646097178814453,
      "loss": 3.1199,
      "step": 157485
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.458805561065674,
      "learning_rate": 0.00013645754247455346,
      "loss": 2.9454,
      "step": 157486
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0765814781188965,
      "learning_rate": 0.00013645411319136816,
      "loss": 3.0454,
      "step": 157487
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.279819965362549,
      "learning_rate": 0.00013645068393858912,
      "loss": 3.0284,
      "step": 157488
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.485563039779663,
      "learning_rate": 0.00013644725471621694,
      "loss": 3.0666,
      "step": 157489
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.56889009475708,
      "learning_rate": 0.00013644382552425224,
      "loss": 2.9821,
      "step": 157490
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.5574119091033936,
      "learning_rate": 0.00013644039636269583,
      "loss": 2.749,
      "step": 157491
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8495872020721436,
      "learning_rate": 0.00013643696723154812,
      "loss": 2.9792,
      "step": 157492
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.465533494949341,
      "learning_rate": 0.00013643353813080995,
      "loss": 2.9707,
      "step": 157493
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.216881036758423,
      "learning_rate": 0.00013643010906048188,
      "loss": 3.0155,
      "step": 157494
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2826952934265137,
      "learning_rate": 0.00013642668002056445,
      "loss": 3.0086,
      "step": 157495
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8708739280700684,
      "learning_rate": 0.00013642325101105847,
      "loss": 3.1066,
      "step": 157496
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.863755941390991,
      "learning_rate": 0.00013641982203196453,
      "loss": 2.9705,
      "step": 157497
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2097764015197754,
      "learning_rate": 0.00013641639308328312,
      "loss": 3.0214,
      "step": 157498
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0643393993377686,
      "learning_rate": 0.0001364129641650151,
      "loss": 2.8997,
      "step": 157499
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7947802543640137,
      "learning_rate": 0.00013640953527716096,
      "loss": 2.9741,
      "step": 157500
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.5880820751190186,
      "learning_rate": 0.00013640610641972128,
      "loss": 3.0768,
      "step": 157501
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.109602928161621,
      "learning_rate": 0.00013640267759269694,
      "loss": 2.8643,
      "step": 157502
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.221165895462036,
      "learning_rate": 0.0001363992487960884,
      "loss": 2.884,
      "step": 157503
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4044880867004395,
      "learning_rate": 0.00013639582002989624,
      "loss": 2.9556,
      "step": 157504
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.608145236968994,
      "learning_rate": 0.00013639239129412128,
      "loss": 2.8483,
      "step": 157505
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2120676040649414,
      "learning_rate": 0.00013638896258876405,
      "loss": 3.0917,
      "step": 157506
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.617673873901367,
      "learning_rate": 0.00013638553391382511,
      "loss": 2.8946,
      "step": 157507
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1736793518066406,
      "learning_rate": 0.00013638210526930529,
      "loss": 2.945,
      "step": 157508
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1914076805114746,
      "learning_rate": 0.000136378676655205,
      "loss": 2.8877,
      "step": 157509
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.278672695159912,
      "learning_rate": 0.00013637524807152512,
      "loss": 2.9542,
      "step": 157510
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.819509983062744,
      "learning_rate": 0.00013637181951826617,
      "loss": 3.0806,
      "step": 157511
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0822439193725586,
      "learning_rate": 0.00013636839099542882,
      "loss": 2.918,
      "step": 157512
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.213470935821533,
      "learning_rate": 0.00013636496250301355,
      "loss": 2.8752,
      "step": 157513
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.519412040710449,
      "learning_rate": 0.0001363615340410212,
      "loss": 2.7564,
      "step": 157514
    },
    {
      "epoch": 2.05,
      "grad_norm": 1.9505856037139893,
      "learning_rate": 0.00013635810560945223,
      "loss": 2.9285,
      "step": 157515
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.343412160873413,
      "learning_rate": 0.00013635467720830751,
      "loss": 2.9531,
      "step": 157516
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.789386749267578,
      "learning_rate": 0.00013635124883758744,
      "loss": 3.0668,
      "step": 157517
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8104498386383057,
      "learning_rate": 0.00013634782049729295,
      "loss": 3.0351,
      "step": 157518
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7998175621032715,
      "learning_rate": 0.0001363443921874243,
      "loss": 3.0789,
      "step": 157519
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.42795467376709,
      "learning_rate": 0.0001363409639079824,
      "loss": 2.8312,
      "step": 157520
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.997619390487671,
      "learning_rate": 0.0001363375356589677,
      "loss": 2.9355,
      "step": 157521
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6510446071624756,
      "learning_rate": 0.00013633410744038105,
      "loss": 2.8217,
      "step": 157522
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4598348140716553,
      "learning_rate": 0.0001363306792522229,
      "loss": 2.7846,
      "step": 157523
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8942108154296875,
      "learning_rate": 0.00013632725109449413,
      "loss": 3.0619,
      "step": 157524
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0869271755218506,
      "learning_rate": 0.00013632382296719503,
      "loss": 3.1375,
      "step": 157525
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5081584453582764,
      "learning_rate": 0.00013632039487032654,
      "loss": 2.7506,
      "step": 157526
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4416420459747314,
      "learning_rate": 0.00013631696680388905,
      "loss": 2.9218,
      "step": 157527
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0889463424682617,
      "learning_rate": 0.00013631353876788346,
      "loss": 2.9836,
      "step": 157528
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.208178758621216,
      "learning_rate": 0.00013631011076231013,
      "loss": 3.1352,
      "step": 157529
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5893476009368896,
      "learning_rate": 0.00013630668278717008,
      "loss": 3.0815,
      "step": 157530
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5859382152557373,
      "learning_rate": 0.00013630325484246348,
      "loss": 2.9118,
      "step": 157531
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6564226150512695,
      "learning_rate": 0.0001362998269281913,
      "loss": 2.9591,
      "step": 157532
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.723022937774658,
      "learning_rate": 0.00013629639904435395,
      "loss": 2.8876,
      "step": 157533
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.157132863998413,
      "learning_rate": 0.00013629297119095233,
      "loss": 3.1199,
      "step": 157534
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.266850233078003,
      "learning_rate": 0.00013628954336798683,
      "loss": 2.9228,
      "step": 157535
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3765249252319336,
      "learning_rate": 0.00013628611557545839,
      "loss": 3.2027,
      "step": 157536
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.7078356742858887,
      "learning_rate": 0.00013628268781336724,
      "loss": 3.0387,
      "step": 157537
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1425225734710693,
      "learning_rate": 0.00013627926008171433,
      "loss": 2.9767,
      "step": 157538
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8104238510131836,
      "learning_rate": 0.0001362758323805001,
      "loss": 2.8143,
      "step": 157539
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.383584976196289,
      "learning_rate": 0.00013627240470972537,
      "loss": 3.0131,
      "step": 157540
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.155972480773926,
      "learning_rate": 0.0001362689770693906,
      "loss": 2.8449,
      "step": 157541
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4466116428375244,
      "learning_rate": 0.00013626554945949664,
      "loss": 3.0383,
      "step": 157542
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.57463002204895,
      "learning_rate": 0.000136262121880044,
      "loss": 2.6603,
      "step": 157543
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.338662624359131,
      "learning_rate": 0.00013625869433103334,
      "loss": 2.9815,
      "step": 157544
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2408409118652344,
      "learning_rate": 0.00013625526681246514,
      "loss": 2.6083,
      "step": 157545
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.158684730529785,
      "learning_rate": 0.00013625183932434027,
      "loss": 3.0092,
      "step": 157546
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.302225351333618,
      "learning_rate": 0.00013624841186665916,
      "loss": 2.78,
      "step": 157547
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6988677978515625,
      "learning_rate": 0.0001362449844394227,
      "loss": 3.2108,
      "step": 157548
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.304368257522583,
      "learning_rate": 0.0001362415570426314,
      "loss": 3.0553,
      "step": 157549
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.290461540222168,
      "learning_rate": 0.00013623812967628586,
      "loss": 2.9332,
      "step": 157550
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6205849647521973,
      "learning_rate": 0.00013623470234038664,
      "loss": 2.8522,
      "step": 157551
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1337990760803223,
      "learning_rate": 0.0001362312750349346,
      "loss": 2.8783,
      "step": 157552
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1909308433532715,
      "learning_rate": 0.00013622784775993015,
      "loss": 3.1347,
      "step": 157553
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.78905987739563,
      "learning_rate": 0.00013622442051537415,
      "loss": 2.843,
      "step": 157554
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0806820392608643,
      "learning_rate": 0.0001362209933012671,
      "loss": 3.1693,
      "step": 157555
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4894356727600098,
      "learning_rate": 0.0001362175661176097,
      "loss": 2.8363,
      "step": 157556
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.235966920852661,
      "learning_rate": 0.00013621413896440236,
      "loss": 2.9651,
      "step": 157557
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8542098999023438,
      "learning_rate": 0.00013621071184164607,
      "loss": 2.8199,
      "step": 157558
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8128159046173096,
      "learning_rate": 0.00013620728474934116,
      "loss": 2.9619,
      "step": 157559
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.34767746925354,
      "learning_rate": 0.00013620385768748852,
      "loss": 3.2088,
      "step": 157560
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4190311431884766,
      "learning_rate": 0.0001362004306560887,
      "loss": 2.957,
      "step": 157561
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2578840255737305,
      "learning_rate": 0.0001361970036551423,
      "loss": 2.8342,
      "step": 157562
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4407479763031006,
      "learning_rate": 0.00013619357668464985,
      "loss": 3.0949,
      "step": 157563
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5038344860076904,
      "learning_rate": 0.00013619014974461225,
      "loss": 3.0291,
      "step": 157564
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7802767753601074,
      "learning_rate": 0.00013618672283502984,
      "loss": 3.0582,
      "step": 157565
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.847784996032715,
      "learning_rate": 0.00013618329595590354,
      "loss": 2.9177,
      "step": 157566
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.321228504180908,
      "learning_rate": 0.00013617986910723385,
      "loss": 3.0715,
      "step": 157567
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.347167491912842,
      "learning_rate": 0.00013617644228902144,
      "loss": 3.05,
      "step": 157568
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7009990215301514,
      "learning_rate": 0.0001361730155012668,
      "loss": 2.9134,
      "step": 157569
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5139145851135254,
      "learning_rate": 0.0001361695887439708,
      "loss": 2.831,
      "step": 157570
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8660454750061035,
      "learning_rate": 0.00013616616201713382,
      "loss": 2.7523,
      "step": 157571
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.319150686264038,
      "learning_rate": 0.0001361627353207568,
      "loss": 3.0213,
      "step": 157572
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.578305959701538,
      "learning_rate": 0.0001361593086548402,
      "loss": 2.8055,
      "step": 157573
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4379024505615234,
      "learning_rate": 0.00013615588201938467,
      "loss": 2.593,
      "step": 157574
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1144235134124756,
      "learning_rate": 0.00013615245541439075,
      "loss": 3.0471,
      "step": 157575
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.760331630706787,
      "learning_rate": 0.0001361490288398593,
      "loss": 2.9973,
      "step": 157576
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1472482681274414,
      "learning_rate": 0.00013614560229579072,
      "loss": 3.1438,
      "step": 157577
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.771392822265625,
      "learning_rate": 0.00013614217578218586,
      "loss": 2.9455,
      "step": 157578
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2122623920440674,
      "learning_rate": 0.0001361387492990453,
      "loss": 2.7945,
      "step": 157579
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8368070125579834,
      "learning_rate": 0.00013613532284636952,
      "loss": 3.1138,
      "step": 157580
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4510226249694824,
      "learning_rate": 0.00013613189642415935,
      "loss": 2.7642,
      "step": 157581
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.060908555984497,
      "learning_rate": 0.0001361284700324154,
      "loss": 2.8103,
      "step": 157582
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.124755382537842,
      "learning_rate": 0.00013612504367113812,
      "loss": 2.9697,
      "step": 157583
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8890841007232666,
      "learning_rate": 0.0001361216173403284,
      "loss": 3.3912,
      "step": 157584
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.550140380859375,
      "learning_rate": 0.0001361181910399868,
      "loss": 3.0117,
      "step": 157585
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6038622856140137,
      "learning_rate": 0.00013611476477011378,
      "loss": 2.7937,
      "step": 157586
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5141263008117676,
      "learning_rate": 0.00013611133853071023,
      "loss": 2.9006,
      "step": 157587
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.490636110305786,
      "learning_rate": 0.0001361079123217767,
      "loss": 2.9728,
      "step": 157588
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.992466688156128,
      "learning_rate": 0.00013610448614331366,
      "loss": 3.0205,
      "step": 157589
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.741023302078247,
      "learning_rate": 0.00013610105999532201,
      "loss": 2.8303,
      "step": 157590
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0772955417633057,
      "learning_rate": 0.00013609763387780228,
      "loss": 2.7838,
      "step": 157591
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.332152843475342,
      "learning_rate": 0.00013609420779075494,
      "loss": 2.7987,
      "step": 157592
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.072463035583496,
      "learning_rate": 0.00013609078173418095,
      "loss": 2.6333,
      "step": 157593
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.432680606842041,
      "learning_rate": 0.00013608735570808063,
      "loss": 3.0494,
      "step": 157594
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.276897430419922,
      "learning_rate": 0.0001360839297124549,
      "loss": 2.7308,
      "step": 157595
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2047319412231445,
      "learning_rate": 0.00013608050374730422,
      "loss": 2.9789,
      "step": 157596
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.153001070022583,
      "learning_rate": 0.0001360770778126293,
      "loss": 2.7928,
      "step": 157597
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.49503231048584,
      "learning_rate": 0.00013607365190843065,
      "loss": 3.0228,
      "step": 157598
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2772457599639893,
      "learning_rate": 0.00013607022603470911,
      "loss": 2.8013,
      "step": 157599
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2662930488586426,
      "learning_rate": 0.00013606680019146504,
      "loss": 3.2338,
      "step": 157600
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.076314687728882,
      "learning_rate": 0.00013606337437869942,
      "loss": 3.024,
      "step": 157601
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3517024517059326,
      "learning_rate": 0.0001360599485964126,
      "loss": 2.9709,
      "step": 157602
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.507972002029419,
      "learning_rate": 0.00013605652284460554,
      "loss": 2.9474,
      "step": 157603
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3178951740264893,
      "learning_rate": 0.00013605309712327843,
      "loss": 2.8968,
      "step": 157604
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4903199672698975,
      "learning_rate": 0.00013604967143243226,
      "loss": 2.865,
      "step": 157605
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8186094760894775,
      "learning_rate": 0.00013604624577206746,
      "loss": 3.1214,
      "step": 157606
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.361621618270874,
      "learning_rate": 0.00013604282014218483,
      "loss": 3.0413,
      "step": 157607
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2288818359375,
      "learning_rate": 0.00013603939454278485,
      "loss": 3.3151,
      "step": 157608
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6457316875457764,
      "learning_rate": 0.00013603596897386838,
      "loss": 2.8788,
      "step": 157609
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4583375453948975,
      "learning_rate": 0.00013603254343543585,
      "loss": 2.8069,
      "step": 157610
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1689701080322266,
      "learning_rate": 0.00013602911792748803,
      "loss": 2.9852,
      "step": 157611
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9273762702941895,
      "learning_rate": 0.00013602569245002534,
      "loss": 2.7867,
      "step": 157612
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4387474060058594,
      "learning_rate": 0.0001360222670030487,
      "loss": 2.9718,
      "step": 157613
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4534833431243896,
      "learning_rate": 0.00013601884158655848,
      "loss": 3.1806,
      "step": 157614
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5330893993377686,
      "learning_rate": 0.00013601541620055562,
      "loss": 3.1117,
      "step": 157615
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1564815044403076,
      "learning_rate": 0.00013601199084504055,
      "loss": 2.9073,
      "step": 157616
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.896132946014404,
      "learning_rate": 0.00013600856552001394,
      "loss": 2.9397,
      "step": 157617
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6522512435913086,
      "learning_rate": 0.00013600514022547633,
      "loss": 2.9481,
      "step": 157618
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.8788015842437744,
      "learning_rate": 0.00013600171496142856,
      "loss": 2.8903,
      "step": 157619
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1299006938934326,
      "learning_rate": 0.00013599828972787108,
      "loss": 3.1027,
      "step": 157620
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.307642698287964,
      "learning_rate": 0.0001359948645248047,
      "loss": 3.0101,
      "step": 157621
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.756317377090454,
      "learning_rate": 0.00013599143935223,
      "loss": 3.2067,
      "step": 157622
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.994412422180176,
      "learning_rate": 0.00013598801421014757,
      "loss": 2.9139,
      "step": 157623
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5366578102111816,
      "learning_rate": 0.00013598458909855796,
      "loss": 3.1218,
      "step": 157624
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1297011375427246,
      "learning_rate": 0.00013598116401746202,
      "loss": 2.96,
      "step": 157625
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.216745138168335,
      "learning_rate": 0.00013597773896686016,
      "loss": 3.1826,
      "step": 157626
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9955976009368896,
      "learning_rate": 0.00013597431394675325,
      "loss": 3.1591,
      "step": 157627
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4561445713043213,
      "learning_rate": 0.0001359708889571418,
      "loss": 3.0419,
      "step": 157628
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1951496601104736,
      "learning_rate": 0.0001359674639980265,
      "loss": 3.0715,
      "step": 157629
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4753644466400146,
      "learning_rate": 0.00013596403906940776,
      "loss": 3.0255,
      "step": 157630
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3140625953674316,
      "learning_rate": 0.00013596061417128657,
      "loss": 2.8089,
      "step": 157631
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3255679607391357,
      "learning_rate": 0.00013595718930366327,
      "loss": 3.1081,
      "step": 157632
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.836789131164551,
      "learning_rate": 0.00013595376446653873,
      "loss": 2.9519,
      "step": 157633
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8007752895355225,
      "learning_rate": 0.0001359503396599135,
      "loss": 2.7216,
      "step": 157634
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.198066234588623,
      "learning_rate": 0.0001359469148837882,
      "loss": 2.8697,
      "step": 157635
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.870267629623413,
      "learning_rate": 0.00013594349013816334,
      "loss": 2.8267,
      "step": 157636
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.180133104324341,
      "learning_rate": 0.0001359400654230398,
      "loss": 3.0132,
      "step": 157637
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4486465454101562,
      "learning_rate": 0.00013593664073841796,
      "loss": 2.726,
      "step": 157638
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2952237129211426,
      "learning_rate": 0.00013593321608429876,
      "loss": 2.9229,
      "step": 157639
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3002023696899414,
      "learning_rate": 0.00013592979146068263,
      "loss": 2.7914,
      "step": 157640
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.7072372436523438,
      "learning_rate": 0.00013592636686757023,
      "loss": 2.7148,
      "step": 157641
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4803264141082764,
      "learning_rate": 0.00013592294230496216,
      "loss": 3.0267,
      "step": 157642
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1301662921905518,
      "learning_rate": 0.0001359195177728592,
      "loss": 2.9105,
      "step": 157643
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.9669675827026367,
      "learning_rate": 0.00013591609327126179,
      "loss": 2.7888,
      "step": 157644
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.936126232147217,
      "learning_rate": 0.00013591266880017076,
      "loss": 2.9528,
      "step": 157645
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2545812129974365,
      "learning_rate": 0.0001359092443595867,
      "loss": 2.8293,
      "step": 157646
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6582930088043213,
      "learning_rate": 0.0001359058199495102,
      "loss": 3.274,
      "step": 157647
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.272185802459717,
      "learning_rate": 0.0001359023955699418,
      "loss": 2.9293,
      "step": 157648
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4043309688568115,
      "learning_rate": 0.0001358989712208823,
      "loss": 3.1895,
      "step": 157649
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.278458595275879,
      "learning_rate": 0.00013589554690233223,
      "loss": 3.0804,
      "step": 157650
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5263476371765137,
      "learning_rate": 0.00013589212261429237,
      "loss": 3.131,
      "step": 157651
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4352381229400635,
      "learning_rate": 0.00013588869835676326,
      "loss": 2.8388,
      "step": 157652
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.342043399810791,
      "learning_rate": 0.00013588527412974555,
      "loss": 3.2307,
      "step": 157653
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.734851598739624,
      "learning_rate": 0.00013588184993323975,
      "loss": 3.1088,
      "step": 157654
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.389761209487915,
      "learning_rate": 0.00013587842576724668,
      "loss": 3.2516,
      "step": 157655
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.792170286178589,
      "learning_rate": 0.00013587500163176683,
      "loss": 2.7229,
      "step": 157656
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4999332427978516,
      "learning_rate": 0.00013587157752680104,
      "loss": 3.0186,
      "step": 157657
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3157665729522705,
      "learning_rate": 0.00013586815345234982,
      "loss": 2.7965,
      "step": 157658
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.618572473526001,
      "learning_rate": 0.00013586472940841377,
      "loss": 2.6696,
      "step": 157659
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2983505725860596,
      "learning_rate": 0.0001358613053949935,
      "loss": 3.0036,
      "step": 157660
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.640782833099365,
      "learning_rate": 0.0001358578814120898,
      "loss": 3.0603,
      "step": 157661
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9004862308502197,
      "learning_rate": 0.00013585445745970312,
      "loss": 2.9931,
      "step": 157662
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6504158973693848,
      "learning_rate": 0.0001358510335378343,
      "loss": 2.6467,
      "step": 157663
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.489694595336914,
      "learning_rate": 0.00013584760964648385,
      "loss": 2.7612,
      "step": 157664
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.921376943588257,
      "learning_rate": 0.00013584418578565236,
      "loss": 2.9009,
      "step": 157665
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2224035263061523,
      "learning_rate": 0.0001358407619553406,
      "loss": 3.0157,
      "step": 157666
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.125666379928589,
      "learning_rate": 0.0001358373381555492,
      "loss": 3.1218,
      "step": 157667
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4308719635009766,
      "learning_rate": 0.00013583391438627855,
      "loss": 2.8982,
      "step": 157668
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.327972412109375,
      "learning_rate": 0.00013583049064752966,
      "loss": 2.8712,
      "step": 157669
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.332698106765747,
      "learning_rate": 0.00013582706693930294,
      "loss": 3.0693,
      "step": 157670
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0600099563598633,
      "learning_rate": 0.00013582364326159897,
      "loss": 2.8557,
      "step": 157671
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4108991622924805,
      "learning_rate": 0.00013582021961441857,
      "loss": 2.8559,
      "step": 157672
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4879367351531982,
      "learning_rate": 0.00013581679599776233,
      "loss": 3.1005,
      "step": 157673
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2213966846466064,
      "learning_rate": 0.0001358133724116307,
      "loss": 3.2058,
      "step": 157674
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2741293907165527,
      "learning_rate": 0.0001358099488560246,
      "loss": 3.0545,
      "step": 157675
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3515796661376953,
      "learning_rate": 0.00013580652533094442,
      "loss": 2.8678,
      "step": 157676
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.602060556411743,
      "learning_rate": 0.000135803101836391,
      "loss": 2.8711,
      "step": 157677
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.240173578262329,
      "learning_rate": 0.00013579967837236485,
      "loss": 3.0149,
      "step": 157678
    },
    {
      "epoch": 2.05,
      "grad_norm": 1.9657000303268433,
      "learning_rate": 0.00013579625493886658,
      "loss": 3.0303,
      "step": 157679
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.858405590057373,
      "learning_rate": 0.00013579283153589697,
      "loss": 2.784,
      "step": 157680
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2963812351226807,
      "learning_rate": 0.00013578940816345658,
      "loss": 2.9046,
      "step": 157681
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1029858589172363,
      "learning_rate": 0.00013578598482154593,
      "loss": 2.9991,
      "step": 157682
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5290989875793457,
      "learning_rate": 0.0001357825615101659,
      "loss": 2.832,
      "step": 157683
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2819628715515137,
      "learning_rate": 0.00013577913822931698,
      "loss": 3.0051,
      "step": 157684
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.367372989654541,
      "learning_rate": 0.0001357757149789997,
      "loss": 2.9783,
      "step": 157685
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2558090686798096,
      "learning_rate": 0.0001357722917592149,
      "loss": 2.7946,
      "step": 157686
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5106124877929688,
      "learning_rate": 0.00013576886856996316,
      "loss": 3.1021,
      "step": 157687
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6232621669769287,
      "learning_rate": 0.00013576544541124498,
      "loss": 3.0407,
      "step": 157688
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.955744743347168,
      "learning_rate": 0.0001357620222830612,
      "loss": 3.1914,
      "step": 157689
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1581335067749023,
      "learning_rate": 0.00013575859918541237,
      "loss": 3.0259,
      "step": 157690
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4810500144958496,
      "learning_rate": 0.000135755176118299,
      "loss": 3.1672,
      "step": 157691
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.518232822418213,
      "learning_rate": 0.00013575175308172194,
      "loss": 2.7396,
      "step": 157692
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.485621213912964,
      "learning_rate": 0.00013574833007568162,
      "loss": 2.9251,
      "step": 157693
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2397305965423584,
      "learning_rate": 0.0001357449071001789,
      "loss": 2.8233,
      "step": 157694
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7317934036254883,
      "learning_rate": 0.00013574148415521436,
      "loss": 2.7998,
      "step": 157695
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6164307594299316,
      "learning_rate": 0.00013573806124078853,
      "loss": 3.077,
      "step": 157696
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.208442211151123,
      "learning_rate": 0.00013573463835690196,
      "loss": 3.1326,
      "step": 157697
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.519212484359741,
      "learning_rate": 0.00013573121550355556,
      "loss": 2.8784,
      "step": 157698
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3420000076293945,
      "learning_rate": 0.00013572779268074972,
      "loss": 3.1701,
      "step": 157699
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.627220392227173,
      "learning_rate": 0.00013572436988848528,
      "loss": 3.0093,
      "step": 157700
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.768852710723877,
      "learning_rate": 0.00013572094712676282,
      "loss": 2.8599,
      "step": 157701
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5287933349609375,
      "learning_rate": 0.0001357175243955829,
      "loss": 2.9657,
      "step": 157702
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.389118671417236,
      "learning_rate": 0.00013571410169494608,
      "loss": 3.1526,
      "step": 157703
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3193185329437256,
      "learning_rate": 0.00013571067902485323,
      "loss": 2.9612,
      "step": 157704
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9266550540924072,
      "learning_rate": 0.00013570725638530477,
      "loss": 2.8955,
      "step": 157705
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.403819561004639,
      "learning_rate": 0.0001357038337763015,
      "loss": 3.2422,
      "step": 157706
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.080437183380127,
      "learning_rate": 0.00013570041119784405,
      "loss": 2.7838,
      "step": 157707
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.8863956928253174,
      "learning_rate": 0.00013569698864993295,
      "loss": 3.042,
      "step": 157708
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.6165504455566406,
      "learning_rate": 0.00013569356613256877,
      "loss": 3.1416,
      "step": 157709
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.7250900268554688,
      "learning_rate": 0.00013569014364575238,
      "loss": 2.969,
      "step": 157710
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0986201763153076,
      "learning_rate": 0.0001356867211894842,
      "loss": 2.9377,
      "step": 157711
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.702483892440796,
      "learning_rate": 0.00013568329876376505,
      "loss": 3.0497,
      "step": 157712
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0834763050079346,
      "learning_rate": 0.0001356798763685955,
      "loss": 3.2107,
      "step": 157713
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6808419227600098,
      "learning_rate": 0.00013567645400397612,
      "loss": 2.9146,
      "step": 157714
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.285170316696167,
      "learning_rate": 0.00013567303166990753,
      "loss": 2.8767,
      "step": 157715
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2910618782043457,
      "learning_rate": 0.00013566960936639048,
      "loss": 2.6111,
      "step": 157716
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.203278064727783,
      "learning_rate": 0.0001356661870934255,
      "loss": 2.8906,
      "step": 157717
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5577263832092285,
      "learning_rate": 0.00013566276485101335,
      "loss": 2.9713,
      "step": 157718
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.244722604751587,
      "learning_rate": 0.00013565934263915457,
      "loss": 2.772,
      "step": 157719
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4334187507629395,
      "learning_rate": 0.00013565592045784987,
      "loss": 2.9422,
      "step": 157720
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.010801315307617,
      "learning_rate": 0.0001356524983070997,
      "loss": 3.0562,
      "step": 157721
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.21144437789917,
      "learning_rate": 0.00013564907618690496,
      "loss": 2.8853,
      "step": 157722
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9512710571289062,
      "learning_rate": 0.00013564565409726606,
      "loss": 3.03,
      "step": 157723
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2948381900787354,
      "learning_rate": 0.0001356422320381838,
      "loss": 3.0439,
      "step": 157724
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.362624168395996,
      "learning_rate": 0.0001356388100096588,
      "loss": 2.9247,
      "step": 157725
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0804660320281982,
      "learning_rate": 0.00013563538801169162,
      "loss": 2.8992,
      "step": 157726
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0525858402252197,
      "learning_rate": 0.0001356319660442828,
      "loss": 2.9594,
      "step": 157727
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.7370824813842773,
      "learning_rate": 0.00013562854410743323,
      "loss": 2.9259,
      "step": 157728
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4080803394317627,
      "learning_rate": 0.0001356251222011433,
      "loss": 3.0964,
      "step": 157729
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.341078281402588,
      "learning_rate": 0.00013562170032541392,
      "loss": 3.1167,
      "step": 157730
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9846620559692383,
      "learning_rate": 0.0001356182784802455,
      "loss": 3.0325,
      "step": 157731
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1167800426483154,
      "learning_rate": 0.00013561485666563878,
      "loss": 3.0108,
      "step": 157732
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.230752468109131,
      "learning_rate": 0.00013561143488159425,
      "loss": 2.9592,
      "step": 157733
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.224774122238159,
      "learning_rate": 0.00013560801312811275,
      "loss": 2.8148,
      "step": 157734
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3740081787109375,
      "learning_rate": 0.00013560459140519473,
      "loss": 2.9954,
      "step": 157735
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.39032244682312,
      "learning_rate": 0.00013560116971284103,
      "loss": 3.0731,
      "step": 157736
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.873807191848755,
      "learning_rate": 0.00013559774805105217,
      "loss": 2.96,
      "step": 157737
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.0071990489959717,
      "learning_rate": 0.0001355943264198288,
      "loss": 2.8731,
      "step": 157738
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.533332347869873,
      "learning_rate": 0.00013559090481917143,
      "loss": 3.1685,
      "step": 157739
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6236631870269775,
      "learning_rate": 0.00013558748324908092,
      "loss": 3.0748,
      "step": 157740
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.974308729171753,
      "learning_rate": 0.0001355840617095577,
      "loss": 3.0144,
      "step": 157741
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5869333744049072,
      "learning_rate": 0.0001355806402006026,
      "loss": 2.8472,
      "step": 157742
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.281646490097046,
      "learning_rate": 0.00013557721872221607,
      "loss": 2.9203,
      "step": 157743
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4403295516967773,
      "learning_rate": 0.00013557379727439903,
      "loss": 3.0358,
      "step": 157744
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.029218912124634,
      "learning_rate": 0.00013557037585715175,
      "loss": 3.0811,
      "step": 157745
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.165787696838379,
      "learning_rate": 0.00013556695447047512,
      "loss": 2.9293,
      "step": 157746
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.908707857131958,
      "learning_rate": 0.00013556353311436957,
      "loss": 3.1563,
      "step": 157747
    },
    {
      "epoch": 2.05,
      "grad_norm": 1.9602066278457642,
      "learning_rate": 0.00013556011178883604,
      "loss": 3.2245,
      "step": 157748
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.525038719177246,
      "learning_rate": 0.00013555669049387483,
      "loss": 3.0557,
      "step": 157749
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.791937828063965,
      "learning_rate": 0.00013555326922948697,
      "loss": 3.0204,
      "step": 157750
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.103168487548828,
      "learning_rate": 0.00013554984799567262,
      "loss": 3.1135,
      "step": 157751
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3178536891937256,
      "learning_rate": 0.00013554642679243278,
      "loss": 2.7838,
      "step": 157752
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3587772846221924,
      "learning_rate": 0.00013554300561976785,
      "loss": 2.8736,
      "step": 157753
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.1901116371154785,
      "learning_rate": 0.0001355395844776787,
      "loss": 2.7325,
      "step": 157754
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.320107460021973,
      "learning_rate": 0.00013553616336616574,
      "loss": 2.9921,
      "step": 157755
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.333195447921753,
      "learning_rate": 0.0001355327422852298,
      "loss": 2.7576,
      "step": 157756
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.034407138824463,
      "learning_rate": 0.00013552932123487145,
      "loss": 3.1108,
      "step": 157757
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.337904691696167,
      "learning_rate": 0.0001355259002150913,
      "loss": 2.8312,
      "step": 157758
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.277334451675415,
      "learning_rate": 0.00013552247922588988,
      "loss": 2.9757,
      "step": 157759
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3183882236480713,
      "learning_rate": 0.00013551905826726803,
      "loss": 3.1072,
      "step": 157760
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.7607648372650146,
      "learning_rate": 0.0001355156373392262,
      "loss": 3.0761,
      "step": 157761
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.499422073364258,
      "learning_rate": 0.0001355122164417652,
      "loss": 3.0066,
      "step": 157762
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3358817100524902,
      "learning_rate": 0.0001355087955748856,
      "loss": 3.0653,
      "step": 157763
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.9682955741882324,
      "learning_rate": 0.00013550537473858794,
      "loss": 2.8639,
      "step": 157764
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.6717655658721924,
      "learning_rate": 0.00013550195393287302,
      "loss": 2.8413,
      "step": 157765
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6185665130615234,
      "learning_rate": 0.0001354985331577414,
      "loss": 2.9944,
      "step": 157766
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.8933873176574707,
      "learning_rate": 0.00013549511241319357,
      "loss": 2.9931,
      "step": 157767
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.448482036590576,
      "learning_rate": 0.00013549169169923045,
      "loss": 3.1813,
      "step": 157768
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.17212176322937,
      "learning_rate": 0.0001354882710158525,
      "loss": 2.8676,
      "step": 157769
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.027767181396484,
      "learning_rate": 0.00013548485036306027,
      "loss": 2.8669,
      "step": 157770
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.4520463943481445,
      "learning_rate": 0.00013548142974085468,
      "loss": 3.1076,
      "step": 157771
    },
    {
      "epoch": 2.05,
      "grad_norm": 5.4114789962768555,
      "learning_rate": 0.00013547800914923616,
      "loss": 2.8377,
      "step": 157772
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.605960845947266,
      "learning_rate": 0.00013547458858820526,
      "loss": 2.8858,
      "step": 157773
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1620285511016846,
      "learning_rate": 0.00013547116805776287,
      "loss": 3.0536,
      "step": 157774
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.290654182434082,
      "learning_rate": 0.0001354677475579095,
      "loss": 2.8228,
      "step": 157775
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.299691677093506,
      "learning_rate": 0.00013546432708864563,
      "loss": 3.1502,
      "step": 157776
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.739167213439941,
      "learning_rate": 0.0001354609066499722,
      "loss": 3.0474,
      "step": 157777
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6655752658843994,
      "learning_rate": 0.00013545748624188957,
      "loss": 2.8095,
      "step": 157778
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.4275500774383545,
      "learning_rate": 0.0001354540658643986,
      "loss": 2.6801,
      "step": 157779
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3592121601104736,
      "learning_rate": 0.00013545064551749984,
      "loss": 2.9835,
      "step": 157780
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.033719539642334,
      "learning_rate": 0.0001354472252011939,
      "loss": 3.0688,
      "step": 157781
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.3866143226623535,
      "learning_rate": 0.0001354438049154813,
      "loss": 2.8983,
      "step": 157782
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.404522657394409,
      "learning_rate": 0.00013544038466036296,
      "loss": 3.2115,
      "step": 157783
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5878875255584717,
      "learning_rate": 0.00013543696443583922,
      "loss": 2.8782,
      "step": 157784
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.335879325866699,
      "learning_rate": 0.000135433544241911,
      "loss": 2.9892,
      "step": 157785
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.30267596244812,
      "learning_rate": 0.00013543012407857873,
      "loss": 3.082,
      "step": 157786
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.675015449523926,
      "learning_rate": 0.00013542670394584315,
      "loss": 2.7634,
      "step": 157787
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.465778112411499,
      "learning_rate": 0.00013542328384370472,
      "loss": 3.0359,
      "step": 157788
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.18794584274292,
      "learning_rate": 0.00013541986377216432,
      "loss": 3.1191,
      "step": 157789
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4875924587249756,
      "learning_rate": 0.00013541644373122236,
      "loss": 3.002,
      "step": 157790
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6388704776763916,
      "learning_rate": 0.0001354130237208797,
      "loss": 2.8075,
      "step": 157791
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.712571859359741,
      "learning_rate": 0.0001354096037411369,
      "loss": 2.9022,
      "step": 157792
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3494865894317627,
      "learning_rate": 0.00013540618379199453,
      "loss": 3.0426,
      "step": 157793
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.104501962661743,
      "learning_rate": 0.00013540276387345317,
      "loss": 2.9526,
      "step": 157794
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1001250743865967,
      "learning_rate": 0.00013539934398551363,
      "loss": 2.8579,
      "step": 157795
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.217362642288208,
      "learning_rate": 0.00013539592412817634,
      "loss": 2.8575,
      "step": 157796
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.417508602142334,
      "learning_rate": 0.00013539250430144218,
      "loss": 3.2975,
      "step": 157797
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3737380504608154,
      "learning_rate": 0.00013538908450531168,
      "loss": 3.1232,
      "step": 157798
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.0567994117736816,
      "learning_rate": 0.00013538566473978545,
      "loss": 2.9357,
      "step": 157799
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.714848756790161,
      "learning_rate": 0.000135382245004864,
      "loss": 3.072,
      "step": 157800
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.935029983520508,
      "learning_rate": 0.00013537882530054822,
      "loss": 3.0397,
      "step": 157801
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.161747932434082,
      "learning_rate": 0.0001353754056268385,
      "loss": 3.0495,
      "step": 157802
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.2446117401123047,
      "learning_rate": 0.00013537198598373573,
      "loss": 2.791,
      "step": 157803
    },
    {
      "epoch": 2.05,
      "grad_norm": 4.26296854019165,
      "learning_rate": 0.00013536856637124043,
      "loss": 3.0581,
      "step": 157804
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.34230375289917,
      "learning_rate": 0.00013536514678935316,
      "loss": 2.7793,
      "step": 157805
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.648458957672119,
      "learning_rate": 0.00013536172723807458,
      "loss": 2.7311,
      "step": 157806
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.397534132003784,
      "learning_rate": 0.00013535830771740546,
      "loss": 3.0933,
      "step": 157807
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9501118659973145,
      "learning_rate": 0.00013535488822734616,
      "loss": 2.9764,
      "step": 157808
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.083860397338867,
      "learning_rate": 0.00013535146876789768,
      "loss": 2.8792,
      "step": 157809
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.724635601043701,
      "learning_rate": 0.00013534804933906032,
      "loss": 2.9361,
      "step": 157810
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.580059766769409,
      "learning_rate": 0.0001353446299408351,
      "loss": 3.1483,
      "step": 157811
    },
    {
      "epoch": 2.05,
      "grad_norm": 6.135270595550537,
      "learning_rate": 0.0001353412105732222,
      "loss": 2.827,
      "step": 157812
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.3826699256896973,
      "learning_rate": 0.0001353377912362226,
      "loss": 2.8321,
      "step": 157813
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.527249813079834,
      "learning_rate": 0.00013533437192983665,
      "loss": 2.9233,
      "step": 157814
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2347893714904785,
      "learning_rate": 0.0001353309526540653,
      "loss": 3.0018,
      "step": 157815
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.2059972286224365,
      "learning_rate": 0.00013532753340890892,
      "loss": 2.8975,
      "step": 157816
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.4582180976867676,
      "learning_rate": 0.0001353241141943685,
      "loss": 3.0067,
      "step": 157817
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.5302326679229736,
      "learning_rate": 0.00013532069501044421,
      "loss": 3.1344,
      "step": 157818
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.1018128395080566,
      "learning_rate": 0.000135317275857137,
      "loss": 2.8437,
      "step": 157819
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9497222900390625,
      "learning_rate": 0.00013531385673444733,
      "loss": 2.8916,
      "step": 157820
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.120802164077759,
      "learning_rate": 0.000135310437642376,
      "loss": 2.933,
      "step": 157821
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.479626178741455,
      "learning_rate": 0.00013530701858092349,
      "loss": 2.886,
      "step": 157822
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.9739162921905518,
      "learning_rate": 0.0001353035995500907,
      "loss": 3.0851,
      "step": 157823
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.432451009750366,
      "learning_rate": 0.0001353001805498779,
      "loss": 2.9033,
      "step": 157824
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.435938835144043,
      "learning_rate": 0.00013529676158028598,
      "loss": 3.0534,
      "step": 157825
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.707646608352661,
      "learning_rate": 0.00013529334264131542,
      "loss": 2.9423,
      "step": 157826
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.336344003677368,
      "learning_rate": 0.00013528992373296701,
      "loss": 2.8998,
      "step": 157827
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.567615032196045,
      "learning_rate": 0.00013528650485524122,
      "loss": 3.1237,
      "step": 157828
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4373395442962646,
      "learning_rate": 0.00013528308600813905,
      "loss": 3.0628,
      "step": 157829
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0811543464660645,
      "learning_rate": 0.0001352796671916606,
      "loss": 2.9867,
      "step": 157830
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.36592960357666,
      "learning_rate": 0.00013527624840580684,
      "loss": 3.166,
      "step": 157831
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.302722454071045,
      "learning_rate": 0.00013527282965057827,
      "loss": 3.2492,
      "step": 157832
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9649996757507324,
      "learning_rate": 0.00013526941092597568,
      "loss": 2.8589,
      "step": 157833
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3068366050720215,
      "learning_rate": 0.00013526599223199955,
      "loss": 2.905,
      "step": 157834
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2881040573120117,
      "learning_rate": 0.00013526257356865078,
      "loss": 2.9422,
      "step": 157835
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.637300491333008,
      "learning_rate": 0.00013525915493592955,
      "loss": 2.7845,
      "step": 157836
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.102060556411743,
      "learning_rate": 0.00013525573633383687,
      "loss": 2.9247,
      "step": 157837
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.284512758255005,
      "learning_rate": 0.00013525231776237318,
      "loss": 2.9193,
      "step": 157838
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0396673679351807,
      "learning_rate": 0.0001352488992215393,
      "loss": 2.7361,
      "step": 157839
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.362086534500122,
      "learning_rate": 0.00013524548071133566,
      "loss": 3.0054,
      "step": 157840
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.7148468494415283,
      "learning_rate": 0.00013524206223176308,
      "loss": 2.7458,
      "step": 157841
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.565060615539551,
      "learning_rate": 0.00013523864378282212,
      "loss": 2.8576,
      "step": 157842
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.059422492980957,
      "learning_rate": 0.0001352352253645134,
      "loss": 2.9849,
      "step": 157843
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.390840530395508,
      "learning_rate": 0.00013523180697683744,
      "loss": 2.7666,
      "step": 157844
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.8852972984313965,
      "learning_rate": 0.00013522838861979516,
      "loss": 3.0592,
      "step": 157845
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.9809491634368896,
      "learning_rate": 0.00013522497029338685,
      "loss": 2.9462,
      "step": 157846
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.279977321624756,
      "learning_rate": 0.00013522155199761348,
      "loss": 3.1122,
      "step": 157847
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.611398458480835,
      "learning_rate": 0.00013521813373247552,
      "loss": 2.7841,
      "step": 157848
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.328343629837036,
      "learning_rate": 0.00013521471549797365,
      "loss": 2.9975,
      "step": 157849
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.45093297958374,
      "learning_rate": 0.00013521129729410832,
      "loss": 3.009,
      "step": 157850
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.600029468536377,
      "learning_rate": 0.00013520787912088046,
      "loss": 2.9373,
      "step": 157851
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.048386096954346,
      "learning_rate": 0.00013520446097829044,
      "loss": 3.0764,
      "step": 157852
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.595944404602051,
      "learning_rate": 0.00013520104286633914,
      "loss": 2.8566,
      "step": 157853
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.3131422996520996,
      "learning_rate": 0.0001351976247850271,
      "loss": 3.1802,
      "step": 157854
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.507908821105957,
      "learning_rate": 0.00013519420673435478,
      "loss": 2.7697,
      "step": 157855
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.4102606773376465,
      "learning_rate": 0.0001351907887143231,
      "loss": 2.8567,
      "step": 157856
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.956960678100586,
      "learning_rate": 0.00013518737072493254,
      "loss": 2.7367,
      "step": 157857
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1590089797973633,
      "learning_rate": 0.0001351839527661837,
      "loss": 3.1198,
      "step": 157858
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.107656717300415,
      "learning_rate": 0.00013518053483807735,
      "loss": 3.0701,
      "step": 157859
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.61442756652832,
      "learning_rate": 0.00013517711694061406,
      "loss": 2.6278,
      "step": 157860
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.794025421142578,
      "learning_rate": 0.00013517369907379434,
      "loss": 2.8539,
      "step": 157861
    },
    {
      "epoch": 2.06,
      "grad_norm": 5.599438190460205,
      "learning_rate": 0.00013517028123761904,
      "loss": 2.8503,
      "step": 157862
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.276787757873535,
      "learning_rate": 0.0001351668634320886,
      "loss": 2.8532,
      "step": 157863
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3767988681793213,
      "learning_rate": 0.00013516344565720384,
      "loss": 2.7787,
      "step": 157864
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6540493965148926,
      "learning_rate": 0.00013516002791296534,
      "loss": 3.0345,
      "step": 157865
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.589646816253662,
      "learning_rate": 0.0001351566101993737,
      "loss": 2.8301,
      "step": 157866
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.512179374694824,
      "learning_rate": 0.00013515319251642942,
      "loss": 2.989,
      "step": 157867
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.869786024093628,
      "learning_rate": 0.0001351497748641334,
      "loss": 2.817,
      "step": 157868
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.103780746459961,
      "learning_rate": 0.00013514635724248602,
      "loss": 2.5763,
      "step": 157869
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3069844245910645,
      "learning_rate": 0.0001351429396514882,
      "loss": 3.0311,
      "step": 157870
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.324281692504883,
      "learning_rate": 0.00013513952209114036,
      "loss": 2.8155,
      "step": 157871
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.525615930557251,
      "learning_rate": 0.00013513610456144325,
      "loss": 2.9414,
      "step": 157872
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5212996006011963,
      "learning_rate": 0.0001351326870623973,
      "loss": 2.9101,
      "step": 157873
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0951309204101562,
      "learning_rate": 0.00013512926959400345,
      "loss": 2.9341,
      "step": 157874
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.444369316101074,
      "learning_rate": 0.00013512585215626202,
      "loss": 2.9836,
      "step": 157875
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.368612051010132,
      "learning_rate": 0.00013512243474917394,
      "loss": 3.0226,
      "step": 157876
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4242212772369385,
      "learning_rate": 0.0001351190173727396,
      "loss": 3.1539,
      "step": 157877
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.69297194480896,
      "learning_rate": 0.00013511560002695997,
      "loss": 2.8518,
      "step": 157878
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.078942060470581,
      "learning_rate": 0.00013511218271183525,
      "loss": 3.1434,
      "step": 157879
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.725900650024414,
      "learning_rate": 0.00013510876542736637,
      "loss": 2.9612,
      "step": 157880
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.370690107345581,
      "learning_rate": 0.0001351053481735538,
      "loss": 2.8879,
      "step": 157881
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6562814712524414,
      "learning_rate": 0.00013510193095039833,
      "loss": 2.9915,
      "step": 157882
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.837326765060425,
      "learning_rate": 0.00013509851375790048,
      "loss": 2.869,
      "step": 157883
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.360292673110962,
      "learning_rate": 0.00013509509659606107,
      "loss": 2.5224,
      "step": 157884
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.393002510070801,
      "learning_rate": 0.00013509167946488044,
      "loss": 2.8971,
      "step": 157885
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.364758014678955,
      "learning_rate": 0.00013508826236435946,
      "loss": 2.899,
      "step": 157886
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8525404930114746,
      "learning_rate": 0.0001350848452944986,
      "loss": 2.8087,
      "step": 157887
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2113001346588135,
      "learning_rate": 0.00013508142825529866,
      "loss": 2.8246,
      "step": 157888
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4047958850860596,
      "learning_rate": 0.00013507801124676007,
      "loss": 2.929,
      "step": 157889
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.014970302581787,
      "learning_rate": 0.00013507459426888386,
      "loss": 2.963,
      "step": 157890
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1568219661712646,
      "learning_rate": 0.00013507117732167016,
      "loss": 2.917,
      "step": 157891
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.663463592529297,
      "learning_rate": 0.00013506776040511992,
      "loss": 2.9996,
      "step": 157892
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5627822875976562,
      "learning_rate": 0.00013506434351923363,
      "loss": 2.7597,
      "step": 157893
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.394404649734497,
      "learning_rate": 0.0001350609266640121,
      "loss": 2.7721,
      "step": 157894
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3491897583007812,
      "learning_rate": 0.0001350575098394557,
      "loss": 2.8967,
      "step": 157895
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.015070915222168,
      "learning_rate": 0.00013505409304556549,
      "loss": 3.0498,
      "step": 157896
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0577569007873535,
      "learning_rate": 0.0001350506762823416,
      "loss": 3.0886,
      "step": 157897
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8861777782440186,
      "learning_rate": 0.00013504725954978502,
      "loss": 2.9088,
      "step": 157898
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2286643981933594,
      "learning_rate": 0.00013504384284789613,
      "loss": 2.8987,
      "step": 157899
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.988670587539673,
      "learning_rate": 0.00013504042617667585,
      "loss": 2.787,
      "step": 157900
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.392250061035156,
      "learning_rate": 0.00013503700953612454,
      "loss": 2.9304,
      "step": 157901
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4512529373168945,
      "learning_rate": 0.00013503359292624318,
      "loss": 2.8918,
      "step": 157902
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.221076250076294,
      "learning_rate": 0.000135030176347032,
      "loss": 3.0942,
      "step": 157903
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.302907943725586,
      "learning_rate": 0.0001350267597984919,
      "loss": 2.8593,
      "step": 157904
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.36346435546875,
      "learning_rate": 0.0001350233432806233,
      "loss": 2.9987,
      "step": 157905
    },
    {
      "epoch": 2.06,
      "grad_norm": 5.239744663238525,
      "learning_rate": 0.00013501992679342713,
      "loss": 2.9074,
      "step": 157906
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0540049076080322,
      "learning_rate": 0.0001350165103369038,
      "loss": 2.9797,
      "step": 157907
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.461430072784424,
      "learning_rate": 0.00013501309391105416,
      "loss": 2.9417,
      "step": 157908
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7562148571014404,
      "learning_rate": 0.0001350096775158785,
      "loss": 2.7671,
      "step": 157909
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.5538244247436523,
      "learning_rate": 0.0001350062611513778,
      "loss": 3.0059,
      "step": 157910
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8804640769958496,
      "learning_rate": 0.0001350028448175524,
      "loss": 2.9772,
      "step": 157911
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.276179075241089,
      "learning_rate": 0.00013499942851440323,
      "loss": 2.9857,
      "step": 157912
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8701305389404297,
      "learning_rate": 0.00013499601224193066,
      "loss": 2.9973,
      "step": 157913
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.656320810317993,
      "learning_rate": 0.00013499259600013562,
      "loss": 2.9696,
      "step": 157914
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4481756687164307,
      "learning_rate": 0.0001349891797890184,
      "loss": 2.904,
      "step": 157915
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6030683517456055,
      "learning_rate": 0.00013498576360857991,
      "loss": 2.9392,
      "step": 157916
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5634562969207764,
      "learning_rate": 0.00013498234745882058,
      "loss": 2.8781,
      "step": 157917
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8664400577545166,
      "learning_rate": 0.00013497893133974123,
      "loss": 3.0321,
      "step": 157918
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.244629144668579,
      "learning_rate": 0.00013497551525134234,
      "loss": 3.0087,
      "step": 157919
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.934807777404785,
      "learning_rate": 0.0001349720991936248,
      "loss": 2.5874,
      "step": 157920
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.117532968521118,
      "learning_rate": 0.00013496868316658882,
      "loss": 3.0487,
      "step": 157921
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.828871726989746,
      "learning_rate": 0.00013496526717023544,
      "loss": 2.9809,
      "step": 157922
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7197022438049316,
      "learning_rate": 0.00013496185120456501,
      "loss": 2.8489,
      "step": 157923
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2591209411621094,
      "learning_rate": 0.00013495843526957837,
      "loss": 2.8203,
      "step": 157924
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5388364791870117,
      "learning_rate": 0.000134955019365276,
      "loss": 2.8928,
      "step": 157925
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.448094367980957,
      "learning_rate": 0.0001349516034916587,
      "loss": 3.0572,
      "step": 157926
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.881220817565918,
      "learning_rate": 0.000134948187648727,
      "loss": 3.1474,
      "step": 157927
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.047628402709961,
      "learning_rate": 0.00013494477183648157,
      "loss": 2.9937,
      "step": 157928
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2129671573638916,
      "learning_rate": 0.0001349413560549229,
      "loss": 3.0163,
      "step": 157929
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3016765117645264,
      "learning_rate": 0.0001349379403040519,
      "loss": 2.9621,
      "step": 157930
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0406477451324463,
      "learning_rate": 0.00013493452458386888,
      "loss": 2.8803,
      "step": 157931
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.262505531311035,
      "learning_rate": 0.00013493110889437477,
      "loss": 3.0703,
      "step": 157932
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.67110013961792,
      "learning_rate": 0.00013492769323557012,
      "loss": 2.9189,
      "step": 157933
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4806816577911377,
      "learning_rate": 0.00013492427760745547,
      "loss": 2.9589,
      "step": 157934
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.382131576538086,
      "learning_rate": 0.00013492086201003143,
      "loss": 2.9717,
      "step": 157935
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4731786251068115,
      "learning_rate": 0.00013491744644329886,
      "loss": 3.0934,
      "step": 157936
    },
    {
      "epoch": 2.06,
      "grad_norm": 5.6348490715026855,
      "learning_rate": 0.0001349140309072581,
      "loss": 2.7954,
      "step": 157937
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.754396677017212,
      "learning_rate": 0.00013491061540191006,
      "loss": 2.8997,
      "step": 157938
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.55720591545105,
      "learning_rate": 0.00013490719992725525,
      "loss": 2.8488,
      "step": 157939
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.796746253967285,
      "learning_rate": 0.0001349037844832942,
      "loss": 2.9243,
      "step": 157940
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.528778076171875,
      "learning_rate": 0.0001349003690700278,
      "loss": 3.0716,
      "step": 157941
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.449807167053223,
      "learning_rate": 0.0001348969536874565,
      "loss": 2.8725,
      "step": 157942
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3235175609588623,
      "learning_rate": 0.00013489353833558086,
      "loss": 3.0524,
      "step": 157943
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.816605806350708,
      "learning_rate": 0.00013489012301440173,
      "loss": 2.7757,
      "step": 157944
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6428329944610596,
      "learning_rate": 0.00013488670772391967,
      "loss": 2.9017,
      "step": 157945
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4207019805908203,
      "learning_rate": 0.00013488329246413516,
      "loss": 2.7413,
      "step": 157946
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.755711078643799,
      "learning_rate": 0.00013487987723504908,
      "loss": 2.8169,
      "step": 157947
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2407026290893555,
      "learning_rate": 0.00013487646203666197,
      "loss": 2.7066,
      "step": 157948
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.42284893989563,
      "learning_rate": 0.00013487304686897429,
      "loss": 2.9115,
      "step": 157949
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.541163206100464,
      "learning_rate": 0.00013486963173198693,
      "loss": 3.1521,
      "step": 157950
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.568579912185669,
      "learning_rate": 0.00013486621662570048,
      "loss": 3.0022,
      "step": 157951
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4367098808288574,
      "learning_rate": 0.00013486280155011534,
      "loss": 2.9322,
      "step": 157952
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4780051708221436,
      "learning_rate": 0.0001348593865052325,
      "loss": 2.8548,
      "step": 157953
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.406233549118042,
      "learning_rate": 0.00013485597149105226,
      "loss": 2.8534,
      "step": 157954
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.3289575576782227,
      "learning_rate": 0.00013485255650757557,
      "loss": 3.2495,
      "step": 157955
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1883835792541504,
      "learning_rate": 0.00013484914155480286,
      "loss": 2.787,
      "step": 157956
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.170100212097168,
      "learning_rate": 0.0001348457266327348,
      "loss": 2.8181,
      "step": 157957
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.423149824142456,
      "learning_rate": 0.00013484231174137198,
      "loss": 2.7673,
      "step": 157958
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1996872425079346,
      "learning_rate": 0.00013483889688071518,
      "loss": 2.6749,
      "step": 157959
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1531405448913574,
      "learning_rate": 0.00013483548205076484,
      "loss": 3.0192,
      "step": 157960
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.722683906555176,
      "learning_rate": 0.0001348320672515218,
      "loss": 2.734,
      "step": 157961
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.64082670211792,
      "learning_rate": 0.00013482865248298648,
      "loss": 2.9178,
      "step": 157962
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.09057354927063,
      "learning_rate": 0.00013482523774515986,
      "loss": 2.8348,
      "step": 157963
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4453861713409424,
      "learning_rate": 0.00013482182303804213,
      "loss": 2.8266,
      "step": 157964
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.535330295562744,
      "learning_rate": 0.00013481840836163426,
      "loss": 2.9195,
      "step": 157965
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5256495475769043,
      "learning_rate": 0.00013481499371593663,
      "loss": 2.9001,
      "step": 157966
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7418601512908936,
      "learning_rate": 0.00013481157910095012,
      "loss": 2.9374,
      "step": 157967
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.189485549926758,
      "learning_rate": 0.0001348081645166752,
      "loss": 3.0388,
      "step": 157968
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2679479122161865,
      "learning_rate": 0.00013480474996311272,
      "loss": 2.8253,
      "step": 157969
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6773369312286377,
      "learning_rate": 0.00013480133544026298,
      "loss": 2.9478,
      "step": 157970
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1360037326812744,
      "learning_rate": 0.00013479792094812687,
      "loss": 2.9872,
      "step": 157971
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.774726390838623,
      "learning_rate": 0.00013479450648670483,
      "loss": 3.0306,
      "step": 157972
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2733495235443115,
      "learning_rate": 0.00013479109205599772,
      "loss": 3.2091,
      "step": 157973
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0576303005218506,
      "learning_rate": 0.000134787677656006,
      "loss": 2.8791,
      "step": 157974
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.757478713989258,
      "learning_rate": 0.00013478426328673054,
      "loss": 2.8052,
      "step": 157975
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4362289905548096,
      "learning_rate": 0.0001347808489481716,
      "loss": 3.0331,
      "step": 157976
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.542325019836426,
      "learning_rate": 0.00013477743464033014,
      "loss": 3.099,
      "step": 157977
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.604345321655273,
      "learning_rate": 0.00013477402036320656,
      "loss": 3.0655,
      "step": 157978
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.76646089553833,
      "learning_rate": 0.00013477060611680167,
      "loss": 2.8112,
      "step": 157979
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2931065559387207,
      "learning_rate": 0.000134767191901116,
      "loss": 3.032,
      "step": 157980
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4650063514709473,
      "learning_rate": 0.00013476377771615044,
      "loss": 2.9667,
      "step": 157981
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.495088815689087,
      "learning_rate": 0.00013476036356190515,
      "loss": 2.9597,
      "step": 157982
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2302377223968506,
      "learning_rate": 0.0001347569494383812,
      "loss": 2.8425,
      "step": 157983
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.645347595214844,
      "learning_rate": 0.0001347535353455789,
      "loss": 3.1607,
      "step": 157984
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.183241367340088,
      "learning_rate": 0.00013475012128349914,
      "loss": 2.9042,
      "step": 157985
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5710394382476807,
      "learning_rate": 0.00013474670725214236,
      "loss": 3.1264,
      "step": 157986
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2698123455047607,
      "learning_rate": 0.0001347432932515095,
      "loss": 2.7788,
      "step": 157987
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.564800262451172,
      "learning_rate": 0.00013473987928160075,
      "loss": 2.7439,
      "step": 157988
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3977692127227783,
      "learning_rate": 0.0001347364653424171,
      "loss": 3.0139,
      "step": 157989
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.693866491317749,
      "learning_rate": 0.00013473305143395895,
      "loss": 2.8825,
      "step": 157990
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5889711380004883,
      "learning_rate": 0.00013472963755622715,
      "loss": 2.9043,
      "step": 157991
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7696361541748047,
      "learning_rate": 0.00013472622370922212,
      "loss": 2.9613,
      "step": 157992
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.477757215499878,
      "learning_rate": 0.00013472280989294482,
      "loss": 3.1861,
      "step": 157993
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.31870436668396,
      "learning_rate": 0.0001347193961073955,
      "loss": 2.8647,
      "step": 157994
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2760326862335205,
      "learning_rate": 0.00013471598235257503,
      "loss": 2.9971,
      "step": 157995
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9044692516326904,
      "learning_rate": 0.00013471256862848387,
      "loss": 3.178,
      "step": 157996
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.524306535720825,
      "learning_rate": 0.00013470915493512286,
      "loss": 2.7272,
      "step": 157997
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5786075592041016,
      "learning_rate": 0.00013470574127249244,
      "loss": 2.9831,
      "step": 157998
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9183945655822754,
      "learning_rate": 0.00013470232764059358,
      "loss": 2.9951,
      "step": 157999
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2609200477600098,
      "learning_rate": 0.00013469891403942644,
      "loss": 2.9975,
      "step": 158000
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.405824661254883,
      "learning_rate": 0.00013469550046899198,
      "loss": 2.8694,
      "step": 158001
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7004220485687256,
      "learning_rate": 0.0001346920869292907,
      "loss": 2.704,
      "step": 158002
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.419315814971924,
      "learning_rate": 0.00013468867342032333,
      "loss": 2.9786,
      "step": 158003
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6334855556488037,
      "learning_rate": 0.00013468525994209037,
      "loss": 2.7633,
      "step": 158004
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.847477674484253,
      "learning_rate": 0.00013468184649459276,
      "loss": 2.7987,
      "step": 158005
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.393399477005005,
      "learning_rate": 0.0001346784330778307,
      "loss": 2.8885,
      "step": 158006
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7846338748931885,
      "learning_rate": 0.00013467501969180516,
      "loss": 2.7691,
      "step": 158007
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.542978048324585,
      "learning_rate": 0.0001346716063365165,
      "loss": 3.0168,
      "step": 158008
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5052895545959473,
      "learning_rate": 0.00013466819301196568,
      "loss": 2.8806,
      "step": 158009
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8898439407348633,
      "learning_rate": 0.00013466477971815302,
      "loss": 3.1076,
      "step": 158010
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5751683712005615,
      "learning_rate": 0.0001346613664550794,
      "loss": 3.1088,
      "step": 158011
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.959132194519043,
      "learning_rate": 0.00013465795322274536,
      "loss": 2.7104,
      "step": 158012
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3972814083099365,
      "learning_rate": 0.00013465454002115152,
      "loss": 2.846,
      "step": 158013
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2278635501861572,
      "learning_rate": 0.00013465112685029845,
      "loss": 2.8186,
      "step": 158014
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1886675357818604,
      "learning_rate": 0.00013464771371018695,
      "loss": 2.8928,
      "step": 158015
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.323221445083618,
      "learning_rate": 0.00013464430060081744,
      "loss": 2.7742,
      "step": 158016
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5609543323516846,
      "learning_rate": 0.00013464088752219078,
      "loss": 3.0989,
      "step": 158017
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2702178955078125,
      "learning_rate": 0.0001346374744743075,
      "loss": 2.7931,
      "step": 158018
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5277764797210693,
      "learning_rate": 0.00013463406145716825,
      "loss": 2.9062,
      "step": 158019
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7169923782348633,
      "learning_rate": 0.00013463064847077353,
      "loss": 2.9638,
      "step": 158020
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.05371356010437,
      "learning_rate": 0.00013462723551512423,
      "loss": 2.8771,
      "step": 158021
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.367297649383545,
      "learning_rate": 0.0001346238225902207,
      "loss": 3.0321,
      "step": 158022
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4417781829833984,
      "learning_rate": 0.00013462040969606387,
      "loss": 2.8533,
      "step": 158023
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6061272621154785,
      "learning_rate": 0.00013461699683265418,
      "loss": 2.9208,
      "step": 158024
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.256781816482544,
      "learning_rate": 0.00013461358399999223,
      "loss": 3.0463,
      "step": 158025
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.164827585220337,
      "learning_rate": 0.00013461017119807887,
      "loss": 2.9538,
      "step": 158026
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.72033953666687,
      "learning_rate": 0.00013460675842691457,
      "loss": 3.2039,
      "step": 158027
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4721319675445557,
      "learning_rate": 0.0001346033456864999,
      "loss": 2.8261,
      "step": 158028
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5759057998657227,
      "learning_rate": 0.00013459993297683566,
      "loss": 2.836,
      "step": 158029
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.279557943344116,
      "learning_rate": 0.00013459652029792246,
      "loss": 2.7749,
      "step": 158030
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.9885919094085693,
      "learning_rate": 0.00013459310764976079,
      "loss": 3.0376,
      "step": 158031
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3895888328552246,
      "learning_rate": 0.00013458969503235145,
      "loss": 2.5293,
      "step": 158032
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.436876058578491,
      "learning_rate": 0.00013458628244569502,
      "loss": 2.8544,
      "step": 158033
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5942578315734863,
      "learning_rate": 0.00013458286988979204,
      "loss": 3.0867,
      "step": 158034
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2728664875030518,
      "learning_rate": 0.0001345794573646433,
      "loss": 3.1379,
      "step": 158035
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1322696208953857,
      "learning_rate": 0.00013457604487024938,
      "loss": 3.2347,
      "step": 158036
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2629008293151855,
      "learning_rate": 0.00013457263240661081,
      "loss": 2.8184,
      "step": 158037
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5843865871429443,
      "learning_rate": 0.00013456921997372838,
      "loss": 3.0922,
      "step": 158038
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3900885581970215,
      "learning_rate": 0.0001345658075716026,
      "loss": 3.0394,
      "step": 158039
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.340087890625,
      "learning_rate": 0.0001345623952002342,
      "loss": 3.1892,
      "step": 158040
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2716705799102783,
      "learning_rate": 0.0001345589828596238,
      "loss": 2.9196,
      "step": 158041
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0846123695373535,
      "learning_rate": 0.00013455557054977202,
      "loss": 2.6229,
      "step": 158042
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0681471824645996,
      "learning_rate": 0.00013455215827067935,
      "loss": 2.8989,
      "step": 158043
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.104949951171875,
      "learning_rate": 0.00013454874602234667,
      "loss": 3.1715,
      "step": 158044
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7974157333374023,
      "learning_rate": 0.00013454533380477442,
      "loss": 3.365,
      "step": 158045
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2728540897369385,
      "learning_rate": 0.0001345419216179634,
      "loss": 2.9948,
      "step": 158046
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.26051664352417,
      "learning_rate": 0.00013453850946191408,
      "loss": 2.9529,
      "step": 158047
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.941669464111328,
      "learning_rate": 0.00013453509733662736,
      "loss": 2.8002,
      "step": 158048
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3789572715759277,
      "learning_rate": 0.00013453168524210345,
      "loss": 2.7406,
      "step": 158049
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.239896297454834,
      "learning_rate": 0.0001345282731783434,
      "loss": 3.159,
      "step": 158050
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2374863624572754,
      "learning_rate": 0.0001345248611453475,
      "loss": 2.8997,
      "step": 158051
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0051236152648926,
      "learning_rate": 0.0001345214491431167,
      "loss": 2.9327,
      "step": 158052
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5065770149230957,
      "learning_rate": 0.00013451803717165134,
      "loss": 2.9629,
      "step": 158053
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2084836959838867,
      "learning_rate": 0.00013451462523095243,
      "loss": 2.954,
      "step": 158054
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.401456832885742,
      "learning_rate": 0.0001345112133210202,
      "loss": 2.7816,
      "step": 158055
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.948617458343506,
      "learning_rate": 0.00013450780144185553,
      "loss": 3.0911,
      "step": 158056
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.367594003677368,
      "learning_rate": 0.00013450438959345884,
      "loss": 2.619,
      "step": 158057
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.9969414472579956,
      "learning_rate": 0.00013450097777583108,
      "loss": 2.9722,
      "step": 158058
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4138479232788086,
      "learning_rate": 0.00013449756598897256,
      "loss": 3.0622,
      "step": 158059
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6083309650421143,
      "learning_rate": 0.00013449415423288433,
      "loss": 3.1112,
      "step": 158060
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4975340366363525,
      "learning_rate": 0.0001344907425075665,
      "loss": 2.9803,
      "step": 158061
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3791089057922363,
      "learning_rate": 0.0001344873308130201,
      "loss": 2.9131,
      "step": 158062
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3498098850250244,
      "learning_rate": 0.0001344839191492455,
      "loss": 2.998,
      "step": 158063
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3905746936798096,
      "learning_rate": 0.0001344805075162436,
      "loss": 2.7929,
      "step": 158064
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.378746747970581,
      "learning_rate": 0.00013447709591401477,
      "loss": 3.1363,
      "step": 158065
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.671670436859131,
      "learning_rate": 0.00013447368434256,
      "loss": 3.0334,
      "step": 158066
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.037123441696167,
      "learning_rate": 0.00013447027280187945,
      "loss": 3.0556,
      "step": 158067
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.32047963142395,
      "learning_rate": 0.00013446686129197415,
      "loss": 2.8991,
      "step": 158068
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.234619140625,
      "learning_rate": 0.00013446344981284445,
      "loss": 2.9672,
      "step": 158069
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.418841600418091,
      "learning_rate": 0.00013446003836449125,
      "loss": 2.9221,
      "step": 158070
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6046030521392822,
      "learning_rate": 0.00013445662694691493,
      "loss": 2.9373,
      "step": 158071
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.004106044769287,
      "learning_rate": 0.00013445321556011645,
      "loss": 2.7125,
      "step": 158072
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.024709939956665,
      "learning_rate": 0.00013444980420409606,
      "loss": 3.0626,
      "step": 158073
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5005009174346924,
      "learning_rate": 0.00013444639287885465,
      "loss": 2.9067,
      "step": 158074
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.92120623588562,
      "learning_rate": 0.00013444298158439268,
      "loss": 2.976,
      "step": 158075
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.858074426651001,
      "learning_rate": 0.00013443957032071102,
      "loss": 3.0824,
      "step": 158076
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.981461524963379,
      "learning_rate": 0.00013443615908781005,
      "loss": 3.0821,
      "step": 158077
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4249701499938965,
      "learning_rate": 0.00013443274788569058,
      "loss": 2.5898,
      "step": 158078
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.902421712875366,
      "learning_rate": 0.00013442933671435327,
      "loss": 2.8721,
      "step": 158079
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.50541353225708,
      "learning_rate": 0.00013442592557379862,
      "loss": 3.0734,
      "step": 158080
    },
    {
      "epoch": 2.06,
      "grad_norm": 6.057198524475098,
      "learning_rate": 0.00013442251446402724,
      "loss": 3.0202,
      "step": 158081
    },
    {
      "epoch": 2.06,
      "grad_norm": 5.386682033538818,
      "learning_rate": 0.00013441910338503994,
      "loss": 2.8257,
      "step": 158082
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.60583758354187,
      "learning_rate": 0.0001344156923368371,
      "loss": 2.9961,
      "step": 158083
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0068752765655518,
      "learning_rate": 0.00013441228131941965,
      "loss": 3.1852,
      "step": 158084
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4556870460510254,
      "learning_rate": 0.0001344088703327881,
      "loss": 2.994,
      "step": 158085
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6611881256103516,
      "learning_rate": 0.00013440545937694307,
      "loss": 3.0809,
      "step": 158086
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1412744522094727,
      "learning_rate": 0.00013440204845188502,
      "loss": 3.0388,
      "step": 158087
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2707784175872803,
      "learning_rate": 0.00013439863755761492,
      "loss": 3.0428,
      "step": 158088
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3515584468841553,
      "learning_rate": 0.0001343952266941331,
      "loss": 3.0026,
      "step": 158089
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2353131771087646,
      "learning_rate": 0.00013439181586144048,
      "loss": 2.8242,
      "step": 158090
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2645914554595947,
      "learning_rate": 0.00013438840505953755,
      "loss": 3.0364,
      "step": 158091
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.390957832336426,
      "learning_rate": 0.0001343849942884249,
      "loss": 2.9562,
      "step": 158092
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2708864212036133,
      "learning_rate": 0.00013438158354810314,
      "loss": 3.0585,
      "step": 158093
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.737675189971924,
      "learning_rate": 0.000134378172838573,
      "loss": 2.9655,
      "step": 158094
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.216931104660034,
      "learning_rate": 0.00013437476215983504,
      "loss": 3.0676,
      "step": 158095
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1887145042419434,
      "learning_rate": 0.00013437135151189003,
      "loss": 2.7547,
      "step": 158096
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.892768621444702,
      "learning_rate": 0.00013436794089473854,
      "loss": 2.9224,
      "step": 158097
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2102954387664795,
      "learning_rate": 0.00013436453030838118,
      "loss": 2.8735,
      "step": 158098
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3626670837402344,
      "learning_rate": 0.00013436111975281844,
      "loss": 2.9213,
      "step": 158099
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5277116298675537,
      "learning_rate": 0.00013435770922805122,
      "loss": 3.0079,
      "step": 158100
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2538204193115234,
      "learning_rate": 0.00013435429873407987,
      "loss": 2.8917,
      "step": 158101
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4554803371429443,
      "learning_rate": 0.00013435088827090536,
      "loss": 3.1025,
      "step": 158102
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5415115356445312,
      "learning_rate": 0.0001343474778385281,
      "loss": 2.7623,
      "step": 158103
    },
    {
      "epoch": 2.06,
      "grad_norm": 7.626174449920654,
      "learning_rate": 0.0001343440674369488,
      "loss": 2.6921,
      "step": 158104
    },
    {
      "epoch": 2.06,
      "grad_norm": 5.2037672996521,
      "learning_rate": 0.00013434065706616796,
      "loss": 2.8993,
      "step": 158105
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.50394868850708,
      "learning_rate": 0.0001343372467261864,
      "loss": 3.1534,
      "step": 158106
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5389509201049805,
      "learning_rate": 0.0001343338364170046,
      "loss": 2.8363,
      "step": 158107
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.835977077484131,
      "learning_rate": 0.00013433042613862333,
      "loss": 2.7491,
      "step": 158108
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6074397563934326,
      "learning_rate": 0.00013432701589104318,
      "loss": 2.9299,
      "step": 158109
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1034576892852783,
      "learning_rate": 0.00013432360567426467,
      "loss": 3.257,
      "step": 158110
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.359311580657959,
      "learning_rate": 0.00013432019548828864,
      "loss": 3.1666,
      "step": 158111
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2398617267608643,
      "learning_rate": 0.0001343167853331156,
      "loss": 2.8645,
      "step": 158112
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1233973503112793,
      "learning_rate": 0.00013431337520874607,
      "loss": 3.0205,
      "step": 158113
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3684024810791016,
      "learning_rate": 0.00013430996511518096,
      "loss": 2.9638,
      "step": 158114
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.150418519973755,
      "learning_rate": 0.00013430655505242072,
      "loss": 3.049,
      "step": 158115
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2911932468414307,
      "learning_rate": 0.00013430314502046594,
      "loss": 2.8471,
      "step": 158116
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5609538555145264,
      "learning_rate": 0.00013429973501931743,
      "loss": 3.1174,
      "step": 158117
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.115816354751587,
      "learning_rate": 0.0001342963250489757,
      "loss": 2.8747,
      "step": 158118
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.020498037338257,
      "learning_rate": 0.00013429291510944132,
      "loss": 2.9659,
      "step": 158119
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3145577907562256,
      "learning_rate": 0.00013428950520071514,
      "loss": 3.2274,
      "step": 158120
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.479043483734131,
      "learning_rate": 0.00013428609532279765,
      "loss": 3.0485,
      "step": 158121
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2982418537139893,
      "learning_rate": 0.00013428268547568942,
      "loss": 3.1279,
      "step": 158122
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6852076053619385,
      "learning_rate": 0.00013427927565939126,
      "loss": 3.0321,
      "step": 158123
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6195058822631836,
      "learning_rate": 0.0001342758658739036,
      "loss": 2.9167,
      "step": 158124
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3586976528167725,
      "learning_rate": 0.0001342724561192273,
      "loss": 3.1861,
      "step": 158125
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.7651162147521973,
      "learning_rate": 0.00013426904639536288,
      "loss": 2.9649,
      "step": 158126
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.511240243911743,
      "learning_rate": 0.00013426563670231097,
      "loss": 2.9719,
      "step": 158127
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2811765670776367,
      "learning_rate": 0.00013426222704007208,
      "loss": 2.8947,
      "step": 158128
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8285892009735107,
      "learning_rate": 0.0001342588174086471,
      "loss": 3.1226,
      "step": 158129
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.172442674636841,
      "learning_rate": 0.0001342554078080364,
      "loss": 3.1204,
      "step": 158130
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.9932247400283813,
      "learning_rate": 0.0001342519982382409,
      "loss": 3.0353,
      "step": 158131
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.940873622894287,
      "learning_rate": 0.00013424858869926106,
      "loss": 2.9949,
      "step": 158132
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.165666103363037,
      "learning_rate": 0.00013424517919109752,
      "loss": 2.9661,
      "step": 158133
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.460439682006836,
      "learning_rate": 0.00013424176971375083,
      "loss": 3.0124,
      "step": 158134
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.612710952758789,
      "learning_rate": 0.00013423836026722183,
      "loss": 3.0516,
      "step": 158135
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.329104900360107,
      "learning_rate": 0.00013423495085151095,
      "loss": 2.853,
      "step": 158136
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8394935131073,
      "learning_rate": 0.00013423154146661903,
      "loss": 2.8846,
      "step": 158137
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.567429304122925,
      "learning_rate": 0.00013422813211254648,
      "loss": 3.0325,
      "step": 158138
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8690884113311768,
      "learning_rate": 0.00013422472278929427,
      "loss": 2.9027,
      "step": 158139
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.847433090209961,
      "learning_rate": 0.0001342213134968626,
      "loss": 2.9764,
      "step": 158140
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3299100399017334,
      "learning_rate": 0.00013421790423525244,
      "loss": 2.9515,
      "step": 158141
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.866755485534668,
      "learning_rate": 0.00013421449500446416,
      "loss": 3.0481,
      "step": 158142
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.5367889404296875,
      "learning_rate": 0.00013421108580449867,
      "loss": 2.9968,
      "step": 158143
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.286351203918457,
      "learning_rate": 0.00013420767663535634,
      "loss": 3.0616,
      "step": 158144
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4407711029052734,
      "learning_rate": 0.00013420426749703807,
      "loss": 3.139,
      "step": 158145
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4354007244110107,
      "learning_rate": 0.00013420085838954435,
      "loss": 3.0556,
      "step": 158146
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1984732151031494,
      "learning_rate": 0.0001341974493128758,
      "loss": 2.9033,
      "step": 158147
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2095437049865723,
      "learning_rate": 0.00013419404026703298,
      "loss": 2.8892,
      "step": 158148
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1749014854431152,
      "learning_rate": 0.00013419063125201668,
      "loss": 2.8089,
      "step": 158149
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2872796058654785,
      "learning_rate": 0.00013418722226782743,
      "loss": 2.9138,
      "step": 158150
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.415879249572754,
      "learning_rate": 0.00013418381331446597,
      "loss": 2.8267,
      "step": 158151
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3429362773895264,
      "learning_rate": 0.00013418040439193288,
      "loss": 3.0277,
      "step": 158152
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.315791606903076,
      "learning_rate": 0.00013417699550022878,
      "loss": 3.0243,
      "step": 158153
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.578848123550415,
      "learning_rate": 0.00013417358663935422,
      "loss": 2.7431,
      "step": 158154
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7444465160369873,
      "learning_rate": 0.00013417017780931,
      "loss": 2.8216,
      "step": 158155
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1473653316497803,
      "learning_rate": 0.0001341667690100966,
      "loss": 2.8585,
      "step": 158156
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.556457757949829,
      "learning_rate": 0.00013416336024171482,
      "loss": 3.0578,
      "step": 158157
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6004257202148438,
      "learning_rate": 0.0001341599515041652,
      "loss": 3.026,
      "step": 158158
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7291884422302246,
      "learning_rate": 0.00013415654279744837,
      "loss": 2.8385,
      "step": 158159
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.162245988845825,
      "learning_rate": 0.00013415313412156488,
      "loss": 3.0893,
      "step": 158160
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.111777305603027,
      "learning_rate": 0.00013414972547651552,
      "loss": 2.6802,
      "step": 158161
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8940272331237793,
      "learning_rate": 0.00013414631686230077,
      "loss": 2.9699,
      "step": 158162
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.717694044113159,
      "learning_rate": 0.00013414290827892147,
      "loss": 3.0215,
      "step": 158163
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.234330415725708,
      "learning_rate": 0.00013413949972637814,
      "loss": 3.0404,
      "step": 158164
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4661240577697754,
      "learning_rate": 0.0001341360912046714,
      "loss": 2.8727,
      "step": 158165
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.530310869216919,
      "learning_rate": 0.00013413268271380177,
      "loss": 3.3209,
      "step": 158166
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.39434289932251,
      "learning_rate": 0.00013412927425377011,
      "loss": 2.7783,
      "step": 158167
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1295387744903564,
      "learning_rate": 0.00013412586582457688,
      "loss": 2.9276,
      "step": 158168
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.246065616607666,
      "learning_rate": 0.00013412245742622285,
      "loss": 3.021,
      "step": 158169
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.444082736968994,
      "learning_rate": 0.0001341190490587086,
      "loss": 3.1525,
      "step": 158170
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4792869091033936,
      "learning_rate": 0.00013411564072203473,
      "loss": 3.2553,
      "step": 158171
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4098739624023438,
      "learning_rate": 0.0001341122324162018,
      "loss": 3.1698,
      "step": 158172
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.100926160812378,
      "learning_rate": 0.00013410882414121068,
      "loss": 2.931,
      "step": 158173
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.367202043533325,
      "learning_rate": 0.00013410541589706172,
      "loss": 2.8874,
      "step": 158174
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.000610589981079,
      "learning_rate": 0.00013410200768375582,
      "loss": 2.8715,
      "step": 158175
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.396939277648926,
      "learning_rate": 0.00013409859950129347,
      "loss": 2.938,
      "step": 158176
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.706150770187378,
      "learning_rate": 0.00013409519134967532,
      "loss": 2.9212,
      "step": 158177
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1475284099578857,
      "learning_rate": 0.00013409178322890187,
      "loss": 2.8365,
      "step": 158178
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.350764274597168,
      "learning_rate": 0.00013408837513897404,
      "loss": 2.9636,
      "step": 158179
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8876824378967285,
      "learning_rate": 0.0001340849670798922,
      "loss": 2.9445,
      "step": 158180
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.613060235977173,
      "learning_rate": 0.00013408155905165721,
      "loss": 3.1167,
      "step": 158181
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.280076265335083,
      "learning_rate": 0.00013407815105426958,
      "loss": 2.5807,
      "step": 158182
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0693657398223877,
      "learning_rate": 0.00013407474308773,
      "loss": 3.3149,
      "step": 158183
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.589776039123535,
      "learning_rate": 0.00013407133515203886,
      "loss": 2.7976,
      "step": 158184
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2750518321990967,
      "learning_rate": 0.00013406792724719716,
      "loss": 3.0068,
      "step": 158185
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.550387144088745,
      "learning_rate": 0.00013406451937320526,
      "loss": 3.0473,
      "step": 158186
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.496933698654175,
      "learning_rate": 0.00013406111153006398,
      "loss": 3.3147,
      "step": 158187
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3387274742126465,
      "learning_rate": 0.00013405770371777384,
      "loss": 2.8279,
      "step": 158188
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.109098434448242,
      "learning_rate": 0.00013405429593633556,
      "loss": 2.9138,
      "step": 158189
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7200353145599365,
      "learning_rate": 0.0001340508881857496,
      "loss": 2.8284,
      "step": 158190
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9840197563171387,
      "learning_rate": 0.0001340474804660168,
      "loss": 2.8725,
      "step": 158191
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.510401725769043,
      "learning_rate": 0.00013404407277713763,
      "loss": 3.0013,
      "step": 158192
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9163925647735596,
      "learning_rate": 0.0001340406651191129,
      "loss": 3.0117,
      "step": 158193
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0464587211608887,
      "learning_rate": 0.00013403725749194312,
      "loss": 2.8597,
      "step": 158194
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3047425746917725,
      "learning_rate": 0.00013403384989562898,
      "loss": 2.9843,
      "step": 158195
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7202088832855225,
      "learning_rate": 0.00013403044233017093,
      "loss": 3.1779,
      "step": 158196
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2101142406463623,
      "learning_rate": 0.00013402703479556988,
      "loss": 2.9924,
      "step": 158197
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.080984354019165,
      "learning_rate": 0.00013402362729182622,
      "loss": 3.0806,
      "step": 158198
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5542678833007812,
      "learning_rate": 0.00013402021981894082,
      "loss": 3.0763,
      "step": 158199
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0707943439483643,
      "learning_rate": 0.00013401681237691423,
      "loss": 3.1349,
      "step": 158200
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.504473924636841,
      "learning_rate": 0.00013401340496574688,
      "loss": 2.8647,
      "step": 158201
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1431398391723633,
      "learning_rate": 0.0001340099975854397,
      "loss": 3.2412,
      "step": 158202
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8389992713928223,
      "learning_rate": 0.0001340065902359932,
      "loss": 2.8113,
      "step": 158203
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.486785888671875,
      "learning_rate": 0.00013400318291740792,
      "loss": 2.9093,
      "step": 158204
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.671139717102051,
      "learning_rate": 0.00013399977562968465,
      "loss": 2.874,
      "step": 158205
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.9999133348464966,
      "learning_rate": 0.000133996368372824,
      "loss": 2.8084,
      "step": 158206
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5722432136535645,
      "learning_rate": 0.00013399296114682642,
      "loss": 3.3278,
      "step": 158207
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.547084331512451,
      "learning_rate": 0.0001339895539516928,
      "loss": 2.8875,
      "step": 158208
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2621259689331055,
      "learning_rate": 0.00013398614678742355,
      "loss": 2.7284,
      "step": 158209
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2064132690429688,
      "learning_rate": 0.0001339827396540195,
      "loss": 2.9034,
      "step": 158210
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2774064540863037,
      "learning_rate": 0.0001339793325514812,
      "loss": 3.0674,
      "step": 158211
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.404017925262451,
      "learning_rate": 0.00013397592547980918,
      "loss": 3.0671,
      "step": 158212
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.236288070678711,
      "learning_rate": 0.00013397251843900426,
      "loss": 2.9546,
      "step": 158213
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3634018898010254,
      "learning_rate": 0.000133969111429067,
      "loss": 3.0541,
      "step": 158214
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4118869304656982,
      "learning_rate": 0.0001339657044499979,
      "loss": 3.005,
      "step": 158215
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.523930788040161,
      "learning_rate": 0.0001339622975017978,
      "loss": 2.7877,
      "step": 158216
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2171506881713867,
      "learning_rate": 0.00013395889058446725,
      "loss": 2.9589,
      "step": 158217
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5877227783203125,
      "learning_rate": 0.00013395548369800678,
      "loss": 2.8975,
      "step": 158218
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7892799377441406,
      "learning_rate": 0.00013395207684241723,
      "loss": 2.8965,
      "step": 158219
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6451854705810547,
      "learning_rate": 0.00013394867001769914,
      "loss": 3.0067,
      "step": 158220
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2327699661254883,
      "learning_rate": 0.00013394526322385298,
      "loss": 2.7943,
      "step": 158221
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2885916233062744,
      "learning_rate": 0.00013394185646087964,
      "loss": 2.798,
      "step": 158222
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.004636764526367,
      "learning_rate": 0.00013393844972877956,
      "loss": 3.124,
      "step": 158223
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.541501045227051,
      "learning_rate": 0.0001339350430275536,
      "loss": 2.911,
      "step": 158224
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2285969257354736,
      "learning_rate": 0.00013393163635720218,
      "loss": 3.0516,
      "step": 158225
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.492567777633667,
      "learning_rate": 0.00013392822971772602,
      "loss": 2.8433,
      "step": 158226
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4812161922454834,
      "learning_rate": 0.00013392482310912565,
      "loss": 2.9088,
      "step": 158227
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.60719633102417,
      "learning_rate": 0.00013392141653140188,
      "loss": 3.0387,
      "step": 158228
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.552281618118286,
      "learning_rate": 0.00013391800998455516,
      "loss": 2.8457,
      "step": 158229
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.021416187286377,
      "learning_rate": 0.00013391460346858633,
      "loss": 3.1698,
      "step": 158230
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.0873847007751465,
      "learning_rate": 0.00013391119698349588,
      "loss": 3.0289,
      "step": 158231
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.849558353424072,
      "learning_rate": 0.0001339077905292845,
      "loss": 3.0906,
      "step": 158232
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.074704647064209,
      "learning_rate": 0.00013390438410595268,
      "loss": 2.9608,
      "step": 158233
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4429643154144287,
      "learning_rate": 0.00013390097771350128,
      "loss": 3.0654,
      "step": 158234
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6457226276397705,
      "learning_rate": 0.0001338975713519307,
      "loss": 3.065,
      "step": 158235
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6578004360198975,
      "learning_rate": 0.00013389416502124183,
      "loss": 2.6874,
      "step": 158236
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.611746072769165,
      "learning_rate": 0.0001338907587214352,
      "loss": 3.0034,
      "step": 158237
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.382080078125,
      "learning_rate": 0.00013388735245251136,
      "loss": 2.9588,
      "step": 158238
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.315056324005127,
      "learning_rate": 0.0001338839462144709,
      "loss": 2.9967,
      "step": 158239
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.436150550842285,
      "learning_rate": 0.00013388054000731462,
      "loss": 2.7418,
      "step": 158240
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.5300216674804688,
      "learning_rate": 0.00013387713383104303,
      "loss": 2.931,
      "step": 158241
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0827255249023438,
      "learning_rate": 0.00013387372768565691,
      "loss": 3.0958,
      "step": 158242
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3534326553344727,
      "learning_rate": 0.0001338703215711568,
      "loss": 2.8511,
      "step": 158243
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2422220706939697,
      "learning_rate": 0.00013386691548754334,
      "loss": 2.8944,
      "step": 158244
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.061518907546997,
      "learning_rate": 0.00013386350943481705,
      "loss": 2.9383,
      "step": 158245
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5386602878570557,
      "learning_rate": 0.00013386010341297875,
      "loss": 2.7785,
      "step": 158246
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1849169731140137,
      "learning_rate": 0.00013385669742202891,
      "loss": 2.7908,
      "step": 158247
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0151360034942627,
      "learning_rate": 0.00013385329146196835,
      "loss": 2.6719,
      "step": 158248
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3102495670318604,
      "learning_rate": 0.00013384988553279763,
      "loss": 2.8335,
      "step": 158249
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.177410840988159,
      "learning_rate": 0.00013384647963451732,
      "loss": 2.8458,
      "step": 158250
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.853801965713501,
      "learning_rate": 0.00013384307376712796,
      "loss": 3.0828,
      "step": 158251
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3312900066375732,
      "learning_rate": 0.00013383966793063045,
      "loss": 3.1115,
      "step": 158252
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6658847332000732,
      "learning_rate": 0.00013383626212502513,
      "loss": 2.933,
      "step": 158253
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6421432495117188,
      "learning_rate": 0.00013383285635031294,
      "loss": 2.901,
      "step": 158254
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5071732997894287,
      "learning_rate": 0.00013382945060649436,
      "loss": 3.0034,
      "step": 158255
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.44338321685791,
      "learning_rate": 0.00013382604489356998,
      "loss": 2.8791,
      "step": 158256
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3556625843048096,
      "learning_rate": 0.00013382263921154038,
      "loss": 2.9041,
      "step": 158257
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.570483922958374,
      "learning_rate": 0.00013381923356040642,
      "loss": 2.8256,
      "step": 158258
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2894845008850098,
      "learning_rate": 0.00013381582794016846,
      "loss": 3.044,
      "step": 158259
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.252171277999878,
      "learning_rate": 0.00013381242235082743,
      "loss": 2.9367,
      "step": 158260
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9190750122070312,
      "learning_rate": 0.0001338090167923838,
      "loss": 2.8117,
      "step": 158261
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.131434440612793,
      "learning_rate": 0.00013380561126483814,
      "loss": 2.4039,
      "step": 158262
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6630449295043945,
      "learning_rate": 0.00013380220576819115,
      "loss": 2.6241,
      "step": 158263
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3065390586853027,
      "learning_rate": 0.0001337988003024435,
      "loss": 3.092,
      "step": 158264
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.005228281021118,
      "learning_rate": 0.0001337953948675957,
      "loss": 2.9284,
      "step": 158265
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.286729574203491,
      "learning_rate": 0.0001337919894636486,
      "loss": 2.8118,
      "step": 158266
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.651350975036621,
      "learning_rate": 0.00013378858409060268,
      "loss": 2.9551,
      "step": 158267
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5258216857910156,
      "learning_rate": 0.00013378517874845861,
      "loss": 3.1933,
      "step": 158268
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.5242769718170166,
      "learning_rate": 0.0001337817734372169,
      "loss": 3.111,
      "step": 158269
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.322878837585449,
      "learning_rate": 0.00013377836815687842,
      "loss": 2.8312,
      "step": 158270
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.06327223777771,
      "learning_rate": 0.00013377496290744358,
      "loss": 3.1637,
      "step": 158271
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.54370379447937,
      "learning_rate": 0.0001337715576889132,
      "loss": 2.9664,
      "step": 158272
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0617291927337646,
      "learning_rate": 0.0001337681525012877,
      "loss": 3.0404,
      "step": 158273
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.273933172225952,
      "learning_rate": 0.0001337647473445681,
      "loss": 3.1106,
      "step": 158274
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.492342948913574,
      "learning_rate": 0.0001337613422187545,
      "loss": 3.1362,
      "step": 158275
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.217191219329834,
      "learning_rate": 0.00013375793712384795,
      "loss": 2.7179,
      "step": 158276
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.632175922393799,
      "learning_rate": 0.00013375453205984882,
      "loss": 2.9047,
      "step": 158277
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7130513191223145,
      "learning_rate": 0.000133751127026758,
      "loss": 2.9419,
      "step": 158278
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8351809978485107,
      "learning_rate": 0.00013374772202457582,
      "loss": 2.833,
      "step": 158279
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2740800380706787,
      "learning_rate": 0.00013374431705330333,
      "loss": 2.7812,
      "step": 158280
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2774813175201416,
      "learning_rate": 0.00013374091211294068,
      "loss": 2.9134,
      "step": 158281
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.667024612426758,
      "learning_rate": 0.00013373750720348883,
      "loss": 3.1747,
      "step": 158282
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.478029251098633,
      "learning_rate": 0.00013373410232494823,
      "loss": 3.084,
      "step": 158283
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4424028396606445,
      "learning_rate": 0.00013373069747731972,
      "loss": 3.2698,
      "step": 158284
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.9999877214431763,
      "learning_rate": 0.00013372729266060367,
      "loss": 2.9908,
      "step": 158285
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.130497694015503,
      "learning_rate": 0.00013372388787480098,
      "loss": 2.9676,
      "step": 158286
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.618392467498779,
      "learning_rate": 0.00013372048311991215,
      "loss": 2.928,
      "step": 158287
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.427731037139893,
      "learning_rate": 0.0001337170783959378,
      "loss": 3.0454,
      "step": 158288
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.133012533187866,
      "learning_rate": 0.00013371367370287852,
      "loss": 3.0357,
      "step": 158289
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.483167886734009,
      "learning_rate": 0.0001337102690407351,
      "loss": 3.0562,
      "step": 158290
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0093743801116943,
      "learning_rate": 0.00013370686440950796,
      "loss": 2.9525,
      "step": 158291
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.141262531280518,
      "learning_rate": 0.00013370345980919795,
      "loss": 2.871,
      "step": 158292
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4319732189178467,
      "learning_rate": 0.00013370005523980565,
      "loss": 2.7669,
      "step": 158293
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.010363817214966,
      "learning_rate": 0.00013369665070133162,
      "loss": 2.9346,
      "step": 158294
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6224560737609863,
      "learning_rate": 0.0001336932461937764,
      "loss": 2.9748,
      "step": 158295
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.712965726852417,
      "learning_rate": 0.00013368984171714088,
      "loss": 3.2198,
      "step": 158296
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1401264667510986,
      "learning_rate": 0.00013368643727142542,
      "loss": 2.7361,
      "step": 158297
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.855053186416626,
      "learning_rate": 0.0001336830328566309,
      "loss": 2.7659,
      "step": 158298
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.228789806365967,
      "learning_rate": 0.0001336796284727579,
      "loss": 2.9475,
      "step": 158299
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.830505132675171,
      "learning_rate": 0.00013367622411980683,
      "loss": 3.0481,
      "step": 158300
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.523630142211914,
      "learning_rate": 0.00013367281979777864,
      "loss": 2.8697,
      "step": 158301
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4848239421844482,
      "learning_rate": 0.00013366941550667377,
      "loss": 2.9363,
      "step": 158302
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.248833417892456,
      "learning_rate": 0.00013366601124649284,
      "loss": 3.0525,
      "step": 158303
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9311718940734863,
      "learning_rate": 0.0001336626070172366,
      "loss": 2.7321,
      "step": 158304
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8507068157196045,
      "learning_rate": 0.0001336592028189056,
      "loss": 2.8458,
      "step": 158305
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.3643012046813965,
      "learning_rate": 0.00013365579865150045,
      "loss": 2.823,
      "step": 158306
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.358036518096924,
      "learning_rate": 0.00013365239451502192,
      "loss": 2.9412,
      "step": 158307
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.589731454849243,
      "learning_rate": 0.0001336489904094704,
      "loss": 2.9214,
      "step": 158308
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1198346614837646,
      "learning_rate": 0.00013364558633484683,
      "loss": 3.0752,
      "step": 158309
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2662386894226074,
      "learning_rate": 0.0001336421822911517,
      "loss": 2.827,
      "step": 158310
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.8855750560760498,
      "learning_rate": 0.00013363877827838556,
      "loss": 2.937,
      "step": 158311
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4199230670928955,
      "learning_rate": 0.00013363537429654905,
      "loss": 2.8547,
      "step": 158312
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.120528221130371,
      "learning_rate": 0.00013363197034564294,
      "loss": 2.7811,
      "step": 158313
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6253662109375,
      "learning_rate": 0.00013362856642566768,
      "loss": 2.7371,
      "step": 158314
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.195949077606201,
      "learning_rate": 0.00013362516253662414,
      "loss": 2.7434,
      "step": 158315
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.403287887573242,
      "learning_rate": 0.00013362175867851284,
      "loss": 2.8537,
      "step": 158316
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1687240600585938,
      "learning_rate": 0.00013361835485133436,
      "loss": 2.7987,
      "step": 158317
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.62329363822937,
      "learning_rate": 0.00013361495105508928,
      "loss": 3.1245,
      "step": 158318
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.48065185546875,
      "learning_rate": 0.00013361154728977844,
      "loss": 2.7401,
      "step": 158319
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3059592247009277,
      "learning_rate": 0.00013360814355540222,
      "loss": 3.1485,
      "step": 158320
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.244610071182251,
      "learning_rate": 0.00013360473985196152,
      "loss": 2.9019,
      "step": 158321
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.588008165359497,
      "learning_rate": 0.00013360133617945681,
      "loss": 2.9801,
      "step": 158322
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9349846839904785,
      "learning_rate": 0.0001335979325378888,
      "loss": 2.9581,
      "step": 158323
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6803412437438965,
      "learning_rate": 0.0001335945289272579,
      "loss": 3.1009,
      "step": 158324
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.303892135620117,
      "learning_rate": 0.00013359112534756505,
      "loss": 3.0975,
      "step": 158325
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6302638053894043,
      "learning_rate": 0.00013358772179881067,
      "loss": 3.0137,
      "step": 158326
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3200855255126953,
      "learning_rate": 0.00013358431828099554,
      "loss": 3.1496,
      "step": 158327
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2008142471313477,
      "learning_rate": 0.0001335809147941203,
      "loss": 3.0178,
      "step": 158328
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6567487716674805,
      "learning_rate": 0.00013357751133818543,
      "loss": 2.6539,
      "step": 158329
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6012251377105713,
      "learning_rate": 0.00013357410791319159,
      "loss": 3.227,
      "step": 158330
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.805356740951538,
      "learning_rate": 0.00013357070451913952,
      "loss": 2.9173,
      "step": 158331
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.40570330619812,
      "learning_rate": 0.00013356730115602974,
      "loss": 2.7821,
      "step": 158332
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.152477979660034,
      "learning_rate": 0.000133563897823863,
      "loss": 3.123,
      "step": 158333
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.152451276779175,
      "learning_rate": 0.00013356049452263995,
      "loss": 2.848,
      "step": 158334
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.103440523147583,
      "learning_rate": 0.00013355709125236108,
      "loss": 2.8479,
      "step": 158335
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.148336172103882,
      "learning_rate": 0.00013355368801302696,
      "loss": 3.0564,
      "step": 158336
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.270448923110962,
      "learning_rate": 0.00013355028480463855,
      "loss": 2.5975,
      "step": 158337
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.269780158996582,
      "learning_rate": 0.0001335468816271961,
      "loss": 3.2421,
      "step": 158338
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.903944492340088,
      "learning_rate": 0.00013354347848070056,
      "loss": 2.8883,
      "step": 158339
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5232818126678467,
      "learning_rate": 0.00013354007536515232,
      "loss": 2.8117,
      "step": 158340
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1981804370880127,
      "learning_rate": 0.00013353667228055234,
      "loss": 2.7905,
      "step": 158341
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4387142658233643,
      "learning_rate": 0.0001335332692269008,
      "loss": 2.8799,
      "step": 158342
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.008025646209717,
      "learning_rate": 0.0001335298662041987,
      "loss": 2.7402,
      "step": 158343
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2666425704956055,
      "learning_rate": 0.00013352646321244642,
      "loss": 2.8455,
      "step": 158344
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.391850471496582,
      "learning_rate": 0.00013352306025164482,
      "loss": 2.9806,
      "step": 158345
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3640425205230713,
      "learning_rate": 0.00013351965732179432,
      "loss": 3.026,
      "step": 158346
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.22243070602417,
      "learning_rate": 0.0001335162544228959,
      "loss": 2.903,
      "step": 158347
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.400517702102661,
      "learning_rate": 0.0001335128515549497,
      "loss": 3.0168,
      "step": 158348
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.866873025894165,
      "learning_rate": 0.00013350944871795672,
      "loss": 3.0979,
      "step": 158349
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3300797939300537,
      "learning_rate": 0.0001335060459119174,
      "loss": 2.8547,
      "step": 158350
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.598126173019409,
      "learning_rate": 0.0001335026431368325,
      "loss": 3.0535,
      "step": 158351
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2743544578552246,
      "learning_rate": 0.00013349924039270252,
      "loss": 3.1132,
      "step": 158352
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1749892234802246,
      "learning_rate": 0.00013349583767952839,
      "loss": 2.8749,
      "step": 158353
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.549760580062866,
      "learning_rate": 0.00013349243499731028,
      "loss": 3.0336,
      "step": 158354
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0808913707733154,
      "learning_rate": 0.00013348903234604924,
      "loss": 2.992,
      "step": 158355
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0501930713653564,
      "learning_rate": 0.0001334856297257456,
      "loss": 2.7754,
      "step": 158356
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1160943508148193,
      "learning_rate": 0.00013348222713640022,
      "loss": 3.0767,
      "step": 158357
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.429692268371582,
      "learning_rate": 0.00013347882457801353,
      "loss": 3.1145,
      "step": 158358
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5575928688049316,
      "learning_rate": 0.0001334754220505865,
      "loss": 2.7995,
      "step": 158359
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7864720821380615,
      "learning_rate": 0.00013347201955411932,
      "loss": 3.0654,
      "step": 158360
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2646422386169434,
      "learning_rate": 0.0001334686170886129,
      "loss": 2.9921,
      "step": 158361
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.621807813644409,
      "learning_rate": 0.00013346521465406772,
      "loss": 2.8731,
      "step": 158362
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6432697772979736,
      "learning_rate": 0.00013346181225048465,
      "loss": 2.9254,
      "step": 158363
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.354804039001465,
      "learning_rate": 0.00013345840987786403,
      "loss": 2.9778,
      "step": 158364
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8257827758789062,
      "learning_rate": 0.00013345500753620684,
      "loss": 2.9912,
      "step": 158365
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8653998374938965,
      "learning_rate": 0.00013345160522551332,
      "loss": 3.0327,
      "step": 158366
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.573503017425537,
      "learning_rate": 0.00013344820294578438,
      "loss": 2.906,
      "step": 158367
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.474045753479004,
      "learning_rate": 0.00013344480069702046,
      "loss": 3.0068,
      "step": 158368
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.657392978668213,
      "learning_rate": 0.0001334413984792224,
      "loss": 2.8894,
      "step": 158369
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0542526245117188,
      "learning_rate": 0.0001334379962923906,
      "loss": 2.8307,
      "step": 158370
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.294618844985962,
      "learning_rate": 0.00013343459413652599,
      "loss": 3.0485,
      "step": 158371
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.213092088699341,
      "learning_rate": 0.00013343119201162897,
      "loss": 3.0187,
      "step": 158372
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0027730464935303,
      "learning_rate": 0.00013342778991770027,
      "loss": 2.8157,
      "step": 158373
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3294029235839844,
      "learning_rate": 0.00013342438785474036,
      "loss": 2.9724,
      "step": 158374
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5614120960235596,
      "learning_rate": 0.00013342098582275015,
      "loss": 2.866,
      "step": 158375
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.580845832824707,
      "learning_rate": 0.00013341758382173,
      "loss": 3.1895,
      "step": 158376
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8038196563720703,
      "learning_rate": 0.00013341418185168073,
      "loss": 2.9215,
      "step": 158377
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6477599143981934,
      "learning_rate": 0.00013341077991260296,
      "loss": 2.8735,
      "step": 158378
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2377631664276123,
      "learning_rate": 0.00013340737800449724,
      "loss": 2.8458,
      "step": 158379
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.243375301361084,
      "learning_rate": 0.00013340397612736412,
      "loss": 3.1634,
      "step": 158380
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.716449499130249,
      "learning_rate": 0.00013340057428120448,
      "loss": 2.6547,
      "step": 158381
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3909666538238525,
      "learning_rate": 0.00013339717246601868,
      "loss": 2.728,
      "step": 158382
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8271570205688477,
      "learning_rate": 0.0001333937706818076,
      "loss": 2.8628,
      "step": 158383
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5836827754974365,
      "learning_rate": 0.00013339036892857177,
      "loss": 2.848,
      "step": 158384
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0259952545166016,
      "learning_rate": 0.00013338696720631175,
      "loss": 2.9246,
      "step": 158385
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.552788257598877,
      "learning_rate": 0.0001333835655150283,
      "loss": 2.9931,
      "step": 158386
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.733393430709839,
      "learning_rate": 0.000133380163854722,
      "loss": 3.0952,
      "step": 158387
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4253158569335938,
      "learning_rate": 0.00013337676222539336,
      "loss": 3.1301,
      "step": 158388
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.7361161708831787,
      "learning_rate": 0.00013337336062704323,
      "loss": 2.8035,
      "step": 158389
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1986589431762695,
      "learning_rate": 0.00013336995905967216,
      "loss": 3.1155,
      "step": 158390
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8304996490478516,
      "learning_rate": 0.00013336655752328063,
      "loss": 3.0528,
      "step": 158391
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5763397216796875,
      "learning_rate": 0.00013336315601786953,
      "loss": 3.0046,
      "step": 158392
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5198354721069336,
      "learning_rate": 0.00013335975454343933,
      "loss": 2.8216,
      "step": 158393
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.749068260192871,
      "learning_rate": 0.00013335635309999065,
      "loss": 3.1339,
      "step": 158394
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7808823585510254,
      "learning_rate": 0.00013335295168752422,
      "loss": 2.8247,
      "step": 158395
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.513018846511841,
      "learning_rate": 0.00013334955030604065,
      "loss": 2.9759,
      "step": 158396
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3780996799468994,
      "learning_rate": 0.00013334614895554043,
      "loss": 2.8912,
      "step": 158397
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.761925458908081,
      "learning_rate": 0.00013334274763602443,
      "loss": 2.8513,
      "step": 158398
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8382363319396973,
      "learning_rate": 0.000133339346347493,
      "loss": 2.873,
      "step": 158399
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2239012718200684,
      "learning_rate": 0.00013333594508994708,
      "loss": 2.7661,
      "step": 158400
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7828476428985596,
      "learning_rate": 0.00013333254386338716,
      "loss": 3.1158,
      "step": 158401
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3599307537078857,
      "learning_rate": 0.00013332914266781385,
      "loss": 2.9421,
      "step": 158402
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.244255304336548,
      "learning_rate": 0.00013332574150322772,
      "loss": 2.8784,
      "step": 158403
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.469191789627075,
      "learning_rate": 0.00013332234036962954,
      "loss": 2.9591,
      "step": 158404
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3442842960357666,
      "learning_rate": 0.00013331893926701982,
      "loss": 2.9492,
      "step": 158405
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2323222160339355,
      "learning_rate": 0.0001333155381953993,
      "loss": 3.0927,
      "step": 158406
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3966193199157715,
      "learning_rate": 0.0001333121371547685,
      "loss": 2.8759,
      "step": 158407
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.052222490310669,
      "learning_rate": 0.00013330873614512837,
      "loss": 2.9398,
      "step": 158408
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4007225036621094,
      "learning_rate": 0.00013330533516647903,
      "loss": 2.8714,
      "step": 158409
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1089627742767334,
      "learning_rate": 0.0001333019342188215,
      "loss": 2.9196,
      "step": 158410
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6602602005004883,
      "learning_rate": 0.0001332985333021562,
      "loss": 2.9787,
      "step": 158411
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.780341386795044,
      "learning_rate": 0.0001332951324164839,
      "loss": 2.786,
      "step": 158412
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6370930671691895,
      "learning_rate": 0.00013329173156180515,
      "loss": 2.8574,
      "step": 158413
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.244476795196533,
      "learning_rate": 0.00013328833073812077,
      "loss": 2.9187,
      "step": 158414
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0650713443756104,
      "learning_rate": 0.00013328492994543102,
      "loss": 3.1113,
      "step": 158415
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.5033364295959473,
      "learning_rate": 0.0001332815291837369,
      "loss": 3.0312,
      "step": 158416
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.099412441253662,
      "learning_rate": 0.00013327812845303875,
      "loss": 2.729,
      "step": 158417
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1151208877563477,
      "learning_rate": 0.00013327472775333745,
      "loss": 2.9341,
      "step": 158418
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0418894290924072,
      "learning_rate": 0.0001332713270846334,
      "loss": 2.834,
      "step": 158419
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.592566967010498,
      "learning_rate": 0.00013326792644692762,
      "loss": 3.0487,
      "step": 158420
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3393373489379883,
      "learning_rate": 0.00013326452584022026,
      "loss": 2.9231,
      "step": 158421
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5348782539367676,
      "learning_rate": 0.00013326112526451227,
      "loss": 2.848,
      "step": 158422
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.491948366165161,
      "learning_rate": 0.00013325772471980404,
      "loss": 2.846,
      "step": 158423
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.126375198364258,
      "learning_rate": 0.0001332543242060965,
      "loss": 3.1011,
      "step": 158424
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.050475835800171,
      "learning_rate": 0.00013325092372339,
      "loss": 2.7432,
      "step": 158425
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.628772020339966,
      "learning_rate": 0.00013324752327168552,
      "loss": 2.8211,
      "step": 158426
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1576223373413086,
      "learning_rate": 0.00013324412285098324,
      "loss": 2.8409,
      "step": 158427
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.331496477127075,
      "learning_rate": 0.00013324072246128418,
      "loss": 2.7524,
      "step": 158428
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3046586513519287,
      "learning_rate": 0.00013323732210258869,
      "loss": 2.7783,
      "step": 158429
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.082359790802002,
      "learning_rate": 0.0001332339217748976,
      "loss": 3.0001,
      "step": 158430
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7349600791931152,
      "learning_rate": 0.0001332305214782114,
      "loss": 2.6909,
      "step": 158431
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.396792411804199,
      "learning_rate": 0.00013322712121253104,
      "loss": 3.0405,
      "step": 158432
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5146899223327637,
      "learning_rate": 0.00013322372097785665,
      "loss": 2.8115,
      "step": 158433
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.341658115386963,
      "learning_rate": 0.00013322032077418922,
      "loss": 3.0349,
      "step": 158434
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8130245208740234,
      "learning_rate": 0.00013321692060152916,
      "loss": 3.0022,
      "step": 158435
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.566030740737915,
      "learning_rate": 0.00013321352045987734,
      "loss": 2.6185,
      "step": 158436
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2885560989379883,
      "learning_rate": 0.0001332101203492342,
      "loss": 3.1709,
      "step": 158437
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.496157169342041,
      "learning_rate": 0.00013320672026960063,
      "loss": 3.0079,
      "step": 158438
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1784448623657227,
      "learning_rate": 0.00013320332022097688,
      "loss": 2.9191,
      "step": 158439
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.298259258270264,
      "learning_rate": 0.00013319992020336392,
      "loss": 2.8906,
      "step": 158440
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9246110916137695,
      "learning_rate": 0.0001331965202167621,
      "loss": 2.89,
      "step": 158441
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.480088472366333,
      "learning_rate": 0.0001331931202611723,
      "loss": 3.0162,
      "step": 158442
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.510329008102417,
      "learning_rate": 0.0001331897203365949,
      "loss": 2.8922,
      "step": 158443
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6364104747772217,
      "learning_rate": 0.00013318632044303095,
      "loss": 2.918,
      "step": 158444
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.500349760055542,
      "learning_rate": 0.0001331829205804806,
      "loss": 2.8165,
      "step": 158445
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1869254112243652,
      "learning_rate": 0.0001331795207489448,
      "loss": 3.1979,
      "step": 158446
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.752103090286255,
      "learning_rate": 0.00013317612094842395,
      "loss": 3.021,
      "step": 158447
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.820038080215454,
      "learning_rate": 0.00013317272117891892,
      "loss": 3.2558,
      "step": 158448
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5606679916381836,
      "learning_rate": 0.0001331693214404301,
      "loss": 2.8899,
      "step": 158449
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.489249229431152,
      "learning_rate": 0.0001331659217329585,
      "loss": 2.9214,
      "step": 158450
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.361750602722168,
      "learning_rate": 0.0001331625220565043,
      "loss": 2.7917,
      "step": 158451
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.3584253787994385,
      "learning_rate": 0.00013315912241106841,
      "loss": 2.9,
      "step": 158452
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.841529369354248,
      "learning_rate": 0.00013315572279665132,
      "loss": 3.0142,
      "step": 158453
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6633739471435547,
      "learning_rate": 0.00013315232321325378,
      "loss": 2.9553,
      "step": 158454
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.126978635787964,
      "learning_rate": 0.0001331489236608763,
      "loss": 3.0845,
      "step": 158455
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.506192207336426,
      "learning_rate": 0.00013314552413951983,
      "loss": 2.9546,
      "step": 158456
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.336371898651123,
      "learning_rate": 0.0001331421246491845,
      "loss": 2.9643,
      "step": 158457
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3286635875701904,
      "learning_rate": 0.00013313872518987133,
      "loss": 3.1062,
      "step": 158458
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8142638206481934,
      "learning_rate": 0.00013313532576158068,
      "loss": 3.0043,
      "step": 158459
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1961286067962646,
      "learning_rate": 0.00013313192636431345,
      "loss": 2.7306,
      "step": 158460
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.99383807182312,
      "learning_rate": 0.00013312852699807006,
      "loss": 2.9791,
      "step": 158461
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.8745453357696533,
      "learning_rate": 0.00013312512766285132,
      "loss": 2.8704,
      "step": 158462
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.712731122970581,
      "learning_rate": 0.00013312172835865774,
      "loss": 3.0656,
      "step": 158463
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8977770805358887,
      "learning_rate": 0.00013311832908549,
      "loss": 3.0704,
      "step": 158464
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3380136489868164,
      "learning_rate": 0.0001331149298433486,
      "loss": 2.9242,
      "step": 158465
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1769561767578125,
      "learning_rate": 0.0001331115306322344,
      "loss": 3.0007,
      "step": 158466
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.024388551712036,
      "learning_rate": 0.0001331081314521478,
      "loss": 3.1591,
      "step": 158467
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.89741849899292,
      "learning_rate": 0.0001331047323030897,
      "loss": 2.9651,
      "step": 158468
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3659873008728027,
      "learning_rate": 0.00013310133318506054,
      "loss": 3.0235,
      "step": 158469
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6165883541107178,
      "learning_rate": 0.00013309793409806085,
      "loss": 3.1019,
      "step": 158470
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.293769359588623,
      "learning_rate": 0.00013309453504209157,
      "loss": 2.8312,
      "step": 158471
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3953654766082764,
      "learning_rate": 0.00013309113601715316,
      "loss": 3.1029,
      "step": 158472
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.683363914489746,
      "learning_rate": 0.00013308773702324614,
      "loss": 3.0943,
      "step": 158473
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.7583425045013428,
      "learning_rate": 0.00013308433806037136,
      "loss": 3.0627,
      "step": 158474
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.331922769546509,
      "learning_rate": 0.00013308093912852936,
      "loss": 3.111,
      "step": 158475
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6032395362854004,
      "learning_rate": 0.00013307754022772066,
      "loss": 3.1029,
      "step": 158476
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.631810188293457,
      "learning_rate": 0.0001330741413579461,
      "loss": 2.8094,
      "step": 158477
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.411259174346924,
      "learning_rate": 0.00013307074251920622,
      "loss": 2.851,
      "step": 158478
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7605173587799072,
      "learning_rate": 0.00013306734371150149,
      "loss": 3.1389,
      "step": 158479
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1652612686157227,
      "learning_rate": 0.00013306394493483285,
      "loss": 2.7841,
      "step": 158480
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5710880756378174,
      "learning_rate": 0.00013306054618920075,
      "loss": 2.6611,
      "step": 158481
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6657845973968506,
      "learning_rate": 0.00013305714747460573,
      "loss": 2.9344,
      "step": 158482
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.33658766746521,
      "learning_rate": 0.00013305374879104867,
      "loss": 2.9106,
      "step": 158483
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4170007705688477,
      "learning_rate": 0.00013305035013852997,
      "loss": 2.7658,
      "step": 158484
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.344154119491577,
      "learning_rate": 0.00013304695151705043,
      "loss": 2.9298,
      "step": 158485
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2187047004699707,
      "learning_rate": 0.00013304355292661065,
      "loss": 3.0714,
      "step": 158486
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.083108901977539,
      "learning_rate": 0.0001330401543672112,
      "loss": 2.9584,
      "step": 158487
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5863616466522217,
      "learning_rate": 0.00013303675583885264,
      "loss": 2.9227,
      "step": 158488
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.603846549987793,
      "learning_rate": 0.00013303335734153577,
      "loss": 3.0162,
      "step": 158489
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.437580108642578,
      "learning_rate": 0.00013302995887526107,
      "loss": 2.8228,
      "step": 158490
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4598913192749023,
      "learning_rate": 0.00013302656044002938,
      "loss": 3.2219,
      "step": 158491
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1874125003814697,
      "learning_rate": 0.0001330231620358411,
      "loss": 2.7952,
      "step": 158492
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4089107513427734,
      "learning_rate": 0.00013301976366269716,
      "loss": 2.9812,
      "step": 158493
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.449192762374878,
      "learning_rate": 0.00013301636532059776,
      "loss": 2.9596,
      "step": 158494
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.594139814376831,
      "learning_rate": 0.00013301296700954393,
      "loss": 2.8639,
      "step": 158495
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.358353853225708,
      "learning_rate": 0.000133009568729536,
      "loss": 3.1191,
      "step": 158496
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0935757160186768,
      "learning_rate": 0.00013300617048057485,
      "loss": 2.9821,
      "step": 158497
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6221518516540527,
      "learning_rate": 0.0001330027722626609,
      "loss": 2.9851,
      "step": 158498
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2604126930236816,
      "learning_rate": 0.00013299937407579513,
      "loss": 3.1512,
      "step": 158499
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8479535579681396,
      "learning_rate": 0.00013299597591997768,
      "loss": 2.9611,
      "step": 158500
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1279196739196777,
      "learning_rate": 0.00013299257779520956,
      "loss": 2.8555,
      "step": 158501
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.21187686920166,
      "learning_rate": 0.00013298917970149115,
      "loss": 3.1614,
      "step": 158502
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.602280855178833,
      "learning_rate": 0.0001329857816388233,
      "loss": 2.9654,
      "step": 158503
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3358333110809326,
      "learning_rate": 0.0001329823836072065,
      "loss": 2.9854,
      "step": 158504
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2971935272216797,
      "learning_rate": 0.00013297898560664156,
      "loss": 2.886,
      "step": 158505
    },
    {
      "epoch": 2.06,
      "grad_norm": 1.9721040725708008,
      "learning_rate": 0.00013297558763712878,
      "loss": 2.9503,
      "step": 158506
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6816775798797607,
      "learning_rate": 0.00013297218969866915,
      "loss": 2.9099,
      "step": 158507
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5734198093414307,
      "learning_rate": 0.000132968791791263,
      "loss": 2.9603,
      "step": 158508
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.621877908706665,
      "learning_rate": 0.0001329653939149112,
      "loss": 3.1108,
      "step": 158509
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.440669298171997,
      "learning_rate": 0.0001329619960696142,
      "loss": 2.866,
      "step": 158510
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.222954273223877,
      "learning_rate": 0.00013295859825537292,
      "loss": 2.7922,
      "step": 158511
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0536201000213623,
      "learning_rate": 0.00013295520047218763,
      "loss": 3.1894,
      "step": 158512
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.782914876937866,
      "learning_rate": 0.00013295180272005917,
      "loss": 2.9727,
      "step": 158513
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7947254180908203,
      "learning_rate": 0.000132948404998988,
      "loss": 2.6959,
      "step": 158514
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3401527404785156,
      "learning_rate": 0.00013294500730897502,
      "loss": 3.1634,
      "step": 158515
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.318632125854492,
      "learning_rate": 0.0001329416096500206,
      "loss": 3.0727,
      "step": 158516
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2117719650268555,
      "learning_rate": 0.00013293821202212572,
      "loss": 2.6723,
      "step": 158517
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1546990871429443,
      "learning_rate": 0.00013293481442529052,
      "loss": 3.122,
      "step": 158518
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1612141132354736,
      "learning_rate": 0.00013293141685951604,
      "loss": 3.0346,
      "step": 158519
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.594054937362671,
      "learning_rate": 0.00013292801932480265,
      "loss": 3.0578,
      "step": 158520
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3561642169952393,
      "learning_rate": 0.00013292462182115121,
      "loss": 2.9431,
      "step": 158521
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4383678436279297,
      "learning_rate": 0.00013292122434856212,
      "loss": 3.1344,
      "step": 158522
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3902854919433594,
      "learning_rate": 0.00013291782690703634,
      "loss": 3.0046,
      "step": 158523
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6466333866119385,
      "learning_rate": 0.00013291442949657403,
      "loss": 2.9316,
      "step": 158524
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.380670070648193,
      "learning_rate": 0.00013291103211717626,
      "loss": 3.025,
      "step": 158525
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7649827003479004,
      "learning_rate": 0.00013290763476884334,
      "loss": 2.8013,
      "step": 158526
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.35024356842041,
      "learning_rate": 0.00013290423745157616,
      "loss": 2.8142,
      "step": 158527
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.778803825378418,
      "learning_rate": 0.0001329008401653751,
      "loss": 3.0202,
      "step": 158528
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.189908504486084,
      "learning_rate": 0.0001328974429102412,
      "loss": 2.9188,
      "step": 158529
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.594181537628174,
      "learning_rate": 0.0001328940456861746,
      "loss": 2.8167,
      "step": 158530
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.8809762001037598,
      "learning_rate": 0.0001328906484931762,
      "loss": 3.0377,
      "step": 158531
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.471269130706787,
      "learning_rate": 0.0001328872513312465,
      "loss": 2.7572,
      "step": 158532
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6074728965759277,
      "learning_rate": 0.00013288385420038635,
      "loss": 2.7291,
      "step": 158533
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.7785868644714355,
      "learning_rate": 0.0001328804571005961,
      "loss": 2.8592,
      "step": 158534
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.277243137359619,
      "learning_rate": 0.0001328770600318768,
      "loss": 3.1119,
      "step": 158535
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2848381996154785,
      "learning_rate": 0.00013287366299422854,
      "loss": 3.0028,
      "step": 158536
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1116526126861572,
      "learning_rate": 0.00013287026598765233,
      "loss": 2.7099,
      "step": 158537
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.028360843658447,
      "learning_rate": 0.00013286686901214863,
      "loss": 2.9434,
      "step": 158538
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.4406166076660156,
      "learning_rate": 0.0001328634720677182,
      "loss": 2.8353,
      "step": 158539
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.1328868865966797,
      "learning_rate": 0.00013286007515436152,
      "loss": 3.098,
      "step": 158540
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.993765115737915,
      "learning_rate": 0.0001328566782720794,
      "loss": 3.2645,
      "step": 158541
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.346746921539307,
      "learning_rate": 0.00013285328142087236,
      "loss": 2.927,
      "step": 158542
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.7719504833221436,
      "learning_rate": 0.0001328498846007411,
      "loss": 2.7758,
      "step": 158543
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.775785207748413,
      "learning_rate": 0.00013284648781168606,
      "loss": 2.9653,
      "step": 158544
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.474682569503784,
      "learning_rate": 0.00013284309105370814,
      "loss": 3.0369,
      "step": 158545
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.400153398513794,
      "learning_rate": 0.0001328396943268077,
      "loss": 2.9118,
      "step": 158546
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.082418441772461,
      "learning_rate": 0.00013283629763098567,
      "loss": 2.8104,
      "step": 158547
    },
    {
      "epoch": 2.06,
      "grad_norm": 5.050994873046875,
      "learning_rate": 0.0001328329009662425,
      "loss": 2.9699,
      "step": 158548
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.253278970718384,
      "learning_rate": 0.00013282950433257887,
      "loss": 3.173,
      "step": 158549
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.022744655609131,
      "learning_rate": 0.00013282610772999526,
      "loss": 2.9007,
      "step": 158550
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1762940883636475,
      "learning_rate": 0.00013282271115849255,
      "loss": 2.9343,
      "step": 158551
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.3966126441955566,
      "learning_rate": 0.00013281931461807113,
      "loss": 2.9826,
      "step": 158552
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.194774866104126,
      "learning_rate": 0.00013281591810873188,
      "loss": 2.8166,
      "step": 158553
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4149134159088135,
      "learning_rate": 0.0001328125216304753,
      "loss": 2.8888,
      "step": 158554
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.212914228439331,
      "learning_rate": 0.00013280912518330194,
      "loss": 2.98,
      "step": 158555
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.755441427230835,
      "learning_rate": 0.00013280572876721262,
      "loss": 3.0635,
      "step": 158556
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.0417892932891846,
      "learning_rate": 0.00013280233238220786,
      "loss": 3.1717,
      "step": 158557
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1836698055267334,
      "learning_rate": 0.0001327989360282882,
      "loss": 2.905,
      "step": 158558
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3026599884033203,
      "learning_rate": 0.00013279553970545449,
      "loss": 2.8622,
      "step": 158559
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.2692415714263916,
      "learning_rate": 0.00013279214341370722,
      "loss": 2.8785,
      "step": 158560
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.282867193222046,
      "learning_rate": 0.00013278874715304698,
      "loss": 3.0153,
      "step": 158561
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.5554518699645996,
      "learning_rate": 0.00013278535092347453,
      "loss": 3.0591,
      "step": 158562
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.178755760192871,
      "learning_rate": 0.00013278195472499045,
      "loss": 3.0063,
      "step": 158563
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3800151348114014,
      "learning_rate": 0.00013277855855759529,
      "loss": 2.9785,
      "step": 158564
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.1501340866088867,
      "learning_rate": 0.0001327751624212898,
      "loss": 2.938,
      "step": 158565
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.56308913230896,
      "learning_rate": 0.00013277176631607462,
      "loss": 3.1033,
      "step": 158566
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.504503011703491,
      "learning_rate": 0.00013276837024195018,
      "loss": 2.945,
      "step": 158567
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.483365535736084,
      "learning_rate": 0.0001327649741989174,
      "loss": 3.0361,
      "step": 158568
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.218459129333496,
      "learning_rate": 0.00013276157818697663,
      "loss": 2.8651,
      "step": 158569
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6255877017974854,
      "learning_rate": 0.00013275818220612876,
      "loss": 3.1073,
      "step": 158570
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.5114493370056152,
      "learning_rate": 0.00013275478625637428,
      "loss": 2.798,
      "step": 158571
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.3775978088378906,
      "learning_rate": 0.00013275139033771388,
      "loss": 2.8963,
      "step": 158572
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.628401517868042,
      "learning_rate": 0.00013274799445014802,
      "loss": 3.0986,
      "step": 158573
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.511007308959961,
      "learning_rate": 0.00013274459859367757,
      "loss": 2.9769,
      "step": 158574
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6965348720550537,
      "learning_rate": 0.00013274120276830298,
      "loss": 2.8653,
      "step": 158575
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.050126075744629,
      "learning_rate": 0.00013273780697402505,
      "loss": 3.1768,
      "step": 158576
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.0007359981536865,
      "learning_rate": 0.00013273441121084429,
      "loss": 3.1415,
      "step": 158577
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.4760639667510986,
      "learning_rate": 0.00013273101547876137,
      "loss": 3.2059,
      "step": 158578
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.6199891567230225,
      "learning_rate": 0.00013272761977777683,
      "loss": 2.9667,
      "step": 158579
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.9511067867279053,
      "learning_rate": 0.00013272422410789148,
      "loss": 2.8576,
      "step": 158580
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.141951322555542,
      "learning_rate": 0.00013272082846910575,
      "loss": 2.9497,
      "step": 158581
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.6612470149993896,
      "learning_rate": 0.00013271743286142048,
      "loss": 2.965,
      "step": 158582
    },
    {
      "epoch": 2.06,
      "grad_norm": 4.098252296447754,
      "learning_rate": 0.00013271403728483608,
      "loss": 3.0801,
      "step": 158583
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.826995372772217,
      "learning_rate": 0.00013271064173935352,
      "loss": 2.5732,
      "step": 158584
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.2751553058624268,
      "learning_rate": 0.00013270724622497298,
      "loss": 2.7629,
      "step": 158585
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.661790370941162,
      "learning_rate": 0.00013270385074169543,
      "loss": 2.9033,
      "step": 158586
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.240435838699341,
      "learning_rate": 0.0001327004552895213,
      "loss": 2.9346,
      "step": 158587
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.478431463241577,
      "learning_rate": 0.00013269705986845143,
      "loss": 2.8157,
      "step": 158588
    },
    {
      "epoch": 2.06,
      "grad_norm": 3.3853237628936768,
      "learning_rate": 0.00013269366447848623,
      "loss": 2.6207,
      "step": 158589
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.472327709197998,
      "learning_rate": 0.00013269026911962663,
      "loss": 3.0786,
      "step": 158590
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.36730694770813,
      "learning_rate": 0.00013268687379187287,
      "loss": 3.0132,
      "step": 158591
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.652705192565918,
      "learning_rate": 0.00013268347849522587,
      "loss": 3.16,
      "step": 158592
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.751554012298584,
      "learning_rate": 0.00013268008322968604,
      "loss": 2.9836,
      "step": 158593
    },
    {
      "epoch": 2.06,
      "grad_norm": 2.302403450012207,
      "learning_rate": 0.0001326766879952543,
      "loss": 2.9303,
      "step": 158594
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0965914726257324,
      "learning_rate": 0.00013267329279193098,
      "loss": 2.9511,
      "step": 158595
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0023140907287598,
      "learning_rate": 0.00013266989761971708,
      "loss": 2.6955,
      "step": 158596
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5632059574127197,
      "learning_rate": 0.0001326665024786128,
      "loss": 3.0265,
      "step": 158597
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2771003246307373,
      "learning_rate": 0.00013266310736861905,
      "loss": 3.2312,
      "step": 158598
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.149726152420044,
      "learning_rate": 0.0001326597122897363,
      "loss": 2.918,
      "step": 158599
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.649796724319458,
      "learning_rate": 0.00013265631724196538,
      "loss": 2.8508,
      "step": 158600
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6129729747772217,
      "learning_rate": 0.0001326529222253067,
      "loss": 3.0278,
      "step": 158601
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2254397869110107,
      "learning_rate": 0.00013264952723976122,
      "loss": 2.9469,
      "step": 158602
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.874542713165283,
      "learning_rate": 0.00013264613228532914,
      "loss": 2.9857,
      "step": 158603
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.373281478881836,
      "learning_rate": 0.0001326427373620114,
      "loss": 2.8765,
      "step": 158604
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.404738903045654,
      "learning_rate": 0.00013263934246980841,
      "loss": 2.8698,
      "step": 158605
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.194485902786255,
      "learning_rate": 0.00013263594760872107,
      "loss": 3.0387,
      "step": 158606
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.430893659591675,
      "learning_rate": 0.00013263255277874974,
      "loss": 2.898,
      "step": 158607
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3977010250091553,
      "learning_rate": 0.00013262915797989527,
      "loss": 2.9154,
      "step": 158608
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.440822124481201,
      "learning_rate": 0.0001326257632121582,
      "loss": 2.754,
      "step": 158609
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.313634157180786,
      "learning_rate": 0.0001326223684755392,
      "loss": 2.83,
      "step": 158610
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6611690521240234,
      "learning_rate": 0.00013261897377003873,
      "loss": 2.8871,
      "step": 158611
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6147236824035645,
      "learning_rate": 0.00013261557909565762,
      "loss": 2.9636,
      "step": 158612
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9366977214813232,
      "learning_rate": 0.0001326121844523964,
      "loss": 2.8611,
      "step": 158613
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.8355517387390137,
      "learning_rate": 0.0001326087898402558,
      "loss": 2.8834,
      "step": 158614
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.353590726852417,
      "learning_rate": 0.0001326053952592364,
      "loss": 2.996,
      "step": 158615
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1777102947235107,
      "learning_rate": 0.0001326020007093388,
      "loss": 2.914,
      "step": 158616
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.011833190917969,
      "learning_rate": 0.00013259860619056353,
      "loss": 2.9969,
      "step": 158617
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.950035810470581,
      "learning_rate": 0.00013259521170291147,
      "loss": 2.872,
      "step": 158618
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2139341831207275,
      "learning_rate": 0.00013259181724638303,
      "loss": 2.9561,
      "step": 158619
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.663487195968628,
      "learning_rate": 0.000132588422820979,
      "loss": 2.7071,
      "step": 158620
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.026721954345703,
      "learning_rate": 0.00013258502842669996,
      "loss": 2.9389,
      "step": 158621
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4096450805664062,
      "learning_rate": 0.0001325816340635465,
      "loss": 2.9079,
      "step": 158622
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3194563388824463,
      "learning_rate": 0.00013257823973151923,
      "loss": 2.95,
      "step": 158623
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5980401039123535,
      "learning_rate": 0.00013257484543061887,
      "loss": 2.8462,
      "step": 158624
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.463190793991089,
      "learning_rate": 0.00013257145116084593,
      "loss": 3.0637,
      "step": 158625
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4320154190063477,
      "learning_rate": 0.00013256805692220123,
      "loss": 3.0156,
      "step": 158626
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.467060089111328,
      "learning_rate": 0.00013256466271468527,
      "loss": 2.9659,
      "step": 158627
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.225640296936035,
      "learning_rate": 0.00013256126853829873,
      "loss": 2.9102,
      "step": 158628
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.140390157699585,
      "learning_rate": 0.00013255787439304207,
      "loss": 2.9527,
      "step": 158629
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7636642456054688,
      "learning_rate": 0.00013255448027891614,
      "loss": 2.9239,
      "step": 158630
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.621438980102539,
      "learning_rate": 0.00013255108619592145,
      "loss": 2.9822,
      "step": 158631
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.161151170730591,
      "learning_rate": 0.00013254769214405876,
      "loss": 2.8871,
      "step": 158632
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.650806188583374,
      "learning_rate": 0.0001325442981233286,
      "loss": 2.811,
      "step": 158633
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.665900945663452,
      "learning_rate": 0.00013254090413373162,
      "loss": 2.8117,
      "step": 158634
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.962460994720459,
      "learning_rate": 0.00013253751017526836,
      "loss": 2.9461,
      "step": 158635
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3070127964019775,
      "learning_rate": 0.00013253411624793962,
      "loss": 2.7519,
      "step": 158636
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6535604000091553,
      "learning_rate": 0.00013253072235174584,
      "loss": 2.9174,
      "step": 158637
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8025424480438232,
      "learning_rate": 0.00013252732848668787,
      "loss": 2.9027,
      "step": 158638
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.714479684829712,
      "learning_rate": 0.00013252393465276624,
      "loss": 3.1731,
      "step": 158639
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.270886182785034,
      "learning_rate": 0.0001325205408499816,
      "loss": 2.948,
      "step": 158640
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7109534740448,
      "learning_rate": 0.0001325171470783344,
      "loss": 2.7388,
      "step": 158641
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.020040512084961,
      "learning_rate": 0.00013251375333782556,
      "loss": 3.0546,
      "step": 158642
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.616396188735962,
      "learning_rate": 0.00013251035962845548,
      "loss": 3.0748,
      "step": 158643
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.195312976837158,
      "learning_rate": 0.00013250696595022493,
      "loss": 2.9052,
      "step": 158644
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3871371746063232,
      "learning_rate": 0.00013250357230313456,
      "loss": 2.6125,
      "step": 158645
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4104323387145996,
      "learning_rate": 0.0001325001786871848,
      "loss": 2.9812,
      "step": 158646
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.852085590362549,
      "learning_rate": 0.00013249678510237654,
      "loss": 2.8887,
      "step": 158647
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5387990474700928,
      "learning_rate": 0.00013249339154871025,
      "loss": 2.5567,
      "step": 158648
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3205618858337402,
      "learning_rate": 0.00013248999802618653,
      "loss": 2.8975,
      "step": 158649
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.948714256286621,
      "learning_rate": 0.0001324866045348062,
      "loss": 3.0618,
      "step": 158650
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2851788997650146,
      "learning_rate": 0.00013248321107456977,
      "loss": 3.0087,
      "step": 158651
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2834126949310303,
      "learning_rate": 0.00013247981764547774,
      "loss": 3.1276,
      "step": 158652
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2764739990234375,
      "learning_rate": 0.000132476424247531,
      "loss": 2.8413,
      "step": 158653
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.084545850753784,
      "learning_rate": 0.00013247303088072994,
      "loss": 2.726,
      "step": 158654
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3035616874694824,
      "learning_rate": 0.0001324696375450754,
      "loss": 3.0904,
      "step": 158655
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.4379327297210693,
      "learning_rate": 0.00013246624424056793,
      "loss": 2.9785,
      "step": 158656
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4874014854431152,
      "learning_rate": 0.00013246285096720818,
      "loss": 3.2391,
      "step": 158657
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.155642509460449,
      "learning_rate": 0.00013245945772499658,
      "loss": 2.9315,
      "step": 158658
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.517758846282959,
      "learning_rate": 0.00013245606451393408,
      "loss": 2.7652,
      "step": 158659
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6729440689086914,
      "learning_rate": 0.00013245267133402103,
      "loss": 2.9929,
      "step": 158660
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.4498188495635986,
      "learning_rate": 0.0001324492781852583,
      "loss": 2.88,
      "step": 158661
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.511504173278809,
      "learning_rate": 0.00013244588506764642,
      "loss": 3.3171,
      "step": 158662
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.9088222980499268,
      "learning_rate": 0.00013244249198118602,
      "loss": 2.8281,
      "step": 158663
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.02075719833374,
      "learning_rate": 0.00013243909892587762,
      "loss": 3.0449,
      "step": 158664
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4241414070129395,
      "learning_rate": 0.00013243570590172203,
      "loss": 2.8305,
      "step": 158665
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.235551595687866,
      "learning_rate": 0.00013243231290871975,
      "loss": 3.0019,
      "step": 158666
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8897976875305176,
      "learning_rate": 0.00013242891994687153,
      "loss": 3.0388,
      "step": 158667
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.976895809173584,
      "learning_rate": 0.00013242552701617783,
      "loss": 2.9207,
      "step": 158668
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.294181823730469,
      "learning_rate": 0.00013242213411663963,
      "loss": 2.8988,
      "step": 158669
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.4849207401275635,
      "learning_rate": 0.0001324187412482571,
      "loss": 2.895,
      "step": 158670
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4917984008789062,
      "learning_rate": 0.00013241534841103116,
      "loss": 2.9531,
      "step": 158671
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.301600217819214,
      "learning_rate": 0.0001324119556049623,
      "loss": 2.9403,
      "step": 158672
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.81111478805542,
      "learning_rate": 0.00013240856283005132,
      "loss": 2.9771,
      "step": 158673
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5424246788024902,
      "learning_rate": 0.00013240517008629863,
      "loss": 3.2171,
      "step": 158674
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.374474048614502,
      "learning_rate": 0.0001324017773737051,
      "loss": 2.6879,
      "step": 158675
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.752638101577759,
      "learning_rate": 0.00013239838469227124,
      "loss": 3.088,
      "step": 158676
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.629603147506714,
      "learning_rate": 0.0001323949920419977,
      "loss": 2.9875,
      "step": 158677
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8407490253448486,
      "learning_rate": 0.00013239159942288496,
      "loss": 2.9305,
      "step": 158678
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.03501033782959,
      "learning_rate": 0.00013238820683493394,
      "loss": 3.094,
      "step": 158679
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.278040885925293,
      "learning_rate": 0.00013238481427814492,
      "loss": 3.1366,
      "step": 158680
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1183602809906006,
      "learning_rate": 0.00013238142175251891,
      "loss": 2.7733,
      "step": 158681
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7039577960968018,
      "learning_rate": 0.00013237802925805637,
      "loss": 2.975,
      "step": 158682
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.265122175216675,
      "learning_rate": 0.0001323746367947579,
      "loss": 3.0884,
      "step": 158683
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.567654132843018,
      "learning_rate": 0.00013237124436262402,
      "loss": 2.915,
      "step": 158684
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.103370428085327,
      "learning_rate": 0.00013236785196165556,
      "loss": 2.7144,
      "step": 158685
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9194092750549316,
      "learning_rate": 0.00013236445959185304,
      "loss": 2.8238,
      "step": 158686
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3507347106933594,
      "learning_rate": 0.00013236106725321724,
      "loss": 2.9608,
      "step": 158687
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8050332069396973,
      "learning_rate": 0.00013235767494574864,
      "loss": 2.9738,
      "step": 158688
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.102813720703125,
      "learning_rate": 0.00013235428266944793,
      "loss": 2.9223,
      "step": 158689
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4318454265594482,
      "learning_rate": 0.00013235089042431563,
      "loss": 2.9945,
      "step": 158690
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7769877910614014,
      "learning_rate": 0.00013234749821035256,
      "loss": 2.8593,
      "step": 158691
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7082631587982178,
      "learning_rate": 0.00013234410602755913,
      "loss": 2.98,
      "step": 158692
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.076296806335449,
      "learning_rate": 0.00013234071387593623,
      "loss": 3.1757,
      "step": 158693
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5866894721984863,
      "learning_rate": 0.00013233732175548435,
      "loss": 2.9863,
      "step": 158694
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2417407035827637,
      "learning_rate": 0.00013233392966620412,
      "loss": 3.0425,
      "step": 158695
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.947690725326538,
      "learning_rate": 0.00013233053760809606,
      "loss": 2.6864,
      "step": 158696
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.550657033920288,
      "learning_rate": 0.00013232714558116104,
      "loss": 3.0924,
      "step": 158697
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.973879337310791,
      "learning_rate": 0.00013232375358539946,
      "loss": 3.0798,
      "step": 158698
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3291194438934326,
      "learning_rate": 0.00013232036162081216,
      "loss": 2.727,
      "step": 158699
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9839205741882324,
      "learning_rate": 0.00013231696968739968,
      "loss": 3.1473,
      "step": 158700
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.847072124481201,
      "learning_rate": 0.00013231357778516268,
      "loss": 2.9634,
      "step": 158701
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.662219285964966,
      "learning_rate": 0.0001323101859141016,
      "loss": 3.0419,
      "step": 158702
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.115370273590088,
      "learning_rate": 0.0001323067940742173,
      "loss": 2.8614,
      "step": 158703
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8670694828033447,
      "learning_rate": 0.00013230340226551027,
      "loss": 2.9917,
      "step": 158704
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6577811241149902,
      "learning_rate": 0.0001323000104879813,
      "loss": 3.0078,
      "step": 158705
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.94858980178833,
      "learning_rate": 0.0001322966187416309,
      "loss": 2.9906,
      "step": 158706
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5209391117095947,
      "learning_rate": 0.00013229322702645977,
      "loss": 2.8052,
      "step": 158707
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9111931324005127,
      "learning_rate": 0.00013228983534246833,
      "loss": 2.7852,
      "step": 158708
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1582283973693848,
      "learning_rate": 0.00013228644368965754,
      "loss": 2.9863,
      "step": 158709
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.370771646499634,
      "learning_rate": 0.00013228305206802773,
      "loss": 3.1297,
      "step": 158710
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9585838317871094,
      "learning_rate": 0.00013227966047757976,
      "loss": 3.0724,
      "step": 158711
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3749959468841553,
      "learning_rate": 0.0001322762689183142,
      "loss": 3.2132,
      "step": 158712
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.136300563812256,
      "learning_rate": 0.00013227287739023163,
      "loss": 3.2215,
      "step": 158713
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4631295204162598,
      "learning_rate": 0.00013226948589333258,
      "loss": 2.9725,
      "step": 158714
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4282736778259277,
      "learning_rate": 0.0001322660944276179,
      "loss": 2.6951,
      "step": 158715
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5211641788482666,
      "learning_rate": 0.00013226270299308807,
      "loss": 2.9015,
      "step": 158716
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.619372606277466,
      "learning_rate": 0.0001322593115897438,
      "loss": 3.0529,
      "step": 158717
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.677398443222046,
      "learning_rate": 0.00013225592021758574,
      "loss": 2.9988,
      "step": 158718
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.422255754470825,
      "learning_rate": 0.00013225252887661445,
      "loss": 3.0732,
      "step": 158719
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.495908737182617,
      "learning_rate": 0.00013224913756683049,
      "loss": 3.0566,
      "step": 158720
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3440797328948975,
      "learning_rate": 0.00013224574628823468,
      "loss": 2.9503,
      "step": 158721
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.154331684112549,
      "learning_rate": 0.00013224235504082747,
      "loss": 3.0062,
      "step": 158722
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3841943740844727,
      "learning_rate": 0.00013223896382460966,
      "loss": 2.7364,
      "step": 158723
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8536574840545654,
      "learning_rate": 0.00013223557263958177,
      "loss": 3.2025,
      "step": 158724
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.1978919506073,
      "learning_rate": 0.0001322321814857445,
      "loss": 2.9341,
      "step": 158725
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.135507106781006,
      "learning_rate": 0.00013222879036309826,
      "loss": 2.922,
      "step": 158726
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.239811897277832,
      "learning_rate": 0.00013222539927164402,
      "loss": 2.9812,
      "step": 158727
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.607083320617676,
      "learning_rate": 0.00013222200821138213,
      "loss": 2.8928,
      "step": 158728
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.267878293991089,
      "learning_rate": 0.00013221861718231342,
      "loss": 3.2053,
      "step": 158729
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.500476837158203,
      "learning_rate": 0.00013221522618443845,
      "loss": 2.7962,
      "step": 158730
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.646916389465332,
      "learning_rate": 0.00013221183521775775,
      "loss": 3.1268,
      "step": 158731
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.9320688247680664,
      "learning_rate": 0.0001322084442822721,
      "loss": 2.7109,
      "step": 158732
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.060118675231934,
      "learning_rate": 0.00013220505337798211,
      "loss": 2.9361,
      "step": 158733
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5169878005981445,
      "learning_rate": 0.00013220166250488825,
      "loss": 2.7385,
      "step": 158734
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.122312068939209,
      "learning_rate": 0.00013219827166299138,
      "loss": 3.0018,
      "step": 158735
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3050339221954346,
      "learning_rate": 0.000132194880852292,
      "loss": 2.9814,
      "step": 158736
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4790329933166504,
      "learning_rate": 0.00013219149007279065,
      "loss": 2.6911,
      "step": 158737
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4277403354644775,
      "learning_rate": 0.00013218809932448818,
      "loss": 2.9275,
      "step": 158738
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1437129974365234,
      "learning_rate": 0.00013218470860738512,
      "loss": 2.9125,
      "step": 158739
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.04482364654541,
      "learning_rate": 0.00013218131792148196,
      "loss": 2.9016,
      "step": 158740
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7033424377441406,
      "learning_rate": 0.0001321779272667796,
      "loss": 2.9002,
      "step": 158741
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.859163999557495,
      "learning_rate": 0.00013217453664327838,
      "loss": 2.9133,
      "step": 158742
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.245586395263672,
      "learning_rate": 0.00013217114605097925,
      "loss": 3.0223,
      "step": 158743
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.069253921508789,
      "learning_rate": 0.00013216775548988266,
      "loss": 2.7095,
      "step": 158744
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0515847206115723,
      "learning_rate": 0.0001321643649599891,
      "loss": 2.8039,
      "step": 158745
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6812174320220947,
      "learning_rate": 0.0001321609744612995,
      "loss": 3.1371,
      "step": 158746
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.399648427963257,
      "learning_rate": 0.00013215758399381431,
      "loss": 2.8342,
      "step": 158747
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.6771457195281982,
      "learning_rate": 0.00013215419355753412,
      "loss": 2.6515,
      "step": 158748
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.361954689025879,
      "learning_rate": 0.00013215080315245973,
      "loss": 3.0652,
      "step": 158749
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7070438861846924,
      "learning_rate": 0.00013214741277859168,
      "loss": 2.9982,
      "step": 158750
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1455419063568115,
      "learning_rate": 0.00013214402243593047,
      "loss": 3.16,
      "step": 158751
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5942747592926025,
      "learning_rate": 0.00013214063212447694,
      "loss": 2.8518,
      "step": 158752
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.925441265106201,
      "learning_rate": 0.00013213724184423157,
      "loss": 2.7891,
      "step": 158753
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.137152671813965,
      "learning_rate": 0.00013213385159519518,
      "loss": 3.0867,
      "step": 158754
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0962235927581787,
      "learning_rate": 0.00013213046137736825,
      "loss": 2.6825,
      "step": 158755
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5463552474975586,
      "learning_rate": 0.0001321270711907514,
      "loss": 2.9941,
      "step": 158756
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.409846544265747,
      "learning_rate": 0.00013212368103534522,
      "loss": 2.8258,
      "step": 158757
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.44358491897583,
      "learning_rate": 0.00013212029091115053,
      "loss": 3.0442,
      "step": 158758
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.691911458969116,
      "learning_rate": 0.00013211690081816775,
      "loss": 2.5391,
      "step": 158759
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.262580871582031,
      "learning_rate": 0.00013211351075639768,
      "loss": 2.9143,
      "step": 158760
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0855085849761963,
      "learning_rate": 0.00013211012072584093,
      "loss": 2.8046,
      "step": 158761
    },
    {
      "epoch": 2.07,
      "grad_norm": 1.9965596199035645,
      "learning_rate": 0.00013210673072649805,
      "loss": 3.0095,
      "step": 158762
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.6584088802337646,
      "learning_rate": 0.00013210334075836958,
      "loss": 2.9687,
      "step": 158763
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.680402994155884,
      "learning_rate": 0.00013209995082145637,
      "loss": 3.0225,
      "step": 158764
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.100477457046509,
      "learning_rate": 0.00013209656091575883,
      "loss": 3.0036,
      "step": 158765
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.202449321746826,
      "learning_rate": 0.00013209317104127786,
      "loss": 3.0122,
      "step": 158766
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3279004096984863,
      "learning_rate": 0.0001320897811980139,
      "loss": 2.9714,
      "step": 158767
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.5767927169799805,
      "learning_rate": 0.00013208639138596764,
      "loss": 2.6536,
      "step": 158768
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5599358081817627,
      "learning_rate": 0.00013208300160513957,
      "loss": 3.0058,
      "step": 158769
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8383774757385254,
      "learning_rate": 0.00013207961185553058,
      "loss": 2.7436,
      "step": 158770
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5700602531433105,
      "learning_rate": 0.00013207622213714103,
      "loss": 2.9446,
      "step": 158771
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0598702430725098,
      "learning_rate": 0.00013207283244997175,
      "loss": 3.0347,
      "step": 158772
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4662134647369385,
      "learning_rate": 0.00013206944279402338,
      "loss": 2.8483,
      "step": 158773
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9726405143737793,
      "learning_rate": 0.0001320660531692964,
      "loss": 3.1194,
      "step": 158774
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3448166847229004,
      "learning_rate": 0.00013206266357579144,
      "loss": 2.7724,
      "step": 158775
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.428008556365967,
      "learning_rate": 0.0001320592740135093,
      "loss": 3.094,
      "step": 158776
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5286476612091064,
      "learning_rate": 0.00013205588448245037,
      "loss": 2.7409,
      "step": 158777
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.9697184562683105,
      "learning_rate": 0.00013205249498261557,
      "loss": 2.6887,
      "step": 158778
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.961820602416992,
      "learning_rate": 0.0001320491055140054,
      "loss": 2.8742,
      "step": 158779
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1811835765838623,
      "learning_rate": 0.00013204571607662043,
      "loss": 2.8872,
      "step": 158780
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2691097259521484,
      "learning_rate": 0.00013204232667046124,
      "loss": 3.1262,
      "step": 158781
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.487494468688965,
      "learning_rate": 0.00013203893729552864,
      "loss": 2.6947,
      "step": 158782
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2553114891052246,
      "learning_rate": 0.0001320355479518231,
      "loss": 3.1178,
      "step": 158783
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8890221118927,
      "learning_rate": 0.00013203215863934542,
      "loss": 2.8205,
      "step": 158784
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3705525398254395,
      "learning_rate": 0.00013202876935809613,
      "loss": 2.8106,
      "step": 158785
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7467222213745117,
      "learning_rate": 0.00013202538010807584,
      "loss": 2.8481,
      "step": 158786
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4476091861724854,
      "learning_rate": 0.0001320219908892851,
      "loss": 2.9321,
      "step": 158787
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4591057300567627,
      "learning_rate": 0.00013201860170172476,
      "loss": 2.8224,
      "step": 158788
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3434460163116455,
      "learning_rate": 0.0001320152125453952,
      "loss": 2.9402,
      "step": 158789
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4851505756378174,
      "learning_rate": 0.00013201182342029735,
      "loss": 3.2365,
      "step": 158790
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.138035774230957,
      "learning_rate": 0.00013200843432643165,
      "loss": 2.9495,
      "step": 158791
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.271702289581299,
      "learning_rate": 0.00013200504526379874,
      "loss": 2.8781,
      "step": 158792
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2904934883117676,
      "learning_rate": 0.00013200165623239918,
      "loss": 2.9667,
      "step": 158793
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.459432601928711,
      "learning_rate": 0.00013199826723223374,
      "loss": 2.9191,
      "step": 158794
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2788729667663574,
      "learning_rate": 0.00013199487826330293,
      "loss": 2.7935,
      "step": 158795
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9056284427642822,
      "learning_rate": 0.00013199148932560754,
      "loss": 2.8725,
      "step": 158796
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9652538299560547,
      "learning_rate": 0.00013198810041914808,
      "loss": 2.9963,
      "step": 158797
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7348315715789795,
      "learning_rate": 0.00013198471154392524,
      "loss": 2.6048,
      "step": 158798
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.496281385421753,
      "learning_rate": 0.00013198132269993948,
      "loss": 2.8555,
      "step": 158799
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.828047037124634,
      "learning_rate": 0.0001319779338871917,
      "loss": 3.2879,
      "step": 158800
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.0775275230407715,
      "learning_rate": 0.00013197454510568225,
      "loss": 2.881,
      "step": 158801
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.266599416732788,
      "learning_rate": 0.00013197115635541203,
      "loss": 2.9947,
      "step": 158802
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.094223737716675,
      "learning_rate": 0.00013196776763638155,
      "loss": 2.8757,
      "step": 158803
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.859459161758423,
      "learning_rate": 0.0001319643789485914,
      "loss": 2.8368,
      "step": 158804
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.082162380218506,
      "learning_rate": 0.00013196099029204217,
      "loss": 2.8016,
      "step": 158805
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5879688262939453,
      "learning_rate": 0.0001319576016667347,
      "loss": 3.0733,
      "step": 158806
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.956002235412598,
      "learning_rate": 0.0001319542130726693,
      "loss": 2.8467,
      "step": 158807
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.976686477661133,
      "learning_rate": 0.00013195082450984694,
      "loss": 2.8904,
      "step": 158808
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0148959159851074,
      "learning_rate": 0.000131947435978268,
      "loss": 3.0093,
      "step": 158809
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6246232986450195,
      "learning_rate": 0.0001319440474779334,
      "loss": 2.9203,
      "step": 158810
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.9380929470062256,
      "learning_rate": 0.00013194065900884333,
      "loss": 2.8706,
      "step": 158811
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.117056846618652,
      "learning_rate": 0.0001319372705709988,
      "loss": 3.0979,
      "step": 158812
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.611220359802246,
      "learning_rate": 0.0001319338821644002,
      "loss": 2.9929,
      "step": 158813
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2945916652679443,
      "learning_rate": 0.00013193049378904835,
      "loss": 3.2485,
      "step": 158814
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.373622179031372,
      "learning_rate": 0.00013192710544494374,
      "loss": 3.11,
      "step": 158815
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.740745782852173,
      "learning_rate": 0.0001319237171320871,
      "loss": 2.9537,
      "step": 158816
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.748375415802002,
      "learning_rate": 0.00013192032885047902,
      "loss": 3.042,
      "step": 158817
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.101348876953125,
      "learning_rate": 0.0001319169406001202,
      "loss": 2.9864,
      "step": 158818
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2812435626983643,
      "learning_rate": 0.000131913552381011,
      "loss": 2.8098,
      "step": 158819
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4088618755340576,
      "learning_rate": 0.00013191016419315235,
      "loss": 3.2335,
      "step": 158820
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.855954885482788,
      "learning_rate": 0.00013190677603654472,
      "loss": 3.2124,
      "step": 158821
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.374433994293213,
      "learning_rate": 0.00013190338791118888,
      "loss": 2.988,
      "step": 158822
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3912153244018555,
      "learning_rate": 0.00013189999981708537,
      "loss": 2.982,
      "step": 158823
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4449963569641113,
      "learning_rate": 0.0001318966117542348,
      "loss": 2.8528,
      "step": 158824
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2256979942321777,
      "learning_rate": 0.00013189322372263773,
      "loss": 3.0099,
      "step": 158825
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2474911212921143,
      "learning_rate": 0.00013188983572229502,
      "loss": 2.8788,
      "step": 158826
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.551076650619507,
      "learning_rate": 0.000131886447753207,
      "loss": 2.8384,
      "step": 158827
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.11081862449646,
      "learning_rate": 0.00013188305981537462,
      "loss": 3.1506,
      "step": 158828
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0956010818481445,
      "learning_rate": 0.00013187967190879835,
      "loss": 2.9713,
      "step": 158829
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0511741638183594,
      "learning_rate": 0.00013187628403347866,
      "loss": 2.8836,
      "step": 158830
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.548692464828491,
      "learning_rate": 0.00013187289618941651,
      "loss": 2.8502,
      "step": 158831
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.9041101932525635,
      "learning_rate": 0.00013186950837661235,
      "loss": 2.9418,
      "step": 158832
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.8288888931274414,
      "learning_rate": 0.0001318661205950667,
      "loss": 2.9869,
      "step": 158833
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4897170066833496,
      "learning_rate": 0.00013186273284478042,
      "loss": 2.9503,
      "step": 158834
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.110938310623169,
      "learning_rate": 0.00013185934512575405,
      "loss": 2.8208,
      "step": 158835
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.994469165802002,
      "learning_rate": 0.00013185595743798808,
      "loss": 3.0167,
      "step": 158836
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5443220138549805,
      "learning_rate": 0.00013185256978148337,
      "loss": 2.9736,
      "step": 158837
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.235020875930786,
      "learning_rate": 0.00013184918215624044,
      "loss": 2.9146,
      "step": 158838
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.189014196395874,
      "learning_rate": 0.0001318457945622598,
      "loss": 2.733,
      "step": 158839
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5156056880950928,
      "learning_rate": 0.00013184240699954235,
      "loss": 2.9429,
      "step": 158840
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9087095260620117,
      "learning_rate": 0.00013183901946808851,
      "loss": 2.9564,
      "step": 158841
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.755197286605835,
      "learning_rate": 0.0001318356319678989,
      "loss": 2.8038,
      "step": 158842
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.587338447570801,
      "learning_rate": 0.00013183224449897434,
      "loss": 2.9739,
      "step": 158843
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4415740966796875,
      "learning_rate": 0.00013182885706131524,
      "loss": 3.0743,
      "step": 158844
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.481430768966675,
      "learning_rate": 0.00013182546965492242,
      "loss": 3.1583,
      "step": 158845
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.23101544380188,
      "learning_rate": 0.0001318220822797964,
      "loss": 3.0074,
      "step": 158846
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2575478553771973,
      "learning_rate": 0.00013181869493593783,
      "loss": 3.205,
      "step": 158847
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1146299839019775,
      "learning_rate": 0.00013181530762334726,
      "loss": 2.9011,
      "step": 158848
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2016665935516357,
      "learning_rate": 0.00013181192034202548,
      "loss": 2.9706,
      "step": 158849
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.472496271133423,
      "learning_rate": 0.00013180853309197296,
      "loss": 2.9064,
      "step": 158850
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0873098373413086,
      "learning_rate": 0.00013180514587319048,
      "loss": 2.664,
      "step": 158851
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.581804037094116,
      "learning_rate": 0.00013180175868567865,
      "loss": 3.1093,
      "step": 158852
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3748481273651123,
      "learning_rate": 0.000131798371529438,
      "loss": 2.9366,
      "step": 158853
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.603737831115723,
      "learning_rate": 0.00013179498440446912,
      "loss": 2.6494,
      "step": 158854
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.608383893966675,
      "learning_rate": 0.00013179159731077284,
      "loss": 2.9953,
      "step": 158855
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4873154163360596,
      "learning_rate": 0.00013178821024834955,
      "loss": 2.939,
      "step": 158856
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0625054836273193,
      "learning_rate": 0.00013178482321720013,
      "loss": 2.9672,
      "step": 158857
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.122060537338257,
      "learning_rate": 0.0001317814362173251,
      "loss": 2.8035,
      "step": 158858
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.168036460876465,
      "learning_rate": 0.00013177804924872504,
      "loss": 2.7688,
      "step": 158859
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.365424394607544,
      "learning_rate": 0.00013177466231140052,
      "loss": 2.8977,
      "step": 158860
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.439739227294922,
      "learning_rate": 0.0001317712754053524,
      "loss": 2.9569,
      "step": 158861
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3074984550476074,
      "learning_rate": 0.00013176788853058106,
      "loss": 3.0564,
      "step": 158862
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2853004932403564,
      "learning_rate": 0.00013176450168708733,
      "loss": 3.0623,
      "step": 158863
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.39921498298645,
      "learning_rate": 0.00013176111487487176,
      "loss": 2.9052,
      "step": 158864
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.29050874710083,
      "learning_rate": 0.000131757728093935,
      "loss": 3.1174,
      "step": 158865
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.719909191131592,
      "learning_rate": 0.0001317543413442775,
      "loss": 2.9327,
      "step": 158866
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.186042070388794,
      "learning_rate": 0.0001317509546259002,
      "loss": 2.8909,
      "step": 158867
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.149244546890259,
      "learning_rate": 0.00013174756793880345,
      "loss": 2.8985,
      "step": 158868
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4148828983306885,
      "learning_rate": 0.00013174418128298807,
      "loss": 2.815,
      "step": 158869
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.555014133453369,
      "learning_rate": 0.00013174079465845466,
      "loss": 3.1129,
      "step": 158870
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0436320304870605,
      "learning_rate": 0.00013173740806520385,
      "loss": 2.9007,
      "step": 158871
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.394806385040283,
      "learning_rate": 0.00013173402150323606,
      "loss": 2.6485,
      "step": 158872
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0572116374969482,
      "learning_rate": 0.00013173063497255223,
      "loss": 2.9192,
      "step": 158873
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.859313726425171,
      "learning_rate": 0.00013172724847315275,
      "loss": 2.9339,
      "step": 158874
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4952263832092285,
      "learning_rate": 0.00013172386200503843,
      "loss": 3.1146,
      "step": 158875
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5779032707214355,
      "learning_rate": 0.00013172047556820975,
      "loss": 2.9772,
      "step": 158876
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0051639080047607,
      "learning_rate": 0.0001317170891626676,
      "loss": 2.8117,
      "step": 158877
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.897195339202881,
      "learning_rate": 0.0001317137027884122,
      "loss": 2.8665,
      "step": 158878
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.317617654800415,
      "learning_rate": 0.00013171031644544453,
      "loss": 3.1279,
      "step": 158879
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.312173366546631,
      "learning_rate": 0.00013170693013376495,
      "loss": 3.2057,
      "step": 158880
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.365767002105713,
      "learning_rate": 0.00013170354385337436,
      "loss": 3.1987,
      "step": 158881
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2247917652130127,
      "learning_rate": 0.00013170015760427314,
      "loss": 3.0287,
      "step": 158882
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3754708766937256,
      "learning_rate": 0.00013169677138646224,
      "loss": 3.001,
      "step": 158883
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6341609954833984,
      "learning_rate": 0.0001316933851999419,
      "loss": 2.7793,
      "step": 158884
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.404391288757324,
      "learning_rate": 0.00013168999904471305,
      "loss": 3.0978,
      "step": 158885
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7418630123138428,
      "learning_rate": 0.0001316866129207761,
      "loss": 2.9267,
      "step": 158886
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2884931564331055,
      "learning_rate": 0.00013168322682813186,
      "loss": 2.6806,
      "step": 158887
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4937994480133057,
      "learning_rate": 0.0001316798407667808,
      "loss": 2.8448,
      "step": 158888
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5171329975128174,
      "learning_rate": 0.0001316764547367238,
      "loss": 3.1531,
      "step": 158889
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.008460283279419,
      "learning_rate": 0.0001316730687379612,
      "loss": 2.8959,
      "step": 158890
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3587679862976074,
      "learning_rate": 0.00013166968277049382,
      "loss": 3.0223,
      "step": 158891
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1769232749938965,
      "learning_rate": 0.0001316662968343221,
      "loss": 2.9791,
      "step": 158892
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3947861194610596,
      "learning_rate": 0.0001316629109294469,
      "loss": 2.8585,
      "step": 158893
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9736502170562744,
      "learning_rate": 0.00013165952505586866,
      "loss": 2.9686,
      "step": 158894
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3914458751678467,
      "learning_rate": 0.00013165613921358826,
      "loss": 2.94,
      "step": 158895
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4389286041259766,
      "learning_rate": 0.000131652753402606,
      "loss": 3.0637,
      "step": 158896
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.051304817199707,
      "learning_rate": 0.00013164936762292278,
      "loss": 2.9134,
      "step": 158897
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.38078236579895,
      "learning_rate": 0.00013164598187453895,
      "loss": 2.7663,
      "step": 158898
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.121986389160156,
      "learning_rate": 0.00013164259615745547,
      "loss": 2.5917,
      "step": 158899
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3923215866088867,
      "learning_rate": 0.0001316392104716727,
      "loss": 2.7417,
      "step": 158900
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7602264881134033,
      "learning_rate": 0.00013163582481719158,
      "loss": 2.8674,
      "step": 158901
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3825325965881348,
      "learning_rate": 0.0001316324391940123,
      "loss": 2.7722,
      "step": 158902
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4409236907958984,
      "learning_rate": 0.0001316290536021359,
      "loss": 2.8533,
      "step": 158903
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.28314208984375,
      "learning_rate": 0.00013162566804156268,
      "loss": 3.0075,
      "step": 158904
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.080878734588623,
      "learning_rate": 0.00013162228251229356,
      "loss": 2.9962,
      "step": 158905
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.452788829803467,
      "learning_rate": 0.00013161889701432888,
      "loss": 2.8252,
      "step": 158906
    },
    {
      "epoch": 2.07,
      "grad_norm": 1.976794958114624,
      "learning_rate": 0.00013161551154766956,
      "loss": 2.9808,
      "step": 158907
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.5638511180877686,
      "learning_rate": 0.00013161212611231612,
      "loss": 2.9146,
      "step": 158908
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.462519884109497,
      "learning_rate": 0.00013160874070826915,
      "loss": 3.0308,
      "step": 158909
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8004801273345947,
      "learning_rate": 0.00013160535533552917,
      "loss": 2.9315,
      "step": 158910
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.190241813659668,
      "learning_rate": 0.00013160196999409708,
      "loss": 3.2633,
      "step": 158911
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2678141593933105,
      "learning_rate": 0.00013159858468397322,
      "loss": 2.9595,
      "step": 158912
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0727484226226807,
      "learning_rate": 0.00013159519940515846,
      "loss": 3.1649,
      "step": 158913
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2038393020629883,
      "learning_rate": 0.0001315918141576534,
      "loss": 2.9928,
      "step": 158914
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.377537965774536,
      "learning_rate": 0.00013158842894145842,
      "loss": 2.7616,
      "step": 158915
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3531436920166016,
      "learning_rate": 0.00013158504375657444,
      "loss": 2.8314,
      "step": 158916
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9848031997680664,
      "learning_rate": 0.00013158165860300202,
      "loss": 3.0045,
      "step": 158917
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.97575044631958,
      "learning_rate": 0.0001315782734807416,
      "loss": 2.783,
      "step": 158918
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.99573016166687,
      "learning_rate": 0.0001315748883897941,
      "loss": 3.0502,
      "step": 158919
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4696011543273926,
      "learning_rate": 0.00013157150333016,
      "loss": 2.9238,
      "step": 158920
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.56201434135437,
      "learning_rate": 0.00013156811830183983,
      "loss": 2.9405,
      "step": 158921
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.316972017288208,
      "learning_rate": 0.0001315647333048344,
      "loss": 2.8662,
      "step": 158922
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2501511573791504,
      "learning_rate": 0.00013156134833914432,
      "loss": 3.1671,
      "step": 158923
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4901883602142334,
      "learning_rate": 0.00013155796340477002,
      "loss": 2.7819,
      "step": 158924
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.201993227005005,
      "learning_rate": 0.0001315545785017124,
      "loss": 2.9557,
      "step": 158925
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5879015922546387,
      "learning_rate": 0.00013155119362997196,
      "loss": 3.1652,
      "step": 158926
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.254241466522217,
      "learning_rate": 0.0001315478087895492,
      "loss": 2.8532,
      "step": 158927
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.83235239982605,
      "learning_rate": 0.000131544423980445,
      "loss": 3.058,
      "step": 158928
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.212127685546875,
      "learning_rate": 0.00013154103920265977,
      "loss": 3.0372,
      "step": 158929
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.282132148742676,
      "learning_rate": 0.00013153765445619438,
      "loss": 2.7994,
      "step": 158930
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2880845069885254,
      "learning_rate": 0.00013153426974104929,
      "loss": 2.9887,
      "step": 158931
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.759829044342041,
      "learning_rate": 0.00013153088505722516,
      "loss": 3.1573,
      "step": 158932
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.591310501098633,
      "learning_rate": 0.0001315275004047225,
      "loss": 3.0127,
      "step": 158933
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2219631671905518,
      "learning_rate": 0.00013152411578354217,
      "loss": 3.0088,
      "step": 158934
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9851224422454834,
      "learning_rate": 0.00013152073119368458,
      "loss": 3.0101,
      "step": 158935
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.332731008529663,
      "learning_rate": 0.0001315173466351506,
      "loss": 3.0027,
      "step": 158936
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.940429925918579,
      "learning_rate": 0.00013151396210794072,
      "loss": 2.6099,
      "step": 158937
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.346538543701172,
      "learning_rate": 0.00013151057761205554,
      "loss": 2.9833,
      "step": 158938
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.708601474761963,
      "learning_rate": 0.00013150719314749564,
      "loss": 3.0201,
      "step": 158939
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.489896774291992,
      "learning_rate": 0.00013150380871426184,
      "loss": 3.2091,
      "step": 158940
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7079858779907227,
      "learning_rate": 0.0001315004243123546,
      "loss": 2.9244,
      "step": 158941
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5010695457458496,
      "learning_rate": 0.00013149703994177464,
      "loss": 2.8963,
      "step": 158942
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4347376823425293,
      "learning_rate": 0.0001314936556025225,
      "loss": 2.7799,
      "step": 158943
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4853506088256836,
      "learning_rate": 0.00013149027129459904,
      "loss": 2.9969,
      "step": 158944
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.632037878036499,
      "learning_rate": 0.00013148688701800456,
      "loss": 3.2449,
      "step": 158945
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5647315979003906,
      "learning_rate": 0.00013148350277273992,
      "loss": 2.7815,
      "step": 158946
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5849854946136475,
      "learning_rate": 0.0001314801185588056,
      "loss": 2.8513,
      "step": 158947
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.896169662475586,
      "learning_rate": 0.00013147673437620237,
      "loss": 2.8671,
      "step": 158948
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7854576110839844,
      "learning_rate": 0.00013147335022493074,
      "loss": 3.0419,
      "step": 158949
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.541644334793091,
      "learning_rate": 0.0001314699661049916,
      "loss": 2.9241,
      "step": 158950
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.61560320854187,
      "learning_rate": 0.00013146658201638512,
      "loss": 3.0373,
      "step": 158951
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.531963586807251,
      "learning_rate": 0.00013146319795911233,
      "loss": 2.8994,
      "step": 158952
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.274796009063721,
      "learning_rate": 0.00013145981393317358,
      "loss": 3.0024,
      "step": 158953
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.8482584953308105,
      "learning_rate": 0.00013145642993856975,
      "loss": 2.8687,
      "step": 158954
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.3123674392700195,
      "learning_rate": 0.00013145304597530125,
      "loss": 3.065,
      "step": 158955
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.351938247680664,
      "learning_rate": 0.00013144966204336905,
      "loss": 2.7653,
      "step": 158956
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.488121509552002,
      "learning_rate": 0.0001314462781427733,
      "loss": 3.2238,
      "step": 158957
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7643895149230957,
      "learning_rate": 0.00013144289427351495,
      "loss": 2.8292,
      "step": 158958
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.7632315158843994,
      "learning_rate": 0.00013143951043559444,
      "loss": 2.951,
      "step": 158959
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5365724563598633,
      "learning_rate": 0.00013143612662901266,
      "loss": 3.0964,
      "step": 158960
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.263057231903076,
      "learning_rate": 0.00013143274285376994,
      "loss": 2.9287,
      "step": 158961
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.6668431758880615,
      "learning_rate": 0.0001314293591098673,
      "loss": 2.5804,
      "step": 158962
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.1404709815979004,
      "learning_rate": 0.0001314259753973049,
      "loss": 2.8516,
      "step": 158963
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.21106219291687,
      "learning_rate": 0.00013142259171608367,
      "loss": 2.6715,
      "step": 158964
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3195149898529053,
      "learning_rate": 0.0001314192080662041,
      "loss": 2.948,
      "step": 158965
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4817585945129395,
      "learning_rate": 0.00013141582444766697,
      "loss": 2.9873,
      "step": 158966
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.994138479232788,
      "learning_rate": 0.00013141244086047272,
      "loss": 2.7681,
      "step": 158967
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7513587474823,
      "learning_rate": 0.00013140905730462228,
      "loss": 2.8497,
      "step": 158968
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6717631816864014,
      "learning_rate": 0.00013140567378011587,
      "loss": 2.9417,
      "step": 158969
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.750001907348633,
      "learning_rate": 0.0001314022902869544,
      "loss": 2.9107,
      "step": 158970
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.220754384994507,
      "learning_rate": 0.0001313989068251384,
      "loss": 2.902,
      "step": 158971
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2662956714630127,
      "learning_rate": 0.0001313955233946686,
      "loss": 2.8539,
      "step": 158972
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0846362113952637,
      "learning_rate": 0.0001313921399955454,
      "loss": 2.7767,
      "step": 158973
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6580238342285156,
      "learning_rate": 0.00013138875662776987,
      "loss": 2.9001,
      "step": 158974
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5759127140045166,
      "learning_rate": 0.00013138537329134207,
      "loss": 3.1036,
      "step": 158975
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3290674686431885,
      "learning_rate": 0.00013138198998626307,
      "loss": 2.9743,
      "step": 158976
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.263869524002075,
      "learning_rate": 0.00013137860671253321,
      "loss": 3.1555,
      "step": 158977
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.727006196975708,
      "learning_rate": 0.00013137522347015336,
      "loss": 2.8826,
      "step": 158978
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.408860683441162,
      "learning_rate": 0.00013137184025912392,
      "loss": 2.7199,
      "step": 158979
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.690787315368652,
      "learning_rate": 0.0001313684570794459,
      "loss": 2.8353,
      "step": 158980
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.712037086486816,
      "learning_rate": 0.0001313650739311194,
      "loss": 3.0052,
      "step": 158981
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.010986328125,
      "learning_rate": 0.00013136169081414545,
      "loss": 3.032,
      "step": 158982
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.69638729095459,
      "learning_rate": 0.00013135830772852442,
      "loss": 3.1143,
      "step": 158983
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5410759449005127,
      "learning_rate": 0.0001313549246742572,
      "loss": 2.8848,
      "step": 158984
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.260343551635742,
      "learning_rate": 0.00013135154165134412,
      "loss": 2.8395,
      "step": 158985
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.6368303298950195,
      "learning_rate": 0.00013134815865978623,
      "loss": 2.9507,
      "step": 158986
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.7533180713653564,
      "learning_rate": 0.0001313447756995837,
      "loss": 2.8343,
      "step": 158987
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.402322769165039,
      "learning_rate": 0.00013134139277073744,
      "loss": 2.6049,
      "step": 158988
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.727189540863037,
      "learning_rate": 0.0001313380098732479,
      "loss": 2.8355,
      "step": 158989
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.998581647872925,
      "learning_rate": 0.00013133462700711595,
      "loss": 2.9608,
      "step": 158990
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2506656646728516,
      "learning_rate": 0.0001313312441723419,
      "loss": 3.1739,
      "step": 158991
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.144447088241577,
      "learning_rate": 0.0001313278613689267,
      "loss": 2.9393,
      "step": 158992
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5786259174346924,
      "learning_rate": 0.00013132447859687084,
      "loss": 2.7519,
      "step": 158993
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2868564128875732,
      "learning_rate": 0.0001313210958561749,
      "loss": 2.7528,
      "step": 158994
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.83076810836792,
      "learning_rate": 0.0001313177131468395,
      "loss": 2.8085,
      "step": 158995
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.484323501586914,
      "learning_rate": 0.0001313143304688654,
      "loss": 2.7261,
      "step": 158996
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.740443468093872,
      "learning_rate": 0.00013131094782225307,
      "loss": 2.8915,
      "step": 158997
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4068870544433594,
      "learning_rate": 0.0001313075652070033,
      "loss": 3.1854,
      "step": 158998
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1120598316192627,
      "learning_rate": 0.00013130418262311664,
      "loss": 2.9476,
      "step": 158999
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.750593662261963,
      "learning_rate": 0.0001313008000705936,
      "loss": 3.1953,
      "step": 159000
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0058023929595947,
      "learning_rate": 0.00013129741754943505,
      "loss": 2.9091,
      "step": 159001
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.486854314804077,
      "learning_rate": 0.0001312940350596415,
      "loss": 2.8737,
      "step": 159002
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.392456531524658,
      "learning_rate": 0.00013129065260121346,
      "loss": 2.8551,
      "step": 159003
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6750073432922363,
      "learning_rate": 0.00013128727017415177,
      "loss": 3.0571,
      "step": 159004
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1457631587982178,
      "learning_rate": 0.00013128388777845697,
      "loss": 3.0303,
      "step": 159005
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.480520009994507,
      "learning_rate": 0.00013128050541412958,
      "loss": 3.0931,
      "step": 159006
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7655742168426514,
      "learning_rate": 0.00013127712308117043,
      "loss": 3.1296,
      "step": 159007
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.19034743309021,
      "learning_rate": 0.00013127374077958004,
      "loss": 2.8404,
      "step": 159008
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0463552474975586,
      "learning_rate": 0.00013127035850935895,
      "loss": 3.0706,
      "step": 159009
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.635054111480713,
      "learning_rate": 0.00013126697627050796,
      "loss": 2.8862,
      "step": 159010
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.266780376434326,
      "learning_rate": 0.00013126359406302767,
      "loss": 2.8508,
      "step": 159011
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4314584732055664,
      "learning_rate": 0.00013126021188691853,
      "loss": 2.9381,
      "step": 159012
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.184342861175537,
      "learning_rate": 0.00013125682974218144,
      "loss": 2.8562,
      "step": 159013
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3525402545928955,
      "learning_rate": 0.00013125344762881673,
      "loss": 2.8233,
      "step": 159014
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4520628452301025,
      "learning_rate": 0.0001312500655468253,
      "loss": 2.8289,
      "step": 159015
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.1741695404052734,
      "learning_rate": 0.00013124668349620772,
      "loss": 2.9525,
      "step": 159016
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.336097240447998,
      "learning_rate": 0.00013124330147696456,
      "loss": 2.8235,
      "step": 159017
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5240633487701416,
      "learning_rate": 0.0001312399194890963,
      "loss": 3.0734,
      "step": 159018
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.088968276977539,
      "learning_rate": 0.0001312365375326039,
      "loss": 3.0275,
      "step": 159019
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1629862785339355,
      "learning_rate": 0.00013123315560748764,
      "loss": 3.0149,
      "step": 159020
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.265002489089966,
      "learning_rate": 0.00013122977371374847,
      "loss": 3.1775,
      "step": 159021
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.963083028793335,
      "learning_rate": 0.00013122639185138686,
      "loss": 3.2439,
      "step": 159022
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2892513275146484,
      "learning_rate": 0.00013122301002040345,
      "loss": 3.0753,
      "step": 159023
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.583303689956665,
      "learning_rate": 0.00013121962822079878,
      "loss": 2.8948,
      "step": 159024
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1224637031555176,
      "learning_rate": 0.00013121624645257363,
      "loss": 3.0421,
      "step": 159025
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5986618995666504,
      "learning_rate": 0.00013121286471572845,
      "loss": 3.0857,
      "step": 159026
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.960923910140991,
      "learning_rate": 0.00013120948301026413,
      "loss": 3.126,
      "step": 159027
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5459144115448,
      "learning_rate": 0.00013120610133618104,
      "loss": 3.0127,
      "step": 159028
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3372740745544434,
      "learning_rate": 0.00013120271969348015,
      "loss": 2.9741,
      "step": 159029
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.8253068923950195,
      "learning_rate": 0.00013119933808216163,
      "loss": 2.7966,
      "step": 159030
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.920172929763794,
      "learning_rate": 0.00013119595650222646,
      "loss": 3.1018,
      "step": 159031
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4605751037597656,
      "learning_rate": 0.000131192574953675,
      "loss": 2.8477,
      "step": 159032
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.680208444595337,
      "learning_rate": 0.0001311891934365082,
      "loss": 2.7784,
      "step": 159033
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.30167555809021,
      "learning_rate": 0.00013118581195072635,
      "loss": 3.0038,
      "step": 159034
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4415977001190186,
      "learning_rate": 0.00013118243049633048,
      "loss": 2.7321,
      "step": 159035
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.002624750137329,
      "learning_rate": 0.00013117904907332075,
      "loss": 3.1932,
      "step": 159036
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.11826753616333,
      "learning_rate": 0.00013117566768169816,
      "loss": 2.854,
      "step": 159037
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.542463541030884,
      "learning_rate": 0.00013117228632146308,
      "loss": 2.9861,
      "step": 159038
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.416170120239258,
      "learning_rate": 0.00013116890499261638,
      "loss": 2.9819,
      "step": 159039
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.718202829360962,
      "learning_rate": 0.00013116552369515843,
      "loss": 3.0044,
      "step": 159040
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.8102312088012695,
      "learning_rate": 0.00013116214242909024,
      "loss": 2.9368,
      "step": 159041
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3544886112213135,
      "learning_rate": 0.00013115876119441198,
      "loss": 2.9788,
      "step": 159042
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.197727918624878,
      "learning_rate": 0.0001311553799911246,
      "loss": 2.9859,
      "step": 159043
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0583176612854004,
      "learning_rate": 0.0001311519988192285,
      "loss": 2.9719,
      "step": 159044
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.740097999572754,
      "learning_rate": 0.00013114861767872453,
      "loss": 3.1107,
      "step": 159045
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.664790630340576,
      "learning_rate": 0.00013114523656961312,
      "loss": 2.9743,
      "step": 159046
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1688997745513916,
      "learning_rate": 0.00013114185549189524,
      "loss": 2.849,
      "step": 159047
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7839581966400146,
      "learning_rate": 0.000131138474445571,
      "loss": 2.936,
      "step": 159048
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3158152103424072,
      "learning_rate": 0.00013113509343064145,
      "loss": 2.728,
      "step": 159049
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.4307122230529785,
      "learning_rate": 0.000131131712447107,
      "loss": 3.0406,
      "step": 159050
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5907413959503174,
      "learning_rate": 0.0001311283314949684,
      "loss": 2.8973,
      "step": 159051
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8094730377197266,
      "learning_rate": 0.00013112495057422617,
      "loss": 2.8631,
      "step": 159052
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.014545202255249,
      "learning_rate": 0.00013112156968488119,
      "loss": 2.9144,
      "step": 159053
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.029034376144409,
      "learning_rate": 0.0001311181888269337,
      "loss": 2.8624,
      "step": 159054
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6347227096557617,
      "learning_rate": 0.0001311148080003846,
      "loss": 2.94,
      "step": 159055
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.1288304328918457,
      "learning_rate": 0.00013111142720523438,
      "loss": 2.7631,
      "step": 159056
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5672435760498047,
      "learning_rate": 0.00013110804644148378,
      "loss": 2.8954,
      "step": 159057
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.177147626876831,
      "learning_rate": 0.00013110466570913332,
      "loss": 2.8683,
      "step": 159058
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.961941719055176,
      "learning_rate": 0.0001311012850081839,
      "loss": 2.8748,
      "step": 159059
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.864474058151245,
      "learning_rate": 0.00013109790433863574,
      "loss": 2.8151,
      "step": 159060
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.185724258422852,
      "learning_rate": 0.00013109452370048977,
      "loss": 2.8898,
      "step": 159061
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.693685293197632,
      "learning_rate": 0.0001310911430937464,
      "loss": 3.0446,
      "step": 159062
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.644493341445923,
      "learning_rate": 0.00013108776251840646,
      "loss": 3.0479,
      "step": 159063
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.881892442703247,
      "learning_rate": 0.00013108438197447043,
      "loss": 2.7048,
      "step": 159064
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.720268726348877,
      "learning_rate": 0.00013108100146193917,
      "loss": 3.1136,
      "step": 159065
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.400256156921387,
      "learning_rate": 0.00013107762098081295,
      "loss": 3.251,
      "step": 159066
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.860957622528076,
      "learning_rate": 0.00013107424053109272,
      "loss": 2.927,
      "step": 159067
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.617100715637207,
      "learning_rate": 0.00013107086011277883,
      "loss": 3.0594,
      "step": 159068
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2327370643615723,
      "learning_rate": 0.00013106747972587218,
      "loss": 2.6708,
      "step": 159069
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3500587940216064,
      "learning_rate": 0.00013106409937037317,
      "loss": 3.3036,
      "step": 159070
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9426753520965576,
      "learning_rate": 0.00013106071904628273,
      "loss": 3.0529,
      "step": 159071
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.5520076751708984,
      "learning_rate": 0.0001310573387536011,
      "loss": 3.0316,
      "step": 159072
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.084251642227173,
      "learning_rate": 0.00013105395849232921,
      "loss": 3.0708,
      "step": 159073
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2446694374084473,
      "learning_rate": 0.00013105057826246747,
      "loss": 2.9987,
      "step": 159074
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3932061195373535,
      "learning_rate": 0.00013104719806401668,
      "loss": 2.9766,
      "step": 159075
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.489850997924805,
      "learning_rate": 0.00013104381789697734,
      "loss": 3.1768,
      "step": 159076
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.154327154159546,
      "learning_rate": 0.00013104043776135025,
      "loss": 2.8093,
      "step": 159077
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3612711429595947,
      "learning_rate": 0.00013103705765713593,
      "loss": 3.0419,
      "step": 159078
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.248904228210449,
      "learning_rate": 0.00013103367758433499,
      "loss": 3.0746,
      "step": 159079
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2266156673431396,
      "learning_rate": 0.000131030297542948,
      "loss": 2.878,
      "step": 159080
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.3259053230285645,
      "learning_rate": 0.00013102691753297575,
      "loss": 2.7086,
      "step": 159081
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.953911304473877,
      "learning_rate": 0.00013102353755441867,
      "loss": 2.8458,
      "step": 159082
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6463704109191895,
      "learning_rate": 0.00013102015760727765,
      "loss": 2.9572,
      "step": 159083
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7434141635894775,
      "learning_rate": 0.00013101677769155313,
      "loss": 2.9793,
      "step": 159084
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3313956260681152,
      "learning_rate": 0.0001310133978072458,
      "loss": 2.8929,
      "step": 159085
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.054502010345459,
      "learning_rate": 0.0001310100179543562,
      "loss": 2.8453,
      "step": 159086
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7849550247192383,
      "learning_rate": 0.0001310066381328851,
      "loss": 3.075,
      "step": 159087
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4856085777282715,
      "learning_rate": 0.00013100325834283292,
      "loss": 2.8711,
      "step": 159088
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4018447399139404,
      "learning_rate": 0.00013099987858420056,
      "loss": 2.9846,
      "step": 159089
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.908238172531128,
      "learning_rate": 0.00013099649885698852,
      "loss": 2.8231,
      "step": 159090
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4905073642730713,
      "learning_rate": 0.0001309931191611973,
      "loss": 2.5825,
      "step": 159091
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.28006649017334,
      "learning_rate": 0.00013098973949682776,
      "loss": 2.7437,
      "step": 159092
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5399763584136963,
      "learning_rate": 0.00013098635986388043,
      "loss": 3.1468,
      "step": 159093
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.346808671951294,
      "learning_rate": 0.0001309829802623558,
      "loss": 2.9217,
      "step": 159094
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.445221424102783,
      "learning_rate": 0.00013097960069225476,
      "loss": 2.9497,
      "step": 159095
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.926253318786621,
      "learning_rate": 0.00013097622115357778,
      "loss": 3.0141,
      "step": 159096
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.583197593688965,
      "learning_rate": 0.0001309728416463254,
      "loss": 3.0649,
      "step": 159097
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5144290924072266,
      "learning_rate": 0.00013096946217049847,
      "loss": 2.9871,
      "step": 159098
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0456743240356445,
      "learning_rate": 0.0001309660827260974,
      "loss": 2.8738,
      "step": 159099
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.172370433807373,
      "learning_rate": 0.00013096270331312303,
      "loss": 2.9818,
      "step": 159100
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.475513219833374,
      "learning_rate": 0.00013095932393157592,
      "loss": 2.9885,
      "step": 159101
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.77630352973938,
      "learning_rate": 0.0001309559445814566,
      "loss": 3.1425,
      "step": 159102
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0339362621307373,
      "learning_rate": 0.0001309525652627657,
      "loss": 2.8095,
      "step": 159103
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2648465633392334,
      "learning_rate": 0.000130949185975504,
      "loss": 3.0715,
      "step": 159104
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.4648871421813965,
      "learning_rate": 0.00013094580671967195,
      "loss": 3.0222,
      "step": 159105
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.046647071838379,
      "learning_rate": 0.00013094242749527035,
      "loss": 3.0647,
      "step": 159106
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6160831451416016,
      "learning_rate": 0.00013093904830229974,
      "loss": 2.683,
      "step": 159107
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.621211051940918,
      "learning_rate": 0.00013093566914076078,
      "loss": 2.9224,
      "step": 159108
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7013800144195557,
      "learning_rate": 0.00013093229001065397,
      "loss": 2.9325,
      "step": 159109
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.415426731109619,
      "learning_rate": 0.00013092891091198009,
      "loss": 2.9292,
      "step": 159110
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5889039039611816,
      "learning_rate": 0.00013092553184473963,
      "loss": 3.0372,
      "step": 159111
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0115699768066406,
      "learning_rate": 0.00013092215280893345,
      "loss": 3.0569,
      "step": 159112
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.053738594055176,
      "learning_rate": 0.0001309187738045619,
      "loss": 2.7379,
      "step": 159113
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6481027603149414,
      "learning_rate": 0.00013091539483162595,
      "loss": 2.932,
      "step": 159114
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.856675148010254,
      "learning_rate": 0.0001309120158901258,
      "loss": 2.8316,
      "step": 159115
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4534072875976562,
      "learning_rate": 0.00013090863698006245,
      "loss": 2.8818,
      "step": 159116
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.055762767791748,
      "learning_rate": 0.00013090525810143623,
      "loss": 2.9791,
      "step": 159117
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.622194290161133,
      "learning_rate": 0.00013090187925424802,
      "loss": 3.1423,
      "step": 159118
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0396652221679688,
      "learning_rate": 0.00013089850043849826,
      "loss": 2.8078,
      "step": 159119
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7646589279174805,
      "learning_rate": 0.00013089512165418785,
      "loss": 2.7487,
      "step": 159120
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.002370834350586,
      "learning_rate": 0.00013089174290131702,
      "loss": 3.0357,
      "step": 159121
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.721397876739502,
      "learning_rate": 0.00013088836417988672,
      "loss": 3.1376,
      "step": 159122
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.364805221557617,
      "learning_rate": 0.00013088498548989734,
      "loss": 2.9487,
      "step": 159123
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4711527824401855,
      "learning_rate": 0.00013088160683134977,
      "loss": 2.9387,
      "step": 159124
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2213470935821533,
      "learning_rate": 0.0001308782282042444,
      "loss": 2.7673,
      "step": 159125
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4415407180786133,
      "learning_rate": 0.00013087484960858214,
      "loss": 3.1369,
      "step": 159126
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.371595859527588,
      "learning_rate": 0.00013087147104436322,
      "loss": 2.6344,
      "step": 159127
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4084994792938232,
      "learning_rate": 0.00013086809251158863,
      "loss": 2.9019,
      "step": 159128
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5624470710754395,
      "learning_rate": 0.00013086471401025875,
      "loss": 2.4037,
      "step": 159129
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.357086658477783,
      "learning_rate": 0.0001308613355403744,
      "loss": 3.0563,
      "step": 159130
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7395126819610596,
      "learning_rate": 0.000130857957101936,
      "loss": 3.1246,
      "step": 159131
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.282134532928467,
      "learning_rate": 0.00013085457869494452,
      "loss": 3.0462,
      "step": 159132
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5374982357025146,
      "learning_rate": 0.00013085120031940014,
      "loss": 3.1184,
      "step": 159133
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.491431713104248,
      "learning_rate": 0.00013084782197530384,
      "loss": 3.0108,
      "step": 159134
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9089901447296143,
      "learning_rate": 0.00013084444366265603,
      "loss": 3.0577,
      "step": 159135
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6478400230407715,
      "learning_rate": 0.00013084106538145752,
      "loss": 3.1227,
      "step": 159136
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2977521419525146,
      "learning_rate": 0.00013083768713170876,
      "loss": 2.949,
      "step": 159137
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.348129987716675,
      "learning_rate": 0.00013083430891341064,
      "loss": 2.7089,
      "step": 159138
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5799615383148193,
      "learning_rate": 0.00013083093072656347,
      "loss": 3.2037,
      "step": 159139
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.278850793838501,
      "learning_rate": 0.00013082755257116808,
      "loss": 2.9697,
      "step": 159140
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.002808094024658,
      "learning_rate": 0.00013082417444722493,
      "loss": 3.1741,
      "step": 159141
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.379514455795288,
      "learning_rate": 0.00013082079635473487,
      "loss": 3.1896,
      "step": 159142
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4501500129699707,
      "learning_rate": 0.00013081741829369832,
      "loss": 2.9613,
      "step": 159143
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.219563961029053,
      "learning_rate": 0.0001308140402641161,
      "loss": 3.0491,
      "step": 159144
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3796820640563965,
      "learning_rate": 0.00013081066226598874,
      "loss": 2.9785,
      "step": 159145
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.46170711517334,
      "learning_rate": 0.0001308072842993169,
      "loss": 3.0999,
      "step": 159146
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.297819137573242,
      "learning_rate": 0.00013080390636410107,
      "loss": 3.0888,
      "step": 159147
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.7114288806915283,
      "learning_rate": 0.0001308005284603421,
      "loss": 2.9333,
      "step": 159148
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.123628616333008,
      "learning_rate": 0.00013079715058804034,
      "loss": 2.9845,
      "step": 159149
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1013057231903076,
      "learning_rate": 0.00013079377274719675,
      "loss": 3.1179,
      "step": 159150
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.548126459121704,
      "learning_rate": 0.0001307903949378118,
      "loss": 3.0029,
      "step": 159151
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6167945861816406,
      "learning_rate": 0.00013078701715988605,
      "loss": 3.0674,
      "step": 159152
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4706592559814453,
      "learning_rate": 0.00013078363941342015,
      "loss": 3.0739,
      "step": 159153
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.327847957611084,
      "learning_rate": 0.0001307802616984148,
      "loss": 3.0183,
      "step": 159154
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.392069101333618,
      "learning_rate": 0.00013077688401487057,
      "loss": 2.9705,
      "step": 159155
    },
    {
      "epoch": 2.07,
      "grad_norm": 1.9738171100616455,
      "learning_rate": 0.00013077350636278817,
      "loss": 3.2967,
      "step": 159156
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.503403902053833,
      "learning_rate": 0.00013077012874216815,
      "loss": 3.0296,
      "step": 159157
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.5286214351654053,
      "learning_rate": 0.00013076675115301117,
      "loss": 2.6939,
      "step": 159158
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.066641807556152,
      "learning_rate": 0.0001307633735953178,
      "loss": 3.0606,
      "step": 159159
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1987478733062744,
      "learning_rate": 0.00013075999606908874,
      "loss": 3.0598,
      "step": 159160
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2472012042999268,
      "learning_rate": 0.0001307566185743245,
      "loss": 3.1722,
      "step": 159161
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.1273324489593506,
      "learning_rate": 0.00013075324111102595,
      "loss": 3.043,
      "step": 159162
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9581518173217773,
      "learning_rate": 0.0001307498636791935,
      "loss": 3.1487,
      "step": 159163
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.653745651245117,
      "learning_rate": 0.00013074648627882789,
      "loss": 2.825,
      "step": 159164
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.242284059524536,
      "learning_rate": 0.0001307431089099296,
      "loss": 2.8506,
      "step": 159165
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.692131280899048,
      "learning_rate": 0.00013073973157249946,
      "loss": 2.8798,
      "step": 159166
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.234983444213867,
      "learning_rate": 0.0001307363542665379,
      "loss": 2.9277,
      "step": 159167
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.5546164512634277,
      "learning_rate": 0.00013073297699204572,
      "loss": 2.8948,
      "step": 159168
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.304219961166382,
      "learning_rate": 0.00013072959974902348,
      "loss": 3.0011,
      "step": 159169
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.27384090423584,
      "learning_rate": 0.00013072622253747183,
      "loss": 2.8933,
      "step": 159170
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.127042531967163,
      "learning_rate": 0.00013072284535739124,
      "loss": 2.8736,
      "step": 159171
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.284029722213745,
      "learning_rate": 0.00013071946820878262,
      "loss": 2.8238,
      "step": 159172
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.287177562713623,
      "learning_rate": 0.00013071609109164628,
      "loss": 3.0793,
      "step": 159173
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4612700939178467,
      "learning_rate": 0.00013071271400598317,
      "loss": 2.6059,
      "step": 159174
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2466917037963867,
      "learning_rate": 0.00013070933695179374,
      "loss": 2.8764,
      "step": 159175
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9863648414611816,
      "learning_rate": 0.00013070595992907853,
      "loss": 3.0707,
      "step": 159176
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7798972129821777,
      "learning_rate": 0.0001307025829378384,
      "loss": 3.1167,
      "step": 159177
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.476881742477417,
      "learning_rate": 0.00013069920597807383,
      "loss": 3.1313,
      "step": 159178
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.746537446975708,
      "learning_rate": 0.00013069582904978537,
      "loss": 3.0325,
      "step": 159179
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.042031288146973,
      "learning_rate": 0.0001306924521529739,
      "loss": 3.1205,
      "step": 159180
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.278205156326294,
      "learning_rate": 0.00013068907528763986,
      "loss": 2.983,
      "step": 159181
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.402669668197632,
      "learning_rate": 0.00013068569845378378,
      "loss": 3.0937,
      "step": 159182
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.086418867111206,
      "learning_rate": 0.00013068232165140662,
      "loss": 2.9285,
      "step": 159183
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.775466203689575,
      "learning_rate": 0.00013067894488050875,
      "loss": 2.9567,
      "step": 159184
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.799222707748413,
      "learning_rate": 0.00013067556814109075,
      "loss": 2.8931,
      "step": 159185
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0439913272857666,
      "learning_rate": 0.0001306721914331535,
      "loss": 2.6986,
      "step": 159186
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2044119834899902,
      "learning_rate": 0.00013066881475669746,
      "loss": 2.8987,
      "step": 159187
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8238580226898193,
      "learning_rate": 0.0001306654381117232,
      "loss": 3.2593,
      "step": 159188
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2970399856567383,
      "learning_rate": 0.0001306620614982315,
      "loss": 2.8791,
      "step": 159189
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.50773024559021,
      "learning_rate": 0.00013065868491622285,
      "loss": 2.8144,
      "step": 159190
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3026082515716553,
      "learning_rate": 0.00013065530836569802,
      "loss": 2.983,
      "step": 159191
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4832026958465576,
      "learning_rate": 0.0001306519318466576,
      "loss": 2.8619,
      "step": 159192
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.89689564704895,
      "learning_rate": 0.00013064855535910216,
      "loss": 2.9558,
      "step": 159193
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8055524826049805,
      "learning_rate": 0.00013064517890303223,
      "loss": 2.9814,
      "step": 159194
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5610504150390625,
      "learning_rate": 0.00013064180247844868,
      "loss": 2.9474,
      "step": 159195
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3929197788238525,
      "learning_rate": 0.0001306384260853519,
      "loss": 2.9225,
      "step": 159196
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.845139265060425,
      "learning_rate": 0.00013063504972374277,
      "loss": 3.0367,
      "step": 159197
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3135488033294678,
      "learning_rate": 0.00013063167339362166,
      "loss": 2.9056,
      "step": 159198
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.544996738433838,
      "learning_rate": 0.00013062829709498952,
      "loss": 2.5948,
      "step": 159199
    },
    {
      "epoch": 2.07,
      "grad_norm": 1.962053894996643,
      "learning_rate": 0.0001306249208278466,
      "loss": 2.9714,
      "step": 159200
    },
    {
      "epoch": 2.07,
      "grad_norm": 1.8865476846694946,
      "learning_rate": 0.00013062154459219378,
      "loss": 3.0729,
      "step": 159201
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.63149356842041,
      "learning_rate": 0.0001306181683880315,
      "loss": 3.0188,
      "step": 159202
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2173819541931152,
      "learning_rate": 0.00013061479221536062,
      "loss": 3.3806,
      "step": 159203
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.495532512664795,
      "learning_rate": 0.00013061141607418155,
      "loss": 2.9294,
      "step": 159204
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.945634126663208,
      "learning_rate": 0.0001306080399644952,
      "loss": 3.1407,
      "step": 159205
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.447920083999634,
      "learning_rate": 0.00013060466388630182,
      "loss": 3.0121,
      "step": 159206
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3822994232177734,
      "learning_rate": 0.00013060128783960232,
      "loss": 3.1581,
      "step": 159207
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7375965118408203,
      "learning_rate": 0.00013059791182439717,
      "loss": 3.1421,
      "step": 159208
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.003936529159546,
      "learning_rate": 0.00013059453584068713,
      "loss": 2.9391,
      "step": 159209
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.019257545471191,
      "learning_rate": 0.00013059115988847266,
      "loss": 3.0981,
      "step": 159210
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.288320779800415,
      "learning_rate": 0.00013058778396775463,
      "loss": 2.9983,
      "step": 159211
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8504958152770996,
      "learning_rate": 0.0001305844080785335,
      "loss": 3.0142,
      "step": 159212
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1999616622924805,
      "learning_rate": 0.00013058103222080993,
      "loss": 2.864,
      "step": 159213
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.750439167022705,
      "learning_rate": 0.00013057765639458446,
      "loss": 2.9709,
      "step": 159214
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.233485698699951,
      "learning_rate": 0.00013057428059985788,
      "loss": 2.975,
      "step": 159215
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.365489959716797,
      "learning_rate": 0.00013057090483663066,
      "loss": 3.2424,
      "step": 159216
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4458813667297363,
      "learning_rate": 0.0001305675291049036,
      "loss": 3.0372,
      "step": 159217
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.613617181777954,
      "learning_rate": 0.00013056415340467723,
      "loss": 2.8072,
      "step": 159218
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7953710556030273,
      "learning_rate": 0.00013056077773595216,
      "loss": 3.2007,
      "step": 159219
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3767008781433105,
      "learning_rate": 0.000130557402098729,
      "loss": 2.9707,
      "step": 159220
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.768953561782837,
      "learning_rate": 0.00013055402649300845,
      "loss": 2.9727,
      "step": 159221
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.14357328414917,
      "learning_rate": 0.00013055065091879104,
      "loss": 3.1252,
      "step": 159222
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.4194841384887695,
      "learning_rate": 0.00013054727537607755,
      "loss": 2.8759,
      "step": 159223
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.184969902038574,
      "learning_rate": 0.00013054389986486855,
      "loss": 2.9847,
      "step": 159224
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1573948860168457,
      "learning_rate": 0.0001305405243851646,
      "loss": 2.8909,
      "step": 159225
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2276670932769775,
      "learning_rate": 0.00013053714893696626,
      "loss": 2.995,
      "step": 159226
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.604893207550049,
      "learning_rate": 0.00013053377352027442,
      "loss": 3.1444,
      "step": 159227
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7866437435150146,
      "learning_rate": 0.0001305303981350894,
      "loss": 2.944,
      "step": 159228
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.620664596557617,
      "learning_rate": 0.0001305270227814121,
      "loss": 2.9815,
      "step": 159229
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.687345027923584,
      "learning_rate": 0.000130523647459243,
      "loss": 2.8653,
      "step": 159230
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4355413913726807,
      "learning_rate": 0.0001305202721685828,
      "loss": 2.9728,
      "step": 159231
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.553032398223877,
      "learning_rate": 0.00013051689690943193,
      "loss": 3.0489,
      "step": 159232
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.1150054931640625,
      "learning_rate": 0.00013051352168179131,
      "loss": 3.0364,
      "step": 159233
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.043694257736206,
      "learning_rate": 0.0001305101464856613,
      "loss": 3.0731,
      "step": 159234
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.037473440170288,
      "learning_rate": 0.00013050677132104275,
      "loss": 2.8044,
      "step": 159235
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.427266836166382,
      "learning_rate": 0.0001305033961879362,
      "loss": 3.0864,
      "step": 159236
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4020004272460938,
      "learning_rate": 0.00013050002108634228,
      "loss": 2.6017,
      "step": 159237
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2566914558410645,
      "learning_rate": 0.0001304966460162615,
      "loss": 2.9594,
      "step": 159238
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0184009075164795,
      "learning_rate": 0.00013049327097769468,
      "loss": 3.1622,
      "step": 159239
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.128736972808838,
      "learning_rate": 0.00013048989597064223,
      "loss": 3.1088,
      "step": 159240
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7839407920837402,
      "learning_rate": 0.00013048652099510507,
      "loss": 2.8727,
      "step": 159241
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7591471672058105,
      "learning_rate": 0.00013048314605108367,
      "loss": 3.0098,
      "step": 159242
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2591896057128906,
      "learning_rate": 0.00013047977113857863,
      "loss": 3.1595,
      "step": 159243
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.8695013523101807,
      "learning_rate": 0.00013047639625759046,
      "loss": 2.7938,
      "step": 159244
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.649120330810547,
      "learning_rate": 0.00013047302140812008,
      "loss": 2.9593,
      "step": 159245
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4233651161193848,
      "learning_rate": 0.00013046964659016786,
      "loss": 3.0142,
      "step": 159246
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.528991460800171,
      "learning_rate": 0.0001304662718037346,
      "loss": 2.9686,
      "step": 159247
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2199530601501465,
      "learning_rate": 0.00013046289704882088,
      "loss": 3.027,
      "step": 159248
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.548290967941284,
      "learning_rate": 0.0001304595223254273,
      "loss": 3.0318,
      "step": 159249
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.162926435470581,
      "learning_rate": 0.0001304561476335544,
      "loss": 3.2526,
      "step": 159250
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.319490909576416,
      "learning_rate": 0.00013045277297320303,
      "loss": 2.8814,
      "step": 159251
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.401102066040039,
      "learning_rate": 0.00013044939834437356,
      "loss": 2.9305,
      "step": 159252
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6510627269744873,
      "learning_rate": 0.00013044602374706686,
      "loss": 2.8075,
      "step": 159253
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.413184881210327,
      "learning_rate": 0.00013044264918128342,
      "loss": 2.8601,
      "step": 159254
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.327310562133789,
      "learning_rate": 0.00013043927464702392,
      "loss": 3.082,
      "step": 159255
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5854406356811523,
      "learning_rate": 0.00013043590014428884,
      "loss": 2.8634,
      "step": 159256
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2852869033813477,
      "learning_rate": 0.00013043252567307905,
      "loss": 2.9267,
      "step": 159257
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.396775722503662,
      "learning_rate": 0.00013042915123339494,
      "loss": 2.8937,
      "step": 159258
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.255150556564331,
      "learning_rate": 0.00013042577682523734,
      "loss": 2.9727,
      "step": 159259
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.21199631690979,
      "learning_rate": 0.0001304224024486068,
      "loss": 3.4253,
      "step": 159260
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.9288365840911865,
      "learning_rate": 0.00013041902810350384,
      "loss": 2.9874,
      "step": 159261
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4328627586364746,
      "learning_rate": 0.00013041565378992928,
      "loss": 2.9129,
      "step": 159262
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7470219135284424,
      "learning_rate": 0.00013041227950788366,
      "loss": 2.6027,
      "step": 159263
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3287792205810547,
      "learning_rate": 0.0001304089052573675,
      "loss": 2.9904,
      "step": 159264
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.697631597518921,
      "learning_rate": 0.0001304055310383816,
      "loss": 3.0168,
      "step": 159265
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.37919545173645,
      "learning_rate": 0.00013040215685092654,
      "loss": 2.9794,
      "step": 159266
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.282601833343506,
      "learning_rate": 0.00013039878269500283,
      "loss": 2.8461,
      "step": 159267
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.39680814743042,
      "learning_rate": 0.00013039540857061126,
      "loss": 2.9338,
      "step": 159268
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.847590446472168,
      "learning_rate": 0.0001303920344777524,
      "loss": 2.6583,
      "step": 159269
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0725038051605225,
      "learning_rate": 0.00013038866041642676,
      "loss": 3.0143,
      "step": 159270
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.651296377182007,
      "learning_rate": 0.00013038528638663517,
      "loss": 3.0031,
      "step": 159271
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2964229583740234,
      "learning_rate": 0.00013038191238837814,
      "loss": 3.0127,
      "step": 159272
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2528724670410156,
      "learning_rate": 0.00013037853842165622,
      "loss": 3.0835,
      "step": 159273
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1322498321533203,
      "learning_rate": 0.00013037516448647022,
      "loss": 2.9573,
      "step": 159274
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2994678020477295,
      "learning_rate": 0.0001303717905828206,
      "loss": 3.0667,
      "step": 159275
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.4521939754486084,
      "learning_rate": 0.00013036841671070819,
      "loss": 2.9657,
      "step": 159276
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.804856777191162,
      "learning_rate": 0.00013036504287013346,
      "loss": 2.9597,
      "step": 159277
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.2905595302581787,
      "learning_rate": 0.00013036166906109695,
      "loss": 3.0905,
      "step": 159278
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6358189582824707,
      "learning_rate": 0.00013035829528359953,
      "loss": 2.9947,
      "step": 159279
    },
    {
      "epoch": 2.07,
      "grad_norm": 5.6015543937683105,
      "learning_rate": 0.00013035492153764175,
      "loss": 2.9956,
      "step": 159280
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.1355416774749756,
      "learning_rate": 0.00013035154782322405,
      "loss": 3.1112,
      "step": 159281
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5790858268737793,
      "learning_rate": 0.0001303481741403473,
      "loss": 2.7364,
      "step": 159282
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.18332576751709,
      "learning_rate": 0.00013034480048901203,
      "loss": 3.1379,
      "step": 159283
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.497737169265747,
      "learning_rate": 0.00013034142686921876,
      "loss": 2.8153,
      "step": 159284
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8638980388641357,
      "learning_rate": 0.0001303380532809683,
      "loss": 3.1968,
      "step": 159285
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.399291515350342,
      "learning_rate": 0.00013033467972426127,
      "loss": 3.0214,
      "step": 159286
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.710078239440918,
      "learning_rate": 0.00013033130619909803,
      "loss": 3.0584,
      "step": 159287
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2879559993743896,
      "learning_rate": 0.0001303279327054796,
      "loss": 3.1578,
      "step": 159288
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0957508087158203,
      "learning_rate": 0.00013032455924340624,
      "loss": 3.0792,
      "step": 159289
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7987663745880127,
      "learning_rate": 0.0001303211858128789,
      "loss": 3.1069,
      "step": 159290
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3771870136260986,
      "learning_rate": 0.000130317812413898,
      "loss": 2.9986,
      "step": 159291
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5340795516967773,
      "learning_rate": 0.00013031443904646422,
      "loss": 2.8299,
      "step": 159292
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.546274423599243,
      "learning_rate": 0.00013031106571057815,
      "loss": 3.0481,
      "step": 159293
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4720981121063232,
      "learning_rate": 0.00013030769240624052,
      "loss": 2.8058,
      "step": 159294
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0540544986724854,
      "learning_rate": 0.0001303043191334518,
      "loss": 2.981,
      "step": 159295
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3071300983428955,
      "learning_rate": 0.00013030094589221278,
      "loss": 3.0525,
      "step": 159296
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.6868529319763184,
      "learning_rate": 0.00013029757268252405,
      "loss": 2.6567,
      "step": 159297
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3074469566345215,
      "learning_rate": 0.0001302941995043862,
      "loss": 3.0472,
      "step": 159298
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4011032581329346,
      "learning_rate": 0.00013029082635779976,
      "loss": 3.0656,
      "step": 159299
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.834435224533081,
      "learning_rate": 0.00013028745324276557,
      "loss": 2.7883,
      "step": 159300
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3842079639434814,
      "learning_rate": 0.000130284080159284,
      "loss": 3.0557,
      "step": 159301
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3876092433929443,
      "learning_rate": 0.00013028070710735596,
      "loss": 2.7467,
      "step": 159302
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8213555812835693,
      "learning_rate": 0.00013027733408698194,
      "loss": 3.0062,
      "step": 159303
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.103076696395874,
      "learning_rate": 0.00013027396109816257,
      "loss": 3.0695,
      "step": 159304
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.7947306632995605,
      "learning_rate": 0.00013027058814089835,
      "loss": 3.0212,
      "step": 159305
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.436903953552246,
      "learning_rate": 0.00013026721521519014,
      "loss": 2.7807,
      "step": 159306
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3349931240081787,
      "learning_rate": 0.00013026384232103837,
      "loss": 3.2165,
      "step": 159307
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.17417049407959,
      "learning_rate": 0.0001302604694584439,
      "loss": 2.8154,
      "step": 159308
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3712868690490723,
      "learning_rate": 0.00013025709662740716,
      "loss": 3.0057,
      "step": 159309
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.291675329208374,
      "learning_rate": 0.00013025372382792882,
      "loss": 2.9431,
      "step": 159310
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.978457450866699,
      "learning_rate": 0.00013025035106000943,
      "loss": 3.2365,
      "step": 159311
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2500503063201904,
      "learning_rate": 0.0001302469783236498,
      "loss": 3.009,
      "step": 159312
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7830488681793213,
      "learning_rate": 0.00013024360561885036,
      "loss": 3.1231,
      "step": 159313
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3603427410125732,
      "learning_rate": 0.00013024023294561196,
      "loss": 3.1219,
      "step": 159314
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3016815185546875,
      "learning_rate": 0.0001302368603039351,
      "loss": 2.653,
      "step": 159315
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.629803419113159,
      "learning_rate": 0.0001302334876938204,
      "loss": 2.9252,
      "step": 159316
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4486465454101562,
      "learning_rate": 0.0001302301151152684,
      "loss": 2.5662,
      "step": 159317
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.812309980392456,
      "learning_rate": 0.00013022674256827995,
      "loss": 2.6285,
      "step": 159318
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.121185779571533,
      "learning_rate": 0.00013022337005285541,
      "loss": 2.9783,
      "step": 159319
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.0324954986572266,
      "learning_rate": 0.00013021999756899566,
      "loss": 2.8762,
      "step": 159320
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.44244384765625,
      "learning_rate": 0.00013021662511670124,
      "loss": 3.0238,
      "step": 159321
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5544028282165527,
      "learning_rate": 0.00013021325269597277,
      "loss": 3.0216,
      "step": 159322
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.671933650970459,
      "learning_rate": 0.0001302098803068107,
      "loss": 3.0457,
      "step": 159323
    },
    {
      "epoch": 2.07,
      "grad_norm": 4.196252822875977,
      "learning_rate": 0.00013020650794921595,
      "loss": 3.1313,
      "step": 159324
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0253615379333496,
      "learning_rate": 0.00013020313562318894,
      "loss": 2.9857,
      "step": 159325
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5406298637390137,
      "learning_rate": 0.00013019976332873044,
      "loss": 2.7164,
      "step": 159326
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7608699798583984,
      "learning_rate": 0.00013019639106584104,
      "loss": 2.7366,
      "step": 159327
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.362220525741577,
      "learning_rate": 0.00013019301883452134,
      "loss": 2.9635,
      "step": 159328
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2414464950561523,
      "learning_rate": 0.00013018964663477183,
      "loss": 2.9476,
      "step": 159329
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.898446798324585,
      "learning_rate": 0.00013018627446659338,
      "loss": 3.2141,
      "step": 159330
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.740596055984497,
      "learning_rate": 0.00013018290232998637,
      "loss": 2.9883,
      "step": 159331
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.3950843811035156,
      "learning_rate": 0.00013017953022495171,
      "loss": 2.7813,
      "step": 159332
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.46777081489563,
      "learning_rate": 0.0001301761581514899,
      "loss": 2.9394,
      "step": 159333
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.920424461364746,
      "learning_rate": 0.0001301727861096015,
      "loss": 3.053,
      "step": 159334
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.474794387817383,
      "learning_rate": 0.00013016941409928713,
      "loss": 3.0333,
      "step": 159335
    },
    {
      "epoch": 2.07,
      "grad_norm": 6.2532172203063965,
      "learning_rate": 0.00013016604212054753,
      "loss": 2.8294,
      "step": 159336
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6382064819335938,
      "learning_rate": 0.00013016267017338317,
      "loss": 2.9234,
      "step": 159337
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4173424243927,
      "learning_rate": 0.0001301592982577949,
      "loss": 2.7715,
      "step": 159338
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5704827308654785,
      "learning_rate": 0.00013015592637378323,
      "loss": 3.0508,
      "step": 159339
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.736527919769287,
      "learning_rate": 0.00013015255452134876,
      "loss": 3.1591,
      "step": 159340
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.107248544692993,
      "learning_rate": 0.00013014918270049203,
      "loss": 2.9397,
      "step": 159341
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0887274742126465,
      "learning_rate": 0.00013014581091121388,
      "loss": 2.9285,
      "step": 159342
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.790029525756836,
      "learning_rate": 0.00013014243915351474,
      "loss": 2.9284,
      "step": 159343
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.7751407623291016,
      "learning_rate": 0.0001301390674273954,
      "loss": 3.0274,
      "step": 159344
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.8104560375213623,
      "learning_rate": 0.00013013569573285634,
      "loss": 3.0404,
      "step": 159345
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.723660469055176,
      "learning_rate": 0.00013013232406989847,
      "loss": 2.9144,
      "step": 159346
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3713769912719727,
      "learning_rate": 0.00013012895243852198,
      "loss": 2.9857,
      "step": 159347
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.282397747039795,
      "learning_rate": 0.00013012558083872781,
      "loss": 2.4635,
      "step": 159348
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.722137212753296,
      "learning_rate": 0.00013012220927051646,
      "loss": 3.0026,
      "step": 159349
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.296891927719116,
      "learning_rate": 0.00013011883773388866,
      "loss": 2.9463,
      "step": 159350
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3973228931427,
      "learning_rate": 0.00013011546622884488,
      "loss": 2.8159,
      "step": 159351
    },
    {
      "epoch": 2.07,
      "grad_norm": 3.080091953277588,
      "learning_rate": 0.00013011209475538593,
      "loss": 2.9126,
      "step": 159352
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.3890719413757324,
      "learning_rate": 0.00013010872331351237,
      "loss": 3.2363,
      "step": 159353
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.538620710372925,
      "learning_rate": 0.0001301053519032248,
      "loss": 3.1405,
      "step": 159354
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.1905314922332764,
      "learning_rate": 0.00013010198052452372,
      "loss": 3.114,
      "step": 159355
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.2910990715026855,
      "learning_rate": 0.00013009860917741005,
      "loss": 3.0193,
      "step": 159356
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.4315924644470215,
      "learning_rate": 0.0001300952378618841,
      "loss": 3.1478,
      "step": 159357
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.0752360820770264,
      "learning_rate": 0.00013009186657794678,
      "loss": 3.0376,
      "step": 159358
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.5827484130859375,
      "learning_rate": 0.00013008849532559856,
      "loss": 2.8582,
      "step": 159359
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.137859344482422,
      "learning_rate": 0.00013008512410484,
      "loss": 3.092,
      "step": 159360
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.492964029312134,
      "learning_rate": 0.00013008175291567196,
      "loss": 2.7109,
      "step": 159361
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.6036911010742188,
      "learning_rate": 0.00013007838175809494,
      "loss": 3.096,
      "step": 159362
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4487035274505615,
      "learning_rate": 0.0001300750106321094,
      "loss": 3.0238,
      "step": 159363
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4203133583068848,
      "learning_rate": 0.00013007163953771626,
      "loss": 2.8074,
      "step": 159364
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.459803342819214,
      "learning_rate": 0.000130068268474916,
      "loss": 2.9062,
      "step": 159365
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2730538845062256,
      "learning_rate": 0.00013006489744370916,
      "loss": 2.8381,
      "step": 159366
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2318708896636963,
      "learning_rate": 0.00013006152644409657,
      "loss": 3.0241,
      "step": 159367
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2335948944091797,
      "learning_rate": 0.00013005815547607874,
      "loss": 2.9506,
      "step": 159368
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.592891216278076,
      "learning_rate": 0.00013005478453965622,
      "loss": 2.9642,
      "step": 159369
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8754096031188965,
      "learning_rate": 0.00013005141363482982,
      "loss": 2.9948,
      "step": 159370
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.659313440322876,
      "learning_rate": 0.00013004804276160005,
      "loss": 2.6661,
      "step": 159371
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3772566318511963,
      "learning_rate": 0.00013004467191996745,
      "loss": 3.1301,
      "step": 159372
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.480642557144165,
      "learning_rate": 0.00013004130110993287,
      "loss": 2.9786,
      "step": 159373
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3894991874694824,
      "learning_rate": 0.00013003793033149674,
      "loss": 2.5414,
      "step": 159374
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4756271839141846,
      "learning_rate": 0.00013003455958465981,
      "loss": 3.0159,
      "step": 159375
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5781772136688232,
      "learning_rate": 0.00013003118886942272,
      "loss": 3.061,
      "step": 159376
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4702749252319336,
      "learning_rate": 0.00013002781818578606,
      "loss": 2.9131,
      "step": 159377
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.54636812210083,
      "learning_rate": 0.00013002444753375028,
      "loss": 2.919,
      "step": 159378
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7470953464508057,
      "learning_rate": 0.00013002107691331629,
      "loss": 2.7194,
      "step": 159379
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7281546592712402,
      "learning_rate": 0.00013001770632448448,
      "loss": 3.0396,
      "step": 159380
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8436741828918457,
      "learning_rate": 0.00013001433576725568,
      "loss": 3.0263,
      "step": 159381
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4273698329925537,
      "learning_rate": 0.00013001096524163045,
      "loss": 2.8894,
      "step": 159382
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4738898277282715,
      "learning_rate": 0.00013000759474760936,
      "loss": 3.0811,
      "step": 159383
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.145179271697998,
      "learning_rate": 0.000130004224285193,
      "loss": 2.8247,
      "step": 159384
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.087811231613159,
      "learning_rate": 0.00013000085385438215,
      "loss": 3.0126,
      "step": 159385
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.375930070877075,
      "learning_rate": 0.00012999748345517723,
      "loss": 2.888,
      "step": 159386
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3457190990448,
      "learning_rate": 0.00012999411308757913,
      "loss": 3.0425,
      "step": 159387
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5632083415985107,
      "learning_rate": 0.00012999074275158828,
      "loss": 3.0384,
      "step": 159388
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.357741355895996,
      "learning_rate": 0.00012998737244720537,
      "loss": 3.091,
      "step": 159389
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6495707035064697,
      "learning_rate": 0.00012998400217443094,
      "loss": 2.8535,
      "step": 159390
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4418489933013916,
      "learning_rate": 0.0001299806319332658,
      "loss": 3.0426,
      "step": 159391
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.69551157951355,
      "learning_rate": 0.00012997726172371034,
      "loss": 2.7912,
      "step": 159392
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2940611839294434,
      "learning_rate": 0.00012997389154576546,
      "loss": 2.873,
      "step": 159393
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.652657985687256,
      "learning_rate": 0.00012997052139943164,
      "loss": 3.0509,
      "step": 159394
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.262301206588745,
      "learning_rate": 0.00012996715128470946,
      "loss": 3.0584,
      "step": 159395
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.068131923675537,
      "learning_rate": 0.00012996378120159953,
      "loss": 2.9052,
      "step": 159396
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.255044937133789,
      "learning_rate": 0.00012996041115010263,
      "loss": 2.8793,
      "step": 159397
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.011256694793701,
      "learning_rate": 0.0001299570411302192,
      "loss": 2.9017,
      "step": 159398
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.061418056488037,
      "learning_rate": 0.00012995367114195007,
      "loss": 2.9624,
      "step": 159399
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3937606811523438,
      "learning_rate": 0.00012995030118529577,
      "loss": 2.8168,
      "step": 159400
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5862059593200684,
      "learning_rate": 0.0001299469312602569,
      "loss": 2.7774,
      "step": 159401
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6941802501678467,
      "learning_rate": 0.00012994356136683403,
      "loss": 2.8349,
      "step": 159402
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6933493614196777,
      "learning_rate": 0.00012994019150502795,
      "loss": 2.7935,
      "step": 159403
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.217073440551758,
      "learning_rate": 0.00012993682167483907,
      "loss": 3.0539,
      "step": 159404
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3811159133911133,
      "learning_rate": 0.0001299334518762683,
      "loss": 2.7226,
      "step": 159405
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.75008225440979,
      "learning_rate": 0.0001299300821093161,
      "loss": 3.151,
      "step": 159406
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.79195237159729,
      "learning_rate": 0.0001299267123739831,
      "loss": 3.0625,
      "step": 159407
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.405343532562256,
      "learning_rate": 0.0001299233426702698,
      "loss": 2.8578,
      "step": 159408
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.9384374618530273,
      "learning_rate": 0.00012991997299817708,
      "loss": 3.1504,
      "step": 159409
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2295525074005127,
      "learning_rate": 0.00012991660335770534,
      "loss": 3.0126,
      "step": 159410
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1266536712646484,
      "learning_rate": 0.00012991323374885545,
      "loss": 3.2867,
      "step": 159411
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2602078914642334,
      "learning_rate": 0.00012990986417162778,
      "loss": 2.9442,
      "step": 159412
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.4107584953308105,
      "learning_rate": 0.00012990649462602327,
      "loss": 2.8108,
      "step": 159413
    },
    {
      "epoch": 2.08,
      "grad_norm": 1.9986963272094727,
      "learning_rate": 0.00012990312511204215,
      "loss": 3.1066,
      "step": 159414
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1072592735290527,
      "learning_rate": 0.0001298997556296854,
      "loss": 2.8073,
      "step": 159415
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1528663635253906,
      "learning_rate": 0.00012989638617895332,
      "loss": 3.0469,
      "step": 159416
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4658138751983643,
      "learning_rate": 0.00012989301675984686,
      "loss": 2.8768,
      "step": 159417
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.7731823921203613,
      "learning_rate": 0.00012988964737236638,
      "loss": 2.9898,
      "step": 159418
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2883734703063965,
      "learning_rate": 0.00012988627801651287,
      "loss": 2.644,
      "step": 159419
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.780388593673706,
      "learning_rate": 0.00012988290869228643,
      "loss": 2.966,
      "step": 159420
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.149308443069458,
      "learning_rate": 0.00012987953939968814,
      "loss": 2.8407,
      "step": 159421
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2056703567504883,
      "learning_rate": 0.00012987617013871831,
      "loss": 2.7362,
      "step": 159422
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.67102313041687,
      "learning_rate": 0.00012987280090937783,
      "loss": 3.1671,
      "step": 159423
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2618799209594727,
      "learning_rate": 0.0001298694317116671,
      "loss": 2.9254,
      "step": 159424
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1639645099639893,
      "learning_rate": 0.00012986606254558708,
      "loss": 2.7045,
      "step": 159425
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.829974174499512,
      "learning_rate": 0.00012986269341113795,
      "loss": 2.7564,
      "step": 159426
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.189159393310547,
      "learning_rate": 0.00012985932430832067,
      "loss": 2.896,
      "step": 159427
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.551137924194336,
      "learning_rate": 0.00012985595523713563,
      "loss": 2.8832,
      "step": 159428
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8625078201293945,
      "learning_rate": 0.0001298525861975837,
      "loss": 2.745,
      "step": 159429
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6216788291931152,
      "learning_rate": 0.00012984921718966528,
      "loss": 2.8501,
      "step": 159430
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.926225185394287,
      "learning_rate": 0.0001298458482133813,
      "loss": 2.8502,
      "step": 159431
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.481360673904419,
      "learning_rate": 0.00012984247926873198,
      "loss": 2.8122,
      "step": 159432
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1876959800720215,
      "learning_rate": 0.00012983911035571827,
      "loss": 2.9047,
      "step": 159433
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.045919179916382,
      "learning_rate": 0.00012983574147434058,
      "loss": 2.8561,
      "step": 159434
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.521939516067505,
      "learning_rate": 0.00012983237262459974,
      "loss": 3.0423,
      "step": 159435
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.114696979522705,
      "learning_rate": 0.00012982900380649617,
      "loss": 2.942,
      "step": 159436
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.33443284034729,
      "learning_rate": 0.0001298256350200307,
      "loss": 2.9886,
      "step": 159437
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3921754360198975,
      "learning_rate": 0.00012982226626520385,
      "loss": 3.0607,
      "step": 159438
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3184754848480225,
      "learning_rate": 0.00012981889754201626,
      "loss": 3.0031,
      "step": 159439
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1198184490203857,
      "learning_rate": 0.00012981552885046844,
      "loss": 2.6427,
      "step": 159440
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.852849245071411,
      "learning_rate": 0.00012981216019056124,
      "loss": 3.1109,
      "step": 159441
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.945099353790283,
      "learning_rate": 0.00012980879156229507,
      "loss": 3.2333,
      "step": 159442
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.196228265762329,
      "learning_rate": 0.00012980542296567074,
      "loss": 2.9848,
      "step": 159443
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2162344455718994,
      "learning_rate": 0.0001298020544006888,
      "loss": 2.9212,
      "step": 159444
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8231897354125977,
      "learning_rate": 0.0001297986858673498,
      "loss": 2.8875,
      "step": 159445
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.516510486602783,
      "learning_rate": 0.0001297953173656545,
      "loss": 2.7726,
      "step": 159446
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1016950607299805,
      "learning_rate": 0.00012979194889560348,
      "loss": 2.7247,
      "step": 159447
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4991841316223145,
      "learning_rate": 0.00012978858045719724,
      "loss": 3.0402,
      "step": 159448
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4906492233276367,
      "learning_rate": 0.00012978521205043666,
      "loss": 2.945,
      "step": 159449
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6229443550109863,
      "learning_rate": 0.0001297818436753222,
      "loss": 2.6975,
      "step": 159450
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.238311290740967,
      "learning_rate": 0.00012977847533185438,
      "loss": 2.8225,
      "step": 159451
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.091384172439575,
      "learning_rate": 0.00012977510702003405,
      "loss": 2.9941,
      "step": 159452
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5414273738861084,
      "learning_rate": 0.00012977173873986178,
      "loss": 3.1191,
      "step": 159453
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6457457542419434,
      "learning_rate": 0.00012976837049133802,
      "loss": 3.1247,
      "step": 159454
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.817326068878174,
      "learning_rate": 0.00012976500227446366,
      "loss": 2.9308,
      "step": 159455
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2134389877319336,
      "learning_rate": 0.0001297616340892392,
      "loss": 2.8994,
      "step": 159456
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.557339906692505,
      "learning_rate": 0.00012975826593566517,
      "loss": 2.7902,
      "step": 159457
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4419362545013428,
      "learning_rate": 0.00012975489781374237,
      "loss": 3.1936,
      "step": 159458
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.40816593170166,
      "learning_rate": 0.00012975152972347125,
      "loss": 2.9155,
      "step": 159459
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.123612403869629,
      "learning_rate": 0.00012974816166485265,
      "loss": 2.7827,
      "step": 159460
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4704339504241943,
      "learning_rate": 0.00012974479363788707,
      "loss": 3.2206,
      "step": 159461
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7340571880340576,
      "learning_rate": 0.00012974142564257514,
      "loss": 2.9721,
      "step": 159462
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4390110969543457,
      "learning_rate": 0.00012973805767891737,
      "loss": 2.9149,
      "step": 159463
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5510058403015137,
      "learning_rate": 0.00012973468974691467,
      "loss": 2.9936,
      "step": 159464
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7690844535827637,
      "learning_rate": 0.00012973132184656736,
      "loss": 2.8932,
      "step": 159465
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.044930934906006,
      "learning_rate": 0.00012972795397787632,
      "loss": 2.854,
      "step": 159466
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6873676776885986,
      "learning_rate": 0.0001297245861408421,
      "loss": 2.7817,
      "step": 159467
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2246274948120117,
      "learning_rate": 0.00012972121833546528,
      "loss": 3.1794,
      "step": 159468
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.716886043548584,
      "learning_rate": 0.00012971785056174637,
      "loss": 2.681,
      "step": 159469
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5642364025115967,
      "learning_rate": 0.00012971448281968626,
      "loss": 3.0492,
      "step": 159470
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7182834148406982,
      "learning_rate": 0.0001297111151092853,
      "loss": 2.8639,
      "step": 159471
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6755433082580566,
      "learning_rate": 0.0001297077474305444,
      "loss": 2.7904,
      "step": 159472
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.369786500930786,
      "learning_rate": 0.00012970437978346403,
      "loss": 2.9251,
      "step": 159473
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.155316114425659,
      "learning_rate": 0.00012970101216804483,
      "loss": 2.8621,
      "step": 159474
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3146731853485107,
      "learning_rate": 0.00012969764458428733,
      "loss": 2.9774,
      "step": 159475
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.190014123916626,
      "learning_rate": 0.00012969427703219235,
      "loss": 2.7974,
      "step": 159476
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.071608304977417,
      "learning_rate": 0.0001296909095117603,
      "loss": 2.9443,
      "step": 159477
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.728196620941162,
      "learning_rate": 0.00012968754202299202,
      "loss": 2.8195,
      "step": 159478
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.347977638244629,
      "learning_rate": 0.00012968417456588798,
      "loss": 3.116,
      "step": 159479
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.187908411026001,
      "learning_rate": 0.00012968080714044903,
      "loss": 2.9127,
      "step": 159480
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6790497303009033,
      "learning_rate": 0.0001296774397466755,
      "loss": 2.7226,
      "step": 159481
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4421887397766113,
      "learning_rate": 0.00012967407238456815,
      "loss": 3.0051,
      "step": 159482
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.569394111633301,
      "learning_rate": 0.00012967070505412757,
      "loss": 2.9836,
      "step": 159483
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6649250984191895,
      "learning_rate": 0.0001296673377553545,
      "loss": 2.7296,
      "step": 159484
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.395425796508789,
      "learning_rate": 0.00012966397048824938,
      "loss": 2.9818,
      "step": 159485
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.753424644470215,
      "learning_rate": 0.0001296606032528132,
      "loss": 3.1069,
      "step": 159486
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.267082452774048,
      "learning_rate": 0.00012965723604904604,
      "loss": 2.8992,
      "step": 159487
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.920300006866455,
      "learning_rate": 0.00012965386887694901,
      "loss": 2.6217,
      "step": 159488
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0756986141204834,
      "learning_rate": 0.00012965050173652237,
      "loss": 2.8513,
      "step": 159489
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6715855598449707,
      "learning_rate": 0.00012964713462776704,
      "loss": 2.8012,
      "step": 159490
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.694845676422119,
      "learning_rate": 0.00012964376755068342,
      "loss": 2.9925,
      "step": 159491
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1054255962371826,
      "learning_rate": 0.00012964040050527246,
      "loss": 3.0843,
      "step": 159492
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.617957353591919,
      "learning_rate": 0.00012963703349153433,
      "loss": 2.9725,
      "step": 159493
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.087627649307251,
      "learning_rate": 0.00012963366650947005,
      "loss": 2.9765,
      "step": 159494
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.361898183822632,
      "learning_rate": 0.0001296302995590799,
      "loss": 3.0968,
      "step": 159495
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.172527551651001,
      "learning_rate": 0.00012962693264036485,
      "loss": 2.9056,
      "step": 159496
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.455440044403076,
      "learning_rate": 0.00012962356575332525,
      "loss": 2.9459,
      "step": 159497
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.780085325241089,
      "learning_rate": 0.00012962019889796206,
      "loss": 2.7118,
      "step": 159498
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.5929417610168457,
      "learning_rate": 0.0001296168320742755,
      "loss": 2.8872,
      "step": 159499
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2880961894989014,
      "learning_rate": 0.00012961346528226646,
      "loss": 3.087,
      "step": 159500
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2118518352508545,
      "learning_rate": 0.0001296100985219354,
      "loss": 2.8313,
      "step": 159501
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.5851385593414307,
      "learning_rate": 0.00012960673179328314,
      "loss": 2.8149,
      "step": 159502
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.899900436401367,
      "learning_rate": 0.00012960336509631012,
      "loss": 3.1891,
      "step": 159503
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.519963264465332,
      "learning_rate": 0.00012959999843101727,
      "loss": 2.8986,
      "step": 159504
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.598369836807251,
      "learning_rate": 0.00012959663179740473,
      "loss": 3.153,
      "step": 159505
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.519564390182495,
      "learning_rate": 0.00012959326519547355,
      "loss": 2.9527,
      "step": 159506
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.425443172454834,
      "learning_rate": 0.00012958989862522406,
      "loss": 2.9216,
      "step": 159507
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.674075126647949,
      "learning_rate": 0.00012958653208665715,
      "loss": 3.1355,
      "step": 159508
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1222727298736572,
      "learning_rate": 0.0001295831655797732,
      "loss": 2.9323,
      "step": 159509
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2706499099731445,
      "learning_rate": 0.0001295797991045732,
      "loss": 2.8659,
      "step": 159510
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1581220626831055,
      "learning_rate": 0.0001295764326610573,
      "loss": 3.0359,
      "step": 159511
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.410425186157227,
      "learning_rate": 0.00012957306624922647,
      "loss": 2.7863,
      "step": 159512
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.525789737701416,
      "learning_rate": 0.0001295696998690811,
      "loss": 3.0567,
      "step": 159513
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.25068998336792,
      "learning_rate": 0.00012956633352062208,
      "loss": 3.01,
      "step": 159514
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.407662868499756,
      "learning_rate": 0.00012956296720384973,
      "loss": 3.0238,
      "step": 159515
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5469603538513184,
      "learning_rate": 0.00012955960091876512,
      "loss": 3.1382,
      "step": 159516
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1857597827911377,
      "learning_rate": 0.00012955623466536835,
      "loss": 2.9704,
      "step": 159517
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.231513023376465,
      "learning_rate": 0.00012955286844366042,
      "loss": 2.8306,
      "step": 159518
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6737172603607178,
      "learning_rate": 0.0001295495022536417,
      "loss": 2.8614,
      "step": 159519
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.021361351013184,
      "learning_rate": 0.00012954613609531305,
      "loss": 2.98,
      "step": 159520
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7669761180877686,
      "learning_rate": 0.0001295427699686749,
      "loss": 2.9394,
      "step": 159521
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.691746950149536,
      "learning_rate": 0.00012953940387372807,
      "loss": 2.8667,
      "step": 159522
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5146234035491943,
      "learning_rate": 0.00012953603781047307,
      "loss": 3.0192,
      "step": 159523
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.63594126701355,
      "learning_rate": 0.00012953267177891057,
      "loss": 2.8276,
      "step": 159524
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.251049518585205,
      "learning_rate": 0.00012952930577904103,
      "loss": 2.9681,
      "step": 159525
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0588254928588867,
      "learning_rate": 0.0001295259398108653,
      "loss": 3.0926,
      "step": 159526
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.648403644561768,
      "learning_rate": 0.00012952257387438383,
      "loss": 2.6399,
      "step": 159527
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7488317489624023,
      "learning_rate": 0.00012951920796959744,
      "loss": 3.007,
      "step": 159528
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.036895990371704,
      "learning_rate": 0.00012951584209650667,
      "loss": 3.0368,
      "step": 159529
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0597143173217773,
      "learning_rate": 0.00012951247625511206,
      "loss": 2.8192,
      "step": 159530
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.186938762664795,
      "learning_rate": 0.00012950911044541424,
      "loss": 2.807,
      "step": 159531
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.731138229370117,
      "learning_rate": 0.00012950574466741402,
      "loss": 2.7812,
      "step": 159532
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.605128049850464,
      "learning_rate": 0.00012950237892111175,
      "loss": 2.9537,
      "step": 159533
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5603058338165283,
      "learning_rate": 0.00012949901320650833,
      "loss": 2.8782,
      "step": 159534
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.275050163269043,
      "learning_rate": 0.0001294956475236043,
      "loss": 2.9331,
      "step": 159535
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8102564811706543,
      "learning_rate": 0.00012949228187240008,
      "loss": 3.1417,
      "step": 159536
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6527273654937744,
      "learning_rate": 0.00012948891625289658,
      "loss": 2.8369,
      "step": 159537
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2742717266082764,
      "learning_rate": 0.00012948555066509435,
      "loss": 2.8123,
      "step": 159538
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0603926181793213,
      "learning_rate": 0.00012948218510899383,
      "loss": 2.9219,
      "step": 159539
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5506913661956787,
      "learning_rate": 0.0001294788195845959,
      "loss": 2.9507,
      "step": 159540
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8321869373321533,
      "learning_rate": 0.0001294754540919011,
      "loss": 2.7295,
      "step": 159541
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.223381996154785,
      "learning_rate": 0.00012947208863090987,
      "loss": 2.9981,
      "step": 159542
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.346031427383423,
      "learning_rate": 0.00012946872320162317,
      "loss": 3.0466,
      "step": 159543
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.341325521469116,
      "learning_rate": 0.00012946535780404133,
      "loss": 2.7695,
      "step": 159544
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.615960121154785,
      "learning_rate": 0.00012946199243816516,
      "loss": 2.9989,
      "step": 159545
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8084919452667236,
      "learning_rate": 0.00012945862710399527,
      "loss": 2.8277,
      "step": 159546
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.638612747192383,
      "learning_rate": 0.00012945526180153225,
      "loss": 3.0039,
      "step": 159547
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.48830509185791,
      "learning_rate": 0.00012945189653077658,
      "loss": 2.8802,
      "step": 159548
    },
    {
      "epoch": 2.08,
      "grad_norm": 5.08908224105835,
      "learning_rate": 0.00012944853129172914,
      "loss": 2.9442,
      "step": 159549
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.372386932373047,
      "learning_rate": 0.00012944516608439033,
      "loss": 2.8125,
      "step": 159550
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5624818801879883,
      "learning_rate": 0.000129441800908761,
      "loss": 3.0039,
      "step": 159551
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4802663326263428,
      "learning_rate": 0.00012943843576484164,
      "loss": 2.7225,
      "step": 159552
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.627764940261841,
      "learning_rate": 0.0001294350706526329,
      "loss": 3.0104,
      "step": 159553
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.8600969314575195,
      "learning_rate": 0.0001294317055721353,
      "loss": 2.9378,
      "step": 159554
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8633460998535156,
      "learning_rate": 0.00012942834052334965,
      "loss": 3.0098,
      "step": 159555
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.379922389984131,
      "learning_rate": 0.0001294249755062764,
      "loss": 2.9465,
      "step": 159556
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.448746681213379,
      "learning_rate": 0.00012942161052091638,
      "loss": 2.9688,
      "step": 159557
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9317550659179688,
      "learning_rate": 0.00012941824556726998,
      "loss": 2.81,
      "step": 159558
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.53853702545166,
      "learning_rate": 0.0001294148806453382,
      "loss": 3.1642,
      "step": 159559
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.811323881149292,
      "learning_rate": 0.00012941151575512119,
      "loss": 3.0039,
      "step": 159560
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.743191719055176,
      "learning_rate": 0.00012940815089661987,
      "loss": 3.1037,
      "step": 159561
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.433483123779297,
      "learning_rate": 0.00012940478606983472,
      "loss": 2.9023,
      "step": 159562
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4785070419311523,
      "learning_rate": 0.00012940142127476653,
      "loss": 3.2167,
      "step": 159563
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.420051336288452,
      "learning_rate": 0.00012939805651141572,
      "loss": 2.9425,
      "step": 159564
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.860548257827759,
      "learning_rate": 0.00012939469177978327,
      "loss": 2.8628,
      "step": 159565
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4024338722229004,
      "learning_rate": 0.00012939132707986937,
      "loss": 2.9365,
      "step": 159566
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5708348751068115,
      "learning_rate": 0.0001293879624116749,
      "loss": 2.9757,
      "step": 159567
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.378647565841675,
      "learning_rate": 0.00012938459777520037,
      "loss": 3.0661,
      "step": 159568
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1571907997131348,
      "learning_rate": 0.00012938123317044652,
      "loss": 2.9086,
      "step": 159569
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6718733310699463,
      "learning_rate": 0.00012937786859741385,
      "loss": 3.1002,
      "step": 159570
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6912758350372314,
      "learning_rate": 0.00012937450405610325,
      "loss": 2.9898,
      "step": 159571
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5798418521881104,
      "learning_rate": 0.00012937113954651495,
      "loss": 2.584,
      "step": 159572
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.572681427001953,
      "learning_rate": 0.00012936777506864988,
      "loss": 2.9747,
      "step": 159573
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6372437477111816,
      "learning_rate": 0.00012936441062250847,
      "loss": 2.9662,
      "step": 159574
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7092690467834473,
      "learning_rate": 0.00012936104620809148,
      "loss": 2.7912,
      "step": 159575
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.004570722579956,
      "learning_rate": 0.00012935768182539945,
      "loss": 3.1363,
      "step": 159576
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.63340163230896,
      "learning_rate": 0.00012935431747443323,
      "loss": 2.9486,
      "step": 159577
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4722816944122314,
      "learning_rate": 0.00012935095315519307,
      "loss": 2.6438,
      "step": 159578
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.633681058883667,
      "learning_rate": 0.00012934758886767989,
      "loss": 2.9875,
      "step": 159579
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4244213104248047,
      "learning_rate": 0.00012934422461189412,
      "loss": 3.1335,
      "step": 159580
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.38260555267334,
      "learning_rate": 0.00012934086038783658,
      "loss": 3.1957,
      "step": 159581
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.113975763320923,
      "learning_rate": 0.00012933749619550765,
      "loss": 2.9977,
      "step": 159582
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.392986536026001,
      "learning_rate": 0.00012933413203490836,
      "loss": 2.9397,
      "step": 159583
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7651419639587402,
      "learning_rate": 0.00012933076790603887,
      "loss": 3.1848,
      "step": 159584
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.241600275039673,
      "learning_rate": 0.0001293274038089001,
      "loss": 2.7917,
      "step": 159585
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.761044502258301,
      "learning_rate": 0.00012932403974349246,
      "loss": 3.0194,
      "step": 159586
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.06087064743042,
      "learning_rate": 0.00012932067570981684,
      "loss": 3.1407,
      "step": 159587
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3539092540740967,
      "learning_rate": 0.0001293173117078736,
      "loss": 2.802,
      "step": 159588
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.242687702178955,
      "learning_rate": 0.0001293139477376637,
      "loss": 3.0843,
      "step": 159589
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.073765277862549,
      "learning_rate": 0.00012931058379918737,
      "loss": 2.9098,
      "step": 159590
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.259126901626587,
      "learning_rate": 0.00012930721989244553,
      "loss": 2.9955,
      "step": 159591
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0640199184417725,
      "learning_rate": 0.0001293038560174386,
      "loss": 3.0173,
      "step": 159592
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0971133708953857,
      "learning_rate": 0.0001293004921741674,
      "loss": 3.0027,
      "step": 159593
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0973055362701416,
      "learning_rate": 0.00012929712836263235,
      "loss": 2.7434,
      "step": 159594
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9358198642730713,
      "learning_rate": 0.0001292937645828344,
      "loss": 2.9122,
      "step": 159595
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2379744052886963,
      "learning_rate": 0.00012929040083477374,
      "loss": 2.9266,
      "step": 159596
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8599090576171875,
      "learning_rate": 0.00012928703711845134,
      "loss": 3.032,
      "step": 159597
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.5578174591064453,
      "learning_rate": 0.0001292836734338676,
      "loss": 2.8622,
      "step": 159598
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3800103664398193,
      "learning_rate": 0.00012928030978102335,
      "loss": 2.7189,
      "step": 159599
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3135411739349365,
      "learning_rate": 0.00012927694615991897,
      "loss": 2.8263,
      "step": 159600
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3796112537384033,
      "learning_rate": 0.0001292735825705555,
      "loss": 2.9046,
      "step": 159601
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0686416625976562,
      "learning_rate": 0.00012927021901293302,
      "loss": 3.106,
      "step": 159602
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.271324872970581,
      "learning_rate": 0.00012926685548705257,
      "loss": 2.9836,
      "step": 159603
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.135477304458618,
      "learning_rate": 0.00012926349199291455,
      "loss": 2.8892,
      "step": 159604
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4737143516540527,
      "learning_rate": 0.00012926012853051974,
      "loss": 2.7653,
      "step": 159605
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2569010257720947,
      "learning_rate": 0.00012925676509986862,
      "loss": 2.9997,
      "step": 159606
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7297134399414062,
      "learning_rate": 0.00012925340170096195,
      "loss": 2.8094,
      "step": 159607
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7083895206451416,
      "learning_rate": 0.00012925003833380035,
      "loss": 2.9681,
      "step": 159608
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2618472576141357,
      "learning_rate": 0.00012924667499838432,
      "loss": 3.2825,
      "step": 159609
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.697908401489258,
      "learning_rate": 0.0001292433116947145,
      "loss": 3.1197,
      "step": 159610
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.700160503387451,
      "learning_rate": 0.00012923994842279168,
      "loss": 2.8341,
      "step": 159611
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.40842866897583,
      "learning_rate": 0.00012923658518261628,
      "loss": 3.0422,
      "step": 159612
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.746416091918945,
      "learning_rate": 0.00012923322197418908,
      "loss": 3.0292,
      "step": 159613
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.8856470584869385,
      "learning_rate": 0.00012922985879751072,
      "loss": 2.9839,
      "step": 159614
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.190737724304199,
      "learning_rate": 0.0001292264956525817,
      "loss": 2.9421,
      "step": 159615
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8254446983337402,
      "learning_rate": 0.00012922313253940258,
      "loss": 2.9437,
      "step": 159616
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2693419456481934,
      "learning_rate": 0.00012921976945797425,
      "loss": 2.9476,
      "step": 159617
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.880977153778076,
      "learning_rate": 0.00012921640640829708,
      "loss": 2.8982,
      "step": 159618
    },
    {
      "epoch": 2.08,
      "grad_norm": 6.667651653289795,
      "learning_rate": 0.00012921304339037187,
      "loss": 2.7752,
      "step": 159619
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0859596729278564,
      "learning_rate": 0.0001292096804041992,
      "loss": 2.9327,
      "step": 159620
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2403810024261475,
      "learning_rate": 0.00012920631744977958,
      "loss": 2.8573,
      "step": 159621
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.30729603767395,
      "learning_rate": 0.00012920295452711384,
      "loss": 2.9484,
      "step": 159622
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.319817304611206,
      "learning_rate": 0.00012919959163620247,
      "loss": 3.1385,
      "step": 159623
    },
    {
      "epoch": 2.08,
      "grad_norm": 5.204872131347656,
      "learning_rate": 0.00012919622877704604,
      "loss": 2.9105,
      "step": 159624
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.380603313446045,
      "learning_rate": 0.00012919286594964536,
      "loss": 3.0484,
      "step": 159625
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.560086727142334,
      "learning_rate": 0.0001291895031540009,
      "loss": 3.0805,
      "step": 159626
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.656873941421509,
      "learning_rate": 0.0001291861403901133,
      "loss": 2.8126,
      "step": 159627
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.023209571838379,
      "learning_rate": 0.00012918277765798327,
      "loss": 3.0938,
      "step": 159628
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.706728935241699,
      "learning_rate": 0.00012917941495761142,
      "loss": 2.9394,
      "step": 159629
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1273906230926514,
      "learning_rate": 0.00012917605228899822,
      "loss": 3.0257,
      "step": 159630
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.703202247619629,
      "learning_rate": 0.00012917268965214452,
      "loss": 2.8012,
      "step": 159631
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.516746997833252,
      "learning_rate": 0.0001291693270470509,
      "loss": 3.0416,
      "step": 159632
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6131691932678223,
      "learning_rate": 0.00012916596447371773,
      "loss": 3.0481,
      "step": 159633
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.853846549987793,
      "learning_rate": 0.000129162601932146,
      "loss": 2.9148,
      "step": 159634
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.928345203399658,
      "learning_rate": 0.00012915923942233604,
      "loss": 2.93,
      "step": 159635
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.12172532081604,
      "learning_rate": 0.00012915587694428872,
      "loss": 3.0889,
      "step": 159636
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4061484336853027,
      "learning_rate": 0.0001291525144980045,
      "loss": 3.0502,
      "step": 159637
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4896085262298584,
      "learning_rate": 0.00012914915208348413,
      "loss": 2.8539,
      "step": 159638
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.364312171936035,
      "learning_rate": 0.00012914578970072798,
      "loss": 3.1748,
      "step": 159639
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0596203804016113,
      "learning_rate": 0.000129142427349737,
      "loss": 2.6491,
      "step": 159640
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4679713249206543,
      "learning_rate": 0.00012913906503051156,
      "loss": 2.5974,
      "step": 159641
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.231989622116089,
      "learning_rate": 0.00012913570274305247,
      "loss": 2.9726,
      "step": 159642
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.765415906906128,
      "learning_rate": 0.00012913234048736019,
      "loss": 3.083,
      "step": 159643
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.3036489486694336,
      "learning_rate": 0.00012912897826343567,
      "loss": 2.8277,
      "step": 159644
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5706913471221924,
      "learning_rate": 0.00012912561607127904,
      "loss": 2.7477,
      "step": 159645
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.231910467147827,
      "learning_rate": 0.00012912225391089132,
      "loss": 2.8844,
      "step": 159646
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5144588947296143,
      "learning_rate": 0.00012911889178227287,
      "loss": 2.8629,
      "step": 159647
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.575139045715332,
      "learning_rate": 0.00012911552968542458,
      "loss": 2.912,
      "step": 159648
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8072564601898193,
      "learning_rate": 0.0001291121676203468,
      "loss": 2.8347,
      "step": 159649
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5522708892822266,
      "learning_rate": 0.00012910880558704055,
      "loss": 2.8194,
      "step": 159650
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.371976613998413,
      "learning_rate": 0.00012910544358550596,
      "loss": 3.0602,
      "step": 159651
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1185989379882812,
      "learning_rate": 0.000129102081615744,
      "loss": 2.8869,
      "step": 159652
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.434530019760132,
      "learning_rate": 0.00012909871967775507,
      "loss": 3.0329,
      "step": 159653
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3024742603302,
      "learning_rate": 0.00012909535777154006,
      "loss": 2.9732,
      "step": 159654
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9217300415039062,
      "learning_rate": 0.00012909199589709929,
      "loss": 2.9552,
      "step": 159655
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3711798191070557,
      "learning_rate": 0.00012908863405443382,
      "loss": 3.1144,
      "step": 159656
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6978342533111572,
      "learning_rate": 0.00012908527224354374,
      "loss": 2.6467,
      "step": 159657
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.5494253635406494,
      "learning_rate": 0.00012908191046443005,
      "loss": 2.9435,
      "step": 159658
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0825631618499756,
      "learning_rate": 0.00012907854871709314,
      "loss": 3.0355,
      "step": 159659
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.754326343536377,
      "learning_rate": 0.0001290751870015339,
      "loss": 2.6697,
      "step": 159660
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.744590997695923,
      "learning_rate": 0.00012907182531775264,
      "loss": 3.1832,
      "step": 159661
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2645421028137207,
      "learning_rate": 0.0001290684636657504,
      "loss": 3.0414,
      "step": 159662
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5708727836608887,
      "learning_rate": 0.00012906510204552737,
      "loss": 2.6734,
      "step": 159663
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.661949396133423,
      "learning_rate": 0.00012906174045708446,
      "loss": 3.0571,
      "step": 159664
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3508126735687256,
      "learning_rate": 0.00012905837890042206,
      "loss": 2.8816,
      "step": 159665
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.5409255027771,
      "learning_rate": 0.00012905501737554107,
      "loss": 2.9841,
      "step": 159666
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.468324899673462,
      "learning_rate": 0.00012905165588244187,
      "loss": 2.8307,
      "step": 159667
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0926513671875,
      "learning_rate": 0.0001290482944211254,
      "loss": 2.7895,
      "step": 159668
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.262383222579956,
      "learning_rate": 0.00012904493299159187,
      "loss": 2.7714,
      "step": 159669
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1326639652252197,
      "learning_rate": 0.00012904157159384223,
      "loss": 3.0988,
      "step": 159670
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.664077281951904,
      "learning_rate": 0.00012903821022787685,
      "loss": 3.0681,
      "step": 159671
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.409175157546997,
      "learning_rate": 0.00012903484889369664,
      "loss": 2.7903,
      "step": 159672
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8738656044006348,
      "learning_rate": 0.00012903148759130193,
      "loss": 2.9214,
      "step": 159673
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0272109508514404,
      "learning_rate": 0.00012902812632069376,
      "loss": 3.0718,
      "step": 159674
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.042442560195923,
      "learning_rate": 0.00012902476508187225,
      "loss": 2.8881,
      "step": 159675
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4933502674102783,
      "learning_rate": 0.00012902140387483835,
      "loss": 3.0727,
      "step": 159676
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3276989459991455,
      "learning_rate": 0.0001290180426995925,
      "loss": 3.065,
      "step": 159677
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.413283586502075,
      "learning_rate": 0.00012901468155613555,
      "loss": 3.1825,
      "step": 159678
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.203904628753662,
      "learning_rate": 0.00012901132044446786,
      "loss": 2.9657,
      "step": 159679
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4933512210845947,
      "learning_rate": 0.00012900795936459034,
      "loss": 3.1184,
      "step": 159680
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9336185455322266,
      "learning_rate": 0.00012900459831650347,
      "loss": 2.8348,
      "step": 159681
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4341964721679688,
      "learning_rate": 0.00012900123730020784,
      "loss": 2.8187,
      "step": 159682
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2469050884246826,
      "learning_rate": 0.00012899787631570403,
      "loss": 2.9949,
      "step": 159683
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.293652057647705,
      "learning_rate": 0.00012899451536299282,
      "loss": 2.9296,
      "step": 159684
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.62191104888916,
      "learning_rate": 0.00012899115444207465,
      "loss": 2.7421,
      "step": 159685
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2581090927124023,
      "learning_rate": 0.00012898779355295038,
      "loss": 2.9392,
      "step": 159686
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3052895069122314,
      "learning_rate": 0.0001289844326956205,
      "loss": 2.8323,
      "step": 159687
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.367647886276245,
      "learning_rate": 0.00012898107187008564,
      "loss": 2.9037,
      "step": 159688
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3398053646087646,
      "learning_rate": 0.00012897771107634633,
      "loss": 3.1313,
      "step": 159689
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.388636589050293,
      "learning_rate": 0.0001289743503144034,
      "loss": 3.0988,
      "step": 159690
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0433967113494873,
      "learning_rate": 0.00012897098958425724,
      "loss": 3.0374,
      "step": 159691
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.959477424621582,
      "learning_rate": 0.0001289676288859087,
      "loss": 3.0646,
      "step": 159692
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.810387372970581,
      "learning_rate": 0.00012896426821935837,
      "loss": 2.9851,
      "step": 159693
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5788519382476807,
      "learning_rate": 0.00012896090758460674,
      "loss": 3.1419,
      "step": 159694
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.239208698272705,
      "learning_rate": 0.00012895754698165443,
      "loss": 2.9352,
      "step": 159695
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.689537286758423,
      "learning_rate": 0.00012895418641050224,
      "loss": 2.8987,
      "step": 159696
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.178858757019043,
      "learning_rate": 0.00012895082587115056,
      "loss": 2.8327,
      "step": 159697
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.401116132736206,
      "learning_rate": 0.0001289474653636003,
      "loss": 2.6811,
      "step": 159698
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2828638553619385,
      "learning_rate": 0.00012894410488785193,
      "loss": 3.0456,
      "step": 159699
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.880647659301758,
      "learning_rate": 0.0001289407444439061,
      "loss": 2.9924,
      "step": 159700
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4130771160125732,
      "learning_rate": 0.0001289373840317633,
      "loss": 2.8338,
      "step": 159701
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8135557174682617,
      "learning_rate": 0.00012893402365142433,
      "loss": 3.1574,
      "step": 159702
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2735772132873535,
      "learning_rate": 0.00012893066330288967,
      "loss": 2.8951,
      "step": 159703
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4588873386383057,
      "learning_rate": 0.00012892730298616015,
      "loss": 2.9182,
      "step": 159704
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.669710636138916,
      "learning_rate": 0.00012892394270123624,
      "loss": 2.7204,
      "step": 159705
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0851340293884277,
      "learning_rate": 0.00012892058244811852,
      "loss": 3.055,
      "step": 159706
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2848751544952393,
      "learning_rate": 0.00012891722222680782,
      "loss": 2.6578,
      "step": 159707
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.780747890472412,
      "learning_rate": 0.0001289138620373046,
      "loss": 2.8743,
      "step": 159708
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1681203842163086,
      "learning_rate": 0.00012891050187960942,
      "loss": 2.8572,
      "step": 159709
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1662254333496094,
      "learning_rate": 0.00012890714175372312,
      "loss": 2.9971,
      "step": 159710
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.484380006790161,
      "learning_rate": 0.0001289037816596462,
      "loss": 3.1405,
      "step": 159711
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4725406169891357,
      "learning_rate": 0.00012890042159737923,
      "loss": 3.0232,
      "step": 159712
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.423718214035034,
      "learning_rate": 0.000128897061566923,
      "loss": 2.9416,
      "step": 159713
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.249033212661743,
      "learning_rate": 0.00012889370156827801,
      "loss": 2.7529,
      "step": 159714
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5316343307495117,
      "learning_rate": 0.00012889034160144482,
      "loss": 2.9931,
      "step": 159715
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.076944589614868,
      "learning_rate": 0.00012888698166642424,
      "loss": 2.8343,
      "step": 159716
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.3900556564331055,
      "learning_rate": 0.00012888362176321678,
      "loss": 2.8543,
      "step": 159717
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6862659454345703,
      "learning_rate": 0.000128880261891823,
      "loss": 2.9763,
      "step": 159718
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.585622787475586,
      "learning_rate": 0.00012887690205224376,
      "loss": 2.9191,
      "step": 159719
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.662564516067505,
      "learning_rate": 0.00012887354224447935,
      "loss": 2.8765,
      "step": 159720
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.32350754737854,
      "learning_rate": 0.00012887018246853074,
      "loss": 2.8126,
      "step": 159721
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3392579555511475,
      "learning_rate": 0.00012886682272439837,
      "loss": 2.9333,
      "step": 159722
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7459893226623535,
      "learning_rate": 0.00012886346301208291,
      "loss": 2.9425,
      "step": 159723
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7502546310424805,
      "learning_rate": 0.00012886010333158483,
      "loss": 2.9273,
      "step": 159724
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6577706336975098,
      "learning_rate": 0.00012885674368290503,
      "loss": 3.2222,
      "step": 159725
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4020259380340576,
      "learning_rate": 0.00012885338406604384,
      "loss": 3.1232,
      "step": 159726
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.624143362045288,
      "learning_rate": 0.00012885002448100215,
      "loss": 3.0068,
      "step": 159727
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4456732273101807,
      "learning_rate": 0.00012884666492778049,
      "loss": 2.9887,
      "step": 159728
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3274805545806885,
      "learning_rate": 0.00012884330540637945,
      "loss": 2.9038,
      "step": 159729
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.508298873901367,
      "learning_rate": 0.00012883994591679956,
      "loss": 2.6514,
      "step": 159730
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1597354412078857,
      "learning_rate": 0.0001288365864590417,
      "loss": 2.8868,
      "step": 159731
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.256263494491577,
      "learning_rate": 0.00012883322703310623,
      "loss": 2.8297,
      "step": 159732
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7023766040802,
      "learning_rate": 0.00012882986763899397,
      "loss": 2.9013,
      "step": 159733
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.534106731414795,
      "learning_rate": 0.00012882650827670538,
      "loss": 2.9971,
      "step": 159734
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.207512855529785,
      "learning_rate": 0.00012882314894624138,
      "loss": 3.0226,
      "step": 159735
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0996506214141846,
      "learning_rate": 0.00012881978964760218,
      "loss": 2.855,
      "step": 159736
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5126450061798096,
      "learning_rate": 0.00012881643038078873,
      "loss": 3.045,
      "step": 159737
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.279651641845703,
      "learning_rate": 0.00012881307114580144,
      "loss": 3.0802,
      "step": 159738
    },
    {
      "epoch": 2.08,
      "grad_norm": 1.9936528205871582,
      "learning_rate": 0.0001288097119426411,
      "loss": 3.109,
      "step": 159739
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5445799827575684,
      "learning_rate": 0.0001288063527713082,
      "loss": 3.0051,
      "step": 159740
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5979716777801514,
      "learning_rate": 0.0001288029936318035,
      "loss": 2.9954,
      "step": 159741
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.622849941253662,
      "learning_rate": 0.00012879963452412755,
      "loss": 3.0064,
      "step": 159742
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.750762462615967,
      "learning_rate": 0.00012879627544828102,
      "loss": 3.212,
      "step": 159743
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1908957958221436,
      "learning_rate": 0.00012879291640426437,
      "loss": 3.0507,
      "step": 159744
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1224024295806885,
      "learning_rate": 0.00012878955739207846,
      "loss": 2.9715,
      "step": 159745
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6002635955810547,
      "learning_rate": 0.0001287861984117237,
      "loss": 2.8998,
      "step": 159746
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.048542022705078,
      "learning_rate": 0.00012878283946320089,
      "loss": 3.0446,
      "step": 159747
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6504054069519043,
      "learning_rate": 0.0001287794805465106,
      "loss": 2.9857,
      "step": 159748
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.154425859451294,
      "learning_rate": 0.00012877612166165345,
      "loss": 2.9132,
      "step": 159749
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3269221782684326,
      "learning_rate": 0.00012877276280862993,
      "loss": 2.8031,
      "step": 159750
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.425002336502075,
      "learning_rate": 0.00012876940398744093,
      "loss": 2.6188,
      "step": 159751
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9636433124542236,
      "learning_rate": 0.00012876604519808678,
      "loss": 3.0875,
      "step": 159752
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.46987247467041,
      "learning_rate": 0.00012876268644056835,
      "loss": 2.6039,
      "step": 159753
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4021618366241455,
      "learning_rate": 0.00012875932771488623,
      "loss": 3.0454,
      "step": 159754
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.626173496246338,
      "learning_rate": 0.00012875596902104094,
      "loss": 2.8418,
      "step": 159755
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.561276435852051,
      "learning_rate": 0.00012875261035903303,
      "loss": 3.0186,
      "step": 159756
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.501187801361084,
      "learning_rate": 0.0001287492517288634,
      "loss": 2.7604,
      "step": 159757
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.278752326965332,
      "learning_rate": 0.00012874589313053233,
      "loss": 3.0621,
      "step": 159758
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4304604530334473,
      "learning_rate": 0.00012874253456404081,
      "loss": 3.0775,
      "step": 159759
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4215087890625,
      "learning_rate": 0.00012873917602938925,
      "loss": 2.6995,
      "step": 159760
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.971883535385132,
      "learning_rate": 0.00012873581752657833,
      "loss": 2.989,
      "step": 159761
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.462813377380371,
      "learning_rate": 0.00012873245905560855,
      "loss": 2.739,
      "step": 159762
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.650775671005249,
      "learning_rate": 0.0001287291006164807,
      "loss": 2.8291,
      "step": 159763
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0370516777038574,
      "learning_rate": 0.00012872574220919528,
      "loss": 2.7163,
      "step": 159764
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5292601585388184,
      "learning_rate": 0.00012872238383375306,
      "loss": 3.0728,
      "step": 159765
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.938173770904541,
      "learning_rate": 0.00012871902549015462,
      "loss": 3.2725,
      "step": 159766
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4726901054382324,
      "learning_rate": 0.0001287156671784005,
      "loss": 2.7985,
      "step": 159767
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1901729106903076,
      "learning_rate": 0.0001287123088984913,
      "loss": 3.2218,
      "step": 159768
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6303012371063232,
      "learning_rate": 0.0001287089506504278,
      "loss": 2.8954,
      "step": 159769
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.451533555984497,
      "learning_rate": 0.00012870559243421042,
      "loss": 2.9077,
      "step": 159770
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2777903079986572,
      "learning_rate": 0.00012870223424984005,
      "loss": 2.9312,
      "step": 159771
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3393337726593018,
      "learning_rate": 0.00012869887609731715,
      "loss": 2.8913,
      "step": 159772
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4124419689178467,
      "learning_rate": 0.00012869551797664236,
      "loss": 2.8681,
      "step": 159773
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2873759269714355,
      "learning_rate": 0.0001286921598878162,
      "loss": 2.912,
      "step": 159774
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.162659168243408,
      "learning_rate": 0.0001286888018308395,
      "loss": 2.8872,
      "step": 159775
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1995651721954346,
      "learning_rate": 0.0001286854438057127,
      "loss": 2.8419,
      "step": 159776
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2005319595336914,
      "learning_rate": 0.0001286820858124366,
      "loss": 2.9121,
      "step": 159777
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3187096118927,
      "learning_rate": 0.00012867872785101176,
      "loss": 3.0336,
      "step": 159778
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.669419288635254,
      "learning_rate": 0.00012867536992143875,
      "loss": 2.6883,
      "step": 159779
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.696321487426758,
      "learning_rate": 0.0001286720120237181,
      "loss": 2.8645,
      "step": 159780
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.555525541305542,
      "learning_rate": 0.0001286686541578507,
      "loss": 2.9788,
      "step": 159781
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6878821849823,
      "learning_rate": 0.00012866529632383692,
      "loss": 3.0932,
      "step": 159782
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.407975912094116,
      "learning_rate": 0.00012866193852167758,
      "loss": 3.1118,
      "step": 159783
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0315823554992676,
      "learning_rate": 0.00012865858075137326,
      "loss": 3.0235,
      "step": 159784
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.421945810317993,
      "learning_rate": 0.00012865522301292451,
      "loss": 2.7368,
      "step": 159785
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8875930309295654,
      "learning_rate": 0.00012865186530633188,
      "loss": 3.2063,
      "step": 159786
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5640597343444824,
      "learning_rate": 0.00012864850763159621,
      "loss": 3.0986,
      "step": 159787
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.396183967590332,
      "learning_rate": 0.0001286451499887179,
      "loss": 3.0941,
      "step": 159788
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.306373357772827,
      "learning_rate": 0.00012864179237769783,
      "loss": 2.8356,
      "step": 159789
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4049789905548096,
      "learning_rate": 0.00012863843479853651,
      "loss": 3.0505,
      "step": 159790
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.244262933731079,
      "learning_rate": 0.00012863507725123448,
      "loss": 3.1485,
      "step": 159791
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8296267986297607,
      "learning_rate": 0.00012863171973579235,
      "loss": 2.9528,
      "step": 159792
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4040422439575195,
      "learning_rate": 0.00012862836225221088,
      "loss": 3.2138,
      "step": 159793
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6727726459503174,
      "learning_rate": 0.0001286250048004906,
      "loss": 3.0446,
      "step": 159794
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.535616397857666,
      "learning_rate": 0.00012862164738063222,
      "loss": 2.9988,
      "step": 159795
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.851081132888794,
      "learning_rate": 0.00012861828999263634,
      "loss": 3.0687,
      "step": 159796
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5565004348754883,
      "learning_rate": 0.00012861493263650344,
      "loss": 2.9605,
      "step": 159797
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4321985244750977,
      "learning_rate": 0.00012861157531223434,
      "loss": 3.0134,
      "step": 159798
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.390418291091919,
      "learning_rate": 0.0001286082180198296,
      "loss": 2.966,
      "step": 159799
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.271611452102661,
      "learning_rate": 0.0001286048607592897,
      "loss": 3.2578,
      "step": 159800
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.080178737640381,
      "learning_rate": 0.00012860150353061553,
      "loss": 3.0035,
      "step": 159801
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.368622064590454,
      "learning_rate": 0.00012859814633380757,
      "loss": 2.8852,
      "step": 159802
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.271451473236084,
      "learning_rate": 0.00012859478916886632,
      "loss": 3.0774,
      "step": 159803
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7652666568756104,
      "learning_rate": 0.0001285914320357927,
      "loss": 3.0202,
      "step": 159804
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4073426723480225,
      "learning_rate": 0.000128588074934587,
      "loss": 2.7928,
      "step": 159805
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.419061660766602,
      "learning_rate": 0.00012858471786525015,
      "loss": 2.7168,
      "step": 159806
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.108168125152588,
      "learning_rate": 0.00012858136082778265,
      "loss": 2.8118,
      "step": 159807
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.379725933074951,
      "learning_rate": 0.00012857800382218496,
      "loss": 2.8336,
      "step": 159808
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8917741775512695,
      "learning_rate": 0.00012857464684845797,
      "loss": 2.8964,
      "step": 159809
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.870529890060425,
      "learning_rate": 0.0001285712899066022,
      "loss": 3.1096,
      "step": 159810
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7396364212036133,
      "learning_rate": 0.0001285679329966182,
      "loss": 2.8285,
      "step": 159811
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6371359825134277,
      "learning_rate": 0.0001285645761185067,
      "loss": 2.9662,
      "step": 159812
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.187082529067993,
      "learning_rate": 0.0001285612192722683,
      "loss": 3.0972,
      "step": 159813
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.372071743011475,
      "learning_rate": 0.00012855786245790352,
      "loss": 3.1786,
      "step": 159814
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.616696357727051,
      "learning_rate": 0.00012855450567541315,
      "loss": 2.9506,
      "step": 159815
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.931427478790283,
      "learning_rate": 0.00012855114892479776,
      "loss": 2.8283,
      "step": 159816
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.807354211807251,
      "learning_rate": 0.00012854779220605782,
      "loss": 2.8824,
      "step": 159817
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6670172214508057,
      "learning_rate": 0.0001285444355191942,
      "loss": 3.1281,
      "step": 159818
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4696078300476074,
      "learning_rate": 0.0001285410788642073,
      "loss": 2.9415,
      "step": 159819
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1949894428253174,
      "learning_rate": 0.00012853772224109792,
      "loss": 2.8705,
      "step": 159820
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0572457313537598,
      "learning_rate": 0.00012853436564986667,
      "loss": 2.8619,
      "step": 159821
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.9399874210357666,
      "learning_rate": 0.00012853100909051407,
      "loss": 3.01,
      "step": 159822
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.804711103439331,
      "learning_rate": 0.0001285276525630407,
      "loss": 2.9312,
      "step": 159823
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6197891235351562,
      "learning_rate": 0.00012852429606744742,
      "loss": 2.7189,
      "step": 159824
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.158926010131836,
      "learning_rate": 0.00012852093960373455,
      "loss": 2.9582,
      "step": 159825
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6413094997406006,
      "learning_rate": 0.000128517583171903,
      "loss": 3.0416,
      "step": 159826
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.228940725326538,
      "learning_rate": 0.00012851422677195328,
      "loss": 3.032,
      "step": 159827
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9076285362243652,
      "learning_rate": 0.00012851087040388598,
      "loss": 3.0557,
      "step": 159828
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3917665481567383,
      "learning_rate": 0.0001285075140677017,
      "loss": 2.8292,
      "step": 159829
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9071240425109863,
      "learning_rate": 0.00012850415776340113,
      "loss": 2.7286,
      "step": 159830
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4868853092193604,
      "learning_rate": 0.0001285008014909848,
      "loss": 2.806,
      "step": 159831
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6302456855773926,
      "learning_rate": 0.00012849744525045354,
      "loss": 2.7868,
      "step": 159832
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.354917049407959,
      "learning_rate": 0.00012849408904180786,
      "loss": 3.0158,
      "step": 159833
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.37969708442688,
      "learning_rate": 0.0001284907328650483,
      "loss": 2.8558,
      "step": 159834
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8246707916259766,
      "learning_rate": 0.0001284873767201755,
      "loss": 2.8966,
      "step": 159835
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.379528522491455,
      "learning_rate": 0.00012848402060719021,
      "loss": 2.9946,
      "step": 159836
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.482463836669922,
      "learning_rate": 0.0001284806645260929,
      "loss": 3.0525,
      "step": 159837
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9899206161499023,
      "learning_rate": 0.0001284773084768843,
      "loss": 3.0243,
      "step": 159838
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.104185104370117,
      "learning_rate": 0.0001284739524595651,
      "loss": 2.9542,
      "step": 159839
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9117279052734375,
      "learning_rate": 0.00012847059647413578,
      "loss": 2.6958,
      "step": 159840
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.949179172515869,
      "learning_rate": 0.0001284672405205969,
      "loss": 3.1456,
      "step": 159841
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.047544002532959,
      "learning_rate": 0.00012846388459894934,
      "loss": 2.9175,
      "step": 159842
    },
    {
      "epoch": 2.08,
      "grad_norm": 5.123260021209717,
      "learning_rate": 0.00012846052870919345,
      "loss": 2.7958,
      "step": 159843
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7370707988739014,
      "learning_rate": 0.00012845717285133013,
      "loss": 3.196,
      "step": 159844
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.283160924911499,
      "learning_rate": 0.00012845381702535983,
      "loss": 3.1891,
      "step": 159845
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3548104763031006,
      "learning_rate": 0.0001284504612312832,
      "loss": 3.19,
      "step": 159846
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.188851356506348,
      "learning_rate": 0.00012844710546910078,
      "loss": 2.7626,
      "step": 159847
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9664247035980225,
      "learning_rate": 0.00012844374973881335,
      "loss": 3.0985,
      "step": 159848
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.8191606998443604,
      "learning_rate": 0.0001284403940404214,
      "loss": 2.817,
      "step": 159849
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3415699005126953,
      "learning_rate": 0.00012843703837392572,
      "loss": 3.1436,
      "step": 159850
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.437148332595825,
      "learning_rate": 0.00012843368273932683,
      "loss": 3.1466,
      "step": 159851
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.466615915298462,
      "learning_rate": 0.0001284303271366253,
      "loss": 2.9981,
      "step": 159852
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.8223845958709717,
      "learning_rate": 0.00012842697156582178,
      "loss": 2.9379,
      "step": 159853
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.539114236831665,
      "learning_rate": 0.000128423616026917,
      "loss": 3.0216,
      "step": 159854
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.332432508468628,
      "learning_rate": 0.0001284202605199114,
      "loss": 2.8016,
      "step": 159855
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.542808771133423,
      "learning_rate": 0.00012841690504480585,
      "loss": 2.8727,
      "step": 159856
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5312530994415283,
      "learning_rate": 0.00012841354960160082,
      "loss": 3.0217,
      "step": 159857
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.399332284927368,
      "learning_rate": 0.00012841019419029695,
      "loss": 2.9939,
      "step": 159858
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6840062141418457,
      "learning_rate": 0.00012840683881089473,
      "loss": 3.2177,
      "step": 159859
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6566481590270996,
      "learning_rate": 0.00012840348346339505,
      "loss": 2.9787,
      "step": 159860
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.382896900177002,
      "learning_rate": 0.00012840012814779828,
      "loss": 2.9799,
      "step": 159861
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.199521064758301,
      "learning_rate": 0.00012839677286410528,
      "loss": 2.9787,
      "step": 159862
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.658993721008301,
      "learning_rate": 0.00012839341761231656,
      "loss": 3.0153,
      "step": 159863
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.929795026779175,
      "learning_rate": 0.00012839006239243276,
      "loss": 3.1821,
      "step": 159864
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3451523780822754,
      "learning_rate": 0.00012838670720445434,
      "loss": 2.9767,
      "step": 159865
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.613074779510498,
      "learning_rate": 0.0001283833520483822,
      "loss": 2.9004,
      "step": 159866
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9502310752868652,
      "learning_rate": 0.00012837999692421673,
      "loss": 2.8107,
      "step": 159867
    },
    {
      "epoch": 2.08,
      "grad_norm": 5.1872944831848145,
      "learning_rate": 0.00012837664183195876,
      "loss": 2.9619,
      "step": 159868
    },
    {
      "epoch": 2.08,
      "grad_norm": 7.373577117919922,
      "learning_rate": 0.00012837328677160883,
      "loss": 3.065,
      "step": 159869
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.467391014099121,
      "learning_rate": 0.00012836993174316751,
      "loss": 3.0025,
      "step": 159870
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.443291664123535,
      "learning_rate": 0.0001283665767466354,
      "loss": 2.8382,
      "step": 159871
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.144202709197998,
      "learning_rate": 0.00012836322178201324,
      "loss": 2.9167,
      "step": 159872
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.211493968963623,
      "learning_rate": 0.00012835986684930155,
      "loss": 2.9622,
      "step": 159873
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.959286689758301,
      "learning_rate": 0.00012835651194850105,
      "loss": 2.8331,
      "step": 159874
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4197325706481934,
      "learning_rate": 0.00012835315707961223,
      "loss": 3.0974,
      "step": 159875
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.367696285247803,
      "learning_rate": 0.00012834980224263602,
      "loss": 2.9549,
      "step": 159876
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.70361590385437,
      "learning_rate": 0.00012834644743757258,
      "loss": 2.9008,
      "step": 159877
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4894142150878906,
      "learning_rate": 0.0001283430926644229,
      "loss": 2.7724,
      "step": 159878
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2725443840026855,
      "learning_rate": 0.0001283397379231874,
      "loss": 2.7745,
      "step": 159879
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.261590480804443,
      "learning_rate": 0.00012833638321386686,
      "loss": 2.8447,
      "step": 159880
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8508243560791016,
      "learning_rate": 0.00012833302853646172,
      "loss": 3.036,
      "step": 159881
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4241931438446045,
      "learning_rate": 0.0001283296738909728,
      "loss": 3.0288,
      "step": 159882
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0310356616973877,
      "learning_rate": 0.00012832631927740066,
      "loss": 2.7886,
      "step": 159883
    },
    {
      "epoch": 2.08,
      "grad_norm": 1.8590060472488403,
      "learning_rate": 0.00012832296469574592,
      "loss": 3.108,
      "step": 159884
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.209306240081787,
      "learning_rate": 0.00012831961014600902,
      "loss": 2.8952,
      "step": 159885
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.621757745742798,
      "learning_rate": 0.0001283162556281909,
      "loss": 3.2083,
      "step": 159886
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1172659397125244,
      "learning_rate": 0.0001283129011422919,
      "loss": 2.7649,
      "step": 159887
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8260200023651123,
      "learning_rate": 0.00012830954668831287,
      "loss": 2.9062,
      "step": 159888
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.820504903793335,
      "learning_rate": 0.00012830619226625434,
      "loss": 2.9793,
      "step": 159889
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.329132556915283,
      "learning_rate": 0.00012830283787611686,
      "loss": 3.0118,
      "step": 159890
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.804931640625,
      "learning_rate": 0.00012829948351790119,
      "loss": 2.9371,
      "step": 159891
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.517749309539795,
      "learning_rate": 0.0001282961291916079,
      "loss": 2.8702,
      "step": 159892
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.013252019882202,
      "learning_rate": 0.00012829277489723748,
      "loss": 2.6691,
      "step": 159893
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.666325807571411,
      "learning_rate": 0.00012828942063479077,
      "loss": 3.0508,
      "step": 159894
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.107630729675293,
      "learning_rate": 0.00012828606640426834,
      "loss": 3.0222,
      "step": 159895
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0066962242126465,
      "learning_rate": 0.00012828271220567062,
      "loss": 2.9725,
      "step": 159896
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5652003288269043,
      "learning_rate": 0.00012827935803899856,
      "loss": 3.158,
      "step": 159897
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.581212043762207,
      "learning_rate": 0.00012827600390425255,
      "loss": 3.1234,
      "step": 159898
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.440639019012451,
      "learning_rate": 0.0001282726498014332,
      "loss": 2.918,
      "step": 159899
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.470613479614258,
      "learning_rate": 0.00012826929573054127,
      "loss": 3.0453,
      "step": 159900
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.65895938873291,
      "learning_rate": 0.00012826594169157736,
      "loss": 2.8658,
      "step": 159901
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0032365322113037,
      "learning_rate": 0.00012826258768454193,
      "loss": 2.7493,
      "step": 159902
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2792532444000244,
      "learning_rate": 0.00012825923370943582,
      "loss": 3.0327,
      "step": 159903
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3357021808624268,
      "learning_rate": 0.00012825587976625946,
      "loss": 3.0055,
      "step": 159904
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5048775672912598,
      "learning_rate": 0.0001282525258550137,
      "loss": 3.2765,
      "step": 159905
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0369083881378174,
      "learning_rate": 0.00012824917197569902,
      "loss": 2.9608,
      "step": 159906
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4299654960632324,
      "learning_rate": 0.00012824581812831607,
      "loss": 3.0272,
      "step": 159907
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.82453989982605,
      "learning_rate": 0.00012824246431286535,
      "loss": 2.9431,
      "step": 159908
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.339879274368286,
      "learning_rate": 0.0001282391105293477,
      "loss": 2.8138,
      "step": 159909
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5632660388946533,
      "learning_rate": 0.00012823575677776356,
      "loss": 2.994,
      "step": 159910
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.252762794494629,
      "learning_rate": 0.00012823240305811373,
      "loss": 3.1144,
      "step": 159911
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6635560989379883,
      "learning_rate": 0.00012822904937039874,
      "loss": 3.1191,
      "step": 159912
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.416903018951416,
      "learning_rate": 0.0001282256957146192,
      "loss": 2.9781,
      "step": 159913
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.664608955383301,
      "learning_rate": 0.00012822234209077565,
      "loss": 2.7664,
      "step": 159914
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0429415702819824,
      "learning_rate": 0.0001282189884988689,
      "loss": 2.845,
      "step": 159915
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.780535936355591,
      "learning_rate": 0.0001282156349388994,
      "loss": 2.8293,
      "step": 159916
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.408313751220703,
      "learning_rate": 0.00012821228141086794,
      "loss": 2.9208,
      "step": 159917
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3597540855407715,
      "learning_rate": 0.0001282089279147751,
      "loss": 2.979,
      "step": 159918
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3204128742218018,
      "learning_rate": 0.00012820557445062143,
      "loss": 2.92,
      "step": 159919
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1334991455078125,
      "learning_rate": 0.0001282022210184075,
      "loss": 2.988,
      "step": 159920
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.614370822906494,
      "learning_rate": 0.00012819886761813407,
      "loss": 3.0524,
      "step": 159921
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4384658336639404,
      "learning_rate": 0.00012819551424980168,
      "loss": 2.9741,
      "step": 159922
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.174013137817383,
      "learning_rate": 0.00012819216091341102,
      "loss": 2.7006,
      "step": 159923
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.286062479019165,
      "learning_rate": 0.00012818880760896275,
      "loss": 2.8854,
      "step": 159924
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.202841281890869,
      "learning_rate": 0.00012818545433645738,
      "loss": 2.9459,
      "step": 159925
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.623396396636963,
      "learning_rate": 0.0001281821010958955,
      "loss": 2.8383,
      "step": 159926
    },
    {
      "epoch": 2.08,
      "grad_norm": 1.9280673265457153,
      "learning_rate": 0.00012817874788727788,
      "loss": 2.9962,
      "step": 159927
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.254706621170044,
      "learning_rate": 0.000128175394710605,
      "loss": 3.004,
      "step": 159928
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.896357536315918,
      "learning_rate": 0.00012817204156587766,
      "loss": 2.9196,
      "step": 159929
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.230262041091919,
      "learning_rate": 0.00012816868845309638,
      "loss": 2.9697,
      "step": 159930
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1547558307647705,
      "learning_rate": 0.00012816533537226174,
      "loss": 2.7162,
      "step": 159931
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.511354446411133,
      "learning_rate": 0.00012816198232337437,
      "loss": 3.175,
      "step": 159932
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2251546382904053,
      "learning_rate": 0.00012815862930643502,
      "loss": 2.7701,
      "step": 159933
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4712605476379395,
      "learning_rate": 0.0001281552763214441,
      "loss": 2.9754,
      "step": 159934
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4672181606292725,
      "learning_rate": 0.00012815192336840248,
      "loss": 3.0041,
      "step": 159935
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3484439849853516,
      "learning_rate": 0.00012814857044731064,
      "loss": 3.0686,
      "step": 159936
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8368380069732666,
      "learning_rate": 0.00012814521755816923,
      "loss": 2.7947,
      "step": 159937
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.282369375228882,
      "learning_rate": 0.00012814186470097875,
      "loss": 3.0525,
      "step": 159938
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8218512535095215,
      "learning_rate": 0.0001281385118757401,
      "loss": 2.9864,
      "step": 159939
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2092297077178955,
      "learning_rate": 0.0001281351590824536,
      "loss": 3.2017,
      "step": 159940
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.405334711074829,
      "learning_rate": 0.0001281318063211201,
      "loss": 2.9639,
      "step": 159941
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.328383207321167,
      "learning_rate": 0.0001281284535917401,
      "loss": 3.1183,
      "step": 159942
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.385521411895752,
      "learning_rate": 0.00012812510089431443,
      "loss": 3.0078,
      "step": 159943
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.300290107727051,
      "learning_rate": 0.00012812174822884336,
      "loss": 2.8317,
      "step": 159944
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.2048563957214355,
      "learning_rate": 0.00012811839559532777,
      "loss": 2.9877,
      "step": 159945
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.939939498901367,
      "learning_rate": 0.00012811504299376815,
      "loss": 2.8771,
      "step": 159946
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3754491806030273,
      "learning_rate": 0.00012811169042416528,
      "loss": 2.8849,
      "step": 159947
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.02376389503479,
      "learning_rate": 0.00012810833788651959,
      "loss": 3.0622,
      "step": 159948
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6367132663726807,
      "learning_rate": 0.000128104985380832,
      "loss": 2.8584,
      "step": 159949
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7791237831115723,
      "learning_rate": 0.00012810163290710274,
      "loss": 2.9388,
      "step": 159950
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4914467334747314,
      "learning_rate": 0.00012809828046533275,
      "loss": 2.7685,
      "step": 159951
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5110867023468018,
      "learning_rate": 0.0001280949280555224,
      "loss": 3.0102,
      "step": 159952
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8290483951568604,
      "learning_rate": 0.00012809157567767254,
      "loss": 2.7478,
      "step": 159953
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6167783737182617,
      "learning_rate": 0.00012808822333178362,
      "loss": 2.7403,
      "step": 159954
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4687416553497314,
      "learning_rate": 0.00012808487101785657,
      "loss": 3.0066,
      "step": 159955
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1668193340301514,
      "learning_rate": 0.00012808151873589159,
      "loss": 3.0732,
      "step": 159956
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.236034631729126,
      "learning_rate": 0.00012807816648588956,
      "loss": 3.1885,
      "step": 159957
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.249445915222168,
      "learning_rate": 0.00012807481426785095,
      "loss": 2.7867,
      "step": 159958
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.516908645629883,
      "learning_rate": 0.0001280714620817766,
      "loss": 2.9549,
      "step": 159959
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4886112213134766,
      "learning_rate": 0.00012806810992766688,
      "loss": 3.0535,
      "step": 159960
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.475504159927368,
      "learning_rate": 0.00012806475780552279,
      "loss": 3.0031,
      "step": 159961
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.267730712890625,
      "learning_rate": 0.00012806140571534446,
      "loss": 3.1317,
      "step": 159962
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3531415462493896,
      "learning_rate": 0.0001280580536571329,
      "loss": 2.9466,
      "step": 159963
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.629995822906494,
      "learning_rate": 0.00012805470163088846,
      "loss": 2.9159,
      "step": 159964
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.984680414199829,
      "learning_rate": 0.00012805134963661202,
      "loss": 2.7672,
      "step": 159965
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.164670705795288,
      "learning_rate": 0.00012804799767430396,
      "loss": 2.9843,
      "step": 159966
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.626058340072632,
      "learning_rate": 0.00012804464574396515,
      "loss": 3.0683,
      "step": 159967
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.59978723526001,
      "learning_rate": 0.00012804129384559607,
      "loss": 2.8697,
      "step": 159968
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.681240081787109,
      "learning_rate": 0.00012803794197919737,
      "loss": 2.9326,
      "step": 159969
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6297903060913086,
      "learning_rate": 0.00012803459014476952,
      "loss": 3.1305,
      "step": 159970
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.623960494995117,
      "learning_rate": 0.00012803123834231344,
      "loss": 2.9205,
      "step": 159971
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.269721269607544,
      "learning_rate": 0.00012802788657182947,
      "loss": 3.0515,
      "step": 159972
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.723581314086914,
      "learning_rate": 0.00012802453483331847,
      "loss": 3.1273,
      "step": 159973
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.984548807144165,
      "learning_rate": 0.00012802118312678096,
      "loss": 3.0694,
      "step": 159974
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4201178550720215,
      "learning_rate": 0.00012801783145221756,
      "loss": 3.0949,
      "step": 159975
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3470916748046875,
      "learning_rate": 0.00012801447980962877,
      "loss": 3.0795,
      "step": 159976
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.256793975830078,
      "learning_rate": 0.00012801112819901545,
      "loss": 3.0314,
      "step": 159977
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.15741491317749,
      "learning_rate": 0.000128007776620378,
      "loss": 2.9502,
      "step": 159978
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6987736225128174,
      "learning_rate": 0.00012800442507371728,
      "loss": 3.0444,
      "step": 159979
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.259084701538086,
      "learning_rate": 0.00012800107355903374,
      "loss": 3.0768,
      "step": 159980
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3462867736816406,
      "learning_rate": 0.00012799772207632798,
      "loss": 3.0423,
      "step": 159981
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.498783588409424,
      "learning_rate": 0.00012799437062560077,
      "loss": 2.8261,
      "step": 159982
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0910778045654297,
      "learning_rate": 0.00012799101920685268,
      "loss": 3.1307,
      "step": 159983
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8850972652435303,
      "learning_rate": 0.0001279876678200842,
      "loss": 2.9004,
      "step": 159984
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.222879409790039,
      "learning_rate": 0.00012798431646529615,
      "loss": 2.842,
      "step": 159985
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3468289375305176,
      "learning_rate": 0.00012798096514248905,
      "loss": 2.9443,
      "step": 159986
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8601057529449463,
      "learning_rate": 0.00012797761385166344,
      "loss": 2.8555,
      "step": 159987
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.275649070739746,
      "learning_rate": 0.00012797426259282012,
      "loss": 3.0477,
      "step": 159988
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.503654479980469,
      "learning_rate": 0.00012797091136595957,
      "loss": 2.9746,
      "step": 159989
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1724579334259033,
      "learning_rate": 0.00012796756017108258,
      "loss": 3.2237,
      "step": 159990
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.325639247894287,
      "learning_rate": 0.00012796420900818963,
      "loss": 3.1386,
      "step": 159991
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4166641235351562,
      "learning_rate": 0.00012796085787728143,
      "loss": 2.8594,
      "step": 159992
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.392091751098633,
      "learning_rate": 0.00012795750677835842,
      "loss": 3.1017,
      "step": 159993
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0094528198242188,
      "learning_rate": 0.00012795415571142146,
      "loss": 2.7864,
      "step": 159994
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.3887715339660645,
      "learning_rate": 0.00012795080467647098,
      "loss": 2.8793,
      "step": 159995
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4935503005981445,
      "learning_rate": 0.00012794745367350775,
      "loss": 2.9742,
      "step": 159996
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3480148315429688,
      "learning_rate": 0.00012794410270253239,
      "loss": 3.1225,
      "step": 159997
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6127278804779053,
      "learning_rate": 0.00012794075176354546,
      "loss": 2.9754,
      "step": 159998
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.073228359222412,
      "learning_rate": 0.00012793740085654748,
      "loss": 3.0911,
      "step": 159999
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9299662113189697,
      "learning_rate": 0.0001279340499815393,
      "loss": 2.9281,
      "step": 160000
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2163333892822266,
      "learning_rate": 0.0001279306991385213,
      "loss": 3.0147,
      "step": 160001
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.280808687210083,
      "learning_rate": 0.00012792734832749435,
      "loss": 2.9979,
      "step": 160002
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.507185459136963,
      "learning_rate": 0.00012792399754845896,
      "loss": 3.18,
      "step": 160003
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.184391736984253,
      "learning_rate": 0.00012792064680141574,
      "loss": 3.0732,
      "step": 160004
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.550506114959717,
      "learning_rate": 0.00012791729608636519,
      "loss": 2.941,
      "step": 160005
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6610348224639893,
      "learning_rate": 0.00012791394540330822,
      "loss": 2.7695,
      "step": 160006
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2987897396087646,
      "learning_rate": 0.00012791059475224513,
      "loss": 3.0761,
      "step": 160007
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1831514835357666,
      "learning_rate": 0.00012790724413317688,
      "loss": 2.7692,
      "step": 160008
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.299962282180786,
      "learning_rate": 0.0001279038935461038,
      "loss": 2.9081,
      "step": 160009
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.180149555206299,
      "learning_rate": 0.0001279005429910268,
      "loss": 3.1097,
      "step": 160010
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6332385540008545,
      "learning_rate": 0.00012789719246794615,
      "loss": 2.9693,
      "step": 160011
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2878880500793457,
      "learning_rate": 0.0001278938419768628,
      "loss": 3.0646,
      "step": 160012
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0976364612579346,
      "learning_rate": 0.0001278904915177771,
      "loss": 3.2211,
      "step": 160013
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0861477851867676,
      "learning_rate": 0.00012788714109068992,
      "loss": 2.7751,
      "step": 160014
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6255083084106445,
      "learning_rate": 0.00012788379069560165,
      "loss": 3.0144,
      "step": 160015
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.958684206008911,
      "learning_rate": 0.00012788044033251324,
      "loss": 2.8832,
      "step": 160016
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3621933460235596,
      "learning_rate": 0.00012787709000142491,
      "loss": 2.7005,
      "step": 160017
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.366370677947998,
      "learning_rate": 0.00012787373970233755,
      "loss": 2.9391,
      "step": 160018
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.350696325302124,
      "learning_rate": 0.00012787038943525163,
      "loss": 2.9448,
      "step": 160019
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.180832862854004,
      "learning_rate": 0.00012786703920016797,
      "loss": 3.1022,
      "step": 160020
    },
    {
      "epoch": 2.08,
      "grad_norm": 5.1262431144714355,
      "learning_rate": 0.00012786368899708696,
      "loss": 2.8877,
      "step": 160021
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6021347045898438,
      "learning_rate": 0.00012786033882600958,
      "loss": 2.878,
      "step": 160022
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.5031912326812744,
      "learning_rate": 0.00012785698868693595,
      "loss": 2.7522,
      "step": 160023
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0671074390411377,
      "learning_rate": 0.0001278536385798671,
      "loss": 2.6694,
      "step": 160024
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6590094566345215,
      "learning_rate": 0.0001278502885048034,
      "loss": 2.989,
      "step": 160025
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.659590244293213,
      "learning_rate": 0.0001278469384617457,
      "loss": 3.1261,
      "step": 160026
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6683032512664795,
      "learning_rate": 0.00012784358845069436,
      "loss": 3.0475,
      "step": 160027
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3618996143341064,
      "learning_rate": 0.00012784023847165035,
      "loss": 2.8623,
      "step": 160028
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.28127384185791,
      "learning_rate": 0.00012783688852461392,
      "loss": 3.0538,
      "step": 160029
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8255672454833984,
      "learning_rate": 0.00012783353860958596,
      "loss": 2.6594,
      "step": 160030
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3932347297668457,
      "learning_rate": 0.00012783018872656685,
      "loss": 2.901,
      "step": 160031
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2674953937530518,
      "learning_rate": 0.0001278268388755575,
      "loss": 3.0975,
      "step": 160032
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2861006259918213,
      "learning_rate": 0.00012782348905655827,
      "loss": 2.6726,
      "step": 160033
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.318181037902832,
      "learning_rate": 0.0001278201392695701,
      "loss": 3.2342,
      "step": 160034
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2820568084716797,
      "learning_rate": 0.00012781678951459324,
      "loss": 2.9681,
      "step": 160035
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2521815299987793,
      "learning_rate": 0.00012781343979162857,
      "loss": 2.7872,
      "step": 160036
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.9002537727355957,
      "learning_rate": 0.00012781009010067655,
      "loss": 2.9406,
      "step": 160037
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4790430068969727,
      "learning_rate": 0.00012780674044173795,
      "loss": 2.7408,
      "step": 160038
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.242518186569214,
      "learning_rate": 0.00012780339081481327,
      "loss": 2.9868,
      "step": 160039
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.367234945297241,
      "learning_rate": 0.0001278000412199034,
      "loss": 3.0173,
      "step": 160040
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0043792724609375,
      "learning_rate": 0.0001277966916570085,
      "loss": 2.7679,
      "step": 160041
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.771557331085205,
      "learning_rate": 0.00012779334212612958,
      "loss": 3.0714,
      "step": 160042
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.1153066158294678,
      "learning_rate": 0.000127789992627267,
      "loss": 3.0635,
      "step": 160043
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.079988479614258,
      "learning_rate": 0.00012778664316042163,
      "loss": 2.8925,
      "step": 160044
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4583733081817627,
      "learning_rate": 0.00012778329372559386,
      "loss": 3.1211,
      "step": 160045
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.882920026779175,
      "learning_rate": 0.00012777994432278467,
      "loss": 2.6769,
      "step": 160046
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2332956790924072,
      "learning_rate": 0.0001277765949519942,
      "loss": 3.0729,
      "step": 160047
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0341413021087646,
      "learning_rate": 0.00012777324561322342,
      "loss": 2.9365,
      "step": 160048
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.589766025543213,
      "learning_rate": 0.00012776989630647274,
      "loss": 2.8863,
      "step": 160049
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2252609729766846,
      "learning_rate": 0.000127766547031743,
      "loss": 3.2073,
      "step": 160050
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3521790504455566,
      "learning_rate": 0.0001277631977890346,
      "loss": 2.7795,
      "step": 160051
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.489931583404541,
      "learning_rate": 0.0001277598485783484,
      "loss": 3.106,
      "step": 160052
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.266711473464966,
      "learning_rate": 0.00012775649939968486,
      "loss": 2.8454,
      "step": 160053
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7055575847625732,
      "learning_rate": 0.00012775315025304466,
      "loss": 3.1164,
      "step": 160054
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7272794246673584,
      "learning_rate": 0.00012774980113842827,
      "loss": 2.8788,
      "step": 160055
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9717414379119873,
      "learning_rate": 0.0001277464520558366,
      "loss": 2.9128,
      "step": 160056
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.165919303894043,
      "learning_rate": 0.00012774310300526994,
      "loss": 3.0252,
      "step": 160057
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.729499101638794,
      "learning_rate": 0.00012773975398672926,
      "loss": 3.1175,
      "step": 160058
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.6143906116485596,
      "learning_rate": 0.00012773640500021494,
      "loss": 2.9833,
      "step": 160059
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8441543579101562,
      "learning_rate": 0.00012773305604572776,
      "loss": 2.7534,
      "step": 160060
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0824813842773438,
      "learning_rate": 0.00012772970712326807,
      "loss": 3.0022,
      "step": 160061
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7144293785095215,
      "learning_rate": 0.00012772635823283686,
      "loss": 2.8586,
      "step": 160062
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.551568031311035,
      "learning_rate": 0.00012772300937443439,
      "loss": 3.1316,
      "step": 160063
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.638385534286499,
      "learning_rate": 0.00012771966054806163,
      "loss": 2.898,
      "step": 160064
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.32381534576416,
      "learning_rate": 0.00012771631175371903,
      "loss": 2.9202,
      "step": 160065
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5978877544403076,
      "learning_rate": 0.0001277129629914071,
      "loss": 3.0271,
      "step": 160066
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.649813652038574,
      "learning_rate": 0.00012770961426112666,
      "loss": 3.2236,
      "step": 160067
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.8494954109191895,
      "learning_rate": 0.0001277062655628783,
      "loss": 2.9579,
      "step": 160068
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.371814250946045,
      "learning_rate": 0.00012770291689666245,
      "loss": 2.9931,
      "step": 160069
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.231976270675659,
      "learning_rate": 0.00012769956826248,
      "loss": 3.0602,
      "step": 160070
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7564549446105957,
      "learning_rate": 0.0001276962196603315,
      "loss": 2.9708,
      "step": 160071
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0705196857452393,
      "learning_rate": 0.0001276928710902174,
      "loss": 3.0365,
      "step": 160072
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.878136396408081,
      "learning_rate": 0.0001276895225521385,
      "loss": 2.8578,
      "step": 160073
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.706125020980835,
      "learning_rate": 0.00012768617404609542,
      "loss": 2.9382,
      "step": 160074
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3388187885284424,
      "learning_rate": 0.0001276828255720886,
      "loss": 3.3106,
      "step": 160075
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0290305614471436,
      "learning_rate": 0.00012767947713011892,
      "loss": 3.2156,
      "step": 160076
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0463976860046387,
      "learning_rate": 0.00012767612872018687,
      "loss": 3.0237,
      "step": 160077
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.439129114151001,
      "learning_rate": 0.000127672780342293,
      "loss": 3.0636,
      "step": 160078
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.476227283477783,
      "learning_rate": 0.00012766943199643807,
      "loss": 2.8869,
      "step": 160079
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.770174026489258,
      "learning_rate": 0.0001276660836826226,
      "loss": 2.8233,
      "step": 160080
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.668447971343994,
      "learning_rate": 0.00012766273540084733,
      "loss": 3.0797,
      "step": 160081
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.753396987915039,
      "learning_rate": 0.00012765938715111278,
      "loss": 2.7819,
      "step": 160082
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6841325759887695,
      "learning_rate": 0.00012765603893341964,
      "loss": 3.1223,
      "step": 160083
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0994484424591064,
      "learning_rate": 0.00012765269074776838,
      "loss": 3.0159,
      "step": 160084
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0796260833740234,
      "learning_rate": 0.00012764934259415985,
      "loss": 2.8501,
      "step": 160085
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.155769109725952,
      "learning_rate": 0.00012764599447259446,
      "loss": 3.1142,
      "step": 160086
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.381218433380127,
      "learning_rate": 0.00012764264638307304,
      "loss": 3.0809,
      "step": 160087
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2144007682800293,
      "learning_rate": 0.00012763929832559597,
      "loss": 2.934,
      "step": 160088
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7226476669311523,
      "learning_rate": 0.00012763595030016424,
      "loss": 2.9814,
      "step": 160089
    },
    {
      "epoch": 2.08,
      "grad_norm": 4.063601493835449,
      "learning_rate": 0.00012763260230677805,
      "loss": 2.7155,
      "step": 160090
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2992098331451416,
      "learning_rate": 0.0001276292543454383,
      "loss": 2.9723,
      "step": 160091
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3419439792633057,
      "learning_rate": 0.0001276259064161454,
      "loss": 2.7383,
      "step": 160092
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0285110473632812,
      "learning_rate": 0.00012762255851890024,
      "loss": 3.0864,
      "step": 160093
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.0385541915893555,
      "learning_rate": 0.00012761921065370315,
      "loss": 3.0536,
      "step": 160094
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.4874765872955322,
      "learning_rate": 0.00012761586282055516,
      "loss": 2.983,
      "step": 160095
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.330435037612915,
      "learning_rate": 0.00012761251501945637,
      "loss": 3.039,
      "step": 160096
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5654494762420654,
      "learning_rate": 0.00012760916725040785,
      "loss": 3.0565,
      "step": 160097
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1455376148223877,
      "learning_rate": 0.00012760581951340986,
      "loss": 2.9687,
      "step": 160098
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.784597396850586,
      "learning_rate": 0.00012760247180846338,
      "loss": 2.9096,
      "step": 160099
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.340193748474121,
      "learning_rate": 0.00012759912413556868,
      "loss": 2.7662,
      "step": 160100
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.099289655685425,
      "learning_rate": 0.00012759577649472682,
      "loss": 3.2782,
      "step": 160101
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7750277519226074,
      "learning_rate": 0.0001275924288859379,
      "loss": 2.8476,
      "step": 160102
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.814795732498169,
      "learning_rate": 0.00012758908130920292,
      "loss": 3.1229,
      "step": 160103
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.3570618629455566,
      "learning_rate": 0.0001275857337645223,
      "loss": 3.0665,
      "step": 160104
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4734609127044678,
      "learning_rate": 0.00012758238625189684,
      "loss": 2.9349,
      "step": 160105
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.9325993061065674,
      "learning_rate": 0.00012757903877132698,
      "loss": 2.963,
      "step": 160106
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.413426399230957,
      "learning_rate": 0.00012757569132281364,
      "loss": 2.9504,
      "step": 160107
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.6232032775878906,
      "learning_rate": 0.000127572343906357,
      "loss": 2.8651,
      "step": 160108
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2235803604125977,
      "learning_rate": 0.00012756899652195805,
      "loss": 2.7822,
      "step": 160109
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.849992036819458,
      "learning_rate": 0.00012756564916961713,
      "loss": 2.9963,
      "step": 160110
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5313684940338135,
      "learning_rate": 0.00012756230184933514,
      "loss": 3.1,
      "step": 160111
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.711543083190918,
      "learning_rate": 0.00012755895456111247,
      "loss": 2.9539,
      "step": 160112
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.2104809284210205,
      "learning_rate": 0.00012755560730495007,
      "loss": 3.138,
      "step": 160113
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.61623477935791,
      "learning_rate": 0.0001275522600808481,
      "loss": 2.8202,
      "step": 160114
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.0768563747406006,
      "learning_rate": 0.00012754891288880758,
      "loss": 2.8936,
      "step": 160115
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.9282195568084717,
      "learning_rate": 0.0001275455657288288,
      "loss": 2.9287,
      "step": 160116
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.290029287338257,
      "learning_rate": 0.00012754221860091267,
      "loss": 2.907,
      "step": 160117
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.7572896480560303,
      "learning_rate": 0.00012753887150505964,
      "loss": 2.8088,
      "step": 160118
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.224553108215332,
      "learning_rate": 0.00012753552444127054,
      "loss": 3.0481,
      "step": 160119
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.865809917449951,
      "learning_rate": 0.00012753217740954566,
      "loss": 3.0356,
      "step": 160120
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.7226483821868896,
      "learning_rate": 0.00012752883040988592,
      "loss": 2.8351,
      "step": 160121
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8965744972229004,
      "learning_rate": 0.0001275254834422917,
      "loss": 3.0545,
      "step": 160122
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.3256428241729736,
      "learning_rate": 0.00012752213650676388,
      "loss": 2.9662,
      "step": 160123
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.583747625350952,
      "learning_rate": 0.0001275187896033028,
      "loss": 3.0273,
      "step": 160124
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.4836466312408447,
      "learning_rate": 0.0001275154427319095,
      "loss": 2.9321,
      "step": 160125
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.3955185413360596,
      "learning_rate": 0.00012751209589258408,
      "loss": 2.7164,
      "step": 160126
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.1979873180389404,
      "learning_rate": 0.00012750874908532757,
      "loss": 3.1044,
      "step": 160127
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.8294241428375244,
      "learning_rate": 0.00012750540231014026,
      "loss": 3.0563,
      "step": 160128
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5754992961883545,
      "learning_rate": 0.0001275020555670231,
      "loss": 3.2102,
      "step": 160129
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.5084121227264404,
      "learning_rate": 0.00012749870885597647,
      "loss": 2.8406,
      "step": 160130
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8042614459991455,
      "learning_rate": 0.0001274953621770013,
      "loss": 2.8399,
      "step": 160131
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.782241106033325,
      "learning_rate": 0.00012749201553009776,
      "loss": 2.7965,
      "step": 160132
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.724276065826416,
      "learning_rate": 0.00012748866891526683,
      "loss": 2.9357,
      "step": 160133
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.672583818435669,
      "learning_rate": 0.0001274853223325089,
      "loss": 3.0207,
      "step": 160134
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.727261781692505,
      "learning_rate": 0.00012748197578182482,
      "loss": 2.9706,
      "step": 160135
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5760915279388428,
      "learning_rate": 0.00012747862926321498,
      "loss": 2.9356,
      "step": 160136
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1992599964141846,
      "learning_rate": 0.00012747528277668035,
      "loss": 2.9791,
      "step": 160137
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.718641996383667,
      "learning_rate": 0.0001274719363222211,
      "loss": 2.8859,
      "step": 160138
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.772306442260742,
      "learning_rate": 0.00012746858989983817,
      "loss": 3.123,
      "step": 160139
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.034507989883423,
      "learning_rate": 0.00012746524350953198,
      "loss": 2.7765,
      "step": 160140
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.204854726791382,
      "learning_rate": 0.0001274618971513034,
      "loss": 2.7012,
      "step": 160141
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1850597858428955,
      "learning_rate": 0.00012745855082515274,
      "loss": 3.2113,
      "step": 160142
    },
    {
      "epoch": 2.09,
      "grad_norm": 5.201166152954102,
      "learning_rate": 0.00012745520453108093,
      "loss": 2.8506,
      "step": 160143
    },
    {
      "epoch": 2.09,
      "grad_norm": 5.240382194519043,
      "learning_rate": 0.00012745185826908847,
      "loss": 2.916,
      "step": 160144
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.922769784927368,
      "learning_rate": 0.00012744851203917598,
      "loss": 2.9128,
      "step": 160145
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3394033908843994,
      "learning_rate": 0.00012744516584134392,
      "loss": 2.9489,
      "step": 160146
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.5150628089904785,
      "learning_rate": 0.00012744181967559322,
      "loss": 2.9665,
      "step": 160147
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.9734413623809814,
      "learning_rate": 0.00012743847354192416,
      "loss": 3.0153,
      "step": 160148
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3145620822906494,
      "learning_rate": 0.00012743512744033773,
      "loss": 2.9342,
      "step": 160149
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.617500066757202,
      "learning_rate": 0.0001274317813708343,
      "loss": 3.0896,
      "step": 160150
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.438917875289917,
      "learning_rate": 0.0001274284353334145,
      "loss": 3.1406,
      "step": 160151
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7860360145568848,
      "learning_rate": 0.0001274250893280791,
      "loss": 2.8307,
      "step": 160152
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7846195697784424,
      "learning_rate": 0.00012742174335482863,
      "loss": 2.818,
      "step": 160153
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.442312002182007,
      "learning_rate": 0.00012741839741366363,
      "loss": 3.1917,
      "step": 160154
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.852311611175537,
      "learning_rate": 0.0001274150515045849,
      "loss": 3.1038,
      "step": 160155
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3119146823883057,
      "learning_rate": 0.00012741170562759297,
      "loss": 2.5992,
      "step": 160156
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9256343841552734,
      "learning_rate": 0.00012740835978268834,
      "loss": 2.8597,
      "step": 160157
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2833735942840576,
      "learning_rate": 0.00012740501396987187,
      "loss": 2.8523,
      "step": 160158
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.591243028640747,
      "learning_rate": 0.00012740166818914407,
      "loss": 3.2161,
      "step": 160159
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5079123973846436,
      "learning_rate": 0.00012739832244050545,
      "loss": 3.0343,
      "step": 160160
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.9273979663848877,
      "learning_rate": 0.00012739497672395686,
      "loss": 2.8983,
      "step": 160161
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.459474563598633,
      "learning_rate": 0.0001273916310394988,
      "loss": 3.0552,
      "step": 160162
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.450843334197998,
      "learning_rate": 0.00012738828538713177,
      "loss": 3.0726,
      "step": 160163
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6435294151306152,
      "learning_rate": 0.00012738493976685664,
      "loss": 2.9421,
      "step": 160164
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.689467668533325,
      "learning_rate": 0.0001273815941786738,
      "loss": 3.0479,
      "step": 160165
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.548858404159546,
      "learning_rate": 0.00012737824862258407,
      "loss": 2.9851,
      "step": 160166
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2656657695770264,
      "learning_rate": 0.000127374903098588,
      "loss": 2.8803,
      "step": 160167
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.295285701751709,
      "learning_rate": 0.00012737155760668618,
      "loss": 2.7309,
      "step": 160168
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.466627836227417,
      "learning_rate": 0.00012736821214687913,
      "loss": 3.0181,
      "step": 160169
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0908448696136475,
      "learning_rate": 0.00012736486671916772,
      "loss": 3.0763,
      "step": 160170
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.270620346069336,
      "learning_rate": 0.00012736152132355232,
      "loss": 2.9638,
      "step": 160171
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.451259136199951,
      "learning_rate": 0.00012735817596003378,
      "loss": 2.9238,
      "step": 160172
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.308824062347412,
      "learning_rate": 0.0001273548306286126,
      "loss": 3.1439,
      "step": 160173
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.639941692352295,
      "learning_rate": 0.00012735148532928945,
      "loss": 2.6318,
      "step": 160174
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4302055835723877,
      "learning_rate": 0.00012734814006206478,
      "loss": 3.0769,
      "step": 160175
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3112635612487793,
      "learning_rate": 0.00012734479482693947,
      "loss": 3.266,
      "step": 160176
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.935762643814087,
      "learning_rate": 0.00012734144962391392,
      "loss": 2.8754,
      "step": 160177
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3806145191192627,
      "learning_rate": 0.00012733810445298893,
      "loss": 3.1914,
      "step": 160178
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6687028408050537,
      "learning_rate": 0.00012733475931416498,
      "loss": 2.9652,
      "step": 160179
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4050183296203613,
      "learning_rate": 0.00012733141420744293,
      "loss": 3.1282,
      "step": 160180
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2081425189971924,
      "learning_rate": 0.00012732806913282307,
      "loss": 3.1278,
      "step": 160181
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.929928779602051,
      "learning_rate": 0.00012732472409030627,
      "loss": 2.5808,
      "step": 160182
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4302914142608643,
      "learning_rate": 0.00012732137907989295,
      "loss": 2.9236,
      "step": 160183
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.386106014251709,
      "learning_rate": 0.00012731803410158393,
      "loss": 2.9089,
      "step": 160184
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1486523151397705,
      "learning_rate": 0.00012731468915537963,
      "loss": 3.0695,
      "step": 160185
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.223574161529541,
      "learning_rate": 0.00012731134424128103,
      "loss": 2.8986,
      "step": 160186
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.090836524963379,
      "learning_rate": 0.00012730799935928834,
      "loss": 2.6567,
      "step": 160187
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.328693389892578,
      "learning_rate": 0.0001273046545094024,
      "loss": 3.1332,
      "step": 160188
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7823030948638916,
      "learning_rate": 0.00012730130969162368,
      "loss": 2.8236,
      "step": 160189
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5661563873291016,
      "learning_rate": 0.00012729796490595306,
      "loss": 3.0545,
      "step": 160190
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5612423419952393,
      "learning_rate": 0.00012729462015239083,
      "loss": 2.7472,
      "step": 160191
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.362814426422119,
      "learning_rate": 0.00012729127543093806,
      "loss": 3.1105,
      "step": 160192
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.282867908477783,
      "learning_rate": 0.00012728793074159487,
      "loss": 2.9883,
      "step": 160193
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4086670875549316,
      "learning_rate": 0.00012728458608436222,
      "loss": 3.2537,
      "step": 160194
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.5235888957977295,
      "learning_rate": 0.0001272812414592405,
      "loss": 2.9028,
      "step": 160195
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5666656494140625,
      "learning_rate": 0.00012727789686623058,
      "loss": 2.9141,
      "step": 160196
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8217556476593018,
      "learning_rate": 0.0001272745523053329,
      "loss": 2.8616,
      "step": 160197
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.357948064804077,
      "learning_rate": 0.0001272712077765483,
      "loss": 2.9132,
      "step": 160198
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2605507373809814,
      "learning_rate": 0.00012726786327987703,
      "loss": 2.8735,
      "step": 160199
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3190174102783203,
      "learning_rate": 0.00012726451881532006,
      "loss": 2.7293,
      "step": 160200
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2272443771362305,
      "learning_rate": 0.00012726117438287773,
      "loss": 2.9318,
      "step": 160201
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.411912202835083,
      "learning_rate": 0.00012725782998255093,
      "loss": 2.8331,
      "step": 160202
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9555132389068604,
      "learning_rate": 0.00012725448561434007,
      "loss": 3.0408,
      "step": 160203
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.557001829147339,
      "learning_rate": 0.00012725114127824607,
      "loss": 3.0044,
      "step": 160204
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.413163661956787,
      "learning_rate": 0.00012724779697426911,
      "loss": 2.9629,
      "step": 160205
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.8274312019348145,
      "learning_rate": 0.0001272444527024102,
      "loss": 2.9034,
      "step": 160206
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.648098945617676,
      "learning_rate": 0.00012724110846266964,
      "loss": 2.6973,
      "step": 160207
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.625666856765747,
      "learning_rate": 0.0001272377642550484,
      "loss": 2.9537,
      "step": 160208
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.642735242843628,
      "learning_rate": 0.00012723442007954675,
      "loss": 2.7449,
      "step": 160209
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7759337425231934,
      "learning_rate": 0.0001272310759361656,
      "loss": 3.033,
      "step": 160210
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5995020866394043,
      "learning_rate": 0.00012722773182490546,
      "loss": 3.0065,
      "step": 160211
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.277055263519287,
      "learning_rate": 0.00012722438774576695,
      "loss": 3.0633,
      "step": 160212
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1265087127685547,
      "learning_rate": 0.0001272210436987506,
      "loss": 2.9236,
      "step": 160213
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3236846923828125,
      "learning_rate": 0.0001272176996838572,
      "loss": 2.9097,
      "step": 160214
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.333200216293335,
      "learning_rate": 0.0001272143557010872,
      "loss": 2.8809,
      "step": 160215
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.346088171005249,
      "learning_rate": 0.0001272110117504414,
      "loss": 2.8475,
      "step": 160216
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5003209114074707,
      "learning_rate": 0.00012720766783192036,
      "loss": 3.0293,
      "step": 160217
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2699012756347656,
      "learning_rate": 0.00012720432394552463,
      "loss": 3.145,
      "step": 160218
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.545684576034546,
      "learning_rate": 0.00012720098009125482,
      "loss": 2.8843,
      "step": 160219
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5558483600616455,
      "learning_rate": 0.00012719763626911168,
      "loss": 3.0067,
      "step": 160220
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7886669635772705,
      "learning_rate": 0.00012719429247909567,
      "loss": 3.0219,
      "step": 160221
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.575317621231079,
      "learning_rate": 0.00012719094872120764,
      "loss": 2.8236,
      "step": 160222
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.620561361312866,
      "learning_rate": 0.000127187604995448,
      "loss": 2.8244,
      "step": 160223
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6010332107543945,
      "learning_rate": 0.00012718426130181753,
      "loss": 2.8326,
      "step": 160224
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.184391021728516,
      "learning_rate": 0.00012718091764031664,
      "loss": 2.8852,
      "step": 160225
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.321580410003662,
      "learning_rate": 0.00012717757401094616,
      "loss": 2.9311,
      "step": 160226
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.993055820465088,
      "learning_rate": 0.0001271742304137065,
      "loss": 2.8932,
      "step": 160227
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.913048028945923,
      "learning_rate": 0.0001271708868485986,
      "loss": 2.8771,
      "step": 160228
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.31050443649292,
      "learning_rate": 0.00012716754331562285,
      "loss": 2.9186,
      "step": 160229
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.707052230834961,
      "learning_rate": 0.00012716419981477988,
      "loss": 2.9255,
      "step": 160230
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.68097186088562,
      "learning_rate": 0.0001271608563460703,
      "loss": 3.0192,
      "step": 160231
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.86395263671875,
      "learning_rate": 0.00012715751290949485,
      "loss": 2.9762,
      "step": 160232
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2617380619049072,
      "learning_rate": 0.000127154169505054,
      "loss": 2.7889,
      "step": 160233
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1729824542999268,
      "learning_rate": 0.00012715082613274856,
      "loss": 3.0345,
      "step": 160234
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3239388465881348,
      "learning_rate": 0.00012714748279257903,
      "loss": 3.0523,
      "step": 160235
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.533981800079346,
      "learning_rate": 0.00012714413948454605,
      "loss": 2.8025,
      "step": 160236
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.953696250915527,
      "learning_rate": 0.00012714079620865012,
      "loss": 2.7482,
      "step": 160237
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8740429878234863,
      "learning_rate": 0.0001271374529648921,
      "loss": 3.1177,
      "step": 160238
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.643538475036621,
      "learning_rate": 0.00012713410975327238,
      "loss": 2.8315,
      "step": 160239
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.319024085998535,
      "learning_rate": 0.0001271307665737918,
      "loss": 2.9113,
      "step": 160240
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.360048294067383,
      "learning_rate": 0.00012712742342645085,
      "loss": 2.7929,
      "step": 160241
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.663566827774048,
      "learning_rate": 0.00012712408031125008,
      "loss": 2.9488,
      "step": 160242
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.119629144668579,
      "learning_rate": 0.00012712073722819035,
      "loss": 2.9687,
      "step": 160243
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.801323175430298,
      "learning_rate": 0.0001271173941772721,
      "loss": 2.9619,
      "step": 160244
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.055758237838745,
      "learning_rate": 0.00012711405115849587,
      "loss": 3.0658,
      "step": 160245
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.6325418949127197,
      "learning_rate": 0.00012711070817186254,
      "loss": 3.0299,
      "step": 160246
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.304945945739746,
      "learning_rate": 0.0001271073652173726,
      "loss": 3.0284,
      "step": 160247
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5484402179718018,
      "learning_rate": 0.00012710402229502654,
      "loss": 2.9378,
      "step": 160248
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.60483980178833,
      "learning_rate": 0.0001271006794048252,
      "loss": 3.104,
      "step": 160249
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.721717834472656,
      "learning_rate": 0.000127097336546769,
      "loss": 3.1145,
      "step": 160250
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7511820793151855,
      "learning_rate": 0.0001270939937208588,
      "loss": 2.995,
      "step": 160251
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.50028920173645,
      "learning_rate": 0.0001270906509270951,
      "loss": 3.2275,
      "step": 160252
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3416452407836914,
      "learning_rate": 0.0001270873081654785,
      "loss": 2.8488,
      "step": 160253
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.225997447967529,
      "learning_rate": 0.00012708396543600951,
      "loss": 2.743,
      "step": 160254
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3673644065856934,
      "learning_rate": 0.000127080622738689,
      "loss": 3.1131,
      "step": 160255
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9197394847869873,
      "learning_rate": 0.00012707728007351735,
      "loss": 3.0556,
      "step": 160256
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7048110961914062,
      "learning_rate": 0.0001270739374404954,
      "loss": 2.8052,
      "step": 160257
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.202054500579834,
      "learning_rate": 0.00012707059483962367,
      "loss": 2.9284,
      "step": 160258
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3512823581695557,
      "learning_rate": 0.00012706725227090278,
      "loss": 2.8801,
      "step": 160259
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.499399185180664,
      "learning_rate": 0.00012706390973433326,
      "loss": 2.8219,
      "step": 160260
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8786494731903076,
      "learning_rate": 0.00012706056722991592,
      "loss": 3.0387,
      "step": 160261
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7085344791412354,
      "learning_rate": 0.00012705722475765118,
      "loss": 3.1516,
      "step": 160262
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.378282070159912,
      "learning_rate": 0.00012705388231753988,
      "loss": 2.819,
      "step": 160263
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4869725704193115,
      "learning_rate": 0.0001270505399095824,
      "loss": 3.0486,
      "step": 160264
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.642449140548706,
      "learning_rate": 0.00012704719753377973,
      "loss": 3.1098,
      "step": 160265
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.664846897125244,
      "learning_rate": 0.000127043855190132,
      "loss": 2.7852,
      "step": 160266
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7159688472747803,
      "learning_rate": 0.00012704051287864022,
      "loss": 2.7346,
      "step": 160267
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.352604389190674,
      "learning_rate": 0.00012703717059930474,
      "loss": 3.0055,
      "step": 160268
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.889504909515381,
      "learning_rate": 0.00012703382835212648,
      "loss": 2.8762,
      "step": 160269
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.7662835121154785,
      "learning_rate": 0.00012703048613710573,
      "loss": 3.2375,
      "step": 160270
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.140455961227417,
      "learning_rate": 0.00012702714395424353,
      "loss": 2.8978,
      "step": 160271
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4263062477111816,
      "learning_rate": 0.00012702380180354,
      "loss": 2.6862,
      "step": 160272
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5637834072113037,
      "learning_rate": 0.00012702045968499616,
      "loss": 3.0311,
      "step": 160273
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7382757663726807,
      "learning_rate": 0.00012701711759861232,
      "loss": 2.9556,
      "step": 160274
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4072442054748535,
      "learning_rate": 0.0001270137755443894,
      "loss": 2.8652,
      "step": 160275
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.421531915664673,
      "learning_rate": 0.00012701043352232778,
      "loss": 2.818,
      "step": 160276
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6030099391937256,
      "learning_rate": 0.0001270070915324283,
      "loss": 3.0703,
      "step": 160277
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2905380725860596,
      "learning_rate": 0.00012700374957469145,
      "loss": 2.8877,
      "step": 160278
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4988577365875244,
      "learning_rate": 0.00012700040764911788,
      "loss": 2.7332,
      "step": 160279
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.417375087738037,
      "learning_rate": 0.0001269970657557081,
      "loss": 3.2237,
      "step": 160280
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6575820446014404,
      "learning_rate": 0.00012699372389446292,
      "loss": 2.7685,
      "step": 160281
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.608938217163086,
      "learning_rate": 0.00012699038206538275,
      "loss": 2.8053,
      "step": 160282
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2656242847442627,
      "learning_rate": 0.00012698704026846847,
      "loss": 2.9537,
      "step": 160283
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4116663932800293,
      "learning_rate": 0.00012698369850372058,
      "loss": 2.8658,
      "step": 160284
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9101669788360596,
      "learning_rate": 0.00012698035677113966,
      "loss": 2.9247,
      "step": 160285
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.503300905227661,
      "learning_rate": 0.00012697701507072623,
      "loss": 2.8713,
      "step": 160286
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.154010772705078,
      "learning_rate": 0.0001269736734024812,
      "loss": 3.0083,
      "step": 160287
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.322422504425049,
      "learning_rate": 0.0001269703317664049,
      "loss": 2.8388,
      "step": 160288
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.107304811477661,
      "learning_rate": 0.00012696699016249817,
      "loss": 2.8633,
      "step": 160289
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.276283025741577,
      "learning_rate": 0.00012696364859076154,
      "loss": 2.8074,
      "step": 160290
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.348017930984497,
      "learning_rate": 0.00012696030705119566,
      "loss": 3.0159,
      "step": 160291
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8491015434265137,
      "learning_rate": 0.000126956965543801,
      "loss": 2.779,
      "step": 160292
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.337188720703125,
      "learning_rate": 0.00012695362406857843,
      "loss": 2.7299,
      "step": 160293
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2350149154663086,
      "learning_rate": 0.00012695028262552834,
      "loss": 2.8583,
      "step": 160294
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1207385063171387,
      "learning_rate": 0.00012694694121465156,
      "loss": 3.0663,
      "step": 160295
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.029466152191162,
      "learning_rate": 0.0001269435998359486,
      "loss": 2.9086,
      "step": 160296
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2409284114837646,
      "learning_rate": 0.0001269402584894201,
      "loss": 2.8762,
      "step": 160297
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6287312507629395,
      "learning_rate": 0.00012693691717506657,
      "loss": 2.8052,
      "step": 160298
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.297454357147217,
      "learning_rate": 0.00012693357589288882,
      "loss": 3.0737,
      "step": 160299
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.326460361480713,
      "learning_rate": 0.00012693023464288732,
      "loss": 2.7051,
      "step": 160300
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.880685806274414,
      "learning_rate": 0.00012692689342506282,
      "loss": 2.9582,
      "step": 160301
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2121994495391846,
      "learning_rate": 0.0001269235522394159,
      "loss": 2.7561,
      "step": 160302
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2627856731414795,
      "learning_rate": 0.00012692021108594715,
      "loss": 2.785,
      "step": 160303
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.55853009223938,
      "learning_rate": 0.00012691686996465712,
      "loss": 3.1778,
      "step": 160304
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.235264778137207,
      "learning_rate": 0.00012691352887554658,
      "loss": 3.0163,
      "step": 160305
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.5112366676330566,
      "learning_rate": 0.00012691018781861598,
      "loss": 2.6116,
      "step": 160306
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.857527732849121,
      "learning_rate": 0.00012690684679386615,
      "loss": 3.159,
      "step": 160307
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3137733936309814,
      "learning_rate": 0.00012690350580129764,
      "loss": 3.045,
      "step": 160308
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8373732566833496,
      "learning_rate": 0.00012690016484091102,
      "loss": 2.9102,
      "step": 160309
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.8259687423706055,
      "learning_rate": 0.0001268968239127068,
      "loss": 2.8296,
      "step": 160310
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.719986915588379,
      "learning_rate": 0.00012689348301668583,
      "loss": 2.8872,
      "step": 160311
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.044656276702881,
      "learning_rate": 0.00012689014215284857,
      "loss": 3.0149,
      "step": 160312
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.364365339279175,
      "learning_rate": 0.00012688680132119575,
      "loss": 3.0197,
      "step": 160313
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3866806030273438,
      "learning_rate": 0.00012688346052172798,
      "loss": 2.915,
      "step": 160314
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.630322456359863,
      "learning_rate": 0.00012688011975444584,
      "loss": 2.9533,
      "step": 160315
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.825777530670166,
      "learning_rate": 0.0001268767790193498,
      "loss": 3.0451,
      "step": 160316
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.504612445831299,
      "learning_rate": 0.0001268734383164408,
      "loss": 3.1066,
      "step": 160317
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4873621463775635,
      "learning_rate": 0.00012687009764571916,
      "loss": 2.986,
      "step": 160318
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2747554779052734,
      "learning_rate": 0.00012686675700718575,
      "loss": 3.0412,
      "step": 160319
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8026883602142334,
      "learning_rate": 0.0001268634164008411,
      "loss": 3.2381,
      "step": 160320
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.6061410903930664,
      "learning_rate": 0.00012686007582668577,
      "loss": 3.0459,
      "step": 160321
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.535175800323486,
      "learning_rate": 0.00012685673528472032,
      "loss": 2.7259,
      "step": 160322
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.246885061264038,
      "learning_rate": 0.0001268533947749456,
      "loss": 3.0014,
      "step": 160323
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.146787166595459,
      "learning_rate": 0.00012685005429736198,
      "loss": 2.9306,
      "step": 160324
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2861807346343994,
      "learning_rate": 0.0001268467138519703,
      "loss": 2.8947,
      "step": 160325
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.507674217224121,
      "learning_rate": 0.00012684337343877114,
      "loss": 3.0703,
      "step": 160326
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.941112756729126,
      "learning_rate": 0.0001268400330577649,
      "loss": 3.0787,
      "step": 160327
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8208069801330566,
      "learning_rate": 0.00012683669270895247,
      "loss": 2.8945,
      "step": 160328
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4116744995117188,
      "learning_rate": 0.0001268333523923344,
      "loss": 2.941,
      "step": 160329
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.987375259399414,
      "learning_rate": 0.0001268300121079112,
      "loss": 2.8284,
      "step": 160330
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.159085512161255,
      "learning_rate": 0.0001268266718556836,
      "loss": 2.7623,
      "step": 160331
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.899653196334839,
      "learning_rate": 0.00012682333163565224,
      "loss": 2.959,
      "step": 160332
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9480061531066895,
      "learning_rate": 0.00012681999144781755,
      "loss": 2.7549,
      "step": 160333
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4716358184814453,
      "learning_rate": 0.00012681665129218042,
      "loss": 2.6946,
      "step": 160334
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0374701023101807,
      "learning_rate": 0.00012681331116874123,
      "loss": 2.8041,
      "step": 160335
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1827523708343506,
      "learning_rate": 0.00012680997107750085,
      "loss": 2.733,
      "step": 160336
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.327714204788208,
      "learning_rate": 0.00012680663101845974,
      "loss": 2.944,
      "step": 160337
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1265976428985596,
      "learning_rate": 0.00012680329099161856,
      "loss": 2.7255,
      "step": 160338
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6878883838653564,
      "learning_rate": 0.0001267999509969778,
      "loss": 2.6888,
      "step": 160339
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.576134443283081,
      "learning_rate": 0.00012679661103453833,
      "loss": 2.8407,
      "step": 160340
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3916914463043213,
      "learning_rate": 0.00012679327110430048,
      "loss": 2.9569,
      "step": 160341
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.739020347595215,
      "learning_rate": 0.00012678993120626518,
      "loss": 2.8513,
      "step": 160342
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2708773612976074,
      "learning_rate": 0.00012678659134043288,
      "loss": 2.9922,
      "step": 160343
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6585659980773926,
      "learning_rate": 0.00012678325150680412,
      "loss": 2.985,
      "step": 160344
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.413940906524658,
      "learning_rate": 0.00012677991170537974,
      "loss": 3.1315,
      "step": 160345
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.91560435295105,
      "learning_rate": 0.00012677657193616026,
      "loss": 3.0257,
      "step": 160346
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.018022060394287,
      "learning_rate": 0.00012677323219914614,
      "loss": 2.9842,
      "step": 160347
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.676081418991089,
      "learning_rate": 0.00012676989249433827,
      "loss": 3.0347,
      "step": 160348
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.7889363765716553,
      "learning_rate": 0.00012676655282173704,
      "loss": 2.9346,
      "step": 160349
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3057076930999756,
      "learning_rate": 0.00012676321318134327,
      "loss": 3.1749,
      "step": 160350
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0154452323913574,
      "learning_rate": 0.00012675987357315752,
      "loss": 2.9402,
      "step": 160351
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6393167972564697,
      "learning_rate": 0.00012675653399718035,
      "loss": 2.9803,
      "step": 160352
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6864264011383057,
      "learning_rate": 0.00012675319445341234,
      "loss": 3.117,
      "step": 160353
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1768240928649902,
      "learning_rate": 0.00012674985494185427,
      "loss": 2.9963,
      "step": 160354
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4331088066101074,
      "learning_rate": 0.00012674651546250654,
      "loss": 2.8213,
      "step": 160355
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.70753812789917,
      "learning_rate": 0.00012674317601537005,
      "loss": 3.1345,
      "step": 160356
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.885711431503296,
      "learning_rate": 0.00012673983660044526,
      "loss": 2.8853,
      "step": 160357
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7234301567077637,
      "learning_rate": 0.00012673649721773277,
      "loss": 2.9881,
      "step": 160358
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.402745008468628,
      "learning_rate": 0.0001267331578672332,
      "loss": 2.8786,
      "step": 160359
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5288915634155273,
      "learning_rate": 0.00012672981854894725,
      "loss": 2.6327,
      "step": 160360
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.160875082015991,
      "learning_rate": 0.00012672647926287542,
      "loss": 2.9098,
      "step": 160361
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1715564727783203,
      "learning_rate": 0.00012672314000901848,
      "loss": 3.0109,
      "step": 160362
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3090622425079346,
      "learning_rate": 0.00012671980078737702,
      "loss": 2.8696,
      "step": 160363
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4318456649780273,
      "learning_rate": 0.00012671646159795162,
      "loss": 2.7622,
      "step": 160364
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3352792263031006,
      "learning_rate": 0.0001267131224407428,
      "loss": 2.9752,
      "step": 160365
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.201162576675415,
      "learning_rate": 0.00012670978331575134,
      "loss": 2.7758,
      "step": 160366
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7901647090911865,
      "learning_rate": 0.00012670644422297774,
      "loss": 3.0032,
      "step": 160367
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6519551277160645,
      "learning_rate": 0.00012670310516242278,
      "loss": 3.0143,
      "step": 160368
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.221456527709961,
      "learning_rate": 0.00012669976613408695,
      "loss": 2.8701,
      "step": 160369
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.388991355895996,
      "learning_rate": 0.00012669642713797097,
      "loss": 2.8788,
      "step": 160370
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.647653579711914,
      "learning_rate": 0.0001266930881740752,
      "loss": 2.7575,
      "step": 160371
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3642578125,
      "learning_rate": 0.00012668974924240063,
      "loss": 2.9594,
      "step": 160372
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.557461738586426,
      "learning_rate": 0.00012668641034294762,
      "loss": 3.0559,
      "step": 160373
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.782090663909912,
      "learning_rate": 0.0001266830714757169,
      "loss": 3.086,
      "step": 160374
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3034377098083496,
      "learning_rate": 0.00012667973264070915,
      "loss": 3.0104,
      "step": 160375
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.5041093826293945,
      "learning_rate": 0.00012667639383792488,
      "loss": 2.9762,
      "step": 160376
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2339725494384766,
      "learning_rate": 0.00012667305506736465,
      "loss": 2.977,
      "step": 160377
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4188036918640137,
      "learning_rate": 0.00012666971632902926,
      "loss": 3.0422,
      "step": 160378
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.925574779510498,
      "learning_rate": 0.00012666637762291912,
      "loss": 2.9669,
      "step": 160379
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.220472812652588,
      "learning_rate": 0.00012666303894903508,
      "loss": 3.0371,
      "step": 160380
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.744558811187744,
      "learning_rate": 0.00012665970030737768,
      "loss": 2.88,
      "step": 160381
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6063756942749023,
      "learning_rate": 0.0001266563616979475,
      "loss": 3.1798,
      "step": 160382
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5950539112091064,
      "learning_rate": 0.00012665302312074507,
      "loss": 2.9407,
      "step": 160383
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.397172451019287,
      "learning_rate": 0.00012664968457577123,
      "loss": 3.0951,
      "step": 160384
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5759806632995605,
      "learning_rate": 0.00012664634606302636,
      "loss": 2.9582,
      "step": 160385
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.199772834777832,
      "learning_rate": 0.00012664300758251135,
      "loss": 2.8564,
      "step": 160386
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.559553861618042,
      "learning_rate": 0.00012663966913422663,
      "loss": 2.9324,
      "step": 160387
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9073774814605713,
      "learning_rate": 0.00012663633071817288,
      "loss": 2.9545,
      "step": 160388
    },
    {
      "epoch": 2.09,
      "grad_norm": 1.9988898038864136,
      "learning_rate": 0.0001266329923343506,
      "loss": 2.9537,
      "step": 160389
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.334662437438965,
      "learning_rate": 0.00012662965398276064,
      "loss": 2.9563,
      "step": 160390
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9332499504089355,
      "learning_rate": 0.0001266263156634034,
      "loss": 2.8948,
      "step": 160391
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1711552143096924,
      "learning_rate": 0.0001266229773762797,
      "loss": 2.7482,
      "step": 160392
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.463279962539673,
      "learning_rate": 0.00012661963912139005,
      "loss": 3.1052,
      "step": 160393
    },
    {
      "epoch": 2.09,
      "grad_norm": 1.9927978515625,
      "learning_rate": 0.0001266163008987351,
      "loss": 2.9732,
      "step": 160394
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.853532075881958,
      "learning_rate": 0.00012661296270831532,
      "loss": 2.9051,
      "step": 160395
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4851722717285156,
      "learning_rate": 0.00012660962455013158,
      "loss": 2.9794,
      "step": 160396
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.717775344848633,
      "learning_rate": 0.00012660628642418428,
      "loss": 2.948,
      "step": 160397
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.635694980621338,
      "learning_rate": 0.00012660294833047425,
      "loss": 2.9606,
      "step": 160398
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.211381673812866,
      "learning_rate": 0.00012659961026900203,
      "loss": 2.9754,
      "step": 160399
    },
    {
      "epoch": 2.09,
      "grad_norm": 1.9742141962051392,
      "learning_rate": 0.00012659627223976817,
      "loss": 2.8141,
      "step": 160400
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.65217661857605,
      "learning_rate": 0.00012659293424277325,
      "loss": 2.907,
      "step": 160401
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.552813768386841,
      "learning_rate": 0.0001265895962780181,
      "loss": 2.9896,
      "step": 160402
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5540482997894287,
      "learning_rate": 0.0001265862583455031,
      "loss": 3.0493,
      "step": 160403
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.244379758834839,
      "learning_rate": 0.0001265829204452291,
      "loss": 2.8871,
      "step": 160404
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0861926078796387,
      "learning_rate": 0.0001265795825771966,
      "loss": 2.8938,
      "step": 160405
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3285439014434814,
      "learning_rate": 0.00012657624474140622,
      "loss": 3.03,
      "step": 160406
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.702558994293213,
      "learning_rate": 0.0001265729069378585,
      "loss": 2.7514,
      "step": 160407
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9734692573547363,
      "learning_rate": 0.00012656956916655426,
      "loss": 3.0071,
      "step": 160408
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.648487091064453,
      "learning_rate": 0.0001265662314274939,
      "loss": 2.8892,
      "step": 160409
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.459705114364624,
      "learning_rate": 0.00012656289372067826,
      "loss": 2.6631,
      "step": 160410
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0030269622802734,
      "learning_rate": 0.00012655955604610772,
      "loss": 2.9628,
      "step": 160411
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5566418170928955,
      "learning_rate": 0.00012655621840378315,
      "loss": 3.0902,
      "step": 160412
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.708648443222046,
      "learning_rate": 0.00012655288079370505,
      "loss": 2.9407,
      "step": 160413
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3244714736938477,
      "learning_rate": 0.00012654954321587405,
      "loss": 2.7933,
      "step": 160414
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6882238388061523,
      "learning_rate": 0.00012654620567029067,
      "loss": 2.8916,
      "step": 160415
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.857236385345459,
      "learning_rate": 0.00012654286815695572,
      "loss": 2.9887,
      "step": 160416
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.379370927810669,
      "learning_rate": 0.00012653953067586962,
      "loss": 2.9264,
      "step": 160417
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.125577926635742,
      "learning_rate": 0.00012653619322703323,
      "loss": 3.2751,
      "step": 160418
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.467705726623535,
      "learning_rate": 0.000126532855810447,
      "loss": 2.9352,
      "step": 160419
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.377676010131836,
      "learning_rate": 0.00012652951842611156,
      "loss": 2.809,
      "step": 160420
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.473301410675049,
      "learning_rate": 0.00012652618107402753,
      "loss": 2.9611,
      "step": 160421
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5077497959136963,
      "learning_rate": 0.00012652284375419561,
      "loss": 2.7367,
      "step": 160422
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.9165711402893066,
      "learning_rate": 0.00012651950646661624,
      "loss": 3.0163,
      "step": 160423
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3989603519439697,
      "learning_rate": 0.00012651616921129032,
      "loss": 2.8242,
      "step": 160424
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9329047203063965,
      "learning_rate": 0.00012651283198821832,
      "loss": 2.9189,
      "step": 160425
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.750448226928711,
      "learning_rate": 0.00012650949479740072,
      "loss": 3.133,
      "step": 160426
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8119959831237793,
      "learning_rate": 0.0001265061576388384,
      "loss": 2.9479,
      "step": 160427
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3066582679748535,
      "learning_rate": 0.00012650282051253188,
      "loss": 3.188,
      "step": 160428
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.771651268005371,
      "learning_rate": 0.00012649948341848162,
      "loss": 2.8569,
      "step": 160429
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1910300254821777,
      "learning_rate": 0.00012649614635668852,
      "loss": 2.9372,
      "step": 160430
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.181162118911743,
      "learning_rate": 0.00012649280932715306,
      "loss": 3.126,
      "step": 160431
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.413248062133789,
      "learning_rate": 0.0001264894723298757,
      "loss": 2.975,
      "step": 160432
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.543856143951416,
      "learning_rate": 0.0001264861353648574,
      "loss": 2.7998,
      "step": 160433
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1360504627227783,
      "learning_rate": 0.00012648279843209846,
      "loss": 2.9889,
      "step": 160434
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5252573490142822,
      "learning_rate": 0.00012647946153159978,
      "loss": 2.9581,
      "step": 160435
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.328765869140625,
      "learning_rate": 0.00012647612466336181,
      "loss": 2.8505,
      "step": 160436
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7565155029296875,
      "learning_rate": 0.00012647278782738523,
      "loss": 3.006,
      "step": 160437
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1820876598358154,
      "learning_rate": 0.0001264694510236705,
      "loss": 2.7467,
      "step": 160438
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3642401695251465,
      "learning_rate": 0.00012646611425221846,
      "loss": 2.9555,
      "step": 160439
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3572750091552734,
      "learning_rate": 0.00012646277751302958,
      "loss": 3.0513,
      "step": 160440
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.65775203704834,
      "learning_rate": 0.00012645944080610466,
      "loss": 2.7948,
      "step": 160441
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5563082695007324,
      "learning_rate": 0.0001264561041314442,
      "loss": 3.0132,
      "step": 160442
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.208171844482422,
      "learning_rate": 0.0001264527674890488,
      "loss": 2.7169,
      "step": 160443
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.614875078201294,
      "learning_rate": 0.00012644943087891902,
      "loss": 3.1259,
      "step": 160444
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6354541778564453,
      "learning_rate": 0.00012644609430105568,
      "loss": 2.9563,
      "step": 160445
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.070868730545044,
      "learning_rate": 0.00012644275775545918,
      "loss": 2.944,
      "step": 160446
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.237361192703247,
      "learning_rate": 0.00012643942124213032,
      "loss": 3.0584,
      "step": 160447
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.629420757293701,
      "learning_rate": 0.0001264360847610697,
      "loss": 2.9036,
      "step": 160448
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.295710802078247,
      "learning_rate": 0.00012643274831227784,
      "loss": 2.8846,
      "step": 160449
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.276029586791992,
      "learning_rate": 0.00012642941189575536,
      "loss": 2.8003,
      "step": 160450
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2762060165405273,
      "learning_rate": 0.000126426075511503,
      "loss": 2.9479,
      "step": 160451
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.588289260864258,
      "learning_rate": 0.00012642273915952116,
      "loss": 2.946,
      "step": 160452
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2030341625213623,
      "learning_rate": 0.00012641940283981075,
      "loss": 2.7848,
      "step": 160453
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0566937923431396,
      "learning_rate": 0.00012641606655237227,
      "loss": 2.9269,
      "step": 160454
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1671581268310547,
      "learning_rate": 0.00012641273029720632,
      "loss": 2.8412,
      "step": 160455
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4025983810424805,
      "learning_rate": 0.0001264093940743134,
      "loss": 3.1932,
      "step": 160456
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4516899585723877,
      "learning_rate": 0.00012640605788369433,
      "loss": 2.9208,
      "step": 160457
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1178438663482666,
      "learning_rate": 0.00012640272172534958,
      "loss": 3.0048,
      "step": 160458
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.476609945297241,
      "learning_rate": 0.00012639938559927996,
      "loss": 2.6229,
      "step": 160459
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.425443410873413,
      "learning_rate": 0.00012639604950548595,
      "loss": 3.0683,
      "step": 160460
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.827620506286621,
      "learning_rate": 0.00012639271344396817,
      "loss": 2.8946,
      "step": 160461
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3187849521636963,
      "learning_rate": 0.00012638937741472718,
      "loss": 3.0981,
      "step": 160462
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4909322261810303,
      "learning_rate": 0.00012638604141776378,
      "loss": 2.5879,
      "step": 160463
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7862625122070312,
      "learning_rate": 0.00012638270545307838,
      "loss": 2.9192,
      "step": 160464
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5113847255706787,
      "learning_rate": 0.00012637936952067185,
      "loss": 2.6468,
      "step": 160465
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5014638900756836,
      "learning_rate": 0.00012637603362054465,
      "loss": 2.8653,
      "step": 160466
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7469136714935303,
      "learning_rate": 0.00012637269775269742,
      "loss": 2.9539,
      "step": 160467
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.278954029083252,
      "learning_rate": 0.0001263693619171307,
      "loss": 2.8788,
      "step": 160468
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0271759033203125,
      "learning_rate": 0.00012636602611384526,
      "loss": 2.9622,
      "step": 160469
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.604572057723999,
      "learning_rate": 0.00012636269034284157,
      "loss": 2.938,
      "step": 160470
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.606293201446533,
      "learning_rate": 0.00012635935460412041,
      "loss": 3.1079,
      "step": 160471
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.962796211242676,
      "learning_rate": 0.00012635601889768238,
      "loss": 2.9154,
      "step": 160472
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5257344245910645,
      "learning_rate": 0.000126352683223528,
      "loss": 2.9054,
      "step": 160473
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5757925510406494,
      "learning_rate": 0.00012634934758165784,
      "loss": 3.037,
      "step": 160474
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2125132083892822,
      "learning_rate": 0.00012634601197207275,
      "loss": 3.0891,
      "step": 160475
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.7709004878997803,
      "learning_rate": 0.00012634267639477308,
      "loss": 3.0245,
      "step": 160476
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.616657257080078,
      "learning_rate": 0.00012633934084975968,
      "loss": 2.9922,
      "step": 160477
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.1897430419921875,
      "learning_rate": 0.00012633600533703298,
      "loss": 3.1538,
      "step": 160478
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.221304416656494,
      "learning_rate": 0.0001263326698565939,
      "loss": 2.8692,
      "step": 160479
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.075463771820068,
      "learning_rate": 0.00012632933440844267,
      "loss": 3.0359,
      "step": 160480
    },
    {
      "epoch": 2.09,
      "grad_norm": 6.799204349517822,
      "learning_rate": 0.00012632599899258017,
      "loss": 3.1353,
      "step": 160481
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.5629947185516357,
      "learning_rate": 0.00012632266360900683,
      "loss": 3.0388,
      "step": 160482
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.089254379272461,
      "learning_rate": 0.00012631932825772353,
      "loss": 2.939,
      "step": 160483
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4847357273101807,
      "learning_rate": 0.0001263159929387306,
      "loss": 2.7382,
      "step": 160484
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9601495265960693,
      "learning_rate": 0.00012631265765202906,
      "loss": 3.08,
      "step": 160485
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.571491003036499,
      "learning_rate": 0.00012630932239761903,
      "loss": 2.8547,
      "step": 160486
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3822968006134033,
      "learning_rate": 0.00012630598717550148,
      "loss": 3.0147,
      "step": 160487
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.777717113494873,
      "learning_rate": 0.00012630265198567683,
      "loss": 3.0039,
      "step": 160488
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.19569993019104,
      "learning_rate": 0.0001262993168281459,
      "loss": 3.0048,
      "step": 160489
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.417041540145874,
      "learning_rate": 0.00012629598170290913,
      "loss": 2.9838,
      "step": 160490
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2838611602783203,
      "learning_rate": 0.00012629264660996738,
      "loss": 3.0514,
      "step": 160491
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.104238510131836,
      "learning_rate": 0.00012628931154932092,
      "loss": 3.1197,
      "step": 160492
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4541704654693604,
      "learning_rate": 0.00012628597652097065,
      "loss": 2.9478,
      "step": 160493
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.934191942214966,
      "learning_rate": 0.000126282641524917,
      "loss": 3.0336,
      "step": 160494
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4820756912231445,
      "learning_rate": 0.00012627930656116082,
      "loss": 2.8507,
      "step": 160495
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2617547512054443,
      "learning_rate": 0.00012627597162970244,
      "loss": 3.1793,
      "step": 160496
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.563049554824829,
      "learning_rate": 0.00012627263673054278,
      "loss": 2.991,
      "step": 160497
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1318416595458984,
      "learning_rate": 0.00012626930186368228,
      "loss": 3.0856,
      "step": 160498
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1839444637298584,
      "learning_rate": 0.00012626596702912163,
      "loss": 2.7775,
      "step": 160499
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.948288679122925,
      "learning_rate": 0.00012626263222686128,
      "loss": 2.782,
      "step": 160500
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3042352199554443,
      "learning_rate": 0.0001262592974569021,
      "loss": 2.7789,
      "step": 160501
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.847637414932251,
      "learning_rate": 0.00012625596271924452,
      "loss": 2.8004,
      "step": 160502
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7867586612701416,
      "learning_rate": 0.0001262526280138893,
      "loss": 3.0118,
      "step": 160503
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.110783576965332,
      "learning_rate": 0.00012624929334083702,
      "loss": 3.1095,
      "step": 160504
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9152963161468506,
      "learning_rate": 0.0001262459587000883,
      "loss": 3.0943,
      "step": 160505
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2237136363983154,
      "learning_rate": 0.0001262426240916436,
      "loss": 2.8422,
      "step": 160506
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.672381639480591,
      "learning_rate": 0.00012623928951550377,
      "loss": 2.9733,
      "step": 160507
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.130671262741089,
      "learning_rate": 0.00012623595497166925,
      "loss": 2.8793,
      "step": 160508
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8278684616088867,
      "learning_rate": 0.00012623262046014086,
      "loss": 2.9548,
      "step": 160509
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2593939304351807,
      "learning_rate": 0.0001262292859809191,
      "loss": 2.8526,
      "step": 160510
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.581313133239746,
      "learning_rate": 0.00012622595153400447,
      "loss": 2.5652,
      "step": 160511
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.502286911010742,
      "learning_rate": 0.00012622261711939782,
      "loss": 2.8683,
      "step": 160512
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8689918518066406,
      "learning_rate": 0.00012621928273709966,
      "loss": 2.9858,
      "step": 160513
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1499581336975098,
      "learning_rate": 0.00012621594838711057,
      "loss": 2.8287,
      "step": 160514
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2063779830932617,
      "learning_rate": 0.00012621261406943127,
      "loss": 2.9568,
      "step": 160515
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.404768228530884,
      "learning_rate": 0.00012620927978406233,
      "loss": 3.0282,
      "step": 160516
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.960951805114746,
      "learning_rate": 0.00012620594553100427,
      "loss": 3.0612,
      "step": 160517
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.219212532043457,
      "learning_rate": 0.0001262026113102579,
      "loss": 2.9127,
      "step": 160518
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.966050624847412,
      "learning_rate": 0.00012619927712182377,
      "loss": 2.8904,
      "step": 160519
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.698504686355591,
      "learning_rate": 0.00012619594296570233,
      "loss": 2.9467,
      "step": 160520
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4092495441436768,
      "learning_rate": 0.0001261926088418945,
      "loss": 2.9054,
      "step": 160521
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4482827186584473,
      "learning_rate": 0.0001261892747504007,
      "loss": 3.2163,
      "step": 160522
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3837504386901855,
      "learning_rate": 0.0001261859406912215,
      "loss": 2.8614,
      "step": 160523
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9002115726470947,
      "learning_rate": 0.00012618260666435774,
      "loss": 2.8332,
      "step": 160524
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7787816524505615,
      "learning_rate": 0.00012617927266980977,
      "loss": 2.8675,
      "step": 160525
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5954525470733643,
      "learning_rate": 0.0001261759387075785,
      "loss": 2.8361,
      "step": 160526
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9828903675079346,
      "learning_rate": 0.00012617260477766441,
      "loss": 2.8573,
      "step": 160527
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4813318252563477,
      "learning_rate": 0.00012616927088006808,
      "loss": 2.9048,
      "step": 160528
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7099034786224365,
      "learning_rate": 0.00012616593701479008,
      "loss": 2.8392,
      "step": 160529
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3353240489959717,
      "learning_rate": 0.0001261626031818312,
      "loss": 2.8613,
      "step": 160530
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2642323970794678,
      "learning_rate": 0.0001261592693811919,
      "loss": 2.9273,
      "step": 160531
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7209479808807373,
      "learning_rate": 0.00012615593561287293,
      "loss": 3.0719,
      "step": 160532
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7318027019500732,
      "learning_rate": 0.0001261526018768749,
      "loss": 2.9196,
      "step": 160533
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2977471351623535,
      "learning_rate": 0.00012614926817319837,
      "loss": 3.1346,
      "step": 160534
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.643949508666992,
      "learning_rate": 0.00012614593450184386,
      "loss": 2.8036,
      "step": 160535
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1942641735076904,
      "learning_rate": 0.0001261426008628122,
      "loss": 2.9265,
      "step": 160536
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2121951580047607,
      "learning_rate": 0.00012613926725610382,
      "loss": 2.7363,
      "step": 160537
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0464916229248047,
      "learning_rate": 0.00012613593368171952,
      "loss": 2.8677,
      "step": 160538
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.224262714385986,
      "learning_rate": 0.00012613260013965986,
      "loss": 3.0033,
      "step": 160539
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.403640031814575,
      "learning_rate": 0.0001261292666299254,
      "loss": 2.8405,
      "step": 160540
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.426187038421631,
      "learning_rate": 0.0001261259331525167,
      "loss": 3.1173,
      "step": 160541
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.222220420837402,
      "learning_rate": 0.00012612259970743461,
      "loss": 2.9035,
      "step": 160542
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6876633167266846,
      "learning_rate": 0.00012611926629467945,
      "loss": 2.9788,
      "step": 160543
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3285787105560303,
      "learning_rate": 0.00012611593291425214,
      "loss": 2.9524,
      "step": 160544
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.017270803451538,
      "learning_rate": 0.00012611259956615305,
      "loss": 2.8792,
      "step": 160545
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4154038429260254,
      "learning_rate": 0.00012610926625038313,
      "loss": 3.0711,
      "step": 160546
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8407833576202393,
      "learning_rate": 0.00012610593296694255,
      "loss": 3.1514,
      "step": 160547
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4137277603149414,
      "learning_rate": 0.00012610259971583226,
      "loss": 2.9725,
      "step": 160548
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2437541484832764,
      "learning_rate": 0.0001260992664970527,
      "loss": 2.8648,
      "step": 160549
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3612217903137207,
      "learning_rate": 0.00012609593331060467,
      "loss": 3.2267,
      "step": 160550
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.571176052093506,
      "learning_rate": 0.00012609260015648854,
      "loss": 2.8735,
      "step": 160551
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5584583282470703,
      "learning_rate": 0.00012608926703470532,
      "loss": 3.1728,
      "step": 160552
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.122568368911743,
      "learning_rate": 0.00012608593394525518,
      "loss": 2.95,
      "step": 160553
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3598594665527344,
      "learning_rate": 0.0001260826008881391,
      "loss": 2.9168,
      "step": 160554
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5502028465270996,
      "learning_rate": 0.0001260792678633574,
      "loss": 3.0316,
      "step": 160555
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.252819776535034,
      "learning_rate": 0.0001260759348709109,
      "loss": 3.0774,
      "step": 160556
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.358124256134033,
      "learning_rate": 0.00012607260191080011,
      "loss": 3.1347,
      "step": 160557
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3094751834869385,
      "learning_rate": 0.00012606926898302593,
      "loss": 3.0243,
      "step": 160558
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9576122760772705,
      "learning_rate": 0.00012606593608758854,
      "loss": 2.9582,
      "step": 160559
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1404969692230225,
      "learning_rate": 0.00012606260322448886,
      "loss": 2.9653,
      "step": 160560
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3456921577453613,
      "learning_rate": 0.00012605927039372733,
      "loss": 2.9743,
      "step": 160561
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2916083335876465,
      "learning_rate": 0.00012605593759530478,
      "loss": 2.8257,
      "step": 160562
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.256697177886963,
      "learning_rate": 0.00012605260482922162,
      "loss": 3.2249,
      "step": 160563
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5168404579162598,
      "learning_rate": 0.00012604927209547873,
      "loss": 2.6934,
      "step": 160564
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.615370273590088,
      "learning_rate": 0.0001260459393940764,
      "loss": 3.0476,
      "step": 160565
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0023674964904785,
      "learning_rate": 0.0001260426067250155,
      "loss": 2.8322,
      "step": 160566
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.040736436843872,
      "learning_rate": 0.00012603927408829646,
      "loss": 2.9577,
      "step": 160567
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5983660221099854,
      "learning_rate": 0.00012603594148392008,
      "loss": 3.0173,
      "step": 160568
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7642452716827393,
      "learning_rate": 0.00012603260891188683,
      "loss": 3.1975,
      "step": 160569
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8608510494232178,
      "learning_rate": 0.0001260292763721976,
      "loss": 2.8839,
      "step": 160570
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8523848056793213,
      "learning_rate": 0.0001260259438648526,
      "loss": 2.7019,
      "step": 160571
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.937633752822876,
      "learning_rate": 0.0001260226113898528,
      "loss": 2.8929,
      "step": 160572
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.818840503692627,
      "learning_rate": 0.00012601927894719854,
      "loss": 2.8718,
      "step": 160573
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.263932943344116,
      "learning_rate": 0.0001260159465368907,
      "loss": 3.0909,
      "step": 160574
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9461264610290527,
      "learning_rate": 0.00012601261415892966,
      "loss": 2.86,
      "step": 160575
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.729468822479248,
      "learning_rate": 0.00012600928181331637,
      "loss": 2.9399,
      "step": 160576
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4329657554626465,
      "learning_rate": 0.00012600594950005106,
      "loss": 2.6627,
      "step": 160577
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.958645343780518,
      "learning_rate": 0.00012600261721913458,
      "loss": 2.8396,
      "step": 160578
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.57806658744812,
      "learning_rate": 0.00012599928497056745,
      "loss": 2.8881,
      "step": 160579
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.61844801902771,
      "learning_rate": 0.00012599595275435038,
      "loss": 3.058,
      "step": 160580
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.254617214202881,
      "learning_rate": 0.00012599262057048387,
      "loss": 2.932,
      "step": 160581
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.810253858566284,
      "learning_rate": 0.00012598928841896885,
      "loss": 3.0214,
      "step": 160582
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.945133686065674,
      "learning_rate": 0.0001259859562998055,
      "loss": 2.8623,
      "step": 160583
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.603766918182373,
      "learning_rate": 0.0001259826242129947,
      "loss": 3.0338,
      "step": 160584
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4405922889709473,
      "learning_rate": 0.00012597929215853697,
      "loss": 2.9027,
      "step": 160585
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5707600116729736,
      "learning_rate": 0.00012597596013643302,
      "loss": 3.4045,
      "step": 160586
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.8400650024414062,
      "learning_rate": 0.00012597262814668334,
      "loss": 3.1651,
      "step": 160587
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.140466213226318,
      "learning_rate": 0.00012596929618928875,
      "loss": 2.88,
      "step": 160588
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5172908306121826,
      "learning_rate": 0.00012596596426424976,
      "loss": 2.9601,
      "step": 160589
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2705445289611816,
      "learning_rate": 0.00012596263237156697,
      "loss": 2.9484,
      "step": 160590
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5801987648010254,
      "learning_rate": 0.0001259593005112409,
      "loss": 3.0195,
      "step": 160591
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.904745101928711,
      "learning_rate": 0.0001259559686832724,
      "loss": 2.8886,
      "step": 160592
    },
    {
      "epoch": 2.09,
      "grad_norm": 5.601466178894043,
      "learning_rate": 0.00012595263688766186,
      "loss": 2.98,
      "step": 160593
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.327706813812256,
      "learning_rate": 0.0001259493051244101,
      "loss": 2.8689,
      "step": 160594
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.955099105834961,
      "learning_rate": 0.0001259459733935177,
      "loss": 2.9935,
      "step": 160595
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.120670795440674,
      "learning_rate": 0.00012594264169498508,
      "loss": 2.9074,
      "step": 160596
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.6539106369018555,
      "learning_rate": 0.00012593931002881313,
      "loss": 2.9199,
      "step": 160597
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.026618003845215,
      "learning_rate": 0.0001259359783950023,
      "loss": 2.9741,
      "step": 160598
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.411626815795898,
      "learning_rate": 0.0001259326467935532,
      "loss": 2.8493,
      "step": 160599
    },
    {
      "epoch": 2.09,
      "grad_norm": 7.230110168457031,
      "learning_rate": 0.00012592931522446664,
      "loss": 2.5628,
      "step": 160600
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.766956329345703,
      "learning_rate": 0.00012592598368774304,
      "loss": 2.9291,
      "step": 160601
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.630511522293091,
      "learning_rate": 0.00012592265218338303,
      "loss": 2.9916,
      "step": 160602
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8065097332000732,
      "learning_rate": 0.00012591932071138735,
      "loss": 3.0479,
      "step": 160603
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.238091945648193,
      "learning_rate": 0.0001259159892717566,
      "loss": 3.0618,
      "step": 160604
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1654977798461914,
      "learning_rate": 0.0001259126578644912,
      "loss": 3.1019,
      "step": 160605
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9117443561553955,
      "learning_rate": 0.00012590932648959207,
      "loss": 3.1255,
      "step": 160606
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.294848680496216,
      "learning_rate": 0.00012590599514705967,
      "loss": 2.618,
      "step": 160607
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.238770008087158,
      "learning_rate": 0.0001259026638368945,
      "loss": 2.8899,
      "step": 160608
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.5580480098724365,
      "learning_rate": 0.00012589933255909743,
      "loss": 2.7797,
      "step": 160609
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1695849895477295,
      "learning_rate": 0.0001258960013136689,
      "loss": 3.017,
      "step": 160610
    },
    {
      "epoch": 2.09,
      "grad_norm": 1.8645495176315308,
      "learning_rate": 0.00012589267010060964,
      "loss": 2.7298,
      "step": 160611
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4768471717834473,
      "learning_rate": 0.00012588933891992025,
      "loss": 2.8755,
      "step": 160612
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8842668533325195,
      "learning_rate": 0.00012588600777160128,
      "loss": 3.1174,
      "step": 160613
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6131045818328857,
      "learning_rate": 0.00012588267665565331,
      "loss": 3.1139,
      "step": 160614
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3889336585998535,
      "learning_rate": 0.00012587934557207716,
      "loss": 2.7579,
      "step": 160615
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.089514970779419,
      "learning_rate": 0.0001258760145208732,
      "loss": 3.2625,
      "step": 160616
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.301349639892578,
      "learning_rate": 0.00012587268350204225,
      "loss": 2.988,
      "step": 160617
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4918878078460693,
      "learning_rate": 0.0001258693525155849,
      "loss": 2.8301,
      "step": 160618
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0739338397979736,
      "learning_rate": 0.00012586602156150175,
      "loss": 2.7851,
      "step": 160619
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.43133807182312,
      "learning_rate": 0.0001258626906397932,
      "loss": 2.9829,
      "step": 160620
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4782330989837646,
      "learning_rate": 0.00012585935975046022,
      "loss": 2.8418,
      "step": 160621
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.091785192489624,
      "learning_rate": 0.00012585602889350319,
      "loss": 2.9648,
      "step": 160622
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5370614528656006,
      "learning_rate": 0.0001258526980689229,
      "loss": 3.0515,
      "step": 160623
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5537571907043457,
      "learning_rate": 0.00012584936727671976,
      "loss": 2.9097,
      "step": 160624
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.87368106842041,
      "learning_rate": 0.00012584603651689472,
      "loss": 2.8861,
      "step": 160625
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2180063724517822,
      "learning_rate": 0.000125842705789448,
      "loss": 2.8311,
      "step": 160626
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.782189130783081,
      "learning_rate": 0.0001258393750943805,
      "loss": 2.829,
      "step": 160627
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7998526096343994,
      "learning_rate": 0.00012583604443169264,
      "loss": 3.2752,
      "step": 160628
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.356926441192627,
      "learning_rate": 0.00012583271380138525,
      "loss": 2.9408,
      "step": 160629
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.7063148021698,
      "learning_rate": 0.00012582938320345876,
      "loss": 2.908,
      "step": 160630
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.899838447570801,
      "learning_rate": 0.00012582605263791404,
      "loss": 3.1045,
      "step": 160631
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1292831897735596,
      "learning_rate": 0.0001258227221047514,
      "loss": 3.0993,
      "step": 160632
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.624227523803711,
      "learning_rate": 0.00012581939160397166,
      "loss": 2.9851,
      "step": 160633
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0484485626220703,
      "learning_rate": 0.00012581606113557528,
      "loss": 2.9998,
      "step": 160634
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.743969202041626,
      "learning_rate": 0.00012581273069956313,
      "loss": 2.9036,
      "step": 160635
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.211313486099243,
      "learning_rate": 0.0001258094002959355,
      "loss": 2.9196,
      "step": 160636
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.126523733139038,
      "learning_rate": 0.00012580606992469346,
      "loss": 3.0673,
      "step": 160637
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.380302667617798,
      "learning_rate": 0.00012580273958583715,
      "loss": 3.0471,
      "step": 160638
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.764561176300049,
      "learning_rate": 0.0001257994092793675,
      "loss": 2.8563,
      "step": 160639
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0594701766967773,
      "learning_rate": 0.0001257960790052849,
      "loss": 2.9697,
      "step": 160640
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.633780002593994,
      "learning_rate": 0.00012579274876359022,
      "loss": 3.0623,
      "step": 160641
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0666840076446533,
      "learning_rate": 0.00012578941855428385,
      "loss": 2.9606,
      "step": 160642
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.434258222579956,
      "learning_rate": 0.00012578608837736672,
      "loss": 2.8491,
      "step": 160643
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.764514923095703,
      "learning_rate": 0.00012578275823283905,
      "loss": 3.083,
      "step": 160644
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2956702709198,
      "learning_rate": 0.00012577942812070175,
      "loss": 3.0328,
      "step": 160645
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4510269165039062,
      "learning_rate": 0.0001257760980409552,
      "loss": 3.0612,
      "step": 160646
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6647279262542725,
      "learning_rate": 0.00012577276799360032,
      "loss": 3.0903,
      "step": 160647
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2109901905059814,
      "learning_rate": 0.00012576943797863746,
      "loss": 2.9902,
      "step": 160648
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0890488624572754,
      "learning_rate": 0.00012576610799606755,
      "loss": 2.9501,
      "step": 160649
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.99461030960083,
      "learning_rate": 0.00012576277804589078,
      "loss": 2.6278,
      "step": 160650
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.408963918685913,
      "learning_rate": 0.00012575944812810813,
      "loss": 3.0094,
      "step": 160651
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.484801769256592,
      "learning_rate": 0.00012575611824271998,
      "loss": 2.9563,
      "step": 160652
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8720662593841553,
      "learning_rate": 0.00012575278838972714,
      "loss": 2.9788,
      "step": 160653
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8213882446289062,
      "learning_rate": 0.00012574945856913005,
      "loss": 2.9188,
      "step": 160654
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5693271160125732,
      "learning_rate": 0.00012574612878092964,
      "loss": 3.0291,
      "step": 160655
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8751933574676514,
      "learning_rate": 0.00012574279902512615,
      "loss": 2.8802,
      "step": 160656
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5907397270202637,
      "learning_rate": 0.0001257394693017204,
      "loss": 2.9832,
      "step": 160657
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.590900182723999,
      "learning_rate": 0.0001257361396107129,
      "loss": 3.2017,
      "step": 160658
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.582557201385498,
      "learning_rate": 0.00012573280995210442,
      "loss": 2.8372,
      "step": 160659
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.096182346343994,
      "learning_rate": 0.00012572948032589542,
      "loss": 2.7968,
      "step": 160660
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.934772491455078,
      "learning_rate": 0.00012572615073208683,
      "loss": 3.0226,
      "step": 160661
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.013606309890747,
      "learning_rate": 0.00012572282117067878,
      "loss": 2.972,
      "step": 160662
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.131631374359131,
      "learning_rate": 0.00012571949164167227,
      "loss": 3.0329,
      "step": 160663
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.957489490509033,
      "learning_rate": 0.0001257161621450677,
      "loss": 2.9266,
      "step": 160664
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.60729718208313,
      "learning_rate": 0.00012571283268086587,
      "loss": 3.0757,
      "step": 160665
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.039341449737549,
      "learning_rate": 0.0001257095032490672,
      "loss": 2.8413,
      "step": 160666
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9265835285186768,
      "learning_rate": 0.0001257061738496727,
      "loss": 3.0479,
      "step": 160667
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.347950458526611,
      "learning_rate": 0.00012570284448268243,
      "loss": 2.869,
      "step": 160668
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.536196231842041,
      "learning_rate": 0.00012569951514809744,
      "loss": 3.1343,
      "step": 160669
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9356391429901123,
      "learning_rate": 0.00012569618584591807,
      "loss": 2.9183,
      "step": 160670
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1388099193573,
      "learning_rate": 0.0001256928565761452,
      "loss": 2.9221,
      "step": 160671
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8413803577423096,
      "learning_rate": 0.00012568952733877918,
      "loss": 2.8882,
      "step": 160672
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2499923706054688,
      "learning_rate": 0.0001256861981338209,
      "loss": 3.1164,
      "step": 160673
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4664928913116455,
      "learning_rate": 0.0001256828689612708,
      "loss": 2.7691,
      "step": 160674
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.319819927215576,
      "learning_rate": 0.00012567953982112962,
      "loss": 2.7983,
      "step": 160675
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.546067714691162,
      "learning_rate": 0.00012567621071339773,
      "loss": 2.9116,
      "step": 160676
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3245301246643066,
      "learning_rate": 0.0001256728816380761,
      "loss": 2.9327,
      "step": 160677
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.871173620223999,
      "learning_rate": 0.00012566955259516499,
      "loss": 2.7529,
      "step": 160678
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.294057369232178,
      "learning_rate": 0.00012566622358466535,
      "loss": 2.9176,
      "step": 160679
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.429840087890625,
      "learning_rate": 0.00012566289460657764,
      "loss": 3.1683,
      "step": 160680
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.234876871109009,
      "learning_rate": 0.00012565956566090253,
      "loss": 2.6452,
      "step": 160681
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.840371608734131,
      "learning_rate": 0.0001256562367476404,
      "loss": 2.9163,
      "step": 160682
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7385849952697754,
      "learning_rate": 0.00012565290786679224,
      "loss": 2.8415,
      "step": 160683
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.856743097305298,
      "learning_rate": 0.00012564957901835838,
      "loss": 2.8208,
      "step": 160684
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.498753309249878,
      "learning_rate": 0.00012564625020233965,
      "loss": 2.9281,
      "step": 160685
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3320534229278564,
      "learning_rate": 0.00012564292141873659,
      "loss": 2.9223,
      "step": 160686
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.537884473800659,
      "learning_rate": 0.00012563959266754967,
      "loss": 3.0384,
      "step": 160687
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7561068534851074,
      "learning_rate": 0.00012563626394877979,
      "loss": 2.8538,
      "step": 160688
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.604539632797241,
      "learning_rate": 0.00012563293526242735,
      "loss": 3.0525,
      "step": 160689
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.024034023284912,
      "learning_rate": 0.00012562960660849297,
      "loss": 2.9998,
      "step": 160690
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6860523223876953,
      "learning_rate": 0.00012562627798697745,
      "loss": 2.9687,
      "step": 160691
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1047215461730957,
      "learning_rate": 0.0001256229493978813,
      "loss": 3.0134,
      "step": 160692
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5701985359191895,
      "learning_rate": 0.00012561962084120502,
      "loss": 3.0974,
      "step": 160693
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.073434829711914,
      "learning_rate": 0.00012561629231694945,
      "loss": 3.0818,
      "step": 160694
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.709815740585327,
      "learning_rate": 0.000125612963825115,
      "loss": 2.6183,
      "step": 160695
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9943063259124756,
      "learning_rate": 0.0001256096353657025,
      "loss": 3.0096,
      "step": 160696
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7227230072021484,
      "learning_rate": 0.00012560630693871246,
      "loss": 2.9061,
      "step": 160697
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1898488998413086,
      "learning_rate": 0.0001256029785441455,
      "loss": 3.2788,
      "step": 160698
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.842623233795166,
      "learning_rate": 0.00012559965018200215,
      "loss": 2.8835,
      "step": 160699
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.003783941268921,
      "learning_rate": 0.00012559632185228317,
      "loss": 2.9735,
      "step": 160700
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.429321050643921,
      "learning_rate": 0.00012559299355498907,
      "loss": 3.0716,
      "step": 160701
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.243323802947998,
      "learning_rate": 0.00012558966529012064,
      "loss": 2.8309,
      "step": 160702
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0134835243225098,
      "learning_rate": 0.00012558633705767833,
      "loss": 2.8706,
      "step": 160703
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2094926834106445,
      "learning_rate": 0.00012558300885766283,
      "loss": 3.1086,
      "step": 160704
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6411595344543457,
      "learning_rate": 0.00012557968069007466,
      "loss": 3.0727,
      "step": 160705
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3720662593841553,
      "learning_rate": 0.00012557635255491462,
      "loss": 2.708,
      "step": 160706
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7087485790252686,
      "learning_rate": 0.00012557302445218312,
      "loss": 3.12,
      "step": 160707
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.679438829421997,
      "learning_rate": 0.00012556969638188097,
      "loss": 2.7941,
      "step": 160708
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.264735698699951,
      "learning_rate": 0.0001255663683440086,
      "loss": 2.8932,
      "step": 160709
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.391319513320923,
      "learning_rate": 0.000125563040338567,
      "loss": 2.9742,
      "step": 160710
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.143617630004883,
      "learning_rate": 0.00012555971236555624,
      "loss": 3.133,
      "step": 160711
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3931870460510254,
      "learning_rate": 0.00012555638442497737,
      "loss": 3.1609,
      "step": 160712
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0129809379577637,
      "learning_rate": 0.00012555305651683075,
      "loss": 2.7511,
      "step": 160713
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5658183097839355,
      "learning_rate": 0.00012554972864111722,
      "loss": 2.9866,
      "step": 160714
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.263967514038086,
      "learning_rate": 0.00012554640079783717,
      "loss": 2.87,
      "step": 160715
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3105297088623047,
      "learning_rate": 0.00012554307298699155,
      "loss": 3.0536,
      "step": 160716
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.205698251724243,
      "learning_rate": 0.00012553974520858056,
      "loss": 2.8294,
      "step": 160717
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.168168306350708,
      "learning_rate": 0.00012553641746260513,
      "loss": 2.6887,
      "step": 160718
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2504825592041016,
      "learning_rate": 0.00012553308974906567,
      "loss": 3.0177,
      "step": 160719
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5471839904785156,
      "learning_rate": 0.00012552976206796302,
      "loss": 2.9452,
      "step": 160720
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3452603816986084,
      "learning_rate": 0.00012552643441929756,
      "loss": 2.8012,
      "step": 160721
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6190028190612793,
      "learning_rate": 0.00012552310680307024,
      "loss": 3.1415,
      "step": 160722
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2264575958251953,
      "learning_rate": 0.00012551977921928124,
      "loss": 2.8007,
      "step": 160723
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.422715663909912,
      "learning_rate": 0.00012551645166793154,
      "loss": 2.7725,
      "step": 160724
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2987422943115234,
      "learning_rate": 0.0001255131241490215,
      "loss": 2.8531,
      "step": 160725
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.370421886444092,
      "learning_rate": 0.00012550979666255197,
      "loss": 3.0231,
      "step": 160726
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4703433513641357,
      "learning_rate": 0.0001255064692085234,
      "loss": 2.946,
      "step": 160727
    },
    {
      "epoch": 2.09,
      "grad_norm": 5.117917537689209,
      "learning_rate": 0.00012550314178693664,
      "loss": 2.9244,
      "step": 160728
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.261718511581421,
      "learning_rate": 0.00012549981439779193,
      "loss": 3.2148,
      "step": 160729
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.7954912185668945,
      "learning_rate": 0.00012549648704109022,
      "loss": 2.8977,
      "step": 160730
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.544956684112549,
      "learning_rate": 0.00012549315971683187,
      "loss": 3.2395,
      "step": 160731
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9978525638580322,
      "learning_rate": 0.00012548983242501775,
      "loss": 3.0025,
      "step": 160732
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.6625657081604004,
      "learning_rate": 0.00012548650516564828,
      "loss": 2.8808,
      "step": 160733
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3467252254486084,
      "learning_rate": 0.00012548317793872437,
      "loss": 2.8491,
      "step": 160734
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.251643657684326,
      "learning_rate": 0.00012547985074424625,
      "loss": 3.13,
      "step": 160735
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3409080505371094,
      "learning_rate": 0.0001254765235822148,
      "loss": 3.1205,
      "step": 160736
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.719081163406372,
      "learning_rate": 0.00012547319645263042,
      "loss": 2.7995,
      "step": 160737
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3322150707244873,
      "learning_rate": 0.00012546986935549397,
      "loss": 3.0291,
      "step": 160738
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.801182985305786,
      "learning_rate": 0.0001254665422908059,
      "loss": 2.8505,
      "step": 160739
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0763866901397705,
      "learning_rate": 0.00012546321525856713,
      "loss": 2.7931,
      "step": 160740
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.677354097366333,
      "learning_rate": 0.00012545988825877778,
      "loss": 2.9353,
      "step": 160741
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.351425886154175,
      "learning_rate": 0.00012545656129143884,
      "loss": 3.1924,
      "step": 160742
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.159003496170044,
      "learning_rate": 0.00012545323435655072,
      "loss": 3.0407,
      "step": 160743
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8043107986450195,
      "learning_rate": 0.00012544990745411426,
      "loss": 2.9478,
      "step": 160744
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6606669425964355,
      "learning_rate": 0.00012544658058412985,
      "loss": 3.0102,
      "step": 160745
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.177668809890747,
      "learning_rate": 0.00012544325374659832,
      "loss": 2.9953,
      "step": 160746
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0393333435058594,
      "learning_rate": 0.00012543992694152015,
      "loss": 2.8175,
      "step": 160747
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.844667911529541,
      "learning_rate": 0.00012543660016889606,
      "loss": 3.0283,
      "step": 160748
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.626603841781616,
      "learning_rate": 0.00012543327342872645,
      "loss": 3.146,
      "step": 160749
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5093750953674316,
      "learning_rate": 0.00012542994672101217,
      "loss": 2.9195,
      "step": 160750
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.545820951461792,
      "learning_rate": 0.0001254266200457537,
      "loss": 2.8218,
      "step": 160751
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4735498428344727,
      "learning_rate": 0.00012542329340295178,
      "loss": 2.8414,
      "step": 160752
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.236612558364868,
      "learning_rate": 0.00012541996679260703,
      "loss": 2.9332,
      "step": 160753
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.217559337615967,
      "learning_rate": 0.00012541664021471994,
      "loss": 3.0161,
      "step": 160754
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1560182571411133,
      "learning_rate": 0.00012541331366929112,
      "loss": 3.0445,
      "step": 160755
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1642537117004395,
      "learning_rate": 0.00012540998715632136,
      "loss": 2.616,
      "step": 160756
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5066802501678467,
      "learning_rate": 0.00012540666067581107,
      "loss": 3.0752,
      "step": 160757
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6024904251098633,
      "learning_rate": 0.0001254033342277611,
      "loss": 3.0019,
      "step": 160758
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8123977184295654,
      "learning_rate": 0.0001254000078121719,
      "loss": 2.83,
      "step": 160759
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7121191024780273,
      "learning_rate": 0.00012539668142904416,
      "loss": 2.9627,
      "step": 160760
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4598655700683594,
      "learning_rate": 0.00012539335507837839,
      "loss": 2.9588,
      "step": 160761
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.635814905166626,
      "learning_rate": 0.00012539002876017538,
      "loss": 2.9401,
      "step": 160762
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5649161338806152,
      "learning_rate": 0.00012538670247443556,
      "loss": 3.3343,
      "step": 160763
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4519355297088623,
      "learning_rate": 0.00012538337622115974,
      "loss": 2.9252,
      "step": 160764
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2684335708618164,
      "learning_rate": 0.00012538005000034848,
      "loss": 2.8734,
      "step": 160765
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6598570346832275,
      "learning_rate": 0.00012537672381200232,
      "loss": 3.0389,
      "step": 160766
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7627437114715576,
      "learning_rate": 0.0001253733976561218,
      "loss": 2.9695,
      "step": 160767
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.593008518218994,
      "learning_rate": 0.00012537007153270782,
      "loss": 2.9201,
      "step": 160768
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4877099990844727,
      "learning_rate": 0.00012536674544176072,
      "loss": 3.1763,
      "step": 160769
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.43843936920166,
      "learning_rate": 0.00012536341938328133,
      "loss": 3.0295,
      "step": 160770
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.244626760482788,
      "learning_rate": 0.0001253600933572702,
      "loss": 2.838,
      "step": 160771
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.659573554992676,
      "learning_rate": 0.00012535676736372781,
      "loss": 2.9374,
      "step": 160772
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.270301103591919,
      "learning_rate": 0.000125353441402655,
      "loss": 2.8726,
      "step": 160773
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.162940263748169,
      "learning_rate": 0.00012535011547405226,
      "loss": 2.9213,
      "step": 160774
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9523916244506836,
      "learning_rate": 0.00012534678957792017,
      "loss": 3.2774,
      "step": 160775
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5388877391815186,
      "learning_rate": 0.0001253434637142595,
      "loss": 2.8519,
      "step": 160776
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4439380168914795,
      "learning_rate": 0.00012534013788307073,
      "loss": 2.9747,
      "step": 160777
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.434798240661621,
      "learning_rate": 0.00012533681208435447,
      "loss": 2.9011,
      "step": 160778
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.480616807937622,
      "learning_rate": 0.00012533348631811147,
      "loss": 2.9469,
      "step": 160779
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.409724235534668,
      "learning_rate": 0.0001253301605843422,
      "loss": 3.1515,
      "step": 160780
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.710404872894287,
      "learning_rate": 0.00012532683488304742,
      "loss": 2.9,
      "step": 160781
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.529456615447998,
      "learning_rate": 0.00012532350921422772,
      "loss": 3.0649,
      "step": 160782
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9893367290496826,
      "learning_rate": 0.00012532018357788363,
      "loss": 2.8201,
      "step": 160783
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1952855587005615,
      "learning_rate": 0.00012531685797401572,
      "loss": 2.7449,
      "step": 160784
    },
    {
      "epoch": 2.09,
      "grad_norm": 1.935838222503662,
      "learning_rate": 0.00012531353240262486,
      "loss": 2.8447,
      "step": 160785
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0997495651245117,
      "learning_rate": 0.00012531020686371133,
      "loss": 2.8671,
      "step": 160786
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2247061729431152,
      "learning_rate": 0.00012530688135727608,
      "loss": 2.9322,
      "step": 160787
    },
    {
      "epoch": 2.09,
      "grad_norm": 4.117857933044434,
      "learning_rate": 0.00012530355588331958,
      "loss": 3.017,
      "step": 160788
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.620039701461792,
      "learning_rate": 0.00012530023044184243,
      "loss": 2.786,
      "step": 160789
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.138408899307251,
      "learning_rate": 0.00012529690503284516,
      "loss": 2.9394,
      "step": 160790
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.311807155609131,
      "learning_rate": 0.00012529357965632861,
      "loss": 2.9743,
      "step": 160791
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.360656976699829,
      "learning_rate": 0.00012529025431229316,
      "loss": 3.3636,
      "step": 160792
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.20664644241333,
      "learning_rate": 0.00012528692900073968,
      "loss": 2.9049,
      "step": 160793
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8159308433532715,
      "learning_rate": 0.00012528360372166854,
      "loss": 2.8631,
      "step": 160794
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8056366443634033,
      "learning_rate": 0.0001252802784750807,
      "loss": 3.047,
      "step": 160795
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.285064220428467,
      "learning_rate": 0.0001252769532609763,
      "loss": 2.8654,
      "step": 160796
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3896548748016357,
      "learning_rate": 0.00012527362807935636,
      "loss": 2.6977,
      "step": 160797
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6388821601867676,
      "learning_rate": 0.00012527030293022122,
      "loss": 2.788,
      "step": 160798
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6160190105438232,
      "learning_rate": 0.00012526697781357177,
      "loss": 2.6553,
      "step": 160799
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4984793663024902,
      "learning_rate": 0.00012526365272940836,
      "loss": 3.1008,
      "step": 160800
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0650274753570557,
      "learning_rate": 0.00012526032767773193,
      "loss": 3.072,
      "step": 160801
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.782783031463623,
      "learning_rate": 0.00012525700265854272,
      "loss": 2.8424,
      "step": 160802
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.330623149871826,
      "learning_rate": 0.0001252536776718416,
      "loss": 3.1292,
      "step": 160803
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.760056495666504,
      "learning_rate": 0.00012525035271762907,
      "loss": 2.8054,
      "step": 160804
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1644997596740723,
      "learning_rate": 0.00012524702779590584,
      "loss": 2.8177,
      "step": 160805
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.1149628162384033,
      "learning_rate": 0.00012524370290667244,
      "loss": 3.0066,
      "step": 160806
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9906961917877197,
      "learning_rate": 0.0001252403780499297,
      "loss": 3.0259,
      "step": 160807
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.963853597640991,
      "learning_rate": 0.0001252370532256779,
      "loss": 2.9197,
      "step": 160808
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.443387031555176,
      "learning_rate": 0.00012523372843391788,
      "loss": 2.9529,
      "step": 160809
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1030492782592773,
      "learning_rate": 0.00012523040367465015,
      "loss": 3.0463,
      "step": 160810
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.489888906478882,
      "learning_rate": 0.0001252270789478755,
      "loss": 2.9359,
      "step": 160811
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.964961051940918,
      "learning_rate": 0.00012522375425359428,
      "loss": 2.854,
      "step": 160812
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7574150562286377,
      "learning_rate": 0.0001252204295918074,
      "loss": 2.9024,
      "step": 160813
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.53047251701355,
      "learning_rate": 0.00012521710496251538,
      "loss": 3.0137,
      "step": 160814
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3318169116973877,
      "learning_rate": 0.00012521378036571876,
      "loss": 3.1969,
      "step": 160815
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7706427574157715,
      "learning_rate": 0.0001252104558014181,
      "loss": 2.9682,
      "step": 160816
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.001047372817993,
      "learning_rate": 0.00012520713126961422,
      "loss": 2.9459,
      "step": 160817
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.850090980529785,
      "learning_rate": 0.0001252038067703075,
      "loss": 2.9062,
      "step": 160818
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.509979248046875,
      "learning_rate": 0.00012520048230349882,
      "loss": 3.0614,
      "step": 160819
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.6750640869140625,
      "learning_rate": 0.00012519715786918866,
      "loss": 2.9054,
      "step": 160820
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.191660165786743,
      "learning_rate": 0.00012519383346737767,
      "loss": 2.9523,
      "step": 160821
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.6932437419891357,
      "learning_rate": 0.00012519050909806631,
      "loss": 3.0822,
      "step": 160822
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.22916316986084,
      "learning_rate": 0.00012518718476125547,
      "loss": 2.7021,
      "step": 160823
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.447415828704834,
      "learning_rate": 0.0001251838604569455,
      "loss": 3.0014,
      "step": 160824
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3423473834991455,
      "learning_rate": 0.00012518053618513727,
      "loss": 2.9171,
      "step": 160825
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6579837799072266,
      "learning_rate": 0.0001251772119458313,
      "loss": 2.9033,
      "step": 160826
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.7922866344451904,
      "learning_rate": 0.00012517388773902816,
      "loss": 2.898,
      "step": 160827
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.2285540103912354,
      "learning_rate": 0.0001251705635647284,
      "loss": 2.9482,
      "step": 160828
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3899362087249756,
      "learning_rate": 0.00012516723942293282,
      "loss": 3.1455,
      "step": 160829
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.227231740951538,
      "learning_rate": 0.00012516391531364185,
      "loss": 2.8068,
      "step": 160830
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.286712646484375,
      "learning_rate": 0.00012516059123685632,
      "loss": 2.9032,
      "step": 160831
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.8070030212402344,
      "learning_rate": 0.00012515726719257677,
      "loss": 3.0098,
      "step": 160832
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4377641677856445,
      "learning_rate": 0.00012515394318080375,
      "loss": 3.2382,
      "step": 160833
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.527127504348755,
      "learning_rate": 0.00012515061920153781,
      "loss": 2.8969,
      "step": 160834
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4470059871673584,
      "learning_rate": 0.00012514729525477976,
      "loss": 2.9774,
      "step": 160835
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3993561267852783,
      "learning_rate": 0.00012514397134053007,
      "loss": 2.8678,
      "step": 160836
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.311224937438965,
      "learning_rate": 0.00012514064745878947,
      "loss": 3.0018,
      "step": 160837
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.242238998413086,
      "learning_rate": 0.00012513732360955852,
      "loss": 3.0208,
      "step": 160838
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.239600419998169,
      "learning_rate": 0.0001251339997928379,
      "loss": 2.8409,
      "step": 160839
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4666197299957275,
      "learning_rate": 0.000125130676008628,
      "loss": 2.9937,
      "step": 160840
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.614677667617798,
      "learning_rate": 0.00012512735225692976,
      "loss": 2.8019,
      "step": 160841
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.357604503631592,
      "learning_rate": 0.00012512402853774352,
      "loss": 2.8682,
      "step": 160842
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.9484574794769287,
      "learning_rate": 0.00012512070485107014,
      "loss": 3.0823,
      "step": 160843
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.828686475753784,
      "learning_rate": 0.0001251173811969101,
      "loss": 2.9414,
      "step": 160844
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4829585552215576,
      "learning_rate": 0.0001251140575752641,
      "loss": 2.802,
      "step": 160845
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1776328086853027,
      "learning_rate": 0.00012511073398613253,
      "loss": 3.1296,
      "step": 160846
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.774919271469116,
      "learning_rate": 0.00012510741042951628,
      "loss": 2.9678,
      "step": 160847
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4545090198516846,
      "learning_rate": 0.0001251040869054158,
      "loss": 3.1255,
      "step": 160848
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5649778842926025,
      "learning_rate": 0.00012510076341383186,
      "loss": 3.0036,
      "step": 160849
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.22426438331604,
      "learning_rate": 0.00012509743995476496,
      "loss": 2.934,
      "step": 160850
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6273717880249023,
      "learning_rate": 0.0001250941165282158,
      "loss": 2.8015,
      "step": 160851
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.781919479370117,
      "learning_rate": 0.0001250907931341848,
      "loss": 2.9344,
      "step": 160852
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3084871768951416,
      "learning_rate": 0.00012508746977267285,
      "loss": 2.7902,
      "step": 160853
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4510746002197266,
      "learning_rate": 0.0001250841464436803,
      "loss": 2.8877,
      "step": 160854
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3673529624938965,
      "learning_rate": 0.00012508082314720807,
      "loss": 3.0407,
      "step": 160855
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2402596473693848,
      "learning_rate": 0.0001250774998832566,
      "loss": 2.8659,
      "step": 160856
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.5186872482299805,
      "learning_rate": 0.0001250741766518264,
      "loss": 3.1495,
      "step": 160857
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2742302417755127,
      "learning_rate": 0.0001250708534529183,
      "loss": 3.0937,
      "step": 160858
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7826762199401855,
      "learning_rate": 0.00012506753028653287,
      "loss": 3.1152,
      "step": 160859
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.7930023670196533,
      "learning_rate": 0.0001250642071526706,
      "loss": 2.9581,
      "step": 160860
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1017258167266846,
      "learning_rate": 0.00012506088405133225,
      "loss": 3.0878,
      "step": 160861
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.4751834869384766,
      "learning_rate": 0.0001250575609825184,
      "loss": 2.8895,
      "step": 160862
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.022117853164673,
      "learning_rate": 0.00012505423794622955,
      "loss": 2.9591,
      "step": 160863
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3042774200439453,
      "learning_rate": 0.00012505091494246652,
      "loss": 2.8245,
      "step": 160864
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.0903501510620117,
      "learning_rate": 0.00012504759197122986,
      "loss": 2.9857,
      "step": 160865
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.569643259048462,
      "learning_rate": 0.00012504426903252,
      "loss": 2.9194,
      "step": 160866
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.447633743286133,
      "learning_rate": 0.00012504094612633785,
      "loss": 2.9923,
      "step": 160867
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.252469301223755,
      "learning_rate": 0.00012503762325268388,
      "loss": 2.8299,
      "step": 160868
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4476304054260254,
      "learning_rate": 0.00012503430041155863,
      "loss": 3.0485,
      "step": 160869
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.8892016410827637,
      "learning_rate": 0.0001250309776029629,
      "loss": 2.6439,
      "step": 160870
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.60103440284729,
      "learning_rate": 0.0001250276548268971,
      "loss": 3.0616,
      "step": 160871
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4681694507598877,
      "learning_rate": 0.00012502433208336203,
      "loss": 2.7276,
      "step": 160872
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3887081146240234,
      "learning_rate": 0.0001250210093723583,
      "loss": 2.9123,
      "step": 160873
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.513437032699585,
      "learning_rate": 0.00012501768669388649,
      "loss": 2.9097,
      "step": 160874
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.138627767562866,
      "learning_rate": 0.000125014364047947,
      "loss": 3.1684,
      "step": 160875
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.400575637817383,
      "learning_rate": 0.00012501104143454082,
      "loss": 3.1941,
      "step": 160876
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.47214674949646,
      "learning_rate": 0.00012500771885366825,
      "loss": 2.9448,
      "step": 160877
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.372971773147583,
      "learning_rate": 0.0001250043963053302,
      "loss": 2.7077,
      "step": 160878
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.4851975440979004,
      "learning_rate": 0.00012500107378952697,
      "loss": 3.0023,
      "step": 160879
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.368957281112671,
      "learning_rate": 0.00012499775130625948,
      "loss": 3.036,
      "step": 160880
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.411879777908325,
      "learning_rate": 0.0001249944288555282,
      "loss": 2.8894,
      "step": 160881
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.511913537979126,
      "learning_rate": 0.00012499110643733377,
      "loss": 2.86,
      "step": 160882
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.726994276046753,
      "learning_rate": 0.0001249877840516767,
      "loss": 2.9817,
      "step": 160883
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2446417808532715,
      "learning_rate": 0.0001249844616985578,
      "loss": 2.8415,
      "step": 160884
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.6754660606384277,
      "learning_rate": 0.00012498113937797748,
      "loss": 2.7974,
      "step": 160885
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.5741422176361084,
      "learning_rate": 0.00012497781708993658,
      "loss": 3.0949,
      "step": 160886
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.504793882369995,
      "learning_rate": 0.0001249744948344356,
      "loss": 3.0844,
      "step": 160887
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.337836503982544,
      "learning_rate": 0.00012497117261147517,
      "loss": 3.0744,
      "step": 160888
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.2256696224212646,
      "learning_rate": 0.0001249678504210558,
      "loss": 2.9223,
      "step": 160889
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.361222743988037,
      "learning_rate": 0.00012496452826317835,
      "loss": 2.7802,
      "step": 160890
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1933932304382324,
      "learning_rate": 0.00012496120613784316,
      "loss": 2.6475,
      "step": 160891
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.168342351913452,
      "learning_rate": 0.00012495788404505106,
      "loss": 2.8759,
      "step": 160892
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.1819565296173096,
      "learning_rate": 0.00012495456198480264,
      "loss": 2.7448,
      "step": 160893
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.3136050701141357,
      "learning_rate": 0.00012495123995709845,
      "loss": 3.0203,
      "step": 160894
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.3942079544067383,
      "learning_rate": 0.00012494791796193903,
      "loss": 2.6777,
      "step": 160895
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.003232002258301,
      "learning_rate": 0.0001249445959993252,
      "loss": 2.9166,
      "step": 160896
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.234273910522461,
      "learning_rate": 0.00012494127406925738,
      "loss": 2.9088,
      "step": 160897
    },
    {
      "epoch": 2.09,
      "grad_norm": 2.147388458251953,
      "learning_rate": 0.00012493795217173635,
      "loss": 2.866,
      "step": 160898
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7958011627197266,
      "learning_rate": 0.0001249346303067627,
      "loss": 2.9914,
      "step": 160899
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.521696090698242,
      "learning_rate": 0.000124931308474337,
      "loss": 2.8448,
      "step": 160900
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3006677627563477,
      "learning_rate": 0.00012492798667445976,
      "loss": 2.9539,
      "step": 160901
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6117606163024902,
      "learning_rate": 0.0001249246649071318,
      "loss": 2.8704,
      "step": 160902
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7032880783081055,
      "learning_rate": 0.00012492134317235355,
      "loss": 3.1368,
      "step": 160903
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.801790714263916,
      "learning_rate": 0.00012491802147012587,
      "loss": 2.8486,
      "step": 160904
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.369570016860962,
      "learning_rate": 0.0001249146998004492,
      "loss": 2.7801,
      "step": 160905
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.571631908416748,
      "learning_rate": 0.0001249113781633242,
      "loss": 3.0847,
      "step": 160906
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3871326446533203,
      "learning_rate": 0.00012490805655875138,
      "loss": 2.8692,
      "step": 160907
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.25846529006958,
      "learning_rate": 0.0001249047349867316,
      "loss": 2.8006,
      "step": 160908
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5010879039764404,
      "learning_rate": 0.00012490141344726516,
      "loss": 2.9066,
      "step": 160909
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.380559206008911,
      "learning_rate": 0.000124898091940353,
      "loss": 3.1366,
      "step": 160910
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.02719259262085,
      "learning_rate": 0.0001248947704659956,
      "loss": 3.1183,
      "step": 160911
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.852717399597168,
      "learning_rate": 0.00012489144902419353,
      "loss": 3.0524,
      "step": 160912
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.541480302810669,
      "learning_rate": 0.00012488812761494736,
      "loss": 3.0134,
      "step": 160913
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.208054780960083,
      "learning_rate": 0.00012488480623825787,
      "loss": 2.7449,
      "step": 160914
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.783236503601074,
      "learning_rate": 0.00012488148489412553,
      "loss": 2.9575,
      "step": 160915
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.993567943572998,
      "learning_rate": 0.00012487816358255113,
      "loss": 3.0854,
      "step": 160916
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.801578998565674,
      "learning_rate": 0.00012487484230353516,
      "loss": 2.7342,
      "step": 160917
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.525956153869629,
      "learning_rate": 0.0001248715210570783,
      "loss": 2.8817,
      "step": 160918
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.816355228424072,
      "learning_rate": 0.000124868199843181,
      "loss": 2.7311,
      "step": 160919
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5252840518951416,
      "learning_rate": 0.0001248648786618441,
      "loss": 2.841,
      "step": 160920
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2327451705932617,
      "learning_rate": 0.000124861557513068,
      "loss": 2.9026,
      "step": 160921
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8804359436035156,
      "learning_rate": 0.0001248582363968536,
      "loss": 2.9399,
      "step": 160922
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.011956214904785,
      "learning_rate": 0.00012485491531320135,
      "loss": 2.986,
      "step": 160923
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1766891479492188,
      "learning_rate": 0.0001248515942621119,
      "loss": 2.8592,
      "step": 160924
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.703052997589111,
      "learning_rate": 0.00012484827324358568,
      "loss": 2.7499,
      "step": 160925
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.2446837425231934,
      "learning_rate": 0.0001248449522576236,
      "loss": 3.059,
      "step": 160926
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.333064556121826,
      "learning_rate": 0.00012484163130422606,
      "loss": 2.8341,
      "step": 160927
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.653313636779785,
      "learning_rate": 0.00012483831038339387,
      "loss": 3.1005,
      "step": 160928
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.9432437419891357,
      "learning_rate": 0.00012483498949512754,
      "loss": 3.0841,
      "step": 160929
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.802953243255615,
      "learning_rate": 0.0001248316686394277,
      "loss": 2.8268,
      "step": 160930
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.847395181655884,
      "learning_rate": 0.0001248283478162948,
      "loss": 2.7617,
      "step": 160931
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.335541248321533,
      "learning_rate": 0.00012482502702572978,
      "loss": 2.9652,
      "step": 160932
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.566657304763794,
      "learning_rate": 0.000124821706267733,
      "loss": 3.2411,
      "step": 160933
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.7684574127197266,
      "learning_rate": 0.00012481838554230523,
      "loss": 2.8351,
      "step": 160934
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.755633354187012,
      "learning_rate": 0.00012481506484944706,
      "loss": 2.9259,
      "step": 160935
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8945469856262207,
      "learning_rate": 0.00012481174418915906,
      "loss": 2.8366,
      "step": 160936
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.621126413345337,
      "learning_rate": 0.00012480842356144177,
      "loss": 3.1387,
      "step": 160937
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3115952014923096,
      "learning_rate": 0.00012480510296629602,
      "loss": 2.9652,
      "step": 160938
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.417010307312012,
      "learning_rate": 0.00012480178240372217,
      "loss": 3.0288,
      "step": 160939
    },
    {
      "epoch": 2.1,
      "grad_norm": 7.336467266082764,
      "learning_rate": 0.00012479846187372112,
      "loss": 2.7444,
      "step": 160940
    },
    {
      "epoch": 2.1,
      "grad_norm": 8.862383842468262,
      "learning_rate": 0.0001247951413762933,
      "loss": 2.563,
      "step": 160941
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.931626558303833,
      "learning_rate": 0.0001247918209114393,
      "loss": 3.0125,
      "step": 160942
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.280690908432007,
      "learning_rate": 0.0001247885004791599,
      "loss": 3.0512,
      "step": 160943
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.208081007003784,
      "learning_rate": 0.0001247851800794556,
      "loss": 2.7856,
      "step": 160944
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.696320533752441,
      "learning_rate": 0.00012478185971232696,
      "loss": 3.1453,
      "step": 160945
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.791482448577881,
      "learning_rate": 0.0001247785393777748,
      "loss": 2.8881,
      "step": 160946
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.125492095947266,
      "learning_rate": 0.0001247752190757995,
      "loss": 2.9889,
      "step": 160947
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.747490167617798,
      "learning_rate": 0.0001247718988064019,
      "loss": 3.0345,
      "step": 160948
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.451714277267456,
      "learning_rate": 0.0001247685785695825,
      "loss": 2.7855,
      "step": 160949
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.66450572013855,
      "learning_rate": 0.0001247652583653419,
      "loss": 2.8119,
      "step": 160950
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.477161407470703,
      "learning_rate": 0.0001247619381936807,
      "loss": 3.0161,
      "step": 160951
    },
    {
      "epoch": 2.1,
      "grad_norm": 6.061732292175293,
      "learning_rate": 0.00012475861805459965,
      "loss": 2.7522,
      "step": 160952
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.9918196201324463,
      "learning_rate": 0.0001247552979480992,
      "loss": 2.9556,
      "step": 160953
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.247262954711914,
      "learning_rate": 0.0001247519778741801,
      "loss": 2.7669,
      "step": 160954
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0820693969726562,
      "learning_rate": 0.00012474865783284296,
      "loss": 2.751,
      "step": 160955
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.898393154144287,
      "learning_rate": 0.0001247453378240882,
      "loss": 2.8074,
      "step": 160956
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.025942087173462,
      "learning_rate": 0.00012474201784791675,
      "loss": 2.9866,
      "step": 160957
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.657669544219971,
      "learning_rate": 0.00012473869790432905,
      "loss": 2.9014,
      "step": 160958
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2645881175994873,
      "learning_rate": 0.0001247353779933256,
      "loss": 3.0381,
      "step": 160959
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.191310405731201,
      "learning_rate": 0.00012473205811490727,
      "loss": 2.9145,
      "step": 160960
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.080904960632324,
      "learning_rate": 0.00012472873826907457,
      "loss": 3.0787,
      "step": 160961
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4734935760498047,
      "learning_rate": 0.000124725418455828,
      "loss": 3.009,
      "step": 160962
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1126413345336914,
      "learning_rate": 0.0001247220986751684,
      "loss": 2.8596,
      "step": 160963
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.272653818130493,
      "learning_rate": 0.00012471877892709625,
      "loss": 2.8812,
      "step": 160964
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.369184732437134,
      "learning_rate": 0.00012471545921161208,
      "loss": 2.9518,
      "step": 160965
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6871719360351562,
      "learning_rate": 0.00012471213952871673,
      "loss": 3.0939,
      "step": 160966
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8492908477783203,
      "learning_rate": 0.00012470881987841073,
      "loss": 2.9328,
      "step": 160967
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5175042152404785,
      "learning_rate": 0.00012470550026069452,
      "loss": 3.0901,
      "step": 160968
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.079230546951294,
      "learning_rate": 0.000124702180675569,
      "loss": 2.8789,
      "step": 160969
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3675596714019775,
      "learning_rate": 0.00012469886112303452,
      "loss": 2.7484,
      "step": 160970
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0291879177093506,
      "learning_rate": 0.00012469554160309196,
      "loss": 2.8017,
      "step": 160971
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.38014554977417,
      "learning_rate": 0.0001246922221157418,
      "loss": 3.1001,
      "step": 160972
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3088812828063965,
      "learning_rate": 0.00012468890266098466,
      "loss": 3.0718,
      "step": 160973
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1285207271575928,
      "learning_rate": 0.00012468558323882104,
      "loss": 2.951,
      "step": 160974
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.730121612548828,
      "learning_rate": 0.0001246822638492518,
      "loss": 3.2389,
      "step": 160975
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.591686964035034,
      "learning_rate": 0.00012467894449227736,
      "loss": 2.935,
      "step": 160976
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7869207859039307,
      "learning_rate": 0.0001246756251678985,
      "loss": 2.925,
      "step": 160977
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.224729061126709,
      "learning_rate": 0.00012467230587611574,
      "loss": 3.2997,
      "step": 160978
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.131744146347046,
      "learning_rate": 0.00012466898661692974,
      "loss": 2.905,
      "step": 160979
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2766518592834473,
      "learning_rate": 0.0001246656673903409,
      "loss": 2.6125,
      "step": 160980
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.639853000640869,
      "learning_rate": 0.00012466234819635022,
      "loss": 2.9516,
      "step": 160981
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5058043003082275,
      "learning_rate": 0.00012465902903495795,
      "loss": 2.818,
      "step": 160982
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.509751319885254,
      "learning_rate": 0.000124655709906165,
      "loss": 2.9964,
      "step": 160983
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.996840476989746,
      "learning_rate": 0.0001246523908099719,
      "loss": 3.0938,
      "step": 160984
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6261749267578125,
      "learning_rate": 0.0001246490717463792,
      "loss": 2.9875,
      "step": 160985
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0547890663146973,
      "learning_rate": 0.00012464575271538744,
      "loss": 3.0078,
      "step": 160986
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3417084217071533,
      "learning_rate": 0.00012464243371699746,
      "loss": 3.0376,
      "step": 160987
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.397428274154663,
      "learning_rate": 0.00012463911475120966,
      "loss": 2.9532,
      "step": 160988
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.105295658111572,
      "learning_rate": 0.00012463579581802484,
      "loss": 2.9958,
      "step": 160989
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7680110931396484,
      "learning_rate": 0.00012463247691744353,
      "loss": 2.7901,
      "step": 160990
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4692206382751465,
      "learning_rate": 0.0001246291580494664,
      "loss": 3.1616,
      "step": 160991
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8116002082824707,
      "learning_rate": 0.00012462583921409387,
      "loss": 2.8978,
      "step": 160992
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3319759368896484,
      "learning_rate": 0.00012462252041132684,
      "loss": 2.8464,
      "step": 160993
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9593355655670166,
      "learning_rate": 0.00012461920164116567,
      "loss": 3.0738,
      "step": 160994
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.2050251960754395,
      "learning_rate": 0.0001246158829036112,
      "loss": 3.1919,
      "step": 160995
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.644798755645752,
      "learning_rate": 0.00012461256419866396,
      "loss": 2.7206,
      "step": 160996
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.12557053565979,
      "learning_rate": 0.00012460924552632456,
      "loss": 2.8705,
      "step": 160997
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.611424207687378,
      "learning_rate": 0.0001246059268865935,
      "loss": 2.9383,
      "step": 160998
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.492953300476074,
      "learning_rate": 0.00012460260827947163,
      "loss": 3.1389,
      "step": 160999
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.867767810821533,
      "learning_rate": 0.00012459928970495933,
      "loss": 2.7898,
      "step": 161000
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.527418613433838,
      "learning_rate": 0.00012459597116305744,
      "loss": 2.8229,
      "step": 161001
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1287388801574707,
      "learning_rate": 0.00012459265265376646,
      "loss": 3.0209,
      "step": 161002
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.119615316390991,
      "learning_rate": 0.00012458933417708703,
      "loss": 2.7602,
      "step": 161003
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.071307420730591,
      "learning_rate": 0.00012458601573301964,
      "loss": 2.9855,
      "step": 161004
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5413026809692383,
      "learning_rate": 0.00012458269732156514,
      "loss": 2.9687,
      "step": 161005
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.685915470123291,
      "learning_rate": 0.0001245793789427239,
      "loss": 3.025,
      "step": 161006
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3345580101013184,
      "learning_rate": 0.00012457606059649678,
      "loss": 3.1282,
      "step": 161007
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3278377056121826,
      "learning_rate": 0.0001245727422828843,
      "loss": 2.8479,
      "step": 161008
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7944343090057373,
      "learning_rate": 0.00012456942400188703,
      "loss": 3.0356,
      "step": 161009
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4626948833465576,
      "learning_rate": 0.0001245661057535055,
      "loss": 2.9856,
      "step": 161010
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3521037101745605,
      "learning_rate": 0.0001245627875377406,
      "loss": 2.8493,
      "step": 161011
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.112377882003784,
      "learning_rate": 0.00012455946935459265,
      "loss": 3.2948,
      "step": 161012
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4986319541931152,
      "learning_rate": 0.0001245561512040625,
      "loss": 2.9398,
      "step": 161013
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9279539585113525,
      "learning_rate": 0.00012455283308615057,
      "loss": 2.6498,
      "step": 161014
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3905906677246094,
      "learning_rate": 0.00012454951500085783,
      "loss": 2.767,
      "step": 161015
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.039243459701538,
      "learning_rate": 0.0001245461969481844,
      "loss": 2.9079,
      "step": 161016
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5139310359954834,
      "learning_rate": 0.00012454287892813126,
      "loss": 3.0215,
      "step": 161017
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.136261463165283,
      "learning_rate": 0.00012453956094069882,
      "loss": 3.0661,
      "step": 161018
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1758267879486084,
      "learning_rate": 0.00012453624298588788,
      "loss": 3.0445,
      "step": 161019
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4429614543914795,
      "learning_rate": 0.00012453292506369885,
      "loss": 3.0551,
      "step": 161020
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.619933605194092,
      "learning_rate": 0.00012452960717413265,
      "loss": 2.9299,
      "step": 161021
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.15215802192688,
      "learning_rate": 0.00012452628931718953,
      "loss": 2.92,
      "step": 161022
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9708549976348877,
      "learning_rate": 0.00012452297149287043,
      "loss": 2.6334,
      "step": 161023
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6559135913848877,
      "learning_rate": 0.00012451965370117565,
      "loss": 2.6821,
      "step": 161024
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.079559087753296,
      "learning_rate": 0.0001245163359421061,
      "loss": 3.0817,
      "step": 161025
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3396825790405273,
      "learning_rate": 0.0001245130182156622,
      "loss": 2.9422,
      "step": 161026
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.302046537399292,
      "learning_rate": 0.00012450970052184482,
      "loss": 2.8791,
      "step": 161027
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.184394359588623,
      "learning_rate": 0.0001245063828606542,
      "loss": 3.1191,
      "step": 161028
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2694091796875,
      "learning_rate": 0.00012450306523209128,
      "loss": 2.9421,
      "step": 161029
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4334051609039307,
      "learning_rate": 0.0001244997476361564,
      "loss": 3.1306,
      "step": 161030
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2386603355407715,
      "learning_rate": 0.00012449643007285047,
      "loss": 2.7315,
      "step": 161031
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.662936210632324,
      "learning_rate": 0.00012449311254217387,
      "loss": 2.9751,
      "step": 161032
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.72363018989563,
      "learning_rate": 0.00012448979504412738,
      "loss": 2.8654,
      "step": 161033
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.337310791015625,
      "learning_rate": 0.0001244864775787116,
      "loss": 2.9586,
      "step": 161034
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2972514629364014,
      "learning_rate": 0.00012448316014592709,
      "loss": 2.8912,
      "step": 161035
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5493955612182617,
      "learning_rate": 0.00012447984274577433,
      "loss": 3.062,
      "step": 161036
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.697920560836792,
      "learning_rate": 0.0001244765253782542,
      "loss": 3.0414,
      "step": 161037
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.629268169403076,
      "learning_rate": 0.0001244732080433671,
      "loss": 3.0317,
      "step": 161038
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.579012155532837,
      "learning_rate": 0.00012446989074111386,
      "loss": 3.1045,
      "step": 161039
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.159015655517578,
      "learning_rate": 0.00012446657347149498,
      "loss": 2.9877,
      "step": 161040
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.964012384414673,
      "learning_rate": 0.00012446325623451094,
      "loss": 2.7076,
      "step": 161041
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6050829887390137,
      "learning_rate": 0.00012445993903016262,
      "loss": 2.5294,
      "step": 161042
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4352939128875732,
      "learning_rate": 0.0001244566218584505,
      "loss": 2.8915,
      "step": 161043
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6800060272216797,
      "learning_rate": 0.00012445330471937512,
      "loss": 3.0578,
      "step": 161044
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.864614963531494,
      "learning_rate": 0.0001244499876129373,
      "loss": 3.0363,
      "step": 161045
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.679056406021118,
      "learning_rate": 0.0001244466705391375,
      "loss": 2.8192,
      "step": 161046
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1656882762908936,
      "learning_rate": 0.00012444335349797632,
      "loss": 3.0646,
      "step": 161047
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.152367353439331,
      "learning_rate": 0.0001244400364894545,
      "loss": 2.9704,
      "step": 161048
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.300553798675537,
      "learning_rate": 0.0001244367195135726,
      "loss": 2.8777,
      "step": 161049
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.17164945602417,
      "learning_rate": 0.00012443340257033112,
      "loss": 3.0801,
      "step": 161050
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4094581604003906,
      "learning_rate": 0.0001244300856597309,
      "loss": 2.9304,
      "step": 161051
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.288297414779663,
      "learning_rate": 0.00012442676878177245,
      "loss": 2.9657,
      "step": 161052
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.988569498062134,
      "learning_rate": 0.00012442345193645625,
      "loss": 3.1667,
      "step": 161053
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7994472980499268,
      "learning_rate": 0.00012442013512378318,
      "loss": 2.8517,
      "step": 161054
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.648813009262085,
      "learning_rate": 0.0001244168183437536,
      "loss": 3.0035,
      "step": 161055
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8228507041931152,
      "learning_rate": 0.00012441350159636834,
      "loss": 2.7534,
      "step": 161056
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8019728660583496,
      "learning_rate": 0.00012441018488162795,
      "loss": 3.1036,
      "step": 161057
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.357118844985962,
      "learning_rate": 0.00012440686819953302,
      "loss": 2.9735,
      "step": 161058
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6406326293945312,
      "learning_rate": 0.00012440355155008404,
      "loss": 3.0598,
      "step": 161059
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6927425861358643,
      "learning_rate": 0.00012440023493328188,
      "loss": 2.9095,
      "step": 161060
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.145247220993042,
      "learning_rate": 0.0001243969183491269,
      "loss": 3.1566,
      "step": 161061
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.406861305236816,
      "learning_rate": 0.00012439360179761997,
      "loss": 2.9074,
      "step": 161062
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6095621585845947,
      "learning_rate": 0.0001243902852787616,
      "loss": 2.9311,
      "step": 161063
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3384830951690674,
      "learning_rate": 0.00012438696879255238,
      "loss": 2.8284,
      "step": 161064
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.479729413986206,
      "learning_rate": 0.0001243836523389928,
      "loss": 2.8654,
      "step": 161065
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.415626287460327,
      "learning_rate": 0.00012438033591808374,
      "loss": 2.6407,
      "step": 161066
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7910170555114746,
      "learning_rate": 0.00012437701952982557,
      "loss": 2.6107,
      "step": 161067
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1102519035339355,
      "learning_rate": 0.00012437370317421918,
      "loss": 2.8934,
      "step": 161068
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.481934070587158,
      "learning_rate": 0.000124370386851265,
      "loss": 2.7783,
      "step": 161069
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6587576866149902,
      "learning_rate": 0.0001243670705609637,
      "loss": 2.9117,
      "step": 161070
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.90246844291687,
      "learning_rate": 0.00012436375430331575,
      "loss": 3.125,
      "step": 161071
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.728785276412964,
      "learning_rate": 0.000124360438078322,
      "loss": 2.9591,
      "step": 161072
    },
    {
      "epoch": 2.1,
      "grad_norm": 7.414417266845703,
      "learning_rate": 0.00012435712188598288,
      "loss": 2.863,
      "step": 161073
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.604175329208374,
      "learning_rate": 0.00012435380572629914,
      "loss": 3.1661,
      "step": 161074
    },
    {
      "epoch": 2.1,
      "grad_norm": 1.9325388669967651,
      "learning_rate": 0.0001243504895992714,
      "loss": 2.766,
      "step": 161075
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.797863006591797,
      "learning_rate": 0.00012434717350490022,
      "loss": 3.1362,
      "step": 161076
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2297356128692627,
      "learning_rate": 0.00012434385744318605,
      "loss": 3.0995,
      "step": 161077
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7497153282165527,
      "learning_rate": 0.00012434054141412986,
      "loss": 2.9122,
      "step": 161078
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7904293537139893,
      "learning_rate": 0.00012433722541773192,
      "loss": 2.88,
      "step": 161079
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5430591106414795,
      "learning_rate": 0.0001243339094539931,
      "loss": 2.8173,
      "step": 161080
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0932559967041016,
      "learning_rate": 0.00012433059352291384,
      "loss": 3.0808,
      "step": 161081
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7163610458374023,
      "learning_rate": 0.00012432727762449507,
      "loss": 3.1691,
      "step": 161082
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.763254404067993,
      "learning_rate": 0.00012432396175873694,
      "loss": 2.8375,
      "step": 161083
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.209831476211548,
      "learning_rate": 0.0001243206459256404,
      "loss": 2.9059,
      "step": 161084
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3636884689331055,
      "learning_rate": 0.00012431733012520583,
      "loss": 2.8925,
      "step": 161085
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7529959678649902,
      "learning_rate": 0.00012431401435743415,
      "loss": 2.9734,
      "step": 161086
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9454097747802734,
      "learning_rate": 0.0001243106986223257,
      "loss": 2.9377,
      "step": 161087
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.140444040298462,
      "learning_rate": 0.0001243073829198814,
      "loss": 2.9004,
      "step": 161088
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.165677785873413,
      "learning_rate": 0.00012430406725010148,
      "loss": 3.0636,
      "step": 161089
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1519010066986084,
      "learning_rate": 0.00012430075161298684,
      "loss": 3.0097,
      "step": 161090
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.225492477416992,
      "learning_rate": 0.0001242974360085379,
      "loss": 3.13,
      "step": 161091
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.309480905532837,
      "learning_rate": 0.00012429412043675548,
      "loss": 2.7741,
      "step": 161092
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5726194381713867,
      "learning_rate": 0.00012429080489763998,
      "loss": 3.0161,
      "step": 161093
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6137568950653076,
      "learning_rate": 0.00012428748939119242,
      "loss": 3.1485,
      "step": 161094
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5890729427337646,
      "learning_rate": 0.00012428417391741287,
      "loss": 2.8016,
      "step": 161095
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2430624961853027,
      "learning_rate": 0.0001242808584763023,
      "loss": 2.9189,
      "step": 161096
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.2940919399261475,
      "learning_rate": 0.00012427754306786116,
      "loss": 2.9327,
      "step": 161097
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.034228801727295,
      "learning_rate": 0.00012427422769209024,
      "loss": 2.9907,
      "step": 161098
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.563387870788574,
      "learning_rate": 0.00012427091234898995,
      "loss": 3.0571,
      "step": 161099
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.882389545440674,
      "learning_rate": 0.00012426759703856123,
      "loss": 3.1757,
      "step": 161100
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.7349181175231934,
      "learning_rate": 0.00012426428176080426,
      "loss": 2.9387,
      "step": 161101
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.518914222717285,
      "learning_rate": 0.00012426096651571998,
      "loss": 3.0994,
      "step": 161102
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.579437732696533,
      "learning_rate": 0.00012425765130330882,
      "loss": 3.1065,
      "step": 161103
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0813677310943604,
      "learning_rate": 0.0001242543361235716,
      "loss": 2.7614,
      "step": 161104
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.511315107345581,
      "learning_rate": 0.00012425102097650867,
      "loss": 2.7812,
      "step": 161105
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.042639970779419,
      "learning_rate": 0.00012424770586212102,
      "loss": 2.7352,
      "step": 161106
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.320451259613037,
      "learning_rate": 0.00012424439078040882,
      "loss": 2.9857,
      "step": 161107
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.86924409866333,
      "learning_rate": 0.000124241075731373,
      "loss": 2.9145,
      "step": 161108
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.602968454360962,
      "learning_rate": 0.00012423776071501398,
      "loss": 3.2064,
      "step": 161109
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7956366539001465,
      "learning_rate": 0.00012423444573133262,
      "loss": 2.7923,
      "step": 161110
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.605163097381592,
      "learning_rate": 0.00012423113078032926,
      "loss": 2.8918,
      "step": 161111
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.221560478210449,
      "learning_rate": 0.00012422781586200486,
      "loss": 3.0406,
      "step": 161112
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.718392848968506,
      "learning_rate": 0.00012422450097635966,
      "loss": 2.7908,
      "step": 161113
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3738291263580322,
      "learning_rate": 0.0001242211861233945,
      "loss": 2.9614,
      "step": 161114
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6243579387664795,
      "learning_rate": 0.00012421787130310983,
      "loss": 2.8715,
      "step": 161115
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0354199409484863,
      "learning_rate": 0.0001242145565155065,
      "loss": 3.0587,
      "step": 161116
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.403747797012329,
      "learning_rate": 0.00012421124176058488,
      "loss": 2.7705,
      "step": 161117
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2605791091918945,
      "learning_rate": 0.00012420792703834584,
      "loss": 2.9903,
      "step": 161118
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2949395179748535,
      "learning_rate": 0.00012420461234878986,
      "loss": 2.8825,
      "step": 161119
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.574754476547241,
      "learning_rate": 0.00012420129769191756,
      "loss": 2.9996,
      "step": 161120
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2846295833587646,
      "learning_rate": 0.00012419798306772942,
      "loss": 2.9886,
      "step": 161121
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.294412851333618,
      "learning_rate": 0.00012419466847622633,
      "loss": 2.9198,
      "step": 161122
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1964328289031982,
      "learning_rate": 0.00012419135391740865,
      "loss": 3.273,
      "step": 161123
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3850953578948975,
      "learning_rate": 0.00012418803939127725,
      "loss": 2.8605,
      "step": 161124
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6256017684936523,
      "learning_rate": 0.0001241847248978326,
      "loss": 3.0109,
      "step": 161125
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2374603748321533,
      "learning_rate": 0.0001241814104370753,
      "loss": 2.9786,
      "step": 161126
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.834994077682495,
      "learning_rate": 0.0001241780960090059,
      "loss": 2.8082,
      "step": 161127
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8616812229156494,
      "learning_rate": 0.00012417478161362526,
      "loss": 3.2477,
      "step": 161128
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5714075565338135,
      "learning_rate": 0.00012417146725093368,
      "loss": 2.9538,
      "step": 161129
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8020081520080566,
      "learning_rate": 0.00012416815292093207,
      "loss": 2.953,
      "step": 161130
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.357595205307007,
      "learning_rate": 0.00012416483862362094,
      "loss": 2.8983,
      "step": 161131
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.495042324066162,
      "learning_rate": 0.00012416152435900077,
      "loss": 3.083,
      "step": 161132
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.278635025024414,
      "learning_rate": 0.0001241582101270724,
      "loss": 2.7926,
      "step": 161133
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5548276901245117,
      "learning_rate": 0.0001241548959278363,
      "loss": 2.9412,
      "step": 161134
    },
    {
      "epoch": 2.1,
      "grad_norm": 1.9887115955352783,
      "learning_rate": 0.00012415158176129308,
      "loss": 2.9791,
      "step": 161135
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.70920991897583,
      "learning_rate": 0.00012414826762744346,
      "loss": 3.298,
      "step": 161136
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.18782377243042,
      "learning_rate": 0.00012414495352628803,
      "loss": 2.9001,
      "step": 161137
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.463197708129883,
      "learning_rate": 0.00012414163945782724,
      "loss": 2.9955,
      "step": 161138
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2472355365753174,
      "learning_rate": 0.00012413832542206195,
      "loss": 2.7842,
      "step": 161139
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.154871702194214,
      "learning_rate": 0.00012413501141899256,
      "loss": 3.1978,
      "step": 161140
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6099307537078857,
      "learning_rate": 0.00012413169744861992,
      "loss": 2.9223,
      "step": 161141
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2347476482391357,
      "learning_rate": 0.00012412838351094445,
      "loss": 3.1411,
      "step": 161142
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.884101867675781,
      "learning_rate": 0.0001241250696059669,
      "loss": 3.0517,
      "step": 161143
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.7335336208343506,
      "learning_rate": 0.00012412175573368772,
      "loss": 2.9741,
      "step": 161144
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.157649517059326,
      "learning_rate": 0.0001241184418941077,
      "loss": 2.8903,
      "step": 161145
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.903904914855957,
      "learning_rate": 0.00012411512808722727,
      "loss": 2.907,
      "step": 161146
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1334431171417236,
      "learning_rate": 0.00012411181431304727,
      "loss": 2.801,
      "step": 161147
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.34637451171875,
      "learning_rate": 0.00012410850057156822,
      "loss": 3.1177,
      "step": 161148
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5064802169799805,
      "learning_rate": 0.0001241051868627907,
      "loss": 2.8616,
      "step": 161149
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5147511959075928,
      "learning_rate": 0.00012410187318671526,
      "loss": 3.0811,
      "step": 161150
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.18275785446167,
      "learning_rate": 0.00012409855954334268,
      "loss": 3.0493,
      "step": 161151
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.460887908935547,
      "learning_rate": 0.0001240952459326734,
      "loss": 2.9505,
      "step": 161152
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.041099786758423,
      "learning_rate": 0.00012409193235470825,
      "loss": 3.0723,
      "step": 161153
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5904836654663086,
      "learning_rate": 0.00012408861880944765,
      "loss": 2.7906,
      "step": 161154
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4827606678009033,
      "learning_rate": 0.0001240853052968925,
      "loss": 2.875,
      "step": 161155
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2982397079467773,
      "learning_rate": 0.00012408199181704294,
      "loss": 3.2259,
      "step": 161156
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.900904417037964,
      "learning_rate": 0.0001240786783699,
      "loss": 3.2508,
      "step": 161157
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4829323291778564,
      "learning_rate": 0.00012407536495546406,
      "loss": 2.833,
      "step": 161158
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1666605472564697,
      "learning_rate": 0.00012407205157373595,
      "loss": 3.1642,
      "step": 161159
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6194565296173096,
      "learning_rate": 0.00012406873822471602,
      "loss": 2.6297,
      "step": 161160
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7880592346191406,
      "learning_rate": 0.00012406542490840528,
      "loss": 2.7994,
      "step": 161161
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.380816698074341,
      "learning_rate": 0.00012406211162480386,
      "loss": 3.115,
      "step": 161162
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0564630031585693,
      "learning_rate": 0.00012405879837391274,
      "loss": 3.2102,
      "step": 161163
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3423502445220947,
      "learning_rate": 0.0001240554851557323,
      "loss": 2.7507,
      "step": 161164
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4395041465759277,
      "learning_rate": 0.0001240521719702634,
      "loss": 3.1076,
      "step": 161165
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.167208194732666,
      "learning_rate": 0.00012404885881750634,
      "loss": 3.0942,
      "step": 161166
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.642392158508301,
      "learning_rate": 0.0001240455456974622,
      "loss": 3.2064,
      "step": 161167
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5707805156707764,
      "learning_rate": 0.0001240422326101311,
      "loss": 2.6619,
      "step": 161168
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5495760440826416,
      "learning_rate": 0.00012403891955551394,
      "loss": 3.0898,
      "step": 161169
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5393149852752686,
      "learning_rate": 0.0001240356065336112,
      "loss": 2.8327,
      "step": 161170
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2452893257141113,
      "learning_rate": 0.00012403229354442363,
      "loss": 2.848,
      "step": 161171
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.320211887359619,
      "learning_rate": 0.00012402898058795167,
      "loss": 2.9381,
      "step": 161172
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.438115358352661,
      "learning_rate": 0.00012402566766419627,
      "loss": 3.1397,
      "step": 161173
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2404086589813232,
      "learning_rate": 0.00012402235477315763,
      "loss": 3.0288,
      "step": 161174
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2273788452148438,
      "learning_rate": 0.00012401904191483665,
      "loss": 3.1303,
      "step": 161175
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5922586917877197,
      "learning_rate": 0.00012401572908923373,
      "loss": 2.9634,
      "step": 161176
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1884896755218506,
      "learning_rate": 0.00012401241629634972,
      "loss": 3.0626,
      "step": 161177
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3286895751953125,
      "learning_rate": 0.000124009103536185,
      "loss": 2.9386,
      "step": 161178
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.374119758605957,
      "learning_rate": 0.00012400579080874057,
      "loss": 2.8459,
      "step": 161179
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.226271867752075,
      "learning_rate": 0.00012400247811401654,
      "loss": 3.2236,
      "step": 161180
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.724273204803467,
      "learning_rate": 0.0001239991654520139,
      "loss": 2.7892,
      "step": 161181
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0547292232513428,
      "learning_rate": 0.000123995852822733,
      "loss": 2.9445,
      "step": 161182
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1511430740356445,
      "learning_rate": 0.00012399254022617474,
      "loss": 2.979,
      "step": 161183
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.905341625213623,
      "learning_rate": 0.00012398922766233944,
      "loss": 2.9874,
      "step": 161184
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3879525661468506,
      "learning_rate": 0.00012398591513122808,
      "loss": 3.2417,
      "step": 161185
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.960844039916992,
      "learning_rate": 0.00012398260263284087,
      "loss": 2.9943,
      "step": 161186
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.265580177307129,
      "learning_rate": 0.00012397929016717872,
      "loss": 2.8878,
      "step": 161187
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8211090564727783,
      "learning_rate": 0.00012397597773424203,
      "loss": 3.0213,
      "step": 161188
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.896223545074463,
      "learning_rate": 0.00012397266533403163,
      "loss": 3.2497,
      "step": 161189
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5648951530456543,
      "learning_rate": 0.00012396935296654795,
      "loss": 3.0385,
      "step": 161190
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.840628147125244,
      "learning_rate": 0.0001239660406317919,
      "loss": 3.0155,
      "step": 161191
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3932242393493652,
      "learning_rate": 0.00012396272832976364,
      "loss": 2.9006,
      "step": 161192
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.524352788925171,
      "learning_rate": 0.00012395941606046417,
      "loss": 2.879,
      "step": 161193
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.137568950653076,
      "learning_rate": 0.00012395610382389384,
      "loss": 3.361,
      "step": 161194
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.370492935180664,
      "learning_rate": 0.0001239527916200535,
      "loss": 2.7217,
      "step": 161195
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4425363540649414,
      "learning_rate": 0.0001239494794489436,
      "loss": 2.8832,
      "step": 161196
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7515523433685303,
      "learning_rate": 0.00012394616731056497,
      "loss": 2.8975,
      "step": 161197
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.220947027206421,
      "learning_rate": 0.00012394285520491788,
      "loss": 2.9193,
      "step": 161198
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5841715335845947,
      "learning_rate": 0.00012393954313200326,
      "loss": 2.8838,
      "step": 161199
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6951372623443604,
      "learning_rate": 0.0001239362310918215,
      "loss": 3.0105,
      "step": 161200
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.483081102371216,
      "learning_rate": 0.00012393291908437338,
      "loss": 2.862,
      "step": 161201
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.117044687271118,
      "learning_rate": 0.00012392960710965936,
      "loss": 3.0937,
      "step": 161202
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.227783203125,
      "learning_rate": 0.00012392629516768026,
      "loss": 2.8831,
      "step": 161203
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.280510425567627,
      "learning_rate": 0.0001239229832584366,
      "loss": 2.8215,
      "step": 161204
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.463646650314331,
      "learning_rate": 0.00012391967138192898,
      "loss": 2.9756,
      "step": 161205
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.929035186767578,
      "learning_rate": 0.0001239163595381579,
      "loss": 2.8783,
      "step": 161206
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9361040592193604,
      "learning_rate": 0.0001239130477271242,
      "loss": 2.9763,
      "step": 161207
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.697187900543213,
      "learning_rate": 0.0001239097359488283,
      "loss": 2.7125,
      "step": 161208
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.655911684036255,
      "learning_rate": 0.00012390642420327097,
      "loss": 2.9171,
      "step": 161209
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.467755079269409,
      "learning_rate": 0.0001239031124904528,
      "loss": 3.1711,
      "step": 161210
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.577120542526245,
      "learning_rate": 0.00012389980081037433,
      "loss": 2.6345,
      "step": 161211
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6899213790893555,
      "learning_rate": 0.00012389648916303616,
      "loss": 3.1087,
      "step": 161212
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4954559803009033,
      "learning_rate": 0.000123893177548439,
      "loss": 2.9075,
      "step": 161213
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1577677726745605,
      "learning_rate": 0.00012388986596658337,
      "loss": 3.0326,
      "step": 161214
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6251707077026367,
      "learning_rate": 0.00012388655441747,
      "loss": 2.8582,
      "step": 161215
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6636385917663574,
      "learning_rate": 0.0001238832429010995,
      "loss": 2.7164,
      "step": 161216
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0367865562438965,
      "learning_rate": 0.00012387993141747228,
      "loss": 3.1149,
      "step": 161217
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6905295848846436,
      "learning_rate": 0.00012387661996658922,
      "loss": 2.8811,
      "step": 161218
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8013882637023926,
      "learning_rate": 0.0001238733085484508,
      "loss": 3.1431,
      "step": 161219
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7367188930511475,
      "learning_rate": 0.00012386999716305756,
      "loss": 2.9511,
      "step": 161220
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.164058208465576,
      "learning_rate": 0.00012386668581041034,
      "loss": 2.7415,
      "step": 161221
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6726362705230713,
      "learning_rate": 0.00012386337449050964,
      "loss": 3.1183,
      "step": 161222
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6876180171966553,
      "learning_rate": 0.00012386006320335593,
      "loss": 3.1002,
      "step": 161223
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6523597240448,
      "learning_rate": 0.00012385675194895006,
      "loss": 2.935,
      "step": 161224
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4087562561035156,
      "learning_rate": 0.0001238534407272924,
      "loss": 2.6946,
      "step": 161225
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5822274684906006,
      "learning_rate": 0.0001238501295383839,
      "loss": 3.1112,
      "step": 161226
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.255570888519287,
      "learning_rate": 0.00012384681838222494,
      "loss": 2.9556,
      "step": 161227
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.001824378967285,
      "learning_rate": 0.00012384350725881622,
      "loss": 2.9173,
      "step": 161228
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.477015972137451,
      "learning_rate": 0.00012384019616815817,
      "loss": 3.0194,
      "step": 161229
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1348371505737305,
      "learning_rate": 0.00012383688511025166,
      "loss": 2.7662,
      "step": 161230
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.986708402633667,
      "learning_rate": 0.0001238335740850971,
      "loss": 2.9071,
      "step": 161231
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5684237480163574,
      "learning_rate": 0.0001238302630926953,
      "loss": 2.5448,
      "step": 161232
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.561751127243042,
      "learning_rate": 0.0001238269521330468,
      "loss": 3.0831,
      "step": 161233
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6864681243896484,
      "learning_rate": 0.0001238236412061522,
      "loss": 2.8211,
      "step": 161234
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1877758502960205,
      "learning_rate": 0.00012382033031201197,
      "loss": 2.7077,
      "step": 161235
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4014456272125244,
      "learning_rate": 0.000123817019450627,
      "loss": 3.073,
      "step": 161236
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.469870090484619,
      "learning_rate": 0.00012381370862199768,
      "loss": 2.9814,
      "step": 161237
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.321296453475952,
      "learning_rate": 0.0001238103978261248,
      "loss": 2.7724,
      "step": 161238
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.043032169342041,
      "learning_rate": 0.0001238070870630088,
      "loss": 2.9402,
      "step": 161239
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4725160598754883,
      "learning_rate": 0.00012380377633265058,
      "loss": 2.8932,
      "step": 161240
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.57707142829895,
      "learning_rate": 0.00012380046563505035,
      "loss": 2.8405,
      "step": 161241
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6420116424560547,
      "learning_rate": 0.0001237971549702091,
      "loss": 3.0777,
      "step": 161242
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1582858562469482,
      "learning_rate": 0.0001237938443381271,
      "loss": 3.0197,
      "step": 161243
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.561966896057129,
      "learning_rate": 0.0001237905337388053,
      "loss": 3.2309,
      "step": 161244
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.284877061843872,
      "learning_rate": 0.00012378722317224404,
      "loss": 2.9434,
      "step": 161245
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.718226671218872,
      "learning_rate": 0.0001237839126384443,
      "loss": 2.8524,
      "step": 161246
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6501736640930176,
      "learning_rate": 0.00012378060213740624,
      "loss": 2.8504,
      "step": 161247
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6517958641052246,
      "learning_rate": 0.00012377729166913081,
      "loss": 2.7528,
      "step": 161248
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.366898775100708,
      "learning_rate": 0.00012377398123361835,
      "loss": 3.0311,
      "step": 161249
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.204515218734741,
      "learning_rate": 0.0001237706708308698,
      "loss": 2.9147,
      "step": 161250
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9982447624206543,
      "learning_rate": 0.00012376736046088547,
      "loss": 2.9983,
      "step": 161251
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6429402828216553,
      "learning_rate": 0.00012376405012366632,
      "loss": 2.932,
      "step": 161252
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.424523115158081,
      "learning_rate": 0.00012376073981921259,
      "loss": 2.9021,
      "step": 161253
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9208168983459473,
      "learning_rate": 0.0001237574295475251,
      "loss": 2.7923,
      "step": 161254
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1768734455108643,
      "learning_rate": 0.00012375411930860437,
      "loss": 2.8542,
      "step": 161255
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.392920970916748,
      "learning_rate": 0.0001237508091024512,
      "loss": 2.9078,
      "step": 161256
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.11969256401062,
      "learning_rate": 0.00012374749892906593,
      "loss": 2.9846,
      "step": 161257
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.255763292312622,
      "learning_rate": 0.0001237441887884496,
      "loss": 2.957,
      "step": 161258
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.52003812789917,
      "learning_rate": 0.00012374087868060227,
      "loss": 3.0575,
      "step": 161259
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.588841199874878,
      "learning_rate": 0.000123737568605525,
      "loss": 2.9255,
      "step": 161260
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.061316967010498,
      "learning_rate": 0.00012373425856321808,
      "loss": 2.866,
      "step": 161261
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.358110189437866,
      "learning_rate": 0.00012373094855368244,
      "loss": 3.2785,
      "step": 161262
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.117431640625,
      "learning_rate": 0.00012372763857691844,
      "loss": 2.8917,
      "step": 161263
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.975491762161255,
      "learning_rate": 0.000123724328632927,
      "loss": 3.0406,
      "step": 161264
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.158160924911499,
      "learning_rate": 0.0001237210187217083,
      "loss": 2.6364,
      "step": 161265
    },
    {
      "epoch": 2.1,
      "grad_norm": 6.092525005340576,
      "learning_rate": 0.00012371770884326332,
      "loss": 2.754,
      "step": 161266
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.947597980499268,
      "learning_rate": 0.00012371439899759245,
      "loss": 2.9322,
      "step": 161267
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3511359691619873,
      "learning_rate": 0.00012371108918469647,
      "loss": 3.005,
      "step": 161268
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.185455322265625,
      "learning_rate": 0.00012370777940457585,
      "loss": 2.997,
      "step": 161269
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.373044967651367,
      "learning_rate": 0.0001237044696572315,
      "loss": 2.8559,
      "step": 161270
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6364288330078125,
      "learning_rate": 0.00012370115994266357,
      "loss": 3.0201,
      "step": 161271
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.336001873016357,
      "learning_rate": 0.00012369785026087308,
      "loss": 2.9016,
      "step": 161272
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.209418296813965,
      "learning_rate": 0.00012369454061186033,
      "loss": 2.9918,
      "step": 161273
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.415119171142578,
      "learning_rate": 0.0001236912309956262,
      "loss": 2.9487,
      "step": 161274
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1237199306488037,
      "learning_rate": 0.00012368792141217113,
      "loss": 3.0753,
      "step": 161275
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.154994487762451,
      "learning_rate": 0.0001236846118614959,
      "loss": 3.0385,
      "step": 161276
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5973474979400635,
      "learning_rate": 0.00012368130234360103,
      "loss": 2.911,
      "step": 161277
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.78391170501709,
      "learning_rate": 0.00012367799285848712,
      "loss": 2.7877,
      "step": 161278
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.381969451904297,
      "learning_rate": 0.0001236746834061547,
      "loss": 2.9451,
      "step": 161279
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.71162486076355,
      "learning_rate": 0.00012367137398660457,
      "loss": 2.9533,
      "step": 161280
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.146703004837036,
      "learning_rate": 0.00012366806459983718,
      "loss": 2.7819,
      "step": 161281
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0987632274627686,
      "learning_rate": 0.00012366475524585335,
      "loss": 3.0766,
      "step": 161282
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.816683053970337,
      "learning_rate": 0.00012366144592465355,
      "loss": 2.9933,
      "step": 161283
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4159605503082275,
      "learning_rate": 0.0001236581366362384,
      "loss": 2.9922,
      "step": 161284
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.349745035171509,
      "learning_rate": 0.00012365482738060842,
      "loss": 3.1556,
      "step": 161285
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.219101667404175,
      "learning_rate": 0.00012365151815776446,
      "loss": 3.0758,
      "step": 161286
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1268625259399414,
      "learning_rate": 0.00012364820896770693,
      "loss": 2.8395,
      "step": 161287
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3845932483673096,
      "learning_rate": 0.0001236448998104366,
      "loss": 3.1135,
      "step": 161288
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.810685634613037,
      "learning_rate": 0.00012364159068595402,
      "loss": 2.6758,
      "step": 161289
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1201884746551514,
      "learning_rate": 0.0001236382815942598,
      "loss": 3.2689,
      "step": 161290
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.543808937072754,
      "learning_rate": 0.00012363497253535443,
      "loss": 3.058,
      "step": 161291
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3695971965789795,
      "learning_rate": 0.00012363166350923876,
      "loss": 3.0114,
      "step": 161292
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6646180152893066,
      "learning_rate": 0.0001236283545159132,
      "loss": 2.9248,
      "step": 161293
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4116299152374268,
      "learning_rate": 0.00012362504555537857,
      "loss": 3.1138,
      "step": 161294
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4671802520751953,
      "learning_rate": 0.00012362173662763536,
      "loss": 3.1029,
      "step": 161295
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.353346347808838,
      "learning_rate": 0.0001236184277326842,
      "loss": 2.7542,
      "step": 161296
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1926238536834717,
      "learning_rate": 0.00012361511887052558,
      "loss": 2.9342,
      "step": 161297
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.652026891708374,
      "learning_rate": 0.00012361181004116035,
      "loss": 2.8767,
      "step": 161298
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1297624111175537,
      "learning_rate": 0.00012360850124458893,
      "loss": 2.714,
      "step": 161299
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.575087547302246,
      "learning_rate": 0.00012360519248081213,
      "loss": 2.9061,
      "step": 161300
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5636160373687744,
      "learning_rate": 0.00012360188374983043,
      "loss": 3.251,
      "step": 161301
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1522717475891113,
      "learning_rate": 0.0001235985750516444,
      "loss": 2.8819,
      "step": 161302
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.094738006591797,
      "learning_rate": 0.0001235952663862548,
      "loss": 3.0201,
      "step": 161303
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9092464447021484,
      "learning_rate": 0.00012359195775366215,
      "loss": 2.8515,
      "step": 161304
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5990989208221436,
      "learning_rate": 0.00012358864915386702,
      "loss": 3.029,
      "step": 161305
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9820711612701416,
      "learning_rate": 0.00012358534058687018,
      "loss": 3.1024,
      "step": 161306
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.29236364364624,
      "learning_rate": 0.00012358203205267213,
      "loss": 2.7676,
      "step": 161307
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0199532508850098,
      "learning_rate": 0.00012357872355127345,
      "loss": 2.9912,
      "step": 161308
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6043639183044434,
      "learning_rate": 0.0001235754150826749,
      "loss": 2.8876,
      "step": 161309
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.246997833251953,
      "learning_rate": 0.000123572106646877,
      "loss": 2.9308,
      "step": 161310
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4878392219543457,
      "learning_rate": 0.0001235687982438803,
      "loss": 2.9342,
      "step": 161311
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.86296010017395,
      "learning_rate": 0.00012356548987368555,
      "loss": 3.1965,
      "step": 161312
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.837938070297241,
      "learning_rate": 0.00012356218153629334,
      "loss": 3.0591,
      "step": 161313
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5810816287994385,
      "learning_rate": 0.00012355887323170413,
      "loss": 3.0934,
      "step": 161314
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.97367787361145,
      "learning_rate": 0.0001235555649599188,
      "loss": 2.9025,
      "step": 161315
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.9966537952423096,
      "learning_rate": 0.00012355225672093767,
      "loss": 3.1156,
      "step": 161316
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1079280376434326,
      "learning_rate": 0.00012354894851476162,
      "loss": 3.0299,
      "step": 161317
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.255077838897705,
      "learning_rate": 0.00012354564034139118,
      "loss": 2.9453,
      "step": 161318
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.46374249458313,
      "learning_rate": 0.0001235423322008269,
      "loss": 3.2528,
      "step": 161319
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3776955604553223,
      "learning_rate": 0.00012353902409306935,
      "loss": 3.0491,
      "step": 161320
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.157506227493286,
      "learning_rate": 0.00012353571601811934,
      "loss": 2.9405,
      "step": 161321
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4953579902648926,
      "learning_rate": 0.00012353240797597723,
      "loss": 2.8788,
      "step": 161322
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.736398696899414,
      "learning_rate": 0.0001235290999666439,
      "loss": 2.8769,
      "step": 161323
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0579311847686768,
      "learning_rate": 0.00012352579199011973,
      "loss": 3.0535,
      "step": 161324
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3058605194091797,
      "learning_rate": 0.00012352248404640567,
      "loss": 3.1926,
      "step": 161325
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.757558822631836,
      "learning_rate": 0.00012351917613550188,
      "loss": 2.8583,
      "step": 161326
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.883946657180786,
      "learning_rate": 0.00012351586825740934,
      "loss": 2.8774,
      "step": 161327
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6880478858947754,
      "learning_rate": 0.00012351256041212843,
      "loss": 2.8833,
      "step": 161328
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6631274223327637,
      "learning_rate": 0.00012350925259965997,
      "loss": 3.0774,
      "step": 161329
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4973554611206055,
      "learning_rate": 0.00012350594482000432,
      "loss": 2.9744,
      "step": 161330
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.646420955657959,
      "learning_rate": 0.00012350263707316248,
      "loss": 2.813,
      "step": 161331
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0276904106140137,
      "learning_rate": 0.00012349932935913464,
      "loss": 2.9851,
      "step": 161332
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.243363857269287,
      "learning_rate": 0.0001234960216779217,
      "loss": 2.9066,
      "step": 161333
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6155145168304443,
      "learning_rate": 0.00012349271402952407,
      "loss": 2.7482,
      "step": 161334
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6043155193328857,
      "learning_rate": 0.0001234894064139426,
      "loss": 2.9114,
      "step": 161335
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.294342279434204,
      "learning_rate": 0.00012348609883117766,
      "loss": 2.9541,
      "step": 161336
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3608877658843994,
      "learning_rate": 0.0001234827912812302,
      "loss": 2.8456,
      "step": 161337
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2701029777526855,
      "learning_rate": 0.0001234794837641004,
      "loss": 2.6552,
      "step": 161338
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.009629964828491,
      "learning_rate": 0.0001234761762797892,
      "loss": 2.6841,
      "step": 161339
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.874067544937134,
      "learning_rate": 0.00012347286882829703,
      "loss": 2.8273,
      "step": 161340
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.051574230194092,
      "learning_rate": 0.0001234695614096247,
      "loss": 3.0135,
      "step": 161341
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.696993112564087,
      "learning_rate": 0.00012346625402377256,
      "loss": 3.0715,
      "step": 161342
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.684753179550171,
      "learning_rate": 0.00012346294667074152,
      "loss": 2.8651,
      "step": 161343
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.211796283721924,
      "learning_rate": 0.00012345963935053207,
      "loss": 2.876,
      "step": 161344
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7229185104370117,
      "learning_rate": 0.00012345633206314473,
      "loss": 3.1712,
      "step": 161345
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.113431453704834,
      "learning_rate": 0.00012345302480858017,
      "loss": 3.09,
      "step": 161346
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4559319019317627,
      "learning_rate": 0.00012344971758683905,
      "loss": 2.9158,
      "step": 161347
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1156234741210938,
      "learning_rate": 0.0001234464103979219,
      "loss": 3.1617,
      "step": 161348
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.292057752609253,
      "learning_rate": 0.0001234431032418295,
      "loss": 2.8778,
      "step": 161349
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7899022102355957,
      "learning_rate": 0.00012343979611856236,
      "loss": 2.6558,
      "step": 161350
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3966591358184814,
      "learning_rate": 0.0001234364890281211,
      "loss": 3.0649,
      "step": 161351
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.854726791381836,
      "learning_rate": 0.0001234331819705062,
      "loss": 2.886,
      "step": 161352
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9546923637390137,
      "learning_rate": 0.0001234298749457185,
      "loss": 3.329,
      "step": 161353
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.381678104400635,
      "learning_rate": 0.00012342656795375845,
      "loss": 3.0203,
      "step": 161354
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3023810386657715,
      "learning_rate": 0.00012342326099462682,
      "loss": 3.0222,
      "step": 161355
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.462540864944458,
      "learning_rate": 0.00012341995406832415,
      "loss": 3.0241,
      "step": 161356
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3033766746520996,
      "learning_rate": 0.00012341664717485103,
      "loss": 2.8371,
      "step": 161357
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.02834939956665,
      "learning_rate": 0.000123413340314208,
      "loss": 2.9879,
      "step": 161358
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.691570997238159,
      "learning_rate": 0.00012341003348639585,
      "loss": 3.096,
      "step": 161359
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4104433059692383,
      "learning_rate": 0.000123406726691415,
      "loss": 2.9957,
      "step": 161360
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.818678140640259,
      "learning_rate": 0.0001234034199292663,
      "loss": 2.8714,
      "step": 161361
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.951920509338379,
      "learning_rate": 0.00012340011319995025,
      "loss": 3.1403,
      "step": 161362
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.817718505859375,
      "learning_rate": 0.0001233968065034674,
      "loss": 2.9815,
      "step": 161363
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.230499029159546,
      "learning_rate": 0.00012339349983981837,
      "loss": 3.1577,
      "step": 161364
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4338929653167725,
      "learning_rate": 0.00012339019320900389,
      "loss": 2.9439,
      "step": 161365
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.079452991485596,
      "learning_rate": 0.00012338688661102444,
      "loss": 2.7337,
      "step": 161366
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.375249862670898,
      "learning_rate": 0.00012338358004588076,
      "loss": 3.2055,
      "step": 161367
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.180262088775635,
      "learning_rate": 0.00012338027351357342,
      "loss": 2.8687,
      "step": 161368
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.892540454864502,
      "learning_rate": 0.00012337696701410304,
      "loss": 2.8247,
      "step": 161369
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.157144069671631,
      "learning_rate": 0.0001233736605474701,
      "loss": 2.7607,
      "step": 161370
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.162542819976807,
      "learning_rate": 0.0001233703541136754,
      "loss": 3.0711,
      "step": 161371
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3918967247009277,
      "learning_rate": 0.00012336704771271942,
      "loss": 3.1362,
      "step": 161372
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.224022626876831,
      "learning_rate": 0.00012336374134460295,
      "loss": 2.9092,
      "step": 161373
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4220008850097656,
      "learning_rate": 0.00012336043500932648,
      "loss": 3.0065,
      "step": 161374
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.2341349124908447,
      "learning_rate": 0.00012335712870689065,
      "loss": 2.7544,
      "step": 161375
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.182922601699829,
      "learning_rate": 0.00012335382243729596,
      "loss": 3.1362,
      "step": 161376
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.848870277404785,
      "learning_rate": 0.0001233505162005432,
      "loss": 2.9822,
      "step": 161377
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.167128086090088,
      "learning_rate": 0.00012334720999663285,
      "loss": 2.9975,
      "step": 161378
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6741065979003906,
      "learning_rate": 0.00012334390382556567,
      "loss": 2.7205,
      "step": 161379
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.607893705368042,
      "learning_rate": 0.0001233405976873422,
      "loss": 3.1842,
      "step": 161380
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2088441848754883,
      "learning_rate": 0.00012333729158196306,
      "loss": 2.7828,
      "step": 161381
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7953948974609375,
      "learning_rate": 0.00012333398550942872,
      "loss": 2.9141,
      "step": 161382
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2131810188293457,
      "learning_rate": 0.00012333067946974008,
      "loss": 3.0139,
      "step": 161383
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6856112480163574,
      "learning_rate": 0.00012332737346289744,
      "loss": 3.0714,
      "step": 161384
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8046929836273193,
      "learning_rate": 0.0001233240674889017,
      "loss": 2.9061,
      "step": 161385
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.9586355686187744,
      "learning_rate": 0.00012332076154775336,
      "loss": 2.7882,
      "step": 161386
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.918761968612671,
      "learning_rate": 0.0001233174556394529,
      "loss": 2.8553,
      "step": 161387
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0118157863616943,
      "learning_rate": 0.0001233141497640012,
      "loss": 2.9183,
      "step": 161388
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.376269817352295,
      "learning_rate": 0.0001233108439213987,
      "loss": 3.044,
      "step": 161389
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3062353134155273,
      "learning_rate": 0.00012330753811164594,
      "loss": 2.7998,
      "step": 161390
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5963704586029053,
      "learning_rate": 0.0001233042323347438,
      "loss": 2.8024,
      "step": 161391
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7854959964752197,
      "learning_rate": 0.0001233009265906927,
      "loss": 2.9925,
      "step": 161392
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3585262298583984,
      "learning_rate": 0.00012329762087949319,
      "loss": 3.0253,
      "step": 161393
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.731687545776367,
      "learning_rate": 0.0001232943152011461,
      "loss": 2.8794,
      "step": 161394
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3211803436279297,
      "learning_rate": 0.00012329100955565193,
      "loss": 2.974,
      "step": 161395
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9565391540527344,
      "learning_rate": 0.0001232877039430112,
      "loss": 3.1371,
      "step": 161396
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3829033374786377,
      "learning_rate": 0.00012328439836322474,
      "loss": 2.9879,
      "step": 161397
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.844634771347046,
      "learning_rate": 0.00012328109281629302,
      "loss": 2.9555,
      "step": 161398
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2592906951904297,
      "learning_rate": 0.00012327778730221656,
      "loss": 2.9825,
      "step": 161399
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4365792274475098,
      "learning_rate": 0.00012327448182099623,
      "loss": 2.7922,
      "step": 161400
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.519192218780518,
      "learning_rate": 0.00012327117637263242,
      "loss": 2.9538,
      "step": 161401
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.276226043701172,
      "learning_rate": 0.0001232678709571259,
      "loss": 2.9487,
      "step": 161402
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7439589500427246,
      "learning_rate": 0.00012326456557447724,
      "loss": 3.0363,
      "step": 161403
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2739098072052,
      "learning_rate": 0.000123261260224687,
      "loss": 2.9991,
      "step": 161404
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.589449644088745,
      "learning_rate": 0.00012325795490775578,
      "loss": 2.7523,
      "step": 161405
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.34393572807312,
      "learning_rate": 0.00012325464962368432,
      "loss": 2.9139,
      "step": 161406
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.581220626831055,
      "learning_rate": 0.00012325134437247306,
      "loss": 2.9751,
      "step": 161407
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.784122943878174,
      "learning_rate": 0.0001232480391541228,
      "loss": 2.9951,
      "step": 161408
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.620499610900879,
      "learning_rate": 0.00012324473396863408,
      "loss": 3.1812,
      "step": 161409
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.861640453338623,
      "learning_rate": 0.00012324142881600737,
      "loss": 3.3579,
      "step": 161410
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.354928731918335,
      "learning_rate": 0.00012323812369624353,
      "loss": 2.7751,
      "step": 161411
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.297312021255493,
      "learning_rate": 0.00012323481860934308,
      "loss": 3.0308,
      "step": 161412
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.2015206813812256,
      "learning_rate": 0.0001232315135553065,
      "loss": 3.0085,
      "step": 161413
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8116142749786377,
      "learning_rate": 0.00012322820853413461,
      "loss": 3.1546,
      "step": 161414
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.649144172668457,
      "learning_rate": 0.00012322490354582782,
      "loss": 2.8935,
      "step": 161415
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.322781562805176,
      "learning_rate": 0.000123221598590387,
      "loss": 3.1733,
      "step": 161416
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.972508430480957,
      "learning_rate": 0.0001232182936678126,
      "loss": 2.8248,
      "step": 161417
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.817762613296509,
      "learning_rate": 0.00012321498877810525,
      "loss": 2.6688,
      "step": 161418
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0615808963775635,
      "learning_rate": 0.00012321168392126546,
      "loss": 3.1655,
      "step": 161419
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.2461843490600586,
      "learning_rate": 0.0001232083790972941,
      "loss": 2.6259,
      "step": 161420
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.885878562927246,
      "learning_rate": 0.0001232050743061915,
      "loss": 3.0575,
      "step": 161421
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.593595027923584,
      "learning_rate": 0.00012320176954795853,
      "loss": 2.6631,
      "step": 161422
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.31666898727417,
      "learning_rate": 0.00012319846482259568,
      "loss": 2.8472,
      "step": 161423
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.220719337463379,
      "learning_rate": 0.00012319516013010358,
      "loss": 3.0165,
      "step": 161424
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4998974800109863,
      "learning_rate": 0.00012319185547048274,
      "loss": 3.0485,
      "step": 161425
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4288439750671387,
      "learning_rate": 0.00012318855084373395,
      "loss": 2.8767,
      "step": 161426
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.0097553730010986,
      "learning_rate": 0.00012318524624985768,
      "loss": 2.9794,
      "step": 161427
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.190230369567871,
      "learning_rate": 0.00012318194168885467,
      "loss": 2.8623,
      "step": 161428
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.560443878173828,
      "learning_rate": 0.0001231786371607255,
      "loss": 3.0386,
      "step": 161429
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.965282440185547,
      "learning_rate": 0.00012317533266547077,
      "loss": 3.0062,
      "step": 161430
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8919222354888916,
      "learning_rate": 0.000123172028203091,
      "loss": 2.7788,
      "step": 161431
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.204637289047241,
      "learning_rate": 0.00012316872377358693,
      "loss": 2.8918,
      "step": 161432
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6811301708221436,
      "learning_rate": 0.00012316541937695908,
      "loss": 2.8335,
      "step": 161433
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4496164321899414,
      "learning_rate": 0.0001231621150132082,
      "loss": 3.0051,
      "step": 161434
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5108482837677,
      "learning_rate": 0.0001231588106823348,
      "loss": 2.7528,
      "step": 161435
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6187403202056885,
      "learning_rate": 0.00012315550638433955,
      "loss": 3.148,
      "step": 161436
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.874876976013184,
      "learning_rate": 0.00012315220211922288,
      "loss": 3.122,
      "step": 161437
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.142645835876465,
      "learning_rate": 0.0001231488978869857,
      "loss": 2.7429,
      "step": 161438
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2691845893859863,
      "learning_rate": 0.00012314559368762834,
      "loss": 2.8845,
      "step": 161439
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.351444721221924,
      "learning_rate": 0.00012314228952115168,
      "loss": 3.1594,
      "step": 161440
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.79172420501709,
      "learning_rate": 0.00012313898538755623,
      "loss": 2.993,
      "step": 161441
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6800856590270996,
      "learning_rate": 0.00012313568128684255,
      "loss": 2.7217,
      "step": 161442
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.254150867462158,
      "learning_rate": 0.00012313237721901118,
      "loss": 2.9537,
      "step": 161443
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9045889377593994,
      "learning_rate": 0.00012312907318406296,
      "loss": 2.9943,
      "step": 161444
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5594019889831543,
      "learning_rate": 0.00012312576918199826,
      "loss": 3.1299,
      "step": 161445
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6279337406158447,
      "learning_rate": 0.00012312246521281793,
      "loss": 2.9023,
      "step": 161446
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0875730514526367,
      "learning_rate": 0.00012311916127652248,
      "loss": 2.7537,
      "step": 161447
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.45430850982666,
      "learning_rate": 0.00012311585737311253,
      "loss": 3.2556,
      "step": 161448
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2917284965515137,
      "learning_rate": 0.00012311255350258853,
      "loss": 3.1361,
      "step": 161449
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1614668369293213,
      "learning_rate": 0.00012310924966495138,
      "loss": 2.8897,
      "step": 161450
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6027462482452393,
      "learning_rate": 0.00012310594586020145,
      "loss": 3.0598,
      "step": 161451
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0404412746429443,
      "learning_rate": 0.0001231026420883396,
      "loss": 3.0209,
      "step": 161452
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.879871368408203,
      "learning_rate": 0.00012309933834936626,
      "loss": 2.9603,
      "step": 161453
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5300581455230713,
      "learning_rate": 0.00012309603464328214,
      "loss": 2.8737,
      "step": 161454
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.7697746753692627,
      "learning_rate": 0.00012309273097008765,
      "loss": 3.0656,
      "step": 161455
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.624481439590454,
      "learning_rate": 0.0001230894273297837,
      "loss": 2.921,
      "step": 161456
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1597189903259277,
      "learning_rate": 0.00012308612372237065,
      "loss": 3.0653,
      "step": 161457
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7735416889190674,
      "learning_rate": 0.00012308282014784934,
      "loss": 3.0192,
      "step": 161458
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.2290427684783936,
      "learning_rate": 0.00012307951660622026,
      "loss": 2.9202,
      "step": 161459
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7161412239074707,
      "learning_rate": 0.00012307621309748406,
      "loss": 2.6982,
      "step": 161460
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.181117534637451,
      "learning_rate": 0.00012307290962164117,
      "loss": 2.9746,
      "step": 161461
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0006184577941895,
      "learning_rate": 0.00012306960617869254,
      "loss": 2.9151,
      "step": 161462
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.918599843978882,
      "learning_rate": 0.00012306630276863847,
      "loss": 3.0234,
      "step": 161463
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9316725730895996,
      "learning_rate": 0.00012306299939147985,
      "loss": 3.2819,
      "step": 161464
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0909392833709717,
      "learning_rate": 0.0001230596960472171,
      "loss": 3.0597,
      "step": 161465
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.574580192565918,
      "learning_rate": 0.00012305639273585094,
      "loss": 3.1267,
      "step": 161466
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.7565689086914062,
      "learning_rate": 0.00012305308945738183,
      "loss": 2.7495,
      "step": 161467
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.04484224319458,
      "learning_rate": 0.00012304978621181057,
      "loss": 2.8766,
      "step": 161468
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.850781202316284,
      "learning_rate": 0.00012304648299913762,
      "loss": 2.7008,
      "step": 161469
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8372507095336914,
      "learning_rate": 0.00012304317981936375,
      "loss": 2.9133,
      "step": 161470
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2580738067626953,
      "learning_rate": 0.00012303987667248948,
      "loss": 2.8699,
      "step": 161471
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4796226024627686,
      "learning_rate": 0.0001230365735585155,
      "loss": 3.0297,
      "step": 161472
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5705223083496094,
      "learning_rate": 0.0001230332704774422,
      "loss": 2.8818,
      "step": 161473
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.058137893676758,
      "learning_rate": 0.00012302996742927049,
      "loss": 3.0663,
      "step": 161474
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.152649164199829,
      "learning_rate": 0.00012302666441400072,
      "loss": 3.1119,
      "step": 161475
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.038210391998291,
      "learning_rate": 0.00012302336143163374,
      "loss": 3.2966,
      "step": 161476
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2020230293273926,
      "learning_rate": 0.00012302005848216998,
      "loss": 2.7089,
      "step": 161477
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.775150775909424,
      "learning_rate": 0.0001230167555656102,
      "loss": 3.1603,
      "step": 161478
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.377382516860962,
      "learning_rate": 0.00012301345268195496,
      "loss": 2.9438,
      "step": 161479
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.590954542160034,
      "learning_rate": 0.00012301014983120487,
      "loss": 2.9967,
      "step": 161480
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3693833351135254,
      "learning_rate": 0.00012300684701336043,
      "loss": 2.8958,
      "step": 161481
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.260108470916748,
      "learning_rate": 0.00012300354422842245,
      "loss": 2.6624,
      "step": 161482
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.269733428955078,
      "learning_rate": 0.00012300024147639135,
      "loss": 3.0809,
      "step": 161483
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2254555225372314,
      "learning_rate": 0.00012299693875726797,
      "loss": 3.1687,
      "step": 161484
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.567653179168701,
      "learning_rate": 0.00012299363607105277,
      "loss": 3.1491,
      "step": 161485
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.873368501663208,
      "learning_rate": 0.0001229903334177463,
      "loss": 3.1017,
      "step": 161486
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3486292362213135,
      "learning_rate": 0.0001229870307973494,
      "loss": 2.85,
      "step": 161487
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.195685863494873,
      "learning_rate": 0.00012298372820986254,
      "loss": 2.9046,
      "step": 161488
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2820160388946533,
      "learning_rate": 0.0001229804256552862,
      "loss": 2.8947,
      "step": 161489
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.510209321975708,
      "learning_rate": 0.00012297712313362133,
      "loss": 2.7753,
      "step": 161490
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3222410678863525,
      "learning_rate": 0.0001229738206448683,
      "loss": 3.0102,
      "step": 161491
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1797616481781006,
      "learning_rate": 0.00012297051818902765,
      "loss": 2.7925,
      "step": 161492
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1639389991760254,
      "learning_rate": 0.00012296721576610028,
      "loss": 2.9446,
      "step": 161493
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9944002628326416,
      "learning_rate": 0.0001229639133760866,
      "loss": 3.0006,
      "step": 161494
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6653361320495605,
      "learning_rate": 0.00012296061101898717,
      "loss": 2.9831,
      "step": 161495
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.5923452377319336,
      "learning_rate": 0.00012295730869480283,
      "loss": 2.8132,
      "step": 161496
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.842188358306885,
      "learning_rate": 0.00012295400640353408,
      "loss": 3.0503,
      "step": 161497
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8431973457336426,
      "learning_rate": 0.00012295070414518138,
      "loss": 2.9639,
      "step": 161498
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.3535637855529785,
      "learning_rate": 0.0001229474019197456,
      "loss": 2.8991,
      "step": 161499
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6420326232910156,
      "learning_rate": 0.00012294409972722715,
      "loss": 2.8522,
      "step": 161500
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4497337341308594,
      "learning_rate": 0.00012294079756762684,
      "loss": 2.7656,
      "step": 161501
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4961965084075928,
      "learning_rate": 0.00012293749544094518,
      "loss": 2.9909,
      "step": 161502
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8084800243377686,
      "learning_rate": 0.00012293419334718277,
      "loss": 2.995,
      "step": 161503
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2173101902008057,
      "learning_rate": 0.00012293089128634013,
      "loss": 2.9209,
      "step": 161504
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6767895221710205,
      "learning_rate": 0.00012292758925841813,
      "loss": 2.7945,
      "step": 161505
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7655251026153564,
      "learning_rate": 0.00012292428726341705,
      "loss": 2.9427,
      "step": 161506
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.928140163421631,
      "learning_rate": 0.00012292098530133783,
      "loss": 3.1455,
      "step": 161507
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.439565420150757,
      "learning_rate": 0.00012291768337218098,
      "loss": 3.181,
      "step": 161508
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4313416481018066,
      "learning_rate": 0.000122914381475947,
      "loss": 2.8664,
      "step": 161509
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6395981311798096,
      "learning_rate": 0.00012291107961263654,
      "loss": 2.7765,
      "step": 161510
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.575330972671509,
      "learning_rate": 0.00012290777778225032,
      "loss": 2.7719,
      "step": 161511
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.008214235305786,
      "learning_rate": 0.0001229044759847888,
      "loss": 2.922,
      "step": 161512
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4121458530426025,
      "learning_rate": 0.00012290117422025277,
      "loss": 2.9432,
      "step": 161513
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.332571029663086,
      "learning_rate": 0.00012289787248864276,
      "loss": 3.072,
      "step": 161514
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3431737422943115,
      "learning_rate": 0.00012289457078995937,
      "loss": 3.0431,
      "step": 161515
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.186941623687744,
      "learning_rate": 0.00012289126912420315,
      "loss": 2.8691,
      "step": 161516
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5449445247650146,
      "learning_rate": 0.00012288796749137487,
      "loss": 2.9325,
      "step": 161517
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.589522123336792,
      "learning_rate": 0.00012288466589147493,
      "loss": 2.7664,
      "step": 161518
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.3713603019714355,
      "learning_rate": 0.00012288136432450422,
      "loss": 2.9935,
      "step": 161519
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.311285972595215,
      "learning_rate": 0.0001228780627904632,
      "loss": 2.9959,
      "step": 161520
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.64530348777771,
      "learning_rate": 0.00012287476128935247,
      "loss": 3.0294,
      "step": 161521
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.426039695739746,
      "learning_rate": 0.0001228714598211726,
      "loss": 2.9369,
      "step": 161522
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.425846576690674,
      "learning_rate": 0.00012286815838592435,
      "loss": 2.8197,
      "step": 161523
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5413129329681396,
      "learning_rate": 0.0001228648569836081,
      "loss": 2.912,
      "step": 161524
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3474326133728027,
      "learning_rate": 0.00012286155561422478,
      "loss": 2.8185,
      "step": 161525
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4370005130767822,
      "learning_rate": 0.00012285825427777483,
      "loss": 2.9174,
      "step": 161526
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.932971239089966,
      "learning_rate": 0.00012285495297425887,
      "loss": 2.8493,
      "step": 161527
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5334432125091553,
      "learning_rate": 0.0001228516517036774,
      "loss": 3.0155,
      "step": 161528
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0230796337127686,
      "learning_rate": 0.0001228483504660313,
      "loss": 2.823,
      "step": 161529
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8505430221557617,
      "learning_rate": 0.00012284504926132091,
      "loss": 3.0726,
      "step": 161530
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8354732990264893,
      "learning_rate": 0.00012284174808954707,
      "loss": 2.9518,
      "step": 161531
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9920432567596436,
      "learning_rate": 0.00012283844695071028,
      "loss": 2.836,
      "step": 161532
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7692902088165283,
      "learning_rate": 0.00012283514584481118,
      "loss": 3.0846,
      "step": 161533
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5467848777770996,
      "learning_rate": 0.00012283184477185027,
      "loss": 3.0417,
      "step": 161534
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.380744457244873,
      "learning_rate": 0.00012282854373182837,
      "loss": 2.9804,
      "step": 161535
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6361653804779053,
      "learning_rate": 0.00012282524272474586,
      "loss": 3.1234,
      "step": 161536
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.79193377494812,
      "learning_rate": 0.0001228219417506036,
      "loss": 3.0366,
      "step": 161537
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.1085896492004395,
      "learning_rate": 0.00012281864080940209,
      "loss": 3.0053,
      "step": 161538
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0461933612823486,
      "learning_rate": 0.00012281533990114195,
      "loss": 3.1108,
      "step": 161539
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.406454086303711,
      "learning_rate": 0.00012281203902582367,
      "loss": 2.8061,
      "step": 161540
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.722693920135498,
      "learning_rate": 0.00012280873818344806,
      "loss": 3.0417,
      "step": 161541
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.837210655212402,
      "learning_rate": 0.00012280543737401556,
      "loss": 2.9994,
      "step": 161542
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4878618717193604,
      "learning_rate": 0.000122802136597527,
      "loss": 3.1016,
      "step": 161543
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.467335224151611,
      "learning_rate": 0.00012279883585398273,
      "loss": 2.8962,
      "step": 161544
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3760972023010254,
      "learning_rate": 0.00012279553514338374,
      "loss": 2.9157,
      "step": 161545
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4986374378204346,
      "learning_rate": 0.00012279223446573016,
      "loss": 2.8511,
      "step": 161546
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.47391414642334,
      "learning_rate": 0.00012278893382102298,
      "loss": 3.255,
      "step": 161547
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.239429950714111,
      "learning_rate": 0.00012278563320926257,
      "loss": 3.0536,
      "step": 161548
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.551820278167725,
      "learning_rate": 0.00012278233263044976,
      "loss": 3.1089,
      "step": 161549
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.988713264465332,
      "learning_rate": 0.00012277903208458496,
      "loss": 2.8696,
      "step": 161550
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3146069049835205,
      "learning_rate": 0.0001227757315716691,
      "loss": 3.0215,
      "step": 161551
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.062294006347656,
      "learning_rate": 0.00012277243109170234,
      "loss": 3.0827,
      "step": 161552
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9217610359191895,
      "learning_rate": 0.00012276913064468567,
      "loss": 2.9883,
      "step": 161553
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.5712385177612305,
      "learning_rate": 0.00012276583023061944,
      "loss": 2.9443,
      "step": 161554
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7260677814483643,
      "learning_rate": 0.0001227625298495045,
      "loss": 2.9788,
      "step": 161555
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1877317428588867,
      "learning_rate": 0.00012275922950134123,
      "loss": 2.8148,
      "step": 161556
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.121438503265381,
      "learning_rate": 0.0001227559291861306,
      "loss": 2.6979,
      "step": 161557
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5745437145233154,
      "learning_rate": 0.00012275262890387275,
      "loss": 3.0669,
      "step": 161558
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.753392219543457,
      "learning_rate": 0.00012274932865456863,
      "loss": 2.6837,
      "step": 161559
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4852631092071533,
      "learning_rate": 0.0001227460284382187,
      "loss": 2.9467,
      "step": 161560
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.734572649002075,
      "learning_rate": 0.00012274272825482373,
      "loss": 2.9454,
      "step": 161561
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.216714859008789,
      "learning_rate": 0.00012273942810438407,
      "loss": 3.163,
      "step": 161562
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.9663403034210205,
      "learning_rate": 0.00012273612798690064,
      "loss": 2.8783,
      "step": 161563
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.537522554397583,
      "learning_rate": 0.0001227328279023739,
      "loss": 3.0357,
      "step": 161564
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.530919075012207,
      "learning_rate": 0.00012272952785080449,
      "loss": 3.1183,
      "step": 161565
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1633665561676025,
      "learning_rate": 0.00012272622783219287,
      "loss": 3.0988,
      "step": 161566
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1388776302337646,
      "learning_rate": 0.00012272292784653994,
      "loss": 2.7791,
      "step": 161567
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.90781831741333,
      "learning_rate": 0.00012271962789384605,
      "loss": 3.2537,
      "step": 161568
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.217299461364746,
      "learning_rate": 0.00012271632797411202,
      "loss": 2.8528,
      "step": 161569
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.169393539428711,
      "learning_rate": 0.00012271302808733835,
      "loss": 2.828,
      "step": 161570
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2067086696624756,
      "learning_rate": 0.0001227097282335257,
      "loss": 3.0364,
      "step": 161571
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5899312496185303,
      "learning_rate": 0.00012270642841267454,
      "loss": 3.1242,
      "step": 161572
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.518308401107788,
      "learning_rate": 0.0001227031286247857,
      "loss": 3.0969,
      "step": 161573
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.168595314025879,
      "learning_rate": 0.00012269982886985961,
      "loss": 2.7314,
      "step": 161574
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.434898614883423,
      "learning_rate": 0.00012269652914789707,
      "loss": 2.7834,
      "step": 161575
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.484309196472168,
      "learning_rate": 0.0001226932294588986,
      "loss": 2.9646,
      "step": 161576
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.443035840988159,
      "learning_rate": 0.00012268992980286468,
      "loss": 3.1382,
      "step": 161577
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4452273845672607,
      "learning_rate": 0.00012268663017979615,
      "loss": 2.8984,
      "step": 161578
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6250274181365967,
      "learning_rate": 0.00012268333058969354,
      "loss": 2.8514,
      "step": 161579
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0351290702819824,
      "learning_rate": 0.00012268003103255737,
      "loss": 2.9843,
      "step": 161580
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9038782119750977,
      "learning_rate": 0.00012267673150838837,
      "loss": 2.6125,
      "step": 161581
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.656825542449951,
      "learning_rate": 0.00012267343201718714,
      "loss": 2.9496,
      "step": 161582
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.496931552886963,
      "learning_rate": 0.00012267013255895417,
      "loss": 2.9847,
      "step": 161583
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.258460521697998,
      "learning_rate": 0.00012266683313369027,
      "loss": 3.0407,
      "step": 161584
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3994245529174805,
      "learning_rate": 0.00012266353374139588,
      "loss": 3.0639,
      "step": 161585
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.940492868423462,
      "learning_rate": 0.00012266023438207174,
      "loss": 3.0869,
      "step": 161586
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2728657722473145,
      "learning_rate": 0.00012265693505571844,
      "loss": 2.8823,
      "step": 161587
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4681384563446045,
      "learning_rate": 0.00012265363576233654,
      "loss": 2.8944,
      "step": 161588
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.8789734840393066,
      "learning_rate": 0.00012265033650192658,
      "loss": 3.0176,
      "step": 161589
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8413915634155273,
      "learning_rate": 0.00012264703727448938,
      "loss": 3.0136,
      "step": 161590
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2678449153900146,
      "learning_rate": 0.00012264373808002535,
      "loss": 2.7431,
      "step": 161591
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.08774995803833,
      "learning_rate": 0.00012264043891853528,
      "loss": 2.9593,
      "step": 161592
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4906928539276123,
      "learning_rate": 0.00012263713979001971,
      "loss": 2.9529,
      "step": 161593
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8386521339416504,
      "learning_rate": 0.00012263384069447926,
      "loss": 2.9631,
      "step": 161594
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4675228595733643,
      "learning_rate": 0.00012263054163191443,
      "loss": 3.0268,
      "step": 161595
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0286407470703125,
      "learning_rate": 0.00012262724260232597,
      "loss": 3.1811,
      "step": 161596
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7240662574768066,
      "learning_rate": 0.00012262394360571443,
      "loss": 3.0549,
      "step": 161597
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.8275396823883057,
      "learning_rate": 0.00012262064464208048,
      "loss": 2.8123,
      "step": 161598
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6824426651000977,
      "learning_rate": 0.00012261734571142473,
      "loss": 2.8446,
      "step": 161599
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5694997310638428,
      "learning_rate": 0.00012261404681374779,
      "loss": 3.0506,
      "step": 161600
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8603949546813965,
      "learning_rate": 0.0001226107479490501,
      "loss": 2.8659,
      "step": 161601
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.53787899017334,
      "learning_rate": 0.00012260744911733257,
      "loss": 2.8849,
      "step": 161602
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.7585768699646,
      "learning_rate": 0.00012260415031859555,
      "loss": 2.8036,
      "step": 161603
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.474255323410034,
      "learning_rate": 0.00012260085155283985,
      "loss": 2.9499,
      "step": 161604
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.640732526779175,
      "learning_rate": 0.000122597552820066,
      "loss": 2.9488,
      "step": 161605
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.155256748199463,
      "learning_rate": 0.0001225942541202746,
      "loss": 3.0366,
      "step": 161606
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.125218152999878,
      "learning_rate": 0.00012259095545346619,
      "loss": 2.7861,
      "step": 161607
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.034692764282227,
      "learning_rate": 0.0001225876568196416,
      "loss": 2.9151,
      "step": 161608
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.757323980331421,
      "learning_rate": 0.00012258435821880118,
      "loss": 2.8344,
      "step": 161609
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6879892349243164,
      "learning_rate": 0.0001225810596509458,
      "loss": 3.1333,
      "step": 161610
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.608341932296753,
      "learning_rate": 0.00012257776111607583,
      "loss": 2.6356,
      "step": 161611
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.845739364624023,
      "learning_rate": 0.0001225744626141922,
      "loss": 2.879,
      "step": 161612
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2836246490478516,
      "learning_rate": 0.00012257116414529515,
      "loss": 2.7703,
      "step": 161613
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5065205097198486,
      "learning_rate": 0.00012256786570938555,
      "loss": 3.2572,
      "step": 161614
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.201249122619629,
      "learning_rate": 0.00012256456730646385,
      "loss": 2.9345,
      "step": 161615
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.321789264678955,
      "learning_rate": 0.00012256126893653082,
      "loss": 2.8259,
      "step": 161616
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9241867065429688,
      "learning_rate": 0.0001225579705995869,
      "loss": 2.9818,
      "step": 161617
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.160504102706909,
      "learning_rate": 0.00012255467229563304,
      "loss": 2.772,
      "step": 161618
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3883509635925293,
      "learning_rate": 0.0001225513740246694,
      "loss": 2.7788,
      "step": 161619
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2574825286865234,
      "learning_rate": 0.0001225480757866969,
      "loss": 2.8916,
      "step": 161620
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2968873977661133,
      "learning_rate": 0.00012254477758171596,
      "loss": 2.9334,
      "step": 161621
    },
    {
      "epoch": 2.1,
      "grad_norm": 4.130241394042969,
      "learning_rate": 0.0001225414794097274,
      "loss": 3.1967,
      "step": 161622
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.685089111328125,
      "learning_rate": 0.00012253818127073163,
      "loss": 3.2621,
      "step": 161623
    },
    {
      "epoch": 2.1,
      "grad_norm": 1.9979151487350464,
      "learning_rate": 0.00012253488316472957,
      "loss": 3.0305,
      "step": 161624
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.20438289642334,
      "learning_rate": 0.0001225315850917214,
      "loss": 2.8261,
      "step": 161625
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.535313606262207,
      "learning_rate": 0.00012252828705170806,
      "loss": 3.2188,
      "step": 161626
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.276890754699707,
      "learning_rate": 0.00012252498904469,
      "loss": 3.0009,
      "step": 161627
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2898879051208496,
      "learning_rate": 0.00012252169107066795,
      "loss": 2.9577,
      "step": 161628
    },
    {
      "epoch": 2.1,
      "grad_norm": 5.118728160858154,
      "learning_rate": 0.0001225183931296424,
      "loss": 2.7351,
      "step": 161629
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.4538958072662354,
      "learning_rate": 0.00012251509522161422,
      "loss": 2.9371,
      "step": 161630
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.7781996726989746,
      "learning_rate": 0.00012251179734658362,
      "loss": 3.279,
      "step": 161631
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.98045015335083,
      "learning_rate": 0.00012250849950455156,
      "loss": 3.0282,
      "step": 161632
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.593670606613159,
      "learning_rate": 0.0001225052016955184,
      "loss": 2.9502,
      "step": 161633
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.9485738277435303,
      "learning_rate": 0.00012250190391948496,
      "loss": 2.9987,
      "step": 161634
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9755501747131348,
      "learning_rate": 0.00012249860617645166,
      "loss": 2.6903,
      "step": 161635
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.421520948410034,
      "learning_rate": 0.00012249530846641945,
      "loss": 2.9395,
      "step": 161636
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6804051399230957,
      "learning_rate": 0.0001224920107893885,
      "loss": 3.0972,
      "step": 161637
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.8433239459991455,
      "learning_rate": 0.00012248871314535977,
      "loss": 3.0357,
      "step": 161638
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6662354469299316,
      "learning_rate": 0.0001224854155343336,
      "loss": 2.9562,
      "step": 161639
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.496908664703369,
      "learning_rate": 0.00012248211795631085,
      "loss": 2.7174,
      "step": 161640
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.805959701538086,
      "learning_rate": 0.0001224788204112919,
      "loss": 3.1376,
      "step": 161641
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9747302532196045,
      "learning_rate": 0.00012247552289927776,
      "loss": 2.8673,
      "step": 161642
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.1361477375030518,
      "learning_rate": 0.0001224722254202685,
      "loss": 3.1442,
      "step": 161643
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0608181953430176,
      "learning_rate": 0.00012246892797426515,
      "loss": 2.7084,
      "step": 161644
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3898563385009766,
      "learning_rate": 0.00012246563056126806,
      "loss": 2.9938,
      "step": 161645
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.167207717895508,
      "learning_rate": 0.00012246233318127803,
      "loss": 2.9501,
      "step": 161646
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.9091415405273438,
      "learning_rate": 0.00012245903583429555,
      "loss": 2.9456,
      "step": 161647
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.1550543308258057,
      "learning_rate": 0.00012245573852032136,
      "loss": 2.9736,
      "step": 161648
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6856486797332764,
      "learning_rate": 0.00012245244123935603,
      "loss": 2.797,
      "step": 161649
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5787932872772217,
      "learning_rate": 0.00012244914399140014,
      "loss": 2.9658,
      "step": 161650
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6978044509887695,
      "learning_rate": 0.00012244584677645416,
      "loss": 3.1185,
      "step": 161651
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.3664846420288086,
      "learning_rate": 0.00012244254959451896,
      "loss": 2.8802,
      "step": 161652
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.268531084060669,
      "learning_rate": 0.00012243925244559496,
      "loss": 3.0591,
      "step": 161653
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.573723077774048,
      "learning_rate": 0.00012243595532968296,
      "loss": 2.9355,
      "step": 161654
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.848422050476074,
      "learning_rate": 0.00012243265824678345,
      "loss": 2.7763,
      "step": 161655
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.721791982650757,
      "learning_rate": 0.0001224293611968971,
      "loss": 2.9372,
      "step": 161656
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.137446165084839,
      "learning_rate": 0.00012242606418002433,
      "loss": 2.915,
      "step": 161657
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6335034370422363,
      "learning_rate": 0.00012242276719616604,
      "loss": 3.0849,
      "step": 161658
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.5034170150756836,
      "learning_rate": 0.00012241947024532258,
      "loss": 3.1505,
      "step": 161659
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4006359577178955,
      "learning_rate": 0.00012241617332749483,
      "loss": 2.8796,
      "step": 161660
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.2779793739318848,
      "learning_rate": 0.00012241287644268323,
      "loss": 2.9758,
      "step": 161661
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.4914939403533936,
      "learning_rate": 0.00012240957959088833,
      "loss": 2.7955,
      "step": 161662
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6806817054748535,
      "learning_rate": 0.00012240628277211096,
      "loss": 3.1906,
      "step": 161663
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.55727481842041,
      "learning_rate": 0.00012240298598635164,
      "loss": 2.9384,
      "step": 161664
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.0695838928222656,
      "learning_rate": 0.0001223996892336108,
      "loss": 3.1779,
      "step": 161665
    },
    {
      "epoch": 2.1,
      "grad_norm": 3.6617186069488525,
      "learning_rate": 0.00012239639251388935,
      "loss": 2.9341,
      "step": 161666
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8488881587982178,
      "learning_rate": 0.00012239309582718778,
      "loss": 2.5725,
      "step": 161667
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.586668014526367,
      "learning_rate": 0.00012238979917350654,
      "loss": 2.8673,
      "step": 161668
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6366448402404785,
      "learning_rate": 0.00012238650255284652,
      "loss": 2.9807,
      "step": 161669
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.347269058227539,
      "learning_rate": 0.00012238320596520807,
      "loss": 2.8465,
      "step": 161670
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.53330135345459,
      "learning_rate": 0.00012237990941059207,
      "loss": 2.8366,
      "step": 161671
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3957302570343018,
      "learning_rate": 0.00012237661288899896,
      "loss": 3.1753,
      "step": 161672
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9417619705200195,
      "learning_rate": 0.0001223733164004294,
      "loss": 2.977,
      "step": 161673
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.69838285446167,
      "learning_rate": 0.00012237001994488392,
      "loss": 2.8783,
      "step": 161674
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.769340753555298,
      "learning_rate": 0.0001223667235223633,
      "loss": 3.2186,
      "step": 161675
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4416143894195557,
      "learning_rate": 0.00012236342713286793,
      "loss": 3.0091,
      "step": 161676
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.23081111907959,
      "learning_rate": 0.00012236013077639868,
      "loss": 2.7679,
      "step": 161677
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.7423908710479736,
      "learning_rate": 0.00012235683445295604,
      "loss": 2.9453,
      "step": 161678
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.74284291267395,
      "learning_rate": 0.0001223535381625406,
      "loss": 3.032,
      "step": 161679
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.451646089553833,
      "learning_rate": 0.0001223502419051529,
      "loss": 3.1642,
      "step": 161680
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7738723754882812,
      "learning_rate": 0.00012234694568079375,
      "loss": 2.8961,
      "step": 161681
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5260164737701416,
      "learning_rate": 0.00012234364948946353,
      "loss": 2.6709,
      "step": 161682
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.455810546875,
      "learning_rate": 0.0001223403533311631,
      "loss": 3.1688,
      "step": 161683
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.895812511444092,
      "learning_rate": 0.00012233705720589283,
      "loss": 2.7878,
      "step": 161684
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4079999923706055,
      "learning_rate": 0.0001223337611136537,
      "loss": 2.794,
      "step": 161685
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.486945629119873,
      "learning_rate": 0.00012233046505444586,
      "loss": 2.7252,
      "step": 161686
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.185274124145508,
      "learning_rate": 0.0001223271690282702,
      "loss": 2.6921,
      "step": 161687
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.327604293823242,
      "learning_rate": 0.0001223238730351272,
      "loss": 2.865,
      "step": 161688
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3845479488372803,
      "learning_rate": 0.0001223205770750177,
      "loss": 2.9203,
      "step": 161689
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3322694301605225,
      "learning_rate": 0.000122317281147942,
      "loss": 3.0137,
      "step": 161690
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.501715898513794,
      "learning_rate": 0.00012231398525390107,
      "loss": 2.7602,
      "step": 161691
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.284919023513794,
      "learning_rate": 0.0001223106893928951,
      "loss": 2.8903,
      "step": 161692
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.433095932006836,
      "learning_rate": 0.00012230739356492508,
      "loss": 3.0035,
      "step": 161693
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2933316230773926,
      "learning_rate": 0.00012230409776999135,
      "loss": 2.963,
      "step": 161694
    },
    {
      "epoch": 2.11,
      "grad_norm": 1.9572309255599976,
      "learning_rate": 0.00012230080200809475,
      "loss": 2.6878,
      "step": 161695
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.485199451446533,
      "learning_rate": 0.00012229750627923566,
      "loss": 2.9417,
      "step": 161696
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.469165325164795,
      "learning_rate": 0.00012229421058341505,
      "loss": 3.1412,
      "step": 161697
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9512202739715576,
      "learning_rate": 0.00012229091492063307,
      "loss": 2.9786,
      "step": 161698
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5083112716674805,
      "learning_rate": 0.00012228761929089068,
      "loss": 2.7772,
      "step": 161699
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4647369384765625,
      "learning_rate": 0.0001222843236941883,
      "loss": 2.8538,
      "step": 161700
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.709235429763794,
      "learning_rate": 0.00012228102813052668,
      "loss": 3.0632,
      "step": 161701
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.430142879486084,
      "learning_rate": 0.00012227773259990626,
      "loss": 2.7287,
      "step": 161702
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.760653495788574,
      "learning_rate": 0.000122274437102328,
      "loss": 2.8799,
      "step": 161703
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.675734043121338,
      "learning_rate": 0.0001222711416377921,
      "loss": 3.0965,
      "step": 161704
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.747776508331299,
      "learning_rate": 0.00012226784620629938,
      "loss": 2.9603,
      "step": 161705
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.273655652999878,
      "learning_rate": 0.00012226455080785036,
      "loss": 2.6647,
      "step": 161706
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3060693740844727,
      "learning_rate": 0.00012226125544244584,
      "loss": 3.0044,
      "step": 161707
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5613014698028564,
      "learning_rate": 0.00012225796011008615,
      "loss": 3.234,
      "step": 161708
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.7198541164398193,
      "learning_rate": 0.00012225466481077235,
      "loss": 3.0107,
      "step": 161709
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.469714879989624,
      "learning_rate": 0.00012225136954450449,
      "loss": 2.9587,
      "step": 161710
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.411752223968506,
      "learning_rate": 0.00012224807431128357,
      "loss": 2.9668,
      "step": 161711
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.5485899448394775,
      "learning_rate": 0.00012224477911110997,
      "loss": 3.0376,
      "step": 161712
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.213045120239258,
      "learning_rate": 0.00012224148394398453,
      "loss": 3.0312,
      "step": 161713
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.956763982772827,
      "learning_rate": 0.00012223818880990767,
      "loss": 2.8293,
      "step": 161714
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.338301420211792,
      "learning_rate": 0.00012223489370888032,
      "loss": 2.9363,
      "step": 161715
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.708035707473755,
      "learning_rate": 0.0001222315986409026,
      "loss": 2.9941,
      "step": 161716
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.752552032470703,
      "learning_rate": 0.0001222283036059755,
      "loss": 3.1099,
      "step": 161717
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.6393585205078125,
      "learning_rate": 0.00012222500860409942,
      "loss": 2.9313,
      "step": 161718
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.529378414154053,
      "learning_rate": 0.00012222171363527516,
      "loss": 3.0509,
      "step": 161719
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.666351079940796,
      "learning_rate": 0.00012221841869950313,
      "loss": 2.7077,
      "step": 161720
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3521111011505127,
      "learning_rate": 0.00012221512379678427,
      "loss": 2.9014,
      "step": 161721
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1369991302490234,
      "learning_rate": 0.00012221182892711876,
      "loss": 3.0025,
      "step": 161722
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5299723148345947,
      "learning_rate": 0.00012220853409050753,
      "loss": 2.912,
      "step": 161723
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.849978446960449,
      "learning_rate": 0.000122205239286951,
      "loss": 2.8911,
      "step": 161724
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0419204235076904,
      "learning_rate": 0.00012220194451644997,
      "loss": 3.0325,
      "step": 161725
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2581982612609863,
      "learning_rate": 0.00012219864977900484,
      "loss": 3.0017,
      "step": 161726
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7916107177734375,
      "learning_rate": 0.00012219535507461656,
      "loss": 2.7002,
      "step": 161727
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.433077096939087,
      "learning_rate": 0.00012219206040328532,
      "loss": 3.0012,
      "step": 161728
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.27528715133667,
      "learning_rate": 0.00012218876576501202,
      "loss": 2.9082,
      "step": 161729
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8452556133270264,
      "learning_rate": 0.00012218547115979707,
      "loss": 2.7377,
      "step": 161730
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7297403812408447,
      "learning_rate": 0.00012218217658764132,
      "loss": 2.8159,
      "step": 161731
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7969892024993896,
      "learning_rate": 0.00012217888204854515,
      "loss": 2.9387,
      "step": 161732
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8826029300689697,
      "learning_rate": 0.00012217558754250947,
      "loss": 2.6078,
      "step": 161733
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.107192516326904,
      "learning_rate": 0.00012217229306953452,
      "loss": 3.0909,
      "step": 161734
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2608532905578613,
      "learning_rate": 0.00012216899862962122,
      "loss": 2.9854,
      "step": 161735
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.40149188041687,
      "learning_rate": 0.00012216570422276992,
      "loss": 2.7864,
      "step": 161736
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.4878664016723633,
      "learning_rate": 0.00012216240984898148,
      "loss": 2.9001,
      "step": 161737
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.260351657867432,
      "learning_rate": 0.0001221591155082563,
      "loss": 2.9052,
      "step": 161738
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3063368797302246,
      "learning_rate": 0.00012215582120059522,
      "loss": 3.124,
      "step": 161739
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3707361221313477,
      "learning_rate": 0.00012215252692599873,
      "loss": 2.799,
      "step": 161740
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2862281799316406,
      "learning_rate": 0.00012214923268446745,
      "loss": 2.9517,
      "step": 161741
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6246986389160156,
      "learning_rate": 0.00012214593847600187,
      "loss": 2.9995,
      "step": 161742
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.8734729290008545,
      "learning_rate": 0.0001221426443006028,
      "loss": 2.8998,
      "step": 161743
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4484028816223145,
      "learning_rate": 0.0001221393501582707,
      "loss": 2.9038,
      "step": 161744
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.103971242904663,
      "learning_rate": 0.00012213605604900635,
      "loss": 3.2297,
      "step": 161745
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8914520740509033,
      "learning_rate": 0.00012213276197281024,
      "loss": 2.8614,
      "step": 161746
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.355961322784424,
      "learning_rate": 0.0001221294679296829,
      "loss": 2.6968,
      "step": 161747
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.312699317932129,
      "learning_rate": 0.00012212617391962516,
      "loss": 2.9226,
      "step": 161748
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2012453079223633,
      "learning_rate": 0.00012212287994263756,
      "loss": 2.7992,
      "step": 161749
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.365572214126587,
      "learning_rate": 0.00012211958599872054,
      "loss": 2.9367,
      "step": 161750
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.141575813293457,
      "learning_rate": 0.00012211629208787496,
      "loss": 3.0361,
      "step": 161751
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.450549364089966,
      "learning_rate": 0.0001221129982101013,
      "loss": 2.6931,
      "step": 161752
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0732736587524414,
      "learning_rate": 0.0001221097043654001,
      "loss": 2.696,
      "step": 161753
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6147689819335938,
      "learning_rate": 0.00012210641055377218,
      "loss": 2.9526,
      "step": 161754
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8425076007843018,
      "learning_rate": 0.000122103116775218,
      "loss": 2.831,
      "step": 161755
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.445164680480957,
      "learning_rate": 0.00012209982302973816,
      "loss": 2.9605,
      "step": 161756
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3443541526794434,
      "learning_rate": 0.00012209652931733338,
      "loss": 3.0978,
      "step": 161757
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.686110258102417,
      "learning_rate": 0.00012209323563800424,
      "loss": 2.5977,
      "step": 161758
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1761488914489746,
      "learning_rate": 0.0001220899419917512,
      "loss": 2.9964,
      "step": 161759
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.020789861679077,
      "learning_rate": 0.0001220866483785751,
      "loss": 3.089,
      "step": 161760
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9673047065734863,
      "learning_rate": 0.00012208335479847634,
      "loss": 3.0754,
      "step": 161761
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7419683933258057,
      "learning_rate": 0.00012208006125145578,
      "loss": 2.8405,
      "step": 161762
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.731463670730591,
      "learning_rate": 0.0001220767677375139,
      "loss": 2.9637,
      "step": 161763
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.227260112762451,
      "learning_rate": 0.00012207347425665128,
      "loss": 2.8667,
      "step": 161764
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.558269500732422,
      "learning_rate": 0.00012207018080886845,
      "loss": 2.9186,
      "step": 161765
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.745565414428711,
      "learning_rate": 0.00012206688739416625,
      "loss": 2.7362,
      "step": 161766
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.114501714706421,
      "learning_rate": 0.00012206359401254507,
      "loss": 3.0946,
      "step": 161767
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4438858032226562,
      "learning_rate": 0.00012206030066400573,
      "loss": 2.9965,
      "step": 161768
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.142528533935547,
      "learning_rate": 0.00012205700734854865,
      "loss": 2.712,
      "step": 161769
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3453872203826904,
      "learning_rate": 0.00012205371406617471,
      "loss": 2.9533,
      "step": 161770
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2649033069610596,
      "learning_rate": 0.00012205042081688415,
      "loss": 2.8126,
      "step": 161771
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.575334072113037,
      "learning_rate": 0.00012204712760067787,
      "loss": 2.9035,
      "step": 161772
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.259812116622925,
      "learning_rate": 0.00012204383441755631,
      "loss": 2.8555,
      "step": 161773
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6067609786987305,
      "learning_rate": 0.00012204054126752024,
      "loss": 2.8681,
      "step": 161774
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.06229305267334,
      "learning_rate": 0.0001220372481505701,
      "loss": 2.9159,
      "step": 161775
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9093306064605713,
      "learning_rate": 0.00012203395506670681,
      "loss": 3.3052,
      "step": 161776
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9537930488586426,
      "learning_rate": 0.00012203066201593054,
      "loss": 2.8139,
      "step": 161777
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.714526414871216,
      "learning_rate": 0.00012202736899824224,
      "loss": 2.7295,
      "step": 161778
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.277843475341797,
      "learning_rate": 0.00012202407601364232,
      "loss": 2.9581,
      "step": 161779
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.641446828842163,
      "learning_rate": 0.00012202078306213158,
      "loss": 2.9406,
      "step": 161780
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.509838342666626,
      "learning_rate": 0.0001220174901437104,
      "loss": 2.847,
      "step": 161781
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.6304447650909424,
      "learning_rate": 0.0001220141972583798,
      "loss": 2.7229,
      "step": 161782
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3181159496307373,
      "learning_rate": 0.00012201090440613986,
      "loss": 2.6685,
      "step": 161783
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2771167755126953,
      "learning_rate": 0.00012200761158699157,
      "loss": 2.9198,
      "step": 161784
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8915135860443115,
      "learning_rate": 0.00012200431880093532,
      "loss": 2.8893,
      "step": 161785
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5414726734161377,
      "learning_rate": 0.00012200102604797197,
      "loss": 2.9666,
      "step": 161786
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.825031280517578,
      "learning_rate": 0.00012199773332810183,
      "loss": 2.637,
      "step": 161787
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.505030632019043,
      "learning_rate": 0.0001219944406413259,
      "loss": 2.9345,
      "step": 161788
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.8101227283477783,
      "learning_rate": 0.00012199114798764436,
      "loss": 3.0726,
      "step": 161789
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.541013717651367,
      "learning_rate": 0.00012198785536705814,
      "loss": 2.916,
      "step": 161790
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.814131021499634,
      "learning_rate": 0.00012198456277956762,
      "loss": 3.1783,
      "step": 161791
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6880850791931152,
      "learning_rate": 0.00012198127022517363,
      "loss": 2.86,
      "step": 161792
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.680516481399536,
      "learning_rate": 0.00012197797770387659,
      "loss": 2.9814,
      "step": 161793
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.154336452484131,
      "learning_rate": 0.0001219746852156774,
      "loss": 2.8458,
      "step": 161794
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1843678951263428,
      "learning_rate": 0.00012197139276057627,
      "loss": 3.1535,
      "step": 161795
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.4554836750030518,
      "learning_rate": 0.00012196810033857411,
      "loss": 3.1573,
      "step": 161796
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7038614749908447,
      "learning_rate": 0.00012196480794967137,
      "loss": 3.102,
      "step": 161797
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.408339738845825,
      "learning_rate": 0.00012196151559386882,
      "loss": 2.9469,
      "step": 161798
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3444294929504395,
      "learning_rate": 0.00012195822327116688,
      "loss": 3.0521,
      "step": 161799
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.34218430519104,
      "learning_rate": 0.00012195493098156647,
      "loss": 2.8783,
      "step": 161800
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5399553775787354,
      "learning_rate": 0.00012195163872506779,
      "loss": 2.812,
      "step": 161801
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.062523126602173,
      "learning_rate": 0.00012194834650167179,
      "loss": 3.0238,
      "step": 161802
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.321995496749878,
      "learning_rate": 0.00012194505431137884,
      "loss": 2.9032,
      "step": 161803
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.304628849029541,
      "learning_rate": 0.00012194176215418975,
      "loss": 3.2398,
      "step": 161804
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8012118339538574,
      "learning_rate": 0.00012193847003010494,
      "loss": 2.9352,
      "step": 161805
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.728151798248291,
      "learning_rate": 0.00012193517793912538,
      "loss": 2.7713,
      "step": 161806
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.944056510925293,
      "learning_rate": 0.00012193188588125118,
      "loss": 3.2459,
      "step": 161807
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.6919846534729004,
      "learning_rate": 0.00012192859385648333,
      "loss": 2.8066,
      "step": 161808
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4798483848571777,
      "learning_rate": 0.00012192530186482222,
      "loss": 3.1574,
      "step": 161809
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.038506507873535,
      "learning_rate": 0.00012192200990626864,
      "loss": 2.9581,
      "step": 161810
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.265677452087402,
      "learning_rate": 0.000121918717980823,
      "loss": 2.8314,
      "step": 161811
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.823068618774414,
      "learning_rate": 0.00012191542608848618,
      "loss": 2.7851,
      "step": 161812
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.0231852531433105,
      "learning_rate": 0.0001219121342292586,
      "loss": 2.9622,
      "step": 161813
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1761467456817627,
      "learning_rate": 0.00012190884240314095,
      "loss": 3.1466,
      "step": 161814
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.142570972442627,
      "learning_rate": 0.0001219055506101337,
      "loss": 2.8284,
      "step": 161815
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1469902992248535,
      "learning_rate": 0.00012190225885023767,
      "loss": 3.0773,
      "step": 161816
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.257828712463379,
      "learning_rate": 0.00012189896712345327,
      "loss": 2.857,
      "step": 161817
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.084578037261963,
      "learning_rate": 0.00012189567542978129,
      "loss": 2.7521,
      "step": 161818
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5376625061035156,
      "learning_rate": 0.0001218923837692223,
      "loss": 2.9773,
      "step": 161819
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.991203546524048,
      "learning_rate": 0.00012188909214177689,
      "loss": 2.8618,
      "step": 161820
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.255904197692871,
      "learning_rate": 0.00012188580054744552,
      "loss": 2.7952,
      "step": 161821
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.308407783508301,
      "learning_rate": 0.00012188250898622907,
      "loss": 2.634,
      "step": 161822
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1453146934509277,
      "learning_rate": 0.00012187921745812788,
      "loss": 3.0469,
      "step": 161823
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.443181037902832,
      "learning_rate": 0.00012187592596314285,
      "loss": 2.7688,
      "step": 161824
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2791380882263184,
      "learning_rate": 0.00012187263450127443,
      "loss": 2.9036,
      "step": 161825
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.228994607925415,
      "learning_rate": 0.00012186934307252328,
      "loss": 3.0637,
      "step": 161826
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.473572254180908,
      "learning_rate": 0.00012186605167688983,
      "loss": 3.1136,
      "step": 161827
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4734489917755127,
      "learning_rate": 0.00012186276031437497,
      "loss": 3.1311,
      "step": 161828
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.436436653137207,
      "learning_rate": 0.00012185946898497911,
      "loss": 3.0571,
      "step": 161829
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0637176036834717,
      "learning_rate": 0.00012185617768870298,
      "loss": 2.9867,
      "step": 161830
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0698375701904297,
      "learning_rate": 0.0001218528864255472,
      "loss": 2.8825,
      "step": 161831
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.327504873275757,
      "learning_rate": 0.0001218495951955122,
      "loss": 2.7105,
      "step": 161832
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.374514579772949,
      "learning_rate": 0.00012184630399859882,
      "loss": 2.8643,
      "step": 161833
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2425122261047363,
      "learning_rate": 0.00012184301283480763,
      "loss": 2.8655,
      "step": 161834
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5920917987823486,
      "learning_rate": 0.00012183972170413903,
      "loss": 2.9975,
      "step": 161835
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4291062355041504,
      "learning_rate": 0.00012183643060659391,
      "loss": 3.1588,
      "step": 161836
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.5354795455932617,
      "learning_rate": 0.00012183313954217276,
      "loss": 3.1296,
      "step": 161837
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6840627193450928,
      "learning_rate": 0.00012182984851087608,
      "loss": 3.0748,
      "step": 161838
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.564044237136841,
      "learning_rate": 0.0001218265575127047,
      "loss": 3.0464,
      "step": 161839
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9497649669647217,
      "learning_rate": 0.00012182326654765912,
      "loss": 3.0221,
      "step": 161840
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.6064600944519043,
      "learning_rate": 0.00012181997561573988,
      "loss": 2.7596,
      "step": 161841
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8141140937805176,
      "learning_rate": 0.00012181668471694777,
      "loss": 2.8777,
      "step": 161842
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2126591205596924,
      "learning_rate": 0.0001218133938512833,
      "loss": 2.8205,
      "step": 161843
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9770069122314453,
      "learning_rate": 0.00012181010301874696,
      "loss": 2.9233,
      "step": 161844
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.4216270446777344,
      "learning_rate": 0.00012180681221933959,
      "loss": 3.0376,
      "step": 161845
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.766488552093506,
      "learning_rate": 0.00012180352145306161,
      "loss": 3.0605,
      "step": 161846
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3176140785217285,
      "learning_rate": 0.0001218002307199138,
      "loss": 2.9185,
      "step": 161847
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.328909873962402,
      "learning_rate": 0.00012179694001989673,
      "loss": 2.8512,
      "step": 161848
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.216518402099609,
      "learning_rate": 0.00012179364935301094,
      "loss": 3.0814,
      "step": 161849
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.283886671066284,
      "learning_rate": 0.000121790358719257,
      "loss": 2.983,
      "step": 161850
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.346313714981079,
      "learning_rate": 0.00012178706811863568,
      "loss": 2.8452,
      "step": 161851
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8257896900177,
      "learning_rate": 0.00012178377755114739,
      "loss": 2.8448,
      "step": 161852
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6465609073638916,
      "learning_rate": 0.000121780487016793,
      "loss": 2.9477,
      "step": 161853
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.170592784881592,
      "learning_rate": 0.00012177719651557292,
      "loss": 2.8327,
      "step": 161854
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8721847534179688,
      "learning_rate": 0.00012177390604748789,
      "loss": 2.7137,
      "step": 161855
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.565295934677124,
      "learning_rate": 0.00012177061561253832,
      "loss": 2.9101,
      "step": 161856
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.566871166229248,
      "learning_rate": 0.00012176732521072506,
      "loss": 3.0074,
      "step": 161857
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.704350471496582,
      "learning_rate": 0.00012176403484204854,
      "loss": 2.9664,
      "step": 161858
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.67496657371521,
      "learning_rate": 0.00012176074450650952,
      "loss": 2.9388,
      "step": 161859
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0617334842681885,
      "learning_rate": 0.00012175745420410844,
      "loss": 3.0081,
      "step": 161860
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3642919063568115,
      "learning_rate": 0.00012175416393484626,
      "loss": 2.9948,
      "step": 161861
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4650368690490723,
      "learning_rate": 0.0001217508736987231,
      "loss": 2.9449,
      "step": 161862
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8588104248046875,
      "learning_rate": 0.00012174758349573994,
      "loss": 3.1199,
      "step": 161863
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7542600631713867,
      "learning_rate": 0.00012174429332589717,
      "loss": 2.9302,
      "step": 161864
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5816919803619385,
      "learning_rate": 0.00012174100318919558,
      "loss": 2.9647,
      "step": 161865
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.675523519515991,
      "learning_rate": 0.00012173771308563563,
      "loss": 2.761,
      "step": 161866
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7937240600585938,
      "learning_rate": 0.00012173442301521819,
      "loss": 2.9278,
      "step": 161867
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.948288679122925,
      "learning_rate": 0.00012173113297794347,
      "loss": 2.9663,
      "step": 161868
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.209045171737671,
      "learning_rate": 0.00012172784297381241,
      "loss": 2.9161,
      "step": 161869
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.772265911102295,
      "learning_rate": 0.00012172455300282539,
      "loss": 3.1292,
      "step": 161870
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5482192039489746,
      "learning_rate": 0.00012172126306498327,
      "loss": 2.7823,
      "step": 161871
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.286639928817749,
      "learning_rate": 0.00012171797316028645,
      "loss": 2.9471,
      "step": 161872
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5429835319519043,
      "learning_rate": 0.0001217146832887358,
      "loss": 3.2432,
      "step": 161873
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7532386779785156,
      "learning_rate": 0.00012171139345033153,
      "loss": 2.953,
      "step": 161874
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.293939590454102,
      "learning_rate": 0.00012170810364507461,
      "loss": 2.9041,
      "step": 161875
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.627408266067505,
      "learning_rate": 0.00012170481387296543,
      "loss": 2.886,
      "step": 161876
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.814626693725586,
      "learning_rate": 0.00012170152413400474,
      "loss": 2.8985,
      "step": 161877
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4318935871124268,
      "learning_rate": 0.00012169823442819306,
      "loss": 2.8504,
      "step": 161878
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.232611656188965,
      "learning_rate": 0.00012169494475553111,
      "loss": 2.8125,
      "step": 161879
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9734866619110107,
      "learning_rate": 0.00012169165511601945,
      "loss": 2.7413,
      "step": 161880
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.875422239303589,
      "learning_rate": 0.00012168836550965866,
      "loss": 3.0308,
      "step": 161881
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.782740116119385,
      "learning_rate": 0.00012168507593644928,
      "loss": 2.8209,
      "step": 161882
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.41349458694458,
      "learning_rate": 0.00012168178639639213,
      "loss": 3.048,
      "step": 161883
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6050376892089844,
      "learning_rate": 0.00012167849688948756,
      "loss": 3.0556,
      "step": 161884
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2157981395721436,
      "learning_rate": 0.00012167520741573643,
      "loss": 2.9266,
      "step": 161885
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0940773487091064,
      "learning_rate": 0.00012167191797513928,
      "loss": 3.0219,
      "step": 161886
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6110968589782715,
      "learning_rate": 0.00012166862856769666,
      "loss": 2.9657,
      "step": 161887
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.275603771209717,
      "learning_rate": 0.0001216653391934091,
      "loss": 2.944,
      "step": 161888
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3150179386138916,
      "learning_rate": 0.00012166204985227746,
      "loss": 2.8324,
      "step": 161889
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4247255325317383,
      "learning_rate": 0.00012165876054430208,
      "loss": 2.9474,
      "step": 161890
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3845245838165283,
      "learning_rate": 0.00012165547126948382,
      "loss": 3.144,
      "step": 161891
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9766106605529785,
      "learning_rate": 0.00012165218202782315,
      "loss": 2.9668,
      "step": 161892
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.766016960144043,
      "learning_rate": 0.00012164889281932071,
      "loss": 2.9973,
      "step": 161893
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.258439540863037,
      "learning_rate": 0.00012164560364397701,
      "loss": 2.911,
      "step": 161894
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3333241939544678,
      "learning_rate": 0.00012164231450179287,
      "loss": 2.9952,
      "step": 161895
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3942055702209473,
      "learning_rate": 0.00012163902539276869,
      "loss": 2.9391,
      "step": 161896
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6183295249938965,
      "learning_rate": 0.0001216357363169053,
      "loss": 2.9691,
      "step": 161897
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.53955078125,
      "learning_rate": 0.00012163244727420317,
      "loss": 2.7637,
      "step": 161898
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.695563554763794,
      "learning_rate": 0.00012162915826466291,
      "loss": 3.005,
      "step": 161899
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9544601440429688,
      "learning_rate": 0.0001216258692882851,
      "loss": 2.8914,
      "step": 161900
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1943962574005127,
      "learning_rate": 0.0001216225803450705,
      "loss": 2.823,
      "step": 161901
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.474522590637207,
      "learning_rate": 0.0001216192914350195,
      "loss": 3.1896,
      "step": 161902
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1140031814575195,
      "learning_rate": 0.00012161600255813297,
      "loss": 2.8922,
      "step": 161903
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3110055923461914,
      "learning_rate": 0.00012161271371441137,
      "loss": 3.0834,
      "step": 161904
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8784544467926025,
      "learning_rate": 0.00012160942490385538,
      "loss": 2.7328,
      "step": 161905
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2566375732421875,
      "learning_rate": 0.00012160613612646542,
      "loss": 3.1887,
      "step": 161906
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1988797187805176,
      "learning_rate": 0.00012160284738224234,
      "loss": 2.7899,
      "step": 161907
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7085399627685547,
      "learning_rate": 0.0001215995586711866,
      "loss": 2.9572,
      "step": 161908
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5138742923736572,
      "learning_rate": 0.00012159626999329895,
      "loss": 3.1296,
      "step": 161909
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6447594165802,
      "learning_rate": 0.0001215929813485799,
      "loss": 3.002,
      "step": 161910
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4104795455932617,
      "learning_rate": 0.00012158969273703012,
      "loss": 2.8829,
      "step": 161911
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.801679849624634,
      "learning_rate": 0.00012158640415865004,
      "loss": 2.976,
      "step": 161912
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4028124809265137,
      "learning_rate": 0.00012158311561344057,
      "loss": 3.0838,
      "step": 161913
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2743852138519287,
      "learning_rate": 0.00012157982710140201,
      "loss": 3.1639,
      "step": 161914
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3081700801849365,
      "learning_rate": 0.00012157653862253528,
      "loss": 3.054,
      "step": 161915
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.449284076690674,
      "learning_rate": 0.00012157325017684079,
      "loss": 2.6465,
      "step": 161916
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6732168197631836,
      "learning_rate": 0.00012156996176431924,
      "loss": 3.102,
      "step": 161917
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3997392654418945,
      "learning_rate": 0.00012156667338497108,
      "loss": 2.9062,
      "step": 161918
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6057052612304688,
      "learning_rate": 0.00012156338503879715,
      "loss": 3.0726,
      "step": 161919
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.505615234375,
      "learning_rate": 0.00012156009672579786,
      "loss": 2.8465,
      "step": 161920
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.0889811515808105,
      "learning_rate": 0.000121556808445974,
      "loss": 3.0342,
      "step": 161921
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.09489107131958,
      "learning_rate": 0.00012155352019932613,
      "loss": 2.74,
      "step": 161922
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.587670087814331,
      "learning_rate": 0.00012155023198585468,
      "loss": 3.0917,
      "step": 161923
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0479793548583984,
      "learning_rate": 0.00012154694380556057,
      "loss": 3.0523,
      "step": 161924
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.970688581466675,
      "learning_rate": 0.00012154365565844422,
      "loss": 3.1135,
      "step": 161925
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.667811870574951,
      "learning_rate": 0.00012154036754450616,
      "loss": 2.813,
      "step": 161926
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3063697814941406,
      "learning_rate": 0.00012153707946374723,
      "loss": 2.9593,
      "step": 161927
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5529866218566895,
      "learning_rate": 0.00012153379141616794,
      "loss": 2.7655,
      "step": 161928
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.259551048278809,
      "learning_rate": 0.00012153050340176877,
      "loss": 2.8929,
      "step": 161929
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4915781021118164,
      "learning_rate": 0.00012152721542055054,
      "loss": 2.8507,
      "step": 161930
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2398147583007812,
      "learning_rate": 0.00012152392747251367,
      "loss": 2.9806,
      "step": 161931
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3460726737976074,
      "learning_rate": 0.00012152063955765899,
      "loss": 2.8569,
      "step": 161932
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3219189643859863,
      "learning_rate": 0.00012151735167598698,
      "loss": 3.0703,
      "step": 161933
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5390164852142334,
      "learning_rate": 0.00012151406382749828,
      "loss": 2.9717,
      "step": 161934
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.5571250915527344,
      "learning_rate": 0.00012151077601219337,
      "loss": 3.1898,
      "step": 161935
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.660935640335083,
      "learning_rate": 0.00012150748823007304,
      "loss": 3.0947,
      "step": 161936
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0874886512756348,
      "learning_rate": 0.00012150420048113777,
      "loss": 2.9413,
      "step": 161937
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.442054033279419,
      "learning_rate": 0.00012150091276538836,
      "loss": 3.1418,
      "step": 161938
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7694308757781982,
      "learning_rate": 0.00012149762508282528,
      "loss": 2.8803,
      "step": 161939
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1490414142608643,
      "learning_rate": 0.00012149433743344917,
      "loss": 3.0467,
      "step": 161940
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.205566883087158,
      "learning_rate": 0.00012149104981726053,
      "loss": 2.8982,
      "step": 161941
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.03230619430542,
      "learning_rate": 0.00012148776223426015,
      "loss": 3.1986,
      "step": 161942
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1444833278656006,
      "learning_rate": 0.0001214844746844485,
      "loss": 3.0431,
      "step": 161943
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7366650104522705,
      "learning_rate": 0.00012148118716782635,
      "loss": 2.9891,
      "step": 161944
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.750828981399536,
      "learning_rate": 0.0001214778996843941,
      "loss": 3.1772,
      "step": 161945
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.183521270751953,
      "learning_rate": 0.00012147461223415257,
      "loss": 2.7395,
      "step": 161946
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.090090751647949,
      "learning_rate": 0.0001214713248171023,
      "loss": 3.1158,
      "step": 161947
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3177523612976074,
      "learning_rate": 0.00012146803743324389,
      "loss": 2.7454,
      "step": 161948
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8247389793395996,
      "learning_rate": 0.00012146475008257781,
      "loss": 2.8091,
      "step": 161949
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.248124599456787,
      "learning_rate": 0.00012146146276510492,
      "loss": 2.9233,
      "step": 161950
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.3930575847625732,
      "learning_rate": 0.00012145817548082563,
      "loss": 2.855,
      "step": 161951
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.446556329727173,
      "learning_rate": 0.00012145488822974069,
      "loss": 3.0604,
      "step": 161952
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5339832305908203,
      "learning_rate": 0.00012145160101185069,
      "loss": 3.0004,
      "step": 161953
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.740370273590088,
      "learning_rate": 0.00012144831382715622,
      "loss": 3.187,
      "step": 161954
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9245471954345703,
      "learning_rate": 0.00012144502667565775,
      "loss": 2.7809,
      "step": 161955
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.582994222640991,
      "learning_rate": 0.00012144173955735613,
      "loss": 2.9726,
      "step": 161956
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.081988573074341,
      "learning_rate": 0.00012143845247225176,
      "loss": 2.7571,
      "step": 161957
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3294992446899414,
      "learning_rate": 0.00012143516542034546,
      "loss": 2.7762,
      "step": 161958
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2602882385253906,
      "learning_rate": 0.00012143187840163773,
      "loss": 3.0089,
      "step": 161959
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.198472261428833,
      "learning_rate": 0.0001214285914161292,
      "loss": 3.0763,
      "step": 161960
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.698842763900757,
      "learning_rate": 0.00012142530446382032,
      "loss": 3.0576,
      "step": 161961
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8596081733703613,
      "learning_rate": 0.000121422017544712,
      "loss": 2.9897,
      "step": 161962
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2703943252563477,
      "learning_rate": 0.00012141873065880459,
      "loss": 3.027,
      "step": 161963
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4241960048675537,
      "learning_rate": 0.00012141544380609887,
      "loss": 2.8848,
      "step": 161964
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.504486083984375,
      "learning_rate": 0.0001214121569865954,
      "loss": 2.9732,
      "step": 161965
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.295942783355713,
      "learning_rate": 0.00012140887020029481,
      "loss": 3.0875,
      "step": 161966
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1600894927978516,
      "learning_rate": 0.00012140558344719756,
      "loss": 2.7136,
      "step": 161967
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9732635021209717,
      "learning_rate": 0.00012140229672730451,
      "loss": 2.8847,
      "step": 161968
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.793760061264038,
      "learning_rate": 0.00012139901004061601,
      "loss": 3.0752,
      "step": 161969
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.432527542114258,
      "learning_rate": 0.00012139572338713294,
      "loss": 2.9369,
      "step": 161970
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4600377082824707,
      "learning_rate": 0.00012139243676685573,
      "loss": 2.8522,
      "step": 161971
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.4719302654266357,
      "learning_rate": 0.00012138915017978508,
      "loss": 2.9748,
      "step": 161972
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.088879346847534,
      "learning_rate": 0.00012138586362592142,
      "loss": 3.1429,
      "step": 161973
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.38863468170166,
      "learning_rate": 0.00012138257710526563,
      "loss": 3.1236,
      "step": 161974
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.556593179702759,
      "learning_rate": 0.00012137929061781808,
      "loss": 2.9132,
      "step": 161975
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3553006649017334,
      "learning_rate": 0.0001213760041635796,
      "loss": 3.0027,
      "step": 161976
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5585334300994873,
      "learning_rate": 0.00012137271774255069,
      "loss": 2.9608,
      "step": 161977
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7490131855010986,
      "learning_rate": 0.00012136943135473197,
      "loss": 2.9554,
      "step": 161978
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4203760623931885,
      "learning_rate": 0.00012136614500012393,
      "loss": 2.7519,
      "step": 161979
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.107661008834839,
      "learning_rate": 0.00012136285867872739,
      "loss": 3.018,
      "step": 161980
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2355761528015137,
      "learning_rate": 0.00012135957239054277,
      "loss": 2.8611,
      "step": 161981
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.996346950531006,
      "learning_rate": 0.0001213562861355709,
      "loss": 2.8955,
      "step": 161982
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2208545207977295,
      "learning_rate": 0.00012135299991381224,
      "loss": 3.1052,
      "step": 161983
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.388549327850342,
      "learning_rate": 0.00012134971372526747,
      "loss": 2.8125,
      "step": 161984
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.016080141067505,
      "learning_rate": 0.00012134642756993702,
      "loss": 2.7861,
      "step": 161985
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.039355993270874,
      "learning_rate": 0.00012134314144782175,
      "loss": 2.9252,
      "step": 161986
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5102009773254395,
      "learning_rate": 0.00012133985535892206,
      "loss": 2.8274,
      "step": 161987
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.691256523132324,
      "learning_rate": 0.00012133656930323878,
      "loss": 2.8828,
      "step": 161988
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2668375968933105,
      "learning_rate": 0.00012133328328077237,
      "loss": 2.9101,
      "step": 161989
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1943986415863037,
      "learning_rate": 0.00012132999729152354,
      "loss": 2.992,
      "step": 161990
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.56962513923645,
      "learning_rate": 0.00012132671133549266,
      "loss": 2.7643,
      "step": 161991
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2628841400146484,
      "learning_rate": 0.00012132342541268069,
      "loss": 3.0419,
      "step": 161992
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3962554931640625,
      "learning_rate": 0.0001213201395230879,
      "loss": 3.2334,
      "step": 161993
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.327272415161133,
      "learning_rate": 0.00012131685366671521,
      "loss": 2.7361,
      "step": 161994
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.276728630065918,
      "learning_rate": 0.00012131356784356307,
      "loss": 3.1018,
      "step": 161995
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3770854473114014,
      "learning_rate": 0.00012131028205363213,
      "loss": 2.8686,
      "step": 161996
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6253855228424072,
      "learning_rate": 0.00012130699629692288,
      "loss": 3.0502,
      "step": 161997
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.342200994491577,
      "learning_rate": 0.00012130371057343612,
      "loss": 2.8018,
      "step": 161998
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2504403591156006,
      "learning_rate": 0.00012130042488317226,
      "loss": 3.0803,
      "step": 161999
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.633957624435425,
      "learning_rate": 0.00012129713922613216,
      "loss": 3.2017,
      "step": 162000
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6730780601501465,
      "learning_rate": 0.00012129385360231626,
      "loss": 3.0302,
      "step": 162001
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.512382745742798,
      "learning_rate": 0.00012129056801172525,
      "loss": 2.9066,
      "step": 162002
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.067727565765381,
      "learning_rate": 0.00012128728245435956,
      "loss": 2.9987,
      "step": 162003
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.515972137451172,
      "learning_rate": 0.00012128399693022005,
      "loss": 2.963,
      "step": 162004
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.080430269241333,
      "learning_rate": 0.00012128071143930712,
      "loss": 2.7468,
      "step": 162005
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2599403858184814,
      "learning_rate": 0.00012127742598162158,
      "loss": 2.8062,
      "step": 162006
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5724313259124756,
      "learning_rate": 0.00012127414055716392,
      "loss": 2.824,
      "step": 162007
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.95634388923645,
      "learning_rate": 0.00012127085516593468,
      "loss": 2.8026,
      "step": 162008
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9928102493286133,
      "learning_rate": 0.00012126756980793467,
      "loss": 3.0637,
      "step": 162009
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.984203338623047,
      "learning_rate": 0.00012126428448316441,
      "loss": 3.3047,
      "step": 162010
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0038015842437744,
      "learning_rate": 0.00012126099919162437,
      "loss": 2.9563,
      "step": 162011
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0965898036956787,
      "learning_rate": 0.00012125771393331537,
      "loss": 2.8123,
      "step": 162012
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.657846689224243,
      "learning_rate": 0.00012125442870823786,
      "loss": 2.8933,
      "step": 162013
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.060342788696289,
      "learning_rate": 0.00012125114351639264,
      "loss": 3.2219,
      "step": 162014
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.186726093292236,
      "learning_rate": 0.00012124785835778016,
      "loss": 3.1722,
      "step": 162015
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4125444889068604,
      "learning_rate": 0.0001212445732324011,
      "loss": 2.8346,
      "step": 162016
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8587467670440674,
      "learning_rate": 0.00012124128814025596,
      "loss": 2.9953,
      "step": 162017
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.788243055343628,
      "learning_rate": 0.00012123800308134553,
      "loss": 3.037,
      "step": 162018
    },
    {
      "epoch": 2.11,
      "grad_norm": 7.224116802215576,
      "learning_rate": 0.00012123471805567025,
      "loss": 3.0258,
      "step": 162019
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0447189807891846,
      "learning_rate": 0.00012123143306323087,
      "loss": 3.0023,
      "step": 162020
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.724604368209839,
      "learning_rate": 0.00012122814810402796,
      "loss": 2.954,
      "step": 162021
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.153059959411621,
      "learning_rate": 0.00012122486317806202,
      "loss": 2.8558,
      "step": 162022
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9777746200561523,
      "learning_rate": 0.00012122157828533385,
      "loss": 2.9714,
      "step": 162023
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.830013751983643,
      "learning_rate": 0.00012121829342584397,
      "loss": 2.9234,
      "step": 162024
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.648959159851074,
      "learning_rate": 0.00012121500859959286,
      "loss": 2.8673,
      "step": 162025
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2301089763641357,
      "learning_rate": 0.00012121172380658138,
      "loss": 2.7419,
      "step": 162026
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4408371448516846,
      "learning_rate": 0.00012120843904680998,
      "loss": 3.1225,
      "step": 162027
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.225470542907715,
      "learning_rate": 0.00012120515432027921,
      "loss": 3.0385,
      "step": 162028
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.946993827819824,
      "learning_rate": 0.00012120186962698992,
      "loss": 2.9143,
      "step": 162029
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.767662286758423,
      "learning_rate": 0.00012119858496694244,
      "loss": 2.8423,
      "step": 162030
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4303395748138428,
      "learning_rate": 0.00012119530034013766,
      "loss": 2.9429,
      "step": 162031
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.773319959640503,
      "learning_rate": 0.00012119201574657603,
      "loss": 3.1141,
      "step": 162032
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0403149127960205,
      "learning_rate": 0.00012118873118625815,
      "loss": 2.8768,
      "step": 162033
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.19310188293457,
      "learning_rate": 0.00012118544665918457,
      "loss": 2.727,
      "step": 162034
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.012075424194336,
      "learning_rate": 0.00012118216216535613,
      "loss": 3.0539,
      "step": 162035
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1271092891693115,
      "learning_rate": 0.00012117887770477314,
      "loss": 2.9537,
      "step": 162036
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6774203777313232,
      "learning_rate": 0.00012117559327743651,
      "loss": 2.85,
      "step": 162037
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.232595205307007,
      "learning_rate": 0.00012117230888334671,
      "loss": 2.7971,
      "step": 162038
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2884671688079834,
      "learning_rate": 0.00012116902452250435,
      "loss": 3.1048,
      "step": 162039
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.706063747406006,
      "learning_rate": 0.00012116574019490997,
      "loss": 2.9387,
      "step": 162040
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5358738899230957,
      "learning_rate": 0.00012116245590056432,
      "loss": 3.1129,
      "step": 162041
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.464534282684326,
      "learning_rate": 0.00012115917163946784,
      "loss": 3.1599,
      "step": 162042
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0860917568206787,
      "learning_rate": 0.00012115588741162136,
      "loss": 3.2082,
      "step": 162043
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.6064178943634033,
      "learning_rate": 0.00012115260321702538,
      "loss": 2.8372,
      "step": 162044
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.520437717437744,
      "learning_rate": 0.0001211493190556805,
      "loss": 3.1243,
      "step": 162045
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1770706176757812,
      "learning_rate": 0.00012114603492758727,
      "loss": 3.2139,
      "step": 162046
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.4120538234710693,
      "learning_rate": 0.00012114275083274643,
      "loss": 2.9944,
      "step": 162047
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.19685697555542,
      "learning_rate": 0.00012113946677115842,
      "loss": 3.244,
      "step": 162048
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.787555694580078,
      "learning_rate": 0.00012113618274282409,
      "loss": 3.0909,
      "step": 162049
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.982666492462158,
      "learning_rate": 0.00012113289874774392,
      "loss": 2.9288,
      "step": 162050
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.836902618408203,
      "learning_rate": 0.0001211296147859185,
      "loss": 2.9563,
      "step": 162051
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2397243976593018,
      "learning_rate": 0.00012112633085734836,
      "loss": 2.9515,
      "step": 162052
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.552459478378296,
      "learning_rate": 0.00012112304696203432,
      "loss": 3.0874,
      "step": 162053
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.02644681930542,
      "learning_rate": 0.00012111976309997681,
      "loss": 2.8327,
      "step": 162054
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.34781551361084,
      "learning_rate": 0.00012111647927117656,
      "loss": 2.8097,
      "step": 162055
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0015718936920166,
      "learning_rate": 0.00012111319547563418,
      "loss": 3.027,
      "step": 162056
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.569676637649536,
      "learning_rate": 0.00012110991171335021,
      "loss": 2.8471,
      "step": 162057
    },
    {
      "epoch": 2.11,
      "grad_norm": 1.9597874879837036,
      "learning_rate": 0.00012110662798432519,
      "loss": 2.9025,
      "step": 162058
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.27618408203125,
      "learning_rate": 0.00012110334428855992,
      "loss": 2.864,
      "step": 162059
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.3461790084838867,
      "learning_rate": 0.00012110006062605481,
      "loss": 2.8491,
      "step": 162060
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5950567722320557,
      "learning_rate": 0.00012109677699681068,
      "loss": 2.905,
      "step": 162061
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5188939571380615,
      "learning_rate": 0.00012109349340082806,
      "loss": 3.0867,
      "step": 162062
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3880069255828857,
      "learning_rate": 0.0001210902098381075,
      "loss": 2.8259,
      "step": 162063
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6924612522125244,
      "learning_rate": 0.00012108692630864956,
      "loss": 2.9759,
      "step": 162064
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6825973987579346,
      "learning_rate": 0.00012108364281245506,
      "loss": 2.9446,
      "step": 162065
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.749905824661255,
      "learning_rate": 0.00012108035934952435,
      "loss": 2.7882,
      "step": 162066
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.535947322845459,
      "learning_rate": 0.0001210770759198583,
      "loss": 3.0874,
      "step": 162067
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.6536810398101807,
      "learning_rate": 0.0001210737925234574,
      "loss": 2.8871,
      "step": 162068
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3925259113311768,
      "learning_rate": 0.00012107050916032225,
      "loss": 3.1108,
      "step": 162069
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.044767379760742,
      "learning_rate": 0.0001210672258304534,
      "loss": 2.8379,
      "step": 162070
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.544856548309326,
      "learning_rate": 0.0001210639425338516,
      "loss": 2.9976,
      "step": 162071
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5865585803985596,
      "learning_rate": 0.00012106065927051728,
      "loss": 3.0553,
      "step": 162072
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.166597604751587,
      "learning_rate": 0.00012105737604045128,
      "loss": 3.1034,
      "step": 162073
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7983052730560303,
      "learning_rate": 0.00012105409284365411,
      "loss": 2.8792,
      "step": 162074
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9726014137268066,
      "learning_rate": 0.00012105080968012633,
      "loss": 3.0167,
      "step": 162075
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.551020860671997,
      "learning_rate": 0.00012104752654986851,
      "loss": 2.9877,
      "step": 162076
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9088072776794434,
      "learning_rate": 0.00012104424345288139,
      "loss": 2.8494,
      "step": 162077
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.6822972297668457,
      "learning_rate": 0.00012104096038916544,
      "loss": 2.8378,
      "step": 162078
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.21403431892395,
      "learning_rate": 0.00012103767735872148,
      "loss": 3.0667,
      "step": 162079
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.596649646759033,
      "learning_rate": 0.00012103439436154989,
      "loss": 3.006,
      "step": 162080
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5439791679382324,
      "learning_rate": 0.00012103111139765156,
      "loss": 2.949,
      "step": 162081
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.6010079383850098,
      "learning_rate": 0.00012102782846702674,
      "loss": 3.0674,
      "step": 162082
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7411277294158936,
      "learning_rate": 0.00012102454556967631,
      "loss": 2.9812,
      "step": 162083
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1789162158966064,
      "learning_rate": 0.00012102126270560073,
      "loss": 3.1354,
      "step": 162084
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.352012872695923,
      "learning_rate": 0.00012101797987480074,
      "loss": 3.0988,
      "step": 162085
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4026150703430176,
      "learning_rate": 0.00012101469707727682,
      "loss": 2.8388,
      "step": 162086
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4275641441345215,
      "learning_rate": 0.0001210114143130298,
      "loss": 3.1091,
      "step": 162087
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5558905601501465,
      "learning_rate": 0.00012100813158205999,
      "loss": 2.9797,
      "step": 162088
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.758453130722046,
      "learning_rate": 0.00012100484888436822,
      "loss": 3.0976,
      "step": 162089
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3464293479919434,
      "learning_rate": 0.00012100156621995492,
      "loss": 2.9766,
      "step": 162090
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.296945095062256,
      "learning_rate": 0.00012099828358882089,
      "loss": 2.9109,
      "step": 162091
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.831005573272705,
      "learning_rate": 0.00012099500099096659,
      "loss": 3.1102,
      "step": 162092
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.814908266067505,
      "learning_rate": 0.0001209917184263928,
      "loss": 2.8649,
      "step": 162093
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3980154991149902,
      "learning_rate": 0.00012098843589510002,
      "loss": 2.9565,
      "step": 162094
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.084183931350708,
      "learning_rate": 0.00012098515339708885,
      "loss": 2.9663,
      "step": 162095
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.457202911376953,
      "learning_rate": 0.00012098187093235985,
      "loss": 2.7434,
      "step": 162096
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7883918285369873,
      "learning_rate": 0.00012097858850091378,
      "loss": 2.8573,
      "step": 162097
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7680933475494385,
      "learning_rate": 0.00012097530610275107,
      "loss": 2.9535,
      "step": 162098
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1182374954223633,
      "learning_rate": 0.00012097202373787254,
      "loss": 2.8531,
      "step": 162099
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4834020137786865,
      "learning_rate": 0.00012096874140627871,
      "loss": 3.1099,
      "step": 162100
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9154555797576904,
      "learning_rate": 0.00012096545910797012,
      "loss": 2.6786,
      "step": 162101
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.713118076324463,
      "learning_rate": 0.00012096217684294736,
      "loss": 3.2241,
      "step": 162102
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.643571376800537,
      "learning_rate": 0.00012095889461121122,
      "loss": 2.8162,
      "step": 162103
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.758867025375366,
      "learning_rate": 0.00012095561241276209,
      "loss": 2.9784,
      "step": 162104
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4072277545928955,
      "learning_rate": 0.00012095233024760081,
      "loss": 3.0547,
      "step": 162105
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5219266414642334,
      "learning_rate": 0.00012094904811572786,
      "loss": 2.9837,
      "step": 162106
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7829082012176514,
      "learning_rate": 0.00012094576601714375,
      "loss": 3.0026,
      "step": 162107
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.279961109161377,
      "learning_rate": 0.0001209424839518493,
      "loss": 3.1392,
      "step": 162108
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.763204097747803,
      "learning_rate": 0.00012093920191984504,
      "loss": 2.865,
      "step": 162109
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.779062032699585,
      "learning_rate": 0.00012093591992113142,
      "loss": 2.9161,
      "step": 162110
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.400974988937378,
      "learning_rate": 0.00012093263795570934,
      "loss": 2.8536,
      "step": 162111
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0517170429229736,
      "learning_rate": 0.00012092935602357924,
      "loss": 3.2875,
      "step": 162112
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2513232231140137,
      "learning_rate": 0.00012092607412474165,
      "loss": 2.9291,
      "step": 162113
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.714817523956299,
      "learning_rate": 0.00012092279225919739,
      "loss": 2.8056,
      "step": 162114
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.98091197013855,
      "learning_rate": 0.00012091951042694687,
      "loss": 3.0778,
      "step": 162115
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3240702152252197,
      "learning_rate": 0.00012091622862799091,
      "loss": 2.815,
      "step": 162116
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8892180919647217,
      "learning_rate": 0.00012091294686232998,
      "loss": 3.0629,
      "step": 162117
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.116431713104248,
      "learning_rate": 0.0001209096651299647,
      "loss": 2.8584,
      "step": 162118
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.360179901123047,
      "learning_rate": 0.0001209063834308956,
      "loss": 3.0292,
      "step": 162119
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8374013900756836,
      "learning_rate": 0.00012090310176512351,
      "loss": 2.7006,
      "step": 162120
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3540472984313965,
      "learning_rate": 0.00012089982013264879,
      "loss": 2.9535,
      "step": 162121
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6496517658233643,
      "learning_rate": 0.00012089653853347228,
      "loss": 2.9841,
      "step": 162122
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2788796424865723,
      "learning_rate": 0.00012089325696759448,
      "loss": 2.8783,
      "step": 162123
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0709402561187744,
      "learning_rate": 0.00012088997543501604,
      "loss": 2.7885,
      "step": 162124
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9082281589508057,
      "learning_rate": 0.00012088669393573741,
      "loss": 2.9062,
      "step": 162125
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8102316856384277,
      "learning_rate": 0.0001208834124697594,
      "loss": 3.1289,
      "step": 162126
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.60733699798584,
      "learning_rate": 0.00012088013103708248,
      "loss": 2.8387,
      "step": 162127
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.542898654937744,
      "learning_rate": 0.0001208768496377074,
      "loss": 2.8004,
      "step": 162128
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.7647793292999268,
      "learning_rate": 0.0001208735682716347,
      "loss": 2.8876,
      "step": 162129
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3877062797546387,
      "learning_rate": 0.000120870286938865,
      "loss": 3.0647,
      "step": 162130
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6792123317718506,
      "learning_rate": 0.0001208670056393988,
      "loss": 3.227,
      "step": 162131
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2105278968811035,
      "learning_rate": 0.00012086372437323687,
      "loss": 2.9428,
      "step": 162132
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3109383583068848,
      "learning_rate": 0.00012086044314037969,
      "loss": 2.7477,
      "step": 162133
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.649784564971924,
      "learning_rate": 0.000120857161940828,
      "loss": 2.9834,
      "step": 162134
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.995934963226318,
      "learning_rate": 0.0001208538807745824,
      "loss": 2.8389,
      "step": 162135
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.383669137954712,
      "learning_rate": 0.00012085059964164337,
      "loss": 2.5878,
      "step": 162136
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.137565851211548,
      "learning_rate": 0.00012084731854201155,
      "loss": 3.0196,
      "step": 162137
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.290289878845215,
      "learning_rate": 0.00012084403747568768,
      "loss": 3.0318,
      "step": 162138
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4352786540985107,
      "learning_rate": 0.00012084075644267217,
      "loss": 3.0725,
      "step": 162139
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9761452674865723,
      "learning_rate": 0.00012083747544296588,
      "loss": 3.0071,
      "step": 162140
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1055185794830322,
      "learning_rate": 0.00012083419447656928,
      "loss": 2.9935,
      "step": 162141
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2499234676361084,
      "learning_rate": 0.00012083091354348296,
      "loss": 2.8419,
      "step": 162142
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.744257926940918,
      "learning_rate": 0.00012082763264370747,
      "loss": 3.1147,
      "step": 162143
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1457619667053223,
      "learning_rate": 0.0001208243517772436,
      "loss": 3.0921,
      "step": 162144
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5080132484436035,
      "learning_rate": 0.00012082107094409177,
      "loss": 2.701,
      "step": 162145
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4772534370422363,
      "learning_rate": 0.00012081779014425278,
      "loss": 3.1416,
      "step": 162146
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6143569946289062,
      "learning_rate": 0.00012081450937772704,
      "loss": 2.8181,
      "step": 162147
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.371645927429199,
      "learning_rate": 0.0001208112286445155,
      "loss": 2.9651,
      "step": 162148
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4456989765167236,
      "learning_rate": 0.00012080794794461829,
      "loss": 3.0176,
      "step": 162149
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3244974613189697,
      "learning_rate": 0.00012080466727803639,
      "loss": 2.865,
      "step": 162150
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.706153631210327,
      "learning_rate": 0.00012080138664477018,
      "loss": 2.9229,
      "step": 162151
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1095523834228516,
      "learning_rate": 0.00012079810604482046,
      "loss": 2.8603,
      "step": 162152
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0367558002471924,
      "learning_rate": 0.00012079482547818767,
      "loss": 2.8846,
      "step": 162153
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7969326972961426,
      "learning_rate": 0.0001207915449448727,
      "loss": 2.8482,
      "step": 162154
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.380162239074707,
      "learning_rate": 0.00012078826444487576,
      "loss": 2.9605,
      "step": 162155
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1528337001800537,
      "learning_rate": 0.00012078498397819779,
      "loss": 3.1228,
      "step": 162156
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.888871431350708,
      "learning_rate": 0.00012078170354483913,
      "loss": 3.0539,
      "step": 162157
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5208096504211426,
      "learning_rate": 0.0001207784231448007,
      "loss": 2.9789,
      "step": 162158
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.439460277557373,
      "learning_rate": 0.00012077514277808277,
      "loss": 2.8629,
      "step": 162159
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7331228256225586,
      "learning_rate": 0.00012077186244468639,
      "loss": 2.6869,
      "step": 162160
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1133835315704346,
      "learning_rate": 0.00012076858214461164,
      "loss": 2.8267,
      "step": 162161
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3401525020599365,
      "learning_rate": 0.00012076530187785956,
      "loss": 3.0072,
      "step": 162162
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5856001377105713,
      "learning_rate": 0.00012076202164443044,
      "loss": 2.6746,
      "step": 162163
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.798830032348633,
      "learning_rate": 0.00012075874144432516,
      "loss": 2.9132,
      "step": 162164
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5558111667633057,
      "learning_rate": 0.00012075546127754411,
      "loss": 3.0406,
      "step": 162165
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.217700242996216,
      "learning_rate": 0.00012075218114408823,
      "loss": 2.8336,
      "step": 162166
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2449119091033936,
      "learning_rate": 0.00012074890104395768,
      "loss": 2.8934,
      "step": 162167
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.032593011856079,
      "learning_rate": 0.00012074562097715342,
      "loss": 2.8017,
      "step": 162168
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2214083671569824,
      "learning_rate": 0.00012074234094367579,
      "loss": 3.1454,
      "step": 162169
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4139161109924316,
      "learning_rate": 0.00012073906094352567,
      "loss": 2.9776,
      "step": 162170
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6985726356506348,
      "learning_rate": 0.00012073578097670342,
      "loss": 2.9966,
      "step": 162171
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1986119747161865,
      "learning_rate": 0.00012073250104320998,
      "loss": 2.9398,
      "step": 162172
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7233824729919434,
      "learning_rate": 0.00012072922114304558,
      "loss": 2.822,
      "step": 162173
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.657416343688965,
      "learning_rate": 0.00012072594127621106,
      "loss": 3.0313,
      "step": 162174
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.36691951751709,
      "learning_rate": 0.00012072266144270691,
      "loss": 3.0295,
      "step": 162175
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3043572902679443,
      "learning_rate": 0.00012071938164253388,
      "loss": 2.7019,
      "step": 162176
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5103347301483154,
      "learning_rate": 0.00012071610187569241,
      "loss": 3.0857,
      "step": 162177
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1823008060455322,
      "learning_rate": 0.0001207128221421834,
      "loss": 3.1921,
      "step": 162178
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8841657638549805,
      "learning_rate": 0.00012070954244200706,
      "loss": 2.8206,
      "step": 162179
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5333151817321777,
      "learning_rate": 0.00012070626277516429,
      "loss": 3.0561,
      "step": 162180
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3796441555023193,
      "learning_rate": 0.00012070298314165549,
      "loss": 3.1358,
      "step": 162181
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.505666494369507,
      "learning_rate": 0.00012069970354148152,
      "loss": 2.8386,
      "step": 162182
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0321590900421143,
      "learning_rate": 0.00012069642397464276,
      "loss": 2.9607,
      "step": 162183
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2047786712646484,
      "learning_rate": 0.00012069314444114002,
      "loss": 2.8621,
      "step": 162184
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3429718017578125,
      "learning_rate": 0.00012068986494097379,
      "loss": 2.9942,
      "step": 162185
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.533409357070923,
      "learning_rate": 0.00012068658547414473,
      "loss": 3.0938,
      "step": 162186
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1807503700256348,
      "learning_rate": 0.00012068330604065329,
      "loss": 2.9026,
      "step": 162187
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.3935394287109375,
      "learning_rate": 0.00012068002664050033,
      "loss": 2.9091,
      "step": 162188
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6027488708496094,
      "learning_rate": 0.00012067674727368623,
      "loss": 2.9539,
      "step": 162189
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.222154140472412,
      "learning_rate": 0.00012067346794021182,
      "loss": 2.898,
      "step": 162190
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7126317024230957,
      "learning_rate": 0.00012067018864007758,
      "loss": 2.9294,
      "step": 162191
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.330782175064087,
      "learning_rate": 0.00012066690937328403,
      "loss": 3.0519,
      "step": 162192
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5231528282165527,
      "learning_rate": 0.00012066363013983199,
      "loss": 3.0599,
      "step": 162193
    },
    {
      "epoch": 2.11,
      "grad_norm": 1.9281436204910278,
      "learning_rate": 0.00012066035093972197,
      "loss": 3.1522,
      "step": 162194
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1229312419891357,
      "learning_rate": 0.00012065707177295449,
      "loss": 3.3033,
      "step": 162195
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7552709579467773,
      "learning_rate": 0.00012065379263953032,
      "loss": 3.0262,
      "step": 162196
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.263413906097412,
      "learning_rate": 0.00012065051353945002,
      "loss": 3.0551,
      "step": 162197
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.8678972721099854,
      "learning_rate": 0.00012064723447271408,
      "loss": 3.0835,
      "step": 162198
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3069002628326416,
      "learning_rate": 0.00012064395543932327,
      "loss": 3.1281,
      "step": 162199
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9317119121551514,
      "learning_rate": 0.00012064067643927818,
      "loss": 2.8961,
      "step": 162200
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.185138463973999,
      "learning_rate": 0.00012063739747257923,
      "loss": 2.9209,
      "step": 162201
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6247358322143555,
      "learning_rate": 0.00012063411853922729,
      "loss": 2.9022,
      "step": 162202
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3425118923187256,
      "learning_rate": 0.00012063083963922287,
      "loss": 3.2646,
      "step": 162203
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3910179138183594,
      "learning_rate": 0.00012062756077256643,
      "loss": 2.704,
      "step": 162204
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.31559419631958,
      "learning_rate": 0.00012062428193925882,
      "loss": 2.7682,
      "step": 162205
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9900574684143066,
      "learning_rate": 0.00012062100313930045,
      "loss": 2.8706,
      "step": 162206
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.631312608718872,
      "learning_rate": 0.00012061772437269213,
      "loss": 2.812,
      "step": 162207
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2163846492767334,
      "learning_rate": 0.00012061444563943438,
      "loss": 2.862,
      "step": 162208
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2860107421875,
      "learning_rate": 0.00012061116693952777,
      "loss": 3.1771,
      "step": 162209
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6405084133148193,
      "learning_rate": 0.00012060788827297281,
      "loss": 2.9339,
      "step": 162210
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.8027641773223877,
      "learning_rate": 0.00012060460963977036,
      "loss": 3.0686,
      "step": 162211
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7965970039367676,
      "learning_rate": 0.00012060133103992078,
      "loss": 3.2055,
      "step": 162212
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.529287099838257,
      "learning_rate": 0.00012059805247342488,
      "loss": 2.9035,
      "step": 162213
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.000213384628296,
      "learning_rate": 0.00012059477394028312,
      "loss": 3.0976,
      "step": 162214
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9675850868225098,
      "learning_rate": 0.00012059149544049639,
      "loss": 2.9,
      "step": 162215
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.526794910430908,
      "learning_rate": 0.00012058821697406487,
      "loss": 2.861,
      "step": 162216
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.580206871032715,
      "learning_rate": 0.00012058493854098948,
      "loss": 2.9099,
      "step": 162217
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.023630380630493,
      "learning_rate": 0.00012058166014127062,
      "loss": 3.0239,
      "step": 162218
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.615384817123413,
      "learning_rate": 0.00012057838177490914,
      "loss": 3.0539,
      "step": 162219
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2278268337249756,
      "learning_rate": 0.00012057510344190544,
      "loss": 3.1286,
      "step": 162220
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.508854627609253,
      "learning_rate": 0.0001205718251422604,
      "loss": 2.837,
      "step": 162221
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.760062217712402,
      "learning_rate": 0.00012056854687597422,
      "loss": 2.9269,
      "step": 162222
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.514431476593018,
      "learning_rate": 0.00012056526864304788,
      "loss": 2.9089,
      "step": 162223
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.651540517807007,
      "learning_rate": 0.0001205619904434817,
      "loss": 2.9324,
      "step": 162224
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0950753688812256,
      "learning_rate": 0.00012055871227727658,
      "loss": 2.9679,
      "step": 162225
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.327073097229004,
      "learning_rate": 0.00012055543414443283,
      "loss": 2.9618,
      "step": 162226
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.277018070220947,
      "learning_rate": 0.00012055215604495148,
      "loss": 2.9087,
      "step": 162227
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.166388511657715,
      "learning_rate": 0.00012054887797883262,
      "loss": 2.7972,
      "step": 162228
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1739397048950195,
      "learning_rate": 0.00012054559994607723,
      "loss": 2.908,
      "step": 162229
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.568662166595459,
      "learning_rate": 0.00012054232194668566,
      "loss": 3.1332,
      "step": 162230
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4763903617858887,
      "learning_rate": 0.00012053904398065882,
      "loss": 2.9798,
      "step": 162231
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1703643798828125,
      "learning_rate": 0.000120535766047997,
      "loss": 2.9187,
      "step": 162232
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.631054401397705,
      "learning_rate": 0.0001205324881487012,
      "loss": 2.871,
      "step": 162233
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.415609359741211,
      "learning_rate": 0.00012052921028277162,
      "loss": 3.0438,
      "step": 162234
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.560196876525879,
      "learning_rate": 0.00012052593245020912,
      "loss": 3.0958,
      "step": 162235
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7981162071228027,
      "learning_rate": 0.00012052265465101415,
      "loss": 3.0901,
      "step": 162236
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9467856884002686,
      "learning_rate": 0.00012051937688518749,
      "loss": 2.9209,
      "step": 162237
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.72059440612793,
      "learning_rate": 0.00012051609915272957,
      "loss": 3.0443,
      "step": 162238
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.473893880844116,
      "learning_rate": 0.00012051282145364133,
      "loss": 3.0254,
      "step": 162239
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.85097074508667,
      "learning_rate": 0.00012050954378792289,
      "loss": 2.8199,
      "step": 162240
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2599236965179443,
      "learning_rate": 0.00012050626615557526,
      "loss": 2.8289,
      "step": 162241
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5045087337493896,
      "learning_rate": 0.00012050298855659877,
      "loss": 2.8794,
      "step": 162242
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.162029266357422,
      "learning_rate": 0.00012049971099099428,
      "loss": 2.9605,
      "step": 162243
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.504575729370117,
      "learning_rate": 0.00012049643345876216,
      "loss": 2.629,
      "step": 162244
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.268418788909912,
      "learning_rate": 0.00012049315595990338,
      "loss": 2.7426,
      "step": 162245
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1922733783721924,
      "learning_rate": 0.00012048987849441808,
      "loss": 3.0786,
      "step": 162246
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.41019344329834,
      "learning_rate": 0.00012048660106230721,
      "loss": 3.0501,
      "step": 162247
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3404383659362793,
      "learning_rate": 0.00012048332366357117,
      "loss": 3.2771,
      "step": 162248
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.538813591003418,
      "learning_rate": 0.00012048004629821077,
      "loss": 2.7538,
      "step": 162249
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5887715816497803,
      "learning_rate": 0.00012047676896622642,
      "loss": 3.0078,
      "step": 162250
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.136730194091797,
      "learning_rate": 0.00012047349166761901,
      "loss": 3.109,
      "step": 162251
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.165192127227783,
      "learning_rate": 0.00012047021440238879,
      "loss": 2.9967,
      "step": 162252
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4159107208251953,
      "learning_rate": 0.00012046693717053663,
      "loss": 2.8789,
      "step": 162253
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.922257423400879,
      "learning_rate": 0.00012046365997206296,
      "loss": 2.7123,
      "step": 162254
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.275252103805542,
      "learning_rate": 0.0001204603828069686,
      "loss": 2.629,
      "step": 162255
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5435028076171875,
      "learning_rate": 0.00012045710567525391,
      "loss": 2.861,
      "step": 162256
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9810123443603516,
      "learning_rate": 0.00012045382857691988,
      "loss": 2.9802,
      "step": 162257
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.001131296157837,
      "learning_rate": 0.00012045055151196664,
      "loss": 2.8699,
      "step": 162258
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1147849559783936,
      "learning_rate": 0.00012044727448039514,
      "loss": 2.8176,
      "step": 162259
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1025068759918213,
      "learning_rate": 0.00012044399748220577,
      "loss": 3.1401,
      "step": 162260
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.840254545211792,
      "learning_rate": 0.00012044072051739936,
      "loss": 3.0229,
      "step": 162261
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5256922245025635,
      "learning_rate": 0.00012043744358597627,
      "loss": 2.7892,
      "step": 162262
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.790583610534668,
      "learning_rate": 0.00012043416668793753,
      "loss": 3.0875,
      "step": 162263
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.8450796604156494,
      "learning_rate": 0.00012043088982328321,
      "loss": 2.7302,
      "step": 162264
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.981705904006958,
      "learning_rate": 0.00012042761299201432,
      "loss": 2.8531,
      "step": 162265
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.602142572402954,
      "learning_rate": 0.00012042433619413118,
      "loss": 3.1168,
      "step": 162266
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7397706508636475,
      "learning_rate": 0.00012042105942963467,
      "loss": 2.8006,
      "step": 162267
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3742306232452393,
      "learning_rate": 0.00012041778269852516,
      "loss": 3.0154,
      "step": 162268
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8402047157287598,
      "learning_rate": 0.00012041450600080346,
      "loss": 3.1486,
      "step": 162269
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.4818432331085205,
      "learning_rate": 0.00012041122933647014,
      "loss": 3.1468,
      "step": 162270
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5479416847229004,
      "learning_rate": 0.00012040795270552573,
      "loss": 2.7629,
      "step": 162271
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5543229579925537,
      "learning_rate": 0.00012040467610797079,
      "loss": 2.8486,
      "step": 162272
    },
    {
      "epoch": 2.11,
      "grad_norm": 5.816511631011963,
      "learning_rate": 0.0001204013995438061,
      "loss": 2.6888,
      "step": 162273
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.943711996078491,
      "learning_rate": 0.00012039812301303206,
      "loss": 2.7877,
      "step": 162274
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.015368700027466,
      "learning_rate": 0.00012039484651564953,
      "loss": 2.9783,
      "step": 162275
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3304831981658936,
      "learning_rate": 0.00012039157005165897,
      "loss": 2.9366,
      "step": 162276
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.240211248397827,
      "learning_rate": 0.00012038829362106094,
      "loss": 3.0369,
      "step": 162277
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.691337585449219,
      "learning_rate": 0.00012038501722385619,
      "loss": 2.9421,
      "step": 162278
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1359047889709473,
      "learning_rate": 0.00012038174086004524,
      "loss": 2.7935,
      "step": 162279
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.359757423400879,
      "learning_rate": 0.00012037846452962864,
      "loss": 3.0997,
      "step": 162280
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.065826416015625,
      "learning_rate": 0.00012037518823260715,
      "loss": 2.9544,
      "step": 162281
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2086408138275146,
      "learning_rate": 0.00012037191196898133,
      "loss": 2.7675,
      "step": 162282
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.8269784450531006,
      "learning_rate": 0.00012036863573875165,
      "loss": 2.5225,
      "step": 162283
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.684100866317749,
      "learning_rate": 0.00012036535954191896,
      "loss": 2.7378,
      "step": 162284
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9710662364959717,
      "learning_rate": 0.00012036208337848372,
      "loss": 2.949,
      "step": 162285
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4030537605285645,
      "learning_rate": 0.00012035880724844644,
      "loss": 2.9956,
      "step": 162286
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9041473865509033,
      "learning_rate": 0.00012035553115180796,
      "loss": 3.0668,
      "step": 162287
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.7138400077819824,
      "learning_rate": 0.00012035225508856877,
      "loss": 2.9099,
      "step": 162288
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5755252838134766,
      "learning_rate": 0.0001203489790587294,
      "loss": 2.8206,
      "step": 162289
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.418776273727417,
      "learning_rate": 0.00012034570306229064,
      "loss": 3.1517,
      "step": 162290
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5544304847717285,
      "learning_rate": 0.0001203424270992529,
      "loss": 3.1214,
      "step": 162291
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4604110717773438,
      "learning_rate": 0.00012033915116961701,
      "loss": 2.7537,
      "step": 162292
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.982919692993164,
      "learning_rate": 0.00012033587527338347,
      "loss": 3.0255,
      "step": 162293
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7625932693481445,
      "learning_rate": 0.00012033259941055286,
      "loss": 2.8453,
      "step": 162294
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1508655548095703,
      "learning_rate": 0.00012032932358112571,
      "loss": 3.1494,
      "step": 162295
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5140581130981445,
      "learning_rate": 0.00012032604778510287,
      "loss": 3.2295,
      "step": 162296
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.224116086959839,
      "learning_rate": 0.00012032277202248464,
      "loss": 2.868,
      "step": 162297
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.693603038787842,
      "learning_rate": 0.00012031949629327197,
      "loss": 2.88,
      "step": 162298
    },
    {
      "epoch": 2.11,
      "grad_norm": 1.9690793752670288,
      "learning_rate": 0.00012031622059746527,
      "loss": 3.017,
      "step": 162299
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6133267879486084,
      "learning_rate": 0.00012031294493506517,
      "loss": 3.0131,
      "step": 162300
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4268076419830322,
      "learning_rate": 0.00012030966930607218,
      "loss": 3.0914,
      "step": 162301
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.529714345932007,
      "learning_rate": 0.00012030639371048711,
      "loss": 3.0397,
      "step": 162302
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4908061027526855,
      "learning_rate": 0.00012030311814831038,
      "loss": 2.9793,
      "step": 162303
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3450393676757812,
      "learning_rate": 0.00012029984261954279,
      "loss": 2.8796,
      "step": 162304
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.466203451156616,
      "learning_rate": 0.00012029656712418479,
      "loss": 2.9554,
      "step": 162305
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2283310890197754,
      "learning_rate": 0.00012029329166223722,
      "loss": 3.0244,
      "step": 162306
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3922202587127686,
      "learning_rate": 0.00012029001623370031,
      "loss": 2.7513,
      "step": 162307
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1187798976898193,
      "learning_rate": 0.000120286740838575,
      "loss": 3.1619,
      "step": 162308
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3579795360565186,
      "learning_rate": 0.00012028346547686165,
      "loss": 2.9673,
      "step": 162309
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0796382427215576,
      "learning_rate": 0.0001202801901485611,
      "loss": 2.6468,
      "step": 162310
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.307295560836792,
      "learning_rate": 0.00012027691485367376,
      "loss": 2.8907,
      "step": 162311
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5164566040039062,
      "learning_rate": 0.00012027363959220055,
      "loss": 2.8413,
      "step": 162312
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.355191946029663,
      "learning_rate": 0.00012027036436414165,
      "loss": 2.9892,
      "step": 162313
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8519725799560547,
      "learning_rate": 0.000120267089169498,
      "loss": 3.1597,
      "step": 162314
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2391278743743896,
      "learning_rate": 0.00012026381400826994,
      "loss": 2.9955,
      "step": 162315
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8089728355407715,
      "learning_rate": 0.0001202605388804584,
      "loss": 3.0467,
      "step": 162316
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.843242883682251,
      "learning_rate": 0.00012025726378606367,
      "loss": 2.7915,
      "step": 162317
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0957465171813965,
      "learning_rate": 0.00012025398872508672,
      "loss": 2.8378,
      "step": 162318
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1106395721435547,
      "learning_rate": 0.00012025071369752775,
      "loss": 2.7402,
      "step": 162319
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.536001205444336,
      "learning_rate": 0.00012024743870338767,
      "loss": 2.8798,
      "step": 162320
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2474417686462402,
      "learning_rate": 0.00012024416374266686,
      "loss": 3.0977,
      "step": 162321
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1367547512054443,
      "learning_rate": 0.00012024088881536617,
      "loss": 3.0848,
      "step": 162322
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.125908374786377,
      "learning_rate": 0.000120237613921486,
      "loss": 3.1335,
      "step": 162323
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.102508068084717,
      "learning_rate": 0.00012023433906102725,
      "loss": 2.7053,
      "step": 162324
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0971286296844482,
      "learning_rate": 0.00012023106423399011,
      "loss": 3.014,
      "step": 162325
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4395244121551514,
      "learning_rate": 0.00012022778944037552,
      "loss": 2.9473,
      "step": 162326
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.294267416000366,
      "learning_rate": 0.00012022451468018387,
      "loss": 2.9762,
      "step": 162327
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3337199687957764,
      "learning_rate": 0.00012022123995341598,
      "loss": 2.7825,
      "step": 162328
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.237098217010498,
      "learning_rate": 0.00012021796526007226,
      "loss": 3.1472,
      "step": 162329
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.219505310058594,
      "learning_rate": 0.00012021469060015363,
      "loss": 2.984,
      "step": 162330
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.7892704010009766,
      "learning_rate": 0.00012021141597366027,
      "loss": 2.6721,
      "step": 162331
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0201528072357178,
      "learning_rate": 0.00012020814138059309,
      "loss": 2.9834,
      "step": 162332
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.649616241455078,
      "learning_rate": 0.00012020486682095252,
      "loss": 2.9198,
      "step": 162333
    },
    {
      "epoch": 2.11,
      "grad_norm": 4.660686016082764,
      "learning_rate": 0.00012020159229473937,
      "loss": 2.9412,
      "step": 162334
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.726597785949707,
      "learning_rate": 0.00012019831780195401,
      "loss": 2.7184,
      "step": 162335
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.138594627380371,
      "learning_rate": 0.00012019504334259738,
      "loss": 2.694,
      "step": 162336
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.588831901550293,
      "learning_rate": 0.00012019176891666972,
      "loss": 3.0999,
      "step": 162337
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5301733016967773,
      "learning_rate": 0.00012018849452417188,
      "loss": 2.8265,
      "step": 162338
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6542110443115234,
      "learning_rate": 0.00012018522016510429,
      "loss": 3.0164,
      "step": 162339
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7012853622436523,
      "learning_rate": 0.00012018194583946776,
      "loss": 2.8144,
      "step": 162340
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.928398609161377,
      "learning_rate": 0.00012017867154726269,
      "loss": 2.9827,
      "step": 162341
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8111376762390137,
      "learning_rate": 0.00012017539728849005,
      "loss": 2.9158,
      "step": 162342
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8224036693573,
      "learning_rate": 0.00012017212306314996,
      "loss": 3.0396,
      "step": 162343
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1702158451080322,
      "learning_rate": 0.00012016884887124336,
      "loss": 2.8576,
      "step": 162344
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4145395755767822,
      "learning_rate": 0.00012016557471277069,
      "loss": 3.0432,
      "step": 162345
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2308261394500732,
      "learning_rate": 0.00012016230058773273,
      "loss": 3.0883,
      "step": 162346
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.481835126876831,
      "learning_rate": 0.00012015902649612986,
      "loss": 3.0366,
      "step": 162347
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5056843757629395,
      "learning_rate": 0.00012015575243796295,
      "loss": 2.8811,
      "step": 162348
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8052268028259277,
      "learning_rate": 0.00012015247841323249,
      "loss": 3.1949,
      "step": 162349
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.396507740020752,
      "learning_rate": 0.00012014920442193904,
      "loss": 2.9724,
      "step": 162350
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3078126907348633,
      "learning_rate": 0.00012014593046408319,
      "loss": 2.8932,
      "step": 162351
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.1148407459259033,
      "learning_rate": 0.0001201426565396657,
      "loss": 2.9188,
      "step": 162352
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3834292888641357,
      "learning_rate": 0.00012013938264868698,
      "loss": 3.0397,
      "step": 162353
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3770878314971924,
      "learning_rate": 0.0001201361087911478,
      "loss": 2.6125,
      "step": 162354
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.784029722213745,
      "learning_rate": 0.0001201328349670488,
      "loss": 2.8676,
      "step": 162355
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7442915439605713,
      "learning_rate": 0.00012012956117639043,
      "loss": 2.8705,
      "step": 162356
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5507116317749023,
      "learning_rate": 0.00012012628741917328,
      "loss": 2.7252,
      "step": 162357
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.920567035675049,
      "learning_rate": 0.00012012301369539818,
      "loss": 3.0933,
      "step": 162358
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.492739677429199,
      "learning_rate": 0.00012011974000506546,
      "loss": 2.8863,
      "step": 162359
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.523677349090576,
      "learning_rate": 0.00012011646634817603,
      "loss": 2.8438,
      "step": 162360
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1092529296875,
      "learning_rate": 0.0001201131927247303,
      "loss": 2.9931,
      "step": 162361
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2720015048980713,
      "learning_rate": 0.00012010991913472898,
      "loss": 3.3191,
      "step": 162362
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.582442045211792,
      "learning_rate": 0.00012010664557817248,
      "loss": 3.0431,
      "step": 162363
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1172635555267334,
      "learning_rate": 0.00012010337205506163,
      "loss": 2.9879,
      "step": 162364
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.2147347927093506,
      "learning_rate": 0.0001201000985653969,
      "loss": 2.8934,
      "step": 162365
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0834524631500244,
      "learning_rate": 0.00012009682510917904,
      "loss": 3.0605,
      "step": 162366
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.346360206604004,
      "learning_rate": 0.00012009355168640856,
      "loss": 3.1023,
      "step": 162367
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2954928874969482,
      "learning_rate": 0.00012009027829708599,
      "loss": 2.8145,
      "step": 162368
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.972283363342285,
      "learning_rate": 0.00012008700494121217,
      "loss": 2.6831,
      "step": 162369
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.525108814239502,
      "learning_rate": 0.00012008373161878756,
      "loss": 2.7563,
      "step": 162370
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.353562831878662,
      "learning_rate": 0.00012008045832981266,
      "loss": 2.975,
      "step": 162371
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.1917049884796143,
      "learning_rate": 0.0001200771850742883,
      "loss": 2.9491,
      "step": 162372
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0847561359405518,
      "learning_rate": 0.00012007391185221498,
      "loss": 2.9922,
      "step": 162373
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5838818550109863,
      "learning_rate": 0.00012007063866359324,
      "loss": 3.0309,
      "step": 162374
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.20097279548645,
      "learning_rate": 0.00012006736550842386,
      "loss": 3.0735,
      "step": 162375
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7978315353393555,
      "learning_rate": 0.00012006409238670723,
      "loss": 2.728,
      "step": 162376
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9001543521881104,
      "learning_rate": 0.0001200608192984442,
      "loss": 2.9343,
      "step": 162377
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.541203260421753,
      "learning_rate": 0.00012005754624363526,
      "loss": 3.1407,
      "step": 162378
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3264198303222656,
      "learning_rate": 0.00012005427322228105,
      "loss": 2.8952,
      "step": 162379
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3784234523773193,
      "learning_rate": 0.000120051000234382,
      "loss": 3.2567,
      "step": 162380
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.191873788833618,
      "learning_rate": 0.00012004772727993897,
      "loss": 3.0045,
      "step": 162381
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4611504077911377,
      "learning_rate": 0.00012004445435895238,
      "loss": 2.9047,
      "step": 162382
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0665509700775146,
      "learning_rate": 0.00012004118147142302,
      "loss": 3.1914,
      "step": 162383
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3906989097595215,
      "learning_rate": 0.00012003790861735139,
      "loss": 2.932,
      "step": 162384
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7980124950408936,
      "learning_rate": 0.00012003463579673814,
      "loss": 3.2496,
      "step": 162385
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.9057703018188477,
      "learning_rate": 0.00012003136300958373,
      "loss": 2.9739,
      "step": 162386
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4448490142822266,
      "learning_rate": 0.00012002809025588898,
      "loss": 2.9403,
      "step": 162387
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.221738815307617,
      "learning_rate": 0.00012002481753565429,
      "loss": 3.3027,
      "step": 162388
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.242222309112549,
      "learning_rate": 0.00012002154484888051,
      "loss": 2.8837,
      "step": 162389
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.243358850479126,
      "learning_rate": 0.00012001827219556803,
      "loss": 2.639,
      "step": 162390
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.296969413757324,
      "learning_rate": 0.00012001499957571775,
      "loss": 2.9133,
      "step": 162391
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5487825870513916,
      "learning_rate": 0.00012001172698932987,
      "loss": 2.9491,
      "step": 162392
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.801656484603882,
      "learning_rate": 0.00012000845443640529,
      "loss": 3.0871,
      "step": 162393
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.802574396133423,
      "learning_rate": 0.00012000518191694444,
      "loss": 3.0567,
      "step": 162394
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.517479419708252,
      "learning_rate": 0.00012000190943094816,
      "loss": 3.0296,
      "step": 162395
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8911097049713135,
      "learning_rate": 0.00011999863697841676,
      "loss": 2.8963,
      "step": 162396
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.4991471767425537,
      "learning_rate": 0.00011999536455935127,
      "loss": 2.8983,
      "step": 162397
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.164947509765625,
      "learning_rate": 0.00011999209217375181,
      "loss": 3.0381,
      "step": 162398
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.786349058151245,
      "learning_rate": 0.00011998881982161934,
      "loss": 3.3995,
      "step": 162399
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.845186710357666,
      "learning_rate": 0.00011998554750295424,
      "loss": 2.8571,
      "step": 162400
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7043709754943848,
      "learning_rate": 0.00011998227521775729,
      "loss": 2.9083,
      "step": 162401
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3845982551574707,
      "learning_rate": 0.00011997900296602898,
      "loss": 2.9221,
      "step": 162402
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.31941294670105,
      "learning_rate": 0.00011997573074777016,
      "loss": 3.0566,
      "step": 162403
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.7796990871429443,
      "learning_rate": 0.00011997245856298103,
      "loss": 2.8301,
      "step": 162404
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.704885959625244,
      "learning_rate": 0.00011996918641166253,
      "loss": 3.0959,
      "step": 162405
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2729711532592773,
      "learning_rate": 0.00011996591429381506,
      "loss": 3.066,
      "step": 162406
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.490333318710327,
      "learning_rate": 0.00011996264220943941,
      "loss": 3.03,
      "step": 162407
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.0924394130706787,
      "learning_rate": 0.00011995937015853602,
      "loss": 3.0492,
      "step": 162408
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0387496948242188,
      "learning_rate": 0.00011995609814110578,
      "loss": 2.786,
      "step": 162409
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6349942684173584,
      "learning_rate": 0.00011995282615714891,
      "loss": 2.8968,
      "step": 162410
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.746774196624756,
      "learning_rate": 0.00011994955420666629,
      "loss": 3.0437,
      "step": 162411
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.383227586746216,
      "learning_rate": 0.00011994628228965833,
      "loss": 2.9214,
      "step": 162412
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.395932197570801,
      "learning_rate": 0.00011994301040612589,
      "loss": 2.7511,
      "step": 162413
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.351285696029663,
      "learning_rate": 0.00011993973855606933,
      "loss": 3.2357,
      "step": 162414
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.34330415725708,
      "learning_rate": 0.00011993646673948946,
      "loss": 2.8136,
      "step": 162415
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3305704593658447,
      "learning_rate": 0.00011993319495638683,
      "loss": 2.9505,
      "step": 162416
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2277655601501465,
      "learning_rate": 0.00011992992320676197,
      "loss": 2.8897,
      "step": 162417
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.2128982543945312,
      "learning_rate": 0.00011992665149061546,
      "loss": 3.088,
      "step": 162418
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8578314781188965,
      "learning_rate": 0.00011992337980794812,
      "loss": 2.9266,
      "step": 162419
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.8654959201812744,
      "learning_rate": 0.00011992010815876029,
      "loss": 2.8804,
      "step": 162420
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.866414785385132,
      "learning_rate": 0.00011991683654305281,
      "loss": 3.2341,
      "step": 162421
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.3926808834075928,
      "learning_rate": 0.00011991356496082618,
      "loss": 2.8579,
      "step": 162422
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.893320322036743,
      "learning_rate": 0.00011991029341208103,
      "loss": 2.8741,
      "step": 162423
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5064728260040283,
      "learning_rate": 0.00011990702189681784,
      "loss": 2.7332,
      "step": 162424
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.0858333110809326,
      "learning_rate": 0.00011990375041503748,
      "loss": 3.1488,
      "step": 162425
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.369964122772217,
      "learning_rate": 0.00011990047896674028,
      "loss": 2.932,
      "step": 162426
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5827622413635254,
      "learning_rate": 0.00011989720755192707,
      "loss": 3.1606,
      "step": 162427
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.7946135997772217,
      "learning_rate": 0.0001198939361705984,
      "loss": 2.9571,
      "step": 162428
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.5204432010650635,
      "learning_rate": 0.00011989066482275482,
      "loss": 2.8678,
      "step": 162429
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.6990952491760254,
      "learning_rate": 0.00011988739350839686,
      "loss": 2.6815,
      "step": 162430
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.55139422416687,
      "learning_rate": 0.00011988412222752538,
      "loss": 2.8373,
      "step": 162431
    },
    {
      "epoch": 2.11,
      "grad_norm": 2.305511951446533,
      "learning_rate": 0.0001198808509801407,
      "loss": 3.103,
      "step": 162432
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.02028226852417,
      "learning_rate": 0.00011987757976624367,
      "loss": 2.8543,
      "step": 162433
    },
    {
      "epoch": 2.11,
      "grad_norm": 3.024294376373291,
      "learning_rate": 0.00011987430858583482,
      "loss": 2.9806,
      "step": 162434
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.36671781539917,
      "learning_rate": 0.00011987103743891473,
      "loss": 2.7587,
      "step": 162435
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.589379072189331,
      "learning_rate": 0.00011986776632548389,
      "loss": 3.069,
      "step": 162436
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.22737979888916,
      "learning_rate": 0.00011986449524554315,
      "loss": 3.2136,
      "step": 162437
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4169812202453613,
      "learning_rate": 0.00011986122419909291,
      "loss": 2.9905,
      "step": 162438
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.469973087310791,
      "learning_rate": 0.00011985795318613395,
      "loss": 2.9972,
      "step": 162439
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6663742065429688,
      "learning_rate": 0.0001198546822066668,
      "loss": 2.9105,
      "step": 162440
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.103776216506958,
      "learning_rate": 0.00011985141126069209,
      "loss": 2.8882,
      "step": 162441
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5929009914398193,
      "learning_rate": 0.00011984814034821029,
      "loss": 2.7552,
      "step": 162442
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.916959285736084,
      "learning_rate": 0.00011984486946922219,
      "loss": 3.2642,
      "step": 162443
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.13690185546875,
      "learning_rate": 0.00011984159862372827,
      "loss": 2.8839,
      "step": 162444
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1177403926849365,
      "learning_rate": 0.0001198383278117293,
      "loss": 3.0856,
      "step": 162445
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9586827754974365,
      "learning_rate": 0.00011983505703322575,
      "loss": 3.0447,
      "step": 162446
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6796658039093018,
      "learning_rate": 0.00011983178628821829,
      "loss": 2.7134,
      "step": 162447
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.733342409133911,
      "learning_rate": 0.00011982851557670739,
      "loss": 3.0058,
      "step": 162448
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.926588296890259,
      "learning_rate": 0.00011982524489869388,
      "loss": 2.8403,
      "step": 162449
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7773802280426025,
      "learning_rate": 0.00011982197425417815,
      "loss": 2.8802,
      "step": 162450
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3716487884521484,
      "learning_rate": 0.000119818703643161,
      "loss": 2.8591,
      "step": 162451
    },
    {
      "epoch": 2.12,
      "grad_norm": 5.49685001373291,
      "learning_rate": 0.000119815433065643,
      "loss": 3.0678,
      "step": 162452
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3149619102478027,
      "learning_rate": 0.00011981216252162456,
      "loss": 2.891,
      "step": 162453
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6542530059814453,
      "learning_rate": 0.00011980889201110658,
      "loss": 3.1907,
      "step": 162454
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4103426933288574,
      "learning_rate": 0.00011980562153408952,
      "loss": 2.9014,
      "step": 162455
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3040075302124023,
      "learning_rate": 0.00011980235109057388,
      "loss": 2.9522,
      "step": 162456
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.913928985595703,
      "learning_rate": 0.00011979908068056051,
      "loss": 2.8234,
      "step": 162457
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4448580741882324,
      "learning_rate": 0.00011979581030404988,
      "loss": 3.0363,
      "step": 162458
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2170491218566895,
      "learning_rate": 0.00011979253996104248,
      "loss": 3.0295,
      "step": 162459
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5012545585632324,
      "learning_rate": 0.00011978926965153918,
      "loss": 2.9997,
      "step": 162460
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.530360460281372,
      "learning_rate": 0.00011978599937554047,
      "loss": 3.0113,
      "step": 162461
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0198092460632324,
      "learning_rate": 0.00011978272913304681,
      "loss": 3.1238,
      "step": 162462
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.802084445953369,
      "learning_rate": 0.00011977945892405907,
      "loss": 2.8819,
      "step": 162463
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.44256591796875,
      "learning_rate": 0.00011977618874857773,
      "loss": 2.8926,
      "step": 162464
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.590494394302368,
      "learning_rate": 0.00011977291860660329,
      "loss": 3.1289,
      "step": 162465
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5348401069641113,
      "learning_rate": 0.00011976964849813654,
      "loss": 2.8206,
      "step": 162466
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3833436965942383,
      "learning_rate": 0.00011976637842317796,
      "loss": 2.8097,
      "step": 162467
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3649990558624268,
      "learning_rate": 0.00011976310838172827,
      "loss": 3.1736,
      "step": 162468
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.773606777191162,
      "learning_rate": 0.00011975983837378804,
      "loss": 3.0537,
      "step": 162469
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2371840476989746,
      "learning_rate": 0.00011975656839935788,
      "loss": 3.0672,
      "step": 162470
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.7206597328186035,
      "learning_rate": 0.00011975329845843824,
      "loss": 3.2092,
      "step": 162471
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8342349529266357,
      "learning_rate": 0.00011975002855103,
      "loss": 2.6956,
      "step": 162472
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7855961322784424,
      "learning_rate": 0.00011974675867713352,
      "loss": 3.0643,
      "step": 162473
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.896691083908081,
      "learning_rate": 0.00011974348883674958,
      "loss": 3.0419,
      "step": 162474
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3064627647399902,
      "learning_rate": 0.0001197402190298787,
      "loss": 2.8946,
      "step": 162475
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9976930618286133,
      "learning_rate": 0.00011973694925652165,
      "loss": 2.7716,
      "step": 162476
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.789454936981201,
      "learning_rate": 0.00011973367951667874,
      "loss": 2.8221,
      "step": 162477
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3687050342559814,
      "learning_rate": 0.00011973040981035085,
      "loss": 2.7531,
      "step": 162478
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.842376232147217,
      "learning_rate": 0.00011972714013753835,
      "loss": 3.0754,
      "step": 162479
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1368820667266846,
      "learning_rate": 0.0001197238704982421,
      "loss": 3.0787,
      "step": 162480
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7229220867156982,
      "learning_rate": 0.00011972060089246244,
      "loss": 2.947,
      "step": 162481
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.387599229812622,
      "learning_rate": 0.00011971733132020029,
      "loss": 2.8915,
      "step": 162482
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4154105186462402,
      "learning_rate": 0.00011971406178145608,
      "loss": 2.8929,
      "step": 162483
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.284834623336792,
      "learning_rate": 0.00011971079227623038,
      "loss": 2.9461,
      "step": 162484
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5128166675567627,
      "learning_rate": 0.00011970752280452378,
      "loss": 2.8857,
      "step": 162485
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4283530712127686,
      "learning_rate": 0.00011970425336633707,
      "loss": 3.0342,
      "step": 162486
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4693336486816406,
      "learning_rate": 0.0001197009839616706,
      "loss": 3.0112,
      "step": 162487
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9981417655944824,
      "learning_rate": 0.00011969771459052526,
      "loss": 2.8963,
      "step": 162488
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8220479488372803,
      "learning_rate": 0.00011969444525290151,
      "loss": 3.0142,
      "step": 162489
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.494403839111328,
      "learning_rate": 0.00011969117594879994,
      "loss": 2.9149,
      "step": 162490
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5556516647338867,
      "learning_rate": 0.00011968790667822112,
      "loss": 2.9477,
      "step": 162491
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.370563268661499,
      "learning_rate": 0.00011968463744116581,
      "loss": 2.8006,
      "step": 162492
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.304086685180664,
      "learning_rate": 0.00011968136823763439,
      "loss": 3.0338,
      "step": 162493
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3029673099517822,
      "learning_rate": 0.00011967809906762775,
      "loss": 2.8378,
      "step": 162494
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.766444206237793,
      "learning_rate": 0.00011967482993114637,
      "loss": 3.3687,
      "step": 162495
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2632486820220947,
      "learning_rate": 0.0001196715608281908,
      "loss": 3.0344,
      "step": 162496
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.6410250663757324,
      "learning_rate": 0.00011966829175876161,
      "loss": 2.8616,
      "step": 162497
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.517117977142334,
      "learning_rate": 0.00011966502272285959,
      "loss": 2.9537,
      "step": 162498
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4554388523101807,
      "learning_rate": 0.00011966175372048513,
      "loss": 3.0325,
      "step": 162499
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.385423421859741,
      "learning_rate": 0.00011965848475163908,
      "loss": 2.8567,
      "step": 162500
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.9983481168746948,
      "learning_rate": 0.00011965521581632192,
      "loss": 3.0991,
      "step": 162501
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.5109691619873047,
      "learning_rate": 0.00011965194691453426,
      "loss": 3.1266,
      "step": 162502
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.459610939025879,
      "learning_rate": 0.00011964867804627659,
      "loss": 2.97,
      "step": 162503
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.888176202774048,
      "learning_rate": 0.00011964540921154975,
      "loss": 2.9533,
      "step": 162504
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1358799934387207,
      "learning_rate": 0.00011964214041035412,
      "loss": 2.9128,
      "step": 162505
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7789735794067383,
      "learning_rate": 0.00011963887164269052,
      "loss": 2.7906,
      "step": 162506
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2377214431762695,
      "learning_rate": 0.00011963560290855948,
      "loss": 2.928,
      "step": 162507
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8259530067443848,
      "learning_rate": 0.0001196323342079616,
      "loss": 2.9604,
      "step": 162508
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.424530029296875,
      "learning_rate": 0.00011962906554089731,
      "loss": 3.0309,
      "step": 162509
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.238443374633789,
      "learning_rate": 0.00011962579690736754,
      "loss": 2.9299,
      "step": 162510
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.083632230758667,
      "learning_rate": 0.0001196225283073726,
      "loss": 3.0823,
      "step": 162511
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.244610071182251,
      "learning_rate": 0.00011961925974091334,
      "loss": 2.7242,
      "step": 162512
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3630096912384033,
      "learning_rate": 0.00011961599120799029,
      "loss": 2.961,
      "step": 162513
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5315663814544678,
      "learning_rate": 0.00011961272270860401,
      "loss": 3.0418,
      "step": 162514
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5725324153900146,
      "learning_rate": 0.00011960945424275504,
      "loss": 3.0177,
      "step": 162515
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.107137441635132,
      "learning_rate": 0.00011960618581044417,
      "loss": 3.0276,
      "step": 162516
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1713407039642334,
      "learning_rate": 0.00011960291741167186,
      "loss": 3.1839,
      "step": 162517
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.67383074760437,
      "learning_rate": 0.00011959964904643883,
      "loss": 2.7726,
      "step": 162518
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.9094278812408447,
      "learning_rate": 0.00011959638071474562,
      "loss": 2.8926,
      "step": 162519
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.816248893737793,
      "learning_rate": 0.00011959311241659286,
      "loss": 3.0925,
      "step": 162520
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.156644105911255,
      "learning_rate": 0.00011958984415198105,
      "loss": 3.0148,
      "step": 162521
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.650611639022827,
      "learning_rate": 0.00011958657592091101,
      "loss": 3.0361,
      "step": 162522
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2648115158081055,
      "learning_rate": 0.00011958330772338311,
      "loss": 2.8153,
      "step": 162523
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.031369924545288,
      "learning_rate": 0.00011958003955939816,
      "loss": 2.9441,
      "step": 162524
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4237356185913086,
      "learning_rate": 0.00011957677142895673,
      "loss": 2.7281,
      "step": 162525
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5657119750976562,
      "learning_rate": 0.00011957350333205936,
      "loss": 3.1235,
      "step": 162526
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.871612310409546,
      "learning_rate": 0.0001195702352687066,
      "loss": 2.7615,
      "step": 162527
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9518330097198486,
      "learning_rate": 0.00011956696723889923,
      "loss": 2.8548,
      "step": 162528
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9694583415985107,
      "learning_rate": 0.00011956369924263766,
      "loss": 2.8557,
      "step": 162529
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.24112868309021,
      "learning_rate": 0.00011956043127992269,
      "loss": 2.9669,
      "step": 162530
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4489905834198,
      "learning_rate": 0.00011955716335075485,
      "loss": 2.9456,
      "step": 162531
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.437791109085083,
      "learning_rate": 0.00011955389545513478,
      "loss": 2.7456,
      "step": 162532
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0312743186950684,
      "learning_rate": 0.00011955062759306289,
      "loss": 2.9172,
      "step": 162533
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7609105110168457,
      "learning_rate": 0.00011954735976454008,
      "loss": 3.1105,
      "step": 162534
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.617419719696045,
      "learning_rate": 0.00011954409196956671,
      "loss": 2.8941,
      "step": 162535
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4212803840637207,
      "learning_rate": 0.00011954082420814359,
      "loss": 3.0065,
      "step": 162536
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2426340579986572,
      "learning_rate": 0.00011953755648027125,
      "loss": 2.8918,
      "step": 162537
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8575382232666016,
      "learning_rate": 0.00011953428878595016,
      "loss": 2.9057,
      "step": 162538
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.744384288787842,
      "learning_rate": 0.0001195310211251812,
      "loss": 2.7748,
      "step": 162539
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4954168796539307,
      "learning_rate": 0.00011952775349796478,
      "loss": 2.9314,
      "step": 162540
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.841264247894287,
      "learning_rate": 0.0001195244859043015,
      "loss": 2.6995,
      "step": 162541
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.329604148864746,
      "learning_rate": 0.00011952121834419212,
      "loss": 3.0275,
      "step": 162542
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.65453839302063,
      "learning_rate": 0.00011951795081763712,
      "loss": 2.9355,
      "step": 162543
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.376612663269043,
      "learning_rate": 0.00011951468332463705,
      "loss": 3.0462,
      "step": 162544
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.452540874481201,
      "learning_rate": 0.00011951141586519273,
      "loss": 3.179,
      "step": 162545
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.319733142852783,
      "learning_rate": 0.00011950814843930461,
      "loss": 2.8566,
      "step": 162546
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.013721466064453,
      "learning_rate": 0.00011950488104697325,
      "loss": 3.0444,
      "step": 162547
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5360116958618164,
      "learning_rate": 0.00011950161368819948,
      "loss": 2.9278,
      "step": 162548
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.292555332183838,
      "learning_rate": 0.0001194983463629836,
      "loss": 2.8111,
      "step": 162549
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6463840007781982,
      "learning_rate": 0.00011949507907132653,
      "loss": 2.8696,
      "step": 162550
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5219175815582275,
      "learning_rate": 0.00011949181181322873,
      "loss": 2.8732,
      "step": 162551
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1729788780212402,
      "learning_rate": 0.00011948854458869068,
      "loss": 2.9844,
      "step": 162552
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4743566513061523,
      "learning_rate": 0.00011948527739771322,
      "loss": 2.9249,
      "step": 162553
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5910141468048096,
      "learning_rate": 0.00011948201024029688,
      "loss": 3.127,
      "step": 162554
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3543014526367188,
      "learning_rate": 0.00011947874311644212,
      "loss": 2.5222,
      "step": 162555
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.248626708984375,
      "learning_rate": 0.00011947547602614979,
      "loss": 2.9404,
      "step": 162556
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.088442087173462,
      "learning_rate": 0.00011947220896942036,
      "loss": 3.0892,
      "step": 162557
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0913007259368896,
      "learning_rate": 0.00011946894194625437,
      "loss": 3.0233,
      "step": 162558
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8442509174346924,
      "learning_rate": 0.00011946567495665259,
      "loss": 2.8887,
      "step": 162559
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.101950645446777,
      "learning_rate": 0.00011946240800061546,
      "loss": 2.948,
      "step": 162560
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.508226156234741,
      "learning_rate": 0.00011945914107814378,
      "loss": 2.9234,
      "step": 162561
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.479710817337036,
      "learning_rate": 0.00011945587418923805,
      "loss": 2.9729,
      "step": 162562
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0233325958251953,
      "learning_rate": 0.00011945260733389888,
      "loss": 2.7779,
      "step": 162563
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2601568698883057,
      "learning_rate": 0.00011944934051212678,
      "loss": 3.3329,
      "step": 162564
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3651628494262695,
      "learning_rate": 0.00011944607372392256,
      "loss": 2.9196,
      "step": 162565
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2132086753845215,
      "learning_rate": 0.0001194428069692866,
      "loss": 3.1468,
      "step": 162566
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.178175210952759,
      "learning_rate": 0.00011943954024821974,
      "loss": 3.0375,
      "step": 162567
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.32163143157959,
      "learning_rate": 0.00011943627356072251,
      "loss": 3.0992,
      "step": 162568
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.482544183731079,
      "learning_rate": 0.00011943300690679544,
      "loss": 2.899,
      "step": 162569
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0318472385406494,
      "learning_rate": 0.00011942974028643913,
      "loss": 2.7949,
      "step": 162570
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8346199989318848,
      "learning_rate": 0.0001194264736996543,
      "loss": 2.8572,
      "step": 162571
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2480268478393555,
      "learning_rate": 0.00011942320714644141,
      "loss": 3.0692,
      "step": 162572
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.374819278717041,
      "learning_rate": 0.00011941994062680126,
      "loss": 2.8846,
      "step": 162573
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.225316047668457,
      "learning_rate": 0.00011941667414073433,
      "loss": 2.9798,
      "step": 162574
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3302738666534424,
      "learning_rate": 0.00011941340768824128,
      "loss": 3.0571,
      "step": 162575
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.82002592086792,
      "learning_rate": 0.00011941014126932257,
      "loss": 2.8296,
      "step": 162576
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.243786334991455,
      "learning_rate": 0.00011940687488397903,
      "loss": 3.0559,
      "step": 162577
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4045772552490234,
      "learning_rate": 0.00011940360853221107,
      "loss": 3.0169,
      "step": 162578
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.502163887023926,
      "learning_rate": 0.00011940034221401945,
      "loss": 3.1949,
      "step": 162579
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.014857530593872,
      "learning_rate": 0.00011939707592940477,
      "loss": 2.9118,
      "step": 162580
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4881529808044434,
      "learning_rate": 0.00011939380967836754,
      "loss": 2.893,
      "step": 162581
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5053863525390625,
      "learning_rate": 0.00011939054346090831,
      "loss": 3.2981,
      "step": 162582
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2451658248901367,
      "learning_rate": 0.00011938727727702792,
      "loss": 3.081,
      "step": 162583
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1684508323669434,
      "learning_rate": 0.00011938401112672673,
      "loss": 3.1862,
      "step": 162584
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.363568067550659,
      "learning_rate": 0.00011938074501000555,
      "loss": 2.8559,
      "step": 162585
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3055994510650635,
      "learning_rate": 0.00011937747892686491,
      "loss": 2.7275,
      "step": 162586
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1474783420562744,
      "learning_rate": 0.00011937421287730541,
      "loss": 2.8973,
      "step": 162587
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.582560062408447,
      "learning_rate": 0.00011937094686132751,
      "loss": 2.6679,
      "step": 162588
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2563724517822266,
      "learning_rate": 0.00011936768087893212,
      "loss": 3.009,
      "step": 162589
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0469675064086914,
      "learning_rate": 0.00011936441493011955,
      "loss": 2.9595,
      "step": 162590
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.774536371231079,
      "learning_rate": 0.00011936114901489066,
      "loss": 2.9707,
      "step": 162591
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6880338191986084,
      "learning_rate": 0.00011935788313324595,
      "loss": 3.2159,
      "step": 162592
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4463164806365967,
      "learning_rate": 0.00011935461728518599,
      "loss": 3.0153,
      "step": 162593
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.8890600204467773,
      "learning_rate": 0.00011935135147071134,
      "loss": 3.109,
      "step": 162594
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3492045402526855,
      "learning_rate": 0.00011934808568982279,
      "loss": 3.0122,
      "step": 162595
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.236337661743164,
      "learning_rate": 0.00011934481994252074,
      "loss": 2.8762,
      "step": 162596
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.490511655807495,
      "learning_rate": 0.00011934155422880599,
      "loss": 3.0495,
      "step": 162597
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.4885613918304443,
      "learning_rate": 0.00011933828854867904,
      "loss": 3.1587,
      "step": 162598
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.267831325531006,
      "learning_rate": 0.00011933502290214055,
      "loss": 2.9029,
      "step": 162599
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.446138620376587,
      "learning_rate": 0.00011933175728919096,
      "loss": 3.0079,
      "step": 162600
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.8256282806396484,
      "learning_rate": 0.0001193284917098311,
      "loss": 2.7972,
      "step": 162601
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.719036817550659,
      "learning_rate": 0.00011932522616406141,
      "loss": 3.1409,
      "step": 162602
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1376893520355225,
      "learning_rate": 0.00011932196065188266,
      "loss": 2.8273,
      "step": 162603
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.586878538131714,
      "learning_rate": 0.00011931869517329535,
      "loss": 3.2253,
      "step": 162604
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.4777753353118896,
      "learning_rate": 0.00011931542972830015,
      "loss": 2.9879,
      "step": 162605
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.220311403274536,
      "learning_rate": 0.00011931216431689748,
      "loss": 2.9201,
      "step": 162606
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8383848667144775,
      "learning_rate": 0.0001193088989390882,
      "loss": 3.1753,
      "step": 162607
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.4168779850006104,
      "learning_rate": 0.0001193056335948727,
      "loss": 2.8396,
      "step": 162608
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5541720390319824,
      "learning_rate": 0.00011930236828425181,
      "loss": 3.0658,
      "step": 162609
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6081576347351074,
      "learning_rate": 0.00011929910300722592,
      "loss": 2.6734,
      "step": 162610
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6849658489227295,
      "learning_rate": 0.00011929583776379595,
      "loss": 3.2068,
      "step": 162611
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.687891960144043,
      "learning_rate": 0.00011929257255396207,
      "loss": 2.9641,
      "step": 162612
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.613690137863159,
      "learning_rate": 0.0001192893073777252,
      "loss": 2.8744,
      "step": 162613
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.038060188293457,
      "learning_rate": 0.00011928604223508577,
      "loss": 3.1242,
      "step": 162614
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5125913619995117,
      "learning_rate": 0.0001192827771260446,
      "loss": 2.6811,
      "step": 162615
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.746546745300293,
      "learning_rate": 0.00011927951205060205,
      "loss": 2.7432,
      "step": 162616
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.988565683364868,
      "learning_rate": 0.00011927624700875906,
      "loss": 3.1062,
      "step": 162617
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1746630668640137,
      "learning_rate": 0.00011927298200051582,
      "loss": 2.8232,
      "step": 162618
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.366455316543579,
      "learning_rate": 0.00011926971702587325,
      "loss": 2.8184,
      "step": 162619
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.1636881828308105,
      "learning_rate": 0.00011926645208483175,
      "loss": 2.8759,
      "step": 162620
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5640597343444824,
      "learning_rate": 0.00011926318717739212,
      "loss": 2.9515,
      "step": 162621
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.614323616027832,
      "learning_rate": 0.0001192599223035548,
      "loss": 2.9939,
      "step": 162622
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.549009323120117,
      "learning_rate": 0.00011925665746332064,
      "loss": 3.0954,
      "step": 162623
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.483426570892334,
      "learning_rate": 0.0001192533926566899,
      "loss": 3.0478,
      "step": 162624
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0182712078094482,
      "learning_rate": 0.00011925012788366347,
      "loss": 3.3012,
      "step": 162625
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.211054563522339,
      "learning_rate": 0.00011924686314424174,
      "loss": 3.0811,
      "step": 162626
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5945427417755127,
      "learning_rate": 0.00011924359843842552,
      "loss": 3.2126,
      "step": 162627
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3438732624053955,
      "learning_rate": 0.00011924033376621524,
      "loss": 3.0608,
      "step": 162628
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.34000563621521,
      "learning_rate": 0.0001192370691276117,
      "loss": 3.0001,
      "step": 162629
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3229267597198486,
      "learning_rate": 0.0001192338045226154,
      "loss": 2.8528,
      "step": 162630
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2553751468658447,
      "learning_rate": 0.00011923053995122694,
      "loss": 2.7841,
      "step": 162631
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.485373020172119,
      "learning_rate": 0.00011922727541344684,
      "loss": 2.9196,
      "step": 162632
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5857393741607666,
      "learning_rate": 0.00011922401090927589,
      "loss": 2.8368,
      "step": 162633
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5816287994384766,
      "learning_rate": 0.00011922074643871452,
      "loss": 2.9553,
      "step": 162634
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.733269453048706,
      "learning_rate": 0.00011921748200176353,
      "loss": 2.7775,
      "step": 162635
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2002129554748535,
      "learning_rate": 0.00011921421759842342,
      "loss": 2.9884,
      "step": 162636
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9244742393493652,
      "learning_rate": 0.0001192109532286947,
      "loss": 2.9839,
      "step": 162637
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1384124755859375,
      "learning_rate": 0.00011920768889257818,
      "loss": 2.7413,
      "step": 162638
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6778736114501953,
      "learning_rate": 0.00011920442459007434,
      "loss": 3.159,
      "step": 162639
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.229782819747925,
      "learning_rate": 0.00011920116032118372,
      "loss": 3.052,
      "step": 162640
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.212602615356445,
      "learning_rate": 0.00011919789608590714,
      "loss": 3.0002,
      "step": 162641
    },
    {
      "epoch": 2.12,
      "grad_norm": 6.342057704925537,
      "learning_rate": 0.00011919463188424507,
      "loss": 2.8624,
      "step": 162642
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.996647596359253,
      "learning_rate": 0.00011919136771619802,
      "loss": 2.932,
      "step": 162643
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.305813789367676,
      "learning_rate": 0.0001191881035817668,
      "loss": 3.0336,
      "step": 162644
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.080766439437866,
      "learning_rate": 0.00011918483948095196,
      "loss": 2.9093,
      "step": 162645
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.679581880569458,
      "learning_rate": 0.00011918157541375394,
      "loss": 3.0246,
      "step": 162646
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9709160327911377,
      "learning_rate": 0.00011917831138017359,
      "loss": 2.9546,
      "step": 162647
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.094151258468628,
      "learning_rate": 0.0001191750473802114,
      "loss": 2.8098,
      "step": 162648
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.108494997024536,
      "learning_rate": 0.0001191717834138679,
      "loss": 3.0766,
      "step": 162649
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4185218811035156,
      "learning_rate": 0.00011916851948114387,
      "loss": 2.9418,
      "step": 162650
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.951050281524658,
      "learning_rate": 0.00011916525558203972,
      "loss": 3.101,
      "step": 162651
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.842759132385254,
      "learning_rate": 0.00011916199171655627,
      "loss": 2.8728,
      "step": 162652
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0672178268432617,
      "learning_rate": 0.00011915872788469403,
      "loss": 2.8807,
      "step": 162653
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.43983268737793,
      "learning_rate": 0.00011915546408645356,
      "loss": 3.0015,
      "step": 162654
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.649181842803955,
      "learning_rate": 0.00011915220032183543,
      "loss": 3.1381,
      "step": 162655
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2775166034698486,
      "learning_rate": 0.00011914893659084042,
      "loss": 3.2546,
      "step": 162656
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.7797482013702393,
      "learning_rate": 0.0001191456728934689,
      "loss": 3.2931,
      "step": 162657
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.705496311187744,
      "learning_rate": 0.00011914240922972174,
      "loss": 3.0812,
      "step": 162658
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2252790927886963,
      "learning_rate": 0.00011913914559959946,
      "loss": 3.1678,
      "step": 162659
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4539029598236084,
      "learning_rate": 0.00011913588200310258,
      "loss": 2.91,
      "step": 162660
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.8273110389709473,
      "learning_rate": 0.00011913261844023167,
      "loss": 2.9495,
      "step": 162661
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3272414207458496,
      "learning_rate": 0.00011912935491098751,
      "loss": 2.7897,
      "step": 162662
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7226476669311523,
      "learning_rate": 0.00011912609141537051,
      "loss": 2.5997,
      "step": 162663
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4701664447784424,
      "learning_rate": 0.00011912282795338149,
      "loss": 3.0109,
      "step": 162664
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5906777381896973,
      "learning_rate": 0.00011911956452502098,
      "loss": 2.8481,
      "step": 162665
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1183791160583496,
      "learning_rate": 0.00011911630113028954,
      "loss": 3.1725,
      "step": 162666
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6075615882873535,
      "learning_rate": 0.00011911303776918766,
      "loss": 2.9171,
      "step": 162667
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4187073707580566,
      "learning_rate": 0.00011910977444171621,
      "loss": 2.6558,
      "step": 162668
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4961864948272705,
      "learning_rate": 0.00011910651114787557,
      "loss": 2.9056,
      "step": 162669
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6476988792419434,
      "learning_rate": 0.00011910324788766654,
      "loss": 2.601,
      "step": 162670
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3115012645721436,
      "learning_rate": 0.00011909998466108962,
      "loss": 2.9248,
      "step": 162671
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2860524654388428,
      "learning_rate": 0.00011909672146814547,
      "loss": 2.9661,
      "step": 162672
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7061591148376465,
      "learning_rate": 0.0001190934583088345,
      "loss": 2.9178,
      "step": 162673
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9067583084106445,
      "learning_rate": 0.0001190901951831576,
      "loss": 2.9733,
      "step": 162674
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.134475231170654,
      "learning_rate": 0.00011908693209111512,
      "loss": 3.0228,
      "step": 162675
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9929897785186768,
      "learning_rate": 0.00011908366903270792,
      "loss": 2.9454,
      "step": 162676
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2425918579101562,
      "learning_rate": 0.00011908040600793637,
      "loss": 3.0982,
      "step": 162677
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.294774293899536,
      "learning_rate": 0.00011907714301680142,
      "loss": 2.9002,
      "step": 162678
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.921180486679077,
      "learning_rate": 0.00011907388005930321,
      "loss": 2.9274,
      "step": 162679
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7686281204223633,
      "learning_rate": 0.00011907061713544268,
      "loss": 2.9833,
      "step": 162680
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.63960337638855,
      "learning_rate": 0.00011906735424522024,
      "loss": 2.9444,
      "step": 162681
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2650463581085205,
      "learning_rate": 0.00011906409138863671,
      "loss": 2.9487,
      "step": 162682
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.324575185775757,
      "learning_rate": 0.00011906082856569249,
      "loss": 2.9445,
      "step": 162683
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.845081329345703,
      "learning_rate": 0.00011905756577638845,
      "loss": 2.5902,
      "step": 162684
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.527719736099243,
      "learning_rate": 0.00011905430302072483,
      "loss": 2.7718,
      "step": 162685
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2751965522766113,
      "learning_rate": 0.00011905104029870255,
      "loss": 2.9822,
      "step": 162686
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.716888666152954,
      "learning_rate": 0.00011904777761032195,
      "loss": 3.0065,
      "step": 162687
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6621029376983643,
      "learning_rate": 0.00011904451495558393,
      "loss": 2.9558,
      "step": 162688
    },
    {
      "epoch": 2.12,
      "grad_norm": 5.00697660446167,
      "learning_rate": 0.00011904125233448885,
      "loss": 2.9224,
      "step": 162689
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3340749740600586,
      "learning_rate": 0.00011903798974703762,
      "loss": 2.7814,
      "step": 162690
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2836408615112305,
      "learning_rate": 0.0001190347271932304,
      "loss": 2.8712,
      "step": 162691
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6658213138580322,
      "learning_rate": 0.00011903146467306816,
      "loss": 2.9277,
      "step": 162692
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.310819625854492,
      "learning_rate": 0.00011902820218655129,
      "loss": 2.8303,
      "step": 162693
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2946016788482666,
      "learning_rate": 0.0001190249397336806,
      "loss": 2.9609,
      "step": 162694
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.229099988937378,
      "learning_rate": 0.0001190216773144565,
      "loss": 2.9326,
      "step": 162695
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2638561725616455,
      "learning_rate": 0.00011901841492887987,
      "loss": 2.8859,
      "step": 162696
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5113186836242676,
      "learning_rate": 0.00011901515257695091,
      "loss": 3.0887,
      "step": 162697
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7445151805877686,
      "learning_rate": 0.00011901189025867059,
      "loss": 3.0978,
      "step": 162698
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8821897506713867,
      "learning_rate": 0.00011900862797403926,
      "loss": 2.9758,
      "step": 162699
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3325462341308594,
      "learning_rate": 0.00011900536572305774,
      "loss": 3.1713,
      "step": 162700
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5923357009887695,
      "learning_rate": 0.00011900210350572645,
      "loss": 2.8031,
      "step": 162701
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.9830429553985596,
      "learning_rate": 0.00011899884132204629,
      "loss": 3.1316,
      "step": 162702
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.395129680633545,
      "learning_rate": 0.00011899557917201746,
      "loss": 3.0107,
      "step": 162703
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.10343337059021,
      "learning_rate": 0.00011899231705564083,
      "loss": 2.8845,
      "step": 162704
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.321568489074707,
      "learning_rate": 0.00011898905497291688,
      "loss": 2.9105,
      "step": 162705
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8227622509002686,
      "learning_rate": 0.00011898579292384639,
      "loss": 2.9939,
      "step": 162706
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.549689292907715,
      "learning_rate": 0.00011898253090842973,
      "loss": 2.7379,
      "step": 162707
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6612517833709717,
      "learning_rate": 0.00011897926892666787,
      "loss": 3.0367,
      "step": 162708
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4911391735076904,
      "learning_rate": 0.00011897600697856094,
      "loss": 3.1348,
      "step": 162709
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4310617446899414,
      "learning_rate": 0.0001189727450641099,
      "loss": 2.9112,
      "step": 162710
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.088930368423462,
      "learning_rate": 0.00011896948318331516,
      "loss": 2.9524,
      "step": 162711
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.481297254562378,
      "learning_rate": 0.00011896622133617751,
      "loss": 2.8121,
      "step": 162712
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1974973678588867,
      "learning_rate": 0.00011896295952269732,
      "loss": 2.9791,
      "step": 162713
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.9358761310577393,
      "learning_rate": 0.00011895969774287548,
      "loss": 2.9795,
      "step": 162714
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.245206832885742,
      "learning_rate": 0.00011895643599671242,
      "loss": 3.1567,
      "step": 162715
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1393487453460693,
      "learning_rate": 0.0001189531742842088,
      "loss": 2.9409,
      "step": 162716
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5043580532073975,
      "learning_rate": 0.00011894991260536509,
      "loss": 2.95,
      "step": 162717
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.850817918777466,
      "learning_rate": 0.00011894665096018212,
      "loss": 2.7107,
      "step": 162718
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7609076499938965,
      "learning_rate": 0.00011894338934866025,
      "loss": 3.0278,
      "step": 162719
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9016482830047607,
      "learning_rate": 0.00011894012777080035,
      "loss": 2.9383,
      "step": 162720
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4997828006744385,
      "learning_rate": 0.0001189368662266029,
      "loss": 3.0119,
      "step": 162721
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.106403350830078,
      "learning_rate": 0.00011893360471606839,
      "loss": 2.795,
      "step": 162722
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8562402725219727,
      "learning_rate": 0.00011893034323919765,
      "loss": 2.9388,
      "step": 162723
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.59189772605896,
      "learning_rate": 0.00011892708179599117,
      "loss": 3.0254,
      "step": 162724
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.511627197265625,
      "learning_rate": 0.00011892382038644947,
      "loss": 3.089,
      "step": 162725
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.411623954772949,
      "learning_rate": 0.00011892055901057335,
      "loss": 2.8775,
      "step": 162726
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.448127508163452,
      "learning_rate": 0.00011891729766836332,
      "loss": 3.1769,
      "step": 162727
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.8409340381622314,
      "learning_rate": 0.00011891403635981987,
      "loss": 2.9294,
      "step": 162728
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3050782680511475,
      "learning_rate": 0.00011891077508494382,
      "loss": 2.8827,
      "step": 162729
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.5407629013061523,
      "learning_rate": 0.00011890751384373569,
      "loss": 2.8563,
      "step": 162730
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3819785118103027,
      "learning_rate": 0.00011890425263619594,
      "loss": 3.1981,
      "step": 162731
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8678157329559326,
      "learning_rate": 0.00011890099146232545,
      "loss": 2.8959,
      "step": 162732
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.972726583480835,
      "learning_rate": 0.00011889773032212467,
      "loss": 3.0112,
      "step": 162733
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.813388824462891,
      "learning_rate": 0.00011889446921559409,
      "loss": 2.9674,
      "step": 162734
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5604026317596436,
      "learning_rate": 0.00011889120814273457,
      "loss": 2.8455,
      "step": 162735
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8415422439575195,
      "learning_rate": 0.00011888794710354652,
      "loss": 3.0359,
      "step": 162736
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4046826362609863,
      "learning_rate": 0.00011888468609803067,
      "loss": 3.0252,
      "step": 162737
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.161438465118408,
      "learning_rate": 0.00011888142512618762,
      "loss": 3.121,
      "step": 162738
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.036288738250732,
      "learning_rate": 0.00011887816418801793,
      "loss": 2.8651,
      "step": 162739
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4908013343811035,
      "learning_rate": 0.00011887490328352206,
      "loss": 3.1771,
      "step": 162740
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.8018572330474854,
      "learning_rate": 0.0001188716424127009,
      "loss": 2.8098,
      "step": 162741
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4202237129211426,
      "learning_rate": 0.00011886838157555483,
      "loss": 2.8408,
      "step": 162742
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.349076509475708,
      "learning_rate": 0.00011886512077208462,
      "loss": 3.0516,
      "step": 162743
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.421428680419922,
      "learning_rate": 0.00011886186000229082,
      "loss": 2.8834,
      "step": 162744
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4545671939849854,
      "learning_rate": 0.00011885859926617403,
      "loss": 3.175,
      "step": 162745
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2982027530670166,
      "learning_rate": 0.0001188553385637347,
      "loss": 2.7176,
      "step": 162746
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.163707971572876,
      "learning_rate": 0.00011885207789497371,
      "loss": 3.0317,
      "step": 162747
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.336740255355835,
      "learning_rate": 0.00011884881725989143,
      "loss": 3.1392,
      "step": 162748
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.6401350498199463,
      "learning_rate": 0.00011884555665848869,
      "loss": 3.0876,
      "step": 162749
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4504547119140625,
      "learning_rate": 0.00011884229609076588,
      "loss": 2.9458,
      "step": 162750
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3398194313049316,
      "learning_rate": 0.00011883903555672391,
      "loss": 3.0729,
      "step": 162751
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.418304681777954,
      "learning_rate": 0.00011883577505636298,
      "loss": 2.9562,
      "step": 162752
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6762287616729736,
      "learning_rate": 0.000118832514589684,
      "loss": 2.8495,
      "step": 162753
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.4757895469665527,
      "learning_rate": 0.00011882925415668738,
      "loss": 2.9838,
      "step": 162754
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.699751377105713,
      "learning_rate": 0.00011882599375737388,
      "loss": 2.6082,
      "step": 162755
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6692705154418945,
      "learning_rate": 0.000118822733391744,
      "loss": 2.964,
      "step": 162756
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.212207317352295,
      "learning_rate": 0.00011881947305979858,
      "loss": 2.8904,
      "step": 162757
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.088426113128662,
      "learning_rate": 0.00011881621276153782,
      "loss": 2.7797,
      "step": 162758
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.356032609939575,
      "learning_rate": 0.00011881295249696268,
      "loss": 3.1571,
      "step": 162759
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7316787242889404,
      "learning_rate": 0.00011880969226607349,
      "loss": 3.2336,
      "step": 162760
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6683623790740967,
      "learning_rate": 0.00011880643206887112,
      "loss": 2.8674,
      "step": 162761
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.788750410079956,
      "learning_rate": 0.00011880317190535597,
      "loss": 2.992,
      "step": 162762
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.183229684829712,
      "learning_rate": 0.0001187999117755289,
      "loss": 2.9049,
      "step": 162763
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8490209579467773,
      "learning_rate": 0.00011879665167939016,
      "loss": 2.8231,
      "step": 162764
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.533808708190918,
      "learning_rate": 0.00011879339161694066,
      "loss": 3.1779,
      "step": 162765
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6705386638641357,
      "learning_rate": 0.00011879013158818075,
      "loss": 2.8468,
      "step": 162766
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.5048444271087646,
      "learning_rate": 0.00011878687159311132,
      "loss": 2.9752,
      "step": 162767
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.247676372528076,
      "learning_rate": 0.0001187836116317327,
      "loss": 3.011,
      "step": 162768
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0162668228149414,
      "learning_rate": 0.00011878035170404585,
      "loss": 2.8605,
      "step": 162769
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.230248212814331,
      "learning_rate": 0.00011877709181005094,
      "loss": 3.0282,
      "step": 162770
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.6770546436309814,
      "learning_rate": 0.00011877383194974887,
      "loss": 3.005,
      "step": 162771
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.931156158447266,
      "learning_rate": 0.0001187705721231401,
      "loss": 2.9853,
      "step": 162772
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2587668895721436,
      "learning_rate": 0.0001187673123302254,
      "loss": 2.7936,
      "step": 162773
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1459267139434814,
      "learning_rate": 0.00011876405257100514,
      "loss": 2.6616,
      "step": 162774
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6304500102996826,
      "learning_rate": 0.0001187607928454803,
      "loss": 2.6267,
      "step": 162775
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.070376396179199,
      "learning_rate": 0.00011875753315365104,
      "loss": 2.9222,
      "step": 162776
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.475937843322754,
      "learning_rate": 0.00011875427349551829,
      "loss": 2.9547,
      "step": 162777
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.75235915184021,
      "learning_rate": 0.00011875101387108243,
      "loss": 2.9913,
      "step": 162778
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.362950325012207,
      "learning_rate": 0.00011874775428034426,
      "loss": 2.57,
      "step": 162779
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4494571685791016,
      "learning_rate": 0.00011874449472330422,
      "loss": 2.8506,
      "step": 162780
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7138173580169678,
      "learning_rate": 0.00011874123519996322,
      "loss": 2.9202,
      "step": 162781
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.261571168899536,
      "learning_rate": 0.00011873797571032139,
      "loss": 3.0178,
      "step": 162782
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.330876350402832,
      "learning_rate": 0.00011873471625437973,
      "loss": 3.0505,
      "step": 162783
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.728973150253296,
      "learning_rate": 0.00011873145683213859,
      "loss": 3.1652,
      "step": 162784
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5389575958251953,
      "learning_rate": 0.00011872819744359883,
      "loss": 2.8419,
      "step": 162785
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5203166007995605,
      "learning_rate": 0.0001187249380887608,
      "loss": 2.8105,
      "step": 162786
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1552000045776367,
      "learning_rate": 0.00011872167876762542,
      "loss": 2.8395,
      "step": 162787
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1607632637023926,
      "learning_rate": 0.0001187184194801929,
      "loss": 3.1648,
      "step": 162788
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6673049926757812,
      "learning_rate": 0.00011871516022646417,
      "loss": 3.1289,
      "step": 162789
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.675997734069824,
      "learning_rate": 0.00011871190100643956,
      "loss": 3.1962,
      "step": 162790
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8138787746429443,
      "learning_rate": 0.00011870864182011998,
      "loss": 2.7984,
      "step": 162791
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.348398447036743,
      "learning_rate": 0.00011870538266750579,
      "loss": 2.93,
      "step": 162792
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.798924446105957,
      "learning_rate": 0.00011870212354859787,
      "loss": 2.8241,
      "step": 162793
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.539111614227295,
      "learning_rate": 0.00011869886446339646,
      "loss": 2.9928,
      "step": 162794
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7727415561676025,
      "learning_rate": 0.00011869560541190246,
      "loss": 3.0165,
      "step": 162795
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.229170083999634,
      "learning_rate": 0.00011869234639411621,
      "loss": 3.229,
      "step": 162796
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.831233024597168,
      "learning_rate": 0.00011868908741003864,
      "loss": 2.8415,
      "step": 162797
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3480701446533203,
      "learning_rate": 0.00011868582845967007,
      "loss": 2.8237,
      "step": 162798
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.929183006286621,
      "learning_rate": 0.0001186825695430113,
      "loss": 2.9853,
      "step": 162799
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4423372745513916,
      "learning_rate": 0.00011867931066006291,
      "loss": 2.801,
      "step": 162800
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7476019859313965,
      "learning_rate": 0.00011867605181082545,
      "loss": 3.1096,
      "step": 162801
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.932751417160034,
      "learning_rate": 0.00011867279299529941,
      "loss": 2.933,
      "step": 162802
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8015570640563965,
      "learning_rate": 0.00011866953421348565,
      "loss": 3.052,
      "step": 162803
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4827661514282227,
      "learning_rate": 0.00011866627546538452,
      "loss": 2.8788,
      "step": 162804
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7613320350646973,
      "learning_rate": 0.00011866301675099686,
      "loss": 3.0156,
      "step": 162805
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4101486206054688,
      "learning_rate": 0.00011865975807032318,
      "loss": 2.9718,
      "step": 162806
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0687246322631836,
      "learning_rate": 0.00011865649942336408,
      "loss": 3.0031,
      "step": 162807
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5196921825408936,
      "learning_rate": 0.00011865324081012004,
      "loss": 3.1586,
      "step": 162808
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.076488733291626,
      "learning_rate": 0.00011864998223059187,
      "loss": 2.8371,
      "step": 162809
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2191340923309326,
      "learning_rate": 0.00011864672368478002,
      "loss": 2.8522,
      "step": 162810
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.795546770095825,
      "learning_rate": 0.00011864346517268527,
      "loss": 2.8697,
      "step": 162811
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.282567024230957,
      "learning_rate": 0.00011864020669430812,
      "loss": 3.2777,
      "step": 162812
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.414228916168213,
      "learning_rate": 0.00011863694824964906,
      "loss": 2.9323,
      "step": 162813
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.7815778255462646,
      "learning_rate": 0.00011863368983870893,
      "loss": 2.7157,
      "step": 162814
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0142765045166016,
      "learning_rate": 0.00011863043146148822,
      "loss": 2.956,
      "step": 162815
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1820802688598633,
      "learning_rate": 0.00011862717311798744,
      "loss": 3.0588,
      "step": 162816
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.450845718383789,
      "learning_rate": 0.00011862391480820738,
      "loss": 2.8795,
      "step": 162817
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0211808681488037,
      "learning_rate": 0.00011862065653214856,
      "loss": 3.0281,
      "step": 162818
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.284766435623169,
      "learning_rate": 0.0001186173982898115,
      "loss": 2.8823,
      "step": 162819
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.578793525695801,
      "learning_rate": 0.00011861414008119695,
      "loss": 2.7431,
      "step": 162820
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7829606533050537,
      "learning_rate": 0.0001186108819063054,
      "loss": 3.166,
      "step": 162821
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.633192539215088,
      "learning_rate": 0.00011860762376513758,
      "loss": 2.939,
      "step": 162822
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.159231662750244,
      "learning_rate": 0.00011860436565769403,
      "loss": 3.0026,
      "step": 162823
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2279107570648193,
      "learning_rate": 0.00011860110758397539,
      "loss": 2.9777,
      "step": 162824
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.572941303253174,
      "learning_rate": 0.0001185978495439821,
      "loss": 3.0925,
      "step": 162825
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8587265014648438,
      "learning_rate": 0.000118594591537715,
      "loss": 2.9985,
      "step": 162826
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.345165967941284,
      "learning_rate": 0.00011859133356517447,
      "loss": 3.0769,
      "step": 162827
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4243578910827637,
      "learning_rate": 0.00011858807562636134,
      "loss": 2.8749,
      "step": 162828
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5175726413726807,
      "learning_rate": 0.00011858481772127614,
      "loss": 2.9179,
      "step": 162829
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.9283573627471924,
      "learning_rate": 0.00011858155984991942,
      "loss": 3.0101,
      "step": 162830
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4530606269836426,
      "learning_rate": 0.00011857830201229171,
      "loss": 2.9443,
      "step": 162831
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3266890048980713,
      "learning_rate": 0.00011857504420839383,
      "loss": 3.242,
      "step": 162832
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1304290294647217,
      "learning_rate": 0.0001185717864382262,
      "loss": 2.9155,
      "step": 162833
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5009400844573975,
      "learning_rate": 0.00011856852870178955,
      "loss": 3.0414,
      "step": 162834
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5099947452545166,
      "learning_rate": 0.00011856527099908437,
      "loss": 2.661,
      "step": 162835
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8992738723754883,
      "learning_rate": 0.00011856201333011152,
      "loss": 2.9548,
      "step": 162836
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3277976512908936,
      "learning_rate": 0.00011855875569487122,
      "loss": 3.1922,
      "step": 162837
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.401015043258667,
      "learning_rate": 0.00011855549809336437,
      "loss": 3.2273,
      "step": 162838
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.356896162033081,
      "learning_rate": 0.00011855224052559139,
      "loss": 3.0434,
      "step": 162839
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.897745370864868,
      "learning_rate": 0.00011854898299155303,
      "loss": 3.0868,
      "step": 162840
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4482176303863525,
      "learning_rate": 0.00011854572549124977,
      "loss": 2.9308,
      "step": 162841
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.961764097213745,
      "learning_rate": 0.00011854246802468248,
      "loss": 3.0939,
      "step": 162842
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1309382915496826,
      "learning_rate": 0.00011853921059185137,
      "loss": 2.736,
      "step": 162843
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.5189690589904785,
      "learning_rate": 0.00011853595319275738,
      "loss": 2.7113,
      "step": 162844
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.262295961380005,
      "learning_rate": 0.00011853269582740084,
      "loss": 3.1179,
      "step": 162845
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.241088390350342,
      "learning_rate": 0.0001185294384957826,
      "loss": 2.9542,
      "step": 162846
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4488234519958496,
      "learning_rate": 0.00011852618119790309,
      "loss": 2.7823,
      "step": 162847
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0446245670318604,
      "learning_rate": 0.00011852292393376317,
      "loss": 3.0972,
      "step": 162848
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3675529956817627,
      "learning_rate": 0.00011851966670336307,
      "loss": 3.1563,
      "step": 162849
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5596516132354736,
      "learning_rate": 0.00011851640950670368,
      "loss": 3.0003,
      "step": 162850
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2620797157287598,
      "learning_rate": 0.0001185131523437854,
      "loss": 2.7369,
      "step": 162851
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7177093029022217,
      "learning_rate": 0.0001185098952146091,
      "loss": 3.1813,
      "step": 162852
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4856810569763184,
      "learning_rate": 0.00011850663811917507,
      "loss": 2.9742,
      "step": 162853
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4313971996307373,
      "learning_rate": 0.00011850338105748435,
      "loss": 2.9393,
      "step": 162854
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.397937297821045,
      "learning_rate": 0.00011850012402953704,
      "loss": 2.8988,
      "step": 162855
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7416839599609375,
      "learning_rate": 0.0001184968670353341,
      "loss": 2.8964,
      "step": 162856
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2436909675598145,
      "learning_rate": 0.0001184936100748759,
      "loss": 2.8984,
      "step": 162857
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.392378568649292,
      "learning_rate": 0.00011849035314816327,
      "loss": 2.9567,
      "step": 162858
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.803368091583252,
      "learning_rate": 0.0001184870962551966,
      "loss": 3.0617,
      "step": 162859
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0065255165100098,
      "learning_rate": 0.00011848383939597681,
      "loss": 3.0099,
      "step": 162860
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2523388862609863,
      "learning_rate": 0.00011848058257050413,
      "loss": 3.0694,
      "step": 162861
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.935045003890991,
      "learning_rate": 0.0001184773257787794,
      "loss": 2.9586,
      "step": 162862
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.601243257522583,
      "learning_rate": 0.00011847406902080306,
      "loss": 3.0644,
      "step": 162863
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.777841329574585,
      "learning_rate": 0.00011847081229657593,
      "loss": 2.8161,
      "step": 162864
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.950759172439575,
      "learning_rate": 0.0001184675556060984,
      "loss": 3.0525,
      "step": 162865
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.67374849319458,
      "learning_rate": 0.00011846429894937141,
      "loss": 2.9412,
      "step": 162866
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.458338975906372,
      "learning_rate": 0.00011846104232639509,
      "loss": 3.1837,
      "step": 162867
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.89762282371521,
      "learning_rate": 0.0001184577857371704,
      "loss": 2.949,
      "step": 162868
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4301671981811523,
      "learning_rate": 0.00011845452918169776,
      "loss": 3.0058,
      "step": 162869
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.553209066390991,
      "learning_rate": 0.00011845127265997794,
      "loss": 3.1454,
      "step": 162870
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.5347516536712646,
      "learning_rate": 0.00011844801617201132,
      "loss": 2.9148,
      "step": 162871
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.780418634414673,
      "learning_rate": 0.0001184447597177989,
      "loss": 3.0927,
      "step": 162872
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3075761795043945,
      "learning_rate": 0.00011844150329734077,
      "loss": 3.0876,
      "step": 162873
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4939725399017334,
      "learning_rate": 0.00011843824691063792,
      "loss": 2.9275,
      "step": 162874
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.64259672164917,
      "learning_rate": 0.00011843499055769073,
      "loss": 3.124,
      "step": 162875
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2429614067077637,
      "learning_rate": 0.00011843173423849997,
      "loss": 2.7536,
      "step": 162876
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.831052780151367,
      "learning_rate": 0.00011842847795306612,
      "loss": 2.8367,
      "step": 162877
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.093049049377441,
      "learning_rate": 0.00011842522170138992,
      "loss": 2.863,
      "step": 162878
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.108513355255127,
      "learning_rate": 0.00011842196548347191,
      "loss": 2.9753,
      "step": 162879
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.501199722290039,
      "learning_rate": 0.00011841870929931267,
      "loss": 2.9991,
      "step": 162880
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3137853145599365,
      "learning_rate": 0.00011841545314891273,
      "loss": 2.9531,
      "step": 162881
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4716272354125977,
      "learning_rate": 0.00011841219703227288,
      "loss": 3.0916,
      "step": 162882
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3787243366241455,
      "learning_rate": 0.00011840894094939352,
      "loss": 3.077,
      "step": 162883
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9299583435058594,
      "learning_rate": 0.0001184056849002755,
      "loss": 2.9299,
      "step": 162884
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6478304862976074,
      "learning_rate": 0.00011840242888491926,
      "loss": 3.0331,
      "step": 162885
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.150841236114502,
      "learning_rate": 0.00011839917290332546,
      "loss": 2.6792,
      "step": 162886
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1836040019989014,
      "learning_rate": 0.00011839591695549457,
      "loss": 2.9815,
      "step": 162887
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6633055210113525,
      "learning_rate": 0.00011839266104142738,
      "loss": 3.0037,
      "step": 162888
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.578315258026123,
      "learning_rate": 0.00011838940516112436,
      "loss": 3.1128,
      "step": 162889
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.357339382171631,
      "learning_rate": 0.00011838614931458628,
      "loss": 2.8304,
      "step": 162890
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3697500228881836,
      "learning_rate": 0.00011838289350181363,
      "loss": 3.095,
      "step": 162891
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6701040267944336,
      "learning_rate": 0.00011837963772280703,
      "loss": 2.7781,
      "step": 162892
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1906650066375732,
      "learning_rate": 0.00011837638197756696,
      "loss": 2.9331,
      "step": 162893
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4056379795074463,
      "learning_rate": 0.00011837312626609427,
      "loss": 3.038,
      "step": 162894
    },
    {
      "epoch": 2.12,
      "grad_norm": 5.123666286468506,
      "learning_rate": 0.00011836987058838935,
      "loss": 2.9074,
      "step": 162895
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9700798988342285,
      "learning_rate": 0.00011836661494445297,
      "loss": 2.6464,
      "step": 162896
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8230128288269043,
      "learning_rate": 0.00011836335933428572,
      "loss": 2.6524,
      "step": 162897
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3713631629943848,
      "learning_rate": 0.000118360103757888,
      "loss": 2.921,
      "step": 162898
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.720804214477539,
      "learning_rate": 0.00011835684821526067,
      "loss": 3.0892,
      "step": 162899
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7889389991760254,
      "learning_rate": 0.00011835359270640426,
      "loss": 3.1687,
      "step": 162900
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.614227294921875,
      "learning_rate": 0.00011835033723131925,
      "loss": 2.8263,
      "step": 162901
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6198177337646484,
      "learning_rate": 0.00011834708179000639,
      "loss": 2.9124,
      "step": 162902
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.4933793544769287,
      "learning_rate": 0.00011834382638246628,
      "loss": 2.9086,
      "step": 162903
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.300130605697632,
      "learning_rate": 0.00011834057100869937,
      "loss": 3.1121,
      "step": 162904
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4450254440307617,
      "learning_rate": 0.00011833731566870647,
      "loss": 2.9633,
      "step": 162905
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.70896315574646,
      "learning_rate": 0.00011833406036248812,
      "loss": 2.8941,
      "step": 162906
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.529280424118042,
      "learning_rate": 0.00011833080509004478,
      "loss": 2.9427,
      "step": 162907
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.326388120651245,
      "learning_rate": 0.00011832754985137726,
      "loss": 3.0858,
      "step": 162908
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.696960210800171,
      "learning_rate": 0.00011832429464648608,
      "loss": 3.0809,
      "step": 162909
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.416562795639038,
      "learning_rate": 0.00011832103947537176,
      "loss": 2.8996,
      "step": 162910
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.387371063232422,
      "learning_rate": 0.00011831778433803508,
      "loss": 3.0313,
      "step": 162911
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7436447143554688,
      "learning_rate": 0.00011831452923447645,
      "loss": 2.9102,
      "step": 162912
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4562828540802,
      "learning_rate": 0.00011831127416469667,
      "loss": 3.072,
      "step": 162913
    },
    {
      "epoch": 2.12,
      "grad_norm": 5.242434024810791,
      "learning_rate": 0.00011830801912869626,
      "loss": 2.6914,
      "step": 162914
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3430449962615967,
      "learning_rate": 0.00011830476412647584,
      "loss": 2.5609,
      "step": 162915
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7181637287139893,
      "learning_rate": 0.00011830150915803586,
      "loss": 3.1029,
      "step": 162916
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9931490421295166,
      "learning_rate": 0.00011829825422337718,
      "loss": 2.9595,
      "step": 162917
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.299201250076294,
      "learning_rate": 0.00011829499932250016,
      "loss": 2.8946,
      "step": 162918
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.412975788116455,
      "learning_rate": 0.00011829174445540568,
      "loss": 2.9144,
      "step": 162919
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2582974433898926,
      "learning_rate": 0.00011828848962209404,
      "loss": 2.7779,
      "step": 162920
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.674102783203125,
      "learning_rate": 0.00011828523482256624,
      "loss": 2.9425,
      "step": 162921
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2503952980041504,
      "learning_rate": 0.00011828198005682243,
      "loss": 3.0295,
      "step": 162922
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3628673553466797,
      "learning_rate": 0.00011827872532486355,
      "loss": 2.8933,
      "step": 162923
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.220521926879883,
      "learning_rate": 0.00011827547062668994,
      "loss": 2.8952,
      "step": 162924
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.9254684448242188,
      "learning_rate": 0.00011827221596230248,
      "loss": 2.97,
      "step": 162925
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.7122151851654053,
      "learning_rate": 0.00011826896133170151,
      "loss": 3.0539,
      "step": 162926
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0671229362487793,
      "learning_rate": 0.00011826570673488803,
      "loss": 2.9543,
      "step": 162927
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4694645404815674,
      "learning_rate": 0.00011826245217186216,
      "loss": 3.0154,
      "step": 162928
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8376386165618896,
      "learning_rate": 0.00011825919764262481,
      "loss": 3.0487,
      "step": 162929
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.771800994873047,
      "learning_rate": 0.00011825594314717645,
      "loss": 2.9897,
      "step": 162930
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4257164001464844,
      "learning_rate": 0.00011825268868551781,
      "loss": 3.1054,
      "step": 162931
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2526934146881104,
      "learning_rate": 0.00011824943425764932,
      "loss": 2.7864,
      "step": 162932
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.403296947479248,
      "learning_rate": 0.0001182461798635719,
      "loss": 2.8871,
      "step": 162933
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.113138437271118,
      "learning_rate": 0.00011824292550328574,
      "loss": 2.9084,
      "step": 162934
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.775416374206543,
      "learning_rate": 0.00011823967117679176,
      "loss": 3.123,
      "step": 162935
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0954489707946777,
      "learning_rate": 0.00011823641688409034,
      "loss": 3.0329,
      "step": 162936
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.9109737873077393,
      "learning_rate": 0.00011823316262518233,
      "loss": 2.7604,
      "step": 162937
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4910337924957275,
      "learning_rate": 0.00011822990840006808,
      "loss": 3.0846,
      "step": 162938
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5080652236938477,
      "learning_rate": 0.00011822665420874854,
      "loss": 2.8499,
      "step": 162939
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8396942615509033,
      "learning_rate": 0.00011822340005122389,
      "loss": 2.9562,
      "step": 162940
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2977941036224365,
      "learning_rate": 0.00011822014592749505,
      "loss": 2.7814,
      "step": 162941
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.884157657623291,
      "learning_rate": 0.0001182168918375624,
      "loss": 3.0339,
      "step": 162942
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8736953735351562,
      "learning_rate": 0.00011821363778142678,
      "loss": 2.8592,
      "step": 162943
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.46399188041687,
      "learning_rate": 0.00011821038375908856,
      "loss": 2.9683,
      "step": 162944
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8020589351654053,
      "learning_rate": 0.00011820712977054854,
      "loss": 2.957,
      "step": 162945
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5625522136688232,
      "learning_rate": 0.0001182038758158073,
      "loss": 3.2702,
      "step": 162946
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7913036346435547,
      "learning_rate": 0.00011820062189486537,
      "loss": 2.9928,
      "step": 162947
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.375589609146118,
      "learning_rate": 0.00011819736800772323,
      "loss": 2.9695,
      "step": 162948
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.375424861907959,
      "learning_rate": 0.0001181941141543818,
      "loss": 2.8573,
      "step": 162949
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4010813236236572,
      "learning_rate": 0.00011819086033484136,
      "loss": 2.5969,
      "step": 162950
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.290050506591797,
      "learning_rate": 0.00011818760654910283,
      "loss": 2.9295,
      "step": 162951
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1629369258880615,
      "learning_rate": 0.00011818435279716661,
      "loss": 2.7879,
      "step": 162952
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2545337677001953,
      "learning_rate": 0.00011818109907903337,
      "loss": 3.009,
      "step": 162953
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.841451644897461,
      "learning_rate": 0.00011817784539470359,
      "loss": 2.9834,
      "step": 162954
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.316554546356201,
      "learning_rate": 0.00011817459174417808,
      "loss": 3.0014,
      "step": 162955
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3104729652404785,
      "learning_rate": 0.00011817133812745724,
      "loss": 3.0851,
      "step": 162956
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9443085193634033,
      "learning_rate": 0.00011816808454454188,
      "loss": 2.8889,
      "step": 162957
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.189574956893921,
      "learning_rate": 0.00011816483099543253,
      "loss": 3.1023,
      "step": 162958
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4506497383117676,
      "learning_rate": 0.00011816157748012975,
      "loss": 2.8981,
      "step": 162959
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.556713104248047,
      "learning_rate": 0.00011815832399863404,
      "loss": 2.947,
      "step": 162960
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.713435411453247,
      "learning_rate": 0.00011815507055094628,
      "loss": 2.8679,
      "step": 162961
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5851595401763916,
      "learning_rate": 0.00011815181713706678,
      "loss": 2.8009,
      "step": 162962
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3739869594573975,
      "learning_rate": 0.00011814856375699644,
      "loss": 2.8184,
      "step": 162963
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.122706890106201,
      "learning_rate": 0.00011814531041073568,
      "loss": 3.0612,
      "step": 162964
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4534897804260254,
      "learning_rate": 0.00011814205709828515,
      "loss": 2.8962,
      "step": 162965
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2169275283813477,
      "learning_rate": 0.00011813880381964534,
      "loss": 2.9492,
      "step": 162966
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1955645084381104,
      "learning_rate": 0.00011813555057481704,
      "loss": 2.9113,
      "step": 162967
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.268639087677002,
      "learning_rate": 0.0001181322973638007,
      "loss": 2.9938,
      "step": 162968
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0729219913482666,
      "learning_rate": 0.00011812904418659708,
      "loss": 3.1017,
      "step": 162969
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.923563241958618,
      "learning_rate": 0.00011812579104320674,
      "loss": 3.0747,
      "step": 162970
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8158867359161377,
      "learning_rate": 0.00011812253793363022,
      "loss": 3.0233,
      "step": 162971
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.362109661102295,
      "learning_rate": 0.00011811928485786806,
      "loss": 3.0918,
      "step": 162972
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.188814163208008,
      "learning_rate": 0.00011811603181592104,
      "loss": 2.7375,
      "step": 162973
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.60599684715271,
      "learning_rate": 0.0001181127788077896,
      "loss": 2.94,
      "step": 162974
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.4441165924072266,
      "learning_rate": 0.00011810952583347453,
      "loss": 2.8421,
      "step": 162975
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6514806747436523,
      "learning_rate": 0.00011810627289297633,
      "loss": 2.8694,
      "step": 162976
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4408397674560547,
      "learning_rate": 0.00011810301998629561,
      "loss": 2.9247,
      "step": 162977
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7544748783111572,
      "learning_rate": 0.00011809976711343287,
      "loss": 2.8557,
      "step": 162978
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2163851261138916,
      "learning_rate": 0.00011809651427438892,
      "loss": 2.7736,
      "step": 162979
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.0845189094543457,
      "learning_rate": 0.00011809326146916415,
      "loss": 3.1602,
      "step": 162980
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.473332405090332,
      "learning_rate": 0.00011809000869775938,
      "loss": 2.9251,
      "step": 162981
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2458395957946777,
      "learning_rate": 0.00011808675596017514,
      "loss": 3.148,
      "step": 162982
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3682761192321777,
      "learning_rate": 0.00011808350325641187,
      "loss": 2.914,
      "step": 162983
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2026448249816895,
      "learning_rate": 0.00011808025058647042,
      "loss": 3.1919,
      "step": 162984
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2902991771698,
      "learning_rate": 0.0001180769979503513,
      "loss": 2.7422,
      "step": 162985
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8681840896606445,
      "learning_rate": 0.00011807374534805497,
      "loss": 3.1001,
      "step": 162986
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7556278705596924,
      "learning_rate": 0.00011807049277958228,
      "loss": 3.4221,
      "step": 162987
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.290956735610962,
      "learning_rate": 0.00011806724024493374,
      "loss": 3.116,
      "step": 162988
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.385775089263916,
      "learning_rate": 0.0001180639877441098,
      "loss": 2.7538,
      "step": 162989
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3886966705322266,
      "learning_rate": 0.00011806073527711134,
      "loss": 3.0028,
      "step": 162990
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.090416669845581,
      "learning_rate": 0.0001180574828439388,
      "loss": 2.9979,
      "step": 162991
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3068745136260986,
      "learning_rate": 0.00011805423044459271,
      "loss": 2.7896,
      "step": 162992
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.597599983215332,
      "learning_rate": 0.00011805097807907388,
      "loss": 3.2318,
      "step": 162993
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0874764919281006,
      "learning_rate": 0.00011804772574738278,
      "loss": 2.9348,
      "step": 162994
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6224265098571777,
      "learning_rate": 0.00011804447344951996,
      "loss": 2.9783,
      "step": 162995
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.322046995162964,
      "learning_rate": 0.00011804122118548622,
      "loss": 2.9296,
      "step": 162996
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3782541751861572,
      "learning_rate": 0.00011803796895528193,
      "loss": 3.2268,
      "step": 162997
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8866071701049805,
      "learning_rate": 0.00011803471675890794,
      "loss": 2.7868,
      "step": 162998
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.350365161895752,
      "learning_rate": 0.0001180314645963647,
      "loss": 2.7248,
      "step": 162999
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.837189197540283,
      "learning_rate": 0.00011802821246765288,
      "loss": 3.0845,
      "step": 163000
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.614610195159912,
      "learning_rate": 0.00011802496037277292,
      "loss": 3.0695,
      "step": 163001
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2044246196746826,
      "learning_rate": 0.00011802170831172567,
      "loss": 2.974,
      "step": 163002
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.6671924591064453,
      "learning_rate": 0.00011801845628451151,
      "loss": 2.9795,
      "step": 163003
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2321760654449463,
      "learning_rate": 0.00011801520429113125,
      "loss": 2.8266,
      "step": 163004
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.523463726043701,
      "learning_rate": 0.00011801195233158531,
      "loss": 2.9141,
      "step": 163005
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.426287889480591,
      "learning_rate": 0.0001180087004058746,
      "loss": 3.1824,
      "step": 163006
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.73449444770813,
      "learning_rate": 0.00011800544851399929,
      "loss": 2.9295,
      "step": 163007
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8200325965881348,
      "learning_rate": 0.00011800219665596031,
      "loss": 2.974,
      "step": 163008
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4416186809539795,
      "learning_rate": 0.00011799894483175803,
      "loss": 3.178,
      "step": 163009
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.173583507537842,
      "learning_rate": 0.00011799569304139333,
      "loss": 3.0016,
      "step": 163010
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3173742294311523,
      "learning_rate": 0.00011799244128486651,
      "loss": 3.0425,
      "step": 163011
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7828681468963623,
      "learning_rate": 0.00011798918956217847,
      "loss": 2.8101,
      "step": 163012
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4720561504364014,
      "learning_rate": 0.00011798593787332969,
      "loss": 3.0582,
      "step": 163013
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3814470767974854,
      "learning_rate": 0.00011798268621832073,
      "loss": 3.0453,
      "step": 163014
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.509920835494995,
      "learning_rate": 0.0001179794345971521,
      "loss": 3.054,
      "step": 163015
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6761999130249023,
      "learning_rate": 0.00011797618300982468,
      "loss": 3.1243,
      "step": 163016
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0583038330078125,
      "learning_rate": 0.00011797293145633879,
      "loss": 2.8422,
      "step": 163017
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.544313907623291,
      "learning_rate": 0.0001179696799366953,
      "loss": 2.7945,
      "step": 163018
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1621594429016113,
      "learning_rate": 0.00011796642845089465,
      "loss": 2.7589,
      "step": 163019
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2338452339172363,
      "learning_rate": 0.00011796317699893752,
      "loss": 2.8999,
      "step": 163020
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.438794136047363,
      "learning_rate": 0.00011795992558082433,
      "loss": 3.0039,
      "step": 163021
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.9122283458709717,
      "learning_rate": 0.00011795667419655594,
      "loss": 2.8845,
      "step": 163022
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.66912579536438,
      "learning_rate": 0.00011795342284613272,
      "loss": 2.9214,
      "step": 163023
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.250792980194092,
      "learning_rate": 0.00011795017152955551,
      "loss": 3.0731,
      "step": 163024
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2354493141174316,
      "learning_rate": 0.0001179469202468248,
      "loss": 3.1405,
      "step": 163025
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2780392169952393,
      "learning_rate": 0.00011794366899794122,
      "loss": 3.1043,
      "step": 163026
    },
    {
      "epoch": 2.12,
      "grad_norm": 6.494598865509033,
      "learning_rate": 0.00011794041778290522,
      "loss": 2.8821,
      "step": 163027
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2861931324005127,
      "learning_rate": 0.00011793716660171763,
      "loss": 2.9787,
      "step": 163028
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.288985013961792,
      "learning_rate": 0.00011793391545437889,
      "loss": 3.0867,
      "step": 163029
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.06650447845459,
      "learning_rate": 0.00011793066434088972,
      "loss": 2.9571,
      "step": 163030
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3548381328582764,
      "learning_rate": 0.00011792741326125073,
      "loss": 2.8615,
      "step": 163031
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3169193267822266,
      "learning_rate": 0.00011792416221546245,
      "loss": 2.894,
      "step": 163032
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.335691213607788,
      "learning_rate": 0.00011792091120352539,
      "loss": 2.9269,
      "step": 163033
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6829659938812256,
      "learning_rate": 0.00011791766022544036,
      "loss": 2.7697,
      "step": 163034
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.785665273666382,
      "learning_rate": 0.00011791440928120781,
      "loss": 3.0424,
      "step": 163035
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7044124603271484,
      "learning_rate": 0.00011791115837082849,
      "loss": 2.9046,
      "step": 163036
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.51863169670105,
      "learning_rate": 0.00011790790749430291,
      "loss": 3.0356,
      "step": 163037
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.166161298751831,
      "learning_rate": 0.00011790465665163173,
      "loss": 2.9597,
      "step": 163038
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.822073459625244,
      "learning_rate": 0.00011790140584281539,
      "loss": 2.7635,
      "step": 163039
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7262189388275146,
      "learning_rate": 0.0001178981550678547,
      "loss": 2.8373,
      "step": 163040
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2728841304779053,
      "learning_rate": 0.00011789490432675007,
      "loss": 3.1192,
      "step": 163041
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2988100051879883,
      "learning_rate": 0.00011789165361950235,
      "loss": 3.1603,
      "step": 163042
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9212193489074707,
      "learning_rate": 0.00011788840294611199,
      "loss": 2.9313,
      "step": 163043
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8725030422210693,
      "learning_rate": 0.00011788515230657961,
      "loss": 2.8647,
      "step": 163044
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.506415605545044,
      "learning_rate": 0.00011788190170090575,
      "loss": 2.8472,
      "step": 163045
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.772383689880371,
      "learning_rate": 0.00011787865112909117,
      "loss": 2.9557,
      "step": 163046
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5691418647766113,
      "learning_rate": 0.00011787540059113626,
      "loss": 2.9244,
      "step": 163047
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.637399673461914,
      "learning_rate": 0.00011787215008704187,
      "loss": 2.9912,
      "step": 163048
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.529822587966919,
      "learning_rate": 0.00011786889961680846,
      "loss": 2.7485,
      "step": 163049
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4746592044830322,
      "learning_rate": 0.0001178656491804367,
      "loss": 3.0584,
      "step": 163050
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3156259059906006,
      "learning_rate": 0.00011786239877792703,
      "loss": 3.0301,
      "step": 163051
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2811520099639893,
      "learning_rate": 0.00011785914840928029,
      "loss": 3.2174,
      "step": 163052
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.476759195327759,
      "learning_rate": 0.00011785589807449684,
      "loss": 2.9814,
      "step": 163053
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.035494804382324,
      "learning_rate": 0.00011785264777357756,
      "loss": 2.9222,
      "step": 163054
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2880001068115234,
      "learning_rate": 0.00011784939750652289,
      "loss": 3.2198,
      "step": 163055
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.41204571723938,
      "learning_rate": 0.00011784614727333347,
      "loss": 3.0128,
      "step": 163056
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9398183822631836,
      "learning_rate": 0.00011784289707400976,
      "loss": 2.8641,
      "step": 163057
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2239606380462646,
      "learning_rate": 0.00011783964690855263,
      "loss": 2.853,
      "step": 163058
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.4440762996673584,
      "learning_rate": 0.00011783639677696243,
      "loss": 2.7882,
      "step": 163059
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.0629711151123047,
      "learning_rate": 0.00011783314667923997,
      "loss": 2.8777,
      "step": 163060
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3564929962158203,
      "learning_rate": 0.00011782989661538576,
      "loss": 2.9624,
      "step": 163061
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.373990058898926,
      "learning_rate": 0.00011782664658540046,
      "loss": 2.7882,
      "step": 163062
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7226507663726807,
      "learning_rate": 0.00011782339658928447,
      "loss": 3.0125,
      "step": 163063
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.333794593811035,
      "learning_rate": 0.00011782014662703868,
      "loss": 3.032,
      "step": 163064
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.691563367843628,
      "learning_rate": 0.00011781689669866347,
      "loss": 2.8818,
      "step": 163065
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.813885450363159,
      "learning_rate": 0.00011781364680415962,
      "loss": 2.9382,
      "step": 163066
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3181583881378174,
      "learning_rate": 0.00011781039694352765,
      "loss": 2.9941,
      "step": 163067
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7436764240264893,
      "learning_rate": 0.00011780714711676818,
      "loss": 2.9688,
      "step": 163068
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4678406715393066,
      "learning_rate": 0.00011780389732388169,
      "loss": 2.8667,
      "step": 163069
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.9337968826293945,
      "learning_rate": 0.000117800647564869,
      "loss": 3.1294,
      "step": 163070
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.607720136642456,
      "learning_rate": 0.00011779739783973052,
      "loss": 3.1251,
      "step": 163071
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3834192752838135,
      "learning_rate": 0.00011779414814846702,
      "loss": 3.1691,
      "step": 163072
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2405357360839844,
      "learning_rate": 0.00011779089849107905,
      "loss": 2.8917,
      "step": 163073
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.370734691619873,
      "learning_rate": 0.00011778764886756706,
      "loss": 2.8514,
      "step": 163074
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.1211719512939453,
      "learning_rate": 0.00011778439927793192,
      "loss": 2.6927,
      "step": 163075
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.300908327102661,
      "learning_rate": 0.0001177811497221741,
      "loss": 2.8325,
      "step": 163076
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5976908206939697,
      "learning_rate": 0.00011777790020029409,
      "loss": 3.0486,
      "step": 163077
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7219455242156982,
      "learning_rate": 0.00011777465071229271,
      "loss": 3.0505,
      "step": 163078
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.517082691192627,
      "learning_rate": 0.00011777140125817033,
      "loss": 2.9259,
      "step": 163079
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.670522451400757,
      "learning_rate": 0.00011776815183792783,
      "loss": 3.043,
      "step": 163080
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.099621057510376,
      "learning_rate": 0.00011776490245156569,
      "loss": 2.8409,
      "step": 163081
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.653409004211426,
      "learning_rate": 0.00011776165309908433,
      "loss": 2.9311,
      "step": 163082
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3710131645202637,
      "learning_rate": 0.00011775840378048464,
      "loss": 2.9564,
      "step": 163083
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2667734622955322,
      "learning_rate": 0.00011775515449576712,
      "loss": 2.9193,
      "step": 163084
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9772863388061523,
      "learning_rate": 0.00011775190524493227,
      "loss": 2.9641,
      "step": 163085
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4497647285461426,
      "learning_rate": 0.00011774865602798084,
      "loss": 2.89,
      "step": 163086
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3965976238250732,
      "learning_rate": 0.00011774540684491341,
      "loss": 2.7988,
      "step": 163087
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.465219259262085,
      "learning_rate": 0.00011774215769573043,
      "loss": 2.9024,
      "step": 163088
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4232852458953857,
      "learning_rate": 0.00011773890858043272,
      "loss": 3.0387,
      "step": 163089
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7547473907470703,
      "learning_rate": 0.00011773565949902081,
      "loss": 2.8294,
      "step": 163090
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.291534185409546,
      "learning_rate": 0.00011773241045149516,
      "loss": 2.8436,
      "step": 163091
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.317610502243042,
      "learning_rate": 0.00011772916143785658,
      "loss": 3.0892,
      "step": 163092
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8761069774627686,
      "learning_rate": 0.00011772591245810566,
      "loss": 2.9181,
      "step": 163093
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.814307689666748,
      "learning_rate": 0.00011772266351224277,
      "loss": 2.9207,
      "step": 163094
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8814187049865723,
      "learning_rate": 0.00011771941460026879,
      "loss": 3.0713,
      "step": 163095
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.273159980773926,
      "learning_rate": 0.0001177161657221841,
      "loss": 2.8024,
      "step": 163096
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7156338691711426,
      "learning_rate": 0.00011771291687798952,
      "loss": 3.0715,
      "step": 163097
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.355605363845825,
      "learning_rate": 0.00011770966806768558,
      "loss": 3.2618,
      "step": 163098
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.145350456237793,
      "learning_rate": 0.00011770641929127282,
      "loss": 3.0997,
      "step": 163099
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4478046894073486,
      "learning_rate": 0.00011770317054875182,
      "loss": 2.8298,
      "step": 163100
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5696141719818115,
      "learning_rate": 0.00011769992184012332,
      "loss": 3.0621,
      "step": 163101
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1180319786071777,
      "learning_rate": 0.00011769667316538772,
      "loss": 3.0039,
      "step": 163102
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.078373432159424,
      "learning_rate": 0.00011769342452454586,
      "loss": 2.7481,
      "step": 163103
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.6608145236968994,
      "learning_rate": 0.00011769017591759824,
      "loss": 2.9436,
      "step": 163104
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.606550931930542,
      "learning_rate": 0.00011768692734454548,
      "loss": 2.9842,
      "step": 163105
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3978936672210693,
      "learning_rate": 0.00011768367880538804,
      "loss": 3.0065,
      "step": 163106
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.855722188949585,
      "learning_rate": 0.00011768043030012675,
      "loss": 2.9613,
      "step": 163107
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.838660478591919,
      "learning_rate": 0.00011767718182876201,
      "loss": 3.0783,
      "step": 163108
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4183943271636963,
      "learning_rate": 0.00011767393339129459,
      "loss": 2.7418,
      "step": 163109
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.465855360031128,
      "learning_rate": 0.0001176706849877251,
      "loss": 3.1308,
      "step": 163110
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6031551361083984,
      "learning_rate": 0.00011766743661805402,
      "loss": 3.0357,
      "step": 163111
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.601114273071289,
      "learning_rate": 0.00011766418828228191,
      "loss": 3.0037,
      "step": 163112
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1331002712249756,
      "learning_rate": 0.00011766093998040956,
      "loss": 3.1632,
      "step": 163113
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9823389053344727,
      "learning_rate": 0.0001176576917124374,
      "loss": 3.0965,
      "step": 163114
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2322161197662354,
      "learning_rate": 0.00011765444347836621,
      "loss": 2.8367,
      "step": 163115
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1570117473602295,
      "learning_rate": 0.0001176511952781965,
      "loss": 3.1365,
      "step": 163116
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.280787944793701,
      "learning_rate": 0.00011764794711192889,
      "loss": 3.048,
      "step": 163117
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4121909141540527,
      "learning_rate": 0.00011764469897956387,
      "loss": 2.8075,
      "step": 163118
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.535947322845459,
      "learning_rate": 0.00011764145088110222,
      "loss": 2.8497,
      "step": 163119
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.765531301498413,
      "learning_rate": 0.00011763820281654436,
      "loss": 2.9103,
      "step": 163120
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8536176681518555,
      "learning_rate": 0.00011763495478589113,
      "loss": 3.0227,
      "step": 163121
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.337839365005493,
      "learning_rate": 0.00011763170678914301,
      "loss": 2.8663,
      "step": 163122
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.9322311878204346,
      "learning_rate": 0.00011762845882630057,
      "loss": 3.105,
      "step": 163123
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.930546283721924,
      "learning_rate": 0.00011762521089736436,
      "loss": 2.9171,
      "step": 163124
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8457064628601074,
      "learning_rate": 0.00011762196300233517,
      "loss": 2.7673,
      "step": 163125
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.16341495513916,
      "learning_rate": 0.00011761871514121336,
      "loss": 2.9081,
      "step": 163126
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.684154987335205,
      "learning_rate": 0.00011761546731399983,
      "loss": 2.9828,
      "step": 163127
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.621978759765625,
      "learning_rate": 0.000117612219520695,
      "loss": 2.9163,
      "step": 163128
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.251697540283203,
      "learning_rate": 0.00011760897176129951,
      "loss": 2.9836,
      "step": 163129
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3698418140411377,
      "learning_rate": 0.00011760572403581383,
      "loss": 2.8125,
      "step": 163130
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7181079387664795,
      "learning_rate": 0.0001176024763442388,
      "loss": 3.0059,
      "step": 163131
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2784600257873535,
      "learning_rate": 0.00011759922868657483,
      "loss": 3.046,
      "step": 163132
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.11911678314209,
      "learning_rate": 0.00011759598106282267,
      "loss": 2.8836,
      "step": 163133
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9184987545013428,
      "learning_rate": 0.00011759273347298289,
      "loss": 3.0221,
      "step": 163134
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.904428005218506,
      "learning_rate": 0.00011758948591705608,
      "loss": 2.9885,
      "step": 163135
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.169335126876831,
      "learning_rate": 0.00011758623839504267,
      "loss": 3.0135,
      "step": 163136
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7342419624328613,
      "learning_rate": 0.00011758299090694358,
      "loss": 3.1406,
      "step": 163137
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.35292387008667,
      "learning_rate": 0.00011757974345275915,
      "loss": 3.1888,
      "step": 163138
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6245715618133545,
      "learning_rate": 0.00011757649603249015,
      "loss": 2.967,
      "step": 163139
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4152705669403076,
      "learning_rate": 0.00011757324864613716,
      "loss": 3.0103,
      "step": 163140
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.183539628982544,
      "learning_rate": 0.00011757000129370072,
      "loss": 2.9525,
      "step": 163141
    },
    {
      "epoch": 2.12,
      "grad_norm": 5.079056262969971,
      "learning_rate": 0.00011756675397518137,
      "loss": 2.8426,
      "step": 163142
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9519824981689453,
      "learning_rate": 0.00011756350669057991,
      "loss": 3.0839,
      "step": 163143
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6177704334259033,
      "learning_rate": 0.00011756025943989674,
      "loss": 3.0097,
      "step": 163144
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.940166473388672,
      "learning_rate": 0.00011755701222313265,
      "loss": 2.9008,
      "step": 163145
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.592989921569824,
      "learning_rate": 0.00011755376504028807,
      "loss": 2.6916,
      "step": 163146
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.451488971710205,
      "learning_rate": 0.00011755051789136387,
      "loss": 3.1669,
      "step": 163147
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.221360683441162,
      "learning_rate": 0.00011754727077636028,
      "loss": 2.9735,
      "step": 163148
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.766751527786255,
      "learning_rate": 0.0001175440236952782,
      "loss": 2.9291,
      "step": 163149
    },
    {
      "epoch": 2.12,
      "grad_norm": 4.1135358810424805,
      "learning_rate": 0.00011754077664811801,
      "loss": 2.8531,
      "step": 163150
    },
    {
      "epoch": 2.12,
      "grad_norm": 5.291981220245361,
      "learning_rate": 0.00011753752963488057,
      "loss": 3.0003,
      "step": 163151
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5490710735321045,
      "learning_rate": 0.00011753428265556625,
      "loss": 3.2638,
      "step": 163152
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.9097352027893066,
      "learning_rate": 0.00011753103571017593,
      "loss": 2.9506,
      "step": 163153
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4272196292877197,
      "learning_rate": 0.00011752778879870984,
      "loss": 3.006,
      "step": 163154
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.8143880367279053,
      "learning_rate": 0.00011752454192116886,
      "loss": 3.334,
      "step": 163155
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.029766321182251,
      "learning_rate": 0.00011752129507755343,
      "loss": 2.9211,
      "step": 163156
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5101728439331055,
      "learning_rate": 0.00011751804826786432,
      "loss": 3.0186,
      "step": 163157
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.249826431274414,
      "learning_rate": 0.00011751480149210199,
      "loss": 2.9977,
      "step": 163158
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.837129831314087,
      "learning_rate": 0.00011751155475026717,
      "loss": 3.1109,
      "step": 163159
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.99842369556427,
      "learning_rate": 0.00011750830804236038,
      "loss": 3.0671,
      "step": 163160
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.46927809715271,
      "learning_rate": 0.00011750506136838228,
      "loss": 2.7752,
      "step": 163161
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3951821327209473,
      "learning_rate": 0.00011750181472833331,
      "loss": 2.8259,
      "step": 163162
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3946197032928467,
      "learning_rate": 0.0001174985681222143,
      "loss": 2.823,
      "step": 163163
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6442432403564453,
      "learning_rate": 0.00011749532155002565,
      "loss": 3.1013,
      "step": 163164
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1378798484802246,
      "learning_rate": 0.00011749207501176818,
      "loss": 3.1322,
      "step": 163165
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7423627376556396,
      "learning_rate": 0.00011748882850744238,
      "loss": 2.9457,
      "step": 163166
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.1511902809143066,
      "learning_rate": 0.00011748558203704873,
      "loss": 2.6467,
      "step": 163167
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.7541074752807617,
      "learning_rate": 0.00011748233560058808,
      "loss": 2.7987,
      "step": 163168
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.980196237564087,
      "learning_rate": 0.00011747908919806088,
      "loss": 2.8886,
      "step": 163169
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4594480991363525,
      "learning_rate": 0.0001174758428294677,
      "loss": 2.7377,
      "step": 163170
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.389970064163208,
      "learning_rate": 0.00011747259649480926,
      "loss": 2.8458,
      "step": 163171
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2183780670166016,
      "learning_rate": 0.00011746935019408613,
      "loss": 3.1177,
      "step": 163172
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.773289918899536,
      "learning_rate": 0.00011746610392729882,
      "loss": 3.0076,
      "step": 163173
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.7616019248962402,
      "learning_rate": 0.00011746285769444805,
      "loss": 2.7513,
      "step": 163174
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.879997491836548,
      "learning_rate": 0.00011745961149553444,
      "loss": 2.9422,
      "step": 163175
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.363178014755249,
      "learning_rate": 0.00011745636533055839,
      "loss": 3.2086,
      "step": 163176
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.825645923614502,
      "learning_rate": 0.00011745311919952078,
      "loss": 2.7761,
      "step": 163177
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2700984477996826,
      "learning_rate": 0.00011744987310242206,
      "loss": 2.8623,
      "step": 163178
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.380200147628784,
      "learning_rate": 0.00011744662703926276,
      "loss": 2.8289,
      "step": 163179
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2972331047058105,
      "learning_rate": 0.00011744338101004369,
      "loss": 2.9281,
      "step": 163180
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.2073543071746826,
      "learning_rate": 0.00011744013501476521,
      "loss": 2.9825,
      "step": 163181
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.103606700897217,
      "learning_rate": 0.0001174368890534282,
      "loss": 2.8594,
      "step": 163182
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.980275869369507,
      "learning_rate": 0.0001174336431260331,
      "loss": 2.9406,
      "step": 163183
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.523479700088501,
      "learning_rate": 0.00011743039723258051,
      "loss": 2.8133,
      "step": 163184
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4069294929504395,
      "learning_rate": 0.00011742715137307098,
      "loss": 2.9221,
      "step": 163185
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.3245348930358887,
      "learning_rate": 0.00011742390554750528,
      "loss": 3.1482,
      "step": 163186
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.2044129371643066,
      "learning_rate": 0.0001174206597558838,
      "loss": 3.1817,
      "step": 163187
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.3850016593933105,
      "learning_rate": 0.00011741741399820741,
      "loss": 3.1146,
      "step": 163188
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4397225379943848,
      "learning_rate": 0.00011741416827447653,
      "loss": 2.9703,
      "step": 163189
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.377958297729492,
      "learning_rate": 0.00011741092258469182,
      "loss": 2.6765,
      "step": 163190
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.5093698501586914,
      "learning_rate": 0.00011740767692885376,
      "loss": 2.8327,
      "step": 163191
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4299795627593994,
      "learning_rate": 0.00011740443130696318,
      "loss": 2.8028,
      "step": 163192
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.240941286087036,
      "learning_rate": 0.00011740118571902045,
      "loss": 2.8487,
      "step": 163193
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.8838348388671875,
      "learning_rate": 0.0001173979401650264,
      "loss": 2.9827,
      "step": 163194
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.425837755203247,
      "learning_rate": 0.00011739469464498148,
      "loss": 2.8396,
      "step": 163195
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.4482109546661377,
      "learning_rate": 0.00011739144915888637,
      "loss": 2.948,
      "step": 163196
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.066831350326538,
      "learning_rate": 0.0001173882037067415,
      "loss": 2.9302,
      "step": 163197
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.379127025604248,
      "learning_rate": 0.00011738495828854776,
      "loss": 2.8652,
      "step": 163198
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.6884472370147705,
      "learning_rate": 0.00011738171290430547,
      "loss": 2.8293,
      "step": 163199
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.684156894683838,
      "learning_rate": 0.00011737846755401547,
      "loss": 2.786,
      "step": 163200
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.209324598312378,
      "learning_rate": 0.00011737522223767827,
      "loss": 3.2327,
      "step": 163201
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.9361624717712402,
      "learning_rate": 0.00011737197695529449,
      "loss": 2.7459,
      "step": 163202
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.488565683364868,
      "learning_rate": 0.00011736873170686458,
      "loss": 3.2416,
      "step": 163203
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.618177652359009,
      "learning_rate": 0.00011736548649238935,
      "loss": 2.8333,
      "step": 163204
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1876511573791504,
      "learning_rate": 0.00011736224131186921,
      "loss": 2.7317,
      "step": 163205
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7819623947143555,
      "learning_rate": 0.00011735899616530501,
      "loss": 2.8619,
      "step": 163206
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9016964435577393,
      "learning_rate": 0.00011735575105269722,
      "loss": 3.016,
      "step": 163207
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.010265588760376,
      "learning_rate": 0.00011735250597404646,
      "loss": 2.7671,
      "step": 163208
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.474949359893799,
      "learning_rate": 0.0001173492609293532,
      "loss": 2.825,
      "step": 163209
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.560713529586792,
      "learning_rate": 0.00011734601591861825,
      "loss": 3.0989,
      "step": 163210
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.161914348602295,
      "learning_rate": 0.00011734277094184204,
      "loss": 2.8779,
      "step": 163211
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7619504928588867,
      "learning_rate": 0.00011733952599902535,
      "loss": 2.6238,
      "step": 163212
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.317140817642212,
      "learning_rate": 0.00011733628109016857,
      "loss": 3.0056,
      "step": 163213
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2880077362060547,
      "learning_rate": 0.00011733303621527265,
      "loss": 3.0047,
      "step": 163214
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1699934005737305,
      "learning_rate": 0.00011732979137433774,
      "loss": 2.8419,
      "step": 163215
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2961127758026123,
      "learning_rate": 0.0001173265465673648,
      "loss": 2.9404,
      "step": 163216
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3812427520751953,
      "learning_rate": 0.00011732330179435418,
      "loss": 3.0317,
      "step": 163217
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2491707801818848,
      "learning_rate": 0.00011732005705530674,
      "loss": 2.8458,
      "step": 163218
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.102822780609131,
      "learning_rate": 0.00011731681235022282,
      "loss": 3.1602,
      "step": 163219
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.580155849456787,
      "learning_rate": 0.00011731356767910337,
      "loss": 3.1224,
      "step": 163220
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2678372859954834,
      "learning_rate": 0.00011731032304194855,
      "loss": 3.1741,
      "step": 163221
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6695306301116943,
      "learning_rate": 0.00011730707843875933,
      "loss": 2.9621,
      "step": 163222
    },
    {
      "epoch": 2.13,
      "grad_norm": 1.9683282375335693,
      "learning_rate": 0.00011730383386953604,
      "loss": 2.866,
      "step": 163223
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7128357887268066,
      "learning_rate": 0.0001173005893342795,
      "loss": 2.7675,
      "step": 163224
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9611871242523193,
      "learning_rate": 0.00011729734483299016,
      "loss": 2.9757,
      "step": 163225
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0988149642944336,
      "learning_rate": 0.00011729410036566889,
      "loss": 3.1357,
      "step": 163226
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.424290418624878,
      "learning_rate": 0.00011729085593231588,
      "loss": 2.8947,
      "step": 163227
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2741472721099854,
      "learning_rate": 0.00011728761153293207,
      "loss": 2.7106,
      "step": 163228
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3876287937164307,
      "learning_rate": 0.00011728436716751785,
      "loss": 3.1364,
      "step": 163229
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6036226749420166,
      "learning_rate": 0.00011728112283607399,
      "loss": 2.9051,
      "step": 163230
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1948940753936768,
      "learning_rate": 0.00011727787853860091,
      "loss": 2.9763,
      "step": 163231
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.353660821914673,
      "learning_rate": 0.00011727463427509957,
      "loss": 2.8879,
      "step": 163232
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6971230506896973,
      "learning_rate": 0.00011727139004557008,
      "loss": 2.8046,
      "step": 163233
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7954819202423096,
      "learning_rate": 0.00011726814585001338,
      "loss": 3.0404,
      "step": 163234
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.802391290664673,
      "learning_rate": 0.00011726490168842992,
      "loss": 3.0362,
      "step": 163235
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.926661252975464,
      "learning_rate": 0.00011726165756082044,
      "loss": 2.7089,
      "step": 163236
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.656691312789917,
      "learning_rate": 0.00011725841346718537,
      "loss": 2.939,
      "step": 163237
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7941577434539795,
      "learning_rate": 0.0001172551694075256,
      "loss": 3.0774,
      "step": 163238
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3705124855041504,
      "learning_rate": 0.00011725192538184136,
      "loss": 2.892,
      "step": 163239
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0036141872406006,
      "learning_rate": 0.00011724868139013354,
      "loss": 2.9704,
      "step": 163240
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4038264751434326,
      "learning_rate": 0.00011724543743240254,
      "loss": 2.7963,
      "step": 163241
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7081613540649414,
      "learning_rate": 0.00011724219350864915,
      "loss": 2.9644,
      "step": 163242
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.394348621368408,
      "learning_rate": 0.00011723894961887379,
      "loss": 3.2439,
      "step": 163243
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0035817623138428,
      "learning_rate": 0.00011723570576307728,
      "loss": 3.0,
      "step": 163244
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4800660610198975,
      "learning_rate": 0.00011723246194126006,
      "loss": 2.8942,
      "step": 163245
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.259256601333618,
      "learning_rate": 0.00011722921815342282,
      "loss": 3.0812,
      "step": 163246
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.146244525909424,
      "learning_rate": 0.00011722597439956597,
      "loss": 3.0706,
      "step": 163247
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3105216026306152,
      "learning_rate": 0.0001172227306796904,
      "loss": 3.0468,
      "step": 163248
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.457803249359131,
      "learning_rate": 0.00011721948699379645,
      "loss": 2.9713,
      "step": 163249
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.88008189201355,
      "learning_rate": 0.00011721624334188498,
      "loss": 3.1432,
      "step": 163250
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.45356822013855,
      "learning_rate": 0.00011721299972395644,
      "loss": 2.9962,
      "step": 163251
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8232908248901367,
      "learning_rate": 0.00011720975614001147,
      "loss": 3.0967,
      "step": 163252
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.600461959838867,
      "learning_rate": 0.00011720651259005051,
      "loss": 3.1418,
      "step": 163253
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.115170478820801,
      "learning_rate": 0.00011720326907407445,
      "loss": 3.056,
      "step": 163254
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5528383255004883,
      "learning_rate": 0.00011720002559208366,
      "loss": 2.9745,
      "step": 163255
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7662711143493652,
      "learning_rate": 0.00011719678214407892,
      "loss": 2.9708,
      "step": 163256
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4429402351379395,
      "learning_rate": 0.00011719353873006075,
      "loss": 2.7751,
      "step": 163257
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6339545249938965,
      "learning_rate": 0.00011719029535002964,
      "loss": 2.8976,
      "step": 163258
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.99080228805542,
      "learning_rate": 0.00011718705200398643,
      "loss": 2.8837,
      "step": 163259
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.400643825531006,
      "learning_rate": 0.00011718380869193158,
      "loss": 2.6976,
      "step": 163260
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0129692554473877,
      "learning_rate": 0.0001171805654138656,
      "loss": 2.8671,
      "step": 163261
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.037553071975708,
      "learning_rate": 0.00011717732216978933,
      "loss": 2.8501,
      "step": 163262
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.759305000305176,
      "learning_rate": 0.00011717407895970324,
      "loss": 3.0381,
      "step": 163263
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7885916233062744,
      "learning_rate": 0.00011717083578360783,
      "loss": 3.1517,
      "step": 163264
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.397568464279175,
      "learning_rate": 0.00011716759264150393,
      "loss": 2.7158,
      "step": 163265
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6940340995788574,
      "learning_rate": 0.0001171643495333919,
      "loss": 2.9516,
      "step": 163266
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.551239013671875,
      "learning_rate": 0.00011716110645927258,
      "loss": 3.0053,
      "step": 163267
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.596553325653076,
      "learning_rate": 0.00011715786341914647,
      "loss": 2.8918,
      "step": 163268
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.86320161819458,
      "learning_rate": 0.00011715462041301417,
      "loss": 2.8944,
      "step": 163269
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0143685340881348,
      "learning_rate": 0.00011715137744087614,
      "loss": 2.9205,
      "step": 163270
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1500742435455322,
      "learning_rate": 0.00011714813450273325,
      "loss": 3.1708,
      "step": 163271
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3537917137145996,
      "learning_rate": 0.00011714489159858587,
      "loss": 2.6612,
      "step": 163272
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2130651473999023,
      "learning_rate": 0.00011714164872843482,
      "loss": 3.1187,
      "step": 163273
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.316694736480713,
      "learning_rate": 0.00011713840589228059,
      "loss": 3.0825,
      "step": 163274
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5421433448791504,
      "learning_rate": 0.00011713516309012374,
      "loss": 3.1316,
      "step": 163275
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.949446678161621,
      "learning_rate": 0.00011713192032196485,
      "loss": 3.0479,
      "step": 163276
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.251978874206543,
      "learning_rate": 0.00011712867758780469,
      "loss": 2.9135,
      "step": 163277
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.716733932495117,
      "learning_rate": 0.00011712543488764364,
      "loss": 3.0713,
      "step": 163278
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.856264114379883,
      "learning_rate": 0.00011712219222148255,
      "loss": 2.8635,
      "step": 163279
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.538799524307251,
      "learning_rate": 0.00011711894958932178,
      "loss": 3.5237,
      "step": 163280
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.921485424041748,
      "learning_rate": 0.00011711570699116226,
      "loss": 2.8749,
      "step": 163281
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3999085426330566,
      "learning_rate": 0.00011711246442700415,
      "loss": 3.1318,
      "step": 163282
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.686403751373291,
      "learning_rate": 0.00011710922189684844,
      "loss": 3.1183,
      "step": 163283
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.354471206665039,
      "learning_rate": 0.00011710597940069543,
      "loss": 2.9407,
      "step": 163284
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.410478353500366,
      "learning_rate": 0.00011710273693854598,
      "loss": 2.7765,
      "step": 163285
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.2605831623077393,
      "learning_rate": 0.00011709949451040051,
      "loss": 2.8789,
      "step": 163286
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.782252788543701,
      "learning_rate": 0.00011709625211625985,
      "loss": 3.1179,
      "step": 163287
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.8623671531677246,
      "learning_rate": 0.00011709300975612428,
      "loss": 2.8477,
      "step": 163288
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.414168119430542,
      "learning_rate": 0.00011708976742999469,
      "loss": 2.9605,
      "step": 163289
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.094717502593994,
      "learning_rate": 0.00011708652513787144,
      "loss": 2.6868,
      "step": 163290
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6186459064483643,
      "learning_rate": 0.00011708328287975536,
      "loss": 2.858,
      "step": 163291
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.161125898361206,
      "learning_rate": 0.00011708004065564683,
      "loss": 2.9603,
      "step": 163292
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3856101036071777,
      "learning_rate": 0.0001170767984655468,
      "loss": 2.8826,
      "step": 163293
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.178579330444336,
      "learning_rate": 0.00011707355630945545,
      "loss": 2.8258,
      "step": 163294
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.805664539337158,
      "learning_rate": 0.00011707031418737367,
      "loss": 3.0596,
      "step": 163295
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.390606164932251,
      "learning_rate": 0.00011706707209930187,
      "loss": 2.9969,
      "step": 163296
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.642486572265625,
      "learning_rate": 0.00011706383004524087,
      "loss": 2.9061,
      "step": 163297
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.245530366897583,
      "learning_rate": 0.00011706058802519102,
      "loss": 3.2246,
      "step": 163298
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.422151565551758,
      "learning_rate": 0.0001170573460391533,
      "loss": 2.9586,
      "step": 163299
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.430236577987671,
      "learning_rate": 0.00011705410408712787,
      "loss": 2.9265,
      "step": 163300
    },
    {
      "epoch": 2.13,
      "grad_norm": 1.983918309211731,
      "learning_rate": 0.00011705086216911563,
      "loss": 3.193,
      "step": 163301
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.856973171234131,
      "learning_rate": 0.00011704762028511698,
      "loss": 2.9991,
      "step": 163302
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5445773601531982,
      "learning_rate": 0.00011704437843513272,
      "loss": 2.9058,
      "step": 163303
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9185616970062256,
      "learning_rate": 0.00011704113661916328,
      "loss": 2.9989,
      "step": 163304
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6971356868743896,
      "learning_rate": 0.00011703789483720954,
      "loss": 3.107,
      "step": 163305
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1746323108673096,
      "learning_rate": 0.00011703465308927171,
      "loss": 2.9699,
      "step": 163306
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.732879638671875,
      "learning_rate": 0.00011703141137535068,
      "loss": 2.8924,
      "step": 163307
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3005740642547607,
      "learning_rate": 0.00011702816969544687,
      "loss": 3.0809,
      "step": 163308
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.972907543182373,
      "learning_rate": 0.00011702492804956108,
      "loss": 2.9039,
      "step": 163309
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3971474170684814,
      "learning_rate": 0.00011702168643769372,
      "loss": 2.9562,
      "step": 163310
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.935861587524414,
      "learning_rate": 0.00011701844485984567,
      "loss": 2.9213,
      "step": 163311
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3380448818206787,
      "learning_rate": 0.00011701520331601714,
      "loss": 2.7815,
      "step": 163312
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1733694076538086,
      "learning_rate": 0.00011701196180620908,
      "loss": 2.9319,
      "step": 163313
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7279815673828125,
      "learning_rate": 0.00011700872033042182,
      "loss": 2.8039,
      "step": 163314
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.783327579498291,
      "learning_rate": 0.0001170054788886562,
      "loss": 2.8972,
      "step": 163315
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9908785820007324,
      "learning_rate": 0.00011700223748091257,
      "loss": 3.0265,
      "step": 163316
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2481329441070557,
      "learning_rate": 0.00011699899610719195,
      "loss": 2.9215,
      "step": 163317
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.019237995147705,
      "learning_rate": 0.00011699575476749444,
      "loss": 2.845,
      "step": 163318
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.957638740539551,
      "learning_rate": 0.00011699251346182097,
      "loss": 2.7615,
      "step": 163319
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6528244018554688,
      "learning_rate": 0.00011698927219017192,
      "loss": 2.9023,
      "step": 163320
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.073615074157715,
      "learning_rate": 0.00011698603095254816,
      "loss": 2.8067,
      "step": 163321
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.847580671310425,
      "learning_rate": 0.00011698278974895005,
      "loss": 2.5799,
      "step": 163322
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.787691593170166,
      "learning_rate": 0.00011697954857937848,
      "loss": 3.0466,
      "step": 163323
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.839935779571533,
      "learning_rate": 0.00011697630744383366,
      "loss": 2.9408,
      "step": 163324
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.2793431282043457,
      "learning_rate": 0.0001169730663423165,
      "loss": 3.0906,
      "step": 163325
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.5890955924987793,
      "learning_rate": 0.0001169698252748274,
      "loss": 3.1294,
      "step": 163326
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.880045175552368,
      "learning_rate": 0.00011696658424136719,
      "loss": 3.0276,
      "step": 163327
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8827731609344482,
      "learning_rate": 0.00011696334324193623,
      "loss": 2.7101,
      "step": 163328
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4954216480255127,
      "learning_rate": 0.00011696010227653534,
      "loss": 3.1417,
      "step": 163329
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.737535238265991,
      "learning_rate": 0.00011695686134516503,
      "loss": 2.8018,
      "step": 163330
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1530449390411377,
      "learning_rate": 0.00011695362044782586,
      "loss": 3.0761,
      "step": 163331
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.313143253326416,
      "learning_rate": 0.00011695037958451842,
      "loss": 2.9724,
      "step": 163332
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4787797927856445,
      "learning_rate": 0.00011694713875524344,
      "loss": 2.843,
      "step": 163333
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.372420310974121,
      "learning_rate": 0.00011694389796000131,
      "loss": 2.8572,
      "step": 163334
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6504805088043213,
      "learning_rate": 0.00011694065719879292,
      "loss": 2.7965,
      "step": 163335
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7516391277313232,
      "learning_rate": 0.00011693741647161868,
      "loss": 2.7614,
      "step": 163336
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.729018449783325,
      "learning_rate": 0.00011693417577847923,
      "loss": 2.9938,
      "step": 163337
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.2010254859924316,
      "learning_rate": 0.00011693093511937508,
      "loss": 2.8735,
      "step": 163338
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.0863523483276367,
      "learning_rate": 0.00011692769449430705,
      "loss": 3.2196,
      "step": 163339
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.85796856880188,
      "learning_rate": 0.00011692445390327546,
      "loss": 2.8726,
      "step": 163340
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.70798659324646,
      "learning_rate": 0.00011692121334628122,
      "loss": 2.8189,
      "step": 163341
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7313461303710938,
      "learning_rate": 0.00011691797282332478,
      "loss": 3.0157,
      "step": 163342
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.820651054382324,
      "learning_rate": 0.00011691473233440662,
      "loss": 2.744,
      "step": 163343
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6878626346588135,
      "learning_rate": 0.00011691149187952758,
      "loss": 2.6976,
      "step": 163344
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5306074619293213,
      "learning_rate": 0.00011690825145868813,
      "loss": 3.0298,
      "step": 163345
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.781332492828369,
      "learning_rate": 0.00011690501107188881,
      "loss": 3.0653,
      "step": 163346
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5381314754486084,
      "learning_rate": 0.00011690177071913043,
      "loss": 2.85,
      "step": 163347
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0671935081481934,
      "learning_rate": 0.00011689853040041341,
      "loss": 2.8392,
      "step": 163348
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.597980260848999,
      "learning_rate": 0.00011689529011573834,
      "loss": 3.006,
      "step": 163349
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.509840965270996,
      "learning_rate": 0.000116892049865106,
      "loss": 2.8697,
      "step": 163350
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.083369255065918,
      "learning_rate": 0.00011688880964851689,
      "loss": 2.9964,
      "step": 163351
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3666625022888184,
      "learning_rate": 0.00011688556946597148,
      "loss": 2.8109,
      "step": 163352
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3287582397460938,
      "learning_rate": 0.00011688232931747062,
      "loss": 3.0512,
      "step": 163353
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.747532844543457,
      "learning_rate": 0.00011687908920301477,
      "loss": 2.8842,
      "step": 163354
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.860889196395874,
      "learning_rate": 0.00011687584912260447,
      "loss": 2.8179,
      "step": 163355
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9743597507476807,
      "learning_rate": 0.00011687260907624054,
      "loss": 2.9682,
      "step": 163356
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6803946495056152,
      "learning_rate": 0.00011686936906392328,
      "loss": 2.8209,
      "step": 163357
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.703347682952881,
      "learning_rate": 0.00011686612908565361,
      "loss": 2.9979,
      "step": 163358
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.126023054122925,
      "learning_rate": 0.00011686288914143197,
      "loss": 2.9844,
      "step": 163359
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6695950031280518,
      "learning_rate": 0.00011685964923125895,
      "loss": 2.8648,
      "step": 163360
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.495549201965332,
      "learning_rate": 0.00011685640935513511,
      "loss": 3.0895,
      "step": 163361
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.005265235900879,
      "learning_rate": 0.00011685316951306122,
      "loss": 3.2777,
      "step": 163362
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8893697261810303,
      "learning_rate": 0.00011684992970503769,
      "loss": 3.2909,
      "step": 163363
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2694687843322754,
      "learning_rate": 0.0001168466899310653,
      "loss": 2.9404,
      "step": 163364
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.319732427597046,
      "learning_rate": 0.00011684345019114446,
      "loss": 2.8798,
      "step": 163365
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.461116313934326,
      "learning_rate": 0.00011684021048527609,
      "loss": 3.0333,
      "step": 163366
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.114985227584839,
      "learning_rate": 0.00011683697081346037,
      "loss": 2.9929,
      "step": 163367
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1407394409179688,
      "learning_rate": 0.00011683373117569826,
      "loss": 2.9324,
      "step": 163368
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3681602478027344,
      "learning_rate": 0.00011683049157199005,
      "loss": 2.9143,
      "step": 163369
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.460141181945801,
      "learning_rate": 0.00011682725200233665,
      "loss": 3.0195,
      "step": 163370
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4482369422912598,
      "learning_rate": 0.00011682401246673842,
      "loss": 2.9369,
      "step": 163371
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.605029821395874,
      "learning_rate": 0.00011682077296519627,
      "loss": 2.8225,
      "step": 163372
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.371427536010742,
      "learning_rate": 0.00011681753349771037,
      "loss": 3.2369,
      "step": 163373
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9049770832061768,
      "learning_rate": 0.00011681429406428167,
      "loss": 2.9816,
      "step": 163374
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.230802536010742,
      "learning_rate": 0.00011681105466491051,
      "loss": 2.9451,
      "step": 163375
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.750678300857544,
      "learning_rate": 0.0001168078152995978,
      "loss": 2.8652,
      "step": 163376
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.80257248878479,
      "learning_rate": 0.00011680457596834382,
      "loss": 3.0786,
      "step": 163377
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.844024419784546,
      "learning_rate": 0.00011680133667114955,
      "loss": 2.9054,
      "step": 163378
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.568746328353882,
      "learning_rate": 0.00011679809740801515,
      "loss": 3.0545,
      "step": 163379
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4046661853790283,
      "learning_rate": 0.00011679485817894159,
      "loss": 2.9997,
      "step": 163380
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.520876884460449,
      "learning_rate": 0.00011679161898392916,
      "loss": 2.8764,
      "step": 163381
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.296478033065796,
      "learning_rate": 0.00011678837982297877,
      "loss": 2.9913,
      "step": 163382
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2676503658294678,
      "learning_rate": 0.00011678514069609077,
      "loss": 3.0695,
      "step": 163383
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6706461906433105,
      "learning_rate": 0.00011678190160326606,
      "loss": 3.0204,
      "step": 163384
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3511455059051514,
      "learning_rate": 0.0001167786625445049,
      "loss": 2.9096,
      "step": 163385
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3109450340270996,
      "learning_rate": 0.00011677542351980807,
      "loss": 2.93,
      "step": 163386
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.14601731300354,
      "learning_rate": 0.0001167721845291761,
      "loss": 2.9084,
      "step": 163387
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7430129051208496,
      "learning_rate": 0.00011676894557260972,
      "loss": 2.9421,
      "step": 163388
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.464783191680908,
      "learning_rate": 0.00011676570665010934,
      "loss": 2.877,
      "step": 163389
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.329366445541382,
      "learning_rate": 0.00011676246776167593,
      "loss": 2.8231,
      "step": 163390
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8293190002441406,
      "learning_rate": 0.00011675922890730958,
      "loss": 3.275,
      "step": 163391
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.673365592956543,
      "learning_rate": 0.00011675599008701127,
      "loss": 2.9259,
      "step": 163392
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7263317108154297,
      "learning_rate": 0.00011675275130078135,
      "loss": 2.8913,
      "step": 163393
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.059805154800415,
      "learning_rate": 0.0001167495125486207,
      "loss": 2.9772,
      "step": 163394
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7320985794067383,
      "learning_rate": 0.00011674627383052965,
      "loss": 3.1405,
      "step": 163395
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3267815113067627,
      "learning_rate": 0.00011674303514650915,
      "loss": 3.0756,
      "step": 163396
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4689457416534424,
      "learning_rate": 0.00011673979649655932,
      "loss": 3.1226,
      "step": 163397
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4620697498321533,
      "learning_rate": 0.00011673655788068119,
      "loss": 2.6927,
      "step": 163398
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.8790929317474365,
      "learning_rate": 0.00011673331929887505,
      "loss": 2.8754,
      "step": 163399
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.154207944869995,
      "learning_rate": 0.00011673008075114176,
      "loss": 2.9518,
      "step": 163400
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.386857271194458,
      "learning_rate": 0.00011672684223748168,
      "loss": 2.891,
      "step": 163401
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.203962564468384,
      "learning_rate": 0.00011672360375789578,
      "loss": 3.0071,
      "step": 163402
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.622227191925049,
      "learning_rate": 0.0001167203653123842,
      "loss": 2.8271,
      "step": 163403
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5683329105377197,
      "learning_rate": 0.0001167171269009479,
      "loss": 3.1826,
      "step": 163404
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.520364284515381,
      "learning_rate": 0.00011671388852358719,
      "loss": 2.8512,
      "step": 163405
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.454684257507324,
      "learning_rate": 0.00011671065018030296,
      "loss": 2.9661,
      "step": 163406
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.737724542617798,
      "learning_rate": 0.00011670741187109557,
      "loss": 2.7745,
      "step": 163407
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.58823823928833,
      "learning_rate": 0.00011670417359596596,
      "loss": 2.7844,
      "step": 163408
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.569821834564209,
      "learning_rate": 0.00011670093535491425,
      "loss": 2.9736,
      "step": 163409
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5251874923706055,
      "learning_rate": 0.00011669769714794145,
      "loss": 2.75,
      "step": 163410
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.598790168762207,
      "learning_rate": 0.00011669445897504789,
      "loss": 2.89,
      "step": 163411
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.282909631729126,
      "learning_rate": 0.00011669122083623436,
      "loss": 2.8503,
      "step": 163412
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3567819595336914,
      "learning_rate": 0.00011668798273150132,
      "loss": 2.908,
      "step": 163413
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8560686111450195,
      "learning_rate": 0.00011668474466084954,
      "loss": 2.9663,
      "step": 163414
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6592631340026855,
      "learning_rate": 0.00011668150662427953,
      "loss": 2.9661,
      "step": 163415
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.06072735786438,
      "learning_rate": 0.0001166782686217919,
      "loss": 3.0715,
      "step": 163416
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8070614337921143,
      "learning_rate": 0.00011667503065338713,
      "loss": 2.96,
      "step": 163417
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.006540060043335,
      "learning_rate": 0.00011667179271906603,
      "loss": 2.8699,
      "step": 163418
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4640049934387207,
      "learning_rate": 0.000116668554818829,
      "loss": 2.9516,
      "step": 163419
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.690821647644043,
      "learning_rate": 0.00011666531695267684,
      "loss": 3.0359,
      "step": 163420
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2682301998138428,
      "learning_rate": 0.00011666207912061008,
      "loss": 3.0434,
      "step": 163421
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.114647626876831,
      "learning_rate": 0.00011665884132262929,
      "loss": 3.0158,
      "step": 163422
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.641925811767578,
      "learning_rate": 0.00011665560355873496,
      "loss": 2.99,
      "step": 163423
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.417170286178589,
      "learning_rate": 0.00011665236582892794,
      "loss": 2.7883,
      "step": 163424
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.122770309448242,
      "learning_rate": 0.00011664912813320861,
      "loss": 2.6142,
      "step": 163425
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4800872802734375,
      "learning_rate": 0.00011664589047157775,
      "loss": 2.9492,
      "step": 163426
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3455934524536133,
      "learning_rate": 0.00011664265284403588,
      "loss": 2.7209,
      "step": 163427
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.737396001815796,
      "learning_rate": 0.00011663941525058352,
      "loss": 3.1035,
      "step": 163428
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4234237670898438,
      "learning_rate": 0.00011663617769122145,
      "loss": 3.1049,
      "step": 163429
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3826375007629395,
      "learning_rate": 0.00011663294016595017,
      "loss": 2.8131,
      "step": 163430
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.879671096801758,
      "learning_rate": 0.00011662970267477016,
      "loss": 2.8532,
      "step": 163431
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.470823049545288,
      "learning_rate": 0.00011662646521768228,
      "loss": 2.8911,
      "step": 163432
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6898016929626465,
      "learning_rate": 0.000116623227794687,
      "loss": 2.8267,
      "step": 163433
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1043553352355957,
      "learning_rate": 0.00011661999040578485,
      "loss": 2.9377,
      "step": 163434
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.194200277328491,
      "learning_rate": 0.00011661675305097655,
      "loss": 3.2722,
      "step": 163435
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7237613201141357,
      "learning_rate": 0.00011661351573026273,
      "loss": 2.8381,
      "step": 163436
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6698014736175537,
      "learning_rate": 0.00011661027844364375,
      "loss": 3.2023,
      "step": 163437
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.579490900039673,
      "learning_rate": 0.0001166070411911205,
      "loss": 2.7397,
      "step": 163438
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.404134750366211,
      "learning_rate": 0.00011660380397269348,
      "loss": 2.9242,
      "step": 163439
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.068249464035034,
      "learning_rate": 0.00011660056678836315,
      "loss": 2.8723,
      "step": 163440
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3103652000427246,
      "learning_rate": 0.00011659732963813034,
      "loss": 2.8113,
      "step": 163441
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.36942982673645,
      "learning_rate": 0.00011659409252199547,
      "loss": 3.1527,
      "step": 163442
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.937513589859009,
      "learning_rate": 0.00011659085543995931,
      "loss": 2.9019,
      "step": 163443
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.802626371383667,
      "learning_rate": 0.00011658761839202238,
      "loss": 3.0558,
      "step": 163444
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.636462926864624,
      "learning_rate": 0.00011658438137818527,
      "loss": 2.805,
      "step": 163445
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.520820140838623,
      "learning_rate": 0.00011658114439844848,
      "loss": 3.0148,
      "step": 163446
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4010283946990967,
      "learning_rate": 0.00011657790745281281,
      "loss": 3.0357,
      "step": 163447
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3962764739990234,
      "learning_rate": 0.00011657467054127867,
      "loss": 2.8976,
      "step": 163448
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1416015625,
      "learning_rate": 0.00011657143366384688,
      "loss": 2.7085,
      "step": 163449
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.490541696548462,
      "learning_rate": 0.00011656819682051778,
      "loss": 3.0705,
      "step": 163450
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5533945560455322,
      "learning_rate": 0.00011656496001129236,
      "loss": 3.0246,
      "step": 163451
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.712136745452881,
      "learning_rate": 0.00011656172323617077,
      "loss": 2.8734,
      "step": 163452
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4522807598114014,
      "learning_rate": 0.00011655848649515389,
      "loss": 3.0797,
      "step": 163453
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5325405597686768,
      "learning_rate": 0.00011655524978824217,
      "loss": 2.8698,
      "step": 163454
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.009974718093872,
      "learning_rate": 0.0001165520131154364,
      "loss": 2.9654,
      "step": 163455
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.632615566253662,
      "learning_rate": 0.00011654877647673698,
      "loss": 3.0814,
      "step": 163456
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.590035915374756,
      "learning_rate": 0.0001165455398721448,
      "loss": 3.0297,
      "step": 163457
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4131903648376465,
      "learning_rate": 0.00011654230330166004,
      "loss": 2.962,
      "step": 163458
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4411826133728027,
      "learning_rate": 0.00011653906676528364,
      "loss": 2.8802,
      "step": 163459
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6848509311676025,
      "learning_rate": 0.00011653583026301602,
      "loss": 2.8773,
      "step": 163460
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4731526374816895,
      "learning_rate": 0.0001165325937948579,
      "loss": 3.0175,
      "step": 163461
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.19555926322937,
      "learning_rate": 0.00011652935736080979,
      "loss": 2.9659,
      "step": 163462
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5269293785095215,
      "learning_rate": 0.0001165261209608725,
      "loss": 3.1952,
      "step": 163463
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1874842643737793,
      "learning_rate": 0.00011652288459504627,
      "loss": 2.9469,
      "step": 163464
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.300680637359619,
      "learning_rate": 0.00011651964826333203,
      "loss": 3.2372,
      "step": 163465
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7869067192077637,
      "learning_rate": 0.00011651641196573012,
      "loss": 2.9658,
      "step": 163466
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.17352032661438,
      "learning_rate": 0.00011651317570224139,
      "loss": 2.9602,
      "step": 163467
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0956151485443115,
      "learning_rate": 0.0001165099394728662,
      "loss": 2.925,
      "step": 163468
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3572030067443848,
      "learning_rate": 0.0001165067032776055,
      "loss": 3.0835,
      "step": 163469
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5596506595611572,
      "learning_rate": 0.00011650346711645945,
      "loss": 3.0262,
      "step": 163470
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.050462484359741,
      "learning_rate": 0.00011650023098942897,
      "loss": 2.6403,
      "step": 163471
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.41049861907959,
      "learning_rate": 0.00011649699489651446,
      "loss": 2.8629,
      "step": 163472
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3119914531707764,
      "learning_rate": 0.00011649375883771673,
      "loss": 2.9043,
      "step": 163473
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.674102544784546,
      "learning_rate": 0.00011649052281303618,
      "loss": 2.9038,
      "step": 163474
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.265444278717041,
      "learning_rate": 0.0001164872868224737,
      "loss": 2.7966,
      "step": 163475
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8071603775024414,
      "learning_rate": 0.0001164840508660295,
      "loss": 2.8801,
      "step": 163476
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3440446853637695,
      "learning_rate": 0.00011648081494370447,
      "loss": 2.8984,
      "step": 163477
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1242246627807617,
      "learning_rate": 0.00011647757905549904,
      "loss": 3.463,
      "step": 163478
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2146801948547363,
      "learning_rate": 0.00011647434320141397,
      "loss": 3.142,
      "step": 163479
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1317687034606934,
      "learning_rate": 0.0001164711073814497,
      "loss": 2.9918,
      "step": 163480
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.616196870803833,
      "learning_rate": 0.00011646787159560702,
      "loss": 3.0007,
      "step": 163481
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.143752098083496,
      "learning_rate": 0.00011646463584388643,
      "loss": 2.8788,
      "step": 163482
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7932627201080322,
      "learning_rate": 0.0001164614001262885,
      "loss": 3.0646,
      "step": 163483
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.164039373397827,
      "learning_rate": 0.00011645816444281378,
      "loss": 3.0951,
      "step": 163484
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.202605962753296,
      "learning_rate": 0.00011645492879346306,
      "loss": 2.7806,
      "step": 163485
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.383511543273926,
      "learning_rate": 0.00011645169317823671,
      "loss": 3.2642,
      "step": 163486
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.099072217941284,
      "learning_rate": 0.00011644845759713555,
      "loss": 2.7312,
      "step": 163487
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7723429203033447,
      "learning_rate": 0.00011644522205016012,
      "loss": 2.9992,
      "step": 163488
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.671144962310791,
      "learning_rate": 0.00011644198653731097,
      "loss": 2.7261,
      "step": 163489
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3475534915924072,
      "learning_rate": 0.0001164387510585886,
      "loss": 2.8633,
      "step": 163490
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.299196720123291,
      "learning_rate": 0.00011643551561399386,
      "loss": 3.1063,
      "step": 163491
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.35098934173584,
      "learning_rate": 0.00011643228020352711,
      "loss": 2.9703,
      "step": 163492
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5244522094726562,
      "learning_rate": 0.00011642904482718918,
      "loss": 2.7387,
      "step": 163493
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.323925256729126,
      "learning_rate": 0.00011642580948498054,
      "loss": 3.0261,
      "step": 163494
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.859760046005249,
      "learning_rate": 0.00011642257417690179,
      "loss": 2.9405,
      "step": 163495
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.453720808029175,
      "learning_rate": 0.00011641933890295348,
      "loss": 2.8814,
      "step": 163496
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7303102016448975,
      "learning_rate": 0.00011641610366313638,
      "loss": 3.0147,
      "step": 163497
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.207279920578003,
      "learning_rate": 0.00011641286845745086,
      "loss": 3.0163,
      "step": 163498
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2418081760406494,
      "learning_rate": 0.00011640963328589778,
      "loss": 3.159,
      "step": 163499
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.7768990993499756,
      "learning_rate": 0.00011640639814847762,
      "loss": 2.6465,
      "step": 163500
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.675786018371582,
      "learning_rate": 0.00011640316304519096,
      "loss": 2.9054,
      "step": 163501
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.5016825199127197,
      "learning_rate": 0.00011639992797603833,
      "loss": 3.0779,
      "step": 163502
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.565495729446411,
      "learning_rate": 0.0001163966929410205,
      "loss": 2.9592,
      "step": 163503
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8955917358398438,
      "learning_rate": 0.00011639345794013793,
      "loss": 2.9347,
      "step": 163504
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.364259719848633,
      "learning_rate": 0.00011639022297339134,
      "loss": 2.8411,
      "step": 163505
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.059377670288086,
      "learning_rate": 0.00011638698804078129,
      "loss": 3.1201,
      "step": 163506
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6166136264801025,
      "learning_rate": 0.00011638375314230837,
      "loss": 2.8866,
      "step": 163507
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.617250919342041,
      "learning_rate": 0.00011638051827797306,
      "loss": 3.0027,
      "step": 163508
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.709852457046509,
      "learning_rate": 0.0001163772834477762,
      "loss": 3.0178,
      "step": 163509
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.734025478363037,
      "learning_rate": 0.00011637404865171815,
      "loss": 2.8673,
      "step": 163510
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.098785400390625,
      "learning_rate": 0.00011637081388979972,
      "loss": 2.8369,
      "step": 163511
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3650615215301514,
      "learning_rate": 0.00011636757916202145,
      "loss": 2.8793,
      "step": 163512
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5636799335479736,
      "learning_rate": 0.00011636434446838391,
      "loss": 2.9694,
      "step": 163513
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.127767562866211,
      "learning_rate": 0.00011636110980888759,
      "loss": 3.1126,
      "step": 163514
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.112572431564331,
      "learning_rate": 0.00011635787518353332,
      "loss": 3.0766,
      "step": 163515
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.685018539428711,
      "learning_rate": 0.00011635464059232147,
      "loss": 2.8159,
      "step": 163516
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.856053590774536,
      "learning_rate": 0.00011635140603525289,
      "loss": 2.7027,
      "step": 163517
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7341549396514893,
      "learning_rate": 0.00011634817151232803,
      "loss": 2.902,
      "step": 163518
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.0551564693450928,
      "learning_rate": 0.00011634493702354741,
      "loss": 3.0131,
      "step": 163519
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.703801155090332,
      "learning_rate": 0.00011634170256891182,
      "loss": 3.046,
      "step": 163520
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3455770015716553,
      "learning_rate": 0.00011633846814842179,
      "loss": 2.9607,
      "step": 163521
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.399277925491333,
      "learning_rate": 0.0001163352337620778,
      "loss": 3.0303,
      "step": 163522
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8524649143218994,
      "learning_rate": 0.00011633199940988067,
      "loss": 3.2827,
      "step": 163523
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4213008880615234,
      "learning_rate": 0.00011632876509183087,
      "loss": 2.9959,
      "step": 163524
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.838529348373413,
      "learning_rate": 0.00011632553080792895,
      "loss": 2.8226,
      "step": 163525
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.656139373779297,
      "learning_rate": 0.00011632229655817567,
      "loss": 3.0286,
      "step": 163526
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.302213430404663,
      "learning_rate": 0.0001163190623425714,
      "loss": 3.0337,
      "step": 163527
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.40897798538208,
      "learning_rate": 0.00011631582816111702,
      "loss": 3.0169,
      "step": 163528
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.274923801422119,
      "learning_rate": 0.00011631259401381298,
      "loss": 2.7728,
      "step": 163529
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4650604724884033,
      "learning_rate": 0.00011630935990065993,
      "loss": 2.9442,
      "step": 163530
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.487680435180664,
      "learning_rate": 0.0001163061258216583,
      "loss": 3.0398,
      "step": 163531
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.624138355255127,
      "learning_rate": 0.00011630289177680895,
      "loss": 2.8928,
      "step": 163532
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9790334701538086,
      "learning_rate": 0.00011629965776611224,
      "loss": 2.8383,
      "step": 163533
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3387374877929688,
      "learning_rate": 0.00011629642378956902,
      "loss": 2.7748,
      "step": 163534
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3019251823425293,
      "learning_rate": 0.00011629318984717977,
      "loss": 2.8836,
      "step": 163535
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3772220611572266,
      "learning_rate": 0.00011628995593894504,
      "loss": 2.9858,
      "step": 163536
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2746894359588623,
      "learning_rate": 0.00011628672206486541,
      "loss": 2.8579,
      "step": 163537
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.285649061203003,
      "learning_rate": 0.00011628348822494163,
      "loss": 2.9603,
      "step": 163538
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.788872003555298,
      "learning_rate": 0.0001162802544191741,
      "loss": 3.4664,
      "step": 163539
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.851818084716797,
      "learning_rate": 0.00011627702064756368,
      "loss": 3.0307,
      "step": 163540
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.473595380783081,
      "learning_rate": 0.0001162737869101107,
      "loss": 2.9137,
      "step": 163541
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.632395029067993,
      "learning_rate": 0.0001162705532068161,
      "loss": 2.8841,
      "step": 163542
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.170360565185547,
      "learning_rate": 0.00011626731953768006,
      "loss": 2.9496,
      "step": 163543
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.098964214324951,
      "learning_rate": 0.00011626408590270351,
      "loss": 2.9969,
      "step": 163544
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.966053009033203,
      "learning_rate": 0.00011626085230188684,
      "loss": 2.809,
      "step": 163545
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.281043767929077,
      "learning_rate": 0.00011625761873523084,
      "loss": 2.8265,
      "step": 163546
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.26080322265625,
      "learning_rate": 0.00011625438520273591,
      "loss": 2.9693,
      "step": 163547
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8725554943084717,
      "learning_rate": 0.00011625115170440286,
      "loss": 2.67,
      "step": 163548
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.779108762741089,
      "learning_rate": 0.00011624791824023217,
      "loss": 3.0347,
      "step": 163549
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6397335529327393,
      "learning_rate": 0.0001162446848102245,
      "loss": 2.9094,
      "step": 163550
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.126833915710449,
      "learning_rate": 0.0001162414514143803,
      "loss": 2.9543,
      "step": 163551
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5587475299835205,
      "learning_rate": 0.00011623821805270036,
      "loss": 2.9737,
      "step": 163552
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2450501918792725,
      "learning_rate": 0.00011623498472518513,
      "loss": 3.1581,
      "step": 163553
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6920928955078125,
      "learning_rate": 0.00011623175143183537,
      "loss": 2.687,
      "step": 163554
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.022850751876831,
      "learning_rate": 0.00011622851817265158,
      "loss": 2.9033,
      "step": 163555
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.371331214904785,
      "learning_rate": 0.00011622528494763444,
      "loss": 3.1981,
      "step": 163556
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2722959518432617,
      "learning_rate": 0.00011622205175678432,
      "loss": 3.0221,
      "step": 163557
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2888660430908203,
      "learning_rate": 0.00011621881860010214,
      "loss": 2.9176,
      "step": 163558
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3307294845581055,
      "learning_rate": 0.00011621558547758819,
      "loss": 2.8734,
      "step": 163559
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.324784755706787,
      "learning_rate": 0.00011621235238924337,
      "loss": 3.0039,
      "step": 163560
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0448861122131348,
      "learning_rate": 0.00011620911933506817,
      "loss": 2.8948,
      "step": 163561
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.404674530029297,
      "learning_rate": 0.00011620588631506312,
      "loss": 3.2137,
      "step": 163562
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4717438220977783,
      "learning_rate": 0.00011620265332922878,
      "loss": 2.8431,
      "step": 163563
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8936142921447754,
      "learning_rate": 0.00011619942037756593,
      "loss": 3.2388,
      "step": 163564
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.538952350616455,
      "learning_rate": 0.00011619618746007498,
      "loss": 3.0145,
      "step": 163565
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.8919906616210938,
      "learning_rate": 0.00011619295457675677,
      "loss": 3.1092,
      "step": 163566
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.05851149559021,
      "learning_rate": 0.00011618972172761172,
      "loss": 2.7938,
      "step": 163567
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.896062135696411,
      "learning_rate": 0.00011618648891264046,
      "loss": 3.265,
      "step": 163568
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3082058429718018,
      "learning_rate": 0.0001161832561318435,
      "loss": 2.8958,
      "step": 163569
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2723448276519775,
      "learning_rate": 0.00011618002338522167,
      "loss": 2.7571,
      "step": 163570
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.505751848220825,
      "learning_rate": 0.00011617679067277535,
      "loss": 3.0147,
      "step": 163571
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0194809436798096,
      "learning_rate": 0.00011617355799450531,
      "loss": 3.0705,
      "step": 163572
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0654587745666504,
      "learning_rate": 0.00011617032535041206,
      "loss": 3.0771,
      "step": 163573
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4563379287719727,
      "learning_rate": 0.00011616709274049625,
      "loss": 3.0583,
      "step": 163574
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.762214183807373,
      "learning_rate": 0.00011616386016475833,
      "loss": 2.8877,
      "step": 163575
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7857871055603027,
      "learning_rate": 0.00011616062762319913,
      "loss": 2.784,
      "step": 163576
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1329307556152344,
      "learning_rate": 0.00011615739511581903,
      "loss": 3.1018,
      "step": 163577
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5132734775543213,
      "learning_rate": 0.00011615416264261885,
      "loss": 2.9532,
      "step": 163578
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8699593544006348,
      "learning_rate": 0.00011615093020359907,
      "loss": 3.129,
      "step": 163579
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2980334758758545,
      "learning_rate": 0.00011614769779876031,
      "loss": 3.0376,
      "step": 163580
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0202763080596924,
      "learning_rate": 0.00011614446542810305,
      "loss": 3.2289,
      "step": 163581
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3587074279785156,
      "learning_rate": 0.00011614123309162812,
      "loss": 3.1566,
      "step": 163582
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9587700366973877,
      "learning_rate": 0.0001161380007893359,
      "loss": 2.9784,
      "step": 163583
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3729114532470703,
      "learning_rate": 0.0001161347685212272,
      "loss": 3.1072,
      "step": 163584
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8966214656829834,
      "learning_rate": 0.00011613153628730253,
      "loss": 3.0831,
      "step": 163585
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.2739205360412598,
      "learning_rate": 0.00011612830408756245,
      "loss": 2.8318,
      "step": 163586
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.537614107131958,
      "learning_rate": 0.00011612507192200751,
      "loss": 3.1801,
      "step": 163587
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9746975898742676,
      "learning_rate": 0.00011612183979063846,
      "loss": 2.7913,
      "step": 163588
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.154008626937866,
      "learning_rate": 0.00011611860769345577,
      "loss": 3.0591,
      "step": 163589
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5277915000915527,
      "learning_rate": 0.00011611537563046016,
      "loss": 2.8073,
      "step": 163590
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.172032594680786,
      "learning_rate": 0.0001161121436016522,
      "loss": 3.0488,
      "step": 163591
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.579348087310791,
      "learning_rate": 0.00011610891160703248,
      "loss": 2.9201,
      "step": 163592
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8142611980438232,
      "learning_rate": 0.00011610567964660146,
      "loss": 2.8684,
      "step": 163593
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4910550117492676,
      "learning_rate": 0.00011610244772035995,
      "loss": 2.9707,
      "step": 163594
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7350854873657227,
      "learning_rate": 0.0001160992158283084,
      "loss": 2.7545,
      "step": 163595
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4722185134887695,
      "learning_rate": 0.00011609598397044758,
      "loss": 3.029,
      "step": 163596
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.150038003921509,
      "learning_rate": 0.00011609275214677796,
      "loss": 3.0054,
      "step": 163597
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.630114793777466,
      "learning_rate": 0.0001160895203573002,
      "loss": 2.8959,
      "step": 163598
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.968341112136841,
      "learning_rate": 0.00011608628860201475,
      "loss": 2.8675,
      "step": 163599
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.348947763442993,
      "learning_rate": 0.00011608305688092242,
      "loss": 2.7878,
      "step": 163600
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2834651470184326,
      "learning_rate": 0.00011607982519402364,
      "loss": 2.8827,
      "step": 163601
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.349947452545166,
      "learning_rate": 0.0001160765935413192,
      "loss": 2.7317,
      "step": 163602
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2162322998046875,
      "learning_rate": 0.00011607336192280958,
      "loss": 3.0601,
      "step": 163603
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.519869804382324,
      "learning_rate": 0.00011607013033849526,
      "loss": 3.125,
      "step": 163604
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6003780364990234,
      "learning_rate": 0.00011606689878837712,
      "loss": 2.7624,
      "step": 163605
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8135106563568115,
      "learning_rate": 0.00011606366727245561,
      "loss": 3.0812,
      "step": 163606
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.46563458442688,
      "learning_rate": 0.00011606043579073123,
      "loss": 3.0454,
      "step": 163607
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.66105580329895,
      "learning_rate": 0.00011605720434320477,
      "loss": 2.914,
      "step": 163608
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6530423164367676,
      "learning_rate": 0.00011605397292987678,
      "loss": 2.8549,
      "step": 163609
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4009461402893066,
      "learning_rate": 0.0001160507415507477,
      "loss": 2.7087,
      "step": 163610
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.862231731414795,
      "learning_rate": 0.00011604751020581837,
      "loss": 2.9142,
      "step": 163611
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3541698455810547,
      "learning_rate": 0.00011604427889508918,
      "loss": 3.0313,
      "step": 163612
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7430737018585205,
      "learning_rate": 0.00011604104761856093,
      "loss": 2.9049,
      "step": 163613
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4978699684143066,
      "learning_rate": 0.00011603781637623412,
      "loss": 3.0332,
      "step": 163614
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.947368860244751,
      "learning_rate": 0.00011603458516810926,
      "loss": 3.0733,
      "step": 163615
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1338250637054443,
      "learning_rate": 0.00011603135399418714,
      "loss": 2.8971,
      "step": 163616
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.645394802093506,
      "learning_rate": 0.00011602812285446829,
      "loss": 3.0196,
      "step": 163617
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5537750720977783,
      "learning_rate": 0.00011602489174895314,
      "loss": 2.9366,
      "step": 163618
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.43019437789917,
      "learning_rate": 0.00011602166067764256,
      "loss": 2.9695,
      "step": 163619
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9615347385406494,
      "learning_rate": 0.00011601842964053704,
      "loss": 2.7986,
      "step": 163620
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7739853858947754,
      "learning_rate": 0.00011601519863763704,
      "loss": 2.9945,
      "step": 163621
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5380988121032715,
      "learning_rate": 0.00011601196766894339,
      "loss": 3.0425,
      "step": 163622
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3274331092834473,
      "learning_rate": 0.0001160087367344566,
      "loss": 2.919,
      "step": 163623
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3860530853271484,
      "learning_rate": 0.00011600550583417714,
      "loss": 2.7116,
      "step": 163624
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5673794746398926,
      "learning_rate": 0.00011600227496810584,
      "loss": 2.8623,
      "step": 163625
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8736369609832764,
      "learning_rate": 0.0001159990441362431,
      "loss": 2.9187,
      "step": 163626
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.604877233505249,
      "learning_rate": 0.00011599581333858968,
      "loss": 2.9194,
      "step": 163627
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.526399612426758,
      "learning_rate": 0.00011599258257514615,
      "loss": 2.8296,
      "step": 163628
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.983407497406006,
      "learning_rate": 0.00011598935184591307,
      "loss": 2.8714,
      "step": 163629
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.979099988937378,
      "learning_rate": 0.00011598612115089093,
      "loss": 3.1136,
      "step": 163630
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.176579713821411,
      "learning_rate": 0.00011598289049008052,
      "loss": 3.1243,
      "step": 163631
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2907323837280273,
      "learning_rate": 0.00011597965986348229,
      "loss": 3.0198,
      "step": 163632
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5177061557769775,
      "learning_rate": 0.00011597642927109701,
      "loss": 3.0187,
      "step": 163633
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.306567430496216,
      "learning_rate": 0.0001159731987129252,
      "loss": 3.2424,
      "step": 163634
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.331075668334961,
      "learning_rate": 0.00011596996818896743,
      "loss": 2.5538,
      "step": 163635
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.609717845916748,
      "learning_rate": 0.00011596673769922422,
      "loss": 3.1302,
      "step": 163636
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3761301040649414,
      "learning_rate": 0.00011596350724369637,
      "loss": 2.8157,
      "step": 163637
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.550323009490967,
      "learning_rate": 0.00011596027682238429,
      "loss": 2.8348,
      "step": 163638
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.392758369445801,
      "learning_rate": 0.00011595704643528878,
      "loss": 2.9411,
      "step": 163639
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.396343231201172,
      "learning_rate": 0.00011595381608241027,
      "loss": 2.8312,
      "step": 163640
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3958191871643066,
      "learning_rate": 0.00011595058576374948,
      "loss": 3.0878,
      "step": 163641
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.2200822830200195,
      "learning_rate": 0.00011594735547930682,
      "loss": 2.8588,
      "step": 163642
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.338853120803833,
      "learning_rate": 0.00011594412522908311,
      "loss": 2.8688,
      "step": 163643
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.565857410430908,
      "learning_rate": 0.00011594089501307879,
      "loss": 3.089,
      "step": 163644
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.899646282196045,
      "learning_rate": 0.0001159376648312946,
      "loss": 2.8932,
      "step": 163645
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.5493788719177246,
      "learning_rate": 0.00011593443468373111,
      "loss": 2.7723,
      "step": 163646
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.1637043952941895,
      "learning_rate": 0.00011593120457038885,
      "loss": 2.8626,
      "step": 163647
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.594167470932007,
      "learning_rate": 0.00011592797449126838,
      "loss": 2.7948,
      "step": 163648
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4168930053710938,
      "learning_rate": 0.00011592474444637046,
      "loss": 3.1271,
      "step": 163649
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7479183673858643,
      "learning_rate": 0.00011592151443569549,
      "loss": 3.1292,
      "step": 163650
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.0813019275665283,
      "learning_rate": 0.00011591828445924432,
      "loss": 2.8218,
      "step": 163651
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0659453868865967,
      "learning_rate": 0.00011591505451701738,
      "loss": 3.097,
      "step": 163652
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8723325729370117,
      "learning_rate": 0.00011591182460901533,
      "loss": 2.6912,
      "step": 163653
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8295209407806396,
      "learning_rate": 0.00011590859473523865,
      "loss": 2.9996,
      "step": 163654
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1875193119049072,
      "learning_rate": 0.00011590536489568811,
      "loss": 2.9805,
      "step": 163655
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.158031940460205,
      "learning_rate": 0.00011590213509036415,
      "loss": 2.9731,
      "step": 163656
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4571967124938965,
      "learning_rate": 0.00011589890531926758,
      "loss": 3.1617,
      "step": 163657
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1148486137390137,
      "learning_rate": 0.00011589567558239888,
      "loss": 2.9896,
      "step": 163658
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4149091243743896,
      "learning_rate": 0.0001158924458797586,
      "loss": 2.8557,
      "step": 163659
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6144790649414062,
      "learning_rate": 0.00011588921621134733,
      "loss": 2.9711,
      "step": 163660
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.440868377685547,
      "learning_rate": 0.00011588598657716585,
      "loss": 2.779,
      "step": 163661
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4183781147003174,
      "learning_rate": 0.00011588275697721451,
      "loss": 2.824,
      "step": 163662
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6398203372955322,
      "learning_rate": 0.00011587952741149412,
      "loss": 2.8067,
      "step": 163663
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.417048931121826,
      "learning_rate": 0.00011587629788000525,
      "loss": 3.1028,
      "step": 163664
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3533568382263184,
      "learning_rate": 0.0001158730683827484,
      "loss": 3.0314,
      "step": 163665
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0901730060577393,
      "learning_rate": 0.00011586983891972417,
      "loss": 3.1006,
      "step": 163666
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.678870677947998,
      "learning_rate": 0.00011586660949093329,
      "loss": 2.7649,
      "step": 163667
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1340765953063965,
      "learning_rate": 0.00011586338009637618,
      "loss": 3.072,
      "step": 163668
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9242167472839355,
      "learning_rate": 0.00011586015073605366,
      "loss": 2.803,
      "step": 163669
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7679975032806396,
      "learning_rate": 0.00011585692140996619,
      "loss": 3.0049,
      "step": 163670
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.582829713821411,
      "learning_rate": 0.0001158536921181144,
      "loss": 3.0126,
      "step": 163671
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.230756998062134,
      "learning_rate": 0.00011585046286049879,
      "loss": 2.8811,
      "step": 163672
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6598379611968994,
      "learning_rate": 0.00011584723363712015,
      "loss": 2.6536,
      "step": 163673
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1314539909362793,
      "learning_rate": 0.0001158440044479789,
      "loss": 2.8447,
      "step": 163674
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.5706729888916016,
      "learning_rate": 0.00011584077529307582,
      "loss": 3.0343,
      "step": 163675
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0582540035247803,
      "learning_rate": 0.0001158375461724114,
      "loss": 3.1222,
      "step": 163676
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.545295000076294,
      "learning_rate": 0.00011583431708598628,
      "loss": 2.7385,
      "step": 163677
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.531507968902588,
      "learning_rate": 0.00011583108803380091,
      "loss": 2.7857,
      "step": 163678
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.653898000717163,
      "learning_rate": 0.00011582785901585616,
      "loss": 3.0256,
      "step": 163679
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3181982040405273,
      "learning_rate": 0.00011582463003215237,
      "loss": 2.7706,
      "step": 163680
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0249643325805664,
      "learning_rate": 0.00011582140108269032,
      "loss": 3.0246,
      "step": 163681
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.479598045349121,
      "learning_rate": 0.00011581817216747049,
      "loss": 3.0969,
      "step": 163682
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4658732414245605,
      "learning_rate": 0.00011581494328649373,
      "loss": 2.8384,
      "step": 163683
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5054924488067627,
      "learning_rate": 0.00011581171443976025,
      "loss": 3.0,
      "step": 163684
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3972387313842773,
      "learning_rate": 0.00011580848562727094,
      "loss": 3.1299,
      "step": 163685
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2152156829833984,
      "learning_rate": 0.00011580525684902621,
      "loss": 2.9562,
      "step": 163686
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3950235843658447,
      "learning_rate": 0.00011580202810502685,
      "loss": 3.0246,
      "step": 163687
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3625612258911133,
      "learning_rate": 0.00011579879939527329,
      "loss": 3.129,
      "step": 163688
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9335508346557617,
      "learning_rate": 0.00011579557071976631,
      "loss": 3.0789,
      "step": 163689
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4229576587677,
      "learning_rate": 0.0001157923420785064,
      "loss": 2.8752,
      "step": 163690
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4297292232513428,
      "learning_rate": 0.00011578911347149419,
      "loss": 2.7702,
      "step": 163691
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.453561544418335,
      "learning_rate": 0.00011578588489873011,
      "loss": 2.7426,
      "step": 163692
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.620248794555664,
      "learning_rate": 0.00011578265636021505,
      "loss": 3.0094,
      "step": 163693
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.6170549392700195,
      "learning_rate": 0.00011577942785594939,
      "loss": 3.3145,
      "step": 163694
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.463068723678589,
      "learning_rate": 0.00011577619938593387,
      "loss": 3.1842,
      "step": 163695
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.589759349822998,
      "learning_rate": 0.00011577297095016908,
      "loss": 2.9973,
      "step": 163696
    },
    {
      "epoch": 2.13,
      "grad_norm": 8.824657440185547,
      "learning_rate": 0.00011576974254865552,
      "loss": 2.8801,
      "step": 163697
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.7677721977233887,
      "learning_rate": 0.00011576651418139377,
      "loss": 2.8016,
      "step": 163698
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6685965061187744,
      "learning_rate": 0.00011576328584838462,
      "loss": 2.8786,
      "step": 163699
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9616785049438477,
      "learning_rate": 0.00011576005754962842,
      "loss": 3.0793,
      "step": 163700
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3727328777313232,
      "learning_rate": 0.00011575682928512601,
      "loss": 2.7174,
      "step": 163701
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8292810916900635,
      "learning_rate": 0.00011575360105487788,
      "loss": 2.9562,
      "step": 163702
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3849117755889893,
      "learning_rate": 0.00011575037285888456,
      "loss": 2.8763,
      "step": 163703
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.53767991065979,
      "learning_rate": 0.0001157471446971468,
      "loss": 3.0957,
      "step": 163704
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.5831010341644287,
      "learning_rate": 0.00011574391656966512,
      "loss": 2.9367,
      "step": 163705
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.573331594467163,
      "learning_rate": 0.00011574068847644003,
      "loss": 2.8147,
      "step": 163706
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.565155267715454,
      "learning_rate": 0.00011573746041747233,
      "loss": 2.8712,
      "step": 163707
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.750364303588867,
      "learning_rate": 0.00011573423239276253,
      "loss": 2.8582,
      "step": 163708
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.427745819091797,
      "learning_rate": 0.00011573100440231108,
      "loss": 3.0145,
      "step": 163709
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.536475658416748,
      "learning_rate": 0.00011572777644611885,
      "loss": 3.0114,
      "step": 163710
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.953216314315796,
      "learning_rate": 0.00011572454852418616,
      "loss": 2.8848,
      "step": 163711
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.899876594543457,
      "learning_rate": 0.00011572132063651388,
      "loss": 2.8284,
      "step": 163712
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.619516134262085,
      "learning_rate": 0.0001157180927831025,
      "loss": 2.7713,
      "step": 163713
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8404929637908936,
      "learning_rate": 0.00011571486496395259,
      "loss": 3.1008,
      "step": 163714
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5816142559051514,
      "learning_rate": 0.00011571163717906466,
      "loss": 3.2388,
      "step": 163715
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.141857862472534,
      "learning_rate": 0.00011570840942843954,
      "loss": 2.8135,
      "step": 163716
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.1859869956970215,
      "learning_rate": 0.00011570518171207759,
      "loss": 2.691,
      "step": 163717
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6985480785369873,
      "learning_rate": 0.00011570195402997963,
      "loss": 2.8325,
      "step": 163718
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1186342239379883,
      "learning_rate": 0.00011569872638214613,
      "loss": 2.6689,
      "step": 163719
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.028377056121826,
      "learning_rate": 0.00011569549876857775,
      "loss": 2.9718,
      "step": 163720
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.017666816711426,
      "learning_rate": 0.00011569227118927492,
      "loss": 3.0535,
      "step": 163721
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3728699684143066,
      "learning_rate": 0.00011568904364423849,
      "loss": 2.9948,
      "step": 163722
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.563828706741333,
      "learning_rate": 0.00011568581613346884,
      "loss": 2.9942,
      "step": 163723
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6194469928741455,
      "learning_rate": 0.00011568258865696682,
      "loss": 2.7964,
      "step": 163724
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.378852605819702,
      "learning_rate": 0.00011567936121473286,
      "loss": 2.6737,
      "step": 163725
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.737987518310547,
      "learning_rate": 0.00011567613380676758,
      "loss": 3.0076,
      "step": 163726
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.2667880058288574,
      "learning_rate": 0.00011567290643307145,
      "loss": 3.2522,
      "step": 163727
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1536178588867188,
      "learning_rate": 0.00011566967909364535,
      "loss": 3.0837,
      "step": 163728
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.603628158569336,
      "learning_rate": 0.00011566645178848964,
      "loss": 2.8682,
      "step": 163729
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.241876602172852,
      "learning_rate": 0.0001156632245176051,
      "loss": 3.0692,
      "step": 163730
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.7653632164001465,
      "learning_rate": 0.00011565999728099226,
      "loss": 2.9272,
      "step": 163731
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.133504629135132,
      "learning_rate": 0.00011565677007865169,
      "loss": 2.8544,
      "step": 163732
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.883925437927246,
      "learning_rate": 0.00011565354291058392,
      "loss": 3.2073,
      "step": 163733
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.524303436279297,
      "learning_rate": 0.00011565031577678973,
      "loss": 2.9285,
      "step": 163734
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8431177139282227,
      "learning_rate": 0.00011564708867726952,
      "loss": 2.9922,
      "step": 163735
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.876889944076538,
      "learning_rate": 0.0001156438616120241,
      "loss": 2.9968,
      "step": 163736
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.231140375137329,
      "learning_rate": 0.00011564063458105397,
      "loss": 3.0759,
      "step": 163737
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2460765838623047,
      "learning_rate": 0.00011563740758435973,
      "loss": 3.0272,
      "step": 163738
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3954670429229736,
      "learning_rate": 0.00011563418062194188,
      "loss": 2.9136,
      "step": 163739
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1504099369049072,
      "learning_rate": 0.00011563095369380118,
      "loss": 3.0647,
      "step": 163740
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2365872859954834,
      "learning_rate": 0.00011562772679993809,
      "loss": 3.0745,
      "step": 163741
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.753039598464966,
      "learning_rate": 0.00011562449994035339,
      "loss": 2.9325,
      "step": 163742
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5689868927001953,
      "learning_rate": 0.0001156212731150476,
      "loss": 2.8035,
      "step": 163743
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9576525688171387,
      "learning_rate": 0.00011561804632402126,
      "loss": 3.03,
      "step": 163744
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.30649995803833,
      "learning_rate": 0.00011561481956727492,
      "loss": 2.6344,
      "step": 163745
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.346179723739624,
      "learning_rate": 0.00011561159284480934,
      "loss": 2.9958,
      "step": 163746
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6040050983428955,
      "learning_rate": 0.00011560836615662495,
      "loss": 2.9088,
      "step": 163747
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6876444816589355,
      "learning_rate": 0.0001156051395027226,
      "loss": 2.8336,
      "step": 163748
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.775411367416382,
      "learning_rate": 0.00011560191288310256,
      "loss": 2.7393,
      "step": 163749
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6925694942474365,
      "learning_rate": 0.00011559868629776585,
      "loss": 2.7897,
      "step": 163750
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.432394504547119,
      "learning_rate": 0.00011559545974671262,
      "loss": 2.9134,
      "step": 163751
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2106587886810303,
      "learning_rate": 0.00011559223322994378,
      "loss": 2.9998,
      "step": 163752
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.295417070388794,
      "learning_rate": 0.0001155890067474597,
      "loss": 3.0625,
      "step": 163753
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.475571870803833,
      "learning_rate": 0.00011558578029926123,
      "loss": 2.8678,
      "step": 163754
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.546285629272461,
      "learning_rate": 0.00011558255388534875,
      "loss": 3.2644,
      "step": 163755
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.557626247406006,
      "learning_rate": 0.00011557932750572316,
      "loss": 3.0014,
      "step": 163756
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.897664785385132,
      "learning_rate": 0.0001155761011603846,
      "loss": 2.9061,
      "step": 163757
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.8917579650878906,
      "learning_rate": 0.00011557287484933407,
      "loss": 2.9054,
      "step": 163758
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.6511170864105225,
      "learning_rate": 0.00011556964857257189,
      "loss": 2.8125,
      "step": 163759
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3278536796569824,
      "learning_rate": 0.00011556642233009892,
      "loss": 2.875,
      "step": 163760
    },
    {
      "epoch": 2.13,
      "grad_norm": 5.049887657165527,
      "learning_rate": 0.00011556319612191555,
      "loss": 2.8958,
      "step": 163761
    },
    {
      "epoch": 2.13,
      "grad_norm": 5.162221431732178,
      "learning_rate": 0.00011555996994802265,
      "loss": 2.8567,
      "step": 163762
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.85001540184021,
      "learning_rate": 0.00011555674380842042,
      "loss": 3.0013,
      "step": 163763
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.815258502960205,
      "learning_rate": 0.00011555351770310979,
      "loss": 2.8391,
      "step": 163764
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.281132221221924,
      "learning_rate": 0.00011555029163209109,
      "loss": 2.9394,
      "step": 163765
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.2123990058898926,
      "learning_rate": 0.00011554706559536524,
      "loss": 3.0478,
      "step": 163766
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.439711093902588,
      "learning_rate": 0.00011554383959293255,
      "loss": 3.0877,
      "step": 163767
    },
    {
      "epoch": 2.13,
      "grad_norm": 5.314995765686035,
      "learning_rate": 0.00011554061362479393,
      "loss": 2.9188,
      "step": 163768
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.948706150054932,
      "learning_rate": 0.00011553738769094962,
      "loss": 2.7437,
      "step": 163769
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.056570529937744,
      "learning_rate": 0.00011553416179140046,
      "loss": 2.9637,
      "step": 163770
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0399370193481445,
      "learning_rate": 0.00011553093592614687,
      "loss": 2.8096,
      "step": 163771
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4511921405792236,
      "learning_rate": 0.00011552771009518971,
      "loss": 3.1489,
      "step": 163772
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3026673793792725,
      "learning_rate": 0.0001155244842985293,
      "loss": 2.8729,
      "step": 163773
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7888357639312744,
      "learning_rate": 0.00011552125853616647,
      "loss": 2.7459,
      "step": 163774
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.083771228790283,
      "learning_rate": 0.00011551803280810175,
      "loss": 3.0834,
      "step": 163775
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.613811731338501,
      "learning_rate": 0.00011551480711433569,
      "loss": 2.8489,
      "step": 163776
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3538436889648438,
      "learning_rate": 0.00011551158145486879,
      "loss": 2.8503,
      "step": 163777
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.650747060775757,
      "learning_rate": 0.00011550835582970188,
      "loss": 3.0714,
      "step": 163778
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2367029190063477,
      "learning_rate": 0.00011550513023883534,
      "loss": 3.2178,
      "step": 163779
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.2566914558410645,
      "learning_rate": 0.00011550190468226999,
      "loss": 2.8274,
      "step": 163780
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2508649826049805,
      "learning_rate": 0.00011549867916000635,
      "loss": 2.8096,
      "step": 163781
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.347316026687622,
      "learning_rate": 0.00011549545367204496,
      "loss": 3.1483,
      "step": 163782
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6053924560546875,
      "learning_rate": 0.00011549222821838633,
      "loss": 3.1243,
      "step": 163783
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.1467199325561523,
      "learning_rate": 0.00011548900279903129,
      "loss": 2.9307,
      "step": 163784
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3699913024902344,
      "learning_rate": 0.00011548577741398025,
      "loss": 3.0014,
      "step": 163785
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8392562866210938,
      "learning_rate": 0.00011548255206323397,
      "loss": 2.9671,
      "step": 163786
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.239388942718506,
      "learning_rate": 0.00011547932674679298,
      "loss": 3.0008,
      "step": 163787
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.609018564224243,
      "learning_rate": 0.00011547610146465774,
      "loss": 3.004,
      "step": 163788
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2264561653137207,
      "learning_rate": 0.0001154728762168291,
      "loss": 2.8507,
      "step": 163789
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6935157775878906,
      "learning_rate": 0.00011546965100330752,
      "loss": 2.912,
      "step": 163790
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.189131021499634,
      "learning_rate": 0.00011546642582409352,
      "loss": 3.1411,
      "step": 163791
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.0622611045837402,
      "learning_rate": 0.00011546320067918788,
      "loss": 2.6588,
      "step": 163792
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4832911491394043,
      "learning_rate": 0.00011545997556859113,
      "loss": 2.9861,
      "step": 163793
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5230624675750732,
      "learning_rate": 0.00011545675049230377,
      "loss": 2.8748,
      "step": 163794
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5099027156829834,
      "learning_rate": 0.00011545352545032657,
      "loss": 2.997,
      "step": 163795
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2425918579101562,
      "learning_rate": 0.00011545030044266005,
      "loss": 2.9345,
      "step": 163796
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4488747119903564,
      "learning_rate": 0.00011544707546930468,
      "loss": 2.9286,
      "step": 163797
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4787492752075195,
      "learning_rate": 0.0001154438505302613,
      "loss": 3.0401,
      "step": 163798
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.720689296722412,
      "learning_rate": 0.00011544062562553038,
      "loss": 2.9787,
      "step": 163799
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2902631759643555,
      "learning_rate": 0.00011543740075511244,
      "loss": 2.9929,
      "step": 163800
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2949211597442627,
      "learning_rate": 0.00011543417591900825,
      "loss": 2.9837,
      "step": 163801
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2858176231384277,
      "learning_rate": 0.00011543095111721825,
      "loss": 3.1041,
      "step": 163802
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3927645683288574,
      "learning_rate": 0.00011542772634974323,
      "loss": 3.4344,
      "step": 163803
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.735943078994751,
      "learning_rate": 0.00011542450161658366,
      "loss": 2.8511,
      "step": 163804
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.470639228820801,
      "learning_rate": 0.00011542127691774018,
      "loss": 3.01,
      "step": 163805
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.884284019470215,
      "learning_rate": 0.00011541805225321328,
      "loss": 3.1195,
      "step": 163806
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4362800121307373,
      "learning_rate": 0.00011541482762300373,
      "loss": 3.0299,
      "step": 163807
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.655974864959717,
      "learning_rate": 0.00011541160302711192,
      "loss": 3.118,
      "step": 163808
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2114036083221436,
      "learning_rate": 0.0001154083784655387,
      "loss": 2.9022,
      "step": 163809
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9130141735076904,
      "learning_rate": 0.00011540515393828456,
      "loss": 2.8543,
      "step": 163810
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0969316959381104,
      "learning_rate": 0.0001154019294453501,
      "loss": 3.1264,
      "step": 163811
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5188515186309814,
      "learning_rate": 0.00011539870498673575,
      "loss": 2.8149,
      "step": 163812
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6113510131835938,
      "learning_rate": 0.00011539548056244241,
      "loss": 2.7865,
      "step": 163813
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9680614471435547,
      "learning_rate": 0.00011539225617247043,
      "loss": 2.9047,
      "step": 163814
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.368842601776123,
      "learning_rate": 0.00011538903181682057,
      "loss": 3.0659,
      "step": 163815
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4246742725372314,
      "learning_rate": 0.00011538580749549334,
      "loss": 3.0759,
      "step": 163816
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.232076644897461,
      "learning_rate": 0.00011538258320848956,
      "loss": 2.8405,
      "step": 163817
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3132944107055664,
      "learning_rate": 0.0001153793589558094,
      "loss": 2.9925,
      "step": 163818
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.110309600830078,
      "learning_rate": 0.00011537613473745383,
      "loss": 3.0106,
      "step": 163819
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4566171169281006,
      "learning_rate": 0.00011537291055342321,
      "loss": 2.7968,
      "step": 163820
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.810576915740967,
      "learning_rate": 0.00011536968640371837,
      "loss": 2.8933,
      "step": 163821
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6200363636016846,
      "learning_rate": 0.0001153664622883397,
      "loss": 3.0095,
      "step": 163822
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4172723293304443,
      "learning_rate": 0.00011536323820728806,
      "loss": 3.0589,
      "step": 163823
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.880505084991455,
      "learning_rate": 0.00011536001416056367,
      "loss": 2.9704,
      "step": 163824
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8183789253234863,
      "learning_rate": 0.00011535679014816747,
      "loss": 2.7684,
      "step": 163825
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.6260769367218018,
      "learning_rate": 0.0001153535661700998,
      "loss": 2.8481,
      "step": 163826
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8970882892608643,
      "learning_rate": 0.00011535034222636152,
      "loss": 2.8045,
      "step": 163827
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.196611166000366,
      "learning_rate": 0.00011534711831695299,
      "loss": 3.0257,
      "step": 163828
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9732563495635986,
      "learning_rate": 0.0001153438944418751,
      "loss": 2.9085,
      "step": 163829
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8552439212799072,
      "learning_rate": 0.00011534067060112807,
      "loss": 2.874,
      "step": 163830
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.571506977081299,
      "learning_rate": 0.00011533744679471282,
      "loss": 2.9186,
      "step": 163831
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7773759365081787,
      "learning_rate": 0.00011533422302262966,
      "loss": 2.9837,
      "step": 163832
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.322050094604492,
      "learning_rate": 0.00011533099928487952,
      "loss": 3.0816,
      "step": 163833
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.94626522064209,
      "learning_rate": 0.00011532777558146272,
      "loss": 3.125,
      "step": 163834
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.158813714981079,
      "learning_rate": 0.00011532455191238017,
      "loss": 3.08,
      "step": 163835
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9913158416748047,
      "learning_rate": 0.00011532132827763204,
      "loss": 2.9559,
      "step": 163836
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3553359508514404,
      "learning_rate": 0.0001153181046772193,
      "loss": 2.9983,
      "step": 163837
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2914226055145264,
      "learning_rate": 0.00011531488111114229,
      "loss": 3.0018,
      "step": 163838
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.44765043258667,
      "learning_rate": 0.00011531165757940181,
      "loss": 3.1249,
      "step": 163839
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.465846300125122,
      "learning_rate": 0.00011530843408199831,
      "loss": 3.1453,
      "step": 163840
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4516983032226562,
      "learning_rate": 0.00011530521061893268,
      "loss": 3.0242,
      "step": 163841
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5224595069885254,
      "learning_rate": 0.00011530198719020507,
      "loss": 3.1614,
      "step": 163842
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7431797981262207,
      "learning_rate": 0.0001152987637958164,
      "loss": 2.9581,
      "step": 163843
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.037489652633667,
      "learning_rate": 0.00011529554043576709,
      "loss": 2.9805,
      "step": 163844
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3666486740112305,
      "learning_rate": 0.0001152923171100579,
      "loss": 2.8293,
      "step": 163845
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.361185073852539,
      "learning_rate": 0.0001152890938186893,
      "loss": 2.9864,
      "step": 163846
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6699411869049072,
      "learning_rate": 0.00011528587056166211,
      "loss": 2.7452,
      "step": 163847
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3650243282318115,
      "learning_rate": 0.00011528264733897656,
      "loss": 2.8675,
      "step": 163848
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.484095573425293,
      "learning_rate": 0.00011527942415063355,
      "loss": 2.935,
      "step": 163849
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.341552734375,
      "learning_rate": 0.00011527620099663353,
      "loss": 2.8675,
      "step": 163850
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2152819633483887,
      "learning_rate": 0.00011527297787697719,
      "loss": 3.043,
      "step": 163851
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4885847568511963,
      "learning_rate": 0.00011526975479166501,
      "loss": 3.1293,
      "step": 163852
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.021845579147339,
      "learning_rate": 0.0001152665317406979,
      "loss": 3.089,
      "step": 163853
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6665337085723877,
      "learning_rate": 0.000115263308724076,
      "loss": 2.9038,
      "step": 163854
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4411025047302246,
      "learning_rate": 0.00011526008574180025,
      "loss": 2.9177,
      "step": 163855
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.788609027862549,
      "learning_rate": 0.00011525686279387104,
      "loss": 2.9318,
      "step": 163856
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.586845874786377,
      "learning_rate": 0.00011525363988028917,
      "loss": 2.9017,
      "step": 163857
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4958269596099854,
      "learning_rate": 0.000115250417001055,
      "loss": 3.0885,
      "step": 163858
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0094447135925293,
      "learning_rate": 0.00011524719415616951,
      "loss": 2.8334,
      "step": 163859
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.0980961322784424,
      "learning_rate": 0.00011524397134563284,
      "loss": 2.812,
      "step": 163860
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.3466877937316895,
      "learning_rate": 0.00011524074856944588,
      "loss": 2.732,
      "step": 163861
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.413825035095215,
      "learning_rate": 0.00011523752582760906,
      "loss": 2.9368,
      "step": 163862
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.0049712657928467,
      "learning_rate": 0.00011523430312012319,
      "loss": 3.1354,
      "step": 163863
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.274707555770874,
      "learning_rate": 0.00011523108044698864,
      "loss": 2.8995,
      "step": 163864
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.545121192932129,
      "learning_rate": 0.00011522785780820621,
      "loss": 2.7638,
      "step": 163865
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6530630588531494,
      "learning_rate": 0.00011522463520377643,
      "loss": 2.8716,
      "step": 163866
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5945003032684326,
      "learning_rate": 0.00011522141263369987,
      "loss": 3.0237,
      "step": 163867
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4722390174865723,
      "learning_rate": 0.00011521819009797704,
      "loss": 2.9398,
      "step": 163868
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4877452850341797,
      "learning_rate": 0.00011521496759660872,
      "loss": 2.9956,
      "step": 163869
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.338195323944092,
      "learning_rate": 0.00011521174512959536,
      "loss": 2.9091,
      "step": 163870
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2934672832489014,
      "learning_rate": 0.00011520852269693768,
      "loss": 3.0068,
      "step": 163871
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.033759117126465,
      "learning_rate": 0.00011520530029863625,
      "loss": 3.0693,
      "step": 163872
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.926337480545044,
      "learning_rate": 0.00011520207793469154,
      "loss": 2.764,
      "step": 163873
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0382800102233887,
      "learning_rate": 0.00011519885560510435,
      "loss": 2.9697,
      "step": 163874
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2765121459960938,
      "learning_rate": 0.0001151956333098752,
      "loss": 3.1288,
      "step": 163875
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.28641939163208,
      "learning_rate": 0.00011519241104900454,
      "loss": 3.022,
      "step": 163876
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.316986560821533,
      "learning_rate": 0.00011518918882249323,
      "loss": 2.9774,
      "step": 163877
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.329730987548828,
      "learning_rate": 0.00011518596663034169,
      "loss": 2.8662,
      "step": 163878
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0821495056152344,
      "learning_rate": 0.00011518274447255051,
      "loss": 2.989,
      "step": 163879
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6701204776763916,
      "learning_rate": 0.00011517952234912045,
      "loss": 3.0399,
      "step": 163880
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3092005252838135,
      "learning_rate": 0.000115176300260052,
      "loss": 2.9796,
      "step": 163881
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3628971576690674,
      "learning_rate": 0.00011517307820534562,
      "loss": 3.0844,
      "step": 163882
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.499642848968506,
      "learning_rate": 0.00011516985618500219,
      "loss": 2.9246,
      "step": 163883
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.125274419784546,
      "learning_rate": 0.00011516663419902217,
      "loss": 2.954,
      "step": 163884
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2568533420562744,
      "learning_rate": 0.00011516341224740606,
      "loss": 2.8478,
      "step": 163885
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7913146018981934,
      "learning_rate": 0.00011516019033015466,
      "loss": 3.0626,
      "step": 163886
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.426295518875122,
      "learning_rate": 0.00011515696844726838,
      "loss": 2.9094,
      "step": 163887
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.041733980178833,
      "learning_rate": 0.00011515374659874801,
      "loss": 3.0876,
      "step": 163888
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.7725799083709717,
      "learning_rate": 0.00011515052478459404,
      "loss": 2.7835,
      "step": 163889
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4720077514648438,
      "learning_rate": 0.00011514730300480709,
      "loss": 2.9752,
      "step": 163890
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.8614981174468994,
      "learning_rate": 0.00011514408125938761,
      "loss": 2.8722,
      "step": 163891
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.093986988067627,
      "learning_rate": 0.00011514085954833648,
      "loss": 2.7887,
      "step": 163892
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1877620220184326,
      "learning_rate": 0.00011513763787165401,
      "loss": 2.7801,
      "step": 163893
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1887128353118896,
      "learning_rate": 0.00011513441622934106,
      "loss": 2.7005,
      "step": 163894
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.379340887069702,
      "learning_rate": 0.00011513119462139801,
      "loss": 2.9663,
      "step": 163895
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.749316692352295,
      "learning_rate": 0.0001151279730478258,
      "loss": 3.0558,
      "step": 163896
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4731040000915527,
      "learning_rate": 0.00011512475150862453,
      "loss": 2.8775,
      "step": 163897
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.228994846343994,
      "learning_rate": 0.00011512153000379519,
      "loss": 2.738,
      "step": 163898
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.906364917755127,
      "learning_rate": 0.00011511830853333814,
      "loss": 2.8499,
      "step": 163899
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.511514663696289,
      "learning_rate": 0.00011511508709725417,
      "loss": 2.8105,
      "step": 163900
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.560948610305786,
      "learning_rate": 0.00011511186569554372,
      "loss": 2.6786,
      "step": 163901
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2153236865997314,
      "learning_rate": 0.00011510864432820767,
      "loss": 2.8715,
      "step": 163902
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2963833808898926,
      "learning_rate": 0.0001151054229952462,
      "loss": 2.9584,
      "step": 163903
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.980299234390259,
      "learning_rate": 0.00011510220169666022,
      "loss": 2.8416,
      "step": 163904
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.900531053543091,
      "learning_rate": 0.00011509898043245011,
      "loss": 2.6537,
      "step": 163905
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.744617223739624,
      "learning_rate": 0.00011509575920261674,
      "loss": 2.8111,
      "step": 163906
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7025163173675537,
      "learning_rate": 0.0001150925380071604,
      "loss": 2.9353,
      "step": 163907
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.807811975479126,
      "learning_rate": 0.00011508931684608211,
      "loss": 2.8763,
      "step": 163908
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0108494758605957,
      "learning_rate": 0.00011508609571938197,
      "loss": 3.0728,
      "step": 163909
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.768098831176758,
      "learning_rate": 0.00011508287462706093,
      "loss": 2.7457,
      "step": 163910
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8047704696655273,
      "learning_rate": 0.00011507965356911936,
      "loss": 2.8879,
      "step": 163911
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4105005264282227,
      "learning_rate": 0.00011507643254555808,
      "loss": 3.0712,
      "step": 163912
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.39599609375,
      "learning_rate": 0.00011507321155637748,
      "loss": 3.2079,
      "step": 163913
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2954702377319336,
      "learning_rate": 0.00011506999060157846,
      "loss": 3.1648,
      "step": 163914
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.8181962966918945,
      "learning_rate": 0.00011506676968116123,
      "loss": 2.9969,
      "step": 163915
    },
    {
      "epoch": 2.13,
      "grad_norm": 5.339370250701904,
      "learning_rate": 0.00011506354879512663,
      "loss": 3.0563,
      "step": 163916
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.063634157180786,
      "learning_rate": 0.00011506032794347515,
      "loss": 2.996,
      "step": 163917
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3189682960510254,
      "learning_rate": 0.00011505710712620756,
      "loss": 3.0578,
      "step": 163918
    },
    {
      "epoch": 2.13,
      "grad_norm": 5.732846260070801,
      "learning_rate": 0.00011505388634332423,
      "loss": 3.0381,
      "step": 163919
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.994948148727417,
      "learning_rate": 0.00011505066559482606,
      "loss": 2.9127,
      "step": 163920
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.011453151702881,
      "learning_rate": 0.0001150474448807133,
      "loss": 2.8303,
      "step": 163921
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.083449125289917,
      "learning_rate": 0.00011504422420098677,
      "loss": 3.0848,
      "step": 163922
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.514930486679077,
      "learning_rate": 0.00011504100355564693,
      "loss": 2.8416,
      "step": 163923
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.761277914047241,
      "learning_rate": 0.00011503778294469454,
      "loss": 3.0321,
      "step": 163924
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6925439834594727,
      "learning_rate": 0.00011503456236813003,
      "loss": 2.8266,
      "step": 163925
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4347875118255615,
      "learning_rate": 0.00011503134182595429,
      "loss": 2.8538,
      "step": 163926
    },
    {
      "epoch": 2.13,
      "grad_norm": 4.4991583824157715,
      "learning_rate": 0.0001150281213181675,
      "loss": 2.9913,
      "step": 163927
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.204726219177246,
      "learning_rate": 0.00011502490084477058,
      "loss": 2.7468,
      "step": 163928
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7620363235473633,
      "learning_rate": 0.00011502168040576395,
      "loss": 3.0645,
      "step": 163929
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9067118167877197,
      "learning_rate": 0.00011501846000114834,
      "loss": 2.9739,
      "step": 163930
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9484927654266357,
      "learning_rate": 0.00011501523963092422,
      "loss": 2.4885,
      "step": 163931
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6878345012664795,
      "learning_rate": 0.00011501201929509245,
      "loss": 2.8898,
      "step": 163932
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6884748935699463,
      "learning_rate": 0.00011500879899365324,
      "loss": 2.6864,
      "step": 163933
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.966754913330078,
      "learning_rate": 0.00011500557872660749,
      "loss": 2.7567,
      "step": 163934
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5671963691711426,
      "learning_rate": 0.00011500235849395558,
      "loss": 2.9621,
      "step": 163935
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7441141605377197,
      "learning_rate": 0.00011499913829569834,
      "loss": 2.6972,
      "step": 163936
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3799378871917725,
      "learning_rate": 0.00011499591813183614,
      "loss": 2.8776,
      "step": 163937
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5658440589904785,
      "learning_rate": 0.00011499269800236989,
      "loss": 2.944,
      "step": 163938
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4294347763061523,
      "learning_rate": 0.00011498947790729977,
      "loss": 2.9912,
      "step": 163939
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0211541652679443,
      "learning_rate": 0.00011498625784662674,
      "loss": 3.0275,
      "step": 163940
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.5702755451202393,
      "learning_rate": 0.00011498303782035116,
      "loss": 2.8131,
      "step": 163941
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.6356120109558105,
      "learning_rate": 0.00011497981782847379,
      "loss": 2.8699,
      "step": 163942
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.1819939613342285,
      "learning_rate": 0.00011497659787099507,
      "loss": 2.9972,
      "step": 163943
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.446652889251709,
      "learning_rate": 0.0001149733779479159,
      "loss": 2.9658,
      "step": 163944
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.653463125228882,
      "learning_rate": 0.00011497015805923648,
      "loss": 2.7155,
      "step": 163945
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.6409614086151123,
      "learning_rate": 0.00011496693820495772,
      "loss": 2.8431,
      "step": 163946
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.7610042095184326,
      "learning_rate": 0.00011496371838507996,
      "loss": 2.8265,
      "step": 163947
    },
    {
      "epoch": 2.13,
      "grad_norm": 5.221120834350586,
      "learning_rate": 0.00011496049859960405,
      "loss": 3.3331,
      "step": 163948
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.530363082885742,
      "learning_rate": 0.00011495727884853037,
      "loss": 3.0466,
      "step": 163949
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.295701026916504,
      "learning_rate": 0.00011495405913185973,
      "loss": 2.8309,
      "step": 163950
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.309964179992676,
      "learning_rate": 0.00011495083944959263,
      "loss": 3.0656,
      "step": 163951
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.3167755603790283,
      "learning_rate": 0.00011494761980172967,
      "loss": 2.9843,
      "step": 163952
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.2469594478607178,
      "learning_rate": 0.0001149444001882713,
      "loss": 2.9398,
      "step": 163953
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.9840219020843506,
      "learning_rate": 0.00011494118060921841,
      "loss": 3.0612,
      "step": 163954
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.002837896347046,
      "learning_rate": 0.00011493796106457132,
      "loss": 3.0823,
      "step": 163955
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.4670217037200928,
      "learning_rate": 0.00011493474155433086,
      "loss": 3.0299,
      "step": 163956
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.4375059604644775,
      "learning_rate": 0.00011493152207849753,
      "loss": 2.9376,
      "step": 163957
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.216036081314087,
      "learning_rate": 0.0001149283026370719,
      "loss": 2.8755,
      "step": 163958
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.404336452484131,
      "learning_rate": 0.00011492508323005454,
      "loss": 3.0801,
      "step": 163959
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.500164270401001,
      "learning_rate": 0.00011492186385744617,
      "loss": 2.8566,
      "step": 163960
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.289395332336426,
      "learning_rate": 0.00011491864451924722,
      "loss": 2.9866,
      "step": 163961
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.568741798400879,
      "learning_rate": 0.00011491542521545847,
      "loss": 2.8931,
      "step": 163962
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.350266933441162,
      "learning_rate": 0.00011491220594608046,
      "loss": 3.0231,
      "step": 163963
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.576774835586548,
      "learning_rate": 0.00011490898671111366,
      "loss": 3.1957,
      "step": 163964
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.364640235900879,
      "learning_rate": 0.00011490576751055886,
      "loss": 2.9438,
      "step": 163965
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.875746726989746,
      "learning_rate": 0.00011490254834441662,
      "loss": 3.1303,
      "step": 163966
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.0594370365142822,
      "learning_rate": 0.00011489932921268733,
      "loss": 3.0957,
      "step": 163967
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.86822509765625,
      "learning_rate": 0.00011489611011537189,
      "loss": 2.8502,
      "step": 163968
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.371281147003174,
      "learning_rate": 0.00011489289105247075,
      "loss": 3.0382,
      "step": 163969
    },
    {
      "epoch": 2.13,
      "grad_norm": 3.700591802597046,
      "learning_rate": 0.00011488967202398438,
      "loss": 3.0617,
      "step": 163970
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8969006538391113,
      "learning_rate": 0.00011488645302991366,
      "loss": 3.0006,
      "step": 163971
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3525614738464355,
      "learning_rate": 0.00011488323407025892,
      "loss": 3.0298,
      "step": 163972
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.4688167572021484,
      "learning_rate": 0.000114880015145021,
      "loss": 2.8834,
      "step": 163973
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.854572296142578,
      "learning_rate": 0.00011487679625420034,
      "loss": 2.8745,
      "step": 163974
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.294914960861206,
      "learning_rate": 0.00011487357739779762,
      "loss": 2.7936,
      "step": 163975
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2743823528289795,
      "learning_rate": 0.00011487035857581326,
      "loss": 2.5398,
      "step": 163976
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7078168392181396,
      "learning_rate": 0.00011486713978824813,
      "loss": 2.8661,
      "step": 163977
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.644054889678955,
      "learning_rate": 0.00011486392103510257,
      "loss": 2.6608,
      "step": 163978
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3071653842926025,
      "learning_rate": 0.00011486070231637741,
      "loss": 2.7771,
      "step": 163979
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4125008583068848,
      "learning_rate": 0.00011485748363207314,
      "loss": 3.0315,
      "step": 163980
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.273454189300537,
      "learning_rate": 0.00011485426498219039,
      "loss": 2.6156,
      "step": 163981
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.801515817642212,
      "learning_rate": 0.00011485104636672956,
      "loss": 2.8921,
      "step": 163982
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.568354368209839,
      "learning_rate": 0.00011484782778569158,
      "loss": 2.8529,
      "step": 163983
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4520695209503174,
      "learning_rate": 0.00011484460923907675,
      "loss": 3.203,
      "step": 163984
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.224682092666626,
      "learning_rate": 0.00011484139072688588,
      "loss": 2.9946,
      "step": 163985
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0582077503204346,
      "learning_rate": 0.00011483817224911941,
      "loss": 2.9664,
      "step": 163986
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.154825687408447,
      "learning_rate": 0.00011483495380577825,
      "loss": 2.907,
      "step": 163987
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6204023361206055,
      "learning_rate": 0.00011483173539686255,
      "loss": 3.0526,
      "step": 163988
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.0560805797576904,
      "learning_rate": 0.0001148285170223732,
      "loss": 2.8165,
      "step": 163989
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.308185577392578,
      "learning_rate": 0.00011482529868231065,
      "loss": 2.9434,
      "step": 163990
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7305452823638916,
      "learning_rate": 0.00011482208037667566,
      "loss": 2.9965,
      "step": 163991
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.52956223487854,
      "learning_rate": 0.00011481886210546863,
      "loss": 2.7382,
      "step": 163992
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.355856418609619,
      "learning_rate": 0.0001148156438686905,
      "loss": 2.854,
      "step": 163993
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0950615406036377,
      "learning_rate": 0.00011481242566634142,
      "loss": 2.9785,
      "step": 163994
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.677450656890869,
      "learning_rate": 0.00011480920749842231,
      "loss": 2.7857,
      "step": 163995
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8366317749023438,
      "learning_rate": 0.00011480598936493358,
      "loss": 2.9571,
      "step": 163996
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6957719326019287,
      "learning_rate": 0.00011480277126587598,
      "loss": 3.0651,
      "step": 163997
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.018423080444336,
      "learning_rate": 0.00011479955320124997,
      "loss": 2.8589,
      "step": 163998
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.064101457595825,
      "learning_rate": 0.00011479633517105641,
      "loss": 2.9158,
      "step": 163999
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1261866092681885,
      "learning_rate": 0.00011479311717529549,
      "loss": 2.8441,
      "step": 164000
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.629868745803833,
      "learning_rate": 0.00011478989921396815,
      "loss": 2.9217,
      "step": 164001
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.647867441177368,
      "learning_rate": 0.00011478668128707474,
      "loss": 2.866,
      "step": 164002
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1645455360412598,
      "learning_rate": 0.0001147834633946161,
      "loss": 2.968,
      "step": 164003
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.084447145462036,
      "learning_rate": 0.0001147802455365926,
      "loss": 2.6482,
      "step": 164004
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9555606842041016,
      "learning_rate": 0.00011477702771300514,
      "loss": 2.7645,
      "step": 164005
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1575775146484375,
      "learning_rate": 0.00011477380992385394,
      "loss": 3.2116,
      "step": 164006
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.527284622192383,
      "learning_rate": 0.0001147705921691399,
      "loss": 2.8132,
      "step": 164007
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1976304054260254,
      "learning_rate": 0.0001147673744488634,
      "loss": 2.8997,
      "step": 164008
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3535008430480957,
      "learning_rate": 0.00011476415676302522,
      "loss": 2.9179,
      "step": 164009
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.470757484436035,
      "learning_rate": 0.00011476093911162578,
      "loss": 2.7901,
      "step": 164010
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7139880657196045,
      "learning_rate": 0.000114757721494666,
      "loss": 2.9013,
      "step": 164011
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4851746559143066,
      "learning_rate": 0.00011475450391214604,
      "loss": 2.7516,
      "step": 164012
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.467289924621582,
      "learning_rate": 0.0001147512863640668,
      "loss": 2.9025,
      "step": 164013
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.432007312774658,
      "learning_rate": 0.00011474806885042872,
      "loss": 3.2093,
      "step": 164014
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3554491996765137,
      "learning_rate": 0.00011474485137123258,
      "loss": 2.6205,
      "step": 164015
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1036105155944824,
      "learning_rate": 0.00011474163392647877,
      "loss": 2.909,
      "step": 164016
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.325086832046509,
      "learning_rate": 0.00011473841651616806,
      "loss": 2.9775,
      "step": 164017
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.513868808746338,
      "learning_rate": 0.00011473519914030096,
      "loss": 2.6617,
      "step": 164018
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3725802898406982,
      "learning_rate": 0.00011473198179887811,
      "loss": 2.9994,
      "step": 164019
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.737121343612671,
      "learning_rate": 0.00011472876449189998,
      "loss": 3.0537,
      "step": 164020
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3309617042541504,
      "learning_rate": 0.00011472554721936736,
      "loss": 2.9498,
      "step": 164021
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.385124683380127,
      "learning_rate": 0.0001147223299812807,
      "loss": 2.8506,
      "step": 164022
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.032132863998413,
      "learning_rate": 0.0001147191127776407,
      "loss": 3.0909,
      "step": 164023
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9300625324249268,
      "learning_rate": 0.00011471589560844795,
      "loss": 2.8852,
      "step": 164024
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2330949306488037,
      "learning_rate": 0.00011471267847370301,
      "loss": 3.0043,
      "step": 164025
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.688222885131836,
      "learning_rate": 0.00011470946137340635,
      "loss": 2.9036,
      "step": 164026
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.772007465362549,
      "learning_rate": 0.00011470624430755884,
      "loss": 3.0401,
      "step": 164027
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.709291458129883,
      "learning_rate": 0.0001147030272761608,
      "loss": 2.813,
      "step": 164028
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6330325603485107,
      "learning_rate": 0.0001146998102792131,
      "loss": 2.8326,
      "step": 164029
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.525315284729004,
      "learning_rate": 0.00011469659331671618,
      "loss": 2.7683,
      "step": 164030
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.515850782394409,
      "learning_rate": 0.00011469337638867066,
      "loss": 2.866,
      "step": 164031
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6677823066711426,
      "learning_rate": 0.00011469015949507706,
      "loss": 3.023,
      "step": 164032
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7266130447387695,
      "learning_rate": 0.00011468694263593612,
      "loss": 2.9769,
      "step": 164033
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.516709566116333,
      "learning_rate": 0.0001146837258112483,
      "loss": 3.0472,
      "step": 164034
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4354889392852783,
      "learning_rate": 0.00011468050902101439,
      "loss": 2.7189,
      "step": 164035
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.378190755844116,
      "learning_rate": 0.00011467729226523484,
      "loss": 2.9241,
      "step": 164036
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.276994466781616,
      "learning_rate": 0.0001146740755439103,
      "loss": 2.98,
      "step": 164037
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.69454288482666,
      "learning_rate": 0.00011467085885704124,
      "loss": 2.9504,
      "step": 164038
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0925590991973877,
      "learning_rate": 0.00011466764220462847,
      "loss": 3.1606,
      "step": 164039
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8926327228546143,
      "learning_rate": 0.00011466442558667239,
      "loss": 2.9473,
      "step": 164040
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8881375789642334,
      "learning_rate": 0.00011466120900317376,
      "loss": 2.9245,
      "step": 164041
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3408846855163574,
      "learning_rate": 0.00011465799245413314,
      "loss": 3.0903,
      "step": 164042
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.48475980758667,
      "learning_rate": 0.00011465477593955109,
      "loss": 3.0419,
      "step": 164043
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.727755546569824,
      "learning_rate": 0.00011465155945942811,
      "loss": 2.933,
      "step": 164044
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3660824298858643,
      "learning_rate": 0.00011464834301376499,
      "loss": 3.0846,
      "step": 164045
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.260378837585449,
      "learning_rate": 0.00011464512660256219,
      "loss": 2.7494,
      "step": 164046
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2525858879089355,
      "learning_rate": 0.0001146419102258204,
      "loss": 2.8961,
      "step": 164047
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6381189823150635,
      "learning_rate": 0.00011463869388354023,
      "loss": 3.0489,
      "step": 164048
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4473986625671387,
      "learning_rate": 0.0001146354775757221,
      "loss": 3.1418,
      "step": 164049
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3677597045898438,
      "learning_rate": 0.00011463226130236685,
      "loss": 2.9549,
      "step": 164050
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4593281745910645,
      "learning_rate": 0.00011462904506347493,
      "loss": 3.0524,
      "step": 164051
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.417343854904175,
      "learning_rate": 0.00011462582885904689,
      "loss": 3.0014,
      "step": 164052
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6213274002075195,
      "learning_rate": 0.00011462261268908351,
      "loss": 2.9159,
      "step": 164053
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1710028648376465,
      "learning_rate": 0.00011461939655358528,
      "loss": 2.9486,
      "step": 164054
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5143773555755615,
      "learning_rate": 0.00011461618045255274,
      "loss": 3.2201,
      "step": 164055
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4545531272888184,
      "learning_rate": 0.0001146129643859866,
      "loss": 3.002,
      "step": 164056
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.866881847381592,
      "learning_rate": 0.00011460974835388731,
      "loss": 2.7877,
      "step": 164057
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2941012382507324,
      "learning_rate": 0.00011460653235625571,
      "loss": 3.1141,
      "step": 164058
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1117851734161377,
      "learning_rate": 0.0001146033163930922,
      "loss": 3.1064,
      "step": 164059
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6024749279022217,
      "learning_rate": 0.0001146001004643975,
      "loss": 2.6977,
      "step": 164060
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3860249519348145,
      "learning_rate": 0.00011459688457017198,
      "loss": 3.2093,
      "step": 164061
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.739798069000244,
      "learning_rate": 0.00011459366871041651,
      "loss": 3.121,
      "step": 164062
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.340261459350586,
      "learning_rate": 0.0001145904528851315,
      "loss": 3.0762,
      "step": 164063
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5632901191711426,
      "learning_rate": 0.00011458723709431772,
      "loss": 2.88,
      "step": 164064
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.445734739303589,
      "learning_rate": 0.00011458402133797567,
      "loss": 2.7953,
      "step": 164065
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.931525468826294,
      "learning_rate": 0.00011458080561610595,
      "loss": 2.9937,
      "step": 164066
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.569702386856079,
      "learning_rate": 0.00011457758992870906,
      "loss": 2.9911,
      "step": 164067
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.718397855758667,
      "learning_rate": 0.00011457437427578578,
      "loss": 3.0039,
      "step": 164068
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.582339286804199,
      "learning_rate": 0.00011457115865733653,
      "loss": 2.8379,
      "step": 164069
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.73439359664917,
      "learning_rate": 0.00011456794307336211,
      "loss": 2.8219,
      "step": 164070
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.688793182373047,
      "learning_rate": 0.00011456472752386291,
      "loss": 2.8061,
      "step": 164071
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.004940986633301,
      "learning_rate": 0.00011456151200883983,
      "loss": 2.8018,
      "step": 164072
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2280211448669434,
      "learning_rate": 0.00011455829652829303,
      "loss": 3.0089,
      "step": 164073
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.772993803024292,
      "learning_rate": 0.0001145550810822235,
      "loss": 2.9946,
      "step": 164074
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2573230266571045,
      "learning_rate": 0.00011455186567063155,
      "loss": 2.8491,
      "step": 164075
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6516149044036865,
      "learning_rate": 0.00011454865029351802,
      "loss": 2.9896,
      "step": 164076
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0725409984588623,
      "learning_rate": 0.00011454543495088328,
      "loss": 2.7463,
      "step": 164077
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.465087652206421,
      "learning_rate": 0.00011454221964272815,
      "loss": 2.8675,
      "step": 164078
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5985989570617676,
      "learning_rate": 0.00011453900436905314,
      "loss": 3.2064,
      "step": 164079
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.339315414428711,
      "learning_rate": 0.00011453578912985883,
      "loss": 2.9886,
      "step": 164080
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7254555225372314,
      "learning_rate": 0.00011453257392514568,
      "loss": 3.0274,
      "step": 164081
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7734315395355225,
      "learning_rate": 0.00011452935875491453,
      "loss": 3.2633,
      "step": 164082
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.65736722946167,
      "learning_rate": 0.0001145261436191658,
      "loss": 3.0781,
      "step": 164083
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.8286283016204834,
      "learning_rate": 0.00011452292851790025,
      "loss": 3.1073,
      "step": 164084
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.035895586013794,
      "learning_rate": 0.00011451971345111841,
      "loss": 3.1838,
      "step": 164085
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5363142490386963,
      "learning_rate": 0.00011451649841882082,
      "loss": 2.9718,
      "step": 164086
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2770180702209473,
      "learning_rate": 0.00011451328342100805,
      "loss": 3.0183,
      "step": 164087
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.002779483795166,
      "learning_rate": 0.00011451006845768086,
      "loss": 3.0523,
      "step": 164088
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8824708461761475,
      "learning_rate": 0.00011450685352883961,
      "loss": 2.9772,
      "step": 164089
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0189526081085205,
      "learning_rate": 0.00011450363863448515,
      "loss": 3.0191,
      "step": 164090
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.948054313659668,
      "learning_rate": 0.000114500423774618,
      "loss": 2.9007,
      "step": 164091
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9301271438598633,
      "learning_rate": 0.0001144972089492387,
      "loss": 3.058,
      "step": 164092
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4211297035217285,
      "learning_rate": 0.00011449399415834777,
      "loss": 3.0374,
      "step": 164093
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.300363779067993,
      "learning_rate": 0.00011449077940194601,
      "loss": 2.9223,
      "step": 164094
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4770593643188477,
      "learning_rate": 0.00011448756468003381,
      "loss": 3.0109,
      "step": 164095
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3315751552581787,
      "learning_rate": 0.000114484349992612,
      "loss": 3.0233,
      "step": 164096
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3784995079040527,
      "learning_rate": 0.00011448113533968101,
      "loss": 2.9152,
      "step": 164097
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.630516529083252,
      "learning_rate": 0.0001144779207212415,
      "loss": 2.9512,
      "step": 164098
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0009078979492188,
      "learning_rate": 0.00011447470613729393,
      "loss": 2.961,
      "step": 164099
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4489517211914062,
      "learning_rate": 0.00011447149158783914,
      "loss": 3.0737,
      "step": 164100
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.355727195739746,
      "learning_rate": 0.00011446827707287746,
      "loss": 3.0145,
      "step": 164101
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4955875873565674,
      "learning_rate": 0.0001144650625924098,
      "loss": 2.8528,
      "step": 164102
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6233975887298584,
      "learning_rate": 0.00011446184814643653,
      "loss": 3.3067,
      "step": 164103
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.126446008682251,
      "learning_rate": 0.00011445863373495834,
      "loss": 2.7568,
      "step": 164104
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.300884962081909,
      "learning_rate": 0.00011445541935797565,
      "loss": 2.9874,
      "step": 164105
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.416504383087158,
      "learning_rate": 0.00011445220501548933,
      "loss": 2.9708,
      "step": 164106
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.806175470352173,
      "learning_rate": 0.00011444899070749973,
      "loss": 3.0109,
      "step": 164107
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.946779489517212,
      "learning_rate": 0.0001144457764340077,
      "loss": 2.666,
      "step": 164108
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0550923347473145,
      "learning_rate": 0.00011444256219501366,
      "loss": 2.9192,
      "step": 164109
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.693568229675293,
      "learning_rate": 0.00011443934799051827,
      "loss": 2.96,
      "step": 164110
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4976511001586914,
      "learning_rate": 0.00011443613382052202,
      "loss": 2.7244,
      "step": 164111
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.055169105529785,
      "learning_rate": 0.00011443291968502569,
      "loss": 2.8223,
      "step": 164112
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5409443378448486,
      "learning_rate": 0.00011442970558402968,
      "loss": 3.0276,
      "step": 164113
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5432426929473877,
      "learning_rate": 0.00011442649151753475,
      "loss": 2.9884,
      "step": 164114
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.622418165206909,
      "learning_rate": 0.00011442327748554151,
      "loss": 3.1768,
      "step": 164115
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.072704553604126,
      "learning_rate": 0.00011442006348805043,
      "loss": 3.0285,
      "step": 164116
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.067870140075684,
      "learning_rate": 0.00011441684952506208,
      "loss": 2.9375,
      "step": 164117
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1536288261413574,
      "learning_rate": 0.00011441363559657727,
      "loss": 3.0393,
      "step": 164118
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8857734203338623,
      "learning_rate": 0.00011441042170259632,
      "loss": 2.7979,
      "step": 164119
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.848897695541382,
      "learning_rate": 0.0001144072078431201,
      "loss": 3.0382,
      "step": 164120
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.56497859954834,
      "learning_rate": 0.00011440399401814909,
      "loss": 3.0358,
      "step": 164121
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9583752155303955,
      "learning_rate": 0.00011440078022768387,
      "loss": 3.018,
      "step": 164122
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.339186191558838,
      "learning_rate": 0.00011439756647172495,
      "loss": 2.7866,
      "step": 164123
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8593947887420654,
      "learning_rate": 0.00011439435275027313,
      "loss": 3.3059,
      "step": 164124
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.861679792404175,
      "learning_rate": 0.0001143911390633288,
      "loss": 2.9578,
      "step": 164125
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1239261627197266,
      "learning_rate": 0.00011438792541089277,
      "loss": 2.9232,
      "step": 164126
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.335458755493164,
      "learning_rate": 0.0001143847117929655,
      "loss": 2.8287,
      "step": 164127
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6405189037323,
      "learning_rate": 0.00011438149820954767,
      "loss": 2.9746,
      "step": 164128
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.758100748062134,
      "learning_rate": 0.00011437828466063966,
      "loss": 2.9451,
      "step": 164129
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.376817464828491,
      "learning_rate": 0.00011437507114624238,
      "loss": 2.8387,
      "step": 164130
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2971127033233643,
      "learning_rate": 0.00011437185766635615,
      "loss": 2.8906,
      "step": 164131
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.664017677307129,
      "learning_rate": 0.0001143686442209818,
      "loss": 2.9959,
      "step": 164132
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8377327919006348,
      "learning_rate": 0.00011436543081011983,
      "loss": 3.1167,
      "step": 164133
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5238022804260254,
      "learning_rate": 0.00011436221743377072,
      "loss": 3.1123,
      "step": 164134
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.306333065032959,
      "learning_rate": 0.00011435900409193531,
      "loss": 2.8639,
      "step": 164135
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2596073150634766,
      "learning_rate": 0.00011435579078461405,
      "loss": 2.7614,
      "step": 164136
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4673311710357666,
      "learning_rate": 0.00011435257751180741,
      "loss": 3.1325,
      "step": 164137
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.292947292327881,
      "learning_rate": 0.00011434936427351626,
      "loss": 3.0364,
      "step": 164138
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.590183973312378,
      "learning_rate": 0.00011434615106974107,
      "loss": 2.748,
      "step": 164139
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2762067317962646,
      "learning_rate": 0.00011434293790048235,
      "loss": 2.8767,
      "step": 164140
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6946423053741455,
      "learning_rate": 0.00011433972476574088,
      "loss": 3.0682,
      "step": 164141
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.865997314453125,
      "learning_rate": 0.00011433651166551715,
      "loss": 3.1054,
      "step": 164142
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.194628953933716,
      "learning_rate": 0.00011433329859981166,
      "loss": 3.0367,
      "step": 164143
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.780031681060791,
      "learning_rate": 0.00011433008556862521,
      "loss": 2.7978,
      "step": 164144
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.629542112350464,
      "learning_rate": 0.00011432687257195821,
      "loss": 2.8116,
      "step": 164145
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.316584587097168,
      "learning_rate": 0.00011432365960981148,
      "loss": 2.8855,
      "step": 164146
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5593338012695312,
      "learning_rate": 0.00011432044668218542,
      "loss": 2.9823,
      "step": 164147
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2246055603027344,
      "learning_rate": 0.00011431723378908066,
      "loss": 2.9458,
      "step": 164148
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6246097087860107,
      "learning_rate": 0.00011431402093049788,
      "loss": 2.7554,
      "step": 164149
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.845148801803589,
      "learning_rate": 0.00011431080810643769,
      "loss": 2.8092,
      "step": 164150
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.51910662651062,
      "learning_rate": 0.00011430759531690047,
      "loss": 2.9399,
      "step": 164151
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.129188060760498,
      "learning_rate": 0.00011430438256188708,
      "loss": 2.8036,
      "step": 164152
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2081336975097656,
      "learning_rate": 0.00011430116984139804,
      "loss": 2.9402,
      "step": 164153
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4378931522369385,
      "learning_rate": 0.00011429795715543379,
      "loss": 3.1025,
      "step": 164154
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2126340866088867,
      "learning_rate": 0.00011429474450399517,
      "loss": 2.8128,
      "step": 164155
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.926692247390747,
      "learning_rate": 0.00011429153188708255,
      "loss": 2.723,
      "step": 164156
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7313530445098877,
      "learning_rate": 0.00011428831930469677,
      "loss": 2.8731,
      "step": 164157
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.0624186992645264,
      "learning_rate": 0.00011428510675683827,
      "loss": 2.8314,
      "step": 164158
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.597825288772583,
      "learning_rate": 0.00011428189424350768,
      "loss": 3.1376,
      "step": 164159
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5988240242004395,
      "learning_rate": 0.00011427868176470547,
      "loss": 2.7715,
      "step": 164160
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.457576036453247,
      "learning_rate": 0.00011427546932043248,
      "loss": 2.9382,
      "step": 164161
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.016756534576416,
      "learning_rate": 0.0001142722569106891,
      "loss": 2.9516,
      "step": 164162
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1470232009887695,
      "learning_rate": 0.0001142690445354761,
      "loss": 2.8502,
      "step": 164163
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5253171920776367,
      "learning_rate": 0.00011426583219479397,
      "loss": 2.9854,
      "step": 164164
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.42700457572937,
      "learning_rate": 0.00011426261988864336,
      "loss": 2.7224,
      "step": 164165
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6195602416992188,
      "learning_rate": 0.00011425940761702472,
      "loss": 3.0112,
      "step": 164166
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9881646633148193,
      "learning_rate": 0.00011425619537993887,
      "loss": 3.1336,
      "step": 164167
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.378415822982788,
      "learning_rate": 0.00011425298317738619,
      "loss": 2.9795,
      "step": 164168
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5617480278015137,
      "learning_rate": 0.00011424977100936747,
      "loss": 2.988,
      "step": 164169
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6998064517974854,
      "learning_rate": 0.00011424655887588324,
      "loss": 2.9568,
      "step": 164170
    },
    {
      "epoch": 2.14,
      "grad_norm": 6.06252908706665,
      "learning_rate": 0.00011424334677693412,
      "loss": 3.1083,
      "step": 164171
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.172199249267578,
      "learning_rate": 0.00011424013471252051,
      "loss": 3.2269,
      "step": 164172
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3627092838287354,
      "learning_rate": 0.0001142369226826433,
      "loss": 3.1682,
      "step": 164173
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.0795271396636963,
      "learning_rate": 0.00011423371068730283,
      "loss": 3.1448,
      "step": 164174
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.657958984375,
      "learning_rate": 0.00011423049872649993,
      "loss": 2.9145,
      "step": 164175
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7409636974334717,
      "learning_rate": 0.00011422728680023511,
      "loss": 2.8332,
      "step": 164176
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.473472833633423,
      "learning_rate": 0.00011422407490850892,
      "loss": 3.0583,
      "step": 164177
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.371363639831543,
      "learning_rate": 0.00011422086305132187,
      "loss": 2.807,
      "step": 164178
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1912617683410645,
      "learning_rate": 0.00011421765122867477,
      "loss": 3.1332,
      "step": 164179
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.448925256729126,
      "learning_rate": 0.00011421443944056803,
      "loss": 3.0454,
      "step": 164180
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9767708778381348,
      "learning_rate": 0.00011421122768700243,
      "loss": 3.1264,
      "step": 164181
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.659036874771118,
      "learning_rate": 0.0001142080159679785,
      "loss": 3.1487,
      "step": 164182
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.066030979156494,
      "learning_rate": 0.00011420480428349677,
      "loss": 3.1137,
      "step": 164183
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9394547939300537,
      "learning_rate": 0.00011420159263355777,
      "loss": 2.9772,
      "step": 164184
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.379654884338379,
      "learning_rate": 0.00011419838101816231,
      "loss": 2.9872,
      "step": 164185
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.009272336959839,
      "learning_rate": 0.00011419516943731078,
      "loss": 3.1243,
      "step": 164186
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.8077762126922607,
      "learning_rate": 0.00011419195789100396,
      "loss": 3.0659,
      "step": 164187
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.185591697692871,
      "learning_rate": 0.0001141887463792424,
      "loss": 2.9331,
      "step": 164188
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6860475540161133,
      "learning_rate": 0.00011418553490202665,
      "loss": 3.1976,
      "step": 164189
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.375330924987793,
      "learning_rate": 0.00011418232345935718,
      "loss": 3.2021,
      "step": 164190
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.794774055480957,
      "learning_rate": 0.00011417911205123486,
      "loss": 2.8177,
      "step": 164191
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.8253796100616455,
      "learning_rate": 0.00011417590067766003,
      "loss": 3.0729,
      "step": 164192
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.284682512283325,
      "learning_rate": 0.00011417268933863353,
      "loss": 2.7896,
      "step": 164193
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.505232095718384,
      "learning_rate": 0.00011416947803415582,
      "loss": 3.1426,
      "step": 164194
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.501004934310913,
      "learning_rate": 0.00011416626676422749,
      "loss": 3.0862,
      "step": 164195
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.431323528289795,
      "learning_rate": 0.00011416305552884908,
      "loss": 3.1403,
      "step": 164196
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.923987865447998,
      "learning_rate": 0.00011415984432802136,
      "loss": 3.0493,
      "step": 164197
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.641648054122925,
      "learning_rate": 0.00011415663316174475,
      "loss": 2.8861,
      "step": 164198
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.936316967010498,
      "learning_rate": 0.00011415342203002002,
      "loss": 2.9517,
      "step": 164199
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1653873920440674,
      "learning_rate": 0.0001141502109328477,
      "loss": 3.0337,
      "step": 164200
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.496570110321045,
      "learning_rate": 0.00011414699987022832,
      "loss": 2.9988,
      "step": 164201
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.739043951034546,
      "learning_rate": 0.00011414378884216246,
      "loss": 3.0391,
      "step": 164202
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.397792339324951,
      "learning_rate": 0.00011414057784865087,
      "loss": 3.1058,
      "step": 164203
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.910443067550659,
      "learning_rate": 0.00011413736688969391,
      "loss": 3.0899,
      "step": 164204
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.541957378387451,
      "learning_rate": 0.00011413415596529248,
      "loss": 3.1696,
      "step": 164205
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.097932815551758,
      "learning_rate": 0.00011413094507544701,
      "loss": 2.751,
      "step": 164206
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.684262275695801,
      "learning_rate": 0.00011412773422015806,
      "loss": 2.7543,
      "step": 164207
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.279677152633667,
      "learning_rate": 0.00011412452339942621,
      "loss": 3.0806,
      "step": 164208
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3969852924346924,
      "learning_rate": 0.00011412131261325223,
      "loss": 2.8604,
      "step": 164209
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2761690616607666,
      "learning_rate": 0.0001141181018616365,
      "loss": 2.838,
      "step": 164210
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5274248123168945,
      "learning_rate": 0.0001141148911445798,
      "loss": 3.1118,
      "step": 164211
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.860431671142578,
      "learning_rate": 0.00011411168046208256,
      "loss": 2.9675,
      "step": 164212
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2965197563171387,
      "learning_rate": 0.00011410846981414569,
      "loss": 2.9457,
      "step": 164213
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.607882261276245,
      "learning_rate": 0.00011410525920076933,
      "loss": 3.0375,
      "step": 164214
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4779889583587646,
      "learning_rate": 0.00011410204862195442,
      "loss": 2.7725,
      "step": 164215
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8455286026000977,
      "learning_rate": 0.00011409883807770137,
      "loss": 2.9674,
      "step": 164216
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5916881561279297,
      "learning_rate": 0.00011409562756801094,
      "loss": 2.9231,
      "step": 164217
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.235431671142578,
      "learning_rate": 0.00011409241709288356,
      "loss": 3.0284,
      "step": 164218
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4454431533813477,
      "learning_rate": 0.00011408920665232,
      "loss": 3.2682,
      "step": 164219
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9063374996185303,
      "learning_rate": 0.00011408599624632075,
      "loss": 2.9182,
      "step": 164220
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2118635177612305,
      "learning_rate": 0.00011408278587488643,
      "loss": 3.0817,
      "step": 164221
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.663353443145752,
      "learning_rate": 0.00011407957553801752,
      "loss": 2.98,
      "step": 164222
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.435070037841797,
      "learning_rate": 0.00011407636523571486,
      "loss": 2.897,
      "step": 164223
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.597034454345703,
      "learning_rate": 0.00011407315496797879,
      "loss": 2.9209,
      "step": 164224
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.060495615005493,
      "learning_rate": 0.00011406994473481013,
      "loss": 2.7423,
      "step": 164225
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.279902458190918,
      "learning_rate": 0.00011406673453620938,
      "loss": 2.8989,
      "step": 164226
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.006993293762207,
      "learning_rate": 0.00011406352437217712,
      "loss": 2.8588,
      "step": 164227
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.172457218170166,
      "learning_rate": 0.00011406031424271387,
      "loss": 3.0394,
      "step": 164228
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.0306572914123535,
      "learning_rate": 0.00011405710414782044,
      "loss": 2.8315,
      "step": 164229
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.350554943084717,
      "learning_rate": 0.00011405389408749718,
      "loss": 2.9433,
      "step": 164230
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.073498725891113,
      "learning_rate": 0.0001140506840617449,
      "loss": 2.8506,
      "step": 164231
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8808822631835938,
      "learning_rate": 0.00011404747407056414,
      "loss": 2.8081,
      "step": 164232
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.946320056915283,
      "learning_rate": 0.00011404426411395532,
      "loss": 2.9935,
      "step": 164233
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2520804405212402,
      "learning_rate": 0.00011404105419191932,
      "loss": 2.9562,
      "step": 164234
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.227389097213745,
      "learning_rate": 0.00011403784430445658,
      "loss": 2.9431,
      "step": 164235
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.230319499969482,
      "learning_rate": 0.00011403463445156763,
      "loss": 2.7736,
      "step": 164236
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3008341789245605,
      "learning_rate": 0.00011403142463325325,
      "loss": 3.191,
      "step": 164237
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.343217134475708,
      "learning_rate": 0.00011402821484951393,
      "loss": 2.7842,
      "step": 164238
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7611446380615234,
      "learning_rate": 0.00011402500510035016,
      "loss": 2.99,
      "step": 164239
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7424402236938477,
      "learning_rate": 0.00011402179538576278,
      "loss": 3.1916,
      "step": 164240
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.794355630874634,
      "learning_rate": 0.00011401858570575227,
      "loss": 2.6525,
      "step": 164241
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5871970653533936,
      "learning_rate": 0.00011401537606031907,
      "loss": 2.7519,
      "step": 164242
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2024245262145996,
      "learning_rate": 0.00011401216644946405,
      "loss": 3.0019,
      "step": 164243
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.07045841217041,
      "learning_rate": 0.0001140089568731877,
      "loss": 2.858,
      "step": 164244
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.007142066955566,
      "learning_rate": 0.00011400574733149045,
      "loss": 2.8752,
      "step": 164245
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9439468383789062,
      "learning_rate": 0.00011400253782437317,
      "loss": 2.8435,
      "step": 164246
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5528054237365723,
      "learning_rate": 0.00011399932835183624,
      "loss": 3.0515,
      "step": 164247
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0297486782073975,
      "learning_rate": 0.00011399611891388041,
      "loss": 2.8087,
      "step": 164248
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5894148349761963,
      "learning_rate": 0.00011399290951050624,
      "loss": 2.9141,
      "step": 164249
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1135435104370117,
      "learning_rate": 0.0001139897001417143,
      "loss": 2.74,
      "step": 164250
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7040607929229736,
      "learning_rate": 0.00011398649080750507,
      "loss": 3.0752,
      "step": 164251
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1695804595947266,
      "learning_rate": 0.00011398328150787938,
      "loss": 2.8294,
      "step": 164252
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.671605348587036,
      "learning_rate": 0.00011398007224283761,
      "loss": 3.1542,
      "step": 164253
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0894808769226074,
      "learning_rate": 0.00011397686301238052,
      "loss": 3.0605,
      "step": 164254
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.4327738285064697,
      "learning_rate": 0.00011397365381650868,
      "loss": 2.87,
      "step": 164255
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.367199420928955,
      "learning_rate": 0.00011397044465522263,
      "loss": 2.931,
      "step": 164256
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.524616003036499,
      "learning_rate": 0.00011396723552852289,
      "loss": 2.8251,
      "step": 164257
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0349338054656982,
      "learning_rate": 0.00011396402643641025,
      "loss": 2.8385,
      "step": 164258
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0620524883270264,
      "learning_rate": 0.00011396081737888514,
      "loss": 3.015,
      "step": 164259
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.540659189224243,
      "learning_rate": 0.00011395760835594829,
      "loss": 3.1003,
      "step": 164260
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9586703777313232,
      "learning_rate": 0.00011395439936760024,
      "loss": 2.8088,
      "step": 164261
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2563533782958984,
      "learning_rate": 0.0001139511904138416,
      "loss": 2.8953,
      "step": 164262
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.686753273010254,
      "learning_rate": 0.0001139479814946728,
      "loss": 2.9231,
      "step": 164263
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.676983833312988,
      "learning_rate": 0.00011394477261009472,
      "loss": 2.7019,
      "step": 164264
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8042473793029785,
      "learning_rate": 0.0001139415637601077,
      "loss": 3.1823,
      "step": 164265
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6999189853668213,
      "learning_rate": 0.00011393835494471257,
      "loss": 3.2333,
      "step": 164266
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4684839248657227,
      "learning_rate": 0.0001139351461639098,
      "loss": 2.9559,
      "step": 164267
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.498779058456421,
      "learning_rate": 0.00011393193741770004,
      "loss": 2.9048,
      "step": 164268
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.495321035385132,
      "learning_rate": 0.0001139287287060837,
      "loss": 2.7199,
      "step": 164269
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4588255882263184,
      "learning_rate": 0.0001139255200290616,
      "loss": 2.805,
      "step": 164270
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6410884857177734,
      "learning_rate": 0.00011392231138663422,
      "loss": 2.879,
      "step": 164271
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.562626600265503,
      "learning_rate": 0.00011391910277880227,
      "loss": 2.6875,
      "step": 164272
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3215932846069336,
      "learning_rate": 0.00011391589420556627,
      "loss": 2.8598,
      "step": 164273
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2709686756134033,
      "learning_rate": 0.00011391268566692682,
      "loss": 2.8649,
      "step": 164274
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3331093788146973,
      "learning_rate": 0.0001139094771628844,
      "loss": 3.1206,
      "step": 164275
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.527085542678833,
      "learning_rate": 0.0001139062686934398,
      "loss": 3.0169,
      "step": 164276
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7952542304992676,
      "learning_rate": 0.00011390306025859347,
      "loss": 3.0449,
      "step": 164277
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.201923131942749,
      "learning_rate": 0.0001138998518583462,
      "loss": 3.0255,
      "step": 164278
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5270748138427734,
      "learning_rate": 0.0001138966434926983,
      "loss": 2.8959,
      "step": 164279
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7901105880737305,
      "learning_rate": 0.00011389343516165077,
      "loss": 3.0595,
      "step": 164280
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.358024835586548,
      "learning_rate": 0.00011389022686520376,
      "loss": 2.8969,
      "step": 164281
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1278011798858643,
      "learning_rate": 0.00011388701860335817,
      "loss": 2.9936,
      "step": 164282
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5856244564056396,
      "learning_rate": 0.00011388381037611436,
      "loss": 2.9304,
      "step": 164283
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.421086549758911,
      "learning_rate": 0.00011388060218347323,
      "loss": 2.9556,
      "step": 164284
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.550910711288452,
      "learning_rate": 0.00011387739402543506,
      "loss": 2.925,
      "step": 164285
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2305562496185303,
      "learning_rate": 0.0001138741859020008,
      "loss": 2.9825,
      "step": 164286
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3362457752227783,
      "learning_rate": 0.00011387097781317064,
      "loss": 2.7433,
      "step": 164287
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.978118658065796,
      "learning_rate": 0.00011386776975894551,
      "loss": 3.0289,
      "step": 164288
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.172600507736206,
      "learning_rate": 0.00011386456173932572,
      "loss": 2.9772,
      "step": 164289
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.639956474304199,
      "learning_rate": 0.00011386135375431215,
      "loss": 2.9669,
      "step": 164290
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.430389881134033,
      "learning_rate": 0.00011385814580390517,
      "loss": 2.7131,
      "step": 164291
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.722349166870117,
      "learning_rate": 0.00011385493788810572,
      "loss": 3.329,
      "step": 164292
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7297229766845703,
      "learning_rate": 0.0001138517300069139,
      "loss": 3.0153,
      "step": 164293
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4314510822296143,
      "learning_rate": 0.00011384852216033064,
      "loss": 2.914,
      "step": 164294
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0815742015838623,
      "learning_rate": 0.00011384531434835639,
      "loss": 2.9332,
      "step": 164295
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3789520263671875,
      "learning_rate": 0.00011384210657099188,
      "loss": 3.2,
      "step": 164296
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0800487995147705,
      "learning_rate": 0.00011383889882823756,
      "loss": 2.9734,
      "step": 164297
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.829263687133789,
      "learning_rate": 0.0001138356911200943,
      "loss": 2.9606,
      "step": 164298
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.107720375061035,
      "learning_rate": 0.0001138324834465623,
      "loss": 2.8391,
      "step": 164299
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0537681579589844,
      "learning_rate": 0.00011382927580764246,
      "loss": 2.8076,
      "step": 164300
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4272713661193848,
      "learning_rate": 0.00011382606820333517,
      "loss": 3.1654,
      "step": 164301
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.395010232925415,
      "learning_rate": 0.00011382286063364121,
      "loss": 3.1367,
      "step": 164302
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6168317794799805,
      "learning_rate": 0.000113819653098561,
      "loss": 2.9264,
      "step": 164303
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.0595526695251465,
      "learning_rate": 0.00011381644559809545,
      "loss": 3.1996,
      "step": 164304
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4312777519226074,
      "learning_rate": 0.00011381323813224472,
      "loss": 2.9542,
      "step": 164305
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.248187303543091,
      "learning_rate": 0.00011381003070100971,
      "loss": 2.9564,
      "step": 164306
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.512239933013916,
      "learning_rate": 0.00011380682330439085,
      "loss": 2.8328,
      "step": 164307
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.423178195953369,
      "learning_rate": 0.00011380361594238892,
      "loss": 3.0295,
      "step": 164308
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4125635623931885,
      "learning_rate": 0.00011380040861500431,
      "loss": 2.7898,
      "step": 164309
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.560096502304077,
      "learning_rate": 0.0001137972013222378,
      "loss": 3.0197,
      "step": 164310
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.029138565063477,
      "learning_rate": 0.00011379399406408994,
      "loss": 3.1927,
      "step": 164311
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.4085049629211426,
      "learning_rate": 0.00011379078684056129,
      "loss": 2.9098,
      "step": 164312
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6142706871032715,
      "learning_rate": 0.0001137875796516523,
      "loss": 3.0383,
      "step": 164313
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.5920567512512207,
      "learning_rate": 0.00011378437249736387,
      "loss": 2.6751,
      "step": 164314
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2487621307373047,
      "learning_rate": 0.00011378116537769629,
      "loss": 2.9196,
      "step": 164315
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2211673259735107,
      "learning_rate": 0.00011377795829265042,
      "loss": 2.913,
      "step": 164316
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.9474592208862305,
      "learning_rate": 0.00011377475124222676,
      "loss": 2.8679,
      "step": 164317
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.3511199951171875,
      "learning_rate": 0.00011377154422642578,
      "loss": 2.9834,
      "step": 164318
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.136966705322266,
      "learning_rate": 0.0001137683372452483,
      "loss": 2.9242,
      "step": 164319
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6187844276428223,
      "learning_rate": 0.00011376513029869479,
      "loss": 2.9867,
      "step": 164320
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6930835247039795,
      "learning_rate": 0.00011376192338676574,
      "loss": 2.907,
      "step": 164321
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5053510665893555,
      "learning_rate": 0.000113758716509462,
      "loss": 2.7049,
      "step": 164322
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.443033218383789,
      "learning_rate": 0.000113755509666784,
      "loss": 2.6971,
      "step": 164323
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.548295259475708,
      "learning_rate": 0.00011375230285873229,
      "loss": 2.8361,
      "step": 164324
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1356682777404785,
      "learning_rate": 0.00011374909608530762,
      "loss": 2.7672,
      "step": 164325
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4072718620300293,
      "learning_rate": 0.00011374588934651053,
      "loss": 2.9965,
      "step": 164326
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.316274881362915,
      "learning_rate": 0.00011374268264234148,
      "loss": 2.8719,
      "step": 164327
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1851367950439453,
      "learning_rate": 0.00011373947597280128,
      "loss": 2.6253,
      "step": 164328
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.472935199737549,
      "learning_rate": 0.00011373626933789045,
      "loss": 2.6348,
      "step": 164329
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4064605236053467,
      "learning_rate": 0.00011373306273760941,
      "loss": 3.0393,
      "step": 164330
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.339634656906128,
      "learning_rate": 0.00011372985617195908,
      "loss": 2.7697,
      "step": 164331
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.20211124420166,
      "learning_rate": 0.00011372664964093973,
      "loss": 3.1033,
      "step": 164332
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.511143445968628,
      "learning_rate": 0.00011372344314455223,
      "loss": 2.6715,
      "step": 164333
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8175601959228516,
      "learning_rate": 0.00011372023668279709,
      "loss": 2.672,
      "step": 164334
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.242767333984375,
      "learning_rate": 0.00011371703025567483,
      "loss": 3.0149,
      "step": 164335
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.334540843963623,
      "learning_rate": 0.00011371382386318602,
      "loss": 2.9739,
      "step": 164336
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.373094320297241,
      "learning_rate": 0.0001137106175053314,
      "loss": 2.8116,
      "step": 164337
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7667124271392822,
      "learning_rate": 0.00011370741118211139,
      "loss": 2.8442,
      "step": 164338
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.809190511703491,
      "learning_rate": 0.00011370420489352684,
      "loss": 2.7177,
      "step": 164339
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.943394422531128,
      "learning_rate": 0.00011370099863957812,
      "loss": 2.7527,
      "step": 164340
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.88716197013855,
      "learning_rate": 0.00011369779242026598,
      "loss": 2.8294,
      "step": 164341
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6312594413757324,
      "learning_rate": 0.00011369458623559079,
      "loss": 2.9437,
      "step": 164342
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.806696891784668,
      "learning_rate": 0.0001136913800855534,
      "loss": 2.9133,
      "step": 164343
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.332749605178833,
      "learning_rate": 0.0001136881739701542,
      "loss": 3.0711,
      "step": 164344
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3605823516845703,
      "learning_rate": 0.00011368496788939399,
      "loss": 2.9349,
      "step": 164345
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2202258110046387,
      "learning_rate": 0.00011368176184327313,
      "loss": 3.2342,
      "step": 164346
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.470784902572632,
      "learning_rate": 0.00011367855583179257,
      "loss": 3.0713,
      "step": 164347
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.837263345718384,
      "learning_rate": 0.0001136753498549525,
      "loss": 2.7662,
      "step": 164348
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2321462631225586,
      "learning_rate": 0.00011367214391275379,
      "loss": 3.2476,
      "step": 164349
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6526942253112793,
      "learning_rate": 0.00011366893800519683,
      "loss": 2.9485,
      "step": 164350
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6466386318206787,
      "learning_rate": 0.00011366573213228246,
      "loss": 2.9574,
      "step": 164351
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6948580741882324,
      "learning_rate": 0.00011366252629401101,
      "loss": 3.1439,
      "step": 164352
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0697410106658936,
      "learning_rate": 0.00011365932049038345,
      "loss": 3.0813,
      "step": 164353
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3763301372528076,
      "learning_rate": 0.00011365611472139991,
      "loss": 3.0179,
      "step": 164354
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.523007869720459,
      "learning_rate": 0.00011365290898706136,
      "loss": 2.9949,
      "step": 164355
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3399598598480225,
      "learning_rate": 0.0001136497032873681,
      "loss": 3.2333,
      "step": 164356
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.668349266052246,
      "learning_rate": 0.00011364649762232102,
      "loss": 2.8699,
      "step": 164357
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1996638774871826,
      "learning_rate": 0.00011364329199192047,
      "loss": 2.9926,
      "step": 164358
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.179238796234131,
      "learning_rate": 0.00011364008639616732,
      "loss": 2.8427,
      "step": 164359
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.253685474395752,
      "learning_rate": 0.0001136368808350618,
      "loss": 2.6716,
      "step": 164360
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5485994815826416,
      "learning_rate": 0.00011363367530860483,
      "loss": 3.1992,
      "step": 164361
    },
    {
      "epoch": 2.14,
      "grad_norm": 1.950056791305542,
      "learning_rate": 0.00011363046981679673,
      "loss": 2.6127,
      "step": 164362
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.223996162414551,
      "learning_rate": 0.00011362726435963839,
      "loss": 3.0471,
      "step": 164363
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0872135162353516,
      "learning_rate": 0.00011362405893713014,
      "loss": 2.8464,
      "step": 164364
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5506134033203125,
      "learning_rate": 0.0001136208535492729,
      "loss": 2.9796,
      "step": 164365
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2184486389160156,
      "learning_rate": 0.00011361764819606687,
      "loss": 2.9646,
      "step": 164366
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.54463267326355,
      "learning_rate": 0.00011361444287751293,
      "loss": 2.9164,
      "step": 164367
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6185998916625977,
      "learning_rate": 0.00011361123759361146,
      "loss": 2.9412,
      "step": 164368
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.158156633377075,
      "learning_rate": 0.00011360803234436331,
      "loss": 3.0413,
      "step": 164369
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3302416801452637,
      "learning_rate": 0.00011360482712976884,
      "loss": 3.0474,
      "step": 164370
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.144994020462036,
      "learning_rate": 0.00011360162194982894,
      "loss": 3.2186,
      "step": 164371
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.457728624343872,
      "learning_rate": 0.00011359841680454383,
      "loss": 3.0718,
      "step": 164372
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1976566314697266,
      "learning_rate": 0.0001135952116939144,
      "loss": 3.0015,
      "step": 164373
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6909024715423584,
      "learning_rate": 0.00011359200661794101,
      "loss": 2.8317,
      "step": 164374
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.738860845565796,
      "learning_rate": 0.0001135888015766245,
      "loss": 2.737,
      "step": 164375
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6892590522766113,
      "learning_rate": 0.00011358559656996527,
      "loss": 3.0642,
      "step": 164376
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.20794939994812,
      "learning_rate": 0.00011358239159796418,
      "loss": 2.8555,
      "step": 164377
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.832017183303833,
      "learning_rate": 0.00011357918666062143,
      "loss": 2.9424,
      "step": 164378
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.452146053314209,
      "learning_rate": 0.00011357598175793792,
      "loss": 2.7789,
      "step": 164379
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.451239585876465,
      "learning_rate": 0.00011357277688991408,
      "loss": 3.0152,
      "step": 164380
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2182257175445557,
      "learning_rate": 0.00011356957205655066,
      "loss": 2.666,
      "step": 164381
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3015248775482178,
      "learning_rate": 0.00011356636725784808,
      "loss": 2.896,
      "step": 164382
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6365432739257812,
      "learning_rate": 0.00011356316249380727,
      "loss": 2.8674,
      "step": 164383
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.501217842102051,
      "learning_rate": 0.00011355995776442833,
      "loss": 2.8215,
      "step": 164384
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.411268711090088,
      "learning_rate": 0.00011355675306971227,
      "loss": 2.9035,
      "step": 164385
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9765167236328125,
      "learning_rate": 0.00011355354840965937,
      "loss": 2.8181,
      "step": 164386
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.21213436126709,
      "learning_rate": 0.00011355034378427052,
      "loss": 2.7854,
      "step": 164387
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6474008560180664,
      "learning_rate": 0.00011354713919354608,
      "loss": 2.7416,
      "step": 164388
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.101491689682007,
      "learning_rate": 0.00011354393463748693,
      "loss": 2.8728,
      "step": 164389
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3981387615203857,
      "learning_rate": 0.00011354073011609329,
      "loss": 2.998,
      "step": 164390
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8997304439544678,
      "learning_rate": 0.00011353752562936605,
      "loss": 3.1059,
      "step": 164391
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.710057258605957,
      "learning_rate": 0.00011353432117730562,
      "loss": 2.9333,
      "step": 164392
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8477189540863037,
      "learning_rate": 0.00011353111675991275,
      "loss": 3.076,
      "step": 164393
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.390164613723755,
      "learning_rate": 0.00011352791237718789,
      "loss": 2.7443,
      "step": 164394
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8364429473876953,
      "learning_rate": 0.00011352470802913179,
      "loss": 3.1448,
      "step": 164395
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.814887285232544,
      "learning_rate": 0.000113521503715745,
      "loss": 2.9605,
      "step": 164396
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.891537666320801,
      "learning_rate": 0.00011351829943702804,
      "loss": 3.1003,
      "step": 164397
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.771574020385742,
      "learning_rate": 0.00011351509519298148,
      "loss": 3.0545,
      "step": 164398
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6956088542938232,
      "learning_rate": 0.00011351189098360605,
      "loss": 2.8348,
      "step": 164399
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4947972297668457,
      "learning_rate": 0.00011350868680890222,
      "loss": 3.2334,
      "step": 164400
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3076729774475098,
      "learning_rate": 0.00011350548266887072,
      "loss": 3.1396,
      "step": 164401
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.173886299133301,
      "learning_rate": 0.00011350227856351209,
      "loss": 2.8886,
      "step": 164402
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.913208484649658,
      "learning_rate": 0.00011349907449282688,
      "loss": 2.9895,
      "step": 164403
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.361511707305908,
      "learning_rate": 0.00011349587045681565,
      "loss": 3.2088,
      "step": 164404
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6215310096740723,
      "learning_rate": 0.00011349266645547909,
      "loss": 3.1094,
      "step": 164405
    },
    {
      "epoch": 2.14,
      "grad_norm": 6.090402603149414,
      "learning_rate": 0.00011348946248881772,
      "loss": 3.0681,
      "step": 164406
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.474676132202148,
      "learning_rate": 0.00011348625855683227,
      "loss": 2.8539,
      "step": 164407
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.578585624694824,
      "learning_rate": 0.00011348305465952324,
      "loss": 3.0874,
      "step": 164408
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8893837928771973,
      "learning_rate": 0.00011347985079689115,
      "loss": 3.0021,
      "step": 164409
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3459951877593994,
      "learning_rate": 0.00011347664696893675,
      "loss": 2.9088,
      "step": 164410
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.536769390106201,
      "learning_rate": 0.00011347344317566057,
      "loss": 2.6692,
      "step": 164411
    },
    {
      "epoch": 2.14,
      "grad_norm": 7.647494792938232,
      "learning_rate": 0.00011347023941706313,
      "loss": 2.9135,
      "step": 164412
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.681869029998779,
      "learning_rate": 0.00011346703569314514,
      "loss": 3.1199,
      "step": 164413
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7194130420684814,
      "learning_rate": 0.00011346383200390717,
      "loss": 3.067,
      "step": 164414
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9384992122650146,
      "learning_rate": 0.0001134606283493497,
      "loss": 2.8058,
      "step": 164415
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2484872341156006,
      "learning_rate": 0.00011345742472947352,
      "loss": 3.0514,
      "step": 164416
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.733278751373291,
      "learning_rate": 0.00011345422114427904,
      "loss": 2.9985,
      "step": 164417
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.231255531311035,
      "learning_rate": 0.00011345101759376704,
      "loss": 3.0953,
      "step": 164418
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.4571409225463867,
      "learning_rate": 0.000113447814077938,
      "loss": 2.92,
      "step": 164419
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.615234613418579,
      "learning_rate": 0.00011344461059679254,
      "loss": 3.0723,
      "step": 164420
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.9884650707244873,
      "learning_rate": 0.00011344140715033114,
      "loss": 3.0027,
      "step": 164421
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5905332565307617,
      "learning_rate": 0.0001134382037385546,
      "loss": 3.0146,
      "step": 164422
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7601521015167236,
      "learning_rate": 0.00011343500036146331,
      "loss": 2.8023,
      "step": 164423
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0204076766967773,
      "learning_rate": 0.00011343179701905811,
      "loss": 3.0235,
      "step": 164424
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4684619903564453,
      "learning_rate": 0.00011342859371133944,
      "loss": 2.75,
      "step": 164425
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4390382766723633,
      "learning_rate": 0.00011342539043830794,
      "loss": 2.8739,
      "step": 164426
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5078229904174805,
      "learning_rate": 0.00011342218719996407,
      "loss": 2.7942,
      "step": 164427
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8777172565460205,
      "learning_rate": 0.0001134189839963086,
      "loss": 2.877,
      "step": 164428
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8693602085113525,
      "learning_rate": 0.000113415780827342,
      "loss": 3.1359,
      "step": 164429
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6747963428497314,
      "learning_rate": 0.00011341257769306502,
      "loss": 3.0747,
      "step": 164430
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6934921741485596,
      "learning_rate": 0.00011340937459347805,
      "loss": 3.0272,
      "step": 164431
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.929426908493042,
      "learning_rate": 0.00011340617152858203,
      "loss": 3.0195,
      "step": 164432
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.421644449234009,
      "learning_rate": 0.0001134029684983771,
      "loss": 3.0626,
      "step": 164433
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.043482780456543,
      "learning_rate": 0.00011339976550286421,
      "loss": 3.0438,
      "step": 164434
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.362069845199585,
      "learning_rate": 0.0001133965625420437,
      "loss": 2.8438,
      "step": 164435
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.147174835205078,
      "learning_rate": 0.0001133933596159164,
      "loss": 2.9467,
      "step": 164436
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.811434745788574,
      "learning_rate": 0.00011339015672448269,
      "loss": 2.8848,
      "step": 164437
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.61584210395813,
      "learning_rate": 0.00011338695386774349,
      "loss": 3.1008,
      "step": 164438
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.774695634841919,
      "learning_rate": 0.00011338375104569895,
      "loss": 3.0107,
      "step": 164439
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.585939645767212,
      "learning_rate": 0.00011338054825835002,
      "loss": 2.9711,
      "step": 164440
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.858790636062622,
      "learning_rate": 0.00011337734550569706,
      "loss": 2.8002,
      "step": 164441
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.223904609680176,
      "learning_rate": 0.00011337414278774085,
      "loss": 2.7523,
      "step": 164442
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6168289184570312,
      "learning_rate": 0.00011337094010448182,
      "loss": 2.699,
      "step": 164443
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.996056318283081,
      "learning_rate": 0.0001133677374559209,
      "loss": 3.0268,
      "step": 164444
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3872897624969482,
      "learning_rate": 0.0001133645348420582,
      "loss": 2.9305,
      "step": 164445
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2207534313201904,
      "learning_rate": 0.00011336133226289466,
      "loss": 3.0626,
      "step": 164446
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2003631591796875,
      "learning_rate": 0.00011335812971843068,
      "loss": 2.9251,
      "step": 164447
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9900689125061035,
      "learning_rate": 0.00011335492720866704,
      "loss": 3.023,
      "step": 164448
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2406938076019287,
      "learning_rate": 0.00011335172473360414,
      "loss": 3.274,
      "step": 164449
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3799057006835938,
      "learning_rate": 0.00011334852229324288,
      "loss": 3.0021,
      "step": 164450
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3588645458221436,
      "learning_rate": 0.00011334531988758346,
      "loss": 2.8793,
      "step": 164451
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2139341831207275,
      "learning_rate": 0.00011334211751662675,
      "loss": 2.9239,
      "step": 164452
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.627187967300415,
      "learning_rate": 0.00011333891518037317,
      "loss": 2.7832,
      "step": 164453
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8149526119232178,
      "learning_rate": 0.0001133357128788235,
      "loss": 2.8328,
      "step": 164454
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5273029804229736,
      "learning_rate": 0.00011333251061197817,
      "loss": 2.9678,
      "step": 164455
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.4769303798675537,
      "learning_rate": 0.00011332930837983806,
      "loss": 3.0705,
      "step": 164456
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.102421760559082,
      "learning_rate": 0.0001133261061824033,
      "loss": 2.9987,
      "step": 164457
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.477064609527588,
      "learning_rate": 0.0001133229040196749,
      "loss": 3.0858,
      "step": 164458
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7629575729370117,
      "learning_rate": 0.00011331970189165316,
      "loss": 2.9025,
      "step": 164459
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3352010250091553,
      "learning_rate": 0.00011331649979833892,
      "loss": 2.9197,
      "step": 164460
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8904314041137695,
      "learning_rate": 0.00011331329773973259,
      "loss": 2.9077,
      "step": 164461
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.902653694152832,
      "learning_rate": 0.00011331009571583502,
      "loss": 2.9802,
      "step": 164462
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.996539354324341,
      "learning_rate": 0.00011330689372664643,
      "loss": 3.0468,
      "step": 164463
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.717766284942627,
      "learning_rate": 0.0001133036917721677,
      "loss": 2.8342,
      "step": 164464
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8823630809783936,
      "learning_rate": 0.00011330048985239925,
      "loss": 3.0176,
      "step": 164465
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7161998748779297,
      "learning_rate": 0.00011329728796734185,
      "loss": 2.9166,
      "step": 164466
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.9487950801849365,
      "learning_rate": 0.00011329408611699593,
      "loss": 2.7625,
      "step": 164467
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0649189949035645,
      "learning_rate": 0.00011329088430136237,
      "loss": 2.6251,
      "step": 164468
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.627197027206421,
      "learning_rate": 0.00011328768252044132,
      "loss": 2.9807,
      "step": 164469
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0493855476379395,
      "learning_rate": 0.00011328448077423376,
      "loss": 3.0052,
      "step": 164470
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2887699604034424,
      "learning_rate": 0.00011328127906274,
      "loss": 2.886,
      "step": 164471
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.736225128173828,
      "learning_rate": 0.0001132780773859609,
      "loss": 2.8446,
      "step": 164472
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.127307415008545,
      "learning_rate": 0.00011327487574389679,
      "loss": 2.9764,
      "step": 164473
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2056782245635986,
      "learning_rate": 0.00011327167413654866,
      "loss": 2.7887,
      "step": 164474
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.674924373626709,
      "learning_rate": 0.00011326847256391663,
      "loss": 2.838,
      "step": 164475
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9931910037994385,
      "learning_rate": 0.00011326527102600162,
      "loss": 3.0946,
      "step": 164476
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.960620403289795,
      "learning_rate": 0.000113262069522804,
      "loss": 2.8996,
      "step": 164477
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.213282346725464,
      "learning_rate": 0.00011325886805432462,
      "loss": 2.9525,
      "step": 164478
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.194826126098633,
      "learning_rate": 0.00011325566662056379,
      "loss": 2.9335,
      "step": 164479
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.462507963180542,
      "learning_rate": 0.00011325246522152239,
      "loss": 3.0296,
      "step": 164480
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6464014053344727,
      "learning_rate": 0.00011324926385720089,
      "loss": 2.8277,
      "step": 164481
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5725276470184326,
      "learning_rate": 0.00011324606252759988,
      "loss": 2.8568,
      "step": 164482
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8884057998657227,
      "learning_rate": 0.00011324286123271982,
      "loss": 2.7977,
      "step": 164483
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.838373899459839,
      "learning_rate": 0.00011323965997256156,
      "loss": 2.9865,
      "step": 164484
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6861090660095215,
      "learning_rate": 0.00011323645874712546,
      "loss": 2.8205,
      "step": 164485
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.16033935546875,
      "learning_rate": 0.00011323325755641236,
      "loss": 2.9497,
      "step": 164486
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.088932752609253,
      "learning_rate": 0.00011323005640042271,
      "loss": 2.8008,
      "step": 164487
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.465776205062866,
      "learning_rate": 0.00011322685527915711,
      "loss": 2.8413,
      "step": 164488
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.634490489959717,
      "learning_rate": 0.00011322365419261604,
      "loss": 2.8171,
      "step": 164489
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4289934635162354,
      "learning_rate": 0.00011322045314080036,
      "loss": 3.0821,
      "step": 164490
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.359787702560425,
      "learning_rate": 0.0001132172521237104,
      "loss": 3.0045,
      "step": 164491
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.860411643981934,
      "learning_rate": 0.00011321405114134697,
      "loss": 2.8001,
      "step": 164492
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1571836471557617,
      "learning_rate": 0.00011321085019371058,
      "loss": 2.751,
      "step": 164493
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.609165906906128,
      "learning_rate": 0.00011320764928080175,
      "loss": 2.9744,
      "step": 164494
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4494175910949707,
      "learning_rate": 0.00011320444840262121,
      "loss": 3.069,
      "step": 164495
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.467503309249878,
      "learning_rate": 0.00011320124755916948,
      "loss": 2.8961,
      "step": 164496
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.557605028152466,
      "learning_rate": 0.00011319804675044708,
      "loss": 3.3191,
      "step": 164497
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7279112339019775,
      "learning_rate": 0.0001131948459764548,
      "loss": 3.0029,
      "step": 164498
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4240691661834717,
      "learning_rate": 0.00011319164523719312,
      "loss": 3.0915,
      "step": 164499
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7622647285461426,
      "learning_rate": 0.00011318844453266255,
      "loss": 2.9155,
      "step": 164500
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7072501182556152,
      "learning_rate": 0.00011318524386286386,
      "loss": 2.8655,
      "step": 164501
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7226507663726807,
      "learning_rate": 0.00011318204322779744,
      "loss": 2.7526,
      "step": 164502
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.152237892150879,
      "learning_rate": 0.00011317884262746413,
      "loss": 3.0343,
      "step": 164503
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9296016693115234,
      "learning_rate": 0.0001131756420618644,
      "loss": 2.9426,
      "step": 164504
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6160809993743896,
      "learning_rate": 0.00011317244153099882,
      "loss": 2.9662,
      "step": 164505
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0865416526794434,
      "learning_rate": 0.00011316924103486794,
      "loss": 3.2031,
      "step": 164506
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.942084789276123,
      "learning_rate": 0.0001131660405734725,
      "loss": 3.2721,
      "step": 164507
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3631763458251953,
      "learning_rate": 0.00011316284014681294,
      "loss": 2.9437,
      "step": 164508
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.606198310852051,
      "learning_rate": 0.00011315963975489005,
      "loss": 3.1945,
      "step": 164509
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2884202003479004,
      "learning_rate": 0.00011315643939770428,
      "loss": 3.1225,
      "step": 164510
    },
    {
      "epoch": 2.14,
      "grad_norm": 1.9832432270050049,
      "learning_rate": 0.00011315323907525626,
      "loss": 2.8338,
      "step": 164511
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.103710412979126,
      "learning_rate": 0.00011315003878754646,
      "loss": 2.8609,
      "step": 164512
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4248130321502686,
      "learning_rate": 0.00011314683853457574,
      "loss": 3.1246,
      "step": 164513
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4665768146514893,
      "learning_rate": 0.00011314363831634443,
      "loss": 2.7698,
      "step": 164514
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.4103641510009766,
      "learning_rate": 0.00011314043813285331,
      "loss": 2.8699,
      "step": 164515
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1329309940338135,
      "learning_rate": 0.00011313723798410285,
      "loss": 2.886,
      "step": 164516
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.4955620765686035,
      "learning_rate": 0.0001131340378700939,
      "loss": 3.0112,
      "step": 164517
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.406418800354004,
      "learning_rate": 0.00011313083779082667,
      "loss": 3.1044,
      "step": 164518
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4466707706451416,
      "learning_rate": 0.00011312763774630203,
      "loss": 2.9474,
      "step": 164519
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9021031856536865,
      "learning_rate": 0.00011312443773652038,
      "loss": 2.7584,
      "step": 164520
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2323784828186035,
      "learning_rate": 0.00011312123776148253,
      "loss": 2.8172,
      "step": 164521
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7354633808135986,
      "learning_rate": 0.00011311803782118889,
      "loss": 2.8774,
      "step": 164522
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.719775199890137,
      "learning_rate": 0.00011311483791564032,
      "loss": 2.9281,
      "step": 164523
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.198183298110962,
      "learning_rate": 0.00011311163804483703,
      "loss": 3.0347,
      "step": 164524
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6272761821746826,
      "learning_rate": 0.0001131084382087799,
      "loss": 3.099,
      "step": 164525
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.9801840782165527,
      "learning_rate": 0.00011310523840746935,
      "loss": 2.9271,
      "step": 164526
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6620569229125977,
      "learning_rate": 0.00011310203864090618,
      "loss": 3.0602,
      "step": 164527
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.627303123474121,
      "learning_rate": 0.00011309883890909073,
      "loss": 3.0534,
      "step": 164528
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6584200859069824,
      "learning_rate": 0.00011309563921202397,
      "loss": 2.8403,
      "step": 164529
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.315784215927124,
      "learning_rate": 0.00011309243954970603,
      "loss": 3.0908,
      "step": 164530
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4940500259399414,
      "learning_rate": 0.00011308923992213788,
      "loss": 3.1503,
      "step": 164531
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5125436782836914,
      "learning_rate": 0.00011308604032931982,
      "loss": 3.1614,
      "step": 164532
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.249070882797241,
      "learning_rate": 0.00011308284077125272,
      "loss": 2.8699,
      "step": 164533
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.407791614532471,
      "learning_rate": 0.00011307964124793693,
      "loss": 2.7129,
      "step": 164534
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3627238273620605,
      "learning_rate": 0.00011307644175937338,
      "loss": 2.8754,
      "step": 164535
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2444891929626465,
      "learning_rate": 0.00011307324230556226,
      "loss": 2.9459,
      "step": 164536
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.491748094558716,
      "learning_rate": 0.0001130700428865044,
      "loss": 3.0315,
      "step": 164537
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1215174198150635,
      "learning_rate": 0.00011306684350220029,
      "loss": 3.0622,
      "step": 164538
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3243424892425537,
      "learning_rate": 0.0001130636441526507,
      "loss": 2.9599,
      "step": 164539
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.357835054397583,
      "learning_rate": 0.00011306044483785598,
      "loss": 2.8499,
      "step": 164540
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3040201663970947,
      "learning_rate": 0.00011305724555781705,
      "loss": 2.819,
      "step": 164541
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7057251930236816,
      "learning_rate": 0.0001130540463125341,
      "loss": 3.1357,
      "step": 164542
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.0247907638549805,
      "learning_rate": 0.00011305084710200803,
      "loss": 3.1474,
      "step": 164543
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3824899196624756,
      "learning_rate": 0.00011304764792623928,
      "loss": 3.0778,
      "step": 164544
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.0827417373657227,
      "learning_rate": 0.00011304444878522857,
      "loss": 3.0022,
      "step": 164545
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2109951972961426,
      "learning_rate": 0.0001130412496789763,
      "loss": 2.771,
      "step": 164546
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3446643352508545,
      "learning_rate": 0.00011303805060748332,
      "loss": 2.963,
      "step": 164547
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.9265124797821045,
      "learning_rate": 0.0001130348515707501,
      "loss": 2.8981,
      "step": 164548
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.790807008743286,
      "learning_rate": 0.00011303165256877722,
      "loss": 2.9382,
      "step": 164549
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4895479679107666,
      "learning_rate": 0.00011302845360156519,
      "loss": 2.9587,
      "step": 164550
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.294816017150879,
      "learning_rate": 0.0001130252546691148,
      "loss": 2.9912,
      "step": 164551
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6336395740509033,
      "learning_rate": 0.00011302205577142646,
      "loss": 3.2201,
      "step": 164552
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4405322074890137,
      "learning_rate": 0.00011301885690850091,
      "loss": 3.0601,
      "step": 164553
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5247280597686768,
      "learning_rate": 0.00011301565808033872,
      "loss": 2.9421,
      "step": 164554
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.35155987739563,
      "learning_rate": 0.00011301245928694042,
      "loss": 3.0162,
      "step": 164555
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5989041328430176,
      "learning_rate": 0.00011300926052830656,
      "loss": 2.9274,
      "step": 164556
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2526726722717285,
      "learning_rate": 0.0001130060618044379,
      "loss": 3.209,
      "step": 164557
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.336881637573242,
      "learning_rate": 0.00011300286311533482,
      "loss": 2.8407,
      "step": 164558
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.652369737625122,
      "learning_rate": 0.00011299966446099815,
      "loss": 2.7231,
      "step": 164559
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1773793697357178,
      "learning_rate": 0.00011299646584142841,
      "loss": 3.2252,
      "step": 164560
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8213248252868652,
      "learning_rate": 0.00011299326725662613,
      "loss": 2.9537,
      "step": 164561
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.490156650543213,
      "learning_rate": 0.00011299006870659182,
      "loss": 3.0172,
      "step": 164562
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.154633522033691,
      "learning_rate": 0.00011298687019132627,
      "loss": 2.8754,
      "step": 164563
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7285735607147217,
      "learning_rate": 0.00011298367171082991,
      "loss": 2.9082,
      "step": 164564
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4498236179351807,
      "learning_rate": 0.00011298047326510354,
      "loss": 2.7978,
      "step": 164565
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.663074493408203,
      "learning_rate": 0.00011297727485414762,
      "loss": 2.9254,
      "step": 164566
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.268639087677002,
      "learning_rate": 0.00011297407647796272,
      "loss": 2.839,
      "step": 164567
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.473587989807129,
      "learning_rate": 0.00011297087813654941,
      "loss": 3.3,
      "step": 164568
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6229496002197266,
      "learning_rate": 0.00011296767982990842,
      "loss": 3.0656,
      "step": 164569
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.648688554763794,
      "learning_rate": 0.00011296448155804017,
      "loss": 3.009,
      "step": 164570
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9425482749938965,
      "learning_rate": 0.00011296128332094549,
      "loss": 3.08,
      "step": 164571
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.404909133911133,
      "learning_rate": 0.00011295808511862481,
      "loss": 3.1661,
      "step": 164572
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9542038440704346,
      "learning_rate": 0.00011295488695107875,
      "loss": 2.6842,
      "step": 164573
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.818326950073242,
      "learning_rate": 0.00011295168881830781,
      "loss": 2.724,
      "step": 164574
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.191383123397827,
      "learning_rate": 0.00011294849072031278,
      "loss": 2.7541,
      "step": 164575
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2032570838928223,
      "learning_rate": 0.00011294529265709407,
      "loss": 2.8038,
      "step": 164576
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.353069543838501,
      "learning_rate": 0.00011294209462865244,
      "loss": 2.9138,
      "step": 164577
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.379624605178833,
      "learning_rate": 0.00011293889663498842,
      "loss": 2.97,
      "step": 164578
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7689099311828613,
      "learning_rate": 0.00011293569867610248,
      "loss": 2.9142,
      "step": 164579
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.563530206680298,
      "learning_rate": 0.00011293250075199542,
      "loss": 3.2693,
      "step": 164580
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.947258472442627,
      "learning_rate": 0.00011292930286266775,
      "loss": 3.1214,
      "step": 164581
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1749629974365234,
      "learning_rate": 0.00011292610500811995,
      "loss": 3.0286,
      "step": 164582
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6972997188568115,
      "learning_rate": 0.00011292290718835283,
      "loss": 2.9961,
      "step": 164583
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.248912811279297,
      "learning_rate": 0.00011291970940336685,
      "loss": 2.8785,
      "step": 164584
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.1979148387908936,
      "learning_rate": 0.00011291651165316256,
      "loss": 2.8856,
      "step": 164585
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.193093776702881,
      "learning_rate": 0.00011291331393774071,
      "loss": 2.9406,
      "step": 164586
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.391295909881592,
      "learning_rate": 0.00011291011625710179,
      "loss": 2.8098,
      "step": 164587
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.304107189178467,
      "learning_rate": 0.00011290691861124632,
      "loss": 2.7814,
      "step": 164588
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.38730525970459,
      "learning_rate": 0.00011290372100017509,
      "loss": 3.1199,
      "step": 164589
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.63328218460083,
      "learning_rate": 0.00011290052342388858,
      "loss": 3.1768,
      "step": 164590
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.414808988571167,
      "learning_rate": 0.0001128973258823873,
      "loss": 2.8426,
      "step": 164591
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3806588649749756,
      "learning_rate": 0.00011289412837567204,
      "loss": 2.9051,
      "step": 164592
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2239928245544434,
      "learning_rate": 0.00011289093090374321,
      "loss": 2.9675,
      "step": 164593
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2637617588043213,
      "learning_rate": 0.00011288773346660155,
      "loss": 2.9838,
      "step": 164594
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8307197093963623,
      "learning_rate": 0.00011288453606424764,
      "loss": 3.0781,
      "step": 164595
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4923360347747803,
      "learning_rate": 0.000112881338696682,
      "loss": 3.1042,
      "step": 164596
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.550915002822876,
      "learning_rate": 0.00011287814136390512,
      "loss": 2.736,
      "step": 164597
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3904011249542236,
      "learning_rate": 0.00011287494406591788,
      "loss": 2.725,
      "step": 164598
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.604055404663086,
      "learning_rate": 0.00011287174680272059,
      "loss": 2.9759,
      "step": 164599
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3112540245056152,
      "learning_rate": 0.0001128685495743141,
      "loss": 2.8363,
      "step": 164600
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2721402645111084,
      "learning_rate": 0.00011286535238069874,
      "loss": 2.9962,
      "step": 164601
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5366575717926025,
      "learning_rate": 0.00011286215522187545,
      "loss": 2.9602,
      "step": 164602
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.232229471206665,
      "learning_rate": 0.00011285895809784444,
      "loss": 2.9668,
      "step": 164603
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2306950092315674,
      "learning_rate": 0.00011285576100860658,
      "loss": 3.0707,
      "step": 164604
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8457114696502686,
      "learning_rate": 0.00011285256395416227,
      "loss": 3.2308,
      "step": 164605
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.285768508911133,
      "learning_rate": 0.00011284936693451229,
      "loss": 3.1232,
      "step": 164606
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.39687442779541,
      "learning_rate": 0.00011284616994965705,
      "loss": 2.7554,
      "step": 164607
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.917433261871338,
      "learning_rate": 0.00011284297299959743,
      "loss": 3.0915,
      "step": 164608
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.569556474685669,
      "learning_rate": 0.00011283977608433365,
      "loss": 2.7553,
      "step": 164609
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4198248386383057,
      "learning_rate": 0.00011283657920386657,
      "loss": 2.887,
      "step": 164610
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.468528985977173,
      "learning_rate": 0.00011283338235819661,
      "loss": 2.8902,
      "step": 164611
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6457712650299072,
      "learning_rate": 0.00011283018554732456,
      "loss": 3.0485,
      "step": 164612
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9440958499908447,
      "learning_rate": 0.00011282698877125081,
      "loss": 2.9438,
      "step": 164613
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.900165319442749,
      "learning_rate": 0.00011282379202997618,
      "loss": 3.0432,
      "step": 164614
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.080029010772705,
      "learning_rate": 0.00011282059532350112,
      "loss": 2.6983,
      "step": 164615
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2923500537872314,
      "learning_rate": 0.00011281739865182622,
      "loss": 3.1625,
      "step": 164616
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.338353395462036,
      "learning_rate": 0.00011281420201495202,
      "loss": 3.0242,
      "step": 164617
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.457376003265381,
      "learning_rate": 0.0001128110054128793,
      "loss": 2.7997,
      "step": 164618
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5241057872772217,
      "learning_rate": 0.00011280780884560846,
      "loss": 2.9343,
      "step": 164619
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3724777698516846,
      "learning_rate": 0.00011280461231314024,
      "loss": 3.0502,
      "step": 164620
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.29571270942688,
      "learning_rate": 0.00011280141581547521,
      "loss": 2.9539,
      "step": 164621
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.311922788619995,
      "learning_rate": 0.00011279821935261391,
      "loss": 2.7411,
      "step": 164622
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5587801933288574,
      "learning_rate": 0.00011279502292455687,
      "loss": 3.0113,
      "step": 164623
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5581798553466797,
      "learning_rate": 0.00011279182653130486,
      "loss": 2.9123,
      "step": 164624
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9963114261627197,
      "learning_rate": 0.0001127886301728583,
      "loss": 3.1437,
      "step": 164625
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.523077964782715,
      "learning_rate": 0.00011278543384921794,
      "loss": 2.7123,
      "step": 164626
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9317407608032227,
      "learning_rate": 0.00011278223756038433,
      "loss": 3.1581,
      "step": 164627
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.607585906982422,
      "learning_rate": 0.00011277904130635802,
      "loss": 3.0582,
      "step": 164628
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6872541904449463,
      "learning_rate": 0.00011277584508713953,
      "loss": 3.0833,
      "step": 164629
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.431884765625,
      "learning_rate": 0.00011277264890272966,
      "loss": 3.086,
      "step": 164630
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.628167152404785,
      "learning_rate": 0.00011276945275312875,
      "loss": 2.8183,
      "step": 164631
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.78222918510437,
      "learning_rate": 0.00011276625663833765,
      "loss": 2.9762,
      "step": 164632
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3871772289276123,
      "learning_rate": 0.00011276306055835685,
      "loss": 2.986,
      "step": 164633
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4193227291107178,
      "learning_rate": 0.00011275986451318689,
      "loss": 3.1805,
      "step": 164634
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2162489891052246,
      "learning_rate": 0.00011275666850282836,
      "loss": 3.1305,
      "step": 164635
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.232229471206665,
      "learning_rate": 0.00011275347252728195,
      "loss": 2.9444,
      "step": 164636
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.046123504638672,
      "learning_rate": 0.00011275027658654813,
      "loss": 2.8886,
      "step": 164637
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6725950241088867,
      "learning_rate": 0.00011274708068062769,
      "loss": 3.043,
      "step": 164638
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2945334911346436,
      "learning_rate": 0.00011274388480952106,
      "loss": 3.0206,
      "step": 164639
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.567655086517334,
      "learning_rate": 0.0001127406889732289,
      "loss": 2.8307,
      "step": 164640
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.809168815612793,
      "learning_rate": 0.00011273749317175167,
      "loss": 3.1006,
      "step": 164641
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.6199793815612793,
      "learning_rate": 0.00011273429740509017,
      "loss": 3.0051,
      "step": 164642
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3072350025177,
      "learning_rate": 0.00011273110167324483,
      "loss": 3.0381,
      "step": 164643
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3995766639709473,
      "learning_rate": 0.00011272790597621639,
      "loss": 2.7647,
      "step": 164644
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.340294361114502,
      "learning_rate": 0.00011272471031400537,
      "loss": 2.7304,
      "step": 164645
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3693997859954834,
      "learning_rate": 0.00011272151468661237,
      "loss": 3.0002,
      "step": 164646
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.659442186355591,
      "learning_rate": 0.00011271831909403784,
      "loss": 3.0246,
      "step": 164647
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3761274814605713,
      "learning_rate": 0.00011271512353628267,
      "loss": 2.9358,
      "step": 164648
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.048729658126831,
      "learning_rate": 0.00011271192801334714,
      "loss": 3.0095,
      "step": 164649
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.139000415802002,
      "learning_rate": 0.00011270873252523213,
      "loss": 3.0033,
      "step": 164650
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.610291957855225,
      "learning_rate": 0.0001127055370719381,
      "loss": 2.8492,
      "step": 164651
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.558730125427246,
      "learning_rate": 0.00011270234165346563,
      "loss": 2.7855,
      "step": 164652
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.044999837875366,
      "learning_rate": 0.00011269914626981522,
      "loss": 2.7878,
      "step": 164653
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5496444702148438,
      "learning_rate": 0.00011269595092098772,
      "loss": 2.995,
      "step": 164654
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3746352195739746,
      "learning_rate": 0.00011269275560698343,
      "loss": 3.1318,
      "step": 164655
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.989140033721924,
      "learning_rate": 0.00011268956032780322,
      "loss": 2.8399,
      "step": 164656
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.8684163093566895,
      "learning_rate": 0.00011268636508344755,
      "loss": 2.8818,
      "step": 164657
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.028078079223633,
      "learning_rate": 0.00011268316987391702,
      "loss": 2.9943,
      "step": 164658
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.72452974319458,
      "learning_rate": 0.0001126799746992121,
      "loss": 2.8295,
      "step": 164659
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.663050651550293,
      "learning_rate": 0.00011267677955933366,
      "loss": 3.1678,
      "step": 164660
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1577248573303223,
      "learning_rate": 0.00011267358445428199,
      "loss": 3.0202,
      "step": 164661
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.387001037597656,
      "learning_rate": 0.00011267038938405797,
      "loss": 2.6961,
      "step": 164662
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.665942668914795,
      "learning_rate": 0.00011266719434866205,
      "loss": 2.8519,
      "step": 164663
    },
    {
      "epoch": 2.14,
      "grad_norm": 1.86536705493927,
      "learning_rate": 0.00011266399934809473,
      "loss": 2.8416,
      "step": 164664
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9925293922424316,
      "learning_rate": 0.00011266080438235683,
      "loss": 3.1441,
      "step": 164665
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2523868083953857,
      "learning_rate": 0.00011265760945144882,
      "loss": 3.0214,
      "step": 164666
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.3086814880371094,
      "learning_rate": 0.00011265441455537116,
      "loss": 2.8172,
      "step": 164667
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.714744806289673,
      "learning_rate": 0.00011265121969412474,
      "loss": 3.0497,
      "step": 164668
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.029573440551758,
      "learning_rate": 0.00011264802486770995,
      "loss": 2.9862,
      "step": 164669
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.5940802097320557,
      "learning_rate": 0.00011264483007612733,
      "loss": 3.0113,
      "step": 164670
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.802809476852417,
      "learning_rate": 0.0001126416353193777,
      "loss": 3.1092,
      "step": 164671
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7005324363708496,
      "learning_rate": 0.00011263844059746151,
      "loss": 3.1211,
      "step": 164672
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.319530725479126,
      "learning_rate": 0.00011263524591037927,
      "loss": 2.8609,
      "step": 164673
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.632960081100464,
      "learning_rate": 0.00011263205125813176,
      "loss": 2.9863,
      "step": 164674
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.216123342514038,
      "learning_rate": 0.00011262885664071952,
      "loss": 2.838,
      "step": 164675
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2043776512145996,
      "learning_rate": 0.00011262566205814298,
      "loss": 2.9997,
      "step": 164676
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.373274326324463,
      "learning_rate": 0.00011262246751040298,
      "loss": 2.9395,
      "step": 164677
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2214467525482178,
      "learning_rate": 0.0001126192729974999,
      "loss": 3.1081,
      "step": 164678
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4828810691833496,
      "learning_rate": 0.00011261607851943453,
      "loss": 2.634,
      "step": 164679
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.662942409515381,
      "learning_rate": 0.0001126128840762074,
      "loss": 2.7504,
      "step": 164680
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.024855852127075,
      "learning_rate": 0.00011260968966781893,
      "loss": 2.8258,
      "step": 164681
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.182387351989746,
      "learning_rate": 0.00011260649529426998,
      "loss": 2.826,
      "step": 164682
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.859999895095825,
      "learning_rate": 0.000112603300955561,
      "loss": 3.0814,
      "step": 164683
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2875959873199463,
      "learning_rate": 0.00011260010665169252,
      "loss": 2.874,
      "step": 164684
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7280430793762207,
      "learning_rate": 0.0001125969123826653,
      "loss": 3.1694,
      "step": 164685
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.850349426269531,
      "learning_rate": 0.00011259371814847989,
      "loss": 2.9392,
      "step": 164686
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7280077934265137,
      "learning_rate": 0.00011259052394913673,
      "loss": 3.0156,
      "step": 164687
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1637473106384277,
      "learning_rate": 0.0001125873297846366,
      "loss": 2.7715,
      "step": 164688
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8984951972961426,
      "learning_rate": 0.00011258413565498005,
      "loss": 3.1879,
      "step": 164689
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.896350145339966,
      "learning_rate": 0.00011258094156016754,
      "loss": 2.9699,
      "step": 164690
    },
    {
      "epoch": 2.14,
      "grad_norm": 5.036806106567383,
      "learning_rate": 0.00011257774750019987,
      "loss": 3.2412,
      "step": 164691
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.3021347522735596,
      "learning_rate": 0.00011257455347507741,
      "loss": 2.9945,
      "step": 164692
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4049389362335205,
      "learning_rate": 0.00011257135948480102,
      "loss": 2.7807,
      "step": 164693
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.2801177501678467,
      "learning_rate": 0.00011256816552937114,
      "loss": 3.0376,
      "step": 164694
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.129804849624634,
      "learning_rate": 0.00011256497160878837,
      "loss": 2.7861,
      "step": 164695
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.5205681324005127,
      "learning_rate": 0.00011256177772305322,
      "loss": 3.2428,
      "step": 164696
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.027306318283081,
      "learning_rate": 0.00011255858387216645,
      "loss": 2.7995,
      "step": 164697
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9418163299560547,
      "learning_rate": 0.00011255539005612852,
      "loss": 2.9501,
      "step": 164698
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9428842067718506,
      "learning_rate": 0.00011255219627494014,
      "loss": 3.0431,
      "step": 164699
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0363717079162598,
      "learning_rate": 0.00011254900252860188,
      "loss": 2.8511,
      "step": 164700
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2804036140441895,
      "learning_rate": 0.00011254580881711426,
      "loss": 3.0245,
      "step": 164701
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.5788331031799316,
      "learning_rate": 0.00011254261514047782,
      "loss": 3.1078,
      "step": 164702
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.038259983062744,
      "learning_rate": 0.00011253942149869336,
      "loss": 3.0482,
      "step": 164703
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.727442502975464,
      "learning_rate": 0.00011253622789176126,
      "loss": 3.0137,
      "step": 164704
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4734902381896973,
      "learning_rate": 0.0001125330343196823,
      "loss": 3.0804,
      "step": 164705
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8268203735351562,
      "learning_rate": 0.00011252984078245701,
      "loss": 2.8785,
      "step": 164706
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.298614740371704,
      "learning_rate": 0.00011252664728008596,
      "loss": 2.7428,
      "step": 164707
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.2158377170562744,
      "learning_rate": 0.00011252345381256963,
      "loss": 3.0091,
      "step": 164708
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1819007396698,
      "learning_rate": 0.00011252026037990882,
      "loss": 2.8071,
      "step": 164709
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.719144821166992,
      "learning_rate": 0.00011251706698210397,
      "loss": 2.8437,
      "step": 164710
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5029916763305664,
      "learning_rate": 0.00011251387361915579,
      "loss": 3.1516,
      "step": 164711
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.808875799179077,
      "learning_rate": 0.00011251068029106483,
      "loss": 2.88,
      "step": 164712
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.7826662063598633,
      "learning_rate": 0.0001125074869978317,
      "loss": 3.0316,
      "step": 164713
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.75619649887085,
      "learning_rate": 0.00011250429373945688,
      "loss": 3.0099,
      "step": 164714
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.528522491455078,
      "learning_rate": 0.0001125011005159411,
      "loss": 2.9414,
      "step": 164715
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.117156505584717,
      "learning_rate": 0.00011249790732728484,
      "loss": 3.0803,
      "step": 164716
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.52641224861145,
      "learning_rate": 0.00011249471417348883,
      "loss": 3.2444,
      "step": 164717
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.4531819820404053,
      "learning_rate": 0.00011249152105455363,
      "loss": 2.9088,
      "step": 164718
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.5920650959014893,
      "learning_rate": 0.00011248832797047979,
      "loss": 3.0677,
      "step": 164719
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.075746536254883,
      "learning_rate": 0.00011248513492126778,
      "loss": 2.7236,
      "step": 164720
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5088412761688232,
      "learning_rate": 0.00011248194190691842,
      "loss": 2.8683,
      "step": 164721
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.757206916809082,
      "learning_rate": 0.00011247874892743214,
      "loss": 2.9169,
      "step": 164722
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6330692768096924,
      "learning_rate": 0.00011247555598280969,
      "loss": 2.8474,
      "step": 164723
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.771205186843872,
      "learning_rate": 0.00011247236307305157,
      "loss": 2.9786,
      "step": 164724
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.9001874923706055,
      "learning_rate": 0.00011246917019815839,
      "loss": 2.8446,
      "step": 164725
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5488948822021484,
      "learning_rate": 0.00011246597735813059,
      "loss": 2.9606,
      "step": 164726
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.736729145050049,
      "learning_rate": 0.00011246278455296903,
      "loss": 3.0072,
      "step": 164727
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.461571216583252,
      "learning_rate": 0.00011245959178267408,
      "loss": 3.3225,
      "step": 164728
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.5637753009796143,
      "learning_rate": 0.00011245639904724655,
      "loss": 2.8024,
      "step": 164729
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.1539418697357178,
      "learning_rate": 0.00011245320634668689,
      "loss": 2.8835,
      "step": 164730
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.642343521118164,
      "learning_rate": 0.00011245001368099573,
      "loss": 3.2645,
      "step": 164731
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.7992947101593018,
      "learning_rate": 0.00011244682105017356,
      "loss": 2.6402,
      "step": 164732
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.655776023864746,
      "learning_rate": 0.00011244362845422115,
      "loss": 2.9251,
      "step": 164733
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.60876202583313,
      "learning_rate": 0.00011244043589313891,
      "loss": 2.7682,
      "step": 164734
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.362260580062866,
      "learning_rate": 0.00011243724336692765,
      "loss": 2.9315,
      "step": 164735
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.6234781742095947,
      "learning_rate": 0.00011243405087558783,
      "loss": 2.9619,
      "step": 164736
    },
    {
      "epoch": 2.14,
      "grad_norm": 4.150674343109131,
      "learning_rate": 0.00011243085841912008,
      "loss": 2.8642,
      "step": 164737
    },
    {
      "epoch": 2.14,
      "grad_norm": 3.0120296478271484,
      "learning_rate": 0.00011242766599752484,
      "loss": 2.9495,
      "step": 164738
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.194169521331787,
      "learning_rate": 0.00011242447361080297,
      "loss": 2.8511,
      "step": 164739
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.399543523788452,
      "learning_rate": 0.00011242128125895483,
      "loss": 2.7556,
      "step": 164740
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.717203140258789,
      "learning_rate": 0.0001124180889419812,
      "loss": 2.7053,
      "step": 164741
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.379054069519043,
      "learning_rate": 0.0001124148966598826,
      "loss": 3.0676,
      "step": 164742
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3157999515533447,
      "learning_rate": 0.00011241170441265961,
      "loss": 2.825,
      "step": 164743
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.397073745727539,
      "learning_rate": 0.00011240851220031272,
      "loss": 3.0538,
      "step": 164744
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3517773151397705,
      "learning_rate": 0.00011240532002284273,
      "loss": 2.8772,
      "step": 164745
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9336860179901123,
      "learning_rate": 0.00011240212788025001,
      "loss": 2.9537,
      "step": 164746
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.827941656112671,
      "learning_rate": 0.00011239893577253542,
      "loss": 2.6911,
      "step": 164747
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.720128059387207,
      "learning_rate": 0.00011239574369969926,
      "loss": 2.9036,
      "step": 164748
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.393354892730713,
      "learning_rate": 0.00011239255166174251,
      "loss": 2.9544,
      "step": 164749
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.671205520629883,
      "learning_rate": 0.00011238935965866531,
      "loss": 2.796,
      "step": 164750
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.261157274246216,
      "learning_rate": 0.00011238616769046858,
      "loss": 3.0046,
      "step": 164751
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4874229431152344,
      "learning_rate": 0.0001123829757571527,
      "loss": 2.8676,
      "step": 164752
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.110692262649536,
      "learning_rate": 0.00011237978385871847,
      "loss": 2.9628,
      "step": 164753
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.310487747192383,
      "learning_rate": 0.00011237659199516626,
      "loss": 2.869,
      "step": 164754
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.674673557281494,
      "learning_rate": 0.00011237340016649689,
      "loss": 3.0976,
      "step": 164755
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.492933750152588,
      "learning_rate": 0.00011237020837271086,
      "loss": 2.8554,
      "step": 164756
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.468454599380493,
      "learning_rate": 0.00011236701661380875,
      "loss": 2.8816,
      "step": 164757
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4132235050201416,
      "learning_rate": 0.00011236382488979103,
      "loss": 2.9292,
      "step": 164758
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4359638690948486,
      "learning_rate": 0.00011236063320065855,
      "loss": 3.0535,
      "step": 164759
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0648231506347656,
      "learning_rate": 0.00011235744154641165,
      "loss": 3.0371,
      "step": 164760
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.408416509628296,
      "learning_rate": 0.00011235424992705116,
      "loss": 3.1418,
      "step": 164761
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1981992721557617,
      "learning_rate": 0.00011235105834257751,
      "loss": 2.9504,
      "step": 164762
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.094386339187622,
      "learning_rate": 0.00011234786679299127,
      "loss": 2.9035,
      "step": 164763
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.589815855026245,
      "learning_rate": 0.00011234467527829321,
      "loss": 2.8138,
      "step": 164764
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2115635871887207,
      "learning_rate": 0.00011234148379848382,
      "loss": 3.0598,
      "step": 164765
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.173875331878662,
      "learning_rate": 0.00011233829235356358,
      "loss": 3.2811,
      "step": 164766
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3846287727355957,
      "learning_rate": 0.0001123351009435333,
      "loss": 2.9416,
      "step": 164767
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.989462375640869,
      "learning_rate": 0.00011233190956839348,
      "loss": 3.0925,
      "step": 164768
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.411316394805908,
      "learning_rate": 0.00011232871822814459,
      "loss": 3.0092,
      "step": 164769
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5058605670928955,
      "learning_rate": 0.00011232552692278745,
      "loss": 3.1103,
      "step": 164770
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2641398906707764,
      "learning_rate": 0.00011232233565232249,
      "loss": 3.2292,
      "step": 164771
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2207114696502686,
      "learning_rate": 0.0001123191444167503,
      "loss": 3.0722,
      "step": 164772
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3989174365997314,
      "learning_rate": 0.00011231595321607158,
      "loss": 3.0042,
      "step": 164773
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3980815410614014,
      "learning_rate": 0.00011231276205028689,
      "loss": 2.8368,
      "step": 164774
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.229921817779541,
      "learning_rate": 0.00011230957091939669,
      "loss": 2.7967,
      "step": 164775
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4972736835479736,
      "learning_rate": 0.00011230637982340182,
      "loss": 3.0781,
      "step": 164776
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9279463291168213,
      "learning_rate": 0.00011230318876230258,
      "loss": 2.9386,
      "step": 164777
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.158815383911133,
      "learning_rate": 0.00011229999773609988,
      "loss": 3.0813,
      "step": 164778
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0555460453033447,
      "learning_rate": 0.00011229680674479412,
      "loss": 2.685,
      "step": 164779
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2731211185455322,
      "learning_rate": 0.00011229361578838594,
      "loss": 2.9722,
      "step": 164780
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.021883726119995,
      "learning_rate": 0.0001122904248668758,
      "loss": 3.1491,
      "step": 164781
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.437079906463623,
      "learning_rate": 0.00011228723398026453,
      "loss": 2.6762,
      "step": 164782
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8933234214782715,
      "learning_rate": 0.0001122840431285525,
      "loss": 2.8897,
      "step": 164783
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6727066040039062,
      "learning_rate": 0.0001122808523117405,
      "loss": 2.8355,
      "step": 164784
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.190768241882324,
      "learning_rate": 0.00011227766152982904,
      "loss": 3.0109,
      "step": 164785
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1059093475341797,
      "learning_rate": 0.00011227447078281874,
      "loss": 3.1121,
      "step": 164786
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.715358257293701,
      "learning_rate": 0.00011227128007071001,
      "loss": 3.0799,
      "step": 164787
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6923904418945312,
      "learning_rate": 0.0001122680893935037,
      "loss": 2.8617,
      "step": 164788
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1910603046417236,
      "learning_rate": 0.00011226489875120023,
      "loss": 3.1753,
      "step": 164789
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4331934452056885,
      "learning_rate": 0.00011226170814380033,
      "loss": 3.2401,
      "step": 164790
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.0209009647369385,
      "learning_rate": 0.00011225851757130452,
      "loss": 2.8744,
      "step": 164791
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.452815294265747,
      "learning_rate": 0.00011225532703371343,
      "loss": 3.0034,
      "step": 164792
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.201240062713623,
      "learning_rate": 0.00011225213653102749,
      "loss": 3.0386,
      "step": 164793
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2805824279785156,
      "learning_rate": 0.00011224894606324752,
      "loss": 2.8258,
      "step": 164794
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.351189613342285,
      "learning_rate": 0.00011224575563037393,
      "loss": 2.99,
      "step": 164795
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.716271162033081,
      "learning_rate": 0.00011224256523240748,
      "loss": 2.9674,
      "step": 164796
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.194603443145752,
      "learning_rate": 0.00011223937486934868,
      "loss": 3.0767,
      "step": 164797
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4771006107330322,
      "learning_rate": 0.00011223618454119815,
      "loss": 3.058,
      "step": 164798
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0397276878356934,
      "learning_rate": 0.00011223299424795634,
      "loss": 3.0244,
      "step": 164799
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6007189750671387,
      "learning_rate": 0.00011222980398962406,
      "loss": 2.8828,
      "step": 164800
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.591343879699707,
      "learning_rate": 0.00011222661376620169,
      "loss": 2.8615,
      "step": 164801
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.416581630706787,
      "learning_rate": 0.00011222342357769007,
      "loss": 2.8166,
      "step": 164802
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4391345977783203,
      "learning_rate": 0.00011222023342408965,
      "loss": 2.8964,
      "step": 164803
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.39890456199646,
      "learning_rate": 0.00011221704330540103,
      "loss": 3.0737,
      "step": 164804
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5750982761383057,
      "learning_rate": 0.00011221385322162469,
      "loss": 3.061,
      "step": 164805
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2272706031799316,
      "learning_rate": 0.00011221066317276145,
      "loss": 2.836,
      "step": 164806
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2445504665374756,
      "learning_rate": 0.0001122074731588117,
      "loss": 3.0386,
      "step": 164807
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6954684257507324,
      "learning_rate": 0.00011220428317977622,
      "loss": 3.0968,
      "step": 164808
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3573362827301025,
      "learning_rate": 0.00011220109323565553,
      "loss": 2.8534,
      "step": 164809
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3763606548309326,
      "learning_rate": 0.00011219790332645018,
      "loss": 2.9912,
      "step": 164810
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.738189220428467,
      "learning_rate": 0.00011219471345216066,
      "loss": 2.9601,
      "step": 164811
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.8554818630218506,
      "learning_rate": 0.00011219152361278782,
      "loss": 2.9041,
      "step": 164812
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.447105884552002,
      "learning_rate": 0.00011218833380833203,
      "loss": 3.1248,
      "step": 164813
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2817540168762207,
      "learning_rate": 0.00011218514403879405,
      "loss": 3.0534,
      "step": 164814
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.310572862625122,
      "learning_rate": 0.00011218195430417431,
      "loss": 2.9979,
      "step": 164815
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.649066925048828,
      "learning_rate": 0.00011217876460447368,
      "loss": 2.8898,
      "step": 164816
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.291524648666382,
      "learning_rate": 0.00011217557493969237,
      "loss": 2.9924,
      "step": 164817
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4862706661224365,
      "learning_rate": 0.00011217238530983128,
      "loss": 3.2654,
      "step": 164818
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1721837520599365,
      "learning_rate": 0.00011216919571489075,
      "loss": 3.0805,
      "step": 164819
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5250585079193115,
      "learning_rate": 0.00011216600615487165,
      "loss": 3.0414,
      "step": 164820
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.67441725730896,
      "learning_rate": 0.00011216281662977432,
      "loss": 2.9939,
      "step": 164821
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4482452869415283,
      "learning_rate": 0.00011215962713959967,
      "loss": 3.0762,
      "step": 164822
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.435084819793701,
      "learning_rate": 0.0001121564376843479,
      "loss": 2.9119,
      "step": 164823
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5168285369873047,
      "learning_rate": 0.00011215324826401985,
      "loss": 2.9181,
      "step": 164824
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2458856105804443,
      "learning_rate": 0.00011215005887861601,
      "loss": 2.8378,
      "step": 164825
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.328242540359497,
      "learning_rate": 0.0001121468695281371,
      "loss": 2.9475,
      "step": 164826
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.32875394821167,
      "learning_rate": 0.00011214368021258354,
      "loss": 2.9687,
      "step": 164827
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.312635898590088,
      "learning_rate": 0.00011214049093195621,
      "loss": 2.7292,
      "step": 164828
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6478943824768066,
      "learning_rate": 0.00011213730168625532,
      "loss": 2.9673,
      "step": 164829
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.800034523010254,
      "learning_rate": 0.00011213411247548172,
      "loss": 3.1584,
      "step": 164830
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4674410820007324,
      "learning_rate": 0.00011213092329963588,
      "loss": 2.7572,
      "step": 164831
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.180713176727295,
      "learning_rate": 0.0001121277341587185,
      "loss": 3.0093,
      "step": 164832
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7294914722442627,
      "learning_rate": 0.00011212454505273004,
      "loss": 3.0755,
      "step": 164833
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3621158599853516,
      "learning_rate": 0.00011212135598167138,
      "loss": 2.8613,
      "step": 164834
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.66654372215271,
      "learning_rate": 0.0001121181669455427,
      "loss": 2.8129,
      "step": 164835
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6089534759521484,
      "learning_rate": 0.00011211497794434493,
      "loss": 2.9681,
      "step": 164836
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.620041847229004,
      "learning_rate": 0.00011211178897807841,
      "loss": 3.0192,
      "step": 164837
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.0828938484191895,
      "learning_rate": 0.00011210860004674395,
      "loss": 2.7541,
      "step": 164838
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.549471616744995,
      "learning_rate": 0.00011210541115034193,
      "loss": 2.9181,
      "step": 164839
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5812761783599854,
      "learning_rate": 0.00011210222228887319,
      "loss": 3.2275,
      "step": 164840
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.153205394744873,
      "learning_rate": 0.00011209903346233819,
      "loss": 3.2154,
      "step": 164841
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.451598644256592,
      "learning_rate": 0.00011209584467073752,
      "loss": 3.158,
      "step": 164842
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.050008535385132,
      "learning_rate": 0.00011209265591407167,
      "loss": 2.8918,
      "step": 164843
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5499978065490723,
      "learning_rate": 0.00011208946719234144,
      "loss": 2.8904,
      "step": 164844
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5702669620513916,
      "learning_rate": 0.00011208627850554723,
      "loss": 2.7433,
      "step": 164845
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7839841842651367,
      "learning_rate": 0.00011208308985368984,
      "loss": 3.1481,
      "step": 164846
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7563416957855225,
      "learning_rate": 0.00011207990123676974,
      "loss": 2.9658,
      "step": 164847
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.183647632598877,
      "learning_rate": 0.00011207671265478756,
      "loss": 2.9326,
      "step": 164848
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5777080059051514,
      "learning_rate": 0.00011207352410774373,
      "loss": 3.2592,
      "step": 164849
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.46773624420166,
      "learning_rate": 0.00011207033559563912,
      "loss": 2.7521,
      "step": 164850
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4678330421447754,
      "learning_rate": 0.00011206714711847406,
      "loss": 2.8001,
      "step": 164851
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1903738975524902,
      "learning_rate": 0.00011206395867624935,
      "loss": 2.8437,
      "step": 164852
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.580639123916626,
      "learning_rate": 0.00011206077026896552,
      "loss": 2.8788,
      "step": 164853
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.791733980178833,
      "learning_rate": 0.00011205758189662301,
      "loss": 2.8827,
      "step": 164854
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9865663051605225,
      "learning_rate": 0.00011205439355922268,
      "loss": 2.9573,
      "step": 164855
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1236941814422607,
      "learning_rate": 0.00011205120525676497,
      "loss": 3.0598,
      "step": 164856
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2233636379241943,
      "learning_rate": 0.00011204801698925036,
      "loss": 2.9665,
      "step": 164857
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.896610975265503,
      "learning_rate": 0.00011204482875667971,
      "loss": 2.8315,
      "step": 164858
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.441145420074463,
      "learning_rate": 0.00011204164055905348,
      "loss": 3.2159,
      "step": 164859
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.339094638824463,
      "learning_rate": 0.00011203845239637213,
      "loss": 2.9463,
      "step": 164860
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.242846727371216,
      "learning_rate": 0.00011203526426863648,
      "loss": 2.8177,
      "step": 164861
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.780932903289795,
      "learning_rate": 0.00011203207617584695,
      "loss": 2.8839,
      "step": 164862
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5794365406036377,
      "learning_rate": 0.00011202888811800426,
      "loss": 2.8725,
      "step": 164863
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.611816883087158,
      "learning_rate": 0.000112025700095109,
      "loss": 3.0002,
      "step": 164864
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.939971923828125,
      "learning_rate": 0.0001120225121071617,
      "loss": 2.9314,
      "step": 164865
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.683837413787842,
      "learning_rate": 0.00011201932415416282,
      "loss": 3.1484,
      "step": 164866
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0296976566314697,
      "learning_rate": 0.00011201613623611324,
      "loss": 2.867,
      "step": 164867
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4198644161224365,
      "learning_rate": 0.00011201294835301328,
      "loss": 3.035,
      "step": 164868
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.982656240463257,
      "learning_rate": 0.00011200976050486379,
      "loss": 3.0014,
      "step": 164869
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3067786693573,
      "learning_rate": 0.0001120065726916652,
      "loss": 2.9061,
      "step": 164870
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.351612091064453,
      "learning_rate": 0.00011200338491341817,
      "loss": 2.7625,
      "step": 164871
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9860095977783203,
      "learning_rate": 0.00011200019717012314,
      "loss": 2.7981,
      "step": 164872
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3573899269104004,
      "learning_rate": 0.00011199700946178095,
      "loss": 3.0059,
      "step": 164873
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.621688365936279,
      "learning_rate": 0.00011199382178839193,
      "loss": 2.764,
      "step": 164874
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.386307716369629,
      "learning_rate": 0.00011199063414995693,
      "loss": 2.8351,
      "step": 164875
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.889191150665283,
      "learning_rate": 0.00011198744654647643,
      "loss": 2.8746,
      "step": 164876
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8500452041625977,
      "learning_rate": 0.00011198425897795096,
      "loss": 2.7622,
      "step": 164877
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.961791753768921,
      "learning_rate": 0.0001119810714443811,
      "loss": 3.0125,
      "step": 164878
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.998382568359375,
      "learning_rate": 0.0001119778839457676,
      "loss": 3.0655,
      "step": 164879
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0059571266174316,
      "learning_rate": 0.0001119746964821109,
      "loss": 2.9488,
      "step": 164880
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.103632926940918,
      "learning_rate": 0.00011197150905341172,
      "loss": 2.9752,
      "step": 164881
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.750305414199829,
      "learning_rate": 0.00011196832165967047,
      "loss": 3.1418,
      "step": 164882
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.874875545501709,
      "learning_rate": 0.0001119651343008881,
      "loss": 3.0003,
      "step": 164883
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0735719203948975,
      "learning_rate": 0.00011196194697706472,
      "loss": 2.8953,
      "step": 164884
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.301776885986328,
      "learning_rate": 0.00011195875968820128,
      "loss": 2.8255,
      "step": 164885
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.551240921020508,
      "learning_rate": 0.00011195557243429815,
      "loss": 2.8105,
      "step": 164886
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.278543472290039,
      "learning_rate": 0.00011195238521535614,
      "loss": 3.1085,
      "step": 164887
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.610639810562134,
      "learning_rate": 0.00011194919803137561,
      "loss": 2.6474,
      "step": 164888
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.7264344692230225,
      "learning_rate": 0.0001119460108823575,
      "loss": 2.5194,
      "step": 164889
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4734508991241455,
      "learning_rate": 0.00011194282376830196,
      "loss": 3.0234,
      "step": 164890
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5119147300720215,
      "learning_rate": 0.0001119396366892099,
      "loss": 2.9448,
      "step": 164891
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1674163341522217,
      "learning_rate": 0.00011193644964508173,
      "loss": 3.2154,
      "step": 164892
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3683114051818848,
      "learning_rate": 0.00011193326263591818,
      "loss": 2.908,
      "step": 164893
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.602034330368042,
      "learning_rate": 0.00011193007566171975,
      "loss": 2.9345,
      "step": 164894
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.107351303100586,
      "learning_rate": 0.00011192688872248723,
      "loss": 2.9477,
      "step": 164895
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6539018154144287,
      "learning_rate": 0.00011192370181822084,
      "loss": 2.7572,
      "step": 164896
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.823669910430908,
      "learning_rate": 0.00011192051494892153,
      "loss": 3.104,
      "step": 164897
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.269296407699585,
      "learning_rate": 0.0001119173281145896,
      "loss": 3.2075,
      "step": 164898
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6723318099975586,
      "learning_rate": 0.0001119141413152259,
      "loss": 2.7697,
      "step": 164899
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.083026170730591,
      "learning_rate": 0.00011191095455083084,
      "loss": 2.8824,
      "step": 164900
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5531179904937744,
      "learning_rate": 0.00011190776782140526,
      "loss": 2.8462,
      "step": 164901
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5617711544036865,
      "learning_rate": 0.00011190458112694938,
      "loss": 2.9985,
      "step": 164902
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1219942569732666,
      "learning_rate": 0.00011190139446746406,
      "loss": 3.0521,
      "step": 164903
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.395745754241943,
      "learning_rate": 0.00011189820784294977,
      "loss": 3.0069,
      "step": 164904
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.6915571689605713,
      "learning_rate": 0.00011189502125340722,
      "loss": 2.9333,
      "step": 164905
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9149863719940186,
      "learning_rate": 0.00011189183469883684,
      "loss": 2.9017,
      "step": 164906
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.024773120880127,
      "learning_rate": 0.00011188864817923955,
      "loss": 2.7441,
      "step": 164907
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.222080945968628,
      "learning_rate": 0.00011188546169461546,
      "loss": 3.1227,
      "step": 164908
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3675971031188965,
      "learning_rate": 0.00011188227524496556,
      "loss": 2.9593,
      "step": 164909
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.249486446380615,
      "learning_rate": 0.00011187908883029017,
      "loss": 3.0909,
      "step": 164910
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8900537490844727,
      "learning_rate": 0.00011187590245059011,
      "loss": 2.8901,
      "step": 164911
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.52053165435791,
      "learning_rate": 0.00011187271610586578,
      "loss": 2.9215,
      "step": 164912
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2501885890960693,
      "learning_rate": 0.00011186952979611809,
      "loss": 2.7491,
      "step": 164913
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.138451099395752,
      "learning_rate": 0.00011186634352134717,
      "loss": 3.1723,
      "step": 164914
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9058449268341064,
      "learning_rate": 0.00011186315728155396,
      "loss": 2.867,
      "step": 164915
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.872201919555664,
      "learning_rate": 0.00011185997107673881,
      "loss": 2.8949,
      "step": 164916
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0289113521575928,
      "learning_rate": 0.00011185678490690257,
      "loss": 2.9858,
      "step": 164917
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.738905429840088,
      "learning_rate": 0.00011185359877204563,
      "loss": 3.1636,
      "step": 164918
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.686634063720703,
      "learning_rate": 0.00011185041267216884,
      "loss": 3.1167,
      "step": 164919
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6403048038482666,
      "learning_rate": 0.00011184722660727239,
      "loss": 3.1496,
      "step": 164920
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.105678081512451,
      "learning_rate": 0.0001118440405773572,
      "loss": 2.829,
      "step": 164921
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4742817878723145,
      "learning_rate": 0.00011184085458242365,
      "loss": 2.8532,
      "step": 164922
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.566291332244873,
      "learning_rate": 0.00011183766862247253,
      "loss": 2.88,
      "step": 164923
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.704479694366455,
      "learning_rate": 0.00011183448269750425,
      "loss": 2.8635,
      "step": 164924
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6130616664886475,
      "learning_rate": 0.0001118312968075196,
      "loss": 2.893,
      "step": 164925
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4895403385162354,
      "learning_rate": 0.00011182811095251905,
      "loss": 2.9626,
      "step": 164926
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.369851589202881,
      "learning_rate": 0.00011182492513250323,
      "loss": 2.82,
      "step": 164927
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.186455726623535,
      "learning_rate": 0.00011182173934747258,
      "loss": 3.0317,
      "step": 164928
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.82230806350708,
      "learning_rate": 0.00011181855359742796,
      "loss": 3.2145,
      "step": 164929
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4335556030273438,
      "learning_rate": 0.0001118153678823697,
      "loss": 2.9248,
      "step": 164930
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.551220417022705,
      "learning_rate": 0.0001118121822022986,
      "loss": 2.8244,
      "step": 164931
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2849457263946533,
      "learning_rate": 0.0001118089965572152,
      "loss": 3.1334,
      "step": 164932
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3387627601623535,
      "learning_rate": 0.00011180581094712002,
      "loss": 2.8773,
      "step": 164933
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.19530987739563,
      "learning_rate": 0.00011180262537201363,
      "loss": 2.8077,
      "step": 164934
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1082098484039307,
      "learning_rate": 0.00011179943983189677,
      "loss": 2.8321,
      "step": 164935
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.869015693664551,
      "learning_rate": 0.00011179625432676982,
      "loss": 2.7961,
      "step": 164936
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.318243980407715,
      "learning_rate": 0.00011179306885663364,
      "loss": 3.0421,
      "step": 164937
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3933966159820557,
      "learning_rate": 0.00011178988342148866,
      "loss": 2.8901,
      "step": 164938
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5436840057373047,
      "learning_rate": 0.00011178669802133541,
      "loss": 2.9615,
      "step": 164939
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3381168842315674,
      "learning_rate": 0.00011178351265617463,
      "loss": 2.925,
      "step": 164940
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3222577571868896,
      "learning_rate": 0.00011178032732600687,
      "loss": 2.9276,
      "step": 164941
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.172121524810791,
      "learning_rate": 0.00011177714203083261,
      "loss": 2.9006,
      "step": 164942
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7119109630584717,
      "learning_rate": 0.00011177395677065263,
      "loss": 3.1261,
      "step": 164943
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4340555667877197,
      "learning_rate": 0.00011177077154546742,
      "loss": 2.8881,
      "step": 164944
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4366400241851807,
      "learning_rate": 0.00011176758635527744,
      "loss": 2.9679,
      "step": 164945
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0022096633911133,
      "learning_rate": 0.00011176440120008358,
      "loss": 2.8453,
      "step": 164946
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9692604541778564,
      "learning_rate": 0.00011176121607988612,
      "loss": 3.0668,
      "step": 164947
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3021204471588135,
      "learning_rate": 0.00011175803099468594,
      "loss": 2.7705,
      "step": 164948
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.975029706954956,
      "learning_rate": 0.00011175484594448349,
      "loss": 2.8579,
      "step": 164949
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.72112512588501,
      "learning_rate": 0.00011175166092927936,
      "loss": 3.0085,
      "step": 164950
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.398080825805664,
      "learning_rate": 0.00011174847594907404,
      "loss": 3.0156,
      "step": 164951
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1174943447113037,
      "learning_rate": 0.00011174529100386834,
      "loss": 3.1736,
      "step": 164952
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8751349449157715,
      "learning_rate": 0.00011174210609366268,
      "loss": 2.8438,
      "step": 164953
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.013978004455566,
      "learning_rate": 0.00011173892121845778,
      "loss": 2.9058,
      "step": 164954
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3301761150360107,
      "learning_rate": 0.00011173573637825415,
      "loss": 3.003,
      "step": 164955
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4561424255371094,
      "learning_rate": 0.00011173255157305242,
      "loss": 3.0196,
      "step": 164956
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7451212406158447,
      "learning_rate": 0.00011172936680285307,
      "loss": 2.944,
      "step": 164957
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.191705703735352,
      "learning_rate": 0.00011172618206765687,
      "loss": 3.0221,
      "step": 164958
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8383734226226807,
      "learning_rate": 0.00011172299736746424,
      "loss": 2.8699,
      "step": 164959
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7066290378570557,
      "learning_rate": 0.00011171981270227595,
      "loss": 2.9345,
      "step": 164960
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5171866416931152,
      "learning_rate": 0.00011171662807209242,
      "loss": 2.6979,
      "step": 164961
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6932547092437744,
      "learning_rate": 0.00011171344347691449,
      "loss": 2.9921,
      "step": 164962
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0492074489593506,
      "learning_rate": 0.00011171025891674243,
      "loss": 3.2529,
      "step": 164963
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4426732063293457,
      "learning_rate": 0.00011170707439157705,
      "loss": 2.9562,
      "step": 164964
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.082777261734009,
      "learning_rate": 0.00011170388990141879,
      "loss": 2.9708,
      "step": 164965
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.394078254699707,
      "learning_rate": 0.00011170070544626841,
      "loss": 2.8744,
      "step": 164966
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.1817216873168945,
      "learning_rate": 0.00011169752102612633,
      "loss": 2.9634,
      "step": 164967
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9146921634674072,
      "learning_rate": 0.00011169433664099349,
      "loss": 2.9483,
      "step": 164968
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9302608966827393,
      "learning_rate": 0.00011169115229086996,
      "loss": 3.0717,
      "step": 164969
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.697437286376953,
      "learning_rate": 0.00011168796797575675,
      "loss": 2.961,
      "step": 164970
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9403975009918213,
      "learning_rate": 0.00011168478369565414,
      "loss": 2.7529,
      "step": 164971
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.6692605018615723,
      "learning_rate": 0.00011168159945056304,
      "loss": 2.9793,
      "step": 164972
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.383565664291382,
      "learning_rate": 0.00011167841524048376,
      "loss": 3.112,
      "step": 164973
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.841212511062622,
      "learning_rate": 0.00011167523106541721,
      "loss": 3.4179,
      "step": 164974
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.804856538772583,
      "learning_rate": 0.00011167204692536362,
      "loss": 2.8089,
      "step": 164975
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.61181378364563,
      "learning_rate": 0.00011166886282032382,
      "loss": 3.0246,
      "step": 164976
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.462712287902832,
      "learning_rate": 0.00011166567875029824,
      "loss": 2.9639,
      "step": 164977
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.333441972732544,
      "learning_rate": 0.0001116624947152877,
      "loss": 2.8496,
      "step": 164978
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.0485801696777344,
      "learning_rate": 0.00011165931071529251,
      "loss": 3.0392,
      "step": 164979
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4192047119140625,
      "learning_rate": 0.00011165612675031363,
      "loss": 2.8234,
      "step": 164980
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5947093963623047,
      "learning_rate": 0.00011165294282035126,
      "loss": 3.0395,
      "step": 164981
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2077014446258545,
      "learning_rate": 0.00011164975892540623,
      "loss": 2.8416,
      "step": 164982
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.544018507003784,
      "learning_rate": 0.00011164657506547895,
      "loss": 3.0754,
      "step": 164983
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.046067953109741,
      "learning_rate": 0.00011164339124057024,
      "loss": 2.7388,
      "step": 164984
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6479408740997314,
      "learning_rate": 0.00011164020745068049,
      "loss": 2.9474,
      "step": 164985
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.632502555847168,
      "learning_rate": 0.00011163702369581057,
      "loss": 3.0709,
      "step": 164986
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.243826389312744,
      "learning_rate": 0.00011163383997596069,
      "loss": 3.0046,
      "step": 164987
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.567216396331787,
      "learning_rate": 0.00011163065629113176,
      "loss": 2.9739,
      "step": 164988
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6958649158477783,
      "learning_rate": 0.00011162747264132412,
      "loss": 3.0403,
      "step": 164989
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.81992506980896,
      "learning_rate": 0.00011162428902653858,
      "loss": 2.8026,
      "step": 164990
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2573893070220947,
      "learning_rate": 0.00011162110544677553,
      "loss": 2.9875,
      "step": 164991
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.088773488998413,
      "learning_rate": 0.00011161792190203591,
      "loss": 2.831,
      "step": 164992
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.486830949783325,
      "learning_rate": 0.00011161473839231987,
      "loss": 2.833,
      "step": 164993
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.020090341567993,
      "learning_rate": 0.00011161155491762827,
      "loss": 2.9299,
      "step": 164994
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.578066349029541,
      "learning_rate": 0.00011160837147796157,
      "loss": 3.0688,
      "step": 164995
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4657206535339355,
      "learning_rate": 0.00011160518807332051,
      "loss": 3.0761,
      "step": 164996
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.863647222518921,
      "learning_rate": 0.00011160200470370553,
      "loss": 2.8886,
      "step": 164997
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.574221611022949,
      "learning_rate": 0.00011159882136911747,
      "loss": 2.8785,
      "step": 164998
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6774649620056152,
      "learning_rate": 0.00011159563806955655,
      "loss": 3.0412,
      "step": 164999
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8421590328216553,
      "learning_rate": 0.00011159245480502365,
      "loss": 3.0022,
      "step": 165000
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.821338415145874,
      "learning_rate": 0.00011158927157551919,
      "loss": 2.8644,
      "step": 165001
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3659889698028564,
      "learning_rate": 0.00011158608838104391,
      "loss": 2.8563,
      "step": 165002
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.210205316543579,
      "learning_rate": 0.00011158290522159824,
      "loss": 2.6525,
      "step": 165003
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5340631008148193,
      "learning_rate": 0.0001115797220971831,
      "loss": 2.8392,
      "step": 165004
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.354161500930786,
      "learning_rate": 0.00011157653900779863,
      "loss": 3.1252,
      "step": 165005
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.476804494857788,
      "learning_rate": 0.00011157335595344573,
      "loss": 2.7036,
      "step": 165006
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9421474933624268,
      "learning_rate": 0.00011157017293412482,
      "loss": 3.1253,
      "step": 165007
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5956857204437256,
      "learning_rate": 0.00011156698994983663,
      "loss": 2.7266,
      "step": 165008
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.930699110031128,
      "learning_rate": 0.00011156380700058161,
      "loss": 2.9825,
      "step": 165009
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.208831548690796,
      "learning_rate": 0.00011156062408636066,
      "loss": 2.8989,
      "step": 165010
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8170971870422363,
      "learning_rate": 0.00011155744120717391,
      "loss": 2.8298,
      "step": 165011
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.202817440032959,
      "learning_rate": 0.00011155425836302232,
      "loss": 2.95,
      "step": 165012
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.03208065032959,
      "learning_rate": 0.00011155107555390623,
      "loss": 2.7756,
      "step": 165013
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3835105895996094,
      "learning_rate": 0.00011154789277982647,
      "loss": 2.8486,
      "step": 165014
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3021914958953857,
      "learning_rate": 0.00011154471004078339,
      "loss": 2.6306,
      "step": 165015
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.677213191986084,
      "learning_rate": 0.00011154152733677781,
      "loss": 3.0559,
      "step": 165016
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.669614553451538,
      "learning_rate": 0.00011153834466781024,
      "loss": 2.9937,
      "step": 165017
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.785855531692505,
      "learning_rate": 0.00011153516203388125,
      "loss": 2.9787,
      "step": 165018
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4452946186065674,
      "learning_rate": 0.00011153197943499131,
      "loss": 3.1188,
      "step": 165019
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4145257472991943,
      "learning_rate": 0.00011152879687114122,
      "loss": 3.0158,
      "step": 165020
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.463122606277466,
      "learning_rate": 0.00011152561434233144,
      "loss": 2.8308,
      "step": 165021
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.332494020462036,
      "learning_rate": 0.00011152243184856265,
      "loss": 3.0616,
      "step": 165022
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1621387004852295,
      "learning_rate": 0.0001115192493898354,
      "loss": 2.7521,
      "step": 165023
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.127558708190918,
      "learning_rate": 0.00011151606696615023,
      "loss": 2.6395,
      "step": 165024
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.6781067848205566,
      "learning_rate": 0.00011151288457750785,
      "loss": 3.1376,
      "step": 165025
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2451579570770264,
      "learning_rate": 0.00011150970222390877,
      "loss": 2.8836,
      "step": 165026
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.76237416267395,
      "learning_rate": 0.00011150651990535351,
      "loss": 2.5533,
      "step": 165027
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2617838382720947,
      "learning_rate": 0.00011150333762184284,
      "loss": 2.9887,
      "step": 165028
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.169059991836548,
      "learning_rate": 0.00011150015537337726,
      "loss": 2.8859,
      "step": 165029
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.601659059524536,
      "learning_rate": 0.00011149697315995726,
      "loss": 2.8152,
      "step": 165030
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3845605850219727,
      "learning_rate": 0.00011149379098158361,
      "loss": 3.0642,
      "step": 165031
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5437793731689453,
      "learning_rate": 0.00011149060883825685,
      "loss": 3.293,
      "step": 165032
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.558933973312378,
      "learning_rate": 0.00011148742672997743,
      "loss": 3.0197,
      "step": 165033
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1971957683563232,
      "learning_rate": 0.00011148424465674619,
      "loss": 2.9833,
      "step": 165034
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3687500953674316,
      "learning_rate": 0.00011148106261856354,
      "loss": 3.1285,
      "step": 165035
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.411484956741333,
      "learning_rate": 0.00011147788061543005,
      "loss": 3.0123,
      "step": 165036
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.0693531036376953,
      "learning_rate": 0.00011147469864734647,
      "loss": 2.8614,
      "step": 165037
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7815229892730713,
      "learning_rate": 0.00011147151671431319,
      "loss": 2.8569,
      "step": 165038
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.190265417098999,
      "learning_rate": 0.00011146833481633102,
      "loss": 2.8327,
      "step": 165039
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.848721981048584,
      "learning_rate": 0.00011146515295340045,
      "loss": 3.0403,
      "step": 165040
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3718459606170654,
      "learning_rate": 0.00011146197112552208,
      "loss": 3.2404,
      "step": 165041
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2188217639923096,
      "learning_rate": 0.00011145878933269636,
      "loss": 3.0043,
      "step": 165042
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.503101348876953,
      "learning_rate": 0.00011145560757492414,
      "loss": 2.6191,
      "step": 165043
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5733370780944824,
      "learning_rate": 0.00011145242585220578,
      "loss": 2.9376,
      "step": 165044
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3355050086975098,
      "learning_rate": 0.00011144924416454206,
      "loss": 2.7649,
      "step": 165045
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.214115858078003,
      "learning_rate": 0.00011144606251193338,
      "loss": 2.9447,
      "step": 165046
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2432100772857666,
      "learning_rate": 0.00011144288089438065,
      "loss": 3.0032,
      "step": 165047
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.380059242248535,
      "learning_rate": 0.00011143969931188404,
      "loss": 2.8151,
      "step": 165048
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.118159532546997,
      "learning_rate": 0.00011143651776444443,
      "loss": 3.1662,
      "step": 165049
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4866995811462402,
      "learning_rate": 0.00011143333625206226,
      "loss": 2.9931,
      "step": 165050
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6632211208343506,
      "learning_rate": 0.00011143015477473827,
      "loss": 2.8151,
      "step": 165051
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.419860363006592,
      "learning_rate": 0.0001114269733324729,
      "loss": 3.0199,
      "step": 165052
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.486841917037964,
      "learning_rate": 0.00011142379192526699,
      "loss": 2.9859,
      "step": 165053
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.1912384033203125,
      "learning_rate": 0.00011142061055312077,
      "loss": 2.9691,
      "step": 165054
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.6055750846862793,
      "learning_rate": 0.00011141742921603513,
      "loss": 3.1197,
      "step": 165055
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5897624492645264,
      "learning_rate": 0.00011141424791401042,
      "loss": 2.9335,
      "step": 165056
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3505992889404297,
      "learning_rate": 0.0001114110666470475,
      "loss": 2.7786,
      "step": 165057
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6823537349700928,
      "learning_rate": 0.00011140788541514669,
      "loss": 2.7792,
      "step": 165058
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.344407558441162,
      "learning_rate": 0.0001114047042183089,
      "loss": 2.8186,
      "step": 165059
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9003946781158447,
      "learning_rate": 0.00011140152305653436,
      "loss": 3.157,
      "step": 165060
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5567336082458496,
      "learning_rate": 0.00011139834192982395,
      "loss": 3.088,
      "step": 165061
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.039250373840332,
      "learning_rate": 0.00011139516083817802,
      "loss": 2.8845,
      "step": 165062
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.325242042541504,
      "learning_rate": 0.00011139197978159741,
      "loss": 3.1893,
      "step": 165063
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.176622152328491,
      "learning_rate": 0.00011138879876008252,
      "loss": 2.9143,
      "step": 165064
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.667361259460449,
      "learning_rate": 0.00011138561777363416,
      "loss": 2.9534,
      "step": 165065
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5203092098236084,
      "learning_rate": 0.0001113824368222526,
      "loss": 2.9702,
      "step": 165066
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8030619621276855,
      "learning_rate": 0.00011137925590593868,
      "loss": 2.9281,
      "step": 165067
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.140669584274292,
      "learning_rate": 0.00011137607502469285,
      "loss": 2.9638,
      "step": 165068
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.012200355529785,
      "learning_rate": 0.00011137289417851582,
      "loss": 2.8472,
      "step": 165069
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.114403486251831,
      "learning_rate": 0.00011136971336740808,
      "loss": 2.8535,
      "step": 165070
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8448848724365234,
      "learning_rate": 0.00011136653259137046,
      "loss": 2.9128,
      "step": 165071
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.351778507232666,
      "learning_rate": 0.00011136335185040315,
      "loss": 2.7566,
      "step": 165072
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.306518793106079,
      "learning_rate": 0.00011136017114450705,
      "loss": 2.588,
      "step": 165073
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.238774538040161,
      "learning_rate": 0.00011135699047368256,
      "loss": 2.8371,
      "step": 165074
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4077346324920654,
      "learning_rate": 0.00011135380983793048,
      "loss": 2.9968,
      "step": 165075
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7710111141204834,
      "learning_rate": 0.0001113506292372512,
      "loss": 3.0384,
      "step": 165076
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.0116374492645264,
      "learning_rate": 0.00011134744867164556,
      "loss": 2.9356,
      "step": 165077
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8469181060791016,
      "learning_rate": 0.00011134426814111383,
      "loss": 3.1121,
      "step": 165078
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.667086124420166,
      "learning_rate": 0.00011134108764565682,
      "loss": 3.1981,
      "step": 165079
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.7033796310424805,
      "learning_rate": 0.00011133790718527496,
      "loss": 3.035,
      "step": 165080
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.337216377258301,
      "learning_rate": 0.00011133472675996909,
      "loss": 3.2489,
      "step": 165081
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.42138671875,
      "learning_rate": 0.00011133154636973951,
      "loss": 2.8375,
      "step": 165082
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.584172248840332,
      "learning_rate": 0.00011132836601458712,
      "loss": 2.898,
      "step": 165083
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5777580738067627,
      "learning_rate": 0.0001113251856945123,
      "loss": 2.9748,
      "step": 165084
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.33154034614563,
      "learning_rate": 0.00011132200540951567,
      "loss": 2.856,
      "step": 165085
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5585391521453857,
      "learning_rate": 0.00011131882515959779,
      "loss": 2.8803,
      "step": 165086
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0267810821533203,
      "learning_rate": 0.00011131564494475936,
      "loss": 2.8606,
      "step": 165087
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1490590572357178,
      "learning_rate": 0.00011131246476500085,
      "loss": 3.3065,
      "step": 165088
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.532390594482422,
      "learning_rate": 0.000111309284620323,
      "loss": 2.9885,
      "step": 165089
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1423261165618896,
      "learning_rate": 0.00011130610451072631,
      "loss": 2.8335,
      "step": 165090
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.543428421020508,
      "learning_rate": 0.00011130292443621138,
      "loss": 2.8841,
      "step": 165091
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.55709171295166,
      "learning_rate": 0.0001112997443967787,
      "loss": 2.8779,
      "step": 165092
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.615028142929077,
      "learning_rate": 0.00011129656439242906,
      "loss": 2.8097,
      "step": 165093
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6488161087036133,
      "learning_rate": 0.00011129338442316285,
      "loss": 2.8836,
      "step": 165094
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.060865879058838,
      "learning_rate": 0.00011129020448898084,
      "loss": 3.0763,
      "step": 165095
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9652018547058105,
      "learning_rate": 0.00011128702458988356,
      "loss": 2.8827,
      "step": 165096
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.189659595489502,
      "learning_rate": 0.00011128384472587159,
      "loss": 2.8423,
      "step": 165097
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.570039749145508,
      "learning_rate": 0.00011128066489694542,
      "loss": 2.7264,
      "step": 165098
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4068453311920166,
      "learning_rate": 0.00011127748510310582,
      "loss": 3.0194,
      "step": 165099
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.330878496170044,
      "learning_rate": 0.00011127430534435321,
      "loss": 2.9545,
      "step": 165100
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7937021255493164,
      "learning_rate": 0.00011127112562068838,
      "loss": 2.8176,
      "step": 165101
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1523876190185547,
      "learning_rate": 0.00011126794593211182,
      "loss": 3.119,
      "step": 165102
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.794254779815674,
      "learning_rate": 0.00011126476627862407,
      "loss": 2.9291,
      "step": 165103
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3173162937164307,
      "learning_rate": 0.00011126158666022567,
      "loss": 2.9505,
      "step": 165104
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.056960344314575,
      "learning_rate": 0.00011125840707691742,
      "loss": 2.879,
      "step": 165105
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.338469982147217,
      "learning_rate": 0.0001112552275286997,
      "loss": 2.9772,
      "step": 165106
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.992489814758301,
      "learning_rate": 0.00011125204801557329,
      "loss": 3.1666,
      "step": 165107
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.570768356323242,
      "learning_rate": 0.00011124886853753867,
      "loss": 2.7358,
      "step": 165108
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7530829906463623,
      "learning_rate": 0.00011124568909459638,
      "loss": 2.9137,
      "step": 165109
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.304969549179077,
      "learning_rate": 0.00011124250968674716,
      "loss": 2.8587,
      "step": 165110
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.814746856689453,
      "learning_rate": 0.00011123933031399151,
      "loss": 3.0491,
      "step": 165111
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4387693405151367,
      "learning_rate": 0.00011123615097632994,
      "loss": 3.0122,
      "step": 165112
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.134462833404541,
      "learning_rate": 0.00011123297167376323,
      "loss": 3.1783,
      "step": 165113
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.862208127975464,
      "learning_rate": 0.00011122979240629191,
      "loss": 2.714,
      "step": 165114
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4217240810394287,
      "learning_rate": 0.00011122661317391638,
      "loss": 2.7767,
      "step": 165115
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6670706272125244,
      "learning_rate": 0.0001112234339766375,
      "loss": 2.9675,
      "step": 165116
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8342349529266357,
      "learning_rate": 0.00011122025481445577,
      "loss": 3.1444,
      "step": 165117
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.299941062927246,
      "learning_rate": 0.00011121707568737164,
      "loss": 2.8753,
      "step": 165118
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.522406816482544,
      "learning_rate": 0.00011121389659538596,
      "loss": 2.7655,
      "step": 165119
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.565476894378662,
      "learning_rate": 0.00011121071753849912,
      "loss": 2.88,
      "step": 165120
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.012733459472656,
      "learning_rate": 0.0001112075385167117,
      "loss": 3.0005,
      "step": 165121
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.645585775375366,
      "learning_rate": 0.00011120435953002445,
      "loss": 3.1816,
      "step": 165122
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2363855838775635,
      "learning_rate": 0.00011120118057843778,
      "loss": 2.5837,
      "step": 165123
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5423269271850586,
      "learning_rate": 0.00011119800166195249,
      "loss": 3.0809,
      "step": 165124
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3157694339752197,
      "learning_rate": 0.00011119482278056904,
      "loss": 2.6746,
      "step": 165125
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4826250076293945,
      "learning_rate": 0.00011119164393428804,
      "loss": 3.1394,
      "step": 165126
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5186100006103516,
      "learning_rate": 0.00011118846512310998,
      "loss": 3.0869,
      "step": 165127
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.472092390060425,
      "learning_rate": 0.00011118528634703566,
      "loss": 2.9782,
      "step": 165128
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5937716960906982,
      "learning_rate": 0.00011118210760606544,
      "loss": 2.7893,
      "step": 165129
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.355347156524658,
      "learning_rate": 0.00011117892890020011,
      "loss": 2.7441,
      "step": 165130
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1651761531829834,
      "learning_rate": 0.00011117575022944021,
      "loss": 3.018,
      "step": 165131
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.702130079269409,
      "learning_rate": 0.00011117257159378633,
      "loss": 3.0685,
      "step": 165132
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5575780868530273,
      "learning_rate": 0.00011116939299323888,
      "loss": 3.1359,
      "step": 165133
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0203757286071777,
      "learning_rate": 0.00011116621442779873,
      "loss": 2.8204,
      "step": 165134
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7030844688415527,
      "learning_rate": 0.00011116303589746623,
      "loss": 2.8721,
      "step": 165135
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.8999228477478027,
      "learning_rate": 0.0001111598574022422,
      "loss": 2.8485,
      "step": 165136
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.690490961074829,
      "learning_rate": 0.000111156678942127,
      "loss": 2.9079,
      "step": 165137
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8119029998779297,
      "learning_rate": 0.00011115350051712158,
      "loss": 2.9327,
      "step": 165138
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.784576416015625,
      "learning_rate": 0.00011115032212722606,
      "loss": 2.8598,
      "step": 165139
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.9265265464782715,
      "learning_rate": 0.00011114714377244134,
      "loss": 2.941,
      "step": 165140
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9852113723754883,
      "learning_rate": 0.00011114396545276786,
      "loss": 2.8649,
      "step": 165141
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.701814651489258,
      "learning_rate": 0.00011114078716820639,
      "loss": 2.9627,
      "step": 165142
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9939017295837402,
      "learning_rate": 0.00011113760891875727,
      "loss": 2.6176,
      "step": 165143
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5458152294158936,
      "learning_rate": 0.00011113443070442147,
      "loss": 3.1932,
      "step": 165144
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.656966686248779,
      "learning_rate": 0.00011113125252519913,
      "loss": 2.9918,
      "step": 165145
    },
    {
      "epoch": 2.15,
      "grad_norm": 6.475121974945068,
      "learning_rate": 0.00011112807438109118,
      "loss": 3.0917,
      "step": 165146
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.647794723510742,
      "learning_rate": 0.00011112489627209794,
      "loss": 2.7468,
      "step": 165147
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.26509690284729,
      "learning_rate": 0.00011112171819822028,
      "loss": 2.9861,
      "step": 165148
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.426500082015991,
      "learning_rate": 0.00011111854015945856,
      "loss": 3.0979,
      "step": 165149
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.498230218887329,
      "learning_rate": 0.00011111536215581355,
      "loss": 3.2491,
      "step": 165150
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9721803665161133,
      "learning_rate": 0.00011111218418728577,
      "loss": 2.9081,
      "step": 165151
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.7492756843566895,
      "learning_rate": 0.00011110900625387582,
      "loss": 2.7035,
      "step": 165152
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.416311740875244,
      "learning_rate": 0.00011110582835558413,
      "loss": 3.0181,
      "step": 165153
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8075647354125977,
      "learning_rate": 0.00011110265049241156,
      "loss": 3.0875,
      "step": 165154
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5926194190979004,
      "learning_rate": 0.00011109947266435846,
      "loss": 3.0696,
      "step": 165155
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.060748100280762,
      "learning_rate": 0.00011109629487142567,
      "loss": 2.9057,
      "step": 165156
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.763912677764893,
      "learning_rate": 0.00011109311711361361,
      "loss": 2.6791,
      "step": 165157
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6261110305786133,
      "learning_rate": 0.00011108993939092288,
      "loss": 3.0124,
      "step": 165158
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.205251693725586,
      "learning_rate": 0.00011108676170335405,
      "loss": 2.9426,
      "step": 165159
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0623180866241455,
      "learning_rate": 0.00011108358405090784,
      "loss": 2.8679,
      "step": 165160
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3225696086883545,
      "learning_rate": 0.00011108040643358464,
      "loss": 2.9023,
      "step": 165161
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.074004888534546,
      "learning_rate": 0.00011107722885138526,
      "loss": 3.1112,
      "step": 165162
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2893316745758057,
      "learning_rate": 0.00011107405130431021,
      "loss": 2.9919,
      "step": 165163
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.353667736053467,
      "learning_rate": 0.00011107087379236005,
      "loss": 2.9435,
      "step": 165164
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.495067596435547,
      "learning_rate": 0.00011106769631553529,
      "loss": 3.2512,
      "step": 165165
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.735344171524048,
      "learning_rate": 0.00011106451887383671,
      "loss": 3.0114,
      "step": 165166
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8538448810577393,
      "learning_rate": 0.00011106134146726469,
      "loss": 2.7878,
      "step": 165167
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6900229454040527,
      "learning_rate": 0.00011105816409582004,
      "loss": 3.0758,
      "step": 165168
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.429828405380249,
      "learning_rate": 0.00011105498675950322,
      "loss": 3.0384,
      "step": 165169
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.476970672607422,
      "learning_rate": 0.00011105180945831486,
      "loss": 2.9912,
      "step": 165170
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2935574054718018,
      "learning_rate": 0.00011104863219225545,
      "loss": 3.0707,
      "step": 165171
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.617806911468506,
      "learning_rate": 0.00011104545496132577,
      "loss": 2.9076,
      "step": 165172
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3864028453826904,
      "learning_rate": 0.00011104227776552618,
      "loss": 2.737,
      "step": 165173
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5215277671813965,
      "learning_rate": 0.00011103910060485753,
      "loss": 2.8692,
      "step": 165174
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.496408462524414,
      "learning_rate": 0.00011103592347932026,
      "loss": 3.0488,
      "step": 165175
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9915261268615723,
      "learning_rate": 0.00011103274638891498,
      "loss": 2.616,
      "step": 165176
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.700026273727417,
      "learning_rate": 0.00011102956933364217,
      "loss": 3.1092,
      "step": 165177
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.403334379196167,
      "learning_rate": 0.00011102639231350264,
      "loss": 3.1576,
      "step": 165178
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5401298999786377,
      "learning_rate": 0.00011102321532849677,
      "loss": 2.8566,
      "step": 165179
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.232719898223877,
      "learning_rate": 0.00011102003837862533,
      "loss": 2.9206,
      "step": 165180
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.281418800354004,
      "learning_rate": 0.00011101686146388885,
      "loss": 2.7653,
      "step": 165181
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3927619457244873,
      "learning_rate": 0.00011101368458428793,
      "loss": 2.9728,
      "step": 165182
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.395246982574463,
      "learning_rate": 0.000111010507739823,
      "loss": 2.8661,
      "step": 165183
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.381084680557251,
      "learning_rate": 0.00011100733093049491,
      "loss": 3.0464,
      "step": 165184
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3019676208496094,
      "learning_rate": 0.000111004154156304,
      "loss": 2.7705,
      "step": 165185
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.654832601547241,
      "learning_rate": 0.00011100097741725109,
      "loss": 2.755,
      "step": 165186
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4329960346221924,
      "learning_rate": 0.00011099780071333667,
      "loss": 3.0484,
      "step": 165187
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.622497320175171,
      "learning_rate": 0.00011099462404456131,
      "loss": 3.0087,
      "step": 165188
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3578836917877197,
      "learning_rate": 0.00011099144741092554,
      "loss": 2.8612,
      "step": 165189
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6548421382904053,
      "learning_rate": 0.00011098827081243013,
      "loss": 3.002,
      "step": 165190
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.166423797607422,
      "learning_rate": 0.00011098509424907543,
      "loss": 2.7331,
      "step": 165191
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.902606964111328,
      "learning_rate": 0.0001109819177208623,
      "loss": 2.7606,
      "step": 165192
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.765331745147705,
      "learning_rate": 0.00011097874122779123,
      "loss": 2.8517,
      "step": 165193
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.521411657333374,
      "learning_rate": 0.00011097556476986274,
      "loss": 3.0218,
      "step": 165194
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5829992294311523,
      "learning_rate": 0.00011097238834707736,
      "loss": 2.9408,
      "step": 165195
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6685256958007812,
      "learning_rate": 0.00011096921195943588,
      "loss": 3.2867,
      "step": 165196
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6628079414367676,
      "learning_rate": 0.0001109660356069387,
      "loss": 2.7276,
      "step": 165197
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1697847843170166,
      "learning_rate": 0.00011096285928958663,
      "loss": 2.788,
      "step": 165198
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3845784664154053,
      "learning_rate": 0.00011095968300738007,
      "loss": 3.043,
      "step": 165199
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.333176851272583,
      "learning_rate": 0.0001109565067603196,
      "loss": 2.9085,
      "step": 165200
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5108413696289062,
      "learning_rate": 0.00011095333054840602,
      "loss": 3.0464,
      "step": 165201
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3975541591644287,
      "learning_rate": 0.00011095015437163977,
      "loss": 2.8813,
      "step": 165202
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3183202743530273,
      "learning_rate": 0.00011094697823002132,
      "loss": 2.8297,
      "step": 165203
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.564085006713867,
      "learning_rate": 0.00011094380212355152,
      "loss": 2.7113,
      "step": 165204
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.537827968597412,
      "learning_rate": 0.00011094062605223085,
      "loss": 3.2058,
      "step": 165205
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3564586639404297,
      "learning_rate": 0.00011093745001605976,
      "loss": 2.9505,
      "step": 165206
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.060581922531128,
      "learning_rate": 0.00011093427401503909,
      "loss": 3.1501,
      "step": 165207
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0473098754882812,
      "learning_rate": 0.00011093109804916919,
      "loss": 2.886,
      "step": 165208
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6730077266693115,
      "learning_rate": 0.00011092792211845087,
      "loss": 3.1004,
      "step": 165209
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.380249977111816,
      "learning_rate": 0.0001109247462228846,
      "loss": 2.8449,
      "step": 165210
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.975656509399414,
      "learning_rate": 0.00011092157036247103,
      "loss": 2.8389,
      "step": 165211
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.063967227935791,
      "learning_rate": 0.00011091839453721058,
      "loss": 3.166,
      "step": 165212
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.339995861053467,
      "learning_rate": 0.00011091521874710407,
      "loss": 2.9086,
      "step": 165213
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.638415813446045,
      "learning_rate": 0.00011091204299215188,
      "loss": 2.9941,
      "step": 165214
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4141275882720947,
      "learning_rate": 0.00011090886727235485,
      "loss": 3.0956,
      "step": 165215
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2821505069732666,
      "learning_rate": 0.00011090569158771339,
      "loss": 2.992,
      "step": 165216
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.809619903564453,
      "learning_rate": 0.00011090251593822806,
      "loss": 2.7775,
      "step": 165217
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5629513263702393,
      "learning_rate": 0.00011089934032389962,
      "loss": 2.7579,
      "step": 165218
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.109109878540039,
      "learning_rate": 0.00011089616474472856,
      "loss": 2.9233,
      "step": 165219
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0581984519958496,
      "learning_rate": 0.00011089298920071535,
      "loss": 2.8279,
      "step": 165220
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.967506170272827,
      "learning_rate": 0.00011088981369186081,
      "loss": 3.1029,
      "step": 165221
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.141544818878174,
      "learning_rate": 0.00011088663821816536,
      "loss": 3.044,
      "step": 165222
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8674228191375732,
      "learning_rate": 0.00011088346277962974,
      "loss": 2.9207,
      "step": 165223
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7736172676086426,
      "learning_rate": 0.00011088028737625443,
      "loss": 2.8584,
      "step": 165224
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.785098075866699,
      "learning_rate": 0.00011087711200804005,
      "loss": 3.0014,
      "step": 165225
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.683558940887451,
      "learning_rate": 0.00011087393667498709,
      "loss": 3.1159,
      "step": 165226
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.371981620788574,
      "learning_rate": 0.00011087076137709634,
      "loss": 3.1756,
      "step": 165227
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4480645656585693,
      "learning_rate": 0.00011086758611436819,
      "loss": 2.8744,
      "step": 165228
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7528247833251953,
      "learning_rate": 0.00011086441088680341,
      "loss": 2.891,
      "step": 165229
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.656646728515625,
      "learning_rate": 0.0001108612356944025,
      "loss": 2.8482,
      "step": 165230
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.092691898345947,
      "learning_rate": 0.00011085806053716607,
      "loss": 2.9533,
      "step": 165231
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4013328552246094,
      "learning_rate": 0.0001108548854150946,
      "loss": 3.0506,
      "step": 165232
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.150944709777832,
      "learning_rate": 0.00011085171032818889,
      "loss": 3.0001,
      "step": 165233
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4852230548858643,
      "learning_rate": 0.0001108485352764493,
      "loss": 3.1934,
      "step": 165234
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.945744037628174,
      "learning_rate": 0.00011084536025987663,
      "loss": 2.8998,
      "step": 165235
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.316105365753174,
      "learning_rate": 0.00011084218527847138,
      "loss": 2.9648,
      "step": 165236
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.6349050998687744,
      "learning_rate": 0.00011083901033223417,
      "loss": 2.8551,
      "step": 165237
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.36555814743042,
      "learning_rate": 0.00011083583542116542,
      "loss": 2.8403,
      "step": 165238
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1842398643493652,
      "learning_rate": 0.00011083266054526597,
      "loss": 2.7985,
      "step": 165239
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.286268711090088,
      "learning_rate": 0.0001108294857045362,
      "loss": 3.1146,
      "step": 165240
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.074735164642334,
      "learning_rate": 0.0001108263108989769,
      "loss": 3.1161,
      "step": 165241
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.998857259750366,
      "learning_rate": 0.00011082313612858856,
      "loss": 2.9575,
      "step": 165242
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4691145420074463,
      "learning_rate": 0.00011081996139337178,
      "loss": 2.9775,
      "step": 165243
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.395209789276123,
      "learning_rate": 0.00011081678669332704,
      "loss": 3.1085,
      "step": 165244
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.422023057937622,
      "learning_rate": 0.00011081361202845511,
      "loss": 2.8197,
      "step": 165245
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8108866214752197,
      "learning_rate": 0.00011081043739875642,
      "loss": 3.0555,
      "step": 165246
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.064705848693848,
      "learning_rate": 0.00011080726280423173,
      "loss": 2.6902,
      "step": 165247
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.863388538360596,
      "learning_rate": 0.00011080408824488154,
      "loss": 3.0324,
      "step": 165248
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7323155403137207,
      "learning_rate": 0.00011080091372070647,
      "loss": 3.0395,
      "step": 165249
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.364539623260498,
      "learning_rate": 0.00011079773923170694,
      "loss": 2.9838,
      "step": 165250
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4611918926239014,
      "learning_rate": 0.0001107945647778838,
      "loss": 2.8269,
      "step": 165251
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.453840494155884,
      "learning_rate": 0.00011079139035923742,
      "loss": 2.9711,
      "step": 165252
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8346292972564697,
      "learning_rate": 0.0001107882159757686,
      "loss": 2.9284,
      "step": 165253
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.227170467376709,
      "learning_rate": 0.00011078504162747781,
      "loss": 2.9362,
      "step": 165254
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8448781967163086,
      "learning_rate": 0.00011078186731436568,
      "loss": 2.8977,
      "step": 165255
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.451835870742798,
      "learning_rate": 0.00011077869303643265,
      "loss": 3.0587,
      "step": 165256
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.405400037765503,
      "learning_rate": 0.00011077551879367953,
      "loss": 3.2567,
      "step": 165257
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.339890241622925,
      "learning_rate": 0.00011077234458610671,
      "loss": 2.7907,
      "step": 165258
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3951833248138428,
      "learning_rate": 0.00011076917041371498,
      "loss": 2.9447,
      "step": 165259
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.388821601867676,
      "learning_rate": 0.00011076599627650484,
      "loss": 2.9888,
      "step": 165260
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9463050365448,
      "learning_rate": 0.00011076282217447689,
      "loss": 3.0391,
      "step": 165261
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9477741718292236,
      "learning_rate": 0.00011075964810763161,
      "loss": 3.1078,
      "step": 165262
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.0739569664001465,
      "learning_rate": 0.00011075647407596973,
      "loss": 2.8035,
      "step": 165263
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6292426586151123,
      "learning_rate": 0.00011075330007949176,
      "loss": 2.875,
      "step": 165264
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.070568323135376,
      "learning_rate": 0.00011075012611819837,
      "loss": 3.2013,
      "step": 165265
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.341583490371704,
      "learning_rate": 0.00011074695219209014,
      "loss": 2.7629,
      "step": 165266
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.051995277404785,
      "learning_rate": 0.00011074377830116762,
      "loss": 2.9362,
      "step": 165267
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4556386470794678,
      "learning_rate": 0.00011074060444543126,
      "loss": 3.009,
      "step": 165268
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6200907230377197,
      "learning_rate": 0.00011073743062488195,
      "loss": 2.8098,
      "step": 165269
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3194844722747803,
      "learning_rate": 0.00011073425683952,
      "loss": 2.9074,
      "step": 165270
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.597235679626465,
      "learning_rate": 0.00011073108308934623,
      "loss": 2.9355,
      "step": 165271
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.023085117340088,
      "learning_rate": 0.00011072790937436112,
      "loss": 3.1577,
      "step": 165272
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.8410251140594482,
      "learning_rate": 0.00011072473569456525,
      "loss": 2.8582,
      "step": 165273
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1304547786712646,
      "learning_rate": 0.00011072156204995913,
      "loss": 2.9169,
      "step": 165274
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5427944660186768,
      "learning_rate": 0.00011071838844054357,
      "loss": 3.0799,
      "step": 165275
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.67281436920166,
      "learning_rate": 0.00011071521486631892,
      "loss": 2.9543,
      "step": 165276
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1005475521087646,
      "learning_rate": 0.00011071204132728599,
      "loss": 2.9288,
      "step": 165277
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.553046226501465,
      "learning_rate": 0.00011070886782344525,
      "loss": 3.1969,
      "step": 165278
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.340583086013794,
      "learning_rate": 0.00011070569435479731,
      "loss": 3.005,
      "step": 165279
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.523883104324341,
      "learning_rate": 0.00011070252092134267,
      "loss": 3.1359,
      "step": 165280
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4963765144348145,
      "learning_rate": 0.0001106993475230821,
      "loss": 3.02,
      "step": 165281
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2501659393310547,
      "learning_rate": 0.00011069617416001597,
      "loss": 2.9007,
      "step": 165282
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1151905059814453,
      "learning_rate": 0.0001106930008321451,
      "loss": 2.8946,
      "step": 165283
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9606261253356934,
      "learning_rate": 0.0001106898275394699,
      "loss": 3.0013,
      "step": 165284
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2329931259155273,
      "learning_rate": 0.00011068665428199111,
      "loss": 3.0441,
      "step": 165285
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5549848079681396,
      "learning_rate": 0.00011068348105970925,
      "loss": 2.9106,
      "step": 165286
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7131075859069824,
      "learning_rate": 0.00011068030787262491,
      "loss": 3.0166,
      "step": 165287
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4153518676757812,
      "learning_rate": 0.00011067713472073856,
      "loss": 3.0972,
      "step": 165288
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6915714740753174,
      "learning_rate": 0.00011067396160405103,
      "loss": 2.9732,
      "step": 165289
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6938016414642334,
      "learning_rate": 0.00011067078852256267,
      "loss": 3.197,
      "step": 165290
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.339545488357544,
      "learning_rate": 0.00011066761547627429,
      "loss": 2.7749,
      "step": 165291
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8195183277130127,
      "learning_rate": 0.00011066444246518638,
      "loss": 2.919,
      "step": 165292
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.323230743408203,
      "learning_rate": 0.0001106612694892995,
      "loss": 3.0773,
      "step": 165293
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7035608291625977,
      "learning_rate": 0.00011065809654861418,
      "loss": 3.0522,
      "step": 165294
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.650571346282959,
      "learning_rate": 0.0001106549236431312,
      "loss": 2.842,
      "step": 165295
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8146212100982666,
      "learning_rate": 0.00011065175077285096,
      "loss": 3.1335,
      "step": 165296
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.515028715133667,
      "learning_rate": 0.00011064857793777419,
      "loss": 2.8224,
      "step": 165297
    },
    {
      "epoch": 2.15,
      "grad_norm": 1.954957127571106,
      "learning_rate": 0.00011064540513790145,
      "loss": 3.1592,
      "step": 165298
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8710668087005615,
      "learning_rate": 0.00011064223237323319,
      "loss": 3.3016,
      "step": 165299
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7219064235687256,
      "learning_rate": 0.00011063905964377025,
      "loss": 2.8963,
      "step": 165300
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.593087911605835,
      "learning_rate": 0.00011063588694951303,
      "loss": 2.8511,
      "step": 165301
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.287248611450195,
      "learning_rate": 0.00011063271429046209,
      "loss": 2.9781,
      "step": 165302
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.975355386734009,
      "learning_rate": 0.00011062954166661822,
      "loss": 2.7758,
      "step": 165303
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.499333381652832,
      "learning_rate": 0.00011062636907798188,
      "loss": 3.0534,
      "step": 165304
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7317092418670654,
      "learning_rate": 0.00011062319652455358,
      "loss": 3.042,
      "step": 165305
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4217875003814697,
      "learning_rate": 0.00011062002400633408,
      "loss": 2.9325,
      "step": 165306
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1125102043151855,
      "learning_rate": 0.00011061685152332383,
      "loss": 2.9726,
      "step": 165307
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.101616382598877,
      "learning_rate": 0.00011061367907552356,
      "loss": 2.9213,
      "step": 165308
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.341801643371582,
      "learning_rate": 0.00011061050666293378,
      "loss": 2.8177,
      "step": 165309
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.664816379547119,
      "learning_rate": 0.00011060733428555508,
      "loss": 2.9788,
      "step": 165310
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5659992694854736,
      "learning_rate": 0.00011060416194338795,
      "loss": 3.1566,
      "step": 165311
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7889792919158936,
      "learning_rate": 0.0001106009896364332,
      "loss": 2.9264,
      "step": 165312
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5126190185546875,
      "learning_rate": 0.00011059781736469121,
      "loss": 2.9869,
      "step": 165313
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.565223455429077,
      "learning_rate": 0.00011059464512816276,
      "loss": 2.9012,
      "step": 165314
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1350908279418945,
      "learning_rate": 0.00011059147292684829,
      "loss": 2.7676,
      "step": 165315
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.279996633529663,
      "learning_rate": 0.00011058830076074849,
      "loss": 2.847,
      "step": 165316
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.63165020942688,
      "learning_rate": 0.0001105851286298638,
      "loss": 2.8746,
      "step": 165317
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.588975191116333,
      "learning_rate": 0.00011058195653419495,
      "loss": 2.7305,
      "step": 165318
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3099308013916016,
      "learning_rate": 0.00011057878447374246,
      "loss": 2.8956,
      "step": 165319
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0793955326080322,
      "learning_rate": 0.00011057561244850701,
      "loss": 2.8239,
      "step": 165320
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0220701694488525,
      "learning_rate": 0.00011057244045848916,
      "loss": 2.9703,
      "step": 165321
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.891680955886841,
      "learning_rate": 0.00011056926850368945,
      "loss": 2.6767,
      "step": 165322
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.069999933242798,
      "learning_rate": 0.00011056609658410839,
      "loss": 2.8455,
      "step": 165323
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.444655656814575,
      "learning_rate": 0.00011056292469974676,
      "loss": 2.7763,
      "step": 165324
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3976986408233643,
      "learning_rate": 0.00011055975285060496,
      "loss": 2.9015,
      "step": 165325
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.261198043823242,
      "learning_rate": 0.00011055658103668382,
      "loss": 2.7496,
      "step": 165326
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6039774417877197,
      "learning_rate": 0.00011055340925798373,
      "loss": 2.9335,
      "step": 165327
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.170905590057373,
      "learning_rate": 0.00011055023751450537,
      "loss": 2.9639,
      "step": 165328
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.7046334743499756,
      "learning_rate": 0.00011054706580624917,
      "loss": 2.9362,
      "step": 165329
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3438875675201416,
      "learning_rate": 0.00011054389413321596,
      "loss": 3.0906,
      "step": 165330
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7622783184051514,
      "learning_rate": 0.00011054072249540614,
      "loss": 3.0172,
      "step": 165331
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2517499923706055,
      "learning_rate": 0.00011053755089282045,
      "loss": 2.7472,
      "step": 165332
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4720566272735596,
      "learning_rate": 0.00011053437932545941,
      "loss": 3.0056,
      "step": 165333
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3032078742980957,
      "learning_rate": 0.0001105312077933236,
      "loss": 3.0469,
      "step": 165334
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9526402950286865,
      "learning_rate": 0.00011052803629641353,
      "loss": 2.8612,
      "step": 165335
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.432804584503174,
      "learning_rate": 0.00011052486483472997,
      "loss": 3.1847,
      "step": 165336
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.58976149559021,
      "learning_rate": 0.00011052169340827331,
      "loss": 2.8462,
      "step": 165337
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.477546453475952,
      "learning_rate": 0.00011051852201704435,
      "loss": 3.1572,
      "step": 165338
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.456981658935547,
      "learning_rate": 0.00011051535066104355,
      "loss": 3.2153,
      "step": 165339
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.447289228439331,
      "learning_rate": 0.00011051217934027156,
      "loss": 2.8303,
      "step": 165340
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3206088542938232,
      "learning_rate": 0.00011050900805472878,
      "loss": 2.8688,
      "step": 165341
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5904736518859863,
      "learning_rate": 0.0001105058368044161,
      "loss": 2.755,
      "step": 165342
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.321486473083496,
      "learning_rate": 0.00011050266558933387,
      "loss": 2.9485,
      "step": 165343
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.734323024749756,
      "learning_rate": 0.00011049949440948282,
      "loss": 3.1541,
      "step": 165344
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.632459878921509,
      "learning_rate": 0.00011049632326486353,
      "loss": 2.956,
      "step": 165345
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6315419673919678,
      "learning_rate": 0.00011049315215547655,
      "loss": 3.1023,
      "step": 165346
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.765136957168579,
      "learning_rate": 0.00011048998108132235,
      "loss": 2.8254,
      "step": 165347
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3317224979400635,
      "learning_rate": 0.00011048681004240178,
      "loss": 2.7813,
      "step": 165348
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.607476234436035,
      "learning_rate": 0.00011048363903871514,
      "loss": 2.9998,
      "step": 165349
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8627936840057373,
      "learning_rate": 0.0001104804680702633,
      "loss": 2.8322,
      "step": 165350
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.10583233833313,
      "learning_rate": 0.0001104772971370466,
      "loss": 2.9958,
      "step": 165351
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.481140613555908,
      "learning_rate": 0.00011047412623906598,
      "loss": 2.995,
      "step": 165352
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1231167316436768,
      "learning_rate": 0.00011047095537632157,
      "loss": 3.003,
      "step": 165353
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7620222568511963,
      "learning_rate": 0.00011046778454881427,
      "loss": 3.2157,
      "step": 165354
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.505038022994995,
      "learning_rate": 0.0001104646137565445,
      "loss": 3.0622,
      "step": 165355
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5381150245666504,
      "learning_rate": 0.00011046144299951306,
      "loss": 3.1132,
      "step": 165356
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.509702444076538,
      "learning_rate": 0.0001104582722777203,
      "loss": 3.0912,
      "step": 165357
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.607560157775879,
      "learning_rate": 0.00011045510159116713,
      "loss": 2.9691,
      "step": 165358
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.876457691192627,
      "learning_rate": 0.00011045193093985373,
      "loss": 3.0209,
      "step": 165359
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6118462085723877,
      "learning_rate": 0.00011044876032378097,
      "loss": 2.9014,
      "step": 165360
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2048134803771973,
      "learning_rate": 0.00011044558974294929,
      "loss": 2.9286,
      "step": 165361
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.539926052093506,
      "learning_rate": 0.00011044241919735945,
      "loss": 2.9133,
      "step": 165362
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2791364192962646,
      "learning_rate": 0.00011043924868701182,
      "loss": 2.9962,
      "step": 165363
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.342341184616089,
      "learning_rate": 0.00011043607821190738,
      "loss": 3.1271,
      "step": 165364
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8212900161743164,
      "learning_rate": 0.0001104329077720462,
      "loss": 2.8149,
      "step": 165365
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.432598352432251,
      "learning_rate": 0.0001104297373674292,
      "loss": 2.9128,
      "step": 165366
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.8643250465393066,
      "learning_rate": 0.00011042656699805683,
      "loss": 2.8174,
      "step": 165367
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7501089572906494,
      "learning_rate": 0.00011042339666392981,
      "loss": 2.9387,
      "step": 165368
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7999722957611084,
      "learning_rate": 0.00011042022636504859,
      "loss": 3.0075,
      "step": 165369
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.631629467010498,
      "learning_rate": 0.00011041705610141392,
      "loss": 2.8057,
      "step": 165370
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4112038612365723,
      "learning_rate": 0.00011041388587302628,
      "loss": 2.7822,
      "step": 165371
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6483261585235596,
      "learning_rate": 0.00011041071567988628,
      "loss": 2.9447,
      "step": 165372
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9499282836914062,
      "learning_rate": 0.00011040754552199441,
      "loss": 2.9923,
      "step": 165373
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.702068567276001,
      "learning_rate": 0.00011040437539935149,
      "loss": 2.9146,
      "step": 165374
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.65592622756958,
      "learning_rate": 0.00011040120531195783,
      "loss": 2.9002,
      "step": 165375
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3064112663269043,
      "learning_rate": 0.00011039803525981428,
      "loss": 2.9109,
      "step": 165376
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.248643159866333,
      "learning_rate": 0.00011039486524292132,
      "loss": 3.0885,
      "step": 165377
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5845534801483154,
      "learning_rate": 0.00011039169526127955,
      "loss": 3.0177,
      "step": 165378
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.671663522720337,
      "learning_rate": 0.0001103885253148894,
      "loss": 3.1227,
      "step": 165379
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7673027515411377,
      "learning_rate": 0.00011038535540375172,
      "loss": 2.9684,
      "step": 165380
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3253164291381836,
      "learning_rate": 0.00011038218552786687,
      "loss": 3.1798,
      "step": 165381
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3014779090881348,
      "learning_rate": 0.0001103790156872357,
      "loss": 3.2384,
      "step": 165382
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.323798418045044,
      "learning_rate": 0.00011037584588185862,
      "loss": 2.7455,
      "step": 165383
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.779413938522339,
      "learning_rate": 0.00011037267611173613,
      "loss": 3.232,
      "step": 165384
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1665563583374023,
      "learning_rate": 0.00011036950637686905,
      "loss": 3.1582,
      "step": 165385
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3077032566070557,
      "learning_rate": 0.00011036633667725787,
      "loss": 2.9214,
      "step": 165386
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.383906364440918,
      "learning_rate": 0.00011036316701290305,
      "loss": 2.811,
      "step": 165387
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9693281650543213,
      "learning_rate": 0.00011035999738380543,
      "loss": 2.9102,
      "step": 165388
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.087913990020752,
      "learning_rate": 0.00011035682778996544,
      "loss": 2.9217,
      "step": 165389
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.364321708679199,
      "learning_rate": 0.00011035365823138362,
      "loss": 3.0034,
      "step": 165390
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.015829086303711,
      "learning_rate": 0.00011035048870806071,
      "loss": 2.9653,
      "step": 165391
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.6661720275878906,
      "learning_rate": 0.00011034731921999711,
      "loss": 2.8576,
      "step": 165392
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.809067964553833,
      "learning_rate": 0.00011034414976719367,
      "loss": 2.7926,
      "step": 165393
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.665447235107422,
      "learning_rate": 0.00011034098034965079,
      "loss": 2.942,
      "step": 165394
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.744089126586914,
      "learning_rate": 0.00011033781096736913,
      "loss": 2.8203,
      "step": 165395
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.515674114227295,
      "learning_rate": 0.00011033464162034914,
      "loss": 2.8396,
      "step": 165396
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.643361806869507,
      "learning_rate": 0.00011033147230859161,
      "loss": 2.9091,
      "step": 165397
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9779860973358154,
      "learning_rate": 0.00011032830303209694,
      "loss": 2.9453,
      "step": 165398
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4807729721069336,
      "learning_rate": 0.00011032513379086592,
      "loss": 3.0161,
      "step": 165399
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.245267868041992,
      "learning_rate": 0.00011032196458489906,
      "loss": 2.7741,
      "step": 165400
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.59033203125,
      "learning_rate": 0.00011031879541419692,
      "loss": 2.9894,
      "step": 165401
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.509099245071411,
      "learning_rate": 0.00011031562627875999,
      "loss": 3.0702,
      "step": 165402
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.3949666023254395,
      "learning_rate": 0.00011031245717858906,
      "loss": 2.4513,
      "step": 165403
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5065362453460693,
      "learning_rate": 0.00011030928811368454,
      "loss": 3.2024,
      "step": 165404
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.668210983276367,
      "learning_rate": 0.00011030611908404716,
      "loss": 2.8499,
      "step": 165405
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.415942430496216,
      "learning_rate": 0.0001103029500896775,
      "loss": 3.1082,
      "step": 165406
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6684653759002686,
      "learning_rate": 0.00011029978113057608,
      "loss": 2.9124,
      "step": 165407
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.7542810440063477,
      "learning_rate": 0.00011029661220674345,
      "loss": 2.8746,
      "step": 165408
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.46935772895813,
      "learning_rate": 0.00011029344331818032,
      "loss": 2.8009,
      "step": 165409
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3608310222625732,
      "learning_rate": 0.00011029027446488711,
      "loss": 2.8416,
      "step": 165410
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8803296089172363,
      "learning_rate": 0.00011028710564686467,
      "loss": 2.9337,
      "step": 165411
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.882361650466919,
      "learning_rate": 0.00011028393686411342,
      "loss": 2.971,
      "step": 165412
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.160931348800659,
      "learning_rate": 0.00011028076811663393,
      "loss": 2.981,
      "step": 165413
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.796884298324585,
      "learning_rate": 0.00011027759940442674,
      "loss": 2.982,
      "step": 165414
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.08236837387085,
      "learning_rate": 0.00011027443072749266,
      "loss": 3.086,
      "step": 165415
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.464993953704834,
      "learning_rate": 0.00011027126208583202,
      "loss": 3.0583,
      "step": 165416
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2309131622314453,
      "learning_rate": 0.00011026809347944564,
      "loss": 3.5509,
      "step": 165417
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.379703998565674,
      "learning_rate": 0.00011026492490833388,
      "loss": 2.9022,
      "step": 165418
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2240848541259766,
      "learning_rate": 0.00011026175637249768,
      "loss": 3.0356,
      "step": 165419
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.009507894515991,
      "learning_rate": 0.00011025858787193719,
      "loss": 2.8922,
      "step": 165420
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.716371536254883,
      "learning_rate": 0.00011025541940665331,
      "loss": 3.1024,
      "step": 165421
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3092129230499268,
      "learning_rate": 0.00011025225097664645,
      "loss": 3.2405,
      "step": 165422
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.628880023956299,
      "learning_rate": 0.00011024908258191733,
      "loss": 3.0203,
      "step": 165423
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.9362428188323975,
      "learning_rate": 0.00011024591422246643,
      "loss": 2.8422,
      "step": 165424
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.775578498840332,
      "learning_rate": 0.00011024274589829458,
      "loss": 3.0654,
      "step": 165425
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.542358875274658,
      "learning_rate": 0.00011023957760940201,
      "loss": 2.9121,
      "step": 165426
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1991429328918457,
      "learning_rate": 0.00011023640935578955,
      "loss": 3.0724,
      "step": 165427
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.376669406890869,
      "learning_rate": 0.00011023324113745763,
      "loss": 3.1432,
      "step": 165428
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.532688617706299,
      "learning_rate": 0.00011023007295440706,
      "loss": 2.9347,
      "step": 165429
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1628973484039307,
      "learning_rate": 0.00011022690480663817,
      "loss": 3.268,
      "step": 165430
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4706828594207764,
      "learning_rate": 0.00011022373669415191,
      "loss": 2.9891,
      "step": 165431
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0210630893707275,
      "learning_rate": 0.0001102205686169484,
      "loss": 3.0437,
      "step": 165432
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.624713897705078,
      "learning_rate": 0.0001102174005750286,
      "loss": 2.9633,
      "step": 165433
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.5431699752807617,
      "learning_rate": 0.00011021423256839285,
      "loss": 3.1482,
      "step": 165434
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.985342025756836,
      "learning_rate": 0.00011021106459704197,
      "loss": 3.0436,
      "step": 165435
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.125256538391113,
      "learning_rate": 0.00011020789666097633,
      "loss": 2.6291,
      "step": 165436
    },
    {
      "epoch": 2.15,
      "grad_norm": 1.989017128944397,
      "learning_rate": 0.00011020472876019682,
      "loss": 3.1293,
      "step": 165437
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.101996660232544,
      "learning_rate": 0.00011020156089470363,
      "loss": 2.9887,
      "step": 165438
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.944929599761963,
      "learning_rate": 0.00011019839306449768,
      "loss": 2.834,
      "step": 165439
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0032601356506348,
      "learning_rate": 0.0001101952252695793,
      "loss": 2.9365,
      "step": 165440
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.45527982711792,
      "learning_rate": 0.00011019205750994932,
      "loss": 3.0567,
      "step": 165441
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.170802116394043,
      "learning_rate": 0.00011018888978560813,
      "loss": 2.9793,
      "step": 165442
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.77470326423645,
      "learning_rate": 0.00011018572209655662,
      "loss": 2.6776,
      "step": 165443
    },
    {
      "epoch": 2.15,
      "grad_norm": 5.305910587310791,
      "learning_rate": 0.00011018255444279494,
      "loss": 3.0627,
      "step": 165444
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8940234184265137,
      "learning_rate": 0.00011017938682432401,
      "loss": 2.8248,
      "step": 165445
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.578284502029419,
      "learning_rate": 0.00011017621924114422,
      "loss": 2.8445,
      "step": 165446
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.781409502029419,
      "learning_rate": 0.00011017305169325634,
      "loss": 2.9784,
      "step": 165447
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.396597146987915,
      "learning_rate": 0.0001101698841806608,
      "loss": 2.8856,
      "step": 165448
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7953481674194336,
      "learning_rate": 0.00011016671670335845,
      "loss": 3.112,
      "step": 165449
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.716013193130493,
      "learning_rate": 0.0001101635492613495,
      "loss": 2.8876,
      "step": 165450
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.033496141433716,
      "learning_rate": 0.00011016038185463483,
      "loss": 2.9861,
      "step": 165451
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4588441848754883,
      "learning_rate": 0.00011015721448321478,
      "loss": 3.0739,
      "step": 165452
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5279970169067383,
      "learning_rate": 0.00011015404714709024,
      "loss": 2.9289,
      "step": 165453
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.3318984508514404,
      "learning_rate": 0.00011015087984626153,
      "loss": 2.9325,
      "step": 165454
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.386326313018799,
      "learning_rate": 0.00011014771258072956,
      "loss": 3.1009,
      "step": 165455
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2772016525268555,
      "learning_rate": 0.00011014454535049451,
      "loss": 2.8232,
      "step": 165456
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.725123882293701,
      "learning_rate": 0.00011014137815555726,
      "loss": 2.848,
      "step": 165457
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.2447502613067627,
      "learning_rate": 0.00011013821099591824,
      "loss": 3.0161,
      "step": 165458
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.890458583831787,
      "learning_rate": 0.00011013504387157819,
      "loss": 3.0936,
      "step": 165459
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.266981601715088,
      "learning_rate": 0.00011013187678253753,
      "loss": 3.1933,
      "step": 165460
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6207640171051025,
      "learning_rate": 0.00011012870972879704,
      "loss": 3.1195,
      "step": 165461
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7879550457000732,
      "learning_rate": 0.00011012554271035721,
      "loss": 2.841,
      "step": 165462
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.342405080795288,
      "learning_rate": 0.0001101223757272186,
      "loss": 2.7803,
      "step": 165463
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4319844245910645,
      "learning_rate": 0.00011011920877938173,
      "loss": 2.9495,
      "step": 165464
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.712674379348755,
      "learning_rate": 0.00011011604186684738,
      "loss": 3.063,
      "step": 165465
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.885458469390869,
      "learning_rate": 0.00011011287498961596,
      "loss": 3.1407,
      "step": 165466
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8871243000030518,
      "learning_rate": 0.00011010970814768823,
      "loss": 2.9031,
      "step": 165467
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.578249931335449,
      "learning_rate": 0.0001101065413410647,
      "loss": 2.8383,
      "step": 165468
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.634568929672241,
      "learning_rate": 0.00011010337456974587,
      "loss": 2.8464,
      "step": 165469
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.154825210571289,
      "learning_rate": 0.00011010020783373248,
      "loss": 2.7927,
      "step": 165470
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.3212528228759766,
      "learning_rate": 0.00011009704113302506,
      "loss": 2.7385,
      "step": 165471
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.067326307296753,
      "learning_rate": 0.00011009387446762405,
      "loss": 2.9635,
      "step": 165472
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.429227590560913,
      "learning_rate": 0.00011009070783753031,
      "loss": 2.8295,
      "step": 165473
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4919514656066895,
      "learning_rate": 0.00011008754124274426,
      "loss": 2.8752,
      "step": 165474
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.414376974105835,
      "learning_rate": 0.00011008437468326643,
      "loss": 3.1033,
      "step": 165475
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1599924564361572,
      "learning_rate": 0.00011008120815909762,
      "loss": 2.8106,
      "step": 165476
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.993011236190796,
      "learning_rate": 0.0001100780416702383,
      "loss": 2.6126,
      "step": 165477
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.188915252685547,
      "learning_rate": 0.00011007487521668893,
      "loss": 3.1314,
      "step": 165478
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.1928136348724365,
      "learning_rate": 0.00011007170879845034,
      "loss": 3.0887,
      "step": 165479
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7969982624053955,
      "learning_rate": 0.00011006854241552301,
      "loss": 2.8378,
      "step": 165480
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.217113971710205,
      "learning_rate": 0.0001100653760679074,
      "loss": 2.8218,
      "step": 165481
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8217997550964355,
      "learning_rate": 0.00011006220975560436,
      "loss": 2.9317,
      "step": 165482
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.0417144298553467,
      "learning_rate": 0.00011005904347861423,
      "loss": 3.0258,
      "step": 165483
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7908356189727783,
      "learning_rate": 0.00011005587723693778,
      "loss": 2.7055,
      "step": 165484
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.203887701034546,
      "learning_rate": 0.00011005271103057556,
      "loss": 2.877,
      "step": 165485
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.1654069423675537,
      "learning_rate": 0.00011004954485952812,
      "loss": 2.7868,
      "step": 165486
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6876401901245117,
      "learning_rate": 0.00011004637872379592,
      "loss": 2.6826,
      "step": 165487
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.867553472518921,
      "learning_rate": 0.00011004321262337986,
      "loss": 2.842,
      "step": 165488
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.261636257171631,
      "learning_rate": 0.00011004004655828019,
      "loss": 2.7375,
      "step": 165489
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.2937395572662354,
      "learning_rate": 0.00011003688052849775,
      "loss": 3.1804,
      "step": 165490
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.4298481941223145,
      "learning_rate": 0.000110033714534033,
      "loss": 2.6903,
      "step": 165491
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.5819435119628906,
      "learning_rate": 0.00011003054857488672,
      "loss": 3.2027,
      "step": 165492
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.9820737838745117,
      "learning_rate": 0.00011002738265105915,
      "loss": 2.8538,
      "step": 165493
    },
    {
      "epoch": 2.15,
      "grad_norm": 3.105731725692749,
      "learning_rate": 0.0001100242167625512,
      "loss": 3.0023,
      "step": 165494
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.698150396347046,
      "learning_rate": 0.00011002105090936323,
      "loss": 2.8483,
      "step": 165495
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.4864656925201416,
      "learning_rate": 0.000110017885091496,
      "loss": 3.3529,
      "step": 165496
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.445355176925659,
      "learning_rate": 0.00011001471930894996,
      "loss": 3.0244,
      "step": 165497
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.7787606716156006,
      "learning_rate": 0.00011001155356172596,
      "loss": 2.9318,
      "step": 165498
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.8228657245635986,
      "learning_rate": 0.0001100083878498242,
      "loss": 2.932,
      "step": 165499
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.361560821533203,
      "learning_rate": 0.0001100052221732456,
      "loss": 3.2102,
      "step": 165500
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.722460985183716,
      "learning_rate": 0.00011000205653199047,
      "loss": 2.8534,
      "step": 165501
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.844430446624756,
      "learning_rate": 0.00010999889092605968,
      "loss": 2.9726,
      "step": 165502
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.6319146156311035,
      "learning_rate": 0.00010999572535545354,
      "loss": 2.6891,
      "step": 165503
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.604261636734009,
      "learning_rate": 0.00010999255982017303,
      "loss": 2.9419,
      "step": 165504
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.695131540298462,
      "learning_rate": 0.00010998939432021824,
      "loss": 2.8358,
      "step": 165505
    },
    {
      "epoch": 2.15,
      "grad_norm": 4.05313777923584,
      "learning_rate": 0.00010998622885559016,
      "loss": 2.9664,
      "step": 165506
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4945085048675537,
      "learning_rate": 0.00010998306342628908,
      "loss": 2.94,
      "step": 165507
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6229965686798096,
      "learning_rate": 0.00010997989803231587,
      "loss": 2.8595,
      "step": 165508
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9703798294067383,
      "learning_rate": 0.00010997673267367084,
      "loss": 3.0716,
      "step": 165509
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.403432846069336,
      "learning_rate": 0.00010997356735035498,
      "loss": 3.0241,
      "step": 165510
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3695921897888184,
      "learning_rate": 0.00010997040206236836,
      "loss": 2.9966,
      "step": 165511
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0956485271453857,
      "learning_rate": 0.00010996723680971199,
      "loss": 2.987,
      "step": 165512
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7055158615112305,
      "learning_rate": 0.00010996407159238615,
      "loss": 3.0449,
      "step": 165513
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3950374126434326,
      "learning_rate": 0.00010996090641039171,
      "loss": 2.9263,
      "step": 165514
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.089776039123535,
      "learning_rate": 0.00010995774126372902,
      "loss": 2.9074,
      "step": 165515
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4587442874908447,
      "learning_rate": 0.00010995457615239895,
      "loss": 2.9851,
      "step": 165516
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.667203903198242,
      "learning_rate": 0.00010995141107640172,
      "loss": 2.9395,
      "step": 165517
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8278040885925293,
      "learning_rate": 0.00010994824603573824,
      "loss": 2.9564,
      "step": 165518
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3905279636383057,
      "learning_rate": 0.00010994508103040881,
      "loss": 2.8234,
      "step": 165519
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8372299671173096,
      "learning_rate": 0.00010994191606041434,
      "loss": 2.9654,
      "step": 165520
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2536838054656982,
      "learning_rate": 0.00010993875112575512,
      "loss": 2.8329,
      "step": 165521
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.059567451477051,
      "learning_rate": 0.00010993558622643207,
      "loss": 2.8898,
      "step": 165522
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6709132194519043,
      "learning_rate": 0.00010993242136244539,
      "loss": 3.0005,
      "step": 165523
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.828611373901367,
      "learning_rate": 0.00010992925653379597,
      "loss": 2.7776,
      "step": 165524
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.390717029571533,
      "learning_rate": 0.00010992609174048419,
      "loss": 2.8694,
      "step": 165525
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0678701400756836,
      "learning_rate": 0.00010992292698251083,
      "loss": 3.1988,
      "step": 165526
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.750898838043213,
      "learning_rate": 0.00010991976225987629,
      "loss": 2.9894,
      "step": 165527
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6530814170837402,
      "learning_rate": 0.00010991659757258143,
      "loss": 3.1933,
      "step": 165528
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5907840728759766,
      "learning_rate": 0.00010991343292062652,
      "loss": 2.8733,
      "step": 165529
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.738356351852417,
      "learning_rate": 0.00010991026830401232,
      "loss": 2.9898,
      "step": 165530
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.350980043411255,
      "learning_rate": 0.00010990710372273932,
      "loss": 2.9254,
      "step": 165531
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.928354501724243,
      "learning_rate": 0.00010990393917680829,
      "loss": 3.0573,
      "step": 165532
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.559187650680542,
      "learning_rate": 0.00010990077466621959,
      "loss": 3.0193,
      "step": 165533
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5365734100341797,
      "learning_rate": 0.00010989761019097415,
      "loss": 2.8292,
      "step": 165534
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5143699645996094,
      "learning_rate": 0.0001098944457510721,
      "loss": 2.9849,
      "step": 165535
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6122426986694336,
      "learning_rate": 0.00010989128134651437,
      "loss": 2.8295,
      "step": 165536
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.405313014984131,
      "learning_rate": 0.00010988811697730134,
      "loss": 3.0245,
      "step": 165537
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.835768461227417,
      "learning_rate": 0.0001098849526434338,
      "loss": 2.9399,
      "step": 165538
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4472527503967285,
      "learning_rate": 0.00010988178834491214,
      "loss": 2.8313,
      "step": 165539
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4549665451049805,
      "learning_rate": 0.00010987862408173725,
      "loss": 2.7763,
      "step": 165540
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3591461181640625,
      "learning_rate": 0.00010987545985390929,
      "loss": 3.0759,
      "step": 165541
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4150543212890625,
      "learning_rate": 0.0001098722956614292,
      "loss": 3.0144,
      "step": 165542
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6111011505126953,
      "learning_rate": 0.00010986913150429732,
      "loss": 3.1292,
      "step": 165543
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4270834922790527,
      "learning_rate": 0.00010986596738251447,
      "loss": 3.1956,
      "step": 165544
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6691765785217285,
      "learning_rate": 0.00010986280329608102,
      "loss": 2.855,
      "step": 165545
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6159474849700928,
      "learning_rate": 0.00010985963924499778,
      "loss": 3.157,
      "step": 165546
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.871610164642334,
      "learning_rate": 0.0001098564752292652,
      "loss": 2.9267,
      "step": 165547
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4406633377075195,
      "learning_rate": 0.00010985331124888386,
      "loss": 3.0537,
      "step": 165548
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9149482250213623,
      "learning_rate": 0.00010985014730385434,
      "loss": 3.0516,
      "step": 165549
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.704437255859375,
      "learning_rate": 0.00010984698339417735,
      "loss": 2.917,
      "step": 165550
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.202691078186035,
      "learning_rate": 0.00010984381951985328,
      "loss": 3.1662,
      "step": 165551
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3396646976470947,
      "learning_rate": 0.00010984065568088294,
      "loss": 2.8004,
      "step": 165552
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6313040256500244,
      "learning_rate": 0.00010983749187726681,
      "loss": 2.9009,
      "step": 165553
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.170179843902588,
      "learning_rate": 0.00010983432810900537,
      "loss": 3.0885,
      "step": 165554
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.474604606628418,
      "learning_rate": 0.00010983116437609943,
      "loss": 2.885,
      "step": 165555
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1793041229248047,
      "learning_rate": 0.00010982800067854945,
      "loss": 2.7345,
      "step": 165556
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.265775203704834,
      "learning_rate": 0.00010982483701635598,
      "loss": 2.8024,
      "step": 165557
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2197368144989014,
      "learning_rate": 0.0001098216733895197,
      "loss": 2.8456,
      "step": 165558
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.61149001121521,
      "learning_rate": 0.00010981850979804122,
      "loss": 3.012,
      "step": 165559
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.394257068634033,
      "learning_rate": 0.00010981534624192092,
      "loss": 2.8731,
      "step": 165560
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4776368141174316,
      "learning_rate": 0.00010981218272115967,
      "loss": 2.6004,
      "step": 165561
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.818789482116699,
      "learning_rate": 0.0001098090192357579,
      "loss": 2.9958,
      "step": 165562
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2045884132385254,
      "learning_rate": 0.00010980585578571615,
      "loss": 2.9234,
      "step": 165563
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8101658821105957,
      "learning_rate": 0.00010980269237103517,
      "loss": 2.9404,
      "step": 165564
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0010383129119873,
      "learning_rate": 0.00010979952899171545,
      "loss": 2.8839,
      "step": 165565
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.824845790863037,
      "learning_rate": 0.0001097963656477575,
      "loss": 2.8814,
      "step": 165566
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.160571575164795,
      "learning_rate": 0.0001097932023391621,
      "loss": 2.8714,
      "step": 165567
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4051284790039062,
      "learning_rate": 0.00010979003906592961,
      "loss": 3.0467,
      "step": 165568
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9159181118011475,
      "learning_rate": 0.00010978687582806087,
      "loss": 3.0713,
      "step": 165569
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.477390766143799,
      "learning_rate": 0.00010978371262555635,
      "loss": 2.9382,
      "step": 165570
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7809207439422607,
      "learning_rate": 0.00010978054945841659,
      "loss": 3.0351,
      "step": 165571
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.636045455932617,
      "learning_rate": 0.00010977738632664215,
      "loss": 3.0182,
      "step": 165572
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4911296367645264,
      "learning_rate": 0.00010977422323023375,
      "loss": 3.0264,
      "step": 165573
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.682077169418335,
      "learning_rate": 0.00010977106016919183,
      "loss": 3.0185,
      "step": 165574
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.387059450149536,
      "learning_rate": 0.00010976789714351718,
      "loss": 2.8563,
      "step": 165575
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.526442527770996,
      "learning_rate": 0.00010976473415321024,
      "loss": 2.8809,
      "step": 165576
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3044283390045166,
      "learning_rate": 0.00010976157119827165,
      "loss": 3.0068,
      "step": 165577
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0294201374053955,
      "learning_rate": 0.00010975840827870186,
      "loss": 2.8737,
      "step": 165578
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7465174198150635,
      "learning_rate": 0.00010975524539450168,
      "loss": 3.0571,
      "step": 165579
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.423044443130493,
      "learning_rate": 0.00010975208254567147,
      "loss": 3.0086,
      "step": 165580
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4130804538726807,
      "learning_rate": 0.00010974891973221207,
      "loss": 3.1044,
      "step": 165581
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.46869158744812,
      "learning_rate": 0.00010974575695412381,
      "loss": 2.8714,
      "step": 165582
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7178330421447754,
      "learning_rate": 0.00010974259421140762,
      "loss": 3.1861,
      "step": 165583
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7809865474700928,
      "learning_rate": 0.00010973943150406367,
      "loss": 2.7971,
      "step": 165584
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4209940433502197,
      "learning_rate": 0.00010973626883209285,
      "loss": 3.1261,
      "step": 165585
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.403512477874756,
      "learning_rate": 0.00010973310619549558,
      "loss": 2.7554,
      "step": 165586
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.374418020248413,
      "learning_rate": 0.00010972994359427258,
      "loss": 2.8547,
      "step": 165587
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.361112594604492,
      "learning_rate": 0.00010972678102842427,
      "loss": 3.1789,
      "step": 165588
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5668370723724365,
      "learning_rate": 0.00010972361849795156,
      "loss": 2.95,
      "step": 165589
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5798752307891846,
      "learning_rate": 0.00010972045600285464,
      "loss": 3.0551,
      "step": 165590
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.317424774169922,
      "learning_rate": 0.00010971729354313435,
      "loss": 2.9793,
      "step": 165591
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.143028497695923,
      "learning_rate": 0.00010971413111879112,
      "loss": 2.8806,
      "step": 165592
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6508395671844482,
      "learning_rate": 0.00010971096872982572,
      "loss": 2.9131,
      "step": 165593
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3051350116729736,
      "learning_rate": 0.00010970780637623853,
      "loss": 2.9397,
      "step": 165594
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.334578275680542,
      "learning_rate": 0.00010970464405803046,
      "loss": 2.8174,
      "step": 165595
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.408269166946411,
      "learning_rate": 0.00010970148177520171,
      "loss": 3.2151,
      "step": 165596
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7077629566192627,
      "learning_rate": 0.00010969831952775311,
      "loss": 3.1715,
      "step": 165597
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.426197052001953,
      "learning_rate": 0.00010969515731568511,
      "loss": 2.9297,
      "step": 165598
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.612534761428833,
      "learning_rate": 0.00010969199513899844,
      "loss": 2.9849,
      "step": 165599
    },
    {
      "epoch": 2.16,
      "grad_norm": 5.675954818725586,
      "learning_rate": 0.00010968883299769358,
      "loss": 3.049,
      "step": 165600
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.447223424911499,
      "learning_rate": 0.0001096856708917713,
      "loss": 2.88,
      "step": 165601
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.098968505859375,
      "learning_rate": 0.00010968250882123189,
      "loss": 2.8583,
      "step": 165602
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6402428150177,
      "learning_rate": 0.00010967934678607618,
      "loss": 3.0551,
      "step": 165603
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.639915704727173,
      "learning_rate": 0.00010967618478630457,
      "loss": 2.9768,
      "step": 165604
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8737261295318604,
      "learning_rate": 0.00010967302282191785,
      "loss": 2.9361,
      "step": 165605
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1247408390045166,
      "learning_rate": 0.00010966986089291639,
      "loss": 2.7806,
      "step": 165606
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1931824684143066,
      "learning_rate": 0.00010966669899930111,
      "loss": 2.8971,
      "step": 165607
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7775697708129883,
      "learning_rate": 0.00010966353714107218,
      "loss": 2.8454,
      "step": 165608
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2120018005371094,
      "learning_rate": 0.00010966037531823049,
      "loss": 3.0274,
      "step": 165609
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1498045921325684,
      "learning_rate": 0.00010965721353077645,
      "loss": 3.0816,
      "step": 165610
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2713027000427246,
      "learning_rate": 0.00010965405177871079,
      "loss": 3.0796,
      "step": 165611
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.640030860900879,
      "learning_rate": 0.00010965089006203395,
      "loss": 3.1632,
      "step": 165612
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.768369674682617,
      "learning_rate": 0.00010964772838074674,
      "loss": 2.8015,
      "step": 165613
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.138047695159912,
      "learning_rate": 0.00010964456673484954,
      "loss": 2.9772,
      "step": 165614
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.0531108379364014,
      "learning_rate": 0.00010964140512434308,
      "loss": 3.3137,
      "step": 165615
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5038821697235107,
      "learning_rate": 0.00010963824354922774,
      "loss": 2.9079,
      "step": 165616
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3149895668029785,
      "learning_rate": 0.00010963508200950432,
      "loss": 3.2279,
      "step": 165617
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.448903799057007,
      "learning_rate": 0.00010963192050517326,
      "loss": 2.862,
      "step": 165618
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.921144485473633,
      "learning_rate": 0.00010962875903623533,
      "loss": 2.9544,
      "step": 165619
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.50486159324646,
      "learning_rate": 0.000109625597602691,
      "loss": 2.9584,
      "step": 165620
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.188279390335083,
      "learning_rate": 0.00010962243620454087,
      "loss": 2.9633,
      "step": 165621
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.320549964904785,
      "learning_rate": 0.0001096192748417854,
      "loss": 2.8467,
      "step": 165622
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.03464412689209,
      "learning_rate": 0.00010961611351442541,
      "loss": 3.0876,
      "step": 165623
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8926665782928467,
      "learning_rate": 0.0001096129522224613,
      "loss": 3.0077,
      "step": 165624
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3253262042999268,
      "learning_rate": 0.00010960979096589382,
      "loss": 2.9877,
      "step": 165625
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6407225131988525,
      "learning_rate": 0.00010960662974472346,
      "loss": 2.7733,
      "step": 165626
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.707655906677246,
      "learning_rate": 0.00010960346855895084,
      "loss": 3.06,
      "step": 165627
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.887430191040039,
      "learning_rate": 0.0001096003074085764,
      "loss": 2.9095,
      "step": 165628
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4811818599700928,
      "learning_rate": 0.00010959714629360099,
      "loss": 2.8997,
      "step": 165629
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6287143230438232,
      "learning_rate": 0.00010959398521402497,
      "loss": 2.8234,
      "step": 165630
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8398377895355225,
      "learning_rate": 0.0001095908241698491,
      "loss": 2.9093,
      "step": 165631
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.762721061706543,
      "learning_rate": 0.00010958766316107391,
      "loss": 2.8708,
      "step": 165632
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0142180919647217,
      "learning_rate": 0.00010958450218769993,
      "loss": 2.923,
      "step": 165633
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4311790466308594,
      "learning_rate": 0.00010958134124972773,
      "loss": 3.3277,
      "step": 165634
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.847127676010132,
      "learning_rate": 0.00010957818034715804,
      "loss": 3.0536,
      "step": 165635
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.717677593231201,
      "learning_rate": 0.00010957501947999124,
      "loss": 3.0288,
      "step": 165636
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.188287258148193,
      "learning_rate": 0.00010957185864822813,
      "loss": 2.9573,
      "step": 165637
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.622676134109497,
      "learning_rate": 0.00010956869785186926,
      "loss": 2.9164,
      "step": 165638
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4031050205230713,
      "learning_rate": 0.00010956553709091512,
      "loss": 2.9053,
      "step": 165639
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0911636352539062,
      "learning_rate": 0.00010956237636536626,
      "loss": 2.8795,
      "step": 165640
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.382508277893066,
      "learning_rate": 0.00010955921567522342,
      "loss": 2.9267,
      "step": 165641
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.073533058166504,
      "learning_rate": 0.00010955605502048706,
      "loss": 2.8907,
      "step": 165642
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7823798656463623,
      "learning_rate": 0.0001095528944011579,
      "loss": 3.0644,
      "step": 165643
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2755489349365234,
      "learning_rate": 0.00010954973381723645,
      "loss": 2.9286,
      "step": 165644
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8982443809509277,
      "learning_rate": 0.00010954657326872322,
      "loss": 2.9074,
      "step": 165645
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.199110746383667,
      "learning_rate": 0.00010954341275561895,
      "loss": 2.9163,
      "step": 165646
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.562448740005493,
      "learning_rate": 0.00010954025227792416,
      "loss": 2.9419,
      "step": 165647
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4701600074768066,
      "learning_rate": 0.00010953709183563936,
      "loss": 2.8206,
      "step": 165648
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.84749436378479,
      "learning_rate": 0.00010953393142876529,
      "loss": 3.0881,
      "step": 165649
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.509977340698242,
      "learning_rate": 0.00010953077105730246,
      "loss": 2.993,
      "step": 165650
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.303508281707764,
      "learning_rate": 0.00010952761072125133,
      "loss": 2.8098,
      "step": 165651
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4817092418670654,
      "learning_rate": 0.00010952445042061272,
      "loss": 3.1118,
      "step": 165652
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1749558448791504,
      "learning_rate": 0.00010952129015538705,
      "loss": 2.9663,
      "step": 165653
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3458290100097656,
      "learning_rate": 0.00010951812992557504,
      "loss": 2.8504,
      "step": 165654
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9140517711639404,
      "learning_rate": 0.0001095149697311772,
      "loss": 3.0503,
      "step": 165655
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8677544593811035,
      "learning_rate": 0.00010951180957219414,
      "loss": 2.9336,
      "step": 165656
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.972297430038452,
      "learning_rate": 0.00010950864944862632,
      "loss": 3.0705,
      "step": 165657
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5482311248779297,
      "learning_rate": 0.00010950548936047454,
      "loss": 2.7974,
      "step": 165658
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.637186288833618,
      "learning_rate": 0.00010950232930773918,
      "loss": 3.0427,
      "step": 165659
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3382952213287354,
      "learning_rate": 0.00010949916929042103,
      "loss": 2.7725,
      "step": 165660
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5402917861938477,
      "learning_rate": 0.00010949600930852058,
      "loss": 2.9058,
      "step": 165661
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.396235704421997,
      "learning_rate": 0.00010949284936203844,
      "loss": 3.0395,
      "step": 165662
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1759815216064453,
      "learning_rate": 0.00010948968945097507,
      "loss": 2.6941,
      "step": 165663
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.0589046478271484,
      "learning_rate": 0.00010948652957533122,
      "loss": 3.0539,
      "step": 165664
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.619896650314331,
      "learning_rate": 0.00010948336973510737,
      "loss": 3.0075,
      "step": 165665
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4035303592681885,
      "learning_rate": 0.00010948020993030424,
      "loss": 2.8623,
      "step": 165666
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.398515462875366,
      "learning_rate": 0.00010947705016092222,
      "loss": 2.7196,
      "step": 165667
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.311906337738037,
      "learning_rate": 0.00010947389042696224,
      "loss": 3.1147,
      "step": 165668
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5745556354522705,
      "learning_rate": 0.00010947073072842446,
      "loss": 2.7884,
      "step": 165669
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5278780460357666,
      "learning_rate": 0.00010946757106530975,
      "loss": 3.1481,
      "step": 165670
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.709726333618164,
      "learning_rate": 0.00010946441143761852,
      "loss": 2.8358,
      "step": 165671
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1493499279022217,
      "learning_rate": 0.00010946125184535154,
      "loss": 2.772,
      "step": 165672
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8092758655548096,
      "learning_rate": 0.00010945809228850925,
      "loss": 3.0538,
      "step": 165673
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.105135917663574,
      "learning_rate": 0.00010945493276709245,
      "loss": 2.8986,
      "step": 165674
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.671801805496216,
      "learning_rate": 0.00010945177328110142,
      "loss": 2.8756,
      "step": 165675
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.783071279525757,
      "learning_rate": 0.00010944861383053698,
      "loss": 2.9699,
      "step": 165676
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.284229278564453,
      "learning_rate": 0.00010944545441539953,
      "loss": 2.8135,
      "step": 165677
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.236424446105957,
      "learning_rate": 0.00010944229503568989,
      "loss": 2.84,
      "step": 165678
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.716566801071167,
      "learning_rate": 0.00010943913569140839,
      "loss": 3.1065,
      "step": 165679
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2293825149536133,
      "learning_rate": 0.00010943597638255587,
      "loss": 2.9361,
      "step": 165680
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7904183864593506,
      "learning_rate": 0.0001094328171091328,
      "loss": 3.0545,
      "step": 165681
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.241800308227539,
      "learning_rate": 0.00010942965787113979,
      "loss": 3.0905,
      "step": 165682
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.403456449508667,
      "learning_rate": 0.00010942649866857726,
      "loss": 3.0205,
      "step": 165683
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.171919584274292,
      "learning_rate": 0.00010942333950144607,
      "loss": 2.6893,
      "step": 165684
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.378540277481079,
      "learning_rate": 0.00010942018036974657,
      "loss": 3.0581,
      "step": 165685
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.583102226257324,
      "learning_rate": 0.00010941702127347956,
      "loss": 2.9652,
      "step": 165686
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.064528226852417,
      "learning_rate": 0.00010941386221264554,
      "loss": 2.8171,
      "step": 165687
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.968254566192627,
      "learning_rate": 0.00010941070318724503,
      "loss": 2.8787,
      "step": 165688
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.015214443206787,
      "learning_rate": 0.00010940754419727862,
      "loss": 2.8514,
      "step": 165689
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.658780574798584,
      "learning_rate": 0.00010940438524274703,
      "loss": 3.1339,
      "step": 165690
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4926748275756836,
      "learning_rate": 0.00010940122632365062,
      "loss": 2.9888,
      "step": 165691
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4143502712249756,
      "learning_rate": 0.00010939806743999028,
      "loss": 3.0883,
      "step": 165692
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.757667064666748,
      "learning_rate": 0.00010939490859176642,
      "loss": 2.8996,
      "step": 165693
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.093142032623291,
      "learning_rate": 0.00010939174977897965,
      "loss": 2.674,
      "step": 165694
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.639019250869751,
      "learning_rate": 0.00010938859100163042,
      "loss": 3.0369,
      "step": 165695
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6605052947998047,
      "learning_rate": 0.00010938543225971955,
      "loss": 3.0991,
      "step": 165696
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7089943885803223,
      "learning_rate": 0.00010938227355324747,
      "loss": 2.973,
      "step": 165697
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9999284744262695,
      "learning_rate": 0.0001093791148822149,
      "loss": 2.7889,
      "step": 165698
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.58233380317688,
      "learning_rate": 0.00010937595624662235,
      "loss": 2.8625,
      "step": 165699
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.893507480621338,
      "learning_rate": 0.0001093727976464704,
      "loss": 2.9289,
      "step": 165700
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3542890548706055,
      "learning_rate": 0.00010936963908175955,
      "loss": 3.1104,
      "step": 165701
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.561235189437866,
      "learning_rate": 0.00010936648055249058,
      "loss": 2.8014,
      "step": 165702
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.261669158935547,
      "learning_rate": 0.00010936332205866389,
      "loss": 2.981,
      "step": 165703
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.938286304473877,
      "learning_rate": 0.00010936016360028026,
      "loss": 2.8451,
      "step": 165704
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5155584812164307,
      "learning_rate": 0.00010935700517734017,
      "loss": 2.7647,
      "step": 165705
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.929028272628784,
      "learning_rate": 0.0001093538467898442,
      "loss": 2.8622,
      "step": 165706
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9117684364318848,
      "learning_rate": 0.00010935068843779284,
      "loss": 3.044,
      "step": 165707
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4345486164093018,
      "learning_rate": 0.00010934753012118693,
      "loss": 2.9724,
      "step": 165708
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.286607027053833,
      "learning_rate": 0.00010934437184002676,
      "loss": 2.9624,
      "step": 165709
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.87467360496521,
      "learning_rate": 0.00010934121359431321,
      "loss": 2.9369,
      "step": 165710
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.673182964324951,
      "learning_rate": 0.0001093380553840467,
      "loss": 3.1946,
      "step": 165711
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.29381799697876,
      "learning_rate": 0.00010933489720922787,
      "loss": 3.2363,
      "step": 165712
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.165226936340332,
      "learning_rate": 0.00010933173906985715,
      "loss": 2.8577,
      "step": 165713
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.401518821716309,
      "learning_rate": 0.00010932858096593539,
      "loss": 2.8533,
      "step": 165714
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.579336643218994,
      "learning_rate": 0.00010932542289746295,
      "loss": 2.9494,
      "step": 165715
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3498311042785645,
      "learning_rate": 0.0001093222648644406,
      "loss": 2.9748,
      "step": 165716
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.587311029434204,
      "learning_rate": 0.00010931910686686882,
      "loss": 2.8868,
      "step": 165717
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1951823234558105,
      "learning_rate": 0.00010931594890474825,
      "loss": 2.9524,
      "step": 165718
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1623129844665527,
      "learning_rate": 0.00010931279097807933,
      "loss": 2.9713,
      "step": 165719
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3691446781158447,
      "learning_rate": 0.00010930963308686283,
      "loss": 2.9724,
      "step": 165720
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.800262451171875,
      "learning_rate": 0.00010930647523109922,
      "loss": 3.0263,
      "step": 165721
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.257219076156616,
      "learning_rate": 0.00010930331741078921,
      "loss": 3.112,
      "step": 165722
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.9986541271209717,
      "learning_rate": 0.00010930015962593331,
      "loss": 3.1758,
      "step": 165723
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.789606094360352,
      "learning_rate": 0.00010929700187653211,
      "loss": 2.7857,
      "step": 165724
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.374699115753174,
      "learning_rate": 0.00010929384416258613,
      "loss": 3.1097,
      "step": 165725
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.772531509399414,
      "learning_rate": 0.00010929068648409611,
      "loss": 2.9657,
      "step": 165726
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3984756469726562,
      "learning_rate": 0.00010928752884106244,
      "loss": 3.1044,
      "step": 165727
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.904700994491577,
      "learning_rate": 0.00010928437123348592,
      "loss": 2.7053,
      "step": 165728
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.703014373779297,
      "learning_rate": 0.00010928121366136701,
      "loss": 2.8614,
      "step": 165729
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6768438816070557,
      "learning_rate": 0.00010927805612470625,
      "loss": 3.1881,
      "step": 165730
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9532227516174316,
      "learning_rate": 0.00010927489862350441,
      "loss": 3.163,
      "step": 165731
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2408251762390137,
      "learning_rate": 0.00010927174115776194,
      "loss": 2.9096,
      "step": 165732
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.946499824523926,
      "learning_rate": 0.00010926858372747938,
      "loss": 2.9639,
      "step": 165733
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.931759357452393,
      "learning_rate": 0.0001092654263326575,
      "loss": 2.9626,
      "step": 165734
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.569681644439697,
      "learning_rate": 0.00010926226897329673,
      "loss": 2.8757,
      "step": 165735
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4214730262756348,
      "learning_rate": 0.00010925911164939763,
      "loss": 2.8659,
      "step": 165736
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.726168155670166,
      "learning_rate": 0.00010925595436096097,
      "loss": 2.8576,
      "step": 165737
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.166638135910034,
      "learning_rate": 0.00010925279710798722,
      "loss": 2.8692,
      "step": 165738
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6948721408843994,
      "learning_rate": 0.00010924963989047688,
      "loss": 2.6886,
      "step": 165739
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2735304832458496,
      "learning_rate": 0.00010924648270843071,
      "loss": 3.09,
      "step": 165740
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9129467010498047,
      "learning_rate": 0.00010924332556184925,
      "loss": 2.7616,
      "step": 165741
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4281086921691895,
      "learning_rate": 0.00010924016845073294,
      "loss": 2.9405,
      "step": 165742
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6460413932800293,
      "learning_rate": 0.00010923701137508261,
      "loss": 2.887,
      "step": 165743
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5564069747924805,
      "learning_rate": 0.00010923385433489859,
      "loss": 2.937,
      "step": 165744
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.619913101196289,
      "learning_rate": 0.00010923069733018171,
      "loss": 3.1189,
      "step": 165745
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5734376907348633,
      "learning_rate": 0.00010922754036093243,
      "loss": 2.8046,
      "step": 165746
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.796246290206909,
      "learning_rate": 0.00010922438342715127,
      "loss": 2.882,
      "step": 165747
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4023797512054443,
      "learning_rate": 0.00010922122652883898,
      "loss": 3.2843,
      "step": 165748
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4022810459136963,
      "learning_rate": 0.00010921806966599607,
      "loss": 3.1467,
      "step": 165749
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.765333652496338,
      "learning_rate": 0.00010921491283862304,
      "loss": 3.1042,
      "step": 165750
    },
    {
      "epoch": 2.16,
      "grad_norm": 5.079172611236572,
      "learning_rate": 0.00010921175604672065,
      "loss": 3.031,
      "step": 165751
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2662177085876465,
      "learning_rate": 0.0001092085992902893,
      "loss": 2.9917,
      "step": 165752
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.357407808303833,
      "learning_rate": 0.00010920544256932973,
      "loss": 2.808,
      "step": 165753
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1349759101867676,
      "learning_rate": 0.00010920228588384253,
      "loss": 2.8887,
      "step": 165754
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1838884353637695,
      "learning_rate": 0.00010919912923382818,
      "loss": 2.9076,
      "step": 165755
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.619277238845825,
      "learning_rate": 0.00010919597261928726,
      "loss": 3.0819,
      "step": 165756
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.240483522415161,
      "learning_rate": 0.0001091928160402205,
      "loss": 2.8308,
      "step": 165757
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2673380374908447,
      "learning_rate": 0.00010918965949662826,
      "loss": 2.6577,
      "step": 165758
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.302098274230957,
      "learning_rate": 0.0001091865029885114,
      "loss": 3.0343,
      "step": 165759
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2560489177703857,
      "learning_rate": 0.00010918334651587033,
      "loss": 2.942,
      "step": 165760
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.366861581802368,
      "learning_rate": 0.00010918019007870573,
      "loss": 2.9919,
      "step": 165761
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.347747802734375,
      "learning_rate": 0.00010917703367701802,
      "loss": 3.0266,
      "step": 165762
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1934266090393066,
      "learning_rate": 0.00010917387731080798,
      "loss": 3.052,
      "step": 165763
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6767117977142334,
      "learning_rate": 0.00010917072098007603,
      "loss": 2.8706,
      "step": 165764
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5659139156341553,
      "learning_rate": 0.00010916756468482294,
      "loss": 2.9855,
      "step": 165765
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5208699703216553,
      "learning_rate": 0.00010916440842504923,
      "loss": 3.0172,
      "step": 165766
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.316464900970459,
      "learning_rate": 0.00010916125220075542,
      "loss": 2.9939,
      "step": 165767
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.150493621826172,
      "learning_rate": 0.00010915809601194204,
      "loss": 2.9754,
      "step": 165768
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.776768445968628,
      "learning_rate": 0.00010915493985860989,
      "loss": 2.8982,
      "step": 165769
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.264780282974243,
      "learning_rate": 0.00010915178374075936,
      "loss": 2.9417,
      "step": 165770
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0848283767700195,
      "learning_rate": 0.00010914862765839117,
      "loss": 2.9727,
      "step": 165771
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4479775428771973,
      "learning_rate": 0.00010914547161150587,
      "loss": 2.9471,
      "step": 165772
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5094592571258545,
      "learning_rate": 0.00010914231560010403,
      "loss": 2.996,
      "step": 165773
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.146542549133301,
      "learning_rate": 0.00010913915962418616,
      "loss": 2.8216,
      "step": 165774
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2479329109191895,
      "learning_rate": 0.000109136003683753,
      "loss": 2.8485,
      "step": 165775
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6399550437927246,
      "learning_rate": 0.00010913284777880493,
      "loss": 3.1701,
      "step": 165776
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.698101043701172,
      "learning_rate": 0.00010912969190934281,
      "loss": 2.6861,
      "step": 165777
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.736706256866455,
      "learning_rate": 0.00010912653607536707,
      "loss": 2.8023,
      "step": 165778
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7636098861694336,
      "learning_rate": 0.00010912338027687834,
      "loss": 3.0032,
      "step": 165779
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2378664016723633,
      "learning_rate": 0.00010912022451387703,
      "loss": 2.8653,
      "step": 165780
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.128248453140259,
      "learning_rate": 0.00010911706878636398,
      "loss": 3.1928,
      "step": 165781
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5049097537994385,
      "learning_rate": 0.00010911391309433958,
      "loss": 3.0245,
      "step": 165782
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.447998285293579,
      "learning_rate": 0.0001091107574378046,
      "loss": 3.0629,
      "step": 165783
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.06207275390625,
      "learning_rate": 0.00010910760181675958,
      "loss": 2.9562,
      "step": 165784
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0721373558044434,
      "learning_rate": 0.00010910444623120501,
      "loss": 2.9859,
      "step": 165785
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.058621406555176,
      "learning_rate": 0.00010910129068114143,
      "loss": 2.8746,
      "step": 165786
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0376625061035156,
      "learning_rate": 0.00010909813516656962,
      "loss": 3.1694,
      "step": 165787
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.715766191482544,
      "learning_rate": 0.00010909497968749,
      "loss": 2.8142,
      "step": 165788
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6523141860961914,
      "learning_rate": 0.0001090918242439033,
      "loss": 2.8898,
      "step": 165789
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.6986403465271,
      "learning_rate": 0.00010908866883581006,
      "loss": 2.9801,
      "step": 165790
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7053093910217285,
      "learning_rate": 0.00010908551346321082,
      "loss": 2.75,
      "step": 165791
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6537792682647705,
      "learning_rate": 0.00010908235812610608,
      "loss": 3.0049,
      "step": 165792
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4200499057769775,
      "learning_rate": 0.00010907920282449662,
      "loss": 2.9351,
      "step": 165793
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.446028232574463,
      "learning_rate": 0.00010907604755838287,
      "loss": 2.988,
      "step": 165794
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.7146317958831787,
      "learning_rate": 0.0001090728923277656,
      "loss": 2.9607,
      "step": 165795
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.582082986831665,
      "learning_rate": 0.00010906973713264526,
      "loss": 3.2118,
      "step": 165796
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.332444906234741,
      "learning_rate": 0.00010906658197302244,
      "loss": 2.9832,
      "step": 165797
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5078608989715576,
      "learning_rate": 0.00010906342684889768,
      "loss": 3.0025,
      "step": 165798
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8460299968719482,
      "learning_rate": 0.00010906027176027172,
      "loss": 2.8056,
      "step": 165799
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.574230432510376,
      "learning_rate": 0.00010905711670714496,
      "loss": 2.963,
      "step": 165800
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.9127163887023926,
      "learning_rate": 0.00010905396168951821,
      "loss": 2.5995,
      "step": 165801
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4128670692443848,
      "learning_rate": 0.0001090508067073919,
      "loss": 3.1467,
      "step": 165802
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.398703098297119,
      "learning_rate": 0.00010904765176076667,
      "loss": 3.0362,
      "step": 165803
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.974052906036377,
      "learning_rate": 0.00010904449684964297,
      "loss": 2.8087,
      "step": 165804
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.554739475250244,
      "learning_rate": 0.00010904134197402161,
      "loss": 3.0304,
      "step": 165805
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.128187417984009,
      "learning_rate": 0.00010903818713390296,
      "loss": 3.0464,
      "step": 165806
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1906635761260986,
      "learning_rate": 0.00010903503232928782,
      "loss": 2.9373,
      "step": 165807
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.094898223876953,
      "learning_rate": 0.0001090318775601767,
      "loss": 2.9317,
      "step": 165808
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8300323486328125,
      "learning_rate": 0.00010902872282657012,
      "loss": 3.0457,
      "step": 165809
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.801330089569092,
      "learning_rate": 0.00010902556812846861,
      "loss": 3.0021,
      "step": 165810
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.461958885192871,
      "learning_rate": 0.00010902241346587299,
      "loss": 3.012,
      "step": 165811
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3826138973236084,
      "learning_rate": 0.00010901925883878357,
      "loss": 2.9015,
      "step": 165812
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.596245527267456,
      "learning_rate": 0.00010901610424720119,
      "loss": 2.7185,
      "step": 165813
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3821890354156494,
      "learning_rate": 0.0001090129496911262,
      "loss": 2.7515,
      "step": 165814
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.245325803756714,
      "learning_rate": 0.00010900979517055943,
      "loss": 2.8819,
      "step": 165815
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6041407585144043,
      "learning_rate": 0.00010900664068550134,
      "loss": 3.0674,
      "step": 165816
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.171060800552368,
      "learning_rate": 0.00010900348623595253,
      "loss": 3.1408,
      "step": 165817
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.435938596725464,
      "learning_rate": 0.00010900033182191346,
      "loss": 3.0261,
      "step": 165818
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.119774580001831,
      "learning_rate": 0.00010899717744338494,
      "loss": 2.781,
      "step": 165819
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.644007682800293,
      "learning_rate": 0.00010899402310036736,
      "loss": 2.9759,
      "step": 165820
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1162610054016113,
      "learning_rate": 0.00010899086879286149,
      "loss": 2.9634,
      "step": 165821
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6620419025421143,
      "learning_rate": 0.00010898771452086783,
      "loss": 3.0892,
      "step": 165822
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8337507247924805,
      "learning_rate": 0.00010898456028438692,
      "loss": 2.9474,
      "step": 165823
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.861368179321289,
      "learning_rate": 0.00010898140608341934,
      "loss": 2.7821,
      "step": 165824
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3353471755981445,
      "learning_rate": 0.00010897825191796579,
      "loss": 2.9594,
      "step": 165825
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.268648147583008,
      "learning_rate": 0.00010897509778802669,
      "loss": 2.8846,
      "step": 165826
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5687546730041504,
      "learning_rate": 0.00010897194369360285,
      "loss": 3.2098,
      "step": 165827
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3536055088043213,
      "learning_rate": 0.0001089687896346947,
      "loss": 3.0571,
      "step": 165828
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.566627264022827,
      "learning_rate": 0.00010896563561130276,
      "loss": 2.9198,
      "step": 165829
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6508448123931885,
      "learning_rate": 0.00010896248162342783,
      "loss": 3.0374,
      "step": 165830
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.437401533126831,
      "learning_rate": 0.00010895932767107038,
      "loss": 3.2113,
      "step": 165831
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2524847984313965,
      "learning_rate": 0.0001089561737542309,
      "loss": 2.8776,
      "step": 165832
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7798163890838623,
      "learning_rate": 0.00010895301987291019,
      "loss": 2.8569,
      "step": 165833
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7671732902526855,
      "learning_rate": 0.0001089498660271087,
      "loss": 2.8137,
      "step": 165834
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6890995502471924,
      "learning_rate": 0.00010894671221682691,
      "loss": 2.7706,
      "step": 165835
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3825957775115967,
      "learning_rate": 0.00010894355844206567,
      "loss": 3.0024,
      "step": 165836
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.024532794952393,
      "learning_rate": 0.0001089404047028253,
      "loss": 2.8059,
      "step": 165837
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6352803707122803,
      "learning_rate": 0.00010893725099910664,
      "loss": 2.8854,
      "step": 165838
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4379007816314697,
      "learning_rate": 0.00010893409733091015,
      "loss": 3.0281,
      "step": 165839
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4464564323425293,
      "learning_rate": 0.0001089309436982364,
      "loss": 2.8108,
      "step": 165840
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9278404712677,
      "learning_rate": 0.00010892779010108593,
      "loss": 2.7362,
      "step": 165841
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.330951690673828,
      "learning_rate": 0.00010892463653945945,
      "loss": 2.8148,
      "step": 165842
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6512227058410645,
      "learning_rate": 0.0001089214830133574,
      "loss": 2.9437,
      "step": 165843
    },
    {
      "epoch": 2.16,
      "grad_norm": 1.906661033630371,
      "learning_rate": 0.00010891832952278057,
      "loss": 2.9318,
      "step": 165844
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5616984367370605,
      "learning_rate": 0.00010891517606772942,
      "loss": 2.6264,
      "step": 165845
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4682300090789795,
      "learning_rate": 0.00010891202264820451,
      "loss": 3.0705,
      "step": 165846
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3775699138641357,
      "learning_rate": 0.00010890886926420639,
      "loss": 2.9536,
      "step": 165847
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2489914894104004,
      "learning_rate": 0.00010890571591573585,
      "loss": 2.9968,
      "step": 165848
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.122044801712036,
      "learning_rate": 0.00010890256260279319,
      "loss": 3.0136,
      "step": 165849
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.0021932125091553,
      "learning_rate": 0.00010889940932537927,
      "loss": 2.8899,
      "step": 165850
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.799785614013672,
      "learning_rate": 0.00010889625608349457,
      "loss": 2.9241,
      "step": 165851
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.384047508239746,
      "learning_rate": 0.00010889310287713968,
      "loss": 2.9357,
      "step": 165852
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.461090564727783,
      "learning_rate": 0.00010888994970631503,
      "loss": 3.1299,
      "step": 165853
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.780306339263916,
      "learning_rate": 0.00010888679657102148,
      "loss": 2.6577,
      "step": 165854
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7503724098205566,
      "learning_rate": 0.00010888364347125936,
      "loss": 2.8849,
      "step": 165855
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6001622676849365,
      "learning_rate": 0.00010888049040702948,
      "loss": 2.8081,
      "step": 165856
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3943629264831543,
      "learning_rate": 0.00010887733737833234,
      "loss": 2.9617,
      "step": 165857
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8921806812286377,
      "learning_rate": 0.00010887418438516848,
      "loss": 2.9829,
      "step": 165858
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.301626682281494,
      "learning_rate": 0.00010887103142753844,
      "loss": 3.0866,
      "step": 165859
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.200174331665039,
      "learning_rate": 0.00010886787850544297,
      "loss": 3.1049,
      "step": 165860
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.802877187728882,
      "learning_rate": 0.00010886472561888249,
      "loss": 2.8473,
      "step": 165861
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.633348226547241,
      "learning_rate": 0.00010886157276785775,
      "loss": 2.92,
      "step": 165862
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1761116981506348,
      "learning_rate": 0.00010885841995236926,
      "loss": 2.7281,
      "step": 165863
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.280118942260742,
      "learning_rate": 0.00010885526717241759,
      "loss": 2.902,
      "step": 165864
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.446566343307495,
      "learning_rate": 0.00010885211442800324,
      "loss": 3.0006,
      "step": 165865
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3804287910461426,
      "learning_rate": 0.00010884896171912698,
      "loss": 2.9444,
      "step": 165866
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.191652297973633,
      "learning_rate": 0.00010884580904578922,
      "loss": 3.0137,
      "step": 165867
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6221108436584473,
      "learning_rate": 0.0001088426564079907,
      "loss": 2.9345,
      "step": 165868
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7401013374328613,
      "learning_rate": 0.00010883950380573199,
      "loss": 2.9451,
      "step": 165869
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.372556686401367,
      "learning_rate": 0.00010883635123901358,
      "loss": 3.1922,
      "step": 165870
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9073541164398193,
      "learning_rate": 0.00010883319870783604,
      "loss": 3.0318,
      "step": 165871
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.653627872467041,
      "learning_rate": 0.0001088300462122001,
      "loss": 2.7547,
      "step": 165872
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7150588035583496,
      "learning_rate": 0.00010882689375210613,
      "loss": 3.0676,
      "step": 165873
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.833575963973999,
      "learning_rate": 0.00010882374132755499,
      "loss": 2.8321,
      "step": 165874
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3495988845825195,
      "learning_rate": 0.00010882058893854713,
      "loss": 3.2777,
      "step": 165875
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4724080562591553,
      "learning_rate": 0.0001088174365850831,
      "loss": 3.1527,
      "step": 165876
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.224844455718994,
      "learning_rate": 0.00010881428426716343,
      "loss": 3.0061,
      "step": 165877
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7011096477508545,
      "learning_rate": 0.0001088111319847889,
      "loss": 3.0001,
      "step": 165878
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.19892692565918,
      "learning_rate": 0.00010880797973795987,
      "loss": 2.8302,
      "step": 165879
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.986937999725342,
      "learning_rate": 0.00010880482752667717,
      "loss": 2.8628,
      "step": 165880
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4908664226531982,
      "learning_rate": 0.00010880167535094114,
      "loss": 2.9437,
      "step": 165881
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.8954389095306396,
      "learning_rate": 0.0001087985232107527,
      "loss": 3.0124,
      "step": 165882
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3897488117218018,
      "learning_rate": 0.00010879537110611204,
      "loss": 2.9952,
      "step": 165883
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.743959665298462,
      "learning_rate": 0.00010879221903702,
      "loss": 2.8819,
      "step": 165884
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6458706855773926,
      "learning_rate": 0.00010878906700347703,
      "loss": 3.2542,
      "step": 165885
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3246147632598877,
      "learning_rate": 0.00010878591500548385,
      "loss": 3.177,
      "step": 165886
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.982572317123413,
      "learning_rate": 0.0001087827630430409,
      "loss": 2.8185,
      "step": 165887
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2068111896514893,
      "learning_rate": 0.00010877961111614901,
      "loss": 2.9954,
      "step": 165888
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6433603763580322,
      "learning_rate": 0.00010877645922480845,
      "loss": 3.0498,
      "step": 165889
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.453768014907837,
      "learning_rate": 0.00010877330736902003,
      "loss": 3.0021,
      "step": 165890
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2428717613220215,
      "learning_rate": 0.00010877015554878416,
      "loss": 2.9374,
      "step": 165891
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8754639625549316,
      "learning_rate": 0.00010876700376410162,
      "loss": 3.1584,
      "step": 165892
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5828123092651367,
      "learning_rate": 0.00010876385201497279,
      "loss": 2.9496,
      "step": 165893
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8415677547454834,
      "learning_rate": 0.00010876070030139861,
      "loss": 2.759,
      "step": 165894
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0998144149780273,
      "learning_rate": 0.0001087575486233792,
      "loss": 3.0086,
      "step": 165895
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2812328338623047,
      "learning_rate": 0.0001087543969809155,
      "loss": 3.0997,
      "step": 165896
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3132734298706055,
      "learning_rate": 0.00010875124537400782,
      "loss": 2.8748,
      "step": 165897
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.062302350997925,
      "learning_rate": 0.00010874809380265703,
      "loss": 3.0506,
      "step": 165898
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0677926540374756,
      "learning_rate": 0.00010874494226686347,
      "loss": 2.7853,
      "step": 165899
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.745858907699585,
      "learning_rate": 0.00010874179076662806,
      "loss": 2.9161,
      "step": 165900
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3005542755126953,
      "learning_rate": 0.0001087386393019509,
      "loss": 3.3372,
      "step": 165901
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5013175010681152,
      "learning_rate": 0.000108735487872833,
      "loss": 2.7158,
      "step": 165902
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2294163703918457,
      "learning_rate": 0.00010873233647927466,
      "loss": 3.1999,
      "step": 165903
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1977689266204834,
      "learning_rate": 0.00010872918512127667,
      "loss": 3.0159,
      "step": 165904
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.352996826171875,
      "learning_rate": 0.00010872603379883948,
      "loss": 2.943,
      "step": 165905
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4117326736450195,
      "learning_rate": 0.0001087228825119638,
      "loss": 2.913,
      "step": 165906
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.561614990234375,
      "learning_rate": 0.00010871973126065014,
      "loss": 2.8501,
      "step": 165907
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5840086936950684,
      "learning_rate": 0.0001087165800448991,
      "loss": 3.0858,
      "step": 165908
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.292766571044922,
      "learning_rate": 0.00010871342886471113,
      "loss": 2.9097,
      "step": 165909
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.420109510421753,
      "learning_rate": 0.0001087102777200871,
      "loss": 3.0803,
      "step": 165910
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.000173807144165,
      "learning_rate": 0.00010870712661102731,
      "loss": 2.8636,
      "step": 165911
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.061817169189453,
      "learning_rate": 0.00010870397553753258,
      "loss": 2.7757,
      "step": 165912
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.525817632675171,
      "learning_rate": 0.0001087008244996034,
      "loss": 2.8822,
      "step": 165913
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6406431198120117,
      "learning_rate": 0.00010869767349724023,
      "loss": 2.8763,
      "step": 165914
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.046834707260132,
      "learning_rate": 0.00010869452253044388,
      "loss": 3.026,
      "step": 165915
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6667542457580566,
      "learning_rate": 0.00010869137159921485,
      "loss": 3.0841,
      "step": 165916
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.694735288619995,
      "learning_rate": 0.00010868822070355359,
      "loss": 3.0747,
      "step": 165917
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2977097034454346,
      "learning_rate": 0.0001086850698434609,
      "loss": 2.8827,
      "step": 165918
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.605260848999023,
      "learning_rate": 0.00010868191901893727,
      "loss": 2.8855,
      "step": 165919
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.597031593322754,
      "learning_rate": 0.00010867876822998316,
      "loss": 2.8956,
      "step": 165920
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.298262357711792,
      "learning_rate": 0.00010867561747659939,
      "loss": 2.9705,
      "step": 165921
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8123421669006348,
      "learning_rate": 0.00010867246675878648,
      "loss": 2.7117,
      "step": 165922
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.20713472366333,
      "learning_rate": 0.0001086693160765448,
      "loss": 2.9574,
      "step": 165923
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8000705242156982,
      "learning_rate": 0.00010866616542987525,
      "loss": 2.9609,
      "step": 165924
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8082520961761475,
      "learning_rate": 0.00010866301481877827,
      "loss": 2.9611,
      "step": 165925
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5849413871765137,
      "learning_rate": 0.00010865986424325433,
      "loss": 3.0388,
      "step": 165926
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.258429765701294,
      "learning_rate": 0.00010865671370330424,
      "loss": 2.9663,
      "step": 165927
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8373632431030273,
      "learning_rate": 0.00010865356319892839,
      "loss": 3.1309,
      "step": 165928
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1390199661254883,
      "learning_rate": 0.00010865041273012753,
      "loss": 2.8918,
      "step": 165929
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.162935256958008,
      "learning_rate": 0.0001086472622969022,
      "loss": 3.2228,
      "step": 165930
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5587830543518066,
      "learning_rate": 0.00010864411189925294,
      "loss": 2.9728,
      "step": 165931
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.444287061691284,
      "learning_rate": 0.00010864096153718023,
      "loss": 2.7666,
      "step": 165932
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1480042934417725,
      "learning_rate": 0.00010863781121068493,
      "loss": 3.0604,
      "step": 165933
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.571225881576538,
      "learning_rate": 0.00010863466091976732,
      "loss": 2.7477,
      "step": 165934
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1480231285095215,
      "learning_rate": 0.00010863151066442826,
      "loss": 3.1074,
      "step": 165935
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.534853219985962,
      "learning_rate": 0.00010862836044466822,
      "loss": 3.0378,
      "step": 165936
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0295422077178955,
      "learning_rate": 0.00010862521026048777,
      "loss": 2.9234,
      "step": 165937
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.450813055038452,
      "learning_rate": 0.0001086220601118874,
      "loss": 2.93,
      "step": 165938
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.234088897705078,
      "learning_rate": 0.00010861890999886794,
      "loss": 2.9109,
      "step": 165939
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.654834747314453,
      "learning_rate": 0.0001086157599214297,
      "loss": 2.8801,
      "step": 165940
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.660627603530884,
      "learning_rate": 0.0001086126098795735,
      "loss": 2.9348,
      "step": 165941
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.318783760070801,
      "learning_rate": 0.00010860945987329986,
      "loss": 2.8558,
      "step": 165942
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4938457012176514,
      "learning_rate": 0.0001086063099026093,
      "loss": 3.0695,
      "step": 165943
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1664018630981445,
      "learning_rate": 0.00010860315996750236,
      "loss": 2.8187,
      "step": 165944
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3662779331207275,
      "learning_rate": 0.0001086000100679798,
      "loss": 2.7296,
      "step": 165945
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.516934394836426,
      "learning_rate": 0.00010859686020404202,
      "loss": 2.8765,
      "step": 165946
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.308290958404541,
      "learning_rate": 0.00010859371037568979,
      "loss": 3.0413,
      "step": 165947
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.36661696434021,
      "learning_rate": 0.00010859056058292351,
      "loss": 2.8672,
      "step": 165948
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.623370885848999,
      "learning_rate": 0.00010858741082574402,
      "loss": 2.6294,
      "step": 165949
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.449174165725708,
      "learning_rate": 0.00010858426110415158,
      "loss": 3.108,
      "step": 165950
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3725483417510986,
      "learning_rate": 0.00010858111141814704,
      "loss": 2.8227,
      "step": 165951
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.586106777191162,
      "learning_rate": 0.00010857796176773079,
      "loss": 3.0308,
      "step": 165952
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9409682750701904,
      "learning_rate": 0.00010857481215290358,
      "loss": 2.8683,
      "step": 165953
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.7466607093811035,
      "learning_rate": 0.00010857166257366583,
      "loss": 2.8504,
      "step": 165954
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.969413995742798,
      "learning_rate": 0.00010856851303001845,
      "loss": 2.7213,
      "step": 165955
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5785133838653564,
      "learning_rate": 0.00010856536352196158,
      "loss": 2.9017,
      "step": 165956
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.74519681930542,
      "learning_rate": 0.00010856221404949613,
      "loss": 2.757,
      "step": 165957
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.455024242401123,
      "learning_rate": 0.00010855906461262246,
      "loss": 2.996,
      "step": 165958
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.528320550918579,
      "learning_rate": 0.0001085559152113414,
      "loss": 3.0698,
      "step": 165959
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2962918281555176,
      "learning_rate": 0.0001085527658456533,
      "loss": 2.8058,
      "step": 165960
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0952610969543457,
      "learning_rate": 0.00010854961651555903,
      "loss": 3.086,
      "step": 165961
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3942677974700928,
      "learning_rate": 0.00010854646722105884,
      "loss": 3.0812,
      "step": 165962
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5219638347625732,
      "learning_rate": 0.00010854331796215355,
      "loss": 2.7137,
      "step": 165963
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.867091417312622,
      "learning_rate": 0.00010854016873884356,
      "loss": 2.9352,
      "step": 165964
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2140989303588867,
      "learning_rate": 0.00010853701955112968,
      "loss": 2.8414,
      "step": 165965
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.65352201461792,
      "learning_rate": 0.00010853387039901228,
      "loss": 2.9424,
      "step": 165966
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.777442216873169,
      "learning_rate": 0.00010853072128249226,
      "loss": 3.0174,
      "step": 165967
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.667577028274536,
      "learning_rate": 0.0001085275722015698,
      "loss": 3.0025,
      "step": 165968
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5446889400482178,
      "learning_rate": 0.00010852442315624577,
      "loss": 3.0435,
      "step": 165969
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.8560850620269775,
      "learning_rate": 0.00010852127414652055,
      "loss": 2.8435,
      "step": 165970
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.511975049972534,
      "learning_rate": 0.00010851812517239493,
      "loss": 2.9333,
      "step": 165971
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6472816467285156,
      "learning_rate": 0.00010851497623386934,
      "loss": 3.2103,
      "step": 165972
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1610703468322754,
      "learning_rate": 0.00010851182733094462,
      "loss": 3.183,
      "step": 165973
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3427093029022217,
      "learning_rate": 0.00010850867846362096,
      "loss": 3.0406,
      "step": 165974
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4243781566619873,
      "learning_rate": 0.00010850552963189923,
      "loss": 3.162,
      "step": 165975
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5790555477142334,
      "learning_rate": 0.00010850238083577986,
      "loss": 3.0384,
      "step": 165976
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6955437660217285,
      "learning_rate": 0.00010849923207526363,
      "loss": 2.9444,
      "step": 165977
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8394272327423096,
      "learning_rate": 0.0001084960833503509,
      "loss": 2.9873,
      "step": 165978
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2549633979797363,
      "learning_rate": 0.00010849293466104254,
      "loss": 2.9412,
      "step": 165979
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5338099002838135,
      "learning_rate": 0.00010848978600733878,
      "loss": 2.8408,
      "step": 165980
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0830609798431396,
      "learning_rate": 0.00010848663738924046,
      "loss": 2.9449,
      "step": 165981
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3447296619415283,
      "learning_rate": 0.00010848348880674799,
      "loss": 3.2157,
      "step": 165982
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5556845664978027,
      "learning_rate": 0.00010848034025986219,
      "loss": 2.9545,
      "step": 165983
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5392775535583496,
      "learning_rate": 0.00010847719174858336,
      "loss": 2.992,
      "step": 165984
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.512709140777588,
      "learning_rate": 0.00010847404327291248,
      "loss": 2.9696,
      "step": 165985
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.6561408042907715,
      "learning_rate": 0.00010847089483284965,
      "loss": 3.0605,
      "step": 165986
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4798622131347656,
      "learning_rate": 0.00010846774642839583,
      "loss": 3.0511,
      "step": 165987
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.692554235458374,
      "learning_rate": 0.00010846459805955138,
      "loss": 2.7472,
      "step": 165988
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8697237968444824,
      "learning_rate": 0.00010846144972631702,
      "loss": 3.1135,
      "step": 165989
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.22198224067688,
      "learning_rate": 0.00010845830142869326,
      "loss": 2.968,
      "step": 165990
    },
    {
      "epoch": 2.16,
      "grad_norm": 1.9596787691116333,
      "learning_rate": 0.00010845515316668077,
      "loss": 3.1898,
      "step": 165991
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4224765300750732,
      "learning_rate": 0.0001084520049402801,
      "loss": 2.9343,
      "step": 165992
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5735790729522705,
      "learning_rate": 0.0001084488567494918,
      "loss": 2.7739,
      "step": 165993
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.250115871429443,
      "learning_rate": 0.00010844570859431642,
      "loss": 2.9064,
      "step": 165994
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4251315593719482,
      "learning_rate": 0.00010844256047475466,
      "loss": 3.1279,
      "step": 165995
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.746479034423828,
      "learning_rate": 0.00010843941239080696,
      "loss": 2.7571,
      "step": 165996
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.324521541595459,
      "learning_rate": 0.00010843626434247409,
      "loss": 3.1179,
      "step": 165997
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1085965633392334,
      "learning_rate": 0.00010843311632975652,
      "loss": 2.9831,
      "step": 165998
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4510936737060547,
      "learning_rate": 0.00010842996835265476,
      "loss": 2.8165,
      "step": 165999
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.197784900665283,
      "learning_rate": 0.00010842682041116958,
      "loss": 2.6696,
      "step": 166000
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.580395460128784,
      "learning_rate": 0.00010842367250530148,
      "loss": 2.8916,
      "step": 166001
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.369166851043701,
      "learning_rate": 0.0001084205246350509,
      "loss": 2.9722,
      "step": 166002
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2747297286987305,
      "learning_rate": 0.00010841737680041868,
      "loss": 2.9644,
      "step": 166003
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8486599922180176,
      "learning_rate": 0.0001084142290014053,
      "loss": 2.9722,
      "step": 166004
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2720890045166016,
      "learning_rate": 0.00010841108123801123,
      "loss": 3.1818,
      "step": 166005
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.42754864692688,
      "learning_rate": 0.00010840793351023727,
      "loss": 3.1464,
      "step": 166006
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.243274211883545,
      "learning_rate": 0.00010840478581808388,
      "loss": 3.0169,
      "step": 166007
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5332279205322266,
      "learning_rate": 0.00010840163816155155,
      "loss": 3.0983,
      "step": 166008
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2081470489501953,
      "learning_rate": 0.00010839849054064107,
      "loss": 3.1736,
      "step": 166009
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1755635738372803,
      "learning_rate": 0.00010839534295535293,
      "loss": 2.8239,
      "step": 166010
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6938533782958984,
      "learning_rate": 0.00010839219540568759,
      "loss": 2.793,
      "step": 166011
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.330505609512329,
      "learning_rate": 0.00010838904789164589,
      "loss": 3.1089,
      "step": 166012
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.533712387084961,
      "learning_rate": 0.00010838590041322819,
      "loss": 2.9518,
      "step": 166013
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3015334606170654,
      "learning_rate": 0.00010838275297043522,
      "loss": 2.8482,
      "step": 166014
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1731300354003906,
      "learning_rate": 0.00010837960556326757,
      "loss": 2.7721,
      "step": 166015
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2006208896636963,
      "learning_rate": 0.00010837645819172575,
      "loss": 2.8549,
      "step": 166016
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.412116527557373,
      "learning_rate": 0.00010837331085581026,
      "loss": 2.7903,
      "step": 166017
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5618443489074707,
      "learning_rate": 0.00010837016355552188,
      "loss": 3.0867,
      "step": 166018
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.106116771697998,
      "learning_rate": 0.00010836701629086098,
      "loss": 3.0191,
      "step": 166019
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.722674608230591,
      "learning_rate": 0.00010836386906182843,
      "loss": 2.9549,
      "step": 166020
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.159353733062744,
      "learning_rate": 0.00010836072186842461,
      "loss": 2.7681,
      "step": 166021
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.291217803955078,
      "learning_rate": 0.00010835757471065017,
      "loss": 3.0786,
      "step": 166022
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5754849910736084,
      "learning_rate": 0.00010835442758850558,
      "loss": 2.7453,
      "step": 166023
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.321399450302124,
      "learning_rate": 0.0001083512805019916,
      "loss": 3.0948,
      "step": 166024
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.407895088195801,
      "learning_rate": 0.00010834813345110863,
      "loss": 2.7607,
      "step": 166025
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.953932762145996,
      "learning_rate": 0.00010834498643585747,
      "loss": 2.7587,
      "step": 166026
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.345554828643799,
      "learning_rate": 0.00010834183945623847,
      "loss": 3.1796,
      "step": 166027
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.43684983253479,
      "learning_rate": 0.00010833869251225257,
      "loss": 2.8217,
      "step": 166028
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5708608627319336,
      "learning_rate": 0.00010833554560389995,
      "loss": 3.0323,
      "step": 166029
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.882632255554199,
      "learning_rate": 0.00010833239873118143,
      "loss": 2.9421,
      "step": 166030
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.539501190185547,
      "learning_rate": 0.00010832925189409746,
      "loss": 2.8097,
      "step": 166031
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.029738426208496,
      "learning_rate": 0.00010832610509264877,
      "loss": 2.849,
      "step": 166032
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7617664337158203,
      "learning_rate": 0.00010832295832683579,
      "loss": 2.7603,
      "step": 166033
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5580813884735107,
      "learning_rate": 0.00010831981159665939,
      "loss": 2.9554,
      "step": 166034
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7554521560668945,
      "learning_rate": 0.00010831666490211976,
      "loss": 2.9569,
      "step": 166035
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.7069497108459473,
      "learning_rate": 0.00010831351824321778,
      "loss": 2.8822,
      "step": 166036
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.723362684249878,
      "learning_rate": 0.00010831037161995383,
      "loss": 2.9108,
      "step": 166037
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2595794200897217,
      "learning_rate": 0.0001083072250323287,
      "loss": 3.0851,
      "step": 166038
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4287850856781006,
      "learning_rate": 0.00010830407848034277,
      "loss": 3.0345,
      "step": 166039
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.889962911605835,
      "learning_rate": 0.00010830093196399693,
      "loss": 3.1256,
      "step": 166040
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.36043643951416,
      "learning_rate": 0.00010829778548329138,
      "loss": 3.0635,
      "step": 166041
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.9388511180877686,
      "learning_rate": 0.00010829463903822699,
      "loss": 2.9879,
      "step": 166042
    },
    {
      "epoch": 2.16,
      "grad_norm": 6.419075965881348,
      "learning_rate": 0.00010829149262880415,
      "loss": 3.0405,
      "step": 166043
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.248737812042236,
      "learning_rate": 0.00010828834625502363,
      "loss": 3.1037,
      "step": 166044
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6052210330963135,
      "learning_rate": 0.0001082851999168858,
      "loss": 2.9084,
      "step": 166045
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6676859855651855,
      "learning_rate": 0.00010828205361439159,
      "loss": 3.053,
      "step": 166046
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8664164543151855,
      "learning_rate": 0.00010827890734754117,
      "loss": 2.7692,
      "step": 166047
    },
    {
      "epoch": 2.16,
      "grad_norm": 5.743492603302002,
      "learning_rate": 0.00010827576111633545,
      "loss": 2.7655,
      "step": 166048
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.245249032974243,
      "learning_rate": 0.00010827261492077474,
      "loss": 2.8113,
      "step": 166049
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.023979902267456,
      "learning_rate": 0.00010826946876085988,
      "loss": 2.8727,
      "step": 166050
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9718775749206543,
      "learning_rate": 0.00010826632263659124,
      "loss": 3.2207,
      "step": 166051
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6503610610961914,
      "learning_rate": 0.00010826317654796971,
      "loss": 2.8613,
      "step": 166052
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6736209392547607,
      "learning_rate": 0.00010826003049499552,
      "loss": 3.0136,
      "step": 166053
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.47797155380249,
      "learning_rate": 0.00010825688447766946,
      "loss": 2.6118,
      "step": 166054
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.346071243286133,
      "learning_rate": 0.00010825373849599197,
      "loss": 3.1791,
      "step": 166055
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.515019416809082,
      "learning_rate": 0.00010825059254996385,
      "loss": 2.9595,
      "step": 166056
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.602102041244507,
      "learning_rate": 0.00010824744663958546,
      "loss": 2.6911,
      "step": 166057
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1092190742492676,
      "learning_rate": 0.00010824430076485767,
      "loss": 2.8959,
      "step": 166058
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.072908878326416,
      "learning_rate": 0.00010824115492578071,
      "loss": 3.1127,
      "step": 166059
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.792742967605591,
      "learning_rate": 0.00010823800912235542,
      "loss": 3.0334,
      "step": 166060
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.45438289642334,
      "learning_rate": 0.00010823486335458221,
      "loss": 3.1459,
      "step": 166061
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.838752508163452,
      "learning_rate": 0.00010823171762246185,
      "loss": 3.1062,
      "step": 166062
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.27108097076416,
      "learning_rate": 0.00010822857192599474,
      "loss": 2.9026,
      "step": 166063
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.389275312423706,
      "learning_rate": 0.00010822542626518175,
      "loss": 2.8492,
      "step": 166064
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.712125539779663,
      "learning_rate": 0.0001082222806400231,
      "loss": 2.9799,
      "step": 166065
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.519557237625122,
      "learning_rate": 0.00010821913505051962,
      "loss": 2.781,
      "step": 166066
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.9369492530822754,
      "learning_rate": 0.00010821598949667173,
      "loss": 2.9683,
      "step": 166067
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7735226154327393,
      "learning_rate": 0.00010821284397848022,
      "loss": 2.8113,
      "step": 166068
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0156025886535645,
      "learning_rate": 0.00010820969849594544,
      "loss": 3.053,
      "step": 166069
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3916852474212646,
      "learning_rate": 0.0001082065530490683,
      "loss": 2.9663,
      "step": 166070
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2884278297424316,
      "learning_rate": 0.00010820340763784902,
      "loss": 2.8885,
      "step": 166071
    },
    {
      "epoch": 2.16,
      "grad_norm": 5.779334545135498,
      "learning_rate": 0.00010820026226228841,
      "loss": 2.8019,
      "step": 166072
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.320024251937866,
      "learning_rate": 0.00010819711692238691,
      "loss": 3.1343,
      "step": 166073
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.359459400177002,
      "learning_rate": 0.00010819397161814529,
      "loss": 3.0034,
      "step": 166074
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9488909244537354,
      "learning_rate": 0.00010819082634956391,
      "loss": 2.8483,
      "step": 166075
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2643446922302246,
      "learning_rate": 0.0001081876811166436,
      "loss": 2.9528,
      "step": 166076
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3386778831481934,
      "learning_rate": 0.00010818453591938479,
      "loss": 3.0478,
      "step": 166077
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.733030319213867,
      "learning_rate": 0.00010818139075778813,
      "loss": 3.1137,
      "step": 166078
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4901235103607178,
      "learning_rate": 0.00010817824563185404,
      "loss": 3.018,
      "step": 166079
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2413418292999268,
      "learning_rate": 0.00010817510054158336,
      "loss": 3.1342,
      "step": 166080
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6805078983306885,
      "learning_rate": 0.00010817195548697641,
      "loss": 2.8424,
      "step": 166081
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4677517414093018,
      "learning_rate": 0.00010816881046803407,
      "loss": 2.806,
      "step": 166082
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2856898307800293,
      "learning_rate": 0.00010816566548475673,
      "loss": 2.928,
      "step": 166083
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3948490619659424,
      "learning_rate": 0.00010816252053714503,
      "loss": 2.9994,
      "step": 166084
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5107648372650146,
      "learning_rate": 0.00010815937562519943,
      "loss": 2.9051,
      "step": 166085
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.579329490661621,
      "learning_rate": 0.00010815623074892074,
      "loss": 2.8281,
      "step": 166086
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.804116725921631,
      "learning_rate": 0.0001081530859083093,
      "loss": 2.9536,
      "step": 166087
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7671172618865967,
      "learning_rate": 0.00010814994110336593,
      "loss": 2.9897,
      "step": 166088
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5965588092803955,
      "learning_rate": 0.00010814679633409115,
      "loss": 2.9903,
      "step": 166089
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6295244693756104,
      "learning_rate": 0.00010814365160048536,
      "loss": 2.8678,
      "step": 166090
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.809603691101074,
      "learning_rate": 0.00010814050690254937,
      "loss": 2.9236,
      "step": 166091
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.786508798599243,
      "learning_rate": 0.00010813736224028372,
      "loss": 3.1524,
      "step": 166092
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.348020315170288,
      "learning_rate": 0.00010813421761368883,
      "loss": 2.896,
      "step": 166093
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7486767768859863,
      "learning_rate": 0.00010813107302276554,
      "loss": 2.9231,
      "step": 166094
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4262335300445557,
      "learning_rate": 0.00010812792846751429,
      "loss": 2.7391,
      "step": 166095
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9033260345458984,
      "learning_rate": 0.00010812478394793556,
      "loss": 2.8788,
      "step": 166096
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2472543716430664,
      "learning_rate": 0.00010812163946403018,
      "loss": 2.7749,
      "step": 166097
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0334606170654297,
      "learning_rate": 0.00010811849501579852,
      "loss": 3.3559,
      "step": 166098
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.896273374557495,
      "learning_rate": 0.00010811535060324131,
      "loss": 2.9494,
      "step": 166099
    },
    {
      "epoch": 2.16,
      "grad_norm": 6.8354291915893555,
      "learning_rate": 0.00010811220622635911,
      "loss": 2.8809,
      "step": 166100
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7233822345733643,
      "learning_rate": 0.00010810906188515247,
      "loss": 2.97,
      "step": 166101
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3463244438171387,
      "learning_rate": 0.0001081059175796219,
      "loss": 3.1894,
      "step": 166102
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3117482662200928,
      "learning_rate": 0.00010810277330976811,
      "loss": 2.8945,
      "step": 166103
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6630921363830566,
      "learning_rate": 0.00010809962907559155,
      "loss": 2.8207,
      "step": 166104
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.742673873901367,
      "learning_rate": 0.00010809648487709303,
      "loss": 2.9758,
      "step": 166105
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.9867990016937256,
      "learning_rate": 0.00010809334071427295,
      "loss": 2.9555,
      "step": 166106
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0022196769714355,
      "learning_rate": 0.00010809019658713198,
      "loss": 2.8778,
      "step": 166107
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8072495460510254,
      "learning_rate": 0.00010808705249567054,
      "loss": 2.8779,
      "step": 166108
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4010791778564453,
      "learning_rate": 0.00010808390843988945,
      "loss": 2.9762,
      "step": 166109
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.895353317260742,
      "learning_rate": 0.00010808076441978907,
      "loss": 2.8068,
      "step": 166110
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5267395973205566,
      "learning_rate": 0.00010807762043537021,
      "loss": 3.0548,
      "step": 166111
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.1590921878814697,
      "learning_rate": 0.00010807447648663325,
      "loss": 3.02,
      "step": 166112
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0621418952941895,
      "learning_rate": 0.00010807133257357906,
      "loss": 3.0562,
      "step": 166113
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8107898235321045,
      "learning_rate": 0.00010806818869620782,
      "loss": 2.7554,
      "step": 166114
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4096994400024414,
      "learning_rate": 0.00010806504485452044,
      "loss": 2.9571,
      "step": 166115
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.016726016998291,
      "learning_rate": 0.00010806190104851729,
      "loss": 2.7586,
      "step": 166116
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.630821943283081,
      "learning_rate": 0.00010805875727819919,
      "loss": 2.878,
      "step": 166117
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1318957805633545,
      "learning_rate": 0.00010805561354356644,
      "loss": 2.9928,
      "step": 166118
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4378268718719482,
      "learning_rate": 0.00010805246984461997,
      "loss": 2.9448,
      "step": 166119
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2136895656585693,
      "learning_rate": 0.00010804932618135999,
      "loss": 2.9112,
      "step": 166120
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1025540828704834,
      "learning_rate": 0.00010804618255378737,
      "loss": 2.9816,
      "step": 166121
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.288321018218994,
      "learning_rate": 0.00010804303896190248,
      "loss": 3.1027,
      "step": 166122
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.482231855392456,
      "learning_rate": 0.00010803989540570612,
      "loss": 3.1357,
      "step": 166123
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5700440406799316,
      "learning_rate": 0.00010803675188519865,
      "loss": 2.9587,
      "step": 166124
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5534567832946777,
      "learning_rate": 0.000108033608400381,
      "loss": 3.0324,
      "step": 166125
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.323957681655884,
      "learning_rate": 0.00010803046495125331,
      "loss": 3.2048,
      "step": 166126
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8014445304870605,
      "learning_rate": 0.00010802732153781647,
      "loss": 2.7341,
      "step": 166127
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.662407159805298,
      "learning_rate": 0.00010802417816007088,
      "loss": 2.7105,
      "step": 166128
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2511026859283447,
      "learning_rate": 0.00010802103481801733,
      "loss": 3.0931,
      "step": 166129
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3228700160980225,
      "learning_rate": 0.0001080178915116562,
      "loss": 2.8079,
      "step": 166130
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7054696083068848,
      "learning_rate": 0.00010801474824098835,
      "loss": 2.8433,
      "step": 166131
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.716132879257202,
      "learning_rate": 0.00010801160500601398,
      "loss": 2.9335,
      "step": 166132
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.436056613922119,
      "learning_rate": 0.00010800846180673395,
      "loss": 3.1082,
      "step": 166133
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9660003185272217,
      "learning_rate": 0.00010800531864314873,
      "loss": 3.0493,
      "step": 166134
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.0688509941101074,
      "learning_rate": 0.00010800217551525902,
      "loss": 2.8686,
      "step": 166135
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8041226863861084,
      "learning_rate": 0.00010799903242306522,
      "loss": 2.8127,
      "step": 166136
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6939289569854736,
      "learning_rate": 0.00010799588936656825,
      "loss": 3.0723,
      "step": 166137
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9737672805786133,
      "learning_rate": 0.00010799274634576826,
      "loss": 3.2147,
      "step": 166138
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4535136222839355,
      "learning_rate": 0.00010798960336066612,
      "loss": 2.9391,
      "step": 166139
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.372945785522461,
      "learning_rate": 0.00010798646041126225,
      "loss": 3.15,
      "step": 166140
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4166386127471924,
      "learning_rate": 0.00010798331749755742,
      "loss": 2.7999,
      "step": 166141
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.6677253246307373,
      "learning_rate": 0.000107980174619552,
      "loss": 2.9026,
      "step": 166142
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.9930572509765625,
      "learning_rate": 0.00010797703177724693,
      "loss": 3.004,
      "step": 166143
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7659385204315186,
      "learning_rate": 0.00010797388897064234,
      "loss": 2.8458,
      "step": 166144
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.8339927196502686,
      "learning_rate": 0.00010797074619973912,
      "loss": 2.9229,
      "step": 166145
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.134053945541382,
      "learning_rate": 0.00010796760346453765,
      "loss": 2.7949,
      "step": 166146
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4071059226989746,
      "learning_rate": 0.00010796446076503874,
      "loss": 3.0489,
      "step": 166147
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.279088020324707,
      "learning_rate": 0.00010796131810124278,
      "loss": 3.2377,
      "step": 166148
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.793241500854492,
      "learning_rate": 0.00010795817547315052,
      "loss": 3.2316,
      "step": 166149
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.036320209503174,
      "learning_rate": 0.00010795503288076248,
      "loss": 2.8053,
      "step": 166150
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4874162673950195,
      "learning_rate": 0.00010795189032407919,
      "loss": 2.9083,
      "step": 166151
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.760035991668701,
      "learning_rate": 0.00010794874780310118,
      "loss": 2.9034,
      "step": 166152
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3604061603546143,
      "learning_rate": 0.00010794560531782922,
      "loss": 3.0245,
      "step": 166153
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.356783866882324,
      "learning_rate": 0.00010794246286826373,
      "loss": 3.0007,
      "step": 166154
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.106595516204834,
      "learning_rate": 0.00010793932045440543,
      "loss": 2.8972,
      "step": 166155
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4777474403381348,
      "learning_rate": 0.00010793617807625484,
      "loss": 2.7244,
      "step": 166156
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.562392234802246,
      "learning_rate": 0.00010793303573381253,
      "loss": 3.223,
      "step": 166157
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.450599431991577,
      "learning_rate": 0.00010792989342707901,
      "loss": 2.9945,
      "step": 166158
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9440460205078125,
      "learning_rate": 0.00010792675115605506,
      "loss": 3.02,
      "step": 166159
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.656486749649048,
      "learning_rate": 0.00010792360892074101,
      "loss": 2.9215,
      "step": 166160
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.320530891418457,
      "learning_rate": 0.00010792046672113773,
      "loss": 2.8282,
      "step": 166161
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.4190778732299805,
      "learning_rate": 0.00010791732455724565,
      "loss": 2.9953,
      "step": 166162
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6646480560302734,
      "learning_rate": 0.00010791418242906534,
      "loss": 3.0609,
      "step": 166163
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5721888542175293,
      "learning_rate": 0.00010791104033659733,
      "loss": 3.063,
      "step": 166164
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.542579174041748,
      "learning_rate": 0.00010790789827984239,
      "loss": 2.8656,
      "step": 166165
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.543581008911133,
      "learning_rate": 0.0001079047562588009,
      "loss": 2.9033,
      "step": 166166
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5228168964385986,
      "learning_rate": 0.00010790161427347362,
      "loss": 2.8956,
      "step": 166167
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4630322456359863,
      "learning_rate": 0.00010789847232386107,
      "loss": 2.8464,
      "step": 166168
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.45094895362854,
      "learning_rate": 0.00010789533040996382,
      "loss": 2.7747,
      "step": 166169
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.112736701965332,
      "learning_rate": 0.00010789218853178237,
      "loss": 2.8922,
      "step": 166170
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.572894334793091,
      "learning_rate": 0.00010788904668931747,
      "loss": 3.2383,
      "step": 166171
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2973127365112305,
      "learning_rate": 0.00010788590488256951,
      "loss": 2.9701,
      "step": 166172
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3372955322265625,
      "learning_rate": 0.0001078827631115393,
      "loss": 2.9801,
      "step": 166173
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9832637310028076,
      "learning_rate": 0.00010787962137622733,
      "loss": 2.875,
      "step": 166174
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.691009998321533,
      "learning_rate": 0.00010787647967663402,
      "loss": 2.9496,
      "step": 166175
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5239741802215576,
      "learning_rate": 0.00010787333801276024,
      "loss": 3.1814,
      "step": 166176
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3392865657806396,
      "learning_rate": 0.00010787019638460641,
      "loss": 2.8582,
      "step": 166177
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.41176700592041,
      "learning_rate": 0.00010786705479217305,
      "loss": 3.1479,
      "step": 166178
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.220271110534668,
      "learning_rate": 0.00010786391323546095,
      "loss": 2.9998,
      "step": 166179
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.0716552734375,
      "learning_rate": 0.00010786077171447053,
      "loss": 2.8245,
      "step": 166180
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4377894401550293,
      "learning_rate": 0.00010785763022920234,
      "loss": 2.9762,
      "step": 166181
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.266132116317749,
      "learning_rate": 0.00010785448877965714,
      "loss": 3.013,
      "step": 166182
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.916703224182129,
      "learning_rate": 0.00010785134736583544,
      "loss": 2.9835,
      "step": 166183
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7740323543548584,
      "learning_rate": 0.00010784820598773769,
      "loss": 2.9856,
      "step": 166184
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3343234062194824,
      "learning_rate": 0.00010784506464536466,
      "loss": 2.9164,
      "step": 166185
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7184667587280273,
      "learning_rate": 0.0001078419233387169,
      "loss": 2.9825,
      "step": 166186
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6584298610687256,
      "learning_rate": 0.00010783878206779482,
      "loss": 3.244,
      "step": 166187
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4281539916992188,
      "learning_rate": 0.00010783564083259925,
      "loss": 3.1049,
      "step": 166188
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5156939029693604,
      "learning_rate": 0.00010783249963313058,
      "loss": 2.9359,
      "step": 166189
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.289419651031494,
      "learning_rate": 0.00010782935846938953,
      "loss": 2.9156,
      "step": 166190
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4329824447631836,
      "learning_rate": 0.00010782621734137667,
      "loss": 3.2634,
      "step": 166191
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4686031341552734,
      "learning_rate": 0.00010782307624909253,
      "loss": 3.1012,
      "step": 166192
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5242557525634766,
      "learning_rate": 0.0001078199351925376,
      "loss": 3.2162,
      "step": 166193
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.0762007236480713,
      "learning_rate": 0.00010781679417171271,
      "loss": 2.9398,
      "step": 166194
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.891031503677368,
      "learning_rate": 0.00010781365318661819,
      "loss": 3.0543,
      "step": 166195
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4589486122131348,
      "learning_rate": 0.00010781051223725482,
      "loss": 2.9725,
      "step": 166196
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.395505905151367,
      "learning_rate": 0.000107807371323623,
      "loss": 3.0095,
      "step": 166197
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9017112255096436,
      "learning_rate": 0.00010780423044572363,
      "loss": 2.9767,
      "step": 166198
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6541688442230225,
      "learning_rate": 0.0001078010896035569,
      "loss": 2.9718,
      "step": 166199
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5182666778564453,
      "learning_rate": 0.00010779794879712363,
      "loss": 2.6419,
      "step": 166200
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.568187952041626,
      "learning_rate": 0.00010779480802642427,
      "loss": 3.057,
      "step": 166201
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4851696491241455,
      "learning_rate": 0.00010779166729145961,
      "loss": 2.7492,
      "step": 166202
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4351916313171387,
      "learning_rate": 0.00010778852659222999,
      "loss": 2.8747,
      "step": 166203
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.697282314300537,
      "learning_rate": 0.00010778538592873631,
      "loss": 3.0498,
      "step": 166204
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5457541942596436,
      "learning_rate": 0.00010778224530097875,
      "loss": 2.942,
      "step": 166205
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.817683696746826,
      "learning_rate": 0.00010777910470895818,
      "loss": 3.1308,
      "step": 166206
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.296919345855713,
      "learning_rate": 0.00010777596415267505,
      "loss": 2.9562,
      "step": 166207
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5834858417510986,
      "learning_rate": 0.00010777282363213005,
      "loss": 2.7694,
      "step": 166208
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.649444580078125,
      "learning_rate": 0.00010776968314732365,
      "loss": 2.9804,
      "step": 166209
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6731739044189453,
      "learning_rate": 0.00010776654269825665,
      "loss": 2.9798,
      "step": 166210
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9556643962860107,
      "learning_rate": 0.0001077634022849293,
      "loss": 2.888,
      "step": 166211
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7026302814483643,
      "learning_rate": 0.00010776026190734249,
      "loss": 2.9171,
      "step": 166212
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.737560987472534,
      "learning_rate": 0.00010775712156549653,
      "loss": 2.8675,
      "step": 166213
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.0792582035064697,
      "learning_rate": 0.00010775398125939227,
      "loss": 3.0463,
      "step": 166214
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.6288652420043945,
      "learning_rate": 0.0001077508409890301,
      "loss": 2.8182,
      "step": 166215
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8972887992858887,
      "learning_rate": 0.00010774770075441075,
      "loss": 2.8821,
      "step": 166216
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3925843238830566,
      "learning_rate": 0.00010774456055553473,
      "loss": 2.9578,
      "step": 166217
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8515987396240234,
      "learning_rate": 0.00010774142039240265,
      "loss": 3.065,
      "step": 166218
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.811513662338257,
      "learning_rate": 0.00010773828026501494,
      "loss": 2.887,
      "step": 166219
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.1351637840270996,
      "learning_rate": 0.00010773514017337242,
      "loss": 3.0149,
      "step": 166220
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.480698823928833,
      "learning_rate": 0.00010773200011747546,
      "loss": 2.8586,
      "step": 166221
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9702272415161133,
      "learning_rate": 0.00010772886009732486,
      "loss": 2.9106,
      "step": 166222
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.3287713527679443,
      "learning_rate": 0.0001077257201129211,
      "loss": 2.9052,
      "step": 166223
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6825244426727295,
      "learning_rate": 0.00010772258016426476,
      "loss": 2.9571,
      "step": 166224
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.889526844024658,
      "learning_rate": 0.00010771944025135632,
      "loss": 3.0137,
      "step": 166225
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3671586513519287,
      "learning_rate": 0.00010771630037419654,
      "loss": 3.0044,
      "step": 166226
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.374783754348755,
      "learning_rate": 0.00010771316053278587,
      "loss": 2.9785,
      "step": 166227
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5164403915405273,
      "learning_rate": 0.00010771002072712504,
      "loss": 2.9102,
      "step": 166228
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7144241333007812,
      "learning_rate": 0.00010770688095721454,
      "loss": 3.1634,
      "step": 166229
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.439480781555176,
      "learning_rate": 0.00010770374122305493,
      "loss": 2.9211,
      "step": 166230
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.900872230529785,
      "learning_rate": 0.00010770060152464677,
      "loss": 2.8927,
      "step": 166231
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5799872875213623,
      "learning_rate": 0.00010769746186199079,
      "loss": 3.0432,
      "step": 166232
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.745692253112793,
      "learning_rate": 0.00010769432223508736,
      "loss": 2.8664,
      "step": 166233
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3371129035949707,
      "learning_rate": 0.0001076911826439373,
      "loss": 2.6298,
      "step": 166234
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.175725221633911,
      "learning_rate": 0.00010768804308854108,
      "loss": 3.1169,
      "step": 166235
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4488911628723145,
      "learning_rate": 0.00010768490356889929,
      "loss": 2.6347,
      "step": 166236
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.291788339614868,
      "learning_rate": 0.00010768176408501238,
      "loss": 2.9961,
      "step": 166237
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.141606330871582,
      "learning_rate": 0.0001076786246368812,
      "loss": 2.9244,
      "step": 166238
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6812071800231934,
      "learning_rate": 0.00010767548522450606,
      "loss": 2.939,
      "step": 166239
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3860559463500977,
      "learning_rate": 0.0001076723458478878,
      "loss": 2.9295,
      "step": 166240
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.496981620788574,
      "learning_rate": 0.00010766920650702687,
      "loss": 2.9363,
      "step": 166241
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.942298412322998,
      "learning_rate": 0.00010766606720192388,
      "loss": 2.9901,
      "step": 166242
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5189924240112305,
      "learning_rate": 0.00010766292793257927,
      "loss": 2.9802,
      "step": 166243
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.2286148071289062,
      "learning_rate": 0.00010765978869899387,
      "loss": 3.219,
      "step": 166244
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6491799354553223,
      "learning_rate": 0.00010765664950116805,
      "loss": 2.9272,
      "step": 166245
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.0930798053741455,
      "learning_rate": 0.00010765351033910258,
      "loss": 2.7137,
      "step": 166246
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.201643705368042,
      "learning_rate": 0.00010765037121279796,
      "loss": 2.9112,
      "step": 166247
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4248130321502686,
      "learning_rate": 0.00010764723212225476,
      "loss": 2.9077,
      "step": 166248
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.560236930847168,
      "learning_rate": 0.00010764409306747347,
      "loss": 2.7215,
      "step": 166249
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.005192756652832,
      "learning_rate": 0.0001076409540484549,
      "loss": 2.7797,
      "step": 166250
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4718525409698486,
      "learning_rate": 0.00010763781506519937,
      "loss": 2.8545,
      "step": 166251
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.535862445831299,
      "learning_rate": 0.00010763467611770774,
      "loss": 2.7978,
      "step": 166252
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.5051770210266113,
      "learning_rate": 0.00010763153720598044,
      "loss": 2.7832,
      "step": 166253
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.644629955291748,
      "learning_rate": 0.00010762839833001805,
      "loss": 2.791,
      "step": 166254
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.482675552368164,
      "learning_rate": 0.00010762525948982106,
      "loss": 2.8727,
      "step": 166255
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.762718915939331,
      "learning_rate": 0.00010762212068539029,
      "loss": 2.8586,
      "step": 166256
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2131636142730713,
      "learning_rate": 0.00010761898191672611,
      "loss": 3.1108,
      "step": 166257
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.2563834190368652,
      "learning_rate": 0.00010761584318382928,
      "loss": 3.1259,
      "step": 166258
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.4375851154327393,
      "learning_rate": 0.00010761270448670028,
      "loss": 3.2073,
      "step": 166259
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.3201069831848145,
      "learning_rate": 0.00010760956582533963,
      "loss": 2.8745,
      "step": 166260
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.633693218231201,
      "learning_rate": 0.00010760642719974811,
      "loss": 2.7577,
      "step": 166261
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.681302070617676,
      "learning_rate": 0.00010760328860992618,
      "loss": 3.1664,
      "step": 166262
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.649488687515259,
      "learning_rate": 0.00010760015005587429,
      "loss": 3.1205,
      "step": 166263
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.897714138031006,
      "learning_rate": 0.00010759701153759329,
      "loss": 2.8349,
      "step": 166264
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.5631661415100098,
      "learning_rate": 0.00010759387305508364,
      "loss": 2.7653,
      "step": 166265
    },
    {
      "epoch": 2.16,
      "grad_norm": 4.13369083404541,
      "learning_rate": 0.00010759073460834582,
      "loss": 2.933,
      "step": 166266
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.703423023223877,
      "learning_rate": 0.00010758759619738061,
      "loss": 3.1257,
      "step": 166267
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.188169002532959,
      "learning_rate": 0.00010758445782218852,
      "loss": 2.9893,
      "step": 166268
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.8401098251342773,
      "learning_rate": 0.00010758131948276998,
      "loss": 2.922,
      "step": 166269
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7531111240386963,
      "learning_rate": 0.00010757818117912583,
      "loss": 2.757,
      "step": 166270
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.9824326038360596,
      "learning_rate": 0.00010757504291125651,
      "loss": 2.9064,
      "step": 166271
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.6063156127929688,
      "learning_rate": 0.00010757190467916252,
      "loss": 2.7236,
      "step": 166272
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.7850584983825684,
      "learning_rate": 0.00010756876648284468,
      "loss": 2.9147,
      "step": 166273
    },
    {
      "epoch": 2.16,
      "grad_norm": 2.800959825515747,
      "learning_rate": 0.00010756562832230329,
      "loss": 2.7016,
      "step": 166274
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0226337909698486,
      "learning_rate": 0.00010756249019753921,
      "loss": 2.9313,
      "step": 166275
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4505813121795654,
      "learning_rate": 0.0001075593521085529,
      "loss": 2.9112,
      "step": 166276
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1691994667053223,
      "learning_rate": 0.00010755621405534493,
      "loss": 2.9607,
      "step": 166277
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.056241989135742,
      "learning_rate": 0.00010755307603791578,
      "loss": 2.8664,
      "step": 166278
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0334267616271973,
      "learning_rate": 0.00010754993805626626,
      "loss": 3.0263,
      "step": 166279
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.0232298374176025,
      "learning_rate": 0.00010754680011039675,
      "loss": 2.845,
      "step": 166280
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5873160362243652,
      "learning_rate": 0.00010754366220030801,
      "loss": 2.7229,
      "step": 166281
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.388352632522583,
      "learning_rate": 0.00010754052432600043,
      "loss": 2.9273,
      "step": 166282
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4598209857940674,
      "learning_rate": 0.0001075373864874748,
      "loss": 3.1109,
      "step": 166283
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7727534770965576,
      "learning_rate": 0.00010753424868473161,
      "loss": 2.9572,
      "step": 166284
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.8356690406799316,
      "learning_rate": 0.00010753111091777145,
      "loss": 2.6046,
      "step": 166285
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.733320951461792,
      "learning_rate": 0.00010752797318659476,
      "loss": 2.9946,
      "step": 166286
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.25008225440979,
      "learning_rate": 0.00010752483549120235,
      "loss": 3.0583,
      "step": 166287
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.314209222793579,
      "learning_rate": 0.00010752169783159465,
      "loss": 2.7618,
      "step": 166288
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.630247116088867,
      "learning_rate": 0.00010751856020777237,
      "loss": 3.2909,
      "step": 166289
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.164555788040161,
      "learning_rate": 0.00010751542261973602,
      "loss": 3.0173,
      "step": 166290
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7689712047576904,
      "learning_rate": 0.0001075122850674862,
      "loss": 3.0692,
      "step": 166291
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8396551609039307,
      "learning_rate": 0.00010750914755102336,
      "loss": 3.0854,
      "step": 166292
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6828253269195557,
      "learning_rate": 0.00010750601007034833,
      "loss": 2.6595,
      "step": 166293
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.09143328666687,
      "learning_rate": 0.00010750287262546145,
      "loss": 2.9299,
      "step": 166294
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6025335788726807,
      "learning_rate": 0.00010749973521636352,
      "loss": 3.0117,
      "step": 166295
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6310195922851562,
      "learning_rate": 0.00010749659784305501,
      "loss": 2.8096,
      "step": 166296
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.232827663421631,
      "learning_rate": 0.00010749346050553653,
      "loss": 2.9356,
      "step": 166297
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7093474864959717,
      "learning_rate": 0.00010749032320380855,
      "loss": 3.0373,
      "step": 166298
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.162777900695801,
      "learning_rate": 0.00010748718593787185,
      "loss": 3.0039,
      "step": 166299
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.586740732192993,
      "learning_rate": 0.00010748404870772682,
      "loss": 2.9273,
      "step": 166300
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.458904266357422,
      "learning_rate": 0.00010748091151337425,
      "loss": 2.7121,
      "step": 166301
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.388230562210083,
      "learning_rate": 0.00010747777435481459,
      "loss": 2.986,
      "step": 166302
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0147831439971924,
      "learning_rate": 0.00010747463723204847,
      "loss": 2.8134,
      "step": 166303
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6818654537200928,
      "learning_rate": 0.00010747150014507634,
      "loss": 2.8042,
      "step": 166304
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.604170560836792,
      "learning_rate": 0.00010746836309389898,
      "loss": 3.0539,
      "step": 166305
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.590890407562256,
      "learning_rate": 0.00010746522607851679,
      "loss": 3.3876,
      "step": 166306
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.412876844406128,
      "learning_rate": 0.00010746208909893055,
      "loss": 2.7382,
      "step": 166307
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.528660535812378,
      "learning_rate": 0.00010745895215514077,
      "loss": 3.146,
      "step": 166308
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8591911792755127,
      "learning_rate": 0.00010745581524714797,
      "loss": 3.1664,
      "step": 166309
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4207465648651123,
      "learning_rate": 0.00010745267837495267,
      "loss": 3.0943,
      "step": 166310
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.561873197555542,
      "learning_rate": 0.00010744954153855568,
      "loss": 2.7877,
      "step": 166311
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.307072162628174,
      "learning_rate": 0.0001074464047379573,
      "loss": 2.843,
      "step": 166312
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.260204315185547,
      "learning_rate": 0.00010744326797315843,
      "loss": 3.0597,
      "step": 166313
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6284055709838867,
      "learning_rate": 0.00010744013124415945,
      "loss": 2.8868,
      "step": 166314
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.056159973144531,
      "learning_rate": 0.00010743699455096102,
      "loss": 2.9717,
      "step": 166315
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9749715328216553,
      "learning_rate": 0.00010743385789356356,
      "loss": 2.9553,
      "step": 166316
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0043487548828125,
      "learning_rate": 0.00010743072127196787,
      "loss": 3.0066,
      "step": 166317
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8195085525512695,
      "learning_rate": 0.00010742758468617437,
      "loss": 2.6527,
      "step": 166318
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8712894916534424,
      "learning_rate": 0.00010742444813618381,
      "loss": 2.7288,
      "step": 166319
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.282928943634033,
      "learning_rate": 0.00010742131162199667,
      "loss": 3.0063,
      "step": 166320
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6231467723846436,
      "learning_rate": 0.00010741817514361355,
      "loss": 2.7113,
      "step": 166321
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4635329246520996,
      "learning_rate": 0.00010741503870103493,
      "loss": 2.7653,
      "step": 166322
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3972034454345703,
      "learning_rate": 0.0001074119022942616,
      "loss": 2.768,
      "step": 166323
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.364473342895508,
      "learning_rate": 0.00010740876592329393,
      "loss": 2.9913,
      "step": 166324
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7293450832366943,
      "learning_rate": 0.00010740562958813268,
      "loss": 2.861,
      "step": 166325
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6315131187438965,
      "learning_rate": 0.0001074024932887784,
      "loss": 2.9382,
      "step": 166326
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3590476512908936,
      "learning_rate": 0.0001073993570252316,
      "loss": 3.001,
      "step": 166327
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.252683639526367,
      "learning_rate": 0.00010739622079749278,
      "loss": 2.9107,
      "step": 166328
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3265576362609863,
      "learning_rate": 0.00010739308460556276,
      "loss": 2.915,
      "step": 166329
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.280879020690918,
      "learning_rate": 0.00010738994844944193,
      "loss": 2.9442,
      "step": 166330
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.133361339569092,
      "learning_rate": 0.000107386812329131,
      "loss": 2.9234,
      "step": 166331
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8164150714874268,
      "learning_rate": 0.00010738367624463054,
      "loss": 2.8345,
      "step": 166332
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.282486915588379,
      "learning_rate": 0.00010738054019594103,
      "loss": 3.0587,
      "step": 166333
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7458996772766113,
      "learning_rate": 0.00010737740418306305,
      "loss": 2.9465,
      "step": 166334
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.7215051651000977,
      "learning_rate": 0.00010737426820599736,
      "loss": 2.807,
      "step": 166335
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3888468742370605,
      "learning_rate": 0.00010737113226474432,
      "loss": 2.897,
      "step": 166336
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.503844738006592,
      "learning_rate": 0.0001073679963593047,
      "loss": 3.047,
      "step": 166337
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3915469646453857,
      "learning_rate": 0.00010736486048967903,
      "loss": 3.0568,
      "step": 166338
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.354891777038574,
      "learning_rate": 0.00010736172465586788,
      "loss": 2.9334,
      "step": 166339
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.460685968399048,
      "learning_rate": 0.00010735858885787167,
      "loss": 3.0236,
      "step": 166340
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1042473316192627,
      "learning_rate": 0.00010735545309569129,
      "loss": 3.0537,
      "step": 166341
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5065860748291016,
      "learning_rate": 0.00010735231736932702,
      "loss": 2.9195,
      "step": 166342
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2587006092071533,
      "learning_rate": 0.00010734918167877973,
      "loss": 2.9091,
      "step": 166343
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.6097445487976074,
      "learning_rate": 0.00010734604602404987,
      "loss": 2.932,
      "step": 166344
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.201871633529663,
      "learning_rate": 0.00010734291040513799,
      "loss": 2.8385,
      "step": 166345
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0700528621673584,
      "learning_rate": 0.00010733977482204462,
      "loss": 2.8487,
      "step": 166346
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3491311073303223,
      "learning_rate": 0.00010733663927477048,
      "loss": 2.7935,
      "step": 166347
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.386281967163086,
      "learning_rate": 0.00010733350376331604,
      "loss": 2.809,
      "step": 166348
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0030012130737305,
      "learning_rate": 0.00010733036828768201,
      "loss": 3.0238,
      "step": 166349
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3067731857299805,
      "learning_rate": 0.00010732723284786883,
      "loss": 2.8757,
      "step": 166350
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3097779750823975,
      "learning_rate": 0.00010732409744387722,
      "loss": 3.0608,
      "step": 166351
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.997918128967285,
      "learning_rate": 0.00010732096207570772,
      "loss": 3.0347,
      "step": 166352
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.021925210952759,
      "learning_rate": 0.00010731782674336088,
      "loss": 3.0809,
      "step": 166353
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.288540840148926,
      "learning_rate": 0.00010731469144683721,
      "loss": 2.6582,
      "step": 166354
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2804203033447266,
      "learning_rate": 0.00010731155618613749,
      "loss": 2.7131,
      "step": 166355
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4741575717926025,
      "learning_rate": 0.00010730842096126203,
      "loss": 3.088,
      "step": 166356
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.822216033935547,
      "learning_rate": 0.00010730528577221172,
      "loss": 3.1113,
      "step": 166357
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5629706382751465,
      "learning_rate": 0.00010730215061898699,
      "loss": 2.7891,
      "step": 166358
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.4483232498168945,
      "learning_rate": 0.0001072990155015883,
      "loss": 3.1438,
      "step": 166359
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7981417179107666,
      "learning_rate": 0.0001072958804200165,
      "loss": 3.0351,
      "step": 166360
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3503096103668213,
      "learning_rate": 0.00010729274537427202,
      "loss": 2.8777,
      "step": 166361
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4659478664398193,
      "learning_rate": 0.00010728961036435535,
      "loss": 3.1479,
      "step": 166362
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7836058139801025,
      "learning_rate": 0.00010728647539026726,
      "loss": 2.9162,
      "step": 166363
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1108765602111816,
      "learning_rate": 0.00010728334045200827,
      "loss": 2.967,
      "step": 166364
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5972084999084473,
      "learning_rate": 0.00010728020554957885,
      "loss": 2.8499,
      "step": 166365
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.872504234313965,
      "learning_rate": 0.00010727707068297978,
      "loss": 2.8085,
      "step": 166366
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4925589561462402,
      "learning_rate": 0.00010727393585221151,
      "loss": 2.7999,
      "step": 166367
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5275776386260986,
      "learning_rate": 0.00010727080105727458,
      "loss": 2.8152,
      "step": 166368
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.103116989135742,
      "learning_rate": 0.00010726766629816976,
      "loss": 3.1116,
      "step": 166369
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3764190673828125,
      "learning_rate": 0.0001072645315748975,
      "loss": 3.0171,
      "step": 166370
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.386181592941284,
      "learning_rate": 0.00010726139688745831,
      "loss": 3.0869,
      "step": 166371
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0922935009002686,
      "learning_rate": 0.00010725826223585296,
      "loss": 3.0006,
      "step": 166372
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7184536457061768,
      "learning_rate": 0.00010725512762008184,
      "loss": 2.9479,
      "step": 166373
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.607673168182373,
      "learning_rate": 0.00010725199304014573,
      "loss": 2.879,
      "step": 166374
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1356422901153564,
      "learning_rate": 0.00010724885849604507,
      "loss": 3.0297,
      "step": 166375
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0801496505737305,
      "learning_rate": 0.00010724572398778053,
      "loss": 2.9375,
      "step": 166376
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.8808135986328125,
      "learning_rate": 0.00010724258951535255,
      "loss": 2.9334,
      "step": 166377
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.8541533946990967,
      "learning_rate": 0.0001072394550787619,
      "loss": 3.0243,
      "step": 166378
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4987595081329346,
      "learning_rate": 0.000107236320678009,
      "loss": 2.9104,
      "step": 166379
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.833390474319458,
      "learning_rate": 0.00010723318631309457,
      "loss": 2.9965,
      "step": 166380
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.315122604370117,
      "learning_rate": 0.00010723005198401913,
      "loss": 2.8419,
      "step": 166381
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.694133996963501,
      "learning_rate": 0.00010722691769078327,
      "loss": 2.7007,
      "step": 166382
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.7122061252593994,
      "learning_rate": 0.00010722378343338749,
      "loss": 2.8094,
      "step": 166383
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.956040382385254,
      "learning_rate": 0.00010722064921183253,
      "loss": 2.8787,
      "step": 166384
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2244534492492676,
      "learning_rate": 0.00010721751502611877,
      "loss": 2.924,
      "step": 166385
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.653069257736206,
      "learning_rate": 0.00010721438087624704,
      "loss": 3.0385,
      "step": 166386
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1375060081481934,
      "learning_rate": 0.00010721124676221781,
      "loss": 2.8936,
      "step": 166387
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.878776788711548,
      "learning_rate": 0.0001072081126840316,
      "loss": 2.973,
      "step": 166388
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.6191821098327637,
      "learning_rate": 0.000107204978641689,
      "loss": 3.0891,
      "step": 166389
    },
    {
      "epoch": 2.17,
      "grad_norm": 5.651603698730469,
      "learning_rate": 0.00010720184463519067,
      "loss": 3.1544,
      "step": 166390
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1086432933807373,
      "learning_rate": 0.00010719871066453708,
      "loss": 2.8042,
      "step": 166391
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.170687675476074,
      "learning_rate": 0.00010719557672972902,
      "loss": 2.8344,
      "step": 166392
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.088869571685791,
      "learning_rate": 0.00010719244283076691,
      "loss": 2.8059,
      "step": 166393
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.203579902648926,
      "learning_rate": 0.00010718930896765137,
      "loss": 2.8658,
      "step": 166394
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2971506118774414,
      "learning_rate": 0.00010718617514038288,
      "loss": 2.6944,
      "step": 166395
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.004171848297119,
      "learning_rate": 0.00010718304134896221,
      "loss": 3.048,
      "step": 166396
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5338878631591797,
      "learning_rate": 0.00010717990759338978,
      "loss": 2.8071,
      "step": 166397
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2228314876556396,
      "learning_rate": 0.00010717677387366634,
      "loss": 3.0381,
      "step": 166398
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.9031472206115723,
      "learning_rate": 0.00010717364018979234,
      "loss": 3.0446,
      "step": 166399
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.696634531021118,
      "learning_rate": 0.00010717050654176844,
      "loss": 2.9113,
      "step": 166400
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.910159111022949,
      "learning_rate": 0.00010716737292959507,
      "loss": 2.803,
      "step": 166401
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3182613849639893,
      "learning_rate": 0.000107164239353273,
      "loss": 2.8053,
      "step": 166402
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.431303024291992,
      "learning_rate": 0.00010716110581280268,
      "loss": 2.9902,
      "step": 166403
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.597487449645996,
      "learning_rate": 0.00010715797230818483,
      "loss": 3.0017,
      "step": 166404
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.688735485076904,
      "learning_rate": 0.00010715483883941993,
      "loss": 2.9129,
      "step": 166405
    },
    {
      "epoch": 2.17,
      "grad_norm": 5.081748008728027,
      "learning_rate": 0.00010715170540650863,
      "loss": 3.0447,
      "step": 166406
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.802201747894287,
      "learning_rate": 0.00010714857200945135,
      "loss": 2.6187,
      "step": 166407
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4887826442718506,
      "learning_rate": 0.00010714543864824887,
      "loss": 3.0356,
      "step": 166408
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.334780216217041,
      "learning_rate": 0.00010714230532290162,
      "loss": 3.0154,
      "step": 166409
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.225334405899048,
      "learning_rate": 0.00010713917203341036,
      "loss": 3.0049,
      "step": 166410
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.204529285430908,
      "learning_rate": 0.00010713603877977555,
      "loss": 2.9775,
      "step": 166411
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0960052013397217,
      "learning_rate": 0.00010713290556199779,
      "loss": 2.8835,
      "step": 166412
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7445826530456543,
      "learning_rate": 0.00010712977238007758,
      "loss": 2.8281,
      "step": 166413
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.881265640258789,
      "learning_rate": 0.00010712663923401567,
      "loss": 2.9325,
      "step": 166414
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4461569786071777,
      "learning_rate": 0.00010712350612381247,
      "loss": 2.7713,
      "step": 166415
    },
    {
      "epoch": 2.17,
      "grad_norm": 5.101675033569336,
      "learning_rate": 0.00010712037304946873,
      "loss": 3.0731,
      "step": 166416
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7158620357513428,
      "learning_rate": 0.00010711724001098488,
      "loss": 3.0261,
      "step": 166417
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4516141414642334,
      "learning_rate": 0.00010711410700836174,
      "loss": 2.9248,
      "step": 166418
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7644710540771484,
      "learning_rate": 0.00010711097404159956,
      "loss": 2.79,
      "step": 166419
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5440189838409424,
      "learning_rate": 0.00010710784111069919,
      "loss": 2.9206,
      "step": 166420
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4495279788970947,
      "learning_rate": 0.00010710470821566103,
      "loss": 2.7371,
      "step": 166421
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.390361309051514,
      "learning_rate": 0.00010710157535648582,
      "loss": 3.2223,
      "step": 166422
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3410651683807373,
      "learning_rate": 0.00010709844253317397,
      "loss": 3.0273,
      "step": 166423
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.117194175720215,
      "learning_rate": 0.00010709530974572636,
      "loss": 2.8761,
      "step": 166424
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7913315296173096,
      "learning_rate": 0.00010709217699414318,
      "loss": 3.0309,
      "step": 166425
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.67865252494812,
      "learning_rate": 0.0001070890442784253,
      "loss": 3.0072,
      "step": 166426
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7364988327026367,
      "learning_rate": 0.00010708591159857311,
      "loss": 3.1222,
      "step": 166427
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1149022579193115,
      "learning_rate": 0.0001070827789545874,
      "loss": 2.8437,
      "step": 166428
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3459455966949463,
      "learning_rate": 0.00010707964634646853,
      "loss": 2.914,
      "step": 166429
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4021828174591064,
      "learning_rate": 0.0001070765137742174,
      "loss": 2.8866,
      "step": 166430
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3086562156677246,
      "learning_rate": 0.00010707338123783417,
      "loss": 2.9565,
      "step": 166431
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7203168869018555,
      "learning_rate": 0.00010707024873731977,
      "loss": 2.8905,
      "step": 166432
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.716904640197754,
      "learning_rate": 0.00010706711627267456,
      "loss": 3.0064,
      "step": 166433
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.676826477050781,
      "learning_rate": 0.0001070639838438993,
      "loss": 2.9989,
      "step": 166434
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.612368583679199,
      "learning_rate": 0.00010706085145099437,
      "loss": 2.794,
      "step": 166435
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.952742338180542,
      "learning_rate": 0.00010705771909396058,
      "loss": 2.8167,
      "step": 166436
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.639159679412842,
      "learning_rate": 0.0001070545867727984,
      "loss": 3.087,
      "step": 166437
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0640790462493896,
      "learning_rate": 0.00010705145448750842,
      "loss": 2.859,
      "step": 166438
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.389357566833496,
      "learning_rate": 0.00010704832223809112,
      "loss": 2.7613,
      "step": 166439
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.438352108001709,
      "learning_rate": 0.00010704519002454729,
      "loss": 2.7682,
      "step": 166440
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5008416175842285,
      "learning_rate": 0.00010704205784687725,
      "loss": 2.9131,
      "step": 166441
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7374188899993896,
      "learning_rate": 0.0001070389257050819,
      "loss": 2.8907,
      "step": 166442
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.032562017440796,
      "learning_rate": 0.00010703579359916161,
      "loss": 2.9324,
      "step": 166443
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.897998094558716,
      "learning_rate": 0.00010703266152911693,
      "loss": 3.0285,
      "step": 166444
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3479552268981934,
      "learning_rate": 0.00010702952949494861,
      "loss": 2.6009,
      "step": 166445
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.124342441558838,
      "learning_rate": 0.00010702639749665716,
      "loss": 2.9743,
      "step": 166446
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.614091157913208,
      "learning_rate": 0.00010702326553424304,
      "loss": 2.7042,
      "step": 166447
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.04897141456604,
      "learning_rate": 0.00010702013360770703,
      "loss": 3.1072,
      "step": 166448
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0859365463256836,
      "learning_rate": 0.00010701700171704961,
      "loss": 2.8316,
      "step": 166449
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.323458671569824,
      "learning_rate": 0.0001070138698622713,
      "loss": 2.9782,
      "step": 166450
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3786351680755615,
      "learning_rate": 0.00010701073804337284,
      "loss": 3.0426,
      "step": 166451
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.923976182937622,
      "learning_rate": 0.00010700760626035473,
      "loss": 3.0586,
      "step": 166452
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6401145458221436,
      "learning_rate": 0.00010700447451321744,
      "loss": 2.8926,
      "step": 166453
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.305757999420166,
      "learning_rate": 0.00010700134280196176,
      "loss": 3.07,
      "step": 166454
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4361982345581055,
      "learning_rate": 0.00010699821112658816,
      "loss": 2.9044,
      "step": 166455
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6198089122772217,
      "learning_rate": 0.00010699507948709716,
      "loss": 3.1813,
      "step": 166456
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.587908983230591,
      "learning_rate": 0.00010699194788348953,
      "loss": 2.9695,
      "step": 166457
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3598742485046387,
      "learning_rate": 0.00010698881631576561,
      "loss": 2.8846,
      "step": 166458
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7027790546417236,
      "learning_rate": 0.00010698568478392623,
      "loss": 2.8677,
      "step": 166459
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.185412645339966,
      "learning_rate": 0.00010698255328797185,
      "loss": 2.8953,
      "step": 166460
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.455636739730835,
      "learning_rate": 0.00010697942182790307,
      "loss": 2.9656,
      "step": 166461
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.714503765106201,
      "learning_rate": 0.00010697629040372034,
      "loss": 3.0795,
      "step": 166462
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0792367458343506,
      "learning_rate": 0.00010697315901542445,
      "loss": 2.6224,
      "step": 166463
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.858120918273926,
      "learning_rate": 0.00010697002766301581,
      "loss": 3.0082,
      "step": 166464
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.767195224761963,
      "learning_rate": 0.0001069668963464952,
      "loss": 3.0882,
      "step": 166465
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.888228178024292,
      "learning_rate": 0.00010696376506586308,
      "loss": 2.9629,
      "step": 166466
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4908804893493652,
      "learning_rate": 0.00010696063382112002,
      "loss": 2.933,
      "step": 166467
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1655218601226807,
      "learning_rate": 0.00010695750261226656,
      "loss": 2.8981,
      "step": 166468
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.511894941329956,
      "learning_rate": 0.00010695437143930341,
      "loss": 3.0686,
      "step": 166469
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.163748264312744,
      "learning_rate": 0.000106951240302231,
      "loss": 3.1068,
      "step": 166470
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.094082832336426,
      "learning_rate": 0.00010694810920105009,
      "loss": 2.9923,
      "step": 166471
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3289077281951904,
      "learning_rate": 0.00010694497813576119,
      "loss": 2.8732,
      "step": 166472
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.464932680130005,
      "learning_rate": 0.00010694184710636484,
      "loss": 2.6254,
      "step": 166473
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4864273071289062,
      "learning_rate": 0.00010693871611286154,
      "loss": 2.9543,
      "step": 166474
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.686650276184082,
      "learning_rate": 0.00010693558515525208,
      "loss": 3.1041,
      "step": 166475
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.964911937713623,
      "learning_rate": 0.00010693245423353686,
      "loss": 2.9545,
      "step": 166476
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7203140258789062,
      "learning_rate": 0.00010692932334771664,
      "loss": 2.9342,
      "step": 166477
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.794053316116333,
      "learning_rate": 0.00010692619249779189,
      "loss": 2.8482,
      "step": 166478
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.309753179550171,
      "learning_rate": 0.00010692306168376319,
      "loss": 3.0256,
      "step": 166479
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.007927417755127,
      "learning_rate": 0.00010691993090563107,
      "loss": 2.8663,
      "step": 166480
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5833077430725098,
      "learning_rate": 0.00010691680016339625,
      "loss": 3.0965,
      "step": 166481
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.70481538772583,
      "learning_rate": 0.00010691366945705918,
      "loss": 3.0696,
      "step": 166482
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7325782775878906,
      "learning_rate": 0.00010691053878662056,
      "loss": 2.8609,
      "step": 166483
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.313596248626709,
      "learning_rate": 0.00010690740815208086,
      "loss": 3.033,
      "step": 166484
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.144733428955078,
      "learning_rate": 0.0001069042775534409,
      "loss": 2.9687,
      "step": 166485
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.508676290512085,
      "learning_rate": 0.00010690114699070086,
      "loss": 2.9624,
      "step": 166486
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.254396677017212,
      "learning_rate": 0.00010689801646386167,
      "loss": 2.9743,
      "step": 166487
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3300416469573975,
      "learning_rate": 0.00010689488597292366,
      "loss": 2.8635,
      "step": 166488
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.363694190979004,
      "learning_rate": 0.00010689175551788764,
      "loss": 3.1979,
      "step": 166489
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.106339931488037,
      "learning_rate": 0.00010688862509875399,
      "loss": 2.8038,
      "step": 166490
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.799626350402832,
      "learning_rate": 0.00010688549471552358,
      "loss": 2.9209,
      "step": 166491
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5988149642944336,
      "learning_rate": 0.00010688236436819664,
      "loss": 3.0012,
      "step": 166492
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7339866161346436,
      "learning_rate": 0.000106879234056774,
      "loss": 2.9069,
      "step": 166493
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6482672691345215,
      "learning_rate": 0.00010687610378125603,
      "loss": 3.0292,
      "step": 166494
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2838354110717773,
      "learning_rate": 0.00010687297354164356,
      "loss": 2.8364,
      "step": 166495
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.244646072387695,
      "learning_rate": 0.00010686984333793693,
      "loss": 2.924,
      "step": 166496
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7463653087615967,
      "learning_rate": 0.00010686671317013706,
      "loss": 3.0199,
      "step": 166497
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5308191776275635,
      "learning_rate": 0.00010686358303824412,
      "loss": 2.6423,
      "step": 166498
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5863490104675293,
      "learning_rate": 0.00010686045294225894,
      "loss": 2.8292,
      "step": 166499
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.094008684158325,
      "learning_rate": 0.00010685732288218202,
      "loss": 2.9916,
      "step": 166500
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.6108009815216064,
      "learning_rate": 0.000106854192858014,
      "loss": 2.9596,
      "step": 166501
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.839118003845215,
      "learning_rate": 0.00010685106286975539,
      "loss": 2.8219,
      "step": 166502
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9983112812042236,
      "learning_rate": 0.00010684793291740698,
      "loss": 2.9484,
      "step": 166503
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0963847637176514,
      "learning_rate": 0.00010684480300096899,
      "loss": 3.158,
      "step": 166504
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4927926063537598,
      "learning_rate": 0.00010684167312044231,
      "loss": 2.8302,
      "step": 166505
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3966550827026367,
      "learning_rate": 0.00010683854327582729,
      "loss": 3.0611,
      "step": 166506
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.526634931564331,
      "learning_rate": 0.00010683541346712472,
      "loss": 2.8717,
      "step": 166507
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.806309461593628,
      "learning_rate": 0.00010683228369433501,
      "loss": 3.3132,
      "step": 166508
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.957843065261841,
      "learning_rate": 0.00010682915395745902,
      "loss": 2.8964,
      "step": 166509
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.184030055999756,
      "learning_rate": 0.00010682602425649697,
      "loss": 3.1632,
      "step": 166510
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5299072265625,
      "learning_rate": 0.00010682289459144968,
      "loss": 2.8515,
      "step": 166511
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.690530776977539,
      "learning_rate": 0.00010681976496231754,
      "loss": 2.8376,
      "step": 166512
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.381502151489258,
      "learning_rate": 0.0001068166353691014,
      "loss": 2.8523,
      "step": 166513
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7058401107788086,
      "learning_rate": 0.00010681350581180155,
      "loss": 3.2194,
      "step": 166514
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9268150329589844,
      "learning_rate": 0.00010681037629041894,
      "loss": 3.0209,
      "step": 166515
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2346582412719727,
      "learning_rate": 0.00010680724680495371,
      "loss": 2.8831,
      "step": 166516
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0052900314331055,
      "learning_rate": 0.00010680411735540677,
      "loss": 2.9819,
      "step": 166517
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.517910957336426,
      "learning_rate": 0.00010680098794177851,
      "loss": 2.8093,
      "step": 166518
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5498838424682617,
      "learning_rate": 0.00010679785856406967,
      "loss": 2.9327,
      "step": 166519
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1842639446258545,
      "learning_rate": 0.00010679472922228067,
      "loss": 2.9624,
      "step": 166520
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.271054744720459,
      "learning_rate": 0.00010679159991641224,
      "loss": 3.0266,
      "step": 166521
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.301633596420288,
      "learning_rate": 0.00010678847064646494,
      "loss": 3.2713,
      "step": 166522
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5404491424560547,
      "learning_rate": 0.00010678534141243931,
      "loss": 3.0328,
      "step": 166523
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.014960289001465,
      "learning_rate": 0.00010678221221433581,
      "loss": 2.9812,
      "step": 166524
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5463740825653076,
      "learning_rate": 0.00010677908305215527,
      "loss": 2.8158,
      "step": 166525
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.22188401222229,
      "learning_rate": 0.000106775953925898,
      "loss": 2.9246,
      "step": 166526
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5583150386810303,
      "learning_rate": 0.00010677282483556485,
      "loss": 3.0847,
      "step": 166527
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4368200302124023,
      "learning_rate": 0.0001067696957811563,
      "loss": 2.9589,
      "step": 166528
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4953103065490723,
      "learning_rate": 0.00010676656676267289,
      "loss": 2.9953,
      "step": 166529
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3010032176971436,
      "learning_rate": 0.00010676343778011513,
      "loss": 2.9372,
      "step": 166530
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6666038036346436,
      "learning_rate": 0.00010676030883348381,
      "loss": 3.0342,
      "step": 166531
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.766366720199585,
      "learning_rate": 0.00010675717992277928,
      "loss": 2.9385,
      "step": 166532
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0201337337493896,
      "learning_rate": 0.00010675405104800235,
      "loss": 2.9097,
      "step": 166533
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.282961845397949,
      "learning_rate": 0.00010675092220915346,
      "loss": 2.9901,
      "step": 166534
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.297574520111084,
      "learning_rate": 0.00010674779340623315,
      "loss": 2.9827,
      "step": 166535
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.844089984893799,
      "learning_rate": 0.00010674466463924215,
      "loss": 2.9902,
      "step": 166536
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.330838441848755,
      "learning_rate": 0.00010674153590818099,
      "loss": 3.1173,
      "step": 166537
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.918461799621582,
      "learning_rate": 0.0001067384072130501,
      "loss": 2.8548,
      "step": 166538
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.7462387084960938,
      "learning_rate": 0.00010673527855385032,
      "loss": 2.876,
      "step": 166539
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.216621160507202,
      "learning_rate": 0.00010673214993058209,
      "loss": 2.7661,
      "step": 166540
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.8483901023864746,
      "learning_rate": 0.0001067290213432459,
      "loss": 2.9953,
      "step": 166541
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9635188579559326,
      "learning_rate": 0.00010672589279184252,
      "loss": 2.8244,
      "step": 166542
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.86848521232605,
      "learning_rate": 0.00010672276427637233,
      "loss": 2.97,
      "step": 166543
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.602842092514038,
      "learning_rate": 0.00010671963579683617,
      "loss": 3.2058,
      "step": 166544
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4908225536346436,
      "learning_rate": 0.00010671650735323443,
      "loss": 2.8806,
      "step": 166545
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.794983386993408,
      "learning_rate": 0.00010671337894556778,
      "loss": 3.0364,
      "step": 166546
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.651129961013794,
      "learning_rate": 0.00010671025057383664,
      "loss": 2.8207,
      "step": 166547
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3917524814605713,
      "learning_rate": 0.00010670712223804183,
      "loss": 2.7942,
      "step": 166548
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.208249092102051,
      "learning_rate": 0.00010670399393818371,
      "loss": 3.2334,
      "step": 166549
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0281825065612793,
      "learning_rate": 0.00010670086567426303,
      "loss": 3.1147,
      "step": 166550
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.42462420463562,
      "learning_rate": 0.00010669773744628035,
      "loss": 3.2431,
      "step": 166551
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2464752197265625,
      "learning_rate": 0.00010669460925423622,
      "loss": 2.7568,
      "step": 166552
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9262938499450684,
      "learning_rate": 0.00010669148109813107,
      "loss": 3.0421,
      "step": 166553
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3704142570495605,
      "learning_rate": 0.00010668835297796574,
      "loss": 2.8753,
      "step": 166554
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7437376976013184,
      "learning_rate": 0.00010668522489374059,
      "loss": 3.1987,
      "step": 166555
    },
    {
      "epoch": 2.17,
      "grad_norm": 5.097600936889648,
      "learning_rate": 0.00010668209684545644,
      "loss": 2.7897,
      "step": 166556
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4948647022247314,
      "learning_rate": 0.00010667896883311359,
      "loss": 3.1168,
      "step": 166557
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4235401153564453,
      "learning_rate": 0.00010667584085671298,
      "loss": 3.1761,
      "step": 166558
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.968238353729248,
      "learning_rate": 0.00010667271291625481,
      "loss": 3.1828,
      "step": 166559
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7224719524383545,
      "learning_rate": 0.00010666958501173992,
      "loss": 3.1237,
      "step": 166560
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.753976583480835,
      "learning_rate": 0.0001066664571431687,
      "loss": 2.9444,
      "step": 166561
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3651130199432373,
      "learning_rate": 0.00010666332931054192,
      "loss": 3.0602,
      "step": 166562
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4792251586914062,
      "learning_rate": 0.00010666020151386,
      "loss": 3.0286,
      "step": 166563
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5034682750701904,
      "learning_rate": 0.00010665707375312376,
      "loss": 3.079,
      "step": 166564
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.723423480987549,
      "learning_rate": 0.00010665394602833345,
      "loss": 2.8656,
      "step": 166565
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5115513801574707,
      "learning_rate": 0.00010665081833948991,
      "loss": 2.8775,
      "step": 166566
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6324031352996826,
      "learning_rate": 0.00010664769068659353,
      "loss": 2.8273,
      "step": 166567
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.497330904006958,
      "learning_rate": 0.0001066445630696451,
      "loss": 2.7026,
      "step": 166568
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.99541974067688,
      "learning_rate": 0.00010664143548864502,
      "loss": 3.004,
      "step": 166569
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.227447271347046,
      "learning_rate": 0.00010663830794359412,
      "loss": 3.0563,
      "step": 166570
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.564394474029541,
      "learning_rate": 0.00010663518043449262,
      "loss": 3.0239,
      "step": 166571
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7536258697509766,
      "learning_rate": 0.0001066320529613414,
      "loss": 3.0072,
      "step": 166572
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3756749629974365,
      "learning_rate": 0.00010662892552414079,
      "loss": 2.9042,
      "step": 166573
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.157613515853882,
      "learning_rate": 0.00010662579812289165,
      "loss": 3.0492,
      "step": 166574
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3785312175750732,
      "learning_rate": 0.00010662267075759432,
      "loss": 3.1505,
      "step": 166575
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7340970039367676,
      "learning_rate": 0.00010661954342824967,
      "loss": 2.9031,
      "step": 166576
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6882593631744385,
      "learning_rate": 0.00010661641613485788,
      "loss": 2.9107,
      "step": 166577
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.6041316986083984,
      "learning_rate": 0.00010661328887741991,
      "loss": 2.8377,
      "step": 166578
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5642800331115723,
      "learning_rate": 0.00010661016165593604,
      "loss": 2.9707,
      "step": 166579
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.658381223678589,
      "learning_rate": 0.00010660703447040708,
      "loss": 2.9477,
      "step": 166580
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3961379528045654,
      "learning_rate": 0.00010660390732083342,
      "loss": 3.0071,
      "step": 166581
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.43652606010437,
      "learning_rate": 0.00010660078020721597,
      "loss": 2.8245,
      "step": 166582
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.597104072570801,
      "learning_rate": 0.0001065976531295549,
      "loss": 2.772,
      "step": 166583
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.41131067276001,
      "learning_rate": 0.00010659452608785105,
      "loss": 2.8282,
      "step": 166584
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.205965518951416,
      "learning_rate": 0.00010659139908210484,
      "loss": 3.0981,
      "step": 166585
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.599470853805542,
      "learning_rate": 0.00010658827211231704,
      "loss": 3.1644,
      "step": 166586
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.668048620223999,
      "learning_rate": 0.000106585145178488,
      "loss": 2.6834,
      "step": 166587
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.198514938354492,
      "learning_rate": 0.00010658201828061868,
      "loss": 3.1214,
      "step": 166588
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.086630344390869,
      "learning_rate": 0.00010657889141870917,
      "loss": 2.8304,
      "step": 166589
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.509429693222046,
      "learning_rate": 0.00010657576459276042,
      "loss": 2.8984,
      "step": 166590
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.510974645614624,
      "learning_rate": 0.00010657263780277279,
      "loss": 2.6254,
      "step": 166591
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.63930344581604,
      "learning_rate": 0.00010656951104874706,
      "loss": 2.8835,
      "step": 166592
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5606184005737305,
      "learning_rate": 0.0001065663843306836,
      "loss": 2.8209,
      "step": 166593
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.024106740951538,
      "learning_rate": 0.0001065632576485833,
      "loss": 2.7331,
      "step": 166594
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.275373697280884,
      "learning_rate": 0.00010656013100244634,
      "loss": 2.8406,
      "step": 166595
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.562814950942993,
      "learning_rate": 0.0001065570043922736,
      "loss": 2.9125,
      "step": 166596
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.565161943435669,
      "learning_rate": 0.00010655387781806546,
      "loss": 2.9969,
      "step": 166597
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9749979972839355,
      "learning_rate": 0.00010655075127982274,
      "loss": 2.9078,
      "step": 166598
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.627713918685913,
      "learning_rate": 0.00010654762477754572,
      "loss": 3.0342,
      "step": 166599
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.308344602584839,
      "learning_rate": 0.00010654449831123542,
      "loss": 2.8432,
      "step": 166600
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3940131664276123,
      "learning_rate": 0.00010654137188089191,
      "loss": 2.9234,
      "step": 166601
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.443230628967285,
      "learning_rate": 0.00010653824548651611,
      "loss": 3.0976,
      "step": 166602
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.489354133605957,
      "learning_rate": 0.00010653511912810843,
      "loss": 3.0506,
      "step": 166603
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.373164415359497,
      "learning_rate": 0.0001065319928056696,
      "loss": 2.7799,
      "step": 166604
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7509241104125977,
      "learning_rate": 0.00010652886651920003,
      "loss": 3.0081,
      "step": 166605
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2793450355529785,
      "learning_rate": 0.00010652574026870049,
      "loss": 2.8303,
      "step": 166606
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6895644664764404,
      "learning_rate": 0.00010652261405417148,
      "loss": 2.8684,
      "step": 166607
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3538546562194824,
      "learning_rate": 0.00010651948787561355,
      "loss": 3.1445,
      "step": 166608
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4887733459472656,
      "learning_rate": 0.00010651636173302722,
      "loss": 3.1788,
      "step": 166609
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.368527412414551,
      "learning_rate": 0.00010651323562641325,
      "loss": 2.9663,
      "step": 166610
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.568547248840332,
      "learning_rate": 0.000106510109555772,
      "loss": 2.9731,
      "step": 166611
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.576129198074341,
      "learning_rate": 0.00010650698352110428,
      "loss": 3.1837,
      "step": 166612
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.277606725692749,
      "learning_rate": 0.00010650385752241059,
      "loss": 3.1829,
      "step": 166613
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.816284656524658,
      "learning_rate": 0.00010650073155969147,
      "loss": 2.878,
      "step": 166614
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.488046884536743,
      "learning_rate": 0.00010649760563294739,
      "loss": 3.3431,
      "step": 166615
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3667707443237305,
      "learning_rate": 0.0001064944797421792,
      "loss": 2.7499,
      "step": 166616
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2051870822906494,
      "learning_rate": 0.00010649135388738722,
      "loss": 3.1892,
      "step": 166617
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.627645969390869,
      "learning_rate": 0.00010648822806857223,
      "loss": 3.1237,
      "step": 166618
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3246805667877197,
      "learning_rate": 0.00010648510228573477,
      "loss": 3.0827,
      "step": 166619
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.389524221420288,
      "learning_rate": 0.00010648197653887526,
      "loss": 2.9634,
      "step": 166620
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4007349014282227,
      "learning_rate": 0.0001064788508279945,
      "loss": 2.8566,
      "step": 166621
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.604757785797119,
      "learning_rate": 0.000106475725153093,
      "loss": 2.9275,
      "step": 166622
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.27443790435791,
      "learning_rate": 0.00010647259951417117,
      "loss": 3.0594,
      "step": 166623
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2135112285614014,
      "learning_rate": 0.00010646947391122988,
      "loss": 2.8,
      "step": 166624
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3720569610595703,
      "learning_rate": 0.00010646634834426956,
      "loss": 2.9628,
      "step": 166625
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2072904109954834,
      "learning_rate": 0.00010646322281329066,
      "loss": 2.7172,
      "step": 166626
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.704761266708374,
      "learning_rate": 0.00010646009731829404,
      "loss": 2.8827,
      "step": 166627
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.142669200897217,
      "learning_rate": 0.00010645697185928014,
      "loss": 2.9076,
      "step": 166628
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.484372615814209,
      "learning_rate": 0.00010645384643624944,
      "loss": 3.0512,
      "step": 166629
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7490315437316895,
      "learning_rate": 0.0001064507210492027,
      "loss": 2.9978,
      "step": 166630
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6551554203033447,
      "learning_rate": 0.00010644759569814045,
      "loss": 2.8088,
      "step": 166631
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.045574903488159,
      "learning_rate": 0.00010644447038306315,
      "loss": 2.7683,
      "step": 166632
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.128560781478882,
      "learning_rate": 0.00010644134510397157,
      "loss": 2.953,
      "step": 166633
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.499234676361084,
      "learning_rate": 0.0001064382198608661,
      "loss": 3.0754,
      "step": 166634
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4758994579315186,
      "learning_rate": 0.00010643509465374749,
      "loss": 3.1041,
      "step": 166635
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5576741695404053,
      "learning_rate": 0.00010643196948261628,
      "loss": 3.2219,
      "step": 166636
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.7496416568756104,
      "learning_rate": 0.00010642884434747302,
      "loss": 2.8102,
      "step": 166637
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4283759593963623,
      "learning_rate": 0.00010642571924831817,
      "loss": 3.1468,
      "step": 166638
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8040993213653564,
      "learning_rate": 0.00010642259418515256,
      "loss": 2.8238,
      "step": 166639
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1820614337921143,
      "learning_rate": 0.00010641946915797652,
      "loss": 2.8655,
      "step": 166640
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1208977699279785,
      "learning_rate": 0.00010641634416679087,
      "loss": 2.963,
      "step": 166641
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5709733963012695,
      "learning_rate": 0.00010641321921159595,
      "loss": 2.8884,
      "step": 166642
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.441010475158691,
      "learning_rate": 0.0001064100942923927,
      "loss": 3.0742,
      "step": 166643
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5897340774536133,
      "learning_rate": 0.00010640696940918126,
      "loss": 2.9156,
      "step": 166644
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.437163829803467,
      "learning_rate": 0.00010640384456196251,
      "loss": 2.8855,
      "step": 166645
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.3417887687683105,
      "learning_rate": 0.00010640071975073685,
      "loss": 2.671,
      "step": 166646
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6328611373901367,
      "learning_rate": 0.00010639759497550504,
      "loss": 2.9596,
      "step": 166647
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.0951130390167236,
      "learning_rate": 0.00010639447023626744,
      "loss": 2.8956,
      "step": 166648
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.783649444580078,
      "learning_rate": 0.00010639134553302504,
      "loss": 2.7911,
      "step": 166649
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5990519523620605,
      "learning_rate": 0.00010638822086577788,
      "loss": 3.0919,
      "step": 166650
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5389161109924316,
      "learning_rate": 0.00010638509623452694,
      "loss": 2.9664,
      "step": 166651
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.886185884475708,
      "learning_rate": 0.00010638197163927255,
      "loss": 2.6804,
      "step": 166652
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3419597148895264,
      "learning_rate": 0.00010637884708001548,
      "loss": 2.9969,
      "step": 166653
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.551212787628174,
      "learning_rate": 0.00010637572255675619,
      "loss": 3.0525,
      "step": 166654
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.43170166015625,
      "learning_rate": 0.00010637259806949548,
      "loss": 2.9107,
      "step": 166655
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2725069522857666,
      "learning_rate": 0.00010636947361823356,
      "loss": 3.0761,
      "step": 166656
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.503502607345581,
      "learning_rate": 0.00010636634920297131,
      "loss": 2.975,
      "step": 166657
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.592175006866455,
      "learning_rate": 0.0001063632248237091,
      "loss": 2.9651,
      "step": 166658
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3625504970550537,
      "learning_rate": 0.00010636010048044777,
      "loss": 2.6405,
      "step": 166659
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.676577091217041,
      "learning_rate": 0.0001063569761731876,
      "loss": 2.8781,
      "step": 166660
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.542966604232788,
      "learning_rate": 0.00010635385190192955,
      "loss": 3.0906,
      "step": 166661
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.554025173187256,
      "learning_rate": 0.00010635072766667375,
      "loss": 3.2702,
      "step": 166662
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.73465895652771,
      "learning_rate": 0.00010634760346742111,
      "loss": 2.8214,
      "step": 166663
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.884566307067871,
      "learning_rate": 0.00010634447930417202,
      "loss": 3.104,
      "step": 166664
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9142279624938965,
      "learning_rate": 0.00010634135517692725,
      "loss": 2.8859,
      "step": 166665
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2957327365875244,
      "learning_rate": 0.00010633823108568718,
      "loss": 2.9533,
      "step": 166666
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5338830947875977,
      "learning_rate": 0.00010633510703045267,
      "loss": 2.7585,
      "step": 166667
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6847052574157715,
      "learning_rate": 0.00010633198301122393,
      "loss": 3.181,
      "step": 166668
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.592135429382324,
      "learning_rate": 0.00010632885902800182,
      "loss": 2.6585,
      "step": 166669
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4740712642669678,
      "learning_rate": 0.00010632573508078675,
      "loss": 2.9707,
      "step": 166670
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8720109462738037,
      "learning_rate": 0.00010632261116957947,
      "loss": 3.2305,
      "step": 166671
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.126652717590332,
      "learning_rate": 0.00010631948729438038,
      "loss": 2.9737,
      "step": 166672
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2373220920562744,
      "learning_rate": 0.00010631636345519032,
      "loss": 3.0569,
      "step": 166673
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.83035945892334,
      "learning_rate": 0.00010631323965200956,
      "loss": 2.9015,
      "step": 166674
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5604708194732666,
      "learning_rate": 0.00010631011588483889,
      "loss": 2.9918,
      "step": 166675
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8942151069641113,
      "learning_rate": 0.00010630699215367872,
      "loss": 3.0435,
      "step": 166676
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8495099544525146,
      "learning_rate": 0.00010630386845852985,
      "loss": 2.7407,
      "step": 166677
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4434256553649902,
      "learning_rate": 0.00010630074479939263,
      "loss": 2.8248,
      "step": 166678
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4780490398406982,
      "learning_rate": 0.00010629762117626798,
      "loss": 2.9452,
      "step": 166679
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.978532314300537,
      "learning_rate": 0.00010629449758915609,
      "loss": 3.0086,
      "step": 166680
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.319577932357788,
      "learning_rate": 0.00010629137403805774,
      "loss": 2.8057,
      "step": 166681
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.368358850479126,
      "learning_rate": 0.00010628825052297343,
      "loss": 3.0818,
      "step": 166682
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.005495548248291,
      "learning_rate": 0.00010628512704390387,
      "loss": 2.964,
      "step": 166683
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.676556348800659,
      "learning_rate": 0.00010628200360084945,
      "loss": 2.9787,
      "step": 166684
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3799846172332764,
      "learning_rate": 0.00010627888019381097,
      "loss": 3.0065,
      "step": 166685
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.557929515838623,
      "learning_rate": 0.00010627575682278891,
      "loss": 3.0257,
      "step": 166686
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.861098289489746,
      "learning_rate": 0.00010627263348778385,
      "loss": 2.9459,
      "step": 166687
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.301917791366577,
      "learning_rate": 0.00010626951018879625,
      "loss": 2.8419,
      "step": 166688
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2896106243133545,
      "learning_rate": 0.00010626638692582692,
      "loss": 2.7874,
      "step": 166689
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.233243227005005,
      "learning_rate": 0.0001062632636988762,
      "loss": 2.942,
      "step": 166690
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3660504817962646,
      "learning_rate": 0.00010626014050794492,
      "loss": 3.2498,
      "step": 166691
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2728092670440674,
      "learning_rate": 0.00010625701735303353,
      "loss": 3.0635,
      "step": 166692
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5308353900909424,
      "learning_rate": 0.00010625389423414263,
      "loss": 3.0742,
      "step": 166693
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2774691581726074,
      "learning_rate": 0.00010625077115127269,
      "loss": 2.8011,
      "step": 166694
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7380003929138184,
      "learning_rate": 0.00010624764810442446,
      "loss": 2.8584,
      "step": 166695
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5329887866973877,
      "learning_rate": 0.00010624452509359837,
      "loss": 2.9164,
      "step": 166696
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1467578411102295,
      "learning_rate": 0.00010624140211879519,
      "loss": 3.0822,
      "step": 166697
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.190897464752197,
      "learning_rate": 0.00010623827918001534,
      "loss": 2.9443,
      "step": 166698
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5588741302490234,
      "learning_rate": 0.00010623515627725951,
      "loss": 3.0603,
      "step": 166699
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.195424795150757,
      "learning_rate": 0.00010623203341052811,
      "loss": 2.8363,
      "step": 166700
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.532194137573242,
      "learning_rate": 0.00010622891057982191,
      "loss": 2.8967,
      "step": 166701
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.553734302520752,
      "learning_rate": 0.0001062257877851413,
      "loss": 3.0995,
      "step": 166702
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.548959970474243,
      "learning_rate": 0.00010622266502648711,
      "loss": 3.1191,
      "step": 166703
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.424562931060791,
      "learning_rate": 0.0001062195423038598,
      "loss": 2.8639,
      "step": 166704
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.127753973007202,
      "learning_rate": 0.00010621641961725979,
      "loss": 2.9637,
      "step": 166705
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3459877967834473,
      "learning_rate": 0.00010621329696668792,
      "loss": 3.0102,
      "step": 166706
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1258106231689453,
      "learning_rate": 0.00010621017435214468,
      "loss": 2.7597,
      "step": 166707
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.497770071029663,
      "learning_rate": 0.00010620705177363048,
      "loss": 2.9772,
      "step": 166708
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4875309467315674,
      "learning_rate": 0.00010620392923114617,
      "loss": 2.8283,
      "step": 166709
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.298142433166504,
      "learning_rate": 0.00010620080672469224,
      "loss": 2.7821,
      "step": 166710
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.634979248046875,
      "learning_rate": 0.00010619768425426912,
      "loss": 3.0451,
      "step": 166711
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3257944583892822,
      "learning_rate": 0.00010619456181987756,
      "loss": 3.1298,
      "step": 166712
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5459413528442383,
      "learning_rate": 0.00010619143942151813,
      "loss": 2.8018,
      "step": 166713
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1987125873565674,
      "learning_rate": 0.00010618831705919127,
      "loss": 2.858,
      "step": 166714
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.207458257675171,
      "learning_rate": 0.00010618519473289776,
      "loss": 3.1823,
      "step": 166715
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.522200345993042,
      "learning_rate": 0.00010618207244263809,
      "loss": 2.9134,
      "step": 166716
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.995447874069214,
      "learning_rate": 0.00010617895018841273,
      "loss": 3.1624,
      "step": 166717
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1175119876861572,
      "learning_rate": 0.00010617582797022243,
      "loss": 2.8194,
      "step": 166718
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4905712604522705,
      "learning_rate": 0.00010617270578806762,
      "loss": 3.1519,
      "step": 166719
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.871145486831665,
      "learning_rate": 0.00010616958364194908,
      "loss": 3.1365,
      "step": 166720
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.7195241451263428,
      "learning_rate": 0.00010616646153186725,
      "loss": 2.9619,
      "step": 166721
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.941150188446045,
      "learning_rate": 0.00010616333945782277,
      "loss": 2.8644,
      "step": 166722
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.386112689971924,
      "learning_rate": 0.00010616021741981604,
      "loss": 3.1542,
      "step": 166723
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.639228343963623,
      "learning_rate": 0.00010615709541784793,
      "loss": 3.0593,
      "step": 166724
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4784035682678223,
      "learning_rate": 0.00010615397345191872,
      "loss": 2.9963,
      "step": 166725
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5594522953033447,
      "learning_rate": 0.00010615085152202928,
      "loss": 2.8283,
      "step": 166726
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2517402172088623,
      "learning_rate": 0.00010614772962817995,
      "loss": 2.9866,
      "step": 166727
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8658270835876465,
      "learning_rate": 0.00010614460777037162,
      "loss": 2.9081,
      "step": 166728
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.913717746734619,
      "learning_rate": 0.00010614148594860445,
      "loss": 2.8705,
      "step": 166729
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3678925037384033,
      "learning_rate": 0.00010613836416287937,
      "loss": 2.8703,
      "step": 166730
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3928346633911133,
      "learning_rate": 0.0001061352424131967,
      "loss": 2.6479,
      "step": 166731
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4135653972625732,
      "learning_rate": 0.00010613212069955725,
      "loss": 2.6832,
      "step": 166732
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.171067237854004,
      "learning_rate": 0.0001061289990219614,
      "loss": 3.1044,
      "step": 166733
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.453265428543091,
      "learning_rate": 0.00010612587738041006,
      "loss": 2.8139,
      "step": 166734
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9493820667266846,
      "learning_rate": 0.00010612275577490334,
      "loss": 3.1843,
      "step": 166735
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1478641033172607,
      "learning_rate": 0.00010611963420544216,
      "loss": 3.0083,
      "step": 166736
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0486867427825928,
      "learning_rate": 0.0001061165126720269,
      "loss": 2.9135,
      "step": 166737
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.630544900894165,
      "learning_rate": 0.00010611339117465834,
      "loss": 3.0142,
      "step": 166738
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.349201202392578,
      "learning_rate": 0.0001061102697133369,
      "loss": 2.9364,
      "step": 166739
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.458824872970581,
      "learning_rate": 0.00010610714828806337,
      "loss": 2.9988,
      "step": 166740
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8150722980499268,
      "learning_rate": 0.00010610402689883801,
      "loss": 2.9896,
      "step": 166741
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.427165985107422,
      "learning_rate": 0.00010610090554566168,
      "loss": 3.0078,
      "step": 166742
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8521690368652344,
      "learning_rate": 0.00010609778422853472,
      "loss": 2.5345,
      "step": 166743
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8937268257141113,
      "learning_rate": 0.00010609466294745795,
      "loss": 2.9264,
      "step": 166744
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.04189133644104,
      "learning_rate": 0.00010609154170243174,
      "loss": 3.0543,
      "step": 166745
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6940555572509766,
      "learning_rate": 0.00010608842049345701,
      "loss": 2.8357,
      "step": 166746
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2193431854248047,
      "learning_rate": 0.00010608529932053389,
      "loss": 2.839,
      "step": 166747
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6562957763671875,
      "learning_rate": 0.00010608217818366324,
      "loss": 2.9495,
      "step": 166748
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.227017402648926,
      "learning_rate": 0.00010607905708284552,
      "loss": 2.9063,
      "step": 166749
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.697767972946167,
      "learning_rate": 0.0001060759360180814,
      "loss": 2.8903,
      "step": 166750
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.582862615585327,
      "learning_rate": 0.00010607281498937137,
      "loss": 3.0496,
      "step": 166751
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.089033842086792,
      "learning_rate": 0.00010606969399671615,
      "loss": 3.0118,
      "step": 166752
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.272517681121826,
      "learning_rate": 0.00010606657304011626,
      "loss": 2.6401,
      "step": 166753
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.65301251411438,
      "learning_rate": 0.00010606345211957224,
      "loss": 2.7297,
      "step": 166754
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4596667289733887,
      "learning_rate": 0.00010606033123508457,
      "loss": 3.0295,
      "step": 166755
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9748735427856445,
      "learning_rate": 0.00010605721038665404,
      "loss": 3.0576,
      "step": 166756
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5183942317962646,
      "learning_rate": 0.00010605408957428106,
      "loss": 3.157,
      "step": 166757
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.615858554840088,
      "learning_rate": 0.00010605096879796639,
      "loss": 3.0111,
      "step": 166758
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.28997802734375,
      "learning_rate": 0.00010604784805771048,
      "loss": 3.0581,
      "step": 166759
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.640836238861084,
      "learning_rate": 0.00010604472735351394,
      "loss": 2.7557,
      "step": 166760
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9807651042938232,
      "learning_rate": 0.00010604160668537728,
      "loss": 2.7052,
      "step": 166761
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.804809093475342,
      "learning_rate": 0.0001060384860533012,
      "loss": 2.8137,
      "step": 166762
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6639351844787598,
      "learning_rate": 0.00010603536545728613,
      "loss": 3.0249,
      "step": 166763
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.407580852508545,
      "learning_rate": 0.0001060322448973329,
      "loss": 2.9583,
      "step": 166764
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2608561515808105,
      "learning_rate": 0.00010602912437344188,
      "loss": 3.0076,
      "step": 166765
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1188435554504395,
      "learning_rate": 0.00010602600388561375,
      "loss": 2.9036,
      "step": 166766
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.299286842346191,
      "learning_rate": 0.00010602288343384892,
      "loss": 2.8209,
      "step": 166767
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.443023681640625,
      "learning_rate": 0.00010601976301814819,
      "loss": 3.0297,
      "step": 166768
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6749422550201416,
      "learning_rate": 0.00010601664263851198,
      "loss": 2.9461,
      "step": 166769
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3227527141571045,
      "learning_rate": 0.00010601352229494102,
      "loss": 2.8537,
      "step": 166770
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5979628562927246,
      "learning_rate": 0.00010601040198743581,
      "loss": 2.9429,
      "step": 166771
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.672680616378784,
      "learning_rate": 0.00010600728171599692,
      "loss": 2.9635,
      "step": 166772
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.576320171356201,
      "learning_rate": 0.00010600416148062483,
      "loss": 3.1626,
      "step": 166773
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2155017852783203,
      "learning_rate": 0.00010600104128132034,
      "loss": 2.7817,
      "step": 166774
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3159964084625244,
      "learning_rate": 0.00010599792111808383,
      "loss": 2.9255,
      "step": 166775
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6354777812957764,
      "learning_rate": 0.00010599480099091607,
      "loss": 2.9555,
      "step": 166776
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.102353811264038,
      "learning_rate": 0.00010599168089981755,
      "loss": 3.0139,
      "step": 166777
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4523508548736572,
      "learning_rate": 0.00010598856084478881,
      "loss": 2.7603,
      "step": 166778
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7907557487487793,
      "learning_rate": 0.00010598544082583034,
      "loss": 3.2322,
      "step": 166779
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.268176317214966,
      "learning_rate": 0.00010598232084294297,
      "loss": 2.9651,
      "step": 166780
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.0503127574920654,
      "learning_rate": 0.00010597920089612704,
      "loss": 3.0226,
      "step": 166781
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4684560298919678,
      "learning_rate": 0.00010597608098538335,
      "loss": 2.9535,
      "step": 166782
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.597313165664673,
      "learning_rate": 0.00010597296111071237,
      "loss": 2.9099,
      "step": 166783
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6302719116210938,
      "learning_rate": 0.00010596984127211468,
      "loss": 2.7374,
      "step": 166784
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.917325258255005,
      "learning_rate": 0.00010596672146959072,
      "loss": 2.9914,
      "step": 166785
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4726574420928955,
      "learning_rate": 0.00010596360170314135,
      "loss": 3.0815,
      "step": 166786
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.657043218612671,
      "learning_rate": 0.00010596048197276691,
      "loss": 3.0915,
      "step": 166787
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.867861747741699,
      "learning_rate": 0.00010595736227846815,
      "loss": 2.9418,
      "step": 166788
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4323956966400146,
      "learning_rate": 0.0001059542426202456,
      "loss": 2.9282,
      "step": 166789
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.071364641189575,
      "learning_rate": 0.00010595112299809983,
      "loss": 3.0137,
      "step": 166790
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4610729217529297,
      "learning_rate": 0.00010594800341203129,
      "loss": 2.9368,
      "step": 166791
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3545870780944824,
      "learning_rate": 0.0001059448838620408,
      "loss": 3.0297,
      "step": 166792
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5193192958831787,
      "learning_rate": 0.00010594176434812868,
      "loss": 3.0073,
      "step": 166793
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.915400743484497,
      "learning_rate": 0.00010593864487029577,
      "loss": 2.5974,
      "step": 166794
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.399667978286743,
      "learning_rate": 0.00010593552542854252,
      "loss": 2.9673,
      "step": 166795
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8622701168060303,
      "learning_rate": 0.00010593240602286945,
      "loss": 2.9444,
      "step": 166796
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.697082996368408,
      "learning_rate": 0.0001059292866532773,
      "loss": 2.792,
      "step": 166797
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5832431316375732,
      "learning_rate": 0.00010592616731976654,
      "loss": 2.9836,
      "step": 166798
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.157351493835449,
      "learning_rate": 0.00010592304802233768,
      "loss": 2.9084,
      "step": 166799
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.477285385131836,
      "learning_rate": 0.0001059199287609915,
      "loss": 2.8099,
      "step": 166800
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.451758623123169,
      "learning_rate": 0.00010591680953572844,
      "loss": 2.7123,
      "step": 166801
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2599213123321533,
      "learning_rate": 0.00010591369034654902,
      "loss": 2.9687,
      "step": 166802
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5326344966888428,
      "learning_rate": 0.00010591057119345402,
      "loss": 3.1365,
      "step": 166803
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.72300124168396,
      "learning_rate": 0.00010590745207644377,
      "loss": 3.0301,
      "step": 166804
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.392561674118042,
      "learning_rate": 0.00010590433299551913,
      "loss": 3.1101,
      "step": 166805
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.292567253112793,
      "learning_rate": 0.00010590121395068052,
      "loss": 2.8142,
      "step": 166806
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4046695232391357,
      "learning_rate": 0.00010589809494192855,
      "loss": 2.9672,
      "step": 166807
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.778992176055908,
      "learning_rate": 0.00010589497596926371,
      "loss": 3.1263,
      "step": 166808
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.785503625869751,
      "learning_rate": 0.00010589185703268671,
      "loss": 2.7163,
      "step": 166809
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.494874954223633,
      "learning_rate": 0.00010588873813219801,
      "loss": 3.0692,
      "step": 166810
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.214709520339966,
      "learning_rate": 0.00010588561926779836,
      "loss": 3.1594,
      "step": 166811
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6992201805114746,
      "learning_rate": 0.00010588250043948822,
      "loss": 2.795,
      "step": 166812
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.20306658744812,
      "learning_rate": 0.0001058793816472682,
      "loss": 3.0226,
      "step": 166813
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7171854972839355,
      "learning_rate": 0.00010587626289113875,
      "loss": 3.043,
      "step": 166814
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4353315830230713,
      "learning_rate": 0.00010587314417110067,
      "loss": 2.9618,
      "step": 166815
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2586753368377686,
      "learning_rate": 0.00010587002548715434,
      "loss": 2.8021,
      "step": 166816
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9721744060516357,
      "learning_rate": 0.00010586690683930054,
      "loss": 2.8659,
      "step": 166817
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.558682918548584,
      "learning_rate": 0.00010586378822753965,
      "loss": 2.8583,
      "step": 166818
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.339397668838501,
      "learning_rate": 0.00010586066965187244,
      "loss": 3.0743,
      "step": 166819
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5547354221343994,
      "learning_rate": 0.0001058575511122994,
      "loss": 3.0205,
      "step": 166820
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.866870403289795,
      "learning_rate": 0.00010585443260882111,
      "loss": 2.9378,
      "step": 166821
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3540496826171875,
      "learning_rate": 0.00010585131414143806,
      "loss": 3.0715,
      "step": 166822
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.762864589691162,
      "learning_rate": 0.000105848195710151,
      "loss": 2.4746,
      "step": 166823
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.463667392730713,
      "learning_rate": 0.00010584507731496033,
      "loss": 2.8371,
      "step": 166824
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1720447540283203,
      "learning_rate": 0.00010584195895586681,
      "loss": 3.0848,
      "step": 166825
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6553022861480713,
      "learning_rate": 0.00010583884063287093,
      "loss": 2.9752,
      "step": 166826
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.7537410259246826,
      "learning_rate": 0.0001058357223459733,
      "loss": 3.1122,
      "step": 166827
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.291166305541992,
      "learning_rate": 0.00010583260409517435,
      "loss": 2.6929,
      "step": 166828
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7389988899230957,
      "learning_rate": 0.00010582948588047491,
      "loss": 3.0758,
      "step": 166829
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3810184001922607,
      "learning_rate": 0.00010582636770187531,
      "loss": 3.2547,
      "step": 166830
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.3915271759033203,
      "learning_rate": 0.00010582324955937634,
      "loss": 2.8856,
      "step": 166831
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.486921548843384,
      "learning_rate": 0.00010582013145297853,
      "loss": 2.9106,
      "step": 166832
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6135499477386475,
      "learning_rate": 0.0001058170133826824,
      "loss": 2.9451,
      "step": 166833
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.863987922668457,
      "learning_rate": 0.00010581389534848846,
      "loss": 2.8858,
      "step": 166834
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.533473253250122,
      "learning_rate": 0.00010581077735039752,
      "loss": 2.8736,
      "step": 166835
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.107095241546631,
      "learning_rate": 0.00010580765938840985,
      "loss": 2.9859,
      "step": 166836
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7575361728668213,
      "learning_rate": 0.00010580454146252634,
      "loss": 2.8574,
      "step": 166837
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7022573947906494,
      "learning_rate": 0.00010580142357274745,
      "loss": 2.7067,
      "step": 166838
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5367066860198975,
      "learning_rate": 0.0001057983057190737,
      "loss": 2.5852,
      "step": 166839
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3751494884490967,
      "learning_rate": 0.00010579518790150565,
      "loss": 2.8724,
      "step": 166840
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4001941680908203,
      "learning_rate": 0.00010579207012004401,
      "loss": 2.9666,
      "step": 166841
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.9836859703063965,
      "learning_rate": 0.0001057889523746892,
      "loss": 2.8621,
      "step": 166842
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5961222648620605,
      "learning_rate": 0.00010578583466544199,
      "loss": 2.5966,
      "step": 166843
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5951449871063232,
      "learning_rate": 0.00010578271699230288,
      "loss": 3.0066,
      "step": 166844
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.039523124694824,
      "learning_rate": 0.00010577959935527241,
      "loss": 3.2691,
      "step": 166845
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9088354110717773,
      "learning_rate": 0.00010577648175435108,
      "loss": 2.9622,
      "step": 166846
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7608323097229004,
      "learning_rate": 0.00010577336418953964,
      "loss": 3.0166,
      "step": 166847
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.899111270904541,
      "learning_rate": 0.00010577024666083855,
      "loss": 3.0068,
      "step": 166848
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2582993507385254,
      "learning_rate": 0.0001057671291682485,
      "loss": 2.9373,
      "step": 166849
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.013740539550781,
      "learning_rate": 0.00010576401171177001,
      "loss": 2.9089,
      "step": 166850
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6695895195007324,
      "learning_rate": 0.0001057608942914037,
      "loss": 3.1014,
      "step": 166851
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.276820659637451,
      "learning_rate": 0.00010575777690714997,
      "loss": 2.9712,
      "step": 166852
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0738494396209717,
      "learning_rate": 0.00010575465955900964,
      "loss": 2.8931,
      "step": 166853
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8110299110412598,
      "learning_rate": 0.00010575154224698306,
      "loss": 3.0213,
      "step": 166854
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.982574939727783,
      "learning_rate": 0.00010574842497107106,
      "loss": 2.7688,
      "step": 166855
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5818121433258057,
      "learning_rate": 0.0001057453077312741,
      "loss": 2.8598,
      "step": 166856
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2160019874572754,
      "learning_rate": 0.00010574219052759276,
      "loss": 2.9556,
      "step": 166857
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.462893009185791,
      "learning_rate": 0.00010573907336002748,
      "loss": 2.8983,
      "step": 166858
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.412480115890503,
      "learning_rate": 0.00010573595622857912,
      "loss": 3.063,
      "step": 166859
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.428920269012451,
      "learning_rate": 0.00010573283913324796,
      "loss": 3.0624,
      "step": 166860
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2359492778778076,
      "learning_rate": 0.00010572972207403486,
      "loss": 3.0102,
      "step": 166861
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5711050033569336,
      "learning_rate": 0.0001057266050509403,
      "loss": 3.114,
      "step": 166862
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.941951274871826,
      "learning_rate": 0.0001057234880639648,
      "loss": 2.7781,
      "step": 166863
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.001430034637451,
      "learning_rate": 0.00010572037111310884,
      "loss": 2.8045,
      "step": 166864
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.807069778442383,
      "learning_rate": 0.0001057172541983733,
      "loss": 3.1531,
      "step": 166865
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5751776695251465,
      "learning_rate": 0.00010571413731975843,
      "loss": 2.9129,
      "step": 166866
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1114468574523926,
      "learning_rate": 0.0001057110204772651,
      "loss": 3.0049,
      "step": 166867
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4396488666534424,
      "learning_rate": 0.00010570790367089377,
      "loss": 2.7691,
      "step": 166868
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.464695692062378,
      "learning_rate": 0.00010570478690064498,
      "loss": 2.7193,
      "step": 166869
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.288486957550049,
      "learning_rate": 0.00010570167016651925,
      "loss": 3.2111,
      "step": 166870
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6583127975463867,
      "learning_rate": 0.00010569855346851734,
      "loss": 2.9096,
      "step": 166871
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.582589864730835,
      "learning_rate": 0.00010569543680663963,
      "loss": 2.9221,
      "step": 166872
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.86055850982666,
      "learning_rate": 0.00010569232018088692,
      "loss": 2.818,
      "step": 166873
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3919284343719482,
      "learning_rate": 0.00010568920359125968,
      "loss": 2.9672,
      "step": 166874
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.561688184738159,
      "learning_rate": 0.00010568608703775849,
      "loss": 2.799,
      "step": 166875
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0931994915008545,
      "learning_rate": 0.00010568297052038381,
      "loss": 2.5174,
      "step": 166876
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0141146183013916,
      "learning_rate": 0.00010567985403913645,
      "loss": 2.6244,
      "step": 166877
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4111242294311523,
      "learning_rate": 0.00010567673759401674,
      "loss": 2.9865,
      "step": 166878
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0893683433532715,
      "learning_rate": 0.00010567362118502551,
      "loss": 3.0293,
      "step": 166879
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.238896131515503,
      "learning_rate": 0.00010567050481216322,
      "loss": 3.0171,
      "step": 166880
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.42615008354187,
      "learning_rate": 0.00010566738847543035,
      "loss": 2.7117,
      "step": 166881
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.194387674331665,
      "learning_rate": 0.00010566427217482772,
      "loss": 2.9963,
      "step": 166882
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.113060474395752,
      "learning_rate": 0.00010566115591035572,
      "loss": 2.8182,
      "step": 166883
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7712225914001465,
      "learning_rate": 0.0001056580396820149,
      "loss": 2.8363,
      "step": 166884
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4333794116973877,
      "learning_rate": 0.000105654923489806,
      "loss": 2.7038,
      "step": 166885
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6623783111572266,
      "learning_rate": 0.00010565180733372946,
      "loss": 3.0223,
      "step": 166886
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.268831729888916,
      "learning_rate": 0.00010564869121378597,
      "loss": 2.9964,
      "step": 166887
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5380001068115234,
      "learning_rate": 0.0001056455751299761,
      "loss": 2.9487,
      "step": 166888
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9424808025360107,
      "learning_rate": 0.00010564245908230024,
      "loss": 2.9427,
      "step": 166889
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.676443338394165,
      "learning_rate": 0.00010563934307075924,
      "loss": 2.928,
      "step": 166890
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7247958183288574,
      "learning_rate": 0.00010563622709535358,
      "loss": 2.9613,
      "step": 166891
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3952248096466064,
      "learning_rate": 0.00010563311115608367,
      "loss": 3.0919,
      "step": 166892
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.714097261428833,
      "learning_rate": 0.00010562999525295037,
      "loss": 3.064,
      "step": 166893
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.915379285812378,
      "learning_rate": 0.00010562687938595408,
      "loss": 2.9624,
      "step": 166894
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.091935157775879,
      "learning_rate": 0.00010562376355509536,
      "loss": 2.9864,
      "step": 166895
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7279727458953857,
      "learning_rate": 0.00010562064776037492,
      "loss": 2.8025,
      "step": 166896
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.758484125137329,
      "learning_rate": 0.00010561753200179332,
      "loss": 2.8848,
      "step": 166897
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.440577983856201,
      "learning_rate": 0.00010561441627935094,
      "loss": 3.2442,
      "step": 166898
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.8240253925323486,
      "learning_rate": 0.00010561130059304864,
      "loss": 2.728,
      "step": 166899
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4860880374908447,
      "learning_rate": 0.00010560818494288685,
      "loss": 2.9803,
      "step": 166900
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5948662757873535,
      "learning_rate": 0.00010560506932886607,
      "loss": 2.9241,
      "step": 166901
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5635101795196533,
      "learning_rate": 0.00010560195375098708,
      "loss": 3.0712,
      "step": 166902
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4660282135009766,
      "learning_rate": 0.00010559883820925027,
      "loss": 3.1005,
      "step": 166903
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.725180625915527,
      "learning_rate": 0.00010559572270365641,
      "loss": 2.7218,
      "step": 166904
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7005386352539062,
      "learning_rate": 0.00010559260723420595,
      "loss": 2.921,
      "step": 166905
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.198861837387085,
      "learning_rate": 0.00010558949180089952,
      "loss": 3.1245,
      "step": 166906
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8515172004699707,
      "learning_rate": 0.00010558637640373754,
      "loss": 2.9846,
      "step": 166907
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2788681983947754,
      "learning_rate": 0.00010558326104272084,
      "loss": 3.2309,
      "step": 166908
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2249538898468018,
      "learning_rate": 0.00010558014571784978,
      "loss": 2.9062,
      "step": 166909
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6804277896881104,
      "learning_rate": 0.00010557703042912516,
      "loss": 2.9058,
      "step": 166910
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.281177043914795,
      "learning_rate": 0.00010557391517654742,
      "loss": 3.1511,
      "step": 166911
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.071514844894409,
      "learning_rate": 0.00010557079996011713,
      "loss": 2.9289,
      "step": 166912
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2764227390289307,
      "learning_rate": 0.00010556768477983483,
      "loss": 2.8838,
      "step": 166913
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.602789878845215,
      "learning_rate": 0.00010556456963570129,
      "loss": 2.7667,
      "step": 166914
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.23288893699646,
      "learning_rate": 0.00010556145452771683,
      "loss": 3.0803,
      "step": 166915
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.632768630981445,
      "learning_rate": 0.00010555833945588228,
      "loss": 2.8831,
      "step": 166916
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.6089253425598145,
      "learning_rate": 0.00010555522442019811,
      "loss": 2.6544,
      "step": 166917
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.924603223800659,
      "learning_rate": 0.00010555210942066489,
      "loss": 3.0093,
      "step": 166918
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.636979818344116,
      "learning_rate": 0.0001055489944572831,
      "loss": 2.961,
      "step": 166919
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.801783323287964,
      "learning_rate": 0.00010554587953005354,
      "loss": 3.1009,
      "step": 166920
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.9855048656463623,
      "learning_rate": 0.00010554276463897654,
      "loss": 3.2739,
      "step": 166921
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.412391662597656,
      "learning_rate": 0.00010553964978405295,
      "loss": 2.9467,
      "step": 166922
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.106393814086914,
      "learning_rate": 0.00010553653496528317,
      "loss": 2.7961,
      "step": 166923
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6187164783477783,
      "learning_rate": 0.00010553342018266784,
      "loss": 3.1704,
      "step": 166924
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3579013347625732,
      "learning_rate": 0.00010553030543620741,
      "loss": 3.0879,
      "step": 166925
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.533271312713623,
      "learning_rate": 0.00010552719072590268,
      "loss": 2.6573,
      "step": 166926
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.384230852127075,
      "learning_rate": 0.00010552407605175402,
      "loss": 3.0728,
      "step": 166927
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4916605949401855,
      "learning_rate": 0.00010552096141376217,
      "loss": 3.0477,
      "step": 166928
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.1661574840545654,
      "learning_rate": 0.00010551784681192768,
      "loss": 2.8811,
      "step": 166929
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8163554668426514,
      "learning_rate": 0.0001055147322462511,
      "loss": 3.1033,
      "step": 166930
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.31170916557312,
      "learning_rate": 0.00010551161771673287,
      "loss": 3.0613,
      "step": 166931
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.890272617340088,
      "learning_rate": 0.00010550850322337379,
      "loss": 3.1095,
      "step": 166932
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6858932971954346,
      "learning_rate": 0.00010550538876617428,
      "loss": 3.0257,
      "step": 166933
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4172234535217285,
      "learning_rate": 0.00010550227434513509,
      "loss": 2.8988,
      "step": 166934
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.541090726852417,
      "learning_rate": 0.0001054991599602567,
      "loss": 2.6859,
      "step": 166935
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3470728397369385,
      "learning_rate": 0.00010549604561153969,
      "loss": 2.9851,
      "step": 166936
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.877274990081787,
      "learning_rate": 0.0001054929312989845,
      "loss": 3.0648,
      "step": 166937
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.2548792362213135,
      "learning_rate": 0.000105489817022592,
      "loss": 3.094,
      "step": 166938
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9704480171203613,
      "learning_rate": 0.00010548670278236247,
      "loss": 2.8554,
      "step": 166939
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6619653701782227,
      "learning_rate": 0.00010548358857829676,
      "loss": 2.8765,
      "step": 166940
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.947996139526367,
      "learning_rate": 0.0001054804744103953,
      "loss": 3.1885,
      "step": 166941
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.11175537109375,
      "learning_rate": 0.00010547736027865872,
      "loss": 2.991,
      "step": 166942
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.096435546875,
      "learning_rate": 0.00010547424618308744,
      "loss": 2.8383,
      "step": 166943
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6241533756256104,
      "learning_rate": 0.0001054711321236823,
      "loss": 3.1016,
      "step": 166944
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7634310722351074,
      "learning_rate": 0.00010546801810044363,
      "loss": 3.0853,
      "step": 166945
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.699467420578003,
      "learning_rate": 0.00010546490411337223,
      "loss": 3.0208,
      "step": 166946
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.754502534866333,
      "learning_rate": 0.00010546179016246859,
      "loss": 3.0097,
      "step": 166947
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.599574089050293,
      "learning_rate": 0.00010545867624773326,
      "loss": 2.918,
      "step": 166948
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.9108734130859375,
      "learning_rate": 0.00010545556236916674,
      "loss": 2.7344,
      "step": 166949
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5594236850738525,
      "learning_rate": 0.00010545244852676981,
      "loss": 3.0856,
      "step": 166950
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0946857929229736,
      "learning_rate": 0.00010544933472054281,
      "loss": 2.7883,
      "step": 166951
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.281735420227051,
      "learning_rate": 0.00010544622095048658,
      "loss": 2.9649,
      "step": 166952
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8997294902801514,
      "learning_rate": 0.00010544310721660148,
      "loss": 2.9281,
      "step": 166953
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.0915982723236084,
      "learning_rate": 0.00010543999351888835,
      "loss": 2.8756,
      "step": 166954
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.658463716506958,
      "learning_rate": 0.00010543687985734742,
      "loss": 2.7937,
      "step": 166955
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.672372341156006,
      "learning_rate": 0.00010543376623197955,
      "loss": 2.9905,
      "step": 166956
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7930796146392822,
      "learning_rate": 0.00010543065264278508,
      "loss": 2.7554,
      "step": 166957
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6422905921936035,
      "learning_rate": 0.00010542753908976487,
      "loss": 2.8403,
      "step": 166958
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.11873197555542,
      "learning_rate": 0.00010542442557291923,
      "loss": 2.6672,
      "step": 166959
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.893474102020264,
      "learning_rate": 0.00010542131209224905,
      "loss": 2.8453,
      "step": 166960
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.02885627746582,
      "learning_rate": 0.00010541819864775454,
      "loss": 3.1077,
      "step": 166961
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9178640842437744,
      "learning_rate": 0.00010541508523943654,
      "loss": 2.5549,
      "step": 166962
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.740734100341797,
      "learning_rate": 0.00010541197186729549,
      "loss": 2.7432,
      "step": 166963
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5347328186035156,
      "learning_rate": 0.00010540885853133209,
      "loss": 2.7192,
      "step": 166964
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7305197715759277,
      "learning_rate": 0.00010540574523154677,
      "loss": 2.9887,
      "step": 166965
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.856109619140625,
      "learning_rate": 0.0001054026319679403,
      "loss": 3.0096,
      "step": 166966
    },
    {
      "epoch": 2.17,
      "grad_norm": 6.150521755218506,
      "learning_rate": 0.00010539951874051315,
      "loss": 2.7923,
      "step": 166967
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.902294635772705,
      "learning_rate": 0.00010539640554926589,
      "loss": 2.7508,
      "step": 166968
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4024901390075684,
      "learning_rate": 0.00010539329239419901,
      "loss": 3.0009,
      "step": 166969
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5981171131134033,
      "learning_rate": 0.00010539017927531334,
      "loss": 2.6832,
      "step": 166970
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.50970196723938,
      "learning_rate": 0.00010538706619260915,
      "loss": 2.7635,
      "step": 166971
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.669346809387207,
      "learning_rate": 0.00010538395314608729,
      "loss": 2.6954,
      "step": 166972
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8260014057159424,
      "learning_rate": 0.00010538084013574824,
      "loss": 2.8239,
      "step": 166973
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9086647033691406,
      "learning_rate": 0.00010537772716159257,
      "loss": 3.0495,
      "step": 166974
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.27112078666687,
      "learning_rate": 0.00010537461422362073,
      "loss": 2.8765,
      "step": 166975
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1964192390441895,
      "learning_rate": 0.00010537150132183357,
      "loss": 3.0118,
      "step": 166976
    },
    {
      "epoch": 2.17,
      "grad_norm": 4.394458770751953,
      "learning_rate": 0.00010536838845623137,
      "loss": 2.9373,
      "step": 166977
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6161692142486572,
      "learning_rate": 0.00010536527562681498,
      "loss": 2.8412,
      "step": 166978
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9859561920166016,
      "learning_rate": 0.00010536216283358487,
      "loss": 2.7316,
      "step": 166979
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.296076536178589,
      "learning_rate": 0.00010535905007654148,
      "loss": 2.9791,
      "step": 166980
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4675161838531494,
      "learning_rate": 0.00010535593735568565,
      "loss": 3.0218,
      "step": 166981
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.2479257583618164,
      "learning_rate": 0.0001053528246710178,
      "loss": 2.9833,
      "step": 166982
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4935848712921143,
      "learning_rate": 0.00010534971202253847,
      "loss": 2.7637,
      "step": 166983
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.316556215286255,
      "learning_rate": 0.00010534659941024836,
      "loss": 3.1402,
      "step": 166984
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7584054470062256,
      "learning_rate": 0.000105343486834148,
      "loss": 3.1779,
      "step": 166985
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.225154399871826,
      "learning_rate": 0.00010534037429423789,
      "loss": 3.0293,
      "step": 166986
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.733344793319702,
      "learning_rate": 0.00010533726179051876,
      "loss": 2.6359,
      "step": 166987
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4459643363952637,
      "learning_rate": 0.00010533414932299098,
      "loss": 2.9839,
      "step": 166988
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.471564531326294,
      "learning_rate": 0.00010533103689165538,
      "loss": 3.1427,
      "step": 166989
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4623541831970215,
      "learning_rate": 0.00010532792449651244,
      "loss": 2.9017,
      "step": 166990
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7247791290283203,
      "learning_rate": 0.00010532481213756268,
      "loss": 2.693,
      "step": 166991
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3848989009857178,
      "learning_rate": 0.00010532169981480663,
      "loss": 3.1057,
      "step": 166992
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3416736125946045,
      "learning_rate": 0.00010531858752824502,
      "loss": 3.0165,
      "step": 166993
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6786866188049316,
      "learning_rate": 0.00010531547527787831,
      "loss": 2.9608,
      "step": 166994
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9977951049804688,
      "learning_rate": 0.00010531236306370719,
      "loss": 3.0652,
      "step": 166995
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.479656457901001,
      "learning_rate": 0.0001053092508857322,
      "loss": 2.9675,
      "step": 166996
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4035520553588867,
      "learning_rate": 0.00010530613874395388,
      "loss": 2.7467,
      "step": 166997
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6881368160247803,
      "learning_rate": 0.0001053030266383727,
      "loss": 3.0872,
      "step": 166998
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1550416946411133,
      "learning_rate": 0.00010529991456898952,
      "loss": 3.0746,
      "step": 166999
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4932422637939453,
      "learning_rate": 0.00010529680253580463,
      "loss": 3.0601,
      "step": 167000
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.7793707847595215,
      "learning_rate": 0.00010529369053881883,
      "loss": 2.9392,
      "step": 167001
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.8998863697052,
      "learning_rate": 0.00010529057857803264,
      "loss": 2.845,
      "step": 167002
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.9869487285614014,
      "learning_rate": 0.00010528746665344658,
      "loss": 3.0238,
      "step": 167003
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6147758960723877,
      "learning_rate": 0.00010528435476506114,
      "loss": 2.7316,
      "step": 167004
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8937199115753174,
      "learning_rate": 0.00010528124291287713,
      "loss": 2.9998,
      "step": 167005
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.840139627456665,
      "learning_rate": 0.00010527813109689494,
      "loss": 2.9747,
      "step": 167006
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4868719577789307,
      "learning_rate": 0.0001052750193171153,
      "loss": 2.8629,
      "step": 167007
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7399744987487793,
      "learning_rate": 0.00010527190757353868,
      "loss": 2.9583,
      "step": 167008
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8453333377838135,
      "learning_rate": 0.00010526879586616574,
      "loss": 2.8237,
      "step": 167009
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.859398603439331,
      "learning_rate": 0.00010526568419499688,
      "loss": 3.0215,
      "step": 167010
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.8114829063415527,
      "learning_rate": 0.00010526257256003292,
      "loss": 3.0372,
      "step": 167011
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.930388927459717,
      "learning_rate": 0.0001052594609612742,
      "loss": 2.6693,
      "step": 167012
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.1531100273132324,
      "learning_rate": 0.00010525634939872151,
      "loss": 2.8252,
      "step": 167013
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.35975980758667,
      "learning_rate": 0.00010525323787237527,
      "loss": 2.908,
      "step": 167014
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.35680890083313,
      "learning_rate": 0.00010525012638223634,
      "loss": 3.1023,
      "step": 167015
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.261969566345215,
      "learning_rate": 0.00010524701492830487,
      "loss": 3.1027,
      "step": 167016
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.620541572570801,
      "learning_rate": 0.00010524390351058176,
      "loss": 2.7201,
      "step": 167017
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.465388536453247,
      "learning_rate": 0.00010524079212906739,
      "loss": 2.7832,
      "step": 167018
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4674646854400635,
      "learning_rate": 0.00010523768078376253,
      "loss": 2.9413,
      "step": 167019
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4513633251190186,
      "learning_rate": 0.00010523456947466754,
      "loss": 2.8861,
      "step": 167020
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3918418884277344,
      "learning_rate": 0.00010523145820178334,
      "loss": 3.0356,
      "step": 167021
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.0298686027526855,
      "learning_rate": 0.00010522834696511009,
      "loss": 2.7428,
      "step": 167022
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.819672107696533,
      "learning_rate": 0.00010522523576464866,
      "loss": 3.106,
      "step": 167023
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.5873172283172607,
      "learning_rate": 0.00010522212460039943,
      "loss": 2.919,
      "step": 167024
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.783524990081787,
      "learning_rate": 0.00010521901347236321,
      "loss": 3.138,
      "step": 167025
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4426023960113525,
      "learning_rate": 0.00010521590238054031,
      "loss": 2.7732,
      "step": 167026
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.345268487930298,
      "learning_rate": 0.00010521279132493168,
      "loss": 2.9897,
      "step": 167027
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.3611373901367188,
      "learning_rate": 0.00010520968030553749,
      "loss": 2.7066,
      "step": 167028
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4170756340026855,
      "learning_rate": 0.00010520656932235855,
      "loss": 2.9023,
      "step": 167029
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.6487884521484375,
      "learning_rate": 0.00010520345837539533,
      "loss": 3.1408,
      "step": 167030
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.840597629547119,
      "learning_rate": 0.00010520034746464852,
      "loss": 3.1597,
      "step": 167031
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4772045612335205,
      "learning_rate": 0.00010519723659011857,
      "loss": 2.9269,
      "step": 167032
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.448683977127075,
      "learning_rate": 0.00010519412575180633,
      "loss": 2.8171,
      "step": 167033
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.760451316833496,
      "learning_rate": 0.00010519101494971198,
      "loss": 2.9945,
      "step": 167034
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.4006130695343018,
      "learning_rate": 0.00010518790418383639,
      "loss": 3.2852,
      "step": 167035
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.5807039737701416,
      "learning_rate": 0.00010518479345417995,
      "loss": 2.8678,
      "step": 167036
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.7657411098480225,
      "learning_rate": 0.00010518168276074342,
      "loss": 2.969,
      "step": 167037
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8519115447998047,
      "learning_rate": 0.0001051785721035272,
      "loss": 2.9231,
      "step": 167038
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.4079835414886475,
      "learning_rate": 0.00010517546148253216,
      "loss": 2.997,
      "step": 167039
    },
    {
      "epoch": 2.17,
      "grad_norm": 3.406144618988037,
      "learning_rate": 0.0001051723508977585,
      "loss": 2.8786,
      "step": 167040
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.611544609069824,
      "learning_rate": 0.00010516924034920706,
      "loss": 2.7462,
      "step": 167041
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.968203544616699,
      "learning_rate": 0.00010516612983687822,
      "loss": 3.2349,
      "step": 167042
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.139268636703491,
      "learning_rate": 0.0001051630193607728,
      "loss": 2.9781,
      "step": 167043
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5287320613861084,
      "learning_rate": 0.00010515990892089115,
      "loss": 2.5902,
      "step": 167044
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3199949264526367,
      "learning_rate": 0.00010515679851723414,
      "loss": 3.1364,
      "step": 167045
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.603574752807617,
      "learning_rate": 0.00010515368814980197,
      "loss": 2.9362,
      "step": 167046
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.129733085632324,
      "learning_rate": 0.00010515057781859551,
      "loss": 2.8947,
      "step": 167047
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.214107990264893,
      "learning_rate": 0.0001051474675236151,
      "loss": 2.9173,
      "step": 167048
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8441433906555176,
      "learning_rate": 0.00010514435726486158,
      "loss": 2.8117,
      "step": 167049
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.440225124359131,
      "learning_rate": 0.00010514124704233533,
      "loss": 3.075,
      "step": 167050
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.385651111602783,
      "learning_rate": 0.00010513813685603706,
      "loss": 2.9676,
      "step": 167051
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2515807151794434,
      "learning_rate": 0.0001051350267059673,
      "loss": 2.8915,
      "step": 167052
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.986281633377075,
      "learning_rate": 0.00010513191659212664,
      "loss": 2.9912,
      "step": 167053
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6127822399139404,
      "learning_rate": 0.00010512880651451549,
      "loss": 2.9918,
      "step": 167054
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6799864768981934,
      "learning_rate": 0.0001051256964731347,
      "loss": 2.9553,
      "step": 167055
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4190285205841064,
      "learning_rate": 0.00010512258646798462,
      "loss": 3.0991,
      "step": 167056
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6160991191864014,
      "learning_rate": 0.00010511947649906603,
      "loss": 3.0532,
      "step": 167057
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.040311098098755,
      "learning_rate": 0.00010511636656637942,
      "loss": 3.1458,
      "step": 167058
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.099167823791504,
      "learning_rate": 0.00010511325666992534,
      "loss": 2.8665,
      "step": 167059
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.646282196044922,
      "learning_rate": 0.0001051101468097043,
      "loss": 2.9437,
      "step": 167060
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5151238441467285,
      "learning_rate": 0.00010510703698571704,
      "loss": 2.8475,
      "step": 167061
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.9067203998565674,
      "learning_rate": 0.00010510392719796397,
      "loss": 2.9082,
      "step": 167062
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.220634937286377,
      "learning_rate": 0.00010510081744644587,
      "loss": 2.8304,
      "step": 167063
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.557596445083618,
      "learning_rate": 0.0001050977077311632,
      "loss": 3.0354,
      "step": 167064
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.791844129562378,
      "learning_rate": 0.00010509459805211644,
      "loss": 3.0972,
      "step": 167065
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9237353801727295,
      "learning_rate": 0.00010509148840930638,
      "loss": 2.9513,
      "step": 167066
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.7930948734283447,
      "learning_rate": 0.00010508837880273348,
      "loss": 2.9412,
      "step": 167067
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.1764984130859375,
      "learning_rate": 0.00010508526923239824,
      "loss": 2.9852,
      "step": 167068
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.527330160140991,
      "learning_rate": 0.00010508215969830144,
      "loss": 2.6514,
      "step": 167069
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.8999247550964355,
      "learning_rate": 0.00010507905020044351,
      "loss": 3.0753,
      "step": 167070
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4639699459075928,
      "learning_rate": 0.00010507594073882498,
      "loss": 2.921,
      "step": 167071
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.999152898788452,
      "learning_rate": 0.00010507283131344664,
      "loss": 2.9034,
      "step": 167072
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.559926986694336,
      "learning_rate": 0.0001050697219243089,
      "loss": 2.7241,
      "step": 167073
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.7038164138793945,
      "learning_rate": 0.00010506661257141231,
      "loss": 2.8101,
      "step": 167074
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7259230613708496,
      "learning_rate": 0.0001050635032547576,
      "loss": 2.9145,
      "step": 167075
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.384683132171631,
      "learning_rate": 0.00010506039397434525,
      "loss": 3.0871,
      "step": 167076
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1025805473327637,
      "learning_rate": 0.00010505728473017576,
      "loss": 3.0636,
      "step": 167077
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.234650135040283,
      "learning_rate": 0.0001050541755222499,
      "loss": 3.0077,
      "step": 167078
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3182108402252197,
      "learning_rate": 0.00010505106635056809,
      "loss": 2.7022,
      "step": 167079
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.53580641746521,
      "learning_rate": 0.00010504795721513102,
      "loss": 3.0789,
      "step": 167080
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.255634069442749,
      "learning_rate": 0.00010504484811593925,
      "loss": 2.7307,
      "step": 167081
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.376851797103882,
      "learning_rate": 0.0001050417390529933,
      "loss": 2.8368,
      "step": 167082
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.809631109237671,
      "learning_rate": 0.00010503863002629367,
      "loss": 2.7629,
      "step": 167083
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.427997589111328,
      "learning_rate": 0.00010503552103584111,
      "loss": 3.1364,
      "step": 167084
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.506121873855591,
      "learning_rate": 0.00010503241208163607,
      "loss": 3.1422,
      "step": 167085
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3767659664154053,
      "learning_rate": 0.00010502930316367927,
      "loss": 2.9721,
      "step": 167086
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.463194847106934,
      "learning_rate": 0.00010502619428197113,
      "loss": 2.9734,
      "step": 167087
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.265065908432007,
      "learning_rate": 0.00010502308543651247,
      "loss": 2.9902,
      "step": 167088
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.15895676612854,
      "learning_rate": 0.0001050199766273035,
      "loss": 2.5881,
      "step": 167089
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3146777153015137,
      "learning_rate": 0.00010501686785434511,
      "loss": 3.2147,
      "step": 167090
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.431422472000122,
      "learning_rate": 0.00010501375911763765,
      "loss": 3.1402,
      "step": 167091
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4553635120391846,
      "learning_rate": 0.00010501065041718193,
      "loss": 2.8532,
      "step": 167092
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.884655714035034,
      "learning_rate": 0.00010500754175297831,
      "loss": 2.9323,
      "step": 167093
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4601149559020996,
      "learning_rate": 0.00010500443312502765,
      "loss": 2.8824,
      "step": 167094
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4063174724578857,
      "learning_rate": 0.00010500132453333016,
      "loss": 2.9539,
      "step": 167095
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1158089637756348,
      "learning_rate": 0.00010499821597788671,
      "loss": 2.983,
      "step": 167096
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5816779136657715,
      "learning_rate": 0.00010499510745869765,
      "loss": 2.9036,
      "step": 167097
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0874598026275635,
      "learning_rate": 0.0001049919989757638,
      "loss": 2.7043,
      "step": 167098
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.852208375930786,
      "learning_rate": 0.00010498889052908551,
      "loss": 2.9568,
      "step": 167099
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6425986289978027,
      "learning_rate": 0.00010498578211866366,
      "loss": 3.0205,
      "step": 167100
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.23396635055542,
      "learning_rate": 0.00010498267374449846,
      "loss": 2.9859,
      "step": 167101
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9432032108306885,
      "learning_rate": 0.00010497956540659073,
      "loss": 2.6626,
      "step": 167102
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.091050624847412,
      "learning_rate": 0.00010497645710494089,
      "loss": 2.8087,
      "step": 167103
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3851735591888428,
      "learning_rate": 0.00010497334883954972,
      "loss": 2.834,
      "step": 167104
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.055820941925049,
      "learning_rate": 0.00010497024061041758,
      "loss": 2.9992,
      "step": 167105
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9543941020965576,
      "learning_rate": 0.00010496713241754537,
      "loss": 3.1442,
      "step": 167106
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3692760467529297,
      "learning_rate": 0.00010496402426093325,
      "loss": 3.077,
      "step": 167107
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.44751238822937,
      "learning_rate": 0.00010496091614058206,
      "loss": 2.9724,
      "step": 167108
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8940629959106445,
      "learning_rate": 0.00010495780805649225,
      "loss": 2.829,
      "step": 167109
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.594578266143799,
      "learning_rate": 0.00010495470000866454,
      "loss": 2.9028,
      "step": 167110
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7689833641052246,
      "learning_rate": 0.00010495159199709938,
      "loss": 3.0841,
      "step": 167111
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.176889657974243,
      "learning_rate": 0.00010494848402179755,
      "loss": 2.7382,
      "step": 167112
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7579708099365234,
      "learning_rate": 0.00010494537608275929,
      "loss": 3.0444,
      "step": 167113
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4967896938323975,
      "learning_rate": 0.00010494226817998549,
      "loss": 2.9772,
      "step": 167114
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5159876346588135,
      "learning_rate": 0.00010493916031347648,
      "loss": 2.8679,
      "step": 167115
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.970350503921509,
      "learning_rate": 0.00010493605248323309,
      "loss": 2.7204,
      "step": 167116
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.259697437286377,
      "learning_rate": 0.00010493294468925566,
      "loss": 2.8361,
      "step": 167117
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3353428840637207,
      "learning_rate": 0.00010492983693154505,
      "loss": 2.8798,
      "step": 167118
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2306582927703857,
      "learning_rate": 0.0001049267292101015,
      "loss": 3.0015,
      "step": 167119
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8200604915618896,
      "learning_rate": 0.00010492362152492584,
      "loss": 2.9657,
      "step": 167120
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.34731388092041,
      "learning_rate": 0.00010492051387601846,
      "loss": 2.7749,
      "step": 167121
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6480586528778076,
      "learning_rate": 0.00010491740626338014,
      "loss": 2.7611,
      "step": 167122
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.344971179962158,
      "learning_rate": 0.00010491429868701125,
      "loss": 2.9023,
      "step": 167123
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.084080219268799,
      "learning_rate": 0.0001049111911469127,
      "loss": 3.0253,
      "step": 167124
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5364761352539062,
      "learning_rate": 0.00010490808364308461,
      "loss": 2.7355,
      "step": 167125
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5629565715789795,
      "learning_rate": 0.0001049049761755279,
      "loss": 3.0046,
      "step": 167126
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3149077892303467,
      "learning_rate": 0.00010490186874424294,
      "loss": 3.1316,
      "step": 167127
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6618175506591797,
      "learning_rate": 0.00010489876134923051,
      "loss": 3.1871,
      "step": 167128
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2866101264953613,
      "learning_rate": 0.00010489565399049096,
      "loss": 2.7665,
      "step": 167129
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3026926517486572,
      "learning_rate": 0.00010489254666802522,
      "loss": 2.9434,
      "step": 167130
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3171072006225586,
      "learning_rate": 0.00010488943938183344,
      "loss": 2.9892,
      "step": 167131
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4404308795928955,
      "learning_rate": 0.00010488633213191648,
      "loss": 2.8294,
      "step": 167132
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.837085723876953,
      "learning_rate": 0.00010488322491827472,
      "loss": 3.1187,
      "step": 167133
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.440322160720825,
      "learning_rate": 0.00010488011774090898,
      "loss": 2.8547,
      "step": 167134
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.747440814971924,
      "learning_rate": 0.0001048770105998196,
      "loss": 2.9309,
      "step": 167135
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6873528957366943,
      "learning_rate": 0.00010487390349500746,
      "loss": 2.7831,
      "step": 167136
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4449832439422607,
      "learning_rate": 0.00010487079642647278,
      "loss": 3.0066,
      "step": 167137
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7935376167297363,
      "learning_rate": 0.00010486768939421635,
      "loss": 2.675,
      "step": 167138
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.474179744720459,
      "learning_rate": 0.00010486458239823862,
      "loss": 2.8202,
      "step": 167139
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3035426139831543,
      "learning_rate": 0.00010486147543854034,
      "loss": 3.0364,
      "step": 167140
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7512784004211426,
      "learning_rate": 0.0001048583685151219,
      "loss": 2.9028,
      "step": 167141
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7789151668548584,
      "learning_rate": 0.00010485526162798411,
      "loss": 2.8923,
      "step": 167142
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.266223907470703,
      "learning_rate": 0.00010485215477712736,
      "loss": 3.1778,
      "step": 167143
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9071521759033203,
      "learning_rate": 0.00010484904796255229,
      "loss": 3.054,
      "step": 167144
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.25140643119812,
      "learning_rate": 0.0001048459411842594,
      "loss": 2.7861,
      "step": 167145
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.549844980239868,
      "learning_rate": 0.00010484283444224938,
      "loss": 2.885,
      "step": 167146
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.363983392715454,
      "learning_rate": 0.0001048397277365227,
      "loss": 2.9119,
      "step": 167147
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.858893632888794,
      "learning_rate": 0.00010483662106708007,
      "loss": 2.9507,
      "step": 167148
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.935930252075195,
      "learning_rate": 0.00010483351443392202,
      "loss": 3.3084,
      "step": 167149
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.15053129196167,
      "learning_rate": 0.000104830407837049,
      "loss": 2.5012,
      "step": 167150
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3309173583984375,
      "learning_rate": 0.00010482730127646179,
      "loss": 2.8746,
      "step": 167151
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6532974243164062,
      "learning_rate": 0.00010482419475216086,
      "loss": 2.7332,
      "step": 167152
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4874818325042725,
      "learning_rate": 0.0001048210882641467,
      "loss": 2.9502,
      "step": 167153
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4678120613098145,
      "learning_rate": 0.00010481798181242008,
      "loss": 2.9874,
      "step": 167154
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4655284881591797,
      "learning_rate": 0.0001048148753969815,
      "loss": 3.0267,
      "step": 167155
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.760141134262085,
      "learning_rate": 0.00010481176901783143,
      "loss": 2.8431,
      "step": 167156
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.425666093826294,
      "learning_rate": 0.00010480866267497059,
      "loss": 3.0211,
      "step": 167157
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5824666023254395,
      "learning_rate": 0.00010480555636839952,
      "loss": 3.0426,
      "step": 167158
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7514333724975586,
      "learning_rate": 0.00010480245009811869,
      "loss": 2.7966,
      "step": 167159
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.152993202209473,
      "learning_rate": 0.00010479934386412887,
      "loss": 2.9375,
      "step": 167160
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8933730125427246,
      "learning_rate": 0.00010479623766643055,
      "loss": 2.8368,
      "step": 167161
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.048774480819702,
      "learning_rate": 0.00010479313150502417,
      "loss": 2.7973,
      "step": 167162
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.195091247558594,
      "learning_rate": 0.00010479002537991055,
      "loss": 2.9286,
      "step": 167163
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.325913429260254,
      "learning_rate": 0.00010478691929109004,
      "loss": 2.8769,
      "step": 167164
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.766648769378662,
      "learning_rate": 0.00010478381323856342,
      "loss": 2.7842,
      "step": 167165
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.415806531906128,
      "learning_rate": 0.00010478070722233118,
      "loss": 2.9942,
      "step": 167166
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.559095621109009,
      "learning_rate": 0.00010477760124239389,
      "loss": 2.6577,
      "step": 167167
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.452486276626587,
      "learning_rate": 0.00010477449529875204,
      "loss": 2.8942,
      "step": 167168
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5918538570404053,
      "learning_rate": 0.0001047713893914064,
      "loss": 3.0489,
      "step": 167169
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.290849208831787,
      "learning_rate": 0.00010476828352035732,
      "loss": 2.9825,
      "step": 167170
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.241992712020874,
      "learning_rate": 0.0001047651776856056,
      "loss": 2.8581,
      "step": 167171
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.56606388092041,
      "learning_rate": 0.00010476207188715165,
      "loss": 2.9259,
      "step": 167172
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.623528480529785,
      "learning_rate": 0.0001047589661249963,
      "loss": 3.2442,
      "step": 167173
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7137815952301025,
      "learning_rate": 0.00010475586039913975,
      "loss": 3.1069,
      "step": 167174
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7950143814086914,
      "learning_rate": 0.00010475275470958285,
      "loss": 2.8838,
      "step": 167175
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9000742435455322,
      "learning_rate": 0.00010474964905632603,
      "loss": 3.1923,
      "step": 167176
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.059101104736328,
      "learning_rate": 0.00010474654343936999,
      "loss": 3.0864,
      "step": 167177
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5710601806640625,
      "learning_rate": 0.00010474343785871518,
      "loss": 3.0029,
      "step": 167178
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.633208990097046,
      "learning_rate": 0.00010474033231436247,
      "loss": 2.9189,
      "step": 167179
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2181036472320557,
      "learning_rate": 0.000104737226806312,
      "loss": 3.0339,
      "step": 167180
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8104939460754395,
      "learning_rate": 0.00010473412133456467,
      "loss": 3.1661,
      "step": 167181
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.517214775085449,
      "learning_rate": 0.00010473101589912084,
      "loss": 2.8796,
      "step": 167182
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8540127277374268,
      "learning_rate": 0.00010472791049998131,
      "loss": 2.968,
      "step": 167183
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.408290147781372,
      "learning_rate": 0.00010472480513714643,
      "loss": 3.1902,
      "step": 167184
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7624900341033936,
      "learning_rate": 0.0001047216998106171,
      "loss": 3.2173,
      "step": 167185
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0924885272979736,
      "learning_rate": 0.00010471859452039352,
      "loss": 3.074,
      "step": 167186
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.787459135055542,
      "learning_rate": 0.00010471548926647651,
      "loss": 3.0923,
      "step": 167187
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7333807945251465,
      "learning_rate": 0.0001047123840488665,
      "loss": 2.9619,
      "step": 167188
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.078120708465576,
      "learning_rate": 0.00010470927886756422,
      "loss": 2.827,
      "step": 167189
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.858397960662842,
      "learning_rate": 0.00010470617372257009,
      "loss": 2.8663,
      "step": 167190
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1683034896850586,
      "learning_rate": 0.00010470306861388498,
      "loss": 2.9575,
      "step": 167191
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.69108247756958,
      "learning_rate": 0.00010469996354150903,
      "loss": 2.7577,
      "step": 167192
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7067999839782715,
      "learning_rate": 0.00010469685850544316,
      "loss": 3.1018,
      "step": 167193
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.4687836170196533,
      "learning_rate": 0.00010469375350568772,
      "loss": 2.9529,
      "step": 167194
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0273587703704834,
      "learning_rate": 0.0001046906485422435,
      "loss": 2.9219,
      "step": 167195
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.779451370239258,
      "learning_rate": 0.00010468754361511086,
      "loss": 2.891,
      "step": 167196
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5144805908203125,
      "learning_rate": 0.0001046844387242907,
      "loss": 3.0362,
      "step": 167197
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4714865684509277,
      "learning_rate": 0.00010468133386978322,
      "loss": 2.8076,
      "step": 167198
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9688053131103516,
      "learning_rate": 0.00010467822905158924,
      "loss": 2.9266,
      "step": 167199
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.123638153076172,
      "learning_rate": 0.00010467512426970917,
      "loss": 2.5889,
      "step": 167200
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.006194829940796,
      "learning_rate": 0.00010467201952414377,
      "loss": 3.0541,
      "step": 167201
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1861484050750732,
      "learning_rate": 0.00010466891481489344,
      "loss": 3.2778,
      "step": 167202
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4023444652557373,
      "learning_rate": 0.00010466581014195904,
      "loss": 2.8739,
      "step": 167203
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.476701259613037,
      "learning_rate": 0.00010466270550534079,
      "loss": 2.6539,
      "step": 167204
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3168740272521973,
      "learning_rate": 0.00010465960090503947,
      "loss": 3.0723,
      "step": 167205
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.121677875518799,
      "learning_rate": 0.00010465649634105555,
      "loss": 3.0195,
      "step": 167206
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.070744752883911,
      "learning_rate": 0.00010465339181338977,
      "loss": 3.0338,
      "step": 167207
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.4164323806762695,
      "learning_rate": 0.00010465028732204251,
      "loss": 3.0536,
      "step": 167208
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.785118579864502,
      "learning_rate": 0.00010464718286701467,
      "loss": 3.1084,
      "step": 167209
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2137832641601562,
      "learning_rate": 0.00010464407844830637,
      "loss": 2.8218,
      "step": 167210
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7975573539733887,
      "learning_rate": 0.00010464097406591854,
      "loss": 2.831,
      "step": 167211
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.573347568511963,
      "learning_rate": 0.00010463786971985154,
      "loss": 2.8456,
      "step": 167212
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.066878318786621,
      "learning_rate": 0.00010463476541010617,
      "loss": 2.9107,
      "step": 167213
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9209916591644287,
      "learning_rate": 0.00010463166113668276,
      "loss": 2.8697,
      "step": 167214
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.288919925689697,
      "learning_rate": 0.00010462855689958213,
      "loss": 2.7634,
      "step": 167215
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.443279266357422,
      "learning_rate": 0.00010462545269880471,
      "loss": 2.9808,
      "step": 167216
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3521347045898438,
      "learning_rate": 0.00010462234853435112,
      "loss": 3.0036,
      "step": 167217
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.368964672088623,
      "learning_rate": 0.00010461924440622185,
      "loss": 3.0321,
      "step": 167218
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.338452100753784,
      "learning_rate": 0.00010461614031441761,
      "loss": 2.7054,
      "step": 167219
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.347970962524414,
      "learning_rate": 0.00010461303625893883,
      "loss": 3.1725,
      "step": 167220
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6363418102264404,
      "learning_rate": 0.00010460993223978629,
      "loss": 2.9762,
      "step": 167221
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.564753770828247,
      "learning_rate": 0.00010460682825696042,
      "loss": 3.0978,
      "step": 167222
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.031043529510498,
      "learning_rate": 0.00010460372431046186,
      "loss": 2.5106,
      "step": 167223
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7517709732055664,
      "learning_rate": 0.00010460062040029103,
      "loss": 2.7671,
      "step": 167224
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5765023231506348,
      "learning_rate": 0.00010459751652644876,
      "loss": 2.7504,
      "step": 167225
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7309439182281494,
      "learning_rate": 0.00010459441268893537,
      "loss": 2.9279,
      "step": 167226
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.613938331604004,
      "learning_rate": 0.00010459130888775171,
      "loss": 3.2065,
      "step": 167227
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.8785529136657715,
      "learning_rate": 0.00010458820512289818,
      "loss": 2.7948,
      "step": 167228
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.6105661392211914,
      "learning_rate": 0.00010458510139437543,
      "loss": 3.0341,
      "step": 167229
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.0436160564422607,
      "learning_rate": 0.00010458199770218387,
      "loss": 2.8491,
      "step": 167230
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.665292978286743,
      "learning_rate": 0.00010457889404632429,
      "loss": 2.9045,
      "step": 167231
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.145925760269165,
      "learning_rate": 0.00010457579042679712,
      "loss": 2.8505,
      "step": 167232
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.058336019515991,
      "learning_rate": 0.00010457268684360307,
      "loss": 2.8624,
      "step": 167233
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.280862808227539,
      "learning_rate": 0.00010456958329674263,
      "loss": 2.6804,
      "step": 167234
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.029524803161621,
      "learning_rate": 0.00010456647978621643,
      "loss": 2.951,
      "step": 167235
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.8456239700317383,
      "learning_rate": 0.0001045633763120249,
      "loss": 2.9986,
      "step": 167236
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1441545486450195,
      "learning_rate": 0.00010456027287416883,
      "loss": 3.0541,
      "step": 167237
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2835657596588135,
      "learning_rate": 0.0001045571694726486,
      "loss": 3.0962,
      "step": 167238
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.08455753326416,
      "learning_rate": 0.00010455406610746496,
      "loss": 3.036,
      "step": 167239
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.431029796600342,
      "learning_rate": 0.00010455096277861843,
      "loss": 3.1334,
      "step": 167240
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.765916347503662,
      "learning_rate": 0.00010454785948610943,
      "loss": 2.8623,
      "step": 167241
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7696075439453125,
      "learning_rate": 0.0001045447562299388,
      "loss": 2.8397,
      "step": 167242
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.366387367248535,
      "learning_rate": 0.000104541653010107,
      "loss": 3.0061,
      "step": 167243
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3383822441101074,
      "learning_rate": 0.00010453854982661449,
      "loss": 3.0111,
      "step": 167244
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.179875612258911,
      "learning_rate": 0.00010453544667946206,
      "loss": 2.8801,
      "step": 167245
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.9842183589935303,
      "learning_rate": 0.00010453234356865014,
      "loss": 2.8576,
      "step": 167246
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.157702684402466,
      "learning_rate": 0.00010452924049417927,
      "loss": 3.1134,
      "step": 167247
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.636791944503784,
      "learning_rate": 0.00010452613745605021,
      "loss": 2.8102,
      "step": 167248
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.161470890045166,
      "learning_rate": 0.00010452303445426334,
      "loss": 2.7968,
      "step": 167249
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.242048501968384,
      "learning_rate": 0.00010451993148881941,
      "loss": 3.172,
      "step": 167250
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2448344230651855,
      "learning_rate": 0.00010451682855971891,
      "loss": 2.8938,
      "step": 167251
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9383692741394043,
      "learning_rate": 0.00010451372566696244,
      "loss": 2.8564,
      "step": 167252
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.845287322998047,
      "learning_rate": 0.00010451062281055047,
      "loss": 3.1826,
      "step": 167253
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1323063373565674,
      "learning_rate": 0.00010450751999048372,
      "loss": 2.9677,
      "step": 167254
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9064395427703857,
      "learning_rate": 0.00010450441720676267,
      "loss": 2.822,
      "step": 167255
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5316333770751953,
      "learning_rate": 0.00010450131445938801,
      "loss": 3.2094,
      "step": 167256
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.004268646240234,
      "learning_rate": 0.00010449821174836023,
      "loss": 3.0862,
      "step": 167257
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2495102882385254,
      "learning_rate": 0.00010449510907367993,
      "loss": 2.9033,
      "step": 167258
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.346270799636841,
      "learning_rate": 0.0001044920064353476,
      "loss": 2.9076,
      "step": 167259
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.177990674972534,
      "learning_rate": 0.00010448890383336398,
      "loss": 2.7559,
      "step": 167260
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5991690158843994,
      "learning_rate": 0.00010448580126772948,
      "loss": 2.9716,
      "step": 167261
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4253664016723633,
      "learning_rate": 0.00010448269873844482,
      "loss": 3.0481,
      "step": 167262
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5249857902526855,
      "learning_rate": 0.00010447959624551047,
      "loss": 2.9944,
      "step": 167263
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.408581256866455,
      "learning_rate": 0.0001044764937889272,
      "loss": 2.993,
      "step": 167264
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4787216186523438,
      "learning_rate": 0.00010447339136869528,
      "loss": 2.8141,
      "step": 167265
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3627371788024902,
      "learning_rate": 0.00010447028898481552,
      "loss": 2.5576,
      "step": 167266
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.906097888946533,
      "learning_rate": 0.00010446718663728835,
      "loss": 3.0969,
      "step": 167267
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1893818378448486,
      "learning_rate": 0.00010446408432611453,
      "loss": 2.5412,
      "step": 167268
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8043501377105713,
      "learning_rate": 0.0001044609820512944,
      "loss": 2.9187,
      "step": 167269
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5153579711914062,
      "learning_rate": 0.00010445787981282888,
      "loss": 3.073,
      "step": 167270
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2137997150421143,
      "learning_rate": 0.00010445477761071813,
      "loss": 2.7926,
      "step": 167271
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7848191261291504,
      "learning_rate": 0.00010445167544496302,
      "loss": 3.0816,
      "step": 167272
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.349391222000122,
      "learning_rate": 0.00010444857331556393,
      "loss": 2.7657,
      "step": 167273
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7453505992889404,
      "learning_rate": 0.00010444547122252165,
      "loss": 3.0371,
      "step": 167274
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.817178964614868,
      "learning_rate": 0.00010444236916583653,
      "loss": 3.0046,
      "step": 167275
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.054828405380249,
      "learning_rate": 0.00010443926714550948,
      "loss": 2.8117,
      "step": 167276
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.532299041748047,
      "learning_rate": 0.00010443616516154067,
      "loss": 2.9485,
      "step": 167277
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.384836435317993,
      "learning_rate": 0.00010443306321393095,
      "loss": 3.0277,
      "step": 167278
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.0994954109191895,
      "learning_rate": 0.00010442996130268073,
      "loss": 2.8657,
      "step": 167279
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9764256477355957,
      "learning_rate": 0.0001044268594277908,
      "loss": 3.0829,
      "step": 167280
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.680908203125,
      "learning_rate": 0.00010442375758926147,
      "loss": 2.8754,
      "step": 167281
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5755324363708496,
      "learning_rate": 0.00010442065578709354,
      "loss": 2.964,
      "step": 167282
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.138272523880005,
      "learning_rate": 0.00010441755402128755,
      "loss": 2.7892,
      "step": 167283
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.760672092437744,
      "learning_rate": 0.00010441445229184399,
      "loss": 3.204,
      "step": 167284
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.441016912460327,
      "learning_rate": 0.00010441135059876337,
      "loss": 2.8595,
      "step": 167285
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.0484459400177,
      "learning_rate": 0.00010440824894204651,
      "loss": 2.9184,
      "step": 167286
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4157142639160156,
      "learning_rate": 0.00010440514732169373,
      "loss": 3.0348,
      "step": 167287
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4508872032165527,
      "learning_rate": 0.00010440204573770583,
      "loss": 2.9032,
      "step": 167288
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7863497734069824,
      "learning_rate": 0.00010439894419008325,
      "loss": 2.9557,
      "step": 167289
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.635204792022705,
      "learning_rate": 0.00010439584267882664,
      "loss": 2.8658,
      "step": 167290
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.833305835723877,
      "learning_rate": 0.00010439274120393643,
      "loss": 3.0415,
      "step": 167291
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3300936222076416,
      "learning_rate": 0.0001043896397654134,
      "loss": 3.1349,
      "step": 167292
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1176326274871826,
      "learning_rate": 0.0001043865383632579,
      "loss": 2.8904,
      "step": 167293
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0137462615966797,
      "learning_rate": 0.00010438343699747075,
      "loss": 2.9017,
      "step": 167294
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5833065509796143,
      "learning_rate": 0.00010438033566805244,
      "loss": 3.0025,
      "step": 167295
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4898810386657715,
      "learning_rate": 0.00010437723437500348,
      "loss": 3.0024,
      "step": 167296
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7162673473358154,
      "learning_rate": 0.00010437413311832442,
      "loss": 3.1286,
      "step": 167297
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.672935962677002,
      "learning_rate": 0.00010437103189801596,
      "loss": 3.2305,
      "step": 167298
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3243408203125,
      "learning_rate": 0.00010436793071407855,
      "loss": 3.016,
      "step": 167299
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.103530168533325,
      "learning_rate": 0.00010436482956651293,
      "loss": 3.3263,
      "step": 167300
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.764519691467285,
      "learning_rate": 0.00010436172845531956,
      "loss": 2.7859,
      "step": 167301
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5384109020233154,
      "learning_rate": 0.00010435862738049907,
      "loss": 3.0248,
      "step": 167302
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.370635986328125,
      "learning_rate": 0.00010435552634205187,
      "loss": 2.9926,
      "step": 167303
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.357550859451294,
      "learning_rate": 0.00010435242533997878,
      "loss": 2.856,
      "step": 167304
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.614776611328125,
      "learning_rate": 0.00010434932437428019,
      "loss": 2.8491,
      "step": 167305
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.600240707397461,
      "learning_rate": 0.00010434622344495684,
      "loss": 2.7918,
      "step": 167306
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5246760845184326,
      "learning_rate": 0.00010434312255200922,
      "loss": 2.949,
      "step": 167307
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0615038871765137,
      "learning_rate": 0.00010434002169543791,
      "loss": 3.1873,
      "step": 167308
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7565596103668213,
      "learning_rate": 0.00010433692087524335,
      "loss": 2.9444,
      "step": 167309
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.723632574081421,
      "learning_rate": 0.00010433382009142637,
      "loss": 3.2114,
      "step": 167310
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.00260853767395,
      "learning_rate": 0.00010433071934398737,
      "loss": 2.8782,
      "step": 167311
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.402505874633789,
      "learning_rate": 0.00010432761863292702,
      "loss": 3.192,
      "step": 167312
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.214906215667725,
      "learning_rate": 0.00010432451795824591,
      "loss": 2.8306,
      "step": 167313
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.512883424758911,
      "learning_rate": 0.00010432141731994453,
      "loss": 2.924,
      "step": 167314
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.457271099090576,
      "learning_rate": 0.00010431831671802342,
      "loss": 3.0141,
      "step": 167315
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2574806213378906,
      "learning_rate": 0.0001043152161524833,
      "loss": 3.0341,
      "step": 167316
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.401862621307373,
      "learning_rate": 0.0001043121156233246,
      "loss": 2.9463,
      "step": 167317
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.971177339553833,
      "learning_rate": 0.00010430901513054807,
      "loss": 2.8915,
      "step": 167318
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2987887859344482,
      "learning_rate": 0.00010430591467415415,
      "loss": 3.03,
      "step": 167319
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.440369129180908,
      "learning_rate": 0.00010430281425414349,
      "loss": 2.775,
      "step": 167320
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.224248170852661,
      "learning_rate": 0.00010429971387051652,
      "loss": 2.8037,
      "step": 167321
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9572970867156982,
      "learning_rate": 0.00010429661352327405,
      "loss": 3.0858,
      "step": 167322
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4343454837799072,
      "learning_rate": 0.00010429351321241641,
      "loss": 2.9116,
      "step": 167323
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.758796453475952,
      "learning_rate": 0.0001042904129379444,
      "loss": 2.9674,
      "step": 167324
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5044009685516357,
      "learning_rate": 0.00010428731269985852,
      "loss": 2.9359,
      "step": 167325
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3567709922790527,
      "learning_rate": 0.0001042842124981592,
      "loss": 3.0147,
      "step": 167326
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9008371829986572,
      "learning_rate": 0.00010428111233284723,
      "loss": 2.9026,
      "step": 167327
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.338736057281494,
      "learning_rate": 0.00010427801220392313,
      "loss": 2.9331,
      "step": 167328
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6288349628448486,
      "learning_rate": 0.00010427491211138731,
      "loss": 2.954,
      "step": 167329
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8430371284484863,
      "learning_rate": 0.00010427181205524059,
      "loss": 2.7882,
      "step": 167330
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6764907836914062,
      "learning_rate": 0.00010426871203548343,
      "loss": 2.9199,
      "step": 167331
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.081975221633911,
      "learning_rate": 0.00010426561205211632,
      "loss": 3.0232,
      "step": 167332
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.449354648590088,
      "learning_rate": 0.00010426251210514,
      "loss": 2.8115,
      "step": 167333
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.557253122329712,
      "learning_rate": 0.00010425941219455492,
      "loss": 3.0157,
      "step": 167334
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.00217866897583,
      "learning_rate": 0.00010425631232036178,
      "loss": 2.9546,
      "step": 167335
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5562846660614014,
      "learning_rate": 0.0001042532124825611,
      "loss": 2.8424,
      "step": 167336
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3427698612213135,
      "learning_rate": 0.00010425011268115344,
      "loss": 2.8638,
      "step": 167337
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.160202980041504,
      "learning_rate": 0.00010424701291613927,
      "loss": 2.9009,
      "step": 167338
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8458988666534424,
      "learning_rate": 0.00010424391318751939,
      "loss": 3.0385,
      "step": 167339
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.586411237716675,
      "learning_rate": 0.00010424081349529412,
      "loss": 2.9637,
      "step": 167340
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.458040475845337,
      "learning_rate": 0.00010423771383946432,
      "loss": 2.9381,
      "step": 167341
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1358580589294434,
      "learning_rate": 0.00010423461422003043,
      "loss": 2.895,
      "step": 167342
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.831705331802368,
      "learning_rate": 0.000104231514636993,
      "loss": 3.1191,
      "step": 167343
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5128941535949707,
      "learning_rate": 0.00010422841509035254,
      "loss": 2.8777,
      "step": 167344
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3804149627685547,
      "learning_rate": 0.00010422531558010981,
      "loss": 3.025,
      "step": 167345
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.53311824798584,
      "learning_rate": 0.00010422221610626517,
      "loss": 3.0759,
      "step": 167346
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.247140407562256,
      "learning_rate": 0.00010421911666881943,
      "loss": 2.9356,
      "step": 167347
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.80804705619812,
      "learning_rate": 0.00010421601726777295,
      "loss": 3.1026,
      "step": 167348
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4553496837615967,
      "learning_rate": 0.00010421291790312652,
      "loss": 3.0169,
      "step": 167349
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8767738342285156,
      "learning_rate": 0.00010420981857488061,
      "loss": 3.0825,
      "step": 167350
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.497252941131592,
      "learning_rate": 0.00010420671928303577,
      "loss": 2.9653,
      "step": 167351
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4289839267730713,
      "learning_rate": 0.00010420362002759249,
      "loss": 2.8803,
      "step": 167352
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0735881328582764,
      "learning_rate": 0.00010420052080855158,
      "loss": 2.6685,
      "step": 167353
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.044553279876709,
      "learning_rate": 0.00010419742162591337,
      "loss": 2.9225,
      "step": 167354
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.0457661151885986,
      "learning_rate": 0.00010419432247967866,
      "loss": 2.9772,
      "step": 167355
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6985292434692383,
      "learning_rate": 0.00010419122336984793,
      "loss": 3.1904,
      "step": 167356
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5336499214172363,
      "learning_rate": 0.00010418812429642172,
      "loss": 2.9936,
      "step": 167357
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.783599376678467,
      "learning_rate": 0.00010418502525940058,
      "loss": 3.2217,
      "step": 167358
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2144479751586914,
      "learning_rate": 0.00010418192625878519,
      "loss": 3.0255,
      "step": 167359
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.661206007003784,
      "learning_rate": 0.000104178827294576,
      "loss": 3.0084,
      "step": 167360
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4306583404541016,
      "learning_rate": 0.00010417572836677377,
      "loss": 2.7529,
      "step": 167361
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5145363807678223,
      "learning_rate": 0.00010417262947537897,
      "loss": 3.0173,
      "step": 167362
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7156929969787598,
      "learning_rate": 0.00010416953062039216,
      "loss": 2.9385,
      "step": 167363
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.850527048110962,
      "learning_rate": 0.00010416643180181382,
      "loss": 3.0686,
      "step": 167364
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.426130771636963,
      "learning_rate": 0.00010416333301964475,
      "loss": 2.9515,
      "step": 167365
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3486483097076416,
      "learning_rate": 0.00010416023427388533,
      "loss": 3.2471,
      "step": 167366
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.710071563720703,
      "learning_rate": 0.00010415713556453632,
      "loss": 2.8868,
      "step": 167367
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4265191555023193,
      "learning_rate": 0.00010415403689159818,
      "loss": 3.0023,
      "step": 167368
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5440914630889893,
      "learning_rate": 0.00010415093825507148,
      "loss": 3.1723,
      "step": 167369
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7872438430786133,
      "learning_rate": 0.00010414783965495676,
      "loss": 2.6385,
      "step": 167370
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6457839012145996,
      "learning_rate": 0.00010414474109125474,
      "loss": 2.8215,
      "step": 167371
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8184549808502197,
      "learning_rate": 0.00010414164256396579,
      "loss": 2.8706,
      "step": 167372
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.515035629272461,
      "learning_rate": 0.00010413854407309071,
      "loss": 2.9633,
      "step": 167373
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.351419687271118,
      "learning_rate": 0.00010413544561862997,
      "loss": 2.8287,
      "step": 167374
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2981011867523193,
      "learning_rate": 0.00010413234720058417,
      "loss": 2.9276,
      "step": 167375
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7224879264831543,
      "learning_rate": 0.00010412924881895374,
      "loss": 3.1033,
      "step": 167376
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4291276931762695,
      "learning_rate": 0.00010412615047373952,
      "loss": 3.1562,
      "step": 167377
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.708629846572876,
      "learning_rate": 0.0001041230521649418,
      "loss": 3.0475,
      "step": 167378
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7618408203125,
      "learning_rate": 0.00010411995389256141,
      "loss": 2.734,
      "step": 167379
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.744615077972412,
      "learning_rate": 0.00010411685565659887,
      "loss": 2.8371,
      "step": 167380
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5137925148010254,
      "learning_rate": 0.00010411375745705465,
      "loss": 2.8355,
      "step": 167381
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3581721782684326,
      "learning_rate": 0.0001041106592939293,
      "loss": 2.9296,
      "step": 167382
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6448440551757812,
      "learning_rate": 0.00010410756116722355,
      "loss": 3.1563,
      "step": 167383
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.143897294998169,
      "learning_rate": 0.00010410446307693784,
      "loss": 3.1846,
      "step": 167384
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.722405195236206,
      "learning_rate": 0.00010410136502307286,
      "loss": 3.0803,
      "step": 167385
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.852142333984375,
      "learning_rate": 0.00010409826700562917,
      "loss": 2.7041,
      "step": 167386
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9334068298339844,
      "learning_rate": 0.00010409516902460732,
      "loss": 2.8889,
      "step": 167387
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.78261137008667,
      "learning_rate": 0.00010409207108000774,
      "loss": 3.258,
      "step": 167388
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3647148609161377,
      "learning_rate": 0.00010408897317183126,
      "loss": 2.9655,
      "step": 167389
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.917433261871338,
      "learning_rate": 0.00010408587530007823,
      "loss": 2.9524,
      "step": 167390
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7487831115722656,
      "learning_rate": 0.00010408277746474948,
      "loss": 2.8133,
      "step": 167391
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.388681411743164,
      "learning_rate": 0.00010407967966584538,
      "loss": 2.7392,
      "step": 167392
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.706542730331421,
      "learning_rate": 0.00010407658190336658,
      "loss": 2.9001,
      "step": 167393
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4825503826141357,
      "learning_rate": 0.00010407348417731358,
      "loss": 2.8096,
      "step": 167394
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.8915863037109375,
      "learning_rate": 0.00010407038648768708,
      "loss": 2.7948,
      "step": 167395
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0003507137298584,
      "learning_rate": 0.0001040672888344875,
      "loss": 2.953,
      "step": 167396
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4986541271209717,
      "learning_rate": 0.00010406419121771561,
      "loss": 2.9509,
      "step": 167397
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.837944984436035,
      "learning_rate": 0.00010406109363737188,
      "loss": 2.9746,
      "step": 167398
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9008090496063232,
      "learning_rate": 0.00010405799609345691,
      "loss": 2.7692,
      "step": 167399
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2816903591156006,
      "learning_rate": 0.00010405489858597115,
      "loss": 3.1234,
      "step": 167400
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.736799478530884,
      "learning_rate": 0.00010405180111491537,
      "loss": 3.2088,
      "step": 167401
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9735829830169678,
      "learning_rate": 0.00010404870368028999,
      "loss": 2.9514,
      "step": 167402
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.072096824645996,
      "learning_rate": 0.00010404560628209572,
      "loss": 2.8268,
      "step": 167403
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0124807357788086,
      "learning_rate": 0.00010404250892033309,
      "loss": 2.9153,
      "step": 167404
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0620579719543457,
      "learning_rate": 0.00010403941159500263,
      "loss": 2.9641,
      "step": 167405
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.711191177368164,
      "learning_rate": 0.00010403631430610487,
      "loss": 2.9973,
      "step": 167406
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3234171867370605,
      "learning_rate": 0.00010403321705364054,
      "loss": 3.0243,
      "step": 167407
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7674777507781982,
      "learning_rate": 0.00010403011983761006,
      "loss": 2.9936,
      "step": 167408
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6068732738494873,
      "learning_rate": 0.00010402702265801418,
      "loss": 3.0037,
      "step": 167409
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6734981536865234,
      "learning_rate": 0.00010402392551485333,
      "loss": 2.8344,
      "step": 167410
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.541882276535034,
      "learning_rate": 0.00010402082840812809,
      "loss": 3.0415,
      "step": 167411
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.1130290031433105,
      "learning_rate": 0.00010401773133783913,
      "loss": 2.9329,
      "step": 167412
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6875221729278564,
      "learning_rate": 0.00010401463430398699,
      "loss": 3.2499,
      "step": 167413
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5411317348480225,
      "learning_rate": 0.00010401153730657214,
      "loss": 2.892,
      "step": 167414
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7595772743225098,
      "learning_rate": 0.00010400844034559534,
      "loss": 3.0136,
      "step": 167415
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.7060577869415283,
      "learning_rate": 0.00010400534342105695,
      "loss": 3.0911,
      "step": 167416
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.030040740966797,
      "learning_rate": 0.00010400224653295778,
      "loss": 2.9998,
      "step": 167417
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.462661027908325,
      "learning_rate": 0.00010399914968129831,
      "loss": 3.1442,
      "step": 167418
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0241470336914062,
      "learning_rate": 0.00010399605286607908,
      "loss": 3.0138,
      "step": 167419
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.25964617729187,
      "learning_rate": 0.0001039929560873006,
      "loss": 3.066,
      "step": 167420
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.732654333114624,
      "learning_rate": 0.00010398985934496361,
      "loss": 2.8526,
      "step": 167421
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7517642974853516,
      "learning_rate": 0.0001039867626390685,
      "loss": 2.9478,
      "step": 167422
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.095249652862549,
      "learning_rate": 0.00010398366596961607,
      "loss": 3.1703,
      "step": 167423
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.429854393005371,
      "learning_rate": 0.00010398056933660675,
      "loss": 2.9223,
      "step": 167424
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.594266176223755,
      "learning_rate": 0.0001039774727400411,
      "loss": 3.156,
      "step": 167425
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.827962875366211,
      "learning_rate": 0.00010397437617991977,
      "loss": 2.9812,
      "step": 167426
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4068479537963867,
      "learning_rate": 0.00010397127965624334,
      "loss": 2.7642,
      "step": 167427
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.908140182495117,
      "learning_rate": 0.00010396818316901224,
      "loss": 3.0025,
      "step": 167428
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.181461811065674,
      "learning_rate": 0.00010396508671822727,
      "loss": 2.922,
      "step": 167429
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.462268829345703,
      "learning_rate": 0.00010396199030388886,
      "loss": 3.0476,
      "step": 167430
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.945059061050415,
      "learning_rate": 0.00010395889392599754,
      "loss": 3.0435,
      "step": 167431
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5236692428588867,
      "learning_rate": 0.00010395579758455407,
      "loss": 2.7721,
      "step": 167432
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3311119079589844,
      "learning_rate": 0.00010395270127955881,
      "loss": 2.7038,
      "step": 167433
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.055575370788574,
      "learning_rate": 0.00010394960501101251,
      "loss": 2.9377,
      "step": 167434
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.079186916351318,
      "learning_rate": 0.00010394650877891574,
      "loss": 2.9215,
      "step": 167435
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.852081775665283,
      "learning_rate": 0.00010394341258326897,
      "loss": 3.1056,
      "step": 167436
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.586024761199951,
      "learning_rate": 0.00010394031642407275,
      "loss": 3.0116,
      "step": 167437
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6742305755615234,
      "learning_rate": 0.00010393722030132779,
      "loss": 2.9736,
      "step": 167438
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2562148571014404,
      "learning_rate": 0.00010393412421503452,
      "loss": 2.8851,
      "step": 167439
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5180718898773193,
      "learning_rate": 0.00010393102816519368,
      "loss": 2.9402,
      "step": 167440
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.472219228744507,
      "learning_rate": 0.0001039279321518058,
      "loss": 3.1454,
      "step": 167441
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7388393878936768,
      "learning_rate": 0.00010392483617487141,
      "loss": 2.9423,
      "step": 167442
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.153317928314209,
      "learning_rate": 0.00010392174023439096,
      "loss": 3.0544,
      "step": 167443
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.858393430709839,
      "learning_rate": 0.00010391864433036528,
      "loss": 2.9367,
      "step": 167444
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4358062744140625,
      "learning_rate": 0.00010391554846279475,
      "loss": 3.0448,
      "step": 167445
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.1440110206604,
      "learning_rate": 0.00010391245263168009,
      "loss": 2.7847,
      "step": 167446
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.455939769744873,
      "learning_rate": 0.00010390935683702183,
      "loss": 2.8835,
      "step": 167447
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.64605975151062,
      "learning_rate": 0.00010390626107882051,
      "loss": 2.7295,
      "step": 167448
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0117874145507812,
      "learning_rate": 0.00010390316535707662,
      "loss": 2.6615,
      "step": 167449
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7421326637268066,
      "learning_rate": 0.00010390006967179091,
      "loss": 2.9546,
      "step": 167450
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.598466396331787,
      "learning_rate": 0.0001038969740229638,
      "loss": 3.1277,
      "step": 167451
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.880221366882324,
      "learning_rate": 0.00010389387841059605,
      "loss": 3.242,
      "step": 167452
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.413055896759033,
      "learning_rate": 0.00010389078283468812,
      "loss": 2.9473,
      "step": 167453
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9368033409118652,
      "learning_rate": 0.00010388768729524061,
      "loss": 2.9361,
      "step": 167454
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4431498050689697,
      "learning_rate": 0.00010388459179225396,
      "loss": 2.8004,
      "step": 167455
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4417271614074707,
      "learning_rate": 0.00010388149632572896,
      "loss": 2.8141,
      "step": 167456
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5046725273132324,
      "learning_rate": 0.00010387840089566603,
      "loss": 2.8742,
      "step": 167457
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.734614610671997,
      "learning_rate": 0.00010387530550206588,
      "loss": 2.8685,
      "step": 167458
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7663047313690186,
      "learning_rate": 0.00010387221014492902,
      "loss": 2.9872,
      "step": 167459
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.9213948249816895,
      "learning_rate": 0.00010386911482425601,
      "loss": 2.7357,
      "step": 167460
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.08360481262207,
      "learning_rate": 0.00010386601954004733,
      "loss": 3.0885,
      "step": 167461
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9149820804595947,
      "learning_rate": 0.00010386292429230379,
      "loss": 2.9907,
      "step": 167462
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7741007804870605,
      "learning_rate": 0.0001038598290810257,
      "loss": 3.0389,
      "step": 167463
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.995554208755493,
      "learning_rate": 0.00010385673390621388,
      "loss": 2.8771,
      "step": 167464
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7748124599456787,
      "learning_rate": 0.0001038536387678688,
      "loss": 3.0565,
      "step": 167465
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.485018730163574,
      "learning_rate": 0.00010385054366599104,
      "loss": 2.9722,
      "step": 167466
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.45800518989563,
      "learning_rate": 0.00010384744860058104,
      "loss": 2.9752,
      "step": 167467
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.950798273086548,
      "learning_rate": 0.00010384435357163961,
      "loss": 3.0765,
      "step": 167468
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7184054851531982,
      "learning_rate": 0.00010384125857916714,
      "loss": 2.8886,
      "step": 167469
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1599061489105225,
      "learning_rate": 0.00010383816362316435,
      "loss": 3.2499,
      "step": 167470
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5466573238372803,
      "learning_rate": 0.00010383506870363176,
      "loss": 2.8326,
      "step": 167471
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5692169666290283,
      "learning_rate": 0.00010383197382056998,
      "loss": 2.6599,
      "step": 167472
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.275686740875244,
      "learning_rate": 0.00010382887897397937,
      "loss": 2.9596,
      "step": 167473
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4832217693328857,
      "learning_rate": 0.00010382578416386083,
      "loss": 3.0042,
      "step": 167474
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8458194732666016,
      "learning_rate": 0.00010382268939021464,
      "loss": 2.8237,
      "step": 167475
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.974639415740967,
      "learning_rate": 0.00010381959465304161,
      "loss": 3.2987,
      "step": 167476
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.560706615447998,
      "learning_rate": 0.00010381649995234224,
      "loss": 2.7671,
      "step": 167477
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5425071716308594,
      "learning_rate": 0.0001038134052881171,
      "loss": 3.1409,
      "step": 167478
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.614696741104126,
      "learning_rate": 0.00010381031066036665,
      "loss": 3.1392,
      "step": 167479
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.62544584274292,
      "learning_rate": 0.00010380721606909165,
      "loss": 2.9214,
      "step": 167480
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5444540977478027,
      "learning_rate": 0.00010380412151429247,
      "loss": 2.9643,
      "step": 167481
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1093132495880127,
      "learning_rate": 0.00010380102699596994,
      "loss": 2.9461,
      "step": 167482
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4432733058929443,
      "learning_rate": 0.00010379793251412439,
      "loss": 3.2088,
      "step": 167483
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9302525520324707,
      "learning_rate": 0.00010379483806875674,
      "loss": 2.8727,
      "step": 167484
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.291210174560547,
      "learning_rate": 0.00010379174365986708,
      "loss": 2.9301,
      "step": 167485
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3539936542510986,
      "learning_rate": 0.00010378864928745638,
      "loss": 3.0295,
      "step": 167486
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2745015621185303,
      "learning_rate": 0.00010378555495152496,
      "loss": 2.8677,
      "step": 167487
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.8294880390167236,
      "learning_rate": 0.00010378246065207359,
      "loss": 2.8715,
      "step": 167488
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7268407344818115,
      "learning_rate": 0.0001037793663891027,
      "loss": 3.0098,
      "step": 167489
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.296612024307251,
      "learning_rate": 0.0001037762721626131,
      "loss": 2.7827,
      "step": 167490
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.752039909362793,
      "learning_rate": 0.00010377317797260502,
      "loss": 3.0853,
      "step": 167491
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6934258937835693,
      "learning_rate": 0.0001037700838190793,
      "loss": 2.9225,
      "step": 167492
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.122232675552368,
      "learning_rate": 0.00010376698970203632,
      "loss": 3.2421,
      "step": 167493
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3323442935943604,
      "learning_rate": 0.00010376389562147686,
      "loss": 3.0891,
      "step": 167494
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8755738735198975,
      "learning_rate": 0.0001037608015774013,
      "loss": 2.9177,
      "step": 167495
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8035476207733154,
      "learning_rate": 0.00010375770756981042,
      "loss": 2.9872,
      "step": 167496
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2557289600372314,
      "learning_rate": 0.00010375461359870467,
      "loss": 2.8934,
      "step": 167497
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7476837635040283,
      "learning_rate": 0.00010375151966408462,
      "loss": 3.1388,
      "step": 167498
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.0560688972473145,
      "learning_rate": 0.0001037484257659508,
      "loss": 3.0447,
      "step": 167499
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.28525447845459,
      "learning_rate": 0.00010374533190430396,
      "loss": 2.9065,
      "step": 167500
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.478080987930298,
      "learning_rate": 0.00010374223807914446,
      "loss": 3.1639,
      "step": 167501
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2456045150756836,
      "learning_rate": 0.00010373914429047309,
      "loss": 3.0244,
      "step": 167502
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3611509799957275,
      "learning_rate": 0.00010373605053829028,
      "loss": 2.9244,
      "step": 167503
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6092331409454346,
      "learning_rate": 0.00010373295682259669,
      "loss": 3.1145,
      "step": 167504
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.659785032272339,
      "learning_rate": 0.0001037298631433927,
      "loss": 2.8939,
      "step": 167505
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7522904872894287,
      "learning_rate": 0.00010372676950067918,
      "loss": 3.1936,
      "step": 167506
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6347546577453613,
      "learning_rate": 0.00010372367589445645,
      "loss": 2.8497,
      "step": 167507
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8240790367126465,
      "learning_rate": 0.00010372058232472527,
      "loss": 3.0932,
      "step": 167508
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.0801215171813965,
      "learning_rate": 0.00010371748879148616,
      "loss": 2.6399,
      "step": 167509
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2076754570007324,
      "learning_rate": 0.00010371439529473956,
      "loss": 3.0041,
      "step": 167510
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6152944564819336,
      "learning_rate": 0.00010371130183448628,
      "loss": 3.1642,
      "step": 167511
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9668376445770264,
      "learning_rate": 0.00010370820841072675,
      "loss": 3.1081,
      "step": 167512
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9088938236236572,
      "learning_rate": 0.00010370511502346148,
      "loss": 2.9706,
      "step": 167513
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.789430141448975,
      "learning_rate": 0.00010370202167269122,
      "loss": 2.9396,
      "step": 167514
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1440067291259766,
      "learning_rate": 0.00010369892835841649,
      "loss": 2.9383,
      "step": 167515
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2438554763793945,
      "learning_rate": 0.0001036958350806377,
      "loss": 2.8203,
      "step": 167516
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8337314128875732,
      "learning_rate": 0.00010369274183935569,
      "loss": 3.1627,
      "step": 167517
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3444857597351074,
      "learning_rate": 0.00010368964863457088,
      "loss": 3.0608,
      "step": 167518
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2796471118927,
      "learning_rate": 0.00010368655546628378,
      "loss": 2.9149,
      "step": 167519
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1364688873291016,
      "learning_rate": 0.00010368346233449517,
      "loss": 3.1667,
      "step": 167520
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.421104907989502,
      "learning_rate": 0.00010368036923920548,
      "loss": 2.9044,
      "step": 167521
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7576286792755127,
      "learning_rate": 0.00010367727618041525,
      "loss": 2.8874,
      "step": 167522
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.9764277935028076,
      "learning_rate": 0.00010367418315812519,
      "loss": 3.1444,
      "step": 167523
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8557193279266357,
      "learning_rate": 0.00010367109017233571,
      "loss": 3.1523,
      "step": 167524
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.202277183532715,
      "learning_rate": 0.00010366799722304758,
      "loss": 2.9944,
      "step": 167525
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.4143013954162598,
      "learning_rate": 0.00010366490431026129,
      "loss": 3.0374,
      "step": 167526
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.254971742630005,
      "learning_rate": 0.00010366181143397742,
      "loss": 2.9437,
      "step": 167527
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.39046573638916,
      "learning_rate": 0.00010365871859419637,
      "loss": 2.8487,
      "step": 167528
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.866981029510498,
      "learning_rate": 0.00010365562579091901,
      "loss": 2.9893,
      "step": 167529
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.899038791656494,
      "learning_rate": 0.00010365253302414567,
      "loss": 3.0585,
      "step": 167530
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.493619918823242,
      "learning_rate": 0.00010364944029387712,
      "loss": 2.8569,
      "step": 167531
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2293853759765625,
      "learning_rate": 0.00010364634760011387,
      "loss": 2.9477,
      "step": 167532
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.702266216278076,
      "learning_rate": 0.00010364325494285644,
      "loss": 2.9584,
      "step": 167533
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8346328735351562,
      "learning_rate": 0.00010364016232210532,
      "loss": 2.9824,
      "step": 167534
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9586851596832275,
      "learning_rate": 0.00010363706973786133,
      "loss": 2.9861,
      "step": 167535
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4547176361083984,
      "learning_rate": 0.00010363397719012484,
      "loss": 3.0281,
      "step": 167536
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4017834663391113,
      "learning_rate": 0.00010363088467889657,
      "loss": 3.1712,
      "step": 167537
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4534401893615723,
      "learning_rate": 0.000103627792204177,
      "loss": 3.2288,
      "step": 167538
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0829968452453613,
      "learning_rate": 0.00010362469976596677,
      "loss": 2.9941,
      "step": 167539
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.801096200942993,
      "learning_rate": 0.00010362160736426632,
      "loss": 2.8813,
      "step": 167540
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9122366905212402,
      "learning_rate": 0.00010361851499907641,
      "loss": 2.8115,
      "step": 167541
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.412078857421875,
      "learning_rate": 0.00010361542267039738,
      "loss": 2.809,
      "step": 167542
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.115311622619629,
      "learning_rate": 0.00010361233037823013,
      "loss": 2.9778,
      "step": 167543
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.346333980560303,
      "learning_rate": 0.00010360923812257499,
      "loss": 2.9517,
      "step": 167544
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.415958404541016,
      "learning_rate": 0.00010360614590343262,
      "loss": 3.1946,
      "step": 167545
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.875364065170288,
      "learning_rate": 0.0001036030537208035,
      "loss": 2.9685,
      "step": 167546
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3974223136901855,
      "learning_rate": 0.00010359996157468835,
      "loss": 2.8507,
      "step": 167547
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.271038293838501,
      "learning_rate": 0.00010359686946508758,
      "loss": 3.1965,
      "step": 167548
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.4649770259857178,
      "learning_rate": 0.00010359377739200196,
      "loss": 2.8369,
      "step": 167549
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.971998929977417,
      "learning_rate": 0.00010359068535543185,
      "loss": 2.7406,
      "step": 167550
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.527955532073975,
      "learning_rate": 0.00010358759335537815,
      "loss": 2.9294,
      "step": 167551
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.934713363647461,
      "learning_rate": 0.00010358450139184104,
      "loss": 2.8061,
      "step": 167552
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.125714063644409,
      "learning_rate": 0.00010358140946482137,
      "loss": 3.1697,
      "step": 167553
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.9837779998779297,
      "learning_rate": 0.00010357831757431953,
      "loss": 3.0988,
      "step": 167554
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.8394086360931396,
      "learning_rate": 0.00010357522572033626,
      "loss": 2.8767,
      "step": 167555
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.169213056564331,
      "learning_rate": 0.00010357213390287198,
      "loss": 3.1309,
      "step": 167556
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.981985569000244,
      "learning_rate": 0.00010356904212192758,
      "loss": 2.9315,
      "step": 167557
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.483715534210205,
      "learning_rate": 0.00010356595037750316,
      "loss": 2.8767,
      "step": 167558
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8787965774536133,
      "learning_rate": 0.00010356285866959965,
      "loss": 3.0861,
      "step": 167559
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.75681734085083,
      "learning_rate": 0.00010355976699821741,
      "loss": 2.4969,
      "step": 167560
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.313901424407959,
      "learning_rate": 0.00010355667536335722,
      "loss": 2.828,
      "step": 167561
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.70546817779541,
      "learning_rate": 0.00010355358376501945,
      "loss": 2.94,
      "step": 167562
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5675430297851562,
      "learning_rate": 0.000103550492203205,
      "loss": 2.9724,
      "step": 167563
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2188425064086914,
      "learning_rate": 0.000103547400677914,
      "loss": 3.0621,
      "step": 167564
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.246539115905762,
      "learning_rate": 0.00010354430918914736,
      "loss": 2.9595,
      "step": 167565
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.900313854217529,
      "learning_rate": 0.00010354121773690543,
      "loss": 2.9832,
      "step": 167566
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.422552108764648,
      "learning_rate": 0.00010353812632118898,
      "loss": 2.8197,
      "step": 167567
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.070953369140625,
      "learning_rate": 0.00010353503494199841,
      "loss": 3.0153,
      "step": 167568
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.662761926651001,
      "learning_rate": 0.00010353194359933458,
      "loss": 2.9639,
      "step": 167569
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.025972604751587,
      "learning_rate": 0.00010352885229319772,
      "loss": 2.9509,
      "step": 167570
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2556655406951904,
      "learning_rate": 0.0001035257610235886,
      "loss": 2.8434,
      "step": 167571
    },
    {
      "epoch": 2.18,
      "grad_norm": 7.504300594329834,
      "learning_rate": 0.00010352266979050769,
      "loss": 3.0099,
      "step": 167572
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.047135829925537,
      "learning_rate": 0.00010351957859395569,
      "loss": 2.7492,
      "step": 167573
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1052420139312744,
      "learning_rate": 0.00010351648743393301,
      "loss": 2.8147,
      "step": 167574
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.529048204421997,
      "learning_rate": 0.00010351339631044053,
      "loss": 2.8338,
      "step": 167575
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.621129035949707,
      "learning_rate": 0.00010351030522347844,
      "loss": 3.2047,
      "step": 167576
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.645648241043091,
      "learning_rate": 0.0001035072141730476,
      "loss": 2.8912,
      "step": 167577
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.986865997314453,
      "learning_rate": 0.00010350412315914832,
      "loss": 2.896,
      "step": 167578
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.17120099067688,
      "learning_rate": 0.0001035010321817815,
      "loss": 2.8067,
      "step": 167579
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4559903144836426,
      "learning_rate": 0.00010349794124094741,
      "loss": 2.8464,
      "step": 167580
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5528147220611572,
      "learning_rate": 0.00010349485033664694,
      "loss": 2.847,
      "step": 167581
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6382391452789307,
      "learning_rate": 0.00010349175946888034,
      "loss": 2.902,
      "step": 167582
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5559420585632324,
      "learning_rate": 0.00010348866863764839,
      "loss": 3.0569,
      "step": 167583
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5734825134277344,
      "learning_rate": 0.00010348557784295154,
      "loss": 2.9827,
      "step": 167584
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.209573745727539,
      "learning_rate": 0.0001034824870847905,
      "loss": 2.8456,
      "step": 167585
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2086617946624756,
      "learning_rate": 0.0001034793963631657,
      "loss": 2.8785,
      "step": 167586
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2816579341888428,
      "learning_rate": 0.00010347630567807787,
      "loss": 2.8167,
      "step": 167587
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4189953804016113,
      "learning_rate": 0.00010347321502952749,
      "loss": 2.9823,
      "step": 167588
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0055949687957764,
      "learning_rate": 0.00010347012441751517,
      "loss": 2.7886,
      "step": 167589
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0789859294891357,
      "learning_rate": 0.00010346703384204137,
      "loss": 2.9257,
      "step": 167590
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9472970962524414,
      "learning_rate": 0.00010346394330310687,
      "loss": 3.12,
      "step": 167591
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3693130016326904,
      "learning_rate": 0.00010346085280071201,
      "loss": 2.9837,
      "step": 167592
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.400271415710449,
      "learning_rate": 0.0001034577623348576,
      "loss": 3.0838,
      "step": 167593
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.466189384460449,
      "learning_rate": 0.00010345467190554409,
      "loss": 2.8986,
      "step": 167594
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.741853713989258,
      "learning_rate": 0.00010345158151277196,
      "loss": 2.927,
      "step": 167595
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5245776176452637,
      "learning_rate": 0.00010344849115654201,
      "loss": 2.9925,
      "step": 167596
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.194474220275879,
      "learning_rate": 0.0001034454008368547,
      "loss": 2.8716,
      "step": 167597
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.552415132522583,
      "learning_rate": 0.00010344231055371048,
      "loss": 2.9775,
      "step": 167598
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.401987314224243,
      "learning_rate": 0.00010343922030711014,
      "loss": 2.893,
      "step": 167599
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.4666073322296143,
      "learning_rate": 0.00010343613009705419,
      "loss": 2.807,
      "step": 167600
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5392005443573,
      "learning_rate": 0.00010343303992354305,
      "loss": 3.0694,
      "step": 167601
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.73703932762146,
      "learning_rate": 0.00010342994978657754,
      "loss": 3.1372,
      "step": 167602
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3059616088867188,
      "learning_rate": 0.00010342685968615809,
      "loss": 3.0205,
      "step": 167603
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.643874168395996,
      "learning_rate": 0.00010342376962228522,
      "loss": 2.9608,
      "step": 167604
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.552931070327759,
      "learning_rate": 0.00010342067959495964,
      "loss": 2.9111,
      "step": 167605
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.21291184425354,
      "learning_rate": 0.00010341758960418189,
      "loss": 2.9477,
      "step": 167606
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.574941635131836,
      "learning_rate": 0.00010341449964995243,
      "loss": 2.991,
      "step": 167607
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3732917308807373,
      "learning_rate": 0.00010341140973227201,
      "loss": 2.9205,
      "step": 167608
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7097678184509277,
      "learning_rate": 0.00010340831985114105,
      "loss": 2.9219,
      "step": 167609
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2145893573760986,
      "learning_rate": 0.00010340523000656026,
      "loss": 2.8931,
      "step": 167610
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.039137601852417,
      "learning_rate": 0.00010340214019853016,
      "loss": 2.8536,
      "step": 167611
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.792637348175049,
      "learning_rate": 0.00010339905042705132,
      "loss": 2.9929,
      "step": 167612
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.677927017211914,
      "learning_rate": 0.00010339596069212421,
      "loss": 3.0504,
      "step": 167613
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8683815002441406,
      "learning_rate": 0.00010339287099374959,
      "loss": 2.8516,
      "step": 167614
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4267516136169434,
      "learning_rate": 0.00010338978133192785,
      "loss": 3.1139,
      "step": 167615
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.639448881149292,
      "learning_rate": 0.00010338669170665975,
      "loss": 3.1714,
      "step": 167616
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3123068809509277,
      "learning_rate": 0.00010338360211794572,
      "loss": 3.0186,
      "step": 167617
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.877535581588745,
      "learning_rate": 0.00010338051256578654,
      "loss": 3.0263,
      "step": 167618
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6246986389160156,
      "learning_rate": 0.00010337742305018246,
      "loss": 2.8005,
      "step": 167619
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.784717082977295,
      "learning_rate": 0.0001033743335711343,
      "loss": 3.0878,
      "step": 167620
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.17208194732666,
      "learning_rate": 0.00010337124412864251,
      "loss": 3.122,
      "step": 167621
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.401303768157959,
      "learning_rate": 0.0001033681547227078,
      "loss": 2.9781,
      "step": 167622
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1792285442352295,
      "learning_rate": 0.00010336506535333055,
      "loss": 3.2701,
      "step": 167623
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.774444580078125,
      "learning_rate": 0.00010336197602051165,
      "loss": 2.9621,
      "step": 167624
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.352980852127075,
      "learning_rate": 0.00010335888672425124,
      "loss": 2.9866,
      "step": 167625
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.345658302307129,
      "learning_rate": 0.00010335579746455026,
      "loss": 2.7261,
      "step": 167626
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.51282000541687,
      "learning_rate": 0.00010335270824140906,
      "loss": 3.0539,
      "step": 167627
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.376352787017822,
      "learning_rate": 0.0001033496190548284,
      "loss": 2.8839,
      "step": 167628
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3616809844970703,
      "learning_rate": 0.00010334652990480864,
      "loss": 2.9849,
      "step": 167629
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.287041425704956,
      "learning_rate": 0.00010334344079135068,
      "loss": 2.5822,
      "step": 167630
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.374971628189087,
      "learning_rate": 0.00010334035171445466,
      "loss": 3.0884,
      "step": 167631
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3324921131134033,
      "learning_rate": 0.0001033372626741215,
      "loss": 2.9437,
      "step": 167632
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.08463191986084,
      "learning_rate": 0.00010333417367035156,
      "loss": 3.0169,
      "step": 167633
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5523033142089844,
      "learning_rate": 0.00010333108470314561,
      "loss": 2.7785,
      "step": 167634
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1128578186035156,
      "learning_rate": 0.00010332799577250404,
      "loss": 2.8086,
      "step": 167635
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.329577922821045,
      "learning_rate": 0.00010332490687842769,
      "loss": 2.8997,
      "step": 167636
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.472022533416748,
      "learning_rate": 0.00010332181802091675,
      "loss": 2.8139,
      "step": 167637
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.53210711479187,
      "learning_rate": 0.0001033187291999721,
      "loss": 2.999,
      "step": 167638
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.789567470550537,
      "learning_rate": 0.00010331564041559409,
      "loss": 2.8543,
      "step": 167639
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4734511375427246,
      "learning_rate": 0.00010331255166778353,
      "loss": 2.9763,
      "step": 167640
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.981717586517334,
      "learning_rate": 0.0001033094629565408,
      "loss": 2.7745,
      "step": 167641
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.221432685852051,
      "learning_rate": 0.00010330637428186675,
      "loss": 2.6991,
      "step": 167642
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5222668647766113,
      "learning_rate": 0.00010330328564376153,
      "loss": 2.5629,
      "step": 167643
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5184733867645264,
      "learning_rate": 0.00010330019704222607,
      "loss": 2.8086,
      "step": 167644
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9478979110717773,
      "learning_rate": 0.00010329710847726073,
      "loss": 3.0077,
      "step": 167645
    },
    {
      "epoch": 2.18,
      "grad_norm": 6.448888778686523,
      "learning_rate": 0.00010329401994886623,
      "loss": 2.8241,
      "step": 167646
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.734665870666504,
      "learning_rate": 0.000103290931457043,
      "loss": 2.9907,
      "step": 167647
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.79085373878479,
      "learning_rate": 0.0001032878430017919,
      "loss": 3.0908,
      "step": 167648
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.327725648880005,
      "learning_rate": 0.0001032847545831131,
      "loss": 2.7999,
      "step": 167649
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.7203369140625,
      "learning_rate": 0.00010328166620100748,
      "loss": 2.7647,
      "step": 167650
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.061598539352417,
      "learning_rate": 0.0001032785778554754,
      "loss": 3.0329,
      "step": 167651
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8305258750915527,
      "learning_rate": 0.00010327548954651767,
      "loss": 3.0664,
      "step": 167652
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.365684747695923,
      "learning_rate": 0.00010327240127413464,
      "loss": 2.8779,
      "step": 167653
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3766093254089355,
      "learning_rate": 0.00010326931303832716,
      "loss": 2.893,
      "step": 167654
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.8548457622528076,
      "learning_rate": 0.00010326622483909546,
      "loss": 3.0533,
      "step": 167655
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.707395315170288,
      "learning_rate": 0.00010326313667644037,
      "loss": 2.9294,
      "step": 167656
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.6479170322418213,
      "learning_rate": 0.00010326004855036224,
      "loss": 2.7085,
      "step": 167657
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.807990074157715,
      "learning_rate": 0.00010325696046086193,
      "loss": 3.1809,
      "step": 167658
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3959953784942627,
      "learning_rate": 0.00010325387240793972,
      "loss": 3.015,
      "step": 167659
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1134724617004395,
      "learning_rate": 0.00010325078439159656,
      "loss": 2.8179,
      "step": 167660
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6845624446868896,
      "learning_rate": 0.00010324769641183259,
      "loss": 3.0908,
      "step": 167661
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.358144521713257,
      "learning_rate": 0.00010324460846864867,
      "loss": 3.05,
      "step": 167662
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8294715881347656,
      "learning_rate": 0.00010324152056204522,
      "loss": 3.0426,
      "step": 167663
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1790995597839355,
      "learning_rate": 0.00010323843269202298,
      "loss": 2.96,
      "step": 167664
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5110726356506348,
      "learning_rate": 0.00010323534485858231,
      "loss": 2.7646,
      "step": 167665
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5458621978759766,
      "learning_rate": 0.00010323225706172414,
      "loss": 2.9621,
      "step": 167666
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.847017526626587,
      "learning_rate": 0.00010322916930144856,
      "loss": 2.8345,
      "step": 167667
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0296006202697754,
      "learning_rate": 0.0001032260815777565,
      "loss": 3.2057,
      "step": 167668
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.163454055786133,
      "learning_rate": 0.00010322299389064834,
      "loss": 2.711,
      "step": 167669
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4550087451934814,
      "learning_rate": 0.00010321990624012486,
      "loss": 2.9994,
      "step": 167670
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8029704093933105,
      "learning_rate": 0.00010321681862618639,
      "loss": 3.0284,
      "step": 167671
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.179867744445801,
      "learning_rate": 0.00010321373104883374,
      "loss": 2.9142,
      "step": 167672
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5606942176818848,
      "learning_rate": 0.00010321064350806734,
      "loss": 3.1091,
      "step": 167673
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.070309638977051,
      "learning_rate": 0.00010320755600388783,
      "loss": 2.7668,
      "step": 167674
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7610390186309814,
      "learning_rate": 0.00010320446853629564,
      "loss": 2.9769,
      "step": 167675
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.457463502883911,
      "learning_rate": 0.00010320138110529155,
      "loss": 2.6037,
      "step": 167676
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.825392007827759,
      "learning_rate": 0.00010319829371087596,
      "loss": 2.9071,
      "step": 167677
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3686561584472656,
      "learning_rate": 0.0001031952063530496,
      "loss": 3.1297,
      "step": 167678
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.251009702682495,
      "learning_rate": 0.00010319211903181297,
      "loss": 2.9133,
      "step": 167679
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1849606037139893,
      "learning_rate": 0.00010318903174716664,
      "loss": 3.2763,
      "step": 167680
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.633413076400757,
      "learning_rate": 0.00010318594449911108,
      "loss": 2.9717,
      "step": 167681
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.480180263519287,
      "learning_rate": 0.00010318285728764707,
      "loss": 3.0155,
      "step": 167682
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8398008346557617,
      "learning_rate": 0.00010317977011277497,
      "loss": 2.7568,
      "step": 167683
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.823964834213257,
      "learning_rate": 0.00010317668297449558,
      "loss": 3.0871,
      "step": 167684
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.406790256500244,
      "learning_rate": 0.00010317359587280937,
      "loss": 3.1334,
      "step": 167685
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.139883041381836,
      "learning_rate": 0.00010317050880771677,
      "loss": 3.0131,
      "step": 167686
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5492665767669678,
      "learning_rate": 0.00010316742177921862,
      "loss": 3.0212,
      "step": 167687
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.627034902572632,
      "learning_rate": 0.00010316433478731539,
      "loss": 2.9228,
      "step": 167688
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5605602264404297,
      "learning_rate": 0.00010316124783200748,
      "loss": 2.7112,
      "step": 167689
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3623030185699463,
      "learning_rate": 0.0001031581609132957,
      "loss": 3.0291,
      "step": 167690
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.552466869354248,
      "learning_rate": 0.00010315507403118057,
      "loss": 3.0316,
      "step": 167691
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.617994785308838,
      "learning_rate": 0.00010315198718566251,
      "loss": 2.8162,
      "step": 167692
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7741811275482178,
      "learning_rate": 0.0001031489003767423,
      "loss": 3.0836,
      "step": 167693
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.463080406188965,
      "learning_rate": 0.00010314581360442033,
      "loss": 2.8736,
      "step": 167694
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7089109420776367,
      "learning_rate": 0.00010314272686869736,
      "loss": 3.0259,
      "step": 167695
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.755666732788086,
      "learning_rate": 0.0001031396401695739,
      "loss": 3.1557,
      "step": 167696
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4720616340637207,
      "learning_rate": 0.00010313655350705049,
      "loss": 2.925,
      "step": 167697
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.508981704711914,
      "learning_rate": 0.00010313346688112761,
      "loss": 2.9947,
      "step": 167698
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6845695972442627,
      "learning_rate": 0.00010313038029180603,
      "loss": 2.9591,
      "step": 167699
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.41684627532959,
      "learning_rate": 0.00010312729373908613,
      "loss": 2.9603,
      "step": 167700
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.283764600753784,
      "learning_rate": 0.00010312420722296871,
      "loss": 2.9429,
      "step": 167701
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4595160484313965,
      "learning_rate": 0.00010312112074345417,
      "loss": 3.1137,
      "step": 167702
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.432682991027832,
      "learning_rate": 0.00010311803430054316,
      "loss": 2.8478,
      "step": 167703
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.515862226486206,
      "learning_rate": 0.00010311494789423609,
      "loss": 2.796,
      "step": 167704
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.820275068283081,
      "learning_rate": 0.00010311186152453381,
      "loss": 2.8242,
      "step": 167705
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.410343885421753,
      "learning_rate": 0.00010310877519143662,
      "loss": 2.9373,
      "step": 167706
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6587467193603516,
      "learning_rate": 0.00010310568889494534,
      "loss": 2.955,
      "step": 167707
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.7447402477264404,
      "learning_rate": 0.00010310260263506033,
      "loss": 2.8098,
      "step": 167708
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6021816730499268,
      "learning_rate": 0.00010309951641178246,
      "loss": 2.9998,
      "step": 167709
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.222496271133423,
      "learning_rate": 0.0001030964302251119,
      "loss": 2.7117,
      "step": 167710
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3707125186920166,
      "learning_rate": 0.00010309334407504954,
      "loss": 2.9457,
      "step": 167711
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.432332754135132,
      "learning_rate": 0.00010309025796159575,
      "loss": 2.9433,
      "step": 167712
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.944068431854248,
      "learning_rate": 0.00010308717188475128,
      "loss": 2.988,
      "step": 167713
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1142845153808594,
      "learning_rate": 0.00010308408584451652,
      "loss": 2.8251,
      "step": 167714
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.2063400745391846,
      "learning_rate": 0.00010308099984089234,
      "loss": 2.8695,
      "step": 167715
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.564573287963867,
      "learning_rate": 0.00010307791387387895,
      "loss": 2.975,
      "step": 167716
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9917843341827393,
      "learning_rate": 0.00010307482794347717,
      "loss": 3.0024,
      "step": 167717
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.910195827484131,
      "learning_rate": 0.0001030717420496874,
      "loss": 3.0766,
      "step": 167718
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.226654291152954,
      "learning_rate": 0.00010306865619251041,
      "loss": 3.1564,
      "step": 167719
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.086363792419434,
      "learning_rate": 0.0001030655703719466,
      "loss": 3.0193,
      "step": 167720
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2996103763580322,
      "learning_rate": 0.0001030624845879968,
      "loss": 2.8769,
      "step": 167721
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3359129428863525,
      "learning_rate": 0.00010305939884066119,
      "loss": 2.909,
      "step": 167722
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.892498731613159,
      "learning_rate": 0.00010305631312994069,
      "loss": 2.7763,
      "step": 167723
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.4434850215911865,
      "learning_rate": 0.00010305322745583558,
      "loss": 3.0027,
      "step": 167724
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.5320510864257812,
      "learning_rate": 0.00010305014181834674,
      "loss": 3.2,
      "step": 167725
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3356428146362305,
      "learning_rate": 0.00010304705621747449,
      "loss": 2.9533,
      "step": 167726
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.749235153198242,
      "learning_rate": 0.00010304397065321969,
      "loss": 2.8241,
      "step": 167727
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.4782114028930664,
      "learning_rate": 0.00010304088512558256,
      "loss": 2.9976,
      "step": 167728
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.61384916305542,
      "learning_rate": 0.00010303779963456391,
      "loss": 2.964,
      "step": 167729
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.597574234008789,
      "learning_rate": 0.00010303471418016418,
      "loss": 2.7506,
      "step": 167730
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.656632423400879,
      "learning_rate": 0.0001030316287623841,
      "loss": 2.9983,
      "step": 167731
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9378480911254883,
      "learning_rate": 0.00010302854338122407,
      "loss": 2.8562,
      "step": 167732
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6404261589050293,
      "learning_rate": 0.00010302545803668495,
      "loss": 3.0109,
      "step": 167733
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.938121795654297,
      "learning_rate": 0.00010302237272876694,
      "loss": 3.1124,
      "step": 167734
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.530407190322876,
      "learning_rate": 0.00010301928745747086,
      "loss": 3.0679,
      "step": 167735
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5943291187286377,
      "learning_rate": 0.00010301620222279712,
      "loss": 2.8739,
      "step": 167736
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1877083778381348,
      "learning_rate": 0.00010301311702474644,
      "loss": 2.8248,
      "step": 167737
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.254348278045654,
      "learning_rate": 0.00010301003186331932,
      "loss": 2.946,
      "step": 167738
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0896799564361572,
      "learning_rate": 0.00010300694673851649,
      "loss": 2.7582,
      "step": 167739
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3790767192840576,
      "learning_rate": 0.00010300386165033824,
      "loss": 3.002,
      "step": 167740
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.009096145629883,
      "learning_rate": 0.00010300077659878536,
      "loss": 2.9742,
      "step": 167741
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.441507339477539,
      "learning_rate": 0.00010299769158385823,
      "loss": 2.8192,
      "step": 167742
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.419383525848389,
      "learning_rate": 0.00010299460660555768,
      "loss": 2.8215,
      "step": 167743
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4345390796661377,
      "learning_rate": 0.00010299152166388406,
      "loss": 3.1174,
      "step": 167744
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.8070311546325684,
      "learning_rate": 0.00010298843675883823,
      "loss": 2.8789,
      "step": 167745
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6560707092285156,
      "learning_rate": 0.00010298535189042034,
      "loss": 2.9076,
      "step": 167746
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.072110176086426,
      "learning_rate": 0.00010298226705863132,
      "loss": 3.0995,
      "step": 167747
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7757835388183594,
      "learning_rate": 0.00010297918226347152,
      "loss": 2.9403,
      "step": 167748
    },
    {
      "epoch": 2.18,
      "grad_norm": 5.03659200668335,
      "learning_rate": 0.0001029760975049417,
      "loss": 2.852,
      "step": 167749
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.414212703704834,
      "learning_rate": 0.00010297301278304222,
      "loss": 2.7537,
      "step": 167750
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.985360622406006,
      "learning_rate": 0.0001029699280977739,
      "loss": 2.9611,
      "step": 167751
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.317185401916504,
      "learning_rate": 0.00010296684344913718,
      "loss": 2.9276,
      "step": 167752
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5467796325683594,
      "learning_rate": 0.00010296375883713265,
      "loss": 2.7666,
      "step": 167753
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.431955575942993,
      "learning_rate": 0.0001029606742617608,
      "loss": 3.0164,
      "step": 167754
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.2285237312316895,
      "learning_rate": 0.00010295758972302234,
      "loss": 2.9612,
      "step": 167755
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4874777793884277,
      "learning_rate": 0.0001029545052209177,
      "loss": 3.0067,
      "step": 167756
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6624467372894287,
      "learning_rate": 0.00010295142075544764,
      "loss": 2.6806,
      "step": 167757
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4257009029388428,
      "learning_rate": 0.00010294833632661263,
      "loss": 3.0631,
      "step": 167758
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.436530590057373,
      "learning_rate": 0.00010294525193441324,
      "loss": 3.0331,
      "step": 167759
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5554659366607666,
      "learning_rate": 0.00010294216757884996,
      "loss": 2.8942,
      "step": 167760
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.584583282470703,
      "learning_rate": 0.00010293908325992353,
      "loss": 2.6795,
      "step": 167761
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1864774227142334,
      "learning_rate": 0.00010293599897763435,
      "loss": 2.8969,
      "step": 167762
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.297445774078369,
      "learning_rate": 0.00010293291473198319,
      "loss": 2.7869,
      "step": 167763
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7091948986053467,
      "learning_rate": 0.00010292983052297054,
      "loss": 2.9939,
      "step": 167764
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.851320266723633,
      "learning_rate": 0.00010292674635059691,
      "loss": 2.8689,
      "step": 167765
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.017218589782715,
      "learning_rate": 0.00010292366221486285,
      "loss": 2.9414,
      "step": 167766
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5723888874053955,
      "learning_rate": 0.00010292057811576911,
      "loss": 2.8899,
      "step": 167767
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.0131993293762207,
      "learning_rate": 0.00010291749405331602,
      "loss": 2.9774,
      "step": 167768
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.728639125823975,
      "learning_rate": 0.00010291441002750442,
      "loss": 2.6723,
      "step": 167769
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.971613645553589,
      "learning_rate": 0.00010291132603833472,
      "loss": 2.773,
      "step": 167770
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.5864741802215576,
      "learning_rate": 0.00010290824208580747,
      "loss": 3.103,
      "step": 167771
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9707186222076416,
      "learning_rate": 0.00010290515816992337,
      "loss": 2.9902,
      "step": 167772
    },
    {
      "epoch": 2.18,
      "grad_norm": 4.622126579284668,
      "learning_rate": 0.0001029020742906829,
      "loss": 2.9313,
      "step": 167773
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.232362747192383,
      "learning_rate": 0.00010289899044808659,
      "loss": 2.9022,
      "step": 167774
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.143913507461548,
      "learning_rate": 0.00010289590664213518,
      "loss": 2.9507,
      "step": 167775
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6142256259918213,
      "learning_rate": 0.00010289282287282911,
      "loss": 2.9499,
      "step": 167776
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.352186441421509,
      "learning_rate": 0.00010288973914016891,
      "loss": 2.8351,
      "step": 167777
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.6013355255126953,
      "learning_rate": 0.00010288665544415531,
      "loss": 3.0169,
      "step": 167778
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9462249279022217,
      "learning_rate": 0.00010288357178478872,
      "loss": 2.8736,
      "step": 167779
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.836087703704834,
      "learning_rate": 0.00010288048816206988,
      "loss": 3.2139,
      "step": 167780
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.559882402420044,
      "learning_rate": 0.0001028774045759993,
      "loss": 2.9456,
      "step": 167781
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.4420855045318604,
      "learning_rate": 0.00010287432102657753,
      "loss": 2.9549,
      "step": 167782
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7677323818206787,
      "learning_rate": 0.00010287123751380502,
      "loss": 2.846,
      "step": 167783
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4843595027923584,
      "learning_rate": 0.00010286815403768258,
      "loss": 2.8127,
      "step": 167784
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.890191078186035,
      "learning_rate": 0.00010286507059821056,
      "loss": 3.0406,
      "step": 167785
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.3524062633514404,
      "learning_rate": 0.00010286198719538976,
      "loss": 2.89,
      "step": 167786
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.359940767288208,
      "learning_rate": 0.00010285890382922064,
      "loss": 3.1007,
      "step": 167787
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3996288776397705,
      "learning_rate": 0.00010285582049970377,
      "loss": 2.7975,
      "step": 167788
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.221057653427124,
      "learning_rate": 0.00010285273720683961,
      "loss": 2.8112,
      "step": 167789
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.564762592315674,
      "learning_rate": 0.00010284965395062896,
      "loss": 2.7693,
      "step": 167790
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.3427255153656006,
      "learning_rate": 0.0001028465707310722,
      "loss": 2.7799,
      "step": 167791
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.9118988513946533,
      "learning_rate": 0.00010284348754817004,
      "loss": 3.1272,
      "step": 167792
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8958024978637695,
      "learning_rate": 0.00010284040440192294,
      "loss": 2.9867,
      "step": 167793
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4964311122894287,
      "learning_rate": 0.00010283732129233171,
      "loss": 2.9295,
      "step": 167794
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.956761360168457,
      "learning_rate": 0.00010283423821939653,
      "loss": 2.9759,
      "step": 167795
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.638516902923584,
      "learning_rate": 0.00010283115518311831,
      "loss": 2.7656,
      "step": 167796
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.489590883255005,
      "learning_rate": 0.00010282807218349739,
      "loss": 2.9518,
      "step": 167797
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.389446973800659,
      "learning_rate": 0.00010282498922053454,
      "loss": 3.1272,
      "step": 167798
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.1744909286499023,
      "learning_rate": 0.00010282190629423016,
      "loss": 3.0736,
      "step": 167799
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.530575752258301,
      "learning_rate": 0.00010281882340458513,
      "loss": 2.9576,
      "step": 167800
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.8914499282836914,
      "learning_rate": 0.00010281574055159957,
      "loss": 2.8816,
      "step": 167801
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.536989212036133,
      "learning_rate": 0.00010281265773527441,
      "loss": 3.0471,
      "step": 167802
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.7460529804229736,
      "learning_rate": 0.00010280957495560998,
      "loss": 2.7827,
      "step": 167803
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.6942644119262695,
      "learning_rate": 0.00010280649221260708,
      "loss": 3.0816,
      "step": 167804
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.640317440032959,
      "learning_rate": 0.00010280340950626608,
      "loss": 2.8349,
      "step": 167805
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.4173855781555176,
      "learning_rate": 0.00010280032683658786,
      "loss": 2.9738,
      "step": 167806
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.1875553131103516,
      "learning_rate": 0.00010279724420357254,
      "loss": 2.8473,
      "step": 167807
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.015296459197998,
      "learning_rate": 0.00010279416160722106,
      "loss": 2.8861,
      "step": 167808
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7816319465637207,
      "learning_rate": 0.00010279107904753379,
      "loss": 2.9475,
      "step": 167809
    },
    {
      "epoch": 2.18,
      "grad_norm": 2.7109522819519043,
      "learning_rate": 0.00010278799652451145,
      "loss": 2.8101,
      "step": 167810
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3062243461608887,
      "learning_rate": 0.00010278491403815445,
      "loss": 2.719,
      "step": 167811
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.252185583114624,
      "learning_rate": 0.00010278183158846366,
      "loss": 3.1129,
      "step": 167812
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.205662250518799,
      "learning_rate": 0.00010277874917543927,
      "loss": 2.9857,
      "step": 167813
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.537698984146118,
      "learning_rate": 0.00010277566679908211,
      "loss": 3.0838,
      "step": 167814
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3548951148986816,
      "learning_rate": 0.00010277258445939256,
      "loss": 2.862,
      "step": 167815
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4804651737213135,
      "learning_rate": 0.00010276950215637145,
      "loss": 2.9115,
      "step": 167816
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.805736541748047,
      "learning_rate": 0.00010276641989001909,
      "loss": 2.7056,
      "step": 167817
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6751503944396973,
      "learning_rate": 0.00010276333766033627,
      "loss": 3.125,
      "step": 167818
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.585930585861206,
      "learning_rate": 0.0001027602554673235,
      "loss": 2.6751,
      "step": 167819
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0883684158325195,
      "learning_rate": 0.00010275717331098128,
      "loss": 2.8643,
      "step": 167820
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6117639541625977,
      "learning_rate": 0.00010275409119131016,
      "loss": 2.7573,
      "step": 167821
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4626243114471436,
      "learning_rate": 0.00010275100910831085,
      "loss": 2.8684,
      "step": 167822
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8603029251098633,
      "learning_rate": 0.00010274792706198376,
      "loss": 2.707,
      "step": 167823
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.280635118484497,
      "learning_rate": 0.00010274484505232967,
      "loss": 3.0204,
      "step": 167824
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.889556646347046,
      "learning_rate": 0.00010274176307934901,
      "loss": 3.0334,
      "step": 167825
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.948403835296631,
      "learning_rate": 0.00010273868114304239,
      "loss": 3.104,
      "step": 167826
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1745965480804443,
      "learning_rate": 0.00010273559924341029,
      "loss": 2.9839,
      "step": 167827
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9449546337127686,
      "learning_rate": 0.00010273251738045343,
      "loss": 2.9018,
      "step": 167828
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.897143840789795,
      "learning_rate": 0.00010272943555417226,
      "loss": 2.7276,
      "step": 167829
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.88218092918396,
      "learning_rate": 0.00010272635376456748,
      "loss": 2.9247,
      "step": 167830
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4156246185302734,
      "learning_rate": 0.0001027232720116396,
      "loss": 2.9216,
      "step": 167831
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.855414867401123,
      "learning_rate": 0.00010272019029538922,
      "loss": 3.0693,
      "step": 167832
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.24631404876709,
      "learning_rate": 0.00010271710861581675,
      "loss": 3.0565,
      "step": 167833
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2973906993865967,
      "learning_rate": 0.00010271402697292299,
      "loss": 3.1662,
      "step": 167834
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.288205146789551,
      "learning_rate": 0.00010271094536670832,
      "loss": 3.1356,
      "step": 167835
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.444539785385132,
      "learning_rate": 0.00010270786379717351,
      "loss": 3.0146,
      "step": 167836
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.118293046951294,
      "learning_rate": 0.00010270478226431905,
      "loss": 2.7733,
      "step": 167837
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.982424736022949,
      "learning_rate": 0.00010270170076814549,
      "loss": 3.0954,
      "step": 167838
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.429187297821045,
      "learning_rate": 0.00010269861930865328,
      "loss": 3.0067,
      "step": 167839
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3336894512176514,
      "learning_rate": 0.00010269553788584324,
      "loss": 2.8926,
      "step": 167840
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3246376514434814,
      "learning_rate": 0.00010269245649971575,
      "loss": 2.9787,
      "step": 167841
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3750405311584473,
      "learning_rate": 0.00010268937515027152,
      "loss": 2.8427,
      "step": 167842
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.314018726348877,
      "learning_rate": 0.00010268629383751107,
      "loss": 2.9711,
      "step": 167843
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3010072708129883,
      "learning_rate": 0.00010268321256143494,
      "loss": 2.8117,
      "step": 167844
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.017312526702881,
      "learning_rate": 0.00010268013132204368,
      "loss": 3.0632,
      "step": 167845
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4419097900390625,
      "learning_rate": 0.00010267705011933797,
      "loss": 3.045,
      "step": 167846
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7642767429351807,
      "learning_rate": 0.00010267396895331821,
      "loss": 2.9045,
      "step": 167847
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9525346755981445,
      "learning_rate": 0.0001026708878239852,
      "loss": 2.9144,
      "step": 167848
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.341829776763916,
      "learning_rate": 0.00010266780673133942,
      "loss": 2.9769,
      "step": 167849
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.57446026802063,
      "learning_rate": 0.00010266472567538139,
      "loss": 2.6924,
      "step": 167850
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6594510078430176,
      "learning_rate": 0.00010266164465611164,
      "loss": 2.7835,
      "step": 167851
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.48022198677063,
      "learning_rate": 0.00010265856367353088,
      "loss": 3.0582,
      "step": 167852
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.320131778717041,
      "learning_rate": 0.00010265548272763954,
      "loss": 2.9694,
      "step": 167853
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1779022216796875,
      "learning_rate": 0.00010265240181843838,
      "loss": 3.0123,
      "step": 167854
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0900115966796875,
      "learning_rate": 0.00010264932094592783,
      "loss": 3.1165,
      "step": 167855
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.444295883178711,
      "learning_rate": 0.00010264624011010841,
      "loss": 2.9074,
      "step": 167856
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8205342292785645,
      "learning_rate": 0.00010264315931098091,
      "loss": 2.9502,
      "step": 167857
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4499967098236084,
      "learning_rate": 0.00010264007854854574,
      "loss": 3.0153,
      "step": 167858
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.7651383876800537,
      "learning_rate": 0.0001026369978228034,
      "loss": 2.8608,
      "step": 167859
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2102744579315186,
      "learning_rate": 0.00010263391713375467,
      "loss": 3.2104,
      "step": 167860
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8392958641052246,
      "learning_rate": 0.00010263083648140005,
      "loss": 2.8663,
      "step": 167861
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2322819232940674,
      "learning_rate": 0.00010262775586573996,
      "loss": 3.1637,
      "step": 167862
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.294132709503174,
      "learning_rate": 0.0001026246752867752,
      "loss": 2.9981,
      "step": 167863
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4913904666900635,
      "learning_rate": 0.00010262159474450623,
      "loss": 3.0264,
      "step": 167864
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.743511438369751,
      "learning_rate": 0.00010261851423893354,
      "loss": 2.8856,
      "step": 167865
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.878509759902954,
      "learning_rate": 0.00010261543377005788,
      "loss": 3.0182,
      "step": 167866
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.97397518157959,
      "learning_rate": 0.00010261235333787979,
      "loss": 2.9034,
      "step": 167867
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.236269474029541,
      "learning_rate": 0.00010260927294239962,
      "loss": 2.8948,
      "step": 167868
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1252663135528564,
      "learning_rate": 0.00010260619258361824,
      "loss": 3.0276,
      "step": 167869
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3888654708862305,
      "learning_rate": 0.000102603112261536,
      "loss": 2.9565,
      "step": 167870
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.803595542907715,
      "learning_rate": 0.00010260003197615367,
      "loss": 2.9737,
      "step": 167871
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.39260196685791,
      "learning_rate": 0.00010259695172747173,
      "loss": 2.7381,
      "step": 167872
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.099522829055786,
      "learning_rate": 0.00010259387151549071,
      "loss": 2.8404,
      "step": 167873
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5721542835235596,
      "learning_rate": 0.00010259079134021114,
      "loss": 2.957,
      "step": 167874
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.7529022693634033,
      "learning_rate": 0.00010258771120163374,
      "loss": 2.8568,
      "step": 167875
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.613990306854248,
      "learning_rate": 0.00010258463109975896,
      "loss": 2.9135,
      "step": 167876
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.818598985671997,
      "learning_rate": 0.00010258155103458748,
      "loss": 3.2037,
      "step": 167877
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0211055278778076,
      "learning_rate": 0.00010257847100611975,
      "loss": 3.1389,
      "step": 167878
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9557435512542725,
      "learning_rate": 0.00010257539101435658,
      "loss": 2.97,
      "step": 167879
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3528425693511963,
      "learning_rate": 0.00010257231105929819,
      "loss": 2.9918,
      "step": 167880
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0973947048187256,
      "learning_rate": 0.00010256923114094543,
      "loss": 3.0504,
      "step": 167881
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9507734775543213,
      "learning_rate": 0.00010256615125929867,
      "loss": 2.7826,
      "step": 167882
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7244303226470947,
      "learning_rate": 0.0001025630714143587,
      "loss": 2.9597,
      "step": 167883
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.560091018676758,
      "learning_rate": 0.00010255999160612586,
      "loss": 3.0772,
      "step": 167884
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.5237503051757812,
      "learning_rate": 0.00010255691183460095,
      "loss": 2.8164,
      "step": 167885
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.9289300441741943,
      "learning_rate": 0.00010255383209978445,
      "loss": 2.7894,
      "step": 167886
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4771673679351807,
      "learning_rate": 0.00010255075240167692,
      "loss": 2.9886,
      "step": 167887
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6718087196350098,
      "learning_rate": 0.00010254767274027881,
      "loss": 3.0059,
      "step": 167888
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.138665199279785,
      "learning_rate": 0.00010254459311559096,
      "loss": 3.1517,
      "step": 167889
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4388012886047363,
      "learning_rate": 0.00010254151352761365,
      "loss": 3.1753,
      "step": 167890
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3698525428771973,
      "learning_rate": 0.00010253843397634774,
      "loss": 2.9291,
      "step": 167891
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.735280752182007,
      "learning_rate": 0.00010253535446179364,
      "loss": 2.9529,
      "step": 167892
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0593955516815186,
      "learning_rate": 0.00010253227498395195,
      "loss": 2.8541,
      "step": 167893
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8073298931121826,
      "learning_rate": 0.00010252919554282311,
      "loss": 2.8787,
      "step": 167894
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.049351930618286,
      "learning_rate": 0.00010252611613840796,
      "loss": 2.8371,
      "step": 167895
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9640426635742188,
      "learning_rate": 0.00010252303677070682,
      "loss": 2.8745,
      "step": 167896
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8679864406585693,
      "learning_rate": 0.0001025199574397205,
      "loss": 3.0215,
      "step": 167897
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.412135124206543,
      "learning_rate": 0.00010251687814544939,
      "loss": 2.7141,
      "step": 167898
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.028649091720581,
      "learning_rate": 0.00010251379888789415,
      "loss": 3.0523,
      "step": 167899
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.582669973373413,
      "learning_rate": 0.00010251071966705524,
      "loss": 2.7122,
      "step": 167900
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.511469602584839,
      "learning_rate": 0.0001025076404829334,
      "loss": 2.882,
      "step": 167901
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.574791669845581,
      "learning_rate": 0.00010250456133552901,
      "loss": 3.0012,
      "step": 167902
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3232498168945312,
      "learning_rate": 0.00010250148222484286,
      "loss": 2.8971,
      "step": 167903
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2127580642700195,
      "learning_rate": 0.0001024984031508754,
      "loss": 2.7414,
      "step": 167904
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5435385704040527,
      "learning_rate": 0.00010249532411362723,
      "loss": 3.2517,
      "step": 167905
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1669421195983887,
      "learning_rate": 0.0001024922451130988,
      "loss": 3.1609,
      "step": 167906
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3940494060516357,
      "learning_rate": 0.00010248916614929089,
      "loss": 2.9106,
      "step": 167907
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.309343099594116,
      "learning_rate": 0.00010248608722220386,
      "loss": 3.1331,
      "step": 167908
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8239006996154785,
      "learning_rate": 0.00010248300833183853,
      "loss": 2.9478,
      "step": 167909
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.390692710876465,
      "learning_rate": 0.0001024799294781953,
      "loss": 2.9187,
      "step": 167910
    },
    {
      "epoch": 2.19,
      "grad_norm": 5.636324882507324,
      "learning_rate": 0.00010247685066127479,
      "loss": 3.0267,
      "step": 167911
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.63557767868042,
      "learning_rate": 0.00010247377188107747,
      "loss": 3.0039,
      "step": 167912
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9145472049713135,
      "learning_rate": 0.00010247069313760412,
      "loss": 3.1185,
      "step": 167913
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0206029415130615,
      "learning_rate": 0.00010246761443085506,
      "loss": 2.7572,
      "step": 167914
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.320836067199707,
      "learning_rate": 0.00010246453576083113,
      "loss": 2.8827,
      "step": 167915
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2742228507995605,
      "learning_rate": 0.00010246145712753276,
      "loss": 2.7633,
      "step": 167916
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.589599370956421,
      "learning_rate": 0.00010245837853096052,
      "loss": 3.141,
      "step": 167917
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.074965238571167,
      "learning_rate": 0.00010245529997111492,
      "loss": 2.8048,
      "step": 167918
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7092082500457764,
      "learning_rate": 0.00010245222144799669,
      "loss": 2.8653,
      "step": 167919
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.623063325881958,
      "learning_rate": 0.00010244914296160625,
      "loss": 2.8848,
      "step": 167920
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.723851442337036,
      "learning_rate": 0.00010244606451194433,
      "loss": 3.0575,
      "step": 167921
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4022741317749023,
      "learning_rate": 0.0001024429860990114,
      "loss": 2.7409,
      "step": 167922
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.584568738937378,
      "learning_rate": 0.00010243990772280805,
      "loss": 3.3011,
      "step": 167923
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.597440242767334,
      "learning_rate": 0.00010243682938333475,
      "loss": 3.1401,
      "step": 167924
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.67661714553833,
      "learning_rate": 0.00010243375108059228,
      "loss": 2.7719,
      "step": 167925
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4681193828582764,
      "learning_rate": 0.00010243067281458102,
      "loss": 3.1522,
      "step": 167926
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7612762451171875,
      "learning_rate": 0.0001024275945853017,
      "loss": 3.0102,
      "step": 167927
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.041325092315674,
      "learning_rate": 0.00010242451639275482,
      "loss": 3.1655,
      "step": 167928
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.66747784614563,
      "learning_rate": 0.00010242143823694097,
      "loss": 2.8905,
      "step": 167929
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5979249477386475,
      "learning_rate": 0.00010241836011786059,
      "loss": 2.791,
      "step": 167930
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.51108980178833,
      "learning_rate": 0.00010241528203551448,
      "loss": 2.9802,
      "step": 167931
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7584896087646484,
      "learning_rate": 0.00010241220398990298,
      "loss": 2.9413,
      "step": 167932
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5800702571868896,
      "learning_rate": 0.00010240912598102691,
      "loss": 2.8369,
      "step": 167933
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7560782432556152,
      "learning_rate": 0.00010240604800888671,
      "loss": 2.9141,
      "step": 167934
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8343605995178223,
      "learning_rate": 0.00010240297007348293,
      "loss": 2.968,
      "step": 167935
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.708068370819092,
      "learning_rate": 0.00010239989217481609,
      "loss": 2.9907,
      "step": 167936
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.585463762283325,
      "learning_rate": 0.00010239681431288694,
      "loss": 2.8862,
      "step": 167937
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.527407169342041,
      "learning_rate": 0.00010239373648769581,
      "loss": 2.8806,
      "step": 167938
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2001538276672363,
      "learning_rate": 0.00010239065869924356,
      "loss": 2.7547,
      "step": 167939
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.351525068283081,
      "learning_rate": 0.0001023875809475306,
      "loss": 2.8657,
      "step": 167940
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6509761810302734,
      "learning_rate": 0.00010238450323255742,
      "loss": 2.8886,
      "step": 167941
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.815683364868164,
      "learning_rate": 0.00010238142555432479,
      "loss": 2.8526,
      "step": 167942
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6104557514190674,
      "learning_rate": 0.0001023783479128332,
      "loss": 2.9725,
      "step": 167943
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4057233333587646,
      "learning_rate": 0.0001023752703080831,
      "loss": 2.9463,
      "step": 167944
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8488364219665527,
      "learning_rate": 0.00010237219274007526,
      "loss": 2.8023,
      "step": 167945
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.480963945388794,
      "learning_rate": 0.00010236911520881017,
      "loss": 2.7412,
      "step": 167946
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5935609340667725,
      "learning_rate": 0.00010236603771428828,
      "loss": 2.7487,
      "step": 167947
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6625795364379883,
      "learning_rate": 0.00010236296025651037,
      "loss": 2.7452,
      "step": 167948
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.83376145362854,
      "learning_rate": 0.00010235988283547692,
      "loss": 3.1524,
      "step": 167949
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.812006950378418,
      "learning_rate": 0.00010235680545118839,
      "loss": 2.9292,
      "step": 167950
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8347041606903076,
      "learning_rate": 0.00010235372810364557,
      "loss": 2.8673,
      "step": 167951
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.032996416091919,
      "learning_rate": 0.00010235065079284886,
      "loss": 3.2339,
      "step": 167952
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.324125051498413,
      "learning_rate": 0.00010234757351879894,
      "loss": 3.147,
      "step": 167953
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.934264898300171,
      "learning_rate": 0.00010234449628149638,
      "loss": 2.8984,
      "step": 167954
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.34308123588562,
      "learning_rate": 0.00010234141908094157,
      "loss": 3.1314,
      "step": 167955
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4956648349761963,
      "learning_rate": 0.00010233834191713533,
      "loss": 2.9384,
      "step": 167956
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.654453754425049,
      "learning_rate": 0.00010233526479007814,
      "loss": 2.8749,
      "step": 167957
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6177589893341064,
      "learning_rate": 0.00010233218769977045,
      "loss": 3.0205,
      "step": 167958
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.406965970993042,
      "learning_rate": 0.00010232911064621303,
      "loss": 2.9509,
      "step": 167959
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6944022178649902,
      "learning_rate": 0.00010232603362940634,
      "loss": 2.8665,
      "step": 167960
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3959708213806152,
      "learning_rate": 0.0001023229566493509,
      "loss": 3.0106,
      "step": 167961
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4823200702667236,
      "learning_rate": 0.00010231987970604745,
      "loss": 2.9822,
      "step": 167962
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.395702600479126,
      "learning_rate": 0.00010231680279949646,
      "loss": 3.1044,
      "step": 167963
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.175610303878784,
      "learning_rate": 0.00010231372592969841,
      "loss": 3.0997,
      "step": 167964
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9490528106689453,
      "learning_rate": 0.00010231064909665411,
      "loss": 2.8776,
      "step": 167965
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4688806533813477,
      "learning_rate": 0.00010230757230036393,
      "loss": 2.9638,
      "step": 167966
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.8821346759796143,
      "learning_rate": 0.00010230449554082845,
      "loss": 2.7581,
      "step": 167967
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.199979305267334,
      "learning_rate": 0.00010230141881804837,
      "loss": 2.7604,
      "step": 167968
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.41483998298645,
      "learning_rate": 0.00010229834213202408,
      "loss": 2.9181,
      "step": 167969
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.569119453430176,
      "learning_rate": 0.00010229526548275639,
      "loss": 2.8035,
      "step": 167970
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0090558528900146,
      "learning_rate": 0.00010229218887024571,
      "loss": 2.9118,
      "step": 167971
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3585798740386963,
      "learning_rate": 0.00010228911229449264,
      "loss": 2.9661,
      "step": 167972
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3115108013153076,
      "learning_rate": 0.00010228603575549769,
      "loss": 2.9088,
      "step": 167973
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5375752449035645,
      "learning_rate": 0.00010228295925326159,
      "loss": 3.0072,
      "step": 167974
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6220250129699707,
      "learning_rate": 0.00010227988278778471,
      "loss": 3.1405,
      "step": 167975
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.444676399230957,
      "learning_rate": 0.00010227680635906784,
      "loss": 2.8595,
      "step": 167976
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.164501428604126,
      "learning_rate": 0.00010227372996711143,
      "loss": 3.1511,
      "step": 167977
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3721189498901367,
      "learning_rate": 0.00010227065361191608,
      "loss": 2.8279,
      "step": 167978
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4776508808135986,
      "learning_rate": 0.00010226757729348225,
      "loss": 2.8148,
      "step": 167979
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.595536231994629,
      "learning_rate": 0.00010226450101181071,
      "loss": 3.0835,
      "step": 167980
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.017487049102783,
      "learning_rate": 0.00010226142476690182,
      "loss": 3.0696,
      "step": 167981
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.500432252883911,
      "learning_rate": 0.00010225834855875637,
      "loss": 3.0707,
      "step": 167982
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5905003547668457,
      "learning_rate": 0.00010225527238737481,
      "loss": 2.8866,
      "step": 167983
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.512702465057373,
      "learning_rate": 0.00010225219625275775,
      "loss": 2.9253,
      "step": 167984
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.419497013092041,
      "learning_rate": 0.00010224912015490564,
      "loss": 2.9803,
      "step": 167985
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7148184776306152,
      "learning_rate": 0.00010224604409381926,
      "loss": 2.9243,
      "step": 167986
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5573880672454834,
      "learning_rate": 0.00010224296806949893,
      "loss": 2.9347,
      "step": 167987
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7400524616241455,
      "learning_rate": 0.0001022398920819455,
      "loss": 2.9773,
      "step": 167988
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.9411418437957764,
      "learning_rate": 0.00010223681613115941,
      "loss": 2.8993,
      "step": 167989
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2021825313568115,
      "learning_rate": 0.00010223374021714123,
      "loss": 3.1034,
      "step": 167990
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.533162832260132,
      "learning_rate": 0.00010223066433989138,
      "loss": 2.9555,
      "step": 167991
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.354146957397461,
      "learning_rate": 0.00010222758849941074,
      "loss": 3.0364,
      "step": 167992
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3728177547454834,
      "learning_rate": 0.00010222451269569962,
      "loss": 3.0334,
      "step": 167993
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.503190755844116,
      "learning_rate": 0.00010222143692875877,
      "loss": 2.9681,
      "step": 167994
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4588940143585205,
      "learning_rate": 0.00010221836119858869,
      "loss": 2.7001,
      "step": 167995
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.155442953109741,
      "learning_rate": 0.00010221528550518997,
      "loss": 2.8111,
      "step": 167996
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5100927352905273,
      "learning_rate": 0.00010221220984856306,
      "loss": 2.9593,
      "step": 167997
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6283669471740723,
      "learning_rate": 0.00010220913422870874,
      "loss": 3.1621,
      "step": 167998
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.533511161804199,
      "learning_rate": 0.00010220605864562735,
      "loss": 3.1778,
      "step": 167999
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.636235237121582,
      "learning_rate": 0.00010220298309931969,
      "loss": 2.9778,
      "step": 168000
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.994908094406128,
      "learning_rate": 0.00010219990758978625,
      "loss": 2.791,
      "step": 168001
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4861834049224854,
      "learning_rate": 0.00010219683211702755,
      "loss": 3.0624,
      "step": 168002
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1019089221954346,
      "learning_rate": 0.0001021937566810441,
      "loss": 2.9486,
      "step": 168003
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.535083293914795,
      "learning_rate": 0.0001021906812818367,
      "loss": 2.7818,
      "step": 168004
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2801167964935303,
      "learning_rate": 0.00010218760591940566,
      "loss": 2.8989,
      "step": 168005
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4660167694091797,
      "learning_rate": 0.00010218453059375179,
      "loss": 3.0554,
      "step": 168006
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6936683654785156,
      "learning_rate": 0.00010218145530487554,
      "loss": 2.7523,
      "step": 168007
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3302083015441895,
      "learning_rate": 0.00010217838005277747,
      "loss": 2.863,
      "step": 168008
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.209259271621704,
      "learning_rate": 0.00010217530483745811,
      "loss": 2.5819,
      "step": 168009
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3373618125915527,
      "learning_rate": 0.0001021722296589182,
      "loss": 2.8238,
      "step": 168010
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6294198036193848,
      "learning_rate": 0.00010216915451715806,
      "loss": 2.8018,
      "step": 168011
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.69091534614563,
      "learning_rate": 0.00010216607941217853,
      "loss": 2.7013,
      "step": 168012
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5948808193206787,
      "learning_rate": 0.00010216300434398005,
      "loss": 2.8367,
      "step": 168013
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6562728881835938,
      "learning_rate": 0.00010215992931256326,
      "loss": 3.0078,
      "step": 168014
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.50203537940979,
      "learning_rate": 0.00010215685431792851,
      "loss": 2.8573,
      "step": 168015
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0176167488098145,
      "learning_rate": 0.00010215377936007666,
      "loss": 2.9054,
      "step": 168016
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4249110221862793,
      "learning_rate": 0.00010215070443900805,
      "loss": 3.0593,
      "step": 168017
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5715701580047607,
      "learning_rate": 0.00010214762955472347,
      "loss": 2.9462,
      "step": 168018
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4501407146453857,
      "learning_rate": 0.00010214455470722329,
      "loss": 2.9856,
      "step": 168019
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.18247652053833,
      "learning_rate": 0.00010214147989650834,
      "loss": 2.6969,
      "step": 168020
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.5176360607147217,
      "learning_rate": 0.00010213840512257884,
      "loss": 2.7982,
      "step": 168021
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.218078374862671,
      "learning_rate": 0.00010213533038543564,
      "loss": 3.1622,
      "step": 168022
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.252715587615967,
      "learning_rate": 0.00010213225568507912,
      "loss": 2.829,
      "step": 168023
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.926600694656372,
      "learning_rate": 0.00010212918102151005,
      "loss": 3.1885,
      "step": 168024
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5883264541625977,
      "learning_rate": 0.0001021261063947288,
      "loss": 2.8602,
      "step": 168025
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.077324628829956,
      "learning_rate": 0.00010212303180473624,
      "loss": 2.8245,
      "step": 168026
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.124530792236328,
      "learning_rate": 0.00010211995725153257,
      "loss": 2.803,
      "step": 168027
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6137051582336426,
      "learning_rate": 0.00010211688273511861,
      "loss": 3.0016,
      "step": 168028
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.474522352218628,
      "learning_rate": 0.00010211380825549475,
      "loss": 3.0777,
      "step": 168029
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.225574016571045,
      "learning_rate": 0.00010211073381266177,
      "loss": 2.7898,
      "step": 168030
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.708298683166504,
      "learning_rate": 0.00010210765940662006,
      "loss": 2.8211,
      "step": 168031
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5483150482177734,
      "learning_rate": 0.00010210458503737035,
      "loss": 3.0798,
      "step": 168032
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5377299785614014,
      "learning_rate": 0.00010210151070491314,
      "loss": 3.0365,
      "step": 168033
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.571594476699829,
      "learning_rate": 0.000102098436409249,
      "loss": 2.9744,
      "step": 168034
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2186596393585205,
      "learning_rate": 0.00010209536215037839,
      "loss": 2.9711,
      "step": 168035
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7251603603363037,
      "learning_rate": 0.00010209228792830209,
      "loss": 3.0525,
      "step": 168036
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3099844455718994,
      "learning_rate": 0.00010208921374302048,
      "loss": 3.1773,
      "step": 168037
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.604994535446167,
      "learning_rate": 0.00010208613959453431,
      "loss": 2.7278,
      "step": 168038
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1891636848449707,
      "learning_rate": 0.00010208306548284405,
      "loss": 2.9648,
      "step": 168039
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9899086952209473,
      "learning_rate": 0.0001020799914079502,
      "loss": 2.9134,
      "step": 168040
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2683632373809814,
      "learning_rate": 0.00010207691736985353,
      "loss": 2.8634,
      "step": 168041
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.001744031906128,
      "learning_rate": 0.00010207384336855446,
      "loss": 3.1654,
      "step": 168042
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7290561199188232,
      "learning_rate": 0.00010207076940405351,
      "loss": 2.8416,
      "step": 168043
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7584993839263916,
      "learning_rate": 0.00010206769547635146,
      "loss": 2.8104,
      "step": 168044
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2779572010040283,
      "learning_rate": 0.00010206462158544875,
      "loss": 2.8474,
      "step": 168045
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1834170818328857,
      "learning_rate": 0.00010206154773134586,
      "loss": 2.9731,
      "step": 168046
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1171748638153076,
      "learning_rate": 0.00010205847391404357,
      "loss": 2.7978,
      "step": 168047
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.0924713611602783,
      "learning_rate": 0.00010205540013354235,
      "loss": 2.93,
      "step": 168048
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.109403371810913,
      "learning_rate": 0.00010205232638984268,
      "loss": 3.0533,
      "step": 168049
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4834253787994385,
      "learning_rate": 0.0001020492526829453,
      "loss": 2.8752,
      "step": 168050
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3179731369018555,
      "learning_rate": 0.00010204617901285069,
      "loss": 2.9729,
      "step": 168051
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3905656337738037,
      "learning_rate": 0.00010204310537955934,
      "loss": 2.7554,
      "step": 168052
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.129521608352661,
      "learning_rate": 0.00010204003178307202,
      "loss": 3.1298,
      "step": 168053
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7495784759521484,
      "learning_rate": 0.00010203695822338909,
      "loss": 3.1555,
      "step": 168054
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6619064807891846,
      "learning_rate": 0.00010203388470051135,
      "loss": 3.0737,
      "step": 168055
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.286996364593506,
      "learning_rate": 0.00010203081121443921,
      "loss": 2.8649,
      "step": 168056
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2188098430633545,
      "learning_rate": 0.00010202773776517331,
      "loss": 2.7935,
      "step": 168057
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.459759473800659,
      "learning_rate": 0.00010202466435271405,
      "loss": 2.9877,
      "step": 168058
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9131288528442383,
      "learning_rate": 0.00010202159097706227,
      "loss": 2.9881,
      "step": 168059
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5141844749450684,
      "learning_rate": 0.00010201851763821833,
      "loss": 2.6504,
      "step": 168060
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.474078893661499,
      "learning_rate": 0.00010201544433618296,
      "loss": 3.0637,
      "step": 168061
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2033112049102783,
      "learning_rate": 0.00010201237107095666,
      "loss": 3.1493,
      "step": 168062
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.752140760421753,
      "learning_rate": 0.00010200929784254001,
      "loss": 3.0582,
      "step": 168063
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4885003566741943,
      "learning_rate": 0.00010200622465093345,
      "loss": 3.1222,
      "step": 168064
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.645421028137207,
      "learning_rate": 0.00010200315149613777,
      "loss": 2.8764,
      "step": 168065
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.505746603012085,
      "learning_rate": 0.00010200007837815335,
      "loss": 3.0094,
      "step": 168066
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9418883323669434,
      "learning_rate": 0.00010199700529698095,
      "loss": 3.1207,
      "step": 168067
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.912363290786743,
      "learning_rate": 0.00010199393225262107,
      "loss": 2.9471,
      "step": 168068
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.929279088973999,
      "learning_rate": 0.00010199085924507424,
      "loss": 2.7373,
      "step": 168069
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6850221157073975,
      "learning_rate": 0.00010198778627434094,
      "loss": 2.8522,
      "step": 168070
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.032589912414551,
      "learning_rate": 0.00010198471334042195,
      "loss": 3.1108,
      "step": 168071
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1498537063598633,
      "learning_rate": 0.00010198164044331764,
      "loss": 3.0007,
      "step": 168072
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4930548667907715,
      "learning_rate": 0.00010197856758302879,
      "loss": 2.9562,
      "step": 168073
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.297367811203003,
      "learning_rate": 0.00010197549475955585,
      "loss": 3.0375,
      "step": 168074
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8733866214752197,
      "learning_rate": 0.00010197242197289939,
      "loss": 3.124,
      "step": 168075
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4042818546295166,
      "learning_rate": 0.00010196934922305992,
      "loss": 2.997,
      "step": 168076
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.099086046218872,
      "learning_rate": 0.00010196627651003815,
      "loss": 3.0316,
      "step": 168077
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.653244972229004,
      "learning_rate": 0.00010196320383383452,
      "loss": 2.9017,
      "step": 168078
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4550390243530273,
      "learning_rate": 0.00010196013119444976,
      "loss": 2.7626,
      "step": 168079
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6804661750793457,
      "learning_rate": 0.00010195705859188434,
      "loss": 2.9686,
      "step": 168080
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5366313457489014,
      "learning_rate": 0.00010195398602613888,
      "loss": 2.7156,
      "step": 168081
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2666304111480713,
      "learning_rate": 0.0001019509134972138,
      "loss": 3.1028,
      "step": 168082
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5182580947875977,
      "learning_rate": 0.00010194784100510985,
      "loss": 3.0879,
      "step": 168083
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.416811227798462,
      "learning_rate": 0.00010194476854982746,
      "loss": 2.8554,
      "step": 168084
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6488559246063232,
      "learning_rate": 0.00010194169613136735,
      "loss": 2.7326,
      "step": 168085
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4988975524902344,
      "learning_rate": 0.00010193862374972994,
      "loss": 2.9906,
      "step": 168086
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.654099941253662,
      "learning_rate": 0.00010193555140491608,
      "loss": 2.9185,
      "step": 168087
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1913769245147705,
      "learning_rate": 0.00010193247909692594,
      "loss": 3.2395,
      "step": 168088
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5828962326049805,
      "learning_rate": 0.0001019294068257604,
      "loss": 3.1164,
      "step": 168089
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5294532775878906,
      "learning_rate": 0.0001019263345914198,
      "loss": 3.0149,
      "step": 168090
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.047724723815918,
      "learning_rate": 0.00010192326239390495,
      "loss": 3.0155,
      "step": 168091
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.660277843475342,
      "learning_rate": 0.00010192019023321617,
      "loss": 2.9282,
      "step": 168092
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9017255306243896,
      "learning_rate": 0.0001019171181093544,
      "loss": 2.8245,
      "step": 168093
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.749390602111816,
      "learning_rate": 0.00010191404602231977,
      "loss": 2.9652,
      "step": 168094
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.01908016204834,
      "learning_rate": 0.00010191097397211316,
      "loss": 2.617,
      "step": 168095
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.686837911605835,
      "learning_rate": 0.00010190790195873494,
      "loss": 2.9706,
      "step": 168096
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3484809398651123,
      "learning_rate": 0.00010190482998218586,
      "loss": 2.8696,
      "step": 168097
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.391846179962158,
      "learning_rate": 0.00010190175804246636,
      "loss": 2.8346,
      "step": 168098
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.719186544418335,
      "learning_rate": 0.00010189868613957724,
      "loss": 2.7408,
      "step": 168099
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.972938299179077,
      "learning_rate": 0.00010189561427351867,
      "loss": 3.0007,
      "step": 168100
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.515441417694092,
      "learning_rate": 0.00010189254244429156,
      "loss": 3.0584,
      "step": 168101
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4270877838134766,
      "learning_rate": 0.00010188947065189624,
      "loss": 2.9804,
      "step": 168102
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8598673343658447,
      "learning_rate": 0.00010188639889633356,
      "loss": 2.9191,
      "step": 168103
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3977622985839844,
      "learning_rate": 0.00010188332717760379,
      "loss": 3.0542,
      "step": 168104
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.568577289581299,
      "learning_rate": 0.00010188025549570789,
      "loss": 3.2129,
      "step": 168105
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.985699415206909,
      "learning_rate": 0.00010187718385064594,
      "loss": 2.9604,
      "step": 168106
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.929582118988037,
      "learning_rate": 0.00010187411224241891,
      "loss": 2.9705,
      "step": 168107
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.656618356704712,
      "learning_rate": 0.00010187104067102712,
      "loss": 3.0904,
      "step": 168108
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2073893547058105,
      "learning_rate": 0.00010186796913647132,
      "loss": 3.2383,
      "step": 168109
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9325366020202637,
      "learning_rate": 0.00010186489763875192,
      "loss": 2.6694,
      "step": 168110
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.882841110229492,
      "learning_rate": 0.00010186182617786975,
      "loss": 2.9016,
      "step": 168111
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9719836711883545,
      "learning_rate": 0.00010185875475382502,
      "loss": 3.1365,
      "step": 168112
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.656242847442627,
      "learning_rate": 0.00010185568336661859,
      "loss": 2.9051,
      "step": 168113
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.729506015777588,
      "learning_rate": 0.00010185261201625086,
      "loss": 3.0679,
      "step": 168114
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.036063194274902,
      "learning_rate": 0.00010184954070272253,
      "loss": 2.8256,
      "step": 168115
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4903998374938965,
      "learning_rate": 0.00010184646942603401,
      "loss": 3.098,
      "step": 168116
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1703755855560303,
      "learning_rate": 0.00010184339818618607,
      "loss": 3.0283,
      "step": 168117
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.689568281173706,
      "learning_rate": 0.0001018403269831792,
      "loss": 3.0768,
      "step": 168118
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.938082695007324,
      "learning_rate": 0.00010183725581701396,
      "loss": 2.7686,
      "step": 168119
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.886808156967163,
      "learning_rate": 0.0001018341846876908,
      "loss": 3.0397,
      "step": 168120
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6030943393707275,
      "learning_rate": 0.00010183111359521052,
      "loss": 3.1955,
      "step": 168121
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.839660167694092,
      "learning_rate": 0.00010182804253957345,
      "loss": 2.6432,
      "step": 168122
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.851797580718994,
      "learning_rate": 0.00010182497152078042,
      "loss": 3.0733,
      "step": 168123
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3566150665283203,
      "learning_rate": 0.00010182190053883185,
      "loss": 3.0161,
      "step": 168124
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.947600841522217,
      "learning_rate": 0.00010181882959372834,
      "loss": 2.8324,
      "step": 168125
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.787102460861206,
      "learning_rate": 0.00010181575868547035,
      "loss": 3.0093,
      "step": 168126
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8115134239196777,
      "learning_rate": 0.00010181268781405862,
      "loss": 2.9141,
      "step": 168127
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5899341106414795,
      "learning_rate": 0.00010180961697949359,
      "loss": 2.9244,
      "step": 168128
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.463735580444336,
      "learning_rate": 0.00010180654618177599,
      "loss": 3.1759,
      "step": 168129
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0339772701263428,
      "learning_rate": 0.00010180347542090627,
      "loss": 2.7232,
      "step": 168130
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3440089225769043,
      "learning_rate": 0.00010180040469688494,
      "loss": 2.8231,
      "step": 168131
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4303860664367676,
      "learning_rate": 0.00010179733400971277,
      "loss": 2.7051,
      "step": 168132
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.100827932357788,
      "learning_rate": 0.0001017942633593902,
      "loss": 3.0723,
      "step": 168133
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4030323028564453,
      "learning_rate": 0.0001017911927459177,
      "loss": 2.9412,
      "step": 168134
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.238765239715576,
      "learning_rate": 0.0001017881221692961,
      "loss": 2.9594,
      "step": 168135
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.671907663345337,
      "learning_rate": 0.0001017850516295258,
      "loss": 2.8299,
      "step": 168136
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.103257417678833,
      "learning_rate": 0.0001017819811266073,
      "loss": 2.7888,
      "step": 168137
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5816924571990967,
      "learning_rate": 0.0001017789106605414,
      "loss": 2.8798,
      "step": 168138
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3428714275360107,
      "learning_rate": 0.00010177584023132842,
      "loss": 2.8595,
      "step": 168139
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5424587726593018,
      "learning_rate": 0.00010177276983896916,
      "loss": 2.7345,
      "step": 168140
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4044721126556396,
      "learning_rate": 0.00010176969948346408,
      "loss": 2.7334,
      "step": 168141
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.473008632659912,
      "learning_rate": 0.00010176662916481377,
      "loss": 2.9198,
      "step": 168142
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.554814338684082,
      "learning_rate": 0.00010176355888301866,
      "loss": 2.8976,
      "step": 168143
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2677509784698486,
      "learning_rate": 0.00010176048863807954,
      "loss": 2.9416,
      "step": 168144
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.460364818572998,
      "learning_rate": 0.00010175741842999681,
      "loss": 2.7695,
      "step": 168145
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4420926570892334,
      "learning_rate": 0.00010175434825877123,
      "loss": 2.9773,
      "step": 168146
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3224594593048096,
      "learning_rate": 0.00010175127812440322,
      "loss": 3.0081,
      "step": 168147
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.532588481903076,
      "learning_rate": 0.0001017482080268934,
      "loss": 3.0948,
      "step": 168148
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.539503335952759,
      "learning_rate": 0.00010174513796624224,
      "loss": 3.2672,
      "step": 168149
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.807657480239868,
      "learning_rate": 0.00010174206794245052,
      "loss": 2.9967,
      "step": 168150
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4995412826538086,
      "learning_rate": 0.00010173899795551859,
      "loss": 2.8878,
      "step": 168151
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4877946376800537,
      "learning_rate": 0.0001017359280054472,
      "loss": 2.9494,
      "step": 168152
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.489473819732666,
      "learning_rate": 0.00010173285809223672,
      "loss": 2.7506,
      "step": 168153
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.545264959335327,
      "learning_rate": 0.0001017297882158881,
      "loss": 3.014,
      "step": 168154
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.81316876411438,
      "learning_rate": 0.00010172671837640142,
      "loss": 2.7725,
      "step": 168155
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8484840393066406,
      "learning_rate": 0.00010172364857377759,
      "loss": 2.8185,
      "step": 168156
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.295294761657715,
      "learning_rate": 0.000101720578808017,
      "loss": 2.7834,
      "step": 168157
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.827047109603882,
      "learning_rate": 0.00010171750907912039,
      "loss": 2.9232,
      "step": 168158
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.823499917984009,
      "learning_rate": 0.00010171443938708812,
      "loss": 3.0311,
      "step": 168159
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.115168809890747,
      "learning_rate": 0.00010171136973192107,
      "loss": 3.0004,
      "step": 168160
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.032478094100952,
      "learning_rate": 0.00010170830011361942,
      "loss": 2.8888,
      "step": 168161
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.308889627456665,
      "learning_rate": 0.00010170523053218406,
      "loss": 2.9352,
      "step": 168162
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6578006744384766,
      "learning_rate": 0.00010170216098761534,
      "loss": 2.9557,
      "step": 168163
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.520447015762329,
      "learning_rate": 0.00010169909147991405,
      "loss": 2.8378,
      "step": 168164
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.589002847671509,
      "learning_rate": 0.00010169602200908051,
      "loss": 2.9048,
      "step": 168165
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5430123805999756,
      "learning_rate": 0.00010169295257511562,
      "loss": 3.0938,
      "step": 168166
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.6867823600769043,
      "learning_rate": 0.00010168988317801956,
      "loss": 2.8796,
      "step": 168167
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.7167301177978516,
      "learning_rate": 0.00010168681381779321,
      "loss": 2.9709,
      "step": 168168
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2717320919036865,
      "learning_rate": 0.0001016837444944369,
      "loss": 2.8184,
      "step": 168169
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4258103370666504,
      "learning_rate": 0.00010168067520795147,
      "loss": 3.1127,
      "step": 168170
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7592761516571045,
      "learning_rate": 0.0001016776059583372,
      "loss": 2.8021,
      "step": 168171
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5388240814208984,
      "learning_rate": 0.00010167453674559501,
      "loss": 3.0881,
      "step": 168172
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.601630449295044,
      "learning_rate": 0.0001016714675697251,
      "loss": 2.9232,
      "step": 168173
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.446751356124878,
      "learning_rate": 0.00010166839843072829,
      "loss": 2.9494,
      "step": 168174
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7627294063568115,
      "learning_rate": 0.00010166532932860498,
      "loss": 3.1809,
      "step": 168175
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2964744567871094,
      "learning_rate": 0.0001016622602633559,
      "loss": 3.0587,
      "step": 168176
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7116479873657227,
      "learning_rate": 0.00010165919123498146,
      "loss": 2.7504,
      "step": 168177
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.605644702911377,
      "learning_rate": 0.00010165612224348253,
      "loss": 2.7139,
      "step": 168178
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6461777687072754,
      "learning_rate": 0.00010165305328885927,
      "loss": 3.0134,
      "step": 168179
    },
    {
      "epoch": 2.19,
      "grad_norm": 7.089250564575195,
      "learning_rate": 0.00010164998437111254,
      "loss": 3.0006,
      "step": 168180
    },
    {
      "epoch": 2.19,
      "grad_norm": 5.341267108917236,
      "learning_rate": 0.00010164691549024275,
      "loss": 2.8212,
      "step": 168181
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.352795362472534,
      "learning_rate": 0.00010164384664625061,
      "loss": 3.0427,
      "step": 168182
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9177744388580322,
      "learning_rate": 0.00010164077783913652,
      "loss": 2.7102,
      "step": 168183
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.5030174255371094,
      "learning_rate": 0.0001016377090689014,
      "loss": 2.78,
      "step": 168184
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.204455852508545,
      "learning_rate": 0.00010163464033554534,
      "loss": 2.9293,
      "step": 168185
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9031178951263428,
      "learning_rate": 0.00010163157163906924,
      "loss": 3.1706,
      "step": 168186
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.35587215423584,
      "learning_rate": 0.00010162850297947348,
      "loss": 3.1091,
      "step": 168187
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.247614860534668,
      "learning_rate": 0.00010162543435675882,
      "loss": 2.8966,
      "step": 168188
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.522637367248535,
      "learning_rate": 0.00010162236577092566,
      "loss": 2.9599,
      "step": 168189
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5041592121124268,
      "learning_rate": 0.00010161929722197482,
      "loss": 3.0715,
      "step": 168190
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3001701831817627,
      "learning_rate": 0.00010161622870990651,
      "loss": 3.1186,
      "step": 168191
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.831860303878784,
      "learning_rate": 0.0001016131602347216,
      "loss": 3.0006,
      "step": 168192
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.6459193229675293,
      "learning_rate": 0.00010161009179642042,
      "loss": 2.6655,
      "step": 168193
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4959518909454346,
      "learning_rate": 0.00010160702339500381,
      "loss": 2.9948,
      "step": 168194
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3050873279571533,
      "learning_rate": 0.00010160395503047205,
      "loss": 2.9012,
      "step": 168195
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9638452529907227,
      "learning_rate": 0.00010160088670282609,
      "loss": 3.0694,
      "step": 168196
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.135434627532959,
      "learning_rate": 0.00010159781841206606,
      "loss": 3.1814,
      "step": 168197
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.717059850692749,
      "learning_rate": 0.00010159475015819285,
      "loss": 2.8119,
      "step": 168198
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5636730194091797,
      "learning_rate": 0.00010159168194120684,
      "loss": 2.8006,
      "step": 168199
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.380889892578125,
      "learning_rate": 0.00010158861376110875,
      "loss": 2.9001,
      "step": 168200
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.379303455352783,
      "learning_rate": 0.00010158554561789899,
      "loss": 2.8636,
      "step": 168201
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0808839797973633,
      "learning_rate": 0.00010158247751157832,
      "loss": 2.9708,
      "step": 168202
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.701862335205078,
      "learning_rate": 0.0001015794094421472,
      "loss": 2.9594,
      "step": 168203
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5050904750823975,
      "learning_rate": 0.00010157634140960625,
      "loss": 2.9091,
      "step": 168204
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9208006858825684,
      "learning_rate": 0.00010157327341395586,
      "loss": 3.0049,
      "step": 168205
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.728132486343384,
      "learning_rate": 0.00010157020545519689,
      "loss": 3.0346,
      "step": 168206
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4860024452209473,
      "learning_rate": 0.00010156713753332964,
      "loss": 3.0956,
      "step": 168207
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.537228584289551,
      "learning_rate": 0.0001015640696483549,
      "loss": 3.014,
      "step": 168208
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3775930404663086,
      "learning_rate": 0.00010156100180027313,
      "loss": 2.6886,
      "step": 168209
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.007610321044922,
      "learning_rate": 0.00010155793398908496,
      "loss": 2.8303,
      "step": 168210
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.858441114425659,
      "learning_rate": 0.00010155486621479078,
      "loss": 2.835,
      "step": 168211
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2646076679229736,
      "learning_rate": 0.00010155179847739141,
      "loss": 3.0142,
      "step": 168212
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2130398750305176,
      "learning_rate": 0.00010154873077688722,
      "loss": 2.9527,
      "step": 168213
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3828275203704834,
      "learning_rate": 0.00010154566311327896,
      "loss": 2.8619,
      "step": 168214
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.01936674118042,
      "learning_rate": 0.00010154259548656706,
      "loss": 2.8928,
      "step": 168215
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.148003578186035,
      "learning_rate": 0.0001015395278967521,
      "loss": 2.9435,
      "step": 168216
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.301382541656494,
      "learning_rate": 0.00010153646034383476,
      "loss": 3.0987,
      "step": 168217
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.568899154663086,
      "learning_rate": 0.00010153339282781557,
      "loss": 2.7471,
      "step": 168218
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8766448497772217,
      "learning_rate": 0.00010153032534869492,
      "loss": 2.938,
      "step": 168219
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.534203052520752,
      "learning_rate": 0.00010152725790647366,
      "loss": 2.8746,
      "step": 168220
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4538493156433105,
      "learning_rate": 0.00010152419050115222,
      "loss": 2.9382,
      "step": 168221
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7410471439361572,
      "learning_rate": 0.00010152112313273108,
      "loss": 3.0814,
      "step": 168222
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.955592393875122,
      "learning_rate": 0.00010151805580121101,
      "loss": 3.0565,
      "step": 168223
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0522093772888184,
      "learning_rate": 0.00010151498850659238,
      "loss": 2.8662,
      "step": 168224
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.409003496170044,
      "learning_rate": 0.00010151192124887596,
      "loss": 2.9948,
      "step": 168225
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5169646739959717,
      "learning_rate": 0.00010150885402806222,
      "loss": 2.8801,
      "step": 168226
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8160481452941895,
      "learning_rate": 0.00010150578684415174,
      "loss": 2.9818,
      "step": 168227
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.70154070854187,
      "learning_rate": 0.00010150271969714496,
      "loss": 3.0849,
      "step": 168228
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3863818645477295,
      "learning_rate": 0.0001014996525870427,
      "loss": 2.9976,
      "step": 168229
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2200963497161865,
      "learning_rate": 0.00010149658551384529,
      "loss": 2.9069,
      "step": 168230
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.683236598968506,
      "learning_rate": 0.00010149351847755348,
      "loss": 3.0172,
      "step": 168231
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.644915819168091,
      "learning_rate": 0.00010149045147816782,
      "loss": 2.9685,
      "step": 168232
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4463400840759277,
      "learning_rate": 0.0001014873845156888,
      "loss": 3.0575,
      "step": 168233
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.320470094680786,
      "learning_rate": 0.00010148431759011693,
      "loss": 3.0453,
      "step": 168234
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1639645099639893,
      "learning_rate": 0.00010148125070145298,
      "loss": 2.978,
      "step": 168235
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.35673189163208,
      "learning_rate": 0.00010147818384969733,
      "loss": 2.8455,
      "step": 168236
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2480549812316895,
      "learning_rate": 0.00010147511703485068,
      "loss": 2.9035,
      "step": 168237
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6164727210998535,
      "learning_rate": 0.00010147205025691348,
      "loss": 2.6815,
      "step": 168238
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.8685739040374756,
      "learning_rate": 0.0001014689835158866,
      "loss": 3.1998,
      "step": 168239
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6932754516601562,
      "learning_rate": 0.00010146591681177017,
      "loss": 2.8981,
      "step": 168240
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.868821620941162,
      "learning_rate": 0.00010146285014456507,
      "loss": 2.8466,
      "step": 168241
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.689929246902466,
      "learning_rate": 0.00010145978351427167,
      "loss": 3.0558,
      "step": 168242
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.446038246154785,
      "learning_rate": 0.00010145671692089073,
      "loss": 2.9819,
      "step": 168243
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.312282085418701,
      "learning_rate": 0.00010145365036442264,
      "loss": 3.1507,
      "step": 168244
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5508530139923096,
      "learning_rate": 0.0001014505838448683,
      "loss": 2.7561,
      "step": 168245
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.5557169914245605,
      "learning_rate": 0.00010144751736222781,
      "loss": 2.9835,
      "step": 168246
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1383190155029297,
      "learning_rate": 0.00010144445091650207,
      "loss": 2.9897,
      "step": 168247
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2465994358062744,
      "learning_rate": 0.00010144138450769147,
      "loss": 3.1444,
      "step": 168248
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.170499563217163,
      "learning_rate": 0.00010143831813579677,
      "loss": 2.9249,
      "step": 168249
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.527848720550537,
      "learning_rate": 0.00010143525180081831,
      "loss": 3.1367,
      "step": 168250
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.645162343978882,
      "learning_rate": 0.00010143218550275701,
      "loss": 2.8722,
      "step": 168251
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.78302001953125,
      "learning_rate": 0.00010142911924161298,
      "loss": 3.227,
      "step": 168252
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6941161155700684,
      "learning_rate": 0.00010142605301738717,
      "loss": 2.7959,
      "step": 168253
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2991909980773926,
      "learning_rate": 0.00010142298683007988,
      "loss": 3.0183,
      "step": 168254
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.823798418045044,
      "learning_rate": 0.00010141992067969192,
      "loss": 3.044,
      "step": 168255
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1472485065460205,
      "learning_rate": 0.00010141685456622363,
      "loss": 2.884,
      "step": 168256
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8035523891448975,
      "learning_rate": 0.00010141378848967592,
      "loss": 2.9652,
      "step": 168257
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4263837337493896,
      "learning_rate": 0.0001014107224500489,
      "loss": 3.1695,
      "step": 168258
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8080310821533203,
      "learning_rate": 0.0001014076564473435,
      "loss": 2.9344,
      "step": 168259
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.506704807281494,
      "learning_rate": 0.00010140459048156006,
      "loss": 2.8871,
      "step": 168260
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.637687921524048,
      "learning_rate": 0.00010140152455269933,
      "loss": 2.9686,
      "step": 168261
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.580564498901367,
      "learning_rate": 0.00010139845866076174,
      "loss": 3.0371,
      "step": 168262
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.251523494720459,
      "learning_rate": 0.00010139539280574811,
      "loss": 3.2515,
      "step": 168263
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3037843704223633,
      "learning_rate": 0.00010139232698765865,
      "loss": 2.9854,
      "step": 168264
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.376987934112549,
      "learning_rate": 0.00010138926120649418,
      "loss": 2.8746,
      "step": 168265
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4050493240356445,
      "learning_rate": 0.0001013861954622551,
      "loss": 2.8356,
      "step": 168266
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.955348253250122,
      "learning_rate": 0.00010138312975494216,
      "loss": 2.919,
      "step": 168267
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.539292573928833,
      "learning_rate": 0.00010138006408455577,
      "loss": 2.8897,
      "step": 168268
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.14078426361084,
      "learning_rate": 0.00010137699845109676,
      "loss": 2.9816,
      "step": 168269
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.45271635055542,
      "learning_rate": 0.00010137393285456533,
      "loss": 2.818,
      "step": 168270
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.251500368118286,
      "learning_rate": 0.00010137086729496233,
      "loss": 3.148,
      "step": 168271
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1625850200653076,
      "learning_rate": 0.00010136780177228812,
      "loss": 2.9554,
      "step": 168272
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0295162200927734,
      "learning_rate": 0.00010136473628654351,
      "loss": 2.8949,
      "step": 168273
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1191229820251465,
      "learning_rate": 0.00010136167083772883,
      "loss": 2.8271,
      "step": 168274
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7701098918914795,
      "learning_rate": 0.00010135860542584497,
      "loss": 2.8767,
      "step": 168275
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9322073459625244,
      "learning_rate": 0.00010135554005089209,
      "loss": 2.9841,
      "step": 168276
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4706974029541016,
      "learning_rate": 0.00010135247471287105,
      "loss": 2.9773,
      "step": 168277
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8879120349884033,
      "learning_rate": 0.00010134940941178226,
      "loss": 2.6555,
      "step": 168278
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5414342880249023,
      "learning_rate": 0.00010134634414762645,
      "loss": 2.8383,
      "step": 168279
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3968563079833984,
      "learning_rate": 0.00010134327892040401,
      "loss": 2.8981,
      "step": 168280
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3746120929718018,
      "learning_rate": 0.00010134021373011579,
      "loss": 2.8336,
      "step": 168281
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.729937791824341,
      "learning_rate": 0.000101337148576762,
      "loss": 3.0889,
      "step": 168282
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2448315620422363,
      "learning_rate": 0.00010133408346034348,
      "loss": 3.206,
      "step": 168283
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0433552265167236,
      "learning_rate": 0.00010133101838086059,
      "loss": 2.9207,
      "step": 168284
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.185478687286377,
      "learning_rate": 0.00010132795333831412,
      "loss": 2.9228,
      "step": 168285
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0478155612945557,
      "learning_rate": 0.00010132488833270444,
      "loss": 2.9801,
      "step": 168286
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.439650535583496,
      "learning_rate": 0.00010132182336403232,
      "loss": 2.8253,
      "step": 168287
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2784950733184814,
      "learning_rate": 0.00010131875843229821,
      "loss": 2.7572,
      "step": 168288
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7546334266662598,
      "learning_rate": 0.00010131569353750268,
      "loss": 2.9,
      "step": 168289
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3892557621002197,
      "learning_rate": 0.00010131262867964625,
      "loss": 3.0686,
      "step": 168290
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4821712970733643,
      "learning_rate": 0.00010130956385872962,
      "loss": 2.9189,
      "step": 168291
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3288726806640625,
      "learning_rate": 0.00010130649907475319,
      "loss": 2.9792,
      "step": 168292
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1987130641937256,
      "learning_rate": 0.00010130343432771777,
      "loss": 2.7781,
      "step": 168293
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.066303014755249,
      "learning_rate": 0.00010130036961762376,
      "loss": 2.958,
      "step": 168294
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0122528076171875,
      "learning_rate": 0.00010129730494447178,
      "loss": 2.9039,
      "step": 168295
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6706297397613525,
      "learning_rate": 0.00010129424030826224,
      "loss": 3.0833,
      "step": 168296
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.757630348205566,
      "learning_rate": 0.00010129117570899598,
      "loss": 2.9111,
      "step": 168297
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.382718324661255,
      "learning_rate": 0.00010128811114667338,
      "loss": 3.0306,
      "step": 168298
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1849400997161865,
      "learning_rate": 0.00010128504662129511,
      "loss": 2.7727,
      "step": 168299
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.379565715789795,
      "learning_rate": 0.00010128198213286174,
      "loss": 3.004,
      "step": 168300
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2938547134399414,
      "learning_rate": 0.00010127891768137367,
      "loss": 2.9051,
      "step": 168301
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.495265007019043,
      "learning_rate": 0.00010127585326683172,
      "loss": 2.8857,
      "step": 168302
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0496103763580322,
      "learning_rate": 0.00010127278888923634,
      "loss": 2.8179,
      "step": 168303
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.785290241241455,
      "learning_rate": 0.00010126972454858802,
      "loss": 2.8129,
      "step": 168304
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.244267463684082,
      "learning_rate": 0.00010126666024488747,
      "loss": 2.9864,
      "step": 168305
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8393537998199463,
      "learning_rate": 0.00010126359597813522,
      "loss": 3.0126,
      "step": 168306
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2291388511657715,
      "learning_rate": 0.00010126053174833173,
      "loss": 3.0496,
      "step": 168307
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7750070095062256,
      "learning_rate": 0.00010125746755547774,
      "loss": 3.014,
      "step": 168308
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.9288330078125,
      "learning_rate": 0.00010125440339957373,
      "loss": 2.9905,
      "step": 168309
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2525899410247803,
      "learning_rate": 0.0001012513392806202,
      "loss": 2.9409,
      "step": 168310
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.282344102859497,
      "learning_rate": 0.00010124827519861789,
      "loss": 3.0148,
      "step": 168311
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.694138526916504,
      "learning_rate": 0.00010124521115356726,
      "loss": 3.1645,
      "step": 168312
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.856391429901123,
      "learning_rate": 0.00010124214714546883,
      "loss": 2.9686,
      "step": 168313
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.781757354736328,
      "learning_rate": 0.00010123908317432332,
      "loss": 3.1329,
      "step": 168314
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.52486252784729,
      "learning_rate": 0.0001012360192401311,
      "loss": 2.9444,
      "step": 168315
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.394968032836914,
      "learning_rate": 0.00010123295534289298,
      "loss": 2.8525,
      "step": 168316
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.799476623535156,
      "learning_rate": 0.00010122989148260939,
      "loss": 2.7097,
      "step": 168317
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2969231605529785,
      "learning_rate": 0.00010122682765928092,
      "loss": 2.9418,
      "step": 168318
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.4384355545043945,
      "learning_rate": 0.00010122376387290805,
      "loss": 2.8386,
      "step": 168319
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5018556118011475,
      "learning_rate": 0.00010122070012349151,
      "loss": 3.0411,
      "step": 168320
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7517788410186768,
      "learning_rate": 0.00010121763641103172,
      "loss": 2.8926,
      "step": 168321
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.282360076904297,
      "learning_rate": 0.0001012145727355294,
      "loss": 3.1179,
      "step": 168322
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.7946505546569824,
      "learning_rate": 0.00010121150909698495,
      "loss": 3.0058,
      "step": 168323
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4906606674194336,
      "learning_rate": 0.00010120844549539922,
      "loss": 2.6688,
      "step": 168324
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.549210548400879,
      "learning_rate": 0.00010120538193077244,
      "loss": 2.9154,
      "step": 168325
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7389307022094727,
      "learning_rate": 0.00010120231840310539,
      "loss": 2.7886,
      "step": 168326
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.464308023452759,
      "learning_rate": 0.00010119925491239849,
      "loss": 2.9309,
      "step": 168327
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.643510580062866,
      "learning_rate": 0.0001011961914586525,
      "loss": 3.0107,
      "step": 168328
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8660967350006104,
      "learning_rate": 0.0001011931280418678,
      "loss": 2.9992,
      "step": 168329
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2227258682250977,
      "learning_rate": 0.00010119006466204526,
      "loss": 2.8801,
      "step": 168330
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.42680025100708,
      "learning_rate": 0.00010118700131918501,
      "loss": 3.0393,
      "step": 168331
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8398265838623047,
      "learning_rate": 0.00010118393801328797,
      "loss": 2.8444,
      "step": 168332
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.279057264328003,
      "learning_rate": 0.00010118087474435446,
      "loss": 2.9524,
      "step": 168333
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7727386951446533,
      "learning_rate": 0.00010117781151238531,
      "loss": 2.9294,
      "step": 168334
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.1128151416778564,
      "learning_rate": 0.00010117474831738086,
      "loss": 2.9039,
      "step": 168335
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4120123386383057,
      "learning_rate": 0.00010117168515934194,
      "loss": 3.0328,
      "step": 168336
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.114206314086914,
      "learning_rate": 0.00010116862203826879,
      "loss": 3.0253,
      "step": 168337
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7017507553100586,
      "learning_rate": 0.00010116555895416226,
      "loss": 2.9122,
      "step": 168338
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8103322982788086,
      "learning_rate": 0.00010116249590702267,
      "loss": 2.9727,
      "step": 168339
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1256117820739746,
      "learning_rate": 0.00010115943289685084,
      "loss": 2.8104,
      "step": 168340
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6172091960906982,
      "learning_rate": 0.00010115636992364713,
      "loss": 3.1048,
      "step": 168341
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.501005172729492,
      "learning_rate": 0.00010115330698741239,
      "loss": 2.7635,
      "step": 168342
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.806121826171875,
      "learning_rate": 0.00010115024408814684,
      "loss": 2.9027,
      "step": 168343
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8427515029907227,
      "learning_rate": 0.00010114718122585129,
      "loss": 2.786,
      "step": 168344
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8880865573883057,
      "learning_rate": 0.00010114411840052612,
      "loss": 2.7448,
      "step": 168345
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8414411544799805,
      "learning_rate": 0.00010114105561217212,
      "loss": 2.7941,
      "step": 168346
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.712351083755493,
      "learning_rate": 0.00010113799286078964,
      "loss": 2.9139,
      "step": 168347
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.736130475997925,
      "learning_rate": 0.0001011349301463796,
      "loss": 2.8567,
      "step": 168348
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.090782642364502,
      "learning_rate": 0.00010113186746894206,
      "loss": 3.0413,
      "step": 168349
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.611992359161377,
      "learning_rate": 0.00010112880482847803,
      "loss": 2.5953,
      "step": 168350
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6720898151397705,
      "learning_rate": 0.00010112574222498779,
      "loss": 2.9658,
      "step": 168351
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.198563814163208,
      "learning_rate": 0.00010112267965847209,
      "loss": 2.6485,
      "step": 168352
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.252742052078247,
      "learning_rate": 0.00010111961712893139,
      "loss": 2.9471,
      "step": 168353
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.6644067764282227,
      "learning_rate": 0.00010111655463636639,
      "loss": 2.8536,
      "step": 168354
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.5766854286193848,
      "learning_rate": 0.00010111349218077757,
      "loss": 3.0892,
      "step": 168355
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8900604248046875,
      "learning_rate": 0.0001011104297621655,
      "loss": 3.071,
      "step": 168356
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.272444009780884,
      "learning_rate": 0.00010110736738053065,
      "loss": 2.8322,
      "step": 168357
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2294440269470215,
      "learning_rate": 0.00010110430503587384,
      "loss": 3.1537,
      "step": 168358
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.3179361820220947,
      "learning_rate": 0.00010110124272819537,
      "loss": 3.1601,
      "step": 168359
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3147380352020264,
      "learning_rate": 0.00010109818045749602,
      "loss": 3.0828,
      "step": 168360
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8917222023010254,
      "learning_rate": 0.00010109511822377629,
      "loss": 2.7017,
      "step": 168361
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.299370288848877,
      "learning_rate": 0.0001010920560270367,
      "loss": 2.9025,
      "step": 168362
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.258323907852173,
      "learning_rate": 0.00010108899386727777,
      "loss": 2.9713,
      "step": 168363
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.138437271118164,
      "learning_rate": 0.00010108593174450027,
      "loss": 3.0195,
      "step": 168364
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.833723545074463,
      "learning_rate": 0.00010108286965870451,
      "loss": 3.0723,
      "step": 168365
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.0002923011779785,
      "learning_rate": 0.00010107980760989134,
      "loss": 2.6922,
      "step": 168366
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.649570941925049,
      "learning_rate": 0.00010107674559806118,
      "loss": 2.8946,
      "step": 168367
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.268737554550171,
      "learning_rate": 0.00010107368362321461,
      "loss": 2.9933,
      "step": 168368
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2786073684692383,
      "learning_rate": 0.00010107062168535208,
      "loss": 2.9112,
      "step": 168369
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.5136213302612305,
      "learning_rate": 0.00010106755978447438,
      "loss": 3.1786,
      "step": 168370
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6830391883850098,
      "learning_rate": 0.00010106449792058187,
      "loss": 3.1849,
      "step": 168371
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3739023208618164,
      "learning_rate": 0.00010106143609367533,
      "loss": 3.0605,
      "step": 168372
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.541731357574463,
      "learning_rate": 0.00010105837430375523,
      "loss": 2.9774,
      "step": 168373
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8504178524017334,
      "learning_rate": 0.00010105531255082214,
      "loss": 3.031,
      "step": 168374
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2059686183929443,
      "learning_rate": 0.0001010522508348765,
      "loss": 3.0319,
      "step": 168375
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9666078090667725,
      "learning_rate": 0.00010104918915591915,
      "loss": 2.9247,
      "step": 168376
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7107603549957275,
      "learning_rate": 0.00010104612751395036,
      "loss": 2.998,
      "step": 168377
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.217829942703247,
      "learning_rate": 0.00010104306590897099,
      "loss": 2.8231,
      "step": 168378
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4454634189605713,
      "learning_rate": 0.00010104000434098143,
      "loss": 2.8231,
      "step": 168379
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.080094575881958,
      "learning_rate": 0.0001010369428099823,
      "loss": 2.8721,
      "step": 168380
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.104259729385376,
      "learning_rate": 0.00010103388131597408,
      "loss": 2.903,
      "step": 168381
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7108585834503174,
      "learning_rate": 0.00010103081985895751,
      "loss": 3.2108,
      "step": 168382
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.874258995056152,
      "learning_rate": 0.00010102775843893296,
      "loss": 3.0217,
      "step": 168383
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.196876287460327,
      "learning_rate": 0.00010102469705590119,
      "loss": 3.0251,
      "step": 168384
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6169357299804688,
      "learning_rate": 0.00010102163570986271,
      "loss": 3.0783,
      "step": 168385
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.819275856018066,
      "learning_rate": 0.00010101857440081795,
      "loss": 2.7221,
      "step": 168386
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0015289783477783,
      "learning_rate": 0.0001010155131287677,
      "loss": 2.8597,
      "step": 168387
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6047067642211914,
      "learning_rate": 0.00010101245189371242,
      "loss": 2.9548,
      "step": 168388
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7856345176696777,
      "learning_rate": 0.00010100939069565258,
      "loss": 2.9563,
      "step": 168389
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1063120365142822,
      "learning_rate": 0.00010100632953458895,
      "loss": 3.0664,
      "step": 168390
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3442790508270264,
      "learning_rate": 0.000101003268410522,
      "loss": 3.228,
      "step": 168391
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.987903594970703,
      "learning_rate": 0.0001010002073234522,
      "loss": 3.0807,
      "step": 168392
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3888213634490967,
      "learning_rate": 0.00010099714627338031,
      "loss": 3.2108,
      "step": 168393
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5999395847320557,
      "learning_rate": 0.00010099408526030683,
      "loss": 2.8376,
      "step": 168394
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.149186134338379,
      "learning_rate": 0.00010099102428423219,
      "loss": 2.9389,
      "step": 168395
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.669668436050415,
      "learning_rate": 0.00010098796334515716,
      "loss": 2.7707,
      "step": 168396
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9532461166381836,
      "learning_rate": 0.00010098490244308225,
      "loss": 2.7888,
      "step": 168397
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9400415420532227,
      "learning_rate": 0.00010098184157800789,
      "loss": 2.951,
      "step": 168398
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.146528720855713,
      "learning_rate": 0.00010097878074993484,
      "loss": 2.9705,
      "step": 168399
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.9338743686676025,
      "learning_rate": 0.00010097571995886355,
      "loss": 2.878,
      "step": 168400
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2526187896728516,
      "learning_rate": 0.0001009726592047947,
      "loss": 2.9836,
      "step": 168401
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.679487705230713,
      "learning_rate": 0.00010096959848772876,
      "loss": 2.8595,
      "step": 168402
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8012607097625732,
      "learning_rate": 0.00010096653780766637,
      "loss": 2.7763,
      "step": 168403
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.607963800430298,
      "learning_rate": 0.00010096347716460794,
      "loss": 2.9522,
      "step": 168404
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.578125238418579,
      "learning_rate": 0.00010096041655855427,
      "loss": 3.0344,
      "step": 168405
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1874494552612305,
      "learning_rate": 0.00010095735598950571,
      "loss": 3.0442,
      "step": 168406
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3303639888763428,
      "learning_rate": 0.00010095429545746302,
      "loss": 3.0932,
      "step": 168407
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4815733432769775,
      "learning_rate": 0.00010095123496242669,
      "loss": 2.7301,
      "step": 168408
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.008516550064087,
      "learning_rate": 0.00010094817450439728,
      "loss": 2.8231,
      "step": 168409
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.058340549468994,
      "learning_rate": 0.00010094511408337528,
      "loss": 3.0259,
      "step": 168410
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.435932159423828,
      "learning_rate": 0.00010094205369936144,
      "loss": 3.0644,
      "step": 168411
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.911367177963257,
      "learning_rate": 0.00010093899335235613,
      "loss": 3.2209,
      "step": 168412
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.771332025527954,
      "learning_rate": 0.00010093593304236011,
      "loss": 2.863,
      "step": 168413
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6490089893341064,
      "learning_rate": 0.00010093287276937376,
      "loss": 2.6799,
      "step": 168414
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4668617248535156,
      "learning_rate": 0.00010092981253339793,
      "loss": 3.1752,
      "step": 168415
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3118181228637695,
      "learning_rate": 0.00010092675233443279,
      "loss": 2.8983,
      "step": 168416
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3824992179870605,
      "learning_rate": 0.00010092369217247925,
      "loss": 2.8112,
      "step": 168417
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.824035167694092,
      "learning_rate": 0.00010092063204753767,
      "loss": 2.9715,
      "step": 168418
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.668529748916626,
      "learning_rate": 0.0001009175719596088,
      "loss": 3.0166,
      "step": 168419
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.799560308456421,
      "learning_rate": 0.000100914511908693,
      "loss": 2.9566,
      "step": 168420
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.703521966934204,
      "learning_rate": 0.00010091145189479103,
      "loss": 3.1643,
      "step": 168421
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6578104496002197,
      "learning_rate": 0.00010090839191790342,
      "loss": 2.902,
      "step": 168422
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.795274019241333,
      "learning_rate": 0.00010090533197803067,
      "loss": 2.8286,
      "step": 168423
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.200640916824341,
      "learning_rate": 0.00010090227207517327,
      "loss": 3.112,
      "step": 168424
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.094170331954956,
      "learning_rate": 0.00010089921220933204,
      "loss": 2.9002,
      "step": 168425
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5801477432250977,
      "learning_rate": 0.00010089615238050725,
      "loss": 2.8891,
      "step": 168426
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.320202589035034,
      "learning_rate": 0.00010089309258869977,
      "loss": 3.0801,
      "step": 168427
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.679996967315674,
      "learning_rate": 0.00010089003283391,
      "loss": 3.0495,
      "step": 168428
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.722106456756592,
      "learning_rate": 0.00010088697311613851,
      "loss": 2.9201,
      "step": 168429
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.440272808074951,
      "learning_rate": 0.00010088391343538582,
      "loss": 3.1017,
      "step": 168430
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.295445442199707,
      "learning_rate": 0.00010088085379165265,
      "loss": 2.7203,
      "step": 168431
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3450028896331787,
      "learning_rate": 0.00010087779418493943,
      "loss": 3.1732,
      "step": 168432
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2910401821136475,
      "learning_rate": 0.00010087473461524683,
      "loss": 2.9492,
      "step": 168433
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6002798080444336,
      "learning_rate": 0.00010087167508257542,
      "loss": 2.8341,
      "step": 168434
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8704628944396973,
      "learning_rate": 0.0001008686155869257,
      "loss": 2.9202,
      "step": 168435
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.983900785446167,
      "learning_rate": 0.00010086555612829819,
      "loss": 2.6571,
      "step": 168436
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.278045892715454,
      "learning_rate": 0.0001008624967066936,
      "loss": 3.3347,
      "step": 168437
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.77097749710083,
      "learning_rate": 0.00010085943732211233,
      "loss": 2.8,
      "step": 168438
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.215085029602051,
      "learning_rate": 0.00010085637797455516,
      "loss": 3.0181,
      "step": 168439
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5768656730651855,
      "learning_rate": 0.00010085331866402257,
      "loss": 2.9879,
      "step": 168440
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9123024940490723,
      "learning_rate": 0.00010085025939051507,
      "loss": 2.9379,
      "step": 168441
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.335033655166626,
      "learning_rate": 0.0001008472001540332,
      "loss": 2.9452,
      "step": 168442
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.008310317993164,
      "learning_rate": 0.00010084414095457768,
      "loss": 2.846,
      "step": 168443
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6242733001708984,
      "learning_rate": 0.00010084108179214887,
      "loss": 2.7874,
      "step": 168444
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6845197677612305,
      "learning_rate": 0.00010083802266674761,
      "loss": 2.9802,
      "step": 168445
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.209398031234741,
      "learning_rate": 0.0001008349635783743,
      "loss": 2.9119,
      "step": 168446
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.087202072143555,
      "learning_rate": 0.0001008319045270295,
      "loss": 2.683,
      "step": 168447
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7478835582733154,
      "learning_rate": 0.00010082884551271373,
      "loss": 2.9316,
      "step": 168448
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.459340810775757,
      "learning_rate": 0.00010082578653542777,
      "loss": 2.9959,
      "step": 168449
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.841874837875366,
      "learning_rate": 0.0001008227275951719,
      "loss": 2.9426,
      "step": 168450
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7769837379455566,
      "learning_rate": 0.00010081966869194699,
      "loss": 3.1497,
      "step": 168451
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.486687660217285,
      "learning_rate": 0.00010081660982575344,
      "loss": 3.1192,
      "step": 168452
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2039237022399902,
      "learning_rate": 0.00010081355099659186,
      "loss": 3.0491,
      "step": 168453
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.500016689300537,
      "learning_rate": 0.00010081049220446267,
      "loss": 3.1234,
      "step": 168454
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3990910053253174,
      "learning_rate": 0.00010080743344936669,
      "loss": 3.1199,
      "step": 168455
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3331222534179688,
      "learning_rate": 0.00010080437473130426,
      "loss": 2.8144,
      "step": 168456
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3611135482788086,
      "learning_rate": 0.00010080131605027617,
      "loss": 2.935,
      "step": 168457
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5380733013153076,
      "learning_rate": 0.00010079825740628289,
      "loss": 2.9378,
      "step": 168458
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7189652919769287,
      "learning_rate": 0.00010079519879932496,
      "loss": 3.1516,
      "step": 168459
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.316200017929077,
      "learning_rate": 0.00010079214022940282,
      "loss": 3.0384,
      "step": 168460
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0536911487579346,
      "learning_rate": 0.00010078908169651732,
      "loss": 2.9691,
      "step": 168461
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8214921951293945,
      "learning_rate": 0.00010078602320066881,
      "loss": 3.1403,
      "step": 168462
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2590115070343018,
      "learning_rate": 0.000100782964741858,
      "loss": 2.9502,
      "step": 168463
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5608315467834473,
      "learning_rate": 0.00010077990632008542,
      "loss": 2.83,
      "step": 168464
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7524044513702393,
      "learning_rate": 0.00010077684793535158,
      "loss": 2.9838,
      "step": 168465
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.750305652618408,
      "learning_rate": 0.00010077378958765702,
      "loss": 3.0936,
      "step": 168466
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.005281686782837,
      "learning_rate": 0.00010077073127700247,
      "loss": 2.8675,
      "step": 168467
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.697160243988037,
      "learning_rate": 0.00010076767300338832,
      "loss": 2.8258,
      "step": 168468
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5638890266418457,
      "learning_rate": 0.0001007646147668153,
      "loss": 2.8604,
      "step": 168469
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4485228061676025,
      "learning_rate": 0.00010076155656728388,
      "loss": 2.8668,
      "step": 168470
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2078239917755127,
      "learning_rate": 0.00010075849840479468,
      "loss": 2.9198,
      "step": 168471
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.410959005355835,
      "learning_rate": 0.00010075544027934813,
      "loss": 3.0471,
      "step": 168472
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.149775981903076,
      "learning_rate": 0.00010075238219094499,
      "loss": 3.0228,
      "step": 168473
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.3263792991638184,
      "learning_rate": 0.00010074932413958565,
      "loss": 2.9849,
      "step": 168474
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4290690422058105,
      "learning_rate": 0.00010074626612527088,
      "loss": 2.9042,
      "step": 168475
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.357870101928711,
      "learning_rate": 0.00010074320814800111,
      "loss": 3.1295,
      "step": 168476
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.449146032333374,
      "learning_rate": 0.00010074015020777687,
      "loss": 3.0605,
      "step": 168477
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8665435314178467,
      "learning_rate": 0.0001007370923045989,
      "loss": 2.8303,
      "step": 168478
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.600886344909668,
      "learning_rate": 0.00010073403443846766,
      "loss": 2.9849,
      "step": 168479
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8466460704803467,
      "learning_rate": 0.00010073097660938358,
      "loss": 3.1043,
      "step": 168480
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.895974636077881,
      "learning_rate": 0.00010072791881734754,
      "loss": 3.0207,
      "step": 168481
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.00199294090271,
      "learning_rate": 0.0001007248610623598,
      "loss": 3.0965,
      "step": 168482
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5815396308898926,
      "learning_rate": 0.00010072180334442118,
      "loss": 2.9316,
      "step": 168483
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.5658833980560303,
      "learning_rate": 0.00010071874566353216,
      "loss": 2.7517,
      "step": 168484
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7291879653930664,
      "learning_rate": 0.00010071568801969318,
      "loss": 3.1203,
      "step": 168485
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.263646364212036,
      "learning_rate": 0.00010071263041290498,
      "loss": 2.8538,
      "step": 168486
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.427119255065918,
      "learning_rate": 0.00010070957284316808,
      "loss": 3.2441,
      "step": 168487
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.608537197113037,
      "learning_rate": 0.00010070651531048295,
      "loss": 2.9306,
      "step": 168488
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.569720983505249,
      "learning_rate": 0.00010070345781485034,
      "loss": 2.7093,
      "step": 168489
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.465404987335205,
      "learning_rate": 0.0001007004003562707,
      "loss": 2.9848,
      "step": 168490
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.092834711074829,
      "learning_rate": 0.00010069734293474454,
      "loss": 3.198,
      "step": 168491
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6551923751831055,
      "learning_rate": 0.00010069428555027258,
      "loss": 2.7866,
      "step": 168492
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.318686008453369,
      "learning_rate": 0.00010069122820285531,
      "loss": 2.8123,
      "step": 168493
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5938403606414795,
      "learning_rate": 0.00010068817089249324,
      "loss": 2.8293,
      "step": 168494
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.681123733520508,
      "learning_rate": 0.00010068511361918709,
      "loss": 2.946,
      "step": 168495
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.282245635986328,
      "learning_rate": 0.00010068205638293732,
      "loss": 2.9906,
      "step": 168496
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6796224117279053,
      "learning_rate": 0.00010067899918374442,
      "loss": 2.9586,
      "step": 168497
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5351970195770264,
      "learning_rate": 0.00010067594202160918,
      "loss": 3.1461,
      "step": 168498
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.500690221786499,
      "learning_rate": 0.00010067288489653192,
      "loss": 3.0818,
      "step": 168499
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.552830219268799,
      "learning_rate": 0.00010066982780851346,
      "loss": 2.9573,
      "step": 168500
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2886815071105957,
      "learning_rate": 0.00010066677075755422,
      "loss": 3.0357,
      "step": 168501
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6851823329925537,
      "learning_rate": 0.00010066371374365479,
      "loss": 2.8706,
      "step": 168502
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4886674880981445,
      "learning_rate": 0.00010066065676681566,
      "loss": 3.1925,
      "step": 168503
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.473349094390869,
      "learning_rate": 0.00010065759982703755,
      "loss": 2.8124,
      "step": 168504
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5264413356781006,
      "learning_rate": 0.00010065454292432086,
      "loss": 3.3068,
      "step": 168505
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.064610242843628,
      "learning_rate": 0.00010065148605866639,
      "loss": 2.9573,
      "step": 168506
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6952338218688965,
      "learning_rate": 0.00010064842923007453,
      "loss": 2.9814,
      "step": 168507
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4802889823913574,
      "learning_rate": 0.0001006453724385459,
      "loss": 2.9824,
      "step": 168508
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4967410564422607,
      "learning_rate": 0.00010064231568408096,
      "loss": 2.8533,
      "step": 168509
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.260646104812622,
      "learning_rate": 0.00010063925896668045,
      "loss": 3.2065,
      "step": 168510
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.4005634784698486,
      "learning_rate": 0.00010063620228634481,
      "loss": 2.7779,
      "step": 168511
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.749931573867798,
      "learning_rate": 0.00010063314564307476,
      "loss": 3.0398,
      "step": 168512
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7267489433288574,
      "learning_rate": 0.00010063008903687075,
      "loss": 2.8189,
      "step": 168513
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6501638889312744,
      "learning_rate": 0.00010062703246773339,
      "loss": 3.0533,
      "step": 168514
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5541417598724365,
      "learning_rate": 0.00010062397593566311,
      "loss": 3.1166,
      "step": 168515
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.7693655490875244,
      "learning_rate": 0.00010062091944066067,
      "loss": 2.7169,
      "step": 168516
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.371948003768921,
      "learning_rate": 0.00010061786298272653,
      "loss": 2.8574,
      "step": 168517
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.8080086708068848,
      "learning_rate": 0.00010061480656186132,
      "loss": 2.675,
      "step": 168518
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.688650131225586,
      "learning_rate": 0.00010061175017806564,
      "loss": 2.7186,
      "step": 168519
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.518427610397339,
      "learning_rate": 0.00010060869383133997,
      "loss": 3.1868,
      "step": 168520
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5950398445129395,
      "learning_rate": 0.0001006056375216848,
      "loss": 3.1783,
      "step": 168521
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.267507553100586,
      "learning_rate": 0.00010060258124910097,
      "loss": 2.8204,
      "step": 168522
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.591649293899536,
      "learning_rate": 0.00010059952501358873,
      "loss": 2.9811,
      "step": 168523
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.223534107208252,
      "learning_rate": 0.00010059646881514892,
      "loss": 3.2453,
      "step": 168524
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4534685611724854,
      "learning_rate": 0.00010059341265378197,
      "loss": 2.995,
      "step": 168525
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.565853118896484,
      "learning_rate": 0.00010059035652948847,
      "loss": 2.6885,
      "step": 168526
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.62231707572937,
      "learning_rate": 0.00010058730044226889,
      "loss": 3.155,
      "step": 168527
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.341243028640747,
      "learning_rate": 0.00010058424439212401,
      "loss": 2.9978,
      "step": 168528
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.642549991607666,
      "learning_rate": 0.00010058118837905417,
      "loss": 3.0501,
      "step": 168529
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4646224975585938,
      "learning_rate": 0.00010057813240306014,
      "loss": 2.7442,
      "step": 168530
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2565932273864746,
      "learning_rate": 0.00010057507646414242,
      "loss": 3.2638,
      "step": 168531
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.758908748626709,
      "learning_rate": 0.00010057202056230155,
      "loss": 3.0299,
      "step": 168532
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.409432888031006,
      "learning_rate": 0.00010056896469753802,
      "loss": 3.0997,
      "step": 168533
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7044100761413574,
      "learning_rate": 0.00010056590886985256,
      "loss": 2.968,
      "step": 168534
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.965974807739258,
      "learning_rate": 0.00010056285307924557,
      "loss": 2.9138,
      "step": 168535
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6234288215637207,
      "learning_rate": 0.00010055979732571782,
      "loss": 2.7137,
      "step": 168536
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.048750638961792,
      "learning_rate": 0.00010055674160926978,
      "loss": 2.8078,
      "step": 168537
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2686805725097656,
      "learning_rate": 0.00010055368592990197,
      "loss": 3.0174,
      "step": 168538
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.0826876163482666,
      "learning_rate": 0.00010055063028761491,
      "loss": 2.9449,
      "step": 168539
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.284212589263916,
      "learning_rate": 0.00010054757468240935,
      "loss": 3.0092,
      "step": 168540
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.863722324371338,
      "learning_rate": 0.00010054451911428569,
      "loss": 2.8307,
      "step": 168541
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4703187942504883,
      "learning_rate": 0.00010054146358324464,
      "loss": 2.9284,
      "step": 168542
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4729959964752197,
      "learning_rate": 0.00010053840808928669,
      "loss": 3.0836,
      "step": 168543
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.7312633991241455,
      "learning_rate": 0.00010053535263241245,
      "loss": 3.2099,
      "step": 168544
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.502192974090576,
      "learning_rate": 0.00010053229721262233,
      "loss": 2.9513,
      "step": 168545
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.18578839302063,
      "learning_rate": 0.00010052924182991713,
      "loss": 2.9828,
      "step": 168546
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.424807548522949,
      "learning_rate": 0.0001005261864842972,
      "loss": 3.0385,
      "step": 168547
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.282311201095581,
      "learning_rate": 0.00010052313117576331,
      "loss": 3.0715,
      "step": 168548
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.532315254211426,
      "learning_rate": 0.00010052007590431584,
      "loss": 3.14,
      "step": 168549
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0781822204589844,
      "learning_rate": 0.00010051702066995567,
      "loss": 2.9542,
      "step": 168550
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.544517993927002,
      "learning_rate": 0.00010051396547268294,
      "loss": 2.9626,
      "step": 168551
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.764277935028076,
      "learning_rate": 0.00010051091031249853,
      "loss": 3.0226,
      "step": 168552
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5325660705566406,
      "learning_rate": 0.00010050785518940279,
      "loss": 2.7835,
      "step": 168553
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5019431114196777,
      "learning_rate": 0.00010050480010339651,
      "loss": 2.8394,
      "step": 168554
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.963738203048706,
      "learning_rate": 0.00010050174505448007,
      "loss": 2.8498,
      "step": 168555
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.354360342025757,
      "learning_rate": 0.00010049869004265432,
      "loss": 2.8133,
      "step": 168556
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.863239049911499,
      "learning_rate": 0.00010049563506791943,
      "loss": 2.7181,
      "step": 168557
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5844900608062744,
      "learning_rate": 0.00010049258013027624,
      "loss": 3.0038,
      "step": 168558
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.6087281703948975,
      "learning_rate": 0.00010048952522972518,
      "loss": 3.0037,
      "step": 168559
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4794232845306396,
      "learning_rate": 0.00010048647036626699,
      "loss": 2.8377,
      "step": 168560
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.766171932220459,
      "learning_rate": 0.00010048341553990199,
      "loss": 2.8157,
      "step": 168561
    },
    {
      "epoch": 2.19,
      "grad_norm": 4.331784725189209,
      "learning_rate": 0.00010048036075063101,
      "loss": 2.8982,
      "step": 168562
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1263678073883057,
      "learning_rate": 0.0001004773059984545,
      "loss": 2.928,
      "step": 168563
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.247276544570923,
      "learning_rate": 0.00010047425128337306,
      "loss": 3.0793,
      "step": 168564
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.2737162113189697,
      "learning_rate": 0.00010047119660538706,
      "loss": 3.0148,
      "step": 168565
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5333311557769775,
      "learning_rate": 0.00010046814196449736,
      "loss": 3.1125,
      "step": 168566
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.1155872344970703,
      "learning_rate": 0.00010046508736070432,
      "loss": 2.8007,
      "step": 168567
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4191324710845947,
      "learning_rate": 0.00010046203279400864,
      "loss": 2.9923,
      "step": 168568
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.206859588623047,
      "learning_rate": 0.00010045897826441088,
      "loss": 3.1373,
      "step": 168569
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.533316135406494,
      "learning_rate": 0.00010045592377191152,
      "loss": 2.9956,
      "step": 168570
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.2088780403137207,
      "learning_rate": 0.0001004528693165111,
      "loss": 3.0333,
      "step": 168571
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.516528844833374,
      "learning_rate": 0.00010044981489821035,
      "loss": 2.8946,
      "step": 168572
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.9035000801086426,
      "learning_rate": 0.00010044676051700966,
      "loss": 2.9281,
      "step": 168573
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5492372512817383,
      "learning_rate": 0.00010044370617290975,
      "loss": 2.9471,
      "step": 168574
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.142186403274536,
      "learning_rate": 0.00010044065186591114,
      "loss": 2.7999,
      "step": 168575
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.548055648803711,
      "learning_rate": 0.00010043759759601431,
      "loss": 2.8547,
      "step": 168576
    },
    {
      "epoch": 2.19,
      "grad_norm": 3.0952327251434326,
      "learning_rate": 0.00010043454336321997,
      "loss": 3.0949,
      "step": 168577
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.4458940029144287,
      "learning_rate": 0.00010043148916752861,
      "loss": 2.8069,
      "step": 168578
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.622088670730591,
      "learning_rate": 0.00010042843500894072,
      "loss": 2.9247,
      "step": 168579
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.734971046447754,
      "learning_rate": 0.00010042538088745703,
      "loss": 2.8864,
      "step": 168580
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4013659954071045,
      "learning_rate": 0.00010042232680307803,
      "loss": 3.0938,
      "step": 168581
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.1683542728424072,
      "learning_rate": 0.00010041927275580419,
      "loss": 2.9814,
      "step": 168582
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.892261505126953,
      "learning_rate": 0.00010041621874563626,
      "loss": 2.9291,
      "step": 168583
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.344733476638794,
      "learning_rate": 0.00010041316477257466,
      "loss": 3.0153,
      "step": 168584
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.494170904159546,
      "learning_rate": 0.00010041011083662011,
      "loss": 2.7012,
      "step": 168585
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5035324096679688,
      "learning_rate": 0.00010040705693777306,
      "loss": 3.2145,
      "step": 168586
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.0829992294311523,
      "learning_rate": 0.00010040400307603415,
      "loss": 2.9127,
      "step": 168587
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.019223213195801,
      "learning_rate": 0.00010040094925140377,
      "loss": 2.8107,
      "step": 168588
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6199159622192383,
      "learning_rate": 0.00010039789546388275,
      "loss": 2.8053,
      "step": 168589
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5797526836395264,
      "learning_rate": 0.00010039484171347141,
      "loss": 3.0022,
      "step": 168590
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.501678705215454,
      "learning_rate": 0.00010039178800017051,
      "loss": 3.1834,
      "step": 168591
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8373360633850098,
      "learning_rate": 0.00010038873432398057,
      "loss": 2.9421,
      "step": 168592
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.735177755355835,
      "learning_rate": 0.00010038568068490214,
      "loss": 2.9852,
      "step": 168593
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7054622173309326,
      "learning_rate": 0.00010038262708293568,
      "loss": 2.9404,
      "step": 168594
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.594655990600586,
      "learning_rate": 0.00010037957351808195,
      "loss": 2.8808,
      "step": 168595
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8754589557647705,
      "learning_rate": 0.00010037651999034132,
      "loss": 2.774,
      "step": 168596
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.182081699371338,
      "learning_rate": 0.00010037346649971457,
      "loss": 3.1453,
      "step": 168597
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.537757396697998,
      "learning_rate": 0.00010037041304620217,
      "loss": 2.9455,
      "step": 168598
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.52488112449646,
      "learning_rate": 0.00010036735962980468,
      "loss": 3.0773,
      "step": 168599
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.0143351554870605,
      "learning_rate": 0.00010036430625052253,
      "loss": 2.7892,
      "step": 168600
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.186984062194824,
      "learning_rate": 0.00010036125290835655,
      "loss": 2.9926,
      "step": 168601
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4974868297576904,
      "learning_rate": 0.0001003581996033071,
      "loss": 2.8389,
      "step": 168602
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2924346923828125,
      "learning_rate": 0.00010035514633537491,
      "loss": 2.8217,
      "step": 168603
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.515472173690796,
      "learning_rate": 0.00010035209310456047,
      "loss": 2.9678,
      "step": 168604
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7507526874542236,
      "learning_rate": 0.00010034903991086434,
      "loss": 2.6237,
      "step": 168605
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3402388095855713,
      "learning_rate": 0.00010034598675428698,
      "loss": 2.8029,
      "step": 168606
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.191297769546509,
      "learning_rate": 0.00010034293363482919,
      "loss": 2.8429,
      "step": 168607
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4584319591522217,
      "learning_rate": 0.00010033988055249132,
      "loss": 2.8403,
      "step": 168608
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7121646404266357,
      "learning_rate": 0.00010033682750727411,
      "loss": 3.085,
      "step": 168609
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.143314838409424,
      "learning_rate": 0.00010033377449917808,
      "loss": 2.859,
      "step": 168610
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8023838996887207,
      "learning_rate": 0.00010033072152820374,
      "loss": 2.907,
      "step": 168611
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.421297073364258,
      "learning_rate": 0.00010032766859435163,
      "loss": 2.8865,
      "step": 168612
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.763638734817505,
      "learning_rate": 0.00010032461569762244,
      "loss": 2.788,
      "step": 168613
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.508796215057373,
      "learning_rate": 0.0001003215628380166,
      "loss": 2.9431,
      "step": 168614
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.737077474594116,
      "learning_rate": 0.00010031851001553482,
      "loss": 2.7312,
      "step": 168615
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.342383623123169,
      "learning_rate": 0.00010031545723017752,
      "loss": 2.7968,
      "step": 168616
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5782980918884277,
      "learning_rate": 0.00010031240448194555,
      "loss": 3.1119,
      "step": 168617
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2880735397338867,
      "learning_rate": 0.00010030935177083905,
      "loss": 2.8835,
      "step": 168618
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2145845890045166,
      "learning_rate": 0.00010030629909685892,
      "loss": 2.9908,
      "step": 168619
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.938930034637451,
      "learning_rate": 0.00010030324646000553,
      "loss": 2.9403,
      "step": 168620
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.6071977615356445,
      "learning_rate": 0.00010030019386027963,
      "loss": 3.0467,
      "step": 168621
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5621347427368164,
      "learning_rate": 0.00010029714129768161,
      "loss": 2.8135,
      "step": 168622
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.307471513748169,
      "learning_rate": 0.00010029408877221228,
      "loss": 3.0939,
      "step": 168623
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.0932672023773193,
      "learning_rate": 0.00010029103628387188,
      "loss": 2.6997,
      "step": 168624
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7075321674346924,
      "learning_rate": 0.00010028798383266122,
      "loss": 3.0438,
      "step": 168625
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.246325969696045,
      "learning_rate": 0.00010028493141858072,
      "loss": 3.217,
      "step": 168626
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.379673719406128,
      "learning_rate": 0.00010028187904163114,
      "loss": 2.7968,
      "step": 168627
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7946486473083496,
      "learning_rate": 0.00010027882670181279,
      "loss": 2.9904,
      "step": 168628
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.499971866607666,
      "learning_rate": 0.0001002757743991266,
      "loss": 2.7715,
      "step": 168629
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.594311475753784,
      "learning_rate": 0.0001002727221335727,
      "loss": 2.7698,
      "step": 168630
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9150052070617676,
      "learning_rate": 0.00010026966990515195,
      "loss": 2.9428,
      "step": 168631
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.543583393096924,
      "learning_rate": 0.00010026661771386478,
      "loss": 2.9309,
      "step": 168632
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3606457710266113,
      "learning_rate": 0.0001002635655597119,
      "loss": 2.7558,
      "step": 168633
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9708850383758545,
      "learning_rate": 0.00010026051344269367,
      "loss": 2.8418,
      "step": 168634
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.504865884780884,
      "learning_rate": 0.000100257461362811,
      "loss": 2.857,
      "step": 168635
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.385542154312134,
      "learning_rate": 0.00010025440932006404,
      "loss": 2.8797,
      "step": 168636
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8549699783325195,
      "learning_rate": 0.00010025135731445367,
      "loss": 2.7517,
      "step": 168637
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6602776050567627,
      "learning_rate": 0.00010024830534598022,
      "loss": 2.8221,
      "step": 168638
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.845845937728882,
      "learning_rate": 0.00010024525341464451,
      "loss": 2.9149,
      "step": 168639
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9780349731445312,
      "learning_rate": 0.00010024220152044685,
      "loss": 3.0737,
      "step": 168640
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.563915252685547,
      "learning_rate": 0.00010023914966338816,
      "loss": 2.9226,
      "step": 168641
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8422322273254395,
      "learning_rate": 0.00010023609784346854,
      "loss": 2.8426,
      "step": 168642
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.216404438018799,
      "learning_rate": 0.00010023304606068893,
      "loss": 2.8917,
      "step": 168643
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.325225591659546,
      "learning_rate": 0.00010022999431504967,
      "loss": 3.0895,
      "step": 168644
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9570515155792236,
      "learning_rate": 0.00010022694260655153,
      "loss": 2.9667,
      "step": 168645
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3329644203186035,
      "learning_rate": 0.00010022389093519485,
      "loss": 3.2383,
      "step": 168646
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.507446050643921,
      "learning_rate": 0.00010022083930098043,
      "loss": 3.0747,
      "step": 168647
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.320016860961914,
      "learning_rate": 0.00010021778770390873,
      "loss": 3.0297,
      "step": 168648
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4050538539886475,
      "learning_rate": 0.00010021473614398031,
      "loss": 3.2037,
      "step": 168649
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9021761417388916,
      "learning_rate": 0.00010021168462119565,
      "loss": 2.7662,
      "step": 168650
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1514275074005127,
      "learning_rate": 0.00010020863313555549,
      "loss": 2.8093,
      "step": 168651
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.379535675048828,
      "learning_rate": 0.00010020558168706023,
      "loss": 3.0553,
      "step": 168652
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.679211378097534,
      "learning_rate": 0.00010020253027571063,
      "loss": 2.9275,
      "step": 168653
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.84499192237854,
      "learning_rate": 0.00010019947890150716,
      "loss": 2.9661,
      "step": 168654
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4337828159332275,
      "learning_rate": 0.00010019642756445037,
      "loss": 3.0052,
      "step": 168655
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.866971969604492,
      "learning_rate": 0.00010019337626454073,
      "loss": 3.0603,
      "step": 168656
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.450575351715088,
      "learning_rate": 0.000100190325001779,
      "loss": 2.8494,
      "step": 168657
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3601720333099365,
      "learning_rate": 0.00010018727377616556,
      "loss": 2.869,
      "step": 168658
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8301382064819336,
      "learning_rate": 0.00010018422258770118,
      "loss": 2.8632,
      "step": 168659
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2117795944213867,
      "learning_rate": 0.00010018117143638635,
      "loss": 3.0847,
      "step": 168660
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.938462018966675,
      "learning_rate": 0.0001001781203222215,
      "loss": 3.0279,
      "step": 168661
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.809002637863159,
      "learning_rate": 0.00010017506924520742,
      "loss": 2.8674,
      "step": 168662
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.194399118423462,
      "learning_rate": 0.00010017201820534453,
      "loss": 3.0643,
      "step": 168663
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.103485107421875,
      "learning_rate": 0.00010016896720263335,
      "loss": 3.0281,
      "step": 168664
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.569730043411255,
      "learning_rate": 0.00010016591623707466,
      "loss": 2.9017,
      "step": 168665
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2828266620635986,
      "learning_rate": 0.00010016286530866888,
      "loss": 3.0325,
      "step": 168666
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4605824947357178,
      "learning_rate": 0.00010015981441741648,
      "loss": 2.7939,
      "step": 168667
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8274757862091064,
      "learning_rate": 0.00010015676356331827,
      "loss": 3.0468,
      "step": 168668
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6950910091400146,
      "learning_rate": 0.00010015371274637456,
      "loss": 2.9713,
      "step": 168669
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6710426807403564,
      "learning_rate": 0.00010015066196658617,
      "loss": 2.9611,
      "step": 168670
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8292152881622314,
      "learning_rate": 0.00010014761122395352,
      "loss": 2.9345,
      "step": 168671
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.42244291305542,
      "learning_rate": 0.00010014456051847721,
      "loss": 2.9544,
      "step": 168672
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2153429985046387,
      "learning_rate": 0.00010014150985015771,
      "loss": 2.9338,
      "step": 168673
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.068875312805176,
      "learning_rate": 0.00010013845921899577,
      "loss": 2.803,
      "step": 168674
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.95052433013916,
      "learning_rate": 0.00010013540862499177,
      "loss": 2.9211,
      "step": 168675
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.289567470550537,
      "learning_rate": 0.00010013235806814647,
      "loss": 2.8025,
      "step": 168676
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5056228637695312,
      "learning_rate": 0.00010012930754846035,
      "loss": 2.8797,
      "step": 168677
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.227536201477051,
      "learning_rate": 0.00010012625706593394,
      "loss": 2.9214,
      "step": 168678
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.002877712249756,
      "learning_rate": 0.00010012320662056773,
      "loss": 3.0358,
      "step": 168679
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4136788845062256,
      "learning_rate": 0.0001001201562123625,
      "loss": 3.2062,
      "step": 168680
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3216593265533447,
      "learning_rate": 0.00010011710584131861,
      "loss": 2.7672,
      "step": 168681
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.625868558883667,
      "learning_rate": 0.00010011405550743683,
      "loss": 2.8988,
      "step": 168682
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.523587942123413,
      "learning_rate": 0.00010011100521071748,
      "loss": 2.8615,
      "step": 168683
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.724240303039551,
      "learning_rate": 0.00010010795495116152,
      "loss": 2.7239,
      "step": 168684
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.412139892578125,
      "learning_rate": 0.00010010490472876902,
      "loss": 2.8384,
      "step": 168685
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4583988189697266,
      "learning_rate": 0.00010010185454354093,
      "loss": 3.3551,
      "step": 168686
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6670289039611816,
      "learning_rate": 0.00010009880439547754,
      "loss": 2.995,
      "step": 168687
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6688272953033447,
      "learning_rate": 0.00010009575428457968,
      "loss": 3.068,
      "step": 168688
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5215775966644287,
      "learning_rate": 0.0001000927042108477,
      "loss": 3.0197,
      "step": 168689
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.84684681892395,
      "learning_rate": 0.00010008965417428244,
      "loss": 2.9192,
      "step": 168690
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9864280223846436,
      "learning_rate": 0.00010008660417488409,
      "loss": 3.0804,
      "step": 168691
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.659693717956543,
      "learning_rate": 0.00010008355421265352,
      "loss": 2.7978,
      "step": 168692
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.56215238571167,
      "learning_rate": 0.00010008050428759111,
      "loss": 3.0369,
      "step": 168693
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.392772912979126,
      "learning_rate": 0.00010007745439969758,
      "loss": 3.0903,
      "step": 168694
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.540452480316162,
      "learning_rate": 0.00010007440454897335,
      "loss": 2.7587,
      "step": 168695
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.6037702560424805,
      "learning_rate": 0.0001000713547354192,
      "loss": 3.0494,
      "step": 168696
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.169219732284546,
      "learning_rate": 0.00010006830495903543,
      "loss": 2.8759,
      "step": 168697
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.003979444503784,
      "learning_rate": 0.0001000652552198228,
      "loss": 3.136,
      "step": 168698
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.331946611404419,
      "learning_rate": 0.00010006220551778174,
      "loss": 3.0622,
      "step": 168699
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1630475521087646,
      "learning_rate": 0.00010005915585291296,
      "loss": 3.0678,
      "step": 168700
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.920346975326538,
      "learning_rate": 0.00010005610622521686,
      "loss": 3.0872,
      "step": 168701
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.545287847518921,
      "learning_rate": 0.00010005305663469432,
      "loss": 2.8054,
      "step": 168702
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8017354011535645,
      "learning_rate": 0.00010005000708134551,
      "loss": 2.9088,
      "step": 168703
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4499611854553223,
      "learning_rate": 0.00010004695756517122,
      "loss": 3.1062,
      "step": 168704
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.32392954826355,
      "learning_rate": 0.00010004390808617191,
      "loss": 2.6917,
      "step": 168705
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.214876174926758,
      "learning_rate": 0.00010004085864434832,
      "loss": 3.1288,
      "step": 168706
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.387881278991699,
      "learning_rate": 0.0001000378092397008,
      "loss": 2.9455,
      "step": 168707
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.642000198364258,
      "learning_rate": 0.00010003475987223023,
      "loss": 2.9119,
      "step": 168708
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2221107482910156,
      "learning_rate": 0.00010003171054193676,
      "loss": 2.8905,
      "step": 168709
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5018856525421143,
      "learning_rate": 0.00010002866124882128,
      "loss": 3.1142,
      "step": 168710
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.704023838043213,
      "learning_rate": 0.00010002561199288414,
      "loss": 2.8699,
      "step": 168711
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.507129669189453,
      "learning_rate": 0.00010002256277412611,
      "loss": 3.1527,
      "step": 168712
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4300079345703125,
      "learning_rate": 0.00010001951359254758,
      "loss": 2.9775,
      "step": 168713
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3867087364196777,
      "learning_rate": 0.00010001646444814939,
      "loss": 3.0726,
      "step": 168714
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7290940284729004,
      "learning_rate": 0.0001000134153409317,
      "loss": 2.8156,
      "step": 168715
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5879628658294678,
      "learning_rate": 0.00010001036627089541,
      "loss": 2.8016,
      "step": 168716
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.331012487411499,
      "learning_rate": 0.00010000731723804086,
      "loss": 3.1952,
      "step": 168717
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.607346296310425,
      "learning_rate": 0.00010000426824236881,
      "loss": 3.0352,
      "step": 168718
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2512316703796387,
      "learning_rate": 0.00010000121928387966,
      "loss": 2.9563,
      "step": 168719
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9887938499450684,
      "learning_rate": 9.999817036257427e-05,
      "loss": 2.8603,
      "step": 168720
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.23075532913208,
      "learning_rate": 9.999512147845277e-05,
      "loss": 3.0537,
      "step": 168721
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.08630108833313,
      "learning_rate": 9.999207263151609e-05,
      "loss": 3.2241,
      "step": 168722
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6488380432128906,
      "learning_rate": 9.998902382176453e-05,
      "loss": 2.9095,
      "step": 168723
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.460632801055908,
      "learning_rate": 9.998597504919885e-05,
      "loss": 2.8347,
      "step": 168724
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2671313285827637,
      "learning_rate": 9.99829263138195e-05,
      "loss": 3.0102,
      "step": 168725
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4788875579833984,
      "learning_rate": 9.99798776156273e-05,
      "loss": 2.9531,
      "step": 168726
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.471590518951416,
      "learning_rate": 9.997682895462239e-05,
      "loss": 2.995,
      "step": 168727
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.198519229888916,
      "learning_rate": 9.99737803308057e-05,
      "loss": 3.0215,
      "step": 168728
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.284393548965454,
      "learning_rate": 9.997073174417752e-05,
      "loss": 2.857,
      "step": 168729
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9480109214782715,
      "learning_rate": 9.99676831947387e-05,
      "loss": 2.8752,
      "step": 168730
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.4154839515686035,
      "learning_rate": 9.996463468248953e-05,
      "loss": 3.0348,
      "step": 168731
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.507783889770508,
      "learning_rate": 9.99615862074309e-05,
      "loss": 3.0337,
      "step": 168732
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.322357654571533,
      "learning_rate": 9.995853776956305e-05,
      "loss": 2.9751,
      "step": 168733
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8658225536346436,
      "learning_rate": 9.995548936888672e-05,
      "loss": 2.8572,
      "step": 168734
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.472123384475708,
      "learning_rate": 9.995244100540236e-05,
      "loss": 3.0452,
      "step": 168735
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5861966609954834,
      "learning_rate": 9.994939267911073e-05,
      "loss": 2.9117,
      "step": 168736
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7760426998138428,
      "learning_rate": 9.994634439001216e-05,
      "loss": 2.9515,
      "step": 168737
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9765758514404297,
      "learning_rate": 9.994329613810746e-05,
      "loss": 2.9656,
      "step": 168738
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.444366455078125,
      "learning_rate": 9.994024792339706e-05,
      "loss": 3.0763,
      "step": 168739
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7653839588165283,
      "learning_rate": 9.993719974588155e-05,
      "loss": 2.9012,
      "step": 168740
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.250532627105713,
      "learning_rate": 9.99341516055614e-05,
      "loss": 2.8335,
      "step": 168741
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5831353664398193,
      "learning_rate": 9.993110350243738e-05,
      "loss": 2.6047,
      "step": 168742
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3141162395477295,
      "learning_rate": 9.992805543650982e-05,
      "loss": 2.9063,
      "step": 168743
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.542909860610962,
      "learning_rate": 9.992500740777954e-05,
      "loss": 2.9472,
      "step": 168744
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.090471267700195,
      "learning_rate": 9.992195941624694e-05,
      "loss": 2.6821,
      "step": 168745
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.6450605392456055,
      "learning_rate": 9.991891146191254e-05,
      "loss": 2.806,
      "step": 168746
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.603275775909424,
      "learning_rate": 9.99158635447771e-05,
      "loss": 2.9907,
      "step": 168747
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2672853469848633,
      "learning_rate": 9.991281566484106e-05,
      "loss": 3.178,
      "step": 168748
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2810585498809814,
      "learning_rate": 9.990976782210494e-05,
      "loss": 2.8848,
      "step": 168749
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.530646324157715,
      "learning_rate": 9.990672001656945e-05,
      "loss": 3.0081,
      "step": 168750
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.786221504211426,
      "learning_rate": 9.990367224823507e-05,
      "loss": 2.7903,
      "step": 168751
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.077800750732422,
      "learning_rate": 9.990062451710227e-05,
      "loss": 2.9835,
      "step": 168752
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5123283863067627,
      "learning_rate": 9.989757682317182e-05,
      "loss": 2.9435,
      "step": 168753
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.792374610900879,
      "learning_rate": 9.98945291664442e-05,
      "loss": 2.9556,
      "step": 168754
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3871521949768066,
      "learning_rate": 9.989148154691986e-05,
      "loss": 2.9806,
      "step": 168755
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.9546971321105957,
      "learning_rate": 9.98884339645996e-05,
      "loss": 2.7705,
      "step": 168756
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6386404037475586,
      "learning_rate": 9.988538641948382e-05,
      "loss": 2.9961,
      "step": 168757
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.539191246032715,
      "learning_rate": 9.988233891157303e-05,
      "loss": 2.9891,
      "step": 168758
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1875624656677246,
      "learning_rate": 9.987929144086802e-05,
      "loss": 2.9331,
      "step": 168759
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.53800892829895,
      "learning_rate": 9.98762440073691e-05,
      "loss": 2.6842,
      "step": 168760
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6561710834503174,
      "learning_rate": 9.987319661107705e-05,
      "loss": 2.7286,
      "step": 168761
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7360734939575195,
      "learning_rate": 9.987014925199239e-05,
      "loss": 2.8216,
      "step": 168762
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.52939510345459,
      "learning_rate": 9.986710193011562e-05,
      "loss": 2.887,
      "step": 168763
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.9624459743499756,
      "learning_rate": 9.986405464544722e-05,
      "loss": 2.979,
      "step": 168764
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5933127403259277,
      "learning_rate": 9.986100739798798e-05,
      "loss": 2.8871,
      "step": 168765
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7885873317718506,
      "learning_rate": 9.985796018773828e-05,
      "loss": 2.8145,
      "step": 168766
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7552781105041504,
      "learning_rate": 9.985491301469886e-05,
      "loss": 2.9777,
      "step": 168767
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7329604625701904,
      "learning_rate": 9.985186587887006e-05,
      "loss": 2.7745,
      "step": 168768
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5032405853271484,
      "learning_rate": 9.984881878025281e-05,
      "loss": 2.9291,
      "step": 168769
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.158581018447876,
      "learning_rate": 9.984577171884721e-05,
      "loss": 3.0401,
      "step": 168770
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.208793878555298,
      "learning_rate": 9.984272469465419e-05,
      "loss": 2.9966,
      "step": 168771
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0144128799438477,
      "learning_rate": 9.983967770767405e-05,
      "loss": 2.9504,
      "step": 168772
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5063841342926025,
      "learning_rate": 9.983663075790763e-05,
      "loss": 2.9086,
      "step": 168773
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.22650408744812,
      "learning_rate": 9.983358384535523e-05,
      "loss": 2.9075,
      "step": 168774
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3299803733825684,
      "learning_rate": 9.983053697001776e-05,
      "loss": 2.7905,
      "step": 168775
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.169521331787109,
      "learning_rate": 9.982749013189537e-05,
      "loss": 2.9001,
      "step": 168776
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2592267990112305,
      "learning_rate": 9.982444333098893e-05,
      "loss": 2.9237,
      "step": 168777
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.689936399459839,
      "learning_rate": 9.98213965672988e-05,
      "loss": 2.9838,
      "step": 168778
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7106547355651855,
      "learning_rate": 9.981834984082575e-05,
      "loss": 3.0032,
      "step": 168779
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.451685667037964,
      "learning_rate": 9.981530315157016e-05,
      "loss": 3.1058,
      "step": 168780
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.456303596496582,
      "learning_rate": 9.981225649953289e-05,
      "loss": 2.7905,
      "step": 168781
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.526463270187378,
      "learning_rate": 9.980920988471405e-05,
      "loss": 2.9362,
      "step": 168782
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.0319504737854,
      "learning_rate": 9.980616330711463e-05,
      "loss": 2.9973,
      "step": 168783
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9957516193389893,
      "learning_rate": 9.980311676673486e-05,
      "loss": 2.9968,
      "step": 168784
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9289088249206543,
      "learning_rate": 9.980007026357561e-05,
      "loss": 2.8876,
      "step": 168785
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2752254009246826,
      "learning_rate": 9.97970237976372e-05,
      "loss": 2.8795,
      "step": 168786
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.519115686416626,
      "learning_rate": 9.979397736892048e-05,
      "loss": 2.8807,
      "step": 168787
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.942533254623413,
      "learning_rate": 9.979093097742565e-05,
      "loss": 3.1601,
      "step": 168788
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1132071018218994,
      "learning_rate": 9.978788462315356e-05,
      "loss": 3.2066,
      "step": 168789
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.638289451599121,
      "learning_rate": 9.978483830610459e-05,
      "loss": 2.8881,
      "step": 168790
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6032047271728516,
      "learning_rate": 9.97817920262795e-05,
      "loss": 2.9894,
      "step": 168791
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.105698585510254,
      "learning_rate": 9.977874578367866e-05,
      "loss": 3.1607,
      "step": 168792
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.418487310409546,
      "learning_rate": 9.977569957830292e-05,
      "loss": 2.7491,
      "step": 168793
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7100977897644043,
      "learning_rate": 9.977265341015246e-05,
      "loss": 2.7579,
      "step": 168794
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.4240317344665527,
      "learning_rate": 9.976960727922812e-05,
      "loss": 2.8821,
      "step": 168795
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.595749616622925,
      "learning_rate": 9.976656118553032e-05,
      "loss": 3.026,
      "step": 168796
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.382283926010132,
      "learning_rate": 9.97635151290598e-05,
      "loss": 2.966,
      "step": 168797
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.367770195007324,
      "learning_rate": 9.976046910981692e-05,
      "loss": 3.0431,
      "step": 168798
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.355950355529785,
      "learning_rate": 9.975742312780254e-05,
      "loss": 2.8752,
      "step": 168799
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2176127433776855,
      "learning_rate": 9.975437718301682e-05,
      "loss": 2.8757,
      "step": 168800
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.631289005279541,
      "learning_rate": 9.975133127546066e-05,
      "loss": 2.6785,
      "step": 168801
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6978652477264404,
      "learning_rate": 9.974828540513442e-05,
      "loss": 3.146,
      "step": 168802
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.37660551071167,
      "learning_rate": 9.97452395720388e-05,
      "loss": 2.8528,
      "step": 168803
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.979997396469116,
      "learning_rate": 9.974219377617425e-05,
      "loss": 2.8879,
      "step": 168804
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.969998836517334,
      "learning_rate": 9.973914801754161e-05,
      "loss": 3.0873,
      "step": 168805
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.573761463165283,
      "learning_rate": 9.973610229614106e-05,
      "loss": 3.0023,
      "step": 168806
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.6536171436309814,
      "learning_rate": 9.973305661197343e-05,
      "loss": 2.755,
      "step": 168807
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.035226821899414,
      "learning_rate": 9.973001096503911e-05,
      "loss": 2.862,
      "step": 168808
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.397840976715088,
      "learning_rate": 9.972696535533887e-05,
      "loss": 3.005,
      "step": 168809
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.652135848999023,
      "learning_rate": 9.972391978287306e-05,
      "loss": 2.7977,
      "step": 168810
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.81220817565918,
      "learning_rate": 9.972087424764254e-05,
      "loss": 3.2822,
      "step": 168811
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.836731433868408,
      "learning_rate": 9.971782874964749e-05,
      "loss": 2.9397,
      "step": 168812
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9768829345703125,
      "learning_rate": 9.971478328888882e-05,
      "loss": 3.1074,
      "step": 168813
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2575926780700684,
      "learning_rate": 9.971173786536681e-05,
      "loss": 3.0103,
      "step": 168814
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4933550357818604,
      "learning_rate": 9.970869247908227e-05,
      "loss": 2.8824,
      "step": 168815
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6760473251342773,
      "learning_rate": 9.970564713003556e-05,
      "loss": 2.9602,
      "step": 168816
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5836164951324463,
      "learning_rate": 9.970260181822747e-05,
      "loss": 3.0087,
      "step": 168817
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9929189682006836,
      "learning_rate": 9.969955654365846e-05,
      "loss": 3.1237,
      "step": 168818
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.320200204849243,
      "learning_rate": 9.969651130632905e-05,
      "loss": 2.9006,
      "step": 168819
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3860881328582764,
      "learning_rate": 9.969346610623976e-05,
      "loss": 2.8866,
      "step": 168820
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7984673976898193,
      "learning_rate": 9.969042094339135e-05,
      "loss": 2.976,
      "step": 168821
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.083673477172852,
      "learning_rate": 9.968737581778414e-05,
      "loss": 2.6279,
      "step": 168822
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.747825860977173,
      "learning_rate": 9.968433072941895e-05,
      "loss": 2.8103,
      "step": 168823
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.792959213256836,
      "learning_rate": 9.96812856782962e-05,
      "loss": 2.9363,
      "step": 168824
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.364877223968506,
      "learning_rate": 9.967824066441649e-05,
      "loss": 3.0414,
      "step": 168825
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6786670684814453,
      "learning_rate": 9.967519568778028e-05,
      "loss": 2.7647,
      "step": 168826
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8234786987304688,
      "learning_rate": 9.967215074838836e-05,
      "loss": 2.8907,
      "step": 168827
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5693047046661377,
      "learning_rate": 9.966910584624104e-05,
      "loss": 2.6886,
      "step": 168828
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0227222442626953,
      "learning_rate": 9.96660609813391e-05,
      "loss": 2.6111,
      "step": 168829
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.980877637863159,
      "learning_rate": 9.966301615368306e-05,
      "loss": 3.0838,
      "step": 168830
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5837879180908203,
      "learning_rate": 9.965997136327332e-05,
      "loss": 2.9564,
      "step": 168831
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9043490886688232,
      "learning_rate": 9.965692661011066e-05,
      "loss": 2.9021,
      "step": 168832
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1836607456207275,
      "learning_rate": 9.96538818941956e-05,
      "loss": 2.9386,
      "step": 168833
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4183924198150635,
      "learning_rate": 9.965083721552851e-05,
      "loss": 2.8819,
      "step": 168834
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.498265027999878,
      "learning_rate": 9.964779257411027e-05,
      "loss": 2.9727,
      "step": 168835
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3864452838897705,
      "learning_rate": 9.964474796994126e-05,
      "loss": 3.215,
      "step": 168836
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.719754219055176,
      "learning_rate": 9.964170340302198e-05,
      "loss": 2.9218,
      "step": 168837
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3768062591552734,
      "learning_rate": 9.963865887335317e-05,
      "loss": 3.1028,
      "step": 168838
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.091028928756714,
      "learning_rate": 9.963561438093533e-05,
      "loss": 2.8161,
      "step": 168839
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5725579261779785,
      "learning_rate": 9.96325699257689e-05,
      "loss": 3.0178,
      "step": 168840
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.799065589904785,
      "learning_rate": 9.962952550785467e-05,
      "loss": 2.9863,
      "step": 168841
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.272691249847412,
      "learning_rate": 9.96264811271931e-05,
      "loss": 2.967,
      "step": 168842
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0009536743164062,
      "learning_rate": 9.962343678378464e-05,
      "loss": 2.9125,
      "step": 168843
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.607713460922241,
      "learning_rate": 9.962039247763008e-05,
      "loss": 2.9582,
      "step": 168844
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.54792857170105,
      "learning_rate": 9.961734820872975e-05,
      "loss": 2.6783,
      "step": 168845
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2809126377105713,
      "learning_rate": 9.961430397708444e-05,
      "loss": 3.1571,
      "step": 168846
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.849857807159424,
      "learning_rate": 9.96112597826946e-05,
      "loss": 2.9191,
      "step": 168847
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5213146209716797,
      "learning_rate": 9.960821562556084e-05,
      "loss": 2.8996,
      "step": 168848
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.0557284355163574,
      "learning_rate": 9.96051715056836e-05,
      "loss": 3.1333,
      "step": 168849
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3113856315612793,
      "learning_rate": 9.960212742306362e-05,
      "loss": 2.8289,
      "step": 168850
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.343963146209717,
      "learning_rate": 9.959908337770129e-05,
      "loss": 2.9109,
      "step": 168851
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.742793321609497,
      "learning_rate": 9.959603936959737e-05,
      "loss": 2.9832,
      "step": 168852
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6977572441101074,
      "learning_rate": 9.959299539875235e-05,
      "loss": 3.1165,
      "step": 168853
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.460270881652832,
      "learning_rate": 9.958995146516676e-05,
      "loss": 3.1685,
      "step": 168854
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.039940595626831,
      "learning_rate": 9.958690756884106e-05,
      "loss": 2.9397,
      "step": 168855
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.753351926803589,
      "learning_rate": 9.958386370977606e-05,
      "loss": 2.9598,
      "step": 168856
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.464858293533325,
      "learning_rate": 9.958081988797214e-05,
      "loss": 2.7129,
      "step": 168857
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4034268856048584,
      "learning_rate": 9.957777610342995e-05,
      "loss": 2.7057,
      "step": 168858
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.56127667427063,
      "learning_rate": 9.957473235615e-05,
      "loss": 2.8482,
      "step": 168859
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.6132006645202637,
      "learning_rate": 9.957168864613304e-05,
      "loss": 3.1014,
      "step": 168860
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.786525249481201,
      "learning_rate": 9.956864497337934e-05,
      "loss": 2.7149,
      "step": 168861
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.51194167137146,
      "learning_rate": 9.956560133788967e-05,
      "loss": 2.8294,
      "step": 168862
    },
    {
      "epoch": 2.2,
      "grad_norm": 1.964416742324829,
      "learning_rate": 9.956255773966445e-05,
      "loss": 2.9649,
      "step": 168863
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.4053256511688232,
      "learning_rate": 9.955951417870443e-05,
      "loss": 3.0641,
      "step": 168864
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.4206647872924805,
      "learning_rate": 9.955647065501e-05,
      "loss": 2.8898,
      "step": 168865
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.616145610809326,
      "learning_rate": 9.955342716858197e-05,
      "loss": 2.7396,
      "step": 168866
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5761330127716064,
      "learning_rate": 9.955038371942058e-05,
      "loss": 2.9627,
      "step": 168867
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.592639207839966,
      "learning_rate": 9.954734030752663e-05,
      "loss": 2.8404,
      "step": 168868
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2804744243621826,
      "learning_rate": 9.954429693290054e-05,
      "loss": 2.9617,
      "step": 168869
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.423691749572754,
      "learning_rate": 9.954125359554303e-05,
      "loss": 3.294,
      "step": 168870
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.6347532272338867,
      "learning_rate": 9.95382102954545e-05,
      "loss": 2.6896,
      "step": 168871
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.653337001800537,
      "learning_rate": 9.953516703263577e-05,
      "loss": 2.883,
      "step": 168872
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.385923385620117,
      "learning_rate": 9.953212380708705e-05,
      "loss": 2.8513,
      "step": 168873
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.294344663619995,
      "learning_rate": 9.952908061880921e-05,
      "loss": 2.9348,
      "step": 168874
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6154592037200928,
      "learning_rate": 9.952603746780259e-05,
      "loss": 3.059,
      "step": 168875
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7942326068878174,
      "learning_rate": 9.952299435406796e-05,
      "loss": 3.1223,
      "step": 168876
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.950805902481079,
      "learning_rate": 9.951995127760567e-05,
      "loss": 2.9242,
      "step": 168877
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.533801317214966,
      "learning_rate": 9.951690823841663e-05,
      "loss": 3.0038,
      "step": 168878
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6605494022369385,
      "learning_rate": 9.951386523650098e-05,
      "loss": 2.7951,
      "step": 168879
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.565321683883667,
      "learning_rate": 9.95108222718596e-05,
      "loss": 2.7281,
      "step": 168880
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.197951793670654,
      "learning_rate": 9.950777934449281e-05,
      "loss": 2.9235,
      "step": 168881
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9676673412323,
      "learning_rate": 9.950473645440141e-05,
      "loss": 2.9368,
      "step": 168882
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0918774604797363,
      "learning_rate": 9.95016936015858e-05,
      "loss": 2.8044,
      "step": 168883
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.439056158065796,
      "learning_rate": 9.949865078604669e-05,
      "loss": 3.0425,
      "step": 168884
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.470388412475586,
      "learning_rate": 9.949560800778455e-05,
      "loss": 3.1303,
      "step": 168885
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.62813401222229,
      "learning_rate": 9.949256526679998e-05,
      "loss": 2.8101,
      "step": 168886
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.9718780517578125,
      "learning_rate": 9.948952256309342e-05,
      "loss": 2.8542,
      "step": 168887
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.606475353240967,
      "learning_rate": 9.948647989666564e-05,
      "loss": 2.9013,
      "step": 168888
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.333336353302002,
      "learning_rate": 9.9483437267517e-05,
      "loss": 3.1049,
      "step": 168889
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1990232467651367,
      "learning_rate": 9.948039467564833e-05,
      "loss": 2.9075,
      "step": 168890
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.863257884979248,
      "learning_rate": 9.947735212105998e-05,
      "loss": 2.8691,
      "step": 168891
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0674960613250732,
      "learning_rate": 9.947430960375259e-05,
      "loss": 2.6629,
      "step": 168892
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.63816237449646,
      "learning_rate": 9.94712671237266e-05,
      "loss": 2.9061,
      "step": 168893
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3203837871551514,
      "learning_rate": 9.94682246809828e-05,
      "loss": 2.715,
      "step": 168894
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8453915119171143,
      "learning_rate": 9.946518227552156e-05,
      "loss": 2.7826,
      "step": 168895
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6900203227996826,
      "learning_rate": 9.946213990734362e-05,
      "loss": 2.9092,
      "step": 168896
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6935067176818848,
      "learning_rate": 9.945909757644944e-05,
      "loss": 3.1091,
      "step": 168897
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.366537570953369,
      "learning_rate": 9.945605528283958e-05,
      "loss": 2.9657,
      "step": 168898
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7953906059265137,
      "learning_rate": 9.945301302651454e-05,
      "loss": 2.849,
      "step": 168899
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3085758686065674,
      "learning_rate": 9.94499708074751e-05,
      "loss": 2.8385,
      "step": 168900
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6039528846740723,
      "learning_rate": 9.944692862572159e-05,
      "loss": 2.9945,
      "step": 168901
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3916633129119873,
      "learning_rate": 9.944388648125475e-05,
      "loss": 2.9892,
      "step": 168902
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.307027578353882,
      "learning_rate": 9.944084437407509e-05,
      "loss": 2.9757,
      "step": 168903
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.648465394973755,
      "learning_rate": 9.943780230418316e-05,
      "loss": 2.9857,
      "step": 168904
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2128536701202393,
      "learning_rate": 9.943476027157945e-05,
      "loss": 2.6613,
      "step": 168905
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.107579469680786,
      "learning_rate": 9.943171827626467e-05,
      "loss": 2.9688,
      "step": 168906
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3853535652160645,
      "learning_rate": 9.942867631823927e-05,
      "loss": 3.0868,
      "step": 168907
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.437601089477539,
      "learning_rate": 9.942563439750393e-05,
      "loss": 3.2037,
      "step": 168908
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.08642315864563,
      "learning_rate": 9.942259251405914e-05,
      "loss": 2.9395,
      "step": 168909
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.9382946491241455,
      "learning_rate": 9.94195506679055e-05,
      "loss": 2.8723,
      "step": 168910
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.921279191970825,
      "learning_rate": 9.941650885904344e-05,
      "loss": 2.9857,
      "step": 168911
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6065518856048584,
      "learning_rate": 9.941346708747373e-05,
      "loss": 3.0644,
      "step": 168912
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3607614040374756,
      "learning_rate": 9.941042535319677e-05,
      "loss": 3.1936,
      "step": 168913
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5732569694519043,
      "learning_rate": 9.940738365621328e-05,
      "loss": 2.9186,
      "step": 168914
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4156978130340576,
      "learning_rate": 9.940434199652372e-05,
      "loss": 2.9326,
      "step": 168915
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6874804496765137,
      "learning_rate": 9.94013003741287e-05,
      "loss": 2.7689,
      "step": 168916
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6948578357696533,
      "learning_rate": 9.93982587890287e-05,
      "loss": 2.8306,
      "step": 168917
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.344168186187744,
      "learning_rate": 9.93952172412244e-05,
      "loss": 2.6832,
      "step": 168918
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.370546579360962,
      "learning_rate": 9.939217573071623e-05,
      "loss": 3.0271,
      "step": 168919
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.296935558319092,
      "learning_rate": 9.938913425750494e-05,
      "loss": 2.8994,
      "step": 168920
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.67922306060791,
      "learning_rate": 9.938609282159102e-05,
      "loss": 2.8664,
      "step": 168921
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2091064453125,
      "learning_rate": 9.938305142297491e-05,
      "loss": 2.9943,
      "step": 168922
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.291572332382202,
      "learning_rate": 9.938001006165735e-05,
      "loss": 3.125,
      "step": 168923
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5454089641571045,
      "learning_rate": 9.937696873763885e-05,
      "loss": 2.6916,
      "step": 168924
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.031639337539673,
      "learning_rate": 9.937392745091985e-05,
      "loss": 2.8186,
      "step": 168925
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.766139268875122,
      "learning_rate": 9.937088620150112e-05,
      "loss": 3.0164,
      "step": 168926
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9940123558044434,
      "learning_rate": 9.936784498938315e-05,
      "loss": 3.0681,
      "step": 168927
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.519575834274292,
      "learning_rate": 9.936480381456637e-05,
      "loss": 2.8406,
      "step": 168928
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3560075759887695,
      "learning_rate": 9.936176267705156e-05,
      "loss": 2.8786,
      "step": 168929
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.0291314125061035,
      "learning_rate": 9.935872157683911e-05,
      "loss": 2.9205,
      "step": 168930
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.134814262390137,
      "learning_rate": 9.935568051392971e-05,
      "loss": 2.7853,
      "step": 168931
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6641411781311035,
      "learning_rate": 9.935263948832391e-05,
      "loss": 3.3626,
      "step": 168932
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3163249492645264,
      "learning_rate": 9.934959850002225e-05,
      "loss": 3.0314,
      "step": 168933
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8429195880889893,
      "learning_rate": 9.934655754902516e-05,
      "loss": 2.7724,
      "step": 168934
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.0985329151153564,
      "learning_rate": 9.934351663533347e-05,
      "loss": 2.8395,
      "step": 168935
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.797809600830078,
      "learning_rate": 9.934047575894748e-05,
      "loss": 2.5923,
      "step": 168936
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.527632236480713,
      "learning_rate": 9.933743491986797e-05,
      "loss": 2.8585,
      "step": 168937
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.087979316711426,
      "learning_rate": 9.933439411809547e-05,
      "loss": 2.8576,
      "step": 168938
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5527162551879883,
      "learning_rate": 9.933135335363044e-05,
      "loss": 3.1785,
      "step": 168939
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5722715854644775,
      "learning_rate": 9.932831262647344e-05,
      "loss": 2.8689,
      "step": 168940
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7194266319274902,
      "learning_rate": 9.932527193662516e-05,
      "loss": 3.1053,
      "step": 168941
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.458345413208008,
      "learning_rate": 9.932223128408601e-05,
      "loss": 3.1146,
      "step": 168942
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.842210531234741,
      "learning_rate": 9.931919066885678e-05,
      "loss": 2.8872,
      "step": 168943
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.371014356613159,
      "learning_rate": 9.931615009093776e-05,
      "loss": 2.9723,
      "step": 168944
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.636754035949707,
      "learning_rate": 9.931310955032987e-05,
      "loss": 3.1555,
      "step": 168945
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.734334707260132,
      "learning_rate": 9.931006904703324e-05,
      "loss": 2.799,
      "step": 168946
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8574116230010986,
      "learning_rate": 9.930702858104879e-05,
      "loss": 2.7913,
      "step": 168947
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0941898822784424,
      "learning_rate": 9.930398815237683e-05,
      "loss": 2.8705,
      "step": 168948
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.455293655395508,
      "learning_rate": 9.930094776101815e-05,
      "loss": 2.8747,
      "step": 168949
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.599754810333252,
      "learning_rate": 9.929790740697312e-05,
      "loss": 2.9581,
      "step": 168950
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1653757095336914,
      "learning_rate": 9.929486709024252e-05,
      "loss": 2.9073,
      "step": 168951
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2047348022460938,
      "learning_rate": 9.929182681082676e-05,
      "loss": 2.8889,
      "step": 168952
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9671244621276855,
      "learning_rate": 9.928878656872641e-05,
      "loss": 3.1123,
      "step": 168953
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.588623285293579,
      "learning_rate": 9.928574636394203e-05,
      "loss": 3.131,
      "step": 168954
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6194162368774414,
      "learning_rate": 9.928270619647427e-05,
      "loss": 2.7156,
      "step": 168955
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.493010997772217,
      "learning_rate": 9.927966606632357e-05,
      "loss": 2.8334,
      "step": 168956
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3060457706451416,
      "learning_rate": 9.927662597349067e-05,
      "loss": 2.7953,
      "step": 168957
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6707427501678467,
      "learning_rate": 9.927358591797601e-05,
      "loss": 2.8751,
      "step": 168958
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.160325527191162,
      "learning_rate": 9.927054589978019e-05,
      "loss": 2.8887,
      "step": 168959
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8224196434020996,
      "learning_rate": 9.926750591890366e-05,
      "loss": 2.9318,
      "step": 168960
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.460930109024048,
      "learning_rate": 9.926446597534722e-05,
      "loss": 2.9216,
      "step": 168961
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1958751678466797,
      "learning_rate": 9.926142606911118e-05,
      "loss": 3.1557,
      "step": 168962
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2799811363220215,
      "learning_rate": 9.925838620019633e-05,
      "loss": 3.0021,
      "step": 168963
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8800690174102783,
      "learning_rate": 9.925534636860312e-05,
      "loss": 2.9034,
      "step": 168964
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5797507762908936,
      "learning_rate": 9.925230657433216e-05,
      "loss": 2.9998,
      "step": 168965
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7061655521392822,
      "learning_rate": 9.924926681738386e-05,
      "loss": 2.8454,
      "step": 168966
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.854823589324951,
      "learning_rate": 9.924622709775902e-05,
      "loss": 2.9989,
      "step": 168967
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.133718729019165,
      "learning_rate": 9.924318741545803e-05,
      "loss": 2.8904,
      "step": 168968
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6240386962890625,
      "learning_rate": 9.924014777048157e-05,
      "loss": 2.7171,
      "step": 168969
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4164469242095947,
      "learning_rate": 9.923710816283019e-05,
      "loss": 2.7874,
      "step": 168970
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.811743974685669,
      "learning_rate": 9.923406859250439e-05,
      "loss": 3.1614,
      "step": 168971
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4388482570648193,
      "learning_rate": 9.923102905950469e-05,
      "loss": 2.8351,
      "step": 168972
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.784149408340454,
      "learning_rate": 9.922798956383184e-05,
      "loss": 2.9886,
      "step": 168973
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4736833572387695,
      "learning_rate": 9.922495010548617e-05,
      "loss": 3.0737,
      "step": 168974
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6755454540252686,
      "learning_rate": 9.92219106844685e-05,
      "loss": 2.8053,
      "step": 168975
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3678195476531982,
      "learning_rate": 9.921887130077923e-05,
      "loss": 3.1698,
      "step": 168976
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7267136573791504,
      "learning_rate": 9.921583195441902e-05,
      "loss": 2.9792,
      "step": 168977
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5009756088256836,
      "learning_rate": 9.921279264538823e-05,
      "loss": 3.1347,
      "step": 168978
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.841115713119507,
      "learning_rate": 9.920975337368769e-05,
      "loss": 2.9773,
      "step": 168979
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8182389736175537,
      "learning_rate": 9.920671413931771e-05,
      "loss": 3.0735,
      "step": 168980
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.563155174255371,
      "learning_rate": 9.920367494227914e-05,
      "loss": 2.8351,
      "step": 168981
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.4472763538360596,
      "learning_rate": 9.920063578257238e-05,
      "loss": 2.7794,
      "step": 168982
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.575483798980713,
      "learning_rate": 9.919759666019802e-05,
      "loss": 2.808,
      "step": 168983
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.659367799758911,
      "learning_rate": 9.91945575751565e-05,
      "loss": 2.9534,
      "step": 168984
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3441519737243652,
      "learning_rate": 9.91915185274486e-05,
      "loss": 2.9443,
      "step": 168985
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4790918827056885,
      "learning_rate": 9.918847951707469e-05,
      "loss": 3.1122,
      "step": 168986
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2766828536987305,
      "learning_rate": 9.918544054403554e-05,
      "loss": 3.0668,
      "step": 168987
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5351147651672363,
      "learning_rate": 9.918240160833163e-05,
      "loss": 2.8049,
      "step": 168988
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5272669792175293,
      "learning_rate": 9.917936270996348e-05,
      "loss": 3.1451,
      "step": 168989
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5747010707855225,
      "learning_rate": 9.917632384893157e-05,
      "loss": 2.7459,
      "step": 168990
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1637051105499268,
      "learning_rate": 9.91732850252367e-05,
      "loss": 3.0187,
      "step": 168991
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.865621328353882,
      "learning_rate": 9.917024623887918e-05,
      "loss": 2.742,
      "step": 168992
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6609976291656494,
      "learning_rate": 9.91672074898598e-05,
      "loss": 2.965,
      "step": 168993
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.920853853225708,
      "learning_rate": 9.916416877817902e-05,
      "loss": 2.9294,
      "step": 168994
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.462191581726074,
      "learning_rate": 9.916113010383743e-05,
      "loss": 3.0029,
      "step": 168995
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7232706546783447,
      "learning_rate": 9.915809146683549e-05,
      "loss": 2.7793,
      "step": 168996
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7071502208709717,
      "learning_rate": 9.915505286717393e-05,
      "loss": 2.8796,
      "step": 168997
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.112935781478882,
      "learning_rate": 9.915201430485315e-05,
      "loss": 2.8914,
      "step": 168998
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5733673572540283,
      "learning_rate": 9.914897577987389e-05,
      "loss": 3.118,
      "step": 168999
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9201836585998535,
      "learning_rate": 9.914593729223662e-05,
      "loss": 2.8967,
      "step": 169000
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1846001148223877,
      "learning_rate": 9.914289884194192e-05,
      "loss": 3.2723,
      "step": 169001
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4586737155914307,
      "learning_rate": 9.913986042899028e-05,
      "loss": 2.672,
      "step": 169002
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6436309814453125,
      "learning_rate": 9.91368220533824e-05,
      "loss": 3.0202,
      "step": 169003
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3159801959991455,
      "learning_rate": 9.913378371511865e-05,
      "loss": 3.1107,
      "step": 169004
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1071226596832275,
      "learning_rate": 9.913074541419987e-05,
      "loss": 2.7426,
      "step": 169005
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4171791076660156,
      "learning_rate": 9.912770715062647e-05,
      "loss": 3.0144,
      "step": 169006
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.335541248321533,
      "learning_rate": 9.91246689243989e-05,
      "loss": 2.8692,
      "step": 169007
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7151401042938232,
      "learning_rate": 9.912163073551795e-05,
      "loss": 2.6929,
      "step": 169008
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5613760948181152,
      "learning_rate": 9.911859258398408e-05,
      "loss": 3.2313,
      "step": 169009
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.944429874420166,
      "learning_rate": 9.911555446979774e-05,
      "loss": 3.1004,
      "step": 169010
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.236203908920288,
      "learning_rate": 9.911251639295973e-05,
      "loss": 2.7862,
      "step": 169011
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.282165765762329,
      "learning_rate": 9.91094783534705e-05,
      "loss": 2.6832,
      "step": 169012
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4656171798706055,
      "learning_rate": 9.910644035133051e-05,
      "loss": 2.6682,
      "step": 169013
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.760349988937378,
      "learning_rate": 9.910340238654051e-05,
      "loss": 2.9295,
      "step": 169014
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2145183086395264,
      "learning_rate": 9.9100364459101e-05,
      "loss": 3.0162,
      "step": 169015
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4333431720733643,
      "learning_rate": 9.909732656901241e-05,
      "loss": 2.9826,
      "step": 169016
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.388607978820801,
      "learning_rate": 9.909428871627552e-05,
      "loss": 2.8861,
      "step": 169017
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6777193546295166,
      "learning_rate": 9.909125090089071e-05,
      "loss": 2.8667,
      "step": 169018
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.393037796020508,
      "learning_rate": 9.908821312285871e-05,
      "loss": 2.9866,
      "step": 169019
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6585774421691895,
      "learning_rate": 9.908517538218002e-05,
      "loss": 2.7731,
      "step": 169020
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7910842895507812,
      "learning_rate": 9.908213767885506e-05,
      "loss": 2.751,
      "step": 169021
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4703571796417236,
      "learning_rate": 9.907910001288467e-05,
      "loss": 3.0304,
      "step": 169022
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3884897232055664,
      "learning_rate": 9.907606238426923e-05,
      "loss": 2.9878,
      "step": 169023
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.513793468475342,
      "learning_rate": 9.907302479300923e-05,
      "loss": 3.1601,
      "step": 169024
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.681945323944092,
      "learning_rate": 9.90699872391055e-05,
      "loss": 3.0231,
      "step": 169025
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.614755868911743,
      "learning_rate": 9.906694972255841e-05,
      "loss": 2.8804,
      "step": 169026
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8413515090942383,
      "learning_rate": 9.906391224336847e-05,
      "loss": 2.9996,
      "step": 169027
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4387869834899902,
      "learning_rate": 9.906087480153644e-05,
      "loss": 2.9661,
      "step": 169028
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.117812633514404,
      "learning_rate": 9.905783739706271e-05,
      "loss": 2.8504,
      "step": 169029
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8786909580230713,
      "learning_rate": 9.905480002994803e-05,
      "loss": 2.8213,
      "step": 169030
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8911828994750977,
      "learning_rate": 9.905176270019282e-05,
      "loss": 2.7752,
      "step": 169031
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5384624004364014,
      "learning_rate": 9.904872540779769e-05,
      "loss": 3.0637,
      "step": 169032
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.879362106323242,
      "learning_rate": 9.904568815276311e-05,
      "loss": 2.9592,
      "step": 169033
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.301597833633423,
      "learning_rate": 9.904265093508982e-05,
      "loss": 2.7564,
      "step": 169034
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.367894172668457,
      "learning_rate": 9.90396137547782e-05,
      "loss": 2.9863,
      "step": 169035
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3657186031341553,
      "learning_rate": 9.9036576611829e-05,
      "loss": 2.9635,
      "step": 169036
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6074655055999756,
      "learning_rate": 9.903353950624267e-05,
      "loss": 2.8747,
      "step": 169037
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.4256982803344727,
      "learning_rate": 9.903050243801984e-05,
      "loss": 2.9684,
      "step": 169038
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9884605407714844,
      "learning_rate": 9.902746540716093e-05,
      "loss": 3.2207,
      "step": 169039
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8070199489593506,
      "learning_rate": 9.902442841366668e-05,
      "loss": 2.9915,
      "step": 169040
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1343677043914795,
      "learning_rate": 9.90213914575375e-05,
      "loss": 2.9512,
      "step": 169041
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.694061040878296,
      "learning_rate": 9.901835453877413e-05,
      "loss": 3.0737,
      "step": 169042
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1473591327667236,
      "learning_rate": 9.901531765737705e-05,
      "loss": 2.7818,
      "step": 169043
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9106242656707764,
      "learning_rate": 9.901228081334684e-05,
      "loss": 3.0081,
      "step": 169044
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.49771785736084,
      "learning_rate": 9.900924400668389e-05,
      "loss": 3.1616,
      "step": 169045
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6904289722442627,
      "learning_rate": 9.900620723738907e-05,
      "loss": 2.8342,
      "step": 169046
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4935567378997803,
      "learning_rate": 9.900317050546264e-05,
      "loss": 2.8905,
      "step": 169047
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.303105354309082,
      "learning_rate": 9.900013381090544e-05,
      "loss": 3.0318,
      "step": 169048
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.790513277053833,
      "learning_rate": 9.899709715371789e-05,
      "loss": 3.139,
      "step": 169049
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.171518325805664,
      "learning_rate": 9.89940605339006e-05,
      "loss": 3.024,
      "step": 169050
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6744818687438965,
      "learning_rate": 9.899102395145401e-05,
      "loss": 2.9617,
      "step": 169051
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.648200511932373,
      "learning_rate": 9.898798740637885e-05,
      "loss": 2.9093,
      "step": 169052
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5781867504119873,
      "learning_rate": 9.898495089867554e-05,
      "loss": 2.7855,
      "step": 169053
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0248446464538574,
      "learning_rate": 9.898191442834483e-05,
      "loss": 2.6916,
      "step": 169054
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3757033348083496,
      "learning_rate": 9.897887799538717e-05,
      "loss": 3.0966,
      "step": 169055
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.355003833770752,
      "learning_rate": 9.897584159980313e-05,
      "loss": 2.836,
      "step": 169056
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.67085862159729,
      "learning_rate": 9.897280524159317e-05,
      "loss": 3.0634,
      "step": 169057
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.937746524810791,
      "learning_rate": 9.896976892075807e-05,
      "loss": 2.9846,
      "step": 169058
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2234065532684326,
      "learning_rate": 9.896673263729815e-05,
      "loss": 2.9257,
      "step": 169059
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.435030221939087,
      "learning_rate": 9.896369639121426e-05,
      "loss": 2.9618,
      "step": 169060
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.746267557144165,
      "learning_rate": 9.896066018250678e-05,
      "loss": 2.9072,
      "step": 169061
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.014196872711182,
      "learning_rate": 9.895762401117633e-05,
      "loss": 2.9679,
      "step": 169062
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1289923191070557,
      "learning_rate": 9.895458787722334e-05,
      "loss": 2.9016,
      "step": 169063
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5910353660583496,
      "learning_rate": 9.895155178064859e-05,
      "loss": 3.1127,
      "step": 169064
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.24297833442688,
      "learning_rate": 9.894851572145244e-05,
      "loss": 3.0324,
      "step": 169065
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.27374792098999,
      "learning_rate": 9.894547969963567e-05,
      "loss": 2.4539,
      "step": 169066
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.73652458190918,
      "learning_rate": 9.894244371519871e-05,
      "loss": 2.994,
      "step": 169067
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5203614234924316,
      "learning_rate": 9.893940776814214e-05,
      "loss": 2.8726,
      "step": 169068
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7889416217803955,
      "learning_rate": 9.893637185846644e-05,
      "loss": 2.8267,
      "step": 169069
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.311910390853882,
      "learning_rate": 9.893333598617235e-05,
      "loss": 2.7667,
      "step": 169070
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.226609706878662,
      "learning_rate": 9.89303001512603e-05,
      "loss": 2.9594,
      "step": 169071
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.617666244506836,
      "learning_rate": 9.892726435373095e-05,
      "loss": 2.9677,
      "step": 169072
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.336174726486206,
      "learning_rate": 9.892422859358485e-05,
      "loss": 2.9855,
      "step": 169073
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3291409015655518,
      "learning_rate": 9.892119287082251e-05,
      "loss": 3.0405,
      "step": 169074
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5242714881896973,
      "learning_rate": 9.89181571854444e-05,
      "loss": 3.0646,
      "step": 169075
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0768747329711914,
      "learning_rate": 9.891512153745132e-05,
      "loss": 3.0307,
      "step": 169076
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.301976203918457,
      "learning_rate": 9.891208592684365e-05,
      "loss": 3.0134,
      "step": 169077
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.726795196533203,
      "learning_rate": 9.890905035362205e-05,
      "loss": 2.855,
      "step": 169078
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5684516429901123,
      "learning_rate": 9.89060148177871e-05,
      "loss": 2.8423,
      "step": 169079
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8859736919403076,
      "learning_rate": 9.89029793193393e-05,
      "loss": 3.0197,
      "step": 169080
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6764841079711914,
      "learning_rate": 9.889994385827915e-05,
      "loss": 2.8645,
      "step": 169081
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9189391136169434,
      "learning_rate": 9.889690843460737e-05,
      "loss": 2.9763,
      "step": 169082
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.993922710418701,
      "learning_rate": 9.889387304832438e-05,
      "loss": 2.7761,
      "step": 169083
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8407280445098877,
      "learning_rate": 9.889083769943091e-05,
      "loss": 2.885,
      "step": 169084
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4977896213531494,
      "learning_rate": 9.88878023879273e-05,
      "loss": 2.7726,
      "step": 169085
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6461987495422363,
      "learning_rate": 9.888476711381446e-05,
      "loss": 3.0455,
      "step": 169086
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.444077253341675,
      "learning_rate": 9.888173187709256e-05,
      "loss": 2.9708,
      "step": 169087
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.790632963180542,
      "learning_rate": 9.887869667776242e-05,
      "loss": 2.846,
      "step": 169088
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2137715816497803,
      "learning_rate": 9.887566151582445e-05,
      "loss": 3.1431,
      "step": 169089
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1218223571777344,
      "learning_rate": 9.887262639127935e-05,
      "loss": 2.9185,
      "step": 169090
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.508432626724243,
      "learning_rate": 9.886959130412755e-05,
      "loss": 2.9419,
      "step": 169091
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5259885787963867,
      "learning_rate": 9.88665562543698e-05,
      "loss": 3.0247,
      "step": 169092
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.341118812561035,
      "learning_rate": 9.886352124200654e-05,
      "loss": 2.9621,
      "step": 169093
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.35843563079834,
      "learning_rate": 9.886048626703833e-05,
      "loss": 2.9384,
      "step": 169094
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.456256866455078,
      "learning_rate": 9.885745132946568e-05,
      "loss": 3.0497,
      "step": 169095
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2325422763824463,
      "learning_rate": 9.885441642928931e-05,
      "loss": 2.9167,
      "step": 169096
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.454791307449341,
      "learning_rate": 9.885138156650962e-05,
      "loss": 2.8529,
      "step": 169097
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8320791721343994,
      "learning_rate": 9.884834674112736e-05,
      "loss": 3.07,
      "step": 169098
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.866173505783081,
      "learning_rate": 9.884531195314294e-05,
      "loss": 2.992,
      "step": 169099
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7975223064422607,
      "learning_rate": 9.884227720255702e-05,
      "loss": 2.972,
      "step": 169100
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.1380414962768555,
      "learning_rate": 9.883924248937001e-05,
      "loss": 2.9633,
      "step": 169101
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.697971820831299,
      "learning_rate": 9.883620781358268e-05,
      "loss": 3.156,
      "step": 169102
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5453379154205322,
      "learning_rate": 9.88331731751954e-05,
      "loss": 3.1172,
      "step": 169103
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2803139686584473,
      "learning_rate": 9.883013857420891e-05,
      "loss": 2.985,
      "step": 169104
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9272024631500244,
      "learning_rate": 9.882710401062373e-05,
      "loss": 2.838,
      "step": 169105
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7738733291625977,
      "learning_rate": 9.882406948444026e-05,
      "loss": 2.8732,
      "step": 169106
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.61091947555542,
      "learning_rate": 9.882103499565928e-05,
      "loss": 3.1065,
      "step": 169107
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.695591926574707,
      "learning_rate": 9.881800054428131e-05,
      "loss": 2.8623,
      "step": 169108
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.041794776916504,
      "learning_rate": 9.881496613030674e-05,
      "loss": 2.8546,
      "step": 169109
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.228813886642456,
      "learning_rate": 9.881193175373637e-05,
      "loss": 3.0579,
      "step": 169110
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5534284114837646,
      "learning_rate": 9.880889741457068e-05,
      "loss": 3.1602,
      "step": 169111
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2631330490112305,
      "learning_rate": 9.880586311281009e-05,
      "loss": 2.8912,
      "step": 169112
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.395857095718384,
      "learning_rate": 9.880282884845539e-05,
      "loss": 2.8055,
      "step": 169113
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3597819805145264,
      "learning_rate": 9.879979462150695e-05,
      "loss": 3.2518,
      "step": 169114
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4838688373565674,
      "learning_rate": 9.879676043196553e-05,
      "loss": 2.9195,
      "step": 169115
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2933754920959473,
      "learning_rate": 9.879372627983156e-05,
      "loss": 2.9583,
      "step": 169116
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8399057388305664,
      "learning_rate": 9.879069216510566e-05,
      "loss": 2.8507,
      "step": 169117
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6188979148864746,
      "learning_rate": 9.878765808778828e-05,
      "loss": 2.7321,
      "step": 169118
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.611976385116577,
      "learning_rate": 9.878462404788017e-05,
      "loss": 3.3989,
      "step": 169119
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.628319501876831,
      "learning_rate": 9.878159004538167e-05,
      "loss": 2.7411,
      "step": 169120
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6766364574432373,
      "learning_rate": 9.87785560802936e-05,
      "loss": 3.1748,
      "step": 169121
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.579596996307373,
      "learning_rate": 9.877552215261638e-05,
      "loss": 3.0044,
      "step": 169122
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.007647514343262,
      "learning_rate": 9.877248826235056e-05,
      "loss": 3.2027,
      "step": 169123
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.1652348041534424,
      "learning_rate": 9.87694544094967e-05,
      "loss": 2.8875,
      "step": 169124
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.18780517578125,
      "learning_rate": 9.876642059405546e-05,
      "loss": 2.6206,
      "step": 169125
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5178780555725098,
      "learning_rate": 9.876338681602724e-05,
      "loss": 2.9649,
      "step": 169126
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6438207626342773,
      "learning_rate": 9.876035307541278e-05,
      "loss": 2.9081,
      "step": 169127
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8829476833343506,
      "learning_rate": 9.87573193722126e-05,
      "loss": 2.8614,
      "step": 169128
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.7106778621673584,
      "learning_rate": 9.875428570642725e-05,
      "loss": 3.0642,
      "step": 169129
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6731574535369873,
      "learning_rate": 9.875125207805714e-05,
      "loss": 2.8978,
      "step": 169130
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2256410121917725,
      "learning_rate": 9.874821848710311e-05,
      "loss": 2.9074,
      "step": 169131
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.472264528274536,
      "learning_rate": 9.874518493356544e-05,
      "loss": 2.8094,
      "step": 169132
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.672124862670898,
      "learning_rate": 9.874215141744497e-05,
      "loss": 2.8949,
      "step": 169133
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.258368492126465,
      "learning_rate": 9.873911793874211e-05,
      "loss": 2.6698,
      "step": 169134
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.878565549850464,
      "learning_rate": 9.873608449745748e-05,
      "loss": 2.8595,
      "step": 169135
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5187251567840576,
      "learning_rate": 9.873305109359148e-05,
      "loss": 2.7496,
      "step": 169136
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.534029006958008,
      "learning_rate": 9.873001772714493e-05,
      "loss": 2.8704,
      "step": 169137
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.099148750305176,
      "learning_rate": 9.872698439811813e-05,
      "loss": 2.8652,
      "step": 169138
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.994093418121338,
      "learning_rate": 9.872395110651192e-05,
      "loss": 2.9566,
      "step": 169139
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5585439205169678,
      "learning_rate": 9.872091785232673e-05,
      "loss": 2.6673,
      "step": 169140
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5924947261810303,
      "learning_rate": 9.87178846355631e-05,
      "loss": 2.7233,
      "step": 169141
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.002577781677246,
      "learning_rate": 9.87148514562215e-05,
      "loss": 3.0593,
      "step": 169142
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.0917582511901855,
      "learning_rate": 9.871181831430274e-05,
      "loss": 3.0432,
      "step": 169143
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.906952381134033,
      "learning_rate": 9.870878520980715e-05,
      "loss": 3.024,
      "step": 169144
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.135692596435547,
      "learning_rate": 9.870575214273548e-05,
      "loss": 2.9408,
      "step": 169145
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.796874523162842,
      "learning_rate": 9.870271911308824e-05,
      "loss": 3.0503,
      "step": 169146
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.449525833129883,
      "learning_rate": 9.869968612086593e-05,
      "loss": 2.8172,
      "step": 169147
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1894185543060303,
      "learning_rate": 9.869665316606905e-05,
      "loss": 2.9114,
      "step": 169148
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.259687900543213,
      "learning_rate": 9.869362024869837e-05,
      "loss": 2.932,
      "step": 169149
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.690248489379883,
      "learning_rate": 9.869058736875425e-05,
      "loss": 2.9351,
      "step": 169150
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.165931224822998,
      "learning_rate": 9.868755452623745e-05,
      "loss": 2.9786,
      "step": 169151
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3848841190338135,
      "learning_rate": 9.868452172114836e-05,
      "loss": 2.867,
      "step": 169152
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.663424253463745,
      "learning_rate": 9.868148895348779e-05,
      "loss": 2.8192,
      "step": 169153
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3819336891174316,
      "learning_rate": 9.86784562232559e-05,
      "loss": 2.8085,
      "step": 169154
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.454580307006836,
      "learning_rate": 9.867542353045363e-05,
      "loss": 2.7669,
      "step": 169155
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1186115741729736,
      "learning_rate": 9.867239087508129e-05,
      "loss": 3.1038,
      "step": 169156
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.661271572113037,
      "learning_rate": 9.866935825713967e-05,
      "loss": 3.072,
      "step": 169157
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.495283842086792,
      "learning_rate": 9.866632567662909e-05,
      "loss": 2.8843,
      "step": 169158
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.535921573638916,
      "learning_rate": 9.866329313355043e-05,
      "loss": 2.7164,
      "step": 169159
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.183866500854492,
      "learning_rate": 9.866026062790387e-05,
      "loss": 2.6959,
      "step": 169160
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.43670654296875,
      "learning_rate": 9.86572281596903e-05,
      "loss": 3.0514,
      "step": 169161
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.7109174728393555,
      "learning_rate": 9.865419572891003e-05,
      "loss": 3.0848,
      "step": 169162
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.15834379196167,
      "learning_rate": 9.865116333556383e-05,
      "loss": 3.0205,
      "step": 169163
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.201055526733398,
      "learning_rate": 9.86481309796521e-05,
      "loss": 2.9407,
      "step": 169164
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7064473628997803,
      "learning_rate": 9.864509866117568e-05,
      "loss": 3.0954,
      "step": 169165
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.344029426574707,
      "learning_rate": 9.864206638013476e-05,
      "loss": 3.2776,
      "step": 169166
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.336982011795044,
      "learning_rate": 9.863903413653015e-05,
      "loss": 2.7807,
      "step": 169167
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.8125619888305664,
      "learning_rate": 9.863600193036222e-05,
      "loss": 2.9877,
      "step": 169168
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.463107109069824,
      "learning_rate": 9.86329697616318e-05,
      "loss": 2.9635,
      "step": 169169
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.995260238647461,
      "learning_rate": 9.862993763033922e-05,
      "loss": 2.7799,
      "step": 169170
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2704503536224365,
      "learning_rate": 9.862690553648533e-05,
      "loss": 2.9006,
      "step": 169171
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4479079246520996,
      "learning_rate": 9.862387348007028e-05,
      "loss": 3.0237,
      "step": 169172
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3589799404144287,
      "learning_rate": 9.862084146109496e-05,
      "loss": 3.0105,
      "step": 169173
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8664438724517822,
      "learning_rate": 9.861780947955971e-05,
      "loss": 2.9583,
      "step": 169174
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.728015899658203,
      "learning_rate": 9.861477753546532e-05,
      "loss": 3.0088,
      "step": 169175
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.272350311279297,
      "learning_rate": 9.861174562881216e-05,
      "loss": 2.9157,
      "step": 169176
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.454648971557617,
      "learning_rate": 9.860871375960107e-05,
      "loss": 2.9709,
      "step": 169177
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.625905752182007,
      "learning_rate": 9.860568192783223e-05,
      "loss": 2.7237,
      "step": 169178
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.7601771354675293,
      "learning_rate": 9.86026501335065e-05,
      "loss": 2.9852,
      "step": 169179
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.198691368103027,
      "learning_rate": 9.859961837662422e-05,
      "loss": 2.7794,
      "step": 169180
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2583167552948,
      "learning_rate": 9.859658665718616e-05,
      "loss": 3.2105,
      "step": 169181
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5200068950653076,
      "learning_rate": 9.859355497519271e-05,
      "loss": 2.5377,
      "step": 169182
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.670041084289551,
      "learning_rate": 9.859052333064464e-05,
      "loss": 3.0382,
      "step": 169183
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.581005096435547,
      "learning_rate": 9.858749172354239e-05,
      "loss": 3.0578,
      "step": 169184
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7124581336975098,
      "learning_rate": 9.858446015388651e-05,
      "loss": 2.8526,
      "step": 169185
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2854034900665283,
      "learning_rate": 9.858142862167748e-05,
      "loss": 3.0465,
      "step": 169186
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1976921558380127,
      "learning_rate": 9.857839712691606e-05,
      "loss": 3.1097,
      "step": 169187
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5045108795166016,
      "learning_rate": 9.857536566960265e-05,
      "loss": 2.7966,
      "step": 169188
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5685176849365234,
      "learning_rate": 9.857233424973796e-05,
      "loss": 2.9719,
      "step": 169189
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.125309467315674,
      "learning_rate": 9.856930286732247e-05,
      "loss": 2.7841,
      "step": 169190
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.977214813232422,
      "learning_rate": 9.856627152235666e-05,
      "loss": 2.9286,
      "step": 169191
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4102368354797363,
      "learning_rate": 9.856324021484126e-05,
      "loss": 3.2804,
      "step": 169192
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.580719470977783,
      "learning_rate": 9.856020894477677e-05,
      "loss": 3.0795,
      "step": 169193
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6857588291168213,
      "learning_rate": 9.855717771216365e-05,
      "loss": 2.8776,
      "step": 169194
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.49275279045105,
      "learning_rate": 9.855414651700265e-05,
      "loss": 2.9928,
      "step": 169195
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8862216472625732,
      "learning_rate": 9.855111535929425e-05,
      "loss": 2.9416,
      "step": 169196
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.8666303157806396,
      "learning_rate": 9.85480842390389e-05,
      "loss": 2.9606,
      "step": 169197
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.760070323944092,
      "learning_rate": 9.854505315623735e-05,
      "loss": 2.7047,
      "step": 169198
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.279247999191284,
      "learning_rate": 9.854202211089006e-05,
      "loss": 2.8975,
      "step": 169199
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6019446849823,
      "learning_rate": 9.853899110299752e-05,
      "loss": 3.0809,
      "step": 169200
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.679821491241455,
      "learning_rate": 9.853596013256048e-05,
      "loss": 2.9539,
      "step": 169201
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5776236057281494,
      "learning_rate": 9.853292919957942e-05,
      "loss": 2.9431,
      "step": 169202
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.527456760406494,
      "learning_rate": 9.85298983040548e-05,
      "loss": 2.6723,
      "step": 169203
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6277122497558594,
      "learning_rate": 9.852686744598734e-05,
      "loss": 3.2661,
      "step": 169204
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.879314661026001,
      "learning_rate": 9.852383662537749e-05,
      "loss": 3.0372,
      "step": 169205
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6816108226776123,
      "learning_rate": 9.852080584222591e-05,
      "loss": 2.9054,
      "step": 169206
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3991565704345703,
      "learning_rate": 9.851777509653317e-05,
      "loss": 2.9003,
      "step": 169207
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.421010971069336,
      "learning_rate": 9.851474438829972e-05,
      "loss": 2.6884,
      "step": 169208
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8942461013793945,
      "learning_rate": 9.851171371752613e-05,
      "loss": 2.7483,
      "step": 169209
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6246755123138428,
      "learning_rate": 9.850868308421311e-05,
      "loss": 2.848,
      "step": 169210
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.8901145458221436,
      "learning_rate": 9.850565248836101e-05,
      "loss": 2.981,
      "step": 169211
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4969398975372314,
      "learning_rate": 9.85026219299706e-05,
      "loss": 3.0286,
      "step": 169212
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.407904863357544,
      "learning_rate": 9.84995914090424e-05,
      "loss": 2.8379,
      "step": 169213
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6528618335723877,
      "learning_rate": 9.84965609255769e-05,
      "loss": 3.1246,
      "step": 169214
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.581580877304077,
      "learning_rate": 9.84935304795746e-05,
      "loss": 2.9138,
      "step": 169215
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3710758686065674,
      "learning_rate": 9.849050007103624e-05,
      "loss": 2.9735,
      "step": 169216
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5642952919006348,
      "learning_rate": 9.848746969996224e-05,
      "loss": 3.0023,
      "step": 169217
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5612452030181885,
      "learning_rate": 9.848443936635329e-05,
      "loss": 2.8008,
      "step": 169218
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8419721126556396,
      "learning_rate": 9.848140907020979e-05,
      "loss": 2.994,
      "step": 169219
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.640825033187866,
      "learning_rate": 9.84783788115326e-05,
      "loss": 2.7814,
      "step": 169220
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.587827444076538,
      "learning_rate": 9.847534859032187e-05,
      "loss": 2.9612,
      "step": 169221
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.112703800201416,
      "learning_rate": 9.847231840657852e-05,
      "loss": 2.5967,
      "step": 169222
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.707910537719727,
      "learning_rate": 9.846928826030283e-05,
      "loss": 2.876,
      "step": 169223
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.734762668609619,
      "learning_rate": 9.846625815149563e-05,
      "loss": 3.1797,
      "step": 169224
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.840141773223877,
      "learning_rate": 9.846322808015725e-05,
      "loss": 3.1666,
      "step": 169225
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.490171432495117,
      "learning_rate": 9.846019804628855e-05,
      "loss": 3.0447,
      "step": 169226
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.604140281677246,
      "learning_rate": 9.845716804988969e-05,
      "loss": 2.9162,
      "step": 169227
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.440850257873535,
      "learning_rate": 9.845413809096156e-05,
      "loss": 2.69,
      "step": 169228
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.7252795696258545,
      "learning_rate": 9.845110816950453e-05,
      "loss": 2.9177,
      "step": 169229
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9315078258514404,
      "learning_rate": 9.844807828551933e-05,
      "loss": 3.0869,
      "step": 169230
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.529198408126831,
      "learning_rate": 9.844504843900634e-05,
      "loss": 3.0201,
      "step": 169231
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3577592372894287,
      "learning_rate": 9.844201862996643e-05,
      "loss": 2.7336,
      "step": 169232
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.597730875015259,
      "learning_rate": 9.843898885839973e-05,
      "loss": 3.0122,
      "step": 169233
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.476682186126709,
      "learning_rate": 9.843595912430712e-05,
      "loss": 2.9468,
      "step": 169234
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5148096084594727,
      "learning_rate": 9.8432929427689e-05,
      "loss": 3.2613,
      "step": 169235
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.717543601989746,
      "learning_rate": 9.84298997685461e-05,
      "loss": 2.7441,
      "step": 169236
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8838324546813965,
      "learning_rate": 9.842687014687877e-05,
      "loss": 3.0815,
      "step": 169237
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7852046489715576,
      "learning_rate": 9.842384056268788e-05,
      "loss": 2.7794,
      "step": 169238
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.765749931335449,
      "learning_rate": 9.842081101597363e-05,
      "loss": 2.914,
      "step": 169239
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.431240558624268,
      "learning_rate": 9.841778150673685e-05,
      "loss": 3.1627,
      "step": 169240
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.968155860900879,
      "learning_rate": 9.84147520349779e-05,
      "loss": 2.8062,
      "step": 169241
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9541046619415283,
      "learning_rate": 9.841172260069756e-05,
      "loss": 2.8115,
      "step": 169242
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7322685718536377,
      "learning_rate": 9.840869320389618e-05,
      "loss": 3.2166,
      "step": 169243
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3374691009521484,
      "learning_rate": 9.840566384457462e-05,
      "loss": 3.2835,
      "step": 169244
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6167025566101074,
      "learning_rate": 9.840263452273309e-05,
      "loss": 2.8108,
      "step": 169245
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8548219203948975,
      "learning_rate": 9.839960523837239e-05,
      "loss": 3.0793,
      "step": 169246
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2012813091278076,
      "learning_rate": 9.839657599149292e-05,
      "loss": 2.971,
      "step": 169247
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8330235481262207,
      "learning_rate": 9.83935467820954e-05,
      "loss": 2.876,
      "step": 169248
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0759098529815674,
      "learning_rate": 9.839051761018026e-05,
      "loss": 2.9613,
      "step": 169249
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.8965327739715576,
      "learning_rate": 9.838748847574831e-05,
      "loss": 3.2872,
      "step": 169250
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5390121936798096,
      "learning_rate": 9.838445937879974e-05,
      "loss": 3.1657,
      "step": 169251
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.913217782974243,
      "learning_rate": 9.838143031933539e-05,
      "loss": 3.1764,
      "step": 169252
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.591176986694336,
      "learning_rate": 9.837840129735564e-05,
      "loss": 2.8104,
      "step": 169253
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9808976650238037,
      "learning_rate": 9.837537231286129e-05,
      "loss": 2.9074,
      "step": 169254
    },
    {
      "epoch": 2.2,
      "grad_norm": 5.079542636871338,
      "learning_rate": 9.837234336585265e-05,
      "loss": 2.8568,
      "step": 169255
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.480419874191284,
      "learning_rate": 9.836931445633056e-05,
      "loss": 2.8871,
      "step": 169256
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.214472770690918,
      "learning_rate": 9.836628558429524e-05,
      "loss": 2.9473,
      "step": 169257
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.121063709259033,
      "learning_rate": 9.836325674974753e-05,
      "loss": 2.7538,
      "step": 169258
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.065796375274658,
      "learning_rate": 9.836022795268781e-05,
      "loss": 3.2563,
      "step": 169259
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7999765872955322,
      "learning_rate": 9.835719919311683e-05,
      "loss": 2.896,
      "step": 169260
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4184186458587646,
      "learning_rate": 9.835417047103497e-05,
      "loss": 2.7797,
      "step": 169261
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4172887802124023,
      "learning_rate": 9.835114178644304e-05,
      "loss": 2.8631,
      "step": 169262
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4699289798736572,
      "learning_rate": 9.834811313934124e-05,
      "loss": 2.8127,
      "step": 169263
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.684652090072632,
      "learning_rate": 9.834508452973043e-05,
      "loss": 2.7828,
      "step": 169264
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0092082023620605,
      "learning_rate": 9.834205595761101e-05,
      "loss": 2.8434,
      "step": 169265
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6529414653778076,
      "learning_rate": 9.833902742298369e-05,
      "loss": 2.6751,
      "step": 169266
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8656182289123535,
      "learning_rate": 9.833599892584886e-05,
      "loss": 2.8072,
      "step": 169267
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4744842052459717,
      "learning_rate": 9.833297046620727e-05,
      "loss": 2.9,
      "step": 169268
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6097700595855713,
      "learning_rate": 9.832994204405938e-05,
      "loss": 2.7225,
      "step": 169269
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3405864238739014,
      "learning_rate": 9.832691365940572e-05,
      "loss": 2.7952,
      "step": 169270
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5612499713897705,
      "learning_rate": 9.832388531224685e-05,
      "loss": 3.0146,
      "step": 169271
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.997300148010254,
      "learning_rate": 9.832085700258349e-05,
      "loss": 3.187,
      "step": 169272
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.7192561626434326,
      "learning_rate": 9.831782873041596e-05,
      "loss": 3.1135,
      "step": 169273
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.0332512855529785,
      "learning_rate": 9.831480049574504e-05,
      "loss": 2.956,
      "step": 169274
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3879446983337402,
      "learning_rate": 9.831177229857122e-05,
      "loss": 3.2933,
      "step": 169275
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.59029483795166,
      "learning_rate": 9.830874413889495e-05,
      "loss": 2.744,
      "step": 169276
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4111711978912354,
      "learning_rate": 9.830571601671697e-05,
      "loss": 2.8,
      "step": 169277
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.735647439956665,
      "learning_rate": 9.83026879320378e-05,
      "loss": 2.9052,
      "step": 169278
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7494235038757324,
      "learning_rate": 9.829965988485787e-05,
      "loss": 2.9121,
      "step": 169279
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3155527114868164,
      "learning_rate": 9.829663187517791e-05,
      "loss": 2.8764,
      "step": 169280
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5302059650421143,
      "learning_rate": 9.829360390299843e-05,
      "loss": 2.9769,
      "step": 169281
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6094164848327637,
      "learning_rate": 9.829057596831987e-05,
      "loss": 2.8768,
      "step": 169282
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.2856011390686035,
      "learning_rate": 9.828754807114301e-05,
      "loss": 2.8604,
      "step": 169283
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.237492084503174,
      "learning_rate": 9.828452021146829e-05,
      "loss": 2.9666,
      "step": 169284
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5081162452697754,
      "learning_rate": 9.82814923892962e-05,
      "loss": 3.1106,
      "step": 169285
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.5953269004821777,
      "learning_rate": 9.827846460462749e-05,
      "loss": 2.7936,
      "step": 169286
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2565650939941406,
      "learning_rate": 9.82754368574626e-05,
      "loss": 2.9492,
      "step": 169287
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.3541882038116455,
      "learning_rate": 9.827240914780201e-05,
      "loss": 2.8194,
      "step": 169288
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.0453295707702637,
      "learning_rate": 9.826938147564653e-05,
      "loss": 2.9938,
      "step": 169289
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7377216815948486,
      "learning_rate": 9.826635384099643e-05,
      "loss": 2.8017,
      "step": 169290
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.654421091079712,
      "learning_rate": 9.826332624385255e-05,
      "loss": 3.1225,
      "step": 169291
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8194291591644287,
      "learning_rate": 9.826029868421533e-05,
      "loss": 3.1017,
      "step": 169292
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7803280353546143,
      "learning_rate": 9.825727116208529e-05,
      "loss": 3.04,
      "step": 169293
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7965826988220215,
      "learning_rate": 9.825424367746296e-05,
      "loss": 2.91,
      "step": 169294
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.958061933517456,
      "learning_rate": 9.825121623034909e-05,
      "loss": 3.1044,
      "step": 169295
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4206578731536865,
      "learning_rate": 9.824818882074402e-05,
      "loss": 3.0352,
      "step": 169296
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.90602707862854,
      "learning_rate": 9.824516144864851e-05,
      "loss": 2.8533,
      "step": 169297
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.7647042274475098,
      "learning_rate": 9.824213411406303e-05,
      "loss": 2.9592,
      "step": 169298
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.568713903427124,
      "learning_rate": 9.823910681698815e-05,
      "loss": 2.8645,
      "step": 169299
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8772404193878174,
      "learning_rate": 9.823607955742431e-05,
      "loss": 2.8117,
      "step": 169300
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1181480884552,
      "learning_rate": 9.823305233537232e-05,
      "loss": 2.7219,
      "step": 169301
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.5775139331817627,
      "learning_rate": 9.823002515083252e-05,
      "loss": 3.0135,
      "step": 169302
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.4655070304870605,
      "learning_rate": 9.822699800380563e-05,
      "loss": 2.9898,
      "step": 169303
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.2281594276428223,
      "learning_rate": 9.822397089429208e-05,
      "loss": 3.0238,
      "step": 169304
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2365164756774902,
      "learning_rate": 9.822094382229272e-05,
      "loss": 2.9356,
      "step": 169305
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8676445484161377,
      "learning_rate": 9.821791678780766e-05,
      "loss": 2.9925,
      "step": 169306
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6654911041259766,
      "learning_rate": 9.82148897908378e-05,
      "loss": 3.0454,
      "step": 169307
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.461538314819336,
      "learning_rate": 9.821186283138352e-05,
      "loss": 2.9843,
      "step": 169308
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.931867837905884,
      "learning_rate": 9.820883590944555e-05,
      "loss": 3.2341,
      "step": 169309
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3074612617492676,
      "learning_rate": 9.820580902502427e-05,
      "loss": 2.7553,
      "step": 169310
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.700808525085449,
      "learning_rate": 9.820278217812052e-05,
      "loss": 2.9528,
      "step": 169311
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.916625738143921,
      "learning_rate": 9.819975536873452e-05,
      "loss": 2.7107,
      "step": 169312
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6999521255493164,
      "learning_rate": 9.819672859686708e-05,
      "loss": 2.9812,
      "step": 169313
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6603522300720215,
      "learning_rate": 9.819370186251856e-05,
      "loss": 3.0725,
      "step": 169314
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.656946897506714,
      "learning_rate": 9.819067516568976e-05,
      "loss": 2.7017,
      "step": 169315
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8502683639526367,
      "learning_rate": 9.818764850638102e-05,
      "loss": 2.9832,
      "step": 169316
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.206784963607788,
      "learning_rate": 9.818462188459319e-05,
      "loss": 3.0965,
      "step": 169317
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.1548731327056885,
      "learning_rate": 9.818159530032648e-05,
      "loss": 2.8691,
      "step": 169318
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.907574415206909,
      "learning_rate": 9.817856875358169e-05,
      "loss": 3.1395,
      "step": 169319
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3981387615203857,
      "learning_rate": 9.817554224435922e-05,
      "loss": 2.7775,
      "step": 169320
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.2781553268432617,
      "learning_rate": 9.817251577265983e-05,
      "loss": 3.0125,
      "step": 169321
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.874484062194824,
      "learning_rate": 9.816948933848389e-05,
      "loss": 2.9475,
      "step": 169322
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7216060161590576,
      "learning_rate": 9.816646294183224e-05,
      "loss": 2.9071,
      "step": 169323
    },
    {
      "epoch": 2.2,
      "grad_norm": 3.7558908462524414,
      "learning_rate": 9.816343658270505e-05,
      "loss": 2.8928,
      "step": 169324
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4568183422088623,
      "learning_rate": 9.816041026110319e-05,
      "loss": 2.9794,
      "step": 169325
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.581362009048462,
      "learning_rate": 9.815738397702703e-05,
      "loss": 3.0722,
      "step": 169326
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4026858806610107,
      "learning_rate": 9.81543577304773e-05,
      "loss": 3.0971,
      "step": 169327
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.8347814083099365,
      "learning_rate": 9.81513315214544e-05,
      "loss": 2.9127,
      "step": 169328
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.342526435852051,
      "learning_rate": 9.814830534995918e-05,
      "loss": 2.9711,
      "step": 169329
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4852869510650635,
      "learning_rate": 9.81452792159918e-05,
      "loss": 2.9386,
      "step": 169330
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.9364547729492188,
      "learning_rate": 9.814225311955313e-05,
      "loss": 3.0003,
      "step": 169331
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.341697931289673,
      "learning_rate": 9.813922706064352e-05,
      "loss": 2.8849,
      "step": 169332
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.408344030380249,
      "learning_rate": 9.813620103926371e-05,
      "loss": 2.9522,
      "step": 169333
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.784893035888672,
      "learning_rate": 9.813317505541412e-05,
      "loss": 3.0536,
      "step": 169334
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.3857343196868896,
      "learning_rate": 9.813014910909562e-05,
      "loss": 2.9115,
      "step": 169335
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.4235482215881348,
      "learning_rate": 9.812712320030828e-05,
      "loss": 3.1616,
      "step": 169336
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7636911869049072,
      "learning_rate": 9.812409732905302e-05,
      "loss": 2.9668,
      "step": 169337
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.205325126647949,
      "learning_rate": 9.812107149533021e-05,
      "loss": 2.8437,
      "step": 169338
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7253942489624023,
      "learning_rate": 9.811804569914064e-05,
      "loss": 2.7267,
      "step": 169339
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7386066913604736,
      "learning_rate": 9.811501994048457e-05,
      "loss": 2.9947,
      "step": 169340
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.716456890106201,
      "learning_rate": 9.811199421936299e-05,
      "loss": 2.7416,
      "step": 169341
    },
    {
      "epoch": 2.2,
      "grad_norm": 4.426547050476074,
      "learning_rate": 9.810896853577594e-05,
      "loss": 2.8317,
      "step": 169342
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.1929829120635986,
      "learning_rate": 9.810594288972441e-05,
      "loss": 2.7049,
      "step": 169343
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.683441400527954,
      "learning_rate": 9.810291728120865e-05,
      "loss": 2.9572,
      "step": 169344
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.7783100605010986,
      "learning_rate": 9.80998917102295e-05,
      "loss": 2.809,
      "step": 169345
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.6191718578338623,
      "learning_rate": 9.809686617678724e-05,
      "loss": 3.2133,
      "step": 169346
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.237361431121826,
      "learning_rate": 9.809384068088284e-05,
      "loss": 3.1018,
      "step": 169347
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8439180850982666,
      "learning_rate": 9.809081522251637e-05,
      "loss": 2.8842,
      "step": 169348
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.316945791244507,
      "learning_rate": 9.808778980168874e-05,
      "loss": 2.8862,
      "step": 169349
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.537832260131836,
      "learning_rate": 9.808476441840031e-05,
      "loss": 3.1097,
      "step": 169350
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4582509994506836,
      "learning_rate": 9.808173907265183e-05,
      "loss": 2.9718,
      "step": 169351
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.460256576538086,
      "learning_rate": 9.807871376444369e-05,
      "loss": 3.2251,
      "step": 169352
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0436503887176514,
      "learning_rate": 9.807568849377657e-05,
      "loss": 2.6984,
      "step": 169353
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0713140964508057,
      "learning_rate": 9.807266326065102e-05,
      "loss": 2.5674,
      "step": 169354
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4927279949188232,
      "learning_rate": 9.806963806506759e-05,
      "loss": 2.9318,
      "step": 169355
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.803698778152466,
      "learning_rate": 9.806661290702672e-05,
      "loss": 2.9189,
      "step": 169356
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3676249980926514,
      "learning_rate": 9.806358778652918e-05,
      "loss": 3.0796,
      "step": 169357
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.055520534515381,
      "learning_rate": 9.806056270357532e-05,
      "loss": 2.8279,
      "step": 169358
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4007816314697266,
      "learning_rate": 9.805753765816591e-05,
      "loss": 2.9954,
      "step": 169359
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4180703163146973,
      "learning_rate": 9.805451265030142e-05,
      "loss": 3.0848,
      "step": 169360
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.986238479614258,
      "learning_rate": 9.805148767998242e-05,
      "loss": 2.8159,
      "step": 169361
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9471182823181152,
      "learning_rate": 9.804846274720936e-05,
      "loss": 2.976,
      "step": 169362
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.569749355316162,
      "learning_rate": 9.804543785198298e-05,
      "loss": 3.0645,
      "step": 169363
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7209365367889404,
      "learning_rate": 9.80424129943037e-05,
      "loss": 2.8993,
      "step": 169364
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.069738864898682,
      "learning_rate": 9.803938817417222e-05,
      "loss": 3.0136,
      "step": 169365
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.379239797592163,
      "learning_rate": 9.803636339158902e-05,
      "loss": 2.6578,
      "step": 169366
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.411180257797241,
      "learning_rate": 9.803333864655461e-05,
      "loss": 2.8451,
      "step": 169367
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7306580543518066,
      "learning_rate": 9.803031393906965e-05,
      "loss": 2.8367,
      "step": 169368
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7130963802337646,
      "learning_rate": 9.802728926913474e-05,
      "loss": 2.9529,
      "step": 169369
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.237939834594727,
      "learning_rate": 9.802426463675023e-05,
      "loss": 3.0166,
      "step": 169370
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3300766944885254,
      "learning_rate": 9.802124004191694e-05,
      "loss": 3.0772,
      "step": 169371
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.146833896636963,
      "learning_rate": 9.801821548463531e-05,
      "loss": 2.9142,
      "step": 169372
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.284116506576538,
      "learning_rate": 9.801519096490581e-05,
      "loss": 2.9505,
      "step": 169373
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2411391735076904,
      "learning_rate": 9.801216648272917e-05,
      "loss": 3.0089,
      "step": 169374
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4791531562805176,
      "learning_rate": 9.80091420381058e-05,
      "loss": 2.8023,
      "step": 169375
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3908119201660156,
      "learning_rate": 9.800611763103645e-05,
      "loss": 3.0515,
      "step": 169376
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.384829044342041,
      "learning_rate": 9.800309326152158e-05,
      "loss": 2.9481,
      "step": 169377
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8297159671783447,
      "learning_rate": 9.800006892956175e-05,
      "loss": 3.0967,
      "step": 169378
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7814383506774902,
      "learning_rate": 9.79970446351574e-05,
      "loss": 2.9417,
      "step": 169379
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9323558807373047,
      "learning_rate": 9.799402037830932e-05,
      "loss": 2.9703,
      "step": 169380
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.9903900623321533,
      "learning_rate": 9.799099615901787e-05,
      "loss": 3.0454,
      "step": 169381
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.513751268386841,
      "learning_rate": 9.79879719772838e-05,
      "loss": 2.9286,
      "step": 169382
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.0034942626953125,
      "learning_rate": 9.798494783310759e-05,
      "loss": 2.7333,
      "step": 169383
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.7178661823272705,
      "learning_rate": 9.798192372648976e-05,
      "loss": 2.9105,
      "step": 169384
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1013026237487793,
      "learning_rate": 9.797889965743083e-05,
      "loss": 2.9669,
      "step": 169385
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.510423183441162,
      "learning_rate": 9.797587562593155e-05,
      "loss": 3.0594,
      "step": 169386
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.889366388320923,
      "learning_rate": 9.797285163199227e-05,
      "loss": 3.117,
      "step": 169387
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4272537231445312,
      "learning_rate": 9.79698276756137e-05,
      "loss": 3.0176,
      "step": 169388
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.762378454208374,
      "learning_rate": 9.796680375679628e-05,
      "loss": 3.0125,
      "step": 169389
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8272366523742676,
      "learning_rate": 9.796377987554087e-05,
      "loss": 3.0877,
      "step": 169390
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.466975212097168,
      "learning_rate": 9.796075603184759e-05,
      "loss": 2.709,
      "step": 169391
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0639307498931885,
      "learning_rate": 9.79577322257173e-05,
      "loss": 2.838,
      "step": 169392
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.762834310531616,
      "learning_rate": 9.79547084571504e-05,
      "loss": 2.8183,
      "step": 169393
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5705361366271973,
      "learning_rate": 9.795168472614765e-05,
      "loss": 3.0123,
      "step": 169394
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9813392162323,
      "learning_rate": 9.794866103270938e-05,
      "loss": 2.893,
      "step": 169395
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.293454170227051,
      "learning_rate": 9.794563737683648e-05,
      "loss": 3.1456,
      "step": 169396
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3787357807159424,
      "learning_rate": 9.794261375852907e-05,
      "loss": 2.8511,
      "step": 169397
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.544053077697754,
      "learning_rate": 9.793959017778807e-05,
      "loss": 3.1814,
      "step": 169398
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.306095600128174,
      "learning_rate": 9.79365666346138e-05,
      "loss": 3.0552,
      "step": 169399
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.804603338241577,
      "learning_rate": 9.793354312900702e-05,
      "loss": 3.0541,
      "step": 169400
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6834359169006348,
      "learning_rate": 9.793051966096813e-05,
      "loss": 2.6529,
      "step": 169401
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.551034927368164,
      "learning_rate": 9.792749623049798e-05,
      "loss": 2.9646,
      "step": 169402
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4603655338287354,
      "learning_rate": 9.79244728375967e-05,
      "loss": 2.7863,
      "step": 169403
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.400362730026245,
      "learning_rate": 9.792144948226518e-05,
      "loss": 3.1336,
      "step": 169404
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.75538969039917,
      "learning_rate": 9.79184261645038e-05,
      "loss": 2.7794,
      "step": 169405
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.288698434829712,
      "learning_rate": 9.791540288431327e-05,
      "loss": 2.9729,
      "step": 169406
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5784120559692383,
      "learning_rate": 9.7912379641694e-05,
      "loss": 2.9845,
      "step": 169407
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3938136100769043,
      "learning_rate": 9.790935643664683e-05,
      "loss": 2.9315,
      "step": 169408
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.567002058029175,
      "learning_rate": 9.79063332691719e-05,
      "loss": 2.8771,
      "step": 169409
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.192361354827881,
      "learning_rate": 9.790331013927011e-05,
      "loss": 3.1236,
      "step": 169410
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7003722190856934,
      "learning_rate": 9.790028704694185e-05,
      "loss": 2.899,
      "step": 169411
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.773834705352783,
      "learning_rate": 9.789726399218778e-05,
      "loss": 3.2112,
      "step": 169412
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3068864345550537,
      "learning_rate": 9.789424097500838e-05,
      "loss": 2.9617,
      "step": 169413
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4103000164031982,
      "learning_rate": 9.789121799540443e-05,
      "loss": 3.0562,
      "step": 169414
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6718950271606445,
      "learning_rate": 9.78881950533761e-05,
      "loss": 2.8546,
      "step": 169415
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.574829578399658,
      "learning_rate": 9.788517214892432e-05,
      "loss": 2.9196,
      "step": 169416
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9213666915893555,
      "learning_rate": 9.788214928204934e-05,
      "loss": 3.0657,
      "step": 169417
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5009844303131104,
      "learning_rate": 9.787912645275203e-05,
      "loss": 2.8691,
      "step": 169418
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6999518871307373,
      "learning_rate": 9.787610366103266e-05,
      "loss": 3.0455,
      "step": 169419
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.150606155395508,
      "learning_rate": 9.787308090689208e-05,
      "loss": 3.0078,
      "step": 169420
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3742058277130127,
      "learning_rate": 9.78700581903307e-05,
      "loss": 2.9037,
      "step": 169421
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.513259172439575,
      "learning_rate": 9.786703551134907e-05,
      "loss": 2.9507,
      "step": 169422
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5233569145202637,
      "learning_rate": 9.786401286994771e-05,
      "loss": 2.9729,
      "step": 169423
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.459200143814087,
      "learning_rate": 9.786099026612733e-05,
      "loss": 2.9956,
      "step": 169424
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4497573375701904,
      "learning_rate": 9.78579676998883e-05,
      "loss": 2.9122,
      "step": 169425
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3074951171875,
      "learning_rate": 9.785494517123142e-05,
      "loss": 2.9258,
      "step": 169426
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.347496747970581,
      "learning_rate": 9.785192268015706e-05,
      "loss": 2.8015,
      "step": 169427
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.034738779067993,
      "learning_rate": 9.78489002266659e-05,
      "loss": 3.0326,
      "step": 169428
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.413975477218628,
      "learning_rate": 9.784587781075832e-05,
      "loss": 2.9559,
      "step": 169429
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6805946826934814,
      "learning_rate": 9.784285543243511e-05,
      "loss": 2.7975,
      "step": 169430
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1012074947357178,
      "learning_rate": 9.783983309169663e-05,
      "loss": 2.8583,
      "step": 169431
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.330275058746338,
      "learning_rate": 9.783681078854365e-05,
      "loss": 3.054,
      "step": 169432
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2194290161132812,
      "learning_rate": 9.78337885229766e-05,
      "loss": 2.9982,
      "step": 169433
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.433795928955078,
      "learning_rate": 9.78307662949961e-05,
      "loss": 3.2118,
      "step": 169434
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.382706642150879,
      "learning_rate": 9.782774410460256e-05,
      "loss": 2.8491,
      "step": 169435
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.490067481994629,
      "learning_rate": 9.782472195179674e-05,
      "loss": 2.9823,
      "step": 169436
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.383002281188965,
      "learning_rate": 9.782169983657904e-05,
      "loss": 2.8645,
      "step": 169437
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5791361331939697,
      "learning_rate": 9.781867775895019e-05,
      "loss": 2.7306,
      "step": 169438
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.860233783721924,
      "learning_rate": 9.781565571891069e-05,
      "loss": 2.8972,
      "step": 169439
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5064587593078613,
      "learning_rate": 9.781263371646103e-05,
      "loss": 2.8752,
      "step": 169440
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2182936668395996,
      "learning_rate": 9.780961175160175e-05,
      "loss": 3.0069,
      "step": 169441
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4162120819091797,
      "learning_rate": 9.78065898243336e-05,
      "loss": 3.0246,
      "step": 169442
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.451934337615967,
      "learning_rate": 9.780356793465686e-05,
      "loss": 3.0468,
      "step": 169443
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8248326778411865,
      "learning_rate": 9.780054608257241e-05,
      "loss": 2.9604,
      "step": 169444
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.011871576309204,
      "learning_rate": 9.779752426808063e-05,
      "loss": 2.932,
      "step": 169445
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.277056932449341,
      "learning_rate": 9.779450249118213e-05,
      "loss": 2.9232,
      "step": 169446
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1131443977355957,
      "learning_rate": 9.779148075187733e-05,
      "loss": 2.8469,
      "step": 169447
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4326436519622803,
      "learning_rate": 9.778845905016701e-05,
      "loss": 3.1751,
      "step": 169448
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0580060482025146,
      "learning_rate": 9.778543738605153e-05,
      "loss": 2.9223,
      "step": 169449
    },
    {
      "epoch": 2.21,
      "grad_norm": 6.822640419006348,
      "learning_rate": 9.778241575953167e-05,
      "loss": 2.8423,
      "step": 169450
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2079601287841797,
      "learning_rate": 9.777939417060786e-05,
      "loss": 2.8626,
      "step": 169451
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2700953483581543,
      "learning_rate": 9.777637261928058e-05,
      "loss": 2.7451,
      "step": 169452
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9408836364746094,
      "learning_rate": 9.77733511055506e-05,
      "loss": 2.8972,
      "step": 169453
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1170408725738525,
      "learning_rate": 9.777032962941837e-05,
      "loss": 2.9781,
      "step": 169454
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.715142250061035,
      "learning_rate": 9.776730819088434e-05,
      "loss": 3.0292,
      "step": 169455
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.598776340484619,
      "learning_rate": 9.77642867899493e-05,
      "loss": 2.7938,
      "step": 169456
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6582820415496826,
      "learning_rate": 9.776126542661367e-05,
      "loss": 2.7909,
      "step": 169457
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8983101844787598,
      "learning_rate": 9.775824410087797e-05,
      "loss": 3.1032,
      "step": 169458
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.10036039352417,
      "learning_rate": 9.775522281274293e-05,
      "loss": 2.8755,
      "step": 169459
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.816147804260254,
      "learning_rate": 9.775220156220899e-05,
      "loss": 3.0201,
      "step": 169460
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9880917072296143,
      "learning_rate": 9.774918034927665e-05,
      "loss": 2.8192,
      "step": 169461
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3350565433502197,
      "learning_rate": 9.774615917394664e-05,
      "loss": 3.1098,
      "step": 169462
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.504774570465088,
      "learning_rate": 9.774313803621946e-05,
      "loss": 2.8004,
      "step": 169463
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.5556111335754395,
      "learning_rate": 9.774011693609553e-05,
      "loss": 2.828,
      "step": 169464
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1926400661468506,
      "learning_rate": 9.773709587357562e-05,
      "loss": 2.9074,
      "step": 169465
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.839820623397827,
      "learning_rate": 9.773407484866012e-05,
      "loss": 3.0575,
      "step": 169466
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.412655830383301,
      "learning_rate": 9.773105386134978e-05,
      "loss": 2.77,
      "step": 169467
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.749659776687622,
      "learning_rate": 9.772803291164504e-05,
      "loss": 3.0705,
      "step": 169468
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4883155822753906,
      "learning_rate": 9.772501199954647e-05,
      "loss": 3.0944,
      "step": 169469
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7520060539245605,
      "learning_rate": 9.772199112505455e-05,
      "loss": 3.0235,
      "step": 169470
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.107480525970459,
      "learning_rate": 9.771897028817006e-05,
      "loss": 2.7167,
      "step": 169471
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.7402663230895996,
      "learning_rate": 9.771594948889327e-05,
      "loss": 2.7456,
      "step": 169472
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4914677143096924,
      "learning_rate": 9.771292872722506e-05,
      "loss": 3.1152,
      "step": 169473
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7531557083129883,
      "learning_rate": 9.770990800316571e-05,
      "loss": 2.9941,
      "step": 169474
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3243515491485596,
      "learning_rate": 9.77068873167161e-05,
      "loss": 2.9009,
      "step": 169475
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.827026128768921,
      "learning_rate": 9.77038666678764e-05,
      "loss": 2.9643,
      "step": 169476
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.227855682373047,
      "learning_rate": 9.770084605664746e-05,
      "loss": 2.7508,
      "step": 169477
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3203577995300293,
      "learning_rate": 9.769782548302967e-05,
      "loss": 3.0457,
      "step": 169478
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.277132987976074,
      "learning_rate": 9.769480494702378e-05,
      "loss": 3.0639,
      "step": 169479
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3218188285827637,
      "learning_rate": 9.769178444863011e-05,
      "loss": 3.0528,
      "step": 169480
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.546409845352173,
      "learning_rate": 9.768876398784959e-05,
      "loss": 2.8024,
      "step": 169481
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3590502738952637,
      "learning_rate": 9.768574356468234e-05,
      "loss": 2.9527,
      "step": 169482
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.194262742996216,
      "learning_rate": 9.768272317912922e-05,
      "loss": 2.6551,
      "step": 169483
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.306757926940918,
      "learning_rate": 9.767970283119064e-05,
      "loss": 2.8644,
      "step": 169484
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3807640075683594,
      "learning_rate": 9.767668252086729e-05,
      "loss": 3.0678,
      "step": 169485
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5887272357940674,
      "learning_rate": 9.767366224815956e-05,
      "loss": 2.757,
      "step": 169486
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.897768020629883,
      "learning_rate": 9.76706420130682e-05,
      "loss": 2.888,
      "step": 169487
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.782512664794922,
      "learning_rate": 9.766762181559372e-05,
      "loss": 2.8569,
      "step": 169488
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3775763511657715,
      "learning_rate": 9.766460165573662e-05,
      "loss": 2.8412,
      "step": 169489
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.8859641551971436,
      "learning_rate": 9.766158153349742e-05,
      "loss": 2.9263,
      "step": 169490
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.778233766555786,
      "learning_rate": 9.765856144887684e-05,
      "loss": 3.1005,
      "step": 169491
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1991167068481445,
      "learning_rate": 9.765554140187522e-05,
      "loss": 2.7091,
      "step": 169492
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.035579204559326,
      "learning_rate": 9.765252139249338e-05,
      "loss": 3.2306,
      "step": 169493
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4675891399383545,
      "learning_rate": 9.764950142073179e-05,
      "loss": 2.9069,
      "step": 169494
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9114229679107666,
      "learning_rate": 9.764648148659093e-05,
      "loss": 2.8463,
      "step": 169495
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.0645816326141357,
      "learning_rate": 9.764346159007134e-05,
      "loss": 3.046,
      "step": 169496
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7240850925445557,
      "learning_rate": 9.764044173117374e-05,
      "loss": 2.9052,
      "step": 169497
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.689387083053589,
      "learning_rate": 9.76374219098985e-05,
      "loss": 2.7767,
      "step": 169498
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.495544672012329,
      "learning_rate": 9.763440212624636e-05,
      "loss": 3.0409,
      "step": 169499
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0365381240844727,
      "learning_rate": 9.763138238021782e-05,
      "loss": 2.9882,
      "step": 169500
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2087855339050293,
      "learning_rate": 9.762836267181343e-05,
      "loss": 2.7715,
      "step": 169501
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.911358594894409,
      "learning_rate": 9.762534300103368e-05,
      "loss": 2.9716,
      "step": 169502
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4901652336120605,
      "learning_rate": 9.762232336787925e-05,
      "loss": 2.9184,
      "step": 169503
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9390406608581543,
      "learning_rate": 9.761930377235056e-05,
      "loss": 2.952,
      "step": 169504
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.039979934692383,
      "learning_rate": 9.761628421444839e-05,
      "loss": 2.9613,
      "step": 169505
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0166571140289307,
      "learning_rate": 9.761326469417313e-05,
      "loss": 2.8148,
      "step": 169506
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.351130485534668,
      "learning_rate": 9.761024521152542e-05,
      "loss": 2.9938,
      "step": 169507
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0438456535339355,
      "learning_rate": 9.760722576650567e-05,
      "loss": 3.0206,
      "step": 169508
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2342681884765625,
      "learning_rate": 9.760420635911466e-05,
      "loss": 3.0593,
      "step": 169509
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4925525188446045,
      "learning_rate": 9.760118698935274e-05,
      "loss": 3.193,
      "step": 169510
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6189539432525635,
      "learning_rate": 9.759816765722069e-05,
      "loss": 2.8078,
      "step": 169511
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.109901189804077,
      "learning_rate": 9.759514836271895e-05,
      "loss": 2.9683,
      "step": 169512
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.019474029541016,
      "learning_rate": 9.759212910584813e-05,
      "loss": 2.923,
      "step": 169513
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9798965454101562,
      "learning_rate": 9.75891098866086e-05,
      "loss": 3.0366,
      "step": 169514
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5779166221618652,
      "learning_rate": 9.758609070500122e-05,
      "loss": 2.8676,
      "step": 169515
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6459667682647705,
      "learning_rate": 9.758307156102629e-05,
      "loss": 2.9003,
      "step": 169516
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3361635208129883,
      "learning_rate": 9.758005245468457e-05,
      "loss": 3.1096,
      "step": 169517
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6104514598846436,
      "learning_rate": 9.757703338597656e-05,
      "loss": 3.0769,
      "step": 169518
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.193080186843872,
      "learning_rate": 9.75740143549028e-05,
      "loss": 2.8562,
      "step": 169519
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.5913798809051514,
      "learning_rate": 9.757099536146376e-05,
      "loss": 3.0111,
      "step": 169520
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.421125888824463,
      "learning_rate": 9.756797640566018e-05,
      "loss": 2.8584,
      "step": 169521
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5022194385528564,
      "learning_rate": 9.756495748749243e-05,
      "loss": 3.0268,
      "step": 169522
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.573920726776123,
      "learning_rate": 9.756193860696126e-05,
      "loss": 3.0348,
      "step": 169523
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.898411273956299,
      "learning_rate": 9.755891976406717e-05,
      "loss": 3.0044,
      "step": 169524
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2870397567749023,
      "learning_rate": 9.75559009588107e-05,
      "loss": 2.7603,
      "step": 169525
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.890000820159912,
      "learning_rate": 9.755288219119232e-05,
      "loss": 2.8903,
      "step": 169526
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9838130474090576,
      "learning_rate": 9.754986346121276e-05,
      "loss": 2.7452,
      "step": 169527
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.578589677810669,
      "learning_rate": 9.75468447688724e-05,
      "loss": 2.863,
      "step": 169528
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.723353385925293,
      "learning_rate": 9.7543826114172e-05,
      "loss": 3.0227,
      "step": 169529
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.5021746158599854,
      "learning_rate": 9.754080749711205e-05,
      "loss": 2.874,
      "step": 169530
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4998552799224854,
      "learning_rate": 9.753778891769307e-05,
      "loss": 3.0904,
      "step": 169531
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4416897296905518,
      "learning_rate": 9.753477037591553e-05,
      "loss": 2.9857,
      "step": 169532
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6417949199676514,
      "learning_rate": 9.753175187178022e-05,
      "loss": 2.8813,
      "step": 169533
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.4777398109436035,
      "learning_rate": 9.752873340528746e-05,
      "loss": 3.1522,
      "step": 169534
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.676604747772217,
      "learning_rate": 9.752571497643802e-05,
      "loss": 3.0681,
      "step": 169535
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7120537757873535,
      "learning_rate": 9.752269658523239e-05,
      "loss": 3.1458,
      "step": 169536
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.498497247695923,
      "learning_rate": 9.751967823167103e-05,
      "loss": 3.0938,
      "step": 169537
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8523738384246826,
      "learning_rate": 9.751665991575466e-05,
      "loss": 3.1353,
      "step": 169538
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.343357086181641,
      "learning_rate": 9.751364163748377e-05,
      "loss": 2.7574,
      "step": 169539
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.5986785888671875,
      "learning_rate": 9.751062339685881e-05,
      "loss": 2.8999,
      "step": 169540
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.406437397003174,
      "learning_rate": 9.750760519388054e-05,
      "loss": 2.8658,
      "step": 169541
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.714085340499878,
      "learning_rate": 9.750458702854943e-05,
      "loss": 2.9224,
      "step": 169542
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1710822582244873,
      "learning_rate": 9.750156890086596e-05,
      "loss": 2.9544,
      "step": 169543
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.782895088195801,
      "learning_rate": 9.749855081083087e-05,
      "loss": 2.9296,
      "step": 169544
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7398204803466797,
      "learning_rate": 9.74955327584446e-05,
      "loss": 3.1659,
      "step": 169545
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.564161539077759,
      "learning_rate": 9.749251474370765e-05,
      "loss": 2.6908,
      "step": 169546
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.308243751525879,
      "learning_rate": 9.748949676662077e-05,
      "loss": 3.003,
      "step": 169547
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3294167518615723,
      "learning_rate": 9.748647882718439e-05,
      "loss": 2.9815,
      "step": 169548
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8085646629333496,
      "learning_rate": 9.748346092539902e-05,
      "loss": 2.8993,
      "step": 169549
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6165807247161865,
      "learning_rate": 9.748044306126539e-05,
      "loss": 3.1192,
      "step": 169550
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.520902633666992,
      "learning_rate": 9.747742523478388e-05,
      "loss": 2.9474,
      "step": 169551
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6470680236816406,
      "learning_rate": 9.747440744595521e-05,
      "loss": 2.7046,
      "step": 169552
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1907451152801514,
      "learning_rate": 9.74713896947799e-05,
      "loss": 2.9747,
      "step": 169553
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6923625469207764,
      "learning_rate": 9.746837198125836e-05,
      "loss": 3.1216,
      "step": 169554
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.318225622177124,
      "learning_rate": 9.746535430539141e-05,
      "loss": 2.8873,
      "step": 169555
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.641679048538208,
      "learning_rate": 9.746233666717943e-05,
      "loss": 3.2073,
      "step": 169556
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3975136280059814,
      "learning_rate": 9.745931906662294e-05,
      "loss": 2.9733,
      "step": 169557
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4067094326019287,
      "learning_rate": 9.745630150372267e-05,
      "loss": 3.0078,
      "step": 169558
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6711905002593994,
      "learning_rate": 9.745328397847904e-05,
      "loss": 2.8766,
      "step": 169559
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8794102668762207,
      "learning_rate": 9.745026649089273e-05,
      "loss": 2.9253,
      "step": 169560
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7517173290252686,
      "learning_rate": 9.744724904096425e-05,
      "loss": 2.6918,
      "step": 169561
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6030325889587402,
      "learning_rate": 9.744423162869414e-05,
      "loss": 3.24,
      "step": 169562
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2438344955444336,
      "learning_rate": 9.74412142540829e-05,
      "loss": 3.0494,
      "step": 169563
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.88418436050415,
      "learning_rate": 9.743819691713124e-05,
      "loss": 3.0481,
      "step": 169564
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.9703991413116455,
      "learning_rate": 9.743517961783955e-05,
      "loss": 2.9381,
      "step": 169565
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.179158926010132,
      "learning_rate": 9.743216235620858e-05,
      "loss": 2.8955,
      "step": 169566
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1260831356048584,
      "learning_rate": 9.742914513223878e-05,
      "loss": 2.8147,
      "step": 169567
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4225547313690186,
      "learning_rate": 9.742612794593074e-05,
      "loss": 3.002,
      "step": 169568
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.184812545776367,
      "learning_rate": 9.742311079728493e-05,
      "loss": 2.9775,
      "step": 169569
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6385819911956787,
      "learning_rate": 9.742009368630204e-05,
      "loss": 2.9347,
      "step": 169570
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.692134380340576,
      "learning_rate": 9.741707661298252e-05,
      "loss": 2.9878,
      "step": 169571
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3814327716827393,
      "learning_rate": 9.741405957732705e-05,
      "loss": 2.913,
      "step": 169572
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9274754524230957,
      "learning_rate": 9.741104257933615e-05,
      "loss": 2.8924,
      "step": 169573
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9534122943878174,
      "learning_rate": 9.740802561901039e-05,
      "loss": 3.0307,
      "step": 169574
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4605813026428223,
      "learning_rate": 9.740500869635017e-05,
      "loss": 2.8942,
      "step": 169575
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.654787063598633,
      "learning_rate": 9.740199181135627e-05,
      "loss": 2.89,
      "step": 169576
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6152267456054688,
      "learning_rate": 9.739897496402908e-05,
      "loss": 2.9903,
      "step": 169577
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.482626438140869,
      "learning_rate": 9.739595815436938e-05,
      "loss": 2.8924,
      "step": 169578
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.650177240371704,
      "learning_rate": 9.739294138237756e-05,
      "loss": 3.2178,
      "step": 169579
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3096795082092285,
      "learning_rate": 9.738992464805421e-05,
      "loss": 2.8055,
      "step": 169580
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3939294815063477,
      "learning_rate": 9.738690795139983e-05,
      "loss": 2.8901,
      "step": 169581
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.021890163421631,
      "learning_rate": 9.738389129241512e-05,
      "loss": 2.8644,
      "step": 169582
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.418290853500366,
      "learning_rate": 9.738087467110047e-05,
      "loss": 3.0032,
      "step": 169583
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5662004947662354,
      "learning_rate": 9.737785808745667e-05,
      "loss": 2.794,
      "step": 169584
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.737374782562256,
      "learning_rate": 9.737484154148411e-05,
      "loss": 2.8766,
      "step": 169585
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.433974504470825,
      "learning_rate": 9.73718250331834e-05,
      "loss": 2.8903,
      "step": 169586
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3599436283111572,
      "learning_rate": 9.736880856255498e-05,
      "loss": 2.8141,
      "step": 169587
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9616880416870117,
      "learning_rate": 9.736579212959968e-05,
      "loss": 2.9332,
      "step": 169588
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6761858463287354,
      "learning_rate": 9.736277573431776e-05,
      "loss": 3.0515,
      "step": 169589
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6176414489746094,
      "learning_rate": 9.735975937671003e-05,
      "loss": 3.0082,
      "step": 169590
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3730485439300537,
      "learning_rate": 9.735674305677694e-05,
      "loss": 2.9494,
      "step": 169591
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1857523918151855,
      "learning_rate": 9.735372677451908e-05,
      "loss": 3.17,
      "step": 169592
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.0754880905151367,
      "learning_rate": 9.735071052993688e-05,
      "loss": 3.102,
      "step": 169593
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.287562131881714,
      "learning_rate": 9.73476943230311e-05,
      "loss": 2.8277,
      "step": 169594
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.5433473587036133,
      "learning_rate": 9.73446781538021e-05,
      "loss": 2.7229,
      "step": 169595
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.540949583053589,
      "learning_rate": 9.734166202225069e-05,
      "loss": 2.9919,
      "step": 169596
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.794629096984863,
      "learning_rate": 9.733864592837724e-05,
      "loss": 2.8065,
      "step": 169597
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7403478622436523,
      "learning_rate": 9.733562987218241e-05,
      "loss": 2.8295,
      "step": 169598
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.099860191345215,
      "learning_rate": 9.733261385366657e-05,
      "loss": 2.8965,
      "step": 169599
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.896421432495117,
      "learning_rate": 9.732959787283054e-05,
      "loss": 3.1115,
      "step": 169600
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3299331665039062,
      "learning_rate": 9.732658192967465e-05,
      "loss": 3.0385,
      "step": 169601
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.994616746902466,
      "learning_rate": 9.73235660241997e-05,
      "loss": 2.8672,
      "step": 169602
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.043318033218384,
      "learning_rate": 9.732055015640609e-05,
      "loss": 2.8874,
      "step": 169603
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7038137912750244,
      "learning_rate": 9.731753432629445e-05,
      "loss": 2.961,
      "step": 169604
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.374445915222168,
      "learning_rate": 9.73145185338652e-05,
      "loss": 2.8904,
      "step": 169605
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6232168674468994,
      "learning_rate": 9.731150277911908e-05,
      "loss": 2.9994,
      "step": 169606
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4053397178649902,
      "learning_rate": 9.730848706205651e-05,
      "loss": 2.9244,
      "step": 169607
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.645153284072876,
      "learning_rate": 9.730547138267819e-05,
      "loss": 2.9416,
      "step": 169608
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7077317237854004,
      "learning_rate": 9.730245574098463e-05,
      "loss": 3.0744,
      "step": 169609
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1361567974090576,
      "learning_rate": 9.729944013697636e-05,
      "loss": 2.5914,
      "step": 169610
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.24008846282959,
      "learning_rate": 9.729642457065387e-05,
      "loss": 2.6657,
      "step": 169611
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1894168853759766,
      "learning_rate": 9.729340904201787e-05,
      "loss": 2.9229,
      "step": 169612
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.096824645996094,
      "learning_rate": 9.729039355106881e-05,
      "loss": 3.0729,
      "step": 169613
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.057804584503174,
      "learning_rate": 9.728737809780731e-05,
      "loss": 3.0654,
      "step": 169614
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6696887016296387,
      "learning_rate": 9.728436268223398e-05,
      "loss": 2.9073,
      "step": 169615
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.569209098815918,
      "learning_rate": 9.728134730434925e-05,
      "loss": 2.8944,
      "step": 169616
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.848947286605835,
      "learning_rate": 9.727833196415369e-05,
      "loss": 3.0133,
      "step": 169617
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.094345569610596,
      "learning_rate": 9.7275316661648e-05,
      "loss": 2.8081,
      "step": 169618
    },
    {
      "epoch": 2.21,
      "grad_norm": 6.146430969238281,
      "learning_rate": 9.727230139683258e-05,
      "loss": 2.9446,
      "step": 169619
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.811333179473877,
      "learning_rate": 9.726928616970816e-05,
      "loss": 2.887,
      "step": 169620
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.551063299179077,
      "learning_rate": 9.726627098027506e-05,
      "loss": 2.8966,
      "step": 169621
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9955811500549316,
      "learning_rate": 9.726325582853425e-05,
      "loss": 2.9435,
      "step": 169622
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.7824745178222656,
      "learning_rate": 9.726024071448578e-05,
      "loss": 2.8127,
      "step": 169623
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9635250568389893,
      "learning_rate": 9.725722563813057e-05,
      "loss": 3.185,
      "step": 169624
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.569264888763428,
      "learning_rate": 9.725421059946899e-05,
      "loss": 2.4975,
      "step": 169625
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5914344787597656,
      "learning_rate": 9.725119559850177e-05,
      "loss": 2.9926,
      "step": 169626
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8127317428588867,
      "learning_rate": 9.724818063522927e-05,
      "loss": 3.0754,
      "step": 169627
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.625948667526245,
      "learning_rate": 9.724516570965227e-05,
      "loss": 2.8703,
      "step": 169628
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.041714906692505,
      "learning_rate": 9.72421508217712e-05,
      "loss": 2.9526,
      "step": 169629
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.52549147605896,
      "learning_rate": 9.723913597158666e-05,
      "loss": 2.8401,
      "step": 169630
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.00673246383667,
      "learning_rate": 9.72361211590991e-05,
      "loss": 3.041,
      "step": 169631
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5655086040496826,
      "learning_rate": 9.723310638430926e-05,
      "loss": 3.0995,
      "step": 169632
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.629072666168213,
      "learning_rate": 9.723009164721753e-05,
      "loss": 3.1838,
      "step": 169633
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.253314971923828,
      "learning_rate": 9.72270769478246e-05,
      "loss": 3.1199,
      "step": 169634
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0216946601867676,
      "learning_rate": 9.722406228613102e-05,
      "loss": 2.8534,
      "step": 169635
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.300316572189331,
      "learning_rate": 9.722104766213725e-05,
      "loss": 3.0077,
      "step": 169636
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.421337366104126,
      "learning_rate": 9.721803307584396e-05,
      "loss": 2.9849,
      "step": 169637
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.934407949447632,
      "learning_rate": 9.721501852725171e-05,
      "loss": 2.9467,
      "step": 169638
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.795336961746216,
      "learning_rate": 9.721200401636087e-05,
      "loss": 2.9581,
      "step": 169639
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6324462890625,
      "learning_rate": 9.720898954317226e-05,
      "loss": 3.1186,
      "step": 169640
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.352123260498047,
      "learning_rate": 9.720597510768634e-05,
      "loss": 3.0112,
      "step": 169641
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.224848985671997,
      "learning_rate": 9.720296070990355e-05,
      "loss": 3.1107,
      "step": 169642
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.308347463607788,
      "learning_rate": 9.719994634982468e-05,
      "loss": 2.8603,
      "step": 169643
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.432788848876953,
      "learning_rate": 9.719693202745014e-05,
      "loss": 2.9,
      "step": 169644
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.186089515686035,
      "learning_rate": 9.719391774278044e-05,
      "loss": 2.8576,
      "step": 169645
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.396029233932495,
      "learning_rate": 9.719090349581631e-05,
      "loss": 3.1512,
      "step": 169646
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.288649082183838,
      "learning_rate": 9.718788928655821e-05,
      "loss": 2.8885,
      "step": 169647
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6332740783691406,
      "learning_rate": 9.718487511500659e-05,
      "loss": 2.7928,
      "step": 169648
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5821146965026855,
      "learning_rate": 9.718186098116229e-05,
      "loss": 2.6245,
      "step": 169649
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.99116587638855,
      "learning_rate": 9.717884688502558e-05,
      "loss": 2.9479,
      "step": 169650
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3542211055755615,
      "learning_rate": 9.717583282659723e-05,
      "loss": 3.0666,
      "step": 169651
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.410906791687012,
      "learning_rate": 9.717281880587773e-05,
      "loss": 2.711,
      "step": 169652
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5737826824188232,
      "learning_rate": 9.716980482286766e-05,
      "loss": 3.0263,
      "step": 169653
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.572406768798828,
      "learning_rate": 9.716679087756743e-05,
      "loss": 3.0031,
      "step": 169654
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.829396724700928,
      "learning_rate": 9.716377696997782e-05,
      "loss": 2.9497,
      "step": 169655
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.1360063552856445,
      "learning_rate": 9.71607631000992e-05,
      "loss": 2.9367,
      "step": 169656
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.264681577682495,
      "learning_rate": 9.715774926793233e-05,
      "loss": 3.1231,
      "step": 169657
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8544869422912598,
      "learning_rate": 9.715473547347762e-05,
      "loss": 2.9483,
      "step": 169658
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.485705852508545,
      "learning_rate": 9.715172171673574e-05,
      "loss": 3.0524,
      "step": 169659
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.190833568572998,
      "learning_rate": 9.714870799770704e-05,
      "loss": 3.0485,
      "step": 169660
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5919747352600098,
      "learning_rate": 9.714569431639234e-05,
      "loss": 3.1107,
      "step": 169661
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.518730401992798,
      "learning_rate": 9.7142680672792e-05,
      "loss": 2.97,
      "step": 169662
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1658434867858887,
      "learning_rate": 9.713966706690672e-05,
      "loss": 3.0615,
      "step": 169663
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1946966648101807,
      "learning_rate": 9.713665349873704e-05,
      "loss": 3.1311,
      "step": 169664
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.722909927368164,
      "learning_rate": 9.713363996828348e-05,
      "loss": 3.0142,
      "step": 169665
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3543245792388916,
      "learning_rate": 9.713062647554649e-05,
      "loss": 2.7145,
      "step": 169666
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.444244623184204,
      "learning_rate": 9.712761302052687e-05,
      "loss": 2.8555,
      "step": 169667
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.967515468597412,
      "learning_rate": 9.712459960322495e-05,
      "loss": 3.2195,
      "step": 169668
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.926118850708008,
      "learning_rate": 9.71215862236415e-05,
      "loss": 2.99,
      "step": 169669
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6502435207366943,
      "learning_rate": 9.711857288177696e-05,
      "loss": 2.781,
      "step": 169670
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6102001667022705,
      "learning_rate": 9.711555957763188e-05,
      "loss": 2.9291,
      "step": 169671
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.187638759613037,
      "learning_rate": 9.71125463112068e-05,
      "loss": 3.2552,
      "step": 169672
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.537069797515869,
      "learning_rate": 9.710953308250242e-05,
      "loss": 3.126,
      "step": 169673
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.371903896331787,
      "learning_rate": 9.710651989151909e-05,
      "loss": 3.0222,
      "step": 169674
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.051602840423584,
      "learning_rate": 9.710350673825759e-05,
      "loss": 2.9092,
      "step": 169675
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2731986045837402,
      "learning_rate": 9.710049362271839e-05,
      "loss": 2.8954,
      "step": 169676
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.549198865890503,
      "learning_rate": 9.7097480544902e-05,
      "loss": 3.0235,
      "step": 169677
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4267449378967285,
      "learning_rate": 9.709446750480898e-05,
      "loss": 2.9582,
      "step": 169678
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6812028884887695,
      "learning_rate": 9.709145450243998e-05,
      "loss": 2.8892,
      "step": 169679
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3617284297943115,
      "learning_rate": 9.708844153779544e-05,
      "loss": 3.1126,
      "step": 169680
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3968396186828613,
      "learning_rate": 9.708542861087608e-05,
      "loss": 2.9624,
      "step": 169681
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5947701930999756,
      "learning_rate": 9.708241572168236e-05,
      "loss": 2.9032,
      "step": 169682
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.745331287384033,
      "learning_rate": 9.707940287021485e-05,
      "loss": 3.0322,
      "step": 169683
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8882317543029785,
      "learning_rate": 9.707639005647402e-05,
      "loss": 2.9506,
      "step": 169684
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8813233375549316,
      "learning_rate": 9.707337728046061e-05,
      "loss": 2.8013,
      "step": 169685
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4192588329315186,
      "learning_rate": 9.7070364542175e-05,
      "loss": 2.9849,
      "step": 169686
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7881736755371094,
      "learning_rate": 9.706735184161792e-05,
      "loss": 2.8345,
      "step": 169687
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.523704767227173,
      "learning_rate": 9.706433917878977e-05,
      "loss": 3.0939,
      "step": 169688
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.954744338989258,
      "learning_rate": 9.70613265536914e-05,
      "loss": 2.9491,
      "step": 169689
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8618533611297607,
      "learning_rate": 9.705831396632292e-05,
      "loss": 2.8095,
      "step": 169690
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.497720241546631,
      "learning_rate": 9.705530141668524e-05,
      "loss": 3.0074,
      "step": 169691
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1590874195098877,
      "learning_rate": 9.705228890477874e-05,
      "loss": 3.0439,
      "step": 169692
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.747993230819702,
      "learning_rate": 9.704927643060413e-05,
      "loss": 2.9487,
      "step": 169693
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7753541469573975,
      "learning_rate": 9.70462639941618e-05,
      "loss": 2.9372,
      "step": 169694
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.049973249435425,
      "learning_rate": 9.704325159545259e-05,
      "loss": 2.9684,
      "step": 169695
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.697201728820801,
      "learning_rate": 9.704023923447666e-05,
      "loss": 2.8352,
      "step": 169696
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0521509647369385,
      "learning_rate": 9.70372269112349e-05,
      "loss": 2.9627,
      "step": 169697
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1399824619293213,
      "learning_rate": 9.703421462572765e-05,
      "loss": 2.7636,
      "step": 169698
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7260806560516357,
      "learning_rate": 9.703120237795566e-05,
      "loss": 3.17,
      "step": 169699
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.132214069366455,
      "learning_rate": 9.702819016791933e-05,
      "loss": 2.7827,
      "step": 169700
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.192603826522827,
      "learning_rate": 9.702517799561948e-05,
      "loss": 2.9009,
      "step": 169701
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.941357374191284,
      "learning_rate": 9.702216586105626e-05,
      "loss": 3.0563,
      "step": 169702
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.64312744140625,
      "learning_rate": 9.701915376423054e-05,
      "loss": 2.997,
      "step": 169703
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5887653827667236,
      "learning_rate": 9.701614170514273e-05,
      "loss": 3.074,
      "step": 169704
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1632111072540283,
      "learning_rate": 9.701312968379352e-05,
      "loss": 3.1132,
      "step": 169705
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7091870307922363,
      "learning_rate": 9.701011770018333e-05,
      "loss": 2.8382,
      "step": 169706
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.0355401039123535,
      "learning_rate": 9.700710575431296e-05,
      "loss": 2.7534,
      "step": 169707
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2700934410095215,
      "learning_rate": 9.700409384618262e-05,
      "loss": 2.991,
      "step": 169708
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.567812919616699,
      "learning_rate": 9.700108197579317e-05,
      "loss": 2.9825,
      "step": 169709
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4944214820861816,
      "learning_rate": 9.699807014314491e-05,
      "loss": 2.7814,
      "step": 169710
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7480309009552,
      "learning_rate": 9.699505834823864e-05,
      "loss": 3.0938,
      "step": 169711
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.971590518951416,
      "learning_rate": 9.699204659107474e-05,
      "loss": 3.191,
      "step": 169712
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.679645538330078,
      "learning_rate": 9.698903487165396e-05,
      "loss": 2.7358,
      "step": 169713
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.101644992828369,
      "learning_rate": 9.698602318997675e-05,
      "loss": 2.9689,
      "step": 169714
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.827641010284424,
      "learning_rate": 9.698301154604364e-05,
      "loss": 3.0379,
      "step": 169715
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.294137716293335,
      "learning_rate": 9.697999993985514e-05,
      "loss": 2.9394,
      "step": 169716
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.35166597366333,
      "learning_rate": 9.697698837141199e-05,
      "loss": 2.6117,
      "step": 169717
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9473633766174316,
      "learning_rate": 9.697397684071457e-05,
      "loss": 3.0063,
      "step": 169718
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7506139278411865,
      "learning_rate": 9.69709653477636e-05,
      "loss": 3.1058,
      "step": 169719
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.842733860015869,
      "learning_rate": 9.696795389255955e-05,
      "loss": 2.9524,
      "step": 169720
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6399707794189453,
      "learning_rate": 9.696494247510289e-05,
      "loss": 2.8338,
      "step": 169721
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8783557415008545,
      "learning_rate": 9.69619310953944e-05,
      "loss": 2.9999,
      "step": 169722
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1927759647369385,
      "learning_rate": 9.695891975343451e-05,
      "loss": 3.1706,
      "step": 169723
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.66694712638855,
      "learning_rate": 9.695590844922368e-05,
      "loss": 2.9888,
      "step": 169724
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4748342037200928,
      "learning_rate": 9.69528971827627e-05,
      "loss": 2.9232,
      "step": 169725
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6189815998077393,
      "learning_rate": 9.694988595405199e-05,
      "loss": 2.7836,
      "step": 169726
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2941908836364746,
      "learning_rate": 9.694687476309204e-05,
      "loss": 2.9669,
      "step": 169727
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.504945755004883,
      "learning_rate": 9.69438636098836e-05,
      "loss": 2.771,
      "step": 169728
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.503567934036255,
      "learning_rate": 9.694085249442712e-05,
      "loss": 2.9789,
      "step": 169729
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5969793796539307,
      "learning_rate": 9.693784141672308e-05,
      "loss": 3.0411,
      "step": 169730
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.856701135635376,
      "learning_rate": 9.693483037677222e-05,
      "loss": 3.0575,
      "step": 169731
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4942333698272705,
      "learning_rate": 9.693181937457501e-05,
      "loss": 3.0217,
      "step": 169732
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.236881971359253,
      "learning_rate": 9.692880841013189e-05,
      "loss": 3.1493,
      "step": 169733
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.358720064163208,
      "learning_rate": 9.692579748344369e-05,
      "loss": 2.9948,
      "step": 169734
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3399670124053955,
      "learning_rate": 9.692278659451068e-05,
      "loss": 2.9162,
      "step": 169735
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5544087886810303,
      "learning_rate": 9.691977574333364e-05,
      "loss": 3.1133,
      "step": 169736
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.8635382652282715,
      "learning_rate": 9.691676492991309e-05,
      "loss": 2.8801,
      "step": 169737
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.266012668609619,
      "learning_rate": 9.691375415424952e-05,
      "loss": 2.8765,
      "step": 169738
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7572872638702393,
      "learning_rate": 9.691074341634342e-05,
      "loss": 2.8737,
      "step": 169739
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.619640588760376,
      "learning_rate": 9.690773271619554e-05,
      "loss": 2.8403,
      "step": 169740
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.318183183670044,
      "learning_rate": 9.690472205380625e-05,
      "loss": 2.8285,
      "step": 169741
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3580451011657715,
      "learning_rate": 9.690171142917633e-05,
      "loss": 2.9988,
      "step": 169742
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7066681385040283,
      "learning_rate": 9.689870084230619e-05,
      "loss": 3.0508,
      "step": 169743
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3879024982452393,
      "learning_rate": 9.689569029319641e-05,
      "loss": 2.8995,
      "step": 169744
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.0741333961486816,
      "learning_rate": 9.689267978184748e-05,
      "loss": 2.8462,
      "step": 169745
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.069005012512207,
      "learning_rate": 9.688966930826011e-05,
      "loss": 2.9317,
      "step": 169746
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.170485019683838,
      "learning_rate": 9.68866588724347e-05,
      "loss": 2.5878,
      "step": 169747
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7535157203674316,
      "learning_rate": 9.6883648474372e-05,
      "loss": 2.9754,
      "step": 169748
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1764698028564453,
      "learning_rate": 9.688063811407243e-05,
      "loss": 2.9979,
      "step": 169749
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8581130504608154,
      "learning_rate": 9.687762779153661e-05,
      "loss": 2.8949,
      "step": 169750
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.318347215652466,
      "learning_rate": 9.687461750676497e-05,
      "loss": 2.9374,
      "step": 169751
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4243087768554688,
      "learning_rate": 9.687160725975827e-05,
      "loss": 3.0038,
      "step": 169752
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.273374080657959,
      "learning_rate": 9.686859705051689e-05,
      "loss": 3.0086,
      "step": 169753
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7866716384887695,
      "learning_rate": 9.686558687904154e-05,
      "loss": 2.899,
      "step": 169754
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1077539920806885,
      "learning_rate": 9.686257674533265e-05,
      "loss": 2.9767,
      "step": 169755
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.203861713409424,
      "learning_rate": 9.685956664939101e-05,
      "loss": 2.9079,
      "step": 169756
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3454830646514893,
      "learning_rate": 9.685655659121684e-05,
      "loss": 2.8295,
      "step": 169757
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5395076274871826,
      "learning_rate": 9.685354657081095e-05,
      "loss": 2.8985,
      "step": 169758
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3233118057250977,
      "learning_rate": 9.685053658817371e-05,
      "loss": 3.0213,
      "step": 169759
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9380486011505127,
      "learning_rate": 9.68475266433059e-05,
      "loss": 3.1384,
      "step": 169760
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.984919548034668,
      "learning_rate": 9.684451673620787e-05,
      "loss": 2.9887,
      "step": 169761
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1348910331726074,
      "learning_rate": 9.684150686688049e-05,
      "loss": 2.6406,
      "step": 169762
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.155287981033325,
      "learning_rate": 9.68384970353239e-05,
      "loss": 2.8911,
      "step": 169763
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.480036735534668,
      "learning_rate": 9.683548724153896e-05,
      "loss": 3.1007,
      "step": 169764
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8373193740844727,
      "learning_rate": 9.683247748552605e-05,
      "loss": 2.7456,
      "step": 169765
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.633808135986328,
      "learning_rate": 9.682946776728591e-05,
      "loss": 3.0248,
      "step": 169766
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.083696126937866,
      "learning_rate": 9.682645808681894e-05,
      "loss": 2.8065,
      "step": 169767
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.736671209335327,
      "learning_rate": 9.68234484441259e-05,
      "loss": 2.7141,
      "step": 169768
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.270761013031006,
      "learning_rate": 9.682043883920709e-05,
      "loss": 2.8904,
      "step": 169769
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.549044609069824,
      "learning_rate": 9.681742927206327e-05,
      "loss": 3.0482,
      "step": 169770
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.696800947189331,
      "learning_rate": 9.681441974269478e-05,
      "loss": 3.1364,
      "step": 169771
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.338890790939331,
      "learning_rate": 9.681141025110248e-05,
      "loss": 2.9998,
      "step": 169772
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2506518363952637,
      "learning_rate": 9.680840079728663e-05,
      "loss": 3.2313,
      "step": 169773
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9246835708618164,
      "learning_rate": 9.680539138124816e-05,
      "loss": 2.9855,
      "step": 169774
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4270904064178467,
      "learning_rate": 9.680238200298721e-05,
      "loss": 2.8827,
      "step": 169775
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4098477363586426,
      "learning_rate": 9.679937266250463e-05,
      "loss": 2.9454,
      "step": 169776
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.69358491897583,
      "learning_rate": 9.679636335980077e-05,
      "loss": 3.146,
      "step": 169777
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.267890214920044,
      "learning_rate": 9.679335409487643e-05,
      "loss": 3.1593,
      "step": 169778
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4923713207244873,
      "learning_rate": 9.679034486773189e-05,
      "loss": 2.8219,
      "step": 169779
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.933069944381714,
      "learning_rate": 9.678733567836811e-05,
      "loss": 2.9143,
      "step": 169780
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7485485076904297,
      "learning_rate": 9.67843265267852e-05,
      "loss": 3.1084,
      "step": 169781
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9860026836395264,
      "learning_rate": 9.678131741298399e-05,
      "loss": 3.1739,
      "step": 169782
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6121089458465576,
      "learning_rate": 9.677830833696488e-05,
      "loss": 2.956,
      "step": 169783
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1646652221679688,
      "learning_rate": 9.677529929872863e-05,
      "loss": 2.9652,
      "step": 169784
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.8840343952178955,
      "learning_rate": 9.677229029827556e-05,
      "loss": 3.0428,
      "step": 169785
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.364396810531616,
      "learning_rate": 9.676928133560657e-05,
      "loss": 2.8705,
      "step": 169786
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0546443462371826,
      "learning_rate": 9.676627241072182e-05,
      "loss": 3.0683,
      "step": 169787
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2261762619018555,
      "learning_rate": 9.676326352362214e-05,
      "loss": 2.913,
      "step": 169788
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.504668712615967,
      "learning_rate": 9.67602546743079e-05,
      "loss": 2.7894,
      "step": 169789
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.727153778076172,
      "learning_rate": 9.675724586277988e-05,
      "loss": 2.7051,
      "step": 169790
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.037774085998535,
      "learning_rate": 9.675423708903843e-05,
      "loss": 3.3286,
      "step": 169791
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.570977210998535,
      "learning_rate": 9.67512283530844e-05,
      "loss": 2.9088,
      "step": 169792
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3637020587921143,
      "learning_rate": 9.674821965491794e-05,
      "loss": 3.0341,
      "step": 169793
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2355611324310303,
      "learning_rate": 9.674521099453994e-05,
      "loss": 2.9424,
      "step": 169794
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1484339237213135,
      "learning_rate": 9.674220237195074e-05,
      "loss": 2.9319,
      "step": 169795
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.514171600341797,
      "learning_rate": 9.673919378715108e-05,
      "loss": 2.9238,
      "step": 169796
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.672677993774414,
      "learning_rate": 9.673618524014136e-05,
      "loss": 2.9776,
      "step": 169797
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2388222217559814,
      "learning_rate": 9.67331767309223e-05,
      "loss": 2.9269,
      "step": 169798
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.894357919692993,
      "learning_rate": 9.673016825949444e-05,
      "loss": 2.8151,
      "step": 169799
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2506585121154785,
      "learning_rate": 9.672715982585822e-05,
      "loss": 3.0225,
      "step": 169800
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.24055814743042,
      "learning_rate": 9.672415143001419e-05,
      "loss": 3.299,
      "step": 169801
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.18563175201416,
      "learning_rate": 9.672114307196305e-05,
      "loss": 3.0498,
      "step": 169802
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2275354862213135,
      "learning_rate": 9.671813475170517e-05,
      "loss": 2.9692,
      "step": 169803
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8769524097442627,
      "learning_rate": 9.671512646924138e-05,
      "loss": 3.0159,
      "step": 169804
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.92046856880188,
      "learning_rate": 9.671211822457204e-05,
      "loss": 3.0403,
      "step": 169805
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5867767333984375,
      "learning_rate": 9.670911001769778e-05,
      "loss": 2.7375,
      "step": 169806
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.494328737258911,
      "learning_rate": 9.670610184861901e-05,
      "loss": 2.8013,
      "step": 169807
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5796823501586914,
      "learning_rate": 9.670309371733654e-05,
      "loss": 2.6484,
      "step": 169808
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7272675037384033,
      "learning_rate": 9.670008562385071e-05,
      "loss": 2.8994,
      "step": 169809
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6243913173675537,
      "learning_rate": 9.669707756816223e-05,
      "loss": 3.0151,
      "step": 169810
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9635658264160156,
      "learning_rate": 9.669406955027164e-05,
      "loss": 2.6388,
      "step": 169811
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4363019466400146,
      "learning_rate": 9.669106157017933e-05,
      "loss": 3.0705,
      "step": 169812
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5015318393707275,
      "learning_rate": 9.668805362788611e-05,
      "loss": 2.8671,
      "step": 169813
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5234885215759277,
      "learning_rate": 9.668504572339241e-05,
      "loss": 3.1832,
      "step": 169814
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.284561634063721,
      "learning_rate": 9.668203785669873e-05,
      "loss": 3.1838,
      "step": 169815
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.295011281967163,
      "learning_rate": 9.667903002780574e-05,
      "loss": 2.9477,
      "step": 169816
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.170598030090332,
      "learning_rate": 9.667602223671398e-05,
      "loss": 2.9635,
      "step": 169817
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.456251382827759,
      "learning_rate": 9.667301448342391e-05,
      "loss": 2.8782,
      "step": 169818
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4682705402374268,
      "learning_rate": 9.667000676793625e-05,
      "loss": 2.8264,
      "step": 169819
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9699583053588867,
      "learning_rate": 9.666699909025134e-05,
      "loss": 2.8986,
      "step": 169820
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6325016021728516,
      "learning_rate": 9.666399145037002e-05,
      "loss": 3.0362,
      "step": 169821
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.170743703842163,
      "learning_rate": 9.666098384829267e-05,
      "loss": 2.8839,
      "step": 169822
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.338578701019287,
      "learning_rate": 9.665797628401991e-05,
      "loss": 3.1823,
      "step": 169823
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6101746559143066,
      "learning_rate": 9.665496875755214e-05,
      "loss": 2.8289,
      "step": 169824
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.254256248474121,
      "learning_rate": 9.665196126889016e-05,
      "loss": 2.996,
      "step": 169825
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5511860847473145,
      "learning_rate": 9.664895381803435e-05,
      "loss": 2.9048,
      "step": 169826
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2271313667297363,
      "learning_rate": 9.664594640498538e-05,
      "loss": 2.8311,
      "step": 169827
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.510880470275879,
      "learning_rate": 9.664293902974382e-05,
      "loss": 2.905,
      "step": 169828
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.104351758956909,
      "learning_rate": 9.663993169231017e-05,
      "loss": 2.8624,
      "step": 169829
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.529780864715576,
      "learning_rate": 9.663692439268488e-05,
      "loss": 2.9555,
      "step": 169830
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7814059257507324,
      "learning_rate": 9.663391713086871e-05,
      "loss": 3.0536,
      "step": 169831
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2999815940856934,
      "learning_rate": 9.663090990686207e-05,
      "loss": 3.1121,
      "step": 169832
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9770987033843994,
      "learning_rate": 9.662790272066564e-05,
      "loss": 2.978,
      "step": 169833
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.223290205001831,
      "learning_rate": 9.662489557227983e-05,
      "loss": 2.7461,
      "step": 169834
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3358843326568604,
      "learning_rate": 9.662188846170553e-05,
      "loss": 2.981,
      "step": 169835
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.443469285964966,
      "learning_rate": 9.661888138894283e-05,
      "loss": 2.7622,
      "step": 169836
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3223934173583984,
      "learning_rate": 9.661587435399262e-05,
      "loss": 2.8335,
      "step": 169837
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.407115936279297,
      "learning_rate": 9.661286735685526e-05,
      "loss": 2.982,
      "step": 169838
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.233550548553467,
      "learning_rate": 9.660986039753152e-05,
      "loss": 2.5195,
      "step": 169839
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1552395820617676,
      "learning_rate": 9.660685347602174e-05,
      "loss": 3.0644,
      "step": 169840
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.343006134033203,
      "learning_rate": 9.660384659232677e-05,
      "loss": 2.8049,
      "step": 169841
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.26151967048645,
      "learning_rate": 9.66008397464468e-05,
      "loss": 2.9009,
      "step": 169842
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9765162467956543,
      "learning_rate": 9.659783293838265e-05,
      "loss": 2.9067,
      "step": 169843
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8842504024505615,
      "learning_rate": 9.659482616813475e-05,
      "loss": 3.0346,
      "step": 169844
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.794142723083496,
      "learning_rate": 9.659181943570378e-05,
      "loss": 2.7797,
      "step": 169845
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4235079288482666,
      "learning_rate": 9.658881274109013e-05,
      "loss": 2.7898,
      "step": 169846
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.7920162677764893,
      "learning_rate": 9.658580608429466e-05,
      "loss": 3.0609,
      "step": 169847
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7932026386260986,
      "learning_rate": 9.65827994653175e-05,
      "loss": 2.8759,
      "step": 169848
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5783796310424805,
      "learning_rate": 9.65797928841596e-05,
      "loss": 2.7752,
      "step": 169849
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.583355665206909,
      "learning_rate": 9.657678634082121e-05,
      "loss": 3.0157,
      "step": 169850
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4070820808410645,
      "learning_rate": 9.657377983530313e-05,
      "loss": 3.1732,
      "step": 169851
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.041654348373413,
      "learning_rate": 9.657077336760577e-05,
      "loss": 2.7177,
      "step": 169852
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.822412490844727,
      "learning_rate": 9.656776693772992e-05,
      "loss": 2.7479,
      "step": 169853
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.150094509124756,
      "learning_rate": 9.656476054567575e-05,
      "loss": 2.7259,
      "step": 169854
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.392926216125488,
      "learning_rate": 9.656175419144415e-05,
      "loss": 2.838,
      "step": 169855
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.045916795730591,
      "learning_rate": 9.655874787503544e-05,
      "loss": 3.1386,
      "step": 169856
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4568192958831787,
      "learning_rate": 9.65557415964504e-05,
      "loss": 2.8132,
      "step": 169857
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.93109130859375,
      "learning_rate": 9.65527353556894e-05,
      "loss": 3.0623,
      "step": 169858
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.214446544647217,
      "learning_rate": 9.654972915275329e-05,
      "loss": 3.0825,
      "step": 169859
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.830354690551758,
      "learning_rate": 9.654672298764221e-05,
      "loss": 3.0208,
      "step": 169860
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7068445682525635,
      "learning_rate": 9.654371686035705e-05,
      "loss": 3.1861,
      "step": 169861
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.621713161468506,
      "learning_rate": 9.654071077089816e-05,
      "loss": 2.7479,
      "step": 169862
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3039391040802,
      "learning_rate": 9.653770471926627e-05,
      "loss": 3.0802,
      "step": 169863
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.330803632736206,
      "learning_rate": 9.653469870546177e-05,
      "loss": 3.0794,
      "step": 169864
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.081218957901001,
      "learning_rate": 9.65316927294855e-05,
      "loss": 2.7554,
      "step": 169865
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5378665924072266,
      "learning_rate": 9.652868679133768e-05,
      "loss": 3.0937,
      "step": 169866
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8864943981170654,
      "learning_rate": 9.652568089101905e-05,
      "loss": 2.9214,
      "step": 169867
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.682337760925293,
      "learning_rate": 9.652267502853007e-05,
      "loss": 2.8726,
      "step": 169868
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6460225582122803,
      "learning_rate": 9.651966920387146e-05,
      "loss": 3.1012,
      "step": 169869
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.614712715148926,
      "learning_rate": 9.65166634170436e-05,
      "loss": 2.7678,
      "step": 169870
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9866108894348145,
      "learning_rate": 9.651365766804729e-05,
      "loss": 2.9445,
      "step": 169871
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4505600929260254,
      "learning_rate": 9.651065195688276e-05,
      "loss": 2.9615,
      "step": 169872
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2202303409576416,
      "learning_rate": 9.650764628355088e-05,
      "loss": 2.9972,
      "step": 169873
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4931416511535645,
      "learning_rate": 9.650464064805192e-05,
      "loss": 3.1658,
      "step": 169874
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.521658182144165,
      "learning_rate": 9.650163505038671e-05,
      "loss": 2.9784,
      "step": 169875
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.249408006668091,
      "learning_rate": 9.649862949055556e-05,
      "loss": 3.1191,
      "step": 169876
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2512052059173584,
      "learning_rate": 9.649562396855937e-05,
      "loss": 3.091,
      "step": 169877
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9825570583343506,
      "learning_rate": 9.64926184843983e-05,
      "loss": 2.852,
      "step": 169878
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3815417289733887,
      "learning_rate": 9.648961303807319e-05,
      "loss": 3.1508,
      "step": 169879
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6587140560150146,
      "learning_rate": 9.648660762958438e-05,
      "loss": 2.8174,
      "step": 169880
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6179800033569336,
      "learning_rate": 9.648360225893267e-05,
      "loss": 2.8693,
      "step": 169881
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.752973794937134,
      "learning_rate": 9.64805969261184e-05,
      "loss": 2.7445,
      "step": 169882
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4827020168304443,
      "learning_rate": 9.647759163114232e-05,
      "loss": 3.0244,
      "step": 169883
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.585556745529175,
      "learning_rate": 9.64745863740049e-05,
      "loss": 3.0337,
      "step": 169884
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5883865356445312,
      "learning_rate": 9.647158115470672e-05,
      "loss": 2.7399,
      "step": 169885
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2863242626190186,
      "learning_rate": 9.646857597324818e-05,
      "loss": 3.0243,
      "step": 169886
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.269826889038086,
      "learning_rate": 9.646557082963008e-05,
      "loss": 2.8045,
      "step": 169887
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6014091968536377,
      "learning_rate": 9.64625657238528e-05,
      "loss": 2.875,
      "step": 169888
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6406683921813965,
      "learning_rate": 9.645956065591704e-05,
      "loss": 2.7477,
      "step": 169889
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7893640995025635,
      "learning_rate": 9.645655562582332e-05,
      "loss": 2.8946,
      "step": 169890
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.192060708999634,
      "learning_rate": 9.645355063357212e-05,
      "loss": 3.0299,
      "step": 169891
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.964643716812134,
      "learning_rate": 9.645054567916398e-05,
      "loss": 3.0994,
      "step": 169892
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3854761123657227,
      "learning_rate": 9.644754076259964e-05,
      "loss": 3.0669,
      "step": 169893
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.931502342224121,
      "learning_rate": 9.644453588387943e-05,
      "loss": 2.743,
      "step": 169894
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.725477933883667,
      "learning_rate": 9.64415310430041e-05,
      "loss": 2.7835,
      "step": 169895
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5859639644622803,
      "learning_rate": 9.643852623997418e-05,
      "loss": 2.8517,
      "step": 169896
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6189558506011963,
      "learning_rate": 9.643552147479005e-05,
      "loss": 2.7772,
      "step": 169897
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.9355309009552,
      "learning_rate": 9.64325167474525e-05,
      "loss": 3.2162,
      "step": 169898
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.8883209228515625,
      "learning_rate": 9.642951205796199e-05,
      "loss": 2.8196,
      "step": 169899
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.041855573654175,
      "learning_rate": 9.642650740631896e-05,
      "loss": 3.0851,
      "step": 169900
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.5888195037841797,
      "learning_rate": 9.64235027925242e-05,
      "loss": 2.8679,
      "step": 169901
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.346618890762329,
      "learning_rate": 9.642049821657816e-05,
      "loss": 3.0479,
      "step": 169902
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4632809162139893,
      "learning_rate": 9.641749367848127e-05,
      "loss": 2.8857,
      "step": 169903
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.52998685836792,
      "learning_rate": 9.641448917823434e-05,
      "loss": 3.0318,
      "step": 169904
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.755356788635254,
      "learning_rate": 9.64114847158378e-05,
      "loss": 2.9077,
      "step": 169905
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.9676148891448975,
      "learning_rate": 9.640848029129209e-05,
      "loss": 2.9262,
      "step": 169906
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.8044867515563965,
      "learning_rate": 9.640547590459799e-05,
      "loss": 2.6731,
      "step": 169907
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.043961763381958,
      "learning_rate": 9.640247155575595e-05,
      "loss": 3.0591,
      "step": 169908
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.071299076080322,
      "learning_rate": 9.639946724476643e-05,
      "loss": 2.9358,
      "step": 169909
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.121583938598633,
      "learning_rate": 9.63964629716302e-05,
      "loss": 2.8234,
      "step": 169910
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.098374843597412,
      "learning_rate": 9.639345873634762e-05,
      "loss": 3.0394,
      "step": 169911
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3986361026763916,
      "learning_rate": 9.639045453891943e-05,
      "loss": 2.7875,
      "step": 169912
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.711728811264038,
      "learning_rate": 9.638745037934613e-05,
      "loss": 2.8265,
      "step": 169913
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.999943971633911,
      "learning_rate": 9.638444625762821e-05,
      "loss": 2.9352,
      "step": 169914
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.537414312362671,
      "learning_rate": 9.638144217376617e-05,
      "loss": 3.1114,
      "step": 169915
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6494569778442383,
      "learning_rate": 9.637843812776076e-05,
      "loss": 3.0841,
      "step": 169916
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8595268726348877,
      "learning_rate": 9.637543411961237e-05,
      "loss": 2.8517,
      "step": 169917
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3654446601867676,
      "learning_rate": 9.637243014932168e-05,
      "loss": 3.1871,
      "step": 169918
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8885176181793213,
      "learning_rate": 9.636942621688915e-05,
      "loss": 2.815,
      "step": 169919
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.784731388092041,
      "learning_rate": 9.636642232231557e-05,
      "loss": 2.9734,
      "step": 169920
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4490909576416016,
      "learning_rate": 9.636341846560109e-05,
      "loss": 2.9493,
      "step": 169921
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.603839159011841,
      "learning_rate": 9.63604146467466e-05,
      "loss": 2.7787,
      "step": 169922
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.9454100131988525,
      "learning_rate": 9.63574108657525e-05,
      "loss": 2.8058,
      "step": 169923
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.8463797569274902,
      "learning_rate": 9.635440712261945e-05,
      "loss": 2.7812,
      "step": 169924
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5417559146881104,
      "learning_rate": 9.635140341734791e-05,
      "loss": 2.6078,
      "step": 169925
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.790933132171631,
      "learning_rate": 9.634839974993864e-05,
      "loss": 3.1121,
      "step": 169926
    },
    {
      "epoch": 2.21,
      "grad_norm": 6.911826133728027,
      "learning_rate": 9.634539612039186e-05,
      "loss": 2.8594,
      "step": 169927
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.987366199493408,
      "learning_rate": 9.634239252870842e-05,
      "loss": 3.0954,
      "step": 169928
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6579501628875732,
      "learning_rate": 9.633938897488866e-05,
      "loss": 3.1431,
      "step": 169929
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8569257259368896,
      "learning_rate": 9.633638545893339e-05,
      "loss": 2.901,
      "step": 169930
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.7520391941070557,
      "learning_rate": 9.633338198084289e-05,
      "loss": 2.7612,
      "step": 169931
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.287918567657471,
      "learning_rate": 9.633037854061809e-05,
      "loss": 3.0975,
      "step": 169932
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.637256145477295,
      "learning_rate": 9.632737513825911e-05,
      "loss": 2.7191,
      "step": 169933
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6797027587890625,
      "learning_rate": 9.632437177376681e-05,
      "loss": 2.8807,
      "step": 169934
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.473587989807129,
      "learning_rate": 9.632136844714155e-05,
      "loss": 3.2467,
      "step": 169935
    },
    {
      "epoch": 2.21,
      "grad_norm": 6.442002773284912,
      "learning_rate": 9.631836515838409e-05,
      "loss": 2.7571,
      "step": 169936
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.364748954772949,
      "learning_rate": 9.631536190749478e-05,
      "loss": 2.8216,
      "step": 169937
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.23588752746582,
      "learning_rate": 9.631235869447451e-05,
      "loss": 2.9062,
      "step": 169938
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.59303617477417,
      "learning_rate": 9.630935551932341e-05,
      "loss": 2.8289,
      "step": 169939
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.169301748275757,
      "learning_rate": 9.630635238204232e-05,
      "loss": 3.0119,
      "step": 169940
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.504760980606079,
      "learning_rate": 9.630334928263168e-05,
      "loss": 3.181,
      "step": 169941
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.095943927764893,
      "learning_rate": 9.630034622109214e-05,
      "loss": 2.689,
      "step": 169942
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8728408813476562,
      "learning_rate": 9.629734319742414e-05,
      "loss": 3.0169,
      "step": 169943
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4109046459198,
      "learning_rate": 9.62943402116285e-05,
      "loss": 2.9223,
      "step": 169944
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.680820941925049,
      "learning_rate": 9.629133726370541e-05,
      "loss": 2.8506,
      "step": 169945
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5683000087738037,
      "learning_rate": 9.62883343536557e-05,
      "loss": 2.6543,
      "step": 169946
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4112613201141357,
      "learning_rate": 9.62853314814797e-05,
      "loss": 2.858,
      "step": 169947
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6155827045440674,
      "learning_rate": 9.628232864717824e-05,
      "loss": 3.0134,
      "step": 169948
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3339929580688477,
      "learning_rate": 9.627932585075162e-05,
      "loss": 2.9484,
      "step": 169949
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.783599615097046,
      "learning_rate": 9.627632309220062e-05,
      "loss": 3.0798,
      "step": 169950
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4486453533172607,
      "learning_rate": 9.627332037152567e-05,
      "loss": 2.9091,
      "step": 169951
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.009636402130127,
      "learning_rate": 9.62703176887274e-05,
      "loss": 2.9167,
      "step": 169952
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.679586410522461,
      "learning_rate": 9.62673150438062e-05,
      "loss": 2.9354,
      "step": 169953
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.746525764465332,
      "learning_rate": 9.626431243676287e-05,
      "loss": 2.9882,
      "step": 169954
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.381471872329712,
      "learning_rate": 9.626130986759773e-05,
      "loss": 3.2014,
      "step": 169955
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6267893314361572,
      "learning_rate": 9.625830733631156e-05,
      "loss": 3.1333,
      "step": 169956
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4387054443359375,
      "learning_rate": 9.625530484290483e-05,
      "loss": 3.0303,
      "step": 169957
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4491236209869385,
      "learning_rate": 9.62523023873781e-05,
      "loss": 2.8718,
      "step": 169958
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.369713306427002,
      "learning_rate": 9.62492999697318e-05,
      "loss": 3.1747,
      "step": 169959
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8442468643188477,
      "learning_rate": 9.62462975899667e-05,
      "loss": 3.0642,
      "step": 169960
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.001424551010132,
      "learning_rate": 9.624329524808312e-05,
      "loss": 2.9925,
      "step": 169961
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.315469741821289,
      "learning_rate": 9.624029294408192e-05,
      "loss": 2.9616,
      "step": 169962
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.509498119354248,
      "learning_rate": 9.623729067796346e-05,
      "loss": 2.9646,
      "step": 169963
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.061373710632324,
      "learning_rate": 9.623428844972831e-05,
      "loss": 3.0777,
      "step": 169964
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.104238510131836,
      "learning_rate": 9.6231286259377e-05,
      "loss": 2.8488,
      "step": 169965
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7713966369628906,
      "learning_rate": 9.622828410691024e-05,
      "loss": 2.8888,
      "step": 169966
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6735780239105225,
      "learning_rate": 9.622528199232837e-05,
      "loss": 2.8302,
      "step": 169967
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.56587290763855,
      "learning_rate": 9.622227991563218e-05,
      "loss": 2.7717,
      "step": 169968
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.402463436126709,
      "learning_rate": 9.621927787682207e-05,
      "loss": 3.0644,
      "step": 169969
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.127283811569214,
      "learning_rate": 9.621627587589868e-05,
      "loss": 2.9664,
      "step": 169970
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.916579008102417,
      "learning_rate": 9.621327391286243e-05,
      "loss": 3.0823,
      "step": 169971
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.537914991378784,
      "learning_rate": 9.621027198771408e-05,
      "loss": 2.8691,
      "step": 169972
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.869629144668579,
      "learning_rate": 9.620727010045397e-05,
      "loss": 2.8484,
      "step": 169973
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4969992637634277,
      "learning_rate": 9.620426825108289e-05,
      "loss": 3.0232,
      "step": 169974
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.015146493911743,
      "learning_rate": 9.620126643960128e-05,
      "loss": 2.6968,
      "step": 169975
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.05409574508667,
      "learning_rate": 9.619826466600972e-05,
      "loss": 2.6976,
      "step": 169976
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.78721809387207,
      "learning_rate": 9.619526293030861e-05,
      "loss": 2.946,
      "step": 169977
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.766345262527466,
      "learning_rate": 9.61922612324988e-05,
      "loss": 2.6796,
      "step": 169978
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5932655334472656,
      "learning_rate": 9.618925957258055e-05,
      "loss": 2.9086,
      "step": 169979
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.682251453399658,
      "learning_rate": 9.618625795055468e-05,
      "loss": 2.8323,
      "step": 169980
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1083669662475586,
      "learning_rate": 9.618325636642163e-05,
      "loss": 2.9079,
      "step": 169981
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7807509899139404,
      "learning_rate": 9.618025482018187e-05,
      "loss": 2.6026,
      "step": 169982
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.585125207901001,
      "learning_rate": 9.617725331183614e-05,
      "loss": 2.9439,
      "step": 169983
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1567540168762207,
      "learning_rate": 9.61742518413849e-05,
      "loss": 2.7153,
      "step": 169984
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2519564628601074,
      "learning_rate": 9.617125040882864e-05,
      "loss": 2.9155,
      "step": 169985
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.672309637069702,
      "learning_rate": 9.616824901416807e-05,
      "loss": 2.7367,
      "step": 169986
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8021066188812256,
      "learning_rate": 9.616524765740367e-05,
      "loss": 2.9424,
      "step": 169987
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.9294300079345703,
      "learning_rate": 9.61622463385359e-05,
      "loss": 3.0421,
      "step": 169988
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.875642776489258,
      "learning_rate": 9.615924505756552e-05,
      "loss": 3.0858,
      "step": 169989
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4522616863250732,
      "learning_rate": 9.6156243814493e-05,
      "loss": 2.9756,
      "step": 169990
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0678904056549072,
      "learning_rate": 9.615324260931874e-05,
      "loss": 2.8635,
      "step": 169991
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0209710597991943,
      "learning_rate": 9.615024144204358e-05,
      "loss": 2.9713,
      "step": 169992
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4633328914642334,
      "learning_rate": 9.61472403126679e-05,
      "loss": 3.2357,
      "step": 169993
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.936469316482544,
      "learning_rate": 9.614423922119225e-05,
      "loss": 2.8147,
      "step": 169994
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.006848096847534,
      "learning_rate": 9.61412381676173e-05,
      "loss": 2.9095,
      "step": 169995
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.436789035797119,
      "learning_rate": 9.613823715194343e-05,
      "loss": 2.8801,
      "step": 169996
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3816425800323486,
      "learning_rate": 9.613523617417142e-05,
      "loss": 3.1766,
      "step": 169997
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7745814323425293,
      "learning_rate": 9.613223523430172e-05,
      "loss": 2.694,
      "step": 169998
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7140347957611084,
      "learning_rate": 9.61292343323349e-05,
      "loss": 2.949,
      "step": 169999
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.886577606201172,
      "learning_rate": 9.612623346827139e-05,
      "loss": 2.9672,
      "step": 170000
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.253117322921753,
      "learning_rate": 9.612323264211196e-05,
      "loss": 2.982,
      "step": 170001
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9755845069885254,
      "learning_rate": 9.612023185385698e-05,
      "loss": 3.018,
      "step": 170002
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7601749897003174,
      "learning_rate": 9.611723110350717e-05,
      "loss": 3.0419,
      "step": 170003
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.572613000869751,
      "learning_rate": 9.611423039106295e-05,
      "loss": 3.1809,
      "step": 170004
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.363769769668579,
      "learning_rate": 9.61112297165251e-05,
      "loss": 3.0448,
      "step": 170005
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.708627223968506,
      "learning_rate": 9.610822907989387e-05,
      "loss": 2.9698,
      "step": 170006
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4916393756866455,
      "learning_rate": 9.610522848117005e-05,
      "loss": 2.8664,
      "step": 170007
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7390341758728027,
      "learning_rate": 9.6102227920354e-05,
      "loss": 2.9048,
      "step": 170008
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.567859649658203,
      "learning_rate": 9.609922739744654e-05,
      "loss": 3.148,
      "step": 170009
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.408350944519043,
      "learning_rate": 9.609622691244793e-05,
      "loss": 2.9178,
      "step": 170010
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3104617595672607,
      "learning_rate": 9.609322646535912e-05,
      "loss": 2.8631,
      "step": 170011
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.782643795013428,
      "learning_rate": 9.609022605618021e-05,
      "loss": 2.8916,
      "step": 170012
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.903456211090088,
      "learning_rate": 9.608722568491207e-05,
      "loss": 3.001,
      "step": 170013
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3187508583068848,
      "learning_rate": 9.608422535155508e-05,
      "loss": 3.38,
      "step": 170014
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1310787200927734,
      "learning_rate": 9.608122505610996e-05,
      "loss": 3.1164,
      "step": 170015
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.241563320159912,
      "learning_rate": 9.60782247985771e-05,
      "loss": 2.7916,
      "step": 170016
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0717146396636963,
      "learning_rate": 9.607522457895727e-05,
      "loss": 2.8809,
      "step": 170017
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.604962110519409,
      "learning_rate": 9.607222439725086e-05,
      "loss": 3.042,
      "step": 170018
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.680211305618286,
      "learning_rate": 9.60692242534585e-05,
      "loss": 2.9397,
      "step": 170019
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.312039852142334,
      "learning_rate": 9.606622414758061e-05,
      "loss": 2.9444,
      "step": 170020
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.471663236618042,
      "learning_rate": 9.606322407961798e-05,
      "loss": 2.9937,
      "step": 170021
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.387300729751587,
      "learning_rate": 9.606022404957093e-05,
      "loss": 2.9168,
      "step": 170022
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0344817638397217,
      "learning_rate": 9.605722405744025e-05,
      "loss": 2.9461,
      "step": 170023
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4183590412139893,
      "learning_rate": 9.605422410322635e-05,
      "loss": 3.0606,
      "step": 170024
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.26228141784668,
      "learning_rate": 9.605122418692984e-05,
      "loss": 2.8606,
      "step": 170025
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.57271409034729,
      "learning_rate": 9.604822430855116e-05,
      "loss": 2.9201,
      "step": 170026
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2289769649505615,
      "learning_rate": 9.604522446809105e-05,
      "loss": 2.8362,
      "step": 170027
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0655410289764404,
      "learning_rate": 9.604222466554992e-05,
      "loss": 2.8979,
      "step": 170028
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2938733100891113,
      "learning_rate": 9.603922490092844e-05,
      "loss": 2.9549,
      "step": 170029
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.30125093460083,
      "learning_rate": 9.603622517422714e-05,
      "loss": 3.0499,
      "step": 170030
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.72622013092041,
      "learning_rate": 9.603322548544658e-05,
      "loss": 2.7961,
      "step": 170031
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3703763484954834,
      "learning_rate": 9.603022583458717e-05,
      "loss": 2.7509,
      "step": 170032
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1808431148529053,
      "learning_rate": 9.602722622164967e-05,
      "loss": 3.0986,
      "step": 170033
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.486926555633545,
      "learning_rate": 9.60242266466345e-05,
      "loss": 3.1312,
      "step": 170034
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.667717456817627,
      "learning_rate": 9.602122710954235e-05,
      "loss": 2.8209,
      "step": 170035
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.195586681365967,
      "learning_rate": 9.60182276103737e-05,
      "loss": 2.9831,
      "step": 170036
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3950603008270264,
      "learning_rate": 9.601522814912913e-05,
      "loss": 3.0707,
      "step": 170037
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.634683847427368,
      "learning_rate": 9.601222872580909e-05,
      "loss": 3.1793,
      "step": 170038
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5381884574890137,
      "learning_rate": 9.600922934041429e-05,
      "loss": 2.939,
      "step": 170039
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5595545768737793,
      "learning_rate": 9.600622999294517e-05,
      "loss": 2.9401,
      "step": 170040
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.85888409614563,
      "learning_rate": 9.600323068340242e-05,
      "loss": 2.9482,
      "step": 170041
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8412978649139404,
      "learning_rate": 9.60002314117865e-05,
      "loss": 2.8185,
      "step": 170042
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.3899500370025635,
      "learning_rate": 9.5997232178098e-05,
      "loss": 2.8253,
      "step": 170043
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4108293056488037,
      "learning_rate": 9.599423298233739e-05,
      "loss": 2.9256,
      "step": 170044
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.37931227684021,
      "learning_rate": 9.599123382450538e-05,
      "loss": 3.0189,
      "step": 170045
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.575453519821167,
      "learning_rate": 9.598823470460235e-05,
      "loss": 2.8726,
      "step": 170046
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.676872730255127,
      "learning_rate": 9.598523562262908e-05,
      "loss": 2.9035,
      "step": 170047
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.1872880458831787,
      "learning_rate": 9.598223657858598e-05,
      "loss": 3.269,
      "step": 170048
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.873210906982422,
      "learning_rate": 9.597923757247363e-05,
      "loss": 2.9875,
      "step": 170049
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.4787757396698,
      "learning_rate": 9.597623860429251e-05,
      "loss": 2.6843,
      "step": 170050
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2013301849365234,
      "learning_rate": 9.597323967404335e-05,
      "loss": 2.7637,
      "step": 170051
    },
    {
      "epoch": 2.21,
      "grad_norm": 4.266537666320801,
      "learning_rate": 9.597024078172654e-05,
      "loss": 2.9547,
      "step": 170052
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0079240798950195,
      "learning_rate": 9.596724192734278e-05,
      "loss": 2.8789,
      "step": 170053
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6198291778564453,
      "learning_rate": 9.596424311089256e-05,
      "loss": 3.0075,
      "step": 170054
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.2155301570892334,
      "learning_rate": 9.596124433237645e-05,
      "loss": 2.973,
      "step": 170055
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.374323844909668,
      "learning_rate": 9.59582455917949e-05,
      "loss": 2.9665,
      "step": 170056
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.765904664993286,
      "learning_rate": 9.595524688914868e-05,
      "loss": 2.805,
      "step": 170057
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.457935333251953,
      "learning_rate": 9.59522482244381e-05,
      "loss": 2.849,
      "step": 170058
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4777605533599854,
      "learning_rate": 9.594924959766397e-05,
      "loss": 2.8934,
      "step": 170059
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.353170156478882,
      "learning_rate": 9.594625100882671e-05,
      "loss": 2.9549,
      "step": 170060
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.470287799835205,
      "learning_rate": 9.59432524579269e-05,
      "loss": 2.7541,
      "step": 170061
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5671334266662598,
      "learning_rate": 9.594025394496501e-05,
      "loss": 3.153,
      "step": 170062
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.7745769023895264,
      "learning_rate": 9.593725546994176e-05,
      "loss": 2.9056,
      "step": 170063
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0845401287078857,
      "learning_rate": 9.59342570328575e-05,
      "loss": 2.6814,
      "step": 170064
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5434610843658447,
      "learning_rate": 9.593125863371307e-05,
      "loss": 3.0663,
      "step": 170065
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4954724311828613,
      "learning_rate": 9.592826027250882e-05,
      "loss": 2.8699,
      "step": 170066
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5079829692840576,
      "learning_rate": 9.592526194924538e-05,
      "loss": 3.1444,
      "step": 170067
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6054317951202393,
      "learning_rate": 9.59222636639232e-05,
      "loss": 2.9075,
      "step": 170068
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8332881927490234,
      "learning_rate": 9.591926541654302e-05,
      "loss": 3.0916,
      "step": 170069
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6074318885803223,
      "learning_rate": 9.591626720710516e-05,
      "loss": 2.9472,
      "step": 170070
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8931429386138916,
      "learning_rate": 9.591326903561047e-05,
      "loss": 3.042,
      "step": 170071
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8323564529418945,
      "learning_rate": 9.591027090205934e-05,
      "loss": 3.0535,
      "step": 170072
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4708621501922607,
      "learning_rate": 9.590727280645223e-05,
      "loss": 2.5396,
      "step": 170073
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.6802194118499756,
      "learning_rate": 9.590427474878993e-05,
      "loss": 2.7669,
      "step": 170074
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.829505443572998,
      "learning_rate": 9.590127672907285e-05,
      "loss": 2.9761,
      "step": 170075
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.9230756759643555,
      "learning_rate": 9.589827874730152e-05,
      "loss": 3.0566,
      "step": 170076
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.6469180583953857,
      "learning_rate": 9.589528080347662e-05,
      "loss": 2.6984,
      "step": 170077
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4523136615753174,
      "learning_rate": 9.589228289759865e-05,
      "loss": 2.7656,
      "step": 170078
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3034863471984863,
      "learning_rate": 9.588928502966806e-05,
      "loss": 2.9998,
      "step": 170079
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.0064008235931396,
      "learning_rate": 9.588628719968557e-05,
      "loss": 2.9086,
      "step": 170080
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.689633369445801,
      "learning_rate": 9.588328940765161e-05,
      "loss": 2.9091,
      "step": 170081
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8073887825012207,
      "learning_rate": 9.588029165356688e-05,
      "loss": 2.8407,
      "step": 170082
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2144105434417725,
      "learning_rate": 9.587729393743187e-05,
      "loss": 3.1766,
      "step": 170083
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3096418380737305,
      "learning_rate": 9.587429625924702e-05,
      "loss": 2.8073,
      "step": 170084
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.584752082824707,
      "learning_rate": 9.587129861901308e-05,
      "loss": 3.1066,
      "step": 170085
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.714874029159546,
      "learning_rate": 9.586830101673056e-05,
      "loss": 2.8499,
      "step": 170086
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4229440689086914,
      "learning_rate": 9.586530345239984e-05,
      "loss": 3.0601,
      "step": 170087
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3085432052612305,
      "learning_rate": 9.586230592602172e-05,
      "loss": 2.9574,
      "step": 170088
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.394294023513794,
      "learning_rate": 9.585930843759665e-05,
      "loss": 2.7041,
      "step": 170089
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9220597743988037,
      "learning_rate": 9.585631098712509e-05,
      "loss": 2.9496,
      "step": 170090
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.627955436706543,
      "learning_rate": 9.585331357460779e-05,
      "loss": 3.0012,
      "step": 170091
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.2529914379119873,
      "learning_rate": 9.58503162000452e-05,
      "loss": 2.866,
      "step": 170092
    },
    {
      "epoch": 2.21,
      "grad_norm": 5.145750999450684,
      "learning_rate": 9.584731886343783e-05,
      "loss": 3.0646,
      "step": 170093
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.057556629180908,
      "learning_rate": 9.584432156478635e-05,
      "loss": 3.2659,
      "step": 170094
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.565229654312134,
      "learning_rate": 9.584132430409121e-05,
      "loss": 2.8282,
      "step": 170095
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5928778648376465,
      "learning_rate": 9.583832708135308e-05,
      "loss": 2.7801,
      "step": 170096
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1086225509643555,
      "learning_rate": 9.583532989657248e-05,
      "loss": 2.8332,
      "step": 170097
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.679063320159912,
      "learning_rate": 9.583233274974997e-05,
      "loss": 3.1048,
      "step": 170098
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.1524360179901123,
      "learning_rate": 9.582933564088594e-05,
      "loss": 2.9129,
      "step": 170099
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.5287373065948486,
      "learning_rate": 9.582633856998121e-05,
      "loss": 2.9652,
      "step": 170100
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.3474957942962646,
      "learning_rate": 9.58233415370361e-05,
      "loss": 3.0332,
      "step": 170101
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8973543643951416,
      "learning_rate": 9.582034454205142e-05,
      "loss": 2.9654,
      "step": 170102
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.743544340133667,
      "learning_rate": 9.581734758502757e-05,
      "loss": 3.0555,
      "step": 170103
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.591958522796631,
      "learning_rate": 9.581435066596513e-05,
      "loss": 3.0601,
      "step": 170104
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.903823137283325,
      "learning_rate": 9.581135378486454e-05,
      "loss": 2.9415,
      "step": 170105
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.410858392715454,
      "learning_rate": 9.58083569417266e-05,
      "loss": 2.9563,
      "step": 170106
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.471261978149414,
      "learning_rate": 9.580536013655165e-05,
      "loss": 3.142,
      "step": 170107
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.440141201019287,
      "learning_rate": 9.580236336934042e-05,
      "loss": 2.7168,
      "step": 170108
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.424985647201538,
      "learning_rate": 9.579936664009338e-05,
      "loss": 2.7884,
      "step": 170109
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.8662455081939697,
      "learning_rate": 9.579636994881109e-05,
      "loss": 3.0736,
      "step": 170110
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.556995153427124,
      "learning_rate": 9.579337329549404e-05,
      "loss": 2.8181,
      "step": 170111
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.4948034286499023,
      "learning_rate": 9.57903766801429e-05,
      "loss": 3.1328,
      "step": 170112
    },
    {
      "epoch": 2.21,
      "grad_norm": 3.084181547164917,
      "learning_rate": 9.578738010275814e-05,
      "loss": 2.9005,
      "step": 170113
    },
    {
      "epoch": 2.21,
      "grad_norm": 2.9994497299194336,
      "learning_rate": 9.578438356334045e-05,
      "loss": 3.0868,
      "step": 170114
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.417116403579712,
      "learning_rate": 9.578138706189028e-05,
      "loss": 2.8255,
      "step": 170115
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5120580196380615,
      "learning_rate": 9.577839059840821e-05,
      "loss": 2.8391,
      "step": 170116
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6639997959136963,
      "learning_rate": 9.577539417289469e-05,
      "loss": 2.8818,
      "step": 170117
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.795114755630493,
      "learning_rate": 9.577239778535051e-05,
      "loss": 3.0286,
      "step": 170118
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.0628502368927,
      "learning_rate": 9.576940143577595e-05,
      "loss": 3.045,
      "step": 170119
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2017455101013184,
      "learning_rate": 9.576640512417186e-05,
      "loss": 2.7273,
      "step": 170120
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.502333402633667,
      "learning_rate": 9.576340885053861e-05,
      "loss": 3.0907,
      "step": 170121
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3604114055633545,
      "learning_rate": 9.57604126148768e-05,
      "loss": 2.9769,
      "step": 170122
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4193623065948486,
      "learning_rate": 9.575741641718688e-05,
      "loss": 2.9542,
      "step": 170123
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.655930995941162,
      "learning_rate": 9.575442025746966e-05,
      "loss": 3.0199,
      "step": 170124
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.865170955657959,
      "learning_rate": 9.575142413572541e-05,
      "loss": 3.0404,
      "step": 170125
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.708561658859253,
      "learning_rate": 9.574842805195494e-05,
      "loss": 2.9504,
      "step": 170126
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.42207932472229,
      "learning_rate": 9.574543200615868e-05,
      "loss": 3.2719,
      "step": 170127
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6556243896484375,
      "learning_rate": 9.574243599833718e-05,
      "loss": 3.0012,
      "step": 170128
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9902119636535645,
      "learning_rate": 9.573944002849094e-05,
      "loss": 2.8375,
      "step": 170129
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.830310583114624,
      "learning_rate": 9.573644409662072e-05,
      "loss": 2.831,
      "step": 170130
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.877768039703369,
      "learning_rate": 9.573344820272683e-05,
      "loss": 2.927,
      "step": 170131
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.598187208175659,
      "learning_rate": 9.573045234681005e-05,
      "loss": 2.8715,
      "step": 170132
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.811997890472412,
      "learning_rate": 9.572745652887083e-05,
      "loss": 2.8629,
      "step": 170133
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.542813539505005,
      "learning_rate": 9.572446074890972e-05,
      "loss": 2.6972,
      "step": 170134
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6664116382598877,
      "learning_rate": 9.572146500692721e-05,
      "loss": 3.0194,
      "step": 170135
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7634894847869873,
      "learning_rate": 9.571846930292402e-05,
      "loss": 2.9656,
      "step": 170136
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.55318021774292,
      "learning_rate": 9.571547363690051e-05,
      "loss": 2.9671,
      "step": 170137
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.0014052391052246,
      "learning_rate": 9.571247800885748e-05,
      "loss": 3.1187,
      "step": 170138
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.458217144012451,
      "learning_rate": 9.570948241879534e-05,
      "loss": 2.913,
      "step": 170139
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.553299903869629,
      "learning_rate": 9.570648686671466e-05,
      "loss": 3.2459,
      "step": 170140
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.683542013168335,
      "learning_rate": 9.570349135261592e-05,
      "loss": 3.1609,
      "step": 170141
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.157326698303223,
      "learning_rate": 9.570049587649984e-05,
      "loss": 3.0378,
      "step": 170142
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.193495988845825,
      "learning_rate": 9.56975004383668e-05,
      "loss": 3.1242,
      "step": 170143
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4686737060546875,
      "learning_rate": 9.569450503821755e-05,
      "loss": 2.8198,
      "step": 170144
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.493353366851807,
      "learning_rate": 9.569150967605255e-05,
      "loss": 3.0886,
      "step": 170145
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6482136249542236,
      "learning_rate": 9.568851435187236e-05,
      "loss": 3.2395,
      "step": 170146
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.654648542404175,
      "learning_rate": 9.568551906567742e-05,
      "loss": 2.7064,
      "step": 170147
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.545173645019531,
      "learning_rate": 9.56825238174685e-05,
      "loss": 2.715,
      "step": 170148
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5181851387023926,
      "learning_rate": 9.567952860724598e-05,
      "loss": 3.0906,
      "step": 170149
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2821943759918213,
      "learning_rate": 9.567653343501054e-05,
      "loss": 2.8864,
      "step": 170150
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2020809650421143,
      "learning_rate": 9.567353830076262e-05,
      "loss": 2.9001,
      "step": 170151
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5599751472473145,
      "learning_rate": 9.567054320450306e-05,
      "loss": 3.0343,
      "step": 170152
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5020511150360107,
      "learning_rate": 9.5667548146232e-05,
      "loss": 3.0622,
      "step": 170153
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.9576218128204346,
      "learning_rate": 9.566455312595029e-05,
      "loss": 2.9125,
      "step": 170154
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.605664014816284,
      "learning_rate": 9.566155814365832e-05,
      "loss": 2.9052,
      "step": 170155
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.938075304031372,
      "learning_rate": 9.565856319935679e-05,
      "loss": 2.8596,
      "step": 170156
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.88212776184082,
      "learning_rate": 9.56555682930461e-05,
      "loss": 3.0492,
      "step": 170157
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8364830017089844,
      "learning_rate": 9.5652573424727e-05,
      "loss": 2.9256,
      "step": 170158
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.462893486022949,
      "learning_rate": 9.564957859439994e-05,
      "loss": 2.9837,
      "step": 170159
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6173651218414307,
      "learning_rate": 9.564658380206549e-05,
      "loss": 2.7507,
      "step": 170160
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.536871910095215,
      "learning_rate": 9.564358904772407e-05,
      "loss": 2.9387,
      "step": 170161
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8910813331604004,
      "learning_rate": 9.56405943313765e-05,
      "loss": 3.0244,
      "step": 170162
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9059154987335205,
      "learning_rate": 9.56375996530231e-05,
      "loss": 3.147,
      "step": 170163
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9537291526794434,
      "learning_rate": 9.563460501266461e-05,
      "loss": 2.7201,
      "step": 170164
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4159882068634033,
      "learning_rate": 9.56316104103015e-05,
      "loss": 2.953,
      "step": 170165
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5922131538391113,
      "learning_rate": 9.562861584593425e-05,
      "loss": 2.9984,
      "step": 170166
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7002382278442383,
      "learning_rate": 9.562562131956358e-05,
      "loss": 2.7819,
      "step": 170167
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1149442195892334,
      "learning_rate": 9.562262683118998e-05,
      "loss": 2.8112,
      "step": 170168
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6687958240509033,
      "learning_rate": 9.56196323808139e-05,
      "loss": 3.183,
      "step": 170169
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6925413608551025,
      "learning_rate": 9.561663796843604e-05,
      "loss": 2.9688,
      "step": 170170
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.585777759552002,
      "learning_rate": 9.561364359405695e-05,
      "loss": 3.1656,
      "step": 170171
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.322375774383545,
      "learning_rate": 9.561064925767704e-05,
      "loss": 3.0511,
      "step": 170172
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2744414806365967,
      "learning_rate": 9.560765495929704e-05,
      "loss": 2.9197,
      "step": 170173
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.772768259048462,
      "learning_rate": 9.560466069891745e-05,
      "loss": 3.1705,
      "step": 170174
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.562223196029663,
      "learning_rate": 9.560166647653872e-05,
      "loss": 2.9329,
      "step": 170175
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4794623851776123,
      "learning_rate": 9.559867229216161e-05,
      "loss": 2.7591,
      "step": 170176
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3635613918304443,
      "learning_rate": 9.559567814578655e-05,
      "loss": 3.0447,
      "step": 170177
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.250199556350708,
      "learning_rate": 9.559268403741401e-05,
      "loss": 2.8176,
      "step": 170178
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5875461101531982,
      "learning_rate": 9.558968996704474e-05,
      "loss": 3.0952,
      "step": 170179
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8797950744628906,
      "learning_rate": 9.558669593467911e-05,
      "loss": 3.0988,
      "step": 170180
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.439126968383789,
      "learning_rate": 9.558370194031788e-05,
      "loss": 2.9263,
      "step": 170181
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4326937198638916,
      "learning_rate": 9.558070798396151e-05,
      "loss": 3.0716,
      "step": 170182
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1271770000457764,
      "learning_rate": 9.557771406561051e-05,
      "loss": 3.0396,
      "step": 170183
    },
    {
      "epoch": 2.22,
      "grad_norm": 5.494207382202148,
      "learning_rate": 9.55747201852654e-05,
      "loss": 3.201,
      "step": 170184
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.944678783416748,
      "learning_rate": 9.55717263429269e-05,
      "loss": 3.1419,
      "step": 170185
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.486289024353027,
      "learning_rate": 9.556873253859537e-05,
      "loss": 2.8888,
      "step": 170186
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6598596572875977,
      "learning_rate": 9.556573877227159e-05,
      "loss": 3.0608,
      "step": 170187
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.440911054611206,
      "learning_rate": 9.556274504395596e-05,
      "loss": 3.0653,
      "step": 170188
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3477721214294434,
      "learning_rate": 9.555975135364912e-05,
      "loss": 3.0273,
      "step": 170189
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6989505290985107,
      "learning_rate": 9.555675770135145e-05,
      "loss": 2.8221,
      "step": 170190
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.57682728767395,
      "learning_rate": 9.555376408706376e-05,
      "loss": 3.0348,
      "step": 170191
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1520140171051025,
      "learning_rate": 9.555077051078636e-05,
      "loss": 2.9506,
      "step": 170192
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6505188941955566,
      "learning_rate": 9.554777697252007e-05,
      "loss": 3.0149,
      "step": 170193
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.716275691986084,
      "learning_rate": 9.554478347226525e-05,
      "loss": 2.7543,
      "step": 170194
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7389700412750244,
      "learning_rate": 9.554179001002257e-05,
      "loss": 3.1123,
      "step": 170195
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.347459316253662,
      "learning_rate": 9.553879658579241e-05,
      "loss": 2.9679,
      "step": 170196
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.505931854248047,
      "learning_rate": 9.553580319957555e-05,
      "loss": 2.7891,
      "step": 170197
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6309988498687744,
      "learning_rate": 9.553280985137234e-05,
      "loss": 2.9152,
      "step": 170198
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3648836612701416,
      "learning_rate": 9.552981654118352e-05,
      "loss": 2.9836,
      "step": 170199
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.86489200592041,
      "learning_rate": 9.55268232690096e-05,
      "loss": 2.832,
      "step": 170200
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5190083980560303,
      "learning_rate": 9.552383003485107e-05,
      "loss": 2.7544,
      "step": 170201
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.43548059463501,
      "learning_rate": 9.552083683870843e-05,
      "loss": 2.9183,
      "step": 170202
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2821238040924072,
      "learning_rate": 9.551784368058244e-05,
      "loss": 2.8123,
      "step": 170203
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.423724889755249,
      "learning_rate": 9.551485056047345e-05,
      "loss": 3.0331,
      "step": 170204
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0690011978149414,
      "learning_rate": 9.55118574783822e-05,
      "loss": 3.2288,
      "step": 170205
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6597628593444824,
      "learning_rate": 9.550886443430915e-05,
      "loss": 2.5805,
      "step": 170206
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.375051975250244,
      "learning_rate": 9.550587142825485e-05,
      "loss": 2.7387,
      "step": 170207
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.992976188659668,
      "learning_rate": 9.550287846021978e-05,
      "loss": 3.0146,
      "step": 170208
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.580904960632324,
      "learning_rate": 9.54998855302047e-05,
      "loss": 2.9109,
      "step": 170209
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6898837089538574,
      "learning_rate": 9.549689263820993e-05,
      "loss": 2.9336,
      "step": 170210
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.796055316925049,
      "learning_rate": 9.549389978423627e-05,
      "loss": 2.5579,
      "step": 170211
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3801071643829346,
      "learning_rate": 9.549090696828414e-05,
      "loss": 2.8143,
      "step": 170212
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.612645149230957,
      "learning_rate": 9.54879141903541e-05,
      "loss": 2.777,
      "step": 170213
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.619631767272949,
      "learning_rate": 9.548492145044663e-05,
      "loss": 3.0234,
      "step": 170214
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.28194260597229,
      "learning_rate": 9.548192874856246e-05,
      "loss": 3.2014,
      "step": 170215
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.426443099975586,
      "learning_rate": 9.5478936084702e-05,
      "loss": 2.7813,
      "step": 170216
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.941843032836914,
      "learning_rate": 9.547594345886594e-05,
      "loss": 2.7516,
      "step": 170217
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2998249530792236,
      "learning_rate": 9.547295087105465e-05,
      "loss": 2.9982,
      "step": 170218
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.998232364654541,
      "learning_rate": 9.5469958321269e-05,
      "loss": 2.9242,
      "step": 170219
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6483027935028076,
      "learning_rate": 9.546696580950917e-05,
      "loss": 3.07,
      "step": 170220
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.234246015548706,
      "learning_rate": 9.546397333577597e-05,
      "loss": 2.9143,
      "step": 170221
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1772043704986572,
      "learning_rate": 9.546098090006981e-05,
      "loss": 2.9129,
      "step": 170222
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7666773796081543,
      "learning_rate": 9.54579885023914e-05,
      "loss": 3.1618,
      "step": 170223
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.474301338195801,
      "learning_rate": 9.545499614274109e-05,
      "loss": 2.8179,
      "step": 170224
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2789671421051025,
      "learning_rate": 9.545200382111976e-05,
      "loss": 2.9605,
      "step": 170225
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.723031520843506,
      "learning_rate": 9.544901153752757e-05,
      "loss": 2.7022,
      "step": 170226
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4985415935516357,
      "learning_rate": 9.544601929196537e-05,
      "loss": 2.8872,
      "step": 170227
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.77759051322937,
      "learning_rate": 9.544302708443352e-05,
      "loss": 3.1583,
      "step": 170228
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6000351905822754,
      "learning_rate": 9.544003491493278e-05,
      "loss": 3.2245,
      "step": 170229
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.358835220336914,
      "learning_rate": 9.543704278346349e-05,
      "loss": 3.0786,
      "step": 170230
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8072195053100586,
      "learning_rate": 9.543405069002651e-05,
      "loss": 3.0152,
      "step": 170231
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.383260726928711,
      "learning_rate": 9.543105863462203e-05,
      "loss": 2.7898,
      "step": 170232
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.370314836502075,
      "learning_rate": 9.542806661725084e-05,
      "loss": 2.9747,
      "step": 170233
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5990800857543945,
      "learning_rate": 9.542507463791335e-05,
      "loss": 3.0306,
      "step": 170234
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.493480682373047,
      "learning_rate": 9.542208269661031e-05,
      "loss": 2.7264,
      "step": 170235
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6861021518707275,
      "learning_rate": 9.541909079334204e-05,
      "loss": 2.9712,
      "step": 170236
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5986857414245605,
      "learning_rate": 9.541609892810945e-05,
      "loss": 3.1913,
      "step": 170237
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2063260078430176,
      "learning_rate": 9.541310710091267e-05,
      "loss": 3.0594,
      "step": 170238
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.868046283721924,
      "learning_rate": 9.541011531175256e-05,
      "loss": 2.8633,
      "step": 170239
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.489694356918335,
      "learning_rate": 9.540712356062945e-05,
      "loss": 2.8145,
      "step": 170240
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.453878164291382,
      "learning_rate": 9.540413184754412e-05,
      "loss": 2.9026,
      "step": 170241
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3002569675445557,
      "learning_rate": 9.540114017249693e-05,
      "loss": 3.1042,
      "step": 170242
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9310781955718994,
      "learning_rate": 9.539814853548861e-05,
      "loss": 2.7967,
      "step": 170243
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3180055618286133,
      "learning_rate": 9.539515693651965e-05,
      "loss": 2.9726,
      "step": 170244
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.918121099472046,
      "learning_rate": 9.53921653755906e-05,
      "loss": 2.8483,
      "step": 170245
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5921711921691895,
      "learning_rate": 9.53891738527019e-05,
      "loss": 2.8618,
      "step": 170246
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3697288036346436,
      "learning_rate": 9.538618236785434e-05,
      "loss": 2.9222,
      "step": 170247
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9805421829223633,
      "learning_rate": 9.538319092104824e-05,
      "loss": 2.8825,
      "step": 170248
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.0836451053619385,
      "learning_rate": 9.538019951228434e-05,
      "loss": 2.8185,
      "step": 170249
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.335646390914917,
      "learning_rate": 9.537720814156314e-05,
      "loss": 3.009,
      "step": 170250
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3544089794158936,
      "learning_rate": 9.537421680888519e-05,
      "loss": 2.9922,
      "step": 170251
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4465603828430176,
      "learning_rate": 9.537122551425093e-05,
      "loss": 3.0459,
      "step": 170252
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.371486186981201,
      "learning_rate": 9.53682342576611e-05,
      "loss": 2.8942,
      "step": 170253
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.828042507171631,
      "learning_rate": 9.536524303911611e-05,
      "loss": 2.9787,
      "step": 170254
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.111911773681641,
      "learning_rate": 9.536225185861666e-05,
      "loss": 3.015,
      "step": 170255
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.142216682434082,
      "learning_rate": 9.535926071616324e-05,
      "loss": 3.0062,
      "step": 170256
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1638104915618896,
      "learning_rate": 9.535626961175631e-05,
      "loss": 2.9708,
      "step": 170257
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9184770584106445,
      "learning_rate": 9.535327854539658e-05,
      "loss": 2.7033,
      "step": 170258
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.65787672996521,
      "learning_rate": 9.535028751708455e-05,
      "loss": 3.1636,
      "step": 170259
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2603025436401367,
      "learning_rate": 9.53472965268207e-05,
      "loss": 3.0127,
      "step": 170260
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6141412258148193,
      "learning_rate": 9.534430557460573e-05,
      "loss": 3.1347,
      "step": 170261
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4102413654327393,
      "learning_rate": 9.53413146604401e-05,
      "loss": 2.981,
      "step": 170262
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3384201526641846,
      "learning_rate": 9.533832378432429e-05,
      "loss": 3.0966,
      "step": 170263
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7873470783233643,
      "learning_rate": 9.533533294625904e-05,
      "loss": 2.9627,
      "step": 170264
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.585519552230835,
      "learning_rate": 9.533234214624473e-05,
      "loss": 2.87,
      "step": 170265
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3409996032714844,
      "learning_rate": 9.532935138428211e-05,
      "loss": 3.0206,
      "step": 170266
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.029719591140747,
      "learning_rate": 9.532636066037163e-05,
      "loss": 2.9538,
      "step": 170267
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7010457515716553,
      "learning_rate": 9.53233699745138e-05,
      "loss": 2.8955,
      "step": 170268
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.876992702484131,
      "learning_rate": 9.532037932670917e-05,
      "loss": 3.1351,
      "step": 170269
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.758927822113037,
      "learning_rate": 9.531738871695842e-05,
      "loss": 3.0246,
      "step": 170270
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5108280181884766,
      "learning_rate": 9.531439814526193e-05,
      "loss": 2.9902,
      "step": 170271
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5833048820495605,
      "learning_rate": 9.531140761162045e-05,
      "loss": 2.9018,
      "step": 170272
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.913714647293091,
      "learning_rate": 9.530841711603445e-05,
      "loss": 2.9006,
      "step": 170273
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5030744075775146,
      "learning_rate": 9.530542665850448e-05,
      "loss": 3.1567,
      "step": 170274
    },
    {
      "epoch": 2.22,
      "grad_norm": 1.9978880882263184,
      "learning_rate": 9.5302436239031e-05,
      "loss": 2.6773,
      "step": 170275
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.495577096939087,
      "learning_rate": 9.529944585761478e-05,
      "loss": 2.954,
      "step": 170276
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.42971134185791,
      "learning_rate": 9.52964555142561e-05,
      "loss": 2.8016,
      "step": 170277
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9859776496887207,
      "learning_rate": 9.529346520895583e-05,
      "loss": 2.9452,
      "step": 170278
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1648755073547363,
      "learning_rate": 9.529047494171435e-05,
      "loss": 2.937,
      "step": 170279
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.917105197906494,
      "learning_rate": 9.528748471253221e-05,
      "loss": 3.1591,
      "step": 170280
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7835094928741455,
      "learning_rate": 9.528449452140994e-05,
      "loss": 2.9304,
      "step": 170281
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.469874143600464,
      "learning_rate": 9.528150436834822e-05,
      "loss": 2.9439,
      "step": 170282
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.940889358520508,
      "learning_rate": 9.527851425334742e-05,
      "loss": 3.0735,
      "step": 170283
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.906968116760254,
      "learning_rate": 9.527552417640833e-05,
      "loss": 2.9658,
      "step": 170284
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.247544765472412,
      "learning_rate": 9.527253413753128e-05,
      "loss": 2.9162,
      "step": 170285
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.362746477127075,
      "learning_rate": 9.526954413671714e-05,
      "loss": 3.0953,
      "step": 170286
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5660483837127686,
      "learning_rate": 9.526655417396601e-05,
      "loss": 2.8583,
      "step": 170287
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5792064666748047,
      "learning_rate": 9.526356424927885e-05,
      "loss": 3.1711,
      "step": 170288
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.63320255279541,
      "learning_rate": 9.526057436265593e-05,
      "loss": 3.1128,
      "step": 170289
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.485879421234131,
      "learning_rate": 9.525758451409802e-05,
      "loss": 2.6867,
      "step": 170290
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1973748207092285,
      "learning_rate": 9.525459470360551e-05,
      "loss": 3.0181,
      "step": 170291
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.652470111846924,
      "learning_rate": 9.525160493117925e-05,
      "loss": 3.0702,
      "step": 170292
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.842223644256592,
      "learning_rate": 9.524861519681937e-05,
      "loss": 3.0704,
      "step": 170293
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5223193168640137,
      "learning_rate": 9.52456255005267e-05,
      "loss": 2.8339,
      "step": 170294
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.760960340499878,
      "learning_rate": 9.524263584230168e-05,
      "loss": 2.9267,
      "step": 170295
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2269039154052734,
      "learning_rate": 9.523964622214501e-05,
      "loss": 2.9604,
      "step": 170296
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1123573780059814,
      "learning_rate": 9.523665664005706e-05,
      "loss": 2.7428,
      "step": 170297
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2720048427581787,
      "learning_rate": 9.523366709603866e-05,
      "loss": 2.7892,
      "step": 170298
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4782207012176514,
      "learning_rate": 9.523067759009e-05,
      "loss": 3.0428,
      "step": 170299
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.179089307785034,
      "learning_rate": 9.522768812221192e-05,
      "loss": 2.8215,
      "step": 170300
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.780606985092163,
      "learning_rate": 9.522469869240475e-05,
      "loss": 2.9126,
      "step": 170301
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0635602474212646,
      "learning_rate": 9.522170930066931e-05,
      "loss": 2.9837,
      "step": 170302
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.117615222930908,
      "learning_rate": 9.521871994700592e-05,
      "loss": 3.017,
      "step": 170303
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.8726518154144287,
      "learning_rate": 9.521573063141544e-05,
      "loss": 2.9646,
      "step": 170304
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7518656253814697,
      "learning_rate": 9.521274135389798e-05,
      "loss": 2.9372,
      "step": 170305
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5962021350860596,
      "learning_rate": 9.520975211445447e-05,
      "loss": 2.9613,
      "step": 170306
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.62471604347229,
      "learning_rate": 9.520676291308524e-05,
      "loss": 2.9977,
      "step": 170307
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.472719430923462,
      "learning_rate": 9.520377374979102e-05,
      "loss": 2.9817,
      "step": 170308
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.216501474380493,
      "learning_rate": 9.520078462457218e-05,
      "loss": 2.9491,
      "step": 170309
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.429088830947876,
      "learning_rate": 9.519779553742961e-05,
      "loss": 2.8376,
      "step": 170310
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.671053647994995,
      "learning_rate": 9.519480648836342e-05,
      "loss": 2.9253,
      "step": 170311
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.526944160461426,
      "learning_rate": 9.519181747737447e-05,
      "loss": 2.9766,
      "step": 170312
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.426848888397217,
      "learning_rate": 9.518882850446313e-05,
      "loss": 3.2182,
      "step": 170313
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.393484115600586,
      "learning_rate": 9.518583956963016e-05,
      "loss": 2.9056,
      "step": 170314
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.006846904754639,
      "learning_rate": 9.518285067287589e-05,
      "loss": 2.9026,
      "step": 170315
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8695008754730225,
      "learning_rate": 9.517986181420123e-05,
      "loss": 3.0153,
      "step": 170316
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.881255865097046,
      "learning_rate": 9.517687299360628e-05,
      "loss": 2.8498,
      "step": 170317
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.272188901901245,
      "learning_rate": 9.51738842110919e-05,
      "loss": 3.0084,
      "step": 170318
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5242998600006104,
      "learning_rate": 9.517089546665849e-05,
      "loss": 3.1109,
      "step": 170319
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.70182204246521,
      "learning_rate": 9.516790676030675e-05,
      "loss": 3.0356,
      "step": 170320
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6862950325012207,
      "learning_rate": 9.516491809203709e-05,
      "loss": 2.9762,
      "step": 170321
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6905791759490967,
      "learning_rate": 9.516192946185033e-05,
      "loss": 2.9982,
      "step": 170322
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.266594886779785,
      "learning_rate": 9.51589408697466e-05,
      "loss": 3.0445,
      "step": 170323
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5085105895996094,
      "learning_rate": 9.515595231572685e-05,
      "loss": 2.7255,
      "step": 170324
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7847094535827637,
      "learning_rate": 9.515296379979133e-05,
      "loss": 2.9338,
      "step": 170325
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.470461130142212,
      "learning_rate": 9.514997532194084e-05,
      "loss": 3.1559,
      "step": 170326
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9696905612945557,
      "learning_rate": 9.514698688217575e-05,
      "loss": 2.9497,
      "step": 170327
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5932371616363525,
      "learning_rate": 9.51439984804968e-05,
      "loss": 2.816,
      "step": 170328
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7120742797851562,
      "learning_rate": 9.514101011690446e-05,
      "loss": 2.846,
      "step": 170329
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.851827621459961,
      "learning_rate": 9.513802179139927e-05,
      "loss": 2.9831,
      "step": 170330
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6808483600616455,
      "learning_rate": 9.51350335039817e-05,
      "loss": 3.1148,
      "step": 170331
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.404513359069824,
      "learning_rate": 9.513204525465248e-05,
      "loss": 2.7646,
      "step": 170332
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8112292289733887,
      "learning_rate": 9.512905704341196e-05,
      "loss": 2.6901,
      "step": 170333
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.951828718185425,
      "learning_rate": 9.512606887026095e-05,
      "loss": 3.1645,
      "step": 170334
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2756714820861816,
      "learning_rate": 9.512308073519985e-05,
      "loss": 3.0861,
      "step": 170335
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7836930751800537,
      "learning_rate": 9.512009263822922e-05,
      "loss": 2.8903,
      "step": 170336
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.835326910018921,
      "learning_rate": 9.511710457934957e-05,
      "loss": 2.812,
      "step": 170337
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1968181133270264,
      "learning_rate": 9.51141165585616e-05,
      "loss": 2.8913,
      "step": 170338
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2676942348480225,
      "learning_rate": 9.511112857586567e-05,
      "loss": 2.8484,
      "step": 170339
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.919050931930542,
      "learning_rate": 9.510814063126256e-05,
      "loss": 2.7807,
      "step": 170340
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4020299911499023,
      "learning_rate": 9.510515272475272e-05,
      "loss": 3.1238,
      "step": 170341
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2447333335876465,
      "learning_rate": 9.51021648563366e-05,
      "loss": 2.9685,
      "step": 170342
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.156134843826294,
      "learning_rate": 9.509917702601495e-05,
      "loss": 3.1496,
      "step": 170343
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.604315996170044,
      "learning_rate": 9.509618923378822e-05,
      "loss": 2.7933,
      "step": 170344
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.391242504119873,
      "learning_rate": 9.509320147965688e-05,
      "loss": 2.8859,
      "step": 170345
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.705509662628174,
      "learning_rate": 9.509021376362171e-05,
      "loss": 3.0177,
      "step": 170346
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4859542846679688,
      "learning_rate": 9.508722608568313e-05,
      "loss": 2.7489,
      "step": 170347
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5446274280548096,
      "learning_rate": 9.508423844584158e-05,
      "loss": 3.1419,
      "step": 170348
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1132149696350098,
      "learning_rate": 9.508125084409785e-05,
      "loss": 2.8022,
      "step": 170349
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3766980171203613,
      "learning_rate": 9.507826328045237e-05,
      "loss": 2.8292,
      "step": 170350
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6361920833587646,
      "learning_rate": 9.507527575490562e-05,
      "loss": 2.876,
      "step": 170351
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5320138931274414,
      "learning_rate": 9.507228826745837e-05,
      "loss": 2.9571,
      "step": 170352
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0372941493988037,
      "learning_rate": 9.506930081811103e-05,
      "loss": 3.1329,
      "step": 170353
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8572051525115967,
      "learning_rate": 9.506631340686405e-05,
      "loss": 2.9122,
      "step": 170354
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7028391361236572,
      "learning_rate": 9.506332603371822e-05,
      "loss": 2.9134,
      "step": 170355
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9644529819488525,
      "learning_rate": 9.506033869867393e-05,
      "loss": 2.9943,
      "step": 170356
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.711050271987915,
      "learning_rate": 9.505735140173184e-05,
      "loss": 2.8861,
      "step": 170357
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.222647190093994,
      "learning_rate": 9.505436414289248e-05,
      "loss": 3.2031,
      "step": 170358
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.8611278533935547,
      "learning_rate": 9.505137692215638e-05,
      "loss": 2.8568,
      "step": 170359
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.795532464981079,
      "learning_rate": 9.5048389739524e-05,
      "loss": 2.699,
      "step": 170360
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.212169647216797,
      "learning_rate": 9.504540259499609e-05,
      "loss": 3.0138,
      "step": 170361
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7489774227142334,
      "learning_rate": 9.5042415488573e-05,
      "loss": 2.831,
      "step": 170362
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5963499546051025,
      "learning_rate": 9.503942842025551e-05,
      "loss": 2.7263,
      "step": 170363
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8961713314056396,
      "learning_rate": 9.503644139004394e-05,
      "loss": 2.8665,
      "step": 170364
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9075324535369873,
      "learning_rate": 9.503345439793918e-05,
      "loss": 3.1015,
      "step": 170365
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.027085065841675,
      "learning_rate": 9.503046744394137e-05,
      "loss": 2.8769,
      "step": 170366
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.669457197189331,
      "learning_rate": 9.502748052805135e-05,
      "loss": 2.9245,
      "step": 170367
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.501220941543579,
      "learning_rate": 9.502449365026951e-05,
      "loss": 2.9658,
      "step": 170368
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.501837730407715,
      "learning_rate": 9.502150681059658e-05,
      "loss": 2.9411,
      "step": 170369
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1542272567749023,
      "learning_rate": 9.501852000903291e-05,
      "loss": 3.0555,
      "step": 170370
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5638468265533447,
      "learning_rate": 9.501553324557938e-05,
      "loss": 2.878,
      "step": 170371
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.200554132461548,
      "learning_rate": 9.501254652023613e-05,
      "loss": 2.9332,
      "step": 170372
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6102702617645264,
      "learning_rate": 9.5009559833004e-05,
      "loss": 2.9533,
      "step": 170373
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5411593914031982,
      "learning_rate": 9.500657318388338e-05,
      "loss": 2.8433,
      "step": 170374
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7141692638397217,
      "learning_rate": 9.500358657287501e-05,
      "loss": 2.9133,
      "step": 170375
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5217671394348145,
      "learning_rate": 9.500059999997926e-05,
      "loss": 3.04,
      "step": 170376
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.912734508514404,
      "learning_rate": 9.499761346519691e-05,
      "loss": 3.0087,
      "step": 170377
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6790220737457275,
      "learning_rate": 9.499462696852821e-05,
      "loss": 3.037,
      "step": 170378
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5691111087799072,
      "learning_rate": 9.499164050997398e-05,
      "loss": 2.8191,
      "step": 170379
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0412681102752686,
      "learning_rate": 9.498865408953458e-05,
      "loss": 3.1029,
      "step": 170380
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.784487724304199,
      "learning_rate": 9.498566770721074e-05,
      "loss": 2.8184,
      "step": 170381
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.341114044189453,
      "learning_rate": 9.498268136300285e-05,
      "loss": 3.066,
      "step": 170382
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6480743885040283,
      "learning_rate": 9.497969505691174e-05,
      "loss": 3.0495,
      "step": 170383
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3613951206207275,
      "learning_rate": 9.497670878893757e-05,
      "loss": 2.9162,
      "step": 170384
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8030335903167725,
      "learning_rate": 9.497372255908121e-05,
      "loss": 3.2311,
      "step": 170385
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.99239444732666,
      "learning_rate": 9.497073636734303e-05,
      "loss": 2.9569,
      "step": 170386
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3103997707366943,
      "learning_rate": 9.496775021372372e-05,
      "loss": 3.2124,
      "step": 170387
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.099630355834961,
      "learning_rate": 9.496476409822368e-05,
      "loss": 2.6768,
      "step": 170388
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1106598377227783,
      "learning_rate": 9.496177802084378e-05,
      "loss": 3.0576,
      "step": 170389
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.222419023513794,
      "learning_rate": 9.495879198158412e-05,
      "loss": 2.9729,
      "step": 170390
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.840291976928711,
      "learning_rate": 9.495580598044565e-05,
      "loss": 3.0732,
      "step": 170391
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4475107192993164,
      "learning_rate": 9.495282001742863e-05,
      "loss": 2.9712,
      "step": 170392
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0436818599700928,
      "learning_rate": 9.494983409253387e-05,
      "loss": 3.0827,
      "step": 170393
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.029453992843628,
      "learning_rate": 9.494684820576166e-05,
      "loss": 2.9536,
      "step": 170394
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.545233726501465,
      "learning_rate": 9.494386235711293e-05,
      "loss": 3.017,
      "step": 170395
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.173921585083008,
      "learning_rate": 9.49408765465878e-05,
      "loss": 3.0054,
      "step": 170396
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4089624881744385,
      "learning_rate": 9.493789077418715e-05,
      "loss": 2.8193,
      "step": 170397
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2422661781311035,
      "learning_rate": 9.49349050399113e-05,
      "loss": 2.9303,
      "step": 170398
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.94342041015625,
      "learning_rate": 9.493191934376105e-05,
      "loss": 2.7195,
      "step": 170399
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4458770751953125,
      "learning_rate": 9.492893368573671e-05,
      "loss": 3.03,
      "step": 170400
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.994509696960449,
      "learning_rate": 9.492594806583914e-05,
      "loss": 3.0195,
      "step": 170401
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5865561962127686,
      "learning_rate": 9.492296248406852e-05,
      "loss": 2.8542,
      "step": 170402
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8954696655273438,
      "learning_rate": 9.491997694042568e-05,
      "loss": 2.9705,
      "step": 170403
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6294844150543213,
      "learning_rate": 9.4916991434911e-05,
      "loss": 3.07,
      "step": 170404
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.56715989112854,
      "learning_rate": 9.491400596752523e-05,
      "loss": 2.9534,
      "step": 170405
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2459750175476074,
      "learning_rate": 9.491102053826869e-05,
      "loss": 3.0216,
      "step": 170406
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.712449312210083,
      "learning_rate": 9.490803514714228e-05,
      "loss": 3.0235,
      "step": 170407
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9410438537597656,
      "learning_rate": 9.490504979414614e-05,
      "loss": 2.8235,
      "step": 170408
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4402692317962646,
      "learning_rate": 9.490206447928112e-05,
      "loss": 2.6144,
      "step": 170409
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4396376609802246,
      "learning_rate": 9.489907920254758e-05,
      "loss": 2.7705,
      "step": 170410
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.504572629928589,
      "learning_rate": 9.489609396394626e-05,
      "loss": 2.9115,
      "step": 170411
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.825413227081299,
      "learning_rate": 9.489310876347754e-05,
      "loss": 2.6991,
      "step": 170412
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2279794216156006,
      "learning_rate": 9.489012360114226e-05,
      "loss": 2.9662,
      "step": 170413
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.512053966522217,
      "learning_rate": 9.48871384769406e-05,
      "loss": 2.7072,
      "step": 170414
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6695666313171387,
      "learning_rate": 9.488415339087336e-05,
      "loss": 3.0565,
      "step": 170415
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.872478723526001,
      "learning_rate": 9.488116834294092e-05,
      "loss": 3.1166,
      "step": 170416
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.019770860671997,
      "learning_rate": 9.487818333314408e-05,
      "loss": 2.8179,
      "step": 170417
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.416684150695801,
      "learning_rate": 9.487519836148314e-05,
      "loss": 3.043,
      "step": 170418
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.434952974319458,
      "learning_rate": 9.487221342795885e-05,
      "loss": 2.5961,
      "step": 170419
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.858231544494629,
      "learning_rate": 9.486922853257173e-05,
      "loss": 3.0929,
      "step": 170420
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5219075679779053,
      "learning_rate": 9.486624367532227e-05,
      "loss": 2.938,
      "step": 170421
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1499199867248535,
      "learning_rate": 9.486325885621095e-05,
      "loss": 2.9114,
      "step": 170422
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8907954692840576,
      "learning_rate": 9.486027407523851e-05,
      "loss": 2.6889,
      "step": 170423
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7530109882354736,
      "learning_rate": 9.485728933240535e-05,
      "loss": 2.9162,
      "step": 170424
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.887920618057251,
      "learning_rate": 9.485430462771216e-05,
      "loss": 2.9126,
      "step": 170425
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7050535678863525,
      "learning_rate": 9.485131996115945e-05,
      "loss": 2.9685,
      "step": 170426
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4305026531219482,
      "learning_rate": 9.484833533274762e-05,
      "loss": 2.5952,
      "step": 170427
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.822384834289551,
      "learning_rate": 9.48453507424775e-05,
      "loss": 2.8955,
      "step": 170428
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.953857898712158,
      "learning_rate": 9.484236619034946e-05,
      "loss": 2.9212,
      "step": 170429
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.922384023666382,
      "learning_rate": 9.483938167636401e-05,
      "loss": 3.0373,
      "step": 170430
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.357168674468994,
      "learning_rate": 9.483639720052192e-05,
      "loss": 2.9365,
      "step": 170431
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.162820339202881,
      "learning_rate": 9.483341276282357e-05,
      "loss": 2.734,
      "step": 170432
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.227019786834717,
      "learning_rate": 9.48304283632695e-05,
      "loss": 3.1133,
      "step": 170433
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6356680393218994,
      "learning_rate": 9.482744400186043e-05,
      "loss": 3.0771,
      "step": 170434
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.525351047515869,
      "learning_rate": 9.482445967859678e-05,
      "loss": 2.9398,
      "step": 170435
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4400851726531982,
      "learning_rate": 9.482147539347906e-05,
      "loss": 3.0159,
      "step": 170436
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.773582696914673,
      "learning_rate": 9.481849114650798e-05,
      "loss": 3.1175,
      "step": 170437
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5426242351531982,
      "learning_rate": 9.481550693768404e-05,
      "loss": 3.1266,
      "step": 170438
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8407366275787354,
      "learning_rate": 9.481252276700763e-05,
      "loss": 2.9795,
      "step": 170439
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.276853322982788,
      "learning_rate": 9.480953863447958e-05,
      "loss": 2.7763,
      "step": 170440
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4338221549987793,
      "learning_rate": 9.480655454010023e-05,
      "loss": 3.0731,
      "step": 170441
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.720539093017578,
      "learning_rate": 9.480357048387027e-05,
      "loss": 3.0629,
      "step": 170442
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.205085515975952,
      "learning_rate": 9.480058646579021e-05,
      "loss": 2.9687,
      "step": 170443
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.25834059715271,
      "learning_rate": 9.479760248586059e-05,
      "loss": 2.9094,
      "step": 170444
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.521545171737671,
      "learning_rate": 9.47946185440819e-05,
      "loss": 2.9112,
      "step": 170445
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4918975830078125,
      "learning_rate": 9.479163464045483e-05,
      "loss": 3.0341,
      "step": 170446
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4939942359924316,
      "learning_rate": 9.478865077497976e-05,
      "loss": 3.0378,
      "step": 170447
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3609540462493896,
      "learning_rate": 9.478566694765751e-05,
      "loss": 2.7229,
      "step": 170448
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2958602905273438,
      "learning_rate": 9.478268315848833e-05,
      "loss": 2.8543,
      "step": 170449
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.405531406402588,
      "learning_rate": 9.477969940747315e-05,
      "loss": 2.9389,
      "step": 170450
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.8889145851135254,
      "learning_rate": 9.477671569461206e-05,
      "loss": 2.9444,
      "step": 170451
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1263632774353027,
      "learning_rate": 9.477373201990599e-05,
      "loss": 3.0179,
      "step": 170452
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.580958366394043,
      "learning_rate": 9.477074838335523e-05,
      "loss": 2.9263,
      "step": 170453
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5599372386932373,
      "learning_rate": 9.476776478496059e-05,
      "loss": 2.974,
      "step": 170454
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.051290988922119,
      "learning_rate": 9.476478122472238e-05,
      "loss": 3.1221,
      "step": 170455
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3880064487457275,
      "learning_rate": 9.476179770264146e-05,
      "loss": 3.1563,
      "step": 170456
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9617342948913574,
      "learning_rate": 9.475881421871802e-05,
      "loss": 3.0058,
      "step": 170457
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.804440975189209,
      "learning_rate": 9.475583077295288e-05,
      "loss": 2.9347,
      "step": 170458
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.738795518875122,
      "learning_rate": 9.475284736534642e-05,
      "loss": 2.9006,
      "step": 170459
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.917071580886841,
      "learning_rate": 9.474986399589937e-05,
      "loss": 2.7692,
      "step": 170460
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.074800968170166,
      "learning_rate": 9.47468806646121e-05,
      "loss": 2.8298,
      "step": 170461
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.314352512359619,
      "learning_rate": 9.474389737148543e-05,
      "loss": 2.8494,
      "step": 170462
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.131002187728882,
      "learning_rate": 9.474091411651956e-05,
      "loss": 2.852,
      "step": 170463
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6714048385620117,
      "learning_rate": 9.473793089971534e-05,
      "loss": 2.8956,
      "step": 170464
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7053632736206055,
      "learning_rate": 9.473494772107309e-05,
      "loss": 2.9835,
      "step": 170465
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8214914798736572,
      "learning_rate": 9.473196458059362e-05,
      "loss": 3.0304,
      "step": 170466
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.732330322265625,
      "learning_rate": 9.472898147827722e-05,
      "loss": 2.8484,
      "step": 170467
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5132853984832764,
      "learning_rate": 9.47259984141248e-05,
      "loss": 3.1144,
      "step": 170468
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.506882429122925,
      "learning_rate": 9.47230153881365e-05,
      "loss": 3.0707,
      "step": 170469
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4122140407562256,
      "learning_rate": 9.472003240031315e-05,
      "loss": 3.1724,
      "step": 170470
    },
    {
      "epoch": 2.22,
      "grad_norm": 5.7523651123046875,
      "learning_rate": 9.47170494506551e-05,
      "loss": 2.866,
      "step": 170471
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2539660930633545,
      "learning_rate": 9.471406653916316e-05,
      "loss": 3.0226,
      "step": 170472
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.382486343383789,
      "learning_rate": 9.471108366583765e-05,
      "loss": 2.9505,
      "step": 170473
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8038489818573,
      "learning_rate": 9.47081008306794e-05,
      "loss": 3.0517,
      "step": 170474
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6660430431365967,
      "learning_rate": 9.470511803368861e-05,
      "loss": 2.8349,
      "step": 170475
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4760282039642334,
      "learning_rate": 9.470213527486611e-05,
      "loss": 3.0943,
      "step": 170476
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7140676975250244,
      "learning_rate": 9.469915255421225e-05,
      "loss": 3.1582,
      "step": 170477
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.646507978439331,
      "learning_rate": 9.469616987172778e-05,
      "loss": 2.7746,
      "step": 170478
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8609397411346436,
      "learning_rate": 9.469318722741308e-05,
      "loss": 2.8455,
      "step": 170479
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.248321771621704,
      "learning_rate": 9.469020462126902e-05,
      "loss": 3.0683,
      "step": 170480
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.721940279006958,
      "learning_rate": 9.468722205329568e-05,
      "loss": 3.1491,
      "step": 170481
    },
    {
      "epoch": 2.22,
      "grad_norm": 5.686625003814697,
      "learning_rate": 9.468423952349399e-05,
      "loss": 2.7367,
      "step": 170482
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.1270222663879395,
      "learning_rate": 9.468125703186428e-05,
      "loss": 2.9281,
      "step": 170483
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5244781970977783,
      "learning_rate": 9.467827457840728e-05,
      "loss": 2.968,
      "step": 170484
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2113261222839355,
      "learning_rate": 9.467529216312339e-05,
      "loss": 2.7279,
      "step": 170485
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5867807865142822,
      "learning_rate": 9.46723097860133e-05,
      "loss": 3.0657,
      "step": 170486
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.499922275543213,
      "learning_rate": 9.466932744707753e-05,
      "loss": 2.8871,
      "step": 170487
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.331852436065674,
      "learning_rate": 9.466634514631663e-05,
      "loss": 3.0376,
      "step": 170488
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6975438594818115,
      "learning_rate": 9.466336288373097e-05,
      "loss": 2.9065,
      "step": 170489
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.331228256225586,
      "learning_rate": 9.466038065932142e-05,
      "loss": 3.0744,
      "step": 170490
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3429768085479736,
      "learning_rate": 9.465739847308826e-05,
      "loss": 3.05,
      "step": 170491
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4526965618133545,
      "learning_rate": 9.465441632503226e-05,
      "loss": 2.909,
      "step": 170492
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.051542282104492,
      "learning_rate": 9.46514342151539e-05,
      "loss": 2.854,
      "step": 170493
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4273033142089844,
      "learning_rate": 9.464845214345372e-05,
      "loss": 2.8507,
      "step": 170494
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.558978319168091,
      "learning_rate": 9.464547010993214e-05,
      "loss": 2.6919,
      "step": 170495
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8122172355651855,
      "learning_rate": 9.464248811458996e-05,
      "loss": 3.0463,
      "step": 170496
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.318842649459839,
      "learning_rate": 9.463950615742752e-05,
      "loss": 3.0733,
      "step": 170497
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.593031406402588,
      "learning_rate": 9.463652423844559e-05,
      "loss": 3.1362,
      "step": 170498
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6044363975524902,
      "learning_rate": 9.463354235764458e-05,
      "loss": 3.0194,
      "step": 170499
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.81451153755188,
      "learning_rate": 9.463056051502505e-05,
      "loss": 2.8443,
      "step": 170500
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7542898654937744,
      "learning_rate": 9.462757871058752e-05,
      "loss": 3.0965,
      "step": 170501
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.820063591003418,
      "learning_rate": 9.462459694433269e-05,
      "loss": 2.9008,
      "step": 170502
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.507103681564331,
      "learning_rate": 9.46216152162609e-05,
      "loss": 2.8532,
      "step": 170503
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.336275815963745,
      "learning_rate": 9.461863352637294e-05,
      "loss": 2.9034,
      "step": 170504
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4034101963043213,
      "learning_rate": 9.461565187466928e-05,
      "loss": 2.8689,
      "step": 170505
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1486527919769287,
      "learning_rate": 9.46126702611504e-05,
      "loss": 3.01,
      "step": 170506
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8384432792663574,
      "learning_rate": 9.460968868581683e-05,
      "loss": 2.8466,
      "step": 170507
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6397440433502197,
      "learning_rate": 9.460670714866929e-05,
      "loss": 2.7208,
      "step": 170508
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4356021881103516,
      "learning_rate": 9.460372564970814e-05,
      "loss": 3.0901,
      "step": 170509
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9748072624206543,
      "learning_rate": 9.460074418893415e-05,
      "loss": 3.0846,
      "step": 170510
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.107686758041382,
      "learning_rate": 9.459776276634773e-05,
      "loss": 2.7577,
      "step": 170511
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.0583109855651855,
      "learning_rate": 9.459478138194947e-05,
      "loss": 3.0201,
      "step": 170512
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.64894437789917,
      "learning_rate": 9.459180003573983e-05,
      "loss": 2.9357,
      "step": 170513
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9533209800720215,
      "learning_rate": 9.458881872771957e-05,
      "loss": 3.0485,
      "step": 170514
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8802425861358643,
      "learning_rate": 9.4585837457889e-05,
      "loss": 2.7376,
      "step": 170515
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.122295379638672,
      "learning_rate": 9.458285622624889e-05,
      "loss": 2.8135,
      "step": 170516
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6824400424957275,
      "learning_rate": 9.457987503279971e-05,
      "loss": 3.088,
      "step": 170517
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.832183837890625,
      "learning_rate": 9.457689387754193e-05,
      "loss": 2.8007,
      "step": 170518
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.505591630935669,
      "learning_rate": 9.457391276047626e-05,
      "loss": 3.0515,
      "step": 170519
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2436630725860596,
      "learning_rate": 9.45709316816032e-05,
      "loss": 3.1175,
      "step": 170520
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.686371326446533,
      "learning_rate": 9.456795064092316e-05,
      "loss": 2.8321,
      "step": 170521
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.524933338165283,
      "learning_rate": 9.456496963843694e-05,
      "loss": 2.7523,
      "step": 170522
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.772690534591675,
      "learning_rate": 9.456198867414496e-05,
      "loss": 3.0018,
      "step": 170523
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.926016330718994,
      "learning_rate": 9.455900774804766e-05,
      "loss": 2.9889,
      "step": 170524
    },
    {
      "epoch": 2.22,
      "grad_norm": 5.034706115722656,
      "learning_rate": 9.455602686014586e-05,
      "loss": 3.0213,
      "step": 170525
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9467246532440186,
      "learning_rate": 9.455304601043984e-05,
      "loss": 2.8281,
      "step": 170526
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.667935609817505,
      "learning_rate": 9.45500651989304e-05,
      "loss": 2.8262,
      "step": 170527
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9840409755706787,
      "learning_rate": 9.454708442561799e-05,
      "loss": 2.8627,
      "step": 170528
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.28755521774292,
      "learning_rate": 9.454410369050312e-05,
      "loss": 2.9745,
      "step": 170529
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.926259756088257,
      "learning_rate": 9.454112299358633e-05,
      "loss": 2.7216,
      "step": 170530
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.370513916015625,
      "learning_rate": 9.453814233486828e-05,
      "loss": 3.0857,
      "step": 170531
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3995251655578613,
      "learning_rate": 9.453516171434938e-05,
      "loss": 2.7576,
      "step": 170532
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.038666725158691,
      "learning_rate": 9.453218113203039e-05,
      "loss": 2.9135,
      "step": 170533
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9215774536132812,
      "learning_rate": 9.452920058791171e-05,
      "loss": 3.0028,
      "step": 170534
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.02143931388855,
      "learning_rate": 9.452622008199398e-05,
      "loss": 3.1815,
      "step": 170535
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2277750968933105,
      "learning_rate": 9.452323961427755e-05,
      "loss": 3.0996,
      "step": 170536
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.920480251312256,
      "learning_rate": 9.452025918476326e-05,
      "loss": 2.8161,
      "step": 170537
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.729656219482422,
      "learning_rate": 9.451727879345145e-05,
      "loss": 3.1017,
      "step": 170538
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8124444484710693,
      "learning_rate": 9.45142984403428e-05,
      "loss": 3.0068,
      "step": 170539
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.675276756286621,
      "learning_rate": 9.451131812543778e-05,
      "loss": 3.0407,
      "step": 170540
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6373445987701416,
      "learning_rate": 9.450833784873714e-05,
      "loss": 2.8957,
      "step": 170541
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.842654228210449,
      "learning_rate": 9.450535761024113e-05,
      "loss": 2.9597,
      "step": 170542
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.418729543685913,
      "learning_rate": 9.450237740995051e-05,
      "loss": 3.015,
      "step": 170543
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5458266735076904,
      "learning_rate": 9.449939724786567e-05,
      "loss": 2.9136,
      "step": 170544
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6379244327545166,
      "learning_rate": 9.44964171239874e-05,
      "loss": 3.1445,
      "step": 170545
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.762603282928467,
      "learning_rate": 9.449343703831604e-05,
      "loss": 2.9753,
      "step": 170546
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.533057451248169,
      "learning_rate": 9.449045699085237e-05,
      "loss": 3.0076,
      "step": 170547
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.444122076034546,
      "learning_rate": 9.448747698159664e-05,
      "loss": 3.0531,
      "step": 170548
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0084409713745117,
      "learning_rate": 9.448449701054969e-05,
      "loss": 3.1618,
      "step": 170549
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.763484001159668,
      "learning_rate": 9.44815170777118e-05,
      "loss": 2.7764,
      "step": 170550
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.683332920074463,
      "learning_rate": 9.447853718308381e-05,
      "loss": 2.8014,
      "step": 170551
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.772355318069458,
      "learning_rate": 9.447555732666602e-05,
      "loss": 3.0229,
      "step": 170552
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.514662265777588,
      "learning_rate": 9.447257750845925e-05,
      "loss": 3.0451,
      "step": 170553
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.417396306991577,
      "learning_rate": 9.446959772846386e-05,
      "loss": 2.9195,
      "step": 170554
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2680671215057373,
      "learning_rate": 9.446661798668045e-05,
      "loss": 2.9029,
      "step": 170555
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6830623149871826,
      "learning_rate": 9.44636382831095e-05,
      "loss": 2.9382,
      "step": 170556
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9670157432556152,
      "learning_rate": 9.446065861775171e-05,
      "loss": 2.7237,
      "step": 170557
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8040549755096436,
      "learning_rate": 9.44576789906075e-05,
      "loss": 2.924,
      "step": 170558
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.381667137145996,
      "learning_rate": 9.445469940167758e-05,
      "loss": 2.9723,
      "step": 170559
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0523228645324707,
      "learning_rate": 9.445171985096239e-05,
      "loss": 2.9816,
      "step": 170560
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.17936110496521,
      "learning_rate": 9.44487403384625e-05,
      "loss": 2.7807,
      "step": 170561
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5783495903015137,
      "learning_rate": 9.44457608641784e-05,
      "loss": 3.1225,
      "step": 170562
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.963221549987793,
      "learning_rate": 9.444278142811082e-05,
      "loss": 3.2073,
      "step": 170563
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5597126483917236,
      "learning_rate": 9.443980203026008e-05,
      "loss": 2.7315,
      "step": 170564
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.469754457473755,
      "learning_rate": 9.443682267062695e-05,
      "loss": 2.8273,
      "step": 170565
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3877127170562744,
      "learning_rate": 9.443384334921194e-05,
      "loss": 2.6484,
      "step": 170566
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.571720838546753,
      "learning_rate": 9.443086406601552e-05,
      "loss": 2.833,
      "step": 170567
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9419469833374023,
      "learning_rate": 9.44278848210382e-05,
      "loss": 2.8752,
      "step": 170568
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.490616798400879,
      "learning_rate": 9.442490561428071e-05,
      "loss": 3.2,
      "step": 170569
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1270439624786377,
      "learning_rate": 9.442192644574341e-05,
      "loss": 2.9007,
      "step": 170570
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5006632804870605,
      "learning_rate": 9.441894731542705e-05,
      "loss": 2.8595,
      "step": 170571
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6877870559692383,
      "learning_rate": 9.44159682233321e-05,
      "loss": 2.9265,
      "step": 170572
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.593783140182495,
      "learning_rate": 9.441298916945907e-05,
      "loss": 3.0804,
      "step": 170573
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4050934314727783,
      "learning_rate": 9.441001015380848e-05,
      "loss": 3.0539,
      "step": 170574
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.63400936126709,
      "learning_rate": 9.440703117638105e-05,
      "loss": 2.9816,
      "step": 170575
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5269510746002197,
      "learning_rate": 9.440405223717713e-05,
      "loss": 2.6936,
      "step": 170576
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1775264739990234,
      "learning_rate": 9.440107333619747e-05,
      "loss": 2.9328,
      "step": 170577
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.704380989074707,
      "learning_rate": 9.439809447344254e-05,
      "loss": 2.9287,
      "step": 170578
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.915508985519409,
      "learning_rate": 9.439511564891287e-05,
      "loss": 2.9056,
      "step": 170579
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.158724546432495,
      "learning_rate": 9.439213686260895e-05,
      "loss": 3.054,
      "step": 170580
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4330015182495117,
      "learning_rate": 9.438915811453148e-05,
      "loss": 2.7966,
      "step": 170581
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5639591217041016,
      "learning_rate": 9.438617940468088e-05,
      "loss": 2.7635,
      "step": 170582
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.66281795501709,
      "learning_rate": 9.438320073305786e-05,
      "loss": 2.9491,
      "step": 170583
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8511271476745605,
      "learning_rate": 9.438022209966288e-05,
      "loss": 2.9315,
      "step": 170584
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8146469593048096,
      "learning_rate": 9.437724350449651e-05,
      "loss": 2.7553,
      "step": 170585
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.729524612426758,
      "learning_rate": 9.437426494755917e-05,
      "loss": 2.9469,
      "step": 170586
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8074352741241455,
      "learning_rate": 9.437128642885166e-05,
      "loss": 2.9992,
      "step": 170587
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.904661178588867,
      "learning_rate": 9.436830794837431e-05,
      "loss": 3.1012,
      "step": 170588
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.703530788421631,
      "learning_rate": 9.436532950612784e-05,
      "loss": 3.1378,
      "step": 170589
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6169815063476562,
      "learning_rate": 9.436235110211276e-05,
      "loss": 2.9355,
      "step": 170590
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.544468402862549,
      "learning_rate": 9.435937273632962e-05,
      "loss": 3.0752,
      "step": 170591
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6304898262023926,
      "learning_rate": 9.435639440877883e-05,
      "loss": 2.8451,
      "step": 170592
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5372705459594727,
      "learning_rate": 9.435341611946116e-05,
      "loss": 3.0024,
      "step": 170593
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.844167470932007,
      "learning_rate": 9.435043786837698e-05,
      "loss": 2.8061,
      "step": 170594
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4884657859802246,
      "learning_rate": 9.434745965552704e-05,
      "loss": 2.7281,
      "step": 170595
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.250723361968994,
      "learning_rate": 9.43444814809118e-05,
      "loss": 2.8499,
      "step": 170596
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.895118236541748,
      "learning_rate": 9.434150334453178e-05,
      "loss": 2.9873,
      "step": 170597
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1748008728027344,
      "learning_rate": 9.433852524638746e-05,
      "loss": 2.9641,
      "step": 170598
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.615074634552002,
      "learning_rate": 9.433554718647962e-05,
      "loss": 2.6836,
      "step": 170599
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.808135747909546,
      "learning_rate": 9.433256916480856e-05,
      "loss": 2.7106,
      "step": 170600
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2463884353637695,
      "learning_rate": 9.432959118137504e-05,
      "loss": 2.8606,
      "step": 170601
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.268343925476074,
      "learning_rate": 9.432661323617957e-05,
      "loss": 2.8308,
      "step": 170602
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9825806617736816,
      "learning_rate": 9.432363532922255e-05,
      "loss": 2.681,
      "step": 170603
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.205639123916626,
      "learning_rate": 9.432065746050473e-05,
      "loss": 2.939,
      "step": 170604
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.674572706222534,
      "learning_rate": 9.43176796300266e-05,
      "loss": 3.0176,
      "step": 170605
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4211061000823975,
      "learning_rate": 9.431470183778862e-05,
      "loss": 2.903,
      "step": 170606
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.613123655319214,
      "learning_rate": 9.431172408379146e-05,
      "loss": 2.8571,
      "step": 170607
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3736507892608643,
      "learning_rate": 9.430874636803569e-05,
      "loss": 2.7878,
      "step": 170608
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.165050506591797,
      "learning_rate": 9.430576869052166e-05,
      "loss": 3.0609,
      "step": 170609
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.891235113143921,
      "learning_rate": 9.430279105125021e-05,
      "loss": 2.7531,
      "step": 170610
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.224578380584717,
      "learning_rate": 9.429981345022164e-05,
      "loss": 3.0872,
      "step": 170611
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3478288650512695,
      "learning_rate": 9.429683588743671e-05,
      "loss": 3.1069,
      "step": 170612
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.499163866043091,
      "learning_rate": 9.429385836289588e-05,
      "loss": 3.0181,
      "step": 170613
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.338125228881836,
      "learning_rate": 9.429088087659972e-05,
      "loss": 2.907,
      "step": 170614
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.51767897605896,
      "learning_rate": 9.428790342854865e-05,
      "loss": 2.8259,
      "step": 170615
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6911160945892334,
      "learning_rate": 9.428492601874345e-05,
      "loss": 2.8042,
      "step": 170616
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.127696990966797,
      "learning_rate": 9.428194864718448e-05,
      "loss": 3.003,
      "step": 170617
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.494412660598755,
      "learning_rate": 9.427897131387245e-05,
      "loss": 2.6688,
      "step": 170618
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.402723789215088,
      "learning_rate": 9.427599401880787e-05,
      "loss": 3.0109,
      "step": 170619
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.120480537414551,
      "learning_rate": 9.427301676199115e-05,
      "loss": 2.8303,
      "step": 170620
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3990137577056885,
      "learning_rate": 9.427003954342306e-05,
      "loss": 2.8087,
      "step": 170621
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8305411338806152,
      "learning_rate": 9.426706236310406e-05,
      "loss": 2.9796,
      "step": 170622
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7670083045959473,
      "learning_rate": 9.426408522103459e-05,
      "loss": 2.9878,
      "step": 170623
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4852988719940186,
      "learning_rate": 9.426110811721543e-05,
      "loss": 3.0742,
      "step": 170624
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.8224382400512695,
      "learning_rate": 9.425813105164689e-05,
      "loss": 2.8649,
      "step": 170625
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7677180767059326,
      "learning_rate": 9.425515402432973e-05,
      "loss": 2.8808,
      "step": 170626
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.78596830368042,
      "learning_rate": 9.425217703526446e-05,
      "loss": 3.0309,
      "step": 170627
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.550595283508301,
      "learning_rate": 9.424920008445157e-05,
      "loss": 2.9974,
      "step": 170628
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1822996139526367,
      "learning_rate": 9.424622317189155e-05,
      "loss": 2.6427,
      "step": 170629
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1812286376953125,
      "learning_rate": 9.424324629758515e-05,
      "loss": 2.8836,
      "step": 170630
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.07405948638916,
      "learning_rate": 9.42402694615327e-05,
      "loss": 2.7146,
      "step": 170631
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4469993114471436,
      "learning_rate": 9.423729266373496e-05,
      "loss": 3.0059,
      "step": 170632
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5765767097473145,
      "learning_rate": 9.42343159041924e-05,
      "loss": 2.9522,
      "step": 170633
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.701555013656616,
      "learning_rate": 9.423133918290557e-05,
      "loss": 2.9155,
      "step": 170634
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.471985340118408,
      "learning_rate": 9.42283624998749e-05,
      "loss": 2.8722,
      "step": 170635
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6445064544677734,
      "learning_rate": 9.422538585510118e-05,
      "loss": 2.914,
      "step": 170636
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.194040298461914,
      "learning_rate": 9.422240924858473e-05,
      "loss": 2.9875,
      "step": 170637
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.393198251724243,
      "learning_rate": 9.421943268032633e-05,
      "loss": 2.9614,
      "step": 170638
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6999292373657227,
      "learning_rate": 9.421645615032642e-05,
      "loss": 3.0281,
      "step": 170639
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.757232189178467,
      "learning_rate": 9.421347965858558e-05,
      "loss": 3.0333,
      "step": 170640
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.485461950302124,
      "learning_rate": 9.42105032051042e-05,
      "loss": 2.9662,
      "step": 170641
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0353829860687256,
      "learning_rate": 9.42075267898831e-05,
      "loss": 2.9985,
      "step": 170642
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.349978446960449,
      "learning_rate": 9.420455041292258e-05,
      "loss": 2.9624,
      "step": 170643
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.623844623565674,
      "learning_rate": 9.420157407422344e-05,
      "loss": 2.7151,
      "step": 170644
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.102616786956787,
      "learning_rate": 9.41985977737861e-05,
      "loss": 2.7276,
      "step": 170645
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4912471771240234,
      "learning_rate": 9.419562151161111e-05,
      "loss": 3.0908,
      "step": 170646
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2355339527130127,
      "learning_rate": 9.419264528769896e-05,
      "loss": 2.7506,
      "step": 170647
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.802277088165283,
      "learning_rate": 9.41896691020504e-05,
      "loss": 2.7698,
      "step": 170648
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.503066062927246,
      "learning_rate": 9.418669295466573e-05,
      "loss": 2.8541,
      "step": 170649
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2461025714874268,
      "learning_rate": 9.418371684554575e-05,
      "loss": 2.9966,
      "step": 170650
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2213385105133057,
      "learning_rate": 9.418074077469093e-05,
      "loss": 2.9552,
      "step": 170651
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6577541828155518,
      "learning_rate": 9.417776474210177e-05,
      "loss": 3.0519,
      "step": 170652
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.9227242469787598,
      "learning_rate": 9.417478874777873e-05,
      "loss": 2.8779,
      "step": 170653
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.2924089431762695,
      "learning_rate": 9.417181279172262e-05,
      "loss": 2.8624,
      "step": 170654
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2989697456359863,
      "learning_rate": 9.416883687393371e-05,
      "loss": 2.849,
      "step": 170655
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6495532989501953,
      "learning_rate": 9.416586099441284e-05,
      "loss": 2.6806,
      "step": 170656
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7366116046905518,
      "learning_rate": 9.416288515316043e-05,
      "loss": 2.9947,
      "step": 170657
    },
    {
      "epoch": 2.22,
      "grad_norm": 5.028981685638428,
      "learning_rate": 9.4159909350177e-05,
      "loss": 2.9386,
      "step": 170658
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.560241222381592,
      "learning_rate": 9.415693358546302e-05,
      "loss": 3.0457,
      "step": 170659
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.821150779724121,
      "learning_rate": 9.415395785901927e-05,
      "loss": 2.8029,
      "step": 170660
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.536795139312744,
      "learning_rate": 9.415098217084606e-05,
      "loss": 2.9712,
      "step": 170661
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6900949478149414,
      "learning_rate": 9.414800652094419e-05,
      "loss": 2.9094,
      "step": 170662
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1045658588409424,
      "learning_rate": 9.414503090931409e-05,
      "loss": 2.8599,
      "step": 170663
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.180104970932007,
      "learning_rate": 9.414205533595631e-05,
      "loss": 2.8243,
      "step": 170664
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4138708114624023,
      "learning_rate": 9.413907980087132e-05,
      "loss": 2.9159,
      "step": 170665
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.80800199508667,
      "learning_rate": 9.413610430405985e-05,
      "loss": 2.784,
      "step": 170666
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3358724117279053,
      "learning_rate": 9.413312884552225e-05,
      "loss": 3.0736,
      "step": 170667
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.598069429397583,
      "learning_rate": 9.413015342525931e-05,
      "loss": 2.7755,
      "step": 170668
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4284207820892334,
      "learning_rate": 9.412717804327145e-05,
      "loss": 2.8987,
      "step": 170669
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.443418502807617,
      "learning_rate": 9.412420269955925e-05,
      "loss": 2.9413,
      "step": 170670
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4019806385040283,
      "learning_rate": 9.412122739412312e-05,
      "loss": 2.8891,
      "step": 170671
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7195701599121094,
      "learning_rate": 9.411825212696385e-05,
      "loss": 2.8926,
      "step": 170672
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.802579879760742,
      "learning_rate": 9.411527689808178e-05,
      "loss": 3.0051,
      "step": 170673
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5452637672424316,
      "learning_rate": 9.41123017074777e-05,
      "loss": 3.0327,
      "step": 170674
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4311320781707764,
      "learning_rate": 9.4109326555152e-05,
      "loss": 3.1042,
      "step": 170675
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.951807975769043,
      "learning_rate": 9.410635144110524e-05,
      "loss": 2.6848,
      "step": 170676
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.016308546066284,
      "learning_rate": 9.410337636533793e-05,
      "loss": 2.8973,
      "step": 170677
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6327779293060303,
      "learning_rate": 9.41004013278508e-05,
      "loss": 3.0504,
      "step": 170678
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4637956619262695,
      "learning_rate": 9.409742632864416e-05,
      "loss": 2.9155,
      "step": 170679
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5021424293518066,
      "learning_rate": 9.409445136771882e-05,
      "loss": 3.0073,
      "step": 170680
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.599954128265381,
      "learning_rate": 9.409147644507518e-05,
      "loss": 2.7933,
      "step": 170681
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0676074028015137,
      "learning_rate": 9.408850156071383e-05,
      "loss": 2.7746,
      "step": 170682
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.280536413192749,
      "learning_rate": 9.408552671463522e-05,
      "loss": 2.9685,
      "step": 170683
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1122000217437744,
      "learning_rate": 9.408255190684009e-05,
      "loss": 3.2178,
      "step": 170684
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.756422281265259,
      "learning_rate": 9.407957713732885e-05,
      "loss": 3.0078,
      "step": 170685
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.872173547744751,
      "learning_rate": 9.407660240610213e-05,
      "loss": 2.9085,
      "step": 170686
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4749202728271484,
      "learning_rate": 9.407362771316039e-05,
      "loss": 3.0606,
      "step": 170687
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6565444469451904,
      "learning_rate": 9.407065305850437e-05,
      "loss": 2.9034,
      "step": 170688
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3946008682250977,
      "learning_rate": 9.406767844213448e-05,
      "loss": 2.9895,
      "step": 170689
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6065549850463867,
      "learning_rate": 9.40647038640513e-05,
      "loss": 2.9145,
      "step": 170690
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1461265087127686,
      "learning_rate": 9.406172932425529e-05,
      "loss": 2.8901,
      "step": 170691
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.760043144226074,
      "learning_rate": 9.405875482274715e-05,
      "loss": 2.7777,
      "step": 170692
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9379944801330566,
      "learning_rate": 9.405578035952732e-05,
      "loss": 2.7269,
      "step": 170693
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2970407009124756,
      "learning_rate": 9.40528059345965e-05,
      "loss": 2.9109,
      "step": 170694
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.822375297546387,
      "learning_rate": 9.404983154795515e-05,
      "loss": 3.08,
      "step": 170695
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.731971263885498,
      "learning_rate": 9.404685719960379e-05,
      "loss": 2.9055,
      "step": 170696
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3238525390625,
      "learning_rate": 9.404388288954295e-05,
      "loss": 2.9971,
      "step": 170697
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4894661903381348,
      "learning_rate": 9.404090861777332e-05,
      "loss": 3.1026,
      "step": 170698
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.41434383392334,
      "learning_rate": 9.403793438429527e-05,
      "loss": 3.065,
      "step": 170699
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7145159244537354,
      "learning_rate": 9.403496018910958e-05,
      "loss": 2.9965,
      "step": 170700
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1826703548431396,
      "learning_rate": 9.403198603221665e-05,
      "loss": 2.8888,
      "step": 170701
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1524972915649414,
      "learning_rate": 9.402901191361697e-05,
      "loss": 2.8693,
      "step": 170702
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.249534845352173,
      "learning_rate": 9.402603783331128e-05,
      "loss": 2.8013,
      "step": 170703
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.58678936958313,
      "learning_rate": 9.402306379130006e-05,
      "loss": 2.9741,
      "step": 170704
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.42763352394104,
      "learning_rate": 9.402008978758372e-05,
      "loss": 3.3627,
      "step": 170705
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8762357234954834,
      "learning_rate": 9.401711582216304e-05,
      "loss": 2.8008,
      "step": 170706
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8135249614715576,
      "learning_rate": 9.401414189503847e-05,
      "loss": 2.7819,
      "step": 170707
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.705578565597534,
      "learning_rate": 9.401116800621044e-05,
      "loss": 3.1079,
      "step": 170708
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3540046215057373,
      "learning_rate": 9.400819415567975e-05,
      "loss": 2.9002,
      "step": 170709
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6005325317382812,
      "learning_rate": 9.400522034344671e-05,
      "loss": 2.9383,
      "step": 170710
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2134320735931396,
      "learning_rate": 9.400224656951208e-05,
      "loss": 2.886,
      "step": 170711
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9122262001037598,
      "learning_rate": 9.399927283387635e-05,
      "loss": 2.9071,
      "step": 170712
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.25101637840271,
      "learning_rate": 9.399629913654001e-05,
      "loss": 2.9979,
      "step": 170713
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.675302267074585,
      "learning_rate": 9.399332547750357e-05,
      "loss": 3.1577,
      "step": 170714
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6319737434387207,
      "learning_rate": 9.399035185676775e-05,
      "loss": 2.8939,
      "step": 170715
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6292450428009033,
      "learning_rate": 9.39873782743329e-05,
      "loss": 2.8499,
      "step": 170716
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1704492568969727,
      "learning_rate": 9.398440473019981e-05,
      "loss": 2.8183,
      "step": 170717
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2239930629730225,
      "learning_rate": 9.398143122436888e-05,
      "loss": 3.2068,
      "step": 170718
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5182573795318604,
      "learning_rate": 9.397845775684072e-05,
      "loss": 2.7072,
      "step": 170719
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8267927169799805,
      "learning_rate": 9.397548432761574e-05,
      "loss": 3.0334,
      "step": 170720
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2838101387023926,
      "learning_rate": 9.39725109366947e-05,
      "loss": 3.2144,
      "step": 170721
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6801199913024902,
      "learning_rate": 9.396953758407798e-05,
      "loss": 2.9156,
      "step": 170722
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7203071117401123,
      "learning_rate": 9.396656426976629e-05,
      "loss": 2.8756,
      "step": 170723
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5023579597473145,
      "learning_rate": 9.396359099376012e-05,
      "loss": 2.9844,
      "step": 170724
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.141350746154785,
      "learning_rate": 9.396061775606001e-05,
      "loss": 2.755,
      "step": 170725
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5674145221710205,
      "learning_rate": 9.395764455666639e-05,
      "loss": 3.0916,
      "step": 170726
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.556025266647339,
      "learning_rate": 9.395467139558006e-05,
      "loss": 2.9223,
      "step": 170727
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.660212993621826,
      "learning_rate": 9.395169827280132e-05,
      "loss": 3.0711,
      "step": 170728
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.013641834259033,
      "learning_rate": 9.394872518833098e-05,
      "loss": 2.8276,
      "step": 170729
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.912773370742798,
      "learning_rate": 9.394575214216941e-05,
      "loss": 3.0981,
      "step": 170730
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3187997341156006,
      "learning_rate": 9.394277913431725e-05,
      "loss": 2.6543,
      "step": 170731
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4579153060913086,
      "learning_rate": 9.393980616477493e-05,
      "loss": 2.9776,
      "step": 170732
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.898136615753174,
      "learning_rate": 9.393683323354317e-05,
      "loss": 2.8641,
      "step": 170733
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3973562717437744,
      "learning_rate": 9.393386034062234e-05,
      "loss": 2.7808,
      "step": 170734
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.453277826309204,
      "learning_rate": 9.393088748601319e-05,
      "loss": 2.5544,
      "step": 170735
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1077914237976074,
      "learning_rate": 9.392791466971619e-05,
      "loss": 3.1002,
      "step": 170736
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.645249605178833,
      "learning_rate": 9.392494189173188e-05,
      "loss": 3.0286,
      "step": 170737
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.760795831680298,
      "learning_rate": 9.392196915206071e-05,
      "loss": 2.9137,
      "step": 170738
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.538412094116211,
      "learning_rate": 9.391899645070345e-05,
      "loss": 2.9771,
      "step": 170739
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8466362953186035,
      "learning_rate": 9.391602378766043e-05,
      "loss": 3.0212,
      "step": 170740
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.855541229248047,
      "learning_rate": 9.391305116293239e-05,
      "loss": 3.2065,
      "step": 170741
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.579251527786255,
      "learning_rate": 9.391007857651983e-05,
      "loss": 3.1347,
      "step": 170742
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.414102077484131,
      "learning_rate": 9.390710602842326e-05,
      "loss": 2.8786,
      "step": 170743
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1118533611297607,
      "learning_rate": 9.390413351864318e-05,
      "loss": 3.0008,
      "step": 170744
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.78812837600708,
      "learning_rate": 9.390116104718028e-05,
      "loss": 2.9111,
      "step": 170745
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.807359457015991,
      "learning_rate": 9.389818861403495e-05,
      "loss": 2.8875,
      "step": 170746
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.637260675430298,
      "learning_rate": 9.389521621920795e-05,
      "loss": 2.9187,
      "step": 170747
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.414613723754883,
      "learning_rate": 9.389224386269969e-05,
      "loss": 2.9336,
      "step": 170748
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.429433822631836,
      "learning_rate": 9.388927154451078e-05,
      "loss": 2.9503,
      "step": 170749
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4760806560516357,
      "learning_rate": 9.388629926464165e-05,
      "loss": 3.1402,
      "step": 170750
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2109625339508057,
      "learning_rate": 9.388332702309304e-05,
      "loss": 3.0243,
      "step": 170751
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.29276442527771,
      "learning_rate": 9.38803548198653e-05,
      "loss": 3.109,
      "step": 170752
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2822437286376953,
      "learning_rate": 9.387738265495921e-05,
      "loss": 2.9985,
      "step": 170753
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.793466806411743,
      "learning_rate": 9.38744105283751e-05,
      "loss": 3.0918,
      "step": 170754
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1571593284606934,
      "learning_rate": 9.38714384401138e-05,
      "loss": 3.0731,
      "step": 170755
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4834930896759033,
      "learning_rate": 9.38684663901755e-05,
      "loss": 3.048,
      "step": 170756
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.277689218521118,
      "learning_rate": 9.386549437856106e-05,
      "loss": 2.8766,
      "step": 170757
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.056460380554199,
      "learning_rate": 9.386252240527083e-05,
      "loss": 3.029,
      "step": 170758
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.753868579864502,
      "learning_rate": 9.385955047030551e-05,
      "loss": 3.0,
      "step": 170759
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.611114025115967,
      "learning_rate": 9.385657857366553e-05,
      "loss": 2.8111,
      "step": 170760
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.792912483215332,
      "learning_rate": 9.385360671535169e-05,
      "loss": 3.0526,
      "step": 170761
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5773813724517822,
      "learning_rate": 9.385063489536412e-05,
      "loss": 3.1193,
      "step": 170762
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.377011775970459,
      "learning_rate": 9.384766311370377e-05,
      "loss": 2.7633,
      "step": 170763
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5232059955596924,
      "learning_rate": 9.38446913703709e-05,
      "loss": 2.6808,
      "step": 170764
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4954159259796143,
      "learning_rate": 9.384171966536629e-05,
      "loss": 3.0147,
      "step": 170765
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2681941986083984,
      "learning_rate": 9.38387479986903e-05,
      "loss": 2.8395,
      "step": 170766
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6464242935180664,
      "learning_rate": 9.383577637034376e-05,
      "loss": 2.9445,
      "step": 170767
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.80517840385437,
      "learning_rate": 9.383280478032687e-05,
      "loss": 2.9863,
      "step": 170768
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3425514698028564,
      "learning_rate": 9.382983322864042e-05,
      "loss": 3.1614,
      "step": 170769
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5946998596191406,
      "learning_rate": 9.382686171528485e-05,
      "loss": 2.8538,
      "step": 170770
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8375132083892822,
      "learning_rate": 9.382389024026081e-05,
      "loss": 2.9899,
      "step": 170771
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6244943141937256,
      "learning_rate": 9.382091880356872e-05,
      "loss": 3.0267,
      "step": 170772
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.515486240386963,
      "learning_rate": 9.381794740520931e-05,
      "loss": 2.8054,
      "step": 170773
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.647529363632202,
      "learning_rate": 9.381497604518302e-05,
      "loss": 2.7517,
      "step": 170774
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.8428385257720947,
      "learning_rate": 9.381200472349043e-05,
      "loss": 3.0465,
      "step": 170775
    },
    {
      "epoch": 2.22,
      "grad_norm": 5.203271865844727,
      "learning_rate": 9.380903344013201e-05,
      "loss": 2.8531,
      "step": 170776
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6080093383789062,
      "learning_rate": 9.380606219510845e-05,
      "loss": 2.5847,
      "step": 170777
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.973378896713257,
      "learning_rate": 9.380309098842014e-05,
      "loss": 3.0064,
      "step": 170778
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.757089614868164,
      "learning_rate": 9.380011982006782e-05,
      "loss": 3.1062,
      "step": 170779
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.078617572784424,
      "learning_rate": 9.379714869005197e-05,
      "loss": 2.7428,
      "step": 170780
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3326876163482666,
      "learning_rate": 9.379417759837309e-05,
      "loss": 2.8194,
      "step": 170781
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3144683837890625,
      "learning_rate": 9.37912065450317e-05,
      "loss": 3.0753,
      "step": 170782
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.1124510765075684,
      "learning_rate": 9.378823553002848e-05,
      "loss": 2.9839,
      "step": 170783
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3535823822021484,
      "learning_rate": 9.378526455336387e-05,
      "loss": 2.9745,
      "step": 170784
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2677438259124756,
      "learning_rate": 9.378229361503855e-05,
      "loss": 2.9471,
      "step": 170785
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4411568641662598,
      "learning_rate": 9.377932271505296e-05,
      "loss": 2.8977,
      "step": 170786
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3351047039031982,
      "learning_rate": 9.377635185340763e-05,
      "loss": 2.8819,
      "step": 170787
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4853360652923584,
      "learning_rate": 9.377338103010325e-05,
      "loss": 2.9095,
      "step": 170788
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9988441467285156,
      "learning_rate": 9.377041024514031e-05,
      "loss": 3.0047,
      "step": 170789
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.288630962371826,
      "learning_rate": 9.376743949851921e-05,
      "loss": 2.9813,
      "step": 170790
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4756155014038086,
      "learning_rate": 9.376446879024078e-05,
      "loss": 2.9964,
      "step": 170791
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.839277505874634,
      "learning_rate": 9.37614981203054e-05,
      "loss": 2.7991,
      "step": 170792
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4750287532806396,
      "learning_rate": 9.375852748871355e-05,
      "loss": 3.0226,
      "step": 170793
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.73643159866333,
      "learning_rate": 9.375555689546597e-05,
      "loss": 2.9315,
      "step": 170794
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5123331546783447,
      "learning_rate": 9.375258634056314e-05,
      "loss": 2.9451,
      "step": 170795
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.50777006149292,
      "learning_rate": 9.37496158240055e-05,
      "loss": 2.9133,
      "step": 170796
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.60664701461792,
      "learning_rate": 9.37466453457938e-05,
      "loss": 2.92,
      "step": 170797
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5852925777435303,
      "learning_rate": 9.37436749059285e-05,
      "loss": 2.9251,
      "step": 170798
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.556710720062256,
      "learning_rate": 9.374070450441003e-05,
      "loss": 2.9147,
      "step": 170799
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4851138591766357,
      "learning_rate": 9.373773414123917e-05,
      "loss": 2.8784,
      "step": 170800
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1015822887420654,
      "learning_rate": 9.373476381641627e-05,
      "loss": 3.2341,
      "step": 170801
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6119840145111084,
      "learning_rate": 9.373179352994202e-05,
      "loss": 2.7519,
      "step": 170802
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9080276489257812,
      "learning_rate": 9.372882328181697e-05,
      "loss": 3.0566,
      "step": 170803
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.030001401901245,
      "learning_rate": 9.37258530720416e-05,
      "loss": 2.6726,
      "step": 170804
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1280434131622314,
      "learning_rate": 9.37228829006164e-05,
      "loss": 3.0455,
      "step": 170805
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9900500774383545,
      "learning_rate": 9.37199127675421e-05,
      "loss": 3.0564,
      "step": 170806
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.83746337890625,
      "learning_rate": 9.371694267281907e-05,
      "loss": 2.9474,
      "step": 170807
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0159924030303955,
      "learning_rate": 9.371397261644806e-05,
      "loss": 2.9906,
      "step": 170808
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.654330015182495,
      "learning_rate": 9.371100259842951e-05,
      "loss": 2.6902,
      "step": 170809
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.614267587661743,
      "learning_rate": 9.370803261876398e-05,
      "loss": 3.1111,
      "step": 170810
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5445079803466797,
      "learning_rate": 9.370506267745193e-05,
      "loss": 2.9851,
      "step": 170811
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3343067169189453,
      "learning_rate": 9.370209277449409e-05,
      "loss": 2.9226,
      "step": 170812
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.877930164337158,
      "learning_rate": 9.369912290989083e-05,
      "loss": 2.752,
      "step": 170813
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.190943717956543,
      "learning_rate": 9.36961530836429e-05,
      "loss": 3.0098,
      "step": 170814
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5188889503479004,
      "learning_rate": 9.369318329575073e-05,
      "loss": 3.1358,
      "step": 170815
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6091468334198,
      "learning_rate": 9.369021354621491e-05,
      "loss": 3.1113,
      "step": 170816
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8043265342712402,
      "learning_rate": 9.368724383503587e-05,
      "loss": 2.9859,
      "step": 170817
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.4690797328948975,
      "learning_rate": 9.368427416221435e-05,
      "loss": 2.8811,
      "step": 170818
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6066064834594727,
      "learning_rate": 9.368130452775075e-05,
      "loss": 2.993,
      "step": 170819
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.906681776046753,
      "learning_rate": 9.367833493164577e-05,
      "loss": 3.1238,
      "step": 170820
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.825713634490967,
      "learning_rate": 9.36753653738998e-05,
      "loss": 2.8957,
      "step": 170821
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.55808687210083,
      "learning_rate": 9.367239585451364e-05,
      "loss": 3.0281,
      "step": 170822
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.1972529888153076,
      "learning_rate": 9.366942637348752e-05,
      "loss": 2.8058,
      "step": 170823
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.2504990100860596,
      "learning_rate": 9.366645693082222e-05,
      "loss": 2.9133,
      "step": 170824
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.015906572341919,
      "learning_rate": 9.366348752651811e-05,
      "loss": 2.9007,
      "step": 170825
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.5485711097717285,
      "learning_rate": 9.366051816057598e-05,
      "loss": 2.6986,
      "step": 170826
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9294779300689697,
      "learning_rate": 9.365754883299613e-05,
      "loss": 3.1875,
      "step": 170827
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7363181114196777,
      "learning_rate": 9.365457954377946e-05,
      "loss": 3.0321,
      "step": 170828
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.5992560386657715,
      "learning_rate": 9.36516102929261e-05,
      "loss": 2.7887,
      "step": 170829
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.884641408920288,
      "learning_rate": 9.364864108043689e-05,
      "loss": 2.8449,
      "step": 170830
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6302411556243896,
      "learning_rate": 9.36456719063122e-05,
      "loss": 3.0854,
      "step": 170831
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.717569589614868,
      "learning_rate": 9.364270277055274e-05,
      "loss": 2.9448,
      "step": 170832
    },
    {
      "epoch": 2.22,
      "grad_norm": 7.828237056732178,
      "learning_rate": 9.363973367315892e-05,
      "loss": 2.9589,
      "step": 170833
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4039416313171387,
      "learning_rate": 9.363676461413158e-05,
      "loss": 3.0663,
      "step": 170834
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.455883264541626,
      "learning_rate": 9.363379559347087e-05,
      "loss": 2.7963,
      "step": 170835
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3621864318847656,
      "learning_rate": 9.363082661117762e-05,
      "loss": 3.021,
      "step": 170836
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.858045816421509,
      "learning_rate": 9.362785766725218e-05,
      "loss": 3.1292,
      "step": 170837
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7938477993011475,
      "learning_rate": 9.36248887616953e-05,
      "loss": 2.9188,
      "step": 170838
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.7961461544036865,
      "learning_rate": 9.362191989450741e-05,
      "loss": 3.0635,
      "step": 170839
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4284393787384033,
      "learning_rate": 9.361895106568927e-05,
      "loss": 2.921,
      "step": 170840
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.798511028289795,
      "learning_rate": 9.361598227524105e-05,
      "loss": 3.0011,
      "step": 170841
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6478514671325684,
      "learning_rate": 9.361301352316361e-05,
      "loss": 3.0118,
      "step": 170842
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.8770694732666016,
      "learning_rate": 9.361004480945731e-05,
      "loss": 2.8117,
      "step": 170843
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9230589866638184,
      "learning_rate": 9.360707613412293e-05,
      "loss": 2.8862,
      "step": 170844
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.66994047164917,
      "learning_rate": 9.360410749716077e-05,
      "loss": 3.0439,
      "step": 170845
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4952707290649414,
      "learning_rate": 9.36011388985717e-05,
      "loss": 3.0254,
      "step": 170846
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8767619132995605,
      "learning_rate": 9.359817033835588e-05,
      "loss": 2.9468,
      "step": 170847
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.490199565887451,
      "learning_rate": 9.359520181651414e-05,
      "loss": 2.8447,
      "step": 170848
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.548164367675781,
      "learning_rate": 9.359223333304688e-05,
      "loss": 2.8134,
      "step": 170849
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.561741828918457,
      "learning_rate": 9.35892648879548e-05,
      "loss": 3.1219,
      "step": 170850
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3100457191467285,
      "learning_rate": 9.358629648123828e-05,
      "loss": 2.8287,
      "step": 170851
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8253071308135986,
      "learning_rate": 9.358332811289815e-05,
      "loss": 2.8467,
      "step": 170852
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4691858291625977,
      "learning_rate": 9.358035978293459e-05,
      "loss": 2.9539,
      "step": 170853
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7121055126190186,
      "learning_rate": 9.357739149134843e-05,
      "loss": 3.0714,
      "step": 170854
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.18237042427063,
      "learning_rate": 9.357442323814003e-05,
      "loss": 2.8795,
      "step": 170855
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.22651743888855,
      "learning_rate": 9.357145502331014e-05,
      "loss": 2.9904,
      "step": 170856
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7756903171539307,
      "learning_rate": 9.356848684685913e-05,
      "loss": 2.7598,
      "step": 170857
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.6242055892944336,
      "learning_rate": 9.35655187087878e-05,
      "loss": 2.9206,
      "step": 170858
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.0629358291625977,
      "learning_rate": 9.356255060909634e-05,
      "loss": 2.8963,
      "step": 170859
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3480329513549805,
      "learning_rate": 9.355958254778562e-05,
      "loss": 2.9179,
      "step": 170860
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.2012791633605957,
      "learning_rate": 9.355661452485599e-05,
      "loss": 2.9102,
      "step": 170861
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.872936725616455,
      "learning_rate": 9.355364654030815e-05,
      "loss": 2.9462,
      "step": 170862
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.521596908569336,
      "learning_rate": 9.355067859414249e-05,
      "loss": 2.9056,
      "step": 170863
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3519461154937744,
      "learning_rate": 9.354771068635975e-05,
      "loss": 2.942,
      "step": 170864
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3070340156555176,
      "learning_rate": 9.354474281696042e-05,
      "loss": 2.9311,
      "step": 170865
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.84216046333313,
      "learning_rate": 9.354177498594497e-05,
      "loss": 3.0344,
      "step": 170866
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.748103141784668,
      "learning_rate": 9.353880719331392e-05,
      "loss": 2.8532,
      "step": 170867
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.327486991882324,
      "learning_rate": 9.353583943906801e-05,
      "loss": 2.9914,
      "step": 170868
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.898275852203369,
      "learning_rate": 9.353287172320759e-05,
      "loss": 2.9754,
      "step": 170869
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6423447132110596,
      "learning_rate": 9.352990404573342e-05,
      "loss": 3.0456,
      "step": 170870
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.9462661743164062,
      "learning_rate": 9.352693640664591e-05,
      "loss": 3.0249,
      "step": 170871
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.3551316261291504,
      "learning_rate": 9.352396880594553e-05,
      "loss": 2.7095,
      "step": 170872
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.6979081630706787,
      "learning_rate": 9.352100124363309e-05,
      "loss": 2.9604,
      "step": 170873
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.3693783283233643,
      "learning_rate": 9.351803371970896e-05,
      "loss": 2.9574,
      "step": 170874
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.590925455093384,
      "learning_rate": 9.351506623417362e-05,
      "loss": 2.7943,
      "step": 170875
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.120368003845215,
      "learning_rate": 9.351209878702783e-05,
      "loss": 3.299,
      "step": 170876
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.7629756927490234,
      "learning_rate": 9.350913137827203e-05,
      "loss": 2.7322,
      "step": 170877
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.4633843898773193,
      "learning_rate": 9.350616400790671e-05,
      "loss": 2.8593,
      "step": 170878
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.449115514755249,
      "learning_rate": 9.350319667593258e-05,
      "loss": 2.8323,
      "step": 170879
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.472835063934326,
      "learning_rate": 9.35002293823501e-05,
      "loss": 3.1696,
      "step": 170880
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.39811372756958,
      "learning_rate": 9.349726212715974e-05,
      "loss": 2.7928,
      "step": 170881
    },
    {
      "epoch": 2.22,
      "grad_norm": 2.8623392581939697,
      "learning_rate": 9.349429491036221e-05,
      "loss": 2.8679,
      "step": 170882
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4777140617370605,
      "learning_rate": 9.3491327731958e-05,
      "loss": 2.8173,
      "step": 170883
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.586749792098999,
      "learning_rate": 9.348836059194755e-05,
      "loss": 2.9027,
      "step": 170884
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.702967882156372,
      "learning_rate": 9.34853934903316e-05,
      "loss": 2.87,
      "step": 170885
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.600327968597412,
      "learning_rate": 9.348242642711055e-05,
      "loss": 3.1361,
      "step": 170886
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.493502616882324,
      "learning_rate": 9.34794594022851e-05,
      "loss": 2.964,
      "step": 170887
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.339596271514893,
      "learning_rate": 9.347649241585569e-05,
      "loss": 2.7762,
      "step": 170888
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3494443893432617,
      "learning_rate": 9.347352546782293e-05,
      "loss": 2.8637,
      "step": 170889
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.499854564666748,
      "learning_rate": 9.347055855818722e-05,
      "loss": 2.886,
      "step": 170890
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3687188625335693,
      "learning_rate": 9.346759168694933e-05,
      "loss": 3.0975,
      "step": 170891
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3752405643463135,
      "learning_rate": 9.346462485410964e-05,
      "loss": 3.0752,
      "step": 170892
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.8370766639709473,
      "learning_rate": 9.346165805966885e-05,
      "loss": 2.9407,
      "step": 170893
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8934078216552734,
      "learning_rate": 9.345869130362735e-05,
      "loss": 2.8135,
      "step": 170894
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1033525466918945,
      "learning_rate": 9.345572458598597e-05,
      "loss": 2.863,
      "step": 170895
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8283302783966064,
      "learning_rate": 9.345275790674486e-05,
      "loss": 3.05,
      "step": 170896
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5373544692993164,
      "learning_rate": 9.344979126590492e-05,
      "loss": 2.9297,
      "step": 170897
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.352815628051758,
      "learning_rate": 9.34468246634664e-05,
      "loss": 2.904,
      "step": 170898
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.800128221511841,
      "learning_rate": 9.34438580994302e-05,
      "loss": 2.7657,
      "step": 170899
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5751774311065674,
      "learning_rate": 9.344089157379652e-05,
      "loss": 2.9146,
      "step": 170900
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6569838523864746,
      "learning_rate": 9.343792508656629e-05,
      "loss": 2.8098,
      "step": 170901
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.528266668319702,
      "learning_rate": 9.343495863773964e-05,
      "loss": 2.8892,
      "step": 170902
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5315093994140625,
      "learning_rate": 9.343199222731745e-05,
      "loss": 2.9759,
      "step": 170903
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.30582332611084,
      "learning_rate": 9.342902585530005e-05,
      "loss": 3.053,
      "step": 170904
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.508903741836548,
      "learning_rate": 9.342605952168818e-05,
      "loss": 2.7566,
      "step": 170905
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.774282693862915,
      "learning_rate": 9.342309322648221e-05,
      "loss": 2.9636,
      "step": 170906
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.256199359893799,
      "learning_rate": 9.3420126969683e-05,
      "loss": 2.9612,
      "step": 170907
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7096428871154785,
      "learning_rate": 9.341716075129069e-05,
      "loss": 3.0613,
      "step": 170908
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.108722448348999,
      "learning_rate": 9.341419457130609e-05,
      "loss": 2.9793,
      "step": 170909
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.8967740535736084,
      "learning_rate": 9.341122842972963e-05,
      "loss": 2.7587,
      "step": 170910
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.034022569656372,
      "learning_rate": 9.340826232656203e-05,
      "loss": 2.9222,
      "step": 170911
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7942402362823486,
      "learning_rate": 9.340529626180359e-05,
      "loss": 2.7296,
      "step": 170912
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5046887397766113,
      "learning_rate": 9.340233023545523e-05,
      "loss": 3.0107,
      "step": 170913
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4760663509368896,
      "learning_rate": 9.339936424751707e-05,
      "loss": 2.8927,
      "step": 170914
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5263631343841553,
      "learning_rate": 9.339639829798999e-05,
      "loss": 2.8671,
      "step": 170915
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.548067092895508,
      "learning_rate": 9.339343238687429e-05,
      "loss": 2.9885,
      "step": 170916
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5255303382873535,
      "learning_rate": 9.339046651417076e-05,
      "loss": 3.0499,
      "step": 170917
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1579699516296387,
      "learning_rate": 9.338750067987976e-05,
      "loss": 2.9279,
      "step": 170918
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.763595104217529,
      "learning_rate": 9.338453488400212e-05,
      "loss": 3.0744,
      "step": 170919
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3129079341888428,
      "learning_rate": 9.338156912653796e-05,
      "loss": 2.8452,
      "step": 170920
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1628334522247314,
      "learning_rate": 9.33786034074882e-05,
      "loss": 3.0047,
      "step": 170921
    },
    {
      "epoch": 2.23,
      "grad_norm": 6.857908725738525,
      "learning_rate": 9.337563772685316e-05,
      "loss": 2.9828,
      "step": 170922
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.749133586883545,
      "learning_rate": 9.337267208463357e-05,
      "loss": 3.0496,
      "step": 170923
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5407960414886475,
      "learning_rate": 9.336970648082983e-05,
      "loss": 2.8923,
      "step": 170924
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9751486778259277,
      "learning_rate": 9.336674091544274e-05,
      "loss": 2.8348,
      "step": 170925
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.670417308807373,
      "learning_rate": 9.336377538847244e-05,
      "loss": 2.6932,
      "step": 170926
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.482997179031372,
      "learning_rate": 9.336080989991986e-05,
      "loss": 2.9533,
      "step": 170927
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7685489654541016,
      "learning_rate": 9.335784444978532e-05,
      "loss": 2.748,
      "step": 170928
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.521369457244873,
      "learning_rate": 9.335487903806954e-05,
      "loss": 2.8184,
      "step": 170929
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.314142942428589,
      "learning_rate": 9.335191366477288e-05,
      "loss": 2.9667,
      "step": 170930
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3823208808898926,
      "learning_rate": 9.334894832989617e-05,
      "loss": 2.7558,
      "step": 170931
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.010098695755005,
      "learning_rate": 9.334598303343965e-05,
      "loss": 2.8143,
      "step": 170932
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2429134845733643,
      "learning_rate": 9.334301777540408e-05,
      "loss": 2.8716,
      "step": 170933
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2815890312194824,
      "learning_rate": 9.334005255578986e-05,
      "loss": 2.9485,
      "step": 170934
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.443229913711548,
      "learning_rate": 9.333708737459771e-05,
      "loss": 3.0152,
      "step": 170935
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8377201557159424,
      "learning_rate": 9.3334122231828e-05,
      "loss": 2.9187,
      "step": 170936
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.102325439453125,
      "learning_rate": 9.333115712748158e-05,
      "loss": 2.8109,
      "step": 170937
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3488378524780273,
      "learning_rate": 9.332819206155858e-05,
      "loss": 2.7231,
      "step": 170938
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8479738235473633,
      "learning_rate": 9.332522703405989e-05,
      "loss": 3.0534,
      "step": 170939
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.911195755004883,
      "learning_rate": 9.332226204498586e-05,
      "loss": 2.8671,
      "step": 170940
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4274117946624756,
      "learning_rate": 9.33192970943372e-05,
      "loss": 2.6605,
      "step": 170941
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.505277633666992,
      "learning_rate": 9.33163321821143e-05,
      "loss": 2.6606,
      "step": 170942
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.345891237258911,
      "learning_rate": 9.331336730831795e-05,
      "loss": 2.8708,
      "step": 170943
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3055121898651123,
      "learning_rate": 9.331040247294837e-05,
      "loss": 3.0482,
      "step": 170944
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.36782169342041,
      "learning_rate": 9.330743767600642e-05,
      "loss": 3.227,
      "step": 170945
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5041775703430176,
      "learning_rate": 9.33044729174924e-05,
      "loss": 3.1403,
      "step": 170946
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.170217990875244,
      "learning_rate": 9.330150819740707e-05,
      "loss": 2.8189,
      "step": 170947
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.415504217147827,
      "learning_rate": 9.32985435157508e-05,
      "loss": 2.9122,
      "step": 170948
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.360249996185303,
      "learning_rate": 9.329557887252432e-05,
      "loss": 3.03,
      "step": 170949
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.012538433074951,
      "learning_rate": 9.32926142677281e-05,
      "loss": 2.9299,
      "step": 170950
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.97455096244812,
      "learning_rate": 9.328964970136272e-05,
      "loss": 3.0604,
      "step": 170951
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0657260417938232,
      "learning_rate": 9.328668517342855e-05,
      "loss": 2.7358,
      "step": 170952
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2340564727783203,
      "learning_rate": 9.32837206839264e-05,
      "loss": 3.1728,
      "step": 170953
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0138344764709473,
      "learning_rate": 9.328075623285661e-05,
      "loss": 2.7937,
      "step": 170954
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.892915725708008,
      "learning_rate": 9.327779182021991e-05,
      "loss": 3.0754,
      "step": 170955
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0974080562591553,
      "learning_rate": 9.32748274460168e-05,
      "loss": 3.037,
      "step": 170956
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.320855140686035,
      "learning_rate": 9.327186311024779e-05,
      "loss": 2.5298,
      "step": 170957
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7469890117645264,
      "learning_rate": 9.326889881291337e-05,
      "loss": 3.0779,
      "step": 170958
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.835695266723633,
      "learning_rate": 9.326593455401422e-05,
      "loss": 2.9332,
      "step": 170959
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5819883346557617,
      "learning_rate": 9.326297033355076e-05,
      "loss": 2.8957,
      "step": 170960
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.003176689147949,
      "learning_rate": 9.32600061515237e-05,
      "loss": 2.8795,
      "step": 170961
    },
    {
      "epoch": 2.23,
      "grad_norm": 6.047349452972412,
      "learning_rate": 9.325704200793353e-05,
      "loss": 2.9518,
      "step": 170962
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9074106216430664,
      "learning_rate": 9.325407790278066e-05,
      "loss": 3.0048,
      "step": 170963
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.106638193130493,
      "learning_rate": 9.325111383606586e-05,
      "loss": 2.8808,
      "step": 170964
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4420766830444336,
      "learning_rate": 9.324814980778961e-05,
      "loss": 2.9577,
      "step": 170965
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6453096866607666,
      "learning_rate": 9.324518581795231e-05,
      "loss": 2.9046,
      "step": 170966
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0069992542266846,
      "learning_rate": 9.324222186655472e-05,
      "loss": 2.8023,
      "step": 170967
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5338900089263916,
      "learning_rate": 9.323925795359732e-05,
      "loss": 3.1641,
      "step": 170968
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.240224599838257,
      "learning_rate": 9.323629407908055e-05,
      "loss": 2.8116,
      "step": 170969
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6689202785491943,
      "learning_rate": 9.323333024300515e-05,
      "loss": 3.0017,
      "step": 170970
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6734745502471924,
      "learning_rate": 9.323036644537146e-05,
      "loss": 2.825,
      "step": 170971
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.241156578063965,
      "learning_rate": 9.322740268618027e-05,
      "loss": 2.9458,
      "step": 170972
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.9432215690612793,
      "learning_rate": 9.322443896543198e-05,
      "loss": 2.9408,
      "step": 170973
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.672957420349121,
      "learning_rate": 9.322147528312721e-05,
      "loss": 2.7523,
      "step": 170974
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5222198963165283,
      "learning_rate": 9.321851163926633e-05,
      "loss": 2.8008,
      "step": 170975
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3687193393707275,
      "learning_rate": 9.321554803385015e-05,
      "loss": 3.3578,
      "step": 170976
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5258145332336426,
      "learning_rate": 9.321258446687903e-05,
      "loss": 3.0492,
      "step": 170977
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8375465869903564,
      "learning_rate": 9.320962093835364e-05,
      "loss": 3.1193,
      "step": 170978
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.688062906265259,
      "learning_rate": 9.320665744827455e-05,
      "loss": 3.0445,
      "step": 170979
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.192660093307495,
      "learning_rate": 9.32036939966422e-05,
      "loss": 3.0721,
      "step": 170980
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9908788204193115,
      "learning_rate": 9.320073058345708e-05,
      "loss": 3.0676,
      "step": 170981
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5215556621551514,
      "learning_rate": 9.319776720871998e-05,
      "loss": 2.7503,
      "step": 170982
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4452054500579834,
      "learning_rate": 9.31948038724312e-05,
      "loss": 2.8916,
      "step": 170983
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.987851619720459,
      "learning_rate": 9.319184057459152e-05,
      "loss": 2.9105,
      "step": 170984
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.602494955062866,
      "learning_rate": 9.318887731520126e-05,
      "loss": 3.0104,
      "step": 170985
    },
    {
      "epoch": 2.23,
      "grad_norm": 6.0364274978637695,
      "learning_rate": 9.31859140942613e-05,
      "loss": 2.7868,
      "step": 170986
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6922993659973145,
      "learning_rate": 9.318295091177179e-05,
      "loss": 2.9362,
      "step": 170987
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.270242214202881,
      "learning_rate": 9.317998776773358e-05,
      "loss": 2.8188,
      "step": 170988
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5026345252990723,
      "learning_rate": 9.317702466214703e-05,
      "loss": 2.9009,
      "step": 170989
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.587590456008911,
      "learning_rate": 9.317406159501284e-05,
      "loss": 2.8549,
      "step": 170990
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8523502349853516,
      "learning_rate": 9.317109856633139e-05,
      "loss": 3.0979,
      "step": 170991
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3669614791870117,
      "learning_rate": 9.316813557610355e-05,
      "loss": 2.85,
      "step": 170992
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0059409141540527,
      "learning_rate": 9.316517262432948e-05,
      "loss": 3.013,
      "step": 170993
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.1756107807159424,
      "learning_rate": 9.316220971100999e-05,
      "loss": 2.8976,
      "step": 170994
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3915092945098877,
      "learning_rate": 9.315924683614544e-05,
      "loss": 3.0664,
      "step": 170995
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.444631338119507,
      "learning_rate": 9.31562839997366e-05,
      "loss": 2.7544,
      "step": 170996
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3018319606781006,
      "learning_rate": 9.315332120178382e-05,
      "loss": 2.9414,
      "step": 170997
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3651695251464844,
      "learning_rate": 9.315035844228794e-05,
      "loss": 2.9486,
      "step": 170998
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5936450958251953,
      "learning_rate": 9.314739572124911e-05,
      "loss": 3.1263,
      "step": 170999
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.487163543701172,
      "learning_rate": 9.314443303866817e-05,
      "loss": 2.8329,
      "step": 171000
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.382727861404419,
      "learning_rate": 9.314147039454551e-05,
      "loss": 2.9771,
      "step": 171001
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.1595423221588135,
      "learning_rate": 9.313850778888183e-05,
      "loss": 3.0941,
      "step": 171002
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6685643196105957,
      "learning_rate": 9.313554522167753e-05,
      "loss": 3.0084,
      "step": 171003
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.706296443939209,
      "learning_rate": 9.313258269293342e-05,
      "loss": 2.8341,
      "step": 171004
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4762778282165527,
      "learning_rate": 9.312962020264966e-05,
      "loss": 2.8594,
      "step": 171005
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.625393867492676,
      "learning_rate": 9.312665775082714e-05,
      "loss": 3.2529,
      "step": 171006
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.15517520904541,
      "learning_rate": 9.312369533746615e-05,
      "loss": 2.9595,
      "step": 171007
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.619020700454712,
      "learning_rate": 9.312073296256747e-05,
      "loss": 3.1381,
      "step": 171008
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7977681159973145,
      "learning_rate": 9.311777062613148e-05,
      "loss": 3.0819,
      "step": 171009
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.522165536880493,
      "learning_rate": 9.311480832815897e-05,
      "loss": 2.8691,
      "step": 171010
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.362126111984253,
      "learning_rate": 9.311184606865014e-05,
      "loss": 3.0013,
      "step": 171011
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.675649642944336,
      "learning_rate": 9.310888384760583e-05,
      "loss": 2.9364,
      "step": 171012
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.149880886077881,
      "learning_rate": 9.310592166502636e-05,
      "loss": 2.8675,
      "step": 171013
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5862674713134766,
      "learning_rate": 9.31029595209125e-05,
      "loss": 3.0571,
      "step": 171014
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4412453174591064,
      "learning_rate": 9.309999741526464e-05,
      "loss": 3.0101,
      "step": 171015
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8449859619140625,
      "learning_rate": 9.309703534808357e-05,
      "loss": 2.7986,
      "step": 171016
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6534554958343506,
      "learning_rate": 9.309407331936947e-05,
      "loss": 2.752,
      "step": 171017
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.489902973175049,
      "learning_rate": 9.309111132912319e-05,
      "loss": 2.8713,
      "step": 171018
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.717726230621338,
      "learning_rate": 9.308814937734508e-05,
      "loss": 3.1186,
      "step": 171019
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.991137504577637,
      "learning_rate": 9.308518746403589e-05,
      "loss": 3.0991,
      "step": 171020
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.491178512573242,
      "learning_rate": 9.308222558919597e-05,
      "loss": 2.8865,
      "step": 171021
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.548614740371704,
      "learning_rate": 9.307926375282607e-05,
      "loss": 3.0356,
      "step": 171022
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0134546756744385,
      "learning_rate": 9.307630195492664e-05,
      "loss": 2.796,
      "step": 171023
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.2008957862854,
      "learning_rate": 9.307334019549823e-05,
      "loss": 3.0319,
      "step": 171024
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.917729616165161,
      "learning_rate": 9.307037847454131e-05,
      "loss": 2.9973,
      "step": 171025
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.577981948852539,
      "learning_rate": 9.306741679205658e-05,
      "loss": 3.0009,
      "step": 171026
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.755265235900879,
      "learning_rate": 9.306445514804445e-05,
      "loss": 2.7972,
      "step": 171027
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.168381929397583,
      "learning_rate": 9.306149354250563e-05,
      "loss": 2.9817,
      "step": 171028
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.076692819595337,
      "learning_rate": 9.305853197544059e-05,
      "loss": 3.0248,
      "step": 171029
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.46965217590332,
      "learning_rate": 9.305557044684985e-05,
      "loss": 2.8348,
      "step": 171030
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.766969680786133,
      "learning_rate": 9.305260895673392e-05,
      "loss": 2.9077,
      "step": 171031
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.267502784729004,
      "learning_rate": 9.304964750509351e-05,
      "loss": 3.0845,
      "step": 171032
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.372495651245117,
      "learning_rate": 9.304668609192897e-05,
      "loss": 2.8444,
      "step": 171033
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6031975746154785,
      "learning_rate": 9.304372471724108e-05,
      "loss": 2.9734,
      "step": 171034
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.904684543609619,
      "learning_rate": 9.304076338103023e-05,
      "loss": 3.0569,
      "step": 171035
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5601000785827637,
      "learning_rate": 9.303780208329703e-05,
      "loss": 2.8736,
      "step": 171036
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3563997745513916,
      "learning_rate": 9.303484082404192e-05,
      "loss": 2.904,
      "step": 171037
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4613144397735596,
      "learning_rate": 9.30318796032656e-05,
      "loss": 2.969,
      "step": 171038
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6139450073242188,
      "learning_rate": 9.30289184209685e-05,
      "loss": 2.6969,
      "step": 171039
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.243623971939087,
      "learning_rate": 9.302595727715132e-05,
      "loss": 2.8905,
      "step": 171040
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6627466678619385,
      "learning_rate": 9.302299617181452e-05,
      "loss": 2.7146,
      "step": 171041
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.094444751739502,
      "learning_rate": 9.302003510495864e-05,
      "loss": 3.0237,
      "step": 171042
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6410372257232666,
      "learning_rate": 9.301707407658415e-05,
      "loss": 2.8586,
      "step": 171043
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.455148458480835,
      "learning_rate": 9.301411308669181e-05,
      "loss": 2.8816,
      "step": 171044
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5636751651763916,
      "learning_rate": 9.301115213528195e-05,
      "loss": 3.1951,
      "step": 171045
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.284844160079956,
      "learning_rate": 9.300819122235531e-05,
      "loss": 2.9307,
      "step": 171046
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.436135768890381,
      "learning_rate": 9.300523034791236e-05,
      "loss": 2.8203,
      "step": 171047
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.731480598449707,
      "learning_rate": 9.300226951195354e-05,
      "loss": 3.2533,
      "step": 171048
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.727282762527466,
      "learning_rate": 9.299930871447962e-05,
      "loss": 2.9592,
      "step": 171049
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5481600761413574,
      "learning_rate": 9.2996347955491e-05,
      "loss": 2.7912,
      "step": 171050
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.320903778076172,
      "learning_rate": 9.29933872349882e-05,
      "loss": 2.8114,
      "step": 171051
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7396750450134277,
      "learning_rate": 9.299042655297191e-05,
      "loss": 2.8512,
      "step": 171052
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.444584608078003,
      "learning_rate": 9.298746590944265e-05,
      "loss": 3.0386,
      "step": 171053
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6500966548919678,
      "learning_rate": 9.29845053044008e-05,
      "loss": 2.9873,
      "step": 171054
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6988933086395264,
      "learning_rate": 9.298154473784715e-05,
      "loss": 2.8595,
      "step": 171055
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7022058963775635,
      "learning_rate": 9.297858420978201e-05,
      "loss": 2.8097,
      "step": 171056
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.381572723388672,
      "learning_rate": 9.297562372020619e-05,
      "loss": 3.1366,
      "step": 171057
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4359054565429688,
      "learning_rate": 9.297266326912009e-05,
      "loss": 2.8336,
      "step": 171058
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9680023193359375,
      "learning_rate": 9.296970285652428e-05,
      "loss": 3.1817,
      "step": 171059
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8587541580200195,
      "learning_rate": 9.296674248241923e-05,
      "loss": 2.8127,
      "step": 171060
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2680931091308594,
      "learning_rate": 9.296378214680565e-05,
      "loss": 3.1557,
      "step": 171061
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5707147121429443,
      "learning_rate": 9.296082184968392e-05,
      "loss": 2.9217,
      "step": 171062
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.47884202003479,
      "learning_rate": 9.295786159105479e-05,
      "loss": 3.0706,
      "step": 171063
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2449543476104736,
      "learning_rate": 9.29549013709187e-05,
      "loss": 2.8,
      "step": 171064
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0810840129852295,
      "learning_rate": 9.29519411892762e-05,
      "loss": 2.7437,
      "step": 171065
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.583369731903076,
      "learning_rate": 9.294898104612774e-05,
      "loss": 3.0668,
      "step": 171066
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.515886068344116,
      "learning_rate": 9.294602094147407e-05,
      "loss": 2.8388,
      "step": 171067
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.379232168197632,
      "learning_rate": 9.294306087531552e-05,
      "loss": 2.8562,
      "step": 171068
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.67183518409729,
      "learning_rate": 9.294010084765292e-05,
      "loss": 3.0218,
      "step": 171069
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7867109775543213,
      "learning_rate": 9.29371408584865e-05,
      "loss": 2.8104,
      "step": 171070
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8148422241210938,
      "learning_rate": 9.29341809078172e-05,
      "loss": 2.9983,
      "step": 171071
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4497337341308594,
      "learning_rate": 9.293122099564517e-05,
      "loss": 2.9732,
      "step": 171072
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.632185459136963,
      "learning_rate": 9.292826112197122e-05,
      "loss": 2.903,
      "step": 171073
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.385982036590576,
      "learning_rate": 9.292530128679569e-05,
      "loss": 3.1534,
      "step": 171074
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6407971382141113,
      "learning_rate": 9.292234149011937e-05,
      "loss": 2.9444,
      "step": 171075
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5935885906219482,
      "learning_rate": 9.291938173194263e-05,
      "loss": 3.0593,
      "step": 171076
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9464986324310303,
      "learning_rate": 9.291642201226625e-05,
      "loss": 3.0566,
      "step": 171077
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7609190940856934,
      "learning_rate": 9.291346233109041e-05,
      "loss": 2.6269,
      "step": 171078
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.463625192642212,
      "learning_rate": 9.291050268841598e-05,
      "loss": 3.1595,
      "step": 171079
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.8130147457122803,
      "learning_rate": 9.29075430842433e-05,
      "loss": 3.1482,
      "step": 171080
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2963664531707764,
      "learning_rate": 9.29045835185731e-05,
      "loss": 2.8615,
      "step": 171081
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.03319787979126,
      "learning_rate": 9.290162399140577e-05,
      "loss": 3.1219,
      "step": 171082
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6307342052459717,
      "learning_rate": 9.289866450274214e-05,
      "loss": 3.1405,
      "step": 171083
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.217839241027832,
      "learning_rate": 9.289570505258234e-05,
      "loss": 2.9234,
      "step": 171084
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.923412799835205,
      "learning_rate": 9.289274564092723e-05,
      "loss": 2.8978,
      "step": 171085
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4066624641418457,
      "learning_rate": 9.28897862677772e-05,
      "loss": 2.8137,
      "step": 171086
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3911755084991455,
      "learning_rate": 9.288682693313294e-05,
      "loss": 2.9174,
      "step": 171087
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.265900135040283,
      "learning_rate": 9.288386763699483e-05,
      "loss": 2.9136,
      "step": 171088
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.231525182723999,
      "learning_rate": 9.288090837936362e-05,
      "loss": 2.7175,
      "step": 171089
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.024263858795166,
      "learning_rate": 9.287794916023977e-05,
      "loss": 3.1789,
      "step": 171090
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.087103843688965,
      "learning_rate": 9.287498997962382e-05,
      "loss": 2.9337,
      "step": 171091
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6528947353363037,
      "learning_rate": 9.28720308375162e-05,
      "loss": 2.9327,
      "step": 171092
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9447433948516846,
      "learning_rate": 9.286907173391768e-05,
      "loss": 2.781,
      "step": 171093
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.731468200683594,
      "learning_rate": 9.286611266882867e-05,
      "loss": 2.8661,
      "step": 171094
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.698659658432007,
      "learning_rate": 9.28631536422498e-05,
      "loss": 2.9583,
      "step": 171095
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.46319580078125,
      "learning_rate": 9.286019465418157e-05,
      "loss": 3.3743,
      "step": 171096
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.646973133087158,
      "learning_rate": 9.285723570462456e-05,
      "loss": 2.6684,
      "step": 171097
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.89142107963562,
      "learning_rate": 9.28542767935792e-05,
      "loss": 2.8344,
      "step": 171098
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.40973424911499,
      "learning_rate": 9.285131792104626e-05,
      "loss": 2.8152,
      "step": 171099
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.423450469970703,
      "learning_rate": 9.284835908702605e-05,
      "loss": 2.828,
      "step": 171100
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5605101585388184,
      "learning_rate": 9.284540029151933e-05,
      "loss": 2.9152,
      "step": 171101
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.108395576477051,
      "learning_rate": 9.284244153452658e-05,
      "loss": 3.0724,
      "step": 171102
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.80391526222229,
      "learning_rate": 9.283948281604833e-05,
      "loss": 3.1785,
      "step": 171103
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.566908121109009,
      "learning_rate": 9.2836524136085e-05,
      "loss": 3.1295,
      "step": 171104
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9303231239318848,
      "learning_rate": 9.28335654946374e-05,
      "loss": 2.7128,
      "step": 171105
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.33884859085083,
      "learning_rate": 9.283060689170585e-05,
      "loss": 3.0649,
      "step": 171106
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.629652261734009,
      "learning_rate": 9.282764832729109e-05,
      "loss": 2.9942,
      "step": 171107
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7817959785461426,
      "learning_rate": 9.28246898013936e-05,
      "loss": 2.7011,
      "step": 171108
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.492560625076294,
      "learning_rate": 9.282173131401387e-05,
      "loss": 3.0188,
      "step": 171109
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4128410816192627,
      "learning_rate": 9.281877286515243e-05,
      "loss": 2.7887,
      "step": 171110
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3583016395568848,
      "learning_rate": 9.281581445480999e-05,
      "loss": 2.9936,
      "step": 171111
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.610340118408203,
      "learning_rate": 9.281285608298688e-05,
      "loss": 3.0864,
      "step": 171112
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4232640266418457,
      "learning_rate": 9.280989774968388e-05,
      "loss": 2.9864,
      "step": 171113
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.103025197982788,
      "learning_rate": 9.280693945490144e-05,
      "loss": 2.9997,
      "step": 171114
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3189313411712646,
      "learning_rate": 9.280398119864009e-05,
      "loss": 2.8634,
      "step": 171115
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.675510883331299,
      "learning_rate": 9.280102298090029e-05,
      "loss": 2.8092,
      "step": 171116
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4059135913848877,
      "learning_rate": 9.27980648016828e-05,
      "loss": 2.9188,
      "step": 171117
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.105245590209961,
      "learning_rate": 9.279510666098797e-05,
      "loss": 3.1453,
      "step": 171118
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5191025733947754,
      "learning_rate": 9.279214855881652e-05,
      "loss": 2.8437,
      "step": 171119
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4586880207061768,
      "learning_rate": 9.278919049516892e-05,
      "loss": 3.0328,
      "step": 171120
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7923524379730225,
      "learning_rate": 9.278623247004571e-05,
      "loss": 2.8473,
      "step": 171121
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4391372203826904,
      "learning_rate": 9.278327448344739e-05,
      "loss": 3.0607,
      "step": 171122
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.798732280731201,
      "learning_rate": 9.278031653537464e-05,
      "loss": 2.8343,
      "step": 171123
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.31429386138916,
      "learning_rate": 9.277735862582788e-05,
      "loss": 3.1667,
      "step": 171124
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4390857219696045,
      "learning_rate": 9.277440075480776e-05,
      "loss": 2.7924,
      "step": 171125
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0425171852111816,
      "learning_rate": 9.277144292231483e-05,
      "loss": 2.8346,
      "step": 171126
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.41847825050354,
      "learning_rate": 9.276848512834962e-05,
      "loss": 2.9292,
      "step": 171127
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.530193567276001,
      "learning_rate": 9.276552737291251e-05,
      "loss": 2.8749,
      "step": 171128
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.443577289581299,
      "learning_rate": 9.276256965600431e-05,
      "loss": 2.6924,
      "step": 171129
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.986985683441162,
      "learning_rate": 9.27596119776254e-05,
      "loss": 3.0737,
      "step": 171130
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8661997318267822,
      "learning_rate": 9.275665433777647e-05,
      "loss": 3.0405,
      "step": 171131
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8273942470550537,
      "learning_rate": 9.275369673645798e-05,
      "loss": 2.9114,
      "step": 171132
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4471499919891357,
      "learning_rate": 9.275073917367037e-05,
      "loss": 3.1272,
      "step": 171133
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2885353565216064,
      "learning_rate": 9.274778164941445e-05,
      "loss": 2.989,
      "step": 171134
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2643544673919678,
      "learning_rate": 9.274482416369063e-05,
      "loss": 2.9279,
      "step": 171135
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0476396083831787,
      "learning_rate": 9.274186671649931e-05,
      "loss": 2.8883,
      "step": 171136
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.57582426071167,
      "learning_rate": 9.273890930784135e-05,
      "loss": 2.5565,
      "step": 171137
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.979971408843994,
      "learning_rate": 9.273595193771709e-05,
      "loss": 3.2359,
      "step": 171138
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7704734802246094,
      "learning_rate": 9.273299460612704e-05,
      "loss": 2.9059,
      "step": 171139
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9258172512054443,
      "learning_rate": 9.273003731307197e-05,
      "loss": 2.9597,
      "step": 171140
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.444164752960205,
      "learning_rate": 9.272708005855227e-05,
      "loss": 2.7493,
      "step": 171141
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.725428819656372,
      "learning_rate": 9.272412284256842e-05,
      "loss": 3.0489,
      "step": 171142
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2455029487609863,
      "learning_rate": 9.272116566512117e-05,
      "loss": 2.9441,
      "step": 171143
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4267570972442627,
      "learning_rate": 9.271820852621099e-05,
      "loss": 2.864,
      "step": 171144
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.228846311569214,
      "learning_rate": 9.271525142583829e-05,
      "loss": 3.0123,
      "step": 171145
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2566092014312744,
      "learning_rate": 9.271229436400382e-05,
      "loss": 3.0033,
      "step": 171146
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6540400981903076,
      "learning_rate": 9.270933734070796e-05,
      "loss": 2.9254,
      "step": 171147
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4205257892608643,
      "learning_rate": 9.270638035595148e-05,
      "loss": 2.809,
      "step": 171148
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.0518102645874023,
      "learning_rate": 9.270342340973477e-05,
      "loss": 3.1053,
      "step": 171149
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8810935020446777,
      "learning_rate": 9.27004665020584e-05,
      "loss": 3.0626,
      "step": 171150
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.706589698791504,
      "learning_rate": 9.269750963292281e-05,
      "loss": 2.8081,
      "step": 171151
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.414137601852417,
      "learning_rate": 9.269455280232878e-05,
      "loss": 2.7619,
      "step": 171152
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6863019466400146,
      "learning_rate": 9.269159601027668e-05,
      "loss": 2.8781,
      "step": 171153
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7424561977386475,
      "learning_rate": 9.268863925676718e-05,
      "loss": 2.7287,
      "step": 171154
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.312927007675171,
      "learning_rate": 9.268568254180072e-05,
      "loss": 3.2367,
      "step": 171155
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.480701208114624,
      "learning_rate": 9.268272586537795e-05,
      "loss": 3.1421,
      "step": 171156
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.699842691421509,
      "learning_rate": 9.267976922749943e-05,
      "loss": 2.8175,
      "step": 171157
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8589119911193848,
      "learning_rate": 9.267681262816561e-05,
      "loss": 2.8653,
      "step": 171158
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7570230960845947,
      "learning_rate": 9.267385606737702e-05,
      "loss": 3.038,
      "step": 171159
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.742173910140991,
      "learning_rate": 9.267089954513436e-05,
      "loss": 2.9741,
      "step": 171160
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.463033676147461,
      "learning_rate": 9.266794306143797e-05,
      "loss": 2.9086,
      "step": 171161
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2861170768737793,
      "learning_rate": 9.266498661628863e-05,
      "loss": 2.8838,
      "step": 171162
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.777766227722168,
      "learning_rate": 9.26620302096868e-05,
      "loss": 2.9606,
      "step": 171163
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.310147762298584,
      "learning_rate": 9.265907384163301e-05,
      "loss": 2.8353,
      "step": 171164
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9629693031311035,
      "learning_rate": 9.265611751212772e-05,
      "loss": 3.0415,
      "step": 171165
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7281525135040283,
      "learning_rate": 9.265316122117165e-05,
      "loss": 3.0416,
      "step": 171166
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7284255027770996,
      "learning_rate": 9.26502049687652e-05,
      "loss": 2.7093,
      "step": 171167
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2707228660583496,
      "learning_rate": 9.264724875490907e-05,
      "loss": 2.8915,
      "step": 171168
    },
    {
      "epoch": 2.23,
      "grad_norm": 6.610934734344482,
      "learning_rate": 9.264429257960372e-05,
      "loss": 2.9497,
      "step": 171169
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.973818302154541,
      "learning_rate": 9.264133644284973e-05,
      "loss": 3.0784,
      "step": 171170
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.1978564262390137,
      "learning_rate": 9.263838034464754e-05,
      "loss": 2.6512,
      "step": 171171
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5482168197631836,
      "learning_rate": 9.263542428499785e-05,
      "loss": 3.0698,
      "step": 171172
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.023026704788208,
      "learning_rate": 9.263246826390109e-05,
      "loss": 2.6618,
      "step": 171173
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5721328258514404,
      "learning_rate": 9.262951228135794e-05,
      "loss": 3.0193,
      "step": 171174
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3363025188446045,
      "learning_rate": 9.262655633736889e-05,
      "loss": 2.7989,
      "step": 171175
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0338571071624756,
      "learning_rate": 9.262360043193448e-05,
      "loss": 2.9763,
      "step": 171176
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7730839252471924,
      "learning_rate": 9.262064456505514e-05,
      "loss": 2.8921,
      "step": 171177
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9678356647491455,
      "learning_rate": 9.261768873673168e-05,
      "loss": 2.9403,
      "step": 171178
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2018609046936035,
      "learning_rate": 9.261473294696437e-05,
      "loss": 2.8307,
      "step": 171179
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6862807273864746,
      "learning_rate": 9.261177719575401e-05,
      "loss": 2.788,
      "step": 171180
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5092575550079346,
      "learning_rate": 9.260882148310102e-05,
      "loss": 3.1864,
      "step": 171181
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.383286714553833,
      "learning_rate": 9.260586580900598e-05,
      "loss": 3.0647,
      "step": 171182
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3601503372192383,
      "learning_rate": 9.260291017346929e-05,
      "loss": 3.155,
      "step": 171183
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5457065105438232,
      "learning_rate": 9.259995457649176e-05,
      "loss": 2.9641,
      "step": 171184
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.123889684677124,
      "learning_rate": 9.259699901807373e-05,
      "loss": 3.0488,
      "step": 171185
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6579172611236572,
      "learning_rate": 9.259404349821591e-05,
      "loss": 2.9533,
      "step": 171186
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.375396490097046,
      "learning_rate": 9.259108801691878e-05,
      "loss": 2.8583,
      "step": 171187
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.316075086593628,
      "learning_rate": 9.25881325741829e-05,
      "loss": 3.2065,
      "step": 171188
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.306276798248291,
      "learning_rate": 9.258517717000868e-05,
      "loss": 3.285,
      "step": 171189
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8142805099487305,
      "learning_rate": 9.258222180439689e-05,
      "loss": 3.1425,
      "step": 171190
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4527180194854736,
      "learning_rate": 9.257926647734787e-05,
      "loss": 3.0388,
      "step": 171191
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0124385356903076,
      "learning_rate": 9.25763111888624e-05,
      "loss": 2.8232,
      "step": 171192
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.834167242050171,
      "learning_rate": 9.25733559389409e-05,
      "loss": 2.8673,
      "step": 171193
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.550342321395874,
      "learning_rate": 9.257040072758393e-05,
      "loss": 2.8655,
      "step": 171194
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3907129764556885,
      "learning_rate": 9.256744555479196e-05,
      "loss": 2.9534,
      "step": 171195
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.438444137573242,
      "learning_rate": 9.256449042056568e-05,
      "loss": 2.8016,
      "step": 171196
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.503964900970459,
      "learning_rate": 9.25615353249055e-05,
      "loss": 2.9432,
      "step": 171197
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.364816188812256,
      "learning_rate": 9.255858026781214e-05,
      "loss": 3.0775,
      "step": 171198
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4300036430358887,
      "learning_rate": 9.255562524928607e-05,
      "loss": 2.7395,
      "step": 171199
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4888272285461426,
      "learning_rate": 9.255267026932781e-05,
      "loss": 2.6975,
      "step": 171200
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4173898696899414,
      "learning_rate": 9.254971532793783e-05,
      "loss": 3.1555,
      "step": 171201
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4764537811279297,
      "learning_rate": 9.254676042511687e-05,
      "loss": 3.0292,
      "step": 171202
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4412343502044678,
      "learning_rate": 9.254380556086526e-05,
      "loss": 3.0081,
      "step": 171203
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.03045654296875,
      "learning_rate": 9.254085073518379e-05,
      "loss": 2.978,
      "step": 171204
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.769465923309326,
      "learning_rate": 9.25378959480729e-05,
      "loss": 3.0822,
      "step": 171205
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.306835412979126,
      "learning_rate": 9.253494119953312e-05,
      "loss": 3.2068,
      "step": 171206
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2857320308685303,
      "learning_rate": 9.253198648956493e-05,
      "loss": 2.9158,
      "step": 171207
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.367136240005493,
      "learning_rate": 9.252903181816905e-05,
      "loss": 2.8167,
      "step": 171208
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0311412811279297,
      "learning_rate": 9.252607718534584e-05,
      "loss": 3.1248,
      "step": 171209
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.793092727661133,
      "learning_rate": 9.252312259109606e-05,
      "loss": 3.0841,
      "step": 171210
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8959124088287354,
      "learning_rate": 9.252016803542013e-05,
      "loss": 3.0717,
      "step": 171211
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.812710762023926,
      "learning_rate": 9.251721351831864e-05,
      "loss": 2.7012,
      "step": 171212
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.693997383117676,
      "learning_rate": 9.251425903979197e-05,
      "loss": 2.5221,
      "step": 171213
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.399686574935913,
      "learning_rate": 9.251130459984095e-05,
      "loss": 2.9522,
      "step": 171214
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6300947666168213,
      "learning_rate": 9.25083501984659e-05,
      "loss": 2.7952,
      "step": 171215
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7314343452453613,
      "learning_rate": 9.250539583566754e-05,
      "loss": 3.2245,
      "step": 171216
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5886034965515137,
      "learning_rate": 9.250244151144638e-05,
      "loss": 3.1552,
      "step": 171217
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.8687515258789062,
      "learning_rate": 9.249948722580279e-05,
      "loss": 3.0634,
      "step": 171218
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.402069091796875,
      "learning_rate": 9.249653297873758e-05,
      "loss": 3.0532,
      "step": 171219
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0307977199554443,
      "learning_rate": 9.24935787702512e-05,
      "loss": 2.5155,
      "step": 171220
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.077831745147705,
      "learning_rate": 9.249062460034408e-05,
      "loss": 2.8576,
      "step": 171221
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5012667179107666,
      "learning_rate": 9.248767046901695e-05,
      "loss": 2.9504,
      "step": 171222
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4674718379974365,
      "learning_rate": 9.248471637627016e-05,
      "loss": 2.929,
      "step": 171223
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3142945766448975,
      "learning_rate": 9.248176232210451e-05,
      "loss": 3.0188,
      "step": 171224
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6867313385009766,
      "learning_rate": 9.247880830652041e-05,
      "loss": 2.8722,
      "step": 171225
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.709820747375488,
      "learning_rate": 9.247585432951842e-05,
      "loss": 2.8008,
      "step": 171226
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.396298885345459,
      "learning_rate": 9.247290039109899e-05,
      "loss": 3.0727,
      "step": 171227
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1564927101135254,
      "learning_rate": 9.246994649126286e-05,
      "loss": 2.9223,
      "step": 171228
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.251859426498413,
      "learning_rate": 9.246699263001039e-05,
      "loss": 3.1551,
      "step": 171229
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.193532943725586,
      "learning_rate": 9.24640388073423e-05,
      "loss": 2.9395,
      "step": 171230
    },
    {
      "epoch": 2.23,
      "grad_norm": 7.196221351623535,
      "learning_rate": 9.246108502325907e-05,
      "loss": 3.089,
      "step": 171231
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.274264812469482,
      "learning_rate": 9.245813127776118e-05,
      "loss": 2.934,
      "step": 171232
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.428467273712158,
      "learning_rate": 9.24551775708493e-05,
      "loss": 2.8022,
      "step": 171233
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4824838638305664,
      "learning_rate": 9.245222390252394e-05,
      "loss": 2.6508,
      "step": 171234
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6308436393737793,
      "learning_rate": 9.244927027278551e-05,
      "loss": 2.9458,
      "step": 171235
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3651504516601562,
      "learning_rate": 9.244631668163479e-05,
      "loss": 2.9663,
      "step": 171236
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.823913335800171,
      "learning_rate": 9.244336312907223e-05,
      "loss": 3.0481,
      "step": 171237
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.160372734069824,
      "learning_rate": 9.244040961509824e-05,
      "loss": 2.9951,
      "step": 171238
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.750256061553955,
      "learning_rate": 9.243745613971362e-05,
      "loss": 3.117,
      "step": 171239
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5074565410614014,
      "learning_rate": 9.243450270291879e-05,
      "loss": 2.8914,
      "step": 171240
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.843975782394409,
      "learning_rate": 9.243154930471419e-05,
      "loss": 2.8455,
      "step": 171241
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.832320213317871,
      "learning_rate": 9.242859594510059e-05,
      "loss": 2.9749,
      "step": 171242
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3489670753479004,
      "learning_rate": 9.242564262407842e-05,
      "loss": 3.1303,
      "step": 171243
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.826160430908203,
      "learning_rate": 9.242268934164818e-05,
      "loss": 2.9083,
      "step": 171244
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5022945404052734,
      "learning_rate": 9.241973609781054e-05,
      "loss": 2.9822,
      "step": 171245
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.9194846153259277,
      "learning_rate": 9.241678289256592e-05,
      "loss": 3.0099,
      "step": 171246
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2437379360198975,
      "learning_rate": 9.241382972591503e-05,
      "loss": 2.9427,
      "step": 171247
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8925538063049316,
      "learning_rate": 9.241087659785831e-05,
      "loss": 2.8209,
      "step": 171248
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2884936332702637,
      "learning_rate": 9.240792350839634e-05,
      "loss": 2.8679,
      "step": 171249
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4476044178009033,
      "learning_rate": 9.240497045752954e-05,
      "loss": 3.2037,
      "step": 171250
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5210204124450684,
      "learning_rate": 9.240201744525868e-05,
      "loss": 2.9618,
      "step": 171251
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.101614475250244,
      "learning_rate": 9.239906447158414e-05,
      "loss": 2.9773,
      "step": 171252
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.498767614364624,
      "learning_rate": 9.239611153650661e-05,
      "loss": 2.9148,
      "step": 171253
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2962706089019775,
      "learning_rate": 9.239315864002654e-05,
      "loss": 3.0507,
      "step": 171254
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4082462787628174,
      "learning_rate": 9.239020578214453e-05,
      "loss": 2.8962,
      "step": 171255
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.460200071334839,
      "learning_rate": 9.238725296286098e-05,
      "loss": 2.9537,
      "step": 171256
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5672056674957275,
      "learning_rate": 9.238430018217666e-05,
      "loss": 3.0171,
      "step": 171257
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4489545822143555,
      "learning_rate": 9.238134744009195e-05,
      "loss": 3.0402,
      "step": 171258
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.131626844406128,
      "learning_rate": 9.237839473660753e-05,
      "loss": 2.8823,
      "step": 171259
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2595906257629395,
      "learning_rate": 9.237544207172387e-05,
      "loss": 3.0224,
      "step": 171260
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8607418537139893,
      "learning_rate": 9.237248944544156e-05,
      "loss": 3.109,
      "step": 171261
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6964681148529053,
      "learning_rate": 9.236953685776106e-05,
      "loss": 2.9534,
      "step": 171262
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7621614933013916,
      "learning_rate": 9.236658430868302e-05,
      "loss": 3.0038,
      "step": 171263
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.964733600616455,
      "learning_rate": 9.236363179820788e-05,
      "loss": 3.3513,
      "step": 171264
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.655620813369751,
      "learning_rate": 9.236067932633636e-05,
      "loss": 2.8668,
      "step": 171265
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.82755708694458,
      "learning_rate": 9.23577268930689e-05,
      "loss": 2.7671,
      "step": 171266
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4568099975585938,
      "learning_rate": 9.235477449840609e-05,
      "loss": 2.8748,
      "step": 171267
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9064598083496094,
      "learning_rate": 9.235182214234828e-05,
      "loss": 3.0345,
      "step": 171268
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.055333137512207,
      "learning_rate": 9.234886982489635e-05,
      "loss": 2.6567,
      "step": 171269
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0709054470062256,
      "learning_rate": 9.234591754605056e-05,
      "loss": 2.7559,
      "step": 171270
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3697662353515625,
      "learning_rate": 9.234296530581168e-05,
      "loss": 3.0176,
      "step": 171271
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1161015033721924,
      "learning_rate": 9.234001310418019e-05,
      "loss": 2.7487,
      "step": 171272
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8776257038116455,
      "learning_rate": 9.233706094115659e-05,
      "loss": 2.9432,
      "step": 171273
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8844480514526367,
      "learning_rate": 9.233410881674133e-05,
      "loss": 3.0553,
      "step": 171274
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5981271266937256,
      "learning_rate": 9.233115673093522e-05,
      "loss": 3.0353,
      "step": 171275
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6694717407226562,
      "learning_rate": 9.232820468373853e-05,
      "loss": 2.8661,
      "step": 171276
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.470561981201172,
      "learning_rate": 9.232525267515209e-05,
      "loss": 2.9576,
      "step": 171277
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9328858852386475,
      "learning_rate": 9.232230070517626e-05,
      "loss": 2.9088,
      "step": 171278
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.762529134750366,
      "learning_rate": 9.231934877381164e-05,
      "loss": 3.0796,
      "step": 171279
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.324343681335449,
      "learning_rate": 9.231639688105871e-05,
      "loss": 3.0079,
      "step": 171280
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.651121139526367,
      "learning_rate": 9.231344502691816e-05,
      "loss": 3.008,
      "step": 171281
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0808701515197754,
      "learning_rate": 9.231049321139035e-05,
      "loss": 3.0812,
      "step": 171282
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.834749221801758,
      "learning_rate": 9.230754143447605e-05,
      "loss": 3.1672,
      "step": 171283
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3287994861602783,
      "learning_rate": 9.23045896961757e-05,
      "loss": 3.184,
      "step": 171284
    },
    {
      "epoch": 2.23,
      "grad_norm": 7.566166400909424,
      "learning_rate": 9.230163799648986e-05,
      "loss": 2.8625,
      "step": 171285
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.9567043781280518,
      "learning_rate": 9.229868633541893e-05,
      "loss": 2.8501,
      "step": 171286
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1118743419647217,
      "learning_rate": 9.229573471296372e-05,
      "loss": 3.0457,
      "step": 171287
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4214627742767334,
      "learning_rate": 9.229278312912458e-05,
      "loss": 2.9076,
      "step": 171288
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4875667095184326,
      "learning_rate": 9.228983158390219e-05,
      "loss": 3.1645,
      "step": 171289
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8082196712493896,
      "learning_rate": 9.228688007729697e-05,
      "loss": 3.0223,
      "step": 171290
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.959904193878174,
      "learning_rate": 9.228392860930972e-05,
      "loss": 3.0175,
      "step": 171291
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.878103256225586,
      "learning_rate": 9.228097717994062e-05,
      "loss": 2.802,
      "step": 171292
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6092965602874756,
      "learning_rate": 9.227802578919049e-05,
      "loss": 2.867,
      "step": 171293
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1008667945861816,
      "learning_rate": 9.22750744370597e-05,
      "loss": 3.0029,
      "step": 171294
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3317461013793945,
      "learning_rate": 9.2272123123549e-05,
      "loss": 2.9538,
      "step": 171295
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6026458740234375,
      "learning_rate": 9.226917184865875e-05,
      "loss": 3.0139,
      "step": 171296
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5792016983032227,
      "learning_rate": 9.226622061238975e-05,
      "loss": 3.0344,
      "step": 171297
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8331947326660156,
      "learning_rate": 9.226326941474222e-05,
      "loss": 2.9865,
      "step": 171298
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3889319896698,
      "learning_rate": 9.226031825571693e-05,
      "loss": 2.8985,
      "step": 171299
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.232398509979248,
      "learning_rate": 9.225736713531429e-05,
      "loss": 2.9249,
      "step": 171300
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4519615173339844,
      "learning_rate": 9.225441605353504e-05,
      "loss": 3.1342,
      "step": 171301
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8311893939971924,
      "learning_rate": 9.22514650103795e-05,
      "loss": 2.9102,
      "step": 171302
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.895641565322876,
      "learning_rate": 9.224851400584853e-05,
      "loss": 3.091,
      "step": 171303
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.594613552093506,
      "learning_rate": 9.224556303994226e-05,
      "loss": 2.8718,
      "step": 171304
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6036040782928467,
      "learning_rate": 9.22426121126616e-05,
      "loss": 3.0266,
      "step": 171305
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9623184204101562,
      "learning_rate": 9.223966122400683e-05,
      "loss": 2.9008,
      "step": 171306
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.488492488861084,
      "learning_rate": 9.223671037397877e-05,
      "loss": 2.9888,
      "step": 171307
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.236480712890625,
      "learning_rate": 9.223375956257772e-05,
      "loss": 2.7834,
      "step": 171308
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.835707902908325,
      "learning_rate": 9.223080878980439e-05,
      "loss": 2.9949,
      "step": 171309
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4133849143981934,
      "learning_rate": 9.22278580556593e-05,
      "loss": 3.1281,
      "step": 171310
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.832178831100464,
      "learning_rate": 9.222490736014297e-05,
      "loss": 2.8187,
      "step": 171311
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6045806407928467,
      "learning_rate": 9.222195670325588e-05,
      "loss": 2.9075,
      "step": 171312
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.080975294113159,
      "learning_rate": 9.221900608499872e-05,
      "loss": 2.7802,
      "step": 171313
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6625149250030518,
      "learning_rate": 9.22160555053719e-05,
      "loss": 2.9185,
      "step": 171314
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7118279933929443,
      "learning_rate": 9.22131049643761e-05,
      "loss": 2.8566,
      "step": 171315
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5095932483673096,
      "learning_rate": 9.221015446201181e-05,
      "loss": 2.8388,
      "step": 171316
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5952296257019043,
      "learning_rate": 9.220720399827951e-05,
      "loss": 2.8225,
      "step": 171317
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8021702766418457,
      "learning_rate": 9.220425357317988e-05,
      "loss": 2.9934,
      "step": 171318
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3996472358703613,
      "learning_rate": 9.22013031867134e-05,
      "loss": 2.9021,
      "step": 171319
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5269935131073,
      "learning_rate": 9.219835283888052e-05,
      "loss": 2.8692,
      "step": 171320
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5093324184417725,
      "learning_rate": 9.219540252968201e-05,
      "loss": 2.6465,
      "step": 171321
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.295072078704834,
      "learning_rate": 9.21924522591183e-05,
      "loss": 2.9234,
      "step": 171322
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4626052379608154,
      "learning_rate": 9.218950202718985e-05,
      "loss": 3.0605,
      "step": 171323
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.497516632080078,
      "learning_rate": 9.218655183389735e-05,
      "loss": 2.8701,
      "step": 171324
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5342624187469482,
      "learning_rate": 9.218360167924131e-05,
      "loss": 2.7377,
      "step": 171325
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0086214542388916,
      "learning_rate": 9.218065156322215e-05,
      "loss": 2.9139,
      "step": 171326
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.313107490539551,
      "learning_rate": 9.217770148584066e-05,
      "loss": 2.7908,
      "step": 171327
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.208559274673462,
      "learning_rate": 9.217475144709724e-05,
      "loss": 2.793,
      "step": 171328
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.299288034439087,
      "learning_rate": 9.217180144699234e-05,
      "loss": 2.8704,
      "step": 171329
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6716208457946777,
      "learning_rate": 9.216885148552674e-05,
      "loss": 2.744,
      "step": 171330
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8461856842041016,
      "learning_rate": 9.216590156270079e-05,
      "loss": 3.1517,
      "step": 171331
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.986293077468872,
      "learning_rate": 9.216295167851521e-05,
      "loss": 2.9441,
      "step": 171332
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.513450860977173,
      "learning_rate": 9.216000183297045e-05,
      "loss": 2.6933,
      "step": 171333
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4407153129577637,
      "learning_rate": 9.215705202606707e-05,
      "loss": 2.7411,
      "step": 171334
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7779922485351562,
      "learning_rate": 9.215410225780552e-05,
      "loss": 2.9458,
      "step": 171335
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.672605514526367,
      "learning_rate": 9.215115252818655e-05,
      "loss": 2.8744,
      "step": 171336
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6684346199035645,
      "learning_rate": 9.214820283721051e-05,
      "loss": 2.6591,
      "step": 171337
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.544135808944702,
      "learning_rate": 9.214525318487813e-05,
      "loss": 3.0766,
      "step": 171338
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9105026721954346,
      "learning_rate": 9.21423035711899e-05,
      "loss": 3.0203,
      "step": 171339
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.368429183959961,
      "learning_rate": 9.213935399614629e-05,
      "loss": 2.8039,
      "step": 171340
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1827471256256104,
      "learning_rate": 9.213640445974784e-05,
      "loss": 2.7521,
      "step": 171341
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6589348316192627,
      "learning_rate": 9.213345496199524e-05,
      "loss": 3.0046,
      "step": 171342
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.568976879119873,
      "learning_rate": 9.213050550288886e-05,
      "loss": 2.863,
      "step": 171343
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2015814781188965,
      "learning_rate": 9.212755608242942e-05,
      "loss": 2.9806,
      "step": 171344
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.653441905975342,
      "learning_rate": 9.212460670061741e-05,
      "loss": 2.9032,
      "step": 171345
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8151700496673584,
      "learning_rate": 9.212165735745338e-05,
      "loss": 3.1305,
      "step": 171346
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.607499837875366,
      "learning_rate": 9.211870805293773e-05,
      "loss": 2.9862,
      "step": 171347
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3868350982666016,
      "learning_rate": 9.211575878707124e-05,
      "loss": 2.7109,
      "step": 171348
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.939476251602173,
      "learning_rate": 9.211280955985427e-05,
      "loss": 2.7337,
      "step": 171349
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.305190086364746,
      "learning_rate": 9.210986037128754e-05,
      "loss": 2.8977,
      "step": 171350
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.266434907913208,
      "learning_rate": 9.21069112213714e-05,
      "loss": 2.6716,
      "step": 171351
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.178713798522949,
      "learning_rate": 9.210396211010671e-05,
      "loss": 3.0711,
      "step": 171352
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.7461142539978027,
      "learning_rate": 9.210101303749363e-05,
      "loss": 3.0404,
      "step": 171353
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.381725311279297,
      "learning_rate": 9.2098064003533e-05,
      "loss": 2.8956,
      "step": 171354
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.338580846786499,
      "learning_rate": 9.209511500822518e-05,
      "loss": 3.0375,
      "step": 171355
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.09653902053833,
      "learning_rate": 9.209216605157088e-05,
      "loss": 2.9302,
      "step": 171356
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.466935157775879,
      "learning_rate": 9.208921713357046e-05,
      "loss": 3.3461,
      "step": 171357
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.460627794265747,
      "learning_rate": 9.208626825422482e-05,
      "loss": 3.2348,
      "step": 171358
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.188164472579956,
      "learning_rate": 9.208331941353401e-05,
      "loss": 2.9433,
      "step": 171359
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5831751823425293,
      "learning_rate": 9.208037061149897e-05,
      "loss": 2.8922,
      "step": 171360
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.435068130493164,
      "learning_rate": 9.207742184812003e-05,
      "loss": 2.9602,
      "step": 171361
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.086106777191162,
      "learning_rate": 9.20744731233979e-05,
      "loss": 2.7101,
      "step": 171362
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.8593318462371826,
      "learning_rate": 9.207152443733295e-05,
      "loss": 2.856,
      "step": 171363
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.864353895187378,
      "learning_rate": 9.2068575789926e-05,
      "loss": 2.726,
      "step": 171364
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8824961185455322,
      "learning_rate": 9.206562718117727e-05,
      "loss": 2.9574,
      "step": 171365
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4654674530029297,
      "learning_rate": 9.206267861108754e-05,
      "loss": 2.7435,
      "step": 171366
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4839446544647217,
      "learning_rate": 9.20597300796572e-05,
      "loss": 3.0169,
      "step": 171367
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.760244131088257,
      "learning_rate": 9.2056781586887e-05,
      "loss": 2.8937,
      "step": 171368
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1159491539001465,
      "learning_rate": 9.205383313277724e-05,
      "loss": 2.8661,
      "step": 171369
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7071986198425293,
      "learning_rate": 9.20508847173288e-05,
      "loss": 3.1091,
      "step": 171370
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3515491485595703,
      "learning_rate": 9.204793634054183e-05,
      "loss": 2.7858,
      "step": 171371
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5088181495666504,
      "learning_rate": 9.204498800241719e-05,
      "loss": 2.915,
      "step": 171372
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5765647888183594,
      "learning_rate": 9.204203970295518e-05,
      "loss": 3.0118,
      "step": 171373
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.631542205810547,
      "learning_rate": 9.203909144215659e-05,
      "loss": 2.983,
      "step": 171374
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.855980157852173,
      "learning_rate": 9.203614322002176e-05,
      "loss": 2.7845,
      "step": 171375
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.694280624389648,
      "learning_rate": 9.203319503655154e-05,
      "loss": 2.9228,
      "step": 171376
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.252490520477295,
      "learning_rate": 9.20302468917461e-05,
      "loss": 2.8399,
      "step": 171377
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2260124683380127,
      "learning_rate": 9.202729878560622e-05,
      "loss": 3.2362,
      "step": 171378
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2431604862213135,
      "learning_rate": 9.202435071813231e-05,
      "loss": 2.9794,
      "step": 171379
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4439404010772705,
      "learning_rate": 9.202140268932511e-05,
      "loss": 3.0187,
      "step": 171380
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4353058338165283,
      "learning_rate": 9.201845469918495e-05,
      "loss": 2.9999,
      "step": 171381
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5261502265930176,
      "learning_rate": 9.201550674771267e-05,
      "loss": 2.9897,
      "step": 171382
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5434975624084473,
      "learning_rate": 9.201255883490844e-05,
      "loss": 2.7153,
      "step": 171383
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.551865816116333,
      "learning_rate": 9.200961096077312e-05,
      "loss": 2.8014,
      "step": 171384
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.540703058242798,
      "learning_rate": 9.200666312530703e-05,
      "loss": 2.6535,
      "step": 171385
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4590885639190674,
      "learning_rate": 9.200371532851092e-05,
      "loss": 2.6947,
      "step": 171386
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2751071453094482,
      "learning_rate": 9.200076757038514e-05,
      "loss": 2.7041,
      "step": 171387
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.542970895767212,
      "learning_rate": 9.199781985093055e-05,
      "loss": 2.925,
      "step": 171388
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.199857473373413,
      "learning_rate": 9.199487217014732e-05,
      "loss": 2.9445,
      "step": 171389
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9783239364624023,
      "learning_rate": 9.199192452803622e-05,
      "loss": 2.7912,
      "step": 171390
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.712723731994629,
      "learning_rate": 9.19889769245977e-05,
      "loss": 3.0005,
      "step": 171391
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.529409170150757,
      "learning_rate": 9.198602935983239e-05,
      "loss": 2.749,
      "step": 171392
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.079686164855957,
      "learning_rate": 9.198308183374076e-05,
      "loss": 2.9904,
      "step": 171393
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.415357828140259,
      "learning_rate": 9.198013434632349e-05,
      "loss": 2.9466,
      "step": 171394
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.111361265182495,
      "learning_rate": 9.1977186897581e-05,
      "loss": 2.9729,
      "step": 171395
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2826342582702637,
      "learning_rate": 9.197423948751394e-05,
      "loss": 3.079,
      "step": 171396
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.603973388671875,
      "learning_rate": 9.197129211612265e-05,
      "loss": 2.9524,
      "step": 171397
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.291748285293579,
      "learning_rate": 9.196834478340793e-05,
      "loss": 2.8915,
      "step": 171398
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.650019407272339,
      "learning_rate": 9.196539748937015e-05,
      "loss": 2.8904,
      "step": 171399
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1395835876464844,
      "learning_rate": 9.196245023401001e-05,
      "loss": 3.0779,
      "step": 171400
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0141148567199707,
      "learning_rate": 9.195950301732798e-05,
      "loss": 2.7859,
      "step": 171401
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.05539870262146,
      "learning_rate": 9.19565558393246e-05,
      "loss": 2.8876,
      "step": 171402
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4163315296173096,
      "learning_rate": 9.195360870000031e-05,
      "loss": 2.7474,
      "step": 171403
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5358493328094482,
      "learning_rate": 9.195066159935589e-05,
      "loss": 2.6986,
      "step": 171404
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.003941535949707,
      "learning_rate": 9.194771453739166e-05,
      "loss": 2.6281,
      "step": 171405
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2527048587799072,
      "learning_rate": 9.194476751410838e-05,
      "loss": 3.0831,
      "step": 171406
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8144116401672363,
      "learning_rate": 9.19418205295065e-05,
      "loss": 3.0204,
      "step": 171407
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.1946685314178467,
      "learning_rate": 9.193887358358648e-05,
      "loss": 2.6795,
      "step": 171408
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7256736755371094,
      "learning_rate": 9.193592667634905e-05,
      "loss": 2.9413,
      "step": 171409
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.21954607963562,
      "learning_rate": 9.193297980779463e-05,
      "loss": 3.0621,
      "step": 171410
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.9185380935668945,
      "learning_rate": 9.193003297792372e-05,
      "loss": 2.7624,
      "step": 171411
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.903304100036621,
      "learning_rate": 9.192708618673702e-05,
      "loss": 2.9847,
      "step": 171412
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5833702087402344,
      "learning_rate": 9.192413943423501e-05,
      "loss": 2.9115,
      "step": 171413
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3946802616119385,
      "learning_rate": 9.192119272041812e-05,
      "loss": 2.958,
      "step": 171414
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.902804136276245,
      "learning_rate": 9.191824604528713e-05,
      "loss": 2.9252,
      "step": 171415
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0837554931640625,
      "learning_rate": 9.191529940884238e-05,
      "loss": 2.8602,
      "step": 171416
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.035316467285156,
      "learning_rate": 9.191235281108458e-05,
      "loss": 2.8765,
      "step": 171417
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.640944242477417,
      "learning_rate": 9.190940625201418e-05,
      "loss": 3.0642,
      "step": 171418
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9726550579071045,
      "learning_rate": 9.19064597316318e-05,
      "loss": 3.0959,
      "step": 171419
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3247203826904297,
      "learning_rate": 9.19035132499378e-05,
      "loss": 2.9991,
      "step": 171420
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.754669427871704,
      "learning_rate": 9.190056680693298e-05,
      "loss": 3.0235,
      "step": 171421
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.187494993209839,
      "learning_rate": 9.189762040261766e-05,
      "loss": 3.032,
      "step": 171422
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.815657138824463,
      "learning_rate": 9.189467403699261e-05,
      "loss": 3.1158,
      "step": 171423
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2798120975494385,
      "learning_rate": 9.189172771005829e-05,
      "loss": 3.2048,
      "step": 171424
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1971442699432373,
      "learning_rate": 9.18887814218152e-05,
      "loss": 2.9975,
      "step": 171425
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.6967813968658447,
      "learning_rate": 9.188583517226382e-05,
      "loss": 2.7065,
      "step": 171426
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5116851329803467,
      "learning_rate": 9.188288896140487e-05,
      "loss": 2.9826,
      "step": 171427
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4541778564453125,
      "learning_rate": 9.187994278923873e-05,
      "loss": 3.1403,
      "step": 171428
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4230239391326904,
      "learning_rate": 9.187699665576615e-05,
      "loss": 2.9854,
      "step": 171429
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7361011505126953,
      "learning_rate": 9.18740505609875e-05,
      "loss": 3.1735,
      "step": 171430
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5348026752471924,
      "learning_rate": 9.187110450490354e-05,
      "loss": 2.8948,
      "step": 171431
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2112483978271484,
      "learning_rate": 9.186815848751446e-05,
      "loss": 2.9886,
      "step": 171432
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4090890884399414,
      "learning_rate": 9.186521250882117e-05,
      "loss": 2.8772,
      "step": 171433
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.036832571029663,
      "learning_rate": 9.186226656882392e-05,
      "loss": 3.1117,
      "step": 171434
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.235445261001587,
      "learning_rate": 9.185932066752353e-05,
      "loss": 2.8453,
      "step": 171435
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.936873435974121,
      "learning_rate": 9.18563748049203e-05,
      "loss": 3.1202,
      "step": 171436
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.016510248184204,
      "learning_rate": 9.185342898101513e-05,
      "loss": 3.0853,
      "step": 171437
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4965269565582275,
      "learning_rate": 9.18504831958081e-05,
      "loss": 2.7258,
      "step": 171438
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4839563369750977,
      "learning_rate": 9.184753744930011e-05,
      "loss": 2.9103,
      "step": 171439
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.466646671295166,
      "learning_rate": 9.18445917414915e-05,
      "loss": 2.789,
      "step": 171440
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.908170461654663,
      "learning_rate": 9.184164607238303e-05,
      "loss": 3.1562,
      "step": 171441
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.775036573410034,
      "learning_rate": 9.183870044197501e-05,
      "loss": 2.9657,
      "step": 171442
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6903038024902344,
      "learning_rate": 9.183575485026826e-05,
      "loss": 2.9674,
      "step": 171443
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.843552350997925,
      "learning_rate": 9.183280929726302e-05,
      "loss": 2.9007,
      "step": 171444
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.549923419952393,
      "learning_rate": 9.18298637829601e-05,
      "loss": 2.7569,
      "step": 171445
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.707404136657715,
      "learning_rate": 9.18269183073598e-05,
      "loss": 2.8793,
      "step": 171446
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3752498626708984,
      "learning_rate": 9.182397287046292e-05,
      "loss": 2.8266,
      "step": 171447
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.442373037338257,
      "learning_rate": 9.182102747226981e-05,
      "loss": 3.0639,
      "step": 171448
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.132798433303833,
      "learning_rate": 9.181808211278126e-05,
      "loss": 3.007,
      "step": 171449
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4151103496551514,
      "learning_rate": 9.18151367919975e-05,
      "loss": 3.0624,
      "step": 171450
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1011109352111816,
      "learning_rate": 9.181219150991934e-05,
      "loss": 2.9815,
      "step": 171451
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.513911008834839,
      "learning_rate": 9.180924626654709e-05,
      "loss": 3.1424,
      "step": 171452
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6708085536956787,
      "learning_rate": 9.180630106188154e-05,
      "loss": 3.0243,
      "step": 171453
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8103573322296143,
      "learning_rate": 9.180335589592306e-05,
      "loss": 3.0814,
      "step": 171454
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.431236743927002,
      "learning_rate": 9.180041076867242e-05,
      "loss": 2.9007,
      "step": 171455
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.534637451171875,
      "learning_rate": 9.179746568012985e-05,
      "loss": 3.0497,
      "step": 171456
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.83225417137146,
      "learning_rate": 9.179452063029614e-05,
      "loss": 3.0278,
      "step": 171457
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.048255443572998,
      "learning_rate": 9.179157561917167e-05,
      "loss": 2.9595,
      "step": 171458
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6933722496032715,
      "learning_rate": 9.178863064675714e-05,
      "loss": 2.9911,
      "step": 171459
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.280801296234131,
      "learning_rate": 9.178568571305298e-05,
      "loss": 3.1455,
      "step": 171460
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.420224189758301,
      "learning_rate": 9.178274081805998e-05,
      "loss": 2.9473,
      "step": 171461
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.579819440841675,
      "learning_rate": 9.177979596177829e-05,
      "loss": 2.9727,
      "step": 171462
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2947170734405518,
      "learning_rate": 9.177685114420877e-05,
      "loss": 2.8971,
      "step": 171463
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0531325340270996,
      "learning_rate": 9.177390636535177e-05,
      "loss": 2.9008,
      "step": 171464
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.849857807159424,
      "learning_rate": 9.177096162520803e-05,
      "loss": 3.0204,
      "step": 171465
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5645949840545654,
      "learning_rate": 9.176801692377791e-05,
      "loss": 2.7598,
      "step": 171466
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4121172428131104,
      "learning_rate": 9.17650722610622e-05,
      "loss": 3.055,
      "step": 171467
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.15448260307312,
      "learning_rate": 9.176212763706115e-05,
      "loss": 3.0032,
      "step": 171468
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8206300735473633,
      "learning_rate": 9.17591830517755e-05,
      "loss": 2.8712,
      "step": 171469
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.98127818107605,
      "learning_rate": 9.17562385052057e-05,
      "loss": 2.9118,
      "step": 171470
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.532599449157715,
      "learning_rate": 9.17532939973524e-05,
      "loss": 2.7761,
      "step": 171471
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.958728551864624,
      "learning_rate": 9.175034952821602e-05,
      "loss": 2.9702,
      "step": 171472
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2517452239990234,
      "learning_rate": 9.174740509779736e-05,
      "loss": 2.9338,
      "step": 171473
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9119367599487305,
      "learning_rate": 9.174446070609661e-05,
      "loss": 2.999,
      "step": 171474
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8558437824249268,
      "learning_rate": 9.174151635311462e-05,
      "loss": 2.8591,
      "step": 171475
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.312793254852295,
      "learning_rate": 9.173857203885165e-05,
      "loss": 2.8456,
      "step": 171476
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3575785160064697,
      "learning_rate": 9.173562776330856e-05,
      "loss": 3.0045,
      "step": 171477
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9432382583618164,
      "learning_rate": 9.173268352648564e-05,
      "loss": 2.7086,
      "step": 171478
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.085058689117432,
      "learning_rate": 9.172973932838362e-05,
      "loss": 3.0828,
      "step": 171479
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7559075355529785,
      "learning_rate": 9.1726795169003e-05,
      "loss": 2.9654,
      "step": 171480
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7841286659240723,
      "learning_rate": 9.17238510483443e-05,
      "loss": 2.9836,
      "step": 171481
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.643064022064209,
      "learning_rate": 9.172090696640796e-05,
      "loss": 2.7468,
      "step": 171482
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9896020889282227,
      "learning_rate": 9.171796292319474e-05,
      "loss": 2.7576,
      "step": 171483
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.065881729125977,
      "learning_rate": 9.171501891870495e-05,
      "loss": 2.9972,
      "step": 171484
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4852046966552734,
      "learning_rate": 9.171207495293942e-05,
      "loss": 2.8553,
      "step": 171485
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.878767251968384,
      "learning_rate": 9.17091310258985e-05,
      "loss": 2.7303,
      "step": 171486
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4154751300811768,
      "learning_rate": 9.170618713758282e-05,
      "loss": 2.7018,
      "step": 171487
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8346331119537354,
      "learning_rate": 9.170324328799279e-05,
      "loss": 2.8708,
      "step": 171488
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7775776386260986,
      "learning_rate": 9.170029947712915e-05,
      "loss": 3.1326,
      "step": 171489
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5374510288238525,
      "learning_rate": 9.169735570499222e-05,
      "loss": 3.0628,
      "step": 171490
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.612894296646118,
      "learning_rate": 9.169441197158283e-05,
      "loss": 2.9544,
      "step": 171491
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.759597063064575,
      "learning_rate": 9.169146827690139e-05,
      "loss": 2.8969,
      "step": 171492
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1071841716766357,
      "learning_rate": 9.16885246209483e-05,
      "loss": 2.8661,
      "step": 171493
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.1750571727752686,
      "learning_rate": 9.168558100372435e-05,
      "loss": 2.6171,
      "step": 171494
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.661949396133423,
      "learning_rate": 9.168263742523e-05,
      "loss": 2.9538,
      "step": 171495
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.890841007232666,
      "learning_rate": 9.16796938854657e-05,
      "loss": 3.0114,
      "step": 171496
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.107465744018555,
      "learning_rate": 9.167675038443216e-05,
      "loss": 2.9294,
      "step": 171497
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.339202404022217,
      "learning_rate": 9.167380692212982e-05,
      "loss": 2.7381,
      "step": 171498
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2962658405303955,
      "learning_rate": 9.167086349855918e-05,
      "loss": 2.9895,
      "step": 171499
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.84625244140625,
      "learning_rate": 9.166792011372092e-05,
      "loss": 2.8924,
      "step": 171500
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.40596342086792,
      "learning_rate": 9.166497676761548e-05,
      "loss": 3.0161,
      "step": 171501
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.709242105484009,
      "learning_rate": 9.16620334602435e-05,
      "loss": 2.7258,
      "step": 171502
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.002318859100342,
      "learning_rate": 9.16590901916055e-05,
      "loss": 2.7337,
      "step": 171503
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3665175437927246,
      "learning_rate": 9.165614696170204e-05,
      "loss": 3.2431,
      "step": 171504
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0621137619018555,
      "learning_rate": 9.165320377053349e-05,
      "loss": 3.1112,
      "step": 171505
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6182429790496826,
      "learning_rate": 9.165026061810069e-05,
      "loss": 3.1455,
      "step": 171506
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7958216667175293,
      "learning_rate": 9.164731750440391e-05,
      "loss": 3.0376,
      "step": 171507
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.976362705230713,
      "learning_rate": 9.16443744294439e-05,
      "loss": 2.8982,
      "step": 171508
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4533445835113525,
      "learning_rate": 9.164143139322118e-05,
      "loss": 2.8271,
      "step": 171509
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.384918451309204,
      "learning_rate": 9.163848839573623e-05,
      "loss": 3.0137,
      "step": 171510
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8673548698425293,
      "learning_rate": 9.163554543698953e-05,
      "loss": 2.8901,
      "step": 171511
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4535341262817383,
      "learning_rate": 9.163260251698182e-05,
      "loss": 3.0203,
      "step": 171512
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1159355640411377,
      "learning_rate": 9.16296596357134e-05,
      "loss": 2.7503,
      "step": 171513
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7446112632751465,
      "learning_rate": 9.16267167931851e-05,
      "loss": 2.9768,
      "step": 171514
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.943868637084961,
      "learning_rate": 9.162377398939722e-05,
      "loss": 2.9306,
      "step": 171515
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.021179676055908,
      "learning_rate": 9.162083122435062e-05,
      "loss": 3.1931,
      "step": 171516
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3983936309814453,
      "learning_rate": 9.161788849804541e-05,
      "loss": 2.8298,
      "step": 171517
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4042015075683594,
      "learning_rate": 9.161494581048249e-05,
      "loss": 3.0303,
      "step": 171518
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5148608684539795,
      "learning_rate": 9.16120031616622e-05,
      "loss": 2.9333,
      "step": 171519
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.685096025466919,
      "learning_rate": 9.160906055158524e-05,
      "loss": 2.613,
      "step": 171520
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6506450176239014,
      "learning_rate": 9.160611798025201e-05,
      "loss": 3.1219,
      "step": 171521
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5537853240966797,
      "learning_rate": 9.160317544766331e-05,
      "loss": 3.0829,
      "step": 171522
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.82558012008667,
      "learning_rate": 9.160023295381937e-05,
      "loss": 2.7467,
      "step": 171523
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.31130051612854,
      "learning_rate": 9.159729049872091e-05,
      "loss": 3.0184,
      "step": 171524
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.990732192993164,
      "learning_rate": 9.159434808236842e-05,
      "loss": 2.9117,
      "step": 171525
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8220229148864746,
      "learning_rate": 9.159140570476252e-05,
      "loss": 2.7852,
      "step": 171526
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.557274580001831,
      "learning_rate": 9.158846336590363e-05,
      "loss": 3.0819,
      "step": 171527
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.500980854034424,
      "learning_rate": 9.158552106579258e-05,
      "loss": 3.066,
      "step": 171528
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.426668405532837,
      "learning_rate": 9.158257880442953e-05,
      "loss": 2.9707,
      "step": 171529
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4674205780029297,
      "learning_rate": 9.157963658181528e-05,
      "loss": 2.8797,
      "step": 171530
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6470448970794678,
      "learning_rate": 9.157669439795025e-05,
      "loss": 2.8372,
      "step": 171531
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.779951572418213,
      "learning_rate": 9.157375225283511e-05,
      "loss": 2.9573,
      "step": 171532
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.707275390625,
      "learning_rate": 9.157081014647025e-05,
      "loss": 3.2655,
      "step": 171533
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.817786931991577,
      "learning_rate": 9.15678680788565e-05,
      "loss": 2.8912,
      "step": 171534
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8527915477752686,
      "learning_rate": 9.156492604999405e-05,
      "loss": 2.9674,
      "step": 171535
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.582500457763672,
      "learning_rate": 9.156198405988372e-05,
      "loss": 3.0725,
      "step": 171536
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.695258617401123,
      "learning_rate": 9.155904210852582e-05,
      "loss": 3.2099,
      "step": 171537
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3325881958007812,
      "learning_rate": 9.155610019592113e-05,
      "loss": 3.0492,
      "step": 171538
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.718466281890869,
      "learning_rate": 9.155315832207001e-05,
      "loss": 2.7431,
      "step": 171539
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3142693042755127,
      "learning_rate": 9.155021648697329e-05,
      "loss": 2.6135,
      "step": 171540
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.326608180999756,
      "learning_rate": 9.154727469063112e-05,
      "loss": 2.937,
      "step": 171541
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0152177810668945,
      "learning_rate": 9.154433293304436e-05,
      "loss": 2.8524,
      "step": 171542
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2325990200042725,
      "learning_rate": 9.154139121421334e-05,
      "loss": 3.0379,
      "step": 171543
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3761119842529297,
      "learning_rate": 9.153844953413877e-05,
      "loss": 2.6619,
      "step": 171544
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.4452967643737793,
      "learning_rate": 9.153550789282107e-05,
      "loss": 2.7492,
      "step": 171545
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.271243095397949,
      "learning_rate": 9.153256629026107e-05,
      "loss": 3.1425,
      "step": 171546
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2619497776031494,
      "learning_rate": 9.152962472645886e-05,
      "loss": 2.643,
      "step": 171547
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.211329936981201,
      "learning_rate": 9.152668320141535e-05,
      "loss": 2.9096,
      "step": 171548
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.3671393394470215,
      "learning_rate": 9.152374171513087e-05,
      "loss": 2.7966,
      "step": 171549
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.445882797241211,
      "learning_rate": 9.152080026760615e-05,
      "loss": 2.7769,
      "step": 171550
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1235125064849854,
      "learning_rate": 9.151785885884156e-05,
      "loss": 3.1428,
      "step": 171551
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.703061103820801,
      "learning_rate": 9.151491748883784e-05,
      "loss": 2.9201,
      "step": 171552
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.9120352268218994,
      "learning_rate": 9.15119761575954e-05,
      "loss": 2.6017,
      "step": 171553
    },
    {
      "epoch": 2.23,
      "grad_norm": 4.123880386352539,
      "learning_rate": 9.150903486511486e-05,
      "loss": 2.7913,
      "step": 171554
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1056041717529297,
      "learning_rate": 9.150609361139662e-05,
      "loss": 2.9748,
      "step": 171555
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3935763835906982,
      "learning_rate": 9.15031523964414e-05,
      "loss": 3.0472,
      "step": 171556
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.854459524154663,
      "learning_rate": 9.15002112202496e-05,
      "loss": 2.9698,
      "step": 171557
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7682294845581055,
      "learning_rate": 9.149727008282196e-05,
      "loss": 2.7802,
      "step": 171558
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7277791500091553,
      "learning_rate": 9.149432898415889e-05,
      "loss": 2.9047,
      "step": 171559
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4477105140686035,
      "learning_rate": 9.149138792426096e-05,
      "loss": 2.6677,
      "step": 171560
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.686357021331787,
      "learning_rate": 9.148844690312863e-05,
      "loss": 2.9074,
      "step": 171561
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4795801639556885,
      "learning_rate": 9.148550592076262e-05,
      "loss": 2.8653,
      "step": 171562
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1779022216796875,
      "learning_rate": 9.148256497716331e-05,
      "loss": 3.1253,
      "step": 171563
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5638110637664795,
      "learning_rate": 9.147962407233142e-05,
      "loss": 2.9601,
      "step": 171564
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5993385314941406,
      "learning_rate": 9.147668320626741e-05,
      "loss": 3.1787,
      "step": 171565
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5915541648864746,
      "learning_rate": 9.147374237897182e-05,
      "loss": 2.8477,
      "step": 171566
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7174534797668457,
      "learning_rate": 9.147080159044509e-05,
      "loss": 2.8388,
      "step": 171567
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.183894395828247,
      "learning_rate": 9.146786084068796e-05,
      "loss": 2.9497,
      "step": 171568
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.657633066177368,
      "learning_rate": 9.14649201297008e-05,
      "loss": 3.0079,
      "step": 171569
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.490036725997925,
      "learning_rate": 9.146197945748437e-05,
      "loss": 2.7755,
      "step": 171570
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3682451248168945,
      "learning_rate": 9.145903882403908e-05,
      "loss": 3.0387,
      "step": 171571
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3801608085632324,
      "learning_rate": 9.145609822936549e-05,
      "loss": 2.7936,
      "step": 171572
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7518105506896973,
      "learning_rate": 9.145315767346405e-05,
      "loss": 2.9514,
      "step": 171573
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2935118675231934,
      "learning_rate": 9.145021715633549e-05,
      "loss": 2.8111,
      "step": 171574
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.758714199066162,
      "learning_rate": 9.144727667798019e-05,
      "loss": 2.9633,
      "step": 171575
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.1632425785064697,
      "learning_rate": 9.144433623839886e-05,
      "loss": 2.931,
      "step": 171576
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6404476165771484,
      "learning_rate": 9.144139583759199e-05,
      "loss": 2.856,
      "step": 171577
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.622192859649658,
      "learning_rate": 9.143845547555997e-05,
      "loss": 3.1401,
      "step": 171578
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.621034622192383,
      "learning_rate": 9.14355151523036e-05,
      "loss": 2.9137,
      "step": 171579
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8658676147460938,
      "learning_rate": 9.143257486782331e-05,
      "loss": 3.1363,
      "step": 171580
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8010923862457275,
      "learning_rate": 9.142963462211953e-05,
      "loss": 3.2385,
      "step": 171581
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.004227638244629,
      "learning_rate": 9.1426694415193e-05,
      "loss": 3.2276,
      "step": 171582
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.952282667160034,
      "learning_rate": 9.142375424704422e-05,
      "loss": 2.8178,
      "step": 171583
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.332937717437744,
      "learning_rate": 9.142081411767359e-05,
      "loss": 2.6747,
      "step": 171584
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3692612648010254,
      "learning_rate": 9.141787402708187e-05,
      "loss": 2.8378,
      "step": 171585
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.587193250656128,
      "learning_rate": 9.141493397526947e-05,
      "loss": 2.8102,
      "step": 171586
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.221405029296875,
      "learning_rate": 9.141199396223691e-05,
      "loss": 2.6836,
      "step": 171587
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5158162117004395,
      "learning_rate": 9.140905398798489e-05,
      "loss": 2.9275,
      "step": 171588
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.908237934112549,
      "learning_rate": 9.140611405251385e-05,
      "loss": 2.8014,
      "step": 171589
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.1289501190185547,
      "learning_rate": 9.140317415582426e-05,
      "loss": 2.7764,
      "step": 171590
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7271790504455566,
      "learning_rate": 9.140023429791688e-05,
      "loss": 2.9544,
      "step": 171591
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0300209522247314,
      "learning_rate": 9.139729447879201e-05,
      "loss": 2.9492,
      "step": 171592
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0691866874694824,
      "learning_rate": 9.139435469845042e-05,
      "loss": 2.7333,
      "step": 171593
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.414337635040283,
      "learning_rate": 9.139141495689257e-05,
      "loss": 3.0521,
      "step": 171594
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.52278470993042,
      "learning_rate": 9.138847525411896e-05,
      "loss": 3.1327,
      "step": 171595
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.442577600479126,
      "learning_rate": 9.13855355901301e-05,
      "loss": 2.9309,
      "step": 171596
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.5275638103485107,
      "learning_rate": 9.138259596492671e-05,
      "loss": 2.92,
      "step": 171597
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7807672023773193,
      "learning_rate": 9.137965637850913e-05,
      "loss": 3.0116,
      "step": 171598
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6433537006378174,
      "learning_rate": 9.137671683087812e-05,
      "loss": 2.8764,
      "step": 171599
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4676287174224854,
      "learning_rate": 9.137377732203398e-05,
      "loss": 2.8575,
      "step": 171600
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.000844955444336,
      "learning_rate": 9.137083785197762e-05,
      "loss": 2.7823,
      "step": 171601
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6935441493988037,
      "learning_rate": 9.136789842070916e-05,
      "loss": 2.8767,
      "step": 171602
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2573397159576416,
      "learning_rate": 9.136495902822944e-05,
      "loss": 2.9077,
      "step": 171603
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5495877265930176,
      "learning_rate": 9.13620196745388e-05,
      "loss": 3.0762,
      "step": 171604
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.909381151199341,
      "learning_rate": 9.135908035963803e-05,
      "loss": 3.0624,
      "step": 171605
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.0875165462493896,
      "learning_rate": 9.135614108352743e-05,
      "loss": 3.0544,
      "step": 171606
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.59212589263916,
      "learning_rate": 9.135320184620783e-05,
      "loss": 3.0579,
      "step": 171607
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.730144739151001,
      "learning_rate": 9.135026264767947e-05,
      "loss": 2.8905,
      "step": 171608
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8602917194366455,
      "learning_rate": 9.134732348794308e-05,
      "loss": 2.9169,
      "step": 171609
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5853993892669678,
      "learning_rate": 9.134438436699908e-05,
      "loss": 3.0356,
      "step": 171610
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4185101985931396,
      "learning_rate": 9.13414452848482e-05,
      "loss": 3.075,
      "step": 171611
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5619003772735596,
      "learning_rate": 9.133850624149077e-05,
      "loss": 2.9234,
      "step": 171612
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.8729636669158936,
      "learning_rate": 9.133556723692768e-05,
      "loss": 2.9318,
      "step": 171613
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6969780921936035,
      "learning_rate": 9.133262827115904e-05,
      "loss": 3.0327,
      "step": 171614
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5857768058776855,
      "learning_rate": 9.132968934418569e-05,
      "loss": 2.9353,
      "step": 171615
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.951429843902588,
      "learning_rate": 9.1326750456008e-05,
      "loss": 2.8984,
      "step": 171616
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.775725841522217,
      "learning_rate": 9.132381160662668e-05,
      "loss": 2.857,
      "step": 171617
    },
    {
      "epoch": 2.23,
      "grad_norm": 5.83062744140625,
      "learning_rate": 9.132087279604213e-05,
      "loss": 2.8559,
      "step": 171618
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.409583330154419,
      "learning_rate": 9.131793402425508e-05,
      "loss": 2.9566,
      "step": 171619
    },
    {
      "epoch": 2.23,
      "grad_norm": 6.622041702270508,
      "learning_rate": 9.131499529126592e-05,
      "loss": 2.7613,
      "step": 171620
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8034448623657227,
      "learning_rate": 9.131205659707526e-05,
      "loss": 3.1559,
      "step": 171621
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.705674648284912,
      "learning_rate": 9.130911794168358e-05,
      "loss": 2.7905,
      "step": 171622
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.170503616333008,
      "learning_rate": 9.130617932509152e-05,
      "loss": 2.851,
      "step": 171623
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.5326287746429443,
      "learning_rate": 9.130324074729947e-05,
      "loss": 2.9175,
      "step": 171624
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3328750133514404,
      "learning_rate": 9.13003022083082e-05,
      "loss": 2.9887,
      "step": 171625
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7303555011749268,
      "learning_rate": 9.129736370811816e-05,
      "loss": 3.0748,
      "step": 171626
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.2511022090911865,
      "learning_rate": 9.129442524672989e-05,
      "loss": 2.8686,
      "step": 171627
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.735398769378662,
      "learning_rate": 9.12914868241438e-05,
      "loss": 3.1609,
      "step": 171628
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.912778854370117,
      "learning_rate": 9.128854844036068e-05,
      "loss": 3.0036,
      "step": 171629
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.085078477859497,
      "learning_rate": 9.128561009538085e-05,
      "loss": 2.8452,
      "step": 171630
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.737715482711792,
      "learning_rate": 9.128267178920505e-05,
      "loss": 2.6396,
      "step": 171631
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7225100994110107,
      "learning_rate": 9.127973352183376e-05,
      "loss": 2.9973,
      "step": 171632
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.779803991317749,
      "learning_rate": 9.127679529326752e-05,
      "loss": 3.0487,
      "step": 171633
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.362837553024292,
      "learning_rate": 9.127385710350672e-05,
      "loss": 2.9101,
      "step": 171634
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.7137296199798584,
      "learning_rate": 9.127091895255218e-05,
      "loss": 2.8109,
      "step": 171635
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.6675970554351807,
      "learning_rate": 9.126798084040421e-05,
      "loss": 2.9143,
      "step": 171636
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.394912004470825,
      "learning_rate": 9.126504276706356e-05,
      "loss": 2.8499,
      "step": 171637
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3333282470703125,
      "learning_rate": 9.12621047325307e-05,
      "loss": 2.9856,
      "step": 171638
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.430893898010254,
      "learning_rate": 9.125916673680613e-05,
      "loss": 2.7942,
      "step": 171639
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.3575260639190674,
      "learning_rate": 9.125622877989035e-05,
      "loss": 2.669,
      "step": 171640
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3694260120391846,
      "learning_rate": 9.125329086178404e-05,
      "loss": 2.9033,
      "step": 171641
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.9337408542633057,
      "learning_rate": 9.125035298248761e-05,
      "loss": 2.8727,
      "step": 171642
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.372187614440918,
      "learning_rate": 9.124741514200175e-05,
      "loss": 3.1862,
      "step": 171643
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.3801321983337402,
      "learning_rate": 9.124447734032696e-05,
      "loss": 2.9384,
      "step": 171644
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.2469520568847656,
      "learning_rate": 9.124153957746375e-05,
      "loss": 2.8996,
      "step": 171645
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4820542335510254,
      "learning_rate": 9.123860185341259e-05,
      "loss": 2.7624,
      "step": 171646
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.173240900039673,
      "learning_rate": 9.123566416817422e-05,
      "loss": 2.9144,
      "step": 171647
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.736654758453369,
      "learning_rate": 9.123272652174898e-05,
      "loss": 2.8679,
      "step": 171648
    },
    {
      "epoch": 2.23,
      "grad_norm": 3.119910955429077,
      "learning_rate": 9.122978891413762e-05,
      "loss": 2.9458,
      "step": 171649
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.4218175411224365,
      "learning_rate": 9.122685134534056e-05,
      "loss": 2.8809,
      "step": 171650
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.495389938354492,
      "learning_rate": 9.122391381535838e-05,
      "loss": 2.708,
      "step": 171651
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2879953384399414,
      "learning_rate": 9.12209763241915e-05,
      "loss": 3.0236,
      "step": 171652
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8449854850769043,
      "learning_rate": 9.12180388718407e-05,
      "loss": 3.0524,
      "step": 171653
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5056793689727783,
      "learning_rate": 9.121510145830633e-05,
      "loss": 2.7967,
      "step": 171654
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.610049247741699,
      "learning_rate": 9.121216408358906e-05,
      "loss": 2.839,
      "step": 171655
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0600392818450928,
      "learning_rate": 9.120922674768942e-05,
      "loss": 2.9036,
      "step": 171656
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8304667472839355,
      "learning_rate": 9.120628945060793e-05,
      "loss": 2.9972,
      "step": 171657
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.874654769897461,
      "learning_rate": 9.120335219234504e-05,
      "loss": 2.7988,
      "step": 171658
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2776591777801514,
      "learning_rate": 9.120041497290148e-05,
      "loss": 2.9328,
      "step": 171659
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.291146755218506,
      "learning_rate": 9.11974777922776e-05,
      "loss": 2.9717,
      "step": 171660
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.925302267074585,
      "learning_rate": 9.119454065047416e-05,
      "loss": 2.9328,
      "step": 171661
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.493136405944824,
      "learning_rate": 9.119160354749157e-05,
      "loss": 2.872,
      "step": 171662
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.408245086669922,
      "learning_rate": 9.118866648333033e-05,
      "loss": 2.837,
      "step": 171663
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5442440509796143,
      "learning_rate": 9.118572945799115e-05,
      "loss": 2.8147,
      "step": 171664
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.945268154144287,
      "learning_rate": 9.118279247147446e-05,
      "loss": 3.0841,
      "step": 171665
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6136462688446045,
      "learning_rate": 9.117985552378075e-05,
      "loss": 2.8627,
      "step": 171666
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.620997428894043,
      "learning_rate": 9.117691861491075e-05,
      "loss": 2.8685,
      "step": 171667
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3722856044769287,
      "learning_rate": 9.117398174486493e-05,
      "loss": 2.8742,
      "step": 171668
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.076022148132324,
      "learning_rate": 9.117104491364368e-05,
      "loss": 2.8289,
      "step": 171669
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.223177671432495,
      "learning_rate": 9.116810812124775e-05,
      "loss": 3.1394,
      "step": 171670
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7926602363586426,
      "learning_rate": 9.116517136767766e-05,
      "loss": 2.882,
      "step": 171671
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.708224296569824,
      "learning_rate": 9.116223465293378e-05,
      "loss": 3.228,
      "step": 171672
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.006968975067139,
      "learning_rate": 9.115929797701687e-05,
      "loss": 2.8995,
      "step": 171673
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.425508975982666,
      "learning_rate": 9.11563613399274e-05,
      "loss": 3.088,
      "step": 171674
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.422142505645752,
      "learning_rate": 9.115342474166584e-05,
      "loss": 2.9708,
      "step": 171675
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4293410778045654,
      "learning_rate": 9.115048818223284e-05,
      "loss": 2.8849,
      "step": 171676
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.582061767578125,
      "learning_rate": 9.114755166162884e-05,
      "loss": 3.0165,
      "step": 171677
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.114292860031128,
      "learning_rate": 9.114461517985456e-05,
      "loss": 3.0314,
      "step": 171678
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.420280933380127,
      "learning_rate": 9.114167873691043e-05,
      "loss": 3.0353,
      "step": 171679
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.572564125061035,
      "learning_rate": 9.1138742332797e-05,
      "loss": 2.8587,
      "step": 171680
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4023489952087402,
      "learning_rate": 9.11358059675147e-05,
      "loss": 3.1151,
      "step": 171681
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4149117469787598,
      "learning_rate": 9.113286964106435e-05,
      "loss": 3.2334,
      "step": 171682
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.377800703048706,
      "learning_rate": 9.112993335344621e-05,
      "loss": 3.0434,
      "step": 171683
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4244678020477295,
      "learning_rate": 9.112699710466105e-05,
      "loss": 2.7542,
      "step": 171684
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.616992950439453,
      "learning_rate": 9.112406089470932e-05,
      "loss": 2.9118,
      "step": 171685
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4696853160858154,
      "learning_rate": 9.11211247235915e-05,
      "loss": 2.8028,
      "step": 171686
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8817389011383057,
      "learning_rate": 9.111818859130828e-05,
      "loss": 2.8841,
      "step": 171687
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.562312126159668,
      "learning_rate": 9.111525249786014e-05,
      "loss": 2.8344,
      "step": 171688
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.364197015762329,
      "learning_rate": 9.111231644324753e-05,
      "loss": 2.9345,
      "step": 171689
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7708168029785156,
      "learning_rate": 9.110938042747117e-05,
      "loss": 2.8079,
      "step": 171690
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.377936601638794,
      "learning_rate": 9.11064444505314e-05,
      "loss": 3.1164,
      "step": 171691
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4649248123168945,
      "learning_rate": 9.110350851242904e-05,
      "loss": 3.1916,
      "step": 171692
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.799778461456299,
      "learning_rate": 9.110057261316444e-05,
      "loss": 3.0023,
      "step": 171693
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6431190967559814,
      "learning_rate": 9.109763675273817e-05,
      "loss": 2.9448,
      "step": 171694
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.426335096359253,
      "learning_rate": 9.109470093115073e-05,
      "loss": 3.0214,
      "step": 171695
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.286587953567505,
      "learning_rate": 9.109176514840279e-05,
      "loss": 2.9826,
      "step": 171696
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.125993013381958,
      "learning_rate": 9.108882940449478e-05,
      "loss": 2.923,
      "step": 171697
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7191221714019775,
      "learning_rate": 9.108589369942738e-05,
      "loss": 3.0237,
      "step": 171698
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4979467391967773,
      "learning_rate": 9.108295803320108e-05,
      "loss": 2.8459,
      "step": 171699
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.73329496383667,
      "learning_rate": 9.108002240581635e-05,
      "loss": 3.0043,
      "step": 171700
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.663069248199463,
      "learning_rate": 9.10770868172737e-05,
      "loss": 2.8483,
      "step": 171701
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3859496116638184,
      "learning_rate": 9.107415126757388e-05,
      "loss": 3.2427,
      "step": 171702
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.613374948501587,
      "learning_rate": 9.107121575671722e-05,
      "loss": 3.0569,
      "step": 171703
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.591496706008911,
      "learning_rate": 9.106828028470449e-05,
      "loss": 3.0485,
      "step": 171704
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.981126308441162,
      "learning_rate": 9.106534485153605e-05,
      "loss": 3.0886,
      "step": 171705
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2909185886383057,
      "learning_rate": 9.106240945721254e-05,
      "loss": 2.7219,
      "step": 171706
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.985388994216919,
      "learning_rate": 9.105947410173439e-05,
      "loss": 3.0677,
      "step": 171707
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.232532024383545,
      "learning_rate": 9.10565387851023e-05,
      "loss": 2.745,
      "step": 171708
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8749754428863525,
      "learning_rate": 9.105360350731667e-05,
      "loss": 2.8812,
      "step": 171709
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9165594577789307,
      "learning_rate": 9.105066826837819e-05,
      "loss": 2.9748,
      "step": 171710
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.697474956512451,
      "learning_rate": 9.104773306828734e-05,
      "loss": 3.0729,
      "step": 171711
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.9518001079559326,
      "learning_rate": 9.104479790704465e-05,
      "loss": 3.0116,
      "step": 171712
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5173072814941406,
      "learning_rate": 9.104186278465062e-05,
      "loss": 2.8493,
      "step": 171713
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.350738048553467,
      "learning_rate": 9.103892770110591e-05,
      "loss": 2.8396,
      "step": 171714
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1978445053100586,
      "learning_rate": 9.103599265641093e-05,
      "loss": 2.8728,
      "step": 171715
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.902113437652588,
      "learning_rate": 9.10330576505664e-05,
      "loss": 3.0942,
      "step": 171716
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.105990171432495,
      "learning_rate": 9.103012268357276e-05,
      "loss": 2.9535,
      "step": 171717
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4791388511657715,
      "learning_rate": 9.102718775543057e-05,
      "loss": 2.9223,
      "step": 171718
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5493152141571045,
      "learning_rate": 9.102425286614027e-05,
      "loss": 2.844,
      "step": 171719
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.046048402786255,
      "learning_rate": 9.10213180157026e-05,
      "loss": 2.9482,
      "step": 171720
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4466888904571533,
      "learning_rate": 9.101838320411792e-05,
      "loss": 3.1248,
      "step": 171721
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4751038551330566,
      "learning_rate": 9.101544843138699e-05,
      "loss": 2.9697,
      "step": 171722
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.698383092880249,
      "learning_rate": 9.10125136975102e-05,
      "loss": 2.7328,
      "step": 171723
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7679924964904785,
      "learning_rate": 9.100957900248815e-05,
      "loss": 2.9936,
      "step": 171724
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4269866943359375,
      "learning_rate": 9.100664434632123e-05,
      "loss": 2.9665,
      "step": 171725
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.553100824356079,
      "learning_rate": 9.100370972901024e-05,
      "loss": 2.9873,
      "step": 171726
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5355007648468018,
      "learning_rate": 9.100077515055551e-05,
      "loss": 3.0873,
      "step": 171727
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4621686935424805,
      "learning_rate": 9.099784061095778e-05,
      "loss": 3.0371,
      "step": 171728
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.183135509490967,
      "learning_rate": 9.099490611021747e-05,
      "loss": 2.874,
      "step": 171729
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5936388969421387,
      "learning_rate": 9.099197164833519e-05,
      "loss": 2.7404,
      "step": 171730
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7595701217651367,
      "learning_rate": 9.098903722531135e-05,
      "loss": 2.6555,
      "step": 171731
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2808961868286133,
      "learning_rate": 9.098610284114666e-05,
      "loss": 2.7559,
      "step": 171732
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3828680515289307,
      "learning_rate": 9.098316849584153e-05,
      "loss": 2.8788,
      "step": 171733
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8488011360168457,
      "learning_rate": 9.098023418939668e-05,
      "loss": 2.7261,
      "step": 171734
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.163360595703125,
      "learning_rate": 9.097729992181253e-05,
      "loss": 3.1314,
      "step": 171735
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3451595306396484,
      "learning_rate": 9.097436569308963e-05,
      "loss": 3.1874,
      "step": 171736
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.54276180267334,
      "learning_rate": 9.097143150322846e-05,
      "loss": 2.8837,
      "step": 171737
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3171844482421875,
      "learning_rate": 9.096849735222975e-05,
      "loss": 3.0829,
      "step": 171738
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.846602201461792,
      "learning_rate": 9.096556324009383e-05,
      "loss": 3.0358,
      "step": 171739
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2463154792785645,
      "learning_rate": 9.09626291668215e-05,
      "loss": 3.0961,
      "step": 171740
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8206751346588135,
      "learning_rate": 9.095969513241309e-05,
      "loss": 2.9743,
      "step": 171741
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2736661434173584,
      "learning_rate": 9.095676113686927e-05,
      "loss": 3.0244,
      "step": 171742
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.5452818870544434,
      "learning_rate": 9.095382718019043e-05,
      "loss": 2.8486,
      "step": 171743
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0544235706329346,
      "learning_rate": 9.09508932623773e-05,
      "loss": 3.0214,
      "step": 171744
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9253365993499756,
      "learning_rate": 9.094795938343025e-05,
      "loss": 2.5723,
      "step": 171745
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7766261100769043,
      "learning_rate": 9.094502554335003e-05,
      "loss": 2.92,
      "step": 171746
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.380851984024048,
      "learning_rate": 9.094209174213706e-05,
      "loss": 2.8161,
      "step": 171747
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6581990718841553,
      "learning_rate": 9.09391579797919e-05,
      "loss": 2.911,
      "step": 171748
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.3831048011779785,
      "learning_rate": 9.093622425631503e-05,
      "loss": 2.9032,
      "step": 171749
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.90179181098938,
      "learning_rate": 9.093329057170715e-05,
      "loss": 2.6265,
      "step": 171750
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6433823108673096,
      "learning_rate": 9.093035692596861e-05,
      "loss": 3.0473,
      "step": 171751
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0135884284973145,
      "learning_rate": 9.092742331910016e-05,
      "loss": 2.9294,
      "step": 171752
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.391927480697632,
      "learning_rate": 9.092448975110217e-05,
      "loss": 2.9489,
      "step": 171753
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.698580265045166,
      "learning_rate": 9.092155622197532e-05,
      "loss": 2.8811,
      "step": 171754
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6948578357696533,
      "learning_rate": 9.091862273172015e-05,
      "loss": 2.971,
      "step": 171755
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2791748046875,
      "learning_rate": 9.091568928033712e-05,
      "loss": 2.799,
      "step": 171756
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.411770820617676,
      "learning_rate": 9.091275586782672e-05,
      "loss": 2.876,
      "step": 171757
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.995978593826294,
      "learning_rate": 9.090982249418969e-05,
      "loss": 2.9627,
      "step": 171758
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.277940273284912,
      "learning_rate": 9.090688915942637e-05,
      "loss": 3.0516,
      "step": 171759
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6549198627471924,
      "learning_rate": 9.090395586353749e-05,
      "loss": 3.1368,
      "step": 171760
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5233712196350098,
      "learning_rate": 9.090102260652354e-05,
      "loss": 2.8287,
      "step": 171761
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2900562286376953,
      "learning_rate": 9.089808938838491e-05,
      "loss": 2.8272,
      "step": 171762
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5267343521118164,
      "learning_rate": 9.089515620912238e-05,
      "loss": 2.9606,
      "step": 171763
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.31968355178833,
      "learning_rate": 9.08922230687364e-05,
      "loss": 3.043,
      "step": 171764
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.421186923980713,
      "learning_rate": 9.08892899672274e-05,
      "loss": 2.836,
      "step": 171765
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6599950790405273,
      "learning_rate": 9.088635690459612e-05,
      "loss": 2.9056,
      "step": 171766
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2801432609558105,
      "learning_rate": 9.088342388084299e-05,
      "loss": 2.8755,
      "step": 171767
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0709500312805176,
      "learning_rate": 9.088049089596851e-05,
      "loss": 2.6152,
      "step": 171768
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.139359951019287,
      "learning_rate": 9.087755794997342e-05,
      "loss": 3.1028,
      "step": 171769
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5990047454833984,
      "learning_rate": 9.087462504285807e-05,
      "loss": 2.9921,
      "step": 171770
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.534207582473755,
      "learning_rate": 9.087169217462303e-05,
      "loss": 2.9771,
      "step": 171771
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9745898246765137,
      "learning_rate": 9.086875934526895e-05,
      "loss": 2.9054,
      "step": 171772
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.509502649307251,
      "learning_rate": 9.086582655479635e-05,
      "loss": 2.8615,
      "step": 171773
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9796488285064697,
      "learning_rate": 9.086289380320564e-05,
      "loss": 3.0191,
      "step": 171774
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.334327459335327,
      "learning_rate": 9.085996109049756e-05,
      "loss": 2.8915,
      "step": 171775
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.240939140319824,
      "learning_rate": 9.085702841667244e-05,
      "loss": 2.989,
      "step": 171776
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.625774383544922,
      "learning_rate": 9.085409578173108e-05,
      "loss": 3.0162,
      "step": 171777
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3340048789978027,
      "learning_rate": 9.085116318567386e-05,
      "loss": 2.5744,
      "step": 171778
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.658994674682617,
      "learning_rate": 9.084823062850136e-05,
      "loss": 3.0233,
      "step": 171779
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2311153411865234,
      "learning_rate": 9.084529811021406e-05,
      "loss": 3.0045,
      "step": 171780
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.282118320465088,
      "learning_rate": 9.084236563081262e-05,
      "loss": 2.9796,
      "step": 171781
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9525012969970703,
      "learning_rate": 9.083943319029748e-05,
      "loss": 3.2465,
      "step": 171782
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.34946870803833,
      "learning_rate": 9.083650078866929e-05,
      "loss": 3.123,
      "step": 171783
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.010511636734009,
      "learning_rate": 9.083356842592856e-05,
      "loss": 2.8155,
      "step": 171784
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0620484352111816,
      "learning_rate": 9.083063610207582e-05,
      "loss": 3.1594,
      "step": 171785
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.607860565185547,
      "learning_rate": 9.082770381711151e-05,
      "loss": 2.6355,
      "step": 171786
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.775179624557495,
      "learning_rate": 9.08247715710364e-05,
      "loss": 2.9844,
      "step": 171787
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5629079341888428,
      "learning_rate": 9.082183936385078e-05,
      "loss": 2.9809,
      "step": 171788
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9044406414031982,
      "learning_rate": 9.081890719555545e-05,
      "loss": 2.6301,
      "step": 171789
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.5997586250305176,
      "learning_rate": 9.081597506615083e-05,
      "loss": 2.6171,
      "step": 171790
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6912076473236084,
      "learning_rate": 9.081304297563745e-05,
      "loss": 2.9126,
      "step": 171791
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1545073986053467,
      "learning_rate": 9.081011092401583e-05,
      "loss": 2.8251,
      "step": 171792
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.618840217590332,
      "learning_rate": 9.080717891128658e-05,
      "loss": 2.7959,
      "step": 171793
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5636935234069824,
      "learning_rate": 9.08042469374502e-05,
      "loss": 3.0158,
      "step": 171794
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.5064451694488525,
      "learning_rate": 9.08013150025073e-05,
      "loss": 3.037,
      "step": 171795
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9641053676605225,
      "learning_rate": 9.079838310645839e-05,
      "loss": 3.2361,
      "step": 171796
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.476384162902832,
      "learning_rate": 9.079545124930404e-05,
      "loss": 3.0526,
      "step": 171797
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5901412963867188,
      "learning_rate": 9.079251943104468e-05,
      "loss": 2.8233,
      "step": 171798
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6140029430389404,
      "learning_rate": 9.0789587651681e-05,
      "loss": 2.9677,
      "step": 171799
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.697312116622925,
      "learning_rate": 9.07866559112134e-05,
      "loss": 2.8709,
      "step": 171800
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.685418128967285,
      "learning_rate": 9.078372420964259e-05,
      "loss": 2.7988,
      "step": 171801
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.528095006942749,
      "learning_rate": 9.078079254696907e-05,
      "loss": 3.0779,
      "step": 171802
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8678250312805176,
      "learning_rate": 9.07778609231933e-05,
      "loss": 2.7702,
      "step": 171803
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.092710256576538,
      "learning_rate": 9.077492933831581e-05,
      "loss": 2.9488,
      "step": 171804
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4714343547821045,
      "learning_rate": 9.077199779233731e-05,
      "loss": 3.0195,
      "step": 171805
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9995505809783936,
      "learning_rate": 9.076906628525816e-05,
      "loss": 2.9753,
      "step": 171806
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4941458702087402,
      "learning_rate": 9.076613481707905e-05,
      "loss": 2.7878,
      "step": 171807
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.945228099822998,
      "learning_rate": 9.076320338780051e-05,
      "loss": 2.7779,
      "step": 171808
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.05621337890625,
      "learning_rate": 9.0760271997423e-05,
      "loss": 2.7628,
      "step": 171809
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6991829872131348,
      "learning_rate": 9.075734064594702e-05,
      "loss": 2.9141,
      "step": 171810
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.117260932922363,
      "learning_rate": 9.075440933337331e-05,
      "loss": 2.8418,
      "step": 171811
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5258736610412598,
      "learning_rate": 9.075147805970219e-05,
      "loss": 3.0688,
      "step": 171812
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.4586753845214844,
      "learning_rate": 9.074854682493445e-05,
      "loss": 2.8074,
      "step": 171813
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9241716861724854,
      "learning_rate": 9.074561562907048e-05,
      "loss": 2.9171,
      "step": 171814
    },
    {
      "epoch": 2.24,
      "grad_norm": 5.163616180419922,
      "learning_rate": 9.074268447211083e-05,
      "loss": 2.6806,
      "step": 171815
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.68489933013916,
      "learning_rate": 9.073975335405599e-05,
      "loss": 3.0053,
      "step": 171816
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8218576908111572,
      "learning_rate": 9.07368222749067e-05,
      "loss": 3.2652,
      "step": 171817
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8346288204193115,
      "learning_rate": 9.073389123466325e-05,
      "loss": 2.8686,
      "step": 171818
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5405967235565186,
      "learning_rate": 9.073096023332644e-05,
      "loss": 3.0375,
      "step": 171819
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.4283576011657715,
      "learning_rate": 9.072802927089658e-05,
      "loss": 3.0811,
      "step": 171820
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.575979232788086,
      "learning_rate": 9.072509834737454e-05,
      "loss": 2.8361,
      "step": 171821
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6968002319335938,
      "learning_rate": 9.072216746276046e-05,
      "loss": 2.8536,
      "step": 171822
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8012001514434814,
      "learning_rate": 9.071923661705517e-05,
      "loss": 2.9799,
      "step": 171823
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.053989887237549,
      "learning_rate": 9.071630581025901e-05,
      "loss": 2.9945,
      "step": 171824
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2981364727020264,
      "learning_rate": 9.071337504237277e-05,
      "loss": 2.9685,
      "step": 171825
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0296597480773926,
      "learning_rate": 9.071044431339673e-05,
      "loss": 2.8242,
      "step": 171826
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.849200963973999,
      "learning_rate": 9.070751362333181e-05,
      "loss": 2.7261,
      "step": 171827
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4290339946746826,
      "learning_rate": 9.070458297217809e-05,
      "loss": 3.3011,
      "step": 171828
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.413471221923828,
      "learning_rate": 9.070165235993644e-05,
      "loss": 2.9688,
      "step": 171829
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8083598613739014,
      "learning_rate": 9.06987217866072e-05,
      "loss": 2.9158,
      "step": 171830
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.254577159881592,
      "learning_rate": 9.06957912521911e-05,
      "loss": 2.7664,
      "step": 171831
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.488426446914673,
      "learning_rate": 9.069286075668853e-05,
      "loss": 3.1487,
      "step": 171832
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6727819442749023,
      "learning_rate": 9.068993030010031e-05,
      "loss": 2.9958,
      "step": 171833
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.388289451599121,
      "learning_rate": 9.068699988242659e-05,
      "loss": 2.9249,
      "step": 171834
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8895936012268066,
      "learning_rate": 9.068406950366818e-05,
      "loss": 3.2219,
      "step": 171835
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.353731870651245,
      "learning_rate": 9.068113916382546e-05,
      "loss": 2.9367,
      "step": 171836
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6030070781707764,
      "learning_rate": 9.06782088628992e-05,
      "loss": 2.9892,
      "step": 171837
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.337641477584839,
      "learning_rate": 9.06752786008897e-05,
      "loss": 3.1401,
      "step": 171838
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9003968238830566,
      "learning_rate": 9.067234837779768e-05,
      "loss": 2.9816,
      "step": 171839
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7983641624450684,
      "learning_rate": 9.066941819362366e-05,
      "loss": 2.9996,
      "step": 171840
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9113075733184814,
      "learning_rate": 9.066648804836812e-05,
      "loss": 2.9571,
      "step": 171841
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6316158771514893,
      "learning_rate": 9.066355794203153e-05,
      "loss": 3.0798,
      "step": 171842
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.293504476547241,
      "learning_rate": 9.066062787461466e-05,
      "loss": 2.9446,
      "step": 171843
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9934988021850586,
      "learning_rate": 9.065769784611784e-05,
      "loss": 2.8219,
      "step": 171844
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.863253116607666,
      "learning_rate": 9.065476785654178e-05,
      "loss": 2.6824,
      "step": 171845
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2920870780944824,
      "learning_rate": 9.065183790588695e-05,
      "loss": 3.2425,
      "step": 171846
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.168229103088379,
      "learning_rate": 9.064890799415388e-05,
      "loss": 2.5713,
      "step": 171847
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7217464447021484,
      "learning_rate": 9.064597812134306e-05,
      "loss": 2.7891,
      "step": 171848
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.125002384185791,
      "learning_rate": 9.064304828745519e-05,
      "loss": 2.8225,
      "step": 171849
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.973851442337036,
      "learning_rate": 9.064011849249065e-05,
      "loss": 2.6591,
      "step": 171850
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5760281085968018,
      "learning_rate": 9.063718873645015e-05,
      "loss": 2.5736,
      "step": 171851
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2873013019561768,
      "learning_rate": 9.063425901933414e-05,
      "loss": 3.113,
      "step": 171852
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.257930278778076,
      "learning_rate": 9.06313293411431e-05,
      "loss": 3.033,
      "step": 171853
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5462844371795654,
      "learning_rate": 9.062839970187772e-05,
      "loss": 2.8814,
      "step": 171854
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.172846555709839,
      "learning_rate": 9.062547010153846e-05,
      "loss": 2.9718,
      "step": 171855
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.954862117767334,
      "learning_rate": 9.06225405401258e-05,
      "loss": 2.9354,
      "step": 171856
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3546454906463623,
      "learning_rate": 9.061961101764048e-05,
      "loss": 2.8697,
      "step": 171857
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7530176639556885,
      "learning_rate": 9.061668153408291e-05,
      "loss": 2.8327,
      "step": 171858
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5629587173461914,
      "learning_rate": 9.061375208945354e-05,
      "loss": 3.0364,
      "step": 171859
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5981791019439697,
      "learning_rate": 9.061082268375313e-05,
      "loss": 3.1984,
      "step": 171860
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.575753688812256,
      "learning_rate": 9.060789331698204e-05,
      "loss": 3.1113,
      "step": 171861
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.460554361343384,
      "learning_rate": 9.060496398914098e-05,
      "loss": 3.1776,
      "step": 171862
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.912588357925415,
      "learning_rate": 9.060203470023041e-05,
      "loss": 2.7788,
      "step": 171863
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.57167649269104,
      "learning_rate": 9.059910545025086e-05,
      "loss": 2.9155,
      "step": 171864
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.419825553894043,
      "learning_rate": 9.059617623920281e-05,
      "loss": 2.9644,
      "step": 171865
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.708890676498413,
      "learning_rate": 9.059324706708698e-05,
      "loss": 2.943,
      "step": 171866
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.610718011856079,
      "learning_rate": 9.059031793390372e-05,
      "loss": 2.7387,
      "step": 171867
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.211878776550293,
      "learning_rate": 9.058738883965376e-05,
      "loss": 2.8017,
      "step": 171868
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.908747911453247,
      "learning_rate": 9.058445978433757e-05,
      "loss": 3.1944,
      "step": 171869
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.194611072540283,
      "learning_rate": 9.05815307679557e-05,
      "loss": 3.2362,
      "step": 171870
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7656733989715576,
      "learning_rate": 9.057860179050855e-05,
      "loss": 2.7748,
      "step": 171871
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.808173894882202,
      "learning_rate": 9.057567285199689e-05,
      "loss": 2.6825,
      "step": 171872
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8511478900909424,
      "learning_rate": 9.057274395242108e-05,
      "loss": 2.884,
      "step": 171873
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.598884344100952,
      "learning_rate": 9.056981509178184e-05,
      "loss": 2.9137,
      "step": 171874
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.366576671600342,
      "learning_rate": 9.056688627007961e-05,
      "loss": 2.8876,
      "step": 171875
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9055893421173096,
      "learning_rate": 9.056395748731496e-05,
      "loss": 3.0256,
      "step": 171876
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0577163696289062,
      "learning_rate": 9.056102874348835e-05,
      "loss": 2.8884,
      "step": 171877
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.516113042831421,
      "learning_rate": 9.055810003860047e-05,
      "loss": 2.9004,
      "step": 171878
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.763864517211914,
      "learning_rate": 9.055517137265171e-05,
      "loss": 2.8603,
      "step": 171879
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.676649332046509,
      "learning_rate": 9.055224274564278e-05,
      "loss": 3.0006,
      "step": 171880
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.635209560394287,
      "learning_rate": 9.054931415757416e-05,
      "loss": 2.7487,
      "step": 171881
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.987372636795044,
      "learning_rate": 9.054638560844636e-05,
      "loss": 2.7689,
      "step": 171882
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.207212209701538,
      "learning_rate": 9.054345709825984e-05,
      "loss": 2.9088,
      "step": 171883
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8528780937194824,
      "learning_rate": 9.054052862701534e-05,
      "loss": 3.0849,
      "step": 171884
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7128548622131348,
      "learning_rate": 9.053760019471323e-05,
      "loss": 2.8562,
      "step": 171885
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.422276258468628,
      "learning_rate": 9.053467180135422e-05,
      "loss": 2.9935,
      "step": 171886
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.325798749923706,
      "learning_rate": 9.05317434469387e-05,
      "loss": 2.9824,
      "step": 171887
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.498723268508911,
      "learning_rate": 9.052881513146742e-05,
      "loss": 3.1506,
      "step": 171888
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.472543239593506,
      "learning_rate": 9.052588685494063e-05,
      "loss": 3.0283,
      "step": 171889
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9846670627593994,
      "learning_rate": 9.052295861735912e-05,
      "loss": 3.0449,
      "step": 171890
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4558615684509277,
      "learning_rate": 9.052003041872325e-05,
      "loss": 2.849,
      "step": 171891
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.89359188079834,
      "learning_rate": 9.051710225903377e-05,
      "loss": 2.9121,
      "step": 171892
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2559735774993896,
      "learning_rate": 9.051417413829103e-05,
      "loss": 3.1631,
      "step": 171893
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8055639266967773,
      "learning_rate": 9.051124605649583e-05,
      "loss": 2.9365,
      "step": 171894
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.628530740737915,
      "learning_rate": 9.050831801364836e-05,
      "loss": 2.655,
      "step": 171895
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5735063552856445,
      "learning_rate": 9.050539000974942e-05,
      "loss": 2.9004,
      "step": 171896
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5130698680877686,
      "learning_rate": 9.050246204479942e-05,
      "loss": 3.0079,
      "step": 171897
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6465134620666504,
      "learning_rate": 9.049953411879904e-05,
      "loss": 2.9064,
      "step": 171898
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.562851905822754,
      "learning_rate": 9.049660623174866e-05,
      "loss": 3.0706,
      "step": 171899
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.614410161972046,
      "learning_rate": 9.049367838364909e-05,
      "loss": 3.0948,
      "step": 171900
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3811333179473877,
      "learning_rate": 9.049075057450054e-05,
      "loss": 3.0049,
      "step": 171901
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7789740562438965,
      "learning_rate": 9.048782280430379e-05,
      "loss": 3.0315,
      "step": 171902
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.160900354385376,
      "learning_rate": 9.048489507305922e-05,
      "loss": 2.9578,
      "step": 171903
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8369133472442627,
      "learning_rate": 9.048196738076755e-05,
      "loss": 2.7635,
      "step": 171904
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.609196662902832,
      "learning_rate": 9.047903972742916e-05,
      "loss": 2.8161,
      "step": 171905
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.842052459716797,
      "learning_rate": 9.047611211304487e-05,
      "loss": 2.799,
      "step": 171906
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.120176315307617,
      "learning_rate": 9.047318453761479e-05,
      "loss": 2.9966,
      "step": 171907
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.288787364959717,
      "learning_rate": 9.047025700113982e-05,
      "loss": 3.2028,
      "step": 171908
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.86514949798584,
      "learning_rate": 9.046732950362032e-05,
      "loss": 2.9378,
      "step": 171909
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.104160785675049,
      "learning_rate": 9.046440204505696e-05,
      "loss": 2.9528,
      "step": 171910
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4916083812713623,
      "learning_rate": 9.046147462545013e-05,
      "loss": 3.0129,
      "step": 171911
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.655163049697876,
      "learning_rate": 9.045854724480066e-05,
      "loss": 3.1921,
      "step": 171912
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.4777026176452637,
      "learning_rate": 9.045561990310873e-05,
      "loss": 3.0077,
      "step": 171913
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.394915580749512,
      "learning_rate": 9.045269260037512e-05,
      "loss": 2.6219,
      "step": 171914
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.4064576625823975,
      "learning_rate": 9.044976533660025e-05,
      "loss": 3.0482,
      "step": 171915
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5623505115509033,
      "learning_rate": 9.044683811178477e-05,
      "loss": 3.0385,
      "step": 171916
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.577634572982788,
      "learning_rate": 9.044391092592913e-05,
      "loss": 3.1681,
      "step": 171917
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3470921516418457,
      "learning_rate": 9.044098377903409e-05,
      "loss": 2.9725,
      "step": 171918
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7333436012268066,
      "learning_rate": 9.043805667109982e-05,
      "loss": 2.8682,
      "step": 171919
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.601142406463623,
      "learning_rate": 9.043512960212719e-05,
      "loss": 3.0512,
      "step": 171920
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.594916582107544,
      "learning_rate": 9.043220257211656e-05,
      "loss": 2.8658,
      "step": 171921
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2479259967803955,
      "learning_rate": 9.042927558106858e-05,
      "loss": 2.8427,
      "step": 171922
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4653584957122803,
      "learning_rate": 9.042634862898368e-05,
      "loss": 2.8709,
      "step": 171923
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3540139198303223,
      "learning_rate": 9.042342171586257e-05,
      "loss": 2.9644,
      "step": 171924
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.215440034866333,
      "learning_rate": 9.04204948417057e-05,
      "loss": 3.0112,
      "step": 171925
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.522261381149292,
      "learning_rate": 9.041756800651363e-05,
      "loss": 2.9478,
      "step": 171926
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.1832871437072754,
      "learning_rate": 9.041464121028679e-05,
      "loss": 2.9452,
      "step": 171927
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1673731803894043,
      "learning_rate": 9.04117144530259e-05,
      "loss": 2.9038,
      "step": 171928
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.728633403778076,
      "learning_rate": 9.040878773473136e-05,
      "loss": 2.8292,
      "step": 171929
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.255248546600342,
      "learning_rate": 9.040586105540387e-05,
      "loss": 2.9644,
      "step": 171930
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6945314407348633,
      "learning_rate": 9.040293441504388e-05,
      "loss": 3.1099,
      "step": 171931
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6385159492492676,
      "learning_rate": 9.040000781365195e-05,
      "loss": 3.0854,
      "step": 171932
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.727837562561035,
      "learning_rate": 9.039708125122852e-05,
      "loss": 2.9605,
      "step": 171933
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6154050827026367,
      "learning_rate": 9.039415472777431e-05,
      "loss": 3.0908,
      "step": 171934
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9006381034851074,
      "learning_rate": 9.03912282432897e-05,
      "loss": 3.1466,
      "step": 171935
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.480046272277832,
      "learning_rate": 9.038830179777543e-05,
      "loss": 3.1338,
      "step": 171936
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8130784034729004,
      "learning_rate": 9.038537539123189e-05,
      "loss": 2.9437,
      "step": 171937
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5726258754730225,
      "learning_rate": 9.038244902365959e-05,
      "loss": 2.8547,
      "step": 171938
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.602538585662842,
      "learning_rate": 9.037952269505926e-05,
      "loss": 2.903,
      "step": 171939
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.270172357559204,
      "learning_rate": 9.037659640543131e-05,
      "loss": 2.9494,
      "step": 171940
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.724475622177124,
      "learning_rate": 9.037367015477622e-05,
      "loss": 2.9511,
      "step": 171941
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.843567132949829,
      "learning_rate": 9.037074394309473e-05,
      "loss": 3.0551,
      "step": 171942
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.078517198562622,
      "learning_rate": 9.036781777038725e-05,
      "loss": 3.0883,
      "step": 171943
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.789128065109253,
      "learning_rate": 9.036489163665427e-05,
      "loss": 2.8735,
      "step": 171944
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.9475793838500977,
      "learning_rate": 9.036196554189652e-05,
      "loss": 2.9553,
      "step": 171945
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7582738399505615,
      "learning_rate": 9.035903948611431e-05,
      "loss": 3.3971,
      "step": 171946
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.629077911376953,
      "learning_rate": 9.035611346930843e-05,
      "loss": 2.7339,
      "step": 171947
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.911311149597168,
      "learning_rate": 9.03531874914793e-05,
      "loss": 2.8655,
      "step": 171948
    },
    {
      "epoch": 2.24,
      "grad_norm": 5.25846529006958,
      "learning_rate": 9.035026155262748e-05,
      "loss": 2.9228,
      "step": 171949
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.422485589981079,
      "learning_rate": 9.03473356527534e-05,
      "loss": 3.1504,
      "step": 171950
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1642212867736816,
      "learning_rate": 9.034440979185781e-05,
      "loss": 2.9341,
      "step": 171951
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.108124256134033,
      "learning_rate": 9.034148396994106e-05,
      "loss": 2.942,
      "step": 171952
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8066933155059814,
      "learning_rate": 9.033855818700388e-05,
      "loss": 3.2136,
      "step": 171953
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9003055095672607,
      "learning_rate": 9.03356324430467e-05,
      "loss": 2.9663,
      "step": 171954
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7539374828338623,
      "learning_rate": 9.033270673807009e-05,
      "loss": 2.9005,
      "step": 171955
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.400406837463379,
      "learning_rate": 9.032978107207451e-05,
      "loss": 2.9114,
      "step": 171956
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4023964405059814,
      "learning_rate": 9.032685544506067e-05,
      "loss": 3.0396,
      "step": 171957
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7810912132263184,
      "learning_rate": 9.032392985702894e-05,
      "loss": 2.6253,
      "step": 171958
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.449835777282715,
      "learning_rate": 9.032100430798002e-05,
      "loss": 3.124,
      "step": 171959
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3714182376861572,
      "learning_rate": 9.031807879791428e-05,
      "loss": 3.0566,
      "step": 171960
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4189374446868896,
      "learning_rate": 9.031515332683257e-05,
      "loss": 2.6795,
      "step": 171961
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5366897583007812,
      "learning_rate": 9.031222789473506e-05,
      "loss": 2.8575,
      "step": 171962
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3612232208251953,
      "learning_rate": 9.030930250162255e-05,
      "loss": 2.9456,
      "step": 171963
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.420564889907837,
      "learning_rate": 9.030637714749538e-05,
      "loss": 2.6224,
      "step": 171964
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.457937717437744,
      "learning_rate": 9.030345183235436e-05,
      "loss": 3.1463,
      "step": 171965
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.441141128540039,
      "learning_rate": 9.030052655619975e-05,
      "loss": 3.1105,
      "step": 171966
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4222147464752197,
      "learning_rate": 9.029760131903242e-05,
      "loss": 2.884,
      "step": 171967
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.499842405319214,
      "learning_rate": 9.029467612085254e-05,
      "loss": 3.0086,
      "step": 171968
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4140641689300537,
      "learning_rate": 9.029175096166093e-05,
      "loss": 3.1815,
      "step": 171969
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4449703693389893,
      "learning_rate": 9.028882584145798e-05,
      "loss": 3.2081,
      "step": 171970
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2588741779327393,
      "learning_rate": 9.028590076024437e-05,
      "loss": 3.1712,
      "step": 171971
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3928134441375732,
      "learning_rate": 9.028297571802047e-05,
      "loss": 2.9585,
      "step": 171972
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5385639667510986,
      "learning_rate": 9.028005071478712e-05,
      "loss": 2.7765,
      "step": 171973
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.49749493598938,
      "learning_rate": 9.027712575054446e-05,
      "loss": 2.7638,
      "step": 171974
    },
    {
      "epoch": 2.24,
      "grad_norm": 5.368139266967773,
      "learning_rate": 9.027420082529334e-05,
      "loss": 2.6486,
      "step": 171975
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6049964427948,
      "learning_rate": 9.02712759390341e-05,
      "loss": 3.0392,
      "step": 171976
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.486150026321411,
      "learning_rate": 9.026835109176753e-05,
      "loss": 2.9014,
      "step": 171977
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3534538745880127,
      "learning_rate": 9.026542628349391e-05,
      "loss": 2.9703,
      "step": 171978
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5942177772521973,
      "learning_rate": 9.026250151421408e-05,
      "loss": 2.9463,
      "step": 171979
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.051203489303589,
      "learning_rate": 9.025957678392825e-05,
      "loss": 2.7036,
      "step": 171980
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.332850694656372,
      "learning_rate": 9.025665209263721e-05,
      "loss": 2.8764,
      "step": 171981
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.638524055480957,
      "learning_rate": 9.02537274403413e-05,
      "loss": 2.8518,
      "step": 171982
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.560894727706909,
      "learning_rate": 9.02508028270413e-05,
      "loss": 3.1518,
      "step": 171983
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4031224250793457,
      "learning_rate": 9.024787825273752e-05,
      "loss": 2.8427,
      "step": 171984
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8505780696868896,
      "learning_rate": 9.024495371743083e-05,
      "loss": 3.2777,
      "step": 171985
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.639527320861816,
      "learning_rate": 9.024202922112135e-05,
      "loss": 2.9946,
      "step": 171986
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6502530574798584,
      "learning_rate": 9.023910476380996e-05,
      "loss": 3.1087,
      "step": 171987
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.528132915496826,
      "learning_rate": 9.023618034549695e-05,
      "loss": 3.2023,
      "step": 171988
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8222641944885254,
      "learning_rate": 9.023325596618312e-05,
      "loss": 2.9556,
      "step": 171989
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6555755138397217,
      "learning_rate": 9.023033162586878e-05,
      "loss": 2.699,
      "step": 171990
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.043043851852417,
      "learning_rate": 9.022740732455478e-05,
      "loss": 2.9359,
      "step": 171991
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1102731227874756,
      "learning_rate": 9.022448306224129e-05,
      "loss": 2.9485,
      "step": 171992
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6563971042633057,
      "learning_rate": 9.022155883892909e-05,
      "loss": 3.0322,
      "step": 171993
    },
    {
      "epoch": 2.24,
      "grad_norm": 1.988998532295227,
      "learning_rate": 9.021863465461855e-05,
      "loss": 3.1085,
      "step": 171994
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.800415277481079,
      "learning_rate": 9.021571050931047e-05,
      "loss": 3.1436,
      "step": 171995
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.560161590576172,
      "learning_rate": 9.021278640300514e-05,
      "loss": 3.025,
      "step": 171996
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.531620502471924,
      "learning_rate": 9.020986233570339e-05,
      "loss": 3.0623,
      "step": 171997
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4242353439331055,
      "learning_rate": 9.02069383074054e-05,
      "loss": 2.9306,
      "step": 171998
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.1826415061950684,
      "learning_rate": 9.020401431811201e-05,
      "loss": 3.3284,
      "step": 171999
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1063296794891357,
      "learning_rate": 9.020109036782355e-05,
      "loss": 2.9448,
      "step": 172000
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.644246816635132,
      "learning_rate": 9.019816645654075e-05,
      "loss": 2.9384,
      "step": 172001
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3355305194854736,
      "learning_rate": 9.0195242584264e-05,
      "loss": 2.9464,
      "step": 172002
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.741081953048706,
      "learning_rate": 9.019231875099406e-05,
      "loss": 3.1836,
      "step": 172003
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3752613067626953,
      "learning_rate": 9.018939495673119e-05,
      "loss": 2.7892,
      "step": 172004
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9293038845062256,
      "learning_rate": 9.018647120147613e-05,
      "loss": 2.8855,
      "step": 172005
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6168389320373535,
      "learning_rate": 9.018354748522929e-05,
      "loss": 2.9005,
      "step": 172006
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8223373889923096,
      "learning_rate": 9.018062380799138e-05,
      "loss": 2.9389,
      "step": 172007
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5925216674804688,
      "learning_rate": 9.017770016976277e-05,
      "loss": 3.1816,
      "step": 172008
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.519125461578369,
      "learning_rate": 9.017477657054425e-05,
      "loss": 3.0848,
      "step": 172009
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2820165157318115,
      "learning_rate": 9.017185301033603e-05,
      "loss": 2.6988,
      "step": 172010
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.4980928897857666,
      "learning_rate": 9.016892948913893e-05,
      "loss": 2.7951,
      "step": 172011
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.487290620803833,
      "learning_rate": 9.016600600695327e-05,
      "loss": 2.7784,
      "step": 172012
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.729562759399414,
      "learning_rate": 9.016308256377978e-05,
      "loss": 2.9646,
      "step": 172013
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0301060676574707,
      "learning_rate": 9.01601591596189e-05,
      "loss": 3.1697,
      "step": 172014
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.458327054977417,
      "learning_rate": 9.015723579447125e-05,
      "loss": 2.7422,
      "step": 172015
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.734562397003174,
      "learning_rate": 9.015431246833736e-05,
      "loss": 3.1548,
      "step": 172016
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.7798399925231934,
      "learning_rate": 9.015138918121775e-05,
      "loss": 3.1217,
      "step": 172017
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.217095136642456,
      "learning_rate": 9.014846593311283e-05,
      "loss": 3.008,
      "step": 172018
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.631059646606445,
      "learning_rate": 9.014554272402341e-05,
      "loss": 2.9907,
      "step": 172019
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5204684734344482,
      "learning_rate": 9.014261955394977e-05,
      "loss": 2.9326,
      "step": 172020
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0343587398529053,
      "learning_rate": 9.013969642289267e-05,
      "loss": 3.2131,
      "step": 172021
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.497673988342285,
      "learning_rate": 9.013677333085258e-05,
      "loss": 2.9829,
      "step": 172022
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8727338314056396,
      "learning_rate": 9.013385027782993e-05,
      "loss": 2.7263,
      "step": 172023
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1940665245056152,
      "learning_rate": 9.013092726382545e-05,
      "loss": 2.7046,
      "step": 172024
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.226318359375,
      "learning_rate": 9.012800428883956e-05,
      "loss": 2.8744,
      "step": 172025
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.262892484664917,
      "learning_rate": 9.012508135287276e-05,
      "loss": 2.8679,
      "step": 172026
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5226387977600098,
      "learning_rate": 9.012215845592579e-05,
      "loss": 2.9282,
      "step": 172027
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.7811970710754395,
      "learning_rate": 9.011923559799906e-05,
      "loss": 2.9851,
      "step": 172028
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.015787124633789,
      "learning_rate": 9.011631277909302e-05,
      "loss": 2.8042,
      "step": 172029
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.579185962677002,
      "learning_rate": 9.01133899992084e-05,
      "loss": 3.0726,
      "step": 172030
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8062684535980225,
      "learning_rate": 9.01104672583457e-05,
      "loss": 2.9832,
      "step": 172031
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6290640830993652,
      "learning_rate": 9.010754455650533e-05,
      "loss": 2.6669,
      "step": 172032
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.439065933227539,
      "learning_rate": 9.0104621893688e-05,
      "loss": 2.9116,
      "step": 172033
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5115644931793213,
      "learning_rate": 9.01016992698942e-05,
      "loss": 2.9678,
      "step": 172034
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.7726266384124756,
      "learning_rate": 9.009877668512433e-05,
      "loss": 2.82,
      "step": 172035
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8288722038269043,
      "learning_rate": 9.009585413937919e-05,
      "loss": 2.7625,
      "step": 172036
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0421042442321777,
      "learning_rate": 9.00929316326591e-05,
      "loss": 2.9426,
      "step": 172037
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6692168712615967,
      "learning_rate": 9.009000916496475e-05,
      "loss": 2.9885,
      "step": 172038
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.66963529586792,
      "learning_rate": 9.008708673629666e-05,
      "loss": 2.7231,
      "step": 172039
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0420706272125244,
      "learning_rate": 9.008416434665534e-05,
      "loss": 2.8137,
      "step": 172040
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9238088130950928,
      "learning_rate": 9.008124199604124e-05,
      "loss": 2.7946,
      "step": 172041
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.930232048034668,
      "learning_rate": 9.00783196844551e-05,
      "loss": 2.7559,
      "step": 172042
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.188205718994141,
      "learning_rate": 9.007539741189725e-05,
      "loss": 2.7867,
      "step": 172043
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.495854377746582,
      "learning_rate": 9.007247517836847e-05,
      "loss": 2.8286,
      "step": 172044
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.895249128341675,
      "learning_rate": 9.006955298386906e-05,
      "loss": 3.3041,
      "step": 172045
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8499886989593506,
      "learning_rate": 9.00666308283999e-05,
      "loss": 3.0462,
      "step": 172046
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1338589191436768,
      "learning_rate": 9.00637087119611e-05,
      "loss": 3.299,
      "step": 172047
    },
    {
      "epoch": 2.24,
      "grad_norm": 6.199134826660156,
      "learning_rate": 9.006078663455353e-05,
      "loss": 2.9322,
      "step": 172048
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.951408863067627,
      "learning_rate": 9.00578645961775e-05,
      "loss": 2.6888,
      "step": 172049
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.581540822982788,
      "learning_rate": 9.005494259683381e-05,
      "loss": 2.9587,
      "step": 172050
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9147427082061768,
      "learning_rate": 9.005202063652273e-05,
      "loss": 2.8493,
      "step": 172051
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.258579969406128,
      "learning_rate": 9.004909871524517e-05,
      "loss": 3.0064,
      "step": 172052
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.218562602996826,
      "learning_rate": 9.004617683300124e-05,
      "loss": 2.9829,
      "step": 172053
    },
    {
      "epoch": 2.24,
      "grad_norm": 5.515552997589111,
      "learning_rate": 9.00432549897918e-05,
      "loss": 2.8213,
      "step": 172054
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.392313480377197,
      "learning_rate": 9.004033318561717e-05,
      "loss": 3.1253,
      "step": 172055
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7844226360321045,
      "learning_rate": 9.00374114204781e-05,
      "loss": 3.1434,
      "step": 172056
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.717170000076294,
      "learning_rate": 9.003448969437495e-05,
      "loss": 2.9169,
      "step": 172057
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.561342477798462,
      "learning_rate": 9.003156800730853e-05,
      "loss": 2.8945,
      "step": 172058
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.576840400695801,
      "learning_rate": 9.002864635927902e-05,
      "loss": 3.0825,
      "step": 172059
    },
    {
      "epoch": 2.24,
      "grad_norm": 7.915121555328369,
      "learning_rate": 9.002572475028726e-05,
      "loss": 2.8121,
      "step": 172060
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.651339054107666,
      "learning_rate": 9.002280318033359e-05,
      "loss": 2.908,
      "step": 172061
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8475022315979004,
      "learning_rate": 9.001988164941871e-05,
      "loss": 2.71,
      "step": 172062
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.971569538116455,
      "learning_rate": 9.001696015754302e-05,
      "loss": 2.8314,
      "step": 172063
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1233413219451904,
      "learning_rate": 9.001403870470734e-05,
      "loss": 2.9079,
      "step": 172064
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8084847927093506,
      "learning_rate": 9.00111172909118e-05,
      "loss": 3.1694,
      "step": 172065
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9272470474243164,
      "learning_rate": 9.000819591615725e-05,
      "loss": 2.9817,
      "step": 172066
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.378145217895508,
      "learning_rate": 9.000527458044405e-05,
      "loss": 2.8513,
      "step": 172067
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.923633575439453,
      "learning_rate": 9.000235328377296e-05,
      "loss": 3.0998,
      "step": 172068
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2779176235198975,
      "learning_rate": 8.999943202614426e-05,
      "loss": 2.9215,
      "step": 172069
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.45573091506958,
      "learning_rate": 8.999651080755886e-05,
      "loss": 2.9045,
      "step": 172070
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9438679218292236,
      "learning_rate": 8.999358962801684e-05,
      "loss": 3.0271,
      "step": 172071
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.657680511474609,
      "learning_rate": 8.999066848751911e-05,
      "loss": 2.932,
      "step": 172072
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8353495597839355,
      "learning_rate": 8.998774738606595e-05,
      "loss": 2.7934,
      "step": 172073
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9475302696228027,
      "learning_rate": 8.998482632365816e-05,
      "loss": 2.8118,
      "step": 172074
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6269233226776123,
      "learning_rate": 8.998190530029604e-05,
      "loss": 3.1241,
      "step": 172075
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7819063663482666,
      "learning_rate": 8.997898431598045e-05,
      "loss": 2.9083,
      "step": 172076
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.395907163619995,
      "learning_rate": 8.997606337071153e-05,
      "loss": 2.9836,
      "step": 172077
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.9777188301086426,
      "learning_rate": 8.99731424644901e-05,
      "loss": 2.9133,
      "step": 172078
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.328335285186768,
      "learning_rate": 8.997022159731657e-05,
      "loss": 2.8419,
      "step": 172079
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.7096784114837646,
      "learning_rate": 8.99673007691916e-05,
      "loss": 2.9959,
      "step": 172080
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5734944343566895,
      "learning_rate": 8.99643799801156e-05,
      "loss": 3.1126,
      "step": 172081
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7023837566375732,
      "learning_rate": 8.996145923008941e-05,
      "loss": 2.7504,
      "step": 172082
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.601130485534668,
      "learning_rate": 8.99585385191131e-05,
      "loss": 3.0648,
      "step": 172083
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8230648040771484,
      "learning_rate": 8.995561784718758e-05,
      "loss": 2.8271,
      "step": 172084
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.34952712059021,
      "learning_rate": 8.995269721431317e-05,
      "loss": 3.1978,
      "step": 172085
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.717609167098999,
      "learning_rate": 8.994977662049066e-05,
      "loss": 2.6216,
      "step": 172086
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2318453788757324,
      "learning_rate": 8.99468560657203e-05,
      "loss": 3.0976,
      "step": 172087
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.341315746307373,
      "learning_rate": 8.99439355500029e-05,
      "loss": 2.8437,
      "step": 172088
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6576602458953857,
      "learning_rate": 8.99410150733389e-05,
      "loss": 3.0007,
      "step": 172089
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.76576566696167,
      "learning_rate": 8.993809463572882e-05,
      "loss": 3.0177,
      "step": 172090
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.629241943359375,
      "learning_rate": 8.99351742371731e-05,
      "loss": 2.7292,
      "step": 172091
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4494848251342773,
      "learning_rate": 8.993225387767251e-05,
      "loss": 2.5652,
      "step": 172092
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8835372924804688,
      "learning_rate": 8.992933355722741e-05,
      "loss": 2.8721,
      "step": 172093
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.264014482498169,
      "learning_rate": 8.992641327583847e-05,
      "loss": 2.8558,
      "step": 172094
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.4027936458587646,
      "learning_rate": 8.992349303350619e-05,
      "loss": 3.1019,
      "step": 172095
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.982485055923462,
      "learning_rate": 8.992057283023109e-05,
      "loss": 2.8914,
      "step": 172096
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2222161293029785,
      "learning_rate": 8.991765266601363e-05,
      "loss": 2.7637,
      "step": 172097
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.504598617553711,
      "learning_rate": 8.991473254085455e-05,
      "loss": 2.9754,
      "step": 172098
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5363521575927734,
      "learning_rate": 8.991181245475416e-05,
      "loss": 3.0789,
      "step": 172099
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.734341859817505,
      "learning_rate": 8.990889240771326e-05,
      "loss": 3.0489,
      "step": 172100
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.679136276245117,
      "learning_rate": 8.990597239973226e-05,
      "loss": 2.7745,
      "step": 172101
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7547764778137207,
      "learning_rate": 8.99030524308117e-05,
      "loss": 2.86,
      "step": 172102
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9007556438446045,
      "learning_rate": 8.990013250095203e-05,
      "loss": 2.793,
      "step": 172103
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4690639972686768,
      "learning_rate": 8.989721261015398e-05,
      "loss": 3.0577,
      "step": 172104
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7959859371185303,
      "learning_rate": 8.989429275841788e-05,
      "loss": 2.7828,
      "step": 172105
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1814217567443848,
      "learning_rate": 8.989137294574454e-05,
      "loss": 3.144,
      "step": 172106
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5275025367736816,
      "learning_rate": 8.988845317213436e-05,
      "loss": 2.9258,
      "step": 172107
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4999754428863525,
      "learning_rate": 8.988553343758776e-05,
      "loss": 2.8375,
      "step": 172108
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8572309017181396,
      "learning_rate": 8.988261374210551e-05,
      "loss": 2.9305,
      "step": 172109
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7744593620300293,
      "learning_rate": 8.987969408568807e-05,
      "loss": 2.8033,
      "step": 172110
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0139482021331787,
      "learning_rate": 8.987677446833582e-05,
      "loss": 2.9678,
      "step": 172111
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.274540424346924,
      "learning_rate": 8.987385489004959e-05,
      "loss": 3.0327,
      "step": 172112
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.21506404876709,
      "learning_rate": 8.987093535082974e-05,
      "loss": 3.1502,
      "step": 172113
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.455962896347046,
      "learning_rate": 8.986801585067678e-05,
      "loss": 2.8106,
      "step": 172114
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5202553272247314,
      "learning_rate": 8.98650963895914e-05,
      "loss": 2.9256,
      "step": 172115
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4785380363464355,
      "learning_rate": 8.986217696757406e-05,
      "loss": 2.9342,
      "step": 172116
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.692466974258423,
      "learning_rate": 8.985925758462524e-05,
      "loss": 3.0406,
      "step": 172117
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.404320001602173,
      "learning_rate": 8.985633824074564e-05,
      "loss": 3.0288,
      "step": 172118
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4163284301757812,
      "learning_rate": 8.985341893593569e-05,
      "loss": 2.9001,
      "step": 172119
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.977156162261963,
      "learning_rate": 8.985049967019587e-05,
      "loss": 3.1076,
      "step": 172120
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5346221923828125,
      "learning_rate": 8.984758044352691e-05,
      "loss": 2.965,
      "step": 172121
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.316070079803467,
      "learning_rate": 8.984466125592917e-05,
      "loss": 2.9404,
      "step": 172122
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.8120594024658203,
      "learning_rate": 8.984174210740336e-05,
      "loss": 2.6271,
      "step": 172123
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.687025308609009,
      "learning_rate": 8.983882299794993e-05,
      "loss": 2.9469,
      "step": 172124
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3161652088165283,
      "learning_rate": 8.983590392756942e-05,
      "loss": 3.1222,
      "step": 172125
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.486990213394165,
      "learning_rate": 8.983298489626229e-05,
      "loss": 2.9808,
      "step": 172126
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3993594646453857,
      "learning_rate": 8.983006590402929e-05,
      "loss": 2.7543,
      "step": 172127
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.213257789611816,
      "learning_rate": 8.982714695087075e-05,
      "loss": 2.897,
      "step": 172128
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.9376275539398193,
      "learning_rate": 8.982422803678741e-05,
      "loss": 2.9956,
      "step": 172129
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7958290576934814,
      "learning_rate": 8.982130916177968e-05,
      "loss": 2.7901,
      "step": 172130
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5193591117858887,
      "learning_rate": 8.981839032584815e-05,
      "loss": 3.1661,
      "step": 172131
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2226502895355225,
      "learning_rate": 8.981547152899328e-05,
      "loss": 2.9553,
      "step": 172132
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.527125597000122,
      "learning_rate": 8.981255277121574e-05,
      "loss": 2.7925,
      "step": 172133
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1563522815704346,
      "learning_rate": 8.980963405251594e-05,
      "loss": 3.0757,
      "step": 172134
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1831681728363037,
      "learning_rate": 8.980671537289459e-05,
      "loss": 2.8093,
      "step": 172135
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.444624185562134,
      "learning_rate": 8.980379673235204e-05,
      "loss": 3.1215,
      "step": 172136
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.371009349822998,
      "learning_rate": 8.980087813088913e-05,
      "loss": 2.7861,
      "step": 172137
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2440025806427,
      "learning_rate": 8.979795956850603e-05,
      "loss": 2.9672,
      "step": 172138
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.883547067642212,
      "learning_rate": 8.979504104520353e-05,
      "loss": 2.8661,
      "step": 172139
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.106684923171997,
      "learning_rate": 8.9792122560982e-05,
      "loss": 3.0532,
      "step": 172140
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6594974994659424,
      "learning_rate": 8.978920411584218e-05,
      "loss": 2.9112,
      "step": 172141
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.29374623298645,
      "learning_rate": 8.978628570978443e-05,
      "loss": 2.9784,
      "step": 172142
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.297473669052124,
      "learning_rate": 8.978336734280955e-05,
      "loss": 2.8909,
      "step": 172143
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7424685955047607,
      "learning_rate": 8.978044901491775e-05,
      "loss": 3.1096,
      "step": 172144
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6121299266815186,
      "learning_rate": 8.97775307261098e-05,
      "loss": 2.7095,
      "step": 172145
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.942805290222168,
      "learning_rate": 8.97746124763861e-05,
      "loss": 2.8173,
      "step": 172146
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7641780376434326,
      "learning_rate": 8.977169426574733e-05,
      "loss": 2.9843,
      "step": 172147
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.523331880569458,
      "learning_rate": 8.97687760941939e-05,
      "loss": 2.7931,
      "step": 172148
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2330098152160645,
      "learning_rate": 8.976585796172663e-05,
      "loss": 3.0214,
      "step": 172149
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.373631477355957,
      "learning_rate": 8.976293986834563e-05,
      "loss": 2.9331,
      "step": 172150
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3675711154937744,
      "learning_rate": 8.97600218140518e-05,
      "loss": 2.9314,
      "step": 172151
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.405116081237793,
      "learning_rate": 8.975710379884543e-05,
      "loss": 3.1078,
      "step": 172152
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6724233627319336,
      "learning_rate": 8.975418582272726e-05,
      "loss": 3.0575,
      "step": 172153
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4495534896850586,
      "learning_rate": 8.97512678856977e-05,
      "loss": 2.9076,
      "step": 172154
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3053653240203857,
      "learning_rate": 8.974834998775743e-05,
      "loss": 2.9033,
      "step": 172155
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.323582172393799,
      "learning_rate": 8.974543212890689e-05,
      "loss": 2.9985,
      "step": 172156
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.615621328353882,
      "learning_rate": 8.974251430914666e-05,
      "loss": 2.9965,
      "step": 172157
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.615764617919922,
      "learning_rate": 8.973959652847717e-05,
      "loss": 2.9904,
      "step": 172158
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.625199794769287,
      "learning_rate": 8.973667878689916e-05,
      "loss": 2.7879,
      "step": 172159
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3099122047424316,
      "learning_rate": 8.973376108441298e-05,
      "loss": 2.9774,
      "step": 172160
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.626483678817749,
      "learning_rate": 8.973084342101933e-05,
      "loss": 3.043,
      "step": 172161
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3069605827331543,
      "learning_rate": 8.972792579671871e-05,
      "loss": 2.8134,
      "step": 172162
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.392507553100586,
      "learning_rate": 8.972500821151164e-05,
      "loss": 3.2132,
      "step": 172163
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7416574954986572,
      "learning_rate": 8.972209066539853e-05,
      "loss": 2.8209,
      "step": 172164
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.576260805130005,
      "learning_rate": 8.97191731583802e-05,
      "loss": 2.9227,
      "step": 172165
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6364564895629883,
      "learning_rate": 8.971625569045692e-05,
      "loss": 2.8331,
      "step": 172166
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8275609016418457,
      "learning_rate": 8.971333826162947e-05,
      "loss": 2.9011,
      "step": 172167
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9513328075408936,
      "learning_rate": 8.971042087189824e-05,
      "loss": 3.0955,
      "step": 172168
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6519694328308105,
      "learning_rate": 8.970750352126384e-05,
      "loss": 2.9151,
      "step": 172169
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.804569959640503,
      "learning_rate": 8.97045862097267e-05,
      "loss": 2.841,
      "step": 172170
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9427802562713623,
      "learning_rate": 8.970166893728751e-05,
      "loss": 3.1056,
      "step": 172171
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8267531394958496,
      "learning_rate": 8.969875170394668e-05,
      "loss": 3.2886,
      "step": 172172
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.702974319458008,
      "learning_rate": 8.969583450970491e-05,
      "loss": 2.9343,
      "step": 172173
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.510655164718628,
      "learning_rate": 8.969291735456267e-05,
      "loss": 3.0278,
      "step": 172174
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6320722103118896,
      "learning_rate": 8.969000023852044e-05,
      "loss": 2.6948,
      "step": 172175
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.906256675720215,
      "learning_rate": 8.968708316157878e-05,
      "loss": 3.0299,
      "step": 172176
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7686243057250977,
      "learning_rate": 8.96841661237383e-05,
      "loss": 3.0369,
      "step": 172177
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8098320960998535,
      "learning_rate": 8.968124912499945e-05,
      "loss": 2.8255,
      "step": 172178
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3395159244537354,
      "learning_rate": 8.967833216536287e-05,
      "loss": 3.1691,
      "step": 172179
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6610360145568848,
      "learning_rate": 8.967541524482909e-05,
      "loss": 2.9648,
      "step": 172180
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3231678009033203,
      "learning_rate": 8.967249836339864e-05,
      "loss": 2.8062,
      "step": 172181
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.907547950744629,
      "learning_rate": 8.966958152107194e-05,
      "loss": 2.9172,
      "step": 172182
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.103290319442749,
      "learning_rate": 8.966666471784971e-05,
      "loss": 2.9264,
      "step": 172183
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.507024049758911,
      "learning_rate": 8.966374795373231e-05,
      "loss": 2.7192,
      "step": 172184
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.252762794494629,
      "learning_rate": 8.966083122872053e-05,
      "loss": 2.6847,
      "step": 172185
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2293877601623535,
      "learning_rate": 8.965791454281474e-05,
      "loss": 3.0862,
      "step": 172186
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0935144424438477,
      "learning_rate": 8.965499789601553e-05,
      "loss": 2.8443,
      "step": 172187
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.654792070388794,
      "learning_rate": 8.96520812883233e-05,
      "loss": 2.9877,
      "step": 172188
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.271667242050171,
      "learning_rate": 8.964916471973885e-05,
      "loss": 2.9903,
      "step": 172189
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.622770071029663,
      "learning_rate": 8.964624819026247e-05,
      "loss": 3.057,
      "step": 172190
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2313499450683594,
      "learning_rate": 8.964333169989491e-05,
      "loss": 2.8445,
      "step": 172191
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4629552364349365,
      "learning_rate": 8.964041524863665e-05,
      "loss": 2.9631,
      "step": 172192
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2719199657440186,
      "learning_rate": 8.963749883648821e-05,
      "loss": 2.8531,
      "step": 172193
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9477710723876953,
      "learning_rate": 8.963458246345005e-05,
      "loss": 3.3717,
      "step": 172194
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9140703678131104,
      "learning_rate": 8.963166612952286e-05,
      "loss": 3.1343,
      "step": 172195
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.314953327178955,
      "learning_rate": 8.962874983470702e-05,
      "loss": 3.097,
      "step": 172196
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7940118312835693,
      "learning_rate": 8.962583357900326e-05,
      "loss": 2.9128,
      "step": 172197
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5397157669067383,
      "learning_rate": 8.962291736241203e-05,
      "loss": 2.8725,
      "step": 172198
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6156182289123535,
      "learning_rate": 8.96200011849338e-05,
      "loss": 3.0164,
      "step": 172199
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1728174686431885,
      "learning_rate": 8.961708504656925e-05,
      "loss": 3.0521,
      "step": 172200
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0697784423828125,
      "learning_rate": 8.961416894731885e-05,
      "loss": 3.2153,
      "step": 172201
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.7463033199310303,
      "learning_rate": 8.961125288718308e-05,
      "loss": 2.896,
      "step": 172202
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4146196842193604,
      "learning_rate": 8.960833686616264e-05,
      "loss": 3.0616,
      "step": 172203
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.489393711090088,
      "learning_rate": 8.960542088425797e-05,
      "loss": 3.0479,
      "step": 172204
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.402907133102417,
      "learning_rate": 8.960250494146954e-05,
      "loss": 2.6665,
      "step": 172205
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0383782386779785,
      "learning_rate": 8.959958903779805e-05,
      "loss": 3.0873,
      "step": 172206
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.777526378631592,
      "learning_rate": 8.959667317324387e-05,
      "loss": 3.0431,
      "step": 172207
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6041054725646973,
      "learning_rate": 8.959375734780774e-05,
      "loss": 2.8084,
      "step": 172208
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4793169498443604,
      "learning_rate": 8.95908415614901e-05,
      "loss": 3.0464,
      "step": 172209
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.811147689819336,
      "learning_rate": 8.958792581429153e-05,
      "loss": 2.9822,
      "step": 172210
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.635136127471924,
      "learning_rate": 8.958501010621242e-05,
      "loss": 3.0379,
      "step": 172211
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.52020001411438,
      "learning_rate": 8.958209443725352e-05,
      "loss": 3.0008,
      "step": 172212
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.522360324859619,
      "learning_rate": 8.957917880741518e-05,
      "loss": 3.1333,
      "step": 172213
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6147117614746094,
      "learning_rate": 8.957626321669816e-05,
      "loss": 2.874,
      "step": 172214
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7574713230133057,
      "learning_rate": 8.957334766510285e-05,
      "loss": 2.8975,
      "step": 172215
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.44892954826355,
      "learning_rate": 8.957043215262987e-05,
      "loss": 2.745,
      "step": 172216
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.151796817779541,
      "learning_rate": 8.956751667927959e-05,
      "loss": 3.0509,
      "step": 172217
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6280412673950195,
      "learning_rate": 8.95646012450528e-05,
      "loss": 2.6875,
      "step": 172218
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.333824634552002,
      "learning_rate": 8.95616858499498e-05,
      "loss": 2.9203,
      "step": 172219
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6591243743896484,
      "learning_rate": 8.955877049397137e-05,
      "loss": 2.9677,
      "step": 172220
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.396299123764038,
      "learning_rate": 8.95558551771178e-05,
      "loss": 3.1351,
      "step": 172221
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.064668893814087,
      "learning_rate": 8.955293989938993e-05,
      "loss": 2.9633,
      "step": 172222
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.625771999359131,
      "learning_rate": 8.95500246607881e-05,
      "loss": 2.9461,
      "step": 172223
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8791487216949463,
      "learning_rate": 8.954710946131291e-05,
      "loss": 3.0608,
      "step": 172224
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.176236391067505,
      "learning_rate": 8.954419430096477e-05,
      "loss": 2.8046,
      "step": 172225
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.566422700881958,
      "learning_rate": 8.954127917974446e-05,
      "loss": 3.027,
      "step": 172226
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.124880075454712,
      "learning_rate": 8.953836409765226e-05,
      "loss": 2.9226,
      "step": 172227
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.302464246749878,
      "learning_rate": 8.953544905468898e-05,
      "loss": 3.0466,
      "step": 172228
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.648486614227295,
      "learning_rate": 8.953253405085505e-05,
      "loss": 2.6905,
      "step": 172229
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5583577156066895,
      "learning_rate": 8.952961908615096e-05,
      "loss": 2.9295,
      "step": 172230
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6084182262420654,
      "learning_rate": 8.952670416057722e-05,
      "loss": 2.8841,
      "step": 172231
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7472527027130127,
      "learning_rate": 8.952378927413452e-05,
      "loss": 2.9756,
      "step": 172232
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.536839723587036,
      "learning_rate": 8.952087442682321e-05,
      "loss": 2.9719,
      "step": 172233
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.906827926635742,
      "learning_rate": 8.951795961864406e-05,
      "loss": 2.6816,
      "step": 172234
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5187838077545166,
      "learning_rate": 8.951504484959749e-05,
      "loss": 3.0456,
      "step": 172235
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5192811489105225,
      "learning_rate": 8.951213011968404e-05,
      "loss": 2.7443,
      "step": 172236
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3423774242401123,
      "learning_rate": 8.950921542890417e-05,
      "loss": 2.9132,
      "step": 172237
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.539762258529663,
      "learning_rate": 8.95063007772586e-05,
      "loss": 2.8606,
      "step": 172238
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6048896312713623,
      "learning_rate": 8.95033861647477e-05,
      "loss": 2.9026,
      "step": 172239
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.6606645584106445,
      "learning_rate": 8.950047159137215e-05,
      "loss": 2.8382,
      "step": 172240
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.616100788116455,
      "learning_rate": 8.949755705713246e-05,
      "loss": 2.9177,
      "step": 172241
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.851101875305176,
      "learning_rate": 8.949464256202917e-05,
      "loss": 3.0618,
      "step": 172242
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5880520343780518,
      "learning_rate": 8.949172810606268e-05,
      "loss": 3.1328,
      "step": 172243
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.7907469272613525,
      "learning_rate": 8.948881368923374e-05,
      "loss": 2.8607,
      "step": 172244
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.546341419219971,
      "learning_rate": 8.948589931154273e-05,
      "loss": 2.871,
      "step": 172245
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4229021072387695,
      "learning_rate": 8.948298497299034e-05,
      "loss": 2.713,
      "step": 172246
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2644505500793457,
      "learning_rate": 8.948007067357705e-05,
      "loss": 2.9931,
      "step": 172247
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8007702827453613,
      "learning_rate": 8.947715641330338e-05,
      "loss": 3.0102,
      "step": 172248
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.981804847717285,
      "learning_rate": 8.947424219216978e-05,
      "loss": 2.943,
      "step": 172249
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.958047389984131,
      "learning_rate": 8.947132801017703e-05,
      "loss": 3.1166,
      "step": 172250
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.876204252243042,
      "learning_rate": 8.94684138673254e-05,
      "loss": 2.9853,
      "step": 172251
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.821071147918701,
      "learning_rate": 8.946549976361566e-05,
      "loss": 3.177,
      "step": 172252
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.493955373764038,
      "learning_rate": 8.946258569904827e-05,
      "loss": 2.8657,
      "step": 172253
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7608180046081543,
      "learning_rate": 8.945967167362374e-05,
      "loss": 3.182,
      "step": 172254
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.759004831314087,
      "learning_rate": 8.945675768734257e-05,
      "loss": 2.8233,
      "step": 172255
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6985881328582764,
      "learning_rate": 8.945384374020543e-05,
      "loss": 2.9506,
      "step": 172256
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.223661422729492,
      "learning_rate": 8.94509298322127e-05,
      "loss": 2.6442,
      "step": 172257
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.727769613265991,
      "learning_rate": 8.944801596336511e-05,
      "loss": 2.9579,
      "step": 172258
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8404414653778076,
      "learning_rate": 8.944510213366312e-05,
      "loss": 2.9579,
      "step": 172259
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.534048080444336,
      "learning_rate": 8.944218834310729e-05,
      "loss": 2.7801,
      "step": 172260
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4705891609191895,
      "learning_rate": 8.943927459169801e-05,
      "loss": 2.9325,
      "step": 172261
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0618631839752197,
      "learning_rate": 8.943636087943601e-05,
      "loss": 3.2643,
      "step": 172262
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9251065254211426,
      "learning_rate": 8.94334472063217e-05,
      "loss": 3.0049,
      "step": 172263
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4472997188568115,
      "learning_rate": 8.943053357235576e-05,
      "loss": 2.8466,
      "step": 172264
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9035346508026123,
      "learning_rate": 8.942761997753869e-05,
      "loss": 2.8769,
      "step": 172265
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.99367094039917,
      "learning_rate": 8.9424706421871e-05,
      "loss": 2.9344,
      "step": 172266
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.920525312423706,
      "learning_rate": 8.942179290535312e-05,
      "loss": 2.8327,
      "step": 172267
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6688125133514404,
      "learning_rate": 8.941887942798579e-05,
      "loss": 2.8533,
      "step": 172268
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5599517822265625,
      "learning_rate": 8.941596598976937e-05,
      "loss": 2.9397,
      "step": 172269
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.293971300125122,
      "learning_rate": 8.941305259070461e-05,
      "loss": 2.9664,
      "step": 172270
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.92372989654541,
      "learning_rate": 8.941013923079193e-05,
      "loss": 2.7933,
      "step": 172271
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.046180486679077,
      "learning_rate": 8.94072259100319e-05,
      "loss": 2.9775,
      "step": 172272
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.5326271057128906,
      "learning_rate": 8.940431262842491e-05,
      "loss": 2.8785,
      "step": 172273
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.56209135055542,
      "learning_rate": 8.940139938597177e-05,
      "loss": 3.1267,
      "step": 172274
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.972609519958496,
      "learning_rate": 8.939848618267275e-05,
      "loss": 3.0731,
      "step": 172275
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.905531883239746,
      "learning_rate": 8.939557301852863e-05,
      "loss": 2.8793,
      "step": 172276
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6273558139801025,
      "learning_rate": 8.939265989353986e-05,
      "loss": 2.849,
      "step": 172277
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.090743541717529,
      "learning_rate": 8.938974680770698e-05,
      "loss": 2.8279,
      "step": 172278
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9327518939971924,
      "learning_rate": 8.938683376103043e-05,
      "loss": 2.928,
      "step": 172279
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6286239624023438,
      "learning_rate": 8.938392075351093e-05,
      "loss": 2.9205,
      "step": 172280
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3999476432800293,
      "learning_rate": 8.93810077851488e-05,
      "loss": 2.9929,
      "step": 172281
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0138635635375977,
      "learning_rate": 8.937809485594486e-05,
      "loss": 3.0897,
      "step": 172282
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.368783950805664,
      "learning_rate": 8.937518196589949e-05,
      "loss": 2.8679,
      "step": 172283
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.21869158744812,
      "learning_rate": 8.937226911501316e-05,
      "loss": 3.0255,
      "step": 172284
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.641477346420288,
      "learning_rate": 8.936935630328662e-05,
      "loss": 3.0546,
      "step": 172285
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.121798515319824,
      "learning_rate": 8.936644353072023e-05,
      "loss": 3.1836,
      "step": 172286
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.581498861312866,
      "learning_rate": 8.936353079731453e-05,
      "loss": 2.7235,
      "step": 172287
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3003454208374023,
      "learning_rate": 8.936061810307023e-05,
      "loss": 2.906,
      "step": 172288
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3140997886657715,
      "learning_rate": 8.935770544798768e-05,
      "loss": 2.9896,
      "step": 172289
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.817819118499756,
      "learning_rate": 8.93547928320676e-05,
      "loss": 3.0181,
      "step": 172290
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.804033041000366,
      "learning_rate": 8.935188025531042e-05,
      "loss": 2.9273,
      "step": 172291
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.866396188735962,
      "learning_rate": 8.934896771771672e-05,
      "loss": 3.0947,
      "step": 172292
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.875715970993042,
      "learning_rate": 8.934605521928693e-05,
      "loss": 2.8853,
      "step": 172293
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.475207567214966,
      "learning_rate": 8.934314276002177e-05,
      "loss": 3.0316,
      "step": 172294
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6552445888519287,
      "learning_rate": 8.934023033992158e-05,
      "loss": 3.1477,
      "step": 172295
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.132084369659424,
      "learning_rate": 8.933731795898716e-05,
      "loss": 2.8426,
      "step": 172296
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7077784538269043,
      "learning_rate": 8.933440561721889e-05,
      "loss": 2.7719,
      "step": 172297
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.5939557552337646,
      "learning_rate": 8.933149331461721e-05,
      "loss": 2.9126,
      "step": 172298
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.078521490097046,
      "learning_rate": 8.93285810511829e-05,
      "loss": 3.1271,
      "step": 172299
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.365445375442505,
      "learning_rate": 8.932566882691639e-05,
      "loss": 2.8524,
      "step": 172300
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5886833667755127,
      "learning_rate": 8.93227566418181e-05,
      "loss": 2.8657,
      "step": 172301
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.9028279781341553,
      "learning_rate": 8.931984449588879e-05,
      "loss": 2.994,
      "step": 172302
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8760182857513428,
      "learning_rate": 8.931693238912889e-05,
      "loss": 2.9604,
      "step": 172303
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9028656482696533,
      "learning_rate": 8.931402032153889e-05,
      "loss": 2.9081,
      "step": 172304
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.859130859375,
      "learning_rate": 8.931110829311947e-05,
      "loss": 2.9606,
      "step": 172305
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7800638675689697,
      "learning_rate": 8.930819630387096e-05,
      "loss": 2.6907,
      "step": 172306
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.214725971221924,
      "learning_rate": 8.930528435379416e-05,
      "loss": 3.1107,
      "step": 172307
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.276670217514038,
      "learning_rate": 8.93023724428895e-05,
      "loss": 2.7286,
      "step": 172308
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5578269958496094,
      "learning_rate": 8.929946057115745e-05,
      "loss": 2.9827,
      "step": 172309
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.807401180267334,
      "learning_rate": 8.929654873859858e-05,
      "loss": 2.9919,
      "step": 172310
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9608688354492188,
      "learning_rate": 8.92936369452135e-05,
      "loss": 2.8513,
      "step": 172311
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4652113914489746,
      "learning_rate": 8.929072519100266e-05,
      "loss": 2.9508,
      "step": 172312
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6249778270721436,
      "learning_rate": 8.928781347596672e-05,
      "loss": 2.8162,
      "step": 172313
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.670396566390991,
      "learning_rate": 8.928490180010617e-05,
      "loss": 2.7823,
      "step": 172314
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.779916286468506,
      "learning_rate": 8.928199016342151e-05,
      "loss": 3.1636,
      "step": 172315
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4652814865112305,
      "learning_rate": 8.927907856591323e-05,
      "loss": 2.8313,
      "step": 172316
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.472973585128784,
      "learning_rate": 8.927616700758202e-05,
      "loss": 2.788,
      "step": 172317
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.37591814994812,
      "learning_rate": 8.927325548842825e-05,
      "loss": 2.8602,
      "step": 172318
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0183231830596924,
      "learning_rate": 8.927034400845268e-05,
      "loss": 2.9735,
      "step": 172319
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.4314496517181396,
      "learning_rate": 8.926743256765572e-05,
      "loss": 2.8963,
      "step": 172320
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.45619535446167,
      "learning_rate": 8.926452116603794e-05,
      "loss": 2.8465,
      "step": 172321
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2122156620025635,
      "learning_rate": 8.926160980359974e-05,
      "loss": 2.8269,
      "step": 172322
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.7893457412719727,
      "learning_rate": 8.92586984803419e-05,
      "loss": 2.9011,
      "step": 172323
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3719613552093506,
      "learning_rate": 8.925578719626476e-05,
      "loss": 2.6884,
      "step": 172324
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7537598609924316,
      "learning_rate": 8.925287595136901e-05,
      "loss": 2.9806,
      "step": 172325
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.551056385040283,
      "learning_rate": 8.924996474565515e-05,
      "loss": 2.7609,
      "step": 172326
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8677985668182373,
      "learning_rate": 8.92470535791237e-05,
      "loss": 2.9088,
      "step": 172327
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.029883861541748,
      "learning_rate": 8.92441424517751e-05,
      "loss": 2.8488,
      "step": 172328
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.927110433578491,
      "learning_rate": 8.92412313636101e-05,
      "loss": 2.9626,
      "step": 172329
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2660210132598877,
      "learning_rate": 8.923832031462903e-05,
      "loss": 2.9092,
      "step": 172330
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2039458751678467,
      "learning_rate": 8.923540930483262e-05,
      "loss": 2.9269,
      "step": 172331
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7211380004882812,
      "learning_rate": 8.923249833422133e-05,
      "loss": 2.9116,
      "step": 172332
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.3040482997894287,
      "learning_rate": 8.922958740279568e-05,
      "loss": 2.7624,
      "step": 172333
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6236958503723145,
      "learning_rate": 8.922667651055617e-05,
      "loss": 3.0837,
      "step": 172334
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2607364654541016,
      "learning_rate": 8.922376565750347e-05,
      "loss": 3.0419,
      "step": 172335
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.932229995727539,
      "learning_rate": 8.922085484363796e-05,
      "loss": 2.8234,
      "step": 172336
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.784808874130249,
      "learning_rate": 8.921794406896037e-05,
      "loss": 3.0917,
      "step": 172337
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.446035146713257,
      "learning_rate": 8.921503333347115e-05,
      "loss": 3.1503,
      "step": 172338
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.062903642654419,
      "learning_rate": 8.921212263717083e-05,
      "loss": 3.1729,
      "step": 172339
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.710153341293335,
      "learning_rate": 8.920921198005985e-05,
      "loss": 2.8767,
      "step": 172340
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.18486213684082,
      "learning_rate": 8.920630136213898e-05,
      "loss": 3.0664,
      "step": 172341
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.470569610595703,
      "learning_rate": 8.920339078340851e-05,
      "loss": 3.1724,
      "step": 172342
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.54768705368042,
      "learning_rate": 8.92004802438692e-05,
      "loss": 3.0742,
      "step": 172343
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.1978108882904053,
      "learning_rate": 8.919756974352153e-05,
      "loss": 2.7,
      "step": 172344
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.55468487739563,
      "learning_rate": 8.919465928236599e-05,
      "loss": 3.0441,
      "step": 172345
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9358432292938232,
      "learning_rate": 8.919174886040306e-05,
      "loss": 2.8975,
      "step": 172346
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4336745738983154,
      "learning_rate": 8.918883847763344e-05,
      "loss": 3.0344,
      "step": 172347
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.044844150543213,
      "learning_rate": 8.918592813405753e-05,
      "loss": 2.9122,
      "step": 172348
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7278387546539307,
      "learning_rate": 8.918301782967601e-05,
      "loss": 2.9236,
      "step": 172349
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4177491664886475,
      "learning_rate": 8.918010756448935e-05,
      "loss": 2.9141,
      "step": 172350
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0153279304504395,
      "learning_rate": 8.917719733849809e-05,
      "loss": 3.1069,
      "step": 172351
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8021087646484375,
      "learning_rate": 8.917428715170264e-05,
      "loss": 3.1577,
      "step": 172352
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.51108717918396,
      "learning_rate": 8.91713770041038e-05,
      "loss": 2.7808,
      "step": 172353
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9969515800476074,
      "learning_rate": 8.916846689570188e-05,
      "loss": 3.1988,
      "step": 172354
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.4400014877319336,
      "learning_rate": 8.916555682649761e-05,
      "loss": 3.1466,
      "step": 172355
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5928566455841064,
      "learning_rate": 8.916264679649138e-05,
      "loss": 2.8862,
      "step": 172356
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.228891849517822,
      "learning_rate": 8.915973680568396e-05,
      "loss": 2.7793,
      "step": 172357
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.619348526000977,
      "learning_rate": 8.915682685407551e-05,
      "loss": 3.0505,
      "step": 172358
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.450819969177246,
      "learning_rate": 8.915391694166691e-05,
      "loss": 3.0285,
      "step": 172359
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2561404705047607,
      "learning_rate": 8.915100706845849e-05,
      "loss": 2.8589,
      "step": 172360
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.927333116531372,
      "learning_rate": 8.914809723445094e-05,
      "loss": 2.6836,
      "step": 172361
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2569375038146973,
      "learning_rate": 8.914518743964467e-05,
      "loss": 3.0171,
      "step": 172362
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.042778491973877,
      "learning_rate": 8.914227768404047e-05,
      "loss": 2.8491,
      "step": 172363
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.700411558151245,
      "learning_rate": 8.913936796763853e-05,
      "loss": 3.1164,
      "step": 172364
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6971211433410645,
      "learning_rate": 8.913645829043961e-05,
      "loss": 2.6439,
      "step": 172365
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.642845392227173,
      "learning_rate": 8.913354865244413e-05,
      "loss": 2.8114,
      "step": 172366
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.506042242050171,
      "learning_rate": 8.913063905365283e-05,
      "loss": 2.7713,
      "step": 172367
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.815749406814575,
      "learning_rate": 8.9127729494066e-05,
      "loss": 3.0906,
      "step": 172368
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.607377529144287,
      "learning_rate": 8.912481997368442e-05,
      "loss": 2.9392,
      "step": 172369
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.712488889694214,
      "learning_rate": 8.91219104925085e-05,
      "loss": 2.931,
      "step": 172370
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.579935312271118,
      "learning_rate": 8.911900105053879e-05,
      "loss": 2.8629,
      "step": 172371
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.770092725753784,
      "learning_rate": 8.911609164777575e-05,
      "loss": 2.8313,
      "step": 172372
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.830554485321045,
      "learning_rate": 8.91131822842201e-05,
      "loss": 3.056,
      "step": 172373
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8455095291137695,
      "learning_rate": 8.911027295987221e-05,
      "loss": 2.815,
      "step": 172374
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1045265197753906,
      "learning_rate": 8.910736367473278e-05,
      "loss": 2.9285,
      "step": 172375
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8073670864105225,
      "learning_rate": 8.910445442880228e-05,
      "loss": 2.857,
      "step": 172376
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.472353219985962,
      "learning_rate": 8.910154522208125e-05,
      "loss": 2.8668,
      "step": 172377
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.591096878051758,
      "learning_rate": 8.909863605457014e-05,
      "loss": 2.996,
      "step": 172378
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.2078425884246826,
      "learning_rate": 8.909572692626965e-05,
      "loss": 2.8,
      "step": 172379
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.130211114883423,
      "learning_rate": 8.909281783718013e-05,
      "loss": 3.0917,
      "step": 172380
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.957714319229126,
      "learning_rate": 8.908990878730238e-05,
      "loss": 2.9762,
      "step": 172381
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.5849945545196533,
      "learning_rate": 8.908699977663676e-05,
      "loss": 3.0585,
      "step": 172382
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.190403461456299,
      "learning_rate": 8.908409080518377e-05,
      "loss": 2.9278,
      "step": 172383
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.361738443374634,
      "learning_rate": 8.908118187294412e-05,
      "loss": 2.7363,
      "step": 172384
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3618431091308594,
      "learning_rate": 8.907827297991827e-05,
      "loss": 2.9759,
      "step": 172385
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.6571245193481445,
      "learning_rate": 8.907536412610661e-05,
      "loss": 3.1214,
      "step": 172386
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3517496585845947,
      "learning_rate": 8.907245531150995e-05,
      "loss": 3.1756,
      "step": 172387
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.78541898727417,
      "learning_rate": 8.906954653612872e-05,
      "loss": 3.0566,
      "step": 172388
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.988253593444824,
      "learning_rate": 8.906663779996332e-05,
      "loss": 2.8215,
      "step": 172389
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.145758628845215,
      "learning_rate": 8.906372910301452e-05,
      "loss": 2.9115,
      "step": 172390
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7124946117401123,
      "learning_rate": 8.906082044528265e-05,
      "loss": 3.1495,
      "step": 172391
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.190408945083618,
      "learning_rate": 8.905791182676848e-05,
      "loss": 2.9381,
      "step": 172392
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0032713413238525,
      "learning_rate": 8.905500324747241e-05,
      "loss": 3.0558,
      "step": 172393
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.533534288406372,
      "learning_rate": 8.905209470739499e-05,
      "loss": 2.9713,
      "step": 172394
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7529327869415283,
      "learning_rate": 8.904918620653666e-05,
      "loss": 2.9154,
      "step": 172395
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.1717238426208496,
      "learning_rate": 8.904627774489817e-05,
      "loss": 3.0125,
      "step": 172396
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.0766637325286865,
      "learning_rate": 8.904336932247986e-05,
      "loss": 2.835,
      "step": 172397
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.7402915954589844,
      "learning_rate": 8.904046093928249e-05,
      "loss": 2.9629,
      "step": 172398
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.8405821323394775,
      "learning_rate": 8.903755259530648e-05,
      "loss": 2.9855,
      "step": 172399
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.154090166091919,
      "learning_rate": 8.903464429055235e-05,
      "loss": 2.9247,
      "step": 172400
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.039724349975586,
      "learning_rate": 8.903173602502059e-05,
      "loss": 2.9283,
      "step": 172401
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.706796884536743,
      "learning_rate": 8.902882779871188e-05,
      "loss": 2.8344,
      "step": 172402
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6473073959350586,
      "learning_rate": 8.902591961162659e-05,
      "loss": 3.0477,
      "step": 172403
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5930962562561035,
      "learning_rate": 8.90230114637655e-05,
      "loss": 2.9615,
      "step": 172404
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.857649087905884,
      "learning_rate": 8.9020103355129e-05,
      "loss": 3.0691,
      "step": 172405
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.9156014919281006,
      "learning_rate": 8.901719528571763e-05,
      "loss": 3.132,
      "step": 172406
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.2569637298583984,
      "learning_rate": 8.901428725553187e-05,
      "loss": 2.9551,
      "step": 172407
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.928313732147217,
      "learning_rate": 8.90113792645724e-05,
      "loss": 2.9768,
      "step": 172408
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.972600221633911,
      "learning_rate": 8.900847131283963e-05,
      "loss": 2.9611,
      "step": 172409
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.494032382965088,
      "learning_rate": 8.900556340033427e-05,
      "loss": 2.9693,
      "step": 172410
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.3665435314178467,
      "learning_rate": 8.900265552705674e-05,
      "loss": 3.0751,
      "step": 172411
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.939711093902588,
      "learning_rate": 8.899974769300761e-05,
      "loss": 3.1815,
      "step": 172412
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.557640314102173,
      "learning_rate": 8.89968398981873e-05,
      "loss": 3.3116,
      "step": 172413
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5195882320404053,
      "learning_rate": 8.899393214259658e-05,
      "loss": 2.9905,
      "step": 172414
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.244213104248047,
      "learning_rate": 8.899102442623576e-05,
      "loss": 3.0526,
      "step": 172415
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.5818777084350586,
      "learning_rate": 8.898811674910559e-05,
      "loss": 2.8784,
      "step": 172416
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.6559255123138428,
      "learning_rate": 8.89852091112065e-05,
      "loss": 2.9047,
      "step": 172417
    },
    {
      "epoch": 2.24,
      "grad_norm": 3.391683340072632,
      "learning_rate": 8.898230151253904e-05,
      "loss": 2.9062,
      "step": 172418
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6439805030822754,
      "learning_rate": 8.897939395310369e-05,
      "loss": 3.1301,
      "step": 172419
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.700265407562256,
      "learning_rate": 8.897648643290112e-05,
      "loss": 2.6611,
      "step": 172420
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3102033138275146,
      "learning_rate": 8.897357895193173e-05,
      "loss": 2.8967,
      "step": 172421
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4511048793792725,
      "learning_rate": 8.89706715101962e-05,
      "loss": 3.0351,
      "step": 172422
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5722692012786865,
      "learning_rate": 8.896776410769495e-05,
      "loss": 3.036,
      "step": 172423
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6142024993896484,
      "learning_rate": 8.896485674442875e-05,
      "loss": 2.8054,
      "step": 172424
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3859596252441406,
      "learning_rate": 8.896194942039776e-05,
      "loss": 3.2019,
      "step": 172425
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.048297166824341,
      "learning_rate": 8.895904213560285e-05,
      "loss": 2.8231,
      "step": 172426
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.508014440536499,
      "learning_rate": 8.895613489004433e-05,
      "loss": 3.0189,
      "step": 172427
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.445101261138916,
      "learning_rate": 8.895322768372292e-05,
      "loss": 2.9703,
      "step": 172428
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2601301670074463,
      "learning_rate": 8.895032051663902e-05,
      "loss": 2.7744,
      "step": 172429
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6579108238220215,
      "learning_rate": 8.894741338879343e-05,
      "loss": 2.7194,
      "step": 172430
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4976186752319336,
      "learning_rate": 8.894450630018629e-05,
      "loss": 2.968,
      "step": 172431
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6989622116088867,
      "learning_rate": 8.894159925081846e-05,
      "loss": 3.2127,
      "step": 172432
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5492093563079834,
      "learning_rate": 8.893869224069027e-05,
      "loss": 2.6973,
      "step": 172433
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.49566912651062,
      "learning_rate": 8.893578526980247e-05,
      "loss": 2.87,
      "step": 172434
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.0670387744903564,
      "learning_rate": 8.893287833815542e-05,
      "loss": 2.9457,
      "step": 172435
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1151680946350098,
      "learning_rate": 8.892997144574985e-05,
      "loss": 2.9147,
      "step": 172436
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.404318332672119,
      "learning_rate": 8.892706459258605e-05,
      "loss": 3.0718,
      "step": 172437
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1740541458129883,
      "learning_rate": 8.892415777866479e-05,
      "loss": 3.3523,
      "step": 172438
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.395970106124878,
      "learning_rate": 8.892125100398639e-05,
      "loss": 3.2589,
      "step": 172439
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.82589054107666,
      "learning_rate": 8.89183442685516e-05,
      "loss": 2.8548,
      "step": 172440
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9531478881835938,
      "learning_rate": 8.89154375723608e-05,
      "loss": 3.3203,
      "step": 172441
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.907771348953247,
      "learning_rate": 8.89125309154148e-05,
      "loss": 2.9751,
      "step": 172442
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.511566400527954,
      "learning_rate": 8.890962429771374e-05,
      "loss": 2.864,
      "step": 172443
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9975926876068115,
      "learning_rate": 8.890671771925849e-05,
      "loss": 3.0595,
      "step": 172444
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.985203742980957,
      "learning_rate": 8.890381118004935e-05,
      "loss": 3.1195,
      "step": 172445
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.0625152587890625,
      "learning_rate": 8.890090468008708e-05,
      "loss": 2.8285,
      "step": 172446
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4704623222351074,
      "learning_rate": 8.8897998219372e-05,
      "loss": 2.8666,
      "step": 172447
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.159142255783081,
      "learning_rate": 8.889509179790497e-05,
      "loss": 2.7662,
      "step": 172448
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0988502502441406,
      "learning_rate": 8.889218541568614e-05,
      "loss": 2.8931,
      "step": 172449
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.88337779045105,
      "learning_rate": 8.888927907271632e-05,
      "loss": 3.0581,
      "step": 172450
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3494865894317627,
      "learning_rate": 8.888637276899588e-05,
      "loss": 3.0911,
      "step": 172451
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.52337646484375,
      "learning_rate": 8.888346650452556e-05,
      "loss": 3.0397,
      "step": 172452
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.506700038909912,
      "learning_rate": 8.888056027930567e-05,
      "loss": 2.8899,
      "step": 172453
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.29460072517395,
      "learning_rate": 8.887765409333707e-05,
      "loss": 2.9078,
      "step": 172454
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3552756309509277,
      "learning_rate": 8.88747479466199e-05,
      "loss": 3.0069,
      "step": 172455
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3445420265197754,
      "learning_rate": 8.887184183915498e-05,
      "loss": 3.2004,
      "step": 172456
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.000986337661743,
      "learning_rate": 8.886893577094271e-05,
      "loss": 2.8671,
      "step": 172457
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.843851327896118,
      "learning_rate": 8.886602974198378e-05,
      "loss": 2.9549,
      "step": 172458
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.928067207336426,
      "learning_rate": 8.886312375227855e-05,
      "loss": 2.9233,
      "step": 172459
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0242183208465576,
      "learning_rate": 8.886021780182773e-05,
      "loss": 2.9417,
      "step": 172460
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.593386173248291,
      "learning_rate": 8.885731189063176e-05,
      "loss": 2.5338,
      "step": 172461
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5121238231658936,
      "learning_rate": 8.885440601869126e-05,
      "loss": 3.0181,
      "step": 172462
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4751360416412354,
      "learning_rate": 8.885150018600658e-05,
      "loss": 2.9806,
      "step": 172463
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.6173930168151855,
      "learning_rate": 8.884859439257848e-05,
      "loss": 2.9673,
      "step": 172464
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.987334966659546,
      "learning_rate": 8.884568863840733e-05,
      "loss": 3.0218,
      "step": 172465
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.524901866912842,
      "learning_rate": 8.884278292349385e-05,
      "loss": 2.8687,
      "step": 172466
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.503648281097412,
      "learning_rate": 8.883987724783847e-05,
      "loss": 2.9163,
      "step": 172467
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.489809513092041,
      "learning_rate": 8.883697161144164e-05,
      "loss": 2.8187,
      "step": 172468
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.690086603164673,
      "learning_rate": 8.883406601430411e-05,
      "loss": 3.0365,
      "step": 172469
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3683018684387207,
      "learning_rate": 8.883116045642632e-05,
      "loss": 3.1111,
      "step": 172470
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.373264789581299,
      "learning_rate": 8.882825493780872e-05,
      "loss": 2.8654,
      "step": 172471
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.36733341217041,
      "learning_rate": 8.882534945845201e-05,
      "loss": 3.082,
      "step": 172472
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.643627882003784,
      "learning_rate": 8.882244401835667e-05,
      "loss": 2.8008,
      "step": 172473
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2453370094299316,
      "learning_rate": 8.88195386175231e-05,
      "loss": 2.7689,
      "step": 172474
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.6123228073120117,
      "learning_rate": 8.881663325595208e-05,
      "loss": 2.7795,
      "step": 172475
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.880326986312866,
      "learning_rate": 8.881372793364402e-05,
      "loss": 2.8992,
      "step": 172476
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1837339401245117,
      "learning_rate": 8.881082265059937e-05,
      "loss": 2.9522,
      "step": 172477
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.379652500152588,
      "learning_rate": 8.88079174068189e-05,
      "loss": 2.8969,
      "step": 172478
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.210036277770996,
      "learning_rate": 8.880501220230301e-05,
      "loss": 3.1136,
      "step": 172479
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2646303176879883,
      "learning_rate": 8.880210703705218e-05,
      "loss": 2.9144,
      "step": 172480
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8557841777801514,
      "learning_rate": 8.879920191106709e-05,
      "loss": 3.0488,
      "step": 172481
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.700026750564575,
      "learning_rate": 8.879629682434815e-05,
      "loss": 3.044,
      "step": 172482
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9538187980651855,
      "learning_rate": 8.879339177689602e-05,
      "loss": 3.0202,
      "step": 172483
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3079774379730225,
      "learning_rate": 8.879048676871122e-05,
      "loss": 2.9352,
      "step": 172484
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7395408153533936,
      "learning_rate": 8.878758179979423e-05,
      "loss": 2.929,
      "step": 172485
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.277599573135376,
      "learning_rate": 8.878467687014553e-05,
      "loss": 3.0946,
      "step": 172486
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1501030921936035,
      "learning_rate": 8.878177197976585e-05,
      "loss": 2.9973,
      "step": 172487
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5071535110473633,
      "learning_rate": 8.877886712865554e-05,
      "loss": 2.9575,
      "step": 172488
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2171287536621094,
      "learning_rate": 8.877596231681531e-05,
      "loss": 2.7985,
      "step": 172489
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2734408378601074,
      "learning_rate": 8.877305754424549e-05,
      "loss": 2.8176,
      "step": 172490
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5419492721557617,
      "learning_rate": 8.877015281094697e-05,
      "loss": 2.8866,
      "step": 172491
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.797459840774536,
      "learning_rate": 8.876724811691986e-05,
      "loss": 3.16,
      "step": 172492
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2628448009490967,
      "learning_rate": 8.8764343462165e-05,
      "loss": 2.8857,
      "step": 172493
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2315354347229004,
      "learning_rate": 8.876143884668277e-05,
      "loss": 2.7024,
      "step": 172494
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.274758815765381,
      "learning_rate": 8.875853427047386e-05,
      "loss": 2.9102,
      "step": 172495
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0227818489074707,
      "learning_rate": 8.875562973353863e-05,
      "loss": 2.9312,
      "step": 172496
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7316033840179443,
      "learning_rate": 8.875272523587787e-05,
      "loss": 2.827,
      "step": 172497
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6752469539642334,
      "learning_rate": 8.87498207774918e-05,
      "loss": 3.1363,
      "step": 172498
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.723508596420288,
      "learning_rate": 8.874691635838126e-05,
      "loss": 3.284,
      "step": 172499
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7526378631591797,
      "learning_rate": 8.874401197854652e-05,
      "loss": 2.694,
      "step": 172500
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.582749605178833,
      "learning_rate": 8.874110763798834e-05,
      "loss": 3.0555,
      "step": 172501
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.272068500518799,
      "learning_rate": 8.873820333670711e-05,
      "loss": 2.8373,
      "step": 172502
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.937026262283325,
      "learning_rate": 8.873529907470361e-05,
      "loss": 2.7533,
      "step": 172503
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.738592624664307,
      "learning_rate": 8.873239485197801e-05,
      "loss": 2.8181,
      "step": 172504
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1108875274658203,
      "learning_rate": 8.872949066853114e-05,
      "loss": 2.7809,
      "step": 172505
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.692685127258301,
      "learning_rate": 8.872658652436337e-05,
      "loss": 3.2175,
      "step": 172506
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1660423278808594,
      "learning_rate": 8.872368241947539e-05,
      "loss": 2.8828,
      "step": 172507
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.181151866912842,
      "learning_rate": 8.872077835386761e-05,
      "loss": 2.8767,
      "step": 172508
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3734524250030518,
      "learning_rate": 8.871787432754076e-05,
      "loss": 3.0685,
      "step": 172509
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.383043050765991,
      "learning_rate": 8.871497034049509e-05,
      "loss": 2.832,
      "step": 172510
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8468177318573,
      "learning_rate": 8.87120663927314e-05,
      "loss": 2.9115,
      "step": 172511
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5701804161071777,
      "learning_rate": 8.870916248425e-05,
      "loss": 3.1317,
      "step": 172512
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2095022201538086,
      "learning_rate": 8.870625861505166e-05,
      "loss": 2.8319,
      "step": 172513
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.279616594314575,
      "learning_rate": 8.870335478513674e-05,
      "loss": 2.8372,
      "step": 172514
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1440680027008057,
      "learning_rate": 8.870045099450602e-05,
      "loss": 3.0969,
      "step": 172515
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.154252052307129,
      "learning_rate": 8.869754724315968e-05,
      "loss": 3.0828,
      "step": 172516
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4600157737731934,
      "learning_rate": 8.869464353109858e-05,
      "loss": 2.8119,
      "step": 172517
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.843343734741211,
      "learning_rate": 8.869173985832303e-05,
      "loss": 3.0096,
      "step": 172518
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9004085063934326,
      "learning_rate": 8.868883622483375e-05,
      "loss": 3.0786,
      "step": 172519
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2872021198272705,
      "learning_rate": 8.868593263063114e-05,
      "loss": 2.9618,
      "step": 172520
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1240880489349365,
      "learning_rate": 8.868302907571597e-05,
      "loss": 3.2357,
      "step": 172521
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5138487815856934,
      "learning_rate": 8.868012556008841e-05,
      "loss": 2.9504,
      "step": 172522
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2756075859069824,
      "learning_rate": 8.867722208374932e-05,
      "loss": 3.0082,
      "step": 172523
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.344327449798584,
      "learning_rate": 8.867431864669905e-05,
      "loss": 2.8344,
      "step": 172524
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.664742946624756,
      "learning_rate": 8.867141524893825e-05,
      "loss": 2.7108,
      "step": 172525
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.26163387298584,
      "learning_rate": 8.866851189046739e-05,
      "loss": 2.7698,
      "step": 172526
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.798332452774048,
      "learning_rate": 8.866560857128719e-05,
      "loss": 3.0541,
      "step": 172527
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.63692045211792,
      "learning_rate": 8.866270529139789e-05,
      "loss": 2.9812,
      "step": 172528
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.545140504837036,
      "learning_rate": 8.865980205080022e-05,
      "loss": 2.9952,
      "step": 172529
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.25945782661438,
      "learning_rate": 8.865689884949463e-05,
      "loss": 2.7351,
      "step": 172530
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1018130779266357,
      "learning_rate": 8.865399568748181e-05,
      "loss": 3.0365,
      "step": 172531
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1409661769866943,
      "learning_rate": 8.865109256476208e-05,
      "loss": 2.6293,
      "step": 172532
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1690382957458496,
      "learning_rate": 8.864818948133632e-05,
      "loss": 3.1921,
      "step": 172533
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.220492362976074,
      "learning_rate": 8.864528643720465e-05,
      "loss": 2.9499,
      "step": 172534
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.62587571144104,
      "learning_rate": 8.86423834323679e-05,
      "loss": 2.9363,
      "step": 172535
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.101975679397583,
      "learning_rate": 8.863948046682644e-05,
      "loss": 3.0134,
      "step": 172536
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.96333384513855,
      "learning_rate": 8.863657754058096e-05,
      "loss": 2.8259,
      "step": 172537
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.84136438369751,
      "learning_rate": 8.863367465363183e-05,
      "loss": 2.9293,
      "step": 172538
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9293274879455566,
      "learning_rate": 8.863077180597992e-05,
      "loss": 3.0531,
      "step": 172539
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.565581798553467,
      "learning_rate": 8.862786899762533e-05,
      "loss": 3.024,
      "step": 172540
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0913143157958984,
      "learning_rate": 8.86249662285689e-05,
      "loss": 3.0167,
      "step": 172541
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.396071672439575,
      "learning_rate": 8.862206349881098e-05,
      "loss": 2.8895,
      "step": 172542
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.810025691986084,
      "learning_rate": 8.861916080835232e-05,
      "loss": 2.7811,
      "step": 172543
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7529690265655518,
      "learning_rate": 8.861625815719325e-05,
      "loss": 3.0321,
      "step": 172544
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.421820878982544,
      "learning_rate": 8.861335554533448e-05,
      "loss": 3.056,
      "step": 172545
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6525118350982666,
      "learning_rate": 8.861045297277651e-05,
      "loss": 2.9748,
      "step": 172546
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8308680057525635,
      "learning_rate": 8.860755043951984e-05,
      "loss": 3.037,
      "step": 172547
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5241267681121826,
      "learning_rate": 8.860464794556489e-05,
      "loss": 2.8339,
      "step": 172548
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3967504501342773,
      "learning_rate": 8.860174549091244e-05,
      "loss": 3.1132,
      "step": 172549
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.678105354309082,
      "learning_rate": 8.859884307556285e-05,
      "loss": 3.0084,
      "step": 172550
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8814570903778076,
      "learning_rate": 8.859594069951678e-05,
      "loss": 2.9941,
      "step": 172551
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.651644468307495,
      "learning_rate": 8.859303836277473e-05,
      "loss": 2.9727,
      "step": 172552
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1876208782196045,
      "learning_rate": 8.859013606533713e-05,
      "loss": 3.0818,
      "step": 172553
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2135872840881348,
      "learning_rate": 8.85872338072047e-05,
      "loss": 3.0018,
      "step": 172554
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.809312105178833,
      "learning_rate": 8.858433158837791e-05,
      "loss": 3.0982,
      "step": 172555
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5926971435546875,
      "learning_rate": 8.858142940885717e-05,
      "loss": 2.7115,
      "step": 172556
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.046408176422119,
      "learning_rate": 8.857852726864326e-05,
      "loss": 3.0164,
      "step": 172557
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.664877414703369,
      "learning_rate": 8.857562516773657e-05,
      "loss": 3.1276,
      "step": 172558
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4532008171081543,
      "learning_rate": 8.857272310613758e-05,
      "loss": 2.9917,
      "step": 172559
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.285888671875,
      "learning_rate": 8.8569821083847e-05,
      "loss": 2.99,
      "step": 172560
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8479909896850586,
      "learning_rate": 8.856691910086526e-05,
      "loss": 2.8504,
      "step": 172561
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.583390712738037,
      "learning_rate": 8.856401715719286e-05,
      "loss": 2.9845,
      "step": 172562
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.832160472869873,
      "learning_rate": 8.856111525283048e-05,
      "loss": 2.9291,
      "step": 172563
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.592872142791748,
      "learning_rate": 8.855821338777856e-05,
      "loss": 2.9079,
      "step": 172564
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.79642915725708,
      "learning_rate": 8.85553115620376e-05,
      "loss": 2.7444,
      "step": 172565
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.041201114654541,
      "learning_rate": 8.855240977560826e-05,
      "loss": 3.0467,
      "step": 172566
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2310376167297363,
      "learning_rate": 8.854950802849096e-05,
      "loss": 3.1273,
      "step": 172567
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.124722957611084,
      "learning_rate": 8.854660632068637e-05,
      "loss": 2.8101,
      "step": 172568
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4115469455718994,
      "learning_rate": 8.854370465219495e-05,
      "loss": 3.05,
      "step": 172569
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.904057741165161,
      "learning_rate": 8.854080302301728e-05,
      "loss": 2.8649,
      "step": 172570
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2394585609436035,
      "learning_rate": 8.853790143315373e-05,
      "loss": 2.8854,
      "step": 172571
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1385281085968018,
      "learning_rate": 8.85349998826051e-05,
      "loss": 2.7181,
      "step": 172572
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.257845640182495,
      "learning_rate": 8.85320983713717e-05,
      "loss": 2.9956,
      "step": 172573
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.244478225708008,
      "learning_rate": 8.852919689945427e-05,
      "loss": 2.8571,
      "step": 172574
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3703019618988037,
      "learning_rate": 8.852629546685325e-05,
      "loss": 2.9898,
      "step": 172575
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3060169219970703,
      "learning_rate": 8.852339407356919e-05,
      "loss": 2.9308,
      "step": 172576
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7470874786376953,
      "learning_rate": 8.852049271960252e-05,
      "loss": 3.1014,
      "step": 172577
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2530643939971924,
      "learning_rate": 8.851759140495399e-05,
      "loss": 2.8802,
      "step": 172578
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4574313163757324,
      "learning_rate": 8.851469012962394e-05,
      "loss": 2.9711,
      "step": 172579
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3302626609802246,
      "learning_rate": 8.851178889361308e-05,
      "loss": 2.9202,
      "step": 172580
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2381927967071533,
      "learning_rate": 8.850888769692179e-05,
      "loss": 2.9418,
      "step": 172581
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3573853969573975,
      "learning_rate": 8.850598653955085e-05,
      "loss": 2.9357,
      "step": 172582
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.973828077316284,
      "learning_rate": 8.85030854215005e-05,
      "loss": 2.6705,
      "step": 172583
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9114327430725098,
      "learning_rate": 8.850018434277146e-05,
      "loss": 3.2719,
      "step": 172584
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5805482864379883,
      "learning_rate": 8.849728330336416e-05,
      "loss": 2.7845,
      "step": 172585
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.301698684692383,
      "learning_rate": 8.84943823032793e-05,
      "loss": 2.9032,
      "step": 172586
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.50907039642334,
      "learning_rate": 8.849148134251723e-05,
      "loss": 2.8517,
      "step": 172587
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.948465585708618,
      "learning_rate": 8.848858042107879e-05,
      "loss": 2.9309,
      "step": 172588
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.6344878673553467,
      "learning_rate": 8.848567953896411e-05,
      "loss": 2.8628,
      "step": 172589
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8721508979797363,
      "learning_rate": 8.848277869617403e-05,
      "loss": 3.0443,
      "step": 172590
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.402623176574707,
      "learning_rate": 8.847987789270889e-05,
      "loss": 2.987,
      "step": 172591
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4387528896331787,
      "learning_rate": 8.847697712856947e-05,
      "loss": 2.6595,
      "step": 172592
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.478165149688721,
      "learning_rate": 8.847407640375602e-05,
      "loss": 2.9045,
      "step": 172593
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.376382827758789,
      "learning_rate": 8.847117571826948e-05,
      "loss": 3.0989,
      "step": 172594
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6377601623535156,
      "learning_rate": 8.846827507210991e-05,
      "loss": 3.0005,
      "step": 172595
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.554365634918213,
      "learning_rate": 8.846537446527819e-05,
      "loss": 2.9024,
      "step": 172596
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6078078746795654,
      "learning_rate": 8.846247389777464e-05,
      "loss": 2.7925,
      "step": 172597
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.33449649810791,
      "learning_rate": 8.845957336960004e-05,
      "loss": 2.9419,
      "step": 172598
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2108314037323,
      "learning_rate": 8.845667288075466e-05,
      "loss": 2.638,
      "step": 172599
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.572662115097046,
      "learning_rate": 8.84537724312394e-05,
      "loss": 3.1011,
      "step": 172600
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0141491889953613,
      "learning_rate": 8.845087202105437e-05,
      "loss": 3.1537,
      "step": 172601
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6071088314056396,
      "learning_rate": 8.844797165020042e-05,
      "loss": 2.7246,
      "step": 172602
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9429714679718018,
      "learning_rate": 8.844507131867788e-05,
      "loss": 2.9545,
      "step": 172603
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.763322353363037,
      "learning_rate": 8.844217102648749e-05,
      "loss": 2.9525,
      "step": 172604
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4446709156036377,
      "learning_rate": 8.843927077362962e-05,
      "loss": 3.0316,
      "step": 172605
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.217181444168091,
      "learning_rate": 8.843637056010505e-05,
      "loss": 2.7959,
      "step": 172606
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3632495403289795,
      "learning_rate": 8.843347038591399e-05,
      "loss": 2.8435,
      "step": 172607
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3503265380859375,
      "learning_rate": 8.843057025105722e-05,
      "loss": 3.1191,
      "step": 172608
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.73594069480896,
      "learning_rate": 8.84276701555351e-05,
      "loss": 2.9318,
      "step": 172609
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.015300750732422,
      "learning_rate": 8.842477009934836e-05,
      "loss": 2.9138,
      "step": 172610
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0534486770629883,
      "learning_rate": 8.842187008249737e-05,
      "loss": 2.8446,
      "step": 172611
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.79585862159729,
      "learning_rate": 8.841897010498291e-05,
      "loss": 2.7461,
      "step": 172612
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7852399349212646,
      "learning_rate": 8.841607016680519e-05,
      "loss": 2.9481,
      "step": 172613
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9390439987182617,
      "learning_rate": 8.841317026796503e-05,
      "loss": 3.0067,
      "step": 172614
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.358444929122925,
      "learning_rate": 8.841027040846274e-05,
      "loss": 3.0584,
      "step": 172615
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3563220500946045,
      "learning_rate": 8.840737058829904e-05,
      "loss": 3.2,
      "step": 172616
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.941521406173706,
      "learning_rate": 8.840447080747435e-05,
      "loss": 3.1171,
      "step": 172617
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.417456865310669,
      "learning_rate": 8.840157106598946e-05,
      "loss": 2.9603,
      "step": 172618
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4811699390411377,
      "learning_rate": 8.839867136384448e-05,
      "loss": 2.9746,
      "step": 172619
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7636160850524902,
      "learning_rate": 8.839577170104032e-05,
      "loss": 2.9893,
      "step": 172620
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.421696424484253,
      "learning_rate": 8.839287207757724e-05,
      "loss": 2.9643,
      "step": 172621
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2267632484436035,
      "learning_rate": 8.838997249345604e-05,
      "loss": 3.0866,
      "step": 172622
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2613070011138916,
      "learning_rate": 8.838707294867707e-05,
      "loss": 2.7994,
      "step": 172623
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6136133670806885,
      "learning_rate": 8.838417344324098e-05,
      "loss": 2.7872,
      "step": 172624
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.835123062133789,
      "learning_rate": 8.83812739771483e-05,
      "loss": 2.9698,
      "step": 172625
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.501523971557617,
      "learning_rate": 8.837837455039954e-05,
      "loss": 2.8107,
      "step": 172626
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0636844635009766,
      "learning_rate": 8.837547516299514e-05,
      "loss": 2.871,
      "step": 172627
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.644533157348633,
      "learning_rate": 8.83725758149358e-05,
      "loss": 3.0936,
      "step": 172628
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.513441562652588,
      "learning_rate": 8.836967650622193e-05,
      "loss": 2.9607,
      "step": 172629
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3843417167663574,
      "learning_rate": 8.83667772368542e-05,
      "loss": 3.0437,
      "step": 172630
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2461793422698975,
      "learning_rate": 8.836387800683307e-05,
      "loss": 2.9359,
      "step": 172631
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.346254348754883,
      "learning_rate": 8.836097881615915e-05,
      "loss": 3.0017,
      "step": 172632
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7205753326416016,
      "learning_rate": 8.835807966483276e-05,
      "loss": 2.9055,
      "step": 172633
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.087414503097534,
      "learning_rate": 8.835518055285472e-05,
      "loss": 2.9647,
      "step": 172634
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.8670766353607178,
      "learning_rate": 8.835228148022536e-05,
      "loss": 2.8432,
      "step": 172635
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.799786329269409,
      "learning_rate": 8.83493824469454e-05,
      "loss": 2.8937,
      "step": 172636
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.531139612197876,
      "learning_rate": 8.834648345301528e-05,
      "loss": 2.9417,
      "step": 172637
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3734219074249268,
      "learning_rate": 8.834358449843552e-05,
      "loss": 3.0063,
      "step": 172638
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.560161590576172,
      "learning_rate": 8.834068558320663e-05,
      "loss": 2.898,
      "step": 172639
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.856471300125122,
      "learning_rate": 8.833778670732928e-05,
      "loss": 3.0556,
      "step": 172640
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.886556625366211,
      "learning_rate": 8.833488787080383e-05,
      "loss": 2.9264,
      "step": 172641
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8624825477600098,
      "learning_rate": 8.833198907363103e-05,
      "loss": 2.8306,
      "step": 172642
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.356177806854248,
      "learning_rate": 8.832909031581128e-05,
      "loss": 3.0455,
      "step": 172643
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2012298107147217,
      "learning_rate": 8.832619159734505e-05,
      "loss": 3.0383,
      "step": 172644
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9015891551971436,
      "learning_rate": 8.832329291823311e-05,
      "loss": 3.0556,
      "step": 172645
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.952188730239868,
      "learning_rate": 8.832039427847586e-05,
      "loss": 3.0629,
      "step": 172646
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7668893337249756,
      "learning_rate": 8.831749567807373e-05,
      "loss": 2.9625,
      "step": 172647
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.491868019104004,
      "learning_rate": 8.831459711702748e-05,
      "loss": 2.8746,
      "step": 172648
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2061545848846436,
      "learning_rate": 8.83116985953375e-05,
      "loss": 2.9076,
      "step": 172649
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.377478837966919,
      "learning_rate": 8.830880011300433e-05,
      "loss": 2.9207,
      "step": 172650
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6661884784698486,
      "learning_rate": 8.830590167002863e-05,
      "loss": 2.9016,
      "step": 172651
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.65865159034729,
      "learning_rate": 8.830300326641075e-05,
      "loss": 2.6101,
      "step": 172652
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6338937282562256,
      "learning_rate": 8.830010490215146e-05,
      "loss": 3.0713,
      "step": 172653
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4356021881103516,
      "learning_rate": 8.829720657725114e-05,
      "loss": 2.8799,
      "step": 172654
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.438204526901245,
      "learning_rate": 8.829430829171042e-05,
      "loss": 2.961,
      "step": 172655
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.81886887550354,
      "learning_rate": 8.829141004552963e-05,
      "loss": 2.7061,
      "step": 172656
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.508430004119873,
      "learning_rate": 8.828851183870959e-05,
      "loss": 3.1713,
      "step": 172657
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.205517053604126,
      "learning_rate": 8.828561367125061e-05,
      "loss": 3.0319,
      "step": 172658
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0365335941314697,
      "learning_rate": 8.828271554315344e-05,
      "loss": 3.1031,
      "step": 172659
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9341890811920166,
      "learning_rate": 8.827981745441847e-05,
      "loss": 2.7094,
      "step": 172660
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.017965793609619,
      "learning_rate": 8.827691940504634e-05,
      "loss": 3.0522,
      "step": 172661
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8833889961242676,
      "learning_rate": 8.82740213950374e-05,
      "loss": 2.714,
      "step": 172662
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.441176176071167,
      "learning_rate": 8.82711234243924e-05,
      "loss": 3.2243,
      "step": 172663
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.363476514816284,
      "learning_rate": 8.82682254931117e-05,
      "loss": 2.7315,
      "step": 172664
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6128647327423096,
      "learning_rate": 8.826532760119603e-05,
      "loss": 3.167,
      "step": 172665
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7521259784698486,
      "learning_rate": 8.826242974864574e-05,
      "loss": 2.9441,
      "step": 172666
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3746585845947266,
      "learning_rate": 8.825953193546167e-05,
      "loss": 3.0905,
      "step": 172667
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9177730083465576,
      "learning_rate": 8.825663416164394e-05,
      "loss": 3.1844,
      "step": 172668
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5272064208984375,
      "learning_rate": 8.82537364271934e-05,
      "loss": 2.8457,
      "step": 172669
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6082913875579834,
      "learning_rate": 8.825083873211039e-05,
      "loss": 2.864,
      "step": 172670
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.276456117630005,
      "learning_rate": 8.824794107639565e-05,
      "loss": 3.2283,
      "step": 172671
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5685980319976807,
      "learning_rate": 8.824504346004953e-05,
      "loss": 2.8184,
      "step": 172672
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.755187511444092,
      "learning_rate": 8.82421458830728e-05,
      "loss": 3.2073,
      "step": 172673
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9280948638916016,
      "learning_rate": 8.823924834546569e-05,
      "loss": 2.6923,
      "step": 172674
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7821810245513916,
      "learning_rate": 8.823635084722901e-05,
      "loss": 2.8096,
      "step": 172675
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.545305013656616,
      "learning_rate": 8.823345338836307e-05,
      "loss": 3.3058,
      "step": 172676
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7262022495269775,
      "learning_rate": 8.823055596886864e-05,
      "loss": 2.9984,
      "step": 172677
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.633777379989624,
      "learning_rate": 8.822765858874602e-05,
      "loss": 3.1422,
      "step": 172678
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.941046953201294,
      "learning_rate": 8.82247612479961e-05,
      "loss": 2.9256,
      "step": 172679
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.63232159614563,
      "learning_rate": 8.822186394661899e-05,
      "loss": 2.9963,
      "step": 172680
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6653242111206055,
      "learning_rate": 8.821896668461555e-05,
      "loss": 2.7229,
      "step": 172681
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.894786834716797,
      "learning_rate": 8.821606946198607e-05,
      "loss": 2.9578,
      "step": 172682
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4943084716796875,
      "learning_rate": 8.821317227873134e-05,
      "loss": 2.8002,
      "step": 172683
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.605851411819458,
      "learning_rate": 8.821027513485171e-05,
      "loss": 2.9007,
      "step": 172684
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.019484281539917,
      "learning_rate": 8.820737803034791e-05,
      "loss": 2.7348,
      "step": 172685
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5523531436920166,
      "learning_rate": 8.820448096522022e-05,
      "loss": 2.6453,
      "step": 172686
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8795204162597656,
      "learning_rate": 8.820158393946939e-05,
      "loss": 2.8531,
      "step": 172687
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.926206111907959,
      "learning_rate": 8.819868695309579e-05,
      "loss": 3.0357,
      "step": 172688
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.952604055404663,
      "learning_rate": 8.819579000610015e-05,
      "loss": 2.8429,
      "step": 172689
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.540875196456909,
      "learning_rate": 8.819289309848282e-05,
      "loss": 2.9138,
      "step": 172690
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5596678256988525,
      "learning_rate": 8.818999623024453e-05,
      "loss": 3.0944,
      "step": 172691
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.468327283859253,
      "learning_rate": 8.818709940138571e-05,
      "loss": 2.9132,
      "step": 172692
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.89156436920166,
      "learning_rate": 8.818420261190689e-05,
      "loss": 3.256,
      "step": 172693
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.936960220336914,
      "learning_rate": 8.818130586180852e-05,
      "loss": 2.8901,
      "step": 172694
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.370870351791382,
      "learning_rate": 8.817840915109137e-05,
      "loss": 2.8612,
      "step": 172695
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.793360948562622,
      "learning_rate": 8.817551247975578e-05,
      "loss": 2.597,
      "step": 172696
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5079424381256104,
      "learning_rate": 8.817261584780241e-05,
      "loss": 2.7745,
      "step": 172697
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1560750007629395,
      "learning_rate": 8.816971925523178e-05,
      "loss": 3.0548,
      "step": 172698
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6851282119750977,
      "learning_rate": 8.816682270204437e-05,
      "loss": 3.0468,
      "step": 172699
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3574178218841553,
      "learning_rate": 8.816392618824066e-05,
      "loss": 3.1428,
      "step": 172700
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4088683128356934,
      "learning_rate": 8.816102971382138e-05,
      "loss": 3.1977,
      "step": 172701
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.596667528152466,
      "learning_rate": 8.815813327878683e-05,
      "loss": 3.2523,
      "step": 172702
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9309909343719482,
      "learning_rate": 8.815523688313782e-05,
      "loss": 2.9976,
      "step": 172703
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8492825031280518,
      "learning_rate": 8.815234052687476e-05,
      "loss": 2.9454,
      "step": 172704
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7469558715820312,
      "learning_rate": 8.814944420999816e-05,
      "loss": 3.0052,
      "step": 172705
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.248617649078369,
      "learning_rate": 8.814654793250848e-05,
      "loss": 2.9795,
      "step": 172706
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6254632472991943,
      "learning_rate": 8.814365169440643e-05,
      "loss": 2.785,
      "step": 172707
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9033827781677246,
      "learning_rate": 8.814075549569242e-05,
      "loss": 3.2014,
      "step": 172708
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4532840251922607,
      "learning_rate": 8.813785933636709e-05,
      "loss": 3.0935,
      "step": 172709
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.389162063598633,
      "learning_rate": 8.8134963216431e-05,
      "loss": 2.8333,
      "step": 172710
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3216357231140137,
      "learning_rate": 8.813206713588454e-05,
      "loss": 3.1335,
      "step": 172711
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5191471576690674,
      "learning_rate": 8.812917109472829e-05,
      "loss": 2.7029,
      "step": 172712
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2437069416046143,
      "learning_rate": 8.81262750929629e-05,
      "loss": 3.1,
      "step": 172713
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.851351022720337,
      "learning_rate": 8.812337913058875e-05,
      "loss": 2.8704,
      "step": 172714
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6465187072753906,
      "learning_rate": 8.812048320760654e-05,
      "loss": 2.832,
      "step": 172715
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.815216064453125,
      "learning_rate": 8.811758732401674e-05,
      "loss": 2.826,
      "step": 172716
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0942986011505127,
      "learning_rate": 8.811469147981987e-05,
      "loss": 2.9116,
      "step": 172717
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1724517345428467,
      "learning_rate": 8.811179567501643e-05,
      "loss": 3.2564,
      "step": 172718
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.36818790435791,
      "learning_rate": 8.810889990960707e-05,
      "loss": 2.7986,
      "step": 172719
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8114688396453857,
      "learning_rate": 8.810600418359217e-05,
      "loss": 2.7922,
      "step": 172720
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5264925956726074,
      "learning_rate": 8.810310849697244e-05,
      "loss": 2.7728,
      "step": 172721
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8142340183258057,
      "learning_rate": 8.810021284974834e-05,
      "loss": 2.9096,
      "step": 172722
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.34794282913208,
      "learning_rate": 8.809731724192046e-05,
      "loss": 3.2789,
      "step": 172723
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.682948112487793,
      "learning_rate": 8.809442167348914e-05,
      "loss": 3.0852,
      "step": 172724
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3408470153808594,
      "learning_rate": 8.809152614445519e-05,
      "loss": 2.849,
      "step": 172725
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4987974166870117,
      "learning_rate": 8.808863065481894e-05,
      "loss": 3.0265,
      "step": 172726
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.446613311767578,
      "learning_rate": 8.80857352045811e-05,
      "loss": 3.011,
      "step": 172727
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8046765327453613,
      "learning_rate": 8.808283979374212e-05,
      "loss": 2.9047,
      "step": 172728
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.215219020843506,
      "learning_rate": 8.807994442230244e-05,
      "loss": 3.1587,
      "step": 172729
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.112537145614624,
      "learning_rate": 8.807704909026278e-05,
      "loss": 3.0029,
      "step": 172730
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.543285608291626,
      "learning_rate": 8.807415379762361e-05,
      "loss": 2.7403,
      "step": 172731
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.478679895401001,
      "learning_rate": 8.807125854438535e-05,
      "loss": 2.9183,
      "step": 172732
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5196526050567627,
      "learning_rate": 8.806836333054875e-05,
      "loss": 2.9989,
      "step": 172733
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1359517574310303,
      "learning_rate": 8.806546815611426e-05,
      "loss": 2.9458,
      "step": 172734
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.6076061725616455,
      "learning_rate": 8.806257302108228e-05,
      "loss": 3.1044,
      "step": 172735
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9407260417938232,
      "learning_rate": 8.805967792545359e-05,
      "loss": 2.9497,
      "step": 172736
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6546521186828613,
      "learning_rate": 8.805678286922858e-05,
      "loss": 2.918,
      "step": 172737
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.817518472671509,
      "learning_rate": 8.805388785240773e-05,
      "loss": 3.0491,
      "step": 172738
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3188772201538086,
      "learning_rate": 8.805099287499175e-05,
      "loss": 2.7165,
      "step": 172739
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0626838207244873,
      "learning_rate": 8.80480979369811e-05,
      "loss": 2.8838,
      "step": 172740
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3471760749816895,
      "learning_rate": 8.80452030383762e-05,
      "loss": 3.0653,
      "step": 172741
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.534186601638794,
      "learning_rate": 8.804230817917781e-05,
      "loss": 2.8371,
      "step": 172742
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.67441463470459,
      "learning_rate": 8.803941335938627e-05,
      "loss": 2.8261,
      "step": 172743
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0227296352386475,
      "learning_rate": 8.803651857900227e-05,
      "loss": 2.9372,
      "step": 172744
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6788575649261475,
      "learning_rate": 8.803362383802628e-05,
      "loss": 2.7638,
      "step": 172745
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.205983877182007,
      "learning_rate": 8.803072913645886e-05,
      "loss": 2.9222,
      "step": 172746
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5707716941833496,
      "learning_rate": 8.802783447430043e-05,
      "loss": 3.1289,
      "step": 172747
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9655275344848633,
      "learning_rate": 8.802493985155174e-05,
      "loss": 3.0783,
      "step": 172748
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.140590190887451,
      "learning_rate": 8.802204526821308e-05,
      "loss": 2.8127,
      "step": 172749
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.266585350036621,
      "learning_rate": 8.801915072428527e-05,
      "loss": 2.9761,
      "step": 172750
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7821381092071533,
      "learning_rate": 8.801625621976857e-05,
      "loss": 2.9996,
      "step": 172751
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.761031150817871,
      "learning_rate": 8.801336175466383e-05,
      "loss": 2.9208,
      "step": 172752
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8709216117858887,
      "learning_rate": 8.801046732897125e-05,
      "loss": 3.0784,
      "step": 172753
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.440469264984131,
      "learning_rate": 8.80075729426916e-05,
      "loss": 2.8098,
      "step": 172754
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.247089147567749,
      "learning_rate": 8.800467859582525e-05,
      "loss": 2.8968,
      "step": 172755
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4425158500671387,
      "learning_rate": 8.800178428837295e-05,
      "loss": 2.7447,
      "step": 172756
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.265204906463623,
      "learning_rate": 8.799889002033497e-05,
      "loss": 2.955,
      "step": 172757
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6149816513061523,
      "learning_rate": 8.799599579171215e-05,
      "loss": 2.9642,
      "step": 172758
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8089165687561035,
      "learning_rate": 8.799310160250486e-05,
      "loss": 2.9961,
      "step": 172759
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.043992519378662,
      "learning_rate": 8.799020745271363e-05,
      "loss": 2.9275,
      "step": 172760
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.899138927459717,
      "learning_rate": 8.798731334233896e-05,
      "loss": 2.7684,
      "step": 172761
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5949602127075195,
      "learning_rate": 8.798441927138153e-05,
      "loss": 2.8342,
      "step": 172762
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6388790607452393,
      "learning_rate": 8.798152523984172e-05,
      "loss": 3.0197,
      "step": 172763
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1098737716674805,
      "learning_rate": 8.797863124772021e-05,
      "loss": 2.9079,
      "step": 172764
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.60500431060791,
      "learning_rate": 8.79757372950175e-05,
      "loss": 2.7697,
      "step": 172765
    },
    {
      "epoch": 2.25,
      "grad_norm": 6.926069736480713,
      "learning_rate": 8.79728433817341e-05,
      "loss": 2.8335,
      "step": 172766
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.448319911956787,
      "learning_rate": 8.796994950787045e-05,
      "loss": 3.0053,
      "step": 172767
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0065410137176514,
      "learning_rate": 8.796705567342727e-05,
      "loss": 2.9939,
      "step": 172768
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.365595817565918,
      "learning_rate": 8.796416187840494e-05,
      "loss": 2.8159,
      "step": 172769
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.041940689086914,
      "learning_rate": 8.796126812280416e-05,
      "loss": 2.9625,
      "step": 172770
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.318028450012207,
      "learning_rate": 8.795837440662539e-05,
      "loss": 2.7617,
      "step": 172771
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.145943641662598,
      "learning_rate": 8.795548072986918e-05,
      "loss": 2.9409,
      "step": 172772
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7187693119049072,
      "learning_rate": 8.795258709253594e-05,
      "loss": 2.8795,
      "step": 172773
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.361015796661377,
      "learning_rate": 8.794969349462638e-05,
      "loss": 2.958,
      "step": 172774
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.9210355281829834,
      "learning_rate": 8.79467999361409e-05,
      "loss": 2.8612,
      "step": 172775
    },
    {
      "epoch": 2.25,
      "grad_norm": 6.034237384796143,
      "learning_rate": 8.794390641708021e-05,
      "loss": 2.9207,
      "step": 172776
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.650722026824951,
      "learning_rate": 8.794101293744475e-05,
      "loss": 2.8879,
      "step": 172777
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.821991443634033,
      "learning_rate": 8.793811949723509e-05,
      "loss": 3.0245,
      "step": 172778
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0402565002441406,
      "learning_rate": 8.79352260964516e-05,
      "loss": 3.0557,
      "step": 172779
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.745112657546997,
      "learning_rate": 8.793233273509506e-05,
      "loss": 3.0606,
      "step": 172780
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8046884536743164,
      "learning_rate": 8.792943941316581e-05,
      "loss": 2.701,
      "step": 172781
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4577560424804688,
      "learning_rate": 8.79265461306646e-05,
      "loss": 3.1594,
      "step": 172782
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.508129596710205,
      "learning_rate": 8.792365288759181e-05,
      "loss": 3.1518,
      "step": 172783
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.174463987350464,
      "learning_rate": 8.792075968394802e-05,
      "loss": 3.0573,
      "step": 172784
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.45150089263916,
      "learning_rate": 8.791786651973368e-05,
      "loss": 3.2374,
      "step": 172785
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.964714288711548,
      "learning_rate": 8.791497339494951e-05,
      "loss": 2.9874,
      "step": 172786
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4590330123901367,
      "learning_rate": 8.791208030959585e-05,
      "loss": 2.9088,
      "step": 172787
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4984991550445557,
      "learning_rate": 8.790918726367343e-05,
      "loss": 3.1474,
      "step": 172788
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5903003215789795,
      "learning_rate": 8.79062942571827e-05,
      "loss": 3.1494,
      "step": 172789
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.244882822036743,
      "learning_rate": 8.790340129012419e-05,
      "loss": 2.8884,
      "step": 172790
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.054790735244751,
      "learning_rate": 8.790050836249837e-05,
      "loss": 3.0364,
      "step": 172791
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.025993347167969,
      "learning_rate": 8.789761547430591e-05,
      "loss": 2.6707,
      "step": 172792
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.984177589416504,
      "learning_rate": 8.789472262554719e-05,
      "loss": 2.988,
      "step": 172793
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8754100799560547,
      "learning_rate": 8.789182981622298e-05,
      "loss": 2.9394,
      "step": 172794
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9451992511749268,
      "learning_rate": 8.788893704633366e-05,
      "loss": 2.9261,
      "step": 172795
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.141195058822632,
      "learning_rate": 8.788604431587977e-05,
      "loss": 2.894,
      "step": 172796
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7832298278808594,
      "learning_rate": 8.78831516248618e-05,
      "loss": 3.0033,
      "step": 172797
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0632834434509277,
      "learning_rate": 8.788025897328046e-05,
      "loss": 3.0161,
      "step": 172798
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0169291496276855,
      "learning_rate": 8.787736636113605e-05,
      "loss": 3.035,
      "step": 172799
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8799843788146973,
      "learning_rate": 8.787447378842935e-05,
      "loss": 2.7962,
      "step": 172800
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.4677042961120605,
      "learning_rate": 8.787158125516081e-05,
      "loss": 2.825,
      "step": 172801
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.060714244842529,
      "learning_rate": 8.786868876133093e-05,
      "loss": 3.0545,
      "step": 172802
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.736504077911377,
      "learning_rate": 8.78657963069402e-05,
      "loss": 2.8065,
      "step": 172803
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.586252212524414,
      "learning_rate": 8.786290389198927e-05,
      "loss": 3.1176,
      "step": 172804
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3281922340393066,
      "learning_rate": 8.786001151647858e-05,
      "loss": 2.7802,
      "step": 172805
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.326785087585449,
      "learning_rate": 8.785711918040877e-05,
      "loss": 2.927,
      "step": 172806
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6099135875701904,
      "learning_rate": 8.785422688378036e-05,
      "loss": 2.8376,
      "step": 172807
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.617882251739502,
      "learning_rate": 8.785133462659385e-05,
      "loss": 2.9508,
      "step": 172808
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.989716053009033,
      "learning_rate": 8.784844240884966e-05,
      "loss": 3.2087,
      "step": 172809
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.9892373085021973,
      "learning_rate": 8.784555023054857e-05,
      "loss": 3.1209,
      "step": 172810
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0776686668395996,
      "learning_rate": 8.78426580916909e-05,
      "loss": 2.9599,
      "step": 172811
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5729477405548096,
      "learning_rate": 8.783976599227738e-05,
      "loss": 2.903,
      "step": 172812
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7027156352996826,
      "learning_rate": 8.783687393230844e-05,
      "loss": 3.08,
      "step": 172813
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.662449836730957,
      "learning_rate": 8.783398191178454e-05,
      "loss": 3.0577,
      "step": 172814
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5567729473114014,
      "learning_rate": 8.78310899307064e-05,
      "loss": 3.1029,
      "step": 172815
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.085690975189209,
      "learning_rate": 8.782819798907446e-05,
      "loss": 2.9172,
      "step": 172816
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7543208599090576,
      "learning_rate": 8.782530608688919e-05,
      "loss": 2.8728,
      "step": 172817
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1883788108825684,
      "learning_rate": 8.782241422415128e-05,
      "loss": 3.033,
      "step": 172818
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.268343448638916,
      "learning_rate": 8.78195224008611e-05,
      "loss": 2.8981,
      "step": 172819
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8354525566101074,
      "learning_rate": 8.781663061701935e-05,
      "loss": 2.8689,
      "step": 172820
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.519420623779297,
      "learning_rate": 8.781373887262652e-05,
      "loss": 3.0209,
      "step": 172821
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1382689476013184,
      "learning_rate": 8.781084716768312e-05,
      "loss": 2.6827,
      "step": 172822
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0292043685913086,
      "learning_rate": 8.78079555021896e-05,
      "loss": 3.1153,
      "step": 172823
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6743476390838623,
      "learning_rate": 8.780506387614666e-05,
      "loss": 3.0198,
      "step": 172824
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6840431690216064,
      "learning_rate": 8.780217228955469e-05,
      "loss": 2.9223,
      "step": 172825
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9587748050689697,
      "learning_rate": 8.779928074241436e-05,
      "loss": 3.0422,
      "step": 172826
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.367800712585449,
      "learning_rate": 8.779638923472617e-05,
      "loss": 3.054,
      "step": 172827
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9198927879333496,
      "learning_rate": 8.779349776649056e-05,
      "loss": 2.9982,
      "step": 172828
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.9026405811309814,
      "learning_rate": 8.779060633770823e-05,
      "loss": 2.9095,
      "step": 172829
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.262132406234741,
      "learning_rate": 8.778771494837962e-05,
      "loss": 3.0251,
      "step": 172830
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.311310291290283,
      "learning_rate": 8.778482359850518e-05,
      "loss": 2.9123,
      "step": 172831
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.902991533279419,
      "learning_rate": 8.778193228808566e-05,
      "loss": 3.0342,
      "step": 172832
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.785553455352783,
      "learning_rate": 8.777904101712148e-05,
      "loss": 2.8023,
      "step": 172833
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.215487957000732,
      "learning_rate": 8.777614978561307e-05,
      "loss": 2.9127,
      "step": 172834
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.712679147720337,
      "learning_rate": 8.77732585935612e-05,
      "loss": 3.0747,
      "step": 172835
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.310328006744385,
      "learning_rate": 8.777036744096617e-05,
      "loss": 2.9636,
      "step": 172836
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.326930046081543,
      "learning_rate": 8.776747632782874e-05,
      "loss": 3.0228,
      "step": 172837
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.266209363937378,
      "learning_rate": 8.776458525414935e-05,
      "loss": 2.7942,
      "step": 172838
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.16087007522583,
      "learning_rate": 8.776169421992853e-05,
      "loss": 2.9309,
      "step": 172839
    },
    {
      "epoch": 2.25,
      "grad_norm": 6.299616813659668,
      "learning_rate": 8.775880322516671e-05,
      "loss": 2.9327,
      "step": 172840
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.782161235809326,
      "learning_rate": 8.775591226986463e-05,
      "loss": 2.9103,
      "step": 172841
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5170795917510986,
      "learning_rate": 8.775302135402266e-05,
      "loss": 2.9277,
      "step": 172842
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4844019412994385,
      "learning_rate": 8.775013047764151e-05,
      "loss": 2.9539,
      "step": 172843
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.008617639541626,
      "learning_rate": 8.774723964072157e-05,
      "loss": 2.8551,
      "step": 172844
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.966866970062256,
      "learning_rate": 8.774434884326346e-05,
      "loss": 2.8777,
      "step": 172845
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7318496704101562,
      "learning_rate": 8.774145808526758e-05,
      "loss": 3.0364,
      "step": 172846
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.005267143249512,
      "learning_rate": 8.773856736673467e-05,
      "loss": 3.1674,
      "step": 172847
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8752706050872803,
      "learning_rate": 8.773567668766506e-05,
      "loss": 3.1478,
      "step": 172848
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6927289962768555,
      "learning_rate": 8.773278604805948e-05,
      "loss": 2.9269,
      "step": 172849
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4358441829681396,
      "learning_rate": 8.77298954479184e-05,
      "loss": 3.0294,
      "step": 172850
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.838771104812622,
      "learning_rate": 8.772700488724235e-05,
      "loss": 2.8765,
      "step": 172851
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4106459617614746,
      "learning_rate": 8.772411436603173e-05,
      "loss": 2.871,
      "step": 172852
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5232110023498535,
      "learning_rate": 8.772122388428734e-05,
      "loss": 2.9102,
      "step": 172853
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2436225414276123,
      "learning_rate": 8.771833344200945e-05,
      "loss": 3.177,
      "step": 172854
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6858956813812256,
      "learning_rate": 8.771544303919886e-05,
      "loss": 2.7513,
      "step": 172855
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.809126138687134,
      "learning_rate": 8.771255267585592e-05,
      "loss": 3.1055,
      "step": 172856
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7037124633789062,
      "learning_rate": 8.770966235198128e-05,
      "loss": 2.8503,
      "step": 172857
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.295339584350586,
      "learning_rate": 8.770677206757531e-05,
      "loss": 2.7798,
      "step": 172858
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3705837726593018,
      "learning_rate": 8.770388182263872e-05,
      "loss": 2.9866,
      "step": 172859
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6444456577301025,
      "learning_rate": 8.770099161717194e-05,
      "loss": 2.9445,
      "step": 172860
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.326200008392334,
      "learning_rate": 8.769810145117561e-05,
      "loss": 2.8965,
      "step": 172861
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7266271114349365,
      "learning_rate": 8.769521132465024e-05,
      "loss": 2.809,
      "step": 172862
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7213597297668457,
      "learning_rate": 8.76923212375963e-05,
      "loss": 3.1148,
      "step": 172863
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.379608154296875,
      "learning_rate": 8.768943119001431e-05,
      "loss": 2.9662,
      "step": 172864
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8181869983673096,
      "learning_rate": 8.768654118190493e-05,
      "loss": 2.717,
      "step": 172865
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5361363887786865,
      "learning_rate": 8.768365121326855e-05,
      "loss": 2.8858,
      "step": 172866
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.717268466949463,
      "learning_rate": 8.768076128410588e-05,
      "loss": 2.7628,
      "step": 172867
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4546425342559814,
      "learning_rate": 8.767787139441737e-05,
      "loss": 2.8335,
      "step": 172868
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6959564685821533,
      "learning_rate": 8.767498154420354e-05,
      "loss": 2.9111,
      "step": 172869
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9532599449157715,
      "learning_rate": 8.767209173346483e-05,
      "loss": 2.8931,
      "step": 172870
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.423356771469116,
      "learning_rate": 8.766920196220201e-05,
      "loss": 3.1766,
      "step": 172871
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.456913709640503,
      "learning_rate": 8.766631223041539e-05,
      "loss": 2.9514,
      "step": 172872
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0521061420440674,
      "learning_rate": 8.766342253810569e-05,
      "loss": 2.8581,
      "step": 172873
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.877664089202881,
      "learning_rate": 8.766053288527341e-05,
      "loss": 2.9361,
      "step": 172874
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.339827060699463,
      "learning_rate": 8.765764327191901e-05,
      "loss": 2.9993,
      "step": 172875
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.143253803253174,
      "learning_rate": 8.765475369804297e-05,
      "loss": 2.8762,
      "step": 172876
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3583474159240723,
      "learning_rate": 8.765186416364601e-05,
      "loss": 3.0407,
      "step": 172877
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.692498207092285,
      "learning_rate": 8.764897466872848e-05,
      "loss": 2.8036,
      "step": 172878
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4210314750671387,
      "learning_rate": 8.764608521329113e-05,
      "loss": 2.9317,
      "step": 172879
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9780924320220947,
      "learning_rate": 8.764319579733436e-05,
      "loss": 3.0143,
      "step": 172880
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5713467597961426,
      "learning_rate": 8.764030642085875e-05,
      "loss": 3.16,
      "step": 172881
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4262607097625732,
      "learning_rate": 8.76374170838647e-05,
      "loss": 2.9459,
      "step": 172882
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2761454582214355,
      "learning_rate": 8.763452778635299e-05,
      "loss": 2.8477,
      "step": 172883
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.700753688812256,
      "learning_rate": 8.763163852832389e-05,
      "loss": 2.9131,
      "step": 172884
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.875861167907715,
      "learning_rate": 8.762874930977822e-05,
      "loss": 2.9837,
      "step": 172885
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.898923873901367,
      "learning_rate": 8.762586013071622e-05,
      "loss": 3.2061,
      "step": 172886
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2753024101257324,
      "learning_rate": 8.762297099113881e-05,
      "loss": 2.9049,
      "step": 172887
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7782254219055176,
      "learning_rate": 8.762008189104611e-05,
      "loss": 3.169,
      "step": 172888
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5563650131225586,
      "learning_rate": 8.761719283043892e-05,
      "loss": 3.0392,
      "step": 172889
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0280208587646484,
      "learning_rate": 8.761430380931763e-05,
      "loss": 2.9904,
      "step": 172890
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4487719535827637,
      "learning_rate": 8.761141482768294e-05,
      "loss": 2.9378,
      "step": 172891
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.651041269302368,
      "learning_rate": 8.760852588553521e-05,
      "loss": 2.9289,
      "step": 172892
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4216785430908203,
      "learning_rate": 8.760563698287525e-05,
      "loss": 2.9794,
      "step": 172893
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.648585557937622,
      "learning_rate": 8.760274811970321e-05,
      "loss": 2.8934,
      "step": 172894
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.868344306945801,
      "learning_rate": 8.75998592960199e-05,
      "loss": 3.047,
      "step": 172895
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.645078659057617,
      "learning_rate": 8.759697051182575e-05,
      "loss": 3.043,
      "step": 172896
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9871904850006104,
      "learning_rate": 8.759408176712137e-05,
      "loss": 2.9463,
      "step": 172897
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.819012403488159,
      "learning_rate": 8.759119306190721e-05,
      "loss": 3.0109,
      "step": 172898
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.290135145187378,
      "learning_rate": 8.758830439618402e-05,
      "loss": 2.9552,
      "step": 172899
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.036386489868164,
      "learning_rate": 8.7585415769952e-05,
      "loss": 2.7812,
      "step": 172900
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.938629388809204,
      "learning_rate": 8.758252718321194e-05,
      "loss": 2.9923,
      "step": 172901
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7333052158355713,
      "learning_rate": 8.757963863596422e-05,
      "loss": 2.8012,
      "step": 172902
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.68587064743042,
      "learning_rate": 8.757675012820952e-05,
      "loss": 3.0267,
      "step": 172903
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7744994163513184,
      "learning_rate": 8.757386165994825e-05,
      "loss": 2.8694,
      "step": 172904
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.421750783920288,
      "learning_rate": 8.757097323118109e-05,
      "loss": 2.8589,
      "step": 172905
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3929529190063477,
      "learning_rate": 8.75680848419085e-05,
      "loss": 2.7241,
      "step": 172906
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8357181549072266,
      "learning_rate": 8.7565196492131e-05,
      "loss": 2.7705,
      "step": 172907
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.730618953704834,
      "learning_rate": 8.756230818184906e-05,
      "loss": 2.9471,
      "step": 172908
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.396713972091675,
      "learning_rate": 8.755941991106339e-05,
      "loss": 2.9403,
      "step": 172909
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.7327139377593994,
      "learning_rate": 8.755653167977432e-05,
      "loss": 2.9455,
      "step": 172910
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1649370193481445,
      "learning_rate": 8.755364348798263e-05,
      "loss": 2.9681,
      "step": 172911
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5954017639160156,
      "learning_rate": 8.75507553356887e-05,
      "loss": 2.8175,
      "step": 172912
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1203372478485107,
      "learning_rate": 8.754786722289302e-05,
      "loss": 3.1979,
      "step": 172913
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.913454532623291,
      "learning_rate": 8.754497914959627e-05,
      "loss": 2.8061,
      "step": 172914
    },
    {
      "epoch": 2.25,
      "grad_norm": 5.982997417449951,
      "learning_rate": 8.754209111579896e-05,
      "loss": 2.9937,
      "step": 172915
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.869359254837036,
      "learning_rate": 8.753920312150146e-05,
      "loss": 2.8332,
      "step": 172916
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.510857582092285,
      "learning_rate": 8.753631516670451e-05,
      "loss": 3.0403,
      "step": 172917
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2258949279785156,
      "learning_rate": 8.753342725140861e-05,
      "loss": 3.1002,
      "step": 172918
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.118718385696411,
      "learning_rate": 8.753053937561416e-05,
      "loss": 3.1764,
      "step": 172919
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.257577657699585,
      "learning_rate": 8.752765153932185e-05,
      "loss": 2.9767,
      "step": 172920
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2742931842803955,
      "learning_rate": 8.752476374253219e-05,
      "loss": 3.2058,
      "step": 172921
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.447690486907959,
      "learning_rate": 8.75218759852456e-05,
      "loss": 2.8605,
      "step": 172922
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3728580474853516,
      "learning_rate": 8.751898826746276e-05,
      "loss": 2.9393,
      "step": 172923
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0243804454803467,
      "learning_rate": 8.75161005891842e-05,
      "loss": 2.8993,
      "step": 172924
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.91819429397583,
      "learning_rate": 8.751321295041028e-05,
      "loss": 3.0012,
      "step": 172925
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.423468828201294,
      "learning_rate": 8.751032535114177e-05,
      "loss": 3.0379,
      "step": 172926
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.618467330932617,
      "learning_rate": 8.750743779137901e-05,
      "loss": 2.83,
      "step": 172927
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.23313570022583,
      "learning_rate": 8.75045502711227e-05,
      "loss": 2.8038,
      "step": 172928
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.676042079925537,
      "learning_rate": 8.750166279037334e-05,
      "loss": 2.9338,
      "step": 172929
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.616119384765625,
      "learning_rate": 8.74987753491314e-05,
      "loss": 2.9493,
      "step": 172930
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.55476713180542,
      "learning_rate": 8.749588794739739e-05,
      "loss": 3.12,
      "step": 172931
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.617854595184326,
      "learning_rate": 8.749300058517198e-05,
      "loss": 2.909,
      "step": 172932
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.780313730239868,
      "learning_rate": 8.749011326245553e-05,
      "loss": 3.0354,
      "step": 172933
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4944934844970703,
      "learning_rate": 8.748722597924878e-05,
      "loss": 3.0515,
      "step": 172934
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0568957328796387,
      "learning_rate": 8.748433873555219e-05,
      "loss": 2.9507,
      "step": 172935
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4968056678771973,
      "learning_rate": 8.748145153136622e-05,
      "loss": 3.0365,
      "step": 172936
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5212900638580322,
      "learning_rate": 8.747856436669142e-05,
      "loss": 2.6832,
      "step": 172937
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0211522579193115,
      "learning_rate": 8.747567724152841e-05,
      "loss": 2.7989,
      "step": 172938
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.7457756996154785,
      "learning_rate": 8.747279015587762e-05,
      "loss": 3.0396,
      "step": 172939
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.549931764602661,
      "learning_rate": 8.746990310973974e-05,
      "loss": 2.8639,
      "step": 172940
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.772470712661743,
      "learning_rate": 8.746701610311524e-05,
      "loss": 2.7269,
      "step": 172941
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4669995307922363,
      "learning_rate": 8.74641291360046e-05,
      "loss": 3.015,
      "step": 172942
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9093613624572754,
      "learning_rate": 8.746124220840833e-05,
      "loss": 2.9484,
      "step": 172943
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5663506984710693,
      "learning_rate": 8.745835532032706e-05,
      "loss": 2.8736,
      "step": 172944
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.780543088912964,
      "learning_rate": 8.745546847176127e-05,
      "loss": 3.101,
      "step": 172945
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.599592685699463,
      "learning_rate": 8.745258166271157e-05,
      "loss": 3.0912,
      "step": 172946
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.411299228668213,
      "learning_rate": 8.744969489317845e-05,
      "loss": 2.7669,
      "step": 172947
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.45343279838562,
      "learning_rate": 8.744680816316245e-05,
      "loss": 2.9245,
      "step": 172948
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9794042110443115,
      "learning_rate": 8.744392147266401e-05,
      "loss": 2.9829,
      "step": 172949
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3301479816436768,
      "learning_rate": 8.744103482168386e-05,
      "loss": 2.8601,
      "step": 172950
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4549012184143066,
      "learning_rate": 8.743814821022233e-05,
      "loss": 2.9038,
      "step": 172951
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4983129501342773,
      "learning_rate": 8.743526163828017e-05,
      "loss": 2.7544,
      "step": 172952
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4538118839263916,
      "learning_rate": 8.743237510585768e-05,
      "loss": 3.2267,
      "step": 172953
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6520378589630127,
      "learning_rate": 8.742948861295574e-05,
      "loss": 2.8616,
      "step": 172954
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3750717639923096,
      "learning_rate": 8.742660215957447e-05,
      "loss": 3.0662,
      "step": 172955
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.117419242858887,
      "learning_rate": 8.742371574571471e-05,
      "loss": 3.2567,
      "step": 172956
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5338010787963867,
      "learning_rate": 8.74208293713768e-05,
      "loss": 3.0881,
      "step": 172957
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.397860288619995,
      "learning_rate": 8.741794303656144e-05,
      "loss": 2.7645,
      "step": 172958
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.537526845932007,
      "learning_rate": 8.741505674126903e-05,
      "loss": 2.9462,
      "step": 172959
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.381441116333008,
      "learning_rate": 8.741217048550036e-05,
      "loss": 3.113,
      "step": 172960
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1608710289001465,
      "learning_rate": 8.740928426925559e-05,
      "loss": 3.1073,
      "step": 172961
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.5351016521453857,
      "learning_rate": 8.740639809253554e-05,
      "loss": 2.7307,
      "step": 172962
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2754836082458496,
      "learning_rate": 8.740351195534056e-05,
      "loss": 2.8353,
      "step": 172963
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2091829776763916,
      "learning_rate": 8.740062585767138e-05,
      "loss": 2.8206,
      "step": 172964
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5402820110321045,
      "learning_rate": 8.739773979952835e-05,
      "loss": 3.0948,
      "step": 172965
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7952141761779785,
      "learning_rate": 8.739485378091226e-05,
      "loss": 2.7293,
      "step": 172966
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2490854263305664,
      "learning_rate": 8.739196780182328e-05,
      "loss": 3.0759,
      "step": 172967
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2932939529418945,
      "learning_rate": 8.738908186226226e-05,
      "loss": 3.0075,
      "step": 172968
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9372856616973877,
      "learning_rate": 8.738619596222955e-05,
      "loss": 2.9772,
      "step": 172969
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1088016033172607,
      "learning_rate": 8.73833101017258e-05,
      "loss": 2.8579,
      "step": 172970
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.7404868602752686,
      "learning_rate": 8.738042428075146e-05,
      "loss": 3.0847,
      "step": 172971
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.79970121383667,
      "learning_rate": 8.73775384993073e-05,
      "loss": 2.9832,
      "step": 172972
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.2094926834106445,
      "learning_rate": 8.737465275739346e-05,
      "loss": 3.0799,
      "step": 172973
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4355859756469727,
      "learning_rate": 8.737176705501078e-05,
      "loss": 2.616,
      "step": 172974
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.596625804901123,
      "learning_rate": 8.736888139215961e-05,
      "loss": 3.1325,
      "step": 172975
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.799307107925415,
      "learning_rate": 8.736599576884068e-05,
      "loss": 2.9269,
      "step": 172976
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.315197467803955,
      "learning_rate": 8.736311018505433e-05,
      "loss": 2.9824,
      "step": 172977
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.178463459014893,
      "learning_rate": 8.736022464080135e-05,
      "loss": 3.287,
      "step": 172978
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.374475479125977,
      "learning_rate": 8.735733913608197e-05,
      "loss": 3.0231,
      "step": 172979
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.780728816986084,
      "learning_rate": 8.735445367089697e-05,
      "loss": 2.7691,
      "step": 172980
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.312199115753174,
      "learning_rate": 8.735156824524666e-05,
      "loss": 2.8239,
      "step": 172981
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.598832368850708,
      "learning_rate": 8.734868285913182e-05,
      "loss": 2.8501,
      "step": 172982
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1140458583831787,
      "learning_rate": 8.73457975125528e-05,
      "loss": 2.9874,
      "step": 172983
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4940593242645264,
      "learning_rate": 8.734291220551038e-05,
      "loss": 2.9395,
      "step": 172984
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3346285820007324,
      "learning_rate": 8.734002693800477e-05,
      "loss": 2.8073,
      "step": 172985
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.751211166381836,
      "learning_rate": 8.733714171003671e-05,
      "loss": 2.9139,
      "step": 172986
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7373437881469727,
      "learning_rate": 8.733425652160661e-05,
      "loss": 2.837,
      "step": 172987
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9464163780212402,
      "learning_rate": 8.733137137271518e-05,
      "loss": 2.8313,
      "step": 172988
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2880499362945557,
      "learning_rate": 8.732848626336276e-05,
      "loss": 3.0577,
      "step": 172989
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.685784339904785,
      "learning_rate": 8.732560119355011e-05,
      "loss": 3.1061,
      "step": 172990
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7895727157592773,
      "learning_rate": 8.732271616327762e-05,
      "loss": 2.9724,
      "step": 172991
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4074604511260986,
      "learning_rate": 8.731983117254588e-05,
      "loss": 2.9623,
      "step": 172992
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.255852460861206,
      "learning_rate": 8.731694622135527e-05,
      "loss": 2.7227,
      "step": 172993
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3461389541625977,
      "learning_rate": 8.731406130970658e-05,
      "loss": 2.9378,
      "step": 172994
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6852731704711914,
      "learning_rate": 8.73111764376001e-05,
      "loss": 3.065,
      "step": 172995
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.0381364822387695,
      "learning_rate": 8.730829160503661e-05,
      "loss": 2.9622,
      "step": 172996
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1478121280670166,
      "learning_rate": 8.73054068120165e-05,
      "loss": 3.0738,
      "step": 172997
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.265050888061523,
      "learning_rate": 8.730252205854025e-05,
      "loss": 2.6697,
      "step": 172998
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8669486045837402,
      "learning_rate": 8.729963734460856e-05,
      "loss": 2.8276,
      "step": 172999
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3931989669799805,
      "learning_rate": 8.72967526702219e-05,
      "loss": 2.9712,
      "step": 173000
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.148654937744141,
      "learning_rate": 8.729386803538067e-05,
      "loss": 2.8674,
      "step": 173001
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.6358067989349365,
      "learning_rate": 8.72909834400856e-05,
      "loss": 2.9679,
      "step": 173002
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.6605587005615234,
      "learning_rate": 8.728809888433721e-05,
      "loss": 2.8327,
      "step": 173003
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9688658714294434,
      "learning_rate": 8.728521436813585e-05,
      "loss": 3.3272,
      "step": 173004
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8776888847351074,
      "learning_rate": 8.728232989148228e-05,
      "loss": 3.1269,
      "step": 173005
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8982605934143066,
      "learning_rate": 8.727944545437692e-05,
      "loss": 2.8721,
      "step": 173006
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.178553581237793,
      "learning_rate": 8.727656105682029e-05,
      "loss": 2.979,
      "step": 173007
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1026227474212646,
      "learning_rate": 8.727367669881301e-05,
      "loss": 3.095,
      "step": 173008
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0915274620056152,
      "learning_rate": 8.727079238035557e-05,
      "loss": 2.6338,
      "step": 173009
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5543723106384277,
      "learning_rate": 8.726790810144842e-05,
      "loss": 2.9075,
      "step": 173010
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.626377820968628,
      "learning_rate": 8.726502386209227e-05,
      "loss": 3.0498,
      "step": 173011
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.512955904006958,
      "learning_rate": 8.726213966228749e-05,
      "loss": 3.1324,
      "step": 173012
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.435736656188965,
      "learning_rate": 8.725925550203476e-05,
      "loss": 2.9303,
      "step": 173013
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.152449131011963,
      "learning_rate": 8.72563713813346e-05,
      "loss": 2.9609,
      "step": 173014
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3654263019561768,
      "learning_rate": 8.725348730018747e-05,
      "loss": 2.9358,
      "step": 173015
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3302369117736816,
      "learning_rate": 8.725060325859382e-05,
      "loss": 2.9845,
      "step": 173016
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.849640130996704,
      "learning_rate": 8.724771925655441e-05,
      "loss": 3.148,
      "step": 173017
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.045280456542969,
      "learning_rate": 8.724483529406954e-05,
      "loss": 2.9227,
      "step": 173018
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.650785446166992,
      "learning_rate": 8.724195137114002e-05,
      "loss": 2.6704,
      "step": 173019
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.8436007499694824,
      "learning_rate": 8.723906748776621e-05,
      "loss": 3.1729,
      "step": 173020
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.351607084274292,
      "learning_rate": 8.723618364394867e-05,
      "loss": 2.931,
      "step": 173021
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6420044898986816,
      "learning_rate": 8.723329983968784e-05,
      "loss": 3.0463,
      "step": 173022
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.7281932830810547,
      "learning_rate": 8.723041607498447e-05,
      "loss": 2.7267,
      "step": 173023
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.380396366119385,
      "learning_rate": 8.722753234983886e-05,
      "loss": 2.7343,
      "step": 173024
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.531956911087036,
      "learning_rate": 8.722464866425178e-05,
      "loss": 3.1278,
      "step": 173025
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.821016788482666,
      "learning_rate": 8.722176501822356e-05,
      "loss": 2.8369,
      "step": 173026
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.019679069519043,
      "learning_rate": 8.721888141175502e-05,
      "loss": 2.9122,
      "step": 173027
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5042037963867188,
      "learning_rate": 8.721599784484633e-05,
      "loss": 2.6734,
      "step": 173028
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.276841402053833,
      "learning_rate": 8.721311431749826e-05,
      "loss": 2.834,
      "step": 173029
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.400238275527954,
      "learning_rate": 8.721023082971122e-05,
      "loss": 2.8746,
      "step": 173030
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7279040813446045,
      "learning_rate": 8.720734738148594e-05,
      "loss": 2.9622,
      "step": 173031
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3202216625213623,
      "learning_rate": 8.72044639728227e-05,
      "loss": 3.0915,
      "step": 173032
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.042520761489868,
      "learning_rate": 8.720158060372235e-05,
      "loss": 2.6592,
      "step": 173033
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6521785259246826,
      "learning_rate": 8.719869727418506e-05,
      "loss": 2.8322,
      "step": 173034
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4210922718048096,
      "learning_rate": 8.719581398421165e-05,
      "loss": 2.5917,
      "step": 173035
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.459655284881592,
      "learning_rate": 8.719293073380246e-05,
      "loss": 3.1803,
      "step": 173036
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.579801082611084,
      "learning_rate": 8.719004752295822e-05,
      "loss": 2.8808,
      "step": 173037
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.037396430969238,
      "learning_rate": 8.718716435167926e-05,
      "loss": 2.8074,
      "step": 173038
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.830759048461914,
      "learning_rate": 8.718428121996642e-05,
      "loss": 2.9234,
      "step": 173039
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9143130779266357,
      "learning_rate": 8.718139812781983e-05,
      "loss": 2.8009,
      "step": 173040
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3401451110839844,
      "learning_rate": 8.717851507524036e-05,
      "loss": 2.9534,
      "step": 173041
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6498892307281494,
      "learning_rate": 8.717563206222831e-05,
      "loss": 2.8586,
      "step": 173042
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4603476524353027,
      "learning_rate": 8.717274908878444e-05,
      "loss": 2.8564,
      "step": 173043
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9299843311309814,
      "learning_rate": 8.716986615490907e-05,
      "loss": 2.7461,
      "step": 173044
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6759932041168213,
      "learning_rate": 8.7166983260603e-05,
      "loss": 2.7499,
      "step": 173045
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.554471969604492,
      "learning_rate": 8.716410040586646e-05,
      "loss": 3.0171,
      "step": 173046
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.798112154006958,
      "learning_rate": 8.716121759070018e-05,
      "loss": 2.9542,
      "step": 173047
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.504523277282715,
      "learning_rate": 8.715833481510462e-05,
      "loss": 2.8799,
      "step": 173048
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.434971809387207,
      "learning_rate": 8.715545207908039e-05,
      "loss": 2.6899,
      "step": 173049
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2866945266723633,
      "learning_rate": 8.715256938262787e-05,
      "loss": 2.9472,
      "step": 173050
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.024854898452759,
      "learning_rate": 8.714968672574791e-05,
      "loss": 3.0988,
      "step": 173051
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1427953243255615,
      "learning_rate": 8.714680410844066e-05,
      "loss": 2.9535,
      "step": 173052
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6716911792755127,
      "learning_rate": 8.714392153070696e-05,
      "loss": 2.8528,
      "step": 173053
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0138697624206543,
      "learning_rate": 8.714103899254709e-05,
      "loss": 3.0716,
      "step": 173054
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0401198863983154,
      "learning_rate": 8.71381564939618e-05,
      "loss": 2.9554,
      "step": 173055
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.729132652282715,
      "learning_rate": 8.713527403495148e-05,
      "loss": 2.8615,
      "step": 173056
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4762978553771973,
      "learning_rate": 8.713239161551692e-05,
      "loss": 2.6615,
      "step": 173057
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3056063652038574,
      "learning_rate": 8.712950923565825e-05,
      "loss": 3.0337,
      "step": 173058
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.785060167312622,
      "learning_rate": 8.712662689537634e-05,
      "loss": 2.9883,
      "step": 173059
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.713660478591919,
      "learning_rate": 8.712374459467152e-05,
      "loss": 2.8484,
      "step": 173060
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.09204626083374,
      "learning_rate": 8.712086233354449e-05,
      "loss": 2.9831,
      "step": 173061
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.158719062805176,
      "learning_rate": 8.711798011199563e-05,
      "loss": 3.1004,
      "step": 173062
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.62662410736084,
      "learning_rate": 8.711509793002575e-05,
      "loss": 2.9503,
      "step": 173063
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5996577739715576,
      "learning_rate": 8.7112215787635e-05,
      "loss": 2.9947,
      "step": 173064
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.673121929168701,
      "learning_rate": 8.710933368482419e-05,
      "loss": 2.9294,
      "step": 173065
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2667789459228516,
      "learning_rate": 8.710645162159368e-05,
      "loss": 3.0946,
      "step": 173066
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.9562180042266846,
      "learning_rate": 8.710356959794419e-05,
      "loss": 2.9438,
      "step": 173067
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1154239177703857,
      "learning_rate": 8.710068761387607e-05,
      "loss": 3.1005,
      "step": 173068
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4936368465423584,
      "learning_rate": 8.709780566939014e-05,
      "loss": 2.8665,
      "step": 173069
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6946961879730225,
      "learning_rate": 8.709492376448659e-05,
      "loss": 2.6963,
      "step": 173070
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.351640462875366,
      "learning_rate": 8.709204189916617e-05,
      "loss": 2.9916,
      "step": 173071
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.230942964553833,
      "learning_rate": 8.708916007342926e-05,
      "loss": 2.967,
      "step": 173072
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2056522369384766,
      "learning_rate": 8.708627828727662e-05,
      "loss": 3.0183,
      "step": 173073
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1921749114990234,
      "learning_rate": 8.708339654070855e-05,
      "loss": 2.9293,
      "step": 173074
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.2096025943756104,
      "learning_rate": 8.708051483372578e-05,
      "loss": 2.8999,
      "step": 173075
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.231621503829956,
      "learning_rate": 8.707763316632878e-05,
      "loss": 2.9652,
      "step": 173076
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.651028633117676,
      "learning_rate": 8.707475153851808e-05,
      "loss": 3.1979,
      "step": 173077
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.582306385040283,
      "learning_rate": 8.707186995029407e-05,
      "loss": 3.0498,
      "step": 173078
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.952782392501831,
      "learning_rate": 8.706898840165753e-05,
      "loss": 3.0629,
      "step": 173079
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9452896118164062,
      "learning_rate": 8.70661068926088e-05,
      "loss": 3.0353,
      "step": 173080
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0122804641723633,
      "learning_rate": 8.706322542314856e-05,
      "loss": 2.9654,
      "step": 173081
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.439275026321411,
      "learning_rate": 8.706034399327733e-05,
      "loss": 2.8969,
      "step": 173082
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7749977111816406,
      "learning_rate": 8.705746260299559e-05,
      "loss": 3.2276,
      "step": 173083
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2537896633148193,
      "learning_rate": 8.705458125230377e-05,
      "loss": 2.9417,
      "step": 173084
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.551147699356079,
      "learning_rate": 8.705169994120263e-05,
      "loss": 2.785,
      "step": 173085
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6457529067993164,
      "learning_rate": 8.704881866969253e-05,
      "loss": 2.6724,
      "step": 173086
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.234051465988159,
      "learning_rate": 8.704593743777414e-05,
      "loss": 3.0919,
      "step": 173087
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7153542041778564,
      "learning_rate": 8.704305624544791e-05,
      "loss": 2.8227,
      "step": 173088
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.622326374053955,
      "learning_rate": 8.704017509271435e-05,
      "loss": 2.9805,
      "step": 173089
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6905832290649414,
      "learning_rate": 8.703729397957412e-05,
      "loss": 2.866,
      "step": 173090
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.844118118286133,
      "learning_rate": 8.703441290602767e-05,
      "loss": 2.9285,
      "step": 173091
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.286952495574951,
      "learning_rate": 8.703153187207545e-05,
      "loss": 3.0643,
      "step": 173092
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7805304527282715,
      "learning_rate": 8.702865087771817e-05,
      "loss": 2.8669,
      "step": 173093
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1830008029937744,
      "learning_rate": 8.70257699229563e-05,
      "loss": 2.7368,
      "step": 173094
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8575849533081055,
      "learning_rate": 8.702288900779027e-05,
      "loss": 2.9544,
      "step": 173095
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.204068183898926,
      "learning_rate": 8.702000813222076e-05,
      "loss": 2.8566,
      "step": 173096
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7931013107299805,
      "learning_rate": 8.701712729624822e-05,
      "loss": 3.0441,
      "step": 173097
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.343944787979126,
      "learning_rate": 8.701424649987328e-05,
      "loss": 2.8719,
      "step": 173098
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.376704454421997,
      "learning_rate": 8.701136574309638e-05,
      "loss": 3.0007,
      "step": 173099
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.9786887168884277,
      "learning_rate": 8.700848502591811e-05,
      "loss": 2.8149,
      "step": 173100
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9166269302368164,
      "learning_rate": 8.700560434833891e-05,
      "loss": 2.7743,
      "step": 173101
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.147460460662842,
      "learning_rate": 8.70027237103595e-05,
      "loss": 2.8916,
      "step": 173102
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5634729862213135,
      "learning_rate": 8.699984311198016e-05,
      "loss": 3.0867,
      "step": 173103
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8126626014709473,
      "learning_rate": 8.699696255320169e-05,
      "loss": 2.7823,
      "step": 173104
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.1240427494049072,
      "learning_rate": 8.699408203402447e-05,
      "loss": 2.8109,
      "step": 173105
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.743582248687744,
      "learning_rate": 8.699120155444911e-05,
      "loss": 2.9729,
      "step": 173106
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1809682846069336,
      "learning_rate": 8.6988321114476e-05,
      "loss": 3.071,
      "step": 173107
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3832271099090576,
      "learning_rate": 8.698544071410588e-05,
      "loss": 2.8113,
      "step": 173108
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.465165853500366,
      "learning_rate": 8.698256035333908e-05,
      "loss": 3.1266,
      "step": 173109
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4289772510528564,
      "learning_rate": 8.697968003217637e-05,
      "loss": 3.1329,
      "step": 173110
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.778177261352539,
      "learning_rate": 8.697679975061804e-05,
      "loss": 2.8902,
      "step": 173111
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1083531379699707,
      "learning_rate": 8.697391950866494e-05,
      "loss": 2.925,
      "step": 173112
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.943572998046875,
      "learning_rate": 8.697103930631721e-05,
      "loss": 2.9443,
      "step": 173113
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5284132957458496,
      "learning_rate": 8.696815914357568e-05,
      "loss": 3.0971,
      "step": 173114
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.3927812576293945,
      "learning_rate": 8.69652790204407e-05,
      "loss": 2.9821,
      "step": 173115
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5603771209716797,
      "learning_rate": 8.696239893691299e-05,
      "loss": 2.9945,
      "step": 173116
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2832608222961426,
      "learning_rate": 8.695951889299291e-05,
      "loss": 2.9125,
      "step": 173117
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.229612112045288,
      "learning_rate": 8.695663888868124e-05,
      "loss": 2.7762,
      "step": 173118
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4102635383605957,
      "learning_rate": 8.695375892397819e-05,
      "loss": 2.9551,
      "step": 173119
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.562690258026123,
      "learning_rate": 8.695087899888456e-05,
      "loss": 2.8341,
      "step": 173120
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5505447387695312,
      "learning_rate": 8.694799911340063e-05,
      "loss": 2.7389,
      "step": 173121
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0413997173309326,
      "learning_rate": 8.694511926752723e-05,
      "loss": 2.98,
      "step": 173122
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.4415206909179688,
      "learning_rate": 8.694223946126467e-05,
      "loss": 3.0636,
      "step": 173123
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4394402503967285,
      "learning_rate": 8.693935969461374e-05,
      "loss": 2.92,
      "step": 173124
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8960180282592773,
      "learning_rate": 8.69364799675746e-05,
      "loss": 3.0391,
      "step": 173125
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.808955669403076,
      "learning_rate": 8.693360028014811e-05,
      "loss": 2.9218,
      "step": 173126
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.63613224029541,
      "learning_rate": 8.693072063233458e-05,
      "loss": 2.8933,
      "step": 173127
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5513272285461426,
      "learning_rate": 8.692784102413476e-05,
      "loss": 3.0914,
      "step": 173128
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9356067180633545,
      "learning_rate": 8.692496145554895e-05,
      "loss": 2.9876,
      "step": 173129
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.340928554534912,
      "learning_rate": 8.692208192657801e-05,
      "loss": 2.7519,
      "step": 173130
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.95216965675354,
      "learning_rate": 8.691920243722212e-05,
      "loss": 2.9074,
      "step": 173131
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8057992458343506,
      "learning_rate": 8.691632298748205e-05,
      "loss": 3.1456,
      "step": 173132
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.568432331085205,
      "learning_rate": 8.691344357735814e-05,
      "loss": 2.8472,
      "step": 173133
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.529571771621704,
      "learning_rate": 8.691056420685115e-05,
      "loss": 2.9114,
      "step": 173134
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5758283138275146,
      "learning_rate": 8.690768487596143e-05,
      "loss": 2.8921,
      "step": 173135
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.786186456680298,
      "learning_rate": 8.69048055846898e-05,
      "loss": 2.814,
      "step": 173136
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.863004446029663,
      "learning_rate": 8.690192633303636e-05,
      "loss": 2.9487,
      "step": 173137
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.686802387237549,
      "learning_rate": 8.689904712100197e-05,
      "loss": 2.7368,
      "step": 173138
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0883708000183105,
      "learning_rate": 8.689616794858699e-05,
      "loss": 2.7435,
      "step": 173139
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.444117546081543,
      "learning_rate": 8.689328881579212e-05,
      "loss": 2.8657,
      "step": 173140
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6439425945281982,
      "learning_rate": 8.689040972261775e-05,
      "loss": 2.915,
      "step": 173141
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.98201322555542,
      "learning_rate": 8.688753066906462e-05,
      "loss": 2.9513,
      "step": 173142
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.2553043365478516,
      "learning_rate": 8.688465165513296e-05,
      "loss": 3.0906,
      "step": 173143
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3472511768341064,
      "learning_rate": 8.688177268082355e-05,
      "loss": 2.7893,
      "step": 173144
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5279932022094727,
      "learning_rate": 8.687889374613674e-05,
      "loss": 2.7843,
      "step": 173145
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7443134784698486,
      "learning_rate": 8.687601485107327e-05,
      "loss": 3.0305,
      "step": 173146
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6529765129089355,
      "learning_rate": 8.687313599563346e-05,
      "loss": 2.9493,
      "step": 173147
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.220736265182495,
      "learning_rate": 8.687025717981816e-05,
      "loss": 2.7597,
      "step": 173148
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.483262538909912,
      "learning_rate": 8.686737840362753e-05,
      "loss": 2.9328,
      "step": 173149
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.137502670288086,
      "learning_rate": 8.686449966706234e-05,
      "loss": 2.8205,
      "step": 173150
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8438165187835693,
      "learning_rate": 8.686162097012301e-05,
      "loss": 3.2078,
      "step": 173151
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7924399375915527,
      "learning_rate": 8.685874231281019e-05,
      "loss": 2.7672,
      "step": 173152
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9741413593292236,
      "learning_rate": 8.685586369512427e-05,
      "loss": 3.127,
      "step": 173153
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.084683895111084,
      "learning_rate": 8.685298511706595e-05,
      "loss": 3.02,
      "step": 173154
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.620126724243164,
      "learning_rate": 8.68501065786357e-05,
      "loss": 2.9616,
      "step": 173155
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.7945847511291504,
      "learning_rate": 8.684722807983402e-05,
      "loss": 3.058,
      "step": 173156
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.8807199001312256,
      "learning_rate": 8.68443496206614e-05,
      "loss": 2.849,
      "step": 173157
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.0404272079467773,
      "learning_rate": 8.684147120111848e-05,
      "loss": 2.9498,
      "step": 173158
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.191276788711548,
      "learning_rate": 8.68385928212057e-05,
      "loss": 2.9985,
      "step": 173159
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.0267646312713623,
      "learning_rate": 8.683571448092374e-05,
      "loss": 2.7863,
      "step": 173160
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.321704626083374,
      "learning_rate": 8.683283618027305e-05,
      "loss": 2.998,
      "step": 173161
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.499061346054077,
      "learning_rate": 8.682995791925416e-05,
      "loss": 2.7121,
      "step": 173162
    },
    {
      "epoch": 2.25,
      "grad_norm": 4.262509346008301,
      "learning_rate": 8.68270796978675e-05,
      "loss": 3.0569,
      "step": 173163
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.464749336242676,
      "learning_rate": 8.682420151611381e-05,
      "loss": 2.7764,
      "step": 173164
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6837430000305176,
      "learning_rate": 8.682132337399341e-05,
      "loss": 2.8576,
      "step": 173165
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.530802011489868,
      "learning_rate": 8.681844527150708e-05,
      "loss": 2.8594,
      "step": 173166
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.638953685760498,
      "learning_rate": 8.68155672086552e-05,
      "loss": 2.9879,
      "step": 173167
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6621594429016113,
      "learning_rate": 8.681268918543833e-05,
      "loss": 2.8542,
      "step": 173168
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3233230113983154,
      "learning_rate": 8.680981120185691e-05,
      "loss": 2.7169,
      "step": 173169
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5854363441467285,
      "learning_rate": 8.680693325791169e-05,
      "loss": 2.8398,
      "step": 173170
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.256639003753662,
      "learning_rate": 8.680405535360296e-05,
      "loss": 2.8878,
      "step": 173171
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.216219186782837,
      "learning_rate": 8.680117748893148e-05,
      "loss": 2.9103,
      "step": 173172
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.9882400035858154,
      "learning_rate": 8.67982996638977e-05,
      "loss": 3.021,
      "step": 173173
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.4084243774414062,
      "learning_rate": 8.679542187850206e-05,
      "loss": 3.0084,
      "step": 173174
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6433606147766113,
      "learning_rate": 8.679254413274523e-05,
      "loss": 2.9102,
      "step": 173175
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.790254592895508,
      "learning_rate": 8.678966642662771e-05,
      "loss": 2.9785,
      "step": 173176
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1670970916748047,
      "learning_rate": 8.678678876014993e-05,
      "loss": 2.987,
      "step": 173177
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.6618268489837646,
      "learning_rate": 8.678391113331261e-05,
      "loss": 2.6496,
      "step": 173178
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.246532917022705,
      "learning_rate": 8.678103354611616e-05,
      "loss": 2.9588,
      "step": 173179
    },
    {
      "epoch": 2.25,
      "grad_norm": 3.1833090782165527,
      "learning_rate": 8.677815599856105e-05,
      "loss": 3.0294,
      "step": 173180
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.453293561935425,
      "learning_rate": 8.6775278490648e-05,
      "loss": 3.1003,
      "step": 173181
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.3906772136688232,
      "learning_rate": 8.677240102237748e-05,
      "loss": 2.7897,
      "step": 173182
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.516019105911255,
      "learning_rate": 8.676952359374988e-05,
      "loss": 2.8239,
      "step": 173183
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.6789329051971436,
      "learning_rate": 8.676664620476593e-05,
      "loss": 3.1856,
      "step": 173184
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.5585498809814453,
      "learning_rate": 8.676376885542611e-05,
      "loss": 2.9313,
      "step": 173185
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.724270820617676,
      "learning_rate": 8.676089154573084e-05,
      "loss": 2.8194,
      "step": 173186
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6147637367248535,
      "learning_rate": 8.675801427568081e-05,
      "loss": 2.8549,
      "step": 173187
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.895594835281372,
      "learning_rate": 8.675513704527642e-05,
      "loss": 3.0614,
      "step": 173188
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.194019079208374,
      "learning_rate": 8.675225985451838e-05,
      "loss": 2.9249,
      "step": 173189
    },
    {
      "epoch": 2.26,
      "grad_norm": 6.1263203620910645,
      "learning_rate": 8.674938270340708e-05,
      "loss": 3.0963,
      "step": 173190
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1187193393707275,
      "learning_rate": 8.674650559194312e-05,
      "loss": 2.6343,
      "step": 173191
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5423619747161865,
      "learning_rate": 8.674362852012693e-05,
      "loss": 2.7245,
      "step": 173192
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.530442476272583,
      "learning_rate": 8.67407514879592e-05,
      "loss": 3.017,
      "step": 173193
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1978328227996826,
      "learning_rate": 8.67378744954403e-05,
      "loss": 3.1071,
      "step": 173194
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8797309398651123,
      "learning_rate": 8.673499754257094e-05,
      "loss": 3.15,
      "step": 173195
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.959918737411499,
      "learning_rate": 8.673212062935149e-05,
      "loss": 2.9652,
      "step": 173196
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.816988706588745,
      "learning_rate": 8.672924375578275e-05,
      "loss": 2.9217,
      "step": 173197
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8160719871520996,
      "learning_rate": 8.672636692186486e-05,
      "loss": 2.7084,
      "step": 173198
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.529303550720215,
      "learning_rate": 8.672349012759869e-05,
      "loss": 3.042,
      "step": 173199
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1794168949127197,
      "learning_rate": 8.672061337298453e-05,
      "loss": 2.953,
      "step": 173200
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.044614553451538,
      "learning_rate": 8.671773665802313e-05,
      "loss": 2.9961,
      "step": 173201
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5850846767425537,
      "learning_rate": 8.671485998271487e-05,
      "loss": 3.0113,
      "step": 173202
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.236901044845581,
      "learning_rate": 8.67119833470605e-05,
      "loss": 2.7332,
      "step": 173203
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9420828819274902,
      "learning_rate": 8.670910675106022e-05,
      "loss": 2.9604,
      "step": 173204
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5243823528289795,
      "learning_rate": 8.670623019471483e-05,
      "loss": 2.723,
      "step": 173205
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2490251064300537,
      "learning_rate": 8.670335367802469e-05,
      "loss": 3.0555,
      "step": 173206
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3535516262054443,
      "learning_rate": 8.67004772009905e-05,
      "loss": 2.8555,
      "step": 173207
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.9697320461273193,
      "learning_rate": 8.669760076361264e-05,
      "loss": 2.8649,
      "step": 173208
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.876197338104248,
      "learning_rate": 8.66947243658919e-05,
      "loss": 2.9756,
      "step": 173209
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.266031503677368,
      "learning_rate": 8.669184800782847e-05,
      "loss": 2.7926,
      "step": 173210
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.318387985229492,
      "learning_rate": 8.668897168942312e-05,
      "loss": 2.7642,
      "step": 173211
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.964453935623169,
      "learning_rate": 8.668609541067624e-05,
      "loss": 2.8343,
      "step": 173212
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9518332481384277,
      "learning_rate": 8.668321917158854e-05,
      "loss": 2.9702,
      "step": 173213
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8963165283203125,
      "learning_rate": 8.668034297216035e-05,
      "loss": 2.8647,
      "step": 173214
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3385238647460938,
      "learning_rate": 8.667746681239249e-05,
      "loss": 2.9748,
      "step": 173215
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.46106219291687,
      "learning_rate": 8.667459069228516e-05,
      "loss": 2.947,
      "step": 173216
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.66998291015625,
      "learning_rate": 8.667171461183912e-05,
      "loss": 2.9847,
      "step": 173217
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.580763578414917,
      "learning_rate": 8.666883857105475e-05,
      "loss": 2.9111,
      "step": 173218
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5852127075195312,
      "learning_rate": 8.666596256993273e-05,
      "loss": 2.9687,
      "step": 173219
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.510258674621582,
      "learning_rate": 8.666308660847349e-05,
      "loss": 3.0612,
      "step": 173220
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5657060146331787,
      "learning_rate": 8.666021068667767e-05,
      "loss": 2.8814,
      "step": 173221
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.051304578781128,
      "learning_rate": 8.665733480454578e-05,
      "loss": 2.9581,
      "step": 173222
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9709675312042236,
      "learning_rate": 8.665445896207828e-05,
      "loss": 2.8551,
      "step": 173223
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8738036155700684,
      "learning_rate": 8.665158315927568e-05,
      "loss": 2.9226,
      "step": 173224
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5816965103149414,
      "learning_rate": 8.664870739613866e-05,
      "loss": 2.9381,
      "step": 173225
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.016942024230957,
      "learning_rate": 8.664583167266758e-05,
      "loss": 3.0214,
      "step": 173226
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.43890643119812,
      "learning_rate": 8.664295598886313e-05,
      "loss": 2.8297,
      "step": 173227
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.11704421043396,
      "learning_rate": 8.664008034472581e-05,
      "loss": 2.7727,
      "step": 173228
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.24878454208374,
      "learning_rate": 8.663720474025614e-05,
      "loss": 3.025,
      "step": 173229
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.539862871170044,
      "learning_rate": 8.663432917545454e-05,
      "loss": 3.0581,
      "step": 173230
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.215237617492676,
      "learning_rate": 8.663145365032173e-05,
      "loss": 2.99,
      "step": 173231
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.505491256713867,
      "learning_rate": 8.662857816485806e-05,
      "loss": 2.8899,
      "step": 173232
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.813167095184326,
      "learning_rate": 8.662570271906429e-05,
      "loss": 2.923,
      "step": 173233
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.443995475769043,
      "learning_rate": 8.662282731294078e-05,
      "loss": 2.8895,
      "step": 173234
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9836478233337402,
      "learning_rate": 8.661995194648814e-05,
      "loss": 2.9332,
      "step": 173235
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.902177095413208,
      "learning_rate": 8.661707661970681e-05,
      "loss": 2.9894,
      "step": 173236
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9912707805633545,
      "learning_rate": 8.661420133259744e-05,
      "loss": 3.0804,
      "step": 173237
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7800958156585693,
      "learning_rate": 8.661132608516046e-05,
      "loss": 3.021,
      "step": 173238
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7016847133636475,
      "learning_rate": 8.660845087739656e-05,
      "loss": 2.9141,
      "step": 173239
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.481583595275879,
      "learning_rate": 8.660557570930614e-05,
      "loss": 2.8681,
      "step": 173240
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5264971256256104,
      "learning_rate": 8.660270058088979e-05,
      "loss": 2.6924,
      "step": 173241
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.237990379333496,
      "learning_rate": 8.659982549214795e-05,
      "loss": 2.8687,
      "step": 173242
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1005215644836426,
      "learning_rate": 8.659695044308129e-05,
      "loss": 2.9233,
      "step": 173243
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.25565242767334,
      "learning_rate": 8.65940754336902e-05,
      "loss": 3.0447,
      "step": 173244
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.227112293243408,
      "learning_rate": 8.659120046397541e-05,
      "loss": 3.1177,
      "step": 173245
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.810522079467773,
      "learning_rate": 8.658832553393734e-05,
      "loss": 3.0192,
      "step": 173246
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2913472652435303,
      "learning_rate": 8.658545064357653e-05,
      "loss": 2.934,
      "step": 173247
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7753794193267822,
      "learning_rate": 8.658257579289343e-05,
      "loss": 2.8699,
      "step": 173248
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.521064519882202,
      "learning_rate": 8.657970098188874e-05,
      "loss": 3.1147,
      "step": 173249
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8570938110351562,
      "learning_rate": 8.657682621056282e-05,
      "loss": 2.8382,
      "step": 173250
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5297162532806396,
      "learning_rate": 8.65739514789164e-05,
      "loss": 2.8598,
      "step": 173251
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.39399790763855,
      "learning_rate": 8.65710767869499e-05,
      "loss": 3.1364,
      "step": 173252
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.6448616981506348,
      "learning_rate": 8.656820213466384e-05,
      "loss": 2.8437,
      "step": 173253
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.833786725997925,
      "learning_rate": 8.656532752205873e-05,
      "loss": 3.2235,
      "step": 173254
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.766859769821167,
      "learning_rate": 8.656245294913519e-05,
      "loss": 2.8298,
      "step": 173255
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4078564643859863,
      "learning_rate": 8.65595784158937e-05,
      "loss": 2.8811,
      "step": 173256
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.648146867752075,
      "learning_rate": 8.655670392233485e-05,
      "loss": 2.7928,
      "step": 173257
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3866515159606934,
      "learning_rate": 8.655382946845915e-05,
      "loss": 2.9499,
      "step": 173258
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.294991493225098,
      "learning_rate": 8.655095505426705e-05,
      "loss": 2.62,
      "step": 173259
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2905449867248535,
      "learning_rate": 8.654808067975925e-05,
      "loss": 3.0175,
      "step": 173260
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4360952377319336,
      "learning_rate": 8.654520634493615e-05,
      "loss": 2.8643,
      "step": 173261
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4693126678466797,
      "learning_rate": 8.654233204979829e-05,
      "loss": 2.8711,
      "step": 173262
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.724846363067627,
      "learning_rate": 8.653945779434627e-05,
      "loss": 2.8718,
      "step": 173263
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5733978748321533,
      "learning_rate": 8.653658357858065e-05,
      "loss": 2.9219,
      "step": 173264
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7954468727111816,
      "learning_rate": 8.653370940250178e-05,
      "loss": 3.1579,
      "step": 173265
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.623765468597412,
      "learning_rate": 8.653083526611043e-05,
      "loss": 2.8001,
      "step": 173266
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1563754081726074,
      "learning_rate": 8.652796116940704e-05,
      "loss": 3.0018,
      "step": 173267
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3516900539398193,
      "learning_rate": 8.652508711239202e-05,
      "loss": 3.0473,
      "step": 173268
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6473023891448975,
      "learning_rate": 8.652221309506612e-05,
      "loss": 3.0581,
      "step": 173269
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1404199600219727,
      "learning_rate": 8.651933911742975e-05,
      "loss": 2.899,
      "step": 173270
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.610068321228027,
      "learning_rate": 8.651646517948339e-05,
      "loss": 2.7206,
      "step": 173271
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.65765118598938,
      "learning_rate": 8.651359128122776e-05,
      "loss": 2.9003,
      "step": 173272
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.109029531478882,
      "learning_rate": 8.651071742266317e-05,
      "loss": 2.8238,
      "step": 173273
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7202556133270264,
      "learning_rate": 8.650784360379033e-05,
      "loss": 2.8282,
      "step": 173274
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.310298204421997,
      "learning_rate": 8.650496982460974e-05,
      "loss": 2.8746,
      "step": 173275
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.055424213409424,
      "learning_rate": 8.650209608512193e-05,
      "loss": 2.9747,
      "step": 173276
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.105869770050049,
      "learning_rate": 8.649922238532728e-05,
      "loss": 2.9952,
      "step": 173277
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9563724994659424,
      "learning_rate": 8.649634872522656e-05,
      "loss": 2.9346,
      "step": 173278
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.130588531494141,
      "learning_rate": 8.64934751048201e-05,
      "loss": 2.9196,
      "step": 173279
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.688439130783081,
      "learning_rate": 8.649060152410865e-05,
      "loss": 3.0459,
      "step": 173280
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.845224380493164,
      "learning_rate": 8.648772798309251e-05,
      "loss": 3.0041,
      "step": 173281
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.342657566070557,
      "learning_rate": 8.648485448177252e-05,
      "loss": 2.9788,
      "step": 173282
    },
    {
      "epoch": 2.26,
      "grad_norm": 6.7616963386535645,
      "learning_rate": 8.648198102014885e-05,
      "loss": 2.6167,
      "step": 173283
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.075669527053833,
      "learning_rate": 8.647910759822232e-05,
      "loss": 3.003,
      "step": 173284
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3595550060272217,
      "learning_rate": 8.647623421599323e-05,
      "loss": 3.0361,
      "step": 173285
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3429527282714844,
      "learning_rate": 8.647336087346232e-05,
      "loss": 2.9002,
      "step": 173286
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.944944143295288,
      "learning_rate": 8.647048757062999e-05,
      "loss": 2.9091,
      "step": 173287
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9629855155944824,
      "learning_rate": 8.646761430749692e-05,
      "loss": 2.945,
      "step": 173288
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.148459434509277,
      "learning_rate": 8.64647410840635e-05,
      "loss": 2.9791,
      "step": 173289
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.555633783340454,
      "learning_rate": 8.646186790033038e-05,
      "loss": 2.9529,
      "step": 173290
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6725194454193115,
      "learning_rate": 8.645899475629789e-05,
      "loss": 3.0012,
      "step": 173291
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6491613388061523,
      "learning_rate": 8.645612165196681e-05,
      "loss": 2.9537,
      "step": 173292
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8484561443328857,
      "learning_rate": 8.645324858733746e-05,
      "loss": 3.0016,
      "step": 173293
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.297694683074951,
      "learning_rate": 8.64503755624106e-05,
      "loss": 2.9616,
      "step": 173294
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0316977500915527,
      "learning_rate": 8.644750257718665e-05,
      "loss": 2.7652,
      "step": 173295
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1834051609039307,
      "learning_rate": 8.644462963166613e-05,
      "loss": 3.0308,
      "step": 173296
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8420352935791016,
      "learning_rate": 8.644175672584948e-05,
      "loss": 2.856,
      "step": 173297
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.46125864982605,
      "learning_rate": 8.643888385973745e-05,
      "loss": 2.9925,
      "step": 173298
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.714487075805664,
      "learning_rate": 8.643601103333036e-05,
      "loss": 2.8644,
      "step": 173299
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5539135932922363,
      "learning_rate": 8.643313824662892e-05,
      "loss": 2.9286,
      "step": 173300
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6221163272857666,
      "learning_rate": 8.643026549963364e-05,
      "loss": 2.875,
      "step": 173301
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.926609754562378,
      "learning_rate": 8.642739279234496e-05,
      "loss": 3.138,
      "step": 173302
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4263813495635986,
      "learning_rate": 8.642452012476338e-05,
      "loss": 3.0893,
      "step": 173303
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6724255084991455,
      "learning_rate": 8.642164749688965e-05,
      "loss": 2.9787,
      "step": 173304
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4280242919921875,
      "learning_rate": 8.6418774908724e-05,
      "loss": 3.0001,
      "step": 173305
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.749866247177124,
      "learning_rate": 8.641590236026727e-05,
      "loss": 2.8806,
      "step": 173306
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6026806831359863,
      "learning_rate": 8.641302985151984e-05,
      "loss": 2.9994,
      "step": 173307
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.77256441116333,
      "learning_rate": 8.641015738248228e-05,
      "loss": 3.0345,
      "step": 173308
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2258641719818115,
      "learning_rate": 8.640728495315499e-05,
      "loss": 2.9739,
      "step": 173309
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.259827136993408,
      "learning_rate": 8.640441256353873e-05,
      "loss": 3.0095,
      "step": 173310
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.272484540939331,
      "learning_rate": 8.64015402136338e-05,
      "loss": 3.1157,
      "step": 173311
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0298802852630615,
      "learning_rate": 8.639866790344094e-05,
      "loss": 2.9383,
      "step": 173312
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.188220500946045,
      "learning_rate": 8.639579563296065e-05,
      "loss": 2.7228,
      "step": 173313
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.712322950363159,
      "learning_rate": 8.63929234021934e-05,
      "loss": 2.924,
      "step": 173314
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.312272071838379,
      "learning_rate": 8.63900512111396e-05,
      "loss": 3.2597,
      "step": 173315
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6736769676208496,
      "learning_rate": 8.638717905980004e-05,
      "loss": 2.9979,
      "step": 173316
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3347396850585938,
      "learning_rate": 8.638430694817508e-05,
      "loss": 2.926,
      "step": 173317
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.639129638671875,
      "learning_rate": 8.638143487626534e-05,
      "loss": 2.9591,
      "step": 173318
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.157280921936035,
      "learning_rate": 8.637856284407138e-05,
      "loss": 3.0123,
      "step": 173319
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6837456226348877,
      "learning_rate": 8.637569085159364e-05,
      "loss": 3.0444,
      "step": 173320
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4026222229003906,
      "learning_rate": 8.63728188988326e-05,
      "loss": 2.8597,
      "step": 173321
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.836700201034546,
      "learning_rate": 8.636994698578898e-05,
      "loss": 3.1383,
      "step": 173322
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5975732803344727,
      "learning_rate": 8.636707511246313e-05,
      "loss": 2.8593,
      "step": 173323
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4278931617736816,
      "learning_rate": 8.636420327885579e-05,
      "loss": 2.6852,
      "step": 173324
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.640260934829712,
      "learning_rate": 8.636133148496736e-05,
      "loss": 2.5879,
      "step": 173325
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.867260694503784,
      "learning_rate": 8.63584597307984e-05,
      "loss": 2.8362,
      "step": 173326
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0227928161621094,
      "learning_rate": 8.635558801634933e-05,
      "loss": 3.0799,
      "step": 173327
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4290177822113037,
      "learning_rate": 8.635271634162087e-05,
      "loss": 2.7073,
      "step": 173328
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9170544147491455,
      "learning_rate": 8.63498447066134e-05,
      "loss": 3.0529,
      "step": 173329
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5498156547546387,
      "learning_rate": 8.63469731113276e-05,
      "loss": 2.8902,
      "step": 173330
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.615210771560669,
      "learning_rate": 8.634410155576397e-05,
      "loss": 3.0116,
      "step": 173331
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.631105422973633,
      "learning_rate": 8.634123003992296e-05,
      "loss": 2.6978,
      "step": 173332
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5384485721588135,
      "learning_rate": 8.633835856380507e-05,
      "loss": 2.829,
      "step": 173333
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.378763437271118,
      "learning_rate": 8.633548712741102e-05,
      "loss": 3.0275,
      "step": 173334
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1818456649780273,
      "learning_rate": 8.633261573074115e-05,
      "loss": 2.8269,
      "step": 173335
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.651940107345581,
      "learning_rate": 8.632974437379614e-05,
      "loss": 2.7188,
      "step": 173336
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6280484199523926,
      "learning_rate": 8.632687305657648e-05,
      "loss": 2.7974,
      "step": 173337
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5639193058013916,
      "learning_rate": 8.632400177908268e-05,
      "loss": 2.8437,
      "step": 173338
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4958643913269043,
      "learning_rate": 8.632113054131521e-05,
      "loss": 2.9923,
      "step": 173339
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5625505447387695,
      "learning_rate": 8.631825934327476e-05,
      "loss": 2.7951,
      "step": 173340
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6432607173919678,
      "learning_rate": 8.631538818496169e-05,
      "loss": 2.8416,
      "step": 173341
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.305670738220215,
      "learning_rate": 8.631251706637668e-05,
      "loss": 2.9479,
      "step": 173342
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3295857906341553,
      "learning_rate": 8.630964598752025e-05,
      "loss": 3.0001,
      "step": 173343
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5136430263519287,
      "learning_rate": 8.630677494839287e-05,
      "loss": 3.1402,
      "step": 173344
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.416952610015869,
      "learning_rate": 8.630390394899499e-05,
      "loss": 3.0851,
      "step": 173345
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.566502809524536,
      "learning_rate": 8.630103298932736e-05,
      "loss": 2.9698,
      "step": 173346
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6257269382476807,
      "learning_rate": 8.62981620693903e-05,
      "loss": 3.0966,
      "step": 173347
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.613565444946289,
      "learning_rate": 8.629529118918454e-05,
      "loss": 2.8435,
      "step": 173348
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.645003080368042,
      "learning_rate": 8.629242034871053e-05,
      "loss": 2.7562,
      "step": 173349
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.802321434020996,
      "learning_rate": 8.628954954796871e-05,
      "loss": 2.8713,
      "step": 173350
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7301342487335205,
      "learning_rate": 8.628667878695977e-05,
      "loss": 2.986,
      "step": 173351
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.998505115509033,
      "learning_rate": 8.628380806568416e-05,
      "loss": 3.2064,
      "step": 173352
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8012943267822266,
      "learning_rate": 8.628093738414236e-05,
      "loss": 3.0096,
      "step": 173353
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.686922073364258,
      "learning_rate": 8.627806674233502e-05,
      "loss": 3.1156,
      "step": 173354
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.254985809326172,
      "learning_rate": 8.627519614026258e-05,
      "loss": 2.8018,
      "step": 173355
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2769968509674072,
      "learning_rate": 8.627232557792568e-05,
      "loss": 2.9216,
      "step": 173356
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.362276315689087,
      "learning_rate": 8.62694550553248e-05,
      "loss": 2.9669,
      "step": 173357
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.9542236328125,
      "learning_rate": 8.626658457246036e-05,
      "loss": 2.7024,
      "step": 173358
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.078126907348633,
      "learning_rate": 8.626371412933307e-05,
      "loss": 2.8813,
      "step": 173359
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.929253578186035,
      "learning_rate": 8.626084372594343e-05,
      "loss": 2.7799,
      "step": 173360
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.430140972137451,
      "learning_rate": 8.625797336229183e-05,
      "loss": 2.9328,
      "step": 173361
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4512929916381836,
      "learning_rate": 8.625510303837898e-05,
      "loss": 2.8971,
      "step": 173362
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3990888595581055,
      "learning_rate": 8.625223275420534e-05,
      "loss": 3.1048,
      "step": 173363
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4845027923583984,
      "learning_rate": 8.624936250977138e-05,
      "loss": 3.0034,
      "step": 173364
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.845142364501953,
      "learning_rate": 8.624649230507777e-05,
      "loss": 3.1763,
      "step": 173365
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3064215183258057,
      "learning_rate": 8.624362214012499e-05,
      "loss": 2.8772,
      "step": 173366
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.478344917297363,
      "learning_rate": 8.624075201491346e-05,
      "loss": 2.997,
      "step": 173367
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.755298376083374,
      "learning_rate": 8.623788192944387e-05,
      "loss": 2.9316,
      "step": 173368
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4106974601745605,
      "learning_rate": 8.623501188371672e-05,
      "loss": 2.9991,
      "step": 173369
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.03855037689209,
      "learning_rate": 8.623214187773243e-05,
      "loss": 2.8424,
      "step": 173370
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7518832683563232,
      "learning_rate": 8.62292719114917e-05,
      "loss": 2.94,
      "step": 173371
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.940288066864014,
      "learning_rate": 8.622640198499489e-05,
      "loss": 2.9479,
      "step": 173372
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.143946647644043,
      "learning_rate": 8.622353209824274e-05,
      "loss": 2.9713,
      "step": 173373
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.2048869132995605,
      "learning_rate": 8.622066225123565e-05,
      "loss": 2.8657,
      "step": 173374
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.842090606689453,
      "learning_rate": 8.621779244397417e-05,
      "loss": 3.1839,
      "step": 173375
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8385329246520996,
      "learning_rate": 8.621492267645875e-05,
      "loss": 2.9296,
      "step": 173376
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5712215900421143,
      "learning_rate": 8.62120529486901e-05,
      "loss": 3.1938,
      "step": 173377
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.507812976837158,
      "learning_rate": 8.620918326066859e-05,
      "loss": 3.0275,
      "step": 173378
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2735095024108887,
      "learning_rate": 8.620631361239492e-05,
      "loss": 3.0777,
      "step": 173379
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.415053844451904,
      "learning_rate": 8.620344400386952e-05,
      "loss": 2.7929,
      "step": 173380
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4015939235687256,
      "learning_rate": 8.620057443509295e-05,
      "loss": 2.9374,
      "step": 173381
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.3445916175842285,
      "learning_rate": 8.619770490606562e-05,
      "loss": 3.1342,
      "step": 173382
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.212785720825195,
      "learning_rate": 8.619483541678826e-05,
      "loss": 3.1976,
      "step": 173383
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2349166870117188,
      "learning_rate": 8.619196596726125e-05,
      "loss": 2.6419,
      "step": 173384
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9487690925598145,
      "learning_rate": 8.618909655748526e-05,
      "loss": 3.0751,
      "step": 173385
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.247739791870117,
      "learning_rate": 8.618622718746074e-05,
      "loss": 2.9102,
      "step": 173386
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.806005954742432,
      "learning_rate": 8.618335785718826e-05,
      "loss": 3.1303,
      "step": 173387
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.755939245223999,
      "learning_rate": 8.618048856666825e-05,
      "loss": 2.9149,
      "step": 173388
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.011472702026367,
      "learning_rate": 8.617761931590136e-05,
      "loss": 2.8507,
      "step": 173389
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.6765010356903076,
      "learning_rate": 8.617475010488805e-05,
      "loss": 2.7748,
      "step": 173390
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2547645568847656,
      "learning_rate": 8.617188093362898e-05,
      "loss": 2.9262,
      "step": 173391
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1984238624572754,
      "learning_rate": 8.616901180212457e-05,
      "loss": 3.1478,
      "step": 173392
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.27094030380249,
      "learning_rate": 8.616614271037538e-05,
      "loss": 3.0095,
      "step": 173393
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.254495143890381,
      "learning_rate": 8.616327365838187e-05,
      "loss": 2.6679,
      "step": 173394
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.566493511199951,
      "learning_rate": 8.616040464614472e-05,
      "loss": 2.8749,
      "step": 173395
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0743014812469482,
      "learning_rate": 8.61575356736643e-05,
      "loss": 3.072,
      "step": 173396
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3486363887786865,
      "learning_rate": 8.615466674094131e-05,
      "loss": 3.1477,
      "step": 173397
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.796999931335449,
      "learning_rate": 8.615179784797623e-05,
      "loss": 2.8853,
      "step": 173398
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.669935941696167,
      "learning_rate": 8.614892899476953e-05,
      "loss": 2.9106,
      "step": 173399
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.780371904373169,
      "learning_rate": 8.61460601813217e-05,
      "loss": 2.6396,
      "step": 173400
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5204834938049316,
      "learning_rate": 8.614319140763345e-05,
      "loss": 2.8982,
      "step": 173401
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0937235355377197,
      "learning_rate": 8.614032267370515e-05,
      "loss": 2.8162,
      "step": 173402
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.026404619216919,
      "learning_rate": 8.613745397953748e-05,
      "loss": 2.7727,
      "step": 173403
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.186164140701294,
      "learning_rate": 8.613458532513085e-05,
      "loss": 3.0931,
      "step": 173404
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.83166766166687,
      "learning_rate": 8.61317167104859e-05,
      "loss": 2.9757,
      "step": 173405
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.394585609436035,
      "learning_rate": 8.612884813560297e-05,
      "loss": 2.9536,
      "step": 173406
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7909085750579834,
      "learning_rate": 8.612597960048284e-05,
      "loss": 2.8737,
      "step": 173407
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.290144681930542,
      "learning_rate": 8.612311110512582e-05,
      "loss": 3.1262,
      "step": 173408
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4256839752197266,
      "learning_rate": 8.612024264953266e-05,
      "loss": 2.819,
      "step": 173409
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3927924633026123,
      "learning_rate": 8.611737423370377e-05,
      "loss": 3.1285,
      "step": 173410
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.415830135345459,
      "learning_rate": 8.611450585763971e-05,
      "loss": 3.0295,
      "step": 173411
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.438774585723877,
      "learning_rate": 8.61116375213409e-05,
      "loss": 3.0651,
      "step": 173412
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2798123359680176,
      "learning_rate": 8.610876922480803e-05,
      "loss": 3.0797,
      "step": 173413
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.107516050338745,
      "learning_rate": 8.610590096804154e-05,
      "loss": 3.0505,
      "step": 173414
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4952125549316406,
      "learning_rate": 8.610303275104207e-05,
      "loss": 2.8963,
      "step": 173415
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.577497959136963,
      "learning_rate": 8.610016457381008e-05,
      "loss": 3.2212,
      "step": 173416
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.796729326248169,
      "learning_rate": 8.609729643634612e-05,
      "loss": 2.8915,
      "step": 173417
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.033043146133423,
      "learning_rate": 8.60944283386506e-05,
      "loss": 2.8559,
      "step": 173418
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8865487575531006,
      "learning_rate": 8.60915602807243e-05,
      "loss": 3.0484,
      "step": 173419
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8753058910369873,
      "learning_rate": 8.60886922625675e-05,
      "loss": 3.0814,
      "step": 173420
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.986741065979004,
      "learning_rate": 8.608582428418091e-05,
      "loss": 2.9175,
      "step": 173421
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9328041076660156,
      "learning_rate": 8.608295634556492e-05,
      "loss": 2.6455,
      "step": 173422
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.505336046218872,
      "learning_rate": 8.608008844672035e-05,
      "loss": 2.7089,
      "step": 173423
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9724509716033936,
      "learning_rate": 8.607722058764736e-05,
      "loss": 3.0584,
      "step": 173424
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.480346918106079,
      "learning_rate": 8.607435276834674e-05,
      "loss": 2.8551,
      "step": 173425
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.634793281555176,
      "learning_rate": 8.607148498881882e-05,
      "loss": 2.8432,
      "step": 173426
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5823075771331787,
      "learning_rate": 8.606861724906435e-05,
      "loss": 3.1903,
      "step": 173427
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7512295246124268,
      "learning_rate": 8.606574954908367e-05,
      "loss": 3.2201,
      "step": 173428
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4540627002716064,
      "learning_rate": 8.606288188887757e-05,
      "loss": 3.008,
      "step": 173429
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.45188570022583,
      "learning_rate": 8.606001426844626e-05,
      "loss": 2.7191,
      "step": 173430
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7070515155792236,
      "learning_rate": 8.605714668779053e-05,
      "loss": 2.9325,
      "step": 173431
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5633020401000977,
      "learning_rate": 8.605427914691072e-05,
      "loss": 3.05,
      "step": 173432
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.586808919906616,
      "learning_rate": 8.605141164580755e-05,
      "loss": 2.6902,
      "step": 173433
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.689121723175049,
      "learning_rate": 8.604854418448137e-05,
      "loss": 2.911,
      "step": 173434
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.389955997467041,
      "learning_rate": 8.604567676293286e-05,
      "loss": 2.9029,
      "step": 173435
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7094810009002686,
      "learning_rate": 8.604280938116254e-05,
      "loss": 3.0461,
      "step": 173436
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7700259685516357,
      "learning_rate": 8.603994203917087e-05,
      "loss": 3.0968,
      "step": 173437
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.632737636566162,
      "learning_rate": 8.603707473695834e-05,
      "loss": 3.0296,
      "step": 173438
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6409029960632324,
      "learning_rate": 8.603420747452566e-05,
      "loss": 2.9336,
      "step": 173439
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.865968942642212,
      "learning_rate": 8.603134025187316e-05,
      "loss": 3.0188,
      "step": 173440
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1758792400360107,
      "learning_rate": 8.602847306900157e-05,
      "loss": 2.9143,
      "step": 173441
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.168515920639038,
      "learning_rate": 8.602560592591128e-05,
      "loss": 3.0299,
      "step": 173442
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4093310832977295,
      "learning_rate": 8.602273882260283e-05,
      "loss": 3.1144,
      "step": 173443
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2443437576293945,
      "learning_rate": 8.601987175907686e-05,
      "loss": 2.9651,
      "step": 173444
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2280242443084717,
      "learning_rate": 8.601700473533386e-05,
      "loss": 3.0264,
      "step": 173445
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.594953775405884,
      "learning_rate": 8.601413775137421e-05,
      "loss": 2.9178,
      "step": 173446
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.597864866256714,
      "learning_rate": 8.601127080719869e-05,
      "loss": 2.8953,
      "step": 173447
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.686446189880371,
      "learning_rate": 8.60084039028077e-05,
      "loss": 2.8419,
      "step": 173448
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3454887866973877,
      "learning_rate": 8.600553703820168e-05,
      "loss": 3.2711,
      "step": 173449
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.6328577995300293,
      "learning_rate": 8.600267021338138e-05,
      "loss": 2.9403,
      "step": 173450
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5282580852508545,
      "learning_rate": 8.599980342834726e-05,
      "loss": 3.2245,
      "step": 173451
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.152662992477417,
      "learning_rate": 8.599693668309968e-05,
      "loss": 2.872,
      "step": 173452
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8516652584075928,
      "learning_rate": 8.59940699776394e-05,
      "loss": 2.8363,
      "step": 173453
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1222825050354004,
      "learning_rate": 8.599120331196688e-05,
      "loss": 3.1496,
      "step": 173454
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6401727199554443,
      "learning_rate": 8.598833668608251e-05,
      "loss": 2.9318,
      "step": 173455
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3893215656280518,
      "learning_rate": 8.598547009998709e-05,
      "loss": 2.9398,
      "step": 173456
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3978424072265625,
      "learning_rate": 8.59826035536809e-05,
      "loss": 3.0334,
      "step": 173457
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9795989990234375,
      "learning_rate": 8.597973704716467e-05,
      "loss": 2.8823,
      "step": 173458
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.827518463134766,
      "learning_rate": 8.597687058043883e-05,
      "loss": 3.216,
      "step": 173459
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6790387630462646,
      "learning_rate": 8.597400415350394e-05,
      "loss": 3.2712,
      "step": 173460
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8075475692749023,
      "learning_rate": 8.597113776636045e-05,
      "loss": 3.0587,
      "step": 173461
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.557075262069702,
      "learning_rate": 8.596827141900904e-05,
      "loss": 2.9018,
      "step": 173462
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5630545616149902,
      "learning_rate": 8.596540511145006e-05,
      "loss": 2.7149,
      "step": 173463
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9686453342437744,
      "learning_rate": 8.59625388436843e-05,
      "loss": 2.7826,
      "step": 173464
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.450744152069092,
      "learning_rate": 8.59596726157121e-05,
      "loss": 3.3332,
      "step": 173465
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3649725914001465,
      "learning_rate": 8.595680642753405e-05,
      "loss": 2.9466,
      "step": 173466
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4488985538482666,
      "learning_rate": 8.595394027915058e-05,
      "loss": 2.8871,
      "step": 173467
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.17954158782959,
      "learning_rate": 8.595107417056238e-05,
      "loss": 2.7829,
      "step": 173468
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.569392442703247,
      "learning_rate": 8.594820810176985e-05,
      "loss": 3.0594,
      "step": 173469
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.963033437728882,
      "learning_rate": 8.594534207277366e-05,
      "loss": 3.1194,
      "step": 173470
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.909130334854126,
      "learning_rate": 8.59424760835743e-05,
      "loss": 2.8862,
      "step": 173471
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7236950397491455,
      "learning_rate": 8.593961013417226e-05,
      "loss": 2.6786,
      "step": 173472
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.571995496749878,
      "learning_rate": 8.5936744224568e-05,
      "loss": 3.1039,
      "step": 173473
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.319857597351074,
      "learning_rate": 8.593387835476223e-05,
      "loss": 3.0864,
      "step": 173474
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8165085315704346,
      "learning_rate": 8.59310125247553e-05,
      "loss": 2.7822,
      "step": 173475
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.826904773712158,
      "learning_rate": 8.592814673454791e-05,
      "loss": 2.912,
      "step": 173476
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5090479850769043,
      "learning_rate": 8.592528098414054e-05,
      "loss": 3.2078,
      "step": 173477
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3979411125183105,
      "learning_rate": 8.59224152735337e-05,
      "loss": 2.7367,
      "step": 173478
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2846577167510986,
      "learning_rate": 8.59195496027278e-05,
      "loss": 3.0632,
      "step": 173479
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.720196008682251,
      "learning_rate": 8.591668397172363e-05,
      "loss": 2.8546,
      "step": 173480
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.692196369171143,
      "learning_rate": 8.591381838052149e-05,
      "loss": 3.0603,
      "step": 173481
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7188873291015625,
      "learning_rate": 8.591095282912209e-05,
      "loss": 2.9226,
      "step": 173482
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.551543712615967,
      "learning_rate": 8.590808731752588e-05,
      "loss": 2.9725,
      "step": 173483
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.650912284851074,
      "learning_rate": 8.590522184573345e-05,
      "loss": 2.682,
      "step": 173484
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8993380069732666,
      "learning_rate": 8.590235641374512e-05,
      "loss": 3.0371,
      "step": 173485
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3876559734344482,
      "learning_rate": 8.589949102156169e-05,
      "loss": 3.1864,
      "step": 173486
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5169060230255127,
      "learning_rate": 8.589662566918353e-05,
      "loss": 2.7869,
      "step": 173487
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4161534309387207,
      "learning_rate": 8.589376035661129e-05,
      "loss": 2.7824,
      "step": 173488
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.9108808040618896,
      "learning_rate": 8.589089508384537e-05,
      "loss": 3.0992,
      "step": 173489
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4574344158172607,
      "learning_rate": 8.588802985088655e-05,
      "loss": 2.8924,
      "step": 173490
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.632293462753296,
      "learning_rate": 8.5885164657735e-05,
      "loss": 3.191,
      "step": 173491
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.669708728790283,
      "learning_rate": 8.588229950439153e-05,
      "loss": 2.8014,
      "step": 173492
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4467179775238037,
      "learning_rate": 8.587943439085648e-05,
      "loss": 2.9553,
      "step": 173493
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.477024793624878,
      "learning_rate": 8.58765693171306e-05,
      "loss": 2.9743,
      "step": 173494
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.363781452178955,
      "learning_rate": 8.58737042832142e-05,
      "loss": 2.7889,
      "step": 173495
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.491978406906128,
      "learning_rate": 8.58708392891081e-05,
      "loss": 3.0128,
      "step": 173496
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.953303813934326,
      "learning_rate": 8.586797433481247e-05,
      "loss": 2.8572,
      "step": 173497
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5402889251708984,
      "learning_rate": 8.586510942032812e-05,
      "loss": 2.9692,
      "step": 173498
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4021284580230713,
      "learning_rate": 8.586224454565544e-05,
      "loss": 2.9879,
      "step": 173499
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0366148948669434,
      "learning_rate": 8.585937971079507e-05,
      "loss": 2.8809,
      "step": 173500
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.751653671264648,
      "learning_rate": 8.585651491574738e-05,
      "loss": 2.9895,
      "step": 173501
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.205615282058716,
      "learning_rate": 8.585365016051317e-05,
      "loss": 2.8336,
      "step": 173502
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5839200019836426,
      "learning_rate": 8.585078544509266e-05,
      "loss": 2.7039,
      "step": 173503
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2737464904785156,
      "learning_rate": 8.58479207694866e-05,
      "loss": 3.0994,
      "step": 173504
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.058629035949707,
      "learning_rate": 8.584505613369539e-05,
      "loss": 2.8835,
      "step": 173505
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.137985944747925,
      "learning_rate": 8.58421915377197e-05,
      "loss": 3.0104,
      "step": 173506
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8337578773498535,
      "learning_rate": 8.583932698155991e-05,
      "loss": 3.0046,
      "step": 173507
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5831823348999023,
      "learning_rate": 8.58364624652168e-05,
      "loss": 2.6645,
      "step": 173508
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3548636436462402,
      "learning_rate": 8.583359798869055e-05,
      "loss": 3.0004,
      "step": 173509
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.312178134918213,
      "learning_rate": 8.583073355198198e-05,
      "loss": 3.1025,
      "step": 173510
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.727844715118408,
      "learning_rate": 8.582786915509145e-05,
      "loss": 2.8089,
      "step": 173511
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.740141868591309,
      "learning_rate": 8.582500479801962e-05,
      "loss": 2.8511,
      "step": 173512
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.57136869430542,
      "learning_rate": 8.582214048076686e-05,
      "loss": 2.8464,
      "step": 173513
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.118440628051758,
      "learning_rate": 8.581927620333403e-05,
      "loss": 3.131,
      "step": 173514
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.6615936756134033,
      "learning_rate": 8.581641196572124e-05,
      "loss": 3.0305,
      "step": 173515
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.562753677368164,
      "learning_rate": 8.581354776792931e-05,
      "loss": 2.8686,
      "step": 173516
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7765374183654785,
      "learning_rate": 8.581068360995858e-05,
      "loss": 3.1356,
      "step": 173517
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8726069927215576,
      "learning_rate": 8.580781949180979e-05,
      "loss": 2.9557,
      "step": 173518
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1112799644470215,
      "learning_rate": 8.58049554134833e-05,
      "loss": 3.0887,
      "step": 173519
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3573904037475586,
      "learning_rate": 8.580209137497977e-05,
      "loss": 3.0071,
      "step": 173520
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.540365219116211,
      "learning_rate": 8.579922737629968e-05,
      "loss": 2.9444,
      "step": 173521
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.950904130935669,
      "learning_rate": 8.579636341744358e-05,
      "loss": 2.8428,
      "step": 173522
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.1783528327941895,
      "learning_rate": 8.579349949841185e-05,
      "loss": 2.8954,
      "step": 173523
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4102001190185547,
      "learning_rate": 8.579063561920528e-05,
      "loss": 3.1003,
      "step": 173524
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.9059696197509766,
      "learning_rate": 8.578777177982417e-05,
      "loss": 2.8663,
      "step": 173525
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6004204750061035,
      "learning_rate": 8.578490798026926e-05,
      "loss": 3.0505,
      "step": 173526
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5081560611724854,
      "learning_rate": 8.578204422054098e-05,
      "loss": 3.1948,
      "step": 173527
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6164004802703857,
      "learning_rate": 8.577918050063983e-05,
      "loss": 2.9912,
      "step": 173528
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.864733934402466,
      "learning_rate": 8.57763168205663e-05,
      "loss": 2.8546,
      "step": 173529
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9745259284973145,
      "learning_rate": 8.577345318032109e-05,
      "loss": 2.9001,
      "step": 173530
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.480623245239258,
      "learning_rate": 8.577058957990454e-05,
      "loss": 2.8194,
      "step": 173531
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6998891830444336,
      "learning_rate": 8.576772601931739e-05,
      "loss": 2.8141,
      "step": 173532
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.357927322387695,
      "learning_rate": 8.576486249856004e-05,
      "loss": 2.8802,
      "step": 173533
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6212120056152344,
      "learning_rate": 8.576199901763296e-05,
      "loss": 3.1575,
      "step": 173534
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.246767520904541,
      "learning_rate": 8.57591355765369e-05,
      "loss": 2.9111,
      "step": 173535
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3220465183258057,
      "learning_rate": 8.575627217527224e-05,
      "loss": 3.004,
      "step": 173536
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.628748893737793,
      "learning_rate": 8.57534088138394e-05,
      "loss": 2.9541,
      "step": 173537
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5477983951568604,
      "learning_rate": 8.575054549223919e-05,
      "loss": 3.0201,
      "step": 173538
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3329813480377197,
      "learning_rate": 8.574768221047197e-05,
      "loss": 3.1633,
      "step": 173539
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.356358051300049,
      "learning_rate": 8.574481896853822e-05,
      "loss": 3.0031,
      "step": 173540
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.441734790802002,
      "learning_rate": 8.574195576643861e-05,
      "loss": 2.9949,
      "step": 173541
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.065598249435425,
      "learning_rate": 8.573909260417357e-05,
      "loss": 2.8933,
      "step": 173542
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.6865758895874023,
      "learning_rate": 8.573622948174375e-05,
      "loss": 2.8881,
      "step": 173543
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.455187797546387,
      "learning_rate": 8.573336639914961e-05,
      "loss": 2.7988,
      "step": 173544
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7576746940612793,
      "learning_rate": 8.573050335639168e-05,
      "loss": 2.8111,
      "step": 173545
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6845903396606445,
      "learning_rate": 8.57276403534704e-05,
      "loss": 3.0725,
      "step": 173546
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9122562408447266,
      "learning_rate": 8.57247773903865e-05,
      "loss": 2.7013,
      "step": 173547
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2748305797576904,
      "learning_rate": 8.572191446714029e-05,
      "loss": 3.1614,
      "step": 173548
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.690133571624756,
      "learning_rate": 8.571905158373256e-05,
      "loss": 2.9751,
      "step": 173549
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.042513370513916,
      "learning_rate": 8.571618874016365e-05,
      "loss": 2.9333,
      "step": 173550
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2135121822357178,
      "learning_rate": 8.571332593643417e-05,
      "loss": 3.0024,
      "step": 173551
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6442854404449463,
      "learning_rate": 8.571046317254453e-05,
      "loss": 3.0544,
      "step": 173552
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6324286460876465,
      "learning_rate": 8.570760044849549e-05,
      "loss": 2.9035,
      "step": 173553
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.308011054992676,
      "learning_rate": 8.57047377642873e-05,
      "loss": 2.8073,
      "step": 173554
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.227416753768921,
      "learning_rate": 8.570187511992078e-05,
      "loss": 2.9974,
      "step": 173555
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3560476303100586,
      "learning_rate": 8.569901251539622e-05,
      "loss": 2.865,
      "step": 173556
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.609773874282837,
      "learning_rate": 8.569614995071443e-05,
      "loss": 2.9391,
      "step": 173557
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6935322284698486,
      "learning_rate": 8.56932874258756e-05,
      "loss": 2.8369,
      "step": 173558
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3180103302001953,
      "learning_rate": 8.569042494088053e-05,
      "loss": 2.9523,
      "step": 173559
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.290884017944336,
      "learning_rate": 8.568756249572955e-05,
      "loss": 2.9637,
      "step": 173560
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.652172803878784,
      "learning_rate": 8.56847000904234e-05,
      "loss": 2.8723,
      "step": 173561
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8841614723205566,
      "learning_rate": 8.568183772496242e-05,
      "loss": 2.8786,
      "step": 173562
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.507977247238159,
      "learning_rate": 8.56789753993474e-05,
      "loss": 2.8843,
      "step": 173563
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9157369136810303,
      "learning_rate": 8.567611311357853e-05,
      "loss": 3.0352,
      "step": 173564
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9034509658813477,
      "learning_rate": 8.567325086765662e-05,
      "loss": 3.0057,
      "step": 173565
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.791618824005127,
      "learning_rate": 8.5670388661582e-05,
      "loss": 3.1315,
      "step": 173566
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.537700653076172,
      "learning_rate": 8.566752649535537e-05,
      "loss": 2.8836,
      "step": 173567
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3787004947662354,
      "learning_rate": 8.566466436897715e-05,
      "loss": 2.9842,
      "step": 173568
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.940478801727295,
      "learning_rate": 8.566180228244803e-05,
      "loss": 2.91,
      "step": 173569
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6901094913482666,
      "learning_rate": 8.565894023576828e-05,
      "loss": 3.0378,
      "step": 173570
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5027148723602295,
      "learning_rate": 8.565607822893869e-05,
      "loss": 2.8267,
      "step": 173571
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0354578495025635,
      "learning_rate": 8.565321626195958e-05,
      "loss": 2.7183,
      "step": 173572
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.45064640045166,
      "learning_rate": 8.565035433483167e-05,
      "loss": 2.7885,
      "step": 173573
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.392195463180542,
      "learning_rate": 8.56474924475553e-05,
      "loss": 3.1068,
      "step": 173574
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8227686882019043,
      "learning_rate": 8.56446306001313e-05,
      "loss": 2.8539,
      "step": 173575
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.383573532104492,
      "learning_rate": 8.564176879255981e-05,
      "loss": 2.9514,
      "step": 173576
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4178555011749268,
      "learning_rate": 8.563890702484168e-05,
      "loss": 2.9172,
      "step": 173577
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9571547508239746,
      "learning_rate": 8.563604529697723e-05,
      "loss": 2.7787,
      "step": 173578
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6974101066589355,
      "learning_rate": 8.563318360896717e-05,
      "loss": 2.61,
      "step": 173579
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7455296516418457,
      "learning_rate": 8.563032196081183e-05,
      "loss": 2.8857,
      "step": 173580
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3125152587890625,
      "learning_rate": 8.562746035251208e-05,
      "loss": 3.0902,
      "step": 173581
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.12125563621521,
      "learning_rate": 8.562459878406804e-05,
      "loss": 2.8591,
      "step": 173582
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.647738456726074,
      "learning_rate": 8.562173725548053e-05,
      "loss": 2.8626,
      "step": 173583
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.746954917907715,
      "learning_rate": 8.561887576674986e-05,
      "loss": 2.7942,
      "step": 173584
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6272122859954834,
      "learning_rate": 8.561601431787681e-05,
      "loss": 2.8627,
      "step": 173585
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.664717674255371,
      "learning_rate": 8.561315290886169e-05,
      "loss": 2.9956,
      "step": 173586
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6284897327423096,
      "learning_rate": 8.561029153970533e-05,
      "loss": 2.8537,
      "step": 173587
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7783265113830566,
      "learning_rate": 8.560743021040786e-05,
      "loss": 2.9245,
      "step": 173588
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.546821355819702,
      "learning_rate": 8.560456892097012e-05,
      "loss": 3.1991,
      "step": 173589
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3401260375976562,
      "learning_rate": 8.560170767139246e-05,
      "loss": 3.2358,
      "step": 173590
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.967045307159424,
      "learning_rate": 8.559884646167554e-05,
      "loss": 2.9707,
      "step": 173591
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4149205684661865,
      "learning_rate": 8.55959852918198e-05,
      "loss": 2.8952,
      "step": 173592
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.602069139480591,
      "learning_rate": 8.559312416182593e-05,
      "loss": 2.8372,
      "step": 173593
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7893295288085938,
      "learning_rate": 8.559026307169422e-05,
      "loss": 2.9542,
      "step": 173594
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.151460886001587,
      "learning_rate": 8.558740202142539e-05,
      "loss": 2.8584,
      "step": 173595
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8851430416107178,
      "learning_rate": 8.558454101101984e-05,
      "loss": 2.9991,
      "step": 173596
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.609862804412842,
      "learning_rate": 8.558168004047827e-05,
      "loss": 2.8163,
      "step": 173597
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0273566246032715,
      "learning_rate": 8.557881910980097e-05,
      "loss": 2.804,
      "step": 173598
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8711299896240234,
      "learning_rate": 8.557595821898885e-05,
      "loss": 3.1547,
      "step": 173599
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.141312122344971,
      "learning_rate": 8.5573097368042e-05,
      "loss": 3.2306,
      "step": 173600
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4402711391448975,
      "learning_rate": 8.557023655696124e-05,
      "loss": 2.7932,
      "step": 173601
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.848557710647583,
      "learning_rate": 8.556737578574696e-05,
      "loss": 2.8149,
      "step": 173602
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.698655128479004,
      "learning_rate": 8.556451505439982e-05,
      "loss": 3.0191,
      "step": 173603
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0996508598327637,
      "learning_rate": 8.556165436292022e-05,
      "loss": 2.9416,
      "step": 173604
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.242354393005371,
      "learning_rate": 8.555879371130883e-05,
      "loss": 3.1163,
      "step": 173605
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7199058532714844,
      "learning_rate": 8.555593309956608e-05,
      "loss": 2.8788,
      "step": 173606
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6095221042633057,
      "learning_rate": 8.555307252769256e-05,
      "loss": 2.984,
      "step": 173607
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.382228374481201,
      "learning_rate": 8.55502119956887e-05,
      "loss": 2.8983,
      "step": 173608
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.073121070861816,
      "learning_rate": 8.554735150355517e-05,
      "loss": 2.8483,
      "step": 173609
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.980925559997559,
      "learning_rate": 8.554449105129231e-05,
      "loss": 3.1161,
      "step": 173610
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.590031385421753,
      "learning_rate": 8.554163063890094e-05,
      "loss": 2.8141,
      "step": 173611
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9613497257232666,
      "learning_rate": 8.553877026638139e-05,
      "loss": 2.8605,
      "step": 173612
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2797915935516357,
      "learning_rate": 8.553590993373422e-05,
      "loss": 2.7439,
      "step": 173613
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.523632049560547,
      "learning_rate": 8.553304964095989e-05,
      "loss": 2.941,
      "step": 173614
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3107051849365234,
      "learning_rate": 8.553018938805914e-05,
      "loss": 2.9179,
      "step": 173615
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.48608660697937,
      "learning_rate": 8.552732917503224e-05,
      "loss": 2.8736,
      "step": 173616
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8483080863952637,
      "learning_rate": 8.552446900187995e-05,
      "loss": 2.9147,
      "step": 173617
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1464498043060303,
      "learning_rate": 8.552160886860274e-05,
      "loss": 2.8088,
      "step": 173618
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6647188663482666,
      "learning_rate": 8.551874877520103e-05,
      "loss": 2.8037,
      "step": 173619
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.644371747970581,
      "learning_rate": 8.551588872167549e-05,
      "loss": 3.1636,
      "step": 173620
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8904154300689697,
      "learning_rate": 8.55130287080266e-05,
      "loss": 2.9714,
      "step": 173621
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.45212459564209,
      "learning_rate": 8.55101687342548e-05,
      "loss": 2.9307,
      "step": 173622
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9060802459716797,
      "learning_rate": 8.550730880036079e-05,
      "loss": 2.837,
      "step": 173623
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1008639335632324,
      "learning_rate": 8.550444890634505e-05,
      "loss": 3.055,
      "step": 173624
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.6066339015960693,
      "learning_rate": 8.550158905220799e-05,
      "loss": 2.9407,
      "step": 173625
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.585747003555298,
      "learning_rate": 8.549872923795029e-05,
      "loss": 2.7317,
      "step": 173626
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.378415107727051,
      "learning_rate": 8.549586946357244e-05,
      "loss": 2.6833,
      "step": 173627
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.821547031402588,
      "learning_rate": 8.549300972907486e-05,
      "loss": 3.0349,
      "step": 173628
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5271787643432617,
      "learning_rate": 8.54901500344583e-05,
      "loss": 2.9007,
      "step": 173629
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.660239219665527,
      "learning_rate": 8.548729037972315e-05,
      "loss": 2.8931,
      "step": 173630
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.582972764968872,
      "learning_rate": 8.548443076486984e-05,
      "loss": 3.072,
      "step": 173631
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.80720591545105,
      "learning_rate": 8.548157118989916e-05,
      "loss": 2.8258,
      "step": 173632
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5585548877716064,
      "learning_rate": 8.54787116548114e-05,
      "loss": 2.8376,
      "step": 173633
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.453171730041504,
      "learning_rate": 8.547585215960729e-05,
      "loss": 2.8366,
      "step": 173634
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.421024799346924,
      "learning_rate": 8.547299270428726e-05,
      "loss": 2.7043,
      "step": 173635
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5778868198394775,
      "learning_rate": 8.547013328885186e-05,
      "loss": 3.0061,
      "step": 173636
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.477163314819336,
      "learning_rate": 8.546727391330149e-05,
      "loss": 2.8744,
      "step": 173637
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8872101306915283,
      "learning_rate": 8.546441457763692e-05,
      "loss": 2.9286,
      "step": 173638
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8979456424713135,
      "learning_rate": 8.546155528185847e-05,
      "loss": 2.8459,
      "step": 173639
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.288447380065918,
      "learning_rate": 8.545869602596689e-05,
      "loss": 2.9433,
      "step": 173640
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.690452814102173,
      "learning_rate": 8.545583680996247e-05,
      "loss": 3.1046,
      "step": 173641
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.189751386642456,
      "learning_rate": 8.545297763384604e-05,
      "loss": 2.6854,
      "step": 173642
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0985066890716553,
      "learning_rate": 8.545011849761775e-05,
      "loss": 2.9472,
      "step": 173643
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2875170707702637,
      "learning_rate": 8.544725940127845e-05,
      "loss": 2.9852,
      "step": 173644
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.613966941833496,
      "learning_rate": 8.544440034482846e-05,
      "loss": 2.616,
      "step": 173645
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0264687538146973,
      "learning_rate": 8.544154132826849e-05,
      "loss": 2.7939,
      "step": 173646
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7277002334594727,
      "learning_rate": 8.543868235159888e-05,
      "loss": 2.8839,
      "step": 173647
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.765601396560669,
      "learning_rate": 8.54358234148205e-05,
      "loss": 2.8321,
      "step": 173648
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8971807956695557,
      "learning_rate": 8.543296451793345e-05,
      "loss": 2.8348,
      "step": 173649
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.112656593322754,
      "learning_rate": 8.543010566093854e-05,
      "loss": 3.0513,
      "step": 173650
    },
    {
      "epoch": 2.26,
      "grad_norm": 7.624052047729492,
      "learning_rate": 8.542724684383612e-05,
      "loss": 3.0034,
      "step": 173651
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5778512954711914,
      "learning_rate": 8.542438806662694e-05,
      "loss": 2.8178,
      "step": 173652
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.140113353729248,
      "learning_rate": 8.54215293293113e-05,
      "loss": 2.8837,
      "step": 173653
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0908782482147217,
      "learning_rate": 8.541867063189006e-05,
      "loss": 3.0658,
      "step": 173654
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.217357158660889,
      "learning_rate": 8.541581197436335e-05,
      "loss": 2.9089,
      "step": 173655
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.529439926147461,
      "learning_rate": 8.541295335673198e-05,
      "loss": 2.9633,
      "step": 173656
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.875932216644287,
      "learning_rate": 8.54100947789963e-05,
      "loss": 3.0229,
      "step": 173657
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.108325481414795,
      "learning_rate": 8.540723624115703e-05,
      "loss": 3.0409,
      "step": 173658
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.6551332473754883,
      "learning_rate": 8.540437774321449e-05,
      "loss": 2.8885,
      "step": 173659
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.573072910308838,
      "learning_rate": 8.540151928516956e-05,
      "loss": 3.0003,
      "step": 173660
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.040355205535889,
      "learning_rate": 8.53986608670223e-05,
      "loss": 2.812,
      "step": 173661
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.121650218963623,
      "learning_rate": 8.539580248877358e-05,
      "loss": 3.0384,
      "step": 173662
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.694345235824585,
      "learning_rate": 8.539294415042378e-05,
      "loss": 2.8443,
      "step": 173663
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.349698066711426,
      "learning_rate": 8.539008585197354e-05,
      "loss": 2.8465,
      "step": 173664
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1660215854644775,
      "learning_rate": 8.538722759342323e-05,
      "loss": 2.8061,
      "step": 173665
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3687615394592285,
      "learning_rate": 8.538436937477371e-05,
      "loss": 3.1273,
      "step": 173666
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4493932723999023,
      "learning_rate": 8.53815111960251e-05,
      "loss": 2.8286,
      "step": 173667
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4060869216918945,
      "learning_rate": 8.537865305717817e-05,
      "loss": 2.8168,
      "step": 173668
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5225229263305664,
      "learning_rate": 8.537579495823336e-05,
      "loss": 3.0967,
      "step": 173669
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2727394104003906,
      "learning_rate": 8.53729368991913e-05,
      "loss": 2.6812,
      "step": 173670
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.356445550918579,
      "learning_rate": 8.537007888005237e-05,
      "loss": 3.0364,
      "step": 173671
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.460367441177368,
      "learning_rate": 8.536722090081737e-05,
      "loss": 3.1899,
      "step": 173672
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.582249641418457,
      "learning_rate": 8.536436296148647e-05,
      "loss": 3.0243,
      "step": 173673
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6805875301361084,
      "learning_rate": 8.536150506206053e-05,
      "loss": 3.0909,
      "step": 173674
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.812548875808716,
      "learning_rate": 8.53586472025398e-05,
      "loss": 3.2294,
      "step": 173675
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.787199020385742,
      "learning_rate": 8.535578938292505e-05,
      "loss": 2.9708,
      "step": 173676
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3859453201293945,
      "learning_rate": 8.53529316032166e-05,
      "loss": 2.9323,
      "step": 173677
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1307382583618164,
      "learning_rate": 8.53500738634153e-05,
      "loss": 2.6102,
      "step": 173678
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.32753324508667,
      "learning_rate": 8.534721616352128e-05,
      "loss": 3.1598,
      "step": 173679
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.284048318862915,
      "learning_rate": 8.534435850353537e-05,
      "loss": 3.2053,
      "step": 173680
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.412173271179199,
      "learning_rate": 8.53415008834579e-05,
      "loss": 2.8383,
      "step": 173681
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7302658557891846,
      "learning_rate": 8.533864330328957e-05,
      "loss": 3.0524,
      "step": 173682
    },
    {
      "epoch": 2.26,
      "grad_norm": 1.9862576723098755,
      "learning_rate": 8.533578576303077e-05,
      "loss": 3.0494,
      "step": 173683
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.792738914489746,
      "learning_rate": 8.533292826268226e-05,
      "loss": 3.1533,
      "step": 173684
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.451650857925415,
      "learning_rate": 8.533007080224426e-05,
      "loss": 3.1156,
      "step": 173685
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.736726999282837,
      "learning_rate": 8.532721338171749e-05,
      "loss": 2.5997,
      "step": 173686
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9731082916259766,
      "learning_rate": 8.532435600110242e-05,
      "loss": 2.9497,
      "step": 173687
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6626079082489014,
      "learning_rate": 8.532149866039964e-05,
      "loss": 2.7694,
      "step": 173688
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.597219705581665,
      "learning_rate": 8.53186413596096e-05,
      "loss": 3.1043,
      "step": 173689
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.264348030090332,
      "learning_rate": 8.531578409873293e-05,
      "loss": 2.7734,
      "step": 173690
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.106778860092163,
      "learning_rate": 8.53129268777701e-05,
      "loss": 2.8312,
      "step": 173691
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5008721351623535,
      "learning_rate": 8.531006969672168e-05,
      "loss": 3.0262,
      "step": 173692
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5646262168884277,
      "learning_rate": 8.530721255558807e-05,
      "loss": 2.7693,
      "step": 173693
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5327985286712646,
      "learning_rate": 8.530435545437e-05,
      "loss": 2.694,
      "step": 173694
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.753223419189453,
      "learning_rate": 8.530149839306781e-05,
      "loss": 3.1496,
      "step": 173695
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.483450174331665,
      "learning_rate": 8.529864137168221e-05,
      "loss": 2.9163,
      "step": 173696
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.982292890548706,
      "learning_rate": 8.529578439021365e-05,
      "loss": 2.9601,
      "step": 173697
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6514573097229004,
      "learning_rate": 8.529292744866264e-05,
      "loss": 2.7829,
      "step": 173698
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5246315002441406,
      "learning_rate": 8.529007054702965e-05,
      "loss": 3.1627,
      "step": 173699
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.196732044219971,
      "learning_rate": 8.528721368531539e-05,
      "loss": 2.6856,
      "step": 173700
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.756202459335327,
      "learning_rate": 8.528435686352019e-05,
      "loss": 2.9721,
      "step": 173701
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6616451740264893,
      "learning_rate": 8.528150008164475e-05,
      "loss": 2.8558,
      "step": 173702
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.548262119293213,
      "learning_rate": 8.527864333968956e-05,
      "loss": 2.6838,
      "step": 173703
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4715447425842285,
      "learning_rate": 8.527578663765504e-05,
      "loss": 3.0152,
      "step": 173704
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4120006561279297,
      "learning_rate": 8.527292997554188e-05,
      "loss": 2.8734,
      "step": 173705
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.167236804962158,
      "learning_rate": 8.527007335335052e-05,
      "loss": 2.8789,
      "step": 173706
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7155323028564453,
      "learning_rate": 8.526721677108143e-05,
      "loss": 3.2481,
      "step": 173707
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7446227073669434,
      "learning_rate": 8.526436022873533e-05,
      "loss": 2.7508,
      "step": 173708
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.548532485961914,
      "learning_rate": 8.52615037263126e-05,
      "loss": 3.157,
      "step": 173709
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3904521465301514,
      "learning_rate": 8.525864726381376e-05,
      "loss": 3.0216,
      "step": 173710
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.069626569747925,
      "learning_rate": 8.525579084123946e-05,
      "loss": 3.0825,
      "step": 173711
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.743712902069092,
      "learning_rate": 8.525293445859015e-05,
      "loss": 2.7679,
      "step": 173712
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.5632004737854,
      "learning_rate": 8.52500781158663e-05,
      "loss": 2.9585,
      "step": 173713
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8516104221343994,
      "learning_rate": 8.524722181306858e-05,
      "loss": 2.9694,
      "step": 173714
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5607054233551025,
      "learning_rate": 8.52443655501975e-05,
      "loss": 2.8239,
      "step": 173715
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.9353363513946533,
      "learning_rate": 8.524150932725341e-05,
      "loss": 2.8811,
      "step": 173716
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.130244731903076,
      "learning_rate": 8.523865314423711e-05,
      "loss": 3.1199,
      "step": 173717
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.873335361480713,
      "learning_rate": 8.523579700114888e-05,
      "loss": 2.9019,
      "step": 173718
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.467284679412842,
      "learning_rate": 8.523294089798947e-05,
      "loss": 2.8909,
      "step": 173719
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4885261058807373,
      "learning_rate": 8.523008483475932e-05,
      "loss": 3.035,
      "step": 173720
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6951513290405273,
      "learning_rate": 8.522722881145893e-05,
      "loss": 3.0584,
      "step": 173721
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5568552017211914,
      "learning_rate": 8.522437282808876e-05,
      "loss": 2.9625,
      "step": 173722
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.37961483001709,
      "learning_rate": 8.522151688464952e-05,
      "loss": 2.8978,
      "step": 173723
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.298232316970825,
      "learning_rate": 8.521866098114157e-05,
      "loss": 2.9086,
      "step": 173724
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.405123472213745,
      "learning_rate": 8.521580511756565e-05,
      "loss": 3.0603,
      "step": 173725
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.259504795074463,
      "learning_rate": 8.521294929392204e-05,
      "loss": 2.9478,
      "step": 173726
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9237122535705566,
      "learning_rate": 8.521009351021157e-05,
      "loss": 2.9947,
      "step": 173727
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5141096115112305,
      "learning_rate": 8.520723776643446e-05,
      "loss": 2.8114,
      "step": 173728
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.020589351654053,
      "learning_rate": 8.520438206259142e-05,
      "loss": 2.7257,
      "step": 173729
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.816155433654785,
      "learning_rate": 8.520152639868289e-05,
      "loss": 2.9829,
      "step": 173730
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.108236074447632,
      "learning_rate": 8.519867077470953e-05,
      "loss": 3.0414,
      "step": 173731
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.276918411254883,
      "learning_rate": 8.51958151906717e-05,
      "loss": 3.0245,
      "step": 173732
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3350465297698975,
      "learning_rate": 8.51929596465702e-05,
      "loss": 3.097,
      "step": 173733
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.761760711669922,
      "learning_rate": 8.51901041424052e-05,
      "loss": 2.8025,
      "step": 173734
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6752002239227295,
      "learning_rate": 8.51872486781775e-05,
      "loss": 2.8152,
      "step": 173735
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.504528999328613,
      "learning_rate": 8.518439325388745e-05,
      "loss": 2.8939,
      "step": 173736
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7861030101776123,
      "learning_rate": 8.518153786953579e-05,
      "loss": 3.17,
      "step": 173737
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7192351818084717,
      "learning_rate": 8.517868252512285e-05,
      "loss": 2.9121,
      "step": 173738
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4332804679870605,
      "learning_rate": 8.517582722064938e-05,
      "loss": 2.8292,
      "step": 173739
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3073434829711914,
      "learning_rate": 8.517297195611564e-05,
      "loss": 2.8312,
      "step": 173740
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.695730686187744,
      "learning_rate": 8.517011673152238e-05,
      "loss": 3.0513,
      "step": 173741
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5427119731903076,
      "learning_rate": 8.516726154686998e-05,
      "loss": 2.476,
      "step": 173742
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.073121070861816,
      "learning_rate": 8.516440640215909e-05,
      "loss": 2.6027,
      "step": 173743
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0438077449798584,
      "learning_rate": 8.516155129739015e-05,
      "loss": 2.8642,
      "step": 173744
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4263083934783936,
      "learning_rate": 8.51586962325639e-05,
      "loss": 2.9813,
      "step": 173745
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6039042472839355,
      "learning_rate": 8.515584120768049e-05,
      "loss": 2.7575,
      "step": 173746
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6880276203155518,
      "learning_rate": 8.515298622274079e-05,
      "loss": 2.8553,
      "step": 173747
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6091952323913574,
      "learning_rate": 8.515013127774509e-05,
      "loss": 3.075,
      "step": 173748
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1421451568603516,
      "learning_rate": 8.514727637269416e-05,
      "loss": 2.8477,
      "step": 173749
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6386592388153076,
      "learning_rate": 8.514442150758827e-05,
      "loss": 2.9028,
      "step": 173750
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.783579111099243,
      "learning_rate": 8.51415666824283e-05,
      "loss": 2.866,
      "step": 173751
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2337591648101807,
      "learning_rate": 8.513871189721438e-05,
      "loss": 2.9412,
      "step": 173752
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8163604736328125,
      "learning_rate": 8.513585715194732e-05,
      "loss": 2.851,
      "step": 173753
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3268015384674072,
      "learning_rate": 8.513300244662747e-05,
      "loss": 2.876,
      "step": 173754
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.130180358886719,
      "learning_rate": 8.513014778125554e-05,
      "loss": 2.9494,
      "step": 173755
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5465025901794434,
      "learning_rate": 8.51272931558319e-05,
      "loss": 3.0958,
      "step": 173756
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.046351909637451,
      "learning_rate": 8.512443857035723e-05,
      "loss": 3.116,
      "step": 173757
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.657179832458496,
      "learning_rate": 8.512158402483198e-05,
      "loss": 3.2166,
      "step": 173758
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.280627727508545,
      "learning_rate": 8.511872951925667e-05,
      "loss": 2.8348,
      "step": 173759
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.609915018081665,
      "learning_rate": 8.511587505363177e-05,
      "loss": 3.0509,
      "step": 173760
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8312487602233887,
      "learning_rate": 8.511302062795797e-05,
      "loss": 2.8409,
      "step": 173761
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.094116687774658,
      "learning_rate": 8.511016624223562e-05,
      "loss": 2.869,
      "step": 173762
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7894890308380127,
      "learning_rate": 8.510731189646545e-05,
      "loss": 2.9899,
      "step": 173763
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5141963958740234,
      "learning_rate": 8.510445759064787e-05,
      "loss": 2.6844,
      "step": 173764
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.407278537750244,
      "learning_rate": 8.510160332478345e-05,
      "loss": 2.8653,
      "step": 173765
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.277796745300293,
      "learning_rate": 8.50987490988726e-05,
      "loss": 2.9737,
      "step": 173766
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.979377508163452,
      "learning_rate": 8.509589491291605e-05,
      "loss": 3.0118,
      "step": 173767
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.499760389328003,
      "learning_rate": 8.509304076691412e-05,
      "loss": 2.9205,
      "step": 173768
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4566845893859863,
      "learning_rate": 8.509018666086755e-05,
      "loss": 3.2054,
      "step": 173769
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6801607608795166,
      "learning_rate": 8.508733259477675e-05,
      "loss": 2.8792,
      "step": 173770
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6709301471710205,
      "learning_rate": 8.508447856864229e-05,
      "loss": 2.6779,
      "step": 173771
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5994346141815186,
      "learning_rate": 8.50816245824646e-05,
      "loss": 3.0632,
      "step": 173772
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0355238914489746,
      "learning_rate": 8.507877063624438e-05,
      "loss": 2.984,
      "step": 173773
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2697722911834717,
      "learning_rate": 8.507591672998197e-05,
      "loss": 3.0999,
      "step": 173774
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.780219316482544,
      "learning_rate": 8.507306286367809e-05,
      "loss": 2.9831,
      "step": 173775
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.930079936981201,
      "learning_rate": 8.507020903733323e-05,
      "loss": 3.0146,
      "step": 173776
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.438117027282715,
      "learning_rate": 8.506735525094782e-05,
      "loss": 2.9621,
      "step": 173777
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.592711925506592,
      "learning_rate": 8.506450150452238e-05,
      "loss": 2.9523,
      "step": 173778
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8958451747894287,
      "learning_rate": 8.506164779805759e-05,
      "loss": 3.0383,
      "step": 173779
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.84354567527771,
      "learning_rate": 8.505879413155382e-05,
      "loss": 2.9845,
      "step": 173780
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.070106267929077,
      "learning_rate": 8.505594050501175e-05,
      "loss": 2.7779,
      "step": 173781
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5057170391082764,
      "learning_rate": 8.505308691843186e-05,
      "loss": 3.1078,
      "step": 173782
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.229130744934082,
      "learning_rate": 8.505023337181465e-05,
      "loss": 2.9763,
      "step": 173783
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2433180809020996,
      "learning_rate": 8.504737986516055e-05,
      "loss": 2.7921,
      "step": 173784
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6596224308013916,
      "learning_rate": 8.504452639847033e-05,
      "loss": 2.8081,
      "step": 173785
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4728872776031494,
      "learning_rate": 8.504167297174425e-05,
      "loss": 2.8481,
      "step": 173786
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7115566730499268,
      "learning_rate": 8.503881958498309e-05,
      "loss": 3.2325,
      "step": 173787
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.747602701187134,
      "learning_rate": 8.503596623818728e-05,
      "loss": 2.9538,
      "step": 173788
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2341487407684326,
      "learning_rate": 8.503311293135734e-05,
      "loss": 2.9295,
      "step": 173789
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4828994274139404,
      "learning_rate": 8.503025966449373e-05,
      "loss": 3.0233,
      "step": 173790
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5083887577056885,
      "learning_rate": 8.502740643759712e-05,
      "loss": 2.9716,
      "step": 173791
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5922112464904785,
      "learning_rate": 8.502455325066786e-05,
      "loss": 2.794,
      "step": 173792
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.3953723907470703,
      "learning_rate": 8.502170010370674e-05,
      "loss": 2.9071,
      "step": 173793
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.905823230743408,
      "learning_rate": 8.50188469967141e-05,
      "loss": 2.6656,
      "step": 173794
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7284910678863525,
      "learning_rate": 8.501599392969044e-05,
      "loss": 3.0649,
      "step": 173795
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.442209243774414,
      "learning_rate": 8.501314090263644e-05,
      "loss": 3.0283,
      "step": 173796
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6021780967712402,
      "learning_rate": 8.501028791555259e-05,
      "loss": 2.7431,
      "step": 173797
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.466188669204712,
      "learning_rate": 8.500743496843927e-05,
      "loss": 3.2111,
      "step": 173798
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6509201526641846,
      "learning_rate": 8.50045820612972e-05,
      "loss": 2.7242,
      "step": 173799
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4252407550811768,
      "learning_rate": 8.500172919412688e-05,
      "loss": 3.2537,
      "step": 173800
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7614924907684326,
      "learning_rate": 8.499887636692867e-05,
      "loss": 2.8688,
      "step": 173801
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2645087242126465,
      "learning_rate": 8.499602357970332e-05,
      "loss": 2.9612,
      "step": 173802
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.033057451248169,
      "learning_rate": 8.499317083245119e-05,
      "loss": 2.9671,
      "step": 173803
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.605729818344116,
      "learning_rate": 8.4990318125173e-05,
      "loss": 2.8514,
      "step": 173804
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8834943771362305,
      "learning_rate": 8.498746545786912e-05,
      "loss": 3.1656,
      "step": 173805
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4609875679016113,
      "learning_rate": 8.498461283054016e-05,
      "loss": 2.9511,
      "step": 173806
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7037882804870605,
      "learning_rate": 8.498176024318654e-05,
      "loss": 3.2281,
      "step": 173807
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4007456302642822,
      "learning_rate": 8.497890769580892e-05,
      "loss": 2.952,
      "step": 173808
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5392119884490967,
      "learning_rate": 8.497605518840771e-05,
      "loss": 3.1691,
      "step": 173809
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7817320823669434,
      "learning_rate": 8.497320272098361e-05,
      "loss": 2.8301,
      "step": 173810
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.208221912384033,
      "learning_rate": 8.497035029353704e-05,
      "loss": 2.8502,
      "step": 173811
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.663778066635132,
      "learning_rate": 8.496749790606855e-05,
      "loss": 3.0697,
      "step": 173812
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.523419141769409,
      "learning_rate": 8.496464555857858e-05,
      "loss": 2.9223,
      "step": 173813
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.469288349151611,
      "learning_rate": 8.49617932510678e-05,
      "loss": 3.0294,
      "step": 173814
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7269155979156494,
      "learning_rate": 8.495894098353661e-05,
      "loss": 2.9762,
      "step": 173815
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.93717360496521,
      "learning_rate": 8.495608875598569e-05,
      "loss": 2.8095,
      "step": 173816
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.605310440063477,
      "learning_rate": 8.495323656841544e-05,
      "loss": 2.9359,
      "step": 173817
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5494773387908936,
      "learning_rate": 8.49503844208266e-05,
      "loss": 2.9013,
      "step": 173818
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.68572998046875,
      "learning_rate": 8.494753231321934e-05,
      "loss": 2.9407,
      "step": 173819
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.431663513183594,
      "learning_rate": 8.49446802455945e-05,
      "loss": 2.6895,
      "step": 173820
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4438090324401855,
      "learning_rate": 8.494182821795243e-05,
      "loss": 2.8822,
      "step": 173821
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.475630044937134,
      "learning_rate": 8.493897623029379e-05,
      "loss": 2.8264,
      "step": 173822
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9568984508514404,
      "learning_rate": 8.4936124282619e-05,
      "loss": 3.0118,
      "step": 173823
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4662065505981445,
      "learning_rate": 8.493327237492869e-05,
      "loss": 2.9535,
      "step": 173824
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5307986736297607,
      "learning_rate": 8.493042050722338e-05,
      "loss": 2.9789,
      "step": 173825
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.366445302963257,
      "learning_rate": 8.492756867950356e-05,
      "loss": 2.8965,
      "step": 173826
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.037695407867432,
      "learning_rate": 8.492471689176968e-05,
      "loss": 2.8884,
      "step": 173827
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9905691146850586,
      "learning_rate": 8.492186514402243e-05,
      "loss": 3.2423,
      "step": 173828
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0046370029449463,
      "learning_rate": 8.49190134362622e-05,
      "loss": 2.8336,
      "step": 173829
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8314943313598633,
      "learning_rate": 8.491616176848965e-05,
      "loss": 3.0266,
      "step": 173830
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.316876173019409,
      "learning_rate": 8.491331014070524e-05,
      "loss": 3.1276,
      "step": 173831
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2366065979003906,
      "learning_rate": 8.491045855290951e-05,
      "loss": 3.0767,
      "step": 173832
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.036564350128174,
      "learning_rate": 8.49076070051029e-05,
      "loss": 2.9146,
      "step": 173833
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7062158584594727,
      "learning_rate": 8.490475549728612e-05,
      "loss": 2.886,
      "step": 173834
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.12040376663208,
      "learning_rate": 8.490190402945953e-05,
      "loss": 2.9903,
      "step": 173835
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.118352174758911,
      "learning_rate": 8.489905260162382e-05,
      "loss": 2.7663,
      "step": 173836
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4808263778686523,
      "learning_rate": 8.489620121377941e-05,
      "loss": 2.927,
      "step": 173837
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.332583427429199,
      "learning_rate": 8.489334986592688e-05,
      "loss": 2.8473,
      "step": 173838
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7598507404327393,
      "learning_rate": 8.489049855806664e-05,
      "loss": 2.9209,
      "step": 173839
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7007203102111816,
      "learning_rate": 8.488764729019943e-05,
      "loss": 3.0828,
      "step": 173840
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.525296211242676,
      "learning_rate": 8.488479606232552e-05,
      "loss": 2.9121,
      "step": 173841
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.805468797683716,
      "learning_rate": 8.488194487444572e-05,
      "loss": 2.9659,
      "step": 173842
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5457546710968018,
      "learning_rate": 8.487909372656045e-05,
      "loss": 2.6815,
      "step": 173843
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.671391487121582,
      "learning_rate": 8.487624261867017e-05,
      "loss": 3.0238,
      "step": 173844
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9870710372924805,
      "learning_rate": 8.487339155077539e-05,
      "loss": 2.9179,
      "step": 173845
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0919206142425537,
      "learning_rate": 8.48705405228768e-05,
      "loss": 2.8234,
      "step": 173846
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.797114849090576,
      "learning_rate": 8.48676895349747e-05,
      "loss": 2.6832,
      "step": 173847
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.566688060760498,
      "learning_rate": 8.486483858706992e-05,
      "loss": 2.9997,
      "step": 173848
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.783622980117798,
      "learning_rate": 8.486198767916277e-05,
      "loss": 3.0144,
      "step": 173849
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.4764060974121094,
      "learning_rate": 8.485913681125386e-05,
      "loss": 2.9803,
      "step": 173850
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.660325288772583,
      "learning_rate": 8.485628598334362e-05,
      "loss": 2.827,
      "step": 173851
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7387187480926514,
      "learning_rate": 8.485343519543275e-05,
      "loss": 2.9893,
      "step": 173852
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.545421600341797,
      "learning_rate": 8.485058444752155e-05,
      "loss": 3.0442,
      "step": 173853
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7069075107574463,
      "learning_rate": 8.484773373961083e-05,
      "loss": 3.1515,
      "step": 173854
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5806713104248047,
      "learning_rate": 8.484488307170096e-05,
      "loss": 3.0676,
      "step": 173855
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.788360595703125,
      "learning_rate": 8.484203244379247e-05,
      "loss": 2.9224,
      "step": 173856
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4856245517730713,
      "learning_rate": 8.483918185588585e-05,
      "loss": 2.6422,
      "step": 173857
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.111264228820801,
      "learning_rate": 8.483633130798177e-05,
      "loss": 2.8105,
      "step": 173858
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3101110458374023,
      "learning_rate": 8.483348080008058e-05,
      "loss": 3.206,
      "step": 173859
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.806165933609009,
      "learning_rate": 8.483063033218299e-05,
      "loss": 3.031,
      "step": 173860
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8842921257019043,
      "learning_rate": 8.482777990428947e-05,
      "loss": 2.9813,
      "step": 173861
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7706735134124756,
      "learning_rate": 8.482492951640048e-05,
      "loss": 2.948,
      "step": 173862
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8801186084747314,
      "learning_rate": 8.482207916851656e-05,
      "loss": 2.7068,
      "step": 173863
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2906761169433594,
      "learning_rate": 8.481922886063834e-05,
      "loss": 2.5313,
      "step": 173864
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1301145553588867,
      "learning_rate": 8.481637859276622e-05,
      "loss": 2.9525,
      "step": 173865
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.698875904083252,
      "learning_rate": 8.481352836490087e-05,
      "loss": 3.0336,
      "step": 173866
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2718465328216553,
      "learning_rate": 8.481067817704273e-05,
      "loss": 3.0396,
      "step": 173867
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.399754524230957,
      "learning_rate": 8.480782802919238e-05,
      "loss": 2.8985,
      "step": 173868
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7448878288269043,
      "learning_rate": 8.48049779213502e-05,
      "loss": 2.9923,
      "step": 173869
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.584449529647827,
      "learning_rate": 8.480212785351694e-05,
      "loss": 3.0107,
      "step": 173870
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0528817176818848,
      "learning_rate": 8.479927782569292e-05,
      "loss": 2.8552,
      "step": 173871
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6313130855560303,
      "learning_rate": 8.479642783787891e-05,
      "loss": 3.1715,
      "step": 173872
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.623051166534424,
      "learning_rate": 8.479357789007526e-05,
      "loss": 2.7994,
      "step": 173873
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3544881343841553,
      "learning_rate": 8.479072798228258e-05,
      "loss": 3.0418,
      "step": 173874
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9664559364318848,
      "learning_rate": 8.478787811450124e-05,
      "loss": 2.8945,
      "step": 173875
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.960170269012451,
      "learning_rate": 8.4785028286732e-05,
      "loss": 2.7335,
      "step": 173876
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.116920232772827,
      "learning_rate": 8.478217849897523e-05,
      "loss": 2.9046,
      "step": 173877
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.5515668392181396,
      "learning_rate": 8.477932875123158e-05,
      "loss": 3.0046,
      "step": 173878
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.478623628616333,
      "learning_rate": 8.477647904350148e-05,
      "loss": 2.9369,
      "step": 173879
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.739766836166382,
      "learning_rate": 8.477362937578545e-05,
      "loss": 3.0303,
      "step": 173880
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.986438274383545,
      "learning_rate": 8.477077974808415e-05,
      "loss": 2.7066,
      "step": 173881
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.4180190563201904,
      "learning_rate": 8.4767930160398e-05,
      "loss": 2.9154,
      "step": 173882
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8489646911621094,
      "learning_rate": 8.47650806127275e-05,
      "loss": 2.7327,
      "step": 173883
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.742501974105835,
      "learning_rate": 8.476223110507331e-05,
      "loss": 2.9745,
      "step": 173884
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.503086805343628,
      "learning_rate": 8.475938163743587e-05,
      "loss": 2.7461,
      "step": 173885
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6568291187286377,
      "learning_rate": 8.475653220981565e-05,
      "loss": 2.93,
      "step": 173886
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9282915592193604,
      "learning_rate": 8.475368282221334e-05,
      "loss": 2.9315,
      "step": 173887
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.115065574645996,
      "learning_rate": 8.475083347462929e-05,
      "loss": 2.8324,
      "step": 173888
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.852649211883545,
      "learning_rate": 8.47479841670642e-05,
      "loss": 3.1009,
      "step": 173889
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5169191360473633,
      "learning_rate": 8.474513489951855e-05,
      "loss": 2.7909,
      "step": 173890
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.430253028869629,
      "learning_rate": 8.474228567199276e-05,
      "loss": 2.8722,
      "step": 173891
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.571075677871704,
      "learning_rate": 8.47394364844875e-05,
      "loss": 2.9401,
      "step": 173892
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.759978771209717,
      "learning_rate": 8.473658733700326e-05,
      "loss": 2.7147,
      "step": 173893
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9573264122009277,
      "learning_rate": 8.473373822954044e-05,
      "loss": 2.9762,
      "step": 173894
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7464256286621094,
      "learning_rate": 8.473088916209981e-05,
      "loss": 2.8647,
      "step": 173895
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7046279907226562,
      "learning_rate": 8.472804013468176e-05,
      "loss": 2.8644,
      "step": 173896
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.1918184757232666,
      "learning_rate": 8.472519114728672e-05,
      "loss": 3.0322,
      "step": 173897
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.390216827392578,
      "learning_rate": 8.472234219991541e-05,
      "loss": 2.8842,
      "step": 173898
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.306567907333374,
      "learning_rate": 8.47194932925683e-05,
      "loss": 3.0024,
      "step": 173899
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6362969875335693,
      "learning_rate": 8.471664442524583e-05,
      "loss": 2.8946,
      "step": 173900
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.279703378677368,
      "learning_rate": 8.471379559794866e-05,
      "loss": 2.6565,
      "step": 173901
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.47295880317688,
      "learning_rate": 8.471094681067718e-05,
      "loss": 2.8321,
      "step": 173902
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.449357032775879,
      "learning_rate": 8.470809806343208e-05,
      "loss": 2.6961,
      "step": 173903
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5364902019500732,
      "learning_rate": 8.470524935621383e-05,
      "loss": 3.202,
      "step": 173904
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.7249369621276855,
      "learning_rate": 8.470240068902293e-05,
      "loss": 2.8937,
      "step": 173905
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.171237945556641,
      "learning_rate": 8.46995520618598e-05,
      "loss": 3.0095,
      "step": 173906
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3049991130828857,
      "learning_rate": 8.469670347472524e-05,
      "loss": 2.987,
      "step": 173907
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.482757568359375,
      "learning_rate": 8.469385492761947e-05,
      "loss": 2.8976,
      "step": 173908
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6301584243774414,
      "learning_rate": 8.469100642054329e-05,
      "loss": 3.0973,
      "step": 173909
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.399534225463867,
      "learning_rate": 8.468815795349712e-05,
      "loss": 2.9003,
      "step": 173910
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9040791988372803,
      "learning_rate": 8.46853095264815e-05,
      "loss": 2.7027,
      "step": 173911
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.9018232822418213,
      "learning_rate": 8.468246113949685e-05,
      "loss": 3.0055,
      "step": 173912
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.526113510131836,
      "learning_rate": 8.467961279254386e-05,
      "loss": 2.8668,
      "step": 173913
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.365262985229492,
      "learning_rate": 8.467676448562295e-05,
      "loss": 3.0,
      "step": 173914
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.869137763977051,
      "learning_rate": 8.467391621873473e-05,
      "loss": 2.9911,
      "step": 173915
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.679997205734253,
      "learning_rate": 8.467106799187972e-05,
      "loss": 2.9043,
      "step": 173916
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.302860736846924,
      "learning_rate": 8.466821980505843e-05,
      "loss": 2.8899,
      "step": 173917
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.845362663269043,
      "learning_rate": 8.466537165827129e-05,
      "loss": 3.0127,
      "step": 173918
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1737098693847656,
      "learning_rate": 8.466252355151902e-05,
      "loss": 3.0604,
      "step": 173919
    },
    {
      "epoch": 2.26,
      "grad_norm": 5.188637733459473,
      "learning_rate": 8.465967548480191e-05,
      "loss": 3.1695,
      "step": 173920
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.280661106109619,
      "learning_rate": 8.465682745812079e-05,
      "loss": 3.0881,
      "step": 173921
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.108591318130493,
      "learning_rate": 8.465397947147598e-05,
      "loss": 2.9792,
      "step": 173922
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.73677659034729,
      "learning_rate": 8.46511315248681e-05,
      "loss": 3.014,
      "step": 173923
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0277392864227295,
      "learning_rate": 8.464828361829753e-05,
      "loss": 2.9906,
      "step": 173924
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7238399982452393,
      "learning_rate": 8.464543575176501e-05,
      "loss": 3.1202,
      "step": 173925
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.024780750274658,
      "learning_rate": 8.464258792527084e-05,
      "loss": 2.9063,
      "step": 173926
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.8142380714416504,
      "learning_rate": 8.46397401388158e-05,
      "loss": 3.1434,
      "step": 173927
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8492777347564697,
      "learning_rate": 8.463689239240031e-05,
      "loss": 3.0659,
      "step": 173928
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.1455185413360596,
      "learning_rate": 8.463404468602488e-05,
      "loss": 3.0276,
      "step": 173929
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.427663564682007,
      "learning_rate": 8.463119701968993e-05,
      "loss": 3.0365,
      "step": 173930
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.470363140106201,
      "learning_rate": 8.46283493933962e-05,
      "loss": 2.7124,
      "step": 173931
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.2556796073913574,
      "learning_rate": 8.462550180714407e-05,
      "loss": 2.7467,
      "step": 173932
    },
    {
      "epoch": 2.26,
      "grad_norm": 4.436404228210449,
      "learning_rate": 8.462265426093417e-05,
      "loss": 3.0367,
      "step": 173933
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7077488899230957,
      "learning_rate": 8.461980675476702e-05,
      "loss": 3.0258,
      "step": 173934
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7607333660125732,
      "learning_rate": 8.46169592886431e-05,
      "loss": 2.9016,
      "step": 173935
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.296048402786255,
      "learning_rate": 8.461411186256287e-05,
      "loss": 2.883,
      "step": 173936
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.09980845451355,
      "learning_rate": 8.461126447652702e-05,
      "loss": 2.916,
      "step": 173937
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.324144124984741,
      "learning_rate": 8.46084171305359e-05,
      "loss": 2.8071,
      "step": 173938
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.778339147567749,
      "learning_rate": 8.460556982459028e-05,
      "loss": 2.7155,
      "step": 173939
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5028908252716064,
      "learning_rate": 8.46027225586905e-05,
      "loss": 3.3663,
      "step": 173940
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.432105779647827,
      "learning_rate": 8.459987533283717e-05,
      "loss": 2.9704,
      "step": 173941
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.899399518966675,
      "learning_rate": 8.459702814703069e-05,
      "loss": 3.0419,
      "step": 173942
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.811643362045288,
      "learning_rate": 8.459418100127176e-05,
      "loss": 3.1874,
      "step": 173943
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.229677438735962,
      "learning_rate": 8.459133389556078e-05,
      "loss": 3.0185,
      "step": 173944
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.6618077754974365,
      "learning_rate": 8.458848682989841e-05,
      "loss": 2.6609,
      "step": 173945
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.3458521366119385,
      "learning_rate": 8.458563980428509e-05,
      "loss": 2.7744,
      "step": 173946
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5974559783935547,
      "learning_rate": 8.458279281872138e-05,
      "loss": 2.8049,
      "step": 173947
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.8762805461883545,
      "learning_rate": 8.457994587320773e-05,
      "loss": 3.0115,
      "step": 173948
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.7669074535369873,
      "learning_rate": 8.457709896774482e-05,
      "loss": 3.267,
      "step": 173949
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.506639003753662,
      "learning_rate": 8.457425210233297e-05,
      "loss": 3.0258,
      "step": 173950
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.0070738792419434,
      "learning_rate": 8.457140527697292e-05,
      "loss": 2.803,
      "step": 173951
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.2326338291168213,
      "learning_rate": 8.456855849166514e-05,
      "loss": 3.1265,
      "step": 173952
    },
    {
      "epoch": 2.26,
      "grad_norm": 2.5066795349121094,
      "learning_rate": 8.456571174641014e-05,
      "loss": 2.9955,
      "step": 173953
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.335507392883301,
      "learning_rate": 8.456286504120832e-05,
      "loss": 3.0777,
      "step": 173954
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.78507137298584,
      "learning_rate": 8.456001837606044e-05,
      "loss": 2.7314,
      "step": 173955
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2258660793304443,
      "learning_rate": 8.455717175096684e-05,
      "loss": 2.6229,
      "step": 173956
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8680357933044434,
      "learning_rate": 8.455432516592818e-05,
      "loss": 2.7445,
      "step": 173957
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.778083324432373,
      "learning_rate": 8.455147862094489e-05,
      "loss": 3.0164,
      "step": 173958
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.572385311126709,
      "learning_rate": 8.45486321160177e-05,
      "loss": 2.8959,
      "step": 173959
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.1047112941741943,
      "learning_rate": 8.454578565114681e-05,
      "loss": 2.9476,
      "step": 173960
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9850728511810303,
      "learning_rate": 8.454293922633302e-05,
      "loss": 2.9681,
      "step": 173961
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.8569467067718506,
      "learning_rate": 8.454009284157667e-05,
      "loss": 2.8528,
      "step": 173962
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.078185558319092,
      "learning_rate": 8.45372464968785e-05,
      "loss": 2.8864,
      "step": 173963
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8144612312316895,
      "learning_rate": 8.453440019223878e-05,
      "loss": 3.0908,
      "step": 173964
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0345306396484375,
      "learning_rate": 8.453155392765832e-05,
      "loss": 3.0807,
      "step": 173965
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5556602478027344,
      "learning_rate": 8.45287077031375e-05,
      "loss": 2.7727,
      "step": 173966
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.63154673576355,
      "learning_rate": 8.452586151867684e-05,
      "loss": 2.9334,
      "step": 173967
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9351212978363037,
      "learning_rate": 8.452301537427682e-05,
      "loss": 2.711,
      "step": 173968
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.105400800704956,
      "learning_rate": 8.452016926993814e-05,
      "loss": 2.7385,
      "step": 173969
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7446956634521484,
      "learning_rate": 8.451732320566112e-05,
      "loss": 2.8884,
      "step": 173970
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2511651515960693,
      "learning_rate": 8.45144771814465e-05,
      "loss": 3.0237,
      "step": 173971
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9384918212890625,
      "learning_rate": 8.451163119729469e-05,
      "loss": 2.8345,
      "step": 173972
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.012143135070801,
      "learning_rate": 8.450878525320622e-05,
      "loss": 2.9872,
      "step": 173973
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2344253063201904,
      "learning_rate": 8.450593934918159e-05,
      "loss": 2.901,
      "step": 173974
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4624452590942383,
      "learning_rate": 8.450309348522142e-05,
      "loss": 2.8392,
      "step": 173975
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.420233726501465,
      "learning_rate": 8.450024766132612e-05,
      "loss": 2.9203,
      "step": 173976
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0587751865386963,
      "learning_rate": 8.449740187749637e-05,
      "loss": 2.6811,
      "step": 173977
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6852798461914062,
      "learning_rate": 8.449455613373263e-05,
      "loss": 2.7834,
      "step": 173978
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5608022212982178,
      "learning_rate": 8.449171043003536e-05,
      "loss": 3.0775,
      "step": 173979
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4228932857513428,
      "learning_rate": 8.448886476640522e-05,
      "loss": 3.1172,
      "step": 173980
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6577560901641846,
      "learning_rate": 8.448601914284267e-05,
      "loss": 3.3673,
      "step": 173981
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.691505193710327,
      "learning_rate": 8.448317355934812e-05,
      "loss": 2.9022,
      "step": 173982
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.392599105834961,
      "learning_rate": 8.448032801592234e-05,
      "loss": 3.0208,
      "step": 173983
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7338733673095703,
      "learning_rate": 8.44774825125657e-05,
      "loss": 2.8514,
      "step": 173984
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.025942087173462,
      "learning_rate": 8.44746370492787e-05,
      "loss": 2.821,
      "step": 173985
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.595841884613037,
      "learning_rate": 8.447179162606202e-05,
      "loss": 3.0258,
      "step": 173986
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.63812255859375,
      "learning_rate": 8.446894624291599e-05,
      "loss": 2.99,
      "step": 173987
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.69577956199646,
      "learning_rate": 8.446610089984137e-05,
      "loss": 2.8592,
      "step": 173988
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.849597454071045,
      "learning_rate": 8.446325559683857e-05,
      "loss": 3.0085,
      "step": 173989
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6970510482788086,
      "learning_rate": 8.44604103339081e-05,
      "loss": 2.9426,
      "step": 173990
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6755053997039795,
      "learning_rate": 8.445756511105045e-05,
      "loss": 3.0164,
      "step": 173991
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4575672149658203,
      "learning_rate": 8.445471992826628e-05,
      "loss": 2.8894,
      "step": 173992
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2681515216827393,
      "learning_rate": 8.445187478555595e-05,
      "loss": 2.8897,
      "step": 173993
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.475484848022461,
      "learning_rate": 8.444902968292016e-05,
      "loss": 3.0665,
      "step": 173994
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.689988374710083,
      "learning_rate": 8.444618462035939e-05,
      "loss": 2.786,
      "step": 173995
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8504676818847656,
      "learning_rate": 8.444333959787413e-05,
      "loss": 2.9577,
      "step": 173996
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.437800645828247,
      "learning_rate": 8.444049461546484e-05,
      "loss": 2.6348,
      "step": 173997
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.648878574371338,
      "learning_rate": 8.443764967313222e-05,
      "loss": 3.1289,
      "step": 173998
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2488443851470947,
      "learning_rate": 8.443480477087662e-05,
      "loss": 3.0351,
      "step": 173999
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.1079862117767334,
      "learning_rate": 8.443195990869876e-05,
      "loss": 2.9499,
      "step": 174000
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6107704639434814,
      "learning_rate": 8.442911508659907e-05,
      "loss": 3.2124,
      "step": 174001
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.057123184204102,
      "learning_rate": 8.442627030457804e-05,
      "loss": 3.1239,
      "step": 174002
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4867360591888428,
      "learning_rate": 8.442342556263617e-05,
      "loss": 2.8307,
      "step": 174003
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4625625610351562,
      "learning_rate": 8.442058086077413e-05,
      "loss": 2.9965,
      "step": 174004
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.955017566680908,
      "learning_rate": 8.44177361989923e-05,
      "loss": 2.9393,
      "step": 174005
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9006075859069824,
      "learning_rate": 8.441489157729137e-05,
      "loss": 2.9129,
      "step": 174006
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4795336723327637,
      "learning_rate": 8.441204699567179e-05,
      "loss": 2.7308,
      "step": 174007
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0391085147857666,
      "learning_rate": 8.440920245413408e-05,
      "loss": 2.8092,
      "step": 174008
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9279725551605225,
      "learning_rate": 8.440635795267867e-05,
      "loss": 2.9049,
      "step": 174009
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.211245536804199,
      "learning_rate": 8.440351349130625e-05,
      "loss": 2.9015,
      "step": 174010
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.484452247619629,
      "learning_rate": 8.440066907001725e-05,
      "loss": 2.8953,
      "step": 174011
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9637949466705322,
      "learning_rate": 8.439782468881227e-05,
      "loss": 3.0775,
      "step": 174012
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.631067991256714,
      "learning_rate": 8.439498034769184e-05,
      "loss": 2.8218,
      "step": 174013
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9981186389923096,
      "learning_rate": 8.439213604665644e-05,
      "loss": 2.8537,
      "step": 174014
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.220052480697632,
      "learning_rate": 8.438929178570655e-05,
      "loss": 2.7561,
      "step": 174015
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1460933685302734,
      "learning_rate": 8.438644756484281e-05,
      "loss": 2.9514,
      "step": 174016
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.723087787628174,
      "learning_rate": 8.438360338406562e-05,
      "loss": 3.1429,
      "step": 174017
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.127180099487305,
      "learning_rate": 8.438075924337569e-05,
      "loss": 2.9268,
      "step": 174018
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.031016826629639,
      "learning_rate": 8.437791514277347e-05,
      "loss": 2.7755,
      "step": 174019
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.727924346923828,
      "learning_rate": 8.43750710822594e-05,
      "loss": 3.0051,
      "step": 174020
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.867356061935425,
      "learning_rate": 8.437222706183404e-05,
      "loss": 3.0221,
      "step": 174021
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.998870849609375,
      "learning_rate": 8.436938308149805e-05,
      "loss": 2.7792,
      "step": 174022
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.250755548477173,
      "learning_rate": 8.436653914125172e-05,
      "loss": 2.7438,
      "step": 174023
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6039772033691406,
      "learning_rate": 8.436369524109585e-05,
      "loss": 3.094,
      "step": 174024
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7967920303344727,
      "learning_rate": 8.436085138103074e-05,
      "loss": 2.9925,
      "step": 174025
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3937292098999023,
      "learning_rate": 8.435800756105719e-05,
      "loss": 3.0041,
      "step": 174026
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8004252910614014,
      "learning_rate": 8.435516378117538e-05,
      "loss": 2.9694,
      "step": 174027
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.316453456878662,
      "learning_rate": 8.43523200413861e-05,
      "loss": 2.9798,
      "step": 174028
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8078219890594482,
      "learning_rate": 8.434947634168971e-05,
      "loss": 2.9118,
      "step": 174029
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2640953063964844,
      "learning_rate": 8.434663268208692e-05,
      "loss": 3.1323,
      "step": 174030
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2085506916046143,
      "learning_rate": 8.43437890625781e-05,
      "loss": 2.9115,
      "step": 174031
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.264439344406128,
      "learning_rate": 8.434094548316395e-05,
      "loss": 2.961,
      "step": 174032
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.408006191253662,
      "learning_rate": 8.433810194384476e-05,
      "loss": 2.8109,
      "step": 174033
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.275015354156494,
      "learning_rate": 8.433525844462127e-05,
      "loss": 2.9533,
      "step": 174034
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.518878698348999,
      "learning_rate": 8.433241498549385e-05,
      "loss": 2.8138,
      "step": 174035
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2715835571289062,
      "learning_rate": 8.432957156646314e-05,
      "loss": 3.0028,
      "step": 174036
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.28997278213501,
      "learning_rate": 8.432672818752961e-05,
      "loss": 2.8548,
      "step": 174037
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.215341091156006,
      "learning_rate": 8.432388484869396e-05,
      "loss": 3.0335,
      "step": 174038
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.759976863861084,
      "learning_rate": 8.432104154995638e-05,
      "loss": 3.1482,
      "step": 174039
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.35833477973938,
      "learning_rate": 8.431819829131772e-05,
      "loss": 2.899,
      "step": 174040
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3231849670410156,
      "learning_rate": 8.431535507277825e-05,
      "loss": 3.083,
      "step": 174041
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5941224098205566,
      "learning_rate": 8.431251189433873e-05,
      "loss": 2.8182,
      "step": 174042
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5086731910705566,
      "learning_rate": 8.430966875599947e-05,
      "loss": 2.7299,
      "step": 174043
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3115241527557373,
      "learning_rate": 8.430682565776133e-05,
      "loss": 3.2186,
      "step": 174044
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.551551342010498,
      "learning_rate": 8.430398259962445e-05,
      "loss": 3.0185,
      "step": 174045
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.901604175567627,
      "learning_rate": 8.430113958158959e-05,
      "loss": 2.9097,
      "step": 174046
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.53464937210083,
      "learning_rate": 8.429829660365716e-05,
      "loss": 2.848,
      "step": 174047
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.494318962097168,
      "learning_rate": 8.429545366582781e-05,
      "loss": 2.7233,
      "step": 174048
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.48336124420166,
      "learning_rate": 8.42926107681019e-05,
      "loss": 3.1373,
      "step": 174049
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2715563774108887,
      "learning_rate": 8.42897679104802e-05,
      "loss": 3.0981,
      "step": 174050
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5083258152008057,
      "learning_rate": 8.428692509296307e-05,
      "loss": 2.8099,
      "step": 174051
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6402013301849365,
      "learning_rate": 8.42840823155511e-05,
      "loss": 2.8118,
      "step": 174052
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4800126552581787,
      "learning_rate": 8.428123957824468e-05,
      "loss": 3.1456,
      "step": 174053
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7389960289001465,
      "learning_rate": 8.427839688104456e-05,
      "loss": 2.8826,
      "step": 174054
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.836198329925537,
      "learning_rate": 8.427555422395106e-05,
      "loss": 2.9467,
      "step": 174055
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4272491931915283,
      "learning_rate": 8.42727116069649e-05,
      "loss": 2.8794,
      "step": 174056
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.470759391784668,
      "learning_rate": 8.426986903008653e-05,
      "loss": 3.1908,
      "step": 174057
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1513772010803223,
      "learning_rate": 8.426702649331642e-05,
      "loss": 3.0008,
      "step": 174058
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2321338653564453,
      "learning_rate": 8.426418399665508e-05,
      "loss": 2.8355,
      "step": 174059
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5820627212524414,
      "learning_rate": 8.426134154010318e-05,
      "loss": 2.8369,
      "step": 174060
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.832084894180298,
      "learning_rate": 8.42584991236611e-05,
      "loss": 3.2043,
      "step": 174061
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.32977032661438,
      "learning_rate": 8.425565674732949e-05,
      "loss": 2.8697,
      "step": 174062
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.976431369781494,
      "learning_rate": 8.425281441110887e-05,
      "loss": 3.0172,
      "step": 174063
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0376949310302734,
      "learning_rate": 8.42499721149996e-05,
      "loss": 3.0594,
      "step": 174064
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4289801120758057,
      "learning_rate": 8.424712985900244e-05,
      "loss": 2.9737,
      "step": 174065
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.443687915802002,
      "learning_rate": 8.42442876431178e-05,
      "loss": 2.8632,
      "step": 174066
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.056478977203369,
      "learning_rate": 8.424144546734612e-05,
      "loss": 2.9956,
      "step": 174067
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.139204740524292,
      "learning_rate": 8.423860333168813e-05,
      "loss": 3.2107,
      "step": 174068
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4666037559509277,
      "learning_rate": 8.423576123614429e-05,
      "loss": 3.1464,
      "step": 174069
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.391911506652832,
      "learning_rate": 8.423291918071497e-05,
      "loss": 3.1911,
      "step": 174070
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.573889970779419,
      "learning_rate": 8.423007716540091e-05,
      "loss": 2.9925,
      "step": 174071
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4151954650878906,
      "learning_rate": 8.422723519020256e-05,
      "loss": 3.0375,
      "step": 174072
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4485223293304443,
      "learning_rate": 8.422439325512037e-05,
      "loss": 2.9137,
      "step": 174073
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4942426681518555,
      "learning_rate": 8.422155136015498e-05,
      "loss": 2.9614,
      "step": 174074
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.471597671508789,
      "learning_rate": 8.421870950530694e-05,
      "loss": 3.0453,
      "step": 174075
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.408674716949463,
      "learning_rate": 8.421586769057657e-05,
      "loss": 3.186,
      "step": 174076
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6758766174316406,
      "learning_rate": 8.421302591596467e-05,
      "loss": 3.1032,
      "step": 174077
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.569547414779663,
      "learning_rate": 8.421018418147152e-05,
      "loss": 3.0132,
      "step": 174078
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.16544508934021,
      "learning_rate": 8.420734248709787e-05,
      "loss": 2.9237,
      "step": 174079
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7259955406188965,
      "learning_rate": 8.420450083284417e-05,
      "loss": 2.9274,
      "step": 174080
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3527231216430664,
      "learning_rate": 8.42016592187109e-05,
      "loss": 3.0055,
      "step": 174081
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9244606494903564,
      "learning_rate": 8.419881764469855e-05,
      "loss": 3.2007,
      "step": 174082
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.028759002685547,
      "learning_rate": 8.41959761108078e-05,
      "loss": 3.076,
      "step": 174083
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.535428524017334,
      "learning_rate": 8.419313461703898e-05,
      "loss": 2.9826,
      "step": 174084
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.399517774581909,
      "learning_rate": 8.41902931633928e-05,
      "loss": 3.0305,
      "step": 174085
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4728450775146484,
      "learning_rate": 8.418745174986977e-05,
      "loss": 2.9281,
      "step": 174086
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6930294036865234,
      "learning_rate": 8.418461037647035e-05,
      "loss": 3.0321,
      "step": 174087
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.563230276107788,
      "learning_rate": 8.418176904319498e-05,
      "loss": 2.9342,
      "step": 174088
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.836899995803833,
      "learning_rate": 8.417892775004438e-05,
      "loss": 2.9516,
      "step": 174089
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2130184173583984,
      "learning_rate": 8.417608649701894e-05,
      "loss": 2.8316,
      "step": 174090
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.703218698501587,
      "learning_rate": 8.417324528411928e-05,
      "loss": 2.9839,
      "step": 174091
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4274208545684814,
      "learning_rate": 8.417040411134583e-05,
      "loss": 2.9918,
      "step": 174092
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.673182249069214,
      "learning_rate": 8.416756297869935e-05,
      "loss": 3.0195,
      "step": 174093
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4848828315734863,
      "learning_rate": 8.416472188618001e-05,
      "loss": 2.8087,
      "step": 174094
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3303914070129395,
      "learning_rate": 8.416188083378864e-05,
      "loss": 2.8371,
      "step": 174095
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.173739433288574,
      "learning_rate": 8.415903982152552e-05,
      "loss": 2.8034,
      "step": 174096
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7132270336151123,
      "learning_rate": 8.41561988493914e-05,
      "loss": 3.0906,
      "step": 174097
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.442364454269409,
      "learning_rate": 8.415335791738667e-05,
      "loss": 2.9981,
      "step": 174098
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6194348335266113,
      "learning_rate": 8.415051702551206e-05,
      "loss": 3.0096,
      "step": 174099
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3683650493621826,
      "learning_rate": 8.414767617376774e-05,
      "loss": 2.6934,
      "step": 174100
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.1724817752838135,
      "learning_rate": 8.414483536215456e-05,
      "loss": 2.767,
      "step": 174101
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1089959144592285,
      "learning_rate": 8.414199459067281e-05,
      "loss": 2.9114,
      "step": 174102
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.486658811569214,
      "learning_rate": 8.413915385932326e-05,
      "loss": 3.0446,
      "step": 174103
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4083971977233887,
      "learning_rate": 8.41363131681062e-05,
      "loss": 2.7657,
      "step": 174104
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6415884494781494,
      "learning_rate": 8.413347251702249e-05,
      "loss": 3.0101,
      "step": 174105
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5890886783599854,
      "learning_rate": 8.413063190607222e-05,
      "loss": 2.822,
      "step": 174106
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.920797824859619,
      "learning_rate": 8.412779133525625e-05,
      "loss": 2.867,
      "step": 174107
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4643030166625977,
      "learning_rate": 8.412495080457492e-05,
      "loss": 2.9416,
      "step": 174108
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.975595712661743,
      "learning_rate": 8.412211031402891e-05,
      "loss": 2.8522,
      "step": 174109
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.496107816696167,
      "learning_rate": 8.41192698636186e-05,
      "loss": 2.837,
      "step": 174110
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0799171924591064,
      "learning_rate": 8.411642945334479e-05,
      "loss": 3.0407,
      "step": 174111
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4777724742889404,
      "learning_rate": 8.411358908320759e-05,
      "loss": 2.6064,
      "step": 174112
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.210187911987305,
      "learning_rate": 8.411074875320785e-05,
      "loss": 2.9133,
      "step": 174113
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9010133743286133,
      "learning_rate": 8.410790846334592e-05,
      "loss": 2.8437,
      "step": 174114
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3393959999084473,
      "learning_rate": 8.410506821362248e-05,
      "loss": 3.1292,
      "step": 174115
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.822382688522339,
      "learning_rate": 8.410222800403788e-05,
      "loss": 2.8272,
      "step": 174116
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.911527156829834,
      "learning_rate": 8.409938783459295e-05,
      "loss": 2.8263,
      "step": 174117
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.561880350112915,
      "learning_rate": 8.409654770528785e-05,
      "loss": 3.2439,
      "step": 174118
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3039681911468506,
      "learning_rate": 8.409370761612338e-05,
      "loss": 2.942,
      "step": 174119
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9242050647735596,
      "learning_rate": 8.409086756709981e-05,
      "loss": 2.8412,
      "step": 174120
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.61647891998291,
      "learning_rate": 8.408802755821798e-05,
      "loss": 2.6993,
      "step": 174121
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.512453079223633,
      "learning_rate": 8.408518758947814e-05,
      "loss": 2.892,
      "step": 174122
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8137567043304443,
      "learning_rate": 8.408234766088112e-05,
      "loss": 2.9817,
      "step": 174123
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.969094753265381,
      "learning_rate": 8.407950777242708e-05,
      "loss": 2.963,
      "step": 174124
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.15397310256958,
      "learning_rate": 8.407666792411688e-05,
      "loss": 3.088,
      "step": 174125
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9611217975616455,
      "learning_rate": 8.407382811595076e-05,
      "loss": 3.06,
      "step": 174126
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8863015174865723,
      "learning_rate": 8.407098834792949e-05,
      "loss": 3.2149,
      "step": 174127
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.52204966545105,
      "learning_rate": 8.40681486200534e-05,
      "loss": 2.8212,
      "step": 174128
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8122429847717285,
      "learning_rate": 8.406530893232332e-05,
      "loss": 2.9879,
      "step": 174129
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6272881031036377,
      "learning_rate": 8.406246928473937e-05,
      "loss": 2.9967,
      "step": 174130
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8171048164367676,
      "learning_rate": 8.405962967730242e-05,
      "loss": 2.9952,
      "step": 174131
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.229449510574341,
      "learning_rate": 8.405679011001272e-05,
      "loss": 3.1233,
      "step": 174132
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.469714879989624,
      "learning_rate": 8.405395058287106e-05,
      "loss": 3.1209,
      "step": 174133
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.630474090576172,
      "learning_rate": 8.405111109587774e-05,
      "loss": 2.7881,
      "step": 174134
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4175097942352295,
      "learning_rate": 8.404827164903358e-05,
      "loss": 2.7832,
      "step": 174135
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8304755687713623,
      "learning_rate": 8.404543224233875e-05,
      "loss": 2.6943,
      "step": 174136
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0621423721313477,
      "learning_rate": 8.404259287579404e-05,
      "loss": 2.8665,
      "step": 174137
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6528337001800537,
      "learning_rate": 8.403975354939977e-05,
      "loss": 2.9744,
      "step": 174138
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5238661766052246,
      "learning_rate": 8.403691426315672e-05,
      "loss": 2.9154,
      "step": 174139
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5538198947906494,
      "learning_rate": 8.403407501706515e-05,
      "loss": 3.0825,
      "step": 174140
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4303579330444336,
      "learning_rate": 8.403123581112583e-05,
      "loss": 2.9517,
      "step": 174141
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.047684669494629,
      "learning_rate": 8.402839664533919e-05,
      "loss": 2.9265,
      "step": 174142
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.585012674331665,
      "learning_rate": 8.402555751970573e-05,
      "loss": 3.1404,
      "step": 174143
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6691439151763916,
      "learning_rate": 8.402271843422592e-05,
      "loss": 3.0202,
      "step": 174144
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.588649272918701,
      "learning_rate": 8.401987938890045e-05,
      "loss": 3.0347,
      "step": 174145
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3172006607055664,
      "learning_rate": 8.401704038372968e-05,
      "loss": 2.954,
      "step": 174146
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4414260387420654,
      "learning_rate": 8.40142014187143e-05,
      "loss": 3.0094,
      "step": 174147
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.354466676712036,
      "learning_rate": 8.401136249385478e-05,
      "loss": 2.8012,
      "step": 174148
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6969945430755615,
      "learning_rate": 8.400852360915149e-05,
      "loss": 2.9128,
      "step": 174149
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5969314575195312,
      "learning_rate": 8.400568476460521e-05,
      "loss": 3.2178,
      "step": 174150
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.913933515548706,
      "learning_rate": 8.400284596021636e-05,
      "loss": 2.8459,
      "step": 174151
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7351877689361572,
      "learning_rate": 8.400000719598534e-05,
      "loss": 3.046,
      "step": 174152
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6924898624420166,
      "learning_rate": 8.39971684719129e-05,
      "loss": 3.2346,
      "step": 174153
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2061221599578857,
      "learning_rate": 8.399432978799948e-05,
      "loss": 3.2926,
      "step": 174154
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2915313243865967,
      "learning_rate": 8.399149114424549e-05,
      "loss": 2.7509,
      "step": 174155
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4107449054718018,
      "learning_rate": 8.398865254065164e-05,
      "loss": 2.9517,
      "step": 174156
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9748950004577637,
      "learning_rate": 8.398581397721837e-05,
      "loss": 3.0124,
      "step": 174157
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3088457584381104,
      "learning_rate": 8.398297545394613e-05,
      "loss": 3.0391,
      "step": 174158
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7976105213165283,
      "learning_rate": 8.398013697083564e-05,
      "loss": 2.9226,
      "step": 174159
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5124125480651855,
      "learning_rate": 8.397729852788733e-05,
      "loss": 3.0878,
      "step": 174160
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1118571758270264,
      "learning_rate": 8.397446012510159e-05,
      "loss": 3.1147,
      "step": 174161
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.951342821121216,
      "learning_rate": 8.397162176247918e-05,
      "loss": 2.78,
      "step": 174162
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5376381874084473,
      "learning_rate": 8.396878344002045e-05,
      "loss": 3.0701,
      "step": 174163
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7646641731262207,
      "learning_rate": 8.396594515772605e-05,
      "loss": 2.9125,
      "step": 174164
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.226449012756348,
      "learning_rate": 8.39631069155965e-05,
      "loss": 2.9539,
      "step": 174165
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8890883922576904,
      "learning_rate": 8.396026871363228e-05,
      "loss": 2.8705,
      "step": 174166
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7377781867980957,
      "learning_rate": 8.395743055183383e-05,
      "loss": 3.1404,
      "step": 174167
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.54468035697937,
      "learning_rate": 8.395459243020186e-05,
      "loss": 3.0637,
      "step": 174168
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.403446912765503,
      "learning_rate": 8.395175434873672e-05,
      "loss": 2.85,
      "step": 174169
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.889108896255493,
      "learning_rate": 8.394891630743912e-05,
      "loss": 2.9529,
      "step": 174170
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5847151279449463,
      "learning_rate": 8.394607830630938e-05,
      "loss": 3.1542,
      "step": 174171
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3806684017181396,
      "learning_rate": 8.394324034534835e-05,
      "loss": 3.1888,
      "step": 174172
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8871614933013916,
      "learning_rate": 8.394040242455615e-05,
      "loss": 2.8693,
      "step": 174173
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9712393283843994,
      "learning_rate": 8.393756454393362e-05,
      "loss": 2.942,
      "step": 174174
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8436508178710938,
      "learning_rate": 8.39347267034811e-05,
      "loss": 2.8826,
      "step": 174175
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5576441287994385,
      "learning_rate": 8.393188890319922e-05,
      "loss": 3.111,
      "step": 174176
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6745247840881348,
      "learning_rate": 8.392905114308844e-05,
      "loss": 2.7971,
      "step": 174177
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4659180641174316,
      "learning_rate": 8.39262134231495e-05,
      "loss": 2.8447,
      "step": 174178
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7345595359802246,
      "learning_rate": 8.392337574338256e-05,
      "loss": 2.8364,
      "step": 174179
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5736806392669678,
      "learning_rate": 8.392053810378847e-05,
      "loss": 2.9571,
      "step": 174180
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.40004563331604,
      "learning_rate": 8.39177005043675e-05,
      "loss": 3.0176,
      "step": 174181
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.365817070007324,
      "learning_rate": 8.391486294512043e-05,
      "loss": 2.8114,
      "step": 174182
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4920520782470703,
      "learning_rate": 8.391202542604756e-05,
      "loss": 3.1385,
      "step": 174183
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.346331834793091,
      "learning_rate": 8.390918794714972e-05,
      "loss": 3.068,
      "step": 174184
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8758578300476074,
      "learning_rate": 8.390635050842703e-05,
      "loss": 2.833,
      "step": 174185
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.478348731994629,
      "learning_rate": 8.390351310988032e-05,
      "loss": 2.6467,
      "step": 174186
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1457536220550537,
      "learning_rate": 8.390067575150996e-05,
      "loss": 2.823,
      "step": 174187
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8165464401245117,
      "learning_rate": 8.389783843331663e-05,
      "loss": 3.1164,
      "step": 174188
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.349590539932251,
      "learning_rate": 8.389500115530066e-05,
      "loss": 3.0231,
      "step": 174189
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5469441413879395,
      "learning_rate": 8.38921639174629e-05,
      "loss": 3.0485,
      "step": 174190
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.772416114807129,
      "learning_rate": 8.388932671980346e-05,
      "loss": 2.761,
      "step": 174191
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.297976493835449,
      "learning_rate": 8.388648956232316e-05,
      "loss": 3.0674,
      "step": 174192
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4292564392089844,
      "learning_rate": 8.388365244502238e-05,
      "loss": 3.0021,
      "step": 174193
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.146002769470215,
      "learning_rate": 8.388081536790178e-05,
      "loss": 2.8029,
      "step": 174194
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.29415225982666,
      "learning_rate": 8.387797833096173e-05,
      "loss": 2.9692,
      "step": 174195
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5584158897399902,
      "learning_rate": 8.387514133420303e-05,
      "loss": 3.0906,
      "step": 174196
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5298256874084473,
      "learning_rate": 8.387230437762583e-05,
      "loss": 2.6564,
      "step": 174197
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5273895263671875,
      "learning_rate": 8.386946746123093e-05,
      "loss": 2.9085,
      "step": 174198
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.001908779144287,
      "learning_rate": 8.386663058501869e-05,
      "loss": 2.7086,
      "step": 174199
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.938237190246582,
      "learning_rate": 8.386379374898984e-05,
      "loss": 2.8951,
      "step": 174200
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.750322103500366,
      "learning_rate": 8.386095695314467e-05,
      "loss": 2.9552,
      "step": 174201
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6027774810791016,
      "learning_rate": 8.385812019748404e-05,
      "loss": 2.9029,
      "step": 174202
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5680246353149414,
      "learning_rate": 8.385528348200805e-05,
      "loss": 2.8026,
      "step": 174203
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6018710136413574,
      "learning_rate": 8.385244680671756e-05,
      "loss": 3.2537,
      "step": 174204
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2768425941467285,
      "learning_rate": 8.384961017161285e-05,
      "loss": 3.0746,
      "step": 174205
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4630277156829834,
      "learning_rate": 8.38467735766947e-05,
      "loss": 2.8132,
      "step": 174206
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7919232845306396,
      "learning_rate": 8.384393702196343e-05,
      "loss": 2.7498,
      "step": 174207
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.717827320098877,
      "learning_rate": 8.384110050741981e-05,
      "loss": 2.6819,
      "step": 174208
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0356943607330322,
      "learning_rate": 8.383826403306405e-05,
      "loss": 2.8987,
      "step": 174209
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3382604122161865,
      "learning_rate": 8.383542759889691e-05,
      "loss": 2.9021,
      "step": 174210
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7580878734588623,
      "learning_rate": 8.383259120491875e-05,
      "loss": 3.0042,
      "step": 174211
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.878737688064575,
      "learning_rate": 8.382975485113032e-05,
      "loss": 3.0447,
      "step": 174212
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.319694757461548,
      "learning_rate": 8.382691853753189e-05,
      "loss": 2.9387,
      "step": 174213
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6869969367980957,
      "learning_rate": 8.382408226412432e-05,
      "loss": 2.7967,
      "step": 174214
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4425177574157715,
      "learning_rate": 8.382124603090775e-05,
      "loss": 2.8474,
      "step": 174215
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1928439140319824,
      "learning_rate": 8.381840983788297e-05,
      "loss": 3.0383,
      "step": 174216
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6864330768585205,
      "learning_rate": 8.381557368505035e-05,
      "loss": 2.771,
      "step": 174217
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0038866996765137,
      "learning_rate": 8.381273757241058e-05,
      "loss": 3.0037,
      "step": 174218
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.670780897140503,
      "learning_rate": 8.380990149996399e-05,
      "loss": 2.9199,
      "step": 174219
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7306065559387207,
      "learning_rate": 8.380706546771146e-05,
      "loss": 3.1681,
      "step": 174220
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6533238887786865,
      "learning_rate": 8.380422947565302e-05,
      "loss": 2.9074,
      "step": 174221
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5747289657592773,
      "learning_rate": 8.380139352378961e-05,
      "loss": 3.1465,
      "step": 174222
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0024666786193848,
      "learning_rate": 8.379855761212149e-05,
      "loss": 2.8001,
      "step": 174223
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.133967638015747,
      "learning_rate": 8.379572174064936e-05,
      "loss": 2.8492,
      "step": 174224
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.417267084121704,
      "learning_rate": 8.379288590937362e-05,
      "loss": 3.0153,
      "step": 174225
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4650251865386963,
      "learning_rate": 8.379005011829497e-05,
      "loss": 2.7359,
      "step": 174226
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.958174705505371,
      "learning_rate": 8.378721436741382e-05,
      "loss": 3.1071,
      "step": 174227
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4424240589141846,
      "learning_rate": 8.378437865673072e-05,
      "loss": 2.7377,
      "step": 174228
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1701817512512207,
      "learning_rate": 8.378154298624607e-05,
      "loss": 2.761,
      "step": 174229
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.539933681488037,
      "learning_rate": 8.377870735596061e-05,
      "loss": 2.6796,
      "step": 174230
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2169222831726074,
      "learning_rate": 8.377587176587467e-05,
      "loss": 3.1276,
      "step": 174231
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0194883346557617,
      "learning_rate": 8.377303621598899e-05,
      "loss": 2.7779,
      "step": 174232
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.892240524291992,
      "learning_rate": 8.377020070630397e-05,
      "loss": 2.8441,
      "step": 174233
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4361486434936523,
      "learning_rate": 8.376736523682016e-05,
      "loss": 2.7632,
      "step": 174234
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9811348915100098,
      "learning_rate": 8.376452980753799e-05,
      "loss": 3.0243,
      "step": 174235
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5031802654266357,
      "learning_rate": 8.376169441845817e-05,
      "loss": 2.9318,
      "step": 174236
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.960981607437134,
      "learning_rate": 8.375885906958102e-05,
      "loss": 2.8538,
      "step": 174237
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5701587200164795,
      "learning_rate": 8.375602376090728e-05,
      "loss": 2.7385,
      "step": 174238
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.169017791748047,
      "learning_rate": 8.375318849243738e-05,
      "loss": 2.6554,
      "step": 174239
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4083430767059326,
      "learning_rate": 8.375035326417174e-05,
      "loss": 2.7644,
      "step": 174240
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.527635335922241,
      "learning_rate": 8.374751807611111e-05,
      "loss": 2.948,
      "step": 174241
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4831507205963135,
      "learning_rate": 8.37446829282559e-05,
      "loss": 2.8269,
      "step": 174242
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7089931964874268,
      "learning_rate": 8.374184782060652e-05,
      "loss": 2.9823,
      "step": 174243
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.827834129333496,
      "learning_rate": 8.373901275316371e-05,
      "loss": 2.8361,
      "step": 174244
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6568994522094727,
      "learning_rate": 8.373617772592791e-05,
      "loss": 2.7825,
      "step": 174245
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.721774101257324,
      "learning_rate": 8.373334273889955e-05,
      "loss": 2.824,
      "step": 174246
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.875136137008667,
      "learning_rate": 8.373050779207931e-05,
      "loss": 2.7529,
      "step": 174247
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.160742998123169,
      "learning_rate": 8.372767288546755e-05,
      "loss": 3.0912,
      "step": 174248
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.517345428466797,
      "learning_rate": 8.372483801906503e-05,
      "loss": 2.9768,
      "step": 174249
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.155311107635498,
      "learning_rate": 8.372200319287211e-05,
      "loss": 3.0481,
      "step": 174250
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0932772159576416,
      "learning_rate": 8.371916840688939e-05,
      "loss": 2.9862,
      "step": 174251
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8633930683135986,
      "learning_rate": 8.371633366111725e-05,
      "loss": 3.1174,
      "step": 174252
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.668246030807495,
      "learning_rate": 8.371349895555639e-05,
      "loss": 2.5726,
      "step": 174253
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.781222105026245,
      "learning_rate": 8.37106642902072e-05,
      "loss": 2.876,
      "step": 174254
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.554147481918335,
      "learning_rate": 8.370782966507039e-05,
      "loss": 3.1727,
      "step": 174255
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.459862470626831,
      "learning_rate": 8.370499508014637e-05,
      "loss": 3.1008,
      "step": 174256
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5988271236419678,
      "learning_rate": 8.370216053543569e-05,
      "loss": 3.1739,
      "step": 174257
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7203569412231445,
      "learning_rate": 8.369932603093874e-05,
      "loss": 2.7474,
      "step": 174258
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4915812015533447,
      "learning_rate": 8.369649156665626e-05,
      "loss": 2.9624,
      "step": 174259
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.181654930114746,
      "learning_rate": 8.36936571425886e-05,
      "loss": 2.853,
      "step": 174260
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.266683578491211,
      "learning_rate": 8.369082275873647e-05,
      "loss": 3.0258,
      "step": 174261
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.534822702407837,
      "learning_rate": 8.368798841510021e-05,
      "loss": 2.9058,
      "step": 174262
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7531871795654297,
      "learning_rate": 8.368515411168062e-05,
      "loss": 2.8105,
      "step": 174263
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0675156116485596,
      "learning_rate": 8.368231984847785e-05,
      "loss": 2.9224,
      "step": 174264
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.523959159851074,
      "learning_rate": 8.367948562549271e-05,
      "loss": 2.8112,
      "step": 174265
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9181134700775146,
      "learning_rate": 8.367665144272557e-05,
      "loss": 2.9412,
      "step": 174266
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2844467163085938,
      "learning_rate": 8.367381730017709e-05,
      "loss": 2.9983,
      "step": 174267
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.732255697250366,
      "learning_rate": 8.367098319784764e-05,
      "loss": 2.964,
      "step": 174268
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.051602840423584,
      "learning_rate": 8.366814913573802e-05,
      "loss": 2.8758,
      "step": 174269
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.178504943847656,
      "learning_rate": 8.366531511384842e-05,
      "loss": 3.0274,
      "step": 174270
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.913761615753174,
      "learning_rate": 8.366248113217959e-05,
      "loss": 2.7298,
      "step": 174271
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8865325450897217,
      "learning_rate": 8.365964719073187e-05,
      "loss": 3.0845,
      "step": 174272
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.923311233520508,
      "learning_rate": 8.365681328950605e-05,
      "loss": 2.7437,
      "step": 174273
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.682948589324951,
      "learning_rate": 8.365397942850239e-05,
      "loss": 3.1222,
      "step": 174274
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.954449415206909,
      "learning_rate": 8.365114560772172e-05,
      "loss": 3.0983,
      "step": 174275
    },
    {
      "epoch": 2.27,
      "grad_norm": 7.4023919105529785,
      "learning_rate": 8.364831182716421e-05,
      "loss": 2.915,
      "step": 174276
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.159999132156372,
      "learning_rate": 8.364547808683067e-05,
      "loss": 2.8812,
      "step": 174277
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.058016777038574,
      "learning_rate": 8.36426443867214e-05,
      "loss": 2.7886,
      "step": 174278
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.20698618888855,
      "learning_rate": 8.363981072683716e-05,
      "loss": 2.8084,
      "step": 174279
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5565025806427,
      "learning_rate": 8.363697710717825e-05,
      "loss": 3.1664,
      "step": 174280
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.261056661605835,
      "learning_rate": 8.363414352774552e-05,
      "loss": 3.0783,
      "step": 174281
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5186777114868164,
      "learning_rate": 8.363130998853908e-05,
      "loss": 2.7668,
      "step": 174282
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9505460262298584,
      "learning_rate": 8.362847648955977e-05,
      "loss": 3.1493,
      "step": 174283
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5176331996917725,
      "learning_rate": 8.362564303080791e-05,
      "loss": 3.0504,
      "step": 174284
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.630589246749878,
      "learning_rate": 8.362280961228423e-05,
      "loss": 2.8954,
      "step": 174285
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7839303016662598,
      "learning_rate": 8.361997623398905e-05,
      "loss": 2.9451,
      "step": 174286
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.403682231903076,
      "learning_rate": 8.361714289592312e-05,
      "loss": 2.7609,
      "step": 174287
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.432687759399414,
      "learning_rate": 8.361430959808681e-05,
      "loss": 2.8127,
      "step": 174288
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.294322967529297,
      "learning_rate": 8.361147634048071e-05,
      "loss": 2.7819,
      "step": 174289
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.085685968399048,
      "learning_rate": 8.36086431231052e-05,
      "loss": 3.1333,
      "step": 174290
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7510268688201904,
      "learning_rate": 8.360580994596105e-05,
      "loss": 2.9369,
      "step": 174291
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.240158796310425,
      "learning_rate": 8.360297680904857e-05,
      "loss": 2.8771,
      "step": 174292
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2816812992095947,
      "learning_rate": 8.360014371236845e-05,
      "loss": 3.0349,
      "step": 174293
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.301278591156006,
      "learning_rate": 8.359731065592117e-05,
      "loss": 2.8456,
      "step": 174294
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7847142219543457,
      "learning_rate": 8.359447763970722e-05,
      "loss": 2.838,
      "step": 174295
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.585890769958496,
      "learning_rate": 8.359164466372708e-05,
      "loss": 2.9712,
      "step": 174296
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.708030939102173,
      "learning_rate": 8.35888117279814e-05,
      "loss": 3.0159,
      "step": 174297
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.62227201461792,
      "learning_rate": 8.358597883247056e-05,
      "loss": 2.883,
      "step": 174298
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4599931240081787,
      "learning_rate": 8.358314597719526e-05,
      "loss": 2.9106,
      "step": 174299
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8243985176086426,
      "learning_rate": 8.358031316215595e-05,
      "loss": 2.8256,
      "step": 174300
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.745875835418701,
      "learning_rate": 8.357748038735312e-05,
      "loss": 2.7998,
      "step": 174301
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8502213954925537,
      "learning_rate": 8.357464765278727e-05,
      "loss": 2.9257,
      "step": 174302
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4063832759857178,
      "learning_rate": 8.357181495845904e-05,
      "loss": 3.0089,
      "step": 174303
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3087120056152344,
      "learning_rate": 8.356898230436881e-05,
      "loss": 2.9501,
      "step": 174304
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6291399002075195,
      "learning_rate": 8.35661496905173e-05,
      "loss": 2.9352,
      "step": 174305
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0015358924865723,
      "learning_rate": 8.35633171169049e-05,
      "loss": 2.9881,
      "step": 174306
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2852048873901367,
      "learning_rate": 8.356048458353219e-05,
      "loss": 3.0476,
      "step": 174307
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.33261775970459,
      "learning_rate": 8.355765209039957e-05,
      "loss": 2.8871,
      "step": 174308
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.878631114959717,
      "learning_rate": 8.355481963750775e-05,
      "loss": 2.8604,
      "step": 174309
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2078278064727783,
      "learning_rate": 8.355198722485709e-05,
      "loss": 2.8419,
      "step": 174310
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7903738021850586,
      "learning_rate": 8.354915485244831e-05,
      "loss": 2.9286,
      "step": 174311
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9045701026916504,
      "learning_rate": 8.35463225202818e-05,
      "loss": 3.2238,
      "step": 174312
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.409069061279297,
      "learning_rate": 8.354349022835811e-05,
      "loss": 2.9629,
      "step": 174313
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.059879302978516,
      "learning_rate": 8.354065797667773e-05,
      "loss": 3.1984,
      "step": 174314
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.374861717224121,
      "learning_rate": 8.353782576524128e-05,
      "loss": 3.1334,
      "step": 174315
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.119205474853516,
      "learning_rate": 8.353499359404915e-05,
      "loss": 2.9647,
      "step": 174316
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4242100715637207,
      "learning_rate": 8.353216146310202e-05,
      "loss": 3.0049,
      "step": 174317
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4719204902648926,
      "learning_rate": 8.352932937240037e-05,
      "loss": 2.7477,
      "step": 174318
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.552076578140259,
      "learning_rate": 8.352649732194472e-05,
      "loss": 3.0008,
      "step": 174319
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.787000894546509,
      "learning_rate": 8.352366531173544e-05,
      "loss": 2.8231,
      "step": 174320
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2081971168518066,
      "learning_rate": 8.352083334177334e-05,
      "loss": 2.7934,
      "step": 174321
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.338064193725586,
      "learning_rate": 8.351800141205867e-05,
      "loss": 2.9666,
      "step": 174322
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.157938241958618,
      "learning_rate": 8.351516952259221e-05,
      "loss": 3.1135,
      "step": 174323
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.89974308013916,
      "learning_rate": 8.351233767337437e-05,
      "loss": 2.9494,
      "step": 174324
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.419757843017578,
      "learning_rate": 8.350950586440558e-05,
      "loss": 2.9974,
      "step": 174325
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3021233081817627,
      "learning_rate": 8.350667409568653e-05,
      "loss": 2.9063,
      "step": 174326
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0670573711395264,
      "learning_rate": 8.350384236721769e-05,
      "loss": 3.1003,
      "step": 174327
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8083455562591553,
      "learning_rate": 8.350101067899949e-05,
      "loss": 2.9727,
      "step": 174328
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3245604038238525,
      "learning_rate": 8.349817903103261e-05,
      "loss": 2.7257,
      "step": 174329
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8153457641601562,
      "learning_rate": 8.34953474233175e-05,
      "loss": 2.7808,
      "step": 174330
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.577697277069092,
      "learning_rate": 8.349251585585463e-05,
      "loss": 3.0335,
      "step": 174331
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.730376958847046,
      "learning_rate": 8.348968432864467e-05,
      "loss": 2.7741,
      "step": 174332
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.01754093170166,
      "learning_rate": 8.348685284168797e-05,
      "loss": 2.8755,
      "step": 174333
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.456902265548706,
      "learning_rate": 8.348402139498523e-05,
      "loss": 2.9896,
      "step": 174334
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2967681884765625,
      "learning_rate": 8.34811899885369e-05,
      "loss": 2.9869,
      "step": 174335
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.9421186447143555,
      "learning_rate": 8.34783586223435e-05,
      "loss": 2.8854,
      "step": 174336
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6329333782196045,
      "learning_rate": 8.34755272964055e-05,
      "loss": 3.2188,
      "step": 174337
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.38504958152771,
      "learning_rate": 8.347269601072356e-05,
      "loss": 2.8611,
      "step": 174338
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.501490354537964,
      "learning_rate": 8.346986476529802e-05,
      "loss": 2.7398,
      "step": 174339
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.387470245361328,
      "learning_rate": 8.346703356012964e-05,
      "loss": 3.0257,
      "step": 174340
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7031731605529785,
      "learning_rate": 8.34642023952188e-05,
      "loss": 2.9049,
      "step": 174341
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.695258378982544,
      "learning_rate": 8.34613712705661e-05,
      "loss": 2.7882,
      "step": 174342
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1037817001342773,
      "learning_rate": 8.345854018617188e-05,
      "loss": 2.833,
      "step": 174343
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3738694190979004,
      "learning_rate": 8.345570914203691e-05,
      "loss": 2.7638,
      "step": 174344
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.58573842048645,
      "learning_rate": 8.345287813816152e-05,
      "loss": 2.7539,
      "step": 174345
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3123064041137695,
      "learning_rate": 8.345004717454644e-05,
      "loss": 2.9185,
      "step": 174346
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.460965156555176,
      "learning_rate": 8.344721625119196e-05,
      "loss": 2.8961,
      "step": 174347
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.8037750720977783,
      "learning_rate": 8.344438536809892e-05,
      "loss": 2.9103,
      "step": 174348
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2801859378814697,
      "learning_rate": 8.344155452526748e-05,
      "loss": 2.8732,
      "step": 174349
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5166499614715576,
      "learning_rate": 8.343872372269843e-05,
      "loss": 3.0435,
      "step": 174350
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6213231086730957,
      "learning_rate": 8.343589296039213e-05,
      "loss": 2.7562,
      "step": 174351
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9666497707366943,
      "learning_rate": 8.343306223834925e-05,
      "loss": 2.9837,
      "step": 174352
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.279850959777832,
      "learning_rate": 8.343023155657018e-05,
      "loss": 2.6987,
      "step": 174353
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.576601028442383,
      "learning_rate": 8.342740091505563e-05,
      "loss": 2.9366,
      "step": 174354
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.779308557510376,
      "learning_rate": 8.342457031380597e-05,
      "loss": 3.0169,
      "step": 174355
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4696128368377686,
      "learning_rate": 8.34217397528218e-05,
      "loss": 3.0026,
      "step": 174356
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.555788040161133,
      "learning_rate": 8.341890923210353e-05,
      "loss": 2.9195,
      "step": 174357
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.870762825012207,
      "learning_rate": 8.341607875165184e-05,
      "loss": 3.1292,
      "step": 174358
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5623903274536133,
      "learning_rate": 8.34132483114671e-05,
      "loss": 2.9334,
      "step": 174359
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5152759552001953,
      "learning_rate": 8.341041791155007e-05,
      "loss": 3.026,
      "step": 174360
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.258720874786377,
      "learning_rate": 8.340758755190108e-05,
      "loss": 2.8339,
      "step": 174361
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6971523761749268,
      "learning_rate": 8.340475723252071e-05,
      "loss": 3.0169,
      "step": 174362
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2489233016967773,
      "learning_rate": 8.34019269534094e-05,
      "loss": 2.9323,
      "step": 174363
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.384305953979492,
      "learning_rate": 8.339909671456785e-05,
      "loss": 2.889,
      "step": 174364
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.44437837600708,
      "learning_rate": 8.339626651599642e-05,
      "loss": 3.0236,
      "step": 174365
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5293126106262207,
      "learning_rate": 8.339343635769576e-05,
      "loss": 3.0898,
      "step": 174366
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.468329668045044,
      "learning_rate": 8.33906062396664e-05,
      "loss": 3.0629,
      "step": 174367
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.394340753555298,
      "learning_rate": 8.33877761619088e-05,
      "loss": 2.743,
      "step": 174368
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.755051851272583,
      "learning_rate": 8.338494612442341e-05,
      "loss": 2.8685,
      "step": 174369
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.516547441482544,
      "learning_rate": 8.338211612721092e-05,
      "loss": 3.2561,
      "step": 174370
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.596076488494873,
      "learning_rate": 8.337928617027171e-05,
      "loss": 2.9896,
      "step": 174371
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.282386302947998,
      "learning_rate": 8.337645625360647e-05,
      "loss": 2.8957,
      "step": 174372
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.348564386367798,
      "learning_rate": 8.337362637721566e-05,
      "loss": 2.8481,
      "step": 174373
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.161604881286621,
      "learning_rate": 8.337079654109972e-05,
      "loss": 3.0331,
      "step": 174374
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4530606269836426,
      "learning_rate": 8.33679667452592e-05,
      "loss": 2.8503,
      "step": 174375
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6647050380706787,
      "learning_rate": 8.336513698969472e-05,
      "loss": 3.296,
      "step": 174376
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.8123159408569336,
      "learning_rate": 8.33623072744067e-05,
      "loss": 3.2387,
      "step": 174377
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6170315742492676,
      "learning_rate": 8.335947759939577e-05,
      "loss": 3.0476,
      "step": 174378
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8156516551971436,
      "learning_rate": 8.335664796466244e-05,
      "loss": 3.1075,
      "step": 174379
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.657968759536743,
      "learning_rate": 8.335381837020716e-05,
      "loss": 2.9957,
      "step": 174380
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6222753524780273,
      "learning_rate": 8.335098881603043e-05,
      "loss": 2.9332,
      "step": 174381
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7687363624572754,
      "learning_rate": 8.334815930213292e-05,
      "loss": 2.8448,
      "step": 174382
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6483864784240723,
      "learning_rate": 8.334532982851497e-05,
      "loss": 3.0254,
      "step": 174383
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9541878700256348,
      "learning_rate": 8.334250039517731e-05,
      "loss": 3.0604,
      "step": 174384
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.850395679473877,
      "learning_rate": 8.33396710021204e-05,
      "loss": 2.8301,
      "step": 174385
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.722008228302002,
      "learning_rate": 8.33368416493447e-05,
      "loss": 3.0129,
      "step": 174386
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7438931465148926,
      "learning_rate": 8.333401233685071e-05,
      "loss": 2.8409,
      "step": 174387
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.446084499359131,
      "learning_rate": 8.33311830646391e-05,
      "loss": 2.8829,
      "step": 174388
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9962210655212402,
      "learning_rate": 8.332835383271019e-05,
      "loss": 2.8833,
      "step": 174389
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.286285638809204,
      "learning_rate": 8.332552464106477e-05,
      "loss": 2.9968,
      "step": 174390
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.755751609802246,
      "learning_rate": 8.332269548970318e-05,
      "loss": 2.9116,
      "step": 174391
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.810683250427246,
      "learning_rate": 8.3319866378626e-05,
      "loss": 3.3074,
      "step": 174392
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7833523750305176,
      "learning_rate": 8.331703730783368e-05,
      "loss": 2.8095,
      "step": 174393
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.388097286224365,
      "learning_rate": 8.331420827732689e-05,
      "loss": 2.9734,
      "step": 174394
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.696774959564209,
      "learning_rate": 8.331137928710601e-05,
      "loss": 2.7382,
      "step": 174395
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.422102212905884,
      "learning_rate": 8.33085503371717e-05,
      "loss": 2.905,
      "step": 174396
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4354381561279297,
      "learning_rate": 8.330572142752443e-05,
      "loss": 2.7423,
      "step": 174397
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.238020181655884,
      "learning_rate": 8.33028925581647e-05,
      "loss": 2.712,
      "step": 174398
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.319042682647705,
      "learning_rate": 8.330006372909299e-05,
      "loss": 2.7766,
      "step": 174399
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.362210750579834,
      "learning_rate": 8.329723494030994e-05,
      "loss": 2.6302,
      "step": 174400
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.657510995864868,
      "learning_rate": 8.329440619181596e-05,
      "loss": 2.7993,
      "step": 174401
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7774288654327393,
      "learning_rate": 8.329157748361171e-05,
      "loss": 2.9382,
      "step": 174402
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1026194095611572,
      "learning_rate": 8.328874881569769e-05,
      "loss": 2.8221,
      "step": 174403
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6318564414978027,
      "learning_rate": 8.328592018807435e-05,
      "loss": 3.0861,
      "step": 174404
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5007193088531494,
      "learning_rate": 8.328309160074216e-05,
      "loss": 3.0377,
      "step": 174405
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6134655475616455,
      "learning_rate": 8.328026305370183e-05,
      "loss": 3.0456,
      "step": 174406
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.664367198944092,
      "learning_rate": 8.327743454695371e-05,
      "loss": 2.8227,
      "step": 174407
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3913495540618896,
      "learning_rate": 8.32746060804985e-05,
      "loss": 2.9265,
      "step": 174408
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.791688919067383,
      "learning_rate": 8.327177765433662e-05,
      "loss": 3.0004,
      "step": 174409
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3694264888763428,
      "learning_rate": 8.32689492684685e-05,
      "loss": 3.0423,
      "step": 174410
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.814915895462036,
      "learning_rate": 8.326612092289489e-05,
      "loss": 3.0854,
      "step": 174411
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5905325412750244,
      "learning_rate": 8.326329261761618e-05,
      "loss": 2.9691,
      "step": 174412
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.604231357574463,
      "learning_rate": 8.326046435263283e-05,
      "loss": 2.9505,
      "step": 174413
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7087483406066895,
      "learning_rate": 8.325763612794555e-05,
      "loss": 2.9786,
      "step": 174414
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.832427501678467,
      "learning_rate": 8.325480794355474e-05,
      "loss": 2.9691,
      "step": 174415
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7154488563537598,
      "learning_rate": 8.325197979946089e-05,
      "loss": 2.9699,
      "step": 174416
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4151809215545654,
      "learning_rate": 8.324915169566467e-05,
      "loss": 2.8761,
      "step": 174417
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.960007905960083,
      "learning_rate": 8.324632363216653e-05,
      "loss": 2.8652,
      "step": 174418
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.229130744934082,
      "learning_rate": 8.32434956089669e-05,
      "loss": 3.0745,
      "step": 174419
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.567629337310791,
      "learning_rate": 8.324066762606649e-05,
      "loss": 2.9806,
      "step": 174420
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.422232151031494,
      "learning_rate": 8.323783968346562e-05,
      "loss": 2.9832,
      "step": 174421
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9574368000030518,
      "learning_rate": 8.323501178116503e-05,
      "loss": 2.8511,
      "step": 174422
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5350186824798584,
      "learning_rate": 8.323218391916513e-05,
      "loss": 2.8687,
      "step": 174423
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.306684970855713,
      "learning_rate": 8.322935609746639e-05,
      "loss": 2.6789,
      "step": 174424
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.574836254119873,
      "learning_rate": 8.322652831606949e-05,
      "loss": 2.8733,
      "step": 174425
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4966557025909424,
      "learning_rate": 8.322370057497488e-05,
      "loss": 2.9254,
      "step": 174426
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.564645767211914,
      "learning_rate": 8.322087287418295e-05,
      "loss": 2.9237,
      "step": 174427
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.741084575653076,
      "learning_rate": 8.321804521369446e-05,
      "loss": 2.8719,
      "step": 174428
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4872076511383057,
      "learning_rate": 8.321521759350983e-05,
      "loss": 2.6623,
      "step": 174429
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.503709554672241,
      "learning_rate": 8.321239001362952e-05,
      "loss": 2.8795,
      "step": 174430
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.270965814590454,
      "learning_rate": 8.320956247405419e-05,
      "loss": 3.0179,
      "step": 174431
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8701467514038086,
      "learning_rate": 8.32067349747842e-05,
      "loss": 3.0202,
      "step": 174432
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.085528373718262,
      "learning_rate": 8.320390751582023e-05,
      "loss": 2.9293,
      "step": 174433
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2940924167633057,
      "learning_rate": 8.32010800971628e-05,
      "loss": 2.837,
      "step": 174434
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5228497982025146,
      "learning_rate": 8.319825271881235e-05,
      "loss": 3.0617,
      "step": 174435
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.374927282333374,
      "learning_rate": 8.319542538076934e-05,
      "loss": 2.935,
      "step": 174436
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.274904489517212,
      "learning_rate": 8.31925980830345e-05,
      "loss": 3.038,
      "step": 174437
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6164472103118896,
      "learning_rate": 8.318977082560816e-05,
      "loss": 3.1898,
      "step": 174438
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.579845666885376,
      "learning_rate": 8.318694360849101e-05,
      "loss": 2.8879,
      "step": 174439
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0874452590942383,
      "learning_rate": 8.318411643168352e-05,
      "loss": 2.9267,
      "step": 174440
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9934797286987305,
      "learning_rate": 8.318128929518617e-05,
      "loss": 2.7375,
      "step": 174441
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5682599544525146,
      "learning_rate": 8.317846219899945e-05,
      "loss": 3.1047,
      "step": 174442
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.489300489425659,
      "learning_rate": 8.317563514312402e-05,
      "loss": 3.0635,
      "step": 174443
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7823684215545654,
      "learning_rate": 8.317280812756024e-05,
      "loss": 2.8166,
      "step": 174444
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3453314304351807,
      "learning_rate": 8.316998115230882e-05,
      "loss": 2.8565,
      "step": 174445
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6528050899505615,
      "learning_rate": 8.316715421737021e-05,
      "loss": 3.0117,
      "step": 174446
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.507498025894165,
      "learning_rate": 8.31643273227449e-05,
      "loss": 3.1257,
      "step": 174447
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.46998929977417,
      "learning_rate": 8.316150046843336e-05,
      "loss": 2.9738,
      "step": 174448
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6092357635498047,
      "learning_rate": 8.315867365443625e-05,
      "loss": 2.803,
      "step": 174449
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3572189807891846,
      "learning_rate": 8.315584688075399e-05,
      "loss": 2.8739,
      "step": 174450
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6326372623443604,
      "learning_rate": 8.315302014738721e-05,
      "loss": 2.7917,
      "step": 174451
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.391418695449829,
      "learning_rate": 8.315019345433637e-05,
      "loss": 2.9547,
      "step": 174452
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.882638931274414,
      "learning_rate": 8.314736680160203e-05,
      "loss": 2.9028,
      "step": 174453
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.514838933944702,
      "learning_rate": 8.314454018918458e-05,
      "loss": 2.9545,
      "step": 174454
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8802261352539062,
      "learning_rate": 8.314171361708476e-05,
      "loss": 2.679,
      "step": 174455
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7112414836883545,
      "learning_rate": 8.313888708530287e-05,
      "loss": 2.6914,
      "step": 174456
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9680919647216797,
      "learning_rate": 8.313606059383968e-05,
      "loss": 2.9396,
      "step": 174457
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.944032907485962,
      "learning_rate": 8.313323414269555e-05,
      "loss": 2.9619,
      "step": 174458
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.673851251602173,
      "learning_rate": 8.313040773187106e-05,
      "loss": 3.0957,
      "step": 174459
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.404824733734131,
      "learning_rate": 8.312758136136666e-05,
      "loss": 2.7185,
      "step": 174460
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5584816932678223,
      "learning_rate": 8.3124755031183e-05,
      "loss": 2.9033,
      "step": 174461
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2202606201171875,
      "learning_rate": 8.312192874132045e-05,
      "loss": 2.8038,
      "step": 174462
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.73760724067688,
      "learning_rate": 8.311910249177973e-05,
      "loss": 3.0652,
      "step": 174463
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.16945743560791,
      "learning_rate": 8.311627628256126e-05,
      "loss": 3.024,
      "step": 174464
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6811156272888184,
      "learning_rate": 8.311345011366555e-05,
      "loss": 2.8071,
      "step": 174465
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3721792697906494,
      "learning_rate": 8.311062398509306e-05,
      "loss": 3.0019,
      "step": 174466
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9744038581848145,
      "learning_rate": 8.31077978968445e-05,
      "loss": 2.9768,
      "step": 174467
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2591419219970703,
      "learning_rate": 8.31049718489202e-05,
      "loss": 3.053,
      "step": 174468
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8342552185058594,
      "learning_rate": 8.31021458413209e-05,
      "loss": 2.8157,
      "step": 174469
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.453040599822998,
      "learning_rate": 8.309931987404698e-05,
      "loss": 2.9386,
      "step": 174470
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7128772735595703,
      "learning_rate": 8.309649394709897e-05,
      "loss": 2.876,
      "step": 174471
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9534108638763428,
      "learning_rate": 8.309366806047735e-05,
      "loss": 3.1086,
      "step": 174472
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8474957942962646,
      "learning_rate": 8.30908422141828e-05,
      "loss": 3.0215,
      "step": 174473
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0220041275024414,
      "learning_rate": 8.308801640821564e-05,
      "loss": 3.072,
      "step": 174474
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8442883491516113,
      "learning_rate": 8.308519064257663e-05,
      "loss": 2.9386,
      "step": 174475
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.705172061920166,
      "learning_rate": 8.308236491726619e-05,
      "loss": 3.1919,
      "step": 174476
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4955031871795654,
      "learning_rate": 8.307953923228482e-05,
      "loss": 3.0706,
      "step": 174477
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.588168144226074,
      "learning_rate": 8.307671358763296e-05,
      "loss": 3.01,
      "step": 174478
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.688582181930542,
      "learning_rate": 8.307388798331131e-05,
      "loss": 2.66,
      "step": 174479
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.490076780319214,
      "learning_rate": 8.307106241932026e-05,
      "loss": 3.0422,
      "step": 174480
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.471489429473877,
      "learning_rate": 8.306823689566048e-05,
      "loss": 2.8283,
      "step": 174481
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6801106929779053,
      "learning_rate": 8.306541141233238e-05,
      "loss": 3.0188,
      "step": 174482
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.051091432571411,
      "learning_rate": 8.306258596933654e-05,
      "loss": 2.8465,
      "step": 174483
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4214494228363037,
      "learning_rate": 8.305976056667337e-05,
      "loss": 2.9355,
      "step": 174484
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5032989978790283,
      "learning_rate": 8.305693520434357e-05,
      "loss": 2.9225,
      "step": 174485
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.610262393951416,
      "learning_rate": 8.305410988234749e-05,
      "loss": 3.0132,
      "step": 174486
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9188201427459717,
      "learning_rate": 8.305128460068583e-05,
      "loss": 2.7351,
      "step": 174487
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.226689577102661,
      "learning_rate": 8.304845935935895e-05,
      "loss": 3.059,
      "step": 174488
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.875504493713379,
      "learning_rate": 8.304563415836765e-05,
      "loss": 3.0662,
      "step": 174489
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.853670835494995,
      "learning_rate": 8.304280899771208e-05,
      "loss": 3.0394,
      "step": 174490
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5382938385009766,
      "learning_rate": 8.303998387739298e-05,
      "loss": 2.9191,
      "step": 174491
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1355173587799072,
      "learning_rate": 8.30371587974108e-05,
      "loss": 3.2272,
      "step": 174492
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.513538360595703,
      "learning_rate": 8.303433375776622e-05,
      "loss": 3.0253,
      "step": 174493
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4842188358306885,
      "learning_rate": 8.303150875845954e-05,
      "loss": 3.0084,
      "step": 174494
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.289759874343872,
      "learning_rate": 8.30286837994915e-05,
      "loss": 3.1004,
      "step": 174495
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4184443950653076,
      "learning_rate": 8.302585888086251e-05,
      "loss": 2.7063,
      "step": 174496
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7654693126678467,
      "learning_rate": 8.302303400257309e-05,
      "loss": 2.8308,
      "step": 174497
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6425940990448,
      "learning_rate": 8.302020916462369e-05,
      "loss": 3.1413,
      "step": 174498
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4948019981384277,
      "learning_rate": 8.301738436701507e-05,
      "loss": 2.923,
      "step": 174499
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4724793434143066,
      "learning_rate": 8.301455960974747e-05,
      "loss": 3.0319,
      "step": 174500
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.780085563659668,
      "learning_rate": 8.301173489282168e-05,
      "loss": 3.0115,
      "step": 174501
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.813347816467285,
      "learning_rate": 8.300891021623812e-05,
      "loss": 2.9475,
      "step": 174502
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9741103649139404,
      "learning_rate": 8.300608557999727e-05,
      "loss": 3.1194,
      "step": 174503
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0615057945251465,
      "learning_rate": 8.30032609840996e-05,
      "loss": 2.8306,
      "step": 174504
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.969978094100952,
      "learning_rate": 8.300043642854581e-05,
      "loss": 3.23,
      "step": 174505
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9211483001708984,
      "learning_rate": 8.299761191333626e-05,
      "loss": 3.1137,
      "step": 174506
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7320151329040527,
      "learning_rate": 8.299478743847163e-05,
      "loss": 2.7741,
      "step": 174507
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.788482666015625,
      "learning_rate": 8.299196300395237e-05,
      "loss": 3.028,
      "step": 174508
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.111125946044922,
      "learning_rate": 8.298913860977889e-05,
      "loss": 3.0206,
      "step": 174509
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.753344774246216,
      "learning_rate": 8.298631425595194e-05,
      "loss": 2.9225,
      "step": 174510
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.613595485687256,
      "learning_rate": 8.298348994247193e-05,
      "loss": 2.9782,
      "step": 174511
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6209590435028076,
      "learning_rate": 8.298066566933927e-05,
      "loss": 3.2215,
      "step": 174512
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8259124755859375,
      "learning_rate": 8.29778414365547e-05,
      "loss": 2.7665,
      "step": 174513
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3665380477905273,
      "learning_rate": 8.297501724411865e-05,
      "loss": 2.9989,
      "step": 174514
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0262978076934814,
      "learning_rate": 8.297219309203156e-05,
      "loss": 2.9648,
      "step": 174515
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3463809490203857,
      "learning_rate": 8.296936898029412e-05,
      "loss": 2.6302,
      "step": 174516
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.047067403793335,
      "learning_rate": 8.296654490890678e-05,
      "loss": 2.9071,
      "step": 174517
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5620391368865967,
      "learning_rate": 8.296372087786993e-05,
      "loss": 2.7299,
      "step": 174518
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.034482479095459,
      "learning_rate": 8.296089688718437e-05,
      "loss": 2.8453,
      "step": 174519
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.412182569503784,
      "learning_rate": 8.295807293685042e-05,
      "loss": 3.1315,
      "step": 174520
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.841254472732544,
      "learning_rate": 8.29552490268686e-05,
      "loss": 2.6634,
      "step": 174521
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7497377395629883,
      "learning_rate": 8.295242515723957e-05,
      "loss": 2.9264,
      "step": 174522
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.169890880584717,
      "learning_rate": 8.294960132796374e-05,
      "loss": 2.774,
      "step": 174523
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8195645809173584,
      "learning_rate": 8.294677753904171e-05,
      "loss": 2.8584,
      "step": 174524
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.720900774002075,
      "learning_rate": 8.2943953790474e-05,
      "loss": 3.0249,
      "step": 174525
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.449449062347412,
      "learning_rate": 8.294113008226109e-05,
      "loss": 3.0652,
      "step": 174526
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.316098690032959,
      "learning_rate": 8.293830641440342e-05,
      "loss": 2.9033,
      "step": 174527
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.421492099761963,
      "learning_rate": 8.293548278690174e-05,
      "loss": 3.1132,
      "step": 174528
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3443965911865234,
      "learning_rate": 8.293265919975634e-05,
      "loss": 2.9978,
      "step": 174529
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.4884212017059326,
      "learning_rate": 8.292983565296797e-05,
      "loss": 2.7104,
      "step": 174530
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.849536895751953,
      "learning_rate": 8.2927012146537e-05,
      "loss": 2.721,
      "step": 174531
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7278892993927,
      "learning_rate": 8.292418868046403e-05,
      "loss": 3.0795,
      "step": 174532
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.22711706161499,
      "learning_rate": 8.292136525474943e-05,
      "loss": 2.7195,
      "step": 174533
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7096662521362305,
      "learning_rate": 8.291854186939397e-05,
      "loss": 2.9015,
      "step": 174534
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.716041326522827,
      "learning_rate": 8.291571852439793e-05,
      "loss": 2.8527,
      "step": 174535
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.048309087753296,
      "learning_rate": 8.291289521976208e-05,
      "loss": 2.8599,
      "step": 174536
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.582629919052124,
      "learning_rate": 8.29100719554868e-05,
      "loss": 2.8506,
      "step": 174537
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.924440383911133,
      "learning_rate": 8.290724873157264e-05,
      "loss": 2.8948,
      "step": 174538
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.73758864402771,
      "learning_rate": 8.290442554802003e-05,
      "loss": 2.9854,
      "step": 174539
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.260893821716309,
      "learning_rate": 8.290160240482968e-05,
      "loss": 2.8904,
      "step": 174540
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.903501033782959,
      "learning_rate": 8.289877930200191e-05,
      "loss": 3.0106,
      "step": 174541
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.599247932434082,
      "learning_rate": 8.289595623953748e-05,
      "loss": 3.0564,
      "step": 174542
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.759901523590088,
      "learning_rate": 8.289313321743676e-05,
      "loss": 2.8566,
      "step": 174543
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4134037494659424,
      "learning_rate": 8.289031023570032e-05,
      "loss": 2.99,
      "step": 174544
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7151644229888916,
      "learning_rate": 8.288748729432855e-05,
      "loss": 3.1404,
      "step": 174545
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5686137676239014,
      "learning_rate": 8.28846643933222e-05,
      "loss": 3.21,
      "step": 174546
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.520066499710083,
      "learning_rate": 8.288184153268164e-05,
      "loss": 2.8573,
      "step": 174547
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4405970573425293,
      "learning_rate": 8.287901871240747e-05,
      "loss": 2.9833,
      "step": 174548
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.856344223022461,
      "learning_rate": 8.287619593250022e-05,
      "loss": 3.0082,
      "step": 174549
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.385202646255493,
      "learning_rate": 8.287337319296041e-05,
      "loss": 2.817,
      "step": 174550
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6122331619262695,
      "learning_rate": 8.287055049378841e-05,
      "loss": 2.9445,
      "step": 174551
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4435312747955322,
      "learning_rate": 8.286772783498498e-05,
      "loss": 2.9621,
      "step": 174552
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4610936641693115,
      "learning_rate": 8.286490521655044e-05,
      "loss": 2.9861,
      "step": 174553
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.20939564704895,
      "learning_rate": 8.28620826384855e-05,
      "loss": 2.8353,
      "step": 174554
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5274112224578857,
      "learning_rate": 8.285926010079053e-05,
      "loss": 3.1117,
      "step": 174555
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.291378974914551,
      "learning_rate": 8.285643760346626e-05,
      "loss": 2.8421,
      "step": 174556
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8811447620391846,
      "learning_rate": 8.285361514651294e-05,
      "loss": 2.839,
      "step": 174557
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2293894290924072,
      "learning_rate": 8.28507927299313e-05,
      "loss": 2.9451,
      "step": 174558
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0436487197875977,
      "learning_rate": 8.284797035372172e-05,
      "loss": 2.7614,
      "step": 174559
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.759280204772949,
      "learning_rate": 8.284514801788488e-05,
      "loss": 2.9402,
      "step": 174560
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8334755897521973,
      "learning_rate": 8.284232572242114e-05,
      "loss": 2.9449,
      "step": 174561
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.309203624725342,
      "learning_rate": 8.283950346733129e-05,
      "loss": 2.937,
      "step": 174562
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.346209764480591,
      "learning_rate": 8.28366812526155e-05,
      "loss": 2.7172,
      "step": 174563
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6565239429473877,
      "learning_rate": 8.283385907827454e-05,
      "loss": 3.1803,
      "step": 174564
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.7276387214660645,
      "learning_rate": 8.283103694430881e-05,
      "loss": 3.0254,
      "step": 174565
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6486339569091797,
      "learning_rate": 8.282821485071896e-05,
      "loss": 2.9933,
      "step": 174566
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5214648246765137,
      "learning_rate": 8.282539279750536e-05,
      "loss": 2.9004,
      "step": 174567
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6441757678985596,
      "learning_rate": 8.282257078466877e-05,
      "loss": 3.0421,
      "step": 174568
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2298712730407715,
      "learning_rate": 8.281974881220943e-05,
      "loss": 2.9747,
      "step": 174569
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9659342765808105,
      "learning_rate": 8.281692688012806e-05,
      "loss": 3.1202,
      "step": 174570
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6676037311553955,
      "learning_rate": 8.281410498842504e-05,
      "loss": 2.8813,
      "step": 174571
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4925434589385986,
      "learning_rate": 8.281128313710108e-05,
      "loss": 2.8425,
      "step": 174572
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6254069805145264,
      "learning_rate": 8.280846132615646e-05,
      "loss": 2.9813,
      "step": 174573
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7791690826416016,
      "learning_rate": 8.280563955559208e-05,
      "loss": 2.9991,
      "step": 174574
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4747934341430664,
      "learning_rate": 8.280281782540805e-05,
      "loss": 3.0948,
      "step": 174575
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.25311541557312,
      "learning_rate": 8.279999613560513e-05,
      "loss": 2.9905,
      "step": 174576
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9205880165100098,
      "learning_rate": 8.279717448618375e-05,
      "loss": 2.8621,
      "step": 174577
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9119181632995605,
      "learning_rate": 8.279435287714453e-05,
      "loss": 2.7382,
      "step": 174578
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.80338716506958,
      "learning_rate": 8.279153130848785e-05,
      "loss": 2.9529,
      "step": 174579
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2761428356170654,
      "learning_rate": 8.278870978021452e-05,
      "loss": 2.9768,
      "step": 174580
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.821103811264038,
      "learning_rate": 8.278588829232467e-05,
      "loss": 3.1248,
      "step": 174581
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.295701265335083,
      "learning_rate": 8.278306684481913e-05,
      "loss": 2.9561,
      "step": 174582
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.880462646484375,
      "learning_rate": 8.278024543769823e-05,
      "loss": 3.056,
      "step": 174583
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.431246519088745,
      "learning_rate": 8.277742407096266e-05,
      "loss": 2.6218,
      "step": 174584
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7759711742401123,
      "learning_rate": 8.277460274461278e-05,
      "loss": 3.0646,
      "step": 174585
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3749380111694336,
      "learning_rate": 8.27717814586493e-05,
      "loss": 3.0233,
      "step": 174586
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9806935787200928,
      "learning_rate": 8.276896021307265e-05,
      "loss": 2.8678,
      "step": 174587
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2625486850738525,
      "learning_rate": 8.276613900788334e-05,
      "loss": 2.7885,
      "step": 174588
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5342390537261963,
      "learning_rate": 8.276331784308182e-05,
      "loss": 3.0262,
      "step": 174589
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4998433589935303,
      "learning_rate": 8.276049671866877e-05,
      "loss": 2.8902,
      "step": 174590
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5390403270721436,
      "learning_rate": 8.275767563464458e-05,
      "loss": 2.7903,
      "step": 174591
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.239867687225342,
      "learning_rate": 8.275485459100992e-05,
      "loss": 3.1183,
      "step": 174592
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.157870292663574,
      "learning_rate": 8.275203358776526e-05,
      "loss": 2.9523,
      "step": 174593
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3415157794952393,
      "learning_rate": 8.274921262491098e-05,
      "loss": 2.9354,
      "step": 174594
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7895641326904297,
      "learning_rate": 8.274639170244779e-05,
      "loss": 2.8994,
      "step": 174595
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.484262228012085,
      "learning_rate": 8.274357082037619e-05,
      "loss": 3.1819,
      "step": 174596
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.516836404800415,
      "learning_rate": 8.274074997869655e-05,
      "loss": 2.9234,
      "step": 174597
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.931891918182373,
      "learning_rate": 8.27379291774096e-05,
      "loss": 2.91,
      "step": 174598
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6133956909179688,
      "learning_rate": 8.273510841651577e-05,
      "loss": 2.8469,
      "step": 174599
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3708906173706055,
      "learning_rate": 8.27322876960155e-05,
      "loss": 2.8105,
      "step": 174600
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.731417655944824,
      "learning_rate": 8.272946701590948e-05,
      "loss": 3.3263,
      "step": 174601
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.167797803878784,
      "learning_rate": 8.272664637619815e-05,
      "loss": 3.0184,
      "step": 174602
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7344555854797363,
      "learning_rate": 8.272382577688193e-05,
      "loss": 3.1658,
      "step": 174603
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0165882110595703,
      "learning_rate": 8.272100521796158e-05,
      "loss": 3.1357,
      "step": 174604
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6184849739074707,
      "learning_rate": 8.271818469943747e-05,
      "loss": 3.2056,
      "step": 174605
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.386838436126709,
      "learning_rate": 8.271536422131009e-05,
      "loss": 2.81,
      "step": 174606
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1221728324890137,
      "learning_rate": 8.271254378358009e-05,
      "loss": 2.9489,
      "step": 174607
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.980623722076416,
      "learning_rate": 8.270972338624785e-05,
      "loss": 2.8137,
      "step": 174608
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7454464435577393,
      "learning_rate": 8.270690302931408e-05,
      "loss": 2.9092,
      "step": 174609
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4308853149414062,
      "learning_rate": 8.270408271277916e-05,
      "loss": 2.8987,
      "step": 174610
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6447746753692627,
      "learning_rate": 8.270126243664367e-05,
      "loss": 2.9899,
      "step": 174611
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2352242469787598,
      "learning_rate": 8.269844220090803e-05,
      "loss": 3.1507,
      "step": 174612
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9685916900634766,
      "learning_rate": 8.269562200557296e-05,
      "loss": 3.1239,
      "step": 174613
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.004746198654175,
      "learning_rate": 8.269280185063874e-05,
      "loss": 3.0528,
      "step": 174614
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2550883293151855,
      "learning_rate": 8.268998173610615e-05,
      "loss": 2.9161,
      "step": 174615
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.497945785522461,
      "learning_rate": 8.26871616619756e-05,
      "loss": 2.9164,
      "step": 174616
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6730737686157227,
      "learning_rate": 8.268434162824759e-05,
      "loss": 3.0841,
      "step": 174617
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7658610343933105,
      "learning_rate": 8.268152163492261e-05,
      "loss": 2.8687,
      "step": 174618
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8010268211364746,
      "learning_rate": 8.26787016820013e-05,
      "loss": 3.0685,
      "step": 174619
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.102034330368042,
      "learning_rate": 8.267588176948404e-05,
      "loss": 2.9719,
      "step": 174620
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3605549335479736,
      "learning_rate": 8.267306189737149e-05,
      "loss": 2.9035,
      "step": 174621
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3425838947296143,
      "learning_rate": 8.267024206566406e-05,
      "loss": 2.6971,
      "step": 174622
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4559600353240967,
      "learning_rate": 8.266742227436254e-05,
      "loss": 2.9165,
      "step": 174623
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.829864978790283,
      "learning_rate": 8.266460252346703e-05,
      "loss": 2.7753,
      "step": 174624
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5727994441986084,
      "learning_rate": 8.266178281297837e-05,
      "loss": 2.9159,
      "step": 174625
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6487233638763428,
      "learning_rate": 8.26589631428969e-05,
      "loss": 2.8611,
      "step": 174626
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.837186098098755,
      "learning_rate": 8.265614351322332e-05,
      "loss": 3.0044,
      "step": 174627
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3338260650634766,
      "learning_rate": 8.265332392395799e-05,
      "loss": 2.9801,
      "step": 174628
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.572324275970459,
      "learning_rate": 8.265050437510166e-05,
      "loss": 2.7552,
      "step": 174629
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6619973182678223,
      "learning_rate": 8.264768486665455e-05,
      "loss": 2.9771,
      "step": 174630
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7804911136627197,
      "learning_rate": 8.264486539861745e-05,
      "loss": 2.7677,
      "step": 174631
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6128978729248047,
      "learning_rate": 8.264204597099064e-05,
      "loss": 3.0735,
      "step": 174632
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.5395116806030273,
      "learning_rate": 8.263922658377489e-05,
      "loss": 2.8391,
      "step": 174633
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.383775472640991,
      "learning_rate": 8.26364072369705e-05,
      "loss": 3.1072,
      "step": 174634
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1377272605895996,
      "learning_rate": 8.263358793057829e-05,
      "loss": 3.1173,
      "step": 174635
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.143235206604004,
      "learning_rate": 8.263076866459843e-05,
      "loss": 2.8189,
      "step": 174636
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.77927565574646,
      "learning_rate": 8.262794943903169e-05,
      "loss": 2.9633,
      "step": 174637
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6822123527526855,
      "learning_rate": 8.262513025387843e-05,
      "loss": 3.3564,
      "step": 174638
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.590487241744995,
      "learning_rate": 8.262231110913938e-05,
      "loss": 3.0255,
      "step": 174639
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3477511405944824,
      "learning_rate": 8.261949200481484e-05,
      "loss": 2.7943,
      "step": 174640
    },
    {
      "epoch": 2.27,
      "grad_norm": 5.2131452560424805,
      "learning_rate": 8.261667294090559e-05,
      "loss": 2.9093,
      "step": 174641
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.625842094421387,
      "learning_rate": 8.261385391741185e-05,
      "loss": 2.8303,
      "step": 174642
    },
    {
      "epoch": 2.27,
      "grad_norm": 6.198440074920654,
      "learning_rate": 8.261103493433438e-05,
      "loss": 2.7035,
      "step": 174643
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2194836139678955,
      "learning_rate": 8.260821599167355e-05,
      "loss": 2.7801,
      "step": 174644
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3291430473327637,
      "learning_rate": 8.260539708943002e-05,
      "loss": 2.9579,
      "step": 174645
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.9899089336395264,
      "learning_rate": 8.26025782276042e-05,
      "loss": 2.9161,
      "step": 174646
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0381877422332764,
      "learning_rate": 8.259975940619681e-05,
      "loss": 3.1745,
      "step": 174647
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.737598896026611,
      "learning_rate": 8.259694062520805e-05,
      "loss": 2.9966,
      "step": 174648
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.499885559082031,
      "learning_rate": 8.259412188463874e-05,
      "loss": 3.208,
      "step": 174649
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.85504412651062,
      "learning_rate": 8.259130318448915e-05,
      "loss": 2.9481,
      "step": 174650
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.194375514984131,
      "learning_rate": 8.258848452476005e-05,
      "loss": 2.8624,
      "step": 174651
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.362548589706421,
      "learning_rate": 8.258566590545177e-05,
      "loss": 2.9399,
      "step": 174652
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0250837802886963,
      "learning_rate": 8.258284732656512e-05,
      "loss": 2.7383,
      "step": 174653
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0956690311431885,
      "learning_rate": 8.258002878810023e-05,
      "loss": 3.1222,
      "step": 174654
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7911438941955566,
      "learning_rate": 8.257721029005787e-05,
      "loss": 2.9757,
      "step": 174655
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.7815768718719482,
      "learning_rate": 8.257439183243844e-05,
      "loss": 3.0129,
      "step": 174656
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5964226722717285,
      "learning_rate": 8.257157341524265e-05,
      "loss": 3.0461,
      "step": 174657
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.535585641860962,
      "learning_rate": 8.256875503847082e-05,
      "loss": 2.6326,
      "step": 174658
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8515844345092773,
      "learning_rate": 8.256593670212372e-05,
      "loss": 2.9373,
      "step": 174659
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3162436485290527,
      "learning_rate": 8.256311840620155e-05,
      "loss": 2.7302,
      "step": 174660
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.841804027557373,
      "learning_rate": 8.256030015070508e-05,
      "loss": 2.703,
      "step": 174661
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5840201377868652,
      "learning_rate": 8.255748193563467e-05,
      "loss": 2.8767,
      "step": 174662
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2659852504730225,
      "learning_rate": 8.255466376099102e-05,
      "loss": 2.9007,
      "step": 174663
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5888547897338867,
      "learning_rate": 8.255184562677447e-05,
      "loss": 2.7808,
      "step": 174664
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5518808364868164,
      "learning_rate": 8.254902753298582e-05,
      "loss": 2.9926,
      "step": 174665
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.844604730606079,
      "learning_rate": 8.254620947962521e-05,
      "loss": 3.033,
      "step": 174666
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.167569398880005,
      "learning_rate": 8.254339146669348e-05,
      "loss": 2.9404,
      "step": 174667
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.037863254547119,
      "learning_rate": 8.254057349419093e-05,
      "loss": 2.9324,
      "step": 174668
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.3243532180786133,
      "learning_rate": 8.253775556211828e-05,
      "loss": 2.6512,
      "step": 174669
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.705565929412842,
      "learning_rate": 8.253493767047588e-05,
      "loss": 2.838,
      "step": 174670
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6482508182525635,
      "learning_rate": 8.253211981926446e-05,
      "loss": 2.8975,
      "step": 174671
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.708493232727051,
      "learning_rate": 8.25293020084844e-05,
      "loss": 2.9768,
      "step": 174672
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.384528160095215,
      "learning_rate": 8.252648423813626e-05,
      "loss": 2.8968,
      "step": 174673
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3159372806549072,
      "learning_rate": 8.252366650822044e-05,
      "loss": 3.0864,
      "step": 174674
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8424856662750244,
      "learning_rate": 8.25208488187377e-05,
      "loss": 2.8205,
      "step": 174675
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7474632263183594,
      "learning_rate": 8.251803116968832e-05,
      "loss": 3.0176,
      "step": 174676
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.836094617843628,
      "learning_rate": 8.251521356107302e-05,
      "loss": 2.9405,
      "step": 174677
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.691431760787964,
      "learning_rate": 8.251239599289228e-05,
      "loss": 3.0731,
      "step": 174678
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4882922172546387,
      "learning_rate": 8.250957846514657e-05,
      "loss": 3.0154,
      "step": 174679
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.293147325515747,
      "learning_rate": 8.250676097783635e-05,
      "loss": 2.8361,
      "step": 174680
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.264906883239746,
      "learning_rate": 8.250394353096231e-05,
      "loss": 2.7149,
      "step": 174681
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.380676746368408,
      "learning_rate": 8.250112612452483e-05,
      "loss": 2.8732,
      "step": 174682
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7972347736358643,
      "learning_rate": 8.249830875852456e-05,
      "loss": 2.8443,
      "step": 174683
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.774218797683716,
      "learning_rate": 8.2495491432962e-05,
      "loss": 3.0714,
      "step": 174684
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.770831823348999,
      "learning_rate": 8.24926741478375e-05,
      "loss": 2.9634,
      "step": 174685
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.763188123703003,
      "learning_rate": 8.248985690315183e-05,
      "loss": 2.8327,
      "step": 174686
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.3566110134124756,
      "learning_rate": 8.248703969890538e-05,
      "loss": 2.7837,
      "step": 174687
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.551002025604248,
      "learning_rate": 8.248422253509862e-05,
      "loss": 2.7679,
      "step": 174688
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.990325450897217,
      "learning_rate": 8.248140541173223e-05,
      "loss": 3.0999,
      "step": 174689
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.667471170425415,
      "learning_rate": 8.247858832880666e-05,
      "loss": 2.7554,
      "step": 174690
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.429802656173706,
      "learning_rate": 8.247577128632234e-05,
      "loss": 2.9245,
      "step": 174691
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.943720817565918,
      "learning_rate": 8.247295428427998e-05,
      "loss": 2.8459,
      "step": 174692
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.121896743774414,
      "learning_rate": 8.24701373226799e-05,
      "loss": 2.9798,
      "step": 174693
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.656946659088135,
      "learning_rate": 8.24673204015228e-05,
      "loss": 2.9255,
      "step": 174694
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8105220794677734,
      "learning_rate": 8.246450352080914e-05,
      "loss": 3.0687,
      "step": 174695
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.715055227279663,
      "learning_rate": 8.246168668053948e-05,
      "loss": 2.8832,
      "step": 174696
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.8195853233337402,
      "learning_rate": 8.245886988071415e-05,
      "loss": 2.7479,
      "step": 174697
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.8745453357696533,
      "learning_rate": 8.245605312133395e-05,
      "loss": 2.8696,
      "step": 174698
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.908018112182617,
      "learning_rate": 8.245323640239915e-05,
      "loss": 3.0245,
      "step": 174699
    },
    {
      "epoch": 2.27,
      "grad_norm": 4.4653191566467285,
      "learning_rate": 8.245041972391053e-05,
      "loss": 3.0157,
      "step": 174700
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.6686506271362305,
      "learning_rate": 8.244760308586847e-05,
      "loss": 3.0974,
      "step": 174701
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.90487003326416,
      "learning_rate": 8.24447864882735e-05,
      "loss": 2.8731,
      "step": 174702
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.20851731300354,
      "learning_rate": 8.244196993112606e-05,
      "loss": 2.8891,
      "step": 174703
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.1165034770965576,
      "learning_rate": 8.243915341442686e-05,
      "loss": 2.9418,
      "step": 174704
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.9308950901031494,
      "learning_rate": 8.243633693817623e-05,
      "loss": 3.0616,
      "step": 174705
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.610199451446533,
      "learning_rate": 8.24335205023749e-05,
      "loss": 3.0313,
      "step": 174706
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.431380271911621,
      "learning_rate": 8.243070410702316e-05,
      "loss": 3.0502,
      "step": 174707
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2523133754730225,
      "learning_rate": 8.242788775212187e-05,
      "loss": 3.1714,
      "step": 174708
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.192044734954834,
      "learning_rate": 8.242507143767115e-05,
      "loss": 3.0787,
      "step": 174709
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.6633222103118896,
      "learning_rate": 8.242225516367181e-05,
      "loss": 2.9084,
      "step": 174710
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.946244478225708,
      "learning_rate": 8.241943893012422e-05,
      "loss": 2.7832,
      "step": 174711
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.2949321269989014,
      "learning_rate": 8.2416622737029e-05,
      "loss": 2.8905,
      "step": 174712
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.4729020595550537,
      "learning_rate": 8.241380658438657e-05,
      "loss": 2.7285,
      "step": 174713
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5179874897003174,
      "learning_rate": 8.24109904721977e-05,
      "loss": 3.3442,
      "step": 174714
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.662391424179077,
      "learning_rate": 8.240817440046256e-05,
      "loss": 2.9309,
      "step": 174715
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.208042621612549,
      "learning_rate": 8.240535836918194e-05,
      "loss": 3.1273,
      "step": 174716
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.715409278869629,
      "learning_rate": 8.240254237835619e-05,
      "loss": 2.8632,
      "step": 174717
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0817296504974365,
      "learning_rate": 8.239972642798597e-05,
      "loss": 2.8178,
      "step": 174718
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.594999074935913,
      "learning_rate": 8.23969105180717e-05,
      "loss": 2.8789,
      "step": 174719
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.2904555797576904,
      "learning_rate": 8.239409464861411e-05,
      "loss": 2.7249,
      "step": 174720
    },
    {
      "epoch": 2.27,
      "grad_norm": 3.0165891647338867,
      "learning_rate": 8.239127881961338e-05,
      "loss": 3.0537,
      "step": 174721
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.7449333667755127,
      "learning_rate": 8.238846303107031e-05,
      "loss": 3.1518,
      "step": 174722
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5107479095458984,
      "learning_rate": 8.238564728298526e-05,
      "loss": 2.7479,
      "step": 174723
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.667893648147583,
      "learning_rate": 8.23828315753589e-05,
      "loss": 2.9029,
      "step": 174724
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.035111904144287,
      "learning_rate": 8.238001590819157e-05,
      "loss": 2.9047,
      "step": 174725
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5421411991119385,
      "learning_rate": 8.237720028148412e-05,
      "loss": 2.8648,
      "step": 174726
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.502697229385376,
      "learning_rate": 8.237438469523668e-05,
      "loss": 3.0549,
      "step": 174727
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.087086200714111,
      "learning_rate": 8.237156914945002e-05,
      "loss": 2.7188,
      "step": 174728
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5113039016723633,
      "learning_rate": 8.23687536441245e-05,
      "loss": 2.8715,
      "step": 174729
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.314023017883301,
      "learning_rate": 8.236593817926083e-05,
      "loss": 2.8257,
      "step": 174730
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8802900314331055,
      "learning_rate": 8.236312275485935e-05,
      "loss": 2.7687,
      "step": 174731
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3947694301605225,
      "learning_rate": 8.236030737092086e-05,
      "loss": 2.8985,
      "step": 174732
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.1113715171813965,
      "learning_rate": 8.235749202744552e-05,
      "loss": 3.0919,
      "step": 174733
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.888134717941284,
      "learning_rate": 8.23546767244341e-05,
      "loss": 2.8144,
      "step": 174734
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4742867946624756,
      "learning_rate": 8.235186146188698e-05,
      "loss": 3.0248,
      "step": 174735
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.283817768096924,
      "learning_rate": 8.234904623980487e-05,
      "loss": 3.2414,
      "step": 174736
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7110002040863037,
      "learning_rate": 8.234623105818805e-05,
      "loss": 3.08,
      "step": 174737
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.4922971725463867,
      "learning_rate": 8.234341591703735e-05,
      "loss": 2.9092,
      "step": 174738
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.592292547225952,
      "learning_rate": 8.234060081635298e-05,
      "loss": 2.8059,
      "step": 174739
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.904209613800049,
      "learning_rate": 8.233778575613565e-05,
      "loss": 2.6611,
      "step": 174740
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4982430934906006,
      "learning_rate": 8.233497073638578e-05,
      "loss": 2.7956,
      "step": 174741
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.409576892852783,
      "learning_rate": 8.233215575710402e-05,
      "loss": 3.0481,
      "step": 174742
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6685421466827393,
      "learning_rate": 8.232934081829073e-05,
      "loss": 3.1449,
      "step": 174743
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.221065044403076,
      "learning_rate": 8.232652591994672e-05,
      "loss": 2.7817,
      "step": 174744
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9422662258148193,
      "learning_rate": 8.23237110620721e-05,
      "loss": 2.7835,
      "step": 174745
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4215030670166016,
      "learning_rate": 8.232089624466774e-05,
      "loss": 2.7926,
      "step": 174746
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.709735870361328,
      "learning_rate": 8.231808146773393e-05,
      "loss": 2.8914,
      "step": 174747
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7558846473693848,
      "learning_rate": 8.231526673127139e-05,
      "loss": 2.7485,
      "step": 174748
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7047736644744873,
      "learning_rate": 8.231245203528044e-05,
      "loss": 2.8772,
      "step": 174749
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.339158058166504,
      "learning_rate": 8.230963737976191e-05,
      "loss": 2.6617,
      "step": 174750
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1837456226348877,
      "learning_rate": 8.230682276471596e-05,
      "loss": 3.0342,
      "step": 174751
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9469642639160156,
      "learning_rate": 8.230400819014335e-05,
      "loss": 2.6315,
      "step": 174752
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.982801914215088,
      "learning_rate": 8.230119365604446e-05,
      "loss": 2.9634,
      "step": 174753
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3462159633636475,
      "learning_rate": 8.229837916241997e-05,
      "loss": 2.7866,
      "step": 174754
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.021151542663574,
      "learning_rate": 8.229556470927023e-05,
      "loss": 2.9304,
      "step": 174755
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7841601371765137,
      "learning_rate": 8.229275029659596e-05,
      "loss": 2.7822,
      "step": 174756
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.49064564704895,
      "learning_rate": 8.228993592439756e-05,
      "loss": 2.9108,
      "step": 174757
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.631889820098877,
      "learning_rate": 8.228712159267556e-05,
      "loss": 2.6779,
      "step": 174758
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0226876735687256,
      "learning_rate": 8.228430730143045e-05,
      "loss": 2.8264,
      "step": 174759
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.606314182281494,
      "learning_rate": 8.228149305066286e-05,
      "loss": 3.0803,
      "step": 174760
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.229743719100952,
      "learning_rate": 8.227867884037313e-05,
      "loss": 2.843,
      "step": 174761
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.629648208618164,
      "learning_rate": 8.227586467056205e-05,
      "loss": 2.9875,
      "step": 174762
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3674490451812744,
      "learning_rate": 8.227305054122995e-05,
      "loss": 3.0562,
      "step": 174763
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7365550994873047,
      "learning_rate": 8.227023645237741e-05,
      "loss": 2.761,
      "step": 174764
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.814722776412964,
      "learning_rate": 8.226742240400488e-05,
      "loss": 2.8338,
      "step": 174765
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6108779907226562,
      "learning_rate": 8.226460839611301e-05,
      "loss": 2.9858,
      "step": 174766
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.605980157852173,
      "learning_rate": 8.226179442870216e-05,
      "loss": 2.8556,
      "step": 174767
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.552274703979492,
      "learning_rate": 8.225898050177305e-05,
      "loss": 2.9654,
      "step": 174768
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4588913917541504,
      "learning_rate": 8.225616661532613e-05,
      "loss": 2.647,
      "step": 174769
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.677306890487671,
      "learning_rate": 8.225335276936176e-05,
      "loss": 2.724,
      "step": 174770
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6622214317321777,
      "learning_rate": 8.225053896388075e-05,
      "loss": 2.6073,
      "step": 174771
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6872644424438477,
      "learning_rate": 8.224772519888345e-05,
      "loss": 3.202,
      "step": 174772
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.632673501968384,
      "learning_rate": 8.22449114743703e-05,
      "loss": 2.8797,
      "step": 174773
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7710213661193848,
      "learning_rate": 8.224209779034207e-05,
      "loss": 2.9201,
      "step": 174774
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.750809907913208,
      "learning_rate": 8.223928414679912e-05,
      "loss": 2.9288,
      "step": 174775
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6014091968536377,
      "learning_rate": 8.223647054374187e-05,
      "loss": 2.945,
      "step": 174776
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.103738784790039,
      "learning_rate": 8.223365698117111e-05,
      "loss": 2.6994,
      "step": 174777
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.143338680267334,
      "learning_rate": 8.22308434590871e-05,
      "loss": 3.075,
      "step": 174778
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.510228395462036,
      "learning_rate": 8.222802997749061e-05,
      "loss": 2.86,
      "step": 174779
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8257973194122314,
      "learning_rate": 8.222521653638203e-05,
      "loss": 2.9175,
      "step": 174780
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.544281482696533,
      "learning_rate": 8.222240313576189e-05,
      "loss": 2.8452,
      "step": 174781
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3804242610931396,
      "learning_rate": 8.221958977563059e-05,
      "loss": 2.8739,
      "step": 174782
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0869061946868896,
      "learning_rate": 8.221677645598891e-05,
      "loss": 3.1119,
      "step": 174783
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.728633165359497,
      "learning_rate": 8.221396317683714e-05,
      "loss": 2.9196,
      "step": 174784
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6372201442718506,
      "learning_rate": 8.221114993817601e-05,
      "loss": 3.1936,
      "step": 174785
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2521255016326904,
      "learning_rate": 8.220833674000593e-05,
      "loss": 3.0507,
      "step": 174786
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.214573383331299,
      "learning_rate": 8.220552358232743e-05,
      "loss": 3.1455,
      "step": 174787
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4207348823547363,
      "learning_rate": 8.220271046514093e-05,
      "loss": 2.9916,
      "step": 174788
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.135842800140381,
      "learning_rate": 8.219989738844714e-05,
      "loss": 2.9514,
      "step": 174789
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7864372730255127,
      "learning_rate": 8.219708435224643e-05,
      "loss": 3.0932,
      "step": 174790
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7470157146453857,
      "learning_rate": 8.21942713565395e-05,
      "loss": 2.9644,
      "step": 174791
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8860950469970703,
      "learning_rate": 8.219145840132668e-05,
      "loss": 3.1228,
      "step": 174792
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7469186782836914,
      "learning_rate": 8.218864548660872e-05,
      "loss": 2.9861,
      "step": 174793
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.737088918685913,
      "learning_rate": 8.218583261238584e-05,
      "loss": 2.8759,
      "step": 174794
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5299415588378906,
      "learning_rate": 8.218301977865884e-05,
      "loss": 2.9047,
      "step": 174795
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8783695697784424,
      "learning_rate": 8.2180206985428e-05,
      "loss": 3.0058,
      "step": 174796
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8350257873535156,
      "learning_rate": 8.217739423269407e-05,
      "loss": 3.0517,
      "step": 174797
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7499160766601562,
      "learning_rate": 8.217458152045739e-05,
      "loss": 2.7689,
      "step": 174798
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.211422920227051,
      "learning_rate": 8.217176884871874e-05,
      "loss": 2.8024,
      "step": 174799
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.174571990966797,
      "learning_rate": 8.216895621747829e-05,
      "loss": 2.9419,
      "step": 174800
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4531424045562744,
      "learning_rate": 8.216614362673685e-05,
      "loss": 3.0318,
      "step": 174801
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.454056978225708,
      "learning_rate": 8.216333107649474e-05,
      "loss": 3.1014,
      "step": 174802
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4107165336608887,
      "learning_rate": 8.216051856675268e-05,
      "loss": 2.9197,
      "step": 174803
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.375777244567871,
      "learning_rate": 8.215770609751098e-05,
      "loss": 2.8661,
      "step": 174804
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.065139055252075,
      "learning_rate": 8.215489366877045e-05,
      "loss": 3.1614,
      "step": 174805
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9213805198669434,
      "learning_rate": 8.215208128053125e-05,
      "loss": 3.2654,
      "step": 174806
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5063202381134033,
      "learning_rate": 8.214926893279424e-05,
      "loss": 2.9358,
      "step": 174807
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.437579870223999,
      "learning_rate": 8.214645662555964e-05,
      "loss": 2.7567,
      "step": 174808
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8195462226867676,
      "learning_rate": 8.214364435882822e-05,
      "loss": 2.8712,
      "step": 174809
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7011454105377197,
      "learning_rate": 8.214083213260037e-05,
      "loss": 3.1061,
      "step": 174810
    },
    {
      "epoch": 2.28,
      "grad_norm": 6.80748987197876,
      "learning_rate": 8.213801994687677e-05,
      "loss": 2.8464,
      "step": 174811
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.0502495765686035,
      "learning_rate": 8.213520780165769e-05,
      "loss": 2.8104,
      "step": 174812
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.506126403808594,
      "learning_rate": 8.213239569694386e-05,
      "loss": 2.7113,
      "step": 174813
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6469168663024902,
      "learning_rate": 8.212958363273565e-05,
      "loss": 2.944,
      "step": 174814
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.508105516433716,
      "learning_rate": 8.212677160903371e-05,
      "loss": 2.885,
      "step": 174815
    },
    {
      "epoch": 2.28,
      "grad_norm": 6.2773356437683105,
      "learning_rate": 8.212395962583848e-05,
      "loss": 3.0069,
      "step": 174816
    },
    {
      "epoch": 2.28,
      "grad_norm": 6.502407073974609,
      "learning_rate": 8.212114768315068e-05,
      "loss": 3.0001,
      "step": 174817
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.0380706787109375,
      "learning_rate": 8.21183357809705e-05,
      "loss": 2.9079,
      "step": 174818
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6367337703704834,
      "learning_rate": 8.211552391929869e-05,
      "loss": 3.1007,
      "step": 174819
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.636943817138672,
      "learning_rate": 8.211271209813564e-05,
      "loss": 2.9654,
      "step": 174820
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1750195026397705,
      "learning_rate": 8.210990031748208e-05,
      "loss": 3.0557,
      "step": 174821
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.248659610748291,
      "learning_rate": 8.210708857733826e-05,
      "loss": 2.7781,
      "step": 174822
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.9184553623199463,
      "learning_rate": 8.210427687770497e-05,
      "loss": 3.1894,
      "step": 174823
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.869004249572754,
      "learning_rate": 8.21014652185826e-05,
      "loss": 2.8939,
      "step": 174824
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.700282573699951,
      "learning_rate": 8.209865359997167e-05,
      "loss": 3.1955,
      "step": 174825
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.990795373916626,
      "learning_rate": 8.209584202187264e-05,
      "loss": 3.0325,
      "step": 174826
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.088960647583008,
      "learning_rate": 8.20930304842862e-05,
      "loss": 2.7355,
      "step": 174827
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4105377197265625,
      "learning_rate": 8.209021898721264e-05,
      "loss": 2.8873,
      "step": 174828
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3120930194854736,
      "learning_rate": 8.208740753065275e-05,
      "loss": 3.0193,
      "step": 174829
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.086249589920044,
      "learning_rate": 8.208459611460695e-05,
      "loss": 2.865,
      "step": 174830
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.20469856262207,
      "learning_rate": 8.208178473907571e-05,
      "loss": 2.5341,
      "step": 174831
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7268667221069336,
      "learning_rate": 8.207897340405946e-05,
      "loss": 2.8777,
      "step": 174832
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.261385917663574,
      "learning_rate": 8.207616210955896e-05,
      "loss": 3.0526,
      "step": 174833
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5336062908172607,
      "learning_rate": 8.207335085557453e-05,
      "loss": 3.0285,
      "step": 174834
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.5515456199646,
      "learning_rate": 8.207053964210688e-05,
      "loss": 2.7242,
      "step": 174835
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9983322620391846,
      "learning_rate": 8.206772846915639e-05,
      "loss": 3.2456,
      "step": 174836
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7199208736419678,
      "learning_rate": 8.206491733672365e-05,
      "loss": 2.9302,
      "step": 174837
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.716874361038208,
      "learning_rate": 8.206210624480908e-05,
      "loss": 3.048,
      "step": 174838
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.715839147567749,
      "learning_rate": 8.205929519341335e-05,
      "loss": 2.9867,
      "step": 174839
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2098548412323,
      "learning_rate": 8.205648418253683e-05,
      "loss": 2.9273,
      "step": 174840
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7831180095672607,
      "learning_rate": 8.20536732121802e-05,
      "loss": 2.9999,
      "step": 174841
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.38008975982666,
      "learning_rate": 8.20508622823439e-05,
      "loss": 3.1901,
      "step": 174842
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6292898654937744,
      "learning_rate": 8.204805139302848e-05,
      "loss": 3.0785,
      "step": 174843
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.539905309677124,
      "learning_rate": 8.204524054423434e-05,
      "loss": 3.0203,
      "step": 174844
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7093093395233154,
      "learning_rate": 8.204242973596221e-05,
      "loss": 3.2083,
      "step": 174845
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3734421730041504,
      "learning_rate": 8.20396189682124e-05,
      "loss": 2.8966,
      "step": 174846
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9832911491394043,
      "learning_rate": 8.203680824098563e-05,
      "loss": 2.9468,
      "step": 174847
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4940500259399414,
      "learning_rate": 8.203399755428232e-05,
      "loss": 2.7922,
      "step": 174848
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.927922248840332,
      "learning_rate": 8.203118690810302e-05,
      "loss": 2.9903,
      "step": 174849
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4805338382720947,
      "learning_rate": 8.202837630244814e-05,
      "loss": 3.0846,
      "step": 174850
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.538296937942505,
      "learning_rate": 8.202556573731842e-05,
      "loss": 2.7678,
      "step": 174851
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6527469158172607,
      "learning_rate": 8.202275521271412e-05,
      "loss": 2.916,
      "step": 174852
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.1449337005615234,
      "learning_rate": 8.201994472863603e-05,
      "loss": 3.0603,
      "step": 174853
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2621119022369385,
      "learning_rate": 8.201713428508456e-05,
      "loss": 2.9444,
      "step": 174854
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5643787384033203,
      "learning_rate": 8.201432388206008e-05,
      "loss": 3.0357,
      "step": 174855
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.507641315460205,
      "learning_rate": 8.201151351956336e-05,
      "loss": 3.1078,
      "step": 174856
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.441707611083984,
      "learning_rate": 8.200870319759483e-05,
      "loss": 2.7878,
      "step": 174857
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5328800678253174,
      "learning_rate": 8.20058929161549e-05,
      "loss": 3.1738,
      "step": 174858
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6433424949645996,
      "learning_rate": 8.200308267524429e-05,
      "loss": 2.8414,
      "step": 174859
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6862316131591797,
      "learning_rate": 8.200027247486339e-05,
      "loss": 3.0355,
      "step": 174860
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5860750675201416,
      "learning_rate": 8.199746231501271e-05,
      "loss": 3.1771,
      "step": 174861
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.1946606636047363,
      "learning_rate": 8.199465219569287e-05,
      "loss": 3.0934,
      "step": 174862
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.995739459991455,
      "learning_rate": 8.199184211690438e-05,
      "loss": 2.7083,
      "step": 174863
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.086435317993164,
      "learning_rate": 8.198903207864759e-05,
      "loss": 2.8416,
      "step": 174864
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.231794834136963,
      "learning_rate": 8.198622208092324e-05,
      "loss": 2.6455,
      "step": 174865
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5100905895233154,
      "learning_rate": 8.198341212373178e-05,
      "loss": 3.0183,
      "step": 174866
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.42205548286438,
      "learning_rate": 8.198060220707366e-05,
      "loss": 3.0178,
      "step": 174867
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.0182912349700928,
      "learning_rate": 8.197779233094953e-05,
      "loss": 2.9869,
      "step": 174868
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9328372478485107,
      "learning_rate": 8.197498249535973e-05,
      "loss": 2.7009,
      "step": 174869
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4783337116241455,
      "learning_rate": 8.197217270030503e-05,
      "loss": 2.9946,
      "step": 174870
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7842535972595215,
      "learning_rate": 8.196936294578577e-05,
      "loss": 2.9875,
      "step": 174871
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6021907329559326,
      "learning_rate": 8.196655323180257e-05,
      "loss": 2.9841,
      "step": 174872
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.850813150405884,
      "learning_rate": 8.196374355835577e-05,
      "loss": 3.0158,
      "step": 174873
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.866760730743408,
      "learning_rate": 8.196093392544614e-05,
      "loss": 2.9207,
      "step": 174874
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0780696868896484,
      "learning_rate": 8.195812433307398e-05,
      "loss": 2.904,
      "step": 174875
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.011344909667969,
      "learning_rate": 8.195531478124004e-05,
      "loss": 2.7411,
      "step": 174876
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3873019218444824,
      "learning_rate": 8.195250526994462e-05,
      "loss": 2.919,
      "step": 174877
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5456480979919434,
      "learning_rate": 8.194969579918851e-05,
      "loss": 3.0656,
      "step": 174878
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.626523017883301,
      "learning_rate": 8.194688636897189e-05,
      "loss": 2.7965,
      "step": 174879
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.596038818359375,
      "learning_rate": 8.194407697929556e-05,
      "loss": 2.9777,
      "step": 174880
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6181461811065674,
      "learning_rate": 8.194126763015986e-05,
      "loss": 3.1565,
      "step": 174881
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.45259165763855,
      "learning_rate": 8.193845832156549e-05,
      "loss": 3.0583,
      "step": 174882
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7205376625061035,
      "learning_rate": 8.193564905351277e-05,
      "loss": 2.6806,
      "step": 174883
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.767087936401367,
      "learning_rate": 8.193283982600251e-05,
      "loss": 2.7658,
      "step": 174884
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.517549991607666,
      "learning_rate": 8.193003063903487e-05,
      "loss": 2.9557,
      "step": 174885
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.149066686630249,
      "learning_rate": 8.192722149261063e-05,
      "loss": 2.9847,
      "step": 174886
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.709261894226074,
      "learning_rate": 8.192441238673019e-05,
      "loss": 2.9969,
      "step": 174887
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7175021171569824,
      "learning_rate": 8.192160332139418e-05,
      "loss": 2.7958,
      "step": 174888
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.818009614944458,
      "learning_rate": 8.191879429660293e-05,
      "loss": 2.8672,
      "step": 174889
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.337378978729248,
      "learning_rate": 8.191598531235724e-05,
      "loss": 3.1277,
      "step": 174890
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.602586507797241,
      "learning_rate": 8.19131763686575e-05,
      "loss": 2.8828,
      "step": 174891
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.555455446243286,
      "learning_rate": 8.191036746550416e-05,
      "loss": 3.3026,
      "step": 174892
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.321021556854248,
      "learning_rate": 8.190755860289774e-05,
      "loss": 2.9037,
      "step": 174893
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6096949577331543,
      "learning_rate": 8.19047497808389e-05,
      "loss": 2.8675,
      "step": 174894
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.593625783920288,
      "learning_rate": 8.190194099932799e-05,
      "loss": 3.0698,
      "step": 174895
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.535386800765991,
      "learning_rate": 8.189913225836575e-05,
      "loss": 2.946,
      "step": 174896
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0630385875701904,
      "learning_rate": 8.189632355795257e-05,
      "loss": 3.0486,
      "step": 174897
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.930518865585327,
      "learning_rate": 8.189351489808895e-05,
      "loss": 2.7917,
      "step": 174898
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3013405799865723,
      "learning_rate": 8.189070627877536e-05,
      "loss": 3.0547,
      "step": 174899
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5712084770202637,
      "learning_rate": 8.188789770001251e-05,
      "loss": 2.9084,
      "step": 174900
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.260878086090088,
      "learning_rate": 8.188508916180074e-05,
      "loss": 3.0654,
      "step": 174901
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2656521797180176,
      "learning_rate": 8.18822806641407e-05,
      "loss": 2.8266,
      "step": 174902
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.978651762008667,
      "learning_rate": 8.187947220703291e-05,
      "loss": 2.7827,
      "step": 174903
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.460131883621216,
      "learning_rate": 8.18766637904778e-05,
      "loss": 3.0228,
      "step": 174904
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5291364192962646,
      "learning_rate": 8.187385541447585e-05,
      "loss": 2.9037,
      "step": 174905
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.618410587310791,
      "learning_rate": 8.187104707902779e-05,
      "loss": 2.7642,
      "step": 174906
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.435817241668701,
      "learning_rate": 8.186823878413389e-05,
      "loss": 3.0447,
      "step": 174907
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.115248203277588,
      "learning_rate": 8.186543052979492e-05,
      "loss": 2.7057,
      "step": 174908
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9260449409484863,
      "learning_rate": 8.186262231601127e-05,
      "loss": 3.0958,
      "step": 174909
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.128783702850342,
      "learning_rate": 8.185981414278348e-05,
      "loss": 2.8973,
      "step": 174910
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2711846828460693,
      "learning_rate": 8.185700601011196e-05,
      "loss": 3.0057,
      "step": 174911
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.54721999168396,
      "learning_rate": 8.185419791799745e-05,
      "loss": 2.8814,
      "step": 174912
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.57806921005249,
      "learning_rate": 8.185138986644028e-05,
      "loss": 3.0176,
      "step": 174913
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.009387969970703,
      "learning_rate": 8.184858185544115e-05,
      "loss": 2.785,
      "step": 174914
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6469433307647705,
      "learning_rate": 8.184577388500045e-05,
      "loss": 2.8158,
      "step": 174915
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.341031312942505,
      "learning_rate": 8.184296595511878e-05,
      "loss": 2.9214,
      "step": 174916
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.1166298389434814,
      "learning_rate": 8.184015806579648e-05,
      "loss": 2.9651,
      "step": 174917
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2202308177948,
      "learning_rate": 8.183735021703436e-05,
      "loss": 2.9602,
      "step": 174918
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.780938148498535,
      "learning_rate": 8.183454240883268e-05,
      "loss": 2.9636,
      "step": 174919
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.53861665725708,
      "learning_rate": 8.183173464119215e-05,
      "loss": 2.9556,
      "step": 174920
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.642040729522705,
      "learning_rate": 8.182892691411326e-05,
      "loss": 3.0985,
      "step": 174921
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6753313541412354,
      "learning_rate": 8.182611922759644e-05,
      "loss": 2.9451,
      "step": 174922
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.752878427505493,
      "learning_rate": 8.182331158164222e-05,
      "loss": 3.028,
      "step": 174923
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7927112579345703,
      "learning_rate": 8.182050397625123e-05,
      "loss": 3.1132,
      "step": 174924
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.86234712600708,
      "learning_rate": 8.181769641142386e-05,
      "loss": 2.9464,
      "step": 174925
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9066731929779053,
      "learning_rate": 8.181488888716075e-05,
      "loss": 3.0391,
      "step": 174926
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.723825216293335,
      "learning_rate": 8.181208140346239e-05,
      "loss": 2.8576,
      "step": 174927
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.867933511734009,
      "learning_rate": 8.180927396032929e-05,
      "loss": 2.9201,
      "step": 174928
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.379716157913208,
      "learning_rate": 8.180646655776183e-05,
      "loss": 2.9058,
      "step": 174929
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0303759574890137,
      "learning_rate": 8.18036591957608e-05,
      "loss": 2.8926,
      "step": 174930
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4032132625579834,
      "learning_rate": 8.180085187432653e-05,
      "loss": 2.985,
      "step": 174931
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.879417896270752,
      "learning_rate": 8.179804459345963e-05,
      "loss": 2.7687,
      "step": 174932
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6719186305999756,
      "learning_rate": 8.17952373531606e-05,
      "loss": 2.9609,
      "step": 174933
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7239723205566406,
      "learning_rate": 8.179243015343e-05,
      "loss": 2.8286,
      "step": 174934
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5646536350250244,
      "learning_rate": 8.178962299426818e-05,
      "loss": 3.0586,
      "step": 174935
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.113416910171509,
      "learning_rate": 8.178681587567588e-05,
      "loss": 2.9471,
      "step": 174936
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4720256328582764,
      "learning_rate": 8.178400879765344e-05,
      "loss": 3.0654,
      "step": 174937
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0266308784484863,
      "learning_rate": 8.178120176020156e-05,
      "loss": 2.965,
      "step": 174938
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9990627765655518,
      "learning_rate": 8.177839476332067e-05,
      "loss": 3.0161,
      "step": 174939
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2502262592315674,
      "learning_rate": 8.177558780701122e-05,
      "loss": 2.9598,
      "step": 174940
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.852139949798584,
      "learning_rate": 8.17727808912739e-05,
      "loss": 2.9413,
      "step": 174941
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.8912746906280518,
      "learning_rate": 8.176997401610914e-05,
      "loss": 2.808,
      "step": 174942
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6742756366729736,
      "learning_rate": 8.176716718151736e-05,
      "loss": 3.0966,
      "step": 174943
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4083433151245117,
      "learning_rate": 8.176436038749925e-05,
      "loss": 2.708,
      "step": 174944
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.531578779220581,
      "learning_rate": 8.176155363405533e-05,
      "loss": 2.8317,
      "step": 174945
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2522189617156982,
      "learning_rate": 8.17587469211859e-05,
      "loss": 2.857,
      "step": 174946
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6121673583984375,
      "learning_rate": 8.175594024889176e-05,
      "loss": 2.8269,
      "step": 174947
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.463498592376709,
      "learning_rate": 8.175313361717329e-05,
      "loss": 2.932,
      "step": 174948
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.258659601211548,
      "learning_rate": 8.175032702603097e-05,
      "loss": 2.6609,
      "step": 174949
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5435643196105957,
      "learning_rate": 8.174752047546545e-05,
      "loss": 2.9477,
      "step": 174950
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.718170166015625,
      "learning_rate": 8.174471396547722e-05,
      "loss": 2.7715,
      "step": 174951
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.99534273147583,
      "learning_rate": 8.174190749606664e-05,
      "loss": 2.8992,
      "step": 174952
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.781170129776001,
      "learning_rate": 8.17391010672345e-05,
      "loss": 2.9748,
      "step": 174953
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4705045223236084,
      "learning_rate": 8.173629467898104e-05,
      "loss": 2.965,
      "step": 174954
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1443612575531006,
      "learning_rate": 8.173348833130703e-05,
      "loss": 2.8361,
      "step": 174955
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3163366317749023,
      "learning_rate": 8.17306820242129e-05,
      "loss": 2.9517,
      "step": 174956
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3442635536193848,
      "learning_rate": 8.172787575769902e-05,
      "loss": 2.8985,
      "step": 174957
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.226898670196533,
      "learning_rate": 8.172506953176616e-05,
      "loss": 3.3027,
      "step": 174958
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8783152103424072,
      "learning_rate": 8.172226334641474e-05,
      "loss": 2.8522,
      "step": 174959
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8133530616760254,
      "learning_rate": 8.171945720164519e-05,
      "loss": 2.6513,
      "step": 174960
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.666137218475342,
      "learning_rate": 8.17166510974582e-05,
      "loss": 2.9101,
      "step": 174961
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6113076210021973,
      "learning_rate": 8.171384503385419e-05,
      "loss": 2.7797,
      "step": 174962
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4483444690704346,
      "learning_rate": 8.171103901083362e-05,
      "loss": 2.9945,
      "step": 174963
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3874049186706543,
      "learning_rate": 8.170823302839718e-05,
      "loss": 2.8595,
      "step": 174964
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2930333614349365,
      "learning_rate": 8.170542708654528e-05,
      "loss": 3.0814,
      "step": 174965
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8226702213287354,
      "learning_rate": 8.170262118527838e-05,
      "loss": 2.8818,
      "step": 174966
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.916184425354004,
      "learning_rate": 8.169981532459719e-05,
      "loss": 2.8116,
      "step": 174967
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.34429931640625,
      "learning_rate": 8.169700950450202e-05,
      "loss": 3.0206,
      "step": 174968
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5857293605804443,
      "learning_rate": 8.169420372499359e-05,
      "loss": 2.8391,
      "step": 174969
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.543222427368164,
      "learning_rate": 8.169139798607232e-05,
      "loss": 3.0478,
      "step": 174970
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.81339955329895,
      "learning_rate": 8.168859228773876e-05,
      "loss": 2.9686,
      "step": 174971
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.4522669315338135,
      "learning_rate": 8.16857866299933e-05,
      "loss": 3.1112,
      "step": 174972
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.8004519939422607,
      "learning_rate": 8.168298101283667e-05,
      "loss": 2.8082,
      "step": 174973
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.4069929122924805,
      "learning_rate": 8.168017543626921e-05,
      "loss": 2.9826,
      "step": 174974
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3728387355804443,
      "learning_rate": 8.167736990029163e-05,
      "loss": 3.1676,
      "step": 174975
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1103007793426514,
      "learning_rate": 8.167456440490433e-05,
      "loss": 2.6984,
      "step": 174976
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9591546058654785,
      "learning_rate": 8.167175895010786e-05,
      "loss": 3.0816,
      "step": 174977
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5181655883789062,
      "learning_rate": 8.166895353590261e-05,
      "loss": 2.7987,
      "step": 174978
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6518828868865967,
      "learning_rate": 8.166614816228934e-05,
      "loss": 2.8931,
      "step": 174979
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.595691680908203,
      "learning_rate": 8.166334282926836e-05,
      "loss": 2.6197,
      "step": 174980
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2858927249908447,
      "learning_rate": 8.16605375368404e-05,
      "loss": 3.0782,
      "step": 174981
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.230231285095215,
      "learning_rate": 8.165773228500583e-05,
      "loss": 2.9952,
      "step": 174982
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8183515071868896,
      "learning_rate": 8.165492707376521e-05,
      "loss": 3.0565,
      "step": 174983
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.37115740776062,
      "learning_rate": 8.165212190311901e-05,
      "loss": 3.0766,
      "step": 174984
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5171751976013184,
      "learning_rate": 8.164931677306786e-05,
      "loss": 2.655,
      "step": 174985
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3875603675842285,
      "learning_rate": 8.164651168361214e-05,
      "loss": 2.8558,
      "step": 174986
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5205938816070557,
      "learning_rate": 8.164370663475252e-05,
      "loss": 2.9247,
      "step": 174987
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.979274272918701,
      "learning_rate": 8.164090162648949e-05,
      "loss": 2.8819,
      "step": 174988
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.759883403778076,
      "learning_rate": 8.163809665882356e-05,
      "loss": 2.9597,
      "step": 174989
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.430553436279297,
      "learning_rate": 8.163529173175509e-05,
      "loss": 2.8435,
      "step": 174990
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4843225479125977,
      "learning_rate": 8.163248684528486e-05,
      "loss": 3.0045,
      "step": 174991
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7419190406799316,
      "learning_rate": 8.162968199941316e-05,
      "loss": 2.9942,
      "step": 174992
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0132088661193848,
      "learning_rate": 8.162687719414072e-05,
      "loss": 2.7249,
      "step": 174993
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.67411208152771,
      "learning_rate": 8.162407242946798e-05,
      "loss": 3.1601,
      "step": 174994
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6122794151306152,
      "learning_rate": 8.162126770539546e-05,
      "loss": 2.9196,
      "step": 174995
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.854300022125244,
      "learning_rate": 8.161846302192355e-05,
      "loss": 2.837,
      "step": 174996
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.139459609985352,
      "learning_rate": 8.1615658379053e-05,
      "loss": 2.7292,
      "step": 174997
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5350327491760254,
      "learning_rate": 8.161285377678411e-05,
      "loss": 2.6914,
      "step": 174998
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5701653957366943,
      "learning_rate": 8.161004921511762e-05,
      "loss": 2.9582,
      "step": 174999
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.472071886062622,
      "learning_rate": 8.160724469405393e-05,
      "loss": 2.849,
      "step": 175000
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0437512397766113,
      "learning_rate": 8.16044402135936e-05,
      "loss": 2.9747,
      "step": 175001
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2452914714813232,
      "learning_rate": 8.1601635773737e-05,
      "loss": 2.748,
      "step": 175002
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5620813369750977,
      "learning_rate": 8.159883137448491e-05,
      "loss": 2.9563,
      "step": 175003
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3548269271850586,
      "learning_rate": 8.15960270158376e-05,
      "loss": 3.0015,
      "step": 175004
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4815707206726074,
      "learning_rate": 8.15932226977958e-05,
      "loss": 2.7576,
      "step": 175005
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.155620813369751,
      "learning_rate": 8.159041842035996e-05,
      "loss": 3.143,
      "step": 175006
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.720445394515991,
      "learning_rate": 8.158761418353058e-05,
      "loss": 2.9705,
      "step": 175007
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.331454277038574,
      "learning_rate": 8.158480998730808e-05,
      "loss": 2.8628,
      "step": 175008
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1004621982574463,
      "learning_rate": 8.158200583169321e-05,
      "loss": 3.006,
      "step": 175009
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.426830530166626,
      "learning_rate": 8.157920171668625e-05,
      "loss": 2.59,
      "step": 175010
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5327553749084473,
      "learning_rate": 8.157639764228793e-05,
      "loss": 3.2216,
      "step": 175011
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7441370487213135,
      "learning_rate": 8.157359360849867e-05,
      "loss": 2.7687,
      "step": 175012
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.353423833847046,
      "learning_rate": 8.157078961531903e-05,
      "loss": 3.0312,
      "step": 175013
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.936469316482544,
      "learning_rate": 8.15679856627494e-05,
      "loss": 2.6049,
      "step": 175014
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7821598052978516,
      "learning_rate": 8.156518175079052e-05,
      "loss": 2.8278,
      "step": 175015
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6901798248291016,
      "learning_rate": 8.156237787944269e-05,
      "loss": 2.7566,
      "step": 175016
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.55046010017395,
      "learning_rate": 8.15595740487066e-05,
      "loss": 2.8985,
      "step": 175017
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2323172092437744,
      "learning_rate": 8.155677025858272e-05,
      "loss": 2.9095,
      "step": 175018
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.8828635215759277,
      "learning_rate": 8.155396650907157e-05,
      "loss": 3.093,
      "step": 175019
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8274412155151367,
      "learning_rate": 8.155116280017359e-05,
      "loss": 2.7934,
      "step": 175020
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3973944187164307,
      "learning_rate": 8.154835913188946e-05,
      "loss": 3.1221,
      "step": 175021
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.469609022140503,
      "learning_rate": 8.15455555042195e-05,
      "loss": 2.9829,
      "step": 175022
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7837345600128174,
      "learning_rate": 8.154275191716442e-05,
      "loss": 2.7779,
      "step": 175023
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5309886932373047,
      "learning_rate": 8.153994837072458e-05,
      "loss": 2.9041,
      "step": 175024
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.018118143081665,
      "learning_rate": 8.15371448649008e-05,
      "loss": 2.9486,
      "step": 175025
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8608641624450684,
      "learning_rate": 8.153434139969318e-05,
      "loss": 2.9049,
      "step": 175026
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9624738693237305,
      "learning_rate": 8.153153797510258e-05,
      "loss": 2.9687,
      "step": 175027
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6403136253356934,
      "learning_rate": 8.152873459112925e-05,
      "loss": 2.8677,
      "step": 175028
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4279422760009766,
      "learning_rate": 8.152593124777397e-05,
      "loss": 2.8261,
      "step": 175029
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.4984421730041504,
      "learning_rate": 8.152312794503706e-05,
      "loss": 3.0022,
      "step": 175030
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4110589027404785,
      "learning_rate": 8.152032468291919e-05,
      "loss": 2.7442,
      "step": 175031
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4345932006835938,
      "learning_rate": 8.151752146142084e-05,
      "loss": 2.9787,
      "step": 175032
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9841628074645996,
      "learning_rate": 8.151471828054251e-05,
      "loss": 2.8844,
      "step": 175033
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6803104877471924,
      "learning_rate": 8.15119151402846e-05,
      "loss": 2.9011,
      "step": 175034
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.184846878051758,
      "learning_rate": 8.150911204064786e-05,
      "loss": 2.9064,
      "step": 175035
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.471508741378784,
      "learning_rate": 8.15063089816326e-05,
      "loss": 2.9485,
      "step": 175036
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.56303334236145,
      "learning_rate": 8.150350596323954e-05,
      "loss": 2.8539,
      "step": 175037
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5872716903686523,
      "learning_rate": 8.150070298546911e-05,
      "loss": 3.0747,
      "step": 175038
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5369622707366943,
      "learning_rate": 8.149790004832173e-05,
      "loss": 2.7636,
      "step": 175039
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6466197967529297,
      "learning_rate": 8.149509715179809e-05,
      "loss": 2.996,
      "step": 175040
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6016154289245605,
      "learning_rate": 8.149229429589864e-05,
      "loss": 3.0278,
      "step": 175041
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.937927007675171,
      "learning_rate": 8.148949148062381e-05,
      "loss": 2.8809,
      "step": 175042
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.1957385540008545,
      "learning_rate": 8.14866887059743e-05,
      "loss": 3.005,
      "step": 175043
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4135797023773193,
      "learning_rate": 8.148388597195054e-05,
      "loss": 2.8146,
      "step": 175044
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3593544960021973,
      "learning_rate": 8.148108327855293e-05,
      "loss": 2.8387,
      "step": 175045
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.564614772796631,
      "learning_rate": 8.147828062578225e-05,
      "loss": 3.0887,
      "step": 175046
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.07835054397583,
      "learning_rate": 8.147547801363885e-05,
      "loss": 2.7484,
      "step": 175047
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.170682191848755,
      "learning_rate": 8.147267544212322e-05,
      "loss": 2.9859,
      "step": 175048
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5522139072418213,
      "learning_rate": 8.146987291123604e-05,
      "loss": 3.0138,
      "step": 175049
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4905824661254883,
      "learning_rate": 8.146707042097771e-05,
      "loss": 3.1207,
      "step": 175050
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4212241172790527,
      "learning_rate": 8.146426797134869e-05,
      "loss": 3.0114,
      "step": 175051
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6412785053253174,
      "learning_rate": 8.14614655623497e-05,
      "loss": 2.9912,
      "step": 175052
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.850215435028076,
      "learning_rate": 8.1458663193981e-05,
      "loss": 3.0448,
      "step": 175053
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.79610538482666,
      "learning_rate": 8.14558608662434e-05,
      "loss": 3.03,
      "step": 175054
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6177964210510254,
      "learning_rate": 8.145305857913728e-05,
      "loss": 3.2197,
      "step": 175055
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5244827270507812,
      "learning_rate": 8.145025633266315e-05,
      "loss": 3.0103,
      "step": 175056
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.902029514312744,
      "learning_rate": 8.144745412682144e-05,
      "loss": 2.9292,
      "step": 175057
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.494842529296875,
      "learning_rate": 8.14446519616129e-05,
      "loss": 3.16,
      "step": 175058
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.838491439819336,
      "learning_rate": 8.14418498370378e-05,
      "loss": 3.0332,
      "step": 175059
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.361425876617432,
      "learning_rate": 8.143904775309689e-05,
      "loss": 2.8221,
      "step": 175060
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0626790523529053,
      "learning_rate": 8.14362457097906e-05,
      "loss": 2.9522,
      "step": 175061
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.723896026611328,
      "learning_rate": 8.143344370711942e-05,
      "loss": 3.0977,
      "step": 175062
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3270750045776367,
      "learning_rate": 8.143064174508379e-05,
      "loss": 2.8285,
      "step": 175063
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0221335887908936,
      "learning_rate": 8.142783982368442e-05,
      "loss": 3.0452,
      "step": 175064
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.1851000785827637,
      "learning_rate": 8.142503794292169e-05,
      "loss": 2.8764,
      "step": 175065
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1370527744293213,
      "learning_rate": 8.142223610279622e-05,
      "loss": 2.8497,
      "step": 175066
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6877763271331787,
      "learning_rate": 8.14194343033085e-05,
      "loss": 3.1414,
      "step": 175067
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9978625774383545,
      "learning_rate": 8.141663254445904e-05,
      "loss": 2.8182,
      "step": 175068
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5281972885131836,
      "learning_rate": 8.141383082624824e-05,
      "loss": 3.01,
      "step": 175069
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7454721927642822,
      "learning_rate": 8.141102914867684e-05,
      "loss": 3.1691,
      "step": 175070
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3907999992370605,
      "learning_rate": 8.140822751174518e-05,
      "loss": 2.8064,
      "step": 175071
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7717204093933105,
      "learning_rate": 8.140542591545393e-05,
      "loss": 3.0595,
      "step": 175072
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.819077730178833,
      "learning_rate": 8.140262435980353e-05,
      "loss": 3.0074,
      "step": 175073
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0196335315704346,
      "learning_rate": 8.139982284479451e-05,
      "loss": 2.9078,
      "step": 175074
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.084624290466309,
      "learning_rate": 8.139702137042733e-05,
      "loss": 2.6949,
      "step": 175075
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.490779399871826,
      "learning_rate": 8.139421993670263e-05,
      "loss": 3.2287,
      "step": 175076
    },
    {
      "epoch": 2.28,
      "grad_norm": 6.3593058586120605,
      "learning_rate": 8.139141854362079e-05,
      "loss": 2.8995,
      "step": 175077
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.466235876083374,
      "learning_rate": 8.138861719118247e-05,
      "loss": 2.8307,
      "step": 175078
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.51229190826416,
      "learning_rate": 8.138581587938817e-05,
      "loss": 2.9722,
      "step": 175079
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7083072662353516,
      "learning_rate": 8.138301460823836e-05,
      "loss": 2.9733,
      "step": 175080
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7033932209014893,
      "learning_rate": 8.13802133777335e-05,
      "loss": 2.9273,
      "step": 175081
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7429020404815674,
      "learning_rate": 8.137741218787426e-05,
      "loss": 2.7947,
      "step": 175082
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1287405490875244,
      "learning_rate": 8.137461103866102e-05,
      "loss": 3.0909,
      "step": 175083
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.852170944213867,
      "learning_rate": 8.137180993009442e-05,
      "loss": 2.8536,
      "step": 175084
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.66963529586792,
      "learning_rate": 8.136900886217495e-05,
      "loss": 2.6765,
      "step": 175085
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.757006883621216,
      "learning_rate": 8.136620783490312e-05,
      "loss": 2.8507,
      "step": 175086
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.41225266456604,
      "learning_rate": 8.136340684827931e-05,
      "loss": 2.5808,
      "step": 175087
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.720241069793701,
      "learning_rate": 8.136060590230429e-05,
      "loss": 3.1569,
      "step": 175088
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5323877334594727,
      "learning_rate": 8.135780499697835e-05,
      "loss": 3.0306,
      "step": 175089
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8149378299713135,
      "learning_rate": 8.135500413230223e-05,
      "loss": 2.8391,
      "step": 175090
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6127164363861084,
      "learning_rate": 8.135220330827625e-05,
      "loss": 2.8047,
      "step": 175091
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.194617748260498,
      "learning_rate": 8.13494025249012e-05,
      "loss": 2.8321,
      "step": 175092
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.756619691848755,
      "learning_rate": 8.134660178217726e-05,
      "loss": 3.041,
      "step": 175093
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4653260707855225,
      "learning_rate": 8.134380108010516e-05,
      "loss": 2.8086,
      "step": 175094
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7293994426727295,
      "learning_rate": 8.134100041868532e-05,
      "loss": 2.8939,
      "step": 175095
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.019256114959717,
      "learning_rate": 8.133819979791839e-05,
      "loss": 3.0067,
      "step": 175096
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7231671810150146,
      "learning_rate": 8.133539921780474e-05,
      "loss": 2.837,
      "step": 175097
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.069344997406006,
      "learning_rate": 8.133259867834514e-05,
      "loss": 2.9545,
      "step": 175098
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7544076442718506,
      "learning_rate": 8.132979817953975e-05,
      "loss": 3.1493,
      "step": 175099
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.403280019760132,
      "learning_rate": 8.132699772138938e-05,
      "loss": 2.9006,
      "step": 175100
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5768959522247314,
      "learning_rate": 8.132419730389434e-05,
      "loss": 2.6967,
      "step": 175101
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.786106824874878,
      "learning_rate": 8.13213969270554e-05,
      "loss": 3.2165,
      "step": 175102
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.696380138397217,
      "learning_rate": 8.131859659087278e-05,
      "loss": 2.8321,
      "step": 175103
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4675302505493164,
      "learning_rate": 8.131579629534739e-05,
      "loss": 2.8394,
      "step": 175104
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.087223529815674,
      "learning_rate": 8.131299604047934e-05,
      "loss": 2.7948,
      "step": 175105
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.672612428665161,
      "learning_rate": 8.13101958262694e-05,
      "loss": 2.6171,
      "step": 175106
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.072048664093018,
      "learning_rate": 8.130739565271793e-05,
      "loss": 2.9031,
      "step": 175107
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.494051694869995,
      "learning_rate": 8.130459551982564e-05,
      "loss": 2.9356,
      "step": 175108
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2604987621307373,
      "learning_rate": 8.130179542759286e-05,
      "loss": 3.0257,
      "step": 175109
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.42130446434021,
      "learning_rate": 8.12989953760204e-05,
      "loss": 2.9244,
      "step": 175110
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9609146118164062,
      "learning_rate": 8.12961953651084e-05,
      "loss": 2.7444,
      "step": 175111
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.536989450454712,
      "learning_rate": 8.129339539485768e-05,
      "loss": 2.8671,
      "step": 175112
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7907769680023193,
      "learning_rate": 8.12905954652685e-05,
      "loss": 2.8743,
      "step": 175113
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4851112365722656,
      "learning_rate": 8.128779557634166e-05,
      "loss": 3.0381,
      "step": 175114
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.896104097366333,
      "learning_rate": 8.128499572807745e-05,
      "loss": 2.9646,
      "step": 175115
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.510989189147949,
      "learning_rate": 8.128219592047655e-05,
      "loss": 3.1186,
      "step": 175116
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5206081867218018,
      "learning_rate": 8.127939615353944e-05,
      "loss": 3.028,
      "step": 175117
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.377309560775757,
      "learning_rate": 8.127659642726661e-05,
      "loss": 3.052,
      "step": 175118
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6528067588806152,
      "learning_rate": 8.127379674165854e-05,
      "loss": 3.2746,
      "step": 175119
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.416754722595215,
      "learning_rate": 8.127099709671583e-05,
      "loss": 2.9096,
      "step": 175120
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.471621036529541,
      "learning_rate": 8.126819749243893e-05,
      "loss": 2.8502,
      "step": 175121
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6562743186950684,
      "learning_rate": 8.126539792882845e-05,
      "loss": 2.9754,
      "step": 175122
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0840320587158203,
      "learning_rate": 8.12625984058849e-05,
      "loss": 2.8775,
      "step": 175123
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.306345224380493,
      "learning_rate": 8.125979892360874e-05,
      "loss": 2.9609,
      "step": 175124
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4092531204223633,
      "learning_rate": 8.12569994820004e-05,
      "loss": 2.9468,
      "step": 175125
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9626200199127197,
      "learning_rate": 8.125420008106065e-05,
      "loss": 2.9265,
      "step": 175126
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1004221439361572,
      "learning_rate": 8.125140072078976e-05,
      "loss": 3.1627,
      "step": 175127
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.824610948562622,
      "learning_rate": 8.124860140118844e-05,
      "loss": 2.7648,
      "step": 175128
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.1625242233276367,
      "learning_rate": 8.124580212225715e-05,
      "loss": 3.2083,
      "step": 175129
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.029075622558594,
      "learning_rate": 8.124300288399633e-05,
      "loss": 2.9135,
      "step": 175130
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8443655967712402,
      "learning_rate": 8.124020368640661e-05,
      "loss": 2.8331,
      "step": 175131
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.909302234649658,
      "learning_rate": 8.123740452948849e-05,
      "loss": 2.9278,
      "step": 175132
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0826914310455322,
      "learning_rate": 8.123460541324237e-05,
      "loss": 2.9575,
      "step": 175133
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.498455762863159,
      "learning_rate": 8.123180633766895e-05,
      "loss": 2.7668,
      "step": 175134
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5745997428894043,
      "learning_rate": 8.122900730276869e-05,
      "loss": 2.8083,
      "step": 175135
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.135305643081665,
      "learning_rate": 8.122620830854196e-05,
      "loss": 2.9789,
      "step": 175136
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2204575538635254,
      "learning_rate": 8.122340935498953e-05,
      "loss": 3.1119,
      "step": 175137
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.869617462158203,
      "learning_rate": 8.122061044211169e-05,
      "loss": 2.8947,
      "step": 175138
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6726455688476562,
      "learning_rate": 8.121781156990917e-05,
      "loss": 2.915,
      "step": 175139
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.065825939178467,
      "learning_rate": 8.121501273838238e-05,
      "loss": 2.8305,
      "step": 175140
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6777570247650146,
      "learning_rate": 8.121221394753189e-05,
      "loss": 3.1649,
      "step": 175141
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.0349221229553223,
      "learning_rate": 8.120941519735804e-05,
      "loss": 2.8191,
      "step": 175142
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.424316644668579,
      "learning_rate": 8.120661648786158e-05,
      "loss": 2.7907,
      "step": 175143
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0161237716674805,
      "learning_rate": 8.120381781904285e-05,
      "loss": 2.9596,
      "step": 175144
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2192435264587402,
      "learning_rate": 8.120101919090258e-05,
      "loss": 2.8731,
      "step": 175145
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.543558120727539,
      "learning_rate": 8.119822060344114e-05,
      "loss": 2.99,
      "step": 175146
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7595977783203125,
      "learning_rate": 8.11954220566591e-05,
      "loss": 2.8453,
      "step": 175147
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4141628742218018,
      "learning_rate": 8.119262355055688e-05,
      "loss": 3.1646,
      "step": 175148
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6846017837524414,
      "learning_rate": 8.118982508513515e-05,
      "loss": 2.9022,
      "step": 175149
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3105669021606445,
      "learning_rate": 8.118702666039428e-05,
      "loss": 3.1196,
      "step": 175150
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4233973026275635,
      "learning_rate": 8.118422827633494e-05,
      "loss": 2.7987,
      "step": 175151
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.218284845352173,
      "learning_rate": 8.118142993295762e-05,
      "loss": 2.9672,
      "step": 175152
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4646663665771484,
      "learning_rate": 8.11786316302628e-05,
      "loss": 2.9583,
      "step": 175153
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.515329360961914,
      "learning_rate": 8.117583336825088e-05,
      "loss": 2.9,
      "step": 175154
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4946682453155518,
      "learning_rate": 8.117303514692261e-05,
      "loss": 3.0248,
      "step": 175155
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7893481254577637,
      "learning_rate": 8.117023696627832e-05,
      "loss": 2.7292,
      "step": 175156
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.4876015186309814,
      "learning_rate": 8.116743882631871e-05,
      "loss": 2.8266,
      "step": 175157
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7811667919158936,
      "learning_rate": 8.116464072704409e-05,
      "loss": 2.9567,
      "step": 175158
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.057711601257324,
      "learning_rate": 8.11618426684553e-05,
      "loss": 2.7598,
      "step": 175159
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.290921926498413,
      "learning_rate": 8.115904465055246e-05,
      "loss": 2.7921,
      "step": 175160
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.487771987915039,
      "learning_rate": 8.115624667333637e-05,
      "loss": 2.7701,
      "step": 175161
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9261157512664795,
      "learning_rate": 8.115344873680738e-05,
      "loss": 2.8924,
      "step": 175162
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1321778297424316,
      "learning_rate": 8.115065084096617e-05,
      "loss": 3.0081,
      "step": 175163
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.411305904388428,
      "learning_rate": 8.114785298581311e-05,
      "loss": 3.1614,
      "step": 175164
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.94470477104187,
      "learning_rate": 8.1145055171349e-05,
      "loss": 3.0516,
      "step": 175165
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.157451629638672,
      "learning_rate": 8.114225739757395e-05,
      "loss": 2.7849,
      "step": 175166
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1731438636779785,
      "learning_rate": 8.113945966448879e-05,
      "loss": 3.1427,
      "step": 175167
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.509871482849121,
      "learning_rate": 8.11366619720938e-05,
      "loss": 2.6764,
      "step": 175168
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4173669815063477,
      "learning_rate": 8.113386432038977e-05,
      "loss": 2.9647,
      "step": 175169
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.697197198867798,
      "learning_rate": 8.113106670937697e-05,
      "loss": 2.9898,
      "step": 175170
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9335875511169434,
      "learning_rate": 8.112826913905624e-05,
      "loss": 3.04,
      "step": 175171
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.308514356613159,
      "learning_rate": 8.112547160942773e-05,
      "loss": 2.8377,
      "step": 175172
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.830724000930786,
      "learning_rate": 8.112267412049219e-05,
      "loss": 3.0331,
      "step": 175173
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.39559268951416,
      "learning_rate": 8.111987667224998e-05,
      "loss": 2.6978,
      "step": 175174
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5284135341644287,
      "learning_rate": 8.11170792647018e-05,
      "loss": 2.7913,
      "step": 175175
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.266225337982178,
      "learning_rate": 8.111428189784804e-05,
      "loss": 2.7915,
      "step": 175176
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.275973320007324,
      "learning_rate": 8.11114845716894e-05,
      "loss": 2.8615,
      "step": 175177
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.910822629928589,
      "learning_rate": 8.110868728622611e-05,
      "loss": 2.7321,
      "step": 175178
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6226425170898438,
      "learning_rate": 8.110589004145895e-05,
      "loss": 2.8153,
      "step": 175179
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.288943290710449,
      "learning_rate": 8.110309283738822e-05,
      "loss": 2.8732,
      "step": 175180
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.360236406326294,
      "learning_rate": 8.110029567401466e-05,
      "loss": 2.8486,
      "step": 175181
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3378517627716064,
      "learning_rate": 8.109749855133861e-05,
      "loss": 3.0477,
      "step": 175182
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.716726064682007,
      "learning_rate": 8.109470146936081e-05,
      "loss": 2.7984,
      "step": 175183
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6386072635650635,
      "learning_rate": 8.109190442808149e-05,
      "loss": 3.2034,
      "step": 175184
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7918126583099365,
      "learning_rate": 8.10891074275014e-05,
      "loss": 2.9239,
      "step": 175185
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.279094934463501,
      "learning_rate": 8.108631046762088e-05,
      "loss": 3.4107,
      "step": 175186
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.8277812004089355,
      "learning_rate": 8.108351354844065e-05,
      "loss": 2.8873,
      "step": 175187
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5904781818389893,
      "learning_rate": 8.108071666996104e-05,
      "loss": 2.8598,
      "step": 175188
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7326197624206543,
      "learning_rate": 8.107791983218279e-05,
      "loss": 2.9677,
      "step": 175189
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0704574584960938,
      "learning_rate": 8.107512303510616e-05,
      "loss": 2.866,
      "step": 175190
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.711360931396484,
      "learning_rate": 8.107232627873187e-05,
      "loss": 2.7996,
      "step": 175191
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8842618465423584,
      "learning_rate": 8.106952956306029e-05,
      "loss": 2.7671,
      "step": 175192
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5392565727233887,
      "learning_rate": 8.106673288809208e-05,
      "loss": 3.1219,
      "step": 175193
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9396989345550537,
      "learning_rate": 8.10639362538276e-05,
      "loss": 3.1436,
      "step": 175194
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.670743227005005,
      "learning_rate": 8.106113966026769e-05,
      "loss": 2.9953,
      "step": 175195
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.25209641456604,
      "learning_rate": 8.105834310741246e-05,
      "loss": 2.8259,
      "step": 175196
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.759448289871216,
      "learning_rate": 8.105554659526268e-05,
      "loss": 2.9894,
      "step": 175197
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1565933227539062,
      "learning_rate": 8.105275012381873e-05,
      "loss": 2.8378,
      "step": 175198
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.932788133621216,
      "learning_rate": 8.10499536930813e-05,
      "loss": 2.9962,
      "step": 175199
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.428074836730957,
      "learning_rate": 8.104715730305072e-05,
      "loss": 3.2263,
      "step": 175200
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5872230529785156,
      "learning_rate": 8.104436095372769e-05,
      "loss": 2.7925,
      "step": 175201
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3545641899108887,
      "learning_rate": 8.104156464511266e-05,
      "loss": 2.8985,
      "step": 175202
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.681206226348877,
      "learning_rate": 8.103876837720615e-05,
      "loss": 2.6746,
      "step": 175203
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.411935329437256,
      "learning_rate": 8.103597215000852e-05,
      "loss": 2.8405,
      "step": 175204
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.476768970489502,
      "learning_rate": 8.103317596352056e-05,
      "loss": 3.1829,
      "step": 175205
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0146098136901855,
      "learning_rate": 8.103037981774258e-05,
      "loss": 3.0003,
      "step": 175206
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6135549545288086,
      "learning_rate": 8.10275837126753e-05,
      "loss": 2.5963,
      "step": 175207
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.006985902786255,
      "learning_rate": 8.102478764831909e-05,
      "loss": 2.8171,
      "step": 175208
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.874199390411377,
      "learning_rate": 8.102199162467451e-05,
      "loss": 2.6493,
      "step": 175209
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2028911113739014,
      "learning_rate": 8.1019195641742e-05,
      "loss": 3.1069,
      "step": 175210
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4976446628570557,
      "learning_rate": 8.101639969952224e-05,
      "loss": 2.7857,
      "step": 175211
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.590129852294922,
      "learning_rate": 8.101360379801557e-05,
      "loss": 2.7478,
      "step": 175212
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8292393684387207,
      "learning_rate": 8.101080793722268e-05,
      "loss": 2.8994,
      "step": 175213
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.563718795776367,
      "learning_rate": 8.100801211714403e-05,
      "loss": 3.0282,
      "step": 175214
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.803926944732666,
      "learning_rate": 8.100521633778005e-05,
      "loss": 2.9953,
      "step": 175215
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6876959800720215,
      "learning_rate": 8.10024205991314e-05,
      "loss": 3.0751,
      "step": 175216
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.212506055831909,
      "learning_rate": 8.099962490119855e-05,
      "loss": 2.8732,
      "step": 175217
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7993457317352295,
      "learning_rate": 8.099682924398192e-05,
      "loss": 2.8198,
      "step": 175218
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9682154655456543,
      "learning_rate": 8.099403362748219e-05,
      "loss": 3.0416,
      "step": 175219
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7404966354370117,
      "learning_rate": 8.09912380516998e-05,
      "loss": 2.8652,
      "step": 175220
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.754307746887207,
      "learning_rate": 8.098844251663518e-05,
      "loss": 2.9977,
      "step": 175221
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5079405307769775,
      "learning_rate": 8.098564702228904e-05,
      "loss": 2.8196,
      "step": 175222
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.255099058151245,
      "learning_rate": 8.09828515686617e-05,
      "loss": 3.0653,
      "step": 175223
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.336481809616089,
      "learning_rate": 8.098005615575388e-05,
      "loss": 2.7295,
      "step": 175224
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8946897983551025,
      "learning_rate": 8.097726078356603e-05,
      "loss": 3.1186,
      "step": 175225
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4142305850982666,
      "learning_rate": 8.097446545209863e-05,
      "loss": 2.9782,
      "step": 175226
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5966601371765137,
      "learning_rate": 8.097167016135209e-05,
      "loss": 3.0763,
      "step": 175227
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7282698154449463,
      "learning_rate": 8.096887491132714e-05,
      "loss": 2.9413,
      "step": 175228
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1119418144226074,
      "learning_rate": 8.096607970202415e-05,
      "loss": 3.0942,
      "step": 175229
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.404940366744995,
      "learning_rate": 8.09632845334438e-05,
      "loss": 3.17,
      "step": 175230
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9967963695526123,
      "learning_rate": 8.096048940558649e-05,
      "loss": 2.9003,
      "step": 175231
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6926817893981934,
      "learning_rate": 8.095769431845275e-05,
      "loss": 2.8128,
      "step": 175232
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.965047836303711,
      "learning_rate": 8.095489927204303e-05,
      "loss": 2.9654,
      "step": 175233
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.036324977874756,
      "learning_rate": 8.095210426635803e-05,
      "loss": 2.963,
      "step": 175234
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.997903823852539,
      "learning_rate": 8.094930930139805e-05,
      "loss": 2.6842,
      "step": 175235
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6407577991485596,
      "learning_rate": 8.094651437716384e-05,
      "loss": 2.7469,
      "step": 175236
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6728639602661133,
      "learning_rate": 8.094371949365571e-05,
      "loss": 3.0749,
      "step": 175237
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3144445419311523,
      "learning_rate": 8.094092465087447e-05,
      "loss": 2.8622,
      "step": 175238
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3830835819244385,
      "learning_rate": 8.093812984882026e-05,
      "loss": 2.7059,
      "step": 175239
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6396284103393555,
      "learning_rate": 8.093533508749385e-05,
      "loss": 3.1165,
      "step": 175240
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.422672748565674,
      "learning_rate": 8.093254036689564e-05,
      "loss": 3.0564,
      "step": 175241
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.88279390335083,
      "learning_rate": 8.09297456870263e-05,
      "loss": 2.8994,
      "step": 175242
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.203878879547119,
      "learning_rate": 8.092695104788616e-05,
      "loss": 3.1022,
      "step": 175243
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8010952472686768,
      "learning_rate": 8.092415644947601e-05,
      "loss": 2.8413,
      "step": 175244
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2536861896514893,
      "learning_rate": 8.092136189179601e-05,
      "loss": 2.9684,
      "step": 175245
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.569934129714966,
      "learning_rate": 8.091856737484696e-05,
      "loss": 2.9718,
      "step": 175246
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.865776777267456,
      "learning_rate": 8.091577289862918e-05,
      "loss": 2.7519,
      "step": 175247
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.651318311691284,
      "learning_rate": 8.09129784631434e-05,
      "loss": 2.9197,
      "step": 175248
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.716956615447998,
      "learning_rate": 8.091018406838993e-05,
      "loss": 2.9471,
      "step": 175249
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.009028673171997,
      "learning_rate": 8.09073897143696e-05,
      "loss": 2.6878,
      "step": 175250
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8864033222198486,
      "learning_rate": 8.09045954010825e-05,
      "loss": 3.347,
      "step": 175251
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.566012144088745,
      "learning_rate": 8.09018011285295e-05,
      "loss": 2.9279,
      "step": 175252
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.389777183532715,
      "learning_rate": 8.089900689671088e-05,
      "loss": 2.7344,
      "step": 175253
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.358964681625366,
      "learning_rate": 8.089621270562736e-05,
      "loss": 3.1853,
      "step": 175254
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.647265672683716,
      "learning_rate": 8.089341855527926e-05,
      "loss": 2.9972,
      "step": 175255
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7964141368865967,
      "learning_rate": 8.089062444566742e-05,
      "loss": 2.8307,
      "step": 175256
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3362741470336914,
      "learning_rate": 8.088783037679199e-05,
      "loss": 2.5187,
      "step": 175257
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5224597454071045,
      "learning_rate": 8.08850363486537e-05,
      "loss": 3.0788,
      "step": 175258
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.09769868850708,
      "learning_rate": 8.088224236125292e-05,
      "loss": 2.7073,
      "step": 175259
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.939046382904053,
      "learning_rate": 8.08794484145904e-05,
      "loss": 3.3699,
      "step": 175260
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2025983333587646,
      "learning_rate": 8.087665450866641e-05,
      "loss": 2.932,
      "step": 175261
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7580173015594482,
      "learning_rate": 8.087386064348178e-05,
      "loss": 3.0689,
      "step": 175262
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.745393991470337,
      "learning_rate": 8.087106681903662e-05,
      "loss": 2.9579,
      "step": 175263
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.157219171524048,
      "learning_rate": 8.086827303533178e-05,
      "loss": 2.9442,
      "step": 175264
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6294045448303223,
      "learning_rate": 8.086547929236757e-05,
      "loss": 3.161,
      "step": 175265
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.653705358505249,
      "learning_rate": 8.086268559014471e-05,
      "loss": 2.9217,
      "step": 175266
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.60436749458313,
      "learning_rate": 8.08598919286635e-05,
      "loss": 2.6679,
      "step": 175267
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.651954412460327,
      "learning_rate": 8.085709830792479e-05,
      "loss": 2.9284,
      "step": 175268
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9724504947662354,
      "learning_rate": 8.085430472792866e-05,
      "loss": 2.9385,
      "step": 175269
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4334559440612793,
      "learning_rate": 8.085151118867597e-05,
      "loss": 2.9444,
      "step": 175270
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.916184425354004,
      "learning_rate": 8.0848717690167e-05,
      "loss": 2.8373,
      "step": 175271
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.703916549682617,
      "learning_rate": 8.084592423240253e-05,
      "loss": 2.904,
      "step": 175272
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6724228858947754,
      "learning_rate": 8.084313081538279e-05,
      "loss": 2.9058,
      "step": 175273
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.113981008529663,
      "learning_rate": 8.084033743910867e-05,
      "loss": 2.8391,
      "step": 175274
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7098581790924072,
      "learning_rate": 8.083754410358029e-05,
      "loss": 2.6937,
      "step": 175275
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.820505142211914,
      "learning_rate": 8.083475080879841e-05,
      "loss": 3.0482,
      "step": 175276
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8562514781951904,
      "learning_rate": 8.08319575547634e-05,
      "loss": 2.7949,
      "step": 175277
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4251489639282227,
      "learning_rate": 8.082916434147597e-05,
      "loss": 3.0238,
      "step": 175278
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.21777081489563,
      "learning_rate": 8.082637116893645e-05,
      "loss": 2.9907,
      "step": 175279
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1034717559814453,
      "learning_rate": 8.082357803714562e-05,
      "loss": 2.8399,
      "step": 175280
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.632490396499634,
      "learning_rate": 8.082078494610363e-05,
      "loss": 3.1023,
      "step": 175281
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.252072811126709,
      "learning_rate": 8.081799189581129e-05,
      "loss": 2.9178,
      "step": 175282
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.174576759338379,
      "learning_rate": 8.081519888626894e-05,
      "loss": 2.8479,
      "step": 175283
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7680301666259766,
      "learning_rate": 8.081240591747727e-05,
      "loss": 2.8962,
      "step": 175284
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8906233310699463,
      "learning_rate": 8.080961298943663e-05,
      "loss": 3.0657,
      "step": 175285
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.308683395385742,
      "learning_rate": 8.080682010214769e-05,
      "loss": 2.8729,
      "step": 175286
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.305694341659546,
      "learning_rate": 8.080402725561092e-05,
      "loss": 2.8856,
      "step": 175287
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.362666130065918,
      "learning_rate": 8.080123444982681e-05,
      "loss": 2.9096,
      "step": 175288
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0078036785125732,
      "learning_rate": 8.079844168479577e-05,
      "loss": 2.7791,
      "step": 175289
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.255065679550171,
      "learning_rate": 8.079564896051857e-05,
      "loss": 2.6777,
      "step": 175290
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.375114679336548,
      "learning_rate": 8.079285627699547e-05,
      "loss": 3.0324,
      "step": 175291
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.628157615661621,
      "learning_rate": 8.079006363422725e-05,
      "loss": 2.8598,
      "step": 175292
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.283031940460205,
      "learning_rate": 8.078727103221427e-05,
      "loss": 2.8657,
      "step": 175293
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.867809772491455,
      "learning_rate": 8.078447847095707e-05,
      "loss": 3.0721,
      "step": 175294
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.522780656814575,
      "learning_rate": 8.07816859504561e-05,
      "loss": 2.9247,
      "step": 175295
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5796239376068115,
      "learning_rate": 8.077889347071203e-05,
      "loss": 2.8283,
      "step": 175296
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.514666795730591,
      "learning_rate": 8.077610103172521e-05,
      "loss": 2.9144,
      "step": 175297
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1294918060302734,
      "learning_rate": 8.077330863349633e-05,
      "loss": 2.8253,
      "step": 175298
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6384658813476562,
      "learning_rate": 8.077051627602585e-05,
      "loss": 3.0115,
      "step": 175299
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6416373252868652,
      "learning_rate": 8.07677239593142e-05,
      "loss": 3.0764,
      "step": 175300
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.085003137588501,
      "learning_rate": 8.076493168336201e-05,
      "loss": 2.961,
      "step": 175301
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7181265354156494,
      "learning_rate": 8.076213944816978e-05,
      "loss": 2.9498,
      "step": 175302
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.80051851272583,
      "learning_rate": 8.075934725373795e-05,
      "loss": 2.8233,
      "step": 175303
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5546226501464844,
      "learning_rate": 8.075655510006715e-05,
      "loss": 2.9596,
      "step": 175304
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0805904865264893,
      "learning_rate": 8.075376298715783e-05,
      "loss": 2.8755,
      "step": 175305
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.684591054916382,
      "learning_rate": 8.075097091501044e-05,
      "loss": 2.9224,
      "step": 175306
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.686168909072876,
      "learning_rate": 8.074817888362569e-05,
      "loss": 3.1193,
      "step": 175307
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1277658939361572,
      "learning_rate": 8.074538689300399e-05,
      "loss": 2.9519,
      "step": 175308
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6786375045776367,
      "learning_rate": 8.07425949431458e-05,
      "loss": 2.8237,
      "step": 175309
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8967080116271973,
      "learning_rate": 8.073980303405174e-05,
      "loss": 3.1177,
      "step": 175310
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8086276054382324,
      "learning_rate": 8.073701116572231e-05,
      "loss": 3.048,
      "step": 175311
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.774690628051758,
      "learning_rate": 8.073421933815792e-05,
      "loss": 2.875,
      "step": 175312
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4028079509735107,
      "learning_rate": 8.073142755135925e-05,
      "loss": 2.7835,
      "step": 175313
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8464348316192627,
      "learning_rate": 8.072863580532667e-05,
      "loss": 2.8653,
      "step": 175314
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.906778335571289,
      "learning_rate": 8.072584410006088e-05,
      "loss": 3.128,
      "step": 175315
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3850319385528564,
      "learning_rate": 8.072305243556227e-05,
      "loss": 2.8945,
      "step": 175316
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7464096546173096,
      "learning_rate": 8.072026081183143e-05,
      "loss": 2.8357,
      "step": 175317
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6659483909606934,
      "learning_rate": 8.07174692288687e-05,
      "loss": 2.9861,
      "step": 175318
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3638193607330322,
      "learning_rate": 8.071467768667483e-05,
      "loss": 3.0011,
      "step": 175319
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6535544395446777,
      "learning_rate": 8.071188618525015e-05,
      "loss": 3.195,
      "step": 175320
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2810444831848145,
      "learning_rate": 8.070909472459536e-05,
      "loss": 2.7914,
      "step": 175321
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2858142852783203,
      "learning_rate": 8.070630330471078e-05,
      "loss": 2.9029,
      "step": 175322
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8951430320739746,
      "learning_rate": 8.070351192559727e-05,
      "loss": 2.8086,
      "step": 175323
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8001554012298584,
      "learning_rate": 8.07007205872549e-05,
      "loss": 3.178,
      "step": 175324
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.4034230709075928,
      "learning_rate": 8.069792928968449e-05,
      "loss": 2.9214,
      "step": 175325
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9003512859344482,
      "learning_rate": 8.069513803288639e-05,
      "loss": 2.7263,
      "step": 175326
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.731621265411377,
      "learning_rate": 8.069234681686131e-05,
      "loss": 2.8621,
      "step": 175327
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.619683265686035,
      "learning_rate": 8.068955564160955e-05,
      "loss": 2.8878,
      "step": 175328
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5851516723632812,
      "learning_rate": 8.068676450713194e-05,
      "loss": 3.0702,
      "step": 175329
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.143543004989624,
      "learning_rate": 8.068397341342861e-05,
      "loss": 3.0785,
      "step": 175330
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7862095832824707,
      "learning_rate": 8.068118236050034e-05,
      "loss": 2.9977,
      "step": 175331
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.809170722961426,
      "learning_rate": 8.067839134834752e-05,
      "loss": 3.1035,
      "step": 175332
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.422891616821289,
      "learning_rate": 8.06756003769708e-05,
      "loss": 3.0173,
      "step": 175333
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6332826614379883,
      "learning_rate": 8.067280944637054e-05,
      "loss": 3.0119,
      "step": 175334
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.380725145339966,
      "learning_rate": 8.067001855654753e-05,
      "loss": 2.8257,
      "step": 175335
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.42533540725708,
      "learning_rate": 8.066722770750193e-05,
      "loss": 3.0001,
      "step": 175336
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2626092433929443,
      "learning_rate": 8.066443689923452e-05,
      "loss": 2.8872,
      "step": 175337
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4694437980651855,
      "learning_rate": 8.066164613174562e-05,
      "loss": 2.7998,
      "step": 175338
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7226171493530273,
      "learning_rate": 8.065885540503596e-05,
      "loss": 3.0736,
      "step": 175339
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3966643810272217,
      "learning_rate": 8.065606471910588e-05,
      "loss": 3.1667,
      "step": 175340
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7098779678344727,
      "learning_rate": 8.065327407395616e-05,
      "loss": 2.7597,
      "step": 175341
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5923051834106445,
      "learning_rate": 8.065048346958694e-05,
      "loss": 3.1295,
      "step": 175342
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.943485975265503,
      "learning_rate": 8.064769290599903e-05,
      "loss": 2.9327,
      "step": 175343
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.71140718460083,
      "learning_rate": 8.06449023831928e-05,
      "loss": 2.8596,
      "step": 175344
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7742505073547363,
      "learning_rate": 8.064211190116888e-05,
      "loss": 2.9903,
      "step": 175345
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.601060390472412,
      "learning_rate": 8.063932145992764e-05,
      "loss": 2.9461,
      "step": 175346
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0880215167999268,
      "learning_rate": 8.063653105946988e-05,
      "loss": 2.9287,
      "step": 175347
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6143195629119873,
      "learning_rate": 8.063374069979573e-05,
      "loss": 3.0378,
      "step": 175348
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.23616099357605,
      "learning_rate": 8.063095038090602e-05,
      "loss": 3.0548,
      "step": 175349
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.614572525024414,
      "learning_rate": 8.062816010280104e-05,
      "loss": 2.8839,
      "step": 175350
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.447317600250244,
      "learning_rate": 8.062536986548155e-05,
      "loss": 3.0616,
      "step": 175351
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.939661741256714,
      "learning_rate": 8.062257966894787e-05,
      "loss": 3.0335,
      "step": 175352
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.7700953483581543,
      "learning_rate": 8.061978951320072e-05,
      "loss": 3.0046,
      "step": 175353
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.624342203140259,
      "learning_rate": 8.061699939824034e-05,
      "loss": 2.9166,
      "step": 175354
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5880401134490967,
      "learning_rate": 8.061420932406748e-05,
      "loss": 2.9856,
      "step": 175355
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.235687732696533,
      "learning_rate": 8.061141929068253e-05,
      "loss": 2.9298,
      "step": 175356
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5051755905151367,
      "learning_rate": 8.06086292980861e-05,
      "loss": 3.1606,
      "step": 175357
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.421377658843994,
      "learning_rate": 8.060583934627859e-05,
      "loss": 2.7182,
      "step": 175358
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5393264293670654,
      "learning_rate": 8.060304943526068e-05,
      "loss": 2.9623,
      "step": 175359
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.9123096466064453,
      "learning_rate": 8.06002595650328e-05,
      "loss": 2.979,
      "step": 175360
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.970902681350708,
      "learning_rate": 8.059746973559551e-05,
      "loss": 3.0132,
      "step": 175361
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0734269618988037,
      "learning_rate": 8.059467994694918e-05,
      "loss": 2.7972,
      "step": 175362
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.0027053356170654,
      "learning_rate": 8.059189019909452e-05,
      "loss": 2.8346,
      "step": 175363
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1202311515808105,
      "learning_rate": 8.05891004920319e-05,
      "loss": 2.9894,
      "step": 175364
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6769394874572754,
      "learning_rate": 8.058631082576198e-05,
      "loss": 3.0738,
      "step": 175365
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2200117111206055,
      "learning_rate": 8.058352120028523e-05,
      "loss": 3.0219,
      "step": 175366
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.375556468963623,
      "learning_rate": 8.058073161560212e-05,
      "loss": 2.6957,
      "step": 175367
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.578930616378784,
      "learning_rate": 8.057794207171313e-05,
      "loss": 3.0322,
      "step": 175368
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.171762466430664,
      "learning_rate": 8.057515256861888e-05,
      "loss": 2.8678,
      "step": 175369
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.697326421737671,
      "learning_rate": 8.05723631063198e-05,
      "loss": 2.8688,
      "step": 175370
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.81816029548645,
      "learning_rate": 8.056957368481655e-05,
      "loss": 2.8167,
      "step": 175371
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4982383251190186,
      "learning_rate": 8.056678430410957e-05,
      "loss": 2.8536,
      "step": 175372
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.321218490600586,
      "learning_rate": 8.056399496419936e-05,
      "loss": 3.1162,
      "step": 175373
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1776628494262695,
      "learning_rate": 8.056120566508635e-05,
      "loss": 3.0191,
      "step": 175374
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.369473695755005,
      "learning_rate": 8.055841640677123e-05,
      "loss": 2.9052,
      "step": 175375
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.439790725708008,
      "learning_rate": 8.055562718925435e-05,
      "loss": 2.8374,
      "step": 175376
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2505195140838623,
      "learning_rate": 8.055283801253642e-05,
      "loss": 3.0303,
      "step": 175377
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.953444004058838,
      "learning_rate": 8.055004887661786e-05,
      "loss": 2.8223,
      "step": 175378
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.777526378631592,
      "learning_rate": 8.054725978149921e-05,
      "loss": 3.1088,
      "step": 175379
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.519245147705078,
      "learning_rate": 8.054447072718087e-05,
      "loss": 2.7954,
      "step": 175380
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6229825019836426,
      "learning_rate": 8.054168171366355e-05,
      "loss": 2.8542,
      "step": 175381
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.87969708442688,
      "learning_rate": 8.053889274094754e-05,
      "loss": 3.2025,
      "step": 175382
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.298046827316284,
      "learning_rate": 8.053610380903362e-05,
      "loss": 2.9316,
      "step": 175383
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4763147830963135,
      "learning_rate": 8.053331491792218e-05,
      "loss": 2.7404,
      "step": 175384
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9237966537475586,
      "learning_rate": 8.053052606761363e-05,
      "loss": 2.9074,
      "step": 175385
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.556058168411255,
      "learning_rate": 8.05277372581087e-05,
      "loss": 3.0315,
      "step": 175386
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.812786340713501,
      "learning_rate": 8.052494848940783e-05,
      "loss": 2.7463,
      "step": 175387
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5342702865600586,
      "learning_rate": 8.052215976151138e-05,
      "loss": 3.0523,
      "step": 175388
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1665868759155273,
      "learning_rate": 8.051937107442014e-05,
      "loss": 2.9546,
      "step": 175389
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6097798347473145,
      "learning_rate": 8.051658242813445e-05,
      "loss": 3.2321,
      "step": 175390
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5547032356262207,
      "learning_rate": 8.051379382265478e-05,
      "loss": 3.0459,
      "step": 175391
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.315223217010498,
      "learning_rate": 8.051100525798188e-05,
      "loss": 2.9909,
      "step": 175392
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.730693817138672,
      "learning_rate": 8.05082167341161e-05,
      "loss": 2.964,
      "step": 175393
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.874419689178467,
      "learning_rate": 8.050542825105787e-05,
      "loss": 3.1119,
      "step": 175394
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1624653339385986,
      "learning_rate": 8.050263980880794e-05,
      "loss": 3.2675,
      "step": 175395
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.9089560508728027,
      "learning_rate": 8.049985140736669e-05,
      "loss": 2.9375,
      "step": 175396
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.719212293624878,
      "learning_rate": 8.04970630467346e-05,
      "loss": 2.8022,
      "step": 175397
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.877082586288452,
      "learning_rate": 8.049427472691233e-05,
      "loss": 2.8734,
      "step": 175398
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.8279709815979004,
      "learning_rate": 8.049148644790022e-05,
      "loss": 2.7338,
      "step": 175399
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.888981342315674,
      "learning_rate": 8.048869820969897e-05,
      "loss": 2.7489,
      "step": 175400
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.396200656890869,
      "learning_rate": 8.048591001230902e-05,
      "loss": 3.0336,
      "step": 175401
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1124765872955322,
      "learning_rate": 8.048312185573092e-05,
      "loss": 3.1235,
      "step": 175402
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.694622278213501,
      "learning_rate": 8.048033373996501e-05,
      "loss": 2.6285,
      "step": 175403
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.271528482437134,
      "learning_rate": 8.047754566501207e-05,
      "loss": 2.9058,
      "step": 175404
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.447227716445923,
      "learning_rate": 8.047475763087239e-05,
      "loss": 2.7266,
      "step": 175405
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3469889163970947,
      "learning_rate": 8.047196963754668e-05,
      "loss": 2.9937,
      "step": 175406
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.507277488708496,
      "learning_rate": 8.04691816850354e-05,
      "loss": 2.9211,
      "step": 175407
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4129769802093506,
      "learning_rate": 8.046639377333903e-05,
      "loss": 2.838,
      "step": 175408
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6863462924957275,
      "learning_rate": 8.046360590245801e-05,
      "loss": 3.1227,
      "step": 175409
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.9607291221618652,
      "learning_rate": 8.046081807239306e-05,
      "loss": 3.0847,
      "step": 175410
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.119506359100342,
      "learning_rate": 8.045803028314448e-05,
      "loss": 3.0596,
      "step": 175411
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1306188106536865,
      "learning_rate": 8.045524253471298e-05,
      "loss": 3.04,
      "step": 175412
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7119927406311035,
      "learning_rate": 8.045245482709892e-05,
      "loss": 3.0095,
      "step": 175413
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.8975913524627686,
      "learning_rate": 8.044966716030305e-05,
      "loss": 2.9777,
      "step": 175414
    },
    {
      "epoch": 2.28,
      "grad_norm": 5.196398735046387,
      "learning_rate": 8.044687953432557e-05,
      "loss": 2.8353,
      "step": 175415
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.774050712585449,
      "learning_rate": 8.044409194916724e-05,
      "loss": 2.7615,
      "step": 175416
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.192901849746704,
      "learning_rate": 8.044130440482841e-05,
      "loss": 2.7462,
      "step": 175417
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8700575828552246,
      "learning_rate": 8.04385169013098e-05,
      "loss": 3.1513,
      "step": 175418
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1759889125823975,
      "learning_rate": 8.043572943861171e-05,
      "loss": 3.1157,
      "step": 175419
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.856419086456299,
      "learning_rate": 8.043294201673493e-05,
      "loss": 2.6605,
      "step": 175420
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2497215270996094,
      "learning_rate": 8.043015463567963e-05,
      "loss": 3.0341,
      "step": 175421
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9066896438598633,
      "learning_rate": 8.04273672954466e-05,
      "loss": 2.9006,
      "step": 175422
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.5335710048675537,
      "learning_rate": 8.042457999603619e-05,
      "loss": 2.9066,
      "step": 175423
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.442791700363159,
      "learning_rate": 8.042179273744908e-05,
      "loss": 2.7208,
      "step": 175424
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.905421257019043,
      "learning_rate": 8.04190055196856e-05,
      "loss": 2.9745,
      "step": 175425
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.527161121368408,
      "learning_rate": 8.041621834274648e-05,
      "loss": 3.0339,
      "step": 175426
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.1772139072418213,
      "learning_rate": 8.041343120663212e-05,
      "loss": 2.8662,
      "step": 175427
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.459120988845825,
      "learning_rate": 8.041064411134306e-05,
      "loss": 2.8669,
      "step": 175428
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.803499698638916,
      "learning_rate": 8.040785705687971e-05,
      "loss": 2.8585,
      "step": 175429
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.3756942749023438,
      "learning_rate": 8.040507004324277e-05,
      "loss": 3.078,
      "step": 175430
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.2795073986053467,
      "learning_rate": 8.040228307043257e-05,
      "loss": 2.9308,
      "step": 175431
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.669722080230713,
      "learning_rate": 8.039949613844985e-05,
      "loss": 2.8217,
      "step": 175432
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.189277410507202,
      "learning_rate": 8.0396709247295e-05,
      "loss": 3.0319,
      "step": 175433
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5406506061553955,
      "learning_rate": 8.039392239696853e-05,
      "loss": 2.8873,
      "step": 175434
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5273725986480713,
      "learning_rate": 8.03911355874709e-05,
      "loss": 2.9716,
      "step": 175435
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.091237783432007,
      "learning_rate": 8.03883488188028e-05,
      "loss": 3.0895,
      "step": 175436
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4028267860412598,
      "learning_rate": 8.038556209096454e-05,
      "loss": 2.9529,
      "step": 175437
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.4904518127441406,
      "learning_rate": 8.038277540395685e-05,
      "loss": 2.965,
      "step": 175438
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9937667846679688,
      "learning_rate": 8.037998875778014e-05,
      "loss": 2.8014,
      "step": 175439
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3280348777770996,
      "learning_rate": 8.037720215243498e-05,
      "loss": 2.8879,
      "step": 175440
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.654836654663086,
      "learning_rate": 8.03744155879217e-05,
      "loss": 3.0871,
      "step": 175441
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.484880208969116,
      "learning_rate": 8.037162906424107e-05,
      "loss": 2.929,
      "step": 175442
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.446558713912964,
      "learning_rate": 8.036884258139341e-05,
      "loss": 2.9951,
      "step": 175443
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5588455200195312,
      "learning_rate": 8.036605613937942e-05,
      "loss": 2.8528,
      "step": 175444
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.06000018119812,
      "learning_rate": 8.036326973819953e-05,
      "loss": 2.8992,
      "step": 175445
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5193867683410645,
      "learning_rate": 8.036048337785425e-05,
      "loss": 2.9708,
      "step": 175446
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.949427366256714,
      "learning_rate": 8.035769705834399e-05,
      "loss": 2.7388,
      "step": 175447
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.662672281265259,
      "learning_rate": 8.035491077966951e-05,
      "loss": 2.9504,
      "step": 175448
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.948242425918579,
      "learning_rate": 8.035212454183109e-05,
      "loss": 2.8991,
      "step": 175449
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7808401584625244,
      "learning_rate": 8.034933834482945e-05,
      "loss": 2.9205,
      "step": 175450
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.094374656677246,
      "learning_rate": 8.0346552188665e-05,
      "loss": 2.834,
      "step": 175451
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6161301136016846,
      "learning_rate": 8.034376607333828e-05,
      "loss": 2.8666,
      "step": 175452
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5622007846832275,
      "learning_rate": 8.034097999884971e-05,
      "loss": 3.0098,
      "step": 175453
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6381664276123047,
      "learning_rate": 8.03381939652e-05,
      "loss": 2.778,
      "step": 175454
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3536202907562256,
      "learning_rate": 8.033540797238948e-05,
      "loss": 2.7816,
      "step": 175455
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9953999519348145,
      "learning_rate": 8.033262202041882e-05,
      "loss": 2.8388,
      "step": 175456
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6073849201202393,
      "learning_rate": 8.032983610928848e-05,
      "loss": 2.8136,
      "step": 175457
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6715869903564453,
      "learning_rate": 8.032705023899897e-05,
      "loss": 2.8217,
      "step": 175458
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.9173009395599365,
      "learning_rate": 8.032426440955072e-05,
      "loss": 3.1206,
      "step": 175459
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.258840322494507,
      "learning_rate": 8.032147862094442e-05,
      "loss": 3.1621,
      "step": 175460
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.289965629577637,
      "learning_rate": 8.031869287318042e-05,
      "loss": 2.9262,
      "step": 175461
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6390981674194336,
      "learning_rate": 8.031590716625942e-05,
      "loss": 2.8953,
      "step": 175462
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5319948196411133,
      "learning_rate": 8.031312150018184e-05,
      "loss": 3.0053,
      "step": 175463
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9156429767608643,
      "learning_rate": 8.03103358749482e-05,
      "loss": 3.0754,
      "step": 175464
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.2667431831359863,
      "learning_rate": 8.03075502905589e-05,
      "loss": 3.0498,
      "step": 175465
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.413080215454102,
      "learning_rate": 8.030476474701468e-05,
      "loss": 3.1105,
      "step": 175466
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.845078706741333,
      "learning_rate": 8.030197924431589e-05,
      "loss": 3.1236,
      "step": 175467
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.8981165885925293,
      "learning_rate": 8.029919378246315e-05,
      "loss": 3.1171,
      "step": 175468
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.542630195617676,
      "learning_rate": 8.029640836145696e-05,
      "loss": 3.1224,
      "step": 175469
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.036180257797241,
      "learning_rate": 8.029362298129781e-05,
      "loss": 2.6876,
      "step": 175470
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.4191253185272217,
      "learning_rate": 8.029083764198612e-05,
      "loss": 2.9418,
      "step": 175471
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.9066922664642334,
      "learning_rate": 8.028805234352261e-05,
      "loss": 2.9006,
      "step": 175472
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.2825026512146,
      "learning_rate": 8.028526708590764e-05,
      "loss": 2.826,
      "step": 175473
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.7448198795318604,
      "learning_rate": 8.028248186914184e-05,
      "loss": 3.0306,
      "step": 175474
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.3190436363220215,
      "learning_rate": 8.027969669322567e-05,
      "loss": 2.7506,
      "step": 175475
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.4739983081817627,
      "learning_rate": 8.027691155815958e-05,
      "loss": 2.9718,
      "step": 175476
    },
    {
      "epoch": 2.28,
      "grad_norm": 6.156233787536621,
      "learning_rate": 8.027412646394425e-05,
      "loss": 2.8189,
      "step": 175477
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.6539199352264404,
      "learning_rate": 8.02713414105801e-05,
      "loss": 2.7958,
      "step": 175478
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.273510932922363,
      "learning_rate": 8.026855639806759e-05,
      "loss": 2.9834,
      "step": 175479
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.640613317489624,
      "learning_rate": 8.026577142640736e-05,
      "loss": 2.9549,
      "step": 175480
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.9111695289611816,
      "learning_rate": 8.026298649559987e-05,
      "loss": 3.1512,
      "step": 175481
    },
    {
      "epoch": 2.28,
      "grad_norm": 3.8921971321105957,
      "learning_rate": 8.026020160564556e-05,
      "loss": 3.0553,
      "step": 175482
    },
    {
      "epoch": 2.28,
      "grad_norm": 4.708421230316162,
      "learning_rate": 8.025741675654513e-05,
      "loss": 3.0113,
      "step": 175483
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5628628730773926,
      "learning_rate": 8.02546319482989e-05,
      "loss": 2.9155,
      "step": 175484
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.418612003326416,
      "learning_rate": 8.025184718090755e-05,
      "loss": 3.0418,
      "step": 175485
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.424586057662964,
      "learning_rate": 8.024906245437155e-05,
      "loss": 3.1488,
      "step": 175486
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.388786554336548,
      "learning_rate": 8.024627776869141e-05,
      "loss": 3.0989,
      "step": 175487
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6604976654052734,
      "learning_rate": 8.02434931238675e-05,
      "loss": 2.9483,
      "step": 175488
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.377082586288452,
      "learning_rate": 8.02407085199006e-05,
      "loss": 3.0408,
      "step": 175489
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.5704360008239746,
      "learning_rate": 8.023792395679097e-05,
      "loss": 2.9609,
      "step": 175490
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.136472225189209,
      "learning_rate": 8.02351394345394e-05,
      "loss": 2.9255,
      "step": 175491
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3308451175689697,
      "learning_rate": 8.023235495314623e-05,
      "loss": 2.9413,
      "step": 175492
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.875349998474121,
      "learning_rate": 8.022957051261196e-05,
      "loss": 3.1433,
      "step": 175493
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8839738368988037,
      "learning_rate": 8.02267861129372e-05,
      "loss": 2.9885,
      "step": 175494
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.016507625579834,
      "learning_rate": 8.022400175412245e-05,
      "loss": 2.8668,
      "step": 175495
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2684738636016846,
      "learning_rate": 8.022121743616817e-05,
      "loss": 2.9897,
      "step": 175496
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.806861639022827,
      "learning_rate": 8.021843315907494e-05,
      "loss": 3.0668,
      "step": 175497
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7181527614593506,
      "learning_rate": 8.021564892284318e-05,
      "loss": 2.8549,
      "step": 175498
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6648614406585693,
      "learning_rate": 8.021286472747356e-05,
      "loss": 2.8352,
      "step": 175499
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.605790376663208,
      "learning_rate": 8.021008057296652e-05,
      "loss": 3.1977,
      "step": 175500
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9863662719726562,
      "learning_rate": 8.020729645932259e-05,
      "loss": 2.9555,
      "step": 175501
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7983815670013428,
      "learning_rate": 8.020451238654218e-05,
      "loss": 2.7188,
      "step": 175502
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4677202701568604,
      "learning_rate": 8.020172835462597e-05,
      "loss": 2.9147,
      "step": 175503
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8566079139709473,
      "learning_rate": 8.019894436357435e-05,
      "loss": 2.9973,
      "step": 175504
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6194417476654053,
      "learning_rate": 8.019616041338797e-05,
      "loss": 3.0057,
      "step": 175505
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7630205154418945,
      "learning_rate": 8.019337650406726e-05,
      "loss": 2.9226,
      "step": 175506
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3282368183135986,
      "learning_rate": 8.019059263561277e-05,
      "loss": 3.0681,
      "step": 175507
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3756706714630127,
      "learning_rate": 8.018780880802489e-05,
      "loss": 2.9666,
      "step": 175508
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8474388122558594,
      "learning_rate": 8.018502502130436e-05,
      "loss": 3.1162,
      "step": 175509
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6036460399627686,
      "learning_rate": 8.018224127545145e-05,
      "loss": 2.7446,
      "step": 175510
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7035653591156006,
      "learning_rate": 8.017945757046693e-05,
      "loss": 2.998,
      "step": 175511
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5200862884521484,
      "learning_rate": 8.017667390635121e-05,
      "loss": 3.0847,
      "step": 175512
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4658496379852295,
      "learning_rate": 8.017389028310479e-05,
      "loss": 3.2158,
      "step": 175513
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.794797897338867,
      "learning_rate": 8.017110670072809e-05,
      "loss": 2.936,
      "step": 175514
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.823549747467041,
      "learning_rate": 8.016832315922183e-05,
      "loss": 2.9587,
      "step": 175515
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.157989263534546,
      "learning_rate": 8.016553965858635e-05,
      "loss": 2.9582,
      "step": 175516
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.333468198776245,
      "learning_rate": 8.016275619882229e-05,
      "loss": 2.8453,
      "step": 175517
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5741264820098877,
      "learning_rate": 8.015997277993017e-05,
      "loss": 3.1712,
      "step": 175518
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.61948299407959,
      "learning_rate": 8.015718940191045e-05,
      "loss": 2.9124,
      "step": 175519
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.538933277130127,
      "learning_rate": 8.015440606476357e-05,
      "loss": 3.0037,
      "step": 175520
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7836902141571045,
      "learning_rate": 8.01516227684902e-05,
      "loss": 3.0176,
      "step": 175521
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.754478931427002,
      "learning_rate": 8.014883951309072e-05,
      "loss": 2.9751,
      "step": 175522
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.698261260986328,
      "learning_rate": 8.014605629856584e-05,
      "loss": 2.9058,
      "step": 175523
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6252644062042236,
      "learning_rate": 8.014327312491593e-05,
      "loss": 2.8329,
      "step": 175524
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.675692081451416,
      "learning_rate": 8.014048999214152e-05,
      "loss": 2.9005,
      "step": 175525
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.045992612838745,
      "learning_rate": 8.013770690024308e-05,
      "loss": 2.7833,
      "step": 175526
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7785305976867676,
      "learning_rate": 8.013492384922127e-05,
      "loss": 2.853,
      "step": 175527
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.681494951248169,
      "learning_rate": 8.013214083907644e-05,
      "loss": 3.0358,
      "step": 175528
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9305124282836914,
      "learning_rate": 8.012935786980927e-05,
      "loss": 3.1299,
      "step": 175529
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4420151710510254,
      "learning_rate": 8.012657494142024e-05,
      "loss": 2.8668,
      "step": 175530
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5429539680480957,
      "learning_rate": 8.012379205390981e-05,
      "loss": 2.88,
      "step": 175531
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6221535205841064,
      "learning_rate": 8.012100920727841e-05,
      "loss": 2.5384,
      "step": 175532
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.480336904525757,
      "learning_rate": 8.011822640152678e-05,
      "loss": 2.7934,
      "step": 175533
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3954176902770996,
      "learning_rate": 8.01154436366552e-05,
      "loss": 2.9376,
      "step": 175534
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9309749603271484,
      "learning_rate": 8.011266091266443e-05,
      "loss": 3.0583,
      "step": 175535
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6166739463806152,
      "learning_rate": 8.010987822955486e-05,
      "loss": 2.8178,
      "step": 175536
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.655857801437378,
      "learning_rate": 8.010709558732703e-05,
      "loss": 2.9846,
      "step": 175537
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5392115116119385,
      "learning_rate": 8.010431298598132e-05,
      "loss": 2.9537,
      "step": 175538
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.387127161026001,
      "learning_rate": 8.010153042551847e-05,
      "loss": 2.9733,
      "step": 175539
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9005486965179443,
      "learning_rate": 8.00987479059388e-05,
      "loss": 2.9137,
      "step": 175540
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.950157642364502,
      "learning_rate": 8.009596542724304e-05,
      "loss": 3.1329,
      "step": 175541
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.329843521118164,
      "learning_rate": 8.009318298943156e-05,
      "loss": 2.834,
      "step": 175542
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4550037384033203,
      "learning_rate": 8.009040059250493e-05,
      "loss": 2.9447,
      "step": 175543
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4472789764404297,
      "learning_rate": 8.008761823646356e-05,
      "loss": 3.029,
      "step": 175544
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4769670963287354,
      "learning_rate": 8.008483592130813e-05,
      "loss": 2.9325,
      "step": 175545
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.740888833999634,
      "learning_rate": 8.0082053647039e-05,
      "loss": 2.8066,
      "step": 175546
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.77457857131958,
      "learning_rate": 8.007927141365684e-05,
      "loss": 2.9604,
      "step": 175547
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.936171054840088,
      "learning_rate": 8.007648922116213e-05,
      "loss": 3.0147,
      "step": 175548
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.976407289505005,
      "learning_rate": 8.00737070695553e-05,
      "loss": 3.0351,
      "step": 175549
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.144796848297119,
      "learning_rate": 8.007092495883686e-05,
      "loss": 3.1141,
      "step": 175550
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.252450942993164,
      "learning_rate": 8.006814288900748e-05,
      "loss": 3.0373,
      "step": 175551
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6762537956237793,
      "learning_rate": 8.006536086006747e-05,
      "loss": 2.8343,
      "step": 175552
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4653031826019287,
      "learning_rate": 8.006257887201757e-05,
      "loss": 3.2084,
      "step": 175553
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1290128231048584,
      "learning_rate": 8.00597969248582e-05,
      "loss": 2.5919,
      "step": 175554
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.630558729171753,
      "learning_rate": 8.005701501858986e-05,
      "loss": 2.8476,
      "step": 175555
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.9345173835754395,
      "learning_rate": 8.005423315321297e-05,
      "loss": 2.9816,
      "step": 175556
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.753401756286621,
      "learning_rate": 8.005145132872824e-05,
      "loss": 2.8226,
      "step": 175557
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4751906394958496,
      "learning_rate": 8.004866954513598e-05,
      "loss": 3.1615,
      "step": 175558
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5321595668792725,
      "learning_rate": 8.004588780243696e-05,
      "loss": 2.7668,
      "step": 175559
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4530797004699707,
      "learning_rate": 8.004310610063147e-05,
      "loss": 2.887,
      "step": 175560
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.867558717727661,
      "learning_rate": 8.004032443972019e-05,
      "loss": 2.9672,
      "step": 175561
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.261749029159546,
      "learning_rate": 8.003754281970357e-05,
      "loss": 2.8024,
      "step": 175562
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.045813083648682,
      "learning_rate": 8.00347612405821e-05,
      "loss": 3.0629,
      "step": 175563
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.527817726135254,
      "learning_rate": 8.003197970235625e-05,
      "loss": 2.9217,
      "step": 175564
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.21718692779541,
      "learning_rate": 8.002919820502672e-05,
      "loss": 2.7304,
      "step": 175565
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.939551830291748,
      "learning_rate": 8.002641674859381e-05,
      "loss": 2.9598,
      "step": 175566
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3358850479125977,
      "learning_rate": 8.002363533305822e-05,
      "loss": 3.0338,
      "step": 175567
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5600061416625977,
      "learning_rate": 8.002085395842038e-05,
      "loss": 2.8184,
      "step": 175568
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.910727024078369,
      "learning_rate": 8.001807262468082e-05,
      "loss": 3.0264,
      "step": 175569
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9862003326416016,
      "learning_rate": 8.001529133184e-05,
      "loss": 2.983,
      "step": 175570
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.615316390991211,
      "learning_rate": 8.001251007989855e-05,
      "loss": 2.871,
      "step": 175571
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9128756523132324,
      "learning_rate": 8.000972886885684e-05,
      "loss": 3.1695,
      "step": 175572
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1133272647857666,
      "learning_rate": 8.000694769871556e-05,
      "loss": 2.7831,
      "step": 175573
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.478271484375,
      "learning_rate": 8.000416656947515e-05,
      "loss": 2.5649,
      "step": 175574
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4815940856933594,
      "learning_rate": 8.000138548113602e-05,
      "loss": 2.7234,
      "step": 175575
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.18682861328125,
      "learning_rate": 7.999860443369888e-05,
      "loss": 2.7326,
      "step": 175576
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.477954864501953,
      "learning_rate": 7.999582342716416e-05,
      "loss": 2.8533,
      "step": 175577
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.986884593963623,
      "learning_rate": 7.999304246153225e-05,
      "loss": 2.819,
      "step": 175578
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.8752896785736084,
      "learning_rate": 7.999026153680392e-05,
      "loss": 3.092,
      "step": 175579
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.633716106414795,
      "learning_rate": 7.998748065297951e-05,
      "loss": 2.7051,
      "step": 175580
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.8104023933410645,
      "learning_rate": 7.998469981005951e-05,
      "loss": 3.0296,
      "step": 175581
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.775482177734375,
      "learning_rate": 7.998191900804461e-05,
      "loss": 2.8329,
      "step": 175582
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7494661808013916,
      "learning_rate": 7.997913824693514e-05,
      "loss": 2.7797,
      "step": 175583
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.37914776802063,
      "learning_rate": 7.997635752673174e-05,
      "loss": 2.8833,
      "step": 175584
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6738243103027344,
      "learning_rate": 7.997357684743495e-05,
      "loss": 3.0635,
      "step": 175585
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.010669231414795,
      "learning_rate": 7.997079620904517e-05,
      "loss": 2.8612,
      "step": 175586
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1305484771728516,
      "learning_rate": 7.996801561156291e-05,
      "loss": 2.9056,
      "step": 175587
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8291101455688477,
      "learning_rate": 7.996523505498886e-05,
      "loss": 2.7641,
      "step": 175588
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.422842502593994,
      "learning_rate": 7.996245453932331e-05,
      "loss": 3.0283,
      "step": 175589
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3808112144470215,
      "learning_rate": 7.9959674064567e-05,
      "loss": 2.8694,
      "step": 175590
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.004908561706543,
      "learning_rate": 7.99568936307203e-05,
      "loss": 2.7733,
      "step": 175591
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0400609970092773,
      "learning_rate": 7.995411323778383e-05,
      "loss": 3.1005,
      "step": 175592
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3413779735565186,
      "learning_rate": 7.99513328857579e-05,
      "loss": 2.9551,
      "step": 175593
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3676252365112305,
      "learning_rate": 7.994855257464327e-05,
      "loss": 2.9945,
      "step": 175594
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7697696685791016,
      "learning_rate": 7.994577230444028e-05,
      "loss": 2.8652,
      "step": 175595
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.674088954925537,
      "learning_rate": 7.994299207514963e-05,
      "loss": 2.9127,
      "step": 175596
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7185990810394287,
      "learning_rate": 7.994021188677172e-05,
      "loss": 3.0218,
      "step": 175597
    },
    {
      "epoch": 2.29,
      "grad_norm": 5.452264308929443,
      "learning_rate": 7.993743173930707e-05,
      "loss": 2.6694,
      "step": 175598
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.345057249069214,
      "learning_rate": 7.99346516327561e-05,
      "loss": 3.1243,
      "step": 175599
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.394364833831787,
      "learning_rate": 7.993187156711956e-05,
      "loss": 2.769,
      "step": 175600
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8206357955932617,
      "learning_rate": 7.992909154239774e-05,
      "loss": 2.8792,
      "step": 175601
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.552666425704956,
      "learning_rate": 7.992631155859131e-05,
      "loss": 2.8783,
      "step": 175602
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.49812912940979,
      "learning_rate": 7.992353161570076e-05,
      "loss": 2.7836,
      "step": 175603
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6144747734069824,
      "learning_rate": 7.992075171372658e-05,
      "loss": 2.9516,
      "step": 175604
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2921206951141357,
      "learning_rate": 7.991797185266921e-05,
      "loss": 3.1083,
      "step": 175605
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9158248901367188,
      "learning_rate": 7.991519203252933e-05,
      "loss": 2.9012,
      "step": 175606
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5705220699310303,
      "learning_rate": 7.991241225330726e-05,
      "loss": 3.0369,
      "step": 175607
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0562644004821777,
      "learning_rate": 7.990963251500372e-05,
      "loss": 2.9591,
      "step": 175608
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0888142585754395,
      "learning_rate": 7.990685281761915e-05,
      "loss": 3.0178,
      "step": 175609
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.651801109313965,
      "learning_rate": 7.990407316115404e-05,
      "loss": 3.0958,
      "step": 175610
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.946352958679199,
      "learning_rate": 7.99012935456088e-05,
      "loss": 3.0046,
      "step": 175611
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.211442708969116,
      "learning_rate": 7.989851397098421e-05,
      "loss": 3.2142,
      "step": 175612
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.813520908355713,
      "learning_rate": 7.989573443728052e-05,
      "loss": 2.8904,
      "step": 175613
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7001540660858154,
      "learning_rate": 7.989295494449848e-05,
      "loss": 2.8618,
      "step": 175614
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.939692974090576,
      "learning_rate": 7.989017549263844e-05,
      "loss": 2.9699,
      "step": 175615
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8319098949432373,
      "learning_rate": 7.988739608170104e-05,
      "loss": 3.1325,
      "step": 175616
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.939656972885132,
      "learning_rate": 7.98846167116866e-05,
      "loss": 2.695,
      "step": 175617
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.164628267288208,
      "learning_rate": 7.988183738259586e-05,
      "loss": 2.8878,
      "step": 175618
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4778716564178467,
      "learning_rate": 7.987905809442915e-05,
      "loss": 3.0187,
      "step": 175619
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6293492317199707,
      "learning_rate": 7.987627884718717e-05,
      "loss": 3.0418,
      "step": 175620
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1053531169891357,
      "learning_rate": 7.987349964087032e-05,
      "loss": 3.1336,
      "step": 175621
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.758497714996338,
      "learning_rate": 7.987072047547917e-05,
      "loss": 3.122,
      "step": 175622
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0152134895324707,
      "learning_rate": 7.986794135101415e-05,
      "loss": 2.806,
      "step": 175623
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.495615005493164,
      "learning_rate": 7.986516226747589e-05,
      "loss": 2.9622,
      "step": 175624
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.139256477355957,
      "learning_rate": 7.986238322486474e-05,
      "loss": 3.0923,
      "step": 175625
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1509528160095215,
      "learning_rate": 7.985960422318144e-05,
      "loss": 2.8667,
      "step": 175626
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6000163555145264,
      "learning_rate": 7.98568252624263e-05,
      "loss": 2.8041,
      "step": 175627
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3561906814575195,
      "learning_rate": 7.985404634260011e-05,
      "loss": 3.0214,
      "step": 175628
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8186380863189697,
      "learning_rate": 7.985126746370305e-05,
      "loss": 2.9974,
      "step": 175629
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7006289958953857,
      "learning_rate": 7.984848862573585e-05,
      "loss": 3.0783,
      "step": 175630
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.433459758758545,
      "learning_rate": 7.984570982869892e-05,
      "loss": 2.8845,
      "step": 175631
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9847748279571533,
      "learning_rate": 7.98429310725929e-05,
      "loss": 2.6415,
      "step": 175632
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2756259441375732,
      "learning_rate": 7.984015235741814e-05,
      "loss": 2.9661,
      "step": 175633
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.800450086593628,
      "learning_rate": 7.983737368317544e-05,
      "loss": 3.1034,
      "step": 175634
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5604898929595947,
      "learning_rate": 7.983459504986494e-05,
      "loss": 2.8495,
      "step": 175635
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.343280553817749,
      "learning_rate": 7.983181645748745e-05,
      "loss": 2.8398,
      "step": 175636
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.475518226623535,
      "learning_rate": 7.982903790604326e-05,
      "loss": 3.1276,
      "step": 175637
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5567214488983154,
      "learning_rate": 7.982625939553312e-05,
      "loss": 3.1447,
      "step": 175638
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.564373731613159,
      "learning_rate": 7.982348092595734e-05,
      "loss": 2.9002,
      "step": 175639
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.316840648651123,
      "learning_rate": 7.982070249731671e-05,
      "loss": 2.7261,
      "step": 175640
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.7515292167663574,
      "learning_rate": 7.981792410961141e-05,
      "loss": 3.0022,
      "step": 175641
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7505042552948,
      "learning_rate": 7.981514576284219e-05,
      "loss": 3.0938,
      "step": 175642
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6006996631622314,
      "learning_rate": 7.98123674570094e-05,
      "loss": 2.8795,
      "step": 175643
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0509185791015625,
      "learning_rate": 7.980958919211373e-05,
      "loss": 2.7094,
      "step": 175644
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.489745855331421,
      "learning_rate": 7.980681096815553e-05,
      "loss": 3.0015,
      "step": 175645
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8329553604125977,
      "learning_rate": 7.980403278513546e-05,
      "loss": 3.0364,
      "step": 175646
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1926040649414062,
      "learning_rate": 7.980125464305402e-05,
      "loss": 2.9172,
      "step": 175647
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.423206329345703,
      "learning_rate": 7.979847654191168e-05,
      "loss": 3.0395,
      "step": 175648
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6962392330169678,
      "learning_rate": 7.979569848170884e-05,
      "loss": 3.0056,
      "step": 175649
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.347137212753296,
      "learning_rate": 7.979292046244624e-05,
      "loss": 3.2572,
      "step": 175650
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2415337562561035,
      "learning_rate": 7.979014248412424e-05,
      "loss": 2.8712,
      "step": 175651
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8694400787353516,
      "learning_rate": 7.978736454674343e-05,
      "loss": 3.0185,
      "step": 175652
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8529815673828125,
      "learning_rate": 7.978458665030436e-05,
      "loss": 3.0866,
      "step": 175653
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.391033172607422,
      "learning_rate": 7.978180879480747e-05,
      "loss": 2.9153,
      "step": 175654
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7448065280914307,
      "learning_rate": 7.977903098025322e-05,
      "loss": 3.0707,
      "step": 175655
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6202597618103027,
      "learning_rate": 7.977625320664229e-05,
      "loss": 2.9555,
      "step": 175656
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.795536994934082,
      "learning_rate": 7.977347547397501e-05,
      "loss": 3.0064,
      "step": 175657
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8939404487609863,
      "learning_rate": 7.977069778225208e-05,
      "loss": 2.8848,
      "step": 175658
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2105963230133057,
      "learning_rate": 7.976792013147397e-05,
      "loss": 2.8857,
      "step": 175659
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8994975090026855,
      "learning_rate": 7.976514252164106e-05,
      "loss": 2.9907,
      "step": 175660
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7620768547058105,
      "learning_rate": 7.976236495275404e-05,
      "loss": 2.9719,
      "step": 175661
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.523024559020996,
      "learning_rate": 7.975958742481335e-05,
      "loss": 2.8392,
      "step": 175662
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5779173374176025,
      "learning_rate": 7.975680993781945e-05,
      "loss": 3.0071,
      "step": 175663
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.992263317108154,
      "learning_rate": 7.975403249177301e-05,
      "loss": 2.9952,
      "step": 175664
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.152064800262451,
      "learning_rate": 7.975125508667443e-05,
      "loss": 3.2011,
      "step": 175665
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.51348876953125,
      "learning_rate": 7.974847772252413e-05,
      "loss": 3.1241,
      "step": 175666
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.749617576599121,
      "learning_rate": 7.974570039932288e-05,
      "loss": 3.0873,
      "step": 175667
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.921971321105957,
      "learning_rate": 7.974292311707097e-05,
      "loss": 2.779,
      "step": 175668
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.076258659362793,
      "learning_rate": 7.974014587576907e-05,
      "loss": 2.9086,
      "step": 175669
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.290405035018921,
      "learning_rate": 7.973736867541764e-05,
      "loss": 3.0018,
      "step": 175670
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.62735915184021,
      "learning_rate": 7.973459151601723e-05,
      "loss": 3.0284,
      "step": 175671
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.734713554382324,
      "learning_rate": 7.97318143975682e-05,
      "loss": 2.9959,
      "step": 175672
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.1445727348327637,
      "learning_rate": 7.972903732007126e-05,
      "loss": 2.9481,
      "step": 175673
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6040267944335938,
      "learning_rate": 7.972626028352678e-05,
      "loss": 2.889,
      "step": 175674
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.33549165725708,
      "learning_rate": 7.972348328793544e-05,
      "loss": 3.1,
      "step": 175675
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.684295177459717,
      "learning_rate": 7.972070633329765e-05,
      "loss": 2.833,
      "step": 175676
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4466609954833984,
      "learning_rate": 7.971792941961393e-05,
      "loss": 2.855,
      "step": 175677
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.567826986312866,
      "learning_rate": 7.971515254688473e-05,
      "loss": 2.9799,
      "step": 175678
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.829023599624634,
      "learning_rate": 7.971237571511074e-05,
      "loss": 2.911,
      "step": 175679
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3015925884246826,
      "learning_rate": 7.970959892429226e-05,
      "loss": 2.7586,
      "step": 175680
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4971437454223633,
      "learning_rate": 7.970682217443006e-05,
      "loss": 2.8703,
      "step": 175681
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5865910053253174,
      "learning_rate": 7.970404546552447e-05,
      "loss": 3.0129,
      "step": 175682
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2625632286071777,
      "learning_rate": 7.970126879757608e-05,
      "loss": 2.6332,
      "step": 175683
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.565422534942627,
      "learning_rate": 7.969849217058529e-05,
      "loss": 2.8345,
      "step": 175684
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5759856700897217,
      "learning_rate": 7.969571558455278e-05,
      "loss": 2.9004,
      "step": 175685
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5133354663848877,
      "learning_rate": 7.969293903947893e-05,
      "loss": 3.0665,
      "step": 175686
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.767246961593628,
      "learning_rate": 7.96901625353644e-05,
      "loss": 2.8192,
      "step": 175687
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.547630548477173,
      "learning_rate": 7.968738607220963e-05,
      "loss": 2.9436,
      "step": 175688
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.362488269805908,
      "learning_rate": 7.968460965001514e-05,
      "loss": 3.021,
      "step": 175689
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.9688870906829834,
      "learning_rate": 7.968183326878135e-05,
      "loss": 2.9639,
      "step": 175690
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3203752040863037,
      "learning_rate": 7.967905692850894e-05,
      "loss": 3.1747,
      "step": 175691
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.793696880340576,
      "learning_rate": 7.967628062919827e-05,
      "loss": 2.8829,
      "step": 175692
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8257222175598145,
      "learning_rate": 7.967350437085005e-05,
      "loss": 3.1542,
      "step": 175693
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.333958387374878,
      "learning_rate": 7.967072815346457e-05,
      "loss": 2.9767,
      "step": 175694
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7404260635375977,
      "learning_rate": 7.966795197704265e-05,
      "loss": 2.7723,
      "step": 175695
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.266280174255371,
      "learning_rate": 7.966517584158443e-05,
      "loss": 2.9724,
      "step": 175696
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9147751331329346,
      "learning_rate": 7.966239974709069e-05,
      "loss": 3.1163,
      "step": 175697
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6834771633148193,
      "learning_rate": 7.965962369356181e-05,
      "loss": 3.0206,
      "step": 175698
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.298281669616699,
      "learning_rate": 7.965684768099844e-05,
      "loss": 2.811,
      "step": 175699
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.629559278488159,
      "learning_rate": 7.965407170940093e-05,
      "loss": 2.8242,
      "step": 175700
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.494684934616089,
      "learning_rate": 7.965129577877006e-05,
      "loss": 2.8973,
      "step": 175701
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5736191272735596,
      "learning_rate": 7.9648519889106e-05,
      "loss": 2.955,
      "step": 175702
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9132516384124756,
      "learning_rate": 7.964574404040953e-05,
      "loss": 2.9543,
      "step": 175703
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3076281547546387,
      "learning_rate": 7.9642968232681e-05,
      "loss": 2.7245,
      "step": 175704
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.650872230529785,
      "learning_rate": 7.964019246592108e-05,
      "loss": 2.9156,
      "step": 175705
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6231942176818848,
      "learning_rate": 7.963741674013014e-05,
      "loss": 3.1077,
      "step": 175706
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3994247913360596,
      "learning_rate": 7.963464105530891e-05,
      "loss": 2.927,
      "step": 175707
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.524662971496582,
      "learning_rate": 7.963186541145758e-05,
      "loss": 2.8537,
      "step": 175708
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3177337646484375,
      "learning_rate": 7.962908980857695e-05,
      "loss": 2.8161,
      "step": 175709
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.466623306274414,
      "learning_rate": 7.962631424666734e-05,
      "loss": 2.8659,
      "step": 175710
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.722141981124878,
      "learning_rate": 7.962353872572947e-05,
      "loss": 2.9495,
      "step": 175711
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.321748733520508,
      "learning_rate": 7.96207632457636e-05,
      "loss": 2.8318,
      "step": 175712
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4954960346221924,
      "learning_rate": 7.96179878067706e-05,
      "loss": 2.9157,
      "step": 175713
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8019354343414307,
      "learning_rate": 7.961521240875061e-05,
      "loss": 2.8444,
      "step": 175714
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.344158172607422,
      "learning_rate": 7.96124370517044e-05,
      "loss": 2.8601,
      "step": 175715
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9967288970947266,
      "learning_rate": 7.96096617356323e-05,
      "loss": 2.7179,
      "step": 175716
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.328613519668579,
      "learning_rate": 7.9606886460535e-05,
      "loss": 2.892,
      "step": 175717
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.471972703933716,
      "learning_rate": 7.960411122641286e-05,
      "loss": 3.0399,
      "step": 175718
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.780646800994873,
      "learning_rate": 7.960133603326667e-05,
      "loss": 3.0337,
      "step": 175719
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7151951789855957,
      "learning_rate": 7.959856088109654e-05,
      "loss": 2.8778,
      "step": 175720
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.736816167831421,
      "learning_rate": 7.959578576990333e-05,
      "loss": 3.1415,
      "step": 175721
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.663007974624634,
      "learning_rate": 7.95930106996873e-05,
      "loss": 2.8889,
      "step": 175722
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7503762245178223,
      "learning_rate": 7.959023567044918e-05,
      "loss": 3.097,
      "step": 175723
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.319049835205078,
      "learning_rate": 7.958746068218933e-05,
      "loss": 3.1129,
      "step": 175724
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1158971786499023,
      "learning_rate": 7.95846857349085e-05,
      "loss": 2.7693,
      "step": 175725
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6022403240203857,
      "learning_rate": 7.958191082860687e-05,
      "loss": 3.0123,
      "step": 175726
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.519458293914795,
      "learning_rate": 7.957913596328518e-05,
      "loss": 2.7612,
      "step": 175727
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.428642749786377,
      "learning_rate": 7.957636113894381e-05,
      "loss": 2.8007,
      "step": 175728
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.782336711883545,
      "learning_rate": 7.957358635558346e-05,
      "loss": 2.8314,
      "step": 175729
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4458489418029785,
      "learning_rate": 7.957081161320447e-05,
      "loss": 3.0224,
      "step": 175730
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.790048599243164,
      "learning_rate": 7.956803691180748e-05,
      "loss": 2.8544,
      "step": 175731
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6936068534851074,
      "learning_rate": 7.956526225139295e-05,
      "loss": 3.0437,
      "step": 175732
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4339334964752197,
      "learning_rate": 7.956248763196142e-05,
      "loss": 2.9122,
      "step": 175733
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.829511880874634,
      "learning_rate": 7.955971305351329e-05,
      "loss": 2.9873,
      "step": 175734
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.214675188064575,
      "learning_rate": 7.955693851604927e-05,
      "loss": 3.0542,
      "step": 175735
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4941353797912598,
      "learning_rate": 7.955416401956967e-05,
      "loss": 2.7493,
      "step": 175736
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.025161027908325,
      "learning_rate": 7.955138956407522e-05,
      "loss": 2.8827,
      "step": 175737
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.351245880126953,
      "learning_rate": 7.95486151495663e-05,
      "loss": 3.2425,
      "step": 175738
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8211982250213623,
      "learning_rate": 7.954584077604348e-05,
      "loss": 2.7737,
      "step": 175739
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.267529010772705,
      "learning_rate": 7.954306644350714e-05,
      "loss": 2.9041,
      "step": 175740
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5866758823394775,
      "learning_rate": 7.954029215195802e-05,
      "loss": 3.066,
      "step": 175741
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.374607563018799,
      "learning_rate": 7.95375179013964e-05,
      "loss": 3.0672,
      "step": 175742
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7333314418792725,
      "learning_rate": 7.953474369182305e-05,
      "loss": 2.9001,
      "step": 175743
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6423940658569336,
      "learning_rate": 7.953196952323833e-05,
      "loss": 2.8825,
      "step": 175744
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.796018362045288,
      "learning_rate": 7.952919539564265e-05,
      "loss": 2.9861,
      "step": 175745
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.590815782546997,
      "learning_rate": 7.95264213090368e-05,
      "loss": 2.9198,
      "step": 175746
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7958381175994873,
      "learning_rate": 7.952364726342111e-05,
      "loss": 3.1061,
      "step": 175747
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.597362756729126,
      "learning_rate": 7.952087325879607e-05,
      "loss": 2.9131,
      "step": 175748
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.191560745239258,
      "learning_rate": 7.951809929516234e-05,
      "loss": 2.9767,
      "step": 175749
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.116208553314209,
      "learning_rate": 7.951532537252039e-05,
      "loss": 2.8504,
      "step": 175750
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6137585639953613,
      "learning_rate": 7.951255149087058e-05,
      "loss": 2.9674,
      "step": 175751
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0282351970672607,
      "learning_rate": 7.950977765021365e-05,
      "loss": 2.6382,
      "step": 175752
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.8687171936035156,
      "learning_rate": 7.950700385054999e-05,
      "loss": 2.8229,
      "step": 175753
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.312727451324463,
      "learning_rate": 7.950423009188007e-05,
      "loss": 2.9833,
      "step": 175754
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.610867738723755,
      "learning_rate": 7.950145637420456e-05,
      "loss": 3.3466,
      "step": 175755
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.977731704711914,
      "learning_rate": 7.94986826975239e-05,
      "loss": 3.1166,
      "step": 175756
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.435978889465332,
      "learning_rate": 7.94959090618385e-05,
      "loss": 2.8404,
      "step": 175757
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.698262929916382,
      "learning_rate": 7.949313546714909e-05,
      "loss": 2.8123,
      "step": 175758
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.576824426651001,
      "learning_rate": 7.949036191345594e-05,
      "loss": 3.045,
      "step": 175759
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.505688190460205,
      "learning_rate": 7.948758840075979e-05,
      "loss": 3.1836,
      "step": 175760
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6339480876922607,
      "learning_rate": 7.94848149290611e-05,
      "loss": 2.7674,
      "step": 175761
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6554911136627197,
      "learning_rate": 7.94820414983603e-05,
      "loss": 2.9134,
      "step": 175762
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.309659004211426,
      "learning_rate": 7.947926810865786e-05,
      "loss": 2.8044,
      "step": 175763
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.9700629711151123,
      "learning_rate": 7.947649475995452e-05,
      "loss": 3.0423,
      "step": 175764
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2508554458618164,
      "learning_rate": 7.947372145225052e-05,
      "loss": 3.022,
      "step": 175765
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3885421752929688,
      "learning_rate": 7.947094818554665e-05,
      "loss": 2.9072,
      "step": 175766
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.590148448944092,
      "learning_rate": 7.94681749598432e-05,
      "loss": 3.1453,
      "step": 175767
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3521063327789307,
      "learning_rate": 7.946540177514092e-05,
      "loss": 3.182,
      "step": 175768
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6834049224853516,
      "learning_rate": 7.946262863144005e-05,
      "loss": 2.9808,
      "step": 175769
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.531782865524292,
      "learning_rate": 7.945985552874131e-05,
      "loss": 2.935,
      "step": 175770
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2638676166534424,
      "learning_rate": 7.945708246704504e-05,
      "loss": 2.8558,
      "step": 175771
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.948216676712036,
      "learning_rate": 7.945430944635197e-05,
      "loss": 2.8782,
      "step": 175772
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.553210735321045,
      "learning_rate": 7.945153646666243e-05,
      "loss": 2.868,
      "step": 175773
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.463557243347168,
      "learning_rate": 7.944876352797716e-05,
      "loss": 3.115,
      "step": 175774
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4715256690979004,
      "learning_rate": 7.944599063029636e-05,
      "loss": 3.1552,
      "step": 175775
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.481661558151245,
      "learning_rate": 7.944321777362082e-05,
      "loss": 2.5998,
      "step": 175776
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.16713285446167,
      "learning_rate": 7.944044495795085e-05,
      "loss": 2.9368,
      "step": 175777
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.446437358856201,
      "learning_rate": 7.943767218328715e-05,
      "loss": 2.7229,
      "step": 175778
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4279568195343018,
      "learning_rate": 7.943489944963008e-05,
      "loss": 2.9261,
      "step": 175779
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8559463024139404,
      "learning_rate": 7.943212675698037e-05,
      "loss": 2.8882,
      "step": 175780
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.443396806716919,
      "learning_rate": 7.942935410533825e-05,
      "loss": 2.6754,
      "step": 175781
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5618269443511963,
      "learning_rate": 7.942658149470443e-05,
      "loss": 2.9148,
      "step": 175782
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3771016597747803,
      "learning_rate": 7.942380892507931e-05,
      "loss": 2.9859,
      "step": 175783
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.900336503982544,
      "learning_rate": 7.942103639646352e-05,
      "loss": 2.9414,
      "step": 175784
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.1714723110198975,
      "learning_rate": 7.94182639088575e-05,
      "loss": 2.8089,
      "step": 175785
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4441990852355957,
      "learning_rate": 7.941549146226189e-05,
      "loss": 2.8247,
      "step": 175786
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4583895206451416,
      "learning_rate": 7.941271905667699e-05,
      "loss": 2.921,
      "step": 175787
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.788562059402466,
      "learning_rate": 7.940994669210348e-05,
      "loss": 3.0601,
      "step": 175788
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6643762588500977,
      "learning_rate": 7.940717436854176e-05,
      "loss": 2.9995,
      "step": 175789
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.474888563156128,
      "learning_rate": 7.940440208599247e-05,
      "loss": 3.126,
      "step": 175790
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.630342721939087,
      "learning_rate": 7.9401629844456e-05,
      "loss": 2.9823,
      "step": 175791
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.98934268951416,
      "learning_rate": 7.93988576439331e-05,
      "loss": 3.1383,
      "step": 175792
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8592336177825928,
      "learning_rate": 7.939608548442395e-05,
      "loss": 3.1136,
      "step": 175793
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.945276975631714,
      "learning_rate": 7.93933133659293e-05,
      "loss": 2.9273,
      "step": 175794
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5061588287353516,
      "learning_rate": 7.939054128844951e-05,
      "loss": 3.037,
      "step": 175795
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1281628608703613,
      "learning_rate": 7.93877692519853e-05,
      "loss": 2.9829,
      "step": 175796
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5859992504119873,
      "learning_rate": 7.938499725653695e-05,
      "loss": 2.9145,
      "step": 175797
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.869533061981201,
      "learning_rate": 7.93822253021053e-05,
      "loss": 2.772,
      "step": 175798
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5759658813476562,
      "learning_rate": 7.937945338869046e-05,
      "loss": 3.0558,
      "step": 175799
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1091251373291016,
      "learning_rate": 7.937668151629322e-05,
      "loss": 2.9019,
      "step": 175800
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9266490936279297,
      "learning_rate": 7.937390968491396e-05,
      "loss": 2.9003,
      "step": 175801
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.858248710632324,
      "learning_rate": 7.937113789455333e-05,
      "loss": 3.1109,
      "step": 175802
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.882538318634033,
      "learning_rate": 7.936836614521169e-05,
      "loss": 2.8206,
      "step": 175803
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8832404613494873,
      "learning_rate": 7.936559443688979e-05,
      "loss": 2.9872,
      "step": 175804
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.525031805038452,
      "learning_rate": 7.936282276958782e-05,
      "loss": 3.0067,
      "step": 175805
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2143070697784424,
      "learning_rate": 7.936005114330658e-05,
      "loss": 3.0969,
      "step": 175806
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6111438274383545,
      "learning_rate": 7.935727955804634e-05,
      "loss": 2.7982,
      "step": 175807
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.799508810043335,
      "learning_rate": 7.935450801380786e-05,
      "loss": 3.0404,
      "step": 175808
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.090500831604004,
      "learning_rate": 7.935173651059147e-05,
      "loss": 3.096,
      "step": 175809
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4247734546661377,
      "learning_rate": 7.934896504839792e-05,
      "loss": 2.7451,
      "step": 175810
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2257957458496094,
      "learning_rate": 7.934619362722737e-05,
      "loss": 2.6254,
      "step": 175811
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5617644786834717,
      "learning_rate": 7.934342224708063e-05,
      "loss": 3.056,
      "step": 175812
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.685032844543457,
      "learning_rate": 7.934065090795805e-05,
      "loss": 2.9855,
      "step": 175813
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.772160768508911,
      "learning_rate": 7.933787960986024e-05,
      "loss": 3.1042,
      "step": 175814
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6096885204315186,
      "learning_rate": 7.933510835278765e-05,
      "loss": 2.8522,
      "step": 175815
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8218986988067627,
      "learning_rate": 7.933233713674099e-05,
      "loss": 2.8465,
      "step": 175816
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5518364906311035,
      "learning_rate": 7.932956596172043e-05,
      "loss": 2.8194,
      "step": 175817
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.409902572631836,
      "learning_rate": 7.932679482772675e-05,
      "loss": 3.0153,
      "step": 175818
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.052528142929077,
      "learning_rate": 7.93240237347603e-05,
      "loss": 3.0767,
      "step": 175819
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9111835956573486,
      "learning_rate": 7.932125268282175e-05,
      "loss": 2.895,
      "step": 175820
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.494387149810791,
      "learning_rate": 7.931848167191149e-05,
      "loss": 2.8204,
      "step": 175821
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6533992290496826,
      "learning_rate": 7.931571070203018e-05,
      "loss": 2.6434,
      "step": 175822
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4520797729492188,
      "learning_rate": 7.931293977317821e-05,
      "loss": 3.0982,
      "step": 175823
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.315667629241943,
      "learning_rate": 7.931016888535615e-05,
      "loss": 3.0989,
      "step": 175824
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4806199073791504,
      "learning_rate": 7.930739803856441e-05,
      "loss": 2.5763,
      "step": 175825
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8386456966400146,
      "learning_rate": 7.930462723280367e-05,
      "loss": 2.96,
      "step": 175826
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9202258586883545,
      "learning_rate": 7.930185646807428e-05,
      "loss": 2.7592,
      "step": 175827
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6333296298980713,
      "learning_rate": 7.929908574437695e-05,
      "loss": 2.8859,
      "step": 175828
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2176449298858643,
      "learning_rate": 7.929631506171208e-05,
      "loss": 2.9767,
      "step": 175829
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.3644232749938965,
      "learning_rate": 7.929354442008009e-05,
      "loss": 3.2051,
      "step": 175830
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2431509494781494,
      "learning_rate": 7.929077381948169e-05,
      "loss": 2.8502,
      "step": 175831
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.64304518699646,
      "learning_rate": 7.928800325991731e-05,
      "loss": 2.8044,
      "step": 175832
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9378485679626465,
      "learning_rate": 7.928523274138734e-05,
      "loss": 3.1718,
      "step": 175833
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4255309104919434,
      "learning_rate": 7.928246226389253e-05,
      "loss": 3.042,
      "step": 175834
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8096330165863037,
      "learning_rate": 7.927969182743326e-05,
      "loss": 2.8323,
      "step": 175835
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5440502166748047,
      "learning_rate": 7.927692143200995e-05,
      "loss": 3.0504,
      "step": 175836
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9644763469696045,
      "learning_rate": 7.927415107762337e-05,
      "loss": 2.9151,
      "step": 175837
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3578743934631348,
      "learning_rate": 7.927138076427384e-05,
      "loss": 2.7772,
      "step": 175838
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9037771224975586,
      "learning_rate": 7.926861049196186e-05,
      "loss": 2.8909,
      "step": 175839
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.650963306427002,
      "learning_rate": 7.926584026068812e-05,
      "loss": 2.9036,
      "step": 175840
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.888915777206421,
      "learning_rate": 7.926307007045301e-05,
      "loss": 2.8184,
      "step": 175841
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5076866149902344,
      "learning_rate": 7.926029992125695e-05,
      "loss": 2.9682,
      "step": 175842
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9695963859558105,
      "learning_rate": 7.925752981310069e-05,
      "loss": 2.9707,
      "step": 175843
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9168457984924316,
      "learning_rate": 7.925475974598455e-05,
      "loss": 2.8634,
      "step": 175844
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4873809814453125,
      "learning_rate": 7.925198971990915e-05,
      "loss": 2.929,
      "step": 175845
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.319272756576538,
      "learning_rate": 7.924921973487502e-05,
      "loss": 2.9818,
      "step": 175846
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.132432460784912,
      "learning_rate": 7.92464497908826e-05,
      "loss": 2.8327,
      "step": 175847
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6128792762756348,
      "learning_rate": 7.924367988793233e-05,
      "loss": 2.9437,
      "step": 175848
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6279702186584473,
      "learning_rate": 7.924091002602496e-05,
      "loss": 2.9457,
      "step": 175849
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.971979856491089,
      "learning_rate": 7.923814020516076e-05,
      "loss": 2.9295,
      "step": 175850
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0196545124053955,
      "learning_rate": 7.923537042534042e-05,
      "loss": 2.7283,
      "step": 175851
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3393919467926025,
      "learning_rate": 7.923260068656443e-05,
      "loss": 2.6829,
      "step": 175852
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.13118314743042,
      "learning_rate": 7.922983098883327e-05,
      "loss": 3.2098,
      "step": 175853
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6246490478515625,
      "learning_rate": 7.922706133214733e-05,
      "loss": 2.8976,
      "step": 175854
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0668821334838867,
      "learning_rate": 7.922429171650734e-05,
      "loss": 2.9845,
      "step": 175855
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.842613697052002,
      "learning_rate": 7.922152214191364e-05,
      "loss": 3.2164,
      "step": 175856
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.9908928871154785,
      "learning_rate": 7.921875260836694e-05,
      "loss": 3.0586,
      "step": 175857
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.401421070098877,
      "learning_rate": 7.921598311586752e-05,
      "loss": 2.6338,
      "step": 175858
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0475714206695557,
      "learning_rate": 7.921321366441621e-05,
      "loss": 2.834,
      "step": 175859
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4517173767089844,
      "learning_rate": 7.921044425401314e-05,
      "loss": 3.0546,
      "step": 175860
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1690940856933594,
      "learning_rate": 7.920767488465913e-05,
      "loss": 3.0488,
      "step": 175861
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.774750709533691,
      "learning_rate": 7.920490555635445e-05,
      "loss": 2.7948,
      "step": 175862
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.22910475730896,
      "learning_rate": 7.920213626909988e-05,
      "loss": 2.9569,
      "step": 175863
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2053685188293457,
      "learning_rate": 7.91993670228957e-05,
      "loss": 3.015,
      "step": 175864
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.156430244445801,
      "learning_rate": 7.919659781774272e-05,
      "loss": 2.8252,
      "step": 175865
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6150786876678467,
      "learning_rate": 7.919382865364102e-05,
      "loss": 3.0124,
      "step": 175866
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.085662364959717,
      "learning_rate": 7.91910595305915e-05,
      "loss": 3.0313,
      "step": 175867
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.009148597717285,
      "learning_rate": 7.918829044859443e-05,
      "loss": 2.9131,
      "step": 175868
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.7373251914978027,
      "learning_rate": 7.918552140765052e-05,
      "loss": 3.1665,
      "step": 175869
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5325376987457275,
      "learning_rate": 7.918275240776008e-05,
      "loss": 2.8142,
      "step": 175870
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.542031764984131,
      "learning_rate": 7.917998344892392e-05,
      "loss": 2.9456,
      "step": 175871
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.146979331970215,
      "learning_rate": 7.917721453114219e-05,
      "loss": 2.9882,
      "step": 175872
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4263081550598145,
      "learning_rate": 7.91744456544157e-05,
      "loss": 3.1672,
      "step": 175873
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9065170288085938,
      "learning_rate": 7.917167681874472e-05,
      "loss": 3.0473,
      "step": 175874
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.405912160873413,
      "learning_rate": 7.916890802413e-05,
      "loss": 3.0038,
      "step": 175875
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2595765590667725,
      "learning_rate": 7.916613927057185e-05,
      "loss": 2.9329,
      "step": 175876
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.1967060565948486,
      "learning_rate": 7.916337055807109e-05,
      "loss": 3.0157,
      "step": 175877
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6500039100646973,
      "learning_rate": 7.916060188662781e-05,
      "loss": 2.7837,
      "step": 175878
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4681100845336914,
      "learning_rate": 7.915783325624286e-05,
      "loss": 2.8895,
      "step": 175879
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.767270088195801,
      "learning_rate": 7.915506466691655e-05,
      "loss": 2.9067,
      "step": 175880
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.221968412399292,
      "learning_rate": 7.915229611864954e-05,
      "loss": 3.1086,
      "step": 175881
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.676879405975342,
      "learning_rate": 7.914952761144223e-05,
      "loss": 2.8321,
      "step": 175882
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.375088691711426,
      "learning_rate": 7.914675914529536e-05,
      "loss": 2.8414,
      "step": 175883
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5417532920837402,
      "learning_rate": 7.914399072020909e-05,
      "loss": 2.8437,
      "step": 175884
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.249591112136841,
      "learning_rate": 7.914122233618425e-05,
      "loss": 2.9259,
      "step": 175885
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7031290531158447,
      "learning_rate": 7.913845399322107e-05,
      "loss": 2.9434,
      "step": 175886
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6303365230560303,
      "learning_rate": 7.913568569132037e-05,
      "loss": 2.6667,
      "step": 175887
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.435699224472046,
      "learning_rate": 7.91329174304824e-05,
      "loss": 2.8823,
      "step": 175888
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.626444101333618,
      "learning_rate": 7.913014921070786e-05,
      "loss": 2.6699,
      "step": 175889
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5017049312591553,
      "learning_rate": 7.912738103199724e-05,
      "loss": 2.9503,
      "step": 175890
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.467193841934204,
      "learning_rate": 7.912461289435096e-05,
      "loss": 3.1299,
      "step": 175891
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.700176239013672,
      "learning_rate": 7.912184479776952e-05,
      "loss": 2.9219,
      "step": 175892
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3556463718414307,
      "learning_rate": 7.911907674225358e-05,
      "loss": 3.2618,
      "step": 175893
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.176107168197632,
      "learning_rate": 7.91163087278035e-05,
      "loss": 2.8081,
      "step": 175894
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.025679588317871,
      "learning_rate": 7.911354075441993e-05,
      "loss": 2.9536,
      "step": 175895
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3485851287841797,
      "learning_rate": 7.911077282210336e-05,
      "loss": 2.9253,
      "step": 175896
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.74552059173584,
      "learning_rate": 7.910800493085426e-05,
      "loss": 2.7793,
      "step": 175897
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.890066146850586,
      "learning_rate": 7.910523708067303e-05,
      "loss": 3.1933,
      "step": 175898
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9241983890533447,
      "learning_rate": 7.91024692715604e-05,
      "loss": 2.9226,
      "step": 175899
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.338726282119751,
      "learning_rate": 7.909970150351674e-05,
      "loss": 3.1088,
      "step": 175900
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.451833486557007,
      "learning_rate": 7.909693377654266e-05,
      "loss": 3.0092,
      "step": 175901
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.326568603515625,
      "learning_rate": 7.909416609063865e-05,
      "loss": 3.0174,
      "step": 175902
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5285146236419678,
      "learning_rate": 7.90913984458052e-05,
      "loss": 3.1004,
      "step": 175903
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6527020931243896,
      "learning_rate": 7.908863084204276e-05,
      "loss": 3.1143,
      "step": 175904
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5830390453338623,
      "learning_rate": 7.908586327935198e-05,
      "loss": 3.0008,
      "step": 175905
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.142587423324585,
      "learning_rate": 7.908309575773323e-05,
      "loss": 2.8015,
      "step": 175906
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.606111526489258,
      "learning_rate": 7.908032827718718e-05,
      "loss": 2.9544,
      "step": 175907
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3555312156677246,
      "learning_rate": 7.90775608377143e-05,
      "loss": 2.8178,
      "step": 175908
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.923778772354126,
      "learning_rate": 7.907479343931502e-05,
      "loss": 2.9346,
      "step": 175909
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4116392135620117,
      "learning_rate": 7.907202608198986e-05,
      "loss": 2.8543,
      "step": 175910
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5763485431671143,
      "learning_rate": 7.906925876573944e-05,
      "loss": 2.7011,
      "step": 175911
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.642611265182495,
      "learning_rate": 7.906649149056413e-05,
      "loss": 3.024,
      "step": 175912
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.238675832748413,
      "learning_rate": 7.906372425646463e-05,
      "loss": 2.6833,
      "step": 175913
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.299382209777832,
      "learning_rate": 7.906095706344135e-05,
      "loss": 3.0158,
      "step": 175914
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.137967109680176,
      "learning_rate": 7.905818991149483e-05,
      "loss": 3.1786,
      "step": 175915
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8008873462677,
      "learning_rate": 7.905542280062545e-05,
      "loss": 2.9606,
      "step": 175916
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5717713832855225,
      "learning_rate": 7.905265573083393e-05,
      "loss": 2.718,
      "step": 175917
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6514992713928223,
      "learning_rate": 7.90498887021206e-05,
      "loss": 2.7615,
      "step": 175918
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5339810848236084,
      "learning_rate": 7.904712171448615e-05,
      "loss": 3.0474,
      "step": 175919
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8396596908569336,
      "learning_rate": 7.904435476793104e-05,
      "loss": 3.0053,
      "step": 175920
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8288488388061523,
      "learning_rate": 7.904158786245563e-05,
      "loss": 2.643,
      "step": 175921
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3551025390625,
      "learning_rate": 7.903882099806067e-05,
      "loss": 2.7958,
      "step": 175922
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.304513692855835,
      "learning_rate": 7.903605417474654e-05,
      "loss": 2.9479,
      "step": 175923
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.010838031768799,
      "learning_rate": 7.903328739251372e-05,
      "loss": 2.9628,
      "step": 175924
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.355008602142334,
      "learning_rate": 7.903052065136288e-05,
      "loss": 2.7959,
      "step": 175925
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0660183429718018,
      "learning_rate": 7.90277539512944e-05,
      "loss": 2.8439,
      "step": 175926
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2954180240631104,
      "learning_rate": 7.902498729230875e-05,
      "loss": 3.0357,
      "step": 175927
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4193742275238037,
      "learning_rate": 7.902222067440665e-05,
      "loss": 2.9351,
      "step": 175928
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.86538028717041,
      "learning_rate": 7.901945409758837e-05,
      "loss": 2.9832,
      "step": 175929
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.78338360786438,
      "learning_rate": 7.901668756185464e-05,
      "loss": 2.957,
      "step": 175930
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.482303619384766,
      "learning_rate": 7.901392106720587e-05,
      "loss": 2.6987,
      "step": 175931
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.996197462081909,
      "learning_rate": 7.901115461364261e-05,
      "loss": 2.8922,
      "step": 175932
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.830080509185791,
      "learning_rate": 7.900838820116525e-05,
      "loss": 2.8269,
      "step": 175933
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9458720684051514,
      "learning_rate": 7.900562182977446e-05,
      "loss": 3.2924,
      "step": 175934
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.589326858520508,
      "learning_rate": 7.900285549947064e-05,
      "loss": 2.8429,
      "step": 175935
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.608901023864746,
      "learning_rate": 7.900008921025444e-05,
      "loss": 3.021,
      "step": 175936
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4547829627990723,
      "learning_rate": 7.899732296212631e-05,
      "loss": 3.1254,
      "step": 175937
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9245898723602295,
      "learning_rate": 7.899455675508672e-05,
      "loss": 3.1787,
      "step": 175938
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9478631019592285,
      "learning_rate": 7.899179058913615e-05,
      "loss": 3.0554,
      "step": 175939
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3275225162506104,
      "learning_rate": 7.898902446427525e-05,
      "loss": 2.9799,
      "step": 175940
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.022721767425537,
      "learning_rate": 7.898625838050439e-05,
      "loss": 3.2183,
      "step": 175941
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3444759845733643,
      "learning_rate": 7.898349233782422e-05,
      "loss": 2.959,
      "step": 175942
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.285281181335449,
      "learning_rate": 7.898072633623514e-05,
      "loss": 2.7473,
      "step": 175943
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3254430294036865,
      "learning_rate": 7.897796037573786e-05,
      "loss": 2.9686,
      "step": 175944
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.001447916030884,
      "learning_rate": 7.89751944563326e-05,
      "loss": 3.1579,
      "step": 175945
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7830770015716553,
      "learning_rate": 7.897242857802008e-05,
      "loss": 2.8195,
      "step": 175946
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4840824604034424,
      "learning_rate": 7.896966274080068e-05,
      "loss": 2.8518,
      "step": 175947
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.159852981567383,
      "learning_rate": 7.89668969446751e-05,
      "loss": 2.9568,
      "step": 175948
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6615397930145264,
      "learning_rate": 7.896413118964363e-05,
      "loss": 2.6797,
      "step": 175949
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.415142059326172,
      "learning_rate": 7.896136547570709e-05,
      "loss": 2.976,
      "step": 175950
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.283321857452393,
      "learning_rate": 7.895859980286562e-05,
      "loss": 3.1647,
      "step": 175951
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1197988986968994,
      "learning_rate": 7.895583417112004e-05,
      "loss": 3.1288,
      "step": 175952
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.897823810577393,
      "learning_rate": 7.895306858047061e-05,
      "loss": 2.8506,
      "step": 175953
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.11877179145813,
      "learning_rate": 7.895030303091809e-05,
      "loss": 2.8136,
      "step": 175954
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6333374977111816,
      "learning_rate": 7.894753752246278e-05,
      "loss": 3.2295,
      "step": 175955
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.639930009841919,
      "learning_rate": 7.89447720551054e-05,
      "loss": 2.8152,
      "step": 175956
    },
    {
      "epoch": 2.29,
      "grad_norm": 7.037528991699219,
      "learning_rate": 7.894200662884635e-05,
      "loss": 3.1733,
      "step": 175957
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.417245388031006,
      "learning_rate": 7.893924124368614e-05,
      "loss": 2.767,
      "step": 175958
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4801108837127686,
      "learning_rate": 7.893647589962524e-05,
      "loss": 2.9776,
      "step": 175959
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.654632091522217,
      "learning_rate": 7.893371059666429e-05,
      "loss": 3.0254,
      "step": 175960
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.882915496826172,
      "learning_rate": 7.893094533480366e-05,
      "loss": 3.0552,
      "step": 175961
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3541011810302734,
      "learning_rate": 7.892818011404403e-05,
      "loss": 3.0217,
      "step": 175962
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.563464641571045,
      "learning_rate": 7.892541493438581e-05,
      "loss": 2.7809,
      "step": 175963
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.463369846343994,
      "learning_rate": 7.892264979582952e-05,
      "loss": 2.8606,
      "step": 175964
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9644899368286133,
      "learning_rate": 7.891988469837561e-05,
      "loss": 3.0081,
      "step": 175965
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.824404716491699,
      "learning_rate": 7.891711964202477e-05,
      "loss": 2.9631,
      "step": 175966
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8245599269866943,
      "learning_rate": 7.89143546267773e-05,
      "loss": 2.9258,
      "step": 175967
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5002224445343018,
      "learning_rate": 7.89115896526339e-05,
      "loss": 3.0267,
      "step": 175968
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.848431348800659,
      "learning_rate": 7.890882471959504e-05,
      "loss": 2.9891,
      "step": 175969
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5860936641693115,
      "learning_rate": 7.89060598276612e-05,
      "loss": 2.8594,
      "step": 175970
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.460217237472534,
      "learning_rate": 7.890329497683278e-05,
      "loss": 3.0521,
      "step": 175971
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5504534244537354,
      "learning_rate": 7.890053016711051e-05,
      "loss": 3.2109,
      "step": 175972
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9559547901153564,
      "learning_rate": 7.889776539849473e-05,
      "loss": 3.1814,
      "step": 175973
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.754225492477417,
      "learning_rate": 7.889500067098613e-05,
      "loss": 2.8532,
      "step": 175974
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5121729373931885,
      "learning_rate": 7.889223598458508e-05,
      "loss": 3.016,
      "step": 175975
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.358074426651001,
      "learning_rate": 7.888947133929219e-05,
      "loss": 2.9334,
      "step": 175976
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7820827960968018,
      "learning_rate": 7.888670673510778e-05,
      "loss": 2.5653,
      "step": 175977
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.022031545639038,
      "learning_rate": 7.888394217203265e-05,
      "loss": 2.9994,
      "step": 175978
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.697004795074463,
      "learning_rate": 7.888117765006703e-05,
      "loss": 2.8544,
      "step": 175979
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.417937755584717,
      "learning_rate": 7.887841316921167e-05,
      "loss": 3.1666,
      "step": 175980
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.709296703338623,
      "learning_rate": 7.887564872946702e-05,
      "loss": 2.8499,
      "step": 175981
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.082305669784546,
      "learning_rate": 7.887288433083351e-05,
      "loss": 3.0125,
      "step": 175982
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.56648850440979,
      "learning_rate": 7.887011997331165e-05,
      "loss": 3.2501,
      "step": 175983
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6171305179595947,
      "learning_rate": 7.886735565690211e-05,
      "loss": 3.026,
      "step": 175984
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.348883867263794,
      "learning_rate": 7.886459138160518e-05,
      "loss": 2.6504,
      "step": 175985
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3226215839385986,
      "learning_rate": 7.886182714742159e-05,
      "loss": 2.8963,
      "step": 175986
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.411731481552124,
      "learning_rate": 7.88590629543518e-05,
      "loss": 2.7821,
      "step": 175987
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.246373653411865,
      "learning_rate": 7.885629880239623e-05,
      "loss": 2.8864,
      "step": 175988
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.895116090774536,
      "learning_rate": 7.885353469155542e-05,
      "loss": 3.0061,
      "step": 175989
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5125696659088135,
      "learning_rate": 7.885077062182994e-05,
      "loss": 3.093,
      "step": 175990
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.280815839767456,
      "learning_rate": 7.884800659322022e-05,
      "loss": 2.7512,
      "step": 175991
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.905803680419922,
      "learning_rate": 7.884524260572691e-05,
      "loss": 2.8156,
      "step": 175992
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.352926254272461,
      "learning_rate": 7.884247865935047e-05,
      "loss": 2.9469,
      "step": 175993
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9098286628723145,
      "learning_rate": 7.883971475409134e-05,
      "loss": 2.8131,
      "step": 175994
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.8205556869506836,
      "learning_rate": 7.883695088995002e-05,
      "loss": 2.907,
      "step": 175995
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.982823133468628,
      "learning_rate": 7.883418706692717e-05,
      "loss": 2.8066,
      "step": 175996
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8968703746795654,
      "learning_rate": 7.883142328502314e-05,
      "loss": 3.153,
      "step": 175997
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.671811580657959,
      "learning_rate": 7.882865954423861e-05,
      "loss": 2.9476,
      "step": 175998
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.450770139694214,
      "learning_rate": 7.882589584457402e-05,
      "loss": 2.8482,
      "step": 175999
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.372189521789551,
      "learning_rate": 7.882313218602985e-05,
      "loss": 3.0854,
      "step": 176000
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.86739182472229,
      "learning_rate": 7.882036856860654e-05,
      "loss": 3.1607,
      "step": 176001
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.72994065284729,
      "learning_rate": 7.88176049923048e-05,
      "loss": 3.2311,
      "step": 176002
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.2601423263549805,
      "learning_rate": 7.881484145712495e-05,
      "loss": 3.1442,
      "step": 176003
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3727850914001465,
      "learning_rate": 7.881207796306769e-05,
      "loss": 2.712,
      "step": 176004
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.1539227962493896,
      "learning_rate": 7.880931451013344e-05,
      "loss": 3.1126,
      "step": 176005
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.652238607406616,
      "learning_rate": 7.880655109832262e-05,
      "loss": 3.0472,
      "step": 176006
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3877930641174316,
      "learning_rate": 7.880378772763591e-05,
      "loss": 3.0635,
      "step": 176007
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.725830316543579,
      "learning_rate": 7.880102439807378e-05,
      "loss": 2.9964,
      "step": 176008
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.953341007232666,
      "learning_rate": 7.879826110963661e-05,
      "loss": 2.8672,
      "step": 176009
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.928060293197632,
      "learning_rate": 7.87954978623251e-05,
      "loss": 3.0067,
      "step": 176010
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.136199951171875,
      "learning_rate": 7.879273465613972e-05,
      "loss": 2.8989,
      "step": 176011
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.899136543273926,
      "learning_rate": 7.878997149108081e-05,
      "loss": 2.8734,
      "step": 176012
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.936521053314209,
      "learning_rate": 7.878720836714915e-05,
      "loss": 2.8925,
      "step": 176013
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.233744144439697,
      "learning_rate": 7.87844452843451e-05,
      "loss": 2.9909,
      "step": 176014
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3606762886047363,
      "learning_rate": 7.878168224266911e-05,
      "loss": 3.2267,
      "step": 176015
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.770998954772949,
      "learning_rate": 7.877891924212189e-05,
      "loss": 2.8032,
      "step": 176016
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.059463977813721,
      "learning_rate": 7.877615628270383e-05,
      "loss": 2.8666,
      "step": 176017
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4045090675354004,
      "learning_rate": 7.877339336441537e-05,
      "loss": 3.1184,
      "step": 176018
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.954873561859131,
      "learning_rate": 7.877063048725718e-05,
      "loss": 2.8357,
      "step": 176019
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.95514178276062,
      "learning_rate": 7.876786765122966e-05,
      "loss": 2.7178,
      "step": 176020
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.7396416664123535,
      "learning_rate": 7.876510485633342e-05,
      "loss": 2.9252,
      "step": 176021
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.081763982772827,
      "learning_rate": 7.876234210256893e-05,
      "loss": 3.0916,
      "step": 176022
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2043893337249756,
      "learning_rate": 7.875957938993663e-05,
      "loss": 2.9699,
      "step": 176023
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.205352306365967,
      "learning_rate": 7.875681671843716e-05,
      "loss": 3.1193,
      "step": 176024
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6400973796844482,
      "learning_rate": 7.875405408807099e-05,
      "loss": 2.8253,
      "step": 176025
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5720791816711426,
      "learning_rate": 7.875129149883851e-05,
      "loss": 2.6852,
      "step": 176026
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.545825958251953,
      "learning_rate": 7.874852895074048e-05,
      "loss": 2.9959,
      "step": 176027
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3829879760742188,
      "learning_rate": 7.874576644377713e-05,
      "loss": 3.0267,
      "step": 176028
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6789162158966064,
      "learning_rate": 7.874300397794924e-05,
      "loss": 2.872,
      "step": 176029
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.945955753326416,
      "learning_rate": 7.874024155325721e-05,
      "loss": 2.9041,
      "step": 176030
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.29838752746582,
      "learning_rate": 7.873747916970152e-05,
      "loss": 2.8706,
      "step": 176031
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6344363689422607,
      "learning_rate": 7.873471682728261e-05,
      "loss": 2.8065,
      "step": 176032
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.421625852584839,
      "learning_rate": 7.873195452600122e-05,
      "loss": 2.7714,
      "step": 176033
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.589918851852417,
      "learning_rate": 7.872919226585761e-05,
      "loss": 2.9354,
      "step": 176034
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.216031551361084,
      "learning_rate": 7.872643004685254e-05,
      "loss": 3.0759,
      "step": 176035
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4264330863952637,
      "learning_rate": 7.872366786898639e-05,
      "loss": 2.9005,
      "step": 176036
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.638577699661255,
      "learning_rate": 7.872090573225968e-05,
      "loss": 2.9871,
      "step": 176037
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.65474271774292,
      "learning_rate": 7.871814363667286e-05,
      "loss": 3.0317,
      "step": 176038
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.214442253112793,
      "learning_rate": 7.87153815822266e-05,
      "loss": 2.8162,
      "step": 176039
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.199392795562744,
      "learning_rate": 7.871261956892123e-05,
      "loss": 3.1801,
      "step": 176040
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.899474859237671,
      "learning_rate": 7.870985759675745e-05,
      "loss": 2.8909,
      "step": 176041
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6505796909332275,
      "learning_rate": 7.870709566573566e-05,
      "loss": 2.7685,
      "step": 176042
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2180728912353516,
      "learning_rate": 7.870433377585644e-05,
      "loss": 2.5202,
      "step": 176043
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4375293254852295,
      "learning_rate": 7.870157192712016e-05,
      "loss": 2.7346,
      "step": 176044
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.481158971786499,
      "learning_rate": 7.869881011952753e-05,
      "loss": 3.0106,
      "step": 176045
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.035438060760498,
      "learning_rate": 7.869604835307887e-05,
      "loss": 2.8093,
      "step": 176046
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.719590663909912,
      "learning_rate": 7.869328662777489e-05,
      "loss": 2.844,
      "step": 176047
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5679893493652344,
      "learning_rate": 7.869052494361597e-05,
      "loss": 3.0432,
      "step": 176048
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.62188458442688,
      "learning_rate": 7.86877633006027e-05,
      "loss": 3.0371,
      "step": 176049
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6370632648468018,
      "learning_rate": 7.868500169873544e-05,
      "loss": 3.006,
      "step": 176050
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.669820785522461,
      "learning_rate": 7.86822401380149e-05,
      "loss": 3.0139,
      "step": 176051
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4260573387145996,
      "learning_rate": 7.867947861844147e-05,
      "loss": 2.9586,
      "step": 176052
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.340877056121826,
      "learning_rate": 7.867671714001574e-05,
      "loss": 2.9455,
      "step": 176053
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.8803529739379883,
      "learning_rate": 7.867395570273819e-05,
      "loss": 2.9195,
      "step": 176054
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.554856061935425,
      "learning_rate": 7.867119430660937e-05,
      "loss": 2.9463,
      "step": 176055
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.127370595932007,
      "learning_rate": 7.866843295162963e-05,
      "loss": 3.0049,
      "step": 176056
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.212174415588379,
      "learning_rate": 7.86656716377997e-05,
      "loss": 2.6919,
      "step": 176057
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1152939796447754,
      "learning_rate": 7.866291036511989e-05,
      "loss": 3.0408,
      "step": 176058
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3722310066223145,
      "learning_rate": 7.866014913359093e-05,
      "loss": 2.8103,
      "step": 176059
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.737983465194702,
      "learning_rate": 7.865738794321326e-05,
      "loss": 2.9564,
      "step": 176060
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5418808460235596,
      "learning_rate": 7.86546267939873e-05,
      "loss": 3.1351,
      "step": 176061
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.925412178039551,
      "learning_rate": 7.865186568591358e-05,
      "loss": 3.1381,
      "step": 176062
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4597928524017334,
      "learning_rate": 7.864910461899272e-05,
      "loss": 2.9158,
      "step": 176063
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.622779130935669,
      "learning_rate": 7.86463435932251e-05,
      "loss": 3.0978,
      "step": 176064
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5864179134368896,
      "learning_rate": 7.864358260861136e-05,
      "loss": 3.0167,
      "step": 176065
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6612839698791504,
      "learning_rate": 7.864082166515201e-05,
      "loss": 3.0584,
      "step": 176066
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9696106910705566,
      "learning_rate": 7.863806076284746e-05,
      "loss": 2.8187,
      "step": 176067
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6825754642486572,
      "learning_rate": 7.86352999016982e-05,
      "loss": 3.0007,
      "step": 176068
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7392547130584717,
      "learning_rate": 7.863253908170491e-05,
      "loss": 2.6976,
      "step": 176069
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.907909870147705,
      "learning_rate": 7.862977830286791e-05,
      "loss": 3.1885,
      "step": 176070
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.737492084503174,
      "learning_rate": 7.862701756518792e-05,
      "loss": 2.9048,
      "step": 176071
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4676690101623535,
      "learning_rate": 7.862425686866532e-05,
      "loss": 3.0512,
      "step": 176072
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4292802810668945,
      "learning_rate": 7.862149621330068e-05,
      "loss": 3.1042,
      "step": 176073
    },
    {
      "epoch": 2.29,
      "grad_norm": 5.399632453918457,
      "learning_rate": 7.861873559909438e-05,
      "loss": 2.8473,
      "step": 176074
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.267794609069824,
      "learning_rate": 7.861597502604713e-05,
      "loss": 2.8548,
      "step": 176075
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2583885192871094,
      "learning_rate": 7.861321449415925e-05,
      "loss": 2.9112,
      "step": 176076
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.271371841430664,
      "learning_rate": 7.861045400343144e-05,
      "loss": 2.9452,
      "step": 176077
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6543633937835693,
      "learning_rate": 7.860769355386415e-05,
      "loss": 2.9296,
      "step": 176078
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8961989879608154,
      "learning_rate": 7.860493314545783e-05,
      "loss": 3.0823,
      "step": 176079
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.055579662322998,
      "learning_rate": 7.860217277821298e-05,
      "loss": 2.6882,
      "step": 176080
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9501359462738037,
      "learning_rate": 7.859941245213024e-05,
      "loss": 2.8285,
      "step": 176081
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.16717791557312,
      "learning_rate": 7.859665216720994e-05,
      "loss": 2.8415,
      "step": 176082
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6957712173461914,
      "learning_rate": 7.859389192345282e-05,
      "loss": 2.9322,
      "step": 176083
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.786787509918213,
      "learning_rate": 7.859113172085926e-05,
      "loss": 2.6935,
      "step": 176084
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7735018730163574,
      "learning_rate": 7.858837155942978e-05,
      "loss": 2.9242,
      "step": 176085
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.225759983062744,
      "learning_rate": 7.858561143916484e-05,
      "loss": 2.6802,
      "step": 176086
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.488769054412842,
      "learning_rate": 7.858285136006509e-05,
      "loss": 2.6811,
      "step": 176087
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.859750509262085,
      "learning_rate": 7.858009132213086e-05,
      "loss": 2.9752,
      "step": 176088
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5098729133605957,
      "learning_rate": 7.857733132536289e-05,
      "loss": 2.8908,
      "step": 176089
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6744377613067627,
      "learning_rate": 7.857457136976146e-05,
      "loss": 2.9154,
      "step": 176090
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.404324769973755,
      "learning_rate": 7.85718114553273e-05,
      "loss": 3.156,
      "step": 176091
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.966655731201172,
      "learning_rate": 7.856905158206082e-05,
      "loss": 3.0003,
      "step": 176092
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.232072353363037,
      "learning_rate": 7.856629174996253e-05,
      "loss": 2.8516,
      "step": 176093
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6667702198028564,
      "learning_rate": 7.856353195903284e-05,
      "loss": 2.9628,
      "step": 176094
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8663761615753174,
      "learning_rate": 7.856077220927246e-05,
      "loss": 3.1792,
      "step": 176095
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5864741802215576,
      "learning_rate": 7.855801250068174e-05,
      "loss": 2.842,
      "step": 176096
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.626782417297363,
      "learning_rate": 7.855525283326136e-05,
      "loss": 3.11,
      "step": 176097
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.605539083480835,
      "learning_rate": 7.855249320701175e-05,
      "loss": 3.0169,
      "step": 176098
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.83760666847229,
      "learning_rate": 7.854973362193337e-05,
      "loss": 2.9256,
      "step": 176099
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4541728496551514,
      "learning_rate": 7.854697407802671e-05,
      "loss": 3.0831,
      "step": 176100
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.462486505508423,
      "learning_rate": 7.854421457529243e-05,
      "loss": 3.0107,
      "step": 176101
    },
    {
      "epoch": 2.29,
      "grad_norm": 5.9494829177856445,
      "learning_rate": 7.854145511373087e-05,
      "loss": 2.9383,
      "step": 176102
    },
    {
      "epoch": 2.29,
      "grad_norm": 5.4479851722717285,
      "learning_rate": 7.853869569334276e-05,
      "loss": 3.0735,
      "step": 176103
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0612192153930664,
      "learning_rate": 7.853593631412844e-05,
      "loss": 2.8703,
      "step": 176104
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.101205587387085,
      "learning_rate": 7.853317697608841e-05,
      "loss": 2.9176,
      "step": 176105
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.273810386657715,
      "learning_rate": 7.853041767922332e-05,
      "loss": 3.0546,
      "step": 176106
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.970287322998047,
      "learning_rate": 7.852765842353358e-05,
      "loss": 3.2266,
      "step": 176107
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1058671474456787,
      "learning_rate": 7.852489920901967e-05,
      "loss": 2.8117,
      "step": 176108
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.489315986633301,
      "learning_rate": 7.852214003568227e-05,
      "loss": 2.7435,
      "step": 176109
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.10823392868042,
      "learning_rate": 7.851938090352176e-05,
      "loss": 3.0324,
      "step": 176110
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8050568103790283,
      "learning_rate": 7.851662181253857e-05,
      "loss": 2.9245,
      "step": 176111
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6722207069396973,
      "learning_rate": 7.851386276273345e-05,
      "loss": 2.9924,
      "step": 176112
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3993771076202393,
      "learning_rate": 7.851110375410665e-05,
      "loss": 2.9556,
      "step": 176113
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.6197099685668945,
      "learning_rate": 7.850834478665895e-05,
      "loss": 2.9227,
      "step": 176114
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.80637788772583,
      "learning_rate": 7.850558586039071e-05,
      "loss": 2.8755,
      "step": 176115
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.137936592102051,
      "learning_rate": 7.850282697530246e-05,
      "loss": 2.9926,
      "step": 176116
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.694180965423584,
      "learning_rate": 7.85000681313946e-05,
      "loss": 2.9783,
      "step": 176117
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.179299831390381,
      "learning_rate": 7.84973093286679e-05,
      "loss": 3.147,
      "step": 176118
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.302497625350952,
      "learning_rate": 7.849455056712262e-05,
      "loss": 3.0419,
      "step": 176119
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.425042152404785,
      "learning_rate": 7.849179184675947e-05,
      "loss": 2.9819,
      "step": 176120
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7592086791992188,
      "learning_rate": 7.848903316757889e-05,
      "loss": 2.8918,
      "step": 176121
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.429562568664551,
      "learning_rate": 7.848627452958135e-05,
      "loss": 2.8395,
      "step": 176122
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.492889404296875,
      "learning_rate": 7.848351593276733e-05,
      "loss": 3.0623,
      "step": 176123
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5467934608459473,
      "learning_rate": 7.848075737713748e-05,
      "loss": 3.0281,
      "step": 176124
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.8612921237945557,
      "learning_rate": 7.847799886269214e-05,
      "loss": 2.7307,
      "step": 176125
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.60499906539917,
      "learning_rate": 7.847524038943203e-05,
      "loss": 2.7964,
      "step": 176126
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.789921998977661,
      "learning_rate": 7.847248195735759e-05,
      "loss": 2.9493,
      "step": 176127
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6232078075408936,
      "learning_rate": 7.846972356646924e-05,
      "loss": 3.0195,
      "step": 176128
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.663181781768799,
      "learning_rate": 7.846696521676749e-05,
      "loss": 3.0448,
      "step": 176129
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.666938543319702,
      "learning_rate": 7.846420690825297e-05,
      "loss": 3.0498,
      "step": 176130
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.3245086669921875,
      "learning_rate": 7.846144864092609e-05,
      "loss": 2.8103,
      "step": 176131
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4736504554748535,
      "learning_rate": 7.845869041478747e-05,
      "loss": 2.9636,
      "step": 176132
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.652355670928955,
      "learning_rate": 7.845593222983758e-05,
      "loss": 2.7365,
      "step": 176133
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8495917320251465,
      "learning_rate": 7.845317408607688e-05,
      "loss": 3.0128,
      "step": 176134
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.7590384483337402,
      "learning_rate": 7.845041598350586e-05,
      "loss": 2.9119,
      "step": 176135
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.007228374481201,
      "learning_rate": 7.844765792212518e-05,
      "loss": 3.4174,
      "step": 176136
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.384456634521484,
      "learning_rate": 7.844489990193514e-05,
      "loss": 2.8028,
      "step": 176137
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.331684112548828,
      "learning_rate": 7.84421419229365e-05,
      "loss": 2.9127,
      "step": 176138
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.938659429550171,
      "learning_rate": 7.843938398512963e-05,
      "loss": 3.036,
      "step": 176139
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.421973943710327,
      "learning_rate": 7.843662608851509e-05,
      "loss": 2.7667,
      "step": 176140
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7209112644195557,
      "learning_rate": 7.843386823309322e-05,
      "loss": 3.0357,
      "step": 176141
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.232527732849121,
      "learning_rate": 7.84311104188648e-05,
      "loss": 3.008,
      "step": 176142
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.16668176651001,
      "learning_rate": 7.842835264583014e-05,
      "loss": 2.8701,
      "step": 176143
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.634906530380249,
      "learning_rate": 7.842559491398989e-05,
      "loss": 2.7468,
      "step": 176144
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1602983474731445,
      "learning_rate": 7.842283722334454e-05,
      "loss": 2.8233,
      "step": 176145
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.181976556777954,
      "learning_rate": 7.842007957389455e-05,
      "loss": 2.7155,
      "step": 176146
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.94407057762146,
      "learning_rate": 7.841732196564035e-05,
      "loss": 3.093,
      "step": 176147
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5548198223114014,
      "learning_rate": 7.841456439858265e-05,
      "loss": 2.8276,
      "step": 176148
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4720563888549805,
      "learning_rate": 7.841180687272176e-05,
      "loss": 3.1051,
      "step": 176149
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7849009037017822,
      "learning_rate": 7.840904938805842e-05,
      "loss": 2.6344,
      "step": 176150
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2155814170837402,
      "learning_rate": 7.840629194459299e-05,
      "loss": 2.9495,
      "step": 176151
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7679953575134277,
      "learning_rate": 7.840353454232604e-05,
      "loss": 2.7864,
      "step": 176152
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.324289321899414,
      "learning_rate": 7.840077718125797e-05,
      "loss": 3.0987,
      "step": 176153
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.083527088165283,
      "learning_rate": 7.839801986138943e-05,
      "loss": 2.9714,
      "step": 176154
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.536121129989624,
      "learning_rate": 7.839526258272082e-05,
      "loss": 2.9519,
      "step": 176155
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.534867525100708,
      "learning_rate": 7.83925053452528e-05,
      "loss": 3.1501,
      "step": 176156
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.1985390186309814,
      "learning_rate": 7.838974814898571e-05,
      "loss": 3.0944,
      "step": 176157
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4873886108398438,
      "learning_rate": 7.838699099392031e-05,
      "loss": 2.9507,
      "step": 176158
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.657508134841919,
      "learning_rate": 7.83842338800568e-05,
      "loss": 2.8365,
      "step": 176159
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.5421805381774902,
      "learning_rate": 7.838147680739589e-05,
      "loss": 2.9691,
      "step": 176160
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3088436126708984,
      "learning_rate": 7.837871977593799e-05,
      "loss": 2.9577,
      "step": 176161
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7490200996398926,
      "learning_rate": 7.837596278568376e-05,
      "loss": 3.0763,
      "step": 176162
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.031107187271118,
      "learning_rate": 7.837320583663355e-05,
      "loss": 2.8383,
      "step": 176163
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4776031970977783,
      "learning_rate": 7.83704489287881e-05,
      "loss": 2.973,
      "step": 176164
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.429163694381714,
      "learning_rate": 7.83676920621476e-05,
      "loss": 2.9213,
      "step": 176165
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.868255615234375,
      "learning_rate": 7.836493523671279e-05,
      "loss": 2.7185,
      "step": 176166
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.674468994140625,
      "learning_rate": 7.836217845248407e-05,
      "loss": 2.8778,
      "step": 176167
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.760413408279419,
      "learning_rate": 7.835942170946203e-05,
      "loss": 2.8294,
      "step": 176168
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4373862743377686,
      "learning_rate": 7.835666500764714e-05,
      "loss": 2.9915,
      "step": 176169
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5751659870147705,
      "learning_rate": 7.835390834704007e-05,
      "loss": 2.835,
      "step": 176170
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.994814395904541,
      "learning_rate": 7.835115172764101e-05,
      "loss": 3.0526,
      "step": 176171
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.903630495071411,
      "learning_rate": 7.834839514945077e-05,
      "loss": 3.0564,
      "step": 176172
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.314054012298584,
      "learning_rate": 7.834563861246966e-05,
      "loss": 3.0655,
      "step": 176173
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.338874578475952,
      "learning_rate": 7.834288211669836e-05,
      "loss": 2.6707,
      "step": 176174
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9272773265838623,
      "learning_rate": 7.834012566213719e-05,
      "loss": 2.8199,
      "step": 176175
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.7345340251922607,
      "learning_rate": 7.833736924878688e-05,
      "loss": 2.9184,
      "step": 176176
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.830740213394165,
      "learning_rate": 7.833461287664785e-05,
      "loss": 2.8976,
      "step": 176177
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.8838798999786377,
      "learning_rate": 7.833185654572059e-05,
      "loss": 2.8119,
      "step": 176178
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4783332347869873,
      "learning_rate": 7.832910025600555e-05,
      "loss": 2.799,
      "step": 176179
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.11482572555542,
      "learning_rate": 7.832634400750339e-05,
      "loss": 3.3308,
      "step": 176180
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7659385204315186,
      "learning_rate": 7.832358780021444e-05,
      "loss": 2.9374,
      "step": 176181
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6709821224212646,
      "learning_rate": 7.832083163413941e-05,
      "loss": 2.9333,
      "step": 176182
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.485961437225342,
      "learning_rate": 7.831807550927874e-05,
      "loss": 3.0542,
      "step": 176183
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7585887908935547,
      "learning_rate": 7.831531942563294e-05,
      "loss": 3.0042,
      "step": 176184
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2354958057403564,
      "learning_rate": 7.83125633832024e-05,
      "loss": 2.9978,
      "step": 176185
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6442630290985107,
      "learning_rate": 7.830980738198782e-05,
      "loss": 2.9453,
      "step": 176186
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.88020658493042,
      "learning_rate": 7.830705142198954e-05,
      "loss": 2.902,
      "step": 176187
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6466104984283447,
      "learning_rate": 7.830429550320826e-05,
      "loss": 3.0799,
      "step": 176188
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.559263229370117,
      "learning_rate": 7.83015396256444e-05,
      "loss": 3.2051,
      "step": 176189
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.802915573120117,
      "learning_rate": 7.829878378929839e-05,
      "loss": 2.9222,
      "step": 176190
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.885692834854126,
      "learning_rate": 7.829602799417089e-05,
      "loss": 2.9589,
      "step": 176191
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4131901264190674,
      "learning_rate": 7.829327224026235e-05,
      "loss": 3.0585,
      "step": 176192
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.2204084396362305,
      "learning_rate": 7.829051652757318e-05,
      "loss": 3.0348,
      "step": 176193
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.853318929672241,
      "learning_rate": 7.82877608561041e-05,
      "loss": 3.0181,
      "step": 176194
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6224491596221924,
      "learning_rate": 7.828500522585547e-05,
      "loss": 2.8917,
      "step": 176195
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.830883502960205,
      "learning_rate": 7.828224963682779e-05,
      "loss": 3.1623,
      "step": 176196
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.507784843444824,
      "learning_rate": 7.827949408902172e-05,
      "loss": 3.0391,
      "step": 176197
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3167755603790283,
      "learning_rate": 7.827673858243765e-05,
      "loss": 2.8765,
      "step": 176198
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3621699810028076,
      "learning_rate": 7.827398311707603e-05,
      "loss": 3.0832,
      "step": 176199
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.717345714569092,
      "learning_rate": 7.827122769293758e-05,
      "loss": 2.9321,
      "step": 176200
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5757288932800293,
      "learning_rate": 7.826847231002268e-05,
      "loss": 2.8749,
      "step": 176201
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5118770599365234,
      "learning_rate": 7.826571696833173e-05,
      "loss": 2.9304,
      "step": 176202
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0694401264190674,
      "learning_rate": 7.826296166786552e-05,
      "loss": 2.9064,
      "step": 176203
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4735069274902344,
      "learning_rate": 7.82602064086243e-05,
      "loss": 2.9276,
      "step": 176204
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5447516441345215,
      "learning_rate": 7.825745119060875e-05,
      "loss": 3.0875,
      "step": 176205
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0941951274871826,
      "learning_rate": 7.825469601381937e-05,
      "loss": 2.8856,
      "step": 176206
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.710726499557495,
      "learning_rate": 7.825194087825658e-05,
      "loss": 2.8285,
      "step": 176207
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.944403886795044,
      "learning_rate": 7.824918578392091e-05,
      "loss": 2.9637,
      "step": 176208
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.535078287124634,
      "learning_rate": 7.824643073081296e-05,
      "loss": 3.1506,
      "step": 176209
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.20395040512085,
      "learning_rate": 7.82436757189331e-05,
      "loss": 2.7682,
      "step": 176210
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3206262588500977,
      "learning_rate": 7.824092074828199e-05,
      "loss": 3.0419,
      "step": 176211
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.80129337310791,
      "learning_rate": 7.82381658188601e-05,
      "loss": 3.3704,
      "step": 176212
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1419975757598877,
      "learning_rate": 7.823541093066792e-05,
      "loss": 2.7805,
      "step": 176213
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7648603916168213,
      "learning_rate": 7.823265608370588e-05,
      "loss": 3.0457,
      "step": 176214
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7701516151428223,
      "learning_rate": 7.822990127797466e-05,
      "loss": 3.1378,
      "step": 176215
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3947949409484863,
      "learning_rate": 7.82271465134746e-05,
      "loss": 2.9666,
      "step": 176216
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.633770227432251,
      "learning_rate": 7.822439179020641e-05,
      "loss": 3.0345,
      "step": 176217
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3303349018096924,
      "learning_rate": 7.822163710817047e-05,
      "loss": 2.919,
      "step": 176218
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4381027221679688,
      "learning_rate": 7.82188824673673e-05,
      "loss": 2.9407,
      "step": 176219
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5285189151763916,
      "learning_rate": 7.821612786779736e-05,
      "loss": 2.8962,
      "step": 176220
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3137154579162598,
      "learning_rate": 7.82133733094613e-05,
      "loss": 2.9506,
      "step": 176221
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.575432777404785,
      "learning_rate": 7.821061879235949e-05,
      "loss": 2.637,
      "step": 176222
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.0358991622924805,
      "learning_rate": 7.820786431649257e-05,
      "loss": 2.8782,
      "step": 176223
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6983537673950195,
      "learning_rate": 7.820510988186095e-05,
      "loss": 2.8329,
      "step": 176224
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.735321283340454,
      "learning_rate": 7.820235548846532e-05,
      "loss": 2.8488,
      "step": 176225
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.380563497543335,
      "learning_rate": 7.81996011363059e-05,
      "loss": 2.9791,
      "step": 176226
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.530611753463745,
      "learning_rate": 7.819684682538347e-05,
      "loss": 2.7811,
      "step": 176227
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.895958423614502,
      "learning_rate": 7.819409255569831e-05,
      "loss": 3.0088,
      "step": 176228
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.3482539653778076,
      "learning_rate": 7.819133832725117e-05,
      "loss": 2.9881,
      "step": 176229
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.412799835205078,
      "learning_rate": 7.818858414004233e-05,
      "loss": 2.9109,
      "step": 176230
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.4974606037139893,
      "learning_rate": 7.818582999407264e-05,
      "loss": 2.9791,
      "step": 176231
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.2401065826416016,
      "learning_rate": 7.818307588934217e-05,
      "loss": 3.0418,
      "step": 176232
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.449307918548584,
      "learning_rate": 7.818032182585178e-05,
      "loss": 2.781,
      "step": 176233
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7920265197753906,
      "learning_rate": 7.817756780360176e-05,
      "loss": 2.7774,
      "step": 176234
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.345141887664795,
      "learning_rate": 7.817481382259279e-05,
      "loss": 2.9215,
      "step": 176235
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.796534538269043,
      "learning_rate": 7.817205988282522e-05,
      "loss": 3.0006,
      "step": 176236
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7711293697357178,
      "learning_rate": 7.816930598429983e-05,
      "loss": 3.036,
      "step": 176237
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.6121268272399902,
      "learning_rate": 7.816655212701679e-05,
      "loss": 2.8169,
      "step": 176238
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.9205830097198486,
      "learning_rate": 7.816379831097686e-05,
      "loss": 2.7447,
      "step": 176239
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6573307514190674,
      "learning_rate": 7.816104453618037e-05,
      "loss": 2.7976,
      "step": 176240
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5057268142700195,
      "learning_rate": 7.815829080262801e-05,
      "loss": 2.8235,
      "step": 176241
    },
    {
      "epoch": 2.29,
      "grad_norm": 5.0375165939331055,
      "learning_rate": 7.815553711032013e-05,
      "loss": 3.0826,
      "step": 176242
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.4900097846984863,
      "learning_rate": 7.815278345925749e-05,
      "loss": 2.7236,
      "step": 176243
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.550658702850342,
      "learning_rate": 7.81500298494403e-05,
      "loss": 3.0752,
      "step": 176244
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.511024236679077,
      "learning_rate": 7.814727628086924e-05,
      "loss": 3.0699,
      "step": 176245
    },
    {
      "epoch": 2.29,
      "grad_norm": 4.472982883453369,
      "learning_rate": 7.814452275354472e-05,
      "loss": 3.0324,
      "step": 176246
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.5048389434814453,
      "learning_rate": 7.814176926746744e-05,
      "loss": 3.0051,
      "step": 176247
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.976616382598877,
      "learning_rate": 7.813901582263768e-05,
      "loss": 3.1168,
      "step": 176248
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.370943784713745,
      "learning_rate": 7.813626241905624e-05,
      "loss": 3.1662,
      "step": 176249
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.1957614421844482,
      "learning_rate": 7.813350905672328e-05,
      "loss": 2.8946,
      "step": 176250
    },
    {
      "epoch": 2.29,
      "grad_norm": 3.514012575149536,
      "learning_rate": 7.813075573563958e-05,
      "loss": 3.131,
      "step": 176251
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.1341447830200195,
      "learning_rate": 7.812800245580549e-05,
      "loss": 3.0132,
      "step": 176252
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7707202434539795,
      "learning_rate": 7.812524921722168e-05,
      "loss": 2.7711,
      "step": 176253
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.54533052444458,
      "learning_rate": 7.812249601988846e-05,
      "loss": 2.981,
      "step": 176254
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7428176403045654,
      "learning_rate": 7.811974286380664e-05,
      "loss": 2.7973,
      "step": 176255
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7438504695892334,
      "learning_rate": 7.811698974897636e-05,
      "loss": 2.711,
      "step": 176256
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.6483209133148193,
      "learning_rate": 7.811423667539843e-05,
      "loss": 3.1312,
      "step": 176257
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.3495981693267822,
      "learning_rate": 7.811148364307315e-05,
      "loss": 2.8422,
      "step": 176258
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2377915382385254,
      "learning_rate": 7.810873065200124e-05,
      "loss": 2.9282,
      "step": 176259
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.14068865776062,
      "learning_rate": 7.810597770218299e-05,
      "loss": 3.1276,
      "step": 176260
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4438297748565674,
      "learning_rate": 7.810322479361922e-05,
      "loss": 2.8624,
      "step": 176261
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7849998474121094,
      "learning_rate": 7.810047192631008e-05,
      "loss": 3.0437,
      "step": 176262
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4908406734466553,
      "learning_rate": 7.809771910025635e-05,
      "loss": 2.7278,
      "step": 176263
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6972508430480957,
      "learning_rate": 7.809496631545833e-05,
      "loss": 2.7681,
      "step": 176264
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.54074764251709,
      "learning_rate": 7.809221357191671e-05,
      "loss": 2.9091,
      "step": 176265
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4034790992736816,
      "learning_rate": 7.80894608696319e-05,
      "loss": 2.8706,
      "step": 176266
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.820768117904663,
      "learning_rate": 7.808670820860452e-05,
      "loss": 2.9066,
      "step": 176267
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.659719944000244,
      "learning_rate": 7.808395558883501e-05,
      "loss": 3.0062,
      "step": 176268
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3893332481384277,
      "learning_rate": 7.808120301032384e-05,
      "loss": 2.9571,
      "step": 176269
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9939157962799072,
      "learning_rate": 7.807845047307152e-05,
      "loss": 3.0227,
      "step": 176270
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.284482002258301,
      "learning_rate": 7.807569797707868e-05,
      "loss": 2.9539,
      "step": 176271
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.553051710128784,
      "learning_rate": 7.807294552234569e-05,
      "loss": 2.7745,
      "step": 176272
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.319268226623535,
      "learning_rate": 7.807019310887317e-05,
      "loss": 2.9594,
      "step": 176273
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5663115978240967,
      "learning_rate": 7.806744073666164e-05,
      "loss": 2.7901,
      "step": 176274
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7460033893585205,
      "learning_rate": 7.806468840571145e-05,
      "loss": 2.8686,
      "step": 176275
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6037216186523438,
      "learning_rate": 7.806193611602333e-05,
      "loss": 3.1034,
      "step": 176276
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2481281757354736,
      "learning_rate": 7.805918386759767e-05,
      "loss": 2.7179,
      "step": 176277
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.781161308288574,
      "learning_rate": 7.80564316604349e-05,
      "loss": 2.7586,
      "step": 176278
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4519526958465576,
      "learning_rate": 7.805367949453572e-05,
      "loss": 2.9125,
      "step": 176279
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.833608388900757,
      "learning_rate": 7.805092736990056e-05,
      "loss": 2.9337,
      "step": 176280
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7396090030670166,
      "learning_rate": 7.804817528652983e-05,
      "loss": 3.0631,
      "step": 176281
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.78246808052063,
      "learning_rate": 7.804542324442424e-05,
      "loss": 2.9685,
      "step": 176282
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9157445430755615,
      "learning_rate": 7.804267124358415e-05,
      "loss": 2.9294,
      "step": 176283
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.271160840988159,
      "learning_rate": 7.803991928401007e-05,
      "loss": 2.7386,
      "step": 176284
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.031639099121094,
      "learning_rate": 7.803716736570263e-05,
      "loss": 2.8764,
      "step": 176285
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.594367504119873,
      "learning_rate": 7.803441548866229e-05,
      "loss": 3.2407,
      "step": 176286
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.829430103302002,
      "learning_rate": 7.803166365288944e-05,
      "loss": 2.859,
      "step": 176287
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2273190021514893,
      "learning_rate": 7.802891185838479e-05,
      "loss": 3.0975,
      "step": 176288
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0371246337890625,
      "learning_rate": 7.802616010514864e-05,
      "loss": 2.762,
      "step": 176289
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.618098735809326,
      "learning_rate": 7.802340839318172e-05,
      "loss": 3.2096,
      "step": 176290
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3400728702545166,
      "learning_rate": 7.802065672248446e-05,
      "loss": 3.1617,
      "step": 176291
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.825770854949951,
      "learning_rate": 7.801790509305731e-05,
      "loss": 2.7293,
      "step": 176292
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.197686195373535,
      "learning_rate": 7.801515350490076e-05,
      "loss": 2.6423,
      "step": 176293
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0659286975860596,
      "learning_rate": 7.801240195801548e-05,
      "loss": 2.9173,
      "step": 176294
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9011831283569336,
      "learning_rate": 7.800965045240176e-05,
      "loss": 3.1051,
      "step": 176295
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4303832054138184,
      "learning_rate": 7.800689898806036e-05,
      "loss": 2.9815,
      "step": 176296
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.661400079727173,
      "learning_rate": 7.800414756499165e-05,
      "loss": 2.6926,
      "step": 176297
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8072924613952637,
      "learning_rate": 7.800139618319618e-05,
      "loss": 2.8873,
      "step": 176298
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.935572862625122,
      "learning_rate": 7.799864484267434e-05,
      "loss": 2.6485,
      "step": 176299
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.855914831161499,
      "learning_rate": 7.799589354342682e-05,
      "loss": 3.0691,
      "step": 176300
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2725119590759277,
      "learning_rate": 7.799314228545396e-05,
      "loss": 2.9318,
      "step": 176301
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.9106557369232178,
      "learning_rate": 7.799039106875647e-05,
      "loss": 3.0723,
      "step": 176302
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.964552402496338,
      "learning_rate": 7.798763989333466e-05,
      "loss": 2.7619,
      "step": 176303
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9951729774475098,
      "learning_rate": 7.798488875918934e-05,
      "loss": 2.9394,
      "step": 176304
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.739553451538086,
      "learning_rate": 7.798213766632062e-05,
      "loss": 3.0673,
      "step": 176305
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.742422342300415,
      "learning_rate": 7.797938661472932e-05,
      "loss": 2.952,
      "step": 176306
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1910016536712646,
      "learning_rate": 7.797663560441574e-05,
      "loss": 2.9244,
      "step": 176307
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.489377021789551,
      "learning_rate": 7.797388463538057e-05,
      "loss": 3.0834,
      "step": 176308
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0049712657928467,
      "learning_rate": 7.797113370762417e-05,
      "loss": 2.8285,
      "step": 176309
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.551466941833496,
      "learning_rate": 7.79683828211473e-05,
      "loss": 2.9032,
      "step": 176310
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1349644660949707,
      "learning_rate": 7.796563197595014e-05,
      "loss": 3.0308,
      "step": 176311
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.147709608078003,
      "learning_rate": 7.796288117203342e-05,
      "loss": 3.1088,
      "step": 176312
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2763025760650635,
      "learning_rate": 7.796013040939755e-05,
      "loss": 3.1149,
      "step": 176313
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.524723529815674,
      "learning_rate": 7.795737968804312e-05,
      "loss": 2.6765,
      "step": 176314
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.702547311782837,
      "learning_rate": 7.795462900797054e-05,
      "loss": 2.9775,
      "step": 176315
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.697326898574829,
      "learning_rate": 7.795187836918056e-05,
      "loss": 2.8292,
      "step": 176316
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6133182048797607,
      "learning_rate": 7.794912777167334e-05,
      "loss": 3.1734,
      "step": 176317
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7106709480285645,
      "learning_rate": 7.794637721544965e-05,
      "loss": 3.0495,
      "step": 176318
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5118260383605957,
      "learning_rate": 7.794362670050984e-05,
      "loss": 2.9089,
      "step": 176319
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.611567735671997,
      "learning_rate": 7.794087622685457e-05,
      "loss": 3.0923,
      "step": 176320
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7669711112976074,
      "learning_rate": 7.793812579448422e-05,
      "loss": 3.0633,
      "step": 176321
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3570125102996826,
      "learning_rate": 7.793537540339953e-05,
      "loss": 2.8382,
      "step": 176322
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.841705799102783,
      "learning_rate": 7.79326250536007e-05,
      "loss": 3.0448,
      "step": 176323
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4454259872436523,
      "learning_rate": 7.792987474508842e-05,
      "loss": 2.9419,
      "step": 176324
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2228405475616455,
      "learning_rate": 7.792712447786313e-05,
      "loss": 3.0897,
      "step": 176325
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8672547340393066,
      "learning_rate": 7.792437425192545e-05,
      "loss": 2.9936,
      "step": 176326
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9721839427948,
      "learning_rate": 7.79216240672757e-05,
      "loss": 2.8886,
      "step": 176327
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7054131031036377,
      "learning_rate": 7.791887392391473e-05,
      "loss": 2.9656,
      "step": 176328
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6104929447174072,
      "learning_rate": 7.791612382184266e-05,
      "loss": 2.5412,
      "step": 176329
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5236880779266357,
      "learning_rate": 7.791337376106023e-05,
      "loss": 2.7838,
      "step": 176330
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6452014446258545,
      "learning_rate": 7.791062374156784e-05,
      "loss": 3.0277,
      "step": 176331
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.791252374649048,
      "learning_rate": 7.790787376336612e-05,
      "loss": 2.8883,
      "step": 176332
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5385265350341797,
      "learning_rate": 7.790512382645543e-05,
      "loss": 2.9753,
      "step": 176333
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.013773202896118,
      "learning_rate": 7.790237393083654e-05,
      "loss": 2.7737,
      "step": 176334
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.687330722808838,
      "learning_rate": 7.789962407650963e-05,
      "loss": 2.9301,
      "step": 176335
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2954094409942627,
      "learning_rate": 7.78968742634755e-05,
      "loss": 2.916,
      "step": 176336
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4190030097961426,
      "learning_rate": 7.789412449173438e-05,
      "loss": 2.9491,
      "step": 176337
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7553811073303223,
      "learning_rate": 7.789137476128706e-05,
      "loss": 2.7652,
      "step": 176338
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4454596042633057,
      "learning_rate": 7.788862507213382e-05,
      "loss": 2.9575,
      "step": 176339
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.654513120651245,
      "learning_rate": 7.788587542427547e-05,
      "loss": 3.0061,
      "step": 176340
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.909087657928467,
      "learning_rate": 7.788312581771214e-05,
      "loss": 2.7762,
      "step": 176341
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6348135471343994,
      "learning_rate": 7.788037625244461e-05,
      "loss": 2.8981,
      "step": 176342
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2451558113098145,
      "learning_rate": 7.787762672847324e-05,
      "loss": 2.9162,
      "step": 176343
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.03198504447937,
      "learning_rate": 7.78748772457987e-05,
      "loss": 2.696,
      "step": 176344
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.802081823348999,
      "learning_rate": 7.787212780442133e-05,
      "loss": 2.9454,
      "step": 176345
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.10933780670166,
      "learning_rate": 7.78693784043419e-05,
      "loss": 2.815,
      "step": 176346
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9778201580047607,
      "learning_rate": 7.786662904556053e-05,
      "loss": 2.8984,
      "step": 176347
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7079966068267822,
      "learning_rate": 7.786387972807809e-05,
      "loss": 2.8763,
      "step": 176348
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5197649002075195,
      "learning_rate": 7.78611304518948e-05,
      "loss": 2.8583,
      "step": 176349
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.336789131164551,
      "learning_rate": 7.785838121701146e-05,
      "loss": 2.7978,
      "step": 176350
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.528153657913208,
      "learning_rate": 7.785563202342834e-05,
      "loss": 2.9122,
      "step": 176351
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.812607526779175,
      "learning_rate": 7.785288287114612e-05,
      "loss": 3.072,
      "step": 176352
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.549178123474121,
      "learning_rate": 7.785013376016525e-05,
      "loss": 3.1123,
      "step": 176353
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.644770383834839,
      "learning_rate": 7.784738469048625e-05,
      "loss": 2.9365,
      "step": 176354
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.6239755153656006,
      "learning_rate": 7.784463566210952e-05,
      "loss": 3.1567,
      "step": 176355
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.843146324157715,
      "learning_rate": 7.784188667503575e-05,
      "loss": 3.0303,
      "step": 176356
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3665497303009033,
      "learning_rate": 7.783913772926526e-05,
      "loss": 3.034,
      "step": 176357
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5365209579467773,
      "learning_rate": 7.783638882479878e-05,
      "loss": 3.0448,
      "step": 176358
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.95400071144104,
      "learning_rate": 7.783363996163671e-05,
      "loss": 2.7994,
      "step": 176359
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1564900875091553,
      "learning_rate": 7.783089113977955e-05,
      "loss": 2.8262,
      "step": 176360
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.009016990661621,
      "learning_rate": 7.782814235922775e-05,
      "loss": 2.9145,
      "step": 176361
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.44158673286438,
      "learning_rate": 7.782539361998195e-05,
      "loss": 2.8509,
      "step": 176362
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7014882564544678,
      "learning_rate": 7.782264492204253e-05,
      "loss": 2.8033,
      "step": 176363
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4786908626556396,
      "learning_rate": 7.781989626541016e-05,
      "loss": 2.8569,
      "step": 176364
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.126948356628418,
      "learning_rate": 7.781714765008529e-05,
      "loss": 3.0788,
      "step": 176365
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8822951316833496,
      "learning_rate": 7.781439907606826e-05,
      "loss": 2.9099,
      "step": 176366
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5616724491119385,
      "learning_rate": 7.781165054335987e-05,
      "loss": 2.8918,
      "step": 176367
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.650996685028076,
      "learning_rate": 7.780890205196047e-05,
      "loss": 3.1331,
      "step": 176368
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.35607647895813,
      "learning_rate": 7.780615360187051e-05,
      "loss": 2.8672,
      "step": 176369
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3928868770599365,
      "learning_rate": 7.780340519309066e-05,
      "loss": 2.8735,
      "step": 176370
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4153311252593994,
      "learning_rate": 7.780065682562134e-05,
      "loss": 2.7624,
      "step": 176371
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5945632457733154,
      "learning_rate": 7.7797908499463e-05,
      "loss": 2.8599,
      "step": 176372
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.698333263397217,
      "learning_rate": 7.779516021461632e-05,
      "loss": 3.1375,
      "step": 176373
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7993030548095703,
      "learning_rate": 7.779241197108161e-05,
      "loss": 2.9773,
      "step": 176374
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7481369972229004,
      "learning_rate": 7.778966376885956e-05,
      "loss": 2.8039,
      "step": 176375
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.82354474067688,
      "learning_rate": 7.778691560795061e-05,
      "loss": 2.9312,
      "step": 176376
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.767010450363159,
      "learning_rate": 7.77841674883553e-05,
      "loss": 2.9181,
      "step": 176377
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.9456589221954346,
      "learning_rate": 7.778141941007399e-05,
      "loss": 2.8626,
      "step": 176378
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.622502565383911,
      "learning_rate": 7.777867137310742e-05,
      "loss": 2.7886,
      "step": 176379
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.640265464782715,
      "learning_rate": 7.777592337745588e-05,
      "loss": 3.1459,
      "step": 176380
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.534358263015747,
      "learning_rate": 7.777317542312011e-05,
      "loss": 2.9831,
      "step": 176381
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7085988521575928,
      "learning_rate": 7.777042751010044e-05,
      "loss": 3.0099,
      "step": 176382
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4486613273620605,
      "learning_rate": 7.77676796383975e-05,
      "loss": 2.9102,
      "step": 176383
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4317047595977783,
      "learning_rate": 7.776493180801163e-05,
      "loss": 2.9895,
      "step": 176384
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6543750762939453,
      "learning_rate": 7.776218401894355e-05,
      "loss": 2.9834,
      "step": 176385
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5523362159729004,
      "learning_rate": 7.775943627119358e-05,
      "loss": 2.9974,
      "step": 176386
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3828957080841064,
      "learning_rate": 7.775668856476244e-05,
      "loss": 3.0323,
      "step": 176387
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4444162845611572,
      "learning_rate": 7.77539408996504e-05,
      "loss": 2.763,
      "step": 176388
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.624093770980835,
      "learning_rate": 7.775119327585828e-05,
      "loss": 2.9079,
      "step": 176389
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.072506904602051,
      "learning_rate": 7.774844569338624e-05,
      "loss": 2.8177,
      "step": 176390
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.730621099472046,
      "learning_rate": 7.774569815223507e-05,
      "loss": 2.9285,
      "step": 176391
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.566877841949463,
      "learning_rate": 7.774295065240506e-05,
      "loss": 2.9922,
      "step": 176392
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.996072769165039,
      "learning_rate": 7.774020319389693e-05,
      "loss": 2.9088,
      "step": 176393
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3840391635894775,
      "learning_rate": 7.773745577671097e-05,
      "loss": 2.9262,
      "step": 176394
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6773359775543213,
      "learning_rate": 7.773470840084803e-05,
      "loss": 2.8578,
      "step": 176395
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9714527130126953,
      "learning_rate": 7.77319610663082e-05,
      "loss": 2.8723,
      "step": 176396
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6457107067108154,
      "learning_rate": 7.772921377309231e-05,
      "loss": 3.0118,
      "step": 176397
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.924567699432373,
      "learning_rate": 7.772646652120064e-05,
      "loss": 3.0017,
      "step": 176398
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7901809215545654,
      "learning_rate": 7.77237193106339e-05,
      "loss": 3.1884,
      "step": 176399
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5382285118103027,
      "learning_rate": 7.772097214139245e-05,
      "loss": 2.7065,
      "step": 176400
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.718454599380493,
      "learning_rate": 7.771822501347703e-05,
      "loss": 2.9734,
      "step": 176401
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.75298810005188,
      "learning_rate": 7.771547792688783e-05,
      "loss": 2.8869,
      "step": 176402
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.857574939727783,
      "learning_rate": 7.77127308816256e-05,
      "loss": 3.2602,
      "step": 176403
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4581501483917236,
      "learning_rate": 7.770998387769067e-05,
      "loss": 3.1416,
      "step": 176404
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2727997303009033,
      "learning_rate": 7.770723691508376e-05,
      "loss": 2.7667,
      "step": 176405
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.27235746383667,
      "learning_rate": 7.770448999380515e-05,
      "loss": 2.9686,
      "step": 176406
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.723875045776367,
      "learning_rate": 7.770174311385565e-05,
      "loss": 2.9935,
      "step": 176407
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.583007335662842,
      "learning_rate": 7.769899627523546e-05,
      "loss": 3.0362,
      "step": 176408
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.9536218643188477,
      "learning_rate": 7.769624947794527e-05,
      "loss": 2.8892,
      "step": 176409
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.017251491546631,
      "learning_rate": 7.769350272198546e-05,
      "loss": 2.8357,
      "step": 176410
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.558283567428589,
      "learning_rate": 7.769075600735672e-05,
      "loss": 2.7223,
      "step": 176411
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.566649913787842,
      "learning_rate": 7.768800933405937e-05,
      "loss": 2.8742,
      "step": 176412
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3910136222839355,
      "learning_rate": 7.768526270209421e-05,
      "loss": 3.0626,
      "step": 176413
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.025933027267456,
      "learning_rate": 7.768251611146134e-05,
      "loss": 2.9192,
      "step": 176414
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9864985942840576,
      "learning_rate": 7.767976956216162e-05,
      "loss": 2.837,
      "step": 176415
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.914379835128784,
      "learning_rate": 7.767702305419533e-05,
      "loss": 2.8001,
      "step": 176416
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.553516149520874,
      "learning_rate": 7.767427658756314e-05,
      "loss": 2.9061,
      "step": 176417
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6709954738616943,
      "learning_rate": 7.767153016226543e-05,
      "loss": 3.1347,
      "step": 176418
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9582245349884033,
      "learning_rate": 7.766878377830295e-05,
      "loss": 2.8418,
      "step": 176419
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.767354965209961,
      "learning_rate": 7.766603743567587e-05,
      "loss": 2.9579,
      "step": 176420
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.239174842834473,
      "learning_rate": 7.766329113438496e-05,
      "loss": 2.8538,
      "step": 176421
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1632907390594482,
      "learning_rate": 7.766054487443054e-05,
      "loss": 2.8685,
      "step": 176422
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.365870475769043,
      "learning_rate": 7.765779865581334e-05,
      "loss": 3.0938,
      "step": 176423
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.398291826248169,
      "learning_rate": 7.765505247853362e-05,
      "loss": 3.0648,
      "step": 176424
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8406569957733154,
      "learning_rate": 7.765230634259214e-05,
      "loss": 3.0944,
      "step": 176425
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.264134407043457,
      "learning_rate": 7.764956024798929e-05,
      "loss": 2.9674,
      "step": 176426
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.705612659454346,
      "learning_rate": 7.764681419472558e-05,
      "loss": 2.7101,
      "step": 176427
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2972617149353027,
      "learning_rate": 7.764406818280144e-05,
      "loss": 2.8987,
      "step": 176428
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.691560983657837,
      "learning_rate": 7.764132221221754e-05,
      "loss": 3.0338,
      "step": 176429
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7267398834228516,
      "learning_rate": 7.763857628297423e-05,
      "loss": 3.043,
      "step": 176430
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7653582096099854,
      "learning_rate": 7.76358303950722e-05,
      "loss": 3.2323,
      "step": 176431
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.650378465652466,
      "learning_rate": 7.763308454851188e-05,
      "loss": 3.0739,
      "step": 176432
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.815437078475952,
      "learning_rate": 7.763033874329375e-05,
      "loss": 2.7976,
      "step": 176433
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.11604642868042,
      "learning_rate": 7.762759297941825e-05,
      "loss": 2.8258,
      "step": 176434
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7012557983398438,
      "learning_rate": 7.762484725688608e-05,
      "loss": 3.2026,
      "step": 176435
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.578244209289551,
      "learning_rate": 7.762210157569757e-05,
      "loss": 2.9824,
      "step": 176436
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2550437450408936,
      "learning_rate": 7.761935593585336e-05,
      "loss": 3.0816,
      "step": 176437
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.498819589614868,
      "learning_rate": 7.761661033735394e-05,
      "loss": 2.8834,
      "step": 176438
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6570346355438232,
      "learning_rate": 7.761386478019978e-05,
      "loss": 2.8984,
      "step": 176439
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.07521653175354,
      "learning_rate": 7.76111192643913e-05,
      "loss": 2.9938,
      "step": 176440
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.124251842498779,
      "learning_rate": 7.76083737899292e-05,
      "loss": 3.2632,
      "step": 176441
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5717105865478516,
      "learning_rate": 7.760562835681382e-05,
      "loss": 3.0702,
      "step": 176442
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4470958709716797,
      "learning_rate": 7.760288296504587e-05,
      "loss": 3.0868,
      "step": 176443
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.192049741744995,
      "learning_rate": 7.76001376146257e-05,
      "loss": 2.8098,
      "step": 176444
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6334304809570312,
      "learning_rate": 7.759739230555387e-05,
      "loss": 2.9209,
      "step": 176445
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.261277198791504,
      "learning_rate": 7.759464703783081e-05,
      "loss": 2.9862,
      "step": 176446
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.80428409576416,
      "learning_rate": 7.759190181145718e-05,
      "loss": 2.9331,
      "step": 176447
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5100700855255127,
      "learning_rate": 7.758915662643332e-05,
      "loss": 3.1758,
      "step": 176448
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4454545974731445,
      "learning_rate": 7.758641148275995e-05,
      "loss": 2.8325,
      "step": 176449
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.295631170272827,
      "learning_rate": 7.758366638043742e-05,
      "loss": 3.0949,
      "step": 176450
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.500903367996216,
      "learning_rate": 7.758092131946623e-05,
      "loss": 2.9788,
      "step": 176451
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3818206787109375,
      "learning_rate": 7.757817629984704e-05,
      "loss": 2.7921,
      "step": 176452
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.031527519226074,
      "learning_rate": 7.757543132158026e-05,
      "loss": 2.8886,
      "step": 176453
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.740513801574707,
      "learning_rate": 7.757268638466629e-05,
      "loss": 2.8472,
      "step": 176454
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5629994869232178,
      "learning_rate": 7.756994148910588e-05,
      "loss": 2.8527,
      "step": 176455
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.233646869659424,
      "learning_rate": 7.756719663489939e-05,
      "loss": 3.1885,
      "step": 176456
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8047101497650146,
      "learning_rate": 7.756445182204728e-05,
      "loss": 3.2475,
      "step": 176457
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.658411979675293,
      "learning_rate": 7.756170705055023e-05,
      "loss": 3.002,
      "step": 176458
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8908023834228516,
      "learning_rate": 7.755896232040865e-05,
      "loss": 2.819,
      "step": 176459
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.573362112045288,
      "learning_rate": 7.755621763162297e-05,
      "loss": 3.007,
      "step": 176460
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.495878219604492,
      "learning_rate": 7.755347298419388e-05,
      "loss": 2.928,
      "step": 176461
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.432953119277954,
      "learning_rate": 7.75507283781218e-05,
      "loss": 3.2061,
      "step": 176462
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.394810914993286,
      "learning_rate": 7.754798381340713e-05,
      "loss": 3.0707,
      "step": 176463
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.452483892440796,
      "learning_rate": 7.754523929005059e-05,
      "loss": 2.636,
      "step": 176464
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5314953327178955,
      "learning_rate": 7.754249480805252e-05,
      "loss": 3.3107,
      "step": 176465
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.58522367477417,
      "learning_rate": 7.753975036741358e-05,
      "loss": 3.128,
      "step": 176466
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.522728681564331,
      "learning_rate": 7.753700596813418e-05,
      "loss": 2.8787,
      "step": 176467
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.675034284591675,
      "learning_rate": 7.753426161021486e-05,
      "loss": 2.9353,
      "step": 176468
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6399686336517334,
      "learning_rate": 7.753151729365603e-05,
      "loss": 3.0467,
      "step": 176469
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.679945707321167,
      "learning_rate": 7.752877301845837e-05,
      "loss": 3.0505,
      "step": 176470
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8262436389923096,
      "learning_rate": 7.752602878462224e-05,
      "loss": 2.7141,
      "step": 176471
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7328670024871826,
      "learning_rate": 7.752328459214832e-05,
      "loss": 2.9729,
      "step": 176472
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1722500324249268,
      "learning_rate": 7.752054044103691e-05,
      "loss": 2.9571,
      "step": 176473
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.329317331314087,
      "learning_rate": 7.75177963312888e-05,
      "loss": 3.017,
      "step": 176474
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.299921989440918,
      "learning_rate": 7.751505226290419e-05,
      "loss": 2.9301,
      "step": 176475
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6431386470794678,
      "learning_rate": 7.751230823588378e-05,
      "loss": 3.0805,
      "step": 176476
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.709261894226074,
      "learning_rate": 7.750956425022798e-05,
      "loss": 3.152,
      "step": 176477
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5355842113494873,
      "learning_rate": 7.750682030593741e-05,
      "loss": 2.8774,
      "step": 176478
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.910191059112549,
      "learning_rate": 7.750407640301247e-05,
      "loss": 2.9624,
      "step": 176479
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.953767776489258,
      "learning_rate": 7.750133254145386e-05,
      "loss": 2.9989,
      "step": 176480
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2472236156463623,
      "learning_rate": 7.749858872126177e-05,
      "loss": 2.9081,
      "step": 176481
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8409457206726074,
      "learning_rate": 7.7495844942437e-05,
      "loss": 2.7552,
      "step": 176482
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8456103801727295,
      "learning_rate": 7.749310120497987e-05,
      "loss": 3.2387,
      "step": 176483
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.086610794067383,
      "learning_rate": 7.749035750889108e-05,
      "loss": 3.1376,
      "step": 176484
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3661699295043945,
      "learning_rate": 7.748761385417089e-05,
      "loss": 2.9027,
      "step": 176485
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.094062805175781,
      "learning_rate": 7.748487024082018e-05,
      "loss": 2.5511,
      "step": 176486
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.138763666152954,
      "learning_rate": 7.748212666883902e-05,
      "loss": 2.9094,
      "step": 176487
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5307669639587402,
      "learning_rate": 7.747938313822825e-05,
      "loss": 2.8986,
      "step": 176488
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8871548175811768,
      "learning_rate": 7.747663964898817e-05,
      "loss": 2.902,
      "step": 176489
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5336945056915283,
      "learning_rate": 7.747389620111945e-05,
      "loss": 3.165,
      "step": 176490
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.366842269897461,
      "learning_rate": 7.747115279462244e-05,
      "loss": 3.1104,
      "step": 176491
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.39666485786438,
      "learning_rate": 7.746840942949788e-05,
      "loss": 3.2121,
      "step": 176492
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.764477491378784,
      "learning_rate": 7.746566610574611e-05,
      "loss": 2.9939,
      "step": 176493
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5350983142852783,
      "learning_rate": 7.746292282336766e-05,
      "loss": 3.0087,
      "step": 176494
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3757543563842773,
      "learning_rate": 7.7460179582363e-05,
      "loss": 2.8015,
      "step": 176495
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6013238430023193,
      "learning_rate": 7.745743638273275e-05,
      "loss": 3.0263,
      "step": 176496
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3360557556152344,
      "learning_rate": 7.74546932244773e-05,
      "loss": 2.9716,
      "step": 176497
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4269485473632812,
      "learning_rate": 7.74519501075973e-05,
      "loss": 2.9286,
      "step": 176498
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.577240467071533,
      "learning_rate": 7.744920703209318e-05,
      "loss": 2.9347,
      "step": 176499
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7649037837982178,
      "learning_rate": 7.744646399796545e-05,
      "loss": 2.9471,
      "step": 176500
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2926025390625,
      "learning_rate": 7.744372100521454e-05,
      "loss": 3.0206,
      "step": 176501
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4681906700134277,
      "learning_rate": 7.744097805384114e-05,
      "loss": 2.7609,
      "step": 176502
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2121119499206543,
      "learning_rate": 7.743823514384557e-05,
      "loss": 3.0619,
      "step": 176503
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6509974002838135,
      "learning_rate": 7.743549227522851e-05,
      "loss": 2.9453,
      "step": 176504
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.189924478530884,
      "learning_rate": 7.74327494479904e-05,
      "loss": 2.982,
      "step": 176505
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.090792655944824,
      "learning_rate": 7.743000666213177e-05,
      "loss": 3.0098,
      "step": 176506
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.682098865509033,
      "learning_rate": 7.742726391765297e-05,
      "loss": 2.9601,
      "step": 176507
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.319765329360962,
      "learning_rate": 7.742452121455477e-05,
      "loss": 3.1378,
      "step": 176508
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1147334575653076,
      "learning_rate": 7.742177855283744e-05,
      "loss": 2.8454,
      "step": 176509
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6620426177978516,
      "learning_rate": 7.74190359325017e-05,
      "loss": 3.0017,
      "step": 176510
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5989491939544678,
      "learning_rate": 7.741629335354795e-05,
      "loss": 2.9091,
      "step": 176511
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.191669464111328,
      "learning_rate": 7.741355081597675e-05,
      "loss": 3.0437,
      "step": 176512
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3124709129333496,
      "learning_rate": 7.741080831978843e-05,
      "loss": 2.7471,
      "step": 176513
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7558135986328125,
      "learning_rate": 7.740806586498378e-05,
      "loss": 3.0634,
      "step": 176514
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2085962295532227,
      "learning_rate": 7.740532345156307e-05,
      "loss": 2.851,
      "step": 176515
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9388391971588135,
      "learning_rate": 7.740258107952697e-05,
      "loss": 2.9779,
      "step": 176516
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6073620319366455,
      "learning_rate": 7.739983874887597e-05,
      "loss": 2.8113,
      "step": 176517
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.872007131576538,
      "learning_rate": 7.739709645961051e-05,
      "loss": 2.7607,
      "step": 176518
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.549956798553467,
      "learning_rate": 7.739435421173107e-05,
      "loss": 2.9358,
      "step": 176519
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9906158447265625,
      "learning_rate": 7.73916120052383e-05,
      "loss": 2.9584,
      "step": 176520
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5625405311584473,
      "learning_rate": 7.738886984013254e-05,
      "loss": 2.6727,
      "step": 176521
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.803379535675049,
      "learning_rate": 7.738612771641448e-05,
      "loss": 3.0001,
      "step": 176522
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.668726682662964,
      "learning_rate": 7.738338563408455e-05,
      "loss": 2.9124,
      "step": 176523
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4834518432617188,
      "learning_rate": 7.738064359314324e-05,
      "loss": 2.7434,
      "step": 176524
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.492341995239258,
      "learning_rate": 7.737790159359099e-05,
      "loss": 3.1137,
      "step": 176525
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.624973773956299,
      "learning_rate": 7.737515963542848e-05,
      "loss": 3.2232,
      "step": 176526
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.786011219024658,
      "learning_rate": 7.737241771865604e-05,
      "loss": 3.055,
      "step": 176527
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7025210857391357,
      "learning_rate": 7.736967584327437e-05,
      "loss": 2.9677,
      "step": 176528
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.317866563796997,
      "learning_rate": 7.736693400928386e-05,
      "loss": 2.8341,
      "step": 176529
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7722971439361572,
      "learning_rate": 7.736419221668503e-05,
      "loss": 3.0238,
      "step": 176530
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2807610034942627,
      "learning_rate": 7.736145046547832e-05,
      "loss": 2.9828,
      "step": 176531
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.323763370513916,
      "learning_rate": 7.735870875566444e-05,
      "loss": 2.8843,
      "step": 176532
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.1577906608581543,
      "learning_rate": 7.735596708724365e-05,
      "loss": 2.9427,
      "step": 176533
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.240041732788086,
      "learning_rate": 7.735322546021669e-05,
      "loss": 3.0407,
      "step": 176534
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5401036739349365,
      "learning_rate": 7.735048387458397e-05,
      "loss": 3.0616,
      "step": 176535
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.623455286026001,
      "learning_rate": 7.73477423303459e-05,
      "loss": 2.9865,
      "step": 176536
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.852417230606079,
      "learning_rate": 7.73450008275032e-05,
      "loss": 3.1268,
      "step": 176537
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5406746864318848,
      "learning_rate": 7.734225936605625e-05,
      "loss": 2.8839,
      "step": 176538
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.560098648071289,
      "learning_rate": 7.733951794600547e-05,
      "loss": 2.824,
      "step": 176539
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7757761478424072,
      "learning_rate": 7.73367765673516e-05,
      "loss": 2.9127,
      "step": 176540
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.495593547821045,
      "learning_rate": 7.733403523009501e-05,
      "loss": 2.9463,
      "step": 176541
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.409753799438477,
      "learning_rate": 7.733129393423616e-05,
      "loss": 2.827,
      "step": 176542
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6733238697052,
      "learning_rate": 7.732855267977567e-05,
      "loss": 2.8379,
      "step": 176543
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4712283611297607,
      "learning_rate": 7.732581146671402e-05,
      "loss": 3.0231,
      "step": 176544
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5786538124084473,
      "learning_rate": 7.732307029505163e-05,
      "loss": 2.7132,
      "step": 176545
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5091822147369385,
      "learning_rate": 7.73203291647892e-05,
      "loss": 2.9305,
      "step": 176546
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1301681995391846,
      "learning_rate": 7.731758807592707e-05,
      "loss": 2.8888,
      "step": 176547
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.173553943634033,
      "learning_rate": 7.731484702846573e-05,
      "loss": 2.8973,
      "step": 176548
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.3541154861450195,
      "learning_rate": 7.731210602240587e-05,
      "loss": 3.0399,
      "step": 176549
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.707190990447998,
      "learning_rate": 7.73093650577478e-05,
      "loss": 2.9645,
      "step": 176550
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.40881609916687,
      "learning_rate": 7.73066241344922e-05,
      "loss": 3.0582,
      "step": 176551
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0761878490448,
      "learning_rate": 7.730388325263953e-05,
      "loss": 2.9195,
      "step": 176552
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8935446739196777,
      "learning_rate": 7.730114241219022e-05,
      "loss": 2.7816,
      "step": 176553
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.83691930770874,
      "learning_rate": 7.729840161314476e-05,
      "loss": 3.0102,
      "step": 176554
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9332056045532227,
      "learning_rate": 7.729566085550386e-05,
      "loss": 3.0035,
      "step": 176555
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6460468769073486,
      "learning_rate": 7.729292013926776e-05,
      "loss": 3.0077,
      "step": 176556
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.587093830108643,
      "learning_rate": 7.729017946443725e-05,
      "loss": 2.8049,
      "step": 176557
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6774325370788574,
      "learning_rate": 7.728743883101258e-05,
      "loss": 3.1171,
      "step": 176558
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1026458740234375,
      "learning_rate": 7.728469823899446e-05,
      "loss": 2.7741,
      "step": 176559
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.097473621368408,
      "learning_rate": 7.728195768838334e-05,
      "loss": 2.7854,
      "step": 176560
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4402735233306885,
      "learning_rate": 7.727921717917971e-05,
      "loss": 3.118,
      "step": 176561
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.548551559448242,
      "learning_rate": 7.727647671138395e-05,
      "loss": 2.6441,
      "step": 176562
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8452508449554443,
      "learning_rate": 7.727373628499683e-05,
      "loss": 2.8851,
      "step": 176563
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.323381185531616,
      "learning_rate": 7.727099590001863e-05,
      "loss": 3.0219,
      "step": 176564
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4364635944366455,
      "learning_rate": 7.726825555645003e-05,
      "loss": 2.9435,
      "step": 176565
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.987931489944458,
      "learning_rate": 7.726551525429148e-05,
      "loss": 3.1474,
      "step": 176566
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.208802223205566,
      "learning_rate": 7.726277499354347e-05,
      "loss": 3.0694,
      "step": 176567
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5631282329559326,
      "learning_rate": 7.72600347742064e-05,
      "loss": 2.8614,
      "step": 176568
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.634073257446289,
      "learning_rate": 7.7257294596281e-05,
      "loss": 3.0058,
      "step": 176569
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0551671981811523,
      "learning_rate": 7.72545544597676e-05,
      "loss": 2.9297,
      "step": 176570
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.824268341064453,
      "learning_rate": 7.725181436466686e-05,
      "loss": 2.8231,
      "step": 176571
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2499001026153564,
      "learning_rate": 7.724907431097921e-05,
      "loss": 3.0222,
      "step": 176572
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.39475417137146,
      "learning_rate": 7.724633429870517e-05,
      "loss": 2.7632,
      "step": 176573
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.243513345718384,
      "learning_rate": 7.724359432784514e-05,
      "loss": 2.8905,
      "step": 176574
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3204574584960938,
      "learning_rate": 7.724085439839985e-05,
      "loss": 2.906,
      "step": 176575
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.13822603225708,
      "learning_rate": 7.723811451036955e-05,
      "loss": 2.876,
      "step": 176576
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.519162654876709,
      "learning_rate": 7.723537466375503e-05,
      "loss": 3.1185,
      "step": 176577
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7094457149505615,
      "learning_rate": 7.723263485855662e-05,
      "loss": 3.1895,
      "step": 176578
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2522878646850586,
      "learning_rate": 7.722989509477491e-05,
      "loss": 2.9927,
      "step": 176579
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.1195969581604,
      "learning_rate": 7.722715537241023e-05,
      "loss": 2.9307,
      "step": 176580
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7009074687957764,
      "learning_rate": 7.722441569146337e-05,
      "loss": 3.2148,
      "step": 176581
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4781382083892822,
      "learning_rate": 7.722167605193455e-05,
      "loss": 2.8715,
      "step": 176582
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2356748580932617,
      "learning_rate": 7.721893645382455e-05,
      "loss": 3.1412,
      "step": 176583
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8328447341918945,
      "learning_rate": 7.721619689713376e-05,
      "loss": 3.025,
      "step": 176584
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.072451114654541,
      "learning_rate": 7.721345738186266e-05,
      "loss": 2.8635,
      "step": 176585
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.287358283996582,
      "learning_rate": 7.721071790801169e-05,
      "loss": 2.7985,
      "step": 176586
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4822027683258057,
      "learning_rate": 7.720797847558156e-05,
      "loss": 2.7496,
      "step": 176587
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4739558696746826,
      "learning_rate": 7.720523908457259e-05,
      "loss": 2.9577,
      "step": 176588
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5647380352020264,
      "learning_rate": 7.720249973498546e-05,
      "loss": 2.8045,
      "step": 176589
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2295048236846924,
      "learning_rate": 7.719976042682058e-05,
      "loss": 3.2241,
      "step": 176590
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.871075391769409,
      "learning_rate": 7.719702116007845e-05,
      "loss": 3.206,
      "step": 176591
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4834961891174316,
      "learning_rate": 7.719428193475955e-05,
      "loss": 2.8221,
      "step": 176592
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.455735206604004,
      "learning_rate": 7.719154275086452e-05,
      "loss": 2.941,
      "step": 176593
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2226953506469727,
      "learning_rate": 7.718880360839366e-05,
      "loss": 3.2447,
      "step": 176594
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7242937088012695,
      "learning_rate": 7.718606450734775e-05,
      "loss": 2.9394,
      "step": 176595
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.1861138343811035,
      "learning_rate": 7.718332544772714e-05,
      "loss": 2.9302,
      "step": 176596
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1095361709594727,
      "learning_rate": 7.718058642953235e-05,
      "loss": 2.9564,
      "step": 176597
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8549752235412598,
      "learning_rate": 7.71778474527638e-05,
      "loss": 3.1329,
      "step": 176598
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.789980173110962,
      "learning_rate": 7.717510851742217e-05,
      "loss": 2.9567,
      "step": 176599
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5841691493988037,
      "learning_rate": 7.717236962350784e-05,
      "loss": 2.7937,
      "step": 176600
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.900879144668579,
      "learning_rate": 7.716963077102143e-05,
      "loss": 2.8993,
      "step": 176601
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2689473628997803,
      "learning_rate": 7.716689195996342e-05,
      "loss": 2.988,
      "step": 176602
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8471951484680176,
      "learning_rate": 7.716415319033426e-05,
      "loss": 2.9458,
      "step": 176603
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0320682525634766,
      "learning_rate": 7.716141446213444e-05,
      "loss": 2.6741,
      "step": 176604
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4324898719787598,
      "learning_rate": 7.715867577536458e-05,
      "loss": 3.0805,
      "step": 176605
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5494699478149414,
      "learning_rate": 7.715593713002503e-05,
      "loss": 2.7639,
      "step": 176606
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5969769954681396,
      "learning_rate": 7.715319852611652e-05,
      "loss": 2.8197,
      "step": 176607
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.709747791290283,
      "learning_rate": 7.715045996363943e-05,
      "loss": 2.7566,
      "step": 176608
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6994235515594482,
      "learning_rate": 7.714772144259426e-05,
      "loss": 2.9398,
      "step": 176609
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2218635082244873,
      "learning_rate": 7.714498296298146e-05,
      "loss": 3.0809,
      "step": 176610
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.454329013824463,
      "learning_rate": 7.714224452480168e-05,
      "loss": 2.9465,
      "step": 176611
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6513733863830566,
      "learning_rate": 7.713950612805529e-05,
      "loss": 3.0908,
      "step": 176612
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4346015453338623,
      "learning_rate": 7.713676777274298e-05,
      "loss": 3.1645,
      "step": 176613
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7467968463897705,
      "learning_rate": 7.713402945886513e-05,
      "loss": 2.8464,
      "step": 176614
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8787224292755127,
      "learning_rate": 7.713129118642227e-05,
      "loss": 2.8885,
      "step": 176615
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.15106463432312,
      "learning_rate": 7.71285529554148e-05,
      "loss": 2.9428,
      "step": 176616
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4515089988708496,
      "learning_rate": 7.712581476584349e-05,
      "loss": 2.8732,
      "step": 176617
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.287431001663208,
      "learning_rate": 7.712307661770856e-05,
      "loss": 2.9075,
      "step": 176618
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7548646926879883,
      "learning_rate": 7.712033851101075e-05,
      "loss": 2.8962,
      "step": 176619
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.228524684906006,
      "learning_rate": 7.71176004457505e-05,
      "loss": 3.082,
      "step": 176620
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2684340476989746,
      "learning_rate": 7.711486242192819e-05,
      "loss": 2.9489,
      "step": 176621
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8467049598693848,
      "learning_rate": 7.711212443954451e-05,
      "loss": 2.9593,
      "step": 176622
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1713860034942627,
      "learning_rate": 7.710938649859991e-05,
      "loss": 2.9478,
      "step": 176623
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.093677282333374,
      "learning_rate": 7.71066485990948e-05,
      "loss": 2.8233,
      "step": 176624
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3078083992004395,
      "learning_rate": 7.710391074102984e-05,
      "loss": 3.0378,
      "step": 176625
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9737918376922607,
      "learning_rate": 7.710117292440539e-05,
      "loss": 2.9236,
      "step": 176626
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4064176082611084,
      "learning_rate": 7.709843514922214e-05,
      "loss": 2.9857,
      "step": 176627
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7520065307617188,
      "learning_rate": 7.709569741548049e-05,
      "loss": 3.0505,
      "step": 176628
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.718291997909546,
      "learning_rate": 7.709295972318097e-05,
      "loss": 2.8265,
      "step": 176629
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.71882963180542,
      "learning_rate": 7.709022207232396e-05,
      "loss": 3.0873,
      "step": 176630
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.128533124923706,
      "learning_rate": 7.70874844629102e-05,
      "loss": 3.1329,
      "step": 176631
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5005576610565186,
      "learning_rate": 7.708474689493999e-05,
      "loss": 2.888,
      "step": 176632
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6591920852661133,
      "learning_rate": 7.708200936841402e-05,
      "loss": 2.9091,
      "step": 176633
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.304060935974121,
      "learning_rate": 7.70792718833327e-05,
      "loss": 2.79,
      "step": 176634
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2890987396240234,
      "learning_rate": 7.707653443969647e-05,
      "loss": 2.9843,
      "step": 176635
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5531668663024902,
      "learning_rate": 7.707379703750601e-05,
      "loss": 3.044,
      "step": 176636
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.294296741485596,
      "learning_rate": 7.707105967676174e-05,
      "loss": 2.9329,
      "step": 176637
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2324514389038086,
      "learning_rate": 7.706832235746407e-05,
      "loss": 3.04,
      "step": 176638
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3331005573272705,
      "learning_rate": 7.706558507961373e-05,
      "loss": 3.1848,
      "step": 176639
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3130111694335938,
      "learning_rate": 7.706284784321109e-05,
      "loss": 3.0677,
      "step": 176640
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.6868550777435303,
      "learning_rate": 7.706011064825656e-05,
      "loss": 3.0765,
      "step": 176641
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.539114475250244,
      "learning_rate": 7.705737349475088e-05,
      "loss": 2.9314,
      "step": 176642
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8693153858184814,
      "learning_rate": 7.705463638269444e-05,
      "loss": 2.8487,
      "step": 176643
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.799834728240967,
      "learning_rate": 7.705189931208764e-05,
      "loss": 2.5698,
      "step": 176644
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0973432064056396,
      "learning_rate": 7.704916228293122e-05,
      "loss": 3.0961,
      "step": 176645
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.835085391998291,
      "learning_rate": 7.704642529522554e-05,
      "loss": 2.7678,
      "step": 176646
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.239755392074585,
      "learning_rate": 7.704368834897107e-05,
      "loss": 2.8941,
      "step": 176647
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7063441276550293,
      "learning_rate": 7.704095144416845e-05,
      "loss": 2.8991,
      "step": 176648
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6279444694519043,
      "learning_rate": 7.703821458081807e-05,
      "loss": 2.6605,
      "step": 176649
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7478761672973633,
      "learning_rate": 7.703547775892056e-05,
      "loss": 2.8157,
      "step": 176650
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.67033052444458,
      "learning_rate": 7.703274097847639e-05,
      "loss": 2.8097,
      "step": 176651
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7173550128936768,
      "learning_rate": 7.703000423948603e-05,
      "loss": 2.9831,
      "step": 176652
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.000701904296875,
      "learning_rate": 7.70272675419499e-05,
      "loss": 3.0412,
      "step": 176653
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.593388080596924,
      "learning_rate": 7.70245308858687e-05,
      "loss": 2.9314,
      "step": 176654
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.489753246307373,
      "learning_rate": 7.702179427124279e-05,
      "loss": 2.9217,
      "step": 176655
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3156421184539795,
      "learning_rate": 7.701905769807279e-05,
      "loss": 3.1163,
      "step": 176656
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.622476100921631,
      "learning_rate": 7.701632116635916e-05,
      "loss": 2.8325,
      "step": 176657
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8573474884033203,
      "learning_rate": 7.701358467610243e-05,
      "loss": 2.7152,
      "step": 176658
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7877111434936523,
      "learning_rate": 7.701084822730297e-05,
      "loss": 2.7604,
      "step": 176659
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3595809936523438,
      "learning_rate": 7.700811181996149e-05,
      "loss": 2.832,
      "step": 176660
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.003469228744507,
      "learning_rate": 7.700537545407834e-05,
      "loss": 2.9119,
      "step": 176661
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.645437479019165,
      "learning_rate": 7.700263912965418e-05,
      "loss": 2.9567,
      "step": 176662
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5516769886016846,
      "learning_rate": 7.699990284668944e-05,
      "loss": 2.8244,
      "step": 176663
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6525487899780273,
      "learning_rate": 7.699716660518464e-05,
      "loss": 3.0939,
      "step": 176664
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9838695526123047,
      "learning_rate": 7.699443040514016e-05,
      "loss": 3.105,
      "step": 176665
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.286519765853882,
      "learning_rate": 7.699169424655672e-05,
      "loss": 3.0571,
      "step": 176666
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3365466594696045,
      "learning_rate": 7.698895812943468e-05,
      "loss": 2.9794,
      "step": 176667
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1536478996276855,
      "learning_rate": 7.698622205377464e-05,
      "loss": 2.9744,
      "step": 176668
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.600836753845215,
      "learning_rate": 7.698348601957711e-05,
      "loss": 2.7536,
      "step": 176669
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5215442180633545,
      "learning_rate": 7.698075002684254e-05,
      "loss": 3.0193,
      "step": 176670
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7778306007385254,
      "learning_rate": 7.697801407557136e-05,
      "loss": 2.977,
      "step": 176671
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.483477830886841,
      "learning_rate": 7.69752781657643e-05,
      "loss": 2.8746,
      "step": 176672
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.690849542617798,
      "learning_rate": 7.697254229742166e-05,
      "loss": 3.1484,
      "step": 176673
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.480133295059204,
      "learning_rate": 7.696980647054409e-05,
      "loss": 2.9649,
      "step": 176674
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3802006244659424,
      "learning_rate": 7.696707068513203e-05,
      "loss": 3.0328,
      "step": 176675
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.424408912658691,
      "learning_rate": 7.696433494118605e-05,
      "loss": 2.9408,
      "step": 176676
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.634308338165283,
      "learning_rate": 7.69615992387065e-05,
      "loss": 2.9473,
      "step": 176677
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.648818254470825,
      "learning_rate": 7.695886357769409e-05,
      "loss": 3.1223,
      "step": 176678
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9779913425445557,
      "learning_rate": 7.695612795814912e-05,
      "loss": 2.7109,
      "step": 176679
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.954774856567383,
      "learning_rate": 7.695339238007233e-05,
      "loss": 2.996,
      "step": 176680
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4082865715026855,
      "learning_rate": 7.695065684346413e-05,
      "loss": 2.8593,
      "step": 176681
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.50049090385437,
      "learning_rate": 7.694792134832499e-05,
      "loss": 2.9111,
      "step": 176682
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7378463745117188,
      "learning_rate": 7.694518589465534e-05,
      "loss": 2.7942,
      "step": 176683
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.992361068725586,
      "learning_rate": 7.694245048245593e-05,
      "loss": 2.8771,
      "step": 176684
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5406692028045654,
      "learning_rate": 7.693971511172701e-05,
      "loss": 2.7705,
      "step": 176685
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.941991090774536,
      "learning_rate": 7.693697978246928e-05,
      "loss": 3.1435,
      "step": 176686
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.743314266204834,
      "learning_rate": 7.693424449468321e-05,
      "loss": 2.9499,
      "step": 176687
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6314947605133057,
      "learning_rate": 7.693150924836926e-05,
      "loss": 2.7564,
      "step": 176688
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8150219917297363,
      "learning_rate": 7.692877404352785e-05,
      "loss": 2.8335,
      "step": 176689
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.639366865158081,
      "learning_rate": 7.692603888015967e-05,
      "loss": 2.9459,
      "step": 176690
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8966166973114014,
      "learning_rate": 7.69233037582651e-05,
      "loss": 2.9943,
      "step": 176691
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5703186988830566,
      "learning_rate": 7.692056867784477e-05,
      "loss": 2.9786,
      "step": 176692
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7752625942230225,
      "learning_rate": 7.691783363889904e-05,
      "loss": 2.7873,
      "step": 176693
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4364125728607178,
      "learning_rate": 7.691509864142863e-05,
      "loss": 2.9773,
      "step": 176694
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5545108318328857,
      "learning_rate": 7.69123636854338e-05,
      "loss": 2.9602,
      "step": 176695
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9717397689819336,
      "learning_rate": 7.690962877091521e-05,
      "loss": 2.9956,
      "step": 176696
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3537702560424805,
      "learning_rate": 7.690689389787328e-05,
      "loss": 2.638,
      "step": 176697
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6876280307769775,
      "learning_rate": 7.690415906630864e-05,
      "loss": 2.8826,
      "step": 176698
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9463040828704834,
      "learning_rate": 7.690142427622165e-05,
      "loss": 3.1919,
      "step": 176699
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7773654460906982,
      "learning_rate": 7.689868952761305e-05,
      "loss": 3.0962,
      "step": 176700
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.452785015106201,
      "learning_rate": 7.689595482048303e-05,
      "loss": 2.7312,
      "step": 176701
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7764410972595215,
      "learning_rate": 7.689322015483236e-05,
      "loss": 3.0151,
      "step": 176702
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.581993579864502,
      "learning_rate": 7.689048553066136e-05,
      "loss": 3.1747,
      "step": 176703
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2185990810394287,
      "learning_rate": 7.68877509479707e-05,
      "loss": 2.5572,
      "step": 176704
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3053038120269775,
      "learning_rate": 7.688501640676074e-05,
      "loss": 2.93,
      "step": 176705
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.729412317276001,
      "learning_rate": 7.688228190703225e-05,
      "loss": 3.0102,
      "step": 176706
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.509052038192749,
      "learning_rate": 7.687954744878538e-05,
      "loss": 2.9042,
      "step": 176707
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.695237874984741,
      "learning_rate": 7.68768130320209e-05,
      "loss": 3.0627,
      "step": 176708
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8320066928863525,
      "learning_rate": 7.687407865673917e-05,
      "loss": 3.0117,
      "step": 176709
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7753303050994873,
      "learning_rate": 7.687134432294082e-05,
      "loss": 3.1004,
      "step": 176710
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.974064350128174,
      "learning_rate": 7.686861003062622e-05,
      "loss": 2.7133,
      "step": 176711
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4203126430511475,
      "learning_rate": 7.686587577979603e-05,
      "loss": 3.0025,
      "step": 176712
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.785400152206421,
      "learning_rate": 7.68631415704507e-05,
      "loss": 2.8592,
      "step": 176713
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4149677753448486,
      "learning_rate": 7.686040740259071e-05,
      "loss": 3.0888,
      "step": 176714
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0447168350219727,
      "learning_rate": 7.685767327621652e-05,
      "loss": 2.9978,
      "step": 176715
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0201168060302734,
      "learning_rate": 7.685493919132876e-05,
      "loss": 2.9506,
      "step": 176716
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.982926845550537,
      "learning_rate": 7.685220514792781e-05,
      "loss": 2.8969,
      "step": 176717
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.864715337753296,
      "learning_rate": 7.684947114601432e-05,
      "loss": 2.5978,
      "step": 176718
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0573809146881104,
      "learning_rate": 7.684673718558874e-05,
      "loss": 2.8728,
      "step": 176719
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.90509033203125,
      "learning_rate": 7.684400326665146e-05,
      "loss": 2.8969,
      "step": 176720
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3608360290527344,
      "learning_rate": 7.684126938920321e-05,
      "loss": 3.0244,
      "step": 176721
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0011491775512695,
      "learning_rate": 7.683853555324437e-05,
      "loss": 3.0254,
      "step": 176722
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.458585500717163,
      "learning_rate": 7.683580175877533e-05,
      "loss": 2.9136,
      "step": 176723
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.761570453643799,
      "learning_rate": 7.683306800579688e-05,
      "loss": 3.1664,
      "step": 176724
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9151508808135986,
      "learning_rate": 7.683033429430932e-05,
      "loss": 2.9747,
      "step": 176725
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8264245986938477,
      "learning_rate": 7.682760062431316e-05,
      "loss": 2.7247,
      "step": 176726
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.384889841079712,
      "learning_rate": 7.682486699580903e-05,
      "loss": 2.992,
      "step": 176727
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3617398738861084,
      "learning_rate": 7.682213340879739e-05,
      "loss": 2.9861,
      "step": 176728
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3954787254333496,
      "learning_rate": 7.681939986327861e-05,
      "loss": 2.9,
      "step": 176729
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3343679904937744,
      "learning_rate": 7.681666635925341e-05,
      "loss": 3.096,
      "step": 176730
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.343803644180298,
      "learning_rate": 7.681393289672222e-05,
      "loss": 2.7122,
      "step": 176731
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4320333003997803,
      "learning_rate": 7.681119947568543e-05,
      "loss": 2.8465,
      "step": 176732
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.349665641784668,
      "learning_rate": 7.680846609614376e-05,
      "loss": 2.8911,
      "step": 176733
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.528886795043945,
      "learning_rate": 7.680573275809753e-05,
      "loss": 3.0013,
      "step": 176734
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.982372522354126,
      "learning_rate": 7.680299946154739e-05,
      "loss": 2.9927,
      "step": 176735
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.549560785293579,
      "learning_rate": 7.680026620649378e-05,
      "loss": 2.9166,
      "step": 176736
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.642533302307129,
      "learning_rate": 7.679753299293722e-05,
      "loss": 2.9091,
      "step": 176737
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.168979644775391,
      "learning_rate": 7.679479982087814e-05,
      "loss": 2.8496,
      "step": 176738
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4488658905029297,
      "learning_rate": 7.679206669031722e-05,
      "loss": 2.9101,
      "step": 176739
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.145776271820068,
      "learning_rate": 7.678933360125475e-05,
      "loss": 3.0332,
      "step": 176740
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.362334966659546,
      "learning_rate": 7.678660055369145e-05,
      "loss": 2.9969,
      "step": 176741
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.328031539916992,
      "learning_rate": 7.678386754762772e-05,
      "loss": 2.8561,
      "step": 176742
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.79860520362854,
      "learning_rate": 7.678113458306413e-05,
      "loss": 2.8469,
      "step": 176743
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2500393390655518,
      "learning_rate": 7.677840166000101e-05,
      "loss": 3.185,
      "step": 176744
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5146892070770264,
      "learning_rate": 7.677566877843909e-05,
      "loss": 3.2346,
      "step": 176745
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.820950984954834,
      "learning_rate": 7.677293593837871e-05,
      "loss": 2.9032,
      "step": 176746
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.35960578918457,
      "learning_rate": 7.677020313982053e-05,
      "loss": 2.9368,
      "step": 176747
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.150702953338623,
      "learning_rate": 7.676747038276501e-05,
      "loss": 3.1141,
      "step": 176748
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1349680423736572,
      "learning_rate": 7.67647376672126e-05,
      "loss": 2.7921,
      "step": 176749
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4551753997802734,
      "learning_rate": 7.67620049931638e-05,
      "loss": 2.8285,
      "step": 176750
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.656818389892578,
      "learning_rate": 7.675927236061918e-05,
      "loss": 2.9806,
      "step": 176751
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.4985175132751465,
      "learning_rate": 7.675653976957917e-05,
      "loss": 2.8095,
      "step": 176752
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.124319076538086,
      "learning_rate": 7.675380722004443e-05,
      "loss": 3.0803,
      "step": 176753
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.059067249298096,
      "learning_rate": 7.675107471201535e-05,
      "loss": 3.045,
      "step": 176754
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.708680152893066,
      "learning_rate": 7.67483422454925e-05,
      "loss": 2.9897,
      "step": 176755
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.336130619049072,
      "learning_rate": 7.674560982047622e-05,
      "loss": 2.8861,
      "step": 176756
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8265750408172607,
      "learning_rate": 7.674287743696723e-05,
      "loss": 3.0551,
      "step": 176757
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.188570261001587,
      "learning_rate": 7.674014509496588e-05,
      "loss": 3.1086,
      "step": 176758
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.148296356201172,
      "learning_rate": 7.673741279447285e-05,
      "loss": 2.7373,
      "step": 176759
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.095952987670898,
      "learning_rate": 7.673468053548846e-05,
      "loss": 2.8889,
      "step": 176760
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.9528512954711914,
      "learning_rate": 7.673194831801345e-05,
      "loss": 2.9048,
      "step": 176761
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.37504243850708,
      "learning_rate": 7.672921614204806e-05,
      "loss": 3.0054,
      "step": 176762
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.565192937850952,
      "learning_rate": 7.672648400759295e-05,
      "loss": 2.8883,
      "step": 176763
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7962777614593506,
      "learning_rate": 7.672375191464856e-05,
      "loss": 3.0241,
      "step": 176764
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2731494903564453,
      "learning_rate": 7.672101986321551e-05,
      "loss": 2.9781,
      "step": 176765
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.953904867172241,
      "learning_rate": 7.671828785329416e-05,
      "loss": 3.0137,
      "step": 176766
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.104444980621338,
      "learning_rate": 7.671555588488526e-05,
      "loss": 2.8788,
      "step": 176767
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1258251667022705,
      "learning_rate": 7.671282395798899e-05,
      "loss": 2.9675,
      "step": 176768
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6618521213531494,
      "learning_rate": 7.671009207260608e-05,
      "loss": 3.0186,
      "step": 176769
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.478882074356079,
      "learning_rate": 7.670736022873692e-05,
      "loss": 2.8042,
      "step": 176770
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.583446502685547,
      "learning_rate": 7.670462842638217e-05,
      "loss": 2.8478,
      "step": 176771
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.38286828994751,
      "learning_rate": 7.670189666554213e-05,
      "loss": 2.6378,
      "step": 176772
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.890123128890991,
      "learning_rate": 7.669916494621762e-05,
      "loss": 3.1272,
      "step": 176773
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6310205459594727,
      "learning_rate": 7.669643326840877e-05,
      "loss": 2.8957,
      "step": 176774
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.4347124099731445,
      "learning_rate": 7.669370163211633e-05,
      "loss": 2.9261,
      "step": 176775
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.903482437133789,
      "learning_rate": 7.66909700373407e-05,
      "loss": 2.9213,
      "step": 176776
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5208818912506104,
      "learning_rate": 7.668823848408248e-05,
      "loss": 3.0368,
      "step": 176777
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6955671310424805,
      "learning_rate": 7.668550697234208e-05,
      "loss": 3.0342,
      "step": 176778
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7420449256896973,
      "learning_rate": 7.668277550212021e-05,
      "loss": 2.9252,
      "step": 176779
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0105597972869873,
      "learning_rate": 7.668004407341705e-05,
      "loss": 3.069,
      "step": 176780
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.843444585800171,
      "learning_rate": 7.667731268623339e-05,
      "loss": 3.0662,
      "step": 176781
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5428426265716553,
      "learning_rate": 7.667458134056955e-05,
      "loss": 2.8906,
      "step": 176782
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4352729320526123,
      "learning_rate": 7.66718500364262e-05,
      "loss": 3.0804,
      "step": 176783
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6652495861053467,
      "learning_rate": 7.666911877380369e-05,
      "loss": 2.9386,
      "step": 176784
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.044447660446167,
      "learning_rate": 7.666638755270278e-05,
      "loss": 2.8235,
      "step": 176785
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.878253221511841,
      "learning_rate": 7.666365637312361e-05,
      "loss": 2.9792,
      "step": 176786
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.001647472381592,
      "learning_rate": 7.666092523506698e-05,
      "loss": 2.7382,
      "step": 176787
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7227747440338135,
      "learning_rate": 7.665819413853321e-05,
      "loss": 2.8648,
      "step": 176788
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7337989807128906,
      "learning_rate": 7.6655463083523e-05,
      "loss": 3.0301,
      "step": 176789
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7824037075042725,
      "learning_rate": 7.665273207003665e-05,
      "loss": 2.8387,
      "step": 176790
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6255550384521484,
      "learning_rate": 7.665000109807494e-05,
      "loss": 3.0639,
      "step": 176791
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7578365802764893,
      "learning_rate": 7.664727016763806e-05,
      "loss": 2.8584,
      "step": 176792
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7120938301086426,
      "learning_rate": 7.664453927872674e-05,
      "loss": 2.9481,
      "step": 176793
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.653580665588379,
      "learning_rate": 7.664180843134136e-05,
      "loss": 3.1927,
      "step": 176794
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.214451313018799,
      "learning_rate": 7.663907762548255e-05,
      "loss": 2.6206,
      "step": 176795
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3797707557678223,
      "learning_rate": 7.663634686115066e-05,
      "loss": 2.8136,
      "step": 176796
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.904121160507202,
      "learning_rate": 7.663361613834638e-05,
      "loss": 2.8723,
      "step": 176797
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9684834480285645,
      "learning_rate": 7.663088545707011e-05,
      "loss": 3.0256,
      "step": 176798
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6102981567382812,
      "learning_rate": 7.66281548173224e-05,
      "loss": 2.9274,
      "step": 176799
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.667546510696411,
      "learning_rate": 7.662542421910363e-05,
      "loss": 2.8591,
      "step": 176800
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9378981590270996,
      "learning_rate": 7.662269366241451e-05,
      "loss": 2.7091,
      "step": 176801
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.136906147003174,
      "learning_rate": 7.661996314725533e-05,
      "loss": 3.0993,
      "step": 176802
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6057331562042236,
      "learning_rate": 7.661723267362683e-05,
      "loss": 2.9467,
      "step": 176803
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.484442710876465,
      "learning_rate": 7.661450224152941e-05,
      "loss": 3.0116,
      "step": 176804
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.730001926422119,
      "learning_rate": 7.661177185096357e-05,
      "loss": 3.3444,
      "step": 176805
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8179149627685547,
      "learning_rate": 7.66090415019297e-05,
      "loss": 2.6681,
      "step": 176806
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4052786827087402,
      "learning_rate": 7.660631119442855e-05,
      "loss": 3.0295,
      "step": 176807
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3633792400360107,
      "learning_rate": 7.660358092846038e-05,
      "loss": 3.002,
      "step": 176808
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.401090621948242,
      "learning_rate": 7.660085070402595e-05,
      "loss": 2.789,
      "step": 176809
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5856847763061523,
      "learning_rate": 7.659812052112562e-05,
      "loss": 2.8508,
      "step": 176810
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7083024978637695,
      "learning_rate": 7.65953903797598e-05,
      "loss": 2.9364,
      "step": 176811
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2182676792144775,
      "learning_rate": 7.659266027992923e-05,
      "loss": 2.886,
      "step": 176812
    },
    {
      "epoch": 2.3,
      "grad_norm": 6.0243096351623535,
      "learning_rate": 7.65899302216343e-05,
      "loss": 2.8734,
      "step": 176813
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5829384326934814,
      "learning_rate": 7.658720020487544e-05,
      "loss": 2.9447,
      "step": 176814
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0493526458740234,
      "learning_rate": 7.658447022965332e-05,
      "loss": 2.9051,
      "step": 176815
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.684119462966919,
      "learning_rate": 7.658174029596836e-05,
      "loss": 3.1468,
      "step": 176816
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5782580375671387,
      "learning_rate": 7.657901040382098e-05,
      "loss": 2.8125,
      "step": 176817
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.529416561126709,
      "learning_rate": 7.65762805532119e-05,
      "loss": 2.8929,
      "step": 176818
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.8376030921936035,
      "learning_rate": 7.657355074414136e-05,
      "loss": 2.9316,
      "step": 176819
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.065523624420166,
      "learning_rate": 7.657082097661015e-05,
      "loss": 3.2344,
      "step": 176820
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3103973865509033,
      "learning_rate": 7.65680912506186e-05,
      "loss": 2.7093,
      "step": 176821
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.878526449203491,
      "learning_rate": 7.656536156616729e-05,
      "loss": 3.034,
      "step": 176822
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.962312698364258,
      "learning_rate": 7.656263192325662e-05,
      "loss": 2.9705,
      "step": 176823
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.41860818862915,
      "learning_rate": 7.655990232188722e-05,
      "loss": 2.9213,
      "step": 176824
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5492513179779053,
      "learning_rate": 7.655717276205949e-05,
      "loss": 2.8915,
      "step": 176825
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.394237518310547,
      "learning_rate": 7.65544432437741e-05,
      "loss": 2.8047,
      "step": 176826
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4980149269104004,
      "learning_rate": 7.655171376703145e-05,
      "loss": 2.7827,
      "step": 176827
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2840027809143066,
      "learning_rate": 7.654898433183205e-05,
      "loss": 3.0033,
      "step": 176828
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.0316362380981445,
      "learning_rate": 7.65462549381763e-05,
      "loss": 3.0237,
      "step": 176829
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5185017585754395,
      "learning_rate": 7.654352558606495e-05,
      "loss": 3.1298,
      "step": 176830
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5204198360443115,
      "learning_rate": 7.654079627549827e-05,
      "loss": 2.7721,
      "step": 176831
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8135502338409424,
      "learning_rate": 7.653806700647695e-05,
      "loss": 3.1025,
      "step": 176832
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.690936803817749,
      "learning_rate": 7.653533777900133e-05,
      "loss": 2.9207,
      "step": 176833
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8127527236938477,
      "learning_rate": 7.653260859307221e-05,
      "loss": 3.0032,
      "step": 176834
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5827455520629883,
      "learning_rate": 7.652987944868972e-05,
      "loss": 2.9755,
      "step": 176835
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7036855220794678,
      "learning_rate": 7.65271503458546e-05,
      "loss": 2.9958,
      "step": 176836
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.4510226249694824,
      "learning_rate": 7.652442128456725e-05,
      "loss": 3.1,
      "step": 176837
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7856125831604004,
      "learning_rate": 7.652169226482829e-05,
      "loss": 2.9899,
      "step": 176838
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.926215648651123,
      "learning_rate": 7.65189632866381e-05,
      "loss": 2.9987,
      "step": 176839
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.623422145843506,
      "learning_rate": 7.651623434999741e-05,
      "loss": 3.0306,
      "step": 176840
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5738487243652344,
      "learning_rate": 7.651350545490641e-05,
      "loss": 2.7666,
      "step": 176841
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6333348751068115,
      "learning_rate": 7.651077660136584e-05,
      "loss": 2.9374,
      "step": 176842
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.542100429534912,
      "learning_rate": 7.650804778937603e-05,
      "loss": 2.8697,
      "step": 176843
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.586575508117676,
      "learning_rate": 7.650531901893773e-05,
      "loss": 2.8095,
      "step": 176844
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.008885383605957,
      "learning_rate": 7.650259029005118e-05,
      "loss": 2.8196,
      "step": 176845
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.030083179473877,
      "learning_rate": 7.649986160271723e-05,
      "loss": 2.6128,
      "step": 176846
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.377136707305908,
      "learning_rate": 7.649713295693597e-05,
      "loss": 2.7828,
      "step": 176847
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8512535095214844,
      "learning_rate": 7.649440435270821e-05,
      "loss": 2.9645,
      "step": 176848
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.473695993423462,
      "learning_rate": 7.649167579003427e-05,
      "loss": 2.8653,
      "step": 176849
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.553187370300293,
      "learning_rate": 7.648894726891486e-05,
      "loss": 2.8738,
      "step": 176850
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4399936199188232,
      "learning_rate": 7.648621878935024e-05,
      "loss": 2.9457,
      "step": 176851
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.912766218185425,
      "learning_rate": 7.648349035134124e-05,
      "loss": 2.8748,
      "step": 176852
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5672054290771484,
      "learning_rate": 7.648076195488802e-05,
      "loss": 2.8517,
      "step": 176853
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4854748249053955,
      "learning_rate": 7.647803359999134e-05,
      "loss": 2.9476,
      "step": 176854
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8620872497558594,
      "learning_rate": 7.647530528665148e-05,
      "loss": 3.096,
      "step": 176855
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9348666667938232,
      "learning_rate": 7.647257701486919e-05,
      "loss": 3.0057,
      "step": 176856
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.806612730026245,
      "learning_rate": 7.64698487846448e-05,
      "loss": 3.0069,
      "step": 176857
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3401639461517334,
      "learning_rate": 7.646712059597904e-05,
      "loss": 2.7024,
      "step": 176858
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6217143535614014,
      "learning_rate": 7.64643924488721e-05,
      "loss": 2.8936,
      "step": 176859
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.397686719894409,
      "learning_rate": 7.646166434332472e-05,
      "loss": 3.0505,
      "step": 176860
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4721455574035645,
      "learning_rate": 7.645893627933728e-05,
      "loss": 2.9277,
      "step": 176861
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5051889419555664,
      "learning_rate": 7.645620825691039e-05,
      "loss": 3.1329,
      "step": 176862
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.371961832046509,
      "learning_rate": 7.645348027604446e-05,
      "loss": 2.7811,
      "step": 176863
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.733325958251953,
      "learning_rate": 7.64507523367402e-05,
      "loss": 2.8267,
      "step": 176864
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0524818897247314,
      "learning_rate": 7.644802443899778e-05,
      "loss": 2.9246,
      "step": 176865
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.568906307220459,
      "learning_rate": 7.6445296582818e-05,
      "loss": 2.8855,
      "step": 176866
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1202125549316406,
      "learning_rate": 7.644256876820118e-05,
      "loss": 2.8292,
      "step": 176867
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8701958656311035,
      "learning_rate": 7.643984099514798e-05,
      "loss": 3.0445,
      "step": 176868
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4645490646362305,
      "learning_rate": 7.643711326365873e-05,
      "loss": 2.8401,
      "step": 176869
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0670113563537598,
      "learning_rate": 7.643438557373423e-05,
      "loss": 2.7715,
      "step": 176870
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9552969932556152,
      "learning_rate": 7.643165792537466e-05,
      "loss": 2.8144,
      "step": 176871
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.295809745788574,
      "learning_rate": 7.64289303185807e-05,
      "loss": 2.6866,
      "step": 176872
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.347576379776001,
      "learning_rate": 7.642620275335275e-05,
      "loss": 3.0248,
      "step": 176873
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.605473279953003,
      "learning_rate": 7.642347522969149e-05,
      "loss": 2.9509,
      "step": 176874
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.140037775039673,
      "learning_rate": 7.642074774759722e-05,
      "loss": 3.0822,
      "step": 176875
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7265594005584717,
      "learning_rate": 7.641802030707075e-05,
      "loss": 3.0028,
      "step": 176876
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.784888744354248,
      "learning_rate": 7.641529290811217e-05,
      "loss": 2.9735,
      "step": 176877
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5176773071289062,
      "learning_rate": 7.641256555072234e-05,
      "loss": 2.9913,
      "step": 176878
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.173053741455078,
      "learning_rate": 7.640983823490153e-05,
      "loss": 2.9037,
      "step": 176879
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6201913356781006,
      "learning_rate": 7.640711096065045e-05,
      "loss": 2.9716,
      "step": 176880
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.761215925216675,
      "learning_rate": 7.64043837279694e-05,
      "loss": 2.8997,
      "step": 176881
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.457578659057617,
      "learning_rate": 7.640165653685909e-05,
      "loss": 2.8957,
      "step": 176882
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.814126491546631,
      "learning_rate": 7.639892938731996e-05,
      "loss": 2.9897,
      "step": 176883
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.846405267715454,
      "learning_rate": 7.639620227935248e-05,
      "loss": 3.0776,
      "step": 176884
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.948526382446289,
      "learning_rate": 7.639347521295705e-05,
      "loss": 3.0821,
      "step": 176885
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6053836345672607,
      "learning_rate": 7.639074818813442e-05,
      "loss": 3.1923,
      "step": 176886
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.561617374420166,
      "learning_rate": 7.638802120488484e-05,
      "loss": 2.9651,
      "step": 176887
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.237604856491089,
      "learning_rate": 7.638529426320908e-05,
      "loss": 3.013,
      "step": 176888
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6087634563446045,
      "learning_rate": 7.638256736310748e-05,
      "loss": 2.8302,
      "step": 176889
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5415868759155273,
      "learning_rate": 7.63798405045806e-05,
      "loss": 2.9281,
      "step": 176890
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8055500984191895,
      "learning_rate": 7.637711368762883e-05,
      "loss": 3.0738,
      "step": 176891
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.534212112426758,
      "learning_rate": 7.637438691225287e-05,
      "loss": 2.6385,
      "step": 176892
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6845993995666504,
      "learning_rate": 7.637166017845305e-05,
      "loss": 2.8821,
      "step": 176893
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.771449565887451,
      "learning_rate": 7.636893348623004e-05,
      "loss": 3.1825,
      "step": 176894
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7261641025543213,
      "learning_rate": 7.636620683558427e-05,
      "loss": 2.99,
      "step": 176895
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9439375400543213,
      "learning_rate": 7.636348022651615e-05,
      "loss": 3.1039,
      "step": 176896
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.468573808670044,
      "learning_rate": 7.63607536590264e-05,
      "loss": 2.7898,
      "step": 176897
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6662886142730713,
      "learning_rate": 7.635802713311535e-05,
      "loss": 2.9583,
      "step": 176898
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1153066158294678,
      "learning_rate": 7.635530064878351e-05,
      "loss": 3.2104,
      "step": 176899
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1554391384124756,
      "learning_rate": 7.635257420603155e-05,
      "loss": 2.9319,
      "step": 176900
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9625728130340576,
      "learning_rate": 7.634984780485981e-05,
      "loss": 2.7777,
      "step": 176901
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3705921173095703,
      "learning_rate": 7.634712144526883e-05,
      "loss": 2.9334,
      "step": 176902
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8233563899993896,
      "learning_rate": 7.634439512725919e-05,
      "loss": 2.7457,
      "step": 176903
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4715211391448975,
      "learning_rate": 7.634166885083138e-05,
      "loss": 2.9339,
      "step": 176904
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7776455879211426,
      "learning_rate": 7.633894261598577e-05,
      "loss": 2.7553,
      "step": 176905
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7913026809692383,
      "learning_rate": 7.633621642272303e-05,
      "loss": 2.9183,
      "step": 176906
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.577272891998291,
      "learning_rate": 7.633349027104367e-05,
      "loss": 2.8499,
      "step": 176907
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.382894992828369,
      "learning_rate": 7.6330764160948e-05,
      "loss": 3.0077,
      "step": 176908
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2480998039245605,
      "learning_rate": 7.632803809243678e-05,
      "loss": 2.8135,
      "step": 176909
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9173669815063477,
      "learning_rate": 7.632531206551029e-05,
      "loss": 2.9771,
      "step": 176910
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6668217182159424,
      "learning_rate": 7.632258608016922e-05,
      "loss": 2.9339,
      "step": 176911
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.335648536682129,
      "learning_rate": 7.631986013641404e-05,
      "loss": 2.8683,
      "step": 176912
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.337557077407837,
      "learning_rate": 7.631713423424521e-05,
      "loss": 2.9917,
      "step": 176913
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.22576379776001,
      "learning_rate": 7.631440837366316e-05,
      "loss": 2.8805,
      "step": 176914
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2798242568969727,
      "learning_rate": 7.631168255466857e-05,
      "loss": 3.0877,
      "step": 176915
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.063313007354736,
      "learning_rate": 7.630895677726175e-05,
      "loss": 2.826,
      "step": 176916
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.218754768371582,
      "learning_rate": 7.630623104144343e-05,
      "loss": 3.0629,
      "step": 176917
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6490318775177,
      "learning_rate": 7.630350534721391e-05,
      "loss": 3.0641,
      "step": 176918
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5974643230438232,
      "learning_rate": 7.630077969457399e-05,
      "loss": 2.895,
      "step": 176919
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4889461994171143,
      "learning_rate": 7.629805408352376e-05,
      "loss": 2.9111,
      "step": 176920
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.244943618774414,
      "learning_rate": 7.629532851406407e-05,
      "loss": 2.902,
      "step": 176921
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9940967559814453,
      "learning_rate": 7.62926029861952e-05,
      "loss": 2.9299,
      "step": 176922
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6778011322021484,
      "learning_rate": 7.628987749991784e-05,
      "loss": 2.8117,
      "step": 176923
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7477807998657227,
      "learning_rate": 7.628715205523233e-05,
      "loss": 3.0098,
      "step": 176924
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.30715274810791,
      "learning_rate": 7.628442665213943e-05,
      "loss": 2.9797,
      "step": 176925
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6777729988098145,
      "learning_rate": 7.628170129063931e-05,
      "loss": 3.0897,
      "step": 176926
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.52944278717041,
      "learning_rate": 7.627897597073274e-05,
      "loss": 2.8944,
      "step": 176927
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.308601140975952,
      "learning_rate": 7.627625069242004e-05,
      "loss": 3.0297,
      "step": 176928
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5014231204986572,
      "learning_rate": 7.627352545570189e-05,
      "loss": 2.9454,
      "step": 176929
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.539914131164551,
      "learning_rate": 7.627080026057863e-05,
      "loss": 2.9757,
      "step": 176930
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.74883770942688,
      "learning_rate": 7.626807510705101e-05,
      "loss": 3.1812,
      "step": 176931
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.627690315246582,
      "learning_rate": 7.626534999511925e-05,
      "loss": 2.8794,
      "step": 176932
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.495974540710449,
      "learning_rate": 7.626262492478402e-05,
      "loss": 2.9923,
      "step": 176933
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7791123390197754,
      "learning_rate": 7.625989989604573e-05,
      "loss": 3.0746,
      "step": 176934
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3042445182800293,
      "learning_rate": 7.625717490890503e-05,
      "loss": 3.2401,
      "step": 176935
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7868099212646484,
      "learning_rate": 7.625444996336226e-05,
      "loss": 2.8392,
      "step": 176936
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4190635681152344,
      "learning_rate": 7.625172505941817e-05,
      "loss": 2.8254,
      "step": 176937
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8595476150512695,
      "learning_rate": 7.624900019707294e-05,
      "loss": 2.792,
      "step": 176938
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3269591331481934,
      "learning_rate": 7.624627537632733e-05,
      "loss": 2.8922,
      "step": 176939
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.043842315673828,
      "learning_rate": 7.62435505971817e-05,
      "loss": 2.972,
      "step": 176940
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8476126194000244,
      "learning_rate": 7.624082585963666e-05,
      "loss": 2.9704,
      "step": 176941
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0757675170898438,
      "learning_rate": 7.623810116369263e-05,
      "loss": 2.9626,
      "step": 176942
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9940526485443115,
      "learning_rate": 7.623537650935033e-05,
      "loss": 2.9852,
      "step": 176943
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.918877363204956,
      "learning_rate": 7.623265189660989e-05,
      "loss": 3.0185,
      "step": 176944
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6954877376556396,
      "learning_rate": 7.622992732547217e-05,
      "loss": 2.9514,
      "step": 176945
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7479069232940674,
      "learning_rate": 7.622720279593738e-05,
      "loss": 3.2105,
      "step": 176946
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7145438194274902,
      "learning_rate": 7.622447830800629e-05,
      "loss": 2.98,
      "step": 176947
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3368747234344482,
      "learning_rate": 7.622175386167922e-05,
      "loss": 3.0005,
      "step": 176948
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4139857292175293,
      "learning_rate": 7.621902945695689e-05,
      "loss": 2.7981,
      "step": 176949
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3718597888946533,
      "learning_rate": 7.621630509383952e-05,
      "loss": 3.3344,
      "step": 176950
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7776808738708496,
      "learning_rate": 7.621358077232784e-05,
      "loss": 2.735,
      "step": 176951
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9832870960235596,
      "learning_rate": 7.621085649242221e-05,
      "loss": 2.9327,
      "step": 176952
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5640385150909424,
      "learning_rate": 7.620813225412326e-05,
      "loss": 2.7898,
      "step": 176953
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7187066078186035,
      "learning_rate": 7.620540805743141e-05,
      "loss": 2.7854,
      "step": 176954
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.665034294128418,
      "learning_rate": 7.620268390234732e-05,
      "loss": 2.993,
      "step": 176955
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9666199684143066,
      "learning_rate": 7.619995978887123e-05,
      "loss": 2.9279,
      "step": 176956
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0029232501983643,
      "learning_rate": 7.619723571700386e-05,
      "loss": 3.016,
      "step": 176957
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.72005295753479,
      "learning_rate": 7.619451168674557e-05,
      "loss": 2.6865,
      "step": 176958
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5130362510681152,
      "learning_rate": 7.619178769809702e-05,
      "loss": 2.7906,
      "step": 176959
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3382580280303955,
      "learning_rate": 7.618906375105859e-05,
      "loss": 2.983,
      "step": 176960
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5378944873809814,
      "learning_rate": 7.61863398456309e-05,
      "loss": 3.032,
      "step": 176961
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.765831232070923,
      "learning_rate": 7.618361598181437e-05,
      "loss": 2.9978,
      "step": 176962
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.430708885192871,
      "learning_rate": 7.618089215960958e-05,
      "loss": 2.8033,
      "step": 176963
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7282752990722656,
      "learning_rate": 7.617816837901688e-05,
      "loss": 2.8742,
      "step": 176964
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.595553874969482,
      "learning_rate": 7.617544464003696e-05,
      "loss": 2.9646,
      "step": 176965
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5527031421661377,
      "learning_rate": 7.617272094267017e-05,
      "loss": 2.8953,
      "step": 176966
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.728489875793457,
      "learning_rate": 7.616999728691717e-05,
      "loss": 2.9695,
      "step": 176967
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5758466720581055,
      "learning_rate": 7.616727367277841e-05,
      "loss": 2.8085,
      "step": 176968
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5485870838165283,
      "learning_rate": 7.616455010025437e-05,
      "loss": 3.1624,
      "step": 176969
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.709625482559204,
      "learning_rate": 7.616182656934547e-05,
      "loss": 3.0135,
      "step": 176970
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.959977865219116,
      "learning_rate": 7.615910308005242e-05,
      "loss": 2.8802,
      "step": 176971
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.627847194671631,
      "learning_rate": 7.615637963237554e-05,
      "loss": 2.9408,
      "step": 176972
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.859523773193359,
      "learning_rate": 7.615365622631548e-05,
      "loss": 2.8117,
      "step": 176973
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.3248345851898193,
      "learning_rate": 7.615093286187268e-05,
      "loss": 2.931,
      "step": 176974
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.6148531436920166,
      "learning_rate": 7.614820953904767e-05,
      "loss": 3.0589,
      "step": 176975
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5532889366149902,
      "learning_rate": 7.614548625784082e-05,
      "loss": 2.8624,
      "step": 176976
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7545106410980225,
      "learning_rate": 7.614276301825283e-05,
      "loss": 3.0794,
      "step": 176977
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.033198833465576,
      "learning_rate": 7.614003982028407e-05,
      "loss": 2.9613,
      "step": 176978
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.835296392440796,
      "learning_rate": 7.613731666393515e-05,
      "loss": 2.7596,
      "step": 176979
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.977137804031372,
      "learning_rate": 7.613459354920658e-05,
      "loss": 3.0619,
      "step": 176980
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.668590545654297,
      "learning_rate": 7.613187047609868e-05,
      "loss": 2.854,
      "step": 176981
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.389488458633423,
      "learning_rate": 7.612914744461221e-05,
      "loss": 2.9157,
      "step": 176982
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.761596918106079,
      "learning_rate": 7.612642445474752e-05,
      "loss": 2.6745,
      "step": 176983
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6474499702453613,
      "learning_rate": 7.612370150650511e-05,
      "loss": 2.9097,
      "step": 176984
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.5104146003723145,
      "learning_rate": 7.61209785998856e-05,
      "loss": 2.9897,
      "step": 176985
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.452496290206909,
      "learning_rate": 7.611825573488942e-05,
      "loss": 2.8856,
      "step": 176986
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.768965721130371,
      "learning_rate": 7.611553291151702e-05,
      "loss": 2.8638,
      "step": 176987
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6710877418518066,
      "learning_rate": 7.611281012976902e-05,
      "loss": 2.954,
      "step": 176988
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6901957988739014,
      "learning_rate": 7.61100873896459e-05,
      "loss": 2.7003,
      "step": 176989
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7274279594421387,
      "learning_rate": 7.610736469114803e-05,
      "loss": 2.9561,
      "step": 176990
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.395443916320801,
      "learning_rate": 7.610464203427614e-05,
      "loss": 2.5052,
      "step": 176991
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.813347816467285,
      "learning_rate": 7.610191941903062e-05,
      "loss": 3.2309,
      "step": 176992
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8825178146362305,
      "learning_rate": 7.609919684541187e-05,
      "loss": 3.063,
      "step": 176993
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6394386291503906,
      "learning_rate": 7.609647431342062e-05,
      "loss": 3.026,
      "step": 176994
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.3327956199645996,
      "learning_rate": 7.609375182305713e-05,
      "loss": 2.9879,
      "step": 176995
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.04888916015625,
      "learning_rate": 7.609102937432219e-05,
      "loss": 2.8045,
      "step": 176996
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7447073459625244,
      "learning_rate": 7.608830696721611e-05,
      "loss": 2.7478,
      "step": 176997
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.2541346549987793,
      "learning_rate": 7.608558460173945e-05,
      "loss": 2.8747,
      "step": 176998
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.711723566055298,
      "learning_rate": 7.608286227789261e-05,
      "loss": 2.724,
      "step": 176999
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.5076563358306885,
      "learning_rate": 7.60801399956763e-05,
      "loss": 2.9454,
      "step": 177000
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.4160783290863037,
      "learning_rate": 7.607741775509084e-05,
      "loss": 3.0889,
      "step": 177001
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.301190137863159,
      "learning_rate": 7.607469555613689e-05,
      "loss": 3.1145,
      "step": 177002
    },
    {
      "epoch": 2.3,
      "grad_norm": 4.291290283203125,
      "learning_rate": 7.607197339881477e-05,
      "loss": 2.7395,
      "step": 177003
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.453038454055786,
      "learning_rate": 7.606925128312531e-05,
      "loss": 2.9114,
      "step": 177004
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.397582530975342,
      "learning_rate": 7.606652920906859e-05,
      "loss": 2.6905,
      "step": 177005
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.961827516555786,
      "learning_rate": 7.606380717664545e-05,
      "loss": 2.9362,
      "step": 177006
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.6876578330993652,
      "learning_rate": 7.606108518585615e-05,
      "loss": 2.9434,
      "step": 177007
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7063772678375244,
      "learning_rate": 7.605836323670144e-05,
      "loss": 3.0002,
      "step": 177008
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.106966495513916,
      "learning_rate": 7.60556413291816e-05,
      "loss": 2.9418,
      "step": 177009
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.338078022003174,
      "learning_rate": 7.605291946329746e-05,
      "loss": 2.9705,
      "step": 177010
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.7755510807037354,
      "learning_rate": 7.605019763904911e-05,
      "loss": 2.8189,
      "step": 177011
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.6558406352996826,
      "learning_rate": 7.604747585643732e-05,
      "loss": 3.059,
      "step": 177012
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.8919551372528076,
      "learning_rate": 7.604475411546248e-05,
      "loss": 2.8951,
      "step": 177013
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2062084674835205,
      "learning_rate": 7.60420324161252e-05,
      "loss": 2.7406,
      "step": 177014
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.6773509979248047,
      "learning_rate": 7.603931075842586e-05,
      "loss": 3.1835,
      "step": 177015
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.288783550262451,
      "learning_rate": 7.603658914236524e-05,
      "loss": 3.2728,
      "step": 177016
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.759866237640381,
      "learning_rate": 7.603386756794345e-05,
      "loss": 2.824,
      "step": 177017
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.7740113735198975,
      "learning_rate": 7.603114603516129e-05,
      "loss": 2.9847,
      "step": 177018
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.0152223110198975,
      "learning_rate": 7.602842454401904e-05,
      "loss": 2.8951,
      "step": 177019
    },
    {
      "epoch": 2.3,
      "grad_norm": 5.1141486167907715,
      "learning_rate": 7.602570309451745e-05,
      "loss": 3.05,
      "step": 177020
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.717078447341919,
      "learning_rate": 7.602298168665682e-05,
      "loss": 2.9743,
      "step": 177021
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.2321999073028564,
      "learning_rate": 7.602026032043792e-05,
      "loss": 2.799,
      "step": 177022
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1157214641571045,
      "learning_rate": 7.601753899586094e-05,
      "loss": 2.904,
      "step": 177023
    },
    {
      "epoch": 2.3,
      "grad_norm": 3.1921422481536865,
      "learning_rate": 7.601481771292658e-05,
      "loss": 3.0951,
      "step": 177024
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.9536876678466797,
      "learning_rate": 7.601209647163521e-05,
      "loss": 2.9999,
      "step": 177025
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.591097831726074,
      "learning_rate": 7.60093752719875e-05,
      "loss": 2.9611,
      "step": 177026
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.602149248123169,
      "learning_rate": 7.600665411398381e-05,
      "loss": 3.0494,
      "step": 177027
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.305130958557129,
      "learning_rate": 7.600393299762477e-05,
      "loss": 2.9242,
      "step": 177028
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3046133518218994,
      "learning_rate": 7.600121192291084e-05,
      "loss": 3.0102,
      "step": 177029
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4149510860443115,
      "learning_rate": 7.599849088984251e-05,
      "loss": 3.054,
      "step": 177030
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4811925888061523,
      "learning_rate": 7.59957698984202e-05,
      "loss": 3.0429,
      "step": 177031
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.133134841918945,
      "learning_rate": 7.59930489486446e-05,
      "loss": 3.0653,
      "step": 177032
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1752548217773438,
      "learning_rate": 7.599032804051604e-05,
      "loss": 2.9901,
      "step": 177033
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6853978633880615,
      "learning_rate": 7.598760717403519e-05,
      "loss": 3.1035,
      "step": 177034
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5243780612945557,
      "learning_rate": 7.598488634920244e-05,
      "loss": 3.0534,
      "step": 177035
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.429274559020996,
      "learning_rate": 7.598216556601837e-05,
      "loss": 2.6862,
      "step": 177036
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5131046772003174,
      "learning_rate": 7.597944482448336e-05,
      "loss": 3.0386,
      "step": 177037
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6811068058013916,
      "learning_rate": 7.597672412459804e-05,
      "loss": 3.1044,
      "step": 177038
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.356900691986084,
      "learning_rate": 7.597400346636285e-05,
      "loss": 3.0005,
      "step": 177039
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2221076488494873,
      "learning_rate": 7.597128284977836e-05,
      "loss": 2.8467,
      "step": 177040
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6852054595947266,
      "learning_rate": 7.596856227484505e-05,
      "loss": 2.8897,
      "step": 177041
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5373404026031494,
      "learning_rate": 7.596584174156344e-05,
      "loss": 2.7807,
      "step": 177042
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9387338161468506,
      "learning_rate": 7.596312124993387e-05,
      "loss": 3.1607,
      "step": 177043
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7441649436950684,
      "learning_rate": 7.59604007999571e-05,
      "loss": 3.0661,
      "step": 177044
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8819355964660645,
      "learning_rate": 7.595768039163343e-05,
      "loss": 2.689,
      "step": 177045
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.170650005340576,
      "learning_rate": 7.595496002496358e-05,
      "loss": 3.083,
      "step": 177046
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9397530555725098,
      "learning_rate": 7.595223969994789e-05,
      "loss": 2.8086,
      "step": 177047
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4505069255828857,
      "learning_rate": 7.594951941658692e-05,
      "loss": 2.8298,
      "step": 177048
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0363667011260986,
      "learning_rate": 7.594679917488106e-05,
      "loss": 3.1461,
      "step": 177049
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5910279750823975,
      "learning_rate": 7.594407897483102e-05,
      "loss": 3.1515,
      "step": 177050
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2061238288879395,
      "learning_rate": 7.594135881643713e-05,
      "loss": 2.9094,
      "step": 177051
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.185963153839111,
      "learning_rate": 7.593863869970004e-05,
      "loss": 3.0655,
      "step": 177052
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9384608268737793,
      "learning_rate": 7.593591862462019e-05,
      "loss": 3.0508,
      "step": 177053
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2358689308166504,
      "learning_rate": 7.59331985911981e-05,
      "loss": 2.8387,
      "step": 177054
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5467777252197266,
      "learning_rate": 7.593047859943415e-05,
      "loss": 3.0215,
      "step": 177055
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3068923950195312,
      "learning_rate": 7.592775864932906e-05,
      "loss": 3.0268,
      "step": 177056
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3005456924438477,
      "learning_rate": 7.592503874088313e-05,
      "loss": 2.9874,
      "step": 177057
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.248375177383423,
      "learning_rate": 7.592231887409703e-05,
      "loss": 2.9125,
      "step": 177058
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.661041259765625,
      "learning_rate": 7.591959904897125e-05,
      "loss": 3.0981,
      "step": 177059
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.423722505569458,
      "learning_rate": 7.59168792655062e-05,
      "loss": 3.0512,
      "step": 177060
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.916058301925659,
      "learning_rate": 7.591415952370237e-05,
      "loss": 2.9159,
      "step": 177061
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3942439556121826,
      "learning_rate": 7.591143982356041e-05,
      "loss": 2.6513,
      "step": 177062
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.456223964691162,
      "learning_rate": 7.590872016508065e-05,
      "loss": 2.8253,
      "step": 177063
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.030351400375366,
      "learning_rate": 7.590600054826377e-05,
      "loss": 2.8506,
      "step": 177064
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.916933298110962,
      "learning_rate": 7.590328097311022e-05,
      "loss": 2.7899,
      "step": 177065
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5956475734710693,
      "learning_rate": 7.590056143962038e-05,
      "loss": 2.9752,
      "step": 177066
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6508750915527344,
      "learning_rate": 7.589784194779493e-05,
      "loss": 2.8061,
      "step": 177067
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7648773193359375,
      "learning_rate": 7.589512249763433e-05,
      "loss": 2.8782,
      "step": 177068
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4654862880706787,
      "learning_rate": 7.589240308913894e-05,
      "loss": 2.8497,
      "step": 177069
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.794847011566162,
      "learning_rate": 7.588968372230949e-05,
      "loss": 2.9462,
      "step": 177070
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.430021286010742,
      "learning_rate": 7.588696439714638e-05,
      "loss": 3.2361,
      "step": 177071
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.509639263153076,
      "learning_rate": 7.588424511365e-05,
      "loss": 3.0084,
      "step": 177072
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5315003395080566,
      "learning_rate": 7.588152587182109e-05,
      "loss": 2.7526,
      "step": 177073
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.477585554122925,
      "learning_rate": 7.587880667166002e-05,
      "loss": 2.9553,
      "step": 177074
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.792849540710449,
      "learning_rate": 7.587608751316722e-05,
      "loss": 2.9425,
      "step": 177075
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.88523006439209,
      "learning_rate": 7.587336839634335e-05,
      "loss": 2.8269,
      "step": 177076
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5827784538269043,
      "learning_rate": 7.587064932118888e-05,
      "loss": 2.8693,
      "step": 177077
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.895367383956909,
      "learning_rate": 7.586793028770422e-05,
      "loss": 2.9934,
      "step": 177078
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3279759883880615,
      "learning_rate": 7.586521129588998e-05,
      "loss": 2.8191,
      "step": 177079
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8949599266052246,
      "learning_rate": 7.586249234574658e-05,
      "loss": 3.2061,
      "step": 177080
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7028884887695312,
      "learning_rate": 7.585977343727465e-05,
      "loss": 2.6965,
      "step": 177081
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3882100582122803,
      "learning_rate": 7.585705457047462e-05,
      "loss": 3.0857,
      "step": 177082
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.558642864227295,
      "learning_rate": 7.585433574534698e-05,
      "loss": 2.913,
      "step": 177083
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3537304401397705,
      "learning_rate": 7.585161696189218e-05,
      "loss": 2.7291,
      "step": 177084
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.556979179382324,
      "learning_rate": 7.584889822011088e-05,
      "loss": 2.8742,
      "step": 177085
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.637803554534912,
      "learning_rate": 7.58461795200034e-05,
      "loss": 2.8174,
      "step": 177086
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.16694712638855,
      "learning_rate": 7.584346086157046e-05,
      "loss": 2.9408,
      "step": 177087
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4091076850891113,
      "learning_rate": 7.584074224481245e-05,
      "loss": 2.7893,
      "step": 177088
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4222896099090576,
      "learning_rate": 7.583802366972986e-05,
      "loss": 3.0626,
      "step": 177089
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.653043746948242,
      "learning_rate": 7.58353051363231e-05,
      "loss": 3.0546,
      "step": 177090
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.496798038482666,
      "learning_rate": 7.583258664459292e-05,
      "loss": 2.9217,
      "step": 177091
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6331472396850586,
      "learning_rate": 7.582986819453958e-05,
      "loss": 3.0857,
      "step": 177092
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5982775688171387,
      "learning_rate": 7.582714978616382e-05,
      "loss": 2.9126,
      "step": 177093
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8429980278015137,
      "learning_rate": 7.582443141946592e-05,
      "loss": 2.9745,
      "step": 177094
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.629375457763672,
      "learning_rate": 7.582171309444657e-05,
      "loss": 2.621,
      "step": 177095
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.522146463394165,
      "learning_rate": 7.581899481110618e-05,
      "loss": 2.9131,
      "step": 177096
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5279364585876465,
      "learning_rate": 7.581627656944529e-05,
      "loss": 2.9344,
      "step": 177097
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.700791597366333,
      "learning_rate": 7.581355836946429e-05,
      "loss": 2.7409,
      "step": 177098
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.815859794616699,
      "learning_rate": 7.581084021116388e-05,
      "loss": 3.0111,
      "step": 177099
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0872786045074463,
      "learning_rate": 7.58081220945444e-05,
      "loss": 2.8694,
      "step": 177100
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.65008544921875,
      "learning_rate": 7.580540401960648e-05,
      "loss": 3.03,
      "step": 177101
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.986715078353882,
      "learning_rate": 7.580268598635057e-05,
      "loss": 3.0186,
      "step": 177102
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5827386379241943,
      "learning_rate": 7.579996799477717e-05,
      "loss": 2.9748,
      "step": 177103
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.731578826904297,
      "learning_rate": 7.579725004488671e-05,
      "loss": 2.8858,
      "step": 177104
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.909104585647583,
      "learning_rate": 7.579453213667985e-05,
      "loss": 2.7773,
      "step": 177105
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0645952224731445,
      "learning_rate": 7.579181427015695e-05,
      "loss": 2.6514,
      "step": 177106
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4413864612579346,
      "learning_rate": 7.578909644531863e-05,
      "loss": 3.042,
      "step": 177107
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.518731117248535,
      "learning_rate": 7.578637866216541e-05,
      "loss": 3.0593,
      "step": 177108
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7211947441101074,
      "learning_rate": 7.57836609206977e-05,
      "loss": 2.8201,
      "step": 177109
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.645127296447754,
      "learning_rate": 7.578094322091595e-05,
      "loss": 3.1177,
      "step": 177110
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7348694801330566,
      "learning_rate": 7.577822556282085e-05,
      "loss": 2.9771,
      "step": 177111
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.648080825805664,
      "learning_rate": 7.57755079464127e-05,
      "loss": 2.9483,
      "step": 177112
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.45564341545105,
      "learning_rate": 7.577279037169221e-05,
      "loss": 3.1258,
      "step": 177113
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8953428268432617,
      "learning_rate": 7.577007283865981e-05,
      "loss": 3.1864,
      "step": 177114
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4386484622955322,
      "learning_rate": 7.576735534731599e-05,
      "loss": 2.6744,
      "step": 177115
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7478415966033936,
      "learning_rate": 7.576463789766117e-05,
      "loss": 2.9107,
      "step": 177116
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.315176010131836,
      "learning_rate": 7.576192048969598e-05,
      "loss": 3.1028,
      "step": 177117
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.959028482437134,
      "learning_rate": 7.575920312342082e-05,
      "loss": 2.8285,
      "step": 177118
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.231482982635498,
      "learning_rate": 7.575648579883638e-05,
      "loss": 2.8468,
      "step": 177119
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.248871088027954,
      "learning_rate": 7.575376851594299e-05,
      "loss": 2.8717,
      "step": 177120
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.881784200668335,
      "learning_rate": 7.575105127474123e-05,
      "loss": 3.2287,
      "step": 177121
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0109546184539795,
      "learning_rate": 7.574833407523147e-05,
      "loss": 3.168,
      "step": 177122
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.785557985305786,
      "learning_rate": 7.574561691741447e-05,
      "loss": 3.0314,
      "step": 177123
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7790684700012207,
      "learning_rate": 7.574289980129046e-05,
      "loss": 2.8963,
      "step": 177124
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8789734840393066,
      "learning_rate": 7.574018272686019e-05,
      "loss": 3.1054,
      "step": 177125
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.910325050354004,
      "learning_rate": 7.573746569412402e-05,
      "loss": 2.9464,
      "step": 177126
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.679194688796997,
      "learning_rate": 7.57347487030825e-05,
      "loss": 3.1166,
      "step": 177127
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0811350345611572,
      "learning_rate": 7.573203175373606e-05,
      "loss": 2.7867,
      "step": 177128
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6655385494232178,
      "learning_rate": 7.572931484608534e-05,
      "loss": 3.0661,
      "step": 177129
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.358731269836426,
      "learning_rate": 7.57265979801307e-05,
      "loss": 2.8354,
      "step": 177130
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9789109230041504,
      "learning_rate": 7.57238811558728e-05,
      "loss": 3.0145,
      "step": 177131
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3640990257263184,
      "learning_rate": 7.572116437331204e-05,
      "loss": 3.0545,
      "step": 177132
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.275010585784912,
      "learning_rate": 7.571844763244898e-05,
      "loss": 2.8423,
      "step": 177133
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8212804794311523,
      "learning_rate": 7.571573093328398e-05,
      "loss": 2.7759,
      "step": 177134
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.205495595932007,
      "learning_rate": 7.571301427581776e-05,
      "loss": 2.8737,
      "step": 177135
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2650763988494873,
      "learning_rate": 7.571029766005065e-05,
      "loss": 2.8539,
      "step": 177136
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6010334491729736,
      "learning_rate": 7.57075810859833e-05,
      "loss": 2.9004,
      "step": 177137
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.590622901916504,
      "learning_rate": 7.570486455361614e-05,
      "loss": 3.148,
      "step": 177138
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1673474311828613,
      "learning_rate": 7.570214806294969e-05,
      "loss": 3.0005,
      "step": 177139
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.562507390975952,
      "learning_rate": 7.569943161398436e-05,
      "loss": 2.9423,
      "step": 177140
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.276498556137085,
      "learning_rate": 7.569671520672083e-05,
      "loss": 2.9894,
      "step": 177141
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.447856426239014,
      "learning_rate": 7.569399884115942e-05,
      "loss": 2.9556,
      "step": 177142
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.568838357925415,
      "learning_rate": 7.56912825173008e-05,
      "loss": 2.7205,
      "step": 177143
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8308017253875732,
      "learning_rate": 7.568856623514544e-05,
      "loss": 3.075,
      "step": 177144
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2568228244781494,
      "learning_rate": 7.568584999469382e-05,
      "loss": 2.8732,
      "step": 177145
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1839873790740967,
      "learning_rate": 7.568313379594629e-05,
      "loss": 3.0114,
      "step": 177146
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4103965759277344,
      "learning_rate": 7.568041763890361e-05,
      "loss": 2.7596,
      "step": 177147
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6093857288360596,
      "learning_rate": 7.56777015235661e-05,
      "loss": 2.7714,
      "step": 177148
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7054648399353027,
      "learning_rate": 7.567498544993443e-05,
      "loss": 2.9598,
      "step": 177149
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.637057304382324,
      "learning_rate": 7.5672269418009e-05,
      "loss": 3.003,
      "step": 177150
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8077332973480225,
      "learning_rate": 7.566955342779034e-05,
      "loss": 2.9404,
      "step": 177151
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7334630489349365,
      "learning_rate": 7.566683747927885e-05,
      "loss": 2.8209,
      "step": 177152
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.853017807006836,
      "learning_rate": 7.566412157247523e-05,
      "loss": 3.0309,
      "step": 177153
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8975937366485596,
      "learning_rate": 7.566140570737978e-05,
      "loss": 2.9459,
      "step": 177154
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4211361408233643,
      "learning_rate": 7.565868988399321e-05,
      "loss": 2.8954,
      "step": 177155
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.230812072753906,
      "learning_rate": 7.565597410231593e-05,
      "loss": 2.9352,
      "step": 177156
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6315560340881348,
      "learning_rate": 7.565325836234832e-05,
      "loss": 2.964,
      "step": 177157
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3899128437042236,
      "learning_rate": 7.565054266409112e-05,
      "loss": 2.8703,
      "step": 177158
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.875308036804199,
      "learning_rate": 7.564782700754471e-05,
      "loss": 3.1908,
      "step": 177159
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2070841789245605,
      "learning_rate": 7.564511139270951e-05,
      "loss": 2.9717,
      "step": 177160
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.472947120666504,
      "learning_rate": 7.56423958195862e-05,
      "loss": 3.0897,
      "step": 177161
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.499202013015747,
      "learning_rate": 7.563968028817514e-05,
      "loss": 2.7295,
      "step": 177162
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7059249877929688,
      "learning_rate": 7.563696479847697e-05,
      "loss": 3.1105,
      "step": 177163
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.986433267593384,
      "learning_rate": 7.563424935049213e-05,
      "loss": 3.0011,
      "step": 177164
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8979275226593018,
      "learning_rate": 7.563153394422104e-05,
      "loss": 2.913,
      "step": 177165
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6235525608062744,
      "learning_rate": 7.562881857966437e-05,
      "loss": 3.0464,
      "step": 177166
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2808268070220947,
      "learning_rate": 7.562610325682252e-05,
      "loss": 2.9883,
      "step": 177167
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8734042644500732,
      "learning_rate": 7.562338797569595e-05,
      "loss": 2.7132,
      "step": 177168
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5756912231445312,
      "learning_rate": 7.562067273628527e-05,
      "loss": 3.0045,
      "step": 177169
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8055620193481445,
      "learning_rate": 7.561795753859098e-05,
      "loss": 2.8238,
      "step": 177170
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.135897397994995,
      "learning_rate": 7.561524238261344e-05,
      "loss": 3.0392,
      "step": 177171
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.632272481918335,
      "learning_rate": 7.561252726835339e-05,
      "loss": 2.7946,
      "step": 177172
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7552688121795654,
      "learning_rate": 7.560981219581116e-05,
      "loss": 2.8919,
      "step": 177173
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7692365646362305,
      "learning_rate": 7.560709716498723e-05,
      "loss": 3.0208,
      "step": 177174
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8327860832214355,
      "learning_rate": 7.560438217588227e-05,
      "loss": 3.1784,
      "step": 177175
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7081451416015625,
      "learning_rate": 7.56016672284967e-05,
      "loss": 2.9981,
      "step": 177176
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5497231483459473,
      "learning_rate": 7.559895232283091e-05,
      "loss": 2.9854,
      "step": 177177
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.693502187728882,
      "learning_rate": 7.559623745888559e-05,
      "loss": 3.1171,
      "step": 177178
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2519681453704834,
      "learning_rate": 7.55935226366611e-05,
      "loss": 3.1166,
      "step": 177179
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0183558464050293,
      "learning_rate": 7.55908078561581e-05,
      "loss": 2.9983,
      "step": 177180
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1123619079589844,
      "learning_rate": 7.558809311737699e-05,
      "loss": 2.9562,
      "step": 177181
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3283228874206543,
      "learning_rate": 7.558537842031828e-05,
      "loss": 2.7515,
      "step": 177182
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.656606435775757,
      "learning_rate": 7.558266376498243e-05,
      "loss": 2.9061,
      "step": 177183
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4038803577423096,
      "learning_rate": 7.557994915137005e-05,
      "loss": 2.7359,
      "step": 177184
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6483752727508545,
      "learning_rate": 7.557723457948152e-05,
      "loss": 2.9606,
      "step": 177185
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.260549545288086,
      "learning_rate": 7.55745200493175e-05,
      "loss": 2.9406,
      "step": 177186
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7184388637542725,
      "learning_rate": 7.557180556087845e-05,
      "loss": 3.0426,
      "step": 177187
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9268381595611572,
      "learning_rate": 7.556909111416476e-05,
      "loss": 2.8945,
      "step": 177188
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7866737842559814,
      "learning_rate": 7.5566376709177e-05,
      "loss": 3.0624,
      "step": 177189
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6923413276672363,
      "learning_rate": 7.556366234591573e-05,
      "loss": 2.846,
      "step": 177190
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.852907180786133,
      "learning_rate": 7.556094802438136e-05,
      "loss": 2.9753,
      "step": 177191
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.59464430809021,
      "learning_rate": 7.555823374457452e-05,
      "loss": 3.025,
      "step": 177192
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5939207077026367,
      "learning_rate": 7.555551950649562e-05,
      "loss": 2.9369,
      "step": 177193
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6338021755218506,
      "learning_rate": 7.555280531014521e-05,
      "loss": 2.8469,
      "step": 177194
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6134490966796875,
      "learning_rate": 7.55500911555237e-05,
      "loss": 3.0489,
      "step": 177195
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.931445837020874,
      "learning_rate": 7.554737704263174e-05,
      "loss": 2.8215,
      "step": 177196
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7525432109832764,
      "learning_rate": 7.554466297146965e-05,
      "loss": 2.6548,
      "step": 177197
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.615422010421753,
      "learning_rate": 7.554194894203812e-05,
      "loss": 3.0201,
      "step": 177198
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6996970176696777,
      "learning_rate": 7.553923495433761e-05,
      "loss": 2.9067,
      "step": 177199
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0728793144226074,
      "learning_rate": 7.553652100836859e-05,
      "loss": 3.1746,
      "step": 177200
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4855048656463623,
      "learning_rate": 7.553380710413145e-05,
      "loss": 3.1011,
      "step": 177201
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0599160194396973,
      "learning_rate": 7.553109324162692e-05,
      "loss": 2.923,
      "step": 177202
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.126631736755371,
      "learning_rate": 7.552837942085533e-05,
      "loss": 3.0174,
      "step": 177203
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.397613763809204,
      "learning_rate": 7.552566564181733e-05,
      "loss": 3.0919,
      "step": 177204
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.7784252166748047,
      "learning_rate": 7.552295190451333e-05,
      "loss": 2.9263,
      "step": 177205
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2444217205047607,
      "learning_rate": 7.552023820894387e-05,
      "loss": 2.9448,
      "step": 177206
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4543344974517822,
      "learning_rate": 7.551752455510933e-05,
      "loss": 2.9391,
      "step": 177207
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.147857189178467,
      "learning_rate": 7.551481094301041e-05,
      "loss": 3.0166,
      "step": 177208
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.632006883621216,
      "learning_rate": 7.551209737264743e-05,
      "loss": 2.9365,
      "step": 177209
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.637565851211548,
      "learning_rate": 7.55093838440211e-05,
      "loss": 3.0495,
      "step": 177210
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8282036781311035,
      "learning_rate": 7.550667035713182e-05,
      "loss": 2.8012,
      "step": 177211
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.076223611831665,
      "learning_rate": 7.550395691198005e-05,
      "loss": 3.0822,
      "step": 177212
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7026469707489014,
      "learning_rate": 7.550124350856625e-05,
      "loss": 3.0201,
      "step": 177213
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4975855350494385,
      "learning_rate": 7.549853014689111e-05,
      "loss": 2.9126,
      "step": 177214
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.912532329559326,
      "learning_rate": 7.549581682695495e-05,
      "loss": 2.8737,
      "step": 177215
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.479628086090088,
      "learning_rate": 7.549310354875843e-05,
      "loss": 2.9657,
      "step": 177216
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.457321882247925,
      "learning_rate": 7.549039031230198e-05,
      "loss": 2.8064,
      "step": 177217
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6617062091827393,
      "learning_rate": 7.548767711758613e-05,
      "loss": 2.7896,
      "step": 177218
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.092990875244141,
      "learning_rate": 7.548496396461123e-05,
      "loss": 2.9148,
      "step": 177219
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6803247928619385,
      "learning_rate": 7.548225085337802e-05,
      "loss": 3.0185,
      "step": 177220
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4396820068359375,
      "learning_rate": 7.547953778388679e-05,
      "loss": 3.1494,
      "step": 177221
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1317508220672607,
      "learning_rate": 7.547682475613827e-05,
      "loss": 3.1323,
      "step": 177222
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8668224811553955,
      "learning_rate": 7.547411177013276e-05,
      "loss": 3.2856,
      "step": 177223
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4968552589416504,
      "learning_rate": 7.5471398825871e-05,
      "loss": 3.0914,
      "step": 177224
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8970940113067627,
      "learning_rate": 7.546868592335318e-05,
      "loss": 3.0983,
      "step": 177225
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.411022901535034,
      "learning_rate": 7.54659730625801e-05,
      "loss": 2.9871,
      "step": 177226
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6160006523132324,
      "learning_rate": 7.546326024355197e-05,
      "loss": 2.6521,
      "step": 177227
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.296299457550049,
      "learning_rate": 7.54605474662696e-05,
      "loss": 2.956,
      "step": 177228
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4433412551879883,
      "learning_rate": 7.545783473073326e-05,
      "loss": 2.667,
      "step": 177229
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.141214609146118,
      "learning_rate": 7.545512203694369e-05,
      "loss": 2.8605,
      "step": 177230
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.771796464920044,
      "learning_rate": 7.54524093849011e-05,
      "loss": 2.9968,
      "step": 177231
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7541656494140625,
      "learning_rate": 7.544969677460622e-05,
      "loss": 2.9192,
      "step": 177232
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.49564528465271,
      "learning_rate": 7.54469842060594e-05,
      "loss": 2.9881,
      "step": 177233
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5365231037139893,
      "learning_rate": 7.544427167926133e-05,
      "loss": 2.77,
      "step": 177234
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.760392665863037,
      "learning_rate": 7.544155919421232e-05,
      "loss": 3.094,
      "step": 177235
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9828360080718994,
      "learning_rate": 7.543884675091311e-05,
      "loss": 2.939,
      "step": 177236
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.751793384552002,
      "learning_rate": 7.543613434936388e-05,
      "loss": 2.8242,
      "step": 177237
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2389872074127197,
      "learning_rate": 7.543342198956542e-05,
      "loss": 3.0193,
      "step": 177238
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.531212329864502,
      "learning_rate": 7.543070967151803e-05,
      "loss": 2.8821,
      "step": 177239
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8519115447998047,
      "learning_rate": 7.542799739522241e-05,
      "loss": 3.1219,
      "step": 177240
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4232676029205322,
      "learning_rate": 7.542528516067889e-05,
      "loss": 2.9498,
      "step": 177241
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4582765102386475,
      "learning_rate": 7.54225729678881e-05,
      "loss": 3.0049,
      "step": 177242
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.745859384536743,
      "learning_rate": 7.54198608168505e-05,
      "loss": 3.1903,
      "step": 177243
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5911483764648438,
      "learning_rate": 7.541714870756659e-05,
      "loss": 2.9551,
      "step": 177244
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.507308006286621,
      "learning_rate": 7.54144366400368e-05,
      "loss": 3.0751,
      "step": 177245
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9497926235198975,
      "learning_rate": 7.541172461426178e-05,
      "loss": 3.1703,
      "step": 177246
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8369226455688477,
      "learning_rate": 7.54090126302419e-05,
      "loss": 2.9783,
      "step": 177247
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.361320972442627,
      "learning_rate": 7.540630068797777e-05,
      "loss": 3.2388,
      "step": 177248
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.027137279510498,
      "learning_rate": 7.540358878746985e-05,
      "loss": 2.6808,
      "step": 177249
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3426756858825684,
      "learning_rate": 7.540087692871866e-05,
      "loss": 3.1069,
      "step": 177250
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.043588638305664,
      "learning_rate": 7.53981651117246e-05,
      "loss": 2.9448,
      "step": 177251
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6715760231018066,
      "learning_rate": 7.539545333648834e-05,
      "loss": 2.9211,
      "step": 177252
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.911832094192505,
      "learning_rate": 7.539274160301023e-05,
      "loss": 2.9014,
      "step": 177253
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.904627561569214,
      "learning_rate": 7.539002991129093e-05,
      "loss": 3.2713,
      "step": 177254
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8446104526519775,
      "learning_rate": 7.538731826133085e-05,
      "loss": 2.9442,
      "step": 177255
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7464065551757812,
      "learning_rate": 7.538460665313044e-05,
      "loss": 3.0022,
      "step": 177256
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6944992542266846,
      "learning_rate": 7.538189508669036e-05,
      "loss": 2.8955,
      "step": 177257
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7211554050445557,
      "learning_rate": 7.537918356201101e-05,
      "loss": 3.1127,
      "step": 177258
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.470292329788208,
      "learning_rate": 7.537647207909281e-05,
      "loss": 2.8514,
      "step": 177259
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9040749073028564,
      "learning_rate": 7.537376063793648e-05,
      "loss": 3.0948,
      "step": 177260
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6583948135375977,
      "learning_rate": 7.537104923854241e-05,
      "loss": 2.8094,
      "step": 177261
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5387425422668457,
      "learning_rate": 7.5368337880911e-05,
      "loss": 2.8931,
      "step": 177262
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.237855911254883,
      "learning_rate": 7.536562656504294e-05,
      "loss": 2.9675,
      "step": 177263
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1852223873138428,
      "learning_rate": 7.536291529093857e-05,
      "loss": 2.8477,
      "step": 177264
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.569885492324829,
      "learning_rate": 7.536020405859853e-05,
      "loss": 2.6041,
      "step": 177265
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.837897300720215,
      "learning_rate": 7.535749286802333e-05,
      "loss": 2.7852,
      "step": 177266
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0848028659820557,
      "learning_rate": 7.535478171921338e-05,
      "loss": 3.0812,
      "step": 177267
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8272056579589844,
      "learning_rate": 7.535207061216911e-05,
      "loss": 3.1464,
      "step": 177268
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.1796562671661377,
      "learning_rate": 7.534935954689124e-05,
      "loss": 2.9604,
      "step": 177269
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.993898868560791,
      "learning_rate": 7.534664852338009e-05,
      "loss": 3.2172,
      "step": 177270
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2961623668670654,
      "learning_rate": 7.53439375416363e-05,
      "loss": 3.0757,
      "step": 177271
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.435123920440674,
      "learning_rate": 7.534122660166034e-05,
      "loss": 3.0215,
      "step": 177272
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.855917453765869,
      "learning_rate": 7.533851570345268e-05,
      "loss": 2.9277,
      "step": 177273
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.9702658653259277,
      "learning_rate": 7.533580484701373e-05,
      "loss": 2.9285,
      "step": 177274
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.710798740386963,
      "learning_rate": 7.533309403234419e-05,
      "loss": 3.0906,
      "step": 177275
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.502941131591797,
      "learning_rate": 7.533038325944438e-05,
      "loss": 2.9337,
      "step": 177276
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.481189727783203,
      "learning_rate": 7.532767252831497e-05,
      "loss": 3.0977,
      "step": 177277
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.305318832397461,
      "learning_rate": 7.53249618389564e-05,
      "loss": 2.9871,
      "step": 177278
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.8986823558807373,
      "learning_rate": 7.532225119136915e-05,
      "loss": 2.9574,
      "step": 177279
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8447980880737305,
      "learning_rate": 7.531954058555364e-05,
      "loss": 2.6569,
      "step": 177280
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.348101854324341,
      "learning_rate": 7.531683002151058e-05,
      "loss": 2.8179,
      "step": 177281
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.825115203857422,
      "learning_rate": 7.531411949924026e-05,
      "loss": 3.1627,
      "step": 177282
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.445551633834839,
      "learning_rate": 7.531140901874338e-05,
      "loss": 2.8708,
      "step": 177283
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.681546211242676,
      "learning_rate": 7.530869858002035e-05,
      "loss": 2.7607,
      "step": 177284
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9423394203186035,
      "learning_rate": 7.530598818307166e-05,
      "loss": 2.6729,
      "step": 177285
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.619696855545044,
      "learning_rate": 7.530327782789777e-05,
      "loss": 2.7955,
      "step": 177286
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.182315826416016,
      "learning_rate": 7.530056751449932e-05,
      "loss": 2.6999,
      "step": 177287
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.8304688930511475,
      "learning_rate": 7.529785724287663e-05,
      "loss": 2.9395,
      "step": 177288
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3798606395721436,
      "learning_rate": 7.529514701303041e-05,
      "loss": 2.9538,
      "step": 177289
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9501869678497314,
      "learning_rate": 7.529243682496098e-05,
      "loss": 2.9267,
      "step": 177290
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.400846004486084,
      "learning_rate": 7.528972667866909e-05,
      "loss": 3.2331,
      "step": 177291
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.804193019866943,
      "learning_rate": 7.528701657415493e-05,
      "loss": 2.6065,
      "step": 177292
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6620407104492188,
      "learning_rate": 7.528430651141924e-05,
      "loss": 2.9203,
      "step": 177293
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.469964027404785,
      "learning_rate": 7.528159649046233e-05,
      "loss": 2.9933,
      "step": 177294
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.658616304397583,
      "learning_rate": 7.527888651128489e-05,
      "loss": 3.067,
      "step": 177295
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2389841079711914,
      "learning_rate": 7.527617657388728e-05,
      "loss": 3.0634,
      "step": 177296
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3116707801818848,
      "learning_rate": 7.527346667827024e-05,
      "loss": 2.9893,
      "step": 177297
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5027894973754883,
      "learning_rate": 7.527075682443395e-05,
      "loss": 3.1038,
      "step": 177298
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.650580406188965,
      "learning_rate": 7.526804701237914e-05,
      "loss": 2.8045,
      "step": 177299
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9932913780212402,
      "learning_rate": 7.526533724210617e-05,
      "loss": 3.1356,
      "step": 177300
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6585464477539062,
      "learning_rate": 7.526262751361568e-05,
      "loss": 2.9466,
      "step": 177301
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7747652530670166,
      "learning_rate": 7.525991782690803e-05,
      "loss": 2.9034,
      "step": 177302
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.76466703414917,
      "learning_rate": 7.525720818198397e-05,
      "loss": 3.0073,
      "step": 177303
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.604916572570801,
      "learning_rate": 7.525449857884366e-05,
      "loss": 2.8536,
      "step": 177304
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.514134645462036,
      "learning_rate": 7.525178901748789e-05,
      "loss": 2.8891,
      "step": 177305
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.63847017288208,
      "learning_rate": 7.524907949791698e-05,
      "loss": 2.8753,
      "step": 177306
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0884499549865723,
      "learning_rate": 7.524637002013158e-05,
      "loss": 3.0101,
      "step": 177307
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.718759536743164,
      "learning_rate": 7.524366058413202e-05,
      "loss": 2.9318,
      "step": 177308
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6550378799438477,
      "learning_rate": 7.52409511899191e-05,
      "loss": 2.9202,
      "step": 177309
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.045431137084961,
      "learning_rate": 7.523824183749292e-05,
      "loss": 2.7773,
      "step": 177310
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.607924461364746,
      "learning_rate": 7.523553252685431e-05,
      "loss": 2.8127,
      "step": 177311
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9650752544403076,
      "learning_rate": 7.523282325800358e-05,
      "loss": 3.0118,
      "step": 177312
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.588696241378784,
      "learning_rate": 7.523011403094136e-05,
      "loss": 3.0378,
      "step": 177313
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.746854066848755,
      "learning_rate": 7.522740484566803e-05,
      "loss": 2.8306,
      "step": 177314
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.45363712310791,
      "learning_rate": 7.522469570218439e-05,
      "loss": 2.8438,
      "step": 177315
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.976351737976074,
      "learning_rate": 7.522198660049052e-05,
      "loss": 2.8606,
      "step": 177316
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.9344265460968018,
      "learning_rate": 7.521927754058718e-05,
      "loss": 2.8657,
      "step": 177317
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.576001167297363,
      "learning_rate": 7.521656852247478e-05,
      "loss": 2.7663,
      "step": 177318
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.106231689453125,
      "learning_rate": 7.521385954615391e-05,
      "loss": 3.0983,
      "step": 177319
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.799654483795166,
      "learning_rate": 7.521115061162495e-05,
      "loss": 2.9872,
      "step": 177320
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3737072944641113,
      "learning_rate": 7.520844171888865e-05,
      "loss": 3.1811,
      "step": 177321
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2845098972320557,
      "learning_rate": 7.52057328679452e-05,
      "loss": 2.8429,
      "step": 177322
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.483271837234497,
      "learning_rate": 7.520302405879531e-05,
      "loss": 3.0992,
      "step": 177323
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0376129150390625,
      "learning_rate": 7.520031529143933e-05,
      "loss": 3.019,
      "step": 177324
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6627864837646484,
      "learning_rate": 7.519760656587797e-05,
      "loss": 2.849,
      "step": 177325
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.792243719100952,
      "learning_rate": 7.519489788211149e-05,
      "loss": 2.9272,
      "step": 177326
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7775018215179443,
      "learning_rate": 7.519218924014064e-05,
      "loss": 2.8396,
      "step": 177327
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.829770088195801,
      "learning_rate": 7.518948063996583e-05,
      "loss": 2.8773,
      "step": 177328
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.887111186981201,
      "learning_rate": 7.518677208158747e-05,
      "loss": 2.8938,
      "step": 177329
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.012631416320801,
      "learning_rate": 7.518406356500609e-05,
      "loss": 2.9141,
      "step": 177330
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.81388783454895,
      "learning_rate": 7.518135509022233e-05,
      "loss": 2.8995,
      "step": 177331
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.57730770111084,
      "learning_rate": 7.517864665723648e-05,
      "loss": 2.8306,
      "step": 177332
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6870574951171875,
      "learning_rate": 7.51759382660493e-05,
      "loss": 2.7315,
      "step": 177333
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6464052200317383,
      "learning_rate": 7.51732299166611e-05,
      "loss": 2.7487,
      "step": 177334
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5593419075012207,
      "learning_rate": 7.517052160907248e-05,
      "loss": 3.0233,
      "step": 177335
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.718230724334717,
      "learning_rate": 7.516781334328382e-05,
      "loss": 2.8196,
      "step": 177336
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.00999116897583,
      "learning_rate": 7.516510511929574e-05,
      "loss": 2.8108,
      "step": 177337
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.462242841720581,
      "learning_rate": 7.516239693710867e-05,
      "loss": 2.9705,
      "step": 177338
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9736685752868652,
      "learning_rate": 7.515968879672326e-05,
      "loss": 3.0078,
      "step": 177339
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.734363079071045,
      "learning_rate": 7.515698069813988e-05,
      "loss": 2.9797,
      "step": 177340
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.959024429321289,
      "learning_rate": 7.515427264135896e-05,
      "loss": 3.1535,
      "step": 177341
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.448336362838745,
      "learning_rate": 7.51515646263812e-05,
      "loss": 2.7811,
      "step": 177342
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.442399978637695,
      "learning_rate": 7.514885665320704e-05,
      "loss": 2.9892,
      "step": 177343
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.161672592163086,
      "learning_rate": 7.514614872183684e-05,
      "loss": 2.9626,
      "step": 177344
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.104099988937378,
      "learning_rate": 7.514344083227129e-05,
      "loss": 2.9559,
      "step": 177345
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.8195157051086426,
      "learning_rate": 7.514073298451083e-05,
      "loss": 2.9371,
      "step": 177346
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.68646240234375,
      "learning_rate": 7.513802517855587e-05,
      "loss": 2.9952,
      "step": 177347
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4459404945373535,
      "learning_rate": 7.51353174144071e-05,
      "loss": 2.974,
      "step": 177348
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.103006839752197,
      "learning_rate": 7.51326096920649e-05,
      "loss": 2.9081,
      "step": 177349
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6522867679595947,
      "learning_rate": 7.512990201152971e-05,
      "loss": 3.2178,
      "step": 177350
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9143826961517334,
      "learning_rate": 7.512719437280218e-05,
      "loss": 2.9929,
      "step": 177351
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.363283395767212,
      "learning_rate": 7.512448677588278e-05,
      "loss": 2.9946,
      "step": 177352
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.528268575668335,
      "learning_rate": 7.512177922077187e-05,
      "loss": 2.7092,
      "step": 177353
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3541200160980225,
      "learning_rate": 7.511907170747017e-05,
      "loss": 3.2052,
      "step": 177354
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.004012107849121,
      "learning_rate": 7.5116364235978e-05,
      "loss": 2.8132,
      "step": 177355
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.7973835468292236,
      "learning_rate": 7.511365680629602e-05,
      "loss": 2.7754,
      "step": 177356
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5689656734466553,
      "learning_rate": 7.511094941842466e-05,
      "loss": 2.7386,
      "step": 177357
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4446351528167725,
      "learning_rate": 7.510824207236439e-05,
      "loss": 2.9026,
      "step": 177358
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2064809799194336,
      "learning_rate": 7.51055347681157e-05,
      "loss": 2.6968,
      "step": 177359
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2943012714385986,
      "learning_rate": 7.510282750567921e-05,
      "loss": 2.8737,
      "step": 177360
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4556691646575928,
      "learning_rate": 7.510012028505523e-05,
      "loss": 3.0142,
      "step": 177361
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3823821544647217,
      "learning_rate": 7.509741310624451e-05,
      "loss": 3.0056,
      "step": 177362
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.434609889984131,
      "learning_rate": 7.509470596924733e-05,
      "loss": 3.1323,
      "step": 177363
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5098373889923096,
      "learning_rate": 7.509199887406445e-05,
      "loss": 2.9355,
      "step": 177364
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7409815788269043,
      "learning_rate": 7.508929182069605e-05,
      "loss": 3.1373,
      "step": 177365
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.025179147720337,
      "learning_rate": 7.50865848091429e-05,
      "loss": 3.0888,
      "step": 177366
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.006761312484741,
      "learning_rate": 7.508387783940526e-05,
      "loss": 2.9144,
      "step": 177367
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6014504432678223,
      "learning_rate": 7.508117091148392e-05,
      "loss": 2.8264,
      "step": 177368
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0393338203430176,
      "learning_rate": 7.507846402537913e-05,
      "loss": 3.0759,
      "step": 177369
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4497733116149902,
      "learning_rate": 7.507575718109168e-05,
      "loss": 2.978,
      "step": 177370
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6938891410827637,
      "learning_rate": 7.507305037862168e-05,
      "loss": 2.7842,
      "step": 177371
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2795214653015137,
      "learning_rate": 7.507034361796996e-05,
      "loss": 2.977,
      "step": 177372
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7455685138702393,
      "learning_rate": 7.506763689913683e-05,
      "loss": 2.8307,
      "step": 177373
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7528042793273926,
      "learning_rate": 7.506493022212295e-05,
      "loss": 3.0887,
      "step": 177374
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.534487247467041,
      "learning_rate": 7.506222358692864e-05,
      "loss": 2.7902,
      "step": 177375
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7586569786071777,
      "learning_rate": 7.505951699355471e-05,
      "loss": 2.9751,
      "step": 177376
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.588512659072876,
      "learning_rate": 7.505681044200126e-05,
      "loss": 2.8698,
      "step": 177377
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.8195745944976807,
      "learning_rate": 7.50541039322691e-05,
      "loss": 2.9689,
      "step": 177378
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9096767902374268,
      "learning_rate": 7.505139746435856e-05,
      "loss": 2.8354,
      "step": 177379
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.721328020095825,
      "learning_rate": 7.504869103827028e-05,
      "loss": 2.6012,
      "step": 177380
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.590858221054077,
      "learning_rate": 7.504598465400462e-05,
      "loss": 2.9511,
      "step": 177381
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4006905555725098,
      "learning_rate": 7.50432783115623e-05,
      "loss": 2.8969,
      "step": 177382
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6484036445617676,
      "learning_rate": 7.504057201094353e-05,
      "loss": 3.0946,
      "step": 177383
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.45410680770874,
      "learning_rate": 7.503786575214904e-05,
      "loss": 2.8917,
      "step": 177384
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6447978019714355,
      "learning_rate": 7.503515953517919e-05,
      "loss": 2.7456,
      "step": 177385
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7480225563049316,
      "learning_rate": 7.503245336003462e-05,
      "loss": 2.9465,
      "step": 177386
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.57661509513855,
      "learning_rate": 7.502974722671569e-05,
      "loss": 3.0257,
      "step": 177387
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2624130249023438,
      "learning_rate": 7.502704113522313e-05,
      "loss": 3.0132,
      "step": 177388
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8911375999450684,
      "learning_rate": 7.502433508555711e-05,
      "loss": 2.9486,
      "step": 177389
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8028831481933594,
      "learning_rate": 7.502162907771845e-05,
      "loss": 2.8686,
      "step": 177390
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.497696876525879,
      "learning_rate": 7.501892311170739e-05,
      "loss": 2.9486,
      "step": 177391
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.323429822921753,
      "learning_rate": 7.501621718752467e-05,
      "loss": 2.9775,
      "step": 177392
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4631125926971436,
      "learning_rate": 7.501351130517055e-05,
      "loss": 2.9542,
      "step": 177393
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.126952886581421,
      "learning_rate": 7.501080546464587e-05,
      "loss": 3.0397,
      "step": 177394
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.136001110076904,
      "learning_rate": 7.500809966595077e-05,
      "loss": 2.8011,
      "step": 177395
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5213420391082764,
      "learning_rate": 7.500539390908598e-05,
      "loss": 2.9586,
      "step": 177396
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7106175422668457,
      "learning_rate": 7.500268819405182e-05,
      "loss": 3.0221,
      "step": 177397
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7023122310638428,
      "learning_rate": 7.499998252084903e-05,
      "loss": 3.1542,
      "step": 177398
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.906447410583496,
      "learning_rate": 7.499727688947789e-05,
      "loss": 2.977,
      "step": 177399
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.510333776473999,
      "learning_rate": 7.499457129993921e-05,
      "loss": 2.9951,
      "step": 177400
    },
    {
      "epoch": 2.31,
      "grad_norm": 6.539134502410889,
      "learning_rate": 7.499186575223307e-05,
      "loss": 2.8928,
      "step": 177401
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6054725646972656,
      "learning_rate": 7.49891602463603e-05,
      "loss": 3.0121,
      "step": 177402
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6785058975219727,
      "learning_rate": 7.498645478232117e-05,
      "loss": 2.7925,
      "step": 177403
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1391170024871826,
      "learning_rate": 7.498374936011644e-05,
      "loss": 2.944,
      "step": 177404
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.991922616958618,
      "learning_rate": 7.498104397974638e-05,
      "loss": 2.9139,
      "step": 177405
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2233195304870605,
      "learning_rate": 7.497833864121175e-05,
      "loss": 3.0157,
      "step": 177406
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.233666181564331,
      "learning_rate": 7.497563334451273e-05,
      "loss": 2.9529,
      "step": 177407
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2404589653015137,
      "learning_rate": 7.497292808965008e-05,
      "loss": 2.7249,
      "step": 177408
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.358640193939209,
      "learning_rate": 7.497022287662408e-05,
      "loss": 2.8263,
      "step": 177409
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.669159173965454,
      "learning_rate": 7.49675177054355e-05,
      "loss": 2.7754,
      "step": 177410
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.623187780380249,
      "learning_rate": 7.496481257608461e-05,
      "loss": 3.1394,
      "step": 177411
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.151370048522949,
      "learning_rate": 7.496210748857219e-05,
      "loss": 3.1143,
      "step": 177412
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8610904216766357,
      "learning_rate": 7.495940244289837e-05,
      "loss": 2.7548,
      "step": 177413
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3466989994049072,
      "learning_rate": 7.495669743906394e-05,
      "loss": 2.9665,
      "step": 177414
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.797044277191162,
      "learning_rate": 7.495399247706921e-05,
      "loss": 3.0514,
      "step": 177415
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.506127119064331,
      "learning_rate": 7.495128755691488e-05,
      "loss": 2.7266,
      "step": 177416
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5683112144470215,
      "learning_rate": 7.494858267860125e-05,
      "loss": 3.0778,
      "step": 177417
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.592987060546875,
      "learning_rate": 7.494587784212902e-05,
      "loss": 2.9602,
      "step": 177418
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.863272190093994,
      "learning_rate": 7.494317304749862e-05,
      "loss": 2.8663,
      "step": 177419
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.825345277786255,
      "learning_rate": 7.494046829471052e-05,
      "loss": 2.7932,
      "step": 177420
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.666653871536255,
      "learning_rate": 7.493776358376514e-05,
      "loss": 3.1339,
      "step": 177421
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.489990472793579,
      "learning_rate": 7.493505891466319e-05,
      "loss": 3.0678,
      "step": 177422
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4627974033355713,
      "learning_rate": 7.493235428740492e-05,
      "loss": 2.8726,
      "step": 177423
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.57563853263855,
      "learning_rate": 7.492964970199114e-05,
      "loss": 2.9844,
      "step": 177424
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.388590097427368,
      "learning_rate": 7.492694515842213e-05,
      "loss": 2.951,
      "step": 177425
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5075976848602295,
      "learning_rate": 7.492424065669836e-05,
      "loss": 2.8129,
      "step": 177426
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5863373279571533,
      "learning_rate": 7.492153619682052e-05,
      "loss": 2.7973,
      "step": 177427
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3850131034851074,
      "learning_rate": 7.491883177878904e-05,
      "loss": 3.0285,
      "step": 177428
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9019775390625,
      "learning_rate": 7.491612740260428e-05,
      "loss": 3.1577,
      "step": 177429
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3687095642089844,
      "learning_rate": 7.491342306826698e-05,
      "loss": 2.74,
      "step": 177430
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.569807767868042,
      "learning_rate": 7.49107187757775e-05,
      "loss": 2.967,
      "step": 177431
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5324809551239014,
      "learning_rate": 7.490801452513628e-05,
      "loss": 3.0462,
      "step": 177432
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2524421215057373,
      "learning_rate": 7.4905310316344e-05,
      "loss": 3.045,
      "step": 177433
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2866768836975098,
      "learning_rate": 7.490260614940105e-05,
      "loss": 3.0807,
      "step": 177434
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.912405252456665,
      "learning_rate": 7.489990202430788e-05,
      "loss": 2.668,
      "step": 177435
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.640774965286255,
      "learning_rate": 7.489719794106519e-05,
      "loss": 2.6705,
      "step": 177436
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.539881467819214,
      "learning_rate": 7.489449389967332e-05,
      "loss": 3.0328,
      "step": 177437
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.9064102172851562,
      "learning_rate": 7.489178990013274e-05,
      "loss": 2.9756,
      "step": 177438
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7180440425872803,
      "learning_rate": 7.48890859424441e-05,
      "loss": 3.0251,
      "step": 177439
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6756489276885986,
      "learning_rate": 7.488638202660776e-05,
      "loss": 2.7839,
      "step": 177440
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3759641647338867,
      "learning_rate": 7.488367815262436e-05,
      "loss": 2.788,
      "step": 177441
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4692697525024414,
      "learning_rate": 7.488097432049436e-05,
      "loss": 2.8812,
      "step": 177442
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.580764055252075,
      "learning_rate": 7.487827053021822e-05,
      "loss": 2.8218,
      "step": 177443
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.385030746459961,
      "learning_rate": 7.487556678179637e-05,
      "loss": 2.9128,
      "step": 177444
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6421151161193848,
      "learning_rate": 7.487286307522947e-05,
      "loss": 2.9077,
      "step": 177445
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.638242721557617,
      "learning_rate": 7.487015941051789e-05,
      "loss": 2.7598,
      "step": 177446
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.437995195388794,
      "learning_rate": 7.48674557876623e-05,
      "loss": 2.9346,
      "step": 177447
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2401602268218994,
      "learning_rate": 7.4864752206663e-05,
      "loss": 2.7467,
      "step": 177448
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6273725032806396,
      "learning_rate": 7.486204866752074e-05,
      "loss": 2.935,
      "step": 177449
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3138427734375,
      "learning_rate": 7.485934517023573e-05,
      "loss": 3.0044,
      "step": 177450
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.419363021850586,
      "learning_rate": 7.485664171480867e-05,
      "loss": 2.9934,
      "step": 177451
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7836005687713623,
      "learning_rate": 7.485393830123997e-05,
      "loss": 3.0053,
      "step": 177452
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5936954021453857,
      "learning_rate": 7.485123492953025e-05,
      "loss": 2.9275,
      "step": 177453
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.9387731552124023,
      "learning_rate": 7.48485315996798e-05,
      "loss": 3.0724,
      "step": 177454
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.613922595977783,
      "learning_rate": 7.484582831168948e-05,
      "loss": 3.1011,
      "step": 177455
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6204144954681396,
      "learning_rate": 7.48431250655594e-05,
      "loss": 2.9399,
      "step": 177456
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.616795063018799,
      "learning_rate": 7.484042186129031e-05,
      "loss": 3.2243,
      "step": 177457
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.113483428955078,
      "learning_rate": 7.483771869888253e-05,
      "loss": 2.9737,
      "step": 177458
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6385533809661865,
      "learning_rate": 7.483501557833677e-05,
      "loss": 2.7269,
      "step": 177459
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5907397270202637,
      "learning_rate": 7.483231249965334e-05,
      "loss": 3.1038,
      "step": 177460
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3629724979400635,
      "learning_rate": 7.482960946283304e-05,
      "loss": 2.9518,
      "step": 177461
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.612575054168701,
      "learning_rate": 7.482690646787594e-05,
      "loss": 2.9389,
      "step": 177462
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.282539129257202,
      "learning_rate": 7.48242035147829e-05,
      "loss": 2.9639,
      "step": 177463
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1245665550231934,
      "learning_rate": 7.482150060355417e-05,
      "loss": 3.1364,
      "step": 177464
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9890406131744385,
      "learning_rate": 7.481879773419047e-05,
      "loss": 3.0637,
      "step": 177465
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.389193534851074,
      "learning_rate": 7.481609490669212e-05,
      "loss": 2.8964,
      "step": 177466
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.745441198348999,
      "learning_rate": 7.48133921210599e-05,
      "loss": 3.1101,
      "step": 177467
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.817969799041748,
      "learning_rate": 7.481068937729393e-05,
      "loss": 2.855,
      "step": 177468
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4894096851348877,
      "learning_rate": 7.480798667539498e-05,
      "loss": 3.0245,
      "step": 177469
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6419427394866943,
      "learning_rate": 7.48052840153634e-05,
      "loss": 3.1648,
      "step": 177470
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.168247699737549,
      "learning_rate": 7.480258139719988e-05,
      "loss": 2.9891,
      "step": 177471
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.485374689102173,
      "learning_rate": 7.479987882090469e-05,
      "loss": 2.9107,
      "step": 177472
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5129611492156982,
      "learning_rate": 7.479717628647864e-05,
      "loss": 2.8275,
      "step": 177473
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6882081031799316,
      "learning_rate": 7.479447379392188e-05,
      "loss": 3.0394,
      "step": 177474
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4297220706939697,
      "learning_rate": 7.479177134323514e-05,
      "loss": 3.026,
      "step": 177475
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5478124618530273,
      "learning_rate": 7.478906893441878e-05,
      "loss": 3.1576,
      "step": 177476
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3411004543304443,
      "learning_rate": 7.47863665674735e-05,
      "loss": 2.9761,
      "step": 177477
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.213575839996338,
      "learning_rate": 7.478366424239959e-05,
      "loss": 3.0023,
      "step": 177478
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6544551849365234,
      "learning_rate": 7.478096195919782e-05,
      "loss": 3.0912,
      "step": 177479
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.819453477859497,
      "learning_rate": 7.477825971786831e-05,
      "loss": 2.8947,
      "step": 177480
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5892512798309326,
      "learning_rate": 7.477555751841189e-05,
      "loss": 2.8377,
      "step": 177481
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.56815505027771,
      "learning_rate": 7.477285536082886e-05,
      "loss": 2.8646,
      "step": 177482
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7889609336853027,
      "learning_rate": 7.477015324511991e-05,
      "loss": 2.952,
      "step": 177483
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.362642765045166,
      "learning_rate": 7.476745117128533e-05,
      "loss": 2.7522,
      "step": 177484
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8747429847717285,
      "learning_rate": 7.476474913932592e-05,
      "loss": 3.0886,
      "step": 177485
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4311575889587402,
      "learning_rate": 7.476204714924181e-05,
      "loss": 2.892,
      "step": 177486
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3562419414520264,
      "learning_rate": 7.475934520103377e-05,
      "loss": 2.9453,
      "step": 177487
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0770018100738525,
      "learning_rate": 7.475664329470214e-05,
      "loss": 2.8388,
      "step": 177488
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9578158855438232,
      "learning_rate": 7.475394143024757e-05,
      "loss": 3.2153,
      "step": 177489
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.844302177429199,
      "learning_rate": 7.475123960767043e-05,
      "loss": 3.2845,
      "step": 177490
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.958247184753418,
      "learning_rate": 7.474853782697136e-05,
      "loss": 2.4286,
      "step": 177491
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.797680377960205,
      "learning_rate": 7.47458360881508e-05,
      "loss": 2.9501,
      "step": 177492
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6252541542053223,
      "learning_rate": 7.474313439120923e-05,
      "loss": 2.7985,
      "step": 177493
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.478445053100586,
      "learning_rate": 7.47404327361471e-05,
      "loss": 2.9332,
      "step": 177494
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.080594062805176,
      "learning_rate": 7.473773112296504e-05,
      "loss": 3.0969,
      "step": 177495
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0450518131256104,
      "learning_rate": 7.473502955166341e-05,
      "loss": 2.9604,
      "step": 177496
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.1947574615478516,
      "learning_rate": 7.473232802224287e-05,
      "loss": 3.0577,
      "step": 177497
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7265989780426025,
      "learning_rate": 7.472962653470383e-05,
      "loss": 2.981,
      "step": 177498
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.67220401763916,
      "learning_rate": 7.472692508904683e-05,
      "loss": 3.0129,
      "step": 177499
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.091855525970459,
      "learning_rate": 7.472422368527226e-05,
      "loss": 2.7592,
      "step": 177500
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1478209495544434,
      "learning_rate": 7.472152232338077e-05,
      "loss": 3.0138,
      "step": 177501
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5966956615448,
      "learning_rate": 7.471882100337276e-05,
      "loss": 3.0512,
      "step": 177502
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.919914960861206,
      "learning_rate": 7.47161197252488e-05,
      "loss": 2.6621,
      "step": 177503
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.116431474685669,
      "learning_rate": 7.471341848900942e-05,
      "loss": 2.8763,
      "step": 177504
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3356006145477295,
      "learning_rate": 7.471071729465506e-05,
      "loss": 3.2138,
      "step": 177505
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.545853853225708,
      "learning_rate": 7.470801614218613e-05,
      "loss": 2.9927,
      "step": 177506
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4235141277313232,
      "learning_rate": 7.47053150316033e-05,
      "loss": 2.7973,
      "step": 177507
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5675065517425537,
      "learning_rate": 7.470261396290695e-05,
      "loss": 2.801,
      "step": 177508
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.840688943862915,
      "learning_rate": 7.46999129360977e-05,
      "loss": 2.981,
      "step": 177509
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.698965549468994,
      "learning_rate": 7.469721195117602e-05,
      "loss": 2.8097,
      "step": 177510
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.964905261993408,
      "learning_rate": 7.469451100814225e-05,
      "loss": 3.1801,
      "step": 177511
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.540022373199463,
      "learning_rate": 7.469181010699716e-05,
      "loss": 2.7778,
      "step": 177512
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.795668363571167,
      "learning_rate": 7.468910924774108e-05,
      "loss": 2.8628,
      "step": 177513
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.93984317779541,
      "learning_rate": 7.468640843037449e-05,
      "loss": 2.9574,
      "step": 177514
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1060991287231445,
      "learning_rate": 7.4683707654898e-05,
      "loss": 3.1983,
      "step": 177515
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6284356117248535,
      "learning_rate": 7.468100692131209e-05,
      "loss": 3.1432,
      "step": 177516
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5211520195007324,
      "learning_rate": 7.467830622961715e-05,
      "loss": 2.9166,
      "step": 177517
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.717714309692383,
      "learning_rate": 7.467560557981382e-05,
      "loss": 3.0602,
      "step": 177518
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.409341812133789,
      "learning_rate": 7.467290497190256e-05,
      "loss": 3.2042,
      "step": 177519
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2081661224365234,
      "learning_rate": 7.467020440588378e-05,
      "loss": 2.9101,
      "step": 177520
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2716033458709717,
      "learning_rate": 7.466750388175816e-05,
      "loss": 2.8408,
      "step": 177521
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.90942120552063,
      "learning_rate": 7.46648033995261e-05,
      "loss": 2.9782,
      "step": 177522
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.590040683746338,
      "learning_rate": 7.466210295918802e-05,
      "loss": 2.7373,
      "step": 177523
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4979374408721924,
      "learning_rate": 7.465940256074459e-05,
      "loss": 3.0282,
      "step": 177524
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6466259956359863,
      "learning_rate": 7.465670220419613e-05,
      "loss": 2.9275,
      "step": 177525
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7368052005767822,
      "learning_rate": 7.465400188954335e-05,
      "loss": 2.984,
      "step": 177526
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7038192749023438,
      "learning_rate": 7.465130161678666e-05,
      "loss": 2.9416,
      "step": 177527
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5934481620788574,
      "learning_rate": 7.464860138592651e-05,
      "loss": 2.9757,
      "step": 177528
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7646288871765137,
      "learning_rate": 7.464590119696336e-05,
      "loss": 2.8411,
      "step": 177529
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.504319190979004,
      "learning_rate": 7.464320104989788e-05,
      "loss": 2.7436,
      "step": 177530
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.272106170654297,
      "learning_rate": 7.464050094473043e-05,
      "loss": 2.9288,
      "step": 177531
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.772932291030884,
      "learning_rate": 7.46378008814616e-05,
      "loss": 2.7062,
      "step": 177532
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.561400413513184,
      "learning_rate": 7.463510086009188e-05,
      "loss": 3.1163,
      "step": 177533
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.847113609313965,
      "learning_rate": 7.463240088062175e-05,
      "loss": 2.8768,
      "step": 177534
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2394423484802246,
      "learning_rate": 7.462970094305163e-05,
      "loss": 2.7542,
      "step": 177535
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.959545135498047,
      "learning_rate": 7.46270010473822e-05,
      "loss": 2.8657,
      "step": 177536
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.765498161315918,
      "learning_rate": 7.462430119361375e-05,
      "loss": 3.0917,
      "step": 177537
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.7056777477264404,
      "learning_rate": 7.462160138174698e-05,
      "loss": 3.073,
      "step": 177538
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.559976577758789,
      "learning_rate": 7.461890161178226e-05,
      "loss": 3.0861,
      "step": 177539
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.7203540802001953,
      "learning_rate": 7.461620188372028e-05,
      "loss": 2.8266,
      "step": 177540
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9327268600463867,
      "learning_rate": 7.461350219756123e-05,
      "loss": 2.8881,
      "step": 177541
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.848069190979004,
      "learning_rate": 7.461080255330588e-05,
      "loss": 2.9514,
      "step": 177542
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.410982608795166,
      "learning_rate": 7.460810295095455e-05,
      "loss": 2.803,
      "step": 177543
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7241740226745605,
      "learning_rate": 7.460540339050794e-05,
      "loss": 3.2295,
      "step": 177544
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.405731201171875,
      "learning_rate": 7.46027038719663e-05,
      "loss": 2.8228,
      "step": 177545
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1427574157714844,
      "learning_rate": 7.460000439533046e-05,
      "loss": 2.832,
      "step": 177546
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1072795391082764,
      "learning_rate": 7.459730496060059e-05,
      "loss": 2.7538,
      "step": 177547
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0649490356445312,
      "learning_rate": 7.459460556777743e-05,
      "loss": 3.0803,
      "step": 177548
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.823200225830078,
      "learning_rate": 7.459190621686127e-05,
      "loss": 2.9902,
      "step": 177549
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.0769453048706055,
      "learning_rate": 7.458920690785282e-05,
      "loss": 3.0184,
      "step": 177550
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.734771490097046,
      "learning_rate": 7.458650764075241e-05,
      "loss": 2.9719,
      "step": 177551
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.395340919494629,
      "learning_rate": 7.458380841556079e-05,
      "loss": 2.8132,
      "step": 177552
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4211838245391846,
      "learning_rate": 7.458110923227814e-05,
      "loss": 3.0381,
      "step": 177553
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.473623752593994,
      "learning_rate": 7.45784100909052e-05,
      "loss": 3.0869,
      "step": 177554
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.723747491836548,
      "learning_rate": 7.457571099144226e-05,
      "loss": 2.8013,
      "step": 177555
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.200634002685547,
      "learning_rate": 7.457301193389007e-05,
      "loss": 3.0412,
      "step": 177556
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.285633563995361,
      "learning_rate": 7.457031291824892e-05,
      "loss": 2.9952,
      "step": 177557
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7675766944885254,
      "learning_rate": 7.456761394451951e-05,
      "loss": 2.7274,
      "step": 177558
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9536118507385254,
      "learning_rate": 7.45649150127022e-05,
      "loss": 2.9678,
      "step": 177559
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6830081939697266,
      "learning_rate": 7.456221612279758e-05,
      "loss": 3.1518,
      "step": 177560
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4226787090301514,
      "learning_rate": 7.455951727480599e-05,
      "loss": 2.9539,
      "step": 177561
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.787010908126831,
      "learning_rate": 7.45568184687281e-05,
      "loss": 2.9385,
      "step": 177562
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4417381286621094,
      "learning_rate": 7.45541197045643e-05,
      "loss": 2.8711,
      "step": 177563
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8152902126312256,
      "learning_rate": 7.455142098231522e-05,
      "loss": 3.1273,
      "step": 177564
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3481225967407227,
      "learning_rate": 7.454872230198129e-05,
      "loss": 2.9561,
      "step": 177565
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4239776134490967,
      "learning_rate": 7.4546023663563e-05,
      "loss": 2.988,
      "step": 177566
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7413811683654785,
      "learning_rate": 7.454332506706076e-05,
      "loss": 2.6194,
      "step": 177567
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.591125965118408,
      "learning_rate": 7.454062651247528e-05,
      "loss": 2.9441,
      "step": 177568
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1739490032196045,
      "learning_rate": 7.453792799980686e-05,
      "loss": 2.8238,
      "step": 177569
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.552955150604248,
      "learning_rate": 7.45352295290562e-05,
      "loss": 2.9246,
      "step": 177570
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.56486439704895,
      "learning_rate": 7.453253110022365e-05,
      "loss": 3.1406,
      "step": 177571
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.725992441177368,
      "learning_rate": 7.452983271330981e-05,
      "loss": 2.9611,
      "step": 177572
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.822617292404175,
      "learning_rate": 7.452713436831503e-05,
      "loss": 3.1947,
      "step": 177573
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8413429260253906,
      "learning_rate": 7.452443606524e-05,
      "loss": 2.8159,
      "step": 177574
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5226328372955322,
      "learning_rate": 7.452173780408501e-05,
      "loss": 2.7816,
      "step": 177575
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9775404930114746,
      "learning_rate": 7.45190395848508e-05,
      "loss": 3.0225,
      "step": 177576
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.931797742843628,
      "learning_rate": 7.451634140753775e-05,
      "loss": 2.815,
      "step": 177577
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0155580043792725,
      "learning_rate": 7.451364327214639e-05,
      "loss": 2.9154,
      "step": 177578
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.400437831878662,
      "learning_rate": 7.451094517867711e-05,
      "loss": 3.0038,
      "step": 177579
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.6737189292907715,
      "learning_rate": 7.450824712713057e-05,
      "loss": 3.0785,
      "step": 177580
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7462782859802246,
      "learning_rate": 7.450554911750711e-05,
      "loss": 3.0351,
      "step": 177581
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0787689685821533,
      "learning_rate": 7.450285114980743e-05,
      "loss": 2.7986,
      "step": 177582
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.680495500564575,
      "learning_rate": 7.450015322403194e-05,
      "loss": 2.7211,
      "step": 177583
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.813201904296875,
      "learning_rate": 7.449745534018109e-05,
      "loss": 2.9195,
      "step": 177584
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.599170684814453,
      "learning_rate": 7.449475749825536e-05,
      "loss": 3.0358,
      "step": 177585
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.513419151306152,
      "learning_rate": 7.44920596982554e-05,
      "loss": 2.8068,
      "step": 177586
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.279350280761719,
      "learning_rate": 7.44893619401815e-05,
      "loss": 2.8056,
      "step": 177587
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2105090618133545,
      "learning_rate": 7.448666422403444e-05,
      "loss": 2.6945,
      "step": 177588
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.542729377746582,
      "learning_rate": 7.44839665498145e-05,
      "loss": 3.1009,
      "step": 177589
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6268222332000732,
      "learning_rate": 7.448126891752228e-05,
      "loss": 3.0044,
      "step": 177590
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5238113403320312,
      "learning_rate": 7.447857132715815e-05,
      "loss": 2.7807,
      "step": 177591
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4694366455078125,
      "learning_rate": 7.447587377872279e-05,
      "loss": 2.8039,
      "step": 177592
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.225573778152466,
      "learning_rate": 7.447317627221657e-05,
      "loss": 3.1681,
      "step": 177593
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.401217460632324,
      "learning_rate": 7.447047880764009e-05,
      "loss": 3.0943,
      "step": 177594
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.220825672149658,
      "learning_rate": 7.446778138499384e-05,
      "loss": 2.516,
      "step": 177595
    },
    {
      "epoch": 2.31,
      "grad_norm": 6.209611415863037,
      "learning_rate": 7.446508400427825e-05,
      "loss": 2.7995,
      "step": 177596
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.35797381401062,
      "learning_rate": 7.446238666549379e-05,
      "loss": 2.9698,
      "step": 177597
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6694676876068115,
      "learning_rate": 7.445968936864112e-05,
      "loss": 2.9672,
      "step": 177598
    },
    {
      "epoch": 2.31,
      "grad_norm": 6.796111583709717,
      "learning_rate": 7.445699211372057e-05,
      "loss": 2.9794,
      "step": 177599
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9061503410339355,
      "learning_rate": 7.445429490073278e-05,
      "loss": 2.8468,
      "step": 177600
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7438831329345703,
      "learning_rate": 7.445159772967825e-05,
      "loss": 2.9159,
      "step": 177601
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.581695556640625,
      "learning_rate": 7.444890060055727e-05,
      "loss": 3.0136,
      "step": 177602
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.053180456161499,
      "learning_rate": 7.444620351337062e-05,
      "loss": 2.825,
      "step": 177603
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.109006404876709,
      "learning_rate": 7.444350646811868e-05,
      "loss": 2.8368,
      "step": 177604
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9206619262695312,
      "learning_rate": 7.444080946480188e-05,
      "loss": 2.8424,
      "step": 177605
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4239084720611572,
      "learning_rate": 7.443811250342085e-05,
      "loss": 3.2166,
      "step": 177606
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3814330101013184,
      "learning_rate": 7.443541558397602e-05,
      "loss": 2.9509,
      "step": 177607
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.050265312194824,
      "learning_rate": 7.443271870646784e-05,
      "loss": 2.9148,
      "step": 177608
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.335196018218994,
      "learning_rate": 7.443002187089698e-05,
      "loss": 2.8537,
      "step": 177609
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.286376953125,
      "learning_rate": 7.442732507726372e-05,
      "loss": 2.9025,
      "step": 177610
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8902101516723633,
      "learning_rate": 7.442462832556876e-05,
      "loss": 2.9967,
      "step": 177611
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.329559087753296,
      "learning_rate": 7.442193161581255e-05,
      "loss": 2.746,
      "step": 177612
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.601491928100586,
      "learning_rate": 7.441923494799554e-05,
      "loss": 3.0344,
      "step": 177613
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5965309143066406,
      "learning_rate": 7.441653832211818e-05,
      "loss": 3.0388,
      "step": 177614
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.543231964111328,
      "learning_rate": 7.44138417381811e-05,
      "loss": 2.9088,
      "step": 177615
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.318265438079834,
      "learning_rate": 7.441114519618471e-05,
      "loss": 2.7468,
      "step": 177616
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.711325168609619,
      "learning_rate": 7.440844869612959e-05,
      "loss": 3.0364,
      "step": 177617
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.568171262741089,
      "learning_rate": 7.44057522380162e-05,
      "loss": 2.8846,
      "step": 177618
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2214529514312744,
      "learning_rate": 7.440305582184506e-05,
      "loss": 2.7897,
      "step": 177619
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5627126693725586,
      "learning_rate": 7.440035944761656e-05,
      "loss": 2.8691,
      "step": 177620
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2691967487335205,
      "learning_rate": 7.439766311533135e-05,
      "loss": 2.7259,
      "step": 177621
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.860854387283325,
      "learning_rate": 7.439496682498982e-05,
      "loss": 3.0745,
      "step": 177622
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5039446353912354,
      "learning_rate": 7.43922705765926e-05,
      "loss": 2.7705,
      "step": 177623
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.351691246032715,
      "learning_rate": 7.438957437014004e-05,
      "loss": 3.0576,
      "step": 177624
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3374383449554443,
      "learning_rate": 7.438687820563278e-05,
      "loss": 2.7127,
      "step": 177625
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.775364875793457,
      "learning_rate": 7.438418208307127e-05,
      "loss": 2.9369,
      "step": 177626
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8893158435821533,
      "learning_rate": 7.438148600245602e-05,
      "loss": 3.0055,
      "step": 177627
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.4379353523254395,
      "learning_rate": 7.437878996378736e-05,
      "loss": 2.9224,
      "step": 177628
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6353673934936523,
      "learning_rate": 7.43760939670661e-05,
      "loss": 2.8875,
      "step": 177629
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.3396549224853516,
      "learning_rate": 7.437339801229248e-05,
      "loss": 2.856,
      "step": 177630
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.085465431213379,
      "learning_rate": 7.437070209946716e-05,
      "loss": 2.9788,
      "step": 177631
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4467849731445312,
      "learning_rate": 7.436800622859059e-05,
      "loss": 2.6721,
      "step": 177632
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7740750312805176,
      "learning_rate": 7.43653103996633e-05,
      "loss": 2.7753,
      "step": 177633
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.945472002029419,
      "learning_rate": 7.436261461268566e-05,
      "loss": 2.8913,
      "step": 177634
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.789033889770508,
      "learning_rate": 7.435991886765835e-05,
      "loss": 2.7957,
      "step": 177635
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.470448017120361,
      "learning_rate": 7.43572231645817e-05,
      "loss": 2.7662,
      "step": 177636
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.81866979598999,
      "learning_rate": 7.435452750345638e-05,
      "loss": 3.0873,
      "step": 177637
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6364078521728516,
      "learning_rate": 7.435183188428283e-05,
      "loss": 2.9419,
      "step": 177638
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.677551746368408,
      "learning_rate": 7.434913630706153e-05,
      "loss": 2.9532,
      "step": 177639
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.942129135131836,
      "learning_rate": 7.434644077179287e-05,
      "loss": 2.9201,
      "step": 177640
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.8975837230682373,
      "learning_rate": 7.434374527847758e-05,
      "loss": 2.7755,
      "step": 177641
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.923257350921631,
      "learning_rate": 7.434104982711597e-05,
      "loss": 2.8184,
      "step": 177642
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6028294563293457,
      "learning_rate": 7.43383544177087e-05,
      "loss": 2.978,
      "step": 177643
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4803895950317383,
      "learning_rate": 7.433565905025617e-05,
      "loss": 3.0253,
      "step": 177644
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.979625940322876,
      "learning_rate": 7.433296372475891e-05,
      "loss": 2.9381,
      "step": 177645
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.2111735343933105,
      "learning_rate": 7.433026844121735e-05,
      "loss": 2.8881,
      "step": 177646
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.416772842407227,
      "learning_rate": 7.432757319963211e-05,
      "loss": 2.8885,
      "step": 177647
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.50425386428833,
      "learning_rate": 7.432487800000357e-05,
      "loss": 2.7938,
      "step": 177648
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.920367956161499,
      "learning_rate": 7.432218284233235e-05,
      "loss": 2.9943,
      "step": 177649
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6487250328063965,
      "learning_rate": 7.431948772661892e-05,
      "loss": 2.7875,
      "step": 177650
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6634528636932373,
      "learning_rate": 7.431679265286375e-05,
      "loss": 2.9918,
      "step": 177651
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4393136501312256,
      "learning_rate": 7.431409762106726e-05,
      "loss": 2.6944,
      "step": 177652
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.284080743789673,
      "learning_rate": 7.431140263123013e-05,
      "loss": 3.1129,
      "step": 177653
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.964005947113037,
      "learning_rate": 7.430870768335268e-05,
      "loss": 2.6981,
      "step": 177654
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1551074981689453,
      "learning_rate": 7.430601277743558e-05,
      "loss": 2.9503,
      "step": 177655
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.666578531265259,
      "learning_rate": 7.43033179134793e-05,
      "loss": 3.0461,
      "step": 177656
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4138834476470947,
      "learning_rate": 7.430062309148424e-05,
      "loss": 3.0985,
      "step": 177657
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3771092891693115,
      "learning_rate": 7.42979283114509e-05,
      "loss": 2.9621,
      "step": 177658
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5512022972106934,
      "learning_rate": 7.429523357337991e-05,
      "loss": 3.162,
      "step": 177659
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.396817922592163,
      "learning_rate": 7.42925388772716e-05,
      "loss": 2.8268,
      "step": 177660
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3660318851470947,
      "learning_rate": 7.428984422312667e-05,
      "loss": 3.0068,
      "step": 177661
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9830660820007324,
      "learning_rate": 7.428714961094549e-05,
      "loss": 3.3602,
      "step": 177662
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9253599643707275,
      "learning_rate": 7.428445504072863e-05,
      "loss": 3.1279,
      "step": 177663
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.712998867034912,
      "learning_rate": 7.428176051247644e-05,
      "loss": 2.9983,
      "step": 177664
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.925898313522339,
      "learning_rate": 7.427906602618961e-05,
      "loss": 2.6685,
      "step": 177665
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.759831190109253,
      "learning_rate": 7.427637158186848e-05,
      "loss": 3.2326,
      "step": 177666
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.161184072494507,
      "learning_rate": 7.427367717951373e-05,
      "loss": 2.8769,
      "step": 177667
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.402194023132324,
      "learning_rate": 7.427098281912576e-05,
      "loss": 3.13,
      "step": 177668
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5001490116119385,
      "learning_rate": 7.426828850070508e-05,
      "loss": 2.9821,
      "step": 177669
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.315009355545044,
      "learning_rate": 7.426559422425211e-05,
      "loss": 3.0939,
      "step": 177670
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5062010288238525,
      "learning_rate": 7.42628999897675e-05,
      "loss": 3.1162,
      "step": 177671
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2898566722869873,
      "learning_rate": 7.426020579725158e-05,
      "loss": 2.7464,
      "step": 177672
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7031447887420654,
      "learning_rate": 7.425751164670506e-05,
      "loss": 3.0244,
      "step": 177673
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6361091136932373,
      "learning_rate": 7.425481753812832e-05,
      "loss": 2.8402,
      "step": 177674
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.563351631164551,
      "learning_rate": 7.425212347152186e-05,
      "loss": 2.9753,
      "step": 177675
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.849618911743164,
      "learning_rate": 7.424942944688612e-05,
      "loss": 3.0772,
      "step": 177676
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.868795156478882,
      "learning_rate": 7.424673546422174e-05,
      "loss": 3.355,
      "step": 177677
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5586936473846436,
      "learning_rate": 7.424404152352908e-05,
      "loss": 2.7791,
      "step": 177678
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.802079439163208,
      "learning_rate": 7.424134762480879e-05,
      "loss": 2.9033,
      "step": 177679
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.683901786804199,
      "learning_rate": 7.423865376806132e-05,
      "loss": 2.8233,
      "step": 177680
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.531923770904541,
      "learning_rate": 7.423595995328713e-05,
      "loss": 3.0098,
      "step": 177681
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5915732383728027,
      "learning_rate": 7.423326618048665e-05,
      "loss": 2.9559,
      "step": 177682
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.2929866313934326,
      "learning_rate": 7.423057244966055e-05,
      "loss": 3.0195,
      "step": 177683
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.748751401901245,
      "learning_rate": 7.422787876080918e-05,
      "loss": 2.9094,
      "step": 177684
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.12931489944458,
      "learning_rate": 7.422518511393316e-05,
      "loss": 3.2372,
      "step": 177685
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.460287094116211,
      "learning_rate": 7.422249150903297e-05,
      "loss": 2.9791,
      "step": 177686
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.900712251663208,
      "learning_rate": 7.4219797946109e-05,
      "loss": 2.911,
      "step": 177687
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.632134199142456,
      "learning_rate": 7.421710442516192e-05,
      "loss": 3.1289,
      "step": 177688
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7175164222717285,
      "learning_rate": 7.421441094619212e-05,
      "loss": 3.0686,
      "step": 177689
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3323895931243896,
      "learning_rate": 7.421171750920004e-05,
      "loss": 2.8487,
      "step": 177690
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4876773357391357,
      "learning_rate": 7.420902411418635e-05,
      "loss": 2.918,
      "step": 177691
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5789599418640137,
      "learning_rate": 7.420633076115135e-05,
      "loss": 2.948,
      "step": 177692
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7289719581604004,
      "learning_rate": 7.420363745009578e-05,
      "loss": 3.0035,
      "step": 177693
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.657874822616577,
      "learning_rate": 7.420094418102002e-05,
      "loss": 3.0765,
      "step": 177694
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6507551670074463,
      "learning_rate": 7.419825095392455e-05,
      "loss": 2.9272,
      "step": 177695
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.426558017730713,
      "learning_rate": 7.419555776880982e-05,
      "loss": 2.7473,
      "step": 177696
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5902085304260254,
      "learning_rate": 7.419286462567645e-05,
      "loss": 2.8439,
      "step": 177697
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.031862735748291,
      "learning_rate": 7.419017152452483e-05,
      "loss": 3.1903,
      "step": 177698
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9591803550720215,
      "learning_rate": 7.418747846535559e-05,
      "loss": 2.8644,
      "step": 177699
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3823258876800537,
      "learning_rate": 7.418478544816919e-05,
      "loss": 2.8234,
      "step": 177700
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.553823232650757,
      "learning_rate": 7.418209247296599e-05,
      "loss": 2.6898,
      "step": 177701
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5291311740875244,
      "learning_rate": 7.41793995397467e-05,
      "loss": 3.036,
      "step": 177702
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.398437261581421,
      "learning_rate": 7.417670664851174e-05,
      "loss": 2.8591,
      "step": 177703
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7277162075042725,
      "learning_rate": 7.417401379926144e-05,
      "loss": 2.9293,
      "step": 177704
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.66802978515625,
      "learning_rate": 7.41713209919966e-05,
      "loss": 2.8447,
      "step": 177705
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.536393880844116,
      "learning_rate": 7.416862822671755e-05,
      "loss": 2.803,
      "step": 177706
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.742335319519043,
      "learning_rate": 7.416593550342475e-05,
      "loss": 2.9388,
      "step": 177707
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.785470962524414,
      "learning_rate": 7.416324282211882e-05,
      "loss": 2.9235,
      "step": 177708
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.365518569946289,
      "learning_rate": 7.416055018280014e-05,
      "loss": 2.9712,
      "step": 177709
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.9100708961486816,
      "learning_rate": 7.415785758546937e-05,
      "loss": 2.6865,
      "step": 177710
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.131149768829346,
      "learning_rate": 7.415516503012689e-05,
      "loss": 2.7831,
      "step": 177711
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.532085657119751,
      "learning_rate": 7.415247251677324e-05,
      "loss": 2.9017,
      "step": 177712
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5528931617736816,
      "learning_rate": 7.414978004540885e-05,
      "loss": 3.1605,
      "step": 177713
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5279788970947266,
      "learning_rate": 7.414708761603433e-05,
      "loss": 2.9388,
      "step": 177714
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4383327960968018,
      "learning_rate": 7.414439522865006e-05,
      "loss": 2.993,
      "step": 177715
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.136800765991211,
      "learning_rate": 7.414170288325669e-05,
      "loss": 2.8025,
      "step": 177716
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4089200496673584,
      "learning_rate": 7.413901057985464e-05,
      "loss": 2.8164,
      "step": 177717
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.460738182067871,
      "learning_rate": 7.413631831844443e-05,
      "loss": 3.0498,
      "step": 177718
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3687736988067627,
      "learning_rate": 7.413362609902643e-05,
      "loss": 2.7516,
      "step": 177719
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.794419765472412,
      "learning_rate": 7.413093392160135e-05,
      "loss": 2.9915,
      "step": 177720
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.781919002532959,
      "learning_rate": 7.41282417861695e-05,
      "loss": 2.9723,
      "step": 177721
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.891242027282715,
      "learning_rate": 7.412554969273156e-05,
      "loss": 2.8296,
      "step": 177722
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.125157117843628,
      "learning_rate": 7.412285764128796e-05,
      "loss": 2.864,
      "step": 177723
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.010114669799805,
      "learning_rate": 7.412016563183916e-05,
      "loss": 2.8299,
      "step": 177724
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7785799503326416,
      "learning_rate": 7.411747366438562e-05,
      "loss": 2.6979,
      "step": 177725
    },
    {
      "epoch": 2.31,
      "grad_norm": 4.366639137268066,
      "learning_rate": 7.411478173892797e-05,
      "loss": 2.8596,
      "step": 177726
    },
    {
      "epoch": 2.31,
      "grad_norm": 5.272943496704102,
      "learning_rate": 7.411208985546661e-05,
      "loss": 2.9356,
      "step": 177727
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3543553352355957,
      "learning_rate": 7.41093980140021e-05,
      "loss": 2.9732,
      "step": 177728
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.832702398300171,
      "learning_rate": 7.410670621453496e-05,
      "loss": 2.894,
      "step": 177729
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5808358192443848,
      "learning_rate": 7.410401445706562e-05,
      "loss": 2.8662,
      "step": 177730
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.53462553024292,
      "learning_rate": 7.410132274159455e-05,
      "loss": 2.787,
      "step": 177731
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.743579387664795,
      "learning_rate": 7.409863106812237e-05,
      "loss": 3.0531,
      "step": 177732
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9304862022399902,
      "learning_rate": 7.409593943664943e-05,
      "loss": 2.9985,
      "step": 177733
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7997145652770996,
      "learning_rate": 7.409324784717643e-05,
      "loss": 3.0231,
      "step": 177734
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.841097831726074,
      "learning_rate": 7.409055629970373e-05,
      "loss": 3.0712,
      "step": 177735
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.855968713760376,
      "learning_rate": 7.40878647942319e-05,
      "loss": 2.6331,
      "step": 177736
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.362746238708496,
      "learning_rate": 7.408517333076128e-05,
      "loss": 2.9718,
      "step": 177737
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8788440227508545,
      "learning_rate": 7.408248190929258e-05,
      "loss": 2.9766,
      "step": 177738
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5956151485443115,
      "learning_rate": 7.407979052982612e-05,
      "loss": 2.996,
      "step": 177739
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.9613640308380127,
      "learning_rate": 7.40770991923626e-05,
      "loss": 3.0106,
      "step": 177740
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7679684162139893,
      "learning_rate": 7.407440789690241e-05,
      "loss": 2.8098,
      "step": 177741
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4435055255889893,
      "learning_rate": 7.407171664344602e-05,
      "loss": 2.9047,
      "step": 177742
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.063720464706421,
      "learning_rate": 7.40690254319939e-05,
      "loss": 2.96,
      "step": 177743
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8700356483459473,
      "learning_rate": 7.406633426254671e-05,
      "loss": 2.8971,
      "step": 177744
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9771101474761963,
      "learning_rate": 7.406364313510475e-05,
      "loss": 2.7664,
      "step": 177745
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4034104347229004,
      "learning_rate": 7.406095204966873e-05,
      "loss": 2.8749,
      "step": 177746
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4359326362609863,
      "learning_rate": 7.4058261006239e-05,
      "loss": 3.1545,
      "step": 177747
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3783066272735596,
      "learning_rate": 7.405557000481614e-05,
      "loss": 3.1507,
      "step": 177748
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5299108028411865,
      "learning_rate": 7.405287904540055e-05,
      "loss": 2.9659,
      "step": 177749
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.2969424724578857,
      "learning_rate": 7.405018812799285e-05,
      "loss": 2.8307,
      "step": 177750
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.551809310913086,
      "learning_rate": 7.404749725259338e-05,
      "loss": 2.9922,
      "step": 177751
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.179706335067749,
      "learning_rate": 7.404480641920287e-05,
      "loss": 2.9467,
      "step": 177752
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.6399991512298584,
      "learning_rate": 7.404211562782167e-05,
      "loss": 2.6941,
      "step": 177753
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.882376194000244,
      "learning_rate": 7.403942487845032e-05,
      "loss": 2.9763,
      "step": 177754
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.5803418159484863,
      "learning_rate": 7.403673417108919e-05,
      "loss": 3.1465,
      "step": 177755
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9012980461120605,
      "learning_rate": 7.403404350573904e-05,
      "loss": 3.1059,
      "step": 177756
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7354352474212646,
      "learning_rate": 7.403135288240011e-05,
      "loss": 3.0707,
      "step": 177757
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.74637770652771,
      "learning_rate": 7.40286623010731e-05,
      "loss": 3.0133,
      "step": 177758
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7162134647369385,
      "learning_rate": 7.402597176175833e-05,
      "loss": 2.8468,
      "step": 177759
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.391359806060791,
      "learning_rate": 7.402328126445658e-05,
      "loss": 2.9637,
      "step": 177760
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.494377851486206,
      "learning_rate": 7.402059080916799e-05,
      "loss": 3.1253,
      "step": 177761
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7098710536956787,
      "learning_rate": 7.401790039589336e-05,
      "loss": 3.0242,
      "step": 177762
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.8980154991149902,
      "learning_rate": 7.401521002463295e-05,
      "loss": 3.0473,
      "step": 177763
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.501399040222168,
      "learning_rate": 7.401251969538744e-05,
      "loss": 3.071,
      "step": 177764
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.83634614944458,
      "learning_rate": 7.400982940815722e-05,
      "loss": 2.9662,
      "step": 177765
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.72705340385437,
      "learning_rate": 7.400713916294302e-05,
      "loss": 2.8289,
      "step": 177766
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.727876901626587,
      "learning_rate": 7.400444895974494e-05,
      "loss": 2.9875,
      "step": 177767
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.5282275676727295,
      "learning_rate": 7.400175879856383e-05,
      "loss": 2.8847,
      "step": 177768
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.9952640533447266,
      "learning_rate": 7.399906867939997e-05,
      "loss": 2.943,
      "step": 177769
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.4608473777770996,
      "learning_rate": 7.399637860225399e-05,
      "loss": 3.1823,
      "step": 177770
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4867963790893555,
      "learning_rate": 7.399368856712631e-05,
      "loss": 2.9733,
      "step": 177771
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7247061729431152,
      "learning_rate": 7.399099857401754e-05,
      "loss": 2.8028,
      "step": 177772
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.621837854385376,
      "learning_rate": 7.39883086229281e-05,
      "loss": 2.8929,
      "step": 177773
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7764358520507812,
      "learning_rate": 7.398561871385853e-05,
      "loss": 2.8908,
      "step": 177774
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.6075732707977295,
      "learning_rate": 7.398292884680918e-05,
      "loss": 2.9633,
      "step": 177775
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.4198429584503174,
      "learning_rate": 7.398023902178077e-05,
      "loss": 3.0215,
      "step": 177776
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.992218017578125,
      "learning_rate": 7.397754923877362e-05,
      "loss": 2.82,
      "step": 177777
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.790971040725708,
      "learning_rate": 7.397485949778838e-05,
      "loss": 2.8763,
      "step": 177778
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.709047317504883,
      "learning_rate": 7.397216979882549e-05,
      "loss": 2.8764,
      "step": 177779
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.640442371368408,
      "learning_rate": 7.396948014188545e-05,
      "loss": 3.022,
      "step": 177780
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.329528570175171,
      "learning_rate": 7.396679052696864e-05,
      "loss": 3.0251,
      "step": 177781
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.451920986175537,
      "learning_rate": 7.396410095407577e-05,
      "loss": 2.8393,
      "step": 177782
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.765580177307129,
      "learning_rate": 7.396141142320715e-05,
      "loss": 3.0716,
      "step": 177783
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.29410719871521,
      "learning_rate": 7.395872193436349e-05,
      "loss": 2.8113,
      "step": 177784
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.7583577632904053,
      "learning_rate": 7.395603248754513e-05,
      "loss": 2.9101,
      "step": 177785
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.3394320011138916,
      "learning_rate": 7.395334308275253e-05,
      "loss": 2.881,
      "step": 177786
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.799221992492676,
      "learning_rate": 7.395065371998635e-05,
      "loss": 2.6458,
      "step": 177787
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1818222999572754,
      "learning_rate": 7.394796439924705e-05,
      "loss": 2.9837,
      "step": 177788
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.472391128540039,
      "learning_rate": 7.394527512053497e-05,
      "loss": 2.7538,
      "step": 177789
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.0747809410095215,
      "learning_rate": 7.394258588385082e-05,
      "loss": 2.9092,
      "step": 177790
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.33516788482666,
      "learning_rate": 7.3939896689195e-05,
      "loss": 2.8887,
      "step": 177791
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.850329637527466,
      "learning_rate": 7.393720753656795e-05,
      "loss": 3.0111,
      "step": 177792
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1002612113952637,
      "learning_rate": 7.393451842597033e-05,
      "loss": 2.8806,
      "step": 177793
    },
    {
      "epoch": 2.31,
      "grad_norm": 3.1808648109436035,
      "learning_rate": 7.393182935740255e-05,
      "loss": 3.0089,
      "step": 177794
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.926119327545166,
      "learning_rate": 7.392914033086503e-05,
      "loss": 2.9111,
      "step": 177795
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6022725105285645,
      "learning_rate": 7.392645134635841e-05,
      "loss": 3.0236,
      "step": 177796
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9144935607910156,
      "learning_rate": 7.392376240388315e-05,
      "loss": 2.8379,
      "step": 177797
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.400754690170288,
      "learning_rate": 7.392107350343965e-05,
      "loss": 2.9171,
      "step": 177798
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.179830551147461,
      "learning_rate": 7.391838464502856e-05,
      "loss": 3.0226,
      "step": 177799
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.135353088378906,
      "learning_rate": 7.391569582865022e-05,
      "loss": 3.1279,
      "step": 177800
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4489128589630127,
      "learning_rate": 7.391300705430534e-05,
      "loss": 2.6491,
      "step": 177801
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.543792247772217,
      "learning_rate": 7.391031832199428e-05,
      "loss": 3.0401,
      "step": 177802
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5508670806884766,
      "learning_rate": 7.390762963171753e-05,
      "loss": 3.1395,
      "step": 177803
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8345813751220703,
      "learning_rate": 7.390494098347558e-05,
      "loss": 3.1709,
      "step": 177804
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5408687591552734,
      "learning_rate": 7.390225237726901e-05,
      "loss": 3.0119,
      "step": 177805
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.080496788024902,
      "learning_rate": 7.389956381309821e-05,
      "loss": 2.9846,
      "step": 177806
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.083846569061279,
      "learning_rate": 7.389687529096387e-05,
      "loss": 3.1505,
      "step": 177807
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6847710609436035,
      "learning_rate": 7.389418681086632e-05,
      "loss": 2.905,
      "step": 177808
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.7539777755737305,
      "learning_rate": 7.389149837280615e-05,
      "loss": 3.1084,
      "step": 177809
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8006253242492676,
      "learning_rate": 7.388880997678372e-05,
      "loss": 2.8371,
      "step": 177810
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.59287428855896,
      "learning_rate": 7.388612162279969e-05,
      "loss": 2.8062,
      "step": 177811
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.8585734367370605,
      "learning_rate": 7.388343331085444e-05,
      "loss": 3.1619,
      "step": 177812
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4762001037597656,
      "learning_rate": 7.388074504094859e-05,
      "loss": 2.8617,
      "step": 177813
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.637089967727661,
      "learning_rate": 7.387805681308261e-05,
      "loss": 2.9368,
      "step": 177814
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.437077760696411,
      "learning_rate": 7.387536862725694e-05,
      "loss": 3.1382,
      "step": 177815
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.467797756195068,
      "learning_rate": 7.387268048347203e-05,
      "loss": 2.8806,
      "step": 177816
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.183363914489746,
      "learning_rate": 7.386999238172854e-05,
      "loss": 3.1132,
      "step": 177817
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3716742992401123,
      "learning_rate": 7.386730432202681e-05,
      "loss": 3.051,
      "step": 177818
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.718938112258911,
      "learning_rate": 7.386461630436748e-05,
      "loss": 2.9813,
      "step": 177819
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8626246452331543,
      "learning_rate": 7.386192832875099e-05,
      "loss": 3.0701,
      "step": 177820
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4640276432037354,
      "learning_rate": 7.385924039517787e-05,
      "loss": 2.7475,
      "step": 177821
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.981534957885742,
      "learning_rate": 7.38565525036485e-05,
      "loss": 3.0158,
      "step": 177822
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.888360023498535,
      "learning_rate": 7.385386465416351e-05,
      "loss": 2.9907,
      "step": 177823
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4910531044006348,
      "learning_rate": 7.38511768467233e-05,
      "loss": 3.0245,
      "step": 177824
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0772078037261963,
      "learning_rate": 7.384848908132851e-05,
      "loss": 3.1915,
      "step": 177825
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6628432273864746,
      "learning_rate": 7.384580135797943e-05,
      "loss": 2.9482,
      "step": 177826
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.066138505935669,
      "learning_rate": 7.384311367667691e-05,
      "loss": 3.1653,
      "step": 177827
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5319857597351074,
      "learning_rate": 7.384042603742102e-05,
      "loss": 3.0017,
      "step": 177828
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5526928901672363,
      "learning_rate": 7.383773844021255e-05,
      "loss": 2.9661,
      "step": 177829
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.568030834197998,
      "learning_rate": 7.383505088505186e-05,
      "loss": 2.6872,
      "step": 177830
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.765697479248047,
      "learning_rate": 7.383236337193955e-05,
      "loss": 2.89,
      "step": 177831
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6396822929382324,
      "learning_rate": 7.382967590087601e-05,
      "loss": 3.0335,
      "step": 177832
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.21112322807312,
      "learning_rate": 7.382698847186196e-05,
      "loss": 2.8485,
      "step": 177833
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.68978214263916,
      "learning_rate": 7.382430108489759e-05,
      "loss": 2.9272,
      "step": 177834
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.252962827682495,
      "learning_rate": 7.382161373998364e-05,
      "loss": 3.0513,
      "step": 177835
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0387649536132812,
      "learning_rate": 7.38189264371204e-05,
      "loss": 3.0445,
      "step": 177836
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.134706735610962,
      "learning_rate": 7.381623917630859e-05,
      "loss": 3.0176,
      "step": 177837
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.854665517807007,
      "learning_rate": 7.381355195754855e-05,
      "loss": 2.7474,
      "step": 177838
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.544691324234009,
      "learning_rate": 7.3810864780841e-05,
      "loss": 3.1067,
      "step": 177839
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9809532165527344,
      "learning_rate": 7.380817764618607e-05,
      "loss": 2.9053,
      "step": 177840
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5750091075897217,
      "learning_rate": 7.380549055358461e-05,
      "loss": 2.6143,
      "step": 177841
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.399031400680542,
      "learning_rate": 7.380280350303687e-05,
      "loss": 2.8874,
      "step": 177842
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.545395851135254,
      "learning_rate": 7.380011649454354e-05,
      "loss": 2.6927,
      "step": 177843
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7827963829040527,
      "learning_rate": 7.379742952810496e-05,
      "loss": 2.9094,
      "step": 177844
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.409532308578491,
      "learning_rate": 7.379474260372187e-05,
      "loss": 3.0623,
      "step": 177845
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.096864938735962,
      "learning_rate": 7.379205572139443e-05,
      "loss": 2.7931,
      "step": 177846
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.732835054397583,
      "learning_rate": 7.378936888112344e-05,
      "loss": 2.8724,
      "step": 177847
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.211890459060669,
      "learning_rate": 7.378668208290916e-05,
      "loss": 3.0895,
      "step": 177848
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.822659969329834,
      "learning_rate": 7.37839953267523e-05,
      "loss": 3.0134,
      "step": 177849
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.833148241043091,
      "learning_rate": 7.378130861265316e-05,
      "loss": 3.0081,
      "step": 177850
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.550544261932373,
      "learning_rate": 7.377862194061256e-05,
      "loss": 3.2831,
      "step": 177851
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3288474082946777,
      "learning_rate": 7.377593531063057e-05,
      "loss": 3.0009,
      "step": 177852
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3336119651794434,
      "learning_rate": 7.377324872270801e-05,
      "loss": 2.9754,
      "step": 177853
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3932106494903564,
      "learning_rate": 7.37705621768452e-05,
      "loss": 2.7813,
      "step": 177854
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5708110332489014,
      "learning_rate": 7.376787567304279e-05,
      "loss": 2.8461,
      "step": 177855
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.433155059814453,
      "learning_rate": 7.37651892113011e-05,
      "loss": 3.1436,
      "step": 177856
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.582580804824829,
      "learning_rate": 7.376250279162091e-05,
      "loss": 2.9875,
      "step": 177857
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.856637716293335,
      "learning_rate": 7.37598164140024e-05,
      "loss": 2.9992,
      "step": 177858
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.506366729736328,
      "learning_rate": 7.375713007844626e-05,
      "loss": 2.793,
      "step": 177859
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.6075291633605957,
      "learning_rate": 7.375444378495285e-05,
      "loss": 3.0612,
      "step": 177860
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.667231798171997,
      "learning_rate": 7.375175753352285e-05,
      "loss": 3.0401,
      "step": 177861
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.39801025390625,
      "learning_rate": 7.374907132415661e-05,
      "loss": 2.9207,
      "step": 177862
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.71501088142395,
      "learning_rate": 7.374638515685474e-05,
      "loss": 2.71,
      "step": 177863
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2114250659942627,
      "learning_rate": 7.374369903161771e-05,
      "loss": 2.8826,
      "step": 177864
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0492379665374756,
      "learning_rate": 7.374101294844598e-05,
      "loss": 2.9157,
      "step": 177865
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4773130416870117,
      "learning_rate": 7.373832690734002e-05,
      "loss": 2.7914,
      "step": 177866
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.285542964935303,
      "learning_rate": 7.373564090830043e-05,
      "loss": 2.8885,
      "step": 177867
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.552412986755371,
      "learning_rate": 7.373295495132756e-05,
      "loss": 3.1321,
      "step": 177868
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.464720726013184,
      "learning_rate": 7.37302690364221e-05,
      "loss": 2.8804,
      "step": 177869
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.764185905456543,
      "learning_rate": 7.372758316358447e-05,
      "loss": 3.133,
      "step": 177870
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4594480991363525,
      "learning_rate": 7.372489733281504e-05,
      "loss": 2.7671,
      "step": 177871
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.418192148208618,
      "learning_rate": 7.372221154411454e-05,
      "loss": 3.0729,
      "step": 177872
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7930490970611572,
      "learning_rate": 7.371952579748334e-05,
      "loss": 3.027,
      "step": 177873
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.571399688720703,
      "learning_rate": 7.371684009292187e-05,
      "loss": 2.7614,
      "step": 177874
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4345474243164062,
      "learning_rate": 7.371415443043077e-05,
      "loss": 3.0359,
      "step": 177875
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.277639865875244,
      "learning_rate": 7.371146881001052e-05,
      "loss": 2.8699,
      "step": 177876
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4770898818969727,
      "learning_rate": 7.370878323166149e-05,
      "loss": 2.8593,
      "step": 177877
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.913792610168457,
      "learning_rate": 7.370609769538432e-05,
      "loss": 3.0292,
      "step": 177878
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.427468776702881,
      "learning_rate": 7.370341220117948e-05,
      "loss": 3.1306,
      "step": 177879
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6299641132354736,
      "learning_rate": 7.370072674904735e-05,
      "loss": 3.2033,
      "step": 177880
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3571698665618896,
      "learning_rate": 7.369804133898863e-05,
      "loss": 3.0155,
      "step": 177881
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5109496116638184,
      "learning_rate": 7.369535597100371e-05,
      "loss": 2.8493,
      "step": 177882
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.643388271331787,
      "learning_rate": 7.3692670645093e-05,
      "loss": 2.671,
      "step": 177883
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.475470542907715,
      "learning_rate": 7.368998536125718e-05,
      "loss": 2.7864,
      "step": 177884
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.575171947479248,
      "learning_rate": 7.368730011949657e-05,
      "loss": 2.9415,
      "step": 177885
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.9971792697906494,
      "learning_rate": 7.368461491981187e-05,
      "loss": 2.8532,
      "step": 177886
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9219517707824707,
      "learning_rate": 7.368192976220347e-05,
      "loss": 2.9705,
      "step": 177887
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5598859786987305,
      "learning_rate": 7.367924464667188e-05,
      "loss": 2.9548,
      "step": 177888
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.66161847114563,
      "learning_rate": 7.367655957321748e-05,
      "loss": 3.0299,
      "step": 177889
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5701091289520264,
      "learning_rate": 7.367387454184097e-05,
      "loss": 2.8566,
      "step": 177890
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1304054260253906,
      "learning_rate": 7.367118955254268e-05,
      "loss": 2.78,
      "step": 177891
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.907545566558838,
      "learning_rate": 7.366850460532329e-05,
      "loss": 2.8873,
      "step": 177892
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.851944923400879,
      "learning_rate": 7.366581970018308e-05,
      "loss": 2.9307,
      "step": 177893
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.23016357421875,
      "learning_rate": 7.366313483712286e-05,
      "loss": 2.6643,
      "step": 177894
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0742976665496826,
      "learning_rate": 7.366045001614275e-05,
      "loss": 3.0871,
      "step": 177895
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4916763305664062,
      "learning_rate": 7.365776523724353e-05,
      "loss": 2.8662,
      "step": 177896
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4037389755249023,
      "learning_rate": 7.365508050042554e-05,
      "loss": 2.7496,
      "step": 177897
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9571492671966553,
      "learning_rate": 7.365239580568938e-05,
      "loss": 2.7614,
      "step": 177898
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3594367504119873,
      "learning_rate": 7.364971115303547e-05,
      "loss": 2.7486,
      "step": 177899
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.084399938583374,
      "learning_rate": 7.364702654246448e-05,
      "loss": 3.0396,
      "step": 177900
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.643118143081665,
      "learning_rate": 7.364434197397664e-05,
      "loss": 3.0745,
      "step": 177901
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.6325905323028564,
      "learning_rate": 7.364165744757267e-05,
      "loss": 2.9144,
      "step": 177902
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3005194664001465,
      "learning_rate": 7.36389729632529e-05,
      "loss": 2.8888,
      "step": 177903
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.411803722381592,
      "learning_rate": 7.363628852101799e-05,
      "loss": 2.9456,
      "step": 177904
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7126994132995605,
      "learning_rate": 7.363360412086833e-05,
      "loss": 3.2151,
      "step": 177905
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.566749334335327,
      "learning_rate": 7.36309197628046e-05,
      "loss": 3.1381,
      "step": 177906
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3817200660705566,
      "learning_rate": 7.362823544682697e-05,
      "loss": 2.8877,
      "step": 177907
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.846805095672607,
      "learning_rate": 7.36255511729362e-05,
      "loss": 3.0731,
      "step": 177908
    },
    {
      "epoch": 2.32,
      "grad_norm": 6.3243231773376465,
      "learning_rate": 7.362286694113265e-05,
      "loss": 2.7032,
      "step": 177909
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.157562494277954,
      "learning_rate": 7.362018275141698e-05,
      "loss": 2.9068,
      "step": 177910
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.720940113067627,
      "learning_rate": 7.361749860378948e-05,
      "loss": 2.997,
      "step": 177911
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5716097354888916,
      "learning_rate": 7.361481449825091e-05,
      "loss": 3.0689,
      "step": 177912
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.511734962463379,
      "learning_rate": 7.361213043480148e-05,
      "loss": 2.8548,
      "step": 177913
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5554697513580322,
      "learning_rate": 7.360944641344192e-05,
      "loss": 2.9415,
      "step": 177914
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.91969633102417,
      "learning_rate": 7.360676243417255e-05,
      "loss": 2.9596,
      "step": 177915
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.44067645072937,
      "learning_rate": 7.360407849699404e-05,
      "loss": 2.991,
      "step": 177916
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6073193550109863,
      "learning_rate": 7.360139460190671e-05,
      "loss": 2.5544,
      "step": 177917
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3787617683410645,
      "learning_rate": 7.359871074891133e-05,
      "loss": 2.8128,
      "step": 177918
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.160198450088501,
      "learning_rate": 7.359602693800805e-05,
      "loss": 3.1258,
      "step": 177919
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.8064379692077637,
      "learning_rate": 7.359334316919763e-05,
      "loss": 2.8214,
      "step": 177920
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.523545503616333,
      "learning_rate": 7.35906594424804e-05,
      "loss": 2.9625,
      "step": 177921
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.310363531112671,
      "learning_rate": 7.358797575785704e-05,
      "loss": 2.9457,
      "step": 177922
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.53045916557312,
      "learning_rate": 7.358529211532784e-05,
      "loss": 2.9031,
      "step": 177923
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.264734983444214,
      "learning_rate": 7.358260851489357e-05,
      "loss": 2.9757,
      "step": 177924
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.804965019226074,
      "learning_rate": 7.357992495655443e-05,
      "loss": 3.01,
      "step": 177925
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5288264751434326,
      "learning_rate": 7.357724144031111e-05,
      "loss": 3.1089,
      "step": 177926
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7383763790130615,
      "learning_rate": 7.357455796616398e-05,
      "loss": 3.1937,
      "step": 177927
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.41202712059021,
      "learning_rate": 7.357187453411372e-05,
      "loss": 2.8257,
      "step": 177928
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4228897094726562,
      "learning_rate": 7.356919114416061e-05,
      "loss": 3.0032,
      "step": 177929
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9715256690979004,
      "learning_rate": 7.356650779630543e-05,
      "loss": 3.0096,
      "step": 177930
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0267162322998047,
      "learning_rate": 7.356382449054837e-05,
      "loss": 2.8916,
      "step": 177931
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.470524311065674,
      "learning_rate": 7.356114122689014e-05,
      "loss": 2.9879,
      "step": 177932
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4926669597625732,
      "learning_rate": 7.355845800533107e-05,
      "loss": 3.0116,
      "step": 177933
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7955124378204346,
      "learning_rate": 7.355577482587185e-05,
      "loss": 2.8674,
      "step": 177934
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.731945514678955,
      "learning_rate": 7.355309168851279e-05,
      "loss": 3.0578,
      "step": 177935
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8211169242858887,
      "learning_rate": 7.355040859325469e-05,
      "loss": 2.9609,
      "step": 177936
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6565186977386475,
      "learning_rate": 7.354772554009764e-05,
      "loss": 3.168,
      "step": 177937
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.207815170288086,
      "learning_rate": 7.354504252904244e-05,
      "loss": 3.0728,
      "step": 177938
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.63116717338562,
      "learning_rate": 7.35423595600894e-05,
      "loss": 3.0103,
      "step": 177939
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.555974006652832,
      "learning_rate": 7.353967663323919e-05,
      "loss": 2.845,
      "step": 177940
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0293378829956055,
      "learning_rate": 7.353699374849215e-05,
      "loss": 2.8926,
      "step": 177941
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8651092052459717,
      "learning_rate": 7.353431090584904e-05,
      "loss": 2.8962,
      "step": 177942
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4946224689483643,
      "learning_rate": 7.353162810530999e-05,
      "loss": 2.897,
      "step": 177943
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.969606876373291,
      "learning_rate": 7.352894534687579e-05,
      "loss": 2.9015,
      "step": 177944
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9013357162475586,
      "learning_rate": 7.352626263054675e-05,
      "loss": 2.9891,
      "step": 177945
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8427088260650635,
      "learning_rate": 7.35235799563235e-05,
      "loss": 3.1083,
      "step": 177946
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6540870666503906,
      "learning_rate": 7.352089732420644e-05,
      "loss": 2.8274,
      "step": 177947
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7231438159942627,
      "learning_rate": 7.35182147341962e-05,
      "loss": 2.804,
      "step": 177948
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.648165464401245,
      "learning_rate": 7.351553218629319e-05,
      "loss": 3.2376,
      "step": 177949
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5590755939483643,
      "learning_rate": 7.35128496804979e-05,
      "loss": 2.9617,
      "step": 177950
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.112488031387329,
      "learning_rate": 7.351016721681078e-05,
      "loss": 2.6991,
      "step": 177951
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7477211952209473,
      "learning_rate": 7.35074847952325e-05,
      "loss": 2.8307,
      "step": 177952
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.609978675842285,
      "learning_rate": 7.350480241576336e-05,
      "loss": 2.8857,
      "step": 177953
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4283201694488525,
      "learning_rate": 7.350212007840402e-05,
      "loss": 2.918,
      "step": 177954
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6686437129974365,
      "learning_rate": 7.349943778315491e-05,
      "loss": 2.9044,
      "step": 177955
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5310022830963135,
      "learning_rate": 7.349675553001648e-05,
      "loss": 2.8206,
      "step": 177956
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.345824956893921,
      "learning_rate": 7.349407331898933e-05,
      "loss": 3.1079,
      "step": 177957
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1379852294921875,
      "learning_rate": 7.349139115007392e-05,
      "loss": 2.97,
      "step": 177958
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6286306381225586,
      "learning_rate": 7.348870902327064e-05,
      "loss": 2.9356,
      "step": 177959
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4662411212921143,
      "learning_rate": 7.34860269385802e-05,
      "loss": 2.8711,
      "step": 177960
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6767311096191406,
      "learning_rate": 7.348334489600297e-05,
      "loss": 2.9396,
      "step": 177961
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.055130958557129,
      "learning_rate": 7.348066289553936e-05,
      "loss": 2.8416,
      "step": 177962
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.208585500717163,
      "learning_rate": 7.347798093719005e-05,
      "loss": 2.7853,
      "step": 177963
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.385546922683716,
      "learning_rate": 7.34752990209555e-05,
      "loss": 3.1454,
      "step": 177964
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5185394287109375,
      "learning_rate": 7.347261714683604e-05,
      "loss": 2.9586,
      "step": 177965
    },
    {
      "epoch": 2.32,
      "grad_norm": 6.126802921295166,
      "learning_rate": 7.34699353148324e-05,
      "loss": 2.8834,
      "step": 177966
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.662548065185547,
      "learning_rate": 7.346725352494498e-05,
      "loss": 2.695,
      "step": 177967
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.2339468002319336,
      "learning_rate": 7.346457177717416e-05,
      "loss": 2.9918,
      "step": 177968
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6711602210998535,
      "learning_rate": 7.346189007152069e-05,
      "loss": 2.5911,
      "step": 177969
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9555981159210205,
      "learning_rate": 7.345920840798479e-05,
      "loss": 2.8282,
      "step": 177970
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.335582733154297,
      "learning_rate": 7.345652678656724e-05,
      "loss": 3.0216,
      "step": 177971
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.69826340675354,
      "learning_rate": 7.345384520726835e-05,
      "loss": 2.8629,
      "step": 177972
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.834498405456543,
      "learning_rate": 7.345116367008868e-05,
      "loss": 3.0893,
      "step": 177973
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.769531726837158,
      "learning_rate": 7.344848217502866e-05,
      "loss": 3.0373,
      "step": 177974
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9145584106445312,
      "learning_rate": 7.34458007220889e-05,
      "loss": 2.6863,
      "step": 177975
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5188703536987305,
      "learning_rate": 7.344311931126975e-05,
      "loss": 3.0069,
      "step": 177976
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.7094953060150146,
      "learning_rate": 7.344043794257191e-05,
      "loss": 2.8462,
      "step": 177977
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.441299915313721,
      "learning_rate": 7.343775661599574e-05,
      "loss": 2.9339,
      "step": 177978
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.521043300628662,
      "learning_rate": 7.343507533154178e-05,
      "loss": 2.7465,
      "step": 177979
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.643854856491089,
      "learning_rate": 7.343239408921044e-05,
      "loss": 2.9387,
      "step": 177980
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7826855182647705,
      "learning_rate": 7.342971288900235e-05,
      "loss": 3.0873,
      "step": 177981
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4304566383361816,
      "learning_rate": 7.34270317309179e-05,
      "loss": 2.8538,
      "step": 177982
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4001026153564453,
      "learning_rate": 7.342435061495773e-05,
      "loss": 2.7405,
      "step": 177983
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.160462379455566,
      "learning_rate": 7.342166954112216e-05,
      "loss": 2.7586,
      "step": 177984
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.34953498840332,
      "learning_rate": 7.341898850941195e-05,
      "loss": 3.0749,
      "step": 177985
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.385741710662842,
      "learning_rate": 7.341630751982725e-05,
      "loss": 3.023,
      "step": 177986
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2474863529205322,
      "learning_rate": 7.341362657236881e-05,
      "loss": 3.2328,
      "step": 177987
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9515540599823,
      "learning_rate": 7.341094566703698e-05,
      "loss": 2.7678,
      "step": 177988
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.6741106510162354,
      "learning_rate": 7.340826480383241e-05,
      "loss": 2.8775,
      "step": 177989
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2732841968536377,
      "learning_rate": 7.340558398275546e-05,
      "loss": 2.8929,
      "step": 177990
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.15459680557251,
      "learning_rate": 7.340290320380683e-05,
      "loss": 2.9144,
      "step": 177991
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1751291751861572,
      "learning_rate": 7.340022246698674e-05,
      "loss": 3.1409,
      "step": 177992
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1581521034240723,
      "learning_rate": 7.339754177229588e-05,
      "loss": 3.1113,
      "step": 177993
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.967184066772461,
      "learning_rate": 7.339486111973461e-05,
      "loss": 2.8741,
      "step": 177994
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5289323329925537,
      "learning_rate": 7.339218050930362e-05,
      "loss": 3.0739,
      "step": 177995
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1867575645446777,
      "learning_rate": 7.33894999410032e-05,
      "loss": 2.8737,
      "step": 177996
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.343052387237549,
      "learning_rate": 7.338681941483413e-05,
      "loss": 2.8072,
      "step": 177997
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.694977045059204,
      "learning_rate": 7.338413893079653e-05,
      "loss": 3.0743,
      "step": 177998
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.630800247192383,
      "learning_rate": 7.338145848889123e-05,
      "loss": 2.936,
      "step": 177999
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.0797858238220215,
      "learning_rate": 7.337877808911846e-05,
      "loss": 2.9332,
      "step": 178000
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.194458484649658,
      "learning_rate": 7.3376097731479e-05,
      "loss": 2.9061,
      "step": 178001
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.322690010070801,
      "learning_rate": 7.337341741597305e-05,
      "loss": 2.9116,
      "step": 178002
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8334264755249023,
      "learning_rate": 7.337073714260146e-05,
      "loss": 2.7986,
      "step": 178003
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.282641410827637,
      "learning_rate": 7.336805691136435e-05,
      "loss": 2.7029,
      "step": 178004
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.8203043937683105,
      "learning_rate": 7.336537672226249e-05,
      "loss": 3.1108,
      "step": 178005
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.218247413635254,
      "learning_rate": 7.33626965752962e-05,
      "loss": 2.9549,
      "step": 178006
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6911613941192627,
      "learning_rate": 7.336001647046613e-05,
      "loss": 3.1775,
      "step": 178007
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.467168092727661,
      "learning_rate": 7.335733640777266e-05,
      "loss": 2.9953,
      "step": 178008
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.725992441177368,
      "learning_rate": 7.335465638721649e-05,
      "loss": 2.9283,
      "step": 178009
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.090853691101074,
      "learning_rate": 7.335197640879779e-05,
      "loss": 2.753,
      "step": 178010
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8118021488189697,
      "learning_rate": 7.33492964725173e-05,
      "loss": 3.1432,
      "step": 178011
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.606238603591919,
      "learning_rate": 7.33466165783754e-05,
      "loss": 3.0593,
      "step": 178012
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.325639009475708,
      "learning_rate": 7.334393672637273e-05,
      "loss": 2.8752,
      "step": 178013
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.477766275405884,
      "learning_rate": 7.334125691650962e-05,
      "loss": 2.8823,
      "step": 178014
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7281460762023926,
      "learning_rate": 7.333857714878679e-05,
      "loss": 3.0702,
      "step": 178015
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.572683811187744,
      "learning_rate": 7.333589742320446e-05,
      "loss": 2.904,
      "step": 178016
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3191769123077393,
      "learning_rate": 7.333321773976335e-05,
      "loss": 3.0286,
      "step": 178017
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.473970890045166,
      "learning_rate": 7.333053809846377e-05,
      "loss": 2.9001,
      "step": 178018
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.268366813659668,
      "learning_rate": 7.33278584993064e-05,
      "loss": 2.9103,
      "step": 178019
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.290431499481201,
      "learning_rate": 7.33251789422916e-05,
      "loss": 2.9052,
      "step": 178020
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4395062923431396,
      "learning_rate": 7.332249942742007e-05,
      "loss": 2.8654,
      "step": 178021
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0068583488464355,
      "learning_rate": 7.331981995469201e-05,
      "loss": 2.7186,
      "step": 178022
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.522512674331665,
      "learning_rate": 7.331714052410818e-05,
      "loss": 2.9279,
      "step": 178023
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.805388927459717,
      "learning_rate": 7.331446113566886e-05,
      "loss": 2.6762,
      "step": 178024
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.020435810089111,
      "learning_rate": 7.331178178937472e-05,
      "loss": 2.8475,
      "step": 178025
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8296384811401367,
      "learning_rate": 7.330910248522618e-05,
      "loss": 2.9207,
      "step": 178026
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.503408670425415,
      "learning_rate": 7.330642322322379e-05,
      "loss": 2.8369,
      "step": 178027
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4933364391326904,
      "learning_rate": 7.330374400336804e-05,
      "loss": 2.8556,
      "step": 178028
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6747615337371826,
      "learning_rate": 7.330106482565939e-05,
      "loss": 3.0869,
      "step": 178029
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.611586332321167,
      "learning_rate": 7.32983856900983e-05,
      "loss": 2.6468,
      "step": 178030
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.562787055969238,
      "learning_rate": 7.329570659668535e-05,
      "loss": 3.0103,
      "step": 178031
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.953810691833496,
      "learning_rate": 7.329302754542095e-05,
      "loss": 2.7733,
      "step": 178032
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6717703342437744,
      "learning_rate": 7.329034853630573e-05,
      "loss": 2.7607,
      "step": 178033
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7324745655059814,
      "learning_rate": 7.328766956934014e-05,
      "loss": 3.0123,
      "step": 178034
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9037556648254395,
      "learning_rate": 7.328499064452462e-05,
      "loss": 2.9282,
      "step": 178035
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.680891513824463,
      "learning_rate": 7.328231176185966e-05,
      "loss": 2.8371,
      "step": 178036
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1103920936584473,
      "learning_rate": 7.327963292134584e-05,
      "loss": 3.0701,
      "step": 178037
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.120362281799316,
      "learning_rate": 7.327695412298355e-05,
      "loss": 2.8732,
      "step": 178038
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4617884159088135,
      "learning_rate": 7.327427536677343e-05,
      "loss": 2.5946,
      "step": 178039
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.395686149597168,
      "learning_rate": 7.327159665271589e-05,
      "loss": 3.0217,
      "step": 178040
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6047637462615967,
      "learning_rate": 7.326891798081147e-05,
      "loss": 2.8612,
      "step": 178041
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.6017251014709473,
      "learning_rate": 7.326623935106052e-05,
      "loss": 2.6612,
      "step": 178042
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.768015146255493,
      "learning_rate": 7.326356076346377e-05,
      "loss": 2.7827,
      "step": 178043
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.464299201965332,
      "learning_rate": 7.326088221802152e-05,
      "loss": 2.9817,
      "step": 178044
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.419194459915161,
      "learning_rate": 7.325820371473442e-05,
      "loss": 3.085,
      "step": 178045
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0974531173706055,
      "learning_rate": 7.32555252536029e-05,
      "loss": 2.9964,
      "step": 178046
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7859690189361572,
      "learning_rate": 7.325284683462739e-05,
      "loss": 3.0016,
      "step": 178047
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5435492992401123,
      "learning_rate": 7.325016845780853e-05,
      "loss": 3.009,
      "step": 178048
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.0256547927856445,
      "learning_rate": 7.324749012314674e-05,
      "loss": 3.0213,
      "step": 178049
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3695428371429443,
      "learning_rate": 7.324481183064243e-05,
      "loss": 2.8417,
      "step": 178050
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.325350284576416,
      "learning_rate": 7.32421335802963e-05,
      "loss": 2.9117,
      "step": 178051
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.644181489944458,
      "learning_rate": 7.323945537210871e-05,
      "loss": 3.0365,
      "step": 178052
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.244300127029419,
      "learning_rate": 7.323677720608012e-05,
      "loss": 2.9546,
      "step": 178053
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.410443067550659,
      "learning_rate": 7.323409908221118e-05,
      "loss": 2.8259,
      "step": 178054
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.329296112060547,
      "learning_rate": 7.323142100050219e-05,
      "loss": 2.7457,
      "step": 178055
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.121068239212036,
      "learning_rate": 7.322874296095386e-05,
      "loss": 2.9062,
      "step": 178056
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.68981671333313,
      "learning_rate": 7.322606496356662e-05,
      "loss": 3.0372,
      "step": 178057
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.202738285064697,
      "learning_rate": 7.322338700834089e-05,
      "loss": 2.9622,
      "step": 178058
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9764063358306885,
      "learning_rate": 7.322070909527716e-05,
      "loss": 3.0974,
      "step": 178059
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.219409942626953,
      "learning_rate": 7.321803122437603e-05,
      "loss": 2.9706,
      "step": 178060
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7896432876586914,
      "learning_rate": 7.321535339563788e-05,
      "loss": 3.2207,
      "step": 178061
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9726781845092773,
      "learning_rate": 7.321267560906337e-05,
      "loss": 2.9502,
      "step": 178062
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5082385540008545,
      "learning_rate": 7.32099978646529e-05,
      "loss": 2.7017,
      "step": 178063
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2343246936798096,
      "learning_rate": 7.3207320162407e-05,
      "loss": 2.8933,
      "step": 178064
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.323774814605713,
      "learning_rate": 7.3204642502326e-05,
      "loss": 2.7523,
      "step": 178065
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.869154453277588,
      "learning_rate": 7.320196488441067e-05,
      "loss": 2.8649,
      "step": 178066
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4424304962158203,
      "learning_rate": 7.319928730866125e-05,
      "loss": 3.0015,
      "step": 178067
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.554602861404419,
      "learning_rate": 7.319660977507846e-05,
      "loss": 3.0668,
      "step": 178068
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6449878215789795,
      "learning_rate": 7.319393228366265e-05,
      "loss": 3.0238,
      "step": 178069
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0553808212280273,
      "learning_rate": 7.31912548344145e-05,
      "loss": 3.0062,
      "step": 178070
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.700575351715088,
      "learning_rate": 7.318857742733422e-05,
      "loss": 2.8231,
      "step": 178071
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.749087333679199,
      "learning_rate": 7.318590006242255e-05,
      "loss": 2.9394,
      "step": 178072
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.311079502105713,
      "learning_rate": 7.318322273967984e-05,
      "loss": 3.17,
      "step": 178073
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1356916427612305,
      "learning_rate": 7.318054545910672e-05,
      "loss": 3.1628,
      "step": 178074
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2516674995422363,
      "learning_rate": 7.317786822070351e-05,
      "loss": 2.8107,
      "step": 178075
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.527860641479492,
      "learning_rate": 7.3175191024471e-05,
      "loss": 3.1849,
      "step": 178076
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.857516288757324,
      "learning_rate": 7.317251387040937e-05,
      "loss": 3.0889,
      "step": 178077
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.215193271636963,
      "learning_rate": 7.31698367585193e-05,
      "loss": 3.0602,
      "step": 178078
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.659111738204956,
      "learning_rate": 7.316715968880116e-05,
      "loss": 2.6687,
      "step": 178079
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.599903106689453,
      "learning_rate": 7.316448266125559e-05,
      "loss": 2.8885,
      "step": 178080
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4573330879211426,
      "learning_rate": 7.316180567588298e-05,
      "loss": 2.9281,
      "step": 178081
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6616010665893555,
      "learning_rate": 7.3159128732684e-05,
      "loss": 3.0246,
      "step": 178082
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.500737428665161,
      "learning_rate": 7.31564518316589e-05,
      "loss": 3.0332,
      "step": 178083
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.691094160079956,
      "learning_rate": 7.315377497280835e-05,
      "loss": 2.846,
      "step": 178084
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.143621444702148,
      "learning_rate": 7.31510981561327e-05,
      "loss": 2.9427,
      "step": 178085
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0136075019836426,
      "learning_rate": 7.314842138163264e-05,
      "loss": 2.8919,
      "step": 178086
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8386666774749756,
      "learning_rate": 7.314574464930847e-05,
      "loss": 3.0591,
      "step": 178087
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.645134210586548,
      "learning_rate": 7.314306795916102e-05,
      "loss": 2.7333,
      "step": 178088
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.819347858428955,
      "learning_rate": 7.314039131119031e-05,
      "loss": 3.1951,
      "step": 178089
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.804025888442993,
      "learning_rate": 7.313771470539721e-05,
      "loss": 2.8235,
      "step": 178090
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.784118413925171,
      "learning_rate": 7.313503814178199e-05,
      "loss": 2.9736,
      "step": 178091
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4909040927886963,
      "learning_rate": 7.313236162034535e-05,
      "loss": 2.7524,
      "step": 178092
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7347214221954346,
      "learning_rate": 7.312968514108759e-05,
      "loss": 2.9605,
      "step": 178093
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.549438238143921,
      "learning_rate": 7.312700870400939e-05,
      "loss": 3.0605,
      "step": 178094
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.681800603866577,
      "learning_rate": 7.312433230911119e-05,
      "loss": 2.9595,
      "step": 178095
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2472219467163086,
      "learning_rate": 7.31216559563934e-05,
      "loss": 3.2268,
      "step": 178096
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4595870971679688,
      "learning_rate": 7.311897964585656e-05,
      "loss": 3.053,
      "step": 178097
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.980651378631592,
      "learning_rate": 7.311630337750123e-05,
      "loss": 2.9844,
      "step": 178098
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.053943634033203,
      "learning_rate": 7.311362715132775e-05,
      "loss": 3.2404,
      "step": 178099
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4903485774993896,
      "learning_rate": 7.311095096733688e-05,
      "loss": 2.8259,
      "step": 178100
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8367552757263184,
      "learning_rate": 7.310827482552892e-05,
      "loss": 2.9907,
      "step": 178101
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7204172611236572,
      "learning_rate": 7.310559872590443e-05,
      "loss": 2.8388,
      "step": 178102
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.527778148651123,
      "learning_rate": 7.310292266846381e-05,
      "loss": 2.8694,
      "step": 178103
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.487276792526245,
      "learning_rate": 7.31002466532077e-05,
      "loss": 2.8592,
      "step": 178104
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4934937953948975,
      "learning_rate": 7.309757068013647e-05,
      "loss": 3.133,
      "step": 178105
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.667250871658325,
      "learning_rate": 7.30948947492508e-05,
      "loss": 2.814,
      "step": 178106
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.298107862472534,
      "learning_rate": 7.309221886055103e-05,
      "loss": 2.8298,
      "step": 178107
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7152743339538574,
      "learning_rate": 7.30895430140377e-05,
      "loss": 2.9783,
      "step": 178108
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.098702907562256,
      "learning_rate": 7.308686720971125e-05,
      "loss": 2.9458,
      "step": 178109
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8673653602600098,
      "learning_rate": 7.30841914475723e-05,
      "loss": 3.2502,
      "step": 178110
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.739950180053711,
      "learning_rate": 7.308151572762119e-05,
      "loss": 3.0021,
      "step": 178111
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8911473751068115,
      "learning_rate": 7.307884004985863e-05,
      "loss": 2.8682,
      "step": 178112
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6736817359924316,
      "learning_rate": 7.307616441428496e-05,
      "loss": 2.9711,
      "step": 178113
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7509870529174805,
      "learning_rate": 7.307348882090072e-05,
      "loss": 2.8495,
      "step": 178114
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.7146108150482178,
      "learning_rate": 7.307081326970636e-05,
      "loss": 2.8954,
      "step": 178115
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3014259338378906,
      "learning_rate": 7.306813776070243e-05,
      "loss": 2.7536,
      "step": 178116
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8931353092193604,
      "learning_rate": 7.306546229388937e-05,
      "loss": 2.8004,
      "step": 178117
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.729644536972046,
      "learning_rate": 7.306278686926784e-05,
      "loss": 2.9676,
      "step": 178118
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8392815589904785,
      "learning_rate": 7.306011148683818e-05,
      "loss": 2.6132,
      "step": 178119
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7800676822662354,
      "learning_rate": 7.305743614660096e-05,
      "loss": 2.972,
      "step": 178120
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5337071418762207,
      "learning_rate": 7.305476084855653e-05,
      "loss": 3.0214,
      "step": 178121
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6076509952545166,
      "learning_rate": 7.305208559270562e-05,
      "loss": 2.9026,
      "step": 178122
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.744990110397339,
      "learning_rate": 7.304941037904848e-05,
      "loss": 2.7865,
      "step": 178123
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.305006504058838,
      "learning_rate": 7.304673520758586e-05,
      "loss": 2.9725,
      "step": 178124
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3052690029144287,
      "learning_rate": 7.304406007831812e-05,
      "loss": 2.9964,
      "step": 178125
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.335813045501709,
      "learning_rate": 7.30413849912458e-05,
      "loss": 2.9316,
      "step": 178126
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.628854751586914,
      "learning_rate": 7.303870994636926e-05,
      "loss": 2.8134,
      "step": 178127
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3082144260406494,
      "learning_rate": 7.303603494368919e-05,
      "loss": 3.0073,
      "step": 178128
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5647075176239014,
      "learning_rate": 7.303335998320594e-05,
      "loss": 2.8366,
      "step": 178129
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.445071220397949,
      "learning_rate": 7.303068506492014e-05,
      "loss": 2.9919,
      "step": 178130
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.805875301361084,
      "learning_rate": 7.302801018883225e-05,
      "loss": 2.8448,
      "step": 178131
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.856973886489868,
      "learning_rate": 7.302533535494263e-05,
      "loss": 2.8684,
      "step": 178132
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6585488319396973,
      "learning_rate": 7.302266056325196e-05,
      "loss": 3.0993,
      "step": 178133
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.869499683380127,
      "learning_rate": 7.301998581376066e-05,
      "loss": 2.8818,
      "step": 178134
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.131563186645508,
      "learning_rate": 7.301731110646918e-05,
      "loss": 2.937,
      "step": 178135
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.677177667617798,
      "learning_rate": 7.30146364413781e-05,
      "loss": 2.7594,
      "step": 178136
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7971596717834473,
      "learning_rate": 7.301196181848794e-05,
      "loss": 3.086,
      "step": 178137
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8814077377319336,
      "learning_rate": 7.300928723779902e-05,
      "loss": 2.9511,
      "step": 178138
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.905916213989258,
      "learning_rate": 7.300661269931203e-05,
      "loss": 2.7526,
      "step": 178139
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.408142328262329,
      "learning_rate": 7.300393820302744e-05,
      "loss": 3.1408,
      "step": 178140
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5562918186187744,
      "learning_rate": 7.300126374894558e-05,
      "loss": 2.7496,
      "step": 178141
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7412631511688232,
      "learning_rate": 7.299858933706718e-05,
      "loss": 2.8652,
      "step": 178142
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.151585578918457,
      "learning_rate": 7.29959149673926e-05,
      "loss": 2.8973,
      "step": 178143
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2175629138946533,
      "learning_rate": 7.299324063992229e-05,
      "loss": 2.9369,
      "step": 178144
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8839733600616455,
      "learning_rate": 7.299056635465692e-05,
      "loss": 2.9306,
      "step": 178145
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.538515090942383,
      "learning_rate": 7.298789211159679e-05,
      "loss": 2.8448,
      "step": 178146
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4926295280456543,
      "learning_rate": 7.29852179107426e-05,
      "loss": 2.7792,
      "step": 178147
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.512120246887207,
      "learning_rate": 7.298254375209474e-05,
      "loss": 3.1053,
      "step": 178148
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4006214141845703,
      "learning_rate": 7.29798696356537e-05,
      "loss": 2.6935,
      "step": 178149
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6796934604644775,
      "learning_rate": 7.297719556141988e-05,
      "loss": 2.9248,
      "step": 178150
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7463340759277344,
      "learning_rate": 7.297452152939401e-05,
      "loss": 2.789,
      "step": 178151
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.522019386291504,
      "learning_rate": 7.297184753957638e-05,
      "loss": 2.841,
      "step": 178152
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8197479248046875,
      "learning_rate": 7.296917359196761e-05,
      "loss": 2.9079,
      "step": 178153
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6638479232788086,
      "learning_rate": 7.296649968656812e-05,
      "loss": 2.8968,
      "step": 178154
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9510507583618164,
      "learning_rate": 7.296382582337858e-05,
      "loss": 3.0503,
      "step": 178155
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9969606399536133,
      "learning_rate": 7.296115200239919e-05,
      "loss": 3.0968,
      "step": 178156
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4998867511749268,
      "learning_rate": 7.29584782236307e-05,
      "loss": 3.03,
      "step": 178157
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.394111394882202,
      "learning_rate": 7.295580448707342e-05,
      "loss": 2.657,
      "step": 178158
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.428715229034424,
      "learning_rate": 7.295313079272803e-05,
      "loss": 2.9201,
      "step": 178159
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.643343925476074,
      "learning_rate": 7.295045714059483e-05,
      "loss": 2.7815,
      "step": 178160
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.925527572631836,
      "learning_rate": 7.294778353067454e-05,
      "loss": 2.8719,
      "step": 178161
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6684463024139404,
      "learning_rate": 7.294510996296752e-05,
      "loss": 3.0825,
      "step": 178162
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5401124954223633,
      "learning_rate": 7.294243643747433e-05,
      "loss": 2.9881,
      "step": 178163
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.951005697250366,
      "learning_rate": 7.29397629541953e-05,
      "loss": 3.0767,
      "step": 178164
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.644376754760742,
      "learning_rate": 7.293708951313117e-05,
      "loss": 3.023,
      "step": 178165
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.1878042221069336,
      "learning_rate": 7.293441611428221e-05,
      "loss": 2.9767,
      "step": 178166
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.346043348312378,
      "learning_rate": 7.293174275764915e-05,
      "loss": 2.9176,
      "step": 178167
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.9698476791381836,
      "learning_rate": 7.292906944323231e-05,
      "loss": 3.0008,
      "step": 178168
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8490850925445557,
      "learning_rate": 7.292639617103231e-05,
      "loss": 2.7417,
      "step": 178169
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0615756511688232,
      "learning_rate": 7.292372294104944e-05,
      "loss": 3.1922,
      "step": 178170
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6763813495635986,
      "learning_rate": 7.292104975328446e-05,
      "loss": 2.9387,
      "step": 178171
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5678048133850098,
      "learning_rate": 7.29183766077376e-05,
      "loss": 2.81,
      "step": 178172
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.094682455062866,
      "learning_rate": 7.291570350440965e-05,
      "loss": 3.2718,
      "step": 178173
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4559402465820312,
      "learning_rate": 7.291303044330093e-05,
      "loss": 2.9167,
      "step": 178174
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7087295055389404,
      "learning_rate": 7.291035742441196e-05,
      "loss": 3.1115,
      "step": 178175
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.630291223526001,
      "learning_rate": 7.290768444774315e-05,
      "loss": 3.2411,
      "step": 178176
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4374098777770996,
      "learning_rate": 7.290501151329521e-05,
      "loss": 2.987,
      "step": 178177
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2376434803009033,
      "learning_rate": 7.290233862106839e-05,
      "loss": 2.7736,
      "step": 178178
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.7910590171813965,
      "learning_rate": 7.28996657710634e-05,
      "loss": 2.8168,
      "step": 178179
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3414909839630127,
      "learning_rate": 7.289699296328067e-05,
      "loss": 2.935,
      "step": 178180
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.635277032852173,
      "learning_rate": 7.289432019772066e-05,
      "loss": 2.8859,
      "step": 178181
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.940906524658203,
      "learning_rate": 7.28916474743838e-05,
      "loss": 2.6343,
      "step": 178182
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.543048620223999,
      "learning_rate": 7.288897479327079e-05,
      "loss": 2.9489,
      "step": 178183
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1502201557159424,
      "learning_rate": 7.288630215438188e-05,
      "loss": 3.0942,
      "step": 178184
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5771701335906982,
      "learning_rate": 7.288362955771777e-05,
      "loss": 2.7559,
      "step": 178185
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8749046325683594,
      "learning_rate": 7.288095700327892e-05,
      "loss": 2.9792,
      "step": 178186
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9944841861724854,
      "learning_rate": 7.287828449106576e-05,
      "loss": 3.064,
      "step": 178187
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3594348430633545,
      "learning_rate": 7.287561202107875e-05,
      "loss": 3.1898,
      "step": 178188
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.228865146636963,
      "learning_rate": 7.287293959331849e-05,
      "loss": 2.94,
      "step": 178189
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.87308406829834,
      "learning_rate": 7.28702672077854e-05,
      "loss": 3.0549,
      "step": 178190
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1867523193359375,
      "learning_rate": 7.286759486448008e-05,
      "loss": 2.9496,
      "step": 178191
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6730988025665283,
      "learning_rate": 7.286492256340296e-05,
      "loss": 2.6906,
      "step": 178192
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7997705936431885,
      "learning_rate": 7.286225030455456e-05,
      "loss": 2.8156,
      "step": 178193
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4522271156311035,
      "learning_rate": 7.285957808793524e-05,
      "loss": 2.9554,
      "step": 178194
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.480072259902954,
      "learning_rate": 7.285690591354571e-05,
      "loss": 2.5748,
      "step": 178195
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.483395576477051,
      "learning_rate": 7.28542337813863e-05,
      "loss": 2.9158,
      "step": 178196
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.527968168258667,
      "learning_rate": 7.285156169145765e-05,
      "loss": 2.8533,
      "step": 178197
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8762338161468506,
      "learning_rate": 7.284888964376018e-05,
      "loss": 2.8199,
      "step": 178198
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.7237205505371094,
      "learning_rate": 7.284621763829439e-05,
      "loss": 2.9073,
      "step": 178199
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.670442581176758,
      "learning_rate": 7.284354567506069e-05,
      "loss": 2.9884,
      "step": 178200
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.9127397537231445,
      "learning_rate": 7.284087375405973e-05,
      "loss": 2.9396,
      "step": 178201
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.299433946609497,
      "learning_rate": 7.283820187529187e-05,
      "loss": 2.7573,
      "step": 178202
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7136337757110596,
      "learning_rate": 7.283553003875779e-05,
      "loss": 3.0241,
      "step": 178203
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3501670360565186,
      "learning_rate": 7.283285824445781e-05,
      "loss": 2.7733,
      "step": 178204
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.677830457687378,
      "learning_rate": 7.283018649239253e-05,
      "loss": 2.9422,
      "step": 178205
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.879861831665039,
      "learning_rate": 7.282751478256232e-05,
      "loss": 2.9327,
      "step": 178206
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6840665340423584,
      "learning_rate": 7.282484311496786e-05,
      "loss": 3.1181,
      "step": 178207
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4939041137695312,
      "learning_rate": 7.282217148960942e-05,
      "loss": 3.0258,
      "step": 178208
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1829309463500977,
      "learning_rate": 7.281949990648777e-05,
      "loss": 2.8969,
      "step": 178209
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4378278255462646,
      "learning_rate": 7.281682836560322e-05,
      "loss": 2.9928,
      "step": 178210
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5319700241088867,
      "learning_rate": 7.281415686695635e-05,
      "loss": 3.0018,
      "step": 178211
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.032165765762329,
      "learning_rate": 7.28114854105475e-05,
      "loss": 3.0824,
      "step": 178212
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2936787605285645,
      "learning_rate": 7.280881399637735e-05,
      "loss": 2.8893,
      "step": 178213
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7695703506469727,
      "learning_rate": 7.280614262444629e-05,
      "loss": 3.0863,
      "step": 178214
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6107287406921387,
      "learning_rate": 7.28034712947549e-05,
      "loss": 2.9462,
      "step": 178215
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.46449875831604,
      "learning_rate": 7.280080000730367e-05,
      "loss": 2.9718,
      "step": 178216
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9918978214263916,
      "learning_rate": 7.279812876209298e-05,
      "loss": 2.7997,
      "step": 178217
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1370372772216797,
      "learning_rate": 7.279545755912345e-05,
      "loss": 3.0476,
      "step": 178218
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.073843002319336,
      "learning_rate": 7.279278639839556e-05,
      "loss": 2.8116,
      "step": 178219
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7491860389709473,
      "learning_rate": 7.27901152799097e-05,
      "loss": 3.0148,
      "step": 178220
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7915661334991455,
      "learning_rate": 7.27874442036665e-05,
      "loss": 2.9795,
      "step": 178221
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.194229602813721,
      "learning_rate": 7.278477316966642e-05,
      "loss": 2.8954,
      "step": 178222
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.1394124031066895,
      "learning_rate": 7.278210217790989e-05,
      "loss": 2.8653,
      "step": 178223
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.854830741882324,
      "learning_rate": 7.277943122839749e-05,
      "loss": 2.7098,
      "step": 178224
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3580970764160156,
      "learning_rate": 7.27767603211297e-05,
      "loss": 2.6827,
      "step": 178225
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.355942726135254,
      "learning_rate": 7.277408945610691e-05,
      "loss": 3.0624,
      "step": 178226
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8246052265167236,
      "learning_rate": 7.277141863332981e-05,
      "loss": 2.9532,
      "step": 178227
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.385282278060913,
      "learning_rate": 7.276874785279871e-05,
      "loss": 2.9255,
      "step": 178228
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6031172275543213,
      "learning_rate": 7.276607711451425e-05,
      "loss": 2.8164,
      "step": 178229
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0212178230285645,
      "learning_rate": 7.276340641847687e-05,
      "loss": 3.0874,
      "step": 178230
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.657898426055908,
      "learning_rate": 7.276073576468698e-05,
      "loss": 2.7899,
      "step": 178231
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8370511531829834,
      "learning_rate": 7.275806515314527e-05,
      "loss": 2.9366,
      "step": 178232
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0890538692474365,
      "learning_rate": 7.27553945838521e-05,
      "loss": 2.8934,
      "step": 178233
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9448208808898926,
      "learning_rate": 7.27527240568079e-05,
      "loss": 2.9568,
      "step": 178234
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.191244602203369,
      "learning_rate": 7.275005357201337e-05,
      "loss": 3.1247,
      "step": 178235
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0534722805023193,
      "learning_rate": 7.274738312946888e-05,
      "loss": 2.9483,
      "step": 178236
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.789950132369995,
      "learning_rate": 7.274471272917485e-05,
      "loss": 2.6787,
      "step": 178237
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4293575286865234,
      "learning_rate": 7.274204237113193e-05,
      "loss": 2.7951,
      "step": 178238
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.348147392272949,
      "learning_rate": 7.273937205534057e-05,
      "loss": 2.715,
      "step": 178239
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.673868179321289,
      "learning_rate": 7.273670178180119e-05,
      "loss": 3.0953,
      "step": 178240
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4114396572113037,
      "learning_rate": 7.273403155051444e-05,
      "loss": 3.0475,
      "step": 178241
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8355906009674072,
      "learning_rate": 7.273136136148069e-05,
      "loss": 2.8493,
      "step": 178242
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7184109687805176,
      "learning_rate": 7.272869121470037e-05,
      "loss": 2.9596,
      "step": 178243
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7543177604675293,
      "learning_rate": 7.272602111017419e-05,
      "loss": 3.1255,
      "step": 178244
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6522839069366455,
      "learning_rate": 7.272335104790245e-05,
      "loss": 2.9528,
      "step": 178245
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4245705604553223,
      "learning_rate": 7.272068102788583e-05,
      "loss": 2.8451,
      "step": 178246
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6439342498779297,
      "learning_rate": 7.271801105012469e-05,
      "loss": 2.867,
      "step": 178247
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.468256950378418,
      "learning_rate": 7.27153411146196e-05,
      "loss": 2.9625,
      "step": 178248
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5843183994293213,
      "learning_rate": 7.271267122137087e-05,
      "loss": 2.7518,
      "step": 178249
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.804690361022949,
      "learning_rate": 7.271000137037929e-05,
      "loss": 2.9373,
      "step": 178250
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.093193292617798,
      "learning_rate": 7.27073315616451e-05,
      "loss": 3.0924,
      "step": 178251
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.831461191177368,
      "learning_rate": 7.2704661795169e-05,
      "loss": 2.748,
      "step": 178252
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7131338119506836,
      "learning_rate": 7.270199207095141e-05,
      "loss": 2.7892,
      "step": 178253
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6096742153167725,
      "learning_rate": 7.269932238899277e-05,
      "loss": 3.0291,
      "step": 178254
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.547826051712036,
      "learning_rate": 7.269665274929356e-05,
      "loss": 3.0416,
      "step": 178255
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.289574384689331,
      "learning_rate": 7.269398315185443e-05,
      "loss": 2.8821,
      "step": 178256
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8689563274383545,
      "learning_rate": 7.269131359667569e-05,
      "loss": 2.7199,
      "step": 178257
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.613187313079834,
      "learning_rate": 7.268864408375796e-05,
      "loss": 2.9727,
      "step": 178258
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9246997833251953,
      "learning_rate": 7.268597461310177e-05,
      "loss": 2.9934,
      "step": 178259
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.347207546234131,
      "learning_rate": 7.268330518470751e-05,
      "loss": 3.1443,
      "step": 178260
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6675217151641846,
      "learning_rate": 7.268063579857564e-05,
      "loss": 2.8956,
      "step": 178261
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2806971073150635,
      "learning_rate": 7.26779664547068e-05,
      "loss": 2.8892,
      "step": 178262
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.003286361694336,
      "learning_rate": 7.267529715310134e-05,
      "loss": 2.7863,
      "step": 178263
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2073898315429688,
      "learning_rate": 7.267262789375992e-05,
      "loss": 2.9515,
      "step": 178264
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.020782470703125,
      "learning_rate": 7.266995867668295e-05,
      "loss": 2.9676,
      "step": 178265
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7285449504852295,
      "learning_rate": 7.266728950187094e-05,
      "loss": 3.0833,
      "step": 178266
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.8590502738952637,
      "learning_rate": 7.266462036932429e-05,
      "loss": 2.7478,
      "step": 178267
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9687483310699463,
      "learning_rate": 7.266195127904363e-05,
      "loss": 2.7992,
      "step": 178268
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.179142475128174,
      "learning_rate": 7.265928223102936e-05,
      "loss": 2.9449,
      "step": 178269
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9598658084869385,
      "learning_rate": 7.265661322528208e-05,
      "loss": 2.8599,
      "step": 178270
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5242245197296143,
      "learning_rate": 7.265394426180222e-05,
      "loss": 2.9482,
      "step": 178271
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.253778457641602,
      "learning_rate": 7.265127534059029e-05,
      "loss": 2.8896,
      "step": 178272
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.312107801437378,
      "learning_rate": 7.26486064616467e-05,
      "loss": 2.9592,
      "step": 178273
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3514797687530518,
      "learning_rate": 7.264593762497212e-05,
      "loss": 2.8091,
      "step": 178274
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7274787425994873,
      "learning_rate": 7.264326883056685e-05,
      "loss": 2.998,
      "step": 178275
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.045708656311035,
      "learning_rate": 7.26406000784316e-05,
      "loss": 3.0162,
      "step": 178276
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.1059675216674805,
      "learning_rate": 7.26379313685667e-05,
      "loss": 3.0845,
      "step": 178277
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6952502727508545,
      "learning_rate": 7.263526270097276e-05,
      "loss": 2.7337,
      "step": 178278
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6440811157226562,
      "learning_rate": 7.26325940756501e-05,
      "loss": 2.9526,
      "step": 178279
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2995738983154297,
      "learning_rate": 7.262992549259944e-05,
      "loss": 3.033,
      "step": 178280
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.198155164718628,
      "learning_rate": 7.262725695182106e-05,
      "loss": 2.8161,
      "step": 178281
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1367926597595215,
      "learning_rate": 7.262458845331566e-05,
      "loss": 2.694,
      "step": 178282
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9596517086029053,
      "learning_rate": 7.262191999708361e-05,
      "loss": 2.9171,
      "step": 178283
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.644473075866699,
      "learning_rate": 7.261925158312548e-05,
      "loss": 2.7649,
      "step": 178284
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.889873504638672,
      "learning_rate": 7.261658321144163e-05,
      "loss": 2.7345,
      "step": 178285
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.939464807510376,
      "learning_rate": 7.261391488203271e-05,
      "loss": 2.8202,
      "step": 178286
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2474863529205322,
      "learning_rate": 7.26112465948991e-05,
      "loss": 2.9553,
      "step": 178287
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.973637342453003,
      "learning_rate": 7.260857835004144e-05,
      "loss": 3.0359,
      "step": 178288
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.8240134716033936,
      "learning_rate": 7.260591014746012e-05,
      "loss": 2.914,
      "step": 178289
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8488223552703857,
      "learning_rate": 7.260324198715566e-05,
      "loss": 2.9719,
      "step": 178290
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.020002841949463,
      "learning_rate": 7.260057386912844e-05,
      "loss": 2.9903,
      "step": 178291
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5040335655212402,
      "learning_rate": 7.259790579337917e-05,
      "loss": 2.9701,
      "step": 178292
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0605006217956543,
      "learning_rate": 7.259523775990815e-05,
      "loss": 3.0179,
      "step": 178293
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.876401424407959,
      "learning_rate": 7.259256976871608e-05,
      "loss": 2.8346,
      "step": 178294
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7145586013793945,
      "learning_rate": 7.258990181980325e-05,
      "loss": 2.9666,
      "step": 178295
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.126767635345459,
      "learning_rate": 7.258723391317041e-05,
      "loss": 3.1465,
      "step": 178296
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8861007690429688,
      "learning_rate": 7.25845660488177e-05,
      "loss": 2.9564,
      "step": 178297
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.821354627609253,
      "learning_rate": 7.258189822674595e-05,
      "loss": 3.0919,
      "step": 178298
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4105641841888428,
      "learning_rate": 7.25792304469554e-05,
      "loss": 2.8955,
      "step": 178299
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.99678111076355,
      "learning_rate": 7.257656270944674e-05,
      "loss": 2.7917,
      "step": 178300
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.938551664352417,
      "learning_rate": 7.257389501422035e-05,
      "loss": 2.842,
      "step": 178301
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.419499158859253,
      "learning_rate": 7.257122736127691e-05,
      "loss": 2.8278,
      "step": 178302
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6352274417877197,
      "learning_rate": 7.256855975061657e-05,
      "loss": 2.803,
      "step": 178303
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8719000816345215,
      "learning_rate": 7.256589218224015e-05,
      "loss": 2.9021,
      "step": 178304
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6669485569000244,
      "learning_rate": 7.256322465614794e-05,
      "loss": 3.2062,
      "step": 178305
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.125837802886963,
      "learning_rate": 7.25605571723406e-05,
      "loss": 2.8554,
      "step": 178306
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5121397972106934,
      "learning_rate": 7.255788973081843e-05,
      "loss": 2.8919,
      "step": 178307
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9265949726104736,
      "learning_rate": 7.255522233158218e-05,
      "loss": 2.7767,
      "step": 178308
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.722557783126831,
      "learning_rate": 7.255255497463216e-05,
      "loss": 2.8869,
      "step": 178309
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.968083381652832,
      "learning_rate": 7.254988765996895e-05,
      "loss": 3.1259,
      "step": 178310
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.860680103302002,
      "learning_rate": 7.254722038759288e-05,
      "loss": 2.9727,
      "step": 178311
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.528672695159912,
      "learning_rate": 7.25445531575047e-05,
      "loss": 2.8614,
      "step": 178312
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4871273040771484,
      "learning_rate": 7.254188596970467e-05,
      "loss": 2.701,
      "step": 178313
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4287989139556885,
      "learning_rate": 7.253921882419352e-05,
      "loss": 2.9687,
      "step": 178314
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4778265953063965,
      "learning_rate": 7.253655172097159e-05,
      "loss": 2.6438,
      "step": 178315
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3222029209136963,
      "learning_rate": 7.253388466003935e-05,
      "loss": 2.9727,
      "step": 178316
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2207984924316406,
      "learning_rate": 7.25312176413974e-05,
      "loss": 3.0442,
      "step": 178317
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.574166774749756,
      "learning_rate": 7.252855066504622e-05,
      "loss": 3.0604,
      "step": 178318
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.358835220336914,
      "learning_rate": 7.252588373098619e-05,
      "loss": 2.6922,
      "step": 178319
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.466984272003174,
      "learning_rate": 7.252321683921796e-05,
      "loss": 3.0672,
      "step": 178320
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4800713062286377,
      "learning_rate": 7.252054998974199e-05,
      "loss": 2.6966,
      "step": 178321
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.826111078262329,
      "learning_rate": 7.25178831825586e-05,
      "loss": 2.7059,
      "step": 178322
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1783549785614014,
      "learning_rate": 7.251521641766856e-05,
      "loss": 3.2517,
      "step": 178323
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6741766929626465,
      "learning_rate": 7.251254969507224e-05,
      "loss": 3.0429,
      "step": 178324
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6070871353149414,
      "learning_rate": 7.250988301477003e-05,
      "loss": 2.8159,
      "step": 178325
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.6270902156829834,
      "learning_rate": 7.250721637676262e-05,
      "loss": 2.9095,
      "step": 178326
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9219138622283936,
      "learning_rate": 7.250454978105042e-05,
      "loss": 2.8235,
      "step": 178327
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.877894639968872,
      "learning_rate": 7.25018832276338e-05,
      "loss": 2.8024,
      "step": 178328
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5818068981170654,
      "learning_rate": 7.24992167165135e-05,
      "loss": 2.9104,
      "step": 178329
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1790144443511963,
      "learning_rate": 7.249655024768976e-05,
      "loss": 2.9907,
      "step": 178330
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9031102657318115,
      "learning_rate": 7.249388382116331e-05,
      "loss": 3.1629,
      "step": 178331
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.38389253616333,
      "learning_rate": 7.249121743693457e-05,
      "loss": 2.8593,
      "step": 178332
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2597174644470215,
      "learning_rate": 7.248855109500398e-05,
      "loss": 2.9385,
      "step": 178333
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.88059663772583,
      "learning_rate": 7.248588479537197e-05,
      "loss": 2.9758,
      "step": 178334
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3652327060699463,
      "learning_rate": 7.248321853803926e-05,
      "loss": 3.1128,
      "step": 178335
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.938230514526367,
      "learning_rate": 7.248055232300607e-05,
      "loss": 2.9403,
      "step": 178336
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8681561946868896,
      "learning_rate": 7.247788615027315e-05,
      "loss": 2.8996,
      "step": 178337
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.426835060119629,
      "learning_rate": 7.247522001984089e-05,
      "loss": 3.0021,
      "step": 178338
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6469368934631348,
      "learning_rate": 7.24725539317098e-05,
      "loss": 2.8823,
      "step": 178339
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.766525983810425,
      "learning_rate": 7.246988788588026e-05,
      "loss": 3.0284,
      "step": 178340
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5647006034851074,
      "learning_rate": 7.246722188235291e-05,
      "loss": 2.8286,
      "step": 178341
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.462789297103882,
      "learning_rate": 7.246455592112816e-05,
      "loss": 2.6901,
      "step": 178342
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.510366201400757,
      "learning_rate": 7.246189000220661e-05,
      "loss": 3.1521,
      "step": 178343
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8412368297576904,
      "learning_rate": 7.245922412558869e-05,
      "loss": 3.1262,
      "step": 178344
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.66264271736145,
      "learning_rate": 7.245655829127489e-05,
      "loss": 3.0069,
      "step": 178345
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7985453605651855,
      "learning_rate": 7.245389249926562e-05,
      "loss": 2.9165,
      "step": 178346
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.775843381881714,
      "learning_rate": 7.245122674956158e-05,
      "loss": 3.0019,
      "step": 178347
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.702460527420044,
      "learning_rate": 7.244856104216305e-05,
      "loss": 3.0494,
      "step": 178348
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9679603576660156,
      "learning_rate": 7.244589537707069e-05,
      "loss": 2.7484,
      "step": 178349
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6755993366241455,
      "learning_rate": 7.2443229754285e-05,
      "loss": 2.8167,
      "step": 178350
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8192431926727295,
      "learning_rate": 7.244056417380635e-05,
      "loss": 3.0526,
      "step": 178351
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1846892833709717,
      "learning_rate": 7.243789863563522e-05,
      "loss": 3.0781,
      "step": 178352
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.243373155593872,
      "learning_rate": 7.243523313977227e-05,
      "loss": 2.804,
      "step": 178353
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3265774250030518,
      "learning_rate": 7.243256768621782e-05,
      "loss": 2.762,
      "step": 178354
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4123148918151855,
      "learning_rate": 7.242990227497254e-05,
      "loss": 2.9011,
      "step": 178355
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5469307899475098,
      "learning_rate": 7.242723690603685e-05,
      "loss": 3.076,
      "step": 178356
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.683899164199829,
      "learning_rate": 7.242457157941121e-05,
      "loss": 3.1373,
      "step": 178357
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3726463317871094,
      "learning_rate": 7.242190629509605e-05,
      "loss": 2.965,
      "step": 178358
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5099551677703857,
      "learning_rate": 7.241924105309206e-05,
      "loss": 2.7926,
      "step": 178359
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.329784870147705,
      "learning_rate": 7.241657585339954e-05,
      "loss": 2.7994,
      "step": 178360
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.604888677597046,
      "learning_rate": 7.241391069601917e-05,
      "loss": 2.8508,
      "step": 178361
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7016024589538574,
      "learning_rate": 7.241124558095123e-05,
      "loss": 2.9021,
      "step": 178362
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.6049087047576904,
      "learning_rate": 7.240858050819651e-05,
      "loss": 2.7341,
      "step": 178363
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.71854829788208,
      "learning_rate": 7.240591547775518e-05,
      "loss": 2.8979,
      "step": 178364
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.667351722717285,
      "learning_rate": 7.240325048962797e-05,
      "loss": 3.1179,
      "step": 178365
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6212477684020996,
      "learning_rate": 7.240058554381521e-05,
      "loss": 3.0462,
      "step": 178366
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.794163227081299,
      "learning_rate": 7.239792064031754e-05,
      "loss": 2.9169,
      "step": 178367
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.572023868560791,
      "learning_rate": 7.239525577913531e-05,
      "loss": 2.9063,
      "step": 178368
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5549657344818115,
      "learning_rate": 7.239259096026932e-05,
      "loss": 2.8154,
      "step": 178369
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7867088317871094,
      "learning_rate": 7.238992618371963e-05,
      "loss": 2.9874,
      "step": 178370
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.139500141143799,
      "learning_rate": 7.238726144948702e-05,
      "loss": 2.7849,
      "step": 178371
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.501708745956421,
      "learning_rate": 7.238459675757187e-05,
      "loss": 2.9056,
      "step": 178372
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.597759485244751,
      "learning_rate": 7.238193210797476e-05,
      "loss": 2.763,
      "step": 178373
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3756797313690186,
      "learning_rate": 7.23792675006961e-05,
      "loss": 3.0245,
      "step": 178374
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2150967121124268,
      "learning_rate": 7.237660293573659e-05,
      "loss": 3.1222,
      "step": 178375
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5554189682006836,
      "learning_rate": 7.23739384130964e-05,
      "loss": 2.9271,
      "step": 178376
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5340874195098877,
      "learning_rate": 7.237127393277626e-05,
      "loss": 2.7822,
      "step": 178377
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.1705873012542725,
      "learning_rate": 7.236860949477652e-05,
      "loss": 2.8195,
      "step": 178378
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.606621265411377,
      "learning_rate": 7.236594509909783e-05,
      "loss": 2.9435,
      "step": 178379
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.7375171184539795,
      "learning_rate": 7.236328074574052e-05,
      "loss": 3.0633,
      "step": 178380
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.863410711288452,
      "learning_rate": 7.236061643470537e-05,
      "loss": 3.1169,
      "step": 178381
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.905545711517334,
      "learning_rate": 7.235795216599248e-05,
      "loss": 2.7241,
      "step": 178382
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.840371608734131,
      "learning_rate": 7.235528793960264e-05,
      "loss": 3.0096,
      "step": 178383
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.570930004119873,
      "learning_rate": 7.235262375553618e-05,
      "loss": 2.871,
      "step": 178384
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.2696313858032227,
      "learning_rate": 7.234995961379374e-05,
      "loss": 2.7407,
      "step": 178385
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.061751365661621,
      "learning_rate": 7.234729551437564e-05,
      "loss": 2.8516,
      "step": 178386
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6281847953796387,
      "learning_rate": 7.234463145728265e-05,
      "loss": 3.0651,
      "step": 178387
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5618855953216553,
      "learning_rate": 7.234196744251492e-05,
      "loss": 2.9415,
      "step": 178388
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.2243857383728027,
      "learning_rate": 7.233930347007322e-05,
      "loss": 2.7595,
      "step": 178389
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7813687324523926,
      "learning_rate": 7.233663953995782e-05,
      "loss": 2.8355,
      "step": 178390
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8913629055023193,
      "learning_rate": 7.233397565216946e-05,
      "loss": 3.0701,
      "step": 178391
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.701258897781372,
      "learning_rate": 7.233131180670842e-05,
      "loss": 2.9867,
      "step": 178392
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9699904918670654,
      "learning_rate": 7.232864800357533e-05,
      "loss": 2.9363,
      "step": 178393
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.575451374053955,
      "learning_rate": 7.232598424277067e-05,
      "loss": 2.9584,
      "step": 178394
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.375263214111328,
      "learning_rate": 7.232332052429491e-05,
      "loss": 2.9188,
      "step": 178395
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5688767433166504,
      "learning_rate": 7.232065684814846e-05,
      "loss": 2.9992,
      "step": 178396
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.500859022140503,
      "learning_rate": 7.231799321433198e-05,
      "loss": 2.928,
      "step": 178397
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7863564491271973,
      "learning_rate": 7.231532962284581e-05,
      "loss": 2.9289,
      "step": 178398
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0416743755340576,
      "learning_rate": 7.231266607369061e-05,
      "loss": 3.1774,
      "step": 178399
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.459737539291382,
      "learning_rate": 7.231000256686674e-05,
      "loss": 2.9241,
      "step": 178400
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.535301446914673,
      "learning_rate": 7.230733910237469e-05,
      "loss": 2.7447,
      "step": 178401
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9521846771240234,
      "learning_rate": 7.230467568021506e-05,
      "loss": 3.1345,
      "step": 178402
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.5333638191223145,
      "learning_rate": 7.230201230038831e-05,
      "loss": 2.7619,
      "step": 178403
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3452882766723633,
      "learning_rate": 7.229934896289483e-05,
      "loss": 2.8125,
      "step": 178404
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7671549320220947,
      "learning_rate": 7.229668566773529e-05,
      "loss": 2.9359,
      "step": 178405
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.01712965965271,
      "learning_rate": 7.229402241491008e-05,
      "loss": 2.9469,
      "step": 178406
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7071900367736816,
      "learning_rate": 7.22913592044196e-05,
      "loss": 2.7882,
      "step": 178407
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5193896293640137,
      "learning_rate": 7.228869603626456e-05,
      "loss": 3.0442,
      "step": 178408
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.700080156326294,
      "learning_rate": 7.228603291044537e-05,
      "loss": 2.6141,
      "step": 178409
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.3202834129333496,
      "learning_rate": 7.22833698269624e-05,
      "loss": 3.011,
      "step": 178410
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3796892166137695,
      "learning_rate": 7.228070678581636e-05,
      "loss": 3.2751,
      "step": 178411
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.233593463897705,
      "learning_rate": 7.22780437870076e-05,
      "loss": 3.0353,
      "step": 178412
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.423706293106079,
      "learning_rate": 7.22753808305366e-05,
      "loss": 3.0062,
      "step": 178413
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2742862701416016,
      "learning_rate": 7.227271791640397e-05,
      "loss": 2.8923,
      "step": 178414
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5394883155822754,
      "learning_rate": 7.227005504461006e-05,
      "loss": 2.7929,
      "step": 178415
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.022770881652832,
      "learning_rate": 7.226739221515553e-05,
      "loss": 2.9526,
      "step": 178416
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9764981269836426,
      "learning_rate": 7.22647294280408e-05,
      "loss": 3.0045,
      "step": 178417
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.517500162124634,
      "learning_rate": 7.226206668326632e-05,
      "loss": 3.1134,
      "step": 178418
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4047582149505615,
      "learning_rate": 7.225940398083258e-05,
      "loss": 2.9902,
      "step": 178419
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.457777976989746,
      "learning_rate": 7.225674132074017e-05,
      "loss": 2.8755,
      "step": 178420
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.072032928466797,
      "learning_rate": 7.225407870298945e-05,
      "loss": 2.8776,
      "step": 178421
    },
    {
      "epoch": 2.32,
      "grad_norm": 6.659337520599365,
      "learning_rate": 7.225141612758109e-05,
      "loss": 2.6931,
      "step": 178422
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0296032428741455,
      "learning_rate": 7.22487535945155e-05,
      "loss": 2.881,
      "step": 178423
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3907840251922607,
      "learning_rate": 7.224609110379314e-05,
      "loss": 3.1504,
      "step": 178424
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.628175735473633,
      "learning_rate": 7.224342865541449e-05,
      "loss": 2.729,
      "step": 178425
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.179765224456787,
      "learning_rate": 7.224076624938011e-05,
      "loss": 2.9578,
      "step": 178426
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.2899363040924072,
      "learning_rate": 7.223810388569044e-05,
      "loss": 2.8328,
      "step": 178427
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.102348566055298,
      "learning_rate": 7.223544156434606e-05,
      "loss": 2.9144,
      "step": 178428
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.034180641174316,
      "learning_rate": 7.223277928534733e-05,
      "loss": 2.8967,
      "step": 178429
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.624541997909546,
      "learning_rate": 7.2230117048695e-05,
      "loss": 3.2404,
      "step": 178430
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4408271312713623,
      "learning_rate": 7.22274548543892e-05,
      "loss": 2.7724,
      "step": 178431
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.881988525390625,
      "learning_rate": 7.222479270243072e-05,
      "loss": 2.5318,
      "step": 178432
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.317863702774048,
      "learning_rate": 7.222213059281987e-05,
      "loss": 2.9256,
      "step": 178433
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0886213779449463,
      "learning_rate": 7.221946852555731e-05,
      "loss": 2.9857,
      "step": 178434
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.7794201374053955,
      "learning_rate": 7.221680650064337e-05,
      "loss": 3.035,
      "step": 178435
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7209408283233643,
      "learning_rate": 7.22141445180788e-05,
      "loss": 2.7865,
      "step": 178436
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.413485050201416,
      "learning_rate": 7.221148257786371e-05,
      "loss": 2.8229,
      "step": 178437
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.1006345748901367,
      "learning_rate": 7.220882067999894e-05,
      "loss": 2.9011,
      "step": 178438
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.631171941757202,
      "learning_rate": 7.220615882448474e-05,
      "loss": 2.9035,
      "step": 178439
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.705111026763916,
      "learning_rate": 7.220349701132179e-05,
      "loss": 2.9304,
      "step": 178440
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.830139398574829,
      "learning_rate": 7.220083524051046e-05,
      "loss": 2.7497,
      "step": 178441
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8979344367980957,
      "learning_rate": 7.219817351205142e-05,
      "loss": 2.9235,
      "step": 178442
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.783512592315674,
      "learning_rate": 7.219551182594489e-05,
      "loss": 2.9889,
      "step": 178443
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2110981941223145,
      "learning_rate": 7.219285018219163e-05,
      "loss": 2.8462,
      "step": 178444
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9187417030334473,
      "learning_rate": 7.219018858079189e-05,
      "loss": 3.2769,
      "step": 178445
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.296893835067749,
      "learning_rate": 7.21875270217464e-05,
      "loss": 3.1113,
      "step": 178446
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.146984100341797,
      "learning_rate": 7.218486550505545e-05,
      "loss": 3.1101,
      "step": 178447
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5684361457824707,
      "learning_rate": 7.218220403071983e-05,
      "loss": 3.0345,
      "step": 178448
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.984957695007324,
      "learning_rate": 7.217954259873964e-05,
      "loss": 2.949,
      "step": 178449
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8551557064056396,
      "learning_rate": 7.21768812091157e-05,
      "loss": 2.9984,
      "step": 178450
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.769388198852539,
      "learning_rate": 7.217421986184823e-05,
      "loss": 3.1098,
      "step": 178451
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.758305072784424,
      "learning_rate": 7.217155855693802e-05,
      "loss": 3.0661,
      "step": 178452
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3703701496124268,
      "learning_rate": 7.216889729438528e-05,
      "loss": 2.6555,
      "step": 178453
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9329564571380615,
      "learning_rate": 7.216623607419084e-05,
      "loss": 2.8713,
      "step": 178454
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.710237503051758,
      "learning_rate": 7.216357489635481e-05,
      "loss": 2.9343,
      "step": 178455
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.4660866260528564,
      "learning_rate": 7.2160913760878e-05,
      "loss": 2.8986,
      "step": 178456
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.681413412094116,
      "learning_rate": 7.215825266776064e-05,
      "loss": 3.0587,
      "step": 178457
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9918222427368164,
      "learning_rate": 7.215559161700349e-05,
      "loss": 3.0455,
      "step": 178458
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8155019283294678,
      "learning_rate": 7.21529306086068e-05,
      "loss": 2.9955,
      "step": 178459
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6301629543304443,
      "learning_rate": 7.215026964257138e-05,
      "loss": 3.0033,
      "step": 178460
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0756659507751465,
      "learning_rate": 7.214760871889735e-05,
      "loss": 3.1111,
      "step": 178461
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.443697690963745,
      "learning_rate": 7.214494783758542e-05,
      "loss": 2.5218,
      "step": 178462
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.280683517456055,
      "learning_rate": 7.214228699863601e-05,
      "loss": 2.8471,
      "step": 178463
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.603893518447876,
      "learning_rate": 7.213962620204972e-05,
      "loss": 3.0791,
      "step": 178464
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.035972833633423,
      "learning_rate": 7.21369654478269e-05,
      "loss": 2.9825,
      "step": 178465
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.426356554031372,
      "learning_rate": 7.213430473596827e-05,
      "loss": 3.0444,
      "step": 178466
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.094322443008423,
      "learning_rate": 7.2131644066474e-05,
      "loss": 2.9111,
      "step": 178467
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.721942901611328,
      "learning_rate": 7.212898343934485e-05,
      "loss": 3.2965,
      "step": 178468
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7836368083953857,
      "learning_rate": 7.212632285458116e-05,
      "loss": 3.09,
      "step": 178469
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.405557870864868,
      "learning_rate": 7.212366231218356e-05,
      "loss": 2.9993,
      "step": 178470
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.495725393295288,
      "learning_rate": 7.212100181215238e-05,
      "loss": 3.0447,
      "step": 178471
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.644132614135742,
      "learning_rate": 7.211834135448838e-05,
      "loss": 3.0087,
      "step": 178472
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.890617847442627,
      "learning_rate": 7.211568093919171e-05,
      "loss": 2.7726,
      "step": 178473
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6143863201141357,
      "learning_rate": 7.211302056626316e-05,
      "loss": 2.9183,
      "step": 178474
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5163228511810303,
      "learning_rate": 7.211036023570297e-05,
      "loss": 2.9435,
      "step": 178475
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3392200469970703,
      "learning_rate": 7.210769994751185e-05,
      "loss": 3.097,
      "step": 178476
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.1314280033111572,
      "learning_rate": 7.210503970169015e-05,
      "loss": 2.9734,
      "step": 178477
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.167506694793701,
      "learning_rate": 7.21023794982385e-05,
      "loss": 2.9348,
      "step": 178478
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.432528257369995,
      "learning_rate": 7.209971933715732e-05,
      "loss": 3.0236,
      "step": 178479
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.1438093185424805,
      "learning_rate": 7.20970592184471e-05,
      "loss": 3.2319,
      "step": 178480
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7045130729675293,
      "learning_rate": 7.209439914210824e-05,
      "loss": 2.9423,
      "step": 178481
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.808627128601074,
      "learning_rate": 7.209173910814144e-05,
      "loss": 2.8912,
      "step": 178482
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7105350494384766,
      "learning_rate": 7.2089079116547e-05,
      "loss": 2.9879,
      "step": 178483
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.369555950164795,
      "learning_rate": 7.20864191673256e-05,
      "loss": 2.8961,
      "step": 178484
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5349485874176025,
      "learning_rate": 7.208375926047763e-05,
      "loss": 2.8204,
      "step": 178485
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9796950817108154,
      "learning_rate": 7.208109939600354e-05,
      "loss": 2.9206,
      "step": 178486
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.6519103050231934,
      "learning_rate": 7.207843957390384e-05,
      "loss": 2.8714,
      "step": 178487
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.729306221008301,
      "learning_rate": 7.207577979417914e-05,
      "loss": 3.0883,
      "step": 178488
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.41493558883667,
      "learning_rate": 7.207312005682978e-05,
      "loss": 2.846,
      "step": 178489
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.666746139526367,
      "learning_rate": 7.207046036185642e-05,
      "loss": 2.8681,
      "step": 178490
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9438107013702393,
      "learning_rate": 7.206780070925942e-05,
      "loss": 2.9066,
      "step": 178491
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6673948764801025,
      "learning_rate": 7.206514109903927e-05,
      "loss": 3.0091,
      "step": 178492
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.080963134765625,
      "learning_rate": 7.206248153119662e-05,
      "loss": 2.7034,
      "step": 178493
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2608070373535156,
      "learning_rate": 7.205982200573179e-05,
      "loss": 2.9396,
      "step": 178494
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3689281940460205,
      "learning_rate": 7.20571625226453e-05,
      "loss": 2.9453,
      "step": 178495
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3482298851013184,
      "learning_rate": 7.205450308193779e-05,
      "loss": 2.9289,
      "step": 178496
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.661405324935913,
      "learning_rate": 7.20518436836096e-05,
      "loss": 3.0962,
      "step": 178497
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5095927715301514,
      "learning_rate": 7.204918432766121e-05,
      "loss": 2.7376,
      "step": 178498
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.967615842819214,
      "learning_rate": 7.20465250140933e-05,
      "loss": 2.9471,
      "step": 178499
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.737173080444336,
      "learning_rate": 7.20438657429061e-05,
      "loss": 2.8944,
      "step": 178500
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8530616760253906,
      "learning_rate": 7.204120651410039e-05,
      "loss": 2.985,
      "step": 178501
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.902019500732422,
      "learning_rate": 7.203854732767648e-05,
      "loss": 2.8512,
      "step": 178502
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.1692137718200684,
      "learning_rate": 7.203588818363493e-05,
      "loss": 2.9965,
      "step": 178503
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5937235355377197,
      "learning_rate": 7.203322908197609e-05,
      "loss": 2.9163,
      "step": 178504
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.16681170463562,
      "learning_rate": 7.20305700227007e-05,
      "loss": 3.4654,
      "step": 178505
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6606569290161133,
      "learning_rate": 7.202791100580902e-05,
      "loss": 3.1441,
      "step": 178506
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5093955993652344,
      "learning_rate": 7.202525203130178e-05,
      "loss": 2.8326,
      "step": 178507
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8795557022094727,
      "learning_rate": 7.20225930991793e-05,
      "loss": 2.7919,
      "step": 178508
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.736708641052246,
      "learning_rate": 7.201993420944215e-05,
      "loss": 2.8746,
      "step": 178509
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7646145820617676,
      "learning_rate": 7.201727536209073e-05,
      "loss": 3.1948,
      "step": 178510
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7357378005981445,
      "learning_rate": 7.201461655712567e-05,
      "loss": 3.3088,
      "step": 178511
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.508467197418213,
      "learning_rate": 7.20119577945473e-05,
      "loss": 2.979,
      "step": 178512
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.165666103363037,
      "learning_rate": 7.20092990743563e-05,
      "loss": 2.9594,
      "step": 178513
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.509336233139038,
      "learning_rate": 7.200664039655298e-05,
      "loss": 2.7892,
      "step": 178514
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.866880416870117,
      "learning_rate": 7.200398176113813e-05,
      "loss": 2.9093,
      "step": 178515
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.078913927078247,
      "learning_rate": 7.200132316811186e-05,
      "loss": 2.9032,
      "step": 178516
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7908785343170166,
      "learning_rate": 7.199866461747493e-05,
      "loss": 2.8985,
      "step": 178517
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.138826608657837,
      "learning_rate": 7.199600610922766e-05,
      "loss": 2.8062,
      "step": 178518
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.95732045173645,
      "learning_rate": 7.199334764337074e-05,
      "loss": 2.9928,
      "step": 178519
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5537326335906982,
      "learning_rate": 7.199068921990444e-05,
      "loss": 2.6313,
      "step": 178520
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.608480215072632,
      "learning_rate": 7.198803083882957e-05,
      "loss": 3.0792,
      "step": 178521
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7651147842407227,
      "learning_rate": 7.198537250014626e-05,
      "loss": 2.8704,
      "step": 178522
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6185269355773926,
      "learning_rate": 7.198271420385527e-05,
      "loss": 2.7546,
      "step": 178523
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.9150383472442627,
      "learning_rate": 7.198005594995689e-05,
      "loss": 2.8565,
      "step": 178524
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5319933891296387,
      "learning_rate": 7.197739773845181e-05,
      "loss": 2.9965,
      "step": 178525
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.416125774383545,
      "learning_rate": 7.197473956934035e-05,
      "loss": 2.9678,
      "step": 178526
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0219430923461914,
      "learning_rate": 7.197208144262328e-05,
      "loss": 2.8907,
      "step": 178527
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.962918758392334,
      "learning_rate": 7.19694233583007e-05,
      "loss": 3.1274,
      "step": 178528
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.2131147384643555,
      "learning_rate": 7.196676531637345e-05,
      "loss": 3.2279,
      "step": 178529
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.96726131439209,
      "learning_rate": 7.196410731684174e-05,
      "loss": 2.8197,
      "step": 178530
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.43365216255188,
      "learning_rate": 7.196144935970632e-05,
      "loss": 2.6436,
      "step": 178531
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.515605926513672,
      "learning_rate": 7.195879144496748e-05,
      "loss": 2.7668,
      "step": 178532
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5266311168670654,
      "learning_rate": 7.195613357262597e-05,
      "loss": 2.7204,
      "step": 178533
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5841240882873535,
      "learning_rate": 7.195347574268196e-05,
      "loss": 3.0279,
      "step": 178534
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.169511556625366,
      "learning_rate": 7.195081795513619e-05,
      "loss": 2.971,
      "step": 178535
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8722732067108154,
      "learning_rate": 7.194816020998896e-05,
      "loss": 2.7535,
      "step": 178536
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6283457279205322,
      "learning_rate": 7.194550250724098e-05,
      "loss": 3.0086,
      "step": 178537
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.146087408065796,
      "learning_rate": 7.194284484689256e-05,
      "loss": 2.617,
      "step": 178538
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.244036912918091,
      "learning_rate": 7.194018722894442e-05,
      "loss": 2.7385,
      "step": 178539
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.266962766647339,
      "learning_rate": 7.193752965339672e-05,
      "loss": 3.0411,
      "step": 178540
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6196956634521484,
      "learning_rate": 7.193487212025024e-05,
      "loss": 2.9486,
      "step": 178541
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.526963472366333,
      "learning_rate": 7.193221462950528e-05,
      "loss": 3.0769,
      "step": 178542
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.223175048828125,
      "learning_rate": 7.192955718116253e-05,
      "loss": 2.7496,
      "step": 178543
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.944556713104248,
      "learning_rate": 7.192689977522231e-05,
      "loss": 2.8256,
      "step": 178544
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.262777328491211,
      "learning_rate": 7.192424241168531e-05,
      "loss": 3.0233,
      "step": 178545
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.0045318603515625,
      "learning_rate": 7.192158509055175e-05,
      "loss": 3.0879,
      "step": 178546
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.940645217895508,
      "learning_rate": 7.191892781182236e-05,
      "loss": 2.9686,
      "step": 178547
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.3214128017425537,
      "learning_rate": 7.191627057549745e-05,
      "loss": 2.6994,
      "step": 178548
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.63600754737854,
      "learning_rate": 7.191361338157772e-05,
      "loss": 2.8271,
      "step": 178549
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.4289145469665527,
      "learning_rate": 7.191095623006343e-05,
      "loss": 2.9958,
      "step": 178550
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5880322456359863,
      "learning_rate": 7.19082991209554e-05,
      "loss": 2.9956,
      "step": 178551
    },
    {
      "epoch": 2.32,
      "grad_norm": 5.047848701477051,
      "learning_rate": 7.190564205425373e-05,
      "loss": 2.8282,
      "step": 178552
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.8215482234954834,
      "learning_rate": 7.190298502995921e-05,
      "loss": 3.0597,
      "step": 178553
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.5280537605285645,
      "learning_rate": 7.190032804807215e-05,
      "loss": 2.9563,
      "step": 178554
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.116542339324951,
      "learning_rate": 7.189767110859321e-05,
      "loss": 2.697,
      "step": 178555
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7185685634613037,
      "learning_rate": 7.18950142115227e-05,
      "loss": 2.9437,
      "step": 178556
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.041447639465332,
      "learning_rate": 7.189235735686138e-05,
      "loss": 2.9246,
      "step": 178557
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5909252166748047,
      "learning_rate": 7.188970054460941e-05,
      "loss": 2.9957,
      "step": 178558
    },
    {
      "epoch": 2.32,
      "grad_norm": 3.71362566947937,
      "learning_rate": 7.188704377476755e-05,
      "loss": 2.69,
      "step": 178559
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.5862202644348145,
      "learning_rate": 7.188438704733611e-05,
      "loss": 3.1449,
      "step": 178560
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.786895275115967,
      "learning_rate": 7.188173036231574e-05,
      "loss": 2.9882,
      "step": 178561
    },
    {
      "epoch": 2.32,
      "grad_norm": 4.202358245849609,
      "learning_rate": 7.187907371970677e-05,
      "loss": 2.9166,
      "step": 178562
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.156477451324463,
      "learning_rate": 7.187641711950987e-05,
      "loss": 2.7577,
      "step": 178563
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.15226674079895,
      "learning_rate": 7.187376056172549e-05,
      "loss": 2.9368,
      "step": 178564
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0150349140167236,
      "learning_rate": 7.187110404635404e-05,
      "loss": 2.8975,
      "step": 178565
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.431352376937866,
      "learning_rate": 7.1868447573396e-05,
      "loss": 2.6037,
      "step": 178566
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6171751022338867,
      "learning_rate": 7.1865791142852e-05,
      "loss": 3.0957,
      "step": 178567
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8158042430877686,
      "learning_rate": 7.186313475472237e-05,
      "loss": 2.9108,
      "step": 178568
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.081909418106079,
      "learning_rate": 7.186047840900778e-05,
      "loss": 3.0935,
      "step": 178569
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8098175525665283,
      "learning_rate": 7.185782210570862e-05,
      "loss": 2.8059,
      "step": 178570
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.698603391647339,
      "learning_rate": 7.185516584482542e-05,
      "loss": 2.9012,
      "step": 178571
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7869060039520264,
      "learning_rate": 7.185250962635853e-05,
      "loss": 2.9043,
      "step": 178572
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.628460168838501,
      "learning_rate": 7.184985345030866e-05,
      "loss": 2.9308,
      "step": 178573
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.8249077796936035,
      "learning_rate": 7.184719731667615e-05,
      "loss": 2.6665,
      "step": 178574
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5020148754119873,
      "learning_rate": 7.184454122546162e-05,
      "loss": 3.0826,
      "step": 178575
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.408787250518799,
      "learning_rate": 7.184188517666548e-05,
      "loss": 2.8423,
      "step": 178576
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4455323219299316,
      "learning_rate": 7.183922917028818e-05,
      "loss": 2.6977,
      "step": 178577
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.1318178176879883,
      "learning_rate": 7.183657320633035e-05,
      "loss": 2.8076,
      "step": 178578
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.707458257675171,
      "learning_rate": 7.183391728479242e-05,
      "loss": 2.9037,
      "step": 178579
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.394808769226074,
      "learning_rate": 7.183126140567476e-05,
      "loss": 3.1648,
      "step": 178580
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6984119415283203,
      "learning_rate": 7.182860556897809e-05,
      "loss": 2.9474,
      "step": 178581
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4679243564605713,
      "learning_rate": 7.182594977470276e-05,
      "loss": 3.0327,
      "step": 178582
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.159714937210083,
      "learning_rate": 7.182329402284923e-05,
      "loss": 2.7973,
      "step": 178583
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7889645099639893,
      "learning_rate": 7.182063831341813e-05,
      "loss": 2.9285,
      "step": 178584
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8730123043060303,
      "learning_rate": 7.18179826464099e-05,
      "loss": 2.8885,
      "step": 178585
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7883951663970947,
      "learning_rate": 7.181532702182493e-05,
      "loss": 2.9607,
      "step": 178586
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.624755382537842,
      "learning_rate": 7.181267143966386e-05,
      "loss": 2.6764,
      "step": 178587
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5793395042419434,
      "learning_rate": 7.181001589992712e-05,
      "loss": 3.2609,
      "step": 178588
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.2413277626037598,
      "learning_rate": 7.180736040261514e-05,
      "loss": 2.9084,
      "step": 178589
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5213003158569336,
      "learning_rate": 7.180470494772856e-05,
      "loss": 3.1308,
      "step": 178590
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2947440147399902,
      "learning_rate": 7.180204953526771e-05,
      "loss": 3.0724,
      "step": 178591
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.626664638519287,
      "learning_rate": 7.179939416523323e-05,
      "loss": 2.8165,
      "step": 178592
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.534543037414551,
      "learning_rate": 7.179673883762556e-05,
      "loss": 2.9624,
      "step": 178593
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4581921100616455,
      "learning_rate": 7.17940835524452e-05,
      "loss": 2.8869,
      "step": 178594
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.582883834838867,
      "learning_rate": 7.179142830969254e-05,
      "loss": 2.6666,
      "step": 178595
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.590524673461914,
      "learning_rate": 7.178877310936823e-05,
      "loss": 2.8759,
      "step": 178596
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.629279136657715,
      "learning_rate": 7.178611795147261e-05,
      "loss": 3.0606,
      "step": 178597
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5954205989837646,
      "learning_rate": 7.178346283600635e-05,
      "loss": 3.1067,
      "step": 178598
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6746888160705566,
      "learning_rate": 7.178080776296976e-05,
      "loss": 2.8563,
      "step": 178599
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1476597785949707,
      "learning_rate": 7.177815273236362e-05,
      "loss": 2.9902,
      "step": 178600
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.876591205596924,
      "learning_rate": 7.177549774418805e-05,
      "loss": 2.84,
      "step": 178601
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1331679821014404,
      "learning_rate": 7.17728427984438e-05,
      "loss": 2.8585,
      "step": 178602
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4610819816589355,
      "learning_rate": 7.17701878951312e-05,
      "loss": 2.7641,
      "step": 178603
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4482603073120117,
      "learning_rate": 7.176753303425094e-05,
      "loss": 2.9895,
      "step": 178604
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.2345516681671143,
      "learning_rate": 7.176487821580329e-05,
      "loss": 2.7722,
      "step": 178605
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4461724758148193,
      "learning_rate": 7.176222343978903e-05,
      "loss": 2.9497,
      "step": 178606
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.476618528366089,
      "learning_rate": 7.175956870620831e-05,
      "loss": 2.9065,
      "step": 178607
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.298064947128296,
      "learning_rate": 7.17569140150619e-05,
      "loss": 2.8367,
      "step": 178608
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4955530166625977,
      "learning_rate": 7.175425936635007e-05,
      "loss": 2.786,
      "step": 178609
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3776066303253174,
      "learning_rate": 7.175160476007355e-05,
      "loss": 2.9409,
      "step": 178610
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.201803684234619,
      "learning_rate": 7.17489501962326e-05,
      "loss": 2.5957,
      "step": 178611
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.554751396179199,
      "learning_rate": 7.174629567482802e-05,
      "loss": 3.0209,
      "step": 178612
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.0856289863586426,
      "learning_rate": 7.174364119585995e-05,
      "loss": 2.7141,
      "step": 178613
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.350888967514038,
      "learning_rate": 7.17409867593291e-05,
      "loss": 2.9811,
      "step": 178614
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.918299436569214,
      "learning_rate": 7.173833236523583e-05,
      "loss": 3.0993,
      "step": 178615
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.569457530975342,
      "learning_rate": 7.173567801358083e-05,
      "loss": 2.9824,
      "step": 178616
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8607261180877686,
      "learning_rate": 7.173302370436438e-05,
      "loss": 3.0717,
      "step": 178617
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.555448293685913,
      "learning_rate": 7.17303694375872e-05,
      "loss": 3.0287,
      "step": 178618
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3307390213012695,
      "learning_rate": 7.172771521324953e-05,
      "loss": 2.8234,
      "step": 178619
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9482812881469727,
      "learning_rate": 7.172506103135206e-05,
      "loss": 2.724,
      "step": 178620
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4986674785614014,
      "learning_rate": 7.172240689189511e-05,
      "loss": 2.8424,
      "step": 178621
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.192671775817871,
      "learning_rate": 7.171975279487938e-05,
      "loss": 3.1381,
      "step": 178622
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9181807041168213,
      "learning_rate": 7.171709874030516e-05,
      "loss": 3.0919,
      "step": 178623
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.600696563720703,
      "learning_rate": 7.17144447281732e-05,
      "loss": 3.0894,
      "step": 178624
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.209949016571045,
      "learning_rate": 7.17117907584837e-05,
      "loss": 3.1871,
      "step": 178625
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9293971061706543,
      "learning_rate": 7.170913683123733e-05,
      "loss": 2.8167,
      "step": 178626
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8527138233184814,
      "learning_rate": 7.170648294643448e-05,
      "loss": 2.6725,
      "step": 178627
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4719347953796387,
      "learning_rate": 7.170382910407577e-05,
      "loss": 2.9459,
      "step": 178628
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.857266426086426,
      "learning_rate": 7.170117530416155e-05,
      "loss": 2.8939,
      "step": 178629
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.3939390182495117,
      "learning_rate": 7.169852154669252e-05,
      "loss": 2.8065,
      "step": 178630
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.158094882965088,
      "learning_rate": 7.169586783166899e-05,
      "loss": 2.8155,
      "step": 178631
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.7864818572998047,
      "learning_rate": 7.169321415909151e-05,
      "loss": 2.7671,
      "step": 178632
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5929715633392334,
      "learning_rate": 7.16905605289605e-05,
      "loss": 2.6673,
      "step": 178633
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.585073232650757,
      "learning_rate": 7.168790694127662e-05,
      "loss": 3.0859,
      "step": 178634
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5558865070343018,
      "learning_rate": 7.168525339604018e-05,
      "loss": 3.1122,
      "step": 178635
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.65103816986084,
      "learning_rate": 7.168259989325181e-05,
      "loss": 2.7978,
      "step": 178636
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.155031204223633,
      "learning_rate": 7.1679946432912e-05,
      "loss": 3.0877,
      "step": 178637
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.8593270778656006,
      "learning_rate": 7.167729301502118e-05,
      "loss": 2.7309,
      "step": 178638
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.948782205581665,
      "learning_rate": 7.167463963957979e-05,
      "loss": 2.6579,
      "step": 178639
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5020229816436768,
      "learning_rate": 7.167198630658847e-05,
      "loss": 2.8842,
      "step": 178640
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.347985029220581,
      "learning_rate": 7.166933301604753e-05,
      "loss": 2.9764,
      "step": 178641
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.613389730453491,
      "learning_rate": 7.166667976795769e-05,
      "loss": 2.986,
      "step": 178642
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.533283233642578,
      "learning_rate": 7.16640265623193e-05,
      "loss": 3.2823,
      "step": 178643
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.614427089691162,
      "learning_rate": 7.166137339913288e-05,
      "loss": 3.1497,
      "step": 178644
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.677116870880127,
      "learning_rate": 7.165872027839884e-05,
      "loss": 2.8405,
      "step": 178645
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5399832725524902,
      "learning_rate": 7.165606720011785e-05,
      "loss": 2.9819,
      "step": 178646
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5411205291748047,
      "learning_rate": 7.165341416429021e-05,
      "loss": 3.0163,
      "step": 178647
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.285205841064453,
      "learning_rate": 7.165076117091659e-05,
      "loss": 2.872,
      "step": 178648
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.498480796813965,
      "learning_rate": 7.164810821999745e-05,
      "loss": 2.7592,
      "step": 178649
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.377824306488037,
      "learning_rate": 7.164545531153317e-05,
      "loss": 3.0482,
      "step": 178650
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9239163398742676,
      "learning_rate": 7.164280244552427e-05,
      "loss": 3.0574,
      "step": 178651
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5697431564331055,
      "learning_rate": 7.164014962197135e-05,
      "loss": 2.7833,
      "step": 178652
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5319061279296875,
      "learning_rate": 7.163749684087475e-05,
      "loss": 3.102,
      "step": 178653
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.8695249557495117,
      "learning_rate": 7.163484410223517e-05,
      "loss": 2.9246,
      "step": 178654
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.852926015853882,
      "learning_rate": 7.163219140605295e-05,
      "loss": 3.1035,
      "step": 178655
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.132890224456787,
      "learning_rate": 7.162953875232861e-05,
      "loss": 3.0918,
      "step": 178656
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.79573917388916,
      "learning_rate": 7.162688614106257e-05,
      "loss": 3.0292,
      "step": 178657
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4008853435516357,
      "learning_rate": 7.16242335722555e-05,
      "loss": 2.7084,
      "step": 178658
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.715012073516846,
      "learning_rate": 7.162158104590769e-05,
      "loss": 2.9261,
      "step": 178659
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.66030740737915,
      "learning_rate": 7.161892856201986e-05,
      "loss": 3.2986,
      "step": 178660
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.956397533416748,
      "learning_rate": 7.161627612059236e-05,
      "loss": 2.9891,
      "step": 178661
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.761263608932495,
      "learning_rate": 7.161362372162564e-05,
      "loss": 2.8696,
      "step": 178662
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6382038593292236,
      "learning_rate": 7.161097136512031e-05,
      "loss": 2.8582,
      "step": 178663
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.137618064880371,
      "learning_rate": 7.160831905107681e-05,
      "loss": 2.887,
      "step": 178664
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.597330570220947,
      "learning_rate": 7.160566677949558e-05,
      "loss": 3.0737,
      "step": 178665
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9043128490448,
      "learning_rate": 7.160301455037722e-05,
      "loss": 2.8727,
      "step": 178666
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2112104892730713,
      "learning_rate": 7.160036236372219e-05,
      "loss": 3.2526,
      "step": 178667
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8854053020477295,
      "learning_rate": 7.15977102195309e-05,
      "loss": 2.9793,
      "step": 178668
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5402028560638428,
      "learning_rate": 7.159505811780397e-05,
      "loss": 2.8961,
      "step": 178669
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.281336307525635,
      "learning_rate": 7.159240605854184e-05,
      "loss": 3.1218,
      "step": 178670
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.485135555267334,
      "learning_rate": 7.158975404174488e-05,
      "loss": 3.0372,
      "step": 178671
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.482451915740967,
      "learning_rate": 7.158710206741381e-05,
      "loss": 2.7951,
      "step": 178672
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7688474655151367,
      "learning_rate": 7.1584450135549e-05,
      "loss": 3.0503,
      "step": 178673
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.239011287689209,
      "learning_rate": 7.158179824615085e-05,
      "loss": 2.8695,
      "step": 178674
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.960381507873535,
      "learning_rate": 7.157914639922007e-05,
      "loss": 3.1257,
      "step": 178675
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7978885173797607,
      "learning_rate": 7.157649459475696e-05,
      "loss": 3.2133,
      "step": 178676
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6358225345611572,
      "learning_rate": 7.157384283276215e-05,
      "loss": 2.7076,
      "step": 178677
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2082695960998535,
      "learning_rate": 7.157119111323609e-05,
      "loss": 2.8403,
      "step": 178678
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3504703044891357,
      "learning_rate": 7.156853943617925e-05,
      "loss": 3.1711,
      "step": 178679
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4596920013427734,
      "learning_rate": 7.156588780159205e-05,
      "loss": 3.0277,
      "step": 178680
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9532363414764404,
      "learning_rate": 7.156323620947514e-05,
      "loss": 2.8658,
      "step": 178681
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.783952236175537,
      "learning_rate": 7.156058465982885e-05,
      "loss": 2.9532,
      "step": 178682
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5437209606170654,
      "learning_rate": 7.155793315265384e-05,
      "loss": 2.8855,
      "step": 178683
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.460313320159912,
      "learning_rate": 7.155528168795054e-05,
      "loss": 2.8382,
      "step": 178684
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.391630172729492,
      "learning_rate": 7.15526302657194e-05,
      "loss": 2.948,
      "step": 178685
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2166779041290283,
      "learning_rate": 7.154997888596088e-05,
      "loss": 3.0478,
      "step": 178686
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.1752686500549316,
      "learning_rate": 7.15473275486756e-05,
      "loss": 2.5976,
      "step": 178687
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.860793113708496,
      "learning_rate": 7.15446762538639e-05,
      "loss": 3.2436,
      "step": 178688
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6993730068206787,
      "learning_rate": 7.154202500152647e-05,
      "loss": 2.6872,
      "step": 178689
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3663928508758545,
      "learning_rate": 7.153937379166359e-05,
      "loss": 2.8486,
      "step": 178690
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.006417751312256,
      "learning_rate": 7.153672262427593e-05,
      "loss": 2.9851,
      "step": 178691
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8002073764801025,
      "learning_rate": 7.15340714993639e-05,
      "loss": 2.9115,
      "step": 178692
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2189650535583496,
      "learning_rate": 7.153142041692803e-05,
      "loss": 3.049,
      "step": 178693
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1066954135894775,
      "learning_rate": 7.152876937696867e-05,
      "loss": 3.0825,
      "step": 178694
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5232372283935547,
      "learning_rate": 7.152611837948649e-05,
      "loss": 3.0357,
      "step": 178695
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4188337326049805,
      "learning_rate": 7.152346742448185e-05,
      "loss": 2.9162,
      "step": 178696
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.918905258178711,
      "learning_rate": 7.152081651195543e-05,
      "loss": 3.0042,
      "step": 178697
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9484076499938965,
      "learning_rate": 7.151816564190755e-05,
      "loss": 2.9369,
      "step": 178698
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4593496322631836,
      "learning_rate": 7.151551481433876e-05,
      "loss": 2.9303,
      "step": 178699
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7657625675201416,
      "learning_rate": 7.15128640292495e-05,
      "loss": 3.0693,
      "step": 178700
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.585045099258423,
      "learning_rate": 7.15102132866404e-05,
      "loss": 2.9128,
      "step": 178701
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.446471691131592,
      "learning_rate": 7.150756258651174e-05,
      "loss": 2.8381,
      "step": 178702
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4554407596588135,
      "learning_rate": 7.150491192886422e-05,
      "loss": 3.0227,
      "step": 178703
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.460488796234131,
      "learning_rate": 7.150226131369831e-05,
      "loss": 2.8179,
      "step": 178704
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3767006397247314,
      "learning_rate": 7.14996107410144e-05,
      "loss": 3.0232,
      "step": 178705
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.658048391342163,
      "learning_rate": 7.149696021081294e-05,
      "loss": 3.0494,
      "step": 178706
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.9613893032073975,
      "learning_rate": 7.149430972309457e-05,
      "loss": 2.9644,
      "step": 178707
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.802873134613037,
      "learning_rate": 7.149165927785969e-05,
      "loss": 2.9963,
      "step": 178708
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3273837566375732,
      "learning_rate": 7.148900887510889e-05,
      "loss": 2.9923,
      "step": 178709
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6653664112091064,
      "learning_rate": 7.148635851484257e-05,
      "loss": 3.0405,
      "step": 178710
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.484952926635742,
      "learning_rate": 7.148370819706129e-05,
      "loss": 3.0444,
      "step": 178711
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.829079627990723,
      "learning_rate": 7.148105792176537e-05,
      "loss": 2.7932,
      "step": 178712
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.921541452407837,
      "learning_rate": 7.147840768895556e-05,
      "loss": 3.1922,
      "step": 178713
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7152392864227295,
      "learning_rate": 7.147575749863214e-05,
      "loss": 2.8339,
      "step": 178714
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8514928817749023,
      "learning_rate": 7.147310735079576e-05,
      "loss": 2.847,
      "step": 178715
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.600048542022705,
      "learning_rate": 7.147045724544686e-05,
      "loss": 2.9598,
      "step": 178716
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.3571274280548096,
      "learning_rate": 7.14678071825859e-05,
      "loss": 2.9685,
      "step": 178717
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.01229190826416,
      "learning_rate": 7.146515716221329e-05,
      "loss": 3.124,
      "step": 178718
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.417527198791504,
      "learning_rate": 7.146250718432973e-05,
      "loss": 3.0646,
      "step": 178719
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6106677055358887,
      "learning_rate": 7.145985724893551e-05,
      "loss": 3.074,
      "step": 178720
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1053433418273926,
      "learning_rate": 7.145720735603132e-05,
      "loss": 3.0545,
      "step": 178721
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.387579917907715,
      "learning_rate": 7.145455750561751e-05,
      "loss": 3.0854,
      "step": 178722
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.551257610321045,
      "learning_rate": 7.145190769769463e-05,
      "loss": 2.9104,
      "step": 178723
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7606265544891357,
      "learning_rate": 7.14492579322631e-05,
      "loss": 3.0528,
      "step": 178724
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.85957670211792,
      "learning_rate": 7.144660820932353e-05,
      "loss": 3.214,
      "step": 178725
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.582514524459839,
      "learning_rate": 7.144395852887624e-05,
      "loss": 2.9573,
      "step": 178726
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.2929325103759766,
      "learning_rate": 7.144130889092194e-05,
      "loss": 2.8924,
      "step": 178727
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0963637828826904,
      "learning_rate": 7.143865929546103e-05,
      "loss": 3.1162,
      "step": 178728
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.930036783218384,
      "learning_rate": 7.143600974249397e-05,
      "loss": 2.937,
      "step": 178729
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.861440896987915,
      "learning_rate": 7.143336023202119e-05,
      "loss": 3.1239,
      "step": 178730
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7291221618652344,
      "learning_rate": 7.143071076404334e-05,
      "loss": 2.888,
      "step": 178731
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.805690050125122,
      "learning_rate": 7.142806133856074e-05,
      "loss": 2.8392,
      "step": 178732
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7084109783172607,
      "learning_rate": 7.142541195557408e-05,
      "loss": 2.8879,
      "step": 178733
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.567209243774414,
      "learning_rate": 7.142276261508375e-05,
      "loss": 2.7718,
      "step": 178734
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.769202470779419,
      "learning_rate": 7.142011331709024e-05,
      "loss": 2.9785,
      "step": 178735
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.287394046783447,
      "learning_rate": 7.141746406159396e-05,
      "loss": 2.911,
      "step": 178736
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.397799015045166,
      "learning_rate": 7.141481484859559e-05,
      "loss": 2.8596,
      "step": 178737
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.881187677383423,
      "learning_rate": 7.141216567809542e-05,
      "loss": 2.9301,
      "step": 178738
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.001184940338135,
      "learning_rate": 7.140951655009411e-05,
      "loss": 3.375,
      "step": 178739
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7054994106292725,
      "learning_rate": 7.140686746459212e-05,
      "loss": 2.9977,
      "step": 178740
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5144267082214355,
      "learning_rate": 7.140421842158989e-05,
      "loss": 2.8621,
      "step": 178741
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8134589195251465,
      "learning_rate": 7.140156942108784e-05,
      "loss": 2.903,
      "step": 178742
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.201472520828247,
      "learning_rate": 7.139892046308666e-05,
      "loss": 2.7779,
      "step": 178743
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8339712619781494,
      "learning_rate": 7.139627154758664e-05,
      "loss": 2.8908,
      "step": 178744
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1443774700164795,
      "learning_rate": 7.139362267458847e-05,
      "loss": 2.9096,
      "step": 178745
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.293215274810791,
      "learning_rate": 7.139097384409253e-05,
      "loss": 2.8912,
      "step": 178746
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0356626510620117,
      "learning_rate": 7.138832505609932e-05,
      "loss": 2.8412,
      "step": 178747
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.106757879257202,
      "learning_rate": 7.138567631060924e-05,
      "loss": 2.6676,
      "step": 178748
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.989293336868286,
      "learning_rate": 7.138302760762297e-05,
      "loss": 2.9172,
      "step": 178749
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9430758953094482,
      "learning_rate": 7.138037894714082e-05,
      "loss": 3.0841,
      "step": 178750
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9340381622314453,
      "learning_rate": 7.137773032916346e-05,
      "loss": 2.8399,
      "step": 178751
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7240357398986816,
      "learning_rate": 7.137508175369131e-05,
      "loss": 2.723,
      "step": 178752
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2312538623809814,
      "learning_rate": 7.137243322072474e-05,
      "loss": 2.8011,
      "step": 178753
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.020162582397461,
      "learning_rate": 7.136978473026445e-05,
      "loss": 2.7483,
      "step": 178754
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.781672239303589,
      "learning_rate": 7.136713628231084e-05,
      "loss": 2.7311,
      "step": 178755
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.2106308937072754,
      "learning_rate": 7.136448787686429e-05,
      "loss": 2.8463,
      "step": 178756
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.554870367050171,
      "learning_rate": 7.136183951392551e-05,
      "loss": 2.7511,
      "step": 178757
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.207215785980225,
      "learning_rate": 7.135919119349476e-05,
      "loss": 2.9472,
      "step": 178758
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.026273012161255,
      "learning_rate": 7.135654291557277e-05,
      "loss": 3.1158,
      "step": 178759
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.939100980758667,
      "learning_rate": 7.13538946801599e-05,
      "loss": 2.9691,
      "step": 178760
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5765719413757324,
      "learning_rate": 7.135124648725658e-05,
      "loss": 2.7791,
      "step": 178761
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.304342269897461,
      "learning_rate": 7.134859833686344e-05,
      "loss": 3.0416,
      "step": 178762
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.277761459350586,
      "learning_rate": 7.134595022898092e-05,
      "loss": 3.0263,
      "step": 178763
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.605034351348877,
      "learning_rate": 7.134330216360945e-05,
      "loss": 2.7898,
      "step": 178764
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.291850566864014,
      "learning_rate": 7.134065414074963e-05,
      "loss": 2.7981,
      "step": 178765
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.088308811187744,
      "learning_rate": 7.13380061604019e-05,
      "loss": 3.086,
      "step": 178766
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9791672229766846,
      "learning_rate": 7.133535822256668e-05,
      "loss": 3.0362,
      "step": 178767
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.7409794330596924,
      "learning_rate": 7.13327103272446e-05,
      "loss": 2.8948,
      "step": 178768
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.319746971130371,
      "learning_rate": 7.133006247443612e-05,
      "loss": 2.751,
      "step": 178769
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.285918712615967,
      "learning_rate": 7.132741466414157e-05,
      "loss": 2.9546,
      "step": 178770
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.043724775314331,
      "learning_rate": 7.132476689636168e-05,
      "loss": 2.8427,
      "step": 178771
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.190883159637451,
      "learning_rate": 7.132211917109684e-05,
      "loss": 2.9447,
      "step": 178772
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.839092493057251,
      "learning_rate": 7.131947148834743e-05,
      "loss": 2.9497,
      "step": 178773
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.80932354927063,
      "learning_rate": 7.131682384811414e-05,
      "loss": 3.1736,
      "step": 178774
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6746432781219482,
      "learning_rate": 7.131417625039727e-05,
      "loss": 3.1553,
      "step": 178775
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0280232429504395,
      "learning_rate": 7.131152869519753e-05,
      "loss": 3.011,
      "step": 178776
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.612973928451538,
      "learning_rate": 7.130888118251528e-05,
      "loss": 2.6842,
      "step": 178777
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.219757080078125,
      "learning_rate": 7.130623371235101e-05,
      "loss": 2.9356,
      "step": 178778
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.626660108566284,
      "learning_rate": 7.130358628470514e-05,
      "loss": 2.7576,
      "step": 178779
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.299962043762207,
      "learning_rate": 7.130093889957835e-05,
      "loss": 2.9418,
      "step": 178780
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6420512199401855,
      "learning_rate": 7.129829155697096e-05,
      "loss": 2.9094,
      "step": 178781
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8127126693725586,
      "learning_rate": 7.129564425688361e-05,
      "loss": 2.7492,
      "step": 178782
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6416940689086914,
      "learning_rate": 7.129299699931669e-05,
      "loss": 3.2022,
      "step": 178783
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.622044324874878,
      "learning_rate": 7.129034978427075e-05,
      "loss": 2.823,
      "step": 178784
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5742135047912598,
      "learning_rate": 7.128770261174617e-05,
      "loss": 2.7549,
      "step": 178785
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1434719562530518,
      "learning_rate": 7.128505548174361e-05,
      "loss": 2.927,
      "step": 178786
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.806811571121216,
      "learning_rate": 7.128240839426335e-05,
      "loss": 2.6603,
      "step": 178787
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6501917839050293,
      "learning_rate": 7.127976134930613e-05,
      "loss": 3.046,
      "step": 178788
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.101200580596924,
      "learning_rate": 7.127711434687229e-05,
      "loss": 2.9771,
      "step": 178789
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5589702129364014,
      "learning_rate": 7.12744673869624e-05,
      "loss": 2.7785,
      "step": 178790
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5651931762695312,
      "learning_rate": 7.127182046957676e-05,
      "loss": 3.0824,
      "step": 178791
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.823512315750122,
      "learning_rate": 7.126917359471613e-05,
      "loss": 2.9438,
      "step": 178792
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0738375186920166,
      "learning_rate": 7.126652676238079e-05,
      "loss": 3.0019,
      "step": 178793
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6068835258483887,
      "learning_rate": 7.12638799725714e-05,
      "loss": 2.7023,
      "step": 178794
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.587873697280884,
      "learning_rate": 7.126123322528836e-05,
      "loss": 2.7599,
      "step": 178795
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.182904243469238,
      "learning_rate": 7.12585865205322e-05,
      "loss": 2.9192,
      "step": 178796
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.876638650894165,
      "learning_rate": 7.125593985830329e-05,
      "loss": 2.8233,
      "step": 178797
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5778121948242188,
      "learning_rate": 7.12532932386023e-05,
      "loss": 3.0201,
      "step": 178798
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.602644443511963,
      "learning_rate": 7.125064666142955e-05,
      "loss": 2.6627,
      "step": 178799
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.212071657180786,
      "learning_rate": 7.124800012678571e-05,
      "loss": 2.7234,
      "step": 178800
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0554349422454834,
      "learning_rate": 7.124535363467119e-05,
      "loss": 2.9862,
      "step": 178801
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.7881598472595215,
      "learning_rate": 7.124270718508648e-05,
      "loss": 3.0134,
      "step": 178802
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.907572031021118,
      "learning_rate": 7.124006077803198e-05,
      "loss": 2.8401,
      "step": 178803
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.8534440994262695,
      "learning_rate": 7.123741441350836e-05,
      "loss": 2.8605,
      "step": 178804
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.057671546936035,
      "learning_rate": 7.123476809151593e-05,
      "loss": 2.957,
      "step": 178805
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6482925415039062,
      "learning_rate": 7.123212181205537e-05,
      "loss": 2.7962,
      "step": 178806
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.930215358734131,
      "learning_rate": 7.122947557512708e-05,
      "loss": 2.7523,
      "step": 178807
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.314422369003296,
      "learning_rate": 7.122682938073154e-05,
      "loss": 2.912,
      "step": 178808
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9786834716796875,
      "learning_rate": 7.122418322886915e-05,
      "loss": 2.9732,
      "step": 178809
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5705692768096924,
      "learning_rate": 7.122153711954059e-05,
      "loss": 2.9313,
      "step": 178810
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3959691524505615,
      "learning_rate": 7.121889105274622e-05,
      "loss": 2.9916,
      "step": 178811
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.350043773651123,
      "learning_rate": 7.121624502848661e-05,
      "loss": 2.979,
      "step": 178812
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.971024751663208,
      "learning_rate": 7.121359904676227e-05,
      "loss": 2.7377,
      "step": 178813
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1641290187835693,
      "learning_rate": 7.121095310757361e-05,
      "loss": 2.8228,
      "step": 178814
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.39520525932312,
      "learning_rate": 7.120830721092106e-05,
      "loss": 2.9178,
      "step": 178815
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9194912910461426,
      "learning_rate": 7.12056613568053e-05,
      "loss": 2.9983,
      "step": 178816
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0390825271606445,
      "learning_rate": 7.120301554522663e-05,
      "loss": 3.1657,
      "step": 178817
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.980595111846924,
      "learning_rate": 7.120036977618575e-05,
      "loss": 2.8855,
      "step": 178818
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.366629123687744,
      "learning_rate": 7.119772404968303e-05,
      "loss": 3.0341,
      "step": 178819
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.305678129196167,
      "learning_rate": 7.119507836571896e-05,
      "loss": 2.886,
      "step": 178820
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.2228002548217773,
      "learning_rate": 7.119243272429398e-05,
      "loss": 2.992,
      "step": 178821
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.912905216217041,
      "learning_rate": 7.118978712540873e-05,
      "loss": 3.0727,
      "step": 178822
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6524317264556885,
      "learning_rate": 7.11871415690635e-05,
      "loss": 2.9116,
      "step": 178823
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7925615310668945,
      "learning_rate": 7.1184496055259e-05,
      "loss": 2.8202,
      "step": 178824
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3893661499023438,
      "learning_rate": 7.118185058399555e-05,
      "loss": 2.9384,
      "step": 178825
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8201448917388916,
      "learning_rate": 7.117920515527388e-05,
      "loss": 2.9704,
      "step": 178826
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.42122483253479,
      "learning_rate": 7.117655976909414e-05,
      "loss": 2.8923,
      "step": 178827
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8793082237243652,
      "learning_rate": 7.117391442545709e-05,
      "loss": 3.2907,
      "step": 178828
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4634721279144287,
      "learning_rate": 7.117126912436301e-05,
      "loss": 2.6683,
      "step": 178829
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.668701171875,
      "learning_rate": 7.116862386581264e-05,
      "loss": 2.864,
      "step": 178830
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.593167543411255,
      "learning_rate": 7.116597864980624e-05,
      "loss": 2.8852,
      "step": 178831
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4219295978546143,
      "learning_rate": 7.116333347634457e-05,
      "loss": 3.0386,
      "step": 178832
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5937883853912354,
      "learning_rate": 7.11606883454278e-05,
      "loss": 2.8742,
      "step": 178833
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6491591930389404,
      "learning_rate": 7.115804325705664e-05,
      "loss": 3.1089,
      "step": 178834
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.71246075630188,
      "learning_rate": 7.115539821123144e-05,
      "loss": 2.8782,
      "step": 178835
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.022125720977783,
      "learning_rate": 7.115275320795288e-05,
      "loss": 3.1211,
      "step": 178836
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8725433349609375,
      "learning_rate": 7.115010824722124e-05,
      "loss": 2.8274,
      "step": 178837
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6006927490234375,
      "learning_rate": 7.114746332903721e-05,
      "loss": 3.0178,
      "step": 178838
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.099458694458008,
      "learning_rate": 7.114481845340117e-05,
      "loss": 2.9553,
      "step": 178839
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.0653486251831055,
      "learning_rate": 7.114217362031362e-05,
      "loss": 2.9899,
      "step": 178840
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1979594230651855,
      "learning_rate": 7.113952882977499e-05,
      "loss": 3.1223,
      "step": 178841
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.278637409210205,
      "learning_rate": 7.113688408178593e-05,
      "loss": 3.0893,
      "step": 178842
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.283423662185669,
      "learning_rate": 7.113423937634676e-05,
      "loss": 3.0151,
      "step": 178843
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4546308517456055,
      "learning_rate": 7.113159471345813e-05,
      "loss": 2.9327,
      "step": 178844
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1360673904418945,
      "learning_rate": 7.112895009312046e-05,
      "loss": 2.8469,
      "step": 178845
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.445349931716919,
      "learning_rate": 7.112630551533416e-05,
      "loss": 2.6692,
      "step": 178846
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.592031240463257,
      "learning_rate": 7.11236609800999e-05,
      "loss": 2.979,
      "step": 178847
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4648382663726807,
      "learning_rate": 7.112101648741802e-05,
      "loss": 2.9135,
      "step": 178848
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.20162296295166,
      "learning_rate": 7.111837203728902e-05,
      "loss": 2.7369,
      "step": 178849
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5326430797576904,
      "learning_rate": 7.111572762971351e-05,
      "loss": 2.7659,
      "step": 178850
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1155920028686523,
      "learning_rate": 7.111308326469192e-05,
      "loss": 2.777,
      "step": 178851
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6663098335266113,
      "learning_rate": 7.111043894222462e-05,
      "loss": 2.8989,
      "step": 178852
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.103481292724609,
      "learning_rate": 7.110779466231232e-05,
      "loss": 2.9349,
      "step": 178853
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.337401866912842,
      "learning_rate": 7.110515042495539e-05,
      "loss": 2.7626,
      "step": 178854
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.000610828399658,
      "learning_rate": 7.110250623015423e-05,
      "loss": 2.8792,
      "step": 178855
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4470629692077637,
      "learning_rate": 7.109986207790954e-05,
      "loss": 3.0308,
      "step": 178856
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8648746013641357,
      "learning_rate": 7.10972179682217e-05,
      "loss": 2.9614,
      "step": 178857
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8377697467803955,
      "learning_rate": 7.109457390109114e-05,
      "loss": 3.0333,
      "step": 178858
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.754361867904663,
      "learning_rate": 7.109192987651848e-05,
      "loss": 3.0854,
      "step": 178859
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.765725612640381,
      "learning_rate": 7.108928589450406e-05,
      "loss": 2.8797,
      "step": 178860
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6484177112579346,
      "learning_rate": 7.108664195504858e-05,
      "loss": 3.0477,
      "step": 178861
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.388920783996582,
      "learning_rate": 7.10839980581524e-05,
      "loss": 2.9588,
      "step": 178862
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.465742349624634,
      "learning_rate": 7.108135420381601e-05,
      "loss": 3.133,
      "step": 178863
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.397381544113159,
      "learning_rate": 7.107871039203984e-05,
      "loss": 3.0224,
      "step": 178864
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6134374141693115,
      "learning_rate": 7.107606662282456e-05,
      "loss": 2.9288,
      "step": 178865
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.954761505126953,
      "learning_rate": 7.107342289617046e-05,
      "loss": 3.0843,
      "step": 178866
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.62139630317688,
      "learning_rate": 7.107077921207823e-05,
      "loss": 2.761,
      "step": 178867
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3534867763519287,
      "learning_rate": 7.106813557054827e-05,
      "loss": 2.9715,
      "step": 178868
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3139665126800537,
      "learning_rate": 7.106549197158104e-05,
      "loss": 2.8091,
      "step": 178869
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6037304401397705,
      "learning_rate": 7.106284841517698e-05,
      "loss": 2.9937,
      "step": 178870
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5352964401245117,
      "learning_rate": 7.106020490133676e-05,
      "loss": 3.0655,
      "step": 178871
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6956536769866943,
      "learning_rate": 7.105756143006065e-05,
      "loss": 2.9195,
      "step": 178872
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8936197757720947,
      "learning_rate": 7.105491800134936e-05,
      "loss": 2.9579,
      "step": 178873
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.44627046585083,
      "learning_rate": 7.105227461520328e-05,
      "loss": 2.9825,
      "step": 178874
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0112791061401367,
      "learning_rate": 7.104963127162294e-05,
      "loss": 3.1359,
      "step": 178875
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.123751163482666,
      "learning_rate": 7.10469879706087e-05,
      "loss": 2.7997,
      "step": 178876
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.293483257293701,
      "learning_rate": 7.10443447121612e-05,
      "loss": 2.8213,
      "step": 178877
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0312976837158203,
      "learning_rate": 7.10417014962808e-05,
      "loss": 2.7821,
      "step": 178878
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.98724365234375,
      "learning_rate": 7.103905832296818e-05,
      "loss": 3.1716,
      "step": 178879
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.567178249359131,
      "learning_rate": 7.103641519222368e-05,
      "loss": 2.7546,
      "step": 178880
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.053513288497925,
      "learning_rate": 7.103377210404785e-05,
      "loss": 2.8568,
      "step": 178881
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.808732032775879,
      "learning_rate": 7.10311290584411e-05,
      "loss": 2.9416,
      "step": 178882
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0512192249298096,
      "learning_rate": 7.102848605540404e-05,
      "loss": 3.0592,
      "step": 178883
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7173519134521484,
      "learning_rate": 7.102584309493706e-05,
      "loss": 2.9711,
      "step": 178884
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9593608379364014,
      "learning_rate": 7.102320017704076e-05,
      "loss": 2.9951,
      "step": 178885
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.33447265625,
      "learning_rate": 7.102055730171554e-05,
      "loss": 2.8266,
      "step": 178886
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9207189083099365,
      "learning_rate": 7.101791446896197e-05,
      "loss": 2.7892,
      "step": 178887
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7457942962646484,
      "learning_rate": 7.101527167878037e-05,
      "loss": 2.96,
      "step": 178888
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.46391224861145,
      "learning_rate": 7.101262893117146e-05,
      "loss": 3.0588,
      "step": 178889
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.957183837890625,
      "learning_rate": 7.100998622613554e-05,
      "loss": 2.9743,
      "step": 178890
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4071054458618164,
      "learning_rate": 7.100734356367326e-05,
      "loss": 2.8072,
      "step": 178891
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.596618413925171,
      "learning_rate": 7.100470094378498e-05,
      "loss": 3.0391,
      "step": 178892
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.882904052734375,
      "learning_rate": 7.100205836647137e-05,
      "loss": 2.9974,
      "step": 178893
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.586634397506714,
      "learning_rate": 7.099941583173267e-05,
      "loss": 2.9961,
      "step": 178894
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5926783084869385,
      "learning_rate": 7.09967733395696e-05,
      "loss": 2.7786,
      "step": 178895
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1002044677734375,
      "learning_rate": 7.099413088998241e-05,
      "loss": 2.9207,
      "step": 178896
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.293306827545166,
      "learning_rate": 7.099148848297184e-05,
      "loss": 2.8965,
      "step": 178897
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.892975330352783,
      "learning_rate": 7.098884611853822e-05,
      "loss": 2.8381,
      "step": 178898
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.88037371635437,
      "learning_rate": 7.098620379668224e-05,
      "loss": 2.8568,
      "step": 178899
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4411826133728027,
      "learning_rate": 7.09835615174041e-05,
      "loss": 3.2694,
      "step": 178900
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.152184009552002,
      "learning_rate": 7.09809192807045e-05,
      "loss": 2.99,
      "step": 178901
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.524943828582764,
      "learning_rate": 7.09782770865838e-05,
      "loss": 3.0345,
      "step": 178902
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.8580639362335205,
      "learning_rate": 7.097563493504262e-05,
      "loss": 2.7834,
      "step": 178903
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.983715772628784,
      "learning_rate": 7.097299282608133e-05,
      "loss": 3.0159,
      "step": 178904
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.349064588546753,
      "learning_rate": 7.097035075970066e-05,
      "loss": 2.7603,
      "step": 178905
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.596243143081665,
      "learning_rate": 7.096770873590075e-05,
      "loss": 2.9866,
      "step": 178906
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.139418125152588,
      "learning_rate": 7.096506675468236e-05,
      "loss": 2.9147,
      "step": 178907
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.210543632507324,
      "learning_rate": 7.096242481604576e-05,
      "loss": 2.9778,
      "step": 178908
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5889430046081543,
      "learning_rate": 7.09597829199917e-05,
      "loss": 3.0572,
      "step": 178909
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5429747104644775,
      "learning_rate": 7.095714106652042e-05,
      "loss": 2.708,
      "step": 178910
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5122389793395996,
      "learning_rate": 7.095449925563274e-05,
      "loss": 2.8938,
      "step": 178911
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.541226625442505,
      "learning_rate": 7.095185748732876e-05,
      "loss": 3.1065,
      "step": 178912
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7825233936309814,
      "learning_rate": 7.094921576160921e-05,
      "loss": 2.7523,
      "step": 178913
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0593719482421875,
      "learning_rate": 7.094657407847448e-05,
      "loss": 2.8535,
      "step": 178914
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5322256088256836,
      "learning_rate": 7.094393243792517e-05,
      "loss": 2.9987,
      "step": 178915
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5041282176971436,
      "learning_rate": 7.09412908399616e-05,
      "loss": 3.0747,
      "step": 178916
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6961512565612793,
      "learning_rate": 7.093864928458456e-05,
      "loss": 2.9159,
      "step": 178917
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.206411361694336,
      "learning_rate": 7.09360077717942e-05,
      "loss": 2.9117,
      "step": 178918
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.126579761505127,
      "learning_rate": 7.093336630159123e-05,
      "loss": 2.9515,
      "step": 178919
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.744776964187622,
      "learning_rate": 7.093072487397595e-05,
      "loss": 2.9573,
      "step": 178920
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.917170524597168,
      "learning_rate": 7.092808348894908e-05,
      "loss": 2.9917,
      "step": 178921
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.292226552963257,
      "learning_rate": 7.092544214651095e-05,
      "loss": 2.9701,
      "step": 178922
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6017396450042725,
      "learning_rate": 7.092280084666215e-05,
      "loss": 3.0915,
      "step": 178923
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.454033851623535,
      "learning_rate": 7.092015958940314e-05,
      "loss": 2.9117,
      "step": 178924
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5148122310638428,
      "learning_rate": 7.09175183747344e-05,
      "loss": 3.1249,
      "step": 178925
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.090259075164795,
      "learning_rate": 7.091487720265632e-05,
      "loss": 2.9429,
      "step": 178926
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0357909202575684,
      "learning_rate": 7.09122360731696e-05,
      "loss": 2.8628,
      "step": 178927
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.496159553527832,
      "learning_rate": 7.090959498627452e-05,
      "loss": 3.0141,
      "step": 178928
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.022893190383911,
      "learning_rate": 7.090695394197174e-05,
      "loss": 2.9311,
      "step": 178929
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7206666469573975,
      "learning_rate": 7.090431294026171e-05,
      "loss": 2.8875,
      "step": 178930
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.810864210128784,
      "learning_rate": 7.090167198114489e-05,
      "loss": 2.9405,
      "step": 178931
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6191787719726562,
      "learning_rate": 7.089903106462165e-05,
      "loss": 3.0689,
      "step": 178932
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7804675102233887,
      "learning_rate": 7.089639019069271e-05,
      "loss": 3.0779,
      "step": 178933
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7269368171691895,
      "learning_rate": 7.089374935935839e-05,
      "loss": 2.9131,
      "step": 178934
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1978328227996826,
      "learning_rate": 7.089110857061931e-05,
      "loss": 2.875,
      "step": 178935
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.901949644088745,
      "learning_rate": 7.088846782447592e-05,
      "loss": 3.0339,
      "step": 178936
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.26155161857605,
      "learning_rate": 7.08858271209286e-05,
      "loss": 2.9777,
      "step": 178937
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3643710613250732,
      "learning_rate": 7.0883186459978e-05,
      "loss": 2.9494,
      "step": 178938
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5477144718170166,
      "learning_rate": 7.088054584162454e-05,
      "loss": 3.1626,
      "step": 178939
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7990736961364746,
      "learning_rate": 7.087790526586863e-05,
      "loss": 2.9251,
      "step": 178940
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.452467918395996,
      "learning_rate": 7.087526473271093e-05,
      "loss": 3.0878,
      "step": 178941
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.649221420288086,
      "learning_rate": 7.087262424215183e-05,
      "loss": 2.7531,
      "step": 178942
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9916229248046875,
      "learning_rate": 7.086998379419176e-05,
      "loss": 2.9308,
      "step": 178943
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.561363935470581,
      "learning_rate": 7.086734338883136e-05,
      "loss": 2.8765,
      "step": 178944
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3159775733947754,
      "learning_rate": 7.0864703026071e-05,
      "loss": 2.8406,
      "step": 178945
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.515408515930176,
      "learning_rate": 7.086206270591126e-05,
      "loss": 2.8336,
      "step": 178946
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6650073528289795,
      "learning_rate": 7.085942242835261e-05,
      "loss": 3.1624,
      "step": 178947
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.995464563369751,
      "learning_rate": 7.085678219339552e-05,
      "loss": 3.1153,
      "step": 178948
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.008741617202759,
      "learning_rate": 7.085414200104037e-05,
      "loss": 2.7147,
      "step": 178949
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0190632343292236,
      "learning_rate": 7.085150185128788e-05,
      "loss": 3.0179,
      "step": 178950
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7198688983917236,
      "learning_rate": 7.084886174413832e-05,
      "loss": 2.7871,
      "step": 178951
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.209206581115723,
      "learning_rate": 7.084622167959235e-05,
      "loss": 2.8921,
      "step": 178952
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.645277976989746,
      "learning_rate": 7.084358165765042e-05,
      "loss": 3.0561,
      "step": 178953
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.733218193054199,
      "learning_rate": 7.084094167831298e-05,
      "loss": 2.8109,
      "step": 178954
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6713757514953613,
      "learning_rate": 7.083830174158047e-05,
      "loss": 3.1514,
      "step": 178955
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.9141464233398438,
      "learning_rate": 7.083566184745349e-05,
      "loss": 3.0291,
      "step": 178956
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5257456302642822,
      "learning_rate": 7.083302199593241e-05,
      "loss": 2.7937,
      "step": 178957
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.498554229736328,
      "learning_rate": 7.083038218701793e-05,
      "loss": 2.6667,
      "step": 178958
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2212624549865723,
      "learning_rate": 7.082774242071028e-05,
      "loss": 3.1341,
      "step": 178959
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.604498863220215,
      "learning_rate": 7.082510269701025e-05,
      "loss": 3.0395,
      "step": 178960
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.813178539276123,
      "learning_rate": 7.082246301591801e-05,
      "loss": 2.852,
      "step": 178961
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.8661293983459473,
      "learning_rate": 7.081982337743425e-05,
      "loss": 2.8663,
      "step": 178962
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8060529232025146,
      "learning_rate": 7.081718378155934e-05,
      "loss": 2.9996,
      "step": 178963
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6690056324005127,
      "learning_rate": 7.081454422829394e-05,
      "loss": 2.7629,
      "step": 178964
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8055078983306885,
      "learning_rate": 7.081190471763837e-05,
      "loss": 3.1613,
      "step": 178965
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.570502758026123,
      "learning_rate": 7.080926524959333e-05,
      "loss": 2.769,
      "step": 178966
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.52064847946167,
      "learning_rate": 7.080662582415903e-05,
      "loss": 3.0337,
      "step": 178967
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.549271821975708,
      "learning_rate": 7.080398644133616e-05,
      "loss": 2.8867,
      "step": 178968
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0384647846221924,
      "learning_rate": 7.08013471011251e-05,
      "loss": 3.051,
      "step": 178969
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6064324378967285,
      "learning_rate": 7.079870780352646e-05,
      "loss": 2.8942,
      "step": 178970
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.468580484390259,
      "learning_rate": 7.079606854854061e-05,
      "loss": 2.9534,
      "step": 178971
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.372055768966675,
      "learning_rate": 7.079342933616823e-05,
      "loss": 2.8015,
      "step": 178972
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.096517086029053,
      "learning_rate": 7.079079016640952e-05,
      "loss": 2.8811,
      "step": 178973
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6983978748321533,
      "learning_rate": 7.078815103926524e-05,
      "loss": 2.6983,
      "step": 178974
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.508615255355835,
      "learning_rate": 7.078551195473567e-05,
      "loss": 3.0402,
      "step": 178975
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1046929359436035,
      "learning_rate": 7.078287291282147e-05,
      "loss": 2.7709,
      "step": 178976
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4619076251983643,
      "learning_rate": 7.078023391352297e-05,
      "loss": 2.7655,
      "step": 178977
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.157414436340332,
      "learning_rate": 7.077759495684097e-05,
      "loss": 3.0093,
      "step": 178978
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.42073917388916,
      "learning_rate": 7.077495604277553e-05,
      "loss": 3.1904,
      "step": 178979
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5008974075317383,
      "learning_rate": 7.077231717132744e-05,
      "loss": 2.949,
      "step": 178980
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.495492458343506,
      "learning_rate": 7.076967834249703e-05,
      "loss": 2.7709,
      "step": 178981
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.89371657371521,
      "learning_rate": 7.076703955628496e-05,
      "loss": 2.7597,
      "step": 178982
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.356321334838867,
      "learning_rate": 7.07644008126915e-05,
      "loss": 2.9755,
      "step": 178983
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.572376251220703,
      "learning_rate": 7.076176211171747e-05,
      "loss": 3.1246,
      "step": 178984
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.290424346923828,
      "learning_rate": 7.075912345336295e-05,
      "loss": 2.9323,
      "step": 178985
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4148457050323486,
      "learning_rate": 7.075648483762878e-05,
      "loss": 2.9132,
      "step": 178986
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0096609592437744,
      "learning_rate": 7.075384626451517e-05,
      "loss": 3.0285,
      "step": 178987
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6347391605377197,
      "learning_rate": 7.075120773402286e-05,
      "loss": 2.9312,
      "step": 178988
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.641772508621216,
      "learning_rate": 7.074856924615214e-05,
      "loss": 2.9069,
      "step": 178989
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.855518102645874,
      "learning_rate": 7.074593080090375e-05,
      "loss": 3.0269,
      "step": 178990
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.845478534698486,
      "learning_rate": 7.074329239827784e-05,
      "loss": 2.9452,
      "step": 178991
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.188754081726074,
      "learning_rate": 7.074065403827518e-05,
      "loss": 3.1479,
      "step": 178992
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.311555862426758,
      "learning_rate": 7.073801572089607e-05,
      "loss": 2.9321,
      "step": 178993
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6538944244384766,
      "learning_rate": 7.073537744614118e-05,
      "loss": 2.8357,
      "step": 178994
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.508608102798462,
      "learning_rate": 7.073273921401082e-05,
      "loss": 2.9647,
      "step": 178995
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5124099254608154,
      "learning_rate": 7.073010102450572e-05,
      "loss": 2.8314,
      "step": 178996
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4784343242645264,
      "learning_rate": 7.072746287762607e-05,
      "loss": 2.9625,
      "step": 178997
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4794347286224365,
      "learning_rate": 7.072482477337263e-05,
      "loss": 2.9753,
      "step": 178998
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2265896797180176,
      "learning_rate": 7.072218671174564e-05,
      "loss": 3.0913,
      "step": 178999
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1342387199401855,
      "learning_rate": 7.071954869274583e-05,
      "loss": 2.743,
      "step": 179000
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4066431522369385,
      "learning_rate": 7.071691071637348e-05,
      "loss": 3.0698,
      "step": 179001
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.08141827583313,
      "learning_rate": 7.071427278262937e-05,
      "loss": 3.0028,
      "step": 179002
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6753387451171875,
      "learning_rate": 7.071163489151366e-05,
      "loss": 2.8964,
      "step": 179003
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6563141345977783,
      "learning_rate": 7.070899704302707e-05,
      "loss": 2.861,
      "step": 179004
    },
    {
      "epoch": 2.33,
      "grad_norm": 6.292959690093994,
      "learning_rate": 7.070635923716989e-05,
      "loss": 2.8835,
      "step": 179005
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.222620964050293,
      "learning_rate": 7.070372147394281e-05,
      "loss": 2.7336,
      "step": 179006
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.803752899169922,
      "learning_rate": 7.070108375334614e-05,
      "loss": 2.9938,
      "step": 179007
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.576946496963501,
      "learning_rate": 7.069844607538059e-05,
      "loss": 3.0647,
      "step": 179008
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.458460569381714,
      "learning_rate": 7.069580844004651e-05,
      "loss": 2.9226,
      "step": 179009
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4389524459838867,
      "learning_rate": 7.069317084734439e-05,
      "loss": 2.8369,
      "step": 179010
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5752007961273193,
      "learning_rate": 7.069053329727467e-05,
      "loss": 3.0522,
      "step": 179011
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.364915370941162,
      "learning_rate": 7.068789578983799e-05,
      "loss": 2.8287,
      "step": 179012
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6703004837036133,
      "learning_rate": 7.06852583250347e-05,
      "loss": 2.8239,
      "step": 179013
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.912679433822632,
      "learning_rate": 7.06826209028654e-05,
      "loss": 2.9314,
      "step": 179014
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2296695709228516,
      "learning_rate": 7.067998352333055e-05,
      "loss": 3.1188,
      "step": 179015
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9253451824188232,
      "learning_rate": 7.067734618643061e-05,
      "loss": 2.8358,
      "step": 179016
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9106321334838867,
      "learning_rate": 7.0674708892166e-05,
      "loss": 2.9575,
      "step": 179017
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.409329414367676,
      "learning_rate": 7.067207164053735e-05,
      "loss": 2.901,
      "step": 179018
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.376238822937012,
      "learning_rate": 7.066943443154503e-05,
      "loss": 3.2383,
      "step": 179019
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.752169609069824,
      "learning_rate": 7.066679726518968e-05,
      "loss": 2.8933,
      "step": 179020
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.432286500930786,
      "learning_rate": 7.066416014147168e-05,
      "loss": 2.881,
      "step": 179021
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.38191294670105,
      "learning_rate": 7.066152306039147e-05,
      "loss": 2.7722,
      "step": 179022
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5499796867370605,
      "learning_rate": 7.06588860219497e-05,
      "loss": 2.9901,
      "step": 179023
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.437809944152832,
      "learning_rate": 7.065624902614679e-05,
      "loss": 2.8899,
      "step": 179024
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.3123321533203125,
      "learning_rate": 7.065361207298309e-05,
      "loss": 3.1588,
      "step": 179025
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.141036510467529,
      "learning_rate": 7.065097516245932e-05,
      "loss": 2.9159,
      "step": 179026
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.3328380584716797,
      "learning_rate": 7.064833829457584e-05,
      "loss": 2.6892,
      "step": 179027
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.613724946975708,
      "learning_rate": 7.064570146933313e-05,
      "loss": 2.7158,
      "step": 179028
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8653271198272705,
      "learning_rate": 7.064306468673177e-05,
      "loss": 3.0612,
      "step": 179029
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.195547342300415,
      "learning_rate": 7.064042794677216e-05,
      "loss": 2.9226,
      "step": 179030
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6985714435577393,
      "learning_rate": 7.063779124945477e-05,
      "loss": 3.041,
      "step": 179031
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.809142827987671,
      "learning_rate": 7.063515459478025e-05,
      "loss": 2.9187,
      "step": 179032
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.045686721801758,
      "learning_rate": 7.063251798274895e-05,
      "loss": 2.9637,
      "step": 179033
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.168738842010498,
      "learning_rate": 7.062988141336133e-05,
      "loss": 2.9914,
      "step": 179034
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.235924005508423,
      "learning_rate": 7.062724488661801e-05,
      "loss": 3.1819,
      "step": 179035
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.706007242202759,
      "learning_rate": 7.062460840251933e-05,
      "loss": 2.8388,
      "step": 179036
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.433481216430664,
      "learning_rate": 7.062197196106598e-05,
      "loss": 2.7963,
      "step": 179037
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.138162612915039,
      "learning_rate": 7.061933556225831e-05,
      "loss": 2.9434,
      "step": 179038
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3307690620422363,
      "learning_rate": 7.061669920609683e-05,
      "loss": 2.8156,
      "step": 179039
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8581764698028564,
      "learning_rate": 7.061406289258198e-05,
      "loss": 3.1834,
      "step": 179040
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.145766019821167,
      "learning_rate": 7.061142662171437e-05,
      "loss": 3.1136,
      "step": 179041
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.917677640914917,
      "learning_rate": 7.060879039349432e-05,
      "loss": 3.0461,
      "step": 179042
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5739781856536865,
      "learning_rate": 7.060615420792255e-05,
      "loss": 2.902,
      "step": 179043
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7901337146759033,
      "learning_rate": 7.060351806499933e-05,
      "loss": 2.8166,
      "step": 179044
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8588545322418213,
      "learning_rate": 7.06008819647254e-05,
      "loss": 3.0033,
      "step": 179045
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0828657150268555,
      "learning_rate": 7.059824590710095e-05,
      "loss": 3.0184,
      "step": 179046
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.736415147781372,
      "learning_rate": 7.059560989212668e-05,
      "loss": 2.996,
      "step": 179047
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5452332496643066,
      "learning_rate": 7.059297391980294e-05,
      "loss": 2.7875,
      "step": 179048
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.730876922607422,
      "learning_rate": 7.059033799013039e-05,
      "loss": 3.0191,
      "step": 179049
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0696780681610107,
      "learning_rate": 7.058770210310934e-05,
      "loss": 3.0112,
      "step": 179050
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5070223808288574,
      "learning_rate": 7.058506625874051e-05,
      "loss": 2.95,
      "step": 179051
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.261695384979248,
      "learning_rate": 7.058243045702412e-05,
      "loss": 3.013,
      "step": 179052
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.590670585632324,
      "learning_rate": 7.057979469796084e-05,
      "loss": 2.9103,
      "step": 179053
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.076591730117798,
      "learning_rate": 7.057715898155101e-05,
      "loss": 2.9809,
      "step": 179054
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.3120338916778564,
      "learning_rate": 7.057452330779534e-05,
      "loss": 2.9914,
      "step": 179055
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.883242607116699,
      "learning_rate": 7.057188767669411e-05,
      "loss": 3.1343,
      "step": 179056
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.253068208694458,
      "learning_rate": 7.056925208824806e-05,
      "loss": 2.998,
      "step": 179057
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.527707576751709,
      "learning_rate": 7.056661654245731e-05,
      "loss": 3.0652,
      "step": 179058
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5104963779449463,
      "learning_rate": 7.056398103932268e-05,
      "loss": 2.729,
      "step": 179059
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.3995635509490967,
      "learning_rate": 7.056134557884443e-05,
      "loss": 3.1608,
      "step": 179060
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.9242312908172607,
      "learning_rate": 7.055871016102326e-05,
      "loss": 2.9899,
      "step": 179061
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1989455223083496,
      "learning_rate": 7.055607478585946e-05,
      "loss": 3.0676,
      "step": 179062
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.780485153198242,
      "learning_rate": 7.05534394533538e-05,
      "loss": 2.8067,
      "step": 179063
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8937695026397705,
      "learning_rate": 7.055080416350641e-05,
      "loss": 2.8322,
      "step": 179064
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1956470012664795,
      "learning_rate": 7.054816891631804e-05,
      "loss": 3.1533,
      "step": 179065
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2763662338256836,
      "learning_rate": 7.054553371178902e-05,
      "loss": 2.874,
      "step": 179066
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.200754642486572,
      "learning_rate": 7.054289854992e-05,
      "loss": 2.9855,
      "step": 179067
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.767367362976074,
      "learning_rate": 7.05402634307113e-05,
      "loss": 2.987,
      "step": 179068
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7026617527008057,
      "learning_rate": 7.053762835416363e-05,
      "loss": 2.9603,
      "step": 179069
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.451495885848999,
      "learning_rate": 7.053499332027721e-05,
      "loss": 2.8762,
      "step": 179070
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.3484067916870117,
      "learning_rate": 7.053235832905276e-05,
      "loss": 2.8525,
      "step": 179071
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8399837017059326,
      "learning_rate": 7.052972338049059e-05,
      "loss": 2.8742,
      "step": 179072
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.440481424331665,
      "learning_rate": 7.052708847459135e-05,
      "loss": 2.9141,
      "step": 179073
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6321098804473877,
      "learning_rate": 7.052445361135536e-05,
      "loss": 2.8766,
      "step": 179074
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.489305257797241,
      "learning_rate": 7.05218187907834e-05,
      "loss": 3.0956,
      "step": 179075
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.036515712738037,
      "learning_rate": 7.051918401287558e-05,
      "loss": 2.964,
      "step": 179076
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.520306348800659,
      "learning_rate": 7.051654927763267e-05,
      "loss": 2.7706,
      "step": 179077
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5632684230804443,
      "learning_rate": 7.051391458505497e-05,
      "loss": 3.0277,
      "step": 179078
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.16019606590271,
      "learning_rate": 7.051127993514316e-05,
      "loss": 2.7866,
      "step": 179079
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8916916847229004,
      "learning_rate": 7.050864532789753e-05,
      "loss": 2.9079,
      "step": 179080
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0716185569763184,
      "learning_rate": 7.050601076331887e-05,
      "loss": 2.7479,
      "step": 179081
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.642371892929077,
      "learning_rate": 7.05033762414073e-05,
      "loss": 2.7695,
      "step": 179082
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.865969657897949,
      "learning_rate": 7.050074176216355e-05,
      "loss": 2.7397,
      "step": 179083
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0180187225341797,
      "learning_rate": 7.0498107325588e-05,
      "loss": 2.9729,
      "step": 179084
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.78078293800354,
      "learning_rate": 7.049547293168124e-05,
      "loss": 3.0266,
      "step": 179085
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.8199899196624756,
      "learning_rate": 7.049283858044359e-05,
      "loss": 2.8647,
      "step": 179086
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.673274040222168,
      "learning_rate": 7.049020427187587e-05,
      "loss": 2.8077,
      "step": 179087
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.664588212966919,
      "learning_rate": 7.048757000597816e-05,
      "loss": 2.9013,
      "step": 179088
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.130078077316284,
      "learning_rate": 7.048493578275124e-05,
      "loss": 3.014,
      "step": 179089
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7096290588378906,
      "learning_rate": 7.048230160219543e-05,
      "loss": 3.2079,
      "step": 179090
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8405418395996094,
      "learning_rate": 7.047966746431133e-05,
      "loss": 2.8692,
      "step": 179091
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.667790412902832,
      "learning_rate": 7.047703336909935e-05,
      "loss": 2.9164,
      "step": 179092
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6032967567443848,
      "learning_rate": 7.04743993165601e-05,
      "loss": 2.7972,
      "step": 179093
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6269142627716064,
      "learning_rate": 7.047176530669398e-05,
      "loss": 3.057,
      "step": 179094
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.722796678543091,
      "learning_rate": 7.046913133950152e-05,
      "loss": 2.8843,
      "step": 179095
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.78848934173584,
      "learning_rate": 7.046649741498308e-05,
      "loss": 2.7726,
      "step": 179096
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6788060665130615,
      "learning_rate": 7.046386353313932e-05,
      "loss": 3.0142,
      "step": 179097
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.260716915130615,
      "learning_rate": 7.046122969397058e-05,
      "loss": 2.7306,
      "step": 179098
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.850388526916504,
      "learning_rate": 7.045859589747754e-05,
      "loss": 2.9708,
      "step": 179099
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.109377861022949,
      "learning_rate": 7.045596214366054e-05,
      "loss": 2.9061,
      "step": 179100
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5696563720703125,
      "learning_rate": 7.045332843252012e-05,
      "loss": 3.1768,
      "step": 179101
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9473767280578613,
      "learning_rate": 7.045069476405672e-05,
      "loss": 3.0935,
      "step": 179102
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7971127033233643,
      "learning_rate": 7.044806113827089e-05,
      "loss": 2.8611,
      "step": 179103
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.238593101501465,
      "learning_rate": 7.044542755516307e-05,
      "loss": 2.8111,
      "step": 179104
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.7250657081604,
      "learning_rate": 7.044279401473383e-05,
      "loss": 2.9953,
      "step": 179105
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.937991619110107,
      "learning_rate": 7.04401605169836e-05,
      "loss": 2.9833,
      "step": 179106
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.043046474456787,
      "learning_rate": 7.043752706191282e-05,
      "loss": 2.9106,
      "step": 179107
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.83482027053833,
      "learning_rate": 7.043489364952211e-05,
      "loss": 3.0191,
      "step": 179108
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.879796504974365,
      "learning_rate": 7.043226027981188e-05,
      "loss": 2.7875,
      "step": 179109
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1905322074890137,
      "learning_rate": 7.042962695278255e-05,
      "loss": 2.7329,
      "step": 179110
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.836336851119995,
      "learning_rate": 7.042699366843477e-05,
      "loss": 2.677,
      "step": 179111
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4082396030426025,
      "learning_rate": 7.042436042676896e-05,
      "loss": 3.035,
      "step": 179112
    },
    {
      "epoch": 2.33,
      "grad_norm": 5.159844875335693,
      "learning_rate": 7.042172722778546e-05,
      "loss": 2.8654,
      "step": 179113
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.9804489612579346,
      "learning_rate": 7.041909407148502e-05,
      "loss": 2.9095,
      "step": 179114
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7393364906311035,
      "learning_rate": 7.041646095786801e-05,
      "loss": 2.9139,
      "step": 179115
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5943098068237305,
      "learning_rate": 7.041382788693482e-05,
      "loss": 2.5569,
      "step": 179116
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.806697368621826,
      "learning_rate": 7.04111948586861e-05,
      "loss": 2.913,
      "step": 179117
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0203657150268555,
      "learning_rate": 7.040856187312228e-05,
      "loss": 2.9645,
      "step": 179118
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.886646270751953,
      "learning_rate": 7.040592893024376e-05,
      "loss": 2.9437,
      "step": 179119
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8200693130493164,
      "learning_rate": 7.040329603005123e-05,
      "loss": 3.1029,
      "step": 179120
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5185978412628174,
      "learning_rate": 7.040066317254495e-05,
      "loss": 2.5358,
      "step": 179121
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7857460975646973,
      "learning_rate": 7.039803035772558e-05,
      "loss": 2.9745,
      "step": 179122
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2298786640167236,
      "learning_rate": 7.039539758559358e-05,
      "loss": 3.0432,
      "step": 179123
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2537474632263184,
      "learning_rate": 7.03927648561494e-05,
      "loss": 2.892,
      "step": 179124
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.312096118927002,
      "learning_rate": 7.039013216939347e-05,
      "loss": 2.9861,
      "step": 179125
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7484652996063232,
      "learning_rate": 7.038749952532642e-05,
      "loss": 3.0337,
      "step": 179126
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.746837615966797,
      "learning_rate": 7.038486692394856e-05,
      "loss": 2.876,
      "step": 179127
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.290233612060547,
      "learning_rate": 7.038223436526062e-05,
      "loss": 3.2147,
      "step": 179128
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.952482223510742,
      "learning_rate": 7.037960184926293e-05,
      "loss": 2.7909,
      "step": 179129
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5944504737854004,
      "learning_rate": 7.037696937595599e-05,
      "loss": 3.0702,
      "step": 179130
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.678752422332764,
      "learning_rate": 7.037433694534026e-05,
      "loss": 2.9219,
      "step": 179131
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8886733055114746,
      "learning_rate": 7.037170455741632e-05,
      "loss": 3.0605,
      "step": 179132
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4458229541778564,
      "learning_rate": 7.036907221218456e-05,
      "loss": 3.0192,
      "step": 179133
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.68772554397583,
      "learning_rate": 7.036643990964559e-05,
      "loss": 2.9227,
      "step": 179134
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5976948738098145,
      "learning_rate": 7.036380764979976e-05,
      "loss": 2.9751,
      "step": 179135
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.017730236053467,
      "learning_rate": 7.036117543264779e-05,
      "loss": 2.9549,
      "step": 179136
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.576897382736206,
      "learning_rate": 7.035854325818986e-05,
      "loss": 2.7834,
      "step": 179137
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.189375638961792,
      "learning_rate": 7.03559111264267e-05,
      "loss": 3.0333,
      "step": 179138
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.10794997215271,
      "learning_rate": 7.035327903735862e-05,
      "loss": 2.8861,
      "step": 179139
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.016136646270752,
      "learning_rate": 7.035064699098626e-05,
      "loss": 3.0546,
      "step": 179140
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2771248817443848,
      "learning_rate": 7.034801498730999e-05,
      "loss": 3.0253,
      "step": 179141
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4093337059020996,
      "learning_rate": 7.034538302633052e-05,
      "loss": 2.8956,
      "step": 179142
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6994221210479736,
      "learning_rate": 7.034275110804802e-05,
      "loss": 2.9612,
      "step": 179143
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7773258686065674,
      "learning_rate": 7.034011923246321e-05,
      "loss": 2.9611,
      "step": 179144
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.9123857021331787,
      "learning_rate": 7.033748739957642e-05,
      "loss": 2.8913,
      "step": 179145
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.979109525680542,
      "learning_rate": 7.033485560938835e-05,
      "loss": 3.0351,
      "step": 179146
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7689664363861084,
      "learning_rate": 7.033222386189924e-05,
      "loss": 3.413,
      "step": 179147
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.634424924850464,
      "learning_rate": 7.032959215710988e-05,
      "loss": 2.9165,
      "step": 179148
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4018030166625977,
      "learning_rate": 7.03269604950204e-05,
      "loss": 3.1271,
      "step": 179149
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5537362098693848,
      "learning_rate": 7.03243288756316e-05,
      "loss": 2.8526,
      "step": 179150
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.168121337890625,
      "learning_rate": 7.032169729894373e-05,
      "loss": 2.9711,
      "step": 179151
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7299511432647705,
      "learning_rate": 7.031906576495749e-05,
      "loss": 2.9115,
      "step": 179152
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.671020746231079,
      "learning_rate": 7.031643427367317e-05,
      "loss": 2.9914,
      "step": 179153
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4056644439697266,
      "learning_rate": 7.031380282509154e-05,
      "loss": 2.9284,
      "step": 179154
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.837103843688965,
      "learning_rate": 7.031117141921274e-05,
      "loss": 2.8531,
      "step": 179155
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4840505123138428,
      "learning_rate": 7.030854005603752e-05,
      "loss": 2.9218,
      "step": 179156
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.23860502243042,
      "learning_rate": 7.030590873556618e-05,
      "loss": 2.8699,
      "step": 179157
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.6647143363952637,
      "learning_rate": 7.03032774577994e-05,
      "loss": 3.1512,
      "step": 179158
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.41711163520813,
      "learning_rate": 7.030064622273747e-05,
      "loss": 2.9457,
      "step": 179159
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4540162086486816,
      "learning_rate": 7.029801503038111e-05,
      "loss": 2.7945,
      "step": 179160
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.889174222946167,
      "learning_rate": 7.029538388073065e-05,
      "loss": 3.0749,
      "step": 179161
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.451453685760498,
      "learning_rate": 7.029275277378663e-05,
      "loss": 3.3471,
      "step": 179162
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.660106897354126,
      "learning_rate": 7.029012170954945e-05,
      "loss": 2.9562,
      "step": 179163
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.307011842727661,
      "learning_rate": 7.028749068801974e-05,
      "loss": 2.6835,
      "step": 179164
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.15655255317688,
      "learning_rate": 7.028485970919784e-05,
      "loss": 2.8153,
      "step": 179165
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.844682216644287,
      "learning_rate": 7.028222877308441e-05,
      "loss": 3.295,
      "step": 179166
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2885515689849854,
      "learning_rate": 7.027959787967984e-05,
      "loss": 3.1553,
      "step": 179167
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.709193468093872,
      "learning_rate": 7.027696702898464e-05,
      "loss": 2.8234,
      "step": 179168
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8888540267944336,
      "learning_rate": 7.027433622099919e-05,
      "loss": 3.2247,
      "step": 179169
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.63183856010437,
      "learning_rate": 7.02717054557242e-05,
      "loss": 2.8578,
      "step": 179170
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.987818717956543,
      "learning_rate": 7.02690747331599e-05,
      "loss": 2.8279,
      "step": 179171
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.3924622535705566,
      "learning_rate": 7.026644405330702e-05,
      "loss": 3.0406,
      "step": 179172
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.453559637069702,
      "learning_rate": 7.026381341616595e-05,
      "loss": 2.8999,
      "step": 179173
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.7850897312164307,
      "learning_rate": 7.026118282173717e-05,
      "loss": 3.1085,
      "step": 179174
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.270517110824585,
      "learning_rate": 7.025855227002109e-05,
      "loss": 2.8646,
      "step": 179175
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0155322551727295,
      "learning_rate": 7.025592176101837e-05,
      "loss": 2.7769,
      "step": 179176
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.1581668853759766,
      "learning_rate": 7.025329129472931e-05,
      "loss": 2.9364,
      "step": 179177
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6463587284088135,
      "learning_rate": 7.025066087115462e-05,
      "loss": 2.941,
      "step": 179178
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6800224781036377,
      "learning_rate": 7.024803049029462e-05,
      "loss": 3.0702,
      "step": 179179
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8967387676239014,
      "learning_rate": 7.02454001521499e-05,
      "loss": 3.0205,
      "step": 179180
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4781317710876465,
      "learning_rate": 7.024276985672077e-05,
      "loss": 2.8427,
      "step": 179181
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4910268783569336,
      "learning_rate": 7.024013960400794e-05,
      "loss": 2.8662,
      "step": 179182
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6316604614257812,
      "learning_rate": 7.023750939401174e-05,
      "loss": 2.9087,
      "step": 179183
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4358346462249756,
      "learning_rate": 7.02348792267328e-05,
      "loss": 3.1061,
      "step": 179184
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.698032855987549,
      "learning_rate": 7.023224910217152e-05,
      "loss": 3.0148,
      "step": 179185
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9705450534820557,
      "learning_rate": 7.022961902032841e-05,
      "loss": 3.0412,
      "step": 179186
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.73677921295166,
      "learning_rate": 7.022698898120389e-05,
      "loss": 2.8826,
      "step": 179187
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.7092666625976562,
      "learning_rate": 7.022435898479856e-05,
      "loss": 3.055,
      "step": 179188
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.971370220184326,
      "learning_rate": 7.02217290311128e-05,
      "loss": 3.1655,
      "step": 179189
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.368598222732544,
      "learning_rate": 7.02190991201472e-05,
      "loss": 3.0016,
      "step": 179190
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.714510440826416,
      "learning_rate": 7.021646925190226e-05,
      "loss": 3.096,
      "step": 179191
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5350630283355713,
      "learning_rate": 7.021383942637838e-05,
      "loss": 3.1163,
      "step": 179192
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.621112585067749,
      "learning_rate": 7.021120964357603e-05,
      "loss": 3.0338,
      "step": 179193
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.25744891166687,
      "learning_rate": 7.02085799034958e-05,
      "loss": 2.9572,
      "step": 179194
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.124175548553467,
      "learning_rate": 7.02059502061381e-05,
      "loss": 3.1165,
      "step": 179195
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.58685040473938,
      "learning_rate": 7.020332055150348e-05,
      "loss": 2.7927,
      "step": 179196
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5590245723724365,
      "learning_rate": 7.020069093959243e-05,
      "loss": 2.8239,
      "step": 179197
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.658254623413086,
      "learning_rate": 7.019806137040533e-05,
      "loss": 2.7513,
      "step": 179198
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6110877990722656,
      "learning_rate": 7.019543184394282e-05,
      "loss": 3.2264,
      "step": 179199
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.198233127593994,
      "learning_rate": 7.019280236020532e-05,
      "loss": 2.7884,
      "step": 179200
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9905102252960205,
      "learning_rate": 7.019017291919322e-05,
      "loss": 3.1685,
      "step": 179201
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.053040981292725,
      "learning_rate": 7.018754352090722e-05,
      "loss": 3.0752,
      "step": 179202
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.999122142791748,
      "learning_rate": 7.018491416534767e-05,
      "loss": 3.0258,
      "step": 179203
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1068332195281982,
      "learning_rate": 7.018228485251502e-05,
      "loss": 3.0839,
      "step": 179204
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.446361780166626,
      "learning_rate": 7.017965558240987e-05,
      "loss": 2.8479,
      "step": 179205
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.713651657104492,
      "learning_rate": 7.017702635503259e-05,
      "loss": 3.052,
      "step": 179206
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.420182943344116,
      "learning_rate": 7.017439717038383e-05,
      "loss": 3.0048,
      "step": 179207
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6381614208221436,
      "learning_rate": 7.017176802846399e-05,
      "loss": 2.7761,
      "step": 179208
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.08563232421875,
      "learning_rate": 7.016913892927353e-05,
      "loss": 2.8022,
      "step": 179209
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3031208515167236,
      "learning_rate": 7.016650987281293e-05,
      "loss": 2.9388,
      "step": 179210
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.446681022644043,
      "learning_rate": 7.016388085908275e-05,
      "loss": 2.8092,
      "step": 179211
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.462127208709717,
      "learning_rate": 7.01612518880834e-05,
      "loss": 3.0697,
      "step": 179212
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4875435829162598,
      "learning_rate": 7.015862295981548e-05,
      "loss": 3.0438,
      "step": 179213
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.421908378601074,
      "learning_rate": 7.015599407427942e-05,
      "loss": 2.8412,
      "step": 179214
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8068065643310547,
      "learning_rate": 7.015336523147568e-05,
      "loss": 2.7959,
      "step": 179215
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.768775701522827,
      "learning_rate": 7.015073643140469e-05,
      "loss": 2.9568,
      "step": 179216
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6288399696350098,
      "learning_rate": 7.014810767406711e-05,
      "loss": 2.7392,
      "step": 179217
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6627609729766846,
      "learning_rate": 7.014547895946322e-05,
      "loss": 3.138,
      "step": 179218
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6873655319213867,
      "learning_rate": 7.014285028759375e-05,
      "loss": 3.1475,
      "step": 179219
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.814732551574707,
      "learning_rate": 7.014022165845895e-05,
      "loss": 2.8205,
      "step": 179220
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.174140930175781,
      "learning_rate": 7.013759307205961e-05,
      "loss": 2.7076,
      "step": 179221
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9055991172790527,
      "learning_rate": 7.013496452839585e-05,
      "loss": 2.8732,
      "step": 179222
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.924821376800537,
      "learning_rate": 7.013233602746842e-05,
      "loss": 2.905,
      "step": 179223
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.266791820526123,
      "learning_rate": 7.012970756927766e-05,
      "loss": 2.8351,
      "step": 179224
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4138896465301514,
      "learning_rate": 7.01270791538242e-05,
      "loss": 2.7691,
      "step": 179225
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3534483909606934,
      "learning_rate": 7.012445078110837e-05,
      "loss": 2.6575,
      "step": 179226
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.233445167541504,
      "learning_rate": 7.012182245113084e-05,
      "loss": 2.9574,
      "step": 179227
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7772302627563477,
      "learning_rate": 7.011919416389199e-05,
      "loss": 2.9589,
      "step": 179228
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.076183795928955,
      "learning_rate": 7.011656591939233e-05,
      "loss": 2.9644,
      "step": 179229
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.000689744949341,
      "learning_rate": 7.011393771763223e-05,
      "loss": 2.8019,
      "step": 179230
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4577391147613525,
      "learning_rate": 7.011130955861238e-05,
      "loss": 3.0011,
      "step": 179231
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.688844919204712,
      "learning_rate": 7.010868144233312e-05,
      "loss": 2.865,
      "step": 179232
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5984528064727783,
      "learning_rate": 7.010605336879506e-05,
      "loss": 3.0013,
      "step": 179233
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8179945945739746,
      "learning_rate": 7.010342533799864e-05,
      "loss": 2.8922,
      "step": 179234
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.514232873916626,
      "learning_rate": 7.01007973499443e-05,
      "loss": 3.1742,
      "step": 179235
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4748048782348633,
      "learning_rate": 7.009816940463251e-05,
      "loss": 2.9554,
      "step": 179236
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2859694957733154,
      "learning_rate": 7.009554150206389e-05,
      "loss": 2.9625,
      "step": 179237
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6329312324523926,
      "learning_rate": 7.009291364223875e-05,
      "loss": 3.0282,
      "step": 179238
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.495515823364258,
      "learning_rate": 7.009028582515777e-05,
      "loss": 2.9385,
      "step": 179239
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3240928649902344,
      "learning_rate": 7.008765805082135e-05,
      "loss": 2.9663,
      "step": 179240
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.880646228790283,
      "learning_rate": 7.008503031922996e-05,
      "loss": 3.0229,
      "step": 179241
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.063815116882324,
      "learning_rate": 7.008240263038401e-05,
      "loss": 2.9545,
      "step": 179242
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.177353858947754,
      "learning_rate": 7.00797749842842e-05,
      "loss": 2.8603,
      "step": 179243
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5870909690856934,
      "learning_rate": 7.007714738093081e-05,
      "loss": 2.9739,
      "step": 179244
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.800363540649414,
      "learning_rate": 7.007451982032449e-05,
      "loss": 2.982,
      "step": 179245
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.767761707305908,
      "learning_rate": 7.007189230246565e-05,
      "loss": 2.732,
      "step": 179246
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.191711902618408,
      "learning_rate": 7.006926482735477e-05,
      "loss": 2.9386,
      "step": 179247
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3401660919189453,
      "learning_rate": 7.006663739499233e-05,
      "loss": 3.167,
      "step": 179248
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.323263168334961,
      "learning_rate": 7.006401000537889e-05,
      "loss": 2.8606,
      "step": 179249
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8609766960144043,
      "learning_rate": 7.006138265851478e-05,
      "loss": 2.9594,
      "step": 179250
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4836440086364746,
      "learning_rate": 7.005875535440072e-05,
      "loss": 2.9176,
      "step": 179251
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0797927379608154,
      "learning_rate": 7.005612809303705e-05,
      "loss": 3.1185,
      "step": 179252
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0957391262054443,
      "learning_rate": 7.005350087442434e-05,
      "loss": 3.0033,
      "step": 179253
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.173882246017456,
      "learning_rate": 7.00508736985629e-05,
      "loss": 2.8383,
      "step": 179254
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.462949752807617,
      "learning_rate": 7.004824656545342e-05,
      "loss": 3.0423,
      "step": 179255
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.113471031188965,
      "learning_rate": 7.004561947509623e-05,
      "loss": 2.8184,
      "step": 179256
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.179107904434204,
      "learning_rate": 7.004299242749201e-05,
      "loss": 3.1377,
      "step": 179257
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4882287979125977,
      "learning_rate": 7.004036542264112e-05,
      "loss": 2.7633,
      "step": 179258
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4278204441070557,
      "learning_rate": 7.003773846054406e-05,
      "loss": 3.0191,
      "step": 179259
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.460313320159912,
      "learning_rate": 7.003511154120125e-05,
      "loss": 2.9101,
      "step": 179260
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1818771362304688,
      "learning_rate": 7.003248466461337e-05,
      "loss": 2.9063,
      "step": 179261
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.503732919692993,
      "learning_rate": 7.002985783078064e-05,
      "loss": 3.0475,
      "step": 179262
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.684297800064087,
      "learning_rate": 7.002723103970382e-05,
      "loss": 2.9294,
      "step": 179263
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9550063610076904,
      "learning_rate": 7.00246042913833e-05,
      "loss": 2.843,
      "step": 179264
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0686166286468506,
      "learning_rate": 7.00219775858195e-05,
      "loss": 2.9452,
      "step": 179265
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.891772508621216,
      "learning_rate": 7.001935092301292e-05,
      "loss": 3.1028,
      "step": 179266
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.38460636138916,
      "learning_rate": 7.001672430296415e-05,
      "loss": 3.1558,
      "step": 179267
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.133037805557251,
      "learning_rate": 7.001409772567353e-05,
      "loss": 2.9631,
      "step": 179268
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.3546013832092285,
      "learning_rate": 7.001147119114169e-05,
      "loss": 2.8627,
      "step": 179269
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7087533473968506,
      "learning_rate": 7.000884469936908e-05,
      "loss": 2.9941,
      "step": 179270
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.060073137283325,
      "learning_rate": 7.000621825035618e-05,
      "loss": 3.0368,
      "step": 179271
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7793686389923096,
      "learning_rate": 7.000359184410336e-05,
      "loss": 2.804,
      "step": 179272
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.498666763305664,
      "learning_rate": 7.00009654806113e-05,
      "loss": 2.9316,
      "step": 179273
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.488863945007324,
      "learning_rate": 6.999833915988032e-05,
      "loss": 3.0529,
      "step": 179274
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.768937349319458,
      "learning_rate": 6.999571288191108e-05,
      "loss": 2.8185,
      "step": 179275
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.957240581512451,
      "learning_rate": 6.999308664670398e-05,
      "loss": 3.1935,
      "step": 179276
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.544208288192749,
      "learning_rate": 6.99904604542595e-05,
      "loss": 3.0776,
      "step": 179277
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.422874927520752,
      "learning_rate": 6.998783430457805e-05,
      "loss": 2.8141,
      "step": 179278
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.447693109512329,
      "learning_rate": 6.998520819766029e-05,
      "loss": 2.989,
      "step": 179279
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0498807430267334,
      "learning_rate": 6.998258213350656e-05,
      "loss": 2.966,
      "step": 179280
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6383519172668457,
      "learning_rate": 6.997995611211746e-05,
      "loss": 2.9473,
      "step": 179281
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.434602975845337,
      "learning_rate": 6.997733013349344e-05,
      "loss": 2.9772,
      "step": 179282
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4706525802612305,
      "learning_rate": 6.997470419763487e-05,
      "loss": 2.9093,
      "step": 179283
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8468966484069824,
      "learning_rate": 6.997207830454248e-05,
      "loss": 2.9304,
      "step": 179284
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8235180377960205,
      "learning_rate": 6.99694524542166e-05,
      "loss": 2.8958,
      "step": 179285
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.134493350982666,
      "learning_rate": 6.996682664665765e-05,
      "loss": 2.8709,
      "step": 179286
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.863746166229248,
      "learning_rate": 6.996420088186629e-05,
      "loss": 3.0808,
      "step": 179287
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0472049713134766,
      "learning_rate": 6.996157515984295e-05,
      "loss": 2.9418,
      "step": 179288
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3982033729553223,
      "learning_rate": 6.995894948058795e-05,
      "loss": 3.0804,
      "step": 179289
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.4245383739471436,
      "learning_rate": 6.995632384410205e-05,
      "loss": 2.8435,
      "step": 179290
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.797830581665039,
      "learning_rate": 6.995369825038554e-05,
      "loss": 3.1527,
      "step": 179291
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1422181129455566,
      "learning_rate": 6.995107269943906e-05,
      "loss": 3.1811,
      "step": 179292
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.883406639099121,
      "learning_rate": 6.994844719126303e-05,
      "loss": 2.7743,
      "step": 179293
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.7718558311462402,
      "learning_rate": 6.99458217258578e-05,
      "loss": 2.883,
      "step": 179294
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0492093563079834,
      "learning_rate": 6.994319630322411e-05,
      "loss": 2.9687,
      "step": 179295
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.3990092277526855,
      "learning_rate": 6.994057092336231e-05,
      "loss": 3.0045,
      "step": 179296
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6858057975769043,
      "learning_rate": 6.993794558627278e-05,
      "loss": 2.9269,
      "step": 179297
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0316121578216553,
      "learning_rate": 6.993532029195625e-05,
      "loss": 3.0091,
      "step": 179298
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0721795558929443,
      "learning_rate": 6.993269504041309e-05,
      "loss": 2.8043,
      "step": 179299
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.77793025970459,
      "learning_rate": 6.993006983164369e-05,
      "loss": 2.8575,
      "step": 179300
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.744328737258911,
      "learning_rate": 6.99274446656487e-05,
      "loss": 2.7537,
      "step": 179301
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5364325046539307,
      "learning_rate": 6.992481954242857e-05,
      "loss": 2.8131,
      "step": 179302
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.138366937637329,
      "learning_rate": 6.992219446198368e-05,
      "loss": 3.0642,
      "step": 179303
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0694453716278076,
      "learning_rate": 6.991956942431465e-05,
      "loss": 3.1732,
      "step": 179304
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.410158395767212,
      "learning_rate": 6.991694442942186e-05,
      "loss": 2.8127,
      "step": 179305
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.653834581375122,
      "learning_rate": 6.991431947730592e-05,
      "loss": 3.1341,
      "step": 179306
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0369162559509277,
      "learning_rate": 6.991169456796723e-05,
      "loss": 2.7992,
      "step": 179307
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.5581440925598145,
      "learning_rate": 6.990906970140633e-05,
      "loss": 3.1507,
      "step": 179308
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8914637565612793,
      "learning_rate": 6.99064448776236e-05,
      "loss": 3.1306,
      "step": 179309
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.861872434616089,
      "learning_rate": 6.99038200966197e-05,
      "loss": 2.9632,
      "step": 179310
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6898488998413086,
      "learning_rate": 6.990119535839489e-05,
      "loss": 2.8149,
      "step": 179311
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.965265989303589,
      "learning_rate": 6.98985706629499e-05,
      "loss": 3.0805,
      "step": 179312
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.852323532104492,
      "learning_rate": 6.98959460102851e-05,
      "loss": 2.9328,
      "step": 179313
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2137868404388428,
      "learning_rate": 6.9893321400401e-05,
      "loss": 2.9468,
      "step": 179314
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.146888256072998,
      "learning_rate": 6.989069683329798e-05,
      "loss": 2.9192,
      "step": 179315
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.9559671878814697,
      "learning_rate": 6.988807230897671e-05,
      "loss": 3.0373,
      "step": 179316
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6165690422058105,
      "learning_rate": 6.988544782743753e-05,
      "loss": 3.0619,
      "step": 179317
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.4906089305877686,
      "learning_rate": 6.988282338868104e-05,
      "loss": 2.9727,
      "step": 179318
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.2885255813598633,
      "learning_rate": 6.98801989927077e-05,
      "loss": 2.7771,
      "step": 179319
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.950336217880249,
      "learning_rate": 6.987757463951797e-05,
      "loss": 2.9746,
      "step": 179320
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.6705191135406494,
      "learning_rate": 6.987495032911225e-05,
      "loss": 2.9203,
      "step": 179321
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.719208002090454,
      "learning_rate": 6.987232606149121e-05,
      "loss": 3.1321,
      "step": 179322
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.820769786834717,
      "learning_rate": 6.986970183665518e-05,
      "loss": 2.9582,
      "step": 179323
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.5651297569274902,
      "learning_rate": 6.986707765460479e-05,
      "loss": 3.0369,
      "step": 179324
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.8987910747528076,
      "learning_rate": 6.986445351534043e-05,
      "loss": 3.0087,
      "step": 179325
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.281949520111084,
      "learning_rate": 6.986182941886265e-05,
      "loss": 2.7527,
      "step": 179326
    },
    {
      "epoch": 2.33,
      "grad_norm": 4.310912132263184,
      "learning_rate": 6.985920536517179e-05,
      "loss": 3.0123,
      "step": 179327
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.890313148498535,
      "learning_rate": 6.985658135426856e-05,
      "loss": 2.9645,
      "step": 179328
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.0187032222747803,
      "learning_rate": 6.985395738615321e-05,
      "loss": 2.975,
      "step": 179329
    },
    {
      "epoch": 2.33,
      "grad_norm": 2.757612943649292,
      "learning_rate": 6.985133346082645e-05,
      "loss": 2.7533,
      "step": 179330
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3505985736846924,
      "learning_rate": 6.98487095782887e-05,
      "loss": 2.7828,
      "step": 179331
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5833795070648193,
      "learning_rate": 6.984608573854041e-05,
      "loss": 2.936,
      "step": 179332
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6087992191314697,
      "learning_rate": 6.984346194158197e-05,
      "loss": 2.7426,
      "step": 179333
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8518826961517334,
      "learning_rate": 6.984083818741407e-05,
      "loss": 2.8212,
      "step": 179334
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.545142650604248,
      "learning_rate": 6.983821447603702e-05,
      "loss": 2.9249,
      "step": 179335
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9286885261535645,
      "learning_rate": 6.983559080745149e-05,
      "loss": 3.0992,
      "step": 179336
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6114330291748047,
      "learning_rate": 6.983296718165783e-05,
      "loss": 2.8477,
      "step": 179337
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0202836990356445,
      "learning_rate": 6.983034359865662e-05,
      "loss": 2.984,
      "step": 179338
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.780687093734741,
      "learning_rate": 6.982772005844818e-05,
      "loss": 2.9798,
      "step": 179339
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.624394416809082,
      "learning_rate": 6.982509656103321e-05,
      "loss": 2.996,
      "step": 179340
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.763852119445801,
      "learning_rate": 6.9822473106412e-05,
      "loss": 2.845,
      "step": 179341
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5180978775024414,
      "learning_rate": 6.981984969458519e-05,
      "loss": 3.0198,
      "step": 179342
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7806897163391113,
      "learning_rate": 6.981722632555324e-05,
      "loss": 2.8816,
      "step": 179343
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1364598274230957,
      "learning_rate": 6.981460299931664e-05,
      "loss": 3.0431,
      "step": 179344
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8432908058166504,
      "learning_rate": 6.981197971587573e-05,
      "loss": 2.9129,
      "step": 179345
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.944544553756714,
      "learning_rate": 6.980935647523123e-05,
      "loss": 3.0349,
      "step": 179346
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7251133918762207,
      "learning_rate": 6.980673327738342e-05,
      "loss": 2.8174,
      "step": 179347
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.671593189239502,
      "learning_rate": 6.980411012233298e-05,
      "loss": 2.9206,
      "step": 179348
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9266269207000732,
      "learning_rate": 6.980148701008027e-05,
      "loss": 2.8631,
      "step": 179349
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9062039852142334,
      "learning_rate": 6.979886394062582e-05,
      "loss": 3.0302,
      "step": 179350
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6346683502197266,
      "learning_rate": 6.979624091397004e-05,
      "loss": 2.9319,
      "step": 179351
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1351308822631836,
      "learning_rate": 6.979361793011356e-05,
      "loss": 2.6379,
      "step": 179352
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5518813133239746,
      "learning_rate": 6.979099498905672e-05,
      "loss": 2.8192,
      "step": 179353
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.489978790283203,
      "learning_rate": 6.978837209080016e-05,
      "loss": 2.9086,
      "step": 179354
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5190367698669434,
      "learning_rate": 6.978574923534427e-05,
      "loss": 2.9134,
      "step": 179355
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4397406578063965,
      "learning_rate": 6.978312642268957e-05,
      "loss": 2.9041,
      "step": 179356
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3590734004974365,
      "learning_rate": 6.978050365283644e-05,
      "loss": 2.7515,
      "step": 179357
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0948233604431152,
      "learning_rate": 6.977788092578553e-05,
      "loss": 2.7893,
      "step": 179358
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.541311025619507,
      "learning_rate": 6.977525824153719e-05,
      "loss": 2.943,
      "step": 179359
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.104188919067383,
      "learning_rate": 6.977263560009207e-05,
      "loss": 3.1374,
      "step": 179360
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4452097415924072,
      "learning_rate": 6.977001300145045e-05,
      "loss": 2.9545,
      "step": 179361
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.568165302276611,
      "learning_rate": 6.976739044561312e-05,
      "loss": 3.1421,
      "step": 179362
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4048521518707275,
      "learning_rate": 6.976476793258023e-05,
      "loss": 2.7071,
      "step": 179363
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5971505641937256,
      "learning_rate": 6.976214546235245e-05,
      "loss": 2.9719,
      "step": 179364
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.173095703125,
      "learning_rate": 6.975952303493017e-05,
      "loss": 3.1913,
      "step": 179365
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3175225257873535,
      "learning_rate": 6.975690065031404e-05,
      "loss": 3.0331,
      "step": 179366
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.854696273803711,
      "learning_rate": 6.975427830850434e-05,
      "loss": 2.9988,
      "step": 179367
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.51749849319458,
      "learning_rate": 6.975165600950177e-05,
      "loss": 2.9895,
      "step": 179368
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5803301334381104,
      "learning_rate": 6.974903375330672e-05,
      "loss": 3.0909,
      "step": 179369
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4704856872558594,
      "learning_rate": 6.974641153991966e-05,
      "loss": 2.9583,
      "step": 179370
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.267524003982544,
      "learning_rate": 6.974378936934098e-05,
      "loss": 3.0685,
      "step": 179371
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.196424961090088,
      "learning_rate": 6.97411672415714e-05,
      "loss": 3.095,
      "step": 179372
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6440927982330322,
      "learning_rate": 6.973854515661118e-05,
      "loss": 2.6998,
      "step": 179373
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6416256427764893,
      "learning_rate": 6.9735923114461e-05,
      "loss": 3.047,
      "step": 179374
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.212604284286499,
      "learning_rate": 6.973330111512126e-05,
      "loss": 3.1381,
      "step": 179375
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0526952743530273,
      "learning_rate": 6.973067915859244e-05,
      "loss": 2.6864,
      "step": 179376
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0278141498565674,
      "learning_rate": 6.972805724487493e-05,
      "loss": 3.0387,
      "step": 179377
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4355814456939697,
      "learning_rate": 6.972543537396945e-05,
      "loss": 2.9666,
      "step": 179378
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3940377235412598,
      "learning_rate": 6.972281354587625e-05,
      "loss": 2.7489,
      "step": 179379
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9429938793182373,
      "learning_rate": 6.972019176059601e-05,
      "loss": 2.8116,
      "step": 179380
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4568707942962646,
      "learning_rate": 6.971757001812912e-05,
      "loss": 2.9984,
      "step": 179381
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.803558826446533,
      "learning_rate": 6.971494831847601e-05,
      "loss": 3.0246,
      "step": 179382
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0359818935394287,
      "learning_rate": 6.971232666163733e-05,
      "loss": 3.1721,
      "step": 179383
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.236617088317871,
      "learning_rate": 6.970970504761346e-05,
      "loss": 3.0116,
      "step": 179384
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7310373783111572,
      "learning_rate": 6.970708347640481e-05,
      "loss": 2.8807,
      "step": 179385
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4886722564697266,
      "learning_rate": 6.970446194801208e-05,
      "loss": 2.8505,
      "step": 179386
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6311450004577637,
      "learning_rate": 6.970184046243564e-05,
      "loss": 3.0659,
      "step": 179387
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5309386253356934,
      "learning_rate": 6.969921901967583e-05,
      "loss": 3.1583,
      "step": 179388
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.231691837310791,
      "learning_rate": 6.969659761973342e-05,
      "loss": 3.0738,
      "step": 179389
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.572218894958496,
      "learning_rate": 6.969397626260865e-05,
      "loss": 3.0191,
      "step": 179390
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1818506717681885,
      "learning_rate": 6.969135494830223e-05,
      "loss": 2.8218,
      "step": 179391
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.368666887283325,
      "learning_rate": 6.96887336768145e-05,
      "loss": 2.9876,
      "step": 179392
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.187767505645752,
      "learning_rate": 6.968611244814602e-05,
      "loss": 3.1504,
      "step": 179393
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.456763744354248,
      "learning_rate": 6.968349126229711e-05,
      "loss": 2.8687,
      "step": 179394
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.495222330093384,
      "learning_rate": 6.968087011926852e-05,
      "loss": 2.7762,
      "step": 179395
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.936676025390625,
      "learning_rate": 6.967824901906048e-05,
      "loss": 3.0401,
      "step": 179396
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9856479167938232,
      "learning_rate": 6.967562796167371e-05,
      "loss": 2.7598,
      "step": 179397
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.288297414779663,
      "learning_rate": 6.967300694710859e-05,
      "loss": 2.9827,
      "step": 179398
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5669875144958496,
      "learning_rate": 6.967038597536559e-05,
      "loss": 2.8804,
      "step": 179399
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7780449390411377,
      "learning_rate": 6.966776504644512e-05,
      "loss": 2.944,
      "step": 179400
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.219161033630371,
      "learning_rate": 6.966514416034783e-05,
      "loss": 2.7024,
      "step": 179401
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.2085673809051514,
      "learning_rate": 6.966252331707411e-05,
      "loss": 2.8503,
      "step": 179402
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.937262773513794,
      "learning_rate": 6.965990251662455e-05,
      "loss": 3.1365,
      "step": 179403
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7208008766174316,
      "learning_rate": 6.965728175899953e-05,
      "loss": 3.0313,
      "step": 179404
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.592517614364624,
      "learning_rate": 6.965466104419958e-05,
      "loss": 2.9392,
      "step": 179405
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.7743539810180664,
      "learning_rate": 6.96520403722251e-05,
      "loss": 3.072,
      "step": 179406
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.856410026550293,
      "learning_rate": 6.96494197430767e-05,
      "loss": 3.0518,
      "step": 179407
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.686755418777466,
      "learning_rate": 6.96467991567548e-05,
      "loss": 2.9671,
      "step": 179408
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9219415187835693,
      "learning_rate": 6.964417861325996e-05,
      "loss": 3.0215,
      "step": 179409
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7696304321289062,
      "learning_rate": 6.96415581125926e-05,
      "loss": 3.0076,
      "step": 179410
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.72428297996521,
      "learning_rate": 6.963893765475325e-05,
      "loss": 2.9052,
      "step": 179411
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.421785354614258,
      "learning_rate": 6.963631723974227e-05,
      "loss": 3.1387,
      "step": 179412
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4683597087860107,
      "learning_rate": 6.963369686756032e-05,
      "loss": 2.9875,
      "step": 179413
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.64709734916687,
      "learning_rate": 6.963107653820776e-05,
      "loss": 2.6603,
      "step": 179414
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.107431650161743,
      "learning_rate": 6.96284562516852e-05,
      "loss": 3.0301,
      "step": 179415
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7304372787475586,
      "learning_rate": 6.962583600799306e-05,
      "loss": 2.9934,
      "step": 179416
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1956796646118164,
      "learning_rate": 6.962321580713184e-05,
      "loss": 2.842,
      "step": 179417
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.177471160888672,
      "learning_rate": 6.96205956491019e-05,
      "loss": 2.9359,
      "step": 179418
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.857168197631836,
      "learning_rate": 6.961797553390396e-05,
      "loss": 2.9291,
      "step": 179419
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2437031269073486,
      "learning_rate": 6.961535546153826e-05,
      "loss": 2.7775,
      "step": 179420
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.675539016723633,
      "learning_rate": 6.961273543200553e-05,
      "loss": 3.0238,
      "step": 179421
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.126007556915283,
      "learning_rate": 6.961011544530614e-05,
      "loss": 2.786,
      "step": 179422
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4718286991119385,
      "learning_rate": 6.960749550144055e-05,
      "loss": 2.8464,
      "step": 179423
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.158881902694702,
      "learning_rate": 6.960487560040923e-05,
      "loss": 2.8809,
      "step": 179424
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.447767496109009,
      "learning_rate": 6.960225574221278e-05,
      "loss": 2.5172,
      "step": 179425
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9876017570495605,
      "learning_rate": 6.959963592685152e-05,
      "loss": 2.8461,
      "step": 179426
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2648093700408936,
      "learning_rate": 6.959701615432615e-05,
      "loss": 2.7425,
      "step": 179427
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6355514526367188,
      "learning_rate": 6.959439642463693e-05,
      "loss": 2.7727,
      "step": 179428
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.715024471282959,
      "learning_rate": 6.959177673778465e-05,
      "loss": 2.9377,
      "step": 179429
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0332067012786865,
      "learning_rate": 6.958915709376944e-05,
      "loss": 2.8385,
      "step": 179430
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8829009532928467,
      "learning_rate": 6.958653749259203e-05,
      "loss": 3.1819,
      "step": 179431
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6530892848968506,
      "learning_rate": 6.958391793425275e-05,
      "loss": 2.8569,
      "step": 179432
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5490779876708984,
      "learning_rate": 6.958129841875224e-05,
      "loss": 2.6653,
      "step": 179433
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.340973138809204,
      "learning_rate": 6.957867894609085e-05,
      "loss": 3.0108,
      "step": 179434
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5805633068084717,
      "learning_rate": 6.957605951626931e-05,
      "loss": 2.9052,
      "step": 179435
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.251069068908691,
      "learning_rate": 6.957344012928776e-05,
      "loss": 3.0927,
      "step": 179436
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4248361587524414,
      "learning_rate": 6.957082078514691e-05,
      "loss": 2.9986,
      "step": 179437
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.446404218673706,
      "learning_rate": 6.956820148384715e-05,
      "loss": 2.6311,
      "step": 179438
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6362221240997314,
      "learning_rate": 6.956558222538906e-05,
      "loss": 2.7709,
      "step": 179439
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.9675354957580566,
      "learning_rate": 6.956296300977304e-05,
      "loss": 2.7685,
      "step": 179440
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.423747539520264,
      "learning_rate": 6.956034383699973e-05,
      "loss": 2.9182,
      "step": 179441
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.763980865478516,
      "learning_rate": 6.955772470706936e-05,
      "loss": 2.8077,
      "step": 179442
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0444815158843994,
      "learning_rate": 6.955510561998266e-05,
      "loss": 3.022,
      "step": 179443
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.034095525741577,
      "learning_rate": 6.955248657573988e-05,
      "loss": 3.0693,
      "step": 179444
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.452512741088867,
      "learning_rate": 6.954986757434176e-05,
      "loss": 3.1813,
      "step": 179445
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5645864009857178,
      "learning_rate": 6.95472486157886e-05,
      "loss": 2.9,
      "step": 179446
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.7696917057037354,
      "learning_rate": 6.95446297000811e-05,
      "loss": 2.7778,
      "step": 179447
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1307578086853027,
      "learning_rate": 6.954201082721943e-05,
      "loss": 3.1418,
      "step": 179448
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.473785161972046,
      "learning_rate": 6.953939199720435e-05,
      "loss": 2.7647,
      "step": 179449
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.780047655105591,
      "learning_rate": 6.953677321003617e-05,
      "loss": 3.0433,
      "step": 179450
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.106367826461792,
      "learning_rate": 6.953415446571554e-05,
      "loss": 2.7573,
      "step": 179451
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.369955539703369,
      "learning_rate": 6.953153576424278e-05,
      "loss": 2.7558,
      "step": 179452
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3175809383392334,
      "learning_rate": 6.95289171056185e-05,
      "loss": 2.6435,
      "step": 179453
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.842484474182129,
      "learning_rate": 6.952629848984318e-05,
      "loss": 3.0017,
      "step": 179454
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1022565364837646,
      "learning_rate": 6.952367991691729e-05,
      "loss": 3.0335,
      "step": 179455
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5085203647613525,
      "learning_rate": 6.952106138684118e-05,
      "loss": 3.0193,
      "step": 179456
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.802874803543091,
      "learning_rate": 6.951844289961554e-05,
      "loss": 2.942,
      "step": 179457
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.214398145675659,
      "learning_rate": 6.95158244552407e-05,
      "loss": 2.9336,
      "step": 179458
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1963202953338623,
      "learning_rate": 6.951320605371731e-05,
      "loss": 3.0055,
      "step": 179459
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2538952827453613,
      "learning_rate": 6.951058769504576e-05,
      "loss": 2.978,
      "step": 179460
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.195516586303711,
      "learning_rate": 6.950796937922653e-05,
      "loss": 3.0896,
      "step": 179461
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.663017749786377,
      "learning_rate": 6.950535110626003e-05,
      "loss": 3.0525,
      "step": 179462
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0539116859436035,
      "learning_rate": 6.950273287614695e-05,
      "loss": 2.9699,
      "step": 179463
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3821351528167725,
      "learning_rate": 6.950011468888756e-05,
      "loss": 3.0481,
      "step": 179464
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.276531457901001,
      "learning_rate": 6.949749654448255e-05,
      "loss": 2.9269,
      "step": 179465
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.576331853866577,
      "learning_rate": 6.94948784429323e-05,
      "loss": 2.8252,
      "step": 179466
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2464616298675537,
      "learning_rate": 6.949226038423718e-05,
      "loss": 2.7155,
      "step": 179467
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3169713020324707,
      "learning_rate": 6.948964236839793e-05,
      "loss": 2.928,
      "step": 179468
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.786376476287842,
      "learning_rate": 6.948702439541491e-05,
      "loss": 2.9601,
      "step": 179469
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3686363697052,
      "learning_rate": 6.948440646528848e-05,
      "loss": 2.8779,
      "step": 179470
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2091281414031982,
      "learning_rate": 6.948178857801937e-05,
      "loss": 3.1351,
      "step": 179471
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1466941833496094,
      "learning_rate": 6.947917073360792e-05,
      "loss": 3.0241,
      "step": 179472
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0484719276428223,
      "learning_rate": 6.947655293205459e-05,
      "loss": 3.0232,
      "step": 179473
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.614347457885742,
      "learning_rate": 6.947393517335998e-05,
      "loss": 2.9461,
      "step": 179474
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3434176445007324,
      "learning_rate": 6.947131745752451e-05,
      "loss": 2.9373,
      "step": 179475
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0519156455993652,
      "learning_rate": 6.946869978454863e-05,
      "loss": 3.0579,
      "step": 179476
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.329881191253662,
      "learning_rate": 6.946608215443293e-05,
      "loss": 3.2773,
      "step": 179477
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8296539783477783,
      "learning_rate": 6.946346456717781e-05,
      "loss": 2.9844,
      "step": 179478
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9139599800109863,
      "learning_rate": 6.946084702278376e-05,
      "loss": 2.8358,
      "step": 179479
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.348268747329712,
      "learning_rate": 6.945822952125133e-05,
      "loss": 3.3218,
      "step": 179480
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1235930919647217,
      "learning_rate": 6.94556120625809e-05,
      "loss": 2.98,
      "step": 179481
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.186392784118652,
      "learning_rate": 6.945299464677309e-05,
      "loss": 3.1313,
      "step": 179482
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.536404848098755,
      "learning_rate": 6.945037727382832e-05,
      "loss": 2.8553,
      "step": 179483
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5577547550201416,
      "learning_rate": 6.944775994374708e-05,
      "loss": 2.7306,
      "step": 179484
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3977694511413574,
      "learning_rate": 6.94451426565298e-05,
      "loss": 2.7427,
      "step": 179485
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3465802669525146,
      "learning_rate": 6.944252541217705e-05,
      "loss": 3.044,
      "step": 179486
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8571550846099854,
      "learning_rate": 6.943990821068926e-05,
      "loss": 2.9676,
      "step": 179487
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9020955562591553,
      "learning_rate": 6.9437291052067e-05,
      "loss": 2.9565,
      "step": 179488
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.433187246322632,
      "learning_rate": 6.943467393631069e-05,
      "loss": 2.9301,
      "step": 179489
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0878326892852783,
      "learning_rate": 6.943205686342083e-05,
      "loss": 2.9917,
      "step": 179490
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4595582485198975,
      "learning_rate": 6.942943983339784e-05,
      "loss": 3.2786,
      "step": 179491
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3856048583984375,
      "learning_rate": 6.942682284624232e-05,
      "loss": 2.7271,
      "step": 179492
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0015065670013428,
      "learning_rate": 6.942420590195466e-05,
      "loss": 2.6824,
      "step": 179493
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2884154319763184,
      "learning_rate": 6.942158900053546e-05,
      "loss": 2.8432,
      "step": 179494
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6940927505493164,
      "learning_rate": 6.941897214198504e-05,
      "loss": 2.9414,
      "step": 179495
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.204965114593506,
      "learning_rate": 6.941635532630416e-05,
      "loss": 2.9474,
      "step": 179496
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9937033653259277,
      "learning_rate": 6.941373855349299e-05,
      "loss": 3.1116,
      "step": 179497
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.559434652328491,
      "learning_rate": 6.941112182355223e-05,
      "loss": 2.9729,
      "step": 179498
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9806697368621826,
      "learning_rate": 6.94085051364822e-05,
      "loss": 2.8011,
      "step": 179499
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.799686908721924,
      "learning_rate": 6.940588849228356e-05,
      "loss": 3.0646,
      "step": 179500
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.9092016220092773,
      "learning_rate": 6.940327189095665e-05,
      "loss": 2.8541,
      "step": 179501
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.145723819732666,
      "learning_rate": 6.940065533250218e-05,
      "loss": 2.8437,
      "step": 179502
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2497971057891846,
      "learning_rate": 6.939803881692032e-05,
      "loss": 3.1033,
      "step": 179503
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0342485904693604,
      "learning_rate": 6.939542234421182e-05,
      "loss": 3.0284,
      "step": 179504
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.9865527153015137,
      "learning_rate": 6.939280591437696e-05,
      "loss": 2.9414,
      "step": 179505
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9111506938934326,
      "learning_rate": 6.939018952741641e-05,
      "loss": 2.7767,
      "step": 179506
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.951432704925537,
      "learning_rate": 6.938757318333049e-05,
      "loss": 3.0131,
      "step": 179507
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.053539752960205,
      "learning_rate": 6.938495688211995e-05,
      "loss": 2.8862,
      "step": 179508
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.927098512649536,
      "learning_rate": 6.938234062378494e-05,
      "loss": 2.9616,
      "step": 179509
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6327035427093506,
      "learning_rate": 6.937972440832616e-05,
      "loss": 2.9201,
      "step": 179510
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.2766332626342773,
      "learning_rate": 6.9377108235744e-05,
      "loss": 2.8851,
      "step": 179511
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.030806303024292,
      "learning_rate": 6.937449210603905e-05,
      "loss": 3.0392,
      "step": 179512
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.1045145988464355,
      "learning_rate": 6.937187601921166e-05,
      "loss": 3.1228,
      "step": 179513
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.190739393234253,
      "learning_rate": 6.936925997526256e-05,
      "loss": 3.0117,
      "step": 179514
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.573359966278076,
      "learning_rate": 6.93666439741919e-05,
      "loss": 2.7696,
      "step": 179515
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.101820945739746,
      "learning_rate": 6.936402801600039e-05,
      "loss": 2.8221,
      "step": 179516
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5553438663482666,
      "learning_rate": 6.93614121006884e-05,
      "loss": 2.9539,
      "step": 179517
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4864652156829834,
      "learning_rate": 6.935879622825657e-05,
      "loss": 3.0816,
      "step": 179518
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0224568843841553,
      "learning_rate": 6.93561803987052e-05,
      "loss": 2.893,
      "step": 179519
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.095214366912842,
      "learning_rate": 6.935356461203503e-05,
      "loss": 3.0391,
      "step": 179520
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2298927307128906,
      "learning_rate": 6.935094886824623e-05,
      "loss": 3.2217,
      "step": 179521
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4761147499084473,
      "learning_rate": 6.934833316733952e-05,
      "loss": 2.8579,
      "step": 179522
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6751999855041504,
      "learning_rate": 6.934571750931521e-05,
      "loss": 2.8736,
      "step": 179523
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7201976776123047,
      "learning_rate": 6.9343101894174e-05,
      "loss": 3.0452,
      "step": 179524
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4690582752227783,
      "learning_rate": 6.934048632191615e-05,
      "loss": 2.8604,
      "step": 179525
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9353253841400146,
      "learning_rate": 6.933787079254247e-05,
      "loss": 3.0513,
      "step": 179526
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.429347515106201,
      "learning_rate": 6.933525530605302e-05,
      "loss": 2.7961,
      "step": 179527
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5843589305877686,
      "learning_rate": 6.93326398624486e-05,
      "loss": 2.766,
      "step": 179528
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3117191791534424,
      "learning_rate": 6.933002446172953e-05,
      "loss": 2.8046,
      "step": 179529
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8335092067718506,
      "learning_rate": 6.932740910389643e-05,
      "loss": 3.0136,
      "step": 179530
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.502413272857666,
      "learning_rate": 6.932479378894961e-05,
      "loss": 2.8265,
      "step": 179531
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.505324363708496,
      "learning_rate": 6.932217851688987e-05,
      "loss": 2.898,
      "step": 179532
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4809858798980713,
      "learning_rate": 6.931956328771731e-05,
      "loss": 2.8768,
      "step": 179533
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.812494993209839,
      "learning_rate": 6.931694810143269e-05,
      "loss": 3.1946,
      "step": 179534
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.971251964569092,
      "learning_rate": 6.931433295803629e-05,
      "loss": 2.9895,
      "step": 179535
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.461733818054199,
      "learning_rate": 6.931171785752882e-05,
      "loss": 2.964,
      "step": 179536
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.280118227005005,
      "learning_rate": 6.930910279991056e-05,
      "loss": 2.9426,
      "step": 179537
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.993088722229004,
      "learning_rate": 6.930648778518227e-05,
      "loss": 3.0409,
      "step": 179538
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.775588274002075,
      "learning_rate": 6.930387281334411e-05,
      "loss": 2.9938,
      "step": 179539
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4720823764801025,
      "learning_rate": 6.930125788439677e-05,
      "loss": 2.9548,
      "step": 179540
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5117247104644775,
      "learning_rate": 6.929864299834062e-05,
      "loss": 2.9684,
      "step": 179541
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6339926719665527,
      "learning_rate": 6.929602815517625e-05,
      "loss": 3.0273,
      "step": 179542
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.909752607345581,
      "learning_rate": 6.929341335490405e-05,
      "loss": 3.0603,
      "step": 179543
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.7229113578796387,
      "learning_rate": 6.929079859752464e-05,
      "loss": 2.9806,
      "step": 179544
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4118263721466064,
      "learning_rate": 6.928818388303841e-05,
      "loss": 2.7681,
      "step": 179545
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.822810411453247,
      "learning_rate": 6.92855692114459e-05,
      "loss": 3.0278,
      "step": 179546
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.477013111114502,
      "learning_rate": 6.928295458274745e-05,
      "loss": 2.9788,
      "step": 179547
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9218976497650146,
      "learning_rate": 6.928033999694373e-05,
      "loss": 2.8699,
      "step": 179548
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6191916465759277,
      "learning_rate": 6.927772545403508e-05,
      "loss": 2.7996,
      "step": 179549
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.665523052215576,
      "learning_rate": 6.927511095402212e-05,
      "loss": 2.8239,
      "step": 179550
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.667393445968628,
      "learning_rate": 6.92724964969053e-05,
      "loss": 2.7732,
      "step": 179551
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.59293794631958,
      "learning_rate": 6.926988208268497e-05,
      "loss": 3.2034,
      "step": 179552
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4960055351257324,
      "learning_rate": 6.926726771136181e-05,
      "loss": 2.8682,
      "step": 179553
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4541175365448,
      "learning_rate": 6.926465338293624e-05,
      "loss": 3.1647,
      "step": 179554
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.704724073410034,
      "learning_rate": 6.926203909740862e-05,
      "loss": 2.783,
      "step": 179555
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.485948085784912,
      "learning_rate": 6.925942485477965e-05,
      "loss": 3.1316,
      "step": 179556
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.197392702102661,
      "learning_rate": 6.925681065504968e-05,
      "loss": 2.9921,
      "step": 179557
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.471858978271484,
      "learning_rate": 6.925419649821916e-05,
      "loss": 2.9746,
      "step": 179558
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.580932378768921,
      "learning_rate": 6.925158238428873e-05,
      "loss": 3.0792,
      "step": 179559
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.594379186630249,
      "learning_rate": 6.924896831325876e-05,
      "loss": 3.0057,
      "step": 179560
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2994749546051025,
      "learning_rate": 6.92463542851297e-05,
      "loss": 2.9651,
      "step": 179561
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.805574893951416,
      "learning_rate": 6.924374029990219e-05,
      "loss": 3.1356,
      "step": 179562
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6560702323913574,
      "learning_rate": 6.92411263575766e-05,
      "loss": 2.9646,
      "step": 179563
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.141181707382202,
      "learning_rate": 6.923851245815336e-05,
      "loss": 2.7995,
      "step": 179564
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1986401081085205,
      "learning_rate": 6.923589860163314e-05,
      "loss": 3.2227,
      "step": 179565
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.949157476425171,
      "learning_rate": 6.923328478801621e-05,
      "loss": 3.159,
      "step": 179566
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.975404977798462,
      "learning_rate": 6.923067101730329e-05,
      "loss": 3.0497,
      "step": 179567
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4582927227020264,
      "learning_rate": 6.922805728949473e-05,
      "loss": 2.8694,
      "step": 179568
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.535522222518921,
      "learning_rate": 6.922544360459102e-05,
      "loss": 2.7021,
      "step": 179569
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6948459148406982,
      "learning_rate": 6.922282996259256e-05,
      "loss": 3.0477,
      "step": 179570
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.214569091796875,
      "learning_rate": 6.922021636350005e-05,
      "loss": 3.0155,
      "step": 179571
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.691801071166992,
      "learning_rate": 6.921760280731375e-05,
      "loss": 3.0872,
      "step": 179572
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.2942469120025635,
      "learning_rate": 6.921498929403435e-05,
      "loss": 2.9465,
      "step": 179573
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.740169048309326,
      "learning_rate": 6.921237582366223e-05,
      "loss": 3.008,
      "step": 179574
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.9738991260528564,
      "learning_rate": 6.92097623961979e-05,
      "loss": 2.8524,
      "step": 179575
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.830040454864502,
      "learning_rate": 6.920714901164173e-05,
      "loss": 3.0031,
      "step": 179576
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.218663454055786,
      "learning_rate": 6.920453566999442e-05,
      "loss": 2.884,
      "step": 179577
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.470487117767334,
      "learning_rate": 6.920192237125621e-05,
      "loss": 3.0484,
      "step": 179578
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.162938117980957,
      "learning_rate": 6.919930911542785e-05,
      "loss": 2.7971,
      "step": 179579
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7952229976654053,
      "learning_rate": 6.919669590250959e-05,
      "loss": 2.7849,
      "step": 179580
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5885355472564697,
      "learning_rate": 6.91940827325022e-05,
      "loss": 2.8316,
      "step": 179581
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.519343137741089,
      "learning_rate": 6.91914696054058e-05,
      "loss": 2.728,
      "step": 179582
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8360228538513184,
      "learning_rate": 6.918885652122117e-05,
      "loss": 2.9628,
      "step": 179583
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.659426212310791,
      "learning_rate": 6.918624347994862e-05,
      "loss": 2.7775,
      "step": 179584
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5316483974456787,
      "learning_rate": 6.918363048158872e-05,
      "loss": 3.1931,
      "step": 179585
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.448854446411133,
      "learning_rate": 6.918101752614192e-05,
      "loss": 2.8521,
      "step": 179586
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.673792600631714,
      "learning_rate": 6.917840461360887e-05,
      "loss": 2.8074,
      "step": 179587
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9038257598876953,
      "learning_rate": 6.917579174398973e-05,
      "loss": 2.8158,
      "step": 179588
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.741605281829834,
      "learning_rate": 6.917317891728528e-05,
      "loss": 2.8672,
      "step": 179589
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.454495429992676,
      "learning_rate": 6.917056613349579e-05,
      "loss": 2.7728,
      "step": 179590
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.690737724304199,
      "learning_rate": 6.916795339262193e-05,
      "loss": 2.8063,
      "step": 179591
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.607053756713867,
      "learning_rate": 6.916534069466406e-05,
      "loss": 2.8455,
      "step": 179592
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0514144897460938,
      "learning_rate": 6.916272803962284e-05,
      "loss": 2.7741,
      "step": 179593
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.712567090988159,
      "learning_rate": 6.916011542749846e-05,
      "loss": 2.8019,
      "step": 179594
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.672483205795288,
      "learning_rate": 6.915750285829166e-05,
      "loss": 2.6502,
      "step": 179595
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.802137851715088,
      "learning_rate": 6.915489033200275e-05,
      "loss": 3.091,
      "step": 179596
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.258988618850708,
      "learning_rate": 6.915227784863239e-05,
      "loss": 2.8545,
      "step": 179597
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2646219730377197,
      "learning_rate": 6.914966540818092e-05,
      "loss": 3.0463,
      "step": 179598
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.020007610321045,
      "learning_rate": 6.914705301064902e-05,
      "loss": 2.766,
      "step": 179599
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.653273582458496,
      "learning_rate": 6.914444065603689e-05,
      "loss": 2.8887,
      "step": 179600
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.736046552658081,
      "learning_rate": 6.914182834434523e-05,
      "loss": 3.2022,
      "step": 179601
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8177151679992676,
      "learning_rate": 6.91392160755744e-05,
      "loss": 2.8649,
      "step": 179602
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5001978874206543,
      "learning_rate": 6.913660384972503e-05,
      "loss": 2.8858,
      "step": 179603
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.666624069213867,
      "learning_rate": 6.913399166679744e-05,
      "loss": 3.0487,
      "step": 179604
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8715713024139404,
      "learning_rate": 6.913137952679234e-05,
      "loss": 3.0505,
      "step": 179605
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.554858446121216,
      "learning_rate": 6.912876742970994e-05,
      "loss": 2.8996,
      "step": 179606
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.7650341987609863,
      "learning_rate": 6.912615537555094e-05,
      "loss": 2.7617,
      "step": 179607
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9746570587158203,
      "learning_rate": 6.912354336431564e-05,
      "loss": 2.8225,
      "step": 179608
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9469971656799316,
      "learning_rate": 6.912093139600473e-05,
      "loss": 3.0687,
      "step": 179609
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7974915504455566,
      "learning_rate": 6.911831947061848e-05,
      "loss": 2.8756,
      "step": 179610
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3800952434539795,
      "learning_rate": 6.911570758815771e-05,
      "loss": 3.2244,
      "step": 179611
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.24088191986084,
      "learning_rate": 6.911309574862249e-05,
      "loss": 3.0432,
      "step": 179612
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.6952717304229736,
      "learning_rate": 6.91104839520136e-05,
      "loss": 2.8549,
      "step": 179613
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.261352300643921,
      "learning_rate": 6.910787219833135e-05,
      "loss": 2.8443,
      "step": 179614
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.51135516166687,
      "learning_rate": 6.910526048757638e-05,
      "loss": 2.9596,
      "step": 179615
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7259974479675293,
      "learning_rate": 6.910264881974906e-05,
      "loss": 2.902,
      "step": 179616
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.732856512069702,
      "learning_rate": 6.910003719485001e-05,
      "loss": 2.8304,
      "step": 179617
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3481078147888184,
      "learning_rate": 6.90974256128795e-05,
      "loss": 2.9546,
      "step": 179618
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0957236289978027,
      "learning_rate": 6.90948140738382e-05,
      "loss": 2.8476,
      "step": 179619
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4482595920562744,
      "learning_rate": 6.909220257772645e-05,
      "loss": 2.9682,
      "step": 179620
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9302899837493896,
      "learning_rate": 6.908959112454492e-05,
      "loss": 2.9646,
      "step": 179621
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.712620258331299,
      "learning_rate": 6.908697971429391e-05,
      "loss": 3.0015,
      "step": 179622
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.604550361633301,
      "learning_rate": 6.908436834697416e-05,
      "loss": 2.932,
      "step": 179623
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.799934148788452,
      "learning_rate": 6.908175702258581e-05,
      "loss": 2.9316,
      "step": 179624
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6324946880340576,
      "learning_rate": 6.907914574112957e-05,
      "loss": 2.8997,
      "step": 179625
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.788254737854004,
      "learning_rate": 6.907653450260583e-05,
      "loss": 2.9817,
      "step": 179626
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6344220638275146,
      "learning_rate": 6.907392330701517e-05,
      "loss": 2.774,
      "step": 179627
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7786307334899902,
      "learning_rate": 6.907131215435799e-05,
      "loss": 2.9869,
      "step": 179628
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9573349952697754,
      "learning_rate": 6.906870104463488e-05,
      "loss": 2.9036,
      "step": 179629
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7028517723083496,
      "learning_rate": 6.906608997784627e-05,
      "loss": 2.8038,
      "step": 179630
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7192583084106445,
      "learning_rate": 6.90634789539926e-05,
      "loss": 2.9855,
      "step": 179631
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.835432767868042,
      "learning_rate": 6.906086797307433e-05,
      "loss": 3.021,
      "step": 179632
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.030667781829834,
      "learning_rate": 6.905825703509205e-05,
      "loss": 3.0344,
      "step": 179633
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.588951349258423,
      "learning_rate": 6.905564614004615e-05,
      "loss": 3.0204,
      "step": 179634
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.823511123657227,
      "learning_rate": 6.905303528793724e-05,
      "loss": 3.0635,
      "step": 179635
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.167224884033203,
      "learning_rate": 6.905042447876575e-05,
      "loss": 3.0109,
      "step": 179636
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9709715843200684,
      "learning_rate": 6.904781371253212e-05,
      "loss": 3.1883,
      "step": 179637
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.623173713684082,
      "learning_rate": 6.904520298923678e-05,
      "loss": 2.7313,
      "step": 179638
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.377063274383545,
      "learning_rate": 6.904259230888038e-05,
      "loss": 3.0694,
      "step": 179639
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.306929588317871,
      "learning_rate": 6.903998167146326e-05,
      "loss": 3.0634,
      "step": 179640
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.210464954376221,
      "learning_rate": 6.903737107698604e-05,
      "loss": 2.7163,
      "step": 179641
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.6053640842437744,
      "learning_rate": 6.90347605254491e-05,
      "loss": 2.9638,
      "step": 179642
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8032078742980957,
      "learning_rate": 6.90321500168529e-05,
      "loss": 2.9854,
      "step": 179643
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.572641134262085,
      "learning_rate": 6.902953955119806e-05,
      "loss": 2.9215,
      "step": 179644
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.794512748718262,
      "learning_rate": 6.9026929128485e-05,
      "loss": 2.9428,
      "step": 179645
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.7082109451293945,
      "learning_rate": 6.90243187487141e-05,
      "loss": 2.9434,
      "step": 179646
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.567443609237671,
      "learning_rate": 6.902170841188604e-05,
      "loss": 3.2781,
      "step": 179647
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8767452239990234,
      "learning_rate": 6.90190981180012e-05,
      "loss": 3.091,
      "step": 179648
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.596649646759033,
      "learning_rate": 6.901648786705997e-05,
      "loss": 2.8881,
      "step": 179649
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.733408212661743,
      "learning_rate": 6.901387765906303e-05,
      "loss": 2.7026,
      "step": 179650
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6271116733551025,
      "learning_rate": 6.90112674940107e-05,
      "loss": 2.8884,
      "step": 179651
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5177996158599854,
      "learning_rate": 6.900865737190359e-05,
      "loss": 3.07,
      "step": 179652
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.290583610534668,
      "learning_rate": 6.900604729274215e-05,
      "loss": 2.8034,
      "step": 179653
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.7573442459106445,
      "learning_rate": 6.900343725652683e-05,
      "loss": 3.1492,
      "step": 179654
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1383087635040283,
      "learning_rate": 6.900082726325808e-05,
      "loss": 2.8435,
      "step": 179655
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4023165702819824,
      "learning_rate": 6.899821731293649e-05,
      "loss": 2.8847,
      "step": 179656
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.443742513656616,
      "learning_rate": 6.89956074055624e-05,
      "loss": 2.8502,
      "step": 179657
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.348324298858643,
      "learning_rate": 6.899299754113649e-05,
      "loss": 3.0338,
      "step": 179658
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.399986505508423,
      "learning_rate": 6.899038771965917e-05,
      "loss": 2.813,
      "step": 179659
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3579633235931396,
      "learning_rate": 6.898777794113085e-05,
      "loss": 2.9608,
      "step": 179660
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7886533737182617,
      "learning_rate": 6.898516820555202e-05,
      "loss": 2.9519,
      "step": 179661
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.419987201690674,
      "learning_rate": 6.898255851292325e-05,
      "loss": 3.0219,
      "step": 179662
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.590270757675171,
      "learning_rate": 6.897994886324493e-05,
      "loss": 2.8787,
      "step": 179663
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7664127349853516,
      "learning_rate": 6.897733925651765e-05,
      "loss": 2.9531,
      "step": 179664
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.20712947845459,
      "learning_rate": 6.897472969274178e-05,
      "loss": 2.8798,
      "step": 179665
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.796976327896118,
      "learning_rate": 6.897212017191806e-05,
      "loss": 2.6888,
      "step": 179666
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5864486694335938,
      "learning_rate": 6.896951069404656e-05,
      "loss": 2.8352,
      "step": 179667
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.390188217163086,
      "learning_rate": 6.896690125912812e-05,
      "loss": 3.0872,
      "step": 179668
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.820598602294922,
      "learning_rate": 6.896429186716299e-05,
      "loss": 2.8472,
      "step": 179669
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8400187492370605,
      "learning_rate": 6.896168251815186e-05,
      "loss": 2.7147,
      "step": 179670
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5031626224517822,
      "learning_rate": 6.895907321209501e-05,
      "loss": 3.0898,
      "step": 179671
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2413885593414307,
      "learning_rate": 6.895646394899322e-05,
      "loss": 2.7644,
      "step": 179672
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7059059143066406,
      "learning_rate": 6.895385472884658e-05,
      "loss": 2.9364,
      "step": 179673
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.653226613998413,
      "learning_rate": 6.895124555165591e-05,
      "loss": 2.9494,
      "step": 179674
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6269595623016357,
      "learning_rate": 6.894863641742144e-05,
      "loss": 2.6328,
      "step": 179675
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.876786708831787,
      "learning_rate": 6.894602732614388e-05,
      "loss": 3.0112,
      "step": 179676
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9851784706115723,
      "learning_rate": 6.894341827782356e-05,
      "loss": 2.9905,
      "step": 179677
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0685291290283203,
      "learning_rate": 6.894080927246114e-05,
      "loss": 2.904,
      "step": 179678
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.673032522201538,
      "learning_rate": 6.893820031005685e-05,
      "loss": 3.0842,
      "step": 179679
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4104514122009277,
      "learning_rate": 6.893559139061137e-05,
      "loss": 2.9419,
      "step": 179680
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.487884998321533,
      "learning_rate": 6.893298251412507e-05,
      "loss": 3.1505,
      "step": 179681
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.770442485809326,
      "learning_rate": 6.893037368059856e-05,
      "loss": 3.0469,
      "step": 179682
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.801255226135254,
      "learning_rate": 6.892776489003216e-05,
      "loss": 2.9628,
      "step": 179683
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.338845729827881,
      "learning_rate": 6.892515614242662e-05,
      "loss": 2.8,
      "step": 179684
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8900346755981445,
      "learning_rate": 6.892254743778212e-05,
      "loss": 3.1715,
      "step": 179685
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.512047529220581,
      "learning_rate": 6.891993877609932e-05,
      "loss": 2.9953,
      "step": 179686
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0494537353515625,
      "learning_rate": 6.891733015737861e-05,
      "loss": 3.022,
      "step": 179687
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2383999824523926,
      "learning_rate": 6.891472158162058e-05,
      "loss": 3.0847,
      "step": 179688
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.973416805267334,
      "learning_rate": 6.891211304882561e-05,
      "loss": 2.9419,
      "step": 179689
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.682555913925171,
      "learning_rate": 6.890950455899441e-05,
      "loss": 2.8157,
      "step": 179690
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.442385673522949,
      "learning_rate": 6.890689611212714e-05,
      "loss": 3.203,
      "step": 179691
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7547242641448975,
      "learning_rate": 6.89042877082245e-05,
      "loss": 3.0665,
      "step": 179692
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.368710517883301,
      "learning_rate": 6.890167934728684e-05,
      "loss": 2.9486,
      "step": 179693
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5387251377105713,
      "learning_rate": 6.889907102931479e-05,
      "loss": 2.9839,
      "step": 179694
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.9504928588867188,
      "learning_rate": 6.889646275430871e-05,
      "loss": 2.872,
      "step": 179695
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.5913801193237305,
      "learning_rate": 6.88938545222692e-05,
      "loss": 2.8762,
      "step": 179696
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7256109714508057,
      "learning_rate": 6.889124633319668e-05,
      "loss": 2.9709,
      "step": 179697
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7451865673065186,
      "learning_rate": 6.888863818709166e-05,
      "loss": 3.0596,
      "step": 179698
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.08478045463562,
      "learning_rate": 6.88860300839545e-05,
      "loss": 3.1131,
      "step": 179699
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.160963773727417,
      "learning_rate": 6.888342202378591e-05,
      "loss": 3.2462,
      "step": 179700
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.14639139175415,
      "learning_rate": 6.888081400658614e-05,
      "loss": 2.9502,
      "step": 179701
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.191839694976807,
      "learning_rate": 6.887820603235586e-05,
      "loss": 2.8595,
      "step": 179702
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8786537647247314,
      "learning_rate": 6.887559810109551e-05,
      "loss": 2.7806,
      "step": 179703
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.667708396911621,
      "learning_rate": 6.887299021280554e-05,
      "loss": 2.8213,
      "step": 179704
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.369385004043579,
      "learning_rate": 6.887038236748637e-05,
      "loss": 2.9837,
      "step": 179705
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6513099670410156,
      "learning_rate": 6.886777456513865e-05,
      "loss": 2.983,
      "step": 179706
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.603767156600952,
      "learning_rate": 6.886516680576267e-05,
      "loss": 3.1078,
      "step": 179707
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0814223289489746,
      "learning_rate": 6.88625590893591e-05,
      "loss": 2.8384,
      "step": 179708
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.485799551010132,
      "learning_rate": 6.885995141592834e-05,
      "loss": 2.6841,
      "step": 179709
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.608576774597168,
      "learning_rate": 6.885734378547091e-05,
      "loss": 3.2333,
      "step": 179710
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4342939853668213,
      "learning_rate": 6.885473619798714e-05,
      "loss": 2.6847,
      "step": 179711
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.633056879043579,
      "learning_rate": 6.885212865347776e-05,
      "loss": 2.8864,
      "step": 179712
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9683678150177,
      "learning_rate": 6.8849521151943e-05,
      "loss": 2.8377,
      "step": 179713
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0334863662719727,
      "learning_rate": 6.884691369338359e-05,
      "loss": 2.6967,
      "step": 179714
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.935664176940918,
      "learning_rate": 6.884430627779992e-05,
      "loss": 3.0092,
      "step": 179715
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8123788833618164,
      "learning_rate": 6.884169890519243e-05,
      "loss": 3.2279,
      "step": 179716
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6134238243103027,
      "learning_rate": 6.883909157556157e-05,
      "loss": 2.8448,
      "step": 179717
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7259485721588135,
      "learning_rate": 6.883648428890796e-05,
      "loss": 3.0123,
      "step": 179718
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.337092161178589,
      "learning_rate": 6.88338770452319e-05,
      "loss": 3.1101,
      "step": 179719
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5171992778778076,
      "learning_rate": 6.883126984453409e-05,
      "loss": 3.0373,
      "step": 179720
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.539775848388672,
      "learning_rate": 6.882866268681491e-05,
      "loss": 2.9534,
      "step": 179721
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.540750741958618,
      "learning_rate": 6.882605557207485e-05,
      "loss": 2.9046,
      "step": 179722
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.9708924293518066,
      "learning_rate": 6.882344850031432e-05,
      "loss": 2.7611,
      "step": 179723
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9483802318573,
      "learning_rate": 6.882084147153397e-05,
      "loss": 3.0385,
      "step": 179724
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0698041915893555,
      "learning_rate": 6.881823448573406e-05,
      "loss": 2.6669,
      "step": 179725
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6417953968048096,
      "learning_rate": 6.881562754291531e-05,
      "loss": 2.9966,
      "step": 179726
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0932490825653076,
      "learning_rate": 6.881302064307807e-05,
      "loss": 2.8319,
      "step": 179727
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.160205841064453,
      "learning_rate": 6.881041378622283e-05,
      "loss": 3.0343,
      "step": 179728
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3257620334625244,
      "learning_rate": 6.880780697235013e-05,
      "loss": 3.0761,
      "step": 179729
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.056497573852539,
      "learning_rate": 6.880520020146045e-05,
      "loss": 2.7069,
      "step": 179730
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0668652057647705,
      "learning_rate": 6.880259347355415e-05,
      "loss": 2.782,
      "step": 179731
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.05530309677124,
      "learning_rate": 6.879998678863192e-05,
      "loss": 3.0889,
      "step": 179732
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8458175659179688,
      "learning_rate": 6.879738014669411e-05,
      "loss": 3.0057,
      "step": 179733
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.813166379928589,
      "learning_rate": 6.879477354774114e-05,
      "loss": 2.9735,
      "step": 179734
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8260250091552734,
      "learning_rate": 6.87921669917737e-05,
      "loss": 3.0367,
      "step": 179735
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6624391078948975,
      "learning_rate": 6.878956047879205e-05,
      "loss": 2.8769,
      "step": 179736
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.32833194732666,
      "learning_rate": 6.878695400879688e-05,
      "loss": 2.9955,
      "step": 179737
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3566761016845703,
      "learning_rate": 6.878434758178858e-05,
      "loss": 2.7271,
      "step": 179738
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.670950412750244,
      "learning_rate": 6.878174119776765e-05,
      "loss": 3.2378,
      "step": 179739
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.715073585510254,
      "learning_rate": 6.877913485673445e-05,
      "loss": 2.8537,
      "step": 179740
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.353504180908203,
      "learning_rate": 6.877652855868968e-05,
      "loss": 3.0136,
      "step": 179741
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.735111713409424,
      "learning_rate": 6.87739223036336e-05,
      "loss": 2.7355,
      "step": 179742
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6798572540283203,
      "learning_rate": 6.877131609156691e-05,
      "loss": 2.7903,
      "step": 179743
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4322259426116943,
      "learning_rate": 6.876870992248999e-05,
      "loss": 3.1822,
      "step": 179744
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.813972234725952,
      "learning_rate": 6.876610379640336e-05,
      "loss": 2.9755,
      "step": 179745
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.381993532180786,
      "learning_rate": 6.876349771330737e-05,
      "loss": 2.8569,
      "step": 179746
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.520958662033081,
      "learning_rate": 6.876089167320272e-05,
      "loss": 2.8223,
      "step": 179747
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5796165466308594,
      "learning_rate": 6.875828567608969e-05,
      "loss": 2.8337,
      "step": 179748
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.414783477783203,
      "learning_rate": 6.875567972196892e-05,
      "loss": 2.8132,
      "step": 179749
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4115896224975586,
      "learning_rate": 6.875307381084078e-05,
      "loss": 2.7831,
      "step": 179750
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3009088039398193,
      "learning_rate": 6.875046794270596e-05,
      "loss": 3.0732,
      "step": 179751
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.141679525375366,
      "learning_rate": 6.874786211756463e-05,
      "loss": 3.0176,
      "step": 179752
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6399753093719482,
      "learning_rate": 6.874525633541751e-05,
      "loss": 2.9459,
      "step": 179753
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.689060688018799,
      "learning_rate": 6.874265059626496e-05,
      "loss": 2.9009,
      "step": 179754
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.979835033416748,
      "learning_rate": 6.874004490010758e-05,
      "loss": 2.9263,
      "step": 179755
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2077853679656982,
      "learning_rate": 6.873743924694572e-05,
      "loss": 2.8259,
      "step": 179756
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9116299152374268,
      "learning_rate": 6.873483363678009e-05,
      "loss": 2.8739,
      "step": 179757
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.572540521621704,
      "learning_rate": 6.873222806961082e-05,
      "loss": 2.9348,
      "step": 179758
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9354166984558105,
      "learning_rate": 6.872962254543871e-05,
      "loss": 2.9835,
      "step": 179759
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7170820236206055,
      "learning_rate": 6.872701706426407e-05,
      "loss": 2.7648,
      "step": 179760
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1377975940704346,
      "learning_rate": 6.872441162608749e-05,
      "loss": 3.0456,
      "step": 179761
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.386436939239502,
      "learning_rate": 6.872180623090933e-05,
      "loss": 3.0188,
      "step": 179762
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.992708683013916,
      "learning_rate": 6.871920087873027e-05,
      "loss": 2.9324,
      "step": 179763
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4171621799468994,
      "learning_rate": 6.871659556955064e-05,
      "loss": 3.167,
      "step": 179764
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.372823715209961,
      "learning_rate": 6.871399030337094e-05,
      "loss": 2.6167,
      "step": 179765
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6950254440307617,
      "learning_rate": 6.871138508019162e-05,
      "loss": 3.1824,
      "step": 179766
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4870240688323975,
      "learning_rate": 6.870877990001331e-05,
      "loss": 2.9588,
      "step": 179767
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7624833583831787,
      "learning_rate": 6.870617476283633e-05,
      "loss": 2.6863,
      "step": 179768
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.596118688583374,
      "learning_rate": 6.870356966866128e-05,
      "loss": 2.7525,
      "step": 179769
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5578300952911377,
      "learning_rate": 6.870096461748864e-05,
      "loss": 3.2463,
      "step": 179770
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6196203231811523,
      "learning_rate": 6.869835960931887e-05,
      "loss": 2.8593,
      "step": 179771
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.13303804397583,
      "learning_rate": 6.869575464415232e-05,
      "loss": 2.9702,
      "step": 179772
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9152002334594727,
      "learning_rate": 6.869314972198968e-05,
      "loss": 2.8814,
      "step": 179773
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3875675201416016,
      "learning_rate": 6.869054484283127e-05,
      "loss": 2.9128,
      "step": 179774
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4776546955108643,
      "learning_rate": 6.868794000667776e-05,
      "loss": 2.8476,
      "step": 179775
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.767420768737793,
      "learning_rate": 6.868533521352953e-05,
      "loss": 2.9999,
      "step": 179776
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0029754638671875,
      "learning_rate": 6.868273046338702e-05,
      "loss": 2.6722,
      "step": 179777
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8014302253723145,
      "learning_rate": 6.868012575625071e-05,
      "loss": 2.7842,
      "step": 179778
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5894415378570557,
      "learning_rate": 6.867752109212119e-05,
      "loss": 2.9568,
      "step": 179779
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1526918411254883,
      "learning_rate": 6.867491647099881e-05,
      "loss": 2.8585,
      "step": 179780
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.565369606018066,
      "learning_rate": 6.867231189288422e-05,
      "loss": 2.9434,
      "step": 179781
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.711845636367798,
      "learning_rate": 6.866970735777784e-05,
      "loss": 3.149,
      "step": 179782
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.451695680618286,
      "learning_rate": 6.866710286568008e-05,
      "loss": 2.7932,
      "step": 179783
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.667816162109375,
      "learning_rate": 6.866449841659142e-05,
      "loss": 3.0962,
      "step": 179784
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.388442277908325,
      "learning_rate": 6.866189401051246e-05,
      "loss": 3.1188,
      "step": 179785
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.806405782699585,
      "learning_rate": 6.865928964744354e-05,
      "loss": 2.8496,
      "step": 179786
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.009859323501587,
      "learning_rate": 6.865668532738534e-05,
      "loss": 2.7461,
      "step": 179787
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3753278255462646,
      "learning_rate": 6.865408105033819e-05,
      "loss": 2.9666,
      "step": 179788
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.501063108444214,
      "learning_rate": 6.865147681630265e-05,
      "loss": 2.9764,
      "step": 179789
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.333768367767334,
      "learning_rate": 6.864887262527905e-05,
      "loss": 3.0002,
      "step": 179790
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7339284420013428,
      "learning_rate": 6.864626847726808e-05,
      "loss": 3.1697,
      "step": 179791
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.695976734161377,
      "learning_rate": 6.864366437227005e-05,
      "loss": 2.9622,
      "step": 179792
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3250091075897217,
      "learning_rate": 6.864106031028564e-05,
      "loss": 2.8232,
      "step": 179793
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8042359352111816,
      "learning_rate": 6.863845629131521e-05,
      "loss": 2.9346,
      "step": 179794
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.848572015762329,
      "learning_rate": 6.863585231535925e-05,
      "loss": 3.0356,
      "step": 179795
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.763721227645874,
      "learning_rate": 6.863324838241818e-05,
      "loss": 3.0713,
      "step": 179796
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.737379789352417,
      "learning_rate": 6.863064449249264e-05,
      "loss": 2.9876,
      "step": 179797
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0186543464660645,
      "learning_rate": 6.862804064558292e-05,
      "loss": 2.8458,
      "step": 179798
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.204313278198242,
      "learning_rate": 6.862543684168972e-05,
      "loss": 3.0868,
      "step": 179799
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.635702133178711,
      "learning_rate": 6.862283308081341e-05,
      "loss": 2.7878,
      "step": 179800
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.304164171218872,
      "learning_rate": 6.86202293629545e-05,
      "loss": 2.7504,
      "step": 179801
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5342376232147217,
      "learning_rate": 6.861762568811336e-05,
      "loss": 2.8123,
      "step": 179802
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.686223030090332,
      "learning_rate": 6.861502205629065e-05,
      "loss": 2.9254,
      "step": 179803
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.646498918533325,
      "learning_rate": 6.861241846748671e-05,
      "loss": 3.0852,
      "step": 179804
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8416428565979004,
      "learning_rate": 6.860981492170217e-05,
      "loss": 2.689,
      "step": 179805
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3830039501190186,
      "learning_rate": 6.860721141893744e-05,
      "loss": 2.9605,
      "step": 179806
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.853793144226074,
      "learning_rate": 6.860460795919298e-05,
      "loss": 2.8422,
      "step": 179807
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.635632276535034,
      "learning_rate": 6.860200454246922e-05,
      "loss": 2.7673,
      "step": 179808
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8656342029571533,
      "learning_rate": 6.859940116876677e-05,
      "loss": 2.9441,
      "step": 179809
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.881169557571411,
      "learning_rate": 6.859679783808601e-05,
      "loss": 2.8685,
      "step": 179810
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5203449726104736,
      "learning_rate": 6.859419455042753e-05,
      "loss": 2.9381,
      "step": 179811
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.087860345840454,
      "learning_rate": 6.859159130579177e-05,
      "loss": 2.7847,
      "step": 179812
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.589876413345337,
      "learning_rate": 6.858898810417913e-05,
      "loss": 2.9739,
      "step": 179813
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.339042901992798,
      "learning_rate": 6.858638494559024e-05,
      "loss": 2.7176,
      "step": 179814
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7912092208862305,
      "learning_rate": 6.85837818300255e-05,
      "loss": 3.052,
      "step": 179815
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.29839825630188,
      "learning_rate": 6.858117875748532e-05,
      "loss": 2.9241,
      "step": 179816
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1390655040740967,
      "learning_rate": 6.857857572797035e-05,
      "loss": 2.7947,
      "step": 179817
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4681508541107178,
      "learning_rate": 6.8575972741481e-05,
      "loss": 2.9208,
      "step": 179818
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2988996505737305,
      "learning_rate": 6.857336979801767e-05,
      "loss": 2.803,
      "step": 179819
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.660860538482666,
      "learning_rate": 6.857076689758098e-05,
      "loss": 2.8481,
      "step": 179820
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.132550001144409,
      "learning_rate": 6.856816404017136e-05,
      "loss": 2.9336,
      "step": 179821
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9570577144622803,
      "learning_rate": 6.856556122578921e-05,
      "loss": 2.8994,
      "step": 179822
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.975104570388794,
      "learning_rate": 6.85629584544352e-05,
      "loss": 2.9826,
      "step": 179823
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.801057815551758,
      "learning_rate": 6.856035572610967e-05,
      "loss": 2.9068,
      "step": 179824
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9784317016601562,
      "learning_rate": 6.855775304081306e-05,
      "loss": 2.8701,
      "step": 179825
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.907050132751465,
      "learning_rate": 6.855515039854602e-05,
      "loss": 2.9337,
      "step": 179826
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1314666271209717,
      "learning_rate": 6.855254779930886e-05,
      "loss": 2.9365,
      "step": 179827
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2037737369537354,
      "learning_rate": 6.854994524310223e-05,
      "loss": 3.0676,
      "step": 179828
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9067752361297607,
      "learning_rate": 6.854734272992656e-05,
      "loss": 2.9363,
      "step": 179829
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7968297004699707,
      "learning_rate": 6.854474025978221e-05,
      "loss": 2.795,
      "step": 179830
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.416018486022949,
      "learning_rate": 6.854213783266983e-05,
      "loss": 3.0852,
      "step": 179831
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.978375196456909,
      "learning_rate": 6.853953544858983e-05,
      "loss": 2.9694,
      "step": 179832
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.639120578765869,
      "learning_rate": 6.853693310754265e-05,
      "loss": 2.9112,
      "step": 179833
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7068567276000977,
      "learning_rate": 6.853433080952886e-05,
      "loss": 3.0874,
      "step": 179834
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1781976222991943,
      "learning_rate": 6.853172855454887e-05,
      "loss": 2.8443,
      "step": 179835
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.126582384109497,
      "learning_rate": 6.852912634260328e-05,
      "loss": 3.1134,
      "step": 179836
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7837870121002197,
      "learning_rate": 6.852652417369246e-05,
      "loss": 3.1028,
      "step": 179837
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6459531784057617,
      "learning_rate": 6.852392204781696e-05,
      "loss": 2.9125,
      "step": 179838
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.184020519256592,
      "learning_rate": 6.852131996497712e-05,
      "loss": 2.8099,
      "step": 179839
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6577186584472656,
      "learning_rate": 6.851871792517364e-05,
      "loss": 3.0844,
      "step": 179840
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4867067337036133,
      "learning_rate": 6.851611592840683e-05,
      "loss": 2.917,
      "step": 179841
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0813286304473877,
      "learning_rate": 6.851351397467731e-05,
      "loss": 3.2133,
      "step": 179842
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.089332342147827,
      "learning_rate": 6.851091206398551e-05,
      "loss": 2.8934,
      "step": 179843
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8177475929260254,
      "learning_rate": 6.850831019633188e-05,
      "loss": 3.1141,
      "step": 179844
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6112022399902344,
      "learning_rate": 6.850570837171686e-05,
      "loss": 2.8117,
      "step": 179845
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.295646905899048,
      "learning_rate": 6.850310659014108e-05,
      "loss": 2.9245,
      "step": 179846
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.098144769668579,
      "learning_rate": 6.850050485160483e-05,
      "loss": 2.9885,
      "step": 179847
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.33620285987854,
      "learning_rate": 6.849790315610882e-05,
      "loss": 2.6092,
      "step": 179848
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.855847120285034,
      "learning_rate": 6.849530150365342e-05,
      "loss": 2.8275,
      "step": 179849
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5967512130737305,
      "learning_rate": 6.849269989423909e-05,
      "loss": 2.7075,
      "step": 179850
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0641582012176514,
      "learning_rate": 6.849009832786627e-05,
      "loss": 3.1915,
      "step": 179851
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.686556339263916,
      "learning_rate": 6.84874968045356e-05,
      "loss": 2.8736,
      "step": 179852
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4308838844299316,
      "learning_rate": 6.848489532424738e-05,
      "loss": 2.9166,
      "step": 179853
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6332521438598633,
      "learning_rate": 6.84822938870023e-05,
      "loss": 2.7595,
      "step": 179854
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.375169515609741,
      "learning_rate": 6.847969249280067e-05,
      "loss": 2.8302,
      "step": 179855
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.922966718673706,
      "learning_rate": 6.847709114164309e-05,
      "loss": 2.8086,
      "step": 179856
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.759099006652832,
      "learning_rate": 6.847448983352988e-05,
      "loss": 2.9932,
      "step": 179857
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9382822513580322,
      "learning_rate": 6.847188856846173e-05,
      "loss": 2.9644,
      "step": 179858
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.381246328353882,
      "learning_rate": 6.846928734643895e-05,
      "loss": 2.8148,
      "step": 179859
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4138505458831787,
      "learning_rate": 6.846668616746217e-05,
      "loss": 2.9725,
      "step": 179860
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9628701210021973,
      "learning_rate": 6.846408503153178e-05,
      "loss": 2.8946,
      "step": 179861
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.686093807220459,
      "learning_rate": 6.846148393864834e-05,
      "loss": 2.8401,
      "step": 179862
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1228857040405273,
      "learning_rate": 6.845888288881216e-05,
      "loss": 3.1093,
      "step": 179863
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.245544910430908,
      "learning_rate": 6.845628188202394e-05,
      "loss": 2.9163,
      "step": 179864
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.881591320037842,
      "learning_rate": 6.845368091828399e-05,
      "loss": 3.0387,
      "step": 179865
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8804216384887695,
      "learning_rate": 6.845107999759294e-05,
      "loss": 2.9088,
      "step": 179866
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.494619846343994,
      "learning_rate": 6.844847911995121e-05,
      "loss": 3.1523,
      "step": 179867
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.9864110946655273,
      "learning_rate": 6.844587828535929e-05,
      "loss": 2.5595,
      "step": 179868
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5499427318573,
      "learning_rate": 6.844327749381757e-05,
      "loss": 3.0345,
      "step": 179869
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.757571220397949,
      "learning_rate": 6.844067674532666e-05,
      "loss": 3.2766,
      "step": 179870
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6215872764587402,
      "learning_rate": 6.843807603988696e-05,
      "loss": 3.0766,
      "step": 179871
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0604748725891113,
      "learning_rate": 6.843547537749905e-05,
      "loss": 3.1016,
      "step": 179872
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.474203109741211,
      "learning_rate": 6.843287475816336e-05,
      "loss": 2.8666,
      "step": 179873
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7553045749664307,
      "learning_rate": 6.843027418188039e-05,
      "loss": 2.8629,
      "step": 179874
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6837596893310547,
      "learning_rate": 6.84276736486505e-05,
      "loss": 2.8882,
      "step": 179875
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9540820121765137,
      "learning_rate": 6.842507315847436e-05,
      "loss": 2.8963,
      "step": 179876
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8245418071746826,
      "learning_rate": 6.84224727113523e-05,
      "loss": 2.8588,
      "step": 179877
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6453683376312256,
      "learning_rate": 6.841987230728495e-05,
      "loss": 2.9812,
      "step": 179878
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5977632999420166,
      "learning_rate": 6.84172719462727e-05,
      "loss": 2.8185,
      "step": 179879
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6441304683685303,
      "learning_rate": 6.841467162831607e-05,
      "loss": 2.8953,
      "step": 179880
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4613289833068848,
      "learning_rate": 6.841207135341544e-05,
      "loss": 2.9905,
      "step": 179881
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.325932741165161,
      "learning_rate": 6.840947112157147e-05,
      "loss": 2.8547,
      "step": 179882
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1401965618133545,
      "learning_rate": 6.840687093278445e-05,
      "loss": 2.9215,
      "step": 179883
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5208420753479004,
      "learning_rate": 6.840427078705506e-05,
      "loss": 2.9142,
      "step": 179884
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1540489196777344,
      "learning_rate": 6.840167068438369e-05,
      "loss": 2.8549,
      "step": 179885
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.870891809463501,
      "learning_rate": 6.839907062477081e-05,
      "loss": 3.1003,
      "step": 179886
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7890589237213135,
      "learning_rate": 6.839647060821683e-05,
      "loss": 2.825,
      "step": 179887
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.619281053543091,
      "learning_rate": 6.839387063472239e-05,
      "loss": 3.072,
      "step": 179888
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1653878688812256,
      "learning_rate": 6.839127070428785e-05,
      "loss": 2.8802,
      "step": 179889
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8256542682647705,
      "learning_rate": 6.838867081691383e-05,
      "loss": 2.7907,
      "step": 179890
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8287479877471924,
      "learning_rate": 6.838607097260069e-05,
      "loss": 2.8117,
      "step": 179891
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7969722747802734,
      "learning_rate": 6.838347117134898e-05,
      "loss": 2.9064,
      "step": 179892
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.11396861076355,
      "learning_rate": 6.838087141315906e-05,
      "loss": 2.9143,
      "step": 179893
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5985100269317627,
      "learning_rate": 6.837827169803159e-05,
      "loss": 2.7458,
      "step": 179894
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1043009757995605,
      "learning_rate": 6.837567202596689e-05,
      "loss": 2.7638,
      "step": 179895
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.424405097961426,
      "learning_rate": 6.83730723969656e-05,
      "loss": 2.9533,
      "step": 179896
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.0274200439453125,
      "learning_rate": 6.837047281102805e-05,
      "loss": 2.8161,
      "step": 179897
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4200963973999023,
      "learning_rate": 6.83678732681549e-05,
      "loss": 3.0467,
      "step": 179898
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.857335090637207,
      "learning_rate": 6.836527376834655e-05,
      "loss": 3.0002,
      "step": 179899
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5130844116210938,
      "learning_rate": 6.836267431160343e-05,
      "loss": 2.962,
      "step": 179900
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.959744930267334,
      "learning_rate": 6.836007489792599e-05,
      "loss": 2.9113,
      "step": 179901
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.425768852233887,
      "learning_rate": 6.835747552731487e-05,
      "loss": 2.7594,
      "step": 179902
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5817880630493164,
      "learning_rate": 6.835487619977039e-05,
      "loss": 3.0012,
      "step": 179903
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9903993606567383,
      "learning_rate": 6.835227691529317e-05,
      "loss": 2.8426,
      "step": 179904
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.57066011428833,
      "learning_rate": 6.834967767388366e-05,
      "loss": 2.8942,
      "step": 179905
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.974465847015381,
      "learning_rate": 6.834707847554231e-05,
      "loss": 2.9836,
      "step": 179906
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.410954236984253,
      "learning_rate": 6.834447932026953e-05,
      "loss": 2.8677,
      "step": 179907
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.458904266357422,
      "learning_rate": 6.834188020806597e-05,
      "loss": 3.0004,
      "step": 179908
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.741373300552368,
      "learning_rate": 6.833928113893193e-05,
      "loss": 2.8969,
      "step": 179909
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.152052402496338,
      "learning_rate": 6.833668211286809e-05,
      "loss": 2.9001,
      "step": 179910
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.710880279541016,
      "learning_rate": 6.833408312987484e-05,
      "loss": 3.005,
      "step": 179911
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.902803659439087,
      "learning_rate": 6.833148418995253e-05,
      "loss": 2.4507,
      "step": 179912
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2630152702331543,
      "learning_rate": 6.832888529310192e-05,
      "loss": 3.0155,
      "step": 179913
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9838345050811768,
      "learning_rate": 6.832628643932329e-05,
      "loss": 2.8435,
      "step": 179914
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.754110336303711,
      "learning_rate": 6.83236876286171e-05,
      "loss": 3.1052,
      "step": 179915
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.420422315597534,
      "learning_rate": 6.832108886098401e-05,
      "loss": 2.992,
      "step": 179916
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8549022674560547,
      "learning_rate": 6.83184901364244e-05,
      "loss": 2.8594,
      "step": 179917
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5414621829986572,
      "learning_rate": 6.831589145493866e-05,
      "loss": 2.9606,
      "step": 179918
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.562366247177124,
      "learning_rate": 6.831329281652748e-05,
      "loss": 2.949,
      "step": 179919
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4945266246795654,
      "learning_rate": 6.83106942211912e-05,
      "loss": 3.169,
      "step": 179920
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.111022472381592,
      "learning_rate": 6.830809566893027e-05,
      "loss": 2.7896,
      "step": 179921
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.6097569465637207,
      "learning_rate": 6.83054971597453e-05,
      "loss": 2.87,
      "step": 179922
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.45912504196167,
      "learning_rate": 6.830289869363676e-05,
      "loss": 2.8762,
      "step": 179923
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.001927614212036,
      "learning_rate": 6.830030027060495e-05,
      "loss": 3.0771,
      "step": 179924
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.39542555809021,
      "learning_rate": 6.829770189065061e-05,
      "loss": 3.1515,
      "step": 179925
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1509766578674316,
      "learning_rate": 6.829510355377399e-05,
      "loss": 2.9251,
      "step": 179926
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.687321662902832,
      "learning_rate": 6.829250525997577e-05,
      "loss": 2.7769,
      "step": 179927
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8443312644958496,
      "learning_rate": 6.828990700925637e-05,
      "loss": 2.6351,
      "step": 179928
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9424123764038086,
      "learning_rate": 6.828730880161623e-05,
      "loss": 2.9151,
      "step": 179929
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7546865940093994,
      "learning_rate": 6.828471063705576e-05,
      "loss": 2.8194,
      "step": 179930
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0363800525665283,
      "learning_rate": 6.828211251557564e-05,
      "loss": 3.0868,
      "step": 179931
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5476605892181396,
      "learning_rate": 6.827951443717615e-05,
      "loss": 3.0756,
      "step": 179932
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.139667272567749,
      "learning_rate": 6.827691640185797e-05,
      "loss": 2.8966,
      "step": 179933
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.778254270553589,
      "learning_rate": 6.82743184096215e-05,
      "loss": 2.9246,
      "step": 179934
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8907322883605957,
      "learning_rate": 6.827172046046719e-05,
      "loss": 2.9234,
      "step": 179935
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.11306619644165,
      "learning_rate": 6.826912255439546e-05,
      "loss": 2.8093,
      "step": 179936
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4747140407562256,
      "learning_rate": 6.826652469140693e-05,
      "loss": 2.9269,
      "step": 179937
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.453481674194336,
      "learning_rate": 6.826392687150197e-05,
      "loss": 2.8729,
      "step": 179938
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.953552007675171,
      "learning_rate": 6.826132909468121e-05,
      "loss": 2.8115,
      "step": 179939
    },
    {
      "epoch": 2.34,
      "grad_norm": 6.61423921585083,
      "learning_rate": 6.825873136094503e-05,
      "loss": 2.7932,
      "step": 179940
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.733947277069092,
      "learning_rate": 6.825613367029394e-05,
      "loss": 3.0436,
      "step": 179941
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3376107215881348,
      "learning_rate": 6.825353602272833e-05,
      "loss": 2.6876,
      "step": 179942
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6928579807281494,
      "learning_rate": 6.825093841824881e-05,
      "loss": 3.4096,
      "step": 179943
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.46321964263916,
      "learning_rate": 6.824834085685576e-05,
      "loss": 2.9618,
      "step": 179944
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.533935785293579,
      "learning_rate": 6.824574333854982e-05,
      "loss": 2.9451,
      "step": 179945
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.151655673980713,
      "learning_rate": 6.824314586333135e-05,
      "loss": 3.0051,
      "step": 179946
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.847869873046875,
      "learning_rate": 6.824054843120083e-05,
      "loss": 3.0375,
      "step": 179947
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.100591659545898,
      "learning_rate": 6.823795104215871e-05,
      "loss": 2.7596,
      "step": 179948
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3509950637817383,
      "learning_rate": 6.823535369620562e-05,
      "loss": 3.011,
      "step": 179949
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5471012592315674,
      "learning_rate": 6.823275639334185e-05,
      "loss": 2.9803,
      "step": 179950
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7557268142700195,
      "learning_rate": 6.823015913356806e-05,
      "loss": 2.9883,
      "step": 179951
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.169098377227783,
      "learning_rate": 6.822756191688471e-05,
      "loss": 2.9458,
      "step": 179952
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.547240734100342,
      "learning_rate": 6.822496474329219e-05,
      "loss": 2.7997,
      "step": 179953
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8944129943847656,
      "learning_rate": 6.822236761279093e-05,
      "loss": 3.0101,
      "step": 179954
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5610904693603516,
      "learning_rate": 6.82197705253816e-05,
      "loss": 2.7385,
      "step": 179955
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6751627922058105,
      "learning_rate": 6.821717348106453e-05,
      "loss": 2.6126,
      "step": 179956
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6001453399658203,
      "learning_rate": 6.821457647984033e-05,
      "loss": 2.9882,
      "step": 179957
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8383562564849854,
      "learning_rate": 6.821197952170943e-05,
      "loss": 2.833,
      "step": 179958
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.82131028175354,
      "learning_rate": 6.820938260667228e-05,
      "loss": 2.9511,
      "step": 179959
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8021676540374756,
      "learning_rate": 6.820678573472929e-05,
      "loss": 2.9186,
      "step": 179960
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6533117294311523,
      "learning_rate": 6.820418890588114e-05,
      "loss": 2.7664,
      "step": 179961
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.626312732696533,
      "learning_rate": 6.820159212012811e-05,
      "loss": 3.0959,
      "step": 179962
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2339813709259033,
      "learning_rate": 6.819899537747085e-05,
      "loss": 2.9083,
      "step": 179963
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.2183403968811035,
      "learning_rate": 6.81963986779097e-05,
      "loss": 3.1284,
      "step": 179964
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.468315124511719,
      "learning_rate": 6.81938020214454e-05,
      "loss": 3.0253,
      "step": 179965
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.038119316101074,
      "learning_rate": 6.819120540807806e-05,
      "loss": 2.9216,
      "step": 179966
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.876803159713745,
      "learning_rate": 6.818860883780843e-05,
      "loss": 2.8945,
      "step": 179967
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.47512149810791,
      "learning_rate": 6.818601231063686e-05,
      "loss": 3.1194,
      "step": 179968
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4169318675994873,
      "learning_rate": 6.818341582656394e-05,
      "loss": 2.8681,
      "step": 179969
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.273555040359497,
      "learning_rate": 6.818081938559002e-05,
      "loss": 3.1488,
      "step": 179970
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.728065252304077,
      "learning_rate": 6.817822298771585e-05,
      "loss": 2.9433,
      "step": 179971
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.057157516479492,
      "learning_rate": 6.817562663294154e-05,
      "loss": 2.8109,
      "step": 179972
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6168372631073,
      "learning_rate": 6.817303032126786e-05,
      "loss": 2.9013,
      "step": 179973
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.799999713897705,
      "learning_rate": 6.817043405269506e-05,
      "loss": 2.8636,
      "step": 179974
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8330445289611816,
      "learning_rate": 6.81678378272239e-05,
      "loss": 2.9398,
      "step": 179975
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5709986686706543,
      "learning_rate": 6.816524164485458e-05,
      "loss": 3.2565,
      "step": 179976
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.828716516494751,
      "learning_rate": 6.816264550558791e-05,
      "loss": 3.0525,
      "step": 179977
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.415714740753174,
      "learning_rate": 6.8160049409424e-05,
      "loss": 3.0446,
      "step": 179978
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.958622932434082,
      "learning_rate": 6.815745335636362e-05,
      "loss": 2.8975,
      "step": 179979
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.462886333465576,
      "learning_rate": 6.815485734640701e-05,
      "loss": 3.1561,
      "step": 179980
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8806850910186768,
      "learning_rate": 6.815226137955493e-05,
      "loss": 2.9602,
      "step": 179981
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.79017448425293,
      "learning_rate": 6.81496654558076e-05,
      "loss": 2.9322,
      "step": 179982
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.89577579498291,
      "learning_rate": 6.814706957516579e-05,
      "loss": 3.1182,
      "step": 179983
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.019343852996826,
      "learning_rate": 6.814447373762967e-05,
      "loss": 3.0183,
      "step": 179984
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8302574157714844,
      "learning_rate": 6.814187794319996e-05,
      "loss": 3.0227,
      "step": 179985
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4577901363372803,
      "learning_rate": 6.813928219187694e-05,
      "loss": 2.8907,
      "step": 179986
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.975277900695801,
      "learning_rate": 6.813668648366131e-05,
      "loss": 2.793,
      "step": 179987
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9432454109191895,
      "learning_rate": 6.813409081855336e-05,
      "loss": 2.9269,
      "step": 179988
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.487790822982788,
      "learning_rate": 6.813149519655371e-05,
      "loss": 3.1856,
      "step": 179989
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.897069215774536,
      "learning_rate": 6.812889961766283e-05,
      "loss": 2.7562,
      "step": 179990
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.239766597747803,
      "learning_rate": 6.812630408188117e-05,
      "loss": 2.8016,
      "step": 179991
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.7180848121643066,
      "learning_rate": 6.812370858920912e-05,
      "loss": 2.627,
      "step": 179992
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8295538425445557,
      "learning_rate": 6.812111313964732e-05,
      "loss": 2.8607,
      "step": 179993
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.618584394454956,
      "learning_rate": 6.811851773319609e-05,
      "loss": 2.8843,
      "step": 179994
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.009494304656982,
      "learning_rate": 6.811592236985608e-05,
      "loss": 2.9546,
      "step": 179995
    },
    {
      "epoch": 2.34,
      "grad_norm": 7.018723964691162,
      "learning_rate": 6.811332704962773e-05,
      "loss": 2.9127,
      "step": 179996
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.917015552520752,
      "learning_rate": 6.811073177251137e-05,
      "loss": 2.9698,
      "step": 179997
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7445271015167236,
      "learning_rate": 6.81081365385077e-05,
      "loss": 2.7695,
      "step": 179998
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.920271873474121,
      "learning_rate": 6.810554134761712e-05,
      "loss": 2.9283,
      "step": 179999
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5129127502441406,
      "learning_rate": 6.810294619984002e-05,
      "loss": 2.7951,
      "step": 180000
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4827933311462402,
      "learning_rate": 6.810035109517702e-05,
      "loss": 2.9128,
      "step": 180001
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0304791927337646,
      "learning_rate": 6.809775603362857e-05,
      "loss": 2.9873,
      "step": 180002
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3415169715881348,
      "learning_rate": 6.8095161015195e-05,
      "loss": 2.9718,
      "step": 180003
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.2540252208709717,
      "learning_rate": 6.809256603987704e-05,
      "loss": 2.988,
      "step": 180004
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7983405590057373,
      "learning_rate": 6.808997110767502e-05,
      "loss": 2.8157,
      "step": 180005
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.228731870651245,
      "learning_rate": 6.808737621858939e-05,
      "loss": 2.9914,
      "step": 180006
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7879767417907715,
      "learning_rate": 6.808478137262076e-05,
      "loss": 3.0715,
      "step": 180007
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7738161087036133,
      "learning_rate": 6.808218656976957e-05,
      "loss": 2.7925,
      "step": 180008
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6645424365997314,
      "learning_rate": 6.807959181003615e-05,
      "loss": 2.9613,
      "step": 180009
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.856588363647461,
      "learning_rate": 6.807699709342122e-05,
      "loss": 2.6635,
      "step": 180010
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.690495729446411,
      "learning_rate": 6.80744024199251e-05,
      "loss": 3.0912,
      "step": 180011
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.021295547485352,
      "learning_rate": 6.807180778954838e-05,
      "loss": 3.0354,
      "step": 180012
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.8416285514831543,
      "learning_rate": 6.80692132022915e-05,
      "loss": 2.9342,
      "step": 180013
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.358790159225464,
      "learning_rate": 6.806661865815493e-05,
      "loss": 2.7556,
      "step": 180014
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.494816780090332,
      "learning_rate": 6.806402415713908e-05,
      "loss": 3.0078,
      "step": 180015
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5616250038146973,
      "learning_rate": 6.806142969924455e-05,
      "loss": 2.7279,
      "step": 180016
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.998255968093872,
      "learning_rate": 6.805883528447173e-05,
      "loss": 2.9782,
      "step": 180017
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.5014729499816895,
      "learning_rate": 6.805624091282123e-05,
      "loss": 3.001,
      "step": 180018
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8752241134643555,
      "learning_rate": 6.805364658429345e-05,
      "loss": 3.051,
      "step": 180019
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5913987159729004,
      "learning_rate": 6.805105229888886e-05,
      "loss": 2.9458,
      "step": 180020
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0795810222625732,
      "learning_rate": 6.804845805660789e-05,
      "loss": 2.9462,
      "step": 180021
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.759165048599243,
      "learning_rate": 6.804586385745117e-05,
      "loss": 2.9731,
      "step": 180022
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4133083820343018,
      "learning_rate": 6.804326970141905e-05,
      "loss": 3.0401,
      "step": 180023
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2467360496520996,
      "learning_rate": 6.80406755885121e-05,
      "loss": 3.0279,
      "step": 180024
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6727187633514404,
      "learning_rate": 6.803808151873079e-05,
      "loss": 2.8401,
      "step": 180025
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4609174728393555,
      "learning_rate": 6.803548749207556e-05,
      "loss": 2.9675,
      "step": 180026
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.993295669555664,
      "learning_rate": 6.803289350854686e-05,
      "loss": 3.0752,
      "step": 180027
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4723780155181885,
      "learning_rate": 6.803029956814528e-05,
      "loss": 2.902,
      "step": 180028
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9814374446868896,
      "learning_rate": 6.802770567087117e-05,
      "loss": 2.8181,
      "step": 180029
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7673826217651367,
      "learning_rate": 6.802511181672518e-05,
      "loss": 2.615,
      "step": 180030
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.823157548904419,
      "learning_rate": 6.80225180057076e-05,
      "loss": 2.8795,
      "step": 180031
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5674924850463867,
      "learning_rate": 6.80199242378192e-05,
      "loss": 2.9006,
      "step": 180032
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.970841407775879,
      "learning_rate": 6.80173305130601e-05,
      "loss": 3.1984,
      "step": 180033
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.543928623199463,
      "learning_rate": 6.801473683143106e-05,
      "loss": 2.9011,
      "step": 180034
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9590702056884766,
      "learning_rate": 6.801214319293235e-05,
      "loss": 2.9459,
      "step": 180035
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.746629238128662,
      "learning_rate": 6.800954959756465e-05,
      "loss": 2.7216,
      "step": 180036
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.0273311138153076,
      "learning_rate": 6.800695604532827e-05,
      "loss": 3.1684,
      "step": 180037
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.770648717880249,
      "learning_rate": 6.800436253622394e-05,
      "loss": 3.0751,
      "step": 180038
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.157244920730591,
      "learning_rate": 6.800176907025182e-05,
      "loss": 3.1115,
      "step": 180039
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.068662166595459,
      "learning_rate": 6.799917564741264e-05,
      "loss": 3.1003,
      "step": 180040
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1221563816070557,
      "learning_rate": 6.799658226770668e-05,
      "loss": 3.0252,
      "step": 180041
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.492307424545288,
      "learning_rate": 6.799398893113464e-05,
      "loss": 3.2128,
      "step": 180042
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.7623798847198486,
      "learning_rate": 6.79913956376968e-05,
      "loss": 3.1415,
      "step": 180043
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.351548671722412,
      "learning_rate": 6.798880238739395e-05,
      "loss": 2.7867,
      "step": 180044
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3133628368377686,
      "learning_rate": 6.798620918022614e-05,
      "loss": 2.8041,
      "step": 180045
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.409191131591797,
      "learning_rate": 6.798361601619418e-05,
      "loss": 2.9643,
      "step": 180046
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.2051801681518555,
      "learning_rate": 6.798102289529837e-05,
      "loss": 2.9446,
      "step": 180047
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.728124141693115,
      "learning_rate": 6.797842981753935e-05,
      "loss": 2.7591,
      "step": 180048
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.73323130607605,
      "learning_rate": 6.797583678291742e-05,
      "loss": 3.1291,
      "step": 180049
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.421999216079712,
      "learning_rate": 6.797324379143332e-05,
      "loss": 3.0418,
      "step": 180050
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.546589374542236,
      "learning_rate": 6.797065084308723e-05,
      "loss": 2.7227,
      "step": 180051
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.023083448410034,
      "learning_rate": 6.796805793787985e-05,
      "loss": 3.1152,
      "step": 180052
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.1466708183288574,
      "learning_rate": 6.796546507581152e-05,
      "loss": 2.9021,
      "step": 180053
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.998469829559326,
      "learning_rate": 6.796287225688288e-05,
      "loss": 2.7714,
      "step": 180054
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.643282413482666,
      "learning_rate": 6.79602794810942e-05,
      "loss": 3.0674,
      "step": 180055
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.503783702850342,
      "learning_rate": 6.795768674844626e-05,
      "loss": 2.8013,
      "step": 180056
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.3985815048217773,
      "learning_rate": 6.795509405893921e-05,
      "loss": 2.877,
      "step": 180057
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.084073781967163,
      "learning_rate": 6.795250141257378e-05,
      "loss": 2.6738,
      "step": 180058
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.4982566833496094,
      "learning_rate": 6.794990880935027e-05,
      "loss": 2.9552,
      "step": 180059
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.525796413421631,
      "learning_rate": 6.794731624926932e-05,
      "loss": 2.9081,
      "step": 180060
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6398746967315674,
      "learning_rate": 6.794472373233129e-05,
      "loss": 2.9464,
      "step": 180061
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.6447806358337402,
      "learning_rate": 6.794213125853686e-05,
      "loss": 2.7682,
      "step": 180062
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.534322738647461,
      "learning_rate": 6.793953882788621e-05,
      "loss": 2.9375,
      "step": 180063
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.703791379928589,
      "learning_rate": 6.793694644038004e-05,
      "loss": 2.7864,
      "step": 180064
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.491285562515259,
      "learning_rate": 6.793435409601875e-05,
      "loss": 2.7594,
      "step": 180065
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7457566261291504,
      "learning_rate": 6.793176179480288e-05,
      "loss": 2.8646,
      "step": 180066
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.3850152492523193,
      "learning_rate": 6.792916953673279e-05,
      "loss": 3.1433,
      "step": 180067
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5717241764068604,
      "learning_rate": 6.792657732180924e-05,
      "loss": 3.2005,
      "step": 180068
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.2574095726013184,
      "learning_rate": 6.792398515003235e-05,
      "loss": 3.0488,
      "step": 180069
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.614006519317627,
      "learning_rate": 6.792139302140283e-05,
      "loss": 2.883,
      "step": 180070
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8051364421844482,
      "learning_rate": 6.791880093592102e-05,
      "loss": 2.9403,
      "step": 180071
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.6348438262939453,
      "learning_rate": 6.791620889358761e-05,
      "loss": 2.8474,
      "step": 180072
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.8312792778015137,
      "learning_rate": 6.791361689440283e-05,
      "loss": 2.896,
      "step": 180073
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.064880847930908,
      "learning_rate": 6.791102493836739e-05,
      "loss": 2.7541,
      "step": 180074
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.955568552017212,
      "learning_rate": 6.790843302548167e-05,
      "loss": 3.0252,
      "step": 180075
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7845654487609863,
      "learning_rate": 6.790584115574617e-05,
      "loss": 2.8104,
      "step": 180076
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.345984935760498,
      "learning_rate": 6.790324932916124e-05,
      "loss": 2.8927,
      "step": 180077
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.434877872467041,
      "learning_rate": 6.790065754572758e-05,
      "loss": 2.8494,
      "step": 180078
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.4834065437316895,
      "learning_rate": 6.789806580544549e-05,
      "loss": 3.059,
      "step": 180079
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.5616796016693115,
      "learning_rate": 6.789547410831562e-05,
      "loss": 2.9084,
      "step": 180080
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.2555599212646484,
      "learning_rate": 6.789288245433836e-05,
      "loss": 2.9363,
      "step": 180081
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.435532331466675,
      "learning_rate": 6.789029084351418e-05,
      "loss": 2.908,
      "step": 180082
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.433712005615234,
      "learning_rate": 6.788769927584348e-05,
      "loss": 3.0377,
      "step": 180083
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.738102674484253,
      "learning_rate": 6.788510775132694e-05,
      "loss": 2.6787,
      "step": 180084
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.584383010864258,
      "learning_rate": 6.788251626996487e-05,
      "loss": 3.2032,
      "step": 180085
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.143936634063721,
      "learning_rate": 6.787992483175788e-05,
      "loss": 2.9297,
      "step": 180086
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.337394714355469,
      "learning_rate": 6.78773334367064e-05,
      "loss": 2.9367,
      "step": 180087
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.769460678100586,
      "learning_rate": 6.787474208481084e-05,
      "loss": 3.2089,
      "step": 180088
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.933621644973755,
      "learning_rate": 6.787215077607182e-05,
      "loss": 3.0805,
      "step": 180089
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.311882734298706,
      "learning_rate": 6.786955951048975e-05,
      "loss": 2.9866,
      "step": 180090
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.036157608032227,
      "learning_rate": 6.786696828806502e-05,
      "loss": 2.834,
      "step": 180091
    },
    {
      "epoch": 2.34,
      "grad_norm": 4.524496555328369,
      "learning_rate": 6.786437710879829e-05,
      "loss": 2.7724,
      "step": 180092
    },
    {
      "epoch": 2.34,
      "grad_norm": 3.972531318664551,
      "learning_rate": 6.786178597268995e-05,
      "loss": 3.2459,
      "step": 180093
    },
    {
      "epoch": 2.34,
      "grad_norm": 5.892119407653809,
      "learning_rate": 6.785919487974043e-05,
      "loss": 2.7784,
      "step": 180094
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.633053779602051,
      "learning_rate": 6.785660382995032e-05,
      "loss": 2.8028,
      "step": 180095
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9525814056396484,
      "learning_rate": 6.785401282331998e-05,
      "loss": 3.0053,
      "step": 180096
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.7546026706695557,
      "learning_rate": 6.785142185985004e-05,
      "loss": 2.9892,
      "step": 180097
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.437849283218384,
      "learning_rate": 6.784883093954088e-05,
      "loss": 2.9106,
      "step": 180098
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.581937789916992,
      "learning_rate": 6.784624006239305e-05,
      "loss": 2.5375,
      "step": 180099
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.7050676345825195,
      "learning_rate": 6.784364922840687e-05,
      "loss": 2.8496,
      "step": 180100
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.62307071685791,
      "learning_rate": 6.784105843758302e-05,
      "loss": 2.9031,
      "step": 180101
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.718770742416382,
      "learning_rate": 6.783846768992182e-05,
      "loss": 2.9278,
      "step": 180102
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.208035469055176,
      "learning_rate": 6.783587698542393e-05,
      "loss": 2.7983,
      "step": 180103
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4032561779022217,
      "learning_rate": 6.783328632408974e-05,
      "loss": 2.9024,
      "step": 180104
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.416931629180908,
      "learning_rate": 6.783069570591969e-05,
      "loss": 3.1018,
      "step": 180105
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8018951416015625,
      "learning_rate": 6.782810513091424e-05,
      "loss": 2.7049,
      "step": 180106
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.8567166328430176,
      "learning_rate": 6.7825514599074e-05,
      "loss": 3.0389,
      "step": 180107
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.603147268295288,
      "learning_rate": 6.78229241103993e-05,
      "loss": 2.8031,
      "step": 180108
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.683107614517212,
      "learning_rate": 6.782033366489077e-05,
      "loss": 2.9463,
      "step": 180109
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4100756645202637,
      "learning_rate": 6.781774326254878e-05,
      "loss": 2.9485,
      "step": 180110
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7124361991882324,
      "learning_rate": 6.781515290337398e-05,
      "loss": 2.8952,
      "step": 180111
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6936161518096924,
      "learning_rate": 6.781256258736657e-05,
      "loss": 2.8909,
      "step": 180112
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.803126573562622,
      "learning_rate": 6.780997231452727e-05,
      "loss": 3.1297,
      "step": 180113
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.463139057159424,
      "learning_rate": 6.780738208485639e-05,
      "loss": 3.1316,
      "step": 180114
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7456061840057373,
      "learning_rate": 6.78047918983546e-05,
      "loss": 3.0212,
      "step": 180115
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7973475456237793,
      "learning_rate": 6.780220175502219e-05,
      "loss": 2.8892,
      "step": 180116
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8870396614074707,
      "learning_rate": 6.77996116548599e-05,
      "loss": 2.8985,
      "step": 180117
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.997728109359741,
      "learning_rate": 6.779702159786789e-05,
      "loss": 2.8042,
      "step": 180118
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6863269805908203,
      "learning_rate": 6.779443158404688e-05,
      "loss": 2.8941,
      "step": 180119
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5692899227142334,
      "learning_rate": 6.779184161339717e-05,
      "loss": 2.9542,
      "step": 180120
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.160745620727539,
      "learning_rate": 6.778925168591942e-05,
      "loss": 2.9439,
      "step": 180121
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.518951654434204,
      "learning_rate": 6.778666180161398e-05,
      "loss": 3.1504,
      "step": 180122
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6492831707000732,
      "learning_rate": 6.778407196048152e-05,
      "loss": 2.8032,
      "step": 180123
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4642746448516846,
      "learning_rate": 6.778148216252221e-05,
      "loss": 3.0318,
      "step": 180124
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.480335235595703,
      "learning_rate": 6.777889240773682e-05,
      "loss": 2.8975,
      "step": 180125
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7396035194396973,
      "learning_rate": 6.777630269612563e-05,
      "loss": 3.2619,
      "step": 180126
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7518293857574463,
      "learning_rate": 6.777371302768926e-05,
      "loss": 2.9278,
      "step": 180127
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.156003713607788,
      "learning_rate": 6.777112340242807e-05,
      "loss": 2.9525,
      "step": 180128
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4884395599365234,
      "learning_rate": 6.776853382034281e-05,
      "loss": 3.0957,
      "step": 180129
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.907869815826416,
      "learning_rate": 6.776594428143358e-05,
      "loss": 2.8821,
      "step": 180130
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.362603187561035,
      "learning_rate": 6.77633547857011e-05,
      "loss": 2.9975,
      "step": 180131
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.758021116256714,
      "learning_rate": 6.776076533314575e-05,
      "loss": 2.9715,
      "step": 180132
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.366619348526001,
      "learning_rate": 6.77581759237681e-05,
      "loss": 2.7899,
      "step": 180133
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.030059814453125,
      "learning_rate": 6.775558655756854e-05,
      "loss": 2.8031,
      "step": 180134
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9968321323394775,
      "learning_rate": 6.775299723454774e-05,
      "loss": 2.9146,
      "step": 180135
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.434720993041992,
      "learning_rate": 6.775040795470591e-05,
      "loss": 2.9789,
      "step": 180136
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0573649406433105,
      "learning_rate": 6.774781871804373e-05,
      "loss": 2.7958,
      "step": 180137
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.625267267227173,
      "learning_rate": 6.774522952456152e-05,
      "loss": 2.9162,
      "step": 180138
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.955209255218506,
      "learning_rate": 6.774264037425996e-05,
      "loss": 2.716,
      "step": 180139
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.475433588027954,
      "learning_rate": 6.774005126713932e-05,
      "loss": 2.8965,
      "step": 180140
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2755327224731445,
      "learning_rate": 6.773746220320039e-05,
      "loss": 3.0395,
      "step": 180141
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4219248294830322,
      "learning_rate": 6.773487318244323e-05,
      "loss": 3.0845,
      "step": 180142
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7208094596862793,
      "learning_rate": 6.773228420486869e-05,
      "loss": 2.9251,
      "step": 180143
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.856931447982788,
      "learning_rate": 6.772969527047698e-05,
      "loss": 2.9778,
      "step": 180144
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8812026977539062,
      "learning_rate": 6.772710637926881e-05,
      "loss": 2.8776,
      "step": 180145
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4227187633514404,
      "learning_rate": 6.772451753124445e-05,
      "loss": 2.8081,
      "step": 180146
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3106279373168945,
      "learning_rate": 6.772192872640466e-05,
      "loss": 2.8773,
      "step": 180147
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5638694763183594,
      "learning_rate": 6.771933996474957e-05,
      "loss": 2.8844,
      "step": 180148
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.743483543395996,
      "learning_rate": 6.771675124627992e-05,
      "loss": 2.9455,
      "step": 180149
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9095218181610107,
      "learning_rate": 6.771416257099605e-05,
      "loss": 2.6167,
      "step": 180150
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5746288299560547,
      "learning_rate": 6.771157393889858e-05,
      "loss": 3.0777,
      "step": 180151
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2555506229400635,
      "learning_rate": 6.770898534998782e-05,
      "loss": 3.172,
      "step": 180152
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9897148609161377,
      "learning_rate": 6.770639680426451e-05,
      "loss": 3.0201,
      "step": 180153
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.905447483062744,
      "learning_rate": 6.77038083017288e-05,
      "loss": 3.2078,
      "step": 180154
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3814213275909424,
      "learning_rate": 6.770121984238144e-05,
      "loss": 2.9611,
      "step": 180155
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.833094358444214,
      "learning_rate": 6.769863142622272e-05,
      "loss": 2.7536,
      "step": 180156
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7090485095977783,
      "learning_rate": 6.769604305325327e-05,
      "loss": 2.8558,
      "step": 180157
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4807870388031006,
      "learning_rate": 6.769345472347347e-05,
      "loss": 2.7696,
      "step": 180158
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.652458429336548,
      "learning_rate": 6.769086643688387e-05,
      "loss": 2.9476,
      "step": 180159
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.043640375137329,
      "learning_rate": 6.768827819348496e-05,
      "loss": 2.8483,
      "step": 180160
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9497292041778564,
      "learning_rate": 6.768568999327718e-05,
      "loss": 2.9113,
      "step": 180161
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.956735849380493,
      "learning_rate": 6.768310183626091e-05,
      "loss": 2.948,
      "step": 180162
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.29030704498291,
      "learning_rate": 6.768051372243685e-05,
      "loss": 2.9395,
      "step": 180163
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9643945693969727,
      "learning_rate": 6.767792565180526e-05,
      "loss": 2.8367,
      "step": 180164
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6872856616973877,
      "learning_rate": 6.767533762436684e-05,
      "loss": 2.7889,
      "step": 180165
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.116715908050537,
      "learning_rate": 6.767274964012192e-05,
      "loss": 3.0837,
      "step": 180166
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.569115161895752,
      "learning_rate": 6.767016169907106e-05,
      "loss": 2.7516,
      "step": 180167
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.999256134033203,
      "learning_rate": 6.766757380121459e-05,
      "loss": 2.762,
      "step": 180168
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5035016536712646,
      "learning_rate": 6.76649859465532e-05,
      "loss": 2.7857,
      "step": 180169
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.954150199890137,
      "learning_rate": 6.766239813508721e-05,
      "loss": 2.9932,
      "step": 180170
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6339054107666016,
      "learning_rate": 6.765981036681721e-05,
      "loss": 3.0189,
      "step": 180171
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0584709644317627,
      "learning_rate": 6.765722264174366e-05,
      "loss": 3.065,
      "step": 180172
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3467299938201904,
      "learning_rate": 6.765463495986695e-05,
      "loss": 2.8539,
      "step": 180173
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.67056941986084,
      "learning_rate": 6.765204732118768e-05,
      "loss": 2.8067,
      "step": 180174
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1188275814056396,
      "learning_rate": 6.76494597257063e-05,
      "loss": 2.9727,
      "step": 180175
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8147435188293457,
      "learning_rate": 6.76468721734232e-05,
      "loss": 2.8388,
      "step": 180176
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2967588901519775,
      "learning_rate": 6.7644284664339e-05,
      "loss": 3.0673,
      "step": 180177
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.747281789779663,
      "learning_rate": 6.764169719845412e-05,
      "loss": 2.7974,
      "step": 180178
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8800134658813477,
      "learning_rate": 6.763910977576894e-05,
      "loss": 2.6995,
      "step": 180179
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2236056327819824,
      "learning_rate": 6.763652239628411e-05,
      "loss": 3.0055,
      "step": 180180
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.4792327880859375,
      "learning_rate": 6.763393505999995e-05,
      "loss": 2.8218,
      "step": 180181
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.009458303451538,
      "learning_rate": 6.763134776691714e-05,
      "loss": 2.6925,
      "step": 180182
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1364307403564453,
      "learning_rate": 6.762876051703604e-05,
      "loss": 2.921,
      "step": 180183
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6214921474456787,
      "learning_rate": 6.762617331035715e-05,
      "loss": 3.04,
      "step": 180184
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.143684387207031,
      "learning_rate": 6.762358614688083e-05,
      "loss": 3.0568,
      "step": 180185
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.176788091659546,
      "learning_rate": 6.762099902660778e-05,
      "loss": 3.1608,
      "step": 180186
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8614070415496826,
      "learning_rate": 6.761841194953827e-05,
      "loss": 2.988,
      "step": 180187
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.226809501647949,
      "learning_rate": 6.761582491567298e-05,
      "loss": 2.8337,
      "step": 180188
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0507559776306152,
      "learning_rate": 6.761323792501228e-05,
      "loss": 3.089,
      "step": 180189
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.826629877090454,
      "learning_rate": 6.76106509775567e-05,
      "loss": 3.0591,
      "step": 180190
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.502685070037842,
      "learning_rate": 6.760806407330655e-05,
      "loss": 2.891,
      "step": 180191
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4037182331085205,
      "learning_rate": 6.760547721226259e-05,
      "loss": 3.0806,
      "step": 180192
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3902173042297363,
      "learning_rate": 6.760289039442504e-05,
      "loss": 2.8761,
      "step": 180193
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.582557201385498,
      "learning_rate": 6.760030361979458e-05,
      "loss": 3.11,
      "step": 180194
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1625375747680664,
      "learning_rate": 6.759771688837152e-05,
      "loss": 3.0564,
      "step": 180195
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.261223077774048,
      "learning_rate": 6.759513020015664e-05,
      "loss": 2.7719,
      "step": 180196
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5956547260284424,
      "learning_rate": 6.759254355515001e-05,
      "loss": 3.2148,
      "step": 180197
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.408522605895996,
      "learning_rate": 6.75899569533524e-05,
      "loss": 3.035,
      "step": 180198
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.836639165878296,
      "learning_rate": 6.758737039476412e-05,
      "loss": 2.8733,
      "step": 180199
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.070754051208496,
      "learning_rate": 6.758478387938584e-05,
      "loss": 3.1705,
      "step": 180200
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6244828701019287,
      "learning_rate": 6.758219740721783e-05,
      "loss": 2.6696,
      "step": 180201
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.734321117401123,
      "learning_rate": 6.757961097826086e-05,
      "loss": 2.8859,
      "step": 180202
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5461413860321045,
      "learning_rate": 6.757702459251506e-05,
      "loss": 2.8353,
      "step": 180203
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6985158920288086,
      "learning_rate": 6.757443824998115e-05,
      "loss": 2.8874,
      "step": 180204
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8789327144622803,
      "learning_rate": 6.757185195065944e-05,
      "loss": 2.9211,
      "step": 180205
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.9466769695281982,
      "learning_rate": 6.756926569455063e-05,
      "loss": 2.817,
      "step": 180206
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7847704887390137,
      "learning_rate": 6.756667948165497e-05,
      "loss": 2.8586,
      "step": 180207
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5948221683502197,
      "learning_rate": 6.756409331197323e-05,
      "loss": 2.8935,
      "step": 180208
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.7213194370269775,
      "learning_rate": 6.756150718550555e-05,
      "loss": 3.006,
      "step": 180209
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.30300235748291,
      "learning_rate": 6.755892110225264e-05,
      "loss": 2.9267,
      "step": 180210
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.9978859424591064,
      "learning_rate": 6.755633506221486e-05,
      "loss": 2.795,
      "step": 180211
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.023505687713623,
      "learning_rate": 6.755374906539281e-05,
      "loss": 2.8862,
      "step": 180212
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1930553913116455,
      "learning_rate": 6.75511631117868e-05,
      "loss": 2.9933,
      "step": 180213
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5271451473236084,
      "learning_rate": 6.754857720139761e-05,
      "loss": 2.9382,
      "step": 180214
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.632164716720581,
      "learning_rate": 6.754599133422535e-05,
      "loss": 2.9019,
      "step": 180215
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6678900718688965,
      "learning_rate": 6.75434055102708e-05,
      "loss": 2.8777,
      "step": 180216
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.267611980438232,
      "learning_rate": 6.75408197295342e-05,
      "loss": 2.871,
      "step": 180217
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.046088218688965,
      "learning_rate": 6.753823399201624e-05,
      "loss": 2.8169,
      "step": 180218
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1017203330993652,
      "learning_rate": 6.753564829771722e-05,
      "loss": 2.7514,
      "step": 180219
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.429975986480713,
      "learning_rate": 6.753306264663788e-05,
      "loss": 2.8872,
      "step": 180220
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.538785219192505,
      "learning_rate": 6.753047703877836e-05,
      "loss": 3.0172,
      "step": 180221
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.131760597229004,
      "learning_rate": 6.75278914741394e-05,
      "loss": 3.1033,
      "step": 180222
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.99208402633667,
      "learning_rate": 6.75253059527213e-05,
      "loss": 2.8779,
      "step": 180223
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.132080554962158,
      "learning_rate": 6.752272047452473e-05,
      "loss": 2.7081,
      "step": 180224
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.618022918701172,
      "learning_rate": 6.752013503955002e-05,
      "loss": 2.8206,
      "step": 180225
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2508749961853027,
      "learning_rate": 6.751754964779775e-05,
      "loss": 3.0724,
      "step": 180226
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.348062753677368,
      "learning_rate": 6.751496429926838e-05,
      "loss": 2.9331,
      "step": 180227
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.455422401428223,
      "learning_rate": 6.751237899396235e-05,
      "loss": 2.9734,
      "step": 180228
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9574737548828125,
      "learning_rate": 6.750979373188007e-05,
      "loss": 2.9832,
      "step": 180229
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2199296951293945,
      "learning_rate": 6.750720851302219e-05,
      "loss": 2.8359,
      "step": 180230
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.567042350769043,
      "learning_rate": 6.750462333738905e-05,
      "loss": 3.219,
      "step": 180231
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6809396743774414,
      "learning_rate": 6.750203820498124e-05,
      "loss": 2.9118,
      "step": 180232
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.069429874420166,
      "learning_rate": 6.749945311579923e-05,
      "loss": 2.849,
      "step": 180233
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7546982765197754,
      "learning_rate": 6.749686806984343e-05,
      "loss": 3.0686,
      "step": 180234
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.063804864883423,
      "learning_rate": 6.749428306711426e-05,
      "loss": 2.7108,
      "step": 180235
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.22878098487854,
      "learning_rate": 6.749169810761238e-05,
      "loss": 2.9603,
      "step": 180236
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.27957820892334,
      "learning_rate": 6.748911319133812e-05,
      "loss": 2.9945,
      "step": 180237
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.074540376663208,
      "learning_rate": 6.748652831829208e-05,
      "loss": 3.1334,
      "step": 180238
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.052143096923828,
      "learning_rate": 6.748394348847473e-05,
      "loss": 2.8459,
      "step": 180239
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.134269714355469,
      "learning_rate": 6.748135870188646e-05,
      "loss": 2.8606,
      "step": 180240
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0379509925842285,
      "learning_rate": 6.747877395852771e-05,
      "loss": 2.78,
      "step": 180241
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.659280300140381,
      "learning_rate": 6.747618925839914e-05,
      "loss": 3.2461,
      "step": 180242
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.471668243408203,
      "learning_rate": 6.747360460150108e-05,
      "loss": 2.9561,
      "step": 180243
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.73130202293396,
      "learning_rate": 6.747101998783412e-05,
      "loss": 3.1109,
      "step": 180244
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.6220903396606445,
      "learning_rate": 6.746843541739868e-05,
      "loss": 2.9457,
      "step": 180245
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7561891078948975,
      "learning_rate": 6.746585089019526e-05,
      "loss": 3.0875,
      "step": 180246
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2668864727020264,
      "learning_rate": 6.746326640622423e-05,
      "loss": 2.8261,
      "step": 180247
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4670441150665283,
      "learning_rate": 6.746068196548627e-05,
      "loss": 2.9573,
      "step": 180248
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.49688458442688,
      "learning_rate": 6.745809756798168e-05,
      "loss": 2.9979,
      "step": 180249
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0556914806365967,
      "learning_rate": 6.745551321371108e-05,
      "loss": 3.0412,
      "step": 180250
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.113198757171631,
      "learning_rate": 6.745292890267489e-05,
      "loss": 3.0405,
      "step": 180251
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4909799098968506,
      "learning_rate": 6.74503446348736e-05,
      "loss": 3.0024,
      "step": 180252
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7060728073120117,
      "learning_rate": 6.744776041030759e-05,
      "loss": 2.8445,
      "step": 180253
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9070565700531006,
      "learning_rate": 6.744517622897754e-05,
      "loss": 2.9462,
      "step": 180254
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4189648628234863,
      "learning_rate": 6.744259209088372e-05,
      "loss": 2.9954,
      "step": 180255
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.717123985290527,
      "learning_rate": 6.74400079960268e-05,
      "loss": 2.9427,
      "step": 180256
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6023013591766357,
      "learning_rate": 6.743742394440715e-05,
      "loss": 2.9161,
      "step": 180257
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6337223052978516,
      "learning_rate": 6.743483993602522e-05,
      "loss": 2.7383,
      "step": 180258
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2754852771759033,
      "learning_rate": 6.74322559708816e-05,
      "loss": 3.0914,
      "step": 180259
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.495656967163086,
      "learning_rate": 6.742967204897672e-05,
      "loss": 3.079,
      "step": 180260
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7268214225769043,
      "learning_rate": 6.7427088170311e-05,
      "loss": 2.9075,
      "step": 180261
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.963003158569336,
      "learning_rate": 6.742450433488505e-05,
      "loss": 2.8694,
      "step": 180262
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.206824541091919,
      "learning_rate": 6.742192054269924e-05,
      "loss": 3.0327,
      "step": 180263
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.929392099380493,
      "learning_rate": 6.741933679375404e-05,
      "loss": 2.9262,
      "step": 180264
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.8319475650787354,
      "learning_rate": 6.741675308805003e-05,
      "loss": 2.7939,
      "step": 180265
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8957886695861816,
      "learning_rate": 6.741416942558764e-05,
      "loss": 2.9089,
      "step": 180266
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.605797052383423,
      "learning_rate": 6.741158580636729e-05,
      "loss": 2.8184,
      "step": 180267
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.390852451324463,
      "learning_rate": 6.740900223038957e-05,
      "loss": 3.0493,
      "step": 180268
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6787970066070557,
      "learning_rate": 6.740641869765491e-05,
      "loss": 3.0006,
      "step": 180269
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.9503493309020996,
      "learning_rate": 6.740383520816372e-05,
      "loss": 2.8773,
      "step": 180270
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.942410945892334,
      "learning_rate": 6.740125176191661e-05,
      "loss": 3.0253,
      "step": 180271
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9560420513153076,
      "learning_rate": 6.739866835891393e-05,
      "loss": 3.0643,
      "step": 180272
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.748093605041504,
      "learning_rate": 6.739608499915632e-05,
      "loss": 3.141,
      "step": 180273
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4319908618927,
      "learning_rate": 6.739350168264414e-05,
      "loss": 3.1746,
      "step": 180274
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7115373611450195,
      "learning_rate": 6.739091840937794e-05,
      "loss": 2.8441,
      "step": 180275
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.219270706176758,
      "learning_rate": 6.738833517935805e-05,
      "loss": 2.9705,
      "step": 180276
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9111697673797607,
      "learning_rate": 6.738575199258514e-05,
      "loss": 2.9398,
      "step": 180277
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9968302249908447,
      "learning_rate": 6.738316884905952e-05,
      "loss": 3.2303,
      "step": 180278
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7819793224334717,
      "learning_rate": 6.738058574878188e-05,
      "loss": 2.7685,
      "step": 180279
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8853049278259277,
      "learning_rate": 6.737800269175248e-05,
      "loss": 3.048,
      "step": 180280
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.689331531524658,
      "learning_rate": 6.737541967797205e-05,
      "loss": 2.696,
      "step": 180281
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.255358695983887,
      "learning_rate": 6.737283670744075e-05,
      "loss": 2.7354,
      "step": 180282
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5947983264923096,
      "learning_rate": 6.737025378015932e-05,
      "loss": 2.8699,
      "step": 180283
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.784698724746704,
      "learning_rate": 6.736767089612806e-05,
      "loss": 2.814,
      "step": 180284
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.800858497619629,
      "learning_rate": 6.736508805534764e-05,
      "loss": 2.8506,
      "step": 180285
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7347044944763184,
      "learning_rate": 6.736250525781837e-05,
      "loss": 3.1496,
      "step": 180286
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.920433759689331,
      "learning_rate": 6.735992250354095e-05,
      "loss": 2.825,
      "step": 180287
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.621519088745117,
      "learning_rate": 6.735733979251558e-05,
      "loss": 2.9533,
      "step": 180288
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.790452718734741,
      "learning_rate": 6.73547571247429e-05,
      "loss": 2.9259,
      "step": 180289
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.052171468734741,
      "learning_rate": 6.735217450022333e-05,
      "loss": 2.77,
      "step": 180290
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4551572799682617,
      "learning_rate": 6.73495919189574e-05,
      "loss": 3.2355,
      "step": 180291
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.617535352706909,
      "learning_rate": 6.734700938094557e-05,
      "loss": 3.0333,
      "step": 180292
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.116767168045044,
      "learning_rate": 6.734442688618837e-05,
      "loss": 3.1514,
      "step": 180293
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.098559856414795,
      "learning_rate": 6.734184443468622e-05,
      "loss": 2.8442,
      "step": 180294
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.56533670425415,
      "learning_rate": 6.733926202643961e-05,
      "loss": 2.9969,
      "step": 180295
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7642364501953125,
      "learning_rate": 6.733667966144897e-05,
      "loss": 2.9984,
      "step": 180296
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.061326026916504,
      "learning_rate": 6.733409733971489e-05,
      "loss": 2.7824,
      "step": 180297
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.783642530441284,
      "learning_rate": 6.73315150612377e-05,
      "loss": 3.0034,
      "step": 180298
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3512353897094727,
      "learning_rate": 6.732893282601809e-05,
      "loss": 3.0022,
      "step": 180299
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5426342487335205,
      "learning_rate": 6.73263506340564e-05,
      "loss": 2.9747,
      "step": 180300
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.951563596725464,
      "learning_rate": 6.732376848535314e-05,
      "loss": 2.9577,
      "step": 180301
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2268664836883545,
      "learning_rate": 6.732118637990869e-05,
      "loss": 2.9458,
      "step": 180302
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.032584190368652,
      "learning_rate": 6.731860431772372e-05,
      "loss": 3.0379,
      "step": 180303
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.446503162384033,
      "learning_rate": 6.731602229879853e-05,
      "loss": 3.0171,
      "step": 180304
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.319511413574219,
      "learning_rate": 6.731344032313379e-05,
      "loss": 2.745,
      "step": 180305
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.227131366729736,
      "learning_rate": 6.731085839072984e-05,
      "loss": 2.9899,
      "step": 180306
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3856382369995117,
      "learning_rate": 6.730827650158718e-05,
      "loss": 3.0712,
      "step": 180307
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.797522783279419,
      "learning_rate": 6.730569465570627e-05,
      "loss": 2.8474,
      "step": 180308
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.7105939388275146,
      "learning_rate": 6.730311285308768e-05,
      "loss": 3.195,
      "step": 180309
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.278639316558838,
      "learning_rate": 6.730053109373173e-05,
      "loss": 2.757,
      "step": 180310
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.431447744369507,
      "learning_rate": 6.729794937763913e-05,
      "loss": 2.9744,
      "step": 180311
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.680614471435547,
      "learning_rate": 6.729536770481022e-05,
      "loss": 3.0141,
      "step": 180312
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9947946071624756,
      "learning_rate": 6.729278607524546e-05,
      "loss": 3.0501,
      "step": 180313
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6896042823791504,
      "learning_rate": 6.729020448894532e-05,
      "loss": 2.8427,
      "step": 180314
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.019810199737549,
      "learning_rate": 6.728762294591039e-05,
      "loss": 2.8893,
      "step": 180315
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.9657058715820312,
      "learning_rate": 6.7285041446141e-05,
      "loss": 2.9562,
      "step": 180316
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.465726375579834,
      "learning_rate": 6.728245998963781e-05,
      "loss": 2.7142,
      "step": 180317
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.806966304779053,
      "learning_rate": 6.727987857640117e-05,
      "loss": 2.8752,
      "step": 180318
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.216568946838379,
      "learning_rate": 6.727729720643162e-05,
      "loss": 2.9658,
      "step": 180319
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4234209060668945,
      "learning_rate": 6.727471587972954e-05,
      "loss": 2.9383,
      "step": 180320
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1845436096191406,
      "learning_rate": 6.727213459629554e-05,
      "loss": 2.7095,
      "step": 180321
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5196781158447266,
      "learning_rate": 6.726955335613e-05,
      "loss": 2.851,
      "step": 180322
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.170919418334961,
      "learning_rate": 6.726697215923346e-05,
      "loss": 3.1353,
      "step": 180323
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.869277238845825,
      "learning_rate": 6.726439100560643e-05,
      "loss": 3.0693,
      "step": 180324
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.399843454360962,
      "learning_rate": 6.726180989524935e-05,
      "loss": 3.2757,
      "step": 180325
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.88413667678833,
      "learning_rate": 6.72592288281626e-05,
      "loss": 2.7656,
      "step": 180326
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.569093942642212,
      "learning_rate": 6.725664780434682e-05,
      "loss": 2.979,
      "step": 180327
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9671318531036377,
      "learning_rate": 6.725406682380237e-05,
      "loss": 2.8898,
      "step": 180328
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9387478828430176,
      "learning_rate": 6.725148588652983e-05,
      "loss": 2.83,
      "step": 180329
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7083911895751953,
      "learning_rate": 6.724890499252965e-05,
      "loss": 3.0464,
      "step": 180330
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.1912014484405518,
      "learning_rate": 6.724632414180229e-05,
      "loss": 3.2748,
      "step": 180331
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3345859050750732,
      "learning_rate": 6.724374333434814e-05,
      "loss": 2.8411,
      "step": 180332
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.087456464767456,
      "learning_rate": 6.724116257016789e-05,
      "loss": 2.7709,
      "step": 180333
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0861401557922363,
      "learning_rate": 6.723858184926177e-05,
      "loss": 2.7925,
      "step": 180334
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.733736515045166,
      "learning_rate": 6.723600117163052e-05,
      "loss": 3.2344,
      "step": 180335
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8607423305511475,
      "learning_rate": 6.723342053727448e-05,
      "loss": 2.813,
      "step": 180336
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4144957065582275,
      "learning_rate": 6.723083994619412e-05,
      "loss": 3.3783,
      "step": 180337
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8582985401153564,
      "learning_rate": 6.722825939838987e-05,
      "loss": 2.9978,
      "step": 180338
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4774632453918457,
      "learning_rate": 6.722567889386235e-05,
      "loss": 2.7163,
      "step": 180339
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7522060871124268,
      "learning_rate": 6.72230984326119e-05,
      "loss": 2.7974,
      "step": 180340
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.927961826324463,
      "learning_rate": 6.722051801463913e-05,
      "loss": 2.9594,
      "step": 180341
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8146121501922607,
      "learning_rate": 6.721793763994449e-05,
      "loss": 2.9849,
      "step": 180342
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4208338260650635,
      "learning_rate": 6.721535730852835e-05,
      "loss": 3.1127,
      "step": 180343
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2747890949249268,
      "learning_rate": 6.721277702039133e-05,
      "loss": 2.8077,
      "step": 180344
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.591444969177246,
      "learning_rate": 6.721019677553386e-05,
      "loss": 3.0308,
      "step": 180345
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5032596588134766,
      "learning_rate": 6.720761657395633e-05,
      "loss": 2.6357,
      "step": 180346
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2144341468811035,
      "learning_rate": 6.720503641565938e-05,
      "loss": 3.0119,
      "step": 180347
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.479217052459717,
      "learning_rate": 6.72024563006434e-05,
      "loss": 3.0724,
      "step": 180348
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.960266351699829,
      "learning_rate": 6.719987622890881e-05,
      "loss": 2.5859,
      "step": 180349
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6712489128112793,
      "learning_rate": 6.719729620045623e-05,
      "loss": 2.9643,
      "step": 180350
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.898256540298462,
      "learning_rate": 6.719471621528606e-05,
      "loss": 2.7459,
      "step": 180351
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6003992557525635,
      "learning_rate": 6.71921362733987e-05,
      "loss": 2.9221,
      "step": 180352
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9156055450439453,
      "learning_rate": 6.718955637479481e-05,
      "loss": 2.9881,
      "step": 180353
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.612061023712158,
      "learning_rate": 6.718697651947478e-05,
      "loss": 3.0074,
      "step": 180354
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.627985954284668,
      "learning_rate": 6.718439670743898e-05,
      "loss": 3.0389,
      "step": 180355
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.92242431640625,
      "learning_rate": 6.718181693868813e-05,
      "loss": 2.8862,
      "step": 180356
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7159790992736816,
      "learning_rate": 6.717923721322244e-05,
      "loss": 3.1874,
      "step": 180357
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.529388666152954,
      "learning_rate": 6.717665753104262e-05,
      "loss": 2.8635,
      "step": 180358
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8669204711914062,
      "learning_rate": 6.717407789214907e-05,
      "loss": 3.0153,
      "step": 180359
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.754478931427002,
      "learning_rate": 6.717149829654216e-05,
      "loss": 3.1075,
      "step": 180360
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9768738746643066,
      "learning_rate": 6.716891874422255e-05,
      "loss": 2.9647,
      "step": 180361
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6762380599975586,
      "learning_rate": 6.716633923519064e-05,
      "loss": 2.9752,
      "step": 180362
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.593488931655884,
      "learning_rate": 6.71637597694468e-05,
      "loss": 3.2784,
      "step": 180363
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.544487714767456,
      "learning_rate": 6.716118034699171e-05,
      "loss": 2.886,
      "step": 180364
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.566635847091675,
      "learning_rate": 6.715860096782575e-05,
      "loss": 2.7695,
      "step": 180365
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.134455919265747,
      "learning_rate": 6.71560216319493e-05,
      "loss": 2.9939,
      "step": 180366
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9823105335235596,
      "learning_rate": 6.715344233936305e-05,
      "loss": 2.9354,
      "step": 180367
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5839743614196777,
      "learning_rate": 6.715086309006734e-05,
      "loss": 3.1782,
      "step": 180368
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4802982807159424,
      "learning_rate": 6.714828388406263e-05,
      "loss": 2.9898,
      "step": 180369
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.902644395828247,
      "learning_rate": 6.714570472134954e-05,
      "loss": 3.0615,
      "step": 180370
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0480546951293945,
      "learning_rate": 6.714312560192834e-05,
      "loss": 2.9463,
      "step": 180371
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4073126316070557,
      "learning_rate": 6.714054652579971e-05,
      "loss": 2.5891,
      "step": 180372
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5815656185150146,
      "learning_rate": 6.713796749296408e-05,
      "loss": 3.1008,
      "step": 180373
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5873935222625732,
      "learning_rate": 6.71353885034219e-05,
      "loss": 2.7266,
      "step": 180374
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.718078136444092,
      "learning_rate": 6.713280955717354e-05,
      "loss": 3.1094,
      "step": 180375
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0774660110473633,
      "learning_rate": 6.713023065421966e-05,
      "loss": 2.827,
      "step": 180376
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.896181106567383,
      "learning_rate": 6.71276517945606e-05,
      "loss": 2.989,
      "step": 180377
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4776268005371094,
      "learning_rate": 6.712507297819698e-05,
      "loss": 2.8863,
      "step": 180378
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9266881942749023,
      "learning_rate": 6.712249420512922e-05,
      "loss": 2.9638,
      "step": 180379
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7403016090393066,
      "learning_rate": 6.711991547535777e-05,
      "loss": 3.0486,
      "step": 180380
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6688852310180664,
      "learning_rate": 6.711733678888305e-05,
      "loss": 2.8432,
      "step": 180381
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5959620475769043,
      "learning_rate": 6.71147581457057e-05,
      "loss": 2.81,
      "step": 180382
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.547914981842041,
      "learning_rate": 6.711217954582602e-05,
      "loss": 2.8381,
      "step": 180383
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5439136028289795,
      "learning_rate": 6.710960098924465e-05,
      "loss": 2.9254,
      "step": 180384
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.951805591583252,
      "learning_rate": 6.710702247596203e-05,
      "loss": 3.0205,
      "step": 180385
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.629258394241333,
      "learning_rate": 6.71044440059786e-05,
      "loss": 3.0406,
      "step": 180386
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9292469024658203,
      "learning_rate": 6.710186557929478e-05,
      "loss": 3.0147,
      "step": 180387
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.999878168106079,
      "learning_rate": 6.709928719591118e-05,
      "loss": 2.9082,
      "step": 180388
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.092787742614746,
      "learning_rate": 6.709670885582814e-05,
      "loss": 2.9624,
      "step": 180389
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9020919799804688,
      "learning_rate": 6.70941305590463e-05,
      "loss": 2.8399,
      "step": 180390
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.182415246963501,
      "learning_rate": 6.709155230556605e-05,
      "loss": 2.9314,
      "step": 180391
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9240188598632812,
      "learning_rate": 6.708897409538791e-05,
      "loss": 3.132,
      "step": 180392
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.08955454826355,
      "learning_rate": 6.708639592851221e-05,
      "loss": 2.9341,
      "step": 180393
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.155103921890259,
      "learning_rate": 6.708381780493966e-05,
      "loss": 2.7927,
      "step": 180394
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0175535678863525,
      "learning_rate": 6.708123972467052e-05,
      "loss": 3.2327,
      "step": 180395
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.590327024459839,
      "learning_rate": 6.707866168770545e-05,
      "loss": 3.0797,
      "step": 180396
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5633163452148438,
      "learning_rate": 6.707608369404486e-05,
      "loss": 3.0559,
      "step": 180397
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9466209411621094,
      "learning_rate": 6.707350574368925e-05,
      "loss": 2.7128,
      "step": 180398
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5483531951904297,
      "learning_rate": 6.707092783663896e-05,
      "loss": 2.877,
      "step": 180399
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.951181650161743,
      "learning_rate": 6.706834997289465e-05,
      "loss": 2.8796,
      "step": 180400
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2739169597625732,
      "learning_rate": 6.706577215245668e-05,
      "loss": 2.9259,
      "step": 180401
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3685150146484375,
      "learning_rate": 6.706319437532565e-05,
      "loss": 2.8774,
      "step": 180402
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5109143257141113,
      "learning_rate": 6.706061664150196e-05,
      "loss": 2.8557,
      "step": 180403
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2886600494384766,
      "learning_rate": 6.705803895098612e-05,
      "loss": 3.0478,
      "step": 180404
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.484842538833618,
      "learning_rate": 6.705546130377852e-05,
      "loss": 3.1465,
      "step": 180405
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.924757242202759,
      "learning_rate": 6.705288369987976e-05,
      "loss": 2.9087,
      "step": 180406
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.545236349105835,
      "learning_rate": 6.705030613929015e-05,
      "loss": 2.8319,
      "step": 180407
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3239943981170654,
      "learning_rate": 6.70477286220104e-05,
      "loss": 3.1067,
      "step": 180408
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8495116233825684,
      "learning_rate": 6.70451511480409e-05,
      "loss": 3.1675,
      "step": 180409
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2995879650115967,
      "learning_rate": 6.704257371738209e-05,
      "loss": 2.7588,
      "step": 180410
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6129634380340576,
      "learning_rate": 6.703999633003438e-05,
      "loss": 2.8462,
      "step": 180411
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.210278272628784,
      "learning_rate": 6.703741898599838e-05,
      "loss": 2.7999,
      "step": 180412
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.731553554534912,
      "learning_rate": 6.703484168527448e-05,
      "loss": 2.7843,
      "step": 180413
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2130401134490967,
      "learning_rate": 6.703226442786326e-05,
      "loss": 3.1019,
      "step": 180414
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6407337188720703,
      "learning_rate": 6.702968721376515e-05,
      "loss": 3.3104,
      "step": 180415
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8501038551330566,
      "learning_rate": 6.702711004298065e-05,
      "loss": 2.7677,
      "step": 180416
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.9981460571289062,
      "learning_rate": 6.702453291551009e-05,
      "loss": 2.7368,
      "step": 180417
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5601205825805664,
      "learning_rate": 6.702195583135415e-05,
      "loss": 2.8316,
      "step": 180418
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.0074310302734375,
      "learning_rate": 6.701937879051312e-05,
      "loss": 2.7495,
      "step": 180419
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9899141788482666,
      "learning_rate": 6.701680179298771e-05,
      "loss": 2.7027,
      "step": 180420
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.433903932571411,
      "learning_rate": 6.701422483877826e-05,
      "loss": 3.1059,
      "step": 180421
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.427283525466919,
      "learning_rate": 6.701164792788525e-05,
      "loss": 2.8242,
      "step": 180422
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0873703956604004,
      "learning_rate": 6.700907106030911e-05,
      "loss": 3.0294,
      "step": 180423
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.415508270263672,
      "learning_rate": 6.700649423605047e-05,
      "loss": 3.0082,
      "step": 180424
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.469903945922852,
      "learning_rate": 6.700391745510961e-05,
      "loss": 2.9992,
      "step": 180425
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5102379322052,
      "learning_rate": 6.700134071748724e-05,
      "loss": 2.9268,
      "step": 180426
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5956239700317383,
      "learning_rate": 6.699876402318361e-05,
      "loss": 2.679,
      "step": 180427
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0986974239349365,
      "learning_rate": 6.699618737219947e-05,
      "loss": 2.876,
      "step": 180428
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1809191703796387,
      "learning_rate": 6.699361076453498e-05,
      "loss": 2.9585,
      "step": 180429
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1603901386260986,
      "learning_rate": 6.699103420019087e-05,
      "loss": 2.9521,
      "step": 180430
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7096800804138184,
      "learning_rate": 6.698845767916742e-05,
      "loss": 3.1541,
      "step": 180431
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3642497062683105,
      "learning_rate": 6.698588120146531e-05,
      "loss": 2.9038,
      "step": 180432
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.915764331817627,
      "learning_rate": 6.698330476708484e-05,
      "loss": 2.8554,
      "step": 180433
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5106401443481445,
      "learning_rate": 6.698072837602663e-05,
      "loss": 2.8602,
      "step": 180434
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.099695682525635,
      "learning_rate": 6.697815202829116e-05,
      "loss": 2.6942,
      "step": 180435
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.54789137840271,
      "learning_rate": 6.697557572387881e-05,
      "loss": 2.9474,
      "step": 180436
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.974236488342285,
      "learning_rate": 6.697299946279002e-05,
      "loss": 3.0235,
      "step": 180437
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.928523302078247,
      "learning_rate": 6.69704232450254e-05,
      "loss": 2.8866,
      "step": 180438
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5767791271209717,
      "learning_rate": 6.696784707058533e-05,
      "loss": 3.1701,
      "step": 180439
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.498222827911377,
      "learning_rate": 6.696527093947044e-05,
      "loss": 3.0934,
      "step": 180440
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.774743556976318,
      "learning_rate": 6.696269485168109e-05,
      "loss": 2.9853,
      "step": 180441
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4537782669067383,
      "learning_rate": 6.696011880721768e-05,
      "loss": 2.881,
      "step": 180442
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.643237590789795,
      "learning_rate": 6.695754280608085e-05,
      "loss": 3.0521,
      "step": 180443
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8390824794769287,
      "learning_rate": 6.695496684827102e-05,
      "loss": 2.8282,
      "step": 180444
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9098167419433594,
      "learning_rate": 6.69523909337886e-05,
      "loss": 3.1262,
      "step": 180445
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.143669366836548,
      "learning_rate": 6.694981506263419e-05,
      "loss": 2.8351,
      "step": 180446
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.839839458465576,
      "learning_rate": 6.694723923480823e-05,
      "loss": 2.9292,
      "step": 180447
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.537956714630127,
      "learning_rate": 6.694466345031106e-05,
      "loss": 2.9499,
      "step": 180448
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9615206718444824,
      "learning_rate": 6.694208770914341e-05,
      "loss": 2.8283,
      "step": 180449
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.37447452545166,
      "learning_rate": 6.693951201130559e-05,
      "loss": 3.0221,
      "step": 180450
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.915363311767578,
      "learning_rate": 6.693693635679802e-05,
      "loss": 2.855,
      "step": 180451
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9630753993988037,
      "learning_rate": 6.693436074562138e-05,
      "loss": 3.0653,
      "step": 180452
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7823166847229004,
      "learning_rate": 6.693178517777605e-05,
      "loss": 2.645,
      "step": 180453
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.595642328262329,
      "learning_rate": 6.69292096532624e-05,
      "loss": 2.8491,
      "step": 180454
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5989489555358887,
      "learning_rate": 6.692663417208108e-05,
      "loss": 3.056,
      "step": 180455
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.654158115386963,
      "learning_rate": 6.692405873423243e-05,
      "loss": 2.8761,
      "step": 180456
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.078784942626953,
      "learning_rate": 6.692148333971708e-05,
      "loss": 2.6819,
      "step": 180457
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9764482975006104,
      "learning_rate": 6.691890798853543e-05,
      "loss": 3.0595,
      "step": 180458
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3840417861938477,
      "learning_rate": 6.691633268068795e-05,
      "loss": 3.4192,
      "step": 180459
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.432939052581787,
      "learning_rate": 6.691375741617505e-05,
      "loss": 3.1751,
      "step": 180460
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2555899620056152,
      "learning_rate": 6.691118219499735e-05,
      "loss": 3.1036,
      "step": 180461
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.705333948135376,
      "learning_rate": 6.690860701715519e-05,
      "loss": 3.0349,
      "step": 180462
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.54571533203125,
      "learning_rate": 6.690603188264922e-05,
      "loss": 2.8784,
      "step": 180463
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.326871633529663,
      "learning_rate": 6.690345679147978e-05,
      "loss": 2.741,
      "step": 180464
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3403468132019043,
      "learning_rate": 6.690088174364742e-05,
      "loss": 3.0728,
      "step": 180465
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.656365394592285,
      "learning_rate": 6.689830673915245e-05,
      "loss": 2.8078,
      "step": 180466
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.580116033554077,
      "learning_rate": 6.689573177799562e-05,
      "loss": 2.842,
      "step": 180467
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.258635997772217,
      "learning_rate": 6.689315686017719e-05,
      "loss": 3.1835,
      "step": 180468
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6807191371917725,
      "learning_rate": 6.689058198569778e-05,
      "loss": 2.9204,
      "step": 180469
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.8122618198394775,
      "learning_rate": 6.688800715455782e-05,
      "loss": 2.7361,
      "step": 180470
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4778928756713867,
      "learning_rate": 6.688543236675779e-05,
      "loss": 3.0282,
      "step": 180471
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5142688751220703,
      "learning_rate": 6.688285762229805e-05,
      "loss": 2.8337,
      "step": 180472
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7519848346710205,
      "learning_rate": 6.68802829211793e-05,
      "loss": 3.094,
      "step": 180473
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0621445178985596,
      "learning_rate": 6.68777082634018e-05,
      "loss": 2.828,
      "step": 180474
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.739149808883667,
      "learning_rate": 6.687513364896626e-05,
      "loss": 2.8127,
      "step": 180475
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5039520263671875,
      "learning_rate": 6.687255907787297e-05,
      "loss": 2.9944,
      "step": 180476
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.880493640899658,
      "learning_rate": 6.686998455012251e-05,
      "loss": 3.0008,
      "step": 180477
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3775172233581543,
      "learning_rate": 6.686741006571525e-05,
      "loss": 3.0496,
      "step": 180478
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1905970573425293,
      "learning_rate": 6.686483562465179e-05,
      "loss": 2.8919,
      "step": 180479
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.587691307067871,
      "learning_rate": 6.686226122693248e-05,
      "loss": 2.9845,
      "step": 180480
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.446441650390625,
      "learning_rate": 6.685968687255798e-05,
      "loss": 2.8893,
      "step": 180481
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2312657833099365,
      "learning_rate": 6.685711256152863e-05,
      "loss": 3.0005,
      "step": 180482
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.230844974517822,
      "learning_rate": 6.685453829384498e-05,
      "loss": 2.976,
      "step": 180483
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.037598609924316,
      "learning_rate": 6.685196406950739e-05,
      "loss": 3.0628,
      "step": 180484
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6882457733154297,
      "learning_rate": 6.684938988851649e-05,
      "loss": 3.0219,
      "step": 180485
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.844289779663086,
      "learning_rate": 6.684681575087259e-05,
      "loss": 3.0568,
      "step": 180486
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.533468246459961,
      "learning_rate": 6.684424165657637e-05,
      "loss": 2.891,
      "step": 180487
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.331489086151123,
      "learning_rate": 6.684166760562822e-05,
      "loss": 2.7314,
      "step": 180488
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.235219717025757,
      "learning_rate": 6.683909359802858e-05,
      "loss": 2.8525,
      "step": 180489
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.7292888164520264,
      "learning_rate": 6.683651963377787e-05,
      "loss": 3.2119,
      "step": 180490
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.336599349975586,
      "learning_rate": 6.683394571287676e-05,
      "loss": 3.3049,
      "step": 180491
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2145543098449707,
      "learning_rate": 6.683137183532553e-05,
      "loss": 2.7278,
      "step": 180492
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.024038553237915,
      "learning_rate": 6.682879800112483e-05,
      "loss": 2.9826,
      "step": 180493
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8444762229919434,
      "learning_rate": 6.682622421027495e-05,
      "loss": 2.596,
      "step": 180494
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.960348129272461,
      "learning_rate": 6.682365046277665e-05,
      "loss": 2.7847,
      "step": 180495
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.441077709197998,
      "learning_rate": 6.682107675863008e-05,
      "loss": 3.0203,
      "step": 180496
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0884430408477783,
      "learning_rate": 6.681850309783597e-05,
      "loss": 2.9406,
      "step": 180497
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.567225217819214,
      "learning_rate": 6.681592948039462e-05,
      "loss": 3.1066,
      "step": 180498
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9475388526916504,
      "learning_rate": 6.681335590630665e-05,
      "loss": 3.0038,
      "step": 180499
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7195096015930176,
      "learning_rate": 6.681078237557239e-05,
      "loss": 3.0691,
      "step": 180500
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.055179595947266,
      "learning_rate": 6.680820888819259e-05,
      "loss": 2.9592,
      "step": 180501
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2509634494781494,
      "learning_rate": 6.680563544416739e-05,
      "loss": 2.747,
      "step": 180502
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.988041639328003,
      "learning_rate": 6.680306204349749e-05,
      "loss": 2.8768,
      "step": 180503
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.026038885116577,
      "learning_rate": 6.680048868618325e-05,
      "loss": 3.0625,
      "step": 180504
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6455445289611816,
      "learning_rate": 6.679791537222526e-05,
      "loss": 2.9672,
      "step": 180505
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.908669948577881,
      "learning_rate": 6.679534210162386e-05,
      "loss": 3.1439,
      "step": 180506
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7604801654815674,
      "learning_rate": 6.679276887437974e-05,
      "loss": 3.0537,
      "step": 180507
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.517465591430664,
      "learning_rate": 6.679019569049314e-05,
      "loss": 2.9289,
      "step": 180508
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.836057186126709,
      "learning_rate": 6.678762254996469e-05,
      "loss": 2.7671,
      "step": 180509
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3992550373077393,
      "learning_rate": 6.678504945279476e-05,
      "loss": 3.2084,
      "step": 180510
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6177608966827393,
      "learning_rate": 6.678247639898394e-05,
      "loss": 3.0832,
      "step": 180511
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.62807559967041,
      "learning_rate": 6.67799033885326e-05,
      "loss": 2.7415,
      "step": 180512
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.723937511444092,
      "learning_rate": 6.677733042144146e-05,
      "loss": 2.7735,
      "step": 180513
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.09519100189209,
      "learning_rate": 6.677475749771062e-05,
      "loss": 2.735,
      "step": 180514
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.458502769470215,
      "learning_rate": 6.677218461734087e-05,
      "loss": 2.9317,
      "step": 180515
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5122156143188477,
      "learning_rate": 6.676961178033246e-05,
      "loss": 3.0126,
      "step": 180516
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8177967071533203,
      "learning_rate": 6.676703898668609e-05,
      "loss": 2.8955,
      "step": 180517
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.115044116973877,
      "learning_rate": 6.676446623640205e-05,
      "loss": 2.805,
      "step": 180518
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.85886549949646,
      "learning_rate": 6.676189352948095e-05,
      "loss": 2.9878,
      "step": 180519
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.390753269195557,
      "learning_rate": 6.675932086592323e-05,
      "loss": 2.9675,
      "step": 180520
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1429502964019775,
      "learning_rate": 6.675674824572936e-05,
      "loss": 2.9195,
      "step": 180521
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7572247982025146,
      "learning_rate": 6.675417566889972e-05,
      "loss": 3.1931,
      "step": 180522
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.199445962905884,
      "learning_rate": 6.6751603135435e-05,
      "loss": 3.0944,
      "step": 180523
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.791088342666626,
      "learning_rate": 6.674903064533544e-05,
      "loss": 3.1446,
      "step": 180524
    },
    {
      "epoch": 2.35,
      "grad_norm": 6.5474700927734375,
      "learning_rate": 6.674645819860174e-05,
      "loss": 2.9646,
      "step": 180525
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.762889862060547,
      "learning_rate": 6.674388579523425e-05,
      "loss": 3.1636,
      "step": 180526
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.31050968170166,
      "learning_rate": 6.674131343523351e-05,
      "loss": 2.7896,
      "step": 180527
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0178627967834473,
      "learning_rate": 6.673874111859985e-05,
      "loss": 2.8214,
      "step": 180528
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.621847152709961,
      "learning_rate": 6.673616884533396e-05,
      "loss": 2.7813,
      "step": 180529
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4733517169952393,
      "learning_rate": 6.673359661543615e-05,
      "loss": 2.8135,
      "step": 180530
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.166933059692383,
      "learning_rate": 6.673102442890701e-05,
      "loss": 2.9173,
      "step": 180531
    },
    {
      "epoch": 2.35,
      "grad_norm": 7.6904120445251465,
      "learning_rate": 6.672845228574701e-05,
      "loss": 2.8652,
      "step": 180532
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8889150619506836,
      "learning_rate": 6.672588018595649e-05,
      "loss": 2.8009,
      "step": 180533
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.530069351196289,
      "learning_rate": 6.672330812953615e-05,
      "loss": 3.0703,
      "step": 180534
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7627954483032227,
      "learning_rate": 6.67207361164863e-05,
      "loss": 3.0154,
      "step": 180535
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1846864223480225,
      "learning_rate": 6.671816414680744e-05,
      "loss": 2.6259,
      "step": 180536
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.018711805343628,
      "learning_rate": 6.671559222050011e-05,
      "loss": 3.0712,
      "step": 180537
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.647029161453247,
      "learning_rate": 6.67130203375648e-05,
      "loss": 2.8197,
      "step": 180538
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6117117404937744,
      "learning_rate": 6.671044849800182e-05,
      "loss": 2.8936,
      "step": 180539
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6080853939056396,
      "learning_rate": 6.670787670181187e-05,
      "loss": 3.0326,
      "step": 180540
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8051764965057373,
      "learning_rate": 6.670530494899526e-05,
      "loss": 3.0095,
      "step": 180541
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6660494804382324,
      "learning_rate": 6.67027332395526e-05,
      "loss": 2.8051,
      "step": 180542
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.517641544342041,
      "learning_rate": 6.670016157348434e-05,
      "loss": 2.9541,
      "step": 180543
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0693917274475098,
      "learning_rate": 6.669758995079087e-05,
      "loss": 2.789,
      "step": 180544
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8426268100738525,
      "learning_rate": 6.669501837147269e-05,
      "loss": 2.8889,
      "step": 180545
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.711300849914551,
      "learning_rate": 6.669244683553037e-05,
      "loss": 2.9777,
      "step": 180546
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.362945318222046,
      "learning_rate": 6.668987534296425e-05,
      "loss": 2.9085,
      "step": 180547
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3345303535461426,
      "learning_rate": 6.668730389377494e-05,
      "loss": 3.0487,
      "step": 180548
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7566170692443848,
      "learning_rate": 6.668473248796289e-05,
      "loss": 3.0711,
      "step": 180549
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0903828144073486,
      "learning_rate": 6.668216112552858e-05,
      "loss": 2.7401,
      "step": 180550
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2390379905700684,
      "learning_rate": 6.667958980647233e-05,
      "loss": 2.6839,
      "step": 180551
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.326390027999878,
      "learning_rate": 6.667701853079486e-05,
      "loss": 3.0692,
      "step": 180552
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.029966354370117,
      "learning_rate": 6.667444729849645e-05,
      "loss": 2.5981,
      "step": 180553
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.751708745956421,
      "learning_rate": 6.667187610957773e-05,
      "loss": 3.0896,
      "step": 180554
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4567995071411133,
      "learning_rate": 6.666930496403912e-05,
      "loss": 3.0847,
      "step": 180555
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5447185039520264,
      "learning_rate": 6.666673386188108e-05,
      "loss": 2.947,
      "step": 180556
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2623541355133057,
      "learning_rate": 6.666416280310404e-05,
      "loss": 3.1589,
      "step": 180557
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6779465675354004,
      "learning_rate": 6.666159178770862e-05,
      "loss": 2.7183,
      "step": 180558
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4451398849487305,
      "learning_rate": 6.66590208156951e-05,
      "loss": 2.7554,
      "step": 180559
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.604515314102173,
      "learning_rate": 6.665644988706419e-05,
      "loss": 3.0223,
      "step": 180560
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6069328784942627,
      "learning_rate": 6.665387900181617e-05,
      "loss": 3.1406,
      "step": 180561
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.542672634124756,
      "learning_rate": 6.665130815995175e-05,
      "loss": 2.8513,
      "step": 180562
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7139604091644287,
      "learning_rate": 6.664873736147108e-05,
      "loss": 2.9355,
      "step": 180563
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.947120428085327,
      "learning_rate": 6.664616660637492e-05,
      "loss": 2.6685,
      "step": 180564
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6014273166656494,
      "learning_rate": 6.664359589466355e-05,
      "loss": 2.6813,
      "step": 180565
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.075408697128296,
      "learning_rate": 6.664102522633761e-05,
      "loss": 2.9745,
      "step": 180566
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4182374477386475,
      "learning_rate": 6.663845460139743e-05,
      "loss": 2.8662,
      "step": 180567
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6395463943481445,
      "learning_rate": 6.663588401984374e-05,
      "loss": 2.9561,
      "step": 180568
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.015676259994507,
      "learning_rate": 6.663331348167669e-05,
      "loss": 3.1281,
      "step": 180569
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5771732330322266,
      "learning_rate": 6.663074298689699e-05,
      "loss": 2.7064,
      "step": 180570
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.846665620803833,
      "learning_rate": 6.662817253550493e-05,
      "loss": 2.9067,
      "step": 180571
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.619239568710327,
      "learning_rate": 6.662560212750121e-05,
      "loss": 3.0524,
      "step": 180572
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9522268772125244,
      "learning_rate": 6.662303176288609e-05,
      "loss": 3.0033,
      "step": 180573
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8809690475463867,
      "learning_rate": 6.662046144166034e-05,
      "loss": 2.8233,
      "step": 180574
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.812180995941162,
      "learning_rate": 6.661789116382407e-05,
      "loss": 3.0295,
      "step": 180575
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.605886459350586,
      "learning_rate": 6.661532092937806e-05,
      "loss": 2.8069,
      "step": 180576
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.975217819213867,
      "learning_rate": 6.661275073832253e-05,
      "loss": 2.9813,
      "step": 180577
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.865246534347534,
      "learning_rate": 6.661018059065822e-05,
      "loss": 2.8142,
      "step": 180578
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8832406997680664,
      "learning_rate": 6.660761048638538e-05,
      "loss": 2.8304,
      "step": 180579
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.617060422897339,
      "learning_rate": 6.660504042550476e-05,
      "loss": 3.0044,
      "step": 180580
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5085220336914062,
      "learning_rate": 6.66024704080165e-05,
      "loss": 3.0528,
      "step": 180581
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5328524112701416,
      "learning_rate": 6.659990043392134e-05,
      "loss": 2.8504,
      "step": 180582
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3097004890441895,
      "learning_rate": 6.659733050321956e-05,
      "loss": 2.9742,
      "step": 180583
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2181851863861084,
      "learning_rate": 6.659476061591184e-05,
      "loss": 3.0009,
      "step": 180584
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.996975898742676,
      "learning_rate": 6.659219077199846e-05,
      "loss": 2.8744,
      "step": 180585
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.696800947189331,
      "learning_rate": 6.658962097148018e-05,
      "loss": 3.028,
      "step": 180586
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.854691505432129,
      "learning_rate": 6.658705121435715e-05,
      "loss": 2.8823,
      "step": 180587
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7274816036224365,
      "learning_rate": 6.658448150063006e-05,
      "loss": 3.0439,
      "step": 180588
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.525888442993164,
      "learning_rate": 6.658191183029923e-05,
      "loss": 2.6057,
      "step": 180589
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8442671298980713,
      "learning_rate": 6.657934220336532e-05,
      "loss": 2.8319,
      "step": 180590
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.055920124053955,
      "learning_rate": 6.657677261982861e-05,
      "loss": 2.8823,
      "step": 180591
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.202054023742676,
      "learning_rate": 6.657420307968988e-05,
      "loss": 2.884,
      "step": 180592
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.664116621017456,
      "learning_rate": 6.657163358294923e-05,
      "loss": 2.9768,
      "step": 180593
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5628111362457275,
      "learning_rate": 6.65690641296074e-05,
      "loss": 2.9372,
      "step": 180594
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5967319011688232,
      "learning_rate": 6.656649471966473e-05,
      "loss": 3.1567,
      "step": 180595
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2424023151397705,
      "learning_rate": 6.656392535312181e-05,
      "loss": 3.0242,
      "step": 180596
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8901798725128174,
      "learning_rate": 6.656135602997899e-05,
      "loss": 2.8038,
      "step": 180597
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6722030639648438,
      "learning_rate": 6.655878675023698e-05,
      "loss": 3.1603,
      "step": 180598
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.201500415802002,
      "learning_rate": 6.655621751389592e-05,
      "loss": 3.052,
      "step": 180599
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.024914026260376,
      "learning_rate": 6.65536483209566e-05,
      "loss": 3.0872,
      "step": 180600
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.233907699584961,
      "learning_rate": 6.655107917141922e-05,
      "loss": 2.7598,
      "step": 180601
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7614569664001465,
      "learning_rate": 6.654851006528452e-05,
      "loss": 2.8684,
      "step": 180602
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7389791011810303,
      "learning_rate": 6.654594100255277e-05,
      "loss": 2.7896,
      "step": 180603
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0404446125030518,
      "learning_rate": 6.654337198322461e-05,
      "loss": 3.0331,
      "step": 180604
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.259822130203247,
      "learning_rate": 6.654080300730045e-05,
      "loss": 2.8333,
      "step": 180605
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.156404733657837,
      "learning_rate": 6.653823407478078e-05,
      "loss": 3.06,
      "step": 180606
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.526911497116089,
      "learning_rate": 6.653566518566595e-05,
      "loss": 3.0027,
      "step": 180607
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8446130752563477,
      "learning_rate": 6.653309633995662e-05,
      "loss": 2.8605,
      "step": 180608
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5779128074645996,
      "learning_rate": 6.653052753765316e-05,
      "loss": 3.2563,
      "step": 180609
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3390278816223145,
      "learning_rate": 6.652795877875613e-05,
      "loss": 2.8031,
      "step": 180610
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.967118740081787,
      "learning_rate": 6.652539006326595e-05,
      "loss": 3.015,
      "step": 180611
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0190088748931885,
      "learning_rate": 6.652282139118311e-05,
      "loss": 2.9235,
      "step": 180612
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.506789445877075,
      "learning_rate": 6.652025276250802e-05,
      "loss": 2.8519,
      "step": 180613
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6043593883514404,
      "learning_rate": 6.651768417724131e-05,
      "loss": 3.0326,
      "step": 180614
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8546547889709473,
      "learning_rate": 6.651511563538326e-05,
      "loss": 2.891,
      "step": 180615
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6901423931121826,
      "learning_rate": 6.651254713693457e-05,
      "loss": 2.9795,
      "step": 180616
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.623535633087158,
      "learning_rate": 6.650997868189559e-05,
      "loss": 2.7756,
      "step": 180617
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.424152135848999,
      "learning_rate": 6.65074102702667e-05,
      "loss": 2.6694,
      "step": 180618
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.679776191711426,
      "learning_rate": 6.650484190204863e-05,
      "loss": 2.9987,
      "step": 180619
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.489812135696411,
      "learning_rate": 6.650227357724167e-05,
      "loss": 2.6526,
      "step": 180620
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4492669105529785,
      "learning_rate": 6.649970529584628e-05,
      "loss": 2.8201,
      "step": 180621
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.474728584289551,
      "learning_rate": 6.64971370578631e-05,
      "loss": 3.1513,
      "step": 180622
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6487531661987305,
      "learning_rate": 6.64945688632925e-05,
      "loss": 2.9843,
      "step": 180623
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.370832920074463,
      "learning_rate": 6.649200071213486e-05,
      "loss": 3.193,
      "step": 180624
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.857924699783325,
      "learning_rate": 6.648943260439087e-05,
      "loss": 2.7937,
      "step": 180625
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8373472690582275,
      "learning_rate": 6.648686454006081e-05,
      "loss": 2.7598,
      "step": 180626
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8939990997314453,
      "learning_rate": 6.648429651914536e-05,
      "loss": 2.897,
      "step": 180627
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6989617347717285,
      "learning_rate": 6.648172854164489e-05,
      "loss": 3.0016,
      "step": 180628
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.117393970489502,
      "learning_rate": 6.647916060755984e-05,
      "loss": 3.1098,
      "step": 180629
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2894983291625977,
      "learning_rate": 6.647659271689064e-05,
      "loss": 3.1194,
      "step": 180630
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.592266798019409,
      "learning_rate": 6.647402486963794e-05,
      "loss": 2.9204,
      "step": 180631
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.657423973083496,
      "learning_rate": 6.647145706580204e-05,
      "loss": 2.912,
      "step": 180632
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.888075828552246,
      "learning_rate": 6.646888930538362e-05,
      "loss": 2.7946,
      "step": 180633
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0153424739837646,
      "learning_rate": 6.6466321588383e-05,
      "loss": 2.969,
      "step": 180634
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0287928581237793,
      "learning_rate": 6.646375391480075e-05,
      "loss": 2.5944,
      "step": 180635
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7070248126983643,
      "learning_rate": 6.646118628463716e-05,
      "loss": 3.2253,
      "step": 180636
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1848623752593994,
      "learning_rate": 6.645861869789295e-05,
      "loss": 3.0067,
      "step": 180637
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.799546480178833,
      "learning_rate": 6.645605115456839e-05,
      "loss": 2.9146,
      "step": 180638
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.2471818923950195,
      "learning_rate": 6.645348365466418e-05,
      "loss": 2.8392,
      "step": 180639
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9753167629241943,
      "learning_rate": 6.645091619818056e-05,
      "loss": 3.1349,
      "step": 180640
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5910212993621826,
      "learning_rate": 6.644834878511831e-05,
      "loss": 3.047,
      "step": 180641
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.829427480697632,
      "learning_rate": 6.644578141547752e-05,
      "loss": 3.1001,
      "step": 180642
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.543924331665039,
      "learning_rate": 6.644321408925898e-05,
      "loss": 3.0015,
      "step": 180643
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9628186225891113,
      "learning_rate": 6.644064680646296e-05,
      "loss": 2.8888,
      "step": 180644
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4836299419403076,
      "learning_rate": 6.643807956709014e-05,
      "loss": 2.899,
      "step": 180645
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7418529987335205,
      "learning_rate": 6.643551237114079e-05,
      "loss": 2.8723,
      "step": 180646
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6722874641418457,
      "learning_rate": 6.643294521861563e-05,
      "loss": 2.9082,
      "step": 180647
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9405815601348877,
      "learning_rate": 6.643037810951488e-05,
      "loss": 2.8882,
      "step": 180648
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.643465995788574,
      "learning_rate": 6.642781104383918e-05,
      "loss": 2.7369,
      "step": 180649
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.596437931060791,
      "learning_rate": 6.642524402158888e-05,
      "loss": 2.7457,
      "step": 180650
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.248305320739746,
      "learning_rate": 6.642267704276461e-05,
      "loss": 2.7235,
      "step": 180651
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.749056816101074,
      "learning_rate": 6.64201101073667e-05,
      "loss": 3.0595,
      "step": 180652
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2484443187713623,
      "learning_rate": 6.641754321539588e-05,
      "loss": 2.7569,
      "step": 180653
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2428359985351562,
      "learning_rate": 6.641497636685227e-05,
      "loss": 3.0221,
      "step": 180654
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.360252857208252,
      "learning_rate": 6.641240956173662e-05,
      "loss": 2.669,
      "step": 180655
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.6263163089752197,
      "learning_rate": 6.640984280004919e-05,
      "loss": 3.0083,
      "step": 180656
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5047521591186523,
      "learning_rate": 6.640727608179068e-05,
      "loss": 2.7502,
      "step": 180657
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.632814884185791,
      "learning_rate": 6.640470940696141e-05,
      "loss": 2.6596,
      "step": 180658
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.907093048095703,
      "learning_rate": 6.640214277556206e-05,
      "loss": 2.7896,
      "step": 180659
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.4030256271362305,
      "learning_rate": 6.639957618759281e-05,
      "loss": 2.8604,
      "step": 180660
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.360430955886841,
      "learning_rate": 6.639700964305435e-05,
      "loss": 2.8227,
      "step": 180661
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1937239170074463,
      "learning_rate": 6.639444314194701e-05,
      "loss": 2.8054,
      "step": 180662
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.742128610610962,
      "learning_rate": 6.639187668427146e-05,
      "loss": 2.9288,
      "step": 180663
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.4605278968811035,
      "learning_rate": 6.638931027002798e-05,
      "loss": 2.9512,
      "step": 180664
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7870521545410156,
      "learning_rate": 6.638674389921732e-05,
      "loss": 3.1088,
      "step": 180665
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.793804883956909,
      "learning_rate": 6.638417757183956e-05,
      "loss": 2.7503,
      "step": 180666
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.975531578063965,
      "learning_rate": 6.638161128789549e-05,
      "loss": 2.8953,
      "step": 180667
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.881265640258789,
      "learning_rate": 6.637904504738543e-05,
      "loss": 2.7932,
      "step": 180668
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7214772701263428,
      "learning_rate": 6.637647885030999e-05,
      "loss": 2.8608,
      "step": 180669
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.513564109802246,
      "learning_rate": 6.637391269666947e-05,
      "loss": 3.025,
      "step": 180670
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.878242254257202,
      "learning_rate": 6.637134658646465e-05,
      "loss": 3.0981,
      "step": 180671
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.360339641571045,
      "learning_rate": 6.636878051969561e-05,
      "loss": 3.0427,
      "step": 180672
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.006532907485962,
      "learning_rate": 6.636621449636313e-05,
      "loss": 2.9755,
      "step": 180673
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.715214252471924,
      "learning_rate": 6.636364851646746e-05,
      "loss": 2.8402,
      "step": 180674
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7789673805236816,
      "learning_rate": 6.636108258000932e-05,
      "loss": 2.887,
      "step": 180675
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.8850274085998535,
      "learning_rate": 6.635851668698896e-05,
      "loss": 2.9428,
      "step": 180676
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.224273204803467,
      "learning_rate": 6.635595083740713e-05,
      "loss": 2.9075,
      "step": 180677
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3797690868377686,
      "learning_rate": 6.635338503126397e-05,
      "loss": 2.9075,
      "step": 180678
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5156657695770264,
      "learning_rate": 6.635081926856025e-05,
      "loss": 3.0457,
      "step": 180679
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.522183418273926,
      "learning_rate": 6.634825354929619e-05,
      "loss": 2.7829,
      "step": 180680
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.559765577316284,
      "learning_rate": 6.63456878734725e-05,
      "loss": 3.0355,
      "step": 180681
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.880821704864502,
      "learning_rate": 6.634312224108948e-05,
      "loss": 2.9946,
      "step": 180682
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3261570930480957,
      "learning_rate": 6.634055665214783e-05,
      "loss": 2.8504,
      "step": 180683
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6835787296295166,
      "learning_rate": 6.633799110664775e-05,
      "loss": 3.0793,
      "step": 180684
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.818613290786743,
      "learning_rate": 6.633542560458988e-05,
      "loss": 3.0353,
      "step": 180685
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8818864822387695,
      "learning_rate": 6.633286014597464e-05,
      "loss": 3.0981,
      "step": 180686
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7768633365631104,
      "learning_rate": 6.633029473080259e-05,
      "loss": 3.0597,
      "step": 180687
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6188974380493164,
      "learning_rate": 6.632772935907406e-05,
      "loss": 2.9002,
      "step": 180688
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.670064687728882,
      "learning_rate": 6.63251640307898e-05,
      "loss": 2.9515,
      "step": 180689
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0522916316986084,
      "learning_rate": 6.632259874594992e-05,
      "loss": 2.9904,
      "step": 180690
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6911916732788086,
      "learning_rate": 6.632003350455514e-05,
      "loss": 2.8091,
      "step": 180691
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.668546438217163,
      "learning_rate": 6.631746830660582e-05,
      "loss": 2.8611,
      "step": 180692
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7486624717712402,
      "learning_rate": 6.631490315210259e-05,
      "loss": 3.0502,
      "step": 180693
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.503610610961914,
      "learning_rate": 6.631233804104576e-05,
      "loss": 2.9161,
      "step": 180694
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.58636474609375,
      "learning_rate": 6.630977297343594e-05,
      "loss": 2.7825,
      "step": 180695
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.748736619949341,
      "learning_rate": 6.630720794927354e-05,
      "loss": 2.8954,
      "step": 180696
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4099907875061035,
      "learning_rate": 6.6304642968559e-05,
      "loss": 2.9968,
      "step": 180697
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.922560691833496,
      "learning_rate": 6.630207803129283e-05,
      "loss": 2.9587,
      "step": 180698
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9399170875549316,
      "learning_rate": 6.629951313747555e-05,
      "loss": 2.9609,
      "step": 180699
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.364510536193848,
      "learning_rate": 6.629694828710753e-05,
      "loss": 2.8099,
      "step": 180700
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5804803371429443,
      "learning_rate": 6.629438348018942e-05,
      "loss": 2.8715,
      "step": 180701
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.248039484024048,
      "learning_rate": 6.629181871672158e-05,
      "loss": 2.8498,
      "step": 180702
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.575721263885498,
      "learning_rate": 6.628925399670442e-05,
      "loss": 2.9467,
      "step": 180703
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.783296585083008,
      "learning_rate": 6.62866893201386e-05,
      "loss": 2.9956,
      "step": 180704
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8771822452545166,
      "learning_rate": 6.628412468702445e-05,
      "loss": 2.8468,
      "step": 180705
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8621718883514404,
      "learning_rate": 6.628156009736245e-05,
      "loss": 2.9062,
      "step": 180706
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3716275691986084,
      "learning_rate": 6.627899555115318e-05,
      "loss": 2.8791,
      "step": 180707
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.233567476272583,
      "learning_rate": 6.627643104839708e-05,
      "loss": 2.8219,
      "step": 180708
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5047528743743896,
      "learning_rate": 6.627386658909453e-05,
      "loss": 3.1369,
      "step": 180709
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4912192821502686,
      "learning_rate": 6.627130217324612e-05,
      "loss": 2.9307,
      "step": 180710
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.764285087585449,
      "learning_rate": 6.626873780085232e-05,
      "loss": 3.2164,
      "step": 180711
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.744718551635742,
      "learning_rate": 6.626617347191346e-05,
      "loss": 2.8624,
      "step": 180712
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4839491844177246,
      "learning_rate": 6.626360918643025e-05,
      "loss": 2.9076,
      "step": 180713
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.822514772415161,
      "learning_rate": 6.626104494440303e-05,
      "loss": 2.7978,
      "step": 180714
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3399064540863037,
      "learning_rate": 6.625848074583222e-05,
      "loss": 2.9893,
      "step": 180715
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3549604415893555,
      "learning_rate": 6.625591659071841e-05,
      "loss": 2.603,
      "step": 180716
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4682304859161377,
      "learning_rate": 6.625335247906201e-05,
      "loss": 2.9258,
      "step": 180717
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.73276948928833,
      "learning_rate": 6.625078841086358e-05,
      "loss": 2.7748,
      "step": 180718
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0994627475738525,
      "learning_rate": 6.624822438612357e-05,
      "loss": 3.1108,
      "step": 180719
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.493415355682373,
      "learning_rate": 6.62456604048424e-05,
      "loss": 3.036,
      "step": 180720
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5872340202331543,
      "learning_rate": 6.624309646702048e-05,
      "loss": 2.8369,
      "step": 180721
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3270773887634277,
      "learning_rate": 6.624053257265846e-05,
      "loss": 2.961,
      "step": 180722
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6332590579986572,
      "learning_rate": 6.623796872175668e-05,
      "loss": 3.0042,
      "step": 180723
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8738017082214355,
      "learning_rate": 6.623540491431577e-05,
      "loss": 2.758,
      "step": 180724
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5195765495300293,
      "learning_rate": 6.623284115033599e-05,
      "loss": 2.9501,
      "step": 180725
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5321125984191895,
      "learning_rate": 6.623027742981814e-05,
      "loss": 3.0936,
      "step": 180726
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.851686000823975,
      "learning_rate": 6.62277137527623e-05,
      "loss": 2.8066,
      "step": 180727
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.266684055328369,
      "learning_rate": 6.622515011916926e-05,
      "loss": 3.1697,
      "step": 180728
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.870363473892212,
      "learning_rate": 6.622258652903925e-05,
      "loss": 2.9873,
      "step": 180729
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.7985100746154785,
      "learning_rate": 6.622002298237296e-05,
      "loss": 2.9309,
      "step": 180730
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.045732021331787,
      "learning_rate": 6.621745947917071e-05,
      "loss": 2.6594,
      "step": 180731
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.042583465576172,
      "learning_rate": 6.621489601943324e-05,
      "loss": 2.975,
      "step": 180732
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.650588274002075,
      "learning_rate": 6.621233260316065e-05,
      "loss": 2.8506,
      "step": 180733
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.818678379058838,
      "learning_rate": 6.620976923035367e-05,
      "loss": 3.2685,
      "step": 180734
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5117647647857666,
      "learning_rate": 6.620720590101266e-05,
      "loss": 2.9876,
      "step": 180735
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1453404426574707,
      "learning_rate": 6.620464261513819e-05,
      "loss": 3.1247,
      "step": 180736
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6171369552612305,
      "learning_rate": 6.62020793727306e-05,
      "loss": 2.946,
      "step": 180737
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6575186252593994,
      "learning_rate": 6.619951617379063e-05,
      "loss": 2.8502,
      "step": 180738
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7839198112487793,
      "learning_rate": 6.619695301831844e-05,
      "loss": 2.9391,
      "step": 180739
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5098929405212402,
      "learning_rate": 6.619438990631472e-05,
      "loss": 2.8715,
      "step": 180740
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2639434337615967,
      "learning_rate": 6.619182683777978e-05,
      "loss": 2.9413,
      "step": 180741
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5748369693756104,
      "learning_rate": 6.61892638127143e-05,
      "loss": 2.992,
      "step": 180742
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4765965938568115,
      "learning_rate": 6.618670083111854e-05,
      "loss": 2.9506,
      "step": 180743
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.92148494720459,
      "learning_rate": 6.618413789299328e-05,
      "loss": 2.8132,
      "step": 180744
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5409014225006104,
      "learning_rate": 6.618157499833861e-05,
      "loss": 2.8472,
      "step": 180745
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4892830848693848,
      "learning_rate": 6.61790121471553e-05,
      "loss": 2.7979,
      "step": 180746
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2983338832855225,
      "learning_rate": 6.617644933944366e-05,
      "loss": 2.8503,
      "step": 180747
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1085429191589355,
      "learning_rate": 6.617388657520427e-05,
      "loss": 2.9985,
      "step": 180748
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6549105644226074,
      "learning_rate": 6.61713238544375e-05,
      "loss": 3.0426,
      "step": 180749
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9479074478149414,
      "learning_rate": 6.616876117714407e-05,
      "loss": 2.9547,
      "step": 180750
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3858258724212646,
      "learning_rate": 6.616619854332412e-05,
      "loss": 2.8392,
      "step": 180751
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.966228485107422,
      "learning_rate": 6.616363595297837e-05,
      "loss": 3.0036,
      "step": 180752
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5523881912231445,
      "learning_rate": 6.616107340610714e-05,
      "loss": 2.8586,
      "step": 180753
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1712486743927,
      "learning_rate": 6.615851090271103e-05,
      "loss": 2.8781,
      "step": 180754
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.414156436920166,
      "learning_rate": 6.61559484427904e-05,
      "loss": 3.0178,
      "step": 180755
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.672494411468506,
      "learning_rate": 6.615338602634598e-05,
      "loss": 2.9455,
      "step": 180756
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.615612030029297,
      "learning_rate": 6.615082365337788e-05,
      "loss": 2.9224,
      "step": 180757
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.841661214828491,
      "learning_rate": 6.614826132388682e-05,
      "loss": 3.0326,
      "step": 180758
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6506361961364746,
      "learning_rate": 6.614569903787316e-05,
      "loss": 2.8545,
      "step": 180759
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6347131729125977,
      "learning_rate": 6.614313679533747e-05,
      "loss": 2.9794,
      "step": 180760
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0277860164642334,
      "learning_rate": 6.614057459628013e-05,
      "loss": 3.1,
      "step": 180761
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2933311462402344,
      "learning_rate": 6.613801244070176e-05,
      "loss": 2.9702,
      "step": 180762
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.00028133392334,
      "learning_rate": 6.613545032860274e-05,
      "loss": 2.9406,
      "step": 180763
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4758219718933105,
      "learning_rate": 6.613288825998355e-05,
      "loss": 2.8184,
      "step": 180764
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.521899700164795,
      "learning_rate": 6.61303262348446e-05,
      "loss": 3.0651,
      "step": 180765
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3010528087615967,
      "learning_rate": 6.612776425318652e-05,
      "loss": 2.9468,
      "step": 180766
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.752875804901123,
      "learning_rate": 6.612520231500962e-05,
      "loss": 3.1304,
      "step": 180767
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5917630195617676,
      "learning_rate": 6.612264042031453e-05,
      "loss": 3.113,
      "step": 180768
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3209187984466553,
      "learning_rate": 6.612007856910166e-05,
      "loss": 2.7864,
      "step": 180769
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.8640732765197754,
      "learning_rate": 6.611751676137148e-05,
      "loss": 2.8071,
      "step": 180770
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.590221643447876,
      "learning_rate": 6.611495499712439e-05,
      "loss": 2.9338,
      "step": 180771
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1137146949768066,
      "learning_rate": 6.611239327636103e-05,
      "loss": 2.9855,
      "step": 180772
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5366013050079346,
      "learning_rate": 6.61098315990817e-05,
      "loss": 2.9199,
      "step": 180773
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.032352447509766,
      "learning_rate": 6.610726996528705e-05,
      "loss": 2.8829,
      "step": 180774
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.454364776611328,
      "learning_rate": 6.610470837497748e-05,
      "loss": 2.8461,
      "step": 180775
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.138282775878906,
      "learning_rate": 6.610214682815346e-05,
      "loss": 2.8094,
      "step": 180776
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.601776599884033,
      "learning_rate": 6.609958532481538e-05,
      "loss": 2.8687,
      "step": 180777
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.912893533706665,
      "learning_rate": 6.60970238649639e-05,
      "loss": 2.8742,
      "step": 180778
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.929261207580566,
      "learning_rate": 6.609446244859928e-05,
      "loss": 3.1981,
      "step": 180779
    },
    {
      "epoch": 2.35,
      "grad_norm": 5.472088813781738,
      "learning_rate": 6.609190107572222e-05,
      "loss": 2.9154,
      "step": 180780
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8783438205718994,
      "learning_rate": 6.60893397463331e-05,
      "loss": 2.9752,
      "step": 180781
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9924798011779785,
      "learning_rate": 6.60867784604324e-05,
      "loss": 2.9225,
      "step": 180782
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3607423305511475,
      "learning_rate": 6.608421721802048e-05,
      "loss": 3.0371,
      "step": 180783
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.0754244327545166,
      "learning_rate": 6.608165601909799e-05,
      "loss": 3.0121,
      "step": 180784
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2844297885894775,
      "learning_rate": 6.607909486366527e-05,
      "loss": 3.0184,
      "step": 180785
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2427361011505127,
      "learning_rate": 6.607653375172294e-05,
      "loss": 3.1275,
      "step": 180786
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.779120445251465,
      "learning_rate": 6.607397268327139e-05,
      "loss": 2.6955,
      "step": 180787
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8642311096191406,
      "learning_rate": 6.607141165831102e-05,
      "loss": 2.7002,
      "step": 180788
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.765394926071167,
      "learning_rate": 6.60688506768425e-05,
      "loss": 3.0106,
      "step": 180789
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4017529487609863,
      "learning_rate": 6.606628973886615e-05,
      "loss": 3.087,
      "step": 180790
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.4869489669799805,
      "learning_rate": 6.606372884438245e-05,
      "loss": 2.8958,
      "step": 180791
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4559507369995117,
      "learning_rate": 6.606116799339199e-05,
      "loss": 3.0404,
      "step": 180792
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.564445972442627,
      "learning_rate": 6.605860718589516e-05,
      "loss": 2.7593,
      "step": 180793
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4206814765930176,
      "learning_rate": 6.605604642189238e-05,
      "loss": 2.8778,
      "step": 180794
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.238867998123169,
      "learning_rate": 6.605348570138432e-05,
      "loss": 2.7904,
      "step": 180795
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.825981855392456,
      "learning_rate": 6.605092502437128e-05,
      "loss": 2.7976,
      "step": 180796
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6576967239379883,
      "learning_rate": 6.604836439085373e-05,
      "loss": 3.1294,
      "step": 180797
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.779559373855591,
      "learning_rate": 6.604580380083227e-05,
      "loss": 2.9879,
      "step": 180798
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.701972484588623,
      "learning_rate": 6.604324325430733e-05,
      "loss": 2.8355,
      "step": 180799
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.551194667816162,
      "learning_rate": 6.604068275127926e-05,
      "loss": 2.903,
      "step": 180800
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.663351535797119,
      "learning_rate": 6.603812229174873e-05,
      "loss": 2.9354,
      "step": 180801
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.590939521789551,
      "learning_rate": 6.603556187571607e-05,
      "loss": 3.0591,
      "step": 180802
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.626798391342163,
      "learning_rate": 6.603300150318188e-05,
      "loss": 2.9239,
      "step": 180803
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5968017578125,
      "learning_rate": 6.60304411741466e-05,
      "loss": 2.695,
      "step": 180804
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.859565496444702,
      "learning_rate": 6.602788088861065e-05,
      "loss": 3.0408,
      "step": 180805
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.60746431350708,
      "learning_rate": 6.602532064657444e-05,
      "loss": 3.0403,
      "step": 180806
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5819549560546875,
      "learning_rate": 6.602276044803863e-05,
      "loss": 2.8577,
      "step": 180807
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.3013978004455566,
      "learning_rate": 6.602020029300353e-05,
      "loss": 2.7354,
      "step": 180808
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5401055812835693,
      "learning_rate": 6.60176401814698e-05,
      "loss": 3.1276,
      "step": 180809
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.939664602279663,
      "learning_rate": 6.601508011343777e-05,
      "loss": 3.1433,
      "step": 180810
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7117066383361816,
      "learning_rate": 6.601252008890797e-05,
      "loss": 2.9901,
      "step": 180811
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.813061237335205,
      "learning_rate": 6.600996010788078e-05,
      "loss": 2.9439,
      "step": 180812
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.034526348114014,
      "learning_rate": 6.600740017035682e-05,
      "loss": 2.9977,
      "step": 180813
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.900686502456665,
      "learning_rate": 6.600484027633643e-05,
      "loss": 3.095,
      "step": 180814
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.760901927947998,
      "learning_rate": 6.600228042582023e-05,
      "loss": 3.1693,
      "step": 180815
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.716543197631836,
      "learning_rate": 6.599972061880857e-05,
      "loss": 3.1612,
      "step": 180816
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8297102451324463,
      "learning_rate": 6.599716085530211e-05,
      "loss": 3.0463,
      "step": 180817
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.8997223377227783,
      "learning_rate": 6.599460113530105e-05,
      "loss": 2.9422,
      "step": 180818
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.205608367919922,
      "learning_rate": 6.599204145880609e-05,
      "loss": 2.9704,
      "step": 180819
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.135472059249878,
      "learning_rate": 6.598948182581752e-05,
      "loss": 2.8041,
      "step": 180820
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.534585475921631,
      "learning_rate": 6.598692223633606e-05,
      "loss": 3.1172,
      "step": 180821
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9476943016052246,
      "learning_rate": 6.598436269036193e-05,
      "loss": 3.0475,
      "step": 180822
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2825570106506348,
      "learning_rate": 6.598180318789589e-05,
      "loss": 3.1523,
      "step": 180823
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5024209022521973,
      "learning_rate": 6.597924372893809e-05,
      "loss": 2.7507,
      "step": 180824
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.594531297683716,
      "learning_rate": 6.597668431348924e-05,
      "loss": 3.0305,
      "step": 180825
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.621101140975952,
      "learning_rate": 6.597412494154965e-05,
      "loss": 2.9093,
      "step": 180826
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2138030529022217,
      "learning_rate": 6.597156561312e-05,
      "loss": 2.8161,
      "step": 180827
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.4335968494415283,
      "learning_rate": 6.596900632820054e-05,
      "loss": 2.9473,
      "step": 180828
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.079603910446167,
      "learning_rate": 6.596644708679198e-05,
      "loss": 2.9889,
      "step": 180829
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.601134777069092,
      "learning_rate": 6.596388788889466e-05,
      "loss": 3.1323,
      "step": 180830
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.647028923034668,
      "learning_rate": 6.596132873450904e-05,
      "loss": 2.8768,
      "step": 180831
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7494285106658936,
      "learning_rate": 6.595876962363557e-05,
      "loss": 2.8438,
      "step": 180832
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.069159984588623,
      "learning_rate": 6.595621055627488e-05,
      "loss": 3.0634,
      "step": 180833
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.515507459640503,
      "learning_rate": 6.595365153242725e-05,
      "loss": 2.8998,
      "step": 180834
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2104527950286865,
      "learning_rate": 6.595109255209331e-05,
      "loss": 2.9636,
      "step": 180835
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6349050998687744,
      "learning_rate": 6.594853361527351e-05,
      "loss": 3.0905,
      "step": 180836
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5425429344177246,
      "learning_rate": 6.594597472196829e-05,
      "loss": 3.0435,
      "step": 180837
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.2475168704986572,
      "learning_rate": 6.594341587217807e-05,
      "loss": 2.7481,
      "step": 180838
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.459226369857788,
      "learning_rate": 6.594085706590345e-05,
      "loss": 2.9735,
      "step": 180839
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1476595401763916,
      "learning_rate": 6.593829830314476e-05,
      "loss": 2.7995,
      "step": 180840
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.008423089981079,
      "learning_rate": 6.593573958390265e-05,
      "loss": 3.0335,
      "step": 180841
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.5760104656219482,
      "learning_rate": 6.593318090817748e-05,
      "loss": 2.6376,
      "step": 180842
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.8660216331481934,
      "learning_rate": 6.593062227596978e-05,
      "loss": 2.9116,
      "step": 180843
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.698611259460449,
      "learning_rate": 6.592806368727989e-05,
      "loss": 2.8572,
      "step": 180844
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.284367322921753,
      "learning_rate": 6.59255051421085e-05,
      "loss": 2.781,
      "step": 180845
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.627791166305542,
      "learning_rate": 6.592294664045587e-05,
      "loss": 3.0022,
      "step": 180846
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.034127712249756,
      "learning_rate": 6.592038818232268e-05,
      "loss": 3.0258,
      "step": 180847
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.613175868988037,
      "learning_rate": 6.591782976770928e-05,
      "loss": 2.7578,
      "step": 180848
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.654829502105713,
      "learning_rate": 6.591527139661621e-05,
      "loss": 3.1264,
      "step": 180849
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.1105329990386963,
      "learning_rate": 6.591271306904382e-05,
      "loss": 3.2691,
      "step": 180850
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.6738674640655518,
      "learning_rate": 6.591015478499275e-05,
      "loss": 2.7763,
      "step": 180851
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.275360345840454,
      "learning_rate": 6.59075965444633e-05,
      "loss": 2.8765,
      "step": 180852
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.2824296951293945,
      "learning_rate": 6.590503834745618e-05,
      "loss": 2.8529,
      "step": 180853
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.803396701812744,
      "learning_rate": 6.59024801939717e-05,
      "loss": 2.9442,
      "step": 180854
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.7088098526000977,
      "learning_rate": 6.589992208401034e-05,
      "loss": 3.028,
      "step": 180855
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5690081119537354,
      "learning_rate": 6.589736401757256e-05,
      "loss": 2.8794,
      "step": 180856
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9929380416870117,
      "learning_rate": 6.589480599465894e-05,
      "loss": 3.0754,
      "step": 180857
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.5047812461853027,
      "learning_rate": 6.589224801526983e-05,
      "loss": 2.8354,
      "step": 180858
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.646975040435791,
      "learning_rate": 6.588969007940584e-05,
      "loss": 3.0936,
      "step": 180859
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.8531455993652344,
      "learning_rate": 6.58871321870674e-05,
      "loss": 2.9692,
      "step": 180860
    },
    {
      "epoch": 2.35,
      "grad_norm": 3.3922855854034424,
      "learning_rate": 6.588457433825494e-05,
      "loss": 2.8234,
      "step": 180861
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.576899528503418,
      "learning_rate": 6.588201653296887e-05,
      "loss": 3.1,
      "step": 180862
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.784172534942627,
      "learning_rate": 6.587945877120985e-05,
      "loss": 3.0631,
      "step": 180863
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.9891693592071533,
      "learning_rate": 6.587690105297817e-05,
      "loss": 2.9654,
      "step": 180864
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.738650321960449,
      "learning_rate": 6.587434337827448e-05,
      "loss": 3.1165,
      "step": 180865
    },
    {
      "epoch": 2.35,
      "grad_norm": 2.7915966510772705,
      "learning_rate": 6.587178574709916e-05,
      "loss": 3.1493,
      "step": 180866
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5274417400360107,
      "learning_rate": 6.586922815945269e-05,
      "loss": 3.0768,
      "step": 180867
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.131324529647827,
      "learning_rate": 6.586667061533548e-05,
      "loss": 3.2193,
      "step": 180868
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.618152141571045,
      "learning_rate": 6.586411311474816e-05,
      "loss": 2.807,
      "step": 180869
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3975703716278076,
      "learning_rate": 6.586155565769103e-05,
      "loss": 3.0459,
      "step": 180870
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.704970121383667,
      "learning_rate": 6.585899824416476e-05,
      "loss": 2.9237,
      "step": 180871
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.657654285430908,
      "learning_rate": 6.585644087416968e-05,
      "loss": 2.9544,
      "step": 180872
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.778282403945923,
      "learning_rate": 6.585388354770632e-05,
      "loss": 2.9704,
      "step": 180873
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9735283851623535,
      "learning_rate": 6.585132626477507e-05,
      "loss": 3.1288,
      "step": 180874
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7157464027404785,
      "learning_rate": 6.584876902537657e-05,
      "loss": 3.0236,
      "step": 180875
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8915140628814697,
      "learning_rate": 6.58462118295111e-05,
      "loss": 2.8579,
      "step": 180876
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9744198322296143,
      "learning_rate": 6.584365467717936e-05,
      "loss": 2.8889,
      "step": 180877
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0113296508789062,
      "learning_rate": 6.584109756838169e-05,
      "loss": 2.7895,
      "step": 180878
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.520549774169922,
      "learning_rate": 6.583854050311845e-05,
      "loss": 3.139,
      "step": 180879
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0449025630950928,
      "learning_rate": 6.58359834813904e-05,
      "loss": 3.0304,
      "step": 180880
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.344360113143921,
      "learning_rate": 6.583342650319782e-05,
      "loss": 3.1087,
      "step": 180881
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.817328929901123,
      "learning_rate": 6.583086956854114e-05,
      "loss": 2.9772,
      "step": 180882
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6601297855377197,
      "learning_rate": 6.582831267742102e-05,
      "loss": 2.9621,
      "step": 180883
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5034074783325195,
      "learning_rate": 6.582575582983782e-05,
      "loss": 3.0956,
      "step": 180884
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.591613531112671,
      "learning_rate": 6.582319902579198e-05,
      "loss": 2.9124,
      "step": 180885
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1710872650146484,
      "learning_rate": 6.582064226528406e-05,
      "loss": 3.0721,
      "step": 180886
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.824054002761841,
      "learning_rate": 6.581808554831446e-05,
      "loss": 2.5126,
      "step": 180887
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2689170837402344,
      "learning_rate": 6.581552887488377e-05,
      "loss": 2.7957,
      "step": 180888
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6537423133850098,
      "learning_rate": 6.58129722449924e-05,
      "loss": 2.8626,
      "step": 180889
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5946834087371826,
      "learning_rate": 6.581041565864082e-05,
      "loss": 2.7345,
      "step": 180890
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.4922308921813965,
      "learning_rate": 6.58078591158294e-05,
      "loss": 2.9442,
      "step": 180891
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.717071533203125,
      "learning_rate": 6.580530261655882e-05,
      "loss": 3.3067,
      "step": 180892
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4815752506256104,
      "learning_rate": 6.580274616082938e-05,
      "loss": 2.8777,
      "step": 180893
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.647735595703125,
      "learning_rate": 6.580018974864171e-05,
      "loss": 2.9533,
      "step": 180894
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.4479470252990723,
      "learning_rate": 6.57976333799962e-05,
      "loss": 2.8471,
      "step": 180895
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.220724105834961,
      "learning_rate": 6.579507705489324e-05,
      "loss": 3.0386,
      "step": 180896
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.943040132522583,
      "learning_rate": 6.579252077333348e-05,
      "loss": 2.8826,
      "step": 180897
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5517520904541016,
      "learning_rate": 6.578996453531731e-05,
      "loss": 3.1279,
      "step": 180898
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.260380268096924,
      "learning_rate": 6.578740834084516e-05,
      "loss": 2.7701,
      "step": 180899
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.890845537185669,
      "learning_rate": 6.57848521899176e-05,
      "loss": 2.8558,
      "step": 180900
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7394657135009766,
      "learning_rate": 6.578229608253498e-05,
      "loss": 2.9669,
      "step": 180901
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9642200469970703,
      "learning_rate": 6.577974001869793e-05,
      "loss": 2.9746,
      "step": 180902
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4303805828094482,
      "learning_rate": 6.577718399840687e-05,
      "loss": 2.8804,
      "step": 180903
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9534173011779785,
      "learning_rate": 6.577462802166225e-05,
      "loss": 2.9109,
      "step": 180904
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4422693252563477,
      "learning_rate": 6.577207208846442e-05,
      "loss": 2.971,
      "step": 180905
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8499107360839844,
      "learning_rate": 6.57695161988141e-05,
      "loss": 2.8781,
      "step": 180906
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.925621747970581,
      "learning_rate": 6.576696035271157e-05,
      "loss": 3.0321,
      "step": 180907
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.834772825241089,
      "learning_rate": 6.576440455015748e-05,
      "loss": 2.8837,
      "step": 180908
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.212057590484619,
      "learning_rate": 6.576184879115219e-05,
      "loss": 2.7846,
      "step": 180909
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9178473949432373,
      "learning_rate": 6.575929307569619e-05,
      "loss": 3.0186,
      "step": 180910
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1824514865875244,
      "learning_rate": 6.575673740378984e-05,
      "loss": 2.721,
      "step": 180911
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6616766452789307,
      "learning_rate": 6.575418177543388e-05,
      "loss": 2.8467,
      "step": 180912
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6033456325531006,
      "learning_rate": 6.575162619062854e-05,
      "loss": 2.716,
      "step": 180913
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3905234336853027,
      "learning_rate": 6.574907064937446e-05,
      "loss": 2.8498,
      "step": 180914
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5017104148864746,
      "learning_rate": 6.574651515167207e-05,
      "loss": 2.621,
      "step": 180915
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.968947410583496,
      "learning_rate": 6.574395969752182e-05,
      "loss": 2.7954,
      "step": 180916
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7967140674591064,
      "learning_rate": 6.57414042869241e-05,
      "loss": 3.0523,
      "step": 180917
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3063604831695557,
      "learning_rate": 6.573884891987956e-05,
      "loss": 3.004,
      "step": 180918
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7184975147247314,
      "learning_rate": 6.573629359638849e-05,
      "loss": 3.0034,
      "step": 180919
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8714730739593506,
      "learning_rate": 6.573373831645158e-05,
      "loss": 3.2462,
      "step": 180920
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.657341480255127,
      "learning_rate": 6.573118308006916e-05,
      "loss": 2.7897,
      "step": 180921
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6226933002471924,
      "learning_rate": 6.572862788724173e-05,
      "loss": 2.8207,
      "step": 180922
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.732301950454712,
      "learning_rate": 6.572607273796974e-05,
      "loss": 2.9388,
      "step": 180923
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9569520950317383,
      "learning_rate": 6.572351763225373e-05,
      "loss": 2.9191,
      "step": 180924
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.6843953132629395,
      "learning_rate": 6.572096257009408e-05,
      "loss": 2.8997,
      "step": 180925
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.47029972076416,
      "learning_rate": 6.57184075514914e-05,
      "loss": 2.7182,
      "step": 180926
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.858405351638794,
      "learning_rate": 6.571585257644609e-05,
      "loss": 3.0849,
      "step": 180927
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7882721424102783,
      "learning_rate": 6.571329764495862e-05,
      "loss": 2.8157,
      "step": 180928
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7373993396759033,
      "learning_rate": 6.57107427570294e-05,
      "loss": 2.9426,
      "step": 180929
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5979671478271484,
      "learning_rate": 6.570818791265904e-05,
      "loss": 2.8866,
      "step": 180930
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.913907527923584,
      "learning_rate": 6.57056331118479e-05,
      "loss": 2.8226,
      "step": 180931
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.10805606842041,
      "learning_rate": 6.570307835459655e-05,
      "loss": 2.8785,
      "step": 180932
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1320903301239014,
      "learning_rate": 6.570052364090546e-05,
      "loss": 2.7489,
      "step": 180933
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.762011766433716,
      "learning_rate": 6.569796897077501e-05,
      "loss": 3.1336,
      "step": 180934
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6824047565460205,
      "learning_rate": 6.56954143442057e-05,
      "loss": 2.9452,
      "step": 180935
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5067028999328613,
      "learning_rate": 6.56928597611981e-05,
      "loss": 3.145,
      "step": 180936
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6896066665649414,
      "learning_rate": 6.569030522175252e-05,
      "loss": 2.8776,
      "step": 180937
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0262253284454346,
      "learning_rate": 6.568775072586968e-05,
      "loss": 2.9546,
      "step": 180938
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7831718921661377,
      "learning_rate": 6.568519627354983e-05,
      "loss": 3.0641,
      "step": 180939
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.9207475185394287,
      "learning_rate": 6.568264186479359e-05,
      "loss": 2.7173,
      "step": 180940
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9906253814697266,
      "learning_rate": 6.568008749960125e-05,
      "loss": 2.9098,
      "step": 180941
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6064820289611816,
      "learning_rate": 6.567753317797348e-05,
      "loss": 2.9695,
      "step": 180942
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.750554323196411,
      "learning_rate": 6.567497889991062e-05,
      "loss": 2.9955,
      "step": 180943
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.7186121940612793,
      "learning_rate": 6.567242466541327e-05,
      "loss": 3.0593,
      "step": 180944
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8042495250701904,
      "learning_rate": 6.566987047448184e-05,
      "loss": 2.9556,
      "step": 180945
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5127298831939697,
      "learning_rate": 6.566731632711684e-05,
      "loss": 2.8298,
      "step": 180946
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.607865571975708,
      "learning_rate": 6.566476222331857e-05,
      "loss": 3.0103,
      "step": 180947
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.052583694458008,
      "learning_rate": 6.566220816308777e-05,
      "loss": 3.148,
      "step": 180948
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.278451442718506,
      "learning_rate": 6.565965414642471e-05,
      "loss": 3.016,
      "step": 180949
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3460495471954346,
      "learning_rate": 6.565710017333e-05,
      "loss": 3.1686,
      "step": 180950
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.753048896789551,
      "learning_rate": 6.565454624380407e-05,
      "loss": 2.818,
      "step": 180951
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4418702125549316,
      "learning_rate": 6.56519923578474e-05,
      "loss": 2.9773,
      "step": 180952
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3177671432495117,
      "learning_rate": 6.564943851546033e-05,
      "loss": 3.1039,
      "step": 180953
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9769718647003174,
      "learning_rate": 6.564688471664355e-05,
      "loss": 3.0553,
      "step": 180954
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.085768699645996,
      "learning_rate": 6.564433096139738e-05,
      "loss": 2.6866,
      "step": 180955
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0422635078430176,
      "learning_rate": 6.564177724972238e-05,
      "loss": 3.0764,
      "step": 180956
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8096888065338135,
      "learning_rate": 6.563922358161904e-05,
      "loss": 3.0503,
      "step": 180957
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4391024112701416,
      "learning_rate": 6.56366699570878e-05,
      "loss": 2.9306,
      "step": 180958
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.687936305999756,
      "learning_rate": 6.563411637612902e-05,
      "loss": 2.8173,
      "step": 180959
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7502622604370117,
      "learning_rate": 6.563156283874338e-05,
      "loss": 2.8141,
      "step": 180960
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5131995677948,
      "learning_rate": 6.562900934493115e-05,
      "loss": 2.936,
      "step": 180961
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.500112533569336,
      "learning_rate": 6.562645589469303e-05,
      "loss": 2.6499,
      "step": 180962
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5815277099609375,
      "learning_rate": 6.562390248802926e-05,
      "loss": 2.6987,
      "step": 180963
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.780400514602661,
      "learning_rate": 6.562134912494052e-05,
      "loss": 2.9827,
      "step": 180964
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.992809534072876,
      "learning_rate": 6.56187958054272e-05,
      "loss": 3.1535,
      "step": 180965
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6544477939605713,
      "learning_rate": 6.56162425294898e-05,
      "loss": 3.0971,
      "step": 180966
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7985074520111084,
      "learning_rate": 6.561368929712862e-05,
      "loss": 2.8367,
      "step": 180967
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0030953884124756,
      "learning_rate": 6.56111361083444e-05,
      "loss": 2.8754,
      "step": 180968
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4028916358947754,
      "learning_rate": 6.560858296313741e-05,
      "loss": 3.1492,
      "step": 180969
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.667295217514038,
      "learning_rate": 6.560602986150831e-05,
      "loss": 2.9984,
      "step": 180970
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1710758209228516,
      "learning_rate": 6.560347680345744e-05,
      "loss": 2.9833,
      "step": 180971
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.690656900405884,
      "learning_rate": 6.560092378898534e-05,
      "loss": 3.0811,
      "step": 180972
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.534528970718384,
      "learning_rate": 6.559837081809236e-05,
      "loss": 2.8107,
      "step": 180973
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9939074516296387,
      "learning_rate": 6.559581789077914e-05,
      "loss": 2.7455,
      "step": 180974
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.428337812423706,
      "learning_rate": 6.559326500704599e-05,
      "loss": 2.7868,
      "step": 180975
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.517502784729004,
      "learning_rate": 6.559071216689356e-05,
      "loss": 3.0913,
      "step": 180976
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.400813102722168,
      "learning_rate": 6.558815937032226e-05,
      "loss": 2.8378,
      "step": 180977
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0830466747283936,
      "learning_rate": 6.558560661733246e-05,
      "loss": 3.019,
      "step": 180978
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1488096714019775,
      "learning_rate": 6.558305390792478e-05,
      "loss": 2.8991,
      "step": 180979
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1283326148986816,
      "learning_rate": 6.558050124209966e-05,
      "loss": 3.0907,
      "step": 180980
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4281272888183594,
      "learning_rate": 6.557794861985748e-05,
      "loss": 2.9749,
      "step": 180981
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.446044921875,
      "learning_rate": 6.557539604119884e-05,
      "loss": 2.8946,
      "step": 180982
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.304140329360962,
      "learning_rate": 6.557284350612416e-05,
      "loss": 2.9156,
      "step": 180983
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4781203269958496,
      "learning_rate": 6.557029101463386e-05,
      "loss": 3.0174,
      "step": 180984
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7063727378845215,
      "learning_rate": 6.556773856672852e-05,
      "loss": 2.935,
      "step": 180985
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4880354404449463,
      "learning_rate": 6.556518616240849e-05,
      "loss": 3.0464,
      "step": 180986
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3437702655792236,
      "learning_rate": 6.556263380167439e-05,
      "loss": 3.0009,
      "step": 180987
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.11564564704895,
      "learning_rate": 6.556008148452662e-05,
      "loss": 3.0111,
      "step": 180988
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8815112113952637,
      "learning_rate": 6.555752921096567e-05,
      "loss": 2.7419,
      "step": 180989
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8244431018829346,
      "learning_rate": 6.555497698099193e-05,
      "loss": 2.8016,
      "step": 180990
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.697096109390259,
      "learning_rate": 6.555242479460601e-05,
      "loss": 2.9056,
      "step": 180991
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8407387733459473,
      "learning_rate": 6.554987265180824e-05,
      "loss": 3.0186,
      "step": 180992
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.358705997467041,
      "learning_rate": 6.554732055259925e-05,
      "loss": 2.9461,
      "step": 180993
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.045846462249756,
      "learning_rate": 6.554476849697945e-05,
      "loss": 2.8863,
      "step": 180994
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5607845783233643,
      "learning_rate": 6.554221648494931e-05,
      "loss": 2.7793,
      "step": 180995
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4979071617126465,
      "learning_rate": 6.553966451650919e-05,
      "loss": 3.0851,
      "step": 180996
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.965395212173462,
      "learning_rate": 6.553711259165978e-05,
      "loss": 3.1069,
      "step": 180997
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.894436836242676,
      "learning_rate": 6.553456071040134e-05,
      "loss": 3.0958,
      "step": 180998
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9687280654907227,
      "learning_rate": 6.553200887273456e-05,
      "loss": 2.86,
      "step": 180999
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9394216537475586,
      "learning_rate": 6.552945707865978e-05,
      "loss": 2.773,
      "step": 181000
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9968454837799072,
      "learning_rate": 6.552690532817753e-05,
      "loss": 3.0792,
      "step": 181001
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.876702785491943,
      "learning_rate": 6.552435362128815e-05,
      "loss": 2.9563,
      "step": 181002
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3701324462890625,
      "learning_rate": 6.55218019579923e-05,
      "loss": 2.7597,
      "step": 181003
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.669010639190674,
      "learning_rate": 6.551925033829029e-05,
      "loss": 2.9407,
      "step": 181004
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.693915367126465,
      "learning_rate": 6.551669876218276e-05,
      "loss": 3.064,
      "step": 181005
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4677746295928955,
      "learning_rate": 6.551414722967012e-05,
      "loss": 3.0285,
      "step": 181006
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.913145065307617,
      "learning_rate": 6.551159574075281e-05,
      "loss": 2.9863,
      "step": 181007
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.016160249710083,
      "learning_rate": 6.550904429543125e-05,
      "loss": 2.7834,
      "step": 181008
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.037287950515747,
      "learning_rate": 6.550649289370607e-05,
      "loss": 2.76,
      "step": 181009
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3033080101013184,
      "learning_rate": 6.550394153557757e-05,
      "loss": 3.2816,
      "step": 181010
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.543196678161621,
      "learning_rate": 6.550139022104639e-05,
      "loss": 2.981,
      "step": 181011
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.521898031234741,
      "learning_rate": 6.549883895011291e-05,
      "loss": 2.9404,
      "step": 181012
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.34332275390625,
      "learning_rate": 6.549628772277765e-05,
      "loss": 3.1889,
      "step": 181013
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.402078628540039,
      "learning_rate": 6.549373653904096e-05,
      "loss": 2.9123,
      "step": 181014
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.837735414505005,
      "learning_rate": 6.54911853989035e-05,
      "loss": 2.9615,
      "step": 181015
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5597727298736572,
      "learning_rate": 6.548863430236559e-05,
      "loss": 2.9089,
      "step": 181016
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2988743782043457,
      "learning_rate": 6.548608324942783e-05,
      "loss": 2.8733,
      "step": 181017
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7000603675842285,
      "learning_rate": 6.548353224009063e-05,
      "loss": 2.981,
      "step": 181018
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.594820261001587,
      "learning_rate": 6.548098127435449e-05,
      "loss": 2.9918,
      "step": 181019
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.742027759552002,
      "learning_rate": 6.547843035221976e-05,
      "loss": 2.577,
      "step": 181020
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.431190252304077,
      "learning_rate": 6.547587947368712e-05,
      "loss": 2.9877,
      "step": 181021
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7527761459350586,
      "learning_rate": 6.547332863875683e-05,
      "loss": 2.7494,
      "step": 181022
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9711754322052,
      "learning_rate": 6.547077784742958e-05,
      "loss": 2.9533,
      "step": 181023
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7135579586029053,
      "learning_rate": 6.546822709970574e-05,
      "loss": 3.0244,
      "step": 181024
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.421229839324951,
      "learning_rate": 6.546567639558575e-05,
      "loss": 2.6946,
      "step": 181025
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.567673444747925,
      "learning_rate": 6.54631257350701e-05,
      "loss": 3.0296,
      "step": 181026
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1994752883911133,
      "learning_rate": 6.546057511815931e-05,
      "loss": 2.957,
      "step": 181027
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.640897750854492,
      "learning_rate": 6.545802454485375e-05,
      "loss": 3.0535,
      "step": 181028
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.012988328933716,
      "learning_rate": 6.545547401515408e-05,
      "loss": 2.9344,
      "step": 181029
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.847991466522217,
      "learning_rate": 6.545292352906055e-05,
      "loss": 2.9207,
      "step": 181030
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8022265434265137,
      "learning_rate": 6.545037308657394e-05,
      "loss": 2.9858,
      "step": 181031
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4873013496398926,
      "learning_rate": 6.544782268769436e-05,
      "loss": 3.024,
      "step": 181032
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7268779277801514,
      "learning_rate": 6.544527233242253e-05,
      "loss": 2.9296,
      "step": 181033
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.975224018096924,
      "learning_rate": 6.544272202075877e-05,
      "loss": 2.9582,
      "step": 181034
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.774904489517212,
      "learning_rate": 6.544017175270374e-05,
      "loss": 2.8301,
      "step": 181035
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2052762508392334,
      "learning_rate": 6.543762152825772e-05,
      "loss": 2.9909,
      "step": 181036
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6312172412872314,
      "learning_rate": 6.543507134742142e-05,
      "loss": 2.8336,
      "step": 181037
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.272623300552368,
      "learning_rate": 6.5432521210195e-05,
      "loss": 2.9143,
      "step": 181038
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8090126514434814,
      "learning_rate": 6.542997111657923e-05,
      "loss": 2.8182,
      "step": 181039
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9037399291992188,
      "learning_rate": 6.542742106657433e-05,
      "loss": 2.8385,
      "step": 181040
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.289872646331787,
      "learning_rate": 6.542487106018099e-05,
      "loss": 2.862,
      "step": 181041
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2135043144226074,
      "learning_rate": 6.54223210973995e-05,
      "loss": 3.0126,
      "step": 181042
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.803220748901367,
      "learning_rate": 6.541977117823061e-05,
      "loss": 2.8012,
      "step": 181043
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8075268268585205,
      "learning_rate": 6.541722130267446e-05,
      "loss": 2.949,
      "step": 181044
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4117753505706787,
      "learning_rate": 6.541467147073177e-05,
      "loss": 2.7378,
      "step": 181045
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.308137893676758,
      "learning_rate": 6.541212168240279e-05,
      "loss": 2.6855,
      "step": 181046
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9715635776519775,
      "learning_rate": 6.540957193768826e-05,
      "loss": 2.9492,
      "step": 181047
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7816662788391113,
      "learning_rate": 6.54070222365884e-05,
      "loss": 2.7464,
      "step": 181048
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.584627628326416,
      "learning_rate": 6.54044725791039e-05,
      "loss": 2.947,
      "step": 181049
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.709087610244751,
      "learning_rate": 6.54019229652351e-05,
      "loss": 2.9162,
      "step": 181050
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3140745162963867,
      "learning_rate": 6.539937339498255e-05,
      "loss": 2.8728,
      "step": 181051
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4614224433898926,
      "learning_rate": 6.53968238683466e-05,
      "loss": 2.9669,
      "step": 181052
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4274628162384033,
      "learning_rate": 6.539427438532787e-05,
      "loss": 2.7939,
      "step": 181053
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.039389133453369,
      "learning_rate": 6.539172494592672e-05,
      "loss": 3.0243,
      "step": 181054
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.697037696838379,
      "learning_rate": 6.538917555014374e-05,
      "loss": 3.1027,
      "step": 181055
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6015195846557617,
      "learning_rate": 6.538662619797936e-05,
      "loss": 3.1675,
      "step": 181056
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8112778663635254,
      "learning_rate": 6.538407688943404e-05,
      "loss": 2.8501,
      "step": 181057
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.451352596282959,
      "learning_rate": 6.53815276245081e-05,
      "loss": 2.8485,
      "step": 181058
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7807347774505615,
      "learning_rate": 6.537897840320232e-05,
      "loss": 3.0474,
      "step": 181059
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.705563545227051,
      "learning_rate": 6.53764292255169e-05,
      "loss": 2.7568,
      "step": 181060
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.828681468963623,
      "learning_rate": 6.537388009145253e-05,
      "loss": 2.944,
      "step": 181061
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.187413454055786,
      "learning_rate": 6.537133100100957e-05,
      "loss": 2.9807,
      "step": 181062
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0056450366973877,
      "learning_rate": 6.536878195418844e-05,
      "loss": 2.9496,
      "step": 181063
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8871824741363525,
      "learning_rate": 6.536623295098977e-05,
      "loss": 3.1497,
      "step": 181064
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6469004154205322,
      "learning_rate": 6.536368399141391e-05,
      "loss": 2.8336,
      "step": 181065
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.791353464126587,
      "learning_rate": 6.536113507546131e-05,
      "loss": 2.772,
      "step": 181066
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3753819465637207,
      "learning_rate": 6.535858620313258e-05,
      "loss": 2.974,
      "step": 181067
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.741582155227661,
      "learning_rate": 6.535603737442811e-05,
      "loss": 3.281,
      "step": 181068
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8068087100982666,
      "learning_rate": 6.535348858934832e-05,
      "loss": 2.8493,
      "step": 181069
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6982452869415283,
      "learning_rate": 6.535093984789384e-05,
      "loss": 2.9052,
      "step": 181070
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.446168899536133,
      "learning_rate": 6.534839115006495e-05,
      "loss": 3.0325,
      "step": 181071
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.901118755340576,
      "learning_rate": 6.53458424958623e-05,
      "loss": 3.0918,
      "step": 181072
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.617823839187622,
      "learning_rate": 6.534329388528628e-05,
      "loss": 2.9956,
      "step": 181073
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.413729667663574,
      "learning_rate": 6.534074531833741e-05,
      "loss": 2.9555,
      "step": 181074
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4656870365142822,
      "learning_rate": 6.5338196795016e-05,
      "loss": 2.9764,
      "step": 181075
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.658688545227051,
      "learning_rate": 6.533564831532276e-05,
      "loss": 3.0671,
      "step": 181076
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.580549478530884,
      "learning_rate": 6.533309987925795e-05,
      "loss": 2.9895,
      "step": 181077
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.008864402770996,
      "learning_rate": 6.533055148682223e-05,
      "loss": 3.0217,
      "step": 181078
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.212433099746704,
      "learning_rate": 6.532800313801599e-05,
      "loss": 2.8695,
      "step": 181079
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6458163261413574,
      "learning_rate": 6.532545483283971e-05,
      "loss": 3.183,
      "step": 181080
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5837390422821045,
      "learning_rate": 6.53229065712938e-05,
      "loss": 2.9621,
      "step": 181081
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.808145046234131,
      "learning_rate": 6.532035835337884e-05,
      "loss": 2.846,
      "step": 181082
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3222718238830566,
      "learning_rate": 6.531781017909518e-05,
      "loss": 3.096,
      "step": 181083
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.054588794708252,
      "learning_rate": 6.531526204844347e-05,
      "loss": 2.8964,
      "step": 181084
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5305044651031494,
      "learning_rate": 6.531271396142405e-05,
      "loss": 2.727,
      "step": 181085
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3027169704437256,
      "learning_rate": 6.531016591803747e-05,
      "loss": 2.9931,
      "step": 181086
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.826436996459961,
      "learning_rate": 6.530761791828405e-05,
      "loss": 2.8284,
      "step": 181087
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2968356609344482,
      "learning_rate": 6.530506996216445e-05,
      "loss": 3.002,
      "step": 181088
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.789510488510132,
      "learning_rate": 6.530252204967899e-05,
      "loss": 3.0067,
      "step": 181089
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.513607978820801,
      "learning_rate": 6.529997418082829e-05,
      "loss": 2.8221,
      "step": 181090
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6213207244873047,
      "learning_rate": 6.52974263556128e-05,
      "loss": 2.8946,
      "step": 181091
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2079429626464844,
      "learning_rate": 6.529487857403292e-05,
      "loss": 3.1243,
      "step": 181092
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.73432993888855,
      "learning_rate": 6.529233083608907e-05,
      "loss": 2.7067,
      "step": 181093
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.469188690185547,
      "learning_rate": 6.528978314178188e-05,
      "loss": 2.9996,
      "step": 181094
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.214373826980591,
      "learning_rate": 6.528723549111169e-05,
      "loss": 2.8712,
      "step": 181095
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0916693210601807,
      "learning_rate": 6.528468788407909e-05,
      "loss": 2.9081,
      "step": 181096
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5103049278259277,
      "learning_rate": 6.528214032068442e-05,
      "loss": 2.7438,
      "step": 181097
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.020310878753662,
      "learning_rate": 6.52795928009284e-05,
      "loss": 2.8425,
      "step": 181098
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3358781337738037,
      "learning_rate": 6.527704532481118e-05,
      "loss": 2.8731,
      "step": 181099
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7595996856689453,
      "learning_rate": 6.52744978923335e-05,
      "loss": 2.955,
      "step": 181100
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.911083459854126,
      "learning_rate": 6.527195050349558e-05,
      "loss": 2.9792,
      "step": 181101
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6813769340515137,
      "learning_rate": 6.526940315829814e-05,
      "loss": 2.7993,
      "step": 181102
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.911318063735962,
      "learning_rate": 6.526685585674146e-05,
      "loss": 3.2122,
      "step": 181103
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6693906784057617,
      "learning_rate": 6.526430859882629e-05,
      "loss": 2.8895,
      "step": 181104
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6252384185791016,
      "learning_rate": 6.52617613845527e-05,
      "loss": 2.9206,
      "step": 181105
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.449202537536621,
      "learning_rate": 6.525921421392152e-05,
      "loss": 2.6896,
      "step": 181106
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.518303871154785,
      "learning_rate": 6.525666708693296e-05,
      "loss": 2.959,
      "step": 181107
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.208763599395752,
      "learning_rate": 6.525412000358772e-05,
      "loss": 3.064,
      "step": 181108
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.477309226989746,
      "learning_rate": 6.525157296388611e-05,
      "loss": 2.9451,
      "step": 181109
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.0791971683502197,
      "learning_rate": 6.524902596782878e-05,
      "loss": 2.9668,
      "step": 181110
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7191550731658936,
      "learning_rate": 6.524647901541595e-05,
      "loss": 2.9915,
      "step": 181111
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7271909713745117,
      "learning_rate": 6.524393210664832e-05,
      "loss": 2.6625,
      "step": 181112
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1810176372528076,
      "learning_rate": 6.524138524152619e-05,
      "loss": 3.0949,
      "step": 181113
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0926949977874756,
      "learning_rate": 6.523883842005017e-05,
      "loss": 2.8708,
      "step": 181114
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7326860427856445,
      "learning_rate": 6.523629164222061e-05,
      "loss": 3.1849,
      "step": 181115
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2033281326293945,
      "learning_rate": 6.523374490803824e-05,
      "loss": 2.9756,
      "step": 181116
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6662113666534424,
      "learning_rate": 6.523119821750319e-05,
      "loss": 2.9792,
      "step": 181117
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.1876564025878906,
      "learning_rate": 6.522865157061615e-05,
      "loss": 3.1399,
      "step": 181118
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8898122310638428,
      "learning_rate": 6.522610496737747e-05,
      "loss": 2.8398,
      "step": 181119
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.526866912841797,
      "learning_rate": 6.522355840778776e-05,
      "loss": 3.0471,
      "step": 181120
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.977177858352661,
      "learning_rate": 6.522101189184735e-05,
      "loss": 3.1782,
      "step": 181121
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5884952545166016,
      "learning_rate": 6.521846541955692e-05,
      "loss": 2.7813,
      "step": 181122
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6640584468841553,
      "learning_rate": 6.521591899091667e-05,
      "loss": 2.8165,
      "step": 181123
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.972374439239502,
      "learning_rate": 6.52133726059273e-05,
      "loss": 3.0776,
      "step": 181124
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5007057189941406,
      "learning_rate": 6.521082626458912e-05,
      "loss": 2.8128,
      "step": 181125
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4870166778564453,
      "learning_rate": 6.520827996690272e-05,
      "loss": 2.9148,
      "step": 181126
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2854695320129395,
      "learning_rate": 6.520573371286851e-05,
      "loss": 2.5187,
      "step": 181127
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.652158498764038,
      "learning_rate": 6.52031875024871e-05,
      "loss": 2.8671,
      "step": 181128
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.727318286895752,
      "learning_rate": 6.520064133575871e-05,
      "loss": 2.9131,
      "step": 181129
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2004077434539795,
      "learning_rate": 6.519809521268403e-05,
      "loss": 3.1536,
      "step": 181130
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.749800682067871,
      "learning_rate": 6.519554913326337e-05,
      "loss": 3.1203,
      "step": 181131
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8513920307159424,
      "learning_rate": 6.519300309749738e-05,
      "loss": 3.0085,
      "step": 181132
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.651299238204956,
      "learning_rate": 6.519045710538636e-05,
      "loss": 2.9382,
      "step": 181133
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8585658073425293,
      "learning_rate": 6.518791115693103e-05,
      "loss": 2.7359,
      "step": 181134
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.416788101196289,
      "learning_rate": 6.518536525213151e-05,
      "loss": 2.7764,
      "step": 181135
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8270859718322754,
      "learning_rate": 6.518281939098859e-05,
      "loss": 3.0207,
      "step": 181136
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6170897483825684,
      "learning_rate": 6.51802735735025e-05,
      "loss": 2.925,
      "step": 181137
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0582406520843506,
      "learning_rate": 6.517772779967392e-05,
      "loss": 2.9934,
      "step": 181138
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5572593212127686,
      "learning_rate": 6.517518206950315e-05,
      "loss": 3.1249,
      "step": 181139
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.744293212890625,
      "learning_rate": 6.517263638299082e-05,
      "loss": 2.9141,
      "step": 181140
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.091463804244995,
      "learning_rate": 6.517009074013734e-05,
      "loss": 2.9582,
      "step": 181141
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.669511318206787,
      "learning_rate": 6.516754514094318e-05,
      "loss": 2.6471,
      "step": 181142
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.582063913345337,
      "learning_rate": 6.516499958540872e-05,
      "loss": 3.0549,
      "step": 181143
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5369162559509277,
      "learning_rate": 6.51624540735346e-05,
      "loss": 2.854,
      "step": 181144
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9990198612213135,
      "learning_rate": 6.515990860532115e-05,
      "loss": 3.034,
      "step": 181145
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.4125113487243652,
      "learning_rate": 6.515736318076895e-05,
      "loss": 2.9866,
      "step": 181146
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.061541795730591,
      "learning_rate": 6.515481779987846e-05,
      "loss": 2.8559,
      "step": 181147
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7159318923950195,
      "learning_rate": 6.515227246265003e-05,
      "loss": 2.9635,
      "step": 181148
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.921958923339844,
      "learning_rate": 6.514972716908429e-05,
      "loss": 3.0056,
      "step": 181149
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.448589563369751,
      "learning_rate": 6.514718191918168e-05,
      "loss": 2.9053,
      "step": 181150
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.726318120956421,
      "learning_rate": 6.514463671294254e-05,
      "loss": 2.8315,
      "step": 181151
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.617955446243286,
      "learning_rate": 6.514209155036754e-05,
      "loss": 2.7584,
      "step": 181152
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.633448362350464,
      "learning_rate": 6.513954643145706e-05,
      "loss": 2.8774,
      "step": 181153
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0456509590148926,
      "learning_rate": 6.513700135621147e-05,
      "loss": 2.88,
      "step": 181154
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.307934522628784,
      "learning_rate": 6.513445632463142e-05,
      "loss": 2.8225,
      "step": 181155
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0727744102478027,
      "learning_rate": 6.513191133671735e-05,
      "loss": 2.7843,
      "step": 181156
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4684247970581055,
      "learning_rate": 6.512936639246958e-05,
      "loss": 2.7399,
      "step": 181157
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4055681228637695,
      "learning_rate": 6.512682149188877e-05,
      "loss": 2.7406,
      "step": 181158
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.764209032058716,
      "learning_rate": 6.512427663497534e-05,
      "loss": 2.9211,
      "step": 181159
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.50539493560791,
      "learning_rate": 6.512173182172964e-05,
      "loss": 2.7307,
      "step": 181160
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9480338096618652,
      "learning_rate": 6.511918705215234e-05,
      "loss": 3.1456,
      "step": 181161
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6970348358154297,
      "learning_rate": 6.511664232624371e-05,
      "loss": 2.9414,
      "step": 181162
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5218796730041504,
      "learning_rate": 6.511409764400443e-05,
      "loss": 2.9532,
      "step": 181163
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.334944009780884,
      "learning_rate": 6.511155300543487e-05,
      "loss": 2.5773,
      "step": 181164
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0000627040863037,
      "learning_rate": 6.510900841053551e-05,
      "loss": 2.91,
      "step": 181165
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9830079078674316,
      "learning_rate": 6.510646385930676e-05,
      "loss": 2.9524,
      "step": 181166
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6723692417144775,
      "learning_rate": 6.51039193517492e-05,
      "loss": 3.03,
      "step": 181167
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5600852966308594,
      "learning_rate": 6.510137488786318e-05,
      "loss": 2.6466,
      "step": 181168
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9008991718292236,
      "learning_rate": 6.509883046764933e-05,
      "loss": 2.72,
      "step": 181169
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9959022998809814,
      "learning_rate": 6.509628609110796e-05,
      "loss": 2.8353,
      "step": 181170
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.754136562347412,
      "learning_rate": 6.50937417582398e-05,
      "loss": 2.8976,
      "step": 181171
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.204188585281372,
      "learning_rate": 6.509119746904497e-05,
      "loss": 3.0224,
      "step": 181172
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.966155529022217,
      "learning_rate": 6.50886532235242e-05,
      "loss": 3.1739,
      "step": 181173
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.314561367034912,
      "learning_rate": 6.508610902167781e-05,
      "loss": 2.81,
      "step": 181174
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2015390396118164,
      "learning_rate": 6.508356486350645e-05,
      "loss": 3.0894,
      "step": 181175
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.538012981414795,
      "learning_rate": 6.508102074901038e-05,
      "loss": 2.781,
      "step": 181176
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1624231338500977,
      "learning_rate": 6.507847667819036e-05,
      "loss": 3.011,
      "step": 181177
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.566661834716797,
      "learning_rate": 6.507593265104652e-05,
      "loss": 3.027,
      "step": 181178
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8170695304870605,
      "learning_rate": 6.507338866757956e-05,
      "loss": 2.7651,
      "step": 181179
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.014719247817993,
      "learning_rate": 6.507084472778985e-05,
      "loss": 2.7563,
      "step": 181180
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.886613607406616,
      "learning_rate": 6.506830083167795e-05,
      "loss": 3.1057,
      "step": 181181
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.4643003940582275,
      "learning_rate": 6.506575697924421e-05,
      "loss": 2.9908,
      "step": 181182
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5543055534362793,
      "learning_rate": 6.506321317048934e-05,
      "loss": 2.7908,
      "step": 181183
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.1604502201080322,
      "learning_rate": 6.506066940541352e-05,
      "loss": 2.9563,
      "step": 181184
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7997939586639404,
      "learning_rate": 6.50581256840174e-05,
      "loss": 2.9911,
      "step": 181185
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6088013648986816,
      "learning_rate": 6.505558200630134e-05,
      "loss": 2.9254,
      "step": 181186
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.705430030822754,
      "learning_rate": 6.505303837226598e-05,
      "loss": 3.125,
      "step": 181187
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3184776306152344,
      "learning_rate": 6.50504947819116e-05,
      "loss": 2.8397,
      "step": 181188
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.595957040786743,
      "learning_rate": 6.504795123523895e-05,
      "loss": 2.9523,
      "step": 181189
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3124349117279053,
      "learning_rate": 6.504540773224815e-05,
      "loss": 3.2205,
      "step": 181190
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.912931442260742,
      "learning_rate": 6.504286427293992e-05,
      "loss": 2.9157,
      "step": 181191
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6762070655822754,
      "learning_rate": 6.504032085731458e-05,
      "loss": 2.8701,
      "step": 181192
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.076056480407715,
      "learning_rate": 6.503777748537275e-05,
      "loss": 3.0073,
      "step": 181193
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.657332181930542,
      "learning_rate": 6.503523415711476e-05,
      "loss": 3.0298,
      "step": 181194
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.556506395339966,
      "learning_rate": 6.503269087254131e-05,
      "loss": 3.0576,
      "step": 181195
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7913050651550293,
      "learning_rate": 6.503014763165257e-05,
      "loss": 3.0148,
      "step": 181196
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.006024122238159,
      "learning_rate": 6.50276044344492e-05,
      "loss": 2.8895,
      "step": 181197
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.486876964569092,
      "learning_rate": 6.502506128093159e-05,
      "loss": 2.9501,
      "step": 181198
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6285107135772705,
      "learning_rate": 6.502251817110034e-05,
      "loss": 2.8509,
      "step": 181199
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7363431453704834,
      "learning_rate": 6.501997510495573e-05,
      "loss": 3.0535,
      "step": 181200
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2837836742401123,
      "learning_rate": 6.501743208249856e-05,
      "loss": 3.0122,
      "step": 181201
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.921154499053955,
      "learning_rate": 6.501488910372886e-05,
      "loss": 2.9241,
      "step": 181202
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9351887702941895,
      "learning_rate": 6.501234616864745e-05,
      "loss": 3.1341,
      "step": 181203
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0673835277557373,
      "learning_rate": 6.50098032772546e-05,
      "loss": 2.789,
      "step": 181204
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.925879955291748,
      "learning_rate": 6.500726042955094e-05,
      "loss": 2.8198,
      "step": 181205
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.095973968505859,
      "learning_rate": 6.500471762553676e-05,
      "loss": 2.8077,
      "step": 181206
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.984874963760376,
      "learning_rate": 6.500217486521282e-05,
      "loss": 2.8212,
      "step": 181207
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.4261345863342285,
      "learning_rate": 6.499963214857924e-05,
      "loss": 2.8804,
      "step": 181208
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6521060466766357,
      "learning_rate": 6.499708947563674e-05,
      "loss": 2.9688,
      "step": 181209
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6677327156066895,
      "learning_rate": 6.499454684638566e-05,
      "loss": 3.0146,
      "step": 181210
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5547823905944824,
      "learning_rate": 6.49920042608266e-05,
      "loss": 3.2745,
      "step": 181211
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.072321891784668,
      "learning_rate": 6.498946171895986e-05,
      "loss": 2.8483,
      "step": 181212
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.575643301010132,
      "learning_rate": 6.498691922078623e-05,
      "loss": 2.8528,
      "step": 181213
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0372278690338135,
      "learning_rate": 6.498437676630575e-05,
      "loss": 2.97,
      "step": 181214
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.580068349838257,
      "learning_rate": 6.498183435551921e-05,
      "loss": 3.0178,
      "step": 181215
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.325958728790283,
      "learning_rate": 6.497929198842693e-05,
      "loss": 3.1773,
      "step": 181216
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.667083263397217,
      "learning_rate": 6.49767496650295e-05,
      "loss": 3.0804,
      "step": 181217
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7531681060791016,
      "learning_rate": 6.49742073853272e-05,
      "loss": 3.1326,
      "step": 181218
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7998199462890625,
      "learning_rate": 6.497166514932086e-05,
      "loss": 2.7808,
      "step": 181219
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.635868787765503,
      "learning_rate": 6.496912295701053e-05,
      "loss": 3.0825,
      "step": 181220
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5203611850738525,
      "learning_rate": 6.496658080839698e-05,
      "loss": 3.1249,
      "step": 181221
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.572774648666382,
      "learning_rate": 6.496403870348048e-05,
      "loss": 3.1002,
      "step": 181222
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7931840419769287,
      "learning_rate": 6.496149664226169e-05,
      "loss": 2.8828,
      "step": 181223
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.244927406311035,
      "learning_rate": 6.495895462474088e-05,
      "loss": 2.7222,
      "step": 181224
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.081949710845947,
      "learning_rate": 6.495641265091877e-05,
      "loss": 2.9989,
      "step": 181225
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.262592077255249,
      "learning_rate": 6.495387072079565e-05,
      "loss": 3.0667,
      "step": 181226
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.486858606338501,
      "learning_rate": 6.495132883437207e-05,
      "loss": 2.7195,
      "step": 181227
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.525742530822754,
      "learning_rate": 6.49487869916484e-05,
      "loss": 3.0779,
      "step": 181228
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.266063690185547,
      "learning_rate": 6.494624519262528e-05,
      "loss": 2.9876,
      "step": 181229
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.644559383392334,
      "learning_rate": 6.4943703437303e-05,
      "loss": 2.8827,
      "step": 181230
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9304862022399902,
      "learning_rate": 6.494116172568219e-05,
      "loss": 2.9973,
      "step": 181231
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.466935157775879,
      "learning_rate": 6.493862005776325e-05,
      "loss": 2.9854,
      "step": 181232
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1984035968780518,
      "learning_rate": 6.493607843354657e-05,
      "loss": 2.7728,
      "step": 181233
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2318637371063232,
      "learning_rate": 6.493353685303282e-05,
      "loss": 2.8388,
      "step": 181234
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.780791997909546,
      "learning_rate": 6.493099531622235e-05,
      "loss": 2.8835,
      "step": 181235
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.55739688873291,
      "learning_rate": 6.492845382311555e-05,
      "loss": 2.9822,
      "step": 181236
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.6445677280426025,
      "learning_rate": 6.492591237371309e-05,
      "loss": 3.0323,
      "step": 181237
    },
    {
      "epoch": 2.36,
      "grad_norm": 10.056905746459961,
      "learning_rate": 6.492337096801533e-05,
      "loss": 2.7589,
      "step": 181238
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.015065670013428,
      "learning_rate": 6.492082960602268e-05,
      "loss": 3.0995,
      "step": 181239
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2082839012145996,
      "learning_rate": 6.491828828773577e-05,
      "loss": 2.9835,
      "step": 181240
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.698406457901001,
      "learning_rate": 6.491574701315497e-05,
      "loss": 3.1052,
      "step": 181241
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.451196670532227,
      "learning_rate": 6.491320578228071e-05,
      "loss": 2.8402,
      "step": 181242
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.833442211151123,
      "learning_rate": 6.491066459511359e-05,
      "loss": 3.2653,
      "step": 181243
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.872279644012451,
      "learning_rate": 6.490812345165399e-05,
      "loss": 3.1013,
      "step": 181244
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.526803731918335,
      "learning_rate": 6.490558235190238e-05,
      "loss": 2.861,
      "step": 181245
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5518691539764404,
      "learning_rate": 6.49030412958593e-05,
      "loss": 3.0846,
      "step": 181246
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5026888847351074,
      "learning_rate": 6.490050028352512e-05,
      "loss": 3.1902,
      "step": 181247
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.440880060195923,
      "learning_rate": 6.489795931490044e-05,
      "loss": 3.0297,
      "step": 181248
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.6509904861450195,
      "learning_rate": 6.48954183899857e-05,
      "loss": 3.0826,
      "step": 181249
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7851078510284424,
      "learning_rate": 6.48928775087813e-05,
      "loss": 2.895,
      "step": 181250
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.8235602378845215,
      "learning_rate": 6.489033667128769e-05,
      "loss": 2.955,
      "step": 181251
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3771653175354004,
      "learning_rate": 6.488779587750549e-05,
      "loss": 3.1053,
      "step": 181252
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6049857139587402,
      "learning_rate": 6.488525512743499e-05,
      "loss": 2.9221,
      "step": 181253
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.841359853744507,
      "learning_rate": 6.488271442107686e-05,
      "loss": 2.8885,
      "step": 181254
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.9924750328063965,
      "learning_rate": 6.488017375843145e-05,
      "loss": 3.0688,
      "step": 181255
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.792707920074463,
      "learning_rate": 6.487763313949928e-05,
      "loss": 3.0691,
      "step": 181256
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.530538558959961,
      "learning_rate": 6.487509256428072e-05,
      "loss": 2.8648,
      "step": 181257
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.179950714111328,
      "learning_rate": 6.487255203277635e-05,
      "loss": 3.1481,
      "step": 181258
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.873608112335205,
      "learning_rate": 6.487001154498657e-05,
      "loss": 3.2003,
      "step": 181259
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.121179580688477,
      "learning_rate": 6.486747110091197e-05,
      "loss": 2.5253,
      "step": 181260
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.813366413116455,
      "learning_rate": 6.486493070055286e-05,
      "loss": 2.9002,
      "step": 181261
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2001953125,
      "learning_rate": 6.486239034390995e-05,
      "loss": 2.7519,
      "step": 181262
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.46036696434021,
      "learning_rate": 6.485985003098341e-05,
      "loss": 2.8906,
      "step": 181263
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.2939770221710205,
      "learning_rate": 6.485730976177396e-05,
      "loss": 3.1527,
      "step": 181264
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6703343391418457,
      "learning_rate": 6.485476953628186e-05,
      "loss": 2.877,
      "step": 181265
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6729800701141357,
      "learning_rate": 6.48522293545078e-05,
      "loss": 3.1651,
      "step": 181266
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7985918521881104,
      "learning_rate": 6.484968921645205e-05,
      "loss": 2.9344,
      "step": 181267
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.9239583015441895,
      "learning_rate": 6.484714912211538e-05,
      "loss": 2.888,
      "step": 181268
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.815310478210449,
      "learning_rate": 6.484460907149787e-05,
      "loss": 2.7471,
      "step": 181269
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7172563076019287,
      "learning_rate": 6.484206906460027e-05,
      "loss": 2.8196,
      "step": 181270
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.034043550491333,
      "learning_rate": 6.483952910142288e-05,
      "loss": 2.9318,
      "step": 181271
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.548093557357788,
      "learning_rate": 6.483698918196636e-05,
      "loss": 2.9031,
      "step": 181272
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5682873725891113,
      "learning_rate": 6.4834449306231e-05,
      "loss": 3.0749,
      "step": 181273
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.149311065673828,
      "learning_rate": 6.483190947421752e-05,
      "loss": 2.8806,
      "step": 181274
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.89070725440979,
      "learning_rate": 6.482936968592605e-05,
      "loss": 2.866,
      "step": 181275
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.648207426071167,
      "learning_rate": 6.482682994135733e-05,
      "loss": 2.9045,
      "step": 181276
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4875030517578125,
      "learning_rate": 6.482429024051167e-05,
      "loss": 2.9045,
      "step": 181277
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.502951622009277,
      "learning_rate": 6.482175058338967e-05,
      "loss": 2.8739,
      "step": 181278
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.300268650054932,
      "learning_rate": 6.481921096999168e-05,
      "loss": 2.8947,
      "step": 181279
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.719895601272583,
      "learning_rate": 6.48166714003184e-05,
      "loss": 3.0162,
      "step": 181280
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.745465040206909,
      "learning_rate": 6.481413187436996e-05,
      "loss": 3.2326,
      "step": 181281
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9583754539489746,
      "learning_rate": 6.481159239214712e-05,
      "loss": 2.9859,
      "step": 181282
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.1663222312927246,
      "learning_rate": 6.480905295365014e-05,
      "loss": 2.9916,
      "step": 181283
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.33713960647583,
      "learning_rate": 6.480651355887971e-05,
      "loss": 3.1098,
      "step": 181284
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.783357858657837,
      "learning_rate": 6.480397420783607e-05,
      "loss": 3.1386,
      "step": 181285
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2585551738739014,
      "learning_rate": 6.480143490052e-05,
      "loss": 3.0025,
      "step": 181286
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7775614261627197,
      "learning_rate": 6.479889563693159e-05,
      "loss": 2.817,
      "step": 181287
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5784096717834473,
      "learning_rate": 6.479635641707162e-05,
      "loss": 2.8087,
      "step": 181288
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8604509830474854,
      "learning_rate": 6.479381724094035e-05,
      "loss": 2.8002,
      "step": 181289
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.964789628982544,
      "learning_rate": 6.479127810853846e-05,
      "loss": 2.9812,
      "step": 181290
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9742226600646973,
      "learning_rate": 6.478873901986618e-05,
      "loss": 3.1025,
      "step": 181291
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.778836965560913,
      "learning_rate": 6.478619997492431e-05,
      "loss": 2.7055,
      "step": 181292
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8738110065460205,
      "learning_rate": 6.478366097371293e-05,
      "loss": 2.9731,
      "step": 181293
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0331192016601562,
      "learning_rate": 6.478112201623284e-05,
      "loss": 3.0193,
      "step": 181294
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9410722255706787,
      "learning_rate": 6.477858310248428e-05,
      "loss": 3.2955,
      "step": 181295
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.941498279571533,
      "learning_rate": 6.477604423246789e-05,
      "loss": 3.0049,
      "step": 181296
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4553685188293457,
      "learning_rate": 6.477350540618398e-05,
      "loss": 3.0534,
      "step": 181297
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.890486001968384,
      "learning_rate": 6.477096662363322e-05,
      "loss": 2.8817,
      "step": 181298
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.232250690460205,
      "learning_rate": 6.476842788481596e-05,
      "loss": 3.2007,
      "step": 181299
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.742459535598755,
      "learning_rate": 6.47658891897327e-05,
      "loss": 3.0419,
      "step": 181300
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3534810543060303,
      "learning_rate": 6.476335053838383e-05,
      "loss": 2.9677,
      "step": 181301
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7942140102386475,
      "learning_rate": 6.476081193076997e-05,
      "loss": 2.8608,
      "step": 181302
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5331485271453857,
      "learning_rate": 6.475827336689144e-05,
      "loss": 3.1796,
      "step": 181303
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7736704349517822,
      "learning_rate": 6.475573484674884e-05,
      "loss": 2.8679,
      "step": 181304
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.197309732437134,
      "learning_rate": 6.47531963703426e-05,
      "loss": 3.0046,
      "step": 181305
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.351821422576904,
      "learning_rate": 6.47506579376732e-05,
      "loss": 2.9788,
      "step": 181306
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9980289936065674,
      "learning_rate": 6.474811954874098e-05,
      "loss": 3.2754,
      "step": 181307
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.6060798168182373,
      "learning_rate": 6.474558120354665e-05,
      "loss": 2.9309,
      "step": 181308
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.170003890991211,
      "learning_rate": 6.474304290209042e-05,
      "loss": 3.047,
      "step": 181309
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.673927068710327,
      "learning_rate": 6.474050464437305e-05,
      "loss": 3.0046,
      "step": 181310
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.896090269088745,
      "learning_rate": 6.473796643039482e-05,
      "loss": 2.7313,
      "step": 181311
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5357375144958496,
      "learning_rate": 6.473542826015623e-05,
      "loss": 3.182,
      "step": 181312
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.102574348449707,
      "learning_rate": 6.473289013365772e-05,
      "loss": 2.9054,
      "step": 181313
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.556349992752075,
      "learning_rate": 6.473035205089986e-05,
      "loss": 3.0118,
      "step": 181314
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5949177742004395,
      "learning_rate": 6.472781401188301e-05,
      "loss": 3.0994,
      "step": 181315
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.741410732269287,
      "learning_rate": 6.472527601660776e-05,
      "loss": 2.7312,
      "step": 181316
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1947100162506104,
      "learning_rate": 6.472273806507453e-05,
      "loss": 3.1794,
      "step": 181317
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7101292610168457,
      "learning_rate": 6.472020015728378e-05,
      "loss": 3.0693,
      "step": 181318
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.039094924926758,
      "learning_rate": 6.471766229323589e-05,
      "loss": 2.7806,
      "step": 181319
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.063830375671387,
      "learning_rate": 6.471512447293156e-05,
      "loss": 2.9628,
      "step": 181320
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0246312618255615,
      "learning_rate": 6.4712586696371e-05,
      "loss": 2.7665,
      "step": 181321
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1374011039733887,
      "learning_rate": 6.47100489635549e-05,
      "loss": 2.9213,
      "step": 181322
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4842607975006104,
      "learning_rate": 6.470751127448366e-05,
      "loss": 2.9326,
      "step": 181323
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.742337465286255,
      "learning_rate": 6.470497362915765e-05,
      "loss": 2.7931,
      "step": 181324
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9567925930023193,
      "learning_rate": 6.470243602757752e-05,
      "loss": 2.9441,
      "step": 181325
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5028860569000244,
      "learning_rate": 6.469989846974363e-05,
      "loss": 2.9339,
      "step": 181326
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5740835666656494,
      "learning_rate": 6.469736095565638e-05,
      "loss": 3.0475,
      "step": 181327
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.882948875427246,
      "learning_rate": 6.469482348531642e-05,
      "loss": 3.2933,
      "step": 181328
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2904443740844727,
      "learning_rate": 6.469228605872415e-05,
      "loss": 2.8594,
      "step": 181329
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9538490772247314,
      "learning_rate": 6.468974867587995e-05,
      "loss": 3.0678,
      "step": 181330
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.591116428375244,
      "learning_rate": 6.468721133678442e-05,
      "loss": 2.881,
      "step": 181331
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0711424350738525,
      "learning_rate": 6.468467404143792e-05,
      "loss": 2.7233,
      "step": 181332
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8354902267456055,
      "learning_rate": 6.468213678984107e-05,
      "loss": 2.7806,
      "step": 181333
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.585106611251831,
      "learning_rate": 6.467959958199421e-05,
      "loss": 2.8111,
      "step": 181334
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.467428684234619,
      "learning_rate": 6.46770624178979e-05,
      "loss": 2.692,
      "step": 181335
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.805629253387451,
      "learning_rate": 6.467452529755244e-05,
      "loss": 3.0606,
      "step": 181336
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6671619415283203,
      "learning_rate": 6.467198822095853e-05,
      "loss": 3.0736,
      "step": 181337
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.495978832244873,
      "learning_rate": 6.466945118811649e-05,
      "loss": 2.9441,
      "step": 181338
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.672003984451294,
      "learning_rate": 6.466691419902689e-05,
      "loss": 2.9796,
      "step": 181339
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.295351505279541,
      "learning_rate": 6.466437725369015e-05,
      "loss": 3.0221,
      "step": 181340
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9406280517578125,
      "learning_rate": 6.466184035210677e-05,
      "loss": 2.8537,
      "step": 181341
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9738211631774902,
      "learning_rate": 6.46593034942771e-05,
      "loss": 2.7738,
      "step": 181342
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6532070636749268,
      "learning_rate": 6.46567666802018e-05,
      "loss": 2.9584,
      "step": 181343
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4426958560943604,
      "learning_rate": 6.465422990988113e-05,
      "loss": 2.8663,
      "step": 181344
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7131073474884033,
      "learning_rate": 6.465169318331579e-05,
      "loss": 2.6832,
      "step": 181345
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.855936050415039,
      "learning_rate": 6.464915650050609e-05,
      "loss": 3.0836,
      "step": 181346
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1885125637054443,
      "learning_rate": 6.464661986145267e-05,
      "loss": 2.9455,
      "step": 181347
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.636899471282959,
      "learning_rate": 6.464408326615573e-05,
      "loss": 2.8286,
      "step": 181348
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.628390312194824,
      "learning_rate": 6.464154671461598e-05,
      "loss": 2.9603,
      "step": 181349
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5459625720977783,
      "learning_rate": 6.463901020683374e-05,
      "loss": 2.9765,
      "step": 181350
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.776191234588623,
      "learning_rate": 6.463647374280965e-05,
      "loss": 2.7924,
      "step": 181351
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0672855377197266,
      "learning_rate": 6.463393732254397e-05,
      "loss": 2.8908,
      "step": 181352
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7421107292175293,
      "learning_rate": 6.463140094603745e-05,
      "loss": 2.8254,
      "step": 181353
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.815973997116089,
      "learning_rate": 6.462886461329026e-05,
      "loss": 2.7953,
      "step": 181354
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.047170639038086,
      "learning_rate": 6.462632832430306e-05,
      "loss": 2.8578,
      "step": 181355
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.784526824951172,
      "learning_rate": 6.462379207907619e-05,
      "loss": 3.0828,
      "step": 181356
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6653754711151123,
      "learning_rate": 6.462125587761028e-05,
      "loss": 2.7199,
      "step": 181357
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.2247185707092285,
      "learning_rate": 6.461871971990566e-05,
      "loss": 2.9356,
      "step": 181358
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9255964756011963,
      "learning_rate": 6.461618360596301e-05,
      "loss": 3.0892,
      "step": 181359
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8255867958068848,
      "learning_rate": 6.461364753578248e-05,
      "loss": 2.9418,
      "step": 181360
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7637939453125,
      "learning_rate": 6.461111150936482e-05,
      "loss": 3.0455,
      "step": 181361
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.699427604675293,
      "learning_rate": 6.460857552671032e-05,
      "loss": 3.0727,
      "step": 181362
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7682301998138428,
      "learning_rate": 6.46060395878196e-05,
      "loss": 2.988,
      "step": 181363
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.691913604736328,
      "learning_rate": 6.460350369269299e-05,
      "loss": 2.9689,
      "step": 181364
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.908036231994629,
      "learning_rate": 6.460096784133108e-05,
      "loss": 2.684,
      "step": 181365
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.651876211166382,
      "learning_rate": 6.459843203373432e-05,
      "loss": 2.9062,
      "step": 181366
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.127143144607544,
      "learning_rate": 6.459589626990317e-05,
      "loss": 2.8272,
      "step": 181367
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0084285736083984,
      "learning_rate": 6.459336054983797e-05,
      "loss": 2.9622,
      "step": 181368
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.572016716003418,
      "learning_rate": 6.459082487353942e-05,
      "loss": 2.8699,
      "step": 181369
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6639599800109863,
      "learning_rate": 6.458828924100777e-05,
      "loss": 2.9409,
      "step": 181370
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.088357448577881,
      "learning_rate": 6.458575365224369e-05,
      "loss": 2.8911,
      "step": 181371
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8645880222320557,
      "learning_rate": 6.45832181072476e-05,
      "loss": 2.6827,
      "step": 181372
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.0570526123046875,
      "learning_rate": 6.458068260601988e-05,
      "loss": 3.0306,
      "step": 181373
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1243648529052734,
      "learning_rate": 6.4578147148561e-05,
      "loss": 3.1169,
      "step": 181374
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4044201374053955,
      "learning_rate": 6.457561173487156e-05,
      "loss": 2.8212,
      "step": 181375
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0162813663482666,
      "learning_rate": 6.457307636495186e-05,
      "loss": 2.8641,
      "step": 181376
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5198185443878174,
      "learning_rate": 6.457054103880257e-05,
      "loss": 2.8304,
      "step": 181377
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.498199462890625,
      "learning_rate": 6.456800575642407e-05,
      "loss": 2.8969,
      "step": 181378
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3088607788085938,
      "learning_rate": 6.456547051781682e-05,
      "loss": 3.0228,
      "step": 181379
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.078115463256836,
      "learning_rate": 6.456293532298118e-05,
      "loss": 2.9575,
      "step": 181380
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.801816701889038,
      "learning_rate": 6.456040017191785e-05,
      "loss": 3.0585,
      "step": 181381
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6984190940856934,
      "learning_rate": 6.45578650646271e-05,
      "loss": 3.0091,
      "step": 181382
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.151582717895508,
      "learning_rate": 6.455533000110955e-05,
      "loss": 2.7266,
      "step": 181383
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7471351623535156,
      "learning_rate": 6.455279498136562e-05,
      "loss": 2.7811,
      "step": 181384
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.700700044631958,
      "learning_rate": 6.455026000539579e-05,
      "loss": 3.1816,
      "step": 181385
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.525646686553955,
      "learning_rate": 6.45477250732004e-05,
      "loss": 3.0171,
      "step": 181386
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.880577802658081,
      "learning_rate": 6.454519018478014e-05,
      "loss": 3.1617,
      "step": 181387
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7815675735473633,
      "learning_rate": 6.454265534013529e-05,
      "loss": 2.809,
      "step": 181388
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8384621143341064,
      "learning_rate": 6.454012053926645e-05,
      "loss": 3.1867,
      "step": 181389
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.718562126159668,
      "learning_rate": 6.453758578217406e-05,
      "loss": 2.747,
      "step": 181390
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.614555835723877,
      "learning_rate": 6.453505106885859e-05,
      "loss": 2.9093,
      "step": 181391
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2709357738494873,
      "learning_rate": 6.453251639932044e-05,
      "loss": 2.7087,
      "step": 181392
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.584407091140747,
      "learning_rate": 6.452998177356019e-05,
      "loss": 2.9561,
      "step": 181393
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.7529313564300537,
      "learning_rate": 6.452744719157816e-05,
      "loss": 2.9337,
      "step": 181394
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.053722858428955,
      "learning_rate": 6.452491265337505e-05,
      "loss": 3.2301,
      "step": 181395
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3656303882598877,
      "learning_rate": 6.452237815895118e-05,
      "loss": 3.0042,
      "step": 181396
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.040951728820801,
      "learning_rate": 6.451984370830706e-05,
      "loss": 3.0554,
      "step": 181397
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5062899589538574,
      "learning_rate": 6.451730930144305e-05,
      "loss": 2.8766,
      "step": 181398
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.582631826400757,
      "learning_rate": 6.451477493835979e-05,
      "loss": 3.0882,
      "step": 181399
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5081889629364014,
      "learning_rate": 6.45122406190576e-05,
      "loss": 2.9244,
      "step": 181400
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.894195556640625,
      "learning_rate": 6.450970634353714e-05,
      "loss": 2.864,
      "step": 181401
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6759066581726074,
      "learning_rate": 6.450717211179874e-05,
      "loss": 3.0177,
      "step": 181402
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.914628744125366,
      "learning_rate": 6.450463792384293e-05,
      "loss": 2.9687,
      "step": 181403
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7016642093658447,
      "learning_rate": 6.450210377967002e-05,
      "loss": 3.1162,
      "step": 181404
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.502398729324341,
      "learning_rate": 6.449956967928077e-05,
      "loss": 3.0642,
      "step": 181405
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.012156009674072,
      "learning_rate": 6.449703562267534e-05,
      "loss": 2.9923,
      "step": 181406
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4879422187805176,
      "learning_rate": 6.449450160985449e-05,
      "loss": 3.0634,
      "step": 181407
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.521101951599121,
      "learning_rate": 6.449196764081854e-05,
      "loss": 2.7183,
      "step": 181408
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9578592777252197,
      "learning_rate": 6.448943371556787e-05,
      "loss": 2.8359,
      "step": 181409
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4529671669006348,
      "learning_rate": 6.448689983410319e-05,
      "loss": 2.8366,
      "step": 181410
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.833139657974243,
      "learning_rate": 6.448436599642482e-05,
      "loss": 2.7497,
      "step": 181411
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.538196563720703,
      "learning_rate": 6.448183220253318e-05,
      "loss": 2.8103,
      "step": 181412
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.48648738861084,
      "learning_rate": 6.447929845242889e-05,
      "loss": 2.842,
      "step": 181413
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2332026958465576,
      "learning_rate": 6.447676474611233e-05,
      "loss": 2.8804,
      "step": 181414
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.878760814666748,
      "learning_rate": 6.447423108358392e-05,
      "loss": 2.907,
      "step": 181415
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6466188430786133,
      "learning_rate": 6.447169746484423e-05,
      "loss": 2.7955,
      "step": 181416
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.70829701423645,
      "learning_rate": 6.446916388989377e-05,
      "loss": 3.0607,
      "step": 181417
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2803146839141846,
      "learning_rate": 6.446663035873281e-05,
      "loss": 3.0088,
      "step": 181418
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8624510765075684,
      "learning_rate": 6.446409687136204e-05,
      "loss": 2.909,
      "step": 181419
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.143863201141357,
      "learning_rate": 6.446156342778187e-05,
      "loss": 2.9477,
      "step": 181420
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0768814086914062,
      "learning_rate": 6.445903002799261e-05,
      "loss": 3.1038,
      "step": 181421
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.903125762939453,
      "learning_rate": 6.445649667199496e-05,
      "loss": 2.7837,
      "step": 181422
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0150208473205566,
      "learning_rate": 6.445396335978925e-05,
      "loss": 2.991,
      "step": 181423
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5142202377319336,
      "learning_rate": 6.445143009137604e-05,
      "loss": 2.8875,
      "step": 181424
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7949094772338867,
      "learning_rate": 6.444889686675578e-05,
      "loss": 2.8576,
      "step": 181425
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2897098064422607,
      "learning_rate": 6.444636368592887e-05,
      "loss": 2.6572,
      "step": 181426
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1727850437164307,
      "learning_rate": 6.444383054889576e-05,
      "loss": 2.9637,
      "step": 181427
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.610016345977783,
      "learning_rate": 6.44412974556571e-05,
      "loss": 3.0211,
      "step": 181428
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5085227489471436,
      "learning_rate": 6.443876440621317e-05,
      "loss": 2.8847,
      "step": 181429
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5442988872528076,
      "learning_rate": 6.443623140056458e-05,
      "loss": 3.0869,
      "step": 181430
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8706231117248535,
      "learning_rate": 6.443369843871168e-05,
      "loss": 3.1144,
      "step": 181431
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.676928758621216,
      "learning_rate": 6.443116552065505e-05,
      "loss": 3.046,
      "step": 181432
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3709354400634766,
      "learning_rate": 6.442863264639512e-05,
      "loss": 2.8369,
      "step": 181433
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7874152660369873,
      "learning_rate": 6.442609981593238e-05,
      "loss": 2.7994,
      "step": 181434
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.483736038208008,
      "learning_rate": 6.442356702926718e-05,
      "loss": 2.8148,
      "step": 181435
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.405512571334839,
      "learning_rate": 6.442103428640015e-05,
      "loss": 3.0583,
      "step": 181436
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.007545232772827,
      "learning_rate": 6.441850158733165e-05,
      "loss": 2.9686,
      "step": 181437
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.551016330718994,
      "learning_rate": 6.441596893206224e-05,
      "loss": 2.8892,
      "step": 181438
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.746112585067749,
      "learning_rate": 6.441343632059238e-05,
      "loss": 2.779,
      "step": 181439
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.968820095062256,
      "learning_rate": 6.441090375292249e-05,
      "loss": 2.907,
      "step": 181440
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.768393039703369,
      "learning_rate": 6.440837122905297e-05,
      "loss": 2.8449,
      "step": 181441
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4611082077026367,
      "learning_rate": 6.44058387489845e-05,
      "loss": 2.9063,
      "step": 181442
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6367955207824707,
      "learning_rate": 6.440330631271734e-05,
      "loss": 2.9341,
      "step": 181443
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.177598476409912,
      "learning_rate": 6.440077392025211e-05,
      "loss": 2.7613,
      "step": 181444
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.813957691192627,
      "learning_rate": 6.439824157158925e-05,
      "loss": 2.904,
      "step": 181445
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7563164234161377,
      "learning_rate": 6.439570926672918e-05,
      "loss": 2.9235,
      "step": 181446
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.245901107788086,
      "learning_rate": 6.439317700567233e-05,
      "loss": 2.7843,
      "step": 181447
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.799525737762451,
      "learning_rate": 6.439064478841932e-05,
      "loss": 3.0248,
      "step": 181448
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.705547571182251,
      "learning_rate": 6.438811261497043e-05,
      "loss": 2.8481,
      "step": 181449
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.005040645599365,
      "learning_rate": 6.438558048532634e-05,
      "loss": 2.9076,
      "step": 181450
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.122371196746826,
      "learning_rate": 6.43830483994874e-05,
      "loss": 2.7283,
      "step": 181451
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7735228538513184,
      "learning_rate": 6.438051635745413e-05,
      "loss": 2.8586,
      "step": 181452
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.542525291442871,
      "learning_rate": 6.437798435922687e-05,
      "loss": 3.0747,
      "step": 181453
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.647782325744629,
      "learning_rate": 6.437545240480628e-05,
      "loss": 2.7489,
      "step": 181454
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8127245903015137,
      "learning_rate": 6.437292049419264e-05,
      "loss": 3.028,
      "step": 181455
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.746150493621826,
      "learning_rate": 6.43703886273866e-05,
      "loss": 3.2805,
      "step": 181456
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6158151626586914,
      "learning_rate": 6.436785680438859e-05,
      "loss": 2.9225,
      "step": 181457
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6987497806549072,
      "learning_rate": 6.436532502519903e-05,
      "loss": 2.9785,
      "step": 181458
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.220919370651245,
      "learning_rate": 6.436279328981832e-05,
      "loss": 2.7794,
      "step": 181459
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8414294719696045,
      "learning_rate": 6.436026159824707e-05,
      "loss": 2.932,
      "step": 181460
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.785536050796509,
      "learning_rate": 6.435772995048564e-05,
      "loss": 2.9596,
      "step": 181461
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2462124824523926,
      "learning_rate": 6.435519834653463e-05,
      "loss": 2.8843,
      "step": 181462
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7427632808685303,
      "learning_rate": 6.435266678639445e-05,
      "loss": 3.2799,
      "step": 181463
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.8934335708618164,
      "learning_rate": 6.435013527006551e-05,
      "loss": 2.5376,
      "step": 181464
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.714425802230835,
      "learning_rate": 6.434760379754831e-05,
      "loss": 2.9717,
      "step": 181465
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.974726915359497,
      "learning_rate": 6.434507236884339e-05,
      "loss": 2.8445,
      "step": 181466
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8246045112609863,
      "learning_rate": 6.434254098395108e-05,
      "loss": 3.0669,
      "step": 181467
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1530251502990723,
      "learning_rate": 6.434000964287204e-05,
      "loss": 2.8696,
      "step": 181468
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3408234119415283,
      "learning_rate": 6.433747834560665e-05,
      "loss": 3.1281,
      "step": 181469
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.759388446807861,
      "learning_rate": 6.433494709215534e-05,
      "loss": 3.0177,
      "step": 181470
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.938760757446289,
      "learning_rate": 6.433241588251853e-05,
      "loss": 3.0303,
      "step": 181471
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.857191562652588,
      "learning_rate": 6.432988471669686e-05,
      "loss": 2.8734,
      "step": 181472
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0831260681152344,
      "learning_rate": 6.432735359469065e-05,
      "loss": 3.0154,
      "step": 181473
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.032136917114258,
      "learning_rate": 6.432482251650051e-05,
      "loss": 3.0012,
      "step": 181474
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.073674201965332,
      "learning_rate": 6.432229148212682e-05,
      "loss": 2.8172,
      "step": 181475
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9025282859802246,
      "learning_rate": 6.431976049157009e-05,
      "loss": 3.0282,
      "step": 181476
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.519475221633911,
      "learning_rate": 6.431722954483067e-05,
      "loss": 3.0189,
      "step": 181477
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8336644172668457,
      "learning_rate": 6.431469864190918e-05,
      "loss": 2.8706,
      "step": 181478
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1465399265289307,
      "learning_rate": 6.4312167782806e-05,
      "loss": 2.9462,
      "step": 181479
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.475659132003784,
      "learning_rate": 6.43096369675217e-05,
      "loss": 2.7053,
      "step": 181480
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.218966245651245,
      "learning_rate": 6.43071061960567e-05,
      "loss": 2.8122,
      "step": 181481
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.967311382293701,
      "learning_rate": 6.430457546841142e-05,
      "loss": 2.7146,
      "step": 181482
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8514230251312256,
      "learning_rate": 6.430204478458633e-05,
      "loss": 2.5719,
      "step": 181483
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.2462775707244873,
      "learning_rate": 6.429951414458201e-05,
      "loss": 2.8071,
      "step": 181484
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0191493034362793,
      "learning_rate": 6.429698354839874e-05,
      "loss": 2.9068,
      "step": 181485
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.864374876022339,
      "learning_rate": 6.429445299603725e-05,
      "loss": 2.9227,
      "step": 181486
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6538524627685547,
      "learning_rate": 6.429192248749783e-05,
      "loss": 2.9097,
      "step": 181487
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0617992877960205,
      "learning_rate": 6.428939202278101e-05,
      "loss": 2.9714,
      "step": 181488
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.902697801589966,
      "learning_rate": 6.428686160188715e-05,
      "loss": 3.1747,
      "step": 181489
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.534841299057007,
      "learning_rate": 6.428433122481687e-05,
      "loss": 2.8006,
      "step": 181490
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.348941802978516,
      "learning_rate": 6.428180089157054e-05,
      "loss": 3.0617,
      "step": 181491
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5666186809539795,
      "learning_rate": 6.427927060214876e-05,
      "loss": 3.1393,
      "step": 181492
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7967822551727295,
      "learning_rate": 6.427674035655188e-05,
      "loss": 3.1509,
      "step": 181493
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5880846977233887,
      "learning_rate": 6.427421015478035e-05,
      "loss": 2.8503,
      "step": 181494
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.999262809753418,
      "learning_rate": 6.427167999683476e-05,
      "loss": 2.833,
      "step": 181495
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.163483142852783,
      "learning_rate": 6.426914988271551e-05,
      "loss": 2.7897,
      "step": 181496
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.286327838897705,
      "learning_rate": 6.426661981242301e-05,
      "loss": 2.992,
      "step": 181497
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.436532020568848,
      "learning_rate": 6.426408978595784e-05,
      "loss": 2.8808,
      "step": 181498
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3289566040039062,
      "learning_rate": 6.426155980332038e-05,
      "loss": 3.0567,
      "step": 181499
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.798384428024292,
      "learning_rate": 6.42590298645112e-05,
      "loss": 2.8436,
      "step": 181500
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6906611919403076,
      "learning_rate": 6.425649996953076e-05,
      "loss": 3.0295,
      "step": 181501
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6252174377441406,
      "learning_rate": 6.425397011837946e-05,
      "loss": 3.0079,
      "step": 181502
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.6186723709106445,
      "learning_rate": 6.42514403110577e-05,
      "loss": 2.9629,
      "step": 181503
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.959388732910156,
      "learning_rate": 6.424891054756615e-05,
      "loss": 2.9286,
      "step": 181504
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.4419054985046387,
      "learning_rate": 6.42463808279051e-05,
      "loss": 2.9808,
      "step": 181505
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.283529758453369,
      "learning_rate": 6.424385115207516e-05,
      "loss": 2.8683,
      "step": 181506
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7461259365081787,
      "learning_rate": 6.424132152007675e-05,
      "loss": 2.978,
      "step": 181507
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.316580295562744,
      "learning_rate": 6.423879193191024e-05,
      "loss": 2.9191,
      "step": 181508
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.120095252990723,
      "learning_rate": 6.423626238757629e-05,
      "loss": 3.1452,
      "step": 181509
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7612271308898926,
      "learning_rate": 6.423373288707527e-05,
      "loss": 3.103,
      "step": 181510
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5465614795684814,
      "learning_rate": 6.423120343040754e-05,
      "loss": 2.9618,
      "step": 181511
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4270074367523193,
      "learning_rate": 6.422867401757375e-05,
      "loss": 2.9286,
      "step": 181512
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6186795234680176,
      "learning_rate": 6.422614464857435e-05,
      "loss": 2.9525,
      "step": 181513
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.809502124786377,
      "learning_rate": 6.422361532340965e-05,
      "loss": 2.8051,
      "step": 181514
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.305572986602783,
      "learning_rate": 6.422108604208031e-05,
      "loss": 2.9718,
      "step": 181515
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5379111766815186,
      "learning_rate": 6.421855680458664e-05,
      "loss": 2.6317,
      "step": 181516
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5075719356536865,
      "learning_rate": 6.42160276109293e-05,
      "loss": 2.8064,
      "step": 181517
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6255476474761963,
      "learning_rate": 6.421349846110863e-05,
      "loss": 2.828,
      "step": 181518
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6646437644958496,
      "learning_rate": 6.421096935512512e-05,
      "loss": 2.9238,
      "step": 181519
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.367615699768066,
      "learning_rate": 6.420844029297915e-05,
      "loss": 2.8491,
      "step": 181520
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.1086649894714355,
      "learning_rate": 6.420591127467138e-05,
      "loss": 3.1614,
      "step": 181521
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.9264400005340576,
      "learning_rate": 6.420338230020207e-05,
      "loss": 2.9461,
      "step": 181522
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.606114625930786,
      "learning_rate": 6.420085336957192e-05,
      "loss": 2.8524,
      "step": 181523
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.1495232582092285,
      "learning_rate": 6.419832448278129e-05,
      "loss": 2.7964,
      "step": 181524
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.7057604789733887,
      "learning_rate": 6.419579563983061e-05,
      "loss": 2.9111,
      "step": 181525
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3405981063842773,
      "learning_rate": 6.41932668407203e-05,
      "loss": 2.9874,
      "step": 181526
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.648073673248291,
      "learning_rate": 6.419073808545101e-05,
      "loss": 2.9505,
      "step": 181527
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.811931848526001,
      "learning_rate": 6.418820937402303e-05,
      "loss": 3.1141,
      "step": 181528
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0150387287139893,
      "learning_rate": 6.4185680706437e-05,
      "loss": 2.7389,
      "step": 181529
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.559170722961426,
      "learning_rate": 6.418315208269331e-05,
      "loss": 3.2016,
      "step": 181530
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5141119956970215,
      "learning_rate": 6.418062350279242e-05,
      "loss": 2.8283,
      "step": 181531
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7584640979766846,
      "learning_rate": 6.417809496673468e-05,
      "loss": 3.0111,
      "step": 181532
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7001280784606934,
      "learning_rate": 6.417556647452082e-05,
      "loss": 2.9931,
      "step": 181533
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1595945358276367,
      "learning_rate": 6.417303802615107e-05,
      "loss": 2.8713,
      "step": 181534
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.032399892807007,
      "learning_rate": 6.417050962162609e-05,
      "loss": 2.9972,
      "step": 181535
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.026618719100952,
      "learning_rate": 6.416798126094623e-05,
      "loss": 2.7728,
      "step": 181536
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7533199787139893,
      "learning_rate": 6.416545294411203e-05,
      "loss": 2.8985,
      "step": 181537
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.157851457595825,
      "learning_rate": 6.416292467112384e-05,
      "loss": 3.0429,
      "step": 181538
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0036683082580566,
      "learning_rate": 6.416039644198229e-05,
      "loss": 2.914,
      "step": 181539
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5001823902130127,
      "learning_rate": 6.415786825668771e-05,
      "loss": 3.1085,
      "step": 181540
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.535369873046875,
      "learning_rate": 6.41553401152407e-05,
      "loss": 3.0193,
      "step": 181541
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7107038497924805,
      "learning_rate": 6.415281201764166e-05,
      "loss": 2.6265,
      "step": 181542
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9314279556274414,
      "learning_rate": 6.415028396389106e-05,
      "loss": 3.2596,
      "step": 181543
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9234118461608887,
      "learning_rate": 6.414775595398931e-05,
      "loss": 3.0896,
      "step": 181544
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.421703577041626,
      "learning_rate": 6.414522798793702e-05,
      "loss": 3.3463,
      "step": 181545
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.782470226287842,
      "learning_rate": 6.414270006573449e-05,
      "loss": 2.6758,
      "step": 181546
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.219822883605957,
      "learning_rate": 6.414017218738237e-05,
      "loss": 2.6591,
      "step": 181547
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.652254819869995,
      "learning_rate": 6.413764435288106e-05,
      "loss": 3.2176,
      "step": 181548
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.5950779914855957,
      "learning_rate": 6.413511656223099e-05,
      "loss": 2.9469,
      "step": 181549
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6195878982543945,
      "learning_rate": 6.413258881543261e-05,
      "loss": 2.8629,
      "step": 181550
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8317575454711914,
      "learning_rate": 6.41300611124865e-05,
      "loss": 3.2379,
      "step": 181551
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3910372257232666,
      "learning_rate": 6.412753345339297e-05,
      "loss": 2.97,
      "step": 181552
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.661165475845337,
      "learning_rate": 6.412500583815266e-05,
      "loss": 2.8097,
      "step": 181553
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.826481819152832,
      "learning_rate": 6.412247826676596e-05,
      "loss": 2.9749,
      "step": 181554
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4130587577819824,
      "learning_rate": 6.411995073923337e-05,
      "loss": 3.0049,
      "step": 181555
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0766797065734863,
      "learning_rate": 6.411742325555525e-05,
      "loss": 2.9751,
      "step": 181556
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9390387535095215,
      "learning_rate": 6.411489581573221e-05,
      "loss": 2.5677,
      "step": 181557
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.008443832397461,
      "learning_rate": 6.41123684197646e-05,
      "loss": 2.9134,
      "step": 181558
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.684093713760376,
      "learning_rate": 6.410984106765304e-05,
      "loss": 2.7768,
      "step": 181559
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.598095655441284,
      "learning_rate": 6.410731375939783e-05,
      "loss": 3.1574,
      "step": 181560
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.291522264480591,
      "learning_rate": 6.410478649499968e-05,
      "loss": 3.0574,
      "step": 181561
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3012473583221436,
      "learning_rate": 6.410225927445875e-05,
      "loss": 2.7516,
      "step": 181562
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9550588130950928,
      "learning_rate": 6.409973209777572e-05,
      "loss": 3.0535,
      "step": 181563
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.1905736923217773,
      "learning_rate": 6.409720496495092e-05,
      "loss": 2.9491,
      "step": 181564
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2352964878082275,
      "learning_rate": 6.4094677875985e-05,
      "loss": 2.9759,
      "step": 181565
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7147786617279053,
      "learning_rate": 6.409215083087826e-05,
      "loss": 2.8071,
      "step": 181566
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8537847995758057,
      "learning_rate": 6.408962382963141e-05,
      "loss": 2.7512,
      "step": 181567
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.465629816055298,
      "learning_rate": 6.408709687224457e-05,
      "loss": 3.176,
      "step": 181568
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.028517961502075,
      "learning_rate": 6.408456995871848e-05,
      "loss": 3.026,
      "step": 181569
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.17880916595459,
      "learning_rate": 6.408204308905343e-05,
      "loss": 3.0533,
      "step": 181570
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4405019283294678,
      "learning_rate": 6.407951626325008e-05,
      "loss": 2.8139,
      "step": 181571
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0129520893096924,
      "learning_rate": 6.40769894813087e-05,
      "loss": 2.8662,
      "step": 181572
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.930593252182007,
      "learning_rate": 6.407446274323006e-05,
      "loss": 3.0931,
      "step": 181573
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.172348737716675,
      "learning_rate": 6.407193604901424e-05,
      "loss": 2.8982,
      "step": 181574
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.110813856124878,
      "learning_rate": 6.406940939866197e-05,
      "loss": 3.1486,
      "step": 181575
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4001669883728027,
      "learning_rate": 6.40668827921736e-05,
      "loss": 2.9516,
      "step": 181576
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8160042762756348,
      "learning_rate": 6.406435622954971e-05,
      "loss": 2.9336,
      "step": 181577
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7869105339050293,
      "learning_rate": 6.406182971079061e-05,
      "loss": 2.921,
      "step": 181578
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.3656857013702393,
      "learning_rate": 6.405930323589709e-05,
      "loss": 2.8009,
      "step": 181579
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8087527751922607,
      "learning_rate": 6.40567768048692e-05,
      "loss": 2.9535,
      "step": 181580
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8446576595306396,
      "learning_rate": 6.40542504177077e-05,
      "loss": 2.9936,
      "step": 181581
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.759420871734619,
      "learning_rate": 6.405172407441287e-05,
      "loss": 2.633,
      "step": 181582
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9894750118255615,
      "learning_rate": 6.404919777498535e-05,
      "loss": 2.952,
      "step": 181583
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.2947051525115967,
      "learning_rate": 6.40466715194255e-05,
      "loss": 2.9369,
      "step": 181584
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.5124053955078125,
      "learning_rate": 6.404414530773388e-05,
      "loss": 2.8273,
      "step": 181585
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0511839389801025,
      "learning_rate": 6.404161913991092e-05,
      "loss": 3.0314,
      "step": 181586
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8009865283966064,
      "learning_rate": 6.403909301595707e-05,
      "loss": 2.8474,
      "step": 181587
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6826038360595703,
      "learning_rate": 6.403656693587273e-05,
      "loss": 2.9574,
      "step": 181588
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4904136657714844,
      "learning_rate": 6.403404089965853e-05,
      "loss": 2.8852,
      "step": 181589
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.717837333679199,
      "learning_rate": 6.403151490731477e-05,
      "loss": 3.0437,
      "step": 181590
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6186821460723877,
      "learning_rate": 6.402898895884207e-05,
      "loss": 2.9448,
      "step": 181591
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4273922443389893,
      "learning_rate": 6.402646305424087e-05,
      "loss": 2.7188,
      "step": 181592
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.002398490905762,
      "learning_rate": 6.402393719351148e-05,
      "loss": 3.0051,
      "step": 181593
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.710529088973999,
      "learning_rate": 6.402141137665461e-05,
      "loss": 2.9635,
      "step": 181594
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.880150079727173,
      "learning_rate": 6.40188856036706e-05,
      "loss": 2.9025,
      "step": 181595
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.551680564880371,
      "learning_rate": 6.401635987455986e-05,
      "loss": 3.1136,
      "step": 181596
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2771432399749756,
      "learning_rate": 6.4013834189323e-05,
      "loss": 3.1025,
      "step": 181597
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.4630966186523438,
      "learning_rate": 6.401130854796042e-05,
      "loss": 3.1515,
      "step": 181598
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.6289117336273193,
      "learning_rate": 6.400878295047255e-05,
      "loss": 2.7807,
      "step": 181599
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0270328521728516,
      "learning_rate": 6.400625739685994e-05,
      "loss": 2.8291,
      "step": 181600
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8582608699798584,
      "learning_rate": 6.400373188712307e-05,
      "loss": 2.8084,
      "step": 181601
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.835514783859253,
      "learning_rate": 6.400120642126223e-05,
      "loss": 2.8277,
      "step": 181602
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0446364879608154,
      "learning_rate": 6.399868099927813e-05,
      "loss": 2.9262,
      "step": 181603
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.1232590675354,
      "learning_rate": 6.399615562117113e-05,
      "loss": 2.7998,
      "step": 181604
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.596244812011719,
      "learning_rate": 6.39936302869416e-05,
      "loss": 2.8771,
      "step": 181605
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.846444606781006,
      "learning_rate": 6.39911049965902e-05,
      "loss": 3.319,
      "step": 181606
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.487476348876953,
      "learning_rate": 6.398857975011721e-05,
      "loss": 2.9631,
      "step": 181607
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.702453374862671,
      "learning_rate": 6.398605454752331e-05,
      "loss": 2.8373,
      "step": 181608
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9026248455047607,
      "learning_rate": 6.398352938880885e-05,
      "loss": 2.8638,
      "step": 181609
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.4845707416534424,
      "learning_rate": 6.398100427397429e-05,
      "loss": 2.9281,
      "step": 181610
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.014120101928711,
      "learning_rate": 6.397847920302006e-05,
      "loss": 3.1191,
      "step": 181611
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.704801559448242,
      "learning_rate": 6.397595417594673e-05,
      "loss": 2.8854,
      "step": 181612
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.610764265060425,
      "learning_rate": 6.397342919275467e-05,
      "loss": 3.0496,
      "step": 181613
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.739737033843994,
      "learning_rate": 6.39709042534445e-05,
      "loss": 3.088,
      "step": 181614
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.86661696434021,
      "learning_rate": 6.396837935801657e-05,
      "loss": 2.7875,
      "step": 181615
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.9939348697662354,
      "learning_rate": 6.396585450647139e-05,
      "loss": 3.0122,
      "step": 181616
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2291502952575684,
      "learning_rate": 6.396332969880932e-05,
      "loss": 2.9375,
      "step": 181617
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.3392035961151123,
      "learning_rate": 6.3960804935031e-05,
      "loss": 2.8596,
      "step": 181618
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.247818470001221,
      "learning_rate": 6.395828021513677e-05,
      "loss": 3.2482,
      "step": 181619
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.659437417984009,
      "learning_rate": 6.395575553912721e-05,
      "loss": 3.1081,
      "step": 181620
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.987835645675659,
      "learning_rate": 6.395323090700272e-05,
      "loss": 3.0373,
      "step": 181621
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.286842107772827,
      "learning_rate": 6.39507063187638e-05,
      "loss": 2.9462,
      "step": 181622
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7029802799224854,
      "learning_rate": 6.39481817744108e-05,
      "loss": 3.2703,
      "step": 181623
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.000860214233398,
      "learning_rate": 6.394565727394439e-05,
      "loss": 2.6256,
      "step": 181624
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.182087421417236,
      "learning_rate": 6.394313281736483e-05,
      "loss": 2.9611,
      "step": 181625
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2161993980407715,
      "learning_rate": 6.39406084046728e-05,
      "loss": 3.0233,
      "step": 181626
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.034590244293213,
      "learning_rate": 6.393808403586859e-05,
      "loss": 2.813,
      "step": 181627
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.8147687911987305,
      "learning_rate": 6.39355597109529e-05,
      "loss": 2.7755,
      "step": 181628
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.7737534046173096,
      "learning_rate": 6.393303542992589e-05,
      "loss": 2.941,
      "step": 181629
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0654122829437256,
      "learning_rate": 6.393051119278825e-05,
      "loss": 2.8419,
      "step": 181630
    },
    {
      "epoch": 2.36,
      "grad_norm": 4.545642852783203,
      "learning_rate": 6.392798699954032e-05,
      "loss": 2.8435,
      "step": 181631
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.2059600353240967,
      "learning_rate": 6.39254628501827e-05,
      "loss": 2.8049,
      "step": 181632
    },
    {
      "epoch": 2.36,
      "grad_norm": 5.268428325653076,
      "learning_rate": 6.392293874471573e-05,
      "loss": 3.0108,
      "step": 181633
    },
    {
      "epoch": 2.36,
      "grad_norm": 3.0375630855560303,
      "learning_rate": 6.392041468314008e-05,
      "loss": 2.9918,
      "step": 181634
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.509201765060425,
      "learning_rate": 6.391789066545592e-05,
      "loss": 2.9898,
      "step": 181635
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.535580635070801,
      "learning_rate": 6.391536669166399e-05,
      "loss": 3.0354,
      "step": 181636
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7975986003875732,
      "learning_rate": 6.391284276176454e-05,
      "loss": 2.8344,
      "step": 181637
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5679564476013184,
      "learning_rate": 6.391031887575825e-05,
      "loss": 2.8195,
      "step": 181638
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5351884365081787,
      "learning_rate": 6.390779503364539e-05,
      "loss": 2.9584,
      "step": 181639
    },
    {
      "epoch": 2.37,
      "grad_norm": 5.286304473876953,
      "learning_rate": 6.39052712354267e-05,
      "loss": 2.756,
      "step": 181640
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6643545627593994,
      "learning_rate": 6.390274748110231e-05,
      "loss": 2.9888,
      "step": 181641
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.586369752883911,
      "learning_rate": 6.390022377067294e-05,
      "loss": 2.8557,
      "step": 181642
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5501015186309814,
      "learning_rate": 6.389770010413889e-05,
      "loss": 2.8738,
      "step": 181643
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.812559604644775,
      "learning_rate": 6.38951764815008e-05,
      "loss": 2.7049,
      "step": 181644
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.400733470916748,
      "learning_rate": 6.389265290275899e-05,
      "loss": 2.7824,
      "step": 181645
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.659808874130249,
      "learning_rate": 6.389012936791413e-05,
      "loss": 2.9506,
      "step": 181646
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0014700889587402,
      "learning_rate": 6.38876058769664e-05,
      "loss": 3.057,
      "step": 181647
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9135313034057617,
      "learning_rate": 6.38850824299165e-05,
      "loss": 3.0572,
      "step": 181648
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.674515962600708,
      "learning_rate": 6.388255902676475e-05,
      "loss": 2.9459,
      "step": 181649
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.555756092071533,
      "learning_rate": 6.388003566751173e-05,
      "loss": 3.0947,
      "step": 181650
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.051180839538574,
      "learning_rate": 6.387751235215784e-05,
      "loss": 2.7237,
      "step": 181651
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0577504634857178,
      "learning_rate": 6.387498908070372e-05,
      "loss": 2.8562,
      "step": 181652
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7938907146453857,
      "learning_rate": 6.387246585314953e-05,
      "loss": 3.214,
      "step": 181653
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.262714385986328,
      "learning_rate": 6.386994266949598e-05,
      "loss": 2.9455,
      "step": 181654
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9204533100128174,
      "learning_rate": 6.386741952974337e-05,
      "loss": 3.154,
      "step": 181655
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2046375274658203,
      "learning_rate": 6.386489643389236e-05,
      "loss": 3.0045,
      "step": 181656
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7932040691375732,
      "learning_rate": 6.386237338194326e-05,
      "loss": 2.8792,
      "step": 181657
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4713234901428223,
      "learning_rate": 6.385985037389675e-05,
      "loss": 2.9714,
      "step": 181658
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.206972122192383,
      "learning_rate": 6.385732740975302e-05,
      "loss": 3.0198,
      "step": 181659
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.743250846862793,
      "learning_rate": 6.385480448951269e-05,
      "loss": 2.7919,
      "step": 181660
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5840413570404053,
      "learning_rate": 6.385228161317618e-05,
      "loss": 3.1505,
      "step": 181661
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4381508827209473,
      "learning_rate": 6.384975878074405e-05,
      "loss": 2.5802,
      "step": 181662
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8393714427948,
      "learning_rate": 6.384723599221664e-05,
      "loss": 2.7891,
      "step": 181663
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.869908094406128,
      "learning_rate": 6.384471324759463e-05,
      "loss": 2.785,
      "step": 181664
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.754894256591797,
      "learning_rate": 6.384219054687818e-05,
      "loss": 2.8997,
      "step": 181665
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.032104969024658,
      "learning_rate": 6.383966789006801e-05,
      "loss": 3.0311,
      "step": 181666
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4278457164764404,
      "learning_rate": 6.383714527716444e-05,
      "loss": 3.0495,
      "step": 181667
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9669017791748047,
      "learning_rate": 6.383462270816808e-05,
      "loss": 2.7969,
      "step": 181668
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.77199649810791,
      "learning_rate": 6.383210018307921e-05,
      "loss": 2.9992,
      "step": 181669
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.687011241912842,
      "learning_rate": 6.382957770189854e-05,
      "loss": 3.0482,
      "step": 181670
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3839352130889893,
      "learning_rate": 6.382705526462638e-05,
      "loss": 2.8561,
      "step": 181671
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.696033239364624,
      "learning_rate": 6.382453287126323e-05,
      "loss": 3.0521,
      "step": 181672
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.638227939605713,
      "learning_rate": 6.382201052180946e-05,
      "loss": 2.7603,
      "step": 181673
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.219865083694458,
      "learning_rate": 6.381948821626573e-05,
      "loss": 3.0864,
      "step": 181674
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.374297857284546,
      "learning_rate": 6.381696595463235e-05,
      "loss": 2.7197,
      "step": 181675
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.903752565383911,
      "learning_rate": 6.381444373690994e-05,
      "loss": 3.0061,
      "step": 181676
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.639038562774658,
      "learning_rate": 6.381192156309887e-05,
      "loss": 2.9223,
      "step": 181677
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3856983184814453,
      "learning_rate": 6.380939943319954e-05,
      "loss": 2.9342,
      "step": 181678
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.307887315750122,
      "learning_rate": 6.380687734721259e-05,
      "loss": 2.998,
      "step": 181679
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7821972370147705,
      "learning_rate": 6.380435530513837e-05,
      "loss": 3.0408,
      "step": 181680
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.593048572540283,
      "learning_rate": 6.380183330697733e-05,
      "loss": 2.9896,
      "step": 181681
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6229474544525146,
      "learning_rate": 6.379931135273006e-05,
      "loss": 2.8699,
      "step": 181682
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3547282218933105,
      "learning_rate": 6.379678944239696e-05,
      "loss": 3.0084,
      "step": 181683
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6736648082733154,
      "learning_rate": 6.37942675759784e-05,
      "loss": 3.0766,
      "step": 181684
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.287729263305664,
      "learning_rate": 6.379174575347505e-05,
      "loss": 2.8059,
      "step": 181685
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.528620481491089,
      "learning_rate": 6.378922397488723e-05,
      "loss": 2.8669,
      "step": 181686
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.813401699066162,
      "learning_rate": 6.378670224021542e-05,
      "loss": 2.9724,
      "step": 181687
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7944343090057373,
      "learning_rate": 6.378418054946017e-05,
      "loss": 3.0709,
      "step": 181688
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.9369471073150635,
      "learning_rate": 6.378165890262191e-05,
      "loss": 3.0608,
      "step": 181689
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.076412677764893,
      "learning_rate": 6.377913729970103e-05,
      "loss": 2.8758,
      "step": 181690
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.066452741622925,
      "learning_rate": 6.377661574069813e-05,
      "loss": 3.0597,
      "step": 181691
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7723546028137207,
      "learning_rate": 6.377409422561353e-05,
      "loss": 2.9719,
      "step": 181692
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.9317808151245117,
      "learning_rate": 6.377157275444787e-05,
      "loss": 3.0424,
      "step": 181693
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9539737701416016,
      "learning_rate": 6.376905132720154e-05,
      "loss": 2.836,
      "step": 181694
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4899990558624268,
      "learning_rate": 6.376652994387502e-05,
      "loss": 2.8624,
      "step": 181695
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.68888258934021,
      "learning_rate": 6.376400860446867e-05,
      "loss": 2.9812,
      "step": 181696
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.6243977546691895,
      "learning_rate": 6.376148730898314e-05,
      "loss": 2.8603,
      "step": 181697
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.375105619430542,
      "learning_rate": 6.375896605741872e-05,
      "loss": 3.227,
      "step": 181698
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.852295160293579,
      "learning_rate": 6.375644484977602e-05,
      "loss": 2.917,
      "step": 181699
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6231024265289307,
      "learning_rate": 6.37539236860555e-05,
      "loss": 3.2209,
      "step": 181700
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.627772569656372,
      "learning_rate": 6.375140256625753e-05,
      "loss": 2.916,
      "step": 181701
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.544929265975952,
      "learning_rate": 6.374888149038261e-05,
      "loss": 3.0352,
      "step": 181702
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.381755828857422,
      "learning_rate": 6.374636045843127e-05,
      "loss": 2.7666,
      "step": 181703
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.848444938659668,
      "learning_rate": 6.374383947040388e-05,
      "loss": 3.0273,
      "step": 181704
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0755417346954346,
      "learning_rate": 6.374131852630108e-05,
      "loss": 2.8257,
      "step": 181705
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8661293983459473,
      "learning_rate": 6.37387976261231e-05,
      "loss": 2.8978,
      "step": 181706
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.152961015701294,
      "learning_rate": 6.373627676987074e-05,
      "loss": 2.9437,
      "step": 181707
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.277773141860962,
      "learning_rate": 6.373375595754405e-05,
      "loss": 3.1862,
      "step": 181708
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2789201736450195,
      "learning_rate": 6.373123518914384e-05,
      "loss": 2.9922,
      "step": 181709
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5404908657073975,
      "learning_rate": 6.372871446467035e-05,
      "loss": 2.9063,
      "step": 181710
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3395328521728516,
      "learning_rate": 6.372619378412425e-05,
      "loss": 3.1504,
      "step": 181711
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4579977989196777,
      "learning_rate": 6.372367314750582e-05,
      "loss": 2.9388,
      "step": 181712
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.615017890930176,
      "learning_rate": 6.37211525548158e-05,
      "loss": 2.8196,
      "step": 181713
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.6008946895599365,
      "learning_rate": 6.371863200605428e-05,
      "loss": 2.999,
      "step": 181714
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3109028339385986,
      "learning_rate": 6.371611150122203e-05,
      "loss": 3.0358,
      "step": 181715
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.769049882888794,
      "learning_rate": 6.371359104031934e-05,
      "loss": 2.8832,
      "step": 181716
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.562617063522339,
      "learning_rate": 6.371107062334681e-05,
      "loss": 2.7628,
      "step": 181717
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5249249935150146,
      "learning_rate": 6.370855025030481e-05,
      "loss": 2.7725,
      "step": 181718
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.388667583465576,
      "learning_rate": 6.3706029921194e-05,
      "loss": 2.9045,
      "step": 181719
    },
    {
      "epoch": 2.37,
      "grad_norm": 5.231540203094482,
      "learning_rate": 6.370350963601451e-05,
      "loss": 2.8782,
      "step": 181720
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.9016497135162354,
      "learning_rate": 6.370098939476709e-05,
      "loss": 3.059,
      "step": 181721
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0396840572357178,
      "learning_rate": 6.369846919745205e-05,
      "loss": 3.0941,
      "step": 181722
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4361519813537598,
      "learning_rate": 6.369594904407001e-05,
      "loss": 3.0114,
      "step": 181723
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7552146911621094,
      "learning_rate": 6.369342893462125e-05,
      "loss": 2.9689,
      "step": 181724
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.30775260925293,
      "learning_rate": 6.369090886910652e-05,
      "loss": 2.8273,
      "step": 181725
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.337298631668091,
      "learning_rate": 6.368838884752595e-05,
      "loss": 3.0028,
      "step": 181726
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.989450454711914,
      "learning_rate": 6.368586886988026e-05,
      "loss": 3.0396,
      "step": 181727
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.772573471069336,
      "learning_rate": 6.368334893616972e-05,
      "loss": 3.1932,
      "step": 181728
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.648176670074463,
      "learning_rate": 6.368082904639497e-05,
      "loss": 2.8864,
      "step": 181729
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5787136554718018,
      "learning_rate": 6.367830920055639e-05,
      "loss": 2.8702,
      "step": 181730
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9216129779815674,
      "learning_rate": 6.367578939865459e-05,
      "loss": 2.9353,
      "step": 181731
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.829258918762207,
      "learning_rate": 6.367326964068977e-05,
      "loss": 2.8321,
      "step": 181732
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.261772871017456,
      "learning_rate": 6.367074992666264e-05,
      "loss": 2.9617,
      "step": 181733
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.71871280670166,
      "learning_rate": 6.36682302565735e-05,
      "loss": 2.9642,
      "step": 181734
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.750154733657837,
      "learning_rate": 6.366571063042298e-05,
      "loss": 3.0148,
      "step": 181735
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.321650266647339,
      "learning_rate": 6.366319104821138e-05,
      "loss": 2.8693,
      "step": 181736
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.6058406829833984,
      "learning_rate": 6.36606715099394e-05,
      "loss": 2.8267,
      "step": 181737
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3838112354278564,
      "learning_rate": 6.365815201560723e-05,
      "loss": 2.8498,
      "step": 181738
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8723464012145996,
      "learning_rate": 6.365563256521553e-05,
      "loss": 2.8381,
      "step": 181739
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.470290184020996,
      "learning_rate": 6.365311315876462e-05,
      "loss": 2.976,
      "step": 181740
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5081992149353027,
      "learning_rate": 6.365059379625518e-05,
      "loss": 3.0408,
      "step": 181741
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.674295425415039,
      "learning_rate": 6.364807447768746e-05,
      "loss": 2.9913,
      "step": 181742
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.710574150085449,
      "learning_rate": 6.364555520306217e-05,
      "loss": 2.9591,
      "step": 181743
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9967286586761475,
      "learning_rate": 6.364303597237948e-05,
      "loss": 2.9293,
      "step": 181744
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.559271335601807,
      "learning_rate": 6.364051678564006e-05,
      "loss": 2.8695,
      "step": 181745
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6945011615753174,
      "learning_rate": 6.36379976428443e-05,
      "loss": 3.083,
      "step": 181746
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.537766933441162,
      "learning_rate": 6.363547854399275e-05,
      "loss": 3.0977,
      "step": 181747
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4991185665130615,
      "learning_rate": 6.363295948908575e-05,
      "loss": 2.8049,
      "step": 181748
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9377825260162354,
      "learning_rate": 6.363044047812399e-05,
      "loss": 2.8541,
      "step": 181749
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.233410120010376,
      "learning_rate": 6.362792151110765e-05,
      "loss": 2.9516,
      "step": 181750
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8392107486724854,
      "learning_rate": 6.362540258803743e-05,
      "loss": 2.98,
      "step": 181751
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.413360834121704,
      "learning_rate": 6.36228837089136e-05,
      "loss": 2.9195,
      "step": 181752
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.670553684234619,
      "learning_rate": 6.362036487373687e-05,
      "loss": 2.8326,
      "step": 181753
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0552241802215576,
      "learning_rate": 6.361784608250745e-05,
      "loss": 2.8648,
      "step": 181754
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6276133060455322,
      "learning_rate": 6.361532733522604e-05,
      "loss": 2.9224,
      "step": 181755
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7644968032836914,
      "learning_rate": 6.361280863189297e-05,
      "loss": 3.0993,
      "step": 181756
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.676490545272827,
      "learning_rate": 6.361028997250878e-05,
      "loss": 2.9375,
      "step": 181757
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0585570335388184,
      "learning_rate": 6.360777135707378e-05,
      "loss": 2.9396,
      "step": 181758
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.687140941619873,
      "learning_rate": 6.360525278558868e-05,
      "loss": 3.0707,
      "step": 181759
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.834688901901245,
      "learning_rate": 6.360273425805372e-05,
      "loss": 2.8814,
      "step": 181760
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8808987140655518,
      "learning_rate": 6.360021577446954e-05,
      "loss": 2.8141,
      "step": 181761
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2680604457855225,
      "learning_rate": 6.359769733483657e-05,
      "loss": 2.7342,
      "step": 181762
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8059983253479004,
      "learning_rate": 6.359517893915524e-05,
      "loss": 2.8827,
      "step": 181763
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.7381019592285156,
      "learning_rate": 6.359266058742596e-05,
      "loss": 2.9134,
      "step": 181764
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1783175468444824,
      "learning_rate": 6.359014227964934e-05,
      "loss": 3.1492,
      "step": 181765
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3105125427246094,
      "learning_rate": 6.358762401582568e-05,
      "loss": 3.1148,
      "step": 181766
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.620361328125,
      "learning_rate": 6.358510579595565e-05,
      "loss": 2.8115,
      "step": 181767
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.369729995727539,
      "learning_rate": 6.35825876200396e-05,
      "loss": 2.8675,
      "step": 181768
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.920151710510254,
      "learning_rate": 6.358006948807791e-05,
      "loss": 2.7801,
      "step": 181769
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8467137813568115,
      "learning_rate": 6.357755140007126e-05,
      "loss": 2.8659,
      "step": 181770
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9197745323181152,
      "learning_rate": 6.357503335602e-05,
      "loss": 2.5682,
      "step": 181771
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7587547302246094,
      "learning_rate": 6.35725153559245e-05,
      "loss": 2.7601,
      "step": 181772
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.953465461730957,
      "learning_rate": 6.356999739978544e-05,
      "loss": 2.9226,
      "step": 181773
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.142719030380249,
      "learning_rate": 6.356747948760316e-05,
      "loss": 2.9236,
      "step": 181774
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.699586868286133,
      "learning_rate": 6.356496161937807e-05,
      "loss": 3.0632,
      "step": 181775
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.805043935775757,
      "learning_rate": 6.35624437951108e-05,
      "loss": 3.0569,
      "step": 181776
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.485569477081299,
      "learning_rate": 6.355992601480166e-05,
      "loss": 2.8917,
      "step": 181777
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.590294599533081,
      "learning_rate": 6.355740827845128e-05,
      "loss": 2.6492,
      "step": 181778
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3240370750427246,
      "learning_rate": 6.355489058606001e-05,
      "loss": 2.8667,
      "step": 181779
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.441699504852295,
      "learning_rate": 6.355237293762839e-05,
      "loss": 2.9741,
      "step": 181780
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.667210102081299,
      "learning_rate": 6.354985533315676e-05,
      "loss": 3.041,
      "step": 181781
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4686005115509033,
      "learning_rate": 6.354733777264573e-05,
      "loss": 3.0963,
      "step": 181782
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.6176180839538574,
      "learning_rate": 6.354482025609563e-05,
      "loss": 2.8582,
      "step": 181783
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7418015003204346,
      "learning_rate": 6.354230278350714e-05,
      "loss": 2.8591,
      "step": 181784
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7786178588867188,
      "learning_rate": 6.353978535488055e-05,
      "loss": 3.0025,
      "step": 181785
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0131850242614746,
      "learning_rate": 6.353726797021638e-05,
      "loss": 3.0593,
      "step": 181786
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3394839763641357,
      "learning_rate": 6.353475062951503e-05,
      "loss": 2.9414,
      "step": 181787
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.389106035232544,
      "learning_rate": 6.353223333277713e-05,
      "loss": 2.9269,
      "step": 181788
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.839674711227417,
      "learning_rate": 6.352971608000295e-05,
      "loss": 3.0564,
      "step": 181789
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.971130609512329,
      "learning_rate": 6.352719887119316e-05,
      "loss": 2.7029,
      "step": 181790
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.683875560760498,
      "learning_rate": 6.352468170634803e-05,
      "loss": 2.9285,
      "step": 181791
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5657308101654053,
      "learning_rate": 6.35221645854683e-05,
      "loss": 2.9907,
      "step": 181792
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.9870190620422363,
      "learning_rate": 6.351964750855406e-05,
      "loss": 2.8102,
      "step": 181793
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1911046504974365,
      "learning_rate": 6.351713047560609e-05,
      "loss": 3.0425,
      "step": 181794
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.574714660644531,
      "learning_rate": 6.351461348662469e-05,
      "loss": 2.9725,
      "step": 181795
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.069511890411377,
      "learning_rate": 6.351209654161045e-05,
      "loss": 2.9509,
      "step": 181796
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.805250644683838,
      "learning_rate": 6.350957964056367e-05,
      "loss": 3.1017,
      "step": 181797
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.026947498321533,
      "learning_rate": 6.350706278348512e-05,
      "loss": 2.8296,
      "step": 181798
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5069162845611572,
      "learning_rate": 6.35045459703749e-05,
      "loss": 2.8755,
      "step": 181799
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.713578701019287,
      "learning_rate": 6.350202920123375e-05,
      "loss": 3.0187,
      "step": 181800
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5086748600006104,
      "learning_rate": 6.349951247606191e-05,
      "loss": 2.9089,
      "step": 181801
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.356004238128662,
      "learning_rate": 6.349699579486007e-05,
      "loss": 3.1637,
      "step": 181802
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.607649564743042,
      "learning_rate": 6.349447915762856e-05,
      "loss": 2.82,
      "step": 181803
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.903153419494629,
      "learning_rate": 6.349196256436802e-05,
      "loss": 2.7218,
      "step": 181804
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.539080858230591,
      "learning_rate": 6.348944601507863e-05,
      "loss": 2.8518,
      "step": 181805
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6664845943450928,
      "learning_rate": 6.34869295097611e-05,
      "loss": 2.7404,
      "step": 181806
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.671393871307373,
      "learning_rate": 6.348441304841573e-05,
      "loss": 3.2087,
      "step": 181807
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2231180667877197,
      "learning_rate": 6.348189663104315e-05,
      "loss": 2.98,
      "step": 181808
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.329308032989502,
      "learning_rate": 6.347938025764366e-05,
      "loss": 3.0473,
      "step": 181809
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.2177441120147705,
      "learning_rate": 6.3476863928218e-05,
      "loss": 3.0504,
      "step": 181810
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5365169048309326,
      "learning_rate": 6.34743476427663e-05,
      "loss": 3.087,
      "step": 181811
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.236889600753784,
      "learning_rate": 6.347183140128925e-05,
      "loss": 2.9358,
      "step": 181812
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.408189296722412,
      "learning_rate": 6.346931520378717e-05,
      "loss": 2.8153,
      "step": 181813
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6420657634735107,
      "learning_rate": 6.34667990502607e-05,
      "loss": 2.7278,
      "step": 181814
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7809464931488037,
      "learning_rate": 6.346428294071011e-05,
      "loss": 2.9686,
      "step": 181815
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7475175857543945,
      "learning_rate": 6.346176687513617e-05,
      "loss": 2.8929,
      "step": 181816
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8026740550994873,
      "learning_rate": 6.345925085353897e-05,
      "loss": 3.163,
      "step": 181817
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1591923236846924,
      "learning_rate": 6.345673487591926e-05,
      "loss": 3.0189,
      "step": 181818
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.422841787338257,
      "learning_rate": 6.34542189422773e-05,
      "loss": 2.9749,
      "step": 181819
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3585283756256104,
      "learning_rate": 6.345170305261377e-05,
      "loss": 2.9476,
      "step": 181820
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1052825450897217,
      "learning_rate": 6.344918720692895e-05,
      "loss": 2.7985,
      "step": 181821
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6095516681671143,
      "learning_rate": 6.344667140522356e-05,
      "loss": 2.9098,
      "step": 181822
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.852501630783081,
      "learning_rate": 6.344415564749772e-05,
      "loss": 2.7891,
      "step": 181823
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.484299898147583,
      "learning_rate": 6.344163993375217e-05,
      "loss": 2.8701,
      "step": 181824
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4197049140930176,
      "learning_rate": 6.343912426398716e-05,
      "loss": 3.17,
      "step": 181825
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6012022495269775,
      "learning_rate": 6.343660863820342e-05,
      "loss": 2.861,
      "step": 181826
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.404853105545044,
      "learning_rate": 6.343409305640118e-05,
      "loss": 2.9218,
      "step": 181827
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.883613348007202,
      "learning_rate": 6.343157751858106e-05,
      "loss": 2.9007,
      "step": 181828
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.983423948287964,
      "learning_rate": 6.342906202474354e-05,
      "loss": 2.9515,
      "step": 181829
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.337315797805786,
      "learning_rate": 6.342654657488896e-05,
      "loss": 2.8705,
      "step": 181830
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.474418878555298,
      "learning_rate": 6.34240311690178e-05,
      "loss": 2.8201,
      "step": 181831
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8339760303497314,
      "learning_rate": 6.342151580713065e-05,
      "loss": 2.7153,
      "step": 181832
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7981808185577393,
      "learning_rate": 6.341900048922782e-05,
      "loss": 2.9697,
      "step": 181833
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.550060987472534,
      "learning_rate": 6.341648521530998e-05,
      "loss": 3.1498,
      "step": 181834
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3276984691619873,
      "learning_rate": 6.341396998537744e-05,
      "loss": 2.9136,
      "step": 181835
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.795844316482544,
      "learning_rate": 6.341145479943073e-05,
      "loss": 3.0412,
      "step": 181836
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6645092964172363,
      "learning_rate": 6.340893965747023e-05,
      "loss": 2.7488,
      "step": 181837
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9212517738342285,
      "learning_rate": 6.340642455949651e-05,
      "loss": 2.746,
      "step": 181838
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.828545331954956,
      "learning_rate": 6.340390950550997e-05,
      "loss": 2.8909,
      "step": 181839
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.34702205657959,
      "learning_rate": 6.340139449551114e-05,
      "loss": 2.842,
      "step": 181840
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7077815532684326,
      "learning_rate": 6.339887952950049e-05,
      "loss": 2.8714,
      "step": 181841
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8350532054901123,
      "learning_rate": 6.339636460747846e-05,
      "loss": 3.2395,
      "step": 181842
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3188273906707764,
      "learning_rate": 6.33938497294454e-05,
      "loss": 2.7859,
      "step": 181843
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5441417694091797,
      "learning_rate": 6.3391334895402e-05,
      "loss": 2.7943,
      "step": 181844
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7643213272094727,
      "learning_rate": 6.338882010534852e-05,
      "loss": 3.0567,
      "step": 181845
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.130040645599365,
      "learning_rate": 6.33863053592856e-05,
      "loss": 2.9211,
      "step": 181846
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7250189781188965,
      "learning_rate": 6.338379065721366e-05,
      "loss": 2.8468,
      "step": 181847
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7416841983795166,
      "learning_rate": 6.338127599913313e-05,
      "loss": 2.8413,
      "step": 181848
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.735182523727417,
      "learning_rate": 6.337876138504437e-05,
      "loss": 2.8651,
      "step": 181849
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5364933013916016,
      "learning_rate": 6.337624681494809e-05,
      "loss": 3.2164,
      "step": 181850
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.655609369277954,
      "learning_rate": 6.337373228884453e-05,
      "loss": 3.0232,
      "step": 181851
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.082512378692627,
      "learning_rate": 6.337121780673436e-05,
      "loss": 3.0244,
      "step": 181852
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4433157444000244,
      "learning_rate": 6.336870336861791e-05,
      "loss": 3.0377,
      "step": 181853
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5150339603424072,
      "learning_rate": 6.336618897449564e-05,
      "loss": 2.9121,
      "step": 181854
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7362637519836426,
      "learning_rate": 6.336367462436814e-05,
      "loss": 3.0239,
      "step": 181855
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.776578664779663,
      "learning_rate": 6.336116031823577e-05,
      "loss": 3.0346,
      "step": 181856
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8005523681640625,
      "learning_rate": 6.335864605609898e-05,
      "loss": 3.165,
      "step": 181857
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.665187120437622,
      "learning_rate": 6.335613183795836e-05,
      "loss": 3.0803,
      "step": 181858
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6718497276306152,
      "learning_rate": 6.335361766381429e-05,
      "loss": 2.7683,
      "step": 181859
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.545696258544922,
      "learning_rate": 6.335110353366716e-05,
      "loss": 3.0457,
      "step": 181860
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6190004348754883,
      "learning_rate": 6.334858944751763e-05,
      "loss": 2.7837,
      "step": 181861
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6158735752105713,
      "learning_rate": 6.334607540536607e-05,
      "loss": 3.1337,
      "step": 181862
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4589500427246094,
      "learning_rate": 6.334356140721285e-05,
      "loss": 2.8438,
      "step": 181863
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.55802059173584,
      "learning_rate": 6.33410474530586e-05,
      "loss": 3.0408,
      "step": 181864
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9600133895874023,
      "learning_rate": 6.333853354290373e-05,
      "loss": 2.8571,
      "step": 181865
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.633194923400879,
      "learning_rate": 6.333601967674858e-05,
      "loss": 2.8452,
      "step": 181866
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7586851119995117,
      "learning_rate": 6.333350585459385e-05,
      "loss": 3.0284,
      "step": 181867
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4672157764434814,
      "learning_rate": 6.333099207643982e-05,
      "loss": 2.9313,
      "step": 181868
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.175023317337036,
      "learning_rate": 6.332847834228705e-05,
      "loss": 2.8052,
      "step": 181869
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.591897964477539,
      "learning_rate": 6.332596465213602e-05,
      "loss": 2.9357,
      "step": 181870
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7429192066192627,
      "learning_rate": 6.332345100598716e-05,
      "loss": 3.0262,
      "step": 181871
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9027788639068604,
      "learning_rate": 6.332093740384087e-05,
      "loss": 2.8393,
      "step": 181872
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.134063243865967,
      "learning_rate": 6.331842384569774e-05,
      "loss": 2.9874,
      "step": 181873
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7986881732940674,
      "learning_rate": 6.331591033155811e-05,
      "loss": 2.9842,
      "step": 181874
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.811721086502075,
      "learning_rate": 6.33133968614226e-05,
      "loss": 2.9984,
      "step": 181875
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.435800075531006,
      "learning_rate": 6.331088343529154e-05,
      "loss": 2.8553,
      "step": 181876
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8162729740142822,
      "learning_rate": 6.330837005316558e-05,
      "loss": 2.9485,
      "step": 181877
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6891732215881348,
      "learning_rate": 6.330585671504493e-05,
      "loss": 2.7249,
      "step": 181878
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9077322483062744,
      "learning_rate": 6.330334342093027e-05,
      "loss": 2.9073,
      "step": 181879
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1067135334014893,
      "learning_rate": 6.330083017082186e-05,
      "loss": 3.091,
      "step": 181880
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8400096893310547,
      "learning_rate": 6.329831696472041e-05,
      "loss": 2.9608,
      "step": 181881
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.868518590927124,
      "learning_rate": 6.329580380262618e-05,
      "loss": 2.9575,
      "step": 181882
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.093863010406494,
      "learning_rate": 6.329329068453991e-05,
      "loss": 2.8089,
      "step": 181883
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.938136339187622,
      "learning_rate": 6.329077761046171e-05,
      "loss": 3.0337,
      "step": 181884
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6816234588623047,
      "learning_rate": 6.32882645803923e-05,
      "loss": 2.7649,
      "step": 181885
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0387070178985596,
      "learning_rate": 6.328575159433199e-05,
      "loss": 2.8954,
      "step": 181886
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.675771474838257,
      "learning_rate": 6.32832386522814e-05,
      "loss": 2.7209,
      "step": 181887
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4802775382995605,
      "learning_rate": 6.328072575424086e-05,
      "loss": 2.6588,
      "step": 181888
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3628294467926025,
      "learning_rate": 6.327821290021106e-05,
      "loss": 2.8142,
      "step": 181889
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.212454080581665,
      "learning_rate": 6.327570009019218e-05,
      "loss": 3.0772,
      "step": 181890
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.641178846359253,
      "learning_rate": 6.327318732418484e-05,
      "loss": 3.0444,
      "step": 181891
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7997074127197266,
      "learning_rate": 6.327067460218943e-05,
      "loss": 3.053,
      "step": 181892
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7796525955200195,
      "learning_rate": 6.326816192420655e-05,
      "loss": 3.1633,
      "step": 181893
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4913856983184814,
      "learning_rate": 6.32656492902365e-05,
      "loss": 3.1976,
      "step": 181894
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4411420822143555,
      "learning_rate": 6.32631367002799e-05,
      "loss": 2.9615,
      "step": 181895
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8324921131134033,
      "learning_rate": 6.326062415433717e-05,
      "loss": 2.8989,
      "step": 181896
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7313761711120605,
      "learning_rate": 6.325811165240876e-05,
      "loss": 2.7838,
      "step": 181897
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.112183094024658,
      "learning_rate": 6.325559919449505e-05,
      "loss": 3.1356,
      "step": 181898
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.032375812530518,
      "learning_rate": 6.325308678059668e-05,
      "loss": 3.0373,
      "step": 181899
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3296217918395996,
      "learning_rate": 6.325057441071395e-05,
      "loss": 2.8144,
      "step": 181900
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6210289001464844,
      "learning_rate": 6.324806208484748e-05,
      "loss": 2.8126,
      "step": 181901
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7747137546539307,
      "learning_rate": 6.324554980299765e-05,
      "loss": 2.9767,
      "step": 181902
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.502526044845581,
      "learning_rate": 6.324303756516496e-05,
      "loss": 2.9379,
      "step": 181903
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5448038578033447,
      "learning_rate": 6.324052537134979e-05,
      "loss": 3.1473,
      "step": 181904
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.455142021179199,
      "learning_rate": 6.323801322155272e-05,
      "loss": 3.1771,
      "step": 181905
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1400983333587646,
      "learning_rate": 6.323550111577413e-05,
      "loss": 3.0297,
      "step": 181906
    },
    {
      "epoch": 2.37,
      "grad_norm": 5.071937561035156,
      "learning_rate": 6.32329890540146e-05,
      "loss": 2.9094,
      "step": 181907
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.742682695388794,
      "learning_rate": 6.323047703627451e-05,
      "loss": 2.9816,
      "step": 181908
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4541733264923096,
      "learning_rate": 6.322796506255438e-05,
      "loss": 2.834,
      "step": 181909
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.158332347869873,
      "learning_rate": 6.322545313285453e-05,
      "loss": 2.9147,
      "step": 181910
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.404052972793579,
      "learning_rate": 6.32229412471756e-05,
      "loss": 2.9042,
      "step": 181911
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3195576667785645,
      "learning_rate": 6.322042940551797e-05,
      "loss": 3.1125,
      "step": 181912
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3657121658325195,
      "learning_rate": 6.321791760788215e-05,
      "loss": 2.9261,
      "step": 181913
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6625585556030273,
      "learning_rate": 6.321540585426862e-05,
      "loss": 2.8225,
      "step": 181914
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.536600112915039,
      "learning_rate": 6.321289414467784e-05,
      "loss": 2.6542,
      "step": 181915
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.972886800765991,
      "learning_rate": 6.321038247911015e-05,
      "loss": 2.8938,
      "step": 181916
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.829134225845337,
      "learning_rate": 6.320787085756616e-05,
      "loss": 2.8556,
      "step": 181917
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.179169178009033,
      "learning_rate": 6.320535928004628e-05,
      "loss": 2.9296,
      "step": 181918
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.423186779022217,
      "learning_rate": 6.320284774655103e-05,
      "loss": 3.0419,
      "step": 181919
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9815545082092285,
      "learning_rate": 6.320033625708087e-05,
      "loss": 2.8143,
      "step": 181920
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.053694725036621,
      "learning_rate": 6.319782481163624e-05,
      "loss": 2.6992,
      "step": 181921
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8866522312164307,
      "learning_rate": 6.31953134102175e-05,
      "loss": 2.8185,
      "step": 181922
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.78562593460083,
      "learning_rate": 6.319280205282529e-05,
      "loss": 2.9927,
      "step": 181923
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.867891788482666,
      "learning_rate": 6.319029073945997e-05,
      "loss": 3.0764,
      "step": 181924
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3966474533081055,
      "learning_rate": 6.318777947012209e-05,
      "loss": 2.5855,
      "step": 181925
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.528618335723877,
      "learning_rate": 6.318526824481209e-05,
      "loss": 3.1566,
      "step": 181926
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.53692364692688,
      "learning_rate": 6.318275706353042e-05,
      "loss": 2.775,
      "step": 181927
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.297591686248779,
      "learning_rate": 6.318024592627747e-05,
      "loss": 3.0606,
      "step": 181928
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.1458587646484375,
      "learning_rate": 6.317773483305384e-05,
      "loss": 2.9523,
      "step": 181929
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.798957586288452,
      "learning_rate": 6.317522378385987e-05,
      "loss": 2.8821,
      "step": 181930
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.243030309677124,
      "learning_rate": 6.317271277869617e-05,
      "loss": 3.0334,
      "step": 181931
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2967140674591064,
      "learning_rate": 6.317020181756316e-05,
      "loss": 3.0475,
      "step": 181932
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.163494348526001,
      "learning_rate": 6.316769090046126e-05,
      "loss": 2.8145,
      "step": 181933
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1834499835968018,
      "learning_rate": 6.316518002739086e-05,
      "loss": 3.1111,
      "step": 181934
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.733851432800293,
      "learning_rate": 6.316266919835265e-05,
      "loss": 3.1234,
      "step": 181935
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3307435512542725,
      "learning_rate": 6.316015841334684e-05,
      "loss": 2.7881,
      "step": 181936
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6771249771118164,
      "learning_rate": 6.315764767237414e-05,
      "loss": 2.8995,
      "step": 181937
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6806094646453857,
      "learning_rate": 6.315513697543489e-05,
      "loss": 3.1078,
      "step": 181938
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.452498197555542,
      "learning_rate": 6.315262632252949e-05,
      "loss": 3.0421,
      "step": 181939
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.27641224861145,
      "learning_rate": 6.315011571365859e-05,
      "loss": 2.9599,
      "step": 181940
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.308130979537964,
      "learning_rate": 6.314760514882253e-05,
      "loss": 3.1135,
      "step": 181941
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9048657417297363,
      "learning_rate": 6.314509462802172e-05,
      "loss": 3.1034,
      "step": 181942
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8884458541870117,
      "learning_rate": 6.314258415125681e-05,
      "loss": 2.8882,
      "step": 181943
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4032788276672363,
      "learning_rate": 6.314007371852814e-05,
      "loss": 2.7944,
      "step": 181944
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.249479055404663,
      "learning_rate": 6.313756332983612e-05,
      "loss": 3.0083,
      "step": 181945
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.6261093616485596,
      "learning_rate": 6.313505298518141e-05,
      "loss": 3.1538,
      "step": 181946
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6632840633392334,
      "learning_rate": 6.313254268456434e-05,
      "loss": 2.9693,
      "step": 181947
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4700238704681396,
      "learning_rate": 6.313003242798533e-05,
      "loss": 2.7137,
      "step": 181948
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6539361476898193,
      "learning_rate": 6.312752221544501e-05,
      "loss": 2.9823,
      "step": 181949
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5539698600769043,
      "learning_rate": 6.312501204694371e-05,
      "loss": 2.7656,
      "step": 181950
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.8229453563690186,
      "learning_rate": 6.31225019224819e-05,
      "loss": 2.9167,
      "step": 181951
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9859349727630615,
      "learning_rate": 6.311999184206015e-05,
      "loss": 3.2051,
      "step": 181952
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7254083156585693,
      "learning_rate": 6.311748180567881e-05,
      "loss": 3.055,
      "step": 181953
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5537452697753906,
      "learning_rate": 6.311497181333847e-05,
      "loss": 2.9841,
      "step": 181954
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1003522872924805,
      "learning_rate": 6.31124618650395e-05,
      "loss": 2.7642,
      "step": 181955
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.593445301055908,
      "learning_rate": 6.310995196078243e-05,
      "loss": 2.8931,
      "step": 181956
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3886566162109375,
      "learning_rate": 6.310744210056761e-05,
      "loss": 3.1026,
      "step": 181957
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0508930683135986,
      "learning_rate": 6.310493228439565e-05,
      "loss": 3.0213,
      "step": 181958
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.495131492614746,
      "learning_rate": 6.310242251226689e-05,
      "loss": 2.9706,
      "step": 181959
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6908326148986816,
      "learning_rate": 6.309991278418194e-05,
      "loss": 3.0559,
      "step": 181960
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.894007921218872,
      "learning_rate": 6.309740310014109e-05,
      "loss": 2.9327,
      "step": 181961
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.839163064956665,
      "learning_rate": 6.3094893460145e-05,
      "loss": 2.8032,
      "step": 181962
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9495692253112793,
      "learning_rate": 6.309238386419405e-05,
      "loss": 2.8927,
      "step": 181963
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.946406126022339,
      "learning_rate": 6.308987431228867e-05,
      "loss": 2.7836,
      "step": 181964
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3767876625061035,
      "learning_rate": 6.308736480442929e-05,
      "loss": 2.9285,
      "step": 181965
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6724634170532227,
      "learning_rate": 6.308485534061654e-05,
      "loss": 3.0276,
      "step": 181966
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0418028831481934,
      "learning_rate": 6.308234592085068e-05,
      "loss": 3.1049,
      "step": 181967
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.555905818939209,
      "learning_rate": 6.307983654513237e-05,
      "loss": 3.1526,
      "step": 181968
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3328230381011963,
      "learning_rate": 6.307732721346202e-05,
      "loss": 2.8279,
      "step": 181969
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6691102981567383,
      "learning_rate": 6.307481792584003e-05,
      "loss": 2.9448,
      "step": 181970
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.802692413330078,
      "learning_rate": 6.307230868226684e-05,
      "loss": 2.9393,
      "step": 181971
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7550737857818604,
      "learning_rate": 6.306979948274304e-05,
      "loss": 2.7203,
      "step": 181972
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1709823608398438,
      "learning_rate": 6.306729032726898e-05,
      "loss": 2.8839,
      "step": 181973
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.723952054977417,
      "learning_rate": 6.306478121584525e-05,
      "loss": 2.7421,
      "step": 181974
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.315830707550049,
      "learning_rate": 6.306227214847224e-05,
      "loss": 2.9958,
      "step": 181975
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5562689304351807,
      "learning_rate": 6.305976312515048e-05,
      "loss": 2.7682,
      "step": 181976
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6571245193481445,
      "learning_rate": 6.305725414588023e-05,
      "loss": 3.0743,
      "step": 181977
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5562281608581543,
      "learning_rate": 6.305474521066224e-05,
      "loss": 2.7255,
      "step": 181978
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.473541498184204,
      "learning_rate": 6.305223631949673e-05,
      "loss": 3.0092,
      "step": 181979
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8346030712127686,
      "learning_rate": 6.30497274723844e-05,
      "loss": 2.9989,
      "step": 181980
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6296701431274414,
      "learning_rate": 6.304721866932557e-05,
      "loss": 3.15,
      "step": 181981
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.6633217334747314,
      "learning_rate": 6.304470991032073e-05,
      "loss": 3.1897,
      "step": 181982
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.640378952026367,
      "learning_rate": 6.304220119537028e-05,
      "loss": 2.9094,
      "step": 181983
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5708258152008057,
      "learning_rate": 6.303969252447488e-05,
      "loss": 2.8277,
      "step": 181984
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8856446743011475,
      "learning_rate": 6.303718389763475e-05,
      "loss": 3.076,
      "step": 181985
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0359911918640137,
      "learning_rate": 6.303467531485054e-05,
      "loss": 2.8835,
      "step": 181986
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.937143087387085,
      "learning_rate": 6.30321667761227e-05,
      "loss": 2.8355,
      "step": 181987
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5866518020629883,
      "learning_rate": 6.302965828145166e-05,
      "loss": 2.8653,
      "step": 181988
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.498155355453491,
      "learning_rate": 6.302714983083776e-05,
      "loss": 3.1195,
      "step": 181989
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.724424123764038,
      "learning_rate": 6.30246414242817e-05,
      "loss": 3.1728,
      "step": 181990
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6339845657348633,
      "learning_rate": 6.302213306178373e-05,
      "loss": 2.8752,
      "step": 181991
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.30218243598938,
      "learning_rate": 6.30196247433445e-05,
      "loss": 3.0982,
      "step": 181992
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9289498329162598,
      "learning_rate": 6.30171164689644e-05,
      "loss": 2.9761,
      "step": 181993
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.455228090286255,
      "learning_rate": 6.301460823864389e-05,
      "loss": 3.2058,
      "step": 181994
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.268608808517456,
      "learning_rate": 6.301210005238334e-05,
      "loss": 2.7688,
      "step": 181995
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.293299436569214,
      "learning_rate": 6.300959191018344e-05,
      "loss": 3.1944,
      "step": 181996
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6828649044036865,
      "learning_rate": 6.30070838120444e-05,
      "loss": 2.9147,
      "step": 181997
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7293365001678467,
      "learning_rate": 6.300457575796695e-05,
      "loss": 3.1271,
      "step": 181998
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4678850173950195,
      "learning_rate": 6.300206774795138e-05,
      "loss": 2.7929,
      "step": 181999
    },
    {
      "epoch": 2.37,
      "grad_norm": 6.562725067138672,
      "learning_rate": 6.29995597819982e-05,
      "loss": 2.7595,
      "step": 182000
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.557253122329712,
      "learning_rate": 6.299705186010781e-05,
      "loss": 2.796,
      "step": 182001
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5027947425842285,
      "learning_rate": 6.299454398228084e-05,
      "loss": 3.0651,
      "step": 182002
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.039100170135498,
      "learning_rate": 6.299203614851754e-05,
      "loss": 2.9602,
      "step": 182003
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.052795886993408,
      "learning_rate": 6.298952835881861e-05,
      "loss": 2.584,
      "step": 182004
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9372689723968506,
      "learning_rate": 6.298702061318436e-05,
      "loss": 3.1597,
      "step": 182005
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.566704511642456,
      "learning_rate": 6.298451291161533e-05,
      "loss": 2.9345,
      "step": 182006
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5033504962921143,
      "learning_rate": 6.298200525411184e-05,
      "loss": 2.9481,
      "step": 182007
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.6344101428985596,
      "learning_rate": 6.297949764067458e-05,
      "loss": 2.9731,
      "step": 182008
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.020569801330566,
      "learning_rate": 6.297699007130383e-05,
      "loss": 2.901,
      "step": 182009
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.116445302963257,
      "learning_rate": 6.29744825460002e-05,
      "loss": 2.7688,
      "step": 182010
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6394248008728027,
      "learning_rate": 6.29719750647641e-05,
      "loss": 3.2119,
      "step": 182011
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3060436248779297,
      "learning_rate": 6.296946762759596e-05,
      "loss": 3.0528,
      "step": 182012
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9189560413360596,
      "learning_rate": 6.296696023449621e-05,
      "loss": 2.9427,
      "step": 182013
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.564030170440674,
      "learning_rate": 6.296445288546545e-05,
      "loss": 2.6839,
      "step": 182014
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.569135904312134,
      "learning_rate": 6.296194558050399e-05,
      "loss": 3.0339,
      "step": 182015
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0268051624298096,
      "learning_rate": 6.295943831961245e-05,
      "loss": 3.1511,
      "step": 182016
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.783818483352661,
      "learning_rate": 6.295693110279123e-05,
      "loss": 3.0294,
      "step": 182017
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.365408420562744,
      "learning_rate": 6.295442393004079e-05,
      "loss": 2.9031,
      "step": 182018
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.135624647140503,
      "learning_rate": 6.295191680136153e-05,
      "loss": 3.0149,
      "step": 182019
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.288367748260498,
      "learning_rate": 6.294940971675404e-05,
      "loss": 3.0118,
      "step": 182020
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7305963039398193,
      "learning_rate": 6.294690267621867e-05,
      "loss": 3.2351,
      "step": 182021
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.815476655960083,
      "learning_rate": 6.294439567975605e-05,
      "loss": 2.7765,
      "step": 182022
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7552013397216797,
      "learning_rate": 6.29418887273665e-05,
      "loss": 2.647,
      "step": 182023
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6453399658203125,
      "learning_rate": 6.293938181905053e-05,
      "loss": 2.669,
      "step": 182024
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.951521158218384,
      "learning_rate": 6.293687495480854e-05,
      "loss": 2.8556,
      "step": 182025
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6491639614105225,
      "learning_rate": 6.293436813464112e-05,
      "loss": 2.9174,
      "step": 182026
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.452986240386963,
      "learning_rate": 6.293186135854861e-05,
      "loss": 2.8142,
      "step": 182027
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.451789617538452,
      "learning_rate": 6.292935462653162e-05,
      "loss": 3.0445,
      "step": 182028
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.484968662261963,
      "learning_rate": 6.292684793859048e-05,
      "loss": 2.7672,
      "step": 182029
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9547128677368164,
      "learning_rate": 6.292434129472577e-05,
      "loss": 3.1782,
      "step": 182030
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.372520923614502,
      "learning_rate": 6.29218346949379e-05,
      "loss": 3.0065,
      "step": 182031
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5906667709350586,
      "learning_rate": 6.291932813922734e-05,
      "loss": 2.9432,
      "step": 182032
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8512980937957764,
      "learning_rate": 6.291682162759446e-05,
      "loss": 2.9651,
      "step": 182033
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4146323204040527,
      "learning_rate": 6.291431516003991e-05,
      "loss": 3.2426,
      "step": 182034
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.344973564147949,
      "learning_rate": 6.291180873656399e-05,
      "loss": 2.6356,
      "step": 182035
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6693105697631836,
      "learning_rate": 6.290930235716733e-05,
      "loss": 2.8828,
      "step": 182036
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.605402708053589,
      "learning_rate": 6.290679602185027e-05,
      "loss": 2.9468,
      "step": 182037
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.2997193336486816,
      "learning_rate": 6.290428973061326e-05,
      "loss": 2.8951,
      "step": 182038
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4159436225891113,
      "learning_rate": 6.290178348345688e-05,
      "loss": 3.0569,
      "step": 182039
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2721118927001953,
      "learning_rate": 6.289927728038154e-05,
      "loss": 3.0353,
      "step": 182040
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8890726566314697,
      "learning_rate": 6.289677112138764e-05,
      "loss": 2.9744,
      "step": 182041
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7333033084869385,
      "learning_rate": 6.289426500647579e-05,
      "loss": 2.8608,
      "step": 182042
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9763269424438477,
      "learning_rate": 6.289175893564633e-05,
      "loss": 3.0204,
      "step": 182043
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7783162593841553,
      "learning_rate": 6.288925290889972e-05,
      "loss": 2.9777,
      "step": 182044
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0284512042999268,
      "learning_rate": 6.288674692623653e-05,
      "loss": 3.1896,
      "step": 182045
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.61480975151062,
      "learning_rate": 6.288424098765719e-05,
      "loss": 2.929,
      "step": 182046
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.746241569519043,
      "learning_rate": 6.288173509316208e-05,
      "loss": 3.06,
      "step": 182047
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6185126304626465,
      "learning_rate": 6.287922924275177e-05,
      "loss": 3.2274,
      "step": 182048
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.860875368118286,
      "learning_rate": 6.28767234364267e-05,
      "loss": 2.9872,
      "step": 182049
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3870112895965576,
      "learning_rate": 6.287421767418726e-05,
      "loss": 2.8931,
      "step": 182050
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.301236152648926,
      "learning_rate": 6.287171195603406e-05,
      "loss": 2.7804,
      "step": 182051
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.450847864151001,
      "learning_rate": 6.28692062819674e-05,
      "loss": 2.8892,
      "step": 182052
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5897324085235596,
      "learning_rate": 6.28667006519879e-05,
      "loss": 2.86,
      "step": 182053
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0823771953582764,
      "learning_rate": 6.286419506609597e-05,
      "loss": 2.9099,
      "step": 182054
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1440021991729736,
      "learning_rate": 6.286168952429204e-05,
      "loss": 3.0609,
      "step": 182055
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.702080249786377,
      "learning_rate": 6.285918402657653e-05,
      "loss": 3.0634,
      "step": 182056
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.335098743438721,
      "learning_rate": 6.285667857295006e-05,
      "loss": 2.9314,
      "step": 182057
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4092965126037598,
      "learning_rate": 6.285417316341294e-05,
      "loss": 2.8914,
      "step": 182058
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3915903568267822,
      "learning_rate": 6.285166779796577e-05,
      "loss": 3.0519,
      "step": 182059
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.591524124145508,
      "learning_rate": 6.284916247660896e-05,
      "loss": 2.8785,
      "step": 182060
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.516104221343994,
      "learning_rate": 6.284665719934295e-05,
      "loss": 2.8543,
      "step": 182061
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.440932750701904,
      "learning_rate": 6.284415196616815e-05,
      "loss": 2.6694,
      "step": 182062
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.536844253540039,
      "learning_rate": 6.28416467770852e-05,
      "loss": 2.7746,
      "step": 182063
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9418063163757324,
      "learning_rate": 6.283914163209434e-05,
      "loss": 3.1524,
      "step": 182064
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.259509801864624,
      "learning_rate": 6.283663653119629e-05,
      "loss": 2.7405,
      "step": 182065
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4286153316497803,
      "learning_rate": 6.283413147439137e-05,
      "loss": 2.9025,
      "step": 182066
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.553685188293457,
      "learning_rate": 6.283162646168004e-05,
      "loss": 2.823,
      "step": 182067
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.928035020828247,
      "learning_rate": 6.282912149306271e-05,
      "loss": 3.0486,
      "step": 182068
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6786117553710938,
      "learning_rate": 6.282661656854004e-05,
      "loss": 2.8579,
      "step": 182069
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8909807205200195,
      "learning_rate": 6.282411168811226e-05,
      "loss": 2.7887,
      "step": 182070
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.580138683319092,
      "learning_rate": 6.282160685178008e-05,
      "loss": 2.7844,
      "step": 182071
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.926445722579956,
      "learning_rate": 6.281910205954378e-05,
      "loss": 2.6712,
      "step": 182072
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.662407159805298,
      "learning_rate": 6.281659731140391e-05,
      "loss": 2.889,
      "step": 182073
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1725399494171143,
      "learning_rate": 6.281409260736084e-05,
      "loss": 2.836,
      "step": 182074
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.404965877532959,
      "learning_rate": 6.281158794741519e-05,
      "loss": 2.9439,
      "step": 182075
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7163643836975098,
      "learning_rate": 6.280908333156727e-05,
      "loss": 2.9326,
      "step": 182076
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.063044548034668,
      "learning_rate": 6.280657875981768e-05,
      "loss": 2.7941,
      "step": 182077
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7467668056488037,
      "learning_rate": 6.280407423216684e-05,
      "loss": 2.7382,
      "step": 182078
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.402557849884033,
      "learning_rate": 6.28015697486152e-05,
      "loss": 3.0353,
      "step": 182079
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.465304136276245,
      "learning_rate": 6.279906530916311e-05,
      "loss": 3.1161,
      "step": 182080
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.684333324432373,
      "learning_rate": 6.279656091381127e-05,
      "loss": 2.9878,
      "step": 182081
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1553831100463867,
      "learning_rate": 6.279405656255993e-05,
      "loss": 2.992,
      "step": 182082
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.2305006980895996,
      "learning_rate": 6.279155225540975e-05,
      "loss": 2.8365,
      "step": 182083
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.496692419052124,
      "learning_rate": 6.278904799236109e-05,
      "loss": 2.8717,
      "step": 182084
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7096099853515625,
      "learning_rate": 6.278654377341442e-05,
      "loss": 2.7889,
      "step": 182085
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7578203678131104,
      "learning_rate": 6.278403959857012e-05,
      "loss": 3.0838,
      "step": 182086
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6435930728912354,
      "learning_rate": 6.278153546782883e-05,
      "loss": 2.9524,
      "step": 182087
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.2022037506103516,
      "learning_rate": 6.277903138119085e-05,
      "loss": 2.9591,
      "step": 182088
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6683411598205566,
      "learning_rate": 6.277652733865683e-05,
      "loss": 2.8143,
      "step": 182089
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7841684818267822,
      "learning_rate": 6.277402334022712e-05,
      "loss": 3.106,
      "step": 182090
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4594950675964355,
      "learning_rate": 6.277151938590217e-05,
      "loss": 3.0557,
      "step": 182091
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4756155014038086,
      "learning_rate": 6.27690154756824e-05,
      "loss": 2.9421,
      "step": 182092
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1478588581085205,
      "learning_rate": 6.276651160956846e-05,
      "loss": 2.901,
      "step": 182093
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.09370756149292,
      "learning_rate": 6.27640077875606e-05,
      "loss": 2.9606,
      "step": 182094
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.66723370552063,
      "learning_rate": 6.276150400965949e-05,
      "loss": 2.7922,
      "step": 182095
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.443458318710327,
      "learning_rate": 6.275900027586536e-05,
      "loss": 3.1169,
      "step": 182096
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6563808917999268,
      "learning_rate": 6.275649658617903e-05,
      "loss": 2.6849,
      "step": 182097
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5457496643066406,
      "learning_rate": 6.275399294060058e-05,
      "loss": 2.7591,
      "step": 182098
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.720587730407715,
      "learning_rate": 6.27514893391307e-05,
      "loss": 3.179,
      "step": 182099
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9801673889160156,
      "learning_rate": 6.27489857817697e-05,
      "loss": 2.9971,
      "step": 182100
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.79940128326416,
      "learning_rate": 6.274648226851824e-05,
      "loss": 3.1305,
      "step": 182101
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6464338302612305,
      "learning_rate": 6.274397879937661e-05,
      "loss": 3.2389,
      "step": 182102
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.945603370666504,
      "learning_rate": 6.274147537434553e-05,
      "loss": 3.0261,
      "step": 182103
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.066706657409668,
      "learning_rate": 6.273897199342511e-05,
      "loss": 2.7999,
      "step": 182104
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4606103897094727,
      "learning_rate": 6.273646865661607e-05,
      "loss": 3.0373,
      "step": 182105
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0318374633789062,
      "learning_rate": 6.273396536391874e-05,
      "loss": 2.7736,
      "step": 182106
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.519570827484131,
      "learning_rate": 6.273146211533369e-05,
      "loss": 2.8645,
      "step": 182107
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2102010250091553,
      "learning_rate": 6.272895891086128e-05,
      "loss": 3.099,
      "step": 182108
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.983196258544922,
      "learning_rate": 6.272645575050219e-05,
      "loss": 2.9437,
      "step": 182109
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.353437662124634,
      "learning_rate": 6.272395263425658e-05,
      "loss": 3.056,
      "step": 182110
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.780191659927368,
      "learning_rate": 6.272144956212514e-05,
      "loss": 2.8589,
      "step": 182111
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.301620960235596,
      "learning_rate": 6.271894653410819e-05,
      "loss": 2.8293,
      "step": 182112
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.84999680519104,
      "learning_rate": 6.271644355020632e-05,
      "loss": 2.8094,
      "step": 182113
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0456056594848633,
      "learning_rate": 6.271394061041988e-05,
      "loss": 2.7572,
      "step": 182114
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.429710626602173,
      "learning_rate": 6.27114377147495e-05,
      "loss": 3.0293,
      "step": 182115
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.8062644004821777,
      "learning_rate": 6.270893486319552e-05,
      "loss": 2.9619,
      "step": 182116
    },
    {
      "epoch": 2.37,
      "grad_norm": 6.00740909576416,
      "learning_rate": 6.270643205575843e-05,
      "loss": 2.7506,
      "step": 182117
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.943859338760376,
      "learning_rate": 6.27039292924386e-05,
      "loss": 2.8154,
      "step": 182118
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.445061206817627,
      "learning_rate": 6.270142657323669e-05,
      "loss": 2.8538,
      "step": 182119
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9345037937164307,
      "learning_rate": 6.269892389815298e-05,
      "loss": 2.8084,
      "step": 182120
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6258246898651123,
      "learning_rate": 6.26964212671881e-05,
      "loss": 2.8332,
      "step": 182121
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5165159702301025,
      "learning_rate": 6.269391868034244e-05,
      "loss": 2.7743,
      "step": 182122
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.679070472717285,
      "learning_rate": 6.269141613761636e-05,
      "loss": 2.7786,
      "step": 182123
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5888547897338867,
      "learning_rate": 6.268891363901052e-05,
      "loss": 2.9988,
      "step": 182124
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.563561201095581,
      "learning_rate": 6.268641118452529e-05,
      "loss": 2.8518,
      "step": 182125
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.585516929626465,
      "learning_rate": 6.268390877416104e-05,
      "loss": 2.8215,
      "step": 182126
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.08817720413208,
      "learning_rate": 6.268140640791845e-05,
      "loss": 3.1746,
      "step": 182127
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.497920513153076,
      "learning_rate": 6.267890408579783e-05,
      "loss": 3.2041,
      "step": 182128
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.672178030014038,
      "learning_rate": 6.267640180779959e-05,
      "loss": 3.0453,
      "step": 182129
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4373488426208496,
      "learning_rate": 6.267389957392439e-05,
      "loss": 3.0872,
      "step": 182130
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.833238124847412,
      "learning_rate": 6.267139738417259e-05,
      "loss": 2.8398,
      "step": 182131
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.519559860229492,
      "learning_rate": 6.266889523854461e-05,
      "loss": 3.254,
      "step": 182132
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.901027202606201,
      "learning_rate": 6.266639313704099e-05,
      "loss": 2.6662,
      "step": 182133
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.533940315246582,
      "learning_rate": 6.26638910796622e-05,
      "loss": 2.8888,
      "step": 182134
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5907437801361084,
      "learning_rate": 6.266138906640855e-05,
      "loss": 2.7806,
      "step": 182135
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5110924243927,
      "learning_rate": 6.265888709728075e-05,
      "loss": 2.9484,
      "step": 182136
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.7084226608276367,
      "learning_rate": 6.265638517227907e-05,
      "loss": 3.0498,
      "step": 182137
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4983654022216797,
      "learning_rate": 6.26538832914041e-05,
      "loss": 2.8801,
      "step": 182138
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.394302845001221,
      "learning_rate": 6.265138145465625e-05,
      "loss": 3.057,
      "step": 182139
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8101141452789307,
      "learning_rate": 6.264887966203602e-05,
      "loss": 2.7858,
      "step": 182140
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.947357416152954,
      "learning_rate": 6.264637791354372e-05,
      "loss": 2.8402,
      "step": 182141
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.9300076961517334,
      "learning_rate": 6.264387620918006e-05,
      "loss": 3.0796,
      "step": 182142
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.555044651031494,
      "learning_rate": 6.264137454894528e-05,
      "loss": 2.7171,
      "step": 182143
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.0077900886535645,
      "learning_rate": 6.263887293284002e-05,
      "loss": 2.99,
      "step": 182144
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.303744316101074,
      "learning_rate": 6.263637136086472e-05,
      "loss": 2.9166,
      "step": 182145
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0604918003082275,
      "learning_rate": 6.263386983301976e-05,
      "loss": 2.8807,
      "step": 182146
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.780299663543701,
      "learning_rate": 6.263136834930556e-05,
      "loss": 3.1361,
      "step": 182147
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7197535037994385,
      "learning_rate": 6.262886690972278e-05,
      "loss": 3.0071,
      "step": 182148
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.213505268096924,
      "learning_rate": 6.262636551427167e-05,
      "loss": 3.0708,
      "step": 182149
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.744929075241089,
      "learning_rate": 6.262386416295288e-05,
      "loss": 2.8975,
      "step": 182150
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.779513359069824,
      "learning_rate": 6.26213628557668e-05,
      "loss": 2.9317,
      "step": 182151
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6797544956207275,
      "learning_rate": 6.261886159271388e-05,
      "loss": 3.0865,
      "step": 182152
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6798605918884277,
      "learning_rate": 6.261636037379454e-05,
      "loss": 2.6618,
      "step": 182153
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.814063787460327,
      "learning_rate": 6.261385919900939e-05,
      "loss": 3.1244,
      "step": 182154
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2015738487243652,
      "learning_rate": 6.261135806835867e-05,
      "loss": 2.9546,
      "step": 182155
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3333792686462402,
      "learning_rate": 6.260885698184311e-05,
      "loss": 2.9349,
      "step": 182156
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.668079376220703,
      "learning_rate": 6.2606355939463e-05,
      "loss": 2.732,
      "step": 182157
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.263072967529297,
      "learning_rate": 6.26038549412189e-05,
      "loss": 2.9269,
      "step": 182158
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.639601945877075,
      "learning_rate": 6.260135398711109e-05,
      "loss": 2.667,
      "step": 182159
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1778178215026855,
      "learning_rate": 6.259885307714027e-05,
      "loss": 3.0343,
      "step": 182160
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.264371633529663,
      "learning_rate": 6.259635221130676e-05,
      "loss": 2.896,
      "step": 182161
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.343379020690918,
      "learning_rate": 6.259385138961113e-05,
      "loss": 2.8709,
      "step": 182162
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3534493446350098,
      "learning_rate": 6.25913506120537e-05,
      "loss": 2.8166,
      "step": 182163
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4388668537139893,
      "learning_rate": 6.258884987863515e-05,
      "loss": 3.0565,
      "step": 182164
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.989985466003418,
      "learning_rate": 6.258634918935567e-05,
      "loss": 2.5035,
      "step": 182165
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.796067953109741,
      "learning_rate": 6.258384854421598e-05,
      "loss": 2.835,
      "step": 182166
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.351757287979126,
      "learning_rate": 6.258134794321632e-05,
      "loss": 2.9583,
      "step": 182167
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.988293409347534,
      "learning_rate": 6.257884738635737e-05,
      "loss": 3.0168,
      "step": 182168
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.675745725631714,
      "learning_rate": 6.257634687363941e-05,
      "loss": 2.8372,
      "step": 182169
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4866368770599365,
      "learning_rate": 6.257384640506312e-05,
      "loss": 2.9099,
      "step": 182170
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8277535438537598,
      "learning_rate": 6.257134598062872e-05,
      "loss": 2.8106,
      "step": 182171
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.043351650238037,
      "learning_rate": 6.256884560033684e-05,
      "loss": 2.9609,
      "step": 182172
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.676274538040161,
      "learning_rate": 6.256634526418782e-05,
      "loss": 3.0786,
      "step": 182173
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.946167230606079,
      "learning_rate": 6.256384497218227e-05,
      "loss": 2.9237,
      "step": 182174
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2039999961853027,
      "learning_rate": 6.256134472432053e-05,
      "loss": 2.8736,
      "step": 182175
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0384268760681152,
      "learning_rate": 6.255884452060326e-05,
      "loss": 2.7,
      "step": 182176
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.543795585632324,
      "learning_rate": 6.255634436103063e-05,
      "loss": 3.1516,
      "step": 182177
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8551840782165527,
      "learning_rate": 6.255384424560331e-05,
      "loss": 3.0681,
      "step": 182178
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.847978115081787,
      "learning_rate": 6.255134417432164e-05,
      "loss": 2.7529,
      "step": 182179
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6117215156555176,
      "learning_rate": 6.254884414718627e-05,
      "loss": 2.999,
      "step": 182180
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.359210968017578,
      "learning_rate": 6.254634416419745e-05,
      "loss": 3.1076,
      "step": 182181
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9859585762023926,
      "learning_rate": 6.254384422535591e-05,
      "loss": 2.9807,
      "step": 182182
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7973153591156006,
      "learning_rate": 6.254134433066179e-05,
      "loss": 2.8844,
      "step": 182183
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6017985343933105,
      "learning_rate": 6.253884448011579e-05,
      "loss": 3.2384,
      "step": 182184
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6473917961120605,
      "learning_rate": 6.253634467371824e-05,
      "loss": 2.715,
      "step": 182185
    },
    {
      "epoch": 2.37,
      "grad_norm": 5.692461967468262,
      "learning_rate": 6.253384491146975e-05,
      "loss": 2.8701,
      "step": 182186
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.748333692550659,
      "learning_rate": 6.25313451933706e-05,
      "loss": 3.1332,
      "step": 182187
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.843073844909668,
      "learning_rate": 6.25288455194215e-05,
      "loss": 2.9635,
      "step": 182188
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.792750835418701,
      "learning_rate": 6.252634588962265e-05,
      "loss": 2.8885,
      "step": 182189
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.778651475906372,
      "learning_rate": 6.252384630397469e-05,
      "loss": 2.729,
      "step": 182190
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.7888760566711426,
      "learning_rate": 6.252134676247794e-05,
      "loss": 2.8568,
      "step": 182191
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.1293721199035645,
      "learning_rate": 6.251884726513304e-05,
      "loss": 2.9423,
      "step": 182192
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.915367603302002,
      "learning_rate": 6.251634781194032e-05,
      "loss": 2.9643,
      "step": 182193
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3743269443511963,
      "learning_rate": 6.251384840290043e-05,
      "loss": 3.0433,
      "step": 182194
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0472278594970703,
      "learning_rate": 6.251134903801354e-05,
      "loss": 3.0991,
      "step": 182195
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.003007411956787,
      "learning_rate": 6.250884971728036e-05,
      "loss": 3.1296,
      "step": 182196
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.972130537033081,
      "learning_rate": 6.250635044070117e-05,
      "loss": 3.0133,
      "step": 182197
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9657444953918457,
      "learning_rate": 6.250385120827663e-05,
      "loss": 2.7933,
      "step": 182198
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.820699453353882,
      "learning_rate": 6.250135202000702e-05,
      "loss": 3.041,
      "step": 182199
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.7008578777313232,
      "learning_rate": 6.249885287589297e-05,
      "loss": 2.9487,
      "step": 182200
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.813875198364258,
      "learning_rate": 6.249635377593489e-05,
      "loss": 2.7901,
      "step": 182201
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.20310378074646,
      "learning_rate": 6.24938547201332e-05,
      "loss": 2.859,
      "step": 182202
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4143433570861816,
      "learning_rate": 6.249135570848831e-05,
      "loss": 2.9564,
      "step": 182203
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3384480476379395,
      "learning_rate": 6.248885674100083e-05,
      "loss": 3.1104,
      "step": 182204
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.190511703491211,
      "learning_rate": 6.248635781767108e-05,
      "loss": 2.901,
      "step": 182205
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0710160732269287,
      "learning_rate": 6.248385893849969e-05,
      "loss": 3.287,
      "step": 182206
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9361460208892822,
      "learning_rate": 6.248136010348701e-05,
      "loss": 2.9933,
      "step": 182207
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7375247478485107,
      "learning_rate": 6.247886131263354e-05,
      "loss": 3.0593,
      "step": 182208
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6834557056427,
      "learning_rate": 6.247636256593965e-05,
      "loss": 2.8297,
      "step": 182209
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0916097164154053,
      "learning_rate": 6.2473863863406e-05,
      "loss": 3.054,
      "step": 182210
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.118670463562012,
      "learning_rate": 6.247136520503283e-05,
      "loss": 2.7323,
      "step": 182211
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7221033573150635,
      "learning_rate": 6.24688665908208e-05,
      "loss": 2.9803,
      "step": 182212
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.502974033355713,
      "learning_rate": 6.24663680207703e-05,
      "loss": 2.8003,
      "step": 182213
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6235504150390625,
      "learning_rate": 6.24638694948817e-05,
      "loss": 2.7905,
      "step": 182214
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7712674140930176,
      "learning_rate": 6.246137101315565e-05,
      "loss": 2.9677,
      "step": 182215
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.082035541534424,
      "learning_rate": 6.245887257559247e-05,
      "loss": 2.8799,
      "step": 182216
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7356624603271484,
      "learning_rate": 6.245637418219261e-05,
      "loss": 2.689,
      "step": 182217
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7110109329223633,
      "learning_rate": 6.245387583295669e-05,
      "loss": 3.0065,
      "step": 182218
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2144505977630615,
      "learning_rate": 6.245137752788507e-05,
      "loss": 2.9112,
      "step": 182219
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6443912982940674,
      "learning_rate": 6.244887926697815e-05,
      "loss": 2.998,
      "step": 182220
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7441844940185547,
      "learning_rate": 6.244638105023655e-05,
      "loss": 2.9294,
      "step": 182221
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5685417652130127,
      "learning_rate": 6.244388287766054e-05,
      "loss": 2.9706,
      "step": 182222
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.312685251235962,
      "learning_rate": 6.244138474925082e-05,
      "loss": 3.0604,
      "step": 182223
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4487144947052,
      "learning_rate": 6.243888666500772e-05,
      "loss": 2.945,
      "step": 182224
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4733030796051025,
      "learning_rate": 6.243638862493168e-05,
      "loss": 3.1143,
      "step": 182225
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.8533685207366943,
      "learning_rate": 6.243389062902316e-05,
      "loss": 3.1698,
      "step": 182226
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.45863938331604,
      "learning_rate": 6.243139267728271e-05,
      "loss": 3.0332,
      "step": 182227
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4127233028411865,
      "learning_rate": 6.24288947697107e-05,
      "loss": 2.8176,
      "step": 182228
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.2526655197143555,
      "learning_rate": 6.242639690630771e-05,
      "loss": 2.9646,
      "step": 182229
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0097551345825195,
      "learning_rate": 6.242389908707413e-05,
      "loss": 2.8609,
      "step": 182230
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5978598594665527,
      "learning_rate": 6.242140131201044e-05,
      "loss": 2.8307,
      "step": 182231
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4853708744049072,
      "learning_rate": 6.241890358111703e-05,
      "loss": 2.7968,
      "step": 182232
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0544614791870117,
      "learning_rate": 6.241640589439448e-05,
      "loss": 2.8428,
      "step": 182233
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7198221683502197,
      "learning_rate": 6.241390825184313e-05,
      "loss": 2.8314,
      "step": 182234
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4680159091949463,
      "learning_rate": 6.241141065346362e-05,
      "loss": 3.0831,
      "step": 182235
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.981663942337036,
      "learning_rate": 6.240891309925624e-05,
      "loss": 3.1237,
      "step": 182236
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.2730259895324707,
      "learning_rate": 6.240641558922166e-05,
      "loss": 3.01,
      "step": 182237
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.835559606552124,
      "learning_rate": 6.240391812336007e-05,
      "loss": 2.9657,
      "step": 182238
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8591418266296387,
      "learning_rate": 6.240142070167215e-05,
      "loss": 2.9935,
      "step": 182239
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.533324718475342,
      "learning_rate": 6.23989233241582e-05,
      "loss": 2.9521,
      "step": 182240
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4458470344543457,
      "learning_rate": 6.239642599081888e-05,
      "loss": 3.1273,
      "step": 182241
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.359827756881714,
      "learning_rate": 6.239392870165445e-05,
      "loss": 2.7263,
      "step": 182242
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.176845073699951,
      "learning_rate": 6.239143145666565e-05,
      "loss": 2.9339,
      "step": 182243
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.569396734237671,
      "learning_rate": 6.238893425585258e-05,
      "loss": 2.7976,
      "step": 182244
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.12908673286438,
      "learning_rate": 6.238643709921602e-05,
      "loss": 3.1187,
      "step": 182245
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.917013168334961,
      "learning_rate": 6.238393998675617e-05,
      "loss": 3.0298,
      "step": 182246
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0829098224639893,
      "learning_rate": 6.238144291847376e-05,
      "loss": 3.1253,
      "step": 182247
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4418342113494873,
      "learning_rate": 6.237894589436901e-05,
      "loss": 2.8284,
      "step": 182248
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5865044593811035,
      "learning_rate": 6.237644891444267e-05,
      "loss": 2.9284,
      "step": 182249
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7254927158355713,
      "learning_rate": 6.237395197869488e-05,
      "loss": 2.7294,
      "step": 182250
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.596173286437988,
      "learning_rate": 6.237145508712634e-05,
      "loss": 3.0047,
      "step": 182251
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5087666511535645,
      "learning_rate": 6.236895823973734e-05,
      "loss": 2.9258,
      "step": 182252
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.188156843185425,
      "learning_rate": 6.236646143652852e-05,
      "loss": 2.9648,
      "step": 182253
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4134585857391357,
      "learning_rate": 6.23639646775002e-05,
      "loss": 3.0083,
      "step": 182254
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.953354597091675,
      "learning_rate": 6.236146796265302e-05,
      "loss": 3.0613,
      "step": 182255
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4080679416656494,
      "learning_rate": 6.235897129198722e-05,
      "loss": 2.9335,
      "step": 182256
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3718903064727783,
      "learning_rate": 6.235647466550339e-05,
      "loss": 2.7995,
      "step": 182257
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5116782188415527,
      "learning_rate": 6.235397808320197e-05,
      "loss": 2.9742,
      "step": 182258
    },
    {
      "epoch": 2.37,
      "grad_norm": 5.420154094696045,
      "learning_rate": 6.235148154508347e-05,
      "loss": 3.043,
      "step": 182259
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9937350749969482,
      "learning_rate": 6.234898505114823e-05,
      "loss": 2.8764,
      "step": 182260
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.63350510597229,
      "learning_rate": 6.234648860139697e-05,
      "loss": 3.0537,
      "step": 182261
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6339316368103027,
      "learning_rate": 6.234399219582985e-05,
      "loss": 3.1396,
      "step": 182262
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0165281295776367,
      "learning_rate": 6.234149583444753e-05,
      "loss": 2.8347,
      "step": 182263
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6378300189971924,
      "learning_rate": 6.233899951725031e-05,
      "loss": 2.6885,
      "step": 182264
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.2627837657928467,
      "learning_rate": 6.233650324423885e-05,
      "loss": 2.8653,
      "step": 182265
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.52992582321167,
      "learning_rate": 6.233400701541344e-05,
      "loss": 2.6583,
      "step": 182266
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.299891471862793,
      "learning_rate": 6.233151083077481e-05,
      "loss": 3.0739,
      "step": 182267
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.972907066345215,
      "learning_rate": 6.232901469032305e-05,
      "loss": 3.0727,
      "step": 182268
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.218125343322754,
      "learning_rate": 6.232651859405889e-05,
      "loss": 3.0358,
      "step": 182269
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.19889235496521,
      "learning_rate": 6.232402254198267e-05,
      "loss": 2.8969,
      "step": 182270
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.086694717407227,
      "learning_rate": 6.232152653409492e-05,
      "loss": 2.946,
      "step": 182271
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.458677053451538,
      "learning_rate": 6.231903057039607e-05,
      "loss": 3.0955,
      "step": 182272
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7225332260131836,
      "learning_rate": 6.231653465088673e-05,
      "loss": 3.171,
      "step": 182273
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.371351957321167,
      "learning_rate": 6.231403877556704e-05,
      "loss": 3.0151,
      "step": 182274
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6043477058410645,
      "learning_rate": 6.231154294443777e-05,
      "loss": 2.9052,
      "step": 182275
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7801382541656494,
      "learning_rate": 6.230904715749922e-05,
      "loss": 2.9646,
      "step": 182276
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.042581558227539,
      "learning_rate": 6.230655141475196e-05,
      "loss": 2.8132,
      "step": 182277
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.379543304443359,
      "learning_rate": 6.230405571619631e-05,
      "loss": 3.0494,
      "step": 182278
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.897698402404785,
      "learning_rate": 6.230156006183298e-05,
      "loss": 2.8672,
      "step": 182279
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.614844560623169,
      "learning_rate": 6.229906445166213e-05,
      "loss": 2.8899,
      "step": 182280
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6323018074035645,
      "learning_rate": 6.229656888568442e-05,
      "loss": 2.9211,
      "step": 182281
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0156333446502686,
      "learning_rate": 6.229407336390021e-05,
      "loss": 3.0217,
      "step": 182282
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.4195046424865723,
      "learning_rate": 6.229157788631011e-05,
      "loss": 3.0172,
      "step": 182283
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.094189167022705,
      "learning_rate": 6.228908245291442e-05,
      "loss": 3.1887,
      "step": 182284
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5712711811065674,
      "learning_rate": 6.228658706371372e-05,
      "loss": 3.3495,
      "step": 182285
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.022124290466309,
      "learning_rate": 6.228409171870845e-05,
      "loss": 2.9149,
      "step": 182286
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3996026515960693,
      "learning_rate": 6.228159641789907e-05,
      "loss": 2.9958,
      "step": 182287
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.721766710281372,
      "learning_rate": 6.227910116128591e-05,
      "loss": 3.122,
      "step": 182288
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0148611068725586,
      "learning_rate": 6.227660594886965e-05,
      "loss": 2.971,
      "step": 182289
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5631377696990967,
      "learning_rate": 6.227411078065058e-05,
      "loss": 2.8958,
      "step": 182290
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8547301292419434,
      "learning_rate": 6.227161565662934e-05,
      "loss": 2.7939,
      "step": 182291
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.6609649658203125,
      "learning_rate": 6.226912057680627e-05,
      "loss": 2.7694,
      "step": 182292
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5539934635162354,
      "learning_rate": 6.226662554118186e-05,
      "loss": 2.9894,
      "step": 182293
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.792862892150879,
      "learning_rate": 6.22641305497565e-05,
      "loss": 2.9262,
      "step": 182294
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.747980833053589,
      "learning_rate": 6.226163560253078e-05,
      "loss": 2.9882,
      "step": 182295
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.84004282951355,
      "learning_rate": 6.225914069950507e-05,
      "loss": 2.7098,
      "step": 182296
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.488790988922119,
      "learning_rate": 6.225664584067994e-05,
      "loss": 2.7417,
      "step": 182297
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.029921531677246,
      "learning_rate": 6.225415102605576e-05,
      "loss": 3.047,
      "step": 182298
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8289384841918945,
      "learning_rate": 6.225165625563298e-05,
      "loss": 2.775,
      "step": 182299
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1374523639678955,
      "learning_rate": 6.224916152941217e-05,
      "loss": 2.7981,
      "step": 182300
    },
    {
      "epoch": 2.37,
      "grad_norm": 5.4429192543029785,
      "learning_rate": 6.224666684739371e-05,
      "loss": 2.8114,
      "step": 182301
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.688868999481201,
      "learning_rate": 6.224417220957803e-05,
      "loss": 2.8673,
      "step": 182302
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7718024253845215,
      "learning_rate": 6.224167761596571e-05,
      "loss": 2.7196,
      "step": 182303
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0552895069122314,
      "learning_rate": 6.223918306655714e-05,
      "loss": 2.9487,
      "step": 182304
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.367006301879883,
      "learning_rate": 6.223668856135274e-05,
      "loss": 2.8252,
      "step": 182305
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.458710193634033,
      "learning_rate": 6.223419410035309e-05,
      "loss": 2.917,
      "step": 182306
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8274590969085693,
      "learning_rate": 6.22316996835586e-05,
      "loss": 2.9852,
      "step": 182307
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6865310668945312,
      "learning_rate": 6.222920531096966e-05,
      "loss": 2.9115,
      "step": 182308
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5100438594818115,
      "learning_rate": 6.222671098258685e-05,
      "loss": 3.1603,
      "step": 182309
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.151618242263794,
      "learning_rate": 6.22242166984106e-05,
      "loss": 2.8281,
      "step": 182310
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4829485416412354,
      "learning_rate": 6.222172245844128e-05,
      "loss": 2.9836,
      "step": 182311
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8347957134246826,
      "learning_rate": 6.221922826267949e-05,
      "loss": 3.0018,
      "step": 182312
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.7649168968200684,
      "learning_rate": 6.221673411112557e-05,
      "loss": 2.8697,
      "step": 182313
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.767287254333496,
      "learning_rate": 6.221424000378011e-05,
      "loss": 3.0026,
      "step": 182314
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3007078170776367,
      "learning_rate": 6.221174594064353e-05,
      "loss": 3.1149,
      "step": 182315
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0068910121917725,
      "learning_rate": 6.220925192171629e-05,
      "loss": 3.131,
      "step": 182316
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.45766544342041,
      "learning_rate": 6.220675794699871e-05,
      "loss": 2.7581,
      "step": 182317
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.896768093109131,
      "learning_rate": 6.22042640164915e-05,
      "loss": 3.2255,
      "step": 182318
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6264452934265137,
      "learning_rate": 6.220177013019492e-05,
      "loss": 2.9161,
      "step": 182319
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5533130168914795,
      "learning_rate": 6.219927628810958e-05,
      "loss": 2.8904,
      "step": 182320
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.1007561683654785,
      "learning_rate": 6.219678249023581e-05,
      "loss": 2.9343,
      "step": 182321
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.428607940673828,
      "learning_rate": 6.219428873657434e-05,
      "loss": 2.7955,
      "step": 182322
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6059153079986572,
      "learning_rate": 6.219179502712524e-05,
      "loss": 3.0755,
      "step": 182323
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.520458698272705,
      "learning_rate": 6.218930136188927e-05,
      "loss": 3.2664,
      "step": 182324
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8869335651397705,
      "learning_rate": 6.218680774086674e-05,
      "loss": 2.734,
      "step": 182325
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.441714286804199,
      "learning_rate": 6.218431416405824e-05,
      "loss": 2.82,
      "step": 182326
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6070401668548584,
      "learning_rate": 6.218182063146406e-05,
      "loss": 2.8249,
      "step": 182327
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.656201124191284,
      "learning_rate": 6.217932714308496e-05,
      "loss": 2.9172,
      "step": 182328
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0914833545684814,
      "learning_rate": 6.217683369892101e-05,
      "loss": 2.8717,
      "step": 182329
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.063044548034668,
      "learning_rate": 6.2174340298973e-05,
      "loss": 2.576,
      "step": 182330
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.600255012512207,
      "learning_rate": 6.217184694324119e-05,
      "loss": 2.9016,
      "step": 182331
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8890302181243896,
      "learning_rate": 6.216935363172618e-05,
      "loss": 2.9565,
      "step": 182332
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.661405324935913,
      "learning_rate": 6.216686036442829e-05,
      "loss": 2.8424,
      "step": 182333
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.496812343597412,
      "learning_rate": 6.216436714134826e-05,
      "loss": 3.1096,
      "step": 182334
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.107922077178955,
      "learning_rate": 6.216187396248617e-05,
      "loss": 3.1501,
      "step": 182335
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4380834102630615,
      "learning_rate": 6.215938082784277e-05,
      "loss": 2.9058,
      "step": 182336
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.334671974182129,
      "learning_rate": 6.215688773741836e-05,
      "loss": 2.7279,
      "step": 182337
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0828678607940674,
      "learning_rate": 6.215439469121355e-05,
      "loss": 3.0466,
      "step": 182338
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.1611218452453613,
      "learning_rate": 6.215190168922864e-05,
      "loss": 3.0054,
      "step": 182339
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.992504358291626,
      "learning_rate": 6.214940873146434e-05,
      "loss": 3.165,
      "step": 182340
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5381855964660645,
      "learning_rate": 6.214691581792082e-05,
      "loss": 3.2443,
      "step": 182341
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.5533745288848877,
      "learning_rate": 6.214442294859872e-05,
      "loss": 2.9165,
      "step": 182342
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3213725090026855,
      "learning_rate": 6.21419301234984e-05,
      "loss": 2.9379,
      "step": 182343
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.903517961502075,
      "learning_rate": 6.213943734262049e-05,
      "loss": 3.046,
      "step": 182344
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6721279621124268,
      "learning_rate": 6.213694460596523e-05,
      "loss": 2.9714,
      "step": 182345
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.323615312576294,
      "learning_rate": 6.213445191353337e-05,
      "loss": 3.0046,
      "step": 182346
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.303210496902466,
      "learning_rate": 6.213195926532506e-05,
      "loss": 2.9527,
      "step": 182347
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.748668670654297,
      "learning_rate": 6.212946666134097e-05,
      "loss": 2.6697,
      "step": 182348
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9162793159484863,
      "learning_rate": 6.212697410158146e-05,
      "loss": 3.103,
      "step": 182349
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.3882317543029785,
      "learning_rate": 6.212448158604709e-05,
      "loss": 3.0131,
      "step": 182350
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.496683359146118,
      "learning_rate": 6.212198911473816e-05,
      "loss": 3.0594,
      "step": 182351
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.682091236114502,
      "learning_rate": 6.211949668765545e-05,
      "loss": 2.9143,
      "step": 182352
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.050743341445923,
      "learning_rate": 6.211700430479905e-05,
      "loss": 2.9024,
      "step": 182353
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.632662534713745,
      "learning_rate": 6.211451196616964e-05,
      "loss": 2.8598,
      "step": 182354
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5930280685424805,
      "learning_rate": 6.211201967176758e-05,
      "loss": 2.9676,
      "step": 182355
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.008093357086182,
      "learning_rate": 6.210952742159344e-05,
      "loss": 3.2671,
      "step": 182356
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.8485403060913086,
      "learning_rate": 6.210703521564756e-05,
      "loss": 3.2031,
      "step": 182357
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.809650421142578,
      "learning_rate": 6.210454305393063e-05,
      "loss": 3.1154,
      "step": 182358
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5906898975372314,
      "learning_rate": 6.210205093644283e-05,
      "loss": 2.9744,
      "step": 182359
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.6750357151031494,
      "learning_rate": 6.20995588631848e-05,
      "loss": 2.8935,
      "step": 182360
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.3881354331970215,
      "learning_rate": 6.209706683415687e-05,
      "loss": 3.1017,
      "step": 182361
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.842003107070923,
      "learning_rate": 6.209457484935966e-05,
      "loss": 2.9606,
      "step": 182362
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.476557493209839,
      "learning_rate": 6.209208290879349e-05,
      "loss": 2.9804,
      "step": 182363
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.428241729736328,
      "learning_rate": 6.208959101245898e-05,
      "loss": 3.0493,
      "step": 182364
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.732990026473999,
      "learning_rate": 6.208709916035648e-05,
      "loss": 2.8311,
      "step": 182365
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.452449083328247,
      "learning_rate": 6.208460735248651e-05,
      "loss": 2.8296,
      "step": 182366
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.731182813644409,
      "learning_rate": 6.208211558884942e-05,
      "loss": 2.9014,
      "step": 182367
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.739390850067139,
      "learning_rate": 6.207962386944582e-05,
      "loss": 2.9038,
      "step": 182368
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.540046453475952,
      "learning_rate": 6.207713219427606e-05,
      "loss": 3.0304,
      "step": 182369
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.239072799682617,
      "learning_rate": 6.207464056334068e-05,
      "loss": 2.9724,
      "step": 182370
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.949916124343872,
      "learning_rate": 6.207214897664016e-05,
      "loss": 2.7353,
      "step": 182371
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.885456085205078,
      "learning_rate": 6.206965743417491e-05,
      "loss": 2.8599,
      "step": 182372
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.518364191055298,
      "learning_rate": 6.206716593594532e-05,
      "loss": 2.9335,
      "step": 182373
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9242422580718994,
      "learning_rate": 6.2064674481952e-05,
      "loss": 2.8478,
      "step": 182374
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.1269664764404297,
      "learning_rate": 6.206218307219526e-05,
      "loss": 3.0697,
      "step": 182375
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0462231636047363,
      "learning_rate": 6.205969170667575e-05,
      "loss": 2.6771,
      "step": 182376
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0822582244873047,
      "learning_rate": 6.205720038539384e-05,
      "loss": 2.8524,
      "step": 182377
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4420082569122314,
      "learning_rate": 6.205470910834999e-05,
      "loss": 2.9095,
      "step": 182378
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.1000142097473145,
      "learning_rate": 6.205221787554457e-05,
      "loss": 2.9991,
      "step": 182379
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8014166355133057,
      "learning_rate": 6.204972668697821e-05,
      "loss": 3.0717,
      "step": 182380
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.0862255096435547,
      "learning_rate": 6.204723554265122e-05,
      "loss": 3.0157,
      "step": 182381
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.66074275970459,
      "learning_rate": 6.204474444256424e-05,
      "loss": 2.9304,
      "step": 182382
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.678492784500122,
      "learning_rate": 6.204225338671761e-05,
      "loss": 2.753,
      "step": 182383
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5833077430725098,
      "learning_rate": 6.203976237511175e-05,
      "loss": 3.1363,
      "step": 182384
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5731377601623535,
      "learning_rate": 6.203727140774729e-05,
      "loss": 2.8392,
      "step": 182385
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.9960696697235107,
      "learning_rate": 6.203478048462454e-05,
      "loss": 2.6723,
      "step": 182386
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.9406750202178955,
      "learning_rate": 6.203228960574399e-05,
      "loss": 2.805,
      "step": 182387
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.974919080734253,
      "learning_rate": 6.202979877110618e-05,
      "loss": 2.9039,
      "step": 182388
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.348759651184082,
      "learning_rate": 6.202730798071151e-05,
      "loss": 2.9378,
      "step": 182389
    },
    {
      "epoch": 2.37,
      "grad_norm": 4.1734418869018555,
      "learning_rate": 6.202481723456041e-05,
      "loss": 2.9609,
      "step": 182390
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.71954345703125,
      "learning_rate": 6.202232653265345e-05,
      "loss": 2.9505,
      "step": 182391
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.176849603652954,
      "learning_rate": 6.201983587499104e-05,
      "loss": 2.8163,
      "step": 182392
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.8201677799224854,
      "learning_rate": 6.201734526157354e-05,
      "loss": 3.0498,
      "step": 182393
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.325910806655884,
      "learning_rate": 6.20148546924016e-05,
      "loss": 2.9618,
      "step": 182394
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.786870241165161,
      "learning_rate": 6.201236416747558e-05,
      "loss": 2.8061,
      "step": 182395
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.5823609828948975,
      "learning_rate": 6.200987368679587e-05,
      "loss": 2.8294,
      "step": 182396
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.922917604446411,
      "learning_rate": 6.200738325036312e-05,
      "loss": 2.7969,
      "step": 182397
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4929921627044678,
      "learning_rate": 6.200489285817758e-05,
      "loss": 2.9158,
      "step": 182398
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.832469940185547,
      "learning_rate": 6.200240251023995e-05,
      "loss": 2.7993,
      "step": 182399
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7731451988220215,
      "learning_rate": 6.199991220655052e-05,
      "loss": 3.1344,
      "step": 182400
    },
    {
      "epoch": 2.37,
      "grad_norm": 3.60050630569458,
      "learning_rate": 6.199742194710983e-05,
      "loss": 2.8759,
      "step": 182401
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.4854698181152344,
      "learning_rate": 6.199493173191821e-05,
      "loss": 2.9968,
      "step": 182402
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5086545944213867,
      "learning_rate": 6.199244156097629e-05,
      "loss": 2.8561,
      "step": 182403
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.838289260864258,
      "learning_rate": 6.198995143428442e-05,
      "loss": 2.8972,
      "step": 182404
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6736836433410645,
      "learning_rate": 6.198746135184318e-05,
      "loss": 2.9594,
      "step": 182405
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4170801639556885,
      "learning_rate": 6.198497131365297e-05,
      "loss": 3.0232,
      "step": 182406
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5570716857910156,
      "learning_rate": 6.198248131971423e-05,
      "loss": 3.2248,
      "step": 182407
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.632432460784912,
      "learning_rate": 6.19799913700274e-05,
      "loss": 3.0869,
      "step": 182408
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.892561912536621,
      "learning_rate": 6.197750146459303e-05,
      "loss": 3.0452,
      "step": 182409
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.636584520339966,
      "learning_rate": 6.197501160341143e-05,
      "loss": 2.8046,
      "step": 182410
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4934940338134766,
      "learning_rate": 6.197252178648329e-05,
      "loss": 2.8289,
      "step": 182411
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0095131397247314,
      "learning_rate": 6.197003201380887e-05,
      "loss": 3.1731,
      "step": 182412
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6039679050445557,
      "learning_rate": 6.196754228538883e-05,
      "loss": 3.0802,
      "step": 182413
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4933154582977295,
      "learning_rate": 6.19650526012234e-05,
      "loss": 2.8888,
      "step": 182414
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7796595096588135,
      "learning_rate": 6.196256296131324e-05,
      "loss": 2.9789,
      "step": 182415
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0222671031951904,
      "learning_rate": 6.196007336565865e-05,
      "loss": 2.6999,
      "step": 182416
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.703126907348633,
      "learning_rate": 6.195758381426024e-05,
      "loss": 2.8935,
      "step": 182417
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.358586072921753,
      "learning_rate": 6.195509430711835e-05,
      "loss": 2.79,
      "step": 182418
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.590993881225586,
      "learning_rate": 6.195260484423365e-05,
      "loss": 3.0313,
      "step": 182419
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7533984184265137,
      "learning_rate": 6.195011542560628e-05,
      "loss": 2.7795,
      "step": 182420
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9305057525634766,
      "learning_rate": 6.194762605123697e-05,
      "loss": 2.7162,
      "step": 182421
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7964224815368652,
      "learning_rate": 6.194513672112602e-05,
      "loss": 2.9987,
      "step": 182422
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5053985118865967,
      "learning_rate": 6.194264743527401e-05,
      "loss": 2.9461,
      "step": 182423
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5099241733551025,
      "learning_rate": 6.194015819368132e-05,
      "loss": 2.9619,
      "step": 182424
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.379927158355713,
      "learning_rate": 6.193766899634857e-05,
      "loss": 2.9668,
      "step": 182425
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.392866849899292,
      "learning_rate": 6.193517984327597e-05,
      "loss": 2.9788,
      "step": 182426
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9241974353790283,
      "learning_rate": 6.193269073446418e-05,
      "loss": 3.0544,
      "step": 182427
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8218019008636475,
      "learning_rate": 6.19302016699135e-05,
      "loss": 3.0458,
      "step": 182428
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5494396686553955,
      "learning_rate": 6.19277126496246e-05,
      "loss": 3.0606,
      "step": 182429
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4308645725250244,
      "learning_rate": 6.192522367359775e-05,
      "loss": 3.2026,
      "step": 182430
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2171220779418945,
      "learning_rate": 6.192273474183356e-05,
      "loss": 2.8684,
      "step": 182431
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.243014097213745,
      "learning_rate": 6.192024585433243e-05,
      "loss": 2.7291,
      "step": 182432
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.377516984939575,
      "learning_rate": 6.191775701109482e-05,
      "loss": 2.8363,
      "step": 182433
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.014061689376831,
      "learning_rate": 6.191526821212111e-05,
      "loss": 2.7526,
      "step": 182434
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1176302433013916,
      "learning_rate": 6.191277945741193e-05,
      "loss": 2.8379,
      "step": 182435
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.775172472000122,
      "learning_rate": 6.191029074696757e-05,
      "loss": 3.0941,
      "step": 182436
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.956637144088745,
      "learning_rate": 6.190780208078867e-05,
      "loss": 3.1207,
      "step": 182437
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.164280891418457,
      "learning_rate": 6.190531345887562e-05,
      "loss": 2.7431,
      "step": 182438
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.57023286819458,
      "learning_rate": 6.190282488122884e-05,
      "loss": 2.8638,
      "step": 182439
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5914525985717773,
      "learning_rate": 6.190033634784878e-05,
      "loss": 2.8998,
      "step": 182440
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6928164958953857,
      "learning_rate": 6.189784785873597e-05,
      "loss": 3.0677,
      "step": 182441
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.923190116882324,
      "learning_rate": 6.189535941389078e-05,
      "loss": 2.9339,
      "step": 182442
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.777722120285034,
      "learning_rate": 6.189287101331383e-05,
      "loss": 3.0122,
      "step": 182443
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8293609619140625,
      "learning_rate": 6.18903826570055e-05,
      "loss": 2.8443,
      "step": 182444
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7888989448547363,
      "learning_rate": 6.188789434496621e-05,
      "loss": 3.0502,
      "step": 182445
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9638149738311768,
      "learning_rate": 6.188540607719641e-05,
      "loss": 2.8311,
      "step": 182446
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7097973823547363,
      "learning_rate": 6.188291785369667e-05,
      "loss": 2.7784,
      "step": 182447
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.699700355529785,
      "learning_rate": 6.188042967446731e-05,
      "loss": 3.1014,
      "step": 182448
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5541980266571045,
      "learning_rate": 6.187794153950895e-05,
      "loss": 3.0154,
      "step": 182449
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.440722703933716,
      "learning_rate": 6.187545344882196e-05,
      "loss": 2.9475,
      "step": 182450
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7597861289978027,
      "learning_rate": 6.187296540240684e-05,
      "loss": 2.9088,
      "step": 182451
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6847903728485107,
      "learning_rate": 6.187047740026396e-05,
      "loss": 2.961,
      "step": 182452
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.833854913711548,
      "learning_rate": 6.186798944239391e-05,
      "loss": 3.1347,
      "step": 182453
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7686197757720947,
      "learning_rate": 6.186550152879704e-05,
      "loss": 2.9005,
      "step": 182454
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.6855382919311523,
      "learning_rate": 6.186301365947394e-05,
      "loss": 2.9647,
      "step": 182455
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.787175416946411,
      "learning_rate": 6.186052583442497e-05,
      "loss": 3.0488,
      "step": 182456
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.826597213745117,
      "learning_rate": 6.185803805365066e-05,
      "loss": 2.8652,
      "step": 182457
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0266025066375732,
      "learning_rate": 6.185555031715136e-05,
      "loss": 2.919,
      "step": 182458
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.540243148803711,
      "learning_rate": 6.185306262492767e-05,
      "loss": 2.9252,
      "step": 182459
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.281700849533081,
      "learning_rate": 6.18505749769799e-05,
      "loss": 3.1914,
      "step": 182460
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.780890941619873,
      "learning_rate": 6.184808737330872e-05,
      "loss": 2.9535,
      "step": 182461
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5151987075805664,
      "learning_rate": 6.184559981391445e-05,
      "loss": 2.7564,
      "step": 182462
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6168837547302246,
      "learning_rate": 6.184311229879759e-05,
      "loss": 3.1251,
      "step": 182463
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.40779709815979,
      "learning_rate": 6.184062482795849e-05,
      "loss": 3.3052,
      "step": 182464
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8384618759155273,
      "learning_rate": 6.183813740139782e-05,
      "loss": 2.895,
      "step": 182465
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.964447021484375,
      "learning_rate": 6.183565001911585e-05,
      "loss": 2.823,
      "step": 182466
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.513310194015503,
      "learning_rate": 6.183316268111318e-05,
      "loss": 3.0592,
      "step": 182467
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4926598072052,
      "learning_rate": 6.183067538739026e-05,
      "loss": 3.0703,
      "step": 182468
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5119736194610596,
      "learning_rate": 6.182818813794749e-05,
      "loss": 2.9957,
      "step": 182469
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.749514102935791,
      "learning_rate": 6.18257009327853e-05,
      "loss": 3.2007,
      "step": 182470
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8835833072662354,
      "learning_rate": 6.182321377190424e-05,
      "loss": 2.8832,
      "step": 182471
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7078874111175537,
      "learning_rate": 6.182072665530471e-05,
      "loss": 3.1317,
      "step": 182472
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.270671844482422,
      "learning_rate": 6.181823958298725e-05,
      "loss": 2.982,
      "step": 182473
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.414098024368286,
      "learning_rate": 6.18157525549523e-05,
      "loss": 3.1824,
      "step": 182474
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.812685489654541,
      "learning_rate": 6.18132655712002e-05,
      "loss": 3.2207,
      "step": 182475
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6535909175872803,
      "learning_rate": 6.18107786317316e-05,
      "loss": 2.6434,
      "step": 182476
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2940704822540283,
      "learning_rate": 6.180829173654686e-05,
      "loss": 2.9075,
      "step": 182477
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.49649715423584,
      "learning_rate": 6.180580488564638e-05,
      "loss": 3.033,
      "step": 182478
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9326953887939453,
      "learning_rate": 6.180331807903078e-05,
      "loss": 3.0803,
      "step": 182479
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5272226333618164,
      "learning_rate": 6.180083131670047e-05,
      "loss": 2.6982,
      "step": 182480
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4851887226104736,
      "learning_rate": 6.179834459865574e-05,
      "loss": 2.9708,
      "step": 182481
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1383626461029053,
      "learning_rate": 6.17958579248973e-05,
      "loss": 2.9838,
      "step": 182482
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.721081018447876,
      "learning_rate": 6.179337129542545e-05,
      "loss": 2.9293,
      "step": 182483
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3348917961120605,
      "learning_rate": 6.179088471024078e-05,
      "loss": 2.9802,
      "step": 182484
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.099055767059326,
      "learning_rate": 6.178839816934367e-05,
      "loss": 2.8476,
      "step": 182485
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7571663856506348,
      "learning_rate": 6.178591167273458e-05,
      "loss": 3.0156,
      "step": 182486
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8167192935943604,
      "learning_rate": 6.178342522041395e-05,
      "loss": 3.0935,
      "step": 182487
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5543065071105957,
      "learning_rate": 6.178093881238232e-05,
      "loss": 2.9968,
      "step": 182488
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6379573345184326,
      "learning_rate": 6.177845244864003e-05,
      "loss": 2.8188,
      "step": 182489
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8808135986328125,
      "learning_rate": 6.17759661291877e-05,
      "loss": 2.9654,
      "step": 182490
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3986470699310303,
      "learning_rate": 6.177347985402576e-05,
      "loss": 3.0439,
      "step": 182491
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.697584867477417,
      "learning_rate": 6.177099362315458e-05,
      "loss": 3.1206,
      "step": 182492
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1432557106018066,
      "learning_rate": 6.17685074365746e-05,
      "loss": 2.7791,
      "step": 182493
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4843883514404297,
      "learning_rate": 6.176602129428642e-05,
      "loss": 3.0144,
      "step": 182494
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9438741207122803,
      "learning_rate": 6.176353519629039e-05,
      "loss": 2.729,
      "step": 182495
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7149229049682617,
      "learning_rate": 6.176104914258706e-05,
      "loss": 3.2282,
      "step": 182496
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4581940174102783,
      "learning_rate": 6.17585631331768e-05,
      "loss": 2.9302,
      "step": 182497
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.092682361602783,
      "learning_rate": 6.175607716806017e-05,
      "loss": 3.0339,
      "step": 182498
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8236279487609863,
      "learning_rate": 6.175359124723761e-05,
      "loss": 2.7231,
      "step": 182499
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7908132076263428,
      "learning_rate": 6.175110537070953e-05,
      "loss": 2.885,
      "step": 182500
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.539539098739624,
      "learning_rate": 6.174861953847633e-05,
      "loss": 2.8465,
      "step": 182501
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6775286197662354,
      "learning_rate": 6.174613375053867e-05,
      "loss": 3.0926,
      "step": 182502
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.522757053375244,
      "learning_rate": 6.174364800689682e-05,
      "loss": 2.8012,
      "step": 182503
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.546818256378174,
      "learning_rate": 6.174116230755137e-05,
      "loss": 2.6989,
      "step": 182504
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5209999084472656,
      "learning_rate": 6.173867665250277e-05,
      "loss": 3.0056,
      "step": 182505
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5014708042144775,
      "learning_rate": 6.173619104175141e-05,
      "loss": 3.1012,
      "step": 182506
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3725202083587646,
      "learning_rate": 6.173370547529775e-05,
      "loss": 2.9983,
      "step": 182507
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.949526309967041,
      "learning_rate": 6.173121995314237e-05,
      "loss": 3.1306,
      "step": 182508
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.828064203262329,
      "learning_rate": 6.172873447528554e-05,
      "loss": 2.9647,
      "step": 182509
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7566099166870117,
      "learning_rate": 6.172624904172794e-05,
      "loss": 3.0528,
      "step": 182510
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4831719398498535,
      "learning_rate": 6.17237636524699e-05,
      "loss": 2.7248,
      "step": 182511
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3275914192199707,
      "learning_rate": 6.172127830751194e-05,
      "loss": 2.9991,
      "step": 182512
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.93680739402771,
      "learning_rate": 6.171879300685439e-05,
      "loss": 2.9268,
      "step": 182513
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9244415760040283,
      "learning_rate": 6.171630775049791e-05,
      "loss": 2.9974,
      "step": 182514
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9162166118621826,
      "learning_rate": 6.17138225384428e-05,
      "loss": 3.0324,
      "step": 182515
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4232585430145264,
      "learning_rate": 6.171133737068965e-05,
      "loss": 2.9561,
      "step": 182516
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8069040775299072,
      "learning_rate": 6.170885224723886e-05,
      "loss": 2.9797,
      "step": 182517
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.564899206161499,
      "learning_rate": 6.17063671680909e-05,
      "loss": 2.8476,
      "step": 182518
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4418387413024902,
      "learning_rate": 6.170388213324616e-05,
      "loss": 2.9068,
      "step": 182519
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9085466861724854,
      "learning_rate": 6.170139714270522e-05,
      "loss": 2.8781,
      "step": 182520
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6002554893493652,
      "learning_rate": 6.16989121964684e-05,
      "loss": 3.1399,
      "step": 182521
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7906715869903564,
      "learning_rate": 6.169642729453636e-05,
      "loss": 2.8834,
      "step": 182522
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2526767253875732,
      "learning_rate": 6.169394243690943e-05,
      "loss": 2.7355,
      "step": 182523
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.336240291595459,
      "learning_rate": 6.169145762358813e-05,
      "loss": 2.9921,
      "step": 182524
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8773350715637207,
      "learning_rate": 6.168897285457275e-05,
      "loss": 2.7158,
      "step": 182525
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6620779037475586,
      "learning_rate": 6.1686488129864e-05,
      "loss": 2.8381,
      "step": 182526
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.813985586166382,
      "learning_rate": 6.168400344946217e-05,
      "loss": 3.0357,
      "step": 182527
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9304158687591553,
      "learning_rate": 6.168151881336783e-05,
      "loss": 2.8156,
      "step": 182528
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5947577953338623,
      "learning_rate": 6.16790342215814e-05,
      "loss": 3.0088,
      "step": 182529
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8569843769073486,
      "learning_rate": 6.167654967410334e-05,
      "loss": 2.9258,
      "step": 182530
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5606770515441895,
      "learning_rate": 6.167406517093404e-05,
      "loss": 2.8107,
      "step": 182531
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.799783229827881,
      "learning_rate": 6.16715807120741e-05,
      "loss": 2.7966,
      "step": 182532
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.785083770751953,
      "learning_rate": 6.166909629752381e-05,
      "loss": 2.7673,
      "step": 182533
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8379852771759033,
      "learning_rate": 6.166661192728384e-05,
      "loss": 2.9511,
      "step": 182534
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8316233158111572,
      "learning_rate": 6.166412760135454e-05,
      "loss": 3.1709,
      "step": 182535
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6544322967529297,
      "learning_rate": 6.166164331973638e-05,
      "loss": 2.9208,
      "step": 182536
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.344270706176758,
      "learning_rate": 6.165915908242975e-05,
      "loss": 2.8192,
      "step": 182537
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.30061936378479,
      "learning_rate": 6.165667488943524e-05,
      "loss": 3.1917,
      "step": 182538
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4710216522216797,
      "learning_rate": 6.165419074075318e-05,
      "loss": 2.8727,
      "step": 182539
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6790666580200195,
      "learning_rate": 6.165170663638418e-05,
      "loss": 2.9229,
      "step": 182540
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6036829948425293,
      "learning_rate": 6.164922257632866e-05,
      "loss": 2.9655,
      "step": 182541
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8052518367767334,
      "learning_rate": 6.164673856058702e-05,
      "loss": 3.1464,
      "step": 182542
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.418306589126587,
      "learning_rate": 6.164425458915969e-05,
      "loss": 2.6081,
      "step": 182543
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9158122539520264,
      "learning_rate": 6.164177066204721e-05,
      "loss": 2.9543,
      "step": 182544
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.729238986968994,
      "learning_rate": 6.163928677925e-05,
      "loss": 3.044,
      "step": 182545
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.375007152557373,
      "learning_rate": 6.163680294076862e-05,
      "loss": 2.9558,
      "step": 182546
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.136797904968262,
      "learning_rate": 6.163431914660347e-05,
      "loss": 2.9424,
      "step": 182547
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.105201005935669,
      "learning_rate": 6.163183539675498e-05,
      "loss": 2.8523,
      "step": 182548
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4823315143585205,
      "learning_rate": 6.162935169122354e-05,
      "loss": 2.7331,
      "step": 182549
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3135409355163574,
      "learning_rate": 6.162686803000982e-05,
      "loss": 3.1016,
      "step": 182550
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1315090656280518,
      "learning_rate": 6.162438441311407e-05,
      "loss": 2.7558,
      "step": 182551
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4070403575897217,
      "learning_rate": 6.16219008405369e-05,
      "loss": 2.9683,
      "step": 182552
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6778628826141357,
      "learning_rate": 6.161941731227873e-05,
      "loss": 3.0568,
      "step": 182553
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6745738983154297,
      "learning_rate": 6.161693382834003e-05,
      "loss": 3.0727,
      "step": 182554
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9135475158691406,
      "learning_rate": 6.161445038872115e-05,
      "loss": 3.0356,
      "step": 182555
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5499260425567627,
      "learning_rate": 6.161196699342272e-05,
      "loss": 2.8826,
      "step": 182556
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.770620107650757,
      "learning_rate": 6.160948364244506e-05,
      "loss": 2.9797,
      "step": 182557
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0605924129486084,
      "learning_rate": 6.160700033578875e-05,
      "loss": 2.8342,
      "step": 182558
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.91749906539917,
      "learning_rate": 6.160451707345422e-05,
      "loss": 3.0405,
      "step": 182559
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9002463817596436,
      "learning_rate": 6.160203385544183e-05,
      "loss": 3.1456,
      "step": 182560
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.402003288269043,
      "learning_rate": 6.15995506817522e-05,
      "loss": 3.05,
      "step": 182561
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8871121406555176,
      "learning_rate": 6.159706755238571e-05,
      "loss": 2.9413,
      "step": 182562
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.586359739303589,
      "learning_rate": 6.159458446734276e-05,
      "loss": 2.7062,
      "step": 182563
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.205249309539795,
      "learning_rate": 6.159210142662392e-05,
      "loss": 2.8986,
      "step": 182564
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7766449451446533,
      "learning_rate": 6.158961843022956e-05,
      "loss": 3.0288,
      "step": 182565
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.916287422180176,
      "learning_rate": 6.158713547816024e-05,
      "loss": 2.8763,
      "step": 182566
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.593906879425049,
      "learning_rate": 6.15846525704164e-05,
      "loss": 2.9309,
      "step": 182567
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8907392024993896,
      "learning_rate": 6.158216970699841e-05,
      "loss": 3.0505,
      "step": 182568
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.209221839904785,
      "learning_rate": 6.157968688790685e-05,
      "loss": 3.2306,
      "step": 182569
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7142245769500732,
      "learning_rate": 6.157720411314212e-05,
      "loss": 2.8631,
      "step": 182570
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.830141067504883,
      "learning_rate": 6.15747213827046e-05,
      "loss": 2.8262,
      "step": 182571
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6149704456329346,
      "learning_rate": 6.157223869659495e-05,
      "loss": 2.8195,
      "step": 182572
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.218322277069092,
      "learning_rate": 6.156975605481353e-05,
      "loss": 2.9034,
      "step": 182573
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.01669979095459,
      "learning_rate": 6.156727345736068e-05,
      "loss": 2.8871,
      "step": 182574
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.278657913208008,
      "learning_rate": 6.156479090423708e-05,
      "loss": 2.9483,
      "step": 182575
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.882615566253662,
      "learning_rate": 6.156230839544307e-05,
      "loss": 2.9096,
      "step": 182576
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4727249145507812,
      "learning_rate": 6.155982593097904e-05,
      "loss": 2.9137,
      "step": 182577
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7313241958618164,
      "learning_rate": 6.155734351084563e-05,
      "loss": 2.9197,
      "step": 182578
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7465763092041016,
      "learning_rate": 6.155486113504321e-05,
      "loss": 2.8805,
      "step": 182579
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9799652099609375,
      "learning_rate": 6.155237880357216e-05,
      "loss": 3.0966,
      "step": 182580
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8267149925231934,
      "learning_rate": 6.154989651643309e-05,
      "loss": 2.8896,
      "step": 182581
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.9626569747924805,
      "learning_rate": 6.154741427362636e-05,
      "loss": 2.7558,
      "step": 182582
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.860328197479248,
      "learning_rate": 6.15449320751525e-05,
      "loss": 2.8936,
      "step": 182583
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0237743854522705,
      "learning_rate": 6.154244992101197e-05,
      "loss": 3.0536,
      "step": 182584
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.942258834838867,
      "learning_rate": 6.153996781120518e-05,
      "loss": 2.9148,
      "step": 182585
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1160526275634766,
      "learning_rate": 6.153748574573253e-05,
      "loss": 2.7448,
      "step": 182586
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2405283451080322,
      "learning_rate": 6.153500372459465e-05,
      "loss": 3.1107,
      "step": 182587
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9858219623565674,
      "learning_rate": 6.153252174779183e-05,
      "loss": 3.1873,
      "step": 182588
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4721903800964355,
      "learning_rate": 6.153003981532472e-05,
      "loss": 2.8369,
      "step": 182589
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0217599868774414,
      "learning_rate": 6.152755792719364e-05,
      "loss": 2.9174,
      "step": 182590
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.293170690536499,
      "learning_rate": 6.152507608339912e-05,
      "loss": 3.0549,
      "step": 182591
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9705986976623535,
      "learning_rate": 6.152259428394149e-05,
      "loss": 2.8738,
      "step": 182592
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.33864426612854,
      "learning_rate": 6.15201125288214e-05,
      "loss": 3.0922,
      "step": 182593
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6148555278778076,
      "learning_rate": 6.151763081803914e-05,
      "loss": 3.1009,
      "step": 182594
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.382321357727051,
      "learning_rate": 6.151514915159533e-05,
      "loss": 3.1058,
      "step": 182595
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5852065086364746,
      "learning_rate": 6.151266752949037e-05,
      "loss": 2.7473,
      "step": 182596
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.564117193222046,
      "learning_rate": 6.151018595172469e-05,
      "loss": 2.858,
      "step": 182597
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7397732734680176,
      "learning_rate": 6.150770441829868e-05,
      "loss": 2.91,
      "step": 182598
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5288748741149902,
      "learning_rate": 6.1505222929213e-05,
      "loss": 2.9566,
      "step": 182599
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5271546840667725,
      "learning_rate": 6.150274148446792e-05,
      "loss": 3.122,
      "step": 182600
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7131803035736084,
      "learning_rate": 6.150026008406403e-05,
      "loss": 2.7391,
      "step": 182601
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.554096221923828,
      "learning_rate": 6.149777872800178e-05,
      "loss": 3.1673,
      "step": 182602
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8794569969177246,
      "learning_rate": 6.149529741628155e-05,
      "loss": 2.8336,
      "step": 182603
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.758687973022461,
      "learning_rate": 6.149281614890382e-05,
      "loss": 2.7266,
      "step": 182604
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.240774154663086,
      "learning_rate": 6.149033492586913e-05,
      "loss": 2.9286,
      "step": 182605
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.849519968032837,
      "learning_rate": 6.148785374717779e-05,
      "loss": 2.855,
      "step": 182606
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8386690616607666,
      "learning_rate": 6.148537261283048e-05,
      "loss": 3.0871,
      "step": 182607
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.840803623199463,
      "learning_rate": 6.148289152282753e-05,
      "loss": 2.5341,
      "step": 182608
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7672126293182373,
      "learning_rate": 6.148041047716937e-05,
      "loss": 2.7223,
      "step": 182609
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.017132759094238,
      "learning_rate": 6.147792947585648e-05,
      "loss": 2.693,
      "step": 182610
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5447065830230713,
      "learning_rate": 6.147544851888942e-05,
      "loss": 2.9327,
      "step": 182611
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.752994060516357,
      "learning_rate": 6.147296760626848e-05,
      "loss": 2.8977,
      "step": 182612
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6138055324554443,
      "learning_rate": 6.147048673799426e-05,
      "loss": 2.9401,
      "step": 182613
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.897033929824829,
      "learning_rate": 6.146800591406724e-05,
      "loss": 3.0795,
      "step": 182614
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.6909449100494385,
      "learning_rate": 6.146552513448778e-05,
      "loss": 2.757,
      "step": 182615
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.600907802581787,
      "learning_rate": 6.146304439925634e-05,
      "loss": 2.7631,
      "step": 182616
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.961941957473755,
      "learning_rate": 6.146056370837346e-05,
      "loss": 2.9442,
      "step": 182617
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.62182354927063,
      "learning_rate": 6.145808306183952e-05,
      "loss": 2.5894,
      "step": 182618
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1756529808044434,
      "learning_rate": 6.145560245965508e-05,
      "loss": 3.3086,
      "step": 182619
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.945253849029541,
      "learning_rate": 6.145312190182058e-05,
      "loss": 2.6424,
      "step": 182620
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.945859432220459,
      "learning_rate": 6.145064138833643e-05,
      "loss": 2.8248,
      "step": 182621
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.615828514099121,
      "learning_rate": 6.144816091920301e-05,
      "loss": 2.9595,
      "step": 182622
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7028720378875732,
      "learning_rate": 6.144568049442096e-05,
      "loss": 3.1785,
      "step": 182623
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.498084783554077,
      "learning_rate": 6.144320011399062e-05,
      "loss": 2.9107,
      "step": 182624
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.12013578414917,
      "learning_rate": 6.144071977791253e-05,
      "loss": 3.0659,
      "step": 182625
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1208949089050293,
      "learning_rate": 6.143823948618712e-05,
      "loss": 2.7465,
      "step": 182626
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8103630542755127,
      "learning_rate": 6.143575923881487e-05,
      "loss": 2.9507,
      "step": 182627
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1603775024414062,
      "learning_rate": 6.143327903579612e-05,
      "loss": 3.0495,
      "step": 182628
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.399691581726074,
      "learning_rate": 6.14307988771315e-05,
      "loss": 2.8306,
      "step": 182629
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5789530277252197,
      "learning_rate": 6.142831876282131e-05,
      "loss": 2.9041,
      "step": 182630
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7351553440093994,
      "learning_rate": 6.142583869286622e-05,
      "loss": 3.046,
      "step": 182631
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4171416759490967,
      "learning_rate": 6.142335866726647e-05,
      "loss": 2.9238,
      "step": 182632
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.691840171813965,
      "learning_rate": 6.142087868602276e-05,
      "loss": 2.7997,
      "step": 182633
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.132642984390259,
      "learning_rate": 6.141839874913529e-05,
      "loss": 3.0756,
      "step": 182634
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9351415634155273,
      "learning_rate": 6.141591885660469e-05,
      "loss": 3.1091,
      "step": 182635
    },
    {
      "epoch": 2.38,
      "grad_norm": 10.282163619995117,
      "learning_rate": 6.141343900843128e-05,
      "loss": 2.7507,
      "step": 182636
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.138986110687256,
      "learning_rate": 6.141095920461573e-05,
      "loss": 3.0637,
      "step": 182637
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7962169647216797,
      "learning_rate": 6.140847944515832e-05,
      "loss": 2.8891,
      "step": 182638
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.783360481262207,
      "learning_rate": 6.14059997300597e-05,
      "loss": 3.0193,
      "step": 182639
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9866316318511963,
      "learning_rate": 6.140352005932005e-05,
      "loss": 3.0804,
      "step": 182640
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.531527280807495,
      "learning_rate": 6.14010404329401e-05,
      "loss": 2.7936,
      "step": 182641
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.641779661178589,
      "learning_rate": 6.139856085092009e-05,
      "loss": 3.1814,
      "step": 182642
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0976381301879883,
      "learning_rate": 6.13960813132607e-05,
      "loss": 3.0672,
      "step": 182643
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7423248291015625,
      "learning_rate": 6.139360181996218e-05,
      "loss": 2.6948,
      "step": 182644
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3587839603424072,
      "learning_rate": 6.139112237102519e-05,
      "loss": 2.8168,
      "step": 182645
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3951427936553955,
      "learning_rate": 6.138864296645006e-05,
      "loss": 2.8106,
      "step": 182646
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8359012603759766,
      "learning_rate": 6.138616360623732e-05,
      "loss": 2.9971,
      "step": 182647
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.530860662460327,
      "learning_rate": 6.13836842903873e-05,
      "loss": 2.9252,
      "step": 182648
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4895975589752197,
      "learning_rate": 6.138120501890066e-05,
      "loss": 3.107,
      "step": 182649
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5777854919433594,
      "learning_rate": 6.137872579177767e-05,
      "loss": 2.7385,
      "step": 182650
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.124090194702148,
      "learning_rate": 6.137624660901895e-05,
      "loss": 3.0536,
      "step": 182651
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7495079040527344,
      "learning_rate": 6.137376747062486e-05,
      "loss": 2.7788,
      "step": 182652
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.046715259552002,
      "learning_rate": 6.137128837659594e-05,
      "loss": 2.8542,
      "step": 182653
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0658810138702393,
      "learning_rate": 6.136880932693248e-05,
      "loss": 2.9822,
      "step": 182654
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.822279930114746,
      "learning_rate": 6.136633032163518e-05,
      "loss": 2.7946,
      "step": 182655
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.6908819675445557,
      "learning_rate": 6.136385136070428e-05,
      "loss": 2.9692,
      "step": 182656
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.449221134185791,
      "learning_rate": 6.136137244414047e-05,
      "loss": 3.0633,
      "step": 182657
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9620585441589355,
      "learning_rate": 6.135889357194402e-05,
      "loss": 2.7766,
      "step": 182658
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.787315845489502,
      "learning_rate": 6.135641474411542e-05,
      "loss": 3.1452,
      "step": 182659
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2772557735443115,
      "learning_rate": 6.135393596065523e-05,
      "loss": 2.8921,
      "step": 182660
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7814409732818604,
      "learning_rate": 6.135145722156385e-05,
      "loss": 3.0047,
      "step": 182661
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.88895845413208,
      "learning_rate": 6.134897852684168e-05,
      "loss": 3.132,
      "step": 182662
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0234763622283936,
      "learning_rate": 6.134649987648927e-05,
      "loss": 2.8895,
      "step": 182663
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.72767972946167,
      "learning_rate": 6.134402127050711e-05,
      "loss": 2.8683,
      "step": 182664
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.425889730453491,
      "learning_rate": 6.134154270889549e-05,
      "loss": 2.9361,
      "step": 182665
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4491045475006104,
      "learning_rate": 6.133906419165505e-05,
      "loss": 3.0298,
      "step": 182666
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.032543182373047,
      "learning_rate": 6.133658571878612e-05,
      "loss": 2.9751,
      "step": 182667
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5362746715545654,
      "learning_rate": 6.133410729028932e-05,
      "loss": 2.661,
      "step": 182668
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.550886631011963,
      "learning_rate": 6.133162890616499e-05,
      "loss": 2.8831,
      "step": 182669
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.506214141845703,
      "learning_rate": 6.132915056641363e-05,
      "loss": 2.8752,
      "step": 182670
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8144328594207764,
      "learning_rate": 6.132667227103558e-05,
      "loss": 2.7504,
      "step": 182671
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.950465679168701,
      "learning_rate": 6.13241940200315e-05,
      "loss": 2.9001,
      "step": 182672
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4502642154693604,
      "learning_rate": 6.13217158134017e-05,
      "loss": 3.4515,
      "step": 182673
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9374539852142334,
      "learning_rate": 6.131923765114675e-05,
      "loss": 3.0201,
      "step": 182674
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9319944381713867,
      "learning_rate": 6.131675953326708e-05,
      "loss": 3.0957,
      "step": 182675
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4787216186523438,
      "learning_rate": 6.131428145976313e-05,
      "loss": 3.1108,
      "step": 182676
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7472012042999268,
      "learning_rate": 6.131180343063526e-05,
      "loss": 3.002,
      "step": 182677
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7676563262939453,
      "learning_rate": 6.130932544588414e-05,
      "loss": 3.0092,
      "step": 182678
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.918440103530884,
      "learning_rate": 6.130684750551001e-05,
      "loss": 3.1273,
      "step": 182679
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6396090984344482,
      "learning_rate": 6.130436960951352e-05,
      "loss": 2.9349,
      "step": 182680
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1098389625549316,
      "learning_rate": 6.130189175789511e-05,
      "loss": 2.8307,
      "step": 182681
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4111549854278564,
      "learning_rate": 6.129941395065512e-05,
      "loss": 2.8747,
      "step": 182682
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.140937566757202,
      "learning_rate": 6.129693618779403e-05,
      "loss": 3.1548,
      "step": 182683
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.475296974182129,
      "learning_rate": 6.129445846931241e-05,
      "loss": 2.7199,
      "step": 182684
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.460069179534912,
      "learning_rate": 6.12919807952106e-05,
      "loss": 3.0546,
      "step": 182685
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.174041748046875,
      "learning_rate": 6.128950316548918e-05,
      "loss": 3.0506,
      "step": 182686
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5818841457366943,
      "learning_rate": 6.128702558014853e-05,
      "loss": 2.9485,
      "step": 182687
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2986562252044678,
      "learning_rate": 6.128454803918914e-05,
      "loss": 3.0952,
      "step": 182688
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.2811124324798584,
      "learning_rate": 6.128207054261137e-05,
      "loss": 2.9781,
      "step": 182689
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.431912660598755,
      "learning_rate": 6.127959309041588e-05,
      "loss": 3.0311,
      "step": 182690
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7666547298431396,
      "learning_rate": 6.127711568260292e-05,
      "loss": 3.1223,
      "step": 182691
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9998438358306885,
      "learning_rate": 6.127463831917312e-05,
      "loss": 2.8776,
      "step": 182692
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6659905910491943,
      "learning_rate": 6.127216100012688e-05,
      "loss": 2.8089,
      "step": 182693
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4196219444274902,
      "learning_rate": 6.126968372546464e-05,
      "loss": 2.909,
      "step": 182694
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.785857915878296,
      "learning_rate": 6.126720649518684e-05,
      "loss": 2.8331,
      "step": 182695
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9800288677215576,
      "learning_rate": 6.126472930929399e-05,
      "loss": 2.9945,
      "step": 182696
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.646604061126709,
      "learning_rate": 6.126225216778651e-05,
      "loss": 3.1385,
      "step": 182697
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8251953125,
      "learning_rate": 6.125977507066492e-05,
      "loss": 2.7496,
      "step": 182698
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.14428973197937,
      "learning_rate": 6.125729801792959e-05,
      "loss": 3.0395,
      "step": 182699
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7243545055389404,
      "learning_rate": 6.12548210095812e-05,
      "loss": 3.0725,
      "step": 182700
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.667093515396118,
      "learning_rate": 6.125234404561985e-05,
      "loss": 3.1429,
      "step": 182701
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6869442462921143,
      "learning_rate": 6.124986712604631e-05,
      "loss": 2.9778,
      "step": 182702
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.067672252655029,
      "learning_rate": 6.124739025086082e-05,
      "loss": 3.0805,
      "step": 182703
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8062405586242676,
      "learning_rate": 6.124491342006406e-05,
      "loss": 2.8341,
      "step": 182704
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6072793006896973,
      "learning_rate": 6.124243663365625e-05,
      "loss": 3.0434,
      "step": 182705
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6193835735321045,
      "learning_rate": 6.123995989163818e-05,
      "loss": 2.9985,
      "step": 182706
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3169145584106445,
      "learning_rate": 6.123748319400993e-05,
      "loss": 2.9458,
      "step": 182707
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.313502550125122,
      "learning_rate": 6.123500654077223e-05,
      "loss": 3.0788,
      "step": 182708
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.831054925918579,
      "learning_rate": 6.123252993192537e-05,
      "loss": 2.8545,
      "step": 182709
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.595247268676758,
      "learning_rate": 6.123005336746996e-05,
      "loss": 3.1561,
      "step": 182710
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6221861839294434,
      "learning_rate": 6.12275768474063e-05,
      "loss": 2.9794,
      "step": 182711
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.67582631111145,
      "learning_rate": 6.12251003717351e-05,
      "loss": 3.087,
      "step": 182712
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9421846866607666,
      "learning_rate": 6.122262394045653e-05,
      "loss": 2.8537,
      "step": 182713
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5470879077911377,
      "learning_rate": 6.122014755357124e-05,
      "loss": 2.8007,
      "step": 182714
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.543142318725586,
      "learning_rate": 6.121767121107955e-05,
      "loss": 2.9876,
      "step": 182715
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0328967571258545,
      "learning_rate": 6.12151949129821e-05,
      "loss": 3.0377,
      "step": 182716
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.685330629348755,
      "learning_rate": 6.121271865927915e-05,
      "loss": 3.0892,
      "step": 182717
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.669372081756592,
      "learning_rate": 6.121024244997144e-05,
      "loss": 2.8285,
      "step": 182718
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1019816398620605,
      "learning_rate": 6.12077662850591e-05,
      "loss": 2.9448,
      "step": 182719
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5420010089874268,
      "learning_rate": 6.12052901645428e-05,
      "loss": 2.8162,
      "step": 182720
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9273197650909424,
      "learning_rate": 6.120281408842287e-05,
      "loss": 3.0024,
      "step": 182721
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.386512279510498,
      "learning_rate": 6.120033805669995e-05,
      "loss": 3.0297,
      "step": 182722
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0785138607025146,
      "learning_rate": 6.119786206937428e-05,
      "loss": 2.9667,
      "step": 182723
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9162650108337402,
      "learning_rate": 6.119538612644664e-05,
      "loss": 2.7707,
      "step": 182724
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.751204490661621,
      "learning_rate": 6.119291022791707e-05,
      "loss": 3.093,
      "step": 182725
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9078099727630615,
      "learning_rate": 6.119043437378636e-05,
      "loss": 2.891,
      "step": 182726
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5407893657684326,
      "learning_rate": 6.118795856405476e-05,
      "loss": 2.9711,
      "step": 182727
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4829227924346924,
      "learning_rate": 6.118548279872292e-05,
      "loss": 2.8568,
      "step": 182728
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3439064025878906,
      "learning_rate": 6.118300707779113e-05,
      "loss": 2.8258,
      "step": 182729
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4269750118255615,
      "learning_rate": 6.118053140125998e-05,
      "loss": 2.9949,
      "step": 182730
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.76409912109375,
      "learning_rate": 6.117805576912992e-05,
      "loss": 3.0026,
      "step": 182731
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.294177293777466,
      "learning_rate": 6.117558018140134e-05,
      "loss": 2.8492,
      "step": 182732
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4872243404388428,
      "learning_rate": 6.117310463807465e-05,
      "loss": 2.8698,
      "step": 182733
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4746694564819336,
      "learning_rate": 6.117062913915046e-05,
      "loss": 2.9309,
      "step": 182734
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.891754627227783,
      "learning_rate": 6.116815368462909e-05,
      "loss": 3.1292,
      "step": 182735
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7496368885040283,
      "learning_rate": 6.116567827451114e-05,
      "loss": 2.8698,
      "step": 182736
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.540191173553467,
      "learning_rate": 6.116320290879701e-05,
      "loss": 2.9037,
      "step": 182737
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.735445022583008,
      "learning_rate": 6.116072758748714e-05,
      "loss": 3.1168,
      "step": 182738
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5289652347564697,
      "learning_rate": 6.115825231058193e-05,
      "loss": 2.9562,
      "step": 182739
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.942033529281616,
      "learning_rate": 6.1155777078082e-05,
      "loss": 2.9484,
      "step": 182740
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9543344974517822,
      "learning_rate": 6.115330188998762e-05,
      "loss": 2.7411,
      "step": 182741
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.827500343322754,
      "learning_rate": 6.115082674629941e-05,
      "loss": 2.9161,
      "step": 182742
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.670522928237915,
      "learning_rate": 6.114835164701781e-05,
      "loss": 3.0012,
      "step": 182743
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.458486318588257,
      "learning_rate": 6.114587659214312e-05,
      "loss": 2.9635,
      "step": 182744
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8423776626586914,
      "learning_rate": 6.114340158167601e-05,
      "loss": 2.7014,
      "step": 182745
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.695798873901367,
      "learning_rate": 6.114092661561689e-05,
      "loss": 2.7855,
      "step": 182746
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.556159257888794,
      "learning_rate": 6.113845169396607e-05,
      "loss": 2.766,
      "step": 182747
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.321120500564575,
      "learning_rate": 6.113597681672419e-05,
      "loss": 2.7374,
      "step": 182748
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7237086296081543,
      "learning_rate": 6.113350198389167e-05,
      "loss": 2.9593,
      "step": 182749
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4218552112579346,
      "learning_rate": 6.113102719546884e-05,
      "loss": 3.0662,
      "step": 182750
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.674900770187378,
      "learning_rate": 6.112855245145636e-05,
      "loss": 2.8468,
      "step": 182751
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.315122604370117,
      "learning_rate": 6.112607775185458e-05,
      "loss": 2.9656,
      "step": 182752
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5933375358581543,
      "learning_rate": 6.112360309666388e-05,
      "loss": 2.8884,
      "step": 182753
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0100910663604736,
      "learning_rate": 6.11211284858849e-05,
      "loss": 2.9699,
      "step": 182754
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6921839714050293,
      "learning_rate": 6.111865391951801e-05,
      "loss": 3.1234,
      "step": 182755
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7698826789855957,
      "learning_rate": 6.11161793975636e-05,
      "loss": 2.9053,
      "step": 182756
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0657174587249756,
      "learning_rate": 6.111370492002227e-05,
      "loss": 3.0451,
      "step": 182757
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8525633811950684,
      "learning_rate": 6.111123048689433e-05,
      "loss": 2.8621,
      "step": 182758
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.23362922668457,
      "learning_rate": 6.110875609818042e-05,
      "loss": 2.9751,
      "step": 182759
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8494441509246826,
      "learning_rate": 6.11062817538809e-05,
      "loss": 2.8662,
      "step": 182760
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5509629249572754,
      "learning_rate": 6.110380745399622e-05,
      "loss": 3.1237,
      "step": 182761
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.077928304672241,
      "learning_rate": 6.110133319852676e-05,
      "loss": 2.7453,
      "step": 182762
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.787292003631592,
      "learning_rate": 6.109885898747317e-05,
      "loss": 2.9514,
      "step": 182763
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7085471153259277,
      "learning_rate": 6.109638482083573e-05,
      "loss": 3.0023,
      "step": 182764
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3959128856658936,
      "learning_rate": 6.109391069861504e-05,
      "loss": 2.988,
      "step": 182765
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4857053756713867,
      "learning_rate": 6.109143662081143e-05,
      "loss": 3.0544,
      "step": 182766
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9763381481170654,
      "learning_rate": 6.108896258742562e-05,
      "loss": 2.9224,
      "step": 182767
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.101545810699463,
      "learning_rate": 6.108648859845769e-05,
      "loss": 2.9679,
      "step": 182768
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.894207000732422,
      "learning_rate": 6.10840146539084e-05,
      "loss": 3.0104,
      "step": 182769
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8035593032836914,
      "learning_rate": 6.108154075377798e-05,
      "loss": 2.8097,
      "step": 182770
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8950891494750977,
      "learning_rate": 6.107906689806714e-05,
      "loss": 2.7944,
      "step": 182771
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.342230796813965,
      "learning_rate": 6.107659308677612e-05,
      "loss": 2.7149,
      "step": 182772
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7885448932647705,
      "learning_rate": 6.107411931990562e-05,
      "loss": 2.8296,
      "step": 182773
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.5756072998046875,
      "learning_rate": 6.10716455974558e-05,
      "loss": 2.8525,
      "step": 182774
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.6401360034942627,
      "learning_rate": 6.106917191942732e-05,
      "loss": 2.9532,
      "step": 182775
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0613515377044678,
      "learning_rate": 6.106669828582054e-05,
      "loss": 3.0413,
      "step": 182776
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.169823408126831,
      "learning_rate": 6.106422469663605e-05,
      "loss": 2.9391,
      "step": 182777
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.943272113800049,
      "learning_rate": 6.106175115187414e-05,
      "loss": 2.8812,
      "step": 182778
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.643057346343994,
      "learning_rate": 6.105927765153552e-05,
      "loss": 3.1187,
      "step": 182779
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8732151985168457,
      "learning_rate": 6.105680419562034e-05,
      "loss": 2.6795,
      "step": 182780
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.731822967529297,
      "learning_rate": 6.105433078412931e-05,
      "loss": 3.0471,
      "step": 182781
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.870657444000244,
      "learning_rate": 6.105185741706269e-05,
      "loss": 2.8028,
      "step": 182782
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0399441719055176,
      "learning_rate": 6.10493840944211e-05,
      "loss": 2.7813,
      "step": 182783
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.697082996368408,
      "learning_rate": 6.10469108162049e-05,
      "loss": 2.8849,
      "step": 182784
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.300560474395752,
      "learning_rate": 6.104443758241473e-05,
      "loss": 2.929,
      "step": 182785
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1538455486297607,
      "learning_rate": 6.104196439305073e-05,
      "loss": 2.8382,
      "step": 182786
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.993038177490234,
      "learning_rate": 6.103949124811365e-05,
      "loss": 2.9595,
      "step": 182787
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.066971302032471,
      "learning_rate": 6.103701814760373e-05,
      "loss": 2.9337,
      "step": 182788
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.684441328048706,
      "learning_rate": 6.103454509152165e-05,
      "loss": 3.1662,
      "step": 182789
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9772562980651855,
      "learning_rate": 6.1032072079867656e-05,
      "loss": 2.8181,
      "step": 182790
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4423277378082275,
      "learning_rate": 6.1029599112642493e-05,
      "loss": 2.6648,
      "step": 182791
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.004793643951416,
      "learning_rate": 6.1027126189846255e-05,
      "loss": 2.8746,
      "step": 182792
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.430928707122803,
      "learning_rate": 6.102465331147967e-05,
      "loss": 2.6495,
      "step": 182793
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.42564582824707,
      "learning_rate": 6.102218047754306e-05,
      "loss": 2.8667,
      "step": 182794
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.31339168548584,
      "learning_rate": 6.101970768803697e-05,
      "loss": 2.8795,
      "step": 182795
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3463563919067383,
      "learning_rate": 6.101723494296177e-05,
      "loss": 3.0459,
      "step": 182796
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.187976360321045,
      "learning_rate": 6.101476224231816e-05,
      "loss": 2.9069,
      "step": 182797
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3323307037353516,
      "learning_rate": 6.1012289586106245e-05,
      "loss": 2.8711,
      "step": 182798
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.0161943435668945,
      "learning_rate": 6.100981697432672e-05,
      "loss": 2.6946,
      "step": 182799
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.965639591217041,
      "learning_rate": 6.100734440697992e-05,
      "loss": 2.9573,
      "step": 182800
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.826691150665283,
      "learning_rate": 6.100487188406641e-05,
      "loss": 2.7017,
      "step": 182801
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.913200616836548,
      "learning_rate": 6.1002399405586556e-05,
      "loss": 2.9081,
      "step": 182802
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.27089262008667,
      "learning_rate": 6.099992697154103e-05,
      "loss": 2.8155,
      "step": 182803
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7994656562805176,
      "learning_rate": 6.099745458192995e-05,
      "loss": 2.7906,
      "step": 182804
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5710837841033936,
      "learning_rate": 6.099498223675406e-05,
      "loss": 2.9188,
      "step": 182805
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3869643211364746,
      "learning_rate": 6.099250993601364e-05,
      "loss": 2.9929,
      "step": 182806
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.165838718414307,
      "learning_rate": 6.099003767970926e-05,
      "loss": 2.9085,
      "step": 182807
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9180402755737305,
      "learning_rate": 6.0987565467841304e-05,
      "loss": 2.9136,
      "step": 182808
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9610435962677,
      "learning_rate": 6.098509330041041e-05,
      "loss": 3.0268,
      "step": 182809
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.01639461517334,
      "learning_rate": 6.0982621177416755e-05,
      "loss": 2.7791,
      "step": 182810
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.878692150115967,
      "learning_rate": 6.0980149098861e-05,
      "loss": 2.8182,
      "step": 182811
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3213610649108887,
      "learning_rate": 6.097767706474349e-05,
      "loss": 2.8994,
      "step": 182812
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.068154811859131,
      "learning_rate": 6.0975205075064804e-05,
      "loss": 2.888,
      "step": 182813
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1695401668548584,
      "learning_rate": 6.097273312982527e-05,
      "loss": 3.0358,
      "step": 182814
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3809823989868164,
      "learning_rate": 6.097026122902559e-05,
      "loss": 2.8631,
      "step": 182815
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6313722133636475,
      "learning_rate": 6.096778937266585e-05,
      "loss": 2.8868,
      "step": 182816
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.162363529205322,
      "learning_rate": 6.096531756074684e-05,
      "loss": 3.1417,
      "step": 182817
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.255908966064453,
      "learning_rate": 6.096284579326878e-05,
      "loss": 3.1084,
      "step": 182818
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.779334306716919,
      "learning_rate": 6.09603740702323e-05,
      "loss": 2.8463,
      "step": 182819
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.707674503326416,
      "learning_rate": 6.095790239163777e-05,
      "loss": 2.8679,
      "step": 182820
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1152255535125732,
      "learning_rate": 6.0955430757485726e-05,
      "loss": 3.0385,
      "step": 182821
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5556867122650146,
      "learning_rate": 6.095295916777656e-05,
      "loss": 2.8509,
      "step": 182822
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.429913282394409,
      "learning_rate": 6.095048762251078e-05,
      "loss": 2.7944,
      "step": 182823
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9307713508605957,
      "learning_rate": 6.094801612168875e-05,
      "loss": 2.8838,
      "step": 182824
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4540483951568604,
      "learning_rate": 6.094554466531106e-05,
      "loss": 2.811,
      "step": 182825
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7829062938690186,
      "learning_rate": 6.094307325337803e-05,
      "loss": 2.8224,
      "step": 182826
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.102499485015869,
      "learning_rate": 6.094060188589024e-05,
      "loss": 2.9123,
      "step": 182827
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.662074327468872,
      "learning_rate": 6.093813056284816e-05,
      "loss": 2.7964,
      "step": 182828
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6143031120300293,
      "learning_rate": 6.093565928425207e-05,
      "loss": 3.083,
      "step": 182829
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7359871864318848,
      "learning_rate": 6.093318805010266e-05,
      "loss": 2.619,
      "step": 182830
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9361956119537354,
      "learning_rate": 6.0930716860400286e-05,
      "loss": 2.9408,
      "step": 182831
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2003817558288574,
      "learning_rate": 6.0928245715145296e-05,
      "loss": 3.1534,
      "step": 182832
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.806375503540039,
      "learning_rate": 6.0925774614338354e-05,
      "loss": 3.0626,
      "step": 182833
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.997105836868286,
      "learning_rate": 6.092330355797982e-05,
      "loss": 2.9682,
      "step": 182834
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4903221130371094,
      "learning_rate": 6.0920832546070095e-05,
      "loss": 2.8888,
      "step": 182835
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9975504875183105,
      "learning_rate": 6.0918361578609754e-05,
      "loss": 2.9567,
      "step": 182836
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4356563091278076,
      "learning_rate": 6.091589065559922e-05,
      "loss": 2.9178,
      "step": 182837
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2961668968200684,
      "learning_rate": 6.0913419777038865e-05,
      "loss": 2.9472,
      "step": 182838
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4455947875976562,
      "learning_rate": 6.0910948942929286e-05,
      "loss": 3.027,
      "step": 182839
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.707102060317993,
      "learning_rate": 6.090847815327088e-05,
      "loss": 3.0291,
      "step": 182840
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1785168647766113,
      "learning_rate": 6.0906007408063986e-05,
      "loss": 2.6526,
      "step": 182841
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5003459453582764,
      "learning_rate": 6.0903536707309295e-05,
      "loss": 3.1313,
      "step": 182842
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1603446006774902,
      "learning_rate": 6.090106605100705e-05,
      "loss": 2.7559,
      "step": 182843
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.858337640762329,
      "learning_rate": 6.089859543915791e-05,
      "loss": 3.1301,
      "step": 182844
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.566077470779419,
      "learning_rate": 6.089612487176222e-05,
      "loss": 3.1691,
      "step": 182845
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9184157848358154,
      "learning_rate": 6.0893654348820463e-05,
      "loss": 2.7515,
      "step": 182846
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.176619529724121,
      "learning_rate": 6.089118387033302e-05,
      "loss": 3.0485,
      "step": 182847
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4717042446136475,
      "learning_rate": 6.088871343630047e-05,
      "loss": 2.9597,
      "step": 182848
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.789755344390869,
      "learning_rate": 6.088624304672317e-05,
      "loss": 3.1199,
      "step": 182849
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.300648212432861,
      "learning_rate": 6.0883772701601705e-05,
      "loss": 2.9424,
      "step": 182850
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.246302366256714,
      "learning_rate": 6.088130240093645e-05,
      "loss": 3.058,
      "step": 182851
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7332775592803955,
      "learning_rate": 6.087883214472786e-05,
      "loss": 2.9109,
      "step": 182852
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.893070936203003,
      "learning_rate": 6.0876361932976344e-05,
      "loss": 2.9345,
      "step": 182853
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.084981918334961,
      "learning_rate": 6.08738917656825e-05,
      "loss": 2.8359,
      "step": 182854
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.722459316253662,
      "learning_rate": 6.087142164284666e-05,
      "loss": 2.9626,
      "step": 182855
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.781400203704834,
      "learning_rate": 6.086895156446939e-05,
      "loss": 2.8399,
      "step": 182856
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.325221300125122,
      "learning_rate": 6.0866481530551025e-05,
      "loss": 2.9826,
      "step": 182857
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.565483331680298,
      "learning_rate": 6.086401154109226e-05,
      "loss": 2.908,
      "step": 182858
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.57171630859375,
      "learning_rate": 6.0861541596093235e-05,
      "loss": 2.8723,
      "step": 182859
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1741790771484375,
      "learning_rate": 6.085907169555464e-05,
      "loss": 3.0286,
      "step": 182860
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.52911639213562,
      "learning_rate": 6.085660183947675e-05,
      "loss": 3.1591,
      "step": 182861
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.6163387298583984,
      "learning_rate": 6.0854132027860226e-05,
      "loss": 2.897,
      "step": 182862
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3694968223571777,
      "learning_rate": 6.085166226070537e-05,
      "loss": 3.1602,
      "step": 182863
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1225991249084473,
      "learning_rate": 6.084919253801285e-05,
      "loss": 2.9466,
      "step": 182864
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.737630844116211,
      "learning_rate": 6.084672285978283e-05,
      "loss": 2.8754,
      "step": 182865
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.385793685913086,
      "learning_rate": 6.084425322601597e-05,
      "loss": 3.1416,
      "step": 182866
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7182960510253906,
      "learning_rate": 6.084178363671265e-05,
      "loss": 3.0818,
      "step": 182867
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.846592903137207,
      "learning_rate": 6.083931409187339e-05,
      "loss": 2.9699,
      "step": 182868
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6599960327148438,
      "learning_rate": 6.083684459149857e-05,
      "loss": 2.8516,
      "step": 182869
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5392584800720215,
      "learning_rate": 6.0834375135588806e-05,
      "loss": 3.0724,
      "step": 182870
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.635173797607422,
      "learning_rate": 6.0831905724144347e-05,
      "loss": 2.8003,
      "step": 182871
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.841367244720459,
      "learning_rate": 6.082943635716578e-05,
      "loss": 2.995,
      "step": 182872
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.874979019165039,
      "learning_rate": 6.082696703465348e-05,
      "loss": 3.0051,
      "step": 182873
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.85746169090271,
      "learning_rate": 6.0824497756608045e-05,
      "loss": 2.8243,
      "step": 182874
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.350159168243408,
      "learning_rate": 6.082202852302978e-05,
      "loss": 3.0903,
      "step": 182875
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8246614933013916,
      "learning_rate": 6.081955933391933e-05,
      "loss": 2.8501,
      "step": 182876
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.123872995376587,
      "learning_rate": 6.0817090189276915e-05,
      "loss": 3.126,
      "step": 182877
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6497929096221924,
      "learning_rate": 6.08146210891032e-05,
      "loss": 2.8449,
      "step": 182878
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6047191619873047,
      "learning_rate": 6.081215203339848e-05,
      "loss": 2.9908,
      "step": 182879
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.2725539207458496,
      "learning_rate": 6.0809683022163344e-05,
      "loss": 2.9594,
      "step": 182880
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7171967029571533,
      "learning_rate": 6.0807214055398145e-05,
      "loss": 3.0342,
      "step": 182881
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5736794471740723,
      "learning_rate": 6.080474513310357e-05,
      "loss": 2.8152,
      "step": 182882
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.433128833770752,
      "learning_rate": 6.080227625527976e-05,
      "loss": 3.1027,
      "step": 182883
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.254833698272705,
      "learning_rate": 6.079980742192737e-05,
      "loss": 2.8014,
      "step": 182884
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2422986030578613,
      "learning_rate": 6.079733863304674e-05,
      "loss": 3.0452,
      "step": 182885
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.810473918914795,
      "learning_rate": 6.079486988863848e-05,
      "loss": 2.9046,
      "step": 182886
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0566799640655518,
      "learning_rate": 6.07924011887029e-05,
      "loss": 2.7886,
      "step": 182887
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2563059329986572,
      "learning_rate": 6.078993253324068e-05,
      "loss": 2.9022,
      "step": 182888
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.43103289604187,
      "learning_rate": 6.0787463922251986e-05,
      "loss": 3.143,
      "step": 182889
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.032347679138184,
      "learning_rate": 6.0784995355737454e-05,
      "loss": 3.0019,
      "step": 182890
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5792346000671387,
      "learning_rate": 6.078252683369748e-05,
      "loss": 2.9108,
      "step": 182891
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.175534725189209,
      "learning_rate": 6.0780058356132586e-05,
      "loss": 2.972,
      "step": 182892
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.1059088706970215,
      "learning_rate": 6.0777589923043125e-05,
      "loss": 2.7759,
      "step": 182893
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.037590742111206,
      "learning_rate": 6.077512153442982e-05,
      "loss": 2.8622,
      "step": 182894
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.627213954925537,
      "learning_rate": 6.077265319029273e-05,
      "loss": 2.8336,
      "step": 182895
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.23382043838501,
      "learning_rate": 6.07701848906326e-05,
      "loss": 2.9941,
      "step": 182896
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.622068405151367,
      "learning_rate": 6.0767716635449756e-05,
      "loss": 2.7408,
      "step": 182897
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4868173599243164,
      "learning_rate": 6.076524842474477e-05,
      "loss": 3.3317,
      "step": 182898
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7406959533691406,
      "learning_rate": 6.076278025851797e-05,
      "loss": 2.8682,
      "step": 182899
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.523756980895996,
      "learning_rate": 6.076031213677e-05,
      "loss": 3.1485,
      "step": 182900
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.155505418777466,
      "learning_rate": 6.075784405950114e-05,
      "loss": 2.9833,
      "step": 182901
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1425955295562744,
      "learning_rate": 6.0755376026711935e-05,
      "loss": 2.9899,
      "step": 182902
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4636013507843018,
      "learning_rate": 6.075290803840275e-05,
      "loss": 3.1708,
      "step": 182903
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9953105449676514,
      "learning_rate": 6.075044009457421e-05,
      "loss": 2.981,
      "step": 182904
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.433505058288574,
      "learning_rate": 6.074797219522657e-05,
      "loss": 3.0862,
      "step": 182905
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9650604724884033,
      "learning_rate": 6.0745504340360505e-05,
      "loss": 3.0122,
      "step": 182906
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6758649349212646,
      "learning_rate": 6.074303652997636e-05,
      "loss": 2.8296,
      "step": 182907
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9482059478759766,
      "learning_rate": 6.074056876407457e-05,
      "loss": 2.8243,
      "step": 182908
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.76800537109375,
      "learning_rate": 6.073810104265556e-05,
      "loss": 3.1134,
      "step": 182909
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.584731340408325,
      "learning_rate": 6.073563336571997e-05,
      "loss": 3.1426,
      "step": 182910
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5448920726776123,
      "learning_rate": 6.073316573326803e-05,
      "loss": 3.0203,
      "step": 182911
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1916191577911377,
      "learning_rate": 6.0730698145300403e-05,
      "loss": 3.1323,
      "step": 182912
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.666281223297119,
      "learning_rate": 6.072823060181743e-05,
      "loss": 2.9613,
      "step": 182913
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.2415122985839844,
      "learning_rate": 6.07257631028196e-05,
      "loss": 3.1287,
      "step": 182914
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.073373794555664,
      "learning_rate": 6.0723295648307324e-05,
      "loss": 3.0629,
      "step": 182915
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.210970878601074,
      "learning_rate": 6.072082823828116e-05,
      "loss": 3.0979,
      "step": 182916
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.457460403442383,
      "learning_rate": 6.071836087274141e-05,
      "loss": 3.009,
      "step": 182917
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6427125930786133,
      "learning_rate": 6.071589355168874e-05,
      "loss": 2.9677,
      "step": 182918
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.763540506362915,
      "learning_rate": 6.071342627512352e-05,
      "loss": 3.0737,
      "step": 182919
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5274839401245117,
      "learning_rate": 6.071095904304608e-05,
      "loss": 3.0385,
      "step": 182920
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7864952087402344,
      "learning_rate": 6.070849185545709e-05,
      "loss": 2.6775,
      "step": 182921
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.441521167755127,
      "learning_rate": 6.0706024712356874e-05,
      "loss": 2.8076,
      "step": 182922
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.621783971786499,
      "learning_rate": 6.0703557613745865e-05,
      "loss": 3.0755,
      "step": 182923
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5241613388061523,
      "learning_rate": 6.070109055962468e-05,
      "loss": 3.2131,
      "step": 182924
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.295926570892334,
      "learning_rate": 6.069862354999366e-05,
      "loss": 3.1513,
      "step": 182925
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5128233432769775,
      "learning_rate": 6.0696156584853194e-05,
      "loss": 3.0413,
      "step": 182926
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.408573865890503,
      "learning_rate": 6.0693689664203936e-05,
      "loss": 3.1144,
      "step": 182927
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3667216300964355,
      "learning_rate": 6.0691222788046155e-05,
      "loss": 3.1169,
      "step": 182928
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5278308391571045,
      "learning_rate": 6.068875595638045e-05,
      "loss": 2.9085,
      "step": 182929
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2623658180236816,
      "learning_rate": 6.0686289169207224e-05,
      "loss": 3.0939,
      "step": 182930
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5842936038970947,
      "learning_rate": 6.068382242652694e-05,
      "loss": 2.9467,
      "step": 182931
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.899500846862793,
      "learning_rate": 6.068135572834e-05,
      "loss": 3.2461,
      "step": 182932
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.525824785232544,
      "learning_rate": 6.067888907464696e-05,
      "loss": 3.2437,
      "step": 182933
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8433122634887695,
      "learning_rate": 6.067642246544817e-05,
      "loss": 3.0113,
      "step": 182934
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8514623641967773,
      "learning_rate": 6.067395590074422e-05,
      "loss": 2.963,
      "step": 182935
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5405304431915283,
      "learning_rate": 6.067148938053551e-05,
      "loss": 3.1319,
      "step": 182936
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7244303226470947,
      "learning_rate": 6.066902290482247e-05,
      "loss": 2.9878,
      "step": 182937
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.827028751373291,
      "learning_rate": 6.066655647360551e-05,
      "loss": 3.1138,
      "step": 182938
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.6093027591705322,
      "learning_rate": 6.066409008688522e-05,
      "loss": 2.8914,
      "step": 182939
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.470876455307007,
      "learning_rate": 6.066162374466193e-05,
      "loss": 2.8251,
      "step": 182940
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1859171390533447,
      "learning_rate": 6.065915744693622e-05,
      "loss": 2.9183,
      "step": 182941
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.56130313873291,
      "learning_rate": 6.065669119370845e-05,
      "loss": 3.1975,
      "step": 182942
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8196160793304443,
      "learning_rate": 6.065422498497925e-05,
      "loss": 2.9413,
      "step": 182943
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3841116428375244,
      "learning_rate": 6.065175882074879e-05,
      "loss": 2.815,
      "step": 182944
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.441049575805664,
      "learning_rate": 6.064929270101776e-05,
      "loss": 2.8751,
      "step": 182945
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7268152236938477,
      "learning_rate": 6.064682662578647e-05,
      "loss": 2.8854,
      "step": 182946
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.790009021759033,
      "learning_rate": 6.0644360595055555e-05,
      "loss": 2.9378,
      "step": 182947
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3043127059936523,
      "learning_rate": 6.064189460882527e-05,
      "loss": 2.9245,
      "step": 182948
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7450900077819824,
      "learning_rate": 6.0639428667096326e-05,
      "loss": 3.0243,
      "step": 182949
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9575791358947754,
      "learning_rate": 6.063696276986888e-05,
      "loss": 3.0492,
      "step": 182950
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1570959091186523,
      "learning_rate": 6.063449691714364e-05,
      "loss": 3.0663,
      "step": 182951
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9075472354888916,
      "learning_rate": 6.0632031108920864e-05,
      "loss": 3.0261,
      "step": 182952
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0872464179992676,
      "learning_rate": 6.0629565345201194e-05,
      "loss": 2.9928,
      "step": 182953
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.804762125015259,
      "learning_rate": 6.062709962598492e-05,
      "loss": 2.7534,
      "step": 182954
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.06402587890625,
      "learning_rate": 6.0624633951272785e-05,
      "loss": 3.0879,
      "step": 182955
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3799333572387695,
      "learning_rate": 6.0622168321064846e-05,
      "loss": 2.6857,
      "step": 182956
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.263075113296509,
      "learning_rate": 6.0619702735361876e-05,
      "loss": 2.6921,
      "step": 182957
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.666300058364868,
      "learning_rate": 6.06172371941641e-05,
      "loss": 3.0732,
      "step": 182958
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.766529083251953,
      "learning_rate": 6.06147716974722e-05,
      "loss": 3.0554,
      "step": 182959
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.516941785812378,
      "learning_rate": 6.0612306245286456e-05,
      "loss": 2.9531,
      "step": 182960
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.593731164932251,
      "learning_rate": 6.0609840837607584e-05,
      "loss": 2.8002,
      "step": 182961
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5893635749816895,
      "learning_rate": 6.060737547443567e-05,
      "loss": 2.724,
      "step": 182962
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5565669536590576,
      "learning_rate": 6.060491015577146e-05,
      "loss": 2.8398,
      "step": 182963
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.853684663772583,
      "learning_rate": 6.060244488161521e-05,
      "loss": 3.179,
      "step": 182964
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.144803285598755,
      "learning_rate": 6.059997965196759e-05,
      "loss": 2.9421,
      "step": 182965
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2915828227996826,
      "learning_rate": 6.059751446682887e-05,
      "loss": 3.0165,
      "step": 182966
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8629817962646484,
      "learning_rate": 6.059504932619967e-05,
      "loss": 3.2165,
      "step": 182967
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2680816650390625,
      "learning_rate": 6.0592584230080375e-05,
      "loss": 2.8676,
      "step": 182968
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8335840702056885,
      "learning_rate": 6.059011917847144e-05,
      "loss": 3.0074,
      "step": 182969
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9322681427001953,
      "learning_rate": 6.0587654171373234e-05,
      "loss": 2.9161,
      "step": 182970
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3661012649536133,
      "learning_rate": 6.058518920878636e-05,
      "loss": 2.6858,
      "step": 182971
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.073027610778809,
      "learning_rate": 6.058272429071117e-05,
      "loss": 2.9158,
      "step": 182972
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6865458488464355,
      "learning_rate": 6.0580259417148244e-05,
      "loss": 3.0113,
      "step": 182973
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.898037910461426,
      "learning_rate": 6.057779458809795e-05,
      "loss": 3.0432,
      "step": 182974
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.003061056137085,
      "learning_rate": 6.057532980356078e-05,
      "loss": 2.7445,
      "step": 182975
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5989878177642822,
      "learning_rate": 6.05728650635371e-05,
      "loss": 2.7522,
      "step": 182976
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.3159966468811035,
      "learning_rate": 6.057040036802751e-05,
      "loss": 2.8115,
      "step": 182977
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.886955499649048,
      "learning_rate": 6.0567935717032325e-05,
      "loss": 2.8355,
      "step": 182978
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.713979482650757,
      "learning_rate": 6.0565471110552154e-05,
      "loss": 2.9375,
      "step": 182979
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7070775032043457,
      "learning_rate": 6.056300654858741e-05,
      "loss": 2.8742,
      "step": 182980
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6953604221343994,
      "learning_rate": 6.0560542031138495e-05,
      "loss": 2.8644,
      "step": 182981
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.656435012817383,
      "learning_rate": 6.055807755820583e-05,
      "loss": 3.0709,
      "step": 182982
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.023995876312256,
      "learning_rate": 6.0555613129789993e-05,
      "loss": 2.8892,
      "step": 182983
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.62724232673645,
      "learning_rate": 6.0553148745891344e-05,
      "loss": 2.6315,
      "step": 182984
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9828267097473145,
      "learning_rate": 6.055068440651045e-05,
      "loss": 2.9888,
      "step": 182985
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.176856756210327,
      "learning_rate": 6.054822011164768e-05,
      "loss": 2.9238,
      "step": 182986
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.66276216506958,
      "learning_rate": 6.054575586130354e-05,
      "loss": 2.9747,
      "step": 182987
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.128390789031982,
      "learning_rate": 6.054329165547838e-05,
      "loss": 2.9564,
      "step": 182988
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2649829387664795,
      "learning_rate": 6.054082749417284e-05,
      "loss": 2.7288,
      "step": 182989
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.769355297088623,
      "learning_rate": 6.053836337738719e-05,
      "loss": 2.8579,
      "step": 182990
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2905220985412598,
      "learning_rate": 6.0535899305122025e-05,
      "loss": 2.8657,
      "step": 182991
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.74050235748291,
      "learning_rate": 6.0533435277377786e-05,
      "loss": 2.92,
      "step": 182992
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.401146411895752,
      "learning_rate": 6.053097129415489e-05,
      "loss": 2.8769,
      "step": 182993
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.397756338119507,
      "learning_rate": 6.052850735545376e-05,
      "loss": 3.0099,
      "step": 182994
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.692586660385132,
      "learning_rate": 6.052604346127494e-05,
      "loss": 3.0774,
      "step": 182995
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.268712520599365,
      "learning_rate": 6.0523579611618775e-05,
      "loss": 3.1364,
      "step": 182996
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5274293422698975,
      "learning_rate": 6.05211158064859e-05,
      "loss": 2.8139,
      "step": 182997
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6085903644561768,
      "learning_rate": 6.051865204587667e-05,
      "loss": 2.914,
      "step": 182998
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.948512315750122,
      "learning_rate": 6.051618832979153e-05,
      "loss": 2.9447,
      "step": 182999
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.183965682983398,
      "learning_rate": 6.0513724658230864e-05,
      "loss": 2.8945,
      "step": 183000
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7214772701263428,
      "learning_rate": 6.051126103119533e-05,
      "loss": 3.0359,
      "step": 183001
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8237712383270264,
      "learning_rate": 6.0508797448685134e-05,
      "loss": 3.1301,
      "step": 183002
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5488221645355225,
      "learning_rate": 6.050633391070099e-05,
      "loss": 3.1413,
      "step": 183003
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7136571407318115,
      "learning_rate": 6.050387041724326e-05,
      "loss": 2.7539,
      "step": 183004
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.627591133117676,
      "learning_rate": 6.050140696831225e-05,
      "loss": 3.0262,
      "step": 183005
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.573261022567749,
      "learning_rate": 6.049894356390868e-05,
      "loss": 3.0313,
      "step": 183006
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9346108436584473,
      "learning_rate": 6.049648020403284e-05,
      "loss": 2.7791,
      "step": 183007
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.571957588195801,
      "learning_rate": 6.049401688868517e-05,
      "loss": 2.7684,
      "step": 183008
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.663689374923706,
      "learning_rate": 6.049155361786625e-05,
      "loss": 3.1303,
      "step": 183009
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.597501754760742,
      "learning_rate": 6.0489090391576477e-05,
      "loss": 2.9037,
      "step": 183010
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8871076107025146,
      "learning_rate": 6.048662720981622e-05,
      "loss": 2.8814,
      "step": 183011
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0583229064941406,
      "learning_rate": 6.04841640725861e-05,
      "loss": 2.8942,
      "step": 183012
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.590390682220459,
      "learning_rate": 6.0481700979886407e-05,
      "loss": 2.9098,
      "step": 183013
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9680254459381104,
      "learning_rate": 6.047923793171779e-05,
      "loss": 2.9528,
      "step": 183014
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.704746723175049,
      "learning_rate": 6.0476774928080575e-05,
      "loss": 2.8363,
      "step": 183015
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6863510608673096,
      "learning_rate": 6.0474311968975274e-05,
      "loss": 3.036,
      "step": 183016
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9253785610198975,
      "learning_rate": 6.047184905440222e-05,
      "loss": 3.1052,
      "step": 183017
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7144479751586914,
      "learning_rate": 6.046938618436208e-05,
      "loss": 2.8522,
      "step": 183018
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.666326522827148,
      "learning_rate": 6.046692335885508e-05,
      "loss": 2.9414,
      "step": 183019
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.410198450088501,
      "learning_rate": 6.046446057788192e-05,
      "loss": 2.782,
      "step": 183020
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.44697904586792,
      "learning_rate": 6.046199784144291e-05,
      "loss": 3.0572,
      "step": 183021
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.842085123062134,
      "learning_rate": 6.045953514953857e-05,
      "loss": 2.9637,
      "step": 183022
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5661063194274902,
      "learning_rate": 6.045707250216921e-05,
      "loss": 2.8423,
      "step": 183023
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.644256591796875,
      "learning_rate": 6.045460989933549e-05,
      "loss": 3.1954,
      "step": 183024
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0584404468536377,
      "learning_rate": 6.0452147341037716e-05,
      "loss": 2.519,
      "step": 183025
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5027849674224854,
      "learning_rate": 6.044968482727648e-05,
      "loss": 2.7584,
      "step": 183026
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.120605707168579,
      "learning_rate": 6.044722235805208e-05,
      "loss": 2.9103,
      "step": 183027
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8462893962860107,
      "learning_rate": 6.044475993336516e-05,
      "loss": 3.0938,
      "step": 183028
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6204090118408203,
      "learning_rate": 6.044229755321604e-05,
      "loss": 3.1216,
      "step": 183029
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.487125396728516,
      "learning_rate": 6.043983521760527e-05,
      "loss": 2.9457,
      "step": 183030
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.144113540649414,
      "learning_rate": 6.0437372926533164e-05,
      "loss": 2.9904,
      "step": 183031
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.71097993850708,
      "learning_rate": 6.0434910680000334e-05,
      "loss": 3.024,
      "step": 183032
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.267323017120361,
      "learning_rate": 6.043244847800711e-05,
      "loss": 2.6695,
      "step": 183033
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.662710189819336,
      "learning_rate": 6.0429986320554094e-05,
      "loss": 3.2431,
      "step": 183034
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.710428476333618,
      "learning_rate": 6.042752420764167e-05,
      "loss": 2.9582,
      "step": 183035
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9121947288513184,
      "learning_rate": 6.04250621392703e-05,
      "loss": 2.9457,
      "step": 183036
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.932718276977539,
      "learning_rate": 6.0422600115440324e-05,
      "loss": 2.8811,
      "step": 183037
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.661715030670166,
      "learning_rate": 6.042013813615242e-05,
      "loss": 2.8896,
      "step": 183038
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4433727264404297,
      "learning_rate": 6.041767620140682e-05,
      "loss": 2.9525,
      "step": 183039
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5399627685546875,
      "learning_rate": 6.041521431120422e-05,
      "loss": 3.0132,
      "step": 183040
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8707454204559326,
      "learning_rate": 6.041275246554492e-05,
      "loss": 2.9827,
      "step": 183041
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4547808170318604,
      "learning_rate": 6.041029066442943e-05,
      "loss": 2.9657,
      "step": 183042
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.606565237045288,
      "learning_rate": 6.04078289078581e-05,
      "loss": 3.2808,
      "step": 183043
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.861098527908325,
      "learning_rate": 6.040536719583154e-05,
      "loss": 2.8331,
      "step": 183044
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.9751017093658447,
      "learning_rate": 6.040290552835009e-05,
      "loss": 2.7304,
      "step": 183045
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.869678020477295,
      "learning_rate": 6.040044390541433e-05,
      "loss": 2.9066,
      "step": 183046
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4656805992126465,
      "learning_rate": 6.039798232702467e-05,
      "loss": 2.8162,
      "step": 183047
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3884716033935547,
      "learning_rate": 6.0395520793181516e-05,
      "loss": 3.0445,
      "step": 183048
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7915525436401367,
      "learning_rate": 6.0393059303885325e-05,
      "loss": 2.8625,
      "step": 183049
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6167681217193604,
      "learning_rate": 6.039059785913664e-05,
      "loss": 2.9515,
      "step": 183050
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5546138286590576,
      "learning_rate": 6.038813645893578e-05,
      "loss": 3.3351,
      "step": 183051
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.540900468826294,
      "learning_rate": 6.038567510328338e-05,
      "loss": 2.9697,
      "step": 183052
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5491299629211426,
      "learning_rate": 6.038321379217979e-05,
      "loss": 3.0078,
      "step": 183053
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.859557867050171,
      "learning_rate": 6.038075252562549e-05,
      "loss": 3.0447,
      "step": 183054
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8046305179595947,
      "learning_rate": 6.037829130362085e-05,
      "loss": 2.9869,
      "step": 183055
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2576277256011963,
      "learning_rate": 6.0375830126166514e-05,
      "loss": 2.8307,
      "step": 183056
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.408484935760498,
      "learning_rate": 6.03733689932627e-05,
      "loss": 2.8788,
      "step": 183057
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5909950733184814,
      "learning_rate": 6.0370907904910127e-05,
      "loss": 2.94,
      "step": 183058
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9090235233306885,
      "learning_rate": 6.036844686110911e-05,
      "loss": 2.8102,
      "step": 183059
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.030425071716309,
      "learning_rate": 6.036598586186012e-05,
      "loss": 2.7816,
      "step": 183060
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0833613872528076,
      "learning_rate": 6.0363524907163563e-05,
      "loss": 2.7855,
      "step": 183061
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8693883419036865,
      "learning_rate": 6.036106399702e-05,
      "loss": 2.7841,
      "step": 183062
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.029155254364014,
      "learning_rate": 6.035860313142976e-05,
      "loss": 2.7783,
      "step": 183063
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.227597236633301,
      "learning_rate": 6.035614231039349e-05,
      "loss": 2.9483,
      "step": 183064
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.8812758922576904,
      "learning_rate": 6.03536815339115e-05,
      "loss": 2.8505,
      "step": 183065
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.3204612731933594,
      "learning_rate": 6.035122080198428e-05,
      "loss": 2.7526,
      "step": 183066
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7717878818511963,
      "learning_rate": 6.034876011461225e-05,
      "loss": 2.976,
      "step": 183067
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5062427520751953,
      "learning_rate": 6.034629947179595e-05,
      "loss": 2.8023,
      "step": 183068
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.1568472385406494,
      "learning_rate": 6.0343838873535734e-05,
      "loss": 2.8377,
      "step": 183069
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6396048069000244,
      "learning_rate": 6.034137831983221e-05,
      "loss": 2.6637,
      "step": 183070
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8059511184692383,
      "learning_rate": 6.033891781068575e-05,
      "loss": 2.971,
      "step": 183071
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0414202213287354,
      "learning_rate": 6.033645734609678e-05,
      "loss": 3.1697,
      "step": 183072
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.413054943084717,
      "learning_rate": 6.0333996926065734e-05,
      "loss": 2.9864,
      "step": 183073
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.019930362701416,
      "learning_rate": 6.033153655059321e-05,
      "loss": 2.7286,
      "step": 183074
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5367956161499023,
      "learning_rate": 6.032907621967947e-05,
      "loss": 2.8419,
      "step": 183075
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7896759510040283,
      "learning_rate": 6.032661593332516e-05,
      "loss": 2.9757,
      "step": 183076
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.566826581954956,
      "learning_rate": 6.032415569153067e-05,
      "loss": 2.9356,
      "step": 183077
    },
    {
      "epoch": 2.38,
      "grad_norm": 6.513172626495361,
      "learning_rate": 6.032169549429644e-05,
      "loss": 2.7838,
      "step": 183078
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.940805912017822,
      "learning_rate": 6.031923534162286e-05,
      "loss": 2.9271,
      "step": 183079
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.878351926803589,
      "learning_rate": 6.031677523351054e-05,
      "loss": 2.7268,
      "step": 183080
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.636704921722412,
      "learning_rate": 6.031431516995977e-05,
      "loss": 2.9142,
      "step": 183081
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.932835578918457,
      "learning_rate": 6.031185515097115e-05,
      "loss": 2.9946,
      "step": 183082
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5145998001098633,
      "learning_rate": 6.0309395176545094e-05,
      "loss": 2.9231,
      "step": 183083
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.2372636795043945,
      "learning_rate": 6.0306935246682055e-05,
      "loss": 3.046,
      "step": 183084
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.34527325630188,
      "learning_rate": 6.0304475361382395e-05,
      "loss": 2.9413,
      "step": 183085
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9763708114624023,
      "learning_rate": 6.0302015520646766e-05,
      "loss": 2.7419,
      "step": 183086
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.48050594329834,
      "learning_rate": 6.029955572447541e-05,
      "loss": 2.9725,
      "step": 183087
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.535477638244629,
      "learning_rate": 6.0297095972868984e-05,
      "loss": 3.0192,
      "step": 183088
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.728346586227417,
      "learning_rate": 6.0294636265827835e-05,
      "loss": 2.8455,
      "step": 183089
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6432816982269287,
      "learning_rate": 6.0292176603352335e-05,
      "loss": 2.9895,
      "step": 183090
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.024990081787109,
      "learning_rate": 6.028971698544316e-05,
      "loss": 3.2269,
      "step": 183091
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.930037260055542,
      "learning_rate": 6.028725741210066e-05,
      "loss": 2.9327,
      "step": 183092
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3888235092163086,
      "learning_rate": 6.0284797883325186e-05,
      "loss": 2.6568,
      "step": 183093
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.93117356300354,
      "learning_rate": 6.028233839911739e-05,
      "loss": 2.7465,
      "step": 183094
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.633655309677124,
      "learning_rate": 6.027987895947751e-05,
      "loss": 3.0316,
      "step": 183095
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.452685594558716,
      "learning_rate": 6.027741956440625e-05,
      "loss": 2.833,
      "step": 183096
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4269826412200928,
      "learning_rate": 6.027496021390393e-05,
      "loss": 3.214,
      "step": 183097
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4065401554107666,
      "learning_rate": 6.0272500907971e-05,
      "loss": 2.7924,
      "step": 183098
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.01701021194458,
      "learning_rate": 6.027004164660788e-05,
      "loss": 2.9682,
      "step": 183099
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.634589910507202,
      "learning_rate": 6.0267582429815176e-05,
      "loss": 2.9388,
      "step": 183100
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6774299144744873,
      "learning_rate": 6.026512325759315e-05,
      "loss": 2.8812,
      "step": 183101
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.5137674808502197,
      "learning_rate": 6.026266412994244e-05,
      "loss": 2.7928,
      "step": 183102
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.827479362487793,
      "learning_rate": 6.026020504686344e-05,
      "loss": 2.7392,
      "step": 183103
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.194577693939209,
      "learning_rate": 6.0257746008356525e-05,
      "loss": 2.7854,
      "step": 183104
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8985037803649902,
      "learning_rate": 6.0255287014422284e-05,
      "loss": 2.7979,
      "step": 183105
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.486530065536499,
      "learning_rate": 6.0252828065061085e-05,
      "loss": 3.0466,
      "step": 183106
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9822041988372803,
      "learning_rate": 6.025036916027337e-05,
      "loss": 3.1052,
      "step": 183107
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.3894996643066406,
      "learning_rate": 6.02479103000597e-05,
      "loss": 2.9692,
      "step": 183108
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.428647756576538,
      "learning_rate": 6.024545148442047e-05,
      "loss": 2.8733,
      "step": 183109
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.668405532836914,
      "learning_rate": 6.0242992713356055e-05,
      "loss": 2.8614,
      "step": 183110
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5366499423980713,
      "learning_rate": 6.024053398686708e-05,
      "loss": 2.8749,
      "step": 183111
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5411062240600586,
      "learning_rate": 6.023807530495385e-05,
      "loss": 3.1861,
      "step": 183112
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.4608333110809326,
      "learning_rate": 6.0235616667616956e-05,
      "loss": 2.6936,
      "step": 183113
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.835386037826538,
      "learning_rate": 6.023315807485678e-05,
      "loss": 3.0185,
      "step": 183114
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.506608724594116,
      "learning_rate": 6.0230699526673773e-05,
      "loss": 2.8439,
      "step": 183115
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.0543313026428223,
      "learning_rate": 6.0228241023068345e-05,
      "loss": 3.0889,
      "step": 183116
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.5531866550445557,
      "learning_rate": 6.022578256404109e-05,
      "loss": 3.1397,
      "step": 183117
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6080856323242188,
      "learning_rate": 6.022332414959231e-05,
      "loss": 3.0697,
      "step": 183118
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8848319053649902,
      "learning_rate": 6.02208657797226e-05,
      "loss": 2.8607,
      "step": 183119
    },
    {
      "epoch": 2.38,
      "grad_norm": 4.311959266662598,
      "learning_rate": 6.021840745443237e-05,
      "loss": 3.0119,
      "step": 183120
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0907397270202637,
      "learning_rate": 6.0215949173722075e-05,
      "loss": 2.7873,
      "step": 183121
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.733933687210083,
      "learning_rate": 6.021349093759209e-05,
      "loss": 2.8306,
      "step": 183122
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.770967483520508,
      "learning_rate": 6.021103274604301e-05,
      "loss": 2.7074,
      "step": 183123
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6362366676330566,
      "learning_rate": 6.020857459907513e-05,
      "loss": 3.003,
      "step": 183124
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.7513253688812256,
      "learning_rate": 6.020611649668909e-05,
      "loss": 3.1476,
      "step": 183125
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6952033042907715,
      "learning_rate": 6.020365843888526e-05,
      "loss": 2.8848,
      "step": 183126
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.483290910720825,
      "learning_rate": 6.02012004256641e-05,
      "loss": 3.2069,
      "step": 183127
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7833383083343506,
      "learning_rate": 6.0198742457026e-05,
      "loss": 2.9271,
      "step": 183128
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7647716999053955,
      "learning_rate": 6.019628453297155e-05,
      "loss": 2.7044,
      "step": 183129
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.572143793106079,
      "learning_rate": 6.0193826653501034e-05,
      "loss": 2.9113,
      "step": 183130
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7076900005340576,
      "learning_rate": 6.0191368818615114e-05,
      "loss": 2.8955,
      "step": 183131
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9586925506591797,
      "learning_rate": 6.0188911028314135e-05,
      "loss": 3.1152,
      "step": 183132
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.299046516418457,
      "learning_rate": 6.018645328259859e-05,
      "loss": 2.9912,
      "step": 183133
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7122490406036377,
      "learning_rate": 6.018399558146881e-05,
      "loss": 3.0499,
      "step": 183134
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7715706825256348,
      "learning_rate": 6.018153792492544e-05,
      "loss": 3.1049,
      "step": 183135
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.83156418800354,
      "learning_rate": 6.017908031296873e-05,
      "loss": 3.0495,
      "step": 183136
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.875126600265503,
      "learning_rate": 6.017662274559939e-05,
      "loss": 2.9755,
      "step": 183137
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6044349670410156,
      "learning_rate": 6.017416522281772e-05,
      "loss": 3.2713,
      "step": 183138
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.592921018600464,
      "learning_rate": 6.017170774462421e-05,
      "loss": 2.7998,
      "step": 183139
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.7411012649536133,
      "learning_rate": 6.01692503110192e-05,
      "loss": 3.0938,
      "step": 183140
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.820103883743286,
      "learning_rate": 6.016679292200336e-05,
      "loss": 2.9636,
      "step": 183141
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.760927677154541,
      "learning_rate": 6.016433557757695e-05,
      "loss": 3.0183,
      "step": 183142
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.008408308029175,
      "learning_rate": 6.01618782777406e-05,
      "loss": 3.0192,
      "step": 183143
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.9993066787719727,
      "learning_rate": 6.015942102249466e-05,
      "loss": 2.8329,
      "step": 183144
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8986687660217285,
      "learning_rate": 6.015696381183964e-05,
      "loss": 3.2772,
      "step": 183145
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.123457193374634,
      "learning_rate": 6.015450664577589e-05,
      "loss": 3.0956,
      "step": 183146
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0328598022460938,
      "learning_rate": 6.0152049524304e-05,
      "loss": 3.013,
      "step": 183147
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8256306648254395,
      "learning_rate": 6.014959244742431e-05,
      "loss": 3.0501,
      "step": 183148
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4841012954711914,
      "learning_rate": 6.0147135415137415e-05,
      "loss": 2.5295,
      "step": 183149
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.803415060043335,
      "learning_rate": 6.014467842744372e-05,
      "loss": 3.0375,
      "step": 183150
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4106764793395996,
      "learning_rate": 6.014222148434361e-05,
      "loss": 3.1497,
      "step": 183151
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.72084641456604,
      "learning_rate": 6.0139764585837536e-05,
      "loss": 2.8155,
      "step": 183152
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.65366268157959,
      "learning_rate": 6.0137307731926086e-05,
      "loss": 2.7636,
      "step": 183153
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.4358954429626465,
      "learning_rate": 6.0134850922609566e-05,
      "loss": 2.9364,
      "step": 183154
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.865138530731201,
      "learning_rate": 6.013239415788857e-05,
      "loss": 3.022,
      "step": 183155
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.833695411682129,
      "learning_rate": 6.0129937437763476e-05,
      "loss": 3.0049,
      "step": 183156
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.06703519821167,
      "learning_rate": 6.012748076223477e-05,
      "loss": 2.9687,
      "step": 183157
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6929421424865723,
      "learning_rate": 6.012502413130282e-05,
      "loss": 2.8567,
      "step": 183158
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.800640344619751,
      "learning_rate": 6.0122567544968234e-05,
      "loss": 2.7452,
      "step": 183159
    },
    {
      "epoch": 2.38,
      "grad_norm": 5.241742134094238,
      "learning_rate": 6.0120111003231306e-05,
      "loss": 2.8501,
      "step": 183160
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.6203839778900146,
      "learning_rate": 6.0117654506092675e-05,
      "loss": 2.633,
      "step": 183161
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.798961877822876,
      "learning_rate": 6.011519805355259e-05,
      "loss": 2.8825,
      "step": 183162
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.49241042137146,
      "learning_rate": 6.011274164561181e-05,
      "loss": 2.8356,
      "step": 183163
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.165262460708618,
      "learning_rate": 6.011028528227041e-05,
      "loss": 3.0866,
      "step": 183164
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.8978118896484375,
      "learning_rate": 6.010782896352914e-05,
      "loss": 2.9225,
      "step": 183165
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.466549873352051,
      "learning_rate": 6.0105372689388256e-05,
      "loss": 2.9546,
      "step": 183166
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6906545162200928,
      "learning_rate": 6.0102916459848394e-05,
      "loss": 2.7869,
      "step": 183167
    },
    {
      "epoch": 2.38,
      "grad_norm": 3.0766146183013916,
      "learning_rate": 6.010046027490986e-05,
      "loss": 2.9063,
      "step": 183168
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.6551058292388916,
      "learning_rate": 6.0098004134573305e-05,
      "loss": 3.0819,
      "step": 183169
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.813462018966675,
      "learning_rate": 6.0095548038838946e-05,
      "loss": 2.9289,
      "step": 183170
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0248122215270996,
      "learning_rate": 6.009309198770741e-05,
      "loss": 2.7232,
      "step": 183171
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.559441089630127,
      "learning_rate": 6.0090635981179025e-05,
      "loss": 2.8378,
      "step": 183172
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4949862957000732,
      "learning_rate": 6.00881800192544e-05,
      "loss": 3.0555,
      "step": 183173
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.850590229034424,
      "learning_rate": 6.008572410193382e-05,
      "loss": 2.884,
      "step": 183174
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.2704946994781494,
      "learning_rate": 6.00832682292179e-05,
      "loss": 2.8693,
      "step": 183175
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.8747944831848145,
      "learning_rate": 6.008081240110706e-05,
      "loss": 2.843,
      "step": 183176
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.830091953277588,
      "learning_rate": 6.007835661760171e-05,
      "loss": 2.7593,
      "step": 183177
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.604745388031006,
      "learning_rate": 6.007590087870224e-05,
      "loss": 2.919,
      "step": 183178
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.725076675415039,
      "learning_rate": 6.007344518440926e-05,
      "loss": 3.0158,
      "step": 183179
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7992923259735107,
      "learning_rate": 6.00709895347231e-05,
      "loss": 3.0488,
      "step": 183180
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8383524417877197,
      "learning_rate": 6.0068533929644355e-05,
      "loss": 3.0401,
      "step": 183181
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8111283779144287,
      "learning_rate": 6.0066078369173364e-05,
      "loss": 3.1477,
      "step": 183182
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5069899559020996,
      "learning_rate": 6.006362285331065e-05,
      "loss": 2.8536,
      "step": 183183
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7826991081237793,
      "learning_rate": 6.006116738205653e-05,
      "loss": 2.75,
      "step": 183184
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.112147331237793,
      "learning_rate": 6.0058711955411685e-05,
      "loss": 2.8765,
      "step": 183185
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.7878735065460205,
      "learning_rate": 6.005625657337636e-05,
      "loss": 2.9019,
      "step": 183186
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6594443321228027,
      "learning_rate": 6.005380123595115e-05,
      "loss": 3.1631,
      "step": 183187
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6998977661132812,
      "learning_rate": 6.005134594313652e-05,
      "loss": 2.7692,
      "step": 183188
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.145969867706299,
      "learning_rate": 6.004889069493277e-05,
      "loss": 2.6336,
      "step": 183189
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.675043821334839,
      "learning_rate": 6.004643549134054e-05,
      "loss": 2.92,
      "step": 183190
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3879613876342773,
      "learning_rate": 6.004398033236022e-05,
      "loss": 3.0961,
      "step": 183191
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.851663589477539,
      "learning_rate": 6.004152521799215e-05,
      "loss": 2.8859,
      "step": 183192
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.449943780899048,
      "learning_rate": 6.003907014823699e-05,
      "loss": 2.8675,
      "step": 183193
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7876501083374023,
      "learning_rate": 6.003661512309508e-05,
      "loss": 3.1102,
      "step": 183194
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3659276962280273,
      "learning_rate": 6.003416014256681e-05,
      "loss": 2.8166,
      "step": 183195
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9253532886505127,
      "learning_rate": 6.003170520665283e-05,
      "loss": 2.7353,
      "step": 183196
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8115439414978027,
      "learning_rate": 6.002925031535345e-05,
      "loss": 2.904,
      "step": 183197
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.632608413696289,
      "learning_rate": 6.002679546866912e-05,
      "loss": 3.0221,
      "step": 183198
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8607912063598633,
      "learning_rate": 6.0024340666600405e-05,
      "loss": 2.9529,
      "step": 183199
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.563924789428711,
      "learning_rate": 6.002188590914769e-05,
      "loss": 3.3919,
      "step": 183200
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.805321216583252,
      "learning_rate": 6.001943119631133e-05,
      "loss": 3.0386,
      "step": 183201
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9933817386627197,
      "learning_rate": 6.001697652809201e-05,
      "loss": 2.882,
      "step": 183202
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5865604877471924,
      "learning_rate": 6.001452190448997e-05,
      "loss": 3.0057,
      "step": 183203
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.56769061088562,
      "learning_rate": 6.001206732550583e-05,
      "loss": 2.8714,
      "step": 183204
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8243370056152344,
      "learning_rate": 6.000961279113997e-05,
      "loss": 2.8443,
      "step": 183205
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6032259464263916,
      "learning_rate": 6.0007158301392885e-05,
      "loss": 2.9002,
      "step": 183206
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.765540838241577,
      "learning_rate": 6.0004703856264915e-05,
      "loss": 2.6778,
      "step": 183207
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1087512969970703,
      "learning_rate": 6.000224945575665e-05,
      "loss": 2.7839,
      "step": 183208
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0528244972229004,
      "learning_rate": 5.999979509986842e-05,
      "loss": 2.8104,
      "step": 183209
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.520153045654297,
      "learning_rate": 5.9997340788600866e-05,
      "loss": 2.7543,
      "step": 183210
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7113165855407715,
      "learning_rate": 5.9994886521954325e-05,
      "loss": 2.8252,
      "step": 183211
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9850733280181885,
      "learning_rate": 5.999243229992925e-05,
      "loss": 3.1252,
      "step": 183212
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.799920082092285,
      "learning_rate": 5.998997812252605e-05,
      "loss": 2.7957,
      "step": 183213
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.644939422607422,
      "learning_rate": 5.9987523989745325e-05,
      "loss": 3.0161,
      "step": 183214
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7703404426574707,
      "learning_rate": 5.998506990158737e-05,
      "loss": 3.3333,
      "step": 183215
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.953968048095703,
      "learning_rate": 5.998261585805278e-05,
      "loss": 3.1656,
      "step": 183216
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5789787769317627,
      "learning_rate": 5.998016185914197e-05,
      "loss": 2.6164,
      "step": 183217
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6517910957336426,
      "learning_rate": 5.99777079048554e-05,
      "loss": 3.0043,
      "step": 183218
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.794247627258301,
      "learning_rate": 5.9975253995193395e-05,
      "loss": 2.934,
      "step": 183219
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5791258811950684,
      "learning_rate": 5.99728001301566e-05,
      "loss": 2.8162,
      "step": 183220
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.03349494934082,
      "learning_rate": 5.997034630974533e-05,
      "loss": 2.8752,
      "step": 183221
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0903894901275635,
      "learning_rate": 5.996789253396017e-05,
      "loss": 2.7916,
      "step": 183222
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.383530616760254,
      "learning_rate": 5.9965438802801514e-05,
      "loss": 2.9781,
      "step": 183223
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4187824726104736,
      "learning_rate": 5.996298511626982e-05,
      "loss": 2.9882,
      "step": 183224
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.937849283218384,
      "learning_rate": 5.9960531474365435e-05,
      "loss": 2.8654,
      "step": 183225
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.986025333404541,
      "learning_rate": 5.995807787708902e-05,
      "loss": 2.6076,
      "step": 183226
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1743600368499756,
      "learning_rate": 5.995562432444083e-05,
      "loss": 3.1024,
      "step": 183227
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.083500862121582,
      "learning_rate": 5.9953170816421514e-05,
      "loss": 2.9002,
      "step": 183228
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3045687675476074,
      "learning_rate": 5.995071735303136e-05,
      "loss": 3.2447,
      "step": 183229
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.759188652038574,
      "learning_rate": 5.994826393427108e-05,
      "loss": 2.8247,
      "step": 183230
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9393556118011475,
      "learning_rate": 5.994581056014076e-05,
      "loss": 2.955,
      "step": 183231
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0207104682922363,
      "learning_rate": 5.994335723064111e-05,
      "loss": 2.7626,
      "step": 183232
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4849958419799805,
      "learning_rate": 5.9940903945772454e-05,
      "loss": 2.9421,
      "step": 183233
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.960196018218994,
      "learning_rate": 5.99384507055354e-05,
      "loss": 2.8807,
      "step": 183234
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2309396266937256,
      "learning_rate": 5.993599750993024e-05,
      "loss": 2.5734,
      "step": 183235
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4880361557006836,
      "learning_rate": 5.993354435895769e-05,
      "loss": 2.8804,
      "step": 183236
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8133645057678223,
      "learning_rate": 5.993109125261786e-05,
      "loss": 2.792,
      "step": 183237
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0262081623077393,
      "learning_rate": 5.9928638190911426e-05,
      "loss": 2.9816,
      "step": 183238
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.812014579772949,
      "learning_rate": 5.992618517383873e-05,
      "loss": 2.9742,
      "step": 183239
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8174359798431396,
      "learning_rate": 5.9923732201400357e-05,
      "loss": 2.8581,
      "step": 183240
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.457704782485962,
      "learning_rate": 5.992127927359661e-05,
      "loss": 2.8063,
      "step": 183241
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.904906988143921,
      "learning_rate": 5.99188263904282e-05,
      "loss": 2.8911,
      "step": 183242
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.828444480895996,
      "learning_rate": 5.991637355189525e-05,
      "loss": 2.9621,
      "step": 183243
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8786935806274414,
      "learning_rate": 5.991392075799846e-05,
      "loss": 2.7401,
      "step": 183244
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.438194513320923,
      "learning_rate": 5.99114680087381e-05,
      "loss": 2.9251,
      "step": 183245
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7853105068206787,
      "learning_rate": 5.9909015304114826e-05,
      "loss": 3.1126,
      "step": 183246
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5370867252349854,
      "learning_rate": 5.990656264412892e-05,
      "loss": 2.7857,
      "step": 183247
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9546327590942383,
      "learning_rate": 5.9904110028781104e-05,
      "loss": 2.7643,
      "step": 183248
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7337355613708496,
      "learning_rate": 5.990165745807145e-05,
      "loss": 2.8884,
      "step": 183249
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6675379276275635,
      "learning_rate": 5.989920493200069e-05,
      "loss": 2.9331,
      "step": 183250
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.493518590927124,
      "learning_rate": 5.9896752450569155e-05,
      "loss": 2.7785,
      "step": 183251
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2042229175567627,
      "learning_rate": 5.989430001377738e-05,
      "loss": 2.9551,
      "step": 183252
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.779806613922119,
      "learning_rate": 5.989184762162572e-05,
      "loss": 3.1472,
      "step": 183253
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7870845794677734,
      "learning_rate": 5.9889395274114894e-05,
      "loss": 2.842,
      "step": 183254
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.795307159423828,
      "learning_rate": 5.9886942971244954e-05,
      "loss": 3.0119,
      "step": 183255
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.207899808883667,
      "learning_rate": 5.988449071301667e-05,
      "loss": 2.9085,
      "step": 183256
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.701096773147583,
      "learning_rate": 5.9882038499430315e-05,
      "loss": 2.7675,
      "step": 183257
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1076409816741943,
      "learning_rate": 5.987958633048648e-05,
      "loss": 3.0522,
      "step": 183258
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.283977508544922,
      "learning_rate": 5.987713420618551e-05,
      "loss": 2.9321,
      "step": 183259
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.942814588546753,
      "learning_rate": 5.987468212652805e-05,
      "loss": 2.8406,
      "step": 183260
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.062385320663452,
      "learning_rate": 5.987223009151425e-05,
      "loss": 2.9389,
      "step": 183261
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.780937671661377,
      "learning_rate": 5.986977810114484e-05,
      "loss": 2.8994,
      "step": 183262
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4368667602539062,
      "learning_rate": 5.986732615542008e-05,
      "loss": 3.0482,
      "step": 183263
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6919140815734863,
      "learning_rate": 5.986487425434061e-05,
      "loss": 3.1311,
      "step": 183264
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.439194202423096,
      "learning_rate": 5.9862422397906695e-05,
      "loss": 3.1313,
      "step": 183265
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.525839328765869,
      "learning_rate": 5.9859970586119e-05,
      "loss": 2.9375,
      "step": 183266
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.257864952087402,
      "learning_rate": 5.9857518818977824e-05,
      "loss": 2.7318,
      "step": 183267
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9436538219451904,
      "learning_rate": 5.98550670964837e-05,
      "loss": 2.8602,
      "step": 183268
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.620624303817749,
      "learning_rate": 5.9852615418636965e-05,
      "loss": 3.0253,
      "step": 183269
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2109432220458984,
      "learning_rate": 5.985016378543824e-05,
      "loss": 3.0277,
      "step": 183270
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5246410369873047,
      "learning_rate": 5.984771219688781e-05,
      "loss": 2.9212,
      "step": 183271
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.031745433807373,
      "learning_rate": 5.9845260652986324e-05,
      "loss": 2.9406,
      "step": 183272
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.119086742401123,
      "learning_rate": 5.984280915373413e-05,
      "loss": 2.753,
      "step": 183273
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1755268573760986,
      "learning_rate": 5.9840357699131616e-05,
      "loss": 3.1254,
      "step": 183274
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0320937633514404,
      "learning_rate": 5.983790628917938e-05,
      "loss": 2.904,
      "step": 183275
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7291460037231445,
      "learning_rate": 5.983545492387784e-05,
      "loss": 3.1297,
      "step": 183276
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0881245136260986,
      "learning_rate": 5.98330036032273e-05,
      "loss": 2.9274,
      "step": 183277
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.780120611190796,
      "learning_rate": 5.9830552327228456e-05,
      "loss": 3.0428,
      "step": 183278
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4595956802368164,
      "learning_rate": 5.982810109588162e-05,
      "loss": 2.9506,
      "step": 183279
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.67044997215271,
      "learning_rate": 5.98256499091872e-05,
      "loss": 2.9864,
      "step": 183280
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6175029277801514,
      "learning_rate": 5.9823198767145795e-05,
      "loss": 2.8528,
      "step": 183281
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8631174564361572,
      "learning_rate": 5.9820747669757805e-05,
      "loss": 3.1489,
      "step": 183282
    },
    {
      "epoch": 2.39,
      "grad_norm": 5.3282246589660645,
      "learning_rate": 5.9818296617023595e-05,
      "loss": 2.7893,
      "step": 183283
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.077397108078003,
      "learning_rate": 5.981584560894377e-05,
      "loss": 3.134,
      "step": 183284
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8957183361053467,
      "learning_rate": 5.981339464551872e-05,
      "loss": 2.8996,
      "step": 183285
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9671738147735596,
      "learning_rate": 5.981094372674882e-05,
      "loss": 2.6987,
      "step": 183286
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0083799362182617,
      "learning_rate": 5.980849285263466e-05,
      "loss": 2.9034,
      "step": 183287
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0381104946136475,
      "learning_rate": 5.980604202317658e-05,
      "loss": 3.0936,
      "step": 183288
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8035426139831543,
      "learning_rate": 5.980359123837515e-05,
      "loss": 2.8919,
      "step": 183289
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1840944290161133,
      "learning_rate": 5.980114049823076e-05,
      "loss": 3.1236,
      "step": 183290
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4965615272521973,
      "learning_rate": 5.979868980274388e-05,
      "loss": 2.8867,
      "step": 183291
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.15338397026062,
      "learning_rate": 5.979623915191488e-05,
      "loss": 2.9382,
      "step": 183292
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.9250030517578125,
      "learning_rate": 5.979378854574439e-05,
      "loss": 3.0653,
      "step": 183293
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.484170436859131,
      "learning_rate": 5.979133798423268e-05,
      "loss": 2.9462,
      "step": 183294
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8864331245422363,
      "learning_rate": 5.978888746738038e-05,
      "loss": 3.0705,
      "step": 183295
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5806853771209717,
      "learning_rate": 5.978643699518781e-05,
      "loss": 3.0543,
      "step": 183296
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8495781421661377,
      "learning_rate": 5.9783986567655525e-05,
      "loss": 2.793,
      "step": 183297
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.876577854156494,
      "learning_rate": 5.978153618478385e-05,
      "loss": 2.8458,
      "step": 183298
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2995898723602295,
      "learning_rate": 5.9779085846573385e-05,
      "loss": 2.6591,
      "step": 183299
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7653868198394775,
      "learning_rate": 5.977663555302445e-05,
      "loss": 2.7789,
      "step": 183300
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.895040273666382,
      "learning_rate": 5.977418530413767e-05,
      "loss": 2.9503,
      "step": 183301
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.577198028564453,
      "learning_rate": 5.977173509991329e-05,
      "loss": 3.0823,
      "step": 183302
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.888920545578003,
      "learning_rate": 5.9769284940352044e-05,
      "loss": 3.0571,
      "step": 183303
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.538518905639648,
      "learning_rate": 5.9766834825454076e-05,
      "loss": 3.0432,
      "step": 183304
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9241394996643066,
      "learning_rate": 5.976438475522009e-05,
      "loss": 2.9812,
      "step": 183305
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.987164258956909,
      "learning_rate": 5.976193472965033e-05,
      "loss": 2.9594,
      "step": 183306
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.503359317779541,
      "learning_rate": 5.9759484748745445e-05,
      "loss": 2.8796,
      "step": 183307
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.84539532661438,
      "learning_rate": 5.975703481250573e-05,
      "loss": 3.0238,
      "step": 183308
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.7808918952941895,
      "learning_rate": 5.975458492093186e-05,
      "loss": 2.8802,
      "step": 183309
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5465586185455322,
      "learning_rate": 5.975213507402402e-05,
      "loss": 2.9827,
      "step": 183310
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0106201171875,
      "learning_rate": 5.9749685271782854e-05,
      "loss": 3.041,
      "step": 183311
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.512512445449829,
      "learning_rate": 5.974723551420869e-05,
      "loss": 2.8709,
      "step": 183312
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0464656352996826,
      "learning_rate": 5.9744785801302096e-05,
      "loss": 2.9732,
      "step": 183313
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5451624393463135,
      "learning_rate": 5.974233613306344e-05,
      "loss": 2.9587,
      "step": 183314
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3122377395629883,
      "learning_rate": 5.973988650949335e-05,
      "loss": 2.9518,
      "step": 183315
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.176069259643555,
      "learning_rate": 5.973743693059203e-05,
      "loss": 3.0333,
      "step": 183316
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.360804796218872,
      "learning_rate": 5.973498739636011e-05,
      "loss": 2.7616,
      "step": 183317
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.939971923828125,
      "learning_rate": 5.97325379067979e-05,
      "loss": 3.1426,
      "step": 183318
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1116435527801514,
      "learning_rate": 5.9730088461906046e-05,
      "loss": 2.8889,
      "step": 183319
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.375929117202759,
      "learning_rate": 5.9727639061684795e-05,
      "loss": 2.9562,
      "step": 183320
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8014886379241943,
      "learning_rate": 5.972518970613491e-05,
      "loss": 3.0825,
      "step": 183321
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1222567558288574,
      "learning_rate": 5.9722740395256466e-05,
      "loss": 3.2007,
      "step": 183322
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.8486812114715576,
      "learning_rate": 5.972029112905018e-05,
      "loss": 2.8333,
      "step": 183323
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6866331100463867,
      "learning_rate": 5.971784190751636e-05,
      "loss": 3.2227,
      "step": 183324
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9695627689361572,
      "learning_rate": 5.9715392730655607e-05,
      "loss": 2.8938,
      "step": 183325
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.762101650238037,
      "learning_rate": 5.971294359846819e-05,
      "loss": 2.7983,
      "step": 183326
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5845072269439697,
      "learning_rate": 5.9710494510954865e-05,
      "loss": 3.1665,
      "step": 183327
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.67284893989563,
      "learning_rate": 5.970804546811571e-05,
      "loss": 2.8959,
      "step": 183328
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7958762645721436,
      "learning_rate": 5.970559646995148e-05,
      "loss": 2.8682,
      "step": 183329
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.010600566864014,
      "learning_rate": 5.970314751646241e-05,
      "loss": 2.98,
      "step": 183330
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1253583431243896,
      "learning_rate": 5.970069860764914e-05,
      "loss": 3.2205,
      "step": 183331
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7770683765411377,
      "learning_rate": 5.969824974351197e-05,
      "loss": 2.872,
      "step": 183332
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9718151092529297,
      "learning_rate": 5.96958009240516e-05,
      "loss": 2.7026,
      "step": 183333
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4055991172790527,
      "learning_rate": 5.9693352149268146e-05,
      "loss": 3.0114,
      "step": 183334
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0273828506469727,
      "learning_rate": 5.969090341916233e-05,
      "loss": 2.9116,
      "step": 183335
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9964356422424316,
      "learning_rate": 5.96884547337344e-05,
      "loss": 2.8793,
      "step": 183336
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2002062797546387,
      "learning_rate": 5.968600609298504e-05,
      "loss": 2.8409,
      "step": 183337
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8823940753936768,
      "learning_rate": 5.968355749691447e-05,
      "loss": 2.9962,
      "step": 183338
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8915634155273438,
      "learning_rate": 5.968110894552346e-05,
      "loss": 3.0048,
      "step": 183339
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.495053291320801,
      "learning_rate": 5.967866043881208e-05,
      "loss": 2.9807,
      "step": 183340
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.908933401107788,
      "learning_rate": 5.9676211976781054e-05,
      "loss": 2.7519,
      "step": 183341
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.46164870262146,
      "learning_rate": 5.9673763559430686e-05,
      "loss": 3.0594,
      "step": 183342
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4520511627197266,
      "learning_rate": 5.967131518676158e-05,
      "loss": 2.7741,
      "step": 183343
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.066087245941162,
      "learning_rate": 5.9668866858774065e-05,
      "loss": 3.0939,
      "step": 183344
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.567744016647339,
      "learning_rate": 5.9666418575468775e-05,
      "loss": 3.0444,
      "step": 183345
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.509827136993408,
      "learning_rate": 5.966397033684585e-05,
      "loss": 3.0947,
      "step": 183346
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.573744297027588,
      "learning_rate": 5.966152214290604e-05,
      "loss": 2.5385,
      "step": 183347
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5531790256500244,
      "learning_rate": 5.965907399364962e-05,
      "loss": 2.9234,
      "step": 183348
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.997689962387085,
      "learning_rate": 5.965662588907716e-05,
      "loss": 3.0745,
      "step": 183349
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.979363441467285,
      "learning_rate": 5.965417782918902e-05,
      "loss": 3.1398,
      "step": 183350
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.858867883682251,
      "learning_rate": 5.965172981398577e-05,
      "loss": 3.0207,
      "step": 183351
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4170329570770264,
      "learning_rate": 5.964928184346781e-05,
      "loss": 3.0404,
      "step": 183352
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4440736770629883,
      "learning_rate": 5.964683391763557e-05,
      "loss": 2.8547,
      "step": 183353
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.834836483001709,
      "learning_rate": 5.964438603648946e-05,
      "loss": 2.796,
      "step": 183354
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.175039529800415,
      "learning_rate": 5.964193820003006e-05,
      "loss": 2.9258,
      "step": 183355
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.476254940032959,
      "learning_rate": 5.9639490408257686e-05,
      "loss": 2.7086,
      "step": 183356
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6345667839050293,
      "learning_rate": 5.963704266117296e-05,
      "loss": 2.8538,
      "step": 183357
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8951122760772705,
      "learning_rate": 5.963459495877623e-05,
      "loss": 2.7564,
      "step": 183358
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7410643100738525,
      "learning_rate": 5.963214730106798e-05,
      "loss": 3.3965,
      "step": 183359
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.709899425506592,
      "learning_rate": 5.962969968804855e-05,
      "loss": 2.9077,
      "step": 183360
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1062517166137695,
      "learning_rate": 5.962725211971857e-05,
      "loss": 2.9095,
      "step": 183361
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6842801570892334,
      "learning_rate": 5.962480459607838e-05,
      "loss": 2.7426,
      "step": 183362
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4573822021484375,
      "learning_rate": 5.962235711712854e-05,
      "loss": 3.056,
      "step": 183363
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.35797119140625,
      "learning_rate": 5.961990968286942e-05,
      "loss": 2.889,
      "step": 183364
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8549764156341553,
      "learning_rate": 5.961746229330144e-05,
      "loss": 2.9555,
      "step": 183365
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.917567491531372,
      "learning_rate": 5.9615014948425186e-05,
      "loss": 3.0752,
      "step": 183366
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.189198017120361,
      "learning_rate": 5.961256764824102e-05,
      "loss": 2.8102,
      "step": 183367
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1072778701782227,
      "learning_rate": 5.961012039274936e-05,
      "loss": 2.9572,
      "step": 183368
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.506477355957031,
      "learning_rate": 5.960767318195079e-05,
      "loss": 2.9842,
      "step": 183369
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.7879364490509033,
      "learning_rate": 5.96052260158457e-05,
      "loss": 3.0056,
      "step": 183370
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.115811824798584,
      "learning_rate": 5.960277889443442e-05,
      "loss": 2.7261,
      "step": 183371
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6330535411834717,
      "learning_rate": 5.960033181771763e-05,
      "loss": 3.0483,
      "step": 183372
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.980191707611084,
      "learning_rate": 5.959788478569558e-05,
      "loss": 2.8681,
      "step": 183373
    },
    {
      "epoch": 2.39,
      "grad_norm": 5.423569679260254,
      "learning_rate": 5.9595437798368915e-05,
      "loss": 3.0645,
      "step": 183374
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8034822940826416,
      "learning_rate": 5.9592990855737996e-05,
      "loss": 2.8636,
      "step": 183375
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.85806131362915,
      "learning_rate": 5.95905439578033e-05,
      "loss": 2.894,
      "step": 183376
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6884989738464355,
      "learning_rate": 5.958809710456514e-05,
      "loss": 3.044,
      "step": 183377
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0009520053863525,
      "learning_rate": 5.958565029602419e-05,
      "loss": 2.8615,
      "step": 183378
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8574142456054688,
      "learning_rate": 5.958320353218072e-05,
      "loss": 2.9339,
      "step": 183379
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.904696226119995,
      "learning_rate": 5.9580756813035366e-05,
      "loss": 2.9596,
      "step": 183380
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.780817985534668,
      "learning_rate": 5.957831013858846e-05,
      "loss": 3.0157,
      "step": 183381
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4489715099334717,
      "learning_rate": 5.957586350884053e-05,
      "loss": 3.102,
      "step": 183382
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.75388765335083,
      "learning_rate": 5.957341692379187e-05,
      "loss": 2.9049,
      "step": 183383
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4460971355438232,
      "learning_rate": 5.957097038344313e-05,
      "loss": 3.0858,
      "step": 183384
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.887012481689453,
      "learning_rate": 5.956852388779459e-05,
      "loss": 2.9712,
      "step": 183385
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.532688617706299,
      "learning_rate": 5.9566077436846905e-05,
      "loss": 2.9235,
      "step": 183386
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8354852199554443,
      "learning_rate": 5.956363103060036e-05,
      "loss": 3.0483,
      "step": 183387
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2085254192352295,
      "learning_rate": 5.9561184669055615e-05,
      "loss": 2.7952,
      "step": 183388
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4158363342285156,
      "learning_rate": 5.955873835221282e-05,
      "loss": 2.8389,
      "step": 183389
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5505738258361816,
      "learning_rate": 5.955629208007266e-05,
      "loss": 2.8547,
      "step": 183390
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.778515338897705,
      "learning_rate": 5.955384585263548e-05,
      "loss": 2.8951,
      "step": 183391
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.763906240463257,
      "learning_rate": 5.955139966990184e-05,
      "loss": 3.0155,
      "step": 183392
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7162890434265137,
      "learning_rate": 5.9548953531872045e-05,
      "loss": 2.7977,
      "step": 183393
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.679694890975952,
      "learning_rate": 5.954650743854682e-05,
      "loss": 3.0728,
      "step": 183394
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.882741689682007,
      "learning_rate": 5.954406138992627e-05,
      "loss": 2.7797,
      "step": 183395
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.655095100402832,
      "learning_rate": 5.954161538601109e-05,
      "loss": 2.9864,
      "step": 183396
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7244255542755127,
      "learning_rate": 5.953916942680158e-05,
      "loss": 2.939,
      "step": 183397
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1246562004089355,
      "learning_rate": 5.953672351229838e-05,
      "loss": 2.6199,
      "step": 183398
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.748480796813965,
      "learning_rate": 5.953427764250175e-05,
      "loss": 2.9128,
      "step": 183399
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9810171127319336,
      "learning_rate": 5.953183181741239e-05,
      "loss": 2.7492,
      "step": 183400
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.30983304977417,
      "learning_rate": 5.952938603703044e-05,
      "loss": 2.9359,
      "step": 183401
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.114468812942505,
      "learning_rate": 5.9526940301356586e-05,
      "loss": 3.1564,
      "step": 183402
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.820666790008545,
      "learning_rate": 5.952449461039117e-05,
      "loss": 3.1003,
      "step": 183403
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8065805435180664,
      "learning_rate": 5.9522048964134725e-05,
      "loss": 3.0181,
      "step": 183404
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.378324031829834,
      "learning_rate": 5.951960336258762e-05,
      "loss": 3.0922,
      "step": 183405
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3885464668273926,
      "learning_rate": 5.9517157805750515e-05,
      "loss": 2.9138,
      "step": 183406
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6424944400787354,
      "learning_rate": 5.9514712293623545e-05,
      "loss": 3.0718,
      "step": 183407
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8387160301208496,
      "learning_rate": 5.951226682620741e-05,
      "loss": 2.9928,
      "step": 183408
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0829660892486572,
      "learning_rate": 5.950982140350241e-05,
      "loss": 2.717,
      "step": 183409
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4514853954315186,
      "learning_rate": 5.9507376025509176e-05,
      "loss": 2.8698,
      "step": 183410
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3455395698547363,
      "learning_rate": 5.950493069222795e-05,
      "loss": 3.0164,
      "step": 183411
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.620173454284668,
      "learning_rate": 5.950248540365945e-05,
      "loss": 2.9386,
      "step": 183412
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.223146915435791,
      "learning_rate": 5.950004015980385e-05,
      "loss": 2.8892,
      "step": 183413
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.711482286453247,
      "learning_rate": 5.949759496066179e-05,
      "loss": 3.0731,
      "step": 183414
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4707446098327637,
      "learning_rate": 5.9495149806233586e-05,
      "loss": 2.9406,
      "step": 183415
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4832959175109863,
      "learning_rate": 5.949270469651985e-05,
      "loss": 3.1454,
      "step": 183416
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8147270679473877,
      "learning_rate": 5.949025963152089e-05,
      "loss": 2.9196,
      "step": 183417
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.610104560852051,
      "learning_rate": 5.948781461123738e-05,
      "loss": 2.952,
      "step": 183418
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.721602439880371,
      "learning_rate": 5.9485369635669475e-05,
      "loss": 2.9501,
      "step": 183419
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.366107940673828,
      "learning_rate": 5.948292470481787e-05,
      "loss": 3.0517,
      "step": 183420
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0304713249206543,
      "learning_rate": 5.9480479818682826e-05,
      "loss": 2.8442,
      "step": 183421
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2670629024505615,
      "learning_rate": 5.947803497726501e-05,
      "loss": 2.7024,
      "step": 183422
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6915104389190674,
      "learning_rate": 5.947559018056466e-05,
      "loss": 3.1181,
      "step": 183423
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7721259593963623,
      "learning_rate": 5.9473145428582504e-05,
      "loss": 2.7744,
      "step": 183424
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8757731914520264,
      "learning_rate": 5.9470700721318675e-05,
      "loss": 2.6467,
      "step": 183425
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.720407485961914,
      "learning_rate": 5.9468256058773876e-05,
      "loss": 3.1007,
      "step": 183426
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.504693031311035,
      "learning_rate": 5.946581144094838e-05,
      "loss": 2.8484,
      "step": 183427
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.18569016456604,
      "learning_rate": 5.9463366867842796e-05,
      "loss": 3.2012,
      "step": 183428
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6391422748565674,
      "learning_rate": 5.946092233945745e-05,
      "loss": 2.7482,
      "step": 183429
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.578270673751831,
      "learning_rate": 5.945847785579293e-05,
      "loss": 3.0373,
      "step": 183430
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5528616905212402,
      "learning_rate": 5.9456033416849604e-05,
      "loss": 2.9691,
      "step": 183431
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9448204040527344,
      "learning_rate": 5.945358902262797e-05,
      "loss": 3.0103,
      "step": 183432
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.626800775527954,
      "learning_rate": 5.945114467312836e-05,
      "loss": 2.9045,
      "step": 183433
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4070661067962646,
      "learning_rate": 5.944870036835141e-05,
      "loss": 2.9346,
      "step": 183434
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.993528127670288,
      "learning_rate": 5.944625610829742e-05,
      "loss": 3.0938,
      "step": 183435
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.755197525024414,
      "learning_rate": 5.944381189296695e-05,
      "loss": 2.9456,
      "step": 183436
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4855329990386963,
      "learning_rate": 5.9441367722360444e-05,
      "loss": 2.9447,
      "step": 183437
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3562328815460205,
      "learning_rate": 5.9438923596478325e-05,
      "loss": 2.8708,
      "step": 183438
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.541553497314453,
      "learning_rate": 5.943647951532097e-05,
      "loss": 2.8915,
      "step": 183439
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7362887859344482,
      "learning_rate": 5.9434035478889e-05,
      "loss": 2.6718,
      "step": 183440
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6707992553710938,
      "learning_rate": 5.943159148718272e-05,
      "loss": 2.9475,
      "step": 183441
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0198094844818115,
      "learning_rate": 5.9429147540202694e-05,
      "loss": 3.1897,
      "step": 183442
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7866079807281494,
      "learning_rate": 5.9426703637949355e-05,
      "loss": 2.906,
      "step": 183443
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.07161808013916,
      "learning_rate": 5.942425978042311e-05,
      "loss": 2.8792,
      "step": 183444
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.683098793029785,
      "learning_rate": 5.9421815967624386e-05,
      "loss": 2.7616,
      "step": 183445
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.743168354034424,
      "learning_rate": 5.941937219955375e-05,
      "loss": 2.8656,
      "step": 183446
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4017865657806396,
      "learning_rate": 5.94169284762115e-05,
      "loss": 3.008,
      "step": 183447
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3849501609802246,
      "learning_rate": 5.941448479759827e-05,
      "loss": 3.0958,
      "step": 183448
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5171682834625244,
      "learning_rate": 5.9412041163714466e-05,
      "loss": 2.7905,
      "step": 183449
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.180950164794922,
      "learning_rate": 5.940959757456038e-05,
      "loss": 2.972,
      "step": 183450
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5295679569244385,
      "learning_rate": 5.9407154030136704e-05,
      "loss": 2.8923,
      "step": 183451
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.618051052093506,
      "learning_rate": 5.9404710530443756e-05,
      "loss": 2.955,
      "step": 183452
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0563507080078125,
      "learning_rate": 5.940226707548196e-05,
      "loss": 3.0958,
      "step": 183453
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.2374815940856934,
      "learning_rate": 5.939982366525188e-05,
      "loss": 3.0111,
      "step": 183454
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9862940311431885,
      "learning_rate": 5.9397380299753915e-05,
      "loss": 3.0104,
      "step": 183455
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.683885335922241,
      "learning_rate": 5.939493697898844e-05,
      "loss": 3.1549,
      "step": 183456
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.567166566848755,
      "learning_rate": 5.939249370295608e-05,
      "loss": 2.9278,
      "step": 183457
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.637725830078125,
      "learning_rate": 5.9390050471657106e-05,
      "loss": 3.0465,
      "step": 183458
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8488082885742188,
      "learning_rate": 5.938760728509214e-05,
      "loss": 3.1512,
      "step": 183459
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.536036968231201,
      "learning_rate": 5.9385164143261566e-05,
      "loss": 2.9434,
      "step": 183460
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7618536949157715,
      "learning_rate": 5.938272104616584e-05,
      "loss": 2.8849,
      "step": 183461
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3121724128723145,
      "learning_rate": 5.938027799380533e-05,
      "loss": 3.129,
      "step": 183462
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8463330268859863,
      "learning_rate": 5.9377834986180625e-05,
      "loss": 2.8848,
      "step": 183463
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.952186584472656,
      "learning_rate": 5.937539202329204e-05,
      "loss": 2.8923,
      "step": 183464
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7437822818756104,
      "learning_rate": 5.9372949105140244e-05,
      "loss": 2.7001,
      "step": 183465
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.602970123291016,
      "learning_rate": 5.937050623172552e-05,
      "loss": 2.9277,
      "step": 183466
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5263867378234863,
      "learning_rate": 5.9368063403048346e-05,
      "loss": 2.8408,
      "step": 183467
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7671971321105957,
      "learning_rate": 5.9365620619109155e-05,
      "loss": 2.812,
      "step": 183468
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.822171449661255,
      "learning_rate": 5.9363177879908476e-05,
      "loss": 3.0078,
      "step": 183469
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.330718994140625,
      "learning_rate": 5.9360735185446674e-05,
      "loss": 2.9527,
      "step": 183470
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.859431266784668,
      "learning_rate": 5.935829253572432e-05,
      "loss": 2.9754,
      "step": 183471
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.694708824157715,
      "learning_rate": 5.93558499307417e-05,
      "loss": 3.0872,
      "step": 183472
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4977529048919678,
      "learning_rate": 5.9353407370499576e-05,
      "loss": 3.0193,
      "step": 183473
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0062198638916016,
      "learning_rate": 5.9350964854998014e-05,
      "loss": 2.944,
      "step": 183474
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.85250186920166,
      "learning_rate": 5.934852238423771e-05,
      "loss": 2.7986,
      "step": 183475
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.767333745956421,
      "learning_rate": 5.9346079958219005e-05,
      "loss": 2.5345,
      "step": 183476
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.791980266571045,
      "learning_rate": 5.934363757694248e-05,
      "loss": 2.982,
      "step": 183477
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1539113521575928,
      "learning_rate": 5.9341195240408435e-05,
      "loss": 2.8646,
      "step": 183478
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1059439182281494,
      "learning_rate": 5.9338752948617565e-05,
      "loss": 2.8828,
      "step": 183479
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7065625190734863,
      "learning_rate": 5.933631070157e-05,
      "loss": 2.8215,
      "step": 183480
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.63968563079834,
      "learning_rate": 5.9333868499266444e-05,
      "loss": 2.8028,
      "step": 183481
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7326817512512207,
      "learning_rate": 5.933142634170717e-05,
      "loss": 2.8875,
      "step": 183482
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.793041944503784,
      "learning_rate": 5.932898422889283e-05,
      "loss": 2.8101,
      "step": 183483
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7449042797088623,
      "learning_rate": 5.9326542160823696e-05,
      "loss": 2.9256,
      "step": 183484
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.7863099575042725,
      "learning_rate": 5.932410013750044e-05,
      "loss": 2.7871,
      "step": 183485
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5863144397735596,
      "learning_rate": 5.9321658158923225e-05,
      "loss": 3.0543,
      "step": 183486
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7911901473999023,
      "learning_rate": 5.9319216225092714e-05,
      "loss": 2.9535,
      "step": 183487
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6514577865600586,
      "learning_rate": 5.931677433600928e-05,
      "loss": 2.8183,
      "step": 183488
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.606419324874878,
      "learning_rate": 5.9314332491673444e-05,
      "loss": 2.9977,
      "step": 183489
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.262526512145996,
      "learning_rate": 5.931189069208552e-05,
      "loss": 2.8808,
      "step": 183490
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.447657346725464,
      "learning_rate": 5.930944893724626e-05,
      "loss": 2.9761,
      "step": 183491
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0453450679779053,
      "learning_rate": 5.930700722715571e-05,
      "loss": 2.7771,
      "step": 183492
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.42518949508667,
      "learning_rate": 5.9304565561814666e-05,
      "loss": 3.0213,
      "step": 183493
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3044466972351074,
      "learning_rate": 5.930212394122332e-05,
      "loss": 2.7601,
      "step": 183494
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0418524742126465,
      "learning_rate": 5.929968236538235e-05,
      "loss": 2.8729,
      "step": 183495
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.7967331409454346,
      "learning_rate": 5.929724083429205e-05,
      "loss": 3.0507,
      "step": 183496
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6756539344787598,
      "learning_rate": 5.929479934795298e-05,
      "loss": 3.0002,
      "step": 183497
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1505658626556396,
      "learning_rate": 5.929235790636558e-05,
      "loss": 2.9408,
      "step": 183498
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0601532459259033,
      "learning_rate": 5.928991650953026e-05,
      "loss": 2.9558,
      "step": 183499
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0012598037719727,
      "learning_rate": 5.9287475157447395e-05,
      "loss": 3.1618,
      "step": 183500
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.670910120010376,
      "learning_rate": 5.9285033850117634e-05,
      "loss": 2.7213,
      "step": 183501
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.295325994491577,
      "learning_rate": 5.9282592587541245e-05,
      "loss": 2.9842,
      "step": 183502
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8299083709716797,
      "learning_rate": 5.928015136971885e-05,
      "loss": 2.9808,
      "step": 183503
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1945621967315674,
      "learning_rate": 5.927771019665083e-05,
      "loss": 2.6864,
      "step": 183504
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6861355304718018,
      "learning_rate": 5.927526906833761e-05,
      "loss": 2.9738,
      "step": 183505
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5890190601348877,
      "learning_rate": 5.927282798477958e-05,
      "loss": 2.9848,
      "step": 183506
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8115713596343994,
      "learning_rate": 5.9270386945977355e-05,
      "loss": 2.959,
      "step": 183507
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.205632448196411,
      "learning_rate": 5.926794595193123e-05,
      "loss": 2.6871,
      "step": 183508
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.619250774383545,
      "learning_rate": 5.926550500264183e-05,
      "loss": 3.2336,
      "step": 183509
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.634507179260254,
      "learning_rate": 5.92630640981095e-05,
      "loss": 2.8386,
      "step": 183510
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.644331693649292,
      "learning_rate": 5.926062323833474e-05,
      "loss": 3.0667,
      "step": 183511
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9056286811828613,
      "learning_rate": 5.925818242331787e-05,
      "loss": 3.0918,
      "step": 183512
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3833203315734863,
      "learning_rate": 5.925574165305953e-05,
      "loss": 2.7648,
      "step": 183513
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0659704208374023,
      "learning_rate": 5.925330092755999e-05,
      "loss": 2.7014,
      "step": 183514
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.64121150970459,
      "learning_rate": 5.925086024681991e-05,
      "loss": 3.0469,
      "step": 183515
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0425221920013428,
      "learning_rate": 5.924841961083966e-05,
      "loss": 2.9566,
      "step": 183516
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.572345733642578,
      "learning_rate": 5.924597901961964e-05,
      "loss": 2.917,
      "step": 183517
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.814096689224243,
      "learning_rate": 5.924353847316028e-05,
      "loss": 2.8799,
      "step": 183518
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8016343116760254,
      "learning_rate": 5.924109797146215e-05,
      "loss": 2.988,
      "step": 183519
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.878188133239746,
      "learning_rate": 5.923865751452558e-05,
      "loss": 2.8328,
      "step": 183520
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9177558422088623,
      "learning_rate": 5.923621710235117e-05,
      "loss": 2.9822,
      "step": 183521
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2535030841827393,
      "learning_rate": 5.923377673493929e-05,
      "loss": 3.2374,
      "step": 183522
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.110026836395264,
      "learning_rate": 5.923133641229036e-05,
      "loss": 2.9857,
      "step": 183523
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.658046245574951,
      "learning_rate": 5.922889613440484e-05,
      "loss": 2.9913,
      "step": 183524
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9063971042633057,
      "learning_rate": 5.922645590128326e-05,
      "loss": 2.8361,
      "step": 183525
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5065317153930664,
      "learning_rate": 5.9224015712925956e-05,
      "loss": 2.943,
      "step": 183526
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8769075870513916,
      "learning_rate": 5.9221575569333535e-05,
      "loss": 2.6835,
      "step": 183527
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9060161113739014,
      "learning_rate": 5.921913547050637e-05,
      "loss": 2.8668,
      "step": 183528
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1447715759277344,
      "learning_rate": 5.921669541644494e-05,
      "loss": 2.9128,
      "step": 183529
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.929959774017334,
      "learning_rate": 5.921425540714956e-05,
      "loss": 2.7789,
      "step": 183530
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4660019874572754,
      "learning_rate": 5.921181544262087e-05,
      "loss": 3.1705,
      "step": 183531
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5933644771575928,
      "learning_rate": 5.920937552285917e-05,
      "loss": 2.861,
      "step": 183532
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.69734787940979,
      "learning_rate": 5.9206935647865096e-05,
      "loss": 2.8324,
      "step": 183533
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8061270713806152,
      "learning_rate": 5.920449581763898e-05,
      "loss": 2.8673,
      "step": 183534
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.811293840408325,
      "learning_rate": 5.9202056032181214e-05,
      "loss": 2.9888,
      "step": 183535
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5200886726379395,
      "learning_rate": 5.91996162914924e-05,
      "loss": 2.9794,
      "step": 183536
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.546233892440796,
      "learning_rate": 5.9197176595572945e-05,
      "loss": 3.1209,
      "step": 183537
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7540040016174316,
      "learning_rate": 5.9194736944423186e-05,
      "loss": 2.9367,
      "step": 183538
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6420741081237793,
      "learning_rate": 5.919229733804377e-05,
      "loss": 3.0592,
      "step": 183539
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.361290216445923,
      "learning_rate": 5.918985777643501e-05,
      "loss": 2.8839,
      "step": 183540
    },
    {
      "epoch": 2.39,
      "grad_norm": 6.558485507965088,
      "learning_rate": 5.9187418259597375e-05,
      "loss": 3.1338,
      "step": 183541
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.560339450836182,
      "learning_rate": 5.9184978787531386e-05,
      "loss": 2.7132,
      "step": 183542
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.272711992263794,
      "learning_rate": 5.918253936023746e-05,
      "loss": 3.1328,
      "step": 183543
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.562885284423828,
      "learning_rate": 5.918009997771598e-05,
      "loss": 2.5315,
      "step": 183544
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9782543182373047,
      "learning_rate": 5.917766063996755e-05,
      "loss": 2.9921,
      "step": 183545
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.695887327194214,
      "learning_rate": 5.917522134699251e-05,
      "loss": 2.9089,
      "step": 183546
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.274144649505615,
      "learning_rate": 5.917278209879128e-05,
      "loss": 2.6476,
      "step": 183547
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3363327980041504,
      "learning_rate": 5.917034289536448e-05,
      "loss": 3.1472,
      "step": 183548
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5565879344940186,
      "learning_rate": 5.9167903736712355e-05,
      "loss": 3.0364,
      "step": 183549
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.828204393386841,
      "learning_rate": 5.916546462283551e-05,
      "loss": 2.8412,
      "step": 183550
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.867173433303833,
      "learning_rate": 5.916302555373439e-05,
      "loss": 2.7699,
      "step": 183551
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.849008321762085,
      "learning_rate": 5.9160586529409415e-05,
      "loss": 2.8132,
      "step": 183552
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.844186544418335,
      "learning_rate": 5.9158147549860925e-05,
      "loss": 3.1459,
      "step": 183553
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.503695487976074,
      "learning_rate": 5.915570861508958e-05,
      "loss": 2.7844,
      "step": 183554
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.726764678955078,
      "learning_rate": 5.915326972509565e-05,
      "loss": 3.0669,
      "step": 183555
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.952932596206665,
      "learning_rate": 5.915083087987977e-05,
      "loss": 2.8373,
      "step": 183556
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.956446409225464,
      "learning_rate": 5.9148392079442206e-05,
      "loss": 2.6644,
      "step": 183557
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.608318328857422,
      "learning_rate": 5.9145953323783654e-05,
      "loss": 2.9825,
      "step": 183558
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.43200945854187,
      "learning_rate": 5.914351461290425e-05,
      "loss": 2.9105,
      "step": 183559
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.8093783855438232,
      "learning_rate": 5.914107594680473e-05,
      "loss": 2.7864,
      "step": 183560
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.237049102783203,
      "learning_rate": 5.9138637325485317e-05,
      "loss": 2.8404,
      "step": 183561
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.67075777053833,
      "learning_rate": 5.913619874894665e-05,
      "loss": 3.0698,
      "step": 183562
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.696200132369995,
      "learning_rate": 5.913376021718906e-05,
      "loss": 2.5436,
      "step": 183563
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.858186960220337,
      "learning_rate": 5.913132173021312e-05,
      "loss": 3.0812,
      "step": 183564
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5808634757995605,
      "learning_rate": 5.912888328801919e-05,
      "loss": 2.8819,
      "step": 183565
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.727459192276001,
      "learning_rate": 5.912644489060777e-05,
      "loss": 2.8623,
      "step": 183566
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2280871868133545,
      "learning_rate": 5.912400653797923e-05,
      "loss": 2.9789,
      "step": 183567
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.843217134475708,
      "learning_rate": 5.912156823013413e-05,
      "loss": 2.9323,
      "step": 183568
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.588290214538574,
      "learning_rate": 5.911912996707281e-05,
      "loss": 2.9073,
      "step": 183569
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.653998613357544,
      "learning_rate": 5.9116691748795867e-05,
      "loss": 2.7863,
      "step": 183570
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.999709129333496,
      "learning_rate": 5.9114253575303696e-05,
      "loss": 2.8187,
      "step": 183571
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.633188009262085,
      "learning_rate": 5.911181544659673e-05,
      "loss": 3.0109,
      "step": 183572
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.887504816055298,
      "learning_rate": 5.9109377362675314e-05,
      "loss": 2.8219,
      "step": 183573
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4882311820983887,
      "learning_rate": 5.9106939323540106e-05,
      "loss": 2.8548,
      "step": 183574
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8283631801605225,
      "learning_rate": 5.91045013291914e-05,
      "loss": 3.0605,
      "step": 183575
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.647815704345703,
      "learning_rate": 5.910206337962977e-05,
      "loss": 2.8405,
      "step": 183576
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8008499145507812,
      "learning_rate": 5.909962547485565e-05,
      "loss": 2.9672,
      "step": 183577
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3881659507751465,
      "learning_rate": 5.9097187614869434e-05,
      "loss": 2.9079,
      "step": 183578
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.566585063934326,
      "learning_rate": 5.909474979967152e-05,
      "loss": 2.7464,
      "step": 183579
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.819988489151001,
      "learning_rate": 5.9092312029262525e-05,
      "loss": 2.8517,
      "step": 183580
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9730751514434814,
      "learning_rate": 5.9089874303642725e-05,
      "loss": 2.9619,
      "step": 183581
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.749108076095581,
      "learning_rate": 5.908743662281273e-05,
      "loss": 2.9112,
      "step": 183582
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8489458560943604,
      "learning_rate": 5.908499898677298e-05,
      "loss": 2.9171,
      "step": 183583
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.514697790145874,
      "learning_rate": 5.908256139552383e-05,
      "loss": 3.0715,
      "step": 183584
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.620773792266846,
      "learning_rate": 5.908012384906572e-05,
      "loss": 3.0408,
      "step": 183585
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.603893756866455,
      "learning_rate": 5.907768634739925e-05,
      "loss": 3.0417,
      "step": 183586
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.598757743835449,
      "learning_rate": 5.907524889052467e-05,
      "loss": 3.1557,
      "step": 183587
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.652261972427368,
      "learning_rate": 5.907281147844267e-05,
      "loss": 2.9355,
      "step": 183588
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6613266468048096,
      "learning_rate": 5.907037411115354e-05,
      "loss": 3.0354,
      "step": 183589
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6892895698547363,
      "learning_rate": 5.906793678865781e-05,
      "loss": 2.9656,
      "step": 183590
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.831808567047119,
      "learning_rate": 5.9065499510955816e-05,
      "loss": 3.0433,
      "step": 183591
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7306032180786133,
      "learning_rate": 5.9063062278048156e-05,
      "loss": 3.0435,
      "step": 183592
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8035449981689453,
      "learning_rate": 5.906062508993516e-05,
      "loss": 2.7587,
      "step": 183593
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4835448265075684,
      "learning_rate": 5.9058187946617406e-05,
      "loss": 2.8586,
      "step": 183594
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.537519931793213,
      "learning_rate": 5.9055750848095284e-05,
      "loss": 3.0881,
      "step": 183595
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6881191730499268,
      "learning_rate": 5.905331379436926e-05,
      "loss": 3.0884,
      "step": 183596
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6410064697265625,
      "learning_rate": 5.90508767854397e-05,
      "loss": 3.0246,
      "step": 183597
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9785749912261963,
      "learning_rate": 5.904843982130718e-05,
      "loss": 2.961,
      "step": 183598
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.819824695587158,
      "learning_rate": 5.9046002901972055e-05,
      "loss": 2.6939,
      "step": 183599
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.138589382171631,
      "learning_rate": 5.904356602743486e-05,
      "loss": 2.7578,
      "step": 183600
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.734748601913452,
      "learning_rate": 5.9041129197696066e-05,
      "loss": 2.9189,
      "step": 183601
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8946290016174316,
      "learning_rate": 5.903869241275604e-05,
      "loss": 3.1016,
      "step": 183602
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8793177604675293,
      "learning_rate": 5.903625567261521e-05,
      "loss": 2.9401,
      "step": 183603
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6376349925994873,
      "learning_rate": 5.903381897727417e-05,
      "loss": 3.1999,
      "step": 183604
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9451029300689697,
      "learning_rate": 5.90313823267332e-05,
      "loss": 2.9,
      "step": 183605
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6122853755950928,
      "learning_rate": 5.902894572099292e-05,
      "loss": 2.9392,
      "step": 183606
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.668943405151367,
      "learning_rate": 5.902650916005371e-05,
      "loss": 3.1788,
      "step": 183607
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9490039348602295,
      "learning_rate": 5.902407264391603e-05,
      "loss": 3.1783,
      "step": 183608
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8468070030212402,
      "learning_rate": 5.902163617258024e-05,
      "loss": 3.0221,
      "step": 183609
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8160862922668457,
      "learning_rate": 5.901919974604695e-05,
      "loss": 3.0226,
      "step": 183610
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.72414231300354,
      "learning_rate": 5.9016763364316486e-05,
      "loss": 2.8801,
      "step": 183611
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.001997709274292,
      "learning_rate": 5.9014327027389416e-05,
      "loss": 3.0918,
      "step": 183612
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.521660804748535,
      "learning_rate": 5.901189073526611e-05,
      "loss": 3.0256,
      "step": 183613
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.685271739959717,
      "learning_rate": 5.9009454487947064e-05,
      "loss": 3.0002,
      "step": 183614
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.981886625289917,
      "learning_rate": 5.9007018285432636e-05,
      "loss": 3.1263,
      "step": 183615
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5818428993225098,
      "learning_rate": 5.900458212772341e-05,
      "loss": 2.6582,
      "step": 183616
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2198991775512695,
      "learning_rate": 5.900214601481974e-05,
      "loss": 3.0176,
      "step": 183617
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8327994346618652,
      "learning_rate": 5.899970994672216e-05,
      "loss": 2.8225,
      "step": 183618
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3690528869628906,
      "learning_rate": 5.899727392343111e-05,
      "loss": 3.0508,
      "step": 183619
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3337275981903076,
      "learning_rate": 5.8994837944946914e-05,
      "loss": 3.0528,
      "step": 183620
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.013951301574707,
      "learning_rate": 5.8992402011270214e-05,
      "loss": 3.2222,
      "step": 183621
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5479257106781006,
      "learning_rate": 5.89899661224014e-05,
      "loss": 2.9479,
      "step": 183622
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.421332597732544,
      "learning_rate": 5.898753027834078e-05,
      "loss": 2.9964,
      "step": 183623
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1920111179351807,
      "learning_rate": 5.898509447908905e-05,
      "loss": 3.1212,
      "step": 183624
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.847024917602539,
      "learning_rate": 5.8982658724646504e-05,
      "loss": 3.1846,
      "step": 183625
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.841057538986206,
      "learning_rate": 5.898022301501358e-05,
      "loss": 3.0716,
      "step": 183626
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1106181144714355,
      "learning_rate": 5.897778735019085e-05,
      "loss": 3.0491,
      "step": 183627
    },
    {
      "epoch": 2.39,
      "grad_norm": 6.760796546936035,
      "learning_rate": 5.897535173017868e-05,
      "loss": 2.7688,
      "step": 183628
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.961716413497925,
      "learning_rate": 5.897291615497749e-05,
      "loss": 2.902,
      "step": 183629
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.16939640045166,
      "learning_rate": 5.897048062458786e-05,
      "loss": 2.6364,
      "step": 183630
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.834932565689087,
      "learning_rate": 5.896804513901008e-05,
      "loss": 3.026,
      "step": 183631
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.976588726043701,
      "learning_rate": 5.8965609698244754e-05,
      "loss": 3.0975,
      "step": 183632
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3709068298339844,
      "learning_rate": 5.896317430229228e-05,
      "loss": 2.9809,
      "step": 183633
    },
    {
      "epoch": 2.39,
      "grad_norm": 5.4387969970703125,
      "learning_rate": 5.896073895115303e-05,
      "loss": 2.7529,
      "step": 183634
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.521810531616211,
      "learning_rate": 5.895830364482763e-05,
      "loss": 2.8513,
      "step": 183635
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.732424020767212,
      "learning_rate": 5.8955868383316386e-05,
      "loss": 3.0541,
      "step": 183636
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.300023317337036,
      "learning_rate": 5.895343316661976e-05,
      "loss": 2.7534,
      "step": 183637
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.414839506149292,
      "learning_rate": 5.895099799473828e-05,
      "loss": 3.0094,
      "step": 183638
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4633567333221436,
      "learning_rate": 5.894856286767239e-05,
      "loss": 2.7134,
      "step": 183639
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.418308734893799,
      "learning_rate": 5.8946127785422415e-05,
      "loss": 2.8106,
      "step": 183640
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.396094560623169,
      "learning_rate": 5.8943692747989027e-05,
      "loss": 3.0238,
      "step": 183641
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.04832649230957,
      "learning_rate": 5.894125775537252e-05,
      "loss": 2.8803,
      "step": 183642
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2610039710998535,
      "learning_rate": 5.893882280757329e-05,
      "loss": 2.944,
      "step": 183643
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3163866996765137,
      "learning_rate": 5.8936387904591985e-05,
      "loss": 3.1401,
      "step": 183644
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.761503219604492,
      "learning_rate": 5.893395304642896e-05,
      "loss": 2.6597,
      "step": 183645
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9483933448791504,
      "learning_rate": 5.8931518233084554e-05,
      "loss": 3.0341,
      "step": 183646
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1164348125457764,
      "learning_rate": 5.8929083464559426e-05,
      "loss": 2.7903,
      "step": 183647
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.8930256366729736,
      "learning_rate": 5.8926648740853875e-05,
      "loss": 3.0816,
      "step": 183648
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.940788745880127,
      "learning_rate": 5.892421406196844e-05,
      "loss": 3.0165,
      "step": 183649
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6038978099823,
      "learning_rate": 5.8921779427903594e-05,
      "loss": 2.6124,
      "step": 183650
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.479285478591919,
      "learning_rate": 5.891934483865972e-05,
      "loss": 2.6973,
      "step": 183651
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.347783088684082,
      "learning_rate": 5.891691029423723e-05,
      "loss": 3.1719,
      "step": 183652
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.535182476043701,
      "learning_rate": 5.8914475794636687e-05,
      "loss": 2.8796,
      "step": 183653
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9432904720306396,
      "learning_rate": 5.891204133985842e-05,
      "loss": 2.8847,
      "step": 183654
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4226040840148926,
      "learning_rate": 5.890960692990303e-05,
      "loss": 3.2494,
      "step": 183655
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8919239044189453,
      "learning_rate": 5.890717256477092e-05,
      "loss": 2.8631,
      "step": 183656
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6727559566497803,
      "learning_rate": 5.890473824446249e-05,
      "loss": 3.0413,
      "step": 183657
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9674112796783447,
      "learning_rate": 5.890230396897817e-05,
      "loss": 3.1842,
      "step": 183658
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.409623146057129,
      "learning_rate": 5.889986973831853e-05,
      "loss": 3.0121,
      "step": 183659
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.40848445892334,
      "learning_rate": 5.8897435552483866e-05,
      "loss": 2.7405,
      "step": 183660
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.214942693710327,
      "learning_rate": 5.889500141147481e-05,
      "loss": 2.908,
      "step": 183661
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.891662359237671,
      "learning_rate": 5.889256731529173e-05,
      "loss": 3.0305,
      "step": 183662
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.802070140838623,
      "learning_rate": 5.889013326393506e-05,
      "loss": 3.1712,
      "step": 183663
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.941765069961548,
      "learning_rate": 5.888769925740516e-05,
      "loss": 2.7347,
      "step": 183664
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6714766025543213,
      "learning_rate": 5.888526529570271e-05,
      "loss": 3.0655,
      "step": 183665
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.262077808380127,
      "learning_rate": 5.888283137882797e-05,
      "loss": 3.0127,
      "step": 183666
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2346880435943604,
      "learning_rate": 5.88803975067815e-05,
      "loss": 2.7868,
      "step": 183667
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.462860345840454,
      "learning_rate": 5.887796367956373e-05,
      "loss": 2.9251,
      "step": 183668
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6673567295074463,
      "learning_rate": 5.887552989717511e-05,
      "loss": 2.7918,
      "step": 183669
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.399632692337036,
      "learning_rate": 5.887309615961599e-05,
      "loss": 2.707,
      "step": 183670
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.220848798751831,
      "learning_rate": 5.887066246688702e-05,
      "loss": 3.015,
      "step": 183671
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1970531940460205,
      "learning_rate": 5.8868228818988415e-05,
      "loss": 3.0403,
      "step": 183672
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.931302547454834,
      "learning_rate": 5.886579521592088e-05,
      "loss": 2.9992,
      "step": 183673
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.563384771347046,
      "learning_rate": 5.8863361657684725e-05,
      "loss": 3.0638,
      "step": 183674
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6567399501800537,
      "learning_rate": 5.886092814428044e-05,
      "loss": 2.8701,
      "step": 183675
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9461331367492676,
      "learning_rate": 5.885849467570836e-05,
      "loss": 2.8781,
      "step": 183676
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1850576400756836,
      "learning_rate": 5.885606125196914e-05,
      "loss": 2.8677,
      "step": 183677
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6123409271240234,
      "learning_rate": 5.885362787306304e-05,
      "loss": 2.6351,
      "step": 183678
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5557777881622314,
      "learning_rate": 5.8851194538990666e-05,
      "loss": 2.8426,
      "step": 183679
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9155561923980713,
      "learning_rate": 5.884876124975243e-05,
      "loss": 2.705,
      "step": 183680
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.994630813598633,
      "learning_rate": 5.8846328005348765e-05,
      "loss": 2.9331,
      "step": 183681
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.83880352973938,
      "learning_rate": 5.884389480578e-05,
      "loss": 3.1611,
      "step": 183682
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8873138427734375,
      "learning_rate": 5.884146165104684e-05,
      "loss": 2.6853,
      "step": 183683
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.549612522125244,
      "learning_rate": 5.883902854114948e-05,
      "loss": 2.6363,
      "step": 183684
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9275078773498535,
      "learning_rate": 5.883659547608859e-05,
      "loss": 2.8446,
      "step": 183685
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4943652153015137,
      "learning_rate": 5.883416245586453e-05,
      "loss": 2.8345,
      "step": 183686
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7490415573120117,
      "learning_rate": 5.883172948047777e-05,
      "loss": 2.8837,
      "step": 183687
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.027660369873047,
      "learning_rate": 5.882929654992865e-05,
      "loss": 3.1177,
      "step": 183688
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5906941890716553,
      "learning_rate": 5.8826863664217795e-05,
      "loss": 2.7413,
      "step": 183689
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7758121490478516,
      "learning_rate": 5.88244308233455e-05,
      "loss": 2.9425,
      "step": 183690
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.149357795715332,
      "learning_rate": 5.8821998027312376e-05,
      "loss": 3.0485,
      "step": 183691
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6450212001800537,
      "learning_rate": 5.881956527611879e-05,
      "loss": 2.7593,
      "step": 183692
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9875752925872803,
      "learning_rate": 5.881713256976523e-05,
      "loss": 3.0767,
      "step": 183693
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.012908458709717,
      "learning_rate": 5.881469990825199e-05,
      "loss": 3.0288,
      "step": 183694
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.646340370178223,
      "learning_rate": 5.8812267291579765e-05,
      "loss": 2.9694,
      "step": 183695
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.748976230621338,
      "learning_rate": 5.880983471974879e-05,
      "loss": 2.7142,
      "step": 183696
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.580019474029541,
      "learning_rate": 5.880740219275972e-05,
      "loss": 2.8125,
      "step": 183697
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4279348850250244,
      "learning_rate": 5.8804969710612815e-05,
      "loss": 2.7423,
      "step": 183698
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.644624948501587,
      "learning_rate": 5.880253727330877e-05,
      "loss": 2.8638,
      "step": 183699
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.277596473693848,
      "learning_rate": 5.880010488084775e-05,
      "loss": 2.8405,
      "step": 183700
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.1146979331970215,
      "learning_rate": 5.87976725332304e-05,
      "loss": 3.0704,
      "step": 183701
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.2718546390533447,
      "learning_rate": 5.879524023045704e-05,
      "loss": 3.0483,
      "step": 183702
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.218053102493286,
      "learning_rate": 5.8792807972528274e-05,
      "loss": 2.8929,
      "step": 183703
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4536314010620117,
      "learning_rate": 5.879037575944444e-05,
      "loss": 2.9998,
      "step": 183704
    },
    {
      "epoch": 2.39,
      "grad_norm": 5.7602009773254395,
      "learning_rate": 5.878794359120613e-05,
      "loss": 2.7854,
      "step": 183705
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8926496505737305,
      "learning_rate": 5.8785511467813585e-05,
      "loss": 2.8918,
      "step": 183706
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.053774118423462,
      "learning_rate": 5.878307938926739e-05,
      "loss": 2.7435,
      "step": 183707
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.353067636489868,
      "learning_rate": 5.878064735556793e-05,
      "loss": 2.9145,
      "step": 183708
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.383812189102173,
      "learning_rate": 5.87782153667158e-05,
      "loss": 2.9868,
      "step": 183709
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.9208788871765137,
      "learning_rate": 5.877578342271125e-05,
      "loss": 3.0581,
      "step": 183710
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.874269485473633,
      "learning_rate": 5.87733515235549e-05,
      "loss": 2.8463,
      "step": 183711
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5123839378356934,
      "learning_rate": 5.877091966924717e-05,
      "loss": 2.8746,
      "step": 183712
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.620425224304199,
      "learning_rate": 5.876848785978847e-05,
      "loss": 3.1731,
      "step": 183713
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.05911922454834,
      "learning_rate": 5.876605609517916e-05,
      "loss": 3.1118,
      "step": 183714
    },
    {
      "epoch": 2.39,
      "grad_norm": 6.447408676147461,
      "learning_rate": 5.87636243754199e-05,
      "loss": 2.9186,
      "step": 183715
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.977787733078003,
      "learning_rate": 5.876119270051094e-05,
      "loss": 2.9165,
      "step": 183716
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7545559406280518,
      "learning_rate": 5.87587610704529e-05,
      "loss": 2.8049,
      "step": 183717
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.888397216796875,
      "learning_rate": 5.875632948524618e-05,
      "loss": 3.0338,
      "step": 183718
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7491719722747803,
      "learning_rate": 5.8753897944891116e-05,
      "loss": 2.9265,
      "step": 183719
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9865643978118896,
      "learning_rate": 5.875146644938834e-05,
      "loss": 2.9878,
      "step": 183720
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9471020698547363,
      "learning_rate": 5.874903499873822e-05,
      "loss": 2.9575,
      "step": 183721
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8325910568237305,
      "learning_rate": 5.8746603592941155e-05,
      "loss": 3.0324,
      "step": 183722
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6620805263519287,
      "learning_rate": 5.874417223199771e-05,
      "loss": 3.0234,
      "step": 183723
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.467423677444458,
      "learning_rate": 5.8741740915908255e-05,
      "loss": 2.873,
      "step": 183724
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.204953670501709,
      "learning_rate": 5.873930964467322e-05,
      "loss": 3.1177,
      "step": 183725
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6159393787384033,
      "learning_rate": 5.8736878418293163e-05,
      "loss": 2.8088,
      "step": 183726
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.617064952850342,
      "learning_rate": 5.873444723676847e-05,
      "loss": 3.1937,
      "step": 183727
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.138038635253906,
      "learning_rate": 5.873201610009955e-05,
      "loss": 2.5172,
      "step": 183728
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4333019256591797,
      "learning_rate": 5.8729585008286964e-05,
      "loss": 2.8194,
      "step": 183729
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.403475284576416,
      "learning_rate": 5.8727153961331086e-05,
      "loss": 3.0361,
      "step": 183730
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.445540904998779,
      "learning_rate": 5.87247229592323e-05,
      "loss": 2.9429,
      "step": 183731
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.357710361480713,
      "learning_rate": 5.8722292001991256e-05,
      "loss": 3.0308,
      "step": 183732
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5444867610931396,
      "learning_rate": 5.87198610896082e-05,
      "loss": 3.0868,
      "step": 183733
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7624974250793457,
      "learning_rate": 5.871743022208373e-05,
      "loss": 2.9161,
      "step": 183734
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7095859050750732,
      "learning_rate": 5.871499939941827e-05,
      "loss": 2.9944,
      "step": 183735
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3100411891937256,
      "learning_rate": 5.8712568621612235e-05,
      "loss": 2.8904,
      "step": 183736
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5733916759490967,
      "learning_rate": 5.8710137888666006e-05,
      "loss": 3.0267,
      "step": 183737
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.67230486869812,
      "learning_rate": 5.8707707200580234e-05,
      "loss": 2.8496,
      "step": 183738
    },
    {
      "epoch": 2.39,
      "grad_norm": 5.353817939758301,
      "learning_rate": 5.870527655735514e-05,
      "loss": 3.106,
      "step": 183739
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.348031997680664,
      "learning_rate": 5.870284595899136e-05,
      "loss": 3.1941,
      "step": 183740
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.525495767593384,
      "learning_rate": 5.87004154054893e-05,
      "loss": 3.0105,
      "step": 183741
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.900374412536621,
      "learning_rate": 5.869798489684938e-05,
      "loss": 2.6842,
      "step": 183742
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9971346855163574,
      "learning_rate": 5.8695554433071976e-05,
      "loss": 2.9134,
      "step": 183743
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.174050807952881,
      "learning_rate": 5.869312401415772e-05,
      "loss": 2.9668,
      "step": 183744
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5193026065826416,
      "learning_rate": 5.869069364010685e-05,
      "loss": 2.8789,
      "step": 183745
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4022302627563477,
      "learning_rate": 5.868826331092005e-05,
      "loss": 2.6597,
      "step": 183746
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.732825994491577,
      "learning_rate": 5.868583302659763e-05,
      "loss": 2.9457,
      "step": 183747
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0518581867218018,
      "learning_rate": 5.868340278714009e-05,
      "loss": 2.8801,
      "step": 183748
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.960331916809082,
      "learning_rate": 5.868097259254779e-05,
      "loss": 2.8336,
      "step": 183749
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.824082612991333,
      "learning_rate": 5.867854244282131e-05,
      "loss": 2.7149,
      "step": 183750
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.132333278656006,
      "learning_rate": 5.867611233796097e-05,
      "loss": 2.9295,
      "step": 183751
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.7027697563171387,
      "learning_rate": 5.867368227796737e-05,
      "loss": 2.7505,
      "step": 183752
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.639270544052124,
      "learning_rate": 5.867125226284092e-05,
      "loss": 2.8952,
      "step": 183753
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3022167682647705,
      "learning_rate": 5.866882229258201e-05,
      "loss": 3.1367,
      "step": 183754
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7706899642944336,
      "learning_rate": 5.866639236719104e-05,
      "loss": 2.7921,
      "step": 183755
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.247213125228882,
      "learning_rate": 5.8663962486668616e-05,
      "loss": 2.9551,
      "step": 183756
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1312315464019775,
      "learning_rate": 5.8661532651015064e-05,
      "loss": 2.7814,
      "step": 183757
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.663200855255127,
      "learning_rate": 5.8659102860230955e-05,
      "loss": 2.9397,
      "step": 183758
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.97860050201416,
      "learning_rate": 5.8656673114316654e-05,
      "loss": 2.923,
      "step": 183759
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1004014015197754,
      "learning_rate": 5.865424341327266e-05,
      "loss": 3.0046,
      "step": 183760
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.434102535247803,
      "learning_rate": 5.865181375709931e-05,
      "loss": 3.1197,
      "step": 183761
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8433380126953125,
      "learning_rate": 5.864938414579723e-05,
      "loss": 2.9037,
      "step": 183762
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.606699228286743,
      "learning_rate": 5.864695457936669e-05,
      "loss": 2.9433,
      "step": 183763
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6940510272979736,
      "learning_rate": 5.864452505780836e-05,
      "loss": 2.9446,
      "step": 183764
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6288039684295654,
      "learning_rate": 5.864209558112243e-05,
      "loss": 3.1351,
      "step": 183765
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.4613068103790283,
      "learning_rate": 5.863966614930967e-05,
      "loss": 2.9937,
      "step": 183766
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8906540870666504,
      "learning_rate": 5.863723676237022e-05,
      "loss": 3.0761,
      "step": 183767
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1851797103881836,
      "learning_rate": 5.863480742030471e-05,
      "loss": 2.9,
      "step": 183768
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.556547164916992,
      "learning_rate": 5.8632378123113465e-05,
      "loss": 2.9678,
      "step": 183769
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7434959411621094,
      "learning_rate": 5.8629948870797125e-05,
      "loss": 2.921,
      "step": 183770
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.66062593460083,
      "learning_rate": 5.8627519663355925e-05,
      "loss": 2.9468,
      "step": 183771
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.698371171951294,
      "learning_rate": 5.862509050079063e-05,
      "loss": 2.7933,
      "step": 183772
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.102257251739502,
      "learning_rate": 5.862266138310127e-05,
      "loss": 2.8058,
      "step": 183773
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8396103382110596,
      "learning_rate": 5.862023231028864e-05,
      "loss": 2.9101,
      "step": 183774
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6117465496063232,
      "learning_rate": 5.8617803282352946e-05,
      "loss": 2.7701,
      "step": 183775
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3440654277801514,
      "learning_rate": 5.861537429929486e-05,
      "loss": 3.1699,
      "step": 183776
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.040332794189453,
      "learning_rate": 5.8612945361114674e-05,
      "loss": 2.8235,
      "step": 183777
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.43979811668396,
      "learning_rate": 5.861051646781302e-05,
      "loss": 2.7015,
      "step": 183778
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.520390510559082,
      "learning_rate": 5.860808761939007e-05,
      "loss": 2.9661,
      "step": 183779
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8014233112335205,
      "learning_rate": 5.860565881584655e-05,
      "loss": 2.7524,
      "step": 183780
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7123615741729736,
      "learning_rate": 5.860323005718266e-05,
      "loss": 2.9503,
      "step": 183781
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.66640305519104,
      "learning_rate": 5.860080134339911e-05,
      "loss": 3.0419,
      "step": 183782
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6060657501220703,
      "learning_rate": 5.859837267449612e-05,
      "loss": 2.9148,
      "step": 183783
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9820988178253174,
      "learning_rate": 5.859594405047443e-05,
      "loss": 2.8646,
      "step": 183784
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6311678886413574,
      "learning_rate": 5.859351547133414e-05,
      "loss": 2.9486,
      "step": 183785
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.917846202850342,
      "learning_rate": 5.859108693707595e-05,
      "loss": 3.063,
      "step": 183786
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.535039186477661,
      "learning_rate": 5.858865844770019e-05,
      "loss": 3.0605,
      "step": 183787
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.863678216934204,
      "learning_rate": 5.858623000320739e-05,
      "loss": 2.7905,
      "step": 183788
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.696742534637451,
      "learning_rate": 5.858380160359789e-05,
      "loss": 2.8169,
      "step": 183789
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6440622806549072,
      "learning_rate": 5.858137324887239e-05,
      "loss": 2.9963,
      "step": 183790
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.240088701248169,
      "learning_rate": 5.8578944939031015e-05,
      "loss": 2.952,
      "step": 183791
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.679091453552246,
      "learning_rate": 5.857651667407444e-05,
      "loss": 2.9111,
      "step": 183792
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.093073844909668,
      "learning_rate": 5.8574088454002984e-05,
      "loss": 2.9494,
      "step": 183793
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6831626892089844,
      "learning_rate": 5.8571660278817236e-05,
      "loss": 2.8088,
      "step": 183794
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9573657512664795,
      "learning_rate": 5.856923214851748e-05,
      "loss": 2.9676,
      "step": 183795
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.951185703277588,
      "learning_rate": 5.8566804063104375e-05,
      "loss": 3.0392,
      "step": 183796
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.7321527004241943,
      "learning_rate": 5.856437602257821e-05,
      "loss": 2.852,
      "step": 183797
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.959331750869751,
      "learning_rate": 5.8561948026939534e-05,
      "loss": 2.9675,
      "step": 183798
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.720480442047119,
      "learning_rate": 5.855952007618865e-05,
      "loss": 2.6084,
      "step": 183799
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0557868480682373,
      "learning_rate": 5.8557092170326156e-05,
      "loss": 2.7324,
      "step": 183800
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.469660997390747,
      "learning_rate": 5.855466430935243e-05,
      "loss": 3.1556,
      "step": 183801
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5197291374206543,
      "learning_rate": 5.855223649326799e-05,
      "loss": 2.8083,
      "step": 183802
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.699601411819458,
      "learning_rate": 5.854980872207324e-05,
      "loss": 2.9062,
      "step": 183803
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.390968084335327,
      "learning_rate": 5.854738099576869e-05,
      "loss": 3.1566,
      "step": 183804
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6724588871002197,
      "learning_rate": 5.854495331435463e-05,
      "loss": 3.101,
      "step": 183805
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.18154239654541,
      "learning_rate": 5.8542525677831684e-05,
      "loss": 2.7925,
      "step": 183806
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8683555126190186,
      "learning_rate": 5.854009808620017e-05,
      "loss": 2.9039,
      "step": 183807
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9273531436920166,
      "learning_rate": 5.853767053946071e-05,
      "loss": 2.8756,
      "step": 183808
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.686748504638672,
      "learning_rate": 5.853524303761364e-05,
      "loss": 2.7386,
      "step": 183809
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0613269805908203,
      "learning_rate": 5.853281558065933e-05,
      "loss": 2.8554,
      "step": 183810
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.896254062652588,
      "learning_rate": 5.8530388168598444e-05,
      "loss": 3.1992,
      "step": 183811
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5883913040161133,
      "learning_rate": 5.852796080143127e-05,
      "loss": 2.7799,
      "step": 183812
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.5256032943725586,
      "learning_rate": 5.852553347915826e-05,
      "loss": 2.8446,
      "step": 183813
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5360119342803955,
      "learning_rate": 5.8523106201780004e-05,
      "loss": 3.2149,
      "step": 183814
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5303447246551514,
      "learning_rate": 5.852067896929683e-05,
      "loss": 3.0858,
      "step": 183815
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.705821990966797,
      "learning_rate": 5.851825178170915e-05,
      "loss": 3.0186,
      "step": 183816
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.428502082824707,
      "learning_rate": 5.8515824639017585e-05,
      "loss": 3.0322,
      "step": 183817
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0452492237091064,
      "learning_rate": 5.8513397541222374e-05,
      "loss": 2.9126,
      "step": 183818
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.459178924560547,
      "learning_rate": 5.8510970488324185e-05,
      "loss": 2.9562,
      "step": 183819
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.44966983795166,
      "learning_rate": 5.850854348032334e-05,
      "loss": 3.0122,
      "step": 183820
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.625821113586426,
      "learning_rate": 5.850611651722032e-05,
      "loss": 2.8873,
      "step": 183821
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.234600305557251,
      "learning_rate": 5.8503689599015524e-05,
      "loss": 2.9501,
      "step": 183822
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.333378791809082,
      "learning_rate": 5.850126272570951e-05,
      "loss": 2.7247,
      "step": 183823
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.068419456481934,
      "learning_rate": 5.8498835897302576e-05,
      "loss": 3.0181,
      "step": 183824
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5524330139160156,
      "learning_rate": 5.849640911379536e-05,
      "loss": 3.0894,
      "step": 183825
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.77191162109375,
      "learning_rate": 5.849398237518823e-05,
      "loss": 2.9812,
      "step": 183826
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1581318378448486,
      "learning_rate": 5.8491555681481616e-05,
      "loss": 3.0607,
      "step": 183827
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.987927198410034,
      "learning_rate": 5.848912903267592e-05,
      "loss": 2.9016,
      "step": 183828
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1572184562683105,
      "learning_rate": 5.8486702428771706e-05,
      "loss": 2.8205,
      "step": 183829
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3349950313568115,
      "learning_rate": 5.84842758697693e-05,
      "loss": 3.0491,
      "step": 183830
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.993605136871338,
      "learning_rate": 5.848184935566932e-05,
      "loss": 3.2435,
      "step": 183831
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7733943462371826,
      "learning_rate": 5.8479422886472016e-05,
      "loss": 2.6409,
      "step": 183832
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8261256217956543,
      "learning_rate": 5.8476996462178125e-05,
      "loss": 3.0495,
      "step": 183833
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.773125410079956,
      "learning_rate": 5.8474570082787756e-05,
      "loss": 2.9724,
      "step": 183834
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.261009693145752,
      "learning_rate": 5.8472143748301625e-05,
      "loss": 2.9286,
      "step": 183835
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9308393001556396,
      "learning_rate": 5.8469717458719976e-05,
      "loss": 2.8754,
      "step": 183836
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0700650215148926,
      "learning_rate": 5.846729121404344e-05,
      "loss": 2.9953,
      "step": 183837
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.79274582862854,
      "learning_rate": 5.846486501427232e-05,
      "loss": 2.854,
      "step": 183838
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.845815658569336,
      "learning_rate": 5.8462438859407314e-05,
      "loss": 2.8807,
      "step": 183839
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.572889804840088,
      "learning_rate": 5.846001274944855e-05,
      "loss": 2.8708,
      "step": 183840
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9547083377838135,
      "learning_rate": 5.845758668439666e-05,
      "loss": 2.9248,
      "step": 183841
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9005191326141357,
      "learning_rate": 5.845516066425202e-05,
      "loss": 3.0526,
      "step": 183842
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7473480701446533,
      "learning_rate": 5.845273468901522e-05,
      "loss": 2.9883,
      "step": 183843
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9528040885925293,
      "learning_rate": 5.84503087586865e-05,
      "loss": 2.8754,
      "step": 183844
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3078763484954834,
      "learning_rate": 5.844788287326659e-05,
      "loss": 3.1967,
      "step": 183845
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6639316082000732,
      "learning_rate": 5.844545703275566e-05,
      "loss": 2.7249,
      "step": 183846
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6208834648132324,
      "learning_rate": 5.844303123715434e-05,
      "loss": 3.013,
      "step": 183847
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.2540132999420166,
      "learning_rate": 5.844060548646292e-05,
      "loss": 2.8181,
      "step": 183848
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.877777338027954,
      "learning_rate": 5.8438179780682046e-05,
      "loss": 2.861,
      "step": 183849
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.889500856399536,
      "learning_rate": 5.8435754119811985e-05,
      "loss": 2.6682,
      "step": 183850
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.81758713722229,
      "learning_rate": 5.843332850385343e-05,
      "loss": 2.8659,
      "step": 183851
    },
    {
      "epoch": 2.39,
      "grad_norm": 6.495561599731445,
      "learning_rate": 5.843090293280655e-05,
      "loss": 2.903,
      "step": 183852
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.074537754058838,
      "learning_rate": 5.842847740667197e-05,
      "loss": 2.8155,
      "step": 183853
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.665186882019043,
      "learning_rate": 5.8426051925450037e-05,
      "loss": 3.0663,
      "step": 183854
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.27836012840271,
      "learning_rate": 5.842362648914134e-05,
      "loss": 3.1044,
      "step": 183855
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.1809611320495605,
      "learning_rate": 5.8421201097746184e-05,
      "loss": 3.0319,
      "step": 183856
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.873448610305786,
      "learning_rate": 5.8418775751265236e-05,
      "loss": 3.1353,
      "step": 183857
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.508986473083496,
      "learning_rate": 5.841635044969862e-05,
      "loss": 3.0123,
      "step": 183858
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.906576633453369,
      "learning_rate": 5.841392519304705e-05,
      "loss": 2.9166,
      "step": 183859
    },
    {
      "epoch": 2.39,
      "grad_norm": 6.593922138214111,
      "learning_rate": 5.8411499981310816e-05,
      "loss": 2.7773,
      "step": 183860
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.584951877593994,
      "learning_rate": 5.840907481449052e-05,
      "loss": 2.8422,
      "step": 183861
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.55301570892334,
      "learning_rate": 5.8406649692586496e-05,
      "loss": 2.9222,
      "step": 183862
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6533002853393555,
      "learning_rate": 5.840422461559934e-05,
      "loss": 3.1418,
      "step": 183863
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.125638484954834,
      "learning_rate": 5.840179958352928e-05,
      "loss": 3.1832,
      "step": 183864
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.138760566711426,
      "learning_rate": 5.8399374596376936e-05,
      "loss": 3.024,
      "step": 183865
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6916544437408447,
      "learning_rate": 5.839694965414266e-05,
      "loss": 2.9147,
      "step": 183866
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0165798664093018,
      "learning_rate": 5.839452475682701e-05,
      "loss": 3.0213,
      "step": 183867
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.543168544769287,
      "learning_rate": 5.83920999044303e-05,
      "loss": 2.7353,
      "step": 183868
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.046947956085205,
      "learning_rate": 5.838967509695323e-05,
      "loss": 2.8021,
      "step": 183869
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.559777021408081,
      "learning_rate": 5.838725033439592e-05,
      "loss": 3.0447,
      "step": 183870
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7055139541625977,
      "learning_rate": 5.838482561675904e-05,
      "loss": 3.0197,
      "step": 183871
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.254530191421509,
      "learning_rate": 5.8382400944042904e-05,
      "loss": 2.9504,
      "step": 183872
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.690417289733887,
      "learning_rate": 5.8379976316248134e-05,
      "loss": 2.8589,
      "step": 183873
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.696542263031006,
      "learning_rate": 5.8377551733375025e-05,
      "loss": 2.8217,
      "step": 183874
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.806305408477783,
      "learning_rate": 5.837512719542422e-05,
      "loss": 2.9043,
      "step": 183875
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.491856813430786,
      "learning_rate": 5.837270270239588e-05,
      "loss": 2.8813,
      "step": 183876
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.652686357498169,
      "learning_rate": 5.83702782542907e-05,
      "loss": 3.2071,
      "step": 183877
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.044851779937744,
      "learning_rate": 5.836785385110899e-05,
      "loss": 2.8931,
      "step": 183878
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.136739730834961,
      "learning_rate": 5.836542949285131e-05,
      "loss": 2.9857,
      "step": 183879
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.65295672416687,
      "learning_rate": 5.8363005179518e-05,
      "loss": 3.0493,
      "step": 183880
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.30871844291687,
      "learning_rate": 5.8360580911109645e-05,
      "loss": 3.1123,
      "step": 183881
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4458255767822266,
      "learning_rate": 5.835815668762659e-05,
      "loss": 2.9135,
      "step": 183882
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.633368968963623,
      "learning_rate": 5.835573250906933e-05,
      "loss": 3.1547,
      "step": 183883
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9038422107696533,
      "learning_rate": 5.835330837543824e-05,
      "loss": 2.8161,
      "step": 183884
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4155232906341553,
      "learning_rate": 5.83508842867339e-05,
      "loss": 3.0118,
      "step": 183885
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0152742862701416,
      "learning_rate": 5.83484602429566e-05,
      "loss": 3.0131,
      "step": 183886
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.650240659713745,
      "learning_rate": 5.8346036244106986e-05,
      "loss": 3.0621,
      "step": 183887
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.145186185836792,
      "learning_rate": 5.834361229018537e-05,
      "loss": 3.1388,
      "step": 183888
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.400362014770508,
      "learning_rate": 5.834118838119228e-05,
      "loss": 2.9099,
      "step": 183889
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.216491937637329,
      "learning_rate": 5.833876451712802e-05,
      "loss": 3.1176,
      "step": 183890
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.5976784229278564,
      "learning_rate": 5.833634069799322e-05,
      "loss": 2.8142,
      "step": 183891
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.745123863220215,
      "learning_rate": 5.833391692378822e-05,
      "loss": 2.78,
      "step": 183892
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.853567600250244,
      "learning_rate": 5.833149319451354e-05,
      "loss": 3.0098,
      "step": 183893
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.725922107696533,
      "learning_rate": 5.832906951016961e-05,
      "loss": 3.0032,
      "step": 183894
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0290894508361816,
      "learning_rate": 5.832664587075678e-05,
      "loss": 3.2248,
      "step": 183895
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.081649303436279,
      "learning_rate": 5.832422227627568e-05,
      "loss": 2.9847,
      "step": 183896
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.83488130569458,
      "learning_rate": 5.8321798726726665e-05,
      "loss": 2.754,
      "step": 183897
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.342888355255127,
      "learning_rate": 5.831937522211011e-05,
      "loss": 2.8711,
      "step": 183898
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.532341718673706,
      "learning_rate": 5.831695176242661e-05,
      "loss": 2.9714,
      "step": 183899
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.801081418991089,
      "learning_rate": 5.831452834767657e-05,
      "loss": 2.7809,
      "step": 183900
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.226994514465332,
      "learning_rate": 5.8312104977860355e-05,
      "loss": 3.2052,
      "step": 183901
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.53572154045105,
      "learning_rate": 5.830968165297852e-05,
      "loss": 2.8018,
      "step": 183902
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.576496601104736,
      "learning_rate": 5.830725837303145e-05,
      "loss": 2.8082,
      "step": 183903
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.069180488586426,
      "learning_rate": 5.830483513801967e-05,
      "loss": 3.1569,
      "step": 183904
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.655665397644043,
      "learning_rate": 5.830241194794357e-05,
      "loss": 2.8383,
      "step": 183905
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3678503036499023,
      "learning_rate": 5.829998880280363e-05,
      "loss": 3.0439,
      "step": 183906
    },
    {
      "epoch": 2.39,
      "grad_norm": 4.511441707611084,
      "learning_rate": 5.8297565702600216e-05,
      "loss": 2.8391,
      "step": 183907
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7021963596343994,
      "learning_rate": 5.829514264733392e-05,
      "loss": 2.9232,
      "step": 183908
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.6001999378204346,
      "learning_rate": 5.829271963700503e-05,
      "loss": 2.7705,
      "step": 183909
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0501224994659424,
      "learning_rate": 5.829029667161418e-05,
      "loss": 2.8369,
      "step": 183910
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.696044683456421,
      "learning_rate": 5.828787375116171e-05,
      "loss": 3.0095,
      "step": 183911
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.532639741897583,
      "learning_rate": 5.828545087564812e-05,
      "loss": 2.8121,
      "step": 183912
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3241312503814697,
      "learning_rate": 5.828302804507372e-05,
      "loss": 2.9261,
      "step": 183913
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.831245183944702,
      "learning_rate": 5.828060525943914e-05,
      "loss": 2.8536,
      "step": 183914
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.523577928543091,
      "learning_rate": 5.8278182518744706e-05,
      "loss": 2.5811,
      "step": 183915
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9456708431243896,
      "learning_rate": 5.8275759822990964e-05,
      "loss": 3.1329,
      "step": 183916
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6876044273376465,
      "learning_rate": 5.827333717217827e-05,
      "loss": 3.0255,
      "step": 183917
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0924196243286133,
      "learning_rate": 5.827091456630729e-05,
      "loss": 2.92,
      "step": 183918
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.772239923477173,
      "learning_rate": 5.826849200537813e-05,
      "loss": 2.9618,
      "step": 183919
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.901271343231201,
      "learning_rate": 5.826606948939152e-05,
      "loss": 2.8509,
      "step": 183920
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.8291165828704834,
      "learning_rate": 5.826364701834769e-05,
      "loss": 2.9381,
      "step": 183921
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.062455415725708,
      "learning_rate": 5.8261224592247344e-05,
      "loss": 2.8392,
      "step": 183922
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.328354835510254,
      "learning_rate": 5.8258802211090684e-05,
      "loss": 2.7707,
      "step": 183923
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.435103416442871,
      "learning_rate": 5.825637987487843e-05,
      "loss": 3.0309,
      "step": 183924
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.152568817138672,
      "learning_rate": 5.8253957583610767e-05,
      "loss": 2.8314,
      "step": 183925
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.568293571472168,
      "learning_rate": 5.825153533728828e-05,
      "loss": 2.9759,
      "step": 183926
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7230424880981445,
      "learning_rate": 5.824911313591134e-05,
      "loss": 3.1259,
      "step": 183927
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.6263415813446045,
      "learning_rate": 5.824669097948055e-05,
      "loss": 2.9996,
      "step": 183928
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0830368995666504,
      "learning_rate": 5.824426886799617e-05,
      "loss": 3.189,
      "step": 183929
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.540907144546509,
      "learning_rate": 5.82418468014589e-05,
      "loss": 2.9966,
      "step": 183930
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.3400063514709473,
      "learning_rate": 5.823942477986888e-05,
      "loss": 2.9614,
      "step": 183931
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.3667993545532227,
      "learning_rate": 5.823700280322677e-05,
      "loss": 2.9425,
      "step": 183932
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.575087785720825,
      "learning_rate": 5.823458087153291e-05,
      "loss": 2.7276,
      "step": 183933
    },
    {
      "epoch": 2.39,
      "grad_norm": 5.652602195739746,
      "learning_rate": 5.8232158984787894e-05,
      "loss": 3.0502,
      "step": 183934
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.4007418155670166,
      "learning_rate": 5.8229737142991986e-05,
      "loss": 3.0834,
      "step": 183935
    },
    {
      "epoch": 2.39,
      "grad_norm": 3.0007550716400146,
      "learning_rate": 5.822731534614589e-05,
      "loss": 3.0144,
      "step": 183936
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.9410595893859863,
      "learning_rate": 5.822489359424977e-05,
      "loss": 2.8489,
      "step": 183937
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.59975004196167,
      "learning_rate": 5.8222471887304256e-05,
      "loss": 2.8381,
      "step": 183938
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.201551675796509,
      "learning_rate": 5.822005022530966e-05,
      "loss": 2.8257,
      "step": 183939
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.984300136566162,
      "learning_rate": 5.821762860826663e-05,
      "loss": 3.1463,
      "step": 183940
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.009207725524902,
      "learning_rate": 5.821520703617541e-05,
      "loss": 2.7407,
      "step": 183941
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.860206365585327,
      "learning_rate": 5.8212785509036706e-05,
      "loss": 2.9208,
      "step": 183942
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0255208015441895,
      "learning_rate": 5.821036402685064e-05,
      "loss": 2.8612,
      "step": 183943
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5311930179595947,
      "learning_rate": 5.820794258961795e-05,
      "loss": 3.0559,
      "step": 183944
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5788655281066895,
      "learning_rate": 5.8205521197338864e-05,
      "loss": 3.0581,
      "step": 183945
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.42153263092041,
      "learning_rate": 5.820309985001399e-05,
      "loss": 2.804,
      "step": 183946
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.466320753097534,
      "learning_rate": 5.820067854764368e-05,
      "loss": 2.9031,
      "step": 183947
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.889254570007324,
      "learning_rate": 5.819825729022859e-05,
      "loss": 2.8605,
      "step": 183948
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0587105751037598,
      "learning_rate": 5.8195836077768834e-05,
      "loss": 3.0269,
      "step": 183949
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5108158588409424,
      "learning_rate": 5.819341491026511e-05,
      "loss": 3.1225,
      "step": 183950
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.143599510192871,
      "learning_rate": 5.819099378771773e-05,
      "loss": 3.093,
      "step": 183951
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.271754503250122,
      "learning_rate": 5.818857271012726e-05,
      "loss": 3.1331,
      "step": 183952
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.110115051269531,
      "learning_rate": 5.818615167749405e-05,
      "loss": 3.035,
      "step": 183953
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.193833589553833,
      "learning_rate": 5.818373068981875e-05,
      "loss": 2.9986,
      "step": 183954
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1592206954956055,
      "learning_rate": 5.818130974710149e-05,
      "loss": 2.8978,
      "step": 183955
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.89058780670166,
      "learning_rate": 5.8178888849342996e-05,
      "loss": 3.0463,
      "step": 183956
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4392333030700684,
      "learning_rate": 5.817646799654351e-05,
      "loss": 2.9757,
      "step": 183957
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.033681869506836,
      "learning_rate": 5.817404718870365e-05,
      "loss": 2.9276,
      "step": 183958
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6819231510162354,
      "learning_rate": 5.817162642582374e-05,
      "loss": 3.0122,
      "step": 183959
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9999067783355713,
      "learning_rate": 5.816920570790445e-05,
      "loss": 2.8501,
      "step": 183960
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9855782985687256,
      "learning_rate": 5.816678503494591e-05,
      "loss": 2.8662,
      "step": 183961
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1998729705810547,
      "learning_rate": 5.816436440694879e-05,
      "loss": 2.8507,
      "step": 183962
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.778377056121826,
      "learning_rate": 5.8161943823913414e-05,
      "loss": 2.8914,
      "step": 183963
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2367987632751465,
      "learning_rate": 5.815952328584037e-05,
      "loss": 2.9063,
      "step": 183964
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3004133701324463,
      "learning_rate": 5.8157102792729995e-05,
      "loss": 2.897,
      "step": 183965
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.90220308303833,
      "learning_rate": 5.815468234458281e-05,
      "loss": 2.7059,
      "step": 183966
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3243157863616943,
      "learning_rate": 5.815226194139927e-05,
      "loss": 2.9358,
      "step": 183967
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0142147541046143,
      "learning_rate": 5.8149841583179754e-05,
      "loss": 2.846,
      "step": 183968
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5138070583343506,
      "learning_rate": 5.8147421269924676e-05,
      "loss": 2.6328,
      "step": 183969
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0314481258392334,
      "learning_rate": 5.814500100163466e-05,
      "loss": 3.2345,
      "step": 183970
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.064434766769409,
      "learning_rate": 5.814258077830998e-05,
      "loss": 3.0719,
      "step": 183971
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8052542209625244,
      "learning_rate": 5.8140160599951194e-05,
      "loss": 2.7578,
      "step": 183972
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8179478645324707,
      "learning_rate": 5.813774046655874e-05,
      "loss": 2.9184,
      "step": 183973
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.812408685684204,
      "learning_rate": 5.813532037813308e-05,
      "loss": 2.6679,
      "step": 183974
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4506373405456543,
      "learning_rate": 5.8132900334674515e-05,
      "loss": 2.9319,
      "step": 183975
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.975952386856079,
      "learning_rate": 5.8130480336183684e-05,
      "loss": 2.8126,
      "step": 183976
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.887582302093506,
      "learning_rate": 5.812806038266088e-05,
      "loss": 3.0915,
      "step": 183977
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1768686771392822,
      "learning_rate": 5.812564047410674e-05,
      "loss": 2.7548,
      "step": 183978
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7620046138763428,
      "learning_rate": 5.812322061052159e-05,
      "loss": 2.8267,
      "step": 183979
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9932703971862793,
      "learning_rate": 5.812080079190584e-05,
      "loss": 3.0661,
      "step": 183980
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1027238368988037,
      "learning_rate": 5.811838101826005e-05,
      "loss": 2.8694,
      "step": 183981
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3532698154449463,
      "learning_rate": 5.8115961289584624e-05,
      "loss": 2.8439,
      "step": 183982
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7055721282958984,
      "learning_rate": 5.8113541605879954e-05,
      "loss": 2.9792,
      "step": 183983
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7213363647460938,
      "learning_rate": 5.8111121967146614e-05,
      "loss": 2.9129,
      "step": 183984
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1843020915985107,
      "learning_rate": 5.8108702373384964e-05,
      "loss": 2.8787,
      "step": 183985
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8325886726379395,
      "learning_rate": 5.8106282824595404e-05,
      "loss": 2.9903,
      "step": 183986
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6138834953308105,
      "learning_rate": 5.810386332077851e-05,
      "loss": 3.094,
      "step": 183987
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7180891036987305,
      "learning_rate": 5.8101443861934706e-05,
      "loss": 2.9932,
      "step": 183988
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.866262674331665,
      "learning_rate": 5.809902444806432e-05,
      "loss": 3.0487,
      "step": 183989
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5556163787841797,
      "learning_rate": 5.809660507916797e-05,
      "loss": 3.081,
      "step": 183990
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0029592514038086,
      "learning_rate": 5.8094185755246034e-05,
      "loss": 3.026,
      "step": 183991
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.875641107559204,
      "learning_rate": 5.809176647629886e-05,
      "loss": 2.9225,
      "step": 183992
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0545883178710938,
      "learning_rate": 5.8089347242327075e-05,
      "loss": 2.7762,
      "step": 183993
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6256654262542725,
      "learning_rate": 5.808692805333098e-05,
      "loss": 2.8797,
      "step": 183994
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.661094903945923,
      "learning_rate": 5.808450890931118e-05,
      "loss": 3.0407,
      "step": 183995
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6159536838531494,
      "learning_rate": 5.8082089810268e-05,
      "loss": 2.8002,
      "step": 183996
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.028103828430176,
      "learning_rate": 5.8079670756201935e-05,
      "loss": 3.0562,
      "step": 183997
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.404298782348633,
      "learning_rate": 5.807725174711336e-05,
      "loss": 2.8724,
      "step": 183998
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3784000873565674,
      "learning_rate": 5.8074832783002877e-05,
      "loss": 2.9422,
      "step": 183999
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6269874572753906,
      "learning_rate": 5.807241386387075e-05,
      "loss": 2.9625,
      "step": 184000
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.921318292617798,
      "learning_rate": 5.806999498971764e-05,
      "loss": 2.8269,
      "step": 184001
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.630902051925659,
      "learning_rate": 5.8067576160543786e-05,
      "loss": 3.187,
      "step": 184002
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.451751232147217,
      "learning_rate": 5.806515737634988e-05,
      "loss": 2.6357,
      "step": 184003
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.468977451324463,
      "learning_rate": 5.8062738637136095e-05,
      "loss": 2.6405,
      "step": 184004
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.854480028152466,
      "learning_rate": 5.8060319942903066e-05,
      "loss": 2.7756,
      "step": 184005
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7343673706054688,
      "learning_rate": 5.805790129365112e-05,
      "loss": 2.8137,
      "step": 184006
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.438931941986084,
      "learning_rate": 5.805548268938086e-05,
      "loss": 2.9587,
      "step": 184007
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5534727573394775,
      "learning_rate": 5.805306413009259e-05,
      "loss": 2.778,
      "step": 184008
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.543077230453491,
      "learning_rate": 5.805064561578696e-05,
      "loss": 2.78,
      "step": 184009
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7535529136657715,
      "learning_rate": 5.8048227146464124e-05,
      "loss": 2.9985,
      "step": 184010
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.64080548286438,
      "learning_rate": 5.804580872212477e-05,
      "loss": 3.0263,
      "step": 184011
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.657525062561035,
      "learning_rate": 5.804339034276919e-05,
      "loss": 2.9358,
      "step": 184012
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5289101600646973,
      "learning_rate": 5.8040972008398e-05,
      "loss": 3.0778,
      "step": 184013
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9808192253112793,
      "learning_rate": 5.8038553719011495e-05,
      "loss": 2.9973,
      "step": 184014
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4803855419158936,
      "learning_rate": 5.80361354746103e-05,
      "loss": 2.7979,
      "step": 184015
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6323461532592773,
      "learning_rate": 5.803371727519463e-05,
      "loss": 2.8957,
      "step": 184016
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.775839328765869,
      "learning_rate": 5.803129912076513e-05,
      "loss": 3.1434,
      "step": 184017
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.474569797515869,
      "learning_rate": 5.802888101132212e-05,
      "loss": 3.1528,
      "step": 184018
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4513790607452393,
      "learning_rate": 5.802646294686615e-05,
      "loss": 2.8629,
      "step": 184019
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.499040126800537,
      "learning_rate": 5.8024044927397596e-05,
      "loss": 3.0531,
      "step": 184020
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.153649091720581,
      "learning_rate": 5.802162695291706e-05,
      "loss": 2.707,
      "step": 184021
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.567641258239746,
      "learning_rate": 5.801920902342473e-05,
      "loss": 2.9837,
      "step": 184022
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8309102058410645,
      "learning_rate": 5.801679113892125e-05,
      "loss": 2.8882,
      "step": 184023
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5103330612182617,
      "learning_rate": 5.801437329940698e-05,
      "loss": 2.9031,
      "step": 184024
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9184224605560303,
      "learning_rate": 5.8011955504882455e-05,
      "loss": 2.994,
      "step": 184025
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.89056396484375,
      "learning_rate": 5.800953775534801e-05,
      "loss": 2.9879,
      "step": 184026
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8361740112304688,
      "learning_rate": 5.8007120050804315e-05,
      "loss": 3.0319,
      "step": 184027
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0851333141326904,
      "learning_rate": 5.800470239125149e-05,
      "loss": 2.8688,
      "step": 184028
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.711299180984497,
      "learning_rate": 5.8002284776690244e-05,
      "loss": 2.9181,
      "step": 184029
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5937676429748535,
      "learning_rate": 5.799986720712088e-05,
      "loss": 3.1754,
      "step": 184030
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.307325839996338,
      "learning_rate": 5.799744968254398e-05,
      "loss": 3.0257,
      "step": 184031
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.273274898529053,
      "learning_rate": 5.799503220295983e-05,
      "loss": 2.747,
      "step": 184032
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.257096767425537,
      "learning_rate": 5.799261476836906e-05,
      "loss": 3.0258,
      "step": 184033
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.396805763244629,
      "learning_rate": 5.799019737877203e-05,
      "loss": 3.0233,
      "step": 184034
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.717740535736084,
      "learning_rate": 5.798778003416921e-05,
      "loss": 3.0153,
      "step": 184035
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.675692319869995,
      "learning_rate": 5.798536273456092e-05,
      "loss": 3.0237,
      "step": 184036
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.826671600341797,
      "learning_rate": 5.798294547994782e-05,
      "loss": 3.0916,
      "step": 184037
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.887876033782959,
      "learning_rate": 5.798052827033019e-05,
      "loss": 3.1013,
      "step": 184038
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.437983751296997,
      "learning_rate": 5.797811110570859e-05,
      "loss": 3.0096,
      "step": 184039
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1322386264801025,
      "learning_rate": 5.7975693986083437e-05,
      "loss": 3.0742,
      "step": 184040
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.979686737060547,
      "learning_rate": 5.7973276911455194e-05,
      "loss": 2.9644,
      "step": 184041
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.870978355407715,
      "learning_rate": 5.797085988182418e-05,
      "loss": 3.0497,
      "step": 184042
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.9834675788879395,
      "learning_rate": 5.796844289719105e-05,
      "loss": 2.7617,
      "step": 184043
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9141364097595215,
      "learning_rate": 5.7966025957556085e-05,
      "loss": 2.8046,
      "step": 184044
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8066275119781494,
      "learning_rate": 5.796360906291986e-05,
      "loss": 2.7225,
      "step": 184045
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.679847240447998,
      "learning_rate": 5.79611922132828e-05,
      "loss": 2.7247,
      "step": 184046
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.917926788330078,
      "learning_rate": 5.795877540864529e-05,
      "loss": 2.7463,
      "step": 184047
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.846252918243408,
      "learning_rate": 5.795635864900774e-05,
      "loss": 3.06,
      "step": 184048
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.945796489715576,
      "learning_rate": 5.7953941934370765e-05,
      "loss": 2.8737,
      "step": 184049
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5819897651672363,
      "learning_rate": 5.795152526473462e-05,
      "loss": 2.8928,
      "step": 184050
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1684484481811523,
      "learning_rate": 5.794910864009995e-05,
      "loss": 2.9322,
      "step": 184051
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.972534418106079,
      "learning_rate": 5.794669206046712e-05,
      "loss": 3.1681,
      "step": 184052
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.950557231903076,
      "learning_rate": 5.794427552583655e-05,
      "loss": 2.7937,
      "step": 184053
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.204149007797241,
      "learning_rate": 5.794185903620865e-05,
      "loss": 3.0463,
      "step": 184054
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2163949012756348,
      "learning_rate": 5.793944259158398e-05,
      "loss": 3.0545,
      "step": 184055
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.296201229095459,
      "learning_rate": 5.793702619196289e-05,
      "loss": 2.8675,
      "step": 184056
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.159393787384033,
      "learning_rate": 5.793460983734592e-05,
      "loss": 3.1439,
      "step": 184057
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.1682047843933105,
      "learning_rate": 5.793219352773349e-05,
      "loss": 2.8118,
      "step": 184058
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.716579914093018,
      "learning_rate": 5.792977726312607e-05,
      "loss": 2.8964,
      "step": 184059
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2600622177124023,
      "learning_rate": 5.792736104352393e-05,
      "loss": 2.7228,
      "step": 184060
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.216738224029541,
      "learning_rate": 5.7924944868927806e-05,
      "loss": 3.0695,
      "step": 184061
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.808274745941162,
      "learning_rate": 5.792252873933787e-05,
      "loss": 2.9692,
      "step": 184062
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6216397285461426,
      "learning_rate": 5.79201126547548e-05,
      "loss": 2.8395,
      "step": 184063
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3105568885803223,
      "learning_rate": 5.791769661517897e-05,
      "loss": 2.7511,
      "step": 184064
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6188881397247314,
      "learning_rate": 5.791528062061073e-05,
      "loss": 3.0086,
      "step": 184065
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.206712245941162,
      "learning_rate": 5.7912864671050686e-05,
      "loss": 2.8152,
      "step": 184066
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.275767803192139,
      "learning_rate": 5.7910448766499174e-05,
      "loss": 2.9418,
      "step": 184067
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.859408140182495,
      "learning_rate": 5.790803290695663e-05,
      "loss": 2.7463,
      "step": 184068
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8534996509552,
      "learning_rate": 5.790561709242364e-05,
      "loss": 2.9543,
      "step": 184069
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5939981937408447,
      "learning_rate": 5.790320132290055e-05,
      "loss": 2.9494,
      "step": 184070
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9382145404815674,
      "learning_rate": 5.790078559838775e-05,
      "loss": 2.8959,
      "step": 184071
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1093692779541016,
      "learning_rate": 5.7898369918885854e-05,
      "loss": 2.9255,
      "step": 184072
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5002801418304443,
      "learning_rate": 5.789595428439522e-05,
      "loss": 2.8358,
      "step": 184073
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.520778179168701,
      "learning_rate": 5.7893538694916206e-05,
      "loss": 2.8161,
      "step": 184074
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5391221046447754,
      "learning_rate": 5.7891123150449425e-05,
      "loss": 3.1139,
      "step": 184075
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4672648906707764,
      "learning_rate": 5.788870765099527e-05,
      "loss": 3.1336,
      "step": 184076
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5370335578918457,
      "learning_rate": 5.7886292196554077e-05,
      "loss": 3.2391,
      "step": 184077
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.66121768951416,
      "learning_rate": 5.7883876787126506e-05,
      "loss": 2.6437,
      "step": 184078
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.933587074279785,
      "learning_rate": 5.788146142271279e-05,
      "loss": 3.3225,
      "step": 184079
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2350809574127197,
      "learning_rate": 5.787904610331354e-05,
      "loss": 2.9233,
      "step": 184080
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0796403884887695,
      "learning_rate": 5.787663082892918e-05,
      "loss": 2.9272,
      "step": 184081
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7266998291015625,
      "learning_rate": 5.787421559956011e-05,
      "loss": 2.6015,
      "step": 184082
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5739033222198486,
      "learning_rate": 5.787180041520674e-05,
      "loss": 2.7865,
      "step": 184083
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7115530967712402,
      "learning_rate": 5.7869385275869616e-05,
      "loss": 2.7915,
      "step": 184084
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0351576805114746,
      "learning_rate": 5.786697018154909e-05,
      "loss": 2.8915,
      "step": 184085
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.416691780090332,
      "learning_rate": 5.7864555132245714e-05,
      "loss": 2.9083,
      "step": 184086
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.02858304977417,
      "learning_rate": 5.78621401279599e-05,
      "loss": 2.9056,
      "step": 184087
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3069376945495605,
      "learning_rate": 5.7859725168692106e-05,
      "loss": 2.8335,
      "step": 184088
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5070888996124268,
      "learning_rate": 5.7857310254442666e-05,
      "loss": 2.8622,
      "step": 184089
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4717249870300293,
      "learning_rate": 5.785489538521219e-05,
      "loss": 3.037,
      "step": 184090
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.680746555328369,
      "learning_rate": 5.7852480561000994e-05,
      "loss": 2.946,
      "step": 184091
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.378474473953247,
      "learning_rate": 5.7850065781809684e-05,
      "loss": 3.0082,
      "step": 184092
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5289714336395264,
      "learning_rate": 5.784765104763853e-05,
      "loss": 3.0844,
      "step": 184093
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2046830654144287,
      "learning_rate": 5.78452363584882e-05,
      "loss": 2.966,
      "step": 184094
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.011958360671997,
      "learning_rate": 5.784282171435889e-05,
      "loss": 3.014,
      "step": 184095
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8760504722595215,
      "learning_rate": 5.784040711525123e-05,
      "loss": 2.985,
      "step": 184096
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.552980422973633,
      "learning_rate": 5.7837992561165523e-05,
      "loss": 3.1864,
      "step": 184097
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.894568681716919,
      "learning_rate": 5.7835578052102405e-05,
      "loss": 2.6989,
      "step": 184098
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6015074253082275,
      "learning_rate": 5.783316358806214e-05,
      "loss": 2.9358,
      "step": 184099
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7274396419525146,
      "learning_rate": 5.783074916904532e-05,
      "loss": 3.0775,
      "step": 184100
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7554328441619873,
      "learning_rate": 5.7828334795052354e-05,
      "loss": 3.0845,
      "step": 184101
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6313607692718506,
      "learning_rate": 5.7825920466083673e-05,
      "loss": 2.9387,
      "step": 184102
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7739291191101074,
      "learning_rate": 5.7823506182139645e-05,
      "loss": 2.8318,
      "step": 184103
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.841299057006836,
      "learning_rate": 5.782109194322087e-05,
      "loss": 3.1163,
      "step": 184104
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.582615852355957,
      "learning_rate": 5.781867774932767e-05,
      "loss": 2.7218,
      "step": 184105
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.455549478530884,
      "learning_rate": 5.781626360046059e-05,
      "loss": 2.7968,
      "step": 184106
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.833261728286743,
      "learning_rate": 5.781384949662006e-05,
      "loss": 3.0046,
      "step": 184107
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.635279893875122,
      "learning_rate": 5.7811435437806505e-05,
      "loss": 3.2882,
      "step": 184108
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.02876877784729,
      "learning_rate": 5.780902142402031e-05,
      "loss": 2.8503,
      "step": 184109
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.614922046661377,
      "learning_rate": 5.780660745526206e-05,
      "loss": 2.8619,
      "step": 184110
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2195279598236084,
      "learning_rate": 5.780419353153205e-05,
      "loss": 3.0217,
      "step": 184111
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.496884822845459,
      "learning_rate": 5.780177965283089e-05,
      "loss": 3.1078,
      "step": 184112
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7782249450683594,
      "learning_rate": 5.779936581915895e-05,
      "loss": 2.9822,
      "step": 184113
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4780690670013428,
      "learning_rate": 5.7796952030516684e-05,
      "loss": 2.6615,
      "step": 184114
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9849934577941895,
      "learning_rate": 5.779453828690443e-05,
      "loss": 3.1106,
      "step": 184115
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.86531662940979,
      "learning_rate": 5.7792124588322865e-05,
      "loss": 2.8384,
      "step": 184116
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8948254585266113,
      "learning_rate": 5.7789710934772205e-05,
      "loss": 3.0521,
      "step": 184117
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4529597759246826,
      "learning_rate": 5.7787297326253124e-05,
      "loss": 2.9637,
      "step": 184118
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8740346431732178,
      "learning_rate": 5.778488376276592e-05,
      "loss": 2.9722,
      "step": 184119
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5154387950897217,
      "learning_rate": 5.77824702443111e-05,
      "loss": 3.009,
      "step": 184120
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.242380142211914,
      "learning_rate": 5.7780056770889014e-05,
      "loss": 2.9405,
      "step": 184121
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9036176204681396,
      "learning_rate": 5.7777643342500244e-05,
      "loss": 2.6258,
      "step": 184122
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8195276260375977,
      "learning_rate": 5.777522995914512e-05,
      "loss": 2.712,
      "step": 184123
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.696709156036377,
      "learning_rate": 5.7772816620824234e-05,
      "loss": 2.9333,
      "step": 184124
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9107701778411865,
      "learning_rate": 5.7770403327537964e-05,
      "loss": 2.9878,
      "step": 184125
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.882117986679077,
      "learning_rate": 5.776799007928673e-05,
      "loss": 2.7807,
      "step": 184126
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7100508213043213,
      "learning_rate": 5.7765576876070906e-05,
      "loss": 2.862,
      "step": 184127
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.450571060180664,
      "learning_rate": 5.776316371789113e-05,
      "loss": 2.9993,
      "step": 184128
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1433262825012207,
      "learning_rate": 5.7760750604747685e-05,
      "loss": 2.7758,
      "step": 184129
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.971932888031006,
      "learning_rate": 5.775833753664115e-05,
      "loss": 3.0286,
      "step": 184130
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1224822998046875,
      "learning_rate": 5.7755924513571925e-05,
      "loss": 2.8836,
      "step": 184131
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0055932998657227,
      "learning_rate": 5.775351153554044e-05,
      "loss": 2.842,
      "step": 184132
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6684837341308594,
      "learning_rate": 5.77510986025471e-05,
      "loss": 2.7416,
      "step": 184133
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6759278774261475,
      "learning_rate": 5.774868571459246e-05,
      "loss": 2.9941,
      "step": 184134
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3344106674194336,
      "learning_rate": 5.7746272871676825e-05,
      "loss": 3.1875,
      "step": 184135
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8980720043182373,
      "learning_rate": 5.774386007380079e-05,
      "loss": 2.9211,
      "step": 184136
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9377245903015137,
      "learning_rate": 5.7741447320964776e-05,
      "loss": 2.8573,
      "step": 184137
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.169065475463867,
      "learning_rate": 5.773903461316919e-05,
      "loss": 2.726,
      "step": 184138
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8904261589050293,
      "learning_rate": 5.773662195041444e-05,
      "loss": 2.9302,
      "step": 184139
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3729233741760254,
      "learning_rate": 5.773420933270106e-05,
      "loss": 2.9473,
      "step": 184140
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.727705240249634,
      "learning_rate": 5.773179676002943e-05,
      "loss": 3.0305,
      "step": 184141
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.615645170211792,
      "learning_rate": 5.7729384232400057e-05,
      "loss": 2.8236,
      "step": 184142
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.682140350341797,
      "learning_rate": 5.772697174981339e-05,
      "loss": 2.9893,
      "step": 184143
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5098214149475098,
      "learning_rate": 5.772455931226986e-05,
      "loss": 3.134,
      "step": 184144
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6069791316986084,
      "learning_rate": 5.772214691976983e-05,
      "loss": 3.1754,
      "step": 184145
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6164073944091797,
      "learning_rate": 5.77197345723139e-05,
      "loss": 2.9521,
      "step": 184146
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8488869667053223,
      "learning_rate": 5.771732226990238e-05,
      "loss": 2.9066,
      "step": 184147
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0399258136749268,
      "learning_rate": 5.771491001253582e-05,
      "loss": 2.9556,
      "step": 184148
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.109086751937866,
      "learning_rate": 5.771249780021466e-05,
      "loss": 2.8673,
      "step": 184149
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8173129558563232,
      "learning_rate": 5.771008563293934e-05,
      "loss": 3.1007,
      "step": 184150
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.878783941268921,
      "learning_rate": 5.770767351071018e-05,
      "loss": 2.8684,
      "step": 184151
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.751894235610962,
      "learning_rate": 5.7705261433527825e-05,
      "loss": 2.8817,
      "step": 184152
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.265676975250244,
      "learning_rate": 5.7702849401392535e-05,
      "loss": 2.94,
      "step": 184153
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.797071933746338,
      "learning_rate": 5.770043741430498e-05,
      "loss": 3.0142,
      "step": 184154
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3604652881622314,
      "learning_rate": 5.769802547226545e-05,
      "loss": 3.0042,
      "step": 184155
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6111249923706055,
      "learning_rate": 5.769561357527439e-05,
      "loss": 2.9806,
      "step": 184156
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.9894533157348633,
      "learning_rate": 5.7693201723332325e-05,
      "loss": 3.0529,
      "step": 184157
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5030529499053955,
      "learning_rate": 5.76907899164397e-05,
      "loss": 3.0045,
      "step": 184158
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.782222032546997,
      "learning_rate": 5.768837815459682e-05,
      "loss": 2.8569,
      "step": 184159
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6336758136749268,
      "learning_rate": 5.768596643780435e-05,
      "loss": 3.0412,
      "step": 184160
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.908398389816284,
      "learning_rate": 5.768355476606265e-05,
      "loss": 2.8355,
      "step": 184161
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3625645637512207,
      "learning_rate": 5.768114313937203e-05,
      "loss": 2.9195,
      "step": 184162
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.460990905761719,
      "learning_rate": 5.767873155773318e-05,
      "loss": 2.8419,
      "step": 184163
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.279253959655762,
      "learning_rate": 5.767632002114633e-05,
      "loss": 2.9571,
      "step": 184164
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.378739356994629,
      "learning_rate": 5.76739085296121e-05,
      "loss": 2.8284,
      "step": 184165
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.651344060897827,
      "learning_rate": 5.7671497083130866e-05,
      "loss": 2.791,
      "step": 184166
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2529618740081787,
      "learning_rate": 5.7669085681703e-05,
      "loss": 2.9575,
      "step": 184167
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.141054153442383,
      "learning_rate": 5.766667432532909e-05,
      "loss": 2.8114,
      "step": 184168
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.100255012512207,
      "learning_rate": 5.766426301400954e-05,
      "loss": 2.8309,
      "step": 184169
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.929107666015625,
      "learning_rate": 5.7661851747744684e-05,
      "loss": 2.8136,
      "step": 184170
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.55534291267395,
      "learning_rate": 5.765944052653515e-05,
      "loss": 2.9851,
      "step": 184171
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.545644998550415,
      "learning_rate": 5.765702935038131e-05,
      "loss": 2.9473,
      "step": 184172
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.266197443008423,
      "learning_rate": 5.76546182192835e-05,
      "loss": 2.8208,
      "step": 184173
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.460025787353516,
      "learning_rate": 5.765220713324238e-05,
      "loss": 3.056,
      "step": 184174
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.970383644104004,
      "learning_rate": 5.7649796092258246e-05,
      "loss": 3.0196,
      "step": 184175
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.09002161026001,
      "learning_rate": 5.7647385096331545e-05,
      "loss": 2.8907,
      "step": 184176
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.55795955657959,
      "learning_rate": 5.764497414546286e-05,
      "loss": 3.0759,
      "step": 184177
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7901906967163086,
      "learning_rate": 5.764256323965244e-05,
      "loss": 3.1139,
      "step": 184178
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7403290271759033,
      "learning_rate": 5.764015237890094e-05,
      "loss": 3.1039,
      "step": 184179
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.042366981506348,
      "learning_rate": 5.76377415632087e-05,
      "loss": 3.0253,
      "step": 184180
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6035587787628174,
      "learning_rate": 5.763533079257617e-05,
      "loss": 3.1138,
      "step": 184181
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9255614280700684,
      "learning_rate": 5.763292006700374e-05,
      "loss": 3.0274,
      "step": 184182
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4373531341552734,
      "learning_rate": 5.763050938649203e-05,
      "loss": 2.8038,
      "step": 184183
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4794106483459473,
      "learning_rate": 5.762809875104127e-05,
      "loss": 3.0979,
      "step": 184184
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0702474117279053,
      "learning_rate": 5.7625688160652104e-05,
      "loss": 2.9835,
      "step": 184185
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.63777232170105,
      "learning_rate": 5.7623277615324894e-05,
      "loss": 2.8047,
      "step": 184186
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.445244789123535,
      "learning_rate": 5.76208671150601e-05,
      "loss": 2.9234,
      "step": 184187
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8144469261169434,
      "learning_rate": 5.761845665985809e-05,
      "loss": 2.8818,
      "step": 184188
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.629706621170044,
      "learning_rate": 5.761604624971947e-05,
      "loss": 3.0859,
      "step": 184189
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.034473419189453,
      "learning_rate": 5.7613635884644506e-05,
      "loss": 2.7953,
      "step": 184190
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8975229263305664,
      "learning_rate": 5.761122556463382e-05,
      "loss": 3.0148,
      "step": 184191
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1476123332977295,
      "learning_rate": 5.760881528968779e-05,
      "loss": 3.213,
      "step": 184192
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.245138645172119,
      "learning_rate": 5.7606405059806847e-05,
      "loss": 2.9951,
      "step": 184193
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2623915672302246,
      "learning_rate": 5.760399487499139e-05,
      "loss": 2.9451,
      "step": 184194
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2725822925567627,
      "learning_rate": 5.760158473524201e-05,
      "loss": 2.8289,
      "step": 184195
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.38037109375,
      "learning_rate": 5.7599174640558954e-05,
      "loss": 3.1992,
      "step": 184196
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.840907335281372,
      "learning_rate": 5.759676459094291e-05,
      "loss": 2.8682,
      "step": 184197
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5741968154907227,
      "learning_rate": 5.759435458639415e-05,
      "loss": 2.8935,
      "step": 184198
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.9297895431518555,
      "learning_rate": 5.759194462691321e-05,
      "loss": 2.6377,
      "step": 184199
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.84362530708313,
      "learning_rate": 5.7589534712500454e-05,
      "loss": 2.8504,
      "step": 184200
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.578481435775757,
      "learning_rate": 5.7587124843156405e-05,
      "loss": 2.8922,
      "step": 184201
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.642498016357422,
      "learning_rate": 5.758471501888145e-05,
      "loss": 3.1631,
      "step": 184202
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3783838748931885,
      "learning_rate": 5.75823052396761e-05,
      "loss": 3.0442,
      "step": 184203
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2154290676116943,
      "learning_rate": 5.757989550554079e-05,
      "loss": 3.0213,
      "step": 184204
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5155863761901855,
      "learning_rate": 5.757748581647598e-05,
      "loss": 2.9408,
      "step": 184205
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.979672908782959,
      "learning_rate": 5.7575076172482e-05,
      "loss": 2.9084,
      "step": 184206
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7077789306640625,
      "learning_rate": 5.7572666573559475e-05,
      "loss": 2.8815,
      "step": 184207
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5946240425109863,
      "learning_rate": 5.7570257019708663e-05,
      "loss": 2.9983,
      "step": 184208
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4022347927093506,
      "learning_rate": 5.756784751093023e-05,
      "loss": 3.2101,
      "step": 184209
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.117671012878418,
      "learning_rate": 5.756543804722448e-05,
      "loss": 2.9647,
      "step": 184210
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7358999252319336,
      "learning_rate": 5.756302862859193e-05,
      "loss": 2.738,
      "step": 184211
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.524925470352173,
      "learning_rate": 5.756061925503287e-05,
      "loss": 2.6591,
      "step": 184212
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.629831075668335,
      "learning_rate": 5.755820992654798e-05,
      "loss": 2.968,
      "step": 184213
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.670856475830078,
      "learning_rate": 5.7555800643137475e-05,
      "loss": 2.655,
      "step": 184214
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4490303993225098,
      "learning_rate": 5.755339140480204e-05,
      "loss": 2.9118,
      "step": 184215
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.361927032470703,
      "learning_rate": 5.755098221154198e-05,
      "loss": 2.8355,
      "step": 184216
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0507922172546387,
      "learning_rate": 5.754857306335777e-05,
      "loss": 2.8406,
      "step": 184217
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.920607566833496,
      "learning_rate": 5.7546163960249804e-05,
      "loss": 3.0723,
      "step": 184218
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7964682579040527,
      "learning_rate": 5.7543754902218645e-05,
      "loss": 2.66,
      "step": 184219
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.7238545417785645,
      "learning_rate": 5.754134588926459e-05,
      "loss": 2.6632,
      "step": 184220
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4731390476226807,
      "learning_rate": 5.753893692138828e-05,
      "loss": 2.9332,
      "step": 184221
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.228492021560669,
      "learning_rate": 5.7536527998590046e-05,
      "loss": 2.9979,
      "step": 184222
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9044196605682373,
      "learning_rate": 5.753411912087035e-05,
      "loss": 2.7514,
      "step": 184223
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.803926467895508,
      "learning_rate": 5.753171028822956e-05,
      "loss": 3.0703,
      "step": 184224
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4965062141418457,
      "learning_rate": 5.752930150066825e-05,
      "loss": 3.1333,
      "step": 184225
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6622912883758545,
      "learning_rate": 5.752689275818677e-05,
      "loss": 2.7007,
      "step": 184226
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0165319442749023,
      "learning_rate": 5.7524484060785704e-05,
      "loss": 2.8472,
      "step": 184227
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.409782886505127,
      "learning_rate": 5.7522075408465404e-05,
      "loss": 2.9727,
      "step": 184228
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.831829786300659,
      "learning_rate": 5.751966680122632e-05,
      "loss": 2.9406,
      "step": 184229
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3606581687927246,
      "learning_rate": 5.751725823906883e-05,
      "loss": 2.8204,
      "step": 184230
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6038002967834473,
      "learning_rate": 5.751484972199355e-05,
      "loss": 2.7041,
      "step": 184231
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.545990467071533,
      "learning_rate": 5.751244125000074e-05,
      "loss": 3.2948,
      "step": 184232
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.042552947998047,
      "learning_rate": 5.751003282309104e-05,
      "loss": 2.7513,
      "step": 184233
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.714578628540039,
      "learning_rate": 5.750762444126473e-05,
      "loss": 3.3021,
      "step": 184234
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.739816188812256,
      "learning_rate": 5.7505216104522466e-05,
      "loss": 2.845,
      "step": 184235
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.317413330078125,
      "learning_rate": 5.7502807812864404e-05,
      "loss": 2.8946,
      "step": 184236
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.991434097290039,
      "learning_rate": 5.750039956629124e-05,
      "loss": 3.1315,
      "step": 184237
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.106855869293213,
      "learning_rate": 5.749799136480325e-05,
      "loss": 3.0253,
      "step": 184238
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7603859901428223,
      "learning_rate": 5.749558320840103e-05,
      "loss": 2.8974,
      "step": 184239
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2078702449798584,
      "learning_rate": 5.7493175097084875e-05,
      "loss": 3.0115,
      "step": 184240
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.65989351272583,
      "learning_rate": 5.749076703085542e-05,
      "loss": 2.9484,
      "step": 184241
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.748443603515625,
      "learning_rate": 5.748835900971297e-05,
      "loss": 2.762,
      "step": 184242
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7306559085845947,
      "learning_rate": 5.748595103365805e-05,
      "loss": 2.7331,
      "step": 184243
    },
    {
      "epoch": 2.4,
      "grad_norm": 6.007326126098633,
      "learning_rate": 5.7483543102690965e-05,
      "loss": 2.7914,
      "step": 184244
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.20184326171875,
      "learning_rate": 5.7481135216812344e-05,
      "loss": 3.075,
      "step": 184245
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5778374671936035,
      "learning_rate": 5.747872737602249e-05,
      "loss": 3.0959,
      "step": 184246
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7068824768066406,
      "learning_rate": 5.747631958032201e-05,
      "loss": 2.8127,
      "step": 184247
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.205883979797363,
      "learning_rate": 5.747391182971125e-05,
      "loss": 2.9778,
      "step": 184248
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.461970567703247,
      "learning_rate": 5.747150412419066e-05,
      "loss": 2.7476,
      "step": 184249
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.840273380279541,
      "learning_rate": 5.746909646376063e-05,
      "loss": 2.8824,
      "step": 184250
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8061273097991943,
      "learning_rate": 5.746668884842174e-05,
      "loss": 2.7611,
      "step": 184251
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.435701847076416,
      "learning_rate": 5.7464281278174306e-05,
      "loss": 2.8368,
      "step": 184252
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.648798704147339,
      "learning_rate": 5.74618737530189e-05,
      "loss": 2.891,
      "step": 184253
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0734550952911377,
      "learning_rate": 5.7459466272955935e-05,
      "loss": 3.1025,
      "step": 184254
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.646465301513672,
      "learning_rate": 5.745705883798573e-05,
      "loss": 3.3154,
      "step": 184255
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.105285882949829,
      "learning_rate": 5.745465144810895e-05,
      "loss": 3.064,
      "step": 184256
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9879541397094727,
      "learning_rate": 5.7452244103325896e-05,
      "loss": 2.7053,
      "step": 184257
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6555354595184326,
      "learning_rate": 5.7449836803636975e-05,
      "loss": 3.1739,
      "step": 184258
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5512149333953857,
      "learning_rate": 5.744742954904278e-05,
      "loss": 2.7328,
      "step": 184259
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.151740550994873,
      "learning_rate": 5.744502233954368e-05,
      "loss": 2.8475,
      "step": 184260
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7580344676971436,
      "learning_rate": 5.744261517514007e-05,
      "loss": 2.976,
      "step": 184261
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2163307666778564,
      "learning_rate": 5.7440208055832524e-05,
      "loss": 2.87,
      "step": 184262
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.555344820022583,
      "learning_rate": 5.7437800981621374e-05,
      "loss": 3.0559,
      "step": 184263
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.908938884735107,
      "learning_rate": 5.7435393952507147e-05,
      "loss": 2.9409,
      "step": 184264
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8853423595428467,
      "learning_rate": 5.743298696849028e-05,
      "loss": 2.8414,
      "step": 184265
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.784453868865967,
      "learning_rate": 5.7430580029571205e-05,
      "loss": 2.7732,
      "step": 184266
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5008342266082764,
      "learning_rate": 5.742817313575029e-05,
      "loss": 3.0851,
      "step": 184267
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.759817123413086,
      "learning_rate": 5.742576628702813e-05,
      "loss": 2.9262,
      "step": 184268
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5352280139923096,
      "learning_rate": 5.742335948340503e-05,
      "loss": 2.9397,
      "step": 184269
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4967803955078125,
      "learning_rate": 5.7420952724881553e-05,
      "loss": 2.7302,
      "step": 184270
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.616960048675537,
      "learning_rate": 5.741854601145813e-05,
      "loss": 2.856,
      "step": 184271
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8070478439331055,
      "learning_rate": 5.741613934313517e-05,
      "loss": 2.9015,
      "step": 184272
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.95019793510437,
      "learning_rate": 5.741373271991303e-05,
      "loss": 2.8281,
      "step": 184273
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5082099437713623,
      "learning_rate": 5.741132614179235e-05,
      "loss": 3.2593,
      "step": 184274
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.687755584716797,
      "learning_rate": 5.740891960877342e-05,
      "loss": 3.2016,
      "step": 184275
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.151853322982788,
      "learning_rate": 5.740651312085681e-05,
      "loss": 3.1278,
      "step": 184276
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.907531976699829,
      "learning_rate": 5.7404106678042926e-05,
      "loss": 3.2361,
      "step": 184277
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.35182523727417,
      "learning_rate": 5.74017002803322e-05,
      "loss": 2.6596,
      "step": 184278
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.51285719871521,
      "learning_rate": 5.739929392772499e-05,
      "loss": 3.0008,
      "step": 184279
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.793503761291504,
      "learning_rate": 5.7396887620221895e-05,
      "loss": 3.0121,
      "step": 184280
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.572732448577881,
      "learning_rate": 5.739448135782323e-05,
      "loss": 2.9419,
      "step": 184281
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5579237937927246,
      "learning_rate": 5.7392075140529583e-05,
      "loss": 3.0278,
      "step": 184282
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.123974084854126,
      "learning_rate": 5.7389668968341316e-05,
      "loss": 2.9285,
      "step": 184283
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.7755446434021,
      "learning_rate": 5.738726284125894e-05,
      "loss": 2.9681,
      "step": 184284
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7264208793640137,
      "learning_rate": 5.738485675928272e-05,
      "loss": 3.0816,
      "step": 184285
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.845557451248169,
      "learning_rate": 5.7382450722413345e-05,
      "loss": 2.8579,
      "step": 184286
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.748105525970459,
      "learning_rate": 5.738004473065105e-05,
      "loss": 2.84,
      "step": 184287
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6985819339752197,
      "learning_rate": 5.737763878399648e-05,
      "loss": 3.0361,
      "step": 184288
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5572476387023926,
      "learning_rate": 5.737523288245e-05,
      "loss": 2.8867,
      "step": 184289
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2114107608795166,
      "learning_rate": 5.7372827026012024e-05,
      "loss": 3.011,
      "step": 184290
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.484219789505005,
      "learning_rate": 5.737042121468294e-05,
      "loss": 2.983,
      "step": 184291
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.824629783630371,
      "learning_rate": 5.736801544846337e-05,
      "loss": 3.0361,
      "step": 184292
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2788593769073486,
      "learning_rate": 5.7365609727353544e-05,
      "loss": 2.9479,
      "step": 184293
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3647468090057373,
      "learning_rate": 5.736320405135414e-05,
      "loss": 2.8897,
      "step": 184294
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.1420793533325195,
      "learning_rate": 5.736079842046552e-05,
      "loss": 3.0262,
      "step": 184295
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8967251777648926,
      "learning_rate": 5.735839283468807e-05,
      "loss": 2.8251,
      "step": 184296
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1575160026550293,
      "learning_rate": 5.735598729402221e-05,
      "loss": 2.9347,
      "step": 184297
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.433974027633667,
      "learning_rate": 5.735358179846853e-05,
      "loss": 3.1688,
      "step": 184298
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.481412172317505,
      "learning_rate": 5.735117634802733e-05,
      "loss": 2.8461,
      "step": 184299
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4679431915283203,
      "learning_rate": 5.7348770942699207e-05,
      "loss": 3.0322,
      "step": 184300
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.578098297119141,
      "learning_rate": 5.7346365582484434e-05,
      "loss": 2.8723,
      "step": 184301
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.19559383392334,
      "learning_rate": 5.7343960267383706e-05,
      "loss": 3.1986,
      "step": 184302
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.656433582305908,
      "learning_rate": 5.7341554997397187e-05,
      "loss": 3.1182,
      "step": 184303
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3798203468322754,
      "learning_rate": 5.733914977252552e-05,
      "loss": 2.6123,
      "step": 184304
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6559650897979736,
      "learning_rate": 5.733674459276899e-05,
      "loss": 3.1108,
      "step": 184305
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.09871768951416,
      "learning_rate": 5.733433945812821e-05,
      "loss": 2.8511,
      "step": 184306
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5680665969848633,
      "learning_rate": 5.73319343686035e-05,
      "loss": 2.8344,
      "step": 184307
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2266201972961426,
      "learning_rate": 5.7329529324195545e-05,
      "loss": 2.666,
      "step": 184308
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.722397565841675,
      "learning_rate": 5.7327124324904425e-05,
      "loss": 3.0167,
      "step": 184309
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5661983489990234,
      "learning_rate": 5.732471937073085e-05,
      "loss": 2.9447,
      "step": 184310
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.491036415100098,
      "learning_rate": 5.732231446167512e-05,
      "loss": 3.0705,
      "step": 184311
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.47817063331604,
      "learning_rate": 5.731990959773783e-05,
      "loss": 2.8973,
      "step": 184312
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.910231590270996,
      "learning_rate": 5.731750477891925e-05,
      "loss": 2.8999,
      "step": 184313
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.653451681137085,
      "learning_rate": 5.731510000522012e-05,
      "loss": 2.8859,
      "step": 184314
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.011073589324951,
      "learning_rate": 5.731269527664052e-05,
      "loss": 2.8597,
      "step": 184315
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.713808536529541,
      "learning_rate": 5.731029059318113e-05,
      "loss": 3.2269,
      "step": 184316
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4234912395477295,
      "learning_rate": 5.7307885954842284e-05,
      "loss": 3.0566,
      "step": 184317
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5655994415283203,
      "learning_rate": 5.730548136162457e-05,
      "loss": 3.1206,
      "step": 184318
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.963819980621338,
      "learning_rate": 5.730307681352824e-05,
      "loss": 2.9642,
      "step": 184319
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.624941349029541,
      "learning_rate": 5.730067231055404e-05,
      "loss": 2.9664,
      "step": 184320
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7866404056549072,
      "learning_rate": 5.7298267852702054e-05,
      "loss": 2.8411,
      "step": 184321
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.81471586227417,
      "learning_rate": 5.729586343997297e-05,
      "loss": 3.1002,
      "step": 184322
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5747556686401367,
      "learning_rate": 5.7293459072367084e-05,
      "loss": 2.9017,
      "step": 184323
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.866011619567871,
      "learning_rate": 5.7291054749885016e-05,
      "loss": 2.9971,
      "step": 184324
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6874780654907227,
      "learning_rate": 5.728865047252704e-05,
      "loss": 2.6395,
      "step": 184325
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6294643878936768,
      "learning_rate": 5.7286246240293774e-05,
      "loss": 3.0384,
      "step": 184326
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.539531707763672,
      "learning_rate": 5.728384205318558e-05,
      "loss": 2.8809,
      "step": 184327
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.837146282196045,
      "learning_rate": 5.728143791120289e-05,
      "loss": 2.9569,
      "step": 184328
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6652495861053467,
      "learning_rate": 5.72790338143461e-05,
      "loss": 2.9385,
      "step": 184329
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.694887161254883,
      "learning_rate": 5.727662976261578e-05,
      "loss": 2.7586,
      "step": 184330
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.703597068786621,
      "learning_rate": 5.7274225756012226e-05,
      "loss": 3.1959,
      "step": 184331
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.471055507659912,
      "learning_rate": 5.7271821794536075e-05,
      "loss": 2.8671,
      "step": 184332
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8531744480133057,
      "learning_rate": 5.7269417878187663e-05,
      "loss": 2.913,
      "step": 184333
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.511172294616699,
      "learning_rate": 5.7267014006967446e-05,
      "loss": 2.8814,
      "step": 184334
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.181906223297119,
      "learning_rate": 5.72646101808758e-05,
      "loss": 3.1997,
      "step": 184335
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8760738372802734,
      "learning_rate": 5.726220639991332e-05,
      "loss": 2.8896,
      "step": 184336
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.700376510620117,
      "learning_rate": 5.725980266408031e-05,
      "loss": 3.0055,
      "step": 184337
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3511855602264404,
      "learning_rate": 5.7257398973377364e-05,
      "loss": 2.8544,
      "step": 184338
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.598700523376465,
      "learning_rate": 5.7254995327804854e-05,
      "loss": 2.8241,
      "step": 184339
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6180763244628906,
      "learning_rate": 5.725259172736314e-05,
      "loss": 3.0532,
      "step": 184340
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.96271276473999,
      "learning_rate": 5.7250188172052825e-05,
      "loss": 2.8894,
      "step": 184341
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.298087120056152,
      "learning_rate": 5.7247784661874276e-05,
      "loss": 2.8036,
      "step": 184342
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.291666030883789,
      "learning_rate": 5.724538119682786e-05,
      "loss": 3.1064,
      "step": 184343
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.140188217163086,
      "learning_rate": 5.72429777769142e-05,
      "loss": 2.8015,
      "step": 184344
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5555965900421143,
      "learning_rate": 5.7240574402133646e-05,
      "loss": 3.0613,
      "step": 184345
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7247159481048584,
      "learning_rate": 5.7238171072486584e-05,
      "loss": 3.2568,
      "step": 184346
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1744065284729004,
      "learning_rate": 5.723576778797362e-05,
      "loss": 2.8741,
      "step": 184347
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.922013282775879,
      "learning_rate": 5.723336454859502e-05,
      "loss": 3.0082,
      "step": 184348
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.573275566101074,
      "learning_rate": 5.7230961354351376e-05,
      "loss": 2.9788,
      "step": 184349
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0943803787231445,
      "learning_rate": 5.7228558205243105e-05,
      "loss": 3.0382,
      "step": 184350
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.700667142868042,
      "learning_rate": 5.7226155101270624e-05,
      "loss": 2.9542,
      "step": 184351
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.245460033416748,
      "learning_rate": 5.7223752042434265e-05,
      "loss": 3.1036,
      "step": 184352
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.660951614379883,
      "learning_rate": 5.7221349028734706e-05,
      "loss": 3.0998,
      "step": 184353
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.66563081741333,
      "learning_rate": 5.721894606017221e-05,
      "loss": 3.0082,
      "step": 184354
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9423985481262207,
      "learning_rate": 5.721654313674737e-05,
      "loss": 2.9955,
      "step": 184355
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3798623085021973,
      "learning_rate": 5.721414025846056e-05,
      "loss": 3.0284,
      "step": 184356
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.924023389816284,
      "learning_rate": 5.721173742531221e-05,
      "loss": 2.9162,
      "step": 184357
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.524463653564453,
      "learning_rate": 5.720933463730272e-05,
      "loss": 2.8993,
      "step": 184358
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8168509006500244,
      "learning_rate": 5.720693189443265e-05,
      "loss": 3.1274,
      "step": 184359
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7440640926361084,
      "learning_rate": 5.7204529196702345e-05,
      "loss": 3.0635,
      "step": 184360
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.66831636428833,
      "learning_rate": 5.720212654411236e-05,
      "loss": 2.7155,
      "step": 184361
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.146285057067871,
      "learning_rate": 5.71997239366631e-05,
      "loss": 3.1151,
      "step": 184362
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8908534049987793,
      "learning_rate": 5.7197321374355e-05,
      "loss": 2.8025,
      "step": 184363
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.148735284805298,
      "learning_rate": 5.719491885718839e-05,
      "loss": 3.0128,
      "step": 184364
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.809537649154663,
      "learning_rate": 5.7192516385163935e-05,
      "loss": 3.0817,
      "step": 184365
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9876461029052734,
      "learning_rate": 5.719011395828187e-05,
      "loss": 2.8062,
      "step": 184366
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.510148763656616,
      "learning_rate": 5.7187711576542826e-05,
      "loss": 2.9953,
      "step": 184367
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3071022033691406,
      "learning_rate": 5.718530923994711e-05,
      "loss": 2.8995,
      "step": 184368
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0492725372314453,
      "learning_rate": 5.718290694849538e-05,
      "loss": 2.9906,
      "step": 184369
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.27901029586792,
      "learning_rate": 5.71805047021878e-05,
      "loss": 2.9375,
      "step": 184370
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.204961061477661,
      "learning_rate": 5.717810250102498e-05,
      "loss": 2.7095,
      "step": 184371
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.452030658721924,
      "learning_rate": 5.717570034500728e-05,
      "loss": 2.9075,
      "step": 184372
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6215856075286865,
      "learning_rate": 5.7173298234135267e-05,
      "loss": 2.9531,
      "step": 184373
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9907021522521973,
      "learning_rate": 5.717089616840927e-05,
      "loss": 2.7899,
      "step": 184374
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.184012413024902,
      "learning_rate": 5.716849414782993e-05,
      "loss": 3.0449,
      "step": 184375
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8314290046691895,
      "learning_rate": 5.716609217239738e-05,
      "loss": 3.1234,
      "step": 184376
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.686455726623535,
      "learning_rate": 5.7163690242112315e-05,
      "loss": 3.0903,
      "step": 184377
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6195271015167236,
      "learning_rate": 5.7161288356975034e-05,
      "loss": 2.7135,
      "step": 184378
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.954221248626709,
      "learning_rate": 5.715888651698613e-05,
      "loss": 3.2723,
      "step": 184379
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8472225666046143,
      "learning_rate": 5.7156484722145925e-05,
      "loss": 2.838,
      "step": 184380
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8157730102539062,
      "learning_rate": 5.7154082972455026e-05,
      "loss": 2.9779,
      "step": 184381
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.580716848373413,
      "learning_rate": 5.715168126791365e-05,
      "loss": 2.9894,
      "step": 184382
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7536232471466064,
      "learning_rate": 5.714927960852239e-05,
      "loss": 2.9214,
      "step": 184383
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2683298587799072,
      "learning_rate": 5.714687799428161e-05,
      "loss": 2.7712,
      "step": 184384
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.226792097091675,
      "learning_rate": 5.714447642519192e-05,
      "loss": 3.1149,
      "step": 184385
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6543102264404297,
      "learning_rate": 5.7142074901253544e-05,
      "loss": 2.896,
      "step": 184386
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.654703140258789,
      "learning_rate": 5.713967342246718e-05,
      "loss": 2.9232,
      "step": 184387
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.730769157409668,
      "learning_rate": 5.7137271988833e-05,
      "loss": 3.0738,
      "step": 184388
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8225533962249756,
      "learning_rate": 5.713487060035167e-05,
      "loss": 3.027,
      "step": 184389
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6637868881225586,
      "learning_rate": 5.7132469257023485e-05,
      "loss": 2.9754,
      "step": 184390
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5811359882354736,
      "learning_rate": 5.7130067958849015e-05,
      "loss": 3.0206,
      "step": 184391
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3979039192199707,
      "learning_rate": 5.7127666705828556e-05,
      "loss": 3.0512,
      "step": 184392
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.9647347927093506,
      "learning_rate": 5.7125265497962814e-05,
      "loss": 2.9589,
      "step": 184393
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.218876600265503,
      "learning_rate": 5.712286433525195e-05,
      "loss": 3.0565,
      "step": 184394
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.847642421722412,
      "learning_rate": 5.712046321769656e-05,
      "loss": 3.1318,
      "step": 184395
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1410071849823,
      "learning_rate": 5.711806214529703e-05,
      "loss": 2.8517,
      "step": 184396
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.569805383682251,
      "learning_rate": 5.711566111805387e-05,
      "loss": 2.8238,
      "step": 184397
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.895815134048462,
      "learning_rate": 5.711326013596742e-05,
      "loss": 2.9501,
      "step": 184398
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5293033123016357,
      "learning_rate": 5.711085919903838e-05,
      "loss": 2.9682,
      "step": 184399
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6561152935028076,
      "learning_rate": 5.7108458307266855e-05,
      "loss": 2.9876,
      "step": 184400
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0515494346618652,
      "learning_rate": 5.710605746065354e-05,
      "loss": 3.1798,
      "step": 184401
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0774612426757812,
      "learning_rate": 5.71036566591987e-05,
      "loss": 3.1824,
      "step": 184402
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.684736728668213,
      "learning_rate": 5.710125590290294e-05,
      "loss": 2.6859,
      "step": 184403
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8318123817443848,
      "learning_rate": 5.709885519176659e-05,
      "loss": 2.7076,
      "step": 184404
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8021602630615234,
      "learning_rate": 5.709645452579028e-05,
      "loss": 3.2639,
      "step": 184405
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8519177436828613,
      "learning_rate": 5.709405390497418e-05,
      "loss": 2.7382,
      "step": 184406
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.414177656173706,
      "learning_rate": 5.7091653329318955e-05,
      "loss": 2.9824,
      "step": 184407
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.17720103263855,
      "learning_rate": 5.7089252798824905e-05,
      "loss": 3.1048,
      "step": 184408
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9512829780578613,
      "learning_rate": 5.7086852313492626e-05,
      "loss": 3.0397,
      "step": 184409
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5766806602478027,
      "learning_rate": 5.708445187332242e-05,
      "loss": 3.0112,
      "step": 184410
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6783580780029297,
      "learning_rate": 5.7082051478314924e-05,
      "loss": 2.986,
      "step": 184411
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.094988822937012,
      "learning_rate": 5.7079651128470335e-05,
      "loss": 2.8867,
      "step": 184412
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9681477546691895,
      "learning_rate": 5.707725082378929e-05,
      "loss": 2.9954,
      "step": 184413
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.2301201820373535,
      "learning_rate": 5.707485056427208e-05,
      "loss": 3.0583,
      "step": 184414
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0367348194122314,
      "learning_rate": 5.707245034991934e-05,
      "loss": 2.9199,
      "step": 184415
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8784778118133545,
      "learning_rate": 5.707005018073134e-05,
      "loss": 2.9313,
      "step": 184416
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.303046703338623,
      "learning_rate": 5.7067650056708645e-05,
      "loss": 2.8723,
      "step": 184417
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.595534563064575,
      "learning_rate": 5.706524997785169e-05,
      "loss": 2.8706,
      "step": 184418
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.230836868286133,
      "learning_rate": 5.706284994416091e-05,
      "loss": 2.831,
      "step": 184419
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.763230800628662,
      "learning_rate": 5.706044995563662e-05,
      "loss": 2.824,
      "step": 184420
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6724274158477783,
      "learning_rate": 5.7058050012279476e-05,
      "loss": 2.9919,
      "step": 184421
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4458224773406982,
      "learning_rate": 5.7055650114089734e-05,
      "loss": 2.7677,
      "step": 184422
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.23923397064209,
      "learning_rate": 5.7053250261068e-05,
      "loss": 3.0698,
      "step": 184423
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8528037071228027,
      "learning_rate": 5.705085045321469e-05,
      "loss": 2.9002,
      "step": 184424
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7954330444335938,
      "learning_rate": 5.704845069053009e-05,
      "loss": 3.1046,
      "step": 184425
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4588961601257324,
      "learning_rate": 5.704605097301489e-05,
      "loss": 2.883,
      "step": 184426
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.271021842956543,
      "learning_rate": 5.704365130066939e-05,
      "loss": 2.9821,
      "step": 184427
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6465463638305664,
      "learning_rate": 5.7041251673493994e-05,
      "loss": 2.9864,
      "step": 184428
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.395014524459839,
      "learning_rate": 5.70388520914893e-05,
      "loss": 2.8695,
      "step": 184429
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.970463514328003,
      "learning_rate": 5.703645255465563e-05,
      "loss": 2.9028,
      "step": 184430
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9543845653533936,
      "learning_rate": 5.7034053062993435e-05,
      "loss": 3.1691,
      "step": 184431
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.042120933532715,
      "learning_rate": 5.703165361650324e-05,
      "loss": 3.0299,
      "step": 184432
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6154351234436035,
      "learning_rate": 5.702925421518547e-05,
      "loss": 3.1266,
      "step": 184433
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7756361961364746,
      "learning_rate": 5.7026854859040475e-05,
      "loss": 3.015,
      "step": 184434
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3020286560058594,
      "learning_rate": 5.702445554806884e-05,
      "loss": 2.9055,
      "step": 184435
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.68178391456604,
      "learning_rate": 5.7022056282270935e-05,
      "loss": 3.1114,
      "step": 184436
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.557240009307861,
      "learning_rate": 5.701965706164716e-05,
      "loss": 3.2149,
      "step": 184437
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3318166732788086,
      "learning_rate": 5.7017257886198087e-05,
      "loss": 2.9682,
      "step": 184438
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7004363536834717,
      "learning_rate": 5.701485875592401e-05,
      "loss": 3.0891,
      "step": 184439
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9636363983154297,
      "learning_rate": 5.701245967082556e-05,
      "loss": 2.9841,
      "step": 184440
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.098071336746216,
      "learning_rate": 5.701006063090304e-05,
      "loss": 2.9512,
      "step": 184441
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7396812438964844,
      "learning_rate": 5.700766163615695e-05,
      "loss": 2.9202,
      "step": 184442
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.696521043777466,
      "learning_rate": 5.700526268658766e-05,
      "loss": 2.9683,
      "step": 184443
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8499016761779785,
      "learning_rate": 5.700286378219576e-05,
      "loss": 2.8413,
      "step": 184444
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.067967176437378,
      "learning_rate": 5.7000464922981525e-05,
      "loss": 3.0337,
      "step": 184445
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.536068916320801,
      "learning_rate": 5.699806610894555e-05,
      "loss": 2.9949,
      "step": 184446
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7515721321105957,
      "learning_rate": 5.699566734008817e-05,
      "loss": 2.8093,
      "step": 184447
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.542625665664673,
      "learning_rate": 5.699326861641005e-05,
      "loss": 2.806,
      "step": 184448
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9824869632720947,
      "learning_rate": 5.699086993791129e-05,
      "loss": 2.962,
      "step": 184449
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.1099853515625,
      "learning_rate": 5.6988471304592586e-05,
      "loss": 2.8556,
      "step": 184450
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.547889232635498,
      "learning_rate": 5.698607271645424e-05,
      "loss": 3.0185,
      "step": 184451
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.382261276245117,
      "learning_rate": 5.698367417349686e-05,
      "loss": 2.8625,
      "step": 184452
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.678178310394287,
      "learning_rate": 5.6981275675720704e-05,
      "loss": 2.7864,
      "step": 184453
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7754361629486084,
      "learning_rate": 5.697887722312647e-05,
      "loss": 3.0082,
      "step": 184454
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8271734714508057,
      "learning_rate": 5.697647881571433e-05,
      "loss": 3.1344,
      "step": 184455
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6807994842529297,
      "learning_rate": 5.6974080453484885e-05,
      "loss": 3.0704,
      "step": 184456
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.232527256011963,
      "learning_rate": 5.697168213643849e-05,
      "loss": 3.0144,
      "step": 184457
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.63569712638855,
      "learning_rate": 5.696928386457572e-05,
      "loss": 2.8413,
      "step": 184458
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0867207050323486,
      "learning_rate": 5.696688563789688e-05,
      "loss": 2.8309,
      "step": 184459
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.889282703399658,
      "learning_rate": 5.6964487456402654e-05,
      "loss": 2.7186,
      "step": 184460
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.889763116836548,
      "learning_rate": 5.696208932009312e-05,
      "loss": 2.8501,
      "step": 184461
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9576802253723145,
      "learning_rate": 5.695969122896904e-05,
      "loss": 3.09,
      "step": 184462
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8153841495513916,
      "learning_rate": 5.695729318303062e-05,
      "loss": 2.9813,
      "step": 184463
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.360187292098999,
      "learning_rate": 5.695489518227855e-05,
      "loss": 2.8534,
      "step": 184464
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7698426246643066,
      "learning_rate": 5.6952497226713036e-05,
      "loss": 3.1841,
      "step": 184465
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.782104015350342,
      "learning_rate": 5.695009931633481e-05,
      "loss": 2.667,
      "step": 184466
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6177690029144287,
      "learning_rate": 5.6947701451144e-05,
      "loss": 2.8015,
      "step": 184467
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6528480052948,
      "learning_rate": 5.6945303631141284e-05,
      "loss": 3.1835,
      "step": 184468
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.788032054901123,
      "learning_rate": 5.694290585632691e-05,
      "loss": 2.9472,
      "step": 184469
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.676882266998291,
      "learning_rate": 5.694050812670157e-05,
      "loss": 3.0835,
      "step": 184470
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4083080291748047,
      "learning_rate": 5.693811044226547e-05,
      "loss": 2.8053,
      "step": 184471
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.682556629180908,
      "learning_rate": 5.693571280301933e-05,
      "loss": 3.1878,
      "step": 184472
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4644927978515625,
      "learning_rate": 5.693331520896324e-05,
      "loss": 2.9783,
      "step": 184473
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8142340183258057,
      "learning_rate": 5.693091766009793e-05,
      "loss": 3.1886,
      "step": 184474
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3300673961639404,
      "learning_rate": 5.6928520156423706e-05,
      "loss": 2.9662,
      "step": 184475
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.871103048324585,
      "learning_rate": 5.69261226979411e-05,
      "loss": 3.0547,
      "step": 184476
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6060242652893066,
      "learning_rate": 5.692372528465044e-05,
      "loss": 2.8136,
      "step": 184477
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4714441299438477,
      "learning_rate": 5.6921327916552404e-05,
      "loss": 2.8612,
      "step": 184478
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.995271682739258,
      "learning_rate": 5.691893059364715e-05,
      "loss": 2.9982,
      "step": 184479
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8817710876464844,
      "learning_rate": 5.691653331593531e-05,
      "loss": 2.9054,
      "step": 184480
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.913662910461426,
      "learning_rate": 5.691413608341722e-05,
      "loss": 3.1061,
      "step": 184481
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.225779056549072,
      "learning_rate": 5.691173889609344e-05,
      "loss": 2.727,
      "step": 184482
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.099778175354004,
      "learning_rate": 5.690934175396428e-05,
      "loss": 2.9331,
      "step": 184483
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4287431240081787,
      "learning_rate": 5.690694465703043e-05,
      "loss": 2.8118,
      "step": 184484
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6747193336486816,
      "learning_rate": 5.690454760529199e-05,
      "loss": 2.8023,
      "step": 184485
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5979323387145996,
      "learning_rate": 5.6902150598749676e-05,
      "loss": 2.8878,
      "step": 184486
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5037708282470703,
      "learning_rate": 5.689975363740376e-05,
      "loss": 2.9329,
      "step": 184487
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9751360416412354,
      "learning_rate": 5.689735672125484e-05,
      "loss": 2.6826,
      "step": 184488
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.671811819076538,
      "learning_rate": 5.6894959850303224e-05,
      "loss": 2.8343,
      "step": 184489
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.230475902557373,
      "learning_rate": 5.689256302454955e-05,
      "loss": 3.0354,
      "step": 184490
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.591754674911499,
      "learning_rate": 5.689016624399403e-05,
      "loss": 3.1083,
      "step": 184491
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7975735664367676,
      "learning_rate": 5.688776950863725e-05,
      "loss": 3.1574,
      "step": 184492
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4647092819213867,
      "learning_rate": 5.688537281847958e-05,
      "loss": 3.104,
      "step": 184493
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.472813129425049,
      "learning_rate": 5.6882976173521555e-05,
      "loss": 2.9707,
      "step": 184494
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3200433254241943,
      "learning_rate": 5.688057957376354e-05,
      "loss": 2.8982,
      "step": 184495
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.377511978149414,
      "learning_rate": 5.687818301920607e-05,
      "loss": 3.1553,
      "step": 184496
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6004478931427,
      "learning_rate": 5.687578650984951e-05,
      "loss": 2.953,
      "step": 184497
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.896667003631592,
      "learning_rate": 5.6873390045694354e-05,
      "loss": 2.9083,
      "step": 184498
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8141536712646484,
      "learning_rate": 5.687099362674095e-05,
      "loss": 2.7377,
      "step": 184499
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.539517879486084,
      "learning_rate": 5.686859725298988e-05,
      "loss": 2.9695,
      "step": 184500
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.56321382522583,
      "learning_rate": 5.686620092444145e-05,
      "loss": 2.865,
      "step": 184501
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.153491973876953,
      "learning_rate": 5.6863804641096266e-05,
      "loss": 2.7143,
      "step": 184502
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.660914897918701,
      "learning_rate": 5.686140840295472e-05,
      "loss": 2.9078,
      "step": 184503
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.590172052383423,
      "learning_rate": 5.685901221001718e-05,
      "loss": 2.8066,
      "step": 184504
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9187357425689697,
      "learning_rate": 5.6856616062284085e-05,
      "loss": 2.9609,
      "step": 184505
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.745096445083618,
      "learning_rate": 5.685421995975603e-05,
      "loss": 2.8502,
      "step": 184506
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.680954933166504,
      "learning_rate": 5.6851823902433245e-05,
      "loss": 2.8504,
      "step": 184507
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.469118118286133,
      "learning_rate": 5.6849427890316404e-05,
      "loss": 3.1679,
      "step": 184508
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.102536201477051,
      "learning_rate": 5.6847031923405827e-05,
      "loss": 2.7994,
      "step": 184509
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.495828151702881,
      "learning_rate": 5.6844636001701926e-05,
      "loss": 2.7998,
      "step": 184510
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8452019691467285,
      "learning_rate": 5.6842240125205265e-05,
      "loss": 3.0072,
      "step": 184511
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4287772178649902,
      "learning_rate": 5.68398442939162e-05,
      "loss": 2.9668,
      "step": 184512
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3489203453063965,
      "learning_rate": 5.683744850783515e-05,
      "loss": 2.9113,
      "step": 184513
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9772539138793945,
      "learning_rate": 5.683505276696266e-05,
      "loss": 3.0806,
      "step": 184514
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7215964794158936,
      "learning_rate": 5.683265707129911e-05,
      "loss": 2.879,
      "step": 184515
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8405284881591797,
      "learning_rate": 5.6830261420844935e-05,
      "loss": 2.6337,
      "step": 184516
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.62888765335083,
      "learning_rate": 5.682786581560063e-05,
      "loss": 2.9376,
      "step": 184517
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.157930374145508,
      "learning_rate": 5.682547025556665e-05,
      "loss": 3.1156,
      "step": 184518
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0645692348480225,
      "learning_rate": 5.682307474074331e-05,
      "loss": 2.6734,
      "step": 184519
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.575897216796875,
      "learning_rate": 5.682067927113124e-05,
      "loss": 2.7873,
      "step": 184520
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5525283813476562,
      "learning_rate": 5.681828384673077e-05,
      "loss": 2.9501,
      "step": 184521
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6514575481414795,
      "learning_rate": 5.681588846754234e-05,
      "loss": 2.9131,
      "step": 184522
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.575270414352417,
      "learning_rate": 5.6813493133566465e-05,
      "loss": 3.1889,
      "step": 184523
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5226833820343018,
      "learning_rate": 5.6811097844803496e-05,
      "loss": 3.2126,
      "step": 184524
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4852025508880615,
      "learning_rate": 5.6808702601253994e-05,
      "loss": 2.6661,
      "step": 184525
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6382460594177246,
      "learning_rate": 5.680630740291836e-05,
      "loss": 3.1236,
      "step": 184526
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3591573238372803,
      "learning_rate": 5.680391224979702e-05,
      "loss": 2.9512,
      "step": 184527
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6278257369995117,
      "learning_rate": 5.680151714189032e-05,
      "loss": 2.9908,
      "step": 184528
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.949276924133301,
      "learning_rate": 5.679912207919891e-05,
      "loss": 3.1173,
      "step": 184529
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.49027156829834,
      "learning_rate": 5.6796727061723066e-05,
      "loss": 3.0218,
      "step": 184530
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6721508502960205,
      "learning_rate": 5.679433208946339e-05,
      "loss": 3.0816,
      "step": 184531
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7427120208740234,
      "learning_rate": 5.679193716242021e-05,
      "loss": 2.9956,
      "step": 184532
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9723470211029053,
      "learning_rate": 5.678954228059399e-05,
      "loss": 3.1262,
      "step": 184533
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.968259334564209,
      "learning_rate": 5.678714744398514e-05,
      "loss": 3.0286,
      "step": 184534
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.163346767425537,
      "learning_rate": 5.678475265259421e-05,
      "loss": 3.0434,
      "step": 184535
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1122701168060303,
      "learning_rate": 5.678235790642151e-05,
      "loss": 2.8831,
      "step": 184536
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9349544048309326,
      "learning_rate": 5.677996320546765e-05,
      "loss": 2.78,
      "step": 184537
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.796999216079712,
      "learning_rate": 5.6777568549732876e-05,
      "loss": 3.0229,
      "step": 184538
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6595535278320312,
      "learning_rate": 5.677517393921793e-05,
      "loss": 2.8032,
      "step": 184539
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7880380153656006,
      "learning_rate": 5.6772779373922914e-05,
      "loss": 2.8167,
      "step": 184540
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2698333263397217,
      "learning_rate": 5.67703848538485e-05,
      "loss": 2.7707,
      "step": 184541
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5572280883789062,
      "learning_rate": 5.676799037899497e-05,
      "loss": 3.0156,
      "step": 184542
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3424553871154785,
      "learning_rate": 5.676559594936296e-05,
      "loss": 2.9476,
      "step": 184543
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8083255290985107,
      "learning_rate": 5.676320156495272e-05,
      "loss": 3.0228,
      "step": 184544
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.868295669555664,
      "learning_rate": 5.676080722576497e-05,
      "loss": 2.8287,
      "step": 184545
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0593678951263428,
      "learning_rate": 5.675841293179981e-05,
      "loss": 2.7576,
      "step": 184546
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.218896389007568,
      "learning_rate": 5.6756018683057946e-05,
      "loss": 2.8372,
      "step": 184547
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.60103702545166,
      "learning_rate": 5.6753624479539636e-05,
      "loss": 3.0393,
      "step": 184548
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.166348695755005,
      "learning_rate": 5.675123032124549e-05,
      "loss": 2.9449,
      "step": 184549
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.572169780731201,
      "learning_rate": 5.6748836208175795e-05,
      "loss": 2.8011,
      "step": 184550
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.522902250289917,
      "learning_rate": 5.674644214033126e-05,
      "loss": 2.7754,
      "step": 184551
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.483931064605713,
      "learning_rate": 5.6744048117712015e-05,
      "loss": 2.9871,
      "step": 184552
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8138015270233154,
      "learning_rate": 5.674165414031869e-05,
      "loss": 3.1547,
      "step": 184553
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1047606468200684,
      "learning_rate": 5.673926020815163e-05,
      "loss": 2.8812,
      "step": 184554
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0722367763519287,
      "learning_rate": 5.673686632121138e-05,
      "loss": 2.8317,
      "step": 184555
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.843287944793701,
      "learning_rate": 5.673447247949829e-05,
      "loss": 2.7828,
      "step": 184556
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2032852172851562,
      "learning_rate": 5.673207868301299e-05,
      "loss": 2.996,
      "step": 184557
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4020490646362305,
      "learning_rate": 5.6729684931755645e-05,
      "loss": 2.9525,
      "step": 184558
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.060356855392456,
      "learning_rate": 5.6727291225726925e-05,
      "loss": 2.7729,
      "step": 184559
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2232935428619385,
      "learning_rate": 5.672489756492709e-05,
      "loss": 3.2791,
      "step": 184560
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7034521102905273,
      "learning_rate": 5.6722503949356803e-05,
      "loss": 2.946,
      "step": 184561
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.420105457305908,
      "learning_rate": 5.6720110379016303e-05,
      "loss": 2.866,
      "step": 184562
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0214853286743164,
      "learning_rate": 5.671771685390619e-05,
      "loss": 3.0568,
      "step": 184563
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9973409175872803,
      "learning_rate": 5.6715323374026865e-05,
      "loss": 2.8633,
      "step": 184564
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1034882068634033,
      "learning_rate": 5.6712929939378725e-05,
      "loss": 2.9922,
      "step": 184565
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.558899402618408,
      "learning_rate": 5.6710536549962206e-05,
      "loss": 3.0461,
      "step": 184566
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0769083499908447,
      "learning_rate": 5.6708143205777836e-05,
      "loss": 3.2205,
      "step": 184567
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.865865468978882,
      "learning_rate": 5.670574990682595e-05,
      "loss": 2.9495,
      "step": 184568
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.737492799758911,
      "learning_rate": 5.6703356653107115e-05,
      "loss": 2.9474,
      "step": 184569
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6496193408966064,
      "learning_rate": 5.670096344462176e-05,
      "loss": 2.9231,
      "step": 184570
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.146981954574585,
      "learning_rate": 5.669857028137026e-05,
      "loss": 3.2272,
      "step": 184571
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.08850359916687,
      "learning_rate": 5.6696177163353006e-05,
      "loss": 2.705,
      "step": 184572
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.659467935562134,
      "learning_rate": 5.66937840905706e-05,
      "loss": 2.9793,
      "step": 184573
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.403860569000244,
      "learning_rate": 5.6691391063023384e-05,
      "loss": 3.11,
      "step": 184574
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9690215587615967,
      "learning_rate": 5.6688998080711845e-05,
      "loss": 2.8906,
      "step": 184575
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.636885404586792,
      "learning_rate": 5.668660514363645e-05,
      "loss": 2.7513,
      "step": 184576
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.470651626586914,
      "learning_rate": 5.6684212251797614e-05,
      "loss": 2.6161,
      "step": 184577
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5363411903381348,
      "learning_rate": 5.668181940519568e-05,
      "loss": 2.9469,
      "step": 184578
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.7325265407562256,
      "learning_rate": 5.667942660383127e-05,
      "loss": 2.7711,
      "step": 184579
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.66214656829834,
      "learning_rate": 5.667703384770467e-05,
      "loss": 2.9961,
      "step": 184580
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.857262134552002,
      "learning_rate": 5.667464113681648e-05,
      "loss": 2.8892,
      "step": 184581
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8711676597595215,
      "learning_rate": 5.667224847116707e-05,
      "loss": 2.9358,
      "step": 184582
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9414422512054443,
      "learning_rate": 5.666985585075687e-05,
      "loss": 3.2838,
      "step": 184583
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.61846923828125,
      "learning_rate": 5.666746327558629e-05,
      "loss": 3.0481,
      "step": 184584
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9358036518096924,
      "learning_rate": 5.666507074565588e-05,
      "loss": 2.9164,
      "step": 184585
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0965869426727295,
      "learning_rate": 5.666267826096592e-05,
      "loss": 2.9527,
      "step": 184586
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.687877655029297,
      "learning_rate": 5.666028582151707e-05,
      "loss": 2.8511,
      "step": 184587
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.831826686859131,
      "learning_rate": 5.665789342730966e-05,
      "loss": 2.8699,
      "step": 184588
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5826363563537598,
      "learning_rate": 5.665550107834417e-05,
      "loss": 2.9056,
      "step": 184589
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6791610717773438,
      "learning_rate": 5.665310877462092e-05,
      "loss": 2.7524,
      "step": 184590
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9996116161346436,
      "learning_rate": 5.66507165161405e-05,
      "loss": 2.7845,
      "step": 184591
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0483920574188232,
      "learning_rate": 5.664832430290327e-05,
      "loss": 2.9492,
      "step": 184592
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.629352569580078,
      "learning_rate": 5.664593213490974e-05,
      "loss": 2.9409,
      "step": 184593
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8913540840148926,
      "learning_rate": 5.664354001216036e-05,
      "loss": 3.006,
      "step": 184594
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.872547149658203,
      "learning_rate": 5.6641147934655515e-05,
      "loss": 2.82,
      "step": 184595
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.565610408782959,
      "learning_rate": 5.663875590239562e-05,
      "loss": 2.9415,
      "step": 184596
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3397486209869385,
      "learning_rate": 5.663636391538125e-05,
      "loss": 2.6883,
      "step": 184597
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.145245313644409,
      "learning_rate": 5.6633971973612705e-05,
      "loss": 2.8945,
      "step": 184598
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.483107328414917,
      "learning_rate": 5.6631580077090556e-05,
      "loss": 2.9743,
      "step": 184599
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7032737731933594,
      "learning_rate": 5.662918822581518e-05,
      "loss": 2.8301,
      "step": 184600
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.263369083404541,
      "learning_rate": 5.662679641978698e-05,
      "loss": 3.0401,
      "step": 184601
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.712294578552246,
      "learning_rate": 5.662440465900652e-05,
      "loss": 2.8386,
      "step": 184602
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.6335039138793945,
      "learning_rate": 5.6622012943474195e-05,
      "loss": 3.0607,
      "step": 184603
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.00213360786438,
      "learning_rate": 5.661962127319034e-05,
      "loss": 2.9103,
      "step": 184604
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3812625408172607,
      "learning_rate": 5.661722964815556e-05,
      "loss": 2.9289,
      "step": 184605
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6089048385620117,
      "learning_rate": 5.661483806837022e-05,
      "loss": 2.8702,
      "step": 184606
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7235336303710938,
      "learning_rate": 5.661244653383471e-05,
      "loss": 2.958,
      "step": 184607
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5570085048675537,
      "learning_rate": 5.661005504454964e-05,
      "loss": 2.9641,
      "step": 184608
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8806533813476562,
      "learning_rate": 5.660766360051527e-05,
      "loss": 2.8638,
      "step": 184609
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.6751885414123535,
      "learning_rate": 5.6605272201732174e-05,
      "loss": 2.7548,
      "step": 184610
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8309578895568848,
      "learning_rate": 5.660288084820078e-05,
      "loss": 2.823,
      "step": 184611
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2276740074157715,
      "learning_rate": 5.6600489539921526e-05,
      "loss": 2.8,
      "step": 184612
    },
    {
      "epoch": 2.4,
      "grad_norm": 6.255665302276611,
      "learning_rate": 5.659809827689473e-05,
      "loss": 2.7936,
      "step": 184613
    },
    {
      "epoch": 2.4,
      "grad_norm": 7.1560959815979,
      "learning_rate": 5.659570705912101e-05,
      "loss": 3.0443,
      "step": 184614
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.75465202331543,
      "learning_rate": 5.659331588660069e-05,
      "loss": 3.1563,
      "step": 184615
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5132999420166016,
      "learning_rate": 5.6590924759334333e-05,
      "loss": 3.1028,
      "step": 184616
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9434406757354736,
      "learning_rate": 5.658853367732231e-05,
      "loss": 3.1495,
      "step": 184617
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.827878713607788,
      "learning_rate": 5.6586142640565094e-05,
      "loss": 3.092,
      "step": 184618
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.9785618782043457,
      "learning_rate": 5.6583751649063e-05,
      "loss": 2.9834,
      "step": 184619
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.848374843597412,
      "learning_rate": 5.658136070281668e-05,
      "loss": 2.7464,
      "step": 184620
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0907227993011475,
      "learning_rate": 5.657896980182642e-05,
      "loss": 3.2125,
      "step": 184621
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4722399711608887,
      "learning_rate": 5.657657894609276e-05,
      "loss": 3.0285,
      "step": 184622
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.896778106689453,
      "learning_rate": 5.657418813561607e-05,
      "loss": 2.9719,
      "step": 184623
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.546335220336914,
      "learning_rate": 5.6571797370396975e-05,
      "loss": 2.7563,
      "step": 184624
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3600380420684814,
      "learning_rate": 5.656940665043561e-05,
      "loss": 3.0712,
      "step": 184625
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.752727746963501,
      "learning_rate": 5.6567015975732675e-05,
      "loss": 2.7534,
      "step": 184626
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6214566230773926,
      "learning_rate": 5.656462534628844e-05,
      "loss": 3.1333,
      "step": 184627
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3552207946777344,
      "learning_rate": 5.6562234762103534e-05,
      "loss": 3.0431,
      "step": 184628
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8602521419525146,
      "learning_rate": 5.655984422317822e-05,
      "loss": 3.075,
      "step": 184629
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.317288875579834,
      "learning_rate": 5.65574537295131e-05,
      "loss": 2.9535,
      "step": 184630
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0352282524108887,
      "learning_rate": 5.6555063281108547e-05,
      "loss": 2.7097,
      "step": 184631
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.797416925430298,
      "learning_rate": 5.6552672877964986e-05,
      "loss": 2.8283,
      "step": 184632
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.801421880722046,
      "learning_rate": 5.655028252008282e-05,
      "loss": 2.9538,
      "step": 184633
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1439054012298584,
      "learning_rate": 5.654789220746262e-05,
      "loss": 2.9474,
      "step": 184634
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.509659767150879,
      "learning_rate": 5.654550194010471e-05,
      "loss": 2.9037,
      "step": 184635
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.1949267387390137,
      "learning_rate": 5.6543111718009623e-05,
      "loss": 3.0397,
      "step": 184636
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.544938564300537,
      "learning_rate": 5.65407215411778e-05,
      "loss": 2.7284,
      "step": 184637
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.016782522201538,
      "learning_rate": 5.653833140960964e-05,
      "loss": 2.7073,
      "step": 184638
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.798001766204834,
      "learning_rate": 5.65359413233055e-05,
      "loss": 3.0356,
      "step": 184639
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4443657398223877,
      "learning_rate": 5.6533551282266055e-05,
      "loss": 3.2754,
      "step": 184640
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.792440891265869,
      "learning_rate": 5.65311612864915e-05,
      "loss": 2.8607,
      "step": 184641
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7247750759124756,
      "learning_rate": 5.652877133598247e-05,
      "loss": 3.1867,
      "step": 184642
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.591526508331299,
      "learning_rate": 5.6526381430739364e-05,
      "loss": 2.9066,
      "step": 184643
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8562567234039307,
      "learning_rate": 5.6523991570762586e-05,
      "loss": 2.9721,
      "step": 184644
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9016714096069336,
      "learning_rate": 5.652160175605253e-05,
      "loss": 2.7089,
      "step": 184645
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.832552671432495,
      "learning_rate": 5.6519211986609756e-05,
      "loss": 2.9004,
      "step": 184646
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.08253812789917,
      "learning_rate": 5.651682226243461e-05,
      "loss": 2.781,
      "step": 184647
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8579912185668945,
      "learning_rate": 5.651443258352763e-05,
      "loss": 2.9197,
      "step": 184648
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.642512083053589,
      "learning_rate": 5.651204294988923e-05,
      "loss": 2.9621,
      "step": 184649
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0193426609039307,
      "learning_rate": 5.6509653361519845e-05,
      "loss": 2.8302,
      "step": 184650
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.704057216644287,
      "learning_rate": 5.650726381841982e-05,
      "loss": 2.9889,
      "step": 184651
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.766406536102295,
      "learning_rate": 5.650487432058976e-05,
      "loss": 3.1936,
      "step": 184652
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7802295684814453,
      "learning_rate": 5.650248486802997e-05,
      "loss": 2.8138,
      "step": 184653
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8571066856384277,
      "learning_rate": 5.650009546074105e-05,
      "loss": 2.9328,
      "step": 184654
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.833768844604492,
      "learning_rate": 5.649770609872337e-05,
      "loss": 2.8944,
      "step": 184655
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.881028652191162,
      "learning_rate": 5.649531678197735e-05,
      "loss": 2.8616,
      "step": 184656
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.48834490776062,
      "learning_rate": 5.649292751050335e-05,
      "loss": 2.9774,
      "step": 184657
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.0869293212890625,
      "learning_rate": 5.6490538284302036e-05,
      "loss": 2.7515,
      "step": 184658
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.417421340942383,
      "learning_rate": 5.64881491033736e-05,
      "loss": 2.9349,
      "step": 184659
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9287095069885254,
      "learning_rate": 5.6485759967718725e-05,
      "loss": 2.8661,
      "step": 184660
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.653428077697754,
      "learning_rate": 5.648337087733773e-05,
      "loss": 3.1058,
      "step": 184661
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.8096325397491455,
      "learning_rate": 5.6480981832231085e-05,
      "loss": 2.832,
      "step": 184662
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.081009864807129,
      "learning_rate": 5.647859283239913e-05,
      "loss": 2.9673,
      "step": 184663
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.710697889328003,
      "learning_rate": 5.6476203877842485e-05,
      "loss": 2.7588,
      "step": 184664
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.028085708618164,
      "learning_rate": 5.6473814968561426e-05,
      "loss": 3.1083,
      "step": 184665
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.639188528060913,
      "learning_rate": 5.647142610455655e-05,
      "loss": 2.7589,
      "step": 184666
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.578641653060913,
      "learning_rate": 5.646903728582826e-05,
      "loss": 3.1087,
      "step": 184667
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.539130926132202,
      "learning_rate": 5.646664851237694e-05,
      "loss": 3.0278,
      "step": 184668
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7674856185913086,
      "learning_rate": 5.646425978420301e-05,
      "loss": 2.9494,
      "step": 184669
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.393637180328369,
      "learning_rate": 5.646187110130707e-05,
      "loss": 2.8944,
      "step": 184670
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.3483433723449707,
      "learning_rate": 5.6459482463689364e-05,
      "loss": 2.7182,
      "step": 184671
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.022068977355957,
      "learning_rate": 5.645709387135051e-05,
      "loss": 2.821,
      "step": 184672
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.227839708328247,
      "learning_rate": 5.645470532429091e-05,
      "loss": 2.9898,
      "step": 184673
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.739778995513916,
      "learning_rate": 5.645231682251095e-05,
      "loss": 3.0561,
      "step": 184674
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.8277957439422607,
      "learning_rate": 5.6449928366011e-05,
      "loss": 3.0669,
      "step": 184675
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.272810220718384,
      "learning_rate": 5.644753995479173e-05,
      "loss": 2.763,
      "step": 184676
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9172780513763428,
      "learning_rate": 5.6445151588853346e-05,
      "loss": 2.7576,
      "step": 184677
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3522531986236572,
      "learning_rate": 5.64427632681965e-05,
      "loss": 2.952,
      "step": 184678
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4909491539001465,
      "learning_rate": 5.6440374992821566e-05,
      "loss": 2.7412,
      "step": 184679
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.49670672416687,
      "learning_rate": 5.6437986762728916e-05,
      "loss": 2.678,
      "step": 184680
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9864578247070312,
      "learning_rate": 5.6435598577919006e-05,
      "loss": 3.0893,
      "step": 184681
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7768609523773193,
      "learning_rate": 5.643321043839237e-05,
      "loss": 2.988,
      "step": 184682
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.068877935409546,
      "learning_rate": 5.643082234414932e-05,
      "loss": 3.0271,
      "step": 184683
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.918518304824829,
      "learning_rate": 5.642843429519047e-05,
      "loss": 2.8508,
      "step": 184684
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9776508808135986,
      "learning_rate": 5.642604629151617e-05,
      "loss": 2.9765,
      "step": 184685
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.3828978538513184,
      "learning_rate": 5.6423658333126774e-05,
      "loss": 3.0133,
      "step": 184686
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.422482967376709,
      "learning_rate": 5.642127042002292e-05,
      "loss": 2.8584,
      "step": 184687
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.991224527359009,
      "learning_rate": 5.6418882552204916e-05,
      "loss": 3.1115,
      "step": 184688
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5178723335266113,
      "learning_rate": 5.641649472967318e-05,
      "loss": 2.8666,
      "step": 184689
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7048892974853516,
      "learning_rate": 5.641410695242828e-05,
      "loss": 2.8045,
      "step": 184690
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.9443812370300293,
      "learning_rate": 5.641171922047063e-05,
      "loss": 2.7657,
      "step": 184691
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.116204261779785,
      "learning_rate": 5.640933153380055e-05,
      "loss": 2.8735,
      "step": 184692
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.4334285259246826,
      "learning_rate": 5.640694389241861e-05,
      "loss": 3.1217,
      "step": 184693
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.93503475189209,
      "learning_rate": 5.640455629632528e-05,
      "loss": 3.1363,
      "step": 184694
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2030487060546875,
      "learning_rate": 5.640216874552081e-05,
      "loss": 2.8017,
      "step": 184695
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.206782102584839,
      "learning_rate": 5.6399781240005894e-05,
      "loss": 2.7771,
      "step": 184696
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.9903745651245117,
      "learning_rate": 5.6397393779780744e-05,
      "loss": 2.9626,
      "step": 184697
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.727896213531494,
      "learning_rate": 5.6395006364846026e-05,
      "loss": 3.1941,
      "step": 184698
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.5594253540039062,
      "learning_rate": 5.639261899520206e-05,
      "loss": 2.8525,
      "step": 184699
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.6632847785949707,
      "learning_rate": 5.639023167084922e-05,
      "loss": 2.9085,
      "step": 184700
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.612412929534912,
      "learning_rate": 5.638784439178812e-05,
      "loss": 2.9519,
      "step": 184701
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.2043023109436035,
      "learning_rate": 5.638545715801912e-05,
      "loss": 3.0693,
      "step": 184702
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.7896599769592285,
      "learning_rate": 5.638306996954256e-05,
      "loss": 2.9612,
      "step": 184703
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.043684482574463,
      "learning_rate": 5.63806828263591e-05,
      "loss": 2.8775,
      "step": 184704
    },
    {
      "epoch": 2.4,
      "grad_norm": 4.2929911613464355,
      "learning_rate": 5.6378295728469045e-05,
      "loss": 2.9106,
      "step": 184705
    },
    {
      "epoch": 2.4,
      "grad_norm": 2.541585922241211,
      "learning_rate": 5.6375908675872795e-05,
      "loss": 3.0343,
      "step": 184706
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.282947301864624,
      "learning_rate": 5.637352166857094e-05,
      "loss": 2.7643,
      "step": 184707
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8677642345428467,
      "learning_rate": 5.637113470656376e-05,
      "loss": 2.8782,
      "step": 184708
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6957685947418213,
      "learning_rate": 5.636874778985188e-05,
      "loss": 2.9094,
      "step": 184709
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.028113842010498,
      "learning_rate": 5.636636091843563e-05,
      "loss": 2.9153,
      "step": 184710
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1086695194244385,
      "learning_rate": 5.636397409231548e-05,
      "loss": 2.9761,
      "step": 184711
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7380316257476807,
      "learning_rate": 5.6361587311491796e-05,
      "loss": 2.926,
      "step": 184712
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.590315580368042,
      "learning_rate": 5.635920057596518e-05,
      "loss": 2.8646,
      "step": 184713
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.505742073059082,
      "learning_rate": 5.63568138857359e-05,
      "loss": 3.1762,
      "step": 184714
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.431345224380493,
      "learning_rate": 5.6354427240804546e-05,
      "loss": 3.0136,
      "step": 184715
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.883396863937378,
      "learning_rate": 5.6352040641171525e-05,
      "loss": 2.9227,
      "step": 184716
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8274447917938232,
      "learning_rate": 5.6349654086837236e-05,
      "loss": 3.0963,
      "step": 184717
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.694420099258423,
      "learning_rate": 5.634726757780208e-05,
      "loss": 2.9211,
      "step": 184718
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4014859199523926,
      "learning_rate": 5.634488111406665e-05,
      "loss": 3.0236,
      "step": 184719
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7222373485565186,
      "learning_rate": 5.6342494695631224e-05,
      "loss": 2.5714,
      "step": 184720
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9254984855651855,
      "learning_rate": 5.6340108322496426e-05,
      "loss": 2.8876,
      "step": 184721
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3167507648468018,
      "learning_rate": 5.6337721994662555e-05,
      "loss": 2.8052,
      "step": 184722
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.705763578414917,
      "learning_rate": 5.633533571213015e-05,
      "loss": 2.9646,
      "step": 184723
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8648486137390137,
      "learning_rate": 5.63329494748995e-05,
      "loss": 2.9108,
      "step": 184724
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9742214679718018,
      "learning_rate": 5.633056328297122e-05,
      "loss": 2.8705,
      "step": 184725
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6823391914367676,
      "learning_rate": 5.632817713634563e-05,
      "loss": 2.9275,
      "step": 184726
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8503165245056152,
      "learning_rate": 5.6325791035023305e-05,
      "loss": 3.0536,
      "step": 184727
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9307847023010254,
      "learning_rate": 5.632340497900464e-05,
      "loss": 3.0103,
      "step": 184728
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4238126277923584,
      "learning_rate": 5.632101896829e-05,
      "loss": 2.8841,
      "step": 184729
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4830849170684814,
      "learning_rate": 5.6318633002879855e-05,
      "loss": 3.1903,
      "step": 184730
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8340654373168945,
      "learning_rate": 5.631624708277473e-05,
      "loss": 2.94,
      "step": 184731
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.909829616546631,
      "learning_rate": 5.6313861207974943e-05,
      "loss": 3.0905,
      "step": 184732
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5052084922790527,
      "learning_rate": 5.6311475378481076e-05,
      "loss": 2.7185,
      "step": 184733
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.442659854888916,
      "learning_rate": 5.630908959429353e-05,
      "loss": 3.1546,
      "step": 184734
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4712486267089844,
      "learning_rate": 5.630670385541268e-05,
      "loss": 2.7286,
      "step": 184735
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.553497791290283,
      "learning_rate": 5.630431816183899e-05,
      "loss": 3.0765,
      "step": 184736
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1966042518615723,
      "learning_rate": 5.6301932513572954e-05,
      "loss": 3.25,
      "step": 184737
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3798742294311523,
      "learning_rate": 5.6299546910614945e-05,
      "loss": 2.8359,
      "step": 184738
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.782076835632324,
      "learning_rate": 5.629716135296553e-05,
      "loss": 2.8041,
      "step": 184739
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.0818891525268555,
      "learning_rate": 5.629477584062506e-05,
      "loss": 2.8057,
      "step": 184740
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6301698684692383,
      "learning_rate": 5.629239037359399e-05,
      "loss": 2.8454,
      "step": 184741
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7545344829559326,
      "learning_rate": 5.6290004951872704e-05,
      "loss": 3.1182,
      "step": 184742
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.0871927738189697,
      "learning_rate": 5.6287619575461775e-05,
      "loss": 3.0576,
      "step": 184743
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8324174880981445,
      "learning_rate": 5.62852342443615e-05,
      "loss": 2.9834,
      "step": 184744
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.827223539352417,
      "learning_rate": 5.628284895857248e-05,
      "loss": 2.9001,
      "step": 184745
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.0713372230529785,
      "learning_rate": 5.628046371809505e-05,
      "loss": 3.0388,
      "step": 184746
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8466038703918457,
      "learning_rate": 5.62780785229297e-05,
      "loss": 3.2552,
      "step": 184747
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.735717535018921,
      "learning_rate": 5.6275693373076815e-05,
      "loss": 2.8292,
      "step": 184748
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4743282794952393,
      "learning_rate": 5.627330826853691e-05,
      "loss": 2.8843,
      "step": 184749
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3755972385406494,
      "learning_rate": 5.6270923209310325e-05,
      "loss": 2.9151,
      "step": 184750
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0740554332733154,
      "learning_rate": 5.626853819539766e-05,
      "loss": 2.9149,
      "step": 184751
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.159894943237305,
      "learning_rate": 5.6266153226799284e-05,
      "loss": 3.1151,
      "step": 184752
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2078375816345215,
      "learning_rate": 5.626376830351562e-05,
      "loss": 2.835,
      "step": 184753
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7499306201934814,
      "learning_rate": 5.626138342554708e-05,
      "loss": 2.7944,
      "step": 184754
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5155622959136963,
      "learning_rate": 5.625899859289419e-05,
      "loss": 2.8366,
      "step": 184755
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.71354079246521,
      "learning_rate": 5.625661380555728e-05,
      "loss": 2.8629,
      "step": 184756
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.0243096351623535,
      "learning_rate": 5.625422906353696e-05,
      "loss": 3.0719,
      "step": 184757
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.329367637634277,
      "learning_rate": 5.625184436683355e-05,
      "loss": 2.9331,
      "step": 184758
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.283511638641357,
      "learning_rate": 5.624945971544756e-05,
      "loss": 2.7291,
      "step": 184759
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8111300468444824,
      "learning_rate": 5.624707510937929e-05,
      "loss": 2.7476,
      "step": 184760
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.113463878631592,
      "learning_rate": 5.6244690548629365e-05,
      "loss": 2.8529,
      "step": 184761
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.89182186126709,
      "learning_rate": 5.624230603319809e-05,
      "loss": 2.9365,
      "step": 184762
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1545581817626953,
      "learning_rate": 5.6239921563086064e-05,
      "loss": 2.8653,
      "step": 184763
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6981778144836426,
      "learning_rate": 5.623753713829355e-05,
      "loss": 2.8303,
      "step": 184764
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.932467460632324,
      "learning_rate": 5.6235152758821224e-05,
      "loss": 2.755,
      "step": 184765
    },
    {
      "epoch": 2.41,
      "grad_norm": 6.188534259796143,
      "learning_rate": 5.623276842466924e-05,
      "loss": 2.8202,
      "step": 184766
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.457453966140747,
      "learning_rate": 5.623038413583823e-05,
      "loss": 3.0422,
      "step": 184767
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6871962547302246,
      "learning_rate": 5.6227999892328544e-05,
      "loss": 2.9684,
      "step": 184768
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7184317111968994,
      "learning_rate": 5.622561569414077e-05,
      "loss": 2.8888,
      "step": 184769
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5570521354675293,
      "learning_rate": 5.6223231541275174e-05,
      "loss": 2.9318,
      "step": 184770
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7692666053771973,
      "learning_rate": 5.622084743373232e-05,
      "loss": 3.0166,
      "step": 184771
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9748356342315674,
      "learning_rate": 5.6218463371512645e-05,
      "loss": 2.9998,
      "step": 184772
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2539939880371094,
      "learning_rate": 5.621607935461655e-05,
      "loss": 2.8029,
      "step": 184773
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.566114902496338,
      "learning_rate": 5.62136953830444e-05,
      "loss": 2.8125,
      "step": 184774
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.105686902999878,
      "learning_rate": 5.6211311456796823e-05,
      "loss": 2.6034,
      "step": 184775
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5401666164398193,
      "learning_rate": 5.620892757587406e-05,
      "loss": 2.8443,
      "step": 184776
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.788522243499756,
      "learning_rate": 5.620654374027678e-05,
      "loss": 2.8471,
      "step": 184777
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.065990447998047,
      "learning_rate": 5.6204159950005266e-05,
      "loss": 2.7981,
      "step": 184778
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.985302209854126,
      "learning_rate": 5.620177620506e-05,
      "loss": 2.9143,
      "step": 184779
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.696115493774414,
      "learning_rate": 5.619939250544138e-05,
      "loss": 3.1671,
      "step": 184780
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4189889430999756,
      "learning_rate": 5.619700885114996e-05,
      "loss": 2.8067,
      "step": 184781
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.63275671005249,
      "learning_rate": 5.619462524218602e-05,
      "loss": 3.3832,
      "step": 184782
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2169458866119385,
      "learning_rate": 5.619224167855022e-05,
      "loss": 3.0534,
      "step": 184783
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6072511672973633,
      "learning_rate": 5.618985816024286e-05,
      "loss": 2.9355,
      "step": 184784
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.746735572814941,
      "learning_rate": 5.618747468726431e-05,
      "loss": 2.9805,
      "step": 184785
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.3813276290893555,
      "learning_rate": 5.61850912596152e-05,
      "loss": 2.9108,
      "step": 184786
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.236789226531982,
      "learning_rate": 5.6182707877295895e-05,
      "loss": 3.0203,
      "step": 184787
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.834455966949463,
      "learning_rate": 5.6180324540306766e-05,
      "loss": 2.816,
      "step": 184788
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5520009994506836,
      "learning_rate": 5.617794124864837e-05,
      "loss": 2.9844,
      "step": 184789
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8719377517700195,
      "learning_rate": 5.6175558002321083e-05,
      "loss": 2.7909,
      "step": 184790
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2004549503326416,
      "learning_rate": 5.617317480132534e-05,
      "loss": 3.1433,
      "step": 184791
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2245290279388428,
      "learning_rate": 5.617079164566163e-05,
      "loss": 3.107,
      "step": 184792
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5429258346557617,
      "learning_rate": 5.616840853533032e-05,
      "loss": 3.0498,
      "step": 184793
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1114540100097656,
      "learning_rate": 5.6166025470331956e-05,
      "loss": 2.8555,
      "step": 184794
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.092954397201538,
      "learning_rate": 5.6163642450666956e-05,
      "loss": 2.691,
      "step": 184795
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5430893898010254,
      "learning_rate": 5.616125947633573e-05,
      "loss": 2.941,
      "step": 184796
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5266613960266113,
      "learning_rate": 5.6158876547338673e-05,
      "loss": 2.8684,
      "step": 184797
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7913966178894043,
      "learning_rate": 5.615649366367632e-05,
      "loss": 3.2029,
      "step": 184798
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3551270961761475,
      "learning_rate": 5.615411082534903e-05,
      "loss": 2.8391,
      "step": 184799
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.8462109565734863,
      "learning_rate": 5.615172803235738e-05,
      "loss": 3.099,
      "step": 184800
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2137928009033203,
      "learning_rate": 5.6149345284701696e-05,
      "loss": 3.055,
      "step": 184801
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.723447322845459,
      "learning_rate": 5.614696258238248e-05,
      "loss": 2.6981,
      "step": 184802
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7887158393859863,
      "learning_rate": 5.6144579925400036e-05,
      "loss": 3.0276,
      "step": 184803
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8268864154815674,
      "learning_rate": 5.614219731375502e-05,
      "loss": 2.7872,
      "step": 184804
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.828437328338623,
      "learning_rate": 5.6139814747447676e-05,
      "loss": 2.9574,
      "step": 184805
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7482879161834717,
      "learning_rate": 5.613743222647866e-05,
      "loss": 2.6696,
      "step": 184806
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2346103191375732,
      "learning_rate": 5.613504975084825e-05,
      "loss": 3.1065,
      "step": 184807
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5035815238952637,
      "learning_rate": 5.6132667320556964e-05,
      "loss": 2.9761,
      "step": 184808
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.526397943496704,
      "learning_rate": 5.613028493560515e-05,
      "loss": 2.9284,
      "step": 184809
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.02556037902832,
      "learning_rate": 5.612790259599337e-05,
      "loss": 2.7234,
      "step": 184810
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9862728118896484,
      "learning_rate": 5.612552030172195e-05,
      "loss": 2.8864,
      "step": 184811
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9946136474609375,
      "learning_rate": 5.612313805279149e-05,
      "loss": 3.1151,
      "step": 184812
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.108391284942627,
      "learning_rate": 5.612075584920234e-05,
      "loss": 2.9663,
      "step": 184813
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1363236904144287,
      "learning_rate": 5.6118373690954944e-05,
      "loss": 3.1517,
      "step": 184814
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6256532669067383,
      "learning_rate": 5.6115991578049644e-05,
      "loss": 2.8511,
      "step": 184815
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8650951385498047,
      "learning_rate": 5.611360951048708e-05,
      "loss": 2.676,
      "step": 184816
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1830077171325684,
      "learning_rate": 5.611122748826754e-05,
      "loss": 2.972,
      "step": 184817
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9559271335601807,
      "learning_rate": 5.6108845511391565e-05,
      "loss": 3.068,
      "step": 184818
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.705106496810913,
      "learning_rate": 5.610646357985959e-05,
      "loss": 2.8173,
      "step": 184819
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.183516502380371,
      "learning_rate": 5.610408169367203e-05,
      "loss": 3.0827,
      "step": 184820
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.217616319656372,
      "learning_rate": 5.6101699852829244e-05,
      "loss": 2.6142,
      "step": 184821
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8632090091705322,
      "learning_rate": 5.609931805733181e-05,
      "loss": 2.9193,
      "step": 184822
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5988783836364746,
      "learning_rate": 5.609693630718004e-05,
      "loss": 2.9552,
      "step": 184823
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.459728479385376,
      "learning_rate": 5.6094554602374566e-05,
      "loss": 2.856,
      "step": 184824
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.262799024581909,
      "learning_rate": 5.609217294291568e-05,
      "loss": 2.8122,
      "step": 184825
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6873297691345215,
      "learning_rate": 5.608979132880389e-05,
      "loss": 2.8576,
      "step": 184826
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8712165355682373,
      "learning_rate": 5.6087409760039525e-05,
      "loss": 3.0266,
      "step": 184827
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5296499729156494,
      "learning_rate": 5.6085028236623186e-05,
      "loss": 2.9038,
      "step": 184828
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.824215888977051,
      "learning_rate": 5.6082646758555175e-05,
      "loss": 3.0687,
      "step": 184829
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4877521991729736,
      "learning_rate": 5.608026532583608e-05,
      "loss": 3.0065,
      "step": 184830
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7813291549682617,
      "learning_rate": 5.6077883938466184e-05,
      "loss": 2.8016,
      "step": 184831
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8010952472686768,
      "learning_rate": 5.607550259644618e-05,
      "loss": 3.0104,
      "step": 184832
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7661170959472656,
      "learning_rate": 5.607312129977619e-05,
      "loss": 2.863,
      "step": 184833
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.642472743988037,
      "learning_rate": 5.60707400484569e-05,
      "loss": 2.9879,
      "step": 184834
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.741456985473633,
      "learning_rate": 5.606835884248856e-05,
      "loss": 2.8398,
      "step": 184835
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4808123111724854,
      "learning_rate": 5.606597768187181e-05,
      "loss": 3.0174,
      "step": 184836
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.168717861175537,
      "learning_rate": 5.6063596566606914e-05,
      "loss": 3.0227,
      "step": 184837
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.533303737640381,
      "learning_rate": 5.606121549669454e-05,
      "loss": 2.6828,
      "step": 184838
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6368277072906494,
      "learning_rate": 5.605883447213486e-05,
      "loss": 3.0961,
      "step": 184839
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7607133388519287,
      "learning_rate": 5.605645349292853e-05,
      "loss": 2.9533,
      "step": 184840
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9200196266174316,
      "learning_rate": 5.605407255907582e-05,
      "loss": 2.9139,
      "step": 184841
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.675175666809082,
      "learning_rate": 5.6051691670577335e-05,
      "loss": 2.9398,
      "step": 184842
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.992896795272827,
      "learning_rate": 5.604931082743337e-05,
      "loss": 2.7066,
      "step": 184843
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1157445907592773,
      "learning_rate": 5.6046930029644624e-05,
      "loss": 2.6301,
      "step": 184844
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.471691131591797,
      "learning_rate": 5.604454927721116e-05,
      "loss": 2.86,
      "step": 184845
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8815433979034424,
      "learning_rate": 5.604216857013372e-05,
      "loss": 2.8957,
      "step": 184846
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6106531620025635,
      "learning_rate": 5.6039787908412556e-05,
      "loss": 2.6469,
      "step": 184847
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.116180419921875,
      "learning_rate": 5.6037407292048286e-05,
      "loss": 2.7817,
      "step": 184848
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.0198588371276855,
      "learning_rate": 5.60350267210412e-05,
      "loss": 3.0211,
      "step": 184849
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8442935943603516,
      "learning_rate": 5.603264619539196e-05,
      "loss": 3.1053,
      "step": 184850
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6409406661987305,
      "learning_rate": 5.603026571510071e-05,
      "loss": 2.9586,
      "step": 184851
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5866527557373047,
      "learning_rate": 5.6027885280168106e-05,
      "loss": 2.9862,
      "step": 184852
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2566258907318115,
      "learning_rate": 5.602550489059442e-05,
      "loss": 2.8574,
      "step": 184853
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0702898502349854,
      "learning_rate": 5.602312454638032e-05,
      "loss": 2.9965,
      "step": 184854
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.471539258956909,
      "learning_rate": 5.602074424752603e-05,
      "loss": 2.9074,
      "step": 184855
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.1981241703033447,
      "learning_rate": 5.601836399403222e-05,
      "loss": 2.8655,
      "step": 184856
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.090336561203003,
      "learning_rate": 5.601598378589906e-05,
      "loss": 2.9847,
      "step": 184857
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5846049785614014,
      "learning_rate": 5.6013603623127225e-05,
      "loss": 2.9668,
      "step": 184858
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3561999797821045,
      "learning_rate": 5.6011223505716996e-05,
      "loss": 2.941,
      "step": 184859
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.075861930847168,
      "learning_rate": 5.6008843433668945e-05,
      "loss": 2.8772,
      "step": 184860
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.767205238342285,
      "learning_rate": 5.600646340698338e-05,
      "loss": 2.9561,
      "step": 184861
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8456614017486572,
      "learning_rate": 5.6004083425660925e-05,
      "loss": 3.0731,
      "step": 184862
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1126585006713867,
      "learning_rate": 5.600170348970189e-05,
      "loss": 2.9899,
      "step": 184863
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0830841064453125,
      "learning_rate": 5.599932359910673e-05,
      "loss": 3.0412,
      "step": 184864
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.085138320922852,
      "learning_rate": 5.599694375387585e-05,
      "loss": 2.892,
      "step": 184865
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.928067207336426,
      "learning_rate": 5.5994563954009806e-05,
      "loss": 2.8343,
      "step": 184866
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0287668704986572,
      "learning_rate": 5.599218419950892e-05,
      "loss": 2.9716,
      "step": 184867
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.544542074203491,
      "learning_rate": 5.598980449037377e-05,
      "loss": 2.8047,
      "step": 184868
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.043555498123169,
      "learning_rate": 5.59874248266047e-05,
      "loss": 2.8554,
      "step": 184869
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0000100135803223,
      "learning_rate": 5.5985045208202104e-05,
      "loss": 2.7757,
      "step": 184870
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.318795680999756,
      "learning_rate": 5.5982665635166554e-05,
      "loss": 2.8895,
      "step": 184871
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7850420475006104,
      "learning_rate": 5.598028610749848e-05,
      "loss": 3.1541,
      "step": 184872
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.389531135559082,
      "learning_rate": 5.597790662519816e-05,
      "loss": 2.7742,
      "step": 184873
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6449244022369385,
      "learning_rate": 5.597552718826623e-05,
      "loss": 2.664,
      "step": 184874
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.113710880279541,
      "learning_rate": 5.5973147796703086e-05,
      "loss": 2.9543,
      "step": 184875
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4913036823272705,
      "learning_rate": 5.597076845050905e-05,
      "loss": 3.0526,
      "step": 184876
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7587203979492188,
      "learning_rate": 5.596838914968472e-05,
      "loss": 2.7837,
      "step": 184877
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.128564834594727,
      "learning_rate": 5.596600989423047e-05,
      "loss": 3.0895,
      "step": 184878
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.817542552947998,
      "learning_rate": 5.5963630684146685e-05,
      "loss": 2.9767,
      "step": 184879
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.267853021621704,
      "learning_rate": 5.5961251519433944e-05,
      "loss": 2.4989,
      "step": 184880
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8265068531036377,
      "learning_rate": 5.595887240009261e-05,
      "loss": 2.7097,
      "step": 184881
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.589437484741211,
      "learning_rate": 5.595649332612305e-05,
      "loss": 3.0903,
      "step": 184882
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.9478437900543213,
      "learning_rate": 5.5954114297525864e-05,
      "loss": 3.1167,
      "step": 184883
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9470531940460205,
      "learning_rate": 5.595173531430132e-05,
      "loss": 2.7769,
      "step": 184884
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4465935230255127,
      "learning_rate": 5.594935637645004e-05,
      "loss": 2.8271,
      "step": 184885
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0711708068847656,
      "learning_rate": 5.59469774839724e-05,
      "loss": 2.7326,
      "step": 184886
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.065232992172241,
      "learning_rate": 5.59445986368688e-05,
      "loss": 2.9479,
      "step": 184887
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5492947101593018,
      "learning_rate": 5.594221983513967e-05,
      "loss": 3.0014,
      "step": 184888
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.394850492477417,
      "learning_rate": 5.593984107878551e-05,
      "loss": 3.0176,
      "step": 184889
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7807040214538574,
      "learning_rate": 5.5937462367806683e-05,
      "loss": 3.2363,
      "step": 184890
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.130406618118286,
      "learning_rate": 5.59350837022038e-05,
      "loss": 2.9218,
      "step": 184891
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7302000522613525,
      "learning_rate": 5.5932705081977144e-05,
      "loss": 3.15,
      "step": 184892
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.459367513656616,
      "learning_rate": 5.593032650712723e-05,
      "loss": 3.0527,
      "step": 184893
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.751269817352295,
      "learning_rate": 5.5927947977654385e-05,
      "loss": 3.0355,
      "step": 184894
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6990623474121094,
      "learning_rate": 5.592556949355924e-05,
      "loss": 2.8951,
      "step": 184895
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6217892169952393,
      "learning_rate": 5.592319105484203e-05,
      "loss": 2.863,
      "step": 184896
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9966237545013428,
      "learning_rate": 5.5920812661503426e-05,
      "loss": 3.0049,
      "step": 184897
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6974503993988037,
      "learning_rate": 5.591843431354365e-05,
      "loss": 2.846,
      "step": 184898
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7698354721069336,
      "learning_rate": 5.59160560109634e-05,
      "loss": 2.6889,
      "step": 184899
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1864540576934814,
      "learning_rate": 5.59136777537628e-05,
      "loss": 2.8996,
      "step": 184900
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7104101181030273,
      "learning_rate": 5.591129954194252e-05,
      "loss": 2.9454,
      "step": 184901
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.9514734745025635,
      "learning_rate": 5.590892137550288e-05,
      "loss": 2.7362,
      "step": 184902
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6696929931640625,
      "learning_rate": 5.590654325444447e-05,
      "loss": 2.6972,
      "step": 184903
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9186325073242188,
      "learning_rate": 5.5904165178767554e-05,
      "loss": 2.9177,
      "step": 184904
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7802786827087402,
      "learning_rate": 5.590178714847281e-05,
      "loss": 3.2404,
      "step": 184905
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.685681104660034,
      "learning_rate": 5.589940916356036e-05,
      "loss": 3.1094,
      "step": 184906
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.883690595626831,
      "learning_rate": 5.5897031224030906e-05,
      "loss": 2.727,
      "step": 184907
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.192852735519409,
      "learning_rate": 5.589465332988475e-05,
      "loss": 2.984,
      "step": 184908
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.730417251586914,
      "learning_rate": 5.589227548112243e-05,
      "loss": 2.6713,
      "step": 184909
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.944028854370117,
      "learning_rate": 5.58898976777443e-05,
      "loss": 3.0051,
      "step": 184910
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.281929016113281,
      "learning_rate": 5.5887519919750965e-05,
      "loss": 2.9966,
      "step": 184911
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.458811044692993,
      "learning_rate": 5.588514220714263e-05,
      "loss": 2.967,
      "step": 184912
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5494885444641113,
      "learning_rate": 5.588276453991989e-05,
      "loss": 2.8479,
      "step": 184913
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5316660404205322,
      "learning_rate": 5.588038691808311e-05,
      "loss": 2.999,
      "step": 184914
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9736835956573486,
      "learning_rate": 5.5878009341632823e-05,
      "loss": 2.8394,
      "step": 184915
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5150146484375,
      "learning_rate": 5.587563181056937e-05,
      "loss": 2.9263,
      "step": 184916
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7110157012939453,
      "learning_rate": 5.58732543248934e-05,
      "loss": 2.9859,
      "step": 184917
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.949507474899292,
      "learning_rate": 5.587087688460503e-05,
      "loss": 2.8374,
      "step": 184918
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0015034675598145,
      "learning_rate": 5.5868499489704956e-05,
      "loss": 3.0498,
      "step": 184919
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.527336359024048,
      "learning_rate": 5.586612214019347e-05,
      "loss": 3.015,
      "step": 184920
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0566349029541016,
      "learning_rate": 5.586374483607115e-05,
      "loss": 3.0043,
      "step": 184921
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0225987434387207,
      "learning_rate": 5.586136757733828e-05,
      "loss": 2.7792,
      "step": 184922
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0400044918060303,
      "learning_rate": 5.585899036399554e-05,
      "loss": 3.1871,
      "step": 184923
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.019520282745361,
      "learning_rate": 5.5856613196043084e-05,
      "loss": 2.8617,
      "step": 184924
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1099958419799805,
      "learning_rate": 5.585423607348156e-05,
      "loss": 3.1913,
      "step": 184925
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8378169536590576,
      "learning_rate": 5.585185899631126e-05,
      "loss": 2.6897,
      "step": 184926
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5623719692230225,
      "learning_rate": 5.584948196453282e-05,
      "loss": 3.1235,
      "step": 184927
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.909125328063965,
      "learning_rate": 5.5847104978146465e-05,
      "loss": 2.7692,
      "step": 184928
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3039379119873047,
      "learning_rate": 5.58447280371529e-05,
      "loss": 2.988,
      "step": 184929
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5284154415130615,
      "learning_rate": 5.584235114155222e-05,
      "loss": 2.8403,
      "step": 184930
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3125178813934326,
      "learning_rate": 5.583997429134517e-05,
      "loss": 2.7859,
      "step": 184931
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.478313446044922,
      "learning_rate": 5.5837597486532005e-05,
      "loss": 2.9176,
      "step": 184932
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0846729278564453,
      "learning_rate": 5.583522072711333e-05,
      "loss": 2.8439,
      "step": 184933
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.80033278465271,
      "learning_rate": 5.5832844013089375e-05,
      "loss": 3.0048,
      "step": 184934
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3909366130828857,
      "learning_rate": 5.583046734446087e-05,
      "loss": 3.1201,
      "step": 184935
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5968148708343506,
      "learning_rate": 5.5828090721227956e-05,
      "loss": 2.9515,
      "step": 184936
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.427910089492798,
      "learning_rate": 5.5825714143391295e-05,
      "loss": 2.9145,
      "step": 184937
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8714749813079834,
      "learning_rate": 5.582333761095112e-05,
      "loss": 3.0938,
      "step": 184938
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.088249444961548,
      "learning_rate": 5.5820961123908106e-05,
      "loss": 2.8342,
      "step": 184939
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.049757957458496,
      "learning_rate": 5.58185846822625e-05,
      "loss": 2.9043,
      "step": 184940
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.001091718673706,
      "learning_rate": 5.5816208286014984e-05,
      "loss": 2.8634,
      "step": 184941
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0378153324127197,
      "learning_rate": 5.5813831935165684e-05,
      "loss": 3.0222,
      "step": 184942
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3858132362365723,
      "learning_rate": 5.5811455629715304e-05,
      "loss": 3.0511,
      "step": 184943
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.748309850692749,
      "learning_rate": 5.580907936966408e-05,
      "loss": 3.2703,
      "step": 184944
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.473109245300293,
      "learning_rate": 5.580670315501263e-05,
      "loss": 2.8686,
      "step": 184945
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3195960521698,
      "learning_rate": 5.580432698576127e-05,
      "loss": 3.0929,
      "step": 184946
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.282593727111816,
      "learning_rate": 5.580195086191056e-05,
      "loss": 2.8442,
      "step": 184947
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.474925994873047,
      "learning_rate": 5.57995747834609e-05,
      "loss": 2.9281,
      "step": 184948
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.290029287338257,
      "learning_rate": 5.579719875041269e-05,
      "loss": 2.7822,
      "step": 184949
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2197983264923096,
      "learning_rate": 5.579482276276629e-05,
      "loss": 2.8391,
      "step": 184950
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8564841747283936,
      "learning_rate": 5.579244682052234e-05,
      "loss": 3.2408,
      "step": 184951
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7513816356658936,
      "learning_rate": 5.5790070923681105e-05,
      "loss": 2.7839,
      "step": 184952
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5063698291778564,
      "learning_rate": 5.578769507224319e-05,
      "loss": 2.7948,
      "step": 184953
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5640878677368164,
      "learning_rate": 5.578531926620895e-05,
      "loss": 2.9319,
      "step": 184954
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.677262783050537,
      "learning_rate": 5.578294350557873e-05,
      "loss": 2.9667,
      "step": 184955
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.950052499771118,
      "learning_rate": 5.5780567790353184e-05,
      "loss": 2.8728,
      "step": 184956
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.792100191116333,
      "learning_rate": 5.577819212053262e-05,
      "loss": 3.1152,
      "step": 184957
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7513484954833984,
      "learning_rate": 5.577581649611743e-05,
      "loss": 3.3151,
      "step": 184958
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3988142013549805,
      "learning_rate": 5.577344091710819e-05,
      "loss": 2.9436,
      "step": 184959
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.925870418548584,
      "learning_rate": 5.5771065383505297e-05,
      "loss": 2.8293,
      "step": 184960
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1335532665252686,
      "learning_rate": 5.5768689895309085e-05,
      "loss": 2.9929,
      "step": 184961
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1158454418182373,
      "learning_rate": 5.576631445252014e-05,
      "loss": 2.9884,
      "step": 184962
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4156103134155273,
      "learning_rate": 5.576393905513889e-05,
      "loss": 2.8966,
      "step": 184963
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8510186672210693,
      "learning_rate": 5.576156370316564e-05,
      "loss": 3.0199,
      "step": 184964
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.896045684814453,
      "learning_rate": 5.575918839660099e-05,
      "loss": 2.7301,
      "step": 184965
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8021597862243652,
      "learning_rate": 5.5756813135445335e-05,
      "loss": 3.0179,
      "step": 184966
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4729745388031006,
      "learning_rate": 5.575443791969898e-05,
      "loss": 2.9241,
      "step": 184967
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5197436809539795,
      "learning_rate": 5.57520627493626e-05,
      "loss": 2.8913,
      "step": 184968
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.757996082305908,
      "learning_rate": 5.574968762443643e-05,
      "loss": 2.8721,
      "step": 184969
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.537365198135376,
      "learning_rate": 5.574731254492111e-05,
      "loss": 3.0044,
      "step": 184970
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9468910694122314,
      "learning_rate": 5.574493751081696e-05,
      "loss": 2.7832,
      "step": 184971
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9237494468688965,
      "learning_rate": 5.574256252212441e-05,
      "loss": 3.0636,
      "step": 184972
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5656003952026367,
      "learning_rate": 5.5740187578843874e-05,
      "loss": 2.9925,
      "step": 184973
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2523233890533447,
      "learning_rate": 5.5737812680975946e-05,
      "loss": 2.8702,
      "step": 184974
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4368584156036377,
      "learning_rate": 5.5735437828520855e-05,
      "loss": 2.8398,
      "step": 184975
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3654305934906006,
      "learning_rate": 5.5733063021479274e-05,
      "loss": 2.699,
      "step": 184976
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.02575421333313,
      "learning_rate": 5.57306882598515e-05,
      "loss": 3.054,
      "step": 184977
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6211020946502686,
      "learning_rate": 5.572831354363799e-05,
      "loss": 3.1643,
      "step": 184978
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.463672161102295,
      "learning_rate": 5.5725938872839126e-05,
      "loss": 2.8528,
      "step": 184979
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.037607192993164,
      "learning_rate": 5.572356424745553e-05,
      "loss": 2.7705,
      "step": 184980
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.531345844268799,
      "learning_rate": 5.572118966748744e-05,
      "loss": 2.8833,
      "step": 184981
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.142230749130249,
      "learning_rate": 5.571881513293546e-05,
      "loss": 3.1426,
      "step": 184982
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3951528072357178,
      "learning_rate": 5.571644064379988e-05,
      "loss": 2.8707,
      "step": 184983
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1758499145507812,
      "learning_rate": 5.5714066200081396e-05,
      "loss": 2.9628,
      "step": 184984
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.901025295257568,
      "learning_rate": 5.571169180178012e-05,
      "loss": 3.1268,
      "step": 184985
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.507702350616455,
      "learning_rate": 5.570931744889672e-05,
      "loss": 2.8596,
      "step": 184986
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.181520700454712,
      "learning_rate": 5.5706943141431515e-05,
      "loss": 2.8953,
      "step": 184987
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0490660667419434,
      "learning_rate": 5.5704568879385046e-05,
      "loss": 2.878,
      "step": 184988
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.671482563018799,
      "learning_rate": 5.570219466275765e-05,
      "loss": 2.9957,
      "step": 184989
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6274943351745605,
      "learning_rate": 5.569982049154998e-05,
      "loss": 3.2389,
      "step": 184990
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5402345657348633,
      "learning_rate": 5.5697446365762154e-05,
      "loss": 3.1119,
      "step": 184991
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.739726543426514,
      "learning_rate": 5.5695072285394895e-05,
      "loss": 2.9657,
      "step": 184992
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3263280391693115,
      "learning_rate": 5.569269825044843e-05,
      "loss": 3.0467,
      "step": 184993
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.681929588317871,
      "learning_rate": 5.56903242609234e-05,
      "loss": 2.9331,
      "step": 184994
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.579575777053833,
      "learning_rate": 5.568795031682007e-05,
      "loss": 2.9347,
      "step": 184995
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6780877113342285,
      "learning_rate": 5.568557641813911e-05,
      "loss": 2.9809,
      "step": 184996
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.757607460021973,
      "learning_rate": 5.568320256488065e-05,
      "loss": 2.9088,
      "step": 184997
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9541566371917725,
      "learning_rate": 5.568082875704538e-05,
      "loss": 2.9796,
      "step": 184998
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6327991485595703,
      "learning_rate": 5.567845499463358e-05,
      "loss": 3.101,
      "step": 184999
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3142435550689697,
      "learning_rate": 5.567608127764585e-05,
      "loss": 3.0679,
      "step": 185000
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7174813747406006,
      "learning_rate": 5.567370760608244e-05,
      "loss": 2.8306,
      "step": 185001
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5869526863098145,
      "learning_rate": 5.56713339799441e-05,
      "loss": 2.6641,
      "step": 185002
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5097789764404297,
      "learning_rate": 5.566896039923089e-05,
      "loss": 2.912,
      "step": 185003
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4108166694641113,
      "learning_rate": 5.566658686394351e-05,
      "loss": 3.0927,
      "step": 185004
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.871216058731079,
      "learning_rate": 5.566421337408226e-05,
      "loss": 2.826,
      "step": 185005
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8450398445129395,
      "learning_rate": 5.56618399296477e-05,
      "loss": 3.0811,
      "step": 185006
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1834943294525146,
      "learning_rate": 5.565946653064017e-05,
      "loss": 3.0142,
      "step": 185007
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.097069501876831,
      "learning_rate": 5.565709317706028e-05,
      "loss": 3.1244,
      "step": 185008
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.534381628036499,
      "learning_rate": 5.56547198689082e-05,
      "loss": 2.954,
      "step": 185009
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9590609073638916,
      "learning_rate": 5.5652346606184626e-05,
      "loss": 2.8841,
      "step": 185010
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.797999143600464,
      "learning_rate": 5.564997338888978e-05,
      "loss": 2.4794,
      "step": 185011
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.652600049972534,
      "learning_rate": 5.564760021702432e-05,
      "loss": 2.9381,
      "step": 185012
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4852824211120605,
      "learning_rate": 5.5645227090588494e-05,
      "loss": 2.8841,
      "step": 185013
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9435672760009766,
      "learning_rate": 5.5642854009582995e-05,
      "loss": 2.8853,
      "step": 185014
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.41428279876709,
      "learning_rate": 5.564048097400792e-05,
      "loss": 2.6269,
      "step": 185015
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4452860355377197,
      "learning_rate": 5.5638107983864e-05,
      "loss": 2.7586,
      "step": 185016
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.9193878173828125,
      "learning_rate": 5.563573503915147e-05,
      "loss": 2.9562,
      "step": 185017
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7596075534820557,
      "learning_rate": 5.5633362139870975e-05,
      "loss": 3.0115,
      "step": 185018
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8607869148254395,
      "learning_rate": 5.5630989286022765e-05,
      "loss": 2.9888,
      "step": 185019
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.132213592529297,
      "learning_rate": 5.562861647760751e-05,
      "loss": 2.8499,
      "step": 185020
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4104461669921875,
      "learning_rate": 5.562624371462535e-05,
      "loss": 3.0797,
      "step": 185021
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7246809005737305,
      "learning_rate": 5.562387099707698e-05,
      "loss": 3.022,
      "step": 185022
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.661180019378662,
      "learning_rate": 5.5621498324962636e-05,
      "loss": 2.7442,
      "step": 185023
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1462502479553223,
      "learning_rate": 5.5619125698282977e-05,
      "loss": 2.9987,
      "step": 185024
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8064584732055664,
      "learning_rate": 5.561675311703824e-05,
      "loss": 2.7965,
      "step": 185025
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1118078231811523,
      "learning_rate": 5.561438058122912e-05,
      "loss": 3.0413,
      "step": 185026
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1126036643981934,
      "learning_rate": 5.561200809085577e-05,
      "loss": 2.6326,
      "step": 185027
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.522418975830078,
      "learning_rate": 5.560963564591879e-05,
      "loss": 3.09,
      "step": 185028
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7984137535095215,
      "learning_rate": 5.560726324641858e-05,
      "loss": 2.9698,
      "step": 185029
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3100359439849854,
      "learning_rate": 5.560489089235561e-05,
      "loss": 2.9414,
      "step": 185030
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.734635591506958,
      "learning_rate": 5.560251858373024e-05,
      "loss": 2.6905,
      "step": 185031
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5927724838256836,
      "learning_rate": 5.560014632054308e-05,
      "loss": 3.0072,
      "step": 185032
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.001851797103882,
      "learning_rate": 5.559777410279445e-05,
      "loss": 2.9031,
      "step": 185033
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9483189582824707,
      "learning_rate": 5.55954019304848e-05,
      "loss": 2.9556,
      "step": 185034
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8166604042053223,
      "learning_rate": 5.55930298036145e-05,
      "loss": 3.0391,
      "step": 185035
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7452199459075928,
      "learning_rate": 5.559065772218415e-05,
      "loss": 2.855,
      "step": 185036
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4622247219085693,
      "learning_rate": 5.558828568619406e-05,
      "loss": 2.8146,
      "step": 185037
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.815861463546753,
      "learning_rate": 5.558591369564475e-05,
      "loss": 2.6957,
      "step": 185038
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.273267984390259,
      "learning_rate": 5.5583541750536656e-05,
      "loss": 3.0351,
      "step": 185039
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7368016242980957,
      "learning_rate": 5.5581169850870214e-05,
      "loss": 2.9957,
      "step": 185040
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.811528444290161,
      "learning_rate": 5.5578797996645755e-05,
      "loss": 2.9259,
      "step": 185041
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.611504316329956,
      "learning_rate": 5.5576426187863874e-05,
      "loss": 2.922,
      "step": 185042
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5414254665374756,
      "learning_rate": 5.5574054424524885e-05,
      "loss": 2.8556,
      "step": 185043
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.525423526763916,
      "learning_rate": 5.5571682706629375e-05,
      "loss": 2.8075,
      "step": 185044
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6841301918029785,
      "learning_rate": 5.556931103417768e-05,
      "loss": 2.6977,
      "step": 185045
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7530179023742676,
      "learning_rate": 5.55669394071702e-05,
      "loss": 2.7864,
      "step": 185046
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3641867637634277,
      "learning_rate": 5.556456782560753e-05,
      "loss": 3.0299,
      "step": 185047
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.999803304672241,
      "learning_rate": 5.5562196289490014e-05,
      "loss": 2.8837,
      "step": 185048
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7767622470855713,
      "learning_rate": 5.555982479881801e-05,
      "loss": 2.8908,
      "step": 185049
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8268442153930664,
      "learning_rate": 5.555745335359215e-05,
      "loss": 2.8107,
      "step": 185050
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.575848340988159,
      "learning_rate": 5.555508195381274e-05,
      "loss": 3.0446,
      "step": 185051
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.091379404067993,
      "learning_rate": 5.555271059948021e-05,
      "loss": 3.0014,
      "step": 185052
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.8654603958129883,
      "learning_rate": 5.555033929059509e-05,
      "loss": 3.0457,
      "step": 185053
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6899824142456055,
      "learning_rate": 5.5547968027157716e-05,
      "loss": 3.2081,
      "step": 185054
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1251893043518066,
      "learning_rate": 5.554559680916869e-05,
      "loss": 2.9294,
      "step": 185055
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1193296909332275,
      "learning_rate": 5.554322563662834e-05,
      "loss": 2.9806,
      "step": 185056
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8556389808654785,
      "learning_rate": 5.5540854509537104e-05,
      "loss": 2.9615,
      "step": 185057
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.781745672225952,
      "learning_rate": 5.553848342789534e-05,
      "loss": 2.7733,
      "step": 185058
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.871936559677124,
      "learning_rate": 5.553611239170369e-05,
      "loss": 2.514,
      "step": 185059
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9685218334198,
      "learning_rate": 5.553374140096242e-05,
      "loss": 2.9103,
      "step": 185060
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7112510204315186,
      "learning_rate": 5.553137045567212e-05,
      "loss": 2.9896,
      "step": 185061
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.520456552505493,
      "learning_rate": 5.5528999555833164e-05,
      "loss": 3.0171,
      "step": 185062
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7028441429138184,
      "learning_rate": 5.5526628701445955e-05,
      "loss": 2.8783,
      "step": 185063
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8924896717071533,
      "learning_rate": 5.552425789251088e-05,
      "loss": 2.8811,
      "step": 185064
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8567216396331787,
      "learning_rate": 5.5521887129028555e-05,
      "loss": 2.9977,
      "step": 185065
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2057483196258545,
      "learning_rate": 5.551951641099923e-05,
      "loss": 2.7389,
      "step": 185066
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.126276969909668,
      "learning_rate": 5.551714573842355e-05,
      "loss": 2.9077,
      "step": 185067
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.01851224899292,
      "learning_rate": 5.5514775111301746e-05,
      "loss": 3.0159,
      "step": 185068
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.805281400680542,
      "learning_rate": 5.5512404529634515e-05,
      "loss": 2.6573,
      "step": 185069
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.0555596351623535,
      "learning_rate": 5.551003399342202e-05,
      "loss": 2.7781,
      "step": 185070
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.002843379974365,
      "learning_rate": 5.550766350266487e-05,
      "loss": 3.0388,
      "step": 185071
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.486575126647949,
      "learning_rate": 5.550529305736339e-05,
      "loss": 3.3025,
      "step": 185072
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.795567274093628,
      "learning_rate": 5.550292265751815e-05,
      "loss": 2.9336,
      "step": 185073
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8112149238586426,
      "learning_rate": 5.5500552303129474e-05,
      "loss": 3.3224,
      "step": 185074
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0095391273498535,
      "learning_rate": 5.549818199419801e-05,
      "loss": 2.9628,
      "step": 185075
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.2309176921844482,
      "learning_rate": 5.5495811730723914e-05,
      "loss": 3.2196,
      "step": 185076
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.2547173500061035,
      "learning_rate": 5.549344151270783e-05,
      "loss": 2.8638,
      "step": 185077
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.980851650238037,
      "learning_rate": 5.549107134015004e-05,
      "loss": 3.0614,
      "step": 185078
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.262230396270752,
      "learning_rate": 5.5488701213051154e-05,
      "loss": 3.1082,
      "step": 185079
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.487032890319824,
      "learning_rate": 5.548633113141148e-05,
      "loss": 3.0525,
      "step": 185080
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0158791542053223,
      "learning_rate": 5.5483961095231636e-05,
      "loss": 2.8039,
      "step": 185081
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.603372573852539,
      "learning_rate": 5.548159110451179e-05,
      "loss": 2.7548,
      "step": 185082
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7398018836975098,
      "learning_rate": 5.547922115925265e-05,
      "loss": 3.0193,
      "step": 185083
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.630204439163208,
      "learning_rate": 5.5476851259454415e-05,
      "loss": 2.9167,
      "step": 185084
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4813754558563232,
      "learning_rate": 5.5474481405117746e-05,
      "loss": 2.7485,
      "step": 185085
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7792904376983643,
      "learning_rate": 5.547211159624291e-05,
      "loss": 2.8958,
      "step": 185086
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4360146522521973,
      "learning_rate": 5.546974183283057e-05,
      "loss": 2.8806,
      "step": 185087
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.410959005355835,
      "learning_rate": 5.546737211488087e-05,
      "loss": 2.9206,
      "step": 185088
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9330735206604004,
      "learning_rate": 5.546500244239447e-05,
      "loss": 2.7421,
      "step": 185089
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8884084224700928,
      "learning_rate": 5.546263281537166e-05,
      "loss": 2.8313,
      "step": 185090
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9756624698638916,
      "learning_rate": 5.5460263233813064e-05,
      "loss": 2.9892,
      "step": 185091
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6750385761260986,
      "learning_rate": 5.5457893697718925e-05,
      "loss": 2.8191,
      "step": 185092
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.121359348297119,
      "learning_rate": 5.545552420708995e-05,
      "loss": 2.8851,
      "step": 185093
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6264729499816895,
      "learning_rate": 5.545315476192624e-05,
      "loss": 2.9124,
      "step": 185094
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6200339794158936,
      "learning_rate": 5.5450785362228504e-05,
      "loss": 2.7235,
      "step": 185095
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7759320735931396,
      "learning_rate": 5.544841600799699e-05,
      "loss": 3.0357,
      "step": 185096
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3848342895507812,
      "learning_rate": 5.544604669923231e-05,
      "loss": 2.9939,
      "step": 185097
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7736597061157227,
      "learning_rate": 5.544367743593475e-05,
      "loss": 2.9461,
      "step": 185098
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.763580799102783,
      "learning_rate": 5.5441308218104895e-05,
      "loss": 3.1463,
      "step": 185099
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.99282169342041,
      "learning_rate": 5.5438939045743136e-05,
      "loss": 2.9095,
      "step": 185100
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7000722885131836,
      "learning_rate": 5.543656991884987e-05,
      "loss": 2.961,
      "step": 185101
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.660501003265381,
      "learning_rate": 5.5434200837425505e-05,
      "loss": 2.7176,
      "step": 185102
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.494415760040283,
      "learning_rate": 5.5431831801470596e-05,
      "loss": 3.2544,
      "step": 185103
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.861161231994629,
      "learning_rate": 5.542946281098548e-05,
      "loss": 3.1535,
      "step": 185104
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0988807678222656,
      "learning_rate": 5.54270938659707e-05,
      "loss": 2.8935,
      "step": 185105
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.482509136199951,
      "learning_rate": 5.5424724966426636e-05,
      "loss": 3.0109,
      "step": 185106
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7080020904541016,
      "learning_rate": 5.5422356112353736e-05,
      "loss": 2.8543,
      "step": 185107
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4624836444854736,
      "learning_rate": 5.541998730375237e-05,
      "loss": 3.1986,
      "step": 185108
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5258100032806396,
      "learning_rate": 5.541761854062309e-05,
      "loss": 3.1154,
      "step": 185109
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0087878704071045,
      "learning_rate": 5.5415249822966266e-05,
      "loss": 2.8007,
      "step": 185110
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.06251859664917,
      "learning_rate": 5.541288115078241e-05,
      "loss": 3.079,
      "step": 185111
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.036200761795044,
      "learning_rate": 5.54105125240719e-05,
      "loss": 2.783,
      "step": 185112
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3939638137817383,
      "learning_rate": 5.5408143942835226e-05,
      "loss": 2.9666,
      "step": 185113
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.153193712234497,
      "learning_rate": 5.540577540707271e-05,
      "loss": 2.564,
      "step": 185114
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8800835609436035,
      "learning_rate": 5.540340691678494e-05,
      "loss": 2.9909,
      "step": 185115
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.855264902114868,
      "learning_rate": 5.540103847197224e-05,
      "loss": 2.9317,
      "step": 185116
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8044397830963135,
      "learning_rate": 5.5398670072635155e-05,
      "loss": 2.8668,
      "step": 185117
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6618566513061523,
      "learning_rate": 5.539630171877409e-05,
      "loss": 3.0044,
      "step": 185118
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.8222177028656006,
      "learning_rate": 5.539393341038946e-05,
      "loss": 3.0008,
      "step": 185119
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.674786329269409,
      "learning_rate": 5.539156514748164e-05,
      "loss": 2.7918,
      "step": 185120
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.325918674468994,
      "learning_rate": 5.538919693005125e-05,
      "loss": 2.8192,
      "step": 185121
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9005720615386963,
      "learning_rate": 5.538682875809851e-05,
      "loss": 3.0146,
      "step": 185122
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3042118549346924,
      "learning_rate": 5.538446063162406e-05,
      "loss": 3.0344,
      "step": 185123
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.55706524848938,
      "learning_rate": 5.5382092550628266e-05,
      "loss": 2.994,
      "step": 185124
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.747851610183716,
      "learning_rate": 5.5379724515111554e-05,
      "loss": 3.0351,
      "step": 185125
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5220022201538086,
      "learning_rate": 5.5377356525074303e-05,
      "loss": 2.9494,
      "step": 185126
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.243389368057251,
      "learning_rate": 5.53749885805171e-05,
      "loss": 3.0455,
      "step": 185127
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.558521270751953,
      "learning_rate": 5.5372620681440216e-05,
      "loss": 3.073,
      "step": 185128
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4248528480529785,
      "learning_rate": 5.537025282784425e-05,
      "loss": 3.0119,
      "step": 185129
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.638942241668701,
      "learning_rate": 5.5367885019729574e-05,
      "loss": 3.1722,
      "step": 185130
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8027608394622803,
      "learning_rate": 5.5365517257096546e-05,
      "loss": 2.8171,
      "step": 185131
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6441586017608643,
      "learning_rate": 5.536314953994577e-05,
      "loss": 3.1098,
      "step": 185132
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.661681890487671,
      "learning_rate": 5.536078186827761e-05,
      "loss": 2.8853,
      "step": 185133
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.420742511749268,
      "learning_rate": 5.535841424209241e-05,
      "loss": 2.933,
      "step": 185134
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6649088859558105,
      "learning_rate": 5.5356046661390785e-05,
      "loss": 2.7635,
      "step": 185135
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.861857891082764,
      "learning_rate": 5.535367912617308e-05,
      "loss": 2.7992,
      "step": 185136
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.898280620574951,
      "learning_rate": 5.535131163643969e-05,
      "loss": 2.9201,
      "step": 185137
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5591673851013184,
      "learning_rate": 5.534894419219115e-05,
      "loss": 2.9809,
      "step": 185138
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7831385135650635,
      "learning_rate": 5.534657679342789e-05,
      "loss": 3.1653,
      "step": 185139
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.061489105224609,
      "learning_rate": 5.534420944015021e-05,
      "loss": 2.8595,
      "step": 185140
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.282444953918457,
      "learning_rate": 5.534184213235875e-05,
      "loss": 3.0118,
      "step": 185141
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.730635166168213,
      "learning_rate": 5.533947487005387e-05,
      "loss": 3.0042,
      "step": 185142
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0760726928710938,
      "learning_rate": 5.533710765323594e-05,
      "loss": 3.0288,
      "step": 185143
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1960787773132324,
      "learning_rate": 5.533474048190552e-05,
      "loss": 2.9007,
      "step": 185144
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7251861095428467,
      "learning_rate": 5.5332373356062875e-05,
      "loss": 2.7756,
      "step": 185145
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5693986415863037,
      "learning_rate": 5.5330006275708686e-05,
      "loss": 2.9959,
      "step": 185146
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5265705585479736,
      "learning_rate": 5.5327639240843236e-05,
      "loss": 3.1032,
      "step": 185147
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6776461601257324,
      "learning_rate": 5.532527225146703e-05,
      "loss": 3.0098,
      "step": 185148
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.953234910964966,
      "learning_rate": 5.5322905307580376e-05,
      "loss": 2.9941,
      "step": 185149
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7253408432006836,
      "learning_rate": 5.53205384091839e-05,
      "loss": 2.7742,
      "step": 185150
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.632117509841919,
      "learning_rate": 5.5318171556277866e-05,
      "loss": 3.0197,
      "step": 185151
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.917714834213257,
      "learning_rate": 5.531580474886288e-05,
      "loss": 3.055,
      "step": 185152
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.742550849914551,
      "learning_rate": 5.5313437986939235e-05,
      "loss": 2.9074,
      "step": 185153
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5574190616607666,
      "learning_rate": 5.53110712705076e-05,
      "loss": 3.2699,
      "step": 185154
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.937530279159546,
      "learning_rate": 5.5308704599568076e-05,
      "loss": 2.932,
      "step": 185155
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5878798961639404,
      "learning_rate": 5.5306337974121395e-05,
      "loss": 3.0362,
      "step": 185156
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.748706579208374,
      "learning_rate": 5.530397139416779e-05,
      "loss": 2.9257,
      "step": 185157
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7658307552337646,
      "learning_rate": 5.530160485970786e-05,
      "loss": 2.9877,
      "step": 185158
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7774407863616943,
      "learning_rate": 5.529923837074194e-05,
      "loss": 2.8603,
      "step": 185159
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.678321361541748,
      "learning_rate": 5.5296871927270626e-05,
      "loss": 2.9776,
      "step": 185160
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2291219234466553,
      "learning_rate": 5.529450552929412e-05,
      "loss": 2.9788,
      "step": 185161
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.714136123657227,
      "learning_rate": 5.5292139176813024e-05,
      "loss": 2.9395,
      "step": 185162
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.250302314758301,
      "learning_rate": 5.52897728698277e-05,
      "loss": 3.1597,
      "step": 185163
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.19276237487793,
      "learning_rate": 5.528740660833868e-05,
      "loss": 2.8401,
      "step": 185164
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.606173992156982,
      "learning_rate": 5.528504039234627e-05,
      "loss": 2.8469,
      "step": 185165
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.938307762145996,
      "learning_rate": 5.5282674221851096e-05,
      "loss": 3.0943,
      "step": 185166
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.622896671295166,
      "learning_rate": 5.528030809685346e-05,
      "loss": 2.7937,
      "step": 185167
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.9035861492156982,
      "learning_rate": 5.527794201735384e-05,
      "loss": 2.8594,
      "step": 185168
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.718571186065674,
      "learning_rate": 5.5275575983352615e-05,
      "loss": 2.8929,
      "step": 185169
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.654703617095947,
      "learning_rate": 5.527320999485033e-05,
      "loss": 2.9749,
      "step": 185170
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.584280490875244,
      "learning_rate": 5.527084405184732e-05,
      "loss": 3.2562,
      "step": 185171
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.606506824493408,
      "learning_rate": 5.526847815434414e-05,
      "loss": 3.1411,
      "step": 185172
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.9894490242004395,
      "learning_rate": 5.526611230234117e-05,
      "loss": 2.883,
      "step": 185173
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.225452423095703,
      "learning_rate": 5.526374649583887e-05,
      "loss": 2.8705,
      "step": 185174
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0408895015716553,
      "learning_rate": 5.526138073483757e-05,
      "loss": 2.8323,
      "step": 185175
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.911261796951294,
      "learning_rate": 5.525901501933787e-05,
      "loss": 2.9955,
      "step": 185176
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.477292537689209,
      "learning_rate": 5.525664934934007e-05,
      "loss": 3.0835,
      "step": 185177
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.3771257400512695,
      "learning_rate": 5.525428372484475e-05,
      "loss": 3.0986,
      "step": 185178
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5965704917907715,
      "learning_rate": 5.525191814585229e-05,
      "loss": 2.8686,
      "step": 185179
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4570388793945312,
      "learning_rate": 5.52495526123631e-05,
      "loss": 3.0185,
      "step": 185180
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5702579021453857,
      "learning_rate": 5.524718712437758e-05,
      "loss": 2.8344,
      "step": 185181
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.202354907989502,
      "learning_rate": 5.524482168189629e-05,
      "loss": 3.0613,
      "step": 185182
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3008956909179688,
      "learning_rate": 5.524245628491953e-05,
      "loss": 2.702,
      "step": 185183
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.313577651977539,
      "learning_rate": 5.524009093344791e-05,
      "loss": 2.8034,
      "step": 185184
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.73468279838562,
      "learning_rate": 5.523772562748175e-05,
      "loss": 2.8696,
      "step": 185185
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5439443588256836,
      "learning_rate": 5.5235360367021565e-05,
      "loss": 2.9844,
      "step": 185186
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.36979341506958,
      "learning_rate": 5.523299515206764e-05,
      "loss": 2.9231,
      "step": 185187
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.898543357849121,
      "learning_rate": 5.523062998262058e-05,
      "loss": 3.0638,
      "step": 185188
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4735841751098633,
      "learning_rate": 5.522826485868072e-05,
      "loss": 2.8667,
      "step": 185189
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.795525550842285,
      "learning_rate": 5.5225899780248614e-05,
      "loss": 3.0963,
      "step": 185190
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.219729423522949,
      "learning_rate": 5.522353474732462e-05,
      "loss": 2.8345,
      "step": 185191
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4892337322235107,
      "learning_rate": 5.522116975990921e-05,
      "loss": 3.0195,
      "step": 185192
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4151406288146973,
      "learning_rate": 5.521880481800271e-05,
      "loss": 3.0493,
      "step": 185193
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.985250234603882,
      "learning_rate": 5.521643992160576e-05,
      "loss": 2.7474,
      "step": 185194
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.344139575958252,
      "learning_rate": 5.521407507071858e-05,
      "loss": 3.0018,
      "step": 185195
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9850540161132812,
      "learning_rate": 5.5211710265341825e-05,
      "loss": 2.9602,
      "step": 185196
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.052921772003174,
      "learning_rate": 5.520934550547583e-05,
      "loss": 3.0198,
      "step": 185197
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5917751789093018,
      "learning_rate": 5.5206980791121035e-05,
      "loss": 3.1406,
      "step": 185198
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7881951332092285,
      "learning_rate": 5.52046161222778e-05,
      "loss": 2.9347,
      "step": 185199
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.002436876296997,
      "learning_rate": 5.520225149894676e-05,
      "loss": 2.9531,
      "step": 185200
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.394990921020508,
      "learning_rate": 5.519988692112811e-05,
      "loss": 2.8591,
      "step": 185201
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1021647453308105,
      "learning_rate": 5.5197522388822555e-05,
      "loss": 2.8703,
      "step": 185202
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.759756326675415,
      "learning_rate": 5.519515790203036e-05,
      "loss": 2.784,
      "step": 185203
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4794375896453857,
      "learning_rate": 5.519279346075203e-05,
      "loss": 2.7119,
      "step": 185204
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.505312204360962,
      "learning_rate": 5.519042906498788e-05,
      "loss": 2.9324,
      "step": 185205
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3260560035705566,
      "learning_rate": 5.518806471473857e-05,
      "loss": 2.7109,
      "step": 185206
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.1910810470581055,
      "learning_rate": 5.518570041000431e-05,
      "loss": 3.0309,
      "step": 185207
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0699918270111084,
      "learning_rate": 5.5183336150785704e-05,
      "loss": 2.9018,
      "step": 185208
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7668046951293945,
      "learning_rate": 5.518097193708316e-05,
      "loss": 2.9175,
      "step": 185209
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3200857639312744,
      "learning_rate": 5.517860776889711e-05,
      "loss": 3.0793,
      "step": 185210
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0773167610168457,
      "learning_rate": 5.517624364622792e-05,
      "loss": 2.8479,
      "step": 185211
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8757598400115967,
      "learning_rate": 5.517387956907611e-05,
      "loss": 2.8666,
      "step": 185212
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.0653395652771,
      "learning_rate": 5.517151553744206e-05,
      "loss": 2.8074,
      "step": 185213
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5475897789001465,
      "learning_rate": 5.51691515513263e-05,
      "loss": 2.8663,
      "step": 185214
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.740368366241455,
      "learning_rate": 5.5166787610729227e-05,
      "loss": 2.9629,
      "step": 185215
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8934879302978516,
      "learning_rate": 5.516442371565117e-05,
      "loss": 2.8861,
      "step": 185216
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2564713954925537,
      "learning_rate": 5.516205986609278e-05,
      "loss": 3.0797,
      "step": 185217
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.666476249694824,
      "learning_rate": 5.515969606205437e-05,
      "loss": 2.7757,
      "step": 185218
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.44197154045105,
      "learning_rate": 5.515733230353632e-05,
      "loss": 2.8837,
      "step": 185219
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.653912305831909,
      "learning_rate": 5.5154968590539216e-05,
      "loss": 2.8419,
      "step": 185220
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.832674741744995,
      "learning_rate": 5.515260492306344e-05,
      "loss": 2.8081,
      "step": 185221
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.300572633743286,
      "learning_rate": 5.515024130110931e-05,
      "loss": 2.835,
      "step": 185222
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.596729278564453,
      "learning_rate": 5.514787772467747e-05,
      "loss": 2.9609,
      "step": 185223
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.690845012664795,
      "learning_rate": 5.514551419376828e-05,
      "loss": 2.9767,
      "step": 185224
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.1851348876953125,
      "learning_rate": 5.5143150708382044e-05,
      "loss": 2.8602,
      "step": 185225
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.488925933837891,
      "learning_rate": 5.514078726851943e-05,
      "loss": 2.8989,
      "step": 185226
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.368753671646118,
      "learning_rate": 5.513842387418073e-05,
      "loss": 2.8228,
      "step": 185227
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6970293521881104,
      "learning_rate": 5.513606052536638e-05,
      "loss": 2.8951,
      "step": 185228
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6653146743774414,
      "learning_rate": 5.513369722207691e-05,
      "loss": 3.1567,
      "step": 185229
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0196430683135986,
      "learning_rate": 5.513133396431263e-05,
      "loss": 3.1406,
      "step": 185230
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9312195777893066,
      "learning_rate": 5.512897075207413e-05,
      "loss": 2.9961,
      "step": 185231
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5918688774108887,
      "learning_rate": 5.512660758536182e-05,
      "loss": 2.692,
      "step": 185232
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.768949270248413,
      "learning_rate": 5.512424446417598e-05,
      "loss": 2.8374,
      "step": 185233
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.38844895362854,
      "learning_rate": 5.512188138851723e-05,
      "loss": 3.0618,
      "step": 185234
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8155126571655273,
      "learning_rate": 5.5119518358385965e-05,
      "loss": 2.8895,
      "step": 185235
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.2976460456848145,
      "learning_rate": 5.511715537378254e-05,
      "loss": 2.9137,
      "step": 185236
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.9407992362976074,
      "learning_rate": 5.5114792434707535e-05,
      "loss": 2.8781,
      "step": 185237
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5185017585754395,
      "learning_rate": 5.511242954116121e-05,
      "loss": 2.9765,
      "step": 185238
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7873375415802,
      "learning_rate": 5.51100666931442e-05,
      "loss": 2.9099,
      "step": 185239
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6212778091430664,
      "learning_rate": 5.510770389065683e-05,
      "loss": 3.1754,
      "step": 185240
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.605114459991455,
      "learning_rate": 5.510534113369958e-05,
      "loss": 2.894,
      "step": 185241
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4860754013061523,
      "learning_rate": 5.51029784222728e-05,
      "loss": 2.8202,
      "step": 185242
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.912148952484131,
      "learning_rate": 5.510061575637708e-05,
      "loss": 2.9088,
      "step": 185243
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3919899463653564,
      "learning_rate": 5.509825313601266e-05,
      "loss": 2.8832,
      "step": 185244
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5207250118255615,
      "learning_rate": 5.509589056118022e-05,
      "loss": 3.033,
      "step": 185245
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.568441867828369,
      "learning_rate": 5.5093528031880064e-05,
      "loss": 2.9221,
      "step": 185246
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7838246822357178,
      "learning_rate": 5.5091165548112645e-05,
      "loss": 2.9065,
      "step": 185247
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7877321243286133,
      "learning_rate": 5.508880310987831e-05,
      "loss": 2.9022,
      "step": 185248
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3001084327697754,
      "learning_rate": 5.5086440717177685e-05,
      "loss": 3.0076,
      "step": 185249
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5636935234069824,
      "learning_rate": 5.508407837001103e-05,
      "loss": 3.0437,
      "step": 185250
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.172302484512329,
      "learning_rate": 5.5081716068378925e-05,
      "loss": 3.1333,
      "step": 185251
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.226576805114746,
      "learning_rate": 5.507935381228179e-05,
      "loss": 2.9377,
      "step": 185252
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.943356513977051,
      "learning_rate": 5.507699160172e-05,
      "loss": 2.6853,
      "step": 185253
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.280825614929199,
      "learning_rate": 5.507462943669395e-05,
      "loss": 2.7313,
      "step": 185254
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.655508518218994,
      "learning_rate": 5.507226731720421e-05,
      "loss": 3.0452,
      "step": 185255
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9156813621520996,
      "learning_rate": 5.506990524325111e-05,
      "loss": 2.9815,
      "step": 185256
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.10675048828125,
      "learning_rate": 5.506754321483522e-05,
      "loss": 2.8226,
      "step": 185257
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.763329267501831,
      "learning_rate": 5.5065181231956866e-05,
      "loss": 2.9242,
      "step": 185258
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.177427291870117,
      "learning_rate": 5.506281929461652e-05,
      "loss": 2.9599,
      "step": 185259
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.731149196624756,
      "learning_rate": 5.506045740281455e-05,
      "loss": 2.9534,
      "step": 185260
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1976959705352783,
      "learning_rate": 5.505809555655155e-05,
      "loss": 3.0199,
      "step": 185261
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.025683879852295,
      "learning_rate": 5.505573375582779e-05,
      "loss": 2.9644,
      "step": 185262
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.80009126663208,
      "learning_rate": 5.50533720006439e-05,
      "loss": 3.0909,
      "step": 185263
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9619555473327637,
      "learning_rate": 5.505101029100019e-05,
      "loss": 2.7498,
      "step": 185264
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0327394008636475,
      "learning_rate": 5.5048648626897105e-05,
      "loss": 2.8833,
      "step": 185265
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.806746482849121,
      "learning_rate": 5.5046287008335034e-05,
      "loss": 3.0,
      "step": 185266
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.113926410675049,
      "learning_rate": 5.5043925435314563e-05,
      "loss": 3.0166,
      "step": 185267
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5883727073669434,
      "learning_rate": 5.5041563907835964e-05,
      "loss": 2.8424,
      "step": 185268
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1994454860687256,
      "learning_rate": 5.503920242589983e-05,
      "loss": 2.7174,
      "step": 185269
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.918849468231201,
      "learning_rate": 5.503684098950657e-05,
      "loss": 2.7903,
      "step": 185270
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.801328659057617,
      "learning_rate": 5.503447959865655e-05,
      "loss": 2.7555,
      "step": 185271
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7633419036865234,
      "learning_rate": 5.503211825335019e-05,
      "loss": 3.0228,
      "step": 185272
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7929327487945557,
      "learning_rate": 5.502975695358803e-05,
      "loss": 2.8221,
      "step": 185273
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.079104423522949,
      "learning_rate": 5.502739569937041e-05,
      "loss": 2.7581,
      "step": 185274
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4026567935943604,
      "learning_rate": 5.50250344906979e-05,
      "loss": 2.8606,
      "step": 185275
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.448880434036255,
      "learning_rate": 5.5022673327570846e-05,
      "loss": 2.9903,
      "step": 185276
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6813323497772217,
      "learning_rate": 5.5020312209989725e-05,
      "loss": 3.0024,
      "step": 185277
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1181423664093018,
      "learning_rate": 5.501795113795484e-05,
      "loss": 2.9933,
      "step": 185278
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8615691661834717,
      "learning_rate": 5.5015590111466854e-05,
      "loss": 3.0046,
      "step": 185279
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.609010696411133,
      "learning_rate": 5.5013229130526005e-05,
      "loss": 2.8783,
      "step": 185280
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.952012300491333,
      "learning_rate": 5.501086819513288e-05,
      "loss": 2.5584,
      "step": 185281
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.143130302429199,
      "learning_rate": 5.50085073052879e-05,
      "loss": 2.6264,
      "step": 185282
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6464221477508545,
      "learning_rate": 5.500614646099144e-05,
      "loss": 2.7605,
      "step": 185283
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.520970582962036,
      "learning_rate": 5.500378566224388e-05,
      "loss": 3.0435,
      "step": 185284
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6378815174102783,
      "learning_rate": 5.500142490904582e-05,
      "loss": 2.9241,
      "step": 185285
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9191012382507324,
      "learning_rate": 5.499906420139753e-05,
      "loss": 2.8497,
      "step": 185286
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.547355890274048,
      "learning_rate": 5.4996703539299626e-05,
      "loss": 2.8934,
      "step": 185287
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5736939907073975,
      "learning_rate": 5.499434292275245e-05,
      "loss": 2.7345,
      "step": 185288
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7408254146575928,
      "learning_rate": 5.499198235175648e-05,
      "loss": 2.8978,
      "step": 185289
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5360076427459717,
      "learning_rate": 5.498962182631204e-05,
      "loss": 2.8153,
      "step": 185290
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.067471504211426,
      "learning_rate": 5.498726134641972e-05,
      "loss": 2.6326,
      "step": 185291
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6573705673217773,
      "learning_rate": 5.49849009120798e-05,
      "loss": 2.7997,
      "step": 185292
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.79872989654541,
      "learning_rate": 5.498254052329294e-05,
      "loss": 3.0673,
      "step": 185293
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1508572101593018,
      "learning_rate": 5.498018018005941e-05,
      "loss": 2.8347,
      "step": 185294
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6510202884674072,
      "learning_rate": 5.4977819882379706e-05,
      "loss": 2.7852,
      "step": 185295
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6628332138061523,
      "learning_rate": 5.497545963025416e-05,
      "loss": 2.7743,
      "step": 185296
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9662253856658936,
      "learning_rate": 5.4973099423683374e-05,
      "loss": 2.764,
      "step": 185297
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7541677951812744,
      "learning_rate": 5.497073926266765e-05,
      "loss": 2.8202,
      "step": 185298
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1099436283111572,
      "learning_rate": 5.496837914720755e-05,
      "loss": 2.8442,
      "step": 185299
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2120134830474854,
      "learning_rate": 5.496601907730341e-05,
      "loss": 2.9781,
      "step": 185300
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9736573696136475,
      "learning_rate": 5.4963659052955834e-05,
      "loss": 2.8041,
      "step": 185301
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.437570095062256,
      "learning_rate": 5.496129907416501e-05,
      "loss": 2.9859,
      "step": 185302
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0912368297576904,
      "learning_rate": 5.495893914093158e-05,
      "loss": 2.8042,
      "step": 185303
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.554145336151123,
      "learning_rate": 5.495657925325581e-05,
      "loss": 2.715,
      "step": 185304
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7560675144195557,
      "learning_rate": 5.495421941113832e-05,
      "loss": 2.9452,
      "step": 185305
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9074087142944336,
      "learning_rate": 5.49518596145794e-05,
      "loss": 2.9534,
      "step": 185306
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5641555786132812,
      "learning_rate": 5.4949499863579624e-05,
      "loss": 2.8818,
      "step": 185307
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5744402408599854,
      "learning_rate": 5.494714015813938e-05,
      "loss": 2.9452,
      "step": 185308
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.7452471256256104,
      "learning_rate": 5.494478049825909e-05,
      "loss": 2.9609,
      "step": 185309
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.889718532562256,
      "learning_rate": 5.4942420883939087e-05,
      "loss": 2.906,
      "step": 185310
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3799407482147217,
      "learning_rate": 5.4940061315180004e-05,
      "loss": 2.8678,
      "step": 185311
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.644434690475464,
      "learning_rate": 5.493770179198211e-05,
      "loss": 2.7238,
      "step": 185312
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1943578720092773,
      "learning_rate": 5.4935342314346006e-05,
      "loss": 2.7583,
      "step": 185313
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.198885917663574,
      "learning_rate": 5.4932982882272027e-05,
      "loss": 3.1126,
      "step": 185314
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.67435622215271,
      "learning_rate": 5.493062349576059e-05,
      "loss": 2.7359,
      "step": 185315
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.527810573577881,
      "learning_rate": 5.492826415481222e-05,
      "loss": 2.9793,
      "step": 185316
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.25685453414917,
      "learning_rate": 5.492590485942733e-05,
      "loss": 2.8541,
      "step": 185317
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.137303352355957,
      "learning_rate": 5.492354560960626e-05,
      "loss": 3.0349,
      "step": 185318
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8487887382507324,
      "learning_rate": 5.49211864053496e-05,
      "loss": 3.0832,
      "step": 185319
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.98486328125,
      "learning_rate": 5.4918827246657736e-05,
      "loss": 2.7055,
      "step": 185320
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.853806495666504,
      "learning_rate": 5.491646813353099e-05,
      "loss": 2.8453,
      "step": 185321
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.879460334777832,
      "learning_rate": 5.491410906596999e-05,
      "loss": 3.1117,
      "step": 185322
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0190370082855225,
      "learning_rate": 5.491175004397508e-05,
      "loss": 3.201,
      "step": 185323
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.542797565460205,
      "learning_rate": 5.490939106754662e-05,
      "loss": 3.2509,
      "step": 185324
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.379359483718872,
      "learning_rate": 5.4907032136685235e-05,
      "loss": 2.8456,
      "step": 185325
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3656363487243652,
      "learning_rate": 5.490467325139121e-05,
      "loss": 3.0384,
      "step": 185326
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.34808087348938,
      "learning_rate": 5.4902314411664995e-05,
      "loss": 3.0739,
      "step": 185327
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.611070394515991,
      "learning_rate": 5.489995561750713e-05,
      "loss": 2.7867,
      "step": 185328
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.289130926132202,
      "learning_rate": 5.489759686891792e-05,
      "loss": 2.7846,
      "step": 185329
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.734656572341919,
      "learning_rate": 5.489523816589795e-05,
      "loss": 2.9409,
      "step": 185330
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8698770999908447,
      "learning_rate": 5.489287950844756e-05,
      "loss": 2.7747,
      "step": 185331
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.177515983581543,
      "learning_rate": 5.4890520896567224e-05,
      "loss": 2.8883,
      "step": 185332
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.962550163269043,
      "learning_rate": 5.48881623302573e-05,
      "loss": 2.8617,
      "step": 185333
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6689205169677734,
      "learning_rate": 5.4885803809518357e-05,
      "loss": 3.0368,
      "step": 185334
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0090725421905518,
      "learning_rate": 5.48834453343507e-05,
      "loss": 2.885,
      "step": 185335
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.666109085083008,
      "learning_rate": 5.488108690475491e-05,
      "loss": 2.5698,
      "step": 185336
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.594045877456665,
      "learning_rate": 5.487872852073134e-05,
      "loss": 3.0284,
      "step": 185337
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.070120096206665,
      "learning_rate": 5.487637018228048e-05,
      "loss": 2.9673,
      "step": 185338
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.94392991065979,
      "learning_rate": 5.487401188940263e-05,
      "loss": 2.9768,
      "step": 185339
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0709173679351807,
      "learning_rate": 5.48716536420984e-05,
      "loss": 2.7301,
      "step": 185340
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8494608402252197,
      "learning_rate": 5.486929544036808e-05,
      "loss": 2.7687,
      "step": 185341
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1379787921905518,
      "learning_rate": 5.4866937284212266e-05,
      "loss": 3.0543,
      "step": 185342
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.978374481201172,
      "learning_rate": 5.486457917363133e-05,
      "loss": 2.7864,
      "step": 185343
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.686147928237915,
      "learning_rate": 5.486222110862567e-05,
      "loss": 2.7143,
      "step": 185344
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.748567581176758,
      "learning_rate": 5.4859863089195685e-05,
      "loss": 2.9481,
      "step": 185345
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5986263751983643,
      "learning_rate": 5.485750511534195e-05,
      "loss": 3.127,
      "step": 185346
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7723071575164795,
      "learning_rate": 5.4855147187064785e-05,
      "loss": 2.8847,
      "step": 185347
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4837565422058105,
      "learning_rate": 5.4852789304364695e-05,
      "loss": 2.9703,
      "step": 185348
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6447699069976807,
      "learning_rate": 5.485043146724215e-05,
      "loss": 2.984,
      "step": 185349
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4950599670410156,
      "learning_rate": 5.4848073675697515e-05,
      "loss": 3.1127,
      "step": 185350
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.408651351928711,
      "learning_rate": 5.484571592973118e-05,
      "loss": 2.948,
      "step": 185351
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.672910451889038,
      "learning_rate": 5.484335822934373e-05,
      "loss": 3.0784,
      "step": 185352
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.09434175491333,
      "learning_rate": 5.484100057453544e-05,
      "loss": 3.084,
      "step": 185353
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3933463096618652,
      "learning_rate": 5.483864296530693e-05,
      "loss": 3.0118,
      "step": 185354
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.062190294265747,
      "learning_rate": 5.4836285401658566e-05,
      "loss": 3.0222,
      "step": 185355
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.49808931350708,
      "learning_rate": 5.483392788359073e-05,
      "loss": 2.8882,
      "step": 185356
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4794256687164307,
      "learning_rate": 5.483157041110381e-05,
      "loss": 2.8882,
      "step": 185357
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1783297061920166,
      "learning_rate": 5.482921298419842e-05,
      "loss": 2.9568,
      "step": 185358
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.926692485809326,
      "learning_rate": 5.4826855602874845e-05,
      "loss": 2.8533,
      "step": 185359
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.8208656311035156,
      "learning_rate": 5.482449826713363e-05,
      "loss": 2.711,
      "step": 185360
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.6586358547210693,
      "learning_rate": 5.4822140976975195e-05,
      "loss": 2.7612,
      "step": 185361
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9479572772979736,
      "learning_rate": 5.481978373239996e-05,
      "loss": 2.8745,
      "step": 185362
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0917389392852783,
      "learning_rate": 5.481742653340826e-05,
      "loss": 2.8543,
      "step": 185363
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.9995832443237305,
      "learning_rate": 5.4815069380000696e-05,
      "loss": 2.9065,
      "step": 185364
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0831446647644043,
      "learning_rate": 5.4812712272177575e-05,
      "loss": 2.9429,
      "step": 185365
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.951152801513672,
      "learning_rate": 5.4810355209939484e-05,
      "loss": 3.041,
      "step": 185366
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.250036716461182,
      "learning_rate": 5.480799819328666e-05,
      "loss": 3.0489,
      "step": 185367
    },
    {
      "epoch": 2.41,
      "grad_norm": 6.146627426147461,
      "learning_rate": 5.480564122221984e-05,
      "loss": 2.6599,
      "step": 185368
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.437834739685059,
      "learning_rate": 5.480328429673912e-05,
      "loss": 2.6325,
      "step": 185369
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.264440059661865,
      "learning_rate": 5.480092741684521e-05,
      "loss": 2.7665,
      "step": 185370
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.289213180541992,
      "learning_rate": 5.479857058253832e-05,
      "loss": 2.9755,
      "step": 185371
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.674600601196289,
      "learning_rate": 5.479621379381911e-05,
      "loss": 2.9263,
      "step": 185372
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1450722217559814,
      "learning_rate": 5.479385705068779e-05,
      "loss": 2.7004,
      "step": 185373
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5345206260681152,
      "learning_rate": 5.479150035314511e-05,
      "loss": 2.6937,
      "step": 185374
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8674886226654053,
      "learning_rate": 5.4789143701191165e-05,
      "loss": 3.0286,
      "step": 185375
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4635426998138428,
      "learning_rate": 5.478678709482661e-05,
      "loss": 3.0335,
      "step": 185376
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.186262845993042,
      "learning_rate": 5.478443053405176e-05,
      "loss": 2.7525,
      "step": 185377
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3423941135406494,
      "learning_rate": 5.478207401886717e-05,
      "loss": 2.8547,
      "step": 185378
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5678391456604004,
      "learning_rate": 5.477971754927315e-05,
      "loss": 2.821,
      "step": 185379
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.963564157485962,
      "learning_rate": 5.477736112527036e-05,
      "loss": 3.0229,
      "step": 185380
    },
    {
      "epoch": 2.41,
      "grad_norm": 5.0274977684021,
      "learning_rate": 5.477500474685896e-05,
      "loss": 2.6382,
      "step": 185381
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.452153205871582,
      "learning_rate": 5.477264841403957e-05,
      "loss": 3.0173,
      "step": 185382
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.674924373626709,
      "learning_rate": 5.47702921268125e-05,
      "loss": 2.9391,
      "step": 185383
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6423115730285645,
      "learning_rate": 5.476793588517833e-05,
      "loss": 2.77,
      "step": 185384
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.0925073623657227,
      "learning_rate": 5.476557968913735e-05,
      "loss": 2.8668,
      "step": 185385
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4454288482666016,
      "learning_rate": 5.4763223538690234e-05,
      "loss": 2.7762,
      "step": 185386
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3415770530700684,
      "learning_rate": 5.476086743383715e-05,
      "loss": 2.8672,
      "step": 185387
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6051244735717773,
      "learning_rate": 5.4758511374578696e-05,
      "loss": 2.9711,
      "step": 185388
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.434258460998535,
      "learning_rate": 5.475615536091517e-05,
      "loss": 2.7296,
      "step": 185389
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.295766830444336,
      "learning_rate": 5.4753799392847194e-05,
      "loss": 2.783,
      "step": 185390
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.976457118988037,
      "learning_rate": 5.475144347037506e-05,
      "loss": 2.9886,
      "step": 185391
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7529845237731934,
      "learning_rate": 5.474908759349931e-05,
      "loss": 2.9563,
      "step": 185392
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.138590335845947,
      "learning_rate": 5.474673176222035e-05,
      "loss": 2.8352,
      "step": 185393
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5503480434417725,
      "learning_rate": 5.474437597653859e-05,
      "loss": 2.9339,
      "step": 185394
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8621201515197754,
      "learning_rate": 5.474202023645442e-05,
      "loss": 2.8862,
      "step": 185395
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.024971961975098,
      "learning_rate": 5.473966454196841e-05,
      "loss": 2.9225,
      "step": 185396
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7643206119537354,
      "learning_rate": 5.4737308893080854e-05,
      "loss": 2.8561,
      "step": 185397
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.326202392578125,
      "learning_rate": 5.473495328979233e-05,
      "loss": 2.8552,
      "step": 185398
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.159161329269409,
      "learning_rate": 5.473259773210319e-05,
      "loss": 2.8555,
      "step": 185399
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.329929351806641,
      "learning_rate": 5.473024222001384e-05,
      "loss": 2.7719,
      "step": 185400
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7544219493865967,
      "learning_rate": 5.4727886753524816e-05,
      "loss": 2.8752,
      "step": 185401
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.934238910675049,
      "learning_rate": 5.4725531332636555e-05,
      "loss": 2.7728,
      "step": 185402
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.275167942047119,
      "learning_rate": 5.472317595734934e-05,
      "loss": 2.9259,
      "step": 185403
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4767582416534424,
      "learning_rate": 5.472082062766379e-05,
      "loss": 2.687,
      "step": 185404
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.590315818786621,
      "learning_rate": 5.4718465343580286e-05,
      "loss": 2.9034,
      "step": 185405
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4620864391326904,
      "learning_rate": 5.471611010509914e-05,
      "loss": 3.0635,
      "step": 185406
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7360432147979736,
      "learning_rate": 5.471375491222102e-05,
      "loss": 3.0292,
      "step": 185407
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.362656831741333,
      "learning_rate": 5.4711399764946215e-05,
      "loss": 2.8535,
      "step": 185408
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.162520170211792,
      "learning_rate": 5.47090446632751e-05,
      "loss": 3.0584,
      "step": 185409
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.677137851715088,
      "learning_rate": 5.470668960720831e-05,
      "loss": 2.9173,
      "step": 185410
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8645236492156982,
      "learning_rate": 5.470433459674617e-05,
      "loss": 3.275,
      "step": 185411
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4198155403137207,
      "learning_rate": 5.470197963188905e-05,
      "loss": 2.799,
      "step": 185412
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2294256687164307,
      "learning_rate": 5.469962471263755e-05,
      "loss": 2.7234,
      "step": 185413
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.199967861175537,
      "learning_rate": 5.469726983899193e-05,
      "loss": 3.0351,
      "step": 185414
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.191887378692627,
      "learning_rate": 5.4694915010952764e-05,
      "loss": 2.7838,
      "step": 185415
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.481735944747925,
      "learning_rate": 5.4692560228520486e-05,
      "loss": 2.9633,
      "step": 185416
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1550045013427734,
      "learning_rate": 5.469020549169549e-05,
      "loss": 3.0302,
      "step": 185417
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4589626789093018,
      "learning_rate": 5.468785080047812e-05,
      "loss": 2.8004,
      "step": 185418
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3428940773010254,
      "learning_rate": 5.468549615486899e-05,
      "loss": 2.7111,
      "step": 185419
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7552456855773926,
      "learning_rate": 5.4683141554868384e-05,
      "loss": 2.8246,
      "step": 185420
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8347253799438477,
      "learning_rate": 5.468078700047689e-05,
      "loss": 2.8223,
      "step": 185421
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7806124687194824,
      "learning_rate": 5.4678432491694876e-05,
      "loss": 3.2506,
      "step": 185422
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.661302089691162,
      "learning_rate": 5.467607802852275e-05,
      "loss": 2.9502,
      "step": 185423
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8331971168518066,
      "learning_rate": 5.467372361096091e-05,
      "loss": 2.9092,
      "step": 185424
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.5585858821868896,
      "learning_rate": 5.4671369239009945e-05,
      "loss": 2.9253,
      "step": 185425
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.647730827331543,
      "learning_rate": 5.466901491267009e-05,
      "loss": 2.5381,
      "step": 185426
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6405680179595947,
      "learning_rate": 5.4666660631941995e-05,
      "loss": 2.6285,
      "step": 185427
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5451712608337402,
      "learning_rate": 5.4664306396826006e-05,
      "loss": 3.1041,
      "step": 185428
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.494478940963745,
      "learning_rate": 5.466195220732257e-05,
      "loss": 3.162,
      "step": 185429
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.911626100540161,
      "learning_rate": 5.465959806343201e-05,
      "loss": 3.1575,
      "step": 185430
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.98098087310791,
      "learning_rate": 5.465724396515493e-05,
      "loss": 3.1126,
      "step": 185431
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.439460039138794,
      "learning_rate": 5.465488991249163e-05,
      "loss": 2.843,
      "step": 185432
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3386051654815674,
      "learning_rate": 5.4652535905442674e-05,
      "loss": 2.9869,
      "step": 185433
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6436851024627686,
      "learning_rate": 5.4650181944008396e-05,
      "loss": 2.8643,
      "step": 185434
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8078577518463135,
      "learning_rate": 5.464782802818943e-05,
      "loss": 3.1377,
      "step": 185435
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.026121139526367,
      "learning_rate": 5.4645474157985915e-05,
      "loss": 2.9406,
      "step": 185436
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3612961769104004,
      "learning_rate": 5.464312033339847e-05,
      "loss": 2.949,
      "step": 185437
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.602450370788574,
      "learning_rate": 5.4640766554427476e-05,
      "loss": 2.7196,
      "step": 185438
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9487924575805664,
      "learning_rate": 5.463841282107345e-05,
      "loss": 3.1314,
      "step": 185439
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7658817768096924,
      "learning_rate": 5.4636059133336706e-05,
      "loss": 3.3888,
      "step": 185440
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7790162563323975,
      "learning_rate": 5.4633705491217905e-05,
      "loss": 2.9702,
      "step": 185441
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.778379201889038,
      "learning_rate": 5.463135189471715e-05,
      "loss": 2.9641,
      "step": 185442
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.899930000305176,
      "learning_rate": 5.462899834383516e-05,
      "loss": 2.9417,
      "step": 185443
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.637453317642212,
      "learning_rate": 5.462664483857219e-05,
      "loss": 2.9463,
      "step": 185444
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.9526257514953613,
      "learning_rate": 5.4624291378928856e-05,
      "loss": 2.9701,
      "step": 185445
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5317296981811523,
      "learning_rate": 5.4621937964905396e-05,
      "loss": 3.0526,
      "step": 185446
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.3428759574890137,
      "learning_rate": 5.461958459650251e-05,
      "loss": 3.0615,
      "step": 185447
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.393474817276001,
      "learning_rate": 5.46172312737203e-05,
      "loss": 3.102,
      "step": 185448
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.3497097492218018,
      "learning_rate": 5.461487799655949e-05,
      "loss": 2.8486,
      "step": 185449
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.4078662395477295,
      "learning_rate": 5.4612524765020295e-05,
      "loss": 2.7927,
      "step": 185450
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.603045701980591,
      "learning_rate": 5.461017157910337e-05,
      "loss": 3.0293,
      "step": 185451
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.094452142715454,
      "learning_rate": 5.460781843880895e-05,
      "loss": 3.1184,
      "step": 185452
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.929165840148926,
      "learning_rate": 5.460546534413773e-05,
      "loss": 2.9333,
      "step": 185453
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.1466128826141357,
      "learning_rate": 5.460311229508986e-05,
      "loss": 2.9308,
      "step": 185454
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.5307939052581787,
      "learning_rate": 5.460075929166595e-05,
      "loss": 3.0158,
      "step": 185455
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.530592441558838,
      "learning_rate": 5.4598406333866315e-05,
      "loss": 2.9581,
      "step": 185456
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8541414737701416,
      "learning_rate": 5.459605342169152e-05,
      "loss": 3.0538,
      "step": 185457
    },
    {
      "epoch": 2.41,
      "grad_norm": 4.137407302856445,
      "learning_rate": 5.459370055514193e-05,
      "loss": 2.85,
      "step": 185458
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.077206611633301,
      "learning_rate": 5.45913477342181e-05,
      "loss": 2.9535,
      "step": 185459
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.546100378036499,
      "learning_rate": 5.458899495892025e-05,
      "loss": 2.9648,
      "step": 185460
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.630984306335449,
      "learning_rate": 5.458664222924903e-05,
      "loss": 2.8909,
      "step": 185461
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7237086296081543,
      "learning_rate": 5.4584289545204684e-05,
      "loss": 3.0929,
      "step": 185462
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.694424867630005,
      "learning_rate": 5.458193690678784e-05,
      "loss": 2.9039,
      "step": 185463
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8893191814422607,
      "learning_rate": 5.457958431399876e-05,
      "loss": 2.9642,
      "step": 185464
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.7369906902313232,
      "learning_rate": 5.4577231766838146e-05,
      "loss": 2.8435,
      "step": 185465
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2896831035614014,
      "learning_rate": 5.457487926530607e-05,
      "loss": 2.9365,
      "step": 185466
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8186957836151123,
      "learning_rate": 5.457252680940326e-05,
      "loss": 2.9227,
      "step": 185467
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.8745996952056885,
      "learning_rate": 5.457017439912995e-05,
      "loss": 2.9783,
      "step": 185468
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.4902684688568115,
      "learning_rate": 5.4567822034486773e-05,
      "loss": 3.1408,
      "step": 185469
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.416940689086914,
      "learning_rate": 5.4565469715473965e-05,
      "loss": 2.8742,
      "step": 185470
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.6424243450164795,
      "learning_rate": 5.456311744209225e-05,
      "loss": 3.0962,
      "step": 185471
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.807753562927246,
      "learning_rate": 5.456076521434174e-05,
      "loss": 2.9353,
      "step": 185472
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.028599500656128,
      "learning_rate": 5.4558413032223056e-05,
      "loss": 2.8467,
      "step": 185473
    },
    {
      "epoch": 2.41,
      "grad_norm": 3.2175486087799072,
      "learning_rate": 5.4556060895736544e-05,
      "loss": 2.9123,
      "step": 185474
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.405890941619873,
      "learning_rate": 5.4553708804882786e-05,
      "loss": 3.0941,
      "step": 185475
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6068222522735596,
      "learning_rate": 5.4551356759662035e-05,
      "loss": 3.1027,
      "step": 185476
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1352128982543945,
      "learning_rate": 5.454900476007488e-05,
      "loss": 2.9986,
      "step": 185477
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.772979497909546,
      "learning_rate": 5.4546652806121716e-05,
      "loss": 3.0137,
      "step": 185478
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.775320291519165,
      "learning_rate": 5.454430089780295e-05,
      "loss": 2.9956,
      "step": 185479
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6130571365356445,
      "learning_rate": 5.454194903511898e-05,
      "loss": 3.0929,
      "step": 185480
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.458071708679199,
      "learning_rate": 5.453959721807034e-05,
      "loss": 3.0779,
      "step": 185481
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.577986240386963,
      "learning_rate": 5.453724544665736e-05,
      "loss": 3.094,
      "step": 185482
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6078004837036133,
      "learning_rate": 5.453489372088061e-05,
      "loss": 2.8539,
      "step": 185483
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.796921968460083,
      "learning_rate": 5.453254204074049e-05,
      "loss": 2.9354,
      "step": 185484
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1034891605377197,
      "learning_rate": 5.453019040623736e-05,
      "loss": 2.79,
      "step": 185485
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9389076232910156,
      "learning_rate": 5.452783881737166e-05,
      "loss": 2.7601,
      "step": 185486
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8559932708740234,
      "learning_rate": 5.452548727414392e-05,
      "loss": 3.0022,
      "step": 185487
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9943931102752686,
      "learning_rate": 5.452313577655444e-05,
      "loss": 3.0049,
      "step": 185488
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.537148475646973,
      "learning_rate": 5.452078432460385e-05,
      "loss": 2.8717,
      "step": 185489
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8703839778900146,
      "learning_rate": 5.451843291829245e-05,
      "loss": 3.0208,
      "step": 185490
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.410830497741699,
      "learning_rate": 5.451608155762065e-05,
      "loss": 2.9895,
      "step": 185491
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.406085729598999,
      "learning_rate": 5.451373024258904e-05,
      "loss": 2.7477,
      "step": 185492
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.431417465209961,
      "learning_rate": 5.4511378973197916e-05,
      "loss": 3.1152,
      "step": 185493
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7192394733428955,
      "learning_rate": 5.450902774944772e-05,
      "loss": 2.7978,
      "step": 185494
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4573428630828857,
      "learning_rate": 5.450667657133898e-05,
      "loss": 2.9252,
      "step": 185495
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.713752508163452,
      "learning_rate": 5.45043254388721e-05,
      "loss": 2.9414,
      "step": 185496
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.521641731262207,
      "learning_rate": 5.450197435204744e-05,
      "loss": 2.9675,
      "step": 185497
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4359638690948486,
      "learning_rate": 5.4499623310865536e-05,
      "loss": 2.8034,
      "step": 185498
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9488799571990967,
      "learning_rate": 5.4497272315326725e-05,
      "loss": 2.9957,
      "step": 185499
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.601130485534668,
      "learning_rate": 5.44949213654316e-05,
      "loss": 3.0113,
      "step": 185500
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.237319231033325,
      "learning_rate": 5.449257046118046e-05,
      "loss": 3.1742,
      "step": 185501
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7815463542938232,
      "learning_rate": 5.449021960257381e-05,
      "loss": 3.1128,
      "step": 185502
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6011834144592285,
      "learning_rate": 5.4487868789612024e-05,
      "loss": 2.7455,
      "step": 185503
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8894124031066895,
      "learning_rate": 5.4485518022295616e-05,
      "loss": 2.8548,
      "step": 185504
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5139667987823486,
      "learning_rate": 5.44831673006249e-05,
      "loss": 3.0242,
      "step": 185505
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2565033435821533,
      "learning_rate": 5.448081662460047e-05,
      "loss": 2.8028,
      "step": 185506
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.9129550457000732,
      "learning_rate": 5.447846599422272e-05,
      "loss": 2.7487,
      "step": 185507
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.227498531341553,
      "learning_rate": 5.4476115409492027e-05,
      "loss": 2.7778,
      "step": 185508
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9086363315582275,
      "learning_rate": 5.447376487040882e-05,
      "loss": 2.7792,
      "step": 185509
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.343873977661133,
      "learning_rate": 5.447141437697363e-05,
      "loss": 2.8527,
      "step": 185510
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.146162271499634,
      "learning_rate": 5.446906392918676e-05,
      "loss": 2.7008,
      "step": 185511
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9459128379821777,
      "learning_rate": 5.446671352704881e-05,
      "loss": 2.9425,
      "step": 185512
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4141461849212646,
      "learning_rate": 5.4464363170560044e-05,
      "loss": 2.9313,
      "step": 185513
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8779876232147217,
      "learning_rate": 5.446201285972113e-05,
      "loss": 2.9103,
      "step": 185514
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6666042804718018,
      "learning_rate": 5.445966259453223e-05,
      "loss": 2.9392,
      "step": 185515
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.107630968093872,
      "learning_rate": 5.445731237499398e-05,
      "loss": 2.9329,
      "step": 185516
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.722581148147583,
      "learning_rate": 5.445496220110671e-05,
      "loss": 2.8074,
      "step": 185517
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.544865131378174,
      "learning_rate": 5.445261207287093e-05,
      "loss": 2.9835,
      "step": 185518
    },
    {
      "epoch": 2.42,
      "grad_norm": 6.136079788208008,
      "learning_rate": 5.445026199028699e-05,
      "loss": 2.7621,
      "step": 185519
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.473010063171387,
      "learning_rate": 5.4447911953355505e-05,
      "loss": 2.9588,
      "step": 185520
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7211244106292725,
      "learning_rate": 5.444556196207667e-05,
      "loss": 2.942,
      "step": 185521
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2409088611602783,
      "learning_rate": 5.444321201645111e-05,
      "loss": 3.0141,
      "step": 185522
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5824568271636963,
      "learning_rate": 5.44408621164791e-05,
      "loss": 3.0727,
      "step": 185523
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.03886079788208,
      "learning_rate": 5.443851226216127e-05,
      "loss": 2.9045,
      "step": 185524
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.089883804321289,
      "learning_rate": 5.443616245349786e-05,
      "loss": 2.6736,
      "step": 185525
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.945042371749878,
      "learning_rate": 5.4433812690489556e-05,
      "loss": 2.9295,
      "step": 185526
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0390987396240234,
      "learning_rate": 5.44314629731365e-05,
      "loss": 2.808,
      "step": 185527
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.136713981628418,
      "learning_rate": 5.4429113301439354e-05,
      "loss": 2.7326,
      "step": 185528
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.51525616645813,
      "learning_rate": 5.4426763675398354e-05,
      "loss": 2.8653,
      "step": 185529
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.625380277633667,
      "learning_rate": 5.442441409501417e-05,
      "loss": 3.2257,
      "step": 185530
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.08266282081604,
      "learning_rate": 5.442206456028703e-05,
      "loss": 3.0754,
      "step": 185531
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.637134075164795,
      "learning_rate": 5.441971507121763e-05,
      "loss": 2.7207,
      "step": 185532
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7972681522369385,
      "learning_rate": 5.441736562780608e-05,
      "loss": 3.0354,
      "step": 185533
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9096078872680664,
      "learning_rate": 5.441501623005307e-05,
      "loss": 2.9066,
      "step": 185534
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.578564167022705,
      "learning_rate": 5.4412666877958834e-05,
      "loss": 2.8593,
      "step": 185535
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7129366397857666,
      "learning_rate": 5.441031757152401e-05,
      "loss": 3.0646,
      "step": 185536
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1517393589019775,
      "learning_rate": 5.440796831074886e-05,
      "loss": 2.6106,
      "step": 185537
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6561291217803955,
      "learning_rate": 5.440561909563409e-05,
      "loss": 2.8807,
      "step": 185538
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.818812131881714,
      "learning_rate": 5.440326992617976e-05,
      "loss": 2.8075,
      "step": 185539
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.752152442932129,
      "learning_rate": 5.4400920802386604e-05,
      "loss": 2.8706,
      "step": 185540
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4091548919677734,
      "learning_rate": 5.4398571724254856e-05,
      "loss": 2.8715,
      "step": 185541
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.549607992172241,
      "learning_rate": 5.4396222691785116e-05,
      "loss": 2.8005,
      "step": 185542
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8721745014190674,
      "learning_rate": 5.4393873704977716e-05,
      "loss": 2.8945,
      "step": 185543
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.412046432495117,
      "learning_rate": 5.439152476383326e-05,
      "loss": 2.9777,
      "step": 185544
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2705531120300293,
      "learning_rate": 5.438917586835191e-05,
      "loss": 2.8787,
      "step": 185545
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5221939086914062,
      "learning_rate": 5.4386827018534293e-05,
      "loss": 2.8474,
      "step": 185546
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.346109390258789,
      "learning_rate": 5.438447821438075e-05,
      "loss": 2.7825,
      "step": 185547
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8368208408355713,
      "learning_rate": 5.438212945589186e-05,
      "loss": 2.7935,
      "step": 185548
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.122572183609009,
      "learning_rate": 5.437978074306789e-05,
      "loss": 2.8066,
      "step": 185549
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4998226165771484,
      "learning_rate": 5.43774320759095e-05,
      "loss": 2.8711,
      "step": 185550
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.848207473754883,
      "learning_rate": 5.437508345441681e-05,
      "loss": 2.9647,
      "step": 185551
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6343748569488525,
      "learning_rate": 5.437273487859053e-05,
      "loss": 2.7621,
      "step": 185552
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2130000591278076,
      "learning_rate": 5.437038634843092e-05,
      "loss": 3.2157,
      "step": 185553
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.526682138442993,
      "learning_rate": 5.436803786393854e-05,
      "loss": 2.961,
      "step": 185554
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9131500720977783,
      "learning_rate": 5.436568942511374e-05,
      "loss": 2.8584,
      "step": 185555
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7459800243377686,
      "learning_rate": 5.436334103195713e-05,
      "loss": 2.9356,
      "step": 185556
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8312039375305176,
      "learning_rate": 5.436099268446886e-05,
      "loss": 2.742,
      "step": 185557
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8749518394470215,
      "learning_rate": 5.4358644382649565e-05,
      "loss": 2.9862,
      "step": 185558
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0560734272003174,
      "learning_rate": 5.4356296126499596e-05,
      "loss": 2.7676,
      "step": 185559
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9082086086273193,
      "learning_rate": 5.43539479160195e-05,
      "loss": 3.1132,
      "step": 185560
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9698734283447266,
      "learning_rate": 5.435159975120954e-05,
      "loss": 2.8918,
      "step": 185561
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8801767826080322,
      "learning_rate": 5.4349251632070375e-05,
      "loss": 2.9656,
      "step": 185562
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.905362129211426,
      "learning_rate": 5.4346903558602285e-05,
      "loss": 2.8938,
      "step": 185563
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4264719486236572,
      "learning_rate": 5.434455553080576e-05,
      "loss": 2.8485,
      "step": 185564
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9866433143615723,
      "learning_rate": 5.4342207548681125e-05,
      "loss": 2.9482,
      "step": 185565
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.3689661026000977,
      "learning_rate": 5.4339859612228996e-05,
      "loss": 3.0628,
      "step": 185566
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9543991088867188,
      "learning_rate": 5.433751172144967e-05,
      "loss": 2.8722,
      "step": 185567
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.129582405090332,
      "learning_rate": 5.43351638763437e-05,
      "loss": 2.8856,
      "step": 185568
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2984156608581543,
      "learning_rate": 5.433281607691143e-05,
      "loss": 2.825,
      "step": 185569
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3109242916107178,
      "learning_rate": 5.4330468323153366e-05,
      "loss": 3.0803,
      "step": 185570
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.3292415142059326,
      "learning_rate": 5.4328120615069824e-05,
      "loss": 2.9834,
      "step": 185571
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7760872840881348,
      "learning_rate": 5.432577295266138e-05,
      "loss": 2.848,
      "step": 185572
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8984227180480957,
      "learning_rate": 5.432342533592834e-05,
      "loss": 2.893,
      "step": 185573
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.455972909927368,
      "learning_rate": 5.432107776487132e-05,
      "loss": 2.8989,
      "step": 185574
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.580100059509277,
      "learning_rate": 5.4318730239490605e-05,
      "loss": 2.8658,
      "step": 185575
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.994575262069702,
      "learning_rate": 5.4316382759786614e-05,
      "loss": 2.8126,
      "step": 185576
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.84716534614563,
      "learning_rate": 5.431403532575995e-05,
      "loss": 2.8277,
      "step": 185577
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.928701162338257,
      "learning_rate": 5.431168793741092e-05,
      "loss": 2.8597,
      "step": 185578
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.769362449645996,
      "learning_rate": 5.430934059473988e-05,
      "loss": 3.0506,
      "step": 185579
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4439988136291504,
      "learning_rate": 5.4306993297747505e-05,
      "loss": 2.706,
      "step": 185580
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.673166275024414,
      "learning_rate": 5.4304646046434056e-05,
      "loss": 2.8412,
      "step": 185581
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.838749408721924,
      "learning_rate": 5.430229884079996e-05,
      "loss": 3.0111,
      "step": 185582
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5320186614990234,
      "learning_rate": 5.4299951680845765e-05,
      "loss": 2.9693,
      "step": 185583
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.996455430984497,
      "learning_rate": 5.429760456657183e-05,
      "loss": 2.7931,
      "step": 185584
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8130478858947754,
      "learning_rate": 5.429525749797855e-05,
      "loss": 2.8032,
      "step": 185585
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.420264482498169,
      "learning_rate": 5.429291047506653e-05,
      "loss": 2.8593,
      "step": 185586
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.531549453735352,
      "learning_rate": 5.4290563497836036e-05,
      "loss": 3.124,
      "step": 185587
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.388181447982788,
      "learning_rate": 5.428821656628753e-05,
      "loss": 2.9398,
      "step": 185588
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7925240993499756,
      "learning_rate": 5.4285869680421556e-05,
      "loss": 3.0931,
      "step": 185589
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.793303489685059,
      "learning_rate": 5.428352284023837e-05,
      "loss": 2.9651,
      "step": 185590
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6506879329681396,
      "learning_rate": 5.428117604573864e-05,
      "loss": 2.7331,
      "step": 185591
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.101194381713867,
      "learning_rate": 5.4278829296922635e-05,
      "loss": 2.7673,
      "step": 185592
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9273900985717773,
      "learning_rate": 5.427648259379085e-05,
      "loss": 2.8908,
      "step": 185593
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.555466890335083,
      "learning_rate": 5.427413593634362e-05,
      "loss": 3.0339,
      "step": 185594
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6925151348114014,
      "learning_rate": 5.427178932458155e-05,
      "loss": 3.0816,
      "step": 185595
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.001995325088501,
      "learning_rate": 5.4269442758504934e-05,
      "loss": 2.7175,
      "step": 185596
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.786172389984131,
      "learning_rate": 5.426709623811431e-05,
      "loss": 3.0361,
      "step": 185597
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.259945869445801,
      "learning_rate": 5.426474976341003e-05,
      "loss": 2.9416,
      "step": 185598
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.53928804397583,
      "learning_rate": 5.426240333439271e-05,
      "loss": 3.0066,
      "step": 185599
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5685932636260986,
      "learning_rate": 5.426005695106248e-05,
      "loss": 2.9504,
      "step": 185600
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.509568452835083,
      "learning_rate": 5.4257710613420035e-05,
      "loss": 2.973,
      "step": 185601
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7032718658447266,
      "learning_rate": 5.425536432146564e-05,
      "loss": 2.9982,
      "step": 185602
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.058164358139038,
      "learning_rate": 5.425301807519987e-05,
      "loss": 2.9554,
      "step": 185603
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9372918605804443,
      "learning_rate": 5.425067187462305e-05,
      "loss": 3.0742,
      "step": 185604
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.215611457824707,
      "learning_rate": 5.424832571973582e-05,
      "loss": 3.1505,
      "step": 185605
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1813275814056396,
      "learning_rate": 5.4245979610538304e-05,
      "loss": 2.8278,
      "step": 185606
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.486879348754883,
      "learning_rate": 5.4243633547031205e-05,
      "loss": 3.1245,
      "step": 185607
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.3098535537719727,
      "learning_rate": 5.424128752921473e-05,
      "loss": 2.9271,
      "step": 185608
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7634589672088623,
      "learning_rate": 5.4238941557089563e-05,
      "loss": 3.0129,
      "step": 185609
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.124191761016846,
      "learning_rate": 5.423659563065589e-05,
      "loss": 2.8623,
      "step": 185610
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1924233436584473,
      "learning_rate": 5.423424974991446e-05,
      "loss": 2.8806,
      "step": 185611
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9212937355041504,
      "learning_rate": 5.423190391486535e-05,
      "loss": 2.7767,
      "step": 185612
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0050549507141113,
      "learning_rate": 5.4229558125509254e-05,
      "loss": 3.1154,
      "step": 185613
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.433269500732422,
      "learning_rate": 5.422721238184647e-05,
      "loss": 2.9559,
      "step": 185614
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.112415075302124,
      "learning_rate": 5.4224866683877546e-05,
      "loss": 3.099,
      "step": 185615
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8733224868774414,
      "learning_rate": 5.422252103160276e-05,
      "loss": 2.9177,
      "step": 185616
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.152599573135376,
      "learning_rate": 5.422017542502283e-05,
      "loss": 3.2563,
      "step": 185617
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.969825029373169,
      "learning_rate": 5.421782986413784e-05,
      "loss": 2.8307,
      "step": 185618
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.569828510284424,
      "learning_rate": 5.421548434894847e-05,
      "loss": 2.961,
      "step": 185619
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0718860626220703,
      "learning_rate": 5.421313887945501e-05,
      "loss": 2.872,
      "step": 185620
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7753994464874268,
      "learning_rate": 5.4210793455658034e-05,
      "loss": 2.9487,
      "step": 185621
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3374338150024414,
      "learning_rate": 5.420844807755783e-05,
      "loss": 2.9522,
      "step": 185622
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.79742169380188,
      "learning_rate": 5.420610274515508e-05,
      "loss": 2.925,
      "step": 185623
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9214346408843994,
      "learning_rate": 5.42037574584499e-05,
      "loss": 2.8593,
      "step": 185624
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.212131977081299,
      "learning_rate": 5.420141221744297e-05,
      "loss": 2.7914,
      "step": 185625
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.623507022857666,
      "learning_rate": 5.4199067022134554e-05,
      "loss": 3.0452,
      "step": 185626
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.788568496704102,
      "learning_rate": 5.419672187252525e-05,
      "loss": 2.6007,
      "step": 185627
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.911954879760742,
      "learning_rate": 5.419437676861532e-05,
      "loss": 2.969,
      "step": 185628
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3230903148651123,
      "learning_rate": 5.419203171040547e-05,
      "loss": 2.8249,
      "step": 185629
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.404053211212158,
      "learning_rate": 5.41896866978958e-05,
      "loss": 2.9789,
      "step": 185630
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.545133113861084,
      "learning_rate": 5.4187341731086965e-05,
      "loss": 2.9261,
      "step": 185631
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3239381313323975,
      "learning_rate": 5.418499680997931e-05,
      "loss": 2.8939,
      "step": 185632
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4990439414978027,
      "learning_rate": 5.4182651934573326e-05,
      "loss": 3.053,
      "step": 185633
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.443406581878662,
      "learning_rate": 5.4180307104869394e-05,
      "loss": 3.0211,
      "step": 185634
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.598008632659912,
      "learning_rate": 5.4177962320868066e-05,
      "loss": 2.8096,
      "step": 185635
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.81308650970459,
      "learning_rate": 5.417561758256964e-05,
      "loss": 2.913,
      "step": 185636
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7266533374786377,
      "learning_rate": 5.417327288997466e-05,
      "loss": 2.6905,
      "step": 185637
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6070058345794678,
      "learning_rate": 5.417092824308342e-05,
      "loss": 2.974,
      "step": 185638
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.521481990814209,
      "learning_rate": 5.4168583641896525e-05,
      "loss": 2.6774,
      "step": 185639
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7352287769317627,
      "learning_rate": 5.416623908641423e-05,
      "loss": 2.9889,
      "step": 185640
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2867066860198975,
      "learning_rate": 5.4163894576637176e-05,
      "loss": 2.84,
      "step": 185641
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.71095871925354,
      "learning_rate": 5.416155011256567e-05,
      "loss": 3.1202,
      "step": 185642
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.960719585418701,
      "learning_rate": 5.415920569420019e-05,
      "loss": 2.922,
      "step": 185643
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4069032669067383,
      "learning_rate": 5.4156861321541055e-05,
      "loss": 2.8698,
      "step": 185644
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.832392930984497,
      "learning_rate": 5.4154516994588894e-05,
      "loss": 2.8641,
      "step": 185645
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8119144439697266,
      "learning_rate": 5.4152172713343965e-05,
      "loss": 2.8177,
      "step": 185646
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5510411262512207,
      "learning_rate": 5.414982847780688e-05,
      "loss": 2.8928,
      "step": 185647
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.656548500061035,
      "learning_rate": 5.4147484287977994e-05,
      "loss": 3.0281,
      "step": 185648
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.575173854827881,
      "learning_rate": 5.414514014385771e-05,
      "loss": 2.97,
      "step": 185649
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7494614124298096,
      "learning_rate": 5.41427960454464e-05,
      "loss": 2.9109,
      "step": 185650
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7859890460968018,
      "learning_rate": 5.414045199274466e-05,
      "loss": 3.0833,
      "step": 185651
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.837972402572632,
      "learning_rate": 5.413810798575279e-05,
      "loss": 2.9305,
      "step": 185652
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8336288928985596,
      "learning_rate": 5.4135764024471384e-05,
      "loss": 2.9027,
      "step": 185653
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.646845817565918,
      "learning_rate": 5.413342010890075e-05,
      "loss": 2.8897,
      "step": 185654
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8954150676727295,
      "learning_rate": 5.4131076239041346e-05,
      "loss": 3.0495,
      "step": 185655
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.765442371368408,
      "learning_rate": 5.412873241489355e-05,
      "loss": 2.8298,
      "step": 185656
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9364752769470215,
      "learning_rate": 5.412638863645795e-05,
      "loss": 2.9905,
      "step": 185657
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9399642944335938,
      "learning_rate": 5.412404490373482e-05,
      "loss": 3.0313,
      "step": 185658
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8705623149871826,
      "learning_rate": 5.412170121672472e-05,
      "loss": 2.8978,
      "step": 185659
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.424453020095825,
      "learning_rate": 5.411935757542809e-05,
      "loss": 3.0035,
      "step": 185660
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.740295886993408,
      "learning_rate": 5.411701397984519e-05,
      "loss": 2.9358,
      "step": 185661
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0014703273773193,
      "learning_rate": 5.411467042997666e-05,
      "loss": 3.0308,
      "step": 185662
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.115199565887451,
      "learning_rate": 5.411232692582289e-05,
      "loss": 3.0137,
      "step": 185663
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2639641761779785,
      "learning_rate": 5.4109983467384156e-05,
      "loss": 2.9329,
      "step": 185664
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3814046382904053,
      "learning_rate": 5.4107640054661116e-05,
      "loss": 2.8376,
      "step": 185665
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.778599739074707,
      "learning_rate": 5.410529668765411e-05,
      "loss": 2.7993,
      "step": 185666
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.506946325302124,
      "learning_rate": 5.41029533663635e-05,
      "loss": 3.0056,
      "step": 185667
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.546481132507324,
      "learning_rate": 5.410061009078985e-05,
      "loss": 3.0908,
      "step": 185668
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.683434247970581,
      "learning_rate": 5.4098266860933534e-05,
      "loss": 2.8188,
      "step": 185669
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.897078514099121,
      "learning_rate": 5.409592367679495e-05,
      "loss": 3.0028,
      "step": 185670
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.309960126876831,
      "learning_rate": 5.409358053837463e-05,
      "loss": 2.824,
      "step": 185671
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.863044023513794,
      "learning_rate": 5.409123744567296e-05,
      "loss": 3.01,
      "step": 185672
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.427706718444824,
      "learning_rate": 5.40888943986903e-05,
      "loss": 2.7711,
      "step": 185673
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.032716274261475,
      "learning_rate": 5.4086551397427226e-05,
      "loss": 2.9164,
      "step": 185674
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.384526252746582,
      "learning_rate": 5.408420844188401e-05,
      "loss": 2.8387,
      "step": 185675
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4906322956085205,
      "learning_rate": 5.4081865532061265e-05,
      "loss": 3.0319,
      "step": 185676
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.229476451873779,
      "learning_rate": 5.407952266795938e-05,
      "loss": 2.8201,
      "step": 185677
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.132974624633789,
      "learning_rate": 5.407717984957871e-05,
      "loss": 2.9483,
      "step": 185678
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.203141212463379,
      "learning_rate": 5.407483707691967e-05,
      "loss": 2.9412,
      "step": 185679
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7275493144989014,
      "learning_rate": 5.407249434998283e-05,
      "loss": 3.0289,
      "step": 185680
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7182862758636475,
      "learning_rate": 5.4070151668768516e-05,
      "loss": 2.8793,
      "step": 185681
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5628774166107178,
      "learning_rate": 5.406780903327726e-05,
      "loss": 2.9091,
      "step": 185682
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9085562229156494,
      "learning_rate": 5.406546644350935e-05,
      "loss": 3.1241,
      "step": 185683
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6850693225860596,
      "learning_rate": 5.4063123899465476e-05,
      "loss": 3.1311,
      "step": 185684
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.513474941253662,
      "learning_rate": 5.406078140114576e-05,
      "loss": 2.9286,
      "step": 185685
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.718410491943359,
      "learning_rate": 5.405843894855083e-05,
      "loss": 2.8527,
      "step": 185686
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9762625694274902,
      "learning_rate": 5.405609654168103e-05,
      "loss": 3.0349,
      "step": 185687
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.483160972595215,
      "learning_rate": 5.4053754180536916e-05,
      "loss": 3.0326,
      "step": 185688
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.088789463043213,
      "learning_rate": 5.405141186511879e-05,
      "loss": 3.0676,
      "step": 185689
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.893549680709839,
      "learning_rate": 5.4049069595427285e-05,
      "loss": 3.0287,
      "step": 185690
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7083206176757812,
      "learning_rate": 5.404672737146257e-05,
      "loss": 2.9566,
      "step": 185691
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.029231548309326,
      "learning_rate": 5.404438519322525e-05,
      "loss": 2.9143,
      "step": 185692
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1026792526245117,
      "learning_rate": 5.404204306071568e-05,
      "loss": 2.9893,
      "step": 185693
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.51005482673645,
      "learning_rate": 5.40397009739344e-05,
      "loss": 2.9419,
      "step": 185694
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.060330867767334,
      "learning_rate": 5.4037358932881705e-05,
      "loss": 2.9327,
      "step": 185695
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.149576187133789,
      "learning_rate": 5.403501693755826e-05,
      "loss": 2.9482,
      "step": 185696
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9046077728271484,
      "learning_rate": 5.403267498796421e-05,
      "loss": 2.9888,
      "step": 185697
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.419978141784668,
      "learning_rate": 5.4030333084100174e-05,
      "loss": 3.0333,
      "step": 185698
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.016030788421631,
      "learning_rate": 5.40279912259665e-05,
      "loss": 2.9941,
      "step": 185699
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8740217685699463,
      "learning_rate": 5.402564941356373e-05,
      "loss": 3.045,
      "step": 185700
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.991379976272583,
      "learning_rate": 5.4023307646892154e-05,
      "loss": 2.9701,
      "step": 185701
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4905028343200684,
      "learning_rate": 5.402096592595236e-05,
      "loss": 3.0703,
      "step": 185702
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9132320880889893,
      "learning_rate": 5.401862425074472e-05,
      "loss": 2.8112,
      "step": 185703
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8290271759033203,
      "learning_rate": 5.4016282621269635e-05,
      "loss": 3.037,
      "step": 185704
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.708930253982544,
      "learning_rate": 5.401394103752753e-05,
      "loss": 2.9524,
      "step": 185705
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9211814403533936,
      "learning_rate": 5.4011599499518945e-05,
      "loss": 3.106,
      "step": 185706
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5312483310699463,
      "learning_rate": 5.400925800724414e-05,
      "loss": 2.7685,
      "step": 185707
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.450371742248535,
      "learning_rate": 5.400691656070378e-05,
      "loss": 3.0849,
      "step": 185708
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.165302276611328,
      "learning_rate": 5.400457515989814e-05,
      "loss": 2.7667,
      "step": 185709
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7516984939575195,
      "learning_rate": 5.400223380482772e-05,
      "loss": 2.8476,
      "step": 185710
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8769538402557373,
      "learning_rate": 5.3999892495492816e-05,
      "loss": 2.7675,
      "step": 185711
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.278807163238525,
      "learning_rate": 5.3997551231894086e-05,
      "loss": 3.062,
      "step": 185712
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2979979515075684,
      "learning_rate": 5.399521001403177e-05,
      "loss": 2.8236,
      "step": 185713
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.918766498565674,
      "learning_rate": 5.399286884190644e-05,
      "loss": 3.0683,
      "step": 185714
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.571294069290161,
      "learning_rate": 5.399052771551853e-05,
      "loss": 2.8771,
      "step": 185715
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.788478851318359,
      "learning_rate": 5.398818663486839e-05,
      "loss": 2.9959,
      "step": 185716
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.043108940124512,
      "learning_rate": 5.39858455999564e-05,
      "loss": 2.9021,
      "step": 185717
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.467904806137085,
      "learning_rate": 5.398350461078319e-05,
      "loss": 2.8266,
      "step": 185718
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.637085437774658,
      "learning_rate": 5.398116366734899e-05,
      "loss": 3.0697,
      "step": 185719
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.529362201690674,
      "learning_rate": 5.397882276965444e-05,
      "loss": 2.8857,
      "step": 185720
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9811391830444336,
      "learning_rate": 5.3976481917699865e-05,
      "loss": 2.8325,
      "step": 185721
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5825300216674805,
      "learning_rate": 5.3974141111485705e-05,
      "loss": 2.8725,
      "step": 185722
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.88449764251709,
      "learning_rate": 5.397180035101229e-05,
      "loss": 3.0569,
      "step": 185723
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.9508779048919678,
      "learning_rate": 5.3969459636280274e-05,
      "loss": 2.9997,
      "step": 185724
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.186075687408447,
      "learning_rate": 5.396711896728988e-05,
      "loss": 2.6891,
      "step": 185725
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.775916576385498,
      "learning_rate": 5.3964778344041725e-05,
      "loss": 2.6334,
      "step": 185726
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6298108100891113,
      "learning_rate": 5.396243776653618e-05,
      "loss": 2.8926,
      "step": 185727
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.356538772583008,
      "learning_rate": 5.396009723477365e-05,
      "loss": 3.0633,
      "step": 185728
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6050944328308105,
      "learning_rate": 5.3957756748754486e-05,
      "loss": 2.7498,
      "step": 185729
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.913041591644287,
      "learning_rate": 5.3955416308479336e-05,
      "loss": 2.9712,
      "step": 185730
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.9633448123931885,
      "learning_rate": 5.395307591394843e-05,
      "loss": 2.9947,
      "step": 185731
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3908984661102295,
      "learning_rate": 5.395073556516233e-05,
      "loss": 2.9454,
      "step": 185732
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6042211055755615,
      "learning_rate": 5.394839526212147e-05,
      "loss": 2.6705,
      "step": 185733
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.383917808532715,
      "learning_rate": 5.394605500482625e-05,
      "loss": 2.9859,
      "step": 185734
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.036494255065918,
      "learning_rate": 5.3943714793277006e-05,
      "loss": 3.1749,
      "step": 185735
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.341397762298584,
      "learning_rate": 5.3941374627474364e-05,
      "loss": 2.9816,
      "step": 185736
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6041040420532227,
      "learning_rate": 5.393903450741856e-05,
      "loss": 2.9862,
      "step": 185737
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0188546180725098,
      "learning_rate": 5.393669443311024e-05,
      "loss": 2.9174,
      "step": 185738
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.381124496459961,
      "learning_rate": 5.3934354404549716e-05,
      "loss": 2.7663,
      "step": 185739
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.769359827041626,
      "learning_rate": 5.393201442173747e-05,
      "loss": 3.2775,
      "step": 185740
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6876604557037354,
      "learning_rate": 5.392967448467379e-05,
      "loss": 3.2156,
      "step": 185741
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.740753412246704,
      "learning_rate": 5.3927334593359354e-05,
      "loss": 3.0528,
      "step": 185742
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9378669261932373,
      "learning_rate": 5.392499474779435e-05,
      "loss": 2.8975,
      "step": 185743
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8399710655212402,
      "learning_rate": 5.392265494797942e-05,
      "loss": 2.9425,
      "step": 185744
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4514684677124023,
      "learning_rate": 5.392031519391492e-05,
      "loss": 3.0465,
      "step": 185745
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.678680896759033,
      "learning_rate": 5.39179754856013e-05,
      "loss": 2.7803,
      "step": 185746
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8615942001342773,
      "learning_rate": 5.3915635823038874e-05,
      "loss": 2.9875,
      "step": 185747
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.798525333404541,
      "learning_rate": 5.391329620622825e-05,
      "loss": 2.9453,
      "step": 185748
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.37691593170166,
      "learning_rate": 5.391095663516973e-05,
      "loss": 2.7225,
      "step": 185749
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.004180908203125,
      "learning_rate": 5.390861710986385e-05,
      "loss": 3.0351,
      "step": 185750
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.77675199508667,
      "learning_rate": 5.3906277630311036e-05,
      "loss": 3.0702,
      "step": 185751
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.427093029022217,
      "learning_rate": 5.390393819651162e-05,
      "loss": 2.9299,
      "step": 185752
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.00678014755249,
      "learning_rate": 5.390159880846614e-05,
      "loss": 2.9524,
      "step": 185753
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.82571268081665,
      "learning_rate": 5.389925946617503e-05,
      "loss": 2.5915,
      "step": 185754
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7504587173461914,
      "learning_rate": 5.389692016963861e-05,
      "loss": 2.9863,
      "step": 185755
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.420609951019287,
      "learning_rate": 5.38945809188575e-05,
      "loss": 2.6896,
      "step": 185756
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.674018621444702,
      "learning_rate": 5.389224171383202e-05,
      "loss": 2.8375,
      "step": 185757
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8600804805755615,
      "learning_rate": 5.3889902554562506e-05,
      "loss": 3.1271,
      "step": 185758
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.1670379638671875,
      "learning_rate": 5.3887563441049616e-05,
      "loss": 2.6935,
      "step": 185759
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8344998359680176,
      "learning_rate": 5.38852243732936e-05,
      "loss": 2.9,
      "step": 185760
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3736557960510254,
      "learning_rate": 5.3882885351295013e-05,
      "loss": 3.0219,
      "step": 185761
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6003823280334473,
      "learning_rate": 5.3880546375054255e-05,
      "loss": 2.5343,
      "step": 185762
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7218151092529297,
      "learning_rate": 5.387820744457176e-05,
      "loss": 2.8885,
      "step": 185763
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9058990478515625,
      "learning_rate": 5.38758685598479e-05,
      "loss": 2.9409,
      "step": 185764
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.3809921741485596,
      "learning_rate": 5.38735297208832e-05,
      "loss": 2.9484,
      "step": 185765
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5332930088043213,
      "learning_rate": 5.3871190927678e-05,
      "loss": 2.8793,
      "step": 185766
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.756030559539795,
      "learning_rate": 5.386885218023286e-05,
      "loss": 2.8835,
      "step": 185767
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.759887933731079,
      "learning_rate": 5.386651347854815e-05,
      "loss": 2.8945,
      "step": 185768
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.559044599533081,
      "learning_rate": 5.3864174822624196e-05,
      "loss": 2.9138,
      "step": 185769
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.275404691696167,
      "learning_rate": 5.386183621246164e-05,
      "loss": 3.0843,
      "step": 185770
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.58593487739563,
      "learning_rate": 5.3859497648060815e-05,
      "loss": 3.0742,
      "step": 185771
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.387617588043213,
      "learning_rate": 5.385715912942208e-05,
      "loss": 3.0164,
      "step": 185772
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7184412479400635,
      "learning_rate": 5.385482065654604e-05,
      "loss": 2.7698,
      "step": 185773
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.975480079650879,
      "learning_rate": 5.3852482229432925e-05,
      "loss": 2.8995,
      "step": 185774
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.732853651046753,
      "learning_rate": 5.385014384808337e-05,
      "loss": 3.0782,
      "step": 185775
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3525404930114746,
      "learning_rate": 5.384780551249771e-05,
      "loss": 2.9997,
      "step": 185776
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6161417961120605,
      "learning_rate": 5.384546722267641e-05,
      "loss": 3.0626,
      "step": 185777
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6601431369781494,
      "learning_rate": 5.3843128978619796e-05,
      "loss": 3.1239,
      "step": 185778
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.568859338760376,
      "learning_rate": 5.384079078032848e-05,
      "loss": 2.9066,
      "step": 185779
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7797884941101074,
      "learning_rate": 5.383845262780272e-05,
      "loss": 2.9634,
      "step": 185780
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0438153743743896,
      "learning_rate": 5.3836114521043084e-05,
      "loss": 2.8821,
      "step": 185781
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.219578266143799,
      "learning_rate": 5.383377646005e-05,
      "loss": 2.9944,
      "step": 185782
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.076645851135254,
      "learning_rate": 5.383143844482385e-05,
      "loss": 2.9948,
      "step": 185783
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7704381942749023,
      "learning_rate": 5.3829100475365016e-05,
      "loss": 3.0336,
      "step": 185784
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.767563581466675,
      "learning_rate": 5.382676255167408e-05,
      "loss": 2.7237,
      "step": 185785
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4475154876708984,
      "learning_rate": 5.382442467375132e-05,
      "loss": 2.9543,
      "step": 185786
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.649275779724121,
      "learning_rate": 5.382208684159729e-05,
      "loss": 3.1775,
      "step": 185787
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6276371479034424,
      "learning_rate": 5.3819749055212416e-05,
      "loss": 2.8077,
      "step": 185788
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.459599494934082,
      "learning_rate": 5.381741131459709e-05,
      "loss": 3.07,
      "step": 185789
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.810316324234009,
      "learning_rate": 5.381507361975166e-05,
      "loss": 2.8971,
      "step": 185790
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9603006839752197,
      "learning_rate": 5.381273597067675e-05,
      "loss": 2.9333,
      "step": 185791
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3524062633514404,
      "learning_rate": 5.381039836737262e-05,
      "loss": 3.0412,
      "step": 185792
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.913499116897583,
      "learning_rate": 5.380806080983985e-05,
      "loss": 2.8636,
      "step": 185793
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9749274253845215,
      "learning_rate": 5.380572329807883e-05,
      "loss": 3.0165,
      "step": 185794
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.066349983215332,
      "learning_rate": 5.3803385832089964e-05,
      "loss": 2.8822,
      "step": 185795
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.958981513977051,
      "learning_rate": 5.380104841187361e-05,
      "loss": 2.938,
      "step": 185796
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6324801445007324,
      "learning_rate": 5.379871103743035e-05,
      "loss": 2.8796,
      "step": 185797
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.795769453048706,
      "learning_rate": 5.379637370876051e-05,
      "loss": 2.9368,
      "step": 185798
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6899139881134033,
      "learning_rate": 5.379403642586465e-05,
      "loss": 2.9498,
      "step": 185799
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.803467035293579,
      "learning_rate": 5.379169918874311e-05,
      "loss": 3.3722,
      "step": 185800
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.212985038757324,
      "learning_rate": 5.378936199739635e-05,
      "loss": 2.8574,
      "step": 185801
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.798576593399048,
      "learning_rate": 5.378702485182471e-05,
      "loss": 2.7211,
      "step": 185802
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7926952838897705,
      "learning_rate": 5.378468775202879e-05,
      "loss": 3.0682,
      "step": 185803
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7929704189300537,
      "learning_rate": 5.378235069800888e-05,
      "loss": 2.9303,
      "step": 185804
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.082022190093994,
      "learning_rate": 5.3780013689765525e-05,
      "loss": 3.0711,
      "step": 185805
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5288074016571045,
      "learning_rate": 5.377767672729915e-05,
      "loss": 3.0707,
      "step": 185806
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8453478813171387,
      "learning_rate": 5.377533981061012e-05,
      "loss": 2.9699,
      "step": 185807
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.968641757965088,
      "learning_rate": 5.377300293969884e-05,
      "loss": 2.8593,
      "step": 185808
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6075599193573,
      "learning_rate": 5.377066611456587e-05,
      "loss": 3.0892,
      "step": 185809
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1278998851776123,
      "learning_rate": 5.3768329335211525e-05,
      "loss": 3.0423,
      "step": 185810
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.65614652633667,
      "learning_rate": 5.376599260163639e-05,
      "loss": 2.8818,
      "step": 185811
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9195897579193115,
      "learning_rate": 5.3763655913840765e-05,
      "loss": 2.9595,
      "step": 185812
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3708646297454834,
      "learning_rate": 5.376131927182512e-05,
      "loss": 2.9484,
      "step": 185813
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4937305450439453,
      "learning_rate": 5.3758982675589856e-05,
      "loss": 3.1245,
      "step": 185814
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6441149711608887,
      "learning_rate": 5.37566461251355e-05,
      "loss": 3.0417,
      "step": 185815
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9910659790039062,
      "learning_rate": 5.375430962046237e-05,
      "loss": 2.8361,
      "step": 185816
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.388728380203247,
      "learning_rate": 5.3751973161571e-05,
      "loss": 2.8938,
      "step": 185817
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2817890644073486,
      "learning_rate": 5.374963674846182e-05,
      "loss": 3.1681,
      "step": 185818
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.612981081008911,
      "learning_rate": 5.3747300381135214e-05,
      "loss": 2.9787,
      "step": 185819
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6890792846679688,
      "learning_rate": 5.374496405959159e-05,
      "loss": 2.8919,
      "step": 185820
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7662227153778076,
      "learning_rate": 5.374262778383147e-05,
      "loss": 2.9369,
      "step": 185821
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3377621173858643,
      "learning_rate": 5.3740291553855166e-05,
      "loss": 3.0246,
      "step": 185822
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4671919345855713,
      "learning_rate": 5.37379553696633e-05,
      "loss": 2.7922,
      "step": 185823
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.689962387084961,
      "learning_rate": 5.3735619231256176e-05,
      "loss": 2.9214,
      "step": 185824
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6612703800201416,
      "learning_rate": 5.373328313863423e-05,
      "loss": 2.9815,
      "step": 185825
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9267256259918213,
      "learning_rate": 5.373094709179786e-05,
      "loss": 2.778,
      "step": 185826
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6508405208587646,
      "learning_rate": 5.372861109074763e-05,
      "loss": 3.1087,
      "step": 185827
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6471917629241943,
      "learning_rate": 5.372627513548381e-05,
      "loss": 2.7002,
      "step": 185828
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6996042728424072,
      "learning_rate": 5.372393922600703e-05,
      "loss": 2.9161,
      "step": 185829
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2286593914031982,
      "learning_rate": 5.372160336231759e-05,
      "loss": 2.894,
      "step": 185830
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6124558448791504,
      "learning_rate": 5.3719267544415955e-05,
      "loss": 2.7186,
      "step": 185831
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5076520442962646,
      "learning_rate": 5.3716931772302495e-05,
      "loss": 3.0451,
      "step": 185832
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.803260087966919,
      "learning_rate": 5.371459604597777e-05,
      "loss": 2.9602,
      "step": 185833
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7548587322235107,
      "learning_rate": 5.3712260365442094e-05,
      "loss": 3.0193,
      "step": 185834
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1832869052886963,
      "learning_rate": 5.370992473069602e-05,
      "loss": 2.8393,
      "step": 185835
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1365771293640137,
      "learning_rate": 5.370758914173985e-05,
      "loss": 2.9048,
      "step": 185836
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1446921825408936,
      "learning_rate": 5.370525359857415e-05,
      "loss": 3.2453,
      "step": 185837
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5257506370544434,
      "learning_rate": 5.370291810119929e-05,
      "loss": 3.0946,
      "step": 185838
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6612448692321777,
      "learning_rate": 5.370058264961573e-05,
      "loss": 2.7674,
      "step": 185839
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.665055274963379,
      "learning_rate": 5.3698247243823776e-05,
      "loss": 3.1443,
      "step": 185840
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.072622537612915,
      "learning_rate": 5.369591188382406e-05,
      "loss": 2.8864,
      "step": 185841
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.592475414276123,
      "learning_rate": 5.369357656961685e-05,
      "loss": 2.9365,
      "step": 185842
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.032578706741333,
      "learning_rate": 5.369124130120274e-05,
      "loss": 2.7763,
      "step": 185843
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.399550676345825,
      "learning_rate": 5.368890607858206e-05,
      "loss": 2.9073,
      "step": 185844
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.2873599529266357,
      "learning_rate": 5.3686570901755185e-05,
      "loss": 2.9977,
      "step": 185845
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.3003311157226562,
      "learning_rate": 5.368423577072272e-05,
      "loss": 2.9802,
      "step": 185846
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.265412330627441,
      "learning_rate": 5.368190068548498e-05,
      "loss": 3.0264,
      "step": 185847
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.904387950897217,
      "learning_rate": 5.3679565646042374e-05,
      "loss": 3.0801,
      "step": 185848
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.040107250213623,
      "learning_rate": 5.367723065239544e-05,
      "loss": 2.8256,
      "step": 185849
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5785350799560547,
      "learning_rate": 5.367489570454457e-05,
      "loss": 2.8911,
      "step": 185850
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.225692272186279,
      "learning_rate": 5.36725608024901e-05,
      "loss": 2.8322,
      "step": 185851
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.559387445449829,
      "learning_rate": 5.3670225946232637e-05,
      "loss": 2.8672,
      "step": 185852
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5168001651763916,
      "learning_rate": 5.3667891135772534e-05,
      "loss": 2.7788,
      "step": 185853
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.314138889312744,
      "learning_rate": 5.366555637111013e-05,
      "loss": 2.8384,
      "step": 185854
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.582038402557373,
      "learning_rate": 5.3663221652246055e-05,
      "loss": 2.8963,
      "step": 185855
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8651578426361084,
      "learning_rate": 5.366088697918062e-05,
      "loss": 2.8476,
      "step": 185856
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7288506031036377,
      "learning_rate": 5.365855235191417e-05,
      "loss": 2.9574,
      "step": 185857
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.903057098388672,
      "learning_rate": 5.365621777044736e-05,
      "loss": 3.1387,
      "step": 185858
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.461782455444336,
      "learning_rate": 5.3653883234780414e-05,
      "loss": 3.073,
      "step": 185859
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.696873188018799,
      "learning_rate": 5.365154874491393e-05,
      "loss": 3.0479,
      "step": 185860
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.931833505630493,
      "learning_rate": 5.364921430084831e-05,
      "loss": 2.977,
      "step": 185861
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.564793109893799,
      "learning_rate": 5.3646879902583915e-05,
      "loss": 3.1967,
      "step": 185862
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.406278133392334,
      "learning_rate": 5.3644545550121154e-05,
      "loss": 3.1469,
      "step": 185863
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.114455223083496,
      "learning_rate": 5.364221124346059e-05,
      "loss": 2.8457,
      "step": 185864
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8739821910858154,
      "learning_rate": 5.363987698260252e-05,
      "loss": 3.0418,
      "step": 185865
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3459739685058594,
      "learning_rate": 5.36375427675475e-05,
      "loss": 3.1019,
      "step": 185866
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2052531242370605,
      "learning_rate": 5.3635208598295955e-05,
      "loss": 2.9555,
      "step": 185867
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5857772827148438,
      "learning_rate": 5.3632874474848264e-05,
      "loss": 2.8016,
      "step": 185868
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.199201822280884,
      "learning_rate": 5.363054039720477e-05,
      "loss": 2.9366,
      "step": 185869
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5611815452575684,
      "learning_rate": 5.36282063653661e-05,
      "loss": 2.7466,
      "step": 185870
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.225980758666992,
      "learning_rate": 5.3625872379332526e-05,
      "loss": 3.0274,
      "step": 185871
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.343019485473633,
      "learning_rate": 5.362353843910461e-05,
      "loss": 2.8813,
      "step": 185872
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1074678897857666,
      "learning_rate": 5.362120454468275e-05,
      "loss": 2.9717,
      "step": 185873
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.328876256942749,
      "learning_rate": 5.361887069606735e-05,
      "loss": 2.9099,
      "step": 185874
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.660764217376709,
      "learning_rate": 5.361653689325878e-05,
      "loss": 3.0025,
      "step": 185875
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.021327257156372,
      "learning_rate": 5.361420313625763e-05,
      "loss": 2.9189,
      "step": 185876
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.188697576522827,
      "learning_rate": 5.361186942506417e-05,
      "loss": 2.9835,
      "step": 185877
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0086493492126465,
      "learning_rate": 5.3609535759678966e-05,
      "loss": 2.7896,
      "step": 185878
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.456577777862549,
      "learning_rate": 5.3607202140102424e-05,
      "loss": 2.9654,
      "step": 185879
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.48443865776062,
      "learning_rate": 5.360486856633497e-05,
      "loss": 2.9695,
      "step": 185880
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.351468324661255,
      "learning_rate": 5.3602535038376904e-05,
      "loss": 2.8228,
      "step": 185881
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5056519508361816,
      "learning_rate": 5.3600201556228896e-05,
      "loss": 3.134,
      "step": 185882
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.870194673538208,
      "learning_rate": 5.3597868119891174e-05,
      "loss": 2.7692,
      "step": 185883
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4760541915893555,
      "learning_rate": 5.359553472936431e-05,
      "loss": 3.0413,
      "step": 185884
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0999600887298584,
      "learning_rate": 5.3593201384648696e-05,
      "loss": 2.8987,
      "step": 185885
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.309648275375366,
      "learning_rate": 5.3590868085744774e-05,
      "loss": 2.8745,
      "step": 185886
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1889586448669434,
      "learning_rate": 5.358853483265287e-05,
      "loss": 2.8299,
      "step": 185887
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.401154041290283,
      "learning_rate": 5.3586201625373595e-05,
      "loss": 2.8745,
      "step": 185888
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9856064319610596,
      "learning_rate": 5.358386846390723e-05,
      "loss": 2.7803,
      "step": 185889
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9656715393066406,
      "learning_rate": 5.3581535348254355e-05,
      "loss": 3.0166,
      "step": 185890
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.04954195022583,
      "learning_rate": 5.35792022784153e-05,
      "loss": 2.668,
      "step": 185891
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0876991748809814,
      "learning_rate": 5.357686925439053e-05,
      "loss": 2.7548,
      "step": 185892
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9937055110931396,
      "learning_rate": 5.3574536276180376e-05,
      "loss": 2.9638,
      "step": 185893
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8242664337158203,
      "learning_rate": 5.357220334378548e-05,
      "loss": 2.7779,
      "step": 185894
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4325449466705322,
      "learning_rate": 5.3569870457206065e-05,
      "loss": 2.7881,
      "step": 185895
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.434793472290039,
      "learning_rate": 5.3567537616442735e-05,
      "loss": 3.1425,
      "step": 185896
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.964834451675415,
      "learning_rate": 5.356520482149589e-05,
      "loss": 2.9777,
      "step": 185897
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7779381275177,
      "learning_rate": 5.356287207236589e-05,
      "loss": 3.0252,
      "step": 185898
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5396060943603516,
      "learning_rate": 5.356053936905311e-05,
      "loss": 2.9167,
      "step": 185899
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3070597648620605,
      "learning_rate": 5.355820671155818e-05,
      "loss": 2.7073,
      "step": 185900
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.093319892883301,
      "learning_rate": 5.355587409988137e-05,
      "loss": 3.1645,
      "step": 185901
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.581491470336914,
      "learning_rate": 5.355354153402324e-05,
      "loss": 2.8662,
      "step": 185902
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.603828191757202,
      "learning_rate": 5.355120901398409e-05,
      "loss": 2.6716,
      "step": 185903
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.61806583404541,
      "learning_rate": 5.354887653976455e-05,
      "loss": 2.8689,
      "step": 185904
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.868028163909912,
      "learning_rate": 5.354654411136476e-05,
      "loss": 2.7977,
      "step": 185905
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.636340618133545,
      "learning_rate": 5.3544211728785425e-05,
      "loss": 3.102,
      "step": 185906
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7582571506500244,
      "learning_rate": 5.3541879392026796e-05,
      "loss": 2.8779,
      "step": 185907
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5624899864196777,
      "learning_rate": 5.3539547101089486e-05,
      "loss": 2.8837,
      "step": 185908
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5932812690734863,
      "learning_rate": 5.3537214855973724e-05,
      "loss": 2.9095,
      "step": 185909
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4064431190490723,
      "learning_rate": 5.353488265668017e-05,
      "loss": 2.9152,
      "step": 185910
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9081926345825195,
      "learning_rate": 5.3532550503209026e-05,
      "loss": 2.8422,
      "step": 185911
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.635946035385132,
      "learning_rate": 5.353021839556086e-05,
      "loss": 3.1426,
      "step": 185912
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.833534002304077,
      "learning_rate": 5.3527886333736044e-05,
      "loss": 3.1286,
      "step": 185913
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.105243444442749,
      "learning_rate": 5.352555431773511e-05,
      "loss": 3.068,
      "step": 185914
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6691184043884277,
      "learning_rate": 5.3523222347558384e-05,
      "loss": 2.9527,
      "step": 185915
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4481565952301025,
      "learning_rate": 5.3520890423206463e-05,
      "loss": 3.1985,
      "step": 185916
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.309354782104492,
      "learning_rate": 5.351855854467949e-05,
      "loss": 2.9401,
      "step": 185917
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.604836940765381,
      "learning_rate": 5.3516226711978164e-05,
      "loss": 2.8841,
      "step": 185918
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7929399013519287,
      "learning_rate": 5.3513894925102774e-05,
      "loss": 3.1448,
      "step": 185919
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8475332260131836,
      "learning_rate": 5.3511563184053866e-05,
      "loss": 2.9311,
      "step": 185920
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7766168117523193,
      "learning_rate": 5.3509231488831697e-05,
      "loss": 2.6621,
      "step": 185921
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0152323246002197,
      "learning_rate": 5.350689983943694e-05,
      "loss": 2.8703,
      "step": 185922
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.037912130355835,
      "learning_rate": 5.3504568235869885e-05,
      "loss": 2.822,
      "step": 185923
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1534156799316406,
      "learning_rate": 5.3502236678130975e-05,
      "loss": 2.8984,
      "step": 185924
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7107386589050293,
      "learning_rate": 5.3499905166220566e-05,
      "loss": 2.8882,
      "step": 185925
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8173587322235107,
      "learning_rate": 5.3497573700139274e-05,
      "loss": 2.9422,
      "step": 185926
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.692031145095825,
      "learning_rate": 5.3495242279887384e-05,
      "loss": 2.8481,
      "step": 185927
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.38362717628479,
      "learning_rate": 5.3492910905465405e-05,
      "loss": 2.8536,
      "step": 185928
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3676061630249023,
      "learning_rate": 5.3490579576873796e-05,
      "loss": 2.8395,
      "step": 185929
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7290380001068115,
      "learning_rate": 5.3488248294112925e-05,
      "loss": 3.0748,
      "step": 185930
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5198702812194824,
      "learning_rate": 5.348591705718312e-05,
      "loss": 2.8562,
      "step": 185931
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4887044429779053,
      "learning_rate": 5.3483585866085066e-05,
      "loss": 3.0619,
      "step": 185932
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.603933334350586,
      "learning_rate": 5.348125472081898e-05,
      "loss": 2.7584,
      "step": 185933
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.3003952503204346,
      "learning_rate": 5.347892362138546e-05,
      "loss": 3.2083,
      "step": 185934
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3205361366271973,
      "learning_rate": 5.347659256778485e-05,
      "loss": 3.0084,
      "step": 185935
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.745971441268921,
      "learning_rate": 5.347426156001754e-05,
      "loss": 2.7821,
      "step": 185936
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9858787059783936,
      "learning_rate": 5.347193059808407e-05,
      "loss": 2.6926,
      "step": 185937
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5378732681274414,
      "learning_rate": 5.3469599681984835e-05,
      "loss": 2.9443,
      "step": 185938
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.103421211242676,
      "learning_rate": 5.3467268811720165e-05,
      "loss": 2.8386,
      "step": 185939
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.620938777923584,
      "learning_rate": 5.346493798729067e-05,
      "loss": 3.098,
      "step": 185940
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7267773151397705,
      "learning_rate": 5.3462607208696705e-05,
      "loss": 2.7488,
      "step": 185941
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.02549409866333,
      "learning_rate": 5.346027647593861e-05,
      "loss": 2.88,
      "step": 185942
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8254830837249756,
      "learning_rate": 5.345794578901699e-05,
      "loss": 2.9054,
      "step": 185943
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.02232027053833,
      "learning_rate": 5.345561514793209e-05,
      "loss": 2.9204,
      "step": 185944
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.733039140701294,
      "learning_rate": 5.345328455268454e-05,
      "loss": 2.9119,
      "step": 185945
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.187777280807495,
      "learning_rate": 5.3450954003274685e-05,
      "loss": 2.9336,
      "step": 185946
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.3532204627990723,
      "learning_rate": 5.3448623499702925e-05,
      "loss": 3.0409,
      "step": 185947
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.048267126083374,
      "learning_rate": 5.344629304196967e-05,
      "loss": 3.015,
      "step": 185948
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.696323871612549,
      "learning_rate": 5.3443962630075474e-05,
      "loss": 2.8805,
      "step": 185949
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.206849098205566,
      "learning_rate": 5.344163226402062e-05,
      "loss": 2.8393,
      "step": 185950
    },
    {
      "epoch": 2.42,
      "grad_norm": 6.105905055999756,
      "learning_rate": 5.3439301943805694e-05,
      "loss": 2.6647,
      "step": 185951
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.0748138427734375,
      "learning_rate": 5.343697166943106e-05,
      "loss": 3.0356,
      "step": 185952
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6824212074279785,
      "learning_rate": 5.343464144089713e-05,
      "loss": 2.7641,
      "step": 185953
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5613417625427246,
      "learning_rate": 5.3432311258204296e-05,
      "loss": 2.9694,
      "step": 185954
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.415459394454956,
      "learning_rate": 5.3429981121353126e-05,
      "loss": 2.8686,
      "step": 185955
    },
    {
      "epoch": 2.42,
      "grad_norm": 6.686030864715576,
      "learning_rate": 5.342765103034389e-05,
      "loss": 2.7605,
      "step": 185956
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.415018320083618,
      "learning_rate": 5.342532098517721e-05,
      "loss": 3.0047,
      "step": 185957
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.23142671585083,
      "learning_rate": 5.34229909858534e-05,
      "loss": 2.8776,
      "step": 185958
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.47967004776001,
      "learning_rate": 5.3420661032372917e-05,
      "loss": 3.0684,
      "step": 185959
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.012955665588379,
      "learning_rate": 5.341833112473609e-05,
      "loss": 2.9513,
      "step": 185960
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.650585412979126,
      "learning_rate": 5.341600126294353e-05,
      "loss": 2.952,
      "step": 185961
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.778921365737915,
      "learning_rate": 5.341367144699553e-05,
      "loss": 2.9458,
      "step": 185962
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3270673751831055,
      "learning_rate": 5.341134167689266e-05,
      "loss": 3.3003,
      "step": 185963
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.820429801940918,
      "learning_rate": 5.340901195263518e-05,
      "loss": 2.8383,
      "step": 185964
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5661940574645996,
      "learning_rate": 5.34066822742238e-05,
      "loss": 2.8071,
      "step": 185965
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.292130470275879,
      "learning_rate": 5.340435264165861e-05,
      "loss": 2.8505,
      "step": 185966
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1102232933044434,
      "learning_rate": 5.340202305494028e-05,
      "loss": 2.964,
      "step": 185967
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.664341688156128,
      "learning_rate": 5.339969351406908e-05,
      "loss": 2.876,
      "step": 185968
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5085413455963135,
      "learning_rate": 5.339736401904563e-05,
      "loss": 3.1306,
      "step": 185969
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.423828125,
      "learning_rate": 5.339503456987018e-05,
      "loss": 3.1942,
      "step": 185970
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.237924575805664,
      "learning_rate": 5.339270516654338e-05,
      "loss": 2.949,
      "step": 185971
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.851130485534668,
      "learning_rate": 5.339037580906541e-05,
      "loss": 2.8262,
      "step": 185972
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3011250495910645,
      "learning_rate": 5.3388046497436866e-05,
      "loss": 2.6419,
      "step": 185973
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.886107921600342,
      "learning_rate": 5.3385717231658076e-05,
      "loss": 3.2648,
      "step": 185974
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.859586715698242,
      "learning_rate": 5.3383388011729646e-05,
      "loss": 2.8453,
      "step": 185975
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.425884246826172,
      "learning_rate": 5.33810588376518e-05,
      "loss": 2.9274,
      "step": 185976
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.769259214401245,
      "learning_rate": 5.337872970942522e-05,
      "loss": 2.8422,
      "step": 185977
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1202127933502197,
      "learning_rate": 5.337640062705002e-05,
      "loss": 3.0562,
      "step": 185978
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4962964057922363,
      "learning_rate": 5.337407159052691e-05,
      "loss": 3.0425,
      "step": 185979
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.898587942123413,
      "learning_rate": 5.337174259985613e-05,
      "loss": 3.0647,
      "step": 185980
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.377159595489502,
      "learning_rate": 5.336941365503826e-05,
      "loss": 2.8187,
      "step": 185981
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6295855045318604,
      "learning_rate": 5.336708475607362e-05,
      "loss": 2.8413,
      "step": 185982
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.609987497329712,
      "learning_rate": 5.336475590296283e-05,
      "loss": 2.7947,
      "step": 185983
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.897150993347168,
      "learning_rate": 5.3362427095706025e-05,
      "loss": 3.0923,
      "step": 185984
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.641310930252075,
      "learning_rate": 5.336009833430388e-05,
      "loss": 2.9647,
      "step": 185985
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2705252170562744,
      "learning_rate": 5.335776961875669e-05,
      "loss": 2.7006,
      "step": 185986
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8997719287872314,
      "learning_rate": 5.335544094906501e-05,
      "loss": 2.75,
      "step": 185987
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8158984184265137,
      "learning_rate": 5.335311232522915e-05,
      "loss": 2.9051,
      "step": 185988
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.421177864074707,
      "learning_rate": 5.3350783747249746e-05,
      "loss": 2.8533,
      "step": 185989
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4253270626068115,
      "learning_rate": 5.334845521512693e-05,
      "loss": 2.8455,
      "step": 185990
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6673107147216797,
      "learning_rate": 5.334612672886136e-05,
      "loss": 2.8433,
      "step": 185991
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.720452308654785,
      "learning_rate": 5.334379828845334e-05,
      "loss": 3.062,
      "step": 185992
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4755756855010986,
      "learning_rate": 5.334146989390341e-05,
      "loss": 2.9274,
      "step": 185993
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.743561029434204,
      "learning_rate": 5.3339141545211924e-05,
      "loss": 2.8116,
      "step": 185994
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.69966459274292,
      "learning_rate": 5.333681324237946e-05,
      "loss": 2.8604,
      "step": 185995
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.440826654434204,
      "learning_rate": 5.333448498540621e-05,
      "loss": 2.584,
      "step": 185996
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.511776447296143,
      "learning_rate": 5.333215677429281e-05,
      "loss": 2.7321,
      "step": 185997
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5939817428588867,
      "learning_rate": 5.332982860903956e-05,
      "loss": 3.0348,
      "step": 185998
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2589123249053955,
      "learning_rate": 5.332750048964699e-05,
      "loss": 2.9899,
      "step": 185999
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0759377479553223,
      "learning_rate": 5.332517241611544e-05,
      "loss": 2.9414,
      "step": 186000
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.556267499923706,
      "learning_rate": 5.3322844388445526e-05,
      "loss": 3.003,
      "step": 186001
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3408870697021484,
      "learning_rate": 5.3320516406637434e-05,
      "loss": 3.0184,
      "step": 186002
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7409956455230713,
      "learning_rate": 5.331818847069179e-05,
      "loss": 2.9073,
      "step": 186003
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9839792251586914,
      "learning_rate": 5.331586058060886e-05,
      "loss": 3.0914,
      "step": 186004
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.730949640274048,
      "learning_rate": 5.331353273638924e-05,
      "loss": 2.7537,
      "step": 186005
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.645198345184326,
      "learning_rate": 5.33112049380332e-05,
      "loss": 2.8887,
      "step": 186006
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.400778293609619,
      "learning_rate": 5.330887718554138e-05,
      "loss": 2.9079,
      "step": 186007
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1329829692840576,
      "learning_rate": 5.3306549478914064e-05,
      "loss": 2.6896,
      "step": 186008
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6190288066864014,
      "learning_rate": 5.330422181815173e-05,
      "loss": 2.9854,
      "step": 186009
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.05853009223938,
      "learning_rate": 5.33018942032547e-05,
      "loss": 2.9774,
      "step": 186010
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.998140335083008,
      "learning_rate": 5.3299566634223625e-05,
      "loss": 3.0075,
      "step": 186011
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6910555362701416,
      "learning_rate": 5.329723911105872e-05,
      "loss": 2.9419,
      "step": 186012
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.547410488128662,
      "learning_rate": 5.329491163376056e-05,
      "loss": 2.834,
      "step": 186013
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1299095153808594,
      "learning_rate": 5.329258420232958e-05,
      "loss": 3.0579,
      "step": 186014
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.37430739402771,
      "learning_rate": 5.329025681676614e-05,
      "loss": 3.0601,
      "step": 186015
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9139440059661865,
      "learning_rate": 5.328792947707065e-05,
      "loss": 2.7689,
      "step": 186016
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.643319845199585,
      "learning_rate": 5.328560218324363e-05,
      "loss": 2.8783,
      "step": 186017
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.818326234817505,
      "learning_rate": 5.328327493528541e-05,
      "loss": 2.8164,
      "step": 186018
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8240177631378174,
      "learning_rate": 5.328094773319658e-05,
      "loss": 3.0114,
      "step": 186019
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.959808588027954,
      "learning_rate": 5.327862057697745e-05,
      "loss": 3.0709,
      "step": 186020
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0256972312927246,
      "learning_rate": 5.327629346662843e-05,
      "loss": 3.0343,
      "step": 186021
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.552644968032837,
      "learning_rate": 5.3273966402150046e-05,
      "loss": 2.8983,
      "step": 186022
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.9574077129364014,
      "learning_rate": 5.3271639383542705e-05,
      "loss": 2.8729,
      "step": 186023
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.697139024734497,
      "learning_rate": 5.326931241080677e-05,
      "loss": 3.2243,
      "step": 186024
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1739180088043213,
      "learning_rate": 5.326698548394278e-05,
      "loss": 2.8649,
      "step": 186025
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.894678831100464,
      "learning_rate": 5.3264658602951124e-05,
      "loss": 3.0988,
      "step": 186026
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.622446060180664,
      "learning_rate": 5.326233176783218e-05,
      "loss": 2.7828,
      "step": 186027
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.116409778594971,
      "learning_rate": 5.326000497858647e-05,
      "loss": 2.7802,
      "step": 186028
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7174880504608154,
      "learning_rate": 5.3257678235214365e-05,
      "loss": 2.7182,
      "step": 186029
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.018398284912109,
      "learning_rate": 5.3255351537716265e-05,
      "loss": 2.9453,
      "step": 186030
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0737907886505127,
      "learning_rate": 5.325302488609273e-05,
      "loss": 2.8189,
      "step": 186031
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.830409288406372,
      "learning_rate": 5.32506982803441e-05,
      "loss": 3.0095,
      "step": 186032
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.819236993789673,
      "learning_rate": 5.324837172047075e-05,
      "loss": 2.9788,
      "step": 186033
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.069602012634277,
      "learning_rate": 5.324604520647327e-05,
      "loss": 2.8414,
      "step": 186034
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7181413173675537,
      "learning_rate": 5.3243718738351914e-05,
      "loss": 3.12,
      "step": 186035
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.391880750656128,
      "learning_rate": 5.32413923161073e-05,
      "loss": 2.9859,
      "step": 186036
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6187760829925537,
      "learning_rate": 5.323906593973979e-05,
      "loss": 2.9164,
      "step": 186037
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.26802921295166,
      "learning_rate": 5.323673960924974e-05,
      "loss": 3.1105,
      "step": 186038
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.256246328353882,
      "learning_rate": 5.3234413324637603e-05,
      "loss": 3.0021,
      "step": 186039
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.689086675643921,
      "learning_rate": 5.323208708590393e-05,
      "loss": 2.8968,
      "step": 186040
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7547175884246826,
      "learning_rate": 5.322976089304899e-05,
      "loss": 3.1262,
      "step": 186041
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.52066707611084,
      "learning_rate": 5.3227434746073346e-05,
      "loss": 3.0476,
      "step": 186042
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2401394844055176,
      "learning_rate": 5.322510864497731e-05,
      "loss": 2.9516,
      "step": 186043
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5030312538146973,
      "learning_rate": 5.322278258976157e-05,
      "loss": 2.7794,
      "step": 186044
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.658975124359131,
      "learning_rate": 5.32204565804262e-05,
      "loss": 3.0399,
      "step": 186045
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6402957439422607,
      "learning_rate": 5.321813061697185e-05,
      "loss": 2.7982,
      "step": 186046
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.821272611618042,
      "learning_rate": 5.3215804699398876e-05,
      "loss": 3.1632,
      "step": 186047
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.05222487449646,
      "learning_rate": 5.3213478827707804e-05,
      "loss": 3.0757,
      "step": 186048
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4395272731781006,
      "learning_rate": 5.321115300189892e-05,
      "loss": 3.0667,
      "step": 186049
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.7530999183654785,
      "learning_rate": 5.3208827221972904e-05,
      "loss": 3.0416,
      "step": 186050
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9978408813476562,
      "learning_rate": 5.3206501487929855e-05,
      "loss": 2.8001,
      "step": 186051
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7221922874450684,
      "learning_rate": 5.320417579977047e-05,
      "loss": 2.8683,
      "step": 186052
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.836912155151367,
      "learning_rate": 5.320185015749501e-05,
      "loss": 3.07,
      "step": 186053
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1348040103912354,
      "learning_rate": 5.3199524561104044e-05,
      "loss": 3.0877,
      "step": 186054
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.475045919418335,
      "learning_rate": 5.319719901059791e-05,
      "loss": 2.8098,
      "step": 186055
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.947138786315918,
      "learning_rate": 5.319487350597718e-05,
      "loss": 2.8944,
      "step": 186056
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4779982566833496,
      "learning_rate": 5.319254804724207e-05,
      "loss": 2.9388,
      "step": 186057
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.2747275829315186,
      "learning_rate": 5.3190222634393184e-05,
      "loss": 3.2305,
      "step": 186058
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.702653646469116,
      "learning_rate": 5.31878972674308e-05,
      "loss": 2.8167,
      "step": 186059
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8174781799316406,
      "learning_rate": 5.3185571946355545e-05,
      "loss": 2.9507,
      "step": 186060
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.918191909790039,
      "learning_rate": 5.318324667116768e-05,
      "loss": 2.8171,
      "step": 186061
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.822136163711548,
      "learning_rate": 5.3180921441867844e-05,
      "loss": 3.1439,
      "step": 186062
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.067345142364502,
      "learning_rate": 5.317859625845616e-05,
      "loss": 2.9507,
      "step": 186063
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.951369285583496,
      "learning_rate": 5.317627112093334e-05,
      "loss": 2.4003,
      "step": 186064
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.679706335067749,
      "learning_rate": 5.317394602929962e-05,
      "loss": 2.7763,
      "step": 186065
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.110658168792725,
      "learning_rate": 5.3171620983555616e-05,
      "loss": 3.0277,
      "step": 186066
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.97815203666687,
      "learning_rate": 5.316929598370158e-05,
      "loss": 2.7425,
      "step": 186067
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.483232259750366,
      "learning_rate": 5.316697102973819e-05,
      "loss": 2.7803,
      "step": 186068
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.76676344871521,
      "learning_rate": 5.3164646121665565e-05,
      "loss": 3.04,
      "step": 186069
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.910770893096924,
      "learning_rate": 5.3162321259484364e-05,
      "loss": 3.159,
      "step": 186070
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5325679779052734,
      "learning_rate": 5.315999644319489e-05,
      "loss": 2.8754,
      "step": 186071
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2792091369628906,
      "learning_rate": 5.315767167279766e-05,
      "loss": 3.0199,
      "step": 186072
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.773089647293091,
      "learning_rate": 5.315534694829307e-05,
      "loss": 3.162,
      "step": 186073
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3368492126464844,
      "learning_rate": 5.3153022269681654e-05,
      "loss": 2.8855,
      "step": 186074
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.55641770362854,
      "learning_rate": 5.315069763696364e-05,
      "loss": 3.0797,
      "step": 186075
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.028160333633423,
      "learning_rate": 5.31483730501396e-05,
      "loss": 3.1988,
      "step": 186076
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6894381046295166,
      "learning_rate": 5.3146048509209896e-05,
      "loss": 3.0482,
      "step": 186077
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.2277026176452637,
      "learning_rate": 5.314372401417507e-05,
      "loss": 3.0692,
      "step": 186078
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.871058464050293,
      "learning_rate": 5.31413995650354e-05,
      "loss": 2.8087,
      "step": 186079
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.403591632843018,
      "learning_rate": 5.3139075161791586e-05,
      "loss": 2.8118,
      "step": 186080
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0923662185668945,
      "learning_rate": 5.3136750804443696e-05,
      "loss": 2.9039,
      "step": 186081
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.153729200363159,
      "learning_rate": 5.313442649299242e-05,
      "loss": 3.0338,
      "step": 186082
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2101590633392334,
      "learning_rate": 5.313210222743807e-05,
      "loss": 2.8024,
      "step": 186083
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5934951305389404,
      "learning_rate": 5.3129778007781164e-05,
      "loss": 2.8691,
      "step": 186084
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8862979412078857,
      "learning_rate": 5.3127453834022014e-05,
      "loss": 3.1686,
      "step": 186085
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.310307025909424,
      "learning_rate": 5.3125129706161316e-05,
      "loss": 2.8157,
      "step": 186086
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.539691209793091,
      "learning_rate": 5.312280562419914e-05,
      "loss": 3.0386,
      "step": 186087
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.978184461593628,
      "learning_rate": 5.312048158813618e-05,
      "loss": 3.0365,
      "step": 186088
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6860203742980957,
      "learning_rate": 5.31181575979727e-05,
      "loss": 2.7049,
      "step": 186089
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.751941442489624,
      "learning_rate": 5.311583365370927e-05,
      "loss": 2.978,
      "step": 186090
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.857609987258911,
      "learning_rate": 5.311350975534623e-05,
      "loss": 2.7089,
      "step": 186091
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7738261222839355,
      "learning_rate": 5.3111185902884165e-05,
      "loss": 3.2085,
      "step": 186092
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8577866554260254,
      "learning_rate": 5.310886209632329e-05,
      "loss": 3.0535,
      "step": 186093
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8918120861053467,
      "learning_rate": 5.310653833566416e-05,
      "loss": 2.814,
      "step": 186094
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.823394775390625,
      "learning_rate": 5.310421462090715e-05,
      "loss": 2.9029,
      "step": 186095
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7854530811309814,
      "learning_rate": 5.310189095205278e-05,
      "loss": 2.778,
      "step": 186096
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1683270931243896,
      "learning_rate": 5.309956732910137e-05,
      "loss": 2.8981,
      "step": 186097
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.734670877456665,
      "learning_rate": 5.3097243752053464e-05,
      "loss": 2.8262,
      "step": 186098
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.022850513458252,
      "learning_rate": 5.3094920220909445e-05,
      "loss": 2.9428,
      "step": 186099
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.048663854598999,
      "learning_rate": 5.3092596735669734e-05,
      "loss": 2.7831,
      "step": 186100
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5252110958099365,
      "learning_rate": 5.309027329633471e-05,
      "loss": 2.7014,
      "step": 186101
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4655556678771973,
      "learning_rate": 5.308794990290496e-05,
      "loss": 2.9767,
      "step": 186102
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.271310329437256,
      "learning_rate": 5.308562655538072e-05,
      "loss": 2.8198,
      "step": 186103
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6521270275115967,
      "learning_rate": 5.3083303253762596e-05,
      "loss": 2.9934,
      "step": 186104
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5076262950897217,
      "learning_rate": 5.308097999805095e-05,
      "loss": 2.8328,
      "step": 186105
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.19034481048584,
      "learning_rate": 5.307865678824612e-05,
      "loss": 2.7931,
      "step": 186106
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.643744707107544,
      "learning_rate": 5.307633362434873e-05,
      "loss": 2.716,
      "step": 186107
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5210654735565186,
      "learning_rate": 5.307401050635912e-05,
      "loss": 3.0295,
      "step": 186108
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5319559574127197,
      "learning_rate": 5.307168743427762e-05,
      "loss": 2.8714,
      "step": 186109
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8281354904174805,
      "learning_rate": 5.306936440810483e-05,
      "loss": 3.0458,
      "step": 186110
    },
    {
      "epoch": 2.42,
      "grad_norm": 5.51629638671875,
      "learning_rate": 5.306704142784108e-05,
      "loss": 2.946,
      "step": 186111
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.375740051269531,
      "learning_rate": 5.306471849348678e-05,
      "loss": 2.8005,
      "step": 186112
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5649900436401367,
      "learning_rate": 5.306239560504249e-05,
      "loss": 2.94,
      "step": 186113
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9975218772888184,
      "learning_rate": 5.306007276250857e-05,
      "loss": 2.7764,
      "step": 186114
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8744428157806396,
      "learning_rate": 5.305774996588536e-05,
      "loss": 3.0368,
      "step": 186115
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.821070432662964,
      "learning_rate": 5.305542721517342e-05,
      "loss": 2.9925,
      "step": 186116
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.410684823989868,
      "learning_rate": 5.305310451037319e-05,
      "loss": 3.1214,
      "step": 186117
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2177720069885254,
      "learning_rate": 5.305078185148494e-05,
      "loss": 3.0526,
      "step": 186118
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8597452640533447,
      "learning_rate": 5.3048459238509285e-05,
      "loss": 3.2185,
      "step": 186119
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.821774482727051,
      "learning_rate": 5.304613667144651e-05,
      "loss": 2.9689,
      "step": 186120
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4340500831604004,
      "learning_rate": 5.3043814150297215e-05,
      "loss": 2.615,
      "step": 186121
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.840936183929443,
      "learning_rate": 5.304149167506172e-05,
      "loss": 2.6868,
      "step": 186122
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.828686237335205,
      "learning_rate": 5.303916924574047e-05,
      "loss": 2.8153,
      "step": 186123
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5935745239257812,
      "learning_rate": 5.303684686233385e-05,
      "loss": 3.0438,
      "step": 186124
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.165597438812256,
      "learning_rate": 5.3034524524842405e-05,
      "loss": 3.0307,
      "step": 186125
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4035251140594482,
      "learning_rate": 5.30322022332664e-05,
      "loss": 3.0133,
      "step": 186126
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.625463008880615,
      "learning_rate": 5.302987998760646e-05,
      "loss": 2.98,
      "step": 186127
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9125077724456787,
      "learning_rate": 5.302755778786286e-05,
      "loss": 3.0318,
      "step": 186128
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8658738136291504,
      "learning_rate": 5.3025235634036264e-05,
      "loss": 2.8693,
      "step": 186129
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.822714328765869,
      "learning_rate": 5.3022913526126776e-05,
      "loss": 3.1304,
      "step": 186130
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.810295581817627,
      "learning_rate": 5.302059146413505e-05,
      "loss": 2.9424,
      "step": 186131
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.863300323486328,
      "learning_rate": 5.3018269448061403e-05,
      "loss": 2.9062,
      "step": 186132
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.798980712890625,
      "learning_rate": 5.301594747790639e-05,
      "loss": 2.8751,
      "step": 186133
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8836910724639893,
      "learning_rate": 5.30136255536703e-05,
      "loss": 2.7371,
      "step": 186134
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.978681802749634,
      "learning_rate": 5.301130367535379e-05,
      "loss": 2.8873,
      "step": 186135
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.713799476623535,
      "learning_rate": 5.300898184295698e-05,
      "loss": 2.9204,
      "step": 186136
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.564389228820801,
      "learning_rate": 5.300666005648056e-05,
      "loss": 3.1253,
      "step": 186137
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3954086303710938,
      "learning_rate": 5.300433831592479e-05,
      "loss": 2.7535,
      "step": 186138
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.653501272201538,
      "learning_rate": 5.300201662129021e-05,
      "loss": 3.0866,
      "step": 186139
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6558759212493896,
      "learning_rate": 5.2999694972577166e-05,
      "loss": 2.9152,
      "step": 186140
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1882989406585693,
      "learning_rate": 5.299737336978632e-05,
      "loss": 3.031,
      "step": 186141
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.5191824436187744,
      "learning_rate": 5.299505181291774e-05,
      "loss": 2.8501,
      "step": 186142
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6591200828552246,
      "learning_rate": 5.299273030197213e-05,
      "loss": 2.9564,
      "step": 186143
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0989432334899902,
      "learning_rate": 5.299040883694975e-05,
      "loss": 2.8397,
      "step": 186144
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1973445415496826,
      "learning_rate": 5.29880874178512e-05,
      "loss": 2.9584,
      "step": 186145
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6711251735687256,
      "learning_rate": 5.298576604467675e-05,
      "loss": 3.1172,
      "step": 186146
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.077808856964111,
      "learning_rate": 5.298344471742706e-05,
      "loss": 2.9065,
      "step": 186147
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8901426792144775,
      "learning_rate": 5.298112343610227e-05,
      "loss": 2.9582,
      "step": 186148
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2531533241271973,
      "learning_rate": 5.297880220070301e-05,
      "loss": 2.9807,
      "step": 186149
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.639181137084961,
      "learning_rate": 5.2976481011229575e-05,
      "loss": 2.8837,
      "step": 186150
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.444364309310913,
      "learning_rate": 5.297415986768257e-05,
      "loss": 2.9158,
      "step": 186151
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.2146520614624023,
      "learning_rate": 5.2971838770062234e-05,
      "loss": 2.9316,
      "step": 186152
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0295748710632324,
      "learning_rate": 5.296951771836926e-05,
      "loss": 3.1178,
      "step": 186153
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.464452266693115,
      "learning_rate": 5.2967196712603776e-05,
      "loss": 2.7611,
      "step": 186154
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6345455646514893,
      "learning_rate": 5.2964875752766424e-05,
      "loss": 2.8007,
      "step": 186155
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.746727228164673,
      "learning_rate": 5.296255483885746e-05,
      "loss": 2.7492,
      "step": 186156
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9580399990081787,
      "learning_rate": 5.296023397087753e-05,
      "loss": 3.1552,
      "step": 186157
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.236161708831787,
      "learning_rate": 5.295791314882686e-05,
      "loss": 2.8514,
      "step": 186158
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0388944149017334,
      "learning_rate": 5.295559237270615e-05,
      "loss": 2.9836,
      "step": 186159
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.398472785949707,
      "learning_rate": 5.2953271642515494e-05,
      "loss": 3.1005,
      "step": 186160
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.413548707962036,
      "learning_rate": 5.295095095825557e-05,
      "loss": 2.8117,
      "step": 186161
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.692469596862793,
      "learning_rate": 5.294863031992663e-05,
      "loss": 2.8674,
      "step": 186162
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.264859914779663,
      "learning_rate": 5.294630972752929e-05,
      "loss": 3.066,
      "step": 186163
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.600346088409424,
      "learning_rate": 5.294398918106384e-05,
      "loss": 2.7887,
      "step": 186164
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6411776542663574,
      "learning_rate": 5.2941668680530814e-05,
      "loss": 3.0289,
      "step": 186165
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7019877433776855,
      "learning_rate": 5.293934822593061e-05,
      "loss": 3.1388,
      "step": 186166
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3947854042053223,
      "learning_rate": 5.2937027817263634e-05,
      "loss": 3.1467,
      "step": 186167
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.570739984512329,
      "learning_rate": 5.2934707454530245e-05,
      "loss": 3.0349,
      "step": 186168
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.6789045333862305,
      "learning_rate": 5.2932387137731046e-05,
      "loss": 3.024,
      "step": 186169
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2586276531219482,
      "learning_rate": 5.2930066866866336e-05,
      "loss": 2.7599,
      "step": 186170
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2287135124206543,
      "learning_rate": 5.292774664193662e-05,
      "loss": 2.7953,
      "step": 186171
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.5926642417907715,
      "learning_rate": 5.2925426462942324e-05,
      "loss": 2.9966,
      "step": 186172
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.902484178543091,
      "learning_rate": 5.2923106329883844e-05,
      "loss": 2.9739,
      "step": 186173
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8669168949127197,
      "learning_rate": 5.292078624276156e-05,
      "loss": 2.96,
      "step": 186174
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.791665554046631,
      "learning_rate": 5.2918466201576025e-05,
      "loss": 2.5933,
      "step": 186175
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.582303047180176,
      "learning_rate": 5.291614620632755e-05,
      "loss": 3.2448,
      "step": 186176
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7698657512664795,
      "learning_rate": 5.291382625701669e-05,
      "loss": 3.125,
      "step": 186177
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.29402232170105,
      "learning_rate": 5.291150635364382e-05,
      "loss": 3.0428,
      "step": 186178
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7560901641845703,
      "learning_rate": 5.290918649620936e-05,
      "loss": 2.88,
      "step": 186179
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.213282108306885,
      "learning_rate": 5.2906866684713666e-05,
      "loss": 2.9066,
      "step": 186180
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2520833015441895,
      "learning_rate": 5.290454691915732e-05,
      "loss": 2.7798,
      "step": 186181
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9385509490966797,
      "learning_rate": 5.2902227199540624e-05,
      "loss": 2.9847,
      "step": 186182
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2493655681610107,
      "learning_rate": 5.2899907525864146e-05,
      "loss": 2.9096,
      "step": 186183
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0259900093078613,
      "learning_rate": 5.2897587898128256e-05,
      "loss": 2.9482,
      "step": 186184
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8688933849334717,
      "learning_rate": 5.289526831633332e-05,
      "loss": 2.9146,
      "step": 186185
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.788905143737793,
      "learning_rate": 5.2892948780479794e-05,
      "loss": 2.8992,
      "step": 186186
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9705514907836914,
      "learning_rate": 5.2890629290568185e-05,
      "loss": 3.0081,
      "step": 186187
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.7220613956451416,
      "learning_rate": 5.2888309846598797e-05,
      "loss": 3.0649,
      "step": 186188
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6789138317108154,
      "learning_rate": 5.288599044857219e-05,
      "loss": 2.7951,
      "step": 186189
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.33394193649292,
      "learning_rate": 5.2883671096488766e-05,
      "loss": 3.0361,
      "step": 186190
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9534783363342285,
      "learning_rate": 5.2881351790348924e-05,
      "loss": 2.8204,
      "step": 186191
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0388388633728027,
      "learning_rate": 5.2879032530153064e-05,
      "loss": 2.6178,
      "step": 186192
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8200130462646484,
      "learning_rate": 5.287671331590169e-05,
      "loss": 2.87,
      "step": 186193
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1382699012756348,
      "learning_rate": 5.2874394147595125e-05,
      "loss": 3.1408,
      "step": 186194
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6762804985046387,
      "learning_rate": 5.2872075025233975e-05,
      "loss": 2.7711,
      "step": 186195
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.469454765319824,
      "learning_rate": 5.286975594881857e-05,
      "loss": 2.7414,
      "step": 186196
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8736648559570312,
      "learning_rate": 5.2867436918349245e-05,
      "loss": 2.879,
      "step": 186197
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.4451637268066406,
      "learning_rate": 5.286511793382661e-05,
      "loss": 3.1305,
      "step": 186198
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0057952404022217,
      "learning_rate": 5.286279899525101e-05,
      "loss": 3.0546,
      "step": 186199
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.515815496444702,
      "learning_rate": 5.2860480102622824e-05,
      "loss": 3.0383,
      "step": 186200
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.797539710998535,
      "learning_rate": 5.285816125594262e-05,
      "loss": 3.2042,
      "step": 186201
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.058892250061035,
      "learning_rate": 5.285584245521073e-05,
      "loss": 3.1356,
      "step": 186202
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8101823329925537,
      "learning_rate": 5.285352370042752e-05,
      "loss": 3.1078,
      "step": 186203
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6175239086151123,
      "learning_rate": 5.285120499159361e-05,
      "loss": 2.896,
      "step": 186204
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6708054542541504,
      "learning_rate": 5.284888632870922e-05,
      "loss": 3.0078,
      "step": 186205
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.512454032897949,
      "learning_rate": 5.2846567711774965e-05,
      "loss": 2.7169,
      "step": 186206
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.5111382007598877,
      "learning_rate": 5.284424914079123e-05,
      "loss": 2.9815,
      "step": 186207
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.704819917678833,
      "learning_rate": 5.2841930615758396e-05,
      "loss": 2.908,
      "step": 186208
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.8225257396698,
      "learning_rate": 5.2839612136676844e-05,
      "loss": 2.8937,
      "step": 186209
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.094116449356079,
      "learning_rate": 5.2837293703547135e-05,
      "loss": 2.9671,
      "step": 186210
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3913354873657227,
      "learning_rate": 5.283497531636957e-05,
      "loss": 2.9772,
      "step": 186211
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1983888149261475,
      "learning_rate": 5.283265697514471e-05,
      "loss": 2.771,
      "step": 186212
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9826366901397705,
      "learning_rate": 5.283033867987295e-05,
      "loss": 3.0335,
      "step": 186213
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.481971025466919,
      "learning_rate": 5.282802043055467e-05,
      "loss": 3.0072,
      "step": 186214
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.702439069747925,
      "learning_rate": 5.282570222719027e-05,
      "loss": 3.2329,
      "step": 186215
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.1767332553863525,
      "learning_rate": 5.2823384069780305e-05,
      "loss": 2.8604,
      "step": 186216
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.7528960704803467,
      "learning_rate": 5.282106595832508e-05,
      "loss": 2.7952,
      "step": 186217
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.828714609146118,
      "learning_rate": 5.281874789282513e-05,
      "loss": 2.9822,
      "step": 186218
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.2465121746063232,
      "learning_rate": 5.281642987328079e-05,
      "loss": 3.0061,
      "step": 186219
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6613118648529053,
      "learning_rate": 5.281411189969268e-05,
      "loss": 2.6977,
      "step": 186220
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.015385389328003,
      "learning_rate": 5.281179397206094e-05,
      "loss": 2.9292,
      "step": 186221
    },
    {
      "epoch": 2.42,
      "grad_norm": 4.203990936279297,
      "learning_rate": 5.2809476090386246e-05,
      "loss": 2.6211,
      "step": 186222
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0278077125549316,
      "learning_rate": 5.2807158254668855e-05,
      "loss": 3.0816,
      "step": 186223
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.4521689414978027,
      "learning_rate": 5.280484046490937e-05,
      "loss": 2.7659,
      "step": 186224
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9294872283935547,
      "learning_rate": 5.280252272110802e-05,
      "loss": 2.5702,
      "step": 186225
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.6608493328094482,
      "learning_rate": 5.2800205023265505e-05,
      "loss": 2.9615,
      "step": 186226
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.758573532104492,
      "learning_rate": 5.2797887371381964e-05,
      "loss": 2.9555,
      "step": 186227
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.745241165161133,
      "learning_rate": 5.2795569765458025e-05,
      "loss": 2.8015,
      "step": 186228
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.726595401763916,
      "learning_rate": 5.2793252205493985e-05,
      "loss": 2.6752,
      "step": 186229
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.035708427429199,
      "learning_rate": 5.279093469149042e-05,
      "loss": 2.6934,
      "step": 186230
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.379460096359253,
      "learning_rate": 5.2788617223447615e-05,
      "loss": 3.1513,
      "step": 186231
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.2933173179626465,
      "learning_rate": 5.278629980136615e-05,
      "loss": 2.8215,
      "step": 186232
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.598928928375244,
      "learning_rate": 5.2783982425246385e-05,
      "loss": 3.3184,
      "step": 186233
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.3384289741516113,
      "learning_rate": 5.278166509508872e-05,
      "loss": 2.9643,
      "step": 186234
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.8757638931274414,
      "learning_rate": 5.2779347810893555e-05,
      "loss": 3.0875,
      "step": 186235
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.80855131149292,
      "learning_rate": 5.277703057266146e-05,
      "loss": 2.7615,
      "step": 186236
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.702976703643799,
      "learning_rate": 5.277471338039266e-05,
      "loss": 2.8963,
      "step": 186237
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.87837290763855,
      "learning_rate": 5.277239623408783e-05,
      "loss": 2.8014,
      "step": 186238
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.53475022315979,
      "learning_rate": 5.277007913374727e-05,
      "loss": 2.7929,
      "step": 186239
    },
    {
      "epoch": 2.42,
      "grad_norm": 3.0550408363342285,
      "learning_rate": 5.27677620793714e-05,
      "loss": 2.9016,
      "step": 186240
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9503231048583984,
      "learning_rate": 5.2765445070960596e-05,
      "loss": 2.9151,
      "step": 186241
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.799954891204834,
      "learning_rate": 5.2763128108515464e-05,
      "loss": 2.9886,
      "step": 186242
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6063013076782227,
      "learning_rate": 5.2760811192036255e-05,
      "loss": 2.988,
      "step": 186243
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6791534423828125,
      "learning_rate": 5.275849432152351e-05,
      "loss": 2.7021,
      "step": 186244
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.98154878616333,
      "learning_rate": 5.275617749697767e-05,
      "loss": 2.861,
      "step": 186245
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7750093936920166,
      "learning_rate": 5.275386071839912e-05,
      "loss": 3.0408,
      "step": 186246
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.533689022064209,
      "learning_rate": 5.27515439857882e-05,
      "loss": 2.8152,
      "step": 186247
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6908199787139893,
      "learning_rate": 5.274922729914555e-05,
      "loss": 2.8939,
      "step": 186248
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5580787658691406,
      "learning_rate": 5.2746910658471354e-05,
      "loss": 2.9303,
      "step": 186249
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.813814163208008,
      "learning_rate": 5.274459406376629e-05,
      "loss": 2.9153,
      "step": 186250
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.546168804168701,
      "learning_rate": 5.2742277515030654e-05,
      "loss": 3.0985,
      "step": 186251
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1171741485595703,
      "learning_rate": 5.273996101226491e-05,
      "loss": 2.9516,
      "step": 186252
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.632526159286499,
      "learning_rate": 5.2737644555469396e-05,
      "loss": 2.8758,
      "step": 186253
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4510254859924316,
      "learning_rate": 5.2735328144644706e-05,
      "loss": 3.0148,
      "step": 186254
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.304396390914917,
      "learning_rate": 5.2733011779791076e-05,
      "loss": 3.0043,
      "step": 186255
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7403564453125,
      "learning_rate": 5.273069546090914e-05,
      "loss": 2.9074,
      "step": 186256
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8189375400543213,
      "learning_rate": 5.272837918799924e-05,
      "loss": 3.1004,
      "step": 186257
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.207509756088257,
      "learning_rate": 5.272606296106182e-05,
      "loss": 2.6631,
      "step": 186258
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5206990242004395,
      "learning_rate": 5.2723746780097196e-05,
      "loss": 2.8641,
      "step": 186259
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.375823974609375,
      "learning_rate": 5.2721430645105966e-05,
      "loss": 2.7893,
      "step": 186260
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6396801471710205,
      "learning_rate": 5.2719114556088426e-05,
      "loss": 2.5984,
      "step": 186261
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6433842182159424,
      "learning_rate": 5.271679851304514e-05,
      "loss": 3.1403,
      "step": 186262
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0366053581237793,
      "learning_rate": 5.271448251597645e-05,
      "loss": 2.7208,
      "step": 186263
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.482342004776001,
      "learning_rate": 5.271216656488285e-05,
      "loss": 2.6358,
      "step": 186264
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4031736850738525,
      "learning_rate": 5.2709850659764606e-05,
      "loss": 3.0981,
      "step": 186265
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7320196628570557,
      "learning_rate": 5.270753480062239e-05,
      "loss": 2.9675,
      "step": 186266
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7055416107177734,
      "learning_rate": 5.270521898745639e-05,
      "loss": 2.9825,
      "step": 186267
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0541903972625732,
      "learning_rate": 5.270290322026728e-05,
      "loss": 2.911,
      "step": 186268
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0605926513671875,
      "learning_rate": 5.2700587499055325e-05,
      "loss": 2.7308,
      "step": 186269
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8652048110961914,
      "learning_rate": 5.269827182382102e-05,
      "loss": 3.1096,
      "step": 186270
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.959728717803955,
      "learning_rate": 5.269595619456468e-05,
      "loss": 2.9497,
      "step": 186271
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.485341787338257,
      "learning_rate": 5.269364061128692e-05,
      "loss": 3.2172,
      "step": 186272
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4293406009674072,
      "learning_rate": 5.269132507398802e-05,
      "loss": 2.9464,
      "step": 186273
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.825806140899658,
      "learning_rate": 5.268900958266854e-05,
      "loss": 2.8184,
      "step": 186274
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4410340785980225,
      "learning_rate": 5.268669413732881e-05,
      "loss": 3.0057,
      "step": 186275
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.219611406326294,
      "learning_rate": 5.268437873796934e-05,
      "loss": 2.7905,
      "step": 186276
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2852554321289062,
      "learning_rate": 5.268206338459041e-05,
      "loss": 2.8877,
      "step": 186277
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.694289207458496,
      "learning_rate": 5.267974807719261e-05,
      "loss": 2.9814,
      "step": 186278
    },
    {
      "epoch": 2.43,
      "grad_norm": 6.930835247039795,
      "learning_rate": 5.2677432815776285e-05,
      "loss": 2.8686,
      "step": 186279
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6209919452667236,
      "learning_rate": 5.267511760034192e-05,
      "loss": 2.8774,
      "step": 186280
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5950968265533447,
      "learning_rate": 5.2672802430889936e-05,
      "loss": 2.8633,
      "step": 186281
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7623672485351562,
      "learning_rate": 5.26704873074207e-05,
      "loss": 2.9459,
      "step": 186282
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6111855506896973,
      "learning_rate": 5.2668172229934725e-05,
      "loss": 3.1801,
      "step": 186283
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1616647243499756,
      "learning_rate": 5.2665857198432426e-05,
      "loss": 3.0176,
      "step": 186284
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.501306056976318,
      "learning_rate": 5.266354221291414e-05,
      "loss": 2.892,
      "step": 186285
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.873913049697876,
      "learning_rate": 5.266122727338044e-05,
      "loss": 3.1044,
      "step": 186286
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3910257816314697,
      "learning_rate": 5.2658912379831696e-05,
      "loss": 3.1396,
      "step": 186287
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.825442314147949,
      "learning_rate": 5.265659753226823e-05,
      "loss": 2.9309,
      "step": 186288
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.630054235458374,
      "learning_rate": 5.265428273069068e-05,
      "loss": 3.0422,
      "step": 186289
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8102571964263916,
      "learning_rate": 5.2651967975099274e-05,
      "loss": 2.8279,
      "step": 186290
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.453122138977051,
      "learning_rate": 5.2649653265494586e-05,
      "loss": 2.8468,
      "step": 186291
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.766005039215088,
      "learning_rate": 5.2647338601877046e-05,
      "loss": 2.8722,
      "step": 186292
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.919795036315918,
      "learning_rate": 5.264502398424702e-05,
      "loss": 2.867,
      "step": 186293
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9216079711914062,
      "learning_rate": 5.2642709412604834e-05,
      "loss": 3.0737,
      "step": 186294
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3557164669036865,
      "learning_rate": 5.2640394886951166e-05,
      "loss": 2.8454,
      "step": 186295
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.66548228263855,
      "learning_rate": 5.263808040728621e-05,
      "loss": 2.651,
      "step": 186296
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.737333059310913,
      "learning_rate": 5.26357659736106e-05,
      "loss": 2.7604,
      "step": 186297
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.609548568725586,
      "learning_rate": 5.263345158592467e-05,
      "loss": 2.7767,
      "step": 186298
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9742894172668457,
      "learning_rate": 5.263113724422875e-05,
      "loss": 2.9081,
      "step": 186299
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5434811115264893,
      "learning_rate": 5.262882294852347e-05,
      "loss": 2.9829,
      "step": 186300
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8500816822052,
      "learning_rate": 5.262650869880918e-05,
      "loss": 2.9759,
      "step": 186301
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.526343584060669,
      "learning_rate": 5.262419449508619e-05,
      "loss": 2.9417,
      "step": 186302
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.009746551513672,
      "learning_rate": 5.2621880337355114e-05,
      "loss": 2.7187,
      "step": 186303
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1545770168304443,
      "learning_rate": 5.261956622561622e-05,
      "loss": 3.0337,
      "step": 186304
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.413231611251831,
      "learning_rate": 5.2617252159870093e-05,
      "loss": 3.2,
      "step": 186305
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.650651693344116,
      "learning_rate": 5.261493814011708e-05,
      "loss": 3.1021,
      "step": 186306
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6188600063323975,
      "learning_rate": 5.261262416635764e-05,
      "loss": 3.1994,
      "step": 186307
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5431714057922363,
      "learning_rate": 5.261031023859208e-05,
      "loss": 2.7759,
      "step": 186308
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1918206214904785,
      "learning_rate": 5.260799635682102e-05,
      "loss": 3.041,
      "step": 186309
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.678873300552368,
      "learning_rate": 5.260568252104474e-05,
      "loss": 2.8647,
      "step": 186310
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.857211112976074,
      "learning_rate": 5.260336873126381e-05,
      "loss": 3.1693,
      "step": 186311
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.204465389251709,
      "learning_rate": 5.260105498747857e-05,
      "loss": 2.878,
      "step": 186312
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9855496883392334,
      "learning_rate": 5.259874128968948e-05,
      "loss": 2.7108,
      "step": 186313
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7386069297790527,
      "learning_rate": 5.2596427637896864e-05,
      "loss": 2.884,
      "step": 186314
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5456392765045166,
      "learning_rate": 5.259411403210132e-05,
      "loss": 2.7232,
      "step": 186315
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9758307933807373,
      "learning_rate": 5.2591800472303146e-05,
      "loss": 2.999,
      "step": 186316
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8284926414489746,
      "learning_rate": 5.258948695850288e-05,
      "loss": 2.9883,
      "step": 186317
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0667126178741455,
      "learning_rate": 5.258717349070089e-05,
      "loss": 2.8629,
      "step": 186318
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6881041526794434,
      "learning_rate": 5.258486006889763e-05,
      "loss": 3.0553,
      "step": 186319
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.954796314239502,
      "learning_rate": 5.258254669309344e-05,
      "loss": 2.8937,
      "step": 186320
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6359710693359375,
      "learning_rate": 5.258023336328889e-05,
      "loss": 2.8785,
      "step": 186321
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.161763668060303,
      "learning_rate": 5.257792007948428e-05,
      "loss": 2.9,
      "step": 186322
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.247298240661621,
      "learning_rate": 5.2575606841680175e-05,
      "loss": 2.9969,
      "step": 186323
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.988713026046753,
      "learning_rate": 5.2573293649876936e-05,
      "loss": 2.8584,
      "step": 186324
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0814831256866455,
      "learning_rate": 5.2570980504075e-05,
      "loss": 2.7516,
      "step": 186325
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0154519081115723,
      "learning_rate": 5.25686674042747e-05,
      "loss": 2.9369,
      "step": 186326
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6686906814575195,
      "learning_rate": 5.256635435047664e-05,
      "loss": 3.1061,
      "step": 186327
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8281261920928955,
      "learning_rate": 5.256404134268111e-05,
      "loss": 2.6837,
      "step": 186328
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4213995933532715,
      "learning_rate": 5.2561728380888645e-05,
      "loss": 2.8771,
      "step": 186329
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0328011512756348,
      "learning_rate": 5.2559415465099616e-05,
      "loss": 2.9489,
      "step": 186330
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.68861985206604,
      "learning_rate": 5.255710259531449e-05,
      "loss": 3.0645,
      "step": 186331
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.039062738418579,
      "learning_rate": 5.255478977153357e-05,
      "loss": 2.8551,
      "step": 186332
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7760069370269775,
      "learning_rate": 5.2552476993757475e-05,
      "loss": 2.904,
      "step": 186333
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7270779609680176,
      "learning_rate": 5.255016426198645e-05,
      "loss": 2.9559,
      "step": 186334
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.606781244277954,
      "learning_rate": 5.254785157622112e-05,
      "loss": 2.9914,
      "step": 186335
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8751978874206543,
      "learning_rate": 5.2545538936461796e-05,
      "loss": 2.7875,
      "step": 186336
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9221673011779785,
      "learning_rate": 5.254322634270897e-05,
      "loss": 2.8187,
      "step": 186337
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.043739080429077,
      "learning_rate": 5.2540913794962904e-05,
      "loss": 2.9253,
      "step": 186338
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8207015991210938,
      "learning_rate": 5.2538601293224234e-05,
      "loss": 2.9756,
      "step": 186339
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9373996257781982,
      "learning_rate": 5.253628883749327e-05,
      "loss": 2.9666,
      "step": 186340
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7609221935272217,
      "learning_rate": 5.253397642777053e-05,
      "loss": 3.1984,
      "step": 186341
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4960379600524902,
      "learning_rate": 5.253166406405639e-05,
      "loss": 3.0386,
      "step": 186342
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1555991172790527,
      "learning_rate": 5.252935174635131e-05,
      "loss": 2.7775,
      "step": 186343
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.00862193107605,
      "learning_rate": 5.2527039474655594e-05,
      "loss": 3.031,
      "step": 186344
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.185546398162842,
      "learning_rate": 5.252472724896985e-05,
      "loss": 2.8718,
      "step": 186345
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.168846368789673,
      "learning_rate": 5.2522415069294354e-05,
      "loss": 3.0705,
      "step": 186346
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.784135580062866,
      "learning_rate": 5.252010293562969e-05,
      "loss": 2.8713,
      "step": 186347
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6174535751342773,
      "learning_rate": 5.251779084797622e-05,
      "loss": 2.9473,
      "step": 186348
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.898339033126831,
      "learning_rate": 5.2515478806334385e-05,
      "loss": 2.9903,
      "step": 186349
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9174253940582275,
      "learning_rate": 5.2513166810704474e-05,
      "loss": 3.111,
      "step": 186350
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.469205141067505,
      "learning_rate": 5.251085486108716e-05,
      "loss": 2.7939,
      "step": 186351
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.758899211883545,
      "learning_rate": 5.250854295748264e-05,
      "loss": 3.1471,
      "step": 186352
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.70428204536438,
      "learning_rate": 5.250623109989154e-05,
      "loss": 2.8773,
      "step": 186353
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5930335521698,
      "learning_rate": 5.25039192883142e-05,
      "loss": 3.0959,
      "step": 186354
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.309951066970825,
      "learning_rate": 5.250160752275106e-05,
      "loss": 2.6896,
      "step": 186355
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.52933931350708,
      "learning_rate": 5.2499295803202435e-05,
      "loss": 3.0255,
      "step": 186356
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.609029769897461,
      "learning_rate": 5.249698412966897e-05,
      "loss": 2.8005,
      "step": 186357
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.750340461730957,
      "learning_rate": 5.24946725021509e-05,
      "loss": 2.8376,
      "step": 186358
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.405386447906494,
      "learning_rate": 5.249236092064882e-05,
      "loss": 2.8726,
      "step": 186359
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.710784673690796,
      "learning_rate": 5.24900493851631e-05,
      "loss": 2.8841,
      "step": 186360
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0772736072540283,
      "learning_rate": 5.248773789569414e-05,
      "loss": 3.0098,
      "step": 186361
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6856560707092285,
      "learning_rate": 5.24854264522423e-05,
      "loss": 2.8383,
      "step": 186362
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4844844341278076,
      "learning_rate": 5.2483115054808175e-05,
      "loss": 2.9229,
      "step": 186363
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.320976495742798,
      "learning_rate": 5.248080370339202e-05,
      "loss": 2.9128,
      "step": 186364
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.932781219482422,
      "learning_rate": 5.247849239799444e-05,
      "loss": 3.0367,
      "step": 186365
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.160792827606201,
      "learning_rate": 5.247618113861573e-05,
      "loss": 3.0502,
      "step": 186366
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.366065740585327,
      "learning_rate": 5.247386992525643e-05,
      "loss": 2.7674,
      "step": 186367
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5804810523986816,
      "learning_rate": 5.247155875791689e-05,
      "loss": 2.934,
      "step": 186368
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.281067371368408,
      "learning_rate": 5.2469247636597563e-05,
      "loss": 2.981,
      "step": 186369
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.359062910079956,
      "learning_rate": 5.246693656129883e-05,
      "loss": 2.853,
      "step": 186370
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9364726543426514,
      "learning_rate": 5.246462553202122e-05,
      "loss": 2.7901,
      "step": 186371
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8872504234313965,
      "learning_rate": 5.2462314548765026e-05,
      "loss": 2.766,
      "step": 186372
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7735519409179688,
      "learning_rate": 5.2460003611530856e-05,
      "loss": 3.046,
      "step": 186373
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.415268659591675,
      "learning_rate": 5.2457692720319037e-05,
      "loss": 2.8363,
      "step": 186374
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5557868480682373,
      "learning_rate": 5.245538187513e-05,
      "loss": 2.8332,
      "step": 186375
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4029109477996826,
      "learning_rate": 5.245307107596411e-05,
      "loss": 2.9956,
      "step": 186376
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8937461376190186,
      "learning_rate": 5.2450760322821915e-05,
      "loss": 2.9131,
      "step": 186377
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7978668212890625,
      "learning_rate": 5.244844961570377e-05,
      "loss": 2.8768,
      "step": 186378
    },
    {
      "epoch": 2.43,
      "grad_norm": 7.543169975280762,
      "learning_rate": 5.2446138954610173e-05,
      "loss": 2.9443,
      "step": 186379
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.0326008796691895,
      "learning_rate": 5.244382833954153e-05,
      "loss": 3.0248,
      "step": 186380
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5785155296325684,
      "learning_rate": 5.244151777049816e-05,
      "loss": 3.1747,
      "step": 186381
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6559977531433105,
      "learning_rate": 5.2439207247480655e-05,
      "loss": 3.0235,
      "step": 186382
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5415432453155518,
      "learning_rate": 5.243689677048939e-05,
      "loss": 2.9664,
      "step": 186383
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1791110038757324,
      "learning_rate": 5.243458633952471e-05,
      "loss": 3.1753,
      "step": 186384
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.118433713912964,
      "learning_rate": 5.243227595458718e-05,
      "loss": 2.9971,
      "step": 186385
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8718810081481934,
      "learning_rate": 5.242996561567716e-05,
      "loss": 2.7145,
      "step": 186386
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.68300199508667,
      "learning_rate": 5.242765532279502e-05,
      "loss": 2.7356,
      "step": 186387
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.513446807861328,
      "learning_rate": 5.2425345075941305e-05,
      "loss": 3.2247,
      "step": 186388
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.583701133728027,
      "learning_rate": 5.2423034875116334e-05,
      "loss": 2.9013,
      "step": 186389
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0077171325683594,
      "learning_rate": 5.242072472032067e-05,
      "loss": 2.9731,
      "step": 186390
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4737424850463867,
      "learning_rate": 5.241841461155466e-05,
      "loss": 2.8602,
      "step": 186391
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0314910411834717,
      "learning_rate": 5.241610454881873e-05,
      "loss": 2.7754,
      "step": 186392
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5505049228668213,
      "learning_rate": 5.241379453211327e-05,
      "loss": 2.7868,
      "step": 186393
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.807690382003784,
      "learning_rate": 5.2411484561438836e-05,
      "loss": 2.683,
      "step": 186394
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0243163108825684,
      "learning_rate": 5.240917463679567e-05,
      "loss": 2.9159,
      "step": 186395
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2349369525909424,
      "learning_rate": 5.240686475818442e-05,
      "loss": 3.0492,
      "step": 186396
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.55849289894104,
      "learning_rate": 5.240455492560538e-05,
      "loss": 2.9883,
      "step": 186397
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.635246515274048,
      "learning_rate": 5.240224513905902e-05,
      "loss": 2.7982,
      "step": 186398
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4424216747283936,
      "learning_rate": 5.23999353985457e-05,
      "loss": 2.8113,
      "step": 186399
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.640833616256714,
      "learning_rate": 5.239762570406596e-05,
      "loss": 2.7924,
      "step": 186400
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7840728759765625,
      "learning_rate": 5.239531605562009e-05,
      "loss": 2.9839,
      "step": 186401
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.887968063354492,
      "learning_rate": 5.2393006453208706e-05,
      "loss": 3.0502,
      "step": 186402
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.236295700073242,
      "learning_rate": 5.239069689683213e-05,
      "loss": 2.7546,
      "step": 186403
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9144954681396484,
      "learning_rate": 5.238838738649076e-05,
      "loss": 2.78,
      "step": 186404
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.917736768722534,
      "learning_rate": 5.238607792218503e-05,
      "loss": 2.7635,
      "step": 186405
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0121054649353027,
      "learning_rate": 5.238376850391548e-05,
      "loss": 2.8996,
      "step": 186406
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4850995540618896,
      "learning_rate": 5.2381459131682365e-05,
      "loss": 2.993,
      "step": 186407
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1125102043151855,
      "learning_rate": 5.2379149805486295e-05,
      "loss": 3.0079,
      "step": 186408
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4948015213012695,
      "learning_rate": 5.2376840525327604e-05,
      "loss": 3.3295,
      "step": 186409
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.767420530319214,
      "learning_rate": 5.2374531291206745e-05,
      "loss": 2.9603,
      "step": 186410
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7606585025787354,
      "learning_rate": 5.237222210312406e-05,
      "loss": 3.0334,
      "step": 186411
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.527616024017334,
      "learning_rate": 5.236991296108012e-05,
      "loss": 3.0466,
      "step": 186412
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0438296794891357,
      "learning_rate": 5.2367603865075215e-05,
      "loss": 2.7306,
      "step": 186413
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.714162588119507,
      "learning_rate": 5.236529481510995e-05,
      "loss": 3.2538,
      "step": 186414
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.405099630355835,
      "learning_rate": 5.236298581118462e-05,
      "loss": 3.1451,
      "step": 186415
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.662970781326294,
      "learning_rate": 5.2360676853299666e-05,
      "loss": 3.0127,
      "step": 186416
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.614814519882202,
      "learning_rate": 5.235836794145552e-05,
      "loss": 3.0706,
      "step": 186417
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9781274795532227,
      "learning_rate": 5.2356059075652666e-05,
      "loss": 2.67,
      "step": 186418
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.770350217819214,
      "learning_rate": 5.235375025589142e-05,
      "loss": 2.8772,
      "step": 186419
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4871551990509033,
      "learning_rate": 5.2351441482172354e-05,
      "loss": 3.0924,
      "step": 186420
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.3378074169158936,
      "learning_rate": 5.234913275449585e-05,
      "loss": 2.8772,
      "step": 186421
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.606841564178467,
      "learning_rate": 5.2346824072862313e-05,
      "loss": 2.9234,
      "step": 186422
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9514646530151367,
      "learning_rate": 5.2344515437272086e-05,
      "loss": 2.8956,
      "step": 186423
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8912789821624756,
      "learning_rate": 5.2342206847725784e-05,
      "loss": 2.547,
      "step": 186424
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9053733348846436,
      "learning_rate": 5.233989830422366e-05,
      "loss": 2.7607,
      "step": 186425
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4049625396728516,
      "learning_rate": 5.23375898067663e-05,
      "loss": 3.0402,
      "step": 186426
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5737533569335938,
      "learning_rate": 5.233528135535404e-05,
      "loss": 3.1246,
      "step": 186427
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5882742404937744,
      "learning_rate": 5.233297294998735e-05,
      "loss": 3.0334,
      "step": 186428
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.052844762802124,
      "learning_rate": 5.2330664590666525e-05,
      "loss": 2.8898,
      "step": 186429
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.431549310684204,
      "learning_rate": 5.23283562773922e-05,
      "loss": 3.2946,
      "step": 186430
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1033670902252197,
      "learning_rate": 5.2326048010164644e-05,
      "loss": 2.8836,
      "step": 186431
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1245055198669434,
      "learning_rate": 5.232373978898442e-05,
      "loss": 2.94,
      "step": 186432
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.830174446105957,
      "learning_rate": 5.2321431613851826e-05,
      "loss": 2.7169,
      "step": 186433
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.780986785888672,
      "learning_rate": 5.231912348476747e-05,
      "loss": 3.0021,
      "step": 186434
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.3289992809295654,
      "learning_rate": 5.231681540173154e-05,
      "loss": 2.9774,
      "step": 186435
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7641215324401855,
      "learning_rate": 5.2314507364744674e-05,
      "loss": 2.8333,
      "step": 186436
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.865344762802124,
      "learning_rate": 5.2312199373807106e-05,
      "loss": 3.0736,
      "step": 186437
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7729265689849854,
      "learning_rate": 5.230989142891947e-05,
      "loss": 2.9534,
      "step": 186438
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0616936683654785,
      "learning_rate": 5.230758353008203e-05,
      "loss": 2.7724,
      "step": 186439
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8787553310394287,
      "learning_rate": 5.230527567729542e-05,
      "loss": 2.9864,
      "step": 186440
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.291823625564575,
      "learning_rate": 5.230296787055981e-05,
      "loss": 2.8596,
      "step": 186441
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1528093814849854,
      "learning_rate": 5.230066010987582e-05,
      "loss": 2.7293,
      "step": 186442
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9724984169006348,
      "learning_rate": 5.2298352395243736e-05,
      "loss": 2.8304,
      "step": 186443
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.036968469619751,
      "learning_rate": 5.229604472666411e-05,
      "loss": 2.9237,
      "step": 186444
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.186252593994141,
      "learning_rate": 5.229373710413728e-05,
      "loss": 2.8345,
      "step": 186445
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.234182119369507,
      "learning_rate": 5.2291429527663875e-05,
      "loss": 2.8808,
      "step": 186446
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6065499782562256,
      "learning_rate": 5.2289121997244e-05,
      "loss": 3.1022,
      "step": 186447
    },
    {
      "epoch": 2.43,
      "grad_norm": 6.803228855133057,
      "learning_rate": 5.228681451287835e-05,
      "loss": 2.8111,
      "step": 186448
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.060253143310547,
      "learning_rate": 5.228450707456716e-05,
      "loss": 2.9291,
      "step": 186449
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.793483257293701,
      "learning_rate": 5.228219968231106e-05,
      "loss": 3.0191,
      "step": 186450
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.013657569885254,
      "learning_rate": 5.227989233611026e-05,
      "loss": 3.0088,
      "step": 186451
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7579030990600586,
      "learning_rate": 5.227758503596542e-05,
      "loss": 2.745,
      "step": 186452
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2073323726654053,
      "learning_rate": 5.2275277781876836e-05,
      "loss": 2.9343,
      "step": 186453
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3768346309661865,
      "learning_rate": 5.2272970573844944e-05,
      "loss": 2.9056,
      "step": 186454
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.073544025421143,
      "learning_rate": 5.227066341187011e-05,
      "loss": 2.8999,
      "step": 186455
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.052016735076904,
      "learning_rate": 5.226835629595293e-05,
      "loss": 2.8166,
      "step": 186456
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5676276683807373,
      "learning_rate": 5.2266049226093644e-05,
      "loss": 3.0788,
      "step": 186457
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.597212314605713,
      "learning_rate": 5.226374220229285e-05,
      "loss": 3.0067,
      "step": 186458
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0497777462005615,
      "learning_rate": 5.2261435224550904e-05,
      "loss": 2.9033,
      "step": 186459
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8179855346679688,
      "learning_rate": 5.2259128292868254e-05,
      "loss": 2.8233,
      "step": 186460
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.602325201034546,
      "learning_rate": 5.225682140724523e-05,
      "loss": 3.2358,
      "step": 186461
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7159547805786133,
      "learning_rate": 5.2254514567682394e-05,
      "loss": 2.7691,
      "step": 186462
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.58801007270813,
      "learning_rate": 5.225220777418004e-05,
      "loss": 2.8536,
      "step": 186463
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4801852703094482,
      "learning_rate": 5.224990102673878e-05,
      "loss": 3.0704,
      "step": 186464
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0668914318084717,
      "learning_rate": 5.224759432535894e-05,
      "loss": 2.8479,
      "step": 186465
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2055132389068604,
      "learning_rate": 5.2245287670040856e-05,
      "loss": 3.0001,
      "step": 186466
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5143768787384033,
      "learning_rate": 5.2242981060785126e-05,
      "loss": 2.8754,
      "step": 186467
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.851091384887695,
      "learning_rate": 5.224067449759212e-05,
      "loss": 2.779,
      "step": 186468
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.756075143814087,
      "learning_rate": 5.223836798046216e-05,
      "loss": 3.0127,
      "step": 186469
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6714437007904053,
      "learning_rate": 5.223606150939582e-05,
      "loss": 2.9042,
      "step": 186470
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7213544845581055,
      "learning_rate": 5.22337550843935e-05,
      "loss": 3.1272,
      "step": 186471
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4905333518981934,
      "learning_rate": 5.2231448705455504e-05,
      "loss": 2.8657,
      "step": 186472
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.557925224304199,
      "learning_rate": 5.222914237258246e-05,
      "loss": 2.8061,
      "step": 186473
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.272052049636841,
      "learning_rate": 5.2226836085774694e-05,
      "loss": 2.7912,
      "step": 186474
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.899536609649658,
      "learning_rate": 5.222452984503255e-05,
      "loss": 2.9348,
      "step": 186475
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.346768379211426,
      "learning_rate": 5.222222365035662e-05,
      "loss": 2.6431,
      "step": 186476
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4515249729156494,
      "learning_rate": 5.221991750174724e-05,
      "loss": 2.8767,
      "step": 186477
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9104530811309814,
      "learning_rate": 5.2217611399204815e-05,
      "loss": 2.9948,
      "step": 186478
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7584283351898193,
      "learning_rate": 5.2215305342729874e-05,
      "loss": 3.0264,
      "step": 186479
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8533976078033447,
      "learning_rate": 5.221299933232268e-05,
      "loss": 2.8495,
      "step": 186480
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.492591142654419,
      "learning_rate": 5.221069336798387e-05,
      "loss": 2.7692,
      "step": 186481
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4026472568511963,
      "learning_rate": 5.220838744971377e-05,
      "loss": 2.9187,
      "step": 186482
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.582569122314453,
      "learning_rate": 5.2206081577512826e-05,
      "loss": 2.606,
      "step": 186483
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.613236427307129,
      "learning_rate": 5.2203775751381326e-05,
      "loss": 2.6606,
      "step": 186484
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.738095760345459,
      "learning_rate": 5.220146997131994e-05,
      "loss": 2.8075,
      "step": 186485
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8711628913879395,
      "learning_rate": 5.2199164237328875e-05,
      "loss": 3.1876,
      "step": 186486
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5067224502563477,
      "learning_rate": 5.219685854940875e-05,
      "loss": 2.9294,
      "step": 186487
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.624950885772705,
      "learning_rate": 5.219455290755991e-05,
      "loss": 3.0225,
      "step": 186488
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6323623657226562,
      "learning_rate": 5.219224731178278e-05,
      "loss": 2.803,
      "step": 186489
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.17673659324646,
      "learning_rate": 5.218994176207772e-05,
      "loss": 3.0852,
      "step": 186490
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.845215082168579,
      "learning_rate": 5.2187636258445283e-05,
      "loss": 2.9691,
      "step": 186491
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5551648139953613,
      "learning_rate": 5.218533080088579e-05,
      "loss": 2.9949,
      "step": 186492
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.859504461288452,
      "learning_rate": 5.218302538939978e-05,
      "loss": 3.2112,
      "step": 186493
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3446121215820312,
      "learning_rate": 5.218072002398761e-05,
      "loss": 3.0967,
      "step": 186494
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4350008964538574,
      "learning_rate": 5.217841470464975e-05,
      "loss": 3.0415,
      "step": 186495
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.751459836959839,
      "learning_rate": 5.21761094313865e-05,
      "loss": 3.0645,
      "step": 186496
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.151988983154297,
      "learning_rate": 5.21738042041985e-05,
      "loss": 3.0725,
      "step": 186497
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.124847650527954,
      "learning_rate": 5.2171499023085975e-05,
      "loss": 2.8911,
      "step": 186498
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.354236125946045,
      "learning_rate": 5.216919388804952e-05,
      "loss": 3.0732,
      "step": 186499
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.094679832458496,
      "learning_rate": 5.216688879908941e-05,
      "loss": 2.8121,
      "step": 186500
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0143706798553467,
      "learning_rate": 5.2164583756206315e-05,
      "loss": 2.913,
      "step": 186501
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8590011596679688,
      "learning_rate": 5.216227875940032e-05,
      "loss": 2.9671,
      "step": 186502
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.907194137573242,
      "learning_rate": 5.215997380867214e-05,
      "loss": 2.9049,
      "step": 186503
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6902947425842285,
      "learning_rate": 5.215766890402203e-05,
      "loss": 3.0176,
      "step": 186504
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.39102840423584,
      "learning_rate": 5.215536404545056e-05,
      "loss": 2.9417,
      "step": 186505
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0557074546813965,
      "learning_rate": 5.2153059232958e-05,
      "loss": 2.751,
      "step": 186506
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7238094806671143,
      "learning_rate": 5.215075446654502e-05,
      "loss": 2.8381,
      "step": 186507
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.984982490539551,
      "learning_rate": 5.214844974621174e-05,
      "loss": 2.7653,
      "step": 186508
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.763834238052368,
      "learning_rate": 5.21461450719588e-05,
      "loss": 3.2101,
      "step": 186509
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6039035320281982,
      "learning_rate": 5.214384044378653e-05,
      "loss": 3.0478,
      "step": 186510
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.630690336227417,
      "learning_rate": 5.214153586169547e-05,
      "loss": 2.8731,
      "step": 186511
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7776989936828613,
      "learning_rate": 5.2139231325685885e-05,
      "loss": 3.0405,
      "step": 186512
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.134889125823975,
      "learning_rate": 5.213692683575843e-05,
      "loss": 2.8707,
      "step": 186513
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7712857723236084,
      "learning_rate": 5.213462239191328e-05,
      "loss": 2.8386,
      "step": 186514
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.941255569458008,
      "learning_rate": 5.213231799415104e-05,
      "loss": 3.0519,
      "step": 186515
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9509103298187256,
      "learning_rate": 5.213001364247204e-05,
      "loss": 2.8483,
      "step": 186516
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.683309555053711,
      "learning_rate": 5.2127709336876764e-05,
      "loss": 2.9311,
      "step": 186517
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.757359743118286,
      "learning_rate": 5.21254050773656e-05,
      "loss": 2.8154,
      "step": 186518
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.804945945739746,
      "learning_rate": 5.212310086393914e-05,
      "loss": 2.9508,
      "step": 186519
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2954914569854736,
      "learning_rate": 5.212079669659751e-05,
      "loss": 2.9263,
      "step": 186520
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4734878540039062,
      "learning_rate": 5.211849257534139e-05,
      "loss": 2.9806,
      "step": 186521
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7323286533355713,
      "learning_rate": 5.211618850017103e-05,
      "loss": 2.8081,
      "step": 186522
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8074800968170166,
      "learning_rate": 5.211388447108704e-05,
      "loss": 2.8689,
      "step": 186523
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.078658103942871,
      "learning_rate": 5.211158048808969e-05,
      "loss": 2.9137,
      "step": 186524
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.123298406600952,
      "learning_rate": 5.210927655117963e-05,
      "loss": 2.9658,
      "step": 186525
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.645616292953491,
      "learning_rate": 5.210697266035698e-05,
      "loss": 2.8779,
      "step": 186526
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8007328510284424,
      "learning_rate": 5.21046688156224e-05,
      "loss": 2.9796,
      "step": 186527
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8494789600372314,
      "learning_rate": 5.210236501697614e-05,
      "loss": 2.9463,
      "step": 186528
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.04660701751709,
      "learning_rate": 5.2100061264418823e-05,
      "loss": 2.9125,
      "step": 186529
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.307145595550537,
      "learning_rate": 5.20977575579507e-05,
      "loss": 2.9766,
      "step": 186530
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.943969964981079,
      "learning_rate": 5.209545389757245e-05,
      "loss": 2.7107,
      "step": 186531
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.838214635848999,
      "learning_rate": 5.2093150283284194e-05,
      "loss": 2.91,
      "step": 186532
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.783308506011963,
      "learning_rate": 5.209084671508653e-05,
      "loss": 2.8636,
      "step": 186533
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5545620918273926,
      "learning_rate": 5.2088543192979834e-05,
      "loss": 2.6943,
      "step": 186534
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8458478450775146,
      "learning_rate": 5.2086239716964604e-05,
      "loss": 2.8116,
      "step": 186535
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9923958778381348,
      "learning_rate": 5.208393628704114e-05,
      "loss": 2.8622,
      "step": 186536
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9338338375091553,
      "learning_rate": 5.208163290321014e-05,
      "loss": 2.8616,
      "step": 186537
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.707504987716675,
      "learning_rate": 5.207932956547166e-05,
      "loss": 3.005,
      "step": 186538
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.143150806427002,
      "learning_rate": 5.2077026273826386e-05,
      "loss": 3.0758,
      "step": 186539
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.722455978393555,
      "learning_rate": 5.207472302827463e-05,
      "loss": 2.6512,
      "step": 186540
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1160929203033447,
      "learning_rate": 5.207241982881691e-05,
      "loss": 2.8882,
      "step": 186541
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7764737606048584,
      "learning_rate": 5.207011667545352e-05,
      "loss": 2.7845,
      "step": 186542
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.523230791091919,
      "learning_rate": 5.206781356818509e-05,
      "loss": 2.8055,
      "step": 186543
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7100789546966553,
      "learning_rate": 5.206551050701192e-05,
      "loss": 2.8329,
      "step": 186544
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6079869270324707,
      "learning_rate": 5.206320749193445e-05,
      "loss": 3.0792,
      "step": 186545
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.760903835296631,
      "learning_rate": 5.2060904522953e-05,
      "loss": 3.1059,
      "step": 186546
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8139026165008545,
      "learning_rate": 5.2058601600068205e-05,
      "loss": 2.7371,
      "step": 186547
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.848515510559082,
      "learning_rate": 5.205629872328034e-05,
      "loss": 3.0815,
      "step": 186548
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.003293037414551,
      "learning_rate": 5.205399589258997e-05,
      "loss": 2.8877,
      "step": 186549
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4518849849700928,
      "learning_rate": 5.205169310799743e-05,
      "loss": 2.9561,
      "step": 186550
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9400739669799805,
      "learning_rate": 5.2049390369503076e-05,
      "loss": 2.7607,
      "step": 186551
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.553870677947998,
      "learning_rate": 5.204708767710748e-05,
      "loss": 2.9955,
      "step": 186552
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.731396436691284,
      "learning_rate": 5.204478503081104e-05,
      "loss": 2.7563,
      "step": 186553
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8854451179504395,
      "learning_rate": 5.204248243061406e-05,
      "loss": 2.7563,
      "step": 186554
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9480645656585693,
      "learning_rate": 5.2040179876517174e-05,
      "loss": 2.7904,
      "step": 186555
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0724258422851562,
      "learning_rate": 5.203787736852067e-05,
      "loss": 2.8447,
      "step": 186556
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.041203260421753,
      "learning_rate": 5.203557490662493e-05,
      "loss": 2.6681,
      "step": 186557
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0221519470214844,
      "learning_rate": 5.2033272490830547e-05,
      "loss": 3.0199,
      "step": 186558
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.511030435562134,
      "learning_rate": 5.203097012113785e-05,
      "loss": 2.945,
      "step": 186559
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.530238628387451,
      "learning_rate": 5.202866779754718e-05,
      "loss": 2.7985,
      "step": 186560
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5878098011016846,
      "learning_rate": 5.202636552005915e-05,
      "loss": 2.8289,
      "step": 186561
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.845876693725586,
      "learning_rate": 5.202406328867412e-05,
      "loss": 2.8182,
      "step": 186562
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6766672134399414,
      "learning_rate": 5.202176110339241e-05,
      "loss": 2.8644,
      "step": 186563
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6069819927215576,
      "learning_rate": 5.201945896421462e-05,
      "loss": 2.9891,
      "step": 186564
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6904942989349365,
      "learning_rate": 5.201715687114099e-05,
      "loss": 2.9691,
      "step": 186565
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5881805419921875,
      "learning_rate": 5.201485482417217e-05,
      "loss": 2.9748,
      "step": 186566
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.881265878677368,
      "learning_rate": 5.201255282330844e-05,
      "loss": 2.5962,
      "step": 186567
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.3960423469543457,
      "learning_rate": 5.201025086855026e-05,
      "loss": 2.8508,
      "step": 186568
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.999269962310791,
      "learning_rate": 5.200794895989797e-05,
      "loss": 2.7181,
      "step": 186569
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.320463180541992,
      "learning_rate": 5.2005647097352164e-05,
      "loss": 2.8134,
      "step": 186570
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.143294334411621,
      "learning_rate": 5.2003345280913144e-05,
      "loss": 2.902,
      "step": 186571
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7709743976593018,
      "learning_rate": 5.200104351058144e-05,
      "loss": 2.8259,
      "step": 186572
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.780219554901123,
      "learning_rate": 5.199874178635732e-05,
      "loss": 2.9397,
      "step": 186573
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.057457685470581,
      "learning_rate": 5.1996440108241485e-05,
      "loss": 2.9625,
      "step": 186574
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8147196769714355,
      "learning_rate": 5.199413847623407e-05,
      "loss": 2.9527,
      "step": 186575
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1013174057006836,
      "learning_rate": 5.199183689033567e-05,
      "loss": 3.0016,
      "step": 186576
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.02232027053833,
      "learning_rate": 5.198953535054662e-05,
      "loss": 2.9074,
      "step": 186577
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.688715934753418,
      "learning_rate": 5.1987233856867447e-05,
      "loss": 2.9254,
      "step": 186578
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9674553871154785,
      "learning_rate": 5.198493240929846e-05,
      "loss": 2.5972,
      "step": 186579
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0520331859588623,
      "learning_rate": 5.198263100784029e-05,
      "loss": 2.6783,
      "step": 186580
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.885021686553955,
      "learning_rate": 5.19803296524931e-05,
      "loss": 2.8708,
      "step": 186581
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.076437473297119,
      "learning_rate": 5.1978028343257526e-05,
      "loss": 2.7371,
      "step": 186582
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.409198760986328,
      "learning_rate": 5.197572708013387e-05,
      "loss": 2.7888,
      "step": 186583
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9988210201263428,
      "learning_rate": 5.197342586312262e-05,
      "loss": 3.096,
      "step": 186584
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.195848226547241,
      "learning_rate": 5.197112469222415e-05,
      "loss": 2.9619,
      "step": 186585
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8597004413604736,
      "learning_rate": 5.19688235674391e-05,
      "loss": 3.0894,
      "step": 186586
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.248853921890259,
      "learning_rate": 5.196652248876756e-05,
      "loss": 2.9247,
      "step": 186587
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.694080114364624,
      "learning_rate": 5.19642214562102e-05,
      "loss": 2.9474,
      "step": 186588
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.502859115600586,
      "learning_rate": 5.196192046976728e-05,
      "loss": 2.9737,
      "step": 186589
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4036037921905518,
      "learning_rate": 5.195961952943941e-05,
      "loss": 2.7974,
      "step": 186590
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.447190523147583,
      "learning_rate": 5.195731863522685e-05,
      "loss": 2.8822,
      "step": 186591
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5040764808654785,
      "learning_rate": 5.195501778713027e-05,
      "loss": 3.2257,
      "step": 186592
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.55116868019104,
      "learning_rate": 5.1952716985149765e-05,
      "loss": 2.8836,
      "step": 186593
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.045243263244629,
      "learning_rate": 5.195041622928604e-05,
      "loss": 2.881,
      "step": 186594
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.829071044921875,
      "learning_rate": 5.1948115519539326e-05,
      "loss": 3.2337,
      "step": 186595
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.353572130203247,
      "learning_rate": 5.194581485591018e-05,
      "loss": 3.0726,
      "step": 186596
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9218554496765137,
      "learning_rate": 5.1943514238398954e-05,
      "loss": 2.9948,
      "step": 186597
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.706080675125122,
      "learning_rate": 5.1941213667006264e-05,
      "loss": 2.853,
      "step": 186598
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.85994029045105,
      "learning_rate": 5.193891314173222e-05,
      "loss": 3.0221,
      "step": 186599
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6193628311157227,
      "learning_rate": 5.193661266257748e-05,
      "loss": 2.9445,
      "step": 186600
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.660588026046753,
      "learning_rate": 5.193431222954232e-05,
      "loss": 3.0805,
      "step": 186601
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5684523582458496,
      "learning_rate": 5.1932011842627367e-05,
      "loss": 2.9444,
      "step": 186602
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9128785133361816,
      "learning_rate": 5.192971150183285e-05,
      "loss": 3.0417,
      "step": 186603
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4526984691619873,
      "learning_rate": 5.192741120715941e-05,
      "loss": 3.1676,
      "step": 186604
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.93738055229187,
      "learning_rate": 5.1925110958607206e-05,
      "loss": 2.7261,
      "step": 186605
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4242289066314697,
      "learning_rate": 5.192281075617691e-05,
      "loss": 2.8829,
      "step": 186606
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9219002723693848,
      "learning_rate": 5.1920510599868726e-05,
      "loss": 3.1594,
      "step": 186607
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4726316928863525,
      "learning_rate": 5.191821048968331e-05,
      "loss": 3.002,
      "step": 186608
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6227409839630127,
      "learning_rate": 5.19159104256209e-05,
      "loss": 2.9053,
      "step": 186609
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8618640899658203,
      "learning_rate": 5.191361040768216e-05,
      "loss": 2.7439,
      "step": 186610
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.29440975189209,
      "learning_rate": 5.1911310435867196e-05,
      "loss": 2.9794,
      "step": 186611
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.757615804672241,
      "learning_rate": 5.1909010510176694e-05,
      "loss": 2.8401,
      "step": 186612
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9444079399108887,
      "learning_rate": 5.19067106306109e-05,
      "loss": 3.0233,
      "step": 186613
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9990487098693848,
      "learning_rate": 5.190441079717041e-05,
      "loss": 3.0714,
      "step": 186614
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.163017988204956,
      "learning_rate": 5.1902111009855495e-05,
      "loss": 2.5389,
      "step": 186615
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7749857902526855,
      "learning_rate": 5.189981126866685e-05,
      "loss": 2.8789,
      "step": 186616
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9110867977142334,
      "learning_rate": 5.18975115736045e-05,
      "loss": 3.0516,
      "step": 186617
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5496888160705566,
      "learning_rate": 5.189521192466919e-05,
      "loss": 2.8927,
      "step": 186618
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1402804851531982,
      "learning_rate": 5.189291232186118e-05,
      "loss": 2.9449,
      "step": 186619
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.87699818611145,
      "learning_rate": 5.189061276518104e-05,
      "loss": 3.1251,
      "step": 186620
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.895721673965454,
      "learning_rate": 5.188831325462903e-05,
      "loss": 3.1147,
      "step": 186621
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.992111921310425,
      "learning_rate": 5.188601379020579e-05,
      "loss": 2.994,
      "step": 186622
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.002928256988525,
      "learning_rate": 5.1883714371911524e-05,
      "loss": 3.0697,
      "step": 186623
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7582828998565674,
      "learning_rate": 5.188141499974682e-05,
      "loss": 2.9966,
      "step": 186624
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9328291416168213,
      "learning_rate": 5.1879115673711945e-05,
      "loss": 2.93,
      "step": 186625
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6926724910736084,
      "learning_rate": 5.1876816393807506e-05,
      "loss": 3.1144,
      "step": 186626
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7630207538604736,
      "learning_rate": 5.18745171600338e-05,
      "loss": 3.1109,
      "step": 186627
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9338269233703613,
      "learning_rate": 5.187221797239136e-05,
      "loss": 3.0208,
      "step": 186628
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4672696590423584,
      "learning_rate": 5.186991883088055e-05,
      "loss": 2.9598,
      "step": 186629
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.68453049659729,
      "learning_rate": 5.186761973550184e-05,
      "loss": 3.0499,
      "step": 186630
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7912302017211914,
      "learning_rate": 5.186532068625553e-05,
      "loss": 2.9362,
      "step": 186631
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.691960096359253,
      "learning_rate": 5.1863021683142215e-05,
      "loss": 2.85,
      "step": 186632
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1576015949249268,
      "learning_rate": 5.186072272616216e-05,
      "loss": 2.9982,
      "step": 186633
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.1204488277435303,
      "learning_rate": 5.1858423815315976e-05,
      "loss": 2.848,
      "step": 186634
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.885399341583252,
      "learning_rate": 5.185612495060398e-05,
      "loss": 3.0608,
      "step": 186635
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9020001888275146,
      "learning_rate": 5.185382613202662e-05,
      "loss": 3.1152,
      "step": 186636
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0858397483825684,
      "learning_rate": 5.185152735958425e-05,
      "loss": 2.8539,
      "step": 186637
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.908679723739624,
      "learning_rate": 5.1849228633277415e-05,
      "loss": 2.9657,
      "step": 186638
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.603577136993408,
      "learning_rate": 5.184692995310644e-05,
      "loss": 2.901,
      "step": 186639
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.494497299194336,
      "learning_rate": 5.18446313190719e-05,
      "loss": 2.8957,
      "step": 186640
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3356106281280518,
      "learning_rate": 5.184233273117408e-05,
      "loss": 3.0592,
      "step": 186641
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.255070209503174,
      "learning_rate": 5.184003418941342e-05,
      "loss": 3.0559,
      "step": 186642
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.857818603515625,
      "learning_rate": 5.183773569379042e-05,
      "loss": 2.8212,
      "step": 186643
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6157031059265137,
      "learning_rate": 5.183543724430551e-05,
      "loss": 2.775,
      "step": 186644
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0182628631591797,
      "learning_rate": 5.183313884095896e-05,
      "loss": 2.8948,
      "step": 186645
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.826770544052124,
      "learning_rate": 5.1830840483751414e-05,
      "loss": 3.0934,
      "step": 186646
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.734792947769165,
      "learning_rate": 5.1828542172683186e-05,
      "loss": 2.8297,
      "step": 186647
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2148003578186035,
      "learning_rate": 5.1826243907754686e-05,
      "loss": 3.2844,
      "step": 186648
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9083824157714844,
      "learning_rate": 5.182394568896641e-05,
      "loss": 2.9138,
      "step": 186649
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4131109714508057,
      "learning_rate": 5.182164751631865e-05,
      "loss": 3.2257,
      "step": 186650
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0132975578308105,
      "learning_rate": 5.1819349389812024e-05,
      "loss": 2.9988,
      "step": 186651
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8232288360595703,
      "learning_rate": 5.181705130944689e-05,
      "loss": 2.8761,
      "step": 186652
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8920695781707764,
      "learning_rate": 5.181475327522364e-05,
      "loss": 3.0234,
      "step": 186653
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0589022636413574,
      "learning_rate": 5.181245528714262e-05,
      "loss": 2.9328,
      "step": 186654
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8608033657073975,
      "learning_rate": 5.181015734520442e-05,
      "loss": 2.903,
      "step": 186655
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4186277389526367,
      "learning_rate": 5.180785944940934e-05,
      "loss": 2.9539,
      "step": 186656
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.988271951675415,
      "learning_rate": 5.180556159975795e-05,
      "loss": 2.9469,
      "step": 186657
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4038007259368896,
      "learning_rate": 5.180326379625058e-05,
      "loss": 3.0143,
      "step": 186658
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.692518949508667,
      "learning_rate": 5.1800966038887636e-05,
      "loss": 2.8582,
      "step": 186659
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7654776573181152,
      "learning_rate": 5.179866832766955e-05,
      "loss": 3.006,
      "step": 186660
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.666907787322998,
      "learning_rate": 5.1796370662596845e-05,
      "loss": 2.9936,
      "step": 186661
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8792881965637207,
      "learning_rate": 5.179407304366979e-05,
      "loss": 3.0225,
      "step": 186662
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1252336502075195,
      "learning_rate": 5.1791775470888966e-05,
      "loss": 2.9712,
      "step": 186663
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.358266830444336,
      "learning_rate": 5.1789477944254684e-05,
      "loss": 2.8514,
      "step": 186664
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.736147165298462,
      "learning_rate": 5.1787180463767564e-05,
      "loss": 2.9244,
      "step": 186665
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.78875994682312,
      "learning_rate": 5.178488302942773e-05,
      "loss": 2.9282,
      "step": 186666
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.598411798477173,
      "learning_rate": 5.178258564123587e-05,
      "loss": 2.8753,
      "step": 186667
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0466227531433105,
      "learning_rate": 5.1780288299192207e-05,
      "loss": 3.099,
      "step": 186668
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.733715534210205,
      "learning_rate": 5.177799100329739e-05,
      "loss": 2.666,
      "step": 186669
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1110880374908447,
      "learning_rate": 5.177569375355163e-05,
      "loss": 2.8428,
      "step": 186670
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8010895252227783,
      "learning_rate": 5.177339654995561e-05,
      "loss": 2.8114,
      "step": 186671
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6069040298461914,
      "learning_rate": 5.177109939250945e-05,
      "loss": 2.8848,
      "step": 186672
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0854687690734863,
      "learning_rate": 5.1768802281213804e-05,
      "loss": 2.8287,
      "step": 186673
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.3882956504821777,
      "learning_rate": 5.1766505216068943e-05,
      "loss": 3.0259,
      "step": 186674
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4501194953918457,
      "learning_rate": 5.1764208197075466e-05,
      "loss": 2.7688,
      "step": 186675
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.771733522415161,
      "learning_rate": 5.1761911224233596e-05,
      "loss": 2.8423,
      "step": 186676
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9732489585876465,
      "learning_rate": 5.175961429754405e-05,
      "loss": 2.8575,
      "step": 186677
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.976478338241577,
      "learning_rate": 5.175731741700695e-05,
      "loss": 2.9458,
      "step": 186678
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.815178155899048,
      "learning_rate": 5.1755020582622895e-05,
      "loss": 3.0621,
      "step": 186679
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9367644786834717,
      "learning_rate": 5.1752723794392194e-05,
      "loss": 3.026,
      "step": 186680
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5309879779815674,
      "learning_rate": 5.175042705231544e-05,
      "loss": 2.874,
      "step": 186681
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9437806606292725,
      "learning_rate": 5.1748130356392855e-05,
      "loss": 2.9028,
      "step": 186682
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7461042404174805,
      "learning_rate": 5.174583370662516e-05,
      "loss": 2.9789,
      "step": 186683
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4927079677581787,
      "learning_rate": 5.1743537103012445e-05,
      "loss": 3.059,
      "step": 186684
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.41715407371521,
      "learning_rate": 5.1741240545555376e-05,
      "loss": 3.051,
      "step": 186685
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8106753826141357,
      "learning_rate": 5.1738944034254216e-05,
      "loss": 2.9386,
      "step": 186686
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0655453205108643,
      "learning_rate": 5.1736647569109535e-05,
      "loss": 2.6329,
      "step": 186687
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5813839435577393,
      "learning_rate": 5.173435115012163e-05,
      "loss": 2.8825,
      "step": 186688
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7896647453308105,
      "learning_rate": 5.173205477729114e-05,
      "loss": 2.9022,
      "step": 186689
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.02081298828125,
      "learning_rate": 5.17297584506182e-05,
      "loss": 2.9732,
      "step": 186690
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.3971455097198486,
      "learning_rate": 5.172746217010346e-05,
      "loss": 2.9502,
      "step": 186691
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8132522106170654,
      "learning_rate": 5.17251659357472e-05,
      "loss": 2.9964,
      "step": 186692
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9086406230926514,
      "learning_rate": 5.172286974755002e-05,
      "loss": 2.8903,
      "step": 186693
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.9501776695251465,
      "learning_rate": 5.172057360551215e-05,
      "loss": 3.0669,
      "step": 186694
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7380924224853516,
      "learning_rate": 5.171827750963425e-05,
      "loss": 2.9719,
      "step": 186695
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.6707987785339355,
      "learning_rate": 5.1715981459916467e-05,
      "loss": 3.0083,
      "step": 186696
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.314821720123291,
      "learning_rate": 5.171368545635942e-05,
      "loss": 3.0499,
      "step": 186697
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.2548322677612305,
      "learning_rate": 5.171138949896345e-05,
      "loss": 2.9441,
      "step": 186698
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2862374782562256,
      "learning_rate": 5.1709093587729054e-05,
      "loss": 2.7353,
      "step": 186699
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.275826930999756,
      "learning_rate": 5.1706797722656604e-05,
      "loss": 2.7909,
      "step": 186700
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.499115943908691,
      "learning_rate": 5.170450190374659e-05,
      "loss": 2.8885,
      "step": 186701
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3811144828796387,
      "learning_rate": 5.170220613099938e-05,
      "loss": 2.9493,
      "step": 186702
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.177142381668091,
      "learning_rate": 5.1699910404415444e-05,
      "loss": 2.7756,
      "step": 186703
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7650210857391357,
      "learning_rate": 5.1697614723995085e-05,
      "loss": 3.115,
      "step": 186704
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.810497999191284,
      "learning_rate": 5.169531908973893e-05,
      "loss": 2.5792,
      "step": 186705
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.704620122909546,
      "learning_rate": 5.169302350164721e-05,
      "loss": 2.9351,
      "step": 186706
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5398447513580322,
      "learning_rate": 5.16907279597205e-05,
      "loss": 2.7789,
      "step": 186707
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1536412239074707,
      "learning_rate": 5.168843246395919e-05,
      "loss": 2.8633,
      "step": 186708
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.45948600769043,
      "learning_rate": 5.1686137014363694e-05,
      "loss": 2.6669,
      "step": 186709
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4883322715759277,
      "learning_rate": 5.1683841610934365e-05,
      "loss": 2.9323,
      "step": 186710
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.016000747680664,
      "learning_rate": 5.168154625367173e-05,
      "loss": 3.1184,
      "step": 186711
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.3982367515563965,
      "learning_rate": 5.1679250942576144e-05,
      "loss": 2.9986,
      "step": 186712
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5810046195983887,
      "learning_rate": 5.167695567764816e-05,
      "loss": 3.0083,
      "step": 186713
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7573773860931396,
      "learning_rate": 5.1674660458888074e-05,
      "loss": 3.0347,
      "step": 186714
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8391380310058594,
      "learning_rate": 5.167236528629639e-05,
      "loss": 2.7301,
      "step": 186715
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4028825759887695,
      "learning_rate": 5.167007015987341e-05,
      "loss": 3.0308,
      "step": 186716
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6848220825195312,
      "learning_rate": 5.1667775079619733e-05,
      "loss": 2.9698,
      "step": 186717
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.079688549041748,
      "learning_rate": 5.1665480045535624e-05,
      "loss": 2.9944,
      "step": 186718
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7045998573303223,
      "learning_rate": 5.166318505762168e-05,
      "loss": 2.9064,
      "step": 186719
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5907130241394043,
      "learning_rate": 5.166089011587824e-05,
      "loss": 2.7483,
      "step": 186720
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9473094940185547,
      "learning_rate": 5.16585952203057e-05,
      "loss": 3.0474,
      "step": 186721
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.61409592628479,
      "learning_rate": 5.165630037090446e-05,
      "loss": 2.8677,
      "step": 186722
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.927539110183716,
      "learning_rate": 5.165400556767508e-05,
      "loss": 2.9689,
      "step": 186723
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0714027881622314,
      "learning_rate": 5.165171081061781e-05,
      "loss": 3.0377,
      "step": 186724
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6167049407958984,
      "learning_rate": 5.164941609973326e-05,
      "loss": 2.8235,
      "step": 186725
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8918323516845703,
      "learning_rate": 5.164712143502179e-05,
      "loss": 3.1105,
      "step": 186726
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.237868309020996,
      "learning_rate": 5.164482681648371e-05,
      "loss": 3.1188,
      "step": 186727
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8242008686065674,
      "learning_rate": 5.1642532244119625e-05,
      "loss": 2.8972,
      "step": 186728
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7147960662841797,
      "learning_rate": 5.164023771792991e-05,
      "loss": 2.7492,
      "step": 186729
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.478760004043579,
      "learning_rate": 5.163794323791486e-05,
      "loss": 2.6606,
      "step": 186730
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3023312091827393,
      "learning_rate": 5.163564880407507e-05,
      "loss": 3.1769,
      "step": 186731
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6196208000183105,
      "learning_rate": 5.163335441641091e-05,
      "loss": 2.9712,
      "step": 186732
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1990416049957275,
      "learning_rate": 5.1631060074922746e-05,
      "loss": 3.0011,
      "step": 186733
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.90038537979126,
      "learning_rate": 5.162876577961112e-05,
      "loss": 2.8763,
      "step": 186734
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.219715118408203,
      "learning_rate": 5.1626471530476276e-05,
      "loss": 3.0484,
      "step": 186735
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1035685539245605,
      "learning_rate": 5.16241773275189e-05,
      "loss": 2.7911,
      "step": 186736
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.21337890625,
      "learning_rate": 5.162188317073922e-05,
      "loss": 2.9714,
      "step": 186737
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.020575523376465,
      "learning_rate": 5.161958906013777e-05,
      "loss": 2.943,
      "step": 186738
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.687350273132324,
      "learning_rate": 5.1617294995714844e-05,
      "loss": 2.8318,
      "step": 186739
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8545520305633545,
      "learning_rate": 5.161500097747101e-05,
      "loss": 3.0943,
      "step": 186740
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.46177077293396,
      "learning_rate": 5.161270700540654e-05,
      "loss": 2.9422,
      "step": 186741
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.706566572189331,
      "learning_rate": 5.161041307952206e-05,
      "loss": 2.977,
      "step": 186742
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9073829650878906,
      "learning_rate": 5.1608119199817874e-05,
      "loss": 2.9559,
      "step": 186743
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.694262742996216,
      "learning_rate": 5.160582536629445e-05,
      "loss": 2.9223,
      "step": 186744
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.742873430252075,
      "learning_rate": 5.1603531578952115e-05,
      "loss": 2.935,
      "step": 186745
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6657214164733887,
      "learning_rate": 5.160123783779144e-05,
      "loss": 3.0474,
      "step": 186746
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6012630462646484,
      "learning_rate": 5.159894414281273e-05,
      "loss": 3.1182,
      "step": 186747
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6100780963897705,
      "learning_rate": 5.15966504940165e-05,
      "loss": 3.0162,
      "step": 186748
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8097691535949707,
      "learning_rate": 5.1594356891403096e-05,
      "loss": 2.877,
      "step": 186749
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.581089496612549,
      "learning_rate": 5.159206333497312e-05,
      "loss": 2.5427,
      "step": 186750
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.641448974609375,
      "learning_rate": 5.1589769824726735e-05,
      "loss": 2.9916,
      "step": 186751
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.2673044204711914,
      "learning_rate": 5.158747636066457e-05,
      "loss": 2.8859,
      "step": 186752
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0394344329833984,
      "learning_rate": 5.15851829427869e-05,
      "loss": 2.8877,
      "step": 186753
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.105543613433838,
      "learning_rate": 5.158288957109431e-05,
      "loss": 2.9968,
      "step": 186754
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0000693798065186,
      "learning_rate": 5.158059624558708e-05,
      "loss": 2.9134,
      "step": 186755
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6870055198669434,
      "learning_rate": 5.157830296626587e-05,
      "loss": 2.9617,
      "step": 186756
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8299710750579834,
      "learning_rate": 5.157600973313079e-05,
      "loss": 2.9543,
      "step": 186757
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4538798332214355,
      "learning_rate": 5.1573716546182485e-05,
      "loss": 3.2477,
      "step": 186758
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.46364426612854,
      "learning_rate": 5.157142340542124e-05,
      "loss": 2.8037,
      "step": 186759
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3092246055603027,
      "learning_rate": 5.1569130310847616e-05,
      "loss": 2.9513,
      "step": 186760
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.103001117706299,
      "learning_rate": 5.1566837262461944e-05,
      "loss": 2.9838,
      "step": 186761
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.000120639801025,
      "learning_rate": 5.1564544260264796e-05,
      "loss": 2.8489,
      "step": 186762
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6744015216827393,
      "learning_rate": 5.156225130425636e-05,
      "loss": 3.0095,
      "step": 186763
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3140408992767334,
      "learning_rate": 5.155995839443725e-05,
      "loss": 2.9512,
      "step": 186764
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.60776948928833,
      "learning_rate": 5.155766553080779e-05,
      "loss": 2.9125,
      "step": 186765
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0617644786834717,
      "learning_rate": 5.155537271336851e-05,
      "loss": 2.961,
      "step": 186766
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.791426420211792,
      "learning_rate": 5.155307994211969e-05,
      "loss": 3.1638,
      "step": 186767
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6023755073547363,
      "learning_rate": 5.1550787217061916e-05,
      "loss": 2.9716,
      "step": 186768
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0915608406066895,
      "learning_rate": 5.154849453819556e-05,
      "loss": 3.1807,
      "step": 186769
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.587888479232788,
      "learning_rate": 5.154620190552102e-05,
      "loss": 3.0586,
      "step": 186770
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6891252994537354,
      "learning_rate": 5.1543909319038637e-05,
      "loss": 3.1162,
      "step": 186771
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.484476089477539,
      "learning_rate": 5.1541616778749025e-05,
      "loss": 2.8903,
      "step": 186772
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5410101413726807,
      "learning_rate": 5.153932428465244e-05,
      "loss": 3.0367,
      "step": 186773
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.234468460083008,
      "learning_rate": 5.153703183674946e-05,
      "loss": 2.8627,
      "step": 186774
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.708280086517334,
      "learning_rate": 5.153473943504044e-05,
      "loss": 2.9607,
      "step": 186775
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.855583667755127,
      "learning_rate": 5.15324470795258e-05,
      "loss": 2.9225,
      "step": 186776
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7230777740478516,
      "learning_rate": 5.1530154770205864e-05,
      "loss": 3.0496,
      "step": 186777
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.173208236694336,
      "learning_rate": 5.152786250708128e-05,
      "loss": 3.0221,
      "step": 186778
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6981594562530518,
      "learning_rate": 5.152557029015225e-05,
      "loss": 2.8771,
      "step": 186779
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7919301986694336,
      "learning_rate": 5.1523278119419396e-05,
      "loss": 3.0495,
      "step": 186780
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.113393783569336,
      "learning_rate": 5.152098599488306e-05,
      "loss": 2.8496,
      "step": 186781
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.431440830230713,
      "learning_rate": 5.151869391654366e-05,
      "loss": 3.1676,
      "step": 186782
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.05922269821167,
      "learning_rate": 5.151640188440155e-05,
      "loss": 2.9113,
      "step": 186783
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.114480972290039,
      "learning_rate": 5.1514109898457314e-05,
      "loss": 3.1089,
      "step": 186784
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5601000785827637,
      "learning_rate": 5.151181795871122e-05,
      "loss": 2.7749,
      "step": 186785
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4842329025268555,
      "learning_rate": 5.150952606516384e-05,
      "loss": 3.0193,
      "step": 186786
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6075170040130615,
      "learning_rate": 5.1507234217815505e-05,
      "loss": 2.8118,
      "step": 186787
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.674081563949585,
      "learning_rate": 5.1504942416666685e-05,
      "loss": 2.8681,
      "step": 186788
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.006121635437012,
      "learning_rate": 5.150265066171774e-05,
      "loss": 2.8727,
      "step": 186789
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6452393531799316,
      "learning_rate": 5.150035895296917e-05,
      "loss": 2.7853,
      "step": 186790
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9999210834503174,
      "learning_rate": 5.1498067290421344e-05,
      "loss": 3.1103,
      "step": 186791
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4715144634246826,
      "learning_rate": 5.1495775674074756e-05,
      "loss": 2.7157,
      "step": 186792
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.678847312927246,
      "learning_rate": 5.149348410392982e-05,
      "loss": 2.9775,
      "step": 186793
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.151484966278076,
      "learning_rate": 5.1491192579986916e-05,
      "loss": 2.9548,
      "step": 186794
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4303112030029297,
      "learning_rate": 5.148890110224643e-05,
      "loss": 3.06,
      "step": 186795
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4919238090515137,
      "learning_rate": 5.148660967070891e-05,
      "loss": 3.0047,
      "step": 186796
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.137057065963745,
      "learning_rate": 5.1484318285374636e-05,
      "loss": 3.0216,
      "step": 186797
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.328172206878662,
      "learning_rate": 5.1482026946244206e-05,
      "loss": 3.0185,
      "step": 186798
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.702967405319214,
      "learning_rate": 5.147973565331798e-05,
      "loss": 2.8289,
      "step": 186799
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8469109535217285,
      "learning_rate": 5.147744440659632e-05,
      "loss": 3.1121,
      "step": 186800
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.572760820388794,
      "learning_rate": 5.1475153206079677e-05,
      "loss": 2.9143,
      "step": 186801
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9872000217437744,
      "learning_rate": 5.1472862051768535e-05,
      "loss": 2.8819,
      "step": 186802
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2771739959716797,
      "learning_rate": 5.1470570943663204e-05,
      "loss": 2.8435,
      "step": 186803
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.033021926879883,
      "learning_rate": 5.146827988176424e-05,
      "loss": 2.779,
      "step": 186804
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.658597946166992,
      "learning_rate": 5.146598886607205e-05,
      "loss": 2.9868,
      "step": 186805
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.782956123352051,
      "learning_rate": 5.1463697896586995e-05,
      "loss": 2.9032,
      "step": 186806
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.0228729248046875,
      "learning_rate": 5.1461406973309484e-05,
      "loss": 2.8945,
      "step": 186807
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0208981037139893,
      "learning_rate": 5.145911609624004e-05,
      "loss": 3.0862,
      "step": 186808
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.115477561950684,
      "learning_rate": 5.1456825265378966e-05,
      "loss": 2.7863,
      "step": 186809
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4707865715026855,
      "learning_rate": 5.145453448072686e-05,
      "loss": 3.0949,
      "step": 186810
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.655289649963379,
      "learning_rate": 5.145224374228403e-05,
      "loss": 2.9772,
      "step": 186811
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.800205230712891,
      "learning_rate": 5.1449953050050826e-05,
      "loss": 2.8871,
      "step": 186812
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5780529975891113,
      "learning_rate": 5.1447662404027867e-05,
      "loss": 2.9535,
      "step": 186813
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8125271797180176,
      "learning_rate": 5.1445371804215474e-05,
      "loss": 2.8001,
      "step": 186814
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8829779624938965,
      "learning_rate": 5.144308125061401e-05,
      "loss": 2.935,
      "step": 186815
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1789305210113525,
      "learning_rate": 5.144079074322403e-05,
      "loss": 3.0782,
      "step": 186816
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.109722137451172,
      "learning_rate": 5.1438500282045904e-05,
      "loss": 2.9013,
      "step": 186817
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.969128131866455,
      "learning_rate": 5.143620986707998e-05,
      "loss": 2.8849,
      "step": 186818
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0290915966033936,
      "learning_rate": 5.1433919498326824e-05,
      "loss": 3.0618,
      "step": 186819
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.188835144042969,
      "learning_rate": 5.1431629175786804e-05,
      "loss": 3.0478,
      "step": 186820
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.769902467727661,
      "learning_rate": 5.142933889946025e-05,
      "loss": 2.8847,
      "step": 186821
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8034825325012207,
      "learning_rate": 5.142704866934776e-05,
      "loss": 2.9206,
      "step": 186822
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6416170597076416,
      "learning_rate": 5.142475848544967e-05,
      "loss": 2.9604,
      "step": 186823
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0071446895599365,
      "learning_rate": 5.142246834776634e-05,
      "loss": 3.3567,
      "step": 186824
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.512878894805908,
      "learning_rate": 5.142017825629835e-05,
      "loss": 2.9431,
      "step": 186825
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.583143472671509,
      "learning_rate": 5.141788821104595e-05,
      "loss": 2.9947,
      "step": 186826
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3314902782440186,
      "learning_rate": 5.141559821200972e-05,
      "loss": 3.0147,
      "step": 186827
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0311710834503174,
      "learning_rate": 5.141330825919002e-05,
      "loss": 2.9425,
      "step": 186828
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.773191213607788,
      "learning_rate": 5.141101835258732e-05,
      "loss": 2.787,
      "step": 186829
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5984435081481934,
      "learning_rate": 5.140872849220191e-05,
      "loss": 2.9843,
      "step": 186830
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.770989418029785,
      "learning_rate": 5.140643867803437e-05,
      "loss": 2.8964,
      "step": 186831
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.437673330307007,
      "learning_rate": 5.1404148910084985e-05,
      "loss": 2.5789,
      "step": 186832
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.032278537750244,
      "learning_rate": 5.1401859188354365e-05,
      "loss": 3.0703,
      "step": 186833
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7743217945098877,
      "learning_rate": 5.139956951284274e-05,
      "loss": 2.9898,
      "step": 186834
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7565269470214844,
      "learning_rate": 5.139727988355071e-05,
      "loss": 2.9451,
      "step": 186835
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5860464572906494,
      "learning_rate": 5.1394990300478613e-05,
      "loss": 3.1263,
      "step": 186836
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7301018238067627,
      "learning_rate": 5.1392700763626873e-05,
      "loss": 2.8162,
      "step": 186837
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.732090473175049,
      "learning_rate": 5.139041127299586e-05,
      "loss": 2.839,
      "step": 186838
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.783412456512451,
      "learning_rate": 5.1388121828586136e-05,
      "loss": 2.9131,
      "step": 186839
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7397024631500244,
      "learning_rate": 5.138583243039798e-05,
      "loss": 3.0576,
      "step": 186840
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4838573932647705,
      "learning_rate": 5.138354307843194e-05,
      "loss": 2.6946,
      "step": 186841
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.325839042663574,
      "learning_rate": 5.138125377268844e-05,
      "loss": 3.0191,
      "step": 186842
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.908048152923584,
      "learning_rate": 5.137896451316782e-05,
      "loss": 2.7588,
      "step": 186843
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.029383897781372,
      "learning_rate": 5.1376675299870464e-05,
      "loss": 3.2157,
      "step": 186844
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4010207653045654,
      "learning_rate": 5.1374386132796994e-05,
      "loss": 2.9598,
      "step": 186845
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.133450746536255,
      "learning_rate": 5.137209701194759e-05,
      "loss": 2.9471,
      "step": 186846
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.863253355026245,
      "learning_rate": 5.1369807937322936e-05,
      "loss": 2.8806,
      "step": 186847
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.374850273132324,
      "learning_rate": 5.1367518908923275e-05,
      "loss": 2.8567,
      "step": 186848
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0125510692596436,
      "learning_rate": 5.136522992674914e-05,
      "loss": 2.9974,
      "step": 186849
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7071175575256348,
      "learning_rate": 5.1362940990800797e-05,
      "loss": 2.9265,
      "step": 186850
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8094465732574463,
      "learning_rate": 5.136065210107883e-05,
      "loss": 2.6484,
      "step": 186851
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8556742668151855,
      "learning_rate": 5.135836325758357e-05,
      "loss": 2.9389,
      "step": 186852
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.733154773712158,
      "learning_rate": 5.135607446031555e-05,
      "loss": 2.7838,
      "step": 186853
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6507883071899414,
      "learning_rate": 5.135378570927513e-05,
      "loss": 2.9729,
      "step": 186854
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.684688091278076,
      "learning_rate": 5.135149700446273e-05,
      "loss": 2.9883,
      "step": 186855
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.853238105773926,
      "learning_rate": 5.134920834587871e-05,
      "loss": 2.9043,
      "step": 186856
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7020373344421387,
      "learning_rate": 5.134691973352365e-05,
      "loss": 2.7662,
      "step": 186857
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7755980491638184,
      "learning_rate": 5.1344631167397774e-05,
      "loss": 3.0993,
      "step": 186858
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.7192821502685547,
      "learning_rate": 5.134234264750175e-05,
      "loss": 3.0532,
      "step": 186859
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9221928119659424,
      "learning_rate": 5.1340054173835854e-05,
      "loss": 2.9488,
      "step": 186860
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0298125743865967,
      "learning_rate": 5.133776574640054e-05,
      "loss": 2.9327,
      "step": 186861
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0558290481567383,
      "learning_rate": 5.133547736519614e-05,
      "loss": 2.8044,
      "step": 186862
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.317859172821045,
      "learning_rate": 5.133318903022326e-05,
      "loss": 3.0459,
      "step": 186863
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8354477882385254,
      "learning_rate": 5.133090074148217e-05,
      "loss": 3.0417,
      "step": 186864
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6424529552459717,
      "learning_rate": 5.132861249897343e-05,
      "loss": 3.0649,
      "step": 186865
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1971442699432373,
      "learning_rate": 5.132632430269738e-05,
      "loss": 2.9389,
      "step": 186866
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9244821071624756,
      "learning_rate": 5.132403615265447e-05,
      "loss": 2.9731,
      "step": 186867
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.856261968612671,
      "learning_rate": 5.132174804884505e-05,
      "loss": 3.0252,
      "step": 186868
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.839247703552246,
      "learning_rate": 5.131945999126967e-05,
      "loss": 3.0189,
      "step": 186869
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2190682888031006,
      "learning_rate": 5.131717197992862e-05,
      "loss": 2.8654,
      "step": 186870
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8685553073883057,
      "learning_rate": 5.1314884014822475e-05,
      "loss": 3.0582,
      "step": 186871
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.840341329574585,
      "learning_rate": 5.131259609595162e-05,
      "loss": 2.7717,
      "step": 186872
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4464473724365234,
      "learning_rate": 5.131030822331641e-05,
      "loss": 3.0522,
      "step": 186873
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2593367099761963,
      "learning_rate": 5.1308020396917246e-05,
      "loss": 2.7719,
      "step": 186874
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.872887134552002,
      "learning_rate": 5.13057326167547e-05,
      "loss": 2.7108,
      "step": 186875
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5191707611083984,
      "learning_rate": 5.1303444882829e-05,
      "loss": 2.7298,
      "step": 186876
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0412285327911377,
      "learning_rate": 5.130115719514082e-05,
      "loss": 3.1017,
      "step": 186877
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1640894412994385,
      "learning_rate": 5.1298869553690414e-05,
      "loss": 3.0812,
      "step": 186878
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.612480878829956,
      "learning_rate": 5.1296581958478225e-05,
      "loss": 2.9504,
      "step": 186879
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.28525972366333,
      "learning_rate": 5.129429440950464e-05,
      "loss": 3.1036,
      "step": 186880
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6156294345855713,
      "learning_rate": 5.129200690677021e-05,
      "loss": 2.8172,
      "step": 186881
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.888490200042725,
      "learning_rate": 5.1289719450275223e-05,
      "loss": 2.9199,
      "step": 186882
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.785852432250977,
      "learning_rate": 5.128743204002025e-05,
      "loss": 2.9527,
      "step": 186883
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.478081226348877,
      "learning_rate": 5.1285144676005653e-05,
      "loss": 2.74,
      "step": 186884
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.58417010307312,
      "learning_rate": 5.1282857358231835e-05,
      "loss": 3.1301,
      "step": 186885
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.889653205871582,
      "learning_rate": 5.1280570086699124e-05,
      "loss": 2.9318,
      "step": 186886
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.5032687187194824,
      "learning_rate": 5.127828286140816e-05,
      "loss": 2.6948,
      "step": 186887
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.181668758392334,
      "learning_rate": 5.127599568235917e-05,
      "loss": 2.9738,
      "step": 186888
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.414245843887329,
      "learning_rate": 5.1273708549552725e-05,
      "loss": 3.0373,
      "step": 186889
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6922078132629395,
      "learning_rate": 5.1271421462989194e-05,
      "loss": 2.7908,
      "step": 186890
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5904953479766846,
      "learning_rate": 5.1269134422669034e-05,
      "loss": 2.9411,
      "step": 186891
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.923539400100708,
      "learning_rate": 5.126684742859255e-05,
      "loss": 2.9591,
      "step": 186892
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.9607059955596924,
      "learning_rate": 5.126456048076031e-05,
      "loss": 3.0086,
      "step": 186893
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.069598913192749,
      "learning_rate": 5.1262273579172606e-05,
      "loss": 2.9426,
      "step": 186894
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3226969242095947,
      "learning_rate": 5.125998672383005e-05,
      "loss": 2.9399,
      "step": 186895
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.615818738937378,
      "learning_rate": 5.125769991473293e-05,
      "loss": 2.7957,
      "step": 186896
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7377371788024902,
      "learning_rate": 5.125541315188162e-05,
      "loss": 2.7718,
      "step": 186897
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.980696201324463,
      "learning_rate": 5.1253126435276716e-05,
      "loss": 2.8914,
      "step": 186898
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.546610116958618,
      "learning_rate": 5.125083976491855e-05,
      "loss": 3.0038,
      "step": 186899
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6118111610412598,
      "learning_rate": 5.124855314080746e-05,
      "loss": 3.0402,
      "step": 186900
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4497337341308594,
      "learning_rate": 5.124626656294404e-05,
      "loss": 2.9453,
      "step": 186901
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0315423011779785,
      "learning_rate": 5.124398003132859e-05,
      "loss": 2.9017,
      "step": 186902
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.085236072540283,
      "learning_rate": 5.1241693545961614e-05,
      "loss": 3.0884,
      "step": 186903
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.035756826400757,
      "learning_rate": 5.123940710684351e-05,
      "loss": 3.014,
      "step": 186904
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9142115116119385,
      "learning_rate": 5.123712071397471e-05,
      "loss": 3.0711,
      "step": 186905
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3411176204681396,
      "learning_rate": 5.1234834367355546e-05,
      "loss": 2.9749,
      "step": 186906
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.162642002105713,
      "learning_rate": 5.123254806698659e-05,
      "loss": 2.946,
      "step": 186907
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3953588008880615,
      "learning_rate": 5.1230261812868134e-05,
      "loss": 2.8039,
      "step": 186908
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.925804615020752,
      "learning_rate": 5.1227975605000746e-05,
      "loss": 2.9788,
      "step": 186909
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4977304935455322,
      "learning_rate": 5.1225689443384766e-05,
      "loss": 2.8151,
      "step": 186910
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9428205490112305,
      "learning_rate": 5.122340332802055e-05,
      "loss": 2.9816,
      "step": 186911
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.595214605331421,
      "learning_rate": 5.1221117258908675e-05,
      "loss": 3.0496,
      "step": 186912
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.972530841827393,
      "learning_rate": 5.1218831236049464e-05,
      "loss": 2.8611,
      "step": 186913
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.2831950187683105,
      "learning_rate": 5.121654525944332e-05,
      "loss": 2.7929,
      "step": 186914
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.235595703125,
      "learning_rate": 5.121425932909081e-05,
      "loss": 3.1585,
      "step": 186915
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.219242572784424,
      "learning_rate": 5.121197344499224e-05,
      "loss": 3.0558,
      "step": 186916
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4836299419403076,
      "learning_rate": 5.1209687607147995e-05,
      "loss": 2.8945,
      "step": 186917
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1578710079193115,
      "learning_rate": 5.120740181555862e-05,
      "loss": 2.8022,
      "step": 186918
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.894829511642456,
      "learning_rate": 5.120511607022452e-05,
      "loss": 2.8808,
      "step": 186919
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2188873291015625,
      "learning_rate": 5.120283037114598e-05,
      "loss": 2.997,
      "step": 186920
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.650296688079834,
      "learning_rate": 5.1200544718323634e-05,
      "loss": 2.808,
      "step": 186921
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.681734800338745,
      "learning_rate": 5.1198259111757786e-05,
      "loss": 2.8563,
      "step": 186922
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8064982891082764,
      "learning_rate": 5.1195973551448774e-05,
      "loss": 3.0726,
      "step": 186923
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5024166107177734,
      "learning_rate": 5.119368803739726e-05,
      "loss": 3.0592,
      "step": 186924
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.1121153831481934,
      "learning_rate": 5.119140256960341e-05,
      "loss": 3.0464,
      "step": 186925
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9357786178588867,
      "learning_rate": 5.118911714806789e-05,
      "loss": 3.0745,
      "step": 186926
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7902164459228516,
      "learning_rate": 5.1186831772791005e-05,
      "loss": 2.9158,
      "step": 186927
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.8534677028656006,
      "learning_rate": 5.118454644377314e-05,
      "loss": 3.0193,
      "step": 186928
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.249772548675537,
      "learning_rate": 5.118226116101475e-05,
      "loss": 3.1056,
      "step": 186929
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.755563259124756,
      "learning_rate": 5.1179975924516316e-05,
      "loss": 2.9342,
      "step": 186930
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9991626739501953,
      "learning_rate": 5.117769073427814e-05,
      "loss": 2.9406,
      "step": 186931
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4826176166534424,
      "learning_rate": 5.117540559030083e-05,
      "loss": 2.9108,
      "step": 186932
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.940048933029175,
      "learning_rate": 5.1173120492584686e-05,
      "loss": 2.9594,
      "step": 186933
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.237053394317627,
      "learning_rate": 5.117083544113016e-05,
      "loss": 2.7813,
      "step": 186934
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7733869552612305,
      "learning_rate": 5.116855043593761e-05,
      "loss": 2.9172,
      "step": 186935
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.596087694168091,
      "learning_rate": 5.1166265477007576e-05,
      "loss": 2.7956,
      "step": 186936
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6135756969451904,
      "learning_rate": 5.116398056434037e-05,
      "loss": 2.9847,
      "step": 186937
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5562171936035156,
      "learning_rate": 5.1161695697936556e-05,
      "loss": 2.9299,
      "step": 186938
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6582536697387695,
      "learning_rate": 5.115941087779647e-05,
      "loss": 2.8631,
      "step": 186939
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.131175994873047,
      "learning_rate": 5.1157126103920544e-05,
      "loss": 2.8406,
      "step": 186940
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.05189323425293,
      "learning_rate": 5.1154841376309145e-05,
      "loss": 3.0542,
      "step": 186941
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.894639253616333,
      "learning_rate": 5.1152556694962834e-05,
      "loss": 2.8748,
      "step": 186942
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3579885959625244,
      "learning_rate": 5.115027205988188e-05,
      "loss": 2.8754,
      "step": 186943
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.3720343112945557,
      "learning_rate": 5.1147987471066853e-05,
      "loss": 3.0706,
      "step": 186944
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5733413696289062,
      "learning_rate": 5.114570292851812e-05,
      "loss": 3.1079,
      "step": 186945
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.528630256652832,
      "learning_rate": 5.114341843223611e-05,
      "loss": 3.0701,
      "step": 186946
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.0593342781066895,
      "learning_rate": 5.114113398222115e-05,
      "loss": 3.0947,
      "step": 186947
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4527015686035156,
      "learning_rate": 5.113884957847382e-05,
      "loss": 2.9062,
      "step": 186948
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4733927249908447,
      "learning_rate": 5.1136565220994405e-05,
      "loss": 2.8922,
      "step": 186949
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.544062852859497,
      "learning_rate": 5.113428090978349e-05,
      "loss": 2.8414,
      "step": 186950
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7101409435272217,
      "learning_rate": 5.113199664484139e-05,
      "loss": 2.9924,
      "step": 186951
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.03306245803833,
      "learning_rate": 5.1129712426168546e-05,
      "loss": 3.2518,
      "step": 186952
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.184584140777588,
      "learning_rate": 5.112742825376532e-05,
      "loss": 3.0396,
      "step": 186953
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7621662616729736,
      "learning_rate": 5.112514412763229e-05,
      "loss": 3.0422,
      "step": 186954
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.728017568588257,
      "learning_rate": 5.112286004776971e-05,
      "loss": 2.996,
      "step": 186955
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.870678424835205,
      "learning_rate": 5.1120576014178146e-05,
      "loss": 2.9203,
      "step": 186956
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.3739099502563477,
      "learning_rate": 5.111829202685798e-05,
      "loss": 2.9538,
      "step": 186957
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.616566181182861,
      "learning_rate": 5.1116008085809627e-05,
      "loss": 2.9644,
      "step": 186958
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6993205547332764,
      "learning_rate": 5.1113724191033425e-05,
      "loss": 2.8743,
      "step": 186959
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6691808700561523,
      "learning_rate": 5.111144034252994e-05,
      "loss": 2.9093,
      "step": 186960
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.095163106918335,
      "learning_rate": 5.1109156540299476e-05,
      "loss": 2.8787,
      "step": 186961
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4880495071411133,
      "learning_rate": 5.11068727843426e-05,
      "loss": 2.9647,
      "step": 186962
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.493110179901123,
      "learning_rate": 5.110458907465964e-05,
      "loss": 2.8399,
      "step": 186963
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.760830879211426,
      "learning_rate": 5.1102305411251065e-05,
      "loss": 2.839,
      "step": 186964
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.991607189178467,
      "learning_rate": 5.110002179411717e-05,
      "loss": 2.9781,
      "step": 186965
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.340332984924316,
      "learning_rate": 5.109773822325857e-05,
      "loss": 3.0501,
      "step": 186966
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.327122211456299,
      "learning_rate": 5.109545469867548e-05,
      "loss": 3.0123,
      "step": 186967
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4160656929016113,
      "learning_rate": 5.1093171220368565e-05,
      "loss": 2.9534,
      "step": 186968
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.90226411819458,
      "learning_rate": 5.109088778833804e-05,
      "loss": 3.0901,
      "step": 186969
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5502376556396484,
      "learning_rate": 5.108860440258456e-05,
      "loss": 3.0571,
      "step": 186970
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.236037731170654,
      "learning_rate": 5.108632106310827e-05,
      "loss": 2.7695,
      "step": 186971
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.796957492828369,
      "learning_rate": 5.108403776990979e-05,
      "loss": 3.2954,
      "step": 186972
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.041719436645508,
      "learning_rate": 5.1081754522989424e-05,
      "loss": 2.927,
      "step": 186973
    },
    {
      "epoch": 2.43,
      "grad_norm": 5.4035773277282715,
      "learning_rate": 5.107947132234771e-05,
      "loss": 2.8805,
      "step": 186974
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.2847206592559814,
      "learning_rate": 5.107718816798497e-05,
      "loss": 2.8003,
      "step": 186975
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5998144149780273,
      "learning_rate": 5.107490505990182e-05,
      "loss": 2.9028,
      "step": 186976
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9482269287109375,
      "learning_rate": 5.107262199809841e-05,
      "loss": 2.9748,
      "step": 186977
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.4963150024414062,
      "learning_rate": 5.107033898257538e-05,
      "loss": 2.9005,
      "step": 186978
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.055863857269287,
      "learning_rate": 5.1068056013332955e-05,
      "loss": 3.1297,
      "step": 186979
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.756134986877441,
      "learning_rate": 5.106577309037179e-05,
      "loss": 2.9781,
      "step": 186980
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.75246262550354,
      "learning_rate": 5.106349021369209e-05,
      "loss": 2.7918,
      "step": 186981
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.6692452430725098,
      "learning_rate": 5.1061207383294576e-05,
      "loss": 2.8526,
      "step": 186982
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7542552947998047,
      "learning_rate": 5.105892459917931e-05,
      "loss": 2.9763,
      "step": 186983
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.45778751373291,
      "learning_rate": 5.105664186134695e-05,
      "loss": 2.992,
      "step": 186984
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.6244871616363525,
      "learning_rate": 5.10543591697978e-05,
      "loss": 2.8333,
      "step": 186985
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.096219301223755,
      "learning_rate": 5.105207652453242e-05,
      "loss": 3.0533,
      "step": 186986
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.778954029083252,
      "learning_rate": 5.1049793925551093e-05,
      "loss": 2.898,
      "step": 186987
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.762454032897949,
      "learning_rate": 5.1047511372854375e-05,
      "loss": 3.0502,
      "step": 186988
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.574958324432373,
      "learning_rate": 5.10452288664426e-05,
      "loss": 3.1634,
      "step": 186989
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.0497231483459473,
      "learning_rate": 5.104294640631627e-05,
      "loss": 2.7138,
      "step": 186990
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9813718795776367,
      "learning_rate": 5.104066399247564e-05,
      "loss": 2.9803,
      "step": 186991
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9552524089813232,
      "learning_rate": 5.103838162492132e-05,
      "loss": 3.0437,
      "step": 186992
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.8991518020629883,
      "learning_rate": 5.103609930365361e-05,
      "loss": 2.9098,
      "step": 186993
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5646588802337646,
      "learning_rate": 5.103381702867304e-05,
      "loss": 2.8156,
      "step": 186994
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5258429050445557,
      "learning_rate": 5.103153479998001e-05,
      "loss": 2.8838,
      "step": 186995
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.5369088649749756,
      "learning_rate": 5.1029252617574824e-05,
      "loss": 3.0744,
      "step": 186996
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.272965908050537,
      "learning_rate": 5.1026970481458104e-05,
      "loss": 3.0434,
      "step": 186997
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.104372024536133,
      "learning_rate": 5.102468839163013e-05,
      "loss": 3.112,
      "step": 186998
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.9960174560546875,
      "learning_rate": 5.1022406348091325e-05,
      "loss": 2.8847,
      "step": 186999
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.756039619445801,
      "learning_rate": 5.1020124350842194e-05,
      "loss": 2.8544,
      "step": 187000
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.899047374725342,
      "learning_rate": 5.101784239988317e-05,
      "loss": 2.8162,
      "step": 187001
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.673766613006592,
      "learning_rate": 5.1015560495214516e-05,
      "loss": 2.7997,
      "step": 187002
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.699871301651001,
      "learning_rate": 5.1013278636836864e-05,
      "loss": 3.0393,
      "step": 187003
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.4115638732910156,
      "learning_rate": 5.101099682475052e-05,
      "loss": 2.859,
      "step": 187004
    },
    {
      "epoch": 2.43,
      "grad_norm": 3.697248935699463,
      "learning_rate": 5.100871505895587e-05,
      "loss": 2.836,
      "step": 187005
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.960829496383667,
      "learning_rate": 5.1006433339453465e-05,
      "loss": 3.053,
      "step": 187006
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7968332767486572,
      "learning_rate": 5.10041516662437e-05,
      "loss": 3.1788,
      "step": 187007
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.7915308475494385,
      "learning_rate": 5.100187003932687e-05,
      "loss": 3.1053,
      "step": 187008
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.924680233001709,
      "learning_rate": 5.0999588458703536e-05,
      "loss": 2.8045,
      "step": 187009
    },
    {
      "epoch": 2.43,
      "grad_norm": 4.0193705558776855,
      "learning_rate": 5.099730692437404e-05,
      "loss": 2.8801,
      "step": 187010
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2190563678741455,
      "learning_rate": 5.099502543633891e-05,
      "loss": 2.8404,
      "step": 187011
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7186365127563477,
      "learning_rate": 5.099274399459848e-05,
      "loss": 2.9062,
      "step": 187012
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4556708335876465,
      "learning_rate": 5.099046259915322e-05,
      "loss": 2.8165,
      "step": 187013
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1698591709136963,
      "learning_rate": 5.098818125000347e-05,
      "loss": 3.0284,
      "step": 187014
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.751512050628662,
      "learning_rate": 5.0985899947149775e-05,
      "loss": 2.9626,
      "step": 187015
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.397187232971191,
      "learning_rate": 5.0983618690592454e-05,
      "loss": 3.106,
      "step": 187016
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.528676748275757,
      "learning_rate": 5.098133748033203e-05,
      "loss": 2.9329,
      "step": 187017
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.282543659210205,
      "learning_rate": 5.0979056316368873e-05,
      "loss": 2.8997,
      "step": 187018
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9759268760681152,
      "learning_rate": 5.097677519870345e-05,
      "loss": 2.9349,
      "step": 187019
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7322497367858887,
      "learning_rate": 5.0974494127336016e-05,
      "loss": 2.7218,
      "step": 187020
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7443010807037354,
      "learning_rate": 5.097221310226722e-05,
      "loss": 2.9366,
      "step": 187021
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.685051202774048,
      "learning_rate": 5.0969932123497325e-05,
      "loss": 2.9675,
      "step": 187022
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8132479190826416,
      "learning_rate": 5.096765119102689e-05,
      "loss": 2.8657,
      "step": 187023
    },
    {
      "epoch": 2.44,
      "grad_norm": 6.484878063201904,
      "learning_rate": 5.0965370304856254e-05,
      "loss": 2.7946,
      "step": 187024
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.850156545639038,
      "learning_rate": 5.096308946498588e-05,
      "loss": 2.6503,
      "step": 187025
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9284985065460205,
      "learning_rate": 5.096080867141608e-05,
      "loss": 2.8398,
      "step": 187026
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.536879539489746,
      "learning_rate": 5.095852792414743e-05,
      "loss": 2.8399,
      "step": 187027
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9306106567382812,
      "learning_rate": 5.0956247223180246e-05,
      "loss": 3.0486,
      "step": 187028
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.492741107940674,
      "learning_rate": 5.095396656851506e-05,
      "loss": 2.9,
      "step": 187029
    },
    {
      "epoch": 2.44,
      "grad_norm": 6.336878776550293,
      "learning_rate": 5.095168596015223e-05,
      "loss": 2.5949,
      "step": 187030
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.199329376220703,
      "learning_rate": 5.094940539809217e-05,
      "loss": 2.9281,
      "step": 187031
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0752832889556885,
      "learning_rate": 5.0947124882335266e-05,
      "loss": 2.8937,
      "step": 187032
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9907615184783936,
      "learning_rate": 5.094484441288206e-05,
      "loss": 3.1834,
      "step": 187033
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2813210487365723,
      "learning_rate": 5.094256398973281e-05,
      "loss": 2.9641,
      "step": 187034
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8209054470062256,
      "learning_rate": 5.094028361288812e-05,
      "loss": 3.024,
      "step": 187035
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.128164529800415,
      "learning_rate": 5.0938003282348296e-05,
      "loss": 3.2,
      "step": 187036
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5596871376037598,
      "learning_rate": 5.0935722998113926e-05,
      "loss": 2.7584,
      "step": 187037
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.49106502532959,
      "learning_rate": 5.093344276018515e-05,
      "loss": 2.7682,
      "step": 187038
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9806060791015625,
      "learning_rate": 5.093116256856263e-05,
      "loss": 2.93,
      "step": 187039
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3967649936676025,
      "learning_rate": 5.0928882423246606e-05,
      "loss": 2.9668,
      "step": 187040
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6700820922851562,
      "learning_rate": 5.092660232423771e-05,
      "loss": 2.7257,
      "step": 187041
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6070244312286377,
      "learning_rate": 5.09243222715362e-05,
      "loss": 2.9967,
      "step": 187042
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.357405185699463,
      "learning_rate": 5.0922042265142684e-05,
      "loss": 2.7548,
      "step": 187043
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.905241012573242,
      "learning_rate": 5.091976230505732e-05,
      "loss": 3.0329,
      "step": 187044
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.682624101638794,
      "learning_rate": 5.0917482391280716e-05,
      "loss": 2.9078,
      "step": 187045
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8637382984161377,
      "learning_rate": 5.0915202523813234e-05,
      "loss": 2.9422,
      "step": 187046
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9914073944091797,
      "learning_rate": 5.0912922702655346e-05,
      "loss": 2.7185,
      "step": 187047
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.646834373474121,
      "learning_rate": 5.0910642927807413e-05,
      "loss": 2.9957,
      "step": 187048
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.474522829055786,
      "learning_rate": 5.090836319927003e-05,
      "loss": 3.0649,
      "step": 187049
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8693220615386963,
      "learning_rate": 5.090608351704331e-05,
      "loss": 2.8473,
      "step": 187050
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6472620964050293,
      "learning_rate": 5.0903803881127935e-05,
      "loss": 3.0756,
      "step": 187051
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3979127407073975,
      "learning_rate": 5.090152429152419e-05,
      "loss": 2.8299,
      "step": 187052
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.810131311416626,
      "learning_rate": 5.089924474823263e-05,
      "loss": 3.1874,
      "step": 187053
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5722086429595947,
      "learning_rate": 5.089696525125352e-05,
      "loss": 2.9633,
      "step": 187054
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5161309242248535,
      "learning_rate": 5.08946858005875e-05,
      "loss": 2.8059,
      "step": 187055
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.018381118774414,
      "learning_rate": 5.089240639623473e-05,
      "loss": 3.0681,
      "step": 187056
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1873042583465576,
      "learning_rate": 5.0890127038195815e-05,
      "loss": 2.766,
      "step": 187057
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.58235764503479,
      "learning_rate": 5.088784772647109e-05,
      "loss": 2.7888,
      "step": 187058
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6695587635040283,
      "learning_rate": 5.0885568461061045e-05,
      "loss": 2.8806,
      "step": 187059
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6859560012817383,
      "learning_rate": 5.088328924196605e-05,
      "loss": 2.8445,
      "step": 187060
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7506630420684814,
      "learning_rate": 5.088101006918668e-05,
      "loss": 3.0101,
      "step": 187061
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4139277935028076,
      "learning_rate": 5.087873094272306e-05,
      "loss": 3.1464,
      "step": 187062
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8471219539642334,
      "learning_rate": 5.0876451862575886e-05,
      "loss": 2.8764,
      "step": 187063
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5706028938293457,
      "learning_rate": 5.087417282874543e-05,
      "loss": 2.7831,
      "step": 187064
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.969470977783203,
      "learning_rate": 5.087189384123219e-05,
      "loss": 3.2238,
      "step": 187065
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9397757053375244,
      "learning_rate": 5.086961490003654e-05,
      "loss": 3.0744,
      "step": 187066
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6432619094848633,
      "learning_rate": 5.0867336005159066e-05,
      "loss": 2.945,
      "step": 187067
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4342496395111084,
      "learning_rate": 5.0865057156599876e-05,
      "loss": 3.1603,
      "step": 187068
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.692002296447754,
      "learning_rate": 5.08627783543597e-05,
      "loss": 2.8844,
      "step": 187069
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4141087532043457,
      "learning_rate": 5.086049959843874e-05,
      "loss": 2.869,
      "step": 187070
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.930180788040161,
      "learning_rate": 5.085822088883756e-05,
      "loss": 2.8286,
      "step": 187071
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2288830280303955,
      "learning_rate": 5.08559422255565e-05,
      "loss": 2.887,
      "step": 187072
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4936330318450928,
      "learning_rate": 5.0853663608596116e-05,
      "loss": 2.7926,
      "step": 187073
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.76794171333313,
      "learning_rate": 5.085138503795672e-05,
      "loss": 2.8407,
      "step": 187074
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.765413284301758,
      "learning_rate": 5.084910651363873e-05,
      "loss": 2.7183,
      "step": 187075
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.76434588432312,
      "learning_rate": 5.084682803564255e-05,
      "loss": 2.9557,
      "step": 187076
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0366671085357666,
      "learning_rate": 5.084454960396869e-05,
      "loss": 2.7682,
      "step": 187077
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6798200607299805,
      "learning_rate": 5.08422712186175e-05,
      "loss": 2.7447,
      "step": 187078
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0370090007781982,
      "learning_rate": 5.0839992879589464e-05,
      "loss": 2.8748,
      "step": 187079
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.970895767211914,
      "learning_rate": 5.0837714586885e-05,
      "loss": 2.7207,
      "step": 187080
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2674529552459717,
      "learning_rate": 5.0835436340504524e-05,
      "loss": 3.089,
      "step": 187081
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.232036590576172,
      "learning_rate": 5.083315814044835e-05,
      "loss": 2.6428,
      "step": 187082
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.651538372039795,
      "learning_rate": 5.083087998671705e-05,
      "loss": 3.1254,
      "step": 187083
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.773756504058838,
      "learning_rate": 5.082860187931097e-05,
      "loss": 3.0836,
      "step": 187084
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9691591262817383,
      "learning_rate": 5.0826323818230596e-05,
      "loss": 2.8762,
      "step": 187085
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.001021146774292,
      "learning_rate": 5.08240458034763e-05,
      "loss": 2.9975,
      "step": 187086
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.1577067375183105,
      "learning_rate": 5.082176783504848e-05,
      "loss": 2.7977,
      "step": 187087
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8536930084228516,
      "learning_rate": 5.081948991294766e-05,
      "loss": 2.752,
      "step": 187088
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.972414016723633,
      "learning_rate": 5.0817212037174194e-05,
      "loss": 2.9352,
      "step": 187089
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.959989070892334,
      "learning_rate": 5.0814934207728433e-05,
      "loss": 2.9355,
      "step": 187090
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2061800956726074,
      "learning_rate": 5.0812656424610944e-05,
      "loss": 3.0773,
      "step": 187091
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.002434730529785,
      "learning_rate": 5.08103786878221e-05,
      "loss": 3.0329,
      "step": 187092
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4752800464630127,
      "learning_rate": 5.080810099736225e-05,
      "loss": 3.044,
      "step": 187093
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.994001626968384,
      "learning_rate": 5.080582335323192e-05,
      "loss": 3.0002,
      "step": 187094
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8577399253845215,
      "learning_rate": 5.080354575543143e-05,
      "loss": 3.0857,
      "step": 187095
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.917273759841919,
      "learning_rate": 5.080126820396134e-05,
      "loss": 2.8924,
      "step": 187096
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7225167751312256,
      "learning_rate": 5.079899069882203e-05,
      "loss": 2.8802,
      "step": 187097
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.1162211894989014,
      "learning_rate": 5.0796713240013857e-05,
      "loss": 2.9136,
      "step": 187098
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7842490673065186,
      "learning_rate": 5.0794435827537215e-05,
      "loss": 2.8344,
      "step": 187099
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.57483172416687,
      "learning_rate": 5.079215846139265e-05,
      "loss": 2.8509,
      "step": 187100
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.365781307220459,
      "learning_rate": 5.078988114158046e-05,
      "loss": 2.7799,
      "step": 187101
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6842002868652344,
      "learning_rate": 5.0787603868101235e-05,
      "loss": 2.6694,
      "step": 187102
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0377163887023926,
      "learning_rate": 5.078532664095528e-05,
      "loss": 2.8092,
      "step": 187103
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6242311000823975,
      "learning_rate": 5.078304946014303e-05,
      "loss": 2.7604,
      "step": 187104
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5870494842529297,
      "learning_rate": 5.078077232566485e-05,
      "loss": 2.8273,
      "step": 187105
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.850451707839966,
      "learning_rate": 5.0778495237521274e-05,
      "loss": 2.8756,
      "step": 187106
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.91322660446167,
      "learning_rate": 5.0776218195712634e-05,
      "loss": 2.7816,
      "step": 187107
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0503921508789062,
      "learning_rate": 5.07739412002395e-05,
      "loss": 2.7024,
      "step": 187108
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8808541297912598,
      "learning_rate": 5.0771664251102065e-05,
      "loss": 2.8537,
      "step": 187109
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.114527702331543,
      "learning_rate": 5.076938734830106e-05,
      "loss": 2.762,
      "step": 187110
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0808587074279785,
      "learning_rate": 5.076711049183659e-05,
      "loss": 3.2273,
      "step": 187111
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5238990783691406,
      "learning_rate": 5.0764833681709296e-05,
      "loss": 2.8302,
      "step": 187112
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.816382646560669,
      "learning_rate": 5.076255691791943e-05,
      "loss": 3.0104,
      "step": 187113
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.656315326690674,
      "learning_rate": 5.076028020046756e-05,
      "loss": 2.752,
      "step": 187114
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9923768043518066,
      "learning_rate": 5.075800352935403e-05,
      "loss": 3.2244,
      "step": 187115
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6956727504730225,
      "learning_rate": 5.075572690457943e-05,
      "loss": 3.2348,
      "step": 187116
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9649057388305664,
      "learning_rate": 5.0753450326143895e-05,
      "loss": 2.853,
      "step": 187117
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.379176139831543,
      "learning_rate": 5.0751173794048064e-05,
      "loss": 2.9259,
      "step": 187118
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.888178825378418,
      "learning_rate": 5.074889730829223e-05,
      "loss": 2.8252,
      "step": 187119
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0172011852264404,
      "learning_rate": 5.074662086887695e-05,
      "loss": 2.7721,
      "step": 187120
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5508108139038086,
      "learning_rate": 5.074434447580251e-05,
      "loss": 2.9839,
      "step": 187121
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.331815004348755,
      "learning_rate": 5.0742068129069534e-05,
      "loss": 2.7943,
      "step": 187122
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.063661575317383,
      "learning_rate": 5.073979182867819e-05,
      "loss": 2.7626,
      "step": 187123
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.754282236099243,
      "learning_rate": 5.073751557462907e-05,
      "loss": 2.9519,
      "step": 187124
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.554597854614258,
      "learning_rate": 5.0735239366922484e-05,
      "loss": 3.0356,
      "step": 187125
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.433701753616333,
      "learning_rate": 5.0732963205558995e-05,
      "loss": 3.0394,
      "step": 187126
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.834355354309082,
      "learning_rate": 5.07306870905389e-05,
      "loss": 2.7872,
      "step": 187127
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8068511486053467,
      "learning_rate": 5.0728411021862805e-05,
      "loss": 2.8636,
      "step": 187128
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.735501766204834,
      "learning_rate": 5.0726134999530863e-05,
      "loss": 2.889,
      "step": 187129
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1621968746185303,
      "learning_rate": 5.072385902354372e-05,
      "loss": 3.0305,
      "step": 187130
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.52593731880188,
      "learning_rate": 5.072158309390164e-05,
      "loss": 2.8501,
      "step": 187131
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5790224075317383,
      "learning_rate": 5.071930721060519e-05,
      "loss": 3.0654,
      "step": 187132
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9713685512542725,
      "learning_rate": 5.0717031373654626e-05,
      "loss": 2.8546,
      "step": 187133
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7835586071014404,
      "learning_rate": 5.071475558305066e-05,
      "loss": 2.8856,
      "step": 187134
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.820178985595703,
      "learning_rate": 5.071247983879335e-05,
      "loss": 2.92,
      "step": 187135
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7514994144439697,
      "learning_rate": 5.0710204140883404e-05,
      "loss": 2.7804,
      "step": 187136
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6438097953796387,
      "learning_rate": 5.0707928489321046e-05,
      "loss": 3.1024,
      "step": 187137
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9400200843811035,
      "learning_rate": 5.0705652884106854e-05,
      "loss": 2.9032,
      "step": 187138
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1519412994384766,
      "learning_rate": 5.070337732524111e-05,
      "loss": 2.9122,
      "step": 187139
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.346625566482544,
      "learning_rate": 5.0701101812724465e-05,
      "loss": 2.9986,
      "step": 187140
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.107853651046753,
      "learning_rate": 5.0698826346557074e-05,
      "loss": 2.9303,
      "step": 187141
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.093454837799072,
      "learning_rate": 5.069655092673951e-05,
      "loss": 2.882,
      "step": 187142
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7229623794555664,
      "learning_rate": 5.06942755532721e-05,
      "loss": 2.7578,
      "step": 187143
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1214559078216553,
      "learning_rate": 5.0692000226155414e-05,
      "loss": 2.9191,
      "step": 187144
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.598175525665283,
      "learning_rate": 5.068972494538972e-05,
      "loss": 2.8063,
      "step": 187145
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8597629070281982,
      "learning_rate": 5.068744971097565e-05,
      "loss": 2.7324,
      "step": 187146
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1300806999206543,
      "learning_rate": 5.068517452291336e-05,
      "loss": 2.6612,
      "step": 187147
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4908711910247803,
      "learning_rate": 5.068289938120344e-05,
      "loss": 3.1654,
      "step": 187148
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.384975433349609,
      "learning_rate": 5.0680624285846226e-05,
      "loss": 2.9153,
      "step": 187149
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4977030754089355,
      "learning_rate": 5.067834923684228e-05,
      "loss": 2.8768,
      "step": 187150
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5385284423828125,
      "learning_rate": 5.0676074234191847e-05,
      "loss": 3.0444,
      "step": 187151
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8732709884643555,
      "learning_rate": 5.0673799277895565e-05,
      "loss": 2.9123,
      "step": 187152
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8898701667785645,
      "learning_rate": 5.067152436795361e-05,
      "loss": 3.1118,
      "step": 187153
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9472265243530273,
      "learning_rate": 5.066924950436657e-05,
      "loss": 3.0903,
      "step": 187154
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.274156093597412,
      "learning_rate": 5.066697468713475e-05,
      "loss": 2.8856,
      "step": 187155
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.193514585494995,
      "learning_rate": 5.0664699916258755e-05,
      "loss": 3.1578,
      "step": 187156
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9150004386901855,
      "learning_rate": 5.066242519173881e-05,
      "loss": 2.8018,
      "step": 187157
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9406135082244873,
      "learning_rate": 5.066015051357548e-05,
      "loss": 2.8897,
      "step": 187158
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.892504930496216,
      "learning_rate": 5.065787588176917e-05,
      "loss": 3.2222,
      "step": 187159
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.845343589782715,
      "learning_rate": 5.0655601296320246e-05,
      "loss": 2.8499,
      "step": 187160
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4759433269500732,
      "learning_rate": 5.0653326757229074e-05,
      "loss": 3.0861,
      "step": 187161
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8828554153442383,
      "learning_rate": 5.065105226449625e-05,
      "loss": 2.9724,
      "step": 187162
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.590318202972412,
      "learning_rate": 5.064877781812201e-05,
      "loss": 2.9746,
      "step": 187163
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9628615379333496,
      "learning_rate": 5.0646503418106954e-05,
      "loss": 3.1597,
      "step": 187164
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.79296612739563,
      "learning_rate": 5.064422906445141e-05,
      "loss": 2.7649,
      "step": 187165
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1199851036071777,
      "learning_rate": 5.064195475715582e-05,
      "loss": 2.9345,
      "step": 187166
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7861196994781494,
      "learning_rate": 5.063968049622051e-05,
      "loss": 2.9237,
      "step": 187167
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8865621089935303,
      "learning_rate": 5.063740628164605e-05,
      "loss": 2.9671,
      "step": 187168
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6305489540100098,
      "learning_rate": 5.0635132113432775e-05,
      "loss": 2.9898,
      "step": 187169
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9807400703430176,
      "learning_rate": 5.0632857991581176e-05,
      "loss": 3.0313,
      "step": 187170
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8150863647460938,
      "learning_rate": 5.063058391609166e-05,
      "loss": 3.0495,
      "step": 187171
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.921247959136963,
      "learning_rate": 5.062830988696452e-05,
      "loss": 3.0098,
      "step": 187172
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.31325626373291,
      "learning_rate": 5.06260359042004e-05,
      "loss": 2.957,
      "step": 187173
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.754790782928467,
      "learning_rate": 5.062376196779956e-05,
      "loss": 2.5895,
      "step": 187174
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6629204750061035,
      "learning_rate": 5.0621488077762426e-05,
      "loss": 2.8669,
      "step": 187175
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.781074285507202,
      "learning_rate": 5.0619214234089545e-05,
      "loss": 3.0484,
      "step": 187176
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.298054218292236,
      "learning_rate": 5.061694043678124e-05,
      "loss": 2.9072,
      "step": 187177
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1009318828582764,
      "learning_rate": 5.061466668583788e-05,
      "loss": 3.1218,
      "step": 187178
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.858184337615967,
      "learning_rate": 5.0612392981260064e-05,
      "loss": 2.8907,
      "step": 187179
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0234975814819336,
      "learning_rate": 5.0610119323047995e-05,
      "loss": 2.9869,
      "step": 187180
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5420963764190674,
      "learning_rate": 5.06078457112023e-05,
      "loss": 2.8994,
      "step": 187181
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6252715587615967,
      "learning_rate": 5.060557214572335e-05,
      "loss": 3.0076,
      "step": 187182
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.705437421798706,
      "learning_rate": 5.060329862661148e-05,
      "loss": 3.0656,
      "step": 187183
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6971943378448486,
      "learning_rate": 5.060102515386711e-05,
      "loss": 3.0276,
      "step": 187184
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.942237377166748,
      "learning_rate": 5.0598751727490794e-05,
      "loss": 2.859,
      "step": 187185
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.679299831390381,
      "learning_rate": 5.0596478347482814e-05,
      "loss": 2.836,
      "step": 187186
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6765854358673096,
      "learning_rate": 5.0594205013843715e-05,
      "loss": 3.033,
      "step": 187187
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.267921209335327,
      "learning_rate": 5.059193172657389e-05,
      "loss": 2.8816,
      "step": 187188
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4959046840667725,
      "learning_rate": 5.05896584856737e-05,
      "loss": 2.8487,
      "step": 187189
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.704479694366455,
      "learning_rate": 5.058738529114352e-05,
      "loss": 2.8824,
      "step": 187190
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.887934446334839,
      "learning_rate": 5.058511214298395e-05,
      "loss": 2.7808,
      "step": 187191
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7077648639678955,
      "learning_rate": 5.058283904119522e-05,
      "loss": 2.9276,
      "step": 187192
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.10579776763916,
      "learning_rate": 5.0580565985777966e-05,
      "loss": 2.6581,
      "step": 187193
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.209001541137695,
      "learning_rate": 5.057829297673238e-05,
      "loss": 2.8751,
      "step": 187194
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5634660720825195,
      "learning_rate": 5.057602001405917e-05,
      "loss": 2.9767,
      "step": 187195
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7936105728149414,
      "learning_rate": 5.05737470977584e-05,
      "loss": 2.8139,
      "step": 187196
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7086100578308105,
      "learning_rate": 5.05714742278308e-05,
      "loss": 2.9193,
      "step": 187197
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7281270027160645,
      "learning_rate": 5.0569201404276574e-05,
      "loss": 2.8221,
      "step": 187198
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3635005950927734,
      "learning_rate": 5.056692862709632e-05,
      "loss": 3.0044,
      "step": 187199
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3774776458740234,
      "learning_rate": 5.0564655896290296e-05,
      "loss": 3.1334,
      "step": 187200
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.555715322494507,
      "learning_rate": 5.0562383211859145e-05,
      "loss": 2.647,
      "step": 187201
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6187305450439453,
      "learning_rate": 5.0560110573803034e-05,
      "loss": 2.7171,
      "step": 187202
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3386318683624268,
      "learning_rate": 5.055783798212259e-05,
      "loss": 3.0473,
      "step": 187203
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.860725164413452,
      "learning_rate": 5.055556543681806e-05,
      "loss": 3.0481,
      "step": 187204
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.611753463745117,
      "learning_rate": 5.0553292937890025e-05,
      "loss": 2.9621,
      "step": 187205
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7354788780212402,
      "learning_rate": 5.055102048533879e-05,
      "loss": 3.0873,
      "step": 187206
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.363370895385742,
      "learning_rate": 5.054874807916496e-05,
      "loss": 2.9457,
      "step": 187207
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.12066650390625,
      "learning_rate": 5.05464757193687e-05,
      "loss": 2.8886,
      "step": 187208
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.896726131439209,
      "learning_rate": 5.05442034059506e-05,
      "loss": 2.8477,
      "step": 187209
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5327088832855225,
      "learning_rate": 5.054193113891101e-05,
      "loss": 2.8719,
      "step": 187210
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4109132289886475,
      "learning_rate": 5.053965891825041e-05,
      "loss": 2.9873,
      "step": 187211
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3292160034179688,
      "learning_rate": 5.053738674396915e-05,
      "loss": 2.6826,
      "step": 187212
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8121471405029297,
      "learning_rate": 5.053511461606786e-05,
      "loss": 3.1665,
      "step": 187213
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.685559034347534,
      "learning_rate": 5.0532842534546637e-05,
      "loss": 2.8697,
      "step": 187214
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.770566463470459,
      "learning_rate": 5.0530570499406145e-05,
      "loss": 2.8502,
      "step": 187215
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6921844482421875,
      "learning_rate": 5.052829851064665e-05,
      "loss": 2.8723,
      "step": 187216
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.00907301902771,
      "learning_rate": 5.052602656826872e-05,
      "loss": 3.0051,
      "step": 187217
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7776434421539307,
      "learning_rate": 5.0523754672272655e-05,
      "loss": 3.0353,
      "step": 187218
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7019529342651367,
      "learning_rate": 5.052148282265906e-05,
      "loss": 2.8513,
      "step": 187219
    },
    {
      "epoch": 2.44,
      "grad_norm": 6.23618221282959,
      "learning_rate": 5.051921101942808e-05,
      "loss": 2.9873,
      "step": 187220
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.471508026123047,
      "learning_rate": 5.051693926258038e-05,
      "loss": 3.0473,
      "step": 187221
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5518383979797363,
      "learning_rate": 5.051466755211624e-05,
      "loss": 2.9744,
      "step": 187222
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.730678081512451,
      "learning_rate": 5.051239588803616e-05,
      "loss": 2.7441,
      "step": 187223
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.525693893432617,
      "learning_rate": 5.051012427034048e-05,
      "loss": 2.8953,
      "step": 187224
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6831305027008057,
      "learning_rate": 5.050785269902983e-05,
      "loss": 2.9525,
      "step": 187225
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7432079315185547,
      "learning_rate": 5.050558117410434e-05,
      "loss": 2.7824,
      "step": 187226
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3384580612182617,
      "learning_rate": 5.0503309695564645e-05,
      "loss": 3.1651,
      "step": 187227
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6496450901031494,
      "learning_rate": 5.050103826341098e-05,
      "loss": 2.8732,
      "step": 187228
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.21631121635437,
      "learning_rate": 5.0498766877644005e-05,
      "loss": 2.8101,
      "step": 187229
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7456772327423096,
      "learning_rate": 5.0496495538263925e-05,
      "loss": 2.9574,
      "step": 187230
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5781164169311523,
      "learning_rate": 5.049422424527141e-05,
      "loss": 2.9449,
      "step": 187231
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.766009569168091,
      "learning_rate": 5.049195299866655e-05,
      "loss": 3.0782,
      "step": 187232
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.017998695373535,
      "learning_rate": 5.048968179845005e-05,
      "loss": 2.9198,
      "step": 187233
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8471267223358154,
      "learning_rate": 5.048741064462215e-05,
      "loss": 3.0004,
      "step": 187234
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7533137798309326,
      "learning_rate": 5.048513953718344e-05,
      "loss": 3.0127,
      "step": 187235
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1777758598327637,
      "learning_rate": 5.048286847613415e-05,
      "loss": 2.9074,
      "step": 187236
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.490323305130005,
      "learning_rate": 5.048059746147489e-05,
      "loss": 2.9891,
      "step": 187237
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3589422702789307,
      "learning_rate": 5.047832649320599e-05,
      "loss": 3.0477,
      "step": 187238
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.793245792388916,
      "learning_rate": 5.0476055571327876e-05,
      "loss": 2.823,
      "step": 187239
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.578979015350342,
      "learning_rate": 5.047378469584092e-05,
      "loss": 2.6551,
      "step": 187240
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6584999561309814,
      "learning_rate": 5.047151386674566e-05,
      "loss": 2.8241,
      "step": 187241
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.575645685195923,
      "learning_rate": 5.046924308404239e-05,
      "loss": 3.0965,
      "step": 187242
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.817750930786133,
      "learning_rate": 5.046697234773167e-05,
      "loss": 2.8891,
      "step": 187243
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9168996810913086,
      "learning_rate": 5.0464701657813845e-05,
      "loss": 2.8405,
      "step": 187244
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.694199323654175,
      "learning_rate": 5.046243101428934e-05,
      "loss": 3.0626,
      "step": 187245
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.767179250717163,
      "learning_rate": 5.046016041715849e-05,
      "loss": 2.7545,
      "step": 187246
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9824812412261963,
      "learning_rate": 5.04578898664219e-05,
      "loss": 2.9379,
      "step": 187247
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7881879806518555,
      "learning_rate": 5.045561936207982e-05,
      "loss": 2.7256,
      "step": 187248
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7874581813812256,
      "learning_rate": 5.045334890413284e-05,
      "loss": 2.8696,
      "step": 187249
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0551490783691406,
      "learning_rate": 5.045107849258128e-05,
      "loss": 2.8089,
      "step": 187250
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.791511297225952,
      "learning_rate": 5.044880812742557e-05,
      "loss": 3.1915,
      "step": 187251
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.568938970565796,
      "learning_rate": 5.0446537808666075e-05,
      "loss": 2.933,
      "step": 187252
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.238762378692627,
      "learning_rate": 5.044426753630334e-05,
      "loss": 2.7389,
      "step": 187253
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7970972061157227,
      "learning_rate": 5.0441997310337653e-05,
      "loss": 3.039,
      "step": 187254
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5095598697662354,
      "learning_rate": 5.043972713076959e-05,
      "loss": 2.934,
      "step": 187255
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.149848461151123,
      "learning_rate": 5.043745699759951e-05,
      "loss": 2.791,
      "step": 187256
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5612568855285645,
      "learning_rate": 5.043518691082772e-05,
      "loss": 2.7932,
      "step": 187257
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9314658641815186,
      "learning_rate": 5.043291687045481e-05,
      "loss": 2.8016,
      "step": 187258
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8359313011169434,
      "learning_rate": 5.043064687648115e-05,
      "loss": 2.9225,
      "step": 187259
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.322258234024048,
      "learning_rate": 5.042837692890708e-05,
      "loss": 2.9082,
      "step": 187260
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.597728967666626,
      "learning_rate": 5.042610702773312e-05,
      "loss": 2.8986,
      "step": 187261
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.121018171310425,
      "learning_rate": 5.042383717295968e-05,
      "loss": 2.7643,
      "step": 187262
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.010619640350342,
      "learning_rate": 5.042156736458709e-05,
      "loss": 3.0882,
      "step": 187263
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.423175811767578,
      "learning_rate": 5.041929760261592e-05,
      "loss": 2.779,
      "step": 187264
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.563326597213745,
      "learning_rate": 5.041702788704649e-05,
      "loss": 3.062,
      "step": 187265
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.536351203918457,
      "learning_rate": 5.0414758217879184e-05,
      "loss": 2.8132,
      "step": 187266
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.798577070236206,
      "learning_rate": 5.0412488595114554e-05,
      "loss": 3.0525,
      "step": 187267
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6257483959198,
      "learning_rate": 5.041021901875297e-05,
      "loss": 2.9085,
      "step": 187268
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.69731068611145,
      "learning_rate": 5.040794948879477e-05,
      "loss": 3.0434,
      "step": 187269
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.80027437210083,
      "learning_rate": 5.040568000524049e-05,
      "loss": 3.0398,
      "step": 187270
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2809059619903564,
      "learning_rate": 5.040341056809045e-05,
      "loss": 2.7373,
      "step": 187271
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.895264148712158,
      "learning_rate": 5.040114117734519e-05,
      "loss": 3.1261,
      "step": 187272
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.912433624267578,
      "learning_rate": 5.039887183300508e-05,
      "loss": 3.1658,
      "step": 187273
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0101912021636963,
      "learning_rate": 5.039660253507052e-05,
      "loss": 2.9465,
      "step": 187274
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.364459276199341,
      "learning_rate": 5.039433328354186e-05,
      "loss": 2.9492,
      "step": 187275
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.541118860244751,
      "learning_rate": 5.039206407841972e-05,
      "loss": 2.866,
      "step": 187276
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.876985549926758,
      "learning_rate": 5.0389794919704295e-05,
      "loss": 2.9646,
      "step": 187277
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.687270402908325,
      "learning_rate": 5.038752580739621e-05,
      "loss": 2.6264,
      "step": 187278
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9575319290161133,
      "learning_rate": 5.0385256741495705e-05,
      "loss": 2.8746,
      "step": 187279
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5512430667877197,
      "learning_rate": 5.038298772200348e-05,
      "loss": 2.9217,
      "step": 187280
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9178874492645264,
      "learning_rate": 5.038071874891959e-05,
      "loss": 3.205,
      "step": 187281
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8092825412750244,
      "learning_rate": 5.037844982224472e-05,
      "loss": 2.8607,
      "step": 187282
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.15134859085083,
      "learning_rate": 5.037618094197912e-05,
      "loss": 2.8855,
      "step": 187283
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.636725425720215,
      "learning_rate": 5.037391210812339e-05,
      "loss": 2.8327,
      "step": 187284
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6206729412078857,
      "learning_rate": 5.037164332067778e-05,
      "loss": 3.0162,
      "step": 187285
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.332216262817383,
      "learning_rate": 5.036937457964294e-05,
      "loss": 3.0476,
      "step": 187286
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.319112539291382,
      "learning_rate": 5.036710588501901e-05,
      "loss": 2.7356,
      "step": 187287
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7687370777130127,
      "learning_rate": 5.036483723680659e-05,
      "loss": 2.8462,
      "step": 187288
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5315797328948975,
      "learning_rate": 5.036256863500601e-05,
      "loss": 2.7105,
      "step": 187289
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.609525442123413,
      "learning_rate": 5.0360300079617797e-05,
      "loss": 2.7576,
      "step": 187290
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9233083724975586,
      "learning_rate": 5.035803157064227e-05,
      "loss": 2.9662,
      "step": 187291
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.896698236465454,
      "learning_rate": 5.035576310808004e-05,
      "loss": 2.9864,
      "step": 187292
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0159759521484375,
      "learning_rate": 5.0353494691931215e-05,
      "loss": 2.9289,
      "step": 187293
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.71118426322937,
      "learning_rate": 5.035122632219647e-05,
      "loss": 3.0218,
      "step": 187294
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6926095485687256,
      "learning_rate": 5.0348957998876096e-05,
      "loss": 2.8806,
      "step": 187295
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4671144485473633,
      "learning_rate": 5.034668972197063e-05,
      "loss": 3.0552,
      "step": 187296
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8769805431365967,
      "learning_rate": 5.0344421491480327e-05,
      "loss": 2.852,
      "step": 187297
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.837432622909546,
      "learning_rate": 5.034215330740587e-05,
      "loss": 2.9825,
      "step": 187298
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2278521060943604,
      "learning_rate": 5.0339885169747376e-05,
      "loss": 2.8443,
      "step": 187299
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7197937965393066,
      "learning_rate": 5.033761707850549e-05,
      "loss": 2.8767,
      "step": 187300
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7582345008850098,
      "learning_rate": 5.033534903368044e-05,
      "loss": 2.9684,
      "step": 187301
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1566359996795654,
      "learning_rate": 5.0333081035272894e-05,
      "loss": 2.8249,
      "step": 187302
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.786926507949829,
      "learning_rate": 5.0330813083283015e-05,
      "loss": 3.0208,
      "step": 187303
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.287705421447754,
      "learning_rate": 5.032854517771144e-05,
      "loss": 2.7872,
      "step": 187304
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6093478202819824,
      "learning_rate": 5.032627731855854e-05,
      "loss": 3.0538,
      "step": 187305
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.024184226989746,
      "learning_rate": 5.032400950582467e-05,
      "loss": 3.1378,
      "step": 187306
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.479843854904175,
      "learning_rate": 5.03217417395102e-05,
      "loss": 3.1129,
      "step": 187307
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.9178290367126465,
      "learning_rate": 5.0319474019615705e-05,
      "loss": 2.6887,
      "step": 187308
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6157500743865967,
      "learning_rate": 5.031720634614143e-05,
      "loss": 3.0953,
      "step": 187309
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6770033836364746,
      "learning_rate": 5.031493871908804e-05,
      "loss": 3.0992,
      "step": 187310
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4586973190307617,
      "learning_rate": 5.031267113845577e-05,
      "loss": 3.0618,
      "step": 187311
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.761594295501709,
      "learning_rate": 5.031040360424511e-05,
      "loss": 2.8581,
      "step": 187312
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.230945110321045,
      "learning_rate": 5.030813611645638e-05,
      "loss": 3.0456,
      "step": 187313
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.663886070251465,
      "learning_rate": 5.0305868675090146e-05,
      "loss": 3.0239,
      "step": 187314
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1452620029449463,
      "learning_rate": 5.0303601280146674e-05,
      "loss": 2.8574,
      "step": 187315
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.274977922439575,
      "learning_rate": 5.030133393162658e-05,
      "loss": 2.7961,
      "step": 187316
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0958099365234375,
      "learning_rate": 5.029906662953017e-05,
      "loss": 2.726,
      "step": 187317
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8078103065490723,
      "learning_rate": 5.0296799373857866e-05,
      "loss": 2.8559,
      "step": 187318
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.530879259109497,
      "learning_rate": 5.029453216461006e-05,
      "loss": 2.7198,
      "step": 187319
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.398240566253662,
      "learning_rate": 5.0292265001787243e-05,
      "loss": 2.9374,
      "step": 187320
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.78513503074646,
      "learning_rate": 5.028999788538976e-05,
      "loss": 2.7836,
      "step": 187321
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.581573009490967,
      "learning_rate": 5.0287730815418146e-05,
      "loss": 3.0683,
      "step": 187322
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.680968761444092,
      "learning_rate": 5.028546379187276e-05,
      "loss": 2.8795,
      "step": 187323
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.957195520401001,
      "learning_rate": 5.0283196814754035e-05,
      "loss": 2.822,
      "step": 187324
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7024264335632324,
      "learning_rate": 5.02809298840623e-05,
      "loss": 2.6928,
      "step": 187325
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2861404418945312,
      "learning_rate": 5.0278662999798104e-05,
      "loss": 2.8387,
      "step": 187326
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3414535522460938,
      "learning_rate": 5.027639616196176e-05,
      "loss": 2.8784,
      "step": 187327
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6301679611206055,
      "learning_rate": 5.027412937055382e-05,
      "loss": 3.1523,
      "step": 187328
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7819740772247314,
      "learning_rate": 5.027186262557463e-05,
      "loss": 3.1235,
      "step": 187329
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.788602352142334,
      "learning_rate": 5.026959592702464e-05,
      "loss": 3.0608,
      "step": 187330
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.125279188156128,
      "learning_rate": 5.026732927490414e-05,
      "loss": 3.0563,
      "step": 187331
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.492897987365723,
      "learning_rate": 5.026506266921374e-05,
      "loss": 3.0608,
      "step": 187332
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.00408673286438,
      "learning_rate": 5.02627961099537e-05,
      "loss": 3.0236,
      "step": 187333
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6267096996307373,
      "learning_rate": 5.026052959712461e-05,
      "loss": 2.9672,
      "step": 187334
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.844082832336426,
      "learning_rate": 5.025826313072678e-05,
      "loss": 3.1481,
      "step": 187335
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8637211322784424,
      "learning_rate": 5.0255996710760684e-05,
      "loss": 3.0007,
      "step": 187336
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.40673565864563,
      "learning_rate": 5.02537303372266e-05,
      "loss": 2.7535,
      "step": 187337
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1892306804656982,
      "learning_rate": 5.025146401012519e-05,
      "loss": 3.1712,
      "step": 187338
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.825392007827759,
      "learning_rate": 5.024919772945663e-05,
      "loss": 3.0583,
      "step": 187339
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.85970401763916,
      "learning_rate": 5.024693149522152e-05,
      "loss": 3.2096,
      "step": 187340
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.996904134750366,
      "learning_rate": 5.0244665307420276e-05,
      "loss": 3.1447,
      "step": 187341
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1250991821289062,
      "learning_rate": 5.024239916605315e-05,
      "loss": 3.0076,
      "step": 187342
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4938268661499023,
      "learning_rate": 5.0240133071120746e-05,
      "loss": 3.0547,
      "step": 187343
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.610560178756714,
      "learning_rate": 5.0237867022623435e-05,
      "loss": 2.7306,
      "step": 187344
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.28051233291626,
      "learning_rate": 5.023560102056151e-05,
      "loss": 2.9727,
      "step": 187345
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.947597026824951,
      "learning_rate": 5.023333506493561e-05,
      "loss": 3.0277,
      "step": 187346
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7882959842681885,
      "learning_rate": 5.023106915574603e-05,
      "loss": 3.2231,
      "step": 187347
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7867934703826904,
      "learning_rate": 5.0228803292993166e-05,
      "loss": 2.9057,
      "step": 187348
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5159049034118652,
      "learning_rate": 5.0226537476677534e-05,
      "loss": 2.9943,
      "step": 187349
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.7348408699035645,
      "learning_rate": 5.0224271706799514e-05,
      "loss": 3.1742,
      "step": 187350
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.338616132736206,
      "learning_rate": 5.022200598335941e-05,
      "loss": 3.0097,
      "step": 187351
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1156766414642334,
      "learning_rate": 5.021974030635784e-05,
      "loss": 2.9951,
      "step": 187352
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0259194374084473,
      "learning_rate": 5.021747467579514e-05,
      "loss": 2.8128,
      "step": 187353
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.717024087905884,
      "learning_rate": 5.021520909167167e-05,
      "loss": 3.1997,
      "step": 187354
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.372812271118164,
      "learning_rate": 5.021294355398795e-05,
      "loss": 2.8096,
      "step": 187355
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.809661388397217,
      "learning_rate": 5.0210678062744315e-05,
      "loss": 2.8931,
      "step": 187356
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2840890884399414,
      "learning_rate": 5.02084126179413e-05,
      "loss": 2.9331,
      "step": 187357
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.362396240234375,
      "learning_rate": 5.0206147219579226e-05,
      "loss": 2.931,
      "step": 187358
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8387234210968018,
      "learning_rate": 5.020388186765858e-05,
      "loss": 3.1032,
      "step": 187359
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7763285636901855,
      "learning_rate": 5.0201616562179644e-05,
      "loss": 2.9777,
      "step": 187360
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8777477741241455,
      "learning_rate": 5.0199351303143034e-05,
      "loss": 2.9979,
      "step": 187361
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.586434841156006,
      "learning_rate": 5.0197086090549e-05,
      "loss": 2.8555,
      "step": 187362
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.992964506149292,
      "learning_rate": 5.0194820924398115e-05,
      "loss": 2.9077,
      "step": 187363
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5403974056243896,
      "learning_rate": 5.019255580469075e-05,
      "loss": 3.0041,
      "step": 187364
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.05576229095459,
      "learning_rate": 5.0190290731427294e-05,
      "loss": 2.9947,
      "step": 187365
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3329918384552,
      "learning_rate": 5.0188025704608094e-05,
      "loss": 2.7045,
      "step": 187366
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.447890043258667,
      "learning_rate": 5.0185760724233736e-05,
      "loss": 2.8153,
      "step": 187367
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4520857334136963,
      "learning_rate": 5.018349579030449e-05,
      "loss": 3.1402,
      "step": 187368
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8362886905670166,
      "learning_rate": 5.0181230902820904e-05,
      "loss": 2.8921,
      "step": 187369
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.576448917388916,
      "learning_rate": 5.017896606178329e-05,
      "loss": 2.9264,
      "step": 187370
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6823055744171143,
      "learning_rate": 5.0176701267192186e-05,
      "loss": 3.0493,
      "step": 187371
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.072160720825195,
      "learning_rate": 5.0174436519047936e-05,
      "loss": 3.1721,
      "step": 187372
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.436378240585327,
      "learning_rate": 5.017217181735099e-05,
      "loss": 3.2022,
      "step": 187373
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5126423835754395,
      "learning_rate": 5.01699071621017e-05,
      "loss": 2.9828,
      "step": 187374
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5649607181549072,
      "learning_rate": 5.016764255330058e-05,
      "loss": 3.0102,
      "step": 187375
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1666862964630127,
      "learning_rate": 5.016537799094794e-05,
      "loss": 2.9089,
      "step": 187376
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1170144081115723,
      "learning_rate": 5.016311347504435e-05,
      "loss": 3.0569,
      "step": 187377
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.42630934715271,
      "learning_rate": 5.016084900559016e-05,
      "loss": 3.1852,
      "step": 187378
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9265408515930176,
      "learning_rate": 5.015858458258579e-05,
      "loss": 3.1561,
      "step": 187379
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.74269437789917,
      "learning_rate": 5.015632020603156e-05,
      "loss": 2.8338,
      "step": 187380
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.190218448638916,
      "learning_rate": 5.015405587592808e-05,
      "loss": 2.9426,
      "step": 187381
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.291724681854248,
      "learning_rate": 5.01517915922756e-05,
      "loss": 3.0275,
      "step": 187382
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8147382736206055,
      "learning_rate": 5.0149527355074706e-05,
      "loss": 2.9107,
      "step": 187383
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.629640579223633,
      "learning_rate": 5.014726316432571e-05,
      "loss": 3.129,
      "step": 187384
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.02354097366333,
      "learning_rate": 5.014499902002906e-05,
      "loss": 2.9486,
      "step": 187385
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2702744007110596,
      "learning_rate": 5.014273492218509e-05,
      "loss": 2.8424,
      "step": 187386
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9293501377105713,
      "learning_rate": 5.014047087079439e-05,
      "loss": 3.0154,
      "step": 187387
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.495298385620117,
      "learning_rate": 5.01382068658572e-05,
      "loss": 2.8723,
      "step": 187388
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0088791847229004,
      "learning_rate": 5.0135942907374115e-05,
      "loss": 2.9733,
      "step": 187389
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1773247718811035,
      "learning_rate": 5.013367899534547e-05,
      "loss": 3.2502,
      "step": 187390
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9148976802825928,
      "learning_rate": 5.01314151297717e-05,
      "loss": 2.6513,
      "step": 187391
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8054726123809814,
      "learning_rate": 5.012915131065317e-05,
      "loss": 2.8414,
      "step": 187392
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.88981556892395,
      "learning_rate": 5.012688753799038e-05,
      "loss": 2.915,
      "step": 187393
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9369187355041504,
      "learning_rate": 5.012462381178366e-05,
      "loss": 2.8308,
      "step": 187394
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2217254638671875,
      "learning_rate": 5.012236013203355e-05,
      "loss": 2.9561,
      "step": 187395
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6993210315704346,
      "learning_rate": 5.012009649874044e-05,
      "loss": 2.7974,
      "step": 187396
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9139418601989746,
      "learning_rate": 5.01178329119047e-05,
      "loss": 3.0339,
      "step": 187397
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7537026405334473,
      "learning_rate": 5.0115569371526696e-05,
      "loss": 2.8348,
      "step": 187398
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.547295570373535,
      "learning_rate": 5.0113305877607e-05,
      "loss": 3.0557,
      "step": 187399
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9633755683898926,
      "learning_rate": 5.011104243014588e-05,
      "loss": 2.8397,
      "step": 187400
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9762115478515625,
      "learning_rate": 5.0108779029143916e-05,
      "loss": 3.0992,
      "step": 187401
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5296471118927,
      "learning_rate": 5.0106515674601434e-05,
      "loss": 2.9013,
      "step": 187402
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.069023609161377,
      "learning_rate": 5.0104252366518884e-05,
      "loss": 3.0671,
      "step": 187403
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5397820472717285,
      "learning_rate": 5.01019891048966e-05,
      "loss": 2.7351,
      "step": 187404
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.716611862182617,
      "learning_rate": 5.009972588973512e-05,
      "loss": 2.6824,
      "step": 187405
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1203081607818604,
      "learning_rate": 5.009746272103477e-05,
      "loss": 2.9017,
      "step": 187406
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.923295021057129,
      "learning_rate": 5.0095199598796096e-05,
      "loss": 2.9499,
      "step": 187407
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7467846870422363,
      "learning_rate": 5.009293652301942e-05,
      "loss": 2.8249,
      "step": 187408
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.968653917312622,
      "learning_rate": 5.009067349370521e-05,
      "loss": 2.9201,
      "step": 187409
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.139577865600586,
      "learning_rate": 5.008841051085377e-05,
      "loss": 3.1974,
      "step": 187410
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9615750312805176,
      "learning_rate": 5.00861475744657e-05,
      "loss": 3.0682,
      "step": 187411
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7750320434570312,
      "learning_rate": 5.008388468454122e-05,
      "loss": 2.8405,
      "step": 187412
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5988340377807617,
      "learning_rate": 5.008162184108098e-05,
      "loss": 3.2076,
      "step": 187413
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6567463874816895,
      "learning_rate": 5.007935904408527e-05,
      "loss": 3.0757,
      "step": 187414
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.608164072036743,
      "learning_rate": 5.007709629355452e-05,
      "loss": 2.9026,
      "step": 187415
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.132122278213501,
      "learning_rate": 5.007483358948907e-05,
      "loss": 2.7288,
      "step": 187416
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1765403747558594,
      "learning_rate": 5.0072570931889525e-05,
      "loss": 3.0906,
      "step": 187417
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8134384155273438,
      "learning_rate": 5.007030832075613e-05,
      "loss": 2.9178,
      "step": 187418
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4576539993286133,
      "learning_rate": 5.0068045756089444e-05,
      "loss": 2.9893,
      "step": 187419
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.427809953689575,
      "learning_rate": 5.0065783237889824e-05,
      "loss": 2.9515,
      "step": 187420
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5436458587646484,
      "learning_rate": 5.006352076615769e-05,
      "loss": 2.7423,
      "step": 187421
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4251670837402344,
      "learning_rate": 5.006125834089343e-05,
      "loss": 3.0425,
      "step": 187422
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6350045204162598,
      "learning_rate": 5.0058995962097524e-05,
      "loss": 2.9435,
      "step": 187423
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9679088592529297,
      "learning_rate": 5.005673362977032e-05,
      "loss": 2.9939,
      "step": 187424
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6225368976593018,
      "learning_rate": 5.005447134391237e-05,
      "loss": 2.7938,
      "step": 187425
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3450703620910645,
      "learning_rate": 5.005220910452399e-05,
      "loss": 2.937,
      "step": 187426
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6719861030578613,
      "learning_rate": 5.004994691160563e-05,
      "loss": 2.8351,
      "step": 187427
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8407630920410156,
      "learning_rate": 5.004768476515764e-05,
      "loss": 2.8049,
      "step": 187428
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4998950958251953,
      "learning_rate": 5.004542266518057e-05,
      "loss": 2.985,
      "step": 187429
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.886420488357544,
      "learning_rate": 5.004316061167469e-05,
      "loss": 2.9282,
      "step": 187430
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4212820529937744,
      "learning_rate": 5.0040898604640576e-05,
      "loss": 2.9136,
      "step": 187431
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.432185649871826,
      "learning_rate": 5.0038636644078524e-05,
      "loss": 2.8654,
      "step": 187432
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.677657127380371,
      "learning_rate": 5.003637472998909e-05,
      "loss": 2.9447,
      "step": 187433
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6655237674713135,
      "learning_rate": 5.003411286237259e-05,
      "loss": 3.0902,
      "step": 187434
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.217099666595459,
      "learning_rate": 5.0031851041229475e-05,
      "loss": 2.9606,
      "step": 187435
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.824787139892578,
      "learning_rate": 5.0029589266560085e-05,
      "loss": 2.9708,
      "step": 187436
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.325538635253906,
      "learning_rate": 5.002732753836496e-05,
      "loss": 2.8477,
      "step": 187437
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.267294883728027,
      "learning_rate": 5.002506585664442e-05,
      "loss": 2.8251,
      "step": 187438
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5746490955352783,
      "learning_rate": 5.002280422139904e-05,
      "loss": 2.866,
      "step": 187439
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1128814220428467,
      "learning_rate": 5.002054263262911e-05,
      "loss": 2.959,
      "step": 187440
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.58315372467041,
      "learning_rate": 5.001828109033501e-05,
      "loss": 2.7482,
      "step": 187441
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.675227403640747,
      "learning_rate": 5.0016019594517296e-05,
      "loss": 2.94,
      "step": 187442
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6900763511657715,
      "learning_rate": 5.001375814517634e-05,
      "loss": 2.9956,
      "step": 187443
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7302069664001465,
      "learning_rate": 5.001149674231244e-05,
      "loss": 3.0027,
      "step": 187444
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.680555582046509,
      "learning_rate": 5.000923538592623e-05,
      "loss": 3.1395,
      "step": 187445
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1508195400238037,
      "learning_rate": 5.0006974076018e-05,
      "loss": 2.8949,
      "step": 187446
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.5577898025512695,
      "learning_rate": 5.0004712812588134e-05,
      "loss": 3.0075,
      "step": 187447
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9686179161071777,
      "learning_rate": 5.000245159563718e-05,
      "loss": 2.959,
      "step": 187448
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.500027656555176,
      "learning_rate": 5.0000190425165486e-05,
      "loss": 2.7289,
      "step": 187449
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9598405361175537,
      "learning_rate": 4.999792930117341e-05,
      "loss": 3.0857,
      "step": 187450
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.633612871170044,
      "learning_rate": 4.999566822366152e-05,
      "loss": 3.0874,
      "step": 187451
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7450594902038574,
      "learning_rate": 4.999340719263012e-05,
      "loss": 2.9348,
      "step": 187452
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.199756622314453,
      "learning_rate": 4.9991146208079636e-05,
      "loss": 2.7108,
      "step": 187453
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.047394037246704,
      "learning_rate": 4.998888527001057e-05,
      "loss": 2.7765,
      "step": 187454
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.1739113330841064,
      "learning_rate": 4.998662437842319e-05,
      "loss": 2.9609,
      "step": 187455
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.618290662765503,
      "learning_rate": 4.998436353331813e-05,
      "loss": 3.0775,
      "step": 187456
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1034135818481445,
      "learning_rate": 4.9982102734695686e-05,
      "loss": 2.8489,
      "step": 187457
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2867488861083984,
      "learning_rate": 4.997984198255626e-05,
      "loss": 3.0592,
      "step": 187458
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9229166507720947,
      "learning_rate": 4.997758127690025e-05,
      "loss": 2.8643,
      "step": 187459
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6638824939727783,
      "learning_rate": 4.997532061772819e-05,
      "loss": 3.0377,
      "step": 187460
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.298823833465576,
      "learning_rate": 4.9973060005040376e-05,
      "loss": 2.55,
      "step": 187461
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3958253860473633,
      "learning_rate": 4.9970799438837385e-05,
      "loss": 2.8648,
      "step": 187462
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.420632839202881,
      "learning_rate": 4.996853891911951e-05,
      "loss": 2.7288,
      "step": 187463
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.260992527008057,
      "learning_rate": 4.9966278445887206e-05,
      "loss": 2.7615,
      "step": 187464
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.603914499282837,
      "learning_rate": 4.996401801914083e-05,
      "loss": 2.9851,
      "step": 187465
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.962940216064453,
      "learning_rate": 4.9961757638880925e-05,
      "loss": 3.0494,
      "step": 187466
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.937979221343994,
      "learning_rate": 4.995949730510777e-05,
      "loss": 2.9631,
      "step": 187467
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6896145343780518,
      "learning_rate": 4.995723701782194e-05,
      "loss": 3.0018,
      "step": 187468
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.977501153945923,
      "learning_rate": 4.995497677702378e-05,
      "loss": 2.8874,
      "step": 187469
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.344785213470459,
      "learning_rate": 4.995271658271374e-05,
      "loss": 3.0275,
      "step": 187470
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.784311532974243,
      "learning_rate": 4.995045643489211e-05,
      "loss": 2.9072,
      "step": 187471
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4673960208892822,
      "learning_rate": 4.994819633355949e-05,
      "loss": 2.9744,
      "step": 187472
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.737454652786255,
      "learning_rate": 4.9945936278716114e-05,
      "loss": 2.7648,
      "step": 187473
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8107664585113525,
      "learning_rate": 4.994367627036262e-05,
      "loss": 3.1704,
      "step": 187474
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8874497413635254,
      "learning_rate": 4.9941416308499306e-05,
      "loss": 3.0192,
      "step": 187475
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5856571197509766,
      "learning_rate": 4.99391563931266e-05,
      "loss": 2.7327,
      "step": 187476
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.782921075820923,
      "learning_rate": 4.9936896524244875e-05,
      "loss": 3.0698,
      "step": 187477
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7646708488464355,
      "learning_rate": 4.993463670185466e-05,
      "loss": 3.0115,
      "step": 187478
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3755626678466797,
      "learning_rate": 4.993237692595622e-05,
      "loss": 2.9325,
      "step": 187479
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6298460960388184,
      "learning_rate": 4.993011719655019e-05,
      "loss": 2.895,
      "step": 187480
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7685158252716064,
      "learning_rate": 4.992785751363684e-05,
      "loss": 2.9332,
      "step": 187481
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6561391353607178,
      "learning_rate": 4.992559787721663e-05,
      "loss": 2.9398,
      "step": 187482
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.802154064178467,
      "learning_rate": 4.9923338287289894e-05,
      "loss": 2.9837,
      "step": 187483
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.688567638397217,
      "learning_rate": 4.9921078743857204e-05,
      "loss": 2.7757,
      "step": 187484
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.51334810256958,
      "learning_rate": 4.991881924691882e-05,
      "loss": 2.9316,
      "step": 187485
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.039492607116699,
      "learning_rate": 4.991655979647534e-05,
      "loss": 3.1234,
      "step": 187486
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2975945472717285,
      "learning_rate": 4.991430039252707e-05,
      "loss": 2.9644,
      "step": 187487
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6195340156555176,
      "learning_rate": 4.991204103507448e-05,
      "loss": 2.8982,
      "step": 187488
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.428008556365967,
      "learning_rate": 4.990978172411789e-05,
      "loss": 2.7649,
      "step": 187489
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.553668975830078,
      "learning_rate": 4.990752245965785e-05,
      "loss": 3.0361,
      "step": 187490
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.951358318328857,
      "learning_rate": 4.990526324169467e-05,
      "loss": 3.1749,
      "step": 187491
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.7307655811309814,
      "learning_rate": 4.990300407022887e-05,
      "loss": 2.8017,
      "step": 187492
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.7348380088806152,
      "learning_rate": 4.990074494526084e-05,
      "loss": 2.8392,
      "step": 187493
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6308083534240723,
      "learning_rate": 4.989848586679095e-05,
      "loss": 2.8415,
      "step": 187494
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7858188152313232,
      "learning_rate": 4.989622683481963e-05,
      "loss": 2.9113,
      "step": 187495
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.857421636581421,
      "learning_rate": 4.989396784934735e-05,
      "loss": 3.0891,
      "step": 187496
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6585159301757812,
      "learning_rate": 4.989170891037444e-05,
      "loss": 3.1689,
      "step": 187497
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9828200340270996,
      "learning_rate": 4.9889450017901466e-05,
      "loss": 3.0856,
      "step": 187498
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.403611183166504,
      "learning_rate": 4.9887191171928696e-05,
      "loss": 2.7556,
      "step": 187499
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8172450065612793,
      "learning_rate": 4.988493237245677e-05,
      "loss": 2.9623,
      "step": 187500
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.894178867340088,
      "learning_rate": 4.98826736194858e-05,
      "loss": 2.9361,
      "step": 187501
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.530032157897949,
      "learning_rate": 4.9880414913016444e-05,
      "loss": 2.9958,
      "step": 187502
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.399388074874878,
      "learning_rate": 4.987815625304895e-05,
      "loss": 2.8439,
      "step": 187503
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9998068809509277,
      "learning_rate": 4.987589763958389e-05,
      "loss": 2.8035,
      "step": 187504
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1442744731903076,
      "learning_rate": 4.987363907262157e-05,
      "loss": 2.9179,
      "step": 187505
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9842121601104736,
      "learning_rate": 4.987138055216262e-05,
      "loss": 2.9301,
      "step": 187506
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.06990385055542,
      "learning_rate": 4.9869122078207135e-05,
      "loss": 2.7769,
      "step": 187507
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.003051519393921,
      "learning_rate": 4.986686365075578e-05,
      "loss": 2.9866,
      "step": 187508
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4854981899261475,
      "learning_rate": 4.986460526980883e-05,
      "loss": 3.0869,
      "step": 187509
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.490210771560669,
      "learning_rate": 4.9862346935366847e-05,
      "loss": 2.96,
      "step": 187510
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.696044445037842,
      "learning_rate": 4.9860088647430095e-05,
      "loss": 2.7275,
      "step": 187511
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.661471128463745,
      "learning_rate": 4.985783040599921e-05,
      "loss": 2.9266,
      "step": 187512
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9978229999542236,
      "learning_rate": 4.985557221107432e-05,
      "loss": 2.9111,
      "step": 187513
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.556999444961548,
      "learning_rate": 4.98533140626561e-05,
      "loss": 2.8816,
      "step": 187514
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.027588367462158,
      "learning_rate": 4.9851055960744815e-05,
      "loss": 2.6794,
      "step": 187515
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.408003330230713,
      "learning_rate": 4.984879790534099e-05,
      "loss": 3.0669,
      "step": 187516
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.133105278015137,
      "learning_rate": 4.9846539896444924e-05,
      "loss": 2.64,
      "step": 187517
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9143457412719727,
      "learning_rate": 4.984428193405716e-05,
      "loss": 2.9382,
      "step": 187518
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.040285348892212,
      "learning_rate": 4.984202401817809e-05,
      "loss": 2.7495,
      "step": 187519
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.623835325241089,
      "learning_rate": 4.983976614880813e-05,
      "loss": 3.0735,
      "step": 187520
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.234546661376953,
      "learning_rate": 4.983750832594755e-05,
      "loss": 2.908,
      "step": 187521
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2010622024536133,
      "learning_rate": 4.9835250549597006e-05,
      "loss": 2.8674,
      "step": 187522
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.1056671142578125,
      "learning_rate": 4.983299281975676e-05,
      "loss": 3.0473,
      "step": 187523
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6521708965301514,
      "learning_rate": 4.9830735136427334e-05,
      "loss": 2.8192,
      "step": 187524
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.888669967651367,
      "learning_rate": 4.982847749960908e-05,
      "loss": 2.9921,
      "step": 187525
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8328323364257812,
      "learning_rate": 4.9826219909302445e-05,
      "loss": 2.7408,
      "step": 187526
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9438796043395996,
      "learning_rate": 4.982396236550777e-05,
      "loss": 3.0201,
      "step": 187527
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2406489849090576,
      "learning_rate": 4.982170486822563e-05,
      "loss": 2.9449,
      "step": 187528
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.800424098968506,
      "learning_rate": 4.981944741745628e-05,
      "loss": 2.8956,
      "step": 187529
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.890319585800171,
      "learning_rate": 4.981719001320026e-05,
      "loss": 2.9671,
      "step": 187530
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.115550518035889,
      "learning_rate": 4.9814932655458e-05,
      "loss": 2.7946,
      "step": 187531
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5938680171966553,
      "learning_rate": 4.981267534422973e-05,
      "loss": 3.0296,
      "step": 187532
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.038623094558716,
      "learning_rate": 4.9810418079516115e-05,
      "loss": 2.9974,
      "step": 187533
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6142518520355225,
      "learning_rate": 4.980816086131746e-05,
      "loss": 3.0873,
      "step": 187534
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.796003818511963,
      "learning_rate": 4.980590368963413e-05,
      "loss": 3.1248,
      "step": 187535
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.610603094100952,
      "learning_rate": 4.980364656446666e-05,
      "loss": 2.7418,
      "step": 187536
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.167175769805908,
      "learning_rate": 4.980138948581541e-05,
      "loss": 2.7942,
      "step": 187537
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.341970443725586,
      "learning_rate": 4.979913245368076e-05,
      "loss": 2.8701,
      "step": 187538
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2953989505767822,
      "learning_rate": 4.9796875468063226e-05,
      "loss": 3.1717,
      "step": 187539
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0126476287841797,
      "learning_rate": 4.979461852896311e-05,
      "loss": 2.8953,
      "step": 187540
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.342754364013672,
      "learning_rate": 4.9792361636380964e-05,
      "loss": 3.0085,
      "step": 187541
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7974936962127686,
      "learning_rate": 4.9790104790317167e-05,
      "loss": 3.2385,
      "step": 187542
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.887636423110962,
      "learning_rate": 4.978784799077209e-05,
      "loss": 2.8975,
      "step": 187543
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2393596172332764,
      "learning_rate": 4.9785591237746104e-05,
      "loss": 3.1261,
      "step": 187544
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.122468948364258,
      "learning_rate": 4.9783334531239774e-05,
      "loss": 2.8791,
      "step": 187545
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6503822803497314,
      "learning_rate": 4.978107787125336e-05,
      "loss": 3.1245,
      "step": 187546
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.314462184906006,
      "learning_rate": 4.977882125778744e-05,
      "loss": 2.5759,
      "step": 187547
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6338260173797607,
      "learning_rate": 4.977656469084237e-05,
      "loss": 3.1266,
      "step": 187548
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.393792152404785,
      "learning_rate": 4.977430817041859e-05,
      "loss": 2.8814,
      "step": 187549
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0576868057250977,
      "learning_rate": 4.9772051696516355e-05,
      "loss": 2.7424,
      "step": 187550
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.845869302749634,
      "learning_rate": 4.976979526913635e-05,
      "loss": 3.0375,
      "step": 187551
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0929439067840576,
      "learning_rate": 4.976753888827876e-05,
      "loss": 2.9723,
      "step": 187552
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.443632125854492,
      "learning_rate": 4.9765282553944186e-05,
      "loss": 2.7819,
      "step": 187553
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4121906757354736,
      "learning_rate": 4.9763026266132963e-05,
      "loss": 3.1516,
      "step": 187554
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9266862869262695,
      "learning_rate": 4.9760770024845534e-05,
      "loss": 2.8761,
      "step": 187555
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.89090895652771,
      "learning_rate": 4.9758513830082216e-05,
      "loss": 3.1259,
      "step": 187556
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.572510004043579,
      "learning_rate": 4.975625768184358e-05,
      "loss": 2.745,
      "step": 187557
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.7582569122314453,
      "learning_rate": 4.97540015801299e-05,
      "loss": 2.7568,
      "step": 187558
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.429403781890869,
      "learning_rate": 4.975174552494177e-05,
      "loss": 2.7596,
      "step": 187559
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.060580253601074,
      "learning_rate": 4.9749489516279516e-05,
      "loss": 2.9724,
      "step": 187560
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.250657796859741,
      "learning_rate": 4.974723355414355e-05,
      "loss": 2.8654,
      "step": 187561
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9602980613708496,
      "learning_rate": 4.97449776385342e-05,
      "loss": 2.8561,
      "step": 187562
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2613041400909424,
      "learning_rate": 4.97427217694521e-05,
      "loss": 3.1342,
      "step": 187563
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6128456592559814,
      "learning_rate": 4.974046594689745e-05,
      "loss": 3.0132,
      "step": 187564
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0060629844665527,
      "learning_rate": 4.9738210170870843e-05,
      "loss": 3.0192,
      "step": 187565
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.698305606842041,
      "learning_rate": 4.9735954441372586e-05,
      "loss": 2.8386,
      "step": 187566
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.32436466217041,
      "learning_rate": 4.973369875840324e-05,
      "loss": 2.7211,
      "step": 187567
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.826154947280884,
      "learning_rate": 4.973144312196301e-05,
      "loss": 2.9505,
      "step": 187568
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6342968940734863,
      "learning_rate": 4.972918753205249e-05,
      "loss": 2.6473,
      "step": 187569
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5856823921203613,
      "learning_rate": 4.972693198867196e-05,
      "loss": 3.1639,
      "step": 187570
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.385140895843506,
      "learning_rate": 4.9724676491822e-05,
      "loss": 2.9594,
      "step": 187571
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.861821413040161,
      "learning_rate": 4.972242104150286e-05,
      "loss": 3.1416,
      "step": 187572
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6259045600891113,
      "learning_rate": 4.972016563771519e-05,
      "loss": 2.903,
      "step": 187573
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.049804210662842,
      "learning_rate": 4.9717910280459136e-05,
      "loss": 2.892,
      "step": 187574
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6535258293151855,
      "learning_rate": 4.971565496973533e-05,
      "loss": 2.9335,
      "step": 187575
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5821831226348877,
      "learning_rate": 4.9713399705543996e-05,
      "loss": 2.8172,
      "step": 187576
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0235719680786133,
      "learning_rate": 4.971114448788578e-05,
      "loss": 2.914,
      "step": 187577
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8213915824890137,
      "learning_rate": 4.9708889316760904e-05,
      "loss": 2.9044,
      "step": 187578
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9843883514404297,
      "learning_rate": 4.9706634192170006e-05,
      "loss": 2.6193,
      "step": 187579
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.361677408218384,
      "learning_rate": 4.9704379114113214e-05,
      "loss": 2.7789,
      "step": 187580
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.260787010192871,
      "learning_rate": 4.97021240825912e-05,
      "loss": 2.8498,
      "step": 187581
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.285264730453491,
      "learning_rate": 4.969986909760419e-05,
      "loss": 2.8862,
      "step": 187582
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.239934206008911,
      "learning_rate": 4.9697614159152797e-05,
      "loss": 2.8984,
      "step": 187583
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8538565635681152,
      "learning_rate": 4.969535926723725e-05,
      "loss": 3.019,
      "step": 187584
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.698101758956909,
      "learning_rate": 4.96931044218582e-05,
      "loss": 2.7547,
      "step": 187585
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.915510892868042,
      "learning_rate": 4.9690849623015795e-05,
      "loss": 2.8433,
      "step": 187586
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1467947959899902,
      "learning_rate": 4.968859487071066e-05,
      "loss": 2.85,
      "step": 187587
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.956618070602417,
      "learning_rate": 4.968634016494307e-05,
      "loss": 2.8897,
      "step": 187588
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9533169269561768,
      "learning_rate": 4.968408550571359e-05,
      "loss": 2.8982,
      "step": 187589
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1938235759735107,
      "learning_rate": 4.968183089302248e-05,
      "loss": 2.8948,
      "step": 187590
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8065338134765625,
      "learning_rate": 4.967957632687037e-05,
      "loss": 3.086,
      "step": 187591
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.833745002746582,
      "learning_rate": 4.967732180725744e-05,
      "loss": 3.0776,
      "step": 187592
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.04506516456604,
      "learning_rate": 4.9675067334184284e-05,
      "loss": 2.9126,
      "step": 187593
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8608219623565674,
      "learning_rate": 4.967281290765116e-05,
      "loss": 2.8428,
      "step": 187594
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7669787406921387,
      "learning_rate": 4.967055852765871e-05,
      "loss": 3.0317,
      "step": 187595
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6937925815582275,
      "learning_rate": 4.9668304194207095e-05,
      "loss": 2.8936,
      "step": 187596
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8099215030670166,
      "learning_rate": 4.9666049907297054e-05,
      "loss": 3.0744,
      "step": 187597
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0580790042877197,
      "learning_rate": 4.966379566692869e-05,
      "loss": 3.1126,
      "step": 187598
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.120654582977295,
      "learning_rate": 4.9661541473102586e-05,
      "loss": 2.8165,
      "step": 187599
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.912470817565918,
      "learning_rate": 4.9659287325819055e-05,
      "loss": 2.8028,
      "step": 187600
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8374481201171875,
      "learning_rate": 4.965703322507869e-05,
      "loss": 2.937,
      "step": 187601
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2123045921325684,
      "learning_rate": 4.96547791708817e-05,
      "loss": 2.654,
      "step": 187602
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.945438861846924,
      "learning_rate": 4.9652525163228705e-05,
      "loss": 3.0393,
      "step": 187603
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.273045539855957,
      "learning_rate": 4.965027120212002e-05,
      "loss": 2.9943,
      "step": 187604
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.490238904953003,
      "learning_rate": 4.9648017287556094e-05,
      "loss": 2.6659,
      "step": 187605
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.338271617889404,
      "learning_rate": 4.9645763419537245e-05,
      "loss": 2.6977,
      "step": 187606
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9236087799072266,
      "learning_rate": 4.9643509598064056e-05,
      "loss": 2.9779,
      "step": 187607
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0765209197998047,
      "learning_rate": 4.964125582313677e-05,
      "loss": 2.6999,
      "step": 187608
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.983612060546875,
      "learning_rate": 4.9639002094755986e-05,
      "loss": 2.8312,
      "step": 187609
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.649897575378418,
      "learning_rate": 4.963674841292203e-05,
      "loss": 2.9902,
      "step": 187610
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6551601886749268,
      "learning_rate": 4.9634494777635346e-05,
      "loss": 2.8847,
      "step": 187611
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.9124763011932373,
      "learning_rate": 4.963224118889626e-05,
      "loss": 3.1062,
      "step": 187612
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.572843074798584,
      "learning_rate": 4.962998764670534e-05,
      "loss": 3.0905,
      "step": 187613
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.802954912185669,
      "learning_rate": 4.962773415106285e-05,
      "loss": 3.0761,
      "step": 187614
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6598610877990723,
      "learning_rate": 4.962548070196935e-05,
      "loss": 2.7407,
      "step": 187615
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.500291109085083,
      "learning_rate": 4.962322729942523e-05,
      "loss": 2.9126,
      "step": 187616
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4781277179718018,
      "learning_rate": 4.962097394343076e-05,
      "loss": 3.2478,
      "step": 187617
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.141871452331543,
      "learning_rate": 4.96187206339866e-05,
      "loss": 3.1269,
      "step": 187618
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0617122650146484,
      "learning_rate": 4.961646737109303e-05,
      "loss": 2.9691,
      "step": 187619
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.371413230895996,
      "learning_rate": 4.961421415475039e-05,
      "loss": 2.8668,
      "step": 187620
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8087759017944336,
      "learning_rate": 4.9611960984959275e-05,
      "loss": 2.793,
      "step": 187621
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7990572452545166,
      "learning_rate": 4.960970786172006e-05,
      "loss": 2.9595,
      "step": 187622
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2440176010131836,
      "learning_rate": 4.9607454785033005e-05,
      "loss": 3.1471,
      "step": 187623
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.264536380767822,
      "learning_rate": 4.960520175489875e-05,
      "loss": 2.9081,
      "step": 187624
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9199650287628174,
      "learning_rate": 4.960294877131755e-05,
      "loss": 2.8635,
      "step": 187625
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.19632887840271,
      "learning_rate": 4.960069583428995e-05,
      "loss": 3.1444,
      "step": 187626
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6037135124206543,
      "learning_rate": 4.959844294381628e-05,
      "loss": 2.9703,
      "step": 187627
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.12782621383667,
      "learning_rate": 4.959619009989702e-05,
      "loss": 3.0281,
      "step": 187628
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.746129035949707,
      "learning_rate": 4.959393730253247e-05,
      "loss": 3.151,
      "step": 187629
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8893232345581055,
      "learning_rate": 4.959168455172321e-05,
      "loss": 2.8113,
      "step": 187630
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6795644760131836,
      "learning_rate": 4.958943184746951e-05,
      "loss": 2.8017,
      "step": 187631
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.781635284423828,
      "learning_rate": 4.958717918977193e-05,
      "loss": 2.9587,
      "step": 187632
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0907156467437744,
      "learning_rate": 4.958492657863081e-05,
      "loss": 3.0856,
      "step": 187633
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1221923828125,
      "learning_rate": 4.9582674014046616e-05,
      "loss": 2.8127,
      "step": 187634
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.002784252166748,
      "learning_rate": 4.9580421496019615e-05,
      "loss": 2.9294,
      "step": 187635
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.980227470397949,
      "learning_rate": 4.957816902455044e-05,
      "loss": 2.714,
      "step": 187636
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.358973979949951,
      "learning_rate": 4.957591659963932e-05,
      "loss": 2.721,
      "step": 187637
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6567001342773438,
      "learning_rate": 4.957366422128686e-05,
      "loss": 2.7736,
      "step": 187638
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1981797218322754,
      "learning_rate": 4.957141188949329e-05,
      "loss": 2.8659,
      "step": 187639
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7700345516204834,
      "learning_rate": 4.95691596042593e-05,
      "loss": 3.083,
      "step": 187640
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7995240688323975,
      "learning_rate": 4.956690736558494e-05,
      "loss": 2.9481,
      "step": 187641
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.864572763442993,
      "learning_rate": 4.9564655173470904e-05,
      "loss": 2.8359,
      "step": 187642
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3012588024139404,
      "learning_rate": 4.956240302791749e-05,
      "loss": 2.8271,
      "step": 187643
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7072184085845947,
      "learning_rate": 4.9560150928925194e-05,
      "loss": 3.0388,
      "step": 187644
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5850491523742676,
      "learning_rate": 4.9557898876494315e-05,
      "loss": 2.7701,
      "step": 187645
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.382930755615234,
      "learning_rate": 4.95556468706255e-05,
      "loss": 3.062,
      "step": 187646
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6417832374572754,
      "learning_rate": 4.955339491131887e-05,
      "loss": 2.6884,
      "step": 187647
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.5265913009643555,
      "learning_rate": 4.955114299857506e-05,
      "loss": 3.0464,
      "step": 187648
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0439209938049316,
      "learning_rate": 4.9548891132394365e-05,
      "loss": 2.8936,
      "step": 187649
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.858900785446167,
      "learning_rate": 4.954663931277733e-05,
      "loss": 3.0509,
      "step": 187650
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9103164672851562,
      "learning_rate": 4.954438753972421e-05,
      "loss": 2.7932,
      "step": 187651
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.701637029647827,
      "learning_rate": 4.954213581323567e-05,
      "loss": 3.1513,
      "step": 187652
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0991640090942383,
      "learning_rate": 4.953988413331186e-05,
      "loss": 2.8709,
      "step": 187653
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3776204586029053,
      "learning_rate": 4.953763249995336e-05,
      "loss": 2.8588,
      "step": 187654
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5088789463043213,
      "learning_rate": 4.953538091316045e-05,
      "loss": 2.9636,
      "step": 187655
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7333343029022217,
      "learning_rate": 4.9533129372933754e-05,
      "loss": 2.8683,
      "step": 187656
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4066648483276367,
      "learning_rate": 4.9530877879273475e-05,
      "loss": 2.8751,
      "step": 187657
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2840735912323,
      "learning_rate": 4.952862643218028e-05,
      "loss": 2.9334,
      "step": 187658
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.919719696044922,
      "learning_rate": 4.952637503165431e-05,
      "loss": 2.7927,
      "step": 187659
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8423595428466797,
      "learning_rate": 4.952412367769618e-05,
      "loss": 2.8472,
      "step": 187660
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.979407787322998,
      "learning_rate": 4.952187237030617e-05,
      "loss": 2.7367,
      "step": 187661
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1178972721099854,
      "learning_rate": 4.951962110948484e-05,
      "loss": 2.8764,
      "step": 187662
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.361886501312256,
      "learning_rate": 4.951736989523249e-05,
      "loss": 2.7458,
      "step": 187663
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8887031078338623,
      "learning_rate": 4.95151187275497e-05,
      "loss": 2.9991,
      "step": 187664
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4843289852142334,
      "learning_rate": 4.951286760643665e-05,
      "loss": 3.1368,
      "step": 187665
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.727397918701172,
      "learning_rate": 4.951061653189398e-05,
      "loss": 2.9435,
      "step": 187666
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.621046543121338,
      "learning_rate": 4.950836550392192e-05,
      "loss": 2.8708,
      "step": 187667
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7646405696868896,
      "learning_rate": 4.950611452252105e-05,
      "loss": 3.0243,
      "step": 187668
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5115246772766113,
      "learning_rate": 4.950386358769165e-05,
      "loss": 3.057,
      "step": 187669
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.449324131011963,
      "learning_rate": 4.9501612699434366e-05,
      "loss": 2.8116,
      "step": 187670
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4397988319396973,
      "learning_rate": 4.949936185774933e-05,
      "loss": 3.0606,
      "step": 187671
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6319425106048584,
      "learning_rate": 4.949711106263714e-05,
      "loss": 3.1511,
      "step": 187672
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4493210315704346,
      "learning_rate": 4.9494860314098096e-05,
      "loss": 2.7856,
      "step": 187673
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4798905849456787,
      "learning_rate": 4.949260961213276e-05,
      "loss": 2.7459,
      "step": 187674
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1608710289001465,
      "learning_rate": 4.949035895674141e-05,
      "loss": 2.8154,
      "step": 187675
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7114226818084717,
      "learning_rate": 4.948810834792466e-05,
      "loss": 2.9131,
      "step": 187676
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.4660098552703857,
      "learning_rate": 4.948585778568266e-05,
      "loss": 2.6848,
      "step": 187677
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.564262628555298,
      "learning_rate": 4.9483607270016066e-05,
      "loss": 2.6565,
      "step": 187678
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2594616413116455,
      "learning_rate": 4.9481356800925085e-05,
      "loss": 2.8981,
      "step": 187679
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.2803025245666504,
      "learning_rate": 4.947910637841035e-05,
      "loss": 2.9837,
      "step": 187680
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5074710845947266,
      "learning_rate": 4.947685600247209e-05,
      "loss": 2.7519,
      "step": 187681
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3918235301971436,
      "learning_rate": 4.947460567311097e-05,
      "loss": 2.6226,
      "step": 187682
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.608705520629883,
      "learning_rate": 4.947235539032712e-05,
      "loss": 3.1825,
      "step": 187683
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.900343179702759,
      "learning_rate": 4.947010515412112e-05,
      "loss": 3.0202,
      "step": 187684
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5183656215667725,
      "learning_rate": 4.9467854964493326e-05,
      "loss": 2.8566,
      "step": 187685
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.15939998626709,
      "learning_rate": 4.946560482144424e-05,
      "loss": 2.9814,
      "step": 187686
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.108022928237915,
      "learning_rate": 4.946335472497412e-05,
      "loss": 3.0208,
      "step": 187687
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.895965337753296,
      "learning_rate": 4.9461104675083684e-05,
      "loss": 2.9475,
      "step": 187688
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.393787145614624,
      "learning_rate": 4.945885467177298e-05,
      "loss": 2.8218,
      "step": 187689
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6659231185913086,
      "learning_rate": 4.9456604715042725e-05,
      "loss": 2.8317,
      "step": 187690
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.336122512817383,
      "learning_rate": 4.9454354804893105e-05,
      "loss": 2.861,
      "step": 187691
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.967902660369873,
      "learning_rate": 4.945210494132472e-05,
      "loss": 2.7726,
      "step": 187692
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3550376892089844,
      "learning_rate": 4.9449855124337876e-05,
      "loss": 2.9372,
      "step": 187693
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0545177459716797,
      "learning_rate": 4.9447605353933074e-05,
      "loss": 2.8177,
      "step": 187694
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.277461528778076,
      "learning_rate": 4.9445355630110706e-05,
      "loss": 2.8841,
      "step": 187695
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3080079555511475,
      "learning_rate": 4.944310595287118e-05,
      "loss": 2.9394,
      "step": 187696
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.792628526687622,
      "learning_rate": 4.944085632221482e-05,
      "loss": 3.0191,
      "step": 187697
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7549455165863037,
      "learning_rate": 4.943860673814223e-05,
      "loss": 2.929,
      "step": 187698
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.604883909225464,
      "learning_rate": 4.9436357200653644e-05,
      "loss": 2.7438,
      "step": 187699
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.401641845703125,
      "learning_rate": 4.943410770974966e-05,
      "loss": 2.6818,
      "step": 187700
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6744699478149414,
      "learning_rate": 4.9431858265430576e-05,
      "loss": 3.2283,
      "step": 187701
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.950644493103027,
      "learning_rate": 4.9429608867696804e-05,
      "loss": 2.8651,
      "step": 187702
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.67620587348938,
      "learning_rate": 4.942735951654883e-05,
      "loss": 3.1112,
      "step": 187703
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7393035888671875,
      "learning_rate": 4.9425110211987054e-05,
      "loss": 2.6779,
      "step": 187704
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9541850090026855,
      "learning_rate": 4.9422860954011814e-05,
      "loss": 3.0337,
      "step": 187705
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5718531608581543,
      "learning_rate": 4.942061174262367e-05,
      "loss": 3.0298,
      "step": 187706
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.713567018508911,
      "learning_rate": 4.941836257782297e-05,
      "loss": 2.716,
      "step": 187707
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0803279876708984,
      "learning_rate": 4.941611345961003e-05,
      "loss": 2.9485,
      "step": 187708
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7306506633758545,
      "learning_rate": 4.941386438798546e-05,
      "loss": 3.1469,
      "step": 187709
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9692819118499756,
      "learning_rate": 4.941161536294955e-05,
      "loss": 2.8044,
      "step": 187710
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9742684364318848,
      "learning_rate": 4.9409366384502714e-05,
      "loss": 2.8527,
      "step": 187711
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.267452716827393,
      "learning_rate": 4.940711745264547e-05,
      "loss": 2.8215,
      "step": 187712
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9082083702087402,
      "learning_rate": 4.9404868567378167e-05,
      "loss": 2.782,
      "step": 187713
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.546767234802246,
      "learning_rate": 4.9402619728701156e-05,
      "loss": 2.8282,
      "step": 187714
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8934683799743652,
      "learning_rate": 4.940037093661501e-05,
      "loss": 2.9044,
      "step": 187715
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8998777866363525,
      "learning_rate": 4.939812219111999e-05,
      "loss": 2.8048,
      "step": 187716
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.632854461669922,
      "learning_rate": 4.939587349221668e-05,
      "loss": 2.8182,
      "step": 187717
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.71734619140625,
      "learning_rate": 4.9393624839905355e-05,
      "loss": 2.856,
      "step": 187718
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7040560245513916,
      "learning_rate": 4.939137623418653e-05,
      "loss": 2.9226,
      "step": 187719
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1746606826782227,
      "learning_rate": 4.9389127675060504e-05,
      "loss": 3.0431,
      "step": 187720
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3680472373962402,
      "learning_rate": 4.938687916252784e-05,
      "loss": 2.7754,
      "step": 187721
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.985490083694458,
      "learning_rate": 4.93846306965888e-05,
      "loss": 3.1507,
      "step": 187722
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6790006160736084,
      "learning_rate": 4.938238227724396e-05,
      "loss": 2.9006,
      "step": 187723
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0678327083587646,
      "learning_rate": 4.938013390449358e-05,
      "loss": 2.9778,
      "step": 187724
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0564236640930176,
      "learning_rate": 4.937788557833836e-05,
      "loss": 2.9715,
      "step": 187725
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.517753839492798,
      "learning_rate": 4.937563729877834e-05,
      "loss": 2.7873,
      "step": 187726
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.674272298812866,
      "learning_rate": 4.93733890658142e-05,
      "loss": 2.9714,
      "step": 187727
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7486934661865234,
      "learning_rate": 4.93711408794462e-05,
      "loss": 2.9665,
      "step": 187728
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.177436590194702,
      "learning_rate": 4.9368892739674895e-05,
      "loss": 2.8463,
      "step": 187729
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8804664611816406,
      "learning_rate": 4.936664464650061e-05,
      "loss": 3.0793,
      "step": 187730
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6329853534698486,
      "learning_rate": 4.936439659992392e-05,
      "loss": 2.8508,
      "step": 187731
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.939763069152832,
      "learning_rate": 4.936214859994495e-05,
      "loss": 3.0939,
      "step": 187732
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5294482707977295,
      "learning_rate": 4.935990064656441e-05,
      "loss": 2.8238,
      "step": 187733
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8719613552093506,
      "learning_rate": 4.9357652739782495e-05,
      "loss": 3.1397,
      "step": 187734
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3205738067626953,
      "learning_rate": 4.93554048795998e-05,
      "loss": 2.8791,
      "step": 187735
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.1689536571502686,
      "learning_rate": 4.935315706601657e-05,
      "loss": 2.9849,
      "step": 187736
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.5824742317199707,
      "learning_rate": 4.935090929903349e-05,
      "loss": 2.7793,
      "step": 187737
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.016683340072632,
      "learning_rate": 4.934866157865067e-05,
      "loss": 2.9284,
      "step": 187738
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.858853340148926,
      "learning_rate": 4.9346413904868744e-05,
      "loss": 3.0923,
      "step": 187739
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.217348098754883,
      "learning_rate": 4.9344166277687935e-05,
      "loss": 2.9921,
      "step": 187740
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9539427757263184,
      "learning_rate": 4.934191869710889e-05,
      "loss": 2.9577,
      "step": 187741
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.451350688934326,
      "learning_rate": 4.933967116313182e-05,
      "loss": 2.8692,
      "step": 187742
    },
    {
      "epoch": 2.44,
      "grad_norm": 4.818247318267822,
      "learning_rate": 4.933742367575741e-05,
      "loss": 2.8773,
      "step": 187743
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6983251571655273,
      "learning_rate": 4.9335176234985726e-05,
      "loss": 3.2487,
      "step": 187744
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7123937606811523,
      "learning_rate": 4.933292884081747e-05,
      "loss": 2.6873,
      "step": 187745
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7003822326660156,
      "learning_rate": 4.933068149325285e-05,
      "loss": 3.0663,
      "step": 187746
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9998342990875244,
      "learning_rate": 4.9328434192292464e-05,
      "loss": 2.9963,
      "step": 187747
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.639111042022705,
      "learning_rate": 4.9326186937936595e-05,
      "loss": 2.9004,
      "step": 187748
    },
    {
      "epoch": 2.44,
      "grad_norm": 5.4243550300598145,
      "learning_rate": 4.932393973018588e-05,
      "loss": 2.7979,
      "step": 187749
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5477190017700195,
      "learning_rate": 4.9321692569040384e-05,
      "loss": 2.9005,
      "step": 187750
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6999752521514893,
      "learning_rate": 4.931944545450084e-05,
      "loss": 2.999,
      "step": 187751
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.8281450271606445,
      "learning_rate": 4.9317198386567444e-05,
      "loss": 2.8575,
      "step": 187752
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.48416805267334,
      "learning_rate": 4.9314951365240773e-05,
      "loss": 3.1374,
      "step": 187753
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.934095859527588,
      "learning_rate": 4.931270439052115e-05,
      "loss": 3.0463,
      "step": 187754
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9627599716186523,
      "learning_rate": 4.931045746240915e-05,
      "loss": 2.738,
      "step": 187755
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.75999116897583,
      "learning_rate": 4.9308210580904896e-05,
      "loss": 2.7436,
      "step": 187756
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.4928958415985107,
      "learning_rate": 4.9305963746009094e-05,
      "loss": 3.029,
      "step": 187757
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7654452323913574,
      "learning_rate": 4.9303716957721936e-05,
      "loss": 3.0624,
      "step": 187758
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6092474460601807,
      "learning_rate": 4.930147021604407e-05,
      "loss": 2.9354,
      "step": 187759
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.9587552547454834,
      "learning_rate": 4.9299223520975685e-05,
      "loss": 3.0968,
      "step": 187760
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.114241361618042,
      "learning_rate": 4.9296976872517445e-05,
      "loss": 2.9534,
      "step": 187761
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.575080394744873,
      "learning_rate": 4.9294730270669524e-05,
      "loss": 2.8631,
      "step": 187762
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.794532537460327,
      "learning_rate": 4.9292483715432484e-05,
      "loss": 2.8681,
      "step": 187763
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.616034507751465,
      "learning_rate": 4.9290237206806624e-05,
      "loss": 3.0922,
      "step": 187764
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.25239634513855,
      "learning_rate": 4.928799074479255e-05,
      "loss": 3.015,
      "step": 187765
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.272426128387451,
      "learning_rate": 4.928574432939049e-05,
      "loss": 3.109,
      "step": 187766
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.8184046745300293,
      "learning_rate": 4.9283497960601006e-05,
      "loss": 3.1701,
      "step": 187767
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.844593048095703,
      "learning_rate": 4.928125163842444e-05,
      "loss": 2.6519,
      "step": 187768
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.5765609741210938,
      "learning_rate": 4.927900536286121e-05,
      "loss": 3.0836,
      "step": 187769
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6469461917877197,
      "learning_rate": 4.9276759133911704e-05,
      "loss": 3.1281,
      "step": 187770
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.638446092605591,
      "learning_rate": 4.927451295157644e-05,
      "loss": 3.0338,
      "step": 187771
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.6644058227539062,
      "learning_rate": 4.9272266815855724e-05,
      "loss": 2.6659,
      "step": 187772
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0687665939331055,
      "learning_rate": 4.927002072675008e-05,
      "loss": 2.7993,
      "step": 187773
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.7395901679992676,
      "learning_rate": 4.9267774684259885e-05,
      "loss": 3.0921,
      "step": 187774
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.990588903427124,
      "learning_rate": 4.9265528688385534e-05,
      "loss": 2.9609,
      "step": 187775
    },
    {
      "epoch": 2.44,
      "grad_norm": 2.3081417083740234,
      "learning_rate": 4.9263282739127364e-05,
      "loss": 2.9044,
      "step": 187776
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.6083173751831055,
      "learning_rate": 4.9261036836486e-05,
      "loss": 2.762,
      "step": 187777
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.003898859024048,
      "learning_rate": 4.925879098046162e-05,
      "loss": 2.7326,
      "step": 187778
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3821396827697754,
      "learning_rate": 4.925654517105487e-05,
      "loss": 2.9337,
      "step": 187779
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0210940837860107,
      "learning_rate": 4.925429940826604e-05,
      "loss": 2.8711,
      "step": 187780
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.498767137527466,
      "learning_rate": 4.9252053692095585e-05,
      "loss": 2.9586,
      "step": 187781
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6444146633148193,
      "learning_rate": 4.9249808022543835e-05,
      "loss": 3.0959,
      "step": 187782
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.797572612762451,
      "learning_rate": 4.924756239961136e-05,
      "loss": 2.9219,
      "step": 187783
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.745546817779541,
      "learning_rate": 4.92453168232984e-05,
      "loss": 2.7887,
      "step": 187784
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8704264163970947,
      "learning_rate": 4.924307129360554e-05,
      "loss": 2.8182,
      "step": 187785
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6079823970794678,
      "learning_rate": 4.924082581053316e-05,
      "loss": 2.8596,
      "step": 187786
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4326906204223633,
      "learning_rate": 4.923858037408155e-05,
      "loss": 2.8636,
      "step": 187787
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.521052837371826,
      "learning_rate": 4.923633498425128e-05,
      "loss": 3.0664,
      "step": 187788
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.576610803604126,
      "learning_rate": 4.923408964104272e-05,
      "loss": 2.8399,
      "step": 187789
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8678174018859863,
      "learning_rate": 4.923184434445619e-05,
      "loss": 2.9368,
      "step": 187790
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9863171577453613,
      "learning_rate": 4.922959909449231e-05,
      "loss": 2.9532,
      "step": 187791
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.881937265396118,
      "learning_rate": 4.9227353891151336e-05,
      "loss": 2.9473,
      "step": 187792
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.748422622680664,
      "learning_rate": 4.922510873443369e-05,
      "loss": 2.9922,
      "step": 187793
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.787118911743164,
      "learning_rate": 4.922286362433989e-05,
      "loss": 2.8252,
      "step": 187794
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6386568546295166,
      "learning_rate": 4.9220618560870296e-05,
      "loss": 2.9657,
      "step": 187795
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7380974292755127,
      "learning_rate": 4.9218373544025236e-05,
      "loss": 3.0185,
      "step": 187796
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.921431064605713,
      "learning_rate": 4.921612857380528e-05,
      "loss": 2.9431,
      "step": 187797
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.806880474090576,
      "learning_rate": 4.9213883650210795e-05,
      "loss": 3.0275,
      "step": 187798
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4252803325653076,
      "learning_rate": 4.921163877324211e-05,
      "loss": 2.8631,
      "step": 187799
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.603034019470215,
      "learning_rate": 4.92093939428998e-05,
      "loss": 2.9598,
      "step": 187800
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1040234565734863,
      "learning_rate": 4.920714915918412e-05,
      "loss": 2.8152,
      "step": 187801
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2713589668273926,
      "learning_rate": 4.9204904422095645e-05,
      "loss": 2.939,
      "step": 187802
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0821821689605713,
      "learning_rate": 4.920265973163467e-05,
      "loss": 2.8038,
      "step": 187803
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.253077983856201,
      "learning_rate": 4.9200415087801706e-05,
      "loss": 2.8348,
      "step": 187804
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4922611713409424,
      "learning_rate": 4.9198170490597035e-05,
      "loss": 2.8822,
      "step": 187805
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.611403703689575,
      "learning_rate": 4.9195925940021196e-05,
      "loss": 2.9331,
      "step": 187806
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.755068063735962,
      "learning_rate": 4.919368143607453e-05,
      "loss": 2.6379,
      "step": 187807
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1487951278686523,
      "learning_rate": 4.919143697875756e-05,
      "loss": 2.9066,
      "step": 187808
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.716982126235962,
      "learning_rate": 4.9189192568070627e-05,
      "loss": 3.0389,
      "step": 187809
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6378962993621826,
      "learning_rate": 4.918694820401415e-05,
      "loss": 2.629,
      "step": 187810
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.265906810760498,
      "learning_rate": 4.918470388658852e-05,
      "loss": 2.9181,
      "step": 187811
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.967376947402954,
      "learning_rate": 4.918245961579421e-05,
      "loss": 3.1479,
      "step": 187812
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.3602194786071777,
      "learning_rate": 4.918021539163157e-05,
      "loss": 3.013,
      "step": 187813
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0575478076934814,
      "learning_rate": 4.917797121410113e-05,
      "loss": 3.0286,
      "step": 187814
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.078514575958252,
      "learning_rate": 4.9175727083203184e-05,
      "loss": 2.8197,
      "step": 187815
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9652254581451416,
      "learning_rate": 4.917348299893834e-05,
      "loss": 2.8035,
      "step": 187816
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7132108211517334,
      "learning_rate": 4.917123896130672e-05,
      "loss": 2.8777,
      "step": 187817
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.315915584564209,
      "learning_rate": 4.9168994970309004e-05,
      "loss": 2.906,
      "step": 187818
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4734749794006348,
      "learning_rate": 4.9166751025945416e-05,
      "loss": 2.9867,
      "step": 187819
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.407334804534912,
      "learning_rate": 4.916450712821652e-05,
      "loss": 2.8518,
      "step": 187820
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7687766551971436,
      "learning_rate": 4.9162263277122625e-05,
      "loss": 2.7656,
      "step": 187821
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.801574468612671,
      "learning_rate": 4.916001947266432e-05,
      "loss": 2.8582,
      "step": 187822
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.906951427459717,
      "learning_rate": 4.9157775714841784e-05,
      "loss": 2.9046,
      "step": 187823
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6591715812683105,
      "learning_rate": 4.915553200365564e-05,
      "loss": 2.8679,
      "step": 187824
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8182570934295654,
      "learning_rate": 4.915328833910612e-05,
      "loss": 2.8447,
      "step": 187825
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.8367481231689453,
      "learning_rate": 4.91510447211938e-05,
      "loss": 3.0296,
      "step": 187826
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9771580696105957,
      "learning_rate": 4.9148801149919006e-05,
      "loss": 2.8952,
      "step": 187827
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7790350914001465,
      "learning_rate": 4.91465576252823e-05,
      "loss": 2.7281,
      "step": 187828
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9416253566741943,
      "learning_rate": 4.9144314147283825e-05,
      "loss": 2.9479,
      "step": 187829
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.610257148742676,
      "learning_rate": 4.9142070715924244e-05,
      "loss": 2.8571,
      "step": 187830
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.981473207473755,
      "learning_rate": 4.9139827331203815e-05,
      "loss": 2.7586,
      "step": 187831
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3157145977020264,
      "learning_rate": 4.913758399312312e-05,
      "loss": 2.8287,
      "step": 187832
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.460348129272461,
      "learning_rate": 4.913534070168238e-05,
      "loss": 3.1421,
      "step": 187833
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6000099182128906,
      "learning_rate": 4.9133097456882196e-05,
      "loss": 2.9233,
      "step": 187834
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9459030628204346,
      "learning_rate": 4.913085425872291e-05,
      "loss": 2.9896,
      "step": 187835
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.587771415710449,
      "learning_rate": 4.912861110720494e-05,
      "loss": 3.0412,
      "step": 187836
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.568527936935425,
      "learning_rate": 4.912636800232863e-05,
      "loss": 2.9488,
      "step": 187837
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7709193229675293,
      "learning_rate": 4.912412494409451e-05,
      "loss": 3.1015,
      "step": 187838
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4435675144195557,
      "learning_rate": 4.912188193250292e-05,
      "loss": 3.1239,
      "step": 187839
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5090866088867188,
      "learning_rate": 4.9119638967554345e-05,
      "loss": 3.0609,
      "step": 187840
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6974196434020996,
      "learning_rate": 4.911739604924916e-05,
      "loss": 2.9938,
      "step": 187841
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.7028472423553467,
      "learning_rate": 4.9115153177587794e-05,
      "loss": 3.0733,
      "step": 187842
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.466498613357544,
      "learning_rate": 4.911291035257059e-05,
      "loss": 3.2166,
      "step": 187843
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.785717487335205,
      "learning_rate": 4.9110667574198106e-05,
      "loss": 3.2073,
      "step": 187844
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.604111909866333,
      "learning_rate": 4.910842484247061e-05,
      "loss": 2.9223,
      "step": 187845
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6002988815307617,
      "learning_rate": 4.910618215738866e-05,
      "loss": 3.0291,
      "step": 187846
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9750735759735107,
      "learning_rate": 4.910393951895264e-05,
      "loss": 3.0389,
      "step": 187847
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.183488368988037,
      "learning_rate": 4.91016969271629e-05,
      "loss": 3.031,
      "step": 187848
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.044140815734863,
      "learning_rate": 4.909945438201982e-05,
      "loss": 2.6985,
      "step": 187849
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.066328048706055,
      "learning_rate": 4.909721188352396e-05,
      "loss": 3.3034,
      "step": 187850
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9579854011535645,
      "learning_rate": 4.9094969431675615e-05,
      "loss": 2.7094,
      "step": 187851
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6731295585632324,
      "learning_rate": 4.909272702647529e-05,
      "loss": 2.8747,
      "step": 187852
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8011739253997803,
      "learning_rate": 4.909048466792338e-05,
      "loss": 3.0976,
      "step": 187853
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.141470432281494,
      "learning_rate": 4.908824235602029e-05,
      "loss": 2.9895,
      "step": 187854
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9769768714904785,
      "learning_rate": 4.9086000090766354e-05,
      "loss": 3.1176,
      "step": 187855
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.723093271255493,
      "learning_rate": 4.908375787216213e-05,
      "loss": 3.0513,
      "step": 187856
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.802021026611328,
      "learning_rate": 4.908151570020793e-05,
      "loss": 2.9179,
      "step": 187857
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.680781841278076,
      "learning_rate": 4.907927357490424e-05,
      "loss": 2.8879,
      "step": 187858
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8075921535491943,
      "learning_rate": 4.907703149625147e-05,
      "loss": 3.0355,
      "step": 187859
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.944533586502075,
      "learning_rate": 4.9074789464250054e-05,
      "loss": 2.8863,
      "step": 187860
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2519989013671875,
      "learning_rate": 4.907254747890025e-05,
      "loss": 2.7251,
      "step": 187861
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3126533031463623,
      "learning_rate": 4.907030554020269e-05,
      "loss": 2.8053,
      "step": 187862
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.05265474319458,
      "learning_rate": 4.906806364815762e-05,
      "loss": 2.9326,
      "step": 187863
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.610093116760254,
      "learning_rate": 4.9065821802765594e-05,
      "loss": 2.9128,
      "step": 187864
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9158084392547607,
      "learning_rate": 4.906358000402698e-05,
      "loss": 2.6193,
      "step": 187865
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5595345497131348,
      "learning_rate": 4.9061338251942186e-05,
      "loss": 3.0701,
      "step": 187866
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9788289070129395,
      "learning_rate": 4.9059096546511534e-05,
      "loss": 3.0785,
      "step": 187867
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.303924798965454,
      "learning_rate": 4.9056854887735634e-05,
      "loss": 2.8949,
      "step": 187868
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3433992862701416,
      "learning_rate": 4.905461327561471e-05,
      "loss": 2.8967,
      "step": 187869
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.872199535369873,
      "learning_rate": 4.9052371710149366e-05,
      "loss": 2.8499,
      "step": 187870
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.172997951507568,
      "learning_rate": 4.90501301913399e-05,
      "loss": 3.0881,
      "step": 187871
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.025533676147461,
      "learning_rate": 4.904788871918674e-05,
      "loss": 2.9271,
      "step": 187872
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.786547899246216,
      "learning_rate": 4.904564729369027e-05,
      "loss": 2.9937,
      "step": 187873
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.038243293762207,
      "learning_rate": 4.9043405914851005e-05,
      "loss": 2.7099,
      "step": 187874
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.7111144065856934,
      "learning_rate": 4.904116458266926e-05,
      "loss": 2.9426,
      "step": 187875
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.389451265335083,
      "learning_rate": 4.9038923297145547e-05,
      "loss": 2.6789,
      "step": 187876
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0527446269989014,
      "learning_rate": 4.903668205828025e-05,
      "loss": 2.8915,
      "step": 187877
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.795820951461792,
      "learning_rate": 4.90344408660737e-05,
      "loss": 2.8075,
      "step": 187878
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.783306837081909,
      "learning_rate": 4.9032199720526454e-05,
      "loss": 3.1219,
      "step": 187879
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7019176483154297,
      "learning_rate": 4.902995862163886e-05,
      "loss": 2.5914,
      "step": 187880
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.3073601722717285,
      "learning_rate": 4.902771756941123e-05,
      "loss": 3.0119,
      "step": 187881
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.000241756439209,
      "learning_rate": 4.902547656384418e-05,
      "loss": 2.8822,
      "step": 187882
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2498281002044678,
      "learning_rate": 4.902323560493804e-05,
      "loss": 3.0117,
      "step": 187883
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6241860389709473,
      "learning_rate": 4.9020994692693137e-05,
      "loss": 2.9622,
      "step": 187884
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.911966323852539,
      "learning_rate": 4.9018753827110045e-05,
      "loss": 2.9525,
      "step": 187885
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9351866245269775,
      "learning_rate": 4.901651300818906e-05,
      "loss": 2.9322,
      "step": 187886
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8723185062408447,
      "learning_rate": 4.9014272235930686e-05,
      "loss": 3.0839,
      "step": 187887
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6749684810638428,
      "learning_rate": 4.901203151033528e-05,
      "loss": 2.8908,
      "step": 187888
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.494842052459717,
      "learning_rate": 4.900979083140332e-05,
      "loss": 3.0341,
      "step": 187889
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.383669853210449,
      "learning_rate": 4.900755019913506e-05,
      "loss": 3.0245,
      "step": 187890
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.39125919342041,
      "learning_rate": 4.900530961353114e-05,
      "loss": 2.9663,
      "step": 187891
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.74299693107605,
      "learning_rate": 4.900306907459176e-05,
      "loss": 2.8172,
      "step": 187892
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.762584686279297,
      "learning_rate": 4.900082858231755e-05,
      "loss": 2.8569,
      "step": 187893
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.206080436706543,
      "learning_rate": 4.8998588136708816e-05,
      "loss": 2.9879,
      "step": 187894
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.231034278869629,
      "learning_rate": 4.899634773776599e-05,
      "loss": 2.7364,
      "step": 187895
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.909285545349121,
      "learning_rate": 4.899410738548942e-05,
      "loss": 3.0866,
      "step": 187896
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1783082485198975,
      "learning_rate": 4.899186707987963e-05,
      "loss": 2.9491,
      "step": 187897
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.858252048492432,
      "learning_rate": 4.898962682093691e-05,
      "loss": 2.6802,
      "step": 187898
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4979779720306396,
      "learning_rate": 4.898738660866186e-05,
      "loss": 2.9343,
      "step": 187899
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.7812893390655518,
      "learning_rate": 4.898514644305471e-05,
      "loss": 3.0296,
      "step": 187900
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.665168046951294,
      "learning_rate": 4.898290632411603e-05,
      "loss": 2.9668,
      "step": 187901
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5265414714813232,
      "learning_rate": 4.8980666251846155e-05,
      "loss": 3.0165,
      "step": 187902
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.820009469985962,
      "learning_rate": 4.897842622624551e-05,
      "loss": 2.8996,
      "step": 187903
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.049906015396118,
      "learning_rate": 4.897618624731447e-05,
      "loss": 2.9705,
      "step": 187904
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9938464164733887,
      "learning_rate": 4.897394631505353e-05,
      "loss": 3.1028,
      "step": 187905
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.160600185394287,
      "learning_rate": 4.897170642946303e-05,
      "loss": 3.0587,
      "step": 187906
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7665762901306152,
      "learning_rate": 4.8969466590543496e-05,
      "loss": 3.0912,
      "step": 187907
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.131542444229126,
      "learning_rate": 4.896722679829529e-05,
      "loss": 2.8549,
      "step": 187908
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4768693447113037,
      "learning_rate": 4.896498705271878e-05,
      "loss": 2.8666,
      "step": 187909
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4103317260742188,
      "learning_rate": 4.896274735381438e-05,
      "loss": 3.0378,
      "step": 187910
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1465330123901367,
      "learning_rate": 4.8960507701582576e-05,
      "loss": 3.1585,
      "step": 187911
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.106463670730591,
      "learning_rate": 4.89582680960237e-05,
      "loss": 2.9367,
      "step": 187912
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4119632244110107,
      "learning_rate": 4.895602853713833e-05,
      "loss": 2.9277,
      "step": 187913
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7651207447052,
      "learning_rate": 4.895378902492672e-05,
      "loss": 2.8832,
      "step": 187914
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6362810134887695,
      "learning_rate": 4.895154955938937e-05,
      "loss": 2.6861,
      "step": 187915
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.205514907836914,
      "learning_rate": 4.894931014052656e-05,
      "loss": 2.6747,
      "step": 187916
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.027556896209717,
      "learning_rate": 4.89470707683389e-05,
      "loss": 2.8659,
      "step": 187917
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.903289556503296,
      "learning_rate": 4.894483144282668e-05,
      "loss": 2.9224,
      "step": 187918
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.58833384513855,
      "learning_rate": 4.894259216399039e-05,
      "loss": 3.0803,
      "step": 187919
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3031392097473145,
      "learning_rate": 4.894035293183043e-05,
      "loss": 2.6932,
      "step": 187920
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.978118658065796,
      "learning_rate": 4.8938113746347165e-05,
      "loss": 3.1,
      "step": 187921
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.80549693107605,
      "learning_rate": 4.893587460754099e-05,
      "loss": 3.0889,
      "step": 187922
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3708996772766113,
      "learning_rate": 4.8933635515412485e-05,
      "loss": 3.0665,
      "step": 187923
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7621781826019287,
      "learning_rate": 4.893139646996184e-05,
      "loss": 2.6809,
      "step": 187924
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1865315437316895,
      "learning_rate": 4.8929157471189685e-05,
      "loss": 2.8829,
      "step": 187925
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.425040245056152,
      "learning_rate": 4.8926918519096325e-05,
      "loss": 2.713,
      "step": 187926
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.56750750541687,
      "learning_rate": 4.892467961368216e-05,
      "loss": 3.0418,
      "step": 187927
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1767382621765137,
      "learning_rate": 4.8922440754947625e-05,
      "loss": 2.9833,
      "step": 187928
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5148446559906006,
      "learning_rate": 4.892020194289318e-05,
      "loss": 2.737,
      "step": 187929
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0876708030700684,
      "learning_rate": 4.8917963177519126e-05,
      "loss": 2.7525,
      "step": 187930
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6790571212768555,
      "learning_rate": 4.891572445882607e-05,
      "loss": 2.8444,
      "step": 187931
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.783726453781128,
      "learning_rate": 4.8913485786814296e-05,
      "loss": 3.3103,
      "step": 187932
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.866102695465088,
      "learning_rate": 4.891124716148426e-05,
      "loss": 2.7387,
      "step": 187933
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6922800540924072,
      "learning_rate": 4.8909008582836274e-05,
      "loss": 3.0965,
      "step": 187934
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.351687431335449,
      "learning_rate": 4.8906770050870944e-05,
      "loss": 3.1145,
      "step": 187935
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.108733892440796,
      "learning_rate": 4.890453156558848e-05,
      "loss": 2.7933,
      "step": 187936
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6261727809906006,
      "learning_rate": 4.89022931269895e-05,
      "loss": 2.9465,
      "step": 187937
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.538213014602661,
      "learning_rate": 4.890005473507431e-05,
      "loss": 2.8518,
      "step": 187938
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.68452787399292,
      "learning_rate": 4.8897816389843345e-05,
      "loss": 3.0397,
      "step": 187939
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3520734310150146,
      "learning_rate": 4.889557809129694e-05,
      "loss": 3.0853,
      "step": 187940
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7222137451171875,
      "learning_rate": 4.889333983943565e-05,
      "loss": 2.9557,
      "step": 187941
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5379843711853027,
      "learning_rate": 4.889110163425979e-05,
      "loss": 2.7977,
      "step": 187942
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.780376672744751,
      "learning_rate": 4.8888863475769846e-05,
      "loss": 2.8606,
      "step": 187943
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.431745767593384,
      "learning_rate": 4.8886625363966224e-05,
      "loss": 3.0655,
      "step": 187944
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5165696144104004,
      "learning_rate": 4.8884387298849324e-05,
      "loss": 3.27,
      "step": 187945
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5229272842407227,
      "learning_rate": 4.888214928041948e-05,
      "loss": 2.99,
      "step": 187946
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4709768295288086,
      "learning_rate": 4.8879911308677254e-05,
      "loss": 3.0907,
      "step": 187947
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.403019428253174,
      "learning_rate": 4.887767338362292e-05,
      "loss": 2.7462,
      "step": 187948
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.579035758972168,
      "learning_rate": 4.8875435505257036e-05,
      "loss": 3.11,
      "step": 187949
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4510390758514404,
      "learning_rate": 4.8873197673579936e-05,
      "loss": 2.8718,
      "step": 187950
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.130601644515991,
      "learning_rate": 4.8870959888592085e-05,
      "loss": 2.8794,
      "step": 187951
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7533562183380127,
      "learning_rate": 4.886872215029376e-05,
      "loss": 2.8562,
      "step": 187952
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2294938564300537,
      "learning_rate": 4.886648445868555e-05,
      "loss": 2.9511,
      "step": 187953
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.43101167678833,
      "learning_rate": 4.886424681376776e-05,
      "loss": 2.8508,
      "step": 187954
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.036954402923584,
      "learning_rate": 4.886200921554089e-05,
      "loss": 2.7718,
      "step": 187955
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4653396606445312,
      "learning_rate": 4.885977166400533e-05,
      "loss": 2.7011,
      "step": 187956
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.295801877975464,
      "learning_rate": 4.885753415916146e-05,
      "loss": 2.7808,
      "step": 187957
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8893165588378906,
      "learning_rate": 4.885529670100967e-05,
      "loss": 2.863,
      "step": 187958
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.68534255027771,
      "learning_rate": 4.8853059289550464e-05,
      "loss": 2.9259,
      "step": 187959
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4519991874694824,
      "learning_rate": 4.885082192478414e-05,
      "loss": 3.0321,
      "step": 187960
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.929588794708252,
      "learning_rate": 4.8848584606711304e-05,
      "loss": 2.738,
      "step": 187961
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8973798751831055,
      "learning_rate": 4.8846347335332214e-05,
      "loss": 2.887,
      "step": 187962
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1845204830169678,
      "learning_rate": 4.884411011064727e-05,
      "loss": 2.8311,
      "step": 187963
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.111785411834717,
      "learning_rate": 4.884187293265704e-05,
      "loss": 2.6963,
      "step": 187964
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0358800888061523,
      "learning_rate": 4.8839635801361834e-05,
      "loss": 3.0626,
      "step": 187965
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6346933841705322,
      "learning_rate": 4.8837398716762e-05,
      "loss": 2.7052,
      "step": 187966
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4081215858459473,
      "learning_rate": 4.8835161678858116e-05,
      "loss": 3.0717,
      "step": 187967
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.8314013481140137,
      "learning_rate": 4.883292468765044e-05,
      "loss": 2.8085,
      "step": 187968
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.920566558837891,
      "learning_rate": 4.8830687743139516e-05,
      "loss": 2.8153,
      "step": 187969
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3925976753234863,
      "learning_rate": 4.882845084532574e-05,
      "loss": 2.7113,
      "step": 187970
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.748356342315674,
      "learning_rate": 4.8826213994209475e-05,
      "loss": 2.8326,
      "step": 187971
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.9893131256103516,
      "learning_rate": 4.8823977189791084e-05,
      "loss": 3.0257,
      "step": 187972
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5150067806243896,
      "learning_rate": 4.8821740432071144e-05,
      "loss": 2.762,
      "step": 187973
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.697571277618408,
      "learning_rate": 4.881950372104991e-05,
      "loss": 2.7909,
      "step": 187974
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9193122386932373,
      "learning_rate": 4.881726705672796e-05,
      "loss": 2.9184,
      "step": 187975
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.3270797729492188,
      "learning_rate": 4.881503043910559e-05,
      "loss": 2.808,
      "step": 187976
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.840216636657715,
      "learning_rate": 4.8812793868183186e-05,
      "loss": 3.1891,
      "step": 187977
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2408268451690674,
      "learning_rate": 4.88105573439613e-05,
      "loss": 2.9138,
      "step": 187978
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.894697427749634,
      "learning_rate": 4.880832086644029e-05,
      "loss": 2.9813,
      "step": 187979
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.091745615005493,
      "learning_rate": 4.880608443562045e-05,
      "loss": 2.9632,
      "step": 187980
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.631135940551758,
      "learning_rate": 4.8803848051502395e-05,
      "loss": 3.0,
      "step": 187981
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.724675416946411,
      "learning_rate": 4.8801611714086444e-05,
      "loss": 2.9721,
      "step": 187982
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.027484655380249,
      "learning_rate": 4.8799375423372936e-05,
      "loss": 2.9373,
      "step": 187983
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.937147378921509,
      "learning_rate": 4.879713917936243e-05,
      "loss": 2.7732,
      "step": 187984
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.169425010681152,
      "learning_rate": 4.879490298205524e-05,
      "loss": 2.7821,
      "step": 187985
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6477253437042236,
      "learning_rate": 4.8792666831451856e-05,
      "loss": 2.8963,
      "step": 187986
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.046936273574829,
      "learning_rate": 4.879043072755268e-05,
      "loss": 2.9406,
      "step": 187987
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.006242275238037,
      "learning_rate": 4.8788194670358075e-05,
      "loss": 2.9715,
      "step": 187988
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.690584659576416,
      "learning_rate": 4.878595865986844e-05,
      "loss": 2.8495,
      "step": 187989
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.244663953781128,
      "learning_rate": 4.878372269608428e-05,
      "loss": 3.0145,
      "step": 187990
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5899553298950195,
      "learning_rate": 4.878148677900593e-05,
      "loss": 3.001,
      "step": 187991
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.01678466796875,
      "learning_rate": 4.877925090863392e-05,
      "loss": 2.8768,
      "step": 187992
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.979562520980835,
      "learning_rate": 4.877701508496858e-05,
      "loss": 2.8214,
      "step": 187993
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.326786756515503,
      "learning_rate": 4.8774779308010306e-05,
      "loss": 2.6091,
      "step": 187994
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9007420539855957,
      "learning_rate": 4.8772543577759505e-05,
      "loss": 3.1927,
      "step": 187995
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.824885606765747,
      "learning_rate": 4.8770307894216676e-05,
      "loss": 2.7722,
      "step": 187996
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0444958209991455,
      "learning_rate": 4.876807225738215e-05,
      "loss": 3.046,
      "step": 187997
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0001134872436523,
      "learning_rate": 4.876583666725643e-05,
      "loss": 2.9284,
      "step": 187998
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7706429958343506,
      "learning_rate": 4.876360112383988e-05,
      "loss": 3.0486,
      "step": 187999
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2211880683898926,
      "learning_rate": 4.876136562713293e-05,
      "loss": 3.0737,
      "step": 188000
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.970003128051758,
      "learning_rate": 4.875913017713592e-05,
      "loss": 2.967,
      "step": 188001
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.001877546310425,
      "learning_rate": 4.875689477384937e-05,
      "loss": 2.8618,
      "step": 188002
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.662308931350708,
      "learning_rate": 4.8754659417273635e-05,
      "loss": 3.0167,
      "step": 188003
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9428064823150635,
      "learning_rate": 4.8752424107409195e-05,
      "loss": 2.8164,
      "step": 188004
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.651259183883667,
      "learning_rate": 4.875018884425642e-05,
      "loss": 2.9641,
      "step": 188005
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3029372692108154,
      "learning_rate": 4.874795362781572e-05,
      "loss": 2.9311,
      "step": 188006
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5929083824157715,
      "learning_rate": 4.8745718458087445e-05,
      "loss": 2.8801,
      "step": 188007
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9798784255981445,
      "learning_rate": 4.874348333507218e-05,
      "loss": 2.7998,
      "step": 188008
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5480268001556396,
      "learning_rate": 4.874124825877017e-05,
      "loss": 2.898,
      "step": 188009
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.572654962539673,
      "learning_rate": 4.8739013229181966e-05,
      "loss": 3.0162,
      "step": 188010
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9822487831115723,
      "learning_rate": 4.8736778246307895e-05,
      "loss": 3.0759,
      "step": 188011
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.602999210357666,
      "learning_rate": 4.8734543310148456e-05,
      "loss": 3.0655,
      "step": 188012
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.042912244796753,
      "learning_rate": 4.873230842070388e-05,
      "loss": 2.6778,
      "step": 188013
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2465734481811523,
      "learning_rate": 4.873007357797484e-05,
      "loss": 3.2048,
      "step": 188014
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7203071117401123,
      "learning_rate": 4.872783878196149e-05,
      "loss": 2.742,
      "step": 188015
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.645452499389648,
      "learning_rate": 4.872560403266451e-05,
      "loss": 3.0346,
      "step": 188016
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.942228078842163,
      "learning_rate": 4.872336933008413e-05,
      "loss": 2.4844,
      "step": 188017
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6514179706573486,
      "learning_rate": 4.872113467422084e-05,
      "loss": 3.126,
      "step": 188018
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.208866596221924,
      "learning_rate": 4.8718900065074985e-05,
      "loss": 2.7035,
      "step": 188019
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.34123158454895,
      "learning_rate": 4.871666550264709e-05,
      "loss": 3.0101,
      "step": 188020
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6975722312927246,
      "learning_rate": 4.8714430986937424e-05,
      "loss": 2.9864,
      "step": 188021
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8121402263641357,
      "learning_rate": 4.871219651794655e-05,
      "loss": 2.8601,
      "step": 188022
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1860716342926025,
      "learning_rate": 4.870996209567485e-05,
      "loss": 2.9275,
      "step": 188023
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.845628023147583,
      "learning_rate": 4.87077277201227e-05,
      "loss": 2.9732,
      "step": 188024
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1829488277435303,
      "learning_rate": 4.870549339129045e-05,
      "loss": 3.1381,
      "step": 188025
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2725751399993896,
      "learning_rate": 4.8703259109178664e-05,
      "loss": 3.011,
      "step": 188026
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.146360397338867,
      "learning_rate": 4.8701024873787634e-05,
      "loss": 3.0313,
      "step": 188027
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6934564113616943,
      "learning_rate": 4.869879068511789e-05,
      "loss": 2.9143,
      "step": 188028
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0414092540740967,
      "learning_rate": 4.869655654316979e-05,
      "loss": 2.5496,
      "step": 188029
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0740442276000977,
      "learning_rate": 4.8694322447943704e-05,
      "loss": 2.8296,
      "step": 188030
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0436251163482666,
      "learning_rate": 4.8692088399440045e-05,
      "loss": 2.8392,
      "step": 188031
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.970405340194702,
      "learning_rate": 4.8689854397659347e-05,
      "loss": 2.7379,
      "step": 188032
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.978132963180542,
      "learning_rate": 4.8687620442601873e-05,
      "loss": 2.8013,
      "step": 188033
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6975059509277344,
      "learning_rate": 4.868538653426819e-05,
      "loss": 2.9339,
      "step": 188034
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5205960273742676,
      "learning_rate": 4.868315267265857e-05,
      "loss": 3.1221,
      "step": 188035
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.69510817527771,
      "learning_rate": 4.868091885777363e-05,
      "loss": 2.9268,
      "step": 188036
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.766929864883423,
      "learning_rate": 4.867868508961349e-05,
      "loss": 2.9421,
      "step": 188037
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6458375453948975,
      "learning_rate": 4.86764513681788e-05,
      "loss": 2.9811,
      "step": 188038
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6267967224121094,
      "learning_rate": 4.8674217693469874e-05,
      "loss": 2.8523,
      "step": 188039
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.814013957977295,
      "learning_rate": 4.86719840654872e-05,
      "loss": 3.1486,
      "step": 188040
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8508224487304688,
      "learning_rate": 4.866975048423105e-05,
      "loss": 2.8233,
      "step": 188041
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7602274417877197,
      "learning_rate": 4.866751694970212e-05,
      "loss": 2.892,
      "step": 188042
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.745269298553467,
      "learning_rate": 4.8665283461900486e-05,
      "loss": 2.9155,
      "step": 188043
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.046397686004639,
      "learning_rate": 4.86630500208268e-05,
      "loss": 2.8732,
      "step": 188044
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.844060182571411,
      "learning_rate": 4.86608166264813e-05,
      "loss": 3.0136,
      "step": 188045
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.9490957260131836,
      "learning_rate": 4.8658583278864594e-05,
      "loss": 2.9924,
      "step": 188046
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.952035665512085,
      "learning_rate": 4.865634997797694e-05,
      "loss": 2.8678,
      "step": 188047
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.575589418411255,
      "learning_rate": 4.865411672381887e-05,
      "loss": 2.9427,
      "step": 188048
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7784063816070557,
      "learning_rate": 4.8651883516390724e-05,
      "loss": 2.8869,
      "step": 188049
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7519407272338867,
      "learning_rate": 4.864965035569296e-05,
      "loss": 2.8748,
      "step": 188050
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3782362937927246,
      "learning_rate": 4.8647417241725914e-05,
      "loss": 2.8102,
      "step": 188051
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3026764392852783,
      "learning_rate": 4.864518417449009e-05,
      "loss": 2.9229,
      "step": 188052
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.957965135574341,
      "learning_rate": 4.864295115398582e-05,
      "loss": 2.9919,
      "step": 188053
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7148711681365967,
      "learning_rate": 4.8640718180213665e-05,
      "loss": 2.7135,
      "step": 188054
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.478422164916992,
      "learning_rate": 4.863848525317393e-05,
      "loss": 2.659,
      "step": 188055
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7345974445343018,
      "learning_rate": 4.863625237286701e-05,
      "loss": 2.7495,
      "step": 188056
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6751112937927246,
      "learning_rate": 4.863401953929334e-05,
      "loss": 2.9856,
      "step": 188057
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5633034706115723,
      "learning_rate": 4.8631786752453397e-05,
      "loss": 2.9037,
      "step": 188058
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4501028060913086,
      "learning_rate": 4.862955401234746e-05,
      "loss": 3.0712,
      "step": 188059
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.267551898956299,
      "learning_rate": 4.862732131897614e-05,
      "loss": 2.8951,
      "step": 188060
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.103832721710205,
      "learning_rate": 4.862508867233971e-05,
      "loss": 2.983,
      "step": 188061
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.685375690460205,
      "learning_rate": 4.862285607243859e-05,
      "loss": 2.918,
      "step": 188062
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1452715396881104,
      "learning_rate": 4.8620623519273246e-05,
      "loss": 2.7537,
      "step": 188063
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.382082939147949,
      "learning_rate": 4.861839101284413e-05,
      "loss": 3.1087,
      "step": 188064
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.210366725921631,
      "learning_rate": 4.861615855315149e-05,
      "loss": 2.8548,
      "step": 188065
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7687149047851562,
      "learning_rate": 4.861392614019592e-05,
      "loss": 3.184,
      "step": 188066
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.728567361831665,
      "learning_rate": 4.861169377397778e-05,
      "loss": 3.1191,
      "step": 188067
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.030362367630005,
      "learning_rate": 4.860946145449741e-05,
      "loss": 3.0005,
      "step": 188068
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.782353639602661,
      "learning_rate": 4.860722918175536e-05,
      "loss": 2.8597,
      "step": 188069
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6248836517333984,
      "learning_rate": 4.8604996955751885e-05,
      "loss": 2.9885,
      "step": 188070
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.7688162326812744,
      "learning_rate": 4.8602764776487554e-05,
      "loss": 2.8288,
      "step": 188071
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9577481746673584,
      "learning_rate": 4.860053264396271e-05,
      "loss": 2.8723,
      "step": 188072
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6002726554870605,
      "learning_rate": 4.8598300558177804e-05,
      "loss": 2.8996,
      "step": 188073
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.241818904876709,
      "learning_rate": 4.859606851913312e-05,
      "loss": 2.849,
      "step": 188074
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.770434856414795,
      "learning_rate": 4.8593836526829266e-05,
      "loss": 3.1203,
      "step": 188075
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3578219413757324,
      "learning_rate": 4.859160458126646e-05,
      "loss": 2.6948,
      "step": 188076
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.520265817642212,
      "learning_rate": 4.858937268244531e-05,
      "loss": 3.0178,
      "step": 188077
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.600874662399292,
      "learning_rate": 4.8587140830366155e-05,
      "loss": 2.8863,
      "step": 188078
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.763812780380249,
      "learning_rate": 4.8584909025029384e-05,
      "loss": 2.8859,
      "step": 188079
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.055819272994995,
      "learning_rate": 4.8582677266435354e-05,
      "loss": 3.0697,
      "step": 188080
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5080292224884033,
      "learning_rate": 4.85804455545846e-05,
      "loss": 3.064,
      "step": 188081
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6841394901275635,
      "learning_rate": 4.857821388947746e-05,
      "loss": 2.8414,
      "step": 188082
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2988646030426025,
      "learning_rate": 4.8575982271114424e-05,
      "loss": 2.9909,
      "step": 188083
    },
    {
      "epoch": 2.45,
      "grad_norm": 6.075623035430908,
      "learning_rate": 4.857375069949583e-05,
      "loss": 2.8756,
      "step": 188084
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8054842948913574,
      "learning_rate": 4.857151917462218e-05,
      "loss": 2.7633,
      "step": 188085
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.088629961013794,
      "learning_rate": 4.85692876964937e-05,
      "loss": 2.8111,
      "step": 188086
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.160677433013916,
      "learning_rate": 4.856705626511106e-05,
      "loss": 3.1925,
      "step": 188087
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.592690944671631,
      "learning_rate": 4.856482488047443e-05,
      "loss": 2.9479,
      "step": 188088
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.774961233139038,
      "learning_rate": 4.856259354258444e-05,
      "loss": 2.8763,
      "step": 188089
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.020460367202759,
      "learning_rate": 4.856036225144142e-05,
      "loss": 2.8759,
      "step": 188090
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.552055835723877,
      "learning_rate": 4.855813100704577e-05,
      "loss": 2.8447,
      "step": 188091
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.091451168060303,
      "learning_rate": 4.8555899809397834e-05,
      "loss": 3.0426,
      "step": 188092
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.579516649246216,
      "learning_rate": 4.8553668658498167e-05,
      "loss": 3.0437,
      "step": 188093
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.317382574081421,
      "learning_rate": 4.8551437554347074e-05,
      "loss": 3.1618,
      "step": 188094
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.9573988914489746,
      "learning_rate": 4.854920649694505e-05,
      "loss": 3.0227,
      "step": 188095
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6239876747131348,
      "learning_rate": 4.85469754862925e-05,
      "loss": 2.9294,
      "step": 188096
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3086130619049072,
      "learning_rate": 4.854474452238981e-05,
      "loss": 2.8522,
      "step": 188097
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.731701135635376,
      "learning_rate": 4.854251360523733e-05,
      "loss": 2.8041,
      "step": 188098
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.637387990951538,
      "learning_rate": 4.854028273483559e-05,
      "loss": 2.7214,
      "step": 188099
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6537091732025146,
      "learning_rate": 4.853805191118491e-05,
      "loss": 2.9974,
      "step": 188100
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.935777425765991,
      "learning_rate": 4.853582113428581e-05,
      "loss": 2.7271,
      "step": 188101
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.611046314239502,
      "learning_rate": 4.8533590404138615e-05,
      "loss": 3.0896,
      "step": 188102
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.419074058532715,
      "learning_rate": 4.853135972074388e-05,
      "loss": 2.7549,
      "step": 188103
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2694451808929443,
      "learning_rate": 4.852912908410178e-05,
      "loss": 3.236,
      "step": 188104
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6437289714813232,
      "learning_rate": 4.852689849421292e-05,
      "loss": 3.0855,
      "step": 188105
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8868963718414307,
      "learning_rate": 4.852466795107759e-05,
      "loss": 3.0549,
      "step": 188106
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5102627277374268,
      "learning_rate": 4.852243745469636e-05,
      "loss": 3.0777,
      "step": 188107
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.772188901901245,
      "learning_rate": 4.852020700506947e-05,
      "loss": 2.9782,
      "step": 188108
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.667815685272217,
      "learning_rate": 4.851797660219754e-05,
      "loss": 2.9202,
      "step": 188109
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5337157249450684,
      "learning_rate": 4.851574624608074e-05,
      "loss": 2.6365,
      "step": 188110
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1387789249420166,
      "learning_rate": 4.851351593671971e-05,
      "loss": 3.0869,
      "step": 188111
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.603097915649414,
      "learning_rate": 4.851128567411465e-05,
      "loss": 2.8189,
      "step": 188112
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.017852544784546,
      "learning_rate": 4.8509055458266186e-05,
      "loss": 2.9838,
      "step": 188113
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2023119926452637,
      "learning_rate": 4.850682528917459e-05,
      "loss": 2.8269,
      "step": 188114
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4521286487579346,
      "learning_rate": 4.850459516684042e-05,
      "loss": 2.9207,
      "step": 188115
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5610482692718506,
      "learning_rate": 4.850236509126385e-05,
      "loss": 2.9646,
      "step": 188116
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6399118900299072,
      "learning_rate": 4.850013506244554e-05,
      "loss": 2.9954,
      "step": 188117
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.741833448410034,
      "learning_rate": 4.84979050803857e-05,
      "loss": 2.7454,
      "step": 188118
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.45285964012146,
      "learning_rate": 4.8495675145084955e-05,
      "loss": 3.1252,
      "step": 188119
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4953994750976562,
      "learning_rate": 4.849344525654351e-05,
      "loss": 3.0036,
      "step": 188120
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8363544940948486,
      "learning_rate": 4.849121541476205e-05,
      "loss": 3.0484,
      "step": 188121
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5232791900634766,
      "learning_rate": 4.848898561974066e-05,
      "loss": 2.8184,
      "step": 188122
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.143752098083496,
      "learning_rate": 4.8486755871479966e-05,
      "loss": 2.8435,
      "step": 188123
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.912228107452393,
      "learning_rate": 4.84845261699803e-05,
      "loss": 2.741,
      "step": 188124
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1461715698242188,
      "learning_rate": 4.848229651524216e-05,
      "loss": 3.0582,
      "step": 188125
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8123526573181152,
      "learning_rate": 4.8480066907265844e-05,
      "loss": 2.9578,
      "step": 188126
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0283687114715576,
      "learning_rate": 4.847783734605196e-05,
      "loss": 2.9333,
      "step": 188127
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0686533451080322,
      "learning_rate": 4.847560783160067e-05,
      "loss": 2.9451,
      "step": 188128
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.61320161819458,
      "learning_rate": 4.847337836391258e-05,
      "loss": 2.7594,
      "step": 188129
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1205005645751953,
      "learning_rate": 4.847114894298794e-05,
      "loss": 2.8925,
      "step": 188130
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.493897438049316,
      "learning_rate": 4.846891956882736e-05,
      "loss": 2.9389,
      "step": 188131
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.116451740264893,
      "learning_rate": 4.846669024143107e-05,
      "loss": 2.6217,
      "step": 188132
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4842941761016846,
      "learning_rate": 4.846446096079971e-05,
      "loss": 3.0555,
      "step": 188133
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.942908525466919,
      "learning_rate": 4.846223172693344e-05,
      "loss": 2.9644,
      "step": 188134
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.66923189163208,
      "learning_rate": 4.846000253983283e-05,
      "loss": 2.9148,
      "step": 188135
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3471765518188477,
      "learning_rate": 4.845777339949821e-05,
      "loss": 2.8392,
      "step": 188136
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.323967695236206,
      "learning_rate": 4.8455544305930083e-05,
      "loss": 2.9283,
      "step": 188137
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5911142826080322,
      "learning_rate": 4.8453315259128744e-05,
      "loss": 2.8138,
      "step": 188138
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.8242123126983643,
      "learning_rate": 4.84510862590948e-05,
      "loss": 3.1582,
      "step": 188139
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8027915954589844,
      "learning_rate": 4.84488573058285e-05,
      "loss": 2.999,
      "step": 188140
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.730908155441284,
      "learning_rate": 4.844662839933034e-05,
      "loss": 3.0102,
      "step": 188141
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8629071712493896,
      "learning_rate": 4.844439953960059e-05,
      "loss": 2.855,
      "step": 188142
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.476381301879883,
      "learning_rate": 4.8442170726639865e-05,
      "loss": 3.222,
      "step": 188143
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.907120704650879,
      "learning_rate": 4.8439941960448425e-05,
      "loss": 2.7369,
      "step": 188144
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.904644250869751,
      "learning_rate": 4.843771324102681e-05,
      "loss": 3.1116,
      "step": 188145
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9547348022460938,
      "learning_rate": 4.843548456837538e-05,
      "loss": 2.9823,
      "step": 188146
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8063724040985107,
      "learning_rate": 4.8433255942494475e-05,
      "loss": 2.9046,
      "step": 188147
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.950728178024292,
      "learning_rate": 4.843102736338462e-05,
      "loss": 2.9997,
      "step": 188148
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9870355129241943,
      "learning_rate": 4.8428798831046224e-05,
      "loss": 3.1466,
      "step": 188149
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.573652982711792,
      "learning_rate": 4.842657034547958e-05,
      "loss": 2.9106,
      "step": 188150
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5574400424957275,
      "learning_rate": 4.842434190668522e-05,
      "loss": 2.7804,
      "step": 188151
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.925650119781494,
      "learning_rate": 4.8422113514663576e-05,
      "loss": 2.6277,
      "step": 188152
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.857447624206543,
      "learning_rate": 4.841988516941492e-05,
      "loss": 2.8703,
      "step": 188153
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6612770557403564,
      "learning_rate": 4.841765687093982e-05,
      "loss": 2.8097,
      "step": 188154
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5632758140563965,
      "learning_rate": 4.841542861923863e-05,
      "loss": 2.7417,
      "step": 188155
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0873866081237793,
      "learning_rate": 4.841320041431169e-05,
      "loss": 2.7691,
      "step": 188156
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.372039318084717,
      "learning_rate": 4.841097225615954e-05,
      "loss": 3.0374,
      "step": 188157
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0844058990478516,
      "learning_rate": 4.840874414478257e-05,
      "loss": 2.8306,
      "step": 188158
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6243398189544678,
      "learning_rate": 4.8406516080181055e-05,
      "loss": 2.8291,
      "step": 188159
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.016826629638672,
      "learning_rate": 4.8404288062355625e-05,
      "loss": 3.1003,
      "step": 188160
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.049877405166626,
      "learning_rate": 4.8402060091306505e-05,
      "loss": 2.8373,
      "step": 188161
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.188790798187256,
      "learning_rate": 4.839983216703427e-05,
      "loss": 3.0623,
      "step": 188162
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.069711446762085,
      "learning_rate": 4.839760428953925e-05,
      "loss": 2.6104,
      "step": 188163
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4916279315948486,
      "learning_rate": 4.8395376458821846e-05,
      "loss": 2.7041,
      "step": 188164
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.976790428161621,
      "learning_rate": 4.839314867488242e-05,
      "loss": 2.8675,
      "step": 188165
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5028510093688965,
      "learning_rate": 4.8390920937721545e-05,
      "loss": 2.92,
      "step": 188166
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.470715284347534,
      "learning_rate": 4.838869324733948e-05,
      "loss": 2.7433,
      "step": 188167
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.106016159057617,
      "learning_rate": 4.8386465603736766e-05,
      "loss": 2.6896,
      "step": 188168
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.865941047668457,
      "learning_rate": 4.83842380069137e-05,
      "loss": 2.815,
      "step": 188169
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.515906572341919,
      "learning_rate": 4.838201045687088e-05,
      "loss": 2.8836,
      "step": 188170
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.584261178970337,
      "learning_rate": 4.837978295360847e-05,
      "loss": 2.824,
      "step": 188171
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0282249450683594,
      "learning_rate": 4.837755549712707e-05,
      "loss": 2.803,
      "step": 188172
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.027278423309326,
      "learning_rate": 4.8375328087426954e-05,
      "loss": 2.9237,
      "step": 188173
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8332736492156982,
      "learning_rate": 4.837310072450868e-05,
      "loss": 3.0642,
      "step": 188174
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.662208318710327,
      "learning_rate": 4.8370873408372555e-05,
      "loss": 2.771,
      "step": 188175
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7752199172973633,
      "learning_rate": 4.8368646139019176e-05,
      "loss": 2.9066,
      "step": 188176
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3903648853302,
      "learning_rate": 4.836641891644863e-05,
      "loss": 2.7862,
      "step": 188177
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.518334150314331,
      "learning_rate": 4.836419174066164e-05,
      "loss": 2.9908,
      "step": 188178
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0677783489227295,
      "learning_rate": 4.8361964611658385e-05,
      "loss": 3.1749,
      "step": 188179
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.594646453857422,
      "learning_rate": 4.835973752943948e-05,
      "loss": 3.0475,
      "step": 188180
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4788689613342285,
      "learning_rate": 4.835751049400518e-05,
      "loss": 3.0479,
      "step": 188181
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4876861572265625,
      "learning_rate": 4.8355283505356093e-05,
      "loss": 3.0617,
      "step": 188182
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3966410160064697,
      "learning_rate": 4.835305656349238e-05,
      "loss": 2.9192,
      "step": 188183
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8511853218078613,
      "learning_rate": 4.835082966841467e-05,
      "loss": 2.9046,
      "step": 188184
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9301884174346924,
      "learning_rate": 4.8348602820123204e-05,
      "loss": 2.8638,
      "step": 188185
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.129326581954956,
      "learning_rate": 4.8346376018618546e-05,
      "loss": 2.9074,
      "step": 188186
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0122597217559814,
      "learning_rate": 4.8344149263900966e-05,
      "loss": 2.6358,
      "step": 188187
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.566150665283203,
      "learning_rate": 4.834192255597112e-05,
      "loss": 2.8178,
      "step": 188188
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.758920431137085,
      "learning_rate": 4.833969589482912e-05,
      "loss": 2.9167,
      "step": 188189
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.622288942337036,
      "learning_rate": 4.833746928047559e-05,
      "loss": 2.8887,
      "step": 188190
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8470335006713867,
      "learning_rate": 4.83352427129108e-05,
      "loss": 3.0538,
      "step": 188191
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.340702772140503,
      "learning_rate": 4.833301619213532e-05,
      "loss": 3.3807,
      "step": 188192
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1546576023101807,
      "learning_rate": 4.833078971814941e-05,
      "loss": 2.7974,
      "step": 188193
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4301059246063232,
      "learning_rate": 4.8328563290953705e-05,
      "loss": 2.9079,
      "step": 188194
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.693927764892578,
      "learning_rate": 4.832633691054833e-05,
      "loss": 3.0224,
      "step": 188195
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8605635166168213,
      "learning_rate": 4.832411057693391e-05,
      "loss": 2.7581,
      "step": 188196
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.126305341720581,
      "learning_rate": 4.832188429011068e-05,
      "loss": 2.9525,
      "step": 188197
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.988351821899414,
      "learning_rate": 4.831965805007926e-05,
      "loss": 3.2009,
      "step": 188198
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9248297214508057,
      "learning_rate": 4.8317431856839906e-05,
      "loss": 2.8513,
      "step": 188199
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.825086832046509,
      "learning_rate": 4.8315205710393226e-05,
      "loss": 3.0747,
      "step": 188200
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8523190021514893,
      "learning_rate": 4.831297961073934e-05,
      "loss": 3.1745,
      "step": 188201
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.766822099685669,
      "learning_rate": 4.8310753557878935e-05,
      "loss": 3.0758,
      "step": 188202
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.744030237197876,
      "learning_rate": 4.830852755181219e-05,
      "loss": 3.2031,
      "step": 188203
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6063053607940674,
      "learning_rate": 4.8306301592539755e-05,
      "loss": 2.9097,
      "step": 188204
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6079530715942383,
      "learning_rate": 4.830407568006185e-05,
      "loss": 3.0158,
      "step": 188205
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8258745670318604,
      "learning_rate": 4.830184981437908e-05,
      "loss": 3.3496,
      "step": 188206
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5488319396972656,
      "learning_rate": 4.829962399549164e-05,
      "loss": 2.9896,
      "step": 188207
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.936919927597046,
      "learning_rate": 4.8297398223400106e-05,
      "loss": 3.0157,
      "step": 188208
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8916969299316406,
      "learning_rate": 4.8295172498104773e-05,
      "loss": 2.9804,
      "step": 188209
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.791210412979126,
      "learning_rate": 4.829294681960617e-05,
      "loss": 3.0163,
      "step": 188210
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6639320850372314,
      "learning_rate": 4.82907211879046e-05,
      "loss": 3.0619,
      "step": 188211
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.528787612915039,
      "learning_rate": 4.82884956030007e-05,
      "loss": 2.9204,
      "step": 188212
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.9945647716522217,
      "learning_rate": 4.828627006489456e-05,
      "loss": 3.0223,
      "step": 188213
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1552934646606445,
      "learning_rate": 4.828404457358682e-05,
      "loss": 2.9765,
      "step": 188214
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4625768661499023,
      "learning_rate": 4.828181912907778e-05,
      "loss": 2.8272,
      "step": 188215
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.339444637298584,
      "learning_rate": 4.827959373136794e-05,
      "loss": 2.8798,
      "step": 188216
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8428478240966797,
      "learning_rate": 4.827736838045763e-05,
      "loss": 3.1133,
      "step": 188217
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1447622776031494,
      "learning_rate": 4.827514307634744e-05,
      "loss": 3.0695,
      "step": 188218
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.279431104660034,
      "learning_rate": 4.8272917819037495e-05,
      "loss": 2.793,
      "step": 188219
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.908135175704956,
      "learning_rate": 4.827069260852847e-05,
      "loss": 2.488,
      "step": 188220
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.475292444229126,
      "learning_rate": 4.826846744482058e-05,
      "loss": 2.8979,
      "step": 188221
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.02128529548645,
      "learning_rate": 4.8266242327914416e-05,
      "loss": 3.0078,
      "step": 188222
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.302278518676758,
      "learning_rate": 4.8264017257810216e-05,
      "loss": 3.0614,
      "step": 188223
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7778003215789795,
      "learning_rate": 4.826179223450858e-05,
      "loss": 3.1271,
      "step": 188224
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.062448740005493,
      "learning_rate": 4.825956725800983e-05,
      "loss": 2.8896,
      "step": 188225
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7400269508361816,
      "learning_rate": 4.8257342328314386e-05,
      "loss": 2.6672,
      "step": 188226
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9609947204589844,
      "learning_rate": 4.8255117445422566e-05,
      "loss": 2.9429,
      "step": 188227
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.202308177947998,
      "learning_rate": 4.8252892609334934e-05,
      "loss": 2.7669,
      "step": 188228
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9022128582000732,
      "learning_rate": 4.825066782005177e-05,
      "loss": 2.985,
      "step": 188229
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6030938625335693,
      "learning_rate": 4.8248443077573664e-05,
      "loss": 2.7451,
      "step": 188230
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.213428497314453,
      "learning_rate": 4.824621838190088e-05,
      "loss": 3.0978,
      "step": 188231
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.892533540725708,
      "learning_rate": 4.824399373303386e-05,
      "loss": 2.812,
      "step": 188232
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.781909465789795,
      "learning_rate": 4.8241769130973064e-05,
      "loss": 2.8357,
      "step": 188233
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0921216011047363,
      "learning_rate": 4.8239544575718894e-05,
      "loss": 2.8972,
      "step": 188234
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5905938148498535,
      "learning_rate": 4.823732006727164e-05,
      "loss": 3.0195,
      "step": 188235
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4591257572174072,
      "learning_rate": 4.8235095605631926e-05,
      "loss": 2.8664,
      "step": 188236
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6758832931518555,
      "learning_rate": 4.823287119080006e-05,
      "loss": 3.1963,
      "step": 188237
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.797010898590088,
      "learning_rate": 4.8230646822776354e-05,
      "loss": 2.9375,
      "step": 188238
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6496338844299316,
      "learning_rate": 4.822842250156144e-05,
      "loss": 3.0378,
      "step": 188239
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.936032295227051,
      "learning_rate": 4.822619822715558e-05,
      "loss": 3.0252,
      "step": 188240
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.544142723083496,
      "learning_rate": 4.822397399955917e-05,
      "loss": 2.8808,
      "step": 188241
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.832571268081665,
      "learning_rate": 4.822174981877272e-05,
      "loss": 2.9576,
      "step": 188242
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8503477573394775,
      "learning_rate": 4.821952568479664e-05,
      "loss": 3.034,
      "step": 188243
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.159627914428711,
      "learning_rate": 4.82173015976312e-05,
      "loss": 3.0754,
      "step": 188244
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.643677234649658,
      "learning_rate": 4.8215077557276985e-05,
      "loss": 3.028,
      "step": 188245
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.901308298110962,
      "learning_rate": 4.8212853563734286e-05,
      "loss": 2.5059,
      "step": 188246
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0455453395843506,
      "learning_rate": 4.821062961700365e-05,
      "loss": 3.0072,
      "step": 188247
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.055102825164795,
      "learning_rate": 4.82084057170854e-05,
      "loss": 2.9693,
      "step": 188248
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7220029830932617,
      "learning_rate": 4.820618186397993e-05,
      "loss": 2.71,
      "step": 188249
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.759722948074341,
      "learning_rate": 4.820395805768765e-05,
      "loss": 2.8967,
      "step": 188250
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6102559566497803,
      "learning_rate": 4.820173429820906e-05,
      "loss": 2.7926,
      "step": 188251
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.749380111694336,
      "learning_rate": 4.819951058554449e-05,
      "loss": 2.7328,
      "step": 188252
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0175552368164062,
      "learning_rate": 4.81972869196944e-05,
      "loss": 2.9778,
      "step": 188253
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5285022258758545,
      "learning_rate": 4.819506330065923e-05,
      "loss": 2.853,
      "step": 188254
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.069009780883789,
      "learning_rate": 4.819283972843931e-05,
      "loss": 2.5964,
      "step": 188255
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5993690490722656,
      "learning_rate": 4.819061620303505e-05,
      "loss": 3.0599,
      "step": 188256
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.524280071258545,
      "learning_rate": 4.8188392724446964e-05,
      "loss": 2.9836,
      "step": 188257
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5690841674804688,
      "learning_rate": 4.818616929267537e-05,
      "loss": 2.9395,
      "step": 188258
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7592809200286865,
      "learning_rate": 4.818394590772076e-05,
      "loss": 3.2516,
      "step": 188259
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1347930431365967,
      "learning_rate": 4.8181722569583456e-05,
      "loss": 2.8824,
      "step": 188260
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8703391551971436,
      "learning_rate": 4.8179499278264044e-05,
      "loss": 2.7803,
      "step": 188261
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.905975580215454,
      "learning_rate": 4.817727603376268e-05,
      "loss": 2.9556,
      "step": 188262
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8855502605438232,
      "learning_rate": 4.817505283608e-05,
      "loss": 2.9792,
      "step": 188263
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.974168300628662,
      "learning_rate": 4.817282968521626e-05,
      "loss": 2.9391,
      "step": 188264
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4691522121429443,
      "learning_rate": 4.817060658117201e-05,
      "loss": 3.1984,
      "step": 188265
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9185233116149902,
      "learning_rate": 4.816838352394751e-05,
      "loss": 2.9391,
      "step": 188266
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5761032104492188,
      "learning_rate": 4.8166160513543415e-05,
      "loss": 2.823,
      "step": 188267
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.7546355724334717,
      "learning_rate": 4.816393754995984e-05,
      "loss": 3.142,
      "step": 188268
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.705589771270752,
      "learning_rate": 4.8161714633197415e-05,
      "loss": 3.0304,
      "step": 188269
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.51503324508667,
      "learning_rate": 4.815949176325643e-05,
      "loss": 2.7045,
      "step": 188270
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2525393962860107,
      "learning_rate": 4.815726894013739e-05,
      "loss": 2.7522,
      "step": 188271
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0872645378112793,
      "learning_rate": 4.81550461638406e-05,
      "loss": 2.7855,
      "step": 188272
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.956778049468994,
      "learning_rate": 4.815282343436673e-05,
      "loss": 3.0515,
      "step": 188273
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3484115600585938,
      "learning_rate": 4.8150600751715794e-05,
      "loss": 2.789,
      "step": 188274
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.774627447128296,
      "learning_rate": 4.814837811588854e-05,
      "loss": 2.9839,
      "step": 188275
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.631446123123169,
      "learning_rate": 4.814615552688517e-05,
      "loss": 3.2016,
      "step": 188276
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.507275342941284,
      "learning_rate": 4.814393298470623e-05,
      "loss": 2.8086,
      "step": 188277
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6662914752960205,
      "learning_rate": 4.814171048935205e-05,
      "loss": 2.8857,
      "step": 188278
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.881383180618286,
      "learning_rate": 4.81394880408232e-05,
      "loss": 3.03,
      "step": 188279
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4522974491119385,
      "learning_rate": 4.8137265639119835e-05,
      "loss": 2.8043,
      "step": 188280
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7789700031280518,
      "learning_rate": 4.813504328424258e-05,
      "loss": 2.777,
      "step": 188281
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.770462989807129,
      "learning_rate": 4.81328209761917e-05,
      "loss": 2.6914,
      "step": 188282
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.503512144088745,
      "learning_rate": 4.813059871496776e-05,
      "loss": 2.8951,
      "step": 188283
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7969679832458496,
      "learning_rate": 4.812837650057103e-05,
      "loss": 2.9697,
      "step": 188284
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.605844259262085,
      "learning_rate": 4.812615433300211e-05,
      "loss": 2.7338,
      "step": 188285
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4608612060546875,
      "learning_rate": 4.812393221226116e-05,
      "loss": 2.9851,
      "step": 188286
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8608078956604004,
      "learning_rate": 4.812171013834882e-05,
      "loss": 3.1504,
      "step": 188287
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.142591714859009,
      "learning_rate": 4.811948811126529e-05,
      "loss": 2.8322,
      "step": 188288
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.858368158340454,
      "learning_rate": 4.811726613101123e-05,
      "loss": 3.0693,
      "step": 188289
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7994837760925293,
      "learning_rate": 4.8115044197586816e-05,
      "loss": 2.8092,
      "step": 188290
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.708988666534424,
      "learning_rate": 4.811282231099274e-05,
      "loss": 2.9136,
      "step": 188291
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.604078769683838,
      "learning_rate": 4.811060047122907e-05,
      "loss": 2.6894,
      "step": 188292
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.104778289794922,
      "learning_rate": 4.8108378678296464e-05,
      "loss": 3.0764,
      "step": 188293
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.102005958557129,
      "learning_rate": 4.810615693219521e-05,
      "loss": 3.176,
      "step": 188294
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.486909866333008,
      "learning_rate": 4.810393523292585e-05,
      "loss": 3.0099,
      "step": 188295
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.59495210647583,
      "learning_rate": 4.810171358048866e-05,
      "loss": 2.9205,
      "step": 188296
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.17695689201355,
      "learning_rate": 4.8099491974884254e-05,
      "loss": 3.1027,
      "step": 188297
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.967726707458496,
      "learning_rate": 4.8097270416112744e-05,
      "loss": 2.994,
      "step": 188298
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6925883293151855,
      "learning_rate": 4.8095048904174804e-05,
      "loss": 3.0575,
      "step": 188299
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4080023765563965,
      "learning_rate": 4.809282743907067e-05,
      "loss": 3.0405,
      "step": 188300
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.871063232421875,
      "learning_rate": 4.8090606020800906e-05,
      "loss": 2.8613,
      "step": 188301
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8025689125061035,
      "learning_rate": 4.8088384649365776e-05,
      "loss": 3.0647,
      "step": 188302
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8323814868927,
      "learning_rate": 4.808616332476585e-05,
      "loss": 2.9779,
      "step": 188303
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3534324169158936,
      "learning_rate": 4.8083942047001455e-05,
      "loss": 2.7694,
      "step": 188304
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.4685170650482178,
      "learning_rate": 4.8081720816073e-05,
      "loss": 2.912,
      "step": 188305
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7005841732025146,
      "learning_rate": 4.8079499631980847e-05,
      "loss": 2.9613,
      "step": 188306
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.770834445953369,
      "learning_rate": 4.807727849472556e-05,
      "loss": 2.9657,
      "step": 188307
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4005818367004395,
      "learning_rate": 4.807505740430737e-05,
      "loss": 2.9406,
      "step": 188308
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.3444507122039795,
      "learning_rate": 4.807283636072685e-05,
      "loss": 2.7152,
      "step": 188309
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7013397216796875,
      "learning_rate": 4.807061536398436e-05,
      "loss": 2.7628,
      "step": 188310
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.105052471160889,
      "learning_rate": 4.806839441408028e-05,
      "loss": 2.8504,
      "step": 188311
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6436705589294434,
      "learning_rate": 4.8066173511014994e-05,
      "loss": 2.8151,
      "step": 188312
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.099958658218384,
      "learning_rate": 4.806395265478901e-05,
      "loss": 2.9679,
      "step": 188313
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0999345779418945,
      "learning_rate": 4.806173184540265e-05,
      "loss": 3.3039,
      "step": 188314
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.035139560699463,
      "learning_rate": 4.80595110828564e-05,
      "loss": 2.6044,
      "step": 188315
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3556201457977295,
      "learning_rate": 4.805729036715067e-05,
      "loss": 2.864,
      "step": 188316
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.544576406478882,
      "learning_rate": 4.805506969828584e-05,
      "loss": 2.956,
      "step": 188317
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7076950073242188,
      "learning_rate": 4.805284907626228e-05,
      "loss": 2.7367,
      "step": 188318
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.593217611312866,
      "learning_rate": 4.805062850108048e-05,
      "loss": 3.0945,
      "step": 188319
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8755393028259277,
      "learning_rate": 4.804840797274078e-05,
      "loss": 2.8428,
      "step": 188320
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7566964626312256,
      "learning_rate": 4.80461874912437e-05,
      "loss": 3.0152,
      "step": 188321
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.068601131439209,
      "learning_rate": 4.804396705658959e-05,
      "loss": 2.9595,
      "step": 188322
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8677704334259033,
      "learning_rate": 4.804174666877878e-05,
      "loss": 2.7826,
      "step": 188323
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8951122760772705,
      "learning_rate": 4.803952632781186e-05,
      "loss": 2.9918,
      "step": 188324
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.8920278549194336,
      "learning_rate": 4.803730603368914e-05,
      "loss": 2.8706,
      "step": 188325
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.877926826477051,
      "learning_rate": 4.803508578641098e-05,
      "loss": 2.7704,
      "step": 188326
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1388325691223145,
      "learning_rate": 4.803286558597791e-05,
      "loss": 2.8605,
      "step": 188327
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8956093788146973,
      "learning_rate": 4.803064543239027e-05,
      "loss": 2.9443,
      "step": 188328
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9789679050445557,
      "learning_rate": 4.8028425325648455e-05,
      "loss": 2.8447,
      "step": 188329
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.973517417907715,
      "learning_rate": 4.8026205265752973e-05,
      "loss": 3.2543,
      "step": 188330
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.043624401092529,
      "learning_rate": 4.802398525270408e-05,
      "loss": 2.7241,
      "step": 188331
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2643344402313232,
      "learning_rate": 4.802176528650238e-05,
      "loss": 3.0764,
      "step": 188332
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.861151695251465,
      "learning_rate": 4.801954536714817e-05,
      "loss": 2.8699,
      "step": 188333
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0529510974884033,
      "learning_rate": 4.801732549464189e-05,
      "loss": 2.8976,
      "step": 188334
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.679537773132324,
      "learning_rate": 4.80151056689839e-05,
      "loss": 3.0463,
      "step": 188335
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.529168128967285,
      "learning_rate": 4.80128858901747e-05,
      "loss": 2.8955,
      "step": 188336
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.715428113937378,
      "learning_rate": 4.801066615821459e-05,
      "loss": 2.647,
      "step": 188337
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.854382038116455,
      "learning_rate": 4.8008446473104146e-05,
      "loss": 3.1462,
      "step": 188338
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.8021295070648193,
      "learning_rate": 4.800622683484369e-05,
      "loss": 2.9624,
      "step": 188339
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4526946544647217,
      "learning_rate": 4.800400724343362e-05,
      "loss": 3.0128,
      "step": 188340
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7714920043945312,
      "learning_rate": 4.800178769887427e-05,
      "loss": 2.8252,
      "step": 188341
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1030266284942627,
      "learning_rate": 4.799956820116625e-05,
      "loss": 2.7367,
      "step": 188342
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.746765375137329,
      "learning_rate": 4.799734875030978e-05,
      "loss": 3.0347,
      "step": 188343
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9080605506896973,
      "learning_rate": 4.799512934630544e-05,
      "loss": 2.7749,
      "step": 188344
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.650516986846924,
      "learning_rate": 4.7992909989153486e-05,
      "loss": 2.9485,
      "step": 188345
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.570676326751709,
      "learning_rate": 4.799069067885455e-05,
      "loss": 2.7624,
      "step": 188346
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3616292476654053,
      "learning_rate": 4.7988471415408766e-05,
      "loss": 2.7813,
      "step": 188347
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.289514064788818,
      "learning_rate": 4.7986252198816746e-05,
      "loss": 3.0123,
      "step": 188348
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9853312969207764,
      "learning_rate": 4.7984033029078774e-05,
      "loss": 3.1588,
      "step": 188349
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.835294008255005,
      "learning_rate": 4.798181390619539e-05,
      "loss": 2.8287,
      "step": 188350
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.532925844192505,
      "learning_rate": 4.7979594830166855e-05,
      "loss": 3.0221,
      "step": 188351
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1073215007781982,
      "learning_rate": 4.7977375800993844e-05,
      "loss": 2.9979,
      "step": 188352
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.346765518188477,
      "learning_rate": 4.797515681867642e-05,
      "loss": 2.898,
      "step": 188353
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.972590684890747,
      "learning_rate": 4.797293788321528e-05,
      "loss": 3.1231,
      "step": 188354
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.523744821548462,
      "learning_rate": 4.797071899461066e-05,
      "loss": 2.9868,
      "step": 188355
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.812969446182251,
      "learning_rate": 4.796850015286309e-05,
      "loss": 3.1249,
      "step": 188356
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.931797504425049,
      "learning_rate": 4.796628135797287e-05,
      "loss": 2.9927,
      "step": 188357
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2311294078826904,
      "learning_rate": 4.79640626099406e-05,
      "loss": 2.9067,
      "step": 188358
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7339999675750732,
      "learning_rate": 4.796184390876645e-05,
      "loss": 3.1916,
      "step": 188359
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.768174409866333,
      "learning_rate": 4.795962525445102e-05,
      "loss": 2.7493,
      "step": 188360
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.815664768218994,
      "learning_rate": 4.795740664699457e-05,
      "loss": 2.7417,
      "step": 188361
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9643640518188477,
      "learning_rate": 4.7955188086397664e-05,
      "loss": 2.7405,
      "step": 188362
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.486311435699463,
      "learning_rate": 4.795296957266058e-05,
      "loss": 3.2551,
      "step": 188363
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7522568702697754,
      "learning_rate": 4.795075110578395e-05,
      "loss": 2.7089,
      "step": 188364
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.9335083961486816,
      "learning_rate": 4.794853268576786e-05,
      "loss": 3.1256,
      "step": 188365
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.040555953979492,
      "learning_rate": 4.794631431261299e-05,
      "loss": 2.6846,
      "step": 188366
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.430617332458496,
      "learning_rate": 4.7944095986319594e-05,
      "loss": 3.0171,
      "step": 188367
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.203965425491333,
      "learning_rate": 4.794187770688822e-05,
      "loss": 2.8253,
      "step": 188368
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6630373001098633,
      "learning_rate": 4.7939659474319124e-05,
      "loss": 3.2059,
      "step": 188369
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.908233404159546,
      "learning_rate": 4.7937441288612875e-05,
      "loss": 2.8013,
      "step": 188370
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.399369478225708,
      "learning_rate": 4.793522314976981e-05,
      "loss": 3.2043,
      "step": 188371
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2000668048858643,
      "learning_rate": 4.793300505779035e-05,
      "loss": 2.9239,
      "step": 188372
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7595818042755127,
      "learning_rate": 4.793078701267484e-05,
      "loss": 2.8193,
      "step": 188373
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5641379356384277,
      "learning_rate": 4.792856901442385e-05,
      "loss": 3.0343,
      "step": 188374
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.072742223739624,
      "learning_rate": 4.7926351063037596e-05,
      "loss": 3.1474,
      "step": 188375
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.875741720199585,
      "learning_rate": 4.7924133158516654e-05,
      "loss": 2.9587,
      "step": 188376
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5528619289398193,
      "learning_rate": 4.7921915300861394e-05,
      "loss": 2.8962,
      "step": 188377
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.833486795425415,
      "learning_rate": 4.7919697490072175e-05,
      "loss": 2.9599,
      "step": 188378
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7673118114471436,
      "learning_rate": 4.79174797261494e-05,
      "loss": 3.1215,
      "step": 188379
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.725013732910156,
      "learning_rate": 4.7915262009093605e-05,
      "loss": 3.1675,
      "step": 188380
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.843849182128906,
      "learning_rate": 4.791304433890502e-05,
      "loss": 2.8976,
      "step": 188381
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6298954486846924,
      "learning_rate": 4.7910826715584235e-05,
      "loss": 2.7357,
      "step": 188382
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4887585639953613,
      "learning_rate": 4.79086091391316e-05,
      "loss": 2.9497,
      "step": 188383
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6537866592407227,
      "learning_rate": 4.790639160954751e-05,
      "loss": 2.9609,
      "step": 188384
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8676934242248535,
      "learning_rate": 4.79041741268323e-05,
      "loss": 2.9075,
      "step": 188385
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.632384777069092,
      "learning_rate": 4.790195669098656e-05,
      "loss": 2.8543,
      "step": 188386
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3685669898986816,
      "learning_rate": 4.789973930201052e-05,
      "loss": 3.1303,
      "step": 188387
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.613537311553955,
      "learning_rate": 4.789752195990473e-05,
      "loss": 2.6689,
      "step": 188388
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9147305488586426,
      "learning_rate": 4.789530466466958e-05,
      "loss": 2.7919,
      "step": 188389
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.3089303970336914,
      "learning_rate": 4.789308741630543e-05,
      "loss": 3.0889,
      "step": 188390
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1854329109191895,
      "learning_rate": 4.789087021481263e-05,
      "loss": 2.8732,
      "step": 188391
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5313262939453125,
      "learning_rate": 4.788865306019176e-05,
      "loss": 3.0393,
      "step": 188392
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.555694341659546,
      "learning_rate": 4.7886435952443106e-05,
      "loss": 2.8359,
      "step": 188393
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.970177173614502,
      "learning_rate": 4.788421889156715e-05,
      "loss": 2.8945,
      "step": 188394
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5930140018463135,
      "learning_rate": 4.78820018775643e-05,
      "loss": 2.8563,
      "step": 188395
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.542938232421875,
      "learning_rate": 4.787978491043496e-05,
      "loss": 2.8981,
      "step": 188396
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3639986515045166,
      "learning_rate": 4.787756799017942e-05,
      "loss": 2.7231,
      "step": 188397
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.691399335861206,
      "learning_rate": 4.7875351116798286e-05,
      "loss": 3.0114,
      "step": 188398
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.251528739929199,
      "learning_rate": 4.7873134290291825e-05,
      "loss": 2.8793,
      "step": 188399
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5794594287872314,
      "learning_rate": 4.787091751066057e-05,
      "loss": 3.0868,
      "step": 188400
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9919958114624023,
      "learning_rate": 4.786870077790488e-05,
      "loss": 2.9784,
      "step": 188401
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0676653385162354,
      "learning_rate": 4.7866484092025125e-05,
      "loss": 2.908,
      "step": 188402
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5498690605163574,
      "learning_rate": 4.786426745302171e-05,
      "loss": 3.0189,
      "step": 188403
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.006790637969971,
      "learning_rate": 4.7862050860895163e-05,
      "loss": 2.8667,
      "step": 188404
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.999274730682373,
      "learning_rate": 4.7859834315645715e-05,
      "loss": 2.8437,
      "step": 188405
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7531027793884277,
      "learning_rate": 4.7857617817273977e-05,
      "loss": 3.0279,
      "step": 188406
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0243167877197266,
      "learning_rate": 4.785540136578027e-05,
      "loss": 2.9154,
      "step": 188407
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0319631099700928,
      "learning_rate": 4.7853184961164925e-05,
      "loss": 2.9003,
      "step": 188408
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.471731185913086,
      "learning_rate": 4.785096860342852e-05,
      "loss": 3.0492,
      "step": 188409
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.267746686935425,
      "learning_rate": 4.784875229257134e-05,
      "loss": 2.9808,
      "step": 188410
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.127272129058838,
      "learning_rate": 4.784653602859381e-05,
      "loss": 3.0209,
      "step": 188411
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.123164176940918,
      "learning_rate": 4.78443198114964e-05,
      "loss": 2.8628,
      "step": 188412
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9993786811828613,
      "learning_rate": 4.7842103641279526e-05,
      "loss": 2.7602,
      "step": 188413
    },
    {
      "epoch": 2.45,
      "grad_norm": 6.010325908660889,
      "learning_rate": 4.783988751794349e-05,
      "loss": 2.887,
      "step": 188414
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.260519027709961,
      "learning_rate": 4.783767144148881e-05,
      "loss": 2.8127,
      "step": 188415
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.567082166671753,
      "learning_rate": 4.78354554119159e-05,
      "loss": 2.78,
      "step": 188416
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6987836360931396,
      "learning_rate": 4.783323942922509e-05,
      "loss": 3.2223,
      "step": 188417
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.291212320327759,
      "learning_rate": 4.783102349341688e-05,
      "loss": 2.6897,
      "step": 188418
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.127605676651001,
      "learning_rate": 4.782880760449163e-05,
      "loss": 3.126,
      "step": 188419
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.611746788024902,
      "learning_rate": 4.78265917624497e-05,
      "loss": 2.9997,
      "step": 188420
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2681667804718018,
      "learning_rate": 4.7824375967291684e-05,
      "loss": 2.8181,
      "step": 188421
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.274854898452759,
      "learning_rate": 4.782216021901776e-05,
      "loss": 2.9016,
      "step": 188422
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.3252241611480713,
      "learning_rate": 4.781994451762853e-05,
      "loss": 2.7699,
      "step": 188423
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.605175256729126,
      "learning_rate": 4.781772886312436e-05,
      "loss": 2.7879,
      "step": 188424
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.446012258529663,
      "learning_rate": 4.7815513255505624e-05,
      "loss": 2.9038,
      "step": 188425
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.073089361190796,
      "learning_rate": 4.7813297694772646e-05,
      "loss": 2.9572,
      "step": 188426
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0776684284210205,
      "learning_rate": 4.781108218092603e-05,
      "loss": 3.0278,
      "step": 188427
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.159965515136719,
      "learning_rate": 4.780886671396601e-05,
      "loss": 2.7615,
      "step": 188428
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.703423500061035,
      "learning_rate": 4.780665129389318e-05,
      "loss": 2.9995,
      "step": 188429
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8420820236206055,
      "learning_rate": 4.780443592070775e-05,
      "loss": 3.1427,
      "step": 188430
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.44320011138916,
      "learning_rate": 4.7802220594410415e-05,
      "loss": 2.9226,
      "step": 188431
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.228468418121338,
      "learning_rate": 4.7800005315001236e-05,
      "loss": 3.0136,
      "step": 188432
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.733309507369995,
      "learning_rate": 4.779779008248088e-05,
      "loss": 2.8279,
      "step": 188433
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.3823893070220947,
      "learning_rate": 4.7795574896849587e-05,
      "loss": 3.0258,
      "step": 188434
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.6593098640441895,
      "learning_rate": 4.779335975810795e-05,
      "loss": 2.6932,
      "step": 188435
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.141984224319458,
      "learning_rate": 4.779114466625621e-05,
      "loss": 2.9988,
      "step": 188436
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6492786407470703,
      "learning_rate": 4.7788929621294925e-05,
      "loss": 3.0287,
      "step": 188437
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.250434160232544,
      "learning_rate": 4.778671462322447e-05,
      "loss": 2.9113,
      "step": 188438
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.593437433242798,
      "learning_rate": 4.778449967204516e-05,
      "loss": 2.8316,
      "step": 188439
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.7576205730438232,
      "learning_rate": 4.778228476775745e-05,
      "loss": 2.7521,
      "step": 188440
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.284753799438477,
      "learning_rate": 4.778006991036183e-05,
      "loss": 3.113,
      "step": 188441
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.134049892425537,
      "learning_rate": 4.77778550998586e-05,
      "loss": 2.8427,
      "step": 188442
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.152301788330078,
      "learning_rate": 4.777564033624829e-05,
      "loss": 2.9274,
      "step": 188443
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.014941453933716,
      "learning_rate": 4.7773425619531235e-05,
      "loss": 2.8516,
      "step": 188444
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.710844993591309,
      "learning_rate": 4.777121094970787e-05,
      "loss": 2.9471,
      "step": 188445
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5079033374786377,
      "learning_rate": 4.776899632677853e-05,
      "loss": 3.0658,
      "step": 188446
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.737311363220215,
      "learning_rate": 4.776678175074378e-05,
      "loss": 3.0256,
      "step": 188447
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0285394191741943,
      "learning_rate": 4.776456722160388e-05,
      "loss": 2.7671,
      "step": 188448
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5845675468444824,
      "learning_rate": 4.7762352739359336e-05,
      "loss": 2.9504,
      "step": 188449
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.358213186264038,
      "learning_rate": 4.7760138304010575e-05,
      "loss": 2.8634,
      "step": 188450
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5381739139556885,
      "learning_rate": 4.775792391555794e-05,
      "loss": 2.9824,
      "step": 188451
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9188778400421143,
      "learning_rate": 4.775570957400182e-05,
      "loss": 2.6841,
      "step": 188452
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.076753616333008,
      "learning_rate": 4.775349527934273e-05,
      "loss": 2.8966,
      "step": 188453
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3556880950927734,
      "learning_rate": 4.7751281031580945e-05,
      "loss": 2.7349,
      "step": 188454
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.467953205108643,
      "learning_rate": 4.774906683071705e-05,
      "loss": 2.9875,
      "step": 188455
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.082076072692871,
      "learning_rate": 4.774685267675134e-05,
      "loss": 2.6044,
      "step": 188456
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.555384635925293,
      "learning_rate": 4.774463856968429e-05,
      "loss": 2.9458,
      "step": 188457
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.05145525932312,
      "learning_rate": 4.774242450951619e-05,
      "loss": 3.0066,
      "step": 188458
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.952983856201172,
      "learning_rate": 4.774021049624757e-05,
      "loss": 2.9951,
      "step": 188459
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8613457679748535,
      "learning_rate": 4.7737996529878765e-05,
      "loss": 2.9108,
      "step": 188460
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.292734146118164,
      "learning_rate": 4.773578261041031e-05,
      "loss": 2.7744,
      "step": 188461
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.063689231872559,
      "learning_rate": 4.773356873784251e-05,
      "loss": 2.7031,
      "step": 188462
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0026748180389404,
      "learning_rate": 4.773135491217582e-05,
      "loss": 3.0403,
      "step": 188463
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5642812252044678,
      "learning_rate": 4.772914113341055e-05,
      "loss": 2.8655,
      "step": 188464
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.241025447845459,
      "learning_rate": 4.7726927401547264e-05,
      "loss": 2.9687,
      "step": 188465
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.934983491897583,
      "learning_rate": 4.772471371658626e-05,
      "loss": 2.9961,
      "step": 188466
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.654191017150879,
      "learning_rate": 4.7722500078528034e-05,
      "loss": 2.8214,
      "step": 188467
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.25136399269104,
      "learning_rate": 4.772028648737296e-05,
      "loss": 2.6669,
      "step": 188468
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.336439371109009,
      "learning_rate": 4.7718072943121466e-05,
      "loss": 3.074,
      "step": 188469
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.898311138153076,
      "learning_rate": 4.771585944577385e-05,
      "loss": 3.0773,
      "step": 188470
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.429565906524658,
      "learning_rate": 4.7713645995330717e-05,
      "loss": 2.7479,
      "step": 188471
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.723956346511841,
      "learning_rate": 4.77114325917923e-05,
      "loss": 2.8846,
      "step": 188472
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5450117588043213,
      "learning_rate": 4.7709219235159154e-05,
      "loss": 2.9676,
      "step": 188473
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7899394035339355,
      "learning_rate": 4.770700592543163e-05,
      "loss": 2.9228,
      "step": 188474
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.740330219268799,
      "learning_rate": 4.770479266261011e-05,
      "loss": 3.0878,
      "step": 188475
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6644434928894043,
      "learning_rate": 4.770257944669498e-05,
      "loss": 3.2511,
      "step": 188476
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7156641483306885,
      "learning_rate": 4.7700366277686784e-05,
      "loss": 2.7565,
      "step": 188477
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7876837253570557,
      "learning_rate": 4.769815315558577e-05,
      "loss": 2.6536,
      "step": 188478
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.8108158111572266,
      "learning_rate": 4.76959400803925e-05,
      "loss": 3.0022,
      "step": 188479
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.460045576095581,
      "learning_rate": 4.769372705210735e-05,
      "loss": 2.9774,
      "step": 188480
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.974492311477661,
      "learning_rate": 4.769151407073066e-05,
      "loss": 3.0915,
      "step": 188481
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9717342853546143,
      "learning_rate": 4.768930113626283e-05,
      "loss": 3.2774,
      "step": 188482
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5313632488250732,
      "learning_rate": 4.76870882487044e-05,
      "loss": 3.0872,
      "step": 188483
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5613505840301514,
      "learning_rate": 4.7684875408055615e-05,
      "loss": 2.8476,
      "step": 188484
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1787970066070557,
      "learning_rate": 4.7682662614317034e-05,
      "loss": 2.7338,
      "step": 188485
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6302249431610107,
      "learning_rate": 4.7680449867489036e-05,
      "loss": 2.7234,
      "step": 188486
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6919748783111572,
      "learning_rate": 4.767823716757198e-05,
      "loss": 3.0453,
      "step": 188487
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6263699531555176,
      "learning_rate": 4.7676024514566255e-05,
      "loss": 2.9354,
      "step": 188488
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.301424026489258,
      "learning_rate": 4.767381190847238e-05,
      "loss": 2.8928,
      "step": 188489
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.6947572231292725,
      "learning_rate": 4.7671599349290614e-05,
      "loss": 3.101,
      "step": 188490
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.441345453262329,
      "learning_rate": 4.766938683702155e-05,
      "loss": 3.0872,
      "step": 188491
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.787958860397339,
      "learning_rate": 4.766717437166553e-05,
      "loss": 2.8113,
      "step": 188492
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.106550216674805,
      "learning_rate": 4.7664961953222844e-05,
      "loss": 3.1513,
      "step": 188493
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.71883225440979,
      "learning_rate": 4.7662749581694104e-05,
      "loss": 2.8308,
      "step": 188494
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.601436138153076,
      "learning_rate": 4.76605372570796e-05,
      "loss": 2.9564,
      "step": 188495
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.804055690765381,
      "learning_rate": 4.76583249793797e-05,
      "loss": 3.1379,
      "step": 188496
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.8293018341064453,
      "learning_rate": 4.7656112748594944e-05,
      "loss": 2.9845,
      "step": 188497
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.283812999725342,
      "learning_rate": 4.765390056472569e-05,
      "loss": 2.9192,
      "step": 188498
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.1148133277893066,
      "learning_rate": 4.765168842777227e-05,
      "loss": 3.0412,
      "step": 188499
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6497416496276855,
      "learning_rate": 4.764947633773526e-05,
      "loss": 3.1448,
      "step": 188500
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8958029747009277,
      "learning_rate": 4.764726429461492e-05,
      "loss": 2.9612,
      "step": 188501
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7817697525024414,
      "learning_rate": 4.764505229841168e-05,
      "loss": 2.8017,
      "step": 188502
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8503146171569824,
      "learning_rate": 4.7642840349126076e-05,
      "loss": 2.8963,
      "step": 188503
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.957817316055298,
      "learning_rate": 4.764062844675831e-05,
      "loss": 2.721,
      "step": 188504
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9200427532196045,
      "learning_rate": 4.7638416591309016e-05,
      "loss": 2.6959,
      "step": 188505
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7097856998443604,
      "learning_rate": 4.763620478277852e-05,
      "loss": 3.0359,
      "step": 188506
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.0617873668670654,
      "learning_rate": 4.763399302116713e-05,
      "loss": 2.746,
      "step": 188507
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6382648944854736,
      "learning_rate": 4.76317813064754e-05,
      "loss": 3.0733,
      "step": 188508
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4245102405548096,
      "learning_rate": 4.7629569638703714e-05,
      "loss": 2.6086,
      "step": 188509
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8904454708099365,
      "learning_rate": 4.762735801785236e-05,
      "loss": 3.0925,
      "step": 188510
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.493527412414551,
      "learning_rate": 4.7625146443921936e-05,
      "loss": 2.8057,
      "step": 188511
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8427846431732178,
      "learning_rate": 4.7622934916912746e-05,
      "loss": 2.6807,
      "step": 188512
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.809340476989746,
      "learning_rate": 4.7620723436825156e-05,
      "loss": 3.0281,
      "step": 188513
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.758732318878174,
      "learning_rate": 4.76185120036597e-05,
      "loss": 2.9089,
      "step": 188514
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.690753221511841,
      "learning_rate": 4.761630061741665e-05,
      "loss": 2.9466,
      "step": 188515
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.3632466793060303,
      "learning_rate": 4.761408927809659e-05,
      "loss": 2.7993,
      "step": 188516
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.612544298171997,
      "learning_rate": 4.76118779856998e-05,
      "loss": 2.801,
      "step": 188517
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.579033851623535,
      "learning_rate": 4.760966674022674e-05,
      "loss": 3.128,
      "step": 188518
    },
    {
      "epoch": 2.45,
      "grad_norm": 5.696824073791504,
      "learning_rate": 4.7607455541677744e-05,
      "loss": 2.7656,
      "step": 188519
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.412314414978027,
      "learning_rate": 4.7605244390053376e-05,
      "loss": 2.6844,
      "step": 188520
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.327007293701172,
      "learning_rate": 4.760303328535384e-05,
      "loss": 3.1999,
      "step": 188521
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.5494167804718018,
      "learning_rate": 4.760082222757977e-05,
      "loss": 2.944,
      "step": 188522
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.9986064434051514,
      "learning_rate": 4.7598611216731464e-05,
      "loss": 2.8938,
      "step": 188523
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6155643463134766,
      "learning_rate": 4.759640025280932e-05,
      "loss": 2.7616,
      "step": 188524
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.754884958267212,
      "learning_rate": 4.7594189335813736e-05,
      "loss": 2.8482,
      "step": 188525
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.768939256668091,
      "learning_rate": 4.759197846574522e-05,
      "loss": 3.0332,
      "step": 188526
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.2684125900268555,
      "learning_rate": 4.7589767642604025e-05,
      "loss": 2.763,
      "step": 188527
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.601322650909424,
      "learning_rate": 4.758755686639073e-05,
      "loss": 2.8083,
      "step": 188528
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.857387065887451,
      "learning_rate": 4.75853461371057e-05,
      "loss": 2.7887,
      "step": 188529
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.784998893737793,
      "learning_rate": 4.7583135454749286e-05,
      "loss": 2.9765,
      "step": 188530
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.613649845123291,
      "learning_rate": 4.7580924819321874e-05,
      "loss": 2.8019,
      "step": 188531
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7210986614227295,
      "learning_rate": 4.7578714230824024e-05,
      "loss": 2.8151,
      "step": 188532
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7907981872558594,
      "learning_rate": 4.757650368925596e-05,
      "loss": 3.0789,
      "step": 188533
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.264817476272583,
      "learning_rate": 4.7574293194618265e-05,
      "loss": 2.853,
      "step": 188534
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.7653942108154297,
      "learning_rate": 4.7572082746911256e-05,
      "loss": 2.9897,
      "step": 188535
    },
    {
      "epoch": 2.45,
      "grad_norm": 4.429748058319092,
      "learning_rate": 4.756987234613541e-05,
      "loss": 2.9434,
      "step": 188536
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.4621994495391846,
      "learning_rate": 4.756766199229098e-05,
      "loss": 2.8381,
      "step": 188537
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.030322790145874,
      "learning_rate": 4.7565451685378556e-05,
      "loss": 2.8005,
      "step": 188538
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.8273208141326904,
      "learning_rate": 4.7563241425398416e-05,
      "loss": 2.9597,
      "step": 188539
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.9082584381103516,
      "learning_rate": 4.756103121235113e-05,
      "loss": 2.9435,
      "step": 188540
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.167207717895508,
      "learning_rate": 4.7558821046236974e-05,
      "loss": 2.8872,
      "step": 188541
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.3684117794036865,
      "learning_rate": 4.755661092705644e-05,
      "loss": 2.9296,
      "step": 188542
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6794497966766357,
      "learning_rate": 4.7554400854809794e-05,
      "loss": 2.8899,
      "step": 188543
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.6711840629577637,
      "learning_rate": 4.755219082949764e-05,
      "loss": 2.9197,
      "step": 188544
    },
    {
      "epoch": 2.45,
      "grad_norm": 2.792249917984009,
      "learning_rate": 4.754998085112021e-05,
      "loss": 2.9148,
      "step": 188545
    },
    {
      "epoch": 2.45,
      "grad_norm": 3.5192360877990723,
      "learning_rate": 4.754777091967806e-05,
      "loss": 2.8608,
      "step": 188546
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7050108909606934,
      "learning_rate": 4.7545561035171574e-05,
      "loss": 2.9895,
      "step": 188547
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0897727012634277,
      "learning_rate": 4.754335119760114e-05,
      "loss": 2.9984,
      "step": 188548
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.16825008392334,
      "learning_rate": 4.75411414069671e-05,
      "loss": 3.0841,
      "step": 188549
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1295316219329834,
      "learning_rate": 4.7538931663269974e-05,
      "loss": 2.7819,
      "step": 188550
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.745720386505127,
      "learning_rate": 4.753672196651004e-05,
      "loss": 2.7718,
      "step": 188551
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9911227226257324,
      "learning_rate": 4.753451231668789e-05,
      "loss": 3.0205,
      "step": 188552
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.675901412963867,
      "learning_rate": 4.7532302713803824e-05,
      "loss": 2.8947,
      "step": 188553
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1326375007629395,
      "learning_rate": 4.7530093157858286e-05,
      "loss": 3.1024,
      "step": 188554
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.795457363128662,
      "learning_rate": 4.7527883648851594e-05,
      "loss": 3.1034,
      "step": 188555
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7783944606781006,
      "learning_rate": 4.7525674186784324e-05,
      "loss": 2.8862,
      "step": 188556
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.913940191268921,
      "learning_rate": 4.7523464771656706e-05,
      "loss": 2.9548,
      "step": 188557
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0300521850585938,
      "learning_rate": 4.75212554034693e-05,
      "loss": 3.0383,
      "step": 188558
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.855645656585693,
      "learning_rate": 4.751904608222245e-05,
      "loss": 3.1084,
      "step": 188559
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.719432830810547,
      "learning_rate": 4.7516836807916624e-05,
      "loss": 3.1032,
      "step": 188560
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.970144748687744,
      "learning_rate": 4.751462758055208e-05,
      "loss": 3.1081,
      "step": 188561
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9043936729431152,
      "learning_rate": 4.751241840012942e-05,
      "loss": 2.8838,
      "step": 188562
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.850388288497925,
      "learning_rate": 4.751020926664887e-05,
      "loss": 3.0705,
      "step": 188563
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.448901414871216,
      "learning_rate": 4.7508000180111005e-05,
      "loss": 2.8881,
      "step": 188564
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9970827102661133,
      "learning_rate": 4.750579114051619e-05,
      "loss": 2.8632,
      "step": 188565
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.85331392288208,
      "learning_rate": 4.750358214786483e-05,
      "loss": 3.0746,
      "step": 188566
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9767873287200928,
      "learning_rate": 4.7501373202157245e-05,
      "loss": 3.0732,
      "step": 188567
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.14347505569458,
      "learning_rate": 4.749916430339398e-05,
      "loss": 2.8428,
      "step": 188568
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.776732921600342,
      "learning_rate": 4.7496955451575293e-05,
      "loss": 3.0181,
      "step": 188569
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.170054912567139,
      "learning_rate": 4.7494746646701785e-05,
      "loss": 2.9927,
      "step": 188570
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.656437397003174,
      "learning_rate": 4.7492537888773695e-05,
      "loss": 2.8978,
      "step": 188571
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.868420362472534,
      "learning_rate": 4.7490329177791654e-05,
      "loss": 3.1836,
      "step": 188572
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0300207138061523,
      "learning_rate": 4.748812051375579e-05,
      "loss": 2.6347,
      "step": 188573
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1956522464752197,
      "learning_rate": 4.748591189666671e-05,
      "loss": 2.8159,
      "step": 188574
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6452176570892334,
      "learning_rate": 4.7483703326524705e-05,
      "loss": 3.1229,
      "step": 188575
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.791459083557129,
      "learning_rate": 4.748149480333031e-05,
      "loss": 2.7975,
      "step": 188576
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.949129819869995,
      "learning_rate": 4.74792863270838e-05,
      "loss": 2.9208,
      "step": 188577
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.92985200881958,
      "learning_rate": 4.7477077897785796e-05,
      "loss": 2.715,
      "step": 188578
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5922536849975586,
      "learning_rate": 4.7474869515436445e-05,
      "loss": 3.0208,
      "step": 188579
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.350207567214966,
      "learning_rate": 4.747266118003633e-05,
      "loss": 3.0568,
      "step": 188580
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.697946071624756,
      "learning_rate": 4.747045289158576e-05,
      "loss": 3.2823,
      "step": 188581
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7302746772766113,
      "learning_rate": 4.746824465008527e-05,
      "loss": 2.7459,
      "step": 188582
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5268123149871826,
      "learning_rate": 4.7466036455535126e-05,
      "loss": 2.9069,
      "step": 188583
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.276745319366455,
      "learning_rate": 4.746382830793586e-05,
      "loss": 3.0685,
      "step": 188584
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.30318546295166,
      "learning_rate": 4.746162020728786e-05,
      "loss": 2.8323,
      "step": 188585
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2208025455474854,
      "learning_rate": 4.7459412153591516e-05,
      "loss": 2.7532,
      "step": 188586
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.850625514984131,
      "learning_rate": 4.745720414684717e-05,
      "loss": 2.793,
      "step": 188587
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3355228900909424,
      "learning_rate": 4.7454996187055335e-05,
      "loss": 3.1299,
      "step": 188588
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.121342420578003,
      "learning_rate": 4.745278827421635e-05,
      "loss": 2.9159,
      "step": 188589
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.671133041381836,
      "learning_rate": 4.74505804083307e-05,
      "loss": 2.8657,
      "step": 188590
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8628618717193604,
      "learning_rate": 4.744837258939875e-05,
      "loss": 3.1639,
      "step": 188591
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3954660892486572,
      "learning_rate": 4.744616481742089e-05,
      "loss": 2.9117,
      "step": 188592
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8882651329040527,
      "learning_rate": 4.744395709239759e-05,
      "loss": 3.0842,
      "step": 188593
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.81487774848938,
      "learning_rate": 4.744174941432923e-05,
      "loss": 3.0278,
      "step": 188594
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.245694398880005,
      "learning_rate": 4.743954178321615e-05,
      "loss": 3.0644,
      "step": 188595
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.924135684967041,
      "learning_rate": 4.7437334199058875e-05,
      "loss": 2.8718,
      "step": 188596
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6918978691101074,
      "learning_rate": 4.74351266618578e-05,
      "loss": 3.1388,
      "step": 188597
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.744518995285034,
      "learning_rate": 4.74329191716132e-05,
      "loss": 2.981,
      "step": 188598
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3615546226501465,
      "learning_rate": 4.743071172832571e-05,
      "loss": 2.8378,
      "step": 188599
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.653380870819092,
      "learning_rate": 4.7428504331995586e-05,
      "loss": 2.9354,
      "step": 188600
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.718869686126709,
      "learning_rate": 4.742629698262317e-05,
      "loss": 3.0679,
      "step": 188601
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2670459747314453,
      "learning_rate": 4.7424089680209085e-05,
      "loss": 2.9384,
      "step": 188602
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.734673261642456,
      "learning_rate": 4.742188242475361e-05,
      "loss": 2.9939,
      "step": 188603
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.606381416320801,
      "learning_rate": 4.7419675216257135e-05,
      "loss": 2.6129,
      "step": 188604
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6260406970977783,
      "learning_rate": 4.7417468054720135e-05,
      "loss": 2.9632,
      "step": 188605
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.811522960662842,
      "learning_rate": 4.741526094014296e-05,
      "loss": 3.0228,
      "step": 188606
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.969205141067505,
      "learning_rate": 4.74130538725261e-05,
      "loss": 3.0916,
      "step": 188607
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8839943408966064,
      "learning_rate": 4.7410846851869935e-05,
      "loss": 3.0712,
      "step": 188608
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.316561698913574,
      "learning_rate": 4.740863987817487e-05,
      "loss": 3.1216,
      "step": 188609
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1389269828796387,
      "learning_rate": 4.7406432951441245e-05,
      "loss": 2.9675,
      "step": 188610
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0417237281799316,
      "learning_rate": 4.740422607166958e-05,
      "loss": 3.1397,
      "step": 188611
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6514508724212646,
      "learning_rate": 4.740201923886016e-05,
      "loss": 3.1538,
      "step": 188612
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.483945369720459,
      "learning_rate": 4.739981245301356e-05,
      "loss": 2.9801,
      "step": 188613
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8178958892822266,
      "learning_rate": 4.7397605714130105e-05,
      "loss": 3.1562,
      "step": 188614
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4498119354248047,
      "learning_rate": 4.7395399022210214e-05,
      "loss": 3.0625,
      "step": 188615
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5457828044891357,
      "learning_rate": 4.739319237725419e-05,
      "loss": 3.0747,
      "step": 188616
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.77795147895813,
      "learning_rate": 4.739098577926263e-05,
      "loss": 3.0742,
      "step": 188617
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.443232774734497,
      "learning_rate": 4.7388779228235764e-05,
      "loss": 3.0043,
      "step": 188618
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.642210006713867,
      "learning_rate": 4.738657272417417e-05,
      "loss": 3.0618,
      "step": 188619
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6730284690856934,
      "learning_rate": 4.738436626707817e-05,
      "loss": 2.9407,
      "step": 188620
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7327606678009033,
      "learning_rate": 4.7382159856948196e-05,
      "loss": 2.7618,
      "step": 188621
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.207366466522217,
      "learning_rate": 4.7379953493784594e-05,
      "loss": 2.695,
      "step": 188622
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.915771484375,
      "learning_rate": 4.7377747177587885e-05,
      "loss": 2.6419,
      "step": 188623
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.755767822265625,
      "learning_rate": 4.7375540908358346e-05,
      "loss": 2.904,
      "step": 188624
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.994213819503784,
      "learning_rate": 4.7373334686096534e-05,
      "loss": 2.9117,
      "step": 188625
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.92366886138916,
      "learning_rate": 4.737112851080278e-05,
      "loss": 3.0367,
      "step": 188626
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.406521797180176,
      "learning_rate": 4.7368922382477524e-05,
      "loss": 2.9805,
      "step": 188627
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.027245283126831,
      "learning_rate": 4.7366716301121065e-05,
      "loss": 3.0667,
      "step": 188628
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.448676109313965,
      "learning_rate": 4.7364510266733966e-05,
      "loss": 2.9118,
      "step": 188629
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8565573692321777,
      "learning_rate": 4.736230427931653e-05,
      "loss": 2.8608,
      "step": 188630
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.672544479370117,
      "learning_rate": 4.7360098338869254e-05,
      "loss": 2.7902,
      "step": 188631
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4437997341156006,
      "learning_rate": 4.7357892445392544e-05,
      "loss": 3.0238,
      "step": 188632
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.752084732055664,
      "learning_rate": 4.7355686598886724e-05,
      "loss": 2.6589,
      "step": 188633
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.125195026397705,
      "learning_rate": 4.7353480799352194e-05,
      "loss": 2.9794,
      "step": 188634
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6234331130981445,
      "learning_rate": 4.73512750467895e-05,
      "loss": 2.7244,
      "step": 188635
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5442001819610596,
      "learning_rate": 4.734906934119892e-05,
      "loss": 3.0713,
      "step": 188636
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4632318019866943,
      "learning_rate": 4.7346863682580936e-05,
      "loss": 2.7078,
      "step": 188637
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.400193452835083,
      "learning_rate": 4.734465807093591e-05,
      "loss": 3.0826,
      "step": 188638
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3511531352996826,
      "learning_rate": 4.7342452506264415e-05,
      "loss": 2.8524,
      "step": 188639
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.928830623626709,
      "learning_rate": 4.734024698856658e-05,
      "loss": 2.7791,
      "step": 188640
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2469496726989746,
      "learning_rate": 4.7338041517843064e-05,
      "loss": 2.9163,
      "step": 188641
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.307814359664917,
      "learning_rate": 4.733583609409407e-05,
      "loss": 2.8192,
      "step": 188642
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.044243335723877,
      "learning_rate": 4.733363071732017e-05,
      "loss": 2.9528,
      "step": 188643
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8234875202178955,
      "learning_rate": 4.7331425387521694e-05,
      "loss": 2.835,
      "step": 188644
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.189906358718872,
      "learning_rate": 4.732922010469917e-05,
      "loss": 2.8391,
      "step": 188645
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4695804119110107,
      "learning_rate": 4.732701486885281e-05,
      "loss": 2.8467,
      "step": 188646
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7686350345611572,
      "learning_rate": 4.7324809679983165e-05,
      "loss": 3.0644,
      "step": 188647
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2953553199768066,
      "learning_rate": 4.732260453809058e-05,
      "loss": 3.0602,
      "step": 188648
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5167996883392334,
      "learning_rate": 4.732039944317552e-05,
      "loss": 3.0449,
      "step": 188649
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.013303279876709,
      "learning_rate": 4.7318194395238306e-05,
      "loss": 2.8496,
      "step": 188650
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.569485902786255,
      "learning_rate": 4.731598939427959e-05,
      "loss": 2.8031,
      "step": 188651
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0311779975891113,
      "learning_rate": 4.731378444029942e-05,
      "loss": 2.9737,
      "step": 188652
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0662105083465576,
      "learning_rate": 4.7311579533298475e-05,
      "loss": 3.0047,
      "step": 188653
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1338613033294678,
      "learning_rate": 4.730937467327698e-05,
      "loss": 3.1081,
      "step": 188654
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.733147621154785,
      "learning_rate": 4.7307169860235536e-05,
      "loss": 2.8473,
      "step": 188655
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2778522968292236,
      "learning_rate": 4.730496509417438e-05,
      "loss": 2.7581,
      "step": 188656
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.595043659210205,
      "learning_rate": 4.730276037509415e-05,
      "loss": 3.0309,
      "step": 188657
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.412525177001953,
      "learning_rate": 4.730055570299497e-05,
      "loss": 2.9569,
      "step": 188658
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9601638317108154,
      "learning_rate": 4.7298351077877436e-05,
      "loss": 3.0551,
      "step": 188659
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7701680660247803,
      "learning_rate": 4.729614649974186e-05,
      "loss": 3.18,
      "step": 188660
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.931520938873291,
      "learning_rate": 4.7293941968588765e-05,
      "loss": 2.8892,
      "step": 188661
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3535919189453125,
      "learning_rate": 4.729173748441842e-05,
      "loss": 3.0885,
      "step": 188662
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0302178859710693,
      "learning_rate": 4.728953304723143e-05,
      "loss": 2.901,
      "step": 188663
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8041887283325195,
      "learning_rate": 4.728732865702799e-05,
      "loss": 3.1772,
      "step": 188664
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.827221155166626,
      "learning_rate": 4.728512431380866e-05,
      "loss": 3.1511,
      "step": 188665
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.458453893661499,
      "learning_rate": 4.728292001757372e-05,
      "loss": 2.8189,
      "step": 188666
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0569071769714355,
      "learning_rate": 4.728071576832373e-05,
      "loss": 2.7378,
      "step": 188667
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8148484230041504,
      "learning_rate": 4.7278511566058954e-05,
      "loss": 2.9672,
      "step": 188668
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8677804470062256,
      "learning_rate": 4.727630741077992e-05,
      "loss": 3.0502,
      "step": 188669
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.007174015045166,
      "learning_rate": 4.727410330248701e-05,
      "loss": 2.9175,
      "step": 188670
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.159576416015625,
      "learning_rate": 4.7271899241180646e-05,
      "loss": 2.8837,
      "step": 188671
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.744011640548706,
      "learning_rate": 4.726969522686109e-05,
      "loss": 2.923,
      "step": 188672
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.576228618621826,
      "learning_rate": 4.726749125952895e-05,
      "loss": 2.894,
      "step": 188673
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.560237169265747,
      "learning_rate": 4.72652873391845e-05,
      "loss": 3.1254,
      "step": 188674
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6730637550354004,
      "learning_rate": 4.7263083465828255e-05,
      "loss": 2.8526,
      "step": 188675
    },
    {
      "epoch": 2.46,
      "grad_norm": 6.561884880065918,
      "learning_rate": 4.7260879639460594e-05,
      "loss": 3.0647,
      "step": 188676
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7739346027374268,
      "learning_rate": 4.725867586008184e-05,
      "loss": 2.7352,
      "step": 188677
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.995394706726074,
      "learning_rate": 4.72564721276925e-05,
      "loss": 2.7801,
      "step": 188678
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.63582706451416,
      "learning_rate": 4.7254268442292977e-05,
      "loss": 2.8924,
      "step": 188679
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.851245164871216,
      "learning_rate": 4.7252064803883594e-05,
      "loss": 2.8163,
      "step": 188680
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5073742866516113,
      "learning_rate": 4.724986121246489e-05,
      "loss": 2.9068,
      "step": 188681
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0193183422088623,
      "learning_rate": 4.72476576680372e-05,
      "loss": 2.908,
      "step": 188682
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.763336181640625,
      "learning_rate": 4.724545417060088e-05,
      "loss": 3.089,
      "step": 188683
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.393343448638916,
      "learning_rate": 4.724325072015648e-05,
      "loss": 2.7779,
      "step": 188684
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1266040802001953,
      "learning_rate": 4.724104731670432e-05,
      "loss": 2.943,
      "step": 188685
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.146780252456665,
      "learning_rate": 4.723884396024473e-05,
      "loss": 2.8318,
      "step": 188686
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.2482709884643555,
      "learning_rate": 4.723664065077829e-05,
      "loss": 2.9943,
      "step": 188687
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.095766544342041,
      "learning_rate": 4.723443738830536e-05,
      "loss": 2.8096,
      "step": 188688
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9450442790985107,
      "learning_rate": 4.72322341728262e-05,
      "loss": 3.1887,
      "step": 188689
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7147891521453857,
      "learning_rate": 4.723003100434145e-05,
      "loss": 2.7741,
      "step": 188690
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5332608222961426,
      "learning_rate": 4.722782788285131e-05,
      "loss": 2.9045,
      "step": 188691
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.316056728363037,
      "learning_rate": 4.7225624808356376e-05,
      "loss": 2.8418,
      "step": 188692
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6982500553131104,
      "learning_rate": 4.7223421780856984e-05,
      "loss": 2.9884,
      "step": 188693
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4721195697784424,
      "learning_rate": 4.72212188003535e-05,
      "loss": 3.0728,
      "step": 188694
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.348938465118408,
      "learning_rate": 4.721901586684632e-05,
      "loss": 2.9164,
      "step": 188695
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.836146831512451,
      "learning_rate": 4.721681298033595e-05,
      "loss": 2.8962,
      "step": 188696
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3246986865997314,
      "learning_rate": 4.7214610140822684e-05,
      "loss": 2.9837,
      "step": 188697
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.642366409301758,
      "learning_rate": 4.721240734830706e-05,
      "loss": 2.9553,
      "step": 188698
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.690107583999634,
      "learning_rate": 4.7210204602789406e-05,
      "loss": 2.9228,
      "step": 188699
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4041523933410645,
      "learning_rate": 4.720800190427016e-05,
      "loss": 2.8221,
      "step": 188700
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0634701251983643,
      "learning_rate": 4.720579925274964e-05,
      "loss": 2.6036,
      "step": 188701
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7533063888549805,
      "learning_rate": 4.7203596648228405e-05,
      "loss": 2.9389,
      "step": 188702
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5136001110076904,
      "learning_rate": 4.7201394090706734e-05,
      "loss": 2.9292,
      "step": 188703
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.578153610229492,
      "learning_rate": 4.719919158018517e-05,
      "loss": 2.73,
      "step": 188704
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.914135694503784,
      "learning_rate": 4.7196989116663974e-05,
      "loss": 2.7384,
      "step": 188705
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0102648735046387,
      "learning_rate": 4.719478670014375e-05,
      "loss": 2.9726,
      "step": 188706
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0035805702209473,
      "learning_rate": 4.719258433062466e-05,
      "loss": 2.8171,
      "step": 188707
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.301013469696045,
      "learning_rate": 4.7190382008107344e-05,
      "loss": 3.1744,
      "step": 188708
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8633029460906982,
      "learning_rate": 4.718817973259199e-05,
      "loss": 3.0854,
      "step": 188709
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.871049165725708,
      "learning_rate": 4.718597750407921e-05,
      "loss": 3.1981,
      "step": 188710
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3647820949554443,
      "learning_rate": 4.718377532256926e-05,
      "loss": 3.2063,
      "step": 188711
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.476177930831909,
      "learning_rate": 4.718157318806278e-05,
      "loss": 3.021,
      "step": 188712
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.864777088165283,
      "learning_rate": 4.717937110055984e-05,
      "loss": 2.9433,
      "step": 188713
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.947239637374878,
      "learning_rate": 4.717716906006113e-05,
      "loss": 3.0481,
      "step": 188714
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5274856090545654,
      "learning_rate": 4.717496706656685e-05,
      "loss": 2.7711,
      "step": 188715
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.435154676437378,
      "learning_rate": 4.717276512007764e-05,
      "loss": 2.9097,
      "step": 188716
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2348697185516357,
      "learning_rate": 4.717056322059366e-05,
      "loss": 2.6599,
      "step": 188717
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6185128688812256,
      "learning_rate": 4.7168361368115613e-05,
      "loss": 2.7664,
      "step": 188718
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.700535535812378,
      "learning_rate": 4.7166159562643604e-05,
      "loss": 3.0052,
      "step": 188719
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.853954792022705,
      "learning_rate": 4.7163957804178223e-05,
      "loss": 2.9452,
      "step": 188720
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4410696029663086,
      "learning_rate": 4.7161756092719804e-05,
      "loss": 3.0364,
      "step": 188721
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.658099412918091,
      "learning_rate": 4.715955442826882e-05,
      "loss": 2.9576,
      "step": 188722
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1692168712615967,
      "learning_rate": 4.71573528108256e-05,
      "loss": 2.736,
      "step": 188723
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3991637229919434,
      "learning_rate": 4.715515124039074e-05,
      "loss": 2.9304,
      "step": 188724
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1937623023986816,
      "learning_rate": 4.715294971696435e-05,
      "loss": 3.0791,
      "step": 188725
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4882161617279053,
      "learning_rate": 4.7150748240547085e-05,
      "loss": 3.0247,
      "step": 188726
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9784538745880127,
      "learning_rate": 4.714854681113922e-05,
      "loss": 2.6556,
      "step": 188727
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5902867317199707,
      "learning_rate": 4.714634542874125e-05,
      "loss": 3.1043,
      "step": 188728
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.713425874710083,
      "learning_rate": 4.7144144093353475e-05,
      "loss": 3.0206,
      "step": 188729
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.730537176132202,
      "learning_rate": 4.7141942804976526e-05,
      "loss": 2.9031,
      "step": 188730
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.404466390609741,
      "learning_rate": 4.7139741563610543e-05,
      "loss": 2.9553,
      "step": 188731
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8268442153930664,
      "learning_rate": 4.713754036925612e-05,
      "loss": 3.1146,
      "step": 188732
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7209091186523438,
      "learning_rate": 4.7135339221913494e-05,
      "loss": 2.9616,
      "step": 188733
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6112911701202393,
      "learning_rate": 4.7133138121583294e-05,
      "loss": 3.02,
      "step": 188734
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.469658136367798,
      "learning_rate": 4.713093706826572e-05,
      "loss": 2.796,
      "step": 188735
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0899930000305176,
      "learning_rate": 4.7128736061961436e-05,
      "loss": 3.0182,
      "step": 188736
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3936023712158203,
      "learning_rate": 4.712653510267055e-05,
      "loss": 2.771,
      "step": 188737
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8164281845092773,
      "learning_rate": 4.7124334190393686e-05,
      "loss": 2.9395,
      "step": 188738
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8113410472869873,
      "learning_rate": 4.712213332513109e-05,
      "loss": 3.1947,
      "step": 188739
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.243288040161133,
      "learning_rate": 4.711993250688334e-05,
      "loss": 3.2057,
      "step": 188740
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.823136329650879,
      "learning_rate": 4.711773173565072e-05,
      "loss": 2.8254,
      "step": 188741
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7127699851989746,
      "learning_rate": 4.7115531011433825e-05,
      "loss": 2.8947,
      "step": 188742
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8995320796966553,
      "learning_rate": 4.711333033423276e-05,
      "loss": 2.8022,
      "step": 188743
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7929482460021973,
      "learning_rate": 4.711112970404818e-05,
      "loss": 2.9284,
      "step": 188744
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9567184448242188,
      "learning_rate": 4.7108929120880355e-05,
      "loss": 3.1436,
      "step": 188745
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5796656608581543,
      "learning_rate": 4.7106728584729826e-05,
      "loss": 2.7408,
      "step": 188746
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7469899654388428,
      "learning_rate": 4.710452809559685e-05,
      "loss": 3.1296,
      "step": 188747
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.019310712814331,
      "learning_rate": 4.710232765348206e-05,
      "loss": 2.8503,
      "step": 188748
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3586041927337646,
      "learning_rate": 4.71001272583856e-05,
      "loss": 2.8858,
      "step": 188749
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4425642490386963,
      "learning_rate": 4.709792691030803e-05,
      "loss": 2.9604,
      "step": 188750
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8440842628479004,
      "learning_rate": 4.709572660924967e-05,
      "loss": 2.6868,
      "step": 188751
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.878791570663452,
      "learning_rate": 4.7093526355211076e-05,
      "loss": 2.8968,
      "step": 188752
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2165050506591797,
      "learning_rate": 4.70913261481925e-05,
      "loss": 2.6921,
      "step": 188753
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.110966205596924,
      "learning_rate": 4.7089125988194476e-05,
      "loss": 2.7913,
      "step": 188754
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.579848289489746,
      "learning_rate": 4.708692587521737e-05,
      "loss": 2.8624,
      "step": 188755
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8727097511291504,
      "learning_rate": 4.708472580926156e-05,
      "loss": 2.8074,
      "step": 188756
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.814077138900757,
      "learning_rate": 4.708252579032743e-05,
      "loss": 2.6913,
      "step": 188757
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4434406757354736,
      "learning_rate": 4.708032581841549e-05,
      "loss": 3.1647,
      "step": 188758
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.220876455307007,
      "learning_rate": 4.707812589352603e-05,
      "loss": 3.1574,
      "step": 188759
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4480481147766113,
      "learning_rate": 4.707592601565956e-05,
      "loss": 2.7981,
      "step": 188760
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.712918519973755,
      "learning_rate": 4.707372618481647e-05,
      "loss": 3.0676,
      "step": 188761
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5378620624542236,
      "learning_rate": 4.707152640099717e-05,
      "loss": 2.7627,
      "step": 188762
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6315863132476807,
      "learning_rate": 4.7069326664201954e-05,
      "loss": 2.5736,
      "step": 188763
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9051852226257324,
      "learning_rate": 4.706712697443139e-05,
      "loss": 3.1677,
      "step": 188764
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.290183067321777,
      "learning_rate": 4.706492733168574e-05,
      "loss": 2.8739,
      "step": 188765
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.793027877807617,
      "learning_rate": 4.706272773596561e-05,
      "loss": 2.9116,
      "step": 188766
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6507203578948975,
      "learning_rate": 4.7060528187271264e-05,
      "loss": 2.8676,
      "step": 188767
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.013960123062134,
      "learning_rate": 4.705832868560306e-05,
      "loss": 2.8652,
      "step": 188768
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7218971252441406,
      "learning_rate": 4.705612923096158e-05,
      "loss": 2.9325,
      "step": 188769
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.830354690551758,
      "learning_rate": 4.705392982334715e-05,
      "loss": 2.9798,
      "step": 188770
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3431754112243652,
      "learning_rate": 4.705173046276006e-05,
      "loss": 2.8181,
      "step": 188771
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1446173191070557,
      "learning_rate": 4.7049531149200926e-05,
      "loss": 2.9762,
      "step": 188772
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1423165798187256,
      "learning_rate": 4.704733188267003e-05,
      "loss": 3.1018,
      "step": 188773
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.039323568344116,
      "learning_rate": 4.704513266316776e-05,
      "loss": 2.9277,
      "step": 188774
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.238515615463257,
      "learning_rate": 4.704293349069467e-05,
      "loss": 3.1577,
      "step": 188775
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.71714186668396,
      "learning_rate": 4.7040734365250986e-05,
      "loss": 3.0399,
      "step": 188776
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.636596441268921,
      "learning_rate": 4.7038535286837255e-05,
      "loss": 3.0099,
      "step": 188777
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7851645946502686,
      "learning_rate": 4.7036336255453835e-05,
      "loss": 3.0796,
      "step": 188778
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9105682373046875,
      "learning_rate": 4.703413727110116e-05,
      "loss": 2.7761,
      "step": 188779
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9983997344970703,
      "learning_rate": 4.703193833377953e-05,
      "loss": 2.7892,
      "step": 188780
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.924436092376709,
      "learning_rate": 4.702973944348951e-05,
      "loss": 3.0254,
      "step": 188781
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.145042657852173,
      "learning_rate": 4.702754060023137e-05,
      "loss": 2.7494,
      "step": 188782
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5131547451019287,
      "learning_rate": 4.702534180400564e-05,
      "loss": 2.8364,
      "step": 188783
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8036415576934814,
      "learning_rate": 4.702314305481269e-05,
      "loss": 2.9762,
      "step": 188784
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.917158603668213,
      "learning_rate": 4.702094435265292e-05,
      "loss": 2.8435,
      "step": 188785
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7285287380218506,
      "learning_rate": 4.701874569752666e-05,
      "loss": 2.8255,
      "step": 188786
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9223709106445312,
      "learning_rate": 4.701654708943444e-05,
      "loss": 2.9349,
      "step": 188787
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8250622749328613,
      "learning_rate": 4.7014348528376563e-05,
      "loss": 2.8839,
      "step": 188788
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9201812744140625,
      "learning_rate": 4.701215001435357e-05,
      "loss": 2.837,
      "step": 188789
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.620821475982666,
      "learning_rate": 4.700995154736571e-05,
      "loss": 2.7055,
      "step": 188790
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0928196907043457,
      "learning_rate": 4.700775312741363e-05,
      "loss": 2.8506,
      "step": 188791
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.407484531402588,
      "learning_rate": 4.700555475449742e-05,
      "loss": 2.9566,
      "step": 188792
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.605578660964966,
      "learning_rate": 4.700335642861776e-05,
      "loss": 2.8873,
      "step": 188793
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1043734550476074,
      "learning_rate": 4.700115814977483e-05,
      "loss": 2.8259,
      "step": 188794
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.317744731903076,
      "learning_rate": 4.699895991796928e-05,
      "loss": 2.9041,
      "step": 188795
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2120676040649414,
      "learning_rate": 4.6996761733201305e-05,
      "loss": 2.9801,
      "step": 188796
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.97820782661438,
      "learning_rate": 4.6994563595471566e-05,
      "loss": 2.8466,
      "step": 188797
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.960681676864624,
      "learning_rate": 4.6992365504780135e-05,
      "loss": 3.1854,
      "step": 188798
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0771498680114746,
      "learning_rate": 4.699016746112772e-05,
      "loss": 2.8754,
      "step": 188799
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.749636173248291,
      "learning_rate": 4.6987969464514495e-05,
      "loss": 2.8139,
      "step": 188800
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9528839588165283,
      "learning_rate": 4.6985771514941084e-05,
      "loss": 2.6957,
      "step": 188801
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3955557346343994,
      "learning_rate": 4.698357361240771e-05,
      "loss": 2.9692,
      "step": 188802
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.633053779602051,
      "learning_rate": 4.698137575691501e-05,
      "loss": 2.6976,
      "step": 188803
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.629481792449951,
      "learning_rate": 4.6979177948463106e-05,
      "loss": 2.8244,
      "step": 188804
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.81048321723938,
      "learning_rate": 4.6976980187052616e-05,
      "loss": 2.6221,
      "step": 188805
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1118202209472656,
      "learning_rate": 4.697478247268379e-05,
      "loss": 3.0494,
      "step": 188806
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.100184440612793,
      "learning_rate": 4.697258480535724e-05,
      "loss": 2.7867,
      "step": 188807
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4048044681549072,
      "learning_rate": 4.697038718507319e-05,
      "loss": 2.9167,
      "step": 188808
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.739495038986206,
      "learning_rate": 4.696818961183224e-05,
      "loss": 2.9706,
      "step": 188809
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7587645053863525,
      "learning_rate": 4.696599208563453e-05,
      "loss": 2.8469,
      "step": 188810
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1177594661712646,
      "learning_rate": 4.696379460648072e-05,
      "loss": 2.7621,
      "step": 188811
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3492937088012695,
      "learning_rate": 4.6961597174371015e-05,
      "loss": 3.0338,
      "step": 188812
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.569768190383911,
      "learning_rate": 4.6959399789306006e-05,
      "loss": 2.6982,
      "step": 188813
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3267669677734375,
      "learning_rate": 4.695720245128597e-05,
      "loss": 2.7975,
      "step": 188814
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.830517292022705,
      "learning_rate": 4.6955005160311496e-05,
      "loss": 3.0149,
      "step": 188815
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0737462043762207,
      "learning_rate": 4.695280791638273e-05,
      "loss": 2.9222,
      "step": 188816
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.960965871810913,
      "learning_rate": 4.6950610719500256e-05,
      "loss": 3.0112,
      "step": 188817
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.621915340423584,
      "learning_rate": 4.694841356966439e-05,
      "loss": 3.0226,
      "step": 188818
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9696993827819824,
      "learning_rate": 4.6946216466875655e-05,
      "loss": 3.0562,
      "step": 188819
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.001213550567627,
      "learning_rate": 4.694401941113435e-05,
      "loss": 3.0639,
      "step": 188820
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.589991092681885,
      "learning_rate": 4.6941822402441046e-05,
      "loss": 2.996,
      "step": 188821
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4147915840148926,
      "learning_rate": 4.6939625440795867e-05,
      "loss": 2.643,
      "step": 188822
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3671507835388184,
      "learning_rate": 4.693742852619949e-05,
      "loss": 2.8895,
      "step": 188823
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.301379919052124,
      "learning_rate": 4.693523165865215e-05,
      "loss": 2.8773,
      "step": 188824
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.883826494216919,
      "learning_rate": 4.69330348381544e-05,
      "loss": 3.0432,
      "step": 188825
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3149189949035645,
      "learning_rate": 4.693083806470652e-05,
      "loss": 2.8747,
      "step": 188826
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.355461359024048,
      "learning_rate": 4.69286413383091e-05,
      "loss": 2.8623,
      "step": 188827
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6570611000061035,
      "learning_rate": 4.6926444658962284e-05,
      "loss": 2.8814,
      "step": 188828
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6939282417297363,
      "learning_rate": 4.6924248026666656e-05,
      "loss": 2.8806,
      "step": 188829
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.54677677154541,
      "learning_rate": 4.692205144142256e-05,
      "loss": 2.9757,
      "step": 188830
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.786611557006836,
      "learning_rate": 4.691985490323049e-05,
      "loss": 3.0752,
      "step": 188831
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.894834518432617,
      "learning_rate": 4.691765841209072e-05,
      "loss": 2.9666,
      "step": 188832
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3010590076446533,
      "learning_rate": 4.691546196800388e-05,
      "loss": 3.0936,
      "step": 188833
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6471505165100098,
      "learning_rate": 4.691326557097009e-05,
      "loss": 3.1308,
      "step": 188834
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.68194055557251,
      "learning_rate": 4.6911069220989975e-05,
      "loss": 2.8635,
      "step": 188835
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.040144205093384,
      "learning_rate": 4.690887291806381e-05,
      "loss": 2.8187,
      "step": 188836
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7758371829986572,
      "learning_rate": 4.6906676662192113e-05,
      "loss": 2.975,
      "step": 188837
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.220010995864868,
      "learning_rate": 4.690448045337517e-05,
      "loss": 2.8826,
      "step": 188838
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.761190891265869,
      "learning_rate": 4.690228429161356e-05,
      "loss": 3.0869,
      "step": 188839
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.055332660675049,
      "learning_rate": 4.690008817690757e-05,
      "loss": 2.9889,
      "step": 188840
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.619802951812744,
      "learning_rate": 4.6897892109257637e-05,
      "loss": 2.8599,
      "step": 188841
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6312320232391357,
      "learning_rate": 4.689569608866407e-05,
      "loss": 2.7941,
      "step": 188842
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7143585681915283,
      "learning_rate": 4.689350011512746e-05,
      "loss": 2.7769,
      "step": 188843
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9662225246429443,
      "learning_rate": 4.689130418864807e-05,
      "loss": 2.8983,
      "step": 188844
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0279736518859863,
      "learning_rate": 4.688910830922644e-05,
      "loss": 2.9493,
      "step": 188845
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5307090282440186,
      "learning_rate": 4.688691247686287e-05,
      "loss": 2.8619,
      "step": 188846
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1404316425323486,
      "learning_rate": 4.688471669155782e-05,
      "loss": 2.6336,
      "step": 188847
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2681500911712646,
      "learning_rate": 4.68825209533116e-05,
      "loss": 2.887,
      "step": 188848
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7214772701263428,
      "learning_rate": 4.688032526212476e-05,
      "loss": 2.8253,
      "step": 188849
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.314680337905884,
      "learning_rate": 4.687812961799762e-05,
      "loss": 2.8948,
      "step": 188850
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.634115219116211,
      "learning_rate": 4.687593402093063e-05,
      "loss": 2.8589,
      "step": 188851
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6235878467559814,
      "learning_rate": 4.6873738470924225e-05,
      "loss": 2.9388,
      "step": 188852
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.197349548339844,
      "learning_rate": 4.687154296797868e-05,
      "loss": 2.9571,
      "step": 188853
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.697618246078491,
      "learning_rate": 4.6869347512094555e-05,
      "loss": 2.9767,
      "step": 188854
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.035508871078491,
      "learning_rate": 4.686715210327219e-05,
      "loss": 3.0214,
      "step": 188855
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.554452896118164,
      "learning_rate": 4.686495674151194e-05,
      "loss": 3.0016,
      "step": 188856
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5426840782165527,
      "learning_rate": 4.6862761426814374e-05,
      "loss": 2.9219,
      "step": 188857
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0068299770355225,
      "learning_rate": 4.686056615917977e-05,
      "loss": 2.8772,
      "step": 188858
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0542173385620117,
      "learning_rate": 4.685837093860848e-05,
      "loss": 3.0042,
      "step": 188859
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1695735454559326,
      "learning_rate": 4.6856175765101075e-05,
      "loss": 2.9578,
      "step": 188860
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5706536769866943,
      "learning_rate": 4.685398063865793e-05,
      "loss": 2.8206,
      "step": 188861
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.879117250442505,
      "learning_rate": 4.68517855592793e-05,
      "loss": 2.793,
      "step": 188862
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5365560054779053,
      "learning_rate": 4.684959052696575e-05,
      "loss": 3.0389,
      "step": 188863
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6573431491851807,
      "learning_rate": 4.684739554171769e-05,
      "loss": 3.0129,
      "step": 188864
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.84649395942688,
      "learning_rate": 4.684520060353538e-05,
      "loss": 3.1495,
      "step": 188865
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1555206775665283,
      "learning_rate": 4.684300571241939e-05,
      "loss": 2.9548,
      "step": 188866
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.017740726470947,
      "learning_rate": 4.684081086836998e-05,
      "loss": 2.7913,
      "step": 188867
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7784838676452637,
      "learning_rate": 4.683861607138776e-05,
      "loss": 3.0086,
      "step": 188868
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.7380270957946777,
      "learning_rate": 4.6836421321472987e-05,
      "loss": 2.9478,
      "step": 188869
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4277563095092773,
      "learning_rate": 4.6834226618626126e-05,
      "loss": 3.0099,
      "step": 188870
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.006323337554932,
      "learning_rate": 4.683203196284745e-05,
      "loss": 2.6616,
      "step": 188871
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.935225009918213,
      "learning_rate": 4.682983735413759e-05,
      "loss": 3.0651,
      "step": 188872
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1956636905670166,
      "learning_rate": 4.6827642792496745e-05,
      "loss": 2.9303,
      "step": 188873
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.677919626235962,
      "learning_rate": 4.6825448277925515e-05,
      "loss": 3.0961,
      "step": 188874
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.553785562515259,
      "learning_rate": 4.68232538104241e-05,
      "loss": 2.8793,
      "step": 188875
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.028738260269165,
      "learning_rate": 4.68210593899932e-05,
      "loss": 3.1262,
      "step": 188876
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.506582260131836,
      "learning_rate": 4.681886501663291e-05,
      "loss": 3.1015,
      "step": 188877
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.741868495941162,
      "learning_rate": 4.6816670690343805e-05,
      "loss": 2.9303,
      "step": 188878
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.566375255584717,
      "learning_rate": 4.681447641112621e-05,
      "loss": 3.1447,
      "step": 188879
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.352430820465088,
      "learning_rate": 4.681228217898067e-05,
      "loss": 3.0305,
      "step": 188880
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6220462322235107,
      "learning_rate": 4.6810087993907395e-05,
      "loss": 2.7868,
      "step": 188881
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5782604217529297,
      "learning_rate": 4.680789385590708e-05,
      "loss": 2.8785,
      "step": 188882
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0927627086639404,
      "learning_rate": 4.68056997649798e-05,
      "loss": 2.9669,
      "step": 188883
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2277724742889404,
      "learning_rate": 4.68035057211262e-05,
      "loss": 2.6947,
      "step": 188884
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.7467570304870605,
      "learning_rate": 4.6801311724346546e-05,
      "loss": 2.7178,
      "step": 188885
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5715551376342773,
      "learning_rate": 4.679911777464137e-05,
      "loss": 3.0858,
      "step": 188886
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6270947456359863,
      "learning_rate": 4.679692387201094e-05,
      "loss": 3.021,
      "step": 188887
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4305105209350586,
      "learning_rate": 4.679473001645588e-05,
      "loss": 2.7735,
      "step": 188888
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8067972660064697,
      "learning_rate": 4.6792536207976304e-05,
      "loss": 2.7825,
      "step": 188889
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.415027141571045,
      "learning_rate": 4.679034244657287e-05,
      "loss": 2.794,
      "step": 188890
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4525766372680664,
      "learning_rate": 4.678814873224581e-05,
      "loss": 3.1847,
      "step": 188891
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7456884384155273,
      "learning_rate": 4.67859550649957e-05,
      "loss": 3.0159,
      "step": 188892
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.884937286376953,
      "learning_rate": 4.6783761444822785e-05,
      "loss": 2.9922,
      "step": 188893
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.670821666717529,
      "learning_rate": 4.678156787172769e-05,
      "loss": 2.8615,
      "step": 188894
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4126267433166504,
      "learning_rate": 4.677937434571053e-05,
      "loss": 2.7189,
      "step": 188895
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7407846450805664,
      "learning_rate": 4.677718086677195e-05,
      "loss": 3.0921,
      "step": 188896
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.695282220840454,
      "learning_rate": 4.677498743491217e-05,
      "loss": 2.8384,
      "step": 188897
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4786384105682373,
      "learning_rate": 4.67727940501318e-05,
      "loss": 3.0377,
      "step": 188898
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9522616863250732,
      "learning_rate": 4.677060071243107e-05,
      "loss": 3.026,
      "step": 188899
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6744790077209473,
      "learning_rate": 4.676840742181052e-05,
      "loss": 2.8485,
      "step": 188900
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.252930164337158,
      "learning_rate": 4.6766214178270536e-05,
      "loss": 2.9911,
      "step": 188901
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4440627098083496,
      "learning_rate": 4.676402098181146e-05,
      "loss": 2.8728,
      "step": 188902
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3127384185791016,
      "learning_rate": 4.676182783243369e-05,
      "loss": 3.2431,
      "step": 188903
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.961527109146118,
      "learning_rate": 4.6759634730137716e-05,
      "loss": 3.1572,
      "step": 188904
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.428342580795288,
      "learning_rate": 4.6757441674923855e-05,
      "loss": 2.774,
      "step": 188905
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.284348487854004,
      "learning_rate": 4.675524866679263e-05,
      "loss": 2.8173,
      "step": 188906
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.1840996742248535,
      "learning_rate": 4.675305570574438e-05,
      "loss": 2.9359,
      "step": 188907
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.21929669380188,
      "learning_rate": 4.6750862791779534e-05,
      "loss": 3.0026,
      "step": 188908
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.04379415512085,
      "learning_rate": 4.674866992489842e-05,
      "loss": 2.9531,
      "step": 188909
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9833829402923584,
      "learning_rate": 4.674647710510154e-05,
      "loss": 2.8265,
      "step": 188910
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4541847705841064,
      "learning_rate": 4.6744284332389236e-05,
      "loss": 2.819,
      "step": 188911
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9359934329986572,
      "learning_rate": 4.6742091606762e-05,
      "loss": 3.0746,
      "step": 188912
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.273831844329834,
      "learning_rate": 4.67398989282202e-05,
      "loss": 2.8354,
      "step": 188913
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.151859998703003,
      "learning_rate": 4.6737706296764235e-05,
      "loss": 3.1189,
      "step": 188914
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.461916208267212,
      "learning_rate": 4.673551371239443e-05,
      "loss": 2.9203,
      "step": 188915
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.602717161178589,
      "learning_rate": 4.673332117511137e-05,
      "loss": 3.0326,
      "step": 188916
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.51204252243042,
      "learning_rate": 4.673112868491527e-05,
      "loss": 2.9912,
      "step": 188917
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3624894618988037,
      "learning_rate": 4.6728936241806715e-05,
      "loss": 2.7003,
      "step": 188918
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0141282081604004,
      "learning_rate": 4.672674384578602e-05,
      "loss": 3.0338,
      "step": 188919
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6064298152923584,
      "learning_rate": 4.6724551496853625e-05,
      "loss": 3.1614,
      "step": 188920
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.131629705429077,
      "learning_rate": 4.672235919500983e-05,
      "loss": 3.0058,
      "step": 188921
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8534951210021973,
      "learning_rate": 4.67201669402552e-05,
      "loss": 2.7926,
      "step": 188922
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7669949531555176,
      "learning_rate": 4.6717974732590005e-05,
      "loss": 2.7965,
      "step": 188923
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8377304077148438,
      "learning_rate": 4.67157825720148e-05,
      "loss": 3.0023,
      "step": 188924
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3832554817199707,
      "learning_rate": 4.67135904585299e-05,
      "loss": 2.7295,
      "step": 188925
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.969841957092285,
      "learning_rate": 4.6711398392135735e-05,
      "loss": 3.0996,
      "step": 188926
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6232168674468994,
      "learning_rate": 4.67092063728326e-05,
      "loss": 3.1061,
      "step": 188927
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6034021377563477,
      "learning_rate": 4.670701440062112e-05,
      "loss": 2.7451,
      "step": 188928
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0288965702056885,
      "learning_rate": 4.670482247550148e-05,
      "loss": 2.9501,
      "step": 188929
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7293307781219482,
      "learning_rate": 4.67026305974743e-05,
      "loss": 2.9627,
      "step": 188930
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.930098533630371,
      "learning_rate": 4.670043876653985e-05,
      "loss": 2.7409,
      "step": 188931
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.530712842941284,
      "learning_rate": 4.6698246982698595e-05,
      "loss": 2.8729,
      "step": 188932
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8985071182250977,
      "learning_rate": 4.6696055245950834e-05,
      "loss": 2.809,
      "step": 188933
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4996747970581055,
      "learning_rate": 4.66938635562971e-05,
      "loss": 2.9404,
      "step": 188934
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8186392784118652,
      "learning_rate": 4.669167191373774e-05,
      "loss": 3.0354,
      "step": 188935
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6409618854522705,
      "learning_rate": 4.668948031827323e-05,
      "loss": 2.8923,
      "step": 188936
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5630197525024414,
      "learning_rate": 4.668728876990392e-05,
      "loss": 2.7207,
      "step": 188937
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7762320041656494,
      "learning_rate": 4.6685097268630167e-05,
      "loss": 2.7207,
      "step": 188938
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8893489837646484,
      "learning_rate": 4.6682905814452476e-05,
      "loss": 2.9427,
      "step": 188939
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1055850982666016,
      "learning_rate": 4.6680714407371246e-05,
      "loss": 3.1223,
      "step": 188940
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.326251983642578,
      "learning_rate": 4.667852304738677e-05,
      "loss": 3.0546,
      "step": 188941
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.932974338531494,
      "learning_rate": 4.667633173449963e-05,
      "loss": 2.8031,
      "step": 188942
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.743110418319702,
      "learning_rate": 4.667414046871011e-05,
      "loss": 2.9368,
      "step": 188943
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.056854248046875,
      "learning_rate": 4.667194925001858e-05,
      "loss": 2.9495,
      "step": 188944
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.267638683319092,
      "learning_rate": 4.666975807842561e-05,
      "loss": 2.8881,
      "step": 188945
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6426339149475098,
      "learning_rate": 4.66675669539315e-05,
      "loss": 3.0003,
      "step": 188946
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0171358585357666,
      "learning_rate": 4.6665375876536616e-05,
      "loss": 2.9196,
      "step": 188947
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.720339059829712,
      "learning_rate": 4.666318484624149e-05,
      "loss": 2.8199,
      "step": 188948
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.679612159729004,
      "learning_rate": 4.666099386304645e-05,
      "loss": 2.9478,
      "step": 188949
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.602360963821411,
      "learning_rate": 4.6658802926951834e-05,
      "loss": 3.2419,
      "step": 188950
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.194713830947876,
      "learning_rate": 4.6656612037958205e-05,
      "loss": 2.7302,
      "step": 188951
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.982682704925537,
      "learning_rate": 4.6654421196065836e-05,
      "loss": 3.0643,
      "step": 188952
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8037898540496826,
      "learning_rate": 4.665223040127526e-05,
      "loss": 2.9182,
      "step": 188953
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1206536293029785,
      "learning_rate": 4.66500396535868e-05,
      "loss": 2.7919,
      "step": 188954
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9485018253326416,
      "learning_rate": 4.66478489530009e-05,
      "loss": 2.9619,
      "step": 188955
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.440962553024292,
      "learning_rate": 4.6645658299517855e-05,
      "loss": 2.9557,
      "step": 188956
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8231399059295654,
      "learning_rate": 4.664346769313826e-05,
      "loss": 2.8839,
      "step": 188957
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.267584800720215,
      "learning_rate": 4.664127713386232e-05,
      "loss": 2.907,
      "step": 188958
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.371948003768921,
      "learning_rate": 4.6639086621690633e-05,
      "loss": 2.7745,
      "step": 188959
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7139949798583984,
      "learning_rate": 4.663689615662347e-05,
      "loss": 3.0492,
      "step": 188960
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.594426155090332,
      "learning_rate": 4.663470573866143e-05,
      "loss": 2.9379,
      "step": 188961
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.969374179840088,
      "learning_rate": 4.6632515367804635e-05,
      "loss": 3.0485,
      "step": 188962
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8549306392669678,
      "learning_rate": 4.66303250440537e-05,
      "loss": 2.9521,
      "step": 188963
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5545785427093506,
      "learning_rate": 4.662813476740891e-05,
      "loss": 2.6891,
      "step": 188964
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.655636787414551,
      "learning_rate": 4.6625944537870804e-05,
      "loss": 2.9915,
      "step": 188965
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.939927577972412,
      "learning_rate": 4.662375435543962e-05,
      "loss": 2.8018,
      "step": 188966
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.442106246948242,
      "learning_rate": 4.6621564220115995e-05,
      "loss": 3.0835,
      "step": 188967
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.041598081588745,
      "learning_rate": 4.6619374131900145e-05,
      "loss": 2.7758,
      "step": 188968
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9592182636260986,
      "learning_rate": 4.661718409079255e-05,
      "loss": 2.8822,
      "step": 188969
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5034356117248535,
      "learning_rate": 4.6614994096793535e-05,
      "loss": 3.1492,
      "step": 188970
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4817607402801514,
      "learning_rate": 4.661280414990367e-05,
      "loss": 2.9371,
      "step": 188971
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.680861711502075,
      "learning_rate": 4.661061425012316e-05,
      "loss": 2.8509,
      "step": 188972
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8766326904296875,
      "learning_rate": 4.6608424397452624e-05,
      "loss": 2.8764,
      "step": 188973
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9876797199249268,
      "learning_rate": 4.660623459189235e-05,
      "loss": 3.0238,
      "step": 188974
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5075924396514893,
      "learning_rate": 4.660404483344274e-05,
      "loss": 2.8711,
      "step": 188975
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.285609483718872,
      "learning_rate": 4.660185512210419e-05,
      "loss": 2.7578,
      "step": 188976
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.575250625610352,
      "learning_rate": 4.659966545787719e-05,
      "loss": 2.9152,
      "step": 188977
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4948067665100098,
      "learning_rate": 4.659747584076203e-05,
      "loss": 2.8855,
      "step": 188978
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4578356742858887,
      "learning_rate": 4.6595286270759224e-05,
      "loss": 2.9246,
      "step": 188979
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.86674427986145,
      "learning_rate": 4.659309674786916e-05,
      "loss": 2.798,
      "step": 188980
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.673419952392578,
      "learning_rate": 4.6590907272092245e-05,
      "loss": 3.0302,
      "step": 188981
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0624289512634277,
      "learning_rate": 4.658871784342878e-05,
      "loss": 3.1558,
      "step": 188982
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7813405990600586,
      "learning_rate": 4.658652846187929e-05,
      "loss": 2.8051,
      "step": 188983
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0919950008392334,
      "learning_rate": 4.658433912744412e-05,
      "loss": 3.1557,
      "step": 188984
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.846676826477051,
      "learning_rate": 4.658214984012375e-05,
      "loss": 3.1526,
      "step": 188985
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9761338233947754,
      "learning_rate": 4.657996059991856e-05,
      "loss": 3.1341,
      "step": 188986
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8550755977630615,
      "learning_rate": 4.657777140682892e-05,
      "loss": 2.8583,
      "step": 188987
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3015732765197754,
      "learning_rate": 4.657558226085523e-05,
      "loss": 3.1185,
      "step": 188988
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9370038509368896,
      "learning_rate": 4.657339316199794e-05,
      "loss": 2.9002,
      "step": 188989
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2591123580932617,
      "learning_rate": 4.65712041102574e-05,
      "loss": 2.8288,
      "step": 188990
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8157460689544678,
      "learning_rate": 4.65690151056341e-05,
      "loss": 2.7554,
      "step": 188991
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5858070850372314,
      "learning_rate": 4.6566826148128445e-05,
      "loss": 2.7735,
      "step": 188992
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.656312942504883,
      "learning_rate": 4.656463723774077e-05,
      "loss": 2.8584,
      "step": 188993
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8151211738586426,
      "learning_rate": 4.656244837447146e-05,
      "loss": 2.949,
      "step": 188994
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.825561761856079,
      "learning_rate": 4.656025955832103e-05,
      "loss": 2.8667,
      "step": 188995
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2569968700408936,
      "learning_rate": 4.6558070789289744e-05,
      "loss": 2.7965,
      "step": 188996
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7278099060058594,
      "learning_rate": 4.65558820673782e-05,
      "loss": 2.8029,
      "step": 188997
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.685072898864746,
      "learning_rate": 4.65536933925867e-05,
      "loss": 3.0708,
      "step": 188998
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6876161098480225,
      "learning_rate": 4.655150476491564e-05,
      "loss": 3.1374,
      "step": 188999
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.849867820739746,
      "learning_rate": 4.654931618436538e-05,
      "loss": 3.0255,
      "step": 189000
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.452563762664795,
      "learning_rate": 4.654712765093643e-05,
      "loss": 3.0304,
      "step": 189001
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9048373699188232,
      "learning_rate": 4.654493916462912e-05,
      "loss": 2.8334,
      "step": 189002
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.140718460083008,
      "learning_rate": 4.654275072544396e-05,
      "loss": 2.9479,
      "step": 189003
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5816805362701416,
      "learning_rate": 4.654056233338126e-05,
      "loss": 2.5054,
      "step": 189004
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.037461996078491,
      "learning_rate": 4.653837398844147e-05,
      "loss": 2.6437,
      "step": 189005
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.198457717895508,
      "learning_rate": 4.653618569062488e-05,
      "loss": 2.8823,
      "step": 189006
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.638331890106201,
      "learning_rate": 4.6533997439932105e-05,
      "loss": 2.9233,
      "step": 189007
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.684798240661621,
      "learning_rate": 4.6531809236363357e-05,
      "loss": 2.8486,
      "step": 189008
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.954160690307617,
      "learning_rate": 4.652962107991922e-05,
      "loss": 2.8927,
      "step": 189009
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7592763900756836,
      "learning_rate": 4.652743297059999e-05,
      "loss": 3.0348,
      "step": 189010
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8169214725494385,
      "learning_rate": 4.652524490840609e-05,
      "loss": 2.8164,
      "step": 189011
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5741677284240723,
      "learning_rate": 4.6523056893337895e-05,
      "loss": 2.8677,
      "step": 189012
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5464751720428467,
      "learning_rate": 4.652086892539587e-05,
      "loss": 2.7505,
      "step": 189013
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8058929443359375,
      "learning_rate": 4.651868100458038e-05,
      "loss": 2.741,
      "step": 189014
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.131605625152588,
      "learning_rate": 4.6516493130891895e-05,
      "loss": 2.719,
      "step": 189015
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4016544818878174,
      "learning_rate": 4.6514305304330776e-05,
      "loss": 2.9841,
      "step": 189016
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.723104953765869,
      "learning_rate": 4.651211752489743e-05,
      "loss": 2.9979,
      "step": 189017
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.116790533065796,
      "learning_rate": 4.6509929792592215e-05,
      "loss": 2.6735,
      "step": 189018
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4213030338287354,
      "learning_rate": 4.650774210741567e-05,
      "loss": 2.8699,
      "step": 189019
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.272013187408447,
      "learning_rate": 4.6505554469368025e-05,
      "loss": 2.9378,
      "step": 189020
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.238572120666504,
      "learning_rate": 4.6503366878449846e-05,
      "loss": 2.8894,
      "step": 189021
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.796745538711548,
      "learning_rate": 4.6501179334661495e-05,
      "loss": 2.9365,
      "step": 189022
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.407513380050659,
      "learning_rate": 4.649899183800335e-05,
      "loss": 2.8069,
      "step": 189023
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5613386631011963,
      "learning_rate": 4.6496804388475774e-05,
      "loss": 3.0155,
      "step": 189024
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9455618858337402,
      "learning_rate": 4.649461698607926e-05,
      "loss": 2.7558,
      "step": 189025
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.465294361114502,
      "learning_rate": 4.649242963081414e-05,
      "loss": 2.9704,
      "step": 189026
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8006772994995117,
      "learning_rate": 4.649024232268093e-05,
      "loss": 2.9572,
      "step": 189027
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7911341190338135,
      "learning_rate": 4.6488055061679973e-05,
      "loss": 3.1162,
      "step": 189028
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0516865253448486,
      "learning_rate": 4.6485867847811585e-05,
      "loss": 3.0956,
      "step": 189029
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.762446403503418,
      "learning_rate": 4.6483680681076366e-05,
      "loss": 2.9329,
      "step": 189030
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.791963577270508,
      "learning_rate": 4.648149356147457e-05,
      "loss": 3.0331,
      "step": 189031
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.444889545440674,
      "learning_rate": 4.6479306489006605e-05,
      "loss": 2.6872,
      "step": 189032
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.964186668395996,
      "learning_rate": 4.6477119463673006e-05,
      "loss": 2.832,
      "step": 189033
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.362769842147827,
      "learning_rate": 4.6474932485474005e-05,
      "loss": 2.8744,
      "step": 189034
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9620521068573,
      "learning_rate": 4.6472745554410165e-05,
      "loss": 2.8737,
      "step": 189035
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5829524993896484,
      "learning_rate": 4.647055867048185e-05,
      "loss": 3.0098,
      "step": 189036
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3171932697296143,
      "learning_rate": 4.646837183368933e-05,
      "loss": 3.0615,
      "step": 189037
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.559446334838867,
      "learning_rate": 4.646618504403324e-05,
      "loss": 2.9065,
      "step": 189038
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.831523895263672,
      "learning_rate": 4.646399830151385e-05,
      "loss": 2.8115,
      "step": 189039
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.220465660095215,
      "learning_rate": 4.646181160613155e-05,
      "loss": 2.9035,
      "step": 189040
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3135452270507812,
      "learning_rate": 4.645962495788681e-05,
      "loss": 2.9838,
      "step": 189041
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.017766237258911,
      "learning_rate": 4.645743835678003e-05,
      "loss": 2.7747,
      "step": 189042
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.736830472946167,
      "learning_rate": 4.645525180281154e-05,
      "loss": 2.8539,
      "step": 189043
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3855581283569336,
      "learning_rate": 4.645306529598185e-05,
      "loss": 2.9774,
      "step": 189044
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.446042060852051,
      "learning_rate": 4.645087883629135e-05,
      "loss": 2.9945,
      "step": 189045
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.689626693725586,
      "learning_rate": 4.644869242374034e-05,
      "loss": 2.8901,
      "step": 189046
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.345893144607544,
      "learning_rate": 4.644650605832936e-05,
      "loss": 3.0468,
      "step": 189047
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1239941120147705,
      "learning_rate": 4.644431974005877e-05,
      "loss": 2.8781,
      "step": 189048
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.979262113571167,
      "learning_rate": 4.64421334689289e-05,
      "loss": 2.7831,
      "step": 189049
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.320956707000732,
      "learning_rate": 4.643994724494029e-05,
      "loss": 2.9469,
      "step": 189050
    },
    {
      "epoch": 2.46,
      "grad_norm": 6.3692450523376465,
      "learning_rate": 4.643776106809321e-05,
      "loss": 3.0332,
      "step": 189051
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.031730890274048,
      "learning_rate": 4.643557493838821e-05,
      "loss": 2.7067,
      "step": 189052
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.021335124969482,
      "learning_rate": 4.643338885582564e-05,
      "loss": 2.8441,
      "step": 189053
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0394270420074463,
      "learning_rate": 4.643120282040585e-05,
      "loss": 3.1304,
      "step": 189054
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.307962417602539,
      "learning_rate": 4.642901683212923e-05,
      "loss": 3.0418,
      "step": 189055
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2728583812713623,
      "learning_rate": 4.6426830890996325e-05,
      "loss": 3.0232,
      "step": 189056
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5916574001312256,
      "learning_rate": 4.642464499700738e-05,
      "loss": 2.9061,
      "step": 189057
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5055911540985107,
      "learning_rate": 4.642245915016295e-05,
      "loss": 2.9185,
      "step": 189058
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7458677291870117,
      "learning_rate": 4.642027335046338e-05,
      "loss": 2.9611,
      "step": 189059
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.419440269470215,
      "learning_rate": 4.641808759790906e-05,
      "loss": 2.9345,
      "step": 189060
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8940651416778564,
      "learning_rate": 4.641590189250033e-05,
      "loss": 2.9526,
      "step": 189061
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.891126871109009,
      "learning_rate": 4.6413716234237756e-05,
      "loss": 2.8822,
      "step": 189062
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9337878227233887,
      "learning_rate": 4.641153062312156e-05,
      "loss": 2.9673,
      "step": 189063
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.095251083374023,
      "learning_rate": 4.6409345059152356e-05,
      "loss": 2.7843,
      "step": 189064
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6051037311553955,
      "learning_rate": 4.640715954233041e-05,
      "loss": 2.9286,
      "step": 189065
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6555678844451904,
      "learning_rate": 4.640497407265618e-05,
      "loss": 2.9486,
      "step": 189066
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.266688823699951,
      "learning_rate": 4.6402788650129964e-05,
      "loss": 2.7634,
      "step": 189067
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3204705715179443,
      "learning_rate": 4.6400603274752334e-05,
      "loss": 2.84,
      "step": 189068
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.01640248298645,
      "learning_rate": 4.639841794652356e-05,
      "loss": 3.0422,
      "step": 189069
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0004093647003174,
      "learning_rate": 4.639623266544417e-05,
      "loss": 2.7886,
      "step": 189070
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0690672397613525,
      "learning_rate": 4.639404743151452e-05,
      "loss": 2.8638,
      "step": 189071
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0500752925872803,
      "learning_rate": 4.6391862244734966e-05,
      "loss": 2.8823,
      "step": 189072
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2982704639434814,
      "learning_rate": 4.6389677105105924e-05,
      "loss": 2.9981,
      "step": 189073
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8409016132354736,
      "learning_rate": 4.6387492012627866e-05,
      "loss": 3.0227,
      "step": 189074
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9441635608673096,
      "learning_rate": 4.638530696730113e-05,
      "loss": 3.0568,
      "step": 189075
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7309446334838867,
      "learning_rate": 4.638312196912617e-05,
      "loss": 2.8306,
      "step": 189076
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.805196762084961,
      "learning_rate": 4.638093701810343e-05,
      "loss": 3.0925,
      "step": 189077
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.371544361114502,
      "learning_rate": 4.637875211423323e-05,
      "loss": 2.9654,
      "step": 189078
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.569807767868042,
      "learning_rate": 4.6376567257515914e-05,
      "loss": 2.7523,
      "step": 189079
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.008862018585205,
      "learning_rate": 4.6374382447952085e-05,
      "loss": 3.109,
      "step": 189080
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.690295934677124,
      "learning_rate": 4.637219768554197e-05,
      "loss": 2.9244,
      "step": 189081
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2626242637634277,
      "learning_rate": 4.637001297028613e-05,
      "loss": 3.1486,
      "step": 189082
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.16386079788208,
      "learning_rate": 4.6367828302184876e-05,
      "loss": 3.0323,
      "step": 189083
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.688725233078003,
      "learning_rate": 4.636564368123866e-05,
      "loss": 3.0104,
      "step": 189084
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.897038221359253,
      "learning_rate": 4.636345910744777e-05,
      "loss": 3.1563,
      "step": 189085
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6984639167785645,
      "learning_rate": 4.636127458081274e-05,
      "loss": 2.9955,
      "step": 189086
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4240787029266357,
      "learning_rate": 4.6359090101333906e-05,
      "loss": 2.5525,
      "step": 189087
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.641352891921997,
      "learning_rate": 4.6356905669011745e-05,
      "loss": 2.8799,
      "step": 189088
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.863416910171509,
      "learning_rate": 4.6354721283846666e-05,
      "loss": 3.1526,
      "step": 189089
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8082501888275146,
      "learning_rate": 4.6352536945838994e-05,
      "loss": 2.9609,
      "step": 189090
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6672399044036865,
      "learning_rate": 4.635035265498913e-05,
      "loss": 2.9386,
      "step": 189091
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.182131052017212,
      "learning_rate": 4.634816841129758e-05,
      "loss": 3.0649,
      "step": 189092
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.532674789428711,
      "learning_rate": 4.634598421476461e-05,
      "loss": 2.9629,
      "step": 189093
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.62068510055542,
      "learning_rate": 4.634380006539081e-05,
      "loss": 2.7632,
      "step": 189094
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7800378799438477,
      "learning_rate": 4.6341615963176456e-05,
      "loss": 3.1288,
      "step": 189095
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.318678617477417,
      "learning_rate": 4.6339431908121984e-05,
      "loss": 2.8792,
      "step": 189096
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0491878986358643,
      "learning_rate": 4.633724790022775e-05,
      "loss": 2.7208,
      "step": 189097
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.939671516418457,
      "learning_rate": 4.6335063939494286e-05,
      "loss": 2.9944,
      "step": 189098
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.156559467315674,
      "learning_rate": 4.6332880025921834e-05,
      "loss": 2.9594,
      "step": 189099
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.133502960205078,
      "learning_rate": 4.6330696159510964e-05,
      "loss": 2.9082,
      "step": 189100
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1322410106658936,
      "learning_rate": 4.6328512340261925e-05,
      "loss": 2.8633,
      "step": 189101
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.372309684753418,
      "learning_rate": 4.632632856817536e-05,
      "loss": 2.72,
      "step": 189102
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0449819564819336,
      "learning_rate": 4.632414484325138e-05,
      "loss": 2.7746,
      "step": 189103
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.893702745437622,
      "learning_rate": 4.6321961165490595e-05,
      "loss": 2.9234,
      "step": 189104
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2776622772216797,
      "learning_rate": 4.6319777534893256e-05,
      "loss": 3.0896,
      "step": 189105
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.5529401302337646,
      "learning_rate": 4.631759395145995e-05,
      "loss": 2.9846,
      "step": 189106
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.061563730239868,
      "learning_rate": 4.631541041519092e-05,
      "loss": 2.8097,
      "step": 189107
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.122628688812256,
      "learning_rate": 4.631322692608676e-05,
      "loss": 2.9153,
      "step": 189108
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8996877670288086,
      "learning_rate": 4.631104348414765e-05,
      "loss": 3.0218,
      "step": 189109
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1227455139160156,
      "learning_rate": 4.630886008937417e-05,
      "loss": 2.9201,
      "step": 189110
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.476959228515625,
      "learning_rate": 4.630667674176659e-05,
      "loss": 3.039,
      "step": 189111
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.312962293624878,
      "learning_rate": 4.6304493441325454e-05,
      "loss": 3.0505,
      "step": 189112
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8598227500915527,
      "learning_rate": 4.630231018805105e-05,
      "loss": 2.8404,
      "step": 189113
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.927513837814331,
      "learning_rate": 4.630012698194389e-05,
      "loss": 3.0161,
      "step": 189114
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.682215690612793,
      "learning_rate": 4.6297943823004324e-05,
      "loss": 2.7906,
      "step": 189115
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.567307472229004,
      "learning_rate": 4.6295760711232765e-05,
      "loss": 2.9152,
      "step": 189116
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.181922197341919,
      "learning_rate": 4.629357764662954e-05,
      "loss": 2.8782,
      "step": 189117
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.691464424133301,
      "learning_rate": 4.629139462919519e-05,
      "loss": 2.8463,
      "step": 189118
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6922450065612793,
      "learning_rate": 4.6289211658930004e-05,
      "loss": 2.8006,
      "step": 189119
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.52251672744751,
      "learning_rate": 4.6287028735834485e-05,
      "loss": 2.8709,
      "step": 189120
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.698645830154419,
      "learning_rate": 4.6284845859909004e-05,
      "loss": 3.0315,
      "step": 189121
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.788482904434204,
      "learning_rate": 4.628266303115392e-05,
      "loss": 2.9449,
      "step": 189122
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6878459453582764,
      "learning_rate": 4.628048024956974e-05,
      "loss": 2.8448,
      "step": 189123
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9377920627593994,
      "learning_rate": 4.627829751515679e-05,
      "loss": 2.9655,
      "step": 189124
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8061914443969727,
      "learning_rate": 4.6276114827915414e-05,
      "loss": 2.8004,
      "step": 189125
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5974667072296143,
      "learning_rate": 4.62739321878462e-05,
      "loss": 3.0388,
      "step": 189126
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9681007862091064,
      "learning_rate": 4.627174959494942e-05,
      "loss": 2.6998,
      "step": 189127
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.364381790161133,
      "learning_rate": 4.626956704922547e-05,
      "loss": 2.8549,
      "step": 189128
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.954054832458496,
      "learning_rate": 4.626738455067485e-05,
      "loss": 3.0139,
      "step": 189129
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2251110076904297,
      "learning_rate": 4.626520209929793e-05,
      "loss": 2.8785,
      "step": 189130
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6925034523010254,
      "learning_rate": 4.6263019695095e-05,
      "loss": 2.9372,
      "step": 189131
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.508288383483887,
      "learning_rate": 4.626083733806665e-05,
      "loss": 2.9721,
      "step": 189132
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.7751855850219727,
      "learning_rate": 4.625865502821322e-05,
      "loss": 2.9427,
      "step": 189133
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.069995164871216,
      "learning_rate": 4.625647276553499e-05,
      "loss": 2.8507,
      "step": 189134
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7347753047943115,
      "learning_rate": 4.6254290550032555e-05,
      "loss": 2.9137,
      "step": 189135
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.457242488861084,
      "learning_rate": 4.625210838170619e-05,
      "loss": 2.8091,
      "step": 189136
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7867677211761475,
      "learning_rate": 4.624992626055638e-05,
      "loss": 3.2319,
      "step": 189137
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6693453788757324,
      "learning_rate": 4.6247744186583504e-05,
      "loss": 2.976,
      "step": 189138
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.7767457962036133,
      "learning_rate": 4.624556215978799e-05,
      "loss": 2.7387,
      "step": 189139
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8745782375335693,
      "learning_rate": 4.624338018017013e-05,
      "loss": 2.5976,
      "step": 189140
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8764803409576416,
      "learning_rate": 4.6241198247730474e-05,
      "loss": 2.8318,
      "step": 189141
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0093162059783936,
      "learning_rate": 4.6239016362469304e-05,
      "loss": 2.9709,
      "step": 189142
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.965569257736206,
      "learning_rate": 4.62368345243872e-05,
      "loss": 2.9962,
      "step": 189143
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3607821464538574,
      "learning_rate": 4.623465273348442e-05,
      "loss": 2.9938,
      "step": 189144
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.614002227783203,
      "learning_rate": 4.623247098976141e-05,
      "loss": 3.2527,
      "step": 189145
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.355388879776001,
      "learning_rate": 4.6230289293218514e-05,
      "loss": 2.8895,
      "step": 189146
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.505991220474243,
      "learning_rate": 4.622810764385625e-05,
      "loss": 2.7497,
      "step": 189147
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.894155740737915,
      "learning_rate": 4.622592604167491e-05,
      "loss": 2.8086,
      "step": 189148
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.745178461074829,
      "learning_rate": 4.622374448667503e-05,
      "loss": 2.7373,
      "step": 189149
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.943514585494995,
      "learning_rate": 4.622156297885698e-05,
      "loss": 2.7976,
      "step": 189150
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7169253826141357,
      "learning_rate": 4.6219381518221074e-05,
      "loss": 2.8252,
      "step": 189151
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9862520694732666,
      "learning_rate": 4.621720010476774e-05,
      "loss": 2.827,
      "step": 189152
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6748099327087402,
      "learning_rate": 4.6215018738497486e-05,
      "loss": 2.7158,
      "step": 189153
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.689758539199829,
      "learning_rate": 4.6212837419410566e-05,
      "loss": 3.1707,
      "step": 189154
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.055432081222534,
      "learning_rate": 4.621065614750756e-05,
      "loss": 2.9708,
      "step": 189155
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.693497657775879,
      "learning_rate": 4.620847492278879e-05,
      "loss": 3.1666,
      "step": 189156
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1328389644622803,
      "learning_rate": 4.620629374525463e-05,
      "loss": 3.2222,
      "step": 189157
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7893502712249756,
      "learning_rate": 4.6204112614905465e-05,
      "loss": 3.0026,
      "step": 189158
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.8693113327026367,
      "learning_rate": 4.6201931531741786e-05,
      "loss": 2.7262,
      "step": 189159
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.031882286071777,
      "learning_rate": 4.61997504957639e-05,
      "loss": 2.7792,
      "step": 189160
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6491451263427734,
      "learning_rate": 4.619756950697237e-05,
      "loss": 2.7527,
      "step": 189161
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3155479431152344,
      "learning_rate": 4.619538856536745e-05,
      "loss": 2.9441,
      "step": 189162
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0749104022979736,
      "learning_rate": 4.619320767094963e-05,
      "loss": 2.9405,
      "step": 189163
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.621220827102661,
      "learning_rate": 4.619102682371922e-05,
      "loss": 2.8093,
      "step": 189164
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.579559326171875,
      "learning_rate": 4.618884602367674e-05,
      "loss": 2.9537,
      "step": 189165
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0759758949279785,
      "learning_rate": 4.618666527082251e-05,
      "loss": 2.7338,
      "step": 189166
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0446760654449463,
      "learning_rate": 4.6184484565157e-05,
      "loss": 2.9595,
      "step": 189167
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.638841152191162,
      "learning_rate": 4.618230390668054e-05,
      "loss": 3.025,
      "step": 189168
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7697911262512207,
      "learning_rate": 4.61801232953937e-05,
      "loss": 2.9641,
      "step": 189169
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0040361881256104,
      "learning_rate": 4.617794273129665e-05,
      "loss": 2.9693,
      "step": 189170
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4074456691741943,
      "learning_rate": 4.617576221438998e-05,
      "loss": 2.8502,
      "step": 189171
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.679206132888794,
      "learning_rate": 4.6173581744673924e-05,
      "loss": 2.9423,
      "step": 189172
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6589441299438477,
      "learning_rate": 4.617140132214909e-05,
      "loss": 2.7536,
      "step": 189173
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.95825457572937,
      "learning_rate": 4.616922094681571e-05,
      "loss": 2.86,
      "step": 189174
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.946030616760254,
      "learning_rate": 4.616704061867438e-05,
      "loss": 2.9758,
      "step": 189175
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.1498403549194336,
      "learning_rate": 4.61648603377253e-05,
      "loss": 2.9579,
      "step": 189176
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8399462699890137,
      "learning_rate": 4.6162680103968994e-05,
      "loss": 2.9321,
      "step": 189177
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.696624755859375,
      "learning_rate": 4.616049991740578e-05,
      "loss": 3.0773,
      "step": 189178
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.525545358657837,
      "learning_rate": 4.615831977803618e-05,
      "loss": 2.9976,
      "step": 189179
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.651352882385254,
      "learning_rate": 4.6156139685860463e-05,
      "loss": 3.224,
      "step": 189180
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.124227285385132,
      "learning_rate": 4.6153959640879257e-05,
      "loss": 2.9597,
      "step": 189181
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4399263858795166,
      "learning_rate": 4.61517796430927e-05,
      "loss": 2.861,
      "step": 189182
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0284628868103027,
      "learning_rate": 4.6149599692501395e-05,
      "loss": 2.9859,
      "step": 189183
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3999392986297607,
      "learning_rate": 4.614741978910557e-05,
      "loss": 2.8051,
      "step": 189184
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.279463768005371,
      "learning_rate": 4.614523993290582e-05,
      "loss": 2.9856,
      "step": 189185
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.846122980117798,
      "learning_rate": 4.6143060123902386e-05,
      "loss": 2.8011,
      "step": 189186
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5697453022003174,
      "learning_rate": 4.61408803620959e-05,
      "loss": 3.1003,
      "step": 189187
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9558517932891846,
      "learning_rate": 4.613870064748646e-05,
      "loss": 2.754,
      "step": 189188
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.288188934326172,
      "learning_rate": 4.61365209800747e-05,
      "loss": 2.9417,
      "step": 189189
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.22055983543396,
      "learning_rate": 4.6134341359860886e-05,
      "loss": 2.8831,
      "step": 189190
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.691596031188965,
      "learning_rate": 4.6132161786845576e-05,
      "loss": 3.0833,
      "step": 189191
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.119702100753784,
      "learning_rate": 4.612998226102899e-05,
      "loss": 3.0491,
      "step": 189192
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.39011287689209,
      "learning_rate": 4.61278027824118e-05,
      "loss": 3.1008,
      "step": 189193
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.937129020690918,
      "learning_rate": 4.612562335099407e-05,
      "loss": 2.9711,
      "step": 189194
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.739039421081543,
      "learning_rate": 4.6123443966776474e-05,
      "loss": 2.9814,
      "step": 189195
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.5171351432800293,
      "learning_rate": 4.6121264629759255e-05,
      "loss": 3.0299,
      "step": 189196
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9473347663879395,
      "learning_rate": 4.6119085339942954e-05,
      "loss": 3.2692,
      "step": 189197
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9084997177124023,
      "learning_rate": 4.611690609732782e-05,
      "loss": 2.7819,
      "step": 189198
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.667609214782715,
      "learning_rate": 4.611472690191443e-05,
      "loss": 2.9037,
      "step": 189199
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0474040508270264,
      "learning_rate": 4.611254775370309e-05,
      "loss": 2.8182,
      "step": 189200
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.296692132949829,
      "learning_rate": 4.611036865269425e-05,
      "loss": 2.859,
      "step": 189201
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7306864261627197,
      "learning_rate": 4.610818959888819e-05,
      "loss": 3.0215,
      "step": 189202
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0662591457366943,
      "learning_rate": 4.61060105922855e-05,
      "loss": 2.7004,
      "step": 189203
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6869122982025146,
      "learning_rate": 4.610383163288639e-05,
      "loss": 3.0328,
      "step": 189204
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.262953281402588,
      "learning_rate": 4.6101652720691486e-05,
      "loss": 2.8108,
      "step": 189205
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.075793504714966,
      "learning_rate": 4.609947385570105e-05,
      "loss": 2.8776,
      "step": 189206
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8363404273986816,
      "learning_rate": 4.6097295037915495e-05,
      "loss": 2.9813,
      "step": 189207
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6864376068115234,
      "learning_rate": 4.609511626733521e-05,
      "loss": 2.9579,
      "step": 189208
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.837844133377075,
      "learning_rate": 4.60929375439607e-05,
      "loss": 2.9265,
      "step": 189209
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.894897699356079,
      "learning_rate": 4.609075886779222e-05,
      "loss": 3.099,
      "step": 189210
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.753976345062256,
      "learning_rate": 4.6088580238830355e-05,
      "loss": 3.1602,
      "step": 189211
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.538133144378662,
      "learning_rate": 4.608640165707539e-05,
      "loss": 2.7793,
      "step": 189212
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.214862585067749,
      "learning_rate": 4.60842231225277e-05,
      "loss": 2.7628,
      "step": 189213
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2751271724700928,
      "learning_rate": 4.608204463518781e-05,
      "loss": 2.7454,
      "step": 189214
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.817124366760254,
      "learning_rate": 4.607986619505606e-05,
      "loss": 2.9464,
      "step": 189215
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.137080192565918,
      "learning_rate": 4.6077687802132814e-05,
      "loss": 2.8888,
      "step": 189216
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.734182119369507,
      "learning_rate": 4.607550945641857e-05,
      "loss": 2.7521,
      "step": 189217
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8622541427612305,
      "learning_rate": 4.607333115791366e-05,
      "loss": 3.1121,
      "step": 189218
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.84251070022583,
      "learning_rate": 4.607115290661846e-05,
      "loss": 3.0558,
      "step": 189219
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2951791286468506,
      "learning_rate": 4.606897470253349e-05,
      "loss": 2.949,
      "step": 189220
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9462997913360596,
      "learning_rate": 4.6066796545659024e-05,
      "loss": 2.8519,
      "step": 189221
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.141106128692627,
      "learning_rate": 4.606461843599563e-05,
      "loss": 3.092,
      "step": 189222
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.716217041015625,
      "learning_rate": 4.606244037354356e-05,
      "loss": 2.7871,
      "step": 189223
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.559174060821533,
      "learning_rate": 4.606026235830333e-05,
      "loss": 2.8818,
      "step": 189224
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.243581533432007,
      "learning_rate": 4.605808439027521e-05,
      "loss": 2.9563,
      "step": 189225
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8937325477600098,
      "learning_rate": 4.6055906469459714e-05,
      "loss": 2.7942,
      "step": 189226
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1760199069976807,
      "learning_rate": 4.6053728595857184e-05,
      "loss": 2.8983,
      "step": 189227
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6930556297302246,
      "learning_rate": 4.605155076946813e-05,
      "loss": 3.3281,
      "step": 189228
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3958146572113037,
      "learning_rate": 4.60493729902929e-05,
      "loss": 3.1366,
      "step": 189229
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.939816951751709,
      "learning_rate": 4.604719525833184e-05,
      "loss": 3.1113,
      "step": 189230
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.823861598968506,
      "learning_rate": 4.604501757358537e-05,
      "loss": 2.8598,
      "step": 189231
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.8966064453125,
      "learning_rate": 4.6042839936053975e-05,
      "loss": 2.901,
      "step": 189232
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.607342481613159,
      "learning_rate": 4.604066234573794e-05,
      "loss": 2.8698,
      "step": 189233
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9534175395965576,
      "learning_rate": 4.603848480263784e-05,
      "loss": 2.767,
      "step": 189234
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3951687812805176,
      "learning_rate": 4.6036307306753874e-05,
      "loss": 2.8378,
      "step": 189235
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.474055767059326,
      "learning_rate": 4.60341298580867e-05,
      "loss": 2.9502,
      "step": 189236
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.248551845550537,
      "learning_rate": 4.6031952456636456e-05,
      "loss": 2.8057,
      "step": 189237
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.729915142059326,
      "learning_rate": 4.602977510240371e-05,
      "loss": 3.1977,
      "step": 189238
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.22293758392334,
      "learning_rate": 4.602759779538876e-05,
      "loss": 2.685,
      "step": 189239
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.140956878662109,
      "learning_rate": 4.602542053559214e-05,
      "loss": 3.0462,
      "step": 189240
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1054389476776123,
      "learning_rate": 4.602324332301411e-05,
      "loss": 2.9568,
      "step": 189241
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6169707775115967,
      "learning_rate": 4.6021066157655314e-05,
      "loss": 2.9206,
      "step": 189242
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.484300374984741,
      "learning_rate": 4.6018889039515846e-05,
      "loss": 3.1772,
      "step": 189243
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.345996379852295,
      "learning_rate": 4.6016711968596344e-05,
      "loss": 2.8862,
      "step": 189244
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.556159257888794,
      "learning_rate": 4.6014534944897065e-05,
      "loss": 3.1042,
      "step": 189245
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8531763553619385,
      "learning_rate": 4.601235796841851e-05,
      "loss": 2.9656,
      "step": 189246
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.670341491699219,
      "learning_rate": 4.601018103916102e-05,
      "loss": 3.0021,
      "step": 189247
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.7302839756011963,
      "learning_rate": 4.6008004157125155e-05,
      "loss": 2.7365,
      "step": 189248
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.4558918476104736,
      "learning_rate": 4.600582732231105e-05,
      "loss": 2.6875,
      "step": 189249
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8019859790802,
      "learning_rate": 4.600365053471934e-05,
      "loss": 3.0463,
      "step": 189250
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.431766986846924,
      "learning_rate": 4.6001473794350286e-05,
      "loss": 3.0644,
      "step": 189251
    },
    {
      "epoch": 2.46,
      "grad_norm": 5.171634197235107,
      "learning_rate": 4.599929710120439e-05,
      "loss": 3.0535,
      "step": 189252
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.377582311630249,
      "learning_rate": 4.599712045528195e-05,
      "loss": 2.8627,
      "step": 189253
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.833441972732544,
      "learning_rate": 4.599494385658361e-05,
      "loss": 3.1181,
      "step": 189254
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.211700677871704,
      "learning_rate": 4.5992767305109454e-05,
      "loss": 3.0221,
      "step": 189255
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.798375129699707,
      "learning_rate": 4.5990590800860096e-05,
      "loss": 2.9078,
      "step": 189256
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.513256072998047,
      "learning_rate": 4.598841434383582e-05,
      "loss": 3.0507,
      "step": 189257
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.866067409515381,
      "learning_rate": 4.5986237934037175e-05,
      "loss": 3.0201,
      "step": 189258
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.540038824081421,
      "learning_rate": 4.598406157146442e-05,
      "loss": 3.1338,
      "step": 189259
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.484752655029297,
      "learning_rate": 4.5981885256118145e-05,
      "loss": 2.766,
      "step": 189260
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4094088077545166,
      "learning_rate": 4.5979708987998466e-05,
      "loss": 2.8413,
      "step": 189261
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.777334690093994,
      "learning_rate": 4.597753276710607e-05,
      "loss": 2.9669,
      "step": 189262
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.6710290908813477,
      "learning_rate": 4.5975356593441135e-05,
      "loss": 2.9921,
      "step": 189263
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9923906326293945,
      "learning_rate": 4.597318046700429e-05,
      "loss": 2.8558,
      "step": 189264
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.068037509918213,
      "learning_rate": 4.597100438779573e-05,
      "loss": 2.9003,
      "step": 189265
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8049864768981934,
      "learning_rate": 4.596882835581609e-05,
      "loss": 3.0803,
      "step": 189266
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9268362522125244,
      "learning_rate": 4.596665237106551e-05,
      "loss": 2.6786,
      "step": 189267
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4873762130737305,
      "learning_rate": 4.5964476433544576e-05,
      "loss": 3.1164,
      "step": 189268
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.162057399749756,
      "learning_rate": 4.596230054325359e-05,
      "loss": 3.0871,
      "step": 189269
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.523416519165039,
      "learning_rate": 4.5960124700193104e-05,
      "loss": 2.9468,
      "step": 189270
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.4808292388916016,
      "learning_rate": 4.5957948904363295e-05,
      "loss": 2.7805,
      "step": 189271
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.900057792663574,
      "learning_rate": 4.595577315576488e-05,
      "loss": 2.887,
      "step": 189272
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.626239776611328,
      "learning_rate": 4.595359745439794e-05,
      "loss": 2.9836,
      "step": 189273
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9671411514282227,
      "learning_rate": 4.595142180026309e-05,
      "loss": 2.8417,
      "step": 189274
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.522693634033203,
      "learning_rate": 4.594924619336059e-05,
      "loss": 3.0818,
      "step": 189275
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1074461936950684,
      "learning_rate": 4.594707063369097e-05,
      "loss": 2.8309,
      "step": 189276
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.8854894638061523,
      "learning_rate": 4.594489512125454e-05,
      "loss": 2.9831,
      "step": 189277
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.779012441635132,
      "learning_rate": 4.5942719656051895e-05,
      "loss": 3.0005,
      "step": 189278
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9890215396881104,
      "learning_rate": 4.5940544238083166e-05,
      "loss": 2.7774,
      "step": 189279
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.809457778930664,
      "learning_rate": 4.593836886734892e-05,
      "loss": 2.7441,
      "step": 189280
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6962339878082275,
      "learning_rate": 4.593619354384949e-05,
      "loss": 2.8285,
      "step": 189281
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6885290145874023,
      "learning_rate": 4.593401826758537e-05,
      "loss": 2.8791,
      "step": 189282
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.429293394088745,
      "learning_rate": 4.5931843038556835e-05,
      "loss": 2.7327,
      "step": 189283
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0102427005767822,
      "learning_rate": 4.592966785676445e-05,
      "loss": 3.0507,
      "step": 189284
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0173099040985107,
      "learning_rate": 4.5927492722208516e-05,
      "loss": 2.8211,
      "step": 189285
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6756534576416016,
      "learning_rate": 4.592531763488946e-05,
      "loss": 2.8637,
      "step": 189286
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2747561931610107,
      "learning_rate": 4.5923142594807646e-05,
      "loss": 2.8796,
      "step": 189287
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.2766103744506836,
      "learning_rate": 4.5920967601963553e-05,
      "loss": 2.5529,
      "step": 189288
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9420814514160156,
      "learning_rate": 4.591879265635747e-05,
      "loss": 2.9219,
      "step": 189289
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1700127124786377,
      "learning_rate": 4.591661775798997e-05,
      "loss": 3.0558,
      "step": 189290
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.3716835975646973,
      "learning_rate": 4.591444290686138e-05,
      "loss": 2.8743,
      "step": 189291
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.3933162689208984,
      "learning_rate": 4.591226810297207e-05,
      "loss": 2.8137,
      "step": 189292
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1781744956970215,
      "learning_rate": 4.5910093346322404e-05,
      "loss": 2.9164,
      "step": 189293
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.1051716804504395,
      "learning_rate": 4.5907918636912924e-05,
      "loss": 2.8554,
      "step": 189294
    },
    {
      "epoch": 2.46,
      "grad_norm": 4.568256378173828,
      "learning_rate": 4.590574397474388e-05,
      "loss": 2.8656,
      "step": 189295
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0102739334106445,
      "learning_rate": 4.5903569359815815e-05,
      "loss": 2.7721,
      "step": 189296
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.426487922668457,
      "learning_rate": 4.5901394792129096e-05,
      "loss": 2.9902,
      "step": 189297
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6365227699279785,
      "learning_rate": 4.5899220271684026e-05,
      "loss": 2.9429,
      "step": 189298
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.9947116374969482,
      "learning_rate": 4.5897045798481155e-05,
      "loss": 2.8158,
      "step": 189299
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.120910167694092,
      "learning_rate": 4.58948713725208e-05,
      "loss": 3.0805,
      "step": 189300
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.501993417739868,
      "learning_rate": 4.5892696993803324e-05,
      "loss": 2.9676,
      "step": 189301
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1616106033325195,
      "learning_rate": 4.5890522662329286e-05,
      "loss": 3.0861,
      "step": 189302
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9522414207458496,
      "learning_rate": 4.588834837809899e-05,
      "loss": 2.996,
      "step": 189303
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.6212799549102783,
      "learning_rate": 4.588617414111277e-05,
      "loss": 2.8449,
      "step": 189304
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.650378465652466,
      "learning_rate": 4.5883999951371164e-05,
      "loss": 2.8597,
      "step": 189305
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.229511022567749,
      "learning_rate": 4.5881825808874526e-05,
      "loss": 2.9445,
      "step": 189306
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.886533260345459,
      "learning_rate": 4.58796517136232e-05,
      "loss": 3.0304,
      "step": 189307
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.7628214359283447,
      "learning_rate": 4.587747766561768e-05,
      "loss": 2.8141,
      "step": 189308
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.1431732177734375,
      "learning_rate": 4.5875303664858365e-05,
      "loss": 2.9506,
      "step": 189309
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.299602746963501,
      "learning_rate": 4.587312971134556e-05,
      "loss": 2.812,
      "step": 189310
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.921808958053589,
      "learning_rate": 4.587095580507979e-05,
      "loss": 2.9464,
      "step": 189311
    },
    {
      "epoch": 2.46,
      "grad_norm": 3.0240070819854736,
      "learning_rate": 4.586878194606133e-05,
      "loss": 3.0129,
      "step": 189312
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.979552984237671,
      "learning_rate": 4.586660813429074e-05,
      "loss": 2.8547,
      "step": 189313
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.7864990234375,
      "learning_rate": 4.586443436976836e-05,
      "loss": 3.0458,
      "step": 189314
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.169553518295288,
      "learning_rate": 4.586226065249458e-05,
      "loss": 2.8263,
      "step": 189315
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.281153678894043,
      "learning_rate": 4.58600869824697e-05,
      "loss": 2.8588,
      "step": 189316
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.663769483566284,
      "learning_rate": 4.585791335969433e-05,
      "loss": 3.1734,
      "step": 189317
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.024372577667236,
      "learning_rate": 4.5855739784168664e-05,
      "loss": 2.956,
      "step": 189318
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7741611003875732,
      "learning_rate": 4.5853566255893306e-05,
      "loss": 2.9988,
      "step": 189319
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.89486026763916,
      "learning_rate": 4.585139277486851e-05,
      "loss": 3.1198,
      "step": 189320
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6532015800476074,
      "learning_rate": 4.5849219341094855e-05,
      "loss": 2.9827,
      "step": 189321
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.661855459213257,
      "learning_rate": 4.58470459545725e-05,
      "loss": 3.0033,
      "step": 189322
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1790659427642822,
      "learning_rate": 4.5844872615302055e-05,
      "loss": 2.7457,
      "step": 189323
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.377220630645752,
      "learning_rate": 4.584269932328377e-05,
      "loss": 3.0912,
      "step": 189324
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.6738786697387695,
      "learning_rate": 4.584052607851819e-05,
      "loss": 2.8912,
      "step": 189325
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.266083240509033,
      "learning_rate": 4.583835288100558e-05,
      "loss": 2.836,
      "step": 189326
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9769718647003174,
      "learning_rate": 4.583617973074657e-05,
      "loss": 3.051,
      "step": 189327
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7084033489227295,
      "learning_rate": 4.583400662774126e-05,
      "loss": 3.066,
      "step": 189328
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.952228307723999,
      "learning_rate": 4.5831833571990284e-05,
      "loss": 3.0644,
      "step": 189329
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8001420497894287,
      "learning_rate": 4.5829660563493876e-05,
      "loss": 2.8716,
      "step": 189330
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.445720672607422,
      "learning_rate": 4.5827487602252635e-05,
      "loss": 2.9368,
      "step": 189331
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.043638229370117,
      "learning_rate": 4.582531468826675e-05,
      "loss": 2.8142,
      "step": 189332
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.688343048095703,
      "learning_rate": 4.5823141821536914e-05,
      "loss": 2.9866,
      "step": 189333
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.37625789642334,
      "learning_rate": 4.58209690020632e-05,
      "loss": 2.7003,
      "step": 189334
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9260313510894775,
      "learning_rate": 4.581879622984626e-05,
      "loss": 2.8817,
      "step": 189335
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.47088885307312,
      "learning_rate": 4.581662350488631e-05,
      "loss": 2.9445,
      "step": 189336
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.880467653274536,
      "learning_rate": 4.5814450827183926e-05,
      "loss": 3.1184,
      "step": 189337
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6453685760498047,
      "learning_rate": 4.581227819673934e-05,
      "loss": 2.9747,
      "step": 189338
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6581873893737793,
      "learning_rate": 4.581010561355322e-05,
      "loss": 3.0137,
      "step": 189339
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.932385444641113,
      "learning_rate": 4.580793307762566e-05,
      "loss": 2.8038,
      "step": 189340
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.299208641052246,
      "learning_rate": 4.580576058895723e-05,
      "loss": 2.8265,
      "step": 189341
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3523635864257812,
      "learning_rate": 4.58035881475483e-05,
      "loss": 2.7257,
      "step": 189342
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.812241554260254,
      "learning_rate": 4.5801415753399295e-05,
      "loss": 2.9995,
      "step": 189343
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5503416061401367,
      "learning_rate": 4.579924340651056e-05,
      "loss": 2.9888,
      "step": 189344
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4068474769592285,
      "learning_rate": 4.579707110688268e-05,
      "loss": 2.6112,
      "step": 189345
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.991830587387085,
      "learning_rate": 4.579489885451579e-05,
      "loss": 2.9178,
      "step": 189346
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5532305240631104,
      "learning_rate": 4.57927266494105e-05,
      "loss": 2.9007,
      "step": 189347
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.565208911895752,
      "learning_rate": 4.5790554491567076e-05,
      "loss": 3.057,
      "step": 189348
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7065200805664062,
      "learning_rate": 4.578838238098604e-05,
      "loss": 3.1238,
      "step": 189349
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6704297065734863,
      "learning_rate": 4.578621031766767e-05,
      "loss": 2.8801,
      "step": 189350
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.727954149246216,
      "learning_rate": 4.5784038301612594e-05,
      "loss": 2.828,
      "step": 189351
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2995102405548096,
      "learning_rate": 4.5781866332820905e-05,
      "loss": 2.7799,
      "step": 189352
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.9086930751800537,
      "learning_rate": 4.5779694411293275e-05,
      "loss": 3.1549,
      "step": 189353
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.7192165851593018,
      "learning_rate": 4.577752253702987e-05,
      "loss": 3.0172,
      "step": 189354
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.941152095794678,
      "learning_rate": 4.577535071003133e-05,
      "loss": 2.7647,
      "step": 189355
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5105721950531006,
      "learning_rate": 4.577317893029788e-05,
      "loss": 2.7487,
      "step": 189356
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4129252433776855,
      "learning_rate": 4.5771007197830154e-05,
      "loss": 2.8387,
      "step": 189357
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.905745267868042,
      "learning_rate": 4.576883551262822e-05,
      "loss": 2.8158,
      "step": 189358
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5220699310302734,
      "learning_rate": 4.576666387469274e-05,
      "loss": 2.8619,
      "step": 189359
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8094794750213623,
      "learning_rate": 4.576449228402399e-05,
      "loss": 2.9839,
      "step": 189360
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.8524627685546875,
      "learning_rate": 4.576232074062246e-05,
      "loss": 2.9282,
      "step": 189361
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.012965679168701,
      "learning_rate": 4.5760149244488454e-05,
      "loss": 2.9261,
      "step": 189362
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9599030017852783,
      "learning_rate": 4.575797779562257e-05,
      "loss": 2.8661,
      "step": 189363
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.700327157974243,
      "learning_rate": 4.575580639402494e-05,
      "loss": 3.0092,
      "step": 189364
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.487424850463867,
      "learning_rate": 4.575363503969617e-05,
      "loss": 2.9713,
      "step": 189365
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.23390793800354,
      "learning_rate": 4.575146373263655e-05,
      "loss": 2.7248,
      "step": 189366
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.7112298011779785,
      "learning_rate": 4.5749292472846586e-05,
      "loss": 2.8427,
      "step": 189367
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.8411688804626465,
      "learning_rate": 4.574712126032655e-05,
      "loss": 2.7564,
      "step": 189368
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.735114097595215,
      "learning_rate": 4.574495009507703e-05,
      "loss": 3.1016,
      "step": 189369
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.020408868789673,
      "learning_rate": 4.574277897709829e-05,
      "loss": 3.0054,
      "step": 189370
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7694857120513916,
      "learning_rate": 4.5740607906390746e-05,
      "loss": 2.9289,
      "step": 189371
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.234785795211792,
      "learning_rate": 4.573843688295479e-05,
      "loss": 2.9387,
      "step": 189372
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8516738414764404,
      "learning_rate": 4.5736265906790914e-05,
      "loss": 2.7597,
      "step": 189373
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4882864952087402,
      "learning_rate": 4.573409497789939e-05,
      "loss": 2.9754,
      "step": 189374
    },
    {
      "epoch": 2.47,
      "grad_norm": 6.855543613433838,
      "learning_rate": 4.573192409628079e-05,
      "loss": 3.0628,
      "step": 189375
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.075737953186035,
      "learning_rate": 4.572975326193541e-05,
      "loss": 3.164,
      "step": 189376
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.884736061096191,
      "learning_rate": 4.5727582474863675e-05,
      "loss": 3.152,
      "step": 189377
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0002732276916504,
      "learning_rate": 4.5725411735065896e-05,
      "loss": 2.9987,
      "step": 189378
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.102948188781738,
      "learning_rate": 4.572324104254267e-05,
      "loss": 2.8041,
      "step": 189379
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.638718366622925,
      "learning_rate": 4.572107039729419e-05,
      "loss": 3.1142,
      "step": 189380
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.976318359375,
      "learning_rate": 4.571889979932103e-05,
      "loss": 2.7079,
      "step": 189381
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.088846206665039,
      "learning_rate": 4.571672924862355e-05,
      "loss": 2.8298,
      "step": 189382
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.389157772064209,
      "learning_rate": 4.5714558745202026e-05,
      "loss": 3.0022,
      "step": 189383
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.269840717315674,
      "learning_rate": 4.571238828905708e-05,
      "loss": 2.971,
      "step": 189384
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.8763487339019775,
      "learning_rate": 4.571021788018898e-05,
      "loss": 3.0965,
      "step": 189385
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6083297729492188,
      "learning_rate": 4.570804751859807e-05,
      "loss": 3.0007,
      "step": 189386
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7280337810516357,
      "learning_rate": 4.57058772042849e-05,
      "loss": 2.775,
      "step": 189387
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.510155200958252,
      "learning_rate": 4.570370693724982e-05,
      "loss": 2.8659,
      "step": 189388
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.514301776885986,
      "learning_rate": 4.570153671749315e-05,
      "loss": 2.9904,
      "step": 189389
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.1905646324157715,
      "learning_rate": 4.569936654501543e-05,
      "loss": 2.6385,
      "step": 189390
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9589321613311768,
      "learning_rate": 4.5697196419817016e-05,
      "loss": 2.9994,
      "step": 189391
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.683681011199951,
      "learning_rate": 4.569502634189822e-05,
      "loss": 2.6597,
      "step": 189392
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.542210817337036,
      "learning_rate": 4.5692856311259574e-05,
      "loss": 2.9333,
      "step": 189393
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.655487060546875,
      "learning_rate": 4.569068632790144e-05,
      "loss": 2.7236,
      "step": 189394
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9638495445251465,
      "learning_rate": 4.568851639182412e-05,
      "loss": 2.9622,
      "step": 189395
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1702072620391846,
      "learning_rate": 4.568634650302817e-05,
      "loss": 2.9315,
      "step": 189396
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.227430582046509,
      "learning_rate": 4.568417666151391e-05,
      "loss": 2.5182,
      "step": 189397
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5545761585235596,
      "learning_rate": 4.568200686728178e-05,
      "loss": 3.025,
      "step": 189398
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.090843677520752,
      "learning_rate": 4.5679837120332174e-05,
      "loss": 2.9022,
      "step": 189399
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1775972843170166,
      "learning_rate": 4.567766742066551e-05,
      "loss": 2.7321,
      "step": 189400
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9787449836730957,
      "learning_rate": 4.567549776828209e-05,
      "loss": 2.8862,
      "step": 189401
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.323345184326172,
      "learning_rate": 4.567332816318244e-05,
      "loss": 3.0123,
      "step": 189402
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4369449615478516,
      "learning_rate": 4.5671158605366874e-05,
      "loss": 2.83,
      "step": 189403
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4545490741729736,
      "learning_rate": 4.5668989094835885e-05,
      "loss": 3.0257,
      "step": 189404
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5370113849639893,
      "learning_rate": 4.56668196315898e-05,
      "loss": 3.0635,
      "step": 189405
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.349684238433838,
      "learning_rate": 4.5664650215629164e-05,
      "loss": 2.9458,
      "step": 189406
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8004677295684814,
      "learning_rate": 4.5662480846954164e-05,
      "loss": 3.1393,
      "step": 189407
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0928640365600586,
      "learning_rate": 4.566031152556534e-05,
      "loss": 2.8368,
      "step": 189408
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.482813835144043,
      "learning_rate": 4.5658142251463026e-05,
      "loss": 3.0822,
      "step": 189409
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0311858654022217,
      "learning_rate": 4.565597302464772e-05,
      "loss": 3.2619,
      "step": 189410
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7157018184661865,
      "learning_rate": 4.5653803845119684e-05,
      "loss": 2.9262,
      "step": 189411
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4990696907043457,
      "learning_rate": 4.565163471287956e-05,
      "loss": 2.8366,
      "step": 189412
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3301076889038086,
      "learning_rate": 4.5649465627927474e-05,
      "loss": 3.0208,
      "step": 189413
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.283182144165039,
      "learning_rate": 4.5647296590264025e-05,
      "loss": 2.889,
      "step": 189414
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.940985679626465,
      "learning_rate": 4.564512759988945e-05,
      "loss": 2.9797,
      "step": 189415
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8468613624572754,
      "learning_rate": 4.564295865680432e-05,
      "loss": 2.8283,
      "step": 189416
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.275399684906006,
      "learning_rate": 4.564078976100892e-05,
      "loss": 2.8342,
      "step": 189417
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.679499626159668,
      "learning_rate": 4.563862091250383e-05,
      "loss": 3.1906,
      "step": 189418
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5694077014923096,
      "learning_rate": 4.563645211128914e-05,
      "loss": 2.6599,
      "step": 189419
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.127910852432251,
      "learning_rate": 4.563428335736556e-05,
      "loss": 3.0511,
      "step": 189420
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.680283546447754,
      "learning_rate": 4.563211465073329e-05,
      "loss": 2.9144,
      "step": 189421
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1350257396698,
      "learning_rate": 4.5629945991392845e-05,
      "loss": 2.9604,
      "step": 189422
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.312441349029541,
      "learning_rate": 4.5627777379344547e-05,
      "loss": 2.9844,
      "step": 189423
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9854183197021484,
      "learning_rate": 4.562560881458901e-05,
      "loss": 2.9723,
      "step": 189424
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9084055423736572,
      "learning_rate": 4.5623440297126314e-05,
      "loss": 2.8778,
      "step": 189425
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.3928370475769043,
      "learning_rate": 4.5621271826957084e-05,
      "loss": 2.9214,
      "step": 189426
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4464237689971924,
      "learning_rate": 4.5619103404081594e-05,
      "loss": 2.8733,
      "step": 189427
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.138413667678833,
      "learning_rate": 4.56169350285004e-05,
      "loss": 2.8708,
      "step": 189428
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.3515501022338867,
      "learning_rate": 4.561476670021375e-05,
      "loss": 2.8771,
      "step": 189429
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8798704147338867,
      "learning_rate": 4.5612598419222225e-05,
      "loss": 2.7224,
      "step": 189430
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.117321252822876,
      "learning_rate": 4.5610430185526e-05,
      "loss": 2.93,
      "step": 189431
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9237804412841797,
      "learning_rate": 4.5608261999125684e-05,
      "loss": 2.7617,
      "step": 189432
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5639989376068115,
      "learning_rate": 4.56060938600215e-05,
      "loss": 2.7595,
      "step": 189433
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8484020233154297,
      "learning_rate": 4.560392576821401e-05,
      "loss": 2.8617,
      "step": 189434
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.544550657272339,
      "learning_rate": 4.560175772370349e-05,
      "loss": 2.9244,
      "step": 189435
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.315401315689087,
      "learning_rate": 4.55995897264905e-05,
      "loss": 2.7558,
      "step": 189436
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.442567348480225,
      "learning_rate": 4.559742177657531e-05,
      "loss": 2.9606,
      "step": 189437
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.75947904586792,
      "learning_rate": 4.5595253873958395e-05,
      "loss": 3.2332,
      "step": 189438
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0341601371765137,
      "learning_rate": 4.5593086018640035e-05,
      "loss": 3.1027,
      "step": 189439
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.718445301055908,
      "learning_rate": 4.559091821062081e-05,
      "loss": 2.9099,
      "step": 189440
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0468811988830566,
      "learning_rate": 4.558875044990094e-05,
      "loss": 3.0386,
      "step": 189441
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.818540334701538,
      "learning_rate": 4.558658273648101e-05,
      "loss": 2.9504,
      "step": 189442
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2758023738861084,
      "learning_rate": 4.5584415070361344e-05,
      "loss": 2.8366,
      "step": 189443
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.854750156402588,
      "learning_rate": 4.558224745154231e-05,
      "loss": 2.7468,
      "step": 189444
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9286420345306396,
      "learning_rate": 4.55800798800243e-05,
      "loss": 2.9406,
      "step": 189445
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2429463863372803,
      "learning_rate": 4.557791235580782e-05,
      "loss": 3.1579,
      "step": 189446
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.742107629776001,
      "learning_rate": 4.557574487889311e-05,
      "loss": 3.1195,
      "step": 189447
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.014216423034668,
      "learning_rate": 4.557357744928079e-05,
      "loss": 3.0493,
      "step": 189448
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6336052417755127,
      "learning_rate": 4.557141006697109e-05,
      "loss": 2.8481,
      "step": 189449
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2132647037506104,
      "learning_rate": 4.5569242731964496e-05,
      "loss": 2.7961,
      "step": 189450
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9418931007385254,
      "learning_rate": 4.5567075444261325e-05,
      "loss": 3.0228,
      "step": 189451
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6119282245635986,
      "learning_rate": 4.5564908203862084e-05,
      "loss": 2.9169,
      "step": 189452
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8487355709075928,
      "learning_rate": 4.5562741010767065e-05,
      "loss": 2.9755,
      "step": 189453
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4891908168792725,
      "learning_rate": 4.556057386497681e-05,
      "loss": 3.031,
      "step": 189454
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9285874366760254,
      "learning_rate": 4.555840676649165e-05,
      "loss": 3.0553,
      "step": 189455
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2054972648620605,
      "learning_rate": 4.555623971531198e-05,
      "loss": 2.8137,
      "step": 189456
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.888016700744629,
      "learning_rate": 4.555407271143814e-05,
      "loss": 2.9318,
      "step": 189457
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.61600399017334,
      "learning_rate": 4.555190575487069e-05,
      "loss": 2.9566,
      "step": 189458
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.234954357147217,
      "learning_rate": 4.554973884560983e-05,
      "loss": 3.0299,
      "step": 189459
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4099104404449463,
      "learning_rate": 4.5547571983656195e-05,
      "loss": 2.8668,
      "step": 189460
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.319352149963379,
      "learning_rate": 4.554540516901005e-05,
      "loss": 3.0971,
      "step": 189461
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.046755313873291,
      "learning_rate": 4.55432384016718e-05,
      "loss": 2.8131,
      "step": 189462
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4548633098602295,
      "learning_rate": 4.5541071681641806e-05,
      "loss": 2.9352,
      "step": 189463
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.439206838607788,
      "learning_rate": 4.55389050089206e-05,
      "loss": 2.9457,
      "step": 189464
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.224806785583496,
      "learning_rate": 4.5536738383508456e-05,
      "loss": 2.8565,
      "step": 189465
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.516899824142456,
      "learning_rate": 4.5534571805405896e-05,
      "loss": 3.0237,
      "step": 189466
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.633286237716675,
      "learning_rate": 4.553240527461326e-05,
      "loss": 2.967,
      "step": 189467
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7117886543273926,
      "learning_rate": 4.5530238791130945e-05,
      "loss": 2.887,
      "step": 189468
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3935558795928955,
      "learning_rate": 4.552807235495932e-05,
      "loss": 3.2962,
      "step": 189469
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.456576108932495,
      "learning_rate": 4.552590596609889e-05,
      "loss": 2.9565,
      "step": 189470
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9189205169677734,
      "learning_rate": 4.5523739624549904e-05,
      "loss": 2.9126,
      "step": 189471
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3643245697021484,
      "learning_rate": 4.5521573330312947e-05,
      "loss": 3.0484,
      "step": 189472
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9231531620025635,
      "learning_rate": 4.5519407083388336e-05,
      "loss": 2.9638,
      "step": 189473
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2716259956359863,
      "learning_rate": 4.551724088377642e-05,
      "loss": 2.5718,
      "step": 189474
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1976752281188965,
      "learning_rate": 4.5515074731477687e-05,
      "loss": 3.0305,
      "step": 189475
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.035029172897339,
      "learning_rate": 4.551290862649251e-05,
      "loss": 3.0221,
      "step": 189476
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.335643768310547,
      "learning_rate": 4.5510742568821214e-05,
      "loss": 2.7871,
      "step": 189477
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6766552925109863,
      "learning_rate": 4.5508576558464375e-05,
      "loss": 2.9958,
      "step": 189478
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0626542568206787,
      "learning_rate": 4.550641059542228e-05,
      "loss": 3.1391,
      "step": 189479
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5054922103881836,
      "learning_rate": 4.550424467969528e-05,
      "loss": 2.7862,
      "step": 189480
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.33599328994751,
      "learning_rate": 4.5502078811283894e-05,
      "loss": 2.9308,
      "step": 189481
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.167548418045044,
      "learning_rate": 4.549991299018843e-05,
      "loss": 2.8824,
      "step": 189482
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.282623052597046,
      "learning_rate": 4.5497747216409374e-05,
      "loss": 3.0737,
      "step": 189483
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.824498176574707,
      "learning_rate": 4.549558148994714e-05,
      "loss": 2.9821,
      "step": 189484
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.233356475830078,
      "learning_rate": 4.5493415810802056e-05,
      "loss": 3.2285,
      "step": 189485
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3409764766693115,
      "learning_rate": 4.5491250178974456e-05,
      "loss": 2.9508,
      "step": 189486
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3394932746887207,
      "learning_rate": 4.548908459446494e-05,
      "loss": 2.8975,
      "step": 189487
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.625258445739746,
      "learning_rate": 4.5486919057273744e-05,
      "loss": 3.0598,
      "step": 189488
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9991698265075684,
      "learning_rate": 4.548475356740136e-05,
      "loss": 3.1193,
      "step": 189489
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.244065999984741,
      "learning_rate": 4.5482588124848216e-05,
      "loss": 3.1429,
      "step": 189490
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1385490894317627,
      "learning_rate": 4.548042272961463e-05,
      "loss": 2.7813,
      "step": 189491
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.811906099319458,
      "learning_rate": 4.547825738170099e-05,
      "loss": 3.1158,
      "step": 189492
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.52103853225708,
      "learning_rate": 4.54760920811078e-05,
      "loss": 2.928,
      "step": 189493
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7140755653381348,
      "learning_rate": 4.5473926827835314e-05,
      "loss": 3.1747,
      "step": 189494
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3673505783081055,
      "learning_rate": 4.547176162188415e-05,
      "loss": 3.0739,
      "step": 189495
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3600409030914307,
      "learning_rate": 4.54695964632545e-05,
      "loss": 2.9922,
      "step": 189496
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4678218364715576,
      "learning_rate": 4.546743135194699e-05,
      "loss": 2.8618,
      "step": 189497
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.836965560913086,
      "learning_rate": 4.5465266287961766e-05,
      "loss": 2.8829,
      "step": 189498
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6774206161499023,
      "learning_rate": 4.5463101271299386e-05,
      "loss": 2.8335,
      "step": 189499
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1925272941589355,
      "learning_rate": 4.546093630196018e-05,
      "loss": 2.9197,
      "step": 189500
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.750154733657837,
      "learning_rate": 4.545877137994466e-05,
      "loss": 3.0603,
      "step": 189501
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.278378963470459,
      "learning_rate": 4.545660650525308e-05,
      "loss": 2.9999,
      "step": 189502
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.018540382385254,
      "learning_rate": 4.545444167788601e-05,
      "loss": 2.8777,
      "step": 189503
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8749680519104004,
      "learning_rate": 4.5452276897843756e-05,
      "loss": 2.9777,
      "step": 189504
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2625181674957275,
      "learning_rate": 4.545011216512671e-05,
      "loss": 3.0698,
      "step": 189505
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5443050861358643,
      "learning_rate": 4.5447947479735234e-05,
      "loss": 2.988,
      "step": 189506
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7100889682769775,
      "learning_rate": 4.5445782841669875e-05,
      "loss": 2.908,
      "step": 189507
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.730881690979004,
      "learning_rate": 4.544361825093086e-05,
      "loss": 2.8666,
      "step": 189508
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.04410982131958,
      "learning_rate": 4.544145370751878e-05,
      "loss": 2.7513,
      "step": 189509
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0269150733947754,
      "learning_rate": 4.543928921143392e-05,
      "loss": 3.0165,
      "step": 189510
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.968611240386963,
      "learning_rate": 4.543712476267669e-05,
      "loss": 2.7619,
      "step": 189511
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.705047369003296,
      "learning_rate": 4.543496036124744e-05,
      "loss": 3.0036,
      "step": 189512
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7014808654785156,
      "learning_rate": 4.54327960071467e-05,
      "loss": 2.7643,
      "step": 189513
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8232181072235107,
      "learning_rate": 4.5430631700374766e-05,
      "loss": 3.0896,
      "step": 189514
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6724233627319336,
      "learning_rate": 4.542846744093214e-05,
      "loss": 2.6648,
      "step": 189515
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.483142852783203,
      "learning_rate": 4.542630322881915e-05,
      "loss": 3.0008,
      "step": 189516
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.656338691711426,
      "learning_rate": 4.542413906403624e-05,
      "loss": 2.7898,
      "step": 189517
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9288578033447266,
      "learning_rate": 4.5421974946583695e-05,
      "loss": 2.8453,
      "step": 189518
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0027990341186523,
      "learning_rate": 4.541981087646209e-05,
      "loss": 2.8374,
      "step": 189519
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8827877044677734,
      "learning_rate": 4.5417646853671696e-05,
      "loss": 2.9787,
      "step": 189520
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3159844875335693,
      "learning_rate": 4.5415482878213006e-05,
      "loss": 3.1232,
      "step": 189521
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4129652976989746,
      "learning_rate": 4.541331895008639e-05,
      "loss": 3.149,
      "step": 189522
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7595512866973877,
      "learning_rate": 4.541115506929227e-05,
      "loss": 2.8657,
      "step": 189523
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3788530826568604,
      "learning_rate": 4.540899123583093e-05,
      "loss": 3.0265,
      "step": 189524
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2035372257232666,
      "learning_rate": 4.540682744970292e-05,
      "loss": 2.8071,
      "step": 189525
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.856391191482544,
      "learning_rate": 4.5404663710908554e-05,
      "loss": 3.1518,
      "step": 189526
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.835989236831665,
      "learning_rate": 4.5402500019448315e-05,
      "loss": 2.9641,
      "step": 189527
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1549391746520996,
      "learning_rate": 4.5400336375322554e-05,
      "loss": 2.9427,
      "step": 189528
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.15808629989624,
      "learning_rate": 4.53981727785317e-05,
      "loss": 3.014,
      "step": 189529
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3358254432678223,
      "learning_rate": 4.539600922907604e-05,
      "loss": 3.2917,
      "step": 189530
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.212921142578125,
      "learning_rate": 4.539384572695615e-05,
      "loss": 3.0177,
      "step": 189531
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9990572929382324,
      "learning_rate": 4.539168227217226e-05,
      "loss": 3.1801,
      "step": 189532
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.866703510284424,
      "learning_rate": 4.538951886472494e-05,
      "loss": 3.0986,
      "step": 189533
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.344903469085693,
      "learning_rate": 4.538735550461452e-05,
      "loss": 3.0997,
      "step": 189534
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8887991905212402,
      "learning_rate": 4.53851921918414e-05,
      "loss": 2.9006,
      "step": 189535
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.532346487045288,
      "learning_rate": 4.538302892640588e-05,
      "loss": 2.9031,
      "step": 189536
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4698867797851562,
      "learning_rate": 4.538086570830857e-05,
      "loss": 3.0108,
      "step": 189537
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.611690044403076,
      "learning_rate": 4.5378702537549685e-05,
      "loss": 3.0178,
      "step": 189538
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9151852130889893,
      "learning_rate": 4.5376539414129765e-05,
      "loss": 3.0424,
      "step": 189539
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1448957920074463,
      "learning_rate": 4.5374376338049147e-05,
      "loss": 3.1162,
      "step": 189540
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.910022497177124,
      "learning_rate": 4.537221330930826e-05,
      "loss": 2.9019,
      "step": 189541
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.596435785293579,
      "learning_rate": 4.5370050327907404e-05,
      "loss": 2.6114,
      "step": 189542
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.477750539779663,
      "learning_rate": 4.5367887393847147e-05,
      "loss": 3.0374,
      "step": 189543
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9920129776000977,
      "learning_rate": 4.536572450712772e-05,
      "loss": 2.8544,
      "step": 189544
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.605311155319214,
      "learning_rate": 4.536356166774966e-05,
      "loss": 2.8949,
      "step": 189545
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.681262969970703,
      "learning_rate": 4.5361398875713365e-05,
      "loss": 2.7579,
      "step": 189546
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9490365982055664,
      "learning_rate": 4.535923613101916e-05,
      "loss": 2.9,
      "step": 189547
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7597076892852783,
      "learning_rate": 4.5357073433667424e-05,
      "loss": 2.8117,
      "step": 189548
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.091813564300537,
      "learning_rate": 4.535491078365868e-05,
      "loss": 3.0643,
      "step": 189549
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8518502712249756,
      "learning_rate": 4.53527481809932e-05,
      "loss": 2.8259,
      "step": 189550
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8106095790863037,
      "learning_rate": 4.535058562567152e-05,
      "loss": 2.6358,
      "step": 189551
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.230133056640625,
      "learning_rate": 4.5348423117693965e-05,
      "loss": 3.132,
      "step": 189552
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.861835479736328,
      "learning_rate": 4.534626065706094e-05,
      "loss": 2.8437,
      "step": 189553
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.377789258956909,
      "learning_rate": 4.534409824377281e-05,
      "loss": 2.8461,
      "step": 189554
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.520904302597046,
      "learning_rate": 4.534193587783007e-05,
      "loss": 2.8576,
      "step": 189555
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9856629371643066,
      "learning_rate": 4.5339773559232994e-05,
      "loss": 2.9206,
      "step": 189556
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7885022163391113,
      "learning_rate": 4.533761128798214e-05,
      "loss": 2.9864,
      "step": 189557
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8113558292388916,
      "learning_rate": 4.533544906407785e-05,
      "loss": 2.8176,
      "step": 189558
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7483534812927246,
      "learning_rate": 4.533328688752043e-05,
      "loss": 2.8815,
      "step": 189559
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.006659746170044,
      "learning_rate": 4.533112475831039e-05,
      "loss": 3.1037,
      "step": 189560
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0470998287200928,
      "learning_rate": 4.5328962676448144e-05,
      "loss": 2.7706,
      "step": 189561
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.157433271408081,
      "learning_rate": 4.5326800641933956e-05,
      "loss": 2.8038,
      "step": 189562
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7933943271636963,
      "learning_rate": 4.5324638654768395e-05,
      "loss": 3.0438,
      "step": 189563
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8569893836975098,
      "learning_rate": 4.532247671495179e-05,
      "loss": 3.0068,
      "step": 189564
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.834322929382324,
      "learning_rate": 4.5320314822484515e-05,
      "loss": 2.894,
      "step": 189565
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.833287477493286,
      "learning_rate": 4.531815297736703e-05,
      "loss": 2.807,
      "step": 189566
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.529527425765991,
      "learning_rate": 4.531599117959963e-05,
      "loss": 2.9047,
      "step": 189567
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2269785404205322,
      "learning_rate": 4.531382942918289e-05,
      "loss": 2.9268,
      "step": 189568
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5093228816986084,
      "learning_rate": 4.531166772611711e-05,
      "loss": 3.0322,
      "step": 189569
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8268561363220215,
      "learning_rate": 4.530950607040265e-05,
      "loss": 2.7475,
      "step": 189570
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3746421337127686,
      "learning_rate": 4.5307344462040006e-05,
      "loss": 3.1682,
      "step": 189571
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.875997543334961,
      "learning_rate": 4.530518290102952e-05,
      "loss": 2.9007,
      "step": 189572
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.110483169555664,
      "learning_rate": 4.5303021387371566e-05,
      "loss": 2.8895,
      "step": 189573
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8241405487060547,
      "learning_rate": 4.530085992106662e-05,
      "loss": 2.9505,
      "step": 189574
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5850887298583984,
      "learning_rate": 4.52986985021151e-05,
      "loss": 2.9584,
      "step": 189575
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.749418020248413,
      "learning_rate": 4.529653713051727e-05,
      "loss": 3.0368,
      "step": 189576
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.247983694076538,
      "learning_rate": 4.52943758062737e-05,
      "loss": 2.7415,
      "step": 189577
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.915898323059082,
      "learning_rate": 4.5292214529384674e-05,
      "loss": 2.9588,
      "step": 189578
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1266379356384277,
      "learning_rate": 4.5290053299850605e-05,
      "loss": 3.0999,
      "step": 189579
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.754847288131714,
      "learning_rate": 4.528789211767199e-05,
      "loss": 2.7404,
      "step": 189580
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7979817390441895,
      "learning_rate": 4.528573098284909e-05,
      "loss": 2.9685,
      "step": 189581
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.187497138977051,
      "learning_rate": 4.5283569895382445e-05,
      "loss": 2.9684,
      "step": 189582
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4492838382720947,
      "learning_rate": 4.528140885527238e-05,
      "loss": 3.0343,
      "step": 189583
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0254313945770264,
      "learning_rate": 4.5279247862519305e-05,
      "loss": 2.8626,
      "step": 189584
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0776374340057373,
      "learning_rate": 4.5277086917123584e-05,
      "loss": 2.8952,
      "step": 189585
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8705201148986816,
      "learning_rate": 4.527492601908571e-05,
      "loss": 2.9146,
      "step": 189586
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.108887195587158,
      "learning_rate": 4.527276516840599e-05,
      "loss": 2.9535,
      "step": 189587
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.9419758319854736,
      "learning_rate": 4.527060436508492e-05,
      "loss": 3.0654,
      "step": 189588
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2762296199798584,
      "learning_rate": 4.526844360912286e-05,
      "loss": 3.0322,
      "step": 189589
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.050664901733398,
      "learning_rate": 4.5266282900520226e-05,
      "loss": 2.8332,
      "step": 189590
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8295419216156006,
      "learning_rate": 4.52641222392773e-05,
      "loss": 2.7906,
      "step": 189591
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4964122772216797,
      "learning_rate": 4.526196162539466e-05,
      "loss": 2.9983,
      "step": 189592
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8059632778167725,
      "learning_rate": 4.5259801058872536e-05,
      "loss": 2.9322,
      "step": 189593
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3831000328063965,
      "learning_rate": 4.5257640539711526e-05,
      "loss": 3.0664,
      "step": 189594
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.9798943996429443,
      "learning_rate": 4.525548006791193e-05,
      "loss": 2.9168,
      "step": 189595
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9132115840911865,
      "learning_rate": 4.525331964347412e-05,
      "loss": 2.9161,
      "step": 189596
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7379891872406006,
      "learning_rate": 4.525115926639848e-05,
      "loss": 3.0557,
      "step": 189597
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5917341709136963,
      "learning_rate": 4.524899893668553e-05,
      "loss": 2.9684,
      "step": 189598
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0593206882476807,
      "learning_rate": 4.5246838654335526e-05,
      "loss": 3.06,
      "step": 189599
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8269460201263428,
      "learning_rate": 4.5244678419349e-05,
      "loss": 2.8726,
      "step": 189600
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.929680824279785,
      "learning_rate": 4.524251823172632e-05,
      "loss": 2.8633,
      "step": 189601
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2198009490966797,
      "learning_rate": 4.5240358091467856e-05,
      "loss": 3.2607,
      "step": 189602
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.372305154800415,
      "learning_rate": 4.523819799857393e-05,
      "loss": 2.7934,
      "step": 189603
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.138242244720459,
      "learning_rate": 4.5236037953045124e-05,
      "loss": 2.7586,
      "step": 189604
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.634964942932129,
      "learning_rate": 4.523387795488169e-05,
      "loss": 2.8617,
      "step": 189605
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6921303272247314,
      "learning_rate": 4.523171800408414e-05,
      "loss": 3.0198,
      "step": 189606
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.64208722114563,
      "learning_rate": 4.52295581006528e-05,
      "loss": 2.924,
      "step": 189607
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.616607666015625,
      "learning_rate": 4.522739824458813e-05,
      "loss": 3.1834,
      "step": 189608
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.421773910522461,
      "learning_rate": 4.522523843589041e-05,
      "loss": 2.9475,
      "step": 189609
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0263562202453613,
      "learning_rate": 4.52230786745602e-05,
      "loss": 2.9462,
      "step": 189610
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.068976640701294,
      "learning_rate": 4.522091896059776e-05,
      "loss": 3.1673,
      "step": 189611
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.029712200164795,
      "learning_rate": 4.52187592940036e-05,
      "loss": 2.9938,
      "step": 189612
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.942641019821167,
      "learning_rate": 4.521659967477812e-05,
      "loss": 2.8206,
      "step": 189613
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.908543825149536,
      "learning_rate": 4.521444010292168e-05,
      "loss": 2.9439,
      "step": 189614
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6973469257354736,
      "learning_rate": 4.521228057843461e-05,
      "loss": 3.0635,
      "step": 189615
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.143096923828125,
      "learning_rate": 4.521012110131745e-05,
      "loss": 2.9131,
      "step": 189616
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4165496826171875,
      "learning_rate": 4.520796167157047e-05,
      "loss": 2.8761,
      "step": 189617
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.00736403465271,
      "learning_rate": 4.5205802289194225e-05,
      "loss": 3.0171,
      "step": 189618
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4639039039611816,
      "learning_rate": 4.520364295418902e-05,
      "loss": 2.9573,
      "step": 189619
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1049041748046875,
      "learning_rate": 4.520148366655526e-05,
      "loss": 2.8583,
      "step": 189620
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.861989974975586,
      "learning_rate": 4.5199324426293303e-05,
      "loss": 3.1744,
      "step": 189621
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4963531494140625,
      "learning_rate": 4.5197165233403655e-05,
      "loss": 2.6904,
      "step": 189622
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.700085163116455,
      "learning_rate": 4.519500608788657e-05,
      "loss": 2.8102,
      "step": 189623
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.558582305908203,
      "learning_rate": 4.519284698974267e-05,
      "loss": 2.8054,
      "step": 189624
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3764679431915283,
      "learning_rate": 4.5190687938972206e-05,
      "loss": 3.182,
      "step": 189625
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.006889343261719,
      "learning_rate": 4.518852893557558e-05,
      "loss": 3.1523,
      "step": 189626
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6183364391326904,
      "learning_rate": 4.518636997955315e-05,
      "loss": 3.1263,
      "step": 189627
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8628251552581787,
      "learning_rate": 4.5184211070905464e-05,
      "loss": 2.8816,
      "step": 189628
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.880552291870117,
      "learning_rate": 4.518205220963278e-05,
      "loss": 3.1629,
      "step": 189629
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6468093395233154,
      "learning_rate": 4.5179893395735634e-05,
      "loss": 2.8004,
      "step": 189630
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3384952545166016,
      "learning_rate": 4.517773462921436e-05,
      "loss": 3.1396,
      "step": 189631
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5509135723114014,
      "learning_rate": 4.517557591006935e-05,
      "loss": 2.9187,
      "step": 189632
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.871856689453125,
      "learning_rate": 4.517341723830091e-05,
      "loss": 3.2708,
      "step": 189633
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9570186138153076,
      "learning_rate": 4.5171258613909643e-05,
      "loss": 2.9984,
      "step": 189634
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4853153228759766,
      "learning_rate": 4.516910003689578e-05,
      "loss": 2.8678,
      "step": 189635
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.399052143096924,
      "learning_rate": 4.516694150725988e-05,
      "loss": 2.9579,
      "step": 189636
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.40838885307312,
      "learning_rate": 4.516478302500215e-05,
      "loss": 3.0308,
      "step": 189637
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.912343740463257,
      "learning_rate": 4.516262459012325e-05,
      "loss": 2.9351,
      "step": 189638
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.52490496635437,
      "learning_rate": 4.5160466202623294e-05,
      "loss": 3.078,
      "step": 189639
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.428234100341797,
      "learning_rate": 4.515830786250286e-05,
      "loss": 2.8295,
      "step": 189640
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1948044300079346,
      "learning_rate": 4.515614956976227e-05,
      "loss": 2.968,
      "step": 189641
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2501864433288574,
      "learning_rate": 4.515399132440204e-05,
      "loss": 2.9354,
      "step": 189642
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9443421363830566,
      "learning_rate": 4.515183312642238e-05,
      "loss": 2.9718,
      "step": 189643
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5184545516967773,
      "learning_rate": 4.5149674975823915e-05,
      "loss": 2.7672,
      "step": 189644
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8307180404663086,
      "learning_rate": 4.5147516872606916e-05,
      "loss": 2.9029,
      "step": 189645
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.326200246810913,
      "learning_rate": 4.5145358816771816e-05,
      "loss": 2.7857,
      "step": 189646
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.613246440887451,
      "learning_rate": 4.514320080831891e-05,
      "loss": 2.8463,
      "step": 189647
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6844370365142822,
      "learning_rate": 4.514104284724877e-05,
      "loss": 2.8672,
      "step": 189648
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.632162570953369,
      "learning_rate": 4.513888493356167e-05,
      "loss": 2.7705,
      "step": 189649
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2597107887268066,
      "learning_rate": 4.513672706725813e-05,
      "loss": 2.8763,
      "step": 189650
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4736392498016357,
      "learning_rate": 4.513456924833848e-05,
      "loss": 2.8895,
      "step": 189651
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.792158603668213,
      "learning_rate": 4.513241147680313e-05,
      "loss": 3.0622,
      "step": 189652
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7444534301757812,
      "learning_rate": 4.513025375265241e-05,
      "loss": 2.7061,
      "step": 189653
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.18587589263916,
      "learning_rate": 4.512809607588682e-05,
      "loss": 2.894,
      "step": 189654
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.323802471160889,
      "learning_rate": 4.5125938446506694e-05,
      "loss": 2.9853,
      "step": 189655
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.724853038787842,
      "learning_rate": 4.5123780864512524e-05,
      "loss": 2.9467,
      "step": 189656
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5597915649414062,
      "learning_rate": 4.512162332990466e-05,
      "loss": 2.877,
      "step": 189657
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0324628353118896,
      "learning_rate": 4.511946584268341e-05,
      "loss": 3.0165,
      "step": 189658
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.816054344177246,
      "learning_rate": 4.511730840284936e-05,
      "loss": 2.8827,
      "step": 189659
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.211059331893921,
      "learning_rate": 4.511515101040281e-05,
      "loss": 2.9826,
      "step": 189660
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.795689105987549,
      "learning_rate": 4.5112993665344076e-05,
      "loss": 2.866,
      "step": 189661
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6448824405670166,
      "learning_rate": 4.511083636767374e-05,
      "loss": 2.6999,
      "step": 189662
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0907785892486572,
      "learning_rate": 4.51086791173921e-05,
      "loss": 2.8463,
      "step": 189663
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7813174724578857,
      "learning_rate": 4.5106521914499484e-05,
      "loss": 2.8831,
      "step": 189664
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7476394176483154,
      "learning_rate": 4.510436475899646e-05,
      "loss": 3.155,
      "step": 189665
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6329636573791504,
      "learning_rate": 4.510220765088326e-05,
      "loss": 2.8917,
      "step": 189666
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.9435484409332275,
      "learning_rate": 4.510005059016045e-05,
      "loss": 3.0348,
      "step": 189667
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2959821224212646,
      "learning_rate": 4.509789357682837e-05,
      "loss": 2.7948,
      "step": 189668
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9647395610809326,
      "learning_rate": 4.509573661088741e-05,
      "loss": 2.9499,
      "step": 189669
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.2920639514923096,
      "learning_rate": 4.5093579692337843e-05,
      "loss": 2.7109,
      "step": 189670
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.972513437271118,
      "learning_rate": 4.50914228211803e-05,
      "loss": 2.8743,
      "step": 189671
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5524158477783203,
      "learning_rate": 4.5089265997415014e-05,
      "loss": 3.0778,
      "step": 189672
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.446044921875,
      "learning_rate": 4.5087109221042486e-05,
      "loss": 3.2351,
      "step": 189673
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.092395305633545,
      "learning_rate": 4.508495249206311e-05,
      "loss": 3.0633,
      "step": 189674
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7239222526550293,
      "learning_rate": 4.508279581047722e-05,
      "loss": 2.9868,
      "step": 189675
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7905540466308594,
      "learning_rate": 4.508063917628519e-05,
      "loss": 3.1138,
      "step": 189676
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.672743558883667,
      "learning_rate": 4.5078482589487555e-05,
      "loss": 2.7059,
      "step": 189677
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.023458957672119,
      "learning_rate": 4.507632605008457e-05,
      "loss": 2.8919,
      "step": 189678
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9672398567199707,
      "learning_rate": 4.507416955807677e-05,
      "loss": 3.0521,
      "step": 189679
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.613314628601074,
      "learning_rate": 4.507201311346452e-05,
      "loss": 3.0011,
      "step": 189680
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4329848289489746,
      "learning_rate": 4.506985671624817e-05,
      "loss": 2.9448,
      "step": 189681
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.958836555480957,
      "learning_rate": 4.5067700366428095e-05,
      "loss": 2.8243,
      "step": 189682
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9263298511505127,
      "learning_rate": 4.5065544064004775e-05,
      "loss": 2.8751,
      "step": 189683
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0992555618286133,
      "learning_rate": 4.5063387808978546e-05,
      "loss": 2.7538,
      "step": 189684
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6030044555664062,
      "learning_rate": 4.5061231601349936e-05,
      "loss": 3.055,
      "step": 189685
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.172538757324219,
      "learning_rate": 4.505907544111921e-05,
      "loss": 2.8134,
      "step": 189686
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.055494785308838,
      "learning_rate": 4.5056919328286834e-05,
      "loss": 2.776,
      "step": 189687
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.776949167251587,
      "learning_rate": 4.505476326285314e-05,
      "loss": 2.8909,
      "step": 189688
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.958977699279785,
      "learning_rate": 4.5052607244818606e-05,
      "loss": 2.8338,
      "step": 189689
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1124000549316406,
      "learning_rate": 4.505045127418359e-05,
      "loss": 2.6861,
      "step": 189690
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7551400661468506,
      "learning_rate": 4.504829535094852e-05,
      "loss": 3.0444,
      "step": 189691
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4468801021575928,
      "learning_rate": 4.504613947511383e-05,
      "loss": 2.9531,
      "step": 189692
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.852250099182129,
      "learning_rate": 4.504398364667986e-05,
      "loss": 2.9175,
      "step": 189693
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1376702785491943,
      "learning_rate": 4.504182786564694e-05,
      "loss": 2.8088,
      "step": 189694
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.06174373626709,
      "learning_rate": 4.503967213201563e-05,
      "loss": 3.1057,
      "step": 189695
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8624801635742188,
      "learning_rate": 4.503751644578622e-05,
      "loss": 2.8433,
      "step": 189696
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.133147716522217,
      "learning_rate": 4.503536080695918e-05,
      "loss": 2.6662,
      "step": 189697
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.06028151512146,
      "learning_rate": 4.503320521553489e-05,
      "loss": 2.8459,
      "step": 189698
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6610107421875,
      "learning_rate": 4.503104967151374e-05,
      "loss": 2.8679,
      "step": 189699
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.0070295333862305,
      "learning_rate": 4.5028894174896077e-05,
      "loss": 2.7756,
      "step": 189700
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.610919713973999,
      "learning_rate": 4.50267387256824e-05,
      "loss": 2.8394,
      "step": 189701
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1367273330688477,
      "learning_rate": 4.5024583323873e-05,
      "loss": 2.9351,
      "step": 189702
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3093454837799072,
      "learning_rate": 4.502242796946844e-05,
      "loss": 3.0264,
      "step": 189703
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.970073938369751,
      "learning_rate": 4.502027266246894e-05,
      "loss": 3.0006,
      "step": 189704
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.890315532684326,
      "learning_rate": 4.501811740287511e-05,
      "loss": 2.9823,
      "step": 189705
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6025679111480713,
      "learning_rate": 4.501596219068709e-05,
      "loss": 2.9973,
      "step": 189706
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.794933795928955,
      "learning_rate": 4.501380702590549e-05,
      "loss": 2.9498,
      "step": 189707
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0215635299682617,
      "learning_rate": 4.501165190853057e-05,
      "loss": 3.058,
      "step": 189708
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2807624340057373,
      "learning_rate": 4.5009496838562854e-05,
      "loss": 3.1994,
      "step": 189709
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4086289405822754,
      "learning_rate": 4.500734181600262e-05,
      "loss": 3.0979,
      "step": 189710
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9476006031036377,
      "learning_rate": 4.500518684085049e-05,
      "loss": 2.9767,
      "step": 189711
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8225197792053223,
      "learning_rate": 4.5003031913106546e-05,
      "loss": 2.9636,
      "step": 189712
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5132339000701904,
      "learning_rate": 4.500087703277144e-05,
      "loss": 2.7237,
      "step": 189713
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1159965991973877,
      "learning_rate": 4.4998722199845417e-05,
      "loss": 2.7747,
      "step": 189714
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6828503608703613,
      "learning_rate": 4.4996567414329e-05,
      "loss": 2.7574,
      "step": 189715
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.188021659851074,
      "learning_rate": 4.499441267622249e-05,
      "loss": 3.0179,
      "step": 189716
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.994196653366089,
      "learning_rate": 4.499225798552646e-05,
      "loss": 2.979,
      "step": 189717
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.870944976806641,
      "learning_rate": 4.4990103342241036e-05,
      "loss": 3.0492,
      "step": 189718
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8362483978271484,
      "learning_rate": 4.4987948746366817e-05,
      "loss": 2.8843,
      "step": 189719
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9423956871032715,
      "learning_rate": 4.4985794197904114e-05,
      "loss": 2.7255,
      "step": 189720
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.569260835647583,
      "learning_rate": 4.498363969685341e-05,
      "loss": 3.2448,
      "step": 189721
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4661145210266113,
      "learning_rate": 4.498148524321502e-05,
      "loss": 3.0045,
      "step": 189722
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.623992443084717,
      "learning_rate": 4.49793308369895e-05,
      "loss": 3.0516,
      "step": 189723
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.432562828063965,
      "learning_rate": 4.497717647817702e-05,
      "loss": 2.938,
      "step": 189724
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5311834812164307,
      "learning_rate": 4.4975022166778154e-05,
      "loss": 2.8511,
      "step": 189725
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5529298782348633,
      "learning_rate": 4.4972867902793154e-05,
      "loss": 2.7399,
      "step": 189726
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.9516618251800537,
      "learning_rate": 4.4970713686222626e-05,
      "loss": 3.1292,
      "step": 189727
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.410527229309082,
      "learning_rate": 4.496855951706674e-05,
      "loss": 3.0739,
      "step": 189728
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6302130222320557,
      "learning_rate": 4.4966405395326123e-05,
      "loss": 3.0442,
      "step": 189729
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4057812690734863,
      "learning_rate": 4.4964251321001043e-05,
      "loss": 2.9132,
      "step": 189730
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.255833625793457,
      "learning_rate": 4.496209729409194e-05,
      "loss": 2.9848,
      "step": 189731
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3565893173217773,
      "learning_rate": 4.49599433145991e-05,
      "loss": 3.0888,
      "step": 189732
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.624960422515869,
      "learning_rate": 4.49577893825231e-05,
      "loss": 2.9065,
      "step": 189733
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.806229829788208,
      "learning_rate": 4.4955635497864204e-05,
      "loss": 2.7307,
      "step": 189734
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.889173984527588,
      "learning_rate": 4.495348166062295e-05,
      "loss": 2.907,
      "step": 189735
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.437105178833008,
      "learning_rate": 4.4951327870799626e-05,
      "loss": 2.9801,
      "step": 189736
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7208480834960938,
      "learning_rate": 4.494917412839467e-05,
      "loss": 2.739,
      "step": 189737
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.463121175765991,
      "learning_rate": 4.494702043340842e-05,
      "loss": 2.6237,
      "step": 189738
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8964192867279053,
      "learning_rate": 4.494486678584141e-05,
      "loss": 3.0958,
      "step": 189739
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.817373037338257,
      "learning_rate": 4.494271318569389e-05,
      "loss": 2.8677,
      "step": 189740
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.461765766143799,
      "learning_rate": 4.494055963296641e-05,
      "loss": 2.8869,
      "step": 189741
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.712930202484131,
      "learning_rate": 4.493840612765926e-05,
      "loss": 3.0705,
      "step": 189742
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.103128910064697,
      "learning_rate": 4.493625266977285e-05,
      "loss": 3.0726,
      "step": 189743
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0303761959075928,
      "learning_rate": 4.493409925930764e-05,
      "loss": 3.1338,
      "step": 189744
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.783916711807251,
      "learning_rate": 4.4931945896264e-05,
      "loss": 3.113,
      "step": 189745
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.318121910095215,
      "learning_rate": 4.492979258064229e-05,
      "loss": 2.8398,
      "step": 189746
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6759631633758545,
      "learning_rate": 4.4927639312442976e-05,
      "loss": 3.0968,
      "step": 189747
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.3758442401885986,
      "learning_rate": 4.4925486091666465e-05,
      "loss": 2.9058,
      "step": 189748
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.508434295654297,
      "learning_rate": 4.492333291831302e-05,
      "loss": 2.9547,
      "step": 189749
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.511094331741333,
      "learning_rate": 4.49211797923832e-05,
      "loss": 2.9868,
      "step": 189750
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.710977554321289,
      "learning_rate": 4.491902671387738e-05,
      "loss": 3.0075,
      "step": 189751
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.833416700363159,
      "learning_rate": 4.4916873682795864e-05,
      "loss": 2.9902,
      "step": 189752
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2412314414978027,
      "learning_rate": 4.491472069913917e-05,
      "loss": 2.9518,
      "step": 189753
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.178515672683716,
      "learning_rate": 4.4912567762907645e-05,
      "loss": 2.9875,
      "step": 189754
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.151052951812744,
      "learning_rate": 4.491041487410162e-05,
      "loss": 2.9599,
      "step": 189755
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2024385929107666,
      "learning_rate": 4.490826203272162e-05,
      "loss": 2.9698,
      "step": 189756
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.15925931930542,
      "learning_rate": 4.490610923876792e-05,
      "loss": 3.1134,
      "step": 189757
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.787808895111084,
      "learning_rate": 4.4903956492241077e-05,
      "loss": 3.0747,
      "step": 189758
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0868990421295166,
      "learning_rate": 4.49018037931414e-05,
      "loss": 3.1385,
      "step": 189759
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.671647787094116,
      "learning_rate": 4.489965114146929e-05,
      "loss": 2.9805,
      "step": 189760
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.176783084869385,
      "learning_rate": 4.489749853722509e-05,
      "loss": 2.9348,
      "step": 189761
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.424221515655518,
      "learning_rate": 4.489534598040934e-05,
      "loss": 2.7508,
      "step": 189762
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.803424835205078,
      "learning_rate": 4.489319347102227e-05,
      "loss": 2.9892,
      "step": 189763
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.637965440750122,
      "learning_rate": 4.489104100906443e-05,
      "loss": 2.9571,
      "step": 189764
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.554657220840454,
      "learning_rate": 4.488888859453619e-05,
      "loss": 2.9477,
      "step": 189765
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5061235427856445,
      "learning_rate": 4.4886736227437904e-05,
      "loss": 3.0882,
      "step": 189766
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.078977108001709,
      "learning_rate": 4.488458390776991e-05,
      "loss": 2.8671,
      "step": 189767
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.586312770843506,
      "learning_rate": 4.488243163553278e-05,
      "loss": 2.9421,
      "step": 189768
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.9660892486572266,
      "learning_rate": 4.488027941072674e-05,
      "loss": 2.8626,
      "step": 189769
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.409730911254883,
      "learning_rate": 4.4878127233352326e-05,
      "loss": 2.8756,
      "step": 189770
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.093327045440674,
      "learning_rate": 4.4875975103409835e-05,
      "loss": 2.753,
      "step": 189771
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6723554134368896,
      "learning_rate": 4.487382302089983e-05,
      "loss": 2.6728,
      "step": 189772
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.030029773712158,
      "learning_rate": 4.487167098582249e-05,
      "loss": 2.9565,
      "step": 189773
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7954509258270264,
      "learning_rate": 4.486951899817838e-05,
      "loss": 3.1106,
      "step": 189774
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.140225648880005,
      "learning_rate": 4.4867367057967784e-05,
      "loss": 3.1841,
      "step": 189775
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4875926971435547,
      "learning_rate": 4.486521516519122e-05,
      "loss": 3.1839,
      "step": 189776
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.7718522548675537,
      "learning_rate": 4.486306331984897e-05,
      "loss": 2.6235,
      "step": 189777
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.975090503692627,
      "learning_rate": 4.4860911521941613e-05,
      "loss": 3.0571,
      "step": 189778
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.851637840270996,
      "learning_rate": 4.485875977146928e-05,
      "loss": 2.7257,
      "step": 189779
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6972410678863525,
      "learning_rate": 4.4856608068432634e-05,
      "loss": 2.9288,
      "step": 189780
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4657459259033203,
      "learning_rate": 4.485445641283184e-05,
      "loss": 3.1734,
      "step": 189781
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.009783983230591,
      "learning_rate": 4.4852304804667505e-05,
      "loss": 2.9923,
      "step": 189782
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5750374794006348,
      "learning_rate": 4.485015324393989e-05,
      "loss": 2.5713,
      "step": 189783
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.272264242172241,
      "learning_rate": 4.4848001730649595e-05,
      "loss": 2.7604,
      "step": 189784
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.987557888031006,
      "learning_rate": 4.484585026479669e-05,
      "loss": 2.9229,
      "step": 189785
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.814164161682129,
      "learning_rate": 4.484369884638187e-05,
      "loss": 2.9542,
      "step": 189786
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.094444513320923,
      "learning_rate": 4.4841547475405335e-05,
      "loss": 3.0914,
      "step": 189787
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.7022337913513184,
      "learning_rate": 4.483939615186766e-05,
      "loss": 3.0345,
      "step": 189788
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9523398876190186,
      "learning_rate": 4.483724487576906e-05,
      "loss": 2.9759,
      "step": 189789
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7741219997406006,
      "learning_rate": 4.483509364711019e-05,
      "loss": 2.9186,
      "step": 189790
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6481106281280518,
      "learning_rate": 4.483294246589113e-05,
      "loss": 3.0309,
      "step": 189791
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9721951484680176,
      "learning_rate": 4.4830791332112525e-05,
      "loss": 3.0046,
      "step": 189792
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6747140884399414,
      "learning_rate": 4.4828640245774635e-05,
      "loss": 3.3038,
      "step": 189793
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1159777641296387,
      "learning_rate": 4.482648920687797e-05,
      "loss": 2.9629,
      "step": 189794
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.850679636001587,
      "learning_rate": 4.4824338215422814e-05,
      "loss": 2.6405,
      "step": 189795
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.811915874481201,
      "learning_rate": 4.4822187271409784e-05,
      "loss": 2.9699,
      "step": 189796
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.921964645385742,
      "learning_rate": 4.4820036374838994e-05,
      "loss": 3.1049,
      "step": 189797
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8996999263763428,
      "learning_rate": 4.481788552571103e-05,
      "loss": 2.8533,
      "step": 189798
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3902032375335693,
      "learning_rate": 4.481573472402614e-05,
      "loss": 3.0673,
      "step": 189799
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1250851154327393,
      "learning_rate": 4.4813583969784936e-05,
      "loss": 2.8896,
      "step": 189800
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6873695850372314,
      "learning_rate": 4.4811433262987616e-05,
      "loss": 3.1138,
      "step": 189801
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.671862840652466,
      "learning_rate": 4.480928260363481e-05,
      "loss": 2.9823,
      "step": 189802
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.914355993270874,
      "learning_rate": 4.480713199172662e-05,
      "loss": 2.8725,
      "step": 189803
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5163300037384033,
      "learning_rate": 4.4804981427263676e-05,
      "loss": 2.7866,
      "step": 189804
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2541027069091797,
      "learning_rate": 4.480283091024624e-05,
      "loss": 3.047,
      "step": 189805
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2737255096435547,
      "learning_rate": 4.4800680440674866e-05,
      "loss": 2.9655,
      "step": 189806
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.854526996612549,
      "learning_rate": 4.479853001854976e-05,
      "loss": 2.9582,
      "step": 189807
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3003597259521484,
      "learning_rate": 4.479637964387157e-05,
      "loss": 2.9648,
      "step": 189808
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6465535163879395,
      "learning_rate": 4.479422931664043e-05,
      "loss": 2.8059,
      "step": 189809
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.3096160888671875,
      "learning_rate": 4.4792079036856935e-05,
      "loss": 2.873,
      "step": 189810
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4701671600341797,
      "learning_rate": 4.478992880452131e-05,
      "loss": 3.075,
      "step": 189811
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.260314702987671,
      "learning_rate": 4.4787778619634144e-05,
      "loss": 2.9852,
      "step": 189812
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9064695835113525,
      "learning_rate": 4.478562848219565e-05,
      "loss": 2.927,
      "step": 189813
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3602664470672607,
      "learning_rate": 4.478347839220649e-05,
      "loss": 3.057,
      "step": 189814
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.170612335205078,
      "learning_rate": 4.4781328349666755e-05,
      "loss": 3.0029,
      "step": 189815
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.868015766143799,
      "learning_rate": 4.477917835457706e-05,
      "loss": 2.9906,
      "step": 189816
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2725071907043457,
      "learning_rate": 4.477702840693768e-05,
      "loss": 2.959,
      "step": 189817
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.702756881713867,
      "learning_rate": 4.47748785067491e-05,
      "loss": 2.7661,
      "step": 189818
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6994130611419678,
      "learning_rate": 4.477272865401164e-05,
      "loss": 2.9728,
      "step": 189819
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.753727912902832,
      "learning_rate": 4.4770578848725815e-05,
      "loss": 2.843,
      "step": 189820
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.94423508644104,
      "learning_rate": 4.4768429090891964e-05,
      "loss": 2.9205,
      "step": 189821
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8049144744873047,
      "learning_rate": 4.4766279380510464e-05,
      "loss": 2.9441,
      "step": 189822
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.050147771835327,
      "learning_rate": 4.47641297175817e-05,
      "loss": 2.8965,
      "step": 189823
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7512638568878174,
      "learning_rate": 4.476198010210611e-05,
      "loss": 2.8995,
      "step": 189824
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.108430862426758,
      "learning_rate": 4.475983053408406e-05,
      "loss": 3.121,
      "step": 189825
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.3131372928619385,
      "learning_rate": 4.4757681013516025e-05,
      "loss": 2.8946,
      "step": 189826
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8851511478424072,
      "learning_rate": 4.475553154040239e-05,
      "loss": 2.8943,
      "step": 189827
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.453913927078247,
      "learning_rate": 4.4753382114743394e-05,
      "loss": 2.6314,
      "step": 189828
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4676992893218994,
      "learning_rate": 4.4751232736539675e-05,
      "loss": 2.8175,
      "step": 189829
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2091665267944336,
      "learning_rate": 4.47490834057915e-05,
      "loss": 2.8909,
      "step": 189830
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.950197696685791,
      "learning_rate": 4.474693412249922e-05,
      "loss": 2.992,
      "step": 189831
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5127596855163574,
      "learning_rate": 4.4744784886663385e-05,
      "loss": 3.0548,
      "step": 189832
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.560816764831543,
      "learning_rate": 4.474263569828432e-05,
      "loss": 3.135,
      "step": 189833
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4541656970977783,
      "learning_rate": 4.4740486557362334e-05,
      "loss": 2.7669,
      "step": 189834
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.072726011276245,
      "learning_rate": 4.4738337463897976e-05,
      "loss": 3.03,
      "step": 189835
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2174482345581055,
      "learning_rate": 4.473618841789159e-05,
      "loss": 3.0522,
      "step": 189836
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8764426708221436,
      "learning_rate": 4.4734039419343483e-05,
      "loss": 3.0649,
      "step": 189837
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.650637149810791,
      "learning_rate": 4.47318904682542e-05,
      "loss": 2.9065,
      "step": 189838
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.736342430114746,
      "learning_rate": 4.47297415646241e-05,
      "loss": 3.1189,
      "step": 189839
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4703826904296875,
      "learning_rate": 4.47275927084535e-05,
      "loss": 2.8482,
      "step": 189840
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.788048505783081,
      "learning_rate": 4.4725443899742894e-05,
      "loss": 2.8539,
      "step": 189841
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.953899383544922,
      "learning_rate": 4.4723295138492596e-05,
      "loss": 2.8845,
      "step": 189842
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.754749059677124,
      "learning_rate": 4.47211464247031e-05,
      "loss": 3.0382,
      "step": 189843
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6798343658447266,
      "learning_rate": 4.471899775837481e-05,
      "loss": 2.8648,
      "step": 189844
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.6964733600616455,
      "learning_rate": 4.471684913950805e-05,
      "loss": 3.0147,
      "step": 189845
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8918182849884033,
      "learning_rate": 4.4714700568103155e-05,
      "loss": 3.1819,
      "step": 189846
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.8393611907958984,
      "learning_rate": 4.471255204416073e-05,
      "loss": 2.8709,
      "step": 189847
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.723642110824585,
      "learning_rate": 4.471040356768097e-05,
      "loss": 3.1199,
      "step": 189848
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.702134847640991,
      "learning_rate": 4.470825513866442e-05,
      "loss": 3.084,
      "step": 189849
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.695810556411743,
      "learning_rate": 4.470610675711136e-05,
      "loss": 3.1703,
      "step": 189850
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0364229679107666,
      "learning_rate": 4.47039584230224e-05,
      "loss": 2.9503,
      "step": 189851
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6711361408233643,
      "learning_rate": 4.4701810136397675e-05,
      "loss": 2.8145,
      "step": 189852
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.38866925239563,
      "learning_rate": 4.469966189723775e-05,
      "loss": 2.9467,
      "step": 189853
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.521095037460327,
      "learning_rate": 4.469751370554289e-05,
      "loss": 2.9466,
      "step": 189854
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.651121139526367,
      "learning_rate": 4.469536556131369e-05,
      "loss": 3.0393,
      "step": 189855
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.728214979171753,
      "learning_rate": 4.4693217464550355e-05,
      "loss": 2.9656,
      "step": 189856
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8810017108917236,
      "learning_rate": 4.469106941525349e-05,
      "loss": 2.9948,
      "step": 189857
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.946619987487793,
      "learning_rate": 4.468892141342325e-05,
      "loss": 3.0005,
      "step": 189858
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1744911670684814,
      "learning_rate": 4.4686773459060246e-05,
      "loss": 2.9709,
      "step": 189859
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.767493963241577,
      "learning_rate": 4.4684625552164696e-05,
      "loss": 2.9071,
      "step": 189860
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.154200315475464,
      "learning_rate": 4.468247769273718e-05,
      "loss": 2.7311,
      "step": 189861
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.793457508087158,
      "learning_rate": 4.468032988077793e-05,
      "loss": 2.8773,
      "step": 189862
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1570446491241455,
      "learning_rate": 4.4678182116287574e-05,
      "loss": 2.9095,
      "step": 189863
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.577545642852783,
      "learning_rate": 4.4676034399266215e-05,
      "loss": 3.1149,
      "step": 189864
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7509586811065674,
      "learning_rate": 4.467388672971448e-05,
      "loss": 2.8857,
      "step": 189865
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5806853771209717,
      "learning_rate": 4.467173910763261e-05,
      "loss": 2.9172,
      "step": 189866
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4928183555603027,
      "learning_rate": 4.466959153302113e-05,
      "loss": 3.0083,
      "step": 189867
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.904355764389038,
      "learning_rate": 4.466744400588038e-05,
      "loss": 3.1032,
      "step": 189868
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6661691665649414,
      "learning_rate": 4.466529652621086e-05,
      "loss": 3.1992,
      "step": 189869
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.155158519744873,
      "learning_rate": 4.466314909401276e-05,
      "loss": 2.9308,
      "step": 189870
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8039910793304443,
      "learning_rate": 4.4661001709286695e-05,
      "loss": 2.9146,
      "step": 189871
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9339969158172607,
      "learning_rate": 4.4658854372032884e-05,
      "loss": 2.806,
      "step": 189872
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1512060165405273,
      "learning_rate": 4.465670708225186e-05,
      "loss": 2.9582,
      "step": 189873
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.799004554748535,
      "learning_rate": 4.4654559839943904e-05,
      "loss": 3.1769,
      "step": 189874
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.726186513900757,
      "learning_rate": 4.465241264510967e-05,
      "loss": 2.936,
      "step": 189875
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.876962900161743,
      "learning_rate": 4.4650265497749194e-05,
      "loss": 3.0101,
      "step": 189876
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.498542070388794,
      "learning_rate": 4.464811839786314e-05,
      "loss": 3.0663,
      "step": 189877
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.949997901916504,
      "learning_rate": 4.464597134545175e-05,
      "loss": 2.8404,
      "step": 189878
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1855740547180176,
      "learning_rate": 4.4643824340515546e-05,
      "loss": 2.9209,
      "step": 189879
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.817491292953491,
      "learning_rate": 4.4641677383054794e-05,
      "loss": 3.1482,
      "step": 189880
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.079380989074707,
      "learning_rate": 4.463953047307014e-05,
      "loss": 2.8995,
      "step": 189881
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9708662033081055,
      "learning_rate": 4.4637383610561705e-05,
      "loss": 2.8453,
      "step": 189882
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.311964273452759,
      "learning_rate": 4.4635236795530025e-05,
      "loss": 3.062,
      "step": 189883
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8230671882629395,
      "learning_rate": 4.4633090027975435e-05,
      "loss": 3.229,
      "step": 189884
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.665436267852783,
      "learning_rate": 4.4630943307898405e-05,
      "loss": 2.7931,
      "step": 189885
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4411849975585938,
      "learning_rate": 4.462879663529926e-05,
      "loss": 2.9933,
      "step": 189886
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.443941116333008,
      "learning_rate": 4.46266500101786e-05,
      "loss": 3.2402,
      "step": 189887
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8597021102905273,
      "learning_rate": 4.4624503432536495e-05,
      "loss": 3.0071,
      "step": 189888
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7187445163726807,
      "learning_rate": 4.4622356902373616e-05,
      "loss": 3.0009,
      "step": 189889
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.019721031188965,
      "learning_rate": 4.462021041969015e-05,
      "loss": 2.8747,
      "step": 189890
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7263879776000977,
      "learning_rate": 4.461806398448671e-05,
      "loss": 2.8543,
      "step": 189891
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.414109945297241,
      "learning_rate": 4.461591759676352e-05,
      "loss": 3.2449,
      "step": 189892
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.502197265625,
      "learning_rate": 4.461377125652118e-05,
      "loss": 2.881,
      "step": 189893
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5904314517974854,
      "learning_rate": 4.461162496375983e-05,
      "loss": 2.7614,
      "step": 189894
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8523027896881104,
      "learning_rate": 4.4609478718480065e-05,
      "loss": 3.112,
      "step": 189895
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.45806884765625,
      "learning_rate": 4.460733252068215e-05,
      "loss": 2.9759,
      "step": 189896
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.03177809715271,
      "learning_rate": 4.460518637036662e-05,
      "loss": 3.0055,
      "step": 189897
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9694161415100098,
      "learning_rate": 4.460304026753374e-05,
      "loss": 2.7775,
      "step": 189898
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5666167736053467,
      "learning_rate": 4.460089421218411e-05,
      "loss": 2.7474,
      "step": 189899
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4075582027435303,
      "learning_rate": 4.459874820431787e-05,
      "loss": 2.9169,
      "step": 189900
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9294538497924805,
      "learning_rate": 4.459660224393557e-05,
      "loss": 2.8162,
      "step": 189901
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6200814247131348,
      "learning_rate": 4.4594456331037567e-05,
      "loss": 2.994,
      "step": 189902
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5879287719726562,
      "learning_rate": 4.4592310465624335e-05,
      "loss": 3.0337,
      "step": 189903
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9294819831848145,
      "learning_rate": 4.4590164647696124e-05,
      "loss": 3.0161,
      "step": 189904
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.9640839099884033,
      "learning_rate": 4.4588018877253494e-05,
      "loss": 3.0421,
      "step": 189905
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4394686222076416,
      "learning_rate": 4.458587315429678e-05,
      "loss": 2.7782,
      "step": 189906
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0569989681243896,
      "learning_rate": 4.458372747882638e-05,
      "loss": 3.2701,
      "step": 189907
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.666855812072754,
      "learning_rate": 4.45815818508426e-05,
      "loss": 3.0756,
      "step": 189908
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6884758472442627,
      "learning_rate": 4.457943627034602e-05,
      "loss": 2.9312,
      "step": 189909
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.661026954650879,
      "learning_rate": 4.457729073733684e-05,
      "loss": 2.6371,
      "step": 189910
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.501460552215576,
      "learning_rate": 4.457514525181566e-05,
      "loss": 3.1532,
      "step": 189911
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3501598834991455,
      "learning_rate": 4.457299981378276e-05,
      "loss": 2.893,
      "step": 189912
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0789411067962646,
      "learning_rate": 4.457085442323858e-05,
      "loss": 2.9138,
      "step": 189913
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7814319133758545,
      "learning_rate": 4.456870908018344e-05,
      "loss": 3.0369,
      "step": 189914
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.647880792617798,
      "learning_rate": 4.456656378461785e-05,
      "loss": 2.8784,
      "step": 189915
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2337982654571533,
      "learning_rate": 4.45644185365421e-05,
      "loss": 2.9801,
      "step": 189916
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.261657238006592,
      "learning_rate": 4.45622733359567e-05,
      "loss": 2.8995,
      "step": 189917
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8993756771087646,
      "learning_rate": 4.456012818286201e-05,
      "loss": 2.6571,
      "step": 189918
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.713833808898926,
      "learning_rate": 4.455798307725833e-05,
      "loss": 3.1045,
      "step": 189919
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.805048704147339,
      "learning_rate": 4.4555838019146226e-05,
      "loss": 2.7407,
      "step": 189920
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.912285804748535,
      "learning_rate": 4.4553693008526004e-05,
      "loss": 3.1273,
      "step": 189921
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9035749435424805,
      "learning_rate": 4.455154804539799e-05,
      "loss": 2.7905,
      "step": 189922
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1279196739196777,
      "learning_rate": 4.454940312976275e-05,
      "loss": 2.9084,
      "step": 189923
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2778663635253906,
      "learning_rate": 4.4547258261620625e-05,
      "loss": 3.0472,
      "step": 189924
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.019866466522217,
      "learning_rate": 4.454511344097187e-05,
      "loss": 2.9089,
      "step": 189925
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.886888027191162,
      "learning_rate": 4.454296866781709e-05,
      "loss": 2.8636,
      "step": 189926
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9293086528778076,
      "learning_rate": 4.4540823942156524e-05,
      "loss": 2.932,
      "step": 189927
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.958981513977051,
      "learning_rate": 4.453867926399073e-05,
      "loss": 2.9778,
      "step": 189928
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1585240364074707,
      "learning_rate": 4.453653463332001e-05,
      "loss": 3.0462,
      "step": 189929
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6943564414978027,
      "learning_rate": 4.453439005014473e-05,
      "loss": 2.7801,
      "step": 189930
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3203887939453125,
      "learning_rate": 4.453224551446529e-05,
      "loss": 2.9521,
      "step": 189931
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.697979211807251,
      "learning_rate": 4.4530101026282225e-05,
      "loss": 2.7761,
      "step": 189932
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.896749973297119,
      "learning_rate": 4.452795658559574e-05,
      "loss": 2.9998,
      "step": 189933
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1904923915863037,
      "learning_rate": 4.4525812192406386e-05,
      "loss": 2.9042,
      "step": 189934
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.597078323364258,
      "learning_rate": 4.452366784671451e-05,
      "loss": 2.8385,
      "step": 189935
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.992967367172241,
      "learning_rate": 4.452152354852053e-05,
      "loss": 2.8733,
      "step": 189936
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4331326484680176,
      "learning_rate": 4.451937929782473e-05,
      "loss": 3.0604,
      "step": 189937
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.371443033218384,
      "learning_rate": 4.4517235094627665e-05,
      "loss": 3.1755,
      "step": 189938
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9558353424072266,
      "learning_rate": 4.451509093892964e-05,
      "loss": 3.1734,
      "step": 189939
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1645290851593018,
      "learning_rate": 4.4512946830731125e-05,
      "loss": 3.1926,
      "step": 189940
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.811494827270508,
      "learning_rate": 4.451080277003237e-05,
      "loss": 3.0602,
      "step": 189941
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.398125648498535,
      "learning_rate": 4.450865875683406e-05,
      "loss": 2.9599,
      "step": 189942
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.044024705886841,
      "learning_rate": 4.4506514791136284e-05,
      "loss": 2.9906,
      "step": 189943
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2619340419769287,
      "learning_rate": 4.450437087293961e-05,
      "loss": 3.1902,
      "step": 189944
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1983466148376465,
      "learning_rate": 4.450222700224434e-05,
      "loss": 2.8413,
      "step": 189945
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7835285663604736,
      "learning_rate": 4.4500083179051006e-05,
      "loss": 3.2258,
      "step": 189946
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.596644401550293,
      "learning_rate": 4.449793940335984e-05,
      "loss": 2.7572,
      "step": 189947
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6823973655700684,
      "learning_rate": 4.4495795675171475e-05,
      "loss": 2.9981,
      "step": 189948
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7495884895324707,
      "learning_rate": 4.4493651994486045e-05,
      "loss": 2.9845,
      "step": 189949
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0102100372314453,
      "learning_rate": 4.449150836130412e-05,
      "loss": 2.7584,
      "step": 189950
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8274385929107666,
      "learning_rate": 4.448936477562599e-05,
      "loss": 3.0863,
      "step": 189951
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.864347219467163,
      "learning_rate": 4.448722123745216e-05,
      "loss": 2.8966,
      "step": 189952
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1916661262512207,
      "learning_rate": 4.4485077746782934e-05,
      "loss": 2.5635,
      "step": 189953
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6182315349578857,
      "learning_rate": 4.448293430361888e-05,
      "loss": 2.9834,
      "step": 189954
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.979095458984375,
      "learning_rate": 4.448079090796012e-05,
      "loss": 3.0447,
      "step": 189955
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9114716053009033,
      "learning_rate": 4.447864755980729e-05,
      "loss": 3.1799,
      "step": 189956
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.377971649169922,
      "learning_rate": 4.447650425916063e-05,
      "loss": 2.9624,
      "step": 189957
    },
    {
      "epoch": 2.47,
      "grad_norm": 6.828990459442139,
      "learning_rate": 4.4474361006020665e-05,
      "loss": 2.9264,
      "step": 189958
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.112316131591797,
      "learning_rate": 4.44722178003877e-05,
      "loss": 2.9341,
      "step": 189959
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4913244247436523,
      "learning_rate": 4.44700746422623e-05,
      "loss": 2.6406,
      "step": 189960
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8191184997558594,
      "learning_rate": 4.446793153164459e-05,
      "loss": 2.5462,
      "step": 189961
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9764902591705322,
      "learning_rate": 4.446578846853519e-05,
      "loss": 3.0916,
      "step": 189962
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.795842170715332,
      "learning_rate": 4.446364545293435e-05,
      "loss": 2.7149,
      "step": 189963
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.757765054702759,
      "learning_rate": 4.4461502484842635e-05,
      "loss": 2.9899,
      "step": 189964
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.3171119689941406,
      "learning_rate": 4.445935956426025e-05,
      "loss": 2.7567,
      "step": 189965
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.7215912342071533,
      "learning_rate": 4.445721669118784e-05,
      "loss": 2.7614,
      "step": 189966
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3777403831481934,
      "learning_rate": 4.445507386562551e-05,
      "loss": 2.7696,
      "step": 189967
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.139256000518799,
      "learning_rate": 4.445293108757392e-05,
      "loss": 2.7615,
      "step": 189968
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.119793653488159,
      "learning_rate": 4.445078835703325e-05,
      "loss": 3.0225,
      "step": 189969
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.066701650619507,
      "learning_rate": 4.444864567400405e-05,
      "loss": 2.9004,
      "step": 189970
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.8317112922668457,
      "learning_rate": 4.444650303848663e-05,
      "loss": 3.055,
      "step": 189971
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3013534545898438,
      "learning_rate": 4.4444360450481484e-05,
      "loss": 2.8382,
      "step": 189972
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5000100135803223,
      "learning_rate": 4.444221790998896e-05,
      "loss": 2.8808,
      "step": 189973
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9988107681274414,
      "learning_rate": 4.444007541700947e-05,
      "loss": 3.1098,
      "step": 189974
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1756479740142822,
      "learning_rate": 4.443793297154333e-05,
      "loss": 2.9613,
      "step": 189975
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4832711219787598,
      "learning_rate": 4.4435790573591025e-05,
      "loss": 2.9696,
      "step": 189976
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5958549976348877,
      "learning_rate": 4.443364822315291e-05,
      "loss": 2.7567,
      "step": 189977
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1360020637512207,
      "learning_rate": 4.443150592022945e-05,
      "loss": 2.8307,
      "step": 189978
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4518373012542725,
      "learning_rate": 4.4429363664820986e-05,
      "loss": 2.8929,
      "step": 189979
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.18715500831604,
      "learning_rate": 4.4427221456927944e-05,
      "loss": 2.9924,
      "step": 189980
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.967050075531006,
      "learning_rate": 4.4425079296550624e-05,
      "loss": 2.9801,
      "step": 189981
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5000762939453125,
      "learning_rate": 4.44229371836896e-05,
      "loss": 3.0636,
      "step": 189982
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.833380937576294,
      "learning_rate": 4.44207951183451e-05,
      "loss": 2.7471,
      "step": 189983
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.301023483276367,
      "learning_rate": 4.441865310051765e-05,
      "loss": 2.9498,
      "step": 189984
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2907872200012207,
      "learning_rate": 4.44165111302076e-05,
      "loss": 3.0853,
      "step": 189985
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.206531524658203,
      "learning_rate": 4.4414369207415365e-05,
      "loss": 2.9858,
      "step": 189986
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2350103855133057,
      "learning_rate": 4.4412227332141224e-05,
      "loss": 3.0912,
      "step": 189987
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4162685871124268,
      "learning_rate": 4.4410085504385775e-05,
      "loss": 2.9913,
      "step": 189988
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.582638740539551,
      "learning_rate": 4.440794372414924e-05,
      "loss": 2.8556,
      "step": 189989
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.52687668800354,
      "learning_rate": 4.440580199143217e-05,
      "loss": 2.9751,
      "step": 189990
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.959486484527588,
      "learning_rate": 4.440366030623484e-05,
      "loss": 2.815,
      "step": 189991
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8701000213623047,
      "learning_rate": 4.4401518668557746e-05,
      "loss": 2.9106,
      "step": 189992
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9436886310577393,
      "learning_rate": 4.439937707840113e-05,
      "loss": 2.8217,
      "step": 189993
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.420292615890503,
      "learning_rate": 4.4397235535765576e-05,
      "loss": 2.816,
      "step": 189994
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.0411505699157715,
      "learning_rate": 4.4395094040651335e-05,
      "loss": 2.9888,
      "step": 189995
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7799203395843506,
      "learning_rate": 4.439295259305895e-05,
      "loss": 2.9644,
      "step": 189996
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.373460292816162,
      "learning_rate": 4.439081119298872e-05,
      "loss": 3.0667,
      "step": 189997
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8550496101379395,
      "learning_rate": 4.438866984044107e-05,
      "loss": 2.8959,
      "step": 189998
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2443971633911133,
      "learning_rate": 4.4386528535416346e-05,
      "loss": 2.9958,
      "step": 189999
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0455234050750732,
      "learning_rate": 4.438438727791503e-05,
      "loss": 2.9286,
      "step": 190000
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.306766986846924,
      "learning_rate": 4.438224606793741e-05,
      "loss": 2.9195,
      "step": 190001
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8939499855041504,
      "learning_rate": 4.438010490548404e-05,
      "loss": 2.8619,
      "step": 190002
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6418895721435547,
      "learning_rate": 4.4377963790555216e-05,
      "loss": 2.7401,
      "step": 190003
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.599747657775879,
      "learning_rate": 4.437582272315131e-05,
      "loss": 2.9007,
      "step": 190004
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7897965908050537,
      "learning_rate": 4.437368170327282e-05,
      "loss": 2.7575,
      "step": 190005
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.910276412963867,
      "learning_rate": 4.437154073092009e-05,
      "loss": 3.0756,
      "step": 190006
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.57922625541687,
      "learning_rate": 4.4369399806093466e-05,
      "loss": 2.8823,
      "step": 190007
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6439571380615234,
      "learning_rate": 4.436725892879342e-05,
      "loss": 2.9092,
      "step": 190008
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1692936420440674,
      "learning_rate": 4.436511809902037e-05,
      "loss": 3.0594,
      "step": 190009
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.1585452556610107,
      "learning_rate": 4.436297731677456e-05,
      "loss": 2.7608,
      "step": 190010
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.820005416870117,
      "learning_rate": 4.43608365820566e-05,
      "loss": 2.989,
      "step": 190011
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.542719602584839,
      "learning_rate": 4.435869589486669e-05,
      "loss": 2.9922,
      "step": 190012
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9994983673095703,
      "learning_rate": 4.435655525520543e-05,
      "loss": 2.8293,
      "step": 190013
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7560923099517822,
      "learning_rate": 4.435441466307308e-05,
      "loss": 3.0031,
      "step": 190014
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9139420986175537,
      "learning_rate": 4.4352274118470075e-05,
      "loss": 2.8903,
      "step": 190015
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0192761421203613,
      "learning_rate": 4.435013362139676e-05,
      "loss": 2.7831,
      "step": 190016
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7770187854766846,
      "learning_rate": 4.4347993171853644e-05,
      "loss": 2.6036,
      "step": 190017
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.3620846271514893,
      "learning_rate": 4.4345852769840985e-05,
      "loss": 2.9078,
      "step": 190018
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7077035903930664,
      "learning_rate": 4.43437124153593e-05,
      "loss": 3.0603,
      "step": 190019
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.4541304111480713,
      "learning_rate": 4.4341572108408995e-05,
      "loss": 2.9179,
      "step": 190020
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0386483669281006,
      "learning_rate": 4.433943184899037e-05,
      "loss": 2.9203,
      "step": 190021
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.065190076828003,
      "learning_rate": 4.433729163710382e-05,
      "loss": 2.7886,
      "step": 190022
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.671440839767456,
      "learning_rate": 4.4335151472749886e-05,
      "loss": 2.9939,
      "step": 190023
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.253373146057129,
      "learning_rate": 4.4333011355928795e-05,
      "loss": 2.8695,
      "step": 190024
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6806955337524414,
      "learning_rate": 4.433087128664108e-05,
      "loss": 3.057,
      "step": 190025
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2036972045898438,
      "learning_rate": 4.432873126488701e-05,
      "loss": 3.1518,
      "step": 190026
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6973183155059814,
      "learning_rate": 4.4326591290667215e-05,
      "loss": 2.7153,
      "step": 190027
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.425981044769287,
      "learning_rate": 4.432445136398176e-05,
      "loss": 3.0706,
      "step": 190028
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.611984968185425,
      "learning_rate": 4.4322311484831316e-05,
      "loss": 2.9242,
      "step": 190029
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.768287420272827,
      "learning_rate": 4.4320171653216085e-05,
      "loss": 2.9728,
      "step": 190030
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7146825790405273,
      "learning_rate": 4.431803186913666e-05,
      "loss": 2.8152,
      "step": 190031
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.485229730606079,
      "learning_rate": 4.431589213259324e-05,
      "loss": 3.2327,
      "step": 190032
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.563788414001465,
      "learning_rate": 4.431375244358647e-05,
      "loss": 2.8278,
      "step": 190033
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6534359455108643,
      "learning_rate": 4.431161280211647e-05,
      "loss": 2.952,
      "step": 190034
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.505847215652466,
      "learning_rate": 4.4309473208183845e-05,
      "loss": 2.9621,
      "step": 190035
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8340351581573486,
      "learning_rate": 4.4307333661788826e-05,
      "loss": 2.7398,
      "step": 190036
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6240944862365723,
      "learning_rate": 4.430519416293198e-05,
      "loss": 2.9654,
      "step": 190037
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.262000560760498,
      "learning_rate": 4.430305471161354e-05,
      "loss": 3.1594,
      "step": 190038
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8862557411193848,
      "learning_rate": 4.4300915307834076e-05,
      "loss": 2.8183,
      "step": 190039
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8955767154693604,
      "learning_rate": 4.4298775951593915e-05,
      "loss": 2.9696,
      "step": 190040
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8185694217681885,
      "learning_rate": 4.429663664289339e-05,
      "loss": 2.7321,
      "step": 190041
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.8471455574035645,
      "learning_rate": 4.429449738173291e-05,
      "loss": 2.7486,
      "step": 190042
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9577083587646484,
      "learning_rate": 4.4292358168112995e-05,
      "loss": 3.0754,
      "step": 190043
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.806703567504883,
      "learning_rate": 4.429021900203386e-05,
      "loss": 2.9308,
      "step": 190044
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.132741928100586,
      "learning_rate": 4.428807988349605e-05,
      "loss": 2.5743,
      "step": 190045
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.842517852783203,
      "learning_rate": 4.4285940812499954e-05,
      "loss": 2.8895,
      "step": 190046
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.910980224609375,
      "learning_rate": 4.4283801789045926e-05,
      "loss": 2.7267,
      "step": 190047
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.0121541023254395,
      "learning_rate": 4.4281662813134264e-05,
      "loss": 2.7734,
      "step": 190048
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.223434925079346,
      "learning_rate": 4.427952388476558e-05,
      "loss": 2.901,
      "step": 190049
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.717785120010376,
      "learning_rate": 4.4277385003940055e-05,
      "loss": 2.9006,
      "step": 190050
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.894435882568359,
      "learning_rate": 4.427524617065827e-05,
      "loss": 2.8523,
      "step": 190051
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.226287364959717,
      "learning_rate": 4.427310738492056e-05,
      "loss": 2.8387,
      "step": 190052
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1178436279296875,
      "learning_rate": 4.4270968646727276e-05,
      "loss": 2.9239,
      "step": 190053
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.144519805908203,
      "learning_rate": 4.42688299560788e-05,
      "loss": 3.0044,
      "step": 190054
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6067728996276855,
      "learning_rate": 4.4266691312975655e-05,
      "loss": 2.8666,
      "step": 190055
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3479678630828857,
      "learning_rate": 4.426455271741808e-05,
      "loss": 2.9662,
      "step": 190056
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2055115699768066,
      "learning_rate": 4.42624141694066e-05,
      "loss": 2.8201,
      "step": 190057
    },
    {
      "epoch": 2.47,
      "grad_norm": 5.1905388832092285,
      "learning_rate": 4.426027566894159e-05,
      "loss": 2.9393,
      "step": 190058
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.887282133102417,
      "learning_rate": 4.425813721602342e-05,
      "loss": 3.1824,
      "step": 190059
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.902165174484253,
      "learning_rate": 4.425599881065241e-05,
      "loss": 2.7214,
      "step": 190060
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6241583824157715,
      "learning_rate": 4.425386045282912e-05,
      "loss": 2.9501,
      "step": 190061
    },
    {
      "epoch": 2.47,
      "grad_norm": 4.064859867095947,
      "learning_rate": 4.4251722142553784e-05,
      "loss": 2.9995,
      "step": 190062
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.4581339359283447,
      "learning_rate": 4.424958387982693e-05,
      "loss": 2.8964,
      "step": 190063
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.0923898220062256,
      "learning_rate": 4.424744566464895e-05,
      "loss": 3.0214,
      "step": 190064
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.6890835762023926,
      "learning_rate": 4.4245307497020164e-05,
      "loss": 2.8723,
      "step": 190065
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3985490798950195,
      "learning_rate": 4.424316937694094e-05,
      "loss": 2.7947,
      "step": 190066
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5340969562530518,
      "learning_rate": 4.424103130441182e-05,
      "loss": 3.0924,
      "step": 190067
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.616410255432129,
      "learning_rate": 4.423889327943306e-05,
      "loss": 2.9725,
      "step": 190068
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.7987794876098633,
      "learning_rate": 4.4236755302005166e-05,
      "loss": 2.7827,
      "step": 190069
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5386459827423096,
      "learning_rate": 4.42346173721285e-05,
      "loss": 2.7226,
      "step": 190070
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.992163896560669,
      "learning_rate": 4.423247948980343e-05,
      "loss": 2.9764,
      "step": 190071
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.1884846687316895,
      "learning_rate": 4.423034165503032e-05,
      "loss": 2.841,
      "step": 190072
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.392054319381714,
      "learning_rate": 4.4228203867809675e-05,
      "loss": 3.1624,
      "step": 190073
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.2393546104431152,
      "learning_rate": 4.422606612814176e-05,
      "loss": 3.0425,
      "step": 190074
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.5395915508270264,
      "learning_rate": 4.4223928436027135e-05,
      "loss": 2.9076,
      "step": 190075
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3198275566101074,
      "learning_rate": 4.422179079146607e-05,
      "loss": 2.6685,
      "step": 190076
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.9404826164245605,
      "learning_rate": 4.4219653194459037e-05,
      "loss": 2.9916,
      "step": 190077
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3627874851226807,
      "learning_rate": 4.421751564500633e-05,
      "loss": 2.829,
      "step": 190078
    },
    {
      "epoch": 2.47,
      "grad_norm": 3.3452370166778564,
      "learning_rate": 4.421537814310848e-05,
      "loss": 2.8787,
      "step": 190079
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.533965826034546,
      "learning_rate": 4.4213240688765764e-05,
      "loss": 2.8357,
      "step": 190080
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.589317560195923,
      "learning_rate": 4.4211103281978667e-05,
      "loss": 2.7701,
      "step": 190081
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.5696146488189697,
      "learning_rate": 4.420896592274756e-05,
      "loss": 3.2139,
      "step": 190082
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.886516809463501,
      "learning_rate": 4.420682861107289e-05,
      "loss": 3.1707,
      "step": 190083
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.683394193649292,
      "learning_rate": 4.420469134695487e-05,
      "loss": 2.974,
      "step": 190084
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5789570808410645,
      "learning_rate": 4.4202554130394105e-05,
      "loss": 2.8623,
      "step": 190085
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6977832317352295,
      "learning_rate": 4.4200416961390874e-05,
      "loss": 3.0041,
      "step": 190086
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.930586814880371,
      "learning_rate": 4.419827983994566e-05,
      "loss": 2.7067,
      "step": 190087
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2495064735412598,
      "learning_rate": 4.41961427660588e-05,
      "loss": 3.1127,
      "step": 190088
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.656611919403076,
      "learning_rate": 4.4194005739730676e-05,
      "loss": 2.9608,
      "step": 190089
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6616456508636475,
      "learning_rate": 4.419186876096177e-05,
      "loss": 2.6968,
      "step": 190090
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.689516305923462,
      "learning_rate": 4.418973182975242e-05,
      "loss": 2.911,
      "step": 190091
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.029465913772583,
      "learning_rate": 4.418759494610296e-05,
      "loss": 2.9691,
      "step": 190092
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.016660690307617,
      "learning_rate": 4.418545811001392e-05,
      "loss": 2.7282,
      "step": 190093
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5923376083374023,
      "learning_rate": 4.4183321321485634e-05,
      "loss": 2.9505,
      "step": 190094
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.757319450378418,
      "learning_rate": 4.41811845805184e-05,
      "loss": 3.1006,
      "step": 190095
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.104844570159912,
      "learning_rate": 4.4179047887112825e-05,
      "loss": 2.7542,
      "step": 190096
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.583791971206665,
      "learning_rate": 4.4176911241269174e-05,
      "loss": 2.8716,
      "step": 190097
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4252264499664307,
      "learning_rate": 4.417477464298781e-05,
      "loss": 2.9132,
      "step": 190098
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6607730388641357,
      "learning_rate": 4.417263809226923e-05,
      "loss": 2.9943,
      "step": 190099
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7848496437072754,
      "learning_rate": 4.41705015891138e-05,
      "loss": 2.8897,
      "step": 190100
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6256630420684814,
      "learning_rate": 4.4168365133521835e-05,
      "loss": 2.8321,
      "step": 190101
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7819173336029053,
      "learning_rate": 4.4166228725493844e-05,
      "loss": 2.9929,
      "step": 190102
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9256069660186768,
      "learning_rate": 4.416409236503015e-05,
      "loss": 2.8909,
      "step": 190103
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0785582065582275,
      "learning_rate": 4.4161956052131234e-05,
      "loss": 2.7513,
      "step": 190104
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.424396276473999,
      "learning_rate": 4.4159819786797435e-05,
      "loss": 2.9322,
      "step": 190105
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.226316452026367,
      "learning_rate": 4.415768356902909e-05,
      "loss": 3.0279,
      "step": 190106
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7823281288146973,
      "learning_rate": 4.4155547398826727e-05,
      "loss": 2.9246,
      "step": 190107
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.938310146331787,
      "learning_rate": 4.4153411276190685e-05,
      "loss": 2.6893,
      "step": 190108
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6292295455932617,
      "learning_rate": 4.4151275201121296e-05,
      "loss": 2.8541,
      "step": 190109
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.810603380203247,
      "learning_rate": 4.4149139173619054e-05,
      "loss": 2.9212,
      "step": 190110
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.754807949066162,
      "learning_rate": 4.414700319368426e-05,
      "loss": 3.0099,
      "step": 190111
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7962534427642822,
      "learning_rate": 4.414486726131742e-05,
      "loss": 2.8218,
      "step": 190112
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.986624240875244,
      "learning_rate": 4.414273137651892e-05,
      "loss": 2.6265,
      "step": 190113
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7045998573303223,
      "learning_rate": 4.414059553928908e-05,
      "loss": 2.7585,
      "step": 190114
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5876214504241943,
      "learning_rate": 4.4138459749628284e-05,
      "loss": 2.9742,
      "step": 190115
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1602983474731445,
      "learning_rate": 4.413632400753704e-05,
      "loss": 2.7803,
      "step": 190116
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6729607582092285,
      "learning_rate": 4.413418831301561e-05,
      "loss": 2.9334,
      "step": 190117
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2707126140594482,
      "learning_rate": 4.413205266606452e-05,
      "loss": 2.9405,
      "step": 190118
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4030489921569824,
      "learning_rate": 4.412991706668415e-05,
      "loss": 3.0396,
      "step": 190119
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9201507568359375,
      "learning_rate": 4.412778151487483e-05,
      "loss": 2.7897,
      "step": 190120
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8740334510803223,
      "learning_rate": 4.412564601063692e-05,
      "loss": 3.065,
      "step": 190121
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1080307960510254,
      "learning_rate": 4.412351055397095e-05,
      "loss": 2.8691,
      "step": 190122
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.321688175201416,
      "learning_rate": 4.412137514487717e-05,
      "loss": 3.0373,
      "step": 190123
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.935788869857788,
      "learning_rate": 4.4119239783356164e-05,
      "loss": 3.0536,
      "step": 190124
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.885408401489258,
      "learning_rate": 4.4117104469408206e-05,
      "loss": 2.7934,
      "step": 190125
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3027143478393555,
      "learning_rate": 4.411496920303369e-05,
      "loss": 2.9766,
      "step": 190126
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.635348320007324,
      "learning_rate": 4.411283398423295e-05,
      "loss": 2.8456,
      "step": 190127
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.31727933883667,
      "learning_rate": 4.411069881300656e-05,
      "loss": 2.7666,
      "step": 190128
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6784591674804688,
      "learning_rate": 4.410856368935475e-05,
      "loss": 2.8953,
      "step": 190129
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3280112743377686,
      "learning_rate": 4.410642861327804e-05,
      "loss": 2.9377,
      "step": 190130
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7036359310150146,
      "learning_rate": 4.410429358477682e-05,
      "loss": 3.0328,
      "step": 190131
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.519819498062134,
      "learning_rate": 4.41021586038514e-05,
      "loss": 2.8052,
      "step": 190132
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.571089029312134,
      "learning_rate": 4.410002367050216e-05,
      "loss": 2.7987,
      "step": 190133
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7887775897979736,
      "learning_rate": 4.409788878472963e-05,
      "loss": 2.9263,
      "step": 190134
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1452553272247314,
      "learning_rate": 4.4095753946534074e-05,
      "loss": 2.8949,
      "step": 190135
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0243380069732666,
      "learning_rate": 4.4093619155916e-05,
      "loss": 2.7961,
      "step": 190136
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.755577564239502,
      "learning_rate": 4.4091484412875754e-05,
      "loss": 2.9319,
      "step": 190137
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.763064384460449,
      "learning_rate": 4.4089349717413724e-05,
      "loss": 3.0549,
      "step": 190138
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2642226219177246,
      "learning_rate": 4.4087215069530266e-05,
      "loss": 2.9882,
      "step": 190139
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.523040771484375,
      "learning_rate": 4.408508046922589e-05,
      "loss": 2.9551,
      "step": 190140
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4813106060028076,
      "learning_rate": 4.4082945916500844e-05,
      "loss": 2.9249,
      "step": 190141
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8727166652679443,
      "learning_rate": 4.408081141135571e-05,
      "loss": 2.8487,
      "step": 190142
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7065885066986084,
      "learning_rate": 4.407867695379075e-05,
      "loss": 2.9356,
      "step": 190143
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.452411413192749,
      "learning_rate": 4.407654254380639e-05,
      "loss": 2.951,
      "step": 190144
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5716371536254883,
      "learning_rate": 4.407440818140301e-05,
      "loss": 2.4963,
      "step": 190145
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.568784236907959,
      "learning_rate": 4.4072273866581066e-05,
      "loss": 2.9839,
      "step": 190146
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.841493844985962,
      "learning_rate": 4.4070139599340825e-05,
      "loss": 3.0049,
      "step": 190147
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7349016666412354,
      "learning_rate": 4.406800537968289e-05,
      "loss": 2.9974,
      "step": 190148
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.408881664276123,
      "learning_rate": 4.4065871207607526e-05,
      "loss": 3.1439,
      "step": 190149
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1934573650360107,
      "learning_rate": 4.406373708311514e-05,
      "loss": 2.9014,
      "step": 190150
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.895646095275879,
      "learning_rate": 4.406160300620608e-05,
      "loss": 2.932,
      "step": 190151
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4873106479644775,
      "learning_rate": 4.405946897688086e-05,
      "loss": 2.835,
      "step": 190152
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5900955200195312,
      "learning_rate": 4.405733499513975e-05,
      "loss": 2.7662,
      "step": 190153
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1602463722229004,
      "learning_rate": 4.405520106098327e-05,
      "loss": 3.0351,
      "step": 190154
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.341237783432007,
      "learning_rate": 4.405306717441176e-05,
      "loss": 2.7933,
      "step": 190155
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.867926836013794,
      "learning_rate": 4.405093333542562e-05,
      "loss": 2.9613,
      "step": 190156
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5513453483581543,
      "learning_rate": 4.404879954402518e-05,
      "loss": 2.9834,
      "step": 190157
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9388203620910645,
      "learning_rate": 4.4046665800210976e-05,
      "loss": 2.9656,
      "step": 190158
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.370727062225342,
      "learning_rate": 4.4044532103983244e-05,
      "loss": 2.8437,
      "step": 190159
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7283687591552734,
      "learning_rate": 4.404239845534254e-05,
      "loss": 3.1411,
      "step": 190160
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3449952602386475,
      "learning_rate": 4.404026485428921e-05,
      "loss": 3.0828,
      "step": 190161
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9303407669067383,
      "learning_rate": 4.403813130082361e-05,
      "loss": 2.8425,
      "step": 190162
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.96506667137146,
      "learning_rate": 4.4035997794946074e-05,
      "loss": 2.8909,
      "step": 190163
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2595272064208984,
      "learning_rate": 4.403386433665714e-05,
      "loss": 3.0243,
      "step": 190164
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8995540142059326,
      "learning_rate": 4.4031730925957074e-05,
      "loss": 2.5804,
      "step": 190165
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5108237266540527,
      "learning_rate": 4.402959756284644e-05,
      "loss": 2.8103,
      "step": 190166
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.240003824234009,
      "learning_rate": 4.40274642473255e-05,
      "loss": 2.9353,
      "step": 190167
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.639732599258423,
      "learning_rate": 4.40253309793947e-05,
      "loss": 2.8764,
      "step": 190168
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.426600694656372,
      "learning_rate": 4.402319775905436e-05,
      "loss": 3.0243,
      "step": 190169
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.582026720046997,
      "learning_rate": 4.402106458630499e-05,
      "loss": 3.0882,
      "step": 190170
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.984858512878418,
      "learning_rate": 4.401893146114688e-05,
      "loss": 2.9244,
      "step": 190171
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.534432411193848,
      "learning_rate": 4.401679838358057e-05,
      "loss": 2.8307,
      "step": 190172
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1342575550079346,
      "learning_rate": 4.401466535360628e-05,
      "loss": 2.7684,
      "step": 190173
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.683084487915039,
      "learning_rate": 4.401253237122456e-05,
      "loss": 2.7029,
      "step": 190174
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1732916831970215,
      "learning_rate": 4.401039943643577e-05,
      "loss": 3.1441,
      "step": 190175
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.513155460357666,
      "learning_rate": 4.400826654924024e-05,
      "loss": 2.8404,
      "step": 190176
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.863422393798828,
      "learning_rate": 4.4006133709638344e-05,
      "loss": 2.7808,
      "step": 190177
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.281592845916748,
      "learning_rate": 4.4004000917630644e-05,
      "loss": 3.066,
      "step": 190178
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4198853969573975,
      "learning_rate": 4.400186817321734e-05,
      "loss": 2.7777,
      "step": 190179
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5337259769439697,
      "learning_rate": 4.399973547639899e-05,
      "loss": 3.0959,
      "step": 190180
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0681536197662354,
      "learning_rate": 4.3997602827175906e-05,
      "loss": 3.0818,
      "step": 190181
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8615777492523193,
      "learning_rate": 4.399547022554851e-05,
      "loss": 2.9243,
      "step": 190182
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.124288558959961,
      "learning_rate": 4.399333767151715e-05,
      "loss": 2.7916,
      "step": 190183
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.791449069976807,
      "learning_rate": 4.399120516508231e-05,
      "loss": 2.9807,
      "step": 190184
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4763495922088623,
      "learning_rate": 4.398907270624423e-05,
      "loss": 2.9842,
      "step": 190185
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1830830574035645,
      "learning_rate": 4.398694029500355e-05,
      "loss": 3.0454,
      "step": 190186
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.856903076171875,
      "learning_rate": 4.398480793136049e-05,
      "loss": 2.9401,
      "step": 190187
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5759239196777344,
      "learning_rate": 4.398267561531542e-05,
      "loss": 2.9055,
      "step": 190188
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5350677967071533,
      "learning_rate": 4.398054334686888e-05,
      "loss": 2.9269,
      "step": 190189
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.377139091491699,
      "learning_rate": 4.39784111260212e-05,
      "loss": 2.9704,
      "step": 190190
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.014400005340576,
      "learning_rate": 4.397627895277267e-05,
      "loss": 3.0808,
      "step": 190191
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.884026050567627,
      "learning_rate": 4.397414682712387e-05,
      "loss": 2.7222,
      "step": 190192
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3698439598083496,
      "learning_rate": 4.397201474907509e-05,
      "loss": 2.7784,
      "step": 190193
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9728755950927734,
      "learning_rate": 4.39698827186267e-05,
      "loss": 2.9062,
      "step": 190194
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.298623561859131,
      "learning_rate": 4.396775073577921e-05,
      "loss": 2.9605,
      "step": 190195
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.880269765853882,
      "learning_rate": 4.396561880053293e-05,
      "loss": 2.6861,
      "step": 190196
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6602447032928467,
      "learning_rate": 4.3963486912888216e-05,
      "loss": 2.6746,
      "step": 190197
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.987977981567383,
      "learning_rate": 4.3961355072845585e-05,
      "loss": 3.0345,
      "step": 190198
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.050341844558716,
      "learning_rate": 4.395922328040538e-05,
      "loss": 3.0421,
      "step": 190199
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0594310760498047,
      "learning_rate": 4.395709153556792e-05,
      "loss": 3.0003,
      "step": 190200
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.606343984603882,
      "learning_rate": 4.3954959838333725e-05,
      "loss": 3.1535,
      "step": 190201
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.671278238296509,
      "learning_rate": 4.3952828188703085e-05,
      "loss": 2.9994,
      "step": 190202
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8850860595703125,
      "learning_rate": 4.395069658667653e-05,
      "loss": 2.86,
      "step": 190203
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.83858323097229,
      "learning_rate": 4.394856503225432e-05,
      "loss": 3.0904,
      "step": 190204
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7406632900238037,
      "learning_rate": 4.394643352543694e-05,
      "loss": 3.0514,
      "step": 190205
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2450826168060303,
      "learning_rate": 4.394430206622468e-05,
      "loss": 2.9998,
      "step": 190206
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9395997524261475,
      "learning_rate": 4.39421706546181e-05,
      "loss": 3.1431,
      "step": 190207
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.595116376876831,
      "learning_rate": 4.3940039290617414e-05,
      "loss": 2.7674,
      "step": 190208
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4637255668640137,
      "learning_rate": 4.3937907974223175e-05,
      "loss": 3.2252,
      "step": 190209
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.427619457244873,
      "learning_rate": 4.393577670543569e-05,
      "loss": 3.3725,
      "step": 190210
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4902138710021973,
      "learning_rate": 4.393364548425542e-05,
      "loss": 2.9477,
      "step": 190211
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9080636501312256,
      "learning_rate": 4.3931514310682635e-05,
      "loss": 3.0281,
      "step": 190212
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.164794445037842,
      "learning_rate": 4.392938318471787e-05,
      "loss": 3.0484,
      "step": 190213
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0484442710876465,
      "learning_rate": 4.392725210636142e-05,
      "loss": 3.0112,
      "step": 190214
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6016759872436523,
      "learning_rate": 4.392512107561376e-05,
      "loss": 2.8802,
      "step": 190215
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9246883392333984,
      "learning_rate": 4.3922990092475275e-05,
      "loss": 2.9349,
      "step": 190216
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9656436443328857,
      "learning_rate": 4.3920859156946345e-05,
      "loss": 2.8366,
      "step": 190217
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.748208999633789,
      "learning_rate": 4.391872826902729e-05,
      "loss": 3.0507,
      "step": 190218
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7949960231781006,
      "learning_rate": 4.3916597428718656e-05,
      "loss": 2.974,
      "step": 190219
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.919494390487671,
      "learning_rate": 4.391446663602067e-05,
      "loss": 3.0207,
      "step": 190220
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.107773542404175,
      "learning_rate": 4.3912335890933905e-05,
      "loss": 3.0243,
      "step": 190221
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8551270961761475,
      "learning_rate": 4.391020519345868e-05,
      "loss": 3.102,
      "step": 190222
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.106781005859375,
      "learning_rate": 4.3908074543595337e-05,
      "loss": 2.8761,
      "step": 190223
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6234405040740967,
      "learning_rate": 4.390594394134431e-05,
      "loss": 2.7236,
      "step": 190224
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6322546005249023,
      "learning_rate": 4.3903813386706035e-05,
      "loss": 2.8312,
      "step": 190225
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.13382887840271,
      "learning_rate": 4.39016828796808e-05,
      "loss": 2.8958,
      "step": 190226
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.559281826019287,
      "learning_rate": 4.3899552420269143e-05,
      "loss": 2.9243,
      "step": 190227
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.545916795730591,
      "learning_rate": 4.3897422008471405e-05,
      "loss": 2.8944,
      "step": 190228
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0615150928497314,
      "learning_rate": 4.3895291644287976e-05,
      "loss": 2.7563,
      "step": 190229
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7671570777893066,
      "learning_rate": 4.38931613277192e-05,
      "loss": 3.0301,
      "step": 190230
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6247293949127197,
      "learning_rate": 4.3891031058765555e-05,
      "loss": 3.1079,
      "step": 190231
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2340950965881348,
      "learning_rate": 4.3888900837427335e-05,
      "loss": 2.9184,
      "step": 190232
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2227916717529297,
      "learning_rate": 4.3886770663705086e-05,
      "loss": 3.0762,
      "step": 190233
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6863794326782227,
      "learning_rate": 4.388464053759911e-05,
      "loss": 3.1797,
      "step": 190234
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.836587905883789,
      "learning_rate": 4.388251045910979e-05,
      "loss": 2.7371,
      "step": 190235
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2535927295684814,
      "learning_rate": 4.38803804282375e-05,
      "loss": 2.9137,
      "step": 190236
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8267273902893066,
      "learning_rate": 4.387825044498277e-05,
      "loss": 3.0177,
      "step": 190237
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6059606075286865,
      "learning_rate": 4.3876120509345834e-05,
      "loss": 2.9561,
      "step": 190238
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.39849328994751,
      "learning_rate": 4.3873990621327215e-05,
      "loss": 2.8747,
      "step": 190239
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.18933367729187,
      "learning_rate": 4.387186078092717e-05,
      "loss": 2.6582,
      "step": 190240
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7572617530822754,
      "learning_rate": 4.386973098814633e-05,
      "loss": 2.9807,
      "step": 190241
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.642214775085449,
      "learning_rate": 4.386760124298484e-05,
      "loss": 2.9766,
      "step": 190242
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.88001012802124,
      "learning_rate": 4.3865471545443224e-05,
      "loss": 2.747,
      "step": 190243
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.851651191711426,
      "learning_rate": 4.3863341895521805e-05,
      "loss": 2.9271,
      "step": 190244
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.518723487854004,
      "learning_rate": 4.38612122932211e-05,
      "loss": 2.9537,
      "step": 190245
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5820770263671875,
      "learning_rate": 4.385908273854134e-05,
      "loss": 2.8805,
      "step": 190246
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.77829909324646,
      "learning_rate": 4.385695323148315e-05,
      "loss": 2.7627,
      "step": 190247
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3821582794189453,
      "learning_rate": 4.385482377204667e-05,
      "loss": 3.0207,
      "step": 190248
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6949539184570312,
      "learning_rate": 4.3852694360232485e-05,
      "loss": 2.7177,
      "step": 190249
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5514633655548096,
      "learning_rate": 4.385056499604081e-05,
      "loss": 2.7487,
      "step": 190250
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6401400566101074,
      "learning_rate": 4.384843567947225e-05,
      "loss": 3.0752,
      "step": 190251
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0924196243286133,
      "learning_rate": 4.384630641052706e-05,
      "loss": 2.9992,
      "step": 190252
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.466844320297241,
      "learning_rate": 4.384417718920577e-05,
      "loss": 3.1141,
      "step": 190253
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.748613119125366,
      "learning_rate": 4.3842048015508546e-05,
      "loss": 2.8439,
      "step": 190254
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.599541425704956,
      "learning_rate": 4.3839918889435995e-05,
      "loss": 3.1056,
      "step": 190255
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1716182231903076,
      "learning_rate": 4.383778981098839e-05,
      "loss": 3.0045,
      "step": 190256
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0872068405151367,
      "learning_rate": 4.3835660780166217e-05,
      "loss": 2.6836,
      "step": 190257
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.679043292999268,
      "learning_rate": 4.383353179696978e-05,
      "loss": 3.1424,
      "step": 190258
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.796499252319336,
      "learning_rate": 4.383140286139968e-05,
      "loss": 2.949,
      "step": 190259
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.402336359024048,
      "learning_rate": 4.382927397345598e-05,
      "loss": 2.8284,
      "step": 190260
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9079434871673584,
      "learning_rate": 4.382714513313935e-05,
      "loss": 3.1041,
      "step": 190261
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0446596145629883,
      "learning_rate": 4.382501634044999e-05,
      "loss": 2.9123,
      "step": 190262
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.230489730834961,
      "learning_rate": 4.38228875953885e-05,
      "loss": 2.9514,
      "step": 190263
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6241772174835205,
      "learning_rate": 4.3820758897955075e-05,
      "loss": 2.9231,
      "step": 190264
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.64909291267395,
      "learning_rate": 4.3818630248150285e-05,
      "loss": 2.5875,
      "step": 190265
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.627084255218506,
      "learning_rate": 4.3816501645974464e-05,
      "loss": 2.9176,
      "step": 190266
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0036656856536865,
      "learning_rate": 4.381437309142798e-05,
      "loss": 2.9011,
      "step": 190267
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1006317138671875,
      "learning_rate": 4.381224458451116e-05,
      "loss": 3.0334,
      "step": 190268
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7918081283569336,
      "learning_rate": 4.381011612522457e-05,
      "loss": 2.9143,
      "step": 190269
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8663787841796875,
      "learning_rate": 4.3807987713568414e-05,
      "loss": 2.8131,
      "step": 190270
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.00711989402771,
      "learning_rate": 4.380585934954329e-05,
      "loss": 3.0848,
      "step": 190271
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7382500171661377,
      "learning_rate": 4.380373103314947e-05,
      "loss": 2.9362,
      "step": 190272
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.742347478866577,
      "learning_rate": 4.380160276438731e-05,
      "loss": 3.0087,
      "step": 190273
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3204517364501953,
      "learning_rate": 4.379947454325735e-05,
      "loss": 3.0283,
      "step": 190274
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.9272289276123047,
      "learning_rate": 4.379734636975989e-05,
      "loss": 3.0299,
      "step": 190275
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8589653968811035,
      "learning_rate": 4.3795218243895256e-05,
      "loss": 2.9009,
      "step": 190276
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6059460639953613,
      "learning_rate": 4.3793090165664024e-05,
      "loss": 2.886,
      "step": 190277
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8125884532928467,
      "learning_rate": 4.379096213506649e-05,
      "loss": 3.0146,
      "step": 190278
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.629472494125366,
      "learning_rate": 4.378883415210298e-05,
      "loss": 2.9391,
      "step": 190279
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3518974781036377,
      "learning_rate": 4.3786706216774036e-05,
      "loss": 3.1193,
      "step": 190280
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.488799810409546,
      "learning_rate": 4.378457832907996e-05,
      "loss": 2.8774,
      "step": 190281
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3342864513397217,
      "learning_rate": 4.37824504890211e-05,
      "loss": 2.8329,
      "step": 190282
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0782344341278076,
      "learning_rate": 4.378032269659801e-05,
      "loss": 2.9636,
      "step": 190283
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7169885635375977,
      "learning_rate": 4.377819495181099e-05,
      "loss": 2.9054,
      "step": 190284
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6899561882019043,
      "learning_rate": 4.377606725466035e-05,
      "loss": 2.9431,
      "step": 190285
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.387982130050659,
      "learning_rate": 4.377393960514665e-05,
      "loss": 2.9694,
      "step": 190286
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.639420509338379,
      "learning_rate": 4.377181200327017e-05,
      "loss": 3.1092,
      "step": 190287
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8387184143066406,
      "learning_rate": 4.376968444903138e-05,
      "loss": 2.7536,
      "step": 190288
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6222727298736572,
      "learning_rate": 4.376755694243066e-05,
      "loss": 2.9126,
      "step": 190289
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0278658866882324,
      "learning_rate": 4.3765429483468394e-05,
      "loss": 2.9765,
      "step": 190290
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.299156904220581,
      "learning_rate": 4.376330207214489e-05,
      "loss": 2.8859,
      "step": 190291
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9604523181915283,
      "learning_rate": 4.3761174708460714e-05,
      "loss": 2.9092,
      "step": 190292
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.444423198699951,
      "learning_rate": 4.375904739241609e-05,
      "loss": 2.7715,
      "step": 190293
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9650983810424805,
      "learning_rate": 4.375692012401156e-05,
      "loss": 2.9953,
      "step": 190294
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.713350772857666,
      "learning_rate": 4.375479290324746e-05,
      "loss": 2.7794,
      "step": 190295
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.96913480758667,
      "learning_rate": 4.375266573012418e-05,
      "loss": 2.9983,
      "step": 190296
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.485644578933716,
      "learning_rate": 4.3750538604642026e-05,
      "loss": 2.9758,
      "step": 190297
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.610844850540161,
      "learning_rate": 4.37484115268016e-05,
      "loss": 3.0943,
      "step": 190298
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4439609050750732,
      "learning_rate": 4.374628449660306e-05,
      "loss": 2.85,
      "step": 190299
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0257391929626465,
      "learning_rate": 4.374415751404704e-05,
      "loss": 3.0709,
      "step": 190300
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7614526748657227,
      "learning_rate": 4.374203057913378e-05,
      "loss": 3.1607,
      "step": 190301
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.689042806625366,
      "learning_rate": 4.373990369186374e-05,
      "loss": 3.0929,
      "step": 190302
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0322952270507812,
      "learning_rate": 4.373777685223723e-05,
      "loss": 3.1071,
      "step": 190303
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.192927598953247,
      "learning_rate": 4.373565006025474e-05,
      "loss": 2.7823,
      "step": 190304
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.891218900680542,
      "learning_rate": 4.3733523315916564e-05,
      "loss": 2.9974,
      "step": 190305
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9969053268432617,
      "learning_rate": 4.373139661922325e-05,
      "loss": 3.0778,
      "step": 190306
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.847705602645874,
      "learning_rate": 4.372926997017502e-05,
      "loss": 2.9971,
      "step": 190307
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9816670417785645,
      "learning_rate": 4.372714336877251e-05,
      "loss": 2.885,
      "step": 190308
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5887625217437744,
      "learning_rate": 4.372501681501582e-05,
      "loss": 2.7846,
      "step": 190309
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.560086488723755,
      "learning_rate": 4.372289030890556e-05,
      "loss": 2.9024,
      "step": 190310
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8330156803131104,
      "learning_rate": 4.372076385044197e-05,
      "loss": 3.023,
      "step": 190311
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4919233322143555,
      "learning_rate": 4.371863743962562e-05,
      "loss": 3.0134,
      "step": 190312
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.594264507293701,
      "learning_rate": 4.371651107645674e-05,
      "loss": 2.8342,
      "step": 190313
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9337570667266846,
      "learning_rate": 4.3714384760935916e-05,
      "loss": 2.7342,
      "step": 190314
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.161190986633301,
      "learning_rate": 4.371225849306331e-05,
      "loss": 3.0239,
      "step": 190315
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5882010459899902,
      "learning_rate": 4.371013227283946e-05,
      "loss": 2.9402,
      "step": 190316
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9398045539855957,
      "learning_rate": 4.3708006100264725e-05,
      "loss": 2.8423,
      "step": 190317
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6616721153259277,
      "learning_rate": 4.370587997533954e-05,
      "loss": 2.7375,
      "step": 190318
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.18540620803833,
      "learning_rate": 4.370375389806421e-05,
      "loss": 2.843,
      "step": 190319
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7410619258880615,
      "learning_rate": 4.370162786843933e-05,
      "loss": 3.0927,
      "step": 190320
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.221325397491455,
      "learning_rate": 4.3699501886464994e-05,
      "loss": 3.2459,
      "step": 190321
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0191688537597656,
      "learning_rate": 4.3697375952141846e-05,
      "loss": 2.9993,
      "step": 190322
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6288046836853027,
      "learning_rate": 4.369525006547014e-05,
      "loss": 2.8521,
      "step": 190323
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.737884521484375,
      "learning_rate": 4.369312422645039e-05,
      "loss": 2.7278,
      "step": 190324
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.656698703765869,
      "learning_rate": 4.3690998435082824e-05,
      "loss": 3.0139,
      "step": 190325
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4524548053741455,
      "learning_rate": 4.36888726913681e-05,
      "loss": 2.8502,
      "step": 190326
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8087923526763916,
      "learning_rate": 4.368674699530633e-05,
      "loss": 2.7884,
      "step": 190327
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.567841053009033,
      "learning_rate": 4.368462134689811e-05,
      "loss": 2.8864,
      "step": 190328
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.529426097869873,
      "learning_rate": 4.3682495746143666e-05,
      "loss": 2.9481,
      "step": 190329
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.221428155899048,
      "learning_rate": 4.368037019304353e-05,
      "loss": 2.8747,
      "step": 190330
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.577167510986328,
      "learning_rate": 4.367824468759802e-05,
      "loss": 2.8063,
      "step": 190331
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3956034183502197,
      "learning_rate": 4.367611922980768e-05,
      "loss": 2.9945,
      "step": 190332
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7144529819488525,
      "learning_rate": 4.367399381967266e-05,
      "loss": 2.7827,
      "step": 190333
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1393744945526123,
      "learning_rate": 4.367186845719355e-05,
      "loss": 3.1246,
      "step": 190334
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.694925308227539,
      "learning_rate": 4.3669743142370616e-05,
      "loss": 3.083,
      "step": 190335
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6987459659576416,
      "learning_rate": 4.3667617875204364e-05,
      "loss": 3.0006,
      "step": 190336
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.210195541381836,
      "learning_rate": 4.366549265569509e-05,
      "loss": 2.7395,
      "step": 190337
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.892221689224243,
      "learning_rate": 4.366336748384336e-05,
      "loss": 2.902,
      "step": 190338
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.689112424850464,
      "learning_rate": 4.366124235964934e-05,
      "loss": 2.997,
      "step": 190339
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5433502197265625,
      "learning_rate": 4.365911728311356e-05,
      "loss": 2.6959,
      "step": 190340
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2173471450805664,
      "learning_rate": 4.365699225423637e-05,
      "loss": 3.0131,
      "step": 190341
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9565281867980957,
      "learning_rate": 4.365486727301821e-05,
      "loss": 2.9614,
      "step": 190342
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.674944877624512,
      "learning_rate": 4.36527423394594e-05,
      "loss": 2.689,
      "step": 190343
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9350714683532715,
      "learning_rate": 4.3650617453560534e-05,
      "loss": 2.6997,
      "step": 190344
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1454784870147705,
      "learning_rate": 4.364849261532171e-05,
      "loss": 2.8662,
      "step": 190345
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4281182289123535,
      "learning_rate": 4.3646367824743525e-05,
      "loss": 2.9461,
      "step": 190346
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6855709552764893,
      "learning_rate": 4.3644243081826245e-05,
      "loss": 3.0018,
      "step": 190347
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9646482467651367,
      "learning_rate": 4.3642118386570444e-05,
      "loss": 2.859,
      "step": 190348
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.087965726852417,
      "learning_rate": 4.363999373897632e-05,
      "loss": 2.7332,
      "step": 190349
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.137495279312134,
      "learning_rate": 4.3637869139044436e-05,
      "loss": 3.0711,
      "step": 190350
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.734405994415283,
      "learning_rate": 4.363574458677509e-05,
      "loss": 2.8814,
      "step": 190351
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6551613807678223,
      "learning_rate": 4.363362008216872e-05,
      "loss": 2.6261,
      "step": 190352
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.300844669342041,
      "learning_rate": 4.3631495625225624e-05,
      "loss": 2.7822,
      "step": 190353
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1180977821350098,
      "learning_rate": 4.362937121594636e-05,
      "loss": 2.876,
      "step": 190354
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9820709228515625,
      "learning_rate": 4.3627246854331146e-05,
      "loss": 3.0423,
      "step": 190355
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.888679027557373,
      "learning_rate": 4.362512254038053e-05,
      "loss": 2.9768,
      "step": 190356
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.875623941421509,
      "learning_rate": 4.3622998274094854e-05,
      "loss": 2.969,
      "step": 190357
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.92262601852417,
      "learning_rate": 4.362087405547449e-05,
      "loss": 3.1121,
      "step": 190358
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.976652145385742,
      "learning_rate": 4.361874988451979e-05,
      "loss": 2.9193,
      "step": 190359
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.9512455463409424,
      "learning_rate": 4.361662576123126e-05,
      "loss": 3.2255,
      "step": 190360
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9273455142974854,
      "learning_rate": 4.3614501685609206e-05,
      "loss": 2.9024,
      "step": 190361
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9512112140655518,
      "learning_rate": 4.3612377657654086e-05,
      "loss": 2.8364,
      "step": 190362
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.9657933712005615,
      "learning_rate": 4.361025367736627e-05,
      "loss": 2.9873,
      "step": 190363
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.75235915184021,
      "learning_rate": 4.360812974474609e-05,
      "loss": 2.8231,
      "step": 190364
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8752334117889404,
      "learning_rate": 4.360600585979408e-05,
      "loss": 2.9957,
      "step": 190365
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4930789470672607,
      "learning_rate": 4.3603882022510506e-05,
      "loss": 2.9376,
      "step": 190366
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.580728530883789,
      "learning_rate": 4.36017582328958e-05,
      "loss": 3.0762,
      "step": 190367
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9394736289978027,
      "learning_rate": 4.3599634490950394e-05,
      "loss": 2.8404,
      "step": 190368
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.815211534500122,
      "learning_rate": 4.359751079667466e-05,
      "loss": 2.8724,
      "step": 190369
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.716797113418579,
      "learning_rate": 4.3595387150068926e-05,
      "loss": 3.0635,
      "step": 190370
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.64587664604187,
      "learning_rate": 4.359326355113373e-05,
      "loss": 3.041,
      "step": 190371
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.504091501235962,
      "learning_rate": 4.35911399998693e-05,
      "loss": 2.6706,
      "step": 190372
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.371246337890625,
      "learning_rate": 4.35890164962762e-05,
      "loss": 2.9242,
      "step": 190373
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.468830108642578,
      "learning_rate": 4.358689304035473e-05,
      "loss": 3.0416,
      "step": 190374
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.797105312347412,
      "learning_rate": 4.35847696321053e-05,
      "loss": 2.7376,
      "step": 190375
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7402915954589844,
      "learning_rate": 4.358264627152827e-05,
      "loss": 2.9682,
      "step": 190376
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.641855478286743,
      "learning_rate": 4.3580522958624064e-05,
      "loss": 3.09,
      "step": 190377
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7557780742645264,
      "learning_rate": 4.357839969339306e-05,
      "loss": 3.0538,
      "step": 190378
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2704179286956787,
      "learning_rate": 4.3576276475835726e-05,
      "loss": 2.7183,
      "step": 190379
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0990822315216064,
      "learning_rate": 4.357415330595242e-05,
      "loss": 2.882,
      "step": 190380
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0479636192321777,
      "learning_rate": 4.3572030183743514e-05,
      "loss": 2.9767,
      "step": 190381
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7922496795654297,
      "learning_rate": 4.35699071092093e-05,
      "loss": 2.988,
      "step": 190382
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.975255250930786,
      "learning_rate": 4.356778408235039e-05,
      "loss": 3.0981,
      "step": 190383
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.522134304046631,
      "learning_rate": 4.3565661103167e-05,
      "loss": 2.8114,
      "step": 190384
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.091536283493042,
      "learning_rate": 4.356353817165965e-05,
      "loss": 2.6817,
      "step": 190385
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8700151443481445,
      "learning_rate": 4.356141528782859e-05,
      "loss": 2.9475,
      "step": 190386
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5570974349975586,
      "learning_rate": 4.3559292451674464e-05,
      "loss": 2.9043,
      "step": 190387
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.39437198638916,
      "learning_rate": 4.355716966319737e-05,
      "loss": 2.8407,
      "step": 190388
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1197588443756104,
      "learning_rate": 4.355504692239793e-05,
      "loss": 3.0201,
      "step": 190389
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.707963466644287,
      "learning_rate": 4.3552924229276355e-05,
      "loss": 3.0014,
      "step": 190390
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4976582527160645,
      "learning_rate": 4.355080158383317e-05,
      "loss": 3.0538,
      "step": 190391
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.469602346420288,
      "learning_rate": 4.354867898606872e-05,
      "loss": 2.8747,
      "step": 190392
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.525625467300415,
      "learning_rate": 4.3546556435983525e-05,
      "loss": 2.9551,
      "step": 190393
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2334189414978027,
      "learning_rate": 4.3544433933577725e-05,
      "loss": 3.0281,
      "step": 190394
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8716790676116943,
      "learning_rate": 4.354231147885192e-05,
      "loss": 3.259,
      "step": 190395
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6651155948638916,
      "learning_rate": 4.3540189071806375e-05,
      "loss": 3.0171,
      "step": 190396
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.143566846847534,
      "learning_rate": 4.3538066712441624e-05,
      "loss": 2.9508,
      "step": 190397
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1596853733062744,
      "learning_rate": 4.3535944400757896e-05,
      "loss": 3.0346,
      "step": 190398
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0474092960357666,
      "learning_rate": 4.353382213675586e-05,
      "loss": 2.984,
      "step": 190399
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.536930799484253,
      "learning_rate": 4.353169992043555e-05,
      "loss": 3.187,
      "step": 190400
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.788167715072632,
      "learning_rate": 4.3529577751797626e-05,
      "loss": 2.9918,
      "step": 190401
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7737245559692383,
      "learning_rate": 4.352745563084233e-05,
      "loss": 2.9004,
      "step": 190402
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7021095752716064,
      "learning_rate": 4.352533355757016e-05,
      "loss": 2.909,
      "step": 190403
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8252995014190674,
      "learning_rate": 4.352321153198145e-05,
      "loss": 3.0804,
      "step": 190404
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.078512668609619,
      "learning_rate": 4.3521089554076725e-05,
      "loss": 2.9564,
      "step": 190405
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9084105491638184,
      "learning_rate": 4.351896762385613e-05,
      "loss": 2.9307,
      "step": 190406
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.857816696166992,
      "learning_rate": 4.351684574132028e-05,
      "loss": 2.9788,
      "step": 190407
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.52051043510437,
      "learning_rate": 4.351472390646939e-05,
      "loss": 3.1671,
      "step": 190408
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.635809898376465,
      "learning_rate": 4.351260211930406e-05,
      "loss": 2.8503,
      "step": 190409
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.8122334480285645,
      "learning_rate": 4.351048037982452e-05,
      "loss": 2.683,
      "step": 190410
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2330853939056396,
      "learning_rate": 4.350835868803133e-05,
      "loss": 3.0706,
      "step": 190411
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.748063325881958,
      "learning_rate": 4.3506237043924664e-05,
      "loss": 2.9714,
      "step": 190412
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.9313063621520996,
      "learning_rate": 4.350411544750508e-05,
      "loss": 2.6825,
      "step": 190413
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7983429431915283,
      "learning_rate": 4.3501993898772856e-05,
      "loss": 2.7508,
      "step": 190414
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0960733890533447,
      "learning_rate": 4.349987239772852e-05,
      "loss": 2.9904,
      "step": 190415
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.478167772293091,
      "learning_rate": 4.349775094437233e-05,
      "loss": 2.91,
      "step": 190416
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7704575061798096,
      "learning_rate": 4.34956295387049e-05,
      "loss": 3.1187,
      "step": 190417
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5428459644317627,
      "learning_rate": 4.349350818072632e-05,
      "loss": 2.8478,
      "step": 190418
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.849714756011963,
      "learning_rate": 4.349138687043723e-05,
      "loss": 2.9339,
      "step": 190419
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4517717361450195,
      "learning_rate": 4.348926560783785e-05,
      "loss": 2.9623,
      "step": 190420
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2087080478668213,
      "learning_rate": 4.348714439292873e-05,
      "loss": 3.1047,
      "step": 190421
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8281092643737793,
      "learning_rate": 4.348502322571013e-05,
      "loss": 3.0869,
      "step": 190422
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6560826301574707,
      "learning_rate": 4.3482902106182636e-05,
      "loss": 3.2053,
      "step": 190423
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6791536808013916,
      "learning_rate": 4.3480781034346366e-05,
      "loss": 3.1258,
      "step": 190424
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7630817890167236,
      "learning_rate": 4.347866001020195e-05,
      "loss": 2.9043,
      "step": 190425
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.6969733238220215,
      "learning_rate": 4.347653903374961e-05,
      "loss": 2.9959,
      "step": 190426
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.725258827209473,
      "learning_rate": 4.34744181049899e-05,
      "loss": 2.6507,
      "step": 190427
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.9193084239959717,
      "learning_rate": 4.347229722392306e-05,
      "loss": 2.8044,
      "step": 190428
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3366546630859375,
      "learning_rate": 4.3470176390549704e-05,
      "loss": 2.9391,
      "step": 190429
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.477388381958008,
      "learning_rate": 4.346805560486997e-05,
      "loss": 3.0401,
      "step": 190430
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.873171091079712,
      "learning_rate": 4.346593486688441e-05,
      "loss": 2.7881,
      "step": 190431
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.775331974029541,
      "learning_rate": 4.3463814176593305e-05,
      "loss": 2.9469,
      "step": 190432
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.198975086212158,
      "learning_rate": 4.3461693533997175e-05,
      "loss": 3.1406,
      "step": 190433
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.378539085388184,
      "learning_rate": 4.345957293909633e-05,
      "loss": 2.7749,
      "step": 190434
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.572230339050293,
      "learning_rate": 4.3457452391891235e-05,
      "loss": 3.1639,
      "step": 190435
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.664944887161255,
      "learning_rate": 4.345533189238225e-05,
      "loss": 2.8853,
      "step": 190436
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5470798015594482,
      "learning_rate": 4.345321144056974e-05,
      "loss": 2.9277,
      "step": 190437
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7783122062683105,
      "learning_rate": 4.3451091036454086e-05,
      "loss": 3.111,
      "step": 190438
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5908889770507812,
      "learning_rate": 4.3448970680035775e-05,
      "loss": 3.1023,
      "step": 190439
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.691288471221924,
      "learning_rate": 4.344685037131505e-05,
      "loss": 2.8739,
      "step": 190440
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.244715213775635,
      "learning_rate": 4.344473011029249e-05,
      "loss": 2.8024,
      "step": 190441
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.9649031162261963,
      "learning_rate": 4.3442609896968387e-05,
      "loss": 2.9376,
      "step": 190442
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.998647928237915,
      "learning_rate": 4.3440489731343156e-05,
      "loss": 2.9985,
      "step": 190443
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.830310583114624,
      "learning_rate": 4.34383696134171e-05,
      "loss": 2.9031,
      "step": 190444
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.739595413208008,
      "learning_rate": 4.3436249543190794e-05,
      "loss": 2.9776,
      "step": 190445
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.166952610015869,
      "learning_rate": 4.343412952066443e-05,
      "loss": 3.0397,
      "step": 190446
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3986690044403076,
      "learning_rate": 4.343200954583857e-05,
      "loss": 2.9795,
      "step": 190447
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6185901165008545,
      "learning_rate": 4.342988961871356e-05,
      "loss": 2.8778,
      "step": 190448
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9684906005859375,
      "learning_rate": 4.342776973928973e-05,
      "loss": 2.7127,
      "step": 190449
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.085862636566162,
      "learning_rate": 4.3425649907567574e-05,
      "loss": 2.9141,
      "step": 190450
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6907615661621094,
      "learning_rate": 4.342353012354742e-05,
      "loss": 3.0035,
      "step": 190451
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.334148645401001,
      "learning_rate": 4.342141038722958e-05,
      "loss": 2.8188,
      "step": 190452
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.035504102706909,
      "learning_rate": 4.341929069861465e-05,
      "loss": 2.8662,
      "step": 190453
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6561121940612793,
      "learning_rate": 4.341717105770293e-05,
      "loss": 2.966,
      "step": 190454
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.560763359069824,
      "learning_rate": 4.341505146449471e-05,
      "loss": 2.5637,
      "step": 190455
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.716078758239746,
      "learning_rate": 4.3412931918990535e-05,
      "loss": 2.8004,
      "step": 190456
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8353042602539062,
      "learning_rate": 4.3410812421190664e-05,
      "loss": 2.8224,
      "step": 190457
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6279265880584717,
      "learning_rate": 4.340869297109567e-05,
      "loss": 3.099,
      "step": 190458
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.335909128189087,
      "learning_rate": 4.340657356870581e-05,
      "loss": 2.9054,
      "step": 190459
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7838940620422363,
      "learning_rate": 4.340445421402153e-05,
      "loss": 2.9599,
      "step": 190460
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9082112312316895,
      "learning_rate": 4.3402334907043154e-05,
      "loss": 2.6503,
      "step": 190461
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.195574760437012,
      "learning_rate": 4.340021564777115e-05,
      "loss": 3.0405,
      "step": 190462
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5481021404266357,
      "learning_rate": 4.339809643620586e-05,
      "loss": 2.639,
      "step": 190463
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1130192279815674,
      "learning_rate": 4.339597727234776e-05,
      "loss": 2.9057,
      "step": 190464
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8989439010620117,
      "learning_rate": 4.33938581561972e-05,
      "loss": 3.1071,
      "step": 190465
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.717271566390991,
      "learning_rate": 4.339173908775455e-05,
      "loss": 3.0919,
      "step": 190466
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6241750717163086,
      "learning_rate": 4.338962006702017e-05,
      "loss": 2.8516,
      "step": 190467
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4470908641815186,
      "learning_rate": 4.338750109399456e-05,
      "loss": 3.0499,
      "step": 190468
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.823485851287842,
      "learning_rate": 4.3385382168678016e-05,
      "loss": 2.8228,
      "step": 190469
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3254714012145996,
      "learning_rate": 4.3383263291071014e-05,
      "loss": 2.9961,
      "step": 190470
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9817986488342285,
      "learning_rate": 4.338114446117384e-05,
      "loss": 3.0686,
      "step": 190471
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.750349998474121,
      "learning_rate": 4.337902567898708e-05,
      "loss": 2.8249,
      "step": 190472
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9244325160980225,
      "learning_rate": 4.337690694451088e-05,
      "loss": 2.8737,
      "step": 190473
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.983268737792969,
      "learning_rate": 4.337478825774585e-05,
      "loss": 2.7456,
      "step": 190474
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.957420825958252,
      "learning_rate": 4.337266961869219e-05,
      "loss": 2.5666,
      "step": 190475
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6954782009124756,
      "learning_rate": 4.3370551027350464e-05,
      "loss": 2.9987,
      "step": 190476
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7344279289245605,
      "learning_rate": 4.336843248372094e-05,
      "loss": 2.8788,
      "step": 190477
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6202168464660645,
      "learning_rate": 4.336631398780421e-05,
      "loss": 2.8815,
      "step": 190478
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5325217247009277,
      "learning_rate": 4.336419553960039e-05,
      "loss": 2.8938,
      "step": 190479
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1559574604034424,
      "learning_rate": 4.3362077139110064e-05,
      "loss": 3.0277,
      "step": 190480
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8141016960144043,
      "learning_rate": 4.3359958786333506e-05,
      "loss": 2.9463,
      "step": 190481
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.166599750518799,
      "learning_rate": 4.3357840481271276e-05,
      "loss": 2.8564,
      "step": 190482
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.459712505340576,
      "learning_rate": 4.335572222392355e-05,
      "loss": 3.0434,
      "step": 190483
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1522395610809326,
      "learning_rate": 4.335360401429102e-05,
      "loss": 2.8014,
      "step": 190484
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.736874580383301,
      "learning_rate": 4.3351485852373726e-05,
      "loss": 2.8402,
      "step": 190485
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.130971670150757,
      "learning_rate": 4.3349367738172325e-05,
      "loss": 2.9551,
      "step": 190486
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.417945384979248,
      "learning_rate": 4.3347249671687056e-05,
      "loss": 2.9834,
      "step": 190487
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.552377223968506,
      "learning_rate": 4.334513165291845e-05,
      "loss": 2.7934,
      "step": 190488
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.955376148223877,
      "learning_rate": 4.3343013681866744e-05,
      "loss": 3.016,
      "step": 190489
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.765906810760498,
      "learning_rate": 4.334089575853257e-05,
      "loss": 2.8188,
      "step": 190490
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7943899631500244,
      "learning_rate": 4.333877788291599e-05,
      "loss": 3.0853,
      "step": 190491
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.826725721359253,
      "learning_rate": 4.33366600550177e-05,
      "loss": 3.0857,
      "step": 190492
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.161623239517212,
      "learning_rate": 4.333454227483788e-05,
      "loss": 2.7231,
      "step": 190493
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.111799716949463,
      "learning_rate": 4.3332424542377054e-05,
      "loss": 2.9725,
      "step": 190494
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5084288120269775,
      "learning_rate": 4.3330306857635554e-05,
      "loss": 2.9676,
      "step": 190495
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5885887145996094,
      "learning_rate": 4.3328189220613884e-05,
      "loss": 2.8685,
      "step": 190496
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2802646160125732,
      "learning_rate": 4.332607163131224e-05,
      "loss": 3.0104,
      "step": 190497
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4345037937164307,
      "learning_rate": 4.3323954089731196e-05,
      "loss": 3.1742,
      "step": 190498
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9383010864257812,
      "learning_rate": 4.332183659587101e-05,
      "loss": 2.9608,
      "step": 190499
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1267189979553223,
      "learning_rate": 4.331971914973218e-05,
      "loss": 2.9117,
      "step": 190500
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.394662857055664,
      "learning_rate": 4.3317601751315015e-05,
      "loss": 2.914,
      "step": 190501
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.588716506958008,
      "learning_rate": 4.3315484400620006e-05,
      "loss": 3.0991,
      "step": 190502
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7798984050750732,
      "learning_rate": 4.3313367097647524e-05,
      "loss": 3.1357,
      "step": 190503
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.740154981613159,
      "learning_rate": 4.33112498423979e-05,
      "loss": 3.0272,
      "step": 190504
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9382166862487793,
      "learning_rate": 4.3309132634871505e-05,
      "loss": 2.8907,
      "step": 190505
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.366831302642822,
      "learning_rate": 4.330701547506886e-05,
      "loss": 2.9173,
      "step": 190506
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0931906700134277,
      "learning_rate": 4.330489836299022e-05,
      "loss": 3.0879,
      "step": 190507
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.890186071395874,
      "learning_rate": 4.330278129863609e-05,
      "loss": 2.6489,
      "step": 190508
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.890747308731079,
      "learning_rate": 4.3300664282006825e-05,
      "loss": 2.8374,
      "step": 190509
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2966339588165283,
      "learning_rate": 4.3298547313102816e-05,
      "loss": 3.1273,
      "step": 190510
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9629695415496826,
      "learning_rate": 4.32964303919244e-05,
      "loss": 3.013,
      "step": 190511
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7016966342926025,
      "learning_rate": 4.329431351847207e-05,
      "loss": 3.278,
      "step": 190512
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.256375789642334,
      "learning_rate": 4.329219669274613e-05,
      "loss": 3.0747,
      "step": 190513
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5245091915130615,
      "learning_rate": 4.3290079914747075e-05,
      "loss": 3.0453,
      "step": 190514
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.575798749923706,
      "learning_rate": 4.328796318447522e-05,
      "loss": 2.9061,
      "step": 190515
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.665876626968384,
      "learning_rate": 4.328584650193101e-05,
      "loss": 2.7558,
      "step": 190516
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.640277862548828,
      "learning_rate": 4.328372986711472e-05,
      "loss": 3.0288,
      "step": 190517
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8843140602111816,
      "learning_rate": 4.3281613280026895e-05,
      "loss": 2.9719,
      "step": 190518
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.2379963397979736,
      "learning_rate": 4.327949674066779e-05,
      "loss": 3.0023,
      "step": 190519
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3671140670776367,
      "learning_rate": 4.3277380249037965e-05,
      "loss": 2.9109,
      "step": 190520
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.000094413757324,
      "learning_rate": 4.3275263805137696e-05,
      "loss": 2.9595,
      "step": 190521
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0892882347106934,
      "learning_rate": 4.3273147408967415e-05,
      "loss": 2.8188,
      "step": 190522
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.69417142868042,
      "learning_rate": 4.327103106052745e-05,
      "loss": 3.0415,
      "step": 190523
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.461617946624756,
      "learning_rate": 4.3268914759818274e-05,
      "loss": 2.718,
      "step": 190524
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7881743907928467,
      "learning_rate": 4.3266798506840214e-05,
      "loss": 3.1038,
      "step": 190525
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4472241401672363,
      "learning_rate": 4.3264682301593775e-05,
      "loss": 2.5067,
      "step": 190526
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.71663761138916,
      "learning_rate": 4.3262566144079256e-05,
      "loss": 3.0068,
      "step": 190527
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.552062511444092,
      "learning_rate": 4.326045003429709e-05,
      "loss": 3.1702,
      "step": 190528
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.915684461593628,
      "learning_rate": 4.3258333972247574e-05,
      "loss": 2.9639,
      "step": 190529
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.0212297439575195,
      "learning_rate": 4.325621795793127e-05,
      "loss": 3.1986,
      "step": 190530
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7406580448150635,
      "learning_rate": 4.325410199134839e-05,
      "loss": 3.0755,
      "step": 190531
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5981452465057373,
      "learning_rate": 4.325198607249949e-05,
      "loss": 2.9046,
      "step": 190532
    },
    {
      "epoch": 2.48,
      "grad_norm": 7.471881866455078,
      "learning_rate": 4.3249870201384874e-05,
      "loss": 2.851,
      "step": 190533
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.548780679702759,
      "learning_rate": 4.324775437800491e-05,
      "loss": 2.8392,
      "step": 190534
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.869807720184326,
      "learning_rate": 4.3245638602360124e-05,
      "loss": 3.0023,
      "step": 190535
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.038925886154175,
      "learning_rate": 4.324352287445079e-05,
      "loss": 2.9948,
      "step": 190536
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.67293381690979,
      "learning_rate": 4.324140719427727e-05,
      "loss": 3.0969,
      "step": 190537
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1945488452911377,
      "learning_rate": 4.32392915618401e-05,
      "loss": 2.7717,
      "step": 190538
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5281195640563965,
      "learning_rate": 4.323717597713957e-05,
      "loss": 2.7654,
      "step": 190539
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7972638607025146,
      "learning_rate": 4.3235060440176025e-05,
      "loss": 3.1491,
      "step": 190540
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.565183401107788,
      "learning_rate": 4.3232944950949996e-05,
      "loss": 3.0268,
      "step": 190541
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9373271465301514,
      "learning_rate": 4.323082950946185e-05,
      "loss": 2.7453,
      "step": 190542
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.780609369277954,
      "learning_rate": 4.3228714115711816e-05,
      "loss": 2.9899,
      "step": 190543
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.205442428588867,
      "learning_rate": 4.3226598769700525e-05,
      "loss": 2.8535,
      "step": 190544
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.8354451656341553,
      "learning_rate": 4.322448347142825e-05,
      "loss": 2.6308,
      "step": 190545
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1334266662597656,
      "learning_rate": 4.322236822089529e-05,
      "loss": 2.97,
      "step": 190546
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.69862961769104,
      "learning_rate": 4.3220253018102234e-05,
      "loss": 2.9733,
      "step": 190547
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.47802734375,
      "learning_rate": 4.32181378630493e-05,
      "loss": 2.9678,
      "step": 190548
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9565610885620117,
      "learning_rate": 4.3216022755737034e-05,
      "loss": 2.8954,
      "step": 190549
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8237648010253906,
      "learning_rate": 4.321390769616575e-05,
      "loss": 3.0005,
      "step": 190550
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.949171781539917,
      "learning_rate": 4.321179268433588e-05,
      "loss": 2.803,
      "step": 190551
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.046201229095459,
      "learning_rate": 4.3209677720247685e-05,
      "loss": 3.0822,
      "step": 190552
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.687289237976074,
      "learning_rate": 4.320756280390173e-05,
      "loss": 2.975,
      "step": 190553
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0101864337921143,
      "learning_rate": 4.3205447935298287e-05,
      "loss": 2.9602,
      "step": 190554
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.160815477371216,
      "learning_rate": 4.320333311443782e-05,
      "loss": 2.9969,
      "step": 190555
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.451457977294922,
      "learning_rate": 4.3201218341320666e-05,
      "loss": 2.762,
      "step": 190556
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.662543535232544,
      "learning_rate": 4.319910361594742e-05,
      "loss": 2.845,
      "step": 190557
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8745508193969727,
      "learning_rate": 4.319698893831811e-05,
      "loss": 2.6893,
      "step": 190558
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.198760509490967,
      "learning_rate": 4.3194874308433445e-05,
      "loss": 2.9489,
      "step": 190559
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7881593704223633,
      "learning_rate": 4.319275972629359e-05,
      "loss": 2.7114,
      "step": 190560
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1089365482330322,
      "learning_rate": 4.3190645191899145e-05,
      "loss": 2.6555,
      "step": 190561
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5641844272613525,
      "learning_rate": 4.3188530705250335e-05,
      "loss": 2.9552,
      "step": 190562
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.48904275894165,
      "learning_rate": 4.318641626634777e-05,
      "loss": 2.8592,
      "step": 190563
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9908032417297363,
      "learning_rate": 4.318430187519154e-05,
      "loss": 2.9239,
      "step": 190564
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.862884998321533,
      "learning_rate": 4.3182187531782286e-05,
      "loss": 2.8232,
      "step": 190565
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.73822021484375,
      "learning_rate": 4.3180073236120236e-05,
      "loss": 3.0632,
      "step": 190566
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.542820930480957,
      "learning_rate": 4.317795898820589e-05,
      "loss": 2.9413,
      "step": 190567
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.654798746109009,
      "learning_rate": 4.317584478803959e-05,
      "loss": 2.9794,
      "step": 190568
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.995053291320801,
      "learning_rate": 4.3173730635621796e-05,
      "loss": 2.7213,
      "step": 190569
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.4993696212768555,
      "learning_rate": 4.317161653095287e-05,
      "loss": 2.9498,
      "step": 190570
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2393765449523926,
      "learning_rate": 4.316950247403318e-05,
      "loss": 2.8037,
      "step": 190571
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.141566276550293,
      "learning_rate": 4.316738846486303e-05,
      "loss": 2.9024,
      "step": 190572
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.696153163909912,
      "learning_rate": 4.316527450344302e-05,
      "loss": 3.0342,
      "step": 190573
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0712528228759766,
      "learning_rate": 4.316316058977334e-05,
      "loss": 2.8088,
      "step": 190574
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6433446407318115,
      "learning_rate": 4.3161046723854565e-05,
      "loss": 2.7646,
      "step": 190575
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.459613800048828,
      "learning_rate": 4.3158932905686996e-05,
      "loss": 2.9065,
      "step": 190576
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.352008819580078,
      "learning_rate": 4.315681913527099e-05,
      "loss": 2.6234,
      "step": 190577
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.269216537475586,
      "learning_rate": 4.315470541260696e-05,
      "loss": 3.1988,
      "step": 190578
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.629058837890625,
      "learning_rate": 4.3152591737695366e-05,
      "loss": 3.0271,
      "step": 190579
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.04433012008667,
      "learning_rate": 4.3150478110536505e-05,
      "loss": 2.9565,
      "step": 190580
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1041550636291504,
      "learning_rate": 4.3148364531130875e-05,
      "loss": 2.85,
      "step": 190581
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1279399394989014,
      "learning_rate": 4.314625099947878e-05,
      "loss": 2.9475,
      "step": 190582
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.359248399734497,
      "learning_rate": 4.314413751558069e-05,
      "loss": 2.9134,
      "step": 190583
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.653930902481079,
      "learning_rate": 4.3142024079436864e-05,
      "loss": 3.1987,
      "step": 190584
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.889552116394043,
      "learning_rate": 4.313991069104788e-05,
      "loss": 2.9003,
      "step": 190585
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.932825803756714,
      "learning_rate": 4.3137797350413916e-05,
      "loss": 2.905,
      "step": 190586
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.543421745300293,
      "learning_rate": 4.313568405753559e-05,
      "loss": 2.819,
      "step": 190587
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1413283348083496,
      "learning_rate": 4.313357081241317e-05,
      "loss": 3.0725,
      "step": 190588
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.420398235321045,
      "learning_rate": 4.3131457615047104e-05,
      "loss": 2.9493,
      "step": 190589
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.272434949874878,
      "learning_rate": 4.3129344465437646e-05,
      "loss": 2.7497,
      "step": 190590
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.28315544128418,
      "learning_rate": 4.3127231363585377e-05,
      "loss": 2.9154,
      "step": 190591
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8982648849487305,
      "learning_rate": 4.312511830949051e-05,
      "loss": 3.0457,
      "step": 190592
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7553579807281494,
      "learning_rate": 4.3123005303153614e-05,
      "loss": 2.9333,
      "step": 190593
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.121894598007202,
      "learning_rate": 4.312089234457501e-05,
      "loss": 2.9719,
      "step": 190594
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.058610200881958,
      "learning_rate": 4.311877943375507e-05,
      "loss": 2.7512,
      "step": 190595
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.87076997756958,
      "learning_rate": 4.311666657069416e-05,
      "loss": 2.8488,
      "step": 190596
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.622176170349121,
      "learning_rate": 4.3114553755392744e-05,
      "loss": 3.0362,
      "step": 190597
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.554635524749756,
      "learning_rate": 4.3112440987851125e-05,
      "loss": 2.6721,
      "step": 190598
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.97515606880188,
      "learning_rate": 4.31103282680698e-05,
      "loss": 3.11,
      "step": 190599
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1866655349731445,
      "learning_rate": 4.3108215596049146e-05,
      "loss": 2.9328,
      "step": 190600
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.750924825668335,
      "learning_rate": 4.310610297178951e-05,
      "loss": 3.1132,
      "step": 190601
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.040132999420166,
      "learning_rate": 4.310399039529124e-05,
      "loss": 2.8008,
      "step": 190602
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9070334434509277,
      "learning_rate": 4.310187786655483e-05,
      "loss": 2.9724,
      "step": 190603
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4685909748077393,
      "learning_rate": 4.3099765385580586e-05,
      "loss": 2.9525,
      "step": 190604
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.97121262550354,
      "learning_rate": 4.3097652952369e-05,
      "loss": 2.7573,
      "step": 190605
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6952507495880127,
      "learning_rate": 4.3095540566920436e-05,
      "loss": 2.9326,
      "step": 190606
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7439043521881104,
      "learning_rate": 4.309342822923524e-05,
      "loss": 2.9815,
      "step": 190607
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6338367462158203,
      "learning_rate": 4.3091315939313764e-05,
      "loss": 3.0131,
      "step": 190608
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.247983455657959,
      "learning_rate": 4.3089203697156514e-05,
      "loss": 3.1365,
      "step": 190609
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.364672660827637,
      "learning_rate": 4.308709150276376e-05,
      "loss": 2.7022,
      "step": 190610
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9196228981018066,
      "learning_rate": 4.308497935613606e-05,
      "loss": 3.0223,
      "step": 190611
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3359336853027344,
      "learning_rate": 4.308286725727369e-05,
      "loss": 3.0286,
      "step": 190612
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.874199628829956,
      "learning_rate": 4.3080755206177075e-05,
      "loss": 3.0369,
      "step": 190613
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.814652442932129,
      "learning_rate": 4.307864320284652e-05,
      "loss": 2.9271,
      "step": 190614
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.131955862045288,
      "learning_rate": 4.307653124728259e-05,
      "loss": 2.9065,
      "step": 190615
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3769657611846924,
      "learning_rate": 4.307441933948548e-05,
      "loss": 2.8903,
      "step": 190616
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.081803798675537,
      "learning_rate": 4.307230747945576e-05,
      "loss": 2.9576,
      "step": 190617
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.2338337898254395,
      "learning_rate": 4.3070195667193764e-05,
      "loss": 2.6173,
      "step": 190618
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5286288261413574,
      "learning_rate": 4.306808390269979e-05,
      "loss": 2.8483,
      "step": 190619
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3479700088500977,
      "learning_rate": 4.306597218597437e-05,
      "loss": 2.806,
      "step": 190620
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2934651374816895,
      "learning_rate": 4.306386051701784e-05,
      "loss": 2.8032,
      "step": 190621
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.458037853240967,
      "learning_rate": 4.306174889583054e-05,
      "loss": 2.6893,
      "step": 190622
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.256795644760132,
      "learning_rate": 4.305963732241295e-05,
      "loss": 2.8664,
      "step": 190623
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.664886474609375,
      "learning_rate": 4.305752579676546e-05,
      "loss": 2.9981,
      "step": 190624
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.911304235458374,
      "learning_rate": 4.305541431888835e-05,
      "loss": 2.8433,
      "step": 190625
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.040633201599121,
      "learning_rate": 4.3053302888782126e-05,
      "loss": 3.3041,
      "step": 190626
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8575072288513184,
      "learning_rate": 4.305119150644716e-05,
      "loss": 2.7829,
      "step": 190627
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.449873924255371,
      "learning_rate": 4.3049080171883774e-05,
      "loss": 3.0843,
      "step": 190628
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.956061601638794,
      "learning_rate": 4.304696888509245e-05,
      "loss": 2.9556,
      "step": 190629
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4486684799194336,
      "learning_rate": 4.3044857646073575e-05,
      "loss": 2.9379,
      "step": 190630
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5061020851135254,
      "learning_rate": 4.304274645482741e-05,
      "loss": 3.0133,
      "step": 190631
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.60086989402771,
      "learning_rate": 4.3040635311354544e-05,
      "loss": 3.3243,
      "step": 190632
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.347963809967041,
      "learning_rate": 4.303852421565522e-05,
      "loss": 2.9269,
      "step": 190633
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.308453321456909,
      "learning_rate": 4.303641316772992e-05,
      "loss": 2.9473,
      "step": 190634
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.557324171066284,
      "learning_rate": 4.3034302167579005e-05,
      "loss": 3.0833,
      "step": 190635
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.066204309463501,
      "learning_rate": 4.303219121520281e-05,
      "loss": 2.7878,
      "step": 190636
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.056223154067993,
      "learning_rate": 4.303008031060186e-05,
      "loss": 3.0404,
      "step": 190637
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.559927463531494,
      "learning_rate": 4.302796945377646e-05,
      "loss": 2.8854,
      "step": 190638
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7597343921661377,
      "learning_rate": 4.3025858644726943e-05,
      "loss": 2.9521,
      "step": 190639
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.195065021514893,
      "learning_rate": 4.302374788345381e-05,
      "loss": 2.9356,
      "step": 190640
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6428589820861816,
      "learning_rate": 4.302163716995742e-05,
      "loss": 3.1118,
      "step": 190641
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5135252475738525,
      "learning_rate": 4.301952650423812e-05,
      "loss": 3.063,
      "step": 190642
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.9533402919769287,
      "learning_rate": 4.30174158862964e-05,
      "loss": 2.8347,
      "step": 190643
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.72196364402771,
      "learning_rate": 4.30153053161326e-05,
      "loss": 3.1858,
      "step": 190644
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9440319538116455,
      "learning_rate": 4.3013194793747006e-05,
      "loss": 3.1567,
      "step": 190645
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.390748977661133,
      "learning_rate": 4.30110843191402e-05,
      "loss": 2.8713,
      "step": 190646
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.852084159851074,
      "learning_rate": 4.30089738923124e-05,
      "loss": 2.8121,
      "step": 190647
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9726760387420654,
      "learning_rate": 4.300686351326419e-05,
      "loss": 2.8716,
      "step": 190648
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.572371244430542,
      "learning_rate": 4.300475318199582e-05,
      "loss": 2.9322,
      "step": 190649
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3868861198425293,
      "learning_rate": 4.300264289850773e-05,
      "loss": 3.0709,
      "step": 190650
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.682525634765625,
      "learning_rate": 4.300053266280022e-05,
      "loss": 2.8885,
      "step": 190651
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.841275453567505,
      "learning_rate": 4.299842247487383e-05,
      "loss": 3.0102,
      "step": 190652
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8624398708343506,
      "learning_rate": 4.299631233472885e-05,
      "loss": 2.9817,
      "step": 190653
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.888875722885132,
      "learning_rate": 4.299420224236574e-05,
      "loss": 2.7341,
      "step": 190654
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.923424482345581,
      "learning_rate": 4.299209219778488e-05,
      "loss": 2.9346,
      "step": 190655
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.398850917816162,
      "learning_rate": 4.2989982200986625e-05,
      "loss": 2.9787,
      "step": 190656
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.159909963607788,
      "learning_rate": 4.298787225197132e-05,
      "loss": 3.02,
      "step": 190657
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.284816265106201,
      "learning_rate": 4.298576235073949e-05,
      "loss": 2.8424,
      "step": 190658
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.925905227661133,
      "learning_rate": 4.298365249729138e-05,
      "loss": 3.0148,
      "step": 190659
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.687324047088623,
      "learning_rate": 4.298154269162757e-05,
      "loss": 3.0108,
      "step": 190660
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9073445796966553,
      "learning_rate": 4.29794329337483e-05,
      "loss": 2.7276,
      "step": 190661
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2415361404418945,
      "learning_rate": 4.297732322365402e-05,
      "loss": 2.9224,
      "step": 190662
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3319318294525146,
      "learning_rate": 4.2975213561345035e-05,
      "loss": 3.0852,
      "step": 190663
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.049984455108643,
      "learning_rate": 4.297310394682186e-05,
      "loss": 2.7357,
      "step": 190664
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.046797513961792,
      "learning_rate": 4.29709943800848e-05,
      "loss": 2.7204,
      "step": 190665
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4124808311462402,
      "learning_rate": 4.296888486113434e-05,
      "loss": 3.194,
      "step": 190666
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1804943084716797,
      "learning_rate": 4.296677538997083e-05,
      "loss": 2.9957,
      "step": 190667
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8789098262786865,
      "learning_rate": 4.2964665966594626e-05,
      "loss": 3.059,
      "step": 190668
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.872119426727295,
      "learning_rate": 4.296255659100609e-05,
      "loss": 2.9964,
      "step": 190669
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.056889772415161,
      "learning_rate": 4.2960447263205735e-05,
      "loss": 3.204,
      "step": 190670
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4689958095550537,
      "learning_rate": 4.295833798319378e-05,
      "loss": 2.7161,
      "step": 190671
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.101766109466553,
      "learning_rate": 4.2956228750970836e-05,
      "loss": 2.7392,
      "step": 190672
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.940873622894287,
      "learning_rate": 4.295411956653716e-05,
      "loss": 2.8652,
      "step": 190673
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7924296855926514,
      "learning_rate": 4.2952010429893194e-05,
      "loss": 2.9915,
      "step": 190674
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0700082778930664,
      "learning_rate": 4.294990134103919e-05,
      "loss": 2.8426,
      "step": 190675
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6075081825256348,
      "learning_rate": 4.2947792299975736e-05,
      "loss": 3.1905,
      "step": 190676
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4290473461151123,
      "learning_rate": 4.294568330670308e-05,
      "loss": 2.8585,
      "step": 190677
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.3168866634368896,
      "learning_rate": 4.2943574361221736e-05,
      "loss": 3.0326,
      "step": 190678
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2058322429656982,
      "learning_rate": 4.2941465463532024e-05,
      "loss": 2.9136,
      "step": 190679
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.636122226715088,
      "learning_rate": 4.293935661363438e-05,
      "loss": 2.8513,
      "step": 190680
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.589369773864746,
      "learning_rate": 4.293724781152904e-05,
      "loss": 3.1672,
      "step": 190681
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0618693828582764,
      "learning_rate": 4.293513905721664e-05,
      "loss": 2.9148,
      "step": 190682
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7363102436065674,
      "learning_rate": 4.293303035069734e-05,
      "loss": 2.8819,
      "step": 190683
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5253710746765137,
      "learning_rate": 4.2930921691971745e-05,
      "loss": 2.901,
      "step": 190684
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.681985855102539,
      "learning_rate": 4.292881308104011e-05,
      "loss": 2.9035,
      "step": 190685
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6029934883117676,
      "learning_rate": 4.292670451790289e-05,
      "loss": 2.9926,
      "step": 190686
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5139994621276855,
      "learning_rate": 4.2924596002560365e-05,
      "loss": 3.0102,
      "step": 190687
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.075580358505249,
      "learning_rate": 4.292248753501311e-05,
      "loss": 3.1759,
      "step": 190688
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.3935444355010986,
      "learning_rate": 4.2920379115261315e-05,
      "loss": 2.8607,
      "step": 190689
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8256826400756836,
      "learning_rate": 4.2918270743305526e-05,
      "loss": 3.0022,
      "step": 190690
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.078124523162842,
      "learning_rate": 4.2916162419146106e-05,
      "loss": 2.998,
      "step": 190691
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.894672870635986,
      "learning_rate": 4.2914054142783447e-05,
      "loss": 2.7822,
      "step": 190692
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.8097784519195557,
      "learning_rate": 4.2911945914217827e-05,
      "loss": 3.0491,
      "step": 190693
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.483384609222412,
      "learning_rate": 4.29098377334498e-05,
      "loss": 2.6414,
      "step": 190694
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9938483238220215,
      "learning_rate": 4.2907729600479615e-05,
      "loss": 3.0001,
      "step": 190695
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.797119379043579,
      "learning_rate": 4.290562151530779e-05,
      "loss": 3.0857,
      "step": 190696
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.116023063659668,
      "learning_rate": 4.290351347793469e-05,
      "loss": 2.9892,
      "step": 190697
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.518549680709839,
      "learning_rate": 4.2901405488360663e-05,
      "loss": 3.0613,
      "step": 190698
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.299286842346191,
      "learning_rate": 4.289929754658607e-05,
      "loss": 2.8609,
      "step": 190699
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.053871154785156,
      "learning_rate": 4.28971896526114e-05,
      "loss": 2.9041,
      "step": 190700
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.63490891456604,
      "learning_rate": 4.2895081806436925e-05,
      "loss": 2.7642,
      "step": 190701
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5128560066223145,
      "learning_rate": 4.2892974008063184e-05,
      "loss": 2.8376,
      "step": 190702
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.073751211166382,
      "learning_rate": 4.2890866257490406e-05,
      "loss": 2.891,
      "step": 190703
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.853965997695923,
      "learning_rate": 4.288875855471923e-05,
      "loss": 2.8376,
      "step": 190704
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.745323181152344,
      "learning_rate": 4.2886650899749744e-05,
      "loss": 2.7721,
      "step": 190705
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5800600051879883,
      "learning_rate": 4.288454329258255e-05,
      "loss": 2.8376,
      "step": 190706
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.980161428451538,
      "learning_rate": 4.288243573321793e-05,
      "loss": 2.8514,
      "step": 190707
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1911609172821045,
      "learning_rate": 4.288032822165636e-05,
      "loss": 2.8631,
      "step": 190708
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1639809608459473,
      "learning_rate": 4.287822075789812e-05,
      "loss": 2.8657,
      "step": 190709
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.066183567047119,
      "learning_rate": 4.287611334194374e-05,
      "loss": 2.895,
      "step": 190710
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.921998977661133,
      "learning_rate": 4.287400597379356e-05,
      "loss": 3.326,
      "step": 190711
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.827821731567383,
      "learning_rate": 4.287189865344793e-05,
      "loss": 3.0019,
      "step": 190712
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.885690212249756,
      "learning_rate": 4.28697913809072e-05,
      "loss": 3.0583,
      "step": 190713
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0134835243225098,
      "learning_rate": 4.2867684156171936e-05,
      "loss": 3.0206,
      "step": 190714
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.920316457748413,
      "learning_rate": 4.286557697924232e-05,
      "loss": 3.2157,
      "step": 190715
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.100856065750122,
      "learning_rate": 4.2863469850118935e-05,
      "loss": 2.8977,
      "step": 190716
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.2094531059265137,
      "learning_rate": 4.2861362768802074e-05,
      "loss": 2.7686,
      "step": 190717
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.632009506225586,
      "learning_rate": 4.285925573529208e-05,
      "loss": 2.7925,
      "step": 190718
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.692863941192627,
      "learning_rate": 4.285714874958946e-05,
      "loss": 2.742,
      "step": 190719
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1556670665740967,
      "learning_rate": 4.2855041811694544e-05,
      "loss": 2.8089,
      "step": 190720
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.62679386138916,
      "learning_rate": 4.2852934921607684e-05,
      "loss": 2.9807,
      "step": 190721
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.882291555404663,
      "learning_rate": 4.285082807932938e-05,
      "loss": 2.8143,
      "step": 190722
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2835752964019775,
      "learning_rate": 4.2848721284859965e-05,
      "loss": 2.8119,
      "step": 190723
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3379721641540527,
      "learning_rate": 4.284661453819974e-05,
      "loss": 2.7824,
      "step": 190724
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7679200172424316,
      "learning_rate": 4.284450783934927e-05,
      "loss": 3.0036,
      "step": 190725
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8632607460021973,
      "learning_rate": 4.284240118830886e-05,
      "loss": 3.0793,
      "step": 190726
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.2533416748046875,
      "learning_rate": 4.284029458507881e-05,
      "loss": 2.9608,
      "step": 190727
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8866591453552246,
      "learning_rate": 4.2838188029659705e-05,
      "loss": 2.9625,
      "step": 190728
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.684349536895752,
      "learning_rate": 4.283608152205179e-05,
      "loss": 3.1874,
      "step": 190729
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8345351219177246,
      "learning_rate": 4.2833975062255467e-05,
      "loss": 2.9317,
      "step": 190730
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.453313112258911,
      "learning_rate": 4.283186865027123e-05,
      "loss": 2.9015,
      "step": 190731
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6636900901794434,
      "learning_rate": 4.282976228609935e-05,
      "loss": 3.0206,
      "step": 190732
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.438903570175171,
      "learning_rate": 4.282765596974031e-05,
      "loss": 2.9011,
      "step": 190733
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0901451110839844,
      "learning_rate": 4.282554970119447e-05,
      "loss": 3.0235,
      "step": 190734
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9275805950164795,
      "learning_rate": 4.282344348046222e-05,
      "loss": 2.768,
      "step": 190735
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.223052978515625,
      "learning_rate": 4.282133730754388e-05,
      "loss": 2.7172,
      "step": 190736
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7319538593292236,
      "learning_rate": 4.281923118243996e-05,
      "loss": 2.9789,
      "step": 190737
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.324342966079712,
      "learning_rate": 4.2817125105150726e-05,
      "loss": 2.8617,
      "step": 190738
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0726258754730225,
      "learning_rate": 4.281501907567675e-05,
      "loss": 2.896,
      "step": 190739
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.094114065170288,
      "learning_rate": 4.2812913094018286e-05,
      "loss": 2.6444,
      "step": 190740
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.140159845352173,
      "learning_rate": 4.281080716017577e-05,
      "loss": 2.8277,
      "step": 190741
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.744508743286133,
      "learning_rate": 4.2808701274149517e-05,
      "loss": 2.9147,
      "step": 190742
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6350274085998535,
      "learning_rate": 4.280659543594004e-05,
      "loss": 2.9595,
      "step": 190743
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5184884071350098,
      "learning_rate": 4.280448964554758e-05,
      "loss": 3.2731,
      "step": 190744
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.2958333492279053,
      "learning_rate": 4.280238390297274e-05,
      "loss": 2.866,
      "step": 190745
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.790219783782959,
      "learning_rate": 4.2800278208215746e-05,
      "loss": 3.0266,
      "step": 190746
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7928872108459473,
      "learning_rate": 4.279817256127707e-05,
      "loss": 2.9318,
      "step": 190747
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0408332347869873,
      "learning_rate": 4.279606696215697e-05,
      "loss": 2.8793,
      "step": 190748
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.503901958465576,
      "learning_rate": 4.2793961410856025e-05,
      "loss": 2.8107,
      "step": 190749
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.66808819770813,
      "learning_rate": 4.279185590737446e-05,
      "loss": 3.0708,
      "step": 190750
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.809009075164795,
      "learning_rate": 4.278975045171281e-05,
      "loss": 3.0082,
      "step": 190751
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6098031997680664,
      "learning_rate": 4.278764504387141e-05,
      "loss": 3.2819,
      "step": 190752
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7258286476135254,
      "learning_rate": 4.2785539683850626e-05,
      "loss": 3.0197,
      "step": 190753
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9999277591705322,
      "learning_rate": 4.278343437165082e-05,
      "loss": 3.1207,
      "step": 190754
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.010227680206299,
      "learning_rate": 4.278132910727249e-05,
      "loss": 3.0629,
      "step": 190755
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7564656734466553,
      "learning_rate": 4.2779223890715874e-05,
      "loss": 3.1402,
      "step": 190756
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4014089107513428,
      "learning_rate": 4.277711872198154e-05,
      "loss": 3.2127,
      "step": 190757
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8351895809173584,
      "learning_rate": 4.277501360106982e-05,
      "loss": 2.7968,
      "step": 190758
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3910555839538574,
      "learning_rate": 4.277290852798104e-05,
      "loss": 2.981,
      "step": 190759
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4343864917755127,
      "learning_rate": 4.2770803502715615e-05,
      "loss": 2.964,
      "step": 190760
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6469383239746094,
      "learning_rate": 4.276869852527396e-05,
      "loss": 2.9592,
      "step": 190761
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8665435314178467,
      "learning_rate": 4.276659359565642e-05,
      "loss": 2.7559,
      "step": 190762
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.685014009475708,
      "learning_rate": 4.276448871386352e-05,
      "loss": 2.8879,
      "step": 190763
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.961827516555786,
      "learning_rate": 4.276238387989553e-05,
      "loss": 3.046,
      "step": 190764
    },
    {
      "epoch": 2.48,
      "grad_norm": 5.118174076080322,
      "learning_rate": 4.276027909375286e-05,
      "loss": 2.77,
      "step": 190765
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.680035352706909,
      "learning_rate": 4.275817435543586e-05,
      "loss": 2.8439,
      "step": 190766
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.672227382659912,
      "learning_rate": 4.275606966494504e-05,
      "loss": 2.7676,
      "step": 190767
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7394227981567383,
      "learning_rate": 4.275396502228062e-05,
      "loss": 3.0105,
      "step": 190768
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8460960388183594,
      "learning_rate": 4.2751860427443187e-05,
      "loss": 3.084,
      "step": 190769
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.784818172454834,
      "learning_rate": 4.2749755880432987e-05,
      "loss": 2.9667,
      "step": 190770
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.354708671569824,
      "learning_rate": 4.274765138125057e-05,
      "loss": 2.888,
      "step": 190771
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.708923578262329,
      "learning_rate": 4.2745546929896126e-05,
      "loss": 3.0151,
      "step": 190772
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.766986846923828,
      "learning_rate": 4.2743442526370156e-05,
      "loss": 3.1076,
      "step": 190773
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.801670551300049,
      "learning_rate": 4.274133817067299e-05,
      "loss": 2.9541,
      "step": 190774
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6320788860321045,
      "learning_rate": 4.2739233862805164e-05,
      "loss": 2.9123,
      "step": 190775
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9708030223846436,
      "learning_rate": 4.2737129602766854e-05,
      "loss": 2.8971,
      "step": 190776
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2343249320983887,
      "learning_rate": 4.273502539055874e-05,
      "loss": 3.1268,
      "step": 190777
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7144925594329834,
      "learning_rate": 4.273292122618091e-05,
      "loss": 2.8726,
      "step": 190778
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.43520450592041,
      "learning_rate": 4.273081710963391e-05,
      "loss": 3.0235,
      "step": 190779
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9405415058135986,
      "learning_rate": 4.272871304091809e-05,
      "loss": 2.9328,
      "step": 190780
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6259875297546387,
      "learning_rate": 4.2726609020033864e-05,
      "loss": 2.8568,
      "step": 190781
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.39201283454895,
      "learning_rate": 4.272450504698162e-05,
      "loss": 2.9107,
      "step": 190782
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.8395540714263916,
      "learning_rate": 4.272240112176184e-05,
      "loss": 2.9735,
      "step": 190783
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5477378368377686,
      "learning_rate": 4.27202972443747e-05,
      "loss": 2.9781,
      "step": 190784
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.75836443901062,
      "learning_rate": 4.271819341482077e-05,
      "loss": 3.0022,
      "step": 190785
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2476742267608643,
      "learning_rate": 4.2716089633100335e-05,
      "loss": 2.8413,
      "step": 190786
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7930140495300293,
      "learning_rate": 4.2713985899213874e-05,
      "loss": 3.1021,
      "step": 190787
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.776627540588379,
      "learning_rate": 4.2711882213161687e-05,
      "loss": 2.9537,
      "step": 190788
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5672762393951416,
      "learning_rate": 4.2709778574944364e-05,
      "loss": 2.6938,
      "step": 190789
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.966064453125,
      "learning_rate": 4.2707674984562014e-05,
      "loss": 3.055,
      "step": 190790
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2161812782287598,
      "learning_rate": 4.2705571442015227e-05,
      "loss": 3.1112,
      "step": 190791
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5613315105438232,
      "learning_rate": 4.2703467947304245e-05,
      "loss": 2.7236,
      "step": 190792
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8114349842071533,
      "learning_rate": 4.270136450042963e-05,
      "loss": 2.9723,
      "step": 190793
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.99031662940979,
      "learning_rate": 4.2699261101391626e-05,
      "loss": 2.9591,
      "step": 190794
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1075007915496826,
      "learning_rate": 4.2697157750190746e-05,
      "loss": 3.0335,
      "step": 190795
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.6295924186706543,
      "learning_rate": 4.269505444682734e-05,
      "loss": 2.7759,
      "step": 190796
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9421937465667725,
      "learning_rate": 4.269295119130176e-05,
      "loss": 2.7953,
      "step": 190797
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.576907157897949,
      "learning_rate": 4.269084798361435e-05,
      "loss": 2.8455,
      "step": 190798
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7412567138671875,
      "learning_rate": 4.268874482376564e-05,
      "loss": 2.7744,
      "step": 190799
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.29228138923645,
      "learning_rate": 4.268664171175587e-05,
      "loss": 2.7677,
      "step": 190800
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7463035583496094,
      "learning_rate": 4.268453864758559e-05,
      "loss": 3.0177,
      "step": 190801
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7923672199249268,
      "learning_rate": 4.2682435631255104e-05,
      "loss": 2.8383,
      "step": 190802
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.7084386348724365,
      "learning_rate": 4.2680332662764825e-05,
      "loss": 3.2053,
      "step": 190803
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2111306190490723,
      "learning_rate": 4.267822974211504e-05,
      "loss": 2.9473,
      "step": 190804
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.766721725463867,
      "learning_rate": 4.267612686930632e-05,
      "loss": 2.9433,
      "step": 190805
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8911991119384766,
      "learning_rate": 4.267402404433887e-05,
      "loss": 3.0207,
      "step": 190806
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.221100330352783,
      "learning_rate": 4.2671921267213285e-05,
      "loss": 2.7705,
      "step": 190807
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.3736979961395264,
      "learning_rate": 4.266981853792982e-05,
      "loss": 2.9934,
      "step": 190808
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.9044740200042725,
      "learning_rate": 4.266771585648883e-05,
      "loss": 3.0939,
      "step": 190809
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7441062927246094,
      "learning_rate": 4.266561322289083e-05,
      "loss": 2.8277,
      "step": 190810
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.1938564777374268,
      "learning_rate": 4.266351063713615e-05,
      "loss": 3.0048,
      "step": 190811
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.776491165161133,
      "learning_rate": 4.2661408099225146e-05,
      "loss": 3.0959,
      "step": 190812
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7517971992492676,
      "learning_rate": 4.2659305609158266e-05,
      "loss": 3.0387,
      "step": 190813
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6878321170806885,
      "learning_rate": 4.265720316693591e-05,
      "loss": 3.1845,
      "step": 190814
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.549353837966919,
      "learning_rate": 4.265510077255836e-05,
      "loss": 3.0384,
      "step": 190815
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5376486778259277,
      "learning_rate": 4.2652998426026165e-05,
      "loss": 2.8007,
      "step": 190816
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7657134532928467,
      "learning_rate": 4.265089612733953e-05,
      "loss": 2.9428,
      "step": 190817
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.444918632507324,
      "learning_rate": 4.264879387649905e-05,
      "loss": 2.9347,
      "step": 190818
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.5328569412231445,
      "learning_rate": 4.264669167350501e-05,
      "loss": 3.0016,
      "step": 190819
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.2524116039276123,
      "learning_rate": 4.2644589518357816e-05,
      "loss": 3.1103,
      "step": 190820
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.12673020362854,
      "learning_rate": 4.264248741105776e-05,
      "loss": 3.0078,
      "step": 190821
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6959471702575684,
      "learning_rate": 4.264038535160542e-05,
      "loss": 2.7705,
      "step": 190822
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.864257335662842,
      "learning_rate": 4.263828334000101e-05,
      "loss": 2.9625,
      "step": 190823
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7611083984375,
      "learning_rate": 4.263618137624506e-05,
      "loss": 2.9333,
      "step": 190824
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5631027221679688,
      "learning_rate": 4.26340794603379e-05,
      "loss": 3.0485,
      "step": 190825
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.149752616882324,
      "learning_rate": 4.263197759227993e-05,
      "loss": 3.1329,
      "step": 190826
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.847748279571533,
      "learning_rate": 4.262987577207149e-05,
      "loss": 2.7985,
      "step": 190827
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.610488176345825,
      "learning_rate": 4.262777399971303e-05,
      "loss": 3.0706,
      "step": 190828
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3561275005340576,
      "learning_rate": 4.2625672275204905e-05,
      "loss": 2.9007,
      "step": 190829
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6484458446502686,
      "learning_rate": 4.2623570598547566e-05,
      "loss": 2.9942,
      "step": 190830
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7382731437683105,
      "learning_rate": 4.262146896974138e-05,
      "loss": 2.8891,
      "step": 190831
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.7653133869171143,
      "learning_rate": 4.261936738878672e-05,
      "loss": 3.1906,
      "step": 190832
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.0814437866210938,
      "learning_rate": 4.261726585568391e-05,
      "loss": 2.9651,
      "step": 190833
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.3743627071380615,
      "learning_rate": 4.261516437043346e-05,
      "loss": 2.9704,
      "step": 190834
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.561464786529541,
      "learning_rate": 4.261306293303567e-05,
      "loss": 2.9062,
      "step": 190835
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.4039196968078613,
      "learning_rate": 4.261096154349103e-05,
      "loss": 2.9085,
      "step": 190836
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.096951723098755,
      "learning_rate": 4.2608860201799774e-05,
      "loss": 2.9754,
      "step": 190837
    },
    {
      "epoch": 2.48,
      "grad_norm": 4.306988716125488,
      "learning_rate": 4.2606758907962546e-05,
      "loss": 2.8099,
      "step": 190838
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5734798908233643,
      "learning_rate": 4.2604657661979435e-05,
      "loss": 3.0143,
      "step": 190839
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5599052906036377,
      "learning_rate": 4.2602556463851075e-05,
      "loss": 2.9657,
      "step": 190840
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.028775691986084,
      "learning_rate": 4.260045531357767e-05,
      "loss": 2.6015,
      "step": 190841
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.5667059421539307,
      "learning_rate": 4.259835421115976e-05,
      "loss": 2.9169,
      "step": 190842
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4699604511260986,
      "learning_rate": 4.2596253156597624e-05,
      "loss": 3.02,
      "step": 190843
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.6295745372772217,
      "learning_rate": 4.259415214989181e-05,
      "loss": 2.986,
      "step": 190844
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.659341812133789,
      "learning_rate": 4.259205119104249e-05,
      "loss": 2.8711,
      "step": 190845
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.4194087982177734,
      "learning_rate": 4.258995028005024e-05,
      "loss": 2.8429,
      "step": 190846
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.8330674171447754,
      "learning_rate": 4.258784941691529e-05,
      "loss": 2.9927,
      "step": 190847
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.251358985900879,
      "learning_rate": 4.2585748601638184e-05,
      "loss": 3.0585,
      "step": 190848
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.866882562637329,
      "learning_rate": 4.2583647834219194e-05,
      "loss": 2.9774,
      "step": 190849
    },
    {
      "epoch": 2.48,
      "grad_norm": 3.430798053741455,
      "learning_rate": 4.258154711465889e-05,
      "loss": 2.7327,
      "step": 190850
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0650789737701416,
      "learning_rate": 4.25794464429574e-05,
      "loss": 2.8291,
      "step": 190851
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1110782623291016,
      "learning_rate": 4.257734581911533e-05,
      "loss": 3.0589,
      "step": 190852
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.192501544952393,
      "learning_rate": 4.257524524313291e-05,
      "loss": 2.8931,
      "step": 190853
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.404334783554077,
      "learning_rate": 4.25731447150107e-05,
      "loss": 3.0104,
      "step": 190854
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.960630178451538,
      "learning_rate": 4.257104423474891e-05,
      "loss": 2.9381,
      "step": 190855
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6416847705841064,
      "learning_rate": 4.256894380234817e-05,
      "loss": 2.7684,
      "step": 190856
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.1717987060546875,
      "learning_rate": 4.2566843417808574e-05,
      "loss": 2.8371,
      "step": 190857
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7093775272369385,
      "learning_rate": 4.256474308113073e-05,
      "loss": 2.9146,
      "step": 190858
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.612912654876709,
      "learning_rate": 4.2562642792314896e-05,
      "loss": 3.0594,
      "step": 190859
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.841008186340332,
      "learning_rate": 4.256054255136161e-05,
      "loss": 2.9914,
      "step": 190860
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.045567274093628,
      "learning_rate": 4.255844235827111e-05,
      "loss": 3.0519,
      "step": 190861
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5647056102752686,
      "learning_rate": 4.255634221304398e-05,
      "loss": 3.0492,
      "step": 190862
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.162755489349365,
      "learning_rate": 4.2554242115680335e-05,
      "loss": 3.1322,
      "step": 190863
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6333940029144287,
      "learning_rate": 4.255214206618081e-05,
      "loss": 3.1782,
      "step": 190864
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3180456161499023,
      "learning_rate": 4.255004206454562e-05,
      "loss": 3.0022,
      "step": 190865
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.966393232345581,
      "learning_rate": 4.254794211077528e-05,
      "loss": 3.1331,
      "step": 190866
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.151604652404785,
      "learning_rate": 4.254584220487012e-05,
      "loss": 2.927,
      "step": 190867
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5087482929229736,
      "learning_rate": 4.2543742346830636e-05,
      "loss": 3.0044,
      "step": 190868
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1957411766052246,
      "learning_rate": 4.254164253665703e-05,
      "loss": 2.8834,
      "step": 190869
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.3618314266204834,
      "learning_rate": 4.253954277434983e-05,
      "loss": 2.7627,
      "step": 190870
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.62630295753479,
      "learning_rate": 4.253744305990935e-05,
      "loss": 2.6473,
      "step": 190871
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9354844093322754,
      "learning_rate": 4.2535343393336077e-05,
      "loss": 3.0559,
      "step": 190872
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5784692764282227,
      "learning_rate": 4.253324377463028e-05,
      "loss": 3.1383,
      "step": 190873
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.142746925354004,
      "learning_rate": 4.253114420379255e-05,
      "loss": 2.7803,
      "step": 190874
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.527845859527588,
      "learning_rate": 4.252904468082301e-05,
      "loss": 3.0457,
      "step": 190875
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.913255453109741,
      "learning_rate": 4.252694520572221e-05,
      "loss": 2.95,
      "step": 190876
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.619312047958374,
      "learning_rate": 4.252484577849048e-05,
      "loss": 3.0213,
      "step": 190877
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.831570863723755,
      "learning_rate": 4.252274639912829e-05,
      "loss": 2.7368,
      "step": 190878
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.46235728263855,
      "learning_rate": 4.252064706763595e-05,
      "loss": 3.203,
      "step": 190879
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.033447027206421,
      "learning_rate": 4.251854778401391e-05,
      "loss": 2.9192,
      "step": 190880
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9973912239074707,
      "learning_rate": 4.251644854826255e-05,
      "loss": 2.8061,
      "step": 190881
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5089337825775146,
      "learning_rate": 4.251434936038223e-05,
      "loss": 2.8657,
      "step": 190882
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4917006492614746,
      "learning_rate": 4.251225022037328e-05,
      "loss": 2.9166,
      "step": 190883
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1675353050231934,
      "learning_rate": 4.251015112823627e-05,
      "loss": 3.3624,
      "step": 190884
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9907212257385254,
      "learning_rate": 4.2508052083971366e-05,
      "loss": 2.819,
      "step": 190885
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.360898971557617,
      "learning_rate": 4.250595308757917e-05,
      "loss": 3.1285,
      "step": 190886
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6651771068573,
      "learning_rate": 4.250385413905998e-05,
      "loss": 2.8403,
      "step": 190887
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.807286500930786,
      "learning_rate": 4.250175523841419e-05,
      "loss": 2.9934,
      "step": 190888
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.004560470581055,
      "learning_rate": 4.249965638564211e-05,
      "loss": 2.9285,
      "step": 190889
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3498852252960205,
      "learning_rate": 4.249755758074427e-05,
      "loss": 2.9718,
      "step": 190890
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.3536550998687744,
      "learning_rate": 4.24954588237209e-05,
      "loss": 2.9547,
      "step": 190891
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.005331993103027,
      "learning_rate": 4.2493360114572596e-05,
      "loss": 3.0968,
      "step": 190892
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.132164716720581,
      "learning_rate": 4.249126145329963e-05,
      "loss": 2.883,
      "step": 190893
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.006356716156006,
      "learning_rate": 4.24891628399023e-05,
      "loss": 2.6874,
      "step": 190894
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0546469688415527,
      "learning_rate": 4.2487064274381175e-05,
      "loss": 2.8176,
      "step": 190895
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8864619731903076,
      "learning_rate": 4.248496575673659e-05,
      "loss": 2.986,
      "step": 190896
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.772043228149414,
      "learning_rate": 4.2482867286968836e-05,
      "loss": 2.8565,
      "step": 190897
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8610575199127197,
      "learning_rate": 4.2480768865078416e-05,
      "loss": 2.8031,
      "step": 190898
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6976637840270996,
      "learning_rate": 4.24786704910657e-05,
      "loss": 2.7643,
      "step": 190899
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.986688613891602,
      "learning_rate": 4.247657216493102e-05,
      "loss": 2.9289,
      "step": 190900
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.837855577468872,
      "learning_rate": 4.247447388667484e-05,
      "loss": 2.8927,
      "step": 190901
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.250399589538574,
      "learning_rate": 4.247237565629747e-05,
      "loss": 3.0464,
      "step": 190902
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1731622219085693,
      "learning_rate": 4.247027747379939e-05,
      "loss": 2.9098,
      "step": 190903
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.767720937728882,
      "learning_rate": 4.246817933918097e-05,
      "loss": 2.7129,
      "step": 190904
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9167933464050293,
      "learning_rate": 4.246608125244256e-05,
      "loss": 2.7739,
      "step": 190905
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9773452281951904,
      "learning_rate": 4.246398321358452e-05,
      "loss": 2.9473,
      "step": 190906
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.96441388130188,
      "learning_rate": 4.246188522260734e-05,
      "loss": 3.0164,
      "step": 190907
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.253631591796875,
      "learning_rate": 4.245978727951126e-05,
      "loss": 2.9884,
      "step": 190908
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6528897285461426,
      "learning_rate": 4.245768938429688e-05,
      "loss": 2.8816,
      "step": 190909
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.773022174835205,
      "learning_rate": 4.2455591536964464e-05,
      "loss": 2.9431,
      "step": 190910
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.599414825439453,
      "learning_rate": 4.245349373751441e-05,
      "loss": 2.9618,
      "step": 190911
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.447576522827148,
      "learning_rate": 4.245139598594705e-05,
      "loss": 2.9727,
      "step": 190912
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5789895057678223,
      "learning_rate": 4.24492982822629e-05,
      "loss": 2.7833,
      "step": 190913
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.855689525604248,
      "learning_rate": 4.244720062646221e-05,
      "loss": 2.7465,
      "step": 190914
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7631633281707764,
      "learning_rate": 4.244510301854555e-05,
      "loss": 2.9512,
      "step": 190915
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0262022018432617,
      "learning_rate": 4.244300545851311e-05,
      "loss": 3.1395,
      "step": 190916
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.067338705062866,
      "learning_rate": 4.244090794636551e-05,
      "loss": 2.9718,
      "step": 190917
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2375683784484863,
      "learning_rate": 4.243881048210287e-05,
      "loss": 2.9402,
      "step": 190918
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4068856239318848,
      "learning_rate": 4.2436713065725795e-05,
      "loss": 2.8234,
      "step": 190919
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.8695828914642334,
      "learning_rate": 4.243461569723454e-05,
      "loss": 2.8061,
      "step": 190920
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4848148822784424,
      "learning_rate": 4.243251837662962e-05,
      "loss": 2.8864,
      "step": 190921
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.593794345855713,
      "learning_rate": 4.243042110391126e-05,
      "loss": 3.024,
      "step": 190922
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0374746322631836,
      "learning_rate": 4.24283238790801e-05,
      "loss": 3.0104,
      "step": 190923
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.700272560119629,
      "learning_rate": 4.2426226702136254e-05,
      "loss": 3.1451,
      "step": 190924
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.85375714302063,
      "learning_rate": 4.242412957308028e-05,
      "loss": 2.8982,
      "step": 190925
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2826156616210938,
      "learning_rate": 4.242203249191246e-05,
      "loss": 2.9775,
      "step": 190926
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7443387508392334,
      "learning_rate": 4.2419935458633334e-05,
      "loss": 2.7701,
      "step": 190927
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6002702713012695,
      "learning_rate": 4.2417838473243104e-05,
      "loss": 2.9574,
      "step": 190928
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.720794439315796,
      "learning_rate": 4.241574153574243e-05,
      "loss": 2.8691,
      "step": 190929
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.240943193435669,
      "learning_rate": 4.2413644646131386e-05,
      "loss": 2.9676,
      "step": 190930
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5773282051086426,
      "learning_rate": 4.241154780441056e-05,
      "loss": 3.0894,
      "step": 190931
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.149136781692505,
      "learning_rate": 4.240945101058023e-05,
      "loss": 2.6967,
      "step": 190932
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7271196842193604,
      "learning_rate": 4.240735426464089e-05,
      "loss": 2.8973,
      "step": 190933
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1563639640808105,
      "learning_rate": 4.240525756659284e-05,
      "loss": 2.9065,
      "step": 190934
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7560620307922363,
      "learning_rate": 4.240316091643665e-05,
      "loss": 2.8297,
      "step": 190935
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.85614013671875,
      "learning_rate": 4.240106431417245e-05,
      "loss": 2.7432,
      "step": 190936
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.069852352142334,
      "learning_rate": 4.239896775980081e-05,
      "loss": 2.6678,
      "step": 190937
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3452677726745605,
      "learning_rate": 4.2396871253322016e-05,
      "loss": 2.9156,
      "step": 190938
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.849400520324707,
      "learning_rate": 4.2394774794736555e-05,
      "loss": 2.9985,
      "step": 190939
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.950716733932495,
      "learning_rate": 4.239267838404471e-05,
      "loss": 2.8461,
      "step": 190940
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.828049659729004,
      "learning_rate": 4.2390582021247053e-05,
      "loss": 2.8878,
      "step": 190941
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.955525875091553,
      "learning_rate": 4.2388485706343724e-05,
      "loss": 2.7577,
      "step": 190942
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.450239181518555,
      "learning_rate": 4.2386389439335276e-05,
      "loss": 2.6938,
      "step": 190943
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.76244592666626,
      "learning_rate": 4.238429322022202e-05,
      "loss": 3.013,
      "step": 190944
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6518726348876953,
      "learning_rate": 4.2382197049004455e-05,
      "loss": 3.1395,
      "step": 190945
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.33062481880188,
      "learning_rate": 4.2380100925682834e-05,
      "loss": 3.0321,
      "step": 190946
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.716289520263672,
      "learning_rate": 4.237800485025774e-05,
      "loss": 3.0995,
      "step": 190947
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.1041717529296875,
      "learning_rate": 4.237590882272933e-05,
      "loss": 3.0009,
      "step": 190948
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1503498554229736,
      "learning_rate": 4.2373812843098144e-05,
      "loss": 3.0621,
      "step": 190949
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5801377296447754,
      "learning_rate": 4.237171691136447e-05,
      "loss": 3.0316,
      "step": 190950
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.600399971008301,
      "learning_rate": 4.236962102752882e-05,
      "loss": 2.8592,
      "step": 190951
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.124241352081299,
      "learning_rate": 4.236752519159146e-05,
      "loss": 3.1548,
      "step": 190952
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5147221088409424,
      "learning_rate": 4.2365429403552976e-05,
      "loss": 2.7382,
      "step": 190953
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.104773759841919,
      "learning_rate": 4.236333366341348e-05,
      "loss": 2.7948,
      "step": 190954
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0012269020080566,
      "learning_rate": 4.2361237971173566e-05,
      "loss": 3.0089,
      "step": 190955
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.711703300476074,
      "learning_rate": 4.2359142326833504e-05,
      "loss": 2.9685,
      "step": 190956
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2865281105041504,
      "learning_rate": 4.2357046730393793e-05,
      "loss": 2.8214,
      "step": 190957
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.105888366699219,
      "learning_rate": 4.235495118185474e-05,
      "loss": 2.9914,
      "step": 190958
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4061813354492188,
      "learning_rate": 4.235285568121686e-05,
      "loss": 3.227,
      "step": 190959
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7638800144195557,
      "learning_rate": 4.235076022848033e-05,
      "loss": 2.9226,
      "step": 190960
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.716456890106201,
      "learning_rate": 4.2348664823645715e-05,
      "loss": 2.9185,
      "step": 190961
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.100416660308838,
      "learning_rate": 4.2346569466713284e-05,
      "loss": 3.0555,
      "step": 190962
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5420193672180176,
      "learning_rate": 4.2344474157683575e-05,
      "loss": 2.9695,
      "step": 190963
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.703784227371216,
      "learning_rate": 4.2342378896556805e-05,
      "loss": 2.997,
      "step": 190964
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1725118160247803,
      "learning_rate": 4.234028368333359e-05,
      "loss": 3.116,
      "step": 190965
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.821157217025757,
      "learning_rate": 4.233818851801405e-05,
      "loss": 2.8644,
      "step": 190966
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5236546993255615,
      "learning_rate": 4.233609340059876e-05,
      "loss": 3.057,
      "step": 190967
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.780529737472534,
      "learning_rate": 4.233399833108798e-05,
      "loss": 3.06,
      "step": 190968
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.462589979171753,
      "learning_rate": 4.233190330948225e-05,
      "loss": 2.7254,
      "step": 190969
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0131208896636963,
      "learning_rate": 4.2329808335781835e-05,
      "loss": 3.0095,
      "step": 190970
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1639277935028076,
      "learning_rate": 4.232771340998723e-05,
      "loss": 2.822,
      "step": 190971
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1313836574554443,
      "learning_rate": 4.2325618532098734e-05,
      "loss": 2.7062,
      "step": 190972
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6570799350738525,
      "learning_rate": 4.232352370211682e-05,
      "loss": 2.8491,
      "step": 190973
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.380117177963257,
      "learning_rate": 4.232142892004172e-05,
      "loss": 2.8254,
      "step": 190974
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.625765085220337,
      "learning_rate": 4.2319334185874034e-05,
      "loss": 2.8478,
      "step": 190975
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0918285846710205,
      "learning_rate": 4.2317239499613956e-05,
      "loss": 3.0363,
      "step": 190976
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5393073558807373,
      "learning_rate": 4.231514486126202e-05,
      "loss": 2.8892,
      "step": 190977
    },
    {
      "epoch": 2.49,
      "grad_norm": 6.197739601135254,
      "learning_rate": 4.2313050270818595e-05,
      "loss": 3.1232,
      "step": 190978
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.091243267059326,
      "learning_rate": 4.231095572828395e-05,
      "loss": 2.8383,
      "step": 190979
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.061614513397217,
      "learning_rate": 4.2308861233658644e-05,
      "loss": 2.7879,
      "step": 190980
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.085454940795898,
      "learning_rate": 4.230676678694298e-05,
      "loss": 2.8901,
      "step": 190981
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.968829870223999,
      "learning_rate": 4.230467238813726e-05,
      "loss": 2.8007,
      "step": 190982
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.91719388961792,
      "learning_rate": 4.2302578037242084e-05,
      "loss": 2.9459,
      "step": 190983
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7473902702331543,
      "learning_rate": 4.2300483734257686e-05,
      "loss": 3.0193,
      "step": 190984
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6026511192321777,
      "learning_rate": 4.2298389479184426e-05,
      "loss": 2.9656,
      "step": 190985
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0404512882232666,
      "learning_rate": 4.229629527202284e-05,
      "loss": 2.9418,
      "step": 190986
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7294390201568604,
      "learning_rate": 4.229420111277323e-05,
      "loss": 2.8398,
      "step": 190987
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.815016984939575,
      "learning_rate": 4.2292107001435924e-05,
      "loss": 3.1895,
      "step": 190988
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.3668251037597656,
      "learning_rate": 4.2290012938011465e-05,
      "loss": 2.7975,
      "step": 190989
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.905270576477051,
      "learning_rate": 4.228791892250014e-05,
      "loss": 2.6046,
      "step": 190990
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6749885082244873,
      "learning_rate": 4.2285824954902295e-05,
      "loss": 2.7568,
      "step": 190991
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.243302345275879,
      "learning_rate": 4.228373103521845e-05,
      "loss": 3.0483,
      "step": 190992
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.709822177886963,
      "learning_rate": 4.2281637163448844e-05,
      "loss": 2.9622,
      "step": 190993
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.75777530670166,
      "learning_rate": 4.227954333959401e-05,
      "loss": 2.9338,
      "step": 190994
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.886368989944458,
      "learning_rate": 4.2277449563654284e-05,
      "loss": 2.975,
      "step": 190995
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.053096055984497,
      "learning_rate": 4.227535583563006e-05,
      "loss": 2.8452,
      "step": 190996
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8511545658111572,
      "learning_rate": 4.227326215552165e-05,
      "loss": 3.0705,
      "step": 190997
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.912092924118042,
      "learning_rate": 4.227116852332954e-05,
      "loss": 2.8121,
      "step": 190998
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0996720790863037,
      "learning_rate": 4.2269074939054024e-05,
      "loss": 3.0219,
      "step": 190999
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.646770477294922,
      "learning_rate": 4.226698140269562e-05,
      "loss": 2.9784,
      "step": 191000
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.641483783721924,
      "learning_rate": 4.226488791425459e-05,
      "loss": 2.8061,
      "step": 191001
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.021385669708252,
      "learning_rate": 4.226279447373152e-05,
      "loss": 2.8849,
      "step": 191002
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0050272941589355,
      "learning_rate": 4.226070108112652e-05,
      "loss": 3.2017,
      "step": 191003
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.95723819732666,
      "learning_rate": 4.22586077364402e-05,
      "loss": 2.9567,
      "step": 191004
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.979037046432495,
      "learning_rate": 4.22565144396728e-05,
      "loss": 2.8895,
      "step": 191005
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7773048877716064,
      "learning_rate": 4.225442119082482e-05,
      "loss": 2.851,
      "step": 191006
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1265904903411865,
      "learning_rate": 4.225232798989656e-05,
      "loss": 3.01,
      "step": 191007
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.279299259185791,
      "learning_rate": 4.225023483688861e-05,
      "loss": 3.0312,
      "step": 191008
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.255361795425415,
      "learning_rate": 4.224814173180105e-05,
      "loss": 2.8459,
      "step": 191009
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3603570461273193,
      "learning_rate": 4.22460486746345e-05,
      "loss": 3.0214,
      "step": 191010
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.373247146606445,
      "learning_rate": 4.224395566538922e-05,
      "loss": 2.9405,
      "step": 191011
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7839276790618896,
      "learning_rate": 4.22418627040657e-05,
      "loss": 2.9879,
      "step": 191012
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5668070316314697,
      "learning_rate": 4.2239769790664216e-05,
      "loss": 2.9663,
      "step": 191013
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.634045124053955,
      "learning_rate": 4.223767692518536e-05,
      "loss": 3.0164,
      "step": 191014
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.180757522583008,
      "learning_rate": 4.2235584107629274e-05,
      "loss": 2.9065,
      "step": 191015
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.865072011947632,
      "learning_rate": 4.2233491337996514e-05,
      "loss": 2.88,
      "step": 191016
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.8555283546447754,
      "learning_rate": 4.2231398616287327e-05,
      "loss": 2.9448,
      "step": 191017
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.640965461730957,
      "learning_rate": 4.222930594250227e-05,
      "loss": 3.0057,
      "step": 191018
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.054281234741211,
      "learning_rate": 4.222721331664161e-05,
      "loss": 2.744,
      "step": 191019
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6132090091705322,
      "learning_rate": 4.2225120738705875e-05,
      "loss": 3.0987,
      "step": 191020
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.38824462890625,
      "learning_rate": 4.222302820869524e-05,
      "loss": 2.9021,
      "step": 191021
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.655799388885498,
      "learning_rate": 4.222093572661027e-05,
      "loss": 2.9466,
      "step": 191022
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.930237293243408,
      "learning_rate": 4.22188432924512e-05,
      "loss": 3.267,
      "step": 191023
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.268069744110107,
      "learning_rate": 4.2216750906218624e-05,
      "loss": 2.9905,
      "step": 191024
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.8838109970092773,
      "learning_rate": 4.221465856791274e-05,
      "loss": 3.0241,
      "step": 191025
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.971332311630249,
      "learning_rate": 4.221256627753416e-05,
      "loss": 2.979,
      "step": 191026
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.83416485786438,
      "learning_rate": 4.221047403508297e-05,
      "loss": 2.8213,
      "step": 191027
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0599915981292725,
      "learning_rate": 4.22083818405598e-05,
      "loss": 2.8589,
      "step": 191028
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.649625301361084,
      "learning_rate": 4.220628969396487e-05,
      "loss": 2.4835,
      "step": 191029
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2142703533172607,
      "learning_rate": 4.2204197595298764e-05,
      "loss": 2.9603,
      "step": 191030
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1082777976989746,
      "learning_rate": 4.2202105544561684e-05,
      "loss": 3.0414,
      "step": 191031
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.071459770202637,
      "learning_rate": 4.2200013541754194e-05,
      "loss": 2.8008,
      "step": 191032
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.79375696182251,
      "learning_rate": 4.2197921586876504e-05,
      "loss": 2.7641,
      "step": 191033
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.814134120941162,
      "learning_rate": 4.219582967992914e-05,
      "loss": 3.0055,
      "step": 191034
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.81685733795166,
      "learning_rate": 4.2193737820912366e-05,
      "loss": 2.8993,
      "step": 191035
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.777329206466675,
      "learning_rate": 4.219164600982672e-05,
      "loss": 2.9938,
      "step": 191036
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5208377838134766,
      "learning_rate": 4.218955424667243e-05,
      "loss": 2.8748,
      "step": 191037
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.167588233947754,
      "learning_rate": 4.218746253145003e-05,
      "loss": 2.792,
      "step": 191038
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.32934832572937,
      "learning_rate": 4.218537086415986e-05,
      "loss": 2.7848,
      "step": 191039
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.190239429473877,
      "learning_rate": 4.218327924480228e-05,
      "loss": 3.0261,
      "step": 191040
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9814505577087402,
      "learning_rate": 4.218118767337766e-05,
      "loss": 3.1669,
      "step": 191041
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9224371910095215,
      "learning_rate": 4.217909614988646e-05,
      "loss": 3.1231,
      "step": 191042
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.666323184967041,
      "learning_rate": 4.2177004674328984e-05,
      "loss": 2.6276,
      "step": 191043
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5029828548431396,
      "learning_rate": 4.2174913246705736e-05,
      "loss": 2.6642,
      "step": 191044
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1489038467407227,
      "learning_rate": 4.217282186701704e-05,
      "loss": 3.1808,
      "step": 191045
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4893507957458496,
      "learning_rate": 4.217073053526326e-05,
      "loss": 3.028,
      "step": 191046
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6540443897247314,
      "learning_rate": 4.216863925144478e-05,
      "loss": 3.0877,
      "step": 191047
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4873342514038086,
      "learning_rate": 4.216654801556205e-05,
      "loss": 3.1977,
      "step": 191048
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.923901319503784,
      "learning_rate": 4.216445682761538e-05,
      "loss": 2.8228,
      "step": 191049
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.266439914703369,
      "learning_rate": 4.216236568760526e-05,
      "loss": 2.9106,
      "step": 191050
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1539855003356934,
      "learning_rate": 4.216027459553203e-05,
      "loss": 2.9264,
      "step": 191051
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.898385763168335,
      "learning_rate": 4.2158183551396086e-05,
      "loss": 3.236,
      "step": 191052
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.1229729652404785,
      "learning_rate": 4.21560925551977e-05,
      "loss": 2.8836,
      "step": 191053
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9270620346069336,
      "learning_rate": 4.215400160693746e-05,
      "loss": 3.0101,
      "step": 191054
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3571717739105225,
      "learning_rate": 4.215191070661558e-05,
      "loss": 2.7888,
      "step": 191055
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.741279125213623,
      "learning_rate": 4.214981985423258e-05,
      "loss": 3.0272,
      "step": 191056
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.858114719390869,
      "learning_rate": 4.21477290497888e-05,
      "loss": 2.8938,
      "step": 191057
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6718533039093018,
      "learning_rate": 4.2145638293284645e-05,
      "loss": 2.8906,
      "step": 191058
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.934890031814575,
      "learning_rate": 4.2143547584720405e-05,
      "loss": 3.0484,
      "step": 191059
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.647324800491333,
      "learning_rate": 4.214145692409662e-05,
      "loss": 2.8991,
      "step": 191060
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7344017028808594,
      "learning_rate": 4.2139366311413546e-05,
      "loss": 2.8821,
      "step": 191061
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6368823051452637,
      "learning_rate": 4.2137275746671665e-05,
      "loss": 3.006,
      "step": 191062
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.833693027496338,
      "learning_rate": 4.213518522987136e-05,
      "loss": 2.7976,
      "step": 191063
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.433354616165161,
      "learning_rate": 4.213309476101291e-05,
      "loss": 3.0155,
      "step": 191064
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9618301391601562,
      "learning_rate": 4.213100434009687e-05,
      "loss": 2.7617,
      "step": 191065
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5675811767578125,
      "learning_rate": 4.212891396712352e-05,
      "loss": 2.8088,
      "step": 191066
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.710451364517212,
      "learning_rate": 4.212682364209322e-05,
      "loss": 2.8024,
      "step": 191067
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.124170780181885,
      "learning_rate": 4.2124733365006504e-05,
      "loss": 2.9684,
      "step": 191068
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.049318790435791,
      "learning_rate": 4.212264313586363e-05,
      "loss": 2.757,
      "step": 191069
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.554077386856079,
      "learning_rate": 4.2120552954664977e-05,
      "loss": 2.9216,
      "step": 191070
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.756376028060913,
      "learning_rate": 4.211846282141104e-05,
      "loss": 2.8714,
      "step": 191071
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7357590198516846,
      "learning_rate": 4.2116372736102144e-05,
      "loss": 2.7761,
      "step": 191072
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4124205112457275,
      "learning_rate": 4.2114282698738635e-05,
      "loss": 2.9282,
      "step": 191073
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5236337184906006,
      "learning_rate": 4.2112192709320966e-05,
      "loss": 3.0228,
      "step": 191074
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7575104236602783,
      "learning_rate": 4.211010276784955e-05,
      "loss": 3.1943,
      "step": 191075
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.0329909324646,
      "learning_rate": 4.2108012874324684e-05,
      "loss": 3.1571,
      "step": 191076
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7240612506866455,
      "learning_rate": 4.2105923028746823e-05,
      "loss": 3.0637,
      "step": 191077
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9541802406311035,
      "learning_rate": 4.210383323111631e-05,
      "loss": 2.9388,
      "step": 191078
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1905343532562256,
      "learning_rate": 4.210174348143361e-05,
      "loss": 2.6723,
      "step": 191079
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8211185932159424,
      "learning_rate": 4.20996537796991e-05,
      "loss": 3.0308,
      "step": 191080
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7802445888519287,
      "learning_rate": 4.2097564125913084e-05,
      "loss": 2.966,
      "step": 191081
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7366418838500977,
      "learning_rate": 4.209547452007596e-05,
      "loss": 3.0804,
      "step": 191082
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1068196296691895,
      "learning_rate": 4.2093384962188203e-05,
      "loss": 3.0042,
      "step": 191083
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3953466415405273,
      "learning_rate": 4.2091295452250095e-05,
      "loss": 2.7481,
      "step": 191084
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7487263679504395,
      "learning_rate": 4.2089205990262164e-05,
      "loss": 3.1742,
      "step": 191085
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7641634941101074,
      "learning_rate": 4.208711657622471e-05,
      "loss": 3.0906,
      "step": 191086
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8543217182159424,
      "learning_rate": 4.2085027210138135e-05,
      "loss": 2.888,
      "step": 191087
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.965663433074951,
      "learning_rate": 4.208293789200273e-05,
      "loss": 2.8434,
      "step": 191088
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7916018962860107,
      "learning_rate": 4.2080848621819074e-05,
      "loss": 2.9192,
      "step": 191089
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.57481050491333,
      "learning_rate": 4.207875939958739e-05,
      "loss": 3.1522,
      "step": 191090
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7943615913391113,
      "learning_rate": 4.207667022530818e-05,
      "loss": 3.2597,
      "step": 191091
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.781346082687378,
      "learning_rate": 4.2074581098981716e-05,
      "loss": 2.8747,
      "step": 191092
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0843138694763184,
      "learning_rate": 4.2072492020608595e-05,
      "loss": 2.9622,
      "step": 191093
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.668628454208374,
      "learning_rate": 4.2070402990188944e-05,
      "loss": 3.2222,
      "step": 191094
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.2780346870422363,
      "learning_rate": 4.206831400772334e-05,
      "loss": 2.7601,
      "step": 191095
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.698251485824585,
      "learning_rate": 4.2066225073212035e-05,
      "loss": 2.6081,
      "step": 191096
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.493980646133423,
      "learning_rate": 4.206413618665554e-05,
      "loss": 2.9449,
      "step": 191097
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7033495903015137,
      "learning_rate": 4.206204734805415e-05,
      "loss": 2.783,
      "step": 191098
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9022610187530518,
      "learning_rate": 4.20599585574084e-05,
      "loss": 2.884,
      "step": 191099
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.742635488510132,
      "learning_rate": 4.205786981471846e-05,
      "loss": 3.0165,
      "step": 191100
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9300496578216553,
      "learning_rate": 4.2055781119984886e-05,
      "loss": 3.1095,
      "step": 191101
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9486241340637207,
      "learning_rate": 4.205369247320796e-05,
      "loss": 2.9831,
      "step": 191102
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.797443389892578,
      "learning_rate": 4.2051603874388165e-05,
      "loss": 2.9187,
      "step": 191103
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7383358478546143,
      "learning_rate": 4.2049515323525805e-05,
      "loss": 2.9528,
      "step": 191104
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8753087520599365,
      "learning_rate": 4.204742682062135e-05,
      "loss": 3.039,
      "step": 191105
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8636224269866943,
      "learning_rate": 4.2045338365675175e-05,
      "loss": 2.8632,
      "step": 191106
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6237869262695312,
      "learning_rate": 4.204324995868763e-05,
      "loss": 2.8725,
      "step": 191107
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7402493953704834,
      "learning_rate": 4.204116159965902e-05,
      "loss": 3.0808,
      "step": 191108
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2113730907440186,
      "learning_rate": 4.2039073288589954e-05,
      "loss": 3.2009,
      "step": 191109
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.03889799118042,
      "learning_rate": 4.2036985025480584e-05,
      "loss": 3.0197,
      "step": 191110
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2620937824249268,
      "learning_rate": 4.2034896810331486e-05,
      "loss": 2.9636,
      "step": 191111
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.41556978225708,
      "learning_rate": 4.203280864314296e-05,
      "loss": 3.2628,
      "step": 191112
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0423812866210938,
      "learning_rate": 4.2030720523915396e-05,
      "loss": 2.8144,
      "step": 191113
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.634542226791382,
      "learning_rate": 4.2028632452649135e-05,
      "loss": 3.0573,
      "step": 191114
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.506817579269409,
      "learning_rate": 4.202654442934471e-05,
      "loss": 3.0108,
      "step": 191115
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.847445011138916,
      "learning_rate": 4.202445645400232e-05,
      "loss": 3.2023,
      "step": 191116
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0196468830108643,
      "learning_rate": 4.202236852662253e-05,
      "loss": 3.0565,
      "step": 191117
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.719467878341675,
      "learning_rate": 4.202028064720567e-05,
      "loss": 2.9149,
      "step": 191118
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.734234571456909,
      "learning_rate": 4.201819281575208e-05,
      "loss": 3.0491,
      "step": 191119
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.093622207641602,
      "learning_rate": 4.201610503226213e-05,
      "loss": 3.0779,
      "step": 191120
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8362908363342285,
      "learning_rate": 4.201401729673634e-05,
      "loss": 2.8672,
      "step": 191121
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.804701089859009,
      "learning_rate": 4.201192960917491e-05,
      "loss": 2.8411,
      "step": 191122
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6601955890655518,
      "learning_rate": 4.200984196957842e-05,
      "loss": 3.0635,
      "step": 191123
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2896268367767334,
      "learning_rate": 4.2007754377947153e-05,
      "loss": 2.9913,
      "step": 191124
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.443032741546631,
      "learning_rate": 4.200566683428153e-05,
      "loss": 2.859,
      "step": 191125
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0062410831451416,
      "learning_rate": 4.200357933858186e-05,
      "loss": 3.0862,
      "step": 191126
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.670474052429199,
      "learning_rate": 4.200149189084866e-05,
      "loss": 3.1816,
      "step": 191127
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9107768535614014,
      "learning_rate": 4.199940449108219e-05,
      "loss": 2.7175,
      "step": 191128
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6976497173309326,
      "learning_rate": 4.199731713928295e-05,
      "loss": 2.9512,
      "step": 191129
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5080854892730713,
      "learning_rate": 4.199522983545127e-05,
      "loss": 3.1625,
      "step": 191130
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9709506034851074,
      "learning_rate": 4.199314257958759e-05,
      "loss": 2.7962,
      "step": 191131
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5728347301483154,
      "learning_rate": 4.199105537169214e-05,
      "loss": 3.0999,
      "step": 191132
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8353655338287354,
      "learning_rate": 4.1988968211765514e-05,
      "loss": 3.2572,
      "step": 191133
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.990941286087036,
      "learning_rate": 4.1986881099807956e-05,
      "loss": 2.9266,
      "step": 191134
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.600203514099121,
      "learning_rate": 4.1984794035819955e-05,
      "loss": 2.7965,
      "step": 191135
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6740427017211914,
      "learning_rate": 4.1982707019801854e-05,
      "loss": 3.1218,
      "step": 191136
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.771475315093994,
      "learning_rate": 4.198062005175404e-05,
      "loss": 2.9107,
      "step": 191137
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.673889636993408,
      "learning_rate": 4.197853313167683e-05,
      "loss": 2.8275,
      "step": 191138
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8076069355010986,
      "learning_rate": 4.197644625957074e-05,
      "loss": 2.9124,
      "step": 191139
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6720376014709473,
      "learning_rate": 4.197435943543605e-05,
      "loss": 2.7934,
      "step": 191140
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.140092372894287,
      "learning_rate": 4.1972272659273276e-05,
      "loss": 2.9124,
      "step": 191141
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.676846742630005,
      "learning_rate": 4.19701859310827e-05,
      "loss": 3.1535,
      "step": 191142
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.328012466430664,
      "learning_rate": 4.196809925086475e-05,
      "loss": 2.8574,
      "step": 191143
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0771901607513428,
      "learning_rate": 4.196601261861973e-05,
      "loss": 2.8253,
      "step": 191144
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.782259464263916,
      "learning_rate": 4.196392603434816e-05,
      "loss": 2.9243,
      "step": 191145
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1861140727996826,
      "learning_rate": 4.1961839498050286e-05,
      "loss": 3.0489,
      "step": 191146
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6059529781341553,
      "learning_rate": 4.195975300972667e-05,
      "loss": 2.9721,
      "step": 191147
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3156402111053467,
      "learning_rate": 4.1957666569377614e-05,
      "loss": 2.8587,
      "step": 191148
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2224321365356445,
      "learning_rate": 4.195558017700348e-05,
      "loss": 2.4269,
      "step": 191149
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6905486583709717,
      "learning_rate": 4.19534938326046e-05,
      "loss": 3.0325,
      "step": 191150
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7662739753723145,
      "learning_rate": 4.195140753618152e-05,
      "loss": 3.0262,
      "step": 191151
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.847508668899536,
      "learning_rate": 4.194932128773448e-05,
      "loss": 2.7313,
      "step": 191152
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0446231365203857,
      "learning_rate": 4.1947235087263977e-05,
      "loss": 3.2927,
      "step": 191153
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7535483837127686,
      "learning_rate": 4.194514893477039e-05,
      "loss": 3.0767,
      "step": 191154
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1454389095306396,
      "learning_rate": 4.194306283025396e-05,
      "loss": 3.0638,
      "step": 191155
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.664963722229004,
      "learning_rate": 4.1940976773715285e-05,
      "loss": 2.9984,
      "step": 191156
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.917473316192627,
      "learning_rate": 4.1938890765154635e-05,
      "loss": 2.9146,
      "step": 191157
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.559013843536377,
      "learning_rate": 4.1936804804572364e-05,
      "loss": 2.9995,
      "step": 191158
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.394308567047119,
      "learning_rate": 4.1934718891968986e-05,
      "loss": 2.8848,
      "step": 191159
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0771102905273438,
      "learning_rate": 4.1932633027344794e-05,
      "loss": 2.989,
      "step": 191160
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.805722236633301,
      "learning_rate": 4.1930547210700126e-05,
      "loss": 3.115,
      "step": 191161
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5203967094421387,
      "learning_rate": 4.1928461442035545e-05,
      "loss": 2.8998,
      "step": 191162
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.80977725982666,
      "learning_rate": 4.1926375721351246e-05,
      "loss": 3.0139,
      "step": 191163
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4502272605895996,
      "learning_rate": 4.1924290048647766e-05,
      "loss": 2.9633,
      "step": 191164
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.516000270843506,
      "learning_rate": 4.192220442392544e-05,
      "loss": 2.7998,
      "step": 191165
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7579455375671387,
      "learning_rate": 4.1920118847184635e-05,
      "loss": 2.9511,
      "step": 191166
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4834370613098145,
      "learning_rate": 4.191803331842568e-05,
      "loss": 3.1508,
      "step": 191167
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.702200174331665,
      "learning_rate": 4.191594783764911e-05,
      "loss": 2.9575,
      "step": 191168
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.786620616912842,
      "learning_rate": 4.1913862404855194e-05,
      "loss": 3.009,
      "step": 191169
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.05708646774292,
      "learning_rate": 4.191177702004439e-05,
      "loss": 2.8522,
      "step": 191170
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5970418453216553,
      "learning_rate": 4.1909691683217106e-05,
      "loss": 2.8703,
      "step": 191171
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7053654193878174,
      "learning_rate": 4.1907606394373575e-05,
      "loss": 3.0323,
      "step": 191172
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.788076162338257,
      "learning_rate": 4.1905521153514386e-05,
      "loss": 2.9928,
      "step": 191173
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0419232845306396,
      "learning_rate": 4.190343596063982e-05,
      "loss": 2.8071,
      "step": 191174
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.769834518432617,
      "learning_rate": 4.19013508157502e-05,
      "loss": 2.8826,
      "step": 191175
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8885061740875244,
      "learning_rate": 4.189926571884606e-05,
      "loss": 3.0791,
      "step": 191176
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.845162868499756,
      "learning_rate": 4.189718066992764e-05,
      "loss": 2.9076,
      "step": 191177
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.063185214996338,
      "learning_rate": 4.18950956689955e-05,
      "loss": 2.709,
      "step": 191178
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8326010704040527,
      "learning_rate": 4.189301071604995e-05,
      "loss": 2.7565,
      "step": 191179
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7689297199249268,
      "learning_rate": 4.189092581109134e-05,
      "loss": 3.1002,
      "step": 191180
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.509983777999878,
      "learning_rate": 4.1888840954119976e-05,
      "loss": 3.0132,
      "step": 191181
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.509525775909424,
      "learning_rate": 4.188675614513646e-05,
      "loss": 2.9857,
      "step": 191182
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7473719120025635,
      "learning_rate": 4.188467138414099e-05,
      "loss": 2.9486,
      "step": 191183
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.478026866912842,
      "learning_rate": 4.18825866711341e-05,
      "loss": 3.0068,
      "step": 191184
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.770641326904297,
      "learning_rate": 4.188050200611609e-05,
      "loss": 2.8712,
      "step": 191185
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8626699447631836,
      "learning_rate": 4.187841738908739e-05,
      "loss": 3.0769,
      "step": 191186
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.511502504348755,
      "learning_rate": 4.1876332820048306e-05,
      "loss": 3.1317,
      "step": 191187
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.941293478012085,
      "learning_rate": 4.1874248298999335e-05,
      "loss": 2.8176,
      "step": 191188
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6426925659179688,
      "learning_rate": 4.1872163825940743e-05,
      "loss": 3.0612,
      "step": 191189
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7020041942596436,
      "learning_rate": 4.187007940087306e-05,
      "loss": 2.9809,
      "step": 191190
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5795018672943115,
      "learning_rate": 4.186799502379662e-05,
      "loss": 3.14,
      "step": 191191
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9009885787963867,
      "learning_rate": 4.186591069471176e-05,
      "loss": 2.8497,
      "step": 191192
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8981521129608154,
      "learning_rate": 4.186382641361884e-05,
      "loss": 3.0166,
      "step": 191193
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.777582883834839,
      "learning_rate": 4.1861742180518395e-05,
      "loss": 3.0412,
      "step": 191194
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.843020439147949,
      "learning_rate": 4.185965799541063e-05,
      "loss": 2.7236,
      "step": 191195
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9146103858947754,
      "learning_rate": 4.1857573858296144e-05,
      "loss": 2.8196,
      "step": 191196
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.3894083499908447,
      "learning_rate": 4.1855489769175165e-05,
      "loss": 3.0858,
      "step": 191197
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.706967830657959,
      "learning_rate": 4.185340572804813e-05,
      "loss": 2.8886,
      "step": 191198
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.990750789642334,
      "learning_rate": 4.1851321734915364e-05,
      "loss": 3.0599,
      "step": 191199
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8699376583099365,
      "learning_rate": 4.184923778977735e-05,
      "loss": 3.0187,
      "step": 191200
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5927157402038574,
      "learning_rate": 4.18471538926344e-05,
      "loss": 2.8381,
      "step": 191201
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.490926742553711,
      "learning_rate": 4.184507004348699e-05,
      "loss": 3.2658,
      "step": 191202
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.395958185195923,
      "learning_rate": 4.184298624233546e-05,
      "loss": 2.8069,
      "step": 191203
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9132137298583984,
      "learning_rate": 4.1840902489180175e-05,
      "loss": 3.046,
      "step": 191204
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6199445724487305,
      "learning_rate": 4.183881878402149e-05,
      "loss": 2.8306,
      "step": 191205
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1379411220550537,
      "learning_rate": 4.183673512685991e-05,
      "loss": 2.768,
      "step": 191206
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1129159927368164,
      "learning_rate": 4.183465151769567e-05,
      "loss": 2.7419,
      "step": 191207
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3364434242248535,
      "learning_rate": 4.18325679565293e-05,
      "loss": 2.9782,
      "step": 191208
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0652480125427246,
      "learning_rate": 4.1830484443361177e-05,
      "loss": 2.7242,
      "step": 191209
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2234766483306885,
      "learning_rate": 4.182840097819159e-05,
      "loss": 3.0307,
      "step": 191210
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.870915412902832,
      "learning_rate": 4.182631756102094e-05,
      "loss": 2.8911,
      "step": 191211
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5413601398468018,
      "learning_rate": 4.182423419184973e-05,
      "loss": 2.8467,
      "step": 191212
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1466774940490723,
      "learning_rate": 4.182215087067815e-05,
      "loss": 2.8262,
      "step": 191213
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.21549391746521,
      "learning_rate": 4.182006759750681e-05,
      "loss": 3.089,
      "step": 191214
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.895460844039917,
      "learning_rate": 4.1817984372336013e-05,
      "loss": 2.8069,
      "step": 191215
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4909183979034424,
      "learning_rate": 4.181590119516608e-05,
      "loss": 2.7282,
      "step": 191216
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.566439151763916,
      "learning_rate": 4.1813818065997384e-05,
      "loss": 2.7415,
      "step": 191217
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.705754041671753,
      "learning_rate": 4.181173498483046e-05,
      "loss": 2.9022,
      "step": 191218
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6084630489349365,
      "learning_rate": 4.180965195166554e-05,
      "loss": 3.0832,
      "step": 191219
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.677290201187134,
      "learning_rate": 4.1807568966503145e-05,
      "loss": 2.9637,
      "step": 191220
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.496356964111328,
      "learning_rate": 4.180548602934359e-05,
      "loss": 3.0123,
      "step": 191221
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.576103687286377,
      "learning_rate": 4.1803403140187275e-05,
      "loss": 3.0589,
      "step": 191222
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9592480659484863,
      "learning_rate": 4.180132029903449e-05,
      "loss": 2.9616,
      "step": 191223
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7429158687591553,
      "learning_rate": 4.1799237505885805e-05,
      "loss": 3.0206,
      "step": 191224
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4195029735565186,
      "learning_rate": 4.179715476074146e-05,
      "loss": 3.0415,
      "step": 191225
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7173213958740234,
      "learning_rate": 4.179507206360194e-05,
      "loss": 2.9453,
      "step": 191226
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0537874698638916,
      "learning_rate": 4.1792989414467595e-05,
      "loss": 2.9035,
      "step": 191227
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6647229194641113,
      "learning_rate": 4.1790906813338806e-05,
      "loss": 2.8205,
      "step": 191228
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7179861068725586,
      "learning_rate": 4.178882426021592e-05,
      "loss": 2.9332,
      "step": 191229
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.711676597595215,
      "learning_rate": 4.178674175509941e-05,
      "loss": 2.7662,
      "step": 191230
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1575987339019775,
      "learning_rate": 4.178465929798955e-05,
      "loss": 3.0944,
      "step": 191231
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8503592014312744,
      "learning_rate": 4.178257688888687e-05,
      "loss": 2.8979,
      "step": 191232
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3173110485076904,
      "learning_rate": 4.178049452779167e-05,
      "loss": 2.8184,
      "step": 191233
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.248175144195557,
      "learning_rate": 4.177841221470438e-05,
      "loss": 2.8762,
      "step": 191234
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.531895875930786,
      "learning_rate": 4.1776329949625254e-05,
      "loss": 2.8982,
      "step": 191235
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7749431133270264,
      "learning_rate": 4.177424773255489e-05,
      "loss": 3.0165,
      "step": 191236
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9315452575683594,
      "learning_rate": 4.1772165563493456e-05,
      "loss": 2.7744,
      "step": 191237
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.9832661151885986,
      "learning_rate": 4.177008344244155e-05,
      "loss": 2.6041,
      "step": 191238
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.623110771179199,
      "learning_rate": 4.1768001369399376e-05,
      "loss": 2.9259,
      "step": 191239
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.73350191116333,
      "learning_rate": 4.176591934436749e-05,
      "loss": 2.7374,
      "step": 191240
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.015561819076538,
      "learning_rate": 4.176383736734618e-05,
      "loss": 2.9847,
      "step": 191241
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.472532272338867,
      "learning_rate": 4.176175543833585e-05,
      "loss": 3.0107,
      "step": 191242
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4679391384124756,
      "learning_rate": 4.175967355733682e-05,
      "loss": 2.7485,
      "step": 191243
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9688756465911865,
      "learning_rate": 4.175759172434959e-05,
      "loss": 2.9496,
      "step": 191244
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.452116012573242,
      "learning_rate": 4.1755509939374455e-05,
      "loss": 2.9791,
      "step": 191245
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1385674476623535,
      "learning_rate": 4.175342820241191e-05,
      "loss": 2.8632,
      "step": 191246
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6246418952941895,
      "learning_rate": 4.175134651346226e-05,
      "loss": 3.155,
      "step": 191247
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.341090202331543,
      "learning_rate": 4.1749264872525934e-05,
      "loss": 2.8727,
      "step": 191248
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.09968638420105,
      "learning_rate": 4.17471832796032e-05,
      "loss": 2.9527,
      "step": 191249
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3970446586608887,
      "learning_rate": 4.174510173469463e-05,
      "loss": 2.9285,
      "step": 191250
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.724205493927002,
      "learning_rate": 4.174302023780045e-05,
      "loss": 2.7414,
      "step": 191251
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3068060874938965,
      "learning_rate": 4.1740938788921164e-05,
      "loss": 3.1427,
      "step": 191252
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.088102340698242,
      "learning_rate": 4.173885738805714e-05,
      "loss": 2.7739,
      "step": 191253
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.718123912811279,
      "learning_rate": 4.1736776035208664e-05,
      "loss": 2.9608,
      "step": 191254
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.79968523979187,
      "learning_rate": 4.173469473037625e-05,
      "loss": 2.8217,
      "step": 191255
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.867697238922119,
      "learning_rate": 4.173261347356023e-05,
      "loss": 3.0638,
      "step": 191256
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6793088912963867,
      "learning_rate": 4.173053226476096e-05,
      "loss": 2.7855,
      "step": 191257
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.198348522186279,
      "learning_rate": 4.172845110397889e-05,
      "loss": 2.9284,
      "step": 191258
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.78570294380188,
      "learning_rate": 4.172636999121437e-05,
      "loss": 3.1824,
      "step": 191259
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.980836391448975,
      "learning_rate": 4.1724288926467775e-05,
      "loss": 3.0747,
      "step": 191260
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.587416410446167,
      "learning_rate": 4.172220790973953e-05,
      "loss": 2.9297,
      "step": 191261
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8996903896331787,
      "learning_rate": 4.1720126941029954e-05,
      "loss": 2.9355,
      "step": 191262
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.784743309020996,
      "learning_rate": 4.1718046020339526e-05,
      "loss": 3.1843,
      "step": 191263
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.826636791229248,
      "learning_rate": 4.1715965147668616e-05,
      "loss": 3.0156,
      "step": 191264
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.894470691680908,
      "learning_rate": 4.171388432301756e-05,
      "loss": 2.9775,
      "step": 191265
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1992084980010986,
      "learning_rate": 4.171180354638674e-05,
      "loss": 2.8165,
      "step": 191266
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5941214561462402,
      "learning_rate": 4.1709722817776624e-05,
      "loss": 3.0755,
      "step": 191267
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8433308601379395,
      "learning_rate": 4.170764213718747e-05,
      "loss": 3.0792,
      "step": 191268
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6779401302337646,
      "learning_rate": 4.17055615046198e-05,
      "loss": 3.0453,
      "step": 191269
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.768111228942871,
      "learning_rate": 4.170348092007395e-05,
      "loss": 2.8003,
      "step": 191270
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.874448537826538,
      "learning_rate": 4.170140038355029e-05,
      "loss": 3.1019,
      "step": 191271
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7939887046813965,
      "learning_rate": 4.169931989504918e-05,
      "loss": 2.7283,
      "step": 191272
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5591671466827393,
      "learning_rate": 4.1697239454571085e-05,
      "loss": 3.0082,
      "step": 191273
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.804926872253418,
      "learning_rate": 4.169515906211628e-05,
      "loss": 2.6967,
      "step": 191274
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6138315200805664,
      "learning_rate": 4.169307871768529e-05,
      "loss": 2.8833,
      "step": 191275
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9184391498565674,
      "learning_rate": 4.169099842127842e-05,
      "loss": 2.807,
      "step": 191276
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9541714191436768,
      "learning_rate": 4.1688918172896103e-05,
      "loss": 2.8869,
      "step": 191277
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.30967116355896,
      "learning_rate": 4.16868379725386e-05,
      "loss": 3.1007,
      "step": 191278
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5788674354553223,
      "learning_rate": 4.168475782020645e-05,
      "loss": 3.1607,
      "step": 191279
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1293728351593018,
      "learning_rate": 4.168267771589992e-05,
      "loss": 3.1383,
      "step": 191280
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.644819974899292,
      "learning_rate": 4.168059765961953e-05,
      "loss": 2.8434,
      "step": 191281
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4966886043548584,
      "learning_rate": 4.1678517651365596e-05,
      "loss": 3.0006,
      "step": 191282
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6702849864959717,
      "learning_rate": 4.1676437691138506e-05,
      "loss": 3.1067,
      "step": 191283
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9126815795898438,
      "learning_rate": 4.167435777893857e-05,
      "loss": 3.0453,
      "step": 191284
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.056386947631836,
      "learning_rate": 4.1672277914766284e-05,
      "loss": 2.9804,
      "step": 191285
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.988678455352783,
      "learning_rate": 4.167019809862198e-05,
      "loss": 2.6769,
      "step": 191286
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9198338985443115,
      "learning_rate": 4.166811833050609e-05,
      "loss": 2.9631,
      "step": 191287
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9086766242980957,
      "learning_rate": 4.1666038610419004e-05,
      "loss": 2.7802,
      "step": 191288
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7114953994750977,
      "learning_rate": 4.166395893836104e-05,
      "loss": 2.9276,
      "step": 191289
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9335014820098877,
      "learning_rate": 4.166187931433259e-05,
      "loss": 2.7749,
      "step": 191290
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.036332607269287,
      "learning_rate": 4.165979973833412e-05,
      "loss": 2.6813,
      "step": 191291
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1134800910949707,
      "learning_rate": 4.165772021036593e-05,
      "loss": 2.9617,
      "step": 191292
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.807983875274658,
      "learning_rate": 4.1655640730428485e-05,
      "loss": 2.9786,
      "step": 191293
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.281611680984497,
      "learning_rate": 4.1653561298522156e-05,
      "loss": 2.9449,
      "step": 191294
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.525365114212036,
      "learning_rate": 4.16514819146473e-05,
      "loss": 3.137,
      "step": 191295
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2347681522369385,
      "learning_rate": 4.1649402578804223e-05,
      "loss": 3.0232,
      "step": 191296
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4808549880981445,
      "learning_rate": 4.164732329099349e-05,
      "loss": 2.901,
      "step": 191297
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3785152435302734,
      "learning_rate": 4.164524405121531e-05,
      "loss": 3.0515,
      "step": 191298
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9233791828155518,
      "learning_rate": 4.164316485947023e-05,
      "loss": 2.7376,
      "step": 191299
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.55655574798584,
      "learning_rate": 4.164108571575857e-05,
      "loss": 3.1853,
      "step": 191300
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4000697135925293,
      "learning_rate": 4.163900662008072e-05,
      "loss": 2.8394,
      "step": 191301
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8477649688720703,
      "learning_rate": 4.1636927572436977e-05,
      "loss": 2.8594,
      "step": 191302
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.751634120941162,
      "learning_rate": 4.163484857282784e-05,
      "loss": 3.2206,
      "step": 191303
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.632394790649414,
      "learning_rate": 4.1632769621253646e-05,
      "loss": 3.172,
      "step": 191304
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6089906692504883,
      "learning_rate": 4.1630690717714835e-05,
      "loss": 2.9344,
      "step": 191305
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6171159744262695,
      "learning_rate": 4.162861186221169e-05,
      "loss": 2.8593,
      "step": 191306
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2101590633392334,
      "learning_rate": 4.162653305474479e-05,
      "loss": 2.7786,
      "step": 191307
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.895777940750122,
      "learning_rate": 4.1624454295314305e-05,
      "loss": 2.8283,
      "step": 191308
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4548792839050293,
      "learning_rate": 4.162237558392072e-05,
      "loss": 2.8399,
      "step": 191309
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0844132900238037,
      "learning_rate": 4.162029692056438e-05,
      "loss": 2.9291,
      "step": 191310
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3945112228393555,
      "learning_rate": 4.1618218305245775e-05,
      "loss": 3.0657,
      "step": 191311
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6639151573181152,
      "learning_rate": 4.161613973796511e-05,
      "loss": 2.7363,
      "step": 191312
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4610331058502197,
      "learning_rate": 4.161406121872306e-05,
      "loss": 2.7792,
      "step": 191313
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5704338550567627,
      "learning_rate": 4.161198274751971e-05,
      "loss": 2.8624,
      "step": 191314
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1887576580047607,
      "learning_rate": 4.16099043243556e-05,
      "loss": 3.0237,
      "step": 191315
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6227669715881348,
      "learning_rate": 4.160782594923103e-05,
      "loss": 3.0689,
      "step": 191316
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.24837327003479,
      "learning_rate": 4.16057476221465e-05,
      "loss": 2.9234,
      "step": 191317
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.835660457611084,
      "learning_rate": 4.160366934310231e-05,
      "loss": 2.874,
      "step": 191318
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2078053951263428,
      "learning_rate": 4.160159111209898e-05,
      "loss": 3.0459,
      "step": 191319
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7652812004089355,
      "learning_rate": 4.159951292913667e-05,
      "loss": 2.8798,
      "step": 191320
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.065763235092163,
      "learning_rate": 4.1597434794215925e-05,
      "loss": 2.9776,
      "step": 191321
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7015717029571533,
      "learning_rate": 4.1595356707337046e-05,
      "loss": 2.5681,
      "step": 191322
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.856088161468506,
      "learning_rate": 4.159327866850054e-05,
      "loss": 2.9511,
      "step": 191323
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.789768934249878,
      "learning_rate": 4.159120067770667e-05,
      "loss": 2.7892,
      "step": 191324
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9429829120635986,
      "learning_rate": 4.158912273495594e-05,
      "loss": 2.7614,
      "step": 191325
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9284958839416504,
      "learning_rate": 4.158704484024864e-05,
      "loss": 2.8041,
      "step": 191326
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.729674816131592,
      "learning_rate": 4.158496699358521e-05,
      "loss": 2.7224,
      "step": 191327
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.84553861618042,
      "learning_rate": 4.158288919496592e-05,
      "loss": 2.87,
      "step": 191328
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8898487091064453,
      "learning_rate": 4.1580811444391326e-05,
      "loss": 2.9924,
      "step": 191329
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.878939151763916,
      "learning_rate": 4.1578733741861704e-05,
      "loss": 3.2363,
      "step": 191330
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7978100776672363,
      "learning_rate": 4.157665608737748e-05,
      "loss": 3.0021,
      "step": 191331
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9637560844421387,
      "learning_rate": 4.1574578480939093e-05,
      "loss": 2.8963,
      "step": 191332
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5552563667297363,
      "learning_rate": 4.157250092254684e-05,
      "loss": 2.9032,
      "step": 191333
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.840752363204956,
      "learning_rate": 4.157042341220108e-05,
      "loss": 2.8057,
      "step": 191334
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.641700506210327,
      "learning_rate": 4.1568345949902296e-05,
      "loss": 2.8054,
      "step": 191335
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.976365327835083,
      "learning_rate": 4.15662685356508e-05,
      "loss": 2.8962,
      "step": 191336
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0248661041259766,
      "learning_rate": 4.1564191169447084e-05,
      "loss": 2.9178,
      "step": 191337
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3515846729278564,
      "learning_rate": 4.1562113851291424e-05,
      "loss": 2.8666,
      "step": 191338
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9481382369995117,
      "learning_rate": 4.1560036581184196e-05,
      "loss": 3.2018,
      "step": 191339
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7078099250793457,
      "learning_rate": 4.1557959359125936e-05,
      "loss": 3.1772,
      "step": 191340
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0645852088928223,
      "learning_rate": 4.155588218511687e-05,
      "loss": 2.9616,
      "step": 191341
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4186606407165527,
      "learning_rate": 4.155380505915744e-05,
      "loss": 2.9839,
      "step": 191342
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.489252805709839,
      "learning_rate": 4.1551727981248073e-05,
      "loss": 2.8832,
      "step": 191343
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9331729412078857,
      "learning_rate": 4.15496509513891e-05,
      "loss": 2.9793,
      "step": 191344
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5938103199005127,
      "learning_rate": 4.1547573969580896e-05,
      "loss": 2.8302,
      "step": 191345
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.001908779144287,
      "learning_rate": 4.154549703582392e-05,
      "loss": 2.8346,
      "step": 191346
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.878686904907227,
      "learning_rate": 4.154342015011844e-05,
      "loss": 3.0422,
      "step": 191347
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.061598777770996,
      "learning_rate": 4.154134331246501e-05,
      "loss": 3.1502,
      "step": 191348
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5836949348449707,
      "learning_rate": 4.153926652286389e-05,
      "loss": 2.8374,
      "step": 191349
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.286440849304199,
      "learning_rate": 4.1537189781315526e-05,
      "loss": 2.9671,
      "step": 191350
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9552555084228516,
      "learning_rate": 4.153511308782019e-05,
      "loss": 3.0439,
      "step": 191351
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5731520652770996,
      "learning_rate": 4.1533036442378456e-05,
      "loss": 3.0193,
      "step": 191352
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.446007251739502,
      "learning_rate": 4.153095984499051e-05,
      "loss": 2.8362,
      "step": 191353
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8251163959503174,
      "learning_rate": 4.152888329565692e-05,
      "loss": 2.9374,
      "step": 191354
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0731441974639893,
      "learning_rate": 4.1526806794378e-05,
      "loss": 3.2749,
      "step": 191355
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5913333892822266,
      "learning_rate": 4.15247303411541e-05,
      "loss": 2.999,
      "step": 191356
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.511464595794678,
      "learning_rate": 4.1522653935985596e-05,
      "loss": 2.8184,
      "step": 191357
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0727384090423584,
      "learning_rate": 4.152057757887295e-05,
      "loss": 3.0195,
      "step": 191358
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.482616424560547,
      "learning_rate": 4.151850126981643e-05,
      "loss": 2.8105,
      "step": 191359
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.3120217323303223,
      "learning_rate": 4.15164250088166e-05,
      "loss": 3.0643,
      "step": 191360
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.757509469985962,
      "learning_rate": 4.151434879587373e-05,
      "loss": 2.944,
      "step": 191361
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.136350154876709,
      "learning_rate": 4.151227263098822e-05,
      "loss": 2.9245,
      "step": 191362
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.739288806915283,
      "learning_rate": 4.15101965141604e-05,
      "loss": 2.9681,
      "step": 191363
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1208412647247314,
      "learning_rate": 4.150812044539077e-05,
      "loss": 2.9014,
      "step": 191364
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2206594944000244,
      "learning_rate": 4.15060444246796e-05,
      "loss": 3.0832,
      "step": 191365
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6366512775421143,
      "learning_rate": 4.150396845202738e-05,
      "loss": 2.8327,
      "step": 191366
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.767409563064575,
      "learning_rate": 4.150189252743449e-05,
      "loss": 2.9579,
      "step": 191367
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.891042947769165,
      "learning_rate": 4.149981665090125e-05,
      "loss": 2.9774,
      "step": 191368
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4839186668395996,
      "learning_rate": 4.1497740822428006e-05,
      "loss": 3.0027,
      "step": 191369
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.867527484893799,
      "learning_rate": 4.1495665042015315e-05,
      "loss": 2.7111,
      "step": 191370
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.778804302215576,
      "learning_rate": 4.149358930966334e-05,
      "loss": 2.9438,
      "step": 191371
    },
    {
      "epoch": 2.49,
      "grad_norm": 6.028816223144531,
      "learning_rate": 4.1491513625372695e-05,
      "loss": 2.9993,
      "step": 191372
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.417386054992676,
      "learning_rate": 4.148943798914357e-05,
      "loss": 2.8942,
      "step": 191373
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.517704963684082,
      "learning_rate": 4.148736240097656e-05,
      "loss": 2.9036,
      "step": 191374
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.057222843170166,
      "learning_rate": 4.148528686087185e-05,
      "loss": 2.9948,
      "step": 191375
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1200690269470215,
      "learning_rate": 4.148321136882992e-05,
      "loss": 3.1256,
      "step": 191376
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.2967209815979,
      "learning_rate": 4.148113592485107e-05,
      "loss": 3.0685,
      "step": 191377
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.4774861335754395,
      "learning_rate": 4.147906052893585e-05,
      "loss": 2.9112,
      "step": 191378
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.925416946411133,
      "learning_rate": 4.147698518108448e-05,
      "loss": 3.0707,
      "step": 191379
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.834484100341797,
      "learning_rate": 4.147490988129756e-05,
      "loss": 2.9761,
      "step": 191380
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.345322370529175,
      "learning_rate": 4.147283462957519e-05,
      "loss": 2.8475,
      "step": 191381
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1899847984313965,
      "learning_rate": 4.147075942591798e-05,
      "loss": 2.8926,
      "step": 191382
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.312941789627075,
      "learning_rate": 4.146868427032615e-05,
      "loss": 3.0463,
      "step": 191383
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5760107040405273,
      "learning_rate": 4.146660916280023e-05,
      "loss": 2.8583,
      "step": 191384
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.287258625030518,
      "learning_rate": 4.146453410334051e-05,
      "loss": 2.7361,
      "step": 191385
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2890384197235107,
      "learning_rate": 4.146245909194753e-05,
      "loss": 2.8808,
      "step": 191386
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.185914993286133,
      "learning_rate": 4.1460384128621435e-05,
      "loss": 3.2403,
      "step": 191387
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.148306131362915,
      "learning_rate": 4.1458309213362795e-05,
      "loss": 3.004,
      "step": 191388
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8733558654785156,
      "learning_rate": 4.145623434617187e-05,
      "loss": 2.9316,
      "step": 191389
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9433181285858154,
      "learning_rate": 4.1454159527049156e-05,
      "loss": 2.8688,
      "step": 191390
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.891772985458374,
      "learning_rate": 4.145208475599496e-05,
      "loss": 2.9228,
      "step": 191391
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7404704093933105,
      "learning_rate": 4.1450010033009814e-05,
      "loss": 3.2232,
      "step": 191392
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.92142915725708,
      "learning_rate": 4.144793535809389e-05,
      "loss": 2.8087,
      "step": 191393
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.473182201385498,
      "learning_rate": 4.1445860731247704e-05,
      "loss": 2.989,
      "step": 191394
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7893385887145996,
      "learning_rate": 4.144378615247157e-05,
      "loss": 3.0222,
      "step": 191395
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.403803825378418,
      "learning_rate": 4.144171162176598e-05,
      "loss": 2.8684,
      "step": 191396
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.163171291351318,
      "learning_rate": 4.1439637139131176e-05,
      "loss": 2.6607,
      "step": 191397
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.95365309715271,
      "learning_rate": 4.143756270456775e-05,
      "loss": 2.9948,
      "step": 191398
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.568657398223877,
      "learning_rate": 4.143548831807587e-05,
      "loss": 2.7612,
      "step": 191399
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5813872814178467,
      "learning_rate": 4.143341397965604e-05,
      "loss": 2.9433,
      "step": 191400
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.132174491882324,
      "learning_rate": 4.1431339689308586e-05,
      "loss": 2.8391,
      "step": 191401
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9554502964019775,
      "learning_rate": 4.1429265447033975e-05,
      "loss": 2.9986,
      "step": 191402
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.516757965087891,
      "learning_rate": 4.142719125283248e-05,
      "loss": 3.1584,
      "step": 191403
    },
    {
      "epoch": 2.49,
      "grad_norm": 6.858306407928467,
      "learning_rate": 4.142511710670466e-05,
      "loss": 2.9145,
      "step": 191404
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.33477520942688,
      "learning_rate": 4.142304300865069e-05,
      "loss": 2.7893,
      "step": 191405
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.187844753265381,
      "learning_rate": 4.142096895867113e-05,
      "loss": 2.7904,
      "step": 191406
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.875136613845825,
      "learning_rate": 4.1418894956766216e-05,
      "loss": 2.9859,
      "step": 191407
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.228656530380249,
      "learning_rate": 4.141682100293647e-05,
      "loss": 3.0864,
      "step": 191408
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.971330404281616,
      "learning_rate": 4.141474709718215e-05,
      "loss": 2.7984,
      "step": 191409
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9396767616271973,
      "learning_rate": 4.141267323950382e-05,
      "loss": 2.7189,
      "step": 191410
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1058802604675293,
      "learning_rate": 4.141059942990168e-05,
      "loss": 2.7406,
      "step": 191411
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.662412643432617,
      "learning_rate": 4.140852566837621e-05,
      "loss": 2.7714,
      "step": 191412
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.3946402072906494,
      "learning_rate": 4.140645195492772e-05,
      "loss": 2.8883,
      "step": 191413
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.151531934738159,
      "learning_rate": 4.140437828955674e-05,
      "loss": 3.1957,
      "step": 191414
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.04885721206665,
      "learning_rate": 4.14023046722635e-05,
      "loss": 2.9277,
      "step": 191415
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1039037704467773,
      "learning_rate": 4.14002311030485e-05,
      "loss": 3.0481,
      "step": 191416
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.72390079498291,
      "learning_rate": 4.139815758191207e-05,
      "loss": 2.9714,
      "step": 191417
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7779862880706787,
      "learning_rate": 4.139608410885462e-05,
      "loss": 2.7332,
      "step": 191418
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5689339637756348,
      "learning_rate": 4.139401068387644e-05,
      "loss": 3.0167,
      "step": 191419
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.510916233062744,
      "learning_rate": 4.1391937306978076e-05,
      "loss": 2.966,
      "step": 191420
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.470992088317871,
      "learning_rate": 4.1389863978159774e-05,
      "loss": 2.9713,
      "step": 191421
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7189254760742188,
      "learning_rate": 4.1387790697422016e-05,
      "loss": 3.1238,
      "step": 191422
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6570379734039307,
      "learning_rate": 4.138571746476517e-05,
      "loss": 2.7428,
      "step": 191423
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4111170768737793,
      "learning_rate": 4.1383644280189556e-05,
      "loss": 2.7585,
      "step": 191424
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8675620555877686,
      "learning_rate": 4.1381571143695613e-05,
      "loss": 2.7461,
      "step": 191425
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1283843517303467,
      "learning_rate": 4.1379498055283774e-05,
      "loss": 3.1878,
      "step": 191426
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1737282276153564,
      "learning_rate": 4.137742501495428e-05,
      "loss": 2.8176,
      "step": 191427
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.284365177154541,
      "learning_rate": 4.137535202270765e-05,
      "loss": 2.9594,
      "step": 191428
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.877997875213623,
      "learning_rate": 4.137327907854426e-05,
      "loss": 2.8872,
      "step": 191429
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7595086097717285,
      "learning_rate": 4.137120618246438e-05,
      "loss": 2.6819,
      "step": 191430
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.771043539047241,
      "learning_rate": 4.136913333446853e-05,
      "loss": 3.0882,
      "step": 191431
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6971797943115234,
      "learning_rate": 4.136706053455705e-05,
      "loss": 2.998,
      "step": 191432
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0315377712249756,
      "learning_rate": 4.1364987782730244e-05,
      "loss": 2.8703,
      "step": 191433
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9639928340911865,
      "learning_rate": 4.136291507898863e-05,
      "loss": 3.0676,
      "step": 191434
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.960690975189209,
      "learning_rate": 4.136084242333253e-05,
      "loss": 2.8604,
      "step": 191435
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6691012382507324,
      "learning_rate": 4.1358769815762296e-05,
      "loss": 2.7901,
      "step": 191436
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.993311643600464,
      "learning_rate": 4.135669725627839e-05,
      "loss": 3.0065,
      "step": 191437
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.638948678970337,
      "learning_rate": 4.135462474488109e-05,
      "loss": 2.9292,
      "step": 191438
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4867584705352783,
      "learning_rate": 4.1352552281570926e-05,
      "loss": 2.7355,
      "step": 191439
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.561537742614746,
      "learning_rate": 4.13504798663482e-05,
      "loss": 3.1858,
      "step": 191440
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.866769552230835,
      "learning_rate": 4.13484074992133e-05,
      "loss": 3.0825,
      "step": 191441
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9851841926574707,
      "learning_rate": 4.1346335180166534e-05,
      "loss": 2.8703,
      "step": 191442
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.573617935180664,
      "learning_rate": 4.134426290920844e-05,
      "loss": 2.8533,
      "step": 191443
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0773684978485107,
      "learning_rate": 4.134219068633927e-05,
      "loss": 2.8761,
      "step": 191444
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.424455404281616,
      "learning_rate": 4.134011851155954e-05,
      "loss": 3.0118,
      "step": 191445
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7461206912994385,
      "learning_rate": 4.133804638486947e-05,
      "loss": 2.9397,
      "step": 191446
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.647496223449707,
      "learning_rate": 4.1335974306269706e-05,
      "loss": 3.2747,
      "step": 191447
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3732314109802246,
      "learning_rate": 4.133390227576033e-05,
      "loss": 2.8389,
      "step": 191448
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.761713981628418,
      "learning_rate": 4.133183029334193e-05,
      "loss": 3.2409,
      "step": 191449
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.081775426864624,
      "learning_rate": 4.1329758359014756e-05,
      "loss": 3.1519,
      "step": 191450
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1450893878936768,
      "learning_rate": 4.132768647277931e-05,
      "loss": 2.9028,
      "step": 191451
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.2589616775512695,
      "learning_rate": 4.132561463463589e-05,
      "loss": 2.847,
      "step": 191452
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.02331805229187,
      "learning_rate": 4.132354284458504e-05,
      "loss": 3.0164,
      "step": 191453
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3114728927612305,
      "learning_rate": 4.132147110262689e-05,
      "loss": 2.6393,
      "step": 191454
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8558847904205322,
      "learning_rate": 4.1319399408762034e-05,
      "loss": 2.9032,
      "step": 191455
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7355411052703857,
      "learning_rate": 4.131732776299074e-05,
      "loss": 2.7085,
      "step": 191456
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5251471996307373,
      "learning_rate": 4.131525616531347e-05,
      "loss": 2.8048,
      "step": 191457
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.697504758834839,
      "learning_rate": 4.1313184615730535e-05,
      "loss": 2.7457,
      "step": 191458
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4167726039886475,
      "learning_rate": 4.131111311424249e-05,
      "loss": 2.9117,
      "step": 191459
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5299758911132812,
      "learning_rate": 4.130904166084944e-05,
      "loss": 2.6984,
      "step": 191460
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6165599822998047,
      "learning_rate": 4.1306970255552016e-05,
      "loss": 3.2298,
      "step": 191461
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6748030185699463,
      "learning_rate": 4.1304898898350424e-05,
      "loss": 2.9191,
      "step": 191462
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5703001022338867,
      "learning_rate": 4.130282758924519e-05,
      "loss": 2.7168,
      "step": 191463
    },
    {
      "epoch": 2.49,
      "grad_norm": 5.009976387023926,
      "learning_rate": 4.1300756328236586e-05,
      "loss": 2.9075,
      "step": 191464
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0915207862854004,
      "learning_rate": 4.12986851153252e-05,
      "loss": 2.5509,
      "step": 191465
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7768471240997314,
      "learning_rate": 4.129661395051115e-05,
      "loss": 2.9739,
      "step": 191466
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6566314697265625,
      "learning_rate": 4.1294542833794985e-05,
      "loss": 2.8796,
      "step": 191467
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.554443836212158,
      "learning_rate": 4.129247176517698e-05,
      "loss": 3.0314,
      "step": 191468
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.600843667984009,
      "learning_rate": 4.129040074465767e-05,
      "loss": 3.0143,
      "step": 191469
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2792165279388428,
      "learning_rate": 4.1288329772237283e-05,
      "loss": 2.7296,
      "step": 191470
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9382715225219727,
      "learning_rate": 4.128625884791642e-05,
      "loss": 3.1423,
      "step": 191471
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.528379201889038,
      "learning_rate": 4.128418797169518e-05,
      "loss": 3.0281,
      "step": 191472
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9103505611419678,
      "learning_rate": 4.128211714357417e-05,
      "loss": 2.912,
      "step": 191473
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1108741760253906,
      "learning_rate": 4.128004636355364e-05,
      "loss": 2.8613,
      "step": 191474
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.182978630065918,
      "learning_rate": 4.127797563163407e-05,
      "loss": 2.7022,
      "step": 191475
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4556312561035156,
      "learning_rate": 4.1275904947815766e-05,
      "loss": 2.9727,
      "step": 191476
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.670971393585205,
      "learning_rate": 4.1273834312099275e-05,
      "loss": 3.0135,
      "step": 191477
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5920310020446777,
      "learning_rate": 4.127176372448474e-05,
      "loss": 2.9024,
      "step": 191478
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.8498241901397705,
      "learning_rate": 4.126969318497273e-05,
      "loss": 3.0859,
      "step": 191479
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0948269367218018,
      "learning_rate": 4.1267622693563504e-05,
      "loss": 2.8194,
      "step": 191480
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.806668758392334,
      "learning_rate": 4.1265552250257574e-05,
      "loss": 3.1199,
      "step": 191481
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3943161964416504,
      "learning_rate": 4.126348185505519e-05,
      "loss": 2.8163,
      "step": 191482
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.034524440765381,
      "learning_rate": 4.1261411507956966e-05,
      "loss": 2.722,
      "step": 191483
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.853941440582275,
      "learning_rate": 4.125934120896296e-05,
      "loss": 2.7251,
      "step": 191484
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.033487319946289,
      "learning_rate": 4.125727095807381e-05,
      "loss": 2.6916,
      "step": 191485
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6611881256103516,
      "learning_rate": 4.1255200755289776e-05,
      "loss": 2.8593,
      "step": 191486
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.389626979827881,
      "learning_rate": 4.1253130600611326e-05,
      "loss": 2.9078,
      "step": 191487
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.321944713592529,
      "learning_rate": 4.1251060494038735e-05,
      "loss": 2.86,
      "step": 191488
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5427610874176025,
      "learning_rate": 4.1248990435572595e-05,
      "loss": 2.8756,
      "step": 191489
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.343255996704102,
      "learning_rate": 4.1246920425213e-05,
      "loss": 2.786,
      "step": 191490
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.783079147338867,
      "learning_rate": 4.124485046296057e-05,
      "loss": 2.9193,
      "step": 191491
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2879209518432617,
      "learning_rate": 4.1242780548815546e-05,
      "loss": 2.8426,
      "step": 191492
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.936070442199707,
      "learning_rate": 4.1240710682778446e-05,
      "loss": 2.8923,
      "step": 191493
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.549574851989746,
      "learning_rate": 4.123864086484949e-05,
      "loss": 2.7831,
      "step": 191494
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8626625537872314,
      "learning_rate": 4.123657109502929e-05,
      "loss": 2.7501,
      "step": 191495
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2557849884033203,
      "learning_rate": 4.1234501373317976e-05,
      "loss": 2.74,
      "step": 191496
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2462456226348877,
      "learning_rate": 4.123243169971614e-05,
      "loss": 2.9104,
      "step": 191497
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.941352367401123,
      "learning_rate": 4.1230362074223986e-05,
      "loss": 2.9345,
      "step": 191498
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7655529975891113,
      "learning_rate": 4.1228292496842054e-05,
      "loss": 3.0655,
      "step": 191499
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6121675968170166,
      "learning_rate": 4.1226222967570596e-05,
      "loss": 2.8669,
      "step": 191500
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0575995445251465,
      "learning_rate": 4.122415348641016e-05,
      "loss": 2.7904,
      "step": 191501
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.5281105041503906,
      "learning_rate": 4.122208405336104e-05,
      "loss": 2.8011,
      "step": 191502
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.848240375518799,
      "learning_rate": 4.1220014668423596e-05,
      "loss": 2.7534,
      "step": 191503
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.700920343399048,
      "learning_rate": 4.1217945331598166e-05,
      "loss": 3.0442,
      "step": 191504
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8833916187286377,
      "learning_rate": 4.1215876042885285e-05,
      "loss": 2.804,
      "step": 191505
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5140838623046875,
      "learning_rate": 4.121380680228522e-05,
      "loss": 2.8194,
      "step": 191506
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.824791669845581,
      "learning_rate": 4.121173760979843e-05,
      "loss": 2.9913,
      "step": 191507
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.041893482208252,
      "learning_rate": 4.1209668465425264e-05,
      "loss": 2.9218,
      "step": 191508
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9762399196624756,
      "learning_rate": 4.1207599369166034e-05,
      "loss": 3.1477,
      "step": 191509
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7597479820251465,
      "learning_rate": 4.120553032102125e-05,
      "loss": 2.9817,
      "step": 191510
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.55963134765625,
      "learning_rate": 4.120346132099128e-05,
      "loss": 2.8743,
      "step": 191511
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.229207515716553,
      "learning_rate": 4.120139236907639e-05,
      "loss": 2.8186,
      "step": 191512
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7941436767578125,
      "learning_rate": 4.119932346527711e-05,
      "loss": 2.9247,
      "step": 191513
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2909300327301025,
      "learning_rate": 4.119725460959374e-05,
      "loss": 2.9762,
      "step": 191514
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.621962785720825,
      "learning_rate": 4.119518580202665e-05,
      "loss": 2.7853,
      "step": 191515
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8279616832733154,
      "learning_rate": 4.1193117042576304e-05,
      "loss": 3.0051,
      "step": 191516
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.150771379470825,
      "learning_rate": 4.119104833124307e-05,
      "loss": 2.9065,
      "step": 191517
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6754167079925537,
      "learning_rate": 4.118897966802721e-05,
      "loss": 2.7508,
      "step": 191518
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5508408546447754,
      "learning_rate": 4.1186911052929294e-05,
      "loss": 2.8323,
      "step": 191519
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.4547839164733887,
      "learning_rate": 4.118484248594962e-05,
      "loss": 2.7537,
      "step": 191520
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1206445693969727,
      "learning_rate": 4.118277396708849e-05,
      "loss": 2.7519,
      "step": 191521
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3541924953460693,
      "learning_rate": 4.118070549634643e-05,
      "loss": 2.9468,
      "step": 191522
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.797215461730957,
      "learning_rate": 4.117863707372369e-05,
      "loss": 3.1967,
      "step": 191523
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.761298179626465,
      "learning_rate": 4.117656869922081e-05,
      "loss": 2.7196,
      "step": 191524
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6345181465148926,
      "learning_rate": 4.117450037283808e-05,
      "loss": 3.2902,
      "step": 191525
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.7457993030548096,
      "learning_rate": 4.1172432094575924e-05,
      "loss": 2.922,
      "step": 191526
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.961649179458618,
      "learning_rate": 4.117036386443461e-05,
      "loss": 3.0828,
      "step": 191527
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.411010980606079,
      "learning_rate": 4.1168295682414665e-05,
      "loss": 2.8127,
      "step": 191528
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8509035110473633,
      "learning_rate": 4.1166227548516364e-05,
      "loss": 3.1445,
      "step": 191529
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.176739692687988,
      "learning_rate": 4.1164159462740234e-05,
      "loss": 2.8736,
      "step": 191530
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9706742763519287,
      "learning_rate": 4.116209142508654e-05,
      "loss": 3.1732,
      "step": 191531
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9182968139648438,
      "learning_rate": 4.116002343555572e-05,
      "loss": 2.7864,
      "step": 191532
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9780120849609375,
      "learning_rate": 4.1157955494148076e-05,
      "loss": 2.8321,
      "step": 191533
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.317035436630249,
      "learning_rate": 4.11558876008641e-05,
      "loss": 2.8794,
      "step": 191534
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5012221336364746,
      "learning_rate": 4.1153819755704063e-05,
      "loss": 2.8378,
      "step": 191535
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3573436737060547,
      "learning_rate": 4.11517519586685e-05,
      "loss": 2.7633,
      "step": 191536
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1047263145446777,
      "learning_rate": 4.114968420975764e-05,
      "loss": 3.0303,
      "step": 191537
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.355454921722412,
      "learning_rate": 4.114761650897208e-05,
      "loss": 2.7513,
      "step": 191538
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.825185775756836,
      "learning_rate": 4.114554885631193e-05,
      "loss": 3.1265,
      "step": 191539
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8873164653778076,
      "learning_rate": 4.114348125177774e-05,
      "loss": 2.7868,
      "step": 191540
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2731778621673584,
      "learning_rate": 4.114141369536983e-05,
      "loss": 2.8841,
      "step": 191541
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.463191509246826,
      "learning_rate": 4.113934618708869e-05,
      "loss": 2.9877,
      "step": 191542
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.334557056427002,
      "learning_rate": 4.113727872693454e-05,
      "loss": 2.9116,
      "step": 191543
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0055925846099854,
      "learning_rate": 4.1135211314908034e-05,
      "loss": 3.0478,
      "step": 191544
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5472140312194824,
      "learning_rate": 4.1133143951009195e-05,
      "loss": 2.8725,
      "step": 191545
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1221415996551514,
      "learning_rate": 4.113107663523869e-05,
      "loss": 2.8367,
      "step": 191546
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3441598415374756,
      "learning_rate": 4.1129009367596724e-05,
      "loss": 2.918,
      "step": 191547
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.031176805496216,
      "learning_rate": 4.112694214808382e-05,
      "loss": 3.1341,
      "step": 191548
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.8894476890563965,
      "learning_rate": 4.112487497670025e-05,
      "loss": 2.8735,
      "step": 191549
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1585536003112793,
      "learning_rate": 4.1122807853446584e-05,
      "loss": 2.7184,
      "step": 191550
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6886258125305176,
      "learning_rate": 4.1120740778322946e-05,
      "loss": 2.7986,
      "step": 191551
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5778284072875977,
      "learning_rate": 4.111867375132991e-05,
      "loss": 2.9317,
      "step": 191552
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5810961723327637,
      "learning_rate": 4.1116606772467744e-05,
      "loss": 2.7733,
      "step": 191553
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9016687870025635,
      "learning_rate": 4.111453984173694e-05,
      "loss": 2.8531,
      "step": 191554
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8739359378814697,
      "learning_rate": 4.1112472959137764e-05,
      "loss": 2.9002,
      "step": 191555
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5267839431762695,
      "learning_rate": 4.111040612467079e-05,
      "loss": 3.0231,
      "step": 191556
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.904449939727783,
      "learning_rate": 4.1108339338336185e-05,
      "loss": 2.788,
      "step": 191557
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9064931869506836,
      "learning_rate": 4.110627260013444e-05,
      "loss": 2.8819,
      "step": 191558
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.855807304382324,
      "learning_rate": 4.110420591006589e-05,
      "loss": 2.9577,
      "step": 191559
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9240221977233887,
      "learning_rate": 4.110213926813104e-05,
      "loss": 3.0533,
      "step": 191560
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.6891748905181885,
      "learning_rate": 4.1100072674330085e-05,
      "loss": 2.9068,
      "step": 191561
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.716390609741211,
      "learning_rate": 4.109800612866366e-05,
      "loss": 2.8658,
      "step": 191562
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3287513256073,
      "learning_rate": 4.1095939631131866e-05,
      "loss": 3.0443,
      "step": 191563
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.952730178833008,
      "learning_rate": 4.1093873181735306e-05,
      "loss": 2.9978,
      "step": 191564
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0884969234466553,
      "learning_rate": 4.1091806780474204e-05,
      "loss": 3.2033,
      "step": 191565
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.914823532104492,
      "learning_rate": 4.108974042734909e-05,
      "loss": 2.7979,
      "step": 191566
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.9117414951324463,
      "learning_rate": 4.1087674122360216e-05,
      "loss": 3.0422,
      "step": 191567
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9839603900909424,
      "learning_rate": 4.108560786550816e-05,
      "loss": 3.1127,
      "step": 191568
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3967597484588623,
      "learning_rate": 4.108354165679304e-05,
      "loss": 3.0003,
      "step": 191569
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7742385864257812,
      "learning_rate": 4.108147549621544e-05,
      "loss": 2.9997,
      "step": 191570
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7467901706695557,
      "learning_rate": 4.107940938377564e-05,
      "loss": 2.7842,
      "step": 191571
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.1052675247192383,
      "learning_rate": 4.1077343319474134e-05,
      "loss": 3.0216,
      "step": 191572
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6010570526123047,
      "learning_rate": 4.107527730331115e-05,
      "loss": 2.9293,
      "step": 191573
    },
    {
      "epoch": 2.49,
      "grad_norm": 4.395142078399658,
      "learning_rate": 4.1073211335287227e-05,
      "loss": 3.1246,
      "step": 191574
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9077064990997314,
      "learning_rate": 4.107114541540266e-05,
      "loss": 3.1406,
      "step": 191575
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.636906385421753,
      "learning_rate": 4.106907954365789e-05,
      "loss": 3.2324,
      "step": 191576
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.4019381999969482,
      "learning_rate": 4.1067013720053175e-05,
      "loss": 2.8605,
      "step": 191577
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7331371307373047,
      "learning_rate": 4.106494794458909e-05,
      "loss": 2.7868,
      "step": 191578
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.293689250946045,
      "learning_rate": 4.106288221726582e-05,
      "loss": 2.86,
      "step": 191579
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.792717456817627,
      "learning_rate": 4.1060816538083916e-05,
      "loss": 3.0084,
      "step": 191580
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6297316551208496,
      "learning_rate": 4.1058750907043704e-05,
      "loss": 2.8907,
      "step": 191581
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0606799125671387,
      "learning_rate": 4.105668532414558e-05,
      "loss": 3.0,
      "step": 191582
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.177180528640747,
      "learning_rate": 4.105461978938981e-05,
      "loss": 2.8345,
      "step": 191583
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5932703018188477,
      "learning_rate": 4.105255430277696e-05,
      "loss": 2.8297,
      "step": 191584
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0618786811828613,
      "learning_rate": 4.105048886430724e-05,
      "loss": 2.8562,
      "step": 191585
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0207741260528564,
      "learning_rate": 4.104842347398121e-05,
      "loss": 2.7811,
      "step": 191586
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5701651573181152,
      "learning_rate": 4.104635813179913e-05,
      "loss": 2.8914,
      "step": 191587
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.540574073791504,
      "learning_rate": 4.1044292837761475e-05,
      "loss": 3.0766,
      "step": 191588
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8509361743927,
      "learning_rate": 4.104222759186847e-05,
      "loss": 2.7845,
      "step": 191589
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0146708488464355,
      "learning_rate": 4.104016239412066e-05,
      "loss": 3.0236,
      "step": 191590
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.689469337463379,
      "learning_rate": 4.103809724451833e-05,
      "loss": 3.0658,
      "step": 191591
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.2902238368988037,
      "learning_rate": 4.103603214306196e-05,
      "loss": 3.2101,
      "step": 191592
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6669414043426514,
      "learning_rate": 4.1033967089751906e-05,
      "loss": 2.7601,
      "step": 191593
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.123037338256836,
      "learning_rate": 4.1031902084588506e-05,
      "loss": 2.9256,
      "step": 191594
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9307775497436523,
      "learning_rate": 4.10298371275721e-05,
      "loss": 3.0573,
      "step": 191595
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8826897144317627,
      "learning_rate": 4.102777221870317e-05,
      "loss": 2.9109,
      "step": 191596
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.54198956489563,
      "learning_rate": 4.102570735798203e-05,
      "loss": 2.5127,
      "step": 191597
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.009626626968384,
      "learning_rate": 4.1023642545409176e-05,
      "loss": 3.0514,
      "step": 191598
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.166356086730957,
      "learning_rate": 4.102157778098491e-05,
      "loss": 2.9181,
      "step": 191599
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5723116397857666,
      "learning_rate": 4.1019513064709554e-05,
      "loss": 3.0261,
      "step": 191600
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.042544364929199,
      "learning_rate": 4.101744839658362e-05,
      "loss": 2.8553,
      "step": 191601
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.5841736793518066,
      "learning_rate": 4.10153837766074e-05,
      "loss": 2.8421,
      "step": 191602
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.3461997509002686,
      "learning_rate": 4.10133192047813e-05,
      "loss": 2.9961,
      "step": 191603
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6003644466400146,
      "learning_rate": 4.101125468110575e-05,
      "loss": 2.8116,
      "step": 191604
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.023603916168213,
      "learning_rate": 4.100919020558109e-05,
      "loss": 2.7586,
      "step": 191605
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.480888605117798,
      "learning_rate": 4.100712577820764e-05,
      "loss": 2.785,
      "step": 191606
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7386605739593506,
      "learning_rate": 4.100506139898594e-05,
      "loss": 3.0568,
      "step": 191607
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.995800733566284,
      "learning_rate": 4.100299706791622e-05,
      "loss": 2.8073,
      "step": 191608
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.101362705230713,
      "learning_rate": 4.100093278499899e-05,
      "loss": 2.9403,
      "step": 191609
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.894543409347534,
      "learning_rate": 4.09988685502346e-05,
      "loss": 3.0164,
      "step": 191610
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.7728896141052246,
      "learning_rate": 4.099680436362336e-05,
      "loss": 2.7965,
      "step": 191611
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.833733081817627,
      "learning_rate": 4.099474022516567e-05,
      "loss": 2.9418,
      "step": 191612
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6603012084960938,
      "learning_rate": 4.0992676134862023e-05,
      "loss": 2.8394,
      "step": 191613
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.9906671047210693,
      "learning_rate": 4.099061209271263e-05,
      "loss": 2.8789,
      "step": 191614
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.6943507194519043,
      "learning_rate": 4.098854809871808e-05,
      "loss": 2.6493,
      "step": 191615
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.650503158569336,
      "learning_rate": 4.0986484152878606e-05,
      "loss": 2.9048,
      "step": 191616
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.804611921310425,
      "learning_rate": 4.098442025519465e-05,
      "loss": 3.1543,
      "step": 191617
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.2050466537475586,
      "learning_rate": 4.09823564056665e-05,
      "loss": 2.972,
      "step": 191618
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.058566570281982,
      "learning_rate": 4.098029260429471e-05,
      "loss": 2.8075,
      "step": 191619
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.095719337463379,
      "learning_rate": 4.0978228851079485e-05,
      "loss": 2.8627,
      "step": 191620
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8513095378875732,
      "learning_rate": 4.097616514602138e-05,
      "loss": 2.889,
      "step": 191621
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3711979389190674,
      "learning_rate": 4.097410148912061e-05,
      "loss": 2.8421,
      "step": 191622
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.520798444747925,
      "learning_rate": 4.0972037880377796e-05,
      "loss": 2.7429,
      "step": 191623
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.947535991668701,
      "learning_rate": 4.096997431979305e-05,
      "loss": 3.114,
      "step": 191624
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9428982734680176,
      "learning_rate": 4.096791080736692e-05,
      "loss": 3.0836,
      "step": 191625
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5642218589782715,
      "learning_rate": 4.096584734309967e-05,
      "loss": 2.7817,
      "step": 191626
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6032347679138184,
      "learning_rate": 4.096378392699182e-05,
      "loss": 2.8952,
      "step": 191627
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6104419231414795,
      "learning_rate": 4.0961720559043655e-05,
      "loss": 2.9223,
      "step": 191628
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.693254232406616,
      "learning_rate": 4.095965723925573e-05,
      "loss": 2.7799,
      "step": 191629
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.683701276779175,
      "learning_rate": 4.095759396762814e-05,
      "loss": 2.8679,
      "step": 191630
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.900761604309082,
      "learning_rate": 4.0955530744161467e-05,
      "loss": 2.8927,
      "step": 191631
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.664855718612671,
      "learning_rate": 4.0953467568856035e-05,
      "loss": 2.9793,
      "step": 191632
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.364987373352051,
      "learning_rate": 4.095140444171228e-05,
      "loss": 2.8514,
      "step": 191633
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1752941608428955,
      "learning_rate": 4.094934136273049e-05,
      "loss": 3.0154,
      "step": 191634
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.141740322113037,
      "learning_rate": 4.094727833191125e-05,
      "loss": 2.97,
      "step": 191635
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.835134744644165,
      "learning_rate": 4.0945215349254645e-05,
      "loss": 3.0171,
      "step": 191636
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.62296986579895,
      "learning_rate": 4.094315241476132e-05,
      "loss": 2.9259,
      "step": 191637
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5296125411987305,
      "learning_rate": 4.094108952843143e-05,
      "loss": 2.9503,
      "step": 191638
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6175684928894043,
      "learning_rate": 4.093902669026557e-05,
      "loss": 2.8878,
      "step": 191639
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.983424663543701,
      "learning_rate": 4.0936963900264e-05,
      "loss": 2.9034,
      "step": 191640
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7460319995880127,
      "learning_rate": 4.093490115842719e-05,
      "loss": 3.0583,
      "step": 191641
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0365750789642334,
      "learning_rate": 4.093283846475546e-05,
      "loss": 3.0083,
      "step": 191642
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.783691883087158,
      "learning_rate": 4.093077581924919e-05,
      "loss": 2.8825,
      "step": 191643
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9561243057250977,
      "learning_rate": 4.0928713221908736e-05,
      "loss": 3.3445,
      "step": 191644
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.880577325820923,
      "learning_rate": 4.0926650672734586e-05,
      "loss": 2.9516,
      "step": 191645
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.327330589294434,
      "learning_rate": 4.0924588171727004e-05,
      "loss": 2.9805,
      "step": 191646
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1285500526428223,
      "learning_rate": 4.0922525718886486e-05,
      "loss": 2.9103,
      "step": 191647
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.17043924331665,
      "learning_rate": 4.092046331421338e-05,
      "loss": 2.9647,
      "step": 191648
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.270183563232422,
      "learning_rate": 4.091840095770804e-05,
      "loss": 2.7534,
      "step": 191649
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.155819892883301,
      "learning_rate": 4.091633864937077e-05,
      "loss": 2.9872,
      "step": 191650
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9401986598968506,
      "learning_rate": 4.0914276389202135e-05,
      "loss": 2.9558,
      "step": 191651
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.075927257537842,
      "learning_rate": 4.091221417720237e-05,
      "loss": 2.7502,
      "step": 191652
    },
    {
      "epoch": 2.5,
      "grad_norm": 7.553524971008301,
      "learning_rate": 4.0910152013371976e-05,
      "loss": 2.6919,
      "step": 191653
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.173109531402588,
      "learning_rate": 4.090808989771125e-05,
      "loss": 2.7622,
      "step": 191654
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7358405590057373,
      "learning_rate": 4.090602783022062e-05,
      "loss": 2.8182,
      "step": 191655
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8390872478485107,
      "learning_rate": 4.090396581090036e-05,
      "loss": 2.8729,
      "step": 191656
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.264796018600464,
      "learning_rate": 4.0901903839751036e-05,
      "loss": 2.8776,
      "step": 191657
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8557186126708984,
      "learning_rate": 4.089984191677288e-05,
      "loss": 2.9421,
      "step": 191658
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6912479400634766,
      "learning_rate": 4.089778004196639e-05,
      "loss": 2.9706,
      "step": 191659
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8693530559539795,
      "learning_rate": 4.0895718215331894e-05,
      "loss": 2.8118,
      "step": 191660
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2771198749542236,
      "learning_rate": 4.089365643686977e-05,
      "loss": 3.0968,
      "step": 191661
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3684911727905273,
      "learning_rate": 4.0891594706580346e-05,
      "loss": 2.7699,
      "step": 191662
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7527413368225098,
      "learning_rate": 4.088953302446411e-05,
      "loss": 3.0617,
      "step": 191663
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.030691623687744,
      "learning_rate": 4.0887471390521354e-05,
      "loss": 2.9308,
      "step": 191664
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4046361446380615,
      "learning_rate": 4.088540980475259e-05,
      "loss": 2.573,
      "step": 191665
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.955195665359497,
      "learning_rate": 4.088334826715809e-05,
      "loss": 2.944,
      "step": 191666
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.613098382949829,
      "learning_rate": 4.0881286777738286e-05,
      "loss": 3.1511,
      "step": 191667
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.162851572036743,
      "learning_rate": 4.087922533649348e-05,
      "loss": 2.8465,
      "step": 191668
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8496663570404053,
      "learning_rate": 4.087716394342417e-05,
      "loss": 2.8806,
      "step": 191669
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1860592365264893,
      "learning_rate": 4.087510259853063e-05,
      "loss": 2.8572,
      "step": 191670
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.576585292816162,
      "learning_rate": 4.087304130181335e-05,
      "loss": 3.0285,
      "step": 191671
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.424091100692749,
      "learning_rate": 4.087098005327266e-05,
      "loss": 3.0587,
      "step": 191672
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.815119743347168,
      "learning_rate": 4.086891885290897e-05,
      "loss": 2.8548,
      "step": 191673
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5680670738220215,
      "learning_rate": 4.086685770072254e-05,
      "loss": 2.6755,
      "step": 191674
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8910250663757324,
      "learning_rate": 4.086479659671395e-05,
      "loss": 2.9671,
      "step": 191675
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6661434173583984,
      "learning_rate": 4.0862735540883405e-05,
      "loss": 2.9121,
      "step": 191676
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0991668701171875,
      "learning_rate": 4.086067453323146e-05,
      "loss": 2.7758,
      "step": 191677
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.628947734832764,
      "learning_rate": 4.085861357375838e-05,
      "loss": 3.1351,
      "step": 191678
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.824801921844482,
      "learning_rate": 4.085655266246459e-05,
      "loss": 2.8764,
      "step": 191679
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8747239112854004,
      "learning_rate": 4.085449179935036e-05,
      "loss": 2.9401,
      "step": 191680
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6838219165802,
      "learning_rate": 4.0852430984416295e-05,
      "loss": 3.116,
      "step": 191681
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7437691688537598,
      "learning_rate": 4.0850370217662554e-05,
      "loss": 2.9781,
      "step": 191682
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.885749101638794,
      "learning_rate": 4.084830949908967e-05,
      "loss": 2.9048,
      "step": 191683
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7369298934936523,
      "learning_rate": 4.084624882869801e-05,
      "loss": 3.0475,
      "step": 191684
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7808382511138916,
      "learning_rate": 4.084418820648785e-05,
      "loss": 2.6732,
      "step": 191685
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0122947692871094,
      "learning_rate": 4.084212763245971e-05,
      "loss": 2.6765,
      "step": 191686
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3976621627807617,
      "learning_rate": 4.0840067106613894e-05,
      "loss": 2.6799,
      "step": 191687
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.930652618408203,
      "learning_rate": 4.083800662895077e-05,
      "loss": 3.0451,
      "step": 191688
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.611138105392456,
      "learning_rate": 4.08359461994708e-05,
      "loss": 3.0777,
      "step": 191689
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.938448429107666,
      "learning_rate": 4.083388581817429e-05,
      "loss": 2.7692,
      "step": 191690
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.731781005859375,
      "learning_rate": 4.083182548506164e-05,
      "loss": 2.6529,
      "step": 191691
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.626521587371826,
      "learning_rate": 4.082976520013328e-05,
      "loss": 2.7526,
      "step": 191692
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.51613187789917,
      "learning_rate": 4.082770496338957e-05,
      "loss": 2.6168,
      "step": 191693
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6515464782714844,
      "learning_rate": 4.082564477483083e-05,
      "loss": 2.7294,
      "step": 191694
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.291147708892822,
      "learning_rate": 4.082358463445753e-05,
      "loss": 3.1072,
      "step": 191695
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0586612224578857,
      "learning_rate": 4.0821524542270026e-05,
      "loss": 3.152,
      "step": 191696
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9454023838043213,
      "learning_rate": 4.081946449826864e-05,
      "loss": 3.0723,
      "step": 191697
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6883177757263184,
      "learning_rate": 4.081740450245384e-05,
      "loss": 3.1173,
      "step": 191698
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.580310583114624,
      "learning_rate": 4.081534455482597e-05,
      "loss": 2.8855,
      "step": 191699
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6513068675994873,
      "learning_rate": 4.081328465538545e-05,
      "loss": 3.1048,
      "step": 191700
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6239359378814697,
      "learning_rate": 4.081122480413261e-05,
      "loss": 2.7992,
      "step": 191701
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.464977741241455,
      "learning_rate": 4.08091650010679e-05,
      "loss": 2.9813,
      "step": 191702
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.558767318725586,
      "learning_rate": 4.0807105246191574e-05,
      "loss": 3.0499,
      "step": 191703
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.008917808532715,
      "learning_rate": 4.080504553950417e-05,
      "loss": 2.7853,
      "step": 191704
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0810275077819824,
      "learning_rate": 4.080298588100591e-05,
      "loss": 2.7863,
      "step": 191705
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3939146995544434,
      "learning_rate": 4.080092627069737e-05,
      "loss": 3.0269,
      "step": 191706
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.529162883758545,
      "learning_rate": 4.079886670857876e-05,
      "loss": 2.828,
      "step": 191707
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.864016532897949,
      "learning_rate": 4.079680719465059e-05,
      "loss": 2.9372,
      "step": 191708
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.588221788406372,
      "learning_rate": 4.079474772891318e-05,
      "loss": 2.8922,
      "step": 191709
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.855788469314575,
      "learning_rate": 4.079268831136692e-05,
      "loss": 2.966,
      "step": 191710
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.108585834503174,
      "learning_rate": 4.079062894201214e-05,
      "loss": 3.157,
      "step": 191711
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.118220329284668,
      "learning_rate": 4.078856962084931e-05,
      "loss": 2.6551,
      "step": 191712
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.125768661499023,
      "learning_rate": 4.078651034787873e-05,
      "loss": 2.9926,
      "step": 191713
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0908589363098145,
      "learning_rate": 4.0784451123100906e-05,
      "loss": 3.0048,
      "step": 191714
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7940673828125,
      "learning_rate": 4.078239194651616e-05,
      "loss": 2.7483,
      "step": 191715
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6568126678466797,
      "learning_rate": 4.0780332818124825e-05,
      "loss": 2.7421,
      "step": 191716
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.647137403488159,
      "learning_rate": 4.077827373792728e-05,
      "loss": 2.9114,
      "step": 191717
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7131905555725098,
      "learning_rate": 4.077621470592398e-05,
      "loss": 2.8788,
      "step": 191718
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.538919448852539,
      "learning_rate": 4.077415572211523e-05,
      "loss": 2.956,
      "step": 191719
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5544261932373047,
      "learning_rate": 4.077209678650153e-05,
      "loss": 2.7515,
      "step": 191720
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7009003162384033,
      "learning_rate": 4.077003789908321e-05,
      "loss": 2.797,
      "step": 191721
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7382123470306396,
      "learning_rate": 4.07679790598606e-05,
      "loss": 3.0353,
      "step": 191722
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8102262020111084,
      "learning_rate": 4.076592026883404e-05,
      "loss": 2.8486,
      "step": 191723
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7193048000335693,
      "learning_rate": 4.076386152600409e-05,
      "loss": 3.079,
      "step": 191724
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.289365291595459,
      "learning_rate": 4.076180283137096e-05,
      "loss": 2.7207,
      "step": 191725
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.550732374191284,
      "learning_rate": 4.075974418493514e-05,
      "loss": 3.0615,
      "step": 191726
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6559388637542725,
      "learning_rate": 4.0757685586696995e-05,
      "loss": 2.6583,
      "step": 191727
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6376166343688965,
      "learning_rate": 4.07556270366569e-05,
      "loss": 2.9922,
      "step": 191728
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.017699718475342,
      "learning_rate": 4.0753568534815154e-05,
      "loss": 2.9676,
      "step": 191729
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2698569297790527,
      "learning_rate": 4.075151008117228e-05,
      "loss": 2.867,
      "step": 191730
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.685523509979248,
      "learning_rate": 4.0749451675728515e-05,
      "loss": 2.6342,
      "step": 191731
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1596627235412598,
      "learning_rate": 4.07473933184844e-05,
      "loss": 2.7579,
      "step": 191732
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.124204397201538,
      "learning_rate": 4.074533500944023e-05,
      "loss": 3.0373,
      "step": 191733
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6271777153015137,
      "learning_rate": 4.07432767485964e-05,
      "loss": 2.9467,
      "step": 191734
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.421792984008789,
      "learning_rate": 4.0741218535953245e-05,
      "loss": 2.7606,
      "step": 191735
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.770942211151123,
      "learning_rate": 4.0739160371511234e-05,
      "loss": 2.9083,
      "step": 191736
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4536898136138916,
      "learning_rate": 4.0737102255270637e-05,
      "loss": 3.1975,
      "step": 191737
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.460442066192627,
      "learning_rate": 4.073504418723198e-05,
      "loss": 3.149,
      "step": 191738
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.166243314743042,
      "learning_rate": 4.073298616739556e-05,
      "loss": 3.0476,
      "step": 191739
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5971453189849854,
      "learning_rate": 4.073092819576179e-05,
      "loss": 2.8744,
      "step": 191740
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.655595302581787,
      "learning_rate": 4.072887027233096e-05,
      "loss": 2.6472,
      "step": 191741
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2268126010894775,
      "learning_rate": 4.0726812397103605e-05,
      "loss": 3.12,
      "step": 191742
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.946906089782715,
      "learning_rate": 4.0724754570079954e-05,
      "loss": 2.8075,
      "step": 191743
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.893566131591797,
      "learning_rate": 4.072269679126054e-05,
      "loss": 3.1601,
      "step": 191744
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.368107318878174,
      "learning_rate": 4.0720639060645646e-05,
      "loss": 2.9589,
      "step": 191745
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5891971588134766,
      "learning_rate": 4.071858137823571e-05,
      "loss": 2.9702,
      "step": 191746
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2692832946777344,
      "learning_rate": 4.0716523744030995e-05,
      "loss": 3.0914,
      "step": 191747
    },
    {
      "epoch": 2.5,
      "grad_norm": 6.710972785949707,
      "learning_rate": 4.0714466158032046e-05,
      "loss": 3.0227,
      "step": 191748
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.902902126312256,
      "learning_rate": 4.07124086202391e-05,
      "loss": 3.0247,
      "step": 191749
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6921751499176025,
      "learning_rate": 4.07103511306527e-05,
      "loss": 2.8302,
      "step": 191750
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2811200618743896,
      "learning_rate": 4.070829368927313e-05,
      "loss": 2.9234,
      "step": 191751
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8997228145599365,
      "learning_rate": 4.070623629610077e-05,
      "loss": 3.0872,
      "step": 191752
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3570728302001953,
      "learning_rate": 4.070417895113598e-05,
      "loss": 3.1725,
      "step": 191753
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.872842311859131,
      "learning_rate": 4.0702121654379195e-05,
      "loss": 2.8835,
      "step": 191754
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.959479331970215,
      "learning_rate": 4.0700064405830746e-05,
      "loss": 2.8393,
      "step": 191755
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4840948581695557,
      "learning_rate": 4.06980072054911e-05,
      "loss": 2.9305,
      "step": 191756
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.090083122253418,
      "learning_rate": 4.0695950053360624e-05,
      "loss": 2.952,
      "step": 191757
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0055809020996094,
      "learning_rate": 4.069389294943962e-05,
      "loss": 2.9891,
      "step": 191758
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8856778144836426,
      "learning_rate": 4.069183589372849e-05,
      "loss": 2.8356,
      "step": 191759
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1176822185516357,
      "learning_rate": 4.068977888622769e-05,
      "loss": 2.7762,
      "step": 191760
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4246954917907715,
      "learning_rate": 4.0687721926937486e-05,
      "loss": 2.8461,
      "step": 191761
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.927438497543335,
      "learning_rate": 4.0685665015858395e-05,
      "loss": 2.9758,
      "step": 191762
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.3079888820648193,
      "learning_rate": 4.068360815299073e-05,
      "loss": 2.9697,
      "step": 191763
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7368412017822266,
      "learning_rate": 4.06815513383349e-05,
      "loss": 2.7076,
      "step": 191764
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.660083532333374,
      "learning_rate": 4.067949457189118e-05,
      "loss": 2.9252,
      "step": 191765
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3379509449005127,
      "learning_rate": 4.067743785366009e-05,
      "loss": 3.015,
      "step": 191766
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.016371250152588,
      "learning_rate": 4.067538118364193e-05,
      "loss": 2.8646,
      "step": 191767
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8980722427368164,
      "learning_rate": 4.067332456183714e-05,
      "loss": 2.9992,
      "step": 191768
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.840953826904297,
      "learning_rate": 4.067126798824601e-05,
      "loss": 2.7194,
      "step": 191769
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5218329429626465,
      "learning_rate": 4.0669211462869086e-05,
      "loss": 2.7158,
      "step": 191770
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3429434299468994,
      "learning_rate": 4.066715498570663e-05,
      "loss": 3.1431,
      "step": 191771
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.635160446166992,
      "learning_rate": 4.066509855675904e-05,
      "loss": 2.9242,
      "step": 191772
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1173181533813477,
      "learning_rate": 4.066304217602664e-05,
      "loss": 2.9145,
      "step": 191773
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4508249759674072,
      "learning_rate": 4.0660985843509976e-05,
      "loss": 2.9783,
      "step": 191774
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.118910551071167,
      "learning_rate": 4.065892955920921e-05,
      "loss": 3.0436,
      "step": 191775
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.551306962966919,
      "learning_rate": 4.0656873323124936e-05,
      "loss": 3.0507,
      "step": 191776
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4681661128997803,
      "learning_rate": 4.065481713525747e-05,
      "loss": 2.9416,
      "step": 191777
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6161653995513916,
      "learning_rate": 4.065276099560712e-05,
      "loss": 2.9524,
      "step": 191778
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8793063163757324,
      "learning_rate": 4.065070490417428e-05,
      "loss": 3.0842,
      "step": 191779
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6834421157836914,
      "learning_rate": 4.064864886095943e-05,
      "loss": 2.8451,
      "step": 191780
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3778719902038574,
      "learning_rate": 4.0646592865962815e-05,
      "loss": 2.8961,
      "step": 191781
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5830037593841553,
      "learning_rate": 4.064453691918499e-05,
      "loss": 2.6696,
      "step": 191782
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.759810447692871,
      "learning_rate": 4.0642481020626225e-05,
      "loss": 2.8965,
      "step": 191783
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4832992553710938,
      "learning_rate": 4.0640425170286826e-05,
      "loss": 2.9111,
      "step": 191784
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.656268835067749,
      "learning_rate": 4.063836936816736e-05,
      "loss": 2.9944,
      "step": 191785
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8710107803344727,
      "learning_rate": 4.0636313614268115e-05,
      "loss": 3.0455,
      "step": 191786
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.078420877456665,
      "learning_rate": 4.063425790858943e-05,
      "loss": 2.9118,
      "step": 191787
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.441955804824829,
      "learning_rate": 4.0632202251131776e-05,
      "loss": 2.915,
      "step": 191788
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.903855323791504,
      "learning_rate": 4.063014664189548e-05,
      "loss": 2.8834,
      "step": 191789
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.110413074493408,
      "learning_rate": 4.062809108088087e-05,
      "loss": 2.9956,
      "step": 191790
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.443563938140869,
      "learning_rate": 4.06260355680885e-05,
      "loss": 2.9508,
      "step": 191791
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.750837802886963,
      "learning_rate": 4.062398010351855e-05,
      "loss": 3.2036,
      "step": 191792
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1155343055725098,
      "learning_rate": 4.0621924687171556e-05,
      "loss": 2.8834,
      "step": 191793
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.257212162017822,
      "learning_rate": 4.061986931904786e-05,
      "loss": 2.6254,
      "step": 191794
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.613936424255371,
      "learning_rate": 4.061781399914782e-05,
      "loss": 2.9513,
      "step": 191795
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.3762712478637695,
      "learning_rate": 4.0615758727471736e-05,
      "loss": 3.2042,
      "step": 191796
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.108413219451904,
      "learning_rate": 4.061370350402018e-05,
      "loss": 3.0376,
      "step": 191797
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.503014087677002,
      "learning_rate": 4.061164832879334e-05,
      "loss": 2.899,
      "step": 191798
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.491417646408081,
      "learning_rate": 4.060959320179177e-05,
      "loss": 2.9609,
      "step": 191799
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.739128351211548,
      "learning_rate": 4.060753812301578e-05,
      "loss": 3.0452,
      "step": 191800
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4782304763793945,
      "learning_rate": 4.060548309246575e-05,
      "loss": 3.0318,
      "step": 191801
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7386744022369385,
      "learning_rate": 4.060342811014198e-05,
      "loss": 2.991,
      "step": 191802
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.275408983230591,
      "learning_rate": 4.0601373176045e-05,
      "loss": 2.9994,
      "step": 191803
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7247464656829834,
      "learning_rate": 4.0599318290175035e-05,
      "loss": 2.8611,
      "step": 191804
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3715755939483643,
      "learning_rate": 4.059726345253263e-05,
      "loss": 2.9343,
      "step": 191805
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.676558017730713,
      "learning_rate": 4.059520866311811e-05,
      "loss": 2.9193,
      "step": 191806
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5895323753356934,
      "learning_rate": 4.059315392193181e-05,
      "loss": 3.0216,
      "step": 191807
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8556647300720215,
      "learning_rate": 4.05910992289741e-05,
      "loss": 2.779,
      "step": 191808
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.651063442230225,
      "learning_rate": 4.058904458424544e-05,
      "loss": 2.7958,
      "step": 191809
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4985287189483643,
      "learning_rate": 4.058698998774611e-05,
      "loss": 3.1752,
      "step": 191810
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.3783745765686035,
      "learning_rate": 4.058493543947663e-05,
      "loss": 3.2678,
      "step": 191811
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0574100017547607,
      "learning_rate": 4.0582880939437336e-05,
      "loss": 2.9755,
      "step": 191812
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7851457595825195,
      "learning_rate": 4.058082648762853e-05,
      "loss": 3.0688,
      "step": 191813
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.5518798828125,
      "learning_rate": 4.057877208405061e-05,
      "loss": 3.0783,
      "step": 191814
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.017758846282959,
      "learning_rate": 4.0576717728704065e-05,
      "loss": 2.9594,
      "step": 191815
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.354557514190674,
      "learning_rate": 4.0574663421589116e-05,
      "loss": 2.9732,
      "step": 191816
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.911872148513794,
      "learning_rate": 4.057260916270632e-05,
      "loss": 2.8485,
      "step": 191817
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.3824844360351562,
      "learning_rate": 4.057055495205597e-05,
      "loss": 2.9087,
      "step": 191818
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.3259782791137695,
      "learning_rate": 4.056850078963844e-05,
      "loss": 2.9353,
      "step": 191819
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6513378620147705,
      "learning_rate": 4.0566446675454055e-05,
      "loss": 3.0263,
      "step": 191820
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.37274169921875,
      "learning_rate": 4.0564392609503325e-05,
      "loss": 2.8215,
      "step": 191821
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.2412824630737305,
      "learning_rate": 4.0562338591786514e-05,
      "loss": 2.8796,
      "step": 191822
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.588778495788574,
      "learning_rate": 4.056028462230415e-05,
      "loss": 2.906,
      "step": 191823
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.614138603210449,
      "learning_rate": 4.0558230701056505e-05,
      "loss": 2.6563,
      "step": 191824
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3533291816711426,
      "learning_rate": 4.055617682804397e-05,
      "loss": 2.7825,
      "step": 191825
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5280776023864746,
      "learning_rate": 4.055412300326689e-05,
      "loss": 2.919,
      "step": 191826
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0528578758239746,
      "learning_rate": 4.055206922672576e-05,
      "loss": 3.0204,
      "step": 191827
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5571985244750977,
      "learning_rate": 4.055001549842084e-05,
      "loss": 2.8775,
      "step": 191828
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.829512596130371,
      "learning_rate": 4.0547961818352634e-05,
      "loss": 3.2241,
      "step": 191829
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.885084867477417,
      "learning_rate": 4.054590818652145e-05,
      "loss": 2.9193,
      "step": 191830
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.2466888427734375,
      "learning_rate": 4.054385460292767e-05,
      "loss": 3.0739,
      "step": 191831
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.415321111679077,
      "learning_rate": 4.054180106757164e-05,
      "loss": 2.9591,
      "step": 191832
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8646321296691895,
      "learning_rate": 4.053974758045386e-05,
      "loss": 3.0668,
      "step": 191833
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8084089756011963,
      "learning_rate": 4.0537694141574564e-05,
      "loss": 2.9021,
      "step": 191834
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.358754873275757,
      "learning_rate": 4.053564075093424e-05,
      "loss": 2.7679,
      "step": 191835
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3779730796813965,
      "learning_rate": 4.05335874085332e-05,
      "loss": 2.9657,
      "step": 191836
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.170416831970215,
      "learning_rate": 4.0531534114372e-05,
      "loss": 2.9369,
      "step": 191837
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7886300086975098,
      "learning_rate": 4.052948086845075e-05,
      "loss": 2.8936,
      "step": 191838
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.641523599624634,
      "learning_rate": 4.052742767077004e-05,
      "loss": 2.8122,
      "step": 191839
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.517468214035034,
      "learning_rate": 4.052537452133011e-05,
      "loss": 2.9203,
      "step": 191840
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5435128211975098,
      "learning_rate": 4.052332142013146e-05,
      "loss": 3.1932,
      "step": 191841
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.500906467437744,
      "learning_rate": 4.052126836717438e-05,
      "loss": 2.6798,
      "step": 191842
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.906298875808716,
      "learning_rate": 4.051921536245942e-05,
      "loss": 3.0626,
      "step": 191843
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.354414939880371,
      "learning_rate": 4.051716240598667e-05,
      "loss": 2.8575,
      "step": 191844
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7157974243164062,
      "learning_rate": 4.051510949775679e-05,
      "loss": 2.8582,
      "step": 191845
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5951428413391113,
      "learning_rate": 4.051305663776996e-05,
      "loss": 3.0064,
      "step": 191846
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.113020658493042,
      "learning_rate": 4.0511003826026734e-05,
      "loss": 2.6048,
      "step": 191847
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.036557197570801,
      "learning_rate": 4.0508951062527315e-05,
      "loss": 2.7669,
      "step": 191848
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2559428215026855,
      "learning_rate": 4.0506898347272346e-05,
      "loss": 3.0916,
      "step": 191849
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.195190191268921,
      "learning_rate": 4.050484568026188e-05,
      "loss": 2.8467,
      "step": 191850
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.984341859817505,
      "learning_rate": 4.050279306149655e-05,
      "loss": 3.094,
      "step": 191851
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0256824493408203,
      "learning_rate": 4.050074049097657e-05,
      "loss": 2.9408,
      "step": 191852
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3488259315490723,
      "learning_rate": 4.049868796870249e-05,
      "loss": 2.6881,
      "step": 191853
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5797924995422363,
      "learning_rate": 4.049663549467452e-05,
      "loss": 2.8918,
      "step": 191854
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8321380615234375,
      "learning_rate": 4.049458306889326e-05,
      "loss": 2.7811,
      "step": 191855
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6563541889190674,
      "learning_rate": 4.04925306913588e-05,
      "loss": 2.938,
      "step": 191856
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.630664348602295,
      "learning_rate": 4.0490478362071756e-05,
      "loss": 2.8638,
      "step": 191857
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0452616214752197,
      "learning_rate": 4.048842608103238e-05,
      "loss": 3.0467,
      "step": 191858
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4001739025115967,
      "learning_rate": 4.0486373848241174e-05,
      "loss": 2.9467,
      "step": 191859
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.476780652999878,
      "learning_rate": 4.048432166369837e-05,
      "loss": 2.8835,
      "step": 191860
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5080666542053223,
      "learning_rate": 4.0482269527404474e-05,
      "loss": 2.9272,
      "step": 191861
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7674241065979004,
      "learning_rate": 4.048021743935985e-05,
      "loss": 3.0045,
      "step": 191862
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4352736473083496,
      "learning_rate": 4.047816539956487e-05,
      "loss": 2.9372,
      "step": 191863
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.879572868347168,
      "learning_rate": 4.047611340801978e-05,
      "loss": 2.9149,
      "step": 191864
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9304778575897217,
      "learning_rate": 4.04740614647252e-05,
      "loss": 2.8798,
      "step": 191865
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8672306537628174,
      "learning_rate": 4.04720095696813e-05,
      "loss": 3.0205,
      "step": 191866
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2726223468780518,
      "learning_rate": 4.046995772288863e-05,
      "loss": 2.7964,
      "step": 191867
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7658376693725586,
      "learning_rate": 4.046790592434746e-05,
      "loss": 3.1142,
      "step": 191868
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.105262041091919,
      "learning_rate": 4.046585417405815e-05,
      "loss": 2.8628,
      "step": 191869
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.789905309677124,
      "learning_rate": 4.046380247202122e-05,
      "loss": 2.9351,
      "step": 191870
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.9293079376220703,
      "learning_rate": 4.046175081823696e-05,
      "loss": 2.9311,
      "step": 191871
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4869847297668457,
      "learning_rate": 4.04596992127057e-05,
      "loss": 2.9601,
      "step": 191872
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.300194501876831,
      "learning_rate": 4.045764765542794e-05,
      "loss": 3.0893,
      "step": 191873
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7444803714752197,
      "learning_rate": 4.0455596146404016e-05,
      "loss": 2.912,
      "step": 191874
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.597358226776123,
      "learning_rate": 4.045354468563422e-05,
      "loss": 2.7407,
      "step": 191875
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2660622596740723,
      "learning_rate": 4.045149327311906e-05,
      "loss": 3.1225,
      "step": 191876
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8276798725128174,
      "learning_rate": 4.0449441908858903e-05,
      "loss": 2.8123,
      "step": 191877
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.50331711769104,
      "learning_rate": 4.0447390592854014e-05,
      "loss": 3.007,
      "step": 191878
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8411142826080322,
      "learning_rate": 4.044533932510488e-05,
      "loss": 2.9294,
      "step": 191879
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6435837745666504,
      "learning_rate": 4.044328810561189e-05,
      "loss": 2.9803,
      "step": 191880
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3220648765563965,
      "learning_rate": 4.044123693437533e-05,
      "loss": 3.0173,
      "step": 191881
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.069868564605713,
      "learning_rate": 4.043918581139573e-05,
      "loss": 3.2306,
      "step": 191882
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.9505615234375,
      "learning_rate": 4.0437134736673274e-05,
      "loss": 3.0064,
      "step": 191883
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5510826110839844,
      "learning_rate": 4.043508371020854e-05,
      "loss": 2.8141,
      "step": 191884
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.85573148727417,
      "learning_rate": 4.043303273200183e-05,
      "loss": 3.0617,
      "step": 191885
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1323494911193848,
      "learning_rate": 4.0430981802053534e-05,
      "loss": 2.9669,
      "step": 191886
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7103588581085205,
      "learning_rate": 4.042893092036392e-05,
      "loss": 2.7974,
      "step": 191887
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.382127285003662,
      "learning_rate": 4.042688008693354e-05,
      "loss": 2.7174,
      "step": 191888
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.736518621444702,
      "learning_rate": 4.042482930176266e-05,
      "loss": 2.8357,
      "step": 191889
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0587615966796875,
      "learning_rate": 4.042277856485174e-05,
      "loss": 2.7822,
      "step": 191890
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7204737663269043,
      "learning_rate": 4.0420727876201155e-05,
      "loss": 2.9543,
      "step": 191891
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7367990016937256,
      "learning_rate": 4.0418677235811225e-05,
      "loss": 2.7785,
      "step": 191892
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1727468967437744,
      "learning_rate": 4.0416626643682325e-05,
      "loss": 2.8745,
      "step": 191893
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.756425142288208,
      "learning_rate": 4.0414576099814924e-05,
      "loss": 2.9435,
      "step": 191894
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.923525810241699,
      "learning_rate": 4.041252560420928e-05,
      "loss": 2.7959,
      "step": 191895
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2095389366149902,
      "learning_rate": 4.041047515686593e-05,
      "loss": 2.9299,
      "step": 191896
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7134156227111816,
      "learning_rate": 4.0408424757785175e-05,
      "loss": 2.7085,
      "step": 191897
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8445746898651123,
      "learning_rate": 4.0406374406967376e-05,
      "loss": 2.6979,
      "step": 191898
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5540902614593506,
      "learning_rate": 4.040432410441288e-05,
      "loss": 2.9097,
      "step": 191899
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.176877737045288,
      "learning_rate": 4.040227385012217e-05,
      "loss": 2.879,
      "step": 191900
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9195432662963867,
      "learning_rate": 4.040022364409552e-05,
      "loss": 2.9586,
      "step": 191901
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.182931900024414,
      "learning_rate": 4.039817348633343e-05,
      "loss": 2.962,
      "step": 191902
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.3995535373687744,
      "learning_rate": 4.039612337683616e-05,
      "loss": 2.7685,
      "step": 191903
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.040146589279175,
      "learning_rate": 4.039407331560429e-05,
      "loss": 3.0887,
      "step": 191904
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5762994289398193,
      "learning_rate": 4.0392023302637945e-05,
      "loss": 2.8262,
      "step": 191905
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5950918197631836,
      "learning_rate": 4.0389973337937656e-05,
      "loss": 2.835,
      "step": 191906
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.664211750030518,
      "learning_rate": 4.038792342150372e-05,
      "loss": 2.9944,
      "step": 191907
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7775468826293945,
      "learning_rate": 4.038587355333665e-05,
      "loss": 3.0629,
      "step": 191908
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.236186504364014,
      "learning_rate": 4.038382373343666e-05,
      "loss": 2.8631,
      "step": 191909
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.625418186187744,
      "learning_rate": 4.0381773961804366e-05,
      "loss": 2.9814,
      "step": 191910
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4974451065063477,
      "learning_rate": 4.0379724238439823e-05,
      "loss": 2.9601,
      "step": 191911
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.522128105163574,
      "learning_rate": 4.037767456334371e-05,
      "loss": 3.0811,
      "step": 191912
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.630664587020874,
      "learning_rate": 4.037562493651622e-05,
      "loss": 3.1169,
      "step": 191913
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.810310125350952,
      "learning_rate": 4.0373575357957845e-05,
      "loss": 2.9184,
      "step": 191914
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1363725662231445,
      "learning_rate": 4.037152582766886e-05,
      "loss": 2.9124,
      "step": 191915
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1132779121398926,
      "learning_rate": 4.0369476345649834e-05,
      "loss": 2.9214,
      "step": 191916
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.493250846862793,
      "learning_rate": 4.036742691190089e-05,
      "loss": 3.0131,
      "step": 191917
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7925078868865967,
      "learning_rate": 4.036537752642264e-05,
      "loss": 3.0227,
      "step": 191918
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6514430046081543,
      "learning_rate": 4.036332818921527e-05,
      "loss": 2.9508,
      "step": 191919
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.3240203857421875,
      "learning_rate": 4.036127890027936e-05,
      "loss": 2.8774,
      "step": 191920
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8936703205108643,
      "learning_rate": 4.035922965961507e-05,
      "loss": 2.8437,
      "step": 191921
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.052642822265625,
      "learning_rate": 4.035718046722306e-05,
      "loss": 2.9968,
      "step": 191922
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.035238265991211,
      "learning_rate": 4.035513132310344e-05,
      "loss": 2.7961,
      "step": 191923
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.699583053588867,
      "learning_rate": 4.035308222725674e-05,
      "loss": 3.1006,
      "step": 191924
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6972122192382812,
      "learning_rate": 4.035103317968326e-05,
      "loss": 2.8908,
      "step": 191925
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7974796295166016,
      "learning_rate": 4.034898418038346e-05,
      "loss": 2.7184,
      "step": 191926
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.2495312690734863,
      "learning_rate": 4.0346935229357644e-05,
      "loss": 3.1127,
      "step": 191927
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6246275901794434,
      "learning_rate": 4.034488632660635e-05,
      "loss": 2.9972,
      "step": 191928
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0018057823181152,
      "learning_rate": 4.034283747212974e-05,
      "loss": 2.9478,
      "step": 191929
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.483645915985107,
      "learning_rate": 4.034078866592832e-05,
      "loss": 2.9852,
      "step": 191930
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1747326850891113,
      "learning_rate": 4.0338739908002406e-05,
      "loss": 2.9409,
      "step": 191931
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.139626979827881,
      "learning_rate": 4.0336691198352475e-05,
      "loss": 3.0486,
      "step": 191932
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.831554651260376,
      "learning_rate": 4.0334642536978834e-05,
      "loss": 3.2015,
      "step": 191933
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5283796787261963,
      "learning_rate": 4.033259392388197e-05,
      "loss": 2.7743,
      "step": 191934
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4669580459594727,
      "learning_rate": 4.033054535906206e-05,
      "loss": 3.0168,
      "step": 191935
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.62392520904541,
      "learning_rate": 4.0328496842519665e-05,
      "loss": 2.94,
      "step": 191936
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0309085845947266,
      "learning_rate": 4.032644837425505e-05,
      "loss": 3.2374,
      "step": 191937
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.769949197769165,
      "learning_rate": 4.0324399954268715e-05,
      "loss": 3.0,
      "step": 191938
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.2979934215545654,
      "learning_rate": 4.0322351582560894e-05,
      "loss": 3.0302,
      "step": 191939
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.561218500137329,
      "learning_rate": 4.032030325913219e-05,
      "loss": 2.6939,
      "step": 191940
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.229097604751587,
      "learning_rate": 4.03182549839827e-05,
      "loss": 2.9688,
      "step": 191941
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6452815532684326,
      "learning_rate": 4.031620675711302e-05,
      "loss": 3.02,
      "step": 191942
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.969513416290283,
      "learning_rate": 4.031415857852343e-05,
      "loss": 3.0702,
      "step": 191943
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6046364307403564,
      "learning_rate": 4.031211044821437e-05,
      "loss": 2.843,
      "step": 191944
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7175955772399902,
      "learning_rate": 4.0310062366186134e-05,
      "loss": 2.9603,
      "step": 191945
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.643249750137329,
      "learning_rate": 4.030801433243921e-05,
      "loss": 3.0285,
      "step": 191946
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.485698699951172,
      "learning_rate": 4.0305966346973925e-05,
      "loss": 3.1034,
      "step": 191947
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7830913066864014,
      "learning_rate": 4.030391840979069e-05,
      "loss": 2.8023,
      "step": 191948
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.592087984085083,
      "learning_rate": 4.030187052088977e-05,
      "loss": 2.6978,
      "step": 191949
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.33427095413208,
      "learning_rate": 4.029982268027172e-05,
      "loss": 2.8164,
      "step": 191950
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6032190322875977,
      "learning_rate": 4.029777488793675e-05,
      "loss": 2.7723,
      "step": 191951
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9007017612457275,
      "learning_rate": 4.029572714388543e-05,
      "loss": 2.7904,
      "step": 191952
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.234353542327881,
      "learning_rate": 4.0293679448117976e-05,
      "loss": 2.9123,
      "step": 191953
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6472818851470947,
      "learning_rate": 4.0291631800634804e-05,
      "loss": 3.0278,
      "step": 191954
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.666494846343994,
      "learning_rate": 4.028958420143638e-05,
      "loss": 2.9928,
      "step": 191955
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.619412660598755,
      "learning_rate": 4.028753665052302e-05,
      "loss": 2.7356,
      "step": 191956
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7745800018310547,
      "learning_rate": 4.0285489147895045e-05,
      "loss": 2.9768,
      "step": 191957
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8478119373321533,
      "learning_rate": 4.0283441693552976e-05,
      "loss": 2.9301,
      "step": 191958
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6135976314544678,
      "learning_rate": 4.028139428749712e-05,
      "loss": 2.9755,
      "step": 191959
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.100644111633301,
      "learning_rate": 4.027934692972776e-05,
      "loss": 3.0435,
      "step": 191960
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4277541637420654,
      "learning_rate": 4.027729962024545e-05,
      "loss": 2.8236,
      "step": 191961
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0808587074279785,
      "learning_rate": 4.0275252359050516e-05,
      "loss": 2.9997,
      "step": 191962
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.166795015335083,
      "learning_rate": 4.0273205146143215e-05,
      "loss": 2.8143,
      "step": 191963
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.912277936935425,
      "learning_rate": 4.0271157981524124e-05,
      "loss": 3.1285,
      "step": 191964
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.770521402359009,
      "learning_rate": 4.026911086519351e-05,
      "loss": 3.0604,
      "step": 191965
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.080670118331909,
      "learning_rate": 4.02670637971517e-05,
      "loss": 3.2013,
      "step": 191966
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6924688816070557,
      "learning_rate": 4.026501677739923e-05,
      "loss": 2.9086,
      "step": 191967
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2505249977111816,
      "learning_rate": 4.026296980593633e-05,
      "loss": 2.8443,
      "step": 191968
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.594221591949463,
      "learning_rate": 4.02609228827635e-05,
      "loss": 2.7954,
      "step": 191969
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.922495126724243,
      "learning_rate": 4.025887600788108e-05,
      "loss": 3.1524,
      "step": 191970
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0468242168426514,
      "learning_rate": 4.025682918128943e-05,
      "loss": 2.9657,
      "step": 191971
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3076822757720947,
      "learning_rate": 4.025478240298885e-05,
      "loss": 3.0201,
      "step": 191972
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5409364700317383,
      "learning_rate": 4.0252735672979905e-05,
      "loss": 2.7364,
      "step": 191973
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8549880981445312,
      "learning_rate": 4.02506889912628e-05,
      "loss": 2.7891,
      "step": 191974
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9042046070098877,
      "learning_rate": 4.024864235783807e-05,
      "loss": 2.8852,
      "step": 191975
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4952690601348877,
      "learning_rate": 4.0246595772706004e-05,
      "loss": 2.762,
      "step": 191976
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.45650577545166,
      "learning_rate": 4.024454923586704e-05,
      "loss": 3.14,
      "step": 191977
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3789453506469727,
      "learning_rate": 4.024250274732141e-05,
      "loss": 2.8484,
      "step": 191978
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6963465213775635,
      "learning_rate": 4.024045630706969e-05,
      "loss": 2.8958,
      "step": 191979
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.797807216644287,
      "learning_rate": 4.0238409915112104e-05,
      "loss": 3.1441,
      "step": 191980
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8120970726013184,
      "learning_rate": 4.023636357144915e-05,
      "loss": 2.9278,
      "step": 191981
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9989097118377686,
      "learning_rate": 4.0234317276081094e-05,
      "loss": 2.8708,
      "step": 191982
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4041404724121094,
      "learning_rate": 4.0232271029008546e-05,
      "loss": 2.92,
      "step": 191983
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5547714233398438,
      "learning_rate": 4.023022483023156e-05,
      "loss": 2.9005,
      "step": 191984
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6226418018341064,
      "learning_rate": 4.0228178679750746e-05,
      "loss": 2.8091,
      "step": 191985
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.841257333755493,
      "learning_rate": 4.022613257756636e-05,
      "loss": 2.8137,
      "step": 191986
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.277085304260254,
      "learning_rate": 4.0224086523678914e-05,
      "loss": 3.0615,
      "step": 191987
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.05686616897583,
      "learning_rate": 4.022204051808863e-05,
      "loss": 2.9357,
      "step": 191988
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.80087947845459,
      "learning_rate": 4.021999456079611e-05,
      "loss": 2.8107,
      "step": 191989
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.594104051589966,
      "learning_rate": 4.021794865180146e-05,
      "loss": 2.8908,
      "step": 191990
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6702165603637695,
      "learning_rate": 4.021590279110527e-05,
      "loss": 2.9399,
      "step": 191991
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0133914947509766,
      "learning_rate": 4.021385697870778e-05,
      "loss": 2.7444,
      "step": 191992
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8739187717437744,
      "learning_rate": 4.021181121460952e-05,
      "loss": 2.8932,
      "step": 191993
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.186722755432129,
      "learning_rate": 4.0209765498810696e-05,
      "loss": 2.8894,
      "step": 191994
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.895880699157715,
      "learning_rate": 4.020771983131194e-05,
      "loss": 3.2335,
      "step": 191995
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2905466556549072,
      "learning_rate": 4.0205674212113336e-05,
      "loss": 2.7461,
      "step": 191996
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.052520275115967,
      "learning_rate": 4.020362864121543e-05,
      "loss": 2.7604,
      "step": 191997
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8228886127471924,
      "learning_rate": 4.020158311861853e-05,
      "loss": 2.9535,
      "step": 191998
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2107667922973633,
      "learning_rate": 4.019953764432315e-05,
      "loss": 2.9081,
      "step": 191999
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2646870613098145,
      "learning_rate": 4.019749221832947e-05,
      "loss": 2.8401,
      "step": 192000
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6125752925872803,
      "learning_rate": 4.019544684063816e-05,
      "loss": 2.9692,
      "step": 192001
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9115326404571533,
      "learning_rate": 4.019340151124927e-05,
      "loss": 2.9437,
      "step": 192002
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.187061071395874,
      "learning_rate": 4.019135623016337e-05,
      "loss": 2.9515,
      "step": 192003
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7546098232269287,
      "learning_rate": 4.018931099738077e-05,
      "loss": 3.2092,
      "step": 192004
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7544615268707275,
      "learning_rate": 4.0187265812901936e-05,
      "loss": 3.1817,
      "step": 192005
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.792689323425293,
      "learning_rate": 4.0185220676727125e-05,
      "loss": 2.8392,
      "step": 192006
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0836257934570312,
      "learning_rate": 4.018317558885691e-05,
      "loss": 2.9754,
      "step": 192007
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8304789066314697,
      "learning_rate": 4.0181130549291416e-05,
      "loss": 3.0022,
      "step": 192008
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.978071451187134,
      "learning_rate": 4.017908555803126e-05,
      "loss": 2.7753,
      "step": 192009
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9068613052368164,
      "learning_rate": 4.0177040615076615e-05,
      "loss": 2.8441,
      "step": 192010
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.639457941055298,
      "learning_rate": 4.0174995720428004e-05,
      "loss": 3.0002,
      "step": 192011
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7838668823242188,
      "learning_rate": 4.017295087408575e-05,
      "loss": 3.08,
      "step": 192012
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6510672569274902,
      "learning_rate": 4.017090607605036e-05,
      "loss": 3.0133,
      "step": 192013
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7691397666931152,
      "learning_rate": 4.016886132632196e-05,
      "loss": 2.6746,
      "step": 192014
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7231369018554688,
      "learning_rate": 4.0166816624901146e-05,
      "loss": 2.9766,
      "step": 192015
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1334469318389893,
      "learning_rate": 4.0164771971788154e-05,
      "loss": 2.5439,
      "step": 192016
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5978548526763916,
      "learning_rate": 4.0162727366983524e-05,
      "loss": 3.1543,
      "step": 192017
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.9463860988616943,
      "learning_rate": 4.0160682810487454e-05,
      "loss": 2.7935,
      "step": 192018
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7588770389556885,
      "learning_rate": 4.015863830230057e-05,
      "loss": 2.886,
      "step": 192019
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.869964122772217,
      "learning_rate": 4.015659384242298e-05,
      "loss": 2.988,
      "step": 192020
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9549734592437744,
      "learning_rate": 4.0154549430855246e-05,
      "loss": 2.8178,
      "step": 192021
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6372714042663574,
      "learning_rate": 4.015250506759759e-05,
      "loss": 3.0373,
      "step": 192022
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.934143543243408,
      "learning_rate": 4.015046075265057e-05,
      "loss": 2.8601,
      "step": 192023
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4208521842956543,
      "learning_rate": 4.0148416486014434e-05,
      "loss": 2.8516,
      "step": 192024
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.208831310272217,
      "learning_rate": 4.014637226768972e-05,
      "loss": 2.8822,
      "step": 192025
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.762105703353882,
      "learning_rate": 4.014432809767656e-05,
      "loss": 2.9041,
      "step": 192026
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8105556964874268,
      "learning_rate": 4.0142283975975584e-05,
      "loss": 3.0915,
      "step": 192027
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.067021131515503,
      "learning_rate": 4.014023990258697e-05,
      "loss": 2.9637,
      "step": 192028
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.977802276611328,
      "learning_rate": 4.013819587751123e-05,
      "loss": 2.915,
      "step": 192029
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8103106021881104,
      "learning_rate": 4.013615190074869e-05,
      "loss": 2.9688,
      "step": 192030
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.416255474090576,
      "learning_rate": 4.01341079722998e-05,
      "loss": 2.8324,
      "step": 192031
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.480872631072998,
      "learning_rate": 4.013206409216486e-05,
      "loss": 2.9614,
      "step": 192032
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1400580406188965,
      "learning_rate": 4.0130020260344275e-05,
      "loss": 2.9603,
      "step": 192033
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.692563533782959,
      "learning_rate": 4.012797647683838e-05,
      "loss": 3.0739,
      "step": 192034
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.516230821609497,
      "learning_rate": 4.012593274164766e-05,
      "loss": 2.7695,
      "step": 192035
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9493868350982666,
      "learning_rate": 4.012388905477236e-05,
      "loss": 2.9666,
      "step": 192036
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.07631516456604,
      "learning_rate": 4.012184541621302e-05,
      "loss": 3.158,
      "step": 192037
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7142724990844727,
      "learning_rate": 4.011980182596992e-05,
      "loss": 2.8316,
      "step": 192038
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.9383599758148193,
      "learning_rate": 4.011775828404344e-05,
      "loss": 2.7928,
      "step": 192039
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.904376029968262,
      "learning_rate": 4.011571479043392e-05,
      "loss": 2.8781,
      "step": 192040
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4925737380981445,
      "learning_rate": 4.011367134514187e-05,
      "loss": 3.0075,
      "step": 192041
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.827895402908325,
      "learning_rate": 4.0111627948167513e-05,
      "loss": 2.7881,
      "step": 192042
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0853564739227295,
      "learning_rate": 4.01095845995114e-05,
      "loss": 3.1522,
      "step": 192043
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8550026416778564,
      "learning_rate": 4.0107541299173806e-05,
      "loss": 3.0326,
      "step": 192044
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8943300247192383,
      "learning_rate": 4.010549804715506e-05,
      "loss": 3.0945,
      "step": 192045
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2829525470733643,
      "learning_rate": 4.0103454843455664e-05,
      "loss": 3.0106,
      "step": 192046
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8534996509552,
      "learning_rate": 4.0101411688075944e-05,
      "loss": 3.1605,
      "step": 192047
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1694116592407227,
      "learning_rate": 4.009936858101621e-05,
      "loss": 2.9251,
      "step": 192048
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2204477787017822,
      "learning_rate": 4.009732552227699e-05,
      "loss": 2.7647,
      "step": 192049
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9875009059906006,
      "learning_rate": 4.009528251185858e-05,
      "loss": 2.9657,
      "step": 192050
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.629009246826172,
      "learning_rate": 4.009323954976129e-05,
      "loss": 3.0169,
      "step": 192051
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5584588050842285,
      "learning_rate": 4.0091196635985645e-05,
      "loss": 3.0562,
      "step": 192052
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7530665397644043,
      "learning_rate": 4.008915377053188e-05,
      "loss": 2.8849,
      "step": 192053
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.173348903656006,
      "learning_rate": 4.0087110953400524e-05,
      "loss": 2.6132,
      "step": 192054
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.731369972229004,
      "learning_rate": 4.0085068184591884e-05,
      "loss": 2.8209,
      "step": 192055
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3247759342193604,
      "learning_rate": 4.0083025464106324e-05,
      "loss": 3.0078,
      "step": 192056
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7279162406921387,
      "learning_rate": 4.008098279194417e-05,
      "loss": 2.9523,
      "step": 192057
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7386064529418945,
      "learning_rate": 4.0078940168105936e-05,
      "loss": 2.9836,
      "step": 192058
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.036151885986328,
      "learning_rate": 4.0076897592591874e-05,
      "loss": 2.9945,
      "step": 192059
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.980484962463379,
      "learning_rate": 4.007485506540249e-05,
      "loss": 2.8204,
      "step": 192060
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1685986518859863,
      "learning_rate": 4.007281258653808e-05,
      "loss": 3.1449,
      "step": 192061
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.921027183532715,
      "learning_rate": 4.0070770155999055e-05,
      "loss": 3.0151,
      "step": 192062
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8873231410980225,
      "learning_rate": 4.0068727773785704e-05,
      "loss": 2.6775,
      "step": 192063
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.594910144805908,
      "learning_rate": 4.006668543989856e-05,
      "loss": 3.0573,
      "step": 192064
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.871065378189087,
      "learning_rate": 4.006464315433786e-05,
      "loss": 3.3052,
      "step": 192065
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.680675745010376,
      "learning_rate": 4.0062600917104105e-05,
      "loss": 3.0344,
      "step": 192066
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.598187446594238,
      "learning_rate": 4.006055872819756e-05,
      "loss": 2.9701,
      "step": 192067
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7683238983154297,
      "learning_rate": 4.005851658761878e-05,
      "loss": 2.7472,
      "step": 192068
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8525359630584717,
      "learning_rate": 4.005647449536792e-05,
      "loss": 2.8232,
      "step": 192069
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7366182804107666,
      "learning_rate": 4.005443245144553e-05,
      "loss": 2.7714,
      "step": 192070
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.645761489868164,
      "learning_rate": 4.0052390455851843e-05,
      "loss": 3.1142,
      "step": 192071
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.038567543029785,
      "learning_rate": 4.0050348508587404e-05,
      "loss": 3.0775,
      "step": 192072
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7362349033355713,
      "learning_rate": 4.004830660965247e-05,
      "loss": 3.0309,
      "step": 192073
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.9731831550598145,
      "learning_rate": 4.004626475904754e-05,
      "loss": 3.0992,
      "step": 192074
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.650357246398926,
      "learning_rate": 4.004422295677282e-05,
      "loss": 2.7671,
      "step": 192075
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3337526321411133,
      "learning_rate": 4.004218120282887e-05,
      "loss": 2.738,
      "step": 192076
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3321022987365723,
      "learning_rate": 4.004013949721589e-05,
      "loss": 2.9216,
      "step": 192077
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.408937692642212,
      "learning_rate": 4.003809783993442e-05,
      "loss": 2.8657,
      "step": 192078
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5294928550720215,
      "learning_rate": 4.003605623098473e-05,
      "loss": 3.0781,
      "step": 192079
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.061706066131592,
      "learning_rate": 4.003401467036736e-05,
      "loss": 2.9802,
      "step": 192080
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.665560722351074,
      "learning_rate": 4.003197315808244e-05,
      "loss": 2.7024,
      "step": 192081
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.118746757507324,
      "learning_rate": 4.002993169413056e-05,
      "loss": 2.9652,
      "step": 192082
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.594789505004883,
      "learning_rate": 4.0027890278511944e-05,
      "loss": 2.8488,
      "step": 192083
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.542896270751953,
      "learning_rate": 4.002584891122713e-05,
      "loss": 2.785,
      "step": 192084
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.814558982849121,
      "learning_rate": 4.002380759227632e-05,
      "loss": 3.025,
      "step": 192085
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6611976623535156,
      "learning_rate": 4.0021766321660156e-05,
      "loss": 2.7281,
      "step": 192086
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0957815647125244,
      "learning_rate": 4.001972509937872e-05,
      "loss": 2.817,
      "step": 192087
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.220768690109253,
      "learning_rate": 4.0017683925432584e-05,
      "loss": 2.7485,
      "step": 192088
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.249106407165527,
      "learning_rate": 4.001564279982199e-05,
      "loss": 3.1202,
      "step": 192089
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0603182315826416,
      "learning_rate": 4.001360172254749e-05,
      "loss": 2.9191,
      "step": 192090
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7465808391571045,
      "learning_rate": 4.00115606936093e-05,
      "loss": 3.0473,
      "step": 192091
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.479755401611328,
      "learning_rate": 4.000951971300798e-05,
      "loss": 2.7398,
      "step": 192092
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.967578172683716,
      "learning_rate": 4.000747878074369e-05,
      "loss": 3.0883,
      "step": 192093
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.847565174102783,
      "learning_rate": 4.000543789681696e-05,
      "loss": 2.745,
      "step": 192094
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.663219690322876,
      "learning_rate": 4.000339706122807e-05,
      "loss": 3.1512,
      "step": 192095
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6206958293914795,
      "learning_rate": 4.0001356273977515e-05,
      "loss": 3.0099,
      "step": 192096
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.227872848510742,
      "learning_rate": 3.999931553506556e-05,
      "loss": 2.5495,
      "step": 192097
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7223522663116455,
      "learning_rate": 3.999727484449276e-05,
      "loss": 2.9277,
      "step": 192098
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7156527042388916,
      "learning_rate": 3.999523420225924e-05,
      "loss": 2.8632,
      "step": 192099
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.305295705795288,
      "learning_rate": 3.999319360836558e-05,
      "loss": 2.9198,
      "step": 192100
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0728936195373535,
      "learning_rate": 3.9991153062812056e-05,
      "loss": 2.7405,
      "step": 192101
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5106794834136963,
      "learning_rate": 3.998911256559912e-05,
      "loss": 2.7883,
      "step": 192102
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.282522439956665,
      "learning_rate": 3.998707211672706e-05,
      "loss": 3.1312,
      "step": 192103
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7115724086761475,
      "learning_rate": 3.998503171619639e-05,
      "loss": 2.7592,
      "step": 192104
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.301152467727661,
      "learning_rate": 3.998299136400739e-05,
      "loss": 3.1313,
      "step": 192105
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.869640350341797,
      "learning_rate": 3.998095106016048e-05,
      "loss": 2.7444,
      "step": 192106
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.795952558517456,
      "learning_rate": 3.997891080465594e-05,
      "loss": 3.0921,
      "step": 192107
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.863868236541748,
      "learning_rate": 3.9976870597494295e-05,
      "loss": 3.107,
      "step": 192108
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4219138622283936,
      "learning_rate": 3.9974830438675806e-05,
      "loss": 3.0729,
      "step": 192109
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8809196949005127,
      "learning_rate": 3.997279032820095e-05,
      "loss": 2.8509,
      "step": 192110
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9926278591156006,
      "learning_rate": 3.9970750266070095e-05,
      "loss": 2.9689,
      "step": 192111
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9893767833709717,
      "learning_rate": 3.996871025228353e-05,
      "loss": 2.723,
      "step": 192112
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8831448554992676,
      "learning_rate": 3.996667028684166e-05,
      "loss": 2.9901,
      "step": 192113
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.151305675506592,
      "learning_rate": 3.9964630369744954e-05,
      "loss": 3.0827,
      "step": 192114
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4458279609680176,
      "learning_rate": 3.996259050099367e-05,
      "loss": 2.6714,
      "step": 192115
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7223682403564453,
      "learning_rate": 3.996055068058832e-05,
      "loss": 2.9223,
      "step": 192116
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.816706418991089,
      "learning_rate": 3.9958510908529194e-05,
      "loss": 3.0423,
      "step": 192117
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.389256000518799,
      "learning_rate": 3.995647118481673e-05,
      "loss": 2.8347,
      "step": 192118
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.773390293121338,
      "learning_rate": 3.9954431509451154e-05,
      "loss": 2.934,
      "step": 192119
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6963071823120117,
      "learning_rate": 3.9952391882433045e-05,
      "loss": 3.01,
      "step": 192120
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6295242309570312,
      "learning_rate": 3.995035230376262e-05,
      "loss": 3.1227,
      "step": 192121
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4721181392669678,
      "learning_rate": 3.9948312773440425e-05,
      "loss": 3.105,
      "step": 192122
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.200636625289917,
      "learning_rate": 3.994627329146672e-05,
      "loss": 2.8051,
      "step": 192123
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9007139205932617,
      "learning_rate": 3.9944233857841914e-05,
      "loss": 2.8122,
      "step": 192124
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2037911415100098,
      "learning_rate": 3.9942194472566324e-05,
      "loss": 2.9801,
      "step": 192125
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.965909242630005,
      "learning_rate": 3.994015513564046e-05,
      "loss": 2.9298,
      "step": 192126
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.081427574157715,
      "learning_rate": 3.9938115847064556e-05,
      "loss": 3.0374,
      "step": 192127
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.018737316131592,
      "learning_rate": 3.993607660683914e-05,
      "loss": 3.2028,
      "step": 192128
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.9142355918884277,
      "learning_rate": 3.993403741496451e-05,
      "loss": 3.0864,
      "step": 192129
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7687387466430664,
      "learning_rate": 3.993199827144098e-05,
      "loss": 3.0756,
      "step": 192130
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8074231147766113,
      "learning_rate": 3.992995917626907e-05,
      "loss": 2.8159,
      "step": 192131
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7676565647125244,
      "learning_rate": 3.992792012944907e-05,
      "loss": 2.97,
      "step": 192132
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.016216993331909,
      "learning_rate": 3.992588113098134e-05,
      "loss": 2.9051,
      "step": 192133
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3225927352905273,
      "learning_rate": 3.992384218086636e-05,
      "loss": 2.9206,
      "step": 192134
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0959131717681885,
      "learning_rate": 3.992180327910447e-05,
      "loss": 2.9026,
      "step": 192135
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.934831142425537,
      "learning_rate": 3.9919764425695935e-05,
      "loss": 2.946,
      "step": 192136
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.058614730834961,
      "learning_rate": 3.991772562064128e-05,
      "loss": 2.8673,
      "step": 192137
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3449649810791016,
      "learning_rate": 3.991568686394085e-05,
      "loss": 3.0408,
      "step": 192138
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2326583862304688,
      "learning_rate": 3.991364815559491e-05,
      "loss": 2.977,
      "step": 192139
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.543597936630249,
      "learning_rate": 3.991160949560402e-05,
      "loss": 2.915,
      "step": 192140
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9505422115325928,
      "learning_rate": 3.990957088396849e-05,
      "loss": 2.86,
      "step": 192141
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.683004140853882,
      "learning_rate": 3.9907532320688576e-05,
      "loss": 3.0201,
      "step": 192142
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5802202224731445,
      "learning_rate": 3.9905493805764845e-05,
      "loss": 2.9282,
      "step": 192143
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6672065258026123,
      "learning_rate": 3.9903455339197534e-05,
      "loss": 2.8931,
      "step": 192144
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.597140312194824,
      "learning_rate": 3.990141692098714e-05,
      "loss": 3.1019,
      "step": 192145
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.398245096206665,
      "learning_rate": 3.989937855113397e-05,
      "loss": 2.9616,
      "step": 192146
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3141934871673584,
      "learning_rate": 3.989734022963844e-05,
      "loss": 3.0657,
      "step": 192147
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.184077739715576,
      "learning_rate": 3.98953019565008e-05,
      "loss": 2.8037,
      "step": 192148
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.926499605178833,
      "learning_rate": 3.989326373172165e-05,
      "loss": 2.7791,
      "step": 192149
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1068596839904785,
      "learning_rate": 3.989122555530114e-05,
      "loss": 2.8975,
      "step": 192150
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.243042230606079,
      "learning_rate": 3.9889187427239856e-05,
      "loss": 3.0404,
      "step": 192151
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7464981079101562,
      "learning_rate": 3.988714934753802e-05,
      "loss": 3.007,
      "step": 192152
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6721885204315186,
      "learning_rate": 3.988511131619616e-05,
      "loss": 3.2589,
      "step": 192153
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.448547124862671,
      "learning_rate": 3.9883073333214485e-05,
      "loss": 3.0219,
      "step": 192154
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.801330804824829,
      "learning_rate": 3.98810353985935e-05,
      "loss": 3.057,
      "step": 192155
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.888237953186035,
      "learning_rate": 3.987899751233349e-05,
      "loss": 3.0257,
      "step": 192156
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.825026273727417,
      "learning_rate": 3.9876959674434926e-05,
      "loss": 2.9764,
      "step": 192157
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.040004253387451,
      "learning_rate": 3.9874921884898116e-05,
      "loss": 2.8932,
      "step": 192158
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7732527256011963,
      "learning_rate": 3.9872884143723546e-05,
      "loss": 2.838,
      "step": 192159
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.404815435409546,
      "learning_rate": 3.9870846450911434e-05,
      "loss": 2.9063,
      "step": 192160
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.649944305419922,
      "learning_rate": 3.9868808806462294e-05,
      "loss": 3.0859,
      "step": 192161
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.500058650970459,
      "learning_rate": 3.986677121037637e-05,
      "loss": 2.8968,
      "step": 192162
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8346190452575684,
      "learning_rate": 3.9864733662654225e-05,
      "loss": 2.822,
      "step": 192163
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.200106620788574,
      "learning_rate": 3.986269616329603e-05,
      "loss": 3.1914,
      "step": 192164
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0364458560943604,
      "learning_rate": 3.986065871230244e-05,
      "loss": 2.8193,
      "step": 192165
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.906372308731079,
      "learning_rate": 3.98586213096735e-05,
      "loss": 3.0777,
      "step": 192166
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8346004486083984,
      "learning_rate": 3.985658395540984e-05,
      "loss": 2.8124,
      "step": 192167
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4198055267333984,
      "learning_rate": 3.985454664951169e-05,
      "loss": 2.938,
      "step": 192168
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.247389793395996,
      "learning_rate": 3.9852509391979516e-05,
      "loss": 3.1791,
      "step": 192169
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.355190277099609,
      "learning_rate": 3.9850472182813655e-05,
      "loss": 2.925,
      "step": 192170
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8742456436157227,
      "learning_rate": 3.9848435022014534e-05,
      "loss": 2.9478,
      "step": 192171
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.141054153442383,
      "learning_rate": 3.984639790958249e-05,
      "loss": 3.2594,
      "step": 192172
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.314399242401123,
      "learning_rate": 3.984436084551796e-05,
      "loss": 3.1212,
      "step": 192173
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9195926189422607,
      "learning_rate": 3.9842323829821176e-05,
      "loss": 3.0278,
      "step": 192174
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0346555709838867,
      "learning_rate": 3.98402868624927e-05,
      "loss": 2.8297,
      "step": 192175
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5742809772491455,
      "learning_rate": 3.983824994353273e-05,
      "loss": 2.9722,
      "step": 192176
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.953641414642334,
      "learning_rate": 3.9836213072941834e-05,
      "loss": 2.9108,
      "step": 192177
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.878248691558838,
      "learning_rate": 3.983417625072028e-05,
      "loss": 2.8151,
      "step": 192178
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.672197103500366,
      "learning_rate": 3.983213947686847e-05,
      "loss": 2.7353,
      "step": 192179
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.558452606201172,
      "learning_rate": 3.983010275138671e-05,
      "loss": 2.9301,
      "step": 192180
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6703126430511475,
      "learning_rate": 3.982806607427551e-05,
      "loss": 2.7986,
      "step": 192181
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7530770301818848,
      "learning_rate": 3.982602944553512e-05,
      "loss": 2.9622,
      "step": 192182
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.917825698852539,
      "learning_rate": 3.982399286516604e-05,
      "loss": 2.8243,
      "step": 192183
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5512704849243164,
      "learning_rate": 3.9821956333168604e-05,
      "loss": 2.6913,
      "step": 192184
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.684455394744873,
      "learning_rate": 3.981991984954317e-05,
      "loss": 2.8868,
      "step": 192185
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7233543395996094,
      "learning_rate": 3.981788341429004e-05,
      "loss": 2.9718,
      "step": 192186
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7744522094726562,
      "learning_rate": 3.981584702740975e-05,
      "loss": 2.8641,
      "step": 192187
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2774832248687744,
      "learning_rate": 3.981381068890257e-05,
      "loss": 2.881,
      "step": 192188
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0735716819763184,
      "learning_rate": 3.9811774398768955e-05,
      "loss": 3.1399,
      "step": 192189
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4774091243743896,
      "learning_rate": 3.980973815700925e-05,
      "loss": 2.6522,
      "step": 192190
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.2366719245910645,
      "learning_rate": 3.980770196362382e-05,
      "loss": 2.8078,
      "step": 192191
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.34244441986084,
      "learning_rate": 3.980566581861299e-05,
      "loss": 2.7807,
      "step": 192192
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4838201999664307,
      "learning_rate": 3.980362972197726e-05,
      "loss": 2.6772,
      "step": 192193
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.617126703262329,
      "learning_rate": 3.9801593673716904e-05,
      "loss": 2.7401,
      "step": 192194
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.665494203567505,
      "learning_rate": 3.979955767383238e-05,
      "loss": 2.8931,
      "step": 192195
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.776575803756714,
      "learning_rate": 3.979752172232403e-05,
      "loss": 2.896,
      "step": 192196
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2530436515808105,
      "learning_rate": 3.979548581919225e-05,
      "loss": 2.9974,
      "step": 192197
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.807058572769165,
      "learning_rate": 3.979344996443731e-05,
      "loss": 2.9438,
      "step": 192198
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6680283546447754,
      "learning_rate": 3.979141415805976e-05,
      "loss": 3.0856,
      "step": 192199
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6846280097961426,
      "learning_rate": 3.9789378400059857e-05,
      "loss": 3.1291,
      "step": 192200
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5840089321136475,
      "learning_rate": 3.978734269043805e-05,
      "loss": 2.8854,
      "step": 192201
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7987351417541504,
      "learning_rate": 3.978530702919471e-05,
      "loss": 2.8146,
      "step": 192202
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5539207458496094,
      "learning_rate": 3.9783271416330175e-05,
      "loss": 3.2188,
      "step": 192203
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5572540760040283,
      "learning_rate": 3.978123585184477e-05,
      "loss": 2.8981,
      "step": 192204
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9195773601531982,
      "learning_rate": 3.977920033573904e-05,
      "loss": 2.6511,
      "step": 192205
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6806483268737793,
      "learning_rate": 3.97771648680132e-05,
      "loss": 2.8406,
      "step": 192206
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2031314373016357,
      "learning_rate": 3.977512944866773e-05,
      "loss": 2.8351,
      "step": 192207
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.827554702758789,
      "learning_rate": 3.977309407770299e-05,
      "loss": 3.0644,
      "step": 192208
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.693753957748413,
      "learning_rate": 3.977105875511936e-05,
      "loss": 2.8876,
      "step": 192209
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.922435998916626,
      "learning_rate": 3.976902348091715e-05,
      "loss": 3.0712,
      "step": 192210
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6454505920410156,
      "learning_rate": 3.976698825509681e-05,
      "loss": 2.8335,
      "step": 192211
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4715776443481445,
      "learning_rate": 3.9764953077658665e-05,
      "loss": 3.1324,
      "step": 192212
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4853734970092773,
      "learning_rate": 3.976291794860319e-05,
      "loss": 3.1519,
      "step": 192213
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5054657459259033,
      "learning_rate": 3.976088286793071e-05,
      "loss": 2.7776,
      "step": 192214
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.112299680709839,
      "learning_rate": 3.97588478356415e-05,
      "loss": 3.0463,
      "step": 192215
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3837180137634277,
      "learning_rate": 3.9756812851736106e-05,
      "loss": 2.8726,
      "step": 192216
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9476020336151123,
      "learning_rate": 3.9754777916214856e-05,
      "loss": 2.7318,
      "step": 192217
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7829668521881104,
      "learning_rate": 3.9752743029078025e-05,
      "loss": 2.8725,
      "step": 192218
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0538647174835205,
      "learning_rate": 3.975070819032613e-05,
      "loss": 2.8236,
      "step": 192219
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5381414890289307,
      "learning_rate": 3.97486733999595e-05,
      "loss": 3.0555,
      "step": 192220
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0899524688720703,
      "learning_rate": 3.9746638657978426e-05,
      "loss": 2.8936,
      "step": 192221
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6876747608184814,
      "learning_rate": 3.974460396438345e-05,
      "loss": 2.7543,
      "step": 192222
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.689460277557373,
      "learning_rate": 3.9742569319174866e-05,
      "loss": 3.1982,
      "step": 192223
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.528841733932495,
      "learning_rate": 3.9740534722352946e-05,
      "loss": 2.6551,
      "step": 192224
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0295791625976562,
      "learning_rate": 3.9738500173918286e-05,
      "loss": 2.9035,
      "step": 192225
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6021676063537598,
      "learning_rate": 3.973646567387112e-05,
      "loss": 2.6697,
      "step": 192226
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0819942951202393,
      "learning_rate": 3.973443122221182e-05,
      "loss": 2.9176,
      "step": 192227
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2465829849243164,
      "learning_rate": 3.973239681894084e-05,
      "loss": 2.8795,
      "step": 192228
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7250046730041504,
      "learning_rate": 3.9730362464058484e-05,
      "loss": 2.8477,
      "step": 192229
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.554673910140991,
      "learning_rate": 3.972832815756519e-05,
      "loss": 3.1488,
      "step": 192230
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.830505609512329,
      "learning_rate": 3.972629389946136e-05,
      "loss": 2.749,
      "step": 192231
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.005218505859375,
      "learning_rate": 3.972425968974729e-05,
      "loss": 2.9396,
      "step": 192232
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6059377193450928,
      "learning_rate": 3.9722225528423367e-05,
      "loss": 2.94,
      "step": 192233
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4248836040496826,
      "learning_rate": 3.972019141549001e-05,
      "loss": 3.1723,
      "step": 192234
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.444457530975342,
      "learning_rate": 3.971815735094755e-05,
      "loss": 2.9611,
      "step": 192235
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.059778928756714,
      "learning_rate": 3.971612333479647e-05,
      "loss": 2.7414,
      "step": 192236
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.705677032470703,
      "learning_rate": 3.9714089367036984e-05,
      "loss": 2.8722,
      "step": 192237
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.906895875930786,
      "learning_rate": 3.971205544766966e-05,
      "loss": 3.0977,
      "step": 192238
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.280423402786255,
      "learning_rate": 3.9710021576694754e-05,
      "loss": 3.0257,
      "step": 192239
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7323365211486816,
      "learning_rate": 3.970798775411267e-05,
      "loss": 3.0276,
      "step": 192240
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6080431938171387,
      "learning_rate": 3.970595397992371e-05,
      "loss": 3.0242,
      "step": 192241
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0451138019561768,
      "learning_rate": 3.970392025412841e-05,
      "loss": 3.0616,
      "step": 192242
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.616285800933838,
      "learning_rate": 3.9701886576726995e-05,
      "loss": 3.1174,
      "step": 192243
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.877183675765991,
      "learning_rate": 3.969985294771997e-05,
      "loss": 2.7583,
      "step": 192244
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7428081035614014,
      "learning_rate": 3.969781936710766e-05,
      "loss": 3.0604,
      "step": 192245
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.242614269256592,
      "learning_rate": 3.9695785834890445e-05,
      "loss": 2.8533,
      "step": 192246
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2230873107910156,
      "learning_rate": 3.969375235106862e-05,
      "loss": 3.1299,
      "step": 192247
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7890067100524902,
      "learning_rate": 3.969171891564271e-05,
      "loss": 3.1674,
      "step": 192248
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.850672960281372,
      "learning_rate": 3.968968552861296e-05,
      "loss": 2.9608,
      "step": 192249
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.450774908065796,
      "learning_rate": 3.968765218997989e-05,
      "loss": 2.9677,
      "step": 192250
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0826878547668457,
      "learning_rate": 3.9685618899743775e-05,
      "loss": 2.7357,
      "step": 192251
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.658731460571289,
      "learning_rate": 3.9683585657905045e-05,
      "loss": 2.7819,
      "step": 192252
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5855507850646973,
      "learning_rate": 3.968155246446394e-05,
      "loss": 2.9836,
      "step": 192253
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.463444232940674,
      "learning_rate": 3.967951931942105e-05,
      "loss": 2.6971,
      "step": 192254
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9996302127838135,
      "learning_rate": 3.967748622277658e-05,
      "loss": 3.0726,
      "step": 192255
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.816131353378296,
      "learning_rate": 3.967545317453106e-05,
      "loss": 2.5855,
      "step": 192256
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.458345890045166,
      "learning_rate": 3.967342017468476e-05,
      "loss": 2.9729,
      "step": 192257
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.590696334838867,
      "learning_rate": 3.967138722323811e-05,
      "loss": 2.8074,
      "step": 192258
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7399537563323975,
      "learning_rate": 3.9669354320191374e-05,
      "loss": 3.2149,
      "step": 192259
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.779153347015381,
      "learning_rate": 3.9667321465545086e-05,
      "loss": 2.8231,
      "step": 192260
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.373530626296997,
      "learning_rate": 3.966528865929949e-05,
      "loss": 2.8123,
      "step": 192261
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.506985902786255,
      "learning_rate": 3.9663255901455106e-05,
      "loss": 2.9253,
      "step": 192262
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.017950057983398,
      "learning_rate": 3.9661223192012234e-05,
      "loss": 3.2648,
      "step": 192263
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.093289852142334,
      "learning_rate": 3.9659190530971284e-05,
      "loss": 3.1091,
      "step": 192264
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5871124267578125,
      "learning_rate": 3.9657157918332514e-05,
      "loss": 2.7501,
      "step": 192265
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6865222454071045,
      "learning_rate": 3.9655125354096426e-05,
      "loss": 2.8971,
      "step": 192266
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9384682178497314,
      "learning_rate": 3.965309283826336e-05,
      "loss": 2.9925,
      "step": 192267
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.695319890975952,
      "learning_rate": 3.965106037083373e-05,
      "loss": 2.9901,
      "step": 192268
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.020254611968994,
      "learning_rate": 3.964902795180789e-05,
      "loss": 3.106,
      "step": 192269
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4645509719848633,
      "learning_rate": 3.964699558118619e-05,
      "loss": 2.9289,
      "step": 192270
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7601094245910645,
      "learning_rate": 3.9644963258968976e-05,
      "loss": 2.9693,
      "step": 192271
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0994536876678467,
      "learning_rate": 3.964293098515674e-05,
      "loss": 2.72,
      "step": 192272
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9247610569000244,
      "learning_rate": 3.964089875974975e-05,
      "loss": 2.9394,
      "step": 192273
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4055447578430176,
      "learning_rate": 3.9638866582748474e-05,
      "loss": 2.7395,
      "step": 192274
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9238288402557373,
      "learning_rate": 3.9636834454153275e-05,
      "loss": 2.9034,
      "step": 192275
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.780601739883423,
      "learning_rate": 3.963480237396449e-05,
      "loss": 2.8398,
      "step": 192276
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.28084659576416,
      "learning_rate": 3.9632770342182455e-05,
      "loss": 2.8693,
      "step": 192277
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7982962131500244,
      "learning_rate": 3.963073835880766e-05,
      "loss": 2.7955,
      "step": 192278
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3245034217834473,
      "learning_rate": 3.962870642384034e-05,
      "loss": 2.5991,
      "step": 192279
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.304983377456665,
      "learning_rate": 3.9626674537281035e-05,
      "loss": 3.0265,
      "step": 192280
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5767123699188232,
      "learning_rate": 3.962464269913004e-05,
      "loss": 2.8933,
      "step": 192281
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.580559253692627,
      "learning_rate": 3.9622610909387755e-05,
      "loss": 3.0441,
      "step": 192282
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.922729253768921,
      "learning_rate": 3.9620579168054475e-05,
      "loss": 2.6639,
      "step": 192283
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9956765174865723,
      "learning_rate": 3.961854747513071e-05,
      "loss": 2.8913,
      "step": 192284
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0093648433685303,
      "learning_rate": 3.9616515830616693e-05,
      "loss": 3.1035,
      "step": 192285
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9843780994415283,
      "learning_rate": 3.961448423451298e-05,
      "loss": 2.7747,
      "step": 192286
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.802884340286255,
      "learning_rate": 3.9612452686819805e-05,
      "loss": 2.7821,
      "step": 192287
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.955841541290283,
      "learning_rate": 3.961042118753761e-05,
      "loss": 2.9881,
      "step": 192288
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5386111736297607,
      "learning_rate": 3.960838973666669e-05,
      "loss": 2.8462,
      "step": 192289
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5306899547576904,
      "learning_rate": 3.960635833420754e-05,
      "loss": 3.0771,
      "step": 192290
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.603559732437134,
      "learning_rate": 3.960432698016044e-05,
      "loss": 2.8845,
      "step": 192291
    },
    {
      "epoch": 2.5,
      "grad_norm": 6.38314962387085,
      "learning_rate": 3.960229567452588e-05,
      "loss": 3.226,
      "step": 192292
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.654448986053467,
      "learning_rate": 3.960026441730412e-05,
      "loss": 3.1153,
      "step": 192293
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4422242641448975,
      "learning_rate": 3.9598233208495635e-05,
      "loss": 2.9156,
      "step": 192294
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.270660877227783,
      "learning_rate": 3.9596202048100654e-05,
      "loss": 3.0721,
      "step": 192295
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2122085094451904,
      "learning_rate": 3.959417093611975e-05,
      "loss": 2.7779,
      "step": 192296
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.764012098312378,
      "learning_rate": 3.959213987255312e-05,
      "loss": 2.8486,
      "step": 192297
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.6306519508361816,
      "learning_rate": 3.9590108857401295e-05,
      "loss": 3.085,
      "step": 192298
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.90710186958313,
      "learning_rate": 3.95880778906646e-05,
      "loss": 3.2367,
      "step": 192299
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.656165361404419,
      "learning_rate": 3.958604697234339e-05,
      "loss": 3.081,
      "step": 192300
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2408106327056885,
      "learning_rate": 3.958401610243798e-05,
      "loss": 2.9396,
      "step": 192301
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0616633892059326,
      "learning_rate": 3.958198528094887e-05,
      "loss": 2.9109,
      "step": 192302
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.183659315109253,
      "learning_rate": 3.957995450787633e-05,
      "loss": 2.7231,
      "step": 192303
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.910891056060791,
      "learning_rate": 3.957792378322087e-05,
      "loss": 2.6669,
      "step": 192304
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1138672828674316,
      "learning_rate": 3.95758931069827e-05,
      "loss": 2.4884,
      "step": 192305
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5348355770111084,
      "learning_rate": 3.957386247916238e-05,
      "loss": 2.8905,
      "step": 192306
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.036623001098633,
      "learning_rate": 3.957183189976019e-05,
      "loss": 3.1717,
      "step": 192307
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2760555744171143,
      "learning_rate": 3.95698013687765e-05,
      "loss": 3.035,
      "step": 192308
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.648671627044678,
      "learning_rate": 3.9567770886211645e-05,
      "loss": 2.9032,
      "step": 192309
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.3970675468444824,
      "learning_rate": 3.9565740452066094e-05,
      "loss": 2.8321,
      "step": 192310
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.581758737564087,
      "learning_rate": 3.956371006634015e-05,
      "loss": 3.2745,
      "step": 192311
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0823147296905518,
      "learning_rate": 3.95616797290343e-05,
      "loss": 2.8137,
      "step": 192312
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1659512519836426,
      "learning_rate": 3.955964944014882e-05,
      "loss": 2.8052,
      "step": 192313
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.696058511734009,
      "learning_rate": 3.9557619199684074e-05,
      "loss": 2.8513,
      "step": 192314
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1842334270477295,
      "learning_rate": 3.9555589007640564e-05,
      "loss": 2.8776,
      "step": 192315
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5951931476593018,
      "learning_rate": 3.955355886401855e-05,
      "loss": 2.9415,
      "step": 192316
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.942056894302368,
      "learning_rate": 3.9551528768818405e-05,
      "loss": 2.9818,
      "step": 192317
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6020781993865967,
      "learning_rate": 3.95494987220406e-05,
      "loss": 2.8959,
      "step": 192318
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.210180282592773,
      "learning_rate": 3.954746872368545e-05,
      "loss": 3.067,
      "step": 192319
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9112794399261475,
      "learning_rate": 3.95454387737533e-05,
      "loss": 3.2173,
      "step": 192320
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6887729167938232,
      "learning_rate": 3.954340887224462e-05,
      "loss": 2.9438,
      "step": 192321
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7390966415405273,
      "learning_rate": 3.9541379019159766e-05,
      "loss": 2.9665,
      "step": 192322
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7513110637664795,
      "learning_rate": 3.953934921449898e-05,
      "loss": 2.9881,
      "step": 192323
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2317373752593994,
      "learning_rate": 3.9537319458262826e-05,
      "loss": 3.0684,
      "step": 192324
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.619905948638916,
      "learning_rate": 3.9535289750451605e-05,
      "loss": 2.6357,
      "step": 192325
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.415714979171753,
      "learning_rate": 3.9533260091065644e-05,
      "loss": 2.9705,
      "step": 192326
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.133583068847656,
      "learning_rate": 3.9531230480105383e-05,
      "loss": 2.8112,
      "step": 192327
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.393958568572998,
      "learning_rate": 3.952920091757115e-05,
      "loss": 2.7122,
      "step": 192328
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7538838386535645,
      "learning_rate": 3.9527171403463406e-05,
      "loss": 2.8498,
      "step": 192329
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8150267601013184,
      "learning_rate": 3.95251419377825e-05,
      "loss": 3.0641,
      "step": 192330
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1439642906188965,
      "learning_rate": 3.952311252052879e-05,
      "loss": 3.0414,
      "step": 192331
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.4007139205932617,
      "learning_rate": 3.952108315170257e-05,
      "loss": 2.735,
      "step": 192332
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.471074104309082,
      "learning_rate": 3.951905383130435e-05,
      "loss": 2.6636,
      "step": 192333
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7723276615142822,
      "learning_rate": 3.951702455933442e-05,
      "loss": 2.9413,
      "step": 192334
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7406842708587646,
      "learning_rate": 3.951499533579322e-05,
      "loss": 2.9697,
      "step": 192335
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.802384614944458,
      "learning_rate": 3.951296616068111e-05,
      "loss": 3.0377,
      "step": 192336
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.587179183959961,
      "learning_rate": 3.951093703399847e-05,
      "loss": 3.0412,
      "step": 192337
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8229238986968994,
      "learning_rate": 3.950890795574562e-05,
      "loss": 2.7793,
      "step": 192338
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5604820251464844,
      "learning_rate": 3.950687892592299e-05,
      "loss": 2.866,
      "step": 192339
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0085935592651367,
      "learning_rate": 3.950484994453093e-05,
      "loss": 3.0809,
      "step": 192340
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7935585975646973,
      "learning_rate": 3.950282101156988e-05,
      "loss": 3.1665,
      "step": 192341
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.7161881923675537,
      "learning_rate": 3.950079212704017e-05,
      "loss": 2.7875,
      "step": 192342
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8950960636138916,
      "learning_rate": 3.949876329094221e-05,
      "loss": 2.9837,
      "step": 192343
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4192397594451904,
      "learning_rate": 3.949673450327625e-05,
      "loss": 2.9128,
      "step": 192344
    },
    {
      "epoch": 2.5,
      "grad_norm": 5.054100513458252,
      "learning_rate": 3.949470576404284e-05,
      "loss": 2.9593,
      "step": 192345
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8486742973327637,
      "learning_rate": 3.949267707324223e-05,
      "loss": 2.9491,
      "step": 192346
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.4599454402923584,
      "learning_rate": 3.9490648430874875e-05,
      "loss": 3.1334,
      "step": 192347
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.007427453994751,
      "learning_rate": 3.948861983694117e-05,
      "loss": 2.9314,
      "step": 192348
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.251905918121338,
      "learning_rate": 3.948659129144143e-05,
      "loss": 2.9492,
      "step": 192349
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.726787805557251,
      "learning_rate": 3.948456279437597e-05,
      "loss": 2.8105,
      "step": 192350
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3779256343841553,
      "learning_rate": 3.9482534345745344e-05,
      "loss": 2.7492,
      "step": 192351
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.958263635635376,
      "learning_rate": 3.948050594554973e-05,
      "loss": 3.1679,
      "step": 192352
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.720780611038208,
      "learning_rate": 3.9478477593789715e-05,
      "loss": 3.0665,
      "step": 192353
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8262221813201904,
      "learning_rate": 3.9476449290465515e-05,
      "loss": 2.9735,
      "step": 192354
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.1700212955474854,
      "learning_rate": 3.9474421035577607e-05,
      "loss": 2.7537,
      "step": 192355
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7800393104553223,
      "learning_rate": 3.9472392829126254e-05,
      "loss": 2.9546,
      "step": 192356
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8730013370513916,
      "learning_rate": 3.947036467111195e-05,
      "loss": 2.9292,
      "step": 192357
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0824637413024902,
      "learning_rate": 3.9468336561534944e-05,
      "loss": 3.1306,
      "step": 192358
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.9595603942871094,
      "learning_rate": 3.946630850039578e-05,
      "loss": 3.1381,
      "step": 192359
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.025965929031372,
      "learning_rate": 3.946428048769474e-05,
      "loss": 2.7832,
      "step": 192360
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5612525939941406,
      "learning_rate": 3.946225252343223e-05,
      "loss": 3.022,
      "step": 192361
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.5253450870513916,
      "learning_rate": 3.9460224607608525e-05,
      "loss": 3.0399,
      "step": 192362
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.852158308029175,
      "learning_rate": 3.9458196740224115e-05,
      "loss": 2.6588,
      "step": 192363
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9427123069763184,
      "learning_rate": 3.945616892127932e-05,
      "loss": 3.1255,
      "step": 192364
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9173152446746826,
      "learning_rate": 3.945414115077461e-05,
      "loss": 2.9459,
      "step": 192365
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.8723716735839844,
      "learning_rate": 3.945211342871025e-05,
      "loss": 3.1037,
      "step": 192366
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.565678596496582,
      "learning_rate": 3.945008575508671e-05,
      "loss": 2.8062,
      "step": 192367
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8350629806518555,
      "learning_rate": 3.944805812990422e-05,
      "loss": 3.0924,
      "step": 192368
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.832745313644409,
      "learning_rate": 3.944603055316332e-05,
      "loss": 3.0049,
      "step": 192369
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.904834032058716,
      "learning_rate": 3.944400302486427e-05,
      "loss": 2.9282,
      "step": 192370
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8307909965515137,
      "learning_rate": 3.9441975545007566e-05,
      "loss": 2.9649,
      "step": 192371
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5643279552459717,
      "learning_rate": 3.943994811359345e-05,
      "loss": 2.9736,
      "step": 192372
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6076254844665527,
      "learning_rate": 3.943792073062248e-05,
      "loss": 2.745,
      "step": 192373
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5171196460723877,
      "learning_rate": 3.943589339609482e-05,
      "loss": 2.8128,
      "step": 192374
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.88285493850708,
      "learning_rate": 3.943386611001098e-05,
      "loss": 2.87,
      "step": 192375
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.8569400310516357,
      "learning_rate": 3.9431838872371255e-05,
      "loss": 3.0381,
      "step": 192376
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.7939212322235107,
      "learning_rate": 3.9429811683176114e-05,
      "loss": 2.8686,
      "step": 192377
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.96209716796875,
      "learning_rate": 3.942778454242586e-05,
      "loss": 2.8244,
      "step": 192378
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.5520482063293457,
      "learning_rate": 3.942575745012098e-05,
      "loss": 2.9268,
      "step": 192379
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.2363076210021973,
      "learning_rate": 3.9423730406261676e-05,
      "loss": 2.8317,
      "step": 192380
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.907808303833008,
      "learning_rate": 3.942170341084849e-05,
      "loss": 3.148,
      "step": 192381
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.3667807579040527,
      "learning_rate": 3.941967646388166e-05,
      "loss": 2.883,
      "step": 192382
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.655144453048706,
      "learning_rate": 3.941764956536166e-05,
      "loss": 2.9481,
      "step": 192383
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6438236236572266,
      "learning_rate": 3.941562271528882e-05,
      "loss": 3.2403,
      "step": 192384
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.9240002632141113,
      "learning_rate": 3.941359591366362e-05,
      "loss": 3.2015,
      "step": 192385
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.6208510398864746,
      "learning_rate": 3.9411569160486264e-05,
      "loss": 3.0231,
      "step": 192386
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7000672817230225,
      "learning_rate": 3.940954245575726e-05,
      "loss": 3.1971,
      "step": 192387
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.311389207839966,
      "learning_rate": 3.9407515799476865e-05,
      "loss": 2.9603,
      "step": 192388
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.9447410106658936,
      "learning_rate": 3.940548919164561e-05,
      "loss": 2.8227,
      "step": 192389
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.452293395996094,
      "learning_rate": 3.940346263226374e-05,
      "loss": 3.0554,
      "step": 192390
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8586747646331787,
      "learning_rate": 3.940143612133174e-05,
      "loss": 3.0134,
      "step": 192391
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.153052568435669,
      "learning_rate": 3.939940965884992e-05,
      "loss": 3.0004,
      "step": 192392
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0944855213165283,
      "learning_rate": 3.939738324481868e-05,
      "loss": 3.1216,
      "step": 192393
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.2943716049194336,
      "learning_rate": 3.939535687923835e-05,
      "loss": 2.849,
      "step": 192394
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.706263780593872,
      "learning_rate": 3.939333056210936e-05,
      "loss": 2.8464,
      "step": 192395
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.597912073135376,
      "learning_rate": 3.9391304293432014e-05,
      "loss": 2.6953,
      "step": 192396
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5161359310150146,
      "learning_rate": 3.938927807320684e-05,
      "loss": 3.0761,
      "step": 192397
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5997378826141357,
      "learning_rate": 3.938725190143408e-05,
      "loss": 2.7596,
      "step": 192398
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5972723960876465,
      "learning_rate": 3.9385225778114124e-05,
      "loss": 3.0176,
      "step": 192399
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.926086902618408,
      "learning_rate": 3.9383199703247414e-05,
      "loss": 2.6198,
      "step": 192400
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.834120750427246,
      "learning_rate": 3.9381173676834275e-05,
      "loss": 3.1053,
      "step": 192401
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.58719539642334,
      "learning_rate": 3.9379147698875046e-05,
      "loss": 2.8534,
      "step": 192402
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.614870548248291,
      "learning_rate": 3.937712176937022e-05,
      "loss": 3.0075,
      "step": 192403
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5606536865234375,
      "learning_rate": 3.9375095888320106e-05,
      "loss": 2.993,
      "step": 192404
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6155433654785156,
      "learning_rate": 3.9373070055725e-05,
      "loss": 3.0372,
      "step": 192405
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6927309036254883,
      "learning_rate": 3.937104427158546e-05,
      "loss": 2.9154,
      "step": 192406
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.513284206390381,
      "learning_rate": 3.936901853590173e-05,
      "loss": 3.0314,
      "step": 192407
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6121504306793213,
      "learning_rate": 3.9366992848674174e-05,
      "loss": 2.7481,
      "step": 192408
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.866015911102295,
      "learning_rate": 3.936496720990325e-05,
      "loss": 2.7447,
      "step": 192409
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.053396463394165,
      "learning_rate": 3.936294161958934e-05,
      "loss": 2.8717,
      "step": 192410
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.159106254577637,
      "learning_rate": 3.9360916077732696e-05,
      "loss": 3.0726,
      "step": 192411
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.165987968444824,
      "learning_rate": 3.935889058433382e-05,
      "loss": 2.8463,
      "step": 192412
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.775484800338745,
      "learning_rate": 3.935686513939299e-05,
      "loss": 2.8229,
      "step": 192413
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7333950996398926,
      "learning_rate": 3.935483974291073e-05,
      "loss": 2.859,
      "step": 192414
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.724367380142212,
      "learning_rate": 3.93528143948873e-05,
      "loss": 2.9018,
      "step": 192415
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8388166427612305,
      "learning_rate": 3.935078909532311e-05,
      "loss": 2.8588,
      "step": 192416
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3873889446258545,
      "learning_rate": 3.934876384421849e-05,
      "loss": 2.8989,
      "step": 192417
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.342818260192871,
      "learning_rate": 3.934673864157387e-05,
      "loss": 2.7932,
      "step": 192418
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8064494132995605,
      "learning_rate": 3.934471348738959e-05,
      "loss": 2.9838,
      "step": 192419
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5232436656951904,
      "learning_rate": 3.934268838166608e-05,
      "loss": 3.1941,
      "step": 192420
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6222586631774902,
      "learning_rate": 3.93406633244037e-05,
      "loss": 2.9517,
      "step": 192421
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5426740646362305,
      "learning_rate": 3.933863831560282e-05,
      "loss": 2.9205,
      "step": 192422
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2036736011505127,
      "learning_rate": 3.933661335526371e-05,
      "loss": 2.9496,
      "step": 192423
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9426016807556152,
      "learning_rate": 3.933458844338694e-05,
      "loss": 2.9716,
      "step": 192424
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.418506145477295,
      "learning_rate": 3.933256357997273e-05,
      "loss": 2.7499,
      "step": 192425
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6535234451293945,
      "learning_rate": 3.933053876502156e-05,
      "loss": 2.9285,
      "step": 192426
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.716271162033081,
      "learning_rate": 3.9328513998533786e-05,
      "loss": 2.9875,
      "step": 192427
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0179431438446045,
      "learning_rate": 3.932648928050974e-05,
      "loss": 3.1542,
      "step": 192428
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8947083950042725,
      "learning_rate": 3.932446461094977e-05,
      "loss": 2.9073,
      "step": 192429
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7078969478607178,
      "learning_rate": 3.932243998985436e-05,
      "loss": 2.8472,
      "step": 192430
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9818530082702637,
      "learning_rate": 3.932041541722375e-05,
      "loss": 2.9576,
      "step": 192431
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.112368106842041,
      "learning_rate": 3.9318390893058473e-05,
      "loss": 2.9549,
      "step": 192432
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5145950317382812,
      "learning_rate": 3.931636641735886e-05,
      "loss": 3.0378,
      "step": 192433
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.953627824783325,
      "learning_rate": 3.931434199012521e-05,
      "loss": 3.0834,
      "step": 192434
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.330394744873047,
      "learning_rate": 3.931231761135789e-05,
      "loss": 3.0913,
      "step": 192435
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.00434947013855,
      "learning_rate": 3.93102932810574e-05,
      "loss": 3.0319,
      "step": 192436
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.988703966140747,
      "learning_rate": 3.930826899922398e-05,
      "loss": 3.2567,
      "step": 192437
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2386677265167236,
      "learning_rate": 3.9306244765858155e-05,
      "loss": 2.7826,
      "step": 192438
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.895906925201416,
      "learning_rate": 3.930422058096012e-05,
      "loss": 2.7442,
      "step": 192439
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1685631275177,
      "learning_rate": 3.930219644453052e-05,
      "loss": 2.93,
      "step": 192440
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.545475721359253,
      "learning_rate": 3.930017235656941e-05,
      "loss": 3.215,
      "step": 192441
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.9128594398498535,
      "learning_rate": 3.9298148317077406e-05,
      "loss": 2.8178,
      "step": 192442
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.319108486175537,
      "learning_rate": 3.9296124326054725e-05,
      "loss": 2.7309,
      "step": 192443
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.965914726257324,
      "learning_rate": 3.929410038350187e-05,
      "loss": 3.0366,
      "step": 192444
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.066622257232666,
      "learning_rate": 3.9292076489419076e-05,
      "loss": 2.9737,
      "step": 192445
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0429999828338623,
      "learning_rate": 3.929005264380698e-05,
      "loss": 2.8902,
      "step": 192446
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.06713604927063,
      "learning_rate": 3.9288028846665644e-05,
      "loss": 3.1514,
      "step": 192447
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5840518474578857,
      "learning_rate": 3.928600509799563e-05,
      "loss": 3.1422,
      "step": 192448
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.903714656829834,
      "learning_rate": 3.928398139779721e-05,
      "loss": 2.9201,
      "step": 192449
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.790565252304077,
      "learning_rate": 3.928195774607089e-05,
      "loss": 3.0971,
      "step": 192450
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7989211082458496,
      "learning_rate": 3.927993414281689e-05,
      "loss": 2.819,
      "step": 192451
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7145497798919678,
      "learning_rate": 3.927791058803585e-05,
      "loss": 2.858,
      "step": 192452
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4119906425476074,
      "learning_rate": 3.927588708172781e-05,
      "loss": 2.9239,
      "step": 192453
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.925973892211914,
      "learning_rate": 3.9273863623893355e-05,
      "loss": 2.774,
      "step": 192454
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3562819957733154,
      "learning_rate": 3.927184021453276e-05,
      "loss": 3.1817,
      "step": 192455
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1106653213500977,
      "learning_rate": 3.926981685364652e-05,
      "loss": 2.819,
      "step": 192456
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8142447471618652,
      "learning_rate": 3.9267793541234874e-05,
      "loss": 2.9535,
      "step": 192457
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.403057098388672,
      "learning_rate": 3.926577027729838e-05,
      "loss": 2.9534,
      "step": 192458
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8236145973205566,
      "learning_rate": 3.926374706183722e-05,
      "loss": 2.8533,
      "step": 192459
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.54024076461792,
      "learning_rate": 3.9261723894851874e-05,
      "loss": 2.8297,
      "step": 192460
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8448870182037354,
      "learning_rate": 3.925970077634262e-05,
      "loss": 2.9284,
      "step": 192461
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.40277624130249,
      "learning_rate": 3.925767770630999e-05,
      "loss": 2.8548,
      "step": 192462
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.840193748474121,
      "learning_rate": 3.925565468475425e-05,
      "loss": 2.7205,
      "step": 192463
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2265944480895996,
      "learning_rate": 3.9253631711675894e-05,
      "loss": 2.9644,
      "step": 192464
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.140760660171509,
      "learning_rate": 3.9251608787075094e-05,
      "loss": 2.9305,
      "step": 192465
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.987462282180786,
      "learning_rate": 3.924958591095239e-05,
      "loss": 2.7779,
      "step": 192466
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.954240560531616,
      "learning_rate": 3.924756308330807e-05,
      "loss": 2.9403,
      "step": 192467
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.728149175643921,
      "learning_rate": 3.92455403041426e-05,
      "loss": 3.0584,
      "step": 192468
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.807749032974243,
      "learning_rate": 3.924351757345625e-05,
      "loss": 2.9155,
      "step": 192469
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.689765214920044,
      "learning_rate": 3.924149489124956e-05,
      "loss": 3.0273,
      "step": 192470
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.045100688934326,
      "learning_rate": 3.923947225752269e-05,
      "loss": 2.8887,
      "step": 192471
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8075013160705566,
      "learning_rate": 3.92374496722762e-05,
      "loss": 3.0179,
      "step": 192472
    },
    {
      "epoch": 2.51,
      "grad_norm": 5.820893287658691,
      "learning_rate": 3.92354271355103e-05,
      "loss": 2.8844,
      "step": 192473
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.982602596282959,
      "learning_rate": 3.923340464722552e-05,
      "loss": 3.0817,
      "step": 192474
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1899895668029785,
      "learning_rate": 3.923138220742212e-05,
      "loss": 3.0399,
      "step": 192475
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.524498224258423,
      "learning_rate": 3.922935981610058e-05,
      "loss": 2.8294,
      "step": 192476
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.981544256210327,
      "learning_rate": 3.922733747326125e-05,
      "loss": 3.0164,
      "step": 192477
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9466476440429688,
      "learning_rate": 3.9225315178904485e-05,
      "loss": 2.8476,
      "step": 192478
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6852519512176514,
      "learning_rate": 3.922329293303056e-05,
      "loss": 2.9315,
      "step": 192479
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1991357803344727,
      "learning_rate": 3.922127073564002e-05,
      "loss": 3.0638,
      "step": 192480
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8999979496002197,
      "learning_rate": 3.92192485867331e-05,
      "loss": 2.8844,
      "step": 192481
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7665493488311768,
      "learning_rate": 3.9217226486310325e-05,
      "loss": 2.8293,
      "step": 192482
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.02583909034729,
      "learning_rate": 3.921520443437201e-05,
      "loss": 2.8114,
      "step": 192483
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5342724323272705,
      "learning_rate": 3.921318243091847e-05,
      "loss": 2.9652,
      "step": 192484
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4732730388641357,
      "learning_rate": 3.9211160475950075e-05,
      "loss": 2.9851,
      "step": 192485
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.747026205062866,
      "learning_rate": 3.92091385694673e-05,
      "loss": 2.6793,
      "step": 192486
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7448010444641113,
      "learning_rate": 3.920711671147041e-05,
      "loss": 3.0318,
      "step": 192487
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.053767204284668,
      "learning_rate": 3.92050949019599e-05,
      "loss": 2.6674,
      "step": 192488
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5899710655212402,
      "learning_rate": 3.92030731409361e-05,
      "loss": 2.9489,
      "step": 192489
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.254263401031494,
      "learning_rate": 3.920105142839932e-05,
      "loss": 2.9344,
      "step": 192490
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.051429271697998,
      "learning_rate": 3.9199029764350024e-05,
      "loss": 2.8898,
      "step": 192491
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8265981674194336,
      "learning_rate": 3.919700814878857e-05,
      "loss": 2.8055,
      "step": 192492
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1698641777038574,
      "learning_rate": 3.9194986581715226e-05,
      "loss": 2.8796,
      "step": 192493
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9492099285125732,
      "learning_rate": 3.919296506313053e-05,
      "loss": 3.1221,
      "step": 192494
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5553202629089355,
      "learning_rate": 3.919094359303482e-05,
      "loss": 3.0568,
      "step": 192495
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.558748722076416,
      "learning_rate": 3.9188922171428315e-05,
      "loss": 2.7834,
      "step": 192496
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7742533683776855,
      "learning_rate": 3.918690079831163e-05,
      "loss": 3.0912,
      "step": 192497
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.024606466293335,
      "learning_rate": 3.9184879473684914e-05,
      "loss": 2.8146,
      "step": 192498
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.116072654724121,
      "learning_rate": 3.918285819754875e-05,
      "loss": 2.7877,
      "step": 192499
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.701472282409668,
      "learning_rate": 3.918083696990339e-05,
      "loss": 2.9744,
      "step": 192500
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.363414764404297,
      "learning_rate": 3.9178815790749254e-05,
      "loss": 2.981,
      "step": 192501
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.625060558319092,
      "learning_rate": 3.9176794660086654e-05,
      "loss": 3.0187,
      "step": 192502
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.23745059967041,
      "learning_rate": 3.917477357791603e-05,
      "loss": 3.092,
      "step": 192503
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3291752338409424,
      "learning_rate": 3.917275254423772e-05,
      "loss": 3.0648,
      "step": 192504
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8127548694610596,
      "learning_rate": 3.9170731559052146e-05,
      "loss": 2.9889,
      "step": 192505
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8272619247436523,
      "learning_rate": 3.916871062235966e-05,
      "loss": 2.9598,
      "step": 192506
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.804182767868042,
      "learning_rate": 3.916668973416064e-05,
      "loss": 3.0097,
      "step": 192507
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8489127159118652,
      "learning_rate": 3.9164668894455395e-05,
      "loss": 2.9195,
      "step": 192508
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.728841781616211,
      "learning_rate": 3.9162648103244434e-05,
      "loss": 3.1613,
      "step": 192509
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.670178174972534,
      "learning_rate": 3.9160627360528007e-05,
      "loss": 2.7784,
      "step": 192510
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5492653846740723,
      "learning_rate": 3.9158606666306594e-05,
      "loss": 3.2092,
      "step": 192511
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5206713676452637,
      "learning_rate": 3.9156586020580446e-05,
      "loss": 2.6439,
      "step": 192512
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.497206449508667,
      "learning_rate": 3.915456542335015e-05,
      "loss": 2.7597,
      "step": 192513
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.108938694000244,
      "learning_rate": 3.9152544874615784e-05,
      "loss": 3.0611,
      "step": 192514
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7308709621429443,
      "learning_rate": 3.9150524374378e-05,
      "loss": 2.8739,
      "step": 192515
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.607370138168335,
      "learning_rate": 3.9148503922636954e-05,
      "loss": 3.0184,
      "step": 192516
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.0024566650390625,
      "learning_rate": 3.914648351939321e-05,
      "loss": 2.9359,
      "step": 192517
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.2942681312561035,
      "learning_rate": 3.914446316464698e-05,
      "loss": 2.6929,
      "step": 192518
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.197638750076294,
      "learning_rate": 3.9142442858398884e-05,
      "loss": 3.0523,
      "step": 192519
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3557755947113037,
      "learning_rate": 3.9140422600648955e-05,
      "loss": 2.9707,
      "step": 192520
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.314579486846924,
      "learning_rate": 3.913840239139784e-05,
      "loss": 3.0145,
      "step": 192521
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9885621070861816,
      "learning_rate": 3.9136382230645755e-05,
      "loss": 2.9817,
      "step": 192522
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.789783477783203,
      "learning_rate": 3.913436211839318e-05,
      "loss": 2.9917,
      "step": 192523
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.197939395904541,
      "learning_rate": 3.913234205464041e-05,
      "loss": 2.7458,
      "step": 192524
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7713847160339355,
      "learning_rate": 3.9130322039387975e-05,
      "loss": 2.8197,
      "step": 192525
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.078444480895996,
      "learning_rate": 3.912830207263601e-05,
      "loss": 3.0309,
      "step": 192526
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7008328437805176,
      "learning_rate": 3.912628215438511e-05,
      "loss": 3.0482,
      "step": 192527
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6152150630950928,
      "learning_rate": 3.912426228463546e-05,
      "loss": 2.9327,
      "step": 192528
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9055798053741455,
      "learning_rate": 3.9122242463387634e-05,
      "loss": 2.7424,
      "step": 192529
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9215705394744873,
      "learning_rate": 3.912022269064181e-05,
      "loss": 3.1281,
      "step": 192530
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.411869049072266,
      "learning_rate": 3.911820296639859e-05,
      "loss": 3.0863,
      "step": 192531
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3489668369293213,
      "learning_rate": 3.911618329065811e-05,
      "loss": 2.9404,
      "step": 192532
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1289455890655518,
      "learning_rate": 3.911416366342093e-05,
      "loss": 2.8989,
      "step": 192533
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9930529594421387,
      "learning_rate": 3.911214408468725e-05,
      "loss": 3.1671,
      "step": 192534
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7283639907836914,
      "learning_rate": 3.911012455445763e-05,
      "loss": 2.9883,
      "step": 192535
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2294023036956787,
      "learning_rate": 3.910810507273232e-05,
      "loss": 2.9365,
      "step": 192536
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3457186222076416,
      "learning_rate": 3.910608563951184e-05,
      "loss": 2.9877,
      "step": 192537
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.679482460021973,
      "learning_rate": 3.910406625479633e-05,
      "loss": 2.7912,
      "step": 192538
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9734086990356445,
      "learning_rate": 3.9102046918586386e-05,
      "loss": 2.6403,
      "step": 192539
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.0786895751953125,
      "learning_rate": 3.910002763088224e-05,
      "loss": 2.9922,
      "step": 192540
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.886228561401367,
      "learning_rate": 3.909800839168435e-05,
      "loss": 2.9421,
      "step": 192541
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8038218021392822,
      "learning_rate": 3.909598920099301e-05,
      "loss": 3.2386,
      "step": 192542
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.595003366470337,
      "learning_rate": 3.9093970058808786e-05,
      "loss": 2.6886,
      "step": 192543
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6036012172698975,
      "learning_rate": 3.9091950965131804e-05,
      "loss": 2.8325,
      "step": 192544
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.945939779281616,
      "learning_rate": 3.908993191996261e-05,
      "loss": 3.0418,
      "step": 192545
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9264326095581055,
      "learning_rate": 3.9087912923301456e-05,
      "loss": 3.0519,
      "step": 192546
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7728638648986816,
      "learning_rate": 3.908589397514883e-05,
      "loss": 2.9287,
      "step": 192547
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.123180866241455,
      "learning_rate": 3.908387507550503e-05,
      "loss": 2.8209,
      "step": 192548
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6213741302490234,
      "learning_rate": 3.9081856224370567e-05,
      "loss": 2.7962,
      "step": 192549
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4196360111236572,
      "learning_rate": 3.90798374217456e-05,
      "loss": 3.1705,
      "step": 192550
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7796099185943604,
      "learning_rate": 3.9077818667630654e-05,
      "loss": 2.643,
      "step": 192551
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.122567653656006,
      "learning_rate": 3.9075799962026045e-05,
      "loss": 2.6751,
      "step": 192552
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6936607360839844,
      "learning_rate": 3.907378130493219e-05,
      "loss": 3.0254,
      "step": 192553
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.166335344314575,
      "learning_rate": 3.907176269634941e-05,
      "loss": 2.7793,
      "step": 192554
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0951826572418213,
      "learning_rate": 3.9069744136278247e-05,
      "loss": 2.8637,
      "step": 192555
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.118457555770874,
      "learning_rate": 3.906772562471878e-05,
      "loss": 3.0434,
      "step": 192556
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.003765106201172,
      "learning_rate": 3.906570716167165e-05,
      "loss": 3.0441,
      "step": 192557
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0472426414489746,
      "learning_rate": 3.9063688747137036e-05,
      "loss": 2.9274,
      "step": 192558
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.120173454284668,
      "learning_rate": 3.906167038111552e-05,
      "loss": 2.9869,
      "step": 192559
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6307835578918457,
      "learning_rate": 3.905965206360726e-05,
      "loss": 2.8284,
      "step": 192560
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6374967098236084,
      "learning_rate": 3.9057633794612796e-05,
      "loss": 2.7227,
      "step": 192561
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4717094898223877,
      "learning_rate": 3.905561557413246e-05,
      "loss": 2.701,
      "step": 192562
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.908933162689209,
      "learning_rate": 3.9053597402166615e-05,
      "loss": 2.8645,
      "step": 192563
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.657139301300049,
      "learning_rate": 3.905157927871556e-05,
      "loss": 3.1898,
      "step": 192564
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.583420991897583,
      "learning_rate": 3.90495612037798e-05,
      "loss": 2.8424,
      "step": 192565
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.333850860595703,
      "learning_rate": 3.90475431773596e-05,
      "loss": 2.8326,
      "step": 192566
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9275691509246826,
      "learning_rate": 3.9045525199455455e-05,
      "loss": 3.1585,
      "step": 192567
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.171694040298462,
      "learning_rate": 3.9043507270067665e-05,
      "loss": 3.1749,
      "step": 192568
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9387412071228027,
      "learning_rate": 3.90414893891966e-05,
      "loss": 3.044,
      "step": 192569
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9211466312408447,
      "learning_rate": 3.9039471556842595e-05,
      "loss": 2.9236,
      "step": 192570
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.157104015350342,
      "learning_rate": 3.903745377300614e-05,
      "loss": 3.0226,
      "step": 192571
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.51749849319458,
      "learning_rate": 3.9035436037687485e-05,
      "loss": 3.1344,
      "step": 192572
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2352240085601807,
      "learning_rate": 3.9033418350887146e-05,
      "loss": 3.1095,
      "step": 192573
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1435911655426025,
      "learning_rate": 3.90314007126054e-05,
      "loss": 2.6666,
      "step": 192574
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9356484413146973,
      "learning_rate": 3.90293831228426e-05,
      "loss": 2.8861,
      "step": 192575
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.59560227394104,
      "learning_rate": 3.9027365581599204e-05,
      "loss": 2.8564,
      "step": 192576
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.896357774734497,
      "learning_rate": 3.9025348088875554e-05,
      "loss": 2.8665,
      "step": 192577
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8836398124694824,
      "learning_rate": 3.9023330644671957e-05,
      "loss": 2.9225,
      "step": 192578
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4952781200408936,
      "learning_rate": 3.9021313248988914e-05,
      "loss": 2.7272,
      "step": 192579
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8877947330474854,
      "learning_rate": 3.9019295901826764e-05,
      "loss": 3.2636,
      "step": 192580
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.049221992492676,
      "learning_rate": 3.901727860318573e-05,
      "loss": 2.9314,
      "step": 192581
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.871943235397339,
      "learning_rate": 3.901526135306642e-05,
      "loss": 2.9773,
      "step": 192582
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9484260082244873,
      "learning_rate": 3.901324415146909e-05,
      "loss": 2.9068,
      "step": 192583
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.14288592338562,
      "learning_rate": 3.901122699839405e-05,
      "loss": 2.8831,
      "step": 192584
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.04789662361145,
      "learning_rate": 3.900920989384183e-05,
      "loss": 2.6243,
      "step": 192585
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7968294620513916,
      "learning_rate": 3.9007192837812695e-05,
      "loss": 3.0096,
      "step": 192586
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9266223907470703,
      "learning_rate": 3.900517583030701e-05,
      "loss": 3.0109,
      "step": 192587
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.603990316390991,
      "learning_rate": 3.900315887132525e-05,
      "loss": 3.0221,
      "step": 192588
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.673036813735962,
      "learning_rate": 3.9001141960867664e-05,
      "loss": 3.0009,
      "step": 192589
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2707393169403076,
      "learning_rate": 3.899912509893477e-05,
      "loss": 2.847,
      "step": 192590
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.296527624130249,
      "learning_rate": 3.899710828552686e-05,
      "loss": 2.8179,
      "step": 192591
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8512277603149414,
      "learning_rate": 3.8995091520644296e-05,
      "loss": 2.6112,
      "step": 192592
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9246625900268555,
      "learning_rate": 3.8993074804287416e-05,
      "loss": 2.7578,
      "step": 192593
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.869494915008545,
      "learning_rate": 3.899105813645672e-05,
      "loss": 3.0083,
      "step": 192594
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.337202310562134,
      "learning_rate": 3.898904151715244e-05,
      "loss": 2.8993,
      "step": 192595
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5750653743743896,
      "learning_rate": 3.8987024946375076e-05,
      "loss": 2.9229,
      "step": 192596
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0727810859680176,
      "learning_rate": 3.898500842412493e-05,
      "loss": 3.102,
      "step": 192597
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.741685390472412,
      "learning_rate": 3.8982991950402495e-05,
      "loss": 2.9946,
      "step": 192598
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.868896007537842,
      "learning_rate": 3.898097552520791e-05,
      "loss": 3.1425,
      "step": 192599
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7144668102264404,
      "learning_rate": 3.897895914854178e-05,
      "loss": 2.8939,
      "step": 192600
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.26936936378479,
      "learning_rate": 3.897694282040432e-05,
      "loss": 3.0582,
      "step": 192601
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.124812364578247,
      "learning_rate": 3.8974926540796015e-05,
      "loss": 3.0521,
      "step": 192602
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5085902214050293,
      "learning_rate": 3.897291030971715e-05,
      "loss": 2.835,
      "step": 192603
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.041320323944092,
      "learning_rate": 3.8970894127168275e-05,
      "loss": 3.0398,
      "step": 192604
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0112075805664062,
      "learning_rate": 3.896887799314951e-05,
      "loss": 3.0674,
      "step": 192605
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.920511484146118,
      "learning_rate": 3.896686190766142e-05,
      "loss": 2.9886,
      "step": 192606
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.965822696685791,
      "learning_rate": 3.896484587070427e-05,
      "loss": 2.9668,
      "step": 192607
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2294633388519287,
      "learning_rate": 3.896282988227855e-05,
      "loss": 2.8018,
      "step": 192608
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1492345333099365,
      "learning_rate": 3.896081394238446e-05,
      "loss": 2.802,
      "step": 192609
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6514134407043457,
      "learning_rate": 3.8958798051022654e-05,
      "loss": 2.832,
      "step": 192610
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.759876012802124,
      "learning_rate": 3.8956782208193194e-05,
      "loss": 2.9878,
      "step": 192611
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5328192710876465,
      "learning_rate": 3.895476641389664e-05,
      "loss": 2.9962,
      "step": 192612
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.9927799701690674,
      "learning_rate": 3.895275066813327e-05,
      "loss": 2.8789,
      "step": 192613
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1418590545654297,
      "learning_rate": 3.895073497090357e-05,
      "loss": 2.8479,
      "step": 192614
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.320005416870117,
      "learning_rate": 3.8948719322207824e-05,
      "loss": 2.8565,
      "step": 192615
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5731241703033447,
      "learning_rate": 3.894670372204655e-05,
      "loss": 2.9934,
      "step": 192616
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6565635204315186,
      "learning_rate": 3.894468817041988e-05,
      "loss": 3.0504,
      "step": 192617
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6771135330200195,
      "learning_rate": 3.894267266732839e-05,
      "loss": 2.8901,
      "step": 192618
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.420415878295898,
      "learning_rate": 3.8940657212772305e-05,
      "loss": 3.0225,
      "step": 192619
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4630117416381836,
      "learning_rate": 3.893864180675214e-05,
      "loss": 2.75,
      "step": 192620
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.186422109603882,
      "learning_rate": 3.893662644926817e-05,
      "loss": 2.9554,
      "step": 192621
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.465361595153809,
      "learning_rate": 3.8934611140320916e-05,
      "loss": 3.1097,
      "step": 192622
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9409072399139404,
      "learning_rate": 3.893259587991053e-05,
      "loss": 3.0726,
      "step": 192623
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.428588390350342,
      "learning_rate": 3.893058066803756e-05,
      "loss": 2.8818,
      "step": 192624
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.692038059234619,
      "learning_rate": 3.892856550470226e-05,
      "loss": 2.7746,
      "step": 192625
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.521662473678589,
      "learning_rate": 3.892655038990513e-05,
      "loss": 2.804,
      "step": 192626
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0312917232513428,
      "learning_rate": 3.892453532364644e-05,
      "loss": 2.8895,
      "step": 192627
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8402836322784424,
      "learning_rate": 3.89225203059267e-05,
      "loss": 2.5653,
      "step": 192628
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0612471103668213,
      "learning_rate": 3.892050533674609e-05,
      "loss": 2.9624,
      "step": 192629
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.055722951889038,
      "learning_rate": 3.8918490416105155e-05,
      "loss": 3.0889,
      "step": 192630
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.745783567428589,
      "learning_rate": 3.891647554400412e-05,
      "loss": 2.6655,
      "step": 192631
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8146891593933105,
      "learning_rate": 3.89144607204435e-05,
      "loss": 2.8514,
      "step": 192632
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2209391593933105,
      "learning_rate": 3.891244594542354e-05,
      "loss": 2.8378,
      "step": 192633
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9436824321746826,
      "learning_rate": 3.891043121894483e-05,
      "loss": 3.172,
      "step": 192634
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.135664701461792,
      "learning_rate": 3.8908416541007484e-05,
      "loss": 3.0101,
      "step": 192635
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.628838062286377,
      "learning_rate": 3.890640191161204e-05,
      "loss": 2.6507,
      "step": 192636
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9016129970550537,
      "learning_rate": 3.890438733075873e-05,
      "loss": 2.9096,
      "step": 192637
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.048394680023193,
      "learning_rate": 3.890237279844813e-05,
      "loss": 2.9223,
      "step": 192638
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7003629207611084,
      "learning_rate": 3.8900358314680434e-05,
      "loss": 2.8009,
      "step": 192639
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0908725261688232,
      "learning_rate": 3.889834387945614e-05,
      "loss": 2.8516,
      "step": 192640
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7916743755340576,
      "learning_rate": 3.8896329492775615e-05,
      "loss": 2.8692,
      "step": 192641
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.842289924621582,
      "learning_rate": 3.889431515463916e-05,
      "loss": 2.9226,
      "step": 192642
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7084693908691406,
      "learning_rate": 3.88923008650471e-05,
      "loss": 3.1545,
      "step": 192643
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.787686586380005,
      "learning_rate": 3.8890286623999975e-05,
      "loss": 2.8942,
      "step": 192644
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.524005174636841,
      "learning_rate": 3.888827243149799e-05,
      "loss": 3.0578,
      "step": 192645
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0687527656555176,
      "learning_rate": 3.8886258287541705e-05,
      "loss": 3.1553,
      "step": 192646
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7481346130371094,
      "learning_rate": 3.8884244192131355e-05,
      "loss": 3.019,
      "step": 192647
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8370938301086426,
      "learning_rate": 3.888223014526737e-05,
      "loss": 2.7321,
      "step": 192648
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.789520740509033,
      "learning_rate": 3.888021614695005e-05,
      "loss": 3.006,
      "step": 192649
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1342971324920654,
      "learning_rate": 3.887820219717987e-05,
      "loss": 2.9342,
      "step": 192650
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.664271354675293,
      "learning_rate": 3.887618829595711e-05,
      "loss": 2.99,
      "step": 192651
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.792919874191284,
      "learning_rate": 3.8874174443282255e-05,
      "loss": 2.7334,
      "step": 192652
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6003289222717285,
      "learning_rate": 3.887216063915563e-05,
      "loss": 2.6808,
      "step": 192653
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.145627021789551,
      "learning_rate": 3.887014688357761e-05,
      "loss": 2.9457,
      "step": 192654
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0400004386901855,
      "learning_rate": 3.8868133176548485e-05,
      "loss": 3.1003,
      "step": 192655
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7629334926605225,
      "learning_rate": 3.886611951806876e-05,
      "loss": 2.672,
      "step": 192656
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6577343940734863,
      "learning_rate": 3.88641059081387e-05,
      "loss": 3.0039,
      "step": 192657
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5941410064697266,
      "learning_rate": 3.886209234675877e-05,
      "loss": 3.0147,
      "step": 192658
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.784388542175293,
      "learning_rate": 3.886007883392933e-05,
      "loss": 2.652,
      "step": 192659
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.840019702911377,
      "learning_rate": 3.885806536965066e-05,
      "loss": 3.0817,
      "step": 192660
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4002678394317627,
      "learning_rate": 3.885605195392328e-05,
      "loss": 2.8773,
      "step": 192661
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.684427499771118,
      "learning_rate": 3.885403858674747e-05,
      "loss": 3.07,
      "step": 192662
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.967787265777588,
      "learning_rate": 3.885202526812355e-05,
      "loss": 2.6266,
      "step": 192663
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.450840950012207,
      "learning_rate": 3.885001199805206e-05,
      "loss": 2.8461,
      "step": 192664
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8457274436950684,
      "learning_rate": 3.8847998776533276e-05,
      "loss": 2.9699,
      "step": 192665
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0842318534851074,
      "learning_rate": 3.884598560356751e-05,
      "loss": 2.8843,
      "step": 192666
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.717846155166626,
      "learning_rate": 3.884397247915527e-05,
      "loss": 3.1433,
      "step": 192667
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7190420627593994,
      "learning_rate": 3.884195940329686e-05,
      "loss": 2.9578,
      "step": 192668
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0072944164276123,
      "learning_rate": 3.883994637599261e-05,
      "loss": 2.9732,
      "step": 192669
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1693742275238037,
      "learning_rate": 3.8837933397242995e-05,
      "loss": 2.755,
      "step": 192670
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5970451831817627,
      "learning_rate": 3.883592046704836e-05,
      "loss": 2.9318,
      "step": 192671
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6237831115722656,
      "learning_rate": 3.8833907585408966e-05,
      "loss": 2.9199,
      "step": 192672
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2395997047424316,
      "learning_rate": 3.8831894752325353e-05,
      "loss": 3.1326,
      "step": 192673
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1383941173553467,
      "learning_rate": 3.882988196779777e-05,
      "loss": 3.1538,
      "step": 192674
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9265265464782715,
      "learning_rate": 3.882786923182668e-05,
      "loss": 3.064,
      "step": 192675
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4380085468292236,
      "learning_rate": 3.882585654441241e-05,
      "loss": 2.8844,
      "step": 192676
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.047478199005127,
      "learning_rate": 3.882384390555537e-05,
      "loss": 2.9836,
      "step": 192677
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7979280948638916,
      "learning_rate": 3.882183131525586e-05,
      "loss": 3.0851,
      "step": 192678
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6198925971984863,
      "learning_rate": 3.8819818773514336e-05,
      "loss": 2.7586,
      "step": 192679
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.287966012954712,
      "learning_rate": 3.8817806280331066e-05,
      "loss": 3.0688,
      "step": 192680
    },
    {
      "epoch": 2.51,
      "grad_norm": 5.939713478088379,
      "learning_rate": 3.881579383570659e-05,
      "loss": 2.7593,
      "step": 192681
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.631859302520752,
      "learning_rate": 3.88137814396411e-05,
      "loss": 2.6629,
      "step": 192682
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.602524518966675,
      "learning_rate": 3.8811769092135204e-05,
      "loss": 2.9898,
      "step": 192683
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.459205389022827,
      "learning_rate": 3.8809756793188996e-05,
      "loss": 3.057,
      "step": 192684
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7344982624053955,
      "learning_rate": 3.8807744542803044e-05,
      "loss": 2.7336,
      "step": 192685
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.681253671646118,
      "learning_rate": 3.8805732340977613e-05,
      "loss": 2.7376,
      "step": 192686
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7086398601531982,
      "learning_rate": 3.880372018771317e-05,
      "loss": 3.0678,
      "step": 192687
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.2186279296875,
      "learning_rate": 3.880170808300999e-05,
      "loss": 2.8539,
      "step": 192688
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.0805277824401855,
      "learning_rate": 3.879969602686862e-05,
      "loss": 2.7933,
      "step": 192689
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5493383407592773,
      "learning_rate": 3.879768401928921e-05,
      "loss": 3.0775,
      "step": 192690
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.395932674407959,
      "learning_rate": 3.8795672060272285e-05,
      "loss": 2.8956,
      "step": 192691
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.263062000274658,
      "learning_rate": 3.879366014981815e-05,
      "loss": 2.8123,
      "step": 192692
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7981626987457275,
      "learning_rate": 3.8791648287927235e-05,
      "loss": 2.9362,
      "step": 192693
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.680112361907959,
      "learning_rate": 3.878963647459981e-05,
      "loss": 2.817,
      "step": 192694
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.340106725692749,
      "learning_rate": 3.878762470983646e-05,
      "loss": 2.7884,
      "step": 192695
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.022689342498779,
      "learning_rate": 3.8785612993637304e-05,
      "loss": 2.6869,
      "step": 192696
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7815163135528564,
      "learning_rate": 3.87836013260029e-05,
      "loss": 3.1165,
      "step": 192697
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.152923583984375,
      "learning_rate": 3.878158970693348e-05,
      "loss": 2.9782,
      "step": 192698
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.90984845161438,
      "learning_rate": 3.877957813642955e-05,
      "loss": 2.8341,
      "step": 192699
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9478375911712646,
      "learning_rate": 3.8777566614491395e-05,
      "loss": 2.8757,
      "step": 192700
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1343257427215576,
      "learning_rate": 3.877555514111953e-05,
      "loss": 3.1189,
      "step": 192701
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6609385013580322,
      "learning_rate": 3.877354371631408e-05,
      "loss": 2.7451,
      "step": 192702
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.636425495147705,
      "learning_rate": 3.877153234007562e-05,
      "loss": 2.921,
      "step": 192703
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.804950714111328,
      "learning_rate": 3.876952101240444e-05,
      "loss": 2.9314,
      "step": 192704
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1639244556427,
      "learning_rate": 3.8767509733300975e-05,
      "loss": 2.9517,
      "step": 192705
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.941659450531006,
      "learning_rate": 3.8765498502765494e-05,
      "loss": 2.9781,
      "step": 192706
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.077885866165161,
      "learning_rate": 3.876348732079849e-05,
      "loss": 2.6551,
      "step": 192707
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4568822383880615,
      "learning_rate": 3.8761476187400306e-05,
      "loss": 2.8092,
      "step": 192708
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.694258689880371,
      "learning_rate": 3.875946510257128e-05,
      "loss": 2.8318,
      "step": 192709
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6104509830474854,
      "learning_rate": 3.8757454066311753e-05,
      "loss": 3.0105,
      "step": 192710
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8746485710144043,
      "learning_rate": 3.8755443078622184e-05,
      "loss": 2.7371,
      "step": 192711
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.028444766998291,
      "learning_rate": 3.875343213950286e-05,
      "loss": 2.8895,
      "step": 192712
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5778276920318604,
      "learning_rate": 3.8751421248954286e-05,
      "loss": 2.9521,
      "step": 192713
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3830151557922363,
      "learning_rate": 3.874941040697675e-05,
      "loss": 2.9166,
      "step": 192714
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.857272148132324,
      "learning_rate": 3.874739961357061e-05,
      "loss": 3.0271,
      "step": 192715
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.506087303161621,
      "learning_rate": 3.8745388868736205e-05,
      "loss": 2.7688,
      "step": 192716
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3071954250335693,
      "learning_rate": 3.874337817247401e-05,
      "loss": 3.0504,
      "step": 192717
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9032745361328125,
      "learning_rate": 3.874136752478434e-05,
      "loss": 3.1527,
      "step": 192718
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6969728469848633,
      "learning_rate": 3.873935692566761e-05,
      "loss": 2.6715,
      "step": 192719
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8055598735809326,
      "learning_rate": 3.873734637512416e-05,
      "loss": 2.7848,
      "step": 192720
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0400753021240234,
      "learning_rate": 3.8735335873154384e-05,
      "loss": 2.8991,
      "step": 192721
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9988086223602295,
      "learning_rate": 3.873332541975855e-05,
      "loss": 3.0484,
      "step": 192722
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.345877170562744,
      "learning_rate": 3.873131501493724e-05,
      "loss": 2.8331,
      "step": 192723
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4455573558807373,
      "learning_rate": 3.87293046586906e-05,
      "loss": 2.7173,
      "step": 192724
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.211594820022583,
      "learning_rate": 3.87272943510192e-05,
      "loss": 2.8763,
      "step": 192725
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.585721969604492,
      "learning_rate": 3.872528409192331e-05,
      "loss": 2.9424,
      "step": 192726
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.577305793762207,
      "learning_rate": 3.872327388140334e-05,
      "loss": 2.7259,
      "step": 192727
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.540342092514038,
      "learning_rate": 3.872126371945957e-05,
      "loss": 2.5913,
      "step": 192728
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6928675174713135,
      "learning_rate": 3.871925360609251e-05,
      "loss": 3.1154,
      "step": 192729
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.500720500946045,
      "learning_rate": 3.87172435413024e-05,
      "loss": 3.1275,
      "step": 192730
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.3830273151397705,
      "learning_rate": 3.871523352508976e-05,
      "loss": 3.2847,
      "step": 192731
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.759031295776367,
      "learning_rate": 3.87132235574549e-05,
      "loss": 3.014,
      "step": 192732
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5158958435058594,
      "learning_rate": 3.8711213638398173e-05,
      "loss": 3.0701,
      "step": 192733
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.881479024887085,
      "learning_rate": 3.870920376791993e-05,
      "loss": 3.0044,
      "step": 192734
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5570273399353027,
      "learning_rate": 3.870719394602062e-05,
      "loss": 3.0948,
      "step": 192735
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.152665853500366,
      "learning_rate": 3.870518417270049e-05,
      "loss": 2.8337,
      "step": 192736
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3094990253448486,
      "learning_rate": 3.87031744479601e-05,
      "loss": 2.8966,
      "step": 192737
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.98098087310791,
      "learning_rate": 3.8701164771799674e-05,
      "loss": 2.8196,
      "step": 192738
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2911527156829834,
      "learning_rate": 3.8699155144219666e-05,
      "loss": 3.0863,
      "step": 192739
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.094467878341675,
      "learning_rate": 3.869714556522036e-05,
      "loss": 2.9785,
      "step": 192740
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1035032272338867,
      "learning_rate": 3.869513603480225e-05,
      "loss": 2.7219,
      "step": 192741
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7715365886688232,
      "learning_rate": 3.869312655296556e-05,
      "loss": 2.7736,
      "step": 192742
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8317244052886963,
      "learning_rate": 3.8691117119710834e-05,
      "loss": 3.1021,
      "step": 192743
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.552694082260132,
      "learning_rate": 3.868910773503835e-05,
      "loss": 2.7446,
      "step": 192744
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.561824083328247,
      "learning_rate": 3.86870983989485e-05,
      "loss": 2.7655,
      "step": 192745
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8819382190704346,
      "learning_rate": 3.8685089111441594e-05,
      "loss": 2.6956,
      "step": 192746
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.948129177093506,
      "learning_rate": 3.868307987251812e-05,
      "loss": 2.6125,
      "step": 192747
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9164199829101562,
      "learning_rate": 3.8681070682178316e-05,
      "loss": 2.799,
      "step": 192748
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.525125026702881,
      "learning_rate": 3.8679061540422715e-05,
      "loss": 2.7705,
      "step": 192749
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.711667060852051,
      "learning_rate": 3.867705244725162e-05,
      "loss": 2.9473,
      "step": 192750
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.2252275943756104,
      "learning_rate": 3.867504340266533e-05,
      "loss": 2.9564,
      "step": 192751
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8281266689300537,
      "learning_rate": 3.8673034406664306e-05,
      "loss": 2.9616,
      "step": 192752
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.879855155944824,
      "learning_rate": 3.867102545924895e-05,
      "loss": 2.9722,
      "step": 192753
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6715657711029053,
      "learning_rate": 3.8669016560419464e-05,
      "loss": 2.8923,
      "step": 192754
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8059215545654297,
      "learning_rate": 3.8667007710176445e-05,
      "loss": 2.7922,
      "step": 192755
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.558377265930176,
      "learning_rate": 3.8664998908520164e-05,
      "loss": 2.8588,
      "step": 192756
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.558463096618652,
      "learning_rate": 3.8662990155450914e-05,
      "loss": 2.8804,
      "step": 192757
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.909111499786377,
      "learning_rate": 3.86609814509692e-05,
      "loss": 2.8257,
      "step": 192758
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.747019052505493,
      "learning_rate": 3.865897279507528e-05,
      "loss": 3.1905,
      "step": 192759
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.847848415374756,
      "learning_rate": 3.86569641877697e-05,
      "loss": 2.8921,
      "step": 192760
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5121283531188965,
      "learning_rate": 3.865495562905269e-05,
      "loss": 2.955,
      "step": 192761
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.1843953132629395,
      "learning_rate": 3.865294711892464e-05,
      "loss": 2.9069,
      "step": 192762
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.722273349761963,
      "learning_rate": 3.8650938657385886e-05,
      "loss": 2.9318,
      "step": 192763
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.415238380432129,
      "learning_rate": 3.8648930244436935e-05,
      "loss": 3.0007,
      "step": 192764
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8043136596679688,
      "learning_rate": 3.864692188007802e-05,
      "loss": 2.8887,
      "step": 192765
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2139835357666016,
      "learning_rate": 3.8644913564309635e-05,
      "loss": 2.4806,
      "step": 192766
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5304794311523438,
      "learning_rate": 3.864290529713208e-05,
      "loss": 2.9062,
      "step": 192767
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0639710426330566,
      "learning_rate": 3.8640897078545755e-05,
      "loss": 2.8333,
      "step": 192768
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8374884128570557,
      "learning_rate": 3.863888890855096e-05,
      "loss": 2.975,
      "step": 192769
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.73988676071167,
      "learning_rate": 3.863688078714816e-05,
      "loss": 2.7849,
      "step": 192770
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9286372661590576,
      "learning_rate": 3.863487271433766e-05,
      "loss": 2.5609,
      "step": 192771
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9971976280212402,
      "learning_rate": 3.863286469011993e-05,
      "loss": 3.1693,
      "step": 192772
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.960458755493164,
      "learning_rate": 3.863085671449525e-05,
      "loss": 3.0113,
      "step": 192773
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1802151203155518,
      "learning_rate": 3.862884878746404e-05,
      "loss": 2.9664,
      "step": 192774
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.734433174133301,
      "learning_rate": 3.862684090902669e-05,
      "loss": 3.1895,
      "step": 192775
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8856849670410156,
      "learning_rate": 3.862483307918354e-05,
      "loss": 3.1356,
      "step": 192776
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6866939067840576,
      "learning_rate": 3.862282529793491e-05,
      "loss": 2.749,
      "step": 192777
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8418970108032227,
      "learning_rate": 3.862081756528128e-05,
      "loss": 3.0837,
      "step": 192778
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.838998794555664,
      "learning_rate": 3.861880988122291e-05,
      "loss": 2.8707,
      "step": 192779
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.678466320037842,
      "learning_rate": 3.861680224576034e-05,
      "loss": 2.894,
      "step": 192780
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.684321880340576,
      "learning_rate": 3.861479465889379e-05,
      "loss": 2.9211,
      "step": 192781
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.207159996032715,
      "learning_rate": 3.86127871206237e-05,
      "loss": 2.9996,
      "step": 192782
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.158652305603027,
      "learning_rate": 3.861077963095037e-05,
      "loss": 3.0398,
      "step": 192783
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0103702545166016,
      "learning_rate": 3.8608772189874306e-05,
      "loss": 2.978,
      "step": 192784
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1196041107177734,
      "learning_rate": 3.860676479739573e-05,
      "loss": 3.0527,
      "step": 192785
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.762859344482422,
      "learning_rate": 3.8604757453515144e-05,
      "loss": 3.1939,
      "step": 192786
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.233950138092041,
      "learning_rate": 3.8602750158232855e-05,
      "loss": 2.7449,
      "step": 192787
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.317368984222412,
      "learning_rate": 3.8600742911549285e-05,
      "loss": 2.9429,
      "step": 192788
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.024165630340576,
      "learning_rate": 3.859873571346468e-05,
      "loss": 3.0207,
      "step": 192789
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.671971082687378,
      "learning_rate": 3.8596728563979594e-05,
      "loss": 2.9905,
      "step": 192790
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.017063617706299,
      "learning_rate": 3.859472146309423e-05,
      "loss": 3.0795,
      "step": 192791
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.580685615539551,
      "learning_rate": 3.85927144108091e-05,
      "loss": 3.1184,
      "step": 192792
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.471390247344971,
      "learning_rate": 3.8590707407124525e-05,
      "loss": 2.9285,
      "step": 192793
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.119645118713379,
      "learning_rate": 3.858870045204087e-05,
      "loss": 3.0726,
      "step": 192794
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.673701763153076,
      "learning_rate": 3.8586693545558467e-05,
      "loss": 2.9841,
      "step": 192795
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5749168395996094,
      "learning_rate": 3.858468668767776e-05,
      "loss": 2.8535,
      "step": 192796
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.801077127456665,
      "learning_rate": 3.858267987839907e-05,
      "loss": 2.8606,
      "step": 192797
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0362532138824463,
      "learning_rate": 3.8580673117722835e-05,
      "loss": 3.1098,
      "step": 192798
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6233956813812256,
      "learning_rate": 3.857866640564936e-05,
      "loss": 3.1121,
      "step": 192799
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.858898401260376,
      "learning_rate": 3.8576659742179105e-05,
      "loss": 2.8544,
      "step": 192800
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1309337615966797,
      "learning_rate": 3.857465312731227e-05,
      "loss": 2.9631,
      "step": 192801
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.133723258972168,
      "learning_rate": 3.8572646561049427e-05,
      "loss": 3.1225,
      "step": 192802
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3274526596069336,
      "learning_rate": 3.8570640043390797e-05,
      "loss": 2.9457,
      "step": 192803
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1819028854370117,
      "learning_rate": 3.8568633574336896e-05,
      "loss": 2.9009,
      "step": 192804
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.046271562576294,
      "learning_rate": 3.856662715388801e-05,
      "loss": 2.9628,
      "step": 192805
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.916419744491577,
      "learning_rate": 3.856462078204451e-05,
      "loss": 3.1546,
      "step": 192806
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1799662113189697,
      "learning_rate": 3.8562614458806726e-05,
      "loss": 2.9089,
      "step": 192807
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.2713165283203125,
      "learning_rate": 3.856060818417517e-05,
      "loss": 3.0224,
      "step": 192808
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8379764556884766,
      "learning_rate": 3.855860195815006e-05,
      "loss": 2.9654,
      "step": 192809
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3242099285125732,
      "learning_rate": 3.85565957807319e-05,
      "loss": 2.9644,
      "step": 192810
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8435218334198,
      "learning_rate": 3.8554589651921e-05,
      "loss": 2.8728,
      "step": 192811
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7636568546295166,
      "learning_rate": 3.855258357171774e-05,
      "loss": 2.6143,
      "step": 192812
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.456620693206787,
      "learning_rate": 3.855057754012244e-05,
      "loss": 2.9403,
      "step": 192813
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.650900363922119,
      "learning_rate": 3.8548571557135555e-05,
      "loss": 3.0299,
      "step": 192814
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6405463218688965,
      "learning_rate": 3.8546565622757385e-05,
      "loss": 2.8188,
      "step": 192815
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4711029529571533,
      "learning_rate": 3.8544559736988403e-05,
      "loss": 2.9204,
      "step": 192816
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.867664337158203,
      "learning_rate": 3.8542553899828935e-05,
      "loss": 2.8842,
      "step": 192817
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.394345283508301,
      "learning_rate": 3.854054811127931e-05,
      "loss": 2.7658,
      "step": 192818
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.174601078033447,
      "learning_rate": 3.853854237133987e-05,
      "loss": 2.9858,
      "step": 192819
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.102768659591675,
      "learning_rate": 3.853653668001115e-05,
      "loss": 2.952,
      "step": 192820
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7901735305786133,
      "learning_rate": 3.853453103729335e-05,
      "loss": 3.0547,
      "step": 192821
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.328169345855713,
      "learning_rate": 3.8532525443186956e-05,
      "loss": 2.933,
      "step": 192822
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4556233882904053,
      "learning_rate": 3.853051989769231e-05,
      "loss": 3.0259,
      "step": 192823
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.1167426109313965,
      "learning_rate": 3.8528514400809784e-05,
      "loss": 2.9046,
      "step": 192824
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6513047218322754,
      "learning_rate": 3.85265089525397e-05,
      "loss": 2.7304,
      "step": 192825
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6180524826049805,
      "learning_rate": 3.85245035528825e-05,
      "loss": 2.8206,
      "step": 192826
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4784841537475586,
      "learning_rate": 3.852249820183848e-05,
      "loss": 3.1949,
      "step": 192827
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.574082612991333,
      "learning_rate": 3.852049289940814e-05,
      "loss": 3.0244,
      "step": 192828
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.900825023651123,
      "learning_rate": 3.851848764559178e-05,
      "loss": 2.8583,
      "step": 192829
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.893462657928467,
      "learning_rate": 3.851648244038973e-05,
      "loss": 2.9324,
      "step": 192830
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5107834339141846,
      "learning_rate": 3.8514477283802356e-05,
      "loss": 3.0846,
      "step": 192831
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1363022327423096,
      "learning_rate": 3.8512472175830167e-05,
      "loss": 2.9397,
      "step": 192832
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.203441858291626,
      "learning_rate": 3.851046711647335e-05,
      "loss": 3.0972,
      "step": 192833
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.38373064994812,
      "learning_rate": 3.8508462105732455e-05,
      "loss": 2.6666,
      "step": 192834
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8113043308258057,
      "learning_rate": 3.850645714360776e-05,
      "loss": 2.9205,
      "step": 192835
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.584444761276245,
      "learning_rate": 3.850445223009958e-05,
      "loss": 3.0495,
      "step": 192836
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.741823196411133,
      "learning_rate": 3.850244736520844e-05,
      "loss": 2.7643,
      "step": 192837
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8486647605895996,
      "learning_rate": 3.850044254893465e-05,
      "loss": 3.08,
      "step": 192838
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.240972995758057,
      "learning_rate": 3.849843778127847e-05,
      "loss": 3.1139,
      "step": 192839
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9778923988342285,
      "learning_rate": 3.849643306224043e-05,
      "loss": 3.0578,
      "step": 192840
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.119715452194214,
      "learning_rate": 3.84944283918208e-05,
      "loss": 3.0983,
      "step": 192841
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2030763626098633,
      "learning_rate": 3.849242377002001e-05,
      "loss": 2.8635,
      "step": 192842
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4971790313720703,
      "learning_rate": 3.849041919683846e-05,
      "loss": 2.9732,
      "step": 192843
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2415616512298584,
      "learning_rate": 3.848841467227639e-05,
      "loss": 3.0785,
      "step": 192844
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6796858310699463,
      "learning_rate": 3.848641019633433e-05,
      "loss": 2.9309,
      "step": 192845
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.073408603668213,
      "learning_rate": 3.8484405769012574e-05,
      "loss": 2.6301,
      "step": 192846
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9140939712524414,
      "learning_rate": 3.848240139031146e-05,
      "loss": 2.6667,
      "step": 192847
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8023300170898438,
      "learning_rate": 3.848039706023145e-05,
      "loss": 2.8233,
      "step": 192848
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.163114309310913,
      "learning_rate": 3.8478392778772885e-05,
      "loss": 2.8189,
      "step": 192849
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.473695993423462,
      "learning_rate": 3.8476388545936055e-05,
      "loss": 2.9694,
      "step": 192850
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.767011880874634,
      "learning_rate": 3.8474384361721465e-05,
      "loss": 2.7622,
      "step": 192851
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.213778257369995,
      "learning_rate": 3.847238022612942e-05,
      "loss": 3.2018,
      "step": 192852
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.720898389816284,
      "learning_rate": 3.847037613916024e-05,
      "loss": 2.9042,
      "step": 192853
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2735118865966797,
      "learning_rate": 3.8468372100814406e-05,
      "loss": 2.9757,
      "step": 192854
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.03645658493042,
      "learning_rate": 3.846636811109224e-05,
      "loss": 3.3189,
      "step": 192855
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4769020080566406,
      "learning_rate": 3.846436416999404e-05,
      "loss": 3.3554,
      "step": 192856
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.949389696121216,
      "learning_rate": 3.846236027752032e-05,
      "loss": 2.8052,
      "step": 192857
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5075953006744385,
      "learning_rate": 3.8460356433671336e-05,
      "loss": 2.9539,
      "step": 192858
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1497793197631836,
      "learning_rate": 3.845835263844755e-05,
      "loss": 2.7701,
      "step": 192859
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5172483921051025,
      "learning_rate": 3.845634889184934e-05,
      "loss": 3.0274,
      "step": 192860
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8099637031555176,
      "learning_rate": 3.8454345193877004e-05,
      "loss": 2.989,
      "step": 192861
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2285068035125732,
      "learning_rate": 3.845234154453086e-05,
      "loss": 2.8872,
      "step": 192862
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5041351318359375,
      "learning_rate": 3.845033794381143e-05,
      "loss": 2.898,
      "step": 192863
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.904981851577759,
      "learning_rate": 3.844833439171896e-05,
      "loss": 2.9114,
      "step": 192864
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.958960771560669,
      "learning_rate": 3.844633088825396e-05,
      "loss": 2.9667,
      "step": 192865
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5964198112487793,
      "learning_rate": 3.844432743341673e-05,
      "loss": 2.8778,
      "step": 192866
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1797258853912354,
      "learning_rate": 3.844232402720761e-05,
      "loss": 2.9179,
      "step": 192867
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.743196487426758,
      "learning_rate": 3.8440320669626945e-05,
      "loss": 2.898,
      "step": 192868
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.259711742401123,
      "learning_rate": 3.843831736067522e-05,
      "loss": 2.691,
      "step": 192869
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6430044174194336,
      "learning_rate": 3.84363141003527e-05,
      "loss": 2.8494,
      "step": 192870
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.527322292327881,
      "learning_rate": 3.8434310888659866e-05,
      "loss": 2.6648,
      "step": 192871
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8357129096984863,
      "learning_rate": 3.8432307725597046e-05,
      "loss": 2.9612,
      "step": 192872
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3321592807769775,
      "learning_rate": 3.843030461116458e-05,
      "loss": 2.7391,
      "step": 192873
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4503798484802246,
      "learning_rate": 3.8428301545362795e-05,
      "loss": 2.8871,
      "step": 192874
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.520582437515259,
      "learning_rate": 3.8426298528192164e-05,
      "loss": 3.0297,
      "step": 192875
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.389394521713257,
      "learning_rate": 3.842429555965301e-05,
      "loss": 2.9595,
      "step": 192876
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9268605709075928,
      "learning_rate": 3.842229263974574e-05,
      "loss": 2.8324,
      "step": 192877
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4841136932373047,
      "learning_rate": 3.8420289768470715e-05,
      "loss": 3.189,
      "step": 192878
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6100521087646484,
      "learning_rate": 3.8418286945828315e-05,
      "loss": 2.6891,
      "step": 192879
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1161434650421143,
      "learning_rate": 3.8416284171818814e-05,
      "loss": 2.7703,
      "step": 192880
    },
    {
      "epoch": 2.51,
      "grad_norm": 5.895283222198486,
      "learning_rate": 3.8414281446442705e-05,
      "loss": 3.139,
      "step": 192881
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.913217782974243,
      "learning_rate": 3.84122787697003e-05,
      "loss": 2.8178,
      "step": 192882
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0831241607666016,
      "learning_rate": 3.841027614159202e-05,
      "loss": 3.0407,
      "step": 192883
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.938377618789673,
      "learning_rate": 3.8408273562118205e-05,
      "loss": 2.8865,
      "step": 192884
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.37581205368042,
      "learning_rate": 3.840627103127926e-05,
      "loss": 2.9206,
      "step": 192885
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3045654296875,
      "learning_rate": 3.8404268549075434e-05,
      "loss": 2.7676,
      "step": 192886
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5783815383911133,
      "learning_rate": 3.840226611550727e-05,
      "loss": 2.9216,
      "step": 192887
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0204546451568604,
      "learning_rate": 3.840026373057501e-05,
      "loss": 3.017,
      "step": 192888
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7198996543884277,
      "learning_rate": 3.839826139427914e-05,
      "loss": 2.8032,
      "step": 192889
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.022218704223633,
      "learning_rate": 3.8396259106619965e-05,
      "loss": 2.8079,
      "step": 192890
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.694303035736084,
      "learning_rate": 3.839425686759785e-05,
      "loss": 2.73,
      "step": 192891
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.581939935684204,
      "learning_rate": 3.839225467721313e-05,
      "loss": 2.9981,
      "step": 192892
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8371520042419434,
      "learning_rate": 3.8390252535466304e-05,
      "loss": 3.0735,
      "step": 192893
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7253832817077637,
      "learning_rate": 3.838825044235757e-05,
      "loss": 2.9613,
      "step": 192894
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0264806747436523,
      "learning_rate": 3.83862483978875e-05,
      "loss": 2.9027,
      "step": 192895
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.758465051651001,
      "learning_rate": 3.838424640205635e-05,
      "loss": 2.9503,
      "step": 192896
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0066864490509033,
      "learning_rate": 3.83822444548645e-05,
      "loss": 2.9605,
      "step": 192897
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.838613986968994,
      "learning_rate": 3.838024255631227e-05,
      "loss": 2.9732,
      "step": 192898
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.409485101699829,
      "learning_rate": 3.837824070640013e-05,
      "loss": 2.9924,
      "step": 192899
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.571593999862671,
      "learning_rate": 3.837623890512839e-05,
      "loss": 2.9004,
      "step": 192900
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.857630729675293,
      "learning_rate": 3.83742371524975e-05,
      "loss": 3.0363,
      "step": 192901
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7385332584381104,
      "learning_rate": 3.837223544850777e-05,
      "loss": 2.9253,
      "step": 192902
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0255870819091797,
      "learning_rate": 3.837023379315957e-05,
      "loss": 3.0084,
      "step": 192903
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.477264165878296,
      "learning_rate": 3.8368232186453253e-05,
      "loss": 2.7437,
      "step": 192904
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4389097690582275,
      "learning_rate": 3.836623062838926e-05,
      "loss": 3.0771,
      "step": 192905
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4759247303009033,
      "learning_rate": 3.836422911896786e-05,
      "loss": 2.8049,
      "step": 192906
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.77699875831604,
      "learning_rate": 3.836222765818955e-05,
      "loss": 3.0307,
      "step": 192907
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6639180183410645,
      "learning_rate": 3.836022624605456e-05,
      "loss": 3.1022,
      "step": 192908
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0302839279174805,
      "learning_rate": 3.835822488256349e-05,
      "loss": 2.9429,
      "step": 192909
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5674612522125244,
      "learning_rate": 3.835622356771645e-05,
      "loss": 2.8477,
      "step": 192910
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8976123332977295,
      "learning_rate": 3.8354222301513995e-05,
      "loss": 2.921,
      "step": 192911
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.905393123626709,
      "learning_rate": 3.8352221083956326e-05,
      "loss": 3.0062,
      "step": 192912
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.863727569580078,
      "learning_rate": 3.835021991504401e-05,
      "loss": 2.9271,
      "step": 192913
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7670929431915283,
      "learning_rate": 3.834821879477725e-05,
      "loss": 3.0746,
      "step": 192914
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8424885272979736,
      "learning_rate": 3.8346217723156644e-05,
      "loss": 3.1381,
      "step": 192915
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9693408012390137,
      "learning_rate": 3.8344216700182264e-05,
      "loss": 2.9182,
      "step": 192916
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.102122783660889,
      "learning_rate": 3.834221572585473e-05,
      "loss": 3.084,
      "step": 192917
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8249013423919678,
      "learning_rate": 3.834021480017422e-05,
      "loss": 2.8388,
      "step": 192918
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.9787051677703857,
      "learning_rate": 3.833821392314126e-05,
      "loss": 2.8453,
      "step": 192919
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1836464405059814,
      "learning_rate": 3.833621309475612e-05,
      "loss": 2.9734,
      "step": 192920
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0162734985351562,
      "learning_rate": 3.8334212315019294e-05,
      "loss": 2.9867,
      "step": 192921
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4716076850891113,
      "learning_rate": 3.833221158393106e-05,
      "loss": 2.8456,
      "step": 192922
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.141190767288208,
      "learning_rate": 3.833021090149181e-05,
      "loss": 3.0593,
      "step": 192923
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7486677169799805,
      "learning_rate": 3.832821026770184e-05,
      "loss": 3.0573,
      "step": 192924
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.690059661865234,
      "learning_rate": 3.832620968256169e-05,
      "loss": 2.7777,
      "step": 192925
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.916459083557129,
      "learning_rate": 3.832420914607156e-05,
      "loss": 2.6376,
      "step": 192926
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8326244354248047,
      "learning_rate": 3.8322208658231945e-05,
      "loss": 3.1246,
      "step": 192927
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3504343032836914,
      "learning_rate": 3.832020821904318e-05,
      "loss": 2.8687,
      "step": 192928
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.661684989929199,
      "learning_rate": 3.8318207828505634e-05,
      "loss": 2.9088,
      "step": 192929
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1523749828338623,
      "learning_rate": 3.83162074866196e-05,
      "loss": 2.9911,
      "step": 192930
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.990642786026001,
      "learning_rate": 3.831420719338558e-05,
      "loss": 2.9931,
      "step": 192931
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6323623657226562,
      "learning_rate": 3.8312206948803846e-05,
      "loss": 2.5179,
      "step": 192932
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.554443597793579,
      "learning_rate": 3.831020675287486e-05,
      "loss": 2.8256,
      "step": 192933
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.922797679901123,
      "learning_rate": 3.8308206605598955e-05,
      "loss": 3.0882,
      "step": 192934
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.486562252044678,
      "learning_rate": 3.830620650697643e-05,
      "loss": 3.0236,
      "step": 192935
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.021970748901367,
      "learning_rate": 3.830420645700779e-05,
      "loss": 2.9389,
      "step": 192936
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.75457501411438,
      "learning_rate": 3.830220645569333e-05,
      "loss": 3.0215,
      "step": 192937
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7059566974639893,
      "learning_rate": 3.830020650303338e-05,
      "loss": 3.1755,
      "step": 192938
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.380075454711914,
      "learning_rate": 3.829820659902841e-05,
      "loss": 3.0719,
      "step": 192939
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4290554523468018,
      "learning_rate": 3.829620674367872e-05,
      "loss": 3.1255,
      "step": 192940
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.48006272315979,
      "learning_rate": 3.829420693698468e-05,
      "loss": 3.0421,
      "step": 192941
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.818399667739868,
      "learning_rate": 3.8292207178946754e-05,
      "loss": 2.9931,
      "step": 192942
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8693277835845947,
      "learning_rate": 3.8290207469565136e-05,
      "loss": 3.1763,
      "step": 192943
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.386796712875366,
      "learning_rate": 3.828820780884043e-05,
      "loss": 3.1133,
      "step": 192944
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7180793285369873,
      "learning_rate": 3.8286208196772836e-05,
      "loss": 3.0101,
      "step": 192945
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0547525882720947,
      "learning_rate": 3.8284208633362825e-05,
      "loss": 3.0931,
      "step": 192946
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7536942958831787,
      "learning_rate": 3.8282209118610615e-05,
      "loss": 2.9856,
      "step": 192947
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8946430683135986,
      "learning_rate": 3.8280209652516755e-05,
      "loss": 3.0395,
      "step": 192948
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.706956148147583,
      "learning_rate": 3.827821023508151e-05,
      "loss": 2.7442,
      "step": 192949
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0004255771636963,
      "learning_rate": 3.8276210866305303e-05,
      "loss": 2.8797,
      "step": 192950
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7206997871398926,
      "learning_rate": 3.827421154618854e-05,
      "loss": 3.1042,
      "step": 192951
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.725385904312134,
      "learning_rate": 3.8272212274731486e-05,
      "loss": 2.7693,
      "step": 192952
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.738769769668579,
      "learning_rate": 3.827021305193454e-05,
      "loss": 3.2104,
      "step": 192953
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4035255908966064,
      "learning_rate": 3.826821387779817e-05,
      "loss": 2.9594,
      "step": 192954
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.579742193222046,
      "learning_rate": 3.826621475232261e-05,
      "loss": 2.9546,
      "step": 192955
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0144548416137695,
      "learning_rate": 3.826421567550836e-05,
      "loss": 2.9495,
      "step": 192956
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.539189100265503,
      "learning_rate": 3.8262216647355716e-05,
      "loss": 2.877,
      "step": 192957
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0852243900299072,
      "learning_rate": 3.826021766786508e-05,
      "loss": 3.2076,
      "step": 192958
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2444162368774414,
      "learning_rate": 3.825821873703675e-05,
      "loss": 3.0036,
      "step": 192959
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.457909345626831,
      "learning_rate": 3.825621985487123e-05,
      "loss": 2.9265,
      "step": 192960
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8081953525543213,
      "learning_rate": 3.8254221021368745e-05,
      "loss": 2.7647,
      "step": 192961
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4572877883911133,
      "learning_rate": 3.82522222365298e-05,
      "loss": 2.9802,
      "step": 192962
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5363662242889404,
      "learning_rate": 3.825022350035474e-05,
      "loss": 2.9493,
      "step": 192963
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9524049758911133,
      "learning_rate": 3.8248224812843875e-05,
      "loss": 2.9011,
      "step": 192964
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6807303428649902,
      "learning_rate": 3.824622617399755e-05,
      "loss": 2.9905,
      "step": 192965
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.921909809112549,
      "learning_rate": 3.8244227583816235e-05,
      "loss": 3.0087,
      "step": 192966
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8617641925811768,
      "learning_rate": 3.824222904230022e-05,
      "loss": 2.7306,
      "step": 192967
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3437535762786865,
      "learning_rate": 3.824023054944998e-05,
      "loss": 2.6734,
      "step": 192968
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5586161613464355,
      "learning_rate": 3.823823210526581e-05,
      "loss": 2.811,
      "step": 192969
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.34163498878479,
      "learning_rate": 3.823623370974811e-05,
      "loss": 2.9665,
      "step": 192970
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.349339723587036,
      "learning_rate": 3.823423536289715e-05,
      "loss": 2.9239,
      "step": 192971
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7695233821868896,
      "learning_rate": 3.823223706471346e-05,
      "loss": 2.9207,
      "step": 192972
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0660178661346436,
      "learning_rate": 3.8230238815197264e-05,
      "loss": 2.9516,
      "step": 192973
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0966339111328125,
      "learning_rate": 3.8228240614349084e-05,
      "loss": 2.7289,
      "step": 192974
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6520004272460938,
      "learning_rate": 3.822624246216917e-05,
      "loss": 2.8127,
      "step": 192975
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8314592838287354,
      "learning_rate": 3.8224244358658015e-05,
      "loss": 2.9008,
      "step": 192976
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.980752944946289,
      "learning_rate": 3.822224630381584e-05,
      "loss": 2.8534,
      "step": 192977
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.154564380645752,
      "learning_rate": 3.822024829764313e-05,
      "loss": 2.8241,
      "step": 192978
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.88918399810791,
      "learning_rate": 3.821825034014016e-05,
      "loss": 2.8587,
      "step": 192979
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9430794715881348,
      "learning_rate": 3.821625243130741e-05,
      "loss": 3.0378,
      "step": 192980
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.852583646774292,
      "learning_rate": 3.821425457114514e-05,
      "loss": 2.9867,
      "step": 192981
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1830971240997314,
      "learning_rate": 3.8212256759653936e-05,
      "loss": 2.8961,
      "step": 192982
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.838196039199829,
      "learning_rate": 3.821025899683386e-05,
      "loss": 2.9894,
      "step": 192983
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.118252754211426,
      "learning_rate": 3.820826128268553e-05,
      "loss": 3.1372,
      "step": 192984
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.973172903060913,
      "learning_rate": 3.820626361720912e-05,
      "loss": 2.9486,
      "step": 192985
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8559021949768066,
      "learning_rate": 3.820426600040518e-05,
      "loss": 2.9246,
      "step": 192986
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.879805088043213,
      "learning_rate": 3.820226843227397e-05,
      "loss": 3.102,
      "step": 192987
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5502002239227295,
      "learning_rate": 3.820027091281603e-05,
      "loss": 2.9778,
      "step": 192988
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9071590900421143,
      "learning_rate": 3.819827344203146e-05,
      "loss": 3.1224,
      "step": 192989
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8677265644073486,
      "learning_rate": 3.819627601992081e-05,
      "loss": 2.8047,
      "step": 192990
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3478240966796875,
      "learning_rate": 3.81942786464844e-05,
      "loss": 2.9613,
      "step": 192991
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5806210041046143,
      "learning_rate": 3.819228132172265e-05,
      "loss": 2.8144,
      "step": 192992
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3507373332977295,
      "learning_rate": 3.8190284045635833e-05,
      "loss": 2.8796,
      "step": 192993
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.510122537612915,
      "learning_rate": 3.818828681822451e-05,
      "loss": 3.0249,
      "step": 192994
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.682565927505493,
      "learning_rate": 3.818628963948882e-05,
      "loss": 2.6456,
      "step": 192995
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.433696985244751,
      "learning_rate": 3.8184292509429295e-05,
      "loss": 3.1235,
      "step": 192996
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4175679683685303,
      "learning_rate": 3.8182295428046164e-05,
      "loss": 2.9213,
      "step": 192997
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0256669521331787,
      "learning_rate": 3.818029839533999e-05,
      "loss": 3.1337,
      "step": 192998
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1548428535461426,
      "learning_rate": 3.8178301411310955e-05,
      "loss": 2.7917,
      "step": 192999
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.574712038040161,
      "learning_rate": 3.817630447595964e-05,
      "loss": 2.9739,
      "step": 193000
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.059664249420166,
      "learning_rate": 3.817430758928616e-05,
      "loss": 3.1137,
      "step": 193001
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7655723094940186,
      "learning_rate": 3.81723107512911e-05,
      "loss": 2.9238,
      "step": 193002
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2154734134674072,
      "learning_rate": 3.817031396197467e-05,
      "loss": 2.9138,
      "step": 193003
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9468820095062256,
      "learning_rate": 3.8168317221337405e-05,
      "loss": 2.8627,
      "step": 193004
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6730010509490967,
      "learning_rate": 3.8166320529379535e-05,
      "loss": 2.7497,
      "step": 193005
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6068224906921387,
      "learning_rate": 3.816432388610152e-05,
      "loss": 2.8434,
      "step": 193006
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.103919506072998,
      "learning_rate": 3.8162327291503705e-05,
      "loss": 3.1585,
      "step": 193007
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.736560821533203,
      "learning_rate": 3.816033074558648e-05,
      "loss": 2.9868,
      "step": 193008
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.829768180847168,
      "learning_rate": 3.815833424835011e-05,
      "loss": 2.9283,
      "step": 193009
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.1470723152160645,
      "learning_rate": 3.81563377997951e-05,
      "loss": 3.1082,
      "step": 193010
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.009469032287598,
      "learning_rate": 3.815434139992175e-05,
      "loss": 2.7753,
      "step": 193011
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4630160331726074,
      "learning_rate": 3.8152345048730456e-05,
      "loss": 2.8394,
      "step": 193012
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.860614776611328,
      "learning_rate": 3.8150348746221625e-05,
      "loss": 2.7695,
      "step": 193013
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6136159896850586,
      "learning_rate": 3.8148352492395584e-05,
      "loss": 2.7491,
      "step": 193014
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7470967769622803,
      "learning_rate": 3.8146356287252634e-05,
      "loss": 2.8303,
      "step": 193015
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9887845516204834,
      "learning_rate": 3.8144360130793275e-05,
      "loss": 3.0185,
      "step": 193016
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.622633218765259,
      "learning_rate": 3.8142364023017765e-05,
      "loss": 2.8935,
      "step": 193017
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6685705184936523,
      "learning_rate": 3.814036796392662e-05,
      "loss": 2.9083,
      "step": 193018
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.662612199783325,
      "learning_rate": 3.8138371953520094e-05,
      "loss": 2.996,
      "step": 193019
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.246913194656372,
      "learning_rate": 3.813637599179853e-05,
      "loss": 3.0505,
      "step": 193020
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.364192008972168,
      "learning_rate": 3.813438007876242e-05,
      "loss": 3.1039,
      "step": 193021
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2277846336364746,
      "learning_rate": 3.813238421441209e-05,
      "loss": 2.7612,
      "step": 193022
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3272340297698975,
      "learning_rate": 3.813038839874779e-05,
      "loss": 2.9513,
      "step": 193023
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7688844203948975,
      "learning_rate": 3.812839263177011e-05,
      "loss": 3.0298,
      "step": 193024
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2281253337860107,
      "learning_rate": 3.812639691347925e-05,
      "loss": 3.1079,
      "step": 193025
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.020482063293457,
      "learning_rate": 3.812440124387561e-05,
      "loss": 2.6858,
      "step": 193026
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.825000047683716,
      "learning_rate": 3.812240562295963e-05,
      "loss": 2.9391,
      "step": 193027
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8139147758483887,
      "learning_rate": 3.8120410050731665e-05,
      "loss": 2.9094,
      "step": 193028
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.279432773590088,
      "learning_rate": 3.811841452719199e-05,
      "loss": 2.7228,
      "step": 193029
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8889172077178955,
      "learning_rate": 3.81164190523411e-05,
      "loss": 2.7275,
      "step": 193030
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5450050830841064,
      "learning_rate": 3.811442362617929e-05,
      "loss": 3.055,
      "step": 193031
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.861280918121338,
      "learning_rate": 3.8112428248706906e-05,
      "loss": 2.7812,
      "step": 193032
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.707996129989624,
      "learning_rate": 3.8110432919924436e-05,
      "loss": 2.8301,
      "step": 193033
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.691124200820923,
      "learning_rate": 3.810843763983209e-05,
      "loss": 2.8588,
      "step": 193034
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.736424207687378,
      "learning_rate": 3.8106442408430424e-05,
      "loss": 3.0012,
      "step": 193035
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6437461376190186,
      "learning_rate": 3.8104447225719714e-05,
      "loss": 3.3039,
      "step": 193036
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2350125312805176,
      "learning_rate": 3.810245209170032e-05,
      "loss": 3.0864,
      "step": 193037
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.970395565032959,
      "learning_rate": 3.810045700637254e-05,
      "loss": 3.0695,
      "step": 193038
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3071818351745605,
      "learning_rate": 3.809846196973691e-05,
      "loss": 3.0257,
      "step": 193039
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.997746229171753,
      "learning_rate": 3.809646698179367e-05,
      "loss": 2.9268,
      "step": 193040
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.813312292098999,
      "learning_rate": 3.809447204254331e-05,
      "loss": 2.8638,
      "step": 193041
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.801154613494873,
      "learning_rate": 3.8092477151986034e-05,
      "loss": 2.802,
      "step": 193042
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.909169912338257,
      "learning_rate": 3.8090482310122474e-05,
      "loss": 2.9387,
      "step": 193043
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8540101051330566,
      "learning_rate": 3.808848751695266e-05,
      "loss": 2.9382,
      "step": 193044
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.695408821105957,
      "learning_rate": 3.808649277247726e-05,
      "loss": 3.1743,
      "step": 193045
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6553685665130615,
      "learning_rate": 3.808449807669641e-05,
      "loss": 2.8144,
      "step": 193046
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.290673732757568,
      "learning_rate": 3.808250342961071e-05,
      "loss": 2.908,
      "step": 193047
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.709534168243408,
      "learning_rate": 3.8080508831220324e-05,
      "loss": 3.3072,
      "step": 193048
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6162047386169434,
      "learning_rate": 3.807851428152585e-05,
      "loss": 2.9035,
      "step": 193049
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8536014556884766,
      "learning_rate": 3.8076519780527396e-05,
      "loss": 3.0647,
      "step": 193050
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.718412399291992,
      "learning_rate": 3.8074525328225516e-05,
      "loss": 3.154,
      "step": 193051
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7232301235198975,
      "learning_rate": 3.807253092462045e-05,
      "loss": 3.0836,
      "step": 193052
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.3604958057403564,
      "learning_rate": 3.8070536569712727e-05,
      "loss": 3.0135,
      "step": 193053
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.819484233856201,
      "learning_rate": 3.806854226350259e-05,
      "loss": 2.7863,
      "step": 193054
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.3392446041107178,
      "learning_rate": 3.806654800599056e-05,
      "loss": 2.8337,
      "step": 193055
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2847983837127686,
      "learning_rate": 3.806455379717678e-05,
      "loss": 2.8657,
      "step": 193056
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7939229011535645,
      "learning_rate": 3.8062559637061806e-05,
      "loss": 2.9507,
      "step": 193057
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.944089889526367,
      "learning_rate": 3.806056552564588e-05,
      "loss": 2.9496,
      "step": 193058
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7164759635925293,
      "learning_rate": 3.80585714629295e-05,
      "loss": 3.037,
      "step": 193059
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.799746036529541,
      "learning_rate": 3.805657744891293e-05,
      "loss": 2.8404,
      "step": 193060
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5943541526794434,
      "learning_rate": 3.8054583483596704e-05,
      "loss": 2.9176,
      "step": 193061
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0531067848205566,
      "learning_rate": 3.805258956698095e-05,
      "loss": 2.7785,
      "step": 193062
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9573962688446045,
      "learning_rate": 3.805059569906624e-05,
      "loss": 2.9914,
      "step": 193063
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.126734495162964,
      "learning_rate": 3.804860187985278e-05,
      "loss": 2.9517,
      "step": 193064
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7430920600891113,
      "learning_rate": 3.8046608109341096e-05,
      "loss": 3.1243,
      "step": 193065
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.528334856033325,
      "learning_rate": 3.804461438753145e-05,
      "loss": 3.3404,
      "step": 193066
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7632367610931396,
      "learning_rate": 3.804262071442438e-05,
      "loss": 2.9451,
      "step": 193067
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6706650257110596,
      "learning_rate": 3.804062709001998e-05,
      "loss": 2.9599,
      "step": 193068
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.193105459213257,
      "learning_rate": 3.803863351431886e-05,
      "loss": 2.821,
      "step": 193069
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.593996047973633,
      "learning_rate": 3.8036639987321215e-05,
      "loss": 3.1232,
      "step": 193070
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.672642230987549,
      "learning_rate": 3.8034646509027576e-05,
      "loss": 3.0026,
      "step": 193071
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.578747034072876,
      "learning_rate": 3.8032653079438217e-05,
      "loss": 2.8941,
      "step": 193072
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8612072467803955,
      "learning_rate": 3.8030659698553624e-05,
      "loss": 2.7303,
      "step": 193073
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.906686782836914,
      "learning_rate": 3.802866636637394e-05,
      "loss": 2.8458,
      "step": 193074
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7793362140655518,
      "learning_rate": 3.802667308289973e-05,
      "loss": 2.7637,
      "step": 193075
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.577394962310791,
      "learning_rate": 3.8024679848131255e-05,
      "loss": 2.8445,
      "step": 193076
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8299450874328613,
      "learning_rate": 3.8022686662069025e-05,
      "loss": 3.0066,
      "step": 193077
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0421321392059326,
      "learning_rate": 3.802069352471323e-05,
      "loss": 3.0125,
      "step": 193078
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1213090419769287,
      "learning_rate": 3.8018700436064476e-05,
      "loss": 3.0184,
      "step": 193079
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6662657260894775,
      "learning_rate": 3.8016707396122856e-05,
      "loss": 3.1621,
      "step": 193080
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1711254119873047,
      "learning_rate": 3.8014714404888944e-05,
      "loss": 2.8116,
      "step": 193081
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0445194244384766,
      "learning_rate": 3.801272146236297e-05,
      "loss": 2.9994,
      "step": 193082
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1461021900177,
      "learning_rate": 3.801072856854547e-05,
      "loss": 2.9657,
      "step": 193083
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.452759265899658,
      "learning_rate": 3.8008735723436634e-05,
      "loss": 2.8484,
      "step": 193084
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5152840614318848,
      "learning_rate": 3.800674292703704e-05,
      "loss": 2.7678,
      "step": 193085
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.50860595703125,
      "learning_rate": 3.800475017934681e-05,
      "loss": 2.865,
      "step": 193086
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.27287220954895,
      "learning_rate": 3.800275748036652e-05,
      "loss": 2.9297,
      "step": 193087
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.762281894683838,
      "learning_rate": 3.80007648300964e-05,
      "loss": 3.0372,
      "step": 193088
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0620574951171875,
      "learning_rate": 3.799877222853691e-05,
      "loss": 2.7846,
      "step": 193089
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.008857727050781,
      "learning_rate": 3.799677967568836e-05,
      "loss": 3.0815,
      "step": 193090
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.6716887950897217,
      "learning_rate": 3.799478717155131e-05,
      "loss": 2.8746,
      "step": 193091
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.857583999633789,
      "learning_rate": 3.79927947161258e-05,
      "loss": 2.7698,
      "step": 193092
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.802069664001465,
      "learning_rate": 3.799080230941245e-05,
      "loss": 2.8095,
      "step": 193093
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.94415545463562,
      "learning_rate": 3.7988809951411505e-05,
      "loss": 2.9821,
      "step": 193094
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.387504816055298,
      "learning_rate": 3.798681764212346e-05,
      "loss": 2.7477,
      "step": 193095
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.512415885925293,
      "learning_rate": 3.798482538154851e-05,
      "loss": 2.8588,
      "step": 193096
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.8941211700439453,
      "learning_rate": 3.798283316968723e-05,
      "loss": 3.1622,
      "step": 193097
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.394712448120117,
      "learning_rate": 3.798084100653985e-05,
      "loss": 3.0245,
      "step": 193098
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.664702892303467,
      "learning_rate": 3.7978848892106804e-05,
      "loss": 2.9213,
      "step": 193099
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.5945520401000977,
      "learning_rate": 3.797685682638836e-05,
      "loss": 2.7294,
      "step": 193100
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7231791019439697,
      "learning_rate": 3.797486480938504e-05,
      "loss": 2.934,
      "step": 193101
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.9367783069610596,
      "learning_rate": 3.797287284109709e-05,
      "loss": 2.7842,
      "step": 193102
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2973780632019043,
      "learning_rate": 3.797088092152497e-05,
      "loss": 2.8113,
      "step": 193103
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8459579944610596,
      "learning_rate": 3.7968889050669014e-05,
      "loss": 3.0101,
      "step": 193104
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.792473793029785,
      "learning_rate": 3.796689722852949e-05,
      "loss": 3.1336,
      "step": 193105
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4352738857269287,
      "learning_rate": 3.796490545510697e-05,
      "loss": 3.0758,
      "step": 193106
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.691030740737915,
      "learning_rate": 3.796291373040173e-05,
      "loss": 2.9943,
      "step": 193107
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.510953187942505,
      "learning_rate": 3.796092205441403e-05,
      "loss": 2.8323,
      "step": 193108
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9566822052001953,
      "learning_rate": 3.7958930427144426e-05,
      "loss": 3.0002,
      "step": 193109
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9757773876190186,
      "learning_rate": 3.795693884859322e-05,
      "loss": 3.2368,
      "step": 193110
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.7571632862091064,
      "learning_rate": 3.795494731876068e-05,
      "loss": 3.289,
      "step": 193111
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4285197257995605,
      "learning_rate": 3.7952955837647325e-05,
      "loss": 2.7925,
      "step": 193112
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.519923210144043,
      "learning_rate": 3.795096440525347e-05,
      "loss": 3.0962,
      "step": 193113
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.441762924194336,
      "learning_rate": 3.794897302157941e-05,
      "loss": 2.9585,
      "step": 193114
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.001605272293091,
      "learning_rate": 3.7946981686625645e-05,
      "loss": 2.6685,
      "step": 193115
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.3894853591918945,
      "learning_rate": 3.794499040039247e-05,
      "loss": 2.7915,
      "step": 193116
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.923851728439331,
      "learning_rate": 3.794299916288023e-05,
      "loss": 2.8763,
      "step": 193117
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6703944206237793,
      "learning_rate": 3.7941007974089374e-05,
      "loss": 2.9719,
      "step": 193118
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.576925277709961,
      "learning_rate": 3.793901683402015e-05,
      "loss": 2.733,
      "step": 193119
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.096632719039917,
      "learning_rate": 3.793702574267312e-05,
      "loss": 2.9387,
      "step": 193120
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.439840793609619,
      "learning_rate": 3.7935034700048514e-05,
      "loss": 2.8017,
      "step": 193121
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.611618995666504,
      "learning_rate": 3.793304370614674e-05,
      "loss": 2.6359,
      "step": 193122
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4665865898132324,
      "learning_rate": 3.7931052760968114e-05,
      "loss": 2.8939,
      "step": 193123
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.4175662994384766,
      "learning_rate": 3.792906186451309e-05,
      "loss": 3.0626,
      "step": 193124
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8356575965881348,
      "learning_rate": 3.792707101678195e-05,
      "loss": 2.866,
      "step": 193125
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.065979480743408,
      "learning_rate": 3.792508021777517e-05,
      "loss": 2.6705,
      "step": 193126
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2506065368652344,
      "learning_rate": 3.7923089467493014e-05,
      "loss": 3.0291,
      "step": 193127
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.4934356212615967,
      "learning_rate": 3.792109876593602e-05,
      "loss": 3.0699,
      "step": 193128
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8352301120758057,
      "learning_rate": 3.791910811310431e-05,
      "loss": 3.0164,
      "step": 193129
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9710144996643066,
      "learning_rate": 3.7917117508998466e-05,
      "loss": 2.6457,
      "step": 193130
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.549618721008301,
      "learning_rate": 3.7915126953618705e-05,
      "loss": 3.0634,
      "step": 193131
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5901641845703125,
      "learning_rate": 3.791313644696553e-05,
      "loss": 2.9255,
      "step": 193132
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.441890001296997,
      "learning_rate": 3.791114598903918e-05,
      "loss": 2.7572,
      "step": 193133
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7140440940856934,
      "learning_rate": 3.790915557984022e-05,
      "loss": 2.6749,
      "step": 193134
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.8150532245635986,
      "learning_rate": 3.790716521936879e-05,
      "loss": 2.7894,
      "step": 193135
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.239745616912842,
      "learning_rate": 3.79051749076254e-05,
      "loss": 2.9336,
      "step": 193136
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.218581438064575,
      "learning_rate": 3.790318464461034e-05,
      "loss": 2.837,
      "step": 193137
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.996166706085205,
      "learning_rate": 3.79011944303241e-05,
      "loss": 3.2212,
      "step": 193138
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.7240891456604004,
      "learning_rate": 3.789920426476688e-05,
      "loss": 2.9685,
      "step": 193139
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.597616672515869,
      "learning_rate": 3.789721414793932e-05,
      "loss": 2.8975,
      "step": 193140
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.5859649181365967,
      "learning_rate": 3.7895224079841436e-05,
      "loss": 3.0473,
      "step": 193141
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6731526851654053,
      "learning_rate": 3.7893234060473875e-05,
      "loss": 2.9237,
      "step": 193142
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.107621431350708,
      "learning_rate": 3.789124408983684e-05,
      "loss": 2.8951,
      "step": 193143
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.622148036956787,
      "learning_rate": 3.788925416793082e-05,
      "loss": 3.2771,
      "step": 193144
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.487084865570068,
      "learning_rate": 3.788726429475608e-05,
      "loss": 2.83,
      "step": 193145
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.2381374835968018,
      "learning_rate": 3.7885274470313164e-05,
      "loss": 3.0581,
      "step": 193146
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.010119676589966,
      "learning_rate": 3.78832846946022e-05,
      "loss": 2.7275,
      "step": 193147
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.0897958278656006,
      "learning_rate": 3.788129496762376e-05,
      "loss": 2.9705,
      "step": 193148
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.6214487552642822,
      "learning_rate": 3.787930528937806e-05,
      "loss": 2.9196,
      "step": 193149
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.021360397338867,
      "learning_rate": 3.7877315659865613e-05,
      "loss": 2.8073,
      "step": 193150
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9781107902526855,
      "learning_rate": 3.787532607908666e-05,
      "loss": 2.8401,
      "step": 193151
    },
    {
      "epoch": 2.51,
      "grad_norm": 4.0608954429626465,
      "learning_rate": 3.787333654704174e-05,
      "loss": 2.7285,
      "step": 193152
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.1281557083129883,
      "learning_rate": 3.7871347063731016e-05,
      "loss": 3.0198,
      "step": 193153
    },
    {
      "epoch": 2.51,
      "grad_norm": 3.32883620262146,
      "learning_rate": 3.786935762915501e-05,
      "loss": 3.13,
      "step": 193154
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.7658135890960693,
      "learning_rate": 3.786736824331398e-05,
      "loss": 2.8415,
      "step": 193155
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.690786361694336,
      "learning_rate": 3.78653789062084e-05,
      "loss": 3.1026,
      "step": 193156
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.468423843383789,
      "learning_rate": 3.7863389617838534e-05,
      "loss": 2.8887,
      "step": 193157
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9548845291137695,
      "learning_rate": 3.786140037820495e-05,
      "loss": 3.0831,
      "step": 193158
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6100363731384277,
      "learning_rate": 3.785941118730775e-05,
      "loss": 3.0924,
      "step": 193159
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5723142623901367,
      "learning_rate": 3.78574220451475e-05,
      "loss": 2.9226,
      "step": 193160
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5497896671295166,
      "learning_rate": 3.785543295172443e-05,
      "loss": 2.7859,
      "step": 193161
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9085702896118164,
      "learning_rate": 3.785344390703904e-05,
      "loss": 3.0148,
      "step": 193162
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.974815607070923,
      "learning_rate": 3.785145491109159e-05,
      "loss": 3.0702,
      "step": 193163
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1267197132110596,
      "learning_rate": 3.784946596388263e-05,
      "loss": 2.9893,
      "step": 193164
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7506442070007324,
      "learning_rate": 3.784747706541228e-05,
      "loss": 3.1027,
      "step": 193165
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.081354856491089,
      "learning_rate": 3.7845488215681107e-05,
      "loss": 2.7997,
      "step": 193166
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5407917499542236,
      "learning_rate": 3.7843499414689315e-05,
      "loss": 2.9069,
      "step": 193167
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7642650604248047,
      "learning_rate": 3.784151066243744e-05,
      "loss": 2.8371,
      "step": 193168
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.836207628250122,
      "learning_rate": 3.78395219589257e-05,
      "loss": 2.9568,
      "step": 193169
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.647922039031982,
      "learning_rate": 3.7837533304154713e-05,
      "loss": 2.7834,
      "step": 193170
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7612006664276123,
      "learning_rate": 3.7835544698124495e-05,
      "loss": 2.9731,
      "step": 193171
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.843576669692993,
      "learning_rate": 3.7833556140835697e-05,
      "loss": 3.0145,
      "step": 193172
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.840174436569214,
      "learning_rate": 3.783156763228851e-05,
      "loss": 2.796,
      "step": 193173
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4857497215270996,
      "learning_rate": 3.782957917248346e-05,
      "loss": 2.9718,
      "step": 193174
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.829439878463745,
      "learning_rate": 3.782759076142079e-05,
      "loss": 2.9777,
      "step": 193175
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1757092475891113,
      "learning_rate": 3.7825602399100964e-05,
      "loss": 2.7839,
      "step": 193176
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.841552257537842,
      "learning_rate": 3.782361408552428e-05,
      "loss": 2.9947,
      "step": 193177
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.642782211303711,
      "learning_rate": 3.782162582069117e-05,
      "loss": 2.8923,
      "step": 193178
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.590284585952759,
      "learning_rate": 3.781963760460191e-05,
      "loss": 2.9507,
      "step": 193179
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.852221727371216,
      "learning_rate": 3.7817649437256984e-05,
      "loss": 3.1728,
      "step": 193180
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5587961673736572,
      "learning_rate": 3.781566131865663e-05,
      "loss": 2.8408,
      "step": 193181
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.939500331878662,
      "learning_rate": 3.781367324880136e-05,
      "loss": 3.1806,
      "step": 193182
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.76492977142334,
      "learning_rate": 3.781168522769149e-05,
      "loss": 3.1561,
      "step": 193183
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8313872814178467,
      "learning_rate": 3.7809697255327365e-05,
      "loss": 2.8615,
      "step": 193184
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.678853988647461,
      "learning_rate": 3.7807709331709315e-05,
      "loss": 2.7796,
      "step": 193185
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.242620468139648,
      "learning_rate": 3.78057214568378e-05,
      "loss": 2.799,
      "step": 193186
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6595778465270996,
      "learning_rate": 3.7803733630713095e-05,
      "loss": 2.6362,
      "step": 193187
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.806746244430542,
      "learning_rate": 3.780174585333573e-05,
      "loss": 2.9026,
      "step": 193188
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.582655191421509,
      "learning_rate": 3.779975812470594e-05,
      "loss": 2.8364,
      "step": 193189
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.706179141998291,
      "learning_rate": 3.779777044482411e-05,
      "loss": 2.9372,
      "step": 193190
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.952969551086426,
      "learning_rate": 3.7795782813690565e-05,
      "loss": 2.6603,
      "step": 193191
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.867823600769043,
      "learning_rate": 3.779379523130582e-05,
      "loss": 3.0265,
      "step": 193192
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1917989253997803,
      "learning_rate": 3.779180769767005e-05,
      "loss": 2.9759,
      "step": 193193
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.713679075241089,
      "learning_rate": 3.778982021278385e-05,
      "loss": 3.1067,
      "step": 193194
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.781874418258667,
      "learning_rate": 3.7787832776647455e-05,
      "loss": 3.1119,
      "step": 193195
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6382384300231934,
      "learning_rate": 3.778584538926116e-05,
      "loss": 2.761,
      "step": 193196
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9504458904266357,
      "learning_rate": 3.778385805062551e-05,
      "loss": 2.954,
      "step": 193197
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8083767890930176,
      "learning_rate": 3.7781870760740784e-05,
      "loss": 3.0112,
      "step": 193198
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.971036672592163,
      "learning_rate": 3.77798835196073e-05,
      "loss": 2.9685,
      "step": 193199
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2748985290527344,
      "learning_rate": 3.777789632722556e-05,
      "loss": 2.7772,
      "step": 193200
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.329155921936035,
      "learning_rate": 3.7775909183595845e-05,
      "loss": 2.8678,
      "step": 193201
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.476548910140991,
      "learning_rate": 3.777392208871847e-05,
      "loss": 3.1937,
      "step": 193202
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2815494537353516,
      "learning_rate": 3.777193504259396e-05,
      "loss": 2.7393,
      "step": 193203
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1266729831695557,
      "learning_rate": 3.776994804522252e-05,
      "loss": 2.8683,
      "step": 193204
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.347310543060303,
      "learning_rate": 3.776796109660465e-05,
      "loss": 2.8606,
      "step": 193205
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2119970321655273,
      "learning_rate": 3.776597419674068e-05,
      "loss": 2.8712,
      "step": 193206
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8182296752929688,
      "learning_rate": 3.7763987345630944e-05,
      "loss": 2.9642,
      "step": 193207
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.129148483276367,
      "learning_rate": 3.776200054327581e-05,
      "loss": 2.991,
      "step": 193208
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7425570487976074,
      "learning_rate": 3.7760013789675705e-05,
      "loss": 2.8712,
      "step": 193209
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.61772084236145,
      "learning_rate": 3.77580270848309e-05,
      "loss": 2.7944,
      "step": 193210
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.800143241882324,
      "learning_rate": 3.775604042874189e-05,
      "loss": 3.188,
      "step": 193211
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8314368724823,
      "learning_rate": 3.775405382140899e-05,
      "loss": 2.8366,
      "step": 193212
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.850614309310913,
      "learning_rate": 3.775206726283255e-05,
      "loss": 2.8636,
      "step": 193213
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8835761547088623,
      "learning_rate": 3.7750080753012915e-05,
      "loss": 2.6711,
      "step": 193214
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.739672899246216,
      "learning_rate": 3.7748094291950534e-05,
      "loss": 2.9002,
      "step": 193215
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.231463670730591,
      "learning_rate": 3.774610787964566e-05,
      "loss": 2.8605,
      "step": 193216
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0112485885620117,
      "learning_rate": 3.7744121516098815e-05,
      "loss": 3.0231,
      "step": 193217
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9616878032684326,
      "learning_rate": 3.774213520131023e-05,
      "loss": 2.9241,
      "step": 193218
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5468199253082275,
      "learning_rate": 3.7740148935280444e-05,
      "loss": 2.833,
      "step": 193219
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9030861854553223,
      "learning_rate": 3.773816271800959e-05,
      "loss": 3.0048,
      "step": 193220
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.880427122116089,
      "learning_rate": 3.773617654949823e-05,
      "loss": 2.8273,
      "step": 193221
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6522457599639893,
      "learning_rate": 3.7734190429746606e-05,
      "loss": 2.6571,
      "step": 193222
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.820544481277466,
      "learning_rate": 3.77322043587552e-05,
      "loss": 3.0319,
      "step": 193223
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.56024169921875,
      "learning_rate": 3.773021833652427e-05,
      "loss": 2.9584,
      "step": 193224
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5167136192321777,
      "learning_rate": 3.772823236305436e-05,
      "loss": 2.99,
      "step": 193225
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.75341534614563,
      "learning_rate": 3.7726246438345605e-05,
      "loss": 3.136,
      "step": 193226
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7255029678344727,
      "learning_rate": 3.7724260562398555e-05,
      "loss": 3.0691,
      "step": 193227
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.299612522125244,
      "learning_rate": 3.772227473521343e-05,
      "loss": 2.7346,
      "step": 193228
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.407052516937256,
      "learning_rate": 3.77202889567908e-05,
      "loss": 2.9792,
      "step": 193229
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4238245487213135,
      "learning_rate": 3.7718303227130796e-05,
      "loss": 2.8262,
      "step": 193230
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5860562324523926,
      "learning_rate": 3.771631754623409e-05,
      "loss": 2.69,
      "step": 193231
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6144461631774902,
      "learning_rate": 3.77143319141007e-05,
      "loss": 2.6724,
      "step": 193232
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4443812370300293,
      "learning_rate": 3.771234633073128e-05,
      "loss": 3.1739,
      "step": 193233
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.032297372817993,
      "learning_rate": 3.7710360796125984e-05,
      "loss": 2.8078,
      "step": 193234
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5927932262420654,
      "learning_rate": 3.7708375310285344e-05,
      "loss": 2.9332,
      "step": 193235
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5807316303253174,
      "learning_rate": 3.770638987320963e-05,
      "loss": 2.941,
      "step": 193236
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4548685550689697,
      "learning_rate": 3.770440448489931e-05,
      "loss": 2.903,
      "step": 193237
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.571234703063965,
      "learning_rate": 3.770241914535468e-05,
      "loss": 3.0502,
      "step": 193238
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9675774574279785,
      "learning_rate": 3.770043385457611e-05,
      "loss": 2.889,
      "step": 193239
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.418858528137207,
      "learning_rate": 3.7698448612563924e-05,
      "loss": 2.8671,
      "step": 193240
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1099679470062256,
      "learning_rate": 3.769646341931863e-05,
      "loss": 2.7761,
      "step": 193241
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9860403537750244,
      "learning_rate": 3.7694478274840436e-05,
      "loss": 2.7624,
      "step": 193242
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.184964895248413,
      "learning_rate": 3.7692493179129856e-05,
      "loss": 2.8387,
      "step": 193243
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.723989725112915,
      "learning_rate": 3.7690508132187206e-05,
      "loss": 2.8124,
      "step": 193244
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0357818603515625,
      "learning_rate": 3.7688523134012836e-05,
      "loss": 2.7636,
      "step": 193245
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.721700429916382,
      "learning_rate": 3.768653818460703e-05,
      "loss": 3.2621,
      "step": 193246
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.537328004837036,
      "learning_rate": 3.768455328397034e-05,
      "loss": 3.1023,
      "step": 193247
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.76882266998291,
      "learning_rate": 3.768256843210298e-05,
      "loss": 2.8368,
      "step": 193248
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.712632417678833,
      "learning_rate": 3.768058362900543e-05,
      "loss": 2.8834,
      "step": 193249
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.882622718811035,
      "learning_rate": 3.767859887467801e-05,
      "loss": 2.9096,
      "step": 193250
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9804580211639404,
      "learning_rate": 3.767661416912111e-05,
      "loss": 2.8068,
      "step": 193251
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.597508668899536,
      "learning_rate": 3.7674629512335e-05,
      "loss": 2.762,
      "step": 193252
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.490252733230591,
      "learning_rate": 3.76726449043202e-05,
      "loss": 3.0077,
      "step": 193253
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.853433132171631,
      "learning_rate": 3.767066034507696e-05,
      "loss": 2.9242,
      "step": 193254
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9493777751922607,
      "learning_rate": 3.7668675834605746e-05,
      "loss": 3.021,
      "step": 193255
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.587498188018799,
      "learning_rate": 3.766669137290684e-05,
      "loss": 2.9043,
      "step": 193256
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6721420288085938,
      "learning_rate": 3.766470695998069e-05,
      "loss": 2.8259,
      "step": 193257
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8559069633483887,
      "learning_rate": 3.766272259582757e-05,
      "loss": 2.7181,
      "step": 193258
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8587841987609863,
      "learning_rate": 3.766073828044792e-05,
      "loss": 3.0663,
      "step": 193259
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9306986331939697,
      "learning_rate": 3.7658754013842064e-05,
      "loss": 2.8382,
      "step": 193260
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0025415420532227,
      "learning_rate": 3.765676979601046e-05,
      "loss": 3.0152,
      "step": 193261
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4827911853790283,
      "learning_rate": 3.76547856269534e-05,
      "loss": 2.9847,
      "step": 193262
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9545507431030273,
      "learning_rate": 3.765280150667126e-05,
      "loss": 2.6622,
      "step": 193263
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.7004923820495605,
      "learning_rate": 3.765081743516437e-05,
      "loss": 2.8668,
      "step": 193264
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.58931565284729,
      "learning_rate": 3.76488334124332e-05,
      "loss": 2.9142,
      "step": 193265
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.44211745262146,
      "learning_rate": 3.764684943847801e-05,
      "loss": 2.9569,
      "step": 193266
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7504429817199707,
      "learning_rate": 3.764486551329927e-05,
      "loss": 3.0411,
      "step": 193267
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0364933013916016,
      "learning_rate": 3.7642881636897314e-05,
      "loss": 3.2793,
      "step": 193268
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.773136615753174,
      "learning_rate": 3.764089780927251e-05,
      "loss": 2.8466,
      "step": 193269
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5498268604278564,
      "learning_rate": 3.763891403042511e-05,
      "loss": 2.8503,
      "step": 193270
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1606462001800537,
      "learning_rate": 3.763693030035571e-05,
      "loss": 2.7725,
      "step": 193271
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8729939460754395,
      "learning_rate": 3.7634946619064445e-05,
      "loss": 2.9474,
      "step": 193272
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6465673446655273,
      "learning_rate": 3.76329629865519e-05,
      "loss": 3.164,
      "step": 193273
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.595003366470337,
      "learning_rate": 3.76309794028183e-05,
      "loss": 2.7538,
      "step": 193274
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.218101739883423,
      "learning_rate": 3.762899586786408e-05,
      "loss": 2.8731,
      "step": 193275
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.491010904312134,
      "learning_rate": 3.76270123816895e-05,
      "loss": 3.0755,
      "step": 193276
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3613364696502686,
      "learning_rate": 3.7625028944295074e-05,
      "loss": 2.7126,
      "step": 193277
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.262197494506836,
      "learning_rate": 3.762304555568106e-05,
      "loss": 3.0935,
      "step": 193278
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9218990802764893,
      "learning_rate": 3.762106221584793e-05,
      "loss": 2.8681,
      "step": 193279
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.715855836868286,
      "learning_rate": 3.7619078924796007e-05,
      "loss": 3.0076,
      "step": 193280
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.641676187515259,
      "learning_rate": 3.76170956825256e-05,
      "loss": 2.8452,
      "step": 193281
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.890441656112671,
      "learning_rate": 3.761511248903717e-05,
      "loss": 2.8106,
      "step": 193282
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.688119411468506,
      "learning_rate": 3.7613129344331016e-05,
      "loss": 3.2745,
      "step": 193283
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.856367588043213,
      "learning_rate": 3.761114624840751e-05,
      "loss": 2.9038,
      "step": 193284
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7230191230773926,
      "learning_rate": 3.7609163201267115e-05,
      "loss": 2.7229,
      "step": 193285
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.767559766769409,
      "learning_rate": 3.760718020291009e-05,
      "loss": 2.9856,
      "step": 193286
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6033215522766113,
      "learning_rate": 3.7605197253336816e-05,
      "loss": 2.8065,
      "step": 193287
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.775506019592285,
      "learning_rate": 3.760321435254775e-05,
      "loss": 3.0377,
      "step": 193288
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.477120876312256,
      "learning_rate": 3.7601231500543095e-05,
      "loss": 2.5713,
      "step": 193289
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.849412441253662,
      "learning_rate": 3.759924869732345e-05,
      "loss": 2.7908,
      "step": 193290
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2612085342407227,
      "learning_rate": 3.759726594288901e-05,
      "loss": 3.1104,
      "step": 193291
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7430834770202637,
      "learning_rate": 3.7595283237240216e-05,
      "loss": 2.9529,
      "step": 193292
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7881133556365967,
      "learning_rate": 3.759330058037733e-05,
      "loss": 2.9049,
      "step": 193293
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9760990142822266,
      "learning_rate": 3.759131797230088e-05,
      "loss": 2.8234,
      "step": 193294
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7750723361968994,
      "learning_rate": 3.758933541301108e-05,
      "loss": 2.8141,
      "step": 193295
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9086310863494873,
      "learning_rate": 3.758735290250845e-05,
      "loss": 2.5996,
      "step": 193296
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7998046875,
      "learning_rate": 3.7585370440793295e-05,
      "loss": 2.7711,
      "step": 193297
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.985588312149048,
      "learning_rate": 3.7583388027865945e-05,
      "loss": 2.8272,
      "step": 193298
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3540477752685547,
      "learning_rate": 3.758140566372677e-05,
      "loss": 2.7593,
      "step": 193299
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.674097776412964,
      "learning_rate": 3.7579423348376206e-05,
      "loss": 2.9333,
      "step": 193300
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.064164876937866,
      "learning_rate": 3.757744108181451e-05,
      "loss": 2.9145,
      "step": 193301
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8582358360290527,
      "learning_rate": 3.757545886404219e-05,
      "loss": 2.9054,
      "step": 193302
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7727458477020264,
      "learning_rate": 3.75734766950595e-05,
      "loss": 2.8519,
      "step": 193303
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.3756587505340576,
      "learning_rate": 3.757149457486689e-05,
      "loss": 3.1798,
      "step": 193304
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.912767171859741,
      "learning_rate": 3.7569512503464715e-05,
      "loss": 2.6583,
      "step": 193305
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0629312992095947,
      "learning_rate": 3.756753048085331e-05,
      "loss": 2.7394,
      "step": 193306
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4088571071624756,
      "learning_rate": 3.756554850703298e-05,
      "loss": 3.3994,
      "step": 193307
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5077123641967773,
      "learning_rate": 3.7563566582004254e-05,
      "loss": 2.6636,
      "step": 193308
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0710132122039795,
      "learning_rate": 3.7561584705767366e-05,
      "loss": 3.0789,
      "step": 193309
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8785526752471924,
      "learning_rate": 3.755960287832278e-05,
      "loss": 2.7163,
      "step": 193310
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1516776084899902,
      "learning_rate": 3.75576210996708e-05,
      "loss": 2.8832,
      "step": 193311
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.038958787918091,
      "learning_rate": 3.755563936981182e-05,
      "loss": 2.8045,
      "step": 193312
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.626371145248413,
      "learning_rate": 3.755365768874614e-05,
      "loss": 2.9912,
      "step": 193313
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.025407075881958,
      "learning_rate": 3.7551676056474264e-05,
      "loss": 2.8947,
      "step": 193314
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.115455150604248,
      "learning_rate": 3.754969447299639e-05,
      "loss": 2.9617,
      "step": 193315
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.231148719787598,
      "learning_rate": 3.754771293831309e-05,
      "loss": 2.9407,
      "step": 193316
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0670011043548584,
      "learning_rate": 3.754573145242459e-05,
      "loss": 3.0821,
      "step": 193317
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0671849250793457,
      "learning_rate": 3.754375001533132e-05,
      "loss": 3.1151,
      "step": 193318
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.942767381668091,
      "learning_rate": 3.7541768627033554e-05,
      "loss": 2.9192,
      "step": 193319
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4391565322875977,
      "learning_rate": 3.753978728753175e-05,
      "loss": 2.936,
      "step": 193320
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2014517784118652,
      "learning_rate": 3.753780599682625e-05,
      "loss": 2.9695,
      "step": 193321
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.570425271987915,
      "learning_rate": 3.7535824754917457e-05,
      "loss": 2.8418,
      "step": 193322
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.95125675201416,
      "learning_rate": 3.753384356180572e-05,
      "loss": 2.8746,
      "step": 193323
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2282707691192627,
      "learning_rate": 3.753186241749139e-05,
      "loss": 2.7676,
      "step": 193324
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9831364154815674,
      "learning_rate": 3.7529881321974756e-05,
      "loss": 3.0006,
      "step": 193325
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.607424020767212,
      "learning_rate": 3.752790027525635e-05,
      "loss": 3.0168,
      "step": 193326
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8784666061401367,
      "learning_rate": 3.752591927733641e-05,
      "loss": 2.9162,
      "step": 193327
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7109553813934326,
      "learning_rate": 3.7523938328215407e-05,
      "loss": 3.0436,
      "step": 193328
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6630468368530273,
      "learning_rate": 3.752195742789367e-05,
      "loss": 2.854,
      "step": 193329
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6180410385131836,
      "learning_rate": 3.751997657637156e-05,
      "loss": 2.8401,
      "step": 193330
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3386337757110596,
      "learning_rate": 3.751799577364935e-05,
      "loss": 2.8461,
      "step": 193331
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.125546932220459,
      "learning_rate": 3.75160150197276e-05,
      "loss": 3.1209,
      "step": 193332
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.951307535171509,
      "learning_rate": 3.7514034314606475e-05,
      "loss": 2.972,
      "step": 193333
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5894689559936523,
      "learning_rate": 3.751205365828652e-05,
      "loss": 3.0572,
      "step": 193334
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.788214921951294,
      "learning_rate": 3.751007305076806e-05,
      "loss": 2.9418,
      "step": 193335
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.923931837081909,
      "learning_rate": 3.75080924920514e-05,
      "loss": 3.0854,
      "step": 193336
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7454216480255127,
      "learning_rate": 3.75061119821369e-05,
      "loss": 3.115,
      "step": 193337
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.645683526992798,
      "learning_rate": 3.7504131521025027e-05,
      "loss": 3.0833,
      "step": 193338
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.569038152694702,
      "learning_rate": 3.7502151108716016e-05,
      "loss": 3.0689,
      "step": 193339
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0791239738464355,
      "learning_rate": 3.7500170745210366e-05,
      "loss": 2.8451,
      "step": 193340
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4978790283203125,
      "learning_rate": 3.749819043050841e-05,
      "loss": 3.0224,
      "step": 193341
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.64754581451416,
      "learning_rate": 3.749621016461049e-05,
      "loss": 2.9682,
      "step": 193342
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9887053966522217,
      "learning_rate": 3.749422994751688e-05,
      "loss": 3.0538,
      "step": 193343
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.286345481872559,
      "learning_rate": 3.749224977922818e-05,
      "loss": 2.9852,
      "step": 193344
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.713459014892578,
      "learning_rate": 3.74902696597445e-05,
      "loss": 2.7345,
      "step": 193345
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0965218544006348,
      "learning_rate": 3.748828958906644e-05,
      "loss": 3.0034,
      "step": 193346
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4086875915527344,
      "learning_rate": 3.748630956719425e-05,
      "loss": 3.1702,
      "step": 193347
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6105008125305176,
      "learning_rate": 3.7484329594128285e-05,
      "loss": 2.8388,
      "step": 193348
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.598252773284912,
      "learning_rate": 3.7482349669868904e-05,
      "loss": 3.0186,
      "step": 193349
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1348764896392822,
      "learning_rate": 3.748036979441655e-05,
      "loss": 3.017,
      "step": 193350
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.402832269668579,
      "learning_rate": 3.7478389967771524e-05,
      "loss": 3.0495,
      "step": 193351
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1403985023498535,
      "learning_rate": 3.747641018993426e-05,
      "loss": 2.616,
      "step": 193352
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.412808656692505,
      "learning_rate": 3.747443046090508e-05,
      "loss": 2.9121,
      "step": 193353
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0309770107269287,
      "learning_rate": 3.7472450780684396e-05,
      "loss": 2.8019,
      "step": 193354
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.169945478439331,
      "learning_rate": 3.747047114927243e-05,
      "loss": 3.0161,
      "step": 193355
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5428943634033203,
      "learning_rate": 3.7468491566669755e-05,
      "loss": 3.1248,
      "step": 193356
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4397497177124023,
      "learning_rate": 3.746651203287654e-05,
      "loss": 2.7667,
      "step": 193357
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.611570119857788,
      "learning_rate": 3.746453254789334e-05,
      "loss": 2.7823,
      "step": 193358
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8217782974243164,
      "learning_rate": 3.7462553111720475e-05,
      "loss": 2.9087,
      "step": 193359
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8660457134246826,
      "learning_rate": 3.746057372435822e-05,
      "loss": 2.9386,
      "step": 193360
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8338165283203125,
      "learning_rate": 3.7458594385807e-05,
      "loss": 3.1641,
      "step": 193361
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1205196380615234,
      "learning_rate": 3.745661509606719e-05,
      "loss": 2.9104,
      "step": 193362
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2717244625091553,
      "learning_rate": 3.74546358551391e-05,
      "loss": 2.78,
      "step": 193363
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6496365070343018,
      "learning_rate": 3.745265666302324e-05,
      "loss": 2.8604,
      "step": 193364
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4912338256835938,
      "learning_rate": 3.745067751971986e-05,
      "loss": 2.9646,
      "step": 193365
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.748785972595215,
      "learning_rate": 3.7448698425229304e-05,
      "loss": 2.7574,
      "step": 193366
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7071237564086914,
      "learning_rate": 3.7446719379552035e-05,
      "loss": 2.9743,
      "step": 193367
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2238807678222656,
      "learning_rate": 3.744474038268839e-05,
      "loss": 3.0106,
      "step": 193368
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3208515644073486,
      "learning_rate": 3.744276143463866e-05,
      "loss": 2.8787,
      "step": 193369
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.720463275909424,
      "learning_rate": 3.7440782535403346e-05,
      "loss": 2.9147,
      "step": 193370
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1687557697296143,
      "learning_rate": 3.743880368498269e-05,
      "loss": 3.1865,
      "step": 193371
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.177757740020752,
      "learning_rate": 3.7436824883377145e-05,
      "loss": 2.7961,
      "step": 193372
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2413482666015625,
      "learning_rate": 3.743484613058709e-05,
      "loss": 3.1084,
      "step": 193373
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6519112586975098,
      "learning_rate": 3.743286742661282e-05,
      "loss": 2.8686,
      "step": 193374
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9866087436676025,
      "learning_rate": 3.7430888771454706e-05,
      "loss": 3.2677,
      "step": 193375
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.908092737197876,
      "learning_rate": 3.74289101651132e-05,
      "loss": 2.7647,
      "step": 193376
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5800559520721436,
      "learning_rate": 3.742693160758855e-05,
      "loss": 3.0351,
      "step": 193377
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.3772435188293457,
      "learning_rate": 3.742495309888125e-05,
      "loss": 2.9922,
      "step": 193378
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4335196018218994,
      "learning_rate": 3.74229746389916e-05,
      "loss": 2.7007,
      "step": 193379
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.103876113891602,
      "learning_rate": 3.742099622791993e-05,
      "loss": 2.5518,
      "step": 193380
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.452634811401367,
      "learning_rate": 3.741901786566671e-05,
      "loss": 3.1155,
      "step": 193381
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4328057765960693,
      "learning_rate": 3.741703955223224e-05,
      "loss": 3.2688,
      "step": 193382
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.131841659545898,
      "learning_rate": 3.7415061287616854e-05,
      "loss": 2.8469,
      "step": 193383
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1563498973846436,
      "learning_rate": 3.741308307182102e-05,
      "loss": 2.9418,
      "step": 193384
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9657697677612305,
      "learning_rate": 3.741110490484502e-05,
      "loss": 3.2081,
      "step": 193385
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6424968242645264,
      "learning_rate": 3.7409126786689214e-05,
      "loss": 2.8837,
      "step": 193386
    },
    {
      "epoch": 2.52,
      "grad_norm": 7.464901924133301,
      "learning_rate": 3.740714871735409e-05,
      "loss": 2.8847,
      "step": 193387
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.944425106048584,
      "learning_rate": 3.740517069683987e-05,
      "loss": 2.9056,
      "step": 193388
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.228482723236084,
      "learning_rate": 3.740319272514703e-05,
      "loss": 3.1644,
      "step": 193389
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.380664825439453,
      "learning_rate": 3.740121480227588e-05,
      "loss": 2.8501,
      "step": 193390
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.116636276245117,
      "learning_rate": 3.739923692822684e-05,
      "loss": 3.1202,
      "step": 193391
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2412145137786865,
      "learning_rate": 3.7397259103000145e-05,
      "loss": 2.6959,
      "step": 193392
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.355672597885132,
      "learning_rate": 3.739528132659633e-05,
      "loss": 2.6883,
      "step": 193393
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.88573694229126,
      "learning_rate": 3.7393303599015623e-05,
      "loss": 3.0037,
      "step": 193394
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6477437019348145,
      "learning_rate": 3.739132592025853e-05,
      "loss": 2.9647,
      "step": 193395
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.629591226577759,
      "learning_rate": 3.738934829032535e-05,
      "loss": 2.8119,
      "step": 193396
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4669954776763916,
      "learning_rate": 3.7387370709216416e-05,
      "loss": 3.0891,
      "step": 193397
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.182873487472534,
      "learning_rate": 3.738539317693209e-05,
      "loss": 2.8952,
      "step": 193398
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.837334632873535,
      "learning_rate": 3.738341569347284e-05,
      "loss": 2.929,
      "step": 193399
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0796687602996826,
      "learning_rate": 3.73814382588389e-05,
      "loss": 2.9431,
      "step": 193400
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.949322462081909,
      "learning_rate": 3.7379460873030774e-05,
      "loss": 2.8991,
      "step": 193401
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.237288236618042,
      "learning_rate": 3.737748353604876e-05,
      "loss": 2.7853,
      "step": 193402
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.775465726852417,
      "learning_rate": 3.7375506247893216e-05,
      "loss": 3.1554,
      "step": 193403
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3349082469940186,
      "learning_rate": 3.737352900856448e-05,
      "loss": 3.0038,
      "step": 193404
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.66542911529541,
      "learning_rate": 3.737155181806303e-05,
      "loss": 2.8742,
      "step": 193405
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0864052772521973,
      "learning_rate": 3.736957467638908e-05,
      "loss": 2.693,
      "step": 193406
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4599599838256836,
      "learning_rate": 3.736759758354317e-05,
      "loss": 2.752,
      "step": 193407
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.331885814666748,
      "learning_rate": 3.736562053952558e-05,
      "loss": 2.9986,
      "step": 193408
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8633906841278076,
      "learning_rate": 3.7363643544336684e-05,
      "loss": 2.8939,
      "step": 193409
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.71936297416687,
      "learning_rate": 3.736166659797677e-05,
      "loss": 2.8551,
      "step": 193410
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.794144868850708,
      "learning_rate": 3.7359689700446324e-05,
      "loss": 2.8672,
      "step": 193411
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1489737033843994,
      "learning_rate": 3.7357712851745624e-05,
      "loss": 2.7387,
      "step": 193412
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.12570858001709,
      "learning_rate": 3.735573605187513e-05,
      "loss": 2.7913,
      "step": 193413
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.150994300842285,
      "learning_rate": 3.7353759300835206e-05,
      "loss": 3.1147,
      "step": 193414
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.954246997833252,
      "learning_rate": 3.7351782598626124e-05,
      "loss": 2.7136,
      "step": 193415
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2587296962738037,
      "learning_rate": 3.734980594524828e-05,
      "loss": 2.7059,
      "step": 193416
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9482390880584717,
      "learning_rate": 3.7347829340702104e-05,
      "loss": 3.0043,
      "step": 193417
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.741042137145996,
      "learning_rate": 3.734585278498787e-05,
      "loss": 2.9448,
      "step": 193418
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9567081928253174,
      "learning_rate": 3.734387627810608e-05,
      "loss": 2.9543,
      "step": 193419
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5981101989746094,
      "learning_rate": 3.734189982005699e-05,
      "loss": 3.1366,
      "step": 193420
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7934985160827637,
      "learning_rate": 3.7339923410841e-05,
      "loss": 2.9845,
      "step": 193421
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.519209384918213,
      "learning_rate": 3.733794705045845e-05,
      "loss": 2.8845,
      "step": 193422
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0267174243927,
      "learning_rate": 3.733597073890977e-05,
      "loss": 2.8679,
      "step": 193423
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4738144874572754,
      "learning_rate": 3.733399447619523e-05,
      "loss": 2.9285,
      "step": 193424
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0716795921325684,
      "learning_rate": 3.733201826231532e-05,
      "loss": 2.7849,
      "step": 193425
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.77321457862854,
      "learning_rate": 3.7330042097270355e-05,
      "loss": 3.0295,
      "step": 193426
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.188523769378662,
      "learning_rate": 3.732806598106068e-05,
      "loss": 2.7615,
      "step": 193427
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.448228359222412,
      "learning_rate": 3.732608991368662e-05,
      "loss": 2.7137,
      "step": 193428
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.053041458129883,
      "learning_rate": 3.732411389514865e-05,
      "loss": 2.8066,
      "step": 193429
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.12111234664917,
      "learning_rate": 3.732213792544702e-05,
      "loss": 2.8045,
      "step": 193430
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.773254871368408,
      "learning_rate": 3.7320162004582264e-05,
      "loss": 2.9268,
      "step": 193431
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9281234741210938,
      "learning_rate": 3.7318186132554605e-05,
      "loss": 2.9003,
      "step": 193432
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.44445538520813,
      "learning_rate": 3.731621030936448e-05,
      "loss": 2.8642,
      "step": 193433
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.173046112060547,
      "learning_rate": 3.7314234535012155e-05,
      "loss": 2.9675,
      "step": 193434
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9665400981903076,
      "learning_rate": 3.731225880949816e-05,
      "loss": 2.9144,
      "step": 193435
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3710904121398926,
      "learning_rate": 3.731028313282267e-05,
      "loss": 2.9444,
      "step": 193436
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.041522741317749,
      "learning_rate": 3.7308307504986246e-05,
      "loss": 2.8004,
      "step": 193437
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1144776344299316,
      "learning_rate": 3.730633192598912e-05,
      "loss": 2.9594,
      "step": 193438
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3401951789855957,
      "learning_rate": 3.730435639583179e-05,
      "loss": 2.7669,
      "step": 193439
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7317230701446533,
      "learning_rate": 3.7302380914514465e-05,
      "loss": 2.9108,
      "step": 193440
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6106817722320557,
      "learning_rate": 3.73004054820376e-05,
      "loss": 3.055,
      "step": 193441
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.196129322052002,
      "learning_rate": 3.72984300984015e-05,
      "loss": 3.0031,
      "step": 193442
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.723405599594116,
      "learning_rate": 3.7296454763606665e-05,
      "loss": 2.9031,
      "step": 193443
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.99642276763916,
      "learning_rate": 3.729447947765329e-05,
      "loss": 3.1155,
      "step": 193444
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.094310760498047,
      "learning_rate": 3.7292504240541955e-05,
      "loss": 2.8564,
      "step": 193445
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1364693641662598,
      "learning_rate": 3.729052905227278e-05,
      "loss": 3.0394,
      "step": 193446
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.657036542892456,
      "learning_rate": 3.728855391284633e-05,
      "loss": 3.1662,
      "step": 193447
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7041194438934326,
      "learning_rate": 3.728657882226281e-05,
      "loss": 2.7566,
      "step": 193448
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6856179237365723,
      "learning_rate": 3.728460378052276e-05,
      "loss": 2.954,
      "step": 193449
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.048168182373047,
      "learning_rate": 3.72826287876264e-05,
      "loss": 2.922,
      "step": 193450
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.416827917098999,
      "learning_rate": 3.72806538435742e-05,
      "loss": 2.9162,
      "step": 193451
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.288441181182861,
      "learning_rate": 3.72786789483665e-05,
      "loss": 2.6432,
      "step": 193452
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.572742462158203,
      "learning_rate": 3.7276704102003664e-05,
      "loss": 2.8982,
      "step": 193453
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9687840938568115,
      "learning_rate": 3.7274729304485986e-05,
      "loss": 2.8166,
      "step": 193454
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.938319206237793,
      "learning_rate": 3.7272754555813934e-05,
      "loss": 2.9816,
      "step": 193455
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.627945899963379,
      "learning_rate": 3.727077985598778e-05,
      "loss": 3.068,
      "step": 193456
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2382116317749023,
      "learning_rate": 3.726880520500801e-05,
      "loss": 3.1934,
      "step": 193457
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8349645137786865,
      "learning_rate": 3.7266830602874944e-05,
      "loss": 3.0666,
      "step": 193458
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.006223678588867,
      "learning_rate": 3.726485604958893e-05,
      "loss": 3.1284,
      "step": 193459
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9378628730773926,
      "learning_rate": 3.726288154515025e-05,
      "loss": 2.9284,
      "step": 193460
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0091309547424316,
      "learning_rate": 3.726090708955946e-05,
      "loss": 2.6388,
      "step": 193461
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.304047107696533,
      "learning_rate": 3.725893268281672e-05,
      "loss": 2.7871,
      "step": 193462
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7210302352905273,
      "learning_rate": 3.7256958324922615e-05,
      "loss": 3.0404,
      "step": 193463
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9759347438812256,
      "learning_rate": 3.7254984015877366e-05,
      "loss": 3.0188,
      "step": 193464
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.334719657897949,
      "learning_rate": 3.725300975568134e-05,
      "loss": 2.9946,
      "step": 193465
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.567833423614502,
      "learning_rate": 3.725103554433497e-05,
      "loss": 2.8209,
      "step": 193466
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.7167491912841797,
      "learning_rate": 3.724906138183863e-05,
      "loss": 2.6576,
      "step": 193467
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0098533630371094,
      "learning_rate": 3.724708726819254e-05,
      "loss": 2.9225,
      "step": 193468
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.17523455619812,
      "learning_rate": 3.724511320339728e-05,
      "loss": 2.9249,
      "step": 193469
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8272151947021484,
      "learning_rate": 3.724313918745311e-05,
      "loss": 2.7848,
      "step": 193470
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8497562408447266,
      "learning_rate": 3.724116522036033e-05,
      "loss": 3.1444,
      "step": 193471
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6522364616394043,
      "learning_rate": 3.723919130211943e-05,
      "loss": 2.8883,
      "step": 193472
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4395782947540283,
      "learning_rate": 3.7237217432730725e-05,
      "loss": 2.8996,
      "step": 193473
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.464666366577148,
      "learning_rate": 3.723524361219451e-05,
      "loss": 2.7675,
      "step": 193474
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7858328819274902,
      "learning_rate": 3.723326984051131e-05,
      "loss": 3.1307,
      "step": 193475
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.2170562744140625,
      "learning_rate": 3.723129611768141e-05,
      "loss": 2.7715,
      "step": 193476
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.0234856605529785,
      "learning_rate": 3.722932244370509e-05,
      "loss": 2.9971,
      "step": 193477
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6681151390075684,
      "learning_rate": 3.722734881858286e-05,
      "loss": 2.5991,
      "step": 193478
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4396464824676514,
      "learning_rate": 3.722537524231498e-05,
      "loss": 2.8954,
      "step": 193479
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.586513519287109,
      "learning_rate": 3.722340171490189e-05,
      "loss": 3.0924,
      "step": 193480
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.2475481033325195,
      "learning_rate": 3.722142823634395e-05,
      "loss": 2.9684,
      "step": 193481
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.389592409133911,
      "learning_rate": 3.7219454806641537e-05,
      "loss": 2.8476,
      "step": 193482
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.980621099472046,
      "learning_rate": 3.7217481425794906e-05,
      "loss": 2.9865,
      "step": 193483
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.580836534500122,
      "learning_rate": 3.721550809380456e-05,
      "loss": 2.8614,
      "step": 193484
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.188737154006958,
      "learning_rate": 3.721353481067073e-05,
      "loss": 3.017,
      "step": 193485
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.870121479034424,
      "learning_rate": 3.7211561576393955e-05,
      "loss": 2.9807,
      "step": 193486
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3738934993743896,
      "learning_rate": 3.7209588390974495e-05,
      "loss": 3.1284,
      "step": 193487
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.747941255569458,
      "learning_rate": 3.7207615254412756e-05,
      "loss": 3.012,
      "step": 193488
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9774065017700195,
      "learning_rate": 3.7205642166709e-05,
      "loss": 2.9628,
      "step": 193489
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.594165563583374,
      "learning_rate": 3.720366912786377e-05,
      "loss": 3.0391,
      "step": 193490
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7504537105560303,
      "learning_rate": 3.720169613787721e-05,
      "loss": 2.7943,
      "step": 193491
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.662571907043457,
      "learning_rate": 3.719972319674994e-05,
      "loss": 2.8916,
      "step": 193492
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.007812738418579,
      "learning_rate": 3.719775030448219e-05,
      "loss": 3.0155,
      "step": 193493
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.728440999984741,
      "learning_rate": 3.719577746107432e-05,
      "loss": 2.8545,
      "step": 193494
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.433506488800049,
      "learning_rate": 3.719380466652666e-05,
      "loss": 2.7248,
      "step": 193495
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3479461669921875,
      "learning_rate": 3.719183192083969e-05,
      "loss": 2.6935,
      "step": 193496
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.164114475250244,
      "learning_rate": 3.718985922401366e-05,
      "loss": 3.0182,
      "step": 193497
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8336827754974365,
      "learning_rate": 3.718788657604909e-05,
      "loss": 2.9179,
      "step": 193498
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.270521402359009,
      "learning_rate": 3.718591397694623e-05,
      "loss": 2.8171,
      "step": 193499
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.3740763664245605,
      "learning_rate": 3.7183941426705445e-05,
      "loss": 2.9179,
      "step": 193500
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.937932014465332,
      "learning_rate": 3.718196892532708e-05,
      "loss": 2.8967,
      "step": 193501
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0063705444335938,
      "learning_rate": 3.7179996472811625e-05,
      "loss": 3.0173,
      "step": 193502
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.613492488861084,
      "learning_rate": 3.7178024069159286e-05,
      "loss": 2.9392,
      "step": 193503
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.641319751739502,
      "learning_rate": 3.717605171437059e-05,
      "loss": 2.933,
      "step": 193504
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0074849128723145,
      "learning_rate": 3.7174079408445775e-05,
      "loss": 2.783,
      "step": 193505
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.727696180343628,
      "learning_rate": 3.7172107151385376e-05,
      "loss": 2.6756,
      "step": 193506
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.951228141784668,
      "learning_rate": 3.717013494318949e-05,
      "loss": 3.0513,
      "step": 193507
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2147743701934814,
      "learning_rate": 3.716816278385875e-05,
      "loss": 2.9598,
      "step": 193508
    },
    {
      "epoch": 2.52,
      "grad_norm": 6.606632232666016,
      "learning_rate": 3.716619067339332e-05,
      "loss": 3.1415,
      "step": 193509
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.152547359466553,
      "learning_rate": 3.716421861179369e-05,
      "loss": 2.9103,
      "step": 193510
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.863367795944214,
      "learning_rate": 3.716224659906018e-05,
      "loss": 2.821,
      "step": 193511
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1572725772857666,
      "learning_rate": 3.7160274635193286e-05,
      "loss": 3.0762,
      "step": 193512
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.917724370956421,
      "learning_rate": 3.71583027201931e-05,
      "loss": 2.6432,
      "step": 193513
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9928314685821533,
      "learning_rate": 3.7156330854060255e-05,
      "loss": 3.0695,
      "step": 193514
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.87422776222229,
      "learning_rate": 3.715435903679492e-05,
      "loss": 2.8224,
      "step": 193515
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.411468505859375,
      "learning_rate": 3.7152387268397634e-05,
      "loss": 2.9236,
      "step": 193516
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8870227336883545,
      "learning_rate": 3.7150415548868585e-05,
      "loss": 2.6884,
      "step": 193517
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.656818151473999,
      "learning_rate": 3.714844387820838e-05,
      "loss": 3.0383,
      "step": 193518
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8426740169525146,
      "learning_rate": 3.714647225641711e-05,
      "loss": 2.9474,
      "step": 193519
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.446289300918579,
      "learning_rate": 3.7144500683495324e-05,
      "loss": 2.4492,
      "step": 193520
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1136598587036133,
      "learning_rate": 3.714252915944331e-05,
      "loss": 3.0691,
      "step": 193521
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.298657178878784,
      "learning_rate": 3.714055768426151e-05,
      "loss": 2.9561,
      "step": 193522
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9502451419830322,
      "learning_rate": 3.7138586257950175e-05,
      "loss": 3.1871,
      "step": 193523
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.798340320587158,
      "learning_rate": 3.713661488050985e-05,
      "loss": 2.8563,
      "step": 193524
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.499602794647217,
      "learning_rate": 3.713464355194067e-05,
      "loss": 3.0438,
      "step": 193525
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9066784381866455,
      "learning_rate": 3.7132672272243194e-05,
      "loss": 2.9814,
      "step": 193526
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.914126396179199,
      "learning_rate": 3.713070104141766e-05,
      "loss": 3.0882,
      "step": 193527
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8546142578125,
      "learning_rate": 3.7128729859464553e-05,
      "loss": 3.0258,
      "step": 193528
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1316840648651123,
      "learning_rate": 3.71267587263841e-05,
      "loss": 2.8237,
      "step": 193529
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.705578327178955,
      "learning_rate": 3.7124787642176875e-05,
      "loss": 2.7262,
      "step": 193530
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.647779703140259,
      "learning_rate": 3.7122816606843e-05,
      "loss": 2.8416,
      "step": 193531
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3905270099639893,
      "learning_rate": 3.7120845620382985e-05,
      "loss": 2.813,
      "step": 193532
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.500541925430298,
      "learning_rate": 3.711887468279715e-05,
      "loss": 2.6475,
      "step": 193533
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4948325157165527,
      "learning_rate": 3.711690379408592e-05,
      "loss": 2.9667,
      "step": 193534
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3792943954467773,
      "learning_rate": 3.711493295424955e-05,
      "loss": 2.9819,
      "step": 193535
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4262728691101074,
      "learning_rate": 3.71129621632886e-05,
      "loss": 2.9294,
      "step": 193536
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.454148769378662,
      "learning_rate": 3.7110991421203206e-05,
      "loss": 2.6881,
      "step": 193537
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6338300704956055,
      "learning_rate": 3.7109020727993886e-05,
      "loss": 2.9376,
      "step": 193538
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.978949785232544,
      "learning_rate": 3.710705008366091e-05,
      "loss": 2.7632,
      "step": 193539
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4376931190490723,
      "learning_rate": 3.710507948820476e-05,
      "loss": 2.8596,
      "step": 193540
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.016937732696533,
      "learning_rate": 3.710310894162568e-05,
      "loss": 2.771,
      "step": 193541
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.7537906169891357,
      "learning_rate": 3.710113844392414e-05,
      "loss": 2.9918,
      "step": 193542
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.033520460128784,
      "learning_rate": 3.709916799510047e-05,
      "loss": 3.0708,
      "step": 193543
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4394643306732178,
      "learning_rate": 3.709719759515503e-05,
      "loss": 2.8499,
      "step": 193544
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.180543422698975,
      "learning_rate": 3.709522724408812e-05,
      "loss": 2.9247,
      "step": 193545
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.252840518951416,
      "learning_rate": 3.7093256941900216e-05,
      "loss": 2.8006,
      "step": 193546
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0155084133148193,
      "learning_rate": 3.709128668859158e-05,
      "loss": 3.1765,
      "step": 193547
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9184131622314453,
      "learning_rate": 3.708931648416271e-05,
      "loss": 2.9953,
      "step": 193548
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.00634503364563,
      "learning_rate": 3.708734632861391e-05,
      "loss": 2.9303,
      "step": 193549
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3241465091705322,
      "learning_rate": 3.708537622194544e-05,
      "loss": 2.8566,
      "step": 193550
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.009282112121582,
      "learning_rate": 3.7083406164157866e-05,
      "loss": 2.9566,
      "step": 193551
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.895724296569824,
      "learning_rate": 3.7081436155251434e-05,
      "loss": 2.7539,
      "step": 193552
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.738788366317749,
      "learning_rate": 3.707946619522643e-05,
      "loss": 2.6596,
      "step": 193553
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.102858304977417,
      "learning_rate": 3.707749628408342e-05,
      "loss": 3.0936,
      "step": 193554
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9919934272766113,
      "learning_rate": 3.7075526421822644e-05,
      "loss": 2.6865,
      "step": 193555
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.451408863067627,
      "learning_rate": 3.707355660844443e-05,
      "loss": 2.826,
      "step": 193556
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6950700283050537,
      "learning_rate": 3.707158684394929e-05,
      "loss": 2.9135,
      "step": 193557
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.281372547149658,
      "learning_rate": 3.706961712833747e-05,
      "loss": 2.9367,
      "step": 193558
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.334649085998535,
      "learning_rate": 3.7067647461609354e-05,
      "loss": 2.9588,
      "step": 193559
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.940918207168579,
      "learning_rate": 3.706567784376536e-05,
      "loss": 3.1458,
      "step": 193560
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.989500045776367,
      "learning_rate": 3.7063708274805803e-05,
      "loss": 3.0114,
      "step": 193561
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.651052713394165,
      "learning_rate": 3.7061738754731044e-05,
      "loss": 2.9527,
      "step": 193562
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.930244207382202,
      "learning_rate": 3.705976928354151e-05,
      "loss": 2.6482,
      "step": 193563
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.745973825454712,
      "learning_rate": 3.7057799861237466e-05,
      "loss": 3.1746,
      "step": 193564
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8906824588775635,
      "learning_rate": 3.7055830487819394e-05,
      "loss": 3.1088,
      "step": 193565
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.467622756958008,
      "learning_rate": 3.705386116328761e-05,
      "loss": 3.1021,
      "step": 193566
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.431532859802246,
      "learning_rate": 3.705189188764249e-05,
      "loss": 2.9725,
      "step": 193567
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5458481311798096,
      "learning_rate": 3.704992266088434e-05,
      "loss": 3.167,
      "step": 193568
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7331573963165283,
      "learning_rate": 3.7047953483013606e-05,
      "loss": 2.8555,
      "step": 193569
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.687107801437378,
      "learning_rate": 3.7045984354030567e-05,
      "loss": 2.7729,
      "step": 193570
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9837088584899902,
      "learning_rate": 3.704401527393569e-05,
      "loss": 2.9274,
      "step": 193571
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4842469692230225,
      "learning_rate": 3.704204624272924e-05,
      "loss": 2.9177,
      "step": 193572
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5883970260620117,
      "learning_rate": 3.7040077260411784e-05,
      "loss": 3.1007,
      "step": 193573
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.468608856201172,
      "learning_rate": 3.7038108326983383e-05,
      "loss": 2.7526,
      "step": 193574
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4669198989868164,
      "learning_rate": 3.703613944244465e-05,
      "loss": 2.932,
      "step": 193575
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.2288818359375,
      "learning_rate": 3.70341706067958e-05,
      "loss": 3.0911,
      "step": 193576
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.891172409057617,
      "learning_rate": 3.703220182003731e-05,
      "loss": 2.6538,
      "step": 193577
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7534351348876953,
      "learning_rate": 3.703023308216945e-05,
      "loss": 2.8373,
      "step": 193578
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9568474292755127,
      "learning_rate": 3.702826439319274e-05,
      "loss": 2.9034,
      "step": 193579
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3905253410339355,
      "learning_rate": 3.702629575310733e-05,
      "loss": 3.1533,
      "step": 193580
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.812357187271118,
      "learning_rate": 3.7024327161913734e-05,
      "loss": 2.8655,
      "step": 193581
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.888890266418457,
      "learning_rate": 3.7022358619612236e-05,
      "loss": 2.9114,
      "step": 193582
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9134674072265625,
      "learning_rate": 3.702039012620332e-05,
      "loss": 3.0043,
      "step": 193583
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.271094799041748,
      "learning_rate": 3.7018421681687205e-05,
      "loss": 2.9197,
      "step": 193584
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.921544313430786,
      "learning_rate": 3.701645328606444e-05,
      "loss": 2.6993,
      "step": 193585
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1235768795013428,
      "learning_rate": 3.701448493933516e-05,
      "loss": 2.7985,
      "step": 193586
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.105367660522461,
      "learning_rate": 3.7012516641499944e-05,
      "loss": 2.8716,
      "step": 193587
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3982927799224854,
      "learning_rate": 3.7010548392558983e-05,
      "loss": 2.7961,
      "step": 193588
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.028347969055176,
      "learning_rate": 3.700858019251277e-05,
      "loss": 3.1614,
      "step": 193589
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.43936824798584,
      "learning_rate": 3.7006612041361586e-05,
      "loss": 3.0519,
      "step": 193590
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.683117389678955,
      "learning_rate": 3.700464393910595e-05,
      "loss": 3.1178,
      "step": 193591
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4784910678863525,
      "learning_rate": 3.7002675885746e-05,
      "loss": 2.9431,
      "step": 193592
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.643744707107544,
      "learning_rate": 3.7000707881282276e-05,
      "loss": 2.7345,
      "step": 193593
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1216299533843994,
      "learning_rate": 3.699873992571504e-05,
      "loss": 2.8397,
      "step": 193594
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.920410394668579,
      "learning_rate": 3.699677201904475e-05,
      "loss": 2.8532,
      "step": 193595
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.255073547363281,
      "learning_rate": 3.699480416127165e-05,
      "loss": 2.7263,
      "step": 193596
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.054805278778076,
      "learning_rate": 3.69928363523963e-05,
      "loss": 2.747,
      "step": 193597
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2416727542877197,
      "learning_rate": 3.6990868592418846e-05,
      "loss": 3.2323,
      "step": 193598
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5512893199920654,
      "learning_rate": 3.69889008813398e-05,
      "loss": 3.0294,
      "step": 193599
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.335450649261475,
      "learning_rate": 3.698693321915944e-05,
      "loss": 3.0177,
      "step": 193600
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4504294395446777,
      "learning_rate": 3.6984965605878205e-05,
      "loss": 2.8352,
      "step": 193601
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.470088481903076,
      "learning_rate": 3.6982998041496384e-05,
      "loss": 3.0647,
      "step": 193602
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.584989070892334,
      "learning_rate": 3.698103052601454e-05,
      "loss": 2.7983,
      "step": 193603
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0539512634277344,
      "learning_rate": 3.697906305943272e-05,
      "loss": 2.6342,
      "step": 193604
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.071429491043091,
      "learning_rate": 3.697709564175155e-05,
      "loss": 3.269,
      "step": 193605
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0184051990509033,
      "learning_rate": 3.69751282729712e-05,
      "loss": 3.0731,
      "step": 193606
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0471384525299072,
      "learning_rate": 3.697316095309223e-05,
      "loss": 3.009,
      "step": 193607
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9192140102386475,
      "learning_rate": 3.697119368211488e-05,
      "loss": 3.236,
      "step": 193608
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.985011577606201,
      "learning_rate": 3.6969226460039636e-05,
      "loss": 3.0812,
      "step": 193609
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.583341598510742,
      "learning_rate": 3.696725928686665e-05,
      "loss": 3.009,
      "step": 193610
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.741128444671631,
      "learning_rate": 3.696529216259651e-05,
      "loss": 2.6461,
      "step": 193611
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.851625919342041,
      "learning_rate": 3.696332508722938e-05,
      "loss": 2.8263,
      "step": 193612
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8792247772216797,
      "learning_rate": 3.696135806076583e-05,
      "loss": 2.8791,
      "step": 193613
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.077667713165283,
      "learning_rate": 3.695939108320607e-05,
      "loss": 2.8112,
      "step": 193614
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5398576259613037,
      "learning_rate": 3.695742415455062e-05,
      "loss": 3.0859,
      "step": 193615
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5124850273132324,
      "learning_rate": 3.695545727479965e-05,
      "loss": 2.9599,
      "step": 193616
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.561413049697876,
      "learning_rate": 3.695349044395366e-05,
      "loss": 3.0493,
      "step": 193617
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5530569553375244,
      "learning_rate": 3.695152366201295e-05,
      "loss": 2.9512,
      "step": 193618
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2497260570526123,
      "learning_rate": 3.694955692897799e-05,
      "loss": 3.069,
      "step": 193619
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.110603094100952,
      "learning_rate": 3.6947590244848966e-05,
      "loss": 2.9738,
      "step": 193620
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9203457832336426,
      "learning_rate": 3.6945623609626496e-05,
      "loss": 2.936,
      "step": 193621
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6968302726745605,
      "learning_rate": 3.694365702331067e-05,
      "loss": 3.0028,
      "step": 193622
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7420883178710938,
      "learning_rate": 3.6941690485902054e-05,
      "loss": 2.8634,
      "step": 193623
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.925083637237549,
      "learning_rate": 3.693972399740088e-05,
      "loss": 3.0882,
      "step": 193624
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.960132122039795,
      "learning_rate": 3.693775755780766e-05,
      "loss": 2.9194,
      "step": 193625
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7692043781280518,
      "learning_rate": 3.693579116712258e-05,
      "loss": 2.8976,
      "step": 193626
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2422361373901367,
      "learning_rate": 3.693382482534617e-05,
      "loss": 2.8322,
      "step": 193627
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5517146587371826,
      "learning_rate": 3.693185853247874e-05,
      "loss": 3.0726,
      "step": 193628
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.168468713760376,
      "learning_rate": 3.692989228852066e-05,
      "loss": 2.8812,
      "step": 193629
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1300597190856934,
      "learning_rate": 3.692792609347218e-05,
      "loss": 3.0211,
      "step": 193630
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.759416341781616,
      "learning_rate": 3.692595994733385e-05,
      "loss": 2.6913,
      "step": 193631
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6552135944366455,
      "learning_rate": 3.692399385010586e-05,
      "loss": 3.0403,
      "step": 193632
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.984409809112549,
      "learning_rate": 3.692202780178878e-05,
      "loss": 2.8658,
      "step": 193633
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7577879428863525,
      "learning_rate": 3.6920061802382804e-05,
      "loss": 2.9555,
      "step": 193634
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0660901069641113,
      "learning_rate": 3.691809585188837e-05,
      "loss": 3.1174,
      "step": 193635
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7502946853637695,
      "learning_rate": 3.691612995030578e-05,
      "loss": 3.1547,
      "step": 193636
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.833688497543335,
      "learning_rate": 3.691416409763554e-05,
      "loss": 2.8314,
      "step": 193637
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.867558240890503,
      "learning_rate": 3.691219829387779e-05,
      "loss": 2.9785,
      "step": 193638
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9124834537506104,
      "learning_rate": 3.691023253903316e-05,
      "loss": 2.9952,
      "step": 193639
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.039797067642212,
      "learning_rate": 3.690826683310183e-05,
      "loss": 2.9092,
      "step": 193640
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1287314891815186,
      "learning_rate": 3.6906301176084176e-05,
      "loss": 3.0674,
      "step": 193641
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.265909671783447,
      "learning_rate": 3.6904335567980695e-05,
      "loss": 3.0664,
      "step": 193642
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.636084794998169,
      "learning_rate": 3.690237000879162e-05,
      "loss": 2.5859,
      "step": 193643
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9797418117523193,
      "learning_rate": 3.690040449851731e-05,
      "loss": 3.1344,
      "step": 193644
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.35550856590271,
      "learning_rate": 3.689843903715828e-05,
      "loss": 2.9742,
      "step": 193645
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7977030277252197,
      "learning_rate": 3.689647362471475e-05,
      "loss": 2.7458,
      "step": 193646
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.837733507156372,
      "learning_rate": 3.6894508261187094e-05,
      "loss": 3.0701,
      "step": 193647
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0197081565856934,
      "learning_rate": 3.689254294657578e-05,
      "loss": 2.851,
      "step": 193648
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4639577865600586,
      "learning_rate": 3.689057768088103e-05,
      "loss": 3.0007,
      "step": 193649
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9703705310821533,
      "learning_rate": 3.688861246410335e-05,
      "loss": 2.9178,
      "step": 193650
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0176913738250732,
      "learning_rate": 3.6886647296243047e-05,
      "loss": 2.9149,
      "step": 193651
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9907586574554443,
      "learning_rate": 3.68846821773005e-05,
      "loss": 2.8613,
      "step": 193652
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8452041149139404,
      "learning_rate": 3.688271710727597e-05,
      "loss": 3.0003,
      "step": 193653
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.739473819732666,
      "learning_rate": 3.6880752086170007e-05,
      "loss": 3.0282,
      "step": 193654
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0508625507354736,
      "learning_rate": 3.687878711398278e-05,
      "loss": 2.7903,
      "step": 193655
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0931999683380127,
      "learning_rate": 3.687682219071485e-05,
      "loss": 2.9533,
      "step": 193656
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9484848976135254,
      "learning_rate": 3.687485731636646e-05,
      "loss": 2.9612,
      "step": 193657
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.565812110900879,
      "learning_rate": 3.6872892490938e-05,
      "loss": 2.9714,
      "step": 193658
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.505340099334717,
      "learning_rate": 3.687092771442978e-05,
      "loss": 2.9404,
      "step": 193659
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9628498554229736,
      "learning_rate": 3.6868962986842286e-05,
      "loss": 3.0113,
      "step": 193660
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.822552680969238,
      "learning_rate": 3.6866998308175766e-05,
      "loss": 3.0563,
      "step": 193661
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6225709915161133,
      "learning_rate": 3.686503367843068e-05,
      "loss": 2.9896,
      "step": 193662
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.1670403480529785,
      "learning_rate": 3.686306909760729e-05,
      "loss": 2.9221,
      "step": 193663
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.941631555557251,
      "learning_rate": 3.686110456570617e-05,
      "loss": 2.9295,
      "step": 193664
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0839688777923584,
      "learning_rate": 3.6859140082727414e-05,
      "loss": 2.9527,
      "step": 193665
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0006165504455566,
      "learning_rate": 3.6857175648671555e-05,
      "loss": 2.9534,
      "step": 193666
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.357286214828491,
      "learning_rate": 3.6855211263538834e-05,
      "loss": 2.9186,
      "step": 193667
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.968435287475586,
      "learning_rate": 3.6853246927329774e-05,
      "loss": 2.9596,
      "step": 193668
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.402921438217163,
      "learning_rate": 3.685128264004461e-05,
      "loss": 2.8239,
      "step": 193669
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.107731580734253,
      "learning_rate": 3.684931840168388e-05,
      "loss": 3.0413,
      "step": 193670
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.057048797607422,
      "learning_rate": 3.6847354212247717e-05,
      "loss": 2.9693,
      "step": 193671
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.194491386413574,
      "learning_rate": 3.684539007173665e-05,
      "loss": 2.9454,
      "step": 193672
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.351358413696289,
      "learning_rate": 3.684342598015094e-05,
      "loss": 2.9978,
      "step": 193673
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.55623197555542,
      "learning_rate": 3.6841461937491034e-05,
      "loss": 2.9982,
      "step": 193674
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8962788581848145,
      "learning_rate": 3.683949794375722e-05,
      "loss": 2.8508,
      "step": 193675
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1679136753082275,
      "learning_rate": 3.683753399895007e-05,
      "loss": 3.0578,
      "step": 193676
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2784955501556396,
      "learning_rate": 3.683557010306962e-05,
      "loss": 3.0129,
      "step": 193677
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1842939853668213,
      "learning_rate": 3.68336062561165e-05,
      "loss": 3.038,
      "step": 193678
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.751508951187134,
      "learning_rate": 3.6831642458090905e-05,
      "loss": 2.8952,
      "step": 193679
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.238460063934326,
      "learning_rate": 3.682967870899334e-05,
      "loss": 3.2574,
      "step": 193680
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9456048011779785,
      "learning_rate": 3.682771500882403e-05,
      "loss": 2.746,
      "step": 193681
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6430327892303467,
      "learning_rate": 3.682575135758355e-05,
      "loss": 3.0038,
      "step": 193682
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5627567768096924,
      "learning_rate": 3.682378775527201e-05,
      "loss": 2.818,
      "step": 193683
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7338449954986572,
      "learning_rate": 3.682182420188995e-05,
      "loss": 2.8248,
      "step": 193684
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.798213481903076,
      "learning_rate": 3.681986069743762e-05,
      "loss": 3.147,
      "step": 193685
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.839139223098755,
      "learning_rate": 3.6817897241915514e-05,
      "loss": 2.7164,
      "step": 193686
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.074542999267578,
      "learning_rate": 3.6815933835323873e-05,
      "loss": 2.9063,
      "step": 193687
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.598616123199463,
      "learning_rate": 3.6813970477663226e-05,
      "loss": 3.2648,
      "step": 193688
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.644775152206421,
      "learning_rate": 3.68120071689337e-05,
      "loss": 2.9971,
      "step": 193689
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.588721513748169,
      "learning_rate": 3.6810043909135864e-05,
      "loss": 3.2049,
      "step": 193690
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.781985282897949,
      "learning_rate": 3.6808080698269956e-05,
      "loss": 2.8747,
      "step": 193691
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.011152505874634,
      "learning_rate": 3.6806117536336444e-05,
      "loss": 3.0703,
      "step": 193692
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0103304386138916,
      "learning_rate": 3.6804154423335585e-05,
      "loss": 2.9728,
      "step": 193693
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7543580532073975,
      "learning_rate": 3.680219135926792e-05,
      "loss": 2.8349,
      "step": 193694
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.900892734527588,
      "learning_rate": 3.680022834413357e-05,
      "loss": 2.8795,
      "step": 193695
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0365796089172363,
      "learning_rate": 3.679826537793309e-05,
      "loss": 2.8726,
      "step": 193696
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9122228622436523,
      "learning_rate": 3.6796302460666726e-05,
      "loss": 2.7685,
      "step": 193697
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3234989643096924,
      "learning_rate": 3.679433959233495e-05,
      "loss": 3.0622,
      "step": 193698
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3100502490997314,
      "learning_rate": 3.679237677293804e-05,
      "loss": 2.8199,
      "step": 193699
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.450503349304199,
      "learning_rate": 3.679041400247651e-05,
      "loss": 2.8033,
      "step": 193700
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8623924255371094,
      "learning_rate": 3.678845128095047e-05,
      "loss": 2.9192,
      "step": 193701
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0454530715942383,
      "learning_rate": 3.678648860836049e-05,
      "loss": 3.0657,
      "step": 193702
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2821974754333496,
      "learning_rate": 3.678452598470679e-05,
      "loss": 2.9964,
      "step": 193703
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.036245346069336,
      "learning_rate": 3.678256340998992e-05,
      "loss": 2.8297,
      "step": 193704
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4346303939819336,
      "learning_rate": 3.6780600884210035e-05,
      "loss": 3.0749,
      "step": 193705
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.734840154647827,
      "learning_rate": 3.67786384073677e-05,
      "loss": 2.9899,
      "step": 193706
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.786350727081299,
      "learning_rate": 3.677667597946319e-05,
      "loss": 2.9385,
      "step": 193707
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7971179485321045,
      "learning_rate": 3.677471360049683e-05,
      "loss": 2.9173,
      "step": 193708
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.108572006225586,
      "learning_rate": 3.677275127046896e-05,
      "loss": 2.7616,
      "step": 193709
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.945013999938965,
      "learning_rate": 3.6770788989380104e-05,
      "loss": 2.9152,
      "step": 193710
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.51078462600708,
      "learning_rate": 3.6768826757230444e-05,
      "loss": 2.9309,
      "step": 193711
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8061676025390625,
      "learning_rate": 3.6766864574020496e-05,
      "loss": 3.0846,
      "step": 193712
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.450800657272339,
      "learning_rate": 3.6764902439750534e-05,
      "loss": 2.7368,
      "step": 193713
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1212804317474365,
      "learning_rate": 3.6762940354420956e-05,
      "loss": 3.0513,
      "step": 193714
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8038346767425537,
      "learning_rate": 3.676097831803206e-05,
      "loss": 3.0799,
      "step": 193715
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2840116024017334,
      "learning_rate": 3.6759016330584324e-05,
      "loss": 2.8981,
      "step": 193716
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.825396776199341,
      "learning_rate": 3.6757054392078e-05,
      "loss": 3.0124,
      "step": 193717
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9223148822784424,
      "learning_rate": 3.675509250251356e-05,
      "loss": 2.6401,
      "step": 193718
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6055140495300293,
      "learning_rate": 3.675313066189134e-05,
      "loss": 3.1134,
      "step": 193719
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.872032403945923,
      "learning_rate": 3.6751168870211666e-05,
      "loss": 3.006,
      "step": 193720
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9752962589263916,
      "learning_rate": 3.674920712747488e-05,
      "loss": 2.8676,
      "step": 193721
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.641677141189575,
      "learning_rate": 3.674724543368141e-05,
      "loss": 2.8502,
      "step": 193722
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7259299755096436,
      "learning_rate": 3.674528378883155e-05,
      "loss": 2.9309,
      "step": 193723
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9844565391540527,
      "learning_rate": 3.674332219292577e-05,
      "loss": 2.8789,
      "step": 193724
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.850984573364258,
      "learning_rate": 3.674136064596438e-05,
      "loss": 2.6762,
      "step": 193725
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3427183628082275,
      "learning_rate": 3.67393991479477e-05,
      "loss": 3.0524,
      "step": 193726
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.950798273086548,
      "learning_rate": 3.673743769887617e-05,
      "loss": 2.7402,
      "step": 193727
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8095126152038574,
      "learning_rate": 3.673547629875012e-05,
      "loss": 2.8619,
      "step": 193728
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0342190265655518,
      "learning_rate": 3.673351494756985e-05,
      "loss": 2.9453,
      "step": 193729
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.030160903930664,
      "learning_rate": 3.673155364533587e-05,
      "loss": 3.1564,
      "step": 193730
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1882965564727783,
      "learning_rate": 3.6729592392048454e-05,
      "loss": 3.1138,
      "step": 193731
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7074100971221924,
      "learning_rate": 3.67276311877079e-05,
      "loss": 3.0351,
      "step": 193732
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7601852416992188,
      "learning_rate": 3.6725670032314745e-05,
      "loss": 3.121,
      "step": 193733
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1059093475341797,
      "learning_rate": 3.672370892586918e-05,
      "loss": 2.8195,
      "step": 193734
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9339511394500732,
      "learning_rate": 3.6721747868371686e-05,
      "loss": 2.8197,
      "step": 193735
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.814382791519165,
      "learning_rate": 3.671978685982261e-05,
      "loss": 2.9511,
      "step": 193736
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1287243366241455,
      "learning_rate": 3.671782590022231e-05,
      "loss": 2.8391,
      "step": 193737
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.630436897277832,
      "learning_rate": 3.6715864989571055e-05,
      "loss": 3.0394,
      "step": 193738
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.461359977722168,
      "learning_rate": 3.671390412786934e-05,
      "loss": 3.0169,
      "step": 193739
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.777010917663574,
      "learning_rate": 3.671194331511744e-05,
      "loss": 3.2277,
      "step": 193740
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.714930772781372,
      "learning_rate": 3.670998255131582e-05,
      "loss": 2.9019,
      "step": 193741
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6950013637542725,
      "learning_rate": 3.6708021836464774e-05,
      "loss": 2.7812,
      "step": 193742
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.555643081665039,
      "learning_rate": 3.670606117056467e-05,
      "loss": 2.9068,
      "step": 193743
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3620383739471436,
      "learning_rate": 3.670410055361584e-05,
      "loss": 2.8568,
      "step": 193744
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.5244736671447754,
      "learning_rate": 3.670213998561873e-05,
      "loss": 2.4085,
      "step": 193745
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1648294925689697,
      "learning_rate": 3.670017946657362e-05,
      "loss": 2.9347,
      "step": 193746
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.714215040206909,
      "learning_rate": 3.669821899648096e-05,
      "loss": 2.7268,
      "step": 193747
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.702038049697876,
      "learning_rate": 3.6696258575341e-05,
      "loss": 2.8014,
      "step": 193748
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8020644187927246,
      "learning_rate": 3.669429820315431e-05,
      "loss": 2.8696,
      "step": 193749
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1105446815490723,
      "learning_rate": 3.669233787992101e-05,
      "loss": 3.0218,
      "step": 193750
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.079904794692993,
      "learning_rate": 3.669037760564164e-05,
      "loss": 2.9546,
      "step": 193751
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.016275405883789,
      "learning_rate": 3.668841738031642e-05,
      "loss": 3.1006,
      "step": 193752
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.427901268005371,
      "learning_rate": 3.668645720394586e-05,
      "loss": 2.9506,
      "step": 193753
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7345094680786133,
      "learning_rate": 3.6684497076530216e-05,
      "loss": 2.8484,
      "step": 193754
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.4814224243164062,
      "learning_rate": 3.668253699806998e-05,
      "loss": 3.0041,
      "step": 193755
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.1313605308532715,
      "learning_rate": 3.6680576968565346e-05,
      "loss": 2.9796,
      "step": 193756
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.771939992904663,
      "learning_rate": 3.667861698801682e-05,
      "loss": 2.8495,
      "step": 193757
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.3851051330566406,
      "learning_rate": 3.667665705642463e-05,
      "loss": 2.9787,
      "step": 193758
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9853146076202393,
      "learning_rate": 3.667469717378928e-05,
      "loss": 2.7905,
      "step": 193759
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.070651054382324,
      "learning_rate": 3.6672737340111e-05,
      "loss": 2.8869,
      "step": 193760
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8792591094970703,
      "learning_rate": 3.6670777555390395e-05,
      "loss": 2.8018,
      "step": 193761
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.116079807281494,
      "learning_rate": 3.666881781962749e-05,
      "loss": 2.8335,
      "step": 193762
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7208900451660156,
      "learning_rate": 3.666685813282293e-05,
      "loss": 2.9783,
      "step": 193763
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.116570234298706,
      "learning_rate": 3.6664898494976865e-05,
      "loss": 2.8736,
      "step": 193764
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4841244220733643,
      "learning_rate": 3.6662938906089875e-05,
      "loss": 3.0748,
      "step": 193765
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.214592218399048,
      "learning_rate": 3.666097936616213e-05,
      "loss": 2.8176,
      "step": 193766
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.842773914337158,
      "learning_rate": 3.6659019875194205e-05,
      "loss": 3.0705,
      "step": 193767
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.643730878829956,
      "learning_rate": 3.6657060433186224e-05,
      "loss": 3.0196,
      "step": 193768
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.845618486404419,
      "learning_rate": 3.665510104013871e-05,
      "loss": 2.9348,
      "step": 193769
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.497560024261475,
      "learning_rate": 3.6653141696051935e-05,
      "loss": 2.9383,
      "step": 193770
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6675379276275635,
      "learning_rate": 3.665118240092636e-05,
      "loss": 3.2165,
      "step": 193771
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.738213062286377,
      "learning_rate": 3.6649223154762255e-05,
      "loss": 2.8777,
      "step": 193772
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.080131769180298,
      "learning_rate": 3.664726395756008e-05,
      "loss": 2.8549,
      "step": 193773
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1406381130218506,
      "learning_rate": 3.664530480932018e-05,
      "loss": 2.8194,
      "step": 193774
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8226633071899414,
      "learning_rate": 3.664334571004287e-05,
      "loss": 2.8619,
      "step": 193775
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2053027153015137,
      "learning_rate": 3.664138665972847e-05,
      "loss": 2.9076,
      "step": 193776
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8203647136688232,
      "learning_rate": 3.663942765837746e-05,
      "loss": 2.758,
      "step": 193777
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9896326065063477,
      "learning_rate": 3.663746870599009e-05,
      "loss": 3.1566,
      "step": 193778
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.009204149246216,
      "learning_rate": 3.663550980256685e-05,
      "loss": 2.9396,
      "step": 193779
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6290197372436523,
      "learning_rate": 3.6633550948108045e-05,
      "loss": 2.955,
      "step": 193780
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.082448720932007,
      "learning_rate": 3.6631592142614044e-05,
      "loss": 3.0438,
      "step": 193781
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.483433246612549,
      "learning_rate": 3.6629633386085135e-05,
      "loss": 3.1248,
      "step": 193782
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9979941844940186,
      "learning_rate": 3.662767467852179e-05,
      "loss": 2.9081,
      "step": 193783
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.185002326965332,
      "learning_rate": 3.662571601992431e-05,
      "loss": 2.7211,
      "step": 193784
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.461705207824707,
      "learning_rate": 3.66237574102931e-05,
      "loss": 2.7274,
      "step": 193785
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5368919372558594,
      "learning_rate": 3.662179884962852e-05,
      "loss": 3.0177,
      "step": 193786
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3642241954803467,
      "learning_rate": 3.661984033793093e-05,
      "loss": 2.7876,
      "step": 193787
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.30130672454834,
      "learning_rate": 3.66178818752006e-05,
      "loss": 2.8742,
      "step": 193788
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.598573923110962,
      "learning_rate": 3.661592346143808e-05,
      "loss": 2.7869,
      "step": 193789
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.1611328125,
      "learning_rate": 3.661396509664355e-05,
      "loss": 3.0485,
      "step": 193790
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.158849000930786,
      "learning_rate": 3.661200678081748e-05,
      "loss": 2.9756,
      "step": 193791
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.8126139640808105,
      "learning_rate": 3.661004851396024e-05,
      "loss": 3.0348,
      "step": 193792
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.331986427307129,
      "learning_rate": 3.6608090296072166e-05,
      "loss": 2.898,
      "step": 193793
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0033390522003174,
      "learning_rate": 3.660613212715359e-05,
      "loss": 3.0183,
      "step": 193794
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7452492713928223,
      "learning_rate": 3.66041740072049e-05,
      "loss": 2.9861,
      "step": 193795
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.475101470947266,
      "learning_rate": 3.660221593622644e-05,
      "loss": 3.0652,
      "step": 193796
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.547008514404297,
      "learning_rate": 3.660025791421868e-05,
      "loss": 2.706,
      "step": 193797
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.546933174133301,
      "learning_rate": 3.6598299941181875e-05,
      "loss": 2.7768,
      "step": 193798
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0358598232269287,
      "learning_rate": 3.659634201711643e-05,
      "loss": 2.8281,
      "step": 193799
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9237191677093506,
      "learning_rate": 3.659438414202265e-05,
      "loss": 2.9201,
      "step": 193800
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7090349197387695,
      "learning_rate": 3.659242631590099e-05,
      "loss": 2.841,
      "step": 193801
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0255215167999268,
      "learning_rate": 3.6590468538751695e-05,
      "loss": 3.0488,
      "step": 193802
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6883320808410645,
      "learning_rate": 3.658851081057529e-05,
      "loss": 2.9686,
      "step": 193803
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.3470144271850586,
      "learning_rate": 3.658655313137204e-05,
      "loss": 2.7327,
      "step": 193804
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8763673305511475,
      "learning_rate": 3.658459550114232e-05,
      "loss": 3.329,
      "step": 193805
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.834566354751587,
      "learning_rate": 3.658263791988646e-05,
      "loss": 2.9939,
      "step": 193806
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7397985458374023,
      "learning_rate": 3.658068038760489e-05,
      "loss": 2.9366,
      "step": 193807
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7864456176757812,
      "learning_rate": 3.657872290429787e-05,
      "loss": 3.0844,
      "step": 193808
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8617327213287354,
      "learning_rate": 3.6576765469965954e-05,
      "loss": 2.9314,
      "step": 193809
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7258288860321045,
      "learning_rate": 3.657480808460935e-05,
      "loss": 3.1275,
      "step": 193810
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8101844787597656,
      "learning_rate": 3.657285074822841e-05,
      "loss": 3.093,
      "step": 193811
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6048202514648438,
      "learning_rate": 3.657089346082359e-05,
      "loss": 2.9275,
      "step": 193812
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7701714038848877,
      "learning_rate": 3.656893622239526e-05,
      "loss": 3.0052,
      "step": 193813
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5394861698150635,
      "learning_rate": 3.656697903294365e-05,
      "loss": 2.7603,
      "step": 193814
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.652786731719971,
      "learning_rate": 3.656502189246927e-05,
      "loss": 2.8368,
      "step": 193815
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.024313449859619,
      "learning_rate": 3.656306480097241e-05,
      "loss": 3.0701,
      "step": 193816
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.207001209259033,
      "learning_rate": 3.656110775845344e-05,
      "loss": 2.9168,
      "step": 193817
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.035060167312622,
      "learning_rate": 3.655915076491275e-05,
      "loss": 2.7799,
      "step": 193818
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2293732166290283,
      "learning_rate": 3.655719382035068e-05,
      "loss": 2.9517,
      "step": 193819
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9523372650146484,
      "learning_rate": 3.655523692476757e-05,
      "loss": 3.0364,
      "step": 193820
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.975799798965454,
      "learning_rate": 3.655328007816385e-05,
      "loss": 3.0965,
      "step": 193821
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.843676805496216,
      "learning_rate": 3.6551323280539846e-05,
      "loss": 3.0507,
      "step": 193822
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0436649322509766,
      "learning_rate": 3.65493665318959e-05,
      "loss": 2.705,
      "step": 193823
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.555375576019287,
      "learning_rate": 3.6547409832232435e-05,
      "loss": 2.7927,
      "step": 193824
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1815574169158936,
      "learning_rate": 3.6545453181549724e-05,
      "loss": 2.7693,
      "step": 193825
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0431439876556396,
      "learning_rate": 3.654349657984823e-05,
      "loss": 2.8544,
      "step": 193826
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3247454166412354,
      "learning_rate": 3.654154002712829e-05,
      "loss": 2.9265,
      "step": 193827
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.655545473098755,
      "learning_rate": 3.6539583523390235e-05,
      "loss": 2.9043,
      "step": 193828
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.596834421157837,
      "learning_rate": 3.65376270686344e-05,
      "loss": 2.9723,
      "step": 193829
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.704007863998413,
      "learning_rate": 3.6535670662861247e-05,
      "loss": 2.9562,
      "step": 193830
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.862276315689087,
      "learning_rate": 3.653371430607105e-05,
      "loss": 2.8158,
      "step": 193831
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9151742458343506,
      "learning_rate": 3.653175799826423e-05,
      "loss": 2.7721,
      "step": 193832
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.558617353439331,
      "learning_rate": 3.65298017394411e-05,
      "loss": 2.8627,
      "step": 193833
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8999147415161133,
      "learning_rate": 3.652784552960215e-05,
      "loss": 3.0022,
      "step": 193834
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.766798973083496,
      "learning_rate": 3.652588936874755e-05,
      "loss": 3.1604,
      "step": 193835
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6355202198028564,
      "learning_rate": 3.65239332568778e-05,
      "loss": 2.9021,
      "step": 193836
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.922814130783081,
      "learning_rate": 3.652197719399317e-05,
      "loss": 2.6187,
      "step": 193837
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2221741676330566,
      "learning_rate": 3.652002118009415e-05,
      "loss": 2.9475,
      "step": 193838
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.033610820770264,
      "learning_rate": 3.651806521518098e-05,
      "loss": 2.8623,
      "step": 193839
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.010390281677246,
      "learning_rate": 3.651610929925409e-05,
      "loss": 2.8321,
      "step": 193840
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.872840166091919,
      "learning_rate": 3.651415343231389e-05,
      "loss": 2.8561,
      "step": 193841
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.479904651641846,
      "learning_rate": 3.6512197614360624e-05,
      "loss": 2.88,
      "step": 193842
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9079806804656982,
      "learning_rate": 3.6510241845394684e-05,
      "loss": 3.06,
      "step": 193843
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.486462116241455,
      "learning_rate": 3.650828612541652e-05,
      "loss": 2.6902,
      "step": 193844
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8188259601593018,
      "learning_rate": 3.650633045442637e-05,
      "loss": 2.9707,
      "step": 193845
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4642112255096436,
      "learning_rate": 3.650437483242473e-05,
      "loss": 3.0125,
      "step": 193846
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.78749942779541,
      "learning_rate": 3.650241925941194e-05,
      "loss": 2.8644,
      "step": 193847
    },
    {
      "epoch": 2.52,
      "grad_norm": 6.2808451652526855,
      "learning_rate": 3.6500463735388265e-05,
      "loss": 2.7645,
      "step": 193848
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.72743821144104,
      "learning_rate": 3.64985082603541e-05,
      "loss": 2.8384,
      "step": 193849
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.037911891937256,
      "learning_rate": 3.649655283430991e-05,
      "loss": 2.8138,
      "step": 193850
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.988996982574463,
      "learning_rate": 3.64945974572559e-05,
      "loss": 2.8362,
      "step": 193851
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1452744007110596,
      "learning_rate": 3.6492642129192574e-05,
      "loss": 2.9517,
      "step": 193852
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.7021846771240234,
      "learning_rate": 3.6490686850120255e-05,
      "loss": 2.8405,
      "step": 193853
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.149598598480225,
      "learning_rate": 3.648873162003928e-05,
      "loss": 2.9136,
      "step": 193854
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9959306716918945,
      "learning_rate": 3.648677643894998e-05,
      "loss": 2.9441,
      "step": 193855
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.279487609863281,
      "learning_rate": 3.6484821306852794e-05,
      "loss": 3.2153,
      "step": 193856
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6187126636505127,
      "learning_rate": 3.648286622374802e-05,
      "loss": 2.9077,
      "step": 193857
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7044029235839844,
      "learning_rate": 3.6480911189636116e-05,
      "loss": 2.8198,
      "step": 193858
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5540771484375,
      "learning_rate": 3.647895620451736e-05,
      "loss": 3.0033,
      "step": 193859
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.561816692352295,
      "learning_rate": 3.6477001268392146e-05,
      "loss": 3.1411,
      "step": 193860
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.2686314582824707,
      "learning_rate": 3.647504638126081e-05,
      "loss": 2.9687,
      "step": 193861
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7650656700134277,
      "learning_rate": 3.647309154312374e-05,
      "loss": 2.7946,
      "step": 193862
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8468706607818604,
      "learning_rate": 3.647113675398129e-05,
      "loss": 3.003,
      "step": 193863
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.25878381729126,
      "learning_rate": 3.6469182013833875e-05,
      "loss": 3.1712,
      "step": 193864
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3530540466308594,
      "learning_rate": 3.6467227322681805e-05,
      "loss": 2.6958,
      "step": 193865
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.1362051963806152,
      "learning_rate": 3.6465272680525435e-05,
      "loss": 2.8957,
      "step": 193866
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4490721225738525,
      "learning_rate": 3.646331808736511e-05,
      "loss": 2.7772,
      "step": 193867
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.424801826477051,
      "learning_rate": 3.646136354320126e-05,
      "loss": 2.7056,
      "step": 193868
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.487180709838867,
      "learning_rate": 3.6459409048034185e-05,
      "loss": 2.907,
      "step": 193869
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.968087911605835,
      "learning_rate": 3.6457454601864345e-05,
      "loss": 2.9497,
      "step": 193870
    },
    {
      "epoch": 2.52,
      "grad_norm": 6.599073886871338,
      "learning_rate": 3.6455500204692014e-05,
      "loss": 2.8368,
      "step": 193871
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3828771114349365,
      "learning_rate": 3.645354585651762e-05,
      "loss": 2.8501,
      "step": 193872
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.515422821044922,
      "learning_rate": 3.6451591557341395e-05,
      "loss": 2.6335,
      "step": 193873
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7341015338897705,
      "learning_rate": 3.6449637307163846e-05,
      "loss": 3.0836,
      "step": 193874
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8904738426208496,
      "learning_rate": 3.6447683105985235e-05,
      "loss": 2.88,
      "step": 193875
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0974931716918945,
      "learning_rate": 3.6445728953806064e-05,
      "loss": 3.0655,
      "step": 193876
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.883265972137451,
      "learning_rate": 3.644377485062656e-05,
      "loss": 2.921,
      "step": 193877
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.322680950164795,
      "learning_rate": 3.6441820796447166e-05,
      "loss": 2.6418,
      "step": 193878
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.71714448928833,
      "learning_rate": 3.643986679126814e-05,
      "loss": 2.7699,
      "step": 193879
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.717411994934082,
      "learning_rate": 3.643791283508998e-05,
      "loss": 2.7266,
      "step": 193880
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.650529861450195,
      "learning_rate": 3.6435958927912925e-05,
      "loss": 2.8217,
      "step": 193881
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.8767480850219727,
      "learning_rate": 3.6434005069737474e-05,
      "loss": 2.9754,
      "step": 193882
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9956328868865967,
      "learning_rate": 3.643205126056389e-05,
      "loss": 2.9234,
      "step": 193883
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.896930694580078,
      "learning_rate": 3.6430097500392575e-05,
      "loss": 2.798,
      "step": 193884
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.222467422485352,
      "learning_rate": 3.642814378922383e-05,
      "loss": 2.8987,
      "step": 193885
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.979036808013916,
      "learning_rate": 3.642619012705812e-05,
      "loss": 2.9206,
      "step": 193886
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.315525531768799,
      "learning_rate": 3.6424236513895676e-05,
      "loss": 2.9722,
      "step": 193887
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.6263086795806885,
      "learning_rate": 3.6422282949737037e-05,
      "loss": 2.9527,
      "step": 193888
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0322535037994385,
      "learning_rate": 3.6420329434582465e-05,
      "loss": 2.8982,
      "step": 193889
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9631266593933105,
      "learning_rate": 3.641837596843232e-05,
      "loss": 2.9383,
      "step": 193890
    },
    {
      "epoch": 2.52,
      "grad_norm": 6.085072994232178,
      "learning_rate": 3.6416422551286914e-05,
      "loss": 2.9085,
      "step": 193891
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.049210548400879,
      "learning_rate": 3.641446918314671e-05,
      "loss": 2.9509,
      "step": 193892
    },
    {
      "epoch": 2.52,
      "grad_norm": 6.232291221618652,
      "learning_rate": 3.6412515864012006e-05,
      "loss": 2.8742,
      "step": 193893
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.98402738571167,
      "learning_rate": 3.641056259388323e-05,
      "loss": 2.8479,
      "step": 193894
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.6392297744750977,
      "learning_rate": 3.640860937276072e-05,
      "loss": 2.8964,
      "step": 193895
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8875370025634766,
      "learning_rate": 3.640665620064475e-05,
      "loss": 3.0619,
      "step": 193896
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.7782301902770996,
      "learning_rate": 3.640470307753583e-05,
      "loss": 2.8506,
      "step": 193897
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.658635139465332,
      "learning_rate": 3.640275000343426e-05,
      "loss": 3.0354,
      "step": 193898
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.7630839347839355,
      "learning_rate": 3.640079697834031e-05,
      "loss": 2.9066,
      "step": 193899
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.9431464672088623,
      "learning_rate": 3.6398844002254525e-05,
      "loss": 2.908,
      "step": 193900
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0519323348999023,
      "learning_rate": 3.6396891075177134e-05,
      "loss": 2.9763,
      "step": 193901
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.4392409324645996,
      "learning_rate": 3.639493819710848e-05,
      "loss": 2.9165,
      "step": 193902
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8028104305267334,
      "learning_rate": 3.639298536804906e-05,
      "loss": 2.8077,
      "step": 193903
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.800278902053833,
      "learning_rate": 3.639103258799916e-05,
      "loss": 2.81,
      "step": 193904
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.5947844982147217,
      "learning_rate": 3.638907985695906e-05,
      "loss": 3.0725,
      "step": 193905
    },
    {
      "epoch": 2.52,
      "grad_norm": 5.266000747680664,
      "learning_rate": 3.638712717492929e-05,
      "loss": 2.9626,
      "step": 193906
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0828282833099365,
      "learning_rate": 3.638517454191008e-05,
      "loss": 2.9449,
      "step": 193907
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.711566686630249,
      "learning_rate": 3.638322195790187e-05,
      "loss": 2.9057,
      "step": 193908
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0135600566864014,
      "learning_rate": 3.6381269422905016e-05,
      "loss": 3.4347,
      "step": 193909
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.05802845954895,
      "learning_rate": 3.637931693691979e-05,
      "loss": 3.0858,
      "step": 193910
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.98728609085083,
      "learning_rate": 3.637736449994669e-05,
      "loss": 2.8321,
      "step": 193911
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.0694773197174072,
      "learning_rate": 3.6375412111986026e-05,
      "loss": 2.7873,
      "step": 193912
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.182018518447876,
      "learning_rate": 3.637345977303805e-05,
      "loss": 3.0978,
      "step": 193913
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.8163890838623047,
      "learning_rate": 3.63715074831033e-05,
      "loss": 3.082,
      "step": 193914
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.744525909423828,
      "learning_rate": 3.636955524218208e-05,
      "loss": 2.9222,
      "step": 193915
    },
    {
      "epoch": 2.52,
      "grad_norm": 4.040774822235107,
      "learning_rate": 3.636760305027469e-05,
      "loss": 3.0161,
      "step": 193916
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.713486433029175,
      "learning_rate": 3.6365650907381585e-05,
      "loss": 2.7832,
      "step": 193917
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.664702892303467,
      "learning_rate": 3.6363698813503074e-05,
      "loss": 2.7452,
      "step": 193918
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.9400532245635986,
      "learning_rate": 3.636174676863945e-05,
      "loss": 3.0635,
      "step": 193919
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3705103397369385,
      "learning_rate": 3.635979477279123e-05,
      "loss": 3.034,
      "step": 193920
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.238980531692505,
      "learning_rate": 3.6357842825958694e-05,
      "loss": 2.8707,
      "step": 193921
    },
    {
      "epoch": 2.52,
      "grad_norm": 3.3717474937438965,
      "learning_rate": 3.635589092814218e-05,
      "loss": 3.0874,
      "step": 193922
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.08601450920105,
      "learning_rate": 3.635393907934213e-05,
      "loss": 2.8477,
      "step": 193923
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.990278482437134,
      "learning_rate": 3.635198727955877e-05,
      "loss": 3.2672,
      "step": 193924
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6312167644500732,
      "learning_rate": 3.635003552879263e-05,
      "loss": 2.8844,
      "step": 193925
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.263631820678711,
      "learning_rate": 3.634808382704402e-05,
      "loss": 2.5748,
      "step": 193926
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.3896920680999756,
      "learning_rate": 3.6346132174313257e-05,
      "loss": 2.9793,
      "step": 193927
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8196027278900146,
      "learning_rate": 3.634418057060066e-05,
      "loss": 2.8783,
      "step": 193928
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6571767330169678,
      "learning_rate": 3.634222901590674e-05,
      "loss": 2.8815,
      "step": 193929
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.837146520614624,
      "learning_rate": 3.634027751023169e-05,
      "loss": 2.9532,
      "step": 193930
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0334739685058594,
      "learning_rate": 3.6338326053576015e-05,
      "loss": 2.7404,
      "step": 193931
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5055041313171387,
      "learning_rate": 3.6336374645940036e-05,
      "loss": 2.9353,
      "step": 193932
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4559061527252197,
      "learning_rate": 3.633442328732411e-05,
      "loss": 2.9058,
      "step": 193933
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.116990089416504,
      "learning_rate": 3.633247197772854e-05,
      "loss": 2.8982,
      "step": 193934
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6011173725128174,
      "learning_rate": 3.633052071715379e-05,
      "loss": 2.9429,
      "step": 193935
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4742674827575684,
      "learning_rate": 3.632856950560009e-05,
      "loss": 2.721,
      "step": 193936
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.152805805206299,
      "learning_rate": 3.632661834306798e-05,
      "loss": 3.0847,
      "step": 193937
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9506566524505615,
      "learning_rate": 3.632466722955769e-05,
      "loss": 2.9346,
      "step": 193938
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.909952402114868,
      "learning_rate": 3.632271616506965e-05,
      "loss": 2.8721,
      "step": 193939
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.449709415435791,
      "learning_rate": 3.6320765149604135e-05,
      "loss": 2.8457,
      "step": 193940
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9009170532226562,
      "learning_rate": 3.6318814183161605e-05,
      "loss": 2.9188,
      "step": 193941
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.306107521057129,
      "learning_rate": 3.631686326574236e-05,
      "loss": 2.9268,
      "step": 193942
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7215330600738525,
      "learning_rate": 3.631491239734683e-05,
      "loss": 2.8828,
      "step": 193943
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.655442953109741,
      "learning_rate": 3.631296157797532e-05,
      "loss": 2.6539,
      "step": 193944
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.456568479537964,
      "learning_rate": 3.631101080762823e-05,
      "loss": 2.7929,
      "step": 193945
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.789283275604248,
      "learning_rate": 3.630906008630582e-05,
      "loss": 2.9758,
      "step": 193946
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.500593423843384,
      "learning_rate": 3.6307109414008625e-05,
      "loss": 3.0081,
      "step": 193947
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1234869956970215,
      "learning_rate": 3.6305158790736825e-05,
      "loss": 2.9841,
      "step": 193948
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.193848133087158,
      "learning_rate": 3.630320821649093e-05,
      "loss": 2.9212,
      "step": 193949
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7387664318084717,
      "learning_rate": 3.630125769127129e-05,
      "loss": 2.9155,
      "step": 193950
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8396213054656982,
      "learning_rate": 3.62993072150782e-05,
      "loss": 2.936,
      "step": 193951
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.895498752593994,
      "learning_rate": 3.629735678791196e-05,
      "loss": 2.9351,
      "step": 193952
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6820693016052246,
      "learning_rate": 3.62954064097731e-05,
      "loss": 3.0559,
      "step": 193953
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5244863033294678,
      "learning_rate": 3.629345608066185e-05,
      "loss": 2.9977,
      "step": 193954
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3825573921203613,
      "learning_rate": 3.629150580057869e-05,
      "loss": 3.128,
      "step": 193955
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.460808515548706,
      "learning_rate": 3.6289555569523875e-05,
      "loss": 3.0048,
      "step": 193956
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.705660104751587,
      "learning_rate": 3.628760538749784e-05,
      "loss": 2.8036,
      "step": 193957
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0094120502471924,
      "learning_rate": 3.6285655254500855e-05,
      "loss": 3.0714,
      "step": 193958
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.029167652130127,
      "learning_rate": 3.6283705170533414e-05,
      "loss": 2.9654,
      "step": 193959
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0890936851501465,
      "learning_rate": 3.628175513559576e-05,
      "loss": 2.8181,
      "step": 193960
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.029566764831543,
      "learning_rate": 3.6279805149688345e-05,
      "loss": 3.0366,
      "step": 193961
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0070369243621826,
      "learning_rate": 3.627785521281148e-05,
      "loss": 2.7193,
      "step": 193962
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.211630344390869,
      "learning_rate": 3.627590532496556e-05,
      "loss": 2.7986,
      "step": 193963
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3619086742401123,
      "learning_rate": 3.627395548615085e-05,
      "loss": 2.8983,
      "step": 193964
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0752363204956055,
      "learning_rate": 3.627200569636786e-05,
      "loss": 3.0442,
      "step": 193965
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.117344856262207,
      "learning_rate": 3.627005595561684e-05,
      "loss": 2.8859,
      "step": 193966
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8729164600372314,
      "learning_rate": 3.626810626389823e-05,
      "loss": 2.7197,
      "step": 193967
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6873700618743896,
      "learning_rate": 3.626615662121237e-05,
      "loss": 2.7476,
      "step": 193968
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.787278175354004,
      "learning_rate": 3.626420702755962e-05,
      "loss": 2.9708,
      "step": 193969
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0007309913635254,
      "learning_rate": 3.626225748294025e-05,
      "loss": 2.8909,
      "step": 193970
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2073864936828613,
      "learning_rate": 3.626030798735475e-05,
      "loss": 2.9104,
      "step": 193971
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0410094261169434,
      "learning_rate": 3.62583585408034e-05,
      "loss": 2.7761,
      "step": 193972
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.721945285797119,
      "learning_rate": 3.6256409143286694e-05,
      "loss": 3.1426,
      "step": 193973
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.240447998046875,
      "learning_rate": 3.625445979480479e-05,
      "loss": 2.9883,
      "step": 193974
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.819929599761963,
      "learning_rate": 3.62525104953583e-05,
      "loss": 2.8311,
      "step": 193975
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.958144187927246,
      "learning_rate": 3.625056124494732e-05,
      "loss": 2.9821,
      "step": 193976
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5215790271759033,
      "learning_rate": 3.624861204357241e-05,
      "loss": 2.7668,
      "step": 193977
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6387782096862793,
      "learning_rate": 3.624666289123378e-05,
      "loss": 3.1118,
      "step": 193978
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.477883815765381,
      "learning_rate": 3.624471378793195e-05,
      "loss": 2.8798,
      "step": 193979
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.019256830215454,
      "learning_rate": 3.624276473366714e-05,
      "loss": 3.0724,
      "step": 193980
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0379552841186523,
      "learning_rate": 3.6240815728439933e-05,
      "loss": 2.9404,
      "step": 193981
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6552493572235107,
      "learning_rate": 3.623886677225036e-05,
      "loss": 2.7651,
      "step": 193982
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7770373821258545,
      "learning_rate": 3.623691786509907e-05,
      "loss": 3.0031,
      "step": 193983
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.984372615814209,
      "learning_rate": 3.6234969006986216e-05,
      "loss": 2.9096,
      "step": 193984
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.053501605987549,
      "learning_rate": 3.623302019791233e-05,
      "loss": 3.0891,
      "step": 193985
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9304141998291016,
      "learning_rate": 3.623107143787766e-05,
      "loss": 2.8886,
      "step": 193986
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9311225414276123,
      "learning_rate": 3.622912272688269e-05,
      "loss": 2.9511,
      "step": 193987
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9539432525634766,
      "learning_rate": 3.622717406492766e-05,
      "loss": 2.9093,
      "step": 193988
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.373908758163452,
      "learning_rate": 3.6225225452012995e-05,
      "loss": 2.7719,
      "step": 193989
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6936190128326416,
      "learning_rate": 3.6223276888139006e-05,
      "loss": 2.9293,
      "step": 193990
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.830981731414795,
      "learning_rate": 3.6221328373306124e-05,
      "loss": 3.0546,
      "step": 193991
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.563931465148926,
      "learning_rate": 3.621937990751461e-05,
      "loss": 2.8292,
      "step": 193992
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.197177410125732,
      "learning_rate": 3.621743149076497e-05,
      "loss": 2.8353,
      "step": 193993
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.966813325881958,
      "learning_rate": 3.621548312305747e-05,
      "loss": 2.8702,
      "step": 193994
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1092159748077393,
      "learning_rate": 3.621353480439243e-05,
      "loss": 2.7882,
      "step": 193995
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8920066356658936,
      "learning_rate": 3.621158653477037e-05,
      "loss": 3.1013,
      "step": 193996
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0645806789398193,
      "learning_rate": 3.62096383141915e-05,
      "loss": 3.0316,
      "step": 193997
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.749441146850586,
      "learning_rate": 3.6207690142656206e-05,
      "loss": 3.0073,
      "step": 193998
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.672515392303467,
      "learning_rate": 3.6205742020164954e-05,
      "loss": 2.8644,
      "step": 193999
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0059401988983154,
      "learning_rate": 3.620379394671803e-05,
      "loss": 3.1256,
      "step": 194000
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8042616844177246,
      "learning_rate": 3.6201845922315744e-05,
      "loss": 2.7083,
      "step": 194001
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.082942485809326,
      "learning_rate": 3.619989794695855e-05,
      "loss": 2.9776,
      "step": 194002
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.960322618484497,
      "learning_rate": 3.6197950020646805e-05,
      "loss": 2.9863,
      "step": 194003
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9783613681793213,
      "learning_rate": 3.619600214338075e-05,
      "loss": 2.9312,
      "step": 194004
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.136504650115967,
      "learning_rate": 3.61940543151609e-05,
      "loss": 2.9206,
      "step": 194005
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2553138732910156,
      "learning_rate": 3.619210653598759e-05,
      "loss": 2.9252,
      "step": 194006
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5980467796325684,
      "learning_rate": 3.619015880586107e-05,
      "loss": 2.9527,
      "step": 194007
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6380937099456787,
      "learning_rate": 3.618821112478182e-05,
      "loss": 3.1174,
      "step": 194008
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.951918601989746,
      "learning_rate": 3.61862634927501e-05,
      "loss": 2.7477,
      "step": 194009
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.370309829711914,
      "learning_rate": 3.6184315909766416e-05,
      "loss": 2.964,
      "step": 194010
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2198057174682617,
      "learning_rate": 3.618236837583106e-05,
      "loss": 3.0629,
      "step": 194011
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.557492733001709,
      "learning_rate": 3.618042089094434e-05,
      "loss": 2.8776,
      "step": 194012
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.463283061981201,
      "learning_rate": 3.6178473455106614e-05,
      "loss": 3.0041,
      "step": 194013
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6835315227508545,
      "learning_rate": 3.617652606831839e-05,
      "loss": 3.0124,
      "step": 194014
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.51383113861084,
      "learning_rate": 3.6174578730579826e-05,
      "loss": 2.9439,
      "step": 194015
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6417291164398193,
      "learning_rate": 3.617263144189146e-05,
      "loss": 2.8736,
      "step": 194016
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0130045413970947,
      "learning_rate": 3.6170684202253564e-05,
      "loss": 2.7538,
      "step": 194017
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3433101177215576,
      "learning_rate": 3.616873701166656e-05,
      "loss": 2.9056,
      "step": 194018
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.681334972381592,
      "learning_rate": 3.6166789870130686e-05,
      "loss": 3.0012,
      "step": 194019
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3919854164123535,
      "learning_rate": 3.616484277764644e-05,
      "loss": 2.9055,
      "step": 194020
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.879765510559082,
      "learning_rate": 3.616289573421406e-05,
      "loss": 2.7204,
      "step": 194021
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7072248458862305,
      "learning_rate": 3.616094873983404e-05,
      "loss": 2.6232,
      "step": 194022
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8143532276153564,
      "learning_rate": 3.615900179450671e-05,
      "loss": 3.0463,
      "step": 194023
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.136380434036255,
      "learning_rate": 3.615705489823238e-05,
      "loss": 3.0967,
      "step": 194024
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.538620710372925,
      "learning_rate": 3.6155108051011384e-05,
      "loss": 3.0425,
      "step": 194025
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.368237257003784,
      "learning_rate": 3.615316125284418e-05,
      "loss": 3.0646,
      "step": 194026
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.112861156463623,
      "learning_rate": 3.615121450373103e-05,
      "loss": 2.9433,
      "step": 194027
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9861302375793457,
      "learning_rate": 3.614926780367241e-05,
      "loss": 2.8828,
      "step": 194028
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.515977621078491,
      "learning_rate": 3.614732115266862e-05,
      "loss": 3.0558,
      "step": 194029
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.69844651222229,
      "learning_rate": 3.6145374550720016e-05,
      "loss": 3.0094,
      "step": 194030
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6354949474334717,
      "learning_rate": 3.6143427997826913e-05,
      "loss": 2.8588,
      "step": 194031
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.879718780517578,
      "learning_rate": 3.61414814939898e-05,
      "loss": 3.0098,
      "step": 194032
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.508748769760132,
      "learning_rate": 3.613953503920891e-05,
      "loss": 2.7802,
      "step": 194033
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.3772056102752686,
      "learning_rate": 3.613758863348472e-05,
      "loss": 2.8408,
      "step": 194034
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.794323205947876,
      "learning_rate": 3.613564227681751e-05,
      "loss": 2.8164,
      "step": 194035
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8676748275756836,
      "learning_rate": 3.613369596920767e-05,
      "loss": 2.9036,
      "step": 194036
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8042311668395996,
      "learning_rate": 3.613174971065551e-05,
      "loss": 2.8583,
      "step": 194037
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5890138149261475,
      "learning_rate": 3.612980350116148e-05,
      "loss": 3.0654,
      "step": 194038
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7748241424560547,
      "learning_rate": 3.612785734072587e-05,
      "loss": 3.0212,
      "step": 194039
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.226135730743408,
      "learning_rate": 3.6125911229349094e-05,
      "loss": 3.1946,
      "step": 194040
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.504521369934082,
      "learning_rate": 3.612396516703147e-05,
      "loss": 2.7996,
      "step": 194041
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7994544506073,
      "learning_rate": 3.6122019153773495e-05,
      "loss": 2.9479,
      "step": 194042
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.140918254852295,
      "learning_rate": 3.6120073189575284e-05,
      "loss": 2.9556,
      "step": 194043
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3365869522094727,
      "learning_rate": 3.611812727443739e-05,
      "loss": 2.883,
      "step": 194044
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2982983589172363,
      "learning_rate": 3.611618140836006e-05,
      "loss": 2.991,
      "step": 194045
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.978789806365967,
      "learning_rate": 3.6114235591343777e-05,
      "loss": 2.8298,
      "step": 194046
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6447417736053467,
      "learning_rate": 3.611228982338879e-05,
      "loss": 2.846,
      "step": 194047
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.08870267868042,
      "learning_rate": 3.6110344104495615e-05,
      "loss": 3.1797,
      "step": 194048
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.354572296142578,
      "learning_rate": 3.61083984346644e-05,
      "loss": 2.8508,
      "step": 194049
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.711406707763672,
      "learning_rate": 3.6106452813895635e-05,
      "loss": 3.0261,
      "step": 194050
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9954354763031006,
      "learning_rate": 3.610450724218966e-05,
      "loss": 3.1269,
      "step": 194051
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9494094848632812,
      "learning_rate": 3.6102561719546874e-05,
      "loss": 2.8126,
      "step": 194052
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7409467697143555,
      "learning_rate": 3.610061624596754e-05,
      "loss": 3.1486,
      "step": 194053
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7127251625061035,
      "learning_rate": 3.609867082145219e-05,
      "loss": 3.0714,
      "step": 194054
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.943270683288574,
      "learning_rate": 3.609672544600096e-05,
      "loss": 3.1616,
      "step": 194055
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2602148056030273,
      "learning_rate": 3.609478011961442e-05,
      "loss": 3.0636,
      "step": 194056
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8150904178619385,
      "learning_rate": 3.609283484229276e-05,
      "loss": 2.6038,
      "step": 194057
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7709171772003174,
      "learning_rate": 3.609088961403649e-05,
      "loss": 2.9644,
      "step": 194058
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8749477863311768,
      "learning_rate": 3.608894443484584e-05,
      "loss": 2.982,
      "step": 194059
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.0526957511901855,
      "learning_rate": 3.608699930472136e-05,
      "loss": 3.1074,
      "step": 194060
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8532721996307373,
      "learning_rate": 3.608505422366318e-05,
      "loss": 2.8109,
      "step": 194061
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4865024089813232,
      "learning_rate": 3.608310919167181e-05,
      "loss": 2.7639,
      "step": 194062
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2717678546905518,
      "learning_rate": 3.608116420874753e-05,
      "loss": 2.7864,
      "step": 194063
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.74434494972229,
      "learning_rate": 3.607921927489079e-05,
      "loss": 3.1869,
      "step": 194064
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6721482276916504,
      "learning_rate": 3.607727439010184e-05,
      "loss": 2.8849,
      "step": 194065
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9594600200653076,
      "learning_rate": 3.607532955438124e-05,
      "loss": 3.0988,
      "step": 194066
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.464777946472168,
      "learning_rate": 3.607338476772908e-05,
      "loss": 2.9302,
      "step": 194067
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.533752202987671,
      "learning_rate": 3.607144003014595e-05,
      "loss": 3.0839,
      "step": 194068
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6878855228424072,
      "learning_rate": 3.606949534163206e-05,
      "loss": 3.1658,
      "step": 194069
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8566105365753174,
      "learning_rate": 3.6067550702187884e-05,
      "loss": 3.1135,
      "step": 194070
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4145395755767822,
      "learning_rate": 3.606560611181366e-05,
      "loss": 2.8025,
      "step": 194071
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.470125913619995,
      "learning_rate": 3.6063661570509885e-05,
      "loss": 3.0314,
      "step": 194072
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.598393440246582,
      "learning_rate": 3.606171707827685e-05,
      "loss": 2.9755,
      "step": 194073
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8393805027008057,
      "learning_rate": 3.605977263511494e-05,
      "loss": 2.7215,
      "step": 194074
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8409533500671387,
      "learning_rate": 3.6057828241024434e-05,
      "loss": 2.8034,
      "step": 194075
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.419893264770508,
      "learning_rate": 3.6055883896005846e-05,
      "loss": 2.8268,
      "step": 194076
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.887917995452881,
      "learning_rate": 3.605393960005933e-05,
      "loss": 3.1098,
      "step": 194077
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.402693033218384,
      "learning_rate": 3.6051995353185505e-05,
      "loss": 3.1145,
      "step": 194078
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.3491880893707275,
      "learning_rate": 3.605005115538455e-05,
      "loss": 2.7973,
      "step": 194079
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6403284072875977,
      "learning_rate": 3.6048107006656876e-05,
      "loss": 3.0822,
      "step": 194080
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2890138626098633,
      "learning_rate": 3.604616290700278e-05,
      "loss": 2.8505,
      "step": 194081
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.903630495071411,
      "learning_rate": 3.604421885642277e-05,
      "loss": 2.7918,
      "step": 194082
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.72442889213562,
      "learning_rate": 3.604227485491703e-05,
      "loss": 3.021,
      "step": 194083
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3714916706085205,
      "learning_rate": 3.604033090248607e-05,
      "loss": 3.0689,
      "step": 194084
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2560527324676514,
      "learning_rate": 3.603838699913022e-05,
      "loss": 2.8919,
      "step": 194085
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7559733390808105,
      "learning_rate": 3.6036443144849746e-05,
      "loss": 2.8669,
      "step": 194086
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.145963430404663,
      "learning_rate": 3.6034499339645154e-05,
      "loss": 3.1844,
      "step": 194087
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.635357618331909,
      "learning_rate": 3.603255558351673e-05,
      "loss": 2.8229,
      "step": 194088
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.551565170288086,
      "learning_rate": 3.603061187646476e-05,
      "loss": 2.7857,
      "step": 194089
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.275035858154297,
      "learning_rate": 3.602866821848975e-05,
      "loss": 2.583,
      "step": 194090
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.012681007385254,
      "learning_rate": 3.6026724609592e-05,
      "loss": 3.0379,
      "step": 194091
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1014528274536133,
      "learning_rate": 3.602478104977178e-05,
      "loss": 3.0297,
      "step": 194092
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.660987377166748,
      "learning_rate": 3.60228375390296e-05,
      "loss": 2.8915,
      "step": 194093
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.867462158203125,
      "learning_rate": 3.6020894077365736e-05,
      "loss": 2.8499,
      "step": 194094
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5620386600494385,
      "learning_rate": 3.6018950664780614e-05,
      "loss": 2.8023,
      "step": 194095
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.600151777267456,
      "learning_rate": 3.6017007301274525e-05,
      "loss": 3.0353,
      "step": 194096
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.753995180130005,
      "learning_rate": 3.6015063986847884e-05,
      "loss": 2.9437,
      "step": 194097
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.004671812057495,
      "learning_rate": 3.601312072150094e-05,
      "loss": 2.8281,
      "step": 194098
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.806256055831909,
      "learning_rate": 3.601117750523421e-05,
      "loss": 3.0755,
      "step": 194099
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7389729022979736,
      "learning_rate": 3.6009234338047956e-05,
      "loss": 2.9502,
      "step": 194100
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3135101795196533,
      "learning_rate": 3.60072912199426e-05,
      "loss": 3.303,
      "step": 194101
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9900529384613037,
      "learning_rate": 3.6005348150918486e-05,
      "loss": 3.166,
      "step": 194102
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.356356143951416,
      "learning_rate": 3.600340513097595e-05,
      "loss": 2.8634,
      "step": 194103
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8271632194519043,
      "learning_rate": 3.6001462160115316e-05,
      "loss": 2.7779,
      "step": 194104
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.249181032180786,
      "learning_rate": 3.599951923833702e-05,
      "loss": 2.6713,
      "step": 194105
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.920828342437744,
      "learning_rate": 3.599757636564136e-05,
      "loss": 3.1457,
      "step": 194106
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.078176498413086,
      "learning_rate": 3.59956335420288e-05,
      "loss": 3.0077,
      "step": 194107
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9117431640625,
      "learning_rate": 3.599369076749958e-05,
      "loss": 2.7166,
      "step": 194108
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6156911849975586,
      "learning_rate": 3.599174804205424e-05,
      "loss": 2.9966,
      "step": 194109
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.879575490951538,
      "learning_rate": 3.598980536569286e-05,
      "loss": 2.8258,
      "step": 194110
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8722074031829834,
      "learning_rate": 3.598786273841605e-05,
      "loss": 2.7938,
      "step": 194111
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.311324119567871,
      "learning_rate": 3.598592016022401e-05,
      "loss": 2.9257,
      "step": 194112
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.080411672592163,
      "learning_rate": 3.5983977631117246e-05,
      "loss": 2.9535,
      "step": 194113
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.528411388397217,
      "learning_rate": 3.5982035151095944e-05,
      "loss": 2.9279,
      "step": 194114
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.686744213104248,
      "learning_rate": 3.598009272016075e-05,
      "loss": 2.9802,
      "step": 194115
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.2427520751953125,
      "learning_rate": 3.597815033831165e-05,
      "loss": 2.9313,
      "step": 194116
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7682580947875977,
      "learning_rate": 3.597620800554929e-05,
      "loss": 2.9769,
      "step": 194117
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.177891731262207,
      "learning_rate": 3.59742657218739e-05,
      "loss": 3.126,
      "step": 194118
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4253411293029785,
      "learning_rate": 3.59723234872859e-05,
      "loss": 3.0766,
      "step": 194119
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.225890874862671,
      "learning_rate": 3.597038130178558e-05,
      "loss": 2.8725,
      "step": 194120
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5640110969543457,
      "learning_rate": 3.5968439165373486e-05,
      "loss": 2.7929,
      "step": 194121
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.717073440551758,
      "learning_rate": 3.5966497078049696e-05,
      "loss": 3.1679,
      "step": 194122
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.010291814804077,
      "learning_rate": 3.59645550398148e-05,
      "loss": 3.03,
      "step": 194123
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3740384578704834,
      "learning_rate": 3.596261305066901e-05,
      "loss": 3.1319,
      "step": 194124
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.29276180267334,
      "learning_rate": 3.5960671110612816e-05,
      "loss": 2.9603,
      "step": 194125
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.953643321990967,
      "learning_rate": 3.5958729219646456e-05,
      "loss": 2.8702,
      "step": 194126
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.026975154876709,
      "learning_rate": 3.595678737777049e-05,
      "loss": 3.087,
      "step": 194127
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.976375102996826,
      "learning_rate": 3.5954845584984995e-05,
      "loss": 2.6556,
      "step": 194128
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9481160640716553,
      "learning_rate": 3.5952903841290526e-05,
      "loss": 3.0899,
      "step": 194129
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9962127208709717,
      "learning_rate": 3.5950962146687354e-05,
      "loss": 3.1187,
      "step": 194130
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.986506700515747,
      "learning_rate": 3.594902050117595e-05,
      "loss": 2.9187,
      "step": 194131
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.603719711303711,
      "learning_rate": 3.594707890475654e-05,
      "loss": 3.0663,
      "step": 194132
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1417508125305176,
      "learning_rate": 3.59451373574297e-05,
      "loss": 3.0986,
      "step": 194133
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6481893062591553,
      "learning_rate": 3.5943195859195485e-05,
      "loss": 3.142,
      "step": 194134
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.658139705657959,
      "learning_rate": 3.59412544100545e-05,
      "loss": 2.8344,
      "step": 194135
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8556430339813232,
      "learning_rate": 3.593931301000694e-05,
      "loss": 2.9609,
      "step": 194136
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.564394950866699,
      "learning_rate": 3.593737165905332e-05,
      "loss": 3.1013,
      "step": 194137
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5706498622894287,
      "learning_rate": 3.593543035719385e-05,
      "loss": 3.0069,
      "step": 194138
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.78291916847229,
      "learning_rate": 3.5933489104429114e-05,
      "loss": 2.913,
      "step": 194139
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.858604669570923,
      "learning_rate": 3.593154790075917e-05,
      "loss": 2.9641,
      "step": 194140
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9350764751434326,
      "learning_rate": 3.592960674618463e-05,
      "loss": 3.0096,
      "step": 194141
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.980036497116089,
      "learning_rate": 3.5927665640705705e-05,
      "loss": 2.9008,
      "step": 194142
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2499029636383057,
      "learning_rate": 3.5925724584322846e-05,
      "loss": 3.105,
      "step": 194143
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.049001693725586,
      "learning_rate": 3.592378357703635e-05,
      "loss": 2.8048,
      "step": 194144
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.296429395675659,
      "learning_rate": 3.592184261884671e-05,
      "loss": 3.0005,
      "step": 194145
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4473161697387695,
      "learning_rate": 3.591990170975403e-05,
      "loss": 2.9349,
      "step": 194146
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.153160572052002,
      "learning_rate": 3.5917960849758944e-05,
      "loss": 2.9442,
      "step": 194147
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.690241813659668,
      "learning_rate": 3.591602003886158e-05,
      "loss": 2.8272,
      "step": 194148
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6178781986236572,
      "learning_rate": 3.591407927706251e-05,
      "loss": 2.9534,
      "step": 194149
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.716865301132202,
      "learning_rate": 3.5912138564361934e-05,
      "loss": 2.8042,
      "step": 194150
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5832533836364746,
      "learning_rate": 3.591019790076038e-05,
      "loss": 2.7111,
      "step": 194151
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.51397442817688,
      "learning_rate": 3.590825728625799e-05,
      "loss": 2.7016,
      "step": 194152
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7789037227630615,
      "learning_rate": 3.5906316720855324e-05,
      "loss": 2.824,
      "step": 194153
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.862462043762207,
      "learning_rate": 3.590437620455258e-05,
      "loss": 3.0972,
      "step": 194154
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.905860662460327,
      "learning_rate": 3.590243573735026e-05,
      "loss": 3.0217,
      "step": 194155
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4036800861358643,
      "learning_rate": 3.590049531924859e-05,
      "loss": 2.7732,
      "step": 194156
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.950381278991699,
      "learning_rate": 3.589855495024808e-05,
      "loss": 3.0155,
      "step": 194157
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.647948741912842,
      "learning_rate": 3.5896614630349e-05,
      "loss": 2.7825,
      "step": 194158
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2177884578704834,
      "learning_rate": 3.589467435955173e-05,
      "loss": 3.0642,
      "step": 194159
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4045047760009766,
      "learning_rate": 3.589273413785656e-05,
      "loss": 2.7479,
      "step": 194160
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.776102066040039,
      "learning_rate": 3.589079396526401e-05,
      "loss": 3.1157,
      "step": 194161
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.759920358657837,
      "learning_rate": 3.588885384177425e-05,
      "loss": 2.8462,
      "step": 194162
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.653648853302002,
      "learning_rate": 3.588691376738781e-05,
      "loss": 2.9487,
      "step": 194163
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5284907817840576,
      "learning_rate": 3.5884973742104985e-05,
      "loss": 2.8643,
      "step": 194164
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.995063304901123,
      "learning_rate": 3.588303376592612e-05,
      "loss": 2.9807,
      "step": 194165
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.310792922973633,
      "learning_rate": 3.58810938388515e-05,
      "loss": 2.489,
      "step": 194166
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.649301528930664,
      "learning_rate": 3.587915396088168e-05,
      "loss": 2.9992,
      "step": 194167
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.537518501281738,
      "learning_rate": 3.587721413201681e-05,
      "loss": 2.8321,
      "step": 194168
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.536259651184082,
      "learning_rate": 3.587527435225742e-05,
      "loss": 2.8471,
      "step": 194169
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0066144466400146,
      "learning_rate": 3.587333462160382e-05,
      "loss": 2.9943,
      "step": 194170
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.626243829727173,
      "learning_rate": 3.5871394940056274e-05,
      "loss": 2.903,
      "step": 194171
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6780402660369873,
      "learning_rate": 3.586945530761531e-05,
      "loss": 3.1362,
      "step": 194172
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.793722152709961,
      "learning_rate": 3.586751572428117e-05,
      "loss": 3.089,
      "step": 194173
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6145761013031006,
      "learning_rate": 3.5865576190054216e-05,
      "loss": 2.8635,
      "step": 194174
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.3663880825042725,
      "learning_rate": 3.5863636704934875e-05,
      "loss": 2.8731,
      "step": 194175
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.561141014099121,
      "learning_rate": 3.586169726892345e-05,
      "loss": 2.9572,
      "step": 194176
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7455599308013916,
      "learning_rate": 3.585975788202028e-05,
      "loss": 2.6868,
      "step": 194177
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.466446876525879,
      "learning_rate": 3.5857818544225855e-05,
      "loss": 3.0813,
      "step": 194178
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.705413341522217,
      "learning_rate": 3.585587925554035e-05,
      "loss": 2.789,
      "step": 194179
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.740163803100586,
      "learning_rate": 3.58539400159643e-05,
      "loss": 2.9934,
      "step": 194180
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.366513729095459,
      "learning_rate": 3.5852000825498e-05,
      "loss": 3.1107,
      "step": 194181
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.93436336517334,
      "learning_rate": 3.585006168414177e-05,
      "loss": 3.0879,
      "step": 194182
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1721529960632324,
      "learning_rate": 3.584812259189593e-05,
      "loss": 2.8425,
      "step": 194183
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.97625994682312,
      "learning_rate": 3.584618354876101e-05,
      "loss": 2.842,
      "step": 194184
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0195584297180176,
      "learning_rate": 3.5844244554737204e-05,
      "loss": 3.0833,
      "step": 194185
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.18192720413208,
      "learning_rate": 3.584230560982497e-05,
      "loss": 3.1669,
      "step": 194186
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.355664014816284,
      "learning_rate": 3.584036671402466e-05,
      "loss": 2.7835,
      "step": 194187
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0891811847686768,
      "learning_rate": 3.5838427867336625e-05,
      "loss": 3.081,
      "step": 194188
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7660951614379883,
      "learning_rate": 3.5836489069761144e-05,
      "loss": 3.1921,
      "step": 194189
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.909498929977417,
      "learning_rate": 3.583455032129867e-05,
      "loss": 2.8132,
      "step": 194190
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.817000150680542,
      "learning_rate": 3.583261162194952e-05,
      "loss": 2.8253,
      "step": 194191
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7880797386169434,
      "learning_rate": 3.5830672971714136e-05,
      "loss": 2.7382,
      "step": 194192
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2326502799987793,
      "learning_rate": 3.5828734370592735e-05,
      "loss": 3.1137,
      "step": 194193
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.767456531524658,
      "learning_rate": 3.582679581858589e-05,
      "loss": 3.1062,
      "step": 194194
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0953211784362793,
      "learning_rate": 3.582485731569371e-05,
      "loss": 2.9635,
      "step": 194195
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.063946485519409,
      "learning_rate": 3.5822918861916713e-05,
      "loss": 2.7131,
      "step": 194196
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6168880462646484,
      "learning_rate": 3.58209804572552e-05,
      "loss": 2.8069,
      "step": 194197
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5299501419067383,
      "learning_rate": 3.5819042101709594e-05,
      "loss": 2.8168,
      "step": 194198
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.006134033203125,
      "learning_rate": 3.5817103795280164e-05,
      "loss": 2.6794,
      "step": 194199
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.441770553588867,
      "learning_rate": 3.5815165537967416e-05,
      "loss": 2.8108,
      "step": 194200
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.188438415527344,
      "learning_rate": 3.581322732977154e-05,
      "loss": 2.8176,
      "step": 194201
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.050530433654785,
      "learning_rate": 3.581128917069298e-05,
      "loss": 2.7652,
      "step": 194202
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.144543409347534,
      "learning_rate": 3.580935106073206e-05,
      "loss": 2.9995,
      "step": 194203
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.438967704772949,
      "learning_rate": 3.5807412999889216e-05,
      "loss": 3.01,
      "step": 194204
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.8609161376953125,
      "learning_rate": 3.580547498816472e-05,
      "loss": 2.9385,
      "step": 194205
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.41446852684021,
      "learning_rate": 3.5803537025559095e-05,
      "loss": 2.9829,
      "step": 194206
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.896036386489868,
      "learning_rate": 3.5801599112072444e-05,
      "loss": 2.9709,
      "step": 194207
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8770692348480225,
      "learning_rate": 3.57996612477053e-05,
      "loss": 3.0612,
      "step": 194208
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.024768352508545,
      "learning_rate": 3.579772343245797e-05,
      "loss": 2.9074,
      "step": 194209
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9509096145629883,
      "learning_rate": 3.579578566633088e-05,
      "loss": 2.9599,
      "step": 194210
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.10428524017334,
      "learning_rate": 3.579384794932426e-05,
      "loss": 2.9895,
      "step": 194211
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7596616744995117,
      "learning_rate": 3.579191028143868e-05,
      "loss": 2.8671,
      "step": 194212
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.657853603363037,
      "learning_rate": 3.5789972662674246e-05,
      "loss": 2.764,
      "step": 194213
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.889946222305298,
      "learning_rate": 3.578803509303152e-05,
      "loss": 2.7748,
      "step": 194214
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.142536163330078,
      "learning_rate": 3.578609757251073e-05,
      "loss": 2.9949,
      "step": 194215
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.594348430633545,
      "learning_rate": 3.5784160101112314e-05,
      "loss": 2.9976,
      "step": 194216
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.029996395111084,
      "learning_rate": 3.57822226788366e-05,
      "loss": 3.0667,
      "step": 194217
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.428300142288208,
      "learning_rate": 3.578028530568403e-05,
      "loss": 2.8865,
      "step": 194218
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3762450218200684,
      "learning_rate": 3.57783479816548e-05,
      "loss": 2.8309,
      "step": 194219
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6681337356567383,
      "learning_rate": 3.577641070674944e-05,
      "loss": 2.9924,
      "step": 194220
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.381702184677124,
      "learning_rate": 3.5774473480968144e-05,
      "loss": 2.9016,
      "step": 194221
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9060094356536865,
      "learning_rate": 3.577253630431143e-05,
      "loss": 2.9909,
      "step": 194222
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.665039300918579,
      "learning_rate": 3.577059917677951e-05,
      "loss": 2.966,
      "step": 194223
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.301823139190674,
      "learning_rate": 3.576866209837297e-05,
      "loss": 2.997,
      "step": 194224
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.663036823272705,
      "learning_rate": 3.5766725069091894e-05,
      "loss": 2.6939,
      "step": 194225
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.07993483543396,
      "learning_rate": 3.576478808893683e-05,
      "loss": 2.9874,
      "step": 194226
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5506038665771484,
      "learning_rate": 3.5762851157908024e-05,
      "loss": 2.9241,
      "step": 194227
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8914873600006104,
      "learning_rate": 3.576091427600596e-05,
      "loss": 2.9707,
      "step": 194228
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.809864044189453,
      "learning_rate": 3.5758977443230865e-05,
      "loss": 2.8163,
      "step": 194229
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0627057552337646,
      "learning_rate": 3.575704065958327e-05,
      "loss": 2.8873,
      "step": 194230
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6734848022460938,
      "learning_rate": 3.575510392506331e-05,
      "loss": 2.6298,
      "step": 194231
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2386255264282227,
      "learning_rate": 3.575316723967152e-05,
      "loss": 2.9329,
      "step": 194232
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.485980749130249,
      "learning_rate": 3.5751230603408165e-05,
      "loss": 2.9743,
      "step": 194233
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8971991539001465,
      "learning_rate": 3.574929401627371e-05,
      "loss": 2.8734,
      "step": 194234
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.970221757888794,
      "learning_rate": 3.5747357478268356e-05,
      "loss": 2.8425,
      "step": 194235
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6078076362609863,
      "learning_rate": 3.5745420989392704e-05,
      "loss": 2.4799,
      "step": 194236
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0360028743743896,
      "learning_rate": 3.574348454964685e-05,
      "loss": 2.9498,
      "step": 194237
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5493414402008057,
      "learning_rate": 3.57415481590313e-05,
      "loss": 2.7327,
      "step": 194238
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8202390670776367,
      "learning_rate": 3.5739611817546344e-05,
      "loss": 2.7378,
      "step": 194239
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7265000343322754,
      "learning_rate": 3.573767552519245e-05,
      "loss": 2.954,
      "step": 194240
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8378021717071533,
      "learning_rate": 3.573573928196983e-05,
      "loss": 2.9938,
      "step": 194241
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7109978199005127,
      "learning_rate": 3.573380308787901e-05,
      "loss": 2.8491,
      "step": 194242
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9798667430877686,
      "learning_rate": 3.573186694292025e-05,
      "loss": 3.0687,
      "step": 194243
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2099738121032715,
      "learning_rate": 3.572993084709393e-05,
      "loss": 2.949,
      "step": 194244
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.0678606033325195,
      "learning_rate": 3.5727994800400325e-05,
      "loss": 2.8931,
      "step": 194245
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.505605697631836,
      "learning_rate": 3.572605880283993e-05,
      "loss": 2.9062,
      "step": 194246
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.57297682762146,
      "learning_rate": 3.572412285441303e-05,
      "loss": 3.1679,
      "step": 194247
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8895509243011475,
      "learning_rate": 3.5722186955120026e-05,
      "loss": 2.5777,
      "step": 194248
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.7020792961120605,
      "learning_rate": 3.5720251104961284e-05,
      "loss": 2.8805,
      "step": 194249
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0877249240875244,
      "learning_rate": 3.5718315303937106e-05,
      "loss": 2.7942,
      "step": 194250
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.072995185852051,
      "learning_rate": 3.571637955204786e-05,
      "loss": 2.9213,
      "step": 194251
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.693197250366211,
      "learning_rate": 3.571444384929397e-05,
      "loss": 3.0558,
      "step": 194252
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.124837875366211,
      "learning_rate": 3.571250819567568e-05,
      "loss": 3.0836,
      "step": 194253
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4515490531921387,
      "learning_rate": 3.571057259119349e-05,
      "loss": 2.9214,
      "step": 194254
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8282856941223145,
      "learning_rate": 3.570863703584772e-05,
      "loss": 2.6999,
      "step": 194255
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6376030445098877,
      "learning_rate": 3.5706701529638613e-05,
      "loss": 2.9091,
      "step": 194256
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.307380199432373,
      "learning_rate": 3.570476607256667e-05,
      "loss": 2.9617,
      "step": 194257
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0834178924560547,
      "learning_rate": 3.5702830664632224e-05,
      "loss": 2.7597,
      "step": 194258
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.731058359146118,
      "learning_rate": 3.570089530583553e-05,
      "loss": 3.0346,
      "step": 194259
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6078569889068604,
      "learning_rate": 3.569895999617713e-05,
      "loss": 3.0377,
      "step": 194260
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4945709705352783,
      "learning_rate": 3.569702473565723e-05,
      "loss": 2.8041,
      "step": 194261
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.470952272415161,
      "learning_rate": 3.569508952427622e-05,
      "loss": 3.03,
      "step": 194262
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8437561988830566,
      "learning_rate": 3.569315436203454e-05,
      "loss": 2.9761,
      "step": 194263
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9941182136535645,
      "learning_rate": 3.569121924893248e-05,
      "loss": 2.8778,
      "step": 194264
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.165407657623291,
      "learning_rate": 3.5689284184970346e-05,
      "loss": 2.9427,
      "step": 194265
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3511080741882324,
      "learning_rate": 3.568734917014864e-05,
      "loss": 2.9131,
      "step": 194266
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.039396286010742,
      "learning_rate": 3.568541420446762e-05,
      "loss": 2.8865,
      "step": 194267
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0837769508361816,
      "learning_rate": 3.568347928792763e-05,
      "loss": 2.9483,
      "step": 194268
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.025219440460205,
      "learning_rate": 3.568154442052913e-05,
      "loss": 3.0468,
      "step": 194269
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.782930612564087,
      "learning_rate": 3.567960960227235e-05,
      "loss": 2.9745,
      "step": 194270
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5584287643432617,
      "learning_rate": 3.5677674833157797e-05,
      "loss": 2.9471,
      "step": 194271
    },
    {
      "epoch": 2.53,
      "grad_norm": 6.1624932289123535,
      "learning_rate": 3.567574011318574e-05,
      "loss": 2.7877,
      "step": 194272
    },
    {
      "epoch": 2.53,
      "grad_norm": 5.583727836608887,
      "learning_rate": 3.5673805442356564e-05,
      "loss": 2.5254,
      "step": 194273
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9616572856903076,
      "learning_rate": 3.567187082067054e-05,
      "loss": 2.9951,
      "step": 194274
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.578578472137451,
      "learning_rate": 3.5669936248128186e-05,
      "loss": 2.9673,
      "step": 194275
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.216834545135498,
      "learning_rate": 3.566800172472968e-05,
      "loss": 2.9345,
      "step": 194276
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5689682960510254,
      "learning_rate": 3.5666067250475595e-05,
      "loss": 2.9035,
      "step": 194277
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.416090726852417,
      "learning_rate": 3.5664132825366065e-05,
      "loss": 2.8127,
      "step": 194278
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.89487624168396,
      "learning_rate": 3.5662198449401726e-05,
      "loss": 3.1354,
      "step": 194279
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.052346706390381,
      "learning_rate": 3.566026412258264e-05,
      "loss": 3.0523,
      "step": 194280
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.249261856079102,
      "learning_rate": 3.565832984490934e-05,
      "loss": 2.9328,
      "step": 194281
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.5989508628845215,
      "learning_rate": 3.565639561638213e-05,
      "loss": 2.9327,
      "step": 194282
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.281551361083984,
      "learning_rate": 3.56544614370014e-05,
      "loss": 2.8399,
      "step": 194283
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.0344929695129395,
      "learning_rate": 3.565252730676746e-05,
      "loss": 2.7944,
      "step": 194284
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.486762046813965,
      "learning_rate": 3.5650593225680844e-05,
      "loss": 2.8246,
      "step": 194285
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9297375679016113,
      "learning_rate": 3.564865919374161e-05,
      "loss": 2.9659,
      "step": 194286
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9468157291412354,
      "learning_rate": 3.5646725210950364e-05,
      "loss": 2.8815,
      "step": 194287
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.296489238739014,
      "learning_rate": 3.564479127730731e-05,
      "loss": 2.8273,
      "step": 194288
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1433913707733154,
      "learning_rate": 3.5642857392812965e-05,
      "loss": 2.7197,
      "step": 194289
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3207900524139404,
      "learning_rate": 3.5640923557467506e-05,
      "loss": 2.623,
      "step": 194290
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.248873710632324,
      "learning_rate": 3.563898977127156e-05,
      "loss": 3.0662,
      "step": 194291
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.637234687805176,
      "learning_rate": 3.563705603422514e-05,
      "loss": 3.0271,
      "step": 194292
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.18129563331604,
      "learning_rate": 3.563512234632887e-05,
      "loss": 2.8915,
      "step": 194293
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.870742082595825,
      "learning_rate": 3.563318870758298e-05,
      "loss": 2.9893,
      "step": 194294
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.05988073348999,
      "learning_rate": 3.563125511798791e-05,
      "loss": 2.9737,
      "step": 194295
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6682016849517822,
      "learning_rate": 3.562932157754389e-05,
      "loss": 2.9545,
      "step": 194296
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.223322868347168,
      "learning_rate": 3.562738808625155e-05,
      "loss": 3.018,
      "step": 194297
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8280837535858154,
      "learning_rate": 3.56254546441109e-05,
      "loss": 2.8844,
      "step": 194298
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.90725040435791,
      "learning_rate": 3.5623521251122554e-05,
      "loss": 2.9418,
      "step": 194299
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7652227878570557,
      "learning_rate": 3.56215879072867e-05,
      "loss": 2.901,
      "step": 194300
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.533296585083008,
      "learning_rate": 3.561965461260389e-05,
      "loss": 3.1062,
      "step": 194301
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.813615083694458,
      "learning_rate": 3.561772136707426e-05,
      "loss": 2.6368,
      "step": 194302
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8469595909118652,
      "learning_rate": 3.561578817069844e-05,
      "loss": 3.0031,
      "step": 194303
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7691612243652344,
      "learning_rate": 3.561385502347651e-05,
      "loss": 2.8223,
      "step": 194304
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.656277656555176,
      "learning_rate": 3.561192192540903e-05,
      "loss": 3.0147,
      "step": 194305
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4506123065948486,
      "learning_rate": 3.560998887649619e-05,
      "loss": 2.9935,
      "step": 194306
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.986665725708008,
      "learning_rate": 3.560805587673853e-05,
      "loss": 2.8007,
      "step": 194307
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.474559783935547,
      "learning_rate": 3.560612292613625e-05,
      "loss": 2.8556,
      "step": 194308
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.848574638366699,
      "learning_rate": 3.560419002468985e-05,
      "loss": 3.1794,
      "step": 194309
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1998212337493896,
      "learning_rate": 3.560225717239963e-05,
      "loss": 2.8546,
      "step": 194310
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.366828203201294,
      "learning_rate": 3.560032436926592e-05,
      "loss": 3.1844,
      "step": 194311
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1303837299346924,
      "learning_rate": 3.559839161528906e-05,
      "loss": 3.1047,
      "step": 194312
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.92838191986084,
      "learning_rate": 3.5596458910469503e-05,
      "loss": 2.9457,
      "step": 194313
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.473053216934204,
      "learning_rate": 3.55945262548075e-05,
      "loss": 3.0675,
      "step": 194314
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8189570903778076,
      "learning_rate": 3.55925936483035e-05,
      "loss": 2.9181,
      "step": 194315
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0479800701141357,
      "learning_rate": 3.559066109095785e-05,
      "loss": 3.1114,
      "step": 194316
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8187997341156006,
      "learning_rate": 3.558872858277091e-05,
      "loss": 3.013,
      "step": 194317
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.3553900718688965,
      "learning_rate": 3.558679612374291e-05,
      "loss": 2.7282,
      "step": 194318
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5369410514831543,
      "learning_rate": 3.558486371387439e-05,
      "loss": 3.1196,
      "step": 194319
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4319891929626465,
      "learning_rate": 3.558293135316561e-05,
      "loss": 2.8569,
      "step": 194320
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1151366233825684,
      "learning_rate": 3.558099904161697e-05,
      "loss": 2.9585,
      "step": 194321
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.732579469680786,
      "learning_rate": 3.557906677922885e-05,
      "loss": 3.1625,
      "step": 194322
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.607098340988159,
      "learning_rate": 3.557713456600153e-05,
      "loss": 2.831,
      "step": 194323
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.131553888320923,
      "learning_rate": 3.5575202401935385e-05,
      "loss": 2.9087,
      "step": 194324
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.704094409942627,
      "learning_rate": 3.5573270287030855e-05,
      "loss": 2.9872,
      "step": 194325
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0104434490203857,
      "learning_rate": 3.557133822128819e-05,
      "loss": 2.8952,
      "step": 194326
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.934540271759033,
      "learning_rate": 3.556940620470784e-05,
      "loss": 2.7985,
      "step": 194327
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.499673366546631,
      "learning_rate": 3.556747423729016e-05,
      "loss": 2.9181,
      "step": 194328
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5430917739868164,
      "learning_rate": 3.556554231903545e-05,
      "loss": 2.8216,
      "step": 194329
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7942826747894287,
      "learning_rate": 3.556361044994406e-05,
      "loss": 2.9282,
      "step": 194330
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1512179374694824,
      "learning_rate": 3.5561678630016433e-05,
      "loss": 2.92,
      "step": 194331
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.598842620849609,
      "learning_rate": 3.555974685925281e-05,
      "loss": 2.8868,
      "step": 194332
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7626590728759766,
      "learning_rate": 3.55578151376537e-05,
      "loss": 2.9755,
      "step": 194333
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.532278537750244,
      "learning_rate": 3.555588346521939e-05,
      "loss": 2.6482,
      "step": 194334
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1374189853668213,
      "learning_rate": 3.5553951841950226e-05,
      "loss": 2.879,
      "step": 194335
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.346409320831299,
      "learning_rate": 3.5552020267846525e-05,
      "loss": 2.7931,
      "step": 194336
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6157984733581543,
      "learning_rate": 3.5550088742908736e-05,
      "loss": 2.9328,
      "step": 194337
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.88710880279541,
      "learning_rate": 3.554815726713711e-05,
      "loss": 3.0516,
      "step": 194338
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5065670013427734,
      "learning_rate": 3.554622584053216e-05,
      "loss": 2.9197,
      "step": 194339
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9932808876037598,
      "learning_rate": 3.554429446309415e-05,
      "loss": 2.7596,
      "step": 194340
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0916666984558105,
      "learning_rate": 3.554236313482337e-05,
      "loss": 3.0545,
      "step": 194341
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1070444583892822,
      "learning_rate": 3.554043185572036e-05,
      "loss": 2.8375,
      "step": 194342
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5590462684631348,
      "learning_rate": 3.5538500625785324e-05,
      "loss": 2.9429,
      "step": 194343
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.528831958770752,
      "learning_rate": 3.553656944501866e-05,
      "loss": 3.0688,
      "step": 194344
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.917634963989258,
      "learning_rate": 3.553463831342076e-05,
      "loss": 2.7337,
      "step": 194345
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.730645179748535,
      "learning_rate": 3.5532707230991996e-05,
      "loss": 2.9223,
      "step": 194346
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.987245798110962,
      "learning_rate": 3.55307761977326e-05,
      "loss": 2.8127,
      "step": 194347
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.399232864379883,
      "learning_rate": 3.552884521364311e-05,
      "loss": 2.8282,
      "step": 194348
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.337150573730469,
      "learning_rate": 3.5526914278723786e-05,
      "loss": 2.7553,
      "step": 194349
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3270795345306396,
      "learning_rate": 3.552498339297496e-05,
      "loss": 2.8983,
      "step": 194350
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.11954665184021,
      "learning_rate": 3.552305255639707e-05,
      "loss": 2.8982,
      "step": 194351
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.3704960346221924,
      "learning_rate": 3.552112176899045e-05,
      "loss": 2.9981,
      "step": 194352
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.143853187561035,
      "learning_rate": 3.551919103075536e-05,
      "loss": 2.9246,
      "step": 194353
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.549560546875,
      "learning_rate": 3.5517260341692336e-05,
      "loss": 2.9204,
      "step": 194354
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3707973957061768,
      "learning_rate": 3.5515329701801574e-05,
      "loss": 2.8107,
      "step": 194355
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7856063842773438,
      "learning_rate": 3.551339911108355e-05,
      "loss": 3.2372,
      "step": 194356
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5625193119049072,
      "learning_rate": 3.5511468569538624e-05,
      "loss": 2.7241,
      "step": 194357
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.404386520385742,
      "learning_rate": 3.5509538077167066e-05,
      "loss": 3.128,
      "step": 194358
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9697422981262207,
      "learning_rate": 3.5507607633969234e-05,
      "loss": 2.7438,
      "step": 194359
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.182276964187622,
      "learning_rate": 3.5505677239945574e-05,
      "loss": 2.7649,
      "step": 194360
    },
    {
      "epoch": 2.53,
      "grad_norm": 5.00549840927124,
      "learning_rate": 3.550374689509638e-05,
      "loss": 2.9274,
      "step": 194361
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8519704341888428,
      "learning_rate": 3.550181659942204e-05,
      "loss": 2.9214,
      "step": 194362
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.213739395141602,
      "learning_rate": 3.5499886352922936e-05,
      "loss": 2.9375,
      "step": 194363
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2114903926849365,
      "learning_rate": 3.5497956155599394e-05,
      "loss": 3.085,
      "step": 194364
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4050183296203613,
      "learning_rate": 3.549602600745172e-05,
      "loss": 3.1563,
      "step": 194365
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7026822566986084,
      "learning_rate": 3.549409590848037e-05,
      "loss": 2.9486,
      "step": 194366
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6734745502471924,
      "learning_rate": 3.5492165858685616e-05,
      "loss": 2.8468,
      "step": 194367
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.163102626800537,
      "learning_rate": 3.549023585806793e-05,
      "loss": 2.8516,
      "step": 194368
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8757147789001465,
      "learning_rate": 3.54883059066275e-05,
      "loss": 3.0049,
      "step": 194369
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8331875801086426,
      "learning_rate": 3.548637600436494e-05,
      "loss": 2.8915,
      "step": 194370
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.799161911010742,
      "learning_rate": 3.548444615128033e-05,
      "loss": 3.035,
      "step": 194371
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.927539587020874,
      "learning_rate": 3.5482516347374224e-05,
      "loss": 3.1046,
      "step": 194372
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.016259670257568,
      "learning_rate": 3.5480586592646844e-05,
      "loss": 2.6476,
      "step": 194373
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.656961441040039,
      "learning_rate": 3.547865688709866e-05,
      "loss": 2.9576,
      "step": 194374
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.622825860977173,
      "learning_rate": 3.5476727230729965e-05,
      "loss": 2.9059,
      "step": 194375
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6199872493743896,
      "learning_rate": 3.547479762354116e-05,
      "loss": 2.8514,
      "step": 194376
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.887709379196167,
      "learning_rate": 3.547286806553259e-05,
      "loss": 2.9477,
      "step": 194377
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0101711750030518,
      "learning_rate": 3.547093855670464e-05,
      "loss": 2.8718,
      "step": 194378
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1143293380737305,
      "learning_rate": 3.5469009097057555e-05,
      "loss": 2.8205,
      "step": 194379
    },
    {
      "epoch": 2.53,
      "grad_norm": 5.518587589263916,
      "learning_rate": 3.546707968659183e-05,
      "loss": 2.887,
      "step": 194380
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.541866302490234,
      "learning_rate": 3.546515032530769e-05,
      "loss": 2.9069,
      "step": 194381
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.119232416152954,
      "learning_rate": 3.546322101320568e-05,
      "loss": 2.9234,
      "step": 194382
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.455835819244385,
      "learning_rate": 3.546129175028599e-05,
      "loss": 3.0699,
      "step": 194383
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7833926677703857,
      "learning_rate": 3.5459362536549094e-05,
      "loss": 2.9364,
      "step": 194384
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8160881996154785,
      "learning_rate": 3.545743337199523e-05,
      "loss": 3.0056,
      "step": 194385
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.749992609024048,
      "learning_rate": 3.5455504256624844e-05,
      "loss": 2.6628,
      "step": 194386
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.556701183319092,
      "learning_rate": 3.5453575190438255e-05,
      "loss": 2.9343,
      "step": 194387
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0317656993865967,
      "learning_rate": 3.5451646173435854e-05,
      "loss": 2.7163,
      "step": 194388
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.876721382141113,
      "learning_rate": 3.5449717205618014e-05,
      "loss": 2.729,
      "step": 194389
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.774380683898926,
      "learning_rate": 3.544778828698509e-05,
      "loss": 2.7798,
      "step": 194390
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6084752082824707,
      "learning_rate": 3.544585941753732e-05,
      "loss": 2.7442,
      "step": 194391
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5361242294311523,
      "learning_rate": 3.5443930597275217e-05,
      "loss": 2.8785,
      "step": 194392
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4333484172821045,
      "learning_rate": 3.544200182619903e-05,
      "loss": 2.9873,
      "step": 194393
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4614803791046143,
      "learning_rate": 3.544007310430926e-05,
      "loss": 3.0326,
      "step": 194394
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.284411907196045,
      "learning_rate": 3.5438144431606156e-05,
      "loss": 2.8515,
      "step": 194395
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.616983652114868,
      "learning_rate": 3.5436215808090064e-05,
      "loss": 2.9106,
      "step": 194396
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.676931381225586,
      "learning_rate": 3.5434287233761325e-05,
      "loss": 2.946,
      "step": 194397
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.961805820465088,
      "learning_rate": 3.543235870862045e-05,
      "loss": 2.8203,
      "step": 194398
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.718385934829712,
      "learning_rate": 3.5430430232667585e-05,
      "loss": 2.9277,
      "step": 194399
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.1853251457214355,
      "learning_rate": 3.5428501805903274e-05,
      "loss": 2.9566,
      "step": 194400
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.514005422592163,
      "learning_rate": 3.542657342832779e-05,
      "loss": 2.9738,
      "step": 194401
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9549484252929688,
      "learning_rate": 3.5424645099941515e-05,
      "loss": 2.7464,
      "step": 194402
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0585646629333496,
      "learning_rate": 3.54227168207447e-05,
      "loss": 2.9422,
      "step": 194403
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5694987773895264,
      "learning_rate": 3.54207885907379e-05,
      "loss": 2.8501,
      "step": 194404
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.486492156982422,
      "learning_rate": 3.541886040992129e-05,
      "loss": 2.9285,
      "step": 194405
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.3573243618011475,
      "learning_rate": 3.541693227829536e-05,
      "loss": 2.9098,
      "step": 194406
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.121519088745117,
      "learning_rate": 3.541500419586042e-05,
      "loss": 2.9716,
      "step": 194407
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6819493770599365,
      "learning_rate": 3.541307616261686e-05,
      "loss": 3.0303,
      "step": 194408
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.654284954071045,
      "learning_rate": 3.541114817856489e-05,
      "loss": 2.8072,
      "step": 194409
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.905506610870361,
      "learning_rate": 3.540922024370506e-05,
      "loss": 2.8073,
      "step": 194410
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.736055850982666,
      "learning_rate": 3.540729235803762e-05,
      "loss": 3.0026,
      "step": 194411
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.405148983001709,
      "learning_rate": 3.5405364521563e-05,
      "loss": 3.155,
      "step": 194412
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.413354873657227,
      "learning_rate": 3.540343673428149e-05,
      "loss": 2.7244,
      "step": 194413
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0269503593444824,
      "learning_rate": 3.540150899619353e-05,
      "loss": 2.8389,
      "step": 194414
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.43420672416687,
      "learning_rate": 3.539958130729932e-05,
      "loss": 3.0755,
      "step": 194415
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7965750694274902,
      "learning_rate": 3.539765366759939e-05,
      "loss": 3.0725,
      "step": 194416
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.573216199874878,
      "learning_rate": 3.539572607709398e-05,
      "loss": 3.0871,
      "step": 194417
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7960240840911865,
      "learning_rate": 3.539379853578355e-05,
      "loss": 3.1328,
      "step": 194418
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.329186201095581,
      "learning_rate": 3.539187104366843e-05,
      "loss": 2.9501,
      "step": 194419
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.901937961578369,
      "learning_rate": 3.5389943600748925e-05,
      "loss": 2.7303,
      "step": 194420
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3802220821380615,
      "learning_rate": 3.5388016207025374e-05,
      "loss": 2.7155,
      "step": 194421
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.00561261177063,
      "learning_rate": 3.538608886249826e-05,
      "loss": 2.9628,
      "step": 194422
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3760287761688232,
      "learning_rate": 3.53841615671678e-05,
      "loss": 2.9695,
      "step": 194423
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.741298198699951,
      "learning_rate": 3.538223432103449e-05,
      "loss": 3.0151,
      "step": 194424
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3588454723358154,
      "learning_rate": 3.5380307124098584e-05,
      "loss": 2.7912,
      "step": 194425
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2078397274017334,
      "learning_rate": 3.5378379976360495e-05,
      "loss": 2.8599,
      "step": 194426
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.405925750732422,
      "learning_rate": 3.537645287782052e-05,
      "loss": 2.7668,
      "step": 194427
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.038018226623535,
      "learning_rate": 3.5374525828479083e-05,
      "loss": 3.03,
      "step": 194428
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.823120355606079,
      "learning_rate": 3.5372598828336466e-05,
      "loss": 3.1785,
      "step": 194429
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8938467502593994,
      "learning_rate": 3.537067187739315e-05,
      "loss": 2.9552,
      "step": 194430
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8849668502807617,
      "learning_rate": 3.5368744975649424e-05,
      "loss": 3.0761,
      "step": 194431
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.398522853851318,
      "learning_rate": 3.5366818123105564e-05,
      "loss": 3.0474,
      "step": 194432
    },
    {
      "epoch": 2.53,
      "grad_norm": 5.763725757598877,
      "learning_rate": 3.536489131976209e-05,
      "loss": 2.7771,
      "step": 194433
    },
    {
      "epoch": 2.53,
      "grad_norm": 6.9262919425964355,
      "learning_rate": 3.5362964565619256e-05,
      "loss": 2.9908,
      "step": 194434
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0584795475006104,
      "learning_rate": 3.53610378606774e-05,
      "loss": 2.7807,
      "step": 194435
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.54107141494751,
      "learning_rate": 3.535911120493699e-05,
      "loss": 2.8875,
      "step": 194436
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3158349990844727,
      "learning_rate": 3.535718459839828e-05,
      "loss": 3.0173,
      "step": 194437
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.855945587158203,
      "learning_rate": 3.535525804106162e-05,
      "loss": 2.9967,
      "step": 194438
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1697726249694824,
      "learning_rate": 3.53533315329275e-05,
      "loss": 2.8541,
      "step": 194439
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.074543476104736,
      "learning_rate": 3.535140507399609e-05,
      "loss": 3.1378,
      "step": 194440
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1250123977661133,
      "learning_rate": 3.534947866426795e-05,
      "loss": 2.9807,
      "step": 194441
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.812122344970703,
      "learning_rate": 3.534755230374332e-05,
      "loss": 3.0923,
      "step": 194442
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0621678829193115,
      "learning_rate": 3.53456259924225e-05,
      "loss": 2.8513,
      "step": 194443
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9362876415252686,
      "learning_rate": 3.5343699730306016e-05,
      "loss": 2.998,
      "step": 194444
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7540831565856934,
      "learning_rate": 3.534177351739411e-05,
      "loss": 2.9367,
      "step": 194445
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9519386291503906,
      "learning_rate": 3.533984735368711e-05,
      "loss": 3.1573,
      "step": 194446
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1089839935302734,
      "learning_rate": 3.5337921239185486e-05,
      "loss": 2.9393,
      "step": 194447
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4294183254241943,
      "learning_rate": 3.533599517388953e-05,
      "loss": 2.9864,
      "step": 194448
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8522462844848633,
      "learning_rate": 3.533406915779955e-05,
      "loss": 3.0648,
      "step": 194449
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7862701416015625,
      "learning_rate": 3.533214319091604e-05,
      "loss": 2.6717,
      "step": 194450
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.244551658630371,
      "learning_rate": 3.533021727323927e-05,
      "loss": 3.0197,
      "step": 194451
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9901509284973145,
      "learning_rate": 3.532829140476957e-05,
      "loss": 3.0651,
      "step": 194452
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8093204498291016,
      "learning_rate": 3.5326365585507376e-05,
      "loss": 3.0349,
      "step": 194453
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.530205488204956,
      "learning_rate": 3.5324439815452954e-05,
      "loss": 2.8301,
      "step": 194454
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.592228889465332,
      "learning_rate": 3.532251409460677e-05,
      "loss": 2.9242,
      "step": 194455
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2445597648620605,
      "learning_rate": 3.5320588422969123e-05,
      "loss": 3.1095,
      "step": 194456
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.850539207458496,
      "learning_rate": 3.531866280054037e-05,
      "loss": 2.9089,
      "step": 194457
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.671579360961914,
      "learning_rate": 3.53167372273208e-05,
      "loss": 2.8014,
      "step": 194458
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7379636764526367,
      "learning_rate": 3.531481170331093e-05,
      "loss": 2.9747,
      "step": 194459
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.132582664489746,
      "learning_rate": 3.5312886228511e-05,
      "loss": 3.0238,
      "step": 194460
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.800379753112793,
      "learning_rate": 3.531096080292143e-05,
      "loss": 3.0911,
      "step": 194461
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7123303413391113,
      "learning_rate": 3.530903542654253e-05,
      "loss": 2.7466,
      "step": 194462
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.014068603515625,
      "learning_rate": 3.5307110099374705e-05,
      "loss": 2.9745,
      "step": 194463
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0893986225128174,
      "learning_rate": 3.530518482141821e-05,
      "loss": 2.708,
      "step": 194464
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2292234897613525,
      "learning_rate": 3.5303259592673547e-05,
      "loss": 2.9612,
      "step": 194465
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6976161003112793,
      "learning_rate": 3.530133441314092e-05,
      "loss": 3.0377,
      "step": 194466
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3109962940216064,
      "learning_rate": 3.5299409282820866e-05,
      "loss": 2.6962,
      "step": 194467
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5790610313415527,
      "learning_rate": 3.529748420171364e-05,
      "loss": 2.8898,
      "step": 194468
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.82956862449646,
      "learning_rate": 3.5295559169819585e-05,
      "loss": 2.83,
      "step": 194469
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5702009201049805,
      "learning_rate": 3.5293634187139065e-05,
      "loss": 2.8435,
      "step": 194470
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.006610631942749,
      "learning_rate": 3.5291709253672474e-05,
      "loss": 2.8501,
      "step": 194471
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5429446697235107,
      "learning_rate": 3.5289784369420116e-05,
      "loss": 2.8317,
      "step": 194472
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8266897201538086,
      "learning_rate": 3.5287859534382427e-05,
      "loss": 2.8926,
      "step": 194473
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0945146083831787,
      "learning_rate": 3.528593474855973e-05,
      "loss": 3.2611,
      "step": 194474
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9771080017089844,
      "learning_rate": 3.528401001195237e-05,
      "loss": 2.9086,
      "step": 194475
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9058613777160645,
      "learning_rate": 3.528208532456067e-05,
      "loss": 2.974,
      "step": 194476
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.948307752609253,
      "learning_rate": 3.528016068638507e-05,
      "loss": 3.2581,
      "step": 194477
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.75783109664917,
      "learning_rate": 3.5278236097425806e-05,
      "loss": 3.091,
      "step": 194478
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0261621475219727,
      "learning_rate": 3.52763115576834e-05,
      "loss": 2.964,
      "step": 194479
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.983092784881592,
      "learning_rate": 3.5274387067158125e-05,
      "loss": 2.9632,
      "step": 194480
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.69997239112854,
      "learning_rate": 3.527246262585035e-05,
      "loss": 3.1824,
      "step": 194481
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.293785810470581,
      "learning_rate": 3.5270538233760336e-05,
      "loss": 2.7596,
      "step": 194482
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.993769645690918,
      "learning_rate": 3.5268613890888585e-05,
      "loss": 2.8994,
      "step": 194483
    },
    {
      "epoch": 2.53,
      "grad_norm": 5.508762359619141,
      "learning_rate": 3.526668959723533e-05,
      "loss": 2.9363,
      "step": 194484
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8198065757751465,
      "learning_rate": 3.526476535280107e-05,
      "loss": 2.8959,
      "step": 194485
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1119883060455322,
      "learning_rate": 3.5262841157586105e-05,
      "loss": 2.7082,
      "step": 194486
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0106418132781982,
      "learning_rate": 3.526091701159077e-05,
      "loss": 2.9881,
      "step": 194487
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0933868885040283,
      "learning_rate": 3.525899291481533e-05,
      "loss": 2.9838,
      "step": 194488
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.929727554321289,
      "learning_rate": 3.525706886726035e-05,
      "loss": 2.9069,
      "step": 194489
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2488789558410645,
      "learning_rate": 3.5255144868926e-05,
      "loss": 2.7774,
      "step": 194490
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1417832374572754,
      "learning_rate": 3.525322091981274e-05,
      "loss": 2.9961,
      "step": 194491
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.69209361076355,
      "learning_rate": 3.5251297019920945e-05,
      "loss": 2.8671,
      "step": 194492
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0684425830841064,
      "learning_rate": 3.524937316925094e-05,
      "loss": 3.0154,
      "step": 194493
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.637953519821167,
      "learning_rate": 3.5247449367802995e-05,
      "loss": 3.0195,
      "step": 194494
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7015020847320557,
      "learning_rate": 3.5245525615577605e-05,
      "loss": 2.9867,
      "step": 194495
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9360392093658447,
      "learning_rate": 3.524360191257498e-05,
      "loss": 2.8781,
      "step": 194496
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6660025119781494,
      "learning_rate": 3.524167825879568e-05,
      "loss": 2.9457,
      "step": 194497
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.84883975982666,
      "learning_rate": 3.5239754654239936e-05,
      "loss": 2.7832,
      "step": 194498
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0353596210479736,
      "learning_rate": 3.5237831098908086e-05,
      "loss": 3.0311,
      "step": 194499
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3485267162323,
      "learning_rate": 3.523590759280049e-05,
      "loss": 2.6681,
      "step": 194500
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.993504285812378,
      "learning_rate": 3.5233984135917595e-05,
      "loss": 2.7764,
      "step": 194501
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.07488751411438,
      "learning_rate": 3.523206072825965e-05,
      "loss": 2.6877,
      "step": 194502
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9870846271514893,
      "learning_rate": 3.5230137369827095e-05,
      "loss": 2.9688,
      "step": 194503
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.658416509628296,
      "learning_rate": 3.5228214060620265e-05,
      "loss": 3.1086,
      "step": 194504
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.172621726989746,
      "learning_rate": 3.522629080063952e-05,
      "loss": 3.0059,
      "step": 194505
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9647376537323,
      "learning_rate": 3.522436758988514e-05,
      "loss": 2.7609,
      "step": 194506
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.051032304763794,
      "learning_rate": 3.522244442835761e-05,
      "loss": 2.9933,
      "step": 194507
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.046807050704956,
      "learning_rate": 3.522052131605717e-05,
      "loss": 2.9795,
      "step": 194508
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.846684455871582,
      "learning_rate": 3.521859825298429e-05,
      "loss": 2.9882,
      "step": 194509
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0222060680389404,
      "learning_rate": 3.521667523913919e-05,
      "loss": 3.0945,
      "step": 194510
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7903854846954346,
      "learning_rate": 3.521475227452245e-05,
      "loss": 2.9405,
      "step": 194511
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9123620986938477,
      "learning_rate": 3.5212829359134165e-05,
      "loss": 3.034,
      "step": 194512
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.595294713973999,
      "learning_rate": 3.5210906492974864e-05,
      "loss": 2.7927,
      "step": 194513
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.322660446166992,
      "learning_rate": 3.520898367604482e-05,
      "loss": 2.7375,
      "step": 194514
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0487136840820312,
      "learning_rate": 3.5207060908344466e-05,
      "loss": 2.983,
      "step": 194515
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8496153354644775,
      "learning_rate": 3.5205138189874026e-05,
      "loss": 3.2087,
      "step": 194516
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8959310054779053,
      "learning_rate": 3.520321552063404e-05,
      "loss": 2.9156,
      "step": 194517
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.423158884048462,
      "learning_rate": 3.520129290062478e-05,
      "loss": 3.3179,
      "step": 194518
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.817473888397217,
      "learning_rate": 3.51993703298466e-05,
      "loss": 2.9067,
      "step": 194519
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6970064640045166,
      "learning_rate": 3.5197447808299775e-05,
      "loss": 2.8179,
      "step": 194520
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.015378475189209,
      "learning_rate": 3.519552533598483e-05,
      "loss": 2.8847,
      "step": 194521
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.520789861679077,
      "learning_rate": 3.519360291290194e-05,
      "loss": 3.1749,
      "step": 194522
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7115509510040283,
      "learning_rate": 3.5191680539051635e-05,
      "loss": 2.8951,
      "step": 194523
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8197731971740723,
      "learning_rate": 3.5189758214434215e-05,
      "loss": 2.7577,
      "step": 194524
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.081890344619751,
      "learning_rate": 3.518783593905001e-05,
      "loss": 3.0582,
      "step": 194525
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6601815223693848,
      "learning_rate": 3.51859137128993e-05,
      "loss": 3.2867,
      "step": 194526
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5537571907043457,
      "learning_rate": 3.518399153598259e-05,
      "loss": 3.1026,
      "step": 194527
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.995616912841797,
      "learning_rate": 3.518206940830014e-05,
      "loss": 2.9854,
      "step": 194528
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8994884490966797,
      "learning_rate": 3.5180147329852405e-05,
      "loss": 2.8933,
      "step": 194529
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7618396282196045,
      "learning_rate": 3.517822530063965e-05,
      "loss": 2.921,
      "step": 194530
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.006268262863159,
      "learning_rate": 3.517630332066221e-05,
      "loss": 2.8816,
      "step": 194531
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6904044151306152,
      "learning_rate": 3.517438138992056e-05,
      "loss": 2.7855,
      "step": 194532
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7739651203155518,
      "learning_rate": 3.5172459508415017e-05,
      "loss": 2.8143,
      "step": 194533
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6008107662200928,
      "learning_rate": 3.5170537676145825e-05,
      "loss": 2.8203,
      "step": 194534
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7650370597839355,
      "learning_rate": 3.5168615893113486e-05,
      "loss": 3.0938,
      "step": 194535
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1998047828674316,
      "learning_rate": 3.5166694159318285e-05,
      "loss": 3.1275,
      "step": 194536
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7991480827331543,
      "learning_rate": 3.5164772474760574e-05,
      "loss": 3.1713,
      "step": 194537
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.722797155380249,
      "learning_rate": 3.516285083944077e-05,
      "loss": 3.0857,
      "step": 194538
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8004050254821777,
      "learning_rate": 3.516092925335912e-05,
      "loss": 2.854,
      "step": 194539
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9219555854797363,
      "learning_rate": 3.515900771651614e-05,
      "loss": 2.7616,
      "step": 194540
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.807112693786621,
      "learning_rate": 3.515708622891208e-05,
      "loss": 3.0712,
      "step": 194541
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.617523193359375,
      "learning_rate": 3.515516479054733e-05,
      "loss": 2.9019,
      "step": 194542
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1201741695404053,
      "learning_rate": 3.515324340142216e-05,
      "loss": 2.7867,
      "step": 194543
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5654804706573486,
      "learning_rate": 3.515132206153707e-05,
      "loss": 2.7813,
      "step": 194544
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.796297073364258,
      "learning_rate": 3.514940077089229e-05,
      "loss": 3.0022,
      "step": 194545
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9430620670318604,
      "learning_rate": 3.514747952948829e-05,
      "loss": 2.9644,
      "step": 194546
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8220860958099365,
      "learning_rate": 3.51455583373254e-05,
      "loss": 2.5902,
      "step": 194547
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6392219066619873,
      "learning_rate": 3.5143637194403915e-05,
      "loss": 2.9439,
      "step": 194548
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4667561054229736,
      "learning_rate": 3.5141716100724184e-05,
      "loss": 2.9601,
      "step": 194549
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.874501943588257,
      "learning_rate": 3.513979505628666e-05,
      "loss": 3.0067,
      "step": 194550
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.575103521347046,
      "learning_rate": 3.513787406109158e-05,
      "loss": 3.0645,
      "step": 194551
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.868945837020874,
      "learning_rate": 3.513595311513944e-05,
      "loss": 2.9461,
      "step": 194552
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7963199615478516,
      "learning_rate": 3.513403221843054e-05,
      "loss": 2.9788,
      "step": 194553
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.454934597015381,
      "learning_rate": 3.5132111370965186e-05,
      "loss": 2.9949,
      "step": 194554
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9453060626983643,
      "learning_rate": 3.513019057274375e-05,
      "loss": 3.0574,
      "step": 194555
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6240315437316895,
      "learning_rate": 3.512826982376664e-05,
      "loss": 2.9588,
      "step": 194556
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7738280296325684,
      "learning_rate": 3.512634912403415e-05,
      "loss": 3.1921,
      "step": 194557
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2320053577423096,
      "learning_rate": 3.512442847354673e-05,
      "loss": 3.0621,
      "step": 194558
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.643204927444458,
      "learning_rate": 3.512250787230465e-05,
      "loss": 2.7834,
      "step": 194559
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.5013628005981445,
      "learning_rate": 3.5120587320308316e-05,
      "loss": 2.975,
      "step": 194560
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.5179266929626465,
      "learning_rate": 3.511866681755802e-05,
      "loss": 2.7668,
      "step": 194561
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0361201763153076,
      "learning_rate": 3.511674636405419e-05,
      "loss": 2.9843,
      "step": 194562
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.789522171020508,
      "learning_rate": 3.511482595979714e-05,
      "loss": 2.8127,
      "step": 194563
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.296228885650635,
      "learning_rate": 3.5112905604787265e-05,
      "loss": 2.9244,
      "step": 194564
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6838972568511963,
      "learning_rate": 3.5110985299024926e-05,
      "loss": 3.0596,
      "step": 194565
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0284435749053955,
      "learning_rate": 3.510906504251043e-05,
      "loss": 2.9196,
      "step": 194566
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1841163635253906,
      "learning_rate": 3.510714483524414e-05,
      "loss": 3.0059,
      "step": 194567
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3591411113739014,
      "learning_rate": 3.510522467722645e-05,
      "loss": 3.0728,
      "step": 194568
    },
    {
      "epoch": 2.53,
      "grad_norm": 5.5148234367370605,
      "learning_rate": 3.5103304568457666e-05,
      "loss": 2.9183,
      "step": 194569
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7467384338378906,
      "learning_rate": 3.510138450893823e-05,
      "loss": 3.3209,
      "step": 194570
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4972989559173584,
      "learning_rate": 3.5099464498668415e-05,
      "loss": 2.7631,
      "step": 194571
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.421196937561035,
      "learning_rate": 3.509754453764865e-05,
      "loss": 2.9434,
      "step": 194572
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.903613805770874,
      "learning_rate": 3.509562462587918e-05,
      "loss": 2.8293,
      "step": 194573
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1052868366241455,
      "learning_rate": 3.509370476336049e-05,
      "loss": 2.6903,
      "step": 194574
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6949527263641357,
      "learning_rate": 3.5091784950092826e-05,
      "loss": 3.0124,
      "step": 194575
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0346100330352783,
      "learning_rate": 3.5089865186076674e-05,
      "loss": 2.7992,
      "step": 194576
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8862242698669434,
      "learning_rate": 3.5087945471312215e-05,
      "loss": 2.8714,
      "step": 194577
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0078883171081543,
      "learning_rate": 3.5086025805800065e-05,
      "loss": 3.1059,
      "step": 194578
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5870933532714844,
      "learning_rate": 3.5084106189540286e-05,
      "loss": 3.0778,
      "step": 194579
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.051259756088257,
      "learning_rate": 3.508218662253344e-05,
      "loss": 2.801,
      "step": 194580
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.455251455307007,
      "learning_rate": 3.508026710477976e-05,
      "loss": 2.8936,
      "step": 194581
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5224111080169678,
      "learning_rate": 3.507834763627971e-05,
      "loss": 2.8594,
      "step": 194582
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.794621467590332,
      "learning_rate": 3.5076428217033536e-05,
      "loss": 2.931,
      "step": 194583
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5577664375305176,
      "learning_rate": 3.50745088470418e-05,
      "loss": 2.8294,
      "step": 194584
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4750163555145264,
      "learning_rate": 3.507258952630455e-05,
      "loss": 2.9075,
      "step": 194585
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8123791217803955,
      "learning_rate": 3.507067025482241e-05,
      "loss": 3.0815,
      "step": 194586
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.247098922729492,
      "learning_rate": 3.5068751032595533e-05,
      "loss": 2.6956,
      "step": 194587
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6979429721832275,
      "learning_rate": 3.506683185962446e-05,
      "loss": 3.0008,
      "step": 194588
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8553762435913086,
      "learning_rate": 3.506491273590941e-05,
      "loss": 3.1053,
      "step": 194589
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.157912254333496,
      "learning_rate": 3.50629936614509e-05,
      "loss": 2.8501,
      "step": 194590
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6480679512023926,
      "learning_rate": 3.5061074636249053e-05,
      "loss": 3.0066,
      "step": 194591
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.4899518489837646,
      "learning_rate": 3.505915566030444e-05,
      "loss": 2.7899,
      "step": 194592
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1213183403015137,
      "learning_rate": 3.5057236733617254e-05,
      "loss": 2.9703,
      "step": 194593
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5486879348754883,
      "learning_rate": 3.5055317856188e-05,
      "loss": 2.8667,
      "step": 194594
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.195096731185913,
      "learning_rate": 3.5053399028016885e-05,
      "loss": 2.9798,
      "step": 194595
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7443959712982178,
      "learning_rate": 3.5051480249104455e-05,
      "loss": 3.0478,
      "step": 194596
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9283835887908936,
      "learning_rate": 3.504956151945087e-05,
      "loss": 3.0068,
      "step": 194597
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6369268894195557,
      "learning_rate": 3.5047642839056636e-05,
      "loss": 3.0116,
      "step": 194598
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9332916736602783,
      "learning_rate": 3.5045724207921965e-05,
      "loss": 2.7513,
      "step": 194599
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.97440767288208,
      "learning_rate": 3.504380562604733e-05,
      "loss": 2.9503,
      "step": 194600
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9885406494140625,
      "learning_rate": 3.504188709343302e-05,
      "loss": 2.622,
      "step": 194601
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6228904724121094,
      "learning_rate": 3.503996861007951e-05,
      "loss": 3.142,
      "step": 194602
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.136366367340088,
      "learning_rate": 3.503805017598702e-05,
      "loss": 2.6305,
      "step": 194603
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.612015724182129,
      "learning_rate": 3.5036131791156e-05,
      "loss": 2.9088,
      "step": 194604
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7024857997894287,
      "learning_rate": 3.50342134555867e-05,
      "loss": 3.0384,
      "step": 194605
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.616668462753296,
      "learning_rate": 3.503229516927957e-05,
      "loss": 2.8536,
      "step": 194606
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.019813060760498,
      "learning_rate": 3.503037693223489e-05,
      "loss": 3.1474,
      "step": 194607
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.576786518096924,
      "learning_rate": 3.502845874445315e-05,
      "loss": 2.958,
      "step": 194608
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.133985757827759,
      "learning_rate": 3.50265406059346e-05,
      "loss": 2.8841,
      "step": 194609
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8728766441345215,
      "learning_rate": 3.502462251667963e-05,
      "loss": 3.0525,
      "step": 194610
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3198952674865723,
      "learning_rate": 3.502270447668854e-05,
      "loss": 2.9825,
      "step": 194611
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.931276321411133,
      "learning_rate": 3.502078648596176e-05,
      "loss": 2.7228,
      "step": 194612
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2430222034454346,
      "learning_rate": 3.501886854449955e-05,
      "loss": 2.88,
      "step": 194613
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.941568613052368,
      "learning_rate": 3.501695065230242e-05,
      "loss": 2.8546,
      "step": 194614
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.294754981994629,
      "learning_rate": 3.501503280937063e-05,
      "loss": 3.0911,
      "step": 194615
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8583414554595947,
      "learning_rate": 3.5013115015704485e-05,
      "loss": 2.8406,
      "step": 194616
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.897474527359009,
      "learning_rate": 3.501119727130447e-05,
      "loss": 2.9462,
      "step": 194617
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.44536828994751,
      "learning_rate": 3.500927957617088e-05,
      "loss": 3.0758,
      "step": 194618
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.947702169418335,
      "learning_rate": 3.500736193030398e-05,
      "loss": 2.966,
      "step": 194619
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7309563159942627,
      "learning_rate": 3.5005444333704305e-05,
      "loss": 2.8964,
      "step": 194620
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5410315990448,
      "learning_rate": 3.5003526786372116e-05,
      "loss": 2.8651,
      "step": 194621
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.151315689086914,
      "learning_rate": 3.5001609288307686e-05,
      "loss": 2.964,
      "step": 194622
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0535740852355957,
      "learning_rate": 3.4999691839511544e-05,
      "loss": 3.0359,
      "step": 194623
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6697564125061035,
      "learning_rate": 3.499777443998389e-05,
      "loss": 2.8084,
      "step": 194624
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3472187519073486,
      "learning_rate": 3.4995857089725235e-05,
      "loss": 3.0051,
      "step": 194625
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6123905181884766,
      "learning_rate": 3.499393978873583e-05,
      "loss": 2.7945,
      "step": 194626
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0448527336120605,
      "learning_rate": 3.4992022537016076e-05,
      "loss": 3.0075,
      "step": 194627
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.098817825317383,
      "learning_rate": 3.499010533456621e-05,
      "loss": 3.0104,
      "step": 194628
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.800224542617798,
      "learning_rate": 3.4988188181386765e-05,
      "loss": 3.0376,
      "step": 194629
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.699463367462158,
      "learning_rate": 3.4986271077477976e-05,
      "loss": 2.8053,
      "step": 194630
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.429868459701538,
      "learning_rate": 3.4984354022840275e-05,
      "loss": 3.0983,
      "step": 194631
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4078710079193115,
      "learning_rate": 3.4982437017474e-05,
      "loss": 2.7499,
      "step": 194632
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.1796793937683105,
      "learning_rate": 3.49805200613795e-05,
      "loss": 3.2995,
      "step": 194633
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7728753089904785,
      "learning_rate": 3.497860315455703e-05,
      "loss": 2.9248,
      "step": 194634
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.969170093536377,
      "learning_rate": 3.497668629700714e-05,
      "loss": 2.918,
      "step": 194635
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.1830780506134033,
      "learning_rate": 3.4974769488730006e-05,
      "loss": 3.1055,
      "step": 194636
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.070944309234619,
      "learning_rate": 3.497285272972612e-05,
      "loss": 2.5843,
      "step": 194637
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9661765098571777,
      "learning_rate": 3.497093601999579e-05,
      "loss": 2.9024,
      "step": 194638
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9681694507598877,
      "learning_rate": 3.496901935953937e-05,
      "loss": 2.9272,
      "step": 194639
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.8426051139831543,
      "learning_rate": 3.4967102748357144e-05,
      "loss": 3.0368,
      "step": 194640
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3745501041412354,
      "learning_rate": 3.49651861864496e-05,
      "loss": 2.8988,
      "step": 194641
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4474260807037354,
      "learning_rate": 3.496326967381697e-05,
      "loss": 2.8295,
      "step": 194642
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.884821891784668,
      "learning_rate": 3.496135321045976e-05,
      "loss": 3.0655,
      "step": 194643
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8953535556793213,
      "learning_rate": 3.4959436796378135e-05,
      "loss": 2.796,
      "step": 194644
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.578039169311523,
      "learning_rate": 3.495752043157269e-05,
      "loss": 3.0232,
      "step": 194645
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.0438103675842285,
      "learning_rate": 3.495560411604353e-05,
      "loss": 2.9381,
      "step": 194646
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.774388074874878,
      "learning_rate": 3.495368784979119e-05,
      "loss": 2.728,
      "step": 194647
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7678871154785156,
      "learning_rate": 3.4951771632815896e-05,
      "loss": 3.1969,
      "step": 194648
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9770851135253906,
      "learning_rate": 3.4949855465118116e-05,
      "loss": 3.1657,
      "step": 194649
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.529531717300415,
      "learning_rate": 3.494793934669812e-05,
      "loss": 2.9211,
      "step": 194650
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.6363089084625244,
      "learning_rate": 3.494602327755644e-05,
      "loss": 2.7498,
      "step": 194651
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9752132892608643,
      "learning_rate": 3.4944107257693134e-05,
      "loss": 2.9107,
      "step": 194652
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1491565704345703,
      "learning_rate": 3.494219128710881e-05,
      "loss": 2.8411,
      "step": 194653
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.5595862865448,
      "learning_rate": 3.4940275365803674e-05,
      "loss": 2.8765,
      "step": 194654
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.149430751800537,
      "learning_rate": 3.4938359493778214e-05,
      "loss": 3.1961,
      "step": 194655
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7918457984924316,
      "learning_rate": 3.493644367103263e-05,
      "loss": 2.9436,
      "step": 194656
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.5743250846862793,
      "learning_rate": 3.49345278975675e-05,
      "loss": 3.1574,
      "step": 194657
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7143313884735107,
      "learning_rate": 3.4932612173382946e-05,
      "loss": 3.2046,
      "step": 194658
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.810696601867676,
      "learning_rate": 3.4930696498479435e-05,
      "loss": 2.8847,
      "step": 194659
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.9997897148132324,
      "learning_rate": 3.4928780872857276e-05,
      "loss": 3.028,
      "step": 194660
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.595242977142334,
      "learning_rate": 3.4926865296516924e-05,
      "loss": 2.8356,
      "step": 194661
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1513466835021973,
      "learning_rate": 3.492494976945862e-05,
      "loss": 3.1154,
      "step": 194662
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.78757905960083,
      "learning_rate": 3.492303429168286e-05,
      "loss": 3.0274,
      "step": 194663
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7638800144195557,
      "learning_rate": 3.492111886318981e-05,
      "loss": 3.0592,
      "step": 194664
    },
    {
      "epoch": 2.53,
      "grad_norm": 5.093638896942139,
      "learning_rate": 3.491920348398001e-05,
      "loss": 2.9705,
      "step": 194665
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3810317516326904,
      "learning_rate": 3.491728815405365e-05,
      "loss": 2.9194,
      "step": 194666
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7179338932037354,
      "learning_rate": 3.491537287341123e-05,
      "loss": 2.8301,
      "step": 194667
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.931102752685547,
      "learning_rate": 3.4913457642052986e-05,
      "loss": 2.6836,
      "step": 194668
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.186093807220459,
      "learning_rate": 3.491154245997946e-05,
      "loss": 2.6368,
      "step": 194669
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.57112193107605,
      "learning_rate": 3.490962732719077e-05,
      "loss": 2.8838,
      "step": 194670
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.2967476844787598,
      "learning_rate": 3.4907712243687426e-05,
      "loss": 2.851,
      "step": 194671
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8499574661254883,
      "learning_rate": 3.490579720946969e-05,
      "loss": 2.9557,
      "step": 194672
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8204140663146973,
      "learning_rate": 3.4903882224538026e-05,
      "loss": 2.9077,
      "step": 194673
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.4660627841949463,
      "learning_rate": 3.490196728889267e-05,
      "loss": 2.6688,
      "step": 194674
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.737410306930542,
      "learning_rate": 3.49000524025342e-05,
      "loss": 2.715,
      "step": 194675
    },
    {
      "epoch": 2.53,
      "grad_norm": 4.250115394592285,
      "learning_rate": 3.489813756546266e-05,
      "loss": 2.865,
      "step": 194676
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8557186126708984,
      "learning_rate": 3.4896222777678594e-05,
      "loss": 3.0258,
      "step": 194677
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.828782558441162,
      "learning_rate": 3.4894308039182305e-05,
      "loss": 2.9524,
      "step": 194678
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.3905515670776367,
      "learning_rate": 3.4892393349974226e-05,
      "loss": 2.9424,
      "step": 194679
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.6030337810516357,
      "learning_rate": 3.489047871005458e-05,
      "loss": 2.9124,
      "step": 194680
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7146825790405273,
      "learning_rate": 3.4888564119423946e-05,
      "loss": 3.0083,
      "step": 194681
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.946526050567627,
      "learning_rate": 3.488664957808238e-05,
      "loss": 2.8599,
      "step": 194682
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.8048791885375977,
      "learning_rate": 3.488473508603046e-05,
      "loss": 2.6984,
      "step": 194683
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.1464104652404785,
      "learning_rate": 3.488282064326844e-05,
      "loss": 2.9368,
      "step": 194684
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.303419828414917,
      "learning_rate": 3.488090624979673e-05,
      "loss": 2.9714,
      "step": 194685
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.9270553588867188,
      "learning_rate": 3.4878991905615614e-05,
      "loss": 2.8751,
      "step": 194686
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.921217203140259,
      "learning_rate": 3.487707761072558e-05,
      "loss": 2.9745,
      "step": 194687
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.706493377685547,
      "learning_rate": 3.487516336512688e-05,
      "loss": 2.9292,
      "step": 194688
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.7229385375976562,
      "learning_rate": 3.4873249168819884e-05,
      "loss": 2.7856,
      "step": 194689
    },
    {
      "epoch": 2.53,
      "grad_norm": 2.88942289352417,
      "learning_rate": 3.487133502180489e-05,
      "loss": 3.0757,
      "step": 194690
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4457836151123047,
      "learning_rate": 3.48694209240824e-05,
      "loss": 2.9763,
      "step": 194691
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.324235200881958,
      "learning_rate": 3.486750687565265e-05,
      "loss": 2.7545,
      "step": 194692
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7429373264312744,
      "learning_rate": 3.486559287651607e-05,
      "loss": 2.9402,
      "step": 194693
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9928674697875977,
      "learning_rate": 3.486367892667299e-05,
      "loss": 2.9518,
      "step": 194694
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.2422327995300293,
      "learning_rate": 3.4861765026123746e-05,
      "loss": 2.9697,
      "step": 194695
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.480196475982666,
      "learning_rate": 3.485985117486864e-05,
      "loss": 3.0091,
      "step": 194696
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8558506965637207,
      "learning_rate": 3.4857937372908164e-05,
      "loss": 2.9497,
      "step": 194697
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.153891086578369,
      "learning_rate": 3.485602362024252e-05,
      "loss": 2.9903,
      "step": 194698
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9777028560638428,
      "learning_rate": 3.485410991687222e-05,
      "loss": 2.9079,
      "step": 194699
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.586019992828369,
      "learning_rate": 3.485219626279758e-05,
      "loss": 3.0955,
      "step": 194700
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0895450115203857,
      "learning_rate": 3.485028265801881e-05,
      "loss": 2.9234,
      "step": 194701
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.559224843978882,
      "learning_rate": 3.4848369102536474e-05,
      "loss": 2.9675,
      "step": 194702
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8992462158203125,
      "learning_rate": 3.484645559635081e-05,
      "loss": 3.0089,
      "step": 194703
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8509345054626465,
      "learning_rate": 3.484454213946214e-05,
      "loss": 2.8575,
      "step": 194704
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8783609867095947,
      "learning_rate": 3.484262873187094e-05,
      "loss": 2.9896,
      "step": 194705
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.712106466293335,
      "learning_rate": 3.48407153735775e-05,
      "loss": 2.9102,
      "step": 194706
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6309924125671387,
      "learning_rate": 3.483880206458213e-05,
      "loss": 2.9713,
      "step": 194707
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.394186019897461,
      "learning_rate": 3.483688880488526e-05,
      "loss": 2.9494,
      "step": 194708
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.893618106842041,
      "learning_rate": 3.483497559448725e-05,
      "loss": 2.9129,
      "step": 194709
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.729271173477173,
      "learning_rate": 3.483306243338834e-05,
      "loss": 3.0341,
      "step": 194710
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.663200855255127,
      "learning_rate": 3.483114932158902e-05,
      "loss": 2.6476,
      "step": 194711
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9172446727752686,
      "learning_rate": 3.482923625908961e-05,
      "loss": 2.9574,
      "step": 194712
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9058010578155518,
      "learning_rate": 3.482732324589039e-05,
      "loss": 2.8463,
      "step": 194713
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2075631618499756,
      "learning_rate": 3.482541028199184e-05,
      "loss": 3.1854,
      "step": 194714
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6962814331054688,
      "learning_rate": 3.482349736739421e-05,
      "loss": 2.9962,
      "step": 194715
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.46189022064209,
      "learning_rate": 3.4821584502097924e-05,
      "loss": 3.0437,
      "step": 194716
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.317761182785034,
      "learning_rate": 3.481967168610332e-05,
      "loss": 3.0463,
      "step": 194717
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.96938157081604,
      "learning_rate": 3.4817758919410756e-05,
      "loss": 2.9693,
      "step": 194718
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.954824686050415,
      "learning_rate": 3.4815846202020514e-05,
      "loss": 3.1081,
      "step": 194719
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.607652187347412,
      "learning_rate": 3.481393353393307e-05,
      "loss": 3.127,
      "step": 194720
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.198747634887695,
      "learning_rate": 3.481202091514865e-05,
      "loss": 2.7899,
      "step": 194721
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.939384698867798,
      "learning_rate": 3.4810108345667765e-05,
      "loss": 2.9601,
      "step": 194722
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.655533790588379,
      "learning_rate": 3.4808195825490605e-05,
      "loss": 2.9401,
      "step": 194723
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7300267219543457,
      "learning_rate": 3.4806283354617736e-05,
      "loss": 3.0142,
      "step": 194724
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0130629539489746,
      "learning_rate": 3.4804370933049265e-05,
      "loss": 3.0369,
      "step": 194725
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.894609212875366,
      "learning_rate": 3.4802458560785714e-05,
      "loss": 2.8382,
      "step": 194726
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4342257976531982,
      "learning_rate": 3.480054623782733e-05,
      "loss": 2.9735,
      "step": 194727
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7042276859283447,
      "learning_rate": 3.4798633964174635e-05,
      "loss": 3.0614,
      "step": 194728
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.647636890411377,
      "learning_rate": 3.479672173982777e-05,
      "loss": 2.9712,
      "step": 194729
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.057201623916626,
      "learning_rate": 3.4794809564787365e-05,
      "loss": 2.9034,
      "step": 194730
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.573150157928467,
      "learning_rate": 3.4792897439053445e-05,
      "loss": 2.9504,
      "step": 194731
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2012906074523926,
      "learning_rate": 3.479098536262662e-05,
      "loss": 2.8592,
      "step": 194732
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8459973335266113,
      "learning_rate": 3.478907333550709e-05,
      "loss": 2.8736,
      "step": 194733
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8586769104003906,
      "learning_rate": 3.4787161357695317e-05,
      "loss": 3.1119,
      "step": 194734
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5059988498687744,
      "learning_rate": 3.4785249429191595e-05,
      "loss": 2.6594,
      "step": 194735
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.243696451187134,
      "learning_rate": 3.4783337549996406e-05,
      "loss": 2.8764,
      "step": 194736
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.674853801727295,
      "learning_rate": 3.4781425720109865e-05,
      "loss": 2.9121,
      "step": 194737
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.550722599029541,
      "learning_rate": 3.477951393953252e-05,
      "loss": 2.8726,
      "step": 194738
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6883561611175537,
      "learning_rate": 3.4777602208264635e-05,
      "loss": 3.1784,
      "step": 194739
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.679777145385742,
      "learning_rate": 3.4775690526306665e-05,
      "loss": 2.9636,
      "step": 194740
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.662912368774414,
      "learning_rate": 3.477377889365882e-05,
      "loss": 2.9817,
      "step": 194741
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9898300170898438,
      "learning_rate": 3.477186731032167e-05,
      "loss": 2.9868,
      "step": 194742
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9954144954681396,
      "learning_rate": 3.47699557762953e-05,
      "loss": 2.8772,
      "step": 194743
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7211999893188477,
      "learning_rate": 3.476804429158029e-05,
      "loss": 3.0008,
      "step": 194744
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.099313259124756,
      "learning_rate": 3.4766132856176797e-05,
      "loss": 2.917,
      "step": 194745
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.688335418701172,
      "learning_rate": 3.4764221470085395e-05,
      "loss": 3.1156,
      "step": 194746
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.031401634216309,
      "learning_rate": 3.476231013330625e-05,
      "loss": 2.7575,
      "step": 194747
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.681196928024292,
      "learning_rate": 3.476039884583992e-05,
      "loss": 3.0472,
      "step": 194748
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8930864334106445,
      "learning_rate": 3.4758487607686516e-05,
      "loss": 2.9837,
      "step": 194749
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8103389739990234,
      "learning_rate": 3.475657641884659e-05,
      "loss": 2.9559,
      "step": 194750
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.737227439880371,
      "learning_rate": 3.475466527932036e-05,
      "loss": 2.7874,
      "step": 194751
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.933185338973999,
      "learning_rate": 3.4752754189108275e-05,
      "loss": 2.8786,
      "step": 194752
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.264277935028076,
      "learning_rate": 3.475084314821065e-05,
      "loss": 2.7357,
      "step": 194753
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8093018531799316,
      "learning_rate": 3.474893215662794e-05,
      "loss": 2.7132,
      "step": 194754
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6915812492370605,
      "learning_rate": 3.474702121436028e-05,
      "loss": 3.1804,
      "step": 194755
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0556585788726807,
      "learning_rate": 3.4745110321408245e-05,
      "loss": 3.2216,
      "step": 194756
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2149198055267334,
      "learning_rate": 3.4743199477772024e-05,
      "loss": 3.2177,
      "step": 194757
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.295212745666504,
      "learning_rate": 3.4741288683452114e-05,
      "loss": 2.9217,
      "step": 194758
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.01334810256958,
      "learning_rate": 3.473937793844873e-05,
      "loss": 2.9556,
      "step": 194759
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.54256534576416,
      "learning_rate": 3.473746724276243e-05,
      "loss": 2.9937,
      "step": 194760
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.528357982635498,
      "learning_rate": 3.473555659639334e-05,
      "loss": 2.9494,
      "step": 194761
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1339571475982666,
      "learning_rate": 3.473364599934197e-05,
      "loss": 2.9308,
      "step": 194762
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.777183771133423,
      "learning_rate": 3.473173545160855e-05,
      "loss": 3.0857,
      "step": 194763
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0244295597076416,
      "learning_rate": 3.4729824953193585e-05,
      "loss": 2.9938,
      "step": 194764
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.543407917022705,
      "learning_rate": 3.472791450409727e-05,
      "loss": 2.8865,
      "step": 194765
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.322006940841675,
      "learning_rate": 3.4726004104320195e-05,
      "loss": 3.0506,
      "step": 194766
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7300233840942383,
      "learning_rate": 3.47240937538624e-05,
      "loss": 3.1599,
      "step": 194767
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.665189504623413,
      "learning_rate": 3.472218345272449e-05,
      "loss": 3.0074,
      "step": 194768
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.925706148147583,
      "learning_rate": 3.472027320090667e-05,
      "loss": 2.6851,
      "step": 194769
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4075722694396973,
      "learning_rate": 3.471836299840942e-05,
      "loss": 2.7315,
      "step": 194770
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.986647367477417,
      "learning_rate": 3.471645284523299e-05,
      "loss": 2.8491,
      "step": 194771
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.966823101043701,
      "learning_rate": 3.471454274137781e-05,
      "loss": 2.9203,
      "step": 194772
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0823991298675537,
      "learning_rate": 3.4712632686844236e-05,
      "loss": 2.9876,
      "step": 194773
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.73124361038208,
      "learning_rate": 3.471072268163257e-05,
      "loss": 2.8667,
      "step": 194774
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9346306324005127,
      "learning_rate": 3.470881272574313e-05,
      "loss": 3.1467,
      "step": 194775
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9961812496185303,
      "learning_rate": 3.4706902819176395e-05,
      "loss": 3.1356,
      "step": 194776
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.870709180831909,
      "learning_rate": 3.470499296193258e-05,
      "loss": 2.9971,
      "step": 194777
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8040974140167236,
      "learning_rate": 3.470308315401217e-05,
      "loss": 3.0401,
      "step": 194778
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5495362281799316,
      "learning_rate": 3.4701173395415504e-05,
      "loss": 2.799,
      "step": 194779
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.007628440856934,
      "learning_rate": 3.4699263686142854e-05,
      "loss": 2.949,
      "step": 194780
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0230872631073,
      "learning_rate": 3.469735402619458e-05,
      "loss": 2.921,
      "step": 194781
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9411332607269287,
      "learning_rate": 3.469544441557115e-05,
      "loss": 2.8784,
      "step": 194782
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7131426334381104,
      "learning_rate": 3.4693534854272764e-05,
      "loss": 2.9419,
      "step": 194783
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0985922813415527,
      "learning_rate": 3.4691625342299926e-05,
      "loss": 2.8777,
      "step": 194784
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0170369148254395,
      "learning_rate": 3.468971587965293e-05,
      "loss": 2.8431,
      "step": 194785
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.777205228805542,
      "learning_rate": 3.468780646633204e-05,
      "loss": 2.99,
      "step": 194786
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9463069438934326,
      "learning_rate": 3.468589710233776e-05,
      "loss": 2.9533,
      "step": 194787
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1719212532043457,
      "learning_rate": 3.4683987787670384e-05,
      "loss": 2.8251,
      "step": 194788
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8784730434417725,
      "learning_rate": 3.4682078522330224e-05,
      "loss": 3.1678,
      "step": 194789
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4936532974243164,
      "learning_rate": 3.4680169306317704e-05,
      "loss": 3.0018,
      "step": 194790
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.25677490234375,
      "learning_rate": 3.4678260139633165e-05,
      "loss": 2.8625,
      "step": 194791
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.207466125488281,
      "learning_rate": 3.46763510222769e-05,
      "loss": 3.0306,
      "step": 194792
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2297401428222656,
      "learning_rate": 3.4674441954249334e-05,
      "loss": 2.9692,
      "step": 194793
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0867362022399902,
      "learning_rate": 3.467253293555081e-05,
      "loss": 2.9187,
      "step": 194794
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.527620553970337,
      "learning_rate": 3.467062396618163e-05,
      "loss": 2.8493,
      "step": 194795
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.42551851272583,
      "learning_rate": 3.4668715046142225e-05,
      "loss": 2.9557,
      "step": 194796
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.000609397888184,
      "learning_rate": 3.4666806175432925e-05,
      "loss": 3.1323,
      "step": 194797
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.614126443862915,
      "learning_rate": 3.466489735405403e-05,
      "loss": 2.9022,
      "step": 194798
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8749730587005615,
      "learning_rate": 3.466298858200598e-05,
      "loss": 2.8035,
      "step": 194799
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.1408796310424805,
      "learning_rate": 3.4661079859289035e-05,
      "loss": 2.671,
      "step": 194800
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.225158214569092,
      "learning_rate": 3.4659171185903656e-05,
      "loss": 2.613,
      "step": 194801
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.249918222427368,
      "learning_rate": 3.4657262561850154e-05,
      "loss": 3.01,
      "step": 194802
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2778820991516113,
      "learning_rate": 3.465535398712889e-05,
      "loss": 3.2534,
      "step": 194803
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.246776580810547,
      "learning_rate": 3.4653445461740095e-05,
      "loss": 2.9704,
      "step": 194804
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.121032238006592,
      "learning_rate": 3.4651536985684337e-05,
      "loss": 3.0753,
      "step": 194805
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8101072311401367,
      "learning_rate": 3.464962855896178e-05,
      "loss": 3.0017,
      "step": 194806
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.093384265899658,
      "learning_rate": 3.464772018157297e-05,
      "loss": 3.0193,
      "step": 194807
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.063133716583252,
      "learning_rate": 3.4645811853518126e-05,
      "loss": 2.9464,
      "step": 194808
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4805707931518555,
      "learning_rate": 3.4643903574797646e-05,
      "loss": 2.7527,
      "step": 194809
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.780059814453125,
      "learning_rate": 3.464199534541178e-05,
      "loss": 3.1445,
      "step": 194810
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6271185874938965,
      "learning_rate": 3.464008716536108e-05,
      "loss": 2.7675,
      "step": 194811
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.7531840801239014,
      "learning_rate": 3.4638179034645707e-05,
      "loss": 2.7748,
      "step": 194812
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0596117973327637,
      "learning_rate": 3.4636270953266177e-05,
      "loss": 2.9143,
      "step": 194813
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.84509539604187,
      "learning_rate": 3.463436292122271e-05,
      "loss": 2.8364,
      "step": 194814
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.002399444580078,
      "learning_rate": 3.463245493851585e-05,
      "loss": 2.8072,
      "step": 194815
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2071080207824707,
      "learning_rate": 3.463054700514573e-05,
      "loss": 2.7904,
      "step": 194816
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2371888160705566,
      "learning_rate": 3.46286391211128e-05,
      "loss": 2.941,
      "step": 194817
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9254441261291504,
      "learning_rate": 3.462673128641741e-05,
      "loss": 2.9504,
      "step": 194818
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8360607624053955,
      "learning_rate": 3.462482350105995e-05,
      "loss": 3.1838,
      "step": 194819
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.072251796722412,
      "learning_rate": 3.462291576504069e-05,
      "loss": 3.0498,
      "step": 194820
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.852905035018921,
      "learning_rate": 3.462100807836017e-05,
      "loss": 2.9063,
      "step": 194821
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6087639331817627,
      "learning_rate": 3.461910044101848e-05,
      "loss": 3.0406,
      "step": 194822
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6328659057617188,
      "learning_rate": 3.461719285301619e-05,
      "loss": 2.7632,
      "step": 194823
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9442803859710693,
      "learning_rate": 3.461528531435349e-05,
      "loss": 2.9232,
      "step": 194824
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5354416370391846,
      "learning_rate": 3.4613377825030894e-05,
      "loss": 2.8855,
      "step": 194825
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.209855794906616,
      "learning_rate": 3.46114703850486e-05,
      "loss": 3.2267,
      "step": 194826
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.313627004623413,
      "learning_rate": 3.4609562994407204e-05,
      "loss": 2.754,
      "step": 194827
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1377692222595215,
      "learning_rate": 3.4607655653106734e-05,
      "loss": 2.7982,
      "step": 194828
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9636917114257812,
      "learning_rate": 3.46057483611478e-05,
      "loss": 3.051,
      "step": 194829
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.447645425796509,
      "learning_rate": 3.460384111853059e-05,
      "loss": 2.9687,
      "step": 194830
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7811825275421143,
      "learning_rate": 3.460193392525559e-05,
      "loss": 3.0434,
      "step": 194831
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.831547498703003,
      "learning_rate": 3.460002678132308e-05,
      "loss": 3.0395,
      "step": 194832
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1982033252716064,
      "learning_rate": 3.459811968673353e-05,
      "loss": 2.8731,
      "step": 194833
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0276448726654053,
      "learning_rate": 3.459621264148707e-05,
      "loss": 3.0656,
      "step": 194834
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.000790596008301,
      "learning_rate": 3.4594305645584256e-05,
      "loss": 2.8856,
      "step": 194835
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1974680423736572,
      "learning_rate": 3.459239869902529e-05,
      "loss": 3.1201,
      "step": 194836
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.021898031234741,
      "learning_rate": 3.4590491801810694e-05,
      "loss": 2.874,
      "step": 194837
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9448506832122803,
      "learning_rate": 3.4588584953940654e-05,
      "loss": 3.0382,
      "step": 194838
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.330860137939453,
      "learning_rate": 3.4586678155415684e-05,
      "loss": 2.8135,
      "step": 194839
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7392094135284424,
      "learning_rate": 3.4584771406236034e-05,
      "loss": 2.9332,
      "step": 194840
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.516784906387329,
      "learning_rate": 3.458286470640211e-05,
      "loss": 2.9608,
      "step": 194841
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.288325309753418,
      "learning_rate": 3.458095805591419e-05,
      "loss": 2.8934,
      "step": 194842
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.945655107498169,
      "learning_rate": 3.457905145477272e-05,
      "loss": 2.9213,
      "step": 194843
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8515002727508545,
      "learning_rate": 3.457714490297795e-05,
      "loss": 2.8094,
      "step": 194844
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0734102725982666,
      "learning_rate": 3.457523840053037e-05,
      "loss": 2.911,
      "step": 194845
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.7200357913970947,
      "learning_rate": 3.457333194743025e-05,
      "loss": 2.8174,
      "step": 194846
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5731778144836426,
      "learning_rate": 3.457142554367799e-05,
      "loss": 2.9843,
      "step": 194847
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.299543857574463,
      "learning_rate": 3.456951918927382e-05,
      "loss": 2.8944,
      "step": 194848
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.264281272888184,
      "learning_rate": 3.456761288421827e-05,
      "loss": 2.8721,
      "step": 194849
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.134206771850586,
      "learning_rate": 3.456570662851152e-05,
      "loss": 2.9463,
      "step": 194850
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9081811904907227,
      "learning_rate": 3.4563800422154095e-05,
      "loss": 3.0622,
      "step": 194851
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.78949236869812,
      "learning_rate": 3.456189426514629e-05,
      "loss": 3.0497,
      "step": 194852
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4561145305633545,
      "learning_rate": 3.455998815748842e-05,
      "loss": 2.7216,
      "step": 194853
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0152130126953125,
      "learning_rate": 3.4558082099180797e-05,
      "loss": 3.0212,
      "step": 194854
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6670403480529785,
      "learning_rate": 3.45561760902239e-05,
      "loss": 2.9564,
      "step": 194855
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9044837951660156,
      "learning_rate": 3.4554270130617956e-05,
      "loss": 2.9091,
      "step": 194856
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.234844446182251,
      "learning_rate": 3.455236422036344e-05,
      "loss": 3.0843,
      "step": 194857
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.278071880340576,
      "learning_rate": 3.455045835946068e-05,
      "loss": 2.8193,
      "step": 194858
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0219783782958984,
      "learning_rate": 3.454855254790997e-05,
      "loss": 2.8738,
      "step": 194859
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0020580291748047,
      "learning_rate": 3.454664678571165e-05,
      "loss": 2.966,
      "step": 194860
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9760401248931885,
      "learning_rate": 3.454474107286616e-05,
      "loss": 3.0134,
      "step": 194861
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6793642044067383,
      "learning_rate": 3.454283540937378e-05,
      "loss": 2.8209,
      "step": 194862
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2581686973571777,
      "learning_rate": 3.454092979523493e-05,
      "loss": 2.9379,
      "step": 194863
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.468688726425171,
      "learning_rate": 3.4539024230449965e-05,
      "loss": 2.9399,
      "step": 194864
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.240403890609741,
      "learning_rate": 3.4537118715019184e-05,
      "loss": 2.7258,
      "step": 194865
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.074646472930908,
      "learning_rate": 3.453521324894289e-05,
      "loss": 2.9819,
      "step": 194866
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1569056510925293,
      "learning_rate": 3.453330783222159e-05,
      "loss": 2.9571,
      "step": 194867
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.7776684761047363,
      "learning_rate": 3.453140246485551e-05,
      "loss": 2.674,
      "step": 194868
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.841278076171875,
      "learning_rate": 3.4529497146845075e-05,
      "loss": 3.2587,
      "step": 194869
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.817920684814453,
      "learning_rate": 3.452759187819066e-05,
      "loss": 2.9379,
      "step": 194870
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9926998615264893,
      "learning_rate": 3.452568665889257e-05,
      "loss": 2.8996,
      "step": 194871
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9205706119537354,
      "learning_rate": 3.4523781488951095e-05,
      "loss": 2.9328,
      "step": 194872
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.362086057662964,
      "learning_rate": 3.452187636836671e-05,
      "loss": 2.8834,
      "step": 194873
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.036250352859497,
      "learning_rate": 3.451997129713967e-05,
      "loss": 2.7219,
      "step": 194874
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6616156101226807,
      "learning_rate": 3.451806627527045e-05,
      "loss": 2.9809,
      "step": 194875
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.757105827331543,
      "learning_rate": 3.4516161302759316e-05,
      "loss": 3.0529,
      "step": 194876
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3005142211914062,
      "learning_rate": 3.45142563796066e-05,
      "loss": 2.982,
      "step": 194877
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.926431179046631,
      "learning_rate": 3.451235150581273e-05,
      "loss": 2.9298,
      "step": 194878
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7325193881988525,
      "learning_rate": 3.451044668137804e-05,
      "loss": 2.6613,
      "step": 194879
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.662362813949585,
      "learning_rate": 3.450854190630278e-05,
      "loss": 2.8802,
      "step": 194880
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.978536367416382,
      "learning_rate": 3.450663718058749e-05,
      "loss": 2.9049,
      "step": 194881
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9629714488983154,
      "learning_rate": 3.450473250423242e-05,
      "loss": 2.9228,
      "step": 194882
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.552924156188965,
      "learning_rate": 3.450282787723787e-05,
      "loss": 2.9185,
      "step": 194883
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.778329610824585,
      "learning_rate": 3.45009232996043e-05,
      "loss": 3.0137,
      "step": 194884
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6734459400177,
      "learning_rate": 3.449901877133198e-05,
      "loss": 2.9223,
      "step": 194885
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1609129905700684,
      "learning_rate": 3.449711429242137e-05,
      "loss": 2.9924,
      "step": 194886
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.517799139022827,
      "learning_rate": 3.449520986287274e-05,
      "loss": 2.9949,
      "step": 194887
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.861841917037964,
      "learning_rate": 3.449330548268646e-05,
      "loss": 3.0337,
      "step": 194888
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7654900550842285,
      "learning_rate": 3.449140115186283e-05,
      "loss": 2.9163,
      "step": 194889
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7094249725341797,
      "learning_rate": 3.448949687040235e-05,
      "loss": 3.1595,
      "step": 194890
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0168635845184326,
      "learning_rate": 3.4487592638305174e-05,
      "loss": 2.945,
      "step": 194891
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.440763235092163,
      "learning_rate": 3.448568845557188e-05,
      "loss": 2.7422,
      "step": 194892
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4208977222442627,
      "learning_rate": 3.4483784322202666e-05,
      "loss": 3.0154,
      "step": 194893
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.056417226791382,
      "learning_rate": 3.4481880238197934e-05,
      "loss": 2.9086,
      "step": 194894
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7461068630218506,
      "learning_rate": 3.447997620355798e-05,
      "loss": 2.7056,
      "step": 194895
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.982633113861084,
      "learning_rate": 3.4478072218283306e-05,
      "loss": 3.0141,
      "step": 194896
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.602267265319824,
      "learning_rate": 3.447616828237407e-05,
      "loss": 3.1136,
      "step": 194897
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0847392082214355,
      "learning_rate": 3.4474264395830786e-05,
      "loss": 2.8253,
      "step": 194898
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6121020317077637,
      "learning_rate": 3.447236055865368e-05,
      "loss": 3.036,
      "step": 194899
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9181699752807617,
      "learning_rate": 3.447045677084331e-05,
      "loss": 2.9511,
      "step": 194900
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1197667121887207,
      "learning_rate": 3.446855303239979e-05,
      "loss": 2.7989,
      "step": 194901
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1052892208099365,
      "learning_rate": 3.4466649343323616e-05,
      "loss": 2.7777,
      "step": 194902
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5255563259124756,
      "learning_rate": 3.446474570361505e-05,
      "loss": 3.043,
      "step": 194903
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9663925170898438,
      "learning_rate": 3.4462842113274556e-05,
      "loss": 2.9554,
      "step": 194904
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.164571762084961,
      "learning_rate": 3.4460938572302376e-05,
      "loss": 2.9368,
      "step": 194905
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.392585515975952,
      "learning_rate": 3.4459035080699e-05,
      "loss": 3.0261,
      "step": 194906
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4476559162139893,
      "learning_rate": 3.4457131638464674e-05,
      "loss": 3.012,
      "step": 194907
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.126835823059082,
      "learning_rate": 3.445522824559978e-05,
      "loss": 2.8407,
      "step": 194908
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.288881301879883,
      "learning_rate": 3.445332490210464e-05,
      "loss": 3.0301,
      "step": 194909
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9525554180145264,
      "learning_rate": 3.445142160797967e-05,
      "loss": 2.976,
      "step": 194910
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1055023670196533,
      "learning_rate": 3.4449518363225135e-05,
      "loss": 2.7071,
      "step": 194911
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.918889284133911,
      "learning_rate": 3.4447615167841516e-05,
      "loss": 3.116,
      "step": 194912
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.43603515625,
      "learning_rate": 3.44457120218291e-05,
      "loss": 2.8164,
      "step": 194913
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9973204135894775,
      "learning_rate": 3.4443808925188256e-05,
      "loss": 2.9798,
      "step": 194914
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.843438148498535,
      "learning_rate": 3.444190587791922e-05,
      "loss": 3.0749,
      "step": 194915
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6411383152008057,
      "learning_rate": 3.4440002880022523e-05,
      "loss": 2.9851,
      "step": 194916
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0368547439575195,
      "learning_rate": 3.44380999314984e-05,
      "loss": 2.9399,
      "step": 194917
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.491123914718628,
      "learning_rate": 3.443619703234728e-05,
      "loss": 3.0087,
      "step": 194918
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.34123420715332,
      "learning_rate": 3.44342941825695e-05,
      "loss": 2.8361,
      "step": 194919
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1476950645446777,
      "learning_rate": 3.443239138216539e-05,
      "loss": 2.6289,
      "step": 194920
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.128709077835083,
      "learning_rate": 3.4430488631135256e-05,
      "loss": 3.0976,
      "step": 194921
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8470051288604736,
      "learning_rate": 3.4428585929479594e-05,
      "loss": 2.7968,
      "step": 194922
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.752737045288086,
      "learning_rate": 3.4426683277198566e-05,
      "loss": 3.103,
      "step": 194923
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.304085731506348,
      "learning_rate": 3.442478067429271e-05,
      "loss": 2.6997,
      "step": 194924
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4078078269958496,
      "learning_rate": 3.442287812076232e-05,
      "loss": 2.985,
      "step": 194925
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.206116199493408,
      "learning_rate": 3.44209756166077e-05,
      "loss": 3.1587,
      "step": 194926
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.537686824798584,
      "learning_rate": 3.441907316182919e-05,
      "loss": 3.0847,
      "step": 194927
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1138510704040527,
      "learning_rate": 3.441717075642725e-05,
      "loss": 2.8502,
      "step": 194928
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.81811785697937,
      "learning_rate": 3.441526840040207e-05,
      "loss": 2.5436,
      "step": 194929
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.682105541229248,
      "learning_rate": 3.44133660937542e-05,
      "loss": 2.9874,
      "step": 194930
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.653095006942749,
      "learning_rate": 3.44114638364839e-05,
      "loss": 2.8867,
      "step": 194931
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.647787570953369,
      "learning_rate": 3.440956162859153e-05,
      "loss": 2.8789,
      "step": 194932
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1772632598876953,
      "learning_rate": 3.440765947007737e-05,
      "loss": 3.012,
      "step": 194933
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6071012020111084,
      "learning_rate": 3.44057573609419e-05,
      "loss": 2.9834,
      "step": 194934
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.952277183532715,
      "learning_rate": 3.4403855301185334e-05,
      "loss": 2.78,
      "step": 194935
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7425692081451416,
      "learning_rate": 3.440195329080817e-05,
      "loss": 2.9777,
      "step": 194936
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.40903902053833,
      "learning_rate": 3.4400051329810705e-05,
      "loss": 2.982,
      "step": 194937
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.297900915145874,
      "learning_rate": 3.439814941819328e-05,
      "loss": 3.0222,
      "step": 194938
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.972445011138916,
      "learning_rate": 3.439624755595621e-05,
      "loss": 2.9288,
      "step": 194939
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7781336307525635,
      "learning_rate": 3.439434574309995e-05,
      "loss": 2.7816,
      "step": 194940
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.461306095123291,
      "learning_rate": 3.439244397962472e-05,
      "loss": 3.0,
      "step": 194941
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0174663066864014,
      "learning_rate": 3.439054226553098e-05,
      "loss": 3.061,
      "step": 194942
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8879294395446777,
      "learning_rate": 3.4388640600819085e-05,
      "loss": 3.097,
      "step": 194943
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.67348575592041,
      "learning_rate": 3.4386738985489384e-05,
      "loss": 2.7658,
      "step": 194944
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0915699005126953,
      "learning_rate": 3.438483741954211e-05,
      "loss": 3.2156,
      "step": 194945
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.080348253250122,
      "learning_rate": 3.438293590297777e-05,
      "loss": 2.7319,
      "step": 194946
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.881612777709961,
      "learning_rate": 3.4381034435796596e-05,
      "loss": 2.7249,
      "step": 194947
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7532706260681152,
      "learning_rate": 3.437913301799905e-05,
      "loss": 2.9488,
      "step": 194948
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.874357223510742,
      "learning_rate": 3.437723164958543e-05,
      "loss": 2.8354,
      "step": 194949
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.660635471343994,
      "learning_rate": 3.437533033055612e-05,
      "loss": 3.1686,
      "step": 194950
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4276299476623535,
      "learning_rate": 3.43734290609114e-05,
      "loss": 2.9633,
      "step": 194951
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.196593999862671,
      "learning_rate": 3.43715278406517e-05,
      "loss": 2.8451,
      "step": 194952
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7474827766418457,
      "learning_rate": 3.4369626669777305e-05,
      "loss": 2.9764,
      "step": 194953
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.074814558029175,
      "learning_rate": 3.436772554828867e-05,
      "loss": 2.9323,
      "step": 194954
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1887760162353516,
      "learning_rate": 3.43658244761861e-05,
      "loss": 3.14,
      "step": 194955
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7029261589050293,
      "learning_rate": 3.4363923453469925e-05,
      "loss": 2.8046,
      "step": 194956
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9298694133758545,
      "learning_rate": 3.436202248014047e-05,
      "loss": 2.8916,
      "step": 194957
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7913219928741455,
      "learning_rate": 3.436012155619818e-05,
      "loss": 2.9713,
      "step": 194958
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.734555959701538,
      "learning_rate": 3.435822068164329e-05,
      "loss": 2.7209,
      "step": 194959
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8757009506225586,
      "learning_rate": 3.4356319856476254e-05,
      "loss": 3.0645,
      "step": 194960
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.938908576965332,
      "learning_rate": 3.435441908069745e-05,
      "loss": 3.0003,
      "step": 194961
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.014551162719727,
      "learning_rate": 3.435251835430707e-05,
      "loss": 3.1465,
      "step": 194962
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.922520637512207,
      "learning_rate": 3.435061767730565e-05,
      "loss": 3.0681,
      "step": 194963
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.596883773803711,
      "learning_rate": 3.434871704969345e-05,
      "loss": 2.8046,
      "step": 194964
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.200500249862671,
      "learning_rate": 3.434681647147078e-05,
      "loss": 3.034,
      "step": 194965
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.085387706756592,
      "learning_rate": 3.4344915942638144e-05,
      "loss": 2.8778,
      "step": 194966
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7648115158081055,
      "learning_rate": 3.434301546319579e-05,
      "loss": 2.9203,
      "step": 194967
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8755743503570557,
      "learning_rate": 3.4341115033143996e-05,
      "loss": 2.8664,
      "step": 194968
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.779141664505005,
      "learning_rate": 3.433921465248329e-05,
      "loss": 2.8788,
      "step": 194969
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5543935298919678,
      "learning_rate": 3.4337314321213914e-05,
      "loss": 2.8301,
      "step": 194970
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.496323347091675,
      "learning_rate": 3.43354140393362e-05,
      "loss": 2.9694,
      "step": 194971
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8946101665496826,
      "learning_rate": 3.43335138068506e-05,
      "loss": 2.9389,
      "step": 194972
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.408198118209839,
      "learning_rate": 3.433161362375739e-05,
      "loss": 2.8425,
      "step": 194973
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2132911682128906,
      "learning_rate": 3.4329713490056976e-05,
      "loss": 2.942,
      "step": 194974
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4158475399017334,
      "learning_rate": 3.432781340574968e-05,
      "loss": 2.9681,
      "step": 194975
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.293009042739868,
      "learning_rate": 3.4325913370835813e-05,
      "loss": 3.1482,
      "step": 194976
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8238577842712402,
      "learning_rate": 3.432401338531583e-05,
      "loss": 2.9522,
      "step": 194977
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6957201957702637,
      "learning_rate": 3.4322113449190005e-05,
      "loss": 2.8429,
      "step": 194978
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9212288856506348,
      "learning_rate": 3.4320213562458696e-05,
      "loss": 2.9551,
      "step": 194979
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5534021854400635,
      "learning_rate": 3.4318313725122316e-05,
      "loss": 2.9762,
      "step": 194980
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.159703493118286,
      "learning_rate": 3.431641393718115e-05,
      "loss": 3.0554,
      "step": 194981
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.762532949447632,
      "learning_rate": 3.4314514198635545e-05,
      "loss": 3.1244,
      "step": 194982
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7877020835876465,
      "learning_rate": 3.431261450948596e-05,
      "loss": 2.8792,
      "step": 194983
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.038879156112671,
      "learning_rate": 3.431071486973259e-05,
      "loss": 3.11,
      "step": 194984
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.874103307723999,
      "learning_rate": 3.430881527937594e-05,
      "loss": 2.7315,
      "step": 194985
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1290781497955322,
      "learning_rate": 3.430691573841628e-05,
      "loss": 2.8463,
      "step": 194986
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.747666597366333,
      "learning_rate": 3.430501624685401e-05,
      "loss": 2.6841,
      "step": 194987
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.820481300354004,
      "learning_rate": 3.430311680468939e-05,
      "loss": 2.9509,
      "step": 194988
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.503396987915039,
      "learning_rate": 3.4301217411922886e-05,
      "loss": 2.9833,
      "step": 194989
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4371554851531982,
      "learning_rate": 3.4299318068554735e-05,
      "loss": 2.8459,
      "step": 194990
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.133392572402954,
      "learning_rate": 3.429741877458544e-05,
      "loss": 3.0384,
      "step": 194991
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7108278274536133,
      "learning_rate": 3.429551953001526e-05,
      "loss": 2.9069,
      "step": 194992
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1089940071105957,
      "learning_rate": 3.429362033484453e-05,
      "loss": 2.773,
      "step": 194993
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9379563331604004,
      "learning_rate": 3.429172118907358e-05,
      "loss": 2.9733,
      "step": 194994
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.095522880554199,
      "learning_rate": 3.428982209270292e-05,
      "loss": 2.9641,
      "step": 194995
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8576345443725586,
      "learning_rate": 3.428792304573268e-05,
      "loss": 2.8905,
      "step": 194996
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7992420196533203,
      "learning_rate": 3.428602404816342e-05,
      "loss": 2.842,
      "step": 194997
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.681478500366211,
      "learning_rate": 3.4284125099995405e-05,
      "loss": 2.9853,
      "step": 194998
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0738089084625244,
      "learning_rate": 3.428222620122898e-05,
      "loss": 2.9122,
      "step": 194999
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.17121696472168,
      "learning_rate": 3.4280327351864464e-05,
      "loss": 2.9008,
      "step": 195000
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5845093727111816,
      "learning_rate": 3.4278428551902305e-05,
      "loss": 3.1612,
      "step": 195001
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.633559465408325,
      "learning_rate": 3.4276529801342725e-05,
      "loss": 2.9031,
      "step": 195002
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.918700695037842,
      "learning_rate": 3.427463110018622e-05,
      "loss": 2.7443,
      "step": 195003
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.06058406829834,
      "learning_rate": 3.427273244843307e-05,
      "loss": 2.8686,
      "step": 195004
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8385791778564453,
      "learning_rate": 3.4270833846083634e-05,
      "loss": 2.7459,
      "step": 195005
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0437397956848145,
      "learning_rate": 3.426893529313821e-05,
      "loss": 3.2883,
      "step": 195006
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1512861251831055,
      "learning_rate": 3.4267036789597304e-05,
      "loss": 2.8767,
      "step": 195007
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.837862014770508,
      "learning_rate": 3.426513833546107e-05,
      "loss": 2.8209,
      "step": 195008
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9953722953796387,
      "learning_rate": 3.426323993073002e-05,
      "loss": 3.1101,
      "step": 195009
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8393259048461914,
      "learning_rate": 3.426134157540445e-05,
      "loss": 2.9651,
      "step": 195010
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.135309219360352,
      "learning_rate": 3.4259443269484735e-05,
      "loss": 3.2323,
      "step": 195011
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6304287910461426,
      "learning_rate": 3.4257545012971156e-05,
      "loss": 2.7531,
      "step": 195012
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.421661376953125,
      "learning_rate": 3.4255646805864155e-05,
      "loss": 3.168,
      "step": 195013
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8817763328552246,
      "learning_rate": 3.425374864816397e-05,
      "loss": 2.557,
      "step": 195014
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.226499319076538,
      "learning_rate": 3.425185053987109e-05,
      "loss": 2.8294,
      "step": 195015
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.462329387664795,
      "learning_rate": 3.424995248098583e-05,
      "loss": 2.6636,
      "step": 195016
    },
    {
      "epoch": 2.54,
      "grad_norm": 5.460646152496338,
      "learning_rate": 3.4248054471508516e-05,
      "loss": 2.9253,
      "step": 195017
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.513925790786743,
      "learning_rate": 3.424615651143944e-05,
      "loss": 3.1061,
      "step": 195018
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.631639242172241,
      "learning_rate": 3.424425860077911e-05,
      "loss": 2.7676,
      "step": 195019
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.073770046234131,
      "learning_rate": 3.424236073952769e-05,
      "loss": 3.0015,
      "step": 195020
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7756481170654297,
      "learning_rate": 3.4240462927685705e-05,
      "loss": 3.0294,
      "step": 195021
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8777506351470947,
      "learning_rate": 3.4238565165253437e-05,
      "loss": 2.9399,
      "step": 195022
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8199896812438965,
      "learning_rate": 3.4236667452231216e-05,
      "loss": 2.8218,
      "step": 195023
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8351199626922607,
      "learning_rate": 3.423476978861937e-05,
      "loss": 2.8039,
      "step": 195024
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7541959285736084,
      "learning_rate": 3.423287217441836e-05,
      "loss": 2.9668,
      "step": 195025
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4546420574188232,
      "learning_rate": 3.423097460962842e-05,
      "loss": 2.9767,
      "step": 195026
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.977871894836426,
      "learning_rate": 3.4229077094250004e-05,
      "loss": 3.0118,
      "step": 195027
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.546292304992676,
      "learning_rate": 3.4227179628283455e-05,
      "loss": 3.0245,
      "step": 195028
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.076871395111084,
      "learning_rate": 3.422528221172904e-05,
      "loss": 2.7051,
      "step": 195029
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.791625499725342,
      "learning_rate": 3.422338484458714e-05,
      "loss": 2.9417,
      "step": 195030
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.556807279586792,
      "learning_rate": 3.422148752685818e-05,
      "loss": 2.9652,
      "step": 195031
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4206631183624268,
      "learning_rate": 3.4219590258542393e-05,
      "loss": 2.755,
      "step": 195032
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8248445987701416,
      "learning_rate": 3.421769303964025e-05,
      "loss": 2.7439,
      "step": 195033
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4953384399414062,
      "learning_rate": 3.421579587015207e-05,
      "loss": 2.9092,
      "step": 195034
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.774812698364258,
      "learning_rate": 3.4213898750078204e-05,
      "loss": 3.0965,
      "step": 195035
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.791494607925415,
      "learning_rate": 3.421200167941891e-05,
      "loss": 3.089,
      "step": 195036
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.474635124206543,
      "learning_rate": 3.4210104658174684e-05,
      "loss": 3.1593,
      "step": 195037
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6869688034057617,
      "learning_rate": 3.420820768634577e-05,
      "loss": 2.7388,
      "step": 195038
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.738328695297241,
      "learning_rate": 3.4206310763932624e-05,
      "loss": 2.6668,
      "step": 195039
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9872169494628906,
      "learning_rate": 3.420441389093548e-05,
      "loss": 2.8348,
      "step": 195040
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.231086254119873,
      "learning_rate": 3.420251706735487e-05,
      "loss": 2.9267,
      "step": 195041
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2961668968200684,
      "learning_rate": 3.420062029319091e-05,
      "loss": 2.9497,
      "step": 195042
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.7880008220672607,
      "learning_rate": 3.419872356844414e-05,
      "loss": 3.1041,
      "step": 195043
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.925520181655884,
      "learning_rate": 3.419682689311478e-05,
      "loss": 2.8404,
      "step": 195044
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.7645254135131836,
      "learning_rate": 3.419493026720329e-05,
      "loss": 2.9887,
      "step": 195045
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6450037956237793,
      "learning_rate": 3.419303369070993e-05,
      "loss": 3.09,
      "step": 195046
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6212451457977295,
      "learning_rate": 3.4191137163635185e-05,
      "loss": 2.8662,
      "step": 195047
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.050873756408691,
      "learning_rate": 3.41892406859793e-05,
      "loss": 3.1234,
      "step": 195048
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.294645071029663,
      "learning_rate": 3.418734425774266e-05,
      "loss": 3.0323,
      "step": 195049
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.989180088043213,
      "learning_rate": 3.418544787892554e-05,
      "loss": 3.0602,
      "step": 195050
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0661520957946777,
      "learning_rate": 3.418355154952843e-05,
      "loss": 2.6738,
      "step": 195051
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6238210201263428,
      "learning_rate": 3.4181655269551554e-05,
      "loss": 2.916,
      "step": 195052
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.044450521469116,
      "learning_rate": 3.4179759038995414e-05,
      "loss": 3.1592,
      "step": 195053
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9276275634765625,
      "learning_rate": 3.417786285786024e-05,
      "loss": 3.0998,
      "step": 195054
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9906418323516846,
      "learning_rate": 3.417596672614643e-05,
      "loss": 3.0716,
      "step": 195055
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6575348377227783,
      "learning_rate": 3.417407064385426e-05,
      "loss": 2.8157,
      "step": 195056
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.987061023712158,
      "learning_rate": 3.417217461098422e-05,
      "loss": 2.7999,
      "step": 195057
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.3448562622070312,
      "learning_rate": 3.417027862753654e-05,
      "loss": 3.1237,
      "step": 195058
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3807244300842285,
      "learning_rate": 3.416838269351166e-05,
      "loss": 3.0008,
      "step": 195059
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.435042858123779,
      "learning_rate": 3.4166486808909886e-05,
      "loss": 2.9826,
      "step": 195060
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9346179962158203,
      "learning_rate": 3.416459097373154e-05,
      "loss": 3.0286,
      "step": 195061
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.503321886062622,
      "learning_rate": 3.41626951879771e-05,
      "loss": 2.9597,
      "step": 195062
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7261528968811035,
      "learning_rate": 3.416079945164678e-05,
      "loss": 2.9494,
      "step": 195063
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0318069458007812,
      "learning_rate": 3.415890376474096e-05,
      "loss": 2.8488,
      "step": 195064
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9052557945251465,
      "learning_rate": 3.415700812726008e-05,
      "loss": 3.0908,
      "step": 195065
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.667404890060425,
      "learning_rate": 3.415511253920439e-05,
      "loss": 2.8559,
      "step": 195066
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4770801067352295,
      "learning_rate": 3.4153217000574265e-05,
      "loss": 2.9693,
      "step": 195067
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9926095008850098,
      "learning_rate": 3.415132151137013e-05,
      "loss": 2.9561,
      "step": 195068
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7488958835601807,
      "learning_rate": 3.41494260715922e-05,
      "loss": 2.7882,
      "step": 195069
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8306326866149902,
      "learning_rate": 3.4147530681241e-05,
      "loss": 2.9855,
      "step": 195070
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3199949264526367,
      "learning_rate": 3.414563534031676e-05,
      "loss": 2.8369,
      "step": 195071
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.118626117706299,
      "learning_rate": 3.414374004881988e-05,
      "loss": 3.1305,
      "step": 195072
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6868863105773926,
      "learning_rate": 3.414184480675066e-05,
      "loss": 2.6083,
      "step": 195073
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.199964761734009,
      "learning_rate": 3.413994961410951e-05,
      "loss": 3.0644,
      "step": 195074
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8315062522888184,
      "learning_rate": 3.413805447089672e-05,
      "loss": 3.0798,
      "step": 195075
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.731013536453247,
      "learning_rate": 3.413615937711275e-05,
      "loss": 2.9188,
      "step": 195076
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8009893894195557,
      "learning_rate": 3.4134264332757873e-05,
      "loss": 2.988,
      "step": 195077
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0497124195098877,
      "learning_rate": 3.4132369337832464e-05,
      "loss": 2.946,
      "step": 195078
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8126938343048096,
      "learning_rate": 3.4130474392336815e-05,
      "loss": 2.8563,
      "step": 195079
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0644173622131348,
      "learning_rate": 3.412857949627139e-05,
      "loss": 2.9714,
      "step": 195080
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.827317953109741,
      "learning_rate": 3.4126684649636394e-05,
      "loss": 2.7311,
      "step": 195081
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.665520668029785,
      "learning_rate": 3.4124789852432354e-05,
      "loss": 3.0455,
      "step": 195082
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6742353439331055,
      "learning_rate": 3.412289510465951e-05,
      "loss": 2.9203,
      "step": 195083
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.31252384185791,
      "learning_rate": 3.4121000406318256e-05,
      "loss": 2.9091,
      "step": 195084
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5479376316070557,
      "learning_rate": 3.411910575740886e-05,
      "loss": 2.9352,
      "step": 195085
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.304450750350952,
      "learning_rate": 3.411721115793179e-05,
      "loss": 2.9894,
      "step": 195086
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1960859298706055,
      "learning_rate": 3.411531660788731e-05,
      "loss": 3.1274,
      "step": 195087
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8940000534057617,
      "learning_rate": 3.411342210727585e-05,
      "loss": 3.0602,
      "step": 195088
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2188754081726074,
      "learning_rate": 3.4111527656097756e-05,
      "loss": 2.8063,
      "step": 195089
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1587321758270264,
      "learning_rate": 3.410963325435332e-05,
      "loss": 3.3003,
      "step": 195090
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5546975135803223,
      "learning_rate": 3.410773890204287e-05,
      "loss": 2.8205,
      "step": 195091
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.191038131713867,
      "learning_rate": 3.410584459916688e-05,
      "loss": 2.8021,
      "step": 195092
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1720330715179443,
      "learning_rate": 3.410395034572557e-05,
      "loss": 2.6903,
      "step": 195093
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.563568115234375,
      "learning_rate": 3.410205614171943e-05,
      "loss": 2.6512,
      "step": 195094
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.757859230041504,
      "learning_rate": 3.41001619871487e-05,
      "loss": 3.0269,
      "step": 195095
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8339924812316895,
      "learning_rate": 3.40982678820138e-05,
      "loss": 3.2306,
      "step": 195096
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.802107810974121,
      "learning_rate": 3.409637382631495e-05,
      "loss": 2.8071,
      "step": 195097
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.2385573387146,
      "learning_rate": 3.409447982005269e-05,
      "loss": 3.1068,
      "step": 195098
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7464165687561035,
      "learning_rate": 3.4092585863227253e-05,
      "loss": 3.0031,
      "step": 195099
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1560773849487305,
      "learning_rate": 3.4090691955839036e-05,
      "loss": 3.1005,
      "step": 195100
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1170079708099365,
      "learning_rate": 3.4088798097888405e-05,
      "loss": 3.0231,
      "step": 195101
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9514644145965576,
      "learning_rate": 3.408690428937569e-05,
      "loss": 2.855,
      "step": 195102
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7977023124694824,
      "learning_rate": 3.408501053030117e-05,
      "loss": 2.8849,
      "step": 195103
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.6265740394592285,
      "learning_rate": 3.4083116820665336e-05,
      "loss": 3.1008,
      "step": 195104
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.062047004699707,
      "learning_rate": 3.4081223160468384e-05,
      "loss": 2.8647,
      "step": 195105
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.517062187194824,
      "learning_rate": 3.407932954971082e-05,
      "loss": 2.9746,
      "step": 195106
    },
    {
      "epoch": 2.54,
      "grad_norm": 5.143229961395264,
      "learning_rate": 3.4077435988392876e-05,
      "loss": 2.8873,
      "step": 195107
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.31927227973938,
      "learning_rate": 3.4075542476515085e-05,
      "loss": 2.8156,
      "step": 195108
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2136435508728027,
      "learning_rate": 3.4073649014077544e-05,
      "loss": 3.1696,
      "step": 195109
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5036137104034424,
      "learning_rate": 3.407175560108079e-05,
      "loss": 2.8381,
      "step": 195110
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4678404331207275,
      "learning_rate": 3.406986223752505e-05,
      "loss": 2.8953,
      "step": 195111
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.7667438983917236,
      "learning_rate": 3.406796892341083e-05,
      "loss": 2.7603,
      "step": 195112
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.169783592224121,
      "learning_rate": 3.406607565873829e-05,
      "loss": 2.7953,
      "step": 195113
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.809901475906372,
      "learning_rate": 3.406418244350804e-05,
      "loss": 2.5517,
      "step": 195114
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.199630260467529,
      "learning_rate": 3.406228927772013e-05,
      "loss": 3.2195,
      "step": 195115
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2499611377716064,
      "learning_rate": 3.406039616137515e-05,
      "loss": 2.5507,
      "step": 195116
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.875044345855713,
      "learning_rate": 3.405850309447328e-05,
      "loss": 3.0357,
      "step": 195117
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.080061435699463,
      "learning_rate": 3.405661007701502e-05,
      "loss": 2.9934,
      "step": 195118
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.847595691680908,
      "learning_rate": 3.4054717109000615e-05,
      "loss": 2.9869,
      "step": 195119
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0407533645629883,
      "learning_rate": 3.405282419043056e-05,
      "loss": 2.9806,
      "step": 195120
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.662348985671997,
      "learning_rate": 3.405093132130499e-05,
      "loss": 3.1215,
      "step": 195121
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1446585655212402,
      "learning_rate": 3.404903850162443e-05,
      "loss": 2.7115,
      "step": 195122
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0622875690460205,
      "learning_rate": 3.4047145731389116e-05,
      "loss": 2.7897,
      "step": 195123
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.147495985031128,
      "learning_rate": 3.4045253010599515e-05,
      "loss": 2.8583,
      "step": 195124
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.620494842529297,
      "learning_rate": 3.404336033925586e-05,
      "loss": 2.967,
      "step": 195125
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.759091854095459,
      "learning_rate": 3.4041467717358694e-05,
      "loss": 3.0713,
      "step": 195126
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.677520513534546,
      "learning_rate": 3.403957514490814e-05,
      "loss": 2.8093,
      "step": 195127
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.078537702560425,
      "learning_rate": 3.403768262190466e-05,
      "loss": 3.0178,
      "step": 195128
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.792705535888672,
      "learning_rate": 3.403579014834856e-05,
      "loss": 3.0483,
      "step": 195129
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8700640201568604,
      "learning_rate": 3.403389772424028e-05,
      "loss": 2.9218,
      "step": 195130
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4252967834472656,
      "learning_rate": 3.4032005349580075e-05,
      "loss": 2.8217,
      "step": 195131
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4941837787628174,
      "learning_rate": 3.403011302436838e-05,
      "loss": 3.0844,
      "step": 195132
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8569247722625732,
      "learning_rate": 3.4028220748605525e-05,
      "loss": 2.8417,
      "step": 195133
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.304605484008789,
      "learning_rate": 3.402632852229186e-05,
      "loss": 2.6235,
      "step": 195134
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8948464393615723,
      "learning_rate": 3.402443634542763e-05,
      "loss": 2.9878,
      "step": 195135
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1280863285064697,
      "learning_rate": 3.402254421801335e-05,
      "loss": 2.9918,
      "step": 195136
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1611387729644775,
      "learning_rate": 3.4020652140049207e-05,
      "loss": 2.6978,
      "step": 195137
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.587013244628906,
      "learning_rate": 3.401876011153575e-05,
      "loss": 3.1288,
      "step": 195138
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4315123558044434,
      "learning_rate": 3.40168681324732e-05,
      "loss": 2.9328,
      "step": 195139
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8980712890625,
      "learning_rate": 3.401497620286195e-05,
      "loss": 3.045,
      "step": 195140
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.390563488006592,
      "learning_rate": 3.401308432270229e-05,
      "loss": 3.1875,
      "step": 195141
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.316528081893921,
      "learning_rate": 3.4011192491994665e-05,
      "loss": 2.7908,
      "step": 195142
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.389042615890503,
      "learning_rate": 3.4009300710739315e-05,
      "loss": 2.7528,
      "step": 195143
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9775922298431396,
      "learning_rate": 3.400740897893671e-05,
      "loss": 2.8568,
      "step": 195144
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.87370228767395,
      "learning_rate": 3.400551729658715e-05,
      "loss": 2.752,
      "step": 195145
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.833810329437256,
      "learning_rate": 3.400362566369093e-05,
      "loss": 3.1823,
      "step": 195146
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.202354431152344,
      "learning_rate": 3.400173408024851e-05,
      "loss": 2.8724,
      "step": 195147
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.033120632171631,
      "learning_rate": 3.39998425462602e-05,
      "loss": 2.7631,
      "step": 195148
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.203246831893921,
      "learning_rate": 3.39979510617263e-05,
      "loss": 2.8886,
      "step": 195149
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9538674354553223,
      "learning_rate": 3.3996059626647245e-05,
      "loss": 3.0326,
      "step": 195150
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6241581439971924,
      "learning_rate": 3.399416824102332e-05,
      "loss": 3.0835,
      "step": 195151
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.133504867553711,
      "learning_rate": 3.3992276904854876e-05,
      "loss": 2.9216,
      "step": 195152
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.351933479309082,
      "learning_rate": 3.3990385618142335e-05,
      "loss": 2.7877,
      "step": 195153
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.839667320251465,
      "learning_rate": 3.3988494380886e-05,
      "loss": 3.1311,
      "step": 195154
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.706446886062622,
      "learning_rate": 3.3986603193086174e-05,
      "loss": 2.9849,
      "step": 195155
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.94452166557312,
      "learning_rate": 3.398471205474328e-05,
      "loss": 3.0154,
      "step": 195156
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.415923833847046,
      "learning_rate": 3.3982820965857694e-05,
      "loss": 2.8207,
      "step": 195157
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4807534217834473,
      "learning_rate": 3.3980929926429646e-05,
      "loss": 2.9682,
      "step": 195158
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0332963466644287,
      "learning_rate": 3.3979038936459635e-05,
      "loss": 2.9357,
      "step": 195159
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3144397735595703,
      "learning_rate": 3.3977147995947864e-05,
      "loss": 2.9776,
      "step": 195160
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6688218116760254,
      "learning_rate": 3.397525710489486e-05,
      "loss": 3.2626,
      "step": 195161
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.471924066543579,
      "learning_rate": 3.3973366263300825e-05,
      "loss": 2.6576,
      "step": 195162
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.304699659347534,
      "learning_rate": 3.39714754711662e-05,
      "loss": 2.8662,
      "step": 195163
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.442943811416626,
      "learning_rate": 3.396958472849124e-05,
      "loss": 2.7304,
      "step": 195164
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.920027732849121,
      "learning_rate": 3.3967694035276415e-05,
      "loss": 2.8279,
      "step": 195165
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8137295246124268,
      "learning_rate": 3.3965803391521926e-05,
      "loss": 3.1423,
      "step": 195166
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.59614634513855,
      "learning_rate": 3.396391279722831e-05,
      "loss": 2.8275,
      "step": 195167
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.633394479751587,
      "learning_rate": 3.396202225239579e-05,
      "loss": 3.0957,
      "step": 195168
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.109670400619507,
      "learning_rate": 3.396013175702478e-05,
      "loss": 3.1526,
      "step": 195169
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9385859966278076,
      "learning_rate": 3.395824131111553e-05,
      "loss": 2.9298,
      "step": 195170
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6874170303344727,
      "learning_rate": 3.395635091466855e-05,
      "loss": 2.7824,
      "step": 195171
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.0669169425964355,
      "learning_rate": 3.395446056768404e-05,
      "loss": 2.6954,
      "step": 195172
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9766063690185547,
      "learning_rate": 3.395257027016246e-05,
      "loss": 3.0526,
      "step": 195173
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.648078441619873,
      "learning_rate": 3.395068002210405e-05,
      "loss": 2.8036,
      "step": 195174
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6730525493621826,
      "learning_rate": 3.394878982350937e-05,
      "loss": 2.8437,
      "step": 195175
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.475054979324341,
      "learning_rate": 3.3946899674378524e-05,
      "loss": 2.7518,
      "step": 195176
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.717148780822754,
      "learning_rate": 3.394500957471201e-05,
      "loss": 2.8034,
      "step": 195177
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5782790184020996,
      "learning_rate": 3.39431195245101e-05,
      "loss": 2.8492,
      "step": 195178
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.658748149871826,
      "learning_rate": 3.3941229523773217e-05,
      "loss": 2.596,
      "step": 195179
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3195486068725586,
      "learning_rate": 3.3939339572501636e-05,
      "loss": 3.1395,
      "step": 195180
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1567013263702393,
      "learning_rate": 3.393744967069588e-05,
      "loss": 2.5986,
      "step": 195181
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9773685932159424,
      "learning_rate": 3.393555981835606e-05,
      "loss": 3.245,
      "step": 195182
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.749351978302002,
      "learning_rate": 3.393367001548267e-05,
      "loss": 2.97,
      "step": 195183
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.492520570755005,
      "learning_rate": 3.393178026207602e-05,
      "loss": 2.9044,
      "step": 195184
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8042874336242676,
      "learning_rate": 3.3929890558136485e-05,
      "loss": 2.7762,
      "step": 195185
    },
    {
      "epoch": 2.54,
      "grad_norm": 5.633151531219482,
      "learning_rate": 3.392800090366439e-05,
      "loss": 3.0976,
      "step": 195186
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6651151180267334,
      "learning_rate": 3.392611129866019e-05,
      "loss": 3.0871,
      "step": 195187
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.171032667160034,
      "learning_rate": 3.392422174312405e-05,
      "loss": 2.7354,
      "step": 195188
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.991823434829712,
      "learning_rate": 3.392233223705647e-05,
      "loss": 2.9542,
      "step": 195189
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6569297313690186,
      "learning_rate": 3.392044278045769e-05,
      "loss": 3.034,
      "step": 195190
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.45671010017395,
      "learning_rate": 3.3918553373328175e-05,
      "loss": 2.9013,
      "step": 195191
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8693032264709473,
      "learning_rate": 3.391666401566818e-05,
      "loss": 3.0479,
      "step": 195192
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.317408561706543,
      "learning_rate": 3.391477470747822e-05,
      "loss": 3.1735,
      "step": 195193
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.059407949447632,
      "learning_rate": 3.3912885448758385e-05,
      "loss": 3.2444,
      "step": 195194
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1926522254943848,
      "learning_rate": 3.391099623950925e-05,
      "loss": 2.8364,
      "step": 195195
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.462986469268799,
      "learning_rate": 3.3909107079731004e-05,
      "loss": 2.9309,
      "step": 195196
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8294496536254883,
      "learning_rate": 3.390721796942415e-05,
      "loss": 2.612,
      "step": 195197
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.264799118041992,
      "learning_rate": 3.390532890858889e-05,
      "loss": 2.8581,
      "step": 195198
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.395644187927246,
      "learning_rate": 3.3903439897225793e-05,
      "loss": 2.7456,
      "step": 195199
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7678396701812744,
      "learning_rate": 3.390155093533495e-05,
      "loss": 2.9883,
      "step": 195200
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8927650451660156,
      "learning_rate": 3.389966202291691e-05,
      "loss": 2.7178,
      "step": 195201
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.8297948837280273,
      "learning_rate": 3.3897773159971854e-05,
      "loss": 2.8559,
      "step": 195202
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4882609844207764,
      "learning_rate": 3.389588434650029e-05,
      "loss": 2.9359,
      "step": 195203
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.638444423675537,
      "learning_rate": 3.3893995582502455e-05,
      "loss": 2.9211,
      "step": 195204
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.75089168548584,
      "learning_rate": 3.389210686797884e-05,
      "loss": 3.0163,
      "step": 195205
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.3692522048950195,
      "learning_rate": 3.389021820292962e-05,
      "loss": 2.9041,
      "step": 195206
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.751704692840576,
      "learning_rate": 3.388832958735529e-05,
      "loss": 3.102,
      "step": 195207
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9622576236724854,
      "learning_rate": 3.388644102125605e-05,
      "loss": 2.698,
      "step": 195208
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6664953231811523,
      "learning_rate": 3.3884552504632434e-05,
      "loss": 2.9197,
      "step": 195209
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.848055839538574,
      "learning_rate": 3.388266403748464e-05,
      "loss": 2.8455,
      "step": 195210
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.844621419906616,
      "learning_rate": 3.38807756198132e-05,
      "loss": 2.9359,
      "step": 195211
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9887335300445557,
      "learning_rate": 3.387888725161823e-05,
      "loss": 2.7346,
      "step": 195212
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2727346420288086,
      "learning_rate": 3.387699893290024e-05,
      "loss": 2.9148,
      "step": 195213
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0886764526367188,
      "learning_rate": 3.38751106636595e-05,
      "loss": 3.2567,
      "step": 195214
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.728170871734619,
      "learning_rate": 3.387322244389645e-05,
      "loss": 2.9603,
      "step": 195215
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8842618465423584,
      "learning_rate": 3.387133427361133e-05,
      "loss": 2.739,
      "step": 195216
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.691185474395752,
      "learning_rate": 3.386944615280466e-05,
      "loss": 3.177,
      "step": 195217
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0469248294830322,
      "learning_rate": 3.386755808147657e-05,
      "loss": 2.9501,
      "step": 195218
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7880866527557373,
      "learning_rate": 3.386567005962762e-05,
      "loss": 2.7248,
      "step": 195219
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5276787281036377,
      "learning_rate": 3.3863782087257977e-05,
      "loss": 2.7821,
      "step": 195220
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7888405323028564,
      "learning_rate": 3.386189416436812e-05,
      "loss": 2.7998,
      "step": 195221
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.745509147644043,
      "learning_rate": 3.386000629095835e-05,
      "loss": 2.7091,
      "step": 195222
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5313897132873535,
      "learning_rate": 3.385811846702904e-05,
      "loss": 2.8982,
      "step": 195223
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1265296936035156,
      "learning_rate": 3.385623069258058e-05,
      "loss": 2.5409,
      "step": 195224
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6868481636047363,
      "learning_rate": 3.385434296761324e-05,
      "loss": 2.954,
      "step": 195225
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6166486740112305,
      "learning_rate": 3.385245529212731e-05,
      "loss": 2.7418,
      "step": 195226
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.608597755432129,
      "learning_rate": 3.385056766612334e-05,
      "loss": 2.9846,
      "step": 195227
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4650967121124268,
      "learning_rate": 3.384868008960149e-05,
      "loss": 2.5264,
      "step": 195228
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9804465770721436,
      "learning_rate": 3.3846792562562286e-05,
      "loss": 3.0319,
      "step": 195229
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6011340618133545,
      "learning_rate": 3.384490508500597e-05,
      "loss": 2.7883,
      "step": 195230
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.622066020965576,
      "learning_rate": 3.3843017656932836e-05,
      "loss": 2.7241,
      "step": 195231
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.424600124359131,
      "learning_rate": 3.384113027834339e-05,
      "loss": 2.9546,
      "step": 195232
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4765217304229736,
      "learning_rate": 3.383924294923789e-05,
      "loss": 3.0125,
      "step": 195233
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9143357276916504,
      "learning_rate": 3.383735566961664e-05,
      "loss": 3.0033,
      "step": 195234
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.159597635269165,
      "learning_rate": 3.383546843948011e-05,
      "loss": 2.9528,
      "step": 195235
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.963381290435791,
      "learning_rate": 3.383358125882859e-05,
      "loss": 3.0212,
      "step": 195236
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7473413944244385,
      "learning_rate": 3.383169412766236e-05,
      "loss": 2.939,
      "step": 195237
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.660858392715454,
      "learning_rate": 3.382980704598191e-05,
      "loss": 2.7103,
      "step": 195238
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7674055099487305,
      "learning_rate": 3.382792001378754e-05,
      "loss": 3.0363,
      "step": 195239
    },
    {
      "epoch": 2.54,
      "grad_norm": 5.113767623901367,
      "learning_rate": 3.382603303107951e-05,
      "loss": 2.7754,
      "step": 195240
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0359702110290527,
      "learning_rate": 3.3824146097858305e-05,
      "loss": 2.8362,
      "step": 195241
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7662463188171387,
      "learning_rate": 3.3822259214124214e-05,
      "loss": 2.838,
      "step": 195242
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0807173252105713,
      "learning_rate": 3.3820372379877535e-05,
      "loss": 2.8267,
      "step": 195243
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4972684383392334,
      "learning_rate": 3.3818485595118736e-05,
      "loss": 2.9324,
      "step": 195244
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.851196050643921,
      "learning_rate": 3.381659885984802e-05,
      "loss": 3.1279,
      "step": 195245
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4244723320007324,
      "learning_rate": 3.3814712174065914e-05,
      "loss": 2.7273,
      "step": 195246
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6667463779449463,
      "learning_rate": 3.381282553777266e-05,
      "loss": 2.8812,
      "step": 195247
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7008066177368164,
      "learning_rate": 3.381093895096865e-05,
      "loss": 3.0598,
      "step": 195248
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2052645683288574,
      "learning_rate": 3.380905241365411e-05,
      "loss": 2.7452,
      "step": 195249
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8853402137756348,
      "learning_rate": 3.380716592582959e-05,
      "loss": 2.9374,
      "step": 195250
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.498636484146118,
      "learning_rate": 3.380527948749525e-05,
      "loss": 2.7872,
      "step": 195251
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6427950859069824,
      "learning_rate": 3.380339309865162e-05,
      "loss": 2.6759,
      "step": 195252
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.264085292816162,
      "learning_rate": 3.380150675929897e-05,
      "loss": 3.0097,
      "step": 195253
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.623736619949341,
      "learning_rate": 3.379962046943763e-05,
      "loss": 2.8706,
      "step": 195254
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7044074535369873,
      "learning_rate": 3.3797734229067893e-05,
      "loss": 3.032,
      "step": 195255
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.68449330329895,
      "learning_rate": 3.379584803819027e-05,
      "loss": 3.0497,
      "step": 195256
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.762967109680176,
      "learning_rate": 3.379396189680493e-05,
      "loss": 2.7176,
      "step": 195257
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8985917568206787,
      "learning_rate": 3.37920758049124e-05,
      "loss": 2.9167,
      "step": 195258
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0634939670562744,
      "learning_rate": 3.379018976251291e-05,
      "loss": 2.9907,
      "step": 195259
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4121413230895996,
      "learning_rate": 3.378830376960693e-05,
      "loss": 2.7276,
      "step": 195260
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8232951164245605,
      "learning_rate": 3.378641782619462e-05,
      "loss": 2.9677,
      "step": 195261
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5707993507385254,
      "learning_rate": 3.378453193227649e-05,
      "loss": 2.7582,
      "step": 195262
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.797286033630371,
      "learning_rate": 3.3782646087852804e-05,
      "loss": 2.9575,
      "step": 195263
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6992475986480713,
      "learning_rate": 3.3780760292923994e-05,
      "loss": 3.1588,
      "step": 195264
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7447006702423096,
      "learning_rate": 3.377887454749032e-05,
      "loss": 3.1009,
      "step": 195265
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9138946533203125,
      "learning_rate": 3.377698885155229e-05,
      "loss": 2.9824,
      "step": 195266
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7927823066711426,
      "learning_rate": 3.3775103205110034e-05,
      "loss": 3.2062,
      "step": 195267
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.674703598022461,
      "learning_rate": 3.377321760816405e-05,
      "loss": 2.9009,
      "step": 195268
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7404367923736572,
      "learning_rate": 3.3771332060714604e-05,
      "loss": 2.9765,
      "step": 195269
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.185441255569458,
      "learning_rate": 3.376944656276214e-05,
      "loss": 2.9782,
      "step": 195270
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9843180179595947,
      "learning_rate": 3.3767561114306905e-05,
      "loss": 2.9006,
      "step": 195271
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.977848529815674,
      "learning_rate": 3.376567571534945e-05,
      "loss": 2.9228,
      "step": 195272
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.443430185317993,
      "learning_rate": 3.376379036588983e-05,
      "loss": 2.809,
      "step": 195273
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.580308675765991,
      "learning_rate": 3.376190506592862e-05,
      "loss": 2.8833,
      "step": 195274
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.566555976867676,
      "learning_rate": 3.376001981546604e-05,
      "loss": 2.6951,
      "step": 195275
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5753331184387207,
      "learning_rate": 3.3758134614502573e-05,
      "loss": 2.7912,
      "step": 195276
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2436656951904297,
      "learning_rate": 3.375624946303841e-05,
      "loss": 2.9019,
      "step": 195277
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6997318267822266,
      "learning_rate": 3.375436436107412e-05,
      "loss": 2.9536,
      "step": 195278
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.31166934967041,
      "learning_rate": 3.3752479308609795e-05,
      "loss": 2.9122,
      "step": 195279
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6929075717926025,
      "learning_rate": 3.375059430564597e-05,
      "loss": 2.7632,
      "step": 195280
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.078589677810669,
      "learning_rate": 3.374870935218286e-05,
      "loss": 3.0253,
      "step": 195281
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.37213397026062,
      "learning_rate": 3.3746824448220975e-05,
      "loss": 2.8268,
      "step": 195282
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4349942207336426,
      "learning_rate": 3.3744939593760504e-05,
      "loss": 2.7908,
      "step": 195283
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0219714641571045,
      "learning_rate": 3.374305478880199e-05,
      "loss": 3.1762,
      "step": 195284
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.509751558303833,
      "learning_rate": 3.374117003334555e-05,
      "loss": 2.7322,
      "step": 195285
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4190354347229004,
      "learning_rate": 3.373928532739175e-05,
      "loss": 3.0244,
      "step": 195286
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5732522010803223,
      "learning_rate": 3.3737400670940754e-05,
      "loss": 2.773,
      "step": 195287
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2302913665771484,
      "learning_rate": 3.3735516063993054e-05,
      "loss": 2.8542,
      "step": 195288
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2100183963775635,
      "learning_rate": 3.373363150654889e-05,
      "loss": 3.1402,
      "step": 195289
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2546675205230713,
      "learning_rate": 3.373174699860879e-05,
      "loss": 2.9415,
      "step": 195290
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0088672637939453,
      "learning_rate": 3.372986254017286e-05,
      "loss": 3.0622,
      "step": 195291
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9807798862457275,
      "learning_rate": 3.372797813124166e-05,
      "loss": 2.8755,
      "step": 195292
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1257121562957764,
      "learning_rate": 3.372609377181537e-05,
      "loss": 2.9153,
      "step": 195293
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.858208179473877,
      "learning_rate": 3.37242094618945e-05,
      "loss": 3.0728,
      "step": 195294
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.202707052230835,
      "learning_rate": 3.372232520147927e-05,
      "loss": 2.8814,
      "step": 195295
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.774793863296509,
      "learning_rate": 3.372044099057021e-05,
      "loss": 3.0589,
      "step": 195296
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.126298666000366,
      "learning_rate": 3.371855682916741e-05,
      "loss": 2.8995,
      "step": 195297
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7989375591278076,
      "learning_rate": 3.371667271727141e-05,
      "loss": 2.9153,
      "step": 195298
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.891517400741577,
      "learning_rate": 3.3714788654882444e-05,
      "loss": 2.8647,
      "step": 195299
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5039074420928955,
      "learning_rate": 3.3712904642001004e-05,
      "loss": 3.0095,
      "step": 195300
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.27980375289917,
      "learning_rate": 3.37110206786273e-05,
      "loss": 3.0298,
      "step": 195301
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3630309104919434,
      "learning_rate": 3.3709136764761854e-05,
      "loss": 3.1402,
      "step": 195302
    },
    {
      "epoch": 2.54,
      "grad_norm": 5.556905269622803,
      "learning_rate": 3.3707252900404815e-05,
      "loss": 2.7981,
      "step": 195303
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6682024002075195,
      "learning_rate": 3.3705369085556636e-05,
      "loss": 3.0842,
      "step": 195304
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8678669929504395,
      "learning_rate": 3.370348532021761e-05,
      "loss": 2.787,
      "step": 195305
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9266350269317627,
      "learning_rate": 3.370160160438823e-05,
      "loss": 2.8706,
      "step": 195306
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.490868330001831,
      "learning_rate": 3.369971793806867e-05,
      "loss": 2.9548,
      "step": 195307
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3509790897369385,
      "learning_rate": 3.369783432125941e-05,
      "loss": 2.9305,
      "step": 195308
    },
    {
      "epoch": 2.54,
      "grad_norm": 5.348330020904541,
      "learning_rate": 3.3695950753960744e-05,
      "loss": 2.5945,
      "step": 195309
    },
    {
      "epoch": 2.54,
      "grad_norm": 5.314472675323486,
      "learning_rate": 3.3694067236173036e-05,
      "loss": 2.7527,
      "step": 195310
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3762199878692627,
      "learning_rate": 3.3692183767896566e-05,
      "loss": 2.9641,
      "step": 195311
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.652484893798828,
      "learning_rate": 3.3690300349131814e-05,
      "loss": 2.8695,
      "step": 195312
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.350496768951416,
      "learning_rate": 3.3688416979879e-05,
      "loss": 2.9061,
      "step": 195313
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9379639625549316,
      "learning_rate": 3.368653366013857e-05,
      "loss": 2.8725,
      "step": 195314
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5910937786102295,
      "learning_rate": 3.368465038991087e-05,
      "loss": 2.7813,
      "step": 195315
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.275404930114746,
      "learning_rate": 3.36827671691962e-05,
      "loss": 2.7598,
      "step": 195316
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7571258544921875,
      "learning_rate": 3.368088399799489e-05,
      "loss": 3.0626,
      "step": 195317
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.342390775680542,
      "learning_rate": 3.36790008763074e-05,
      "loss": 2.9214,
      "step": 195318
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.941448211669922,
      "learning_rate": 3.367711780413391e-05,
      "loss": 2.8999,
      "step": 195319
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8345589637756348,
      "learning_rate": 3.367523478147498e-05,
      "loss": 2.9654,
      "step": 195320
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4827373027801514,
      "learning_rate": 3.367335180833081e-05,
      "loss": 2.9306,
      "step": 195321
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.531454563140869,
      "learning_rate": 3.367146888470176e-05,
      "loss": 2.9655,
      "step": 195322
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.164015293121338,
      "learning_rate": 3.3669586010588266e-05,
      "loss": 2.8157,
      "step": 195323
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8002383708953857,
      "learning_rate": 3.3667703185990605e-05,
      "loss": 2.8328,
      "step": 195324
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6661274433135986,
      "learning_rate": 3.366582041090909e-05,
      "loss": 2.8835,
      "step": 195325
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8764915466308594,
      "learning_rate": 3.366393768534421e-05,
      "loss": 2.7884,
      "step": 195326
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.911876916885376,
      "learning_rate": 3.366205500929622e-05,
      "loss": 2.7553,
      "step": 195327
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3725509643554688,
      "learning_rate": 3.366017238276542e-05,
      "loss": 3.0003,
      "step": 195328
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6087303161621094,
      "learning_rate": 3.365828980575227e-05,
      "loss": 2.9145,
      "step": 195329
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.016383171081543,
      "learning_rate": 3.3656407278257016e-05,
      "loss": 3.1589,
      "step": 195330
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3462743759155273,
      "learning_rate": 3.365452480028012e-05,
      "loss": 2.8335,
      "step": 195331
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.671912908554077,
      "learning_rate": 3.36526423718219e-05,
      "loss": 2.8088,
      "step": 195332
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9645586013793945,
      "learning_rate": 3.365075999288265e-05,
      "loss": 2.8893,
      "step": 195333
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.187138795852661,
      "learning_rate": 3.364887766346271e-05,
      "loss": 3.0032,
      "step": 195334
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.804810047149658,
      "learning_rate": 3.364699538356254e-05,
      "loss": 2.7741,
      "step": 195335
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8609766960144043,
      "learning_rate": 3.364511315318235e-05,
      "loss": 3.0673,
      "step": 195336
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.3168907165527344,
      "learning_rate": 3.364323097232261e-05,
      "loss": 3.3298,
      "step": 195337
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.183863401412964,
      "learning_rate": 3.364134884098363e-05,
      "loss": 2.8876,
      "step": 195338
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.769721508026123,
      "learning_rate": 3.3639466759165736e-05,
      "loss": 2.9497,
      "step": 195339
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6819546222686768,
      "learning_rate": 3.363758472686926e-05,
      "loss": 2.9249,
      "step": 195340
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.211435317993164,
      "learning_rate": 3.363570274409464e-05,
      "loss": 3.0481,
      "step": 195341
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.68670916557312,
      "learning_rate": 3.363382081084211e-05,
      "loss": 2.8884,
      "step": 195342
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.7128679752349854,
      "learning_rate": 3.363193892711212e-05,
      "loss": 2.9065,
      "step": 195343
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8677852153778076,
      "learning_rate": 3.363005709290493e-05,
      "loss": 2.6014,
      "step": 195344
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.012186527252197,
      "learning_rate": 3.362817530822108e-05,
      "loss": 2.8938,
      "step": 195345
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4512460231781006,
      "learning_rate": 3.3626293573060657e-05,
      "loss": 3.0535,
      "step": 195346
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.841264247894287,
      "learning_rate": 3.362441188742419e-05,
      "loss": 3.0605,
      "step": 195347
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.653864860534668,
      "learning_rate": 3.36225302513119e-05,
      "loss": 2.7405,
      "step": 195348
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9018826484680176,
      "learning_rate": 3.3620648664724256e-05,
      "loss": 2.9377,
      "step": 195349
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.903660535812378,
      "learning_rate": 3.361876712766154e-05,
      "loss": 3.0867,
      "step": 195350
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4618163108825684,
      "learning_rate": 3.361688564012424e-05,
      "loss": 3.0405,
      "step": 195351
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.092134714126587,
      "learning_rate": 3.361500420211245e-05,
      "loss": 2.9527,
      "step": 195352
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.512650489807129,
      "learning_rate": 3.361312281362671e-05,
      "loss": 2.9755,
      "step": 195353
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.892063617706299,
      "learning_rate": 3.3611241474667296e-05,
      "loss": 3.0521,
      "step": 195354
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8704323768615723,
      "learning_rate": 3.3609360185234594e-05,
      "loss": 2.8777,
      "step": 195355
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.637465238571167,
      "learning_rate": 3.3607478945328904e-05,
      "loss": 2.9896,
      "step": 195356
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.934022903442383,
      "learning_rate": 3.360559775495076e-05,
      "loss": 2.8052,
      "step": 195357
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.779304265975952,
      "learning_rate": 3.360371661410021e-05,
      "loss": 2.9216,
      "step": 195358
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.670440196990967,
      "learning_rate": 3.360183552277783e-05,
      "loss": 2.8651,
      "step": 195359
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0305683612823486,
      "learning_rate": 3.3599954480983835e-05,
      "loss": 2.8749,
      "step": 195360
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.78446888923645,
      "learning_rate": 3.359807348871869e-05,
      "loss": 2.8388,
      "step": 195361
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4012961387634277,
      "learning_rate": 3.3596192545982625e-05,
      "loss": 3.2167,
      "step": 195362
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.6488611698150635,
      "learning_rate": 3.359431165277621e-05,
      "loss": 2.5374,
      "step": 195363
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9721615314483643,
      "learning_rate": 3.359243080909947e-05,
      "loss": 2.7544,
      "step": 195364
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.670477867126465,
      "learning_rate": 3.3590550014953045e-05,
      "loss": 2.9624,
      "step": 195365
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.841187000274658,
      "learning_rate": 3.3588669270337065e-05,
      "loss": 2.8361,
      "step": 195366
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.579186201095581,
      "learning_rate": 3.3586788575252034e-05,
      "loss": 2.9315,
      "step": 195367
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.550392150878906,
      "learning_rate": 3.358490792969821e-05,
      "loss": 2.9007,
      "step": 195368
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.396512985229492,
      "learning_rate": 3.358302733367611e-05,
      "loss": 2.8835,
      "step": 195369
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.072021245956421,
      "learning_rate": 3.358114678718581e-05,
      "loss": 2.8081,
      "step": 195370
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7142481803894043,
      "learning_rate": 3.357926629022789e-05,
      "loss": 2.8132,
      "step": 195371
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8570282459259033,
      "learning_rate": 3.357738584280252e-05,
      "loss": 2.9388,
      "step": 195372
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7551193237304688,
      "learning_rate": 3.357550544491022e-05,
      "loss": 2.94,
      "step": 195373
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1063270568847656,
      "learning_rate": 3.357362509655121e-05,
      "loss": 3.0627,
      "step": 195374
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.792999744415283,
      "learning_rate": 3.357174479772593e-05,
      "loss": 2.8963,
      "step": 195375
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7843728065490723,
      "learning_rate": 3.356986454843473e-05,
      "loss": 3.1569,
      "step": 195376
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.800755500793457,
      "learning_rate": 3.356798434867788e-05,
      "loss": 3.1899,
      "step": 195377
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7625317573547363,
      "learning_rate": 3.3566104198455735e-05,
      "loss": 2.9395,
      "step": 195378
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8679327964782715,
      "learning_rate": 3.3564224097768736e-05,
      "loss": 3.0276,
      "step": 195379
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6787219047546387,
      "learning_rate": 3.3562344046617105e-05,
      "loss": 2.9095,
      "step": 195380
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.748978853225708,
      "learning_rate": 3.3560464045001356e-05,
      "loss": 3.033,
      "step": 195381
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8936145305633545,
      "learning_rate": 3.3558584092921714e-05,
      "loss": 2.8359,
      "step": 195382
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.734520435333252,
      "learning_rate": 3.355670419037858e-05,
      "loss": 3.1105,
      "step": 195383
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9840402603149414,
      "learning_rate": 3.355482433737222e-05,
      "loss": 2.851,
      "step": 195384
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2595016956329346,
      "learning_rate": 3.3552944533903104e-05,
      "loss": 2.8836,
      "step": 195385
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.929351329803467,
      "learning_rate": 3.355106477997146e-05,
      "loss": 2.9794,
      "step": 195386
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6701183319091797,
      "learning_rate": 3.3549185075577754e-05,
      "loss": 3.0027,
      "step": 195387
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.767812490463257,
      "learning_rate": 3.3547305420722326e-05,
      "loss": 2.9229,
      "step": 195388
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4678030014038086,
      "learning_rate": 3.3545425815405436e-05,
      "loss": 2.8374,
      "step": 195389
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7616214752197266,
      "learning_rate": 3.354354625962745e-05,
      "loss": 2.9719,
      "step": 195390
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.313361644744873,
      "learning_rate": 3.3541666753388804e-05,
      "loss": 2.8375,
      "step": 195391
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.869497537612915,
      "learning_rate": 3.35397872966897e-05,
      "loss": 2.8262,
      "step": 195392
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2478578090667725,
      "learning_rate": 3.3537907889530666e-05,
      "loss": 2.8295,
      "step": 195393
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6830039024353027,
      "learning_rate": 3.353602853191197e-05,
      "loss": 2.8157,
      "step": 195394
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0048279762268066,
      "learning_rate": 3.353414922383395e-05,
      "loss": 2.9587,
      "step": 195395
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.9442138671875,
      "learning_rate": 3.3532269965296933e-05,
      "loss": 2.8886,
      "step": 195396
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1685564517974854,
      "learning_rate": 3.353039075630132e-05,
      "loss": 2.8617,
      "step": 195397
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6393866539001465,
      "learning_rate": 3.352851159684737e-05,
      "loss": 2.9873,
      "step": 195398
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.967463731765747,
      "learning_rate": 3.3526632486935564e-05,
      "loss": 3.0614,
      "step": 195399
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.047686815261841,
      "learning_rate": 3.35247534265662e-05,
      "loss": 2.9937,
      "step": 195400
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1292903423309326,
      "learning_rate": 3.352287441573963e-05,
      "loss": 2.6976,
      "step": 195401
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.2213497161865234,
      "learning_rate": 3.35209954544561e-05,
      "loss": 2.9274,
      "step": 195402
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.043513774871826,
      "learning_rate": 3.3519116542716106e-05,
      "loss": 2.9032,
      "step": 195403
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.3739705085754395,
      "learning_rate": 3.351723768051992e-05,
      "loss": 2.9907,
      "step": 195404
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.677093505859375,
      "learning_rate": 3.3515358867867924e-05,
      "loss": 2.9462,
      "step": 195405
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.215047597885132,
      "learning_rate": 3.351348010476047e-05,
      "loss": 2.9345,
      "step": 195406
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.550048351287842,
      "learning_rate": 3.351160139119782e-05,
      "loss": 2.8755,
      "step": 195407
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7244110107421875,
      "learning_rate": 3.3509722727180464e-05,
      "loss": 2.8278,
      "step": 195408
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.114162921905518,
      "learning_rate": 3.350784411270868e-05,
      "loss": 3.0633,
      "step": 195409
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.096832513809204,
      "learning_rate": 3.3505965547782765e-05,
      "loss": 2.6963,
      "step": 195410
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.845172882080078,
      "learning_rate": 3.350408703240315e-05,
      "loss": 2.8964,
      "step": 195411
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.480250835418701,
      "learning_rate": 3.35022085665702e-05,
      "loss": 3.1023,
      "step": 195412
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0227928161621094,
      "learning_rate": 3.350033015028411e-05,
      "loss": 2.94,
      "step": 195413
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7888741493225098,
      "learning_rate": 3.349845178354547e-05,
      "loss": 2.8764,
      "step": 195414
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9678306579589844,
      "learning_rate": 3.3496573466354447e-05,
      "loss": 2.7028,
      "step": 195415
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.877896785736084,
      "learning_rate": 3.34946951987114e-05,
      "loss": 3.0675,
      "step": 195416
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0933163166046143,
      "learning_rate": 3.349281698061674e-05,
      "loss": 2.837,
      "step": 195417
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8074710369110107,
      "learning_rate": 3.3490938812070855e-05,
      "loss": 3.2757,
      "step": 195418
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0190141201019287,
      "learning_rate": 3.348906069307393e-05,
      "loss": 3.0049,
      "step": 195419
    },
    {
      "epoch": 2.54,
      "grad_norm": 4.109463691711426,
      "learning_rate": 3.348718262362651e-05,
      "loss": 2.7913,
      "step": 195420
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.091770887374878,
      "learning_rate": 3.348530460372878e-05,
      "loss": 3.1635,
      "step": 195421
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.6299796104431152,
      "learning_rate": 3.3483426633381215e-05,
      "loss": 2.9046,
      "step": 195422
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7683653831481934,
      "learning_rate": 3.348154871258412e-05,
      "loss": 2.9854,
      "step": 195423
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.675661563873291,
      "learning_rate": 3.3479670841337845e-05,
      "loss": 2.8582,
      "step": 195424
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.764664649963379,
      "learning_rate": 3.347779301964264e-05,
      "loss": 2.892,
      "step": 195425
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7364466190338135,
      "learning_rate": 3.347591524749903e-05,
      "loss": 2.7662,
      "step": 195426
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7295708656311035,
      "learning_rate": 3.347403752490722e-05,
      "loss": 3.0606,
      "step": 195427
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.47912859916687,
      "learning_rate": 3.3472159851867665e-05,
      "loss": 2.9136,
      "step": 195428
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.004732370376587,
      "learning_rate": 3.347028222838061e-05,
      "loss": 2.9098,
      "step": 195429
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.4601728916168213,
      "learning_rate": 3.346840465444658e-05,
      "loss": 2.8177,
      "step": 195430
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.432384490966797,
      "learning_rate": 3.346652713006571e-05,
      "loss": 2.8664,
      "step": 195431
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.0641684532165527,
      "learning_rate": 3.3464649655238476e-05,
      "loss": 2.7997,
      "step": 195432
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.8295671939849854,
      "learning_rate": 3.3462772229965126e-05,
      "loss": 2.9148,
      "step": 195433
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4756295680999756,
      "learning_rate": 3.346089485424618e-05,
      "loss": 3.0418,
      "step": 195434
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.4202022552490234,
      "learning_rate": 3.345901752808179e-05,
      "loss": 3.1394,
      "step": 195435
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1255810260772705,
      "learning_rate": 3.3457140251472526e-05,
      "loss": 2.9168,
      "step": 195436
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1188881397247314,
      "learning_rate": 3.3455263024418486e-05,
      "loss": 2.8777,
      "step": 195437
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5495033264160156,
      "learning_rate": 3.345338584692021e-05,
      "loss": 3.0455,
      "step": 195438
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.1046066284179688,
      "learning_rate": 3.345150871897793e-05,
      "loss": 2.8245,
      "step": 195439
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.3824007511138916,
      "learning_rate": 3.34496316405921e-05,
      "loss": 2.9841,
      "step": 195440
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.041577100753784,
      "learning_rate": 3.3447754611762935e-05,
      "loss": 3.013,
      "step": 195441
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.942927360534668,
      "learning_rate": 3.3445877632490955e-05,
      "loss": 2.7756,
      "step": 195442
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.9112205505371094,
      "learning_rate": 3.3444000702776406e-05,
      "loss": 3.0966,
      "step": 195443
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.888941526412964,
      "learning_rate": 3.344212382261965e-05,
      "loss": 3.0629,
      "step": 195444
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7849318981170654,
      "learning_rate": 3.344024699202098e-05,
      "loss": 2.9114,
      "step": 195445
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.83962345123291,
      "learning_rate": 3.3438370210980836e-05,
      "loss": 3.0346,
      "step": 195446
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.523159980773926,
      "learning_rate": 3.343649347949948e-05,
      "loss": 2.6562,
      "step": 195447
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.7347233295440674,
      "learning_rate": 3.343461679757739e-05,
      "loss": 3.0357,
      "step": 195448
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.62090802192688,
      "learning_rate": 3.343274016521481e-05,
      "loss": 3.1697,
      "step": 195449
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.59523606300354,
      "learning_rate": 3.343086358241216e-05,
      "loss": 2.8306,
      "step": 195450
    },
    {
      "epoch": 2.54,
      "grad_norm": 3.5315966606140137,
      "learning_rate": 3.342898704916963e-05,
      "loss": 2.8441,
      "step": 195451
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.750788688659668,
      "learning_rate": 3.342711056548779e-05,
      "loss": 3.0572,
      "step": 195452
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.773392915725708,
      "learning_rate": 3.342523413136677e-05,
      "loss": 2.7121,
      "step": 195453
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.763660430908203,
      "learning_rate": 3.342335774680714e-05,
      "loss": 2.8065,
      "step": 195454
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5483713150024414,
      "learning_rate": 3.3421481411809136e-05,
      "loss": 2.8677,
      "step": 195455
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.5896332263946533,
      "learning_rate": 3.341960512637308e-05,
      "loss": 2.9608,
      "step": 195456
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.686079740524292,
      "learning_rate": 3.341772889049932e-05,
      "loss": 3.0098,
      "step": 195457
    },
    {
      "epoch": 2.54,
      "grad_norm": 2.611729621887207,
      "learning_rate": 3.3415852704188276e-05,
      "loss": 2.67,
      "step": 195458
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6885645389556885,
      "learning_rate": 3.3413976567440216e-05,
      "loss": 2.9928,
      "step": 195459
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7079410552978516,
      "learning_rate": 3.341210048025558e-05,
      "loss": 2.8589,
      "step": 195460
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8971924781799316,
      "learning_rate": 3.341022444263466e-05,
      "loss": 2.929,
      "step": 195461
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9812209606170654,
      "learning_rate": 3.3408348454577797e-05,
      "loss": 3.1181,
      "step": 195462
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6177291870117188,
      "learning_rate": 3.340647251608531e-05,
      "loss": 3.1152,
      "step": 195463
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.1995415687561035,
      "learning_rate": 3.3404596627157686e-05,
      "loss": 2.8779,
      "step": 195464
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.780317783355713,
      "learning_rate": 3.340272078779508e-05,
      "loss": 2.8001,
      "step": 195465
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2313849925994873,
      "learning_rate": 3.3400844997998024e-05,
      "loss": 2.7461,
      "step": 195466
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.862502336502075,
      "learning_rate": 3.339896925776675e-05,
      "loss": 2.7005,
      "step": 195467
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7170095443725586,
      "learning_rate": 3.3397093567101663e-05,
      "loss": 2.8626,
      "step": 195468
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.912283182144165,
      "learning_rate": 3.339521792600302e-05,
      "loss": 3.0953,
      "step": 195469
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2061705589294434,
      "learning_rate": 3.339334233447131e-05,
      "loss": 2.6471,
      "step": 195470
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6903107166290283,
      "learning_rate": 3.339146679250674e-05,
      "loss": 3.0138,
      "step": 195471
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9622819423675537,
      "learning_rate": 3.338959130010979e-05,
      "loss": 2.7791,
      "step": 195472
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.294404029846191,
      "learning_rate": 3.338771585728075e-05,
      "loss": 2.9689,
      "step": 195473
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.077858924865723,
      "learning_rate": 3.3385840464019994e-05,
      "loss": 2.5689,
      "step": 195474
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.454990863800049,
      "learning_rate": 3.338396512032776e-05,
      "loss": 3.1151,
      "step": 195475
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.545743465423584,
      "learning_rate": 3.3382089826204505e-05,
      "loss": 2.8515,
      "step": 195476
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7005209922790527,
      "learning_rate": 3.338021458165053e-05,
      "loss": 2.9549,
      "step": 195477
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.095449447631836,
      "learning_rate": 3.337833938666628e-05,
      "loss": 3.1409,
      "step": 195478
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.192190170288086,
      "learning_rate": 3.3376464241252e-05,
      "loss": 2.9622,
      "step": 195479
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.269837856292725,
      "learning_rate": 3.337458914540807e-05,
      "loss": 3.0556,
      "step": 195480
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.133315086364746,
      "learning_rate": 3.337271409913479e-05,
      "loss": 3.1607,
      "step": 195481
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6735165119171143,
      "learning_rate": 3.3370839102432625e-05,
      "loss": 2.9725,
      "step": 195482
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9475975036621094,
      "learning_rate": 3.336896415530177e-05,
      "loss": 2.7818,
      "step": 195483
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1139307022094727,
      "learning_rate": 3.336708925774273e-05,
      "loss": 2.8045,
      "step": 195484
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.688565492630005,
      "learning_rate": 3.336521440975577e-05,
      "loss": 2.8351,
      "step": 195485
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.884397506713867,
      "learning_rate": 3.336333961134126e-05,
      "loss": 2.9699,
      "step": 195486
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.038008689880371,
      "learning_rate": 3.336146486249949e-05,
      "loss": 3.0231,
      "step": 195487
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.9156432151794434,
      "learning_rate": 3.33595901632309e-05,
      "loss": 2.7699,
      "step": 195488
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.884990930557251,
      "learning_rate": 3.335771551353572e-05,
      "loss": 2.742,
      "step": 195489
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5257394313812256,
      "learning_rate": 3.335584091341446e-05,
      "loss": 2.8797,
      "step": 195490
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.845876932144165,
      "learning_rate": 3.335396636286737e-05,
      "loss": 2.9191,
      "step": 195491
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.744799852371216,
      "learning_rate": 3.335209186189476e-05,
      "loss": 2.9253,
      "step": 195492
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7741634845733643,
      "learning_rate": 3.3350217410497094e-05,
      "loss": 3.0446,
      "step": 195493
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9376251697540283,
      "learning_rate": 3.334834300867464e-05,
      "loss": 2.8441,
      "step": 195494
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.898521661758423,
      "learning_rate": 3.3346468656427694e-05,
      "loss": 3.1351,
      "step": 195495
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.675306797027588,
      "learning_rate": 3.334459435375676e-05,
      "loss": 3.2026,
      "step": 195496
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5896382331848145,
      "learning_rate": 3.3342720100662103e-05,
      "loss": 2.7052,
      "step": 195497
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1432766914367676,
      "learning_rate": 3.3340845897143986e-05,
      "loss": 2.9121,
      "step": 195498
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.224517822265625,
      "learning_rate": 3.333897174320288e-05,
      "loss": 2.8412,
      "step": 195499
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.86492657661438,
      "learning_rate": 3.333709763883915e-05,
      "loss": 2.7899,
      "step": 195500
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.571732521057129,
      "learning_rate": 3.3335223584053e-05,
      "loss": 2.863,
      "step": 195501
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0468246936798096,
      "learning_rate": 3.333334957884492e-05,
      "loss": 2.9446,
      "step": 195502
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5608925819396973,
      "learning_rate": 3.333147562321517e-05,
      "loss": 2.9883,
      "step": 195503
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.109117269515991,
      "learning_rate": 3.332960171716411e-05,
      "loss": 3.0264,
      "step": 195504
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.469695806503296,
      "learning_rate": 3.332772786069219e-05,
      "loss": 3.3083,
      "step": 195505
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.110898017883301,
      "learning_rate": 3.33258540537996e-05,
      "loss": 2.9271,
      "step": 195506
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.506613254547119,
      "learning_rate": 3.332398029648683e-05,
      "loss": 2.6696,
      "step": 195507
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.157263994216919,
      "learning_rate": 3.3322106588754164e-05,
      "loss": 2.927,
      "step": 195508
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3120644092559814,
      "learning_rate": 3.332023293060186e-05,
      "loss": 2.8006,
      "step": 195509
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7465901374816895,
      "learning_rate": 3.331835932203047e-05,
      "loss": 2.7664,
      "step": 195510
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.616377353668213,
      "learning_rate": 3.331648576304019e-05,
      "loss": 3.162,
      "step": 195511
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3126537799835205,
      "learning_rate": 3.331461225363138e-05,
      "loss": 2.9203,
      "step": 195512
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.735990047454834,
      "learning_rate": 3.331273879380447e-05,
      "loss": 3.002,
      "step": 195513
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1085216999053955,
      "learning_rate": 3.3310865383559704e-05,
      "loss": 2.9154,
      "step": 195514
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8066492080688477,
      "learning_rate": 3.330899202289751e-05,
      "loss": 3.0751,
      "step": 195515
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.97623610496521,
      "learning_rate": 3.3307118711818246e-05,
      "loss": 2.9406,
      "step": 195516
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1482481956481934,
      "learning_rate": 3.330524545032219e-05,
      "loss": 2.8596,
      "step": 195517
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.756078004837036,
      "learning_rate": 3.3303372238409675e-05,
      "loss": 2.7615,
      "step": 195518
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9131267070770264,
      "learning_rate": 3.3301499076081194e-05,
      "loss": 2.9246,
      "step": 195519
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.184976100921631,
      "learning_rate": 3.3299625963336884e-05,
      "loss": 2.9968,
      "step": 195520
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.90022349357605,
      "learning_rate": 3.329775290017731e-05,
      "loss": 3.1337,
      "step": 195521
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.557399034500122,
      "learning_rate": 3.329587988660267e-05,
      "loss": 2.7108,
      "step": 195522
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.128866672515869,
      "learning_rate": 3.3294006922613406e-05,
      "loss": 3.0357,
      "step": 195523
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4757509231567383,
      "learning_rate": 3.3292134008209734e-05,
      "loss": 2.9346,
      "step": 195524
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.129805564880371,
      "learning_rate": 3.329026114339217e-05,
      "loss": 2.9352,
      "step": 195525
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9968130588531494,
      "learning_rate": 3.328838832816091e-05,
      "loss": 2.8837,
      "step": 195526
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.774186611175537,
      "learning_rate": 3.328651556251641e-05,
      "loss": 2.8496,
      "step": 195527
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1030361652374268,
      "learning_rate": 3.328464284645902e-05,
      "loss": 2.979,
      "step": 195528
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1489720344543457,
      "learning_rate": 3.328277017998903e-05,
      "loss": 3.0064,
      "step": 195529
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5179615020751953,
      "learning_rate": 3.3280897563106765e-05,
      "loss": 2.5661,
      "step": 195530
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.899138927459717,
      "learning_rate": 3.327902499581264e-05,
      "loss": 3.1156,
      "step": 195531
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.046504497528076,
      "learning_rate": 3.327715247810695e-05,
      "loss": 2.8579,
      "step": 195532
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0035457611083984,
      "learning_rate": 3.3275280009990125e-05,
      "loss": 2.7262,
      "step": 195533
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.929689884185791,
      "learning_rate": 3.3273407591462465e-05,
      "loss": 3.1346,
      "step": 195534
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.528319835662842,
      "learning_rate": 3.32715352225243e-05,
      "loss": 2.9263,
      "step": 195535
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0110580921173096,
      "learning_rate": 3.3269662903175945e-05,
      "loss": 3.024,
      "step": 195536
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.167344570159912,
      "learning_rate": 3.3267790633417844e-05,
      "loss": 2.714,
      "step": 195537
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.250091075897217,
      "learning_rate": 3.326591841325025e-05,
      "loss": 2.8974,
      "step": 195538
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.434291362762451,
      "learning_rate": 3.326404624267358e-05,
      "loss": 2.8821,
      "step": 195539
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.221282958984375,
      "learning_rate": 3.326217412168821e-05,
      "loss": 3.2229,
      "step": 195540
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.614663600921631,
      "learning_rate": 3.326030205029441e-05,
      "loss": 3.0247,
      "step": 195541
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.582282781600952,
      "learning_rate": 3.32584300284925e-05,
      "loss": 3.0656,
      "step": 195542
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2433199882507324,
      "learning_rate": 3.325655805628292e-05,
      "loss": 2.9158,
      "step": 195543
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.610360622406006,
      "learning_rate": 3.325468613366594e-05,
      "loss": 2.896,
      "step": 195544
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0294461250305176,
      "learning_rate": 3.3252814260642026e-05,
      "loss": 3.1877,
      "step": 195545
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.648080587387085,
      "learning_rate": 3.3250942437211436e-05,
      "loss": 3.1091,
      "step": 195546
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.107508897781372,
      "learning_rate": 3.324907066337451e-05,
      "loss": 2.8568,
      "step": 195547
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.3345160484313965,
      "learning_rate": 3.324719893913158e-05,
      "loss": 3.1073,
      "step": 195548
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.8261189460754395,
      "learning_rate": 3.324532726448308e-05,
      "loss": 2.9973,
      "step": 195549
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.578091621398926,
      "learning_rate": 3.324345563942927e-05,
      "loss": 2.6108,
      "step": 195550
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.4652485847473145,
      "learning_rate": 3.324158406397059e-05,
      "loss": 3.1067,
      "step": 195551
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.739656448364258,
      "learning_rate": 3.323971253810731e-05,
      "loss": 2.9917,
      "step": 195552
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.718123197555542,
      "learning_rate": 3.323784106183982e-05,
      "loss": 2.9008,
      "step": 195553
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.026947498321533,
      "learning_rate": 3.323596963516839e-05,
      "loss": 2.7732,
      "step": 195554
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9966914653778076,
      "learning_rate": 3.323409825809349e-05,
      "loss": 2.6921,
      "step": 195555
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.304074287414551,
      "learning_rate": 3.323222693061535e-05,
      "loss": 3.1612,
      "step": 195556
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.3623366355896,
      "learning_rate": 3.323035565273443e-05,
      "loss": 3.1175,
      "step": 195557
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.781331539154053,
      "learning_rate": 3.322848442445104e-05,
      "loss": 2.9318,
      "step": 195558
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.250050067901611,
      "learning_rate": 3.322661324576548e-05,
      "loss": 2.9175,
      "step": 195559
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9495389461517334,
      "learning_rate": 3.3224742116678106e-05,
      "loss": 2.9739,
      "step": 195560
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.50504732131958,
      "learning_rate": 3.322287103718929e-05,
      "loss": 2.9826,
      "step": 195561
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5722367763519287,
      "learning_rate": 3.322100000729937e-05,
      "loss": 2.9427,
      "step": 195562
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6887435913085938,
      "learning_rate": 3.321912902700874e-05,
      "loss": 2.7987,
      "step": 195563
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.982466220855713,
      "learning_rate": 3.321725809631773e-05,
      "loss": 2.779,
      "step": 195564
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9941911697387695,
      "learning_rate": 3.321538721522665e-05,
      "loss": 2.8685,
      "step": 195565
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.7975828647613525,
      "learning_rate": 3.3213516383735795e-05,
      "loss": 2.9833,
      "step": 195566
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0202386379241943,
      "learning_rate": 3.321164560184566e-05,
      "loss": 3.2055,
      "step": 195567
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.501835584640503,
      "learning_rate": 3.320977486955645e-05,
      "loss": 3.026,
      "step": 195568
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.992689609527588,
      "learning_rate": 3.320790418686863e-05,
      "loss": 2.8826,
      "step": 195569
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6504547595977783,
      "learning_rate": 3.320603355378253e-05,
      "loss": 2.7315,
      "step": 195570
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.489899158477783,
      "learning_rate": 3.320416297029842e-05,
      "loss": 2.7464,
      "step": 195571
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.381070137023926,
      "learning_rate": 3.320229243641667e-05,
      "loss": 2.8812,
      "step": 195572
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9730188846588135,
      "learning_rate": 3.32004219521377e-05,
      "loss": 2.8221,
      "step": 195573
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.0696492195129395,
      "learning_rate": 3.319855151746172e-05,
      "loss": 2.9938,
      "step": 195574
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3151323795318604,
      "learning_rate": 3.3196681132389266e-05,
      "loss": 2.8107,
      "step": 195575
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4037137031555176,
      "learning_rate": 3.31948107969205e-05,
      "loss": 2.8712,
      "step": 195576
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9133141040802,
      "learning_rate": 3.319294051105592e-05,
      "loss": 2.817,
      "step": 195577
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.682128429412842,
      "learning_rate": 3.319107027479583e-05,
      "loss": 3.0072,
      "step": 195578
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0735926628112793,
      "learning_rate": 3.3189200088140525e-05,
      "loss": 3.0794,
      "step": 195579
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.982262372970581,
      "learning_rate": 3.3187329951090334e-05,
      "loss": 2.9803,
      "step": 195580
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3604371547698975,
      "learning_rate": 3.3185459863645734e-05,
      "loss": 3.0162,
      "step": 195581
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.493820905685425,
      "learning_rate": 3.3183589825806886e-05,
      "loss": 2.8074,
      "step": 195582
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5784196853637695,
      "learning_rate": 3.318171983757436e-05,
      "loss": 3.22,
      "step": 195583
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.938511848449707,
      "learning_rate": 3.317984989894835e-05,
      "loss": 3.1397,
      "step": 195584
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3031861782073975,
      "learning_rate": 3.3177980009929264e-05,
      "loss": 3.0899,
      "step": 195585
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4788012504577637,
      "learning_rate": 3.317611017051736e-05,
      "loss": 3.0509,
      "step": 195586
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2241835594177246,
      "learning_rate": 3.3174240380713135e-05,
      "loss": 2.8394,
      "step": 195587
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.705134630203247,
      "learning_rate": 3.3172370640516766e-05,
      "loss": 3.0721,
      "step": 195588
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.161982774734497,
      "learning_rate": 3.3170500949928745e-05,
      "loss": 2.9416,
      "step": 195589
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6745593547821045,
      "learning_rate": 3.3168631308949377e-05,
      "loss": 3.1218,
      "step": 195590
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9923622608184814,
      "learning_rate": 3.316676171757896e-05,
      "loss": 2.9106,
      "step": 195591
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1147372722625732,
      "learning_rate": 3.316489217581793e-05,
      "loss": 3.0709,
      "step": 195592
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3816874027252197,
      "learning_rate": 3.316302268366654e-05,
      "loss": 2.9338,
      "step": 195593
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.640738010406494,
      "learning_rate": 3.316115324112517e-05,
      "loss": 3.1741,
      "step": 195594
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7425975799560547,
      "learning_rate": 3.3159283848194216e-05,
      "loss": 2.855,
      "step": 195595
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5530383586883545,
      "learning_rate": 3.315741450487398e-05,
      "loss": 2.8854,
      "step": 195596
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.066098690032959,
      "learning_rate": 3.315554521116479e-05,
      "loss": 2.8556,
      "step": 195597
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.852783679962158,
      "learning_rate": 3.315367596706705e-05,
      "loss": 3.0662,
      "step": 195598
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0084826946258545,
      "learning_rate": 3.315180677258109e-05,
      "loss": 2.7833,
      "step": 195599
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.018537521362305,
      "learning_rate": 3.314993762770721e-05,
      "loss": 2.9099,
      "step": 195600
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.698847770690918,
      "learning_rate": 3.314806853244581e-05,
      "loss": 2.8552,
      "step": 195601
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5580005645751953,
      "learning_rate": 3.3146199486797263e-05,
      "loss": 2.8964,
      "step": 195602
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6323862075805664,
      "learning_rate": 3.314433049076179e-05,
      "loss": 2.8856,
      "step": 195603
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8271596431732178,
      "learning_rate": 3.314246154433987e-05,
      "loss": 3.072,
      "step": 195604
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.790757656097412,
      "learning_rate": 3.314059264753176e-05,
      "loss": 2.653,
      "step": 195605
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9766297340393066,
      "learning_rate": 3.313872380033793e-05,
      "loss": 2.8558,
      "step": 195606
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.821760654449463,
      "learning_rate": 3.3136855002758645e-05,
      "loss": 3.0012,
      "step": 195607
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.384334087371826,
      "learning_rate": 3.3134986254794235e-05,
      "loss": 2.9979,
      "step": 195608
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6292848587036133,
      "learning_rate": 3.313311755644501e-05,
      "loss": 2.9182,
      "step": 195609
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5085694789886475,
      "learning_rate": 3.313124890771143e-05,
      "loss": 3.0535,
      "step": 195610
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7982895374298096,
      "learning_rate": 3.3129380308593754e-05,
      "loss": 2.7634,
      "step": 195611
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.188180923461914,
      "learning_rate": 3.3127511759092395e-05,
      "loss": 2.9905,
      "step": 195612
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5164878368377686,
      "learning_rate": 3.312564325920771e-05,
      "loss": 3.0387,
      "step": 195613
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3983983993530273,
      "learning_rate": 3.312377480893998e-05,
      "loss": 2.8252,
      "step": 195614
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8018176555633545,
      "learning_rate": 3.312190640828951e-05,
      "loss": 2.8637,
      "step": 195615
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9578661918640137,
      "learning_rate": 3.312003805725679e-05,
      "loss": 2.8685,
      "step": 195616
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4805800914764404,
      "learning_rate": 3.3118169755842016e-05,
      "loss": 2.9767,
      "step": 195617
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7333033084869385,
      "learning_rate": 3.311630150404568e-05,
      "loss": 2.9554,
      "step": 195618
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4352219104766846,
      "learning_rate": 3.311443330186806e-05,
      "loss": 2.7934,
      "step": 195619
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5987823009490967,
      "learning_rate": 3.311256514930951e-05,
      "loss": 2.6478,
      "step": 195620
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.582300901412964,
      "learning_rate": 3.3110697046370303e-05,
      "loss": 2.8252,
      "step": 195621
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1318776607513428,
      "learning_rate": 3.310882899305094e-05,
      "loss": 2.8945,
      "step": 195622
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.858189344406128,
      "learning_rate": 3.310696098935158e-05,
      "loss": 2.9667,
      "step": 195623
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.9080989360809326,
      "learning_rate": 3.3105093035272766e-05,
      "loss": 2.7952,
      "step": 195624
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.169248580932617,
      "learning_rate": 3.310322513081476e-05,
      "loss": 2.9216,
      "step": 195625
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7549140453338623,
      "learning_rate": 3.310135727597789e-05,
      "loss": 2.9337,
      "step": 195626
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.863399028778076,
      "learning_rate": 3.309948947076246e-05,
      "loss": 2.8766,
      "step": 195627
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2151269912719727,
      "learning_rate": 3.309762171516891e-05,
      "loss": 2.9071,
      "step": 195628
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.9786365032196045,
      "learning_rate": 3.309575400919753e-05,
      "loss": 3.0855,
      "step": 195629
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9429948329925537,
      "learning_rate": 3.309388635284872e-05,
      "loss": 2.8865,
      "step": 195630
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2645645141601562,
      "learning_rate": 3.3092018746122785e-05,
      "loss": 2.8828,
      "step": 195631
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3810954093933105,
      "learning_rate": 3.309015118902012e-05,
      "loss": 3.131,
      "step": 195632
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1878373622894287,
      "learning_rate": 3.308828368154093e-05,
      "loss": 2.8858,
      "step": 195633
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.143845558166504,
      "learning_rate": 3.3086416223685775e-05,
      "loss": 2.9045,
      "step": 195634
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.955289602279663,
      "learning_rate": 3.308454881545479e-05,
      "loss": 3.0519,
      "step": 195635
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4234981536865234,
      "learning_rate": 3.308268145684852e-05,
      "loss": 3.0822,
      "step": 195636
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8661327362060547,
      "learning_rate": 3.3080814147867174e-05,
      "loss": 2.8661,
      "step": 195637
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5755653381347656,
      "learning_rate": 3.3078946888511173e-05,
      "loss": 3.0147,
      "step": 195638
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0856525897979736,
      "learning_rate": 3.3077079678780774e-05,
      "loss": 2.9126,
      "step": 195639
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.571575403213501,
      "learning_rate": 3.307521251867644e-05,
      "loss": 2.8859,
      "step": 195640
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.759589910507202,
      "learning_rate": 3.3073345408198415e-05,
      "loss": 3.1279,
      "step": 195641
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.159916400909424,
      "learning_rate": 3.307147834734716e-05,
      "loss": 2.9128,
      "step": 195642
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.174959897994995,
      "learning_rate": 3.306961133612287e-05,
      "loss": 3.1167,
      "step": 195643
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.708094596862793,
      "learning_rate": 3.3067744374526116e-05,
      "loss": 2.7872,
      "step": 195644
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5255255699157715,
      "learning_rate": 3.306587746255697e-05,
      "loss": 3.1876,
      "step": 195645
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2406516075134277,
      "learning_rate": 3.3064010600215984e-05,
      "loss": 3.0942,
      "step": 195646
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.457796096801758,
      "learning_rate": 3.306214378750337e-05,
      "loss": 2.7652,
      "step": 195647
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4854557514190674,
      "learning_rate": 3.3060277024419624e-05,
      "loss": 2.9855,
      "step": 195648
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7473130226135254,
      "learning_rate": 3.305841031096491e-05,
      "loss": 2.7781,
      "step": 195649
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.879690408706665,
      "learning_rate": 3.3056543647139835e-05,
      "loss": 2.8359,
      "step": 195650
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9849534034729004,
      "learning_rate": 3.3054677032944456e-05,
      "loss": 2.7986,
      "step": 195651
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.193152904510498,
      "learning_rate": 3.3052810468379306e-05,
      "loss": 2.7192,
      "step": 195652
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2120039463043213,
      "learning_rate": 3.305094395344463e-05,
      "loss": 2.8341,
      "step": 195653
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.047572612762451,
      "learning_rate": 3.3049077488140885e-05,
      "loss": 2.9892,
      "step": 195654
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.087553024291992,
      "learning_rate": 3.304721107246827e-05,
      "loss": 2.961,
      "step": 195655
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.776345729827881,
      "learning_rate": 3.3045344706427356e-05,
      "loss": 2.9855,
      "step": 195656
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.697033166885376,
      "learning_rate": 3.304347839001824e-05,
      "loss": 3.0002,
      "step": 195657
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.615917205810547,
      "learning_rate": 3.304161212324142e-05,
      "loss": 2.8601,
      "step": 195658
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.470944404602051,
      "learning_rate": 3.303974590609717e-05,
      "loss": 3.253,
      "step": 195659
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1659598350524902,
      "learning_rate": 3.3037879738585916e-05,
      "loss": 2.878,
      "step": 195660
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.868412494659424,
      "learning_rate": 3.303601362070789e-05,
      "loss": 2.8847,
      "step": 195661
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9975147247314453,
      "learning_rate": 3.303414755246363e-05,
      "loss": 3.0453,
      "step": 195662
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.012261152267456,
      "learning_rate": 3.303228153385327e-05,
      "loss": 2.9857,
      "step": 195663
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8735134601593018,
      "learning_rate": 3.303041556487727e-05,
      "loss": 2.9738,
      "step": 195664
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2359046936035156,
      "learning_rate": 3.3028549645535896e-05,
      "loss": 2.9327,
      "step": 195665
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.456737518310547,
      "learning_rate": 3.302668377582966e-05,
      "loss": 3.1033,
      "step": 195666
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.004254102706909,
      "learning_rate": 3.3024817955758675e-05,
      "loss": 2.9211,
      "step": 195667
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.7338669300079346,
      "learning_rate": 3.3022952185323525e-05,
      "loss": 3.2218,
      "step": 195668
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.624722480773926,
      "learning_rate": 3.302108646452444e-05,
      "loss": 2.8605,
      "step": 195669
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8891074657440186,
      "learning_rate": 3.301922079336178e-05,
      "loss": 2.8155,
      "step": 195670
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.779763698577881,
      "learning_rate": 3.3017355171835816e-05,
      "loss": 3.0529,
      "step": 195671
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.822866678237915,
      "learning_rate": 3.301548959994701e-05,
      "loss": 3.0959,
      "step": 195672
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.024588108062744,
      "learning_rate": 3.301362407769563e-05,
      "loss": 3.1869,
      "step": 195673
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0108659267425537,
      "learning_rate": 3.301175860508212e-05,
      "loss": 3.1719,
      "step": 195674
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7565879821777344,
      "learning_rate": 3.300989318210673e-05,
      "loss": 2.8107,
      "step": 195675
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.825247049331665,
      "learning_rate": 3.30080278087698e-05,
      "loss": 3.0262,
      "step": 195676
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1390295028686523,
      "learning_rate": 3.30061624850718e-05,
      "loss": 2.7676,
      "step": 195677
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.839081287384033,
      "learning_rate": 3.300429721101295e-05,
      "loss": 2.8539,
      "step": 195678
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7378756999969482,
      "learning_rate": 3.3002431986593605e-05,
      "loss": 3.038,
      "step": 195679
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7714710235595703,
      "learning_rate": 3.300056681181421e-05,
      "loss": 2.9749,
      "step": 195680
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9323644638061523,
      "learning_rate": 3.2998701686675044e-05,
      "loss": 3.0655,
      "step": 195681
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.995814323425293,
      "learning_rate": 3.29968366111764e-05,
      "loss": 2.7216,
      "step": 195682
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.272702693939209,
      "learning_rate": 3.299497158531875e-05,
      "loss": 2.9298,
      "step": 195683
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6470556259155273,
      "learning_rate": 3.299310660910239e-05,
      "loss": 3.0855,
      "step": 195684
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.623966693878174,
      "learning_rate": 3.299124168252756e-05,
      "loss": 2.8718,
      "step": 195685
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1619231700897217,
      "learning_rate": 3.298937680559478e-05,
      "loss": 3.1595,
      "step": 195686
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.859236478805542,
      "learning_rate": 3.2987511978304296e-05,
      "loss": 2.9334,
      "step": 195687
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.687605381011963,
      "learning_rate": 3.298564720065643e-05,
      "loss": 2.8431,
      "step": 195688
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.7003045082092285,
      "learning_rate": 3.298378247265162e-05,
      "loss": 2.7778,
      "step": 195689
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9017434120178223,
      "learning_rate": 3.2981917794290133e-05,
      "loss": 3.0714,
      "step": 195690
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9516379833221436,
      "learning_rate": 3.2980053165572374e-05,
      "loss": 2.9636,
      "step": 195691
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0882089138031006,
      "learning_rate": 3.2978188586498666e-05,
      "loss": 2.8909,
      "step": 195692
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9618349075317383,
      "learning_rate": 3.2976324057069384e-05,
      "loss": 2.9577,
      "step": 195693
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.155893087387085,
      "learning_rate": 3.297445957728475e-05,
      "loss": 2.8349,
      "step": 195694
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0406808853149414,
      "learning_rate": 3.297259514714527e-05,
      "loss": 2.9745,
      "step": 195695
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.99418044090271,
      "learning_rate": 3.297073076665119e-05,
      "loss": 3.0195,
      "step": 195696
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.670675039291382,
      "learning_rate": 3.296886643580292e-05,
      "loss": 2.9516,
      "step": 195697
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0033528804779053,
      "learning_rate": 3.296700215460081e-05,
      "loss": 2.8236,
      "step": 195698
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.97060227394104,
      "learning_rate": 3.296513792304515e-05,
      "loss": 2.7688,
      "step": 195699
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0998356342315674,
      "learning_rate": 3.2963273741136274e-05,
      "loss": 2.9377,
      "step": 195700
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5609922409057617,
      "learning_rate": 3.296140960887459e-05,
      "loss": 3.1165,
      "step": 195701
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6415822505950928,
      "learning_rate": 3.295954552626039e-05,
      "loss": 2.8219,
      "step": 195702
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.689451217651367,
      "learning_rate": 3.2957681493294105e-05,
      "loss": 2.7067,
      "step": 195703
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5305538177490234,
      "learning_rate": 3.2955817509976014e-05,
      "loss": 3.0444,
      "step": 195704
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.087817907333374,
      "learning_rate": 3.29539535763065e-05,
      "loss": 2.9037,
      "step": 195705
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.709176540374756,
      "learning_rate": 3.295208969228581e-05,
      "loss": 2.9391,
      "step": 195706
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.358360528945923,
      "learning_rate": 3.295022585791444e-05,
      "loss": 3.0347,
      "step": 195707
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.9932074546813965,
      "learning_rate": 3.294836207319261e-05,
      "loss": 3.0335,
      "step": 195708
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.129876136779785,
      "learning_rate": 3.2946498338120775e-05,
      "loss": 2.6979,
      "step": 195709
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7563493251800537,
      "learning_rate": 3.294463465269916e-05,
      "loss": 3.1553,
      "step": 195710
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9479477405548096,
      "learning_rate": 3.2942771016928285e-05,
      "loss": 2.8854,
      "step": 195711
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.008342742919922,
      "learning_rate": 3.2940907430808296e-05,
      "loss": 2.9923,
      "step": 195712
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.434735059738159,
      "learning_rate": 3.293904389433966e-05,
      "loss": 2.9161,
      "step": 195713
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.337447166442871,
      "learning_rate": 3.293718040752267e-05,
      "loss": 2.8168,
      "step": 195714
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9362330436706543,
      "learning_rate": 3.293531697035776e-05,
      "loss": 3.0321,
      "step": 195715
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3460164070129395,
      "learning_rate": 3.293345358284514e-05,
      "loss": 3.097,
      "step": 195716
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1022913455963135,
      "learning_rate": 3.293159024498533e-05,
      "loss": 2.7769,
      "step": 195717
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.4009294509887695,
      "learning_rate": 3.29297269567785e-05,
      "loss": 2.9548,
      "step": 195718
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.567521095275879,
      "learning_rate": 3.292786371822512e-05,
      "loss": 3.0242,
      "step": 195719
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0819296836853027,
      "learning_rate": 3.292600052932543e-05,
      "loss": 2.9567,
      "step": 195720
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.303867816925049,
      "learning_rate": 3.292413739007991e-05,
      "loss": 3.1148,
      "step": 195721
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0120041370391846,
      "learning_rate": 3.2922274300488737e-05,
      "loss": 3.1825,
      "step": 195722
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.250713348388672,
      "learning_rate": 3.292041126055251e-05,
      "loss": 2.6894,
      "step": 195723
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.864727735519409,
      "learning_rate": 3.2918548270271305e-05,
      "loss": 2.9346,
      "step": 195724
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8581154346466064,
      "learning_rate": 3.291668532964561e-05,
      "loss": 2.8166,
      "step": 195725
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.962104797363281,
      "learning_rate": 3.2914822438675726e-05,
      "loss": 2.91,
      "step": 195726
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.753944158554077,
      "learning_rate": 3.2912959597362054e-05,
      "loss": 3.1182,
      "step": 195727
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.8076930046081543,
      "learning_rate": 3.291109680570483e-05,
      "loss": 2.8815,
      "step": 195728
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6389336585998535,
      "learning_rate": 3.2909234063704616e-05,
      "loss": 2.9043,
      "step": 195729
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.0269951820373535,
      "learning_rate": 3.2907371371361514e-05,
      "loss": 3.1266,
      "step": 195730
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.140716791152954,
      "learning_rate": 3.290550872867599e-05,
      "loss": 3.1194,
      "step": 195731
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8739547729492188,
      "learning_rate": 3.290364613564834e-05,
      "loss": 2.8654,
      "step": 195732
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.07509446144104,
      "learning_rate": 3.290178359227901e-05,
      "loss": 2.9587,
      "step": 195733
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.579418182373047,
      "learning_rate": 3.289992109856822e-05,
      "loss": 2.906,
      "step": 195734
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.593061923980713,
      "learning_rate": 3.289805865451647e-05,
      "loss": 2.7814,
      "step": 195735
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.387284517288208,
      "learning_rate": 3.28961962601239e-05,
      "loss": 3.0711,
      "step": 195736
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7436163425445557,
      "learning_rate": 3.289433391539104e-05,
      "loss": 3.0992,
      "step": 195737
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7618210315704346,
      "learning_rate": 3.289247162031812e-05,
      "loss": 2.9793,
      "step": 195738
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0746681690216064,
      "learning_rate": 3.2890609374905576e-05,
      "loss": 2.6644,
      "step": 195739
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6228296756744385,
      "learning_rate": 3.288874717915364e-05,
      "loss": 2.9871,
      "step": 195740
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.111289024353027,
      "learning_rate": 3.288688503306285e-05,
      "loss": 3.0968,
      "step": 195741
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.1927714347839355,
      "learning_rate": 3.2885022936633334e-05,
      "loss": 2.6434,
      "step": 195742
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6218044757843018,
      "learning_rate": 3.2883160889865555e-05,
      "loss": 2.8706,
      "step": 195743
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0209972858428955,
      "learning_rate": 3.2881298892759825e-05,
      "loss": 2.7071,
      "step": 195744
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.889014959335327,
      "learning_rate": 3.2879436945316526e-05,
      "loss": 2.8908,
      "step": 195745
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.320464611053467,
      "learning_rate": 3.287757504753594e-05,
      "loss": 3.0803,
      "step": 195746
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.845737934112549,
      "learning_rate": 3.287571319941856e-05,
      "loss": 2.8023,
      "step": 195747
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4277048110961914,
      "learning_rate": 3.287385140096452e-05,
      "loss": 2.9307,
      "step": 195748
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.80100679397583,
      "learning_rate": 3.2871989652174356e-05,
      "loss": 2.7404,
      "step": 195749
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.844754457473755,
      "learning_rate": 3.287012795304823e-05,
      "loss": 3.2139,
      "step": 195750
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.2319231033325195,
      "learning_rate": 3.2868266303586677e-05,
      "loss": 2.6082,
      "step": 195751
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.837735176086426,
      "learning_rate": 3.2866404703789896e-05,
      "loss": 2.7285,
      "step": 195752
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.837641954421997,
      "learning_rate": 3.286454315365832e-05,
      "loss": 2.8978,
      "step": 195753
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1404600143432617,
      "learning_rate": 3.286268165319231e-05,
      "loss": 2.9706,
      "step": 195754
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5066170692443848,
      "learning_rate": 3.286082020239214e-05,
      "loss": 2.9876,
      "step": 195755
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.434847116470337,
      "learning_rate": 3.285895880125815e-05,
      "loss": 2.8419,
      "step": 195756
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3540892601013184,
      "learning_rate": 3.285709744979076e-05,
      "loss": 3.0172,
      "step": 195757
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.376708745956421,
      "learning_rate": 3.285523614799024e-05,
      "loss": 2.9003,
      "step": 195758
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.8001489639282227,
      "learning_rate": 3.285337489585702e-05,
      "loss": 3.1531,
      "step": 195759
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0409090518951416,
      "learning_rate": 3.285151369339141e-05,
      "loss": 2.8085,
      "step": 195760
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6354312896728516,
      "learning_rate": 3.284965254059373e-05,
      "loss": 2.9606,
      "step": 195761
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8414971828460693,
      "learning_rate": 3.284779143746429e-05,
      "loss": 3.2455,
      "step": 195762
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.810922145843506,
      "learning_rate": 3.284593038400355e-05,
      "loss": 2.9589,
      "step": 195763
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.2525177001953125,
      "learning_rate": 3.2844069380211746e-05,
      "loss": 2.7688,
      "step": 195764
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3267569541931152,
      "learning_rate": 3.284220842608931e-05,
      "loss": 2.9702,
      "step": 195765
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0296249389648438,
      "learning_rate": 3.2840347521636546e-05,
      "loss": 3.1208,
      "step": 195766
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.642343759536743,
      "learning_rate": 3.283848666685378e-05,
      "loss": 3.0192,
      "step": 195767
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6248953342437744,
      "learning_rate": 3.2836625861741415e-05,
      "loss": 2.8684,
      "step": 195768
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.271960496902466,
      "learning_rate": 3.283476510629975e-05,
      "loss": 2.7852,
      "step": 195769
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8518290519714355,
      "learning_rate": 3.2832904400529124e-05,
      "loss": 2.945,
      "step": 195770
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.048814296722412,
      "learning_rate": 3.2831043744429965e-05,
      "loss": 2.9325,
      "step": 195771
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5324649810791016,
      "learning_rate": 3.282918313800253e-05,
      "loss": 2.8593,
      "step": 195772
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8724286556243896,
      "learning_rate": 3.282732258124714e-05,
      "loss": 3.074,
      "step": 195773
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2387354373931885,
      "learning_rate": 3.282546207416424e-05,
      "loss": 2.8977,
      "step": 195774
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.511626958847046,
      "learning_rate": 3.2823601616754116e-05,
      "loss": 2.8812,
      "step": 195775
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.462895631790161,
      "learning_rate": 3.282174120901715e-05,
      "loss": 2.9661,
      "step": 195776
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5219247341156006,
      "learning_rate": 3.2819880850953686e-05,
      "loss": 2.6986,
      "step": 195777
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.630143642425537,
      "learning_rate": 3.281802054256402e-05,
      "loss": 2.8667,
      "step": 195778
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.707616090774536,
      "learning_rate": 3.281616028384848e-05,
      "loss": 2.8669,
      "step": 195779
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3489675521850586,
      "learning_rate": 3.281430007480754e-05,
      "loss": 2.917,
      "step": 195780
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6370856761932373,
      "learning_rate": 3.2812439915441366e-05,
      "loss": 2.9572,
      "step": 195781
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7329235076904297,
      "learning_rate": 3.281057980575049e-05,
      "loss": 2.9045,
      "step": 195782
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5517561435699463,
      "learning_rate": 3.280871974573518e-05,
      "loss": 2.8414,
      "step": 195783
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6341233253479004,
      "learning_rate": 3.280685973539576e-05,
      "loss": 2.7364,
      "step": 195784
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.479907751083374,
      "learning_rate": 3.2804999774732545e-05,
      "loss": 2.926,
      "step": 195785
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.118607759475708,
      "learning_rate": 3.2803139863745955e-05,
      "loss": 3.0917,
      "step": 195786
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4193899631500244,
      "learning_rate": 3.2801280002436256e-05,
      "loss": 3.0276,
      "step": 195787
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.239879608154297,
      "learning_rate": 3.279942019080393e-05,
      "loss": 2.8897,
      "step": 195788
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7665557861328125,
      "learning_rate": 3.2797560428849125e-05,
      "loss": 2.9519,
      "step": 195789
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.954193115234375,
      "learning_rate": 3.279570071657245e-05,
      "loss": 3.1521,
      "step": 195790
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4262917041778564,
      "learning_rate": 3.2793841053973966e-05,
      "loss": 2.7916,
      "step": 195791
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8129143714904785,
      "learning_rate": 3.279198144105422e-05,
      "loss": 3.1253,
      "step": 195792
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9119009971618652,
      "learning_rate": 3.279012187781343e-05,
      "loss": 2.9878,
      "step": 195793
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.766585111618042,
      "learning_rate": 3.278826236425206e-05,
      "loss": 2.9525,
      "step": 195794
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.662442445755005,
      "learning_rate": 3.278640290037032e-05,
      "loss": 2.7582,
      "step": 195795
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2260022163391113,
      "learning_rate": 3.278454348616878e-05,
      "loss": 2.9425,
      "step": 195796
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.689429998397827,
      "learning_rate": 3.2782684121647494e-05,
      "loss": 3.3074,
      "step": 195797
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.63195276260376,
      "learning_rate": 3.2780824806807e-05,
      "loss": 2.8699,
      "step": 195798
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.945019006729126,
      "learning_rate": 3.2778965541647575e-05,
      "loss": 2.8393,
      "step": 195799
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6952953338623047,
      "learning_rate": 3.27771063261696e-05,
      "loss": 3.0038,
      "step": 195800
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.949688673019409,
      "learning_rate": 3.277524716037339e-05,
      "loss": 2.9762,
      "step": 195801
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.136256456375122,
      "learning_rate": 3.27733880442594e-05,
      "loss": 2.6736,
      "step": 195802
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6516342163085938,
      "learning_rate": 3.277152897782776e-05,
      "loss": 2.8166,
      "step": 195803
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8949146270751953,
      "learning_rate": 3.2769669961078996e-05,
      "loss": 3.0545,
      "step": 195804
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0368552207946777,
      "learning_rate": 3.2767810994013346e-05,
      "loss": 2.8506,
      "step": 195805
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4392731189727783,
      "learning_rate": 3.2765952076631253e-05,
      "loss": 2.8459,
      "step": 195806
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0636444091796875,
      "learning_rate": 3.276409320893295e-05,
      "loss": 2.956,
      "step": 195807
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0264785289764404,
      "learning_rate": 3.2762234390918976e-05,
      "loss": 3.1005,
      "step": 195808
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0362906455993652,
      "learning_rate": 3.276037562258942e-05,
      "loss": 2.7353,
      "step": 195809
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.543524980545044,
      "learning_rate": 3.275851690394482e-05,
      "loss": 3.1697,
      "step": 195810
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7717344760894775,
      "learning_rate": 3.2756658234985414e-05,
      "loss": 2.9888,
      "step": 195811
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6151363849639893,
      "learning_rate": 3.275479961571162e-05,
      "loss": 3.0528,
      "step": 195812
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.897231101989746,
      "learning_rate": 3.2752941046123724e-05,
      "loss": 2.9538,
      "step": 195813
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6844253540039062,
      "learning_rate": 3.275108252622218e-05,
      "loss": 3.0256,
      "step": 195814
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.455612897872925,
      "learning_rate": 3.274922405600716e-05,
      "loss": 3.0864,
      "step": 195815
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8422274589538574,
      "learning_rate": 3.274736563547916e-05,
      "loss": 2.8018,
      "step": 195816
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1829726696014404,
      "learning_rate": 3.2745507264638415e-05,
      "loss": 2.7707,
      "step": 195817
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.065218210220337,
      "learning_rate": 3.274364894348539e-05,
      "loss": 2.8724,
      "step": 195818
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.839154005050659,
      "learning_rate": 3.274179067202032e-05,
      "loss": 2.9509,
      "step": 195819
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.78507137298584,
      "learning_rate": 3.2739932450243665e-05,
      "loss": 3.1817,
      "step": 195820
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.529139757156372,
      "learning_rate": 3.273807427815564e-05,
      "loss": 2.9074,
      "step": 195821
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.909325122833252,
      "learning_rate": 3.273621615575666e-05,
      "loss": 2.9362,
      "step": 195822
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0776097774505615,
      "learning_rate": 3.273435808304703e-05,
      "loss": 2.7956,
      "step": 195823
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6943349838256836,
      "learning_rate": 3.273250006002719e-05,
      "loss": 3.1977,
      "step": 195824
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5480189323425293,
      "learning_rate": 3.2730642086697335e-05,
      "loss": 2.5566,
      "step": 195825
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.159236192703247,
      "learning_rate": 3.2728784163058064e-05,
      "loss": 2.7863,
      "step": 195826
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.8329179286956787,
      "learning_rate": 3.272692628910941e-05,
      "loss": 2.8537,
      "step": 195827
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2109203338623047,
      "learning_rate": 3.2725068464851914e-05,
      "loss": 3.0193,
      "step": 195828
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.795928001403809,
      "learning_rate": 3.272321069028583e-05,
      "loss": 2.9917,
      "step": 195829
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7117226123809814,
      "learning_rate": 3.27213529654116e-05,
      "loss": 2.8307,
      "step": 195830
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7589528560638428,
      "learning_rate": 3.2719495290229446e-05,
      "loss": 2.9497,
      "step": 195831
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.927048683166504,
      "learning_rate": 3.2717637664739945e-05,
      "loss": 2.7872,
      "step": 195832
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.12854266166687,
      "learning_rate": 3.2715780088943136e-05,
      "loss": 2.7288,
      "step": 195833
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.87052321434021,
      "learning_rate": 3.2713922562839535e-05,
      "loss": 2.8801,
      "step": 195834
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8888742923736572,
      "learning_rate": 3.271206508642945e-05,
      "loss": 2.8298,
      "step": 195835
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4007480144500732,
      "learning_rate": 3.271020765971328e-05,
      "loss": 3.0791,
      "step": 195836
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.73471736907959,
      "learning_rate": 3.2708350282691266e-05,
      "loss": 3.0946,
      "step": 195837
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8446109294891357,
      "learning_rate": 3.270649295536386e-05,
      "loss": 3.1332,
      "step": 195838
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7589309215545654,
      "learning_rate": 3.270463567773137e-05,
      "loss": 2.9401,
      "step": 195839
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8782477378845215,
      "learning_rate": 3.270277844979417e-05,
      "loss": 2.6594,
      "step": 195840
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8056693077087402,
      "learning_rate": 3.270092127155247e-05,
      "loss": 3.0896,
      "step": 195841
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.434556722640991,
      "learning_rate": 3.269906414300679e-05,
      "loss": 2.9584,
      "step": 195842
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.008527994155884,
      "learning_rate": 3.2697207064157327e-05,
      "loss": 2.9981,
      "step": 195843
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6427483558654785,
      "learning_rate": 3.2695350035004574e-05,
      "loss": 2.9836,
      "step": 195844
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6211113929748535,
      "learning_rate": 3.26934930555488e-05,
      "loss": 2.9367,
      "step": 195845
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.168429374694824,
      "learning_rate": 3.2691636125790345e-05,
      "loss": 2.9039,
      "step": 195846
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1224567890167236,
      "learning_rate": 3.26897792457295e-05,
      "loss": 2.9595,
      "step": 195847
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.107046127319336,
      "learning_rate": 3.268792241536673e-05,
      "loss": 2.9904,
      "step": 195848
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.789170503616333,
      "learning_rate": 3.268606563470224e-05,
      "loss": 2.9036,
      "step": 195849
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2978992462158203,
      "learning_rate": 3.268420890373656e-05,
      "loss": 3.1239,
      "step": 195850
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.659385919570923,
      "learning_rate": 3.2682352222469896e-05,
      "loss": 3.077,
      "step": 195851
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.774047613143921,
      "learning_rate": 3.268049559090261e-05,
      "loss": 2.8277,
      "step": 195852
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.629509925842285,
      "learning_rate": 3.26786390090351e-05,
      "loss": 2.9725,
      "step": 195853
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1035850048065186,
      "learning_rate": 3.2676782476867666e-05,
      "loss": 2.7674,
      "step": 195854
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.093334197998047,
      "learning_rate": 3.2674925994400616e-05,
      "loss": 2.67,
      "step": 195855
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1449642181396484,
      "learning_rate": 3.2673069561634404e-05,
      "loss": 3.0662,
      "step": 195856
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.675783395767212,
      "learning_rate": 3.26712131785693e-05,
      "loss": 2.8603,
      "step": 195857
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.302158355712891,
      "learning_rate": 3.266935684520561e-05,
      "loss": 2.8547,
      "step": 195858
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.627674579620361,
      "learning_rate": 3.2667500561543825e-05,
      "loss": 2.9953,
      "step": 195859
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6355643272399902,
      "learning_rate": 3.266564432758415e-05,
      "loss": 3.121,
      "step": 195860
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5160980224609375,
      "learning_rate": 3.266378814332695e-05,
      "loss": 2.7595,
      "step": 195861
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.408304214477539,
      "learning_rate": 3.266193200877266e-05,
      "loss": 2.976,
      "step": 195862
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5490963459014893,
      "learning_rate": 3.2660075923921544e-05,
      "loss": 2.7644,
      "step": 195863
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.832807779312134,
      "learning_rate": 3.265821988877391e-05,
      "loss": 2.7045,
      "step": 195864
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.337179183959961,
      "learning_rate": 3.265636390333024e-05,
      "loss": 2.852,
      "step": 195865
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2833480834960938,
      "learning_rate": 3.2654507967590716e-05,
      "loss": 2.7021,
      "step": 195866
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1438379287719727,
      "learning_rate": 3.265265208155583e-05,
      "loss": 2.904,
      "step": 195867
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4438095092773438,
      "learning_rate": 3.2650796245225855e-05,
      "loss": 2.8355,
      "step": 195868
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.413733959197998,
      "learning_rate": 3.264894045860115e-05,
      "loss": 2.9412,
      "step": 195869
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.548987865447998,
      "learning_rate": 3.2647084721682016e-05,
      "loss": 2.9931,
      "step": 195870
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2332539558410645,
      "learning_rate": 3.264522903446886e-05,
      "loss": 2.7867,
      "step": 195871
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.904693126678467,
      "learning_rate": 3.264337339696197e-05,
      "loss": 3.0383,
      "step": 195872
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5670318603515625,
      "learning_rate": 3.2641517809161754e-05,
      "loss": 2.9884,
      "step": 195873
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.163743495941162,
      "learning_rate": 3.263966227106848e-05,
      "loss": 2.8425,
      "step": 195874
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.7529244422912598,
      "learning_rate": 3.263780678268267e-05,
      "loss": 2.8346,
      "step": 195875
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6613333225250244,
      "learning_rate": 3.263595134400441e-05,
      "loss": 3.1365,
      "step": 195876
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.662872076034546,
      "learning_rate": 3.263409595503424e-05,
      "loss": 3.0773,
      "step": 195877
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.902628183364868,
      "learning_rate": 3.2632240615772385e-05,
      "loss": 3.1449,
      "step": 195878
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.528651714324951,
      "learning_rate": 3.26303853262193e-05,
      "loss": 3.0511,
      "step": 195879
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.865968942642212,
      "learning_rate": 3.262853008637521e-05,
      "loss": 2.8683,
      "step": 195880
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.424804210662842,
      "learning_rate": 3.2626674896240635e-05,
      "loss": 2.9734,
      "step": 195881
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.906034469604492,
      "learning_rate": 3.262481975581569e-05,
      "loss": 3.094,
      "step": 195882
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0027711391448975,
      "learning_rate": 3.262296466510091e-05,
      "loss": 3.0204,
      "step": 195883
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.412997245788574,
      "learning_rate": 3.2621109624096474e-05,
      "loss": 2.8659,
      "step": 195884
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.9884936809539795,
      "learning_rate": 3.261925463280291e-05,
      "loss": 3.2474,
      "step": 195885
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7776525020599365,
      "learning_rate": 3.261739969122044e-05,
      "loss": 2.9044,
      "step": 195886
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8987433910369873,
      "learning_rate": 3.2615544799349505e-05,
      "loss": 3.1272,
      "step": 195887
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.793987274169922,
      "learning_rate": 3.2613689957190306e-05,
      "loss": 2.955,
      "step": 195888
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6583328247070312,
      "learning_rate": 3.261183516474331e-05,
      "loss": 2.9598,
      "step": 195889
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9616498947143555,
      "learning_rate": 3.2609980422008775e-05,
      "loss": 3.1373,
      "step": 195890
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.854487180709839,
      "learning_rate": 3.260812572898714e-05,
      "loss": 2.879,
      "step": 195891
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8762521743774414,
      "learning_rate": 3.260627108567865e-05,
      "loss": 3.2199,
      "step": 195892
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4859299659729004,
      "learning_rate": 3.260441649208382e-05,
      "loss": 3.2459,
      "step": 195893
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.440509796142578,
      "learning_rate": 3.260256194820276e-05,
      "loss": 2.637,
      "step": 195894
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.9956278800964355,
      "learning_rate": 3.260070745403599e-05,
      "loss": 2.9626,
      "step": 195895
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.076239109039307,
      "learning_rate": 3.2598853009583726e-05,
      "loss": 3.0211,
      "step": 195896
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.19889497756958,
      "learning_rate": 3.259699861484646e-05,
      "loss": 3.0046,
      "step": 195897
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7747673988342285,
      "learning_rate": 3.2595144269824395e-05,
      "loss": 3.1683,
      "step": 195898
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.356770038604736,
      "learning_rate": 3.2593289974518055e-05,
      "loss": 3.0671,
      "step": 195899
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.500850677490234,
      "learning_rate": 3.259143572892755e-05,
      "loss": 3.004,
      "step": 195900
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.578875541687012,
      "learning_rate": 3.258958153305341e-05,
      "loss": 2.9648,
      "step": 195901
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.742137908935547,
      "learning_rate": 3.258772738689583e-05,
      "loss": 2.9947,
      "step": 195902
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.788726329803467,
      "learning_rate": 3.2585873290455325e-05,
      "loss": 3.2542,
      "step": 195903
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9740235805511475,
      "learning_rate": 3.258401924373211e-05,
      "loss": 2.8528,
      "step": 195904
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6243655681610107,
      "learning_rate": 3.2582165246726656e-05,
      "loss": 2.847,
      "step": 195905
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9559051990509033,
      "learning_rate": 3.258031129943913e-05,
      "loss": 2.8577,
      "step": 195906
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9326558113098145,
      "learning_rate": 3.257845740187001e-05,
      "loss": 2.6259,
      "step": 195907
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.8207287788391113,
      "learning_rate": 3.2576603554019576e-05,
      "loss": 2.8716,
      "step": 195908
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7971911430358887,
      "learning_rate": 3.2574749755888244e-05,
      "loss": 3.0679,
      "step": 195909
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7498228549957275,
      "learning_rate": 3.2572896007476235e-05,
      "loss": 2.9828,
      "step": 195910
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.760324239730835,
      "learning_rate": 3.257104230878406e-05,
      "loss": 2.7715,
      "step": 195911
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.309171438217163,
      "learning_rate": 3.256918865981194e-05,
      "loss": 2.935,
      "step": 195912
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.9705612659454346,
      "learning_rate": 3.2567335060560286e-05,
      "loss": 2.8349,
      "step": 195913
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9629690647125244,
      "learning_rate": 3.2565481511029356e-05,
      "loss": 2.7721,
      "step": 195914
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0932159423828125,
      "learning_rate": 3.2563628011219586e-05,
      "loss": 2.9253,
      "step": 195915
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.831590175628662,
      "learning_rate": 3.2561774561131246e-05,
      "loss": 2.9327,
      "step": 195916
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3007800579071045,
      "learning_rate": 3.255992116076477e-05,
      "loss": 2.8147,
      "step": 195917
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5872573852539062,
      "learning_rate": 3.2558067810120484e-05,
      "loss": 2.9834,
      "step": 195918
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.864384412765503,
      "learning_rate": 3.255621450919865e-05,
      "loss": 2.9391,
      "step": 195919
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.52949857711792,
      "learning_rate": 3.255436125799965e-05,
      "loss": 2.7883,
      "step": 195920
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.543379545211792,
      "learning_rate": 3.2552508056523876e-05,
      "loss": 2.9731,
      "step": 195921
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6817784309387207,
      "learning_rate": 3.255065490477159e-05,
      "loss": 2.6707,
      "step": 195922
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.972930908203125,
      "learning_rate": 3.254880180274323e-05,
      "loss": 3.1452,
      "step": 195923
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5814764499664307,
      "learning_rate": 3.254694875043913e-05,
      "loss": 3.0369,
      "step": 195924
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1911869049072266,
      "learning_rate": 3.254509574785955e-05,
      "loss": 2.9899,
      "step": 195925
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8912885189056396,
      "learning_rate": 3.254324279500486e-05,
      "loss": 3.0465,
      "step": 195926
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1176164150238037,
      "learning_rate": 3.2541389891875505e-05,
      "loss": 3.0104,
      "step": 195927
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8570849895477295,
      "learning_rate": 3.2539537038471664e-05,
      "loss": 2.8725,
      "step": 195928
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8896899223327637,
      "learning_rate": 3.253768423479385e-05,
      "loss": 2.9105,
      "step": 195929
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.79642391204834,
      "learning_rate": 3.253583148084232e-05,
      "loss": 2.9213,
      "step": 195930
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.208266496658325,
      "learning_rate": 3.253397877661742e-05,
      "loss": 2.9961,
      "step": 195931
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5857393741607666,
      "learning_rate": 3.253212612211944e-05,
      "loss": 2.8546,
      "step": 195932
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9012057781219482,
      "learning_rate": 3.253027351734888e-05,
      "loss": 2.8626,
      "step": 195933
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0641119480133057,
      "learning_rate": 3.252842096230591e-05,
      "loss": 3.0176,
      "step": 195934
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.560511589050293,
      "learning_rate": 3.2526568456991e-05,
      "loss": 2.5374,
      "step": 195935
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.745044708251953,
      "learning_rate": 3.252471600140447e-05,
      "loss": 2.887,
      "step": 195936
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.705927610397339,
      "learning_rate": 3.252286359554657e-05,
      "loss": 2.9346,
      "step": 195937
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5271999835968018,
      "learning_rate": 3.2521011239417785e-05,
      "loss": 3.0542,
      "step": 195938
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9415621757507324,
      "learning_rate": 3.251915893301838e-05,
      "loss": 3.1348,
      "step": 195939
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.561135768890381,
      "learning_rate": 3.251730667634868e-05,
      "loss": 2.8591,
      "step": 195940
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0058743953704834,
      "learning_rate": 3.251545446940912e-05,
      "loss": 3.0819,
      "step": 195941
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.808746814727783,
      "learning_rate": 3.2513602312199985e-05,
      "loss": 2.9351,
      "step": 195942
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7103004455566406,
      "learning_rate": 3.2511750204721534e-05,
      "loss": 2.895,
      "step": 195943
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9569365978240967,
      "learning_rate": 3.250989814697427e-05,
      "loss": 3.2226,
      "step": 195944
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.28824520111084,
      "learning_rate": 3.2508046138958486e-05,
      "loss": 2.8318,
      "step": 195945
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.535629987716675,
      "learning_rate": 3.2506194180674426e-05,
      "loss": 2.9644,
      "step": 195946
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.972994804382324,
      "learning_rate": 3.250434227212255e-05,
      "loss": 2.949,
      "step": 195947
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.37553334236145,
      "learning_rate": 3.2502490413303196e-05,
      "loss": 2.9983,
      "step": 195948
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.200024366378784,
      "learning_rate": 3.2500638604216624e-05,
      "loss": 3.1118,
      "step": 195949
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.754639148712158,
      "learning_rate": 3.24987868448633e-05,
      "loss": 2.8136,
      "step": 195950
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.102534532546997,
      "learning_rate": 3.2496935135243426e-05,
      "loss": 2.8418,
      "step": 195951
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6344823837280273,
      "learning_rate": 3.249508347535747e-05,
      "loss": 2.7258,
      "step": 195952
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.954831600189209,
      "learning_rate": 3.2493231865205735e-05,
      "loss": 2.7431,
      "step": 195953
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.800194501876831,
      "learning_rate": 3.249138030478855e-05,
      "loss": 2.8658,
      "step": 195954
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.430467128753662,
      "learning_rate": 3.248952879410625e-05,
      "loss": 2.7799,
      "step": 195955
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2401530742645264,
      "learning_rate": 3.248767733315919e-05,
      "loss": 2.8473,
      "step": 195956
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7327895164489746,
      "learning_rate": 3.2485825921947714e-05,
      "loss": 2.8963,
      "step": 195957
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.121635913848877,
      "learning_rate": 3.248397456047219e-05,
      "loss": 2.8849,
      "step": 195958
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.989124059677124,
      "learning_rate": 3.2482123248732914e-05,
      "loss": 2.7456,
      "step": 195959
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5797834396362305,
      "learning_rate": 3.248027198673039e-05,
      "loss": 3.0928,
      "step": 195960
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.589793682098389,
      "learning_rate": 3.2478420774464706e-05,
      "loss": 2.797,
      "step": 195961
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.8717825412750244,
      "learning_rate": 3.247656961193637e-05,
      "loss": 2.7052,
      "step": 195962
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8210744857788086,
      "learning_rate": 3.247471849914566e-05,
      "loss": 3.0856,
      "step": 195963
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.815826654434204,
      "learning_rate": 3.2472867436092983e-05,
      "loss": 2.9601,
      "step": 195964
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4938619136810303,
      "learning_rate": 3.24710164227786e-05,
      "loss": 2.7959,
      "step": 195965
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4671237468719482,
      "learning_rate": 3.246916545920302e-05,
      "loss": 3.0119,
      "step": 195966
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8919739723205566,
      "learning_rate": 3.246731454536638e-05,
      "loss": 3.2642,
      "step": 195967
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.233518600463867,
      "learning_rate": 3.246546368126913e-05,
      "loss": 2.8599,
      "step": 195968
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.6216936111450195,
      "learning_rate": 3.2463612866911596e-05,
      "loss": 2.8915,
      "step": 195969
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.638311386108398,
      "learning_rate": 3.2461762102294134e-05,
      "loss": 3.0428,
      "step": 195970
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.906457424163818,
      "learning_rate": 3.245991138741705e-05,
      "loss": 3.0176,
      "step": 195971
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5191261768341064,
      "learning_rate": 3.245806072228084e-05,
      "loss": 2.8969,
      "step": 195972
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.841235637664795,
      "learning_rate": 3.245621010688558e-05,
      "loss": 2.8529,
      "step": 195973
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3159661293029785,
      "learning_rate": 3.245435954123187e-05,
      "loss": 2.9356,
      "step": 195974
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.2160656452178955,
      "learning_rate": 3.245250902531983e-05,
      "loss": 3.2066,
      "step": 195975
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6724584102630615,
      "learning_rate": 3.245065855915e-05,
      "loss": 3.0352,
      "step": 195976
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5536696910858154,
      "learning_rate": 3.244880814272258e-05,
      "loss": 2.7745,
      "step": 195977
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.4105987548828125,
      "learning_rate": 3.244695777603807e-05,
      "loss": 2.7442,
      "step": 195978
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.614577531814575,
      "learning_rate": 3.244510745909667e-05,
      "loss": 2.6487,
      "step": 195979
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.436461925506592,
      "learning_rate": 3.244325719189882e-05,
      "loss": 2.972,
      "step": 195980
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6422626972198486,
      "learning_rate": 3.244140697444474e-05,
      "loss": 3.0163,
      "step": 195981
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0907323360443115,
      "learning_rate": 3.24395568067349e-05,
      "loss": 2.7503,
      "step": 195982
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2911384105682373,
      "learning_rate": 3.2437706688769536e-05,
      "loss": 3.1586,
      "step": 195983
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.764124870300293,
      "learning_rate": 3.243585662054915e-05,
      "loss": 2.9736,
      "step": 195984
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0693233013153076,
      "learning_rate": 3.243400660207394e-05,
      "loss": 3.0061,
      "step": 195985
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5892488956451416,
      "learning_rate": 3.243215663334433e-05,
      "loss": 3.0574,
      "step": 195986
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7486987113952637,
      "learning_rate": 3.243030671436057e-05,
      "loss": 2.7904,
      "step": 195987
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8958375453948975,
      "learning_rate": 3.242845684512311e-05,
      "loss": 3.1038,
      "step": 195988
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2696917057037354,
      "learning_rate": 3.2426607025632225e-05,
      "loss": 2.6859,
      "step": 195989
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9831786155700684,
      "learning_rate": 3.242475725588831e-05,
      "loss": 3.0997,
      "step": 195990
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9462873935699463,
      "learning_rate": 3.242290753589171e-05,
      "loss": 3.0274,
      "step": 195991
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3705928325653076,
      "learning_rate": 3.242105786564271e-05,
      "loss": 3.1338,
      "step": 195992
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7790985107421875,
      "learning_rate": 3.2419208245141646e-05,
      "loss": 2.9169,
      "step": 195993
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7958967685699463,
      "learning_rate": 3.241735867438896e-05,
      "loss": 3.0686,
      "step": 195994
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0368473529815674,
      "learning_rate": 3.241550915338488e-05,
      "loss": 2.9901,
      "step": 195995
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.53175687789917,
      "learning_rate": 3.241365968212987e-05,
      "loss": 3.0222,
      "step": 195996
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.563384056091309,
      "learning_rate": 3.2411810260624226e-05,
      "loss": 2.981,
      "step": 195997
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6463584899902344,
      "learning_rate": 3.240996088886826e-05,
      "loss": 3.2201,
      "step": 195998
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6847550868988037,
      "learning_rate": 3.240811156686226e-05,
      "loss": 2.9478,
      "step": 195999
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4738001823425293,
      "learning_rate": 3.240626229460673e-05,
      "loss": 2.9789,
      "step": 196000
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.605036735534668,
      "learning_rate": 3.240441307210184e-05,
      "loss": 2.8981,
      "step": 196001
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.051725149154663,
      "learning_rate": 3.2402563899348087e-05,
      "loss": 2.7426,
      "step": 196002
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6747634410858154,
      "learning_rate": 3.240071477634577e-05,
      "loss": 2.976,
      "step": 196003
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8829212188720703,
      "learning_rate": 3.239886570309519e-05,
      "loss": 2.8176,
      "step": 196004
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6271770000457764,
      "learning_rate": 3.239701667959668e-05,
      "loss": 2.7733,
      "step": 196005
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.72129225730896,
      "learning_rate": 3.2395167705850635e-05,
      "loss": 2.9046,
      "step": 196006
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6594889163970947,
      "learning_rate": 3.2393318781857356e-05,
      "loss": 3.0975,
      "step": 196007
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.797245502471924,
      "learning_rate": 3.239146990761725e-05,
      "loss": 3.0219,
      "step": 196008
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.870353937149048,
      "learning_rate": 3.2389621083130615e-05,
      "loss": 3.0686,
      "step": 196009
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.660839796066284,
      "learning_rate": 3.238777230839781e-05,
      "loss": 2.9644,
      "step": 196010
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.695019245147705,
      "learning_rate": 3.238592358341914e-05,
      "loss": 2.7948,
      "step": 196011
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.776862144470215,
      "learning_rate": 3.2384074908195004e-05,
      "loss": 2.8005,
      "step": 196012
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5981223583221436,
      "learning_rate": 3.238222628272567e-05,
      "loss": 2.9883,
      "step": 196013
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.706498384475708,
      "learning_rate": 3.238037770701156e-05,
      "loss": 2.6531,
      "step": 196014
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.029477834701538,
      "learning_rate": 3.237852918105302e-05,
      "loss": 3.0071,
      "step": 196015
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1042075157165527,
      "learning_rate": 3.2376680704850386e-05,
      "loss": 2.8624,
      "step": 196016
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.308154344558716,
      "learning_rate": 3.237483227840388e-05,
      "loss": 2.9491,
      "step": 196017
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8660340309143066,
      "learning_rate": 3.237298390171401e-05,
      "loss": 2.9459,
      "step": 196018
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.214566707611084,
      "learning_rate": 3.2371135574781e-05,
      "loss": 2.9921,
      "step": 196019
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.922476291656494,
      "learning_rate": 3.236928729760533e-05,
      "loss": 3.0122,
      "step": 196020
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1444175243377686,
      "learning_rate": 3.236743907018725e-05,
      "loss": 2.9763,
      "step": 196021
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.227107286453247,
      "learning_rate": 3.236559089252704e-05,
      "loss": 2.9723,
      "step": 196022
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8598883152008057,
      "learning_rate": 3.236374276462519e-05,
      "loss": 2.9506,
      "step": 196023
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.66548228263855,
      "learning_rate": 3.236189468648197e-05,
      "loss": 2.8868,
      "step": 196024
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.119340658187866,
      "learning_rate": 3.236004665809765e-05,
      "loss": 3.0258,
      "step": 196025
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9287614822387695,
      "learning_rate": 3.2358198679472726e-05,
      "loss": 3.003,
      "step": 196026
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7184324264526367,
      "learning_rate": 3.2356350750607466e-05,
      "loss": 2.6335,
      "step": 196027
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.795502185821533,
      "learning_rate": 3.2354502871502166e-05,
      "loss": 3.0553,
      "step": 196028
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.87029767036438,
      "learning_rate": 3.2352655042157226e-05,
      "loss": 3.0742,
      "step": 196029
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.563159227371216,
      "learning_rate": 3.235080726257302e-05,
      "loss": 3.0008,
      "step": 196030
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1903493404388428,
      "learning_rate": 3.234895953274981e-05,
      "loss": 2.974,
      "step": 196031
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2441577911376953,
      "learning_rate": 3.234711185268799e-05,
      "loss": 2.9566,
      "step": 196032
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.8009488582611084,
      "learning_rate": 3.2345264222387936e-05,
      "loss": 2.9249,
      "step": 196033
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8964951038360596,
      "learning_rate": 3.234341664184987e-05,
      "loss": 2.8651,
      "step": 196034
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8418149948120117,
      "learning_rate": 3.2341569111074274e-05,
      "loss": 3.204,
      "step": 196035
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5109097957611084,
      "learning_rate": 3.233972163006137e-05,
      "loss": 3.1148,
      "step": 196036
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.685265064239502,
      "learning_rate": 3.233787419881162e-05,
      "loss": 2.8771,
      "step": 196037
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.72206711769104,
      "learning_rate": 3.2336026817325335e-05,
      "loss": 2.6215,
      "step": 196038
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2268128395080566,
      "learning_rate": 3.2334179485602805e-05,
      "loss": 3.1719,
      "step": 196039
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9811670780181885,
      "learning_rate": 3.2332332203644376e-05,
      "loss": 2.829,
      "step": 196040
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2308709621429443,
      "learning_rate": 3.233048497145047e-05,
      "loss": 3.2433,
      "step": 196041
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8011655807495117,
      "learning_rate": 3.2328637789021316e-05,
      "loss": 2.9294,
      "step": 196042
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9882099628448486,
      "learning_rate": 3.232679065635736e-05,
      "loss": 2.996,
      "step": 196043
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.218714475631714,
      "learning_rate": 3.232494357345893e-05,
      "loss": 2.9928,
      "step": 196044
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2467710971832275,
      "learning_rate": 3.2323096540326286e-05,
      "loss": 2.8196,
      "step": 196045
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.512887716293335,
      "learning_rate": 3.2321249556959906e-05,
      "loss": 2.9021,
      "step": 196046
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.689305305480957,
      "learning_rate": 3.231940262336001e-05,
      "loss": 2.8544,
      "step": 196047
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.599303722381592,
      "learning_rate": 3.231755573952698e-05,
      "loss": 3.1823,
      "step": 196048
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0147793292999268,
      "learning_rate": 3.23157089054612e-05,
      "loss": 2.8298,
      "step": 196049
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9899210929870605,
      "learning_rate": 3.231386212116291e-05,
      "loss": 2.7551,
      "step": 196050
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2153396606445312,
      "learning_rate": 3.231201538663262e-05,
      "loss": 3.0541,
      "step": 196051
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6293492317199707,
      "learning_rate": 3.2310168701870575e-05,
      "loss": 2.827,
      "step": 196052
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6129257678985596,
      "learning_rate": 3.2308322066877124e-05,
      "loss": 2.8758,
      "step": 196053
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.803973913192749,
      "learning_rate": 3.230647548165253e-05,
      "loss": 2.8881,
      "step": 196054
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.585481882095337,
      "learning_rate": 3.230462894619729e-05,
      "loss": 2.9705,
      "step": 196055
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.799474000930786,
      "learning_rate": 3.23027824605116e-05,
      "loss": 2.9611,
      "step": 196056
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6863107681274414,
      "learning_rate": 3.230093602459594e-05,
      "loss": 2.8532,
      "step": 196057
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.979829788208008,
      "learning_rate": 3.229908963845059e-05,
      "loss": 2.7471,
      "step": 196058
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8160250186920166,
      "learning_rate": 3.22972433020759e-05,
      "loss": 3.0124,
      "step": 196059
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.447544813156128,
      "learning_rate": 3.2295397015472135e-05,
      "loss": 3.1806,
      "step": 196060
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.063471555709839,
      "learning_rate": 3.2293550778639755e-05,
      "loss": 3.1575,
      "step": 196061
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.153836488723755,
      "learning_rate": 3.2291704591579024e-05,
      "loss": 2.9862,
      "step": 196062
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0158722400665283,
      "learning_rate": 3.228985845429038e-05,
      "loss": 3.0964,
      "step": 196063
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.961942195892334,
      "learning_rate": 3.2288012366774095e-05,
      "loss": 2.9512,
      "step": 196064
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3032968044281006,
      "learning_rate": 3.228616632903053e-05,
      "loss": 3.0306,
      "step": 196065
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.589675426483154,
      "learning_rate": 3.2284320341059944e-05,
      "loss": 2.8383,
      "step": 196066
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7564752101898193,
      "learning_rate": 3.228247440286281e-05,
      "loss": 2.9712,
      "step": 196067
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.450136184692383,
      "learning_rate": 3.2280628514439396e-05,
      "loss": 2.8758,
      "step": 196068
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7860782146453857,
      "learning_rate": 3.22787826757901e-05,
      "loss": 2.9285,
      "step": 196069
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.303028106689453,
      "learning_rate": 3.227693688691523e-05,
      "loss": 2.9201,
      "step": 196070
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.105497360229492,
      "learning_rate": 3.227509114781514e-05,
      "loss": 3.1757,
      "step": 196071
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.595296859741211,
      "learning_rate": 3.2273245458490096e-05,
      "loss": 2.5754,
      "step": 196072
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8549141883850098,
      "learning_rate": 3.2271399818940566e-05,
      "loss": 2.8135,
      "step": 196073
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.705085039138794,
      "learning_rate": 3.226955422916679e-05,
      "loss": 3.17,
      "step": 196074
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4005579948425293,
      "learning_rate": 3.2267708689169206e-05,
      "loss": 2.9337,
      "step": 196075
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.876996040344238,
      "learning_rate": 3.226586319894813e-05,
      "loss": 3.159,
      "step": 196076
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.290736198425293,
      "learning_rate": 3.226401775850387e-05,
      "loss": 2.8365,
      "step": 196077
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.4345288276672363,
      "learning_rate": 3.226217236783672e-05,
      "loss": 2.9013,
      "step": 196078
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.66450572013855,
      "learning_rate": 3.226032702694713e-05,
      "loss": 3.0874,
      "step": 196079
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0275027751922607,
      "learning_rate": 3.2258481735835354e-05,
      "loss": 3.0389,
      "step": 196080
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.390442371368408,
      "learning_rate": 3.225663649450185e-05,
      "loss": 2.8394,
      "step": 196081
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.117880344390869,
      "learning_rate": 3.22547913029469e-05,
      "loss": 2.5806,
      "step": 196082
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.931365489959717,
      "learning_rate": 3.22529461611708e-05,
      "loss": 2.8038,
      "step": 196083
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9310755729675293,
      "learning_rate": 3.225110106917391e-05,
      "loss": 2.6047,
      "step": 196084
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6642582416534424,
      "learning_rate": 3.2249256026956637e-05,
      "loss": 2.8225,
      "step": 196085
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.103505849838257,
      "learning_rate": 3.224741103451921e-05,
      "loss": 3.0181,
      "step": 196086
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.119304895401001,
      "learning_rate": 3.224556609186213e-05,
      "loss": 2.9764,
      "step": 196087
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.324183940887451,
      "learning_rate": 3.2243721198985625e-05,
      "loss": 2.8792,
      "step": 196088
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.44610333442688,
      "learning_rate": 3.22418763558901e-05,
      "loss": 3.0379,
      "step": 196089
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8753886222839355,
      "learning_rate": 3.22400315625758e-05,
      "loss": 2.9256,
      "step": 196090
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.310868263244629,
      "learning_rate": 3.2238186819043164e-05,
      "loss": 2.7664,
      "step": 196091
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9266738891601562,
      "learning_rate": 3.2236342125292446e-05,
      "loss": 3.0446,
      "step": 196092
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5169646739959717,
      "learning_rate": 3.223449748132414e-05,
      "loss": 3.0621,
      "step": 196093
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.862945556640625,
      "learning_rate": 3.223265288713844e-05,
      "loss": 3.0666,
      "step": 196094
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.7239434719085693,
      "learning_rate": 3.2230808342735794e-05,
      "loss": 2.9826,
      "step": 196095
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.086107015609741,
      "learning_rate": 3.2228963848116416e-05,
      "loss": 2.8758,
      "step": 196096
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8082528114318848,
      "learning_rate": 3.222711940328079e-05,
      "loss": 3.0497,
      "step": 196097
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9127964973449707,
      "learning_rate": 3.222527500822913e-05,
      "loss": 2.9238,
      "step": 196098
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4765465259552,
      "learning_rate": 3.222343066296192e-05,
      "loss": 2.8432,
      "step": 196099
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2386085987091064,
      "learning_rate": 3.222158636747942e-05,
      "loss": 3.0878,
      "step": 196100
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.762924909591675,
      "learning_rate": 3.2219742121782e-05,
      "loss": 2.8526,
      "step": 196101
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0025529861450195,
      "learning_rate": 3.2217897925869916e-05,
      "loss": 3.0188,
      "step": 196102
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9393560886383057,
      "learning_rate": 3.221605377974364e-05,
      "loss": 2.8044,
      "step": 196103
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9614834785461426,
      "learning_rate": 3.221420968340338e-05,
      "loss": 3.0046,
      "step": 196104
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.745302438735962,
      "learning_rate": 3.2212365636849624e-05,
      "loss": 3.0381,
      "step": 196105
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7219741344451904,
      "learning_rate": 3.221052164008264e-05,
      "loss": 2.9593,
      "step": 196106
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.133399486541748,
      "learning_rate": 3.220867769310277e-05,
      "loss": 2.8446,
      "step": 196107
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.821291208267212,
      "learning_rate": 3.22068337959103e-05,
      "loss": 3.0512,
      "step": 196108
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.988994598388672,
      "learning_rate": 3.220498994850571e-05,
      "loss": 2.9151,
      "step": 196109
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5328755378723145,
      "learning_rate": 3.220314615088919e-05,
      "loss": 2.9236,
      "step": 196110
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5854201316833496,
      "learning_rate": 3.220130240306125e-05,
      "loss": 2.7964,
      "step": 196111
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.350881814956665,
      "learning_rate": 3.2199458705022074e-05,
      "loss": 2.8903,
      "step": 196112
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5206315517425537,
      "learning_rate": 3.219761505677211e-05,
      "loss": 2.7331,
      "step": 196113
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3406012058258057,
      "learning_rate": 3.219577145831168e-05,
      "loss": 2.9651,
      "step": 196114
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1238155364990234,
      "learning_rate": 3.2193927909641094e-05,
      "loss": 2.9779,
      "step": 196115
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.901668071746826,
      "learning_rate": 3.2192084410760675e-05,
      "loss": 2.9269,
      "step": 196116
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0646255016326904,
      "learning_rate": 3.2190240961670866e-05,
      "loss": 2.7834,
      "step": 196117
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.181747913360596,
      "learning_rate": 3.218839756237186e-05,
      "loss": 2.8004,
      "step": 196118
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.233720541000366,
      "learning_rate": 3.218655421286416e-05,
      "loss": 2.8174,
      "step": 196119
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.911773443222046,
      "learning_rate": 3.218471091314806e-05,
      "loss": 2.9536,
      "step": 196120
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.738318920135498,
      "learning_rate": 3.21828676632238e-05,
      "loss": 2.9571,
      "step": 196121
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8635051250457764,
      "learning_rate": 3.218102446309184e-05,
      "loss": 2.9857,
      "step": 196122
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.76619553565979,
      "learning_rate": 3.217918131275249e-05,
      "loss": 2.8271,
      "step": 196123
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7207720279693604,
      "learning_rate": 3.217733821220604e-05,
      "loss": 2.7022,
      "step": 196124
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.515334129333496,
      "learning_rate": 3.2175495161452927e-05,
      "loss": 3.2185,
      "step": 196125
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9575178623199463,
      "learning_rate": 3.217365216049345e-05,
      "loss": 2.9711,
      "step": 196126
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8796403408050537,
      "learning_rate": 3.21718092093279e-05,
      "loss": 2.8092,
      "step": 196127
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6533491611480713,
      "learning_rate": 3.2169966307956696e-05,
      "loss": 2.9359,
      "step": 196128
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2364983558654785,
      "learning_rate": 3.216812345638016e-05,
      "loss": 3.0155,
      "step": 196129
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.621751546859741,
      "learning_rate": 3.2166280654598584e-05,
      "loss": 3.0512,
      "step": 196130
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3699278831481934,
      "learning_rate": 3.216443790261242e-05,
      "loss": 2.9106,
      "step": 196131
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.576180934906006,
      "learning_rate": 3.2162595200421916e-05,
      "loss": 3.0959,
      "step": 196132
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.374460220336914,
      "learning_rate": 3.216075254802738e-05,
      "loss": 3.0035,
      "step": 196133
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.383230686187744,
      "learning_rate": 3.215890994542928e-05,
      "loss": 2.6849,
      "step": 196134
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.901197671890259,
      "learning_rate": 3.2157067392627846e-05,
      "loss": 2.9229,
      "step": 196135
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.3754677772521973,
      "learning_rate": 3.215522488962351e-05,
      "loss": 2.9631,
      "step": 196136
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4706335067749023,
      "learning_rate": 3.215338243641661e-05,
      "loss": 3.075,
      "step": 196137
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7967662811279297,
      "learning_rate": 3.2151540033007405e-05,
      "loss": 2.9174,
      "step": 196138
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.356548309326172,
      "learning_rate": 3.214969767939627e-05,
      "loss": 2.9554,
      "step": 196139
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6405982971191406,
      "learning_rate": 3.21478553755836e-05,
      "loss": 3.0053,
      "step": 196140
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9052319526672363,
      "learning_rate": 3.214601312156963e-05,
      "loss": 2.5984,
      "step": 196141
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.014298915863037,
      "learning_rate": 3.214417091735485e-05,
      "loss": 2.9308,
      "step": 196142
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.160264253616333,
      "learning_rate": 3.2142328762939505e-05,
      "loss": 2.9973,
      "step": 196143
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.702739953994751,
      "learning_rate": 3.214048665832399e-05,
      "loss": 2.9781,
      "step": 196144
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6855387687683105,
      "learning_rate": 3.213864460350851e-05,
      "loss": 2.9416,
      "step": 196145
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6853299140930176,
      "learning_rate": 3.2136802598493625e-05,
      "loss": 3.1175,
      "step": 196146
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.003887891769409,
      "learning_rate": 3.213496064327947e-05,
      "loss": 2.9863,
      "step": 196147
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.131412982940674,
      "learning_rate": 3.213311873786655e-05,
      "loss": 2.8971,
      "step": 196148
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4223546981811523,
      "learning_rate": 3.213127688225512e-05,
      "loss": 2.9328,
      "step": 196149
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6073617935180664,
      "learning_rate": 3.2129435076445584e-05,
      "loss": 2.8408,
      "step": 196150
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.861528158187866,
      "learning_rate": 3.212759332043815e-05,
      "loss": 3.105,
      "step": 196151
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2502145767211914,
      "learning_rate": 3.212575161423331e-05,
      "loss": 3.0875,
      "step": 196152
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7458138465881348,
      "learning_rate": 3.212390995783133e-05,
      "loss": 3.2332,
      "step": 196153
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0238964557647705,
      "learning_rate": 3.212206835123258e-05,
      "loss": 2.6367,
      "step": 196154
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.130139112472534,
      "learning_rate": 3.212022679443742e-05,
      "loss": 3.1695,
      "step": 196155
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.69326114654541,
      "learning_rate": 3.211838528744616e-05,
      "loss": 2.5811,
      "step": 196156
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0223441123962402,
      "learning_rate": 3.211654383025909e-05,
      "loss": 2.9756,
      "step": 196157
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4052865505218506,
      "learning_rate": 3.2114702422876684e-05,
      "loss": 2.7372,
      "step": 196158
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4234869480133057,
      "learning_rate": 3.2112861065299134e-05,
      "loss": 2.8949,
      "step": 196159
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.567408323287964,
      "learning_rate": 3.2111019757526945e-05,
      "loss": 3.1412,
      "step": 196160
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.447014093399048,
      "learning_rate": 3.210917849956035e-05,
      "loss": 2.853,
      "step": 196161
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1692378520965576,
      "learning_rate": 3.2107337291399715e-05,
      "loss": 2.9313,
      "step": 196162
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0443453788757324,
      "learning_rate": 3.210549613304534e-05,
      "loss": 3.0704,
      "step": 196163
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.316040277481079,
      "learning_rate": 3.2103655024497656e-05,
      "loss": 2.9982,
      "step": 196164
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8086583614349365,
      "learning_rate": 3.2101813965756894e-05,
      "loss": 2.8404,
      "step": 196165
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1138522624969482,
      "learning_rate": 3.2099972956823526e-05,
      "loss": 3.1034,
      "step": 196166
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4658851623535156,
      "learning_rate": 3.209813199769785e-05,
      "loss": 3.0812,
      "step": 196167
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.41180157661438,
      "learning_rate": 3.2096291088380165e-05,
      "loss": 2.919,
      "step": 196168
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6890616416931152,
      "learning_rate": 3.2094450228870775e-05,
      "loss": 2.868,
      "step": 196169
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.5351922512054443,
      "learning_rate": 3.209260941917017e-05,
      "loss": 2.8256,
      "step": 196170
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.900977373123169,
      "learning_rate": 3.209076865927852e-05,
      "loss": 2.8489,
      "step": 196171
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6250369548797607,
      "learning_rate": 3.208892794919633e-05,
      "loss": 3.057,
      "step": 196172
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6339452266693115,
      "learning_rate": 3.2087087288923796e-05,
      "loss": 2.9389,
      "step": 196173
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.441772937774658,
      "learning_rate": 3.208524667846145e-05,
      "loss": 2.9137,
      "step": 196174
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.255646228790283,
      "learning_rate": 3.2083406117809396e-05,
      "loss": 2.954,
      "step": 196175
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1325201988220215,
      "learning_rate": 3.208156560696816e-05,
      "loss": 2.6304,
      "step": 196176
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.770007848739624,
      "learning_rate": 3.207972514593795e-05,
      "loss": 3.1513,
      "step": 196177
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.581387758255005,
      "learning_rate": 3.207788473471923e-05,
      "loss": 3.0574,
      "step": 196178
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.011718988418579,
      "learning_rate": 3.207604437331226e-05,
      "loss": 3.1517,
      "step": 196179
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.575256586074829,
      "learning_rate": 3.207420406171748e-05,
      "loss": 3.361,
      "step": 196180
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.715623140335083,
      "learning_rate": 3.207236379993508e-05,
      "loss": 2.8475,
      "step": 196181
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2500414848327637,
      "learning_rate": 3.207052358796555e-05,
      "loss": 2.7074,
      "step": 196182
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9265620708465576,
      "learning_rate": 3.2068683425809094e-05,
      "loss": 2.8536,
      "step": 196183
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.009305953979492,
      "learning_rate": 3.206684331346619e-05,
      "loss": 2.8047,
      "step": 196184
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2034082412719727,
      "learning_rate": 3.206500325093705e-05,
      "loss": 2.567,
      "step": 196185
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.221235990524292,
      "learning_rate": 3.206316323822219e-05,
      "loss": 2.9702,
      "step": 196186
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.886188268661499,
      "learning_rate": 3.206132327532175e-05,
      "loss": 3.0976,
      "step": 196187
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6351656913757324,
      "learning_rate": 3.205948336223623e-05,
      "loss": 2.7266,
      "step": 196188
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.6102821826934814,
      "learning_rate": 3.2057643498965825e-05,
      "loss": 2.8027,
      "step": 196189
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7136027812957764,
      "learning_rate": 3.2055803685511036e-05,
      "loss": 2.8196,
      "step": 196190
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.873508930206299,
      "learning_rate": 3.20539639218721e-05,
      "loss": 2.9319,
      "step": 196191
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.4863781929016113,
      "learning_rate": 3.205212420804949e-05,
      "loss": 2.8192,
      "step": 196192
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.0490643978118896,
      "learning_rate": 3.205028454404329e-05,
      "loss": 2.9177,
      "step": 196193
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.269252061843872,
      "learning_rate": 3.204844492985411e-05,
      "loss": 2.9835,
      "step": 196194
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8703300952911377,
      "learning_rate": 3.204660536548211e-05,
      "loss": 2.9126,
      "step": 196195
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8327019214630127,
      "learning_rate": 3.204476585092773e-05,
      "loss": 2.8196,
      "step": 196196
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9382758140563965,
      "learning_rate": 3.204292638619127e-05,
      "loss": 2.7079,
      "step": 196197
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5151758193969727,
      "learning_rate": 3.204108697127312e-05,
      "loss": 2.89,
      "step": 196198
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.884007215499878,
      "learning_rate": 3.2039247606173625e-05,
      "loss": 2.8704,
      "step": 196199
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.8428964614868164,
      "learning_rate": 3.203740829089308e-05,
      "loss": 3.057,
      "step": 196200
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7490439414978027,
      "learning_rate": 3.203556902543174e-05,
      "loss": 2.837,
      "step": 196201
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.7626380920410156,
      "learning_rate": 3.203372980979015e-05,
      "loss": 3.0447,
      "step": 196202
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.879211902618408,
      "learning_rate": 3.2031890643968447e-05,
      "loss": 2.9502,
      "step": 196203
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.400202751159668,
      "learning_rate": 3.203005152796716e-05,
      "loss": 2.8585,
      "step": 196204
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2535901069641113,
      "learning_rate": 3.202821246178655e-05,
      "loss": 3.0452,
      "step": 196205
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.43717098236084,
      "learning_rate": 3.2026373445426954e-05,
      "loss": 2.8173,
      "step": 196206
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8327443599700928,
      "learning_rate": 3.202453447888864e-05,
      "loss": 2.8737,
      "step": 196207
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.9731457233428955,
      "learning_rate": 3.202269556217211e-05,
      "loss": 2.9472,
      "step": 196208
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.186829090118408,
      "learning_rate": 3.202085669527752e-05,
      "loss": 2.8891,
      "step": 196209
    },
    {
      "epoch": 2.55,
      "grad_norm": 4.015236854553223,
      "learning_rate": 3.201901787820538e-05,
      "loss": 3.0869,
      "step": 196210
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3247427940368652,
      "learning_rate": 3.201717911095598e-05,
      "loss": 2.7648,
      "step": 196211
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.146810531616211,
      "learning_rate": 3.20153403935296e-05,
      "loss": 2.9665,
      "step": 196212
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.225428819656372,
      "learning_rate": 3.201350172592666e-05,
      "loss": 2.9301,
      "step": 196213
    },
    {
      "epoch": 2.55,
      "grad_norm": 6.098557472229004,
      "learning_rate": 3.201166310814747e-05,
      "loss": 2.5407,
      "step": 196214
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.23388409614563,
      "learning_rate": 3.200982454019232e-05,
      "loss": 2.8662,
      "step": 196215
    },
    {
      "epoch": 2.55,
      "grad_norm": 5.365440368652344,
      "learning_rate": 3.200798602206165e-05,
      "loss": 3.0082,
      "step": 196216
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.89847469329834,
      "learning_rate": 3.200614755375576e-05,
      "loss": 2.9145,
      "step": 196217
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.022062301635742,
      "learning_rate": 3.200430913527491e-05,
      "loss": 2.817,
      "step": 196218
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.008139133453369,
      "learning_rate": 3.2002470766619605e-05,
      "loss": 2.8229,
      "step": 196219
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.1949074268341064,
      "learning_rate": 3.200063244779004e-05,
      "loss": 2.9584,
      "step": 196220
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.3842251300811768,
      "learning_rate": 3.199879417878666e-05,
      "loss": 2.9625,
      "step": 196221
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.096035957336426,
      "learning_rate": 3.199695595960975e-05,
      "loss": 2.7909,
      "step": 196222
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5897393226623535,
      "learning_rate": 3.1995117790259684e-05,
      "loss": 2.9893,
      "step": 196223
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.8512942790985107,
      "learning_rate": 3.199327967073673e-05,
      "loss": 2.7954,
      "step": 196224
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.314774751663208,
      "learning_rate": 3.1991441601041344e-05,
      "loss": 3.0604,
      "step": 196225
    },
    {
      "epoch": 2.55,
      "grad_norm": 2.6772968769073486,
      "learning_rate": 3.198960358117374e-05,
      "loss": 2.9672,
      "step": 196226
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5296499729156494,
      "learning_rate": 3.198776561113441e-05,
      "loss": 2.9534,
      "step": 196227
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2466483116149902,
      "learning_rate": 3.1985927690923585e-05,
      "loss": 2.9398,
      "step": 196228
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8199336528778076,
      "learning_rate": 3.1984089820541634e-05,
      "loss": 2.8016,
      "step": 196229
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.036659002304077,
      "learning_rate": 3.198225199998886e-05,
      "loss": 2.8631,
      "step": 196230
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1168205738067627,
      "learning_rate": 3.198041422926569e-05,
      "loss": 2.8595,
      "step": 196231
    },
    {
      "epoch": 2.56,
      "grad_norm": 6.560070514678955,
      "learning_rate": 3.197857650837237e-05,
      "loss": 3.1122,
      "step": 196232
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.294573783874512,
      "learning_rate": 3.197673883730935e-05,
      "loss": 2.9458,
      "step": 196233
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9706411361694336,
      "learning_rate": 3.19749012160769e-05,
      "loss": 2.9655,
      "step": 196234
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0312509536743164,
      "learning_rate": 3.1973063644675386e-05,
      "loss": 2.9697,
      "step": 196235
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0014073848724365,
      "learning_rate": 3.1971226123105086e-05,
      "loss": 2.9966,
      "step": 196236
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.811267852783203,
      "learning_rate": 3.1969388651366425e-05,
      "loss": 2.9241,
      "step": 196237
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.778160572052002,
      "learning_rate": 3.1967551229459695e-05,
      "loss": 3.0608,
      "step": 196238
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.8588979244232178,
      "learning_rate": 3.1965713857385276e-05,
      "loss": 3.1183,
      "step": 196239
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.277956008911133,
      "learning_rate": 3.1963876535143464e-05,
      "loss": 2.9628,
      "step": 196240
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.659275531768799,
      "learning_rate": 3.1962039262734716e-05,
      "loss": 3.1479,
      "step": 196241
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0987305641174316,
      "learning_rate": 3.196020204015921e-05,
      "loss": 2.8153,
      "step": 196242
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.663811683654785,
      "learning_rate": 3.1958364867417375e-05,
      "loss": 2.8612,
      "step": 196243
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.438283681869507,
      "learning_rate": 3.195652774450951e-05,
      "loss": 3.2099,
      "step": 196244
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.591123342514038,
      "learning_rate": 3.1954690671436045e-05,
      "loss": 2.9719,
      "step": 196245
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.271690845489502,
      "learning_rate": 3.195285364819718e-05,
      "loss": 2.9387,
      "step": 196246
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.685319662094116,
      "learning_rate": 3.195101667479346e-05,
      "loss": 2.7615,
      "step": 196247
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4998080730438232,
      "learning_rate": 3.194917975122501e-05,
      "loss": 2.9714,
      "step": 196248
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.881154775619507,
      "learning_rate": 3.194734287749232e-05,
      "loss": 3.0287,
      "step": 196249
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.026571035385132,
      "learning_rate": 3.194550605359561e-05,
      "loss": 3.0023,
      "step": 196250
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2908577919006348,
      "learning_rate": 3.194366927953535e-05,
      "loss": 3.0514,
      "step": 196251
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.538419723510742,
      "learning_rate": 3.194183255531177e-05,
      "loss": 2.8774,
      "step": 196252
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5454623699188232,
      "learning_rate": 3.1939995880925394e-05,
      "loss": 3.1081,
      "step": 196253
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.085378646850586,
      "learning_rate": 3.1938159256376287e-05,
      "loss": 3.0768,
      "step": 196254
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.857900619506836,
      "learning_rate": 3.1936322681665005e-05,
      "loss": 2.9442,
      "step": 196255
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5468602180480957,
      "learning_rate": 3.1934486156791794e-05,
      "loss": 2.8555,
      "step": 196256
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5585997104644775,
      "learning_rate": 3.193264968175705e-05,
      "loss": 2.9797,
      "step": 196257
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9330742359161377,
      "learning_rate": 3.1930813256561006e-05,
      "loss": 2.7404,
      "step": 196258
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7187483310699463,
      "learning_rate": 3.192897688120423e-05,
      "loss": 3.026,
      "step": 196259
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.22845196723938,
      "learning_rate": 3.1927140555686814e-05,
      "loss": 2.9845,
      "step": 196260
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0589892864227295,
      "learning_rate": 3.192530428000923e-05,
      "loss": 3.1593,
      "step": 196261
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.762377977371216,
      "learning_rate": 3.1923468054171754e-05,
      "loss": 2.8991,
      "step": 196262
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8537111282348633,
      "learning_rate": 3.1921631878174806e-05,
      "loss": 3.1803,
      "step": 196263
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.458376884460449,
      "learning_rate": 3.191979575201865e-05,
      "loss": 2.7775,
      "step": 196264
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.922870635986328,
      "learning_rate": 3.1917959675703765e-05,
      "loss": 2.749,
      "step": 196265
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5497636795043945,
      "learning_rate": 3.191612364923028e-05,
      "loss": 2.9525,
      "step": 196266
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8368515968322754,
      "learning_rate": 3.1914287672598714e-05,
      "loss": 3.0042,
      "step": 196267
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5429296493530273,
      "learning_rate": 3.191245174580925e-05,
      "loss": 2.8509,
      "step": 196268
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.176963806152344,
      "learning_rate": 3.191061586886242e-05,
      "loss": 2.9338,
      "step": 196269
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.88373064994812,
      "learning_rate": 3.1908780041758386e-05,
      "loss": 3.0701,
      "step": 196270
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7705202102661133,
      "learning_rate": 3.190694426449771e-05,
      "loss": 3.0043,
      "step": 196271
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6986873149871826,
      "learning_rate": 3.1905108537080474e-05,
      "loss": 3.1043,
      "step": 196272
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.167041778564453,
      "learning_rate": 3.190327285950719e-05,
      "loss": 2.9966,
      "step": 196273
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0364041328430176,
      "learning_rate": 3.190143723177807e-05,
      "loss": 2.8579,
      "step": 196274
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.27856183052063,
      "learning_rate": 3.189960165389361e-05,
      "loss": 3.0173,
      "step": 196275
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.306452751159668,
      "learning_rate": 3.189776612585402e-05,
      "loss": 3.0738,
      "step": 196276
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.436363935470581,
      "learning_rate": 3.189593064765982e-05,
      "loss": 2.8736,
      "step": 196277
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5779170989990234,
      "learning_rate": 3.189409521931108e-05,
      "loss": 2.7209,
      "step": 196278
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.140180826187134,
      "learning_rate": 3.189225984080837e-05,
      "loss": 2.9001,
      "step": 196279
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5202436447143555,
      "learning_rate": 3.189042451215188e-05,
      "loss": 3.0739,
      "step": 196280
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8714492321014404,
      "learning_rate": 3.188858923334209e-05,
      "loss": 2.8674,
      "step": 196281
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.606832504272461,
      "learning_rate": 3.188675400437922e-05,
      "loss": 2.9251,
      "step": 196282
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.234201192855835,
      "learning_rate": 3.188491882526372e-05,
      "loss": 2.9049,
      "step": 196283
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6324424743652344,
      "learning_rate": 3.188308369599587e-05,
      "loss": 2.983,
      "step": 196284
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.133793592453003,
      "learning_rate": 3.188124861657602e-05,
      "loss": 2.9354,
      "step": 196285
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0142340660095215,
      "learning_rate": 3.187941358700442e-05,
      "loss": 2.8904,
      "step": 196286
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3130719661712646,
      "learning_rate": 3.187757860728159e-05,
      "loss": 3.1014,
      "step": 196287
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5560810565948486,
      "learning_rate": 3.18757436774077e-05,
      "loss": 2.9673,
      "step": 196288
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7122645378112793,
      "learning_rate": 3.187390879738325e-05,
      "loss": 2.8519,
      "step": 196289
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7726452350616455,
      "learning_rate": 3.1872073967208486e-05,
      "loss": 3.0457,
      "step": 196290
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.663672924041748,
      "learning_rate": 3.187023918688375e-05,
      "loss": 2.7469,
      "step": 196291
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6558196544647217,
      "learning_rate": 3.186840445640937e-05,
      "loss": 2.9497,
      "step": 196292
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.954906463623047,
      "learning_rate": 3.186656977578578e-05,
      "loss": 2.9249,
      "step": 196293
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.895150899887085,
      "learning_rate": 3.1864735145013175e-05,
      "loss": 2.9648,
      "step": 196294
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.702775716781616,
      "learning_rate": 3.186290056409203e-05,
      "loss": 3.0208,
      "step": 196295
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.534205436706543,
      "learning_rate": 3.186106603302264e-05,
      "loss": 2.9104,
      "step": 196296
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.020738124847412,
      "learning_rate": 3.185923155180528e-05,
      "loss": 3.0028,
      "step": 196297
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0284605026245117,
      "learning_rate": 3.185739712044043e-05,
      "loss": 2.9677,
      "step": 196298
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.145129680633545,
      "learning_rate": 3.185556273892831e-05,
      "loss": 3.0972,
      "step": 196299
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8076133728027344,
      "learning_rate": 3.185372840726928e-05,
      "loss": 2.9251,
      "step": 196300
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.848816394805908,
      "learning_rate": 3.185189412546376e-05,
      "loss": 2.8673,
      "step": 196301
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6717114448547363,
      "learning_rate": 3.1850059893512006e-05,
      "loss": 2.7578,
      "step": 196302
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.206221580505371,
      "learning_rate": 3.1848225711414366e-05,
      "loss": 2.8185,
      "step": 196303
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.593989849090576,
      "learning_rate": 3.1846391579171215e-05,
      "loss": 2.7028,
      "step": 196304
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7747061252593994,
      "learning_rate": 3.184455749678292e-05,
      "loss": 2.8674,
      "step": 196305
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.065011978149414,
      "learning_rate": 3.184272346424971e-05,
      "loss": 2.7428,
      "step": 196306
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.808410882949829,
      "learning_rate": 3.184088948157208e-05,
      "loss": 2.8393,
      "step": 196307
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.043868064880371,
      "learning_rate": 3.183905554875028e-05,
      "loss": 2.9631,
      "step": 196308
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8772759437561035,
      "learning_rate": 3.18372216657846e-05,
      "loss": 3.2351,
      "step": 196309
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.359344720840454,
      "learning_rate": 3.183538783267546e-05,
      "loss": 2.8126,
      "step": 196310
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7979190349578857,
      "learning_rate": 3.183355404942318e-05,
      "loss": 2.7366,
      "step": 196311
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6993954181671143,
      "learning_rate": 3.183172031602815e-05,
      "loss": 2.8016,
      "step": 196312
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7670137882232666,
      "learning_rate": 3.182988663249064e-05,
      "loss": 2.9602,
      "step": 196313
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.669679641723633,
      "learning_rate": 3.182805299881105e-05,
      "loss": 2.9708,
      "step": 196314
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.650068759918213,
      "learning_rate": 3.1826219414989605e-05,
      "loss": 2.9839,
      "step": 196315
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0338220596313477,
      "learning_rate": 3.182438588102682e-05,
      "loss": 3.2435,
      "step": 196316
    },
    {
      "epoch": 2.56,
      "grad_norm": 6.921464920043945,
      "learning_rate": 3.182255239692284e-05,
      "loss": 2.8244,
      "step": 196317
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.223945379257202,
      "learning_rate": 3.1820718962678214e-05,
      "loss": 2.8698,
      "step": 196318
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.968026638031006,
      "learning_rate": 3.1818885578293076e-05,
      "loss": 3.1344,
      "step": 196319
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.606168031692505,
      "learning_rate": 3.181705224376802e-05,
      "loss": 2.825,
      "step": 196320
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.429940223693848,
      "learning_rate": 3.1815218959103107e-05,
      "loss": 3.2236,
      "step": 196321
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.721067190170288,
      "learning_rate": 3.1813385724298844e-05,
      "loss": 3.057,
      "step": 196322
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6054365634918213,
      "learning_rate": 3.18115525393555e-05,
      "loss": 3.0613,
      "step": 196323
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1910011768341064,
      "learning_rate": 3.1809719404273505e-05,
      "loss": 2.4996,
      "step": 196324
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.007615804672241,
      "learning_rate": 3.1807886319053085e-05,
      "loss": 2.8948,
      "step": 196325
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.423123836517334,
      "learning_rate": 3.1806053283694785e-05,
      "loss": 2.8732,
      "step": 196326
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8536293506622314,
      "learning_rate": 3.1804220298198665e-05,
      "loss": 2.9321,
      "step": 196327
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0866053104400635,
      "learning_rate": 3.180238736256526e-05,
      "loss": 2.85,
      "step": 196328
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.9476354122161865,
      "learning_rate": 3.180055447679477e-05,
      "loss": 2.9273,
      "step": 196329
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.361983299255371,
      "learning_rate": 3.179872164088773e-05,
      "loss": 2.8957,
      "step": 196330
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.026309967041016,
      "learning_rate": 3.179688885484426e-05,
      "loss": 2.9919,
      "step": 196331
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.912558078765869,
      "learning_rate": 3.1795056118664975e-05,
      "loss": 2.8939,
      "step": 196332
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9717094898223877,
      "learning_rate": 3.1793223432349904e-05,
      "loss": 2.919,
      "step": 196333
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8602664470672607,
      "learning_rate": 3.179139079589961e-05,
      "loss": 2.9817,
      "step": 196334
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.788513422012329,
      "learning_rate": 3.178955820931429e-05,
      "loss": 2.9236,
      "step": 196335
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9354639053344727,
      "learning_rate": 3.178772567259439e-05,
      "loss": 2.8679,
      "step": 196336
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.736009120941162,
      "learning_rate": 3.1785893185740194e-05,
      "loss": 2.8973,
      "step": 196337
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.479860544204712,
      "learning_rate": 3.1784060748752185e-05,
      "loss": 3.0643,
      "step": 196338
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9760847091674805,
      "learning_rate": 3.178222836163045e-05,
      "loss": 2.7534,
      "step": 196339
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3625872135162354,
      "learning_rate": 3.178039602437549e-05,
      "loss": 2.923,
      "step": 196340
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.070842742919922,
      "learning_rate": 3.1778563736987606e-05,
      "loss": 2.8823,
      "step": 196341
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9125962257385254,
      "learning_rate": 3.177673149946717e-05,
      "loss": 2.9523,
      "step": 196342
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.3650012016296387,
      "learning_rate": 3.1774899311814475e-05,
      "loss": 2.9488,
      "step": 196343
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.583768129348755,
      "learning_rate": 3.1773067174029985e-05,
      "loss": 3.1705,
      "step": 196344
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9520273208618164,
      "learning_rate": 3.177123508611384e-05,
      "loss": 3.0399,
      "step": 196345
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.679476022720337,
      "learning_rate": 3.176940304806654e-05,
      "loss": 3.0572,
      "step": 196346
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3482818603515625,
      "learning_rate": 3.176757105988832e-05,
      "loss": 3.1046,
      "step": 196347
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.325418472290039,
      "learning_rate": 3.176573912157964e-05,
      "loss": 2.8,
      "step": 196348
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7239131927490234,
      "learning_rate": 3.17639072331407e-05,
      "loss": 2.7492,
      "step": 196349
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.61289381980896,
      "learning_rate": 3.1762075394572027e-05,
      "loss": 2.9374,
      "step": 196350
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.667523145675659,
      "learning_rate": 3.1760243605873735e-05,
      "loss": 3.0271,
      "step": 196351
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1225740909576416,
      "learning_rate": 3.175841186704632e-05,
      "loss": 2.8832,
      "step": 196352
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8317086696624756,
      "learning_rate": 3.175658017809004e-05,
      "loss": 3.1027,
      "step": 196353
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3480093479156494,
      "learning_rate": 3.175474853900534e-05,
      "loss": 2.6399,
      "step": 196354
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.055461883544922,
      "learning_rate": 3.1752916949792405e-05,
      "loss": 2.9097,
      "step": 196355
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8599748611450195,
      "learning_rate": 3.175108541045182e-05,
      "loss": 2.7912,
      "step": 196356
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6858575344085693,
      "learning_rate": 3.174925392098363e-05,
      "loss": 2.9797,
      "step": 196357
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.534423351287842,
      "learning_rate": 3.174742248138839e-05,
      "loss": 3.1072,
      "step": 196358
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5180599689483643,
      "learning_rate": 3.174559109166631e-05,
      "loss": 2.7572,
      "step": 196359
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.783416271209717,
      "learning_rate": 3.1743759751817845e-05,
      "loss": 3.0812,
      "step": 196360
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2832562923431396,
      "learning_rate": 3.1741928461843215e-05,
      "loss": 3.0691,
      "step": 196361
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5399599075317383,
      "learning_rate": 3.174009722174292e-05,
      "loss": 3.0588,
      "step": 196362
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7631430625915527,
      "learning_rate": 3.1738266031517135e-05,
      "loss": 2.9683,
      "step": 196363
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8319220542907715,
      "learning_rate": 3.173643489116629e-05,
      "loss": 3.2075,
      "step": 196364
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8045201301574707,
      "learning_rate": 3.173460380069067e-05,
      "loss": 2.9384,
      "step": 196365
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.397819757461548,
      "learning_rate": 3.17327727600907e-05,
      "loss": 3.0453,
      "step": 196366
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5942022800445557,
      "learning_rate": 3.173094176936659e-05,
      "loss": 2.8813,
      "step": 196367
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0003960132598877,
      "learning_rate": 3.172911082851892e-05,
      "loss": 2.8898,
      "step": 196368
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.089796781539917,
      "learning_rate": 3.172727993754772e-05,
      "loss": 2.9113,
      "step": 196369
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7606496810913086,
      "learning_rate": 3.172544909645356e-05,
      "loss": 2.8079,
      "step": 196370
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7470345497131348,
      "learning_rate": 3.172361830523666e-05,
      "loss": 2.8125,
      "step": 196371
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5339691638946533,
      "learning_rate": 3.1721787563897436e-05,
      "loss": 2.9634,
      "step": 196372
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.42000675201416,
      "learning_rate": 3.171995687243615e-05,
      "loss": 2.8987,
      "step": 196373
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7521543502807617,
      "learning_rate": 3.171812623085326e-05,
      "loss": 2.8369,
      "step": 196374
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.193685293197632,
      "learning_rate": 3.1716295639149005e-05,
      "loss": 2.8615,
      "step": 196375
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7285008430480957,
      "learning_rate": 3.171446509732378e-05,
      "loss": 2.8265,
      "step": 196376
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.273528099060059,
      "learning_rate": 3.171263460537783e-05,
      "loss": 3.0825,
      "step": 196377
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.892122983932495,
      "learning_rate": 3.171080416331161e-05,
      "loss": 3.073,
      "step": 196378
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1776511669158936,
      "learning_rate": 3.1708973771125355e-05,
      "loss": 2.935,
      "step": 196379
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.098400354385376,
      "learning_rate": 3.170714342881957e-05,
      "loss": 3.1231,
      "step": 196380
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7785837650299072,
      "learning_rate": 3.170531313639445e-05,
      "loss": 3.0983,
      "step": 196381
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5765488147735596,
      "learning_rate": 3.170348289385033e-05,
      "loss": 2.7621,
      "step": 196382
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9382002353668213,
      "learning_rate": 3.170165270118764e-05,
      "loss": 2.9309,
      "step": 196383
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5621607303619385,
      "learning_rate": 3.169982255840672e-05,
      "loss": 2.8704,
      "step": 196384
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9151451587677,
      "learning_rate": 3.169799246550776e-05,
      "loss": 2.6401,
      "step": 196385
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.190795660018921,
      "learning_rate": 3.16961624224913e-05,
      "loss": 2.9381,
      "step": 196386
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.126532793045044,
      "learning_rate": 3.169433242935757e-05,
      "loss": 2.957,
      "step": 196387
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.71834135055542,
      "learning_rate": 3.1692502486106876e-05,
      "loss": 2.609,
      "step": 196388
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7066588401794434,
      "learning_rate": 3.169067259273967e-05,
      "loss": 2.7885,
      "step": 196389
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6989047527313232,
      "learning_rate": 3.1688842749256236e-05,
      "loss": 3.0299,
      "step": 196390
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9864232540130615,
      "learning_rate": 3.1687012955656835e-05,
      "loss": 2.8745,
      "step": 196391
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9568121433258057,
      "learning_rate": 3.1685183211941955e-05,
      "loss": 2.7834,
      "step": 196392
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.074122428894043,
      "learning_rate": 3.1683353518111873e-05,
      "loss": 2.8722,
      "step": 196393
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.3847708702087402,
      "learning_rate": 3.1681523874166856e-05,
      "loss": 2.8076,
      "step": 196394
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9275450706481934,
      "learning_rate": 3.167969428010737e-05,
      "loss": 2.8567,
      "step": 196395
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.217592239379883,
      "learning_rate": 3.167786473593361e-05,
      "loss": 2.7439,
      "step": 196396
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8434503078460693,
      "learning_rate": 3.167603524164608e-05,
      "loss": 2.9163,
      "step": 196397
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.772188901901245,
      "learning_rate": 3.1674205797245045e-05,
      "loss": 2.7405,
      "step": 196398
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6269683837890625,
      "learning_rate": 3.1672376402730834e-05,
      "loss": 2.8413,
      "step": 196399
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.990253448486328,
      "learning_rate": 3.1670547058103726e-05,
      "loss": 3.1982,
      "step": 196400
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7650439739227295,
      "learning_rate": 3.16687177633642e-05,
      "loss": 2.9781,
      "step": 196401
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3485662937164307,
      "learning_rate": 3.166688851851248e-05,
      "loss": 2.8817,
      "step": 196402
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8788692951202393,
      "learning_rate": 3.166505932354898e-05,
      "loss": 2.8207,
      "step": 196403
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.544670343399048,
      "learning_rate": 3.1663230178473976e-05,
      "loss": 2.951,
      "step": 196404
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.675157070159912,
      "learning_rate": 3.166140108328793e-05,
      "loss": 3.0344,
      "step": 196405
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4163498878479004,
      "learning_rate": 3.165957203799101e-05,
      "loss": 3.0143,
      "step": 196406
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5048129558563232,
      "learning_rate": 3.1657743042583685e-05,
      "loss": 2.9355,
      "step": 196407
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.513840913772583,
      "learning_rate": 3.165591409706618e-05,
      "loss": 3.0192,
      "step": 196408
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4631783962249756,
      "learning_rate": 3.1654085201438975e-05,
      "loss": 2.8783,
      "step": 196409
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4190328121185303,
      "learning_rate": 3.1652256355702286e-05,
      "loss": 2.7688,
      "step": 196410
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.50683856010437,
      "learning_rate": 3.165042755985663e-05,
      "loss": 2.8732,
      "step": 196411
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.884425401687622,
      "learning_rate": 3.1648598813902096e-05,
      "loss": 2.9762,
      "step": 196412
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6313223838806152,
      "learning_rate": 3.164677011783922e-05,
      "loss": 2.9177,
      "step": 196413
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.6319708824157715,
      "learning_rate": 3.1644941471668194e-05,
      "loss": 2.7183,
      "step": 196414
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9391708374023438,
      "learning_rate": 3.16431128753895e-05,
      "loss": 2.8492,
      "step": 196415
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.972278594970703,
      "learning_rate": 3.164128432900339e-05,
      "loss": 2.9038,
      "step": 196416
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.954464912414551,
      "learning_rate": 3.163945583251034e-05,
      "loss": 3.0245,
      "step": 196417
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.506258487701416,
      "learning_rate": 3.1637627385910444e-05,
      "loss": 3.121,
      "step": 196418
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.851001024246216,
      "learning_rate": 3.163579898920424e-05,
      "loss": 3.0138,
      "step": 196419
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0091147422790527,
      "learning_rate": 3.1633970642391956e-05,
      "loss": 2.8898,
      "step": 196420
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.684509038925171,
      "learning_rate": 3.163214234547406e-05,
      "loss": 2.8717,
      "step": 196421
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6696643829345703,
      "learning_rate": 3.163031409845073e-05,
      "loss": 2.7515,
      "step": 196422
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.259519338607788,
      "learning_rate": 3.162848590132251e-05,
      "loss": 2.992,
      "step": 196423
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9228556156158447,
      "learning_rate": 3.1626657754089515e-05,
      "loss": 3.0893,
      "step": 196424
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.665088176727295,
      "learning_rate": 3.1624829656752236e-05,
      "loss": 3.064,
      "step": 196425
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6035666465759277,
      "learning_rate": 3.1623001609310916e-05,
      "loss": 2.9675,
      "step": 196426
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.150757312774658,
      "learning_rate": 3.162117361176602e-05,
      "loss": 3.1414,
      "step": 196427
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8292453289031982,
      "learning_rate": 3.1619345664117734e-05,
      "loss": 3.1266,
      "step": 196428
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.320967197418213,
      "learning_rate": 3.1617517766366605e-05,
      "loss": 3.3985,
      "step": 196429
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6206283569335938,
      "learning_rate": 3.16156899185127e-05,
      "loss": 2.9881,
      "step": 196430
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.500164985656738,
      "learning_rate": 3.161386212055661e-05,
      "loss": 2.8027,
      "step": 196431
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6962578296661377,
      "learning_rate": 3.1612034372498474e-05,
      "loss": 2.8561,
      "step": 196432
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7981865406036377,
      "learning_rate": 3.1610206674338787e-05,
      "loss": 2.8533,
      "step": 196433
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9440128803253174,
      "learning_rate": 3.160837902607779e-05,
      "loss": 3.0877,
      "step": 196434
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0404536724090576,
      "learning_rate": 3.160655142771598e-05,
      "loss": 3.0633,
      "step": 196435
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0019772052764893,
      "learning_rate": 3.1604723879253446e-05,
      "loss": 3.0624,
      "step": 196436
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.8743042945861816,
      "learning_rate": 3.1602896380690736e-05,
      "loss": 2.9768,
      "step": 196437
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8991310596466064,
      "learning_rate": 3.160106893202804e-05,
      "loss": 2.7998,
      "step": 196438
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1645545959472656,
      "learning_rate": 3.159924153326583e-05,
      "loss": 3.1212,
      "step": 196439
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3450069427490234,
      "learning_rate": 3.159741418440433e-05,
      "loss": 2.8315,
      "step": 196440
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.857189416885376,
      "learning_rate": 3.159558688544399e-05,
      "loss": 2.7959,
      "step": 196441
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4781224727630615,
      "learning_rate": 3.15937596363851e-05,
      "loss": 2.7997,
      "step": 196442
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5979442596435547,
      "learning_rate": 3.159193243722802e-05,
      "loss": 3.4088,
      "step": 196443
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3084375858306885,
      "learning_rate": 3.159010528797299e-05,
      "loss": 2.9541,
      "step": 196444
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4499435424804688,
      "learning_rate": 3.158827818862047e-05,
      "loss": 3.0118,
      "step": 196445
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.7916436195373535,
      "learning_rate": 3.158645113917071e-05,
      "loss": 3.1408,
      "step": 196446
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6073999404907227,
      "learning_rate": 3.158462413962416e-05,
      "loss": 2.8994,
      "step": 196447
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.377870559692383,
      "learning_rate": 3.158279718998109e-05,
      "loss": 2.8871,
      "step": 196448
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.709578037261963,
      "learning_rate": 3.158097029024187e-05,
      "loss": 2.7679,
      "step": 196449
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4131381511688232,
      "learning_rate": 3.157914344040673e-05,
      "loss": 2.8302,
      "step": 196450
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4953064918518066,
      "learning_rate": 3.1577316640476134e-05,
      "loss": 2.8909,
      "step": 196451
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4210987091064453,
      "learning_rate": 3.157548989045036e-05,
      "loss": 2.806,
      "step": 196452
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9782321453094482,
      "learning_rate": 3.157366319032979e-05,
      "loss": 2.8814,
      "step": 196453
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.820441722869873,
      "learning_rate": 3.157183654011477e-05,
      "loss": 3.0405,
      "step": 196454
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4226818084716797,
      "learning_rate": 3.1570009939805595e-05,
      "loss": 2.7749,
      "step": 196455
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.059314489364624,
      "learning_rate": 3.15681833894026e-05,
      "loss": 2.761,
      "step": 196456
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.604931116104126,
      "learning_rate": 3.1566356888906187e-05,
      "loss": 2.8117,
      "step": 196457
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.011481523513794,
      "learning_rate": 3.156453043831658e-05,
      "loss": 2.91,
      "step": 196458
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.606283664703369,
      "learning_rate": 3.1562704037634256e-05,
      "loss": 2.9631,
      "step": 196459
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.155334234237671,
      "learning_rate": 3.1560877686859507e-05,
      "loss": 2.9224,
      "step": 196460
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9895951747894287,
      "learning_rate": 3.155905138599264e-05,
      "loss": 2.7308,
      "step": 196461
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.714517593383789,
      "learning_rate": 3.1557225135033984e-05,
      "loss": 3.0398,
      "step": 196462
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.597137689590454,
      "learning_rate": 3.1555398933983936e-05,
      "loss": 2.9275,
      "step": 196463
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.177785634994507,
      "learning_rate": 3.155357278284276e-05,
      "loss": 2.9401,
      "step": 196464
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0524063110351562,
      "learning_rate": 3.15517466816109e-05,
      "loss": 3.085,
      "step": 196465
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.82037353515625,
      "learning_rate": 3.1549920630288616e-05,
      "loss": 3.0913,
      "step": 196466
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9815189838409424,
      "learning_rate": 3.1548094628876245e-05,
      "loss": 2.8268,
      "step": 196467
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.802170991897583,
      "learning_rate": 3.154626867737421e-05,
      "loss": 2.7573,
      "step": 196468
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.834108829498291,
      "learning_rate": 3.1544442775782756e-05,
      "loss": 2.6076,
      "step": 196469
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.740841865539551,
      "learning_rate": 3.1542616924102235e-05,
      "loss": 2.6929,
      "step": 196470
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.303086519241333,
      "learning_rate": 3.154079112233303e-05,
      "loss": 2.9051,
      "step": 196471
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5388753414154053,
      "learning_rate": 3.15389653704755e-05,
      "loss": 2.8614,
      "step": 196472
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.00882625579834,
      "learning_rate": 3.1537139668529874e-05,
      "loss": 2.863,
      "step": 196473
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.432438850402832,
      "learning_rate": 3.153531401649659e-05,
      "loss": 2.9239,
      "step": 196474
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.790421962738037,
      "learning_rate": 3.1533488414375985e-05,
      "loss": 2.8877,
      "step": 196475
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.663640022277832,
      "learning_rate": 3.153166286216832e-05,
      "loss": 2.7837,
      "step": 196476
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.78938889503479,
      "learning_rate": 3.152983735987402e-05,
      "loss": 2.8148,
      "step": 196477
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0567526817321777,
      "learning_rate": 3.152801190749343e-05,
      "loss": 2.6853,
      "step": 196478
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0781962871551514,
      "learning_rate": 3.152618650502675e-05,
      "loss": 3.1782,
      "step": 196479
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6419382095336914,
      "learning_rate": 3.152436115247451e-05,
      "loss": 2.5835,
      "step": 196480
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.000901937484741,
      "learning_rate": 3.1522535849836875e-05,
      "loss": 2.9703,
      "step": 196481
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.445074558258057,
      "learning_rate": 3.152071059711434e-05,
      "loss": 2.7691,
      "step": 196482
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9431231021881104,
      "learning_rate": 3.151888539430718e-05,
      "loss": 3.0783,
      "step": 196483
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.239703893661499,
      "learning_rate": 3.151706024141573e-05,
      "loss": 2.7559,
      "step": 196484
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3181254863739014,
      "learning_rate": 3.151523513844024e-05,
      "loss": 3.0236,
      "step": 196485
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3910906314849854,
      "learning_rate": 3.151341008538123e-05,
      "loss": 2.8802,
      "step": 196486
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.8028485774993896,
      "learning_rate": 3.151158508223885e-05,
      "loss": 2.7437,
      "step": 196487
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1007742881774902,
      "learning_rate": 3.150976012901362e-05,
      "loss": 2.8763,
      "step": 196488
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.0010294914245605,
      "learning_rate": 3.1507935225705786e-05,
      "loss": 2.8977,
      "step": 196489
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0548110008239746,
      "learning_rate": 3.1506110372315684e-05,
      "loss": 2.7918,
      "step": 196490
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8520991802215576,
      "learning_rate": 3.1504285568843626e-05,
      "loss": 3.0462,
      "step": 196491
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4376845359802246,
      "learning_rate": 3.150246081529003e-05,
      "loss": 2.9132,
      "step": 196492
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7757415771484375,
      "learning_rate": 3.1500636111655144e-05,
      "loss": 2.8718,
      "step": 196493
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2827298641204834,
      "learning_rate": 3.149881145793942e-05,
      "loss": 2.9479,
      "step": 196494
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6674463748931885,
      "learning_rate": 3.149698685414307e-05,
      "loss": 2.9764,
      "step": 196495
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5860087871551514,
      "learning_rate": 3.149516230026662e-05,
      "loss": 2.8112,
      "step": 196496
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8814501762390137,
      "learning_rate": 3.149333779631017e-05,
      "loss": 2.9406,
      "step": 196497
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.094165563583374,
      "learning_rate": 3.149151334227422e-05,
      "loss": 3.0728,
      "step": 196498
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.046923875808716,
      "learning_rate": 3.148968893815904e-05,
      "loss": 2.9841,
      "step": 196499
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3556087017059326,
      "learning_rate": 3.148786458396503e-05,
      "loss": 2.7196,
      "step": 196500
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8150618076324463,
      "learning_rate": 3.148604027969245e-05,
      "loss": 2.9532,
      "step": 196501
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3646349906921387,
      "learning_rate": 3.14842160253418e-05,
      "loss": 2.7569,
      "step": 196502
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0935556888580322,
      "learning_rate": 3.148239182091315e-05,
      "loss": 2.9711,
      "step": 196503
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4123430252075195,
      "learning_rate": 3.148056766640711e-05,
      "loss": 2.7463,
      "step": 196504
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.673346757888794,
      "learning_rate": 3.147874356182379e-05,
      "loss": 3.3192,
      "step": 196505
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2416768074035645,
      "learning_rate": 3.1476919507163744e-05,
      "loss": 2.699,
      "step": 196506
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.51558780670166,
      "learning_rate": 3.1475095502427126e-05,
      "loss": 2.9627,
      "step": 196507
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.753279209136963,
      "learning_rate": 3.1473271547614406e-05,
      "loss": 2.9388,
      "step": 196508
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.648378372192383,
      "learning_rate": 3.147144764272589e-05,
      "loss": 2.7388,
      "step": 196509
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.024206638336182,
      "learning_rate": 3.14696237877619e-05,
      "loss": 3.0714,
      "step": 196510
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7703607082366943,
      "learning_rate": 3.146779998272271e-05,
      "loss": 2.9596,
      "step": 196511
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4061617851257324,
      "learning_rate": 3.146597622760878e-05,
      "loss": 2.966,
      "step": 196512
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.868861436843872,
      "learning_rate": 3.1464152522420314e-05,
      "loss": 2.9775,
      "step": 196513
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.8445191383361816,
      "learning_rate": 3.146232886715785e-05,
      "loss": 2.6177,
      "step": 196514
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3025689125061035,
      "learning_rate": 3.1460505261821545e-05,
      "loss": 3.1887,
      "step": 196515
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.931779623031616,
      "learning_rate": 3.1458681706411836e-05,
      "loss": 2.9058,
      "step": 196516
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.937837839126587,
      "learning_rate": 3.145685820092896e-05,
      "loss": 3.0438,
      "step": 196517
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6824231147766113,
      "learning_rate": 3.145503474537341e-05,
      "loss": 2.7157,
      "step": 196518
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8368115425109863,
      "learning_rate": 3.1453211339745324e-05,
      "loss": 2.942,
      "step": 196519
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3623368740081787,
      "learning_rate": 3.145138798404526e-05,
      "loss": 2.8892,
      "step": 196520
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.684480905532837,
      "learning_rate": 3.144956467827343e-05,
      "loss": 2.9573,
      "step": 196521
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2547686100006104,
      "learning_rate": 3.14477414224302e-05,
      "loss": 2.8008,
      "step": 196522
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2734415531158447,
      "learning_rate": 3.144591821651585e-05,
      "loss": 2.967,
      "step": 196523
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0109481811523438,
      "learning_rate": 3.144409506053084e-05,
      "loss": 2.8341,
      "step": 196524
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.767775297164917,
      "learning_rate": 3.144227195447535e-05,
      "loss": 2.995,
      "step": 196525
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6842634677886963,
      "learning_rate": 3.144044889834989e-05,
      "loss": 2.7448,
      "step": 196526
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.875645875930786,
      "learning_rate": 3.143862589215472e-05,
      "loss": 2.7409,
      "step": 196527
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5199975967407227,
      "learning_rate": 3.143680293589018e-05,
      "loss": 3.3577,
      "step": 196528
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5090787410736084,
      "learning_rate": 3.143498002955657e-05,
      "loss": 3.091,
      "step": 196529
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.86751389503479,
      "learning_rate": 3.143315717315431e-05,
      "loss": 2.8362,
      "step": 196530
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.008847236633301,
      "learning_rate": 3.143133436668362e-05,
      "loss": 3.1337,
      "step": 196531
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2379705905914307,
      "learning_rate": 3.1429511610144976e-05,
      "loss": 2.8554,
      "step": 196532
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.891305685043335,
      "learning_rate": 3.142768890353866e-05,
      "loss": 3.0406,
      "step": 196533
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.910870313644409,
      "learning_rate": 3.142586624686504e-05,
      "loss": 2.858,
      "step": 196534
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4691312313079834,
      "learning_rate": 3.1424043640124306e-05,
      "loss": 2.8,
      "step": 196535
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.787747383117676,
      "learning_rate": 3.1422221083317e-05,
      "loss": 2.9071,
      "step": 196536
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.084357976913452,
      "learning_rate": 3.1420398576443315e-05,
      "loss": 2.8978,
      "step": 196537
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.919001340866089,
      "learning_rate": 3.141857611950372e-05,
      "loss": 3.0115,
      "step": 196538
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4474587440490723,
      "learning_rate": 3.141675371249845e-05,
      "loss": 2.9877,
      "step": 196539
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8424458503723145,
      "learning_rate": 3.141493135542791e-05,
      "loss": 2.7101,
      "step": 196540
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.780595064163208,
      "learning_rate": 3.141310904829232e-05,
      "loss": 2.9089,
      "step": 196541
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.072443723678589,
      "learning_rate": 3.141128679109215e-05,
      "loss": 2.8879,
      "step": 196542
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.644716262817383,
      "learning_rate": 3.140946458382767e-05,
      "loss": 3.2488,
      "step": 196543
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.058499813079834,
      "learning_rate": 3.140764242649928e-05,
      "loss": 2.8663,
      "step": 196544
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4529197216033936,
      "learning_rate": 3.140582031910728e-05,
      "loss": 3.1609,
      "step": 196545
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.892038345336914,
      "learning_rate": 3.1403998261651995e-05,
      "loss": 3.0459,
      "step": 196546
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.988457202911377,
      "learning_rate": 3.140217625413374e-05,
      "loss": 3.3017,
      "step": 196547
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.576404571533203,
      "learning_rate": 3.140035429655293e-05,
      "loss": 2.9291,
      "step": 196548
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5137016773223877,
      "learning_rate": 3.139853238890981e-05,
      "loss": 3.0272,
      "step": 196549
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7564713954925537,
      "learning_rate": 3.139671053120485e-05,
      "loss": 2.888,
      "step": 196550
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.247537136077881,
      "learning_rate": 3.1394888723438305e-05,
      "loss": 2.8321,
      "step": 196551
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.13828182220459,
      "learning_rate": 3.1393066965610516e-05,
      "loss": 2.7337,
      "step": 196552
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.61012864112854,
      "learning_rate": 3.139124525772178e-05,
      "loss": 2.8469,
      "step": 196553
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.092810869216919,
      "learning_rate": 3.138942359977252e-05,
      "loss": 2.9006,
      "step": 196554
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.817869186401367,
      "learning_rate": 3.138760199176299e-05,
      "loss": 2.8832,
      "step": 196555
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1703670024871826,
      "learning_rate": 3.138578043369361e-05,
      "loss": 2.9104,
      "step": 196556
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9868459701538086,
      "learning_rate": 3.1383958925564714e-05,
      "loss": 2.8219,
      "step": 196557
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2476840019226074,
      "learning_rate": 3.138213746737653e-05,
      "loss": 2.8307,
      "step": 196558
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7632365226745605,
      "learning_rate": 3.138031605912954e-05,
      "loss": 2.8968,
      "step": 196559
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6938514709472656,
      "learning_rate": 3.137849470082403e-05,
      "loss": 3.0125,
      "step": 196560
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.822265386581421,
      "learning_rate": 3.1376673392460275e-05,
      "loss": 3.0508,
      "step": 196561
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4594051837921143,
      "learning_rate": 3.137485213403873e-05,
      "loss": 2.8243,
      "step": 196562
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8954813480377197,
      "learning_rate": 3.137303092555964e-05,
      "loss": 2.8978,
      "step": 196563
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7153005599975586,
      "learning_rate": 3.137120976702333e-05,
      "loss": 3.0408,
      "step": 196564
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0143916606903076,
      "learning_rate": 3.136938865843027e-05,
      "loss": 3.0254,
      "step": 196565
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9848787784576416,
      "learning_rate": 3.136756759978063e-05,
      "loss": 3.1459,
      "step": 196566
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5322699546813965,
      "learning_rate": 3.136574659107487e-05,
      "loss": 3.1155,
      "step": 196567
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3262460231781006,
      "learning_rate": 3.136392563231329e-05,
      "loss": 2.96,
      "step": 196568
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9445769786834717,
      "learning_rate": 3.136210472349626e-05,
      "loss": 2.7809,
      "step": 196569
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.561457633972168,
      "learning_rate": 3.136028386462401e-05,
      "loss": 2.8857,
      "step": 196570
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2335731983184814,
      "learning_rate": 3.135846305569701e-05,
      "loss": 2.8887,
      "step": 196571
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9767770767211914,
      "learning_rate": 3.13566422967155e-05,
      "loss": 2.8489,
      "step": 196572
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3554062843322754,
      "learning_rate": 3.135482158767989e-05,
      "loss": 3.0253,
      "step": 196573
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.89316987991333,
      "learning_rate": 3.13530009285905e-05,
      "loss": 2.7071,
      "step": 196574
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7699930667877197,
      "learning_rate": 3.1351180319447625e-05,
      "loss": 2.683,
      "step": 196575
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.631248950958252,
      "learning_rate": 3.134935976025166e-05,
      "loss": 3.0865,
      "step": 196576
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6019070148468018,
      "learning_rate": 3.134753925100294e-05,
      "loss": 2.9537,
      "step": 196577
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.864988088607788,
      "learning_rate": 3.13457187917017e-05,
      "loss": 3.0078,
      "step": 196578
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.967020273208618,
      "learning_rate": 3.1343898382348475e-05,
      "loss": 3.1915,
      "step": 196579
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9415078163146973,
      "learning_rate": 3.13420780229434e-05,
      "loss": 2.9821,
      "step": 196580
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5461208820343018,
      "learning_rate": 3.134025771348696e-05,
      "loss": 3.1604,
      "step": 196581
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.198183536529541,
      "learning_rate": 3.133843745397944e-05,
      "loss": 2.7128,
      "step": 196582
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.585711717605591,
      "learning_rate": 3.1336617244421156e-05,
      "loss": 3.0608,
      "step": 196583
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4788408279418945,
      "learning_rate": 3.133479708481242e-05,
      "loss": 2.993,
      "step": 196584
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1361167430877686,
      "learning_rate": 3.13329769751537e-05,
      "loss": 2.9536,
      "step": 196585
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6983964443206787,
      "learning_rate": 3.133115691544516e-05,
      "loss": 2.8938,
      "step": 196586
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.751593828201294,
      "learning_rate": 3.132933690568732e-05,
      "loss": 2.598,
      "step": 196587
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.813560962677002,
      "learning_rate": 3.1327516945880396e-05,
      "loss": 2.9905,
      "step": 196588
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7577435970306396,
      "learning_rate": 3.1325697036024774e-05,
      "loss": 2.8269,
      "step": 196589
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9430840015411377,
      "learning_rate": 3.1323877176120705e-05,
      "loss": 2.7552,
      "step": 196590
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.797058343887329,
      "learning_rate": 3.1322057366168676e-05,
      "loss": 2.7528,
      "step": 196591
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.081618070602417,
      "learning_rate": 3.132023760616888e-05,
      "loss": 2.9626,
      "step": 196592
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.64439058303833,
      "learning_rate": 3.1318417896121774e-05,
      "loss": 2.8028,
      "step": 196593
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.59979510307312,
      "learning_rate": 3.131659823602767e-05,
      "loss": 2.879,
      "step": 196594
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2390952110290527,
      "learning_rate": 3.131477862588687e-05,
      "loss": 2.7849,
      "step": 196595
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.388740062713623,
      "learning_rate": 3.131295906569965e-05,
      "loss": 2.9234,
      "step": 196596
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2069268226623535,
      "learning_rate": 3.13111395554665e-05,
      "loss": 2.9217,
      "step": 196597
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9243369102478027,
      "learning_rate": 3.130932009518763e-05,
      "loss": 2.7513,
      "step": 196598
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.370535850524902,
      "learning_rate": 3.130750068486346e-05,
      "loss": 3.1614,
      "step": 196599
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.645087242126465,
      "learning_rate": 3.130568132449433e-05,
      "loss": 2.8102,
      "step": 196600
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.728527545928955,
      "learning_rate": 3.130386201408052e-05,
      "loss": 2.9202,
      "step": 196601
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.881101608276367,
      "learning_rate": 3.130204275362234e-05,
      "loss": 2.9942,
      "step": 196602
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8325393199920654,
      "learning_rate": 3.130022354312026e-05,
      "loss": 3.0457,
      "step": 196603
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8367037773132324,
      "learning_rate": 3.129840438257446e-05,
      "loss": 2.862,
      "step": 196604
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.913738489151001,
      "learning_rate": 3.129658527198543e-05,
      "loss": 2.7797,
      "step": 196605
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9551219940185547,
      "learning_rate": 3.129476621135344e-05,
      "loss": 2.6218,
      "step": 196606
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.694413661956787,
      "learning_rate": 3.129294720067878e-05,
      "loss": 3.1382,
      "step": 196607
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5825719833374023,
      "learning_rate": 3.129112823996184e-05,
      "loss": 2.8634,
      "step": 196608
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9853646755218506,
      "learning_rate": 3.128930932920296e-05,
      "loss": 2.7499,
      "step": 196609
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1261775493621826,
      "learning_rate": 3.128749046840245e-05,
      "loss": 2.948,
      "step": 196610
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.969820499420166,
      "learning_rate": 3.1285671657560715e-05,
      "loss": 3.1624,
      "step": 196611
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2348101139068604,
      "learning_rate": 3.128385289667802e-05,
      "loss": 3.0103,
      "step": 196612
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9565699100494385,
      "learning_rate": 3.1282034185754755e-05,
      "loss": 3.1477,
      "step": 196613
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7785484790802,
      "learning_rate": 3.128021552479116e-05,
      "loss": 2.8606,
      "step": 196614
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0607857704162598,
      "learning_rate": 3.127839691378774e-05,
      "loss": 2.7209,
      "step": 196615
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4751954078674316,
      "learning_rate": 3.127657835274465e-05,
      "loss": 3.0476,
      "step": 196616
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4177515506744385,
      "learning_rate": 3.127475984166237e-05,
      "loss": 2.9619,
      "step": 196617
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7719221115112305,
      "learning_rate": 3.127294138054118e-05,
      "loss": 2.6054,
      "step": 196618
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2088844776153564,
      "learning_rate": 3.1271122969381435e-05,
      "loss": 3.0493,
      "step": 196619
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.458832263946533,
      "learning_rate": 3.126930460818342e-05,
      "loss": 2.9167,
      "step": 196620
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5125069618225098,
      "learning_rate": 3.126748629694754e-05,
      "loss": 2.8824,
      "step": 196621
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6974246501922607,
      "learning_rate": 3.126566803567406e-05,
      "loss": 2.783,
      "step": 196622
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8013064861297607,
      "learning_rate": 3.126384982436345e-05,
      "loss": 3.0132,
      "step": 196623
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7233455181121826,
      "learning_rate": 3.126203166301594e-05,
      "loss": 2.9557,
      "step": 196624
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.921417236328125,
      "learning_rate": 3.1260213551631865e-05,
      "loss": 2.8734,
      "step": 196625
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9385921955108643,
      "learning_rate": 3.125839549021158e-05,
      "loss": 2.9122,
      "step": 196626
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.079791307449341,
      "learning_rate": 3.1256577478755475e-05,
      "loss": 3.0179,
      "step": 196627
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.09753155708313,
      "learning_rate": 3.125475951726376e-05,
      "loss": 2.9344,
      "step": 196628
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.962679862976074,
      "learning_rate": 3.125294160573695e-05,
      "loss": 2.9485,
      "step": 196629
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.703814744949341,
      "learning_rate": 3.125112374417527e-05,
      "loss": 2.9309,
      "step": 196630
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7793474197387695,
      "learning_rate": 3.124930593257908e-05,
      "loss": 2.78,
      "step": 196631
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4467005729675293,
      "learning_rate": 3.1247488170948666e-05,
      "loss": 3.0057,
      "step": 196632
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.960594415664673,
      "learning_rate": 3.1245670459284475e-05,
      "loss": 2.9227,
      "step": 196633
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.661736011505127,
      "learning_rate": 3.1243852797586724e-05,
      "loss": 2.9383,
      "step": 196634
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.908403158187866,
      "learning_rate": 3.124203518585586e-05,
      "loss": 2.9386,
      "step": 196635
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9262685775756836,
      "learning_rate": 3.12402176240922e-05,
      "loss": 2.9975,
      "step": 196636
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.684476613998413,
      "learning_rate": 3.1238400112296066e-05,
      "loss": 2.6355,
      "step": 196637
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.443514585494995,
      "learning_rate": 3.12365826504677e-05,
      "loss": 2.7684,
      "step": 196638
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.617361545562744,
      "learning_rate": 3.1234765238607594e-05,
      "loss": 3.0052,
      "step": 196639
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.84220290184021,
      "learning_rate": 3.1232947876715955e-05,
      "loss": 2.8179,
      "step": 196640
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5042314529418945,
      "learning_rate": 3.123113056479327e-05,
      "loss": 3.0033,
      "step": 196641
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8235626220703125,
      "learning_rate": 3.122931330283969e-05,
      "loss": 3.0047,
      "step": 196642
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6039116382598877,
      "learning_rate": 3.122749609085573e-05,
      "loss": 3.0344,
      "step": 196643
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2568178176879883,
      "learning_rate": 3.122567892884167e-05,
      "loss": 3.2475,
      "step": 196644
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8432648181915283,
      "learning_rate": 3.12238618167978e-05,
      "loss": 2.9816,
      "step": 196645
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.741135358810425,
      "learning_rate": 3.122204475472446e-05,
      "loss": 2.8468,
      "step": 196646
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.012366771697998,
      "learning_rate": 3.122022774262205e-05,
      "loss": 3.0067,
      "step": 196647
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.989793539047241,
      "learning_rate": 3.121841078049083e-05,
      "loss": 2.8438,
      "step": 196648
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.331697702407837,
      "learning_rate": 3.121659386833124e-05,
      "loss": 2.9351,
      "step": 196649
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.83494234085083,
      "learning_rate": 3.1214777006143545e-05,
      "loss": 2.8431,
      "step": 196650
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8094379901885986,
      "learning_rate": 3.12129601939281e-05,
      "loss": 2.6086,
      "step": 196651
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.661818504333496,
      "learning_rate": 3.121114343168519e-05,
      "loss": 3.1823,
      "step": 196652
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.815727949142456,
      "learning_rate": 3.1209326719415274e-05,
      "loss": 2.8744,
      "step": 196653
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.885838747024536,
      "learning_rate": 3.120751005711851e-05,
      "loss": 3.0812,
      "step": 196654
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1463394165039062,
      "learning_rate": 3.1205693444795445e-05,
      "loss": 3.1235,
      "step": 196655
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.368074655532837,
      "learning_rate": 3.12038768824463e-05,
      "loss": 2.8072,
      "step": 196656
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.515932321548462,
      "learning_rate": 3.120206037007138e-05,
      "loss": 3.0859,
      "step": 196657
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.85512638092041,
      "learning_rate": 3.120024390767112e-05,
      "loss": 2.8375,
      "step": 196658
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.134025573730469,
      "learning_rate": 3.1198427495245815e-05,
      "loss": 2.9612,
      "step": 196659
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.791792869567871,
      "learning_rate": 3.1196611132795734e-05,
      "loss": 2.8262,
      "step": 196660
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.235281467437744,
      "learning_rate": 3.119479482032134e-05,
      "loss": 2.5825,
      "step": 196661
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.3450541496276855,
      "learning_rate": 3.119297855782291e-05,
      "loss": 2.6685,
      "step": 196662
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.227629661560059,
      "learning_rate": 3.11911623453007e-05,
      "loss": 3.0255,
      "step": 196663
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6750314235687256,
      "learning_rate": 3.1189346182755214e-05,
      "loss": 2.6051,
      "step": 196664
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.1285929679870605,
      "learning_rate": 3.1187530070186615e-05,
      "loss": 2.851,
      "step": 196665
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5539281368255615,
      "learning_rate": 3.1185714007595406e-05,
      "loss": 3.0293,
      "step": 196666
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.638667345046997,
      "learning_rate": 3.118389799498186e-05,
      "loss": 2.8251,
      "step": 196667
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.109212398529053,
      "learning_rate": 3.1182082032346255e-05,
      "loss": 2.9277,
      "step": 196668
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8776004314422607,
      "learning_rate": 3.1180266119688944e-05,
      "loss": 3.0408,
      "step": 196669
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.466444730758667,
      "learning_rate": 3.117845025701039e-05,
      "loss": 2.9185,
      "step": 196670
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.9569880962371826,
      "learning_rate": 3.117663444431072e-05,
      "loss": 2.8718,
      "step": 196671
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.308863878250122,
      "learning_rate": 3.117481868159047e-05,
      "loss": 3.0483,
      "step": 196672
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.556338787078857,
      "learning_rate": 3.117300296884991e-05,
      "loss": 3.0195,
      "step": 196673
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9916300773620605,
      "learning_rate": 3.1171187306089343e-05,
      "loss": 2.9765,
      "step": 196674
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.873745918273926,
      "learning_rate": 3.1169371693309086e-05,
      "loss": 3.0553,
      "step": 196675
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6278393268585205,
      "learning_rate": 3.116755613050955e-05,
      "loss": 3.0326,
      "step": 196676
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9761667251586914,
      "learning_rate": 3.1165740617691006e-05,
      "loss": 2.9549,
      "step": 196677
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.237743616104126,
      "learning_rate": 3.1163925154853876e-05,
      "loss": 2.8964,
      "step": 196678
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1617183685302734,
      "learning_rate": 3.1162109741998465e-05,
      "loss": 2.9885,
      "step": 196679
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8174800872802734,
      "learning_rate": 3.1160294379125073e-05,
      "loss": 3.0049,
      "step": 196680
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.95991849899292,
      "learning_rate": 3.1158479066234e-05,
      "loss": 3.0662,
      "step": 196681
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.561746597290039,
      "learning_rate": 3.115666380332568e-05,
      "loss": 2.9032,
      "step": 196682
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.614805221557617,
      "learning_rate": 3.1154848590400375e-05,
      "loss": 2.8446,
      "step": 196683
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.961641788482666,
      "learning_rate": 3.115303342745852e-05,
      "loss": 2.9932,
      "step": 196684
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.852630853652954,
      "learning_rate": 3.115121831450038e-05,
      "loss": 3.0305,
      "step": 196685
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.010321617126465,
      "learning_rate": 3.11494032515263e-05,
      "loss": 3.0167,
      "step": 196686
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9948136806488037,
      "learning_rate": 3.114758823853656e-05,
      "loss": 2.6689,
      "step": 196687
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8993420600891113,
      "learning_rate": 3.114577327553164e-05,
      "loss": 2.8701,
      "step": 196688
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7885897159576416,
      "learning_rate": 3.1143958362511735e-05,
      "loss": 3.0398,
      "step": 196689
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4087443351745605,
      "learning_rate": 3.1142143499477276e-05,
      "loss": 2.9321,
      "step": 196690
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.69730281829834,
      "learning_rate": 3.1140328686428576e-05,
      "loss": 3.1213,
      "step": 196691
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.274744749069214,
      "learning_rate": 3.113851392336598e-05,
      "loss": 2.8511,
      "step": 196692
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.890709400177002,
      "learning_rate": 3.113669921028974e-05,
      "loss": 2.9644,
      "step": 196693
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.625796318054199,
      "learning_rate": 3.1134884547200315e-05,
      "loss": 2.906,
      "step": 196694
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.572791576385498,
      "learning_rate": 3.113306993409793e-05,
      "loss": 3.0048,
      "step": 196695
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9903013706207275,
      "learning_rate": 3.1131255370983065e-05,
      "loss": 2.9276,
      "step": 196696
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.54181170463562,
      "learning_rate": 3.1129440857855945e-05,
      "loss": 2.7692,
      "step": 196697
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5264124870300293,
      "learning_rate": 3.1127626394716975e-05,
      "loss": 2.9363,
      "step": 196698
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6407411098480225,
      "learning_rate": 3.112581198156635e-05,
      "loss": 3.0192,
      "step": 196699
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.9029743671417236,
      "learning_rate": 3.1123997618404607e-05,
      "loss": 2.8167,
      "step": 196700
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8514883518218994,
      "learning_rate": 3.1122183305231906e-05,
      "loss": 2.9845,
      "step": 196701
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.617644786834717,
      "learning_rate": 3.112036904204875e-05,
      "loss": 2.9341,
      "step": 196702
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2534031867980957,
      "learning_rate": 3.111855482885538e-05,
      "loss": 2.9216,
      "step": 196703
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.691145896911621,
      "learning_rate": 3.1116740665652154e-05,
      "loss": 2.8233,
      "step": 196704
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.988631010055542,
      "learning_rate": 3.1114926552439334e-05,
      "loss": 3.0205,
      "step": 196705
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0201005935668945,
      "learning_rate": 3.111311248921736e-05,
      "loss": 3.0022,
      "step": 196706
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9411990642547607,
      "learning_rate": 3.11112984759865e-05,
      "loss": 2.6804,
      "step": 196707
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.164010524749756,
      "learning_rate": 3.110948451274719e-05,
      "loss": 2.8561,
      "step": 196708
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2603635787963867,
      "learning_rate": 3.110767059949965e-05,
      "loss": 3.0439,
      "step": 196709
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.507354974746704,
      "learning_rate": 3.110585673624435e-05,
      "loss": 2.972,
      "step": 196710
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9484856128692627,
      "learning_rate": 3.1104042922981466e-05,
      "loss": 2.7855,
      "step": 196711
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7017149925231934,
      "learning_rate": 3.110222915971146e-05,
      "loss": 2.8719,
      "step": 196712
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.453141927719116,
      "learning_rate": 3.1100415446434555e-05,
      "loss": 3.0671,
      "step": 196713
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.821486234664917,
      "learning_rate": 3.109860178315123e-05,
      "loss": 3.0371,
      "step": 196714
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.619366407394409,
      "learning_rate": 3.109678816986172e-05,
      "loss": 3.0628,
      "step": 196715
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.284791946411133,
      "learning_rate": 3.109497460656648e-05,
      "loss": 2.8153,
      "step": 196716
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.573988914489746,
      "learning_rate": 3.1093161093265614e-05,
      "loss": 2.8845,
      "step": 196717
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.062169075012207,
      "learning_rate": 3.109134762995973e-05,
      "loss": 2.816,
      "step": 196718
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2287745475769043,
      "learning_rate": 3.1089534216648945e-05,
      "loss": 2.9325,
      "step": 196719
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5856783390045166,
      "learning_rate": 3.108772085333374e-05,
      "loss": 2.9565,
      "step": 196720
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6013824939727783,
      "learning_rate": 3.1085907540014376e-05,
      "loss": 2.7113,
      "step": 196721
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6127257347106934,
      "learning_rate": 3.108409427669132e-05,
      "loss": 2.9908,
      "step": 196722
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5086302757263184,
      "learning_rate": 3.1082281063364665e-05,
      "loss": 3.0503,
      "step": 196723
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.906803846359253,
      "learning_rate": 3.108046790003499e-05,
      "loss": 3.0025,
      "step": 196724
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6940553188323975,
      "learning_rate": 3.107865478670245e-05,
      "loss": 3.0597,
      "step": 196725
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1880648136138916,
      "learning_rate": 3.107684172336752e-05,
      "loss": 3.02,
      "step": 196726
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.391817092895508,
      "learning_rate": 3.107502871003042e-05,
      "loss": 2.7168,
      "step": 196727
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.586859703063965,
      "learning_rate": 3.1073215746691636e-05,
      "loss": 2.8948,
      "step": 196728
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0766844749450684,
      "learning_rate": 3.107140283335139e-05,
      "loss": 2.987,
      "step": 196729
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0337958335876465,
      "learning_rate": 3.1069589970010044e-05,
      "loss": 2.6888,
      "step": 196730
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.033986806869507,
      "learning_rate": 3.10677771566679e-05,
      "loss": 2.9952,
      "step": 196731
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.493591070175171,
      "learning_rate": 3.106596439332537e-05,
      "loss": 2.9144,
      "step": 196732
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.783092498779297,
      "learning_rate": 3.106415167998274e-05,
      "loss": 2.8362,
      "step": 196733
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.576721429824829,
      "learning_rate": 3.106233901664038e-05,
      "loss": 3.1602,
      "step": 196734
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7572901248931885,
      "learning_rate": 3.106052640329859e-05,
      "loss": 3.0503,
      "step": 196735
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.676159381866455,
      "learning_rate": 3.105871383995777e-05,
      "loss": 2.7756,
      "step": 196736
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7736494541168213,
      "learning_rate": 3.105690132661812e-05,
      "loss": 2.6684,
      "step": 196737
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.998429536819458,
      "learning_rate": 3.1055088863280144e-05,
      "loss": 3.232,
      "step": 196738
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9843688011169434,
      "learning_rate": 3.1053276449944034e-05,
      "loss": 2.6488,
      "step": 196739
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.35375714302063,
      "learning_rate": 3.105146408661029e-05,
      "loss": 3.1664,
      "step": 196740
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.2524943351745605,
      "learning_rate": 3.104965177327911e-05,
      "loss": 2.9566,
      "step": 196741
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.63771915435791,
      "learning_rate": 3.1047839509950844e-05,
      "loss": 2.6596,
      "step": 196742
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.5840044021606445,
      "learning_rate": 3.104602729662591e-05,
      "loss": 3.0149,
      "step": 196743
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5089800357818604,
      "learning_rate": 3.1044215133304606e-05,
      "loss": 2.9485,
      "step": 196744
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3692219257354736,
      "learning_rate": 3.104240301998717e-05,
      "loss": 2.7807,
      "step": 196745
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8534584045410156,
      "learning_rate": 3.104059095667414e-05,
      "loss": 2.9959,
      "step": 196746
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7216062545776367,
      "learning_rate": 3.1038778943365704e-05,
      "loss": 2.6928,
      "step": 196747
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9268081188201904,
      "learning_rate": 3.103696698006217e-05,
      "loss": 2.8102,
      "step": 196748
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.898538589477539,
      "learning_rate": 3.1035155066764005e-05,
      "loss": 2.7613,
      "step": 196749
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.55966854095459,
      "learning_rate": 3.103334320347151e-05,
      "loss": 2.8145,
      "step": 196750
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8743975162506104,
      "learning_rate": 3.103153139018491e-05,
      "loss": 2.7055,
      "step": 196751
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7675962448120117,
      "learning_rate": 3.10297196269047e-05,
      "loss": 3.1305,
      "step": 196752
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4185714721679688,
      "learning_rate": 3.102790791363114e-05,
      "loss": 2.9142,
      "step": 196753
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0698368549346924,
      "learning_rate": 3.1026096250364494e-05,
      "loss": 2.9364,
      "step": 196754
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8778040409088135,
      "learning_rate": 3.1024284637105227e-05,
      "loss": 2.7917,
      "step": 196755
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6993603706359863,
      "learning_rate": 3.1022473073853585e-05,
      "loss": 2.9647,
      "step": 196756
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.664804220199585,
      "learning_rate": 3.102066156061e-05,
      "loss": 2.954,
      "step": 196757
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5157055854797363,
      "learning_rate": 3.101885009737472e-05,
      "loss": 3.1259,
      "step": 196758
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7745018005371094,
      "learning_rate": 3.1017038684148175e-05,
      "loss": 2.8103,
      "step": 196759
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.586214065551758,
      "learning_rate": 3.1015227320930526e-05,
      "loss": 2.9478,
      "step": 196760
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2820866107940674,
      "learning_rate": 3.1013416007722305e-05,
      "loss": 2.9688,
      "step": 196761
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.420743227005005,
      "learning_rate": 3.101160474452371e-05,
      "loss": 3.0512,
      "step": 196762
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5136988162994385,
      "learning_rate": 3.100979353133518e-05,
      "loss": 3.1663,
      "step": 196763
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7153286933898926,
      "learning_rate": 3.100798236815701e-05,
      "loss": 2.9423,
      "step": 196764
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.753377676010132,
      "learning_rate": 3.100617125498954e-05,
      "loss": 2.7393,
      "step": 196765
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9220693111419678,
      "learning_rate": 3.100436019183307e-05,
      "loss": 2.9021,
      "step": 196766
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0124189853668213,
      "learning_rate": 3.1002549178687984e-05,
      "loss": 2.8589,
      "step": 196767
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.579930305480957,
      "learning_rate": 3.1000738215554564e-05,
      "loss": 2.7624,
      "step": 196768
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.882834434509277,
      "learning_rate": 3.099892730243324e-05,
      "loss": 2.9633,
      "step": 196769
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.617384672164917,
      "learning_rate": 3.0997116439324276e-05,
      "loss": 2.6317,
      "step": 196770
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2455554008483887,
      "learning_rate": 3.099530562622804e-05,
      "loss": 2.9859,
      "step": 196771
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8630993366241455,
      "learning_rate": 3.09934948631448e-05,
      "loss": 2.8453,
      "step": 196772
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.93463134765625,
      "learning_rate": 3.0991684150075014e-05,
      "loss": 2.9369,
      "step": 196773
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.971144914627075,
      "learning_rate": 3.098987348701886e-05,
      "loss": 3.1298,
      "step": 196774
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.285675048828125,
      "learning_rate": 3.098806287397686e-05,
      "loss": 3.0353,
      "step": 196775
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.480931282043457,
      "learning_rate": 3.098625231094919e-05,
      "loss": 2.7464,
      "step": 196776
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.3622145652771,
      "learning_rate": 3.098444179793634e-05,
      "loss": 2.8976,
      "step": 196777
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.285309314727783,
      "learning_rate": 3.098263133493849e-05,
      "loss": 2.9368,
      "step": 196778
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.417836666107178,
      "learning_rate": 3.098082092195606e-05,
      "loss": 2.8172,
      "step": 196779
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.485356569290161,
      "learning_rate": 3.097901055898936e-05,
      "loss": 3.043,
      "step": 196780
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.231925964355469,
      "learning_rate": 3.097720024603878e-05,
      "loss": 2.9296,
      "step": 196781
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.892554998397827,
      "learning_rate": 3.097538998310456e-05,
      "loss": 2.9716,
      "step": 196782
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6396114826202393,
      "learning_rate": 3.09735797701872e-05,
      "loss": 2.9684,
      "step": 196783
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.55155873298645,
      "learning_rate": 3.097176960728682e-05,
      "loss": 2.889,
      "step": 196784
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3926289081573486,
      "learning_rate": 3.0969959494403907e-05,
      "loss": 2.9547,
      "step": 196785
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.923661947250366,
      "learning_rate": 3.0968149431538716e-05,
      "loss": 2.9553,
      "step": 196786
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.567720413208008,
      "learning_rate": 3.0966339418691674e-05,
      "loss": 3.1335,
      "step": 196787
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.973148822784424,
      "learning_rate": 3.096452945586303e-05,
      "loss": 2.6551,
      "step": 196788
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.153686046600342,
      "learning_rate": 3.096271954305327e-05,
      "loss": 3.0252,
      "step": 196789
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5148696899414062,
      "learning_rate": 3.09609096802625e-05,
      "loss": 2.9995,
      "step": 196790
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.401863098144531,
      "learning_rate": 3.095909986749122e-05,
      "loss": 3.0142,
      "step": 196791
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.332252025604248,
      "learning_rate": 3.0957290104739664e-05,
      "loss": 3.0979,
      "step": 196792
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5967750549316406,
      "learning_rate": 3.095548039200829e-05,
      "loss": 3.0487,
      "step": 196793
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.047882080078125,
      "learning_rate": 3.095367072929731e-05,
      "loss": 2.688,
      "step": 196794
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0540215969085693,
      "learning_rate": 3.095186111660725e-05,
      "loss": 2.8845,
      "step": 196795
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7322936058044434,
      "learning_rate": 3.09500515539382e-05,
      "loss": 2.9554,
      "step": 196796
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.156610012054443,
      "learning_rate": 3.094824204129065e-05,
      "loss": 3.1029,
      "step": 196797
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9513330459594727,
      "learning_rate": 3.094643257866488e-05,
      "loss": 2.7902,
      "step": 196798
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.689766883850098,
      "learning_rate": 3.094462316606127e-05,
      "loss": 2.8031,
      "step": 196799
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7099924087524414,
      "learning_rate": 3.0942813803480106e-05,
      "loss": 2.9584,
      "step": 196800
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.188681602478027,
      "learning_rate": 3.094100449092183e-05,
      "loss": 3.0322,
      "step": 196801
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.1142964363098145,
      "learning_rate": 3.09391952283866e-05,
      "loss": 2.9589,
      "step": 196802
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2797658443450928,
      "learning_rate": 3.093738601587493e-05,
      "loss": 2.9847,
      "step": 196803
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4647650718688965,
      "learning_rate": 3.093557685338701e-05,
      "loss": 2.9508,
      "step": 196804
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8571174144744873,
      "learning_rate": 3.093376774092331e-05,
      "loss": 3.1816,
      "step": 196805
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.756505012512207,
      "learning_rate": 3.093195867848402e-05,
      "loss": 2.8143,
      "step": 196806
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4751603603363037,
      "learning_rate": 3.0930149666069696e-05,
      "loss": 3.1327,
      "step": 196807
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.441462516784668,
      "learning_rate": 3.092834070368041e-05,
      "loss": 2.9956,
      "step": 196808
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0917463302612305,
      "learning_rate": 3.092653179131668e-05,
      "loss": 2.6724,
      "step": 196809
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8373353481292725,
      "learning_rate": 3.092472292897873e-05,
      "loss": 2.8688,
      "step": 196810
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.871067762374878,
      "learning_rate": 3.0922914116667e-05,
      "loss": 3.1126,
      "step": 196811
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0302231311798096,
      "learning_rate": 3.092110535438175e-05,
      "loss": 2.7281,
      "step": 196812
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.842618227005005,
      "learning_rate": 3.091929664212345e-05,
      "loss": 3.1283,
      "step": 196813
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.434105634689331,
      "learning_rate": 3.0917487979892194e-05,
      "loss": 2.8229,
      "step": 196814
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5826737880706787,
      "learning_rate": 3.091567936768856e-05,
      "loss": 2.9171,
      "step": 196815
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9485104084014893,
      "learning_rate": 3.091387080551267e-05,
      "loss": 2.8911,
      "step": 196816
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6139965057373047,
      "learning_rate": 3.091206229336507e-05,
      "loss": 2.6933,
      "step": 196817
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5515928268432617,
      "learning_rate": 3.091025383124591e-05,
      "loss": 2.8692,
      "step": 196818
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9617908000946045,
      "learning_rate": 3.0908445419155704e-05,
      "loss": 2.5995,
      "step": 196819
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5407557487487793,
      "learning_rate": 3.090663705709468e-05,
      "loss": 2.6273,
      "step": 196820
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7718584537506104,
      "learning_rate": 3.090482874506319e-05,
      "loss": 2.9764,
      "step": 196821
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.10756254196167,
      "learning_rate": 3.0903020483061494e-05,
      "loss": 2.7869,
      "step": 196822
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7067766189575195,
      "learning_rate": 3.0901212271090104e-05,
      "loss": 3.0036,
      "step": 196823
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3467156887054443,
      "learning_rate": 3.089940410914916e-05,
      "loss": 2.9126,
      "step": 196824
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9516031742095947,
      "learning_rate": 3.08975959972392e-05,
      "loss": 2.8082,
      "step": 196825
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5944104194641113,
      "learning_rate": 3.089578793536042e-05,
      "loss": 3.1896,
      "step": 196826
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8956189155578613,
      "learning_rate": 3.0893979923513146e-05,
      "loss": 3.061,
      "step": 196827
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9164750576019287,
      "learning_rate": 3.089217196169782e-05,
      "loss": 3.0445,
      "step": 196828
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7004570960998535,
      "learning_rate": 3.089036404991471e-05,
      "loss": 2.995,
      "step": 196829
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0752434730529785,
      "learning_rate": 3.08885561881641e-05,
      "loss": 2.8879,
      "step": 196830
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.572511672973633,
      "learning_rate": 3.088674837644648e-05,
      "loss": 2.9721,
      "step": 196831
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9084415435791016,
      "learning_rate": 3.088494061476207e-05,
      "loss": 2.9963,
      "step": 196832
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1103503704071045,
      "learning_rate": 3.088313290311116e-05,
      "loss": 3.0341,
      "step": 196833
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3400349617004395,
      "learning_rate": 3.088132524149424e-05,
      "loss": 2.7891,
      "step": 196834
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2409679889678955,
      "learning_rate": 3.0879517629911557e-05,
      "loss": 2.6841,
      "step": 196835
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.10650897026062,
      "learning_rate": 3.0877710068363384e-05,
      "loss": 3.0735,
      "step": 196836
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.995326280593872,
      "learning_rate": 3.087590255685015e-05,
      "loss": 3.1123,
      "step": 196837
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6824073791503906,
      "learning_rate": 3.087409509537223e-05,
      "loss": 3.0733,
      "step": 196838
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.741527795791626,
      "learning_rate": 3.0872287683929784e-05,
      "loss": 3.0592,
      "step": 196839
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.059584379196167,
      "learning_rate": 3.087048032252335e-05,
      "loss": 2.9485,
      "step": 196840
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.74574875831604,
      "learning_rate": 3.086867301115309e-05,
      "loss": 3.1123,
      "step": 196841
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.444927453994751,
      "learning_rate": 3.08668657498195e-05,
      "loss": 2.9797,
      "step": 196842
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9216244220733643,
      "learning_rate": 3.0865058538522855e-05,
      "loss": 2.9495,
      "step": 196843
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.676875352859497,
      "learning_rate": 3.086325137726345e-05,
      "loss": 3.0404,
      "step": 196844
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.275001287460327,
      "learning_rate": 3.086144426604159e-05,
      "loss": 2.9777,
      "step": 196845
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8203659057617188,
      "learning_rate": 3.085963720485774e-05,
      "loss": 2.9664,
      "step": 196846
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6843981742858887,
      "learning_rate": 3.085783019371212e-05,
      "loss": 3.2228,
      "step": 196847
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.43707275390625,
      "learning_rate": 3.085602323260514e-05,
      "loss": 2.6292,
      "step": 196848
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.714812755584717,
      "learning_rate": 3.085421632153704e-05,
      "loss": 2.9537,
      "step": 196849
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.771070957183838,
      "learning_rate": 3.0852409460508375e-05,
      "loss": 3.0286,
      "step": 196850
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1483941078186035,
      "learning_rate": 3.085060264951919e-05,
      "loss": 2.8988,
      "step": 196851
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.6476950645446777,
      "learning_rate": 3.084879588857e-05,
      "loss": 2.8539,
      "step": 196852
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.947883367538452,
      "learning_rate": 3.0846989177661055e-05,
      "loss": 2.7385,
      "step": 196853
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9128665924072266,
      "learning_rate": 3.084518251679281e-05,
      "loss": 2.9015,
      "step": 196854
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4757628440856934,
      "learning_rate": 3.084337590596544e-05,
      "loss": 3.0149,
      "step": 196855
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2193596363067627,
      "learning_rate": 3.084156934517951e-05,
      "loss": 2.8796,
      "step": 196856
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.823930025100708,
      "learning_rate": 3.083976283443508e-05,
      "loss": 3.0182,
      "step": 196857
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.823251724243164,
      "learning_rate": 3.0837956373732686e-05,
      "loss": 3.0405,
      "step": 196858
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.714571952819824,
      "learning_rate": 3.083614996307253e-05,
      "loss": 2.9942,
      "step": 196859
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0572328567504883,
      "learning_rate": 3.083434360245508e-05,
      "loss": 3.1089,
      "step": 196860
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2046921253204346,
      "learning_rate": 3.0832537291880535e-05,
      "loss": 2.8383,
      "step": 196861
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.8290677070617676,
      "learning_rate": 3.083073103134942e-05,
      "loss": 2.6693,
      "step": 196862
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.543137550354004,
      "learning_rate": 3.082892482086184e-05,
      "loss": 2.8276,
      "step": 196863
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.826371908187866,
      "learning_rate": 3.0827118660418305e-05,
      "loss": 2.8129,
      "step": 196864
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.759489059448242,
      "learning_rate": 3.082531255001903e-05,
      "loss": 2.8323,
      "step": 196865
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7950267791748047,
      "learning_rate": 3.08235064896645e-05,
      "loss": 3.0044,
      "step": 196866
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.724812984466553,
      "learning_rate": 3.082170047935486e-05,
      "loss": 3.2218,
      "step": 196867
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1628313064575195,
      "learning_rate": 3.0819894519090694e-05,
      "loss": 3.0046,
      "step": 196868
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4376487731933594,
      "learning_rate": 3.081808860887206e-05,
      "loss": 2.7505,
      "step": 196869
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.0321197509765625,
      "learning_rate": 3.081628274869946e-05,
      "loss": 3.2564,
      "step": 196870
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0638763904571533,
      "learning_rate": 3.081447693857316e-05,
      "loss": 3.1811,
      "step": 196871
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1706485748291016,
      "learning_rate": 3.08126711784936e-05,
      "loss": 2.9156,
      "step": 196872
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7545998096466064,
      "learning_rate": 3.081086546846097e-05,
      "loss": 2.7568,
      "step": 196873
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.214289426803589,
      "learning_rate": 3.08090598084758e-05,
      "loss": 2.7859,
      "step": 196874
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.746525526046753,
      "learning_rate": 3.08072541985382e-05,
      "loss": 2.9472,
      "step": 196875
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2043590545654297,
      "learning_rate": 3.0805448638648666e-05,
      "loss": 2.7911,
      "step": 196876
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.3454270362854004,
      "learning_rate": 3.08036431288074e-05,
      "loss": 3.038,
      "step": 196877
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.404304504394531,
      "learning_rate": 3.0801837669014894e-05,
      "loss": 2.9799,
      "step": 196878
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7872111797332764,
      "learning_rate": 3.0800032259271355e-05,
      "loss": 2.8969,
      "step": 196879
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7311384677886963,
      "learning_rate": 3.0798226899577285e-05,
      "loss": 3.0003,
      "step": 196880
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.0960893630981445,
      "learning_rate": 3.0796421589932806e-05,
      "loss": 2.8512,
      "step": 196881
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0002405643463135,
      "learning_rate": 3.079461633033839e-05,
      "loss": 2.896,
      "step": 196882
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2350330352783203,
      "learning_rate": 3.079281112079428e-05,
      "loss": 3.0418,
      "step": 196883
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.766108751296997,
      "learning_rate": 3.079100596130093e-05,
      "loss": 3.0348,
      "step": 196884
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0420007705688477,
      "learning_rate": 3.078920085185854e-05,
      "loss": 2.8432,
      "step": 196885
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1352427005767822,
      "learning_rate": 3.0787395792467683e-05,
      "loss": 2.9385,
      "step": 196886
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.616442918777466,
      "learning_rate": 3.0785590783128355e-05,
      "loss": 3.0622,
      "step": 196887
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.7707607746124268,
      "learning_rate": 3.078378582384116e-05,
      "loss": 2.7902,
      "step": 196888
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.979919910430908,
      "learning_rate": 3.0781980914606284e-05,
      "loss": 3.1996,
      "step": 196889
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7092459201812744,
      "learning_rate": 3.078017605542418e-05,
      "loss": 2.8766,
      "step": 196890
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.160557508468628,
      "learning_rate": 3.0778371246295033e-05,
      "loss": 2.7507,
      "step": 196891
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.765554666519165,
      "learning_rate": 3.077656648721941e-05,
      "loss": 2.902,
      "step": 196892
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1188740730285645,
      "learning_rate": 3.077476177819739e-05,
      "loss": 2.7114,
      "step": 196893
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.479815483093262,
      "learning_rate": 3.077295711922949e-05,
      "loss": 3.0903,
      "step": 196894
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.318223237991333,
      "learning_rate": 3.077115251031589e-05,
      "loss": 2.9576,
      "step": 196895
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.476465702056885,
      "learning_rate": 3.076934795145711e-05,
      "loss": 3.0637,
      "step": 196896
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6308510303497314,
      "learning_rate": 3.07675434426533e-05,
      "loss": 2.7351,
      "step": 196897
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2849013805389404,
      "learning_rate": 3.076573898390501e-05,
      "loss": 2.7363,
      "step": 196898
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1345913410186768,
      "learning_rate": 3.076393457521235e-05,
      "loss": 2.9629,
      "step": 196899
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.684248447418213,
      "learning_rate": 3.076213021657581e-05,
      "loss": 2.6026,
      "step": 196900
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.008600950241089,
      "learning_rate": 3.0760325907995595e-05,
      "loss": 3.0531,
      "step": 196901
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2069900035858154,
      "learning_rate": 3.0758521649472214e-05,
      "loss": 3.1172,
      "step": 196902
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.256437301635742,
      "learning_rate": 3.0756717441005816e-05,
      "loss": 2.8942,
      "step": 196903
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.051435947418213,
      "learning_rate": 3.0754913282596884e-05,
      "loss": 2.8937,
      "step": 196904
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0187103748321533,
      "learning_rate": 3.07531091742457e-05,
      "loss": 2.8245,
      "step": 196905
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9767746925354004,
      "learning_rate": 3.0751305115952615e-05,
      "loss": 3.0195,
      "step": 196906
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.8072848320007324,
      "learning_rate": 3.074950110771789e-05,
      "loss": 2.8548,
      "step": 196907
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.949069023132324,
      "learning_rate": 3.074769714954195e-05,
      "loss": 3.035,
      "step": 196908
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.219376564025879,
      "learning_rate": 3.074589324142504e-05,
      "loss": 2.9731,
      "step": 196909
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.34147572517395,
      "learning_rate": 3.074408938336761e-05,
      "loss": 3.1325,
      "step": 196910
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7623140811920166,
      "learning_rate": 3.074228557536994e-05,
      "loss": 2.9133,
      "step": 196911
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.742913246154785,
      "learning_rate": 3.074048181743229e-05,
      "loss": 2.796,
      "step": 196912
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1407694816589355,
      "learning_rate": 3.073867810955517e-05,
      "loss": 2.9174,
      "step": 196913
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.4956552982330322,
      "learning_rate": 3.073687445173877e-05,
      "loss": 2.7657,
      "step": 196914
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0235393047332764,
      "learning_rate": 3.073507084398342e-05,
      "loss": 3.0706,
      "step": 196915
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5466713905334473,
      "learning_rate": 3.073326728628956e-05,
      "loss": 2.9745,
      "step": 196916
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7204010486602783,
      "learning_rate": 3.073146377865746e-05,
      "loss": 2.941,
      "step": 196917
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.184816598892212,
      "learning_rate": 3.072966032108741e-05,
      "loss": 2.8101,
      "step": 196918
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.820103406906128,
      "learning_rate": 3.072785691357988e-05,
      "loss": 2.889,
      "step": 196919
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5412778854370117,
      "learning_rate": 3.072605355613511e-05,
      "loss": 2.929,
      "step": 196920
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.688859701156616,
      "learning_rate": 3.072425024875339e-05,
      "loss": 2.7402,
      "step": 196921
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.897461175918579,
      "learning_rate": 3.072244699143516e-05,
      "loss": 2.7785,
      "step": 196922
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7851977348327637,
      "learning_rate": 3.072064378418074e-05,
      "loss": 2.9743,
      "step": 196923
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0776784420013428,
      "learning_rate": 3.0718840626990346e-05,
      "loss": 2.8844,
      "step": 196924
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5733747482299805,
      "learning_rate": 3.071703751986447e-05,
      "loss": 2.8058,
      "step": 196925
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0499982833862305,
      "learning_rate": 3.071523446280332e-05,
      "loss": 2.9474,
      "step": 196926
    },
    {
      "epoch": 2.56,
      "grad_norm": 5.271966934204102,
      "learning_rate": 3.071343145580738e-05,
      "loss": 2.7425,
      "step": 196927
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.1056933403015137,
      "learning_rate": 3.071162849887686e-05,
      "loss": 2.9165,
      "step": 196928
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0455353260040283,
      "learning_rate": 3.070982559201213e-05,
      "loss": 2.9636,
      "step": 196929
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.580783367156982,
      "learning_rate": 3.0708022735213477e-05,
      "loss": 2.7846,
      "step": 196930
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0913166999816895,
      "learning_rate": 3.070621992848138e-05,
      "loss": 2.8286,
      "step": 196931
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.001101016998291,
      "learning_rate": 3.070441717181597e-05,
      "loss": 3.0354,
      "step": 196932
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8501360416412354,
      "learning_rate": 3.070261446521777e-05,
      "loss": 2.7406,
      "step": 196933
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.186398506164551,
      "learning_rate": 3.0700811808687054e-05,
      "loss": 3.1154,
      "step": 196934
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7014219760894775,
      "learning_rate": 3.0699009202224125e-05,
      "loss": 2.9817,
      "step": 196935
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6643471717834473,
      "learning_rate": 3.0697206645829274e-05,
      "loss": 2.7644,
      "step": 196936
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.478461742401123,
      "learning_rate": 3.069540413950295e-05,
      "loss": 2.9279,
      "step": 196937
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.980623245239258,
      "learning_rate": 3.069360168324536e-05,
      "loss": 2.8397,
      "step": 196938
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6547863483428955,
      "learning_rate": 3.0691799277056996e-05,
      "loss": 2.8253,
      "step": 196939
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.101508140563965,
      "learning_rate": 3.0689996920938044e-05,
      "loss": 3.177,
      "step": 196940
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6453795433044434,
      "learning_rate": 3.068819461488904e-05,
      "loss": 3.0624,
      "step": 196941
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.038572072982788,
      "learning_rate": 3.068639235891005e-05,
      "loss": 2.5986,
      "step": 196942
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.0598464012146,
      "learning_rate": 3.068459015300161e-05,
      "loss": 2.9706,
      "step": 196943
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.763662576675415,
      "learning_rate": 3.0682787997163914e-05,
      "loss": 2.8314,
      "step": 196944
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8711602687835693,
      "learning_rate": 3.0680985891397435e-05,
      "loss": 3.0971,
      "step": 196945
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.665924310684204,
      "learning_rate": 3.06791838357024e-05,
      "loss": 2.7758,
      "step": 196946
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.023832321166992,
      "learning_rate": 3.0677381830079285e-05,
      "loss": 3.0288,
      "step": 196947
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6937594413757324,
      "learning_rate": 3.0675579874528244e-05,
      "loss": 2.8086,
      "step": 196948
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.523150682449341,
      "learning_rate": 3.067377796904972e-05,
      "loss": 2.8913,
      "step": 196949
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.126168966293335,
      "learning_rate": 3.0671976113644006e-05,
      "loss": 2.8218,
      "step": 196950
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8132736682891846,
      "learning_rate": 3.067017430831147e-05,
      "loss": 3.0494,
      "step": 196951
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.731891393661499,
      "learning_rate": 3.066837255305238e-05,
      "loss": 2.9727,
      "step": 196952
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.0706987380981445,
      "learning_rate": 3.066657084786727e-05,
      "loss": 2.9067,
      "step": 196953
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.2268781661987305,
      "learning_rate": 3.06647691927562e-05,
      "loss": 2.9188,
      "step": 196954
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8267366886138916,
      "learning_rate": 3.0662967587719676e-05,
      "loss": 3.1035,
      "step": 196955
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.148235321044922,
      "learning_rate": 3.066116603275793e-05,
      "loss": 2.7975,
      "step": 196956
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9598519802093506,
      "learning_rate": 3.0659364527871436e-05,
      "loss": 2.9661,
      "step": 196957
    },
    {
      "epoch": 2.56,
      "grad_norm": 4.656993389129639,
      "learning_rate": 3.0657563073060374e-05,
      "loss": 2.8989,
      "step": 196958
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.10201358795166,
      "learning_rate": 3.06557616683253e-05,
      "loss": 3.0012,
      "step": 196959
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.435574531555176,
      "learning_rate": 3.065396031366626e-05,
      "loss": 2.6575,
      "step": 196960
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7624173164367676,
      "learning_rate": 3.06521590090838e-05,
      "loss": 3.1098,
      "step": 196961
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.4854438304901123,
      "learning_rate": 3.0650357754578145e-05,
      "loss": 3.0672,
      "step": 196962
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.527939558029175,
      "learning_rate": 3.0648556550149705e-05,
      "loss": 2.9605,
      "step": 196963
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5278351306915283,
      "learning_rate": 3.064675539579874e-05,
      "loss": 2.8289,
      "step": 196964
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.98810076713562,
      "learning_rate": 3.0644954291525745e-05,
      "loss": 3.0053,
      "step": 196965
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.976372241973877,
      "learning_rate": 3.064315323733083e-05,
      "loss": 2.6853,
      "step": 196966
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9260616302490234,
      "learning_rate": 3.0641352233214486e-05,
      "loss": 2.9062,
      "step": 196967
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.831996202468872,
      "learning_rate": 3.063955127917692e-05,
      "loss": 2.7226,
      "step": 196968
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.738487482070923,
      "learning_rate": 3.063775037521863e-05,
      "loss": 2.888,
      "step": 196969
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.182806968688965,
      "learning_rate": 3.063594952133981e-05,
      "loss": 2.7977,
      "step": 196970
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.897282838821411,
      "learning_rate": 3.0634148717540966e-05,
      "loss": 3.0566,
      "step": 196971
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0481269359588623,
      "learning_rate": 3.063234796382219e-05,
      "loss": 2.8774,
      "step": 196972
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.5005900859832764,
      "learning_rate": 3.063054726018399e-05,
      "loss": 2.9483,
      "step": 196973
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.20444655418396,
      "learning_rate": 3.062874660662663e-05,
      "loss": 2.7071,
      "step": 196974
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0922183990478516,
      "learning_rate": 3.062694600315051e-05,
      "loss": 2.5887,
      "step": 196975
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0808868408203125,
      "learning_rate": 3.062514544975586e-05,
      "loss": 2.9318,
      "step": 196976
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.337480068206787,
      "learning_rate": 3.062334494644315e-05,
      "loss": 2.9996,
      "step": 196977
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.94795298576355,
      "learning_rate": 3.062154449321265e-05,
      "loss": 2.7977,
      "step": 196978
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9489128589630127,
      "learning_rate": 3.061974409006468e-05,
      "loss": 2.9969,
      "step": 196979
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.272573232650757,
      "learning_rate": 3.0617943736999554e-05,
      "loss": 3.0152,
      "step": 196980
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9372711181640625,
      "learning_rate": 3.061614343401766e-05,
      "loss": 2.9621,
      "step": 196981
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.753929853439331,
      "learning_rate": 3.061434318111927e-05,
      "loss": 2.9013,
      "step": 196982
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0711562633514404,
      "learning_rate": 3.0612542978304786e-05,
      "loss": 2.8643,
      "step": 196983
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.7977211475372314,
      "learning_rate": 3.061074282557454e-05,
      "loss": 2.9306,
      "step": 196984
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8004674911499023,
      "learning_rate": 3.0608942722928864e-05,
      "loss": 2.8142,
      "step": 196985
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.335090160369873,
      "learning_rate": 3.060714267036799e-05,
      "loss": 2.7771,
      "step": 196986
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.074054479598999,
      "learning_rate": 3.060534266789238e-05,
      "loss": 2.6871,
      "step": 196987
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9044690132141113,
      "learning_rate": 3.0603542715502284e-05,
      "loss": 2.6248,
      "step": 196988
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9847869873046875,
      "learning_rate": 3.060174281319812e-05,
      "loss": 2.7922,
      "step": 196989
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.9834015369415283,
      "learning_rate": 3.059994296098018e-05,
      "loss": 3.1408,
      "step": 196990
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.8071632385253906,
      "learning_rate": 3.059814315884882e-05,
      "loss": 2.8451,
      "step": 196991
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5308659076690674,
      "learning_rate": 3.059634340680426e-05,
      "loss": 2.8888,
      "step": 196992
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.0779306888580322,
      "learning_rate": 3.0594543704847e-05,
      "loss": 3.1475,
      "step": 196993
    },
    {
      "epoch": 2.56,
      "grad_norm": 3.5483503341674805,
      "learning_rate": 3.059274405297724e-05,
      "loss": 2.9783,
      "step": 196994
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.260908365249634,
      "learning_rate": 3.059094445119541e-05,
      "loss": 2.9011,
      "step": 196995
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7123570442199707,
      "learning_rate": 3.058914489950185e-05,
      "loss": 3.2253,
      "step": 196996
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.832716703414917,
      "learning_rate": 3.0587345397896825e-05,
      "loss": 3.068,
      "step": 196997
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.388979434967041,
      "learning_rate": 3.058554594638063e-05,
      "loss": 2.85,
      "step": 196998
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.8424036502838135,
      "learning_rate": 3.058374654495377e-05,
      "loss": 3.1053,
      "step": 196999
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2783172130584717,
      "learning_rate": 3.058194719361637e-05,
      "loss": 2.6648,
      "step": 197000
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.758427619934082,
      "learning_rate": 3.0580147892368947e-05,
      "loss": 2.8778,
      "step": 197001
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7113358974456787,
      "learning_rate": 3.057834864121178e-05,
      "loss": 2.7566,
      "step": 197002
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8324997425079346,
      "learning_rate": 3.0576549440145115e-05,
      "loss": 2.6653,
      "step": 197003
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.958275318145752,
      "learning_rate": 3.057475028916938e-05,
      "loss": 2.7651,
      "step": 197004
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9564971923828125,
      "learning_rate": 3.0572951188284914e-05,
      "loss": 2.8532,
      "step": 197005
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.253441572189331,
      "learning_rate": 3.057115213749197e-05,
      "loss": 2.8575,
      "step": 197006
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.258444309234619,
      "learning_rate": 3.0569353136791e-05,
      "loss": 2.6017,
      "step": 197007
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6030757427215576,
      "learning_rate": 3.0567554186182256e-05,
      "loss": 2.7808,
      "step": 197008
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7710771560668945,
      "learning_rate": 3.056575528566604e-05,
      "loss": 2.8197,
      "step": 197009
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.284943103790283,
      "learning_rate": 3.0563956435242785e-05,
      "loss": 2.9826,
      "step": 197010
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.646665334701538,
      "learning_rate": 3.05621576349127e-05,
      "loss": 2.9357,
      "step": 197011
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.903972864151001,
      "learning_rate": 3.0560358884676275e-05,
      "loss": 2.759,
      "step": 197012
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.191372871398926,
      "learning_rate": 3.055856018453378e-05,
      "loss": 2.8577,
      "step": 197013
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9339804649353027,
      "learning_rate": 3.0556761534485506e-05,
      "loss": 2.7148,
      "step": 197014
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7663629055023193,
      "learning_rate": 3.0554962934531767e-05,
      "loss": 2.8587,
      "step": 197015
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9094557762145996,
      "learning_rate": 3.0553164384673026e-05,
      "loss": 2.7955,
      "step": 197016
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.324052095413208,
      "learning_rate": 3.0551365884909473e-05,
      "loss": 3.0177,
      "step": 197017
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3340179920196533,
      "learning_rate": 3.054956743524155e-05,
      "loss": 2.9486,
      "step": 197018
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.664384365081787,
      "learning_rate": 3.0547769035669555e-05,
      "loss": 2.7356,
      "step": 197019
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.930675506591797,
      "learning_rate": 3.054597068619382e-05,
      "loss": 2.9375,
      "step": 197020
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9180121421813965,
      "learning_rate": 3.054417238681461e-05,
      "loss": 2.7276,
      "step": 197021
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.523536443710327,
      "learning_rate": 3.05423741375324e-05,
      "loss": 3.0647,
      "step": 197022
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5099432468414307,
      "learning_rate": 3.054057593834738e-05,
      "loss": 2.7753,
      "step": 197023
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.785006284713745,
      "learning_rate": 3.0538777789259984e-05,
      "loss": 2.9635,
      "step": 197024
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8399951457977295,
      "learning_rate": 3.053697969027048e-05,
      "loss": 2.8385,
      "step": 197025
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6102564334869385,
      "learning_rate": 3.053518164137937e-05,
      "loss": 2.8042,
      "step": 197026
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4472134113311768,
      "learning_rate": 3.053338364258672e-05,
      "loss": 2.8233,
      "step": 197027
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9553353786468506,
      "learning_rate": 3.053158569389306e-05,
      "loss": 2.9719,
      "step": 197028
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5586977005004883,
      "learning_rate": 3.052978779529862e-05,
      "loss": 3.0016,
      "step": 197029
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.9434149265289307,
      "learning_rate": 3.0527989946803846e-05,
      "loss": 2.7473,
      "step": 197030
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.158771991729736,
      "learning_rate": 3.0526192148408925e-05,
      "loss": 2.8732,
      "step": 197031
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.960902690887451,
      "learning_rate": 3.0524394400114396e-05,
      "loss": 2.817,
      "step": 197032
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3231308460235596,
      "learning_rate": 3.052259670192032e-05,
      "loss": 2.9965,
      "step": 197033
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4202873706817627,
      "learning_rate": 3.0520799053827275e-05,
      "loss": 3.1022,
      "step": 197034
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.672699451446533,
      "learning_rate": 3.051900145583544e-05,
      "loss": 2.9144,
      "step": 197035
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2624495029449463,
      "learning_rate": 3.0517203907945277e-05,
      "loss": 2.9413,
      "step": 197036
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.463390827178955,
      "learning_rate": 3.051540641015696e-05,
      "loss": 2.8241,
      "step": 197037
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.296393632888794,
      "learning_rate": 3.051360896247107e-05,
      "loss": 2.7856,
      "step": 197038
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2168831825256348,
      "learning_rate": 3.0511811564887633e-05,
      "loss": 2.9263,
      "step": 197039
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.575685739517212,
      "learning_rate": 3.051001421740722e-05,
      "loss": 2.9349,
      "step": 197040
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.946572780609131,
      "learning_rate": 3.0508216920030027e-05,
      "loss": 3.0667,
      "step": 197041
    },
    {
      "epoch": 2.57,
      "grad_norm": 5.337782859802246,
      "learning_rate": 3.050641967275649e-05,
      "loss": 2.9199,
      "step": 197042
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4173200130462646,
      "learning_rate": 3.0504622475586838e-05,
      "loss": 2.9263,
      "step": 197043
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1185834407806396,
      "learning_rate": 3.050282532852154e-05,
      "loss": 3.1152,
      "step": 197044
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0093445777893066,
      "learning_rate": 3.0501028231560832e-05,
      "loss": 2.8511,
      "step": 197045
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.112089157104492,
      "learning_rate": 3.0499231184705074e-05,
      "loss": 3.2445,
      "step": 197046
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6962945461273193,
      "learning_rate": 3.049743418795457e-05,
      "loss": 3.0228,
      "step": 197047
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8079428672790527,
      "learning_rate": 3.049563724130969e-05,
      "loss": 2.8379,
      "step": 197048
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0274155139923096,
      "learning_rate": 3.0493840344770726e-05,
      "loss": 3.0124,
      "step": 197049
    },
    {
      "epoch": 2.57,
      "grad_norm": 5.842264652252197,
      "learning_rate": 3.0492043498338116e-05,
      "loss": 2.8161,
      "step": 197050
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.891129732131958,
      "learning_rate": 3.0490246702012088e-05,
      "loss": 2.8479,
      "step": 197051
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.079010009765625,
      "learning_rate": 3.0488449955793016e-05,
      "loss": 3.0671,
      "step": 197052
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8621184825897217,
      "learning_rate": 3.048665325968119e-05,
      "loss": 3.0045,
      "step": 197053
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8231143951416016,
      "learning_rate": 3.0484856613677023e-05,
      "loss": 2.9051,
      "step": 197054
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0167107582092285,
      "learning_rate": 3.048306001778077e-05,
      "loss": 2.9234,
      "step": 197055
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9347264766693115,
      "learning_rate": 3.048126347199287e-05,
      "loss": 2.9323,
      "step": 197056
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.795724391937256,
      "learning_rate": 3.047946697631355e-05,
      "loss": 2.8089,
      "step": 197057
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.0609564781188965,
      "learning_rate": 3.0477670530743214e-05,
      "loss": 3.0079,
      "step": 197058
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.871154546737671,
      "learning_rate": 3.0475874135282098e-05,
      "loss": 3.0909,
      "step": 197059
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8499743938446045,
      "learning_rate": 3.0474077789930662e-05,
      "loss": 2.977,
      "step": 197060
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.61072039604187,
      "learning_rate": 3.0472281494689143e-05,
      "loss": 2.6299,
      "step": 197061
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.999342203140259,
      "learning_rate": 3.047048524955794e-05,
      "loss": 3.0213,
      "step": 197062
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.128540277481079,
      "learning_rate": 3.0468689054537387e-05,
      "loss": 2.6818,
      "step": 197063
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9251160621643066,
      "learning_rate": 3.0466892909627784e-05,
      "loss": 2.7988,
      "step": 197064
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.709855318069458,
      "learning_rate": 3.0465096814829425e-05,
      "loss": 2.9672,
      "step": 197065
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2392210960388184,
      "learning_rate": 3.0463300770142752e-05,
      "loss": 3.1657,
      "step": 197066
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.556243658065796,
      "learning_rate": 3.0461504775567958e-05,
      "loss": 3.1597,
      "step": 197067
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0932188034057617,
      "learning_rate": 3.0459708831105544e-05,
      "loss": 2.9518,
      "step": 197068
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.553558349609375,
      "learning_rate": 3.0457912936755747e-05,
      "loss": 2.8754,
      "step": 197069
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1002357006073,
      "learning_rate": 3.0456117092518896e-05,
      "loss": 2.9673,
      "step": 197070
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3551642894744873,
      "learning_rate": 3.045432129839529e-05,
      "loss": 2.843,
      "step": 197071
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.758748769760132,
      "learning_rate": 3.0452525554385397e-05,
      "loss": 2.8356,
      "step": 197072
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.783771514892578,
      "learning_rate": 3.0450729860489386e-05,
      "loss": 2.9996,
      "step": 197073
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7834231853485107,
      "learning_rate": 3.044893421670772e-05,
      "loss": 2.8625,
      "step": 197074
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.624366521835327,
      "learning_rate": 3.04471386230407e-05,
      "loss": 2.7963,
      "step": 197075
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5310561656951904,
      "learning_rate": 3.044534307948866e-05,
      "loss": 2.9769,
      "step": 197076
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.100074052810669,
      "learning_rate": 3.044354758605183e-05,
      "loss": 3.0041,
      "step": 197077
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4303135871887207,
      "learning_rate": 3.0441752142730712e-05,
      "loss": 3.2085,
      "step": 197078
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9936416149139404,
      "learning_rate": 3.0439956749525506e-05,
      "loss": 2.8264,
      "step": 197079
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3463330268859863,
      "learning_rate": 3.0438161406436647e-05,
      "loss": 2.598,
      "step": 197080
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.44028639793396,
      "learning_rate": 3.0436366113464426e-05,
      "loss": 3.0007,
      "step": 197081
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.325048446655273,
      "learning_rate": 3.0434570870609153e-05,
      "loss": 2.9779,
      "step": 197082
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8051373958587646,
      "learning_rate": 3.0432775677871156e-05,
      "loss": 2.8827,
      "step": 197083
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5289857387542725,
      "learning_rate": 3.043098053525084e-05,
      "loss": 2.8525,
      "step": 197084
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9608957767486572,
      "learning_rate": 3.0429185442748427e-05,
      "loss": 2.9174,
      "step": 197085
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1008048057556152,
      "learning_rate": 3.0427390400364392e-05,
      "loss": 3.1218,
      "step": 197086
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.926985740661621,
      "learning_rate": 3.0425595408098968e-05,
      "loss": 3.1507,
      "step": 197087
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.462219715118408,
      "learning_rate": 3.0423800465952487e-05,
      "loss": 2.9799,
      "step": 197088
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.576667547225952,
      "learning_rate": 3.0422005573925345e-05,
      "loss": 3.048,
      "step": 197089
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7602148056030273,
      "learning_rate": 3.0420210732017848e-05,
      "loss": 2.9176,
      "step": 197090
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.821088790893555,
      "learning_rate": 3.0418415940230256e-05,
      "loss": 2.806,
      "step": 197091
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.75173020362854,
      "learning_rate": 3.041662119856304e-05,
      "loss": 2.9655,
      "step": 197092
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.587005615234375,
      "learning_rate": 3.0414826507016467e-05,
      "loss": 3.0474,
      "step": 197093
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.062685012817383,
      "learning_rate": 3.04130318655908e-05,
      "loss": 3.013,
      "step": 197094
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.707883358001709,
      "learning_rate": 3.0411237274286503e-05,
      "loss": 2.8256,
      "step": 197095
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3883674144744873,
      "learning_rate": 3.040944273310385e-05,
      "loss": 2.9464,
      "step": 197096
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5260796546936035,
      "learning_rate": 3.0407648242043105e-05,
      "loss": 2.8812,
      "step": 197097
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.246549367904663,
      "learning_rate": 3.0405853801104728e-05,
      "loss": 2.89,
      "step": 197098
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8415772914886475,
      "learning_rate": 3.040405941028896e-05,
      "loss": 3.206,
      "step": 197099
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.300896167755127,
      "learning_rate": 3.0402265069596167e-05,
      "loss": 3.0056,
      "step": 197100
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7708070278167725,
      "learning_rate": 3.0400470779026708e-05,
      "loss": 2.7954,
      "step": 197101
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.434291362762451,
      "learning_rate": 3.0398676538580823e-05,
      "loss": 2.878,
      "step": 197102
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0964956283569336,
      "learning_rate": 3.0396882348258976e-05,
      "loss": 2.991,
      "step": 197103
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9132637977600098,
      "learning_rate": 3.0395088208061436e-05,
      "loss": 2.8238,
      "step": 197104
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.9188036918640137,
      "learning_rate": 3.0393294117988565e-05,
      "loss": 3.0964,
      "step": 197105
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8832437992095947,
      "learning_rate": 3.0391500078040597e-05,
      "loss": 2.7912,
      "step": 197106
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2176170349121094,
      "learning_rate": 3.0389706088217968e-05,
      "loss": 3.0782,
      "step": 197107
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9031262397766113,
      "learning_rate": 3.0387912148520977e-05,
      "loss": 2.849,
      "step": 197108
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8611466884613037,
      "learning_rate": 3.0386118258949986e-05,
      "loss": 3.061,
      "step": 197109
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.190439462661743,
      "learning_rate": 3.0384324419505236e-05,
      "loss": 2.9883,
      "step": 197110
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5686750411987305,
      "learning_rate": 3.038253063018722e-05,
      "loss": 2.9966,
      "step": 197111
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.08904767036438,
      "learning_rate": 3.038073689099617e-05,
      "loss": 2.8341,
      "step": 197112
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.679987668991089,
      "learning_rate": 3.0378943201932427e-05,
      "loss": 3.0547,
      "step": 197113
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.07456111907959,
      "learning_rate": 3.037714956299625e-05,
      "loss": 2.609,
      "step": 197114
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8719394207000732,
      "learning_rate": 3.037535597418814e-05,
      "loss": 2.8929,
      "step": 197115
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.909259557723999,
      "learning_rate": 3.037356243550827e-05,
      "loss": 3.0099,
      "step": 197116
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.321406841278076,
      "learning_rate": 3.0371768946957097e-05,
      "loss": 3.0758,
      "step": 197117
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9811153411865234,
      "learning_rate": 3.0369975508534892e-05,
      "loss": 2.7408,
      "step": 197118
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8000495433807373,
      "learning_rate": 3.0368182120242025e-05,
      "loss": 2.8942,
      "step": 197119
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3947930335998535,
      "learning_rate": 3.0366388782078722e-05,
      "loss": 2.9162,
      "step": 197120
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6816062927246094,
      "learning_rate": 3.0364595494045485e-05,
      "loss": 2.8435,
      "step": 197121
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7848868370056152,
      "learning_rate": 3.036280225614248e-05,
      "loss": 2.8512,
      "step": 197122
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6775989532470703,
      "learning_rate": 3.036100906837018e-05,
      "loss": 2.9156,
      "step": 197123
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2583985328674316,
      "learning_rate": 3.0359215930728842e-05,
      "loss": 2.8403,
      "step": 197124
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9014062881469727,
      "learning_rate": 3.0357422843218838e-05,
      "loss": 2.8216,
      "step": 197125
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.750037431716919,
      "learning_rate": 3.035562980584043e-05,
      "loss": 2.8334,
      "step": 197126
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4542267322540283,
      "learning_rate": 3.0353836818594025e-05,
      "loss": 2.9578,
      "step": 197127
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1755523681640625,
      "learning_rate": 3.0352043881479915e-05,
      "loss": 2.7539,
      "step": 197128
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.701017379760742,
      "learning_rate": 3.035025099449847e-05,
      "loss": 3.1192,
      "step": 197129
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4290380477905273,
      "learning_rate": 3.0348458157650024e-05,
      "loss": 2.8568,
      "step": 197130
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1177279949188232,
      "learning_rate": 3.0346665370934874e-05,
      "loss": 2.7655,
      "step": 197131
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1115734577178955,
      "learning_rate": 3.0344872634353323e-05,
      "loss": 3.012,
      "step": 197132
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2251787185668945,
      "learning_rate": 3.0343079947905803e-05,
      "loss": 2.9335,
      "step": 197133
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6999599933624268,
      "learning_rate": 3.0341287311592543e-05,
      "loss": 2.7741,
      "step": 197134
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.052100658416748,
      "learning_rate": 3.033949472541398e-05,
      "loss": 3.2028,
      "step": 197135
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4157259464263916,
      "learning_rate": 3.0337702189370415e-05,
      "loss": 2.8861,
      "step": 197136
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.851264476776123,
      "learning_rate": 3.0335909703462147e-05,
      "loss": 2.7668,
      "step": 197137
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1838338375091553,
      "learning_rate": 3.033411726768944e-05,
      "loss": 2.805,
      "step": 197138
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8543148040771484,
      "learning_rate": 3.0332324882052793e-05,
      "loss": 2.8901,
      "step": 197139
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.761725902557373,
      "learning_rate": 3.033053254655241e-05,
      "loss": 2.9971,
      "step": 197140
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.794036626815796,
      "learning_rate": 3.032874026118872e-05,
      "loss": 2.986,
      "step": 197141
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.721386671066284,
      "learning_rate": 3.0326948025961994e-05,
      "loss": 2.9798,
      "step": 197142
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.489721298217773,
      "learning_rate": 3.0325155840872594e-05,
      "loss": 3.0505,
      "step": 197143
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.924912691116333,
      "learning_rate": 3.032336370592079e-05,
      "loss": 3.0161,
      "step": 197144
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7020397186279297,
      "learning_rate": 3.032157162110701e-05,
      "loss": 2.7294,
      "step": 197145
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.199678421020508,
      "learning_rate": 3.031977958643146e-05,
      "loss": 2.9276,
      "step": 197146
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2191219329833984,
      "learning_rate": 3.0317987601894634e-05,
      "loss": 2.7637,
      "step": 197147
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.501552104949951,
      "learning_rate": 3.0316195667496802e-05,
      "loss": 2.8264,
      "step": 197148
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.52055287361145,
      "learning_rate": 3.031440378323826e-05,
      "loss": 2.4847,
      "step": 197149
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7415542602539062,
      "learning_rate": 3.031261194911928e-05,
      "loss": 2.8839,
      "step": 197150
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7266173362731934,
      "learning_rate": 3.0310820165140358e-05,
      "loss": 2.8882,
      "step": 197151
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9046597480773926,
      "learning_rate": 3.0309028431301696e-05,
      "loss": 3.0674,
      "step": 197152
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5516316890716553,
      "learning_rate": 3.0307236747603726e-05,
      "loss": 2.7905,
      "step": 197153
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.546964168548584,
      "learning_rate": 3.0305445114046745e-05,
      "loss": 3.1189,
      "step": 197154
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8094828128814697,
      "learning_rate": 3.0303653530631055e-05,
      "loss": 2.9707,
      "step": 197155
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0968427658081055,
      "learning_rate": 3.0301861997356923e-05,
      "loss": 3.0361,
      "step": 197156
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.737954616546631,
      "learning_rate": 3.0300070514224882e-05,
      "loss": 2.78,
      "step": 197157
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.70367693901062,
      "learning_rate": 3.0298279081235068e-05,
      "loss": 2.8834,
      "step": 197158
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9846208095550537,
      "learning_rate": 3.029648769838794e-05,
      "loss": 2.981,
      "step": 197159
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7452661991119385,
      "learning_rate": 3.0294696365683767e-05,
      "loss": 2.7474,
      "step": 197160
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.754899501800537,
      "learning_rate": 3.0292905083122954e-05,
      "loss": 2.8289,
      "step": 197161
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7992684841156006,
      "learning_rate": 3.0291113850705697e-05,
      "loss": 2.7884,
      "step": 197162
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.535128355026245,
      "learning_rate": 3.0289322668432457e-05,
      "loss": 3.0356,
      "step": 197163
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.119738817214966,
      "learning_rate": 3.0287531536303477e-05,
      "loss": 3.0592,
      "step": 197164
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6423158645629883,
      "learning_rate": 3.0285740454319186e-05,
      "loss": 3.1199,
      "step": 197165
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.026611566543579,
      "learning_rate": 3.0283949422479882e-05,
      "loss": 2.6988,
      "step": 197166
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8867411613464355,
      "learning_rate": 3.0282158440785864e-05,
      "loss": 2.7844,
      "step": 197167
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9192569255828857,
      "learning_rate": 3.028036750923747e-05,
      "loss": 2.8288,
      "step": 197168
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.245622396469116,
      "learning_rate": 3.027857662783506e-05,
      "loss": 3.0027,
      "step": 197169
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.090362071990967,
      "learning_rate": 3.0276785796578905e-05,
      "loss": 2.9542,
      "step": 197170
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.008923053741455,
      "learning_rate": 3.0274995015469438e-05,
      "loss": 3.0796,
      "step": 197171
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4927821159362793,
      "learning_rate": 3.0273204284506957e-05,
      "loss": 3.0253,
      "step": 197172
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5409295558929443,
      "learning_rate": 3.027141360369173e-05,
      "loss": 3.0082,
      "step": 197173
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3086953163146973,
      "learning_rate": 3.0269622973024188e-05,
      "loss": 2.9819,
      "step": 197174
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7868845462799072,
      "learning_rate": 3.0267832392504598e-05,
      "loss": 2.8696,
      "step": 197175
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.546116590499878,
      "learning_rate": 3.026604186213326e-05,
      "loss": 2.9355,
      "step": 197176
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8711209297180176,
      "learning_rate": 3.0264251381910642e-05,
      "loss": 2.9878,
      "step": 197177
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.083033561706543,
      "learning_rate": 3.0262460951836907e-05,
      "loss": 2.8704,
      "step": 197178
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.734665870666504,
      "learning_rate": 3.0260670571912527e-05,
      "loss": 2.9117,
      "step": 197179
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3547587394714355,
      "learning_rate": 3.0258880242137796e-05,
      "loss": 2.9284,
      "step": 197180
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.087658643722534,
      "learning_rate": 3.0257089962513013e-05,
      "loss": 2.7552,
      "step": 197181
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.639202356338501,
      "learning_rate": 3.0255299733038486e-05,
      "loss": 2.8487,
      "step": 197182
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.403770685195923,
      "learning_rate": 3.025350955371464e-05,
      "loss": 2.9619,
      "step": 197183
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.463172197341919,
      "learning_rate": 3.0251719424541677e-05,
      "loss": 2.9554,
      "step": 197184
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.171196222305298,
      "learning_rate": 3.0249929345520097e-05,
      "loss": 2.9206,
      "step": 197185
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8325583934783936,
      "learning_rate": 3.0248139316650165e-05,
      "loss": 2.8191,
      "step": 197186
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.624105453491211,
      "learning_rate": 3.024634933793212e-05,
      "loss": 2.7539,
      "step": 197187
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1042325496673584,
      "learning_rate": 3.024455940936642e-05,
      "loss": 3.0563,
      "step": 197188
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8327419757843018,
      "learning_rate": 3.0242769530953336e-05,
      "loss": 2.9287,
      "step": 197189
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.602135419845581,
      "learning_rate": 3.024097970269317e-05,
      "loss": 2.9896,
      "step": 197190
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7948856353759766,
      "learning_rate": 3.0239189924586383e-05,
      "loss": 2.8221,
      "step": 197191
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0330283641815186,
      "learning_rate": 3.0237400196633176e-05,
      "loss": 2.8505,
      "step": 197192
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0693161487579346,
      "learning_rate": 3.023561051883392e-05,
      "loss": 2.9699,
      "step": 197193
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.488563299179077,
      "learning_rate": 3.0233820891188975e-05,
      "loss": 2.8342,
      "step": 197194
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9894461631774902,
      "learning_rate": 3.023203131369868e-05,
      "loss": 2.841,
      "step": 197195
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8338329792022705,
      "learning_rate": 3.0230241786363263e-05,
      "loss": 2.6881,
      "step": 197196
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7491838932037354,
      "learning_rate": 3.0228452309183193e-05,
      "loss": 3.1486,
      "step": 197197
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7690298557281494,
      "learning_rate": 3.022666288215877e-05,
      "loss": 3.2298,
      "step": 197198
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.371493101119995,
      "learning_rate": 3.0224873505290226e-05,
      "loss": 2.8298,
      "step": 197199
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.168010950088501,
      "learning_rate": 3.0223084178578027e-05,
      "loss": 3.0501,
      "step": 197200
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8844892978668213,
      "learning_rate": 3.022129490202241e-05,
      "loss": 2.8425,
      "step": 197201
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.734722852706909,
      "learning_rate": 3.0219505675623767e-05,
      "loss": 2.8973,
      "step": 197202
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9535679817199707,
      "learning_rate": 3.0217716499382437e-05,
      "loss": 2.8523,
      "step": 197203
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5176174640655518,
      "learning_rate": 3.021592737329872e-05,
      "loss": 2.7882,
      "step": 197204
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.524555206298828,
      "learning_rate": 3.0214138297372915e-05,
      "loss": 2.8254,
      "step": 197205
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7576522827148438,
      "learning_rate": 3.021234927160545e-05,
      "loss": 2.8432,
      "step": 197206
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.441536903381348,
      "learning_rate": 3.0210560295996533e-05,
      "loss": 2.8225,
      "step": 197207
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7491581439971924,
      "learning_rate": 3.0208771370546626e-05,
      "loss": 3.127,
      "step": 197208
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7525546550750732,
      "learning_rate": 3.0206982495255995e-05,
      "loss": 2.9985,
      "step": 197209
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8628592491149902,
      "learning_rate": 3.020519367012497e-05,
      "loss": 3.206,
      "step": 197210
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.001516819000244,
      "learning_rate": 3.020340489515386e-05,
      "loss": 2.8997,
      "step": 197211
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.300283908843994,
      "learning_rate": 3.020161617034306e-05,
      "loss": 3.072,
      "step": 197212
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.989795684814453,
      "learning_rate": 3.019982749569283e-05,
      "loss": 3.1531,
      "step": 197213
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7632782459259033,
      "learning_rate": 3.019803887120361e-05,
      "loss": 2.7654,
      "step": 197214
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6004867553710938,
      "learning_rate": 3.0196250296875667e-05,
      "loss": 3.1958,
      "step": 197215
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.174043655395508,
      "learning_rate": 3.01944617727093e-05,
      "loss": 2.8977,
      "step": 197216
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4113519191741943,
      "learning_rate": 3.019267329870484e-05,
      "loss": 2.9752,
      "step": 197217
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9058713912963867,
      "learning_rate": 3.019088487486272e-05,
      "loss": 2.9684,
      "step": 197218
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.85089111328125,
      "learning_rate": 3.0189096501183175e-05,
      "loss": 2.751,
      "step": 197219
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.223377227783203,
      "learning_rate": 3.018730817766657e-05,
      "loss": 2.7947,
      "step": 197220
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.548581838607788,
      "learning_rate": 3.0185519904313272e-05,
      "loss": 2.7942,
      "step": 197221
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1167218685150146,
      "learning_rate": 3.018373168112358e-05,
      "loss": 3.1429,
      "step": 197222
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.797079086303711,
      "learning_rate": 3.0181943508097763e-05,
      "loss": 2.9448,
      "step": 197223
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.011959552764893,
      "learning_rate": 3.0180155385236283e-05,
      "loss": 2.8998,
      "step": 197224
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3827342987060547,
      "learning_rate": 3.017836731253931e-05,
      "loss": 2.6774,
      "step": 197225
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.05751895904541,
      "learning_rate": 3.0176579290007375e-05,
      "loss": 3.118,
      "step": 197226
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.41266131401062,
      "learning_rate": 3.017479131764068e-05,
      "loss": 2.8672,
      "step": 197227
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.777909517288208,
      "learning_rate": 3.0173003395439587e-05,
      "loss": 3.1109,
      "step": 197228
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.309487819671631,
      "learning_rate": 3.017121552340437e-05,
      "loss": 2.8673,
      "step": 197229
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.87387752532959,
      "learning_rate": 3.0169427701535486e-05,
      "loss": 2.8559,
      "step": 197230
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.518157482147217,
      "learning_rate": 3.0167639929833144e-05,
      "loss": 3.2169,
      "step": 197231
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0675175189971924,
      "learning_rate": 3.016585220829777e-05,
      "loss": 3.0535,
      "step": 197232
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5026376247406006,
      "learning_rate": 3.0164064536929665e-05,
      "loss": 2.9894,
      "step": 197233
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.453874349594116,
      "learning_rate": 3.0162276915729168e-05,
      "loss": 2.9844,
      "step": 197234
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.563326835632324,
      "learning_rate": 3.0160489344696538e-05,
      "loss": 2.9579,
      "step": 197235
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.024930000305176,
      "learning_rate": 3.015870182383221e-05,
      "loss": 3.0101,
      "step": 197236
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.625308036804199,
      "learning_rate": 3.015691435313642e-05,
      "loss": 3.1514,
      "step": 197237
    },
    {
      "epoch": 2.57,
      "grad_norm": 5.330051422119141,
      "learning_rate": 3.0155126932609597e-05,
      "loss": 2.701,
      "step": 197238
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1967060565948486,
      "learning_rate": 3.0153339562252042e-05,
      "loss": 2.7291,
      "step": 197239
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9915547370910645,
      "learning_rate": 3.0151552242064093e-05,
      "loss": 3.12,
      "step": 197240
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.903583288192749,
      "learning_rate": 3.0149764972045976e-05,
      "loss": 2.9014,
      "step": 197241
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.89502215385437,
      "learning_rate": 3.0147977752198194e-05,
      "loss": 3.1338,
      "step": 197242
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5041749477386475,
      "learning_rate": 3.014619058252091e-05,
      "loss": 2.8283,
      "step": 197243
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.869636058807373,
      "learning_rate": 3.0144403463014633e-05,
      "loss": 2.9848,
      "step": 197244
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7319772243499756,
      "learning_rate": 3.014261639367952e-05,
      "loss": 2.7132,
      "step": 197245
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.687589645385742,
      "learning_rate": 3.0140829374516106e-05,
      "loss": 2.8467,
      "step": 197246
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.745717763900757,
      "learning_rate": 3.0139042405524527e-05,
      "loss": 3.0221,
      "step": 197247
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.721062183380127,
      "learning_rate": 3.0137255486705213e-05,
      "loss": 3.1155,
      "step": 197248
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.756950855255127,
      "learning_rate": 3.0135468618058433e-05,
      "loss": 3.0057,
      "step": 197249
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.951613664627075,
      "learning_rate": 3.013368179958462e-05,
      "loss": 2.8544,
      "step": 197250
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7175655364990234,
      "learning_rate": 3.0131895031284005e-05,
      "loss": 2.7775,
      "step": 197251
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.917689085006714,
      "learning_rate": 3.0130108313157052e-05,
      "loss": 3.1162,
      "step": 197252
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.891613483428955,
      "learning_rate": 3.01283216452039e-05,
      "loss": 2.9757,
      "step": 197253
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8452696800231934,
      "learning_rate": 3.0126535027425046e-05,
      "loss": 3.2226,
      "step": 197254
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.96944260597229,
      "learning_rate": 3.0124748459820724e-05,
      "loss": 3.2204,
      "step": 197255
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.633030652999878,
      "learning_rate": 3.0122961942391367e-05,
      "loss": 2.8011,
      "step": 197256
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.207669973373413,
      "learning_rate": 3.0121175475137173e-05,
      "loss": 2.9091,
      "step": 197257
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8348026275634766,
      "learning_rate": 3.011938905805864e-05,
      "loss": 3.2056,
      "step": 197258
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9088499546051025,
      "learning_rate": 3.0117602691155907e-05,
      "loss": 3.0011,
      "step": 197259
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0177831649780273,
      "learning_rate": 3.011581637442947e-05,
      "loss": 2.8892,
      "step": 197260
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.119858741760254,
      "learning_rate": 3.011403010787956e-05,
      "loss": 2.8131,
      "step": 197261
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0202205181121826,
      "learning_rate": 3.0112243891506582e-05,
      "loss": 2.88,
      "step": 197262
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7062885761260986,
      "learning_rate": 3.0110457725310767e-05,
      "loss": 2.9543,
      "step": 197263
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1016247272491455,
      "learning_rate": 3.0108671609292577e-05,
      "loss": 3.1152,
      "step": 197264
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.758636713027954,
      "learning_rate": 3.0106885543452287e-05,
      "loss": 3.0473,
      "step": 197265
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1226916313171387,
      "learning_rate": 3.0105099527790222e-05,
      "loss": 2.82,
      "step": 197266
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.6864798069000244,
      "learning_rate": 3.0103313562306653e-05,
      "loss": 2.9037,
      "step": 197267
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7900502681732178,
      "learning_rate": 3.010152764700201e-05,
      "loss": 2.7158,
      "step": 197268
    },
    {
      "epoch": 2.57,
      "grad_norm": 5.112771987915039,
      "learning_rate": 3.0099741781876562e-05,
      "loss": 2.8297,
      "step": 197269
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.867138624191284,
      "learning_rate": 3.009795596693071e-05,
      "loss": 2.7468,
      "step": 197270
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.802023410797119,
      "learning_rate": 3.0096170202164714e-05,
      "loss": 3.125,
      "step": 197271
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.6752774715423584,
      "learning_rate": 3.0094384487578914e-05,
      "loss": 2.8889,
      "step": 197272
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4993340969085693,
      "learning_rate": 3.0092598823173708e-05,
      "loss": 2.9998,
      "step": 197273
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.920968532562256,
      "learning_rate": 3.0090813208949393e-05,
      "loss": 2.7363,
      "step": 197274
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.6270558834075928,
      "learning_rate": 3.0089027644906237e-05,
      "loss": 3.0124,
      "step": 197275
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7399134635925293,
      "learning_rate": 3.0087242131044674e-05,
      "loss": 2.9716,
      "step": 197276
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4429492950439453,
      "learning_rate": 3.0085456667364972e-05,
      "loss": 2.9796,
      "step": 197277
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.686504602432251,
      "learning_rate": 3.0083671253867425e-05,
      "loss": 2.7167,
      "step": 197278
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.983318567276001,
      "learning_rate": 3.0081885890552504e-05,
      "loss": 2.8828,
      "step": 197279
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8122355937957764,
      "learning_rate": 3.008010057742044e-05,
      "loss": 2.9461,
      "step": 197280
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.849440813064575,
      "learning_rate": 3.0078315314471503e-05,
      "loss": 3.0043,
      "step": 197281
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.871380090713501,
      "learning_rate": 3.0076530101706188e-05,
      "loss": 3.016,
      "step": 197282
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.819819927215576,
      "learning_rate": 3.007474493912473e-05,
      "loss": 2.6695,
      "step": 197283
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7649457454681396,
      "learning_rate": 3.007295982672743e-05,
      "loss": 2.835,
      "step": 197284
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.814337968826294,
      "learning_rate": 3.0071174764514682e-05,
      "loss": 2.8911,
      "step": 197285
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1679139137268066,
      "learning_rate": 3.0069389752486795e-05,
      "loss": 3.0321,
      "step": 197286
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9528210163116455,
      "learning_rate": 3.006760479064413e-05,
      "loss": 2.9462,
      "step": 197287
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.7652974128723145,
      "learning_rate": 3.0065819878986985e-05,
      "loss": 2.8955,
      "step": 197288
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2763047218322754,
      "learning_rate": 3.0064035017515697e-05,
      "loss": 2.9931,
      "step": 197289
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5682263374328613,
      "learning_rate": 3.0062250206230566e-05,
      "loss": 2.797,
      "step": 197290
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4606106281280518,
      "learning_rate": 3.0060465445131988e-05,
      "loss": 2.9116,
      "step": 197291
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5792129039764404,
      "learning_rate": 3.005868073422023e-05,
      "loss": 3.0655,
      "step": 197292
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5307912826538086,
      "learning_rate": 3.0056896073495696e-05,
      "loss": 2.8213,
      "step": 197293
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.358595609664917,
      "learning_rate": 3.0055111462958715e-05,
      "loss": 2.8053,
      "step": 197294
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.08331036567688,
      "learning_rate": 3.0053326902609554e-05,
      "loss": 3.0646,
      "step": 197295
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.843381643295288,
      "learning_rate": 3.0051542392448515e-05,
      "loss": 3.0295,
      "step": 197296
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.81545352935791,
      "learning_rate": 3.0049757932476058e-05,
      "loss": 3.1499,
      "step": 197297
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.064096927642822,
      "learning_rate": 3.0047973522692394e-05,
      "loss": 2.9803,
      "step": 197298
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.9905452728271484,
      "learning_rate": 3.0046189163097976e-05,
      "loss": 2.9914,
      "step": 197299
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5761568546295166,
      "learning_rate": 3.004440485369305e-05,
      "loss": 2.9323,
      "step": 197300
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.714866876602173,
      "learning_rate": 3.0042620594477972e-05,
      "loss": 2.9229,
      "step": 197301
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.876269578933716,
      "learning_rate": 3.0040836385452982e-05,
      "loss": 2.9202,
      "step": 197302
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6607718467712402,
      "learning_rate": 3.0039052226618577e-05,
      "loss": 2.9286,
      "step": 197303
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9806673526763916,
      "learning_rate": 3.0037268117974956e-05,
      "loss": 2.9316,
      "step": 197304
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.690303087234497,
      "learning_rate": 3.003548405952255e-05,
      "loss": 2.9926,
      "step": 197305
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.258511543273926,
      "learning_rate": 3.0033700051261665e-05,
      "loss": 2.6713,
      "step": 197306
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.191918134689331,
      "learning_rate": 3.0031916093192598e-05,
      "loss": 2.8876,
      "step": 197307
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6341962814331055,
      "learning_rate": 3.0030132185315613e-05,
      "loss": 2.7389,
      "step": 197308
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8968353271484375,
      "learning_rate": 3.0028348327631212e-05,
      "loss": 2.916,
      "step": 197309
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.102811336517334,
      "learning_rate": 3.002656452013956e-05,
      "loss": 2.8134,
      "step": 197310
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3280749320983887,
      "learning_rate": 3.0024780762841128e-05,
      "loss": 2.7265,
      "step": 197311
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.852132797241211,
      "learning_rate": 3.0022997055736142e-05,
      "loss": 2.9541,
      "step": 197312
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.738718271255493,
      "learning_rate": 3.0021213398825072e-05,
      "loss": 3.0389,
      "step": 197313
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2609987258911133,
      "learning_rate": 3.0019429792108053e-05,
      "loss": 2.9007,
      "step": 197314
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.041154861450195,
      "learning_rate": 3.0017646235585546e-05,
      "loss": 2.6611,
      "step": 197315
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.248363971710205,
      "learning_rate": 3.001586272925782e-05,
      "loss": 2.8284,
      "step": 197316
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.931623697280884,
      "learning_rate": 3.0014079273125313e-05,
      "loss": 2.8659,
      "step": 197317
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.115708351135254,
      "learning_rate": 3.001229586718822e-05,
      "loss": 2.8292,
      "step": 197318
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.690913200378418,
      "learning_rate": 3.0010512511447038e-05,
      "loss": 2.6949,
      "step": 197319
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.180753469467163,
      "learning_rate": 3.0008729205901904e-05,
      "loss": 3.0577,
      "step": 197320
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2347214221954346,
      "learning_rate": 3.0006945950553284e-05,
      "loss": 2.991,
      "step": 197321
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.298144578933716,
      "learning_rate": 3.0005162745401412e-05,
      "loss": 2.834,
      "step": 197322
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.780364751815796,
      "learning_rate": 3.0003379590446753e-05,
      "loss": 3.0387,
      "step": 197323
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6900687217712402,
      "learning_rate": 3.0001596485689505e-05,
      "loss": 3.0798,
      "step": 197324
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.971172332763672,
      "learning_rate": 2.9999813431130137e-05,
      "loss": 2.9896,
      "step": 197325
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.864284038543701,
      "learning_rate": 2.9998030426768814e-05,
      "loss": 3.074,
      "step": 197326
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1563940048217773,
      "learning_rate": 2.9996247472606005e-05,
      "loss": 3.0965,
      "step": 197327
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4962520599365234,
      "learning_rate": 2.999446456864194e-05,
      "loss": 2.9281,
      "step": 197328
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7717084884643555,
      "learning_rate": 2.9992681714877054e-05,
      "loss": 2.8617,
      "step": 197329
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.133479118347168,
      "learning_rate": 2.9990898911311546e-05,
      "loss": 3.096,
      "step": 197330
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.403902530670166,
      "learning_rate": 2.998911615794598e-05,
      "loss": 2.9067,
      "step": 197331
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9849414825439453,
      "learning_rate": 2.9987333454780392e-05,
      "loss": 2.7755,
      "step": 197332
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4104557037353516,
      "learning_rate": 2.9985550801815316e-05,
      "loss": 2.7696,
      "step": 197333
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.11507511138916,
      "learning_rate": 2.9983768199050982e-05,
      "loss": 2.9746,
      "step": 197334
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.721282720565796,
      "learning_rate": 2.9981985646487794e-05,
      "loss": 3.0779,
      "step": 197335
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1027092933654785,
      "learning_rate": 2.9980203144126013e-05,
      "loss": 2.6909,
      "step": 197336
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3416848182678223,
      "learning_rate": 2.997842069196611e-05,
      "loss": 2.9556,
      "step": 197337
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7866718769073486,
      "learning_rate": 2.9976638290008215e-05,
      "loss": 2.8522,
      "step": 197338
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.979524850845337,
      "learning_rate": 2.997485593825283e-05,
      "loss": 2.9145,
      "step": 197339
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7657084465026855,
      "learning_rate": 2.9973073636700117e-05,
      "loss": 2.9961,
      "step": 197340
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8138084411621094,
      "learning_rate": 2.9971291385350583e-05,
      "loss": 2.8477,
      "step": 197341
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.7018561363220215,
      "learning_rate": 2.9969509184204455e-05,
      "loss": 3.047,
      "step": 197342
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4523909091949463,
      "learning_rate": 2.996772703326217e-05,
      "loss": 2.8386,
      "step": 197343
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8979849815368652,
      "learning_rate": 2.996594493252389e-05,
      "loss": 2.8983,
      "step": 197344
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.205024480819702,
      "learning_rate": 2.9964162881990085e-05,
      "loss": 2.7842,
      "step": 197345
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1268792152404785,
      "learning_rate": 2.9962380881660987e-05,
      "loss": 2.9628,
      "step": 197346
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.674055337905884,
      "learning_rate": 2.9960598931537027e-05,
      "loss": 2.771,
      "step": 197347
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.699246883392334,
      "learning_rate": 2.9958817031618444e-05,
      "loss": 2.8133,
      "step": 197348
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0767219066619873,
      "learning_rate": 2.9957035181905666e-05,
      "loss": 3.0946,
      "step": 197349
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6820497512817383,
      "learning_rate": 2.995525338239896e-05,
      "loss": 3.0238,
      "step": 197350
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.40643048286438,
      "learning_rate": 2.995347163309869e-05,
      "loss": 3.0163,
      "step": 197351
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.555893898010254,
      "learning_rate": 2.9951689934005097e-05,
      "loss": 2.8078,
      "step": 197352
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.427207946777344,
      "learning_rate": 2.994990828511864e-05,
      "loss": 2.884,
      "step": 197353
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.3754351139068604,
      "learning_rate": 2.9948126686439556e-05,
      "loss": 2.9285,
      "step": 197354
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5709521770477295,
      "learning_rate": 2.994634513796824e-05,
      "loss": 3.0311,
      "step": 197355
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0867202281951904,
      "learning_rate": 2.994456363970503e-05,
      "loss": 2.8217,
      "step": 197356
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9790477752685547,
      "learning_rate": 2.9942782191650195e-05,
      "loss": 2.6715,
      "step": 197357
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8632702827453613,
      "learning_rate": 2.9941000793804025e-05,
      "loss": 2.7639,
      "step": 197358
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0624806880950928,
      "learning_rate": 2.9939219446167027e-05,
      "loss": 2.9592,
      "step": 197359
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2986514568328857,
      "learning_rate": 2.9937438148739334e-05,
      "loss": 2.8386,
      "step": 197360
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4181933403015137,
      "learning_rate": 2.9935656901521443e-05,
      "loss": 2.9603,
      "step": 197361
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2484889030456543,
      "learning_rate": 2.993387570451359e-05,
      "loss": 3.0894,
      "step": 197362
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7152981758117676,
      "learning_rate": 2.993209455771607e-05,
      "loss": 2.9783,
      "step": 197363
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.222180366516113,
      "learning_rate": 2.993031346112935e-05,
      "loss": 3.0113,
      "step": 197364
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.046924114227295,
      "learning_rate": 2.992853241475367e-05,
      "loss": 2.8215,
      "step": 197365
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4131722450256348,
      "learning_rate": 2.9926751418589322e-05,
      "loss": 3.1743,
      "step": 197366
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.277792453765869,
      "learning_rate": 2.9924970472636745e-05,
      "loss": 2.8917,
      "step": 197367
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.242016315460205,
      "learning_rate": 2.9923189576896202e-05,
      "loss": 2.6694,
      "step": 197368
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.3167924880981445,
      "learning_rate": 2.992140873136799e-05,
      "loss": 3.1453,
      "step": 197369
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7492482662200928,
      "learning_rate": 2.9919627936052547e-05,
      "loss": 3.0248,
      "step": 197370
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.755429983139038,
      "learning_rate": 2.991784719095007e-05,
      "loss": 3.0525,
      "step": 197371
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8013243675231934,
      "learning_rate": 2.9916066496061064e-05,
      "loss": 2.8547,
      "step": 197372
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1964826583862305,
      "learning_rate": 2.9914285851385722e-05,
      "loss": 2.6393,
      "step": 197373
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3006157875061035,
      "learning_rate": 2.9912505256924414e-05,
      "loss": 2.8609,
      "step": 197374
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6049931049346924,
      "learning_rate": 2.99107247126774e-05,
      "loss": 2.9391,
      "step": 197375
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6832406520843506,
      "learning_rate": 2.9908944218645158e-05,
      "loss": 2.7619,
      "step": 197376
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.787386417388916,
      "learning_rate": 2.990716377482788e-05,
      "loss": 3.0079,
      "step": 197377
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8523340225219727,
      "learning_rate": 2.9905383381226033e-05,
      "loss": 2.7413,
      "step": 197378
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0713253021240234,
      "learning_rate": 2.9903603037839853e-05,
      "loss": 2.7732,
      "step": 197379
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2388980388641357,
      "learning_rate": 2.9901822744669702e-05,
      "loss": 3.0463,
      "step": 197380
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.496462345123291,
      "learning_rate": 2.990004250171585e-05,
      "loss": 2.9447,
      "step": 197381
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0787501335144043,
      "learning_rate": 2.9898262308978694e-05,
      "loss": 2.7685,
      "step": 197382
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.134696960449219,
      "learning_rate": 2.9896482166458537e-05,
      "loss": 2.9017,
      "step": 197383
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.671548843383789,
      "learning_rate": 2.9894702074155775e-05,
      "loss": 2.916,
      "step": 197384
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.560490369796753,
      "learning_rate": 2.989292203207061e-05,
      "loss": 2.8738,
      "step": 197385
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.865699052810669,
      "learning_rate": 2.9891142040203576e-05,
      "loss": 2.9767,
      "step": 197386
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.705583333969116,
      "learning_rate": 2.988936209855477e-05,
      "loss": 3.1983,
      "step": 197387
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5536375045776367,
      "learning_rate": 2.988758220712466e-05,
      "loss": 3.0126,
      "step": 197388
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.9422178268432617,
      "learning_rate": 2.9885802365913513e-05,
      "loss": 2.8889,
      "step": 197389
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9791994094848633,
      "learning_rate": 2.988402257492173e-05,
      "loss": 3.105,
      "step": 197390
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1708295345306396,
      "learning_rate": 2.988224283414957e-05,
      "loss": 2.7361,
      "step": 197391
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.671578884124756,
      "learning_rate": 2.988046314359751e-05,
      "loss": 2.9065,
      "step": 197392
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.687547445297241,
      "learning_rate": 2.9878683503265675e-05,
      "loss": 2.8019,
      "step": 197393
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.477936267852783,
      "learning_rate": 2.9876903913154504e-05,
      "loss": 3.0749,
      "step": 197394
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.612700939178467,
      "learning_rate": 2.9875124373264293e-05,
      "loss": 3.1307,
      "step": 197395
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.226468324661255,
      "learning_rate": 2.987334488359544e-05,
      "loss": 3.3344,
      "step": 197396
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.063149929046631,
      "learning_rate": 2.987156544414818e-05,
      "loss": 3.1661,
      "step": 197397
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.012079238891602,
      "learning_rate": 2.9869786054923017e-05,
      "loss": 3.0678,
      "step": 197398
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7922821044921875,
      "learning_rate": 2.9868006715920046e-05,
      "loss": 2.7013,
      "step": 197399
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8122079372406006,
      "learning_rate": 2.9866227427139734e-05,
      "loss": 2.9378,
      "step": 197400
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.613466739654541,
      "learning_rate": 2.9864448188582378e-05,
      "loss": 2.9809,
      "step": 197401
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.739633321762085,
      "learning_rate": 2.9862669000248353e-05,
      "loss": 2.981,
      "step": 197402
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.294137954711914,
      "learning_rate": 2.9860889862137883e-05,
      "loss": 3.0029,
      "step": 197403
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.1531982421875,
      "learning_rate": 2.9859110774251503e-05,
      "loss": 2.8786,
      "step": 197404
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.94836688041687,
      "learning_rate": 2.985733173658932e-05,
      "loss": 2.7634,
      "step": 197405
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.585087776184082,
      "learning_rate": 2.985555274915179e-05,
      "loss": 2.8287,
      "step": 197406
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.558671712875366,
      "learning_rate": 2.9853773811939185e-05,
      "loss": 2.7811,
      "step": 197407
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4441981315612793,
      "learning_rate": 2.9851994924951872e-05,
      "loss": 2.802,
      "step": 197408
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.144458770751953,
      "learning_rate": 2.9850216088190148e-05,
      "loss": 2.9983,
      "step": 197409
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3224735260009766,
      "learning_rate": 2.9848437301654482e-05,
      "loss": 2.9191,
      "step": 197410
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.148638725280762,
      "learning_rate": 2.9846658565344973e-05,
      "loss": 2.6755,
      "step": 197411
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1844124794006348,
      "learning_rate": 2.984487987926212e-05,
      "loss": 3.09,
      "step": 197412
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.853154182434082,
      "learning_rate": 2.9843101243406154e-05,
      "loss": 3.0949,
      "step": 197413
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.609800100326538,
      "learning_rate": 2.984132265777751e-05,
      "loss": 2.9697,
      "step": 197414
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5649425983428955,
      "learning_rate": 2.9839544122376425e-05,
      "loss": 2.8489,
      "step": 197415
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3845155239105225,
      "learning_rate": 2.983776563720336e-05,
      "loss": 3.1044,
      "step": 197416
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.965968608856201,
      "learning_rate": 2.9835987202258415e-05,
      "loss": 2.7769,
      "step": 197417
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.113473415374756,
      "learning_rate": 2.983420881754216e-05,
      "loss": 2.8908,
      "step": 197418
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9066500663757324,
      "learning_rate": 2.983243048305476e-05,
      "loss": 2.7955,
      "step": 197419
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4716219902038574,
      "learning_rate": 2.9830652198796645e-05,
      "loss": 3.0366,
      "step": 197420
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.463571071624756,
      "learning_rate": 2.9828873964768085e-05,
      "loss": 2.5895,
      "step": 197421
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.706650495529175,
      "learning_rate": 2.9827095780969545e-05,
      "loss": 2.7196,
      "step": 197422
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5749423503875732,
      "learning_rate": 2.9825317647401126e-05,
      "loss": 2.7404,
      "step": 197423
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.259192705154419,
      "learning_rate": 2.9823539564063326e-05,
      "loss": 2.7614,
      "step": 197424
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4543285369873047,
      "learning_rate": 2.982176153095638e-05,
      "loss": 2.9186,
      "step": 197425
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9399166107177734,
      "learning_rate": 2.9819983548080752e-05,
      "loss": 2.7919,
      "step": 197426
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4498238563537598,
      "learning_rate": 2.9818205615436608e-05,
      "loss": 3.2214,
      "step": 197427
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8360652923583984,
      "learning_rate": 2.9816427733024483e-05,
      "loss": 3.0583,
      "step": 197428
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6073052883148193,
      "learning_rate": 2.9814649900844444e-05,
      "loss": 2.8203,
      "step": 197429
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7644729614257812,
      "learning_rate": 2.9812872118897057e-05,
      "loss": 2.7666,
      "step": 197430
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5297887325286865,
      "learning_rate": 2.9811094387182453e-05,
      "loss": 3.1675,
      "step": 197431
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4602322578430176,
      "learning_rate": 2.980931670570117e-05,
      "loss": 2.8311,
      "step": 197432
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4540770053863525,
      "learning_rate": 2.9807539074453368e-05,
      "loss": 2.768,
      "step": 197433
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0130579471588135,
      "learning_rate": 2.9805761493439482e-05,
      "loss": 2.7861,
      "step": 197434
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2085483074188232,
      "learning_rate": 2.9803983962659817e-05,
      "loss": 2.9341,
      "step": 197435
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9355719089508057,
      "learning_rate": 2.9802206482114698e-05,
      "loss": 3.0938,
      "step": 197436
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6544814109802246,
      "learning_rate": 2.98004290518044e-05,
      "loss": 2.7736,
      "step": 197437
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.686275005340576,
      "learning_rate": 2.9798651671729344e-05,
      "loss": 2.8989,
      "step": 197438
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7174224853515625,
      "learning_rate": 2.9796874341889743e-05,
      "loss": 3.1101,
      "step": 197439
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.182279586791992,
      "learning_rate": 2.9795097062286087e-05,
      "loss": 2.7878,
      "step": 197440
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7873637676239014,
      "learning_rate": 2.9793319832918617e-05,
      "loss": 3.1148,
      "step": 197441
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9190144538879395,
      "learning_rate": 2.9791542653787658e-05,
      "loss": 2.9365,
      "step": 197442
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9477458000183105,
      "learning_rate": 2.9789765524893515e-05,
      "loss": 2.9057,
      "step": 197443
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.999993085861206,
      "learning_rate": 2.9787988446236622e-05,
      "loss": 2.8398,
      "step": 197444
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.072524070739746,
      "learning_rate": 2.9786211417817175e-05,
      "loss": 2.7319,
      "step": 197445
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.854574203491211,
      "learning_rate": 2.9784434439635607e-05,
      "loss": 3.1222,
      "step": 197446
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.8990445137023926,
      "learning_rate": 2.978265751169222e-05,
      "loss": 3.1969,
      "step": 197447
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0144295692443848,
      "learning_rate": 2.978088063398728e-05,
      "loss": 2.835,
      "step": 197448
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.238447427749634,
      "learning_rate": 2.9779103806521254e-05,
      "loss": 2.7425,
      "step": 197449
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.779076337814331,
      "learning_rate": 2.9777327029294374e-05,
      "loss": 2.9744,
      "step": 197450
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8350634574890137,
      "learning_rate": 2.977555030230694e-05,
      "loss": 2.7831,
      "step": 197451
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6836867332458496,
      "learning_rate": 2.9773773625559384e-05,
      "loss": 3.0879,
      "step": 197452
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8573410511016846,
      "learning_rate": 2.9771996999052005e-05,
      "loss": 2.765,
      "step": 197453
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3470568656921387,
      "learning_rate": 2.977022042278504e-05,
      "loss": 2.8836,
      "step": 197454
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0167057514190674,
      "learning_rate": 2.9768443896758954e-05,
      "loss": 2.909,
      "step": 197455
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4730350971221924,
      "learning_rate": 2.9766667420973946e-05,
      "loss": 2.9213,
      "step": 197456
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.875685214996338,
      "learning_rate": 2.976489099543048e-05,
      "loss": 3.1051,
      "step": 197457
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.484457492828369,
      "learning_rate": 2.9763114620128824e-05,
      "loss": 3.1061,
      "step": 197458
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5359244346618652,
      "learning_rate": 2.976133829506928e-05,
      "loss": 3.2006,
      "step": 197459
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.6606760025024414,
      "learning_rate": 2.975956202025218e-05,
      "loss": 3.0133,
      "step": 197460
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8762471675872803,
      "learning_rate": 2.9757785795677923e-05,
      "loss": 2.8939,
      "step": 197461
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.505689859390259,
      "learning_rate": 2.9756009621346743e-05,
      "loss": 3.0503,
      "step": 197462
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.109680414199829,
      "learning_rate": 2.975423349725907e-05,
      "loss": 2.9586,
      "step": 197463
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1477444171905518,
      "learning_rate": 2.9752457423415212e-05,
      "loss": 2.7961,
      "step": 197464
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.282320022583008,
      "learning_rate": 2.9750681399815456e-05,
      "loss": 3.2827,
      "step": 197465
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.566826105117798,
      "learning_rate": 2.974890542646008e-05,
      "loss": 2.7177,
      "step": 197466
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.132303476333618,
      "learning_rate": 2.9747129503349544e-05,
      "loss": 3.071,
      "step": 197467
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.6648406982421875,
      "learning_rate": 2.974535363048408e-05,
      "loss": 2.7974,
      "step": 197468
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.570525884628296,
      "learning_rate": 2.9743577807864095e-05,
      "loss": 3.0659,
      "step": 197469
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.719562292098999,
      "learning_rate": 2.9741802035489846e-05,
      "loss": 2.7155,
      "step": 197470
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.249373435974121,
      "learning_rate": 2.9740026313361774e-05,
      "loss": 2.8079,
      "step": 197471
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.776378631591797,
      "learning_rate": 2.9738250641480043e-05,
      "loss": 3.0717,
      "step": 197472
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8130431175231934,
      "learning_rate": 2.9736475019845152e-05,
      "loss": 3.0098,
      "step": 197473
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.29156494140625,
      "learning_rate": 2.9734699448457266e-05,
      "loss": 2.9627,
      "step": 197474
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.878726005554199,
      "learning_rate": 2.9732923927316855e-05,
      "loss": 3.0302,
      "step": 197475
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2848007678985596,
      "learning_rate": 2.9731148456424146e-05,
      "loss": 3.0559,
      "step": 197476
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9731452465057373,
      "learning_rate": 2.9729373035779647e-05,
      "loss": 2.9782,
      "step": 197477
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9189512729644775,
      "learning_rate": 2.972759766538342e-05,
      "loss": 3.0637,
      "step": 197478
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.111931085586548,
      "learning_rate": 2.9725822345235995e-05,
      "loss": 3.023,
      "step": 197479
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.524258613586426,
      "learning_rate": 2.9724047075337608e-05,
      "loss": 2.9261,
      "step": 197480
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.869871616363525,
      "learning_rate": 2.972227185568866e-05,
      "loss": 2.8639,
      "step": 197481
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.96378493309021,
      "learning_rate": 2.9720496686289384e-05,
      "loss": 3.0366,
      "step": 197482
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.751117706298828,
      "learning_rate": 2.971872156714028e-05,
      "loss": 2.7857,
      "step": 197483
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.613751173019409,
      "learning_rate": 2.971694649824148e-05,
      "loss": 3.0253,
      "step": 197484
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.829590320587158,
      "learning_rate": 2.9715171479593413e-05,
      "loss": 2.7776,
      "step": 197485
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7537834644317627,
      "learning_rate": 2.9713396511196386e-05,
      "loss": 3.0804,
      "step": 197486
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.931402206420898,
      "learning_rate": 2.9711621593050795e-05,
      "loss": 3.039,
      "step": 197487
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8191447257995605,
      "learning_rate": 2.970984672515684e-05,
      "loss": 2.9276,
      "step": 197488
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2816524505615234,
      "learning_rate": 2.9708071907515018e-05,
      "loss": 2.8508,
      "step": 197489
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0438547134399414,
      "learning_rate": 2.97062971401255e-05,
      "loss": 3.1427,
      "step": 197490
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.741151809692383,
      "learning_rate": 2.9704522422988718e-05,
      "loss": 2.7788,
      "step": 197491
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.273247003555298,
      "learning_rate": 2.9702747756104907e-05,
      "loss": 2.7698,
      "step": 197492
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1647043228149414,
      "learning_rate": 2.9700973139474527e-05,
      "loss": 3.054,
      "step": 197493
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3314428329467773,
      "learning_rate": 2.9699198573097782e-05,
      "loss": 2.7769,
      "step": 197494
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8834497928619385,
      "learning_rate": 2.9697424056975173e-05,
      "loss": 2.8821,
      "step": 197495
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9390156269073486,
      "learning_rate": 2.969564959110676e-05,
      "loss": 2.8919,
      "step": 197496
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.848440647125244,
      "learning_rate": 2.9693875175493153e-05,
      "loss": 2.9891,
      "step": 197497
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.3744561672210693,
      "learning_rate": 2.9692100810134444e-05,
      "loss": 2.6102,
      "step": 197498
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8922550678253174,
      "learning_rate": 2.9690326495031168e-05,
      "loss": 2.9503,
      "step": 197499
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6464221477508545,
      "learning_rate": 2.9688552230183495e-05,
      "loss": 2.8259,
      "step": 197500
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.587062358856201,
      "learning_rate": 2.9686778015591916e-05,
      "loss": 2.8148,
      "step": 197501
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6047661304473877,
      "learning_rate": 2.968500385125657e-05,
      "loss": 3.1833,
      "step": 197502
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.657160520553589,
      "learning_rate": 2.968322973717796e-05,
      "loss": 2.9845,
      "step": 197503
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.582796335220337,
      "learning_rate": 2.968145567335625e-05,
      "loss": 2.9833,
      "step": 197504
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.101835012435913,
      "learning_rate": 2.967968165979193e-05,
      "loss": 2.7437,
      "step": 197505
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.65371036529541,
      "learning_rate": 2.9677907696485216e-05,
      "loss": 2.9331,
      "step": 197506
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6949446201324463,
      "learning_rate": 2.9676133783436594e-05,
      "loss": 2.9304,
      "step": 197507
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.467190742492676,
      "learning_rate": 2.967435992064614e-05,
      "loss": 2.7151,
      "step": 197508
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.186537742614746,
      "learning_rate": 2.9672586108114414e-05,
      "loss": 3.1512,
      "step": 197509
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.710298776626587,
      "learning_rate": 2.9670812345841587e-05,
      "loss": 2.8946,
      "step": 197510
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4725687503814697,
      "learning_rate": 2.9669038633828125e-05,
      "loss": 2.7395,
      "step": 197511
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0481584072113037,
      "learning_rate": 2.9667264972074222e-05,
      "loss": 3.1029,
      "step": 197512
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.987987518310547,
      "learning_rate": 2.966549136058035e-05,
      "loss": 2.9163,
      "step": 197513
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6085457801818848,
      "learning_rate": 2.9663717799346742e-05,
      "loss": 2.5398,
      "step": 197514
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.551199436187744,
      "learning_rate": 2.966194428837376e-05,
      "loss": 2.9827,
      "step": 197515
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.558000087738037,
      "learning_rate": 2.966017082766168e-05,
      "loss": 3.1717,
      "step": 197516
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.68172287940979,
      "learning_rate": 2.9658397417210922e-05,
      "loss": 2.6648,
      "step": 197517
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7707252502441406,
      "learning_rate": 2.965662405702173e-05,
      "loss": 3.0039,
      "step": 197518
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.923189640045166,
      "learning_rate": 2.9654850747094528e-05,
      "loss": 3.0354,
      "step": 197519
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.025017738342285,
      "learning_rate": 2.9653077487429588e-05,
      "loss": 2.6291,
      "step": 197520
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9819648265838623,
      "learning_rate": 2.965130427802721e-05,
      "loss": 2.8655,
      "step": 197521
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.3024823665618896,
      "learning_rate": 2.964953111888776e-05,
      "loss": 2.9332,
      "step": 197522
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.922720432281494,
      "learning_rate": 2.964775801001157e-05,
      "loss": 2.8965,
      "step": 197523
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.6007490158081055,
      "learning_rate": 2.9645984951398936e-05,
      "loss": 2.768,
      "step": 197524
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5867598056793213,
      "learning_rate": 2.9644211943050268e-05,
      "loss": 2.9675,
      "step": 197525
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4355039596557617,
      "learning_rate": 2.9642438984965856e-05,
      "loss": 2.9476,
      "step": 197526
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.654221534729004,
      "learning_rate": 2.9640666077146e-05,
      "loss": 3.0015,
      "step": 197527
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1415317058563232,
      "learning_rate": 2.9638893219591007e-05,
      "loss": 2.9398,
      "step": 197528
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9832844734191895,
      "learning_rate": 2.9637120412301306e-05,
      "loss": 3.0776,
      "step": 197529
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7272067070007324,
      "learning_rate": 2.9635347655277098e-05,
      "loss": 3.0108,
      "step": 197530
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9068613052368164,
      "learning_rate": 2.9633574948518847e-05,
      "loss": 2.9791,
      "step": 197531
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.306225061416626,
      "learning_rate": 2.9631802292026818e-05,
      "loss": 2.8397,
      "step": 197532
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.977750301361084,
      "learning_rate": 2.963002968580128e-05,
      "loss": 2.6625,
      "step": 197533
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.485337734222412,
      "learning_rate": 2.962825712984267e-05,
      "loss": 2.9563,
      "step": 197534
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7816131114959717,
      "learning_rate": 2.9626484624151282e-05,
      "loss": 2.7863,
      "step": 197535
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2635631561279297,
      "learning_rate": 2.962471216872738e-05,
      "loss": 2.9535,
      "step": 197536
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1761722564697266,
      "learning_rate": 2.9622939763571407e-05,
      "loss": 3.0795,
      "step": 197537
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.716315746307373,
      "learning_rate": 2.9621167408683622e-05,
      "loss": 2.9584,
      "step": 197538
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.561351776123047,
      "learning_rate": 2.9619395104064292e-05,
      "loss": 2.5665,
      "step": 197539
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7835729122161865,
      "learning_rate": 2.9617622849713918e-05,
      "loss": 2.8518,
      "step": 197540
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0373785495758057,
      "learning_rate": 2.96158506456327e-05,
      "loss": 3.0054,
      "step": 197541
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.774702787399292,
      "learning_rate": 2.9614078491820968e-05,
      "loss": 2.6517,
      "step": 197542
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3502421379089355,
      "learning_rate": 2.9612306388279128e-05,
      "loss": 2.7876,
      "step": 197543
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1876869201660156,
      "learning_rate": 2.961053433500744e-05,
      "loss": 2.7716,
      "step": 197544
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.893542766571045,
      "learning_rate": 2.9608762332006242e-05,
      "loss": 2.8583,
      "step": 197545
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.039241313934326,
      "learning_rate": 2.9606990379275898e-05,
      "loss": 2.8504,
      "step": 197546
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0123558044433594,
      "learning_rate": 2.9605218476816705e-05,
      "loss": 2.8841,
      "step": 197547
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8702149391174316,
      "learning_rate": 2.9603446624629035e-05,
      "loss": 2.7514,
      "step": 197548
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1978492736816406,
      "learning_rate": 2.9601674822713185e-05,
      "loss": 2.7884,
      "step": 197549
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2645864486694336,
      "learning_rate": 2.9599903071069485e-05,
      "loss": 2.815,
      "step": 197550
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.963136911392212,
      "learning_rate": 2.959813136969824e-05,
      "loss": 2.7985,
      "step": 197551
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.787308931350708,
      "learning_rate": 2.959635971859985e-05,
      "loss": 2.8163,
      "step": 197552
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2802577018737793,
      "learning_rate": 2.959458811777451e-05,
      "loss": 2.8059,
      "step": 197553
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1407225131988525,
      "learning_rate": 2.9592816567222755e-05,
      "loss": 2.8309,
      "step": 197554
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.49296236038208,
      "learning_rate": 2.9591045066944685e-05,
      "loss": 2.9572,
      "step": 197555
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.846466064453125,
      "learning_rate": 2.95892736169409e-05,
      "loss": 3.0757,
      "step": 197556
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.924950361251831,
      "learning_rate": 2.9587502217211435e-05,
      "loss": 2.864,
      "step": 197557
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.742905855178833,
      "learning_rate": 2.9585730867756817e-05,
      "loss": 3.0992,
      "step": 197558
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.655951976776123,
      "learning_rate": 2.9583959568577288e-05,
      "loss": 2.9403,
      "step": 197559
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8977348804473877,
      "learning_rate": 2.958218831967324e-05,
      "loss": 3.2077,
      "step": 197560
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6546103954315186,
      "learning_rate": 2.9580417121044907e-05,
      "loss": 3.1145,
      "step": 197561
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7368695735931396,
      "learning_rate": 2.9578645972692794e-05,
      "loss": 2.8254,
      "step": 197562
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.687077045440674,
      "learning_rate": 2.957687487461703e-05,
      "loss": 2.9196,
      "step": 197563
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.9230804443359375,
      "learning_rate": 2.9575103826818048e-05,
      "loss": 2.8904,
      "step": 197564
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.159266233444214,
      "learning_rate": 2.9573332829296116e-05,
      "loss": 2.9866,
      "step": 197565
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.117968797683716,
      "learning_rate": 2.9571561882051664e-05,
      "loss": 2.9975,
      "step": 197566
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.094749689102173,
      "learning_rate": 2.9569790985084896e-05,
      "loss": 2.9243,
      "step": 197567
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3866138458251953,
      "learning_rate": 2.956802013839634e-05,
      "loss": 2.7296,
      "step": 197568
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7163023948669434,
      "learning_rate": 2.9566249341986072e-05,
      "loss": 3.2397,
      "step": 197569
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9390180110931396,
      "learning_rate": 2.956447859585461e-05,
      "loss": 2.9077,
      "step": 197570
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6172564029693604,
      "learning_rate": 2.9562707900002136e-05,
      "loss": 3.1207,
      "step": 197571
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1283304691314697,
      "learning_rate": 2.956093725442914e-05,
      "loss": 2.973,
      "step": 197572
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.691939115524292,
      "learning_rate": 2.955916665913579e-05,
      "loss": 2.7821,
      "step": 197573
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.854689598083496,
      "learning_rate": 2.955739611412262e-05,
      "loss": 2.7967,
      "step": 197574
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9448304176330566,
      "learning_rate": 2.9555625619389733e-05,
      "loss": 3.0625,
      "step": 197575
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6034770011901855,
      "learning_rate": 2.9553855174937625e-05,
      "loss": 3.2513,
      "step": 197576
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.965785503387451,
      "learning_rate": 2.955208478076646e-05,
      "loss": 3.2439,
      "step": 197577
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0944604873657227,
      "learning_rate": 2.9550314436876777e-05,
      "loss": 2.8831,
      "step": 197578
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.294792890548706,
      "learning_rate": 2.9548544143268705e-05,
      "loss": 2.8458,
      "step": 197579
    },
    {
      "epoch": 2.57,
      "grad_norm": 5.561676025390625,
      "learning_rate": 2.9546773899942744e-05,
      "loss": 2.9004,
      "step": 197580
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.771630048751831,
      "learning_rate": 2.954500370689913e-05,
      "loss": 2.8532,
      "step": 197581
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.782576084136963,
      "learning_rate": 2.9543233564138192e-05,
      "loss": 2.6385,
      "step": 197582
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8779938220977783,
      "learning_rate": 2.954146347166023e-05,
      "loss": 2.9679,
      "step": 197583
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.35444974899292,
      "learning_rate": 2.953969342946565e-05,
      "loss": 3.0486,
      "step": 197584
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.664445877075195,
      "learning_rate": 2.953792343755471e-05,
      "loss": 2.7175,
      "step": 197585
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.473876476287842,
      "learning_rate": 2.953615349592785e-05,
      "loss": 2.8824,
      "step": 197586
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.662785530090332,
      "learning_rate": 2.95343836045853e-05,
      "loss": 2.8645,
      "step": 197587
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.769976854324341,
      "learning_rate": 2.9532613763527423e-05,
      "loss": 2.8316,
      "step": 197588
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2762088775634766,
      "learning_rate": 2.953084397275446e-05,
      "loss": 2.8507,
      "step": 197589
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.020916700363159,
      "learning_rate": 2.9529074232266904e-05,
      "loss": 2.9948,
      "step": 197590
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.927541494369507,
      "learning_rate": 2.9527304542064923e-05,
      "loss": 3.2308,
      "step": 197591
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8915133476257324,
      "learning_rate": 2.9525534902148985e-05,
      "loss": 2.7844,
      "step": 197592
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.923872232437134,
      "learning_rate": 2.9523765312519353e-05,
      "loss": 2.8753,
      "step": 197593
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.702180862426758,
      "learning_rate": 2.9521995773176366e-05,
      "loss": 2.9035,
      "step": 197594
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.657672882080078,
      "learning_rate": 2.9520226284120285e-05,
      "loss": 2.7212,
      "step": 197595
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.512512445449829,
      "learning_rate": 2.9518456845351545e-05,
      "loss": 3.1047,
      "step": 197596
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.963393449783325,
      "learning_rate": 2.9516687456870414e-05,
      "loss": 2.8443,
      "step": 197597
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.8717446327209473,
      "learning_rate": 2.9514918118677254e-05,
      "loss": 2.8069,
      "step": 197598
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.729050397872925,
      "learning_rate": 2.951314883077237e-05,
      "loss": 2.8347,
      "step": 197599
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.609234571456909,
      "learning_rate": 2.9511379593156125e-05,
      "loss": 2.7404,
      "step": 197600
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4087955951690674,
      "learning_rate": 2.9509610405828756e-05,
      "loss": 3.0892,
      "step": 197601
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.120501756668091,
      "learning_rate": 2.9507841268790687e-05,
      "loss": 2.7197,
      "step": 197602
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5555367469787598,
      "learning_rate": 2.9506072182042195e-05,
      "loss": 3.1942,
      "step": 197603
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8283088207244873,
      "learning_rate": 2.9504303145583676e-05,
      "loss": 2.7864,
      "step": 197604
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.8454246520996094,
      "learning_rate": 2.9502534159415426e-05,
      "loss": 2.6041,
      "step": 197605
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8943915367126465,
      "learning_rate": 2.950076522353775e-05,
      "loss": 2.9909,
      "step": 197606
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.14640212059021,
      "learning_rate": 2.949899633795091e-05,
      "loss": 2.9762,
      "step": 197607
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4168646335601807,
      "learning_rate": 2.9497227502655375e-05,
      "loss": 3.0007,
      "step": 197608
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7042112350463867,
      "learning_rate": 2.9495458717651376e-05,
      "loss": 2.9252,
      "step": 197609
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7676827907562256,
      "learning_rate": 2.9493689982939318e-05,
      "loss": 3.0145,
      "step": 197610
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2820730209350586,
      "learning_rate": 2.9491921298519495e-05,
      "loss": 2.9367,
      "step": 197611
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9848361015319824,
      "learning_rate": 2.949015266439224e-05,
      "loss": 2.9443,
      "step": 197612
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8296992778778076,
      "learning_rate": 2.9488384080557794e-05,
      "loss": 2.9462,
      "step": 197613
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4129791259765625,
      "learning_rate": 2.9486615547016646e-05,
      "loss": 2.8618,
      "step": 197614
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9344563484191895,
      "learning_rate": 2.9484847063768968e-05,
      "loss": 2.8293,
      "step": 197615
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8925318717956543,
      "learning_rate": 2.9483078630815226e-05,
      "loss": 2.8584,
      "step": 197616
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0402462482452393,
      "learning_rate": 2.948131024815569e-05,
      "loss": 2.7954,
      "step": 197617
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0399887561798096,
      "learning_rate": 2.9479541915790616e-05,
      "loss": 2.8607,
      "step": 197618
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.683861017227173,
      "learning_rate": 2.9477773633720446e-05,
      "loss": 2.9894,
      "step": 197619
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8624637126922607,
      "learning_rate": 2.9476005401945513e-05,
      "loss": 2.9368,
      "step": 197620
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.239051342010498,
      "learning_rate": 2.947423722046598e-05,
      "loss": 2.93,
      "step": 197621
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7171895503997803,
      "learning_rate": 2.947246908928238e-05,
      "loss": 2.9655,
      "step": 197622
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5944554805755615,
      "learning_rate": 2.9470701008394982e-05,
      "loss": 2.8829,
      "step": 197623
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1906445026397705,
      "learning_rate": 2.946893297780398e-05,
      "loss": 2.8271,
      "step": 197624
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2934560775756836,
      "learning_rate": 2.9467164997509885e-05,
      "loss": 3.1206,
      "step": 197625
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1713008880615234,
      "learning_rate": 2.9465397067512987e-05,
      "loss": 2.9416,
      "step": 197626
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.024110317230225,
      "learning_rate": 2.9463629187813486e-05,
      "loss": 3.0117,
      "step": 197627
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.64467191696167,
      "learning_rate": 2.9461861358411854e-05,
      "loss": 2.7664,
      "step": 197628
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8923439979553223,
      "learning_rate": 2.9460093579308384e-05,
      "loss": 2.9835,
      "step": 197629
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8350319862365723,
      "learning_rate": 2.9458325850503316e-05,
      "loss": 2.8487,
      "step": 197630
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6017589569091797,
      "learning_rate": 2.9456558171997115e-05,
      "loss": 3.1545,
      "step": 197631
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2868905067443848,
      "learning_rate": 2.9454790543790006e-05,
      "loss": 3.0342,
      "step": 197632
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1769068241119385,
      "learning_rate": 2.94530229658824e-05,
      "loss": 2.9659,
      "step": 197633
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9258499145507812,
      "learning_rate": 2.945125543827459e-05,
      "loss": 2.9048,
      "step": 197634
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.809581756591797,
      "learning_rate": 2.944948796096691e-05,
      "loss": 2.8417,
      "step": 197635
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.626222133636475,
      "learning_rate": 2.9447720533959596e-05,
      "loss": 3.1392,
      "step": 197636
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.401245355606079,
      "learning_rate": 2.9445953157253144e-05,
      "loss": 2.8588,
      "step": 197637
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8720366954803467,
      "learning_rate": 2.944418583084769e-05,
      "loss": 2.929,
      "step": 197638
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.876422166824341,
      "learning_rate": 2.944241855474376e-05,
      "loss": 2.853,
      "step": 197639
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1984832286834717,
      "learning_rate": 2.94406513289416e-05,
      "loss": 2.7792,
      "step": 197640
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0760107040405273,
      "learning_rate": 2.9438884153441465e-05,
      "loss": 2.9926,
      "step": 197641
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.673306941986084,
      "learning_rate": 2.9437117028243795e-05,
      "loss": 2.6158,
      "step": 197642
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7212870121002197,
      "learning_rate": 2.9435349953348887e-05,
      "loss": 2.7921,
      "step": 197643
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.472313165664673,
      "learning_rate": 2.9433582928757003e-05,
      "loss": 2.8441,
      "step": 197644
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7416701316833496,
      "learning_rate": 2.9431815954468552e-05,
      "loss": 2.7609,
      "step": 197645
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6906473636627197,
      "learning_rate": 2.9430049030483793e-05,
      "loss": 2.8518,
      "step": 197646
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.619316339492798,
      "learning_rate": 2.942828215680316e-05,
      "loss": 3.0932,
      "step": 197647
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0545666217803955,
      "learning_rate": 2.942651533342689e-05,
      "loss": 3.1919,
      "step": 197648
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.59687876701355,
      "learning_rate": 2.9424748560355382e-05,
      "loss": 2.6294,
      "step": 197649
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.57340145111084,
      "learning_rate": 2.9422981837588832e-05,
      "loss": 3.0449,
      "step": 197650
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.86474609375,
      "learning_rate": 2.942121516512771e-05,
      "loss": 3.1436,
      "step": 197651
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9577760696411133,
      "learning_rate": 2.9419448542972246e-05,
      "loss": 3.1356,
      "step": 197652
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8133983612060547,
      "learning_rate": 2.9417681971122873e-05,
      "loss": 2.9596,
      "step": 197653
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.152348518371582,
      "learning_rate": 2.9415915449579862e-05,
      "loss": 3.1472,
      "step": 197654
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.238001346588135,
      "learning_rate": 2.9414148978343544e-05,
      "loss": 2.8717,
      "step": 197655
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.640432119369507,
      "learning_rate": 2.9412382557414182e-05,
      "loss": 2.5703,
      "step": 197656
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.615450620651245,
      "learning_rate": 2.941061618679221e-05,
      "loss": 2.7649,
      "step": 197657
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.877680778503418,
      "learning_rate": 2.940884986647787e-05,
      "loss": 3.1106,
      "step": 197658
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.05721116065979,
      "learning_rate": 2.940708359647158e-05,
      "loss": 3.2395,
      "step": 197659
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6435344219207764,
      "learning_rate": 2.940531737677362e-05,
      "loss": 3.0212,
      "step": 197660
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7352097034454346,
      "learning_rate": 2.9403551207384345e-05,
      "loss": 2.7447,
      "step": 197661
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0590672492980957,
      "learning_rate": 2.9401785088303963e-05,
      "loss": 3.0554,
      "step": 197662
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.955700397491455,
      "learning_rate": 2.940001901953297e-05,
      "loss": 2.7664,
      "step": 197663
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.37858247756958,
      "learning_rate": 2.939825300107157e-05,
      "loss": 2.9952,
      "step": 197664
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.300170421600342,
      "learning_rate": 2.939648703292019e-05,
      "loss": 2.9204,
      "step": 197665
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.821655035018921,
      "learning_rate": 2.9394721115079135e-05,
      "loss": 2.9445,
      "step": 197666
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8203704357147217,
      "learning_rate": 2.93929552475487e-05,
      "loss": 2.9828,
      "step": 197667
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9968924522399902,
      "learning_rate": 2.9391189430329154e-05,
      "loss": 2.7155,
      "step": 197668
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.912217140197754,
      "learning_rate": 2.938942366342093e-05,
      "loss": 2.8762,
      "step": 197669
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.8960773944854736,
      "learning_rate": 2.9387657946824293e-05,
      "loss": 2.7635,
      "step": 197670
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.166447639465332,
      "learning_rate": 2.9385892280539647e-05,
      "loss": 3.1463,
      "step": 197671
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.712648868560791,
      "learning_rate": 2.9384126664567288e-05,
      "loss": 2.8428,
      "step": 197672
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.595881700515747,
      "learning_rate": 2.9382361098907514e-05,
      "loss": 2.8923,
      "step": 197673
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5929439067840576,
      "learning_rate": 2.9380595583560595e-05,
      "loss": 2.9912,
      "step": 197674
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2088496685028076,
      "learning_rate": 2.937883011852703e-05,
      "loss": 2.8145,
      "step": 197675
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.32507061958313,
      "learning_rate": 2.9377064703806952e-05,
      "loss": 3.1617,
      "step": 197676
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.589066982269287,
      "learning_rate": 2.937529933940086e-05,
      "loss": 2.7526,
      "step": 197677
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2801904678344727,
      "learning_rate": 2.937353402530902e-05,
      "loss": 3.0145,
      "step": 197678
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6387109756469727,
      "learning_rate": 2.9371768761531732e-05,
      "loss": 2.9141,
      "step": 197679
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7059850692749023,
      "learning_rate": 2.9370003548069267e-05,
      "loss": 3.0471,
      "step": 197680
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6492366790771484,
      "learning_rate": 2.9368238384922117e-05,
      "loss": 2.8125,
      "step": 197681
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.434149980545044,
      "learning_rate": 2.9366473272090453e-05,
      "loss": 3.0911,
      "step": 197682
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6618857383728027,
      "learning_rate": 2.9364708209574707e-05,
      "loss": 2.8715,
      "step": 197683
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2676239013671875,
      "learning_rate": 2.936294319737521e-05,
      "loss": 2.8977,
      "step": 197684
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.942594528198242,
      "learning_rate": 2.93611782354922e-05,
      "loss": 2.7114,
      "step": 197685
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.579127073287964,
      "learning_rate": 2.935941332392604e-05,
      "loss": 2.977,
      "step": 197686
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.54706072807312,
      "learning_rate": 2.935764846267713e-05,
      "loss": 2.757,
      "step": 197687
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.488389253616333,
      "learning_rate": 2.935588365174567e-05,
      "loss": 3.033,
      "step": 197688
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6998136043548584,
      "learning_rate": 2.9354118891132095e-05,
      "loss": 2.9351,
      "step": 197689
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.964125156402588,
      "learning_rate": 2.9352354180836734e-05,
      "loss": 3.0902,
      "step": 197690
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6537911891937256,
      "learning_rate": 2.9350589520859858e-05,
      "loss": 3.088,
      "step": 197691
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2157721519470215,
      "learning_rate": 2.9348824911201797e-05,
      "loss": 2.9232,
      "step": 197692
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2838854789733887,
      "learning_rate": 2.9347060351862917e-05,
      "loss": 2.6582,
      "step": 197693
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5134358406066895,
      "learning_rate": 2.9345295842843486e-05,
      "loss": 2.8535,
      "step": 197694
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4717564582824707,
      "learning_rate": 2.9343531384143905e-05,
      "loss": 3.2026,
      "step": 197695
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.112720012664795,
      "learning_rate": 2.93417669757645e-05,
      "loss": 2.9814,
      "step": 197696
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4584569931030273,
      "learning_rate": 2.934000261770555e-05,
      "loss": 2.8655,
      "step": 197697
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.674670457839966,
      "learning_rate": 2.9338238309967377e-05,
      "loss": 3.0215,
      "step": 197698
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1560752391815186,
      "learning_rate": 2.9336474052550353e-05,
      "loss": 2.8964,
      "step": 197699
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.072859048843384,
      "learning_rate": 2.933470984545474e-05,
      "loss": 2.7226,
      "step": 197700
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3666951656341553,
      "learning_rate": 2.933294568868101e-05,
      "loss": 2.7978,
      "step": 197701
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.9939379692077637,
      "learning_rate": 2.9331181582229356e-05,
      "loss": 2.956,
      "step": 197702
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7040910720825195,
      "learning_rate": 2.9329417526100153e-05,
      "loss": 2.9076,
      "step": 197703
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6775333881378174,
      "learning_rate": 2.932765352029366e-05,
      "loss": 2.8567,
      "step": 197704
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.675583839416504,
      "learning_rate": 2.932588956481031e-05,
      "loss": 2.8421,
      "step": 197705
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8186025619506836,
      "learning_rate": 2.9324125659650344e-05,
      "loss": 2.9618,
      "step": 197706
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3972127437591553,
      "learning_rate": 2.9322361804814188e-05,
      "loss": 2.9141,
      "step": 197707
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0303449630737305,
      "learning_rate": 2.9320598000302075e-05,
      "loss": 3.0622,
      "step": 197708
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5919458866119385,
      "learning_rate": 2.931883424611441e-05,
      "loss": 2.8343,
      "step": 197709
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.3696560859680176,
      "learning_rate": 2.9317070542251488e-05,
      "loss": 2.869,
      "step": 197710
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5090036392211914,
      "learning_rate": 2.931530688871364e-05,
      "loss": 2.9819,
      "step": 197711
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7017173767089844,
      "learning_rate": 2.9313543285501106e-05,
      "loss": 2.8686,
      "step": 197712
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.890617847442627,
      "learning_rate": 2.9311779732614384e-05,
      "loss": 2.8784,
      "step": 197713
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.736126184463501,
      "learning_rate": 2.9310016230053636e-05,
      "loss": 2.7631,
      "step": 197714
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2391278743743896,
      "learning_rate": 2.9308252777819297e-05,
      "loss": 2.9938,
      "step": 197715
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8244287967681885,
      "learning_rate": 2.9306489375911702e-05,
      "loss": 3.2399,
      "step": 197716
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4133687019348145,
      "learning_rate": 2.930472602433105e-05,
      "loss": 2.9478,
      "step": 197717
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.862670660018921,
      "learning_rate": 2.9302962723077837e-05,
      "loss": 2.9496,
      "step": 197718
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5649197101593018,
      "learning_rate": 2.9301199472152337e-05,
      "loss": 2.9596,
      "step": 197719
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2540619373321533,
      "learning_rate": 2.9299436271554776e-05,
      "loss": 3.1756,
      "step": 197720
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.4534051418304443,
      "learning_rate": 2.9297673121285593e-05,
      "loss": 2.8701,
      "step": 197721
    },
    {
      "epoch": 2.57,
      "grad_norm": 4.145489692687988,
      "learning_rate": 2.9295910021345114e-05,
      "loss": 3.1168,
      "step": 197722
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.451643943786621,
      "learning_rate": 2.929414697173358e-05,
      "loss": 2.8191,
      "step": 197723
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8623485565185547,
      "learning_rate": 2.9292383972451416e-05,
      "loss": 2.9597,
      "step": 197724
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.830707550048828,
      "learning_rate": 2.9290621023498894e-05,
      "loss": 3.1923,
      "step": 197725
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0032336711883545,
      "learning_rate": 2.9288858124876314e-05,
      "loss": 2.9701,
      "step": 197726
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.376962184906006,
      "learning_rate": 2.9287095276584103e-05,
      "loss": 3.0397,
      "step": 197727
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.8364968299865723,
      "learning_rate": 2.9285332478622536e-05,
      "loss": 3.2384,
      "step": 197728
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8461246490478516,
      "learning_rate": 2.9283569730991873e-05,
      "loss": 2.9199,
      "step": 197729
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.7138657569885254,
      "learning_rate": 2.928180703369255e-05,
      "loss": 3.0065,
      "step": 197730
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.12905216217041,
      "learning_rate": 2.92800443867248e-05,
      "loss": 2.8621,
      "step": 197731
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2315640449523926,
      "learning_rate": 2.9278281790089086e-05,
      "loss": 2.7712,
      "step": 197732
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6819851398468018,
      "learning_rate": 2.927651924378561e-05,
      "loss": 2.9409,
      "step": 197733
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8444950580596924,
      "learning_rate": 2.9274756747814743e-05,
      "loss": 2.8733,
      "step": 197734
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.209556818008423,
      "learning_rate": 2.9272994302176777e-05,
      "loss": 2.7613,
      "step": 197735
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.131603956222534,
      "learning_rate": 2.927123190687212e-05,
      "loss": 2.82,
      "step": 197736
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.1756608486175537,
      "learning_rate": 2.9269469561900993e-05,
      "loss": 2.7943,
      "step": 197737
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.038651943206787,
      "learning_rate": 2.9267707267263842e-05,
      "loss": 2.7048,
      "step": 197738
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.0379111766815186,
      "learning_rate": 2.9265945022960922e-05,
      "loss": 3.1624,
      "step": 197739
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4543397426605225,
      "learning_rate": 2.9264182828992576e-05,
      "loss": 3.0546,
      "step": 197740
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.199970245361328,
      "learning_rate": 2.9262420685359068e-05,
      "loss": 2.803,
      "step": 197741
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6288979053497314,
      "learning_rate": 2.926065859206086e-05,
      "loss": 3.0349,
      "step": 197742
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7038183212280273,
      "learning_rate": 2.925889654909812e-05,
      "loss": 3.2831,
      "step": 197743
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.8996407985687256,
      "learning_rate": 2.925713455647135e-05,
      "loss": 2.9181,
      "step": 197744
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.5256705284118652,
      "learning_rate": 2.9255372614180783e-05,
      "loss": 2.8488,
      "step": 197745
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.854604721069336,
      "learning_rate": 2.925361072222675e-05,
      "loss": 3.0708,
      "step": 197746
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.960430383682251,
      "learning_rate": 2.9251848880609518e-05,
      "loss": 2.7573,
      "step": 197747
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.943798780441284,
      "learning_rate": 2.9250087089329556e-05,
      "loss": 2.9619,
      "step": 197748
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.5202553272247314,
      "learning_rate": 2.9248325348387024e-05,
      "loss": 2.9797,
      "step": 197749
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.776855230331421,
      "learning_rate": 2.924656365778243e-05,
      "loss": 2.8419,
      "step": 197750
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.4716901779174805,
      "learning_rate": 2.9244802017515968e-05,
      "loss": 2.9318,
      "step": 197751
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.2618656158447266,
      "learning_rate": 2.924304042758804e-05,
      "loss": 2.8299,
      "step": 197752
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.7273995876312256,
      "learning_rate": 2.9241278887998876e-05,
      "loss": 2.9293,
      "step": 197753
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.962878465652466,
      "learning_rate": 2.9239517398748946e-05,
      "loss": 2.8541,
      "step": 197754
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.148071527481079,
      "learning_rate": 2.9237755959838417e-05,
      "loss": 2.8863,
      "step": 197755
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.118067979812622,
      "learning_rate": 2.923599457126775e-05,
      "loss": 2.8684,
      "step": 197756
    },
    {
      "epoch": 2.57,
      "grad_norm": 3.479079246520996,
      "learning_rate": 2.9234233233037252e-05,
      "loss": 2.7248,
      "step": 197757
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.6296675205230713,
      "learning_rate": 2.923247194514722e-05,
      "loss": 3.1464,
      "step": 197758
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.934390068054199,
      "learning_rate": 2.9230710707597916e-05,
      "loss": 2.9596,
      "step": 197759
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.725421190261841,
      "learning_rate": 2.9228949520389776e-05,
      "loss": 3.0058,
      "step": 197760
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.615598201751709,
      "learning_rate": 2.9227188383523038e-05,
      "loss": 2.7506,
      "step": 197761
    },
    {
      "epoch": 2.57,
      "grad_norm": 2.594686985015869,
      "learning_rate": 2.9225427296998127e-05,
      "loss": 2.7694,
      "step": 197762
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.557445764541626,
      "learning_rate": 2.922366626081535e-05,
      "loss": 2.8737,
      "step": 197763
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0001909732818604,
      "learning_rate": 2.9221905274974965e-05,
      "loss": 2.9745,
      "step": 197764
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6632676124572754,
      "learning_rate": 2.9220144339477312e-05,
      "loss": 3.0011,
      "step": 197765
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.270189046859741,
      "learning_rate": 2.9218383454322792e-05,
      "loss": 2.777,
      "step": 197766
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0310823917388916,
      "learning_rate": 2.921662261951163e-05,
      "loss": 2.8231,
      "step": 197767
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5801801681518555,
      "learning_rate": 2.9214861835044267e-05,
      "loss": 2.8361,
      "step": 197768
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1533448696136475,
      "learning_rate": 2.9213101100920966e-05,
      "loss": 3.1871,
      "step": 197769
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.721168279647827,
      "learning_rate": 2.9211340417142058e-05,
      "loss": 2.7203,
      "step": 197770
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6854639053344727,
      "learning_rate": 2.9209579783707848e-05,
      "loss": 2.9566,
      "step": 197771
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.499293565750122,
      "learning_rate": 2.920781920061873e-05,
      "loss": 2.6761,
      "step": 197772
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3715295791625977,
      "learning_rate": 2.920605866787491e-05,
      "loss": 2.9873,
      "step": 197773
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4914681911468506,
      "learning_rate": 2.9204298185476884e-05,
      "loss": 2.9944,
      "step": 197774
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.477240562438965,
      "learning_rate": 2.9202537753424814e-05,
      "loss": 3.0762,
      "step": 197775
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6724298000335693,
      "learning_rate": 2.920077737171921e-05,
      "loss": 3.0147,
      "step": 197776
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9992799758911133,
      "learning_rate": 2.9199017040360194e-05,
      "loss": 2.8158,
      "step": 197777
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.064251899719238,
      "learning_rate": 2.919725675934824e-05,
      "loss": 2.832,
      "step": 197778
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7928593158721924,
      "learning_rate": 2.919549652868358e-05,
      "loss": 2.8809,
      "step": 197779
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1206414699554443,
      "learning_rate": 2.9193736348366647e-05,
      "loss": 3.0791,
      "step": 197780
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.546031951904297,
      "learning_rate": 2.919197621839764e-05,
      "loss": 2.7438,
      "step": 197781
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8902688026428223,
      "learning_rate": 2.919021613877709e-05,
      "loss": 2.7043,
      "step": 197782
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.85144567489624,
      "learning_rate": 2.9188456109505066e-05,
      "loss": 2.748,
      "step": 197783
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6656434535980225,
      "learning_rate": 2.9186696130582067e-05,
      "loss": 2.8704,
      "step": 197784
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.666055679321289,
      "learning_rate": 2.9184936202008326e-05,
      "loss": 2.7885,
      "step": 197785
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.011235475540161,
      "learning_rate": 2.9183176323784273e-05,
      "loss": 2.8781,
      "step": 197786
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8756980895996094,
      "learning_rate": 2.918141649591015e-05,
      "loss": 2.905,
      "step": 197787
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.361320972442627,
      "learning_rate": 2.917965671838638e-05,
      "loss": 2.7953,
      "step": 197788
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.231217861175537,
      "learning_rate": 2.9177896991213133e-05,
      "loss": 2.8868,
      "step": 197789
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.929962635040283,
      "learning_rate": 2.917613731439088e-05,
      "loss": 3.0551,
      "step": 197790
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9649429321289062,
      "learning_rate": 2.9174377687919848e-05,
      "loss": 2.7763,
      "step": 197791
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.745011806488037,
      "learning_rate": 2.9172618111800473e-05,
      "loss": 2.9633,
      "step": 197792
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.888521671295166,
      "learning_rate": 2.9170858586032952e-05,
      "loss": 3.0419,
      "step": 197793
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6749916076660156,
      "learning_rate": 2.9169099110617755e-05,
      "loss": 3.2105,
      "step": 197794
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8231723308563232,
      "learning_rate": 2.9167339685555112e-05,
      "loss": 2.892,
      "step": 197795
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0080277919769287,
      "learning_rate": 2.916558031084536e-05,
      "loss": 3.161,
      "step": 197796
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.664165496826172,
      "learning_rate": 2.9163820986488795e-05,
      "loss": 2.8011,
      "step": 197797
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.036746978759766,
      "learning_rate": 2.9162061712485853e-05,
      "loss": 2.8655,
      "step": 197798
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.89286732673645,
      "learning_rate": 2.9160302488836763e-05,
      "loss": 3.0241,
      "step": 197799
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.421590328216553,
      "learning_rate": 2.9158543315541894e-05,
      "loss": 2.777,
      "step": 197800
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2769622802734375,
      "learning_rate": 2.9156784192601612e-05,
      "loss": 3.1059,
      "step": 197801
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7114031314849854,
      "learning_rate": 2.9155025120016152e-05,
      "loss": 2.9848,
      "step": 197802
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8774197101593018,
      "learning_rate": 2.9153266097785842e-05,
      "loss": 2.6998,
      "step": 197803
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.608527183532715,
      "learning_rate": 2.9151507125911122e-05,
      "loss": 2.9496,
      "step": 197804
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5719263553619385,
      "learning_rate": 2.914974820439219e-05,
      "loss": 2.9129,
      "step": 197805
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0084519386291504,
      "learning_rate": 2.9147989333229503e-05,
      "loss": 3.0438,
      "step": 197806
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9648032188415527,
      "learning_rate": 2.9146230512423275e-05,
      "loss": 3.0093,
      "step": 197807
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4930262565612793,
      "learning_rate": 2.9144471741973864e-05,
      "loss": 2.8157,
      "step": 197808
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0629220008850098,
      "learning_rate": 2.9142713021881636e-05,
      "loss": 2.9,
      "step": 197809
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0684902667999268,
      "learning_rate": 2.9140954352146896e-05,
      "loss": 2.9722,
      "step": 197810
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.96140193939209,
      "learning_rate": 2.9139195732769904e-05,
      "loss": 2.8944,
      "step": 197811
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1830227375030518,
      "learning_rate": 2.913743716375113e-05,
      "loss": 2.9499,
      "step": 197812
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2645840644836426,
      "learning_rate": 2.913567864509081e-05,
      "loss": 3.0804,
      "step": 197813
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9010159969329834,
      "learning_rate": 2.9133920176789204e-05,
      "loss": 3.014,
      "step": 197814
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7736709117889404,
      "learning_rate": 2.9132161758846784e-05,
      "loss": 2.854,
      "step": 197815
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1294970512390137,
      "learning_rate": 2.9130403391263746e-05,
      "loss": 2.9138,
      "step": 197816
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.9777040481567383,
      "learning_rate": 2.9128645074040557e-05,
      "loss": 3.0011,
      "step": 197817
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7269043922424316,
      "learning_rate": 2.912688680717745e-05,
      "loss": 2.7926,
      "step": 197818
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.664341449737549,
      "learning_rate": 2.9125128590674763e-05,
      "loss": 2.8696,
      "step": 197819
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.102050542831421,
      "learning_rate": 2.9123370424532755e-05,
      "loss": 3.0346,
      "step": 197820
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8128662109375,
      "learning_rate": 2.912161230875193e-05,
      "loss": 2.8741,
      "step": 197821
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.86442494392395,
      "learning_rate": 2.911985424333242e-05,
      "loss": 2.8482,
      "step": 197822
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.102884292602539,
      "learning_rate": 2.911809622827469e-05,
      "loss": 2.9288,
      "step": 197823
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9664902687072754,
      "learning_rate": 2.9116338263579043e-05,
      "loss": 2.9733,
      "step": 197824
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.843292474746704,
      "learning_rate": 2.9114580349245775e-05,
      "loss": 2.7881,
      "step": 197825
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.850703239440918,
      "learning_rate": 2.9112822485275157e-05,
      "loss": 2.9965,
      "step": 197826
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4638566970825195,
      "learning_rate": 2.911106467166765e-05,
      "loss": 2.9616,
      "step": 197827
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8133127689361572,
      "learning_rate": 2.9109306908423458e-05,
      "loss": 2.9456,
      "step": 197828
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8000216484069824,
      "learning_rate": 2.910754919554298e-05,
      "loss": 2.7498,
      "step": 197829
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8473081588745117,
      "learning_rate": 2.9105791533026545e-05,
      "loss": 2.8942,
      "step": 197830
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8389618396759033,
      "learning_rate": 2.910403392087446e-05,
      "loss": 3.0578,
      "step": 197831
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8489603996276855,
      "learning_rate": 2.9102276359086986e-05,
      "loss": 2.971,
      "step": 197832
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6161978244781494,
      "learning_rate": 2.910051884766459e-05,
      "loss": 3.0028,
      "step": 197833
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.105450391769409,
      "learning_rate": 2.909876138660744e-05,
      "loss": 2.703,
      "step": 197834
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.872213125228882,
      "learning_rate": 2.9097003975916e-05,
      "loss": 2.888,
      "step": 197835
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.746377944946289,
      "learning_rate": 2.909524661559054e-05,
      "loss": 3.0921,
      "step": 197836
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.637149810791016,
      "learning_rate": 2.909348930563139e-05,
      "loss": 3.1734,
      "step": 197837
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.791511297225952,
      "learning_rate": 2.9091732046038818e-05,
      "loss": 2.969,
      "step": 197838
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9000675678253174,
      "learning_rate": 2.908997483681329e-05,
      "loss": 2.9736,
      "step": 197839
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9172956943511963,
      "learning_rate": 2.9088217677954973e-05,
      "loss": 2.772,
      "step": 197840
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8904709815979004,
      "learning_rate": 2.90864605694643e-05,
      "loss": 2.7299,
      "step": 197841
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8390581607818604,
      "learning_rate": 2.9084703511341533e-05,
      "loss": 2.7165,
      "step": 197842
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.057605266571045,
      "learning_rate": 2.9082946503587177e-05,
      "loss": 2.9019,
      "step": 197843
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2230381965637207,
      "learning_rate": 2.9081189546201266e-05,
      "loss": 2.911,
      "step": 197844
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.63739275932312,
      "learning_rate": 2.907943263918433e-05,
      "loss": 3.2188,
      "step": 197845
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8564352989196777,
      "learning_rate": 2.9077675782536602e-05,
      "loss": 2.759,
      "step": 197846
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7331321239471436,
      "learning_rate": 2.9075918976258482e-05,
      "loss": 2.9904,
      "step": 197847
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7154641151428223,
      "learning_rate": 2.907416222035024e-05,
      "loss": 2.885,
      "step": 197848
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8408381938934326,
      "learning_rate": 2.9072405514812304e-05,
      "loss": 3.0782,
      "step": 197849
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0447890758514404,
      "learning_rate": 2.907064885964484e-05,
      "loss": 2.856,
      "step": 197850
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9989709854125977,
      "learning_rate": 2.9068892254848287e-05,
      "loss": 3.31,
      "step": 197851
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6965420246124268,
      "learning_rate": 2.9067135700422905e-05,
      "loss": 2.9401,
      "step": 197852
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.894254684448242,
      "learning_rate": 2.90653791963691e-05,
      "loss": 2.8212,
      "step": 197853
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.103990316390991,
      "learning_rate": 2.9063622742687098e-05,
      "loss": 2.7691,
      "step": 197854
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.436861515045166,
      "learning_rate": 2.9061866339377405e-05,
      "loss": 3.1459,
      "step": 197855
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8832881450653076,
      "learning_rate": 2.9060109986440118e-05,
      "loss": 3.0349,
      "step": 197856
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9636685848236084,
      "learning_rate": 2.90583536838757e-05,
      "loss": 2.9559,
      "step": 197857
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.579892873764038,
      "learning_rate": 2.9056597431684427e-05,
      "loss": 2.8793,
      "step": 197858
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.064021587371826,
      "learning_rate": 2.9054841229866687e-05,
      "loss": 2.7878,
      "step": 197859
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4518508911132812,
      "learning_rate": 2.9053085078422722e-05,
      "loss": 3.0887,
      "step": 197860
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9900872707366943,
      "learning_rate": 2.9051328977352995e-05,
      "loss": 2.9121,
      "step": 197861
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.586146831512451,
      "learning_rate": 2.904957292665764e-05,
      "loss": 3.0602,
      "step": 197862
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0226027965545654,
      "learning_rate": 2.904781692633712e-05,
      "loss": 2.7205,
      "step": 197863
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2109994888305664,
      "learning_rate": 2.9046060976391704e-05,
      "loss": 3.0534,
      "step": 197864
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7679357528686523,
      "learning_rate": 2.904430507682176e-05,
      "loss": 3.1588,
      "step": 197865
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7377400398254395,
      "learning_rate": 2.9042549227627554e-05,
      "loss": 3.1207,
      "step": 197866
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7178452014923096,
      "learning_rate": 2.9040793428809585e-05,
      "loss": 2.7129,
      "step": 197867
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8375799655914307,
      "learning_rate": 2.9039037680367914e-05,
      "loss": 2.7504,
      "step": 197868
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.990784168243408,
      "learning_rate": 2.903728198230305e-05,
      "loss": 2.9338,
      "step": 197869
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6290674209594727,
      "learning_rate": 2.9035526334615224e-05,
      "loss": 2.8787,
      "step": 197870
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.416254758834839,
      "learning_rate": 2.903377073730486e-05,
      "loss": 2.9486,
      "step": 197871
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.770047426223755,
      "learning_rate": 2.903201519037217e-05,
      "loss": 2.8946,
      "step": 197872
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2892940044403076,
      "learning_rate": 2.903025969381768e-05,
      "loss": 2.9507,
      "step": 197873
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.871738910675049,
      "learning_rate": 2.902850424764146e-05,
      "loss": 2.9211,
      "step": 197874
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4833922386169434,
      "learning_rate": 2.9026748851844005e-05,
      "loss": 2.9765,
      "step": 197875
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6378695964813232,
      "learning_rate": 2.902499350642552e-05,
      "loss": 2.9535,
      "step": 197876
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1605401039123535,
      "learning_rate": 2.9023238211386502e-05,
      "loss": 3.0025,
      "step": 197877
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7975594997406006,
      "learning_rate": 2.9021482966727082e-05,
      "loss": 3.0031,
      "step": 197878
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.9226741790771484,
      "learning_rate": 2.9019727772447766e-05,
      "loss": 2.6009,
      "step": 197879
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3052003383636475,
      "learning_rate": 2.901797262854878e-05,
      "loss": 3.0838,
      "step": 197880
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.003357887268066,
      "learning_rate": 2.9016217535030494e-05,
      "loss": 2.577,
      "step": 197881
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.359907865524292,
      "learning_rate": 2.9014462491893107e-05,
      "loss": 2.7716,
      "step": 197882
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5468735694885254,
      "learning_rate": 2.9012707499137156e-05,
      "loss": 2.9375,
      "step": 197883
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7014222145080566,
      "learning_rate": 2.901095255676277e-05,
      "loss": 3.239,
      "step": 197884
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0383126735687256,
      "learning_rate": 2.9009197664770412e-05,
      "loss": 2.8609,
      "step": 197885
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.93664813041687,
      "learning_rate": 2.900744282316039e-05,
      "loss": 2.705,
      "step": 197886
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2919909954071045,
      "learning_rate": 2.9005688031932962e-05,
      "loss": 2.8741,
      "step": 197887
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5868425369262695,
      "learning_rate": 2.9003933291088465e-05,
      "loss": 2.8026,
      "step": 197888
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.764370918273926,
      "learning_rate": 2.90021786006273e-05,
      "loss": 2.7889,
      "step": 197889
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6588876247406006,
      "learning_rate": 2.90004239605497e-05,
      "loss": 2.8362,
      "step": 197890
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.155008554458618,
      "learning_rate": 2.8998669370856065e-05,
      "loss": 2.7931,
      "step": 197891
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2905988693237305,
      "learning_rate": 2.8996914831546724e-05,
      "loss": 2.946,
      "step": 197892
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7671401500701904,
      "learning_rate": 2.899516034262188e-05,
      "loss": 2.7503,
      "step": 197893
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1822097301483154,
      "learning_rate": 2.8993405904082035e-05,
      "loss": 2.8751,
      "step": 197894
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0628106594085693,
      "learning_rate": 2.8991651515927417e-05,
      "loss": 2.735,
      "step": 197895
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.091094493865967,
      "learning_rate": 2.8989897178158327e-05,
      "loss": 2.9145,
      "step": 197896
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.279327630996704,
      "learning_rate": 2.8988142890775168e-05,
      "loss": 3.0905,
      "step": 197897
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.507594347000122,
      "learning_rate": 2.898638865377827e-05,
      "loss": 2.9399,
      "step": 197898
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.244945764541626,
      "learning_rate": 2.89846344671678e-05,
      "loss": 3.0412,
      "step": 197899
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.706530809402466,
      "learning_rate": 2.8982880330944292e-05,
      "loss": 2.7921,
      "step": 197900
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.648993968963623,
      "learning_rate": 2.8981126245107944e-05,
      "loss": 3.0522,
      "step": 197901
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.959151029586792,
      "learning_rate": 2.8979372209659156e-05,
      "loss": 2.7726,
      "step": 197902
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7889413833618164,
      "learning_rate": 2.897761822459823e-05,
      "loss": 2.9619,
      "step": 197903
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7261898517608643,
      "learning_rate": 2.8975864289925466e-05,
      "loss": 3.174,
      "step": 197904
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5768051147460938,
      "learning_rate": 2.8974110405641126e-05,
      "loss": 2.9839,
      "step": 197905
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.481266736984253,
      "learning_rate": 2.8972356571745715e-05,
      "loss": 3.1815,
      "step": 197906
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5961036682128906,
      "learning_rate": 2.8970602788239393e-05,
      "loss": 2.9633,
      "step": 197907
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2857396602630615,
      "learning_rate": 2.89688490551226e-05,
      "loss": 3.3327,
      "step": 197908
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2707197666168213,
      "learning_rate": 2.8967095372395598e-05,
      "loss": 2.857,
      "step": 197909
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9202513694763184,
      "learning_rate": 2.8965341740058755e-05,
      "loss": 3.0291,
      "step": 197910
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.126929998397827,
      "learning_rate": 2.8963588158112305e-05,
      "loss": 2.7802,
      "step": 197911
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.567485809326172,
      "learning_rate": 2.8961834626556713e-05,
      "loss": 2.8891,
      "step": 197912
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.45037841796875,
      "learning_rate": 2.8960081145392144e-05,
      "loss": 2.8132,
      "step": 197913
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1795318126678467,
      "learning_rate": 2.89583277146191e-05,
      "loss": 2.7691,
      "step": 197914
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0787975788116455,
      "learning_rate": 2.8956574334237746e-05,
      "loss": 2.8868,
      "step": 197915
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7127676010131836,
      "learning_rate": 2.8954821004248585e-05,
      "loss": 3.0827,
      "step": 197916
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5386834144592285,
      "learning_rate": 2.8953067724651745e-05,
      "loss": 2.9785,
      "step": 197917
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2272396087646484,
      "learning_rate": 2.8951314495447697e-05,
      "loss": 2.6534,
      "step": 197918
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.646909475326538,
      "learning_rate": 2.8949561316636638e-05,
      "loss": 2.9865,
      "step": 197919
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.011415004730225,
      "learning_rate": 2.894780818821907e-05,
      "loss": 2.9863,
      "step": 197920
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.026576519012451,
      "learning_rate": 2.894605511019512e-05,
      "loss": 2.9468,
      "step": 197921
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.065908193588257,
      "learning_rate": 2.8944302082565362e-05,
      "loss": 2.8678,
      "step": 197922
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0078237056732178,
      "learning_rate": 2.894254910532986e-05,
      "loss": 2.8174,
      "step": 197923
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.358811378479004,
      "learning_rate": 2.8940796178489078e-05,
      "loss": 3.1955,
      "step": 197924
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0869064331054688,
      "learning_rate": 2.8939043302043285e-05,
      "loss": 2.7804,
      "step": 197925
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2341723442077637,
      "learning_rate": 2.8937290475992913e-05,
      "loss": 2.8026,
      "step": 197926
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8198676109313965,
      "learning_rate": 2.893553770033813e-05,
      "loss": 2.9339,
      "step": 197927
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8424315452575684,
      "learning_rate": 2.89337849750795e-05,
      "loss": 2.8637,
      "step": 197928
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2881903648376465,
      "learning_rate": 2.8932032300217056e-05,
      "loss": 2.7955,
      "step": 197929
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5152621269226074,
      "learning_rate": 2.89302796757513e-05,
      "loss": 3.1376,
      "step": 197930
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5235443115234375,
      "learning_rate": 2.8928527101682497e-05,
      "loss": 2.8941,
      "step": 197931
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.599407911300659,
      "learning_rate": 2.8926774578011046e-05,
      "loss": 3.0135,
      "step": 197932
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1511287689208984,
      "learning_rate": 2.892502210473715e-05,
      "loss": 2.826,
      "step": 197933
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.727956771850586,
      "learning_rate": 2.892326968186134e-05,
      "loss": 2.8928,
      "step": 197934
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.660367250442505,
      "learning_rate": 2.8921517309383714e-05,
      "loss": 2.834,
      "step": 197935
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.100956678390503,
      "learning_rate": 2.891976498730474e-05,
      "loss": 2.9713,
      "step": 197936
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6160027980804443,
      "learning_rate": 2.8918012715624618e-05,
      "loss": 2.589,
      "step": 197937
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.973315477371216,
      "learning_rate": 2.8916260494343844e-05,
      "loss": 2.9964,
      "step": 197938
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.013972282409668,
      "learning_rate": 2.891450832346256e-05,
      "loss": 2.8147,
      "step": 197939
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2336649894714355,
      "learning_rate": 2.8912756202981324e-05,
      "loss": 2.8899,
      "step": 197940
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7591779232025146,
      "learning_rate": 2.8911004132900207e-05,
      "loss": 2.8845,
      "step": 197941
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.293501615524292,
      "learning_rate": 2.8909252113219706e-05,
      "loss": 2.8378,
      "step": 197942
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.181734800338745,
      "learning_rate": 2.8907500143940053e-05,
      "loss": 3.0038,
      "step": 197943
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4475769996643066,
      "learning_rate": 2.8905748225061653e-05,
      "loss": 3.0055,
      "step": 197944
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.657538890838623,
      "learning_rate": 2.8903996356584735e-05,
      "loss": 2.8908,
      "step": 197945
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0171799659729004,
      "learning_rate": 2.89022445385098e-05,
      "loss": 3.1004,
      "step": 197946
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.084552764892578,
      "learning_rate": 2.8900492770836947e-05,
      "loss": 3.0186,
      "step": 197947
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6527693271636963,
      "learning_rate": 2.8898741053566642e-05,
      "loss": 2.8945,
      "step": 197948
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.910752058029175,
      "learning_rate": 2.8896989386699154e-05,
      "loss": 2.8723,
      "step": 197949
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8021748065948486,
      "learning_rate": 2.889523777023488e-05,
      "loss": 2.8205,
      "step": 197950
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.668412208557129,
      "learning_rate": 2.889348620417402e-05,
      "loss": 2.922,
      "step": 197951
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6257710456848145,
      "learning_rate": 2.889173468851711e-05,
      "loss": 2.9769,
      "step": 197952
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.267021656036377,
      "learning_rate": 2.8889983223264247e-05,
      "loss": 3.0879,
      "step": 197953
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.782205820083618,
      "learning_rate": 2.8888231808415896e-05,
      "loss": 2.7531,
      "step": 197954
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.410461187362671,
      "learning_rate": 2.888648044397226e-05,
      "loss": 2.9666,
      "step": 197955
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7257258892059326,
      "learning_rate": 2.888472912993384e-05,
      "loss": 2.7415,
      "step": 197956
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3153977394104004,
      "learning_rate": 2.8882977866300793e-05,
      "loss": 3.0564,
      "step": 197957
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7277956008911133,
      "learning_rate": 2.888122665307363e-05,
      "loss": 2.8626,
      "step": 197958
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8924736976623535,
      "learning_rate": 2.8879475490252446e-05,
      "loss": 2.8015,
      "step": 197959
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.485123634338379,
      "learning_rate": 2.887772437783774e-05,
      "loss": 2.8139,
      "step": 197960
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.489373207092285,
      "learning_rate": 2.887597331582975e-05,
      "loss": 2.7452,
      "step": 197961
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5695712566375732,
      "learning_rate": 2.8874222304228867e-05,
      "loss": 2.6978,
      "step": 197962
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8195831775665283,
      "learning_rate": 2.887247134303533e-05,
      "loss": 2.9875,
      "step": 197963
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.607734203338623,
      "learning_rate": 2.8870720432249572e-05,
      "loss": 2.9998,
      "step": 197964
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7841758728027344,
      "learning_rate": 2.886896957187189e-05,
      "loss": 2.8979,
      "step": 197965
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8164448738098145,
      "learning_rate": 2.8867218761902554e-05,
      "loss": 3.0387,
      "step": 197966
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.699490547180176,
      "learning_rate": 2.8865468002341862e-05,
      "loss": 2.7592,
      "step": 197967
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6432366371154785,
      "learning_rate": 2.886371729319028e-05,
      "loss": 2.9639,
      "step": 197968
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.74957537651062,
      "learning_rate": 2.8861966634447975e-05,
      "loss": 2.9303,
      "step": 197969
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.425899982452393,
      "learning_rate": 2.8860216026115414e-05,
      "loss": 2.7232,
      "step": 197970
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4423139095306396,
      "learning_rate": 2.885846546819286e-05,
      "loss": 2.912,
      "step": 197971
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8548765182495117,
      "learning_rate": 2.8856714960680648e-05,
      "loss": 3.0948,
      "step": 197972
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6346733570098877,
      "learning_rate": 2.8854964503579016e-05,
      "loss": 2.7574,
      "step": 197973
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.886110544204712,
      "learning_rate": 2.885321409688842e-05,
      "loss": 2.76,
      "step": 197974
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.083606481552124,
      "learning_rate": 2.885146374060907e-05,
      "loss": 2.982,
      "step": 197975
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9146792888641357,
      "learning_rate": 2.8849713434741427e-05,
      "loss": 3.0643,
      "step": 197976
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.9596714973449707,
      "learning_rate": 2.8847963179285726e-05,
      "loss": 3.0128,
      "step": 197977
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.235250473022461,
      "learning_rate": 2.8846212974242267e-05,
      "loss": 2.9573,
      "step": 197978
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6996285915374756,
      "learning_rate": 2.8844462819611446e-05,
      "loss": 2.6883,
      "step": 197979
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.03218150138855,
      "learning_rate": 2.884271271539357e-05,
      "loss": 2.7602,
      "step": 197980
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5881526470184326,
      "learning_rate": 2.8840962661588897e-05,
      "loss": 2.7904,
      "step": 197981
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7913756370544434,
      "learning_rate": 2.8839212658197864e-05,
      "loss": 2.8644,
      "step": 197982
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.20024037361145,
      "learning_rate": 2.883746270522074e-05,
      "loss": 2.9625,
      "step": 197983
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.967782974243164,
      "learning_rate": 2.8835712802657784e-05,
      "loss": 3.1716,
      "step": 197984
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.088665008544922,
      "learning_rate": 2.8833962950509438e-05,
      "loss": 2.7159,
      "step": 197985
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.999706506729126,
      "learning_rate": 2.8832213148775995e-05,
      "loss": 2.9394,
      "step": 197986
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1065239906311035,
      "learning_rate": 2.8830463397457725e-05,
      "loss": 2.8214,
      "step": 197987
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.899195432662964,
      "learning_rate": 2.8828713696554996e-05,
      "loss": 2.8979,
      "step": 197988
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.736231565475464,
      "learning_rate": 2.882696404606817e-05,
      "loss": 2.8142,
      "step": 197989
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7612242698669434,
      "learning_rate": 2.8825214445997448e-05,
      "loss": 2.9106,
      "step": 197990
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.148192405700684,
      "learning_rate": 2.882346489634333e-05,
      "loss": 2.8829,
      "step": 197991
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7999846935272217,
      "learning_rate": 2.882171539710595e-05,
      "loss": 3.0357,
      "step": 197992
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3889925479888916,
      "learning_rate": 2.8819965948285805e-05,
      "loss": 2.7077,
      "step": 197993
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8260936737060547,
      "learning_rate": 2.8818216549883166e-05,
      "loss": 3.1043,
      "step": 197994
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7266664505004883,
      "learning_rate": 2.8816467201898296e-05,
      "loss": 2.8427,
      "step": 197995
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.516462802886963,
      "learning_rate": 2.8814717904331562e-05,
      "loss": 3.1528,
      "step": 197996
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8498706817626953,
      "learning_rate": 2.8812968657183298e-05,
      "loss": 2.9573,
      "step": 197997
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.664839029312134,
      "learning_rate": 2.8811219460453803e-05,
      "loss": 3.0379,
      "step": 197998
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.660382032394409,
      "learning_rate": 2.8809470314143446e-05,
      "loss": 3.0416,
      "step": 197999
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.345707893371582,
      "learning_rate": 2.880772121825249e-05,
      "loss": 3.0923,
      "step": 198000
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.740920066833496,
      "learning_rate": 2.88059721727814e-05,
      "loss": 3.0542,
      "step": 198001
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.127851724624634,
      "learning_rate": 2.8804223177730312e-05,
      "loss": 3.0426,
      "step": 198002
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.511920928955078,
      "learning_rate": 2.880247423309966e-05,
      "loss": 3.1358,
      "step": 198003
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4656665325164795,
      "learning_rate": 2.880072533888971e-05,
      "loss": 2.9894,
      "step": 198004
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9706900119781494,
      "learning_rate": 2.879897649510089e-05,
      "loss": 2.8884,
      "step": 198005
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.731090784072876,
      "learning_rate": 2.8797227701733373e-05,
      "loss": 2.7673,
      "step": 198006
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.037811040878296,
      "learning_rate": 2.8795478958787722e-05,
      "loss": 2.8589,
      "step": 198007
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9220995903015137,
      "learning_rate": 2.879373026626397e-05,
      "loss": 2.7355,
      "step": 198008
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.749289035797119,
      "learning_rate": 2.879198162416265e-05,
      "loss": 2.9533,
      "step": 198009
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6996476650238037,
      "learning_rate": 2.8790233032483935e-05,
      "loss": 2.8562,
      "step": 198010
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7584891319274902,
      "learning_rate": 2.878848449122835e-05,
      "loss": 2.9557,
      "step": 198011
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.621987819671631,
      "learning_rate": 2.8786736000395993e-05,
      "loss": 2.9437,
      "step": 198012
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6319572925567627,
      "learning_rate": 2.8784987559987438e-05,
      "loss": 2.9242,
      "step": 198013
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6670703887939453,
      "learning_rate": 2.8783239170002747e-05,
      "loss": 2.7603,
      "step": 198014
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1011295318603516,
      "learning_rate": 2.8781490830442457e-05,
      "loss": 3.0454,
      "step": 198015
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.800442695617676,
      "learning_rate": 2.877974254130673e-05,
      "loss": 2.8752,
      "step": 198016
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.570993661880493,
      "learning_rate": 2.877799430259603e-05,
      "loss": 2.8274,
      "step": 198017
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8053157329559326,
      "learning_rate": 2.8776246114310562e-05,
      "loss": 3.0581,
      "step": 198018
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.140665292739868,
      "learning_rate": 2.877449797645083e-05,
      "loss": 2.9025,
      "step": 198019
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.983445167541504,
      "learning_rate": 2.8772749889016922e-05,
      "loss": 2.8246,
      "step": 198020
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.729487419128418,
      "learning_rate": 2.8771001852009312e-05,
      "loss": 2.8628,
      "step": 198021
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4420430660247803,
      "learning_rate": 2.8769253865428264e-05,
      "loss": 2.855,
      "step": 198022
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5179901123046875,
      "learning_rate": 2.876750592927418e-05,
      "loss": 3.0494,
      "step": 198023
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.060091495513916,
      "learning_rate": 2.8765758043547292e-05,
      "loss": 2.7866,
      "step": 198024
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7346153259277344,
      "learning_rate": 2.8764010208248066e-05,
      "loss": 3.2463,
      "step": 198025
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.852475166320801,
      "learning_rate": 2.8762262423376635e-05,
      "loss": 2.9251,
      "step": 198026
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.594886302947998,
      "learning_rate": 2.8760514688933467e-05,
      "loss": 2.9755,
      "step": 198027
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9921791553497314,
      "learning_rate": 2.875876700491876e-05,
      "loss": 3.3944,
      "step": 198028
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5703001022338867,
      "learning_rate": 2.875701937133301e-05,
      "loss": 2.7249,
      "step": 198029
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.3943400382995605,
      "learning_rate": 2.875527178817639e-05,
      "loss": 2.8088,
      "step": 198030
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4811103343963623,
      "learning_rate": 2.8753524255449366e-05,
      "loss": 3.126,
      "step": 198031
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9549498558044434,
      "learning_rate": 2.8751776773152103e-05,
      "loss": 2.8558,
      "step": 198032
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5482699871063232,
      "learning_rate": 2.875002934128503e-05,
      "loss": 2.8857,
      "step": 198033
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3077056407928467,
      "learning_rate": 2.8748281959848417e-05,
      "loss": 3.0123,
      "step": 198034
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.444312810897827,
      "learning_rate": 2.8746534628842667e-05,
      "loss": 2.6876,
      "step": 198035
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0613696575164795,
      "learning_rate": 2.8744787348268005e-05,
      "loss": 2.7172,
      "step": 198036
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1658976078033447,
      "learning_rate": 2.8743040118124906e-05,
      "loss": 2.839,
      "step": 198037
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.82379150390625,
      "learning_rate": 2.8741292938413494e-05,
      "loss": 2.9162,
      "step": 198038
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7157695293426514,
      "learning_rate": 2.873954580913428e-05,
      "loss": 3.0805,
      "step": 198039
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0994558334350586,
      "learning_rate": 2.8737798730287388e-05,
      "loss": 2.8896,
      "step": 198040
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1464757919311523,
      "learning_rate": 2.8736051701873354e-05,
      "loss": 2.804,
      "step": 198041
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0785913467407227,
      "learning_rate": 2.8734304723892342e-05,
      "loss": 3.0527,
      "step": 198042
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5090785026550293,
      "learning_rate": 2.8732557796344824e-05,
      "loss": 2.9658,
      "step": 198043
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.455315351486206,
      "learning_rate": 2.8730810919231027e-05,
      "loss": 2.8668,
      "step": 198044
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7850565910339355,
      "learning_rate": 2.872906409255129e-05,
      "loss": 3.0096,
      "step": 198045
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.677727699279785,
      "learning_rate": 2.8727317316305875e-05,
      "loss": 2.8934,
      "step": 198046
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2784204483032227,
      "learning_rate": 2.872557059049522e-05,
      "loss": 3.0098,
      "step": 198047
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7405805587768555,
      "learning_rate": 2.872382391511958e-05,
      "loss": 2.9682,
      "step": 198048
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1725082397460938,
      "learning_rate": 2.8722077290179337e-05,
      "loss": 2.7656,
      "step": 198049
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3363449573516846,
      "learning_rate": 2.872033071567478e-05,
      "loss": 3.0545,
      "step": 198050
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3136284351348877,
      "learning_rate": 2.871858419160624e-05,
      "loss": 2.7969,
      "step": 198051
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9894185066223145,
      "learning_rate": 2.8716837717973993e-05,
      "loss": 2.9218,
      "step": 198052
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.882540702819824,
      "learning_rate": 2.8715091294778437e-05,
      "loss": 2.9383,
      "step": 198053
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0913374423980713,
      "learning_rate": 2.871334492201983e-05,
      "loss": 2.9159,
      "step": 198054
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5803728103637695,
      "learning_rate": 2.871159859969858e-05,
      "loss": 3.1401,
      "step": 198055
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9641013145446777,
      "learning_rate": 2.8709852327814954e-05,
      "loss": 2.7785,
      "step": 198056
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.849007606506348,
      "learning_rate": 2.870810610636931e-05,
      "loss": 2.8584,
      "step": 198057
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1685917377471924,
      "learning_rate": 2.870635993536189e-05,
      "loss": 2.9243,
      "step": 198058
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5766117572784424,
      "learning_rate": 2.870461381479312e-05,
      "loss": 2.8519,
      "step": 198059
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.512166738510132,
      "learning_rate": 2.8702867744663238e-05,
      "loss": 2.7807,
      "step": 198060
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.407256603240967,
      "learning_rate": 2.8701121724972677e-05,
      "loss": 2.8083,
      "step": 198061
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.093400478363037,
      "learning_rate": 2.8699375755721664e-05,
      "loss": 2.8622,
      "step": 198062
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6923012733459473,
      "learning_rate": 2.869762983691054e-05,
      "loss": 3.0665,
      "step": 198063
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8024351596832275,
      "learning_rate": 2.8695883968539703e-05,
      "loss": 3.0399,
      "step": 198064
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.675877571105957,
      "learning_rate": 2.8694138150609415e-05,
      "loss": 2.7297,
      "step": 198065
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5726981163024902,
      "learning_rate": 2.8692392383119946e-05,
      "loss": 2.9321,
      "step": 198066
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.557814121246338,
      "learning_rate": 2.8690646666071726e-05,
      "loss": 3.0688,
      "step": 198067
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.920440196990967,
      "learning_rate": 2.868890099946506e-05,
      "loss": 2.8855,
      "step": 198068
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8973705768585205,
      "learning_rate": 2.8687155383300175e-05,
      "loss": 2.9443,
      "step": 198069
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.705142021179199,
      "learning_rate": 2.8685409817577542e-05,
      "loss": 2.7392,
      "step": 198070
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.781745433807373,
      "learning_rate": 2.8683664302297395e-05,
      "loss": 2.7781,
      "step": 198071
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.150311231613159,
      "learning_rate": 2.8681918837460028e-05,
      "loss": 3.0823,
      "step": 198072
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.753904342651367,
      "learning_rate": 2.868017342306588e-05,
      "loss": 3.0673,
      "step": 198073
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7850430011749268,
      "learning_rate": 2.8678428059115212e-05,
      "loss": 3.0375,
      "step": 198074
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.730855703353882,
      "learning_rate": 2.867668274560826e-05,
      "loss": 2.9388,
      "step": 198075
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.884274959564209,
      "learning_rate": 2.8674937482545522e-05,
      "loss": 2.9687,
      "step": 198076
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9283206462860107,
      "learning_rate": 2.867319226992717e-05,
      "loss": 2.9697,
      "step": 198077
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.301642894744873,
      "learning_rate": 2.8671447107753633e-05,
      "loss": 2.8417,
      "step": 198078
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.209726810455322,
      "learning_rate": 2.8669701996025206e-05,
      "loss": 3.1099,
      "step": 198079
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1425700187683105,
      "learning_rate": 2.8667956934742196e-05,
      "loss": 2.9626,
      "step": 198080
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.752976655960083,
      "learning_rate": 2.8666211923904904e-05,
      "loss": 3.2067,
      "step": 198081
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4757027626037598,
      "learning_rate": 2.866446696351372e-05,
      "loss": 2.5571,
      "step": 198082
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.075223207473755,
      "learning_rate": 2.8662722053568887e-05,
      "loss": 2.8272,
      "step": 198083
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3328709602355957,
      "learning_rate": 2.8660977194070833e-05,
      "loss": 2.8156,
      "step": 198084
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.895871162414551,
      "learning_rate": 2.8659232385019825e-05,
      "loss": 2.9824,
      "step": 198085
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.765580654144287,
      "learning_rate": 2.8657487626416165e-05,
      "loss": 2.8797,
      "step": 198086
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.3536174297332764,
      "learning_rate": 2.865574291826015e-05,
      "loss": 2.9528,
      "step": 198087
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.861753225326538,
      "learning_rate": 2.8653998260552246e-05,
      "loss": 2.7182,
      "step": 198088
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.132965564727783,
      "learning_rate": 2.865225365329259e-05,
      "loss": 2.9353,
      "step": 198089
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.945463180541992,
      "learning_rate": 2.865050909648168e-05,
      "loss": 3.021,
      "step": 198090
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3760337829589844,
      "learning_rate": 2.864876459011971e-05,
      "loss": 3.1047,
      "step": 198091
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.81658673286438,
      "learning_rate": 2.8647020134207156e-05,
      "loss": 3.0772,
      "step": 198092
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.759334087371826,
      "learning_rate": 2.8645275728744112e-05,
      "loss": 2.8919,
      "step": 198093
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8206570148468018,
      "learning_rate": 2.864353137373111e-05,
      "loss": 3.0617,
      "step": 198094
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0362348556518555,
      "learning_rate": 2.8641787069168353e-05,
      "loss": 3.0692,
      "step": 198095
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.56050443649292,
      "learning_rate": 2.8640042815056242e-05,
      "loss": 2.8371,
      "step": 198096
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6402170658111572,
      "learning_rate": 2.863829861139504e-05,
      "loss": 2.8815,
      "step": 198097
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4703400135040283,
      "learning_rate": 2.8636554458185214e-05,
      "loss": 2.8524,
      "step": 198098
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.990427255630493,
      "learning_rate": 2.863481035542683e-05,
      "loss": 2.9735,
      "step": 198099
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0297956466674805,
      "learning_rate": 2.8633066303120456e-05,
      "loss": 3.0471,
      "step": 198100
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0676052570343018,
      "learning_rate": 2.8631322301266223e-05,
      "loss": 2.8255,
      "step": 198101
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.26792311668396,
      "learning_rate": 2.8629578349864636e-05,
      "loss": 3.1326,
      "step": 198102
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.26310658454895,
      "learning_rate": 2.8627834448915855e-05,
      "loss": 3.0234,
      "step": 198103
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4636943340301514,
      "learning_rate": 2.862609059842038e-05,
      "loss": 2.8805,
      "step": 198104
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.962644100189209,
      "learning_rate": 2.862434679837835e-05,
      "loss": 3.0147,
      "step": 198105
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.915151357650757,
      "learning_rate": 2.8622603048790227e-05,
      "loss": 3.2141,
      "step": 198106
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.315417766571045,
      "learning_rate": 2.8620859349656244e-05,
      "loss": 2.8123,
      "step": 198107
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.647190570831299,
      "learning_rate": 2.8619115700976803e-05,
      "loss": 3.0105,
      "step": 198108
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.620515823364258,
      "learning_rate": 2.8617372102752135e-05,
      "loss": 2.9337,
      "step": 198109
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.122643232345581,
      "learning_rate": 2.8615628554982673e-05,
      "loss": 2.9499,
      "step": 198110
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0050594806671143,
      "learning_rate": 2.8613885057668716e-05,
      "loss": 3.0579,
      "step": 198111
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.723337173461914,
      "learning_rate": 2.8612141610810534e-05,
      "loss": 3.063,
      "step": 198112
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.26210355758667,
      "learning_rate": 2.8610398214408424e-05,
      "loss": 2.7438,
      "step": 198113
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7918589115142822,
      "learning_rate": 2.860865486846282e-05,
      "loss": 3.0363,
      "step": 198114
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.200579881668091,
      "learning_rate": 2.8606911572973955e-05,
      "loss": 2.746,
      "step": 198115
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.365699529647827,
      "learning_rate": 2.860516832794223e-05,
      "loss": 3.1497,
      "step": 198116
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6873748302459717,
      "learning_rate": 2.860342513336791e-05,
      "loss": 3.0229,
      "step": 198117
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7518467903137207,
      "learning_rate": 2.8601681989251357e-05,
      "loss": 2.8013,
      "step": 198118
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0760698318481445,
      "learning_rate": 2.8599938895592812e-05,
      "loss": 2.9542,
      "step": 198119
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5865046977996826,
      "learning_rate": 2.8598195852392704e-05,
      "loss": 2.7516,
      "step": 198120
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0248098373413086,
      "learning_rate": 2.8596452859651297e-05,
      "loss": 2.8808,
      "step": 198121
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.9770748615264893,
      "learning_rate": 2.8594709917368964e-05,
      "loss": 3.1225,
      "step": 198122
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.741936445236206,
      "learning_rate": 2.8592967025545997e-05,
      "loss": 3.1493,
      "step": 198123
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6194074153900146,
      "learning_rate": 2.8591224184182704e-05,
      "loss": 2.8231,
      "step": 198124
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.286178112030029,
      "learning_rate": 2.858948139327938e-05,
      "loss": 3.0907,
      "step": 198125
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.005995273590088,
      "learning_rate": 2.8587738652836456e-05,
      "loss": 2.9577,
      "step": 198126
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.734027624130249,
      "learning_rate": 2.8585995962854135e-05,
      "loss": 2.8028,
      "step": 198127
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.92641544342041,
      "learning_rate": 2.8584253323332884e-05,
      "loss": 2.8147,
      "step": 198128
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1312265396118164,
      "learning_rate": 2.85825107342729e-05,
      "loss": 2.8518,
      "step": 198129
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4697701930999756,
      "learning_rate": 2.8580768195674587e-05,
      "loss": 3.1581,
      "step": 198130
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.766012668609619,
      "learning_rate": 2.857902570753817e-05,
      "loss": 2.7647,
      "step": 198131
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.648716449737549,
      "learning_rate": 2.857728326986406e-05,
      "loss": 2.9568,
      "step": 198132
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8656325340270996,
      "learning_rate": 2.8575540882652547e-05,
      "loss": 2.9078,
      "step": 198133
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.575111150741577,
      "learning_rate": 2.8573798545904002e-05,
      "loss": 3.0131,
      "step": 198134
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7612366676330566,
      "learning_rate": 2.8572056259618692e-05,
      "loss": 3.0846,
      "step": 198135
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5715389251708984,
      "learning_rate": 2.8570314023796982e-05,
      "loss": 3.2567,
      "step": 198136
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.880850076675415,
      "learning_rate": 2.8568571838439104e-05,
      "loss": 3.0205,
      "step": 198137
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.792879343032837,
      "learning_rate": 2.8566829703545524e-05,
      "loss": 3.0682,
      "step": 198138
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.154989242553711,
      "learning_rate": 2.856508761911641e-05,
      "loss": 2.8981,
      "step": 198139
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.102526903152466,
      "learning_rate": 2.8563345585152265e-05,
      "loss": 2.8323,
      "step": 198140
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2002761363983154,
      "learning_rate": 2.8561603601653282e-05,
      "loss": 2.7937,
      "step": 198141
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.848909854888916,
      "learning_rate": 2.8559861668619864e-05,
      "loss": 2.7546,
      "step": 198142
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1998302936553955,
      "learning_rate": 2.855811978605218e-05,
      "loss": 3.1057,
      "step": 198143
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8993735313415527,
      "learning_rate": 2.855637795395076e-05,
      "loss": 3.1437,
      "step": 198144
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8006319999694824,
      "learning_rate": 2.855463617231577e-05,
      "loss": 2.9863,
      "step": 198145
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.556281566619873,
      "learning_rate": 2.8552894441147643e-05,
      "loss": 3.0213,
      "step": 198146
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.580045461654663,
      "learning_rate": 2.8551152760446682e-05,
      "loss": 2.9639,
      "step": 198147
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0056042671203613,
      "learning_rate": 2.854941113021315e-05,
      "loss": 2.8535,
      "step": 198148
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.357548475265503,
      "learning_rate": 2.8547669550447383e-05,
      "loss": 2.8298,
      "step": 198149
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.054262638092041,
      "learning_rate": 2.8545928021149776e-05,
      "loss": 2.9874,
      "step": 198150
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.481386661529541,
      "learning_rate": 2.8544186542320534e-05,
      "loss": 2.7247,
      "step": 198151
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2654261589050293,
      "learning_rate": 2.854244511396012e-05,
      "loss": 2.9847,
      "step": 198152
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.888822317123413,
      "learning_rate": 2.854070373606877e-05,
      "loss": 2.9756,
      "step": 198153
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.508047580718994,
      "learning_rate": 2.853896240864678e-05,
      "loss": 3.0262,
      "step": 198154
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1005492210388184,
      "learning_rate": 2.8537221131694588e-05,
      "loss": 3.2213,
      "step": 198155
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.909640312194824,
      "learning_rate": 2.8535479905212456e-05,
      "loss": 3.0871,
      "step": 198156
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8648993968963623,
      "learning_rate": 2.853373872920065e-05,
      "loss": 2.5745,
      "step": 198157
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.94990873336792,
      "learning_rate": 2.8531997603659574e-05,
      "loss": 2.8715,
      "step": 198158
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6898226737976074,
      "learning_rate": 2.8530256528589555e-05,
      "loss": 2.9002,
      "step": 198159
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.53682541847229,
      "learning_rate": 2.85285155039908e-05,
      "loss": 2.7921,
      "step": 198160
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7034494876861572,
      "learning_rate": 2.8526774529863806e-05,
      "loss": 3.2118,
      "step": 198161
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0273008346557617,
      "learning_rate": 2.8525033606208737e-05,
      "loss": 2.9292,
      "step": 198162
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.006601095199585,
      "learning_rate": 2.8523292733026026e-05,
      "loss": 2.9842,
      "step": 198163
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.602344274520874,
      "learning_rate": 2.8521551910315976e-05,
      "loss": 2.7666,
      "step": 198164
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6588423252105713,
      "learning_rate": 2.8519811138078918e-05,
      "loss": 2.885,
      "step": 198165
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6577529907226562,
      "learning_rate": 2.8518070416315052e-05,
      "loss": 2.9735,
      "step": 198166
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3813552856445312,
      "learning_rate": 2.851632974502488e-05,
      "loss": 2.9537,
      "step": 198167
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.80318546295166,
      "learning_rate": 2.85145891242086e-05,
      "loss": 3.1845,
      "step": 198168
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.593231678009033,
      "learning_rate": 2.8512848553866642e-05,
      "loss": 2.7294,
      "step": 198169
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.531038761138916,
      "learning_rate": 2.8511108033999243e-05,
      "loss": 2.9162,
      "step": 198170
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.69572377204895,
      "learning_rate": 2.8509367564606767e-05,
      "loss": 2.8928,
      "step": 198171
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.217345237731934,
      "learning_rate": 2.8507627145689482e-05,
      "loss": 2.9678,
      "step": 198172
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.576646327972412,
      "learning_rate": 2.850588677724779e-05,
      "loss": 2.9563,
      "step": 198173
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.496567249298096,
      "learning_rate": 2.8504146459281918e-05,
      "loss": 2.8563,
      "step": 198174
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.688977003097534,
      "learning_rate": 2.850240619179234e-05,
      "loss": 3.1267,
      "step": 198175
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8132760524749756,
      "learning_rate": 2.850066597477918e-05,
      "loss": 3.2019,
      "step": 198176
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.499373197555542,
      "learning_rate": 2.849892580824298e-05,
      "loss": 2.909,
      "step": 198177
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.698869228363037,
      "learning_rate": 2.849718569218393e-05,
      "loss": 2.7992,
      "step": 198178
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.229637145996094,
      "learning_rate": 2.8495445626602375e-05,
      "loss": 2.9465,
      "step": 198179
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0293898582458496,
      "learning_rate": 2.8493705611498575e-05,
      "loss": 3.1045,
      "step": 198180
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5764591693878174,
      "learning_rate": 2.8491965646872996e-05,
      "loss": 2.8395,
      "step": 198181
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.103474140167236,
      "learning_rate": 2.8490225732725836e-05,
      "loss": 3.0405,
      "step": 198182
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9576752185821533,
      "learning_rate": 2.848848586905753e-05,
      "loss": 2.8544,
      "step": 198183
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.614500522613525,
      "learning_rate": 2.8486746055868313e-05,
      "loss": 2.943,
      "step": 198184
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1753721237182617,
      "learning_rate": 2.8485006293158553e-05,
      "loss": 2.7894,
      "step": 198185
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2314445972442627,
      "learning_rate": 2.8483266580928476e-05,
      "loss": 3.0022,
      "step": 198186
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.928394317626953,
      "learning_rate": 2.8481526919178554e-05,
      "loss": 2.5869,
      "step": 198187
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.714716911315918,
      "learning_rate": 2.8479787307908984e-05,
      "loss": 2.8929,
      "step": 198188
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9418838024139404,
      "learning_rate": 2.8478047747120237e-05,
      "loss": 3.0554,
      "step": 198189
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.20700740814209,
      "learning_rate": 2.8476308236812505e-05,
      "loss": 2.928,
      "step": 198190
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4263010025024414,
      "learning_rate": 2.847456877698616e-05,
      "loss": 3.0835,
      "step": 198191
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6322431564331055,
      "learning_rate": 2.847282936764147e-05,
      "loss": 2.9317,
      "step": 198192
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.206038236618042,
      "learning_rate": 2.847109000877886e-05,
      "loss": 3.0199,
      "step": 198193
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.231323719024658,
      "learning_rate": 2.8469350700398543e-05,
      "loss": 3.102,
      "step": 198194
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.068021297454834,
      "learning_rate": 2.846761144250097e-05,
      "loss": 2.7423,
      "step": 198195
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.174321413040161,
      "learning_rate": 2.8465872235086383e-05,
      "loss": 2.8858,
      "step": 198196
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1170384883880615,
      "learning_rate": 2.8464133078155084e-05,
      "loss": 2.9112,
      "step": 198197
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8799946308135986,
      "learning_rate": 2.846239397170743e-05,
      "loss": 2.8373,
      "step": 198198
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0705747604370117,
      "learning_rate": 2.8460654915743763e-05,
      "loss": 2.9097,
      "step": 198199
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.22821307182312,
      "learning_rate": 2.8458915910264313e-05,
      "loss": 2.7906,
      "step": 198200
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0461301803588867,
      "learning_rate": 2.845717695526958e-05,
      "loss": 2.963,
      "step": 198201
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8983778953552246,
      "learning_rate": 2.845543805075976e-05,
      "loss": 2.9038,
      "step": 198202
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.787841796875,
      "learning_rate": 2.845369919673519e-05,
      "loss": 3.0767,
      "step": 198203
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.88464093208313,
      "learning_rate": 2.8451960393196172e-05,
      "loss": 3.0914,
      "step": 198204
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7750065326690674,
      "learning_rate": 2.8450221640143068e-05,
      "loss": 2.737,
      "step": 198205
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.933828115463257,
      "learning_rate": 2.844848293757618e-05,
      "loss": 2.7118,
      "step": 198206
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0592753887176514,
      "learning_rate": 2.8446744285495904e-05,
      "loss": 3.1171,
      "step": 198207
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7158541679382324,
      "learning_rate": 2.844500568390248e-05,
      "loss": 2.7676,
      "step": 198208
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9477524757385254,
      "learning_rate": 2.8443267132796266e-05,
      "loss": 2.7907,
      "step": 198209
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.363652229309082,
      "learning_rate": 2.8441528632177536e-05,
      "loss": 2.7855,
      "step": 198210
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9359099864959717,
      "learning_rate": 2.8439790182046684e-05,
      "loss": 3.0079,
      "step": 198211
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.410001039505005,
      "learning_rate": 2.8438051782403948e-05,
      "loss": 2.7873,
      "step": 198212
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4924237728118896,
      "learning_rate": 2.843631343324976e-05,
      "loss": 3.171,
      "step": 198213
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.494999647140503,
      "learning_rate": 2.8434575134584413e-05,
      "loss": 2.7933,
      "step": 198214
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.046560049057007,
      "learning_rate": 2.8432836886408184e-05,
      "loss": 3.0901,
      "step": 198215
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2550759315490723,
      "learning_rate": 2.843109868872133e-05,
      "loss": 2.9431,
      "step": 198216
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5411932468414307,
      "learning_rate": 2.8429360541524358e-05,
      "loss": 2.774,
      "step": 198217
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9852850437164307,
      "learning_rate": 2.8427622444817433e-05,
      "loss": 2.8379,
      "step": 198218
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7125418186187744,
      "learning_rate": 2.8425884398600984e-05,
      "loss": 2.675,
      "step": 198219
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.261176109313965,
      "learning_rate": 2.8424146402875314e-05,
      "loss": 2.8368,
      "step": 198220
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4172520637512207,
      "learning_rate": 2.842240845764069e-05,
      "loss": 3.0044,
      "step": 198221
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.848818063735962,
      "learning_rate": 2.842067056289744e-05,
      "loss": 3.0937,
      "step": 198222
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.110283851623535,
      "learning_rate": 2.8418932718645938e-05,
      "loss": 2.8671,
      "step": 198223
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5321033000946045,
      "learning_rate": 2.8417194924886442e-05,
      "loss": 3.2927,
      "step": 198224
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.886590003967285,
      "learning_rate": 2.841545718161936e-05,
      "loss": 3.1025,
      "step": 198225
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.562323570251465,
      "learning_rate": 2.8413719488844988e-05,
      "loss": 3.0408,
      "step": 198226
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.039204120635986,
      "learning_rate": 2.8411981846563626e-05,
      "loss": 2.878,
      "step": 198227
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7719643115997314,
      "learning_rate": 2.8410244254775572e-05,
      "loss": 2.8112,
      "step": 198228
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8218204975128174,
      "learning_rate": 2.8408506713481195e-05,
      "loss": 2.9793,
      "step": 198229
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7264389991760254,
      "learning_rate": 2.840676922268076e-05,
      "loss": 2.9146,
      "step": 198230
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6673953533172607,
      "learning_rate": 2.84050317823747e-05,
      "loss": 3.1311,
      "step": 198231
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5963566303253174,
      "learning_rate": 2.840329439256328e-05,
      "loss": 2.7324,
      "step": 198232
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.177518606185913,
      "learning_rate": 2.8401557053246803e-05,
      "loss": 2.8999,
      "step": 198233
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7780892848968506,
      "learning_rate": 2.8399819764425535e-05,
      "loss": 2.8305,
      "step": 198234
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.507913589477539,
      "learning_rate": 2.8398082526099942e-05,
      "loss": 3.1224,
      "step": 198235
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.182762622833252,
      "learning_rate": 2.8396345338270188e-05,
      "loss": 2.8107,
      "step": 198236
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8145909309387207,
      "learning_rate": 2.8394608200936774e-05,
      "loss": 2.9528,
      "step": 198237
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.588672161102295,
      "learning_rate": 2.8392871114099903e-05,
      "loss": 2.5275,
      "step": 198238
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.533501386642456,
      "learning_rate": 2.839113407775987e-05,
      "loss": 3.0811,
      "step": 198239
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.088755130767822,
      "learning_rate": 2.8389397091917144e-05,
      "loss": 2.7371,
      "step": 198240
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1314666271209717,
      "learning_rate": 2.8387660156571923e-05,
      "loss": 3.001,
      "step": 198241
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5332679748535156,
      "learning_rate": 2.838592327172451e-05,
      "loss": 2.8594,
      "step": 198242
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.917126178741455,
      "learning_rate": 2.8384186437375332e-05,
      "loss": 2.8764,
      "step": 198243
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.789454460144043,
      "learning_rate": 2.8382449653524596e-05,
      "loss": 2.9277,
      "step": 198244
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8826048374176025,
      "learning_rate": 2.8380712920172765e-05,
      "loss": 3.0813,
      "step": 198245
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7982821464538574,
      "learning_rate": 2.8378976237320073e-05,
      "loss": 3.1406,
      "step": 198246
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.791558027267456,
      "learning_rate": 2.8377239604966882e-05,
      "loss": 2.7552,
      "step": 198247
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9393932819366455,
      "learning_rate": 2.83755030231134e-05,
      "loss": 2.9228,
      "step": 198248
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.243372917175293,
      "learning_rate": 2.837376649176012e-05,
      "loss": 2.8029,
      "step": 198249
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.181572437286377,
      "learning_rate": 2.8372030010907244e-05,
      "loss": 2.6037,
      "step": 198250
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.801978349685669,
      "learning_rate": 2.8370293580555138e-05,
      "loss": 2.9588,
      "step": 198251
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.305648326873779,
      "learning_rate": 2.8368557200704166e-05,
      "loss": 2.8667,
      "step": 198252
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7363626956939697,
      "learning_rate": 2.8366820871354535e-05,
      "loss": 2.9023,
      "step": 198253
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9748353958129883,
      "learning_rate": 2.8365084592506702e-05,
      "loss": 3.2093,
      "step": 198254
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7532105445861816,
      "learning_rate": 2.836334836416091e-05,
      "loss": 3.1145,
      "step": 198255
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.615166664123535,
      "learning_rate": 2.836161218631745e-05,
      "loss": 3.0456,
      "step": 198256
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1486623287200928,
      "learning_rate": 2.835987605897676e-05,
      "loss": 2.9576,
      "step": 198257
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.946829080581665,
      "learning_rate": 2.8358139982139106e-05,
      "loss": 2.8383,
      "step": 198258
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7547354698181152,
      "learning_rate": 2.8356403955804717e-05,
      "loss": 2.7107,
      "step": 198259
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0381555557250977,
      "learning_rate": 2.8354667979974065e-05,
      "loss": 3.0955,
      "step": 198260
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.4045662879943848,
      "learning_rate": 2.8352932054647383e-05,
      "loss": 2.9218,
      "step": 198261
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.773750066757202,
      "learning_rate": 2.835119617982503e-05,
      "loss": 2.8313,
      "step": 198262
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.912798881530762,
      "learning_rate": 2.834946035550735e-05,
      "loss": 2.6462,
      "step": 198263
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2714617252349854,
      "learning_rate": 2.8347724581694596e-05,
      "loss": 3.0508,
      "step": 198264
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.651821136474609,
      "learning_rate": 2.8345988858387115e-05,
      "loss": 3.1294,
      "step": 198265
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.659249782562256,
      "learning_rate": 2.8344253185585265e-05,
      "loss": 2.9713,
      "step": 198266
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1467864513397217,
      "learning_rate": 2.8342517563289312e-05,
      "loss": 3.1141,
      "step": 198267
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2970328330993652,
      "learning_rate": 2.834078199149966e-05,
      "loss": 2.7766,
      "step": 198268
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5974607467651367,
      "learning_rate": 2.8339046470216575e-05,
      "loss": 3.0939,
      "step": 198269
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3096694946289062,
      "learning_rate": 2.833731099944042e-05,
      "loss": 3.0443,
      "step": 198270
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0173046588897705,
      "learning_rate": 2.83355755791714e-05,
      "loss": 2.9127,
      "step": 198271
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3016645908355713,
      "learning_rate": 2.8333840209410006e-05,
      "loss": 3.0023,
      "step": 198272
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.011979103088379,
      "learning_rate": 2.833210489015638e-05,
      "loss": 2.943,
      "step": 198273
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8224477767944336,
      "learning_rate": 2.8330369621411052e-05,
      "loss": 3.0003,
      "step": 198274
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9302120208740234,
      "learning_rate": 2.832863440317419e-05,
      "loss": 3.2516,
      "step": 198275
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.69077467918396,
      "learning_rate": 2.8326899235446186e-05,
      "loss": 2.965,
      "step": 198276
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.110386610031128,
      "learning_rate": 2.8325164118227283e-05,
      "loss": 2.8348,
      "step": 198277
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.29579758644104,
      "learning_rate": 2.8323429051517943e-05,
      "loss": 2.9855,
      "step": 198278
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.367790699005127,
      "learning_rate": 2.83216940353183e-05,
      "loss": 2.9219,
      "step": 198279
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.184231996536255,
      "learning_rate": 2.8319959069628884e-05,
      "loss": 2.9637,
      "step": 198280
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.464643955230713,
      "learning_rate": 2.8318224154449897e-05,
      "loss": 2.8975,
      "step": 198281
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.826807975769043,
      "learning_rate": 2.8316489289781673e-05,
      "loss": 2.6786,
      "step": 198282
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8713881969451904,
      "learning_rate": 2.831475447562448e-05,
      "loss": 2.9402,
      "step": 198283
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4034085273742676,
      "learning_rate": 2.831301971197878e-05,
      "loss": 2.7967,
      "step": 198284
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.059899091720581,
      "learning_rate": 2.8311284998844742e-05,
      "loss": 2.792,
      "step": 198285
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.7013638019561768,
      "learning_rate": 2.830955033622283e-05,
      "loss": 3.0286,
      "step": 198286
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.062912940979004,
      "learning_rate": 2.8307815724113313e-05,
      "loss": 3.1694,
      "step": 198287
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0231685638427734,
      "learning_rate": 2.8306081162516493e-05,
      "loss": 2.9255,
      "step": 198288
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.793215274810791,
      "learning_rate": 2.830434665143263e-05,
      "loss": 2.7239,
      "step": 198289
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.9480068683624268,
      "learning_rate": 2.8302612190862193e-05,
      "loss": 3.1393,
      "step": 198290
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.585505485534668,
      "learning_rate": 2.8300877780805355e-05,
      "loss": 3.194,
      "step": 198291
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6074321269989014,
      "learning_rate": 2.8299143421262572e-05,
      "loss": 2.9699,
      "step": 198292
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9137797355651855,
      "learning_rate": 2.8297409112234114e-05,
      "loss": 2.8843,
      "step": 198293
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.384016752243042,
      "learning_rate": 2.8295674853720286e-05,
      "loss": 3.0856,
      "step": 198294
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9318037033081055,
      "learning_rate": 2.829394064572138e-05,
      "loss": 2.8273,
      "step": 198295
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.004800796508789,
      "learning_rate": 2.8292206488237834e-05,
      "loss": 3.2166,
      "step": 198296
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9283504486083984,
      "learning_rate": 2.8290472381269814e-05,
      "loss": 2.9874,
      "step": 198297
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5463969707489014,
      "learning_rate": 2.8288738324817782e-05,
      "loss": 3.0598,
      "step": 198298
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.360529661178589,
      "learning_rate": 2.828700431888198e-05,
      "loss": 3.0083,
      "step": 198299
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6046953201293945,
      "learning_rate": 2.8285270363462796e-05,
      "loss": 3.0947,
      "step": 198300
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.604679107666016,
      "learning_rate": 2.8283536458560408e-05,
      "loss": 3.0061,
      "step": 198301
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.3359503746032715,
      "learning_rate": 2.828180260417534e-05,
      "loss": 2.9982,
      "step": 198302
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.121677398681641,
      "learning_rate": 2.8280068800307733e-05,
      "loss": 2.9393,
      "step": 198303
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3983561992645264,
      "learning_rate": 2.8278335046958045e-05,
      "loss": 2.9181,
      "step": 198304
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.520742177963257,
      "learning_rate": 2.8276601344126548e-05,
      "loss": 2.8931,
      "step": 198305
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8200204372406006,
      "learning_rate": 2.8274867691813573e-05,
      "loss": 2.7845,
      "step": 198306
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.048163890838623,
      "learning_rate": 2.8273134090019357e-05,
      "loss": 3.0685,
      "step": 198307
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.368570566177368,
      "learning_rate": 2.827140053874436e-05,
      "loss": 3.017,
      "step": 198308
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.78657865524292,
      "learning_rate": 2.8269667037988785e-05,
      "loss": 2.9037,
      "step": 198309
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5007808208465576,
      "learning_rate": 2.8267933587753066e-05,
      "loss": 2.9748,
      "step": 198310
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.127206325531006,
      "learning_rate": 2.82662001880374e-05,
      "loss": 2.8217,
      "step": 198311
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4461703300476074,
      "learning_rate": 2.826446683884229e-05,
      "loss": 3.1458,
      "step": 198312
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0514416694641113,
      "learning_rate": 2.8262733540167835e-05,
      "loss": 3.0954,
      "step": 198313
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5508999824523926,
      "learning_rate": 2.8261000292014537e-05,
      "loss": 2.7719,
      "step": 198314
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9175431728363037,
      "learning_rate": 2.8259267094382552e-05,
      "loss": 2.9187,
      "step": 198315
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0421738624572754,
      "learning_rate": 2.8257533947272394e-05,
      "loss": 2.7331,
      "step": 198316
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.981825828552246,
      "learning_rate": 2.825580085068422e-05,
      "loss": 3.0935,
      "step": 198317
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.891059398651123,
      "learning_rate": 2.825406780461853e-05,
      "loss": 2.8898,
      "step": 198318
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9386987686157227,
      "learning_rate": 2.825233480907543e-05,
      "loss": 2.6963,
      "step": 198319
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1092400550842285,
      "learning_rate": 2.8250601864055412e-05,
      "loss": 3.2284,
      "step": 198320
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4558322429656982,
      "learning_rate": 2.824886896955868e-05,
      "loss": 2.8359,
      "step": 198321
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.342435121536255,
      "learning_rate": 2.8247136125585666e-05,
      "loss": 2.9417,
      "step": 198322
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1312642097473145,
      "learning_rate": 2.8245403332136574e-05,
      "loss": 3.0017,
      "step": 198323
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6980669498443604,
      "learning_rate": 2.8243670589211863e-05,
      "loss": 3.0648,
      "step": 198324
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.074432373046875,
      "learning_rate": 2.8241937896811772e-05,
      "loss": 2.8543,
      "step": 198325
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8173811435699463,
      "learning_rate": 2.8240205254936664e-05,
      "loss": 3.0085,
      "step": 198326
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.604573726654053,
      "learning_rate": 2.8238472663586744e-05,
      "loss": 3.1433,
      "step": 198327
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.661877393722534,
      "learning_rate": 2.8236740122762504e-05,
      "loss": 2.8038,
      "step": 198328
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.536550998687744,
      "learning_rate": 2.8235007632464078e-05,
      "loss": 3.0143,
      "step": 198329
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9060850143432617,
      "learning_rate": 2.8233275192692007e-05,
      "loss": 2.8372,
      "step": 198330
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4512431621551514,
      "learning_rate": 2.823154280344645e-05,
      "loss": 3.006,
      "step": 198331
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.842581272125244,
      "learning_rate": 2.8229810464727807e-05,
      "loss": 2.891,
      "step": 198332
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7306699752807617,
      "learning_rate": 2.8228078176536318e-05,
      "loss": 2.8731,
      "step": 198333
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0265042781829834,
      "learning_rate": 2.8226345938872407e-05,
      "loss": 2.9901,
      "step": 198334
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.042147636413574,
      "learning_rate": 2.8224613751736313e-05,
      "loss": 2.776,
      "step": 198335
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.942291498184204,
      "learning_rate": 2.8222881615128433e-05,
      "loss": 3.125,
      "step": 198336
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6986260414123535,
      "learning_rate": 2.8221149529049038e-05,
      "loss": 2.8752,
      "step": 198337
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.497894763946533,
      "learning_rate": 2.821941749349842e-05,
      "loss": 3.1384,
      "step": 198338
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2649199962615967,
      "learning_rate": 2.8217685508476983e-05,
      "loss": 2.7603,
      "step": 198339
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.647617816925049,
      "learning_rate": 2.821595357398503e-05,
      "loss": 2.8757,
      "step": 198340
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.808635711669922,
      "learning_rate": 2.821422169002282e-05,
      "loss": 3.2077,
      "step": 198341
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.819399118423462,
      "learning_rate": 2.8212489856590726e-05,
      "loss": 2.6748,
      "step": 198342
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6957149505615234,
      "learning_rate": 2.8210758073689077e-05,
      "loss": 3.1366,
      "step": 198343
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.461878538131714,
      "learning_rate": 2.820902634131814e-05,
      "loss": 3.1142,
      "step": 198344
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8072211742401123,
      "learning_rate": 2.8207294659478318e-05,
      "loss": 2.7305,
      "step": 198345
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7240874767303467,
      "learning_rate": 2.820556302816984e-05,
      "loss": 3.1354,
      "step": 198346
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.042270660400391,
      "learning_rate": 2.8203831447393144e-05,
      "loss": 2.8749,
      "step": 198347
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8026275634765625,
      "learning_rate": 2.8202099917148457e-05,
      "loss": 2.6341,
      "step": 198348
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.526270866394043,
      "learning_rate": 2.820036843743615e-05,
      "loss": 2.8069,
      "step": 198349
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.939779281616211,
      "learning_rate": 2.8198637008256453e-05,
      "loss": 2.9187,
      "step": 198350
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.04970645904541,
      "learning_rate": 2.8196905629609833e-05,
      "loss": 3.2046,
      "step": 198351
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7754440307617188,
      "learning_rate": 2.819517430149649e-05,
      "loss": 2.9534,
      "step": 198352
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.9620425701141357,
      "learning_rate": 2.819344302391683e-05,
      "loss": 2.7708,
      "step": 198353
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.9540820121765137,
      "learning_rate": 2.8191711796871175e-05,
      "loss": 2.8241,
      "step": 198354
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.015223503112793,
      "learning_rate": 2.8189980620359766e-05,
      "loss": 3.3128,
      "step": 198355
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.563171863555908,
      "learning_rate": 2.818824949438293e-05,
      "loss": 3.0748,
      "step": 198356
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.066802978515625,
      "learning_rate": 2.8186518418941108e-05,
      "loss": 3.0683,
      "step": 198357
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.895551919937134,
      "learning_rate": 2.8184787394034458e-05,
      "loss": 3.194,
      "step": 198358
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.786862850189209,
      "learning_rate": 2.8183056419663454e-05,
      "loss": 2.8552,
      "step": 198359
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2411961555480957,
      "learning_rate": 2.8181325495828356e-05,
      "loss": 3.0657,
      "step": 198360
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.488086462020874,
      "learning_rate": 2.8179594622529467e-05,
      "loss": 2.7956,
      "step": 198361
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.297142744064331,
      "learning_rate": 2.8177863799767088e-05,
      "loss": 2.893,
      "step": 198362
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7899651527404785,
      "learning_rate": 2.8176133027541615e-05,
      "loss": 2.8218,
      "step": 198363
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6526787281036377,
      "learning_rate": 2.8174402305853282e-05,
      "loss": 2.7669,
      "step": 198364
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1788995265960693,
      "learning_rate": 2.8172671634702492e-05,
      "loss": 2.8863,
      "step": 198365
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.761554002761841,
      "learning_rate": 2.8170941014089543e-05,
      "loss": 2.8662,
      "step": 198366
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.024829149246216,
      "learning_rate": 2.8169210444014768e-05,
      "loss": 2.8391,
      "step": 198367
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.3996920585632324,
      "learning_rate": 2.8167479924478396e-05,
      "loss": 2.6939,
      "step": 198368
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.31172251701355,
      "learning_rate": 2.816574945548087e-05,
      "loss": 2.8586,
      "step": 198369
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.167998790740967,
      "learning_rate": 2.8164019037022413e-05,
      "loss": 3.0292,
      "step": 198370
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.045396566390991,
      "learning_rate": 2.816228866910346e-05,
      "loss": 2.9561,
      "step": 198371
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8283181190490723,
      "learning_rate": 2.8160558351724283e-05,
      "loss": 2.865,
      "step": 198372
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.278508186340332,
      "learning_rate": 2.8158828084885175e-05,
      "loss": 2.9947,
      "step": 198373
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.835435152053833,
      "learning_rate": 2.815709786858641e-05,
      "loss": 3.0532,
      "step": 198374
    },
    {
      "epoch": 2.58,
      "grad_norm": 6.326016426086426,
      "learning_rate": 2.815536770282841e-05,
      "loss": 3.008,
      "step": 198375
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.442352771759033,
      "learning_rate": 2.8153637587611423e-05,
      "loss": 2.834,
      "step": 198376
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0716238021850586,
      "learning_rate": 2.8151907522935868e-05,
      "loss": 2.7992,
      "step": 198377
    },
    {
      "epoch": 2.58,
      "grad_norm": 5.328118801116943,
      "learning_rate": 2.8150177508801952e-05,
      "loss": 2.7184,
      "step": 198378
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.198195457458496,
      "learning_rate": 2.814844754521014e-05,
      "loss": 3.053,
      "step": 198379
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7221977710723877,
      "learning_rate": 2.8146717632160566e-05,
      "loss": 2.7416,
      "step": 198380
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.724515199661255,
      "learning_rate": 2.8144987769653692e-05,
      "loss": 3.0183,
      "step": 198381
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5234732627868652,
      "learning_rate": 2.8143257957689756e-05,
      "loss": 2.7574,
      "step": 198382
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1569316387176514,
      "learning_rate": 2.814152819626919e-05,
      "loss": 2.8501,
      "step": 198383
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.334120512008667,
      "learning_rate": 2.813979848539216e-05,
      "loss": 2.9899,
      "step": 198384
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9981613159179688,
      "learning_rate": 2.81380688250592e-05,
      "loss": 2.9288,
      "step": 198385
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2440500259399414,
      "learning_rate": 2.8136339215270375e-05,
      "loss": 2.8083,
      "step": 198386
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0417537689208984,
      "learning_rate": 2.813460965602622e-05,
      "loss": 2.8673,
      "step": 198387
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0198962688446045,
      "learning_rate": 2.81328801473269e-05,
      "loss": 2.7599,
      "step": 198388
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.722790479660034,
      "learning_rate": 2.8131150689172876e-05,
      "loss": 2.7858,
      "step": 198389
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4078078269958496,
      "learning_rate": 2.8129421281564326e-05,
      "loss": 2.7391,
      "step": 198390
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7050302028656006,
      "learning_rate": 2.812769192450177e-05,
      "loss": 2.6381,
      "step": 198391
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3858859539031982,
      "learning_rate": 2.812596261798532e-05,
      "loss": 3.1666,
      "step": 198392
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0490942001342773,
      "learning_rate": 2.81242333620154e-05,
      "loss": 2.8177,
      "step": 198393
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7160730361938477,
      "learning_rate": 2.8122504156592283e-05,
      "loss": 2.7906,
      "step": 198394
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0628111362457275,
      "learning_rate": 2.81207750017164e-05,
      "loss": 3.2105,
      "step": 198395
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.844592571258545,
      "learning_rate": 2.8119045897387914e-05,
      "loss": 2.7757,
      "step": 198396
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7764651775360107,
      "learning_rate": 2.811731684360733e-05,
      "loss": 2.7546,
      "step": 198397
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9842445850372314,
      "learning_rate": 2.8115587840374777e-05,
      "loss": 3.0105,
      "step": 198398
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.102656841278076,
      "learning_rate": 2.8113858887690722e-05,
      "loss": 2.7662,
      "step": 198399
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.871701240539551,
      "learning_rate": 2.8112129985555398e-05,
      "loss": 3.0145,
      "step": 198400
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9899065494537354,
      "learning_rate": 2.8110401133969174e-05,
      "loss": 2.963,
      "step": 198401
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0209176540374756,
      "learning_rate": 2.810867233293235e-05,
      "loss": 2.9436,
      "step": 198402
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6349496841430664,
      "learning_rate": 2.8106943582445318e-05,
      "loss": 2.9893,
      "step": 198403
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.759690046310425,
      "learning_rate": 2.8105214882508254e-05,
      "loss": 2.9184,
      "step": 198404
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.72139573097229,
      "learning_rate": 2.810348623312162e-05,
      "loss": 3.0787,
      "step": 198405
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.399261951446533,
      "learning_rate": 2.8101757634285615e-05,
      "loss": 2.7771,
      "step": 198406
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7821459770202637,
      "learning_rate": 2.8100029086000707e-05,
      "loss": 2.8662,
      "step": 198407
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8991057872772217,
      "learning_rate": 2.8098300588267064e-05,
      "loss": 3.0265,
      "step": 198408
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.936023712158203,
      "learning_rate": 2.8096572141085116e-05,
      "loss": 2.9797,
      "step": 198409
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0728907585144043,
      "learning_rate": 2.8094843744455165e-05,
      "loss": 2.9294,
      "step": 198410
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1162779331207275,
      "learning_rate": 2.809311539837751e-05,
      "loss": 2.9271,
      "step": 198411
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6589155197143555,
      "learning_rate": 2.8091387102852447e-05,
      "loss": 2.8706,
      "step": 198412
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.795844078063965,
      "learning_rate": 2.8089658857880348e-05,
      "loss": 3.0278,
      "step": 198413
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3607966899871826,
      "learning_rate": 2.8087930663461478e-05,
      "loss": 3.0875,
      "step": 198414
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.72902512550354,
      "learning_rate": 2.8086202519596235e-05,
      "loss": 2.6706,
      "step": 198415
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.513587713241577,
      "learning_rate": 2.808447442628492e-05,
      "loss": 2.8555,
      "step": 198416
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8493542671203613,
      "learning_rate": 2.8082746383527832e-05,
      "loss": 3.0959,
      "step": 198417
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.423499584197998,
      "learning_rate": 2.8081018391325204e-05,
      "loss": 2.7011,
      "step": 198418
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0186338424682617,
      "learning_rate": 2.8079290449677537e-05,
      "loss": 2.8286,
      "step": 198419
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7350950241088867,
      "learning_rate": 2.8077562558584998e-05,
      "loss": 3.2885,
      "step": 198420
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6093478202819824,
      "learning_rate": 2.807583471804805e-05,
      "loss": 2.81,
      "step": 198421
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8828024864196777,
      "learning_rate": 2.8074106928066898e-05,
      "loss": 2.7923,
      "step": 198422
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6473116874694824,
      "learning_rate": 2.807237918864187e-05,
      "loss": 2.9765,
      "step": 198423
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.5609097480773926,
      "learning_rate": 2.8070651499773367e-05,
      "loss": 2.8838,
      "step": 198424
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9776723384857178,
      "learning_rate": 2.806892386146169e-05,
      "loss": 2.9609,
      "step": 198425
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.982588768005371,
      "learning_rate": 2.806719627370704e-05,
      "loss": 3.105,
      "step": 198426
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7391045093536377,
      "learning_rate": 2.806546873650991e-05,
      "loss": 2.802,
      "step": 198427
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.746162176132202,
      "learning_rate": 2.806374124987054e-05,
      "loss": 2.8409,
      "step": 198428
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7912356853485107,
      "learning_rate": 2.8062013813789197e-05,
      "loss": 2.9118,
      "step": 198429
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.794325351715088,
      "learning_rate": 2.806028642826631e-05,
      "loss": 2.8702,
      "step": 198430
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1522083282470703,
      "learning_rate": 2.8058559093302147e-05,
      "loss": 2.847,
      "step": 198431
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.937422752380371,
      "learning_rate": 2.8056831808896973e-05,
      "loss": 2.7542,
      "step": 198432
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.105565309524536,
      "learning_rate": 2.8055104575051223e-05,
      "loss": 3.0277,
      "step": 198433
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6521716117858887,
      "learning_rate": 2.8053377391765164e-05,
      "loss": 3.0403,
      "step": 198434
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7379977703094482,
      "learning_rate": 2.805165025903906e-05,
      "loss": 2.7889,
      "step": 198435
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5959362983703613,
      "learning_rate": 2.8049923176873344e-05,
      "loss": 3.0323,
      "step": 198436
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7693843841552734,
      "learning_rate": 2.804819614526822e-05,
      "loss": 2.8064,
      "step": 198437
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9162769317626953,
      "learning_rate": 2.804646916422415e-05,
      "loss": 2.7855,
      "step": 198438
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9352078437805176,
      "learning_rate": 2.804474223374137e-05,
      "loss": 3.0712,
      "step": 198439
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8666584491729736,
      "learning_rate": 2.8043015353820173e-05,
      "loss": 3.0865,
      "step": 198440
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4385807514190674,
      "learning_rate": 2.804128852446087e-05,
      "loss": 3.0371,
      "step": 198441
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1139426231384277,
      "learning_rate": 2.803956174566392e-05,
      "loss": 2.6908,
      "step": 198442
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6430554389953613,
      "learning_rate": 2.8037835017429455e-05,
      "loss": 2.9484,
      "step": 198443
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2716262340545654,
      "learning_rate": 2.8036108339757945e-05,
      "loss": 2.8456,
      "step": 198444
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7183594703674316,
      "learning_rate": 2.8034381712649624e-05,
      "loss": 2.8516,
      "step": 198445
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.638947010040283,
      "learning_rate": 2.803265513610492e-05,
      "loss": 2.9082,
      "step": 198446
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.374319314956665,
      "learning_rate": 2.8030928610124005e-05,
      "loss": 2.9114,
      "step": 198447
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.192484140396118,
      "learning_rate": 2.8029202134707307e-05,
      "loss": 3.0673,
      "step": 198448
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1352639198303223,
      "learning_rate": 2.8027475709855064e-05,
      "loss": 3.0038,
      "step": 198449
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3317675590515137,
      "learning_rate": 2.8025749335567703e-05,
      "loss": 2.651,
      "step": 198450
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.3106789588928223,
      "learning_rate": 2.802402301184543e-05,
      "loss": 2.906,
      "step": 198451
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.12349796295166,
      "learning_rate": 2.8022296738688743e-05,
      "loss": 2.9411,
      "step": 198452
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8804843425750732,
      "learning_rate": 2.802057051609774e-05,
      "loss": 3.0252,
      "step": 198453
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.662351131439209,
      "learning_rate": 2.8018844344072888e-05,
      "loss": 2.8037,
      "step": 198454
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4068970680236816,
      "learning_rate": 2.801711822261442e-05,
      "loss": 2.972,
      "step": 198455
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1986873149871826,
      "learning_rate": 2.8015392151722772e-05,
      "loss": 3.2,
      "step": 198456
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.19793438911438,
      "learning_rate": 2.8013666131398106e-05,
      "loss": 2.9114,
      "step": 198457
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.493371963500977,
      "learning_rate": 2.8011940161640955e-05,
      "loss": 2.7167,
      "step": 198458
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.488370418548584,
      "learning_rate": 2.8010214242451424e-05,
      "loss": 3.0792,
      "step": 198459
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.133491039276123,
      "learning_rate": 2.8008488373829973e-05,
      "loss": 2.702,
      "step": 198460
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2967724800109863,
      "learning_rate": 2.800676255577684e-05,
      "loss": 2.9263,
      "step": 198461
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.96667218208313,
      "learning_rate": 2.8005036788292424e-05,
      "loss": 2.9893,
      "step": 198462
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8196024894714355,
      "learning_rate": 2.8003311071376956e-05,
      "loss": 2.9616,
      "step": 198463
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.32304048538208,
      "learning_rate": 2.8001585405030903e-05,
      "loss": 2.7673,
      "step": 198464
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0056941509246826,
      "learning_rate": 2.7999859789254398e-05,
      "loss": 2.8783,
      "step": 198465
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.022707462310791,
      "learning_rate": 2.7998134224047907e-05,
      "loss": 2.8347,
      "step": 198466
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0169448852539062,
      "learning_rate": 2.7996408709411632e-05,
      "loss": 2.8894,
      "step": 198467
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.60075044631958,
      "learning_rate": 2.7994683245346005e-05,
      "loss": 3.146,
      "step": 198468
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9685263633728027,
      "learning_rate": 2.799295783185126e-05,
      "loss": 2.7854,
      "step": 198469
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.484837055206299,
      "learning_rate": 2.7991232468927893e-05,
      "loss": 2.8487,
      "step": 198470
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1395297050476074,
      "learning_rate": 2.798950715657594e-05,
      "loss": 3.0431,
      "step": 198471
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.5020339488983154,
      "learning_rate": 2.7987781894795934e-05,
      "loss": 3.0225,
      "step": 198472
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.118332862854004,
      "learning_rate": 2.798605668358811e-05,
      "loss": 3.0027,
      "step": 198473
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.060502052307129,
      "learning_rate": 2.798433152295283e-05,
      "loss": 2.9718,
      "step": 198474
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.283982753753662,
      "learning_rate": 2.7982606412890362e-05,
      "loss": 2.8647,
      "step": 198475
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.759972333908081,
      "learning_rate": 2.7980881353401173e-05,
      "loss": 2.8579,
      "step": 198476
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0003464221954346,
      "learning_rate": 2.7979156344485367e-05,
      "loss": 3.1668,
      "step": 198477
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1867871284484863,
      "learning_rate": 2.79774313861434e-05,
      "loss": 3.0511,
      "step": 198478
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.585155725479126,
      "learning_rate": 2.7975706478375515e-05,
      "loss": 3.0301,
      "step": 198479
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.1884453296661377,
      "learning_rate": 2.7973981621182174e-05,
      "loss": 2.8766,
      "step": 198480
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9220776557922363,
      "learning_rate": 2.797225681456351e-05,
      "loss": 2.9609,
      "step": 198481
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.907217264175415,
      "learning_rate": 2.7970532058520056e-05,
      "loss": 2.8142,
      "step": 198482
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.541914463043213,
      "learning_rate": 2.7968807353051913e-05,
      "loss": 2.9351,
      "step": 198483
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9405198097229004,
      "learning_rate": 2.7967082698159547e-05,
      "loss": 3.1016,
      "step": 198484
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.730803966522217,
      "learning_rate": 2.7965358093843194e-05,
      "loss": 2.9079,
      "step": 198485
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.515566349029541,
      "learning_rate": 2.796363354010328e-05,
      "loss": 2.9812,
      "step": 198486
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.62726092338562,
      "learning_rate": 2.7961909036939976e-05,
      "loss": 3.0111,
      "step": 198487
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.054011583328247,
      "learning_rate": 2.7960184584353817e-05,
      "loss": 2.8964,
      "step": 198488
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.472853660583496,
      "learning_rate": 2.7958460182344866e-05,
      "loss": 3.0432,
      "step": 198489
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.336052179336548,
      "learning_rate": 2.7956735830913658e-05,
      "loss": 2.8769,
      "step": 198490
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7804598808288574,
      "learning_rate": 2.7955011530060356e-05,
      "loss": 2.8297,
      "step": 198491
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.550215482711792,
      "learning_rate": 2.795328727978543e-05,
      "loss": 2.6646,
      "step": 198492
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8934144973754883,
      "learning_rate": 2.7951563080089046e-05,
      "loss": 2.7144,
      "step": 198493
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.954648971557617,
      "learning_rate": 2.7949838930971702e-05,
      "loss": 2.989,
      "step": 198494
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9326953887939453,
      "learning_rate": 2.7948114832433534e-05,
      "loss": 2.9065,
      "step": 198495
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.005131959915161,
      "learning_rate": 2.794639078447497e-05,
      "loss": 3.0828,
      "step": 198496
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.270047187805176,
      "learning_rate": 2.794466678709628e-05,
      "loss": 2.7877,
      "step": 198497
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6388492584228516,
      "learning_rate": 2.7942942840297868e-05,
      "loss": 3.0872,
      "step": 198498
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2106194496154785,
      "learning_rate": 2.7941218944079923e-05,
      "loss": 2.9771,
      "step": 198499
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.724355936050415,
      "learning_rate": 2.7939495098442886e-05,
      "loss": 2.6363,
      "step": 198500
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7195379734039307,
      "learning_rate": 2.793777130338706e-05,
      "loss": 2.9384,
      "step": 198501
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.174082040786743,
      "learning_rate": 2.7936047558912732e-05,
      "loss": 2.8538,
      "step": 198502
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6610119342803955,
      "learning_rate": 2.793432386502018e-05,
      "loss": 3.1093,
      "step": 198503
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.614637851715088,
      "learning_rate": 2.79326002217098e-05,
      "loss": 2.9973,
      "step": 198504
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.635374069213867,
      "learning_rate": 2.7930876628981857e-05,
      "loss": 2.9199,
      "step": 198505
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8564462661743164,
      "learning_rate": 2.7929153086836752e-05,
      "loss": 3.061,
      "step": 198506
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.4426417350769043,
      "learning_rate": 2.7927429595274754e-05,
      "loss": 2.7767,
      "step": 198507
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7633204460144043,
      "learning_rate": 2.7925706154296124e-05,
      "loss": 2.9489,
      "step": 198508
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0911107063293457,
      "learning_rate": 2.7923982763901298e-05,
      "loss": 2.8532,
      "step": 198509
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.917606830596924,
      "learning_rate": 2.792225942409051e-05,
      "loss": 3.009,
      "step": 198510
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.818329334259033,
      "learning_rate": 2.792053613486409e-05,
      "loss": 2.9634,
      "step": 198511
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.6992716789245605,
      "learning_rate": 2.7918812896222408e-05,
      "loss": 2.8593,
      "step": 198512
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2049145698547363,
      "learning_rate": 2.7917089708165796e-05,
      "loss": 2.8633,
      "step": 198513
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.655276298522949,
      "learning_rate": 2.7915366570694453e-05,
      "loss": 2.8711,
      "step": 198514
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7645857334136963,
      "learning_rate": 2.791364348380881e-05,
      "loss": 3.1556,
      "step": 198515
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.926528215408325,
      "learning_rate": 2.7911920447509205e-05,
      "loss": 2.9789,
      "step": 198516
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6069557666778564,
      "learning_rate": 2.79101974617958e-05,
      "loss": 3.2852,
      "step": 198517
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.2472543716430664,
      "learning_rate": 2.7908474526669133e-05,
      "loss": 3.4058,
      "step": 198518
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.78985857963562,
      "learning_rate": 2.79067516421294e-05,
      "loss": 2.8635,
      "step": 198519
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.3114826679229736,
      "learning_rate": 2.7905028808176867e-05,
      "loss": 2.9275,
      "step": 198520
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.9890005588531494,
      "learning_rate": 2.7903306024812e-05,
      "loss": 2.8184,
      "step": 198521
    },
    {
      "epoch": 2.58,
      "grad_norm": 4.485289573669434,
      "learning_rate": 2.790158329203497e-05,
      "loss": 2.8409,
      "step": 198522
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8004865646362305,
      "learning_rate": 2.7899860609846237e-05,
      "loss": 2.7127,
      "step": 198523
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.876357316970825,
      "learning_rate": 2.789813797824607e-05,
      "loss": 2.7835,
      "step": 198524
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.79732608795166,
      "learning_rate": 2.789641539723474e-05,
      "loss": 3.065,
      "step": 198525
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.6352269649505615,
      "learning_rate": 2.7894692866812573e-05,
      "loss": 2.5678,
      "step": 198526
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.0634896755218506,
      "learning_rate": 2.7892970386979975e-05,
      "loss": 2.7937,
      "step": 198527
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.8172807693481445,
      "learning_rate": 2.7891247957737138e-05,
      "loss": 2.804,
      "step": 198528
    },
    {
      "epoch": 2.58,
      "grad_norm": 2.7400896549224854,
      "learning_rate": 2.7889525579084537e-05,
      "loss": 2.7374,
      "step": 198529
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.335906744003296,
      "learning_rate": 2.78878032510224e-05,
      "loss": 2.7831,
      "step": 198530
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6026856899261475,
      "learning_rate": 2.7886080973551063e-05,
      "loss": 3.103,
      "step": 198531
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9919562339782715,
      "learning_rate": 2.7884358746670753e-05,
      "loss": 3.1835,
      "step": 198532
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8248109817504883,
      "learning_rate": 2.7882636570381946e-05,
      "loss": 2.8735,
      "step": 198533
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7096049785614014,
      "learning_rate": 2.7880914444684865e-05,
      "loss": 3.0228,
      "step": 198534
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3335793018341064,
      "learning_rate": 2.7879192369579883e-05,
      "loss": 2.815,
      "step": 198535
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.814708948135376,
      "learning_rate": 2.7877470345067267e-05,
      "loss": 2.98,
      "step": 198536
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.143465757369995,
      "learning_rate": 2.7875748371147445e-05,
      "loss": 2.9485,
      "step": 198537
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.861069679260254,
      "learning_rate": 2.787402644782055e-05,
      "loss": 3.0052,
      "step": 198538
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.011309623718262,
      "learning_rate": 2.787230457508709e-05,
      "loss": 3.1399,
      "step": 198539
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.162501335144043,
      "learning_rate": 2.7870582752947256e-05,
      "loss": 2.8461,
      "step": 198540
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8423774242401123,
      "learning_rate": 2.7868860981401454e-05,
      "loss": 2.9172,
      "step": 198541
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.201766014099121,
      "learning_rate": 2.7867139260449912e-05,
      "loss": 2.8606,
      "step": 198542
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.668468475341797,
      "learning_rate": 2.7865417590093132e-05,
      "loss": 2.9499,
      "step": 198543
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.551668643951416,
      "learning_rate": 2.7863695970331178e-05,
      "loss": 2.9575,
      "step": 198544
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2584636211395264,
      "learning_rate": 2.7861974401164556e-05,
      "loss": 2.9717,
      "step": 198545
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.065514087677002,
      "learning_rate": 2.7860252882593457e-05,
      "loss": 2.9121,
      "step": 198546
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.99582576751709,
      "learning_rate": 2.7858531414618358e-05,
      "loss": 2.8395,
      "step": 198547
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.90547251701355,
      "learning_rate": 2.785680999723945e-05,
      "loss": 2.8176,
      "step": 198548
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7173972129821777,
      "learning_rate": 2.78550886304572e-05,
      "loss": 3.1482,
      "step": 198549
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4287824630737305,
      "learning_rate": 2.7853367314271714e-05,
      "loss": 3.1057,
      "step": 198550
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6242787837982178,
      "learning_rate": 2.785164604868345e-05,
      "loss": 2.8766,
      "step": 198551
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.13000750541687,
      "learning_rate": 2.784992483369268e-05,
      "loss": 3.0317,
      "step": 198552
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6268014907836914,
      "learning_rate": 2.7848203669299774e-05,
      "loss": 3.107,
      "step": 198553
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4792699813842773,
      "learning_rate": 2.784648255550499e-05,
      "loss": 3.0732,
      "step": 198554
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.972682237625122,
      "learning_rate": 2.7844761492308798e-05,
      "loss": 2.9054,
      "step": 198555
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.630401372909546,
      "learning_rate": 2.78430404797113e-05,
      "loss": 2.7503,
      "step": 198556
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.66473913192749,
      "learning_rate": 2.7841319517712957e-05,
      "loss": 3.0303,
      "step": 198557
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.753410577774048,
      "learning_rate": 2.7839598606313974e-05,
      "loss": 2.9225,
      "step": 198558
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8183586597442627,
      "learning_rate": 2.7837877745514846e-05,
      "loss": 2.5421,
      "step": 198559
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2599470615386963,
      "learning_rate": 2.783615693531571e-05,
      "loss": 2.9553,
      "step": 198560
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.624147891998291,
      "learning_rate": 2.7834436175717067e-05,
      "loss": 2.7966,
      "step": 198561
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.678781747817993,
      "learning_rate": 2.7832715466719047e-05,
      "loss": 3.0082,
      "step": 198562
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1723103523254395,
      "learning_rate": 2.7830994808322148e-05,
      "loss": 2.7974,
      "step": 198563
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6524558067321777,
      "learning_rate": 2.782927420052651e-05,
      "loss": 3.0134,
      "step": 198564
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.982914924621582,
      "learning_rate": 2.7827553643332622e-05,
      "loss": 2.9967,
      "step": 198565
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.507347345352173,
      "learning_rate": 2.782583313674066e-05,
      "loss": 2.9142,
      "step": 198566
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1850600242614746,
      "learning_rate": 2.782411268075112e-05,
      "loss": 3.0168,
      "step": 198567
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.17575216293335,
      "learning_rate": 2.7822392275364104e-05,
      "loss": 3.0048,
      "step": 198568
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4804320335388184,
      "learning_rate": 2.782067192058014e-05,
      "loss": 2.7039,
      "step": 198569
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6168832778930664,
      "learning_rate": 2.7818951616399332e-05,
      "loss": 2.7588,
      "step": 198570
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.480041027069092,
      "learning_rate": 2.7817231362822213e-05,
      "loss": 2.7035,
      "step": 198571
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4215247631073,
      "learning_rate": 2.7815511159848946e-05,
      "loss": 2.7555,
      "step": 198572
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.866055488586426,
      "learning_rate": 2.7813791007480036e-05,
      "loss": 2.7163,
      "step": 198573
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3680741786956787,
      "learning_rate": 2.7812070905715545e-05,
      "loss": 3.0304,
      "step": 198574
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8396434783935547,
      "learning_rate": 2.7810350854556006e-05,
      "loss": 3.033,
      "step": 198575
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5947656631469727,
      "learning_rate": 2.780863085400159e-05,
      "loss": 2.7124,
      "step": 198576
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.605762481689453,
      "learning_rate": 2.780691090405276e-05,
      "loss": 2.6581,
      "step": 198577
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6531121730804443,
      "learning_rate": 2.780519100470968e-05,
      "loss": 2.8678,
      "step": 198578
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3396570682525635,
      "learning_rate": 2.7803471155972823e-05,
      "loss": 3.0877,
      "step": 198579
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.244076728820801,
      "learning_rate": 2.780175135784245e-05,
      "loss": 2.9239,
      "step": 198580
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7046501636505127,
      "learning_rate": 2.7800031610318863e-05,
      "loss": 2.9312,
      "step": 198581
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0919158458709717,
      "learning_rate": 2.7798311913402327e-05,
      "loss": 2.6995,
      "step": 198582
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.761723518371582,
      "learning_rate": 2.7796592267093275e-05,
      "loss": 3.1656,
      "step": 198583
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9814839363098145,
      "learning_rate": 2.7794872671391942e-05,
      "loss": 3.1665,
      "step": 198584
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3959944248199463,
      "learning_rate": 2.779315312629873e-05,
      "loss": 3.0994,
      "step": 198585
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.601466178894043,
      "learning_rate": 2.7791433631813896e-05,
      "loss": 2.7145,
      "step": 198586
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.694694757461548,
      "learning_rate": 2.778971418793775e-05,
      "loss": 2.9962,
      "step": 198587
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.043710708618164,
      "learning_rate": 2.778799479467062e-05,
      "loss": 3.0842,
      "step": 198588
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8621349334716797,
      "learning_rate": 2.7786275452012875e-05,
      "loss": 3.0283,
      "step": 198589
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.3887414932250977,
      "learning_rate": 2.7784556159964745e-05,
      "loss": 3.0768,
      "step": 198590
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.349822521209717,
      "learning_rate": 2.7782836918526664e-05,
      "loss": 3.134,
      "step": 198591
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9991819858551025,
      "learning_rate": 2.7781117727698897e-05,
      "loss": 3.0002,
      "step": 198592
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.036142587661743,
      "learning_rate": 2.777939858748175e-05,
      "loss": 2.9481,
      "step": 198593
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.257737159729004,
      "learning_rate": 2.7777679497875517e-05,
      "loss": 2.8059,
      "step": 198594
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.146984815597534,
      "learning_rate": 2.7775960458880598e-05,
      "loss": 2.8205,
      "step": 198595
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7861945629119873,
      "learning_rate": 2.7774241470497195e-05,
      "loss": 2.9044,
      "step": 198596
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.621483325958252,
      "learning_rate": 2.7772522532725772e-05,
      "loss": 3.1219,
      "step": 198597
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.8085179328918457,
      "learning_rate": 2.7770803645566564e-05,
      "loss": 3.1232,
      "step": 198598
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4818902015686035,
      "learning_rate": 2.7769084809019838e-05,
      "loss": 2.9805,
      "step": 198599
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0143887996673584,
      "learning_rate": 2.7767366023086057e-05,
      "loss": 2.778,
      "step": 198600
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.685028076171875,
      "learning_rate": 2.7765647287765457e-05,
      "loss": 3.0119,
      "step": 198601
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1643972396850586,
      "learning_rate": 2.7763928603058273e-05,
      "loss": 2.9419,
      "step": 198602
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.477233409881592,
      "learning_rate": 2.7762209968964998e-05,
      "loss": 2.8304,
      "step": 198603
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.917576789855957,
      "learning_rate": 2.776049138548587e-05,
      "loss": 2.9977,
      "step": 198604
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4679200649261475,
      "learning_rate": 2.775877285262116e-05,
      "loss": 2.8454,
      "step": 198605
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1470794677734375,
      "learning_rate": 2.7757054370371258e-05,
      "loss": 2.7402,
      "step": 198606
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9261581897735596,
      "learning_rate": 2.7755335938736435e-05,
      "loss": 3.0115,
      "step": 198607
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.790595293045044,
      "learning_rate": 2.7753617557717055e-05,
      "loss": 3.1253,
      "step": 198608
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7847495079040527,
      "learning_rate": 2.7751899227313456e-05,
      "loss": 2.6125,
      "step": 198609
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.76547908782959,
      "learning_rate": 2.7750180947525867e-05,
      "loss": 2.8016,
      "step": 198610
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.616297483444214,
      "learning_rate": 2.774846271835466e-05,
      "loss": 3.0788,
      "step": 198611
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2415084838867188,
      "learning_rate": 2.774674453980016e-05,
      "loss": 2.962,
      "step": 198612
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0088284015655518,
      "learning_rate": 2.774502641186267e-05,
      "loss": 3.005,
      "step": 198613
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.974376916885376,
      "learning_rate": 2.7743308334542525e-05,
      "loss": 2.8012,
      "step": 198614
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.751473903656006,
      "learning_rate": 2.774159030784009e-05,
      "loss": 2.986,
      "step": 198615
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8945679664611816,
      "learning_rate": 2.7739872331755597e-05,
      "loss": 3.0583,
      "step": 198616
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2880795001983643,
      "learning_rate": 2.7738154406289347e-05,
      "loss": 2.8024,
      "step": 198617
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5231709480285645,
      "learning_rate": 2.7736436531441776e-05,
      "loss": 2.8521,
      "step": 198618
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.944645643234253,
      "learning_rate": 2.7734718707213076e-05,
      "loss": 2.8096,
      "step": 198619
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.8035190105438232,
      "learning_rate": 2.773300093360369e-05,
      "loss": 2.785,
      "step": 198620
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8517568111419678,
      "learning_rate": 2.773128321061384e-05,
      "loss": 2.7249,
      "step": 198621
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.085759162902832,
      "learning_rate": 2.772956553824397e-05,
      "loss": 2.8862,
      "step": 198622
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9218313694000244,
      "learning_rate": 2.7727847916494237e-05,
      "loss": 2.5791,
      "step": 198623
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.734368324279785,
      "learning_rate": 2.772613034536505e-05,
      "loss": 2.9482,
      "step": 198624
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0410351753234863,
      "learning_rate": 2.77244128248567e-05,
      "loss": 2.8574,
      "step": 198625
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.743303060531616,
      "learning_rate": 2.772269535496956e-05,
      "loss": 2.9346,
      "step": 198626
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.8920023441314697,
      "learning_rate": 2.7720977935703858e-05,
      "loss": 2.8936,
      "step": 198627
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.663390874862671,
      "learning_rate": 2.7719260567060065e-05,
      "loss": 2.8975,
      "step": 198628
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.731055974960327,
      "learning_rate": 2.7717543249038276e-05,
      "loss": 3.0633,
      "step": 198629
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.315051794052124,
      "learning_rate": 2.7715825981639027e-05,
      "loss": 2.739,
      "step": 198630
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.865910768508911,
      "learning_rate": 2.7714108764862486e-05,
      "loss": 2.9978,
      "step": 198631
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.059845447540283,
      "learning_rate": 2.7712391598709048e-05,
      "loss": 2.7043,
      "step": 198632
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.188664436340332,
      "learning_rate": 2.7710674483178985e-05,
      "loss": 3.1075,
      "step": 198633
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.168917417526245,
      "learning_rate": 2.770895741827279e-05,
      "loss": 3.0074,
      "step": 198634
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9369335174560547,
      "learning_rate": 2.770724040399047e-05,
      "loss": 3.1005,
      "step": 198635
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8851068019866943,
      "learning_rate": 2.7705523440332623e-05,
      "loss": 2.981,
      "step": 198636
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.800039052963257,
      "learning_rate": 2.7703806527299376e-05,
      "loss": 2.8172,
      "step": 198637
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8127005100250244,
      "learning_rate": 2.7702089664891204e-05,
      "loss": 2.7644,
      "step": 198638
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.781378984451294,
      "learning_rate": 2.7700372853108267e-05,
      "loss": 2.7615,
      "step": 198639
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.839662790298462,
      "learning_rate": 2.76986560919511e-05,
      "loss": 2.9129,
      "step": 198640
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.821521759033203,
      "learning_rate": 2.7696939381419803e-05,
      "loss": 2.7486,
      "step": 198641
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5863497257232666,
      "learning_rate": 2.769522272151481e-05,
      "loss": 2.8213,
      "step": 198642
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.925445795059204,
      "learning_rate": 2.7693506112236353e-05,
      "loss": 2.9857,
      "step": 198643
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.169787883758545,
      "learning_rate": 2.76917895535849e-05,
      "loss": 2.9108,
      "step": 198644
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6382203102111816,
      "learning_rate": 2.7690073045560612e-05,
      "loss": 3.0175,
      "step": 198645
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3363773822784424,
      "learning_rate": 2.7688356588163928e-05,
      "loss": 3.0474,
      "step": 198646
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9754810333251953,
      "learning_rate": 2.7686640181395148e-05,
      "loss": 2.7277,
      "step": 198647
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5133824348449707,
      "learning_rate": 2.7684923825254534e-05,
      "loss": 2.9449,
      "step": 198648
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.057096481323242,
      "learning_rate": 2.768320751974239e-05,
      "loss": 3.0241,
      "step": 198649
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6787984371185303,
      "learning_rate": 2.7681491264859113e-05,
      "loss": 2.861,
      "step": 198650
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.374790668487549,
      "learning_rate": 2.767977506060497e-05,
      "loss": 2.7841,
      "step": 198651
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4141664505004883,
      "learning_rate": 2.767805890698033e-05,
      "loss": 2.8628,
      "step": 198652
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.148674964904785,
      "learning_rate": 2.767634280398545e-05,
      "loss": 2.9137,
      "step": 198653
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7506582736968994,
      "learning_rate": 2.767462675162071e-05,
      "loss": 2.8141,
      "step": 198654
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.987957000732422,
      "learning_rate": 2.7672910749886333e-05,
      "loss": 2.7484,
      "step": 198655
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.643336534500122,
      "learning_rate": 2.7671194798782793e-05,
      "loss": 3.1039,
      "step": 198656
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0245320796966553,
      "learning_rate": 2.7669478898310214e-05,
      "loss": 2.9084,
      "step": 198657
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.707956314086914,
      "learning_rate": 2.7667763048469104e-05,
      "loss": 2.8636,
      "step": 198658
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.569477081298828,
      "learning_rate": 2.7666047249259693e-05,
      "loss": 2.7836,
      "step": 198659
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4996798038482666,
      "learning_rate": 2.7664331500682314e-05,
      "loss": 2.8273,
      "step": 198660
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8241395950317383,
      "learning_rate": 2.76626158027372e-05,
      "loss": 3.0075,
      "step": 198661
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7027506828308105,
      "learning_rate": 2.7660900155424815e-05,
      "loss": 2.9854,
      "step": 198662
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6888961791992188,
      "learning_rate": 2.765918455874533e-05,
      "loss": 2.8789,
      "step": 198663
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0787909030914307,
      "learning_rate": 2.765746901269921e-05,
      "loss": 2.8571,
      "step": 198664
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6496949195861816,
      "learning_rate": 2.7655753517286716e-05,
      "loss": 2.8066,
      "step": 198665
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.940542697906494,
      "learning_rate": 2.7654038072508155e-05,
      "loss": 2.9035,
      "step": 198666
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4173712730407715,
      "learning_rate": 2.7652322678363825e-05,
      "loss": 2.779,
      "step": 198667
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0368642807006836,
      "learning_rate": 2.765060733485409e-05,
      "loss": 2.9143,
      "step": 198668
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.914292335510254,
      "learning_rate": 2.7648892041979187e-05,
      "loss": 2.9084,
      "step": 198669
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8283324241638184,
      "learning_rate": 2.764717679973958e-05,
      "loss": 2.8928,
      "step": 198670
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.74168062210083,
      "learning_rate": 2.7645461608135465e-05,
      "loss": 3.0738,
      "step": 198671
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9568023681640625,
      "learning_rate": 2.7643746467167216e-05,
      "loss": 2.9818,
      "step": 198672
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3778438568115234,
      "learning_rate": 2.7642031376835094e-05,
      "loss": 2.7135,
      "step": 198673
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.958073854446411,
      "learning_rate": 2.76403163371395e-05,
      "loss": 2.7823,
      "step": 198674
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4044461250305176,
      "learning_rate": 2.7638601348080667e-05,
      "loss": 2.6705,
      "step": 198675
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.81589674949646,
      "learning_rate": 2.7636886409659033e-05,
      "loss": 2.5467,
      "step": 198676
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3535854816436768,
      "learning_rate": 2.763517152187482e-05,
      "loss": 2.9602,
      "step": 198677
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.282735824584961,
      "learning_rate": 2.7633456684728372e-05,
      "loss": 2.9225,
      "step": 198678
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9298107624053955,
      "learning_rate": 2.763174189821995e-05,
      "loss": 2.9043,
      "step": 198679
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3269641399383545,
      "learning_rate": 2.7630027162349953e-05,
      "loss": 2.9119,
      "step": 198680
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5282139778137207,
      "learning_rate": 2.7628312477118654e-05,
      "loss": 3.0009,
      "step": 198681
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.004096984863281,
      "learning_rate": 2.7626597842526443e-05,
      "loss": 2.7478,
      "step": 198682
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.571880340576172,
      "learning_rate": 2.7624883258573593e-05,
      "loss": 2.7619,
      "step": 198683
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6573822498321533,
      "learning_rate": 2.762316872526037e-05,
      "loss": 2.8555,
      "step": 198684
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7875404357910156,
      "learning_rate": 2.7621454242587204e-05,
      "loss": 3.0708,
      "step": 198685
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.273832082748413,
      "learning_rate": 2.761973981055433e-05,
      "loss": 2.9812,
      "step": 198686
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.759429693222046,
      "learning_rate": 2.7618025429162017e-05,
      "loss": 2.9387,
      "step": 198687
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.178044557571411,
      "learning_rate": 2.7616311098410726e-05,
      "loss": 3.1182,
      "step": 198688
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0419816970825195,
      "learning_rate": 2.7614596818300726e-05,
      "loss": 2.9967,
      "step": 198689
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1018664836883545,
      "learning_rate": 2.761288258883225e-05,
      "loss": 2.9462,
      "step": 198690
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.858705759048462,
      "learning_rate": 2.7611168410005736e-05,
      "loss": 2.7374,
      "step": 198691
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6049909591674805,
      "learning_rate": 2.760945428182141e-05,
      "loss": 3.0597,
      "step": 198692
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.137017250061035,
      "learning_rate": 2.7607740204279606e-05,
      "loss": 2.797,
      "step": 198693
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6185033321380615,
      "learning_rate": 2.7606026177380724e-05,
      "loss": 2.8931,
      "step": 198694
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.1923136711120605,
      "learning_rate": 2.7604312201125035e-05,
      "loss": 3.0541,
      "step": 198695
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.862849235534668,
      "learning_rate": 2.7602598275512767e-05,
      "loss": 2.9633,
      "step": 198696
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9388482570648193,
      "learning_rate": 2.7600884400544356e-05,
      "loss": 3.1362,
      "step": 198697
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7505016326904297,
      "learning_rate": 2.7599170576220064e-05,
      "loss": 2.7084,
      "step": 198698
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9310009479522705,
      "learning_rate": 2.7597456802540262e-05,
      "loss": 2.9346,
      "step": 198699
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3244616985321045,
      "learning_rate": 2.7595743079505252e-05,
      "loss": 2.9984,
      "step": 198700
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.743314266204834,
      "learning_rate": 2.7594029407115327e-05,
      "loss": 2.997,
      "step": 198701
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.1298065185546875,
      "learning_rate": 2.7592315785370756e-05,
      "loss": 2.8814,
      "step": 198702
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.432419538497925,
      "learning_rate": 2.759060221427197e-05,
      "loss": 2.9297,
      "step": 198703
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8337244987487793,
      "learning_rate": 2.7588888693819178e-05,
      "loss": 2.8412,
      "step": 198704
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8878014087677,
      "learning_rate": 2.7587175224012802e-05,
      "loss": 2.784,
      "step": 198705
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.943337917327881,
      "learning_rate": 2.758546180485308e-05,
      "loss": 2.9818,
      "step": 198706
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.902582883834839,
      "learning_rate": 2.7583748436340447e-05,
      "loss": 2.8773,
      "step": 198707
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.038646697998047,
      "learning_rate": 2.758203511847503e-05,
      "loss": 2.7723,
      "step": 198708
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.972707986831665,
      "learning_rate": 2.7580321851257336e-05,
      "loss": 3.0965,
      "step": 198709
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2639968395233154,
      "learning_rate": 2.7578608634687526e-05,
      "loss": 2.862,
      "step": 198710
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6531689167022705,
      "learning_rate": 2.7576895468766068e-05,
      "loss": 2.9009,
      "step": 198711
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.3363852500915527,
      "learning_rate": 2.757518235349313e-05,
      "loss": 3.0563,
      "step": 198712
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3111379146575928,
      "learning_rate": 2.7573469288869177e-05,
      "loss": 2.8901,
      "step": 198713
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0772485733032227,
      "learning_rate": 2.7571756274894442e-05,
      "loss": 3.1146,
      "step": 198714
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.767730236053467,
      "learning_rate": 2.757004331156929e-05,
      "loss": 2.8268,
      "step": 198715
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.179778575897217,
      "learning_rate": 2.7568330398893924e-05,
      "loss": 2.7888,
      "step": 198716
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.644460678100586,
      "learning_rate": 2.7566617536868806e-05,
      "loss": 2.9509,
      "step": 198717
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.805123805999756,
      "learning_rate": 2.756490472549414e-05,
      "loss": 2.9176,
      "step": 198718
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9811267852783203,
      "learning_rate": 2.7563191964770394e-05,
      "loss": 2.7535,
      "step": 198719
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.253616809844971,
      "learning_rate": 2.7561479254697762e-05,
      "loss": 2.8665,
      "step": 198720
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.705273151397705,
      "learning_rate": 2.7559766595276578e-05,
      "loss": 2.9642,
      "step": 198721
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.995288610458374,
      "learning_rate": 2.7558053986507145e-05,
      "loss": 2.9136,
      "step": 198722
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.037278175354004,
      "learning_rate": 2.7556341428389862e-05,
      "loss": 3.0391,
      "step": 198723
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3092284202575684,
      "learning_rate": 2.755462892092496e-05,
      "loss": 2.9628,
      "step": 198724
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4579012393951416,
      "learning_rate": 2.755291646411284e-05,
      "loss": 3.0766,
      "step": 198725
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.721620798110962,
      "learning_rate": 2.755120405795377e-05,
      "loss": 3.0006,
      "step": 198726
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8799188137054443,
      "learning_rate": 2.7549491702448045e-05,
      "loss": 2.785,
      "step": 198727
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5920958518981934,
      "learning_rate": 2.7547779397596005e-05,
      "loss": 3.0555,
      "step": 198728
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.098417282104492,
      "learning_rate": 2.754606714339801e-05,
      "loss": 2.9763,
      "step": 198729
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.877755880355835,
      "learning_rate": 2.7544354939854295e-05,
      "loss": 2.8907,
      "step": 198730
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.881150960922241,
      "learning_rate": 2.7542642786965265e-05,
      "loss": 3.002,
      "step": 198731
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.645768404006958,
      "learning_rate": 2.754093068473121e-05,
      "loss": 3.0455,
      "step": 198732
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.447356939315796,
      "learning_rate": 2.7539218633152437e-05,
      "loss": 2.8011,
      "step": 198733
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.824498176574707,
      "learning_rate": 2.7537506632229246e-05,
      "loss": 2.9844,
      "step": 198734
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5510940551757812,
      "learning_rate": 2.7535794681962e-05,
      "loss": 2.8734,
      "step": 198735
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2975354194641113,
      "learning_rate": 2.753408278235093e-05,
      "loss": 2.7759,
      "step": 198736
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.579084873199463,
      "learning_rate": 2.7532370933396476e-05,
      "loss": 2.9841,
      "step": 198737
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.921696901321411,
      "learning_rate": 2.7530659135098897e-05,
      "loss": 3.1046,
      "step": 198738
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.093936920166016,
      "learning_rate": 2.7528947387458534e-05,
      "loss": 3.0376,
      "step": 198739
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.656242847442627,
      "learning_rate": 2.7527235690475613e-05,
      "loss": 3.0052,
      "step": 198740
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.076826333999634,
      "learning_rate": 2.752552404415057e-05,
      "loss": 2.7301,
      "step": 198741
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.661924362182617,
      "learning_rate": 2.752381244848364e-05,
      "loss": 2.8668,
      "step": 198742
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.99649977684021,
      "learning_rate": 2.752210090347522e-05,
      "loss": 2.8443,
      "step": 198743
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5610239505767822,
      "learning_rate": 2.7520389409125575e-05,
      "loss": 3.1007,
      "step": 198744
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.126952886581421,
      "learning_rate": 2.7518677965435043e-05,
      "loss": 2.7272,
      "step": 198745
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.139982223510742,
      "learning_rate": 2.7516966572403854e-05,
      "loss": 2.9756,
      "step": 198746
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.002103328704834,
      "learning_rate": 2.7515255230032506e-05,
      "loss": 2.8416,
      "step": 198747
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.479584217071533,
      "learning_rate": 2.7513543938321135e-05,
      "loss": 2.8474,
      "step": 198748
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.051562547683716,
      "learning_rate": 2.7511832697270174e-05,
      "loss": 2.9905,
      "step": 198749
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.613455057144165,
      "learning_rate": 2.7510121506879956e-05,
      "loss": 2.9894,
      "step": 198750
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.098352432250977,
      "learning_rate": 2.7508410367150713e-05,
      "loss": 2.9204,
      "step": 198751
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.355715036392212,
      "learning_rate": 2.7506699278082746e-05,
      "loss": 3.1518,
      "step": 198752
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.267397165298462,
      "learning_rate": 2.7504988239676486e-05,
      "loss": 2.9743,
      "step": 198753
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.681682825088501,
      "learning_rate": 2.7503277251932164e-05,
      "loss": 2.7058,
      "step": 198754
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6537461280822754,
      "learning_rate": 2.7501566314850154e-05,
      "loss": 3.0685,
      "step": 198755
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.28670334815979,
      "learning_rate": 2.749985542843075e-05,
      "loss": 3.1001,
      "step": 198756
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0706586837768555,
      "learning_rate": 2.7498144592674253e-05,
      "loss": 3.0244,
      "step": 198757
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.58535623550415,
      "learning_rate": 2.7496433807580965e-05,
      "loss": 2.7662,
      "step": 198758
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6563327312469482,
      "learning_rate": 2.7494723073151282e-05,
      "loss": 3.0888,
      "step": 198759
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5447587966918945,
      "learning_rate": 2.7493012389385405e-05,
      "loss": 3.052,
      "step": 198760
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.345358371734619,
      "learning_rate": 2.74913017562838e-05,
      "loss": 2.933,
      "step": 198761
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9977455139160156,
      "learning_rate": 2.7489591173846703e-05,
      "loss": 3.0934,
      "step": 198762
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9624083042144775,
      "learning_rate": 2.748788064207441e-05,
      "loss": 2.908,
      "step": 198763
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0923054218292236,
      "learning_rate": 2.7486170160967223e-05,
      "loss": 3.2648,
      "step": 198764
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.698887825012207,
      "learning_rate": 2.7484459730525577e-05,
      "loss": 2.9561,
      "step": 198765
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1897077560424805,
      "learning_rate": 2.7482749350749633e-05,
      "loss": 2.8576,
      "step": 198766
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5777313709259033,
      "learning_rate": 2.7481039021639862e-05,
      "loss": 3.1685,
      "step": 198767
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6604843139648438,
      "learning_rate": 2.7479328743196492e-05,
      "loss": 3.1223,
      "step": 198768
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.342803478240967,
      "learning_rate": 2.7477618515419798e-05,
      "loss": 2.9124,
      "step": 198769
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5671331882476807,
      "learning_rate": 2.747590833831024e-05,
      "loss": 3.051,
      "step": 198770
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2228763103485107,
      "learning_rate": 2.747419821186805e-05,
      "loss": 2.9193,
      "step": 198771
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.254862070083618,
      "learning_rate": 2.7472488136093497e-05,
      "loss": 2.9134,
      "step": 198772
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0820975303649902,
      "learning_rate": 2.747077811098698e-05,
      "loss": 2.9811,
      "step": 198773
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.090629816055298,
      "learning_rate": 2.74690681365488e-05,
      "loss": 2.871,
      "step": 198774
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.822559356689453,
      "learning_rate": 2.7467358212779222e-05,
      "loss": 3.0998,
      "step": 198775
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.602094888687134,
      "learning_rate": 2.7465648339678682e-05,
      "loss": 3.1337,
      "step": 198776
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7341198921203613,
      "learning_rate": 2.7463938517247375e-05,
      "loss": 2.8514,
      "step": 198777
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.2038164138793945,
      "learning_rate": 2.7462228745485636e-05,
      "loss": 2.7543,
      "step": 198778
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3294196128845215,
      "learning_rate": 2.746051902439387e-05,
      "loss": 2.6949,
      "step": 198779
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3668105602264404,
      "learning_rate": 2.74588093539723e-05,
      "loss": 2.7616,
      "step": 198780
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7853565216064453,
      "learning_rate": 2.74570997342213e-05,
      "loss": 2.6572,
      "step": 198781
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4417130947113037,
      "learning_rate": 2.74553901651412e-05,
      "loss": 2.8891,
      "step": 198782
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0213463306427,
      "learning_rate": 2.74536806467322e-05,
      "loss": 3.1127,
      "step": 198783
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6005446910858154,
      "learning_rate": 2.7451971178994804e-05,
      "loss": 2.937,
      "step": 198784
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.397254705429077,
      "learning_rate": 2.7450261761929203e-05,
      "loss": 3.0531,
      "step": 198785
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7731316089630127,
      "learning_rate": 2.7448552395535705e-05,
      "loss": 2.882,
      "step": 198786
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.490525960922241,
      "learning_rate": 2.7446843079814706e-05,
      "loss": 3.192,
      "step": 198787
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9443628787994385,
      "learning_rate": 2.7445133814766473e-05,
      "loss": 2.8243,
      "step": 198788
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9232559204101562,
      "learning_rate": 2.7443424600391306e-05,
      "loss": 2.9576,
      "step": 198789
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.805908441543579,
      "learning_rate": 2.744171543668957e-05,
      "loss": 3.1646,
      "step": 198790
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4322409629821777,
      "learning_rate": 2.7440006323661533e-05,
      "loss": 2.964,
      "step": 198791
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0018861293792725,
      "learning_rate": 2.7438297261307628e-05,
      "loss": 2.9567,
      "step": 198792
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.920438289642334,
      "learning_rate": 2.7436588249628055e-05,
      "loss": 3.0851,
      "step": 198793
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9084010124206543,
      "learning_rate": 2.7434879288623146e-05,
      "loss": 2.8918,
      "step": 198794
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1710896492004395,
      "learning_rate": 2.7433170378293234e-05,
      "loss": 2.8101,
      "step": 198795
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6227872371673584,
      "learning_rate": 2.7431461518638653e-05,
      "loss": 3.0319,
      "step": 198796
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.673950433731079,
      "learning_rate": 2.7429752709659668e-05,
      "loss": 2.8146,
      "step": 198797
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0110995769500732,
      "learning_rate": 2.7428043951356683e-05,
      "loss": 3.1752,
      "step": 198798
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0764224529266357,
      "learning_rate": 2.742633524372999e-05,
      "loss": 2.956,
      "step": 198799
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7868075370788574,
      "learning_rate": 2.7424626586779865e-05,
      "loss": 2.9538,
      "step": 198800
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.335657835006714,
      "learning_rate": 2.7422917980506598e-05,
      "loss": 2.9643,
      "step": 198801
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5270209312438965,
      "learning_rate": 2.7421209424910628e-05,
      "loss": 2.6592,
      "step": 198802
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.551464796066284,
      "learning_rate": 2.741950091999212e-05,
      "loss": 3.0933,
      "step": 198803
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.009540319442749,
      "learning_rate": 2.7417792465751544e-05,
      "loss": 2.8221,
      "step": 198804
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.517946243286133,
      "learning_rate": 2.741608406218916e-05,
      "loss": 2.7746,
      "step": 198805
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.659811496734619,
      "learning_rate": 2.7414375709305235e-05,
      "loss": 2.7939,
      "step": 198806
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.949568748474121,
      "learning_rate": 2.7412667407100107e-05,
      "loss": 2.854,
      "step": 198807
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.619396686553955,
      "learning_rate": 2.7410959155574142e-05,
      "loss": 2.8517,
      "step": 198808
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6701178550720215,
      "learning_rate": 2.7409250954727567e-05,
      "loss": 2.9207,
      "step": 198809
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.517270803451538,
      "learning_rate": 2.740754280456082e-05,
      "loss": 2.9831,
      "step": 198810
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5601866245269775,
      "learning_rate": 2.740583470507417e-05,
      "loss": 2.8683,
      "step": 198811
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.582714796066284,
      "learning_rate": 2.740412665626788e-05,
      "loss": 2.9223,
      "step": 198812
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.189275026321411,
      "learning_rate": 2.740241865814228e-05,
      "loss": 3.2196,
      "step": 198813
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.39664363861084,
      "learning_rate": 2.740071071069777e-05,
      "loss": 3.0526,
      "step": 198814
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.102114677429199,
      "learning_rate": 2.7399002813934557e-05,
      "loss": 2.9406,
      "step": 198815
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.149249315261841,
      "learning_rate": 2.7397294967853066e-05,
      "loss": 2.7111,
      "step": 198816
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7791149616241455,
      "learning_rate": 2.739558717245357e-05,
      "loss": 2.8277,
      "step": 198817
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.658491611480713,
      "learning_rate": 2.73938794277364e-05,
      "loss": 3.0216,
      "step": 198818
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2734668254852295,
      "learning_rate": 2.7392171733701785e-05,
      "loss": 2.6587,
      "step": 198819
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2369062900543213,
      "learning_rate": 2.739046409035016e-05,
      "loss": 3.1563,
      "step": 198820
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.393759250640869,
      "learning_rate": 2.738875649768173e-05,
      "loss": 2.7201,
      "step": 198821
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3881280422210693,
      "learning_rate": 2.7387048955696922e-05,
      "loss": 2.9748,
      "step": 198822
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9091358184814453,
      "learning_rate": 2.7385341464396038e-05,
      "loss": 3.1337,
      "step": 198823
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8491408824920654,
      "learning_rate": 2.7383634023779345e-05,
      "loss": 2.9169,
      "step": 198824
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4558980464935303,
      "learning_rate": 2.738192663384714e-05,
      "loss": 2.9944,
      "step": 198825
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5905590057373047,
      "learning_rate": 2.7380219294599825e-05,
      "loss": 2.8158,
      "step": 198826
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.702194929122925,
      "learning_rate": 2.7378512006037636e-05,
      "loss": 3.0229,
      "step": 198827
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8467297554016113,
      "learning_rate": 2.7376804768160966e-05,
      "loss": 2.846,
      "step": 198828
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8118391036987305,
      "learning_rate": 2.7375097580970083e-05,
      "loss": 2.8039,
      "step": 198829
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2618801593780518,
      "learning_rate": 2.7373390444465327e-05,
      "loss": 2.8938,
      "step": 198830
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6945643424987793,
      "learning_rate": 2.7371683358646958e-05,
      "loss": 2.9753,
      "step": 198831
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7301485538482666,
      "learning_rate": 2.7369976323515376e-05,
      "loss": 2.5943,
      "step": 198832
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7181222438812256,
      "learning_rate": 2.7368269339070815e-05,
      "loss": 3.053,
      "step": 198833
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.912628173828125,
      "learning_rate": 2.7366562405313708e-05,
      "loss": 2.6777,
      "step": 198834
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.218536615371704,
      "learning_rate": 2.736485552224429e-05,
      "loss": 2.8836,
      "step": 198835
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.067967653274536,
      "learning_rate": 2.736314868986289e-05,
      "loss": 3.0235,
      "step": 198836
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3390445709228516,
      "learning_rate": 2.736144190816978e-05,
      "loss": 2.9381,
      "step": 198837
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9482343196868896,
      "learning_rate": 2.7359735177165388e-05,
      "loss": 2.8367,
      "step": 198838
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3935554027557373,
      "learning_rate": 2.735802849684988e-05,
      "loss": 2.8076,
      "step": 198839
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6412107944488525,
      "learning_rate": 2.735632186722376e-05,
      "loss": 2.8828,
      "step": 198840
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9311482906341553,
      "learning_rate": 2.7354615288287198e-05,
      "loss": 2.9244,
      "step": 198841
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.847928047180176,
      "learning_rate": 2.7352908760040582e-05,
      "loss": 2.9911,
      "step": 198842
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7631306648254395,
      "learning_rate": 2.7351202282484152e-05,
      "loss": 3.3182,
      "step": 198843
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.197760581970215,
      "learning_rate": 2.734949585561834e-05,
      "loss": 3.068,
      "step": 198844
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.913912296295166,
      "learning_rate": 2.7347789479443348e-05,
      "loss": 2.917,
      "step": 198845
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.435162544250488,
      "learning_rate": 2.7346083153959608e-05,
      "loss": 3.0401,
      "step": 198846
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9712107181549072,
      "learning_rate": 2.7344376879167317e-05,
      "loss": 2.9247,
      "step": 198847
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1653313636779785,
      "learning_rate": 2.734267065506698e-05,
      "loss": 2.7864,
      "step": 198848
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.899000644683838,
      "learning_rate": 2.7340964481658654e-05,
      "loss": 2.9227,
      "step": 198849
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0470328330993652,
      "learning_rate": 2.7339258358942852e-05,
      "loss": 3.1895,
      "step": 198850
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6600916385650635,
      "learning_rate": 2.7337552286919762e-05,
      "loss": 3.1288,
      "step": 198851
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.264465093612671,
      "learning_rate": 2.7335846265589823e-05,
      "loss": 2.8987,
      "step": 198852
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.608901262283325,
      "learning_rate": 2.7334140294953267e-05,
      "loss": 2.8269,
      "step": 198853
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4394283294677734,
      "learning_rate": 2.7332434375010494e-05,
      "loss": 2.9938,
      "step": 198854
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.873350143432617,
      "learning_rate": 2.7330728505761735e-05,
      "loss": 2.8528,
      "step": 198855
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4983363151550293,
      "learning_rate": 2.7329022687207356e-05,
      "loss": 2.9939,
      "step": 198856
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.729194402694702,
      "learning_rate": 2.7327316919347632e-05,
      "loss": 3.0034,
      "step": 198857
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.328065872192383,
      "learning_rate": 2.7325611202182917e-05,
      "loss": 2.9798,
      "step": 198858
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9545323848724365,
      "learning_rate": 2.7323905535713487e-05,
      "loss": 2.8037,
      "step": 198859
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9745428562164307,
      "learning_rate": 2.7322199919939734e-05,
      "loss": 2.6423,
      "step": 198860
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.851881980895996,
      "learning_rate": 2.7320494354861967e-05,
      "loss": 2.9673,
      "step": 198861
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.021276950836182,
      "learning_rate": 2.7318788840480443e-05,
      "loss": 2.8071,
      "step": 198862
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.188152551651001,
      "learning_rate": 2.7317083376795434e-05,
      "loss": 2.8265,
      "step": 198863
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.168304920196533,
      "learning_rate": 2.7315377963807406e-05,
      "loss": 2.9959,
      "step": 198864
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1545157432556152,
      "learning_rate": 2.7313672601516523e-05,
      "loss": 2.9503,
      "step": 198865
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6944069862365723,
      "learning_rate": 2.731196728992322e-05,
      "loss": 3.1035,
      "step": 198866
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.340972423553467,
      "learning_rate": 2.7310262029027797e-05,
      "loss": 3.0692,
      "step": 198867
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.213090419769287,
      "learning_rate": 2.7308556818830485e-05,
      "loss": 2.7715,
      "step": 198868
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3791823387145996,
      "learning_rate": 2.7306851659331687e-05,
      "loss": 2.9089,
      "step": 198869
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9538443088531494,
      "learning_rate": 2.730514655053173e-05,
      "loss": 2.7828,
      "step": 198870
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.136413812637329,
      "learning_rate": 2.7303441492430822e-05,
      "loss": 2.8546,
      "step": 198871
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.831648588180542,
      "learning_rate": 2.730173648502939e-05,
      "loss": 2.9978,
      "step": 198872
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0693562030792236,
      "learning_rate": 2.730003152832774e-05,
      "loss": 2.9849,
      "step": 198873
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0061941146850586,
      "learning_rate": 2.7298326622326094e-05,
      "loss": 2.7667,
      "step": 198874
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.327620267868042,
      "learning_rate": 2.7296621767024895e-05,
      "loss": 2.939,
      "step": 198875
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5015668869018555,
      "learning_rate": 2.7294916962424407e-05,
      "loss": 2.9787,
      "step": 198876
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.312300205230713,
      "learning_rate": 2.7293212208524862e-05,
      "loss": 3.0127,
      "step": 198877
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3729090690612793,
      "learning_rate": 2.7291507505326758e-05,
      "loss": 2.9965,
      "step": 198878
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8488802909851074,
      "learning_rate": 2.7289802852830267e-05,
      "loss": 2.9318,
      "step": 198879
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7713136672973633,
      "learning_rate": 2.728809825103572e-05,
      "loss": 2.798,
      "step": 198880
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.961066961288452,
      "learning_rate": 2.728639369994351e-05,
      "loss": 3.1873,
      "step": 198881
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9732608795166016,
      "learning_rate": 2.7284689199553844e-05,
      "loss": 2.6424,
      "step": 198882
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.935817241668701,
      "learning_rate": 2.7282984749867154e-05,
      "loss": 2.9718,
      "step": 198883
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0009753704071045,
      "learning_rate": 2.728128035088374e-05,
      "loss": 3.0459,
      "step": 198884
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.489017963409424,
      "learning_rate": 2.727957600260383e-05,
      "loss": 2.7478,
      "step": 198885
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1525728702545166,
      "learning_rate": 2.7277871705027764e-05,
      "loss": 2.8511,
      "step": 198886
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9328465461730957,
      "learning_rate": 2.7276167458155972e-05,
      "loss": 2.8454,
      "step": 198887
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7113752365112305,
      "learning_rate": 2.7274463261988588e-05,
      "loss": 3.0112,
      "step": 198888
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.117314100265503,
      "learning_rate": 2.727275911652611e-05,
      "loss": 2.842,
      "step": 198889
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7537412643432617,
      "learning_rate": 2.727105502176877e-05,
      "loss": 2.8212,
      "step": 198890
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.77146577835083,
      "learning_rate": 2.7269350977716907e-05,
      "loss": 2.811,
      "step": 198891
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.367824077606201,
      "learning_rate": 2.726764698437075e-05,
      "loss": 2.813,
      "step": 198892
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.8091092109680176,
      "learning_rate": 2.7265943041730732e-05,
      "loss": 2.8092,
      "step": 198893
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.971477031707764,
      "learning_rate": 2.7264239149797084e-05,
      "loss": 2.6896,
      "step": 198894
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2818899154663086,
      "learning_rate": 2.726253530857021e-05,
      "loss": 2.6802,
      "step": 198895
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2194013595581055,
      "learning_rate": 2.7260831518050375e-05,
      "loss": 2.7793,
      "step": 198896
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.851653575897217,
      "learning_rate": 2.725912777823791e-05,
      "loss": 3.0847,
      "step": 198897
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.303343772888184,
      "learning_rate": 2.725742408913305e-05,
      "loss": 2.9988,
      "step": 198898
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.751987934112549,
      "learning_rate": 2.7255720450736262e-05,
      "loss": 2.8697,
      "step": 198899
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3062894344329834,
      "learning_rate": 2.7254016863047712e-05,
      "loss": 2.6757,
      "step": 198900
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8055102825164795,
      "learning_rate": 2.7252313326067832e-05,
      "loss": 3.0493,
      "step": 198901
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.795962333679199,
      "learning_rate": 2.7250609839796922e-05,
      "loss": 2.9883,
      "step": 198902
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8489208221435547,
      "learning_rate": 2.7248906404235248e-05,
      "loss": 2.871,
      "step": 198903
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6625430583953857,
      "learning_rate": 2.7247203019383113e-05,
      "loss": 3.2199,
      "step": 198904
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.4693827629089355,
      "learning_rate": 2.7245499685240912e-05,
      "loss": 2.7756,
      "step": 198905
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.799034833908081,
      "learning_rate": 2.7243796401808883e-05,
      "loss": 2.6915,
      "step": 198906
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7907021045684814,
      "learning_rate": 2.724209316908742e-05,
      "loss": 3.0002,
      "step": 198907
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6530821323394775,
      "learning_rate": 2.724038998707683e-05,
      "loss": 2.9695,
      "step": 198908
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0861752033233643,
      "learning_rate": 2.723868685577737e-05,
      "loss": 2.8338,
      "step": 198909
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6711935997009277,
      "learning_rate": 2.7236983775189348e-05,
      "loss": 3.1445,
      "step": 198910
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.597137689590454,
      "learning_rate": 2.723528074531316e-05,
      "loss": 2.9097,
      "step": 198911
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9544386863708496,
      "learning_rate": 2.723357776614904e-05,
      "loss": 2.9641,
      "step": 198912
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.966669797897339,
      "learning_rate": 2.723187483769739e-05,
      "loss": 3.1099,
      "step": 198913
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9797515869140625,
      "learning_rate": 2.7230171959958434e-05,
      "loss": 2.9136,
      "step": 198914
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9106788635253906,
      "learning_rate": 2.722846913293265e-05,
      "loss": 2.6314,
      "step": 198915
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.186052083969116,
      "learning_rate": 2.7226766356620134e-05,
      "loss": 2.8799,
      "step": 198916
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.908390760421753,
      "learning_rate": 2.7225063631021348e-05,
      "loss": 2.7523,
      "step": 198917
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1106719970703125,
      "learning_rate": 2.7223360956136532e-05,
      "loss": 2.8919,
      "step": 198918
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4901955127716064,
      "learning_rate": 2.7221658331966113e-05,
      "loss": 2.9502,
      "step": 198919
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.682093858718872,
      "learning_rate": 2.721995575851026e-05,
      "loss": 2.9817,
      "step": 198920
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8567473888397217,
      "learning_rate": 2.721825323576947e-05,
      "loss": 2.8932,
      "step": 198921
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8384578227996826,
      "learning_rate": 2.7216550763743882e-05,
      "loss": 3.0515,
      "step": 198922
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3089492321014404,
      "learning_rate": 2.7214848342433893e-05,
      "loss": 2.9398,
      "step": 198923
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.805483102798462,
      "learning_rate": 2.72131459718398e-05,
      "loss": 2.7144,
      "step": 198924
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.017672061920166,
      "learning_rate": 2.7211443651961974e-05,
      "loss": 2.9977,
      "step": 198925
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1034209728240967,
      "learning_rate": 2.720974138280061e-05,
      "loss": 2.8842,
      "step": 198926
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1778969764709473,
      "learning_rate": 2.7208039164356245e-05,
      "loss": 2.8267,
      "step": 198927
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9477686882019043,
      "learning_rate": 2.7206336996628943e-05,
      "loss": 3.0288,
      "step": 198928
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.62109637260437,
      "learning_rate": 2.720463487961917e-05,
      "loss": 2.9707,
      "step": 198929
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8143346309661865,
      "learning_rate": 2.720293281332716e-05,
      "loss": 2.9743,
      "step": 198930
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.112837076187134,
      "learning_rate": 2.7201230797753348e-05,
      "loss": 2.9126,
      "step": 198931
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.872453212738037,
      "learning_rate": 2.7199528832897898e-05,
      "loss": 2.9176,
      "step": 198932
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.642744541168213,
      "learning_rate": 2.719782691876131e-05,
      "loss": 2.838,
      "step": 198933
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1146507263183594,
      "learning_rate": 2.7196125055343686e-05,
      "loss": 2.8532,
      "step": 198934
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.863406181335449,
      "learning_rate": 2.7194423242645525e-05,
      "loss": 2.6725,
      "step": 198935
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.770946979522705,
      "learning_rate": 2.719272148066699e-05,
      "loss": 3.0824,
      "step": 198936
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4196009635925293,
      "learning_rate": 2.719101976940855e-05,
      "loss": 2.9363,
      "step": 198937
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.781829833984375,
      "learning_rate": 2.7189318108870405e-05,
      "loss": 3.0137,
      "step": 198938
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.900573492050171,
      "learning_rate": 2.7187616499053023e-05,
      "loss": 2.7984,
      "step": 198939
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0491280555725098,
      "learning_rate": 2.71859149399565e-05,
      "loss": 2.9699,
      "step": 198940
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7759697437286377,
      "learning_rate": 2.7184213431581303e-05,
      "loss": 2.9064,
      "step": 198941
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0624523162841797,
      "learning_rate": 2.7182511973927667e-05,
      "loss": 3.0326,
      "step": 198942
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5958807468414307,
      "learning_rate": 2.718081056699599e-05,
      "loss": 3.061,
      "step": 198943
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0301315784454346,
      "learning_rate": 2.717910921078651e-05,
      "loss": 2.7198,
      "step": 198944
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.581428050994873,
      "learning_rate": 2.7177407905299653e-05,
      "loss": 3.1643,
      "step": 198945
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5534279346466064,
      "learning_rate": 2.7175706650535657e-05,
      "loss": 2.7801,
      "step": 198946
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4017865657806396,
      "learning_rate": 2.7174005446494853e-05,
      "loss": 2.8791,
      "step": 198947
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6424901485443115,
      "learning_rate": 2.717230429317747e-05,
      "loss": 2.9724,
      "step": 198948
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.0147833824157715,
      "learning_rate": 2.7170603190583984e-05,
      "loss": 2.9688,
      "step": 198949
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6327357292175293,
      "learning_rate": 2.7168902138714556e-05,
      "loss": 2.8695,
      "step": 198950
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.393345594406128,
      "learning_rate": 2.716720113756965e-05,
      "loss": 2.7115,
      "step": 198951
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0030243396759033,
      "learning_rate": 2.7165500187149503e-05,
      "loss": 2.8879,
      "step": 198952
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6779415607452393,
      "learning_rate": 2.7163799287454413e-05,
      "loss": 2.9877,
      "step": 198953
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4583845138549805,
      "learning_rate": 2.716209843848475e-05,
      "loss": 3.0801,
      "step": 198954
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.012340545654297,
      "learning_rate": 2.7160397640240804e-05,
      "loss": 2.8261,
      "step": 198955
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.344557285308838,
      "learning_rate": 2.7158696892722854e-05,
      "loss": 3.1119,
      "step": 198956
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.318143129348755,
      "learning_rate": 2.715699619593129e-05,
      "loss": 3.0246,
      "step": 198957
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.712282657623291,
      "learning_rate": 2.7155295549866387e-05,
      "loss": 2.9616,
      "step": 198958
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.921889305114746,
      "learning_rate": 2.7153594954528436e-05,
      "loss": 3.0734,
      "step": 198959
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0576043128967285,
      "learning_rate": 2.7151894409917807e-05,
      "loss": 2.9444,
      "step": 198960
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.481370687484741,
      "learning_rate": 2.7150193916034803e-05,
      "loss": 2.7769,
      "step": 198961
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4519941806793213,
      "learning_rate": 2.7148493472879684e-05,
      "loss": 3.1523,
      "step": 198962
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.607192277908325,
      "learning_rate": 2.7146793080452856e-05,
      "loss": 3.042,
      "step": 198963
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.275654315948486,
      "learning_rate": 2.714509273875458e-05,
      "loss": 2.9263,
      "step": 198964
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.954242706298828,
      "learning_rate": 2.714339244778513e-05,
      "loss": 2.9591,
      "step": 198965
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.073291301727295,
      "learning_rate": 2.714169220754493e-05,
      "loss": 2.7341,
      "step": 198966
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8179867267608643,
      "learning_rate": 2.7139992018034184e-05,
      "loss": 3.0109,
      "step": 198967
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3552944660186768,
      "learning_rate": 2.7138291879253328e-05,
      "loss": 3.0016,
      "step": 198968
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.936027765274048,
      "learning_rate": 2.7136591791202623e-05,
      "loss": 3.1621,
      "step": 198969
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.228043556213379,
      "learning_rate": 2.713489175388237e-05,
      "loss": 2.8645,
      "step": 198970
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.336561441421509,
      "learning_rate": 2.7133191767292807e-05,
      "loss": 2.9389,
      "step": 198971
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.967639684677124,
      "learning_rate": 2.7131491831434427e-05,
      "loss": 2.7535,
      "step": 198972
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9388277530670166,
      "learning_rate": 2.71297919463074e-05,
      "loss": 2.8632,
      "step": 198973
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.968761920928955,
      "learning_rate": 2.7128092111912124e-05,
      "loss": 2.8153,
      "step": 198974
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.931777238845825,
      "learning_rate": 2.7126392328248902e-05,
      "loss": 2.8151,
      "step": 198975
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.087621688842773,
      "learning_rate": 2.712469259531803e-05,
      "loss": 2.8235,
      "step": 198976
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.031529426574707,
      "learning_rate": 2.7122992913119778e-05,
      "loss": 3.025,
      "step": 198977
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8250370025634766,
      "learning_rate": 2.7121293281654543e-05,
      "loss": 2.8989,
      "step": 198978
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.3186798095703125,
      "learning_rate": 2.7119593700922594e-05,
      "loss": 2.9278,
      "step": 198979
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.128638744354248,
      "learning_rate": 2.7117894170924293e-05,
      "loss": 2.9149,
      "step": 198980
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3923869132995605,
      "learning_rate": 2.7116194691659878e-05,
      "loss": 2.9152,
      "step": 198981
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.216854095458984,
      "learning_rate": 2.7114495263129808e-05,
      "loss": 2.9637,
      "step": 198982
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.928600311279297,
      "learning_rate": 2.7112795885334227e-05,
      "loss": 2.8624,
      "step": 198983
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.439972400665283,
      "learning_rate": 2.711109655827356e-05,
      "loss": 2.9599,
      "step": 198984
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9169223308563232,
      "learning_rate": 2.7109397281948042e-05,
      "loss": 2.8902,
      "step": 198985
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.726111888885498,
      "learning_rate": 2.7107698056358105e-05,
      "loss": 3.234,
      "step": 198986
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.020118236541748,
      "learning_rate": 2.710599888150392e-05,
      "loss": 3.0965,
      "step": 198987
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.968644142150879,
      "learning_rate": 2.7104299757386017e-05,
      "loss": 2.7228,
      "step": 198988
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9533538818359375,
      "learning_rate": 2.7102600684004462e-05,
      "loss": 2.9466,
      "step": 198989
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8279850482940674,
      "learning_rate": 2.710090166135972e-05,
      "loss": 2.721,
      "step": 198990
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1390326023101807,
      "learning_rate": 2.7099202689452027e-05,
      "loss": 2.8418,
      "step": 198991
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4562788009643555,
      "learning_rate": 2.7097503768281815e-05,
      "loss": 2.9949,
      "step": 198992
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3685286045074463,
      "learning_rate": 2.709580489784925e-05,
      "loss": 3.036,
      "step": 198993
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5713844299316406,
      "learning_rate": 2.709410607815483e-05,
      "loss": 2.7514,
      "step": 198994
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.004258155822754,
      "learning_rate": 2.709240730919866e-05,
      "loss": 2.8877,
      "step": 198995
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7103137969970703,
      "learning_rate": 2.7090708590981237e-05,
      "loss": 3.138,
      "step": 198996
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8265175819396973,
      "learning_rate": 2.708900992350276e-05,
      "loss": 2.918,
      "step": 198997
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.215287446975708,
      "learning_rate": 2.7087311306763625e-05,
      "loss": 2.8861,
      "step": 198998
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6947193145751953,
      "learning_rate": 2.708561274076404e-05,
      "loss": 3.1632,
      "step": 198999
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0498428344726562,
      "learning_rate": 2.70839142255045e-05,
      "loss": 2.833,
      "step": 199000
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.978429079055786,
      "learning_rate": 2.7082215760985138e-05,
      "loss": 2.9823,
      "step": 199001
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.321293830871582,
      "learning_rate": 2.7080517347206353e-05,
      "loss": 3.1642,
      "step": 199002
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.1189117431640625,
      "learning_rate": 2.707881898416845e-05,
      "loss": 2.9631,
      "step": 199003
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7574408054351807,
      "learning_rate": 2.7077120671871755e-05,
      "loss": 2.8924,
      "step": 199004
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.636911392211914,
      "learning_rate": 2.7075422410316538e-05,
      "loss": 2.7303,
      "step": 199005
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8708279132843018,
      "learning_rate": 2.7073724199503266e-05,
      "loss": 2.9237,
      "step": 199006
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.338047742843628,
      "learning_rate": 2.707202603943204e-05,
      "loss": 2.9463,
      "step": 199007
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.887612819671631,
      "learning_rate": 2.7070327930103286e-05,
      "loss": 2.8707,
      "step": 199008
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.925957679748535,
      "learning_rate": 2.7068629871517312e-05,
      "loss": 3.0602,
      "step": 199009
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7196149826049805,
      "learning_rate": 2.7066931863674446e-05,
      "loss": 2.9668,
      "step": 199010
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.124532222747803,
      "learning_rate": 2.7065233906574955e-05,
      "loss": 2.7984,
      "step": 199011
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.15302038192749,
      "learning_rate": 2.7063536000219278e-05,
      "loss": 2.784,
      "step": 199012
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.113616943359375,
      "learning_rate": 2.7061838144607572e-05,
      "loss": 2.973,
      "step": 199013
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9657506942749023,
      "learning_rate": 2.7060140339740244e-05,
      "loss": 2.8225,
      "step": 199014
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.785994291305542,
      "learning_rate": 2.7058442585617522e-05,
      "loss": 2.7214,
      "step": 199015
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.33901047706604,
      "learning_rate": 2.7056744882239877e-05,
      "loss": 3.0246,
      "step": 199016
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.416074275970459,
      "learning_rate": 2.7055047229607474e-05,
      "loss": 2.7915,
      "step": 199017
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9482131004333496,
      "learning_rate": 2.7053349627720777e-05,
      "loss": 2.6995,
      "step": 199018
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.680086612701416,
      "learning_rate": 2.7051652076579923e-05,
      "loss": 2.8875,
      "step": 199019
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.1549224853515625,
      "learning_rate": 2.7049954576185375e-05,
      "loss": 3.1194,
      "step": 199020
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0552947521209717,
      "learning_rate": 2.7048257126537333e-05,
      "loss": 2.9088,
      "step": 199021
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7603516578674316,
      "learning_rate": 2.704655972763623e-05,
      "loss": 3.1107,
      "step": 199022
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.672624111175537,
      "learning_rate": 2.704486237948227e-05,
      "loss": 3.0433,
      "step": 199023
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.648543357849121,
      "learning_rate": 2.7043165082075914e-05,
      "loss": 3.135,
      "step": 199024
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.929002285003662,
      "learning_rate": 2.7041467835417298e-05,
      "loss": 3.0545,
      "step": 199025
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.388730049133301,
      "learning_rate": 2.7039770639506886e-05,
      "loss": 2.8469,
      "step": 199026
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4656407833099365,
      "learning_rate": 2.7038073494344848e-05,
      "loss": 2.8509,
      "step": 199027
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.511453866958618,
      "learning_rate": 2.7036376399931646e-05,
      "loss": 2.9761,
      "step": 199028
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.995208263397217,
      "learning_rate": 2.703467935626752e-05,
      "loss": 2.9766,
      "step": 199029
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0231521129608154,
      "learning_rate": 2.7032982363352795e-05,
      "loss": 2.8675,
      "step": 199030
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5886130332946777,
      "learning_rate": 2.7031285421187843e-05,
      "loss": 3.223,
      "step": 199031
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.70955753326416,
      "learning_rate": 2.7029588529772894e-05,
      "loss": 2.9968,
      "step": 199032
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.96366810798645,
      "learning_rate": 2.702789168910825e-05,
      "loss": 2.9982,
      "step": 199033
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5099825859069824,
      "learning_rate": 2.7026194899194343e-05,
      "loss": 2.9746,
      "step": 199034
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.776643991470337,
      "learning_rate": 2.7024498160031337e-05,
      "loss": 3.2053,
      "step": 199035
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5723116397857666,
      "learning_rate": 2.7022801471619703e-05,
      "loss": 3.2012,
      "step": 199036
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.856208324432373,
      "learning_rate": 2.7021104833959707e-05,
      "loss": 2.8365,
      "step": 199037
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.224318265914917,
      "learning_rate": 2.701940824705161e-05,
      "loss": 3.109,
      "step": 199038
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0023996829986572,
      "learning_rate": 2.701771171089572e-05,
      "loss": 2.8224,
      "step": 199039
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.748246431350708,
      "learning_rate": 2.7016015225492427e-05,
      "loss": 2.7947,
      "step": 199040
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.288484811782837,
      "learning_rate": 2.701431879084194e-05,
      "loss": 3.0123,
      "step": 199041
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.090630292892456,
      "learning_rate": 2.7012622406944718e-05,
      "loss": 2.9499,
      "step": 199042
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1374075412750244,
      "learning_rate": 2.7010926073801032e-05,
      "loss": 2.7306,
      "step": 199043
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.3662807941436768,
      "learning_rate": 2.700922979141108e-05,
      "loss": 2.9502,
      "step": 199044
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4285292625427246,
      "learning_rate": 2.700753355977533e-05,
      "loss": 2.8977,
      "step": 199045
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.269698143005371,
      "learning_rate": 2.700583737889401e-05,
      "loss": 2.8594,
      "step": 199046
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9308598041534424,
      "learning_rate": 2.700414124876743e-05,
      "loss": 2.8806,
      "step": 199047
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.934703826904297,
      "learning_rate": 2.7002445169395982e-05,
      "loss": 2.8033,
      "step": 199048
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.117077589035034,
      "learning_rate": 2.700074914077993e-05,
      "loss": 2.8904,
      "step": 199049
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8524880409240723,
      "learning_rate": 2.699905316291955e-05,
      "loss": 3.0357,
      "step": 199050
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6979174613952637,
      "learning_rate": 2.6997357235815232e-05,
      "loss": 2.9291,
      "step": 199051
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.830333948135376,
      "learning_rate": 2.6995661359467213e-05,
      "loss": 2.9396,
      "step": 199052
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9735934734344482,
      "learning_rate": 2.6993965533875895e-05,
      "loss": 3.0831,
      "step": 199053
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.018259048461914,
      "learning_rate": 2.6992269759041573e-05,
      "loss": 2.6407,
      "step": 199054
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.914984941482544,
      "learning_rate": 2.6990574034964552e-05,
      "loss": 3.2986,
      "step": 199055
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.696580410003662,
      "learning_rate": 2.6988878361645062e-05,
      "loss": 3.0671,
      "step": 199056
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6526472568511963,
      "learning_rate": 2.6987182739083533e-05,
      "loss": 2.9806,
      "step": 199057
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8700222969055176,
      "learning_rate": 2.6985487167280207e-05,
      "loss": 2.8156,
      "step": 199058
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.85624623298645,
      "learning_rate": 2.6983791646235475e-05,
      "loss": 3.0207,
      "step": 199059
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.299002170562744,
      "learning_rate": 2.698209617594964e-05,
      "loss": 2.6254,
      "step": 199060
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7092835903167725,
      "learning_rate": 2.6980400756422936e-05,
      "loss": 3.0037,
      "step": 199061
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.86624813079834,
      "learning_rate": 2.697870538765573e-05,
      "loss": 2.928,
      "step": 199062
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3022589683532715,
      "learning_rate": 2.697701006964835e-05,
      "loss": 2.8417,
      "step": 199063
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4970717430114746,
      "learning_rate": 2.697531480240107e-05,
      "loss": 2.9711,
      "step": 199064
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.597083568572998,
      "learning_rate": 2.6973619585914252e-05,
      "loss": 2.9751,
      "step": 199065
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.690664052963257,
      "learning_rate": 2.6971924420188163e-05,
      "loss": 3.1034,
      "step": 199066
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.745518684387207,
      "learning_rate": 2.6970229305223268e-05,
      "loss": 3.106,
      "step": 199067
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5452659130096436,
      "learning_rate": 2.6968534241019634e-05,
      "loss": 3.1426,
      "step": 199068
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.011685609817505,
      "learning_rate": 2.6966839227577764e-05,
      "loss": 2.978,
      "step": 199069
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2941436767578125,
      "learning_rate": 2.6965144264897852e-05,
      "loss": 2.9737,
      "step": 199070
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1110637187957764,
      "learning_rate": 2.6963449352980338e-05,
      "loss": 2.9658,
      "step": 199071
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8682687282562256,
      "learning_rate": 2.6961754491825416e-05,
      "loss": 3.1341,
      "step": 199072
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0427463054656982,
      "learning_rate": 2.6960059681433555e-05,
      "loss": 2.9055,
      "step": 199073
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7005319595336914,
      "learning_rate": 2.6958364921804853e-05,
      "loss": 2.9936,
      "step": 199074
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.8623342514038086,
      "learning_rate": 2.6956670212939845e-05,
      "loss": 2.7947,
      "step": 199075
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.747406482696533,
      "learning_rate": 2.6954975554838666e-05,
      "loss": 2.854,
      "step": 199076
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3551766872406006,
      "learning_rate": 2.6953280947501745e-05,
      "loss": 2.9947,
      "step": 199077
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0299646854400635,
      "learning_rate": 2.6951586390929347e-05,
      "loss": 2.8659,
      "step": 199078
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.135540008544922,
      "learning_rate": 2.6949891885121877e-05,
      "loss": 2.98,
      "step": 199079
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0247395038604736,
      "learning_rate": 2.6948197430079497e-05,
      "loss": 2.8953,
      "step": 199080
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.148101329803467,
      "learning_rate": 2.6946503025802645e-05,
      "loss": 2.8334,
      "step": 199081
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5913426876068115,
      "learning_rate": 2.694480867229152e-05,
      "loss": 3.0627,
      "step": 199082
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.687535047531128,
      "learning_rate": 2.694311436954658e-05,
      "loss": 2.6434,
      "step": 199083
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6567065715789795,
      "learning_rate": 2.6941420117568e-05,
      "loss": 2.8146,
      "step": 199084
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.875974416732788,
      "learning_rate": 2.6939725916356313e-05,
      "loss": 2.7243,
      "step": 199085
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.786677598953247,
      "learning_rate": 2.6938031765911548e-05,
      "loss": 2.9816,
      "step": 199086
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.908329963684082,
      "learning_rate": 2.6936337666234208e-05,
      "loss": 2.9272,
      "step": 199087
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6099236011505127,
      "learning_rate": 2.6934643617324526e-05,
      "loss": 2.7952,
      "step": 199088
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.543518304824829,
      "learning_rate": 2.6932949619182865e-05,
      "loss": 2.9479,
      "step": 199089
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.123749256134033,
      "learning_rate": 2.6931255671809492e-05,
      "loss": 2.721,
      "step": 199090
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8217415809631348,
      "learning_rate": 2.692956177520488e-05,
      "loss": 2.87,
      "step": 199091
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.092438697814941,
      "learning_rate": 2.6927867929369086e-05,
      "loss": 2.8662,
      "step": 199092
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.803107738494873,
      "learning_rate": 2.6926174134302615e-05,
      "loss": 2.8962,
      "step": 199093
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4530556201934814,
      "learning_rate": 2.6924480390005666e-05,
      "loss": 2.8617,
      "step": 199094
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.239100456237793,
      "learning_rate": 2.692278669647864e-05,
      "loss": 3.0038,
      "step": 199095
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7320477962493896,
      "learning_rate": 2.6921093053721797e-05,
      "loss": 2.8784,
      "step": 199096
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7682483196258545,
      "learning_rate": 2.6919399461735582e-05,
      "loss": 2.9811,
      "step": 199097
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2850332260131836,
      "learning_rate": 2.6917705920520115e-05,
      "loss": 2.8718,
      "step": 199098
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7464632987976074,
      "learning_rate": 2.691601243007584e-05,
      "loss": 2.6707,
      "step": 199099
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.812077522277832,
      "learning_rate": 2.691431899040295e-05,
      "loss": 3.0798,
      "step": 199100
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.653977394104004,
      "learning_rate": 2.6912625601501947e-05,
      "loss": 2.8246,
      "step": 199101
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.090701580047607,
      "learning_rate": 2.6910932263372963e-05,
      "loss": 2.8301,
      "step": 199102
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.151585578918457,
      "learning_rate": 2.69092389760165e-05,
      "loss": 2.8312,
      "step": 199103
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.235899448394775,
      "learning_rate": 2.6907545739432657e-05,
      "loss": 2.7154,
      "step": 199104
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.895822763442993,
      "learning_rate": 2.6905852553621898e-05,
      "loss": 2.9822,
      "step": 199105
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.706952095031738,
      "learning_rate": 2.690415941858446e-05,
      "loss": 2.9127,
      "step": 199106
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.074954509735107,
      "learning_rate": 2.690246633432074e-05,
      "loss": 3.1074,
      "step": 199107
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.230804443359375,
      "learning_rate": 2.690077330083097e-05,
      "loss": 2.9688,
      "step": 199108
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3850574493408203,
      "learning_rate": 2.6899080318115553e-05,
      "loss": 2.9556,
      "step": 199109
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7195260524749756,
      "learning_rate": 2.6897387386174717e-05,
      "loss": 3.0512,
      "step": 199110
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7596373558044434,
      "learning_rate": 2.6895694505008837e-05,
      "loss": 2.8502,
      "step": 199111
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.644639730453491,
      "learning_rate": 2.6894001674618137e-05,
      "loss": 2.9736,
      "step": 199112
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8055808544158936,
      "learning_rate": 2.6892308895003088e-05,
      "loss": 3.1082,
      "step": 199113
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0347445011138916,
      "learning_rate": 2.6890616166163826e-05,
      "loss": 2.9007,
      "step": 199114
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5563418865203857,
      "learning_rate": 2.688892348810081e-05,
      "loss": 2.7264,
      "step": 199115
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.899953842163086,
      "learning_rate": 2.6887230860814313e-05,
      "loss": 2.755,
      "step": 199116
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.727809906005859,
      "learning_rate": 2.688553828430463e-05,
      "loss": 2.8212,
      "step": 199117
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.667764902114868,
      "learning_rate": 2.6883845758572e-05,
      "loss": 2.9142,
      "step": 199118
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0022945404052734,
      "learning_rate": 2.6882153283616914e-05,
      "loss": 2.8759,
      "step": 199119
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9557101726531982,
      "learning_rate": 2.6880460859439512e-05,
      "loss": 2.8332,
      "step": 199120
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0546555519104004,
      "learning_rate": 2.6878768486040258e-05,
      "loss": 3.0707,
      "step": 199121
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.007237672805786,
      "learning_rate": 2.6877076163419386e-05,
      "loss": 2.9753,
      "step": 199122
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.609804153442383,
      "learning_rate": 2.6875383891577197e-05,
      "loss": 2.7482,
      "step": 199123
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.760020732879639,
      "learning_rate": 2.687369167051402e-05,
      "loss": 2.6404,
      "step": 199124
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.8243284225463867,
      "learning_rate": 2.6871999500230225e-05,
      "loss": 2.8831,
      "step": 199125
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8038575649261475,
      "learning_rate": 2.6870307380726007e-05,
      "loss": 3.1333,
      "step": 199126
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6585466861724854,
      "learning_rate": 2.6868615312001808e-05,
      "loss": 2.6432,
      "step": 199127
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8998820781707764,
      "learning_rate": 2.6866923294057884e-05,
      "loss": 2.7813,
      "step": 199128
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3124773502349854,
      "learning_rate": 2.6865231326894544e-05,
      "loss": 2.6977,
      "step": 199129
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.55582594871521,
      "learning_rate": 2.6863539410512114e-05,
      "loss": 2.7258,
      "step": 199130
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2442262172698975,
      "learning_rate": 2.686184754491093e-05,
      "loss": 2.9491,
      "step": 199131
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0418999195098877,
      "learning_rate": 2.686015573009126e-05,
      "loss": 2.8887,
      "step": 199132
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.512812614440918,
      "learning_rate": 2.6858463966053466e-05,
      "loss": 2.8872,
      "step": 199133
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.021768093109131,
      "learning_rate": 2.6856772252797854e-05,
      "loss": 2.9627,
      "step": 199134
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5873076915740967,
      "learning_rate": 2.6855080590324652e-05,
      "loss": 2.8023,
      "step": 199135
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.757028341293335,
      "learning_rate": 2.6853388978634295e-05,
      "loss": 2.7759,
      "step": 199136
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1957600116729736,
      "learning_rate": 2.685169741772708e-05,
      "loss": 2.8469,
      "step": 199137
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.417524814605713,
      "learning_rate": 2.6850005907603212e-05,
      "loss": 2.7227,
      "step": 199138
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.744227886199951,
      "learning_rate": 2.6848314448263154e-05,
      "loss": 2.9877,
      "step": 199139
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.091094493865967,
      "learning_rate": 2.6846623039707137e-05,
      "loss": 3.1209,
      "step": 199140
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.504598140716553,
      "learning_rate": 2.6844931681935434e-05,
      "loss": 2.7231,
      "step": 199141
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2525036334991455,
      "learning_rate": 2.6843240374948472e-05,
      "loss": 2.7805,
      "step": 199142
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9106650352478027,
      "learning_rate": 2.684154911874645e-05,
      "loss": 2.8373,
      "step": 199143
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1087069511413574,
      "learning_rate": 2.683985791332981e-05,
      "loss": 3.0431,
      "step": 199144
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4936702251434326,
      "learning_rate": 2.6838166758698775e-05,
      "loss": 2.7297,
      "step": 199145
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7465004920959473,
      "learning_rate": 2.6836475654853685e-05,
      "loss": 2.8659,
      "step": 199146
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9742324352264404,
      "learning_rate": 2.68347846017948e-05,
      "loss": 2.8257,
      "step": 199147
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.854919195175171,
      "learning_rate": 2.683309359952256e-05,
      "loss": 2.647,
      "step": 199148
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.396977424621582,
      "learning_rate": 2.6831402648037126e-05,
      "loss": 3.0608,
      "step": 199149
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.850389003753662,
      "learning_rate": 2.6829711747338967e-05,
      "loss": 2.8293,
      "step": 199150
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3962910175323486,
      "learning_rate": 2.682802089742825e-05,
      "loss": 3.1229,
      "step": 199151
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5960259437561035,
      "learning_rate": 2.682633009830547e-05,
      "loss": 3.1078,
      "step": 199152
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5628774166107178,
      "learning_rate": 2.68246393499707e-05,
      "loss": 2.8898,
      "step": 199153
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0646657943725586,
      "learning_rate": 2.682294865242447e-05,
      "loss": 2.9934,
      "step": 199154
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1494479179382324,
      "learning_rate": 2.6821258005666947e-05,
      "loss": 3.1644,
      "step": 199155
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.179929256439209,
      "learning_rate": 2.6819567409698594e-05,
      "loss": 3.2761,
      "step": 199156
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.57991099357605,
      "learning_rate": 2.681787686451955e-05,
      "loss": 2.9121,
      "step": 199157
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.979835271835327,
      "learning_rate": 2.6816186370130343e-05,
      "loss": 2.9206,
      "step": 199158
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0229451656341553,
      "learning_rate": 2.6814495926531043e-05,
      "loss": 3.177,
      "step": 199159
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.571553945541382,
      "learning_rate": 2.6812805533722147e-05,
      "loss": 2.744,
      "step": 199160
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7014920711517334,
      "learning_rate": 2.6811115191703857e-05,
      "loss": 3.251,
      "step": 199161
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.005746841430664,
      "learning_rate": 2.6809424900476573e-05,
      "loss": 3.0356,
      "step": 199162
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6877756118774414,
      "learning_rate": 2.6807734660040525e-05,
      "loss": 3.1556,
      "step": 199163
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9267592430114746,
      "learning_rate": 2.6806044470396214e-05,
      "loss": 2.8014,
      "step": 199164
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.017819881439209,
      "learning_rate": 2.6804354331543676e-05,
      "loss": 2.9562,
      "step": 199165
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.862848997116089,
      "learning_rate": 2.680266424348344e-05,
      "loss": 2.8786,
      "step": 199166
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6944496631622314,
      "learning_rate": 2.6800974206215675e-05,
      "loss": 2.9473,
      "step": 199167
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3935155868530273,
      "learning_rate": 2.6799284219740813e-05,
      "loss": 3.0067,
      "step": 199168
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.674020767211914,
      "learning_rate": 2.6797594284059088e-05,
      "loss": 3.1543,
      "step": 199169
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.775057315826416,
      "learning_rate": 2.679590439917093e-05,
      "loss": 2.5392,
      "step": 199170
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.818967819213867,
      "learning_rate": 2.679421456507648e-05,
      "loss": 2.8492,
      "step": 199171
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7766265869140625,
      "learning_rate": 2.6792524781776225e-05,
      "loss": 2.8099,
      "step": 199172
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.946049213409424,
      "learning_rate": 2.679083504927031e-05,
      "loss": 2.836,
      "step": 199173
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9776690006256104,
      "learning_rate": 2.6789145367559194e-05,
      "loss": 3.1451,
      "step": 199174
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7190353870391846,
      "learning_rate": 2.678745573664308e-05,
      "loss": 2.926,
      "step": 199175
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.922733783721924,
      "learning_rate": 2.6785766156522372e-05,
      "loss": 3.0444,
      "step": 199176
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8151891231536865,
      "learning_rate": 2.678407662719736e-05,
      "loss": 2.9071,
      "step": 199177
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7583091259002686,
      "learning_rate": 2.678238714866835e-05,
      "loss": 2.9615,
      "step": 199178
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0373685359954834,
      "learning_rate": 2.6780697720935607e-05,
      "loss": 2.843,
      "step": 199179
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4419405460357666,
      "learning_rate": 2.6779008343999532e-05,
      "loss": 3.0565,
      "step": 199180
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.7154064178466797,
      "learning_rate": 2.6777319017860354e-05,
      "loss": 2.973,
      "step": 199181
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.785446882247925,
      "learning_rate": 2.6775629742518478e-05,
      "loss": 2.8361,
      "step": 199182
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8496360778808594,
      "learning_rate": 2.677394051797417e-05,
      "loss": 2.7485,
      "step": 199183
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6388087272644043,
      "learning_rate": 2.6772251344227726e-05,
      "loss": 3.0357,
      "step": 199184
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1879796981811523,
      "learning_rate": 2.6770562221279446e-05,
      "loss": 2.8675,
      "step": 199185
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.04974889755249,
      "learning_rate": 2.6768873149129732e-05,
      "loss": 2.9941,
      "step": 199186
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3613381385803223,
      "learning_rate": 2.6767184127778752e-05,
      "loss": 3.0135,
      "step": 199187
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1271238327026367,
      "learning_rate": 2.6765495157227e-05,
      "loss": 2.9311,
      "step": 199188
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.320821762084961,
      "learning_rate": 2.676380623747468e-05,
      "loss": 2.7729,
      "step": 199189
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.62678599357605,
      "learning_rate": 2.6762117368522164e-05,
      "loss": 2.958,
      "step": 199190
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.05073618888855,
      "learning_rate": 2.6760428550369638e-05,
      "loss": 2.9068,
      "step": 199191
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3334922790527344,
      "learning_rate": 2.675873978301758e-05,
      "loss": 2.8096,
      "step": 199192
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.066388130187988,
      "learning_rate": 2.675705106646615e-05,
      "loss": 2.8403,
      "step": 199193
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2927682399749756,
      "learning_rate": 2.6755362400715818e-05,
      "loss": 2.8545,
      "step": 199194
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6358044147491455,
      "learning_rate": 2.6753673785766818e-05,
      "loss": 2.8175,
      "step": 199195
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6489598751068115,
      "learning_rate": 2.675198522161944e-05,
      "loss": 3.285,
      "step": 199196
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.275603294372559,
      "learning_rate": 2.6750296708273998e-05,
      "loss": 3.011,
      "step": 199197
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.434159278869629,
      "learning_rate": 2.6748608245730918e-05,
      "loss": 2.8511,
      "step": 199198
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2843985557556152,
      "learning_rate": 2.6746919833990332e-05,
      "loss": 3.1961,
      "step": 199199
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2493717670440674,
      "learning_rate": 2.674523147305271e-05,
      "loss": 2.8523,
      "step": 199200
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6678550243377686,
      "learning_rate": 2.6743543162918314e-05,
      "loss": 3.0546,
      "step": 199201
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8875300884246826,
      "learning_rate": 2.674185490358748e-05,
      "loss": 2.968,
      "step": 199202
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.892711639404297,
      "learning_rate": 2.6740166695060406e-05,
      "loss": 2.9792,
      "step": 199203
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5524516105651855,
      "learning_rate": 2.673847853733756e-05,
      "loss": 2.862,
      "step": 199204
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.992011785507202,
      "learning_rate": 2.6736790430419108e-05,
      "loss": 3.1473,
      "step": 199205
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2032008171081543,
      "learning_rate": 2.6735102374305516e-05,
      "loss": 2.8221,
      "step": 199206
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.262876510620117,
      "learning_rate": 2.673341436899705e-05,
      "loss": 2.9602,
      "step": 199207
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0788187980651855,
      "learning_rate": 2.673172641449398e-05,
      "loss": 2.8592,
      "step": 199208
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.604938507080078,
      "learning_rate": 2.673003851079657e-05,
      "loss": 3.1301,
      "step": 199209
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.620619058609009,
      "learning_rate": 2.6728350657905286e-05,
      "loss": 2.803,
      "step": 199210
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9734673500061035,
      "learning_rate": 2.672666285582029e-05,
      "loss": 2.931,
      "step": 199211
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5780441761016846,
      "learning_rate": 2.6724975104542023e-05,
      "loss": 2.8059,
      "step": 199212
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9876277446746826,
      "learning_rate": 2.672328740407075e-05,
      "loss": 3.0082,
      "step": 199213
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.817290782928467,
      "learning_rate": 2.672159975440673e-05,
      "loss": 2.8903,
      "step": 199214
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.779391288757324,
      "learning_rate": 2.671991215555037e-05,
      "loss": 2.8545,
      "step": 199215
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.224679946899414,
      "learning_rate": 2.6718224607501903e-05,
      "loss": 3.0971,
      "step": 199216
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.836754322052002,
      "learning_rate": 2.671653711026166e-05,
      "loss": 2.8982,
      "step": 199217
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6879162788391113,
      "learning_rate": 2.6714849663830008e-05,
      "loss": 2.6884,
      "step": 199218
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.587158679962158,
      "learning_rate": 2.6713162268207243e-05,
      "loss": 2.9398,
      "step": 199219
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8335189819335938,
      "learning_rate": 2.6711474923393607e-05,
      "loss": 2.9774,
      "step": 199220
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9267868995666504,
      "learning_rate": 2.670978762938949e-05,
      "loss": 2.6511,
      "step": 199221
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.85469126701355,
      "learning_rate": 2.67081003861952e-05,
      "loss": 2.977,
      "step": 199222
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.966259241104126,
      "learning_rate": 2.6706413193810992e-05,
      "loss": 3.2289,
      "step": 199223
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.211716890335083,
      "learning_rate": 2.6704726052237247e-05,
      "loss": 2.9014,
      "step": 199224
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.646979570388794,
      "learning_rate": 2.6703038961474287e-05,
      "loss": 2.8969,
      "step": 199225
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.107208728790283,
      "learning_rate": 2.670135192152232e-05,
      "loss": 2.8103,
      "step": 199226
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8722288608551025,
      "learning_rate": 2.669966493238177e-05,
      "loss": 2.8615,
      "step": 199227
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8983449935913086,
      "learning_rate": 2.6697977994052877e-05,
      "loss": 2.9089,
      "step": 199228
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.655325174331665,
      "learning_rate": 2.669629110653604e-05,
      "loss": 3.0544,
      "step": 199229
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.643707275390625,
      "learning_rate": 2.669460426983152e-05,
      "loss": 3.1119,
      "step": 199230
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.8032596111297607,
      "learning_rate": 2.6692917483939623e-05,
      "loss": 2.7282,
      "step": 199231
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.18107271194458,
      "learning_rate": 2.6691230748860614e-05,
      "loss": 2.8761,
      "step": 199232
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9882922172546387,
      "learning_rate": 2.6689544064594925e-05,
      "loss": 2.7683,
      "step": 199233
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.72810435295105,
      "learning_rate": 2.6687857431142757e-05,
      "loss": 2.9809,
      "step": 199234
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3046720027923584,
      "learning_rate": 2.668617084850454e-05,
      "loss": 2.9147,
      "step": 199235
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.609875202178955,
      "learning_rate": 2.6684484316680443e-05,
      "loss": 2.7593,
      "step": 199236
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.8048272132873535,
      "learning_rate": 2.6682797835671e-05,
      "loss": 2.8411,
      "step": 199237
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7703664302825928,
      "learning_rate": 2.6681111405476242e-05,
      "loss": 2.9203,
      "step": 199238
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9304423332214355,
      "learning_rate": 2.66794250260967e-05,
      "loss": 2.8462,
      "step": 199239
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.254718065261841,
      "learning_rate": 2.6677738697532582e-05,
      "loss": 3.0391,
      "step": 199240
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.848661422729492,
      "learning_rate": 2.6676052419784246e-05,
      "loss": 3.0097,
      "step": 199241
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.952404499053955,
      "learning_rate": 2.667436619285196e-05,
      "loss": 2.7258,
      "step": 199242
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2042486667633057,
      "learning_rate": 2.6672680016736092e-05,
      "loss": 2.9219,
      "step": 199243
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.096217632293701,
      "learning_rate": 2.6670993891436976e-05,
      "loss": 3.0786,
      "step": 199244
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.9776101112365723,
      "learning_rate": 2.6669307816954843e-05,
      "loss": 2.9079,
      "step": 199245
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0026702880859375,
      "learning_rate": 2.6667621793289994e-05,
      "loss": 3.023,
      "step": 199246
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.0156917572021484,
      "learning_rate": 2.6665935820442863e-05,
      "loss": 2.8211,
      "step": 199247
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9683239459991455,
      "learning_rate": 2.666424989841365e-05,
      "loss": 3.1825,
      "step": 199248
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.128551483154297,
      "learning_rate": 2.6662564027202747e-05,
      "loss": 2.9911,
      "step": 199249
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9086859226226807,
      "learning_rate": 2.6660878206810433e-05,
      "loss": 2.8703,
      "step": 199250
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6817781925201416,
      "learning_rate": 2.665919243723703e-05,
      "loss": 3.2082,
      "step": 199251
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.608288288116455,
      "learning_rate": 2.665750671848278e-05,
      "loss": 2.8983,
      "step": 199252
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5026609897613525,
      "learning_rate": 2.6655821050548142e-05,
      "loss": 2.9134,
      "step": 199253
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5025832653045654,
      "learning_rate": 2.6654135433433256e-05,
      "loss": 2.8684,
      "step": 199254
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.570995807647705,
      "learning_rate": 2.6652449867138616e-05,
      "loss": 2.789,
      "step": 199255
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.442573070526123,
      "learning_rate": 2.665076435166442e-05,
      "loss": 2.9959,
      "step": 199256
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.664341688156128,
      "learning_rate": 2.664907888701101e-05,
      "loss": 2.7736,
      "step": 199257
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9585604667663574,
      "learning_rate": 2.6647393473178648e-05,
      "loss": 2.6842,
      "step": 199258
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.3480117321014404,
      "learning_rate": 2.664570811016773e-05,
      "loss": 2.8497,
      "step": 199259
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.2952051162719727,
      "learning_rate": 2.6644022797978492e-05,
      "loss": 2.8492,
      "step": 199260
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5259361267089844,
      "learning_rate": 2.664233753661137e-05,
      "loss": 2.9447,
      "step": 199261
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.5058016777038574,
      "learning_rate": 2.664065232606656e-05,
      "loss": 2.7111,
      "step": 199262
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9592232704162598,
      "learning_rate": 2.6638967166344426e-05,
      "loss": 2.7417,
      "step": 199263
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.6298413276672363,
      "learning_rate": 2.663728205744521e-05,
      "loss": 2.9232,
      "step": 199264
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9480812549591064,
      "learning_rate": 2.663559699936937e-05,
      "loss": 2.8359,
      "step": 199265
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.693425416946411,
      "learning_rate": 2.663391199211704e-05,
      "loss": 2.708,
      "step": 199266
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6703906059265137,
      "learning_rate": 2.6632227035688692e-05,
      "loss": 3.0497,
      "step": 199267
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.604795932769775,
      "learning_rate": 2.6630542130084586e-05,
      "loss": 3.0101,
      "step": 199268
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.4156503677368164,
      "learning_rate": 2.6628857275304994e-05,
      "loss": 2.8877,
      "step": 199269
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.482961654663086,
      "learning_rate": 2.662717247135021e-05,
      "loss": 2.7674,
      "step": 199270
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5779638290405273,
      "learning_rate": 2.662548771822067e-05,
      "loss": 2.9356,
      "step": 199271
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.480959415435791,
      "learning_rate": 2.6623803015916544e-05,
      "loss": 2.804,
      "step": 199272
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.833745002746582,
      "learning_rate": 2.662211836443826e-05,
      "loss": 2.9886,
      "step": 199273
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7611422538757324,
      "learning_rate": 2.662043376378612e-05,
      "loss": 2.8089,
      "step": 199274
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.310321569442749,
      "learning_rate": 2.6618749213960357e-05,
      "loss": 2.88,
      "step": 199275
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7653043270111084,
      "learning_rate": 2.66170647149613e-05,
      "loss": 2.9764,
      "step": 199276
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.900261163711548,
      "learning_rate": 2.6615380266789355e-05,
      "loss": 2.8939,
      "step": 199277
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.5499165058135986,
      "learning_rate": 2.6613695869444686e-05,
      "loss": 2.9233,
      "step": 199278
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4761271476745605,
      "learning_rate": 2.6612011522927756e-05,
      "loss": 3.0449,
      "step": 199279
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.820908546447754,
      "learning_rate": 2.6610327227238837e-05,
      "loss": 2.8765,
      "step": 199280
    },
    {
      "epoch": 2.59,
      "grad_norm": 5.664841651916504,
      "learning_rate": 2.6608642982378193e-05,
      "loss": 2.844,
      "step": 199281
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.726208209991455,
      "learning_rate": 2.6606958788346123e-05,
      "loss": 2.9398,
      "step": 199282
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.829782247543335,
      "learning_rate": 2.6605274645143027e-05,
      "loss": 2.97,
      "step": 199283
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.1543991565704346,
      "learning_rate": 2.6603590552769104e-05,
      "loss": 2.9926,
      "step": 199284
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.872757911682129,
      "learning_rate": 2.6601906511224824e-05,
      "loss": 2.779,
      "step": 199285
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.9124457836151123,
      "learning_rate": 2.6600222520510382e-05,
      "loss": 2.8051,
      "step": 199286
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.221944570541382,
      "learning_rate": 2.6598538580626117e-05,
      "loss": 3.0503,
      "step": 199287
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.6830098628997803,
      "learning_rate": 2.6596854691572288e-05,
      "loss": 2.7354,
      "step": 199288
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.761244773864746,
      "learning_rate": 2.6595170853349333e-05,
      "loss": 3.113,
      "step": 199289
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.277298927307129,
      "learning_rate": 2.6593487065957453e-05,
      "loss": 2.8906,
      "step": 199290
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4504153728485107,
      "learning_rate": 2.6591803329397042e-05,
      "loss": 3.0856,
      "step": 199291
    },
    {
      "epoch": 2.59,
      "grad_norm": 4.040042877197266,
      "learning_rate": 2.6590119643668373e-05,
      "loss": 2.8076,
      "step": 199292
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.635510206222534,
      "learning_rate": 2.6588436008771774e-05,
      "loss": 3.0658,
      "step": 199293
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.855325222015381,
      "learning_rate": 2.658675242470748e-05,
      "loss": 3.15,
      "step": 199294
    },
    {
      "epoch": 2.59,
      "grad_norm": 3.4805374145507812,
      "learning_rate": 2.6585068891475925e-05,
      "loss": 3.0085,
      "step": 199295
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.977715015411377,
      "learning_rate": 2.658338540907734e-05,
      "loss": 3.0773,
      "step": 199296
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.7529635429382324,
      "learning_rate": 2.6581701977512094e-05,
      "loss": 3.0454,
      "step": 199297
    },
    {
      "epoch": 2.59,
      "grad_norm": 2.759281873703003,
      "learning_rate": 2.658001859678045e-05,
      "loss": 2.7805,
      "step": 199298
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9317076206207275,
      "learning_rate": 2.6578335266882712e-05,
      "loss": 3.0838,
      "step": 199299
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.634148597717285,
      "learning_rate": 2.6576651987819274e-05,
      "loss": 2.8981,
      "step": 199300
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.183937072753906,
      "learning_rate": 2.657496875959041e-05,
      "loss": 2.9786,
      "step": 199301
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4842562675476074,
      "learning_rate": 2.6573285582196347e-05,
      "loss": 2.9324,
      "step": 199302
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7557942867279053,
      "learning_rate": 2.6571602455637552e-05,
      "loss": 2.8624,
      "step": 199303
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.493286371231079,
      "learning_rate": 2.656991937991423e-05,
      "loss": 2.8385,
      "step": 199304
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6626744270324707,
      "learning_rate": 2.6568236355026673e-05,
      "loss": 2.8035,
      "step": 199305
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.191788673400879,
      "learning_rate": 2.656655338097532e-05,
      "loss": 2.7805,
      "step": 199306
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.546278715133667,
      "learning_rate": 2.6564870457760367e-05,
      "loss": 2.9513,
      "step": 199307
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.1354079246521,
      "learning_rate": 2.656318758538215e-05,
      "loss": 3.0356,
      "step": 199308
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9257538318634033,
      "learning_rate": 2.6561504763841034e-05,
      "loss": 3.1189,
      "step": 199309
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6495184898376465,
      "learning_rate": 2.655982199313722e-05,
      "loss": 3.096,
      "step": 199310
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.731149911880493,
      "learning_rate": 2.655813927327117e-05,
      "loss": 2.9933,
      "step": 199311
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2485671043395996,
      "learning_rate": 2.655645660424316e-05,
      "loss": 3.0157,
      "step": 199312
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.611156702041626,
      "learning_rate": 2.6554773986053346e-05,
      "loss": 2.6946,
      "step": 199313
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.916749954223633,
      "learning_rate": 2.6553091418702267e-05,
      "loss": 2.9471,
      "step": 199314
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.823859691619873,
      "learning_rate": 2.655140890219012e-05,
      "loss": 3.0123,
      "step": 199315
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.331343173980713,
      "learning_rate": 2.6549726436517137e-05,
      "loss": 3.06,
      "step": 199316
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0279855728149414,
      "learning_rate": 2.6548044021683824e-05,
      "loss": 3.098,
      "step": 199317
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.10455584526062,
      "learning_rate": 2.6546361657690374e-05,
      "loss": 3.048,
      "step": 199318
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3884456157684326,
      "learning_rate": 2.6544679344537058e-05,
      "loss": 2.6851,
      "step": 199319
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.676334857940674,
      "learning_rate": 2.6542997082224305e-05,
      "loss": 3.1766,
      "step": 199320
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.146528720855713,
      "learning_rate": 2.654131487075235e-05,
      "loss": 2.8062,
      "step": 199321
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4671506881713867,
      "learning_rate": 2.6539632710121496e-05,
      "loss": 2.9481,
      "step": 199322
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0964059829711914,
      "learning_rate": 2.6537950600332137e-05,
      "loss": 3.1213,
      "step": 199323
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1231462955474854,
      "learning_rate": 2.653626854138454e-05,
      "loss": 2.8871,
      "step": 199324
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.914790391921997,
      "learning_rate": 2.6534586533278947e-05,
      "loss": 3.2645,
      "step": 199325
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.18904972076416,
      "learning_rate": 2.6532904576015813e-05,
      "loss": 3.0054,
      "step": 199326
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.8964309692382812,
      "learning_rate": 2.6531222669595277e-05,
      "loss": 2.8588,
      "step": 199327
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.956150531768799,
      "learning_rate": 2.6529540814017836e-05,
      "loss": 2.9732,
      "step": 199328
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0162081718444824,
      "learning_rate": 2.6527859009283723e-05,
      "loss": 3.1289,
      "step": 199329
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.166029691696167,
      "learning_rate": 2.652617725539321e-05,
      "loss": 3.1646,
      "step": 199330
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1057960987091064,
      "learning_rate": 2.6524495552346624e-05,
      "loss": 3.062,
      "step": 199331
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8333988189697266,
      "learning_rate": 2.6522813900144334e-05,
      "loss": 3.0161,
      "step": 199332
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9811480045318604,
      "learning_rate": 2.652113229878654e-05,
      "loss": 3.1565,
      "step": 199333
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.448768615722656,
      "learning_rate": 2.651945074827371e-05,
      "loss": 2.8564,
      "step": 199334
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.765939235687256,
      "learning_rate": 2.6517769248606068e-05,
      "loss": 2.8932,
      "step": 199335
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.178225040435791,
      "learning_rate": 2.6516087799783958e-05,
      "loss": 2.9055,
      "step": 199336
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0170137882232666,
      "learning_rate": 2.6514406401807576e-05,
      "loss": 3.0163,
      "step": 199337
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4243993759155273,
      "learning_rate": 2.651272505467742e-05,
      "loss": 3.2678,
      "step": 199338
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.015007734298706,
      "learning_rate": 2.6511043758393624e-05,
      "loss": 2.9782,
      "step": 199339
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.398503541946411,
      "learning_rate": 2.6509362512956654e-05,
      "loss": 2.7009,
      "step": 199340
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2027156352996826,
      "learning_rate": 2.650768131836678e-05,
      "loss": 3.2186,
      "step": 199341
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.771186351776123,
      "learning_rate": 2.6506000174624265e-05,
      "loss": 3.181,
      "step": 199342
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.7139058113098145,
      "learning_rate": 2.6504319081729408e-05,
      "loss": 2.9096,
      "step": 199343
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.942417860031128,
      "learning_rate": 2.650263803968258e-05,
      "loss": 3.069,
      "step": 199344
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.747474193572998,
      "learning_rate": 2.650095704848404e-05,
      "loss": 3.0006,
      "step": 199345
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.8563168048858643,
      "learning_rate": 2.6499276108134194e-05,
      "loss": 2.9749,
      "step": 199346
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2137794494628906,
      "learning_rate": 2.6497595218633307e-05,
      "loss": 2.8174,
      "step": 199347
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.772218227386475,
      "learning_rate": 2.6495914379981677e-05,
      "loss": 3.1252,
      "step": 199348
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6185708045959473,
      "learning_rate": 2.6494233592179536e-05,
      "loss": 2.8575,
      "step": 199349
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3325326442718506,
      "learning_rate": 2.6492552855227357e-05,
      "loss": 3.2271,
      "step": 199350
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.961301803588867,
      "learning_rate": 2.649087216912533e-05,
      "loss": 3.1309,
      "step": 199351
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8835017681121826,
      "learning_rate": 2.6489191533873865e-05,
      "loss": 2.987,
      "step": 199352
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.779283046722412,
      "learning_rate": 2.648751094947319e-05,
      "loss": 2.8298,
      "step": 199353
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0978453159332275,
      "learning_rate": 2.648583041592367e-05,
      "loss": 2.8984,
      "step": 199354
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5125412940979004,
      "learning_rate": 2.648414993322554e-05,
      "loss": 2.9023,
      "step": 199355
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4908576011657715,
      "learning_rate": 2.6482469501379233e-05,
      "loss": 2.7158,
      "step": 199356
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5488638877868652,
      "learning_rate": 2.648078912038495e-05,
      "loss": 2.9712,
      "step": 199357
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.115346908569336,
      "learning_rate": 2.6479108790243085e-05,
      "loss": 2.862,
      "step": 199358
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.675147294998169,
      "learning_rate": 2.6477428510953945e-05,
      "loss": 2.8011,
      "step": 199359
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.210302352905273,
      "learning_rate": 2.6475748282517793e-05,
      "loss": 2.8673,
      "step": 199360
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.133843898773193,
      "learning_rate": 2.6474068104934897e-05,
      "loss": 3.0197,
      "step": 199361
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.894707679748535,
      "learning_rate": 2.647238797820572e-05,
      "loss": 2.7291,
      "step": 199362
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5630061626434326,
      "learning_rate": 2.6470707902330402e-05,
      "loss": 3.0471,
      "step": 199363
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.580894947052002,
      "learning_rate": 2.6469027877309436e-05,
      "loss": 2.6753,
      "step": 199364
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9694223403930664,
      "learning_rate": 2.6467347903142986e-05,
      "loss": 2.9677,
      "step": 199365
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.982100486755371,
      "learning_rate": 2.6465667979831463e-05,
      "loss": 2.8242,
      "step": 199366
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6616857051849365,
      "learning_rate": 2.646398810737509e-05,
      "loss": 2.9587,
      "step": 199367
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.263091564178467,
      "learning_rate": 2.6462308285774237e-05,
      "loss": 2.9732,
      "step": 199368
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.839632034301758,
      "learning_rate": 2.646062851502917e-05,
      "loss": 2.9187,
      "step": 199369
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.111801862716675,
      "learning_rate": 2.645894879514029e-05,
      "loss": 2.8703,
      "step": 199370
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5076282024383545,
      "learning_rate": 2.645726912610786e-05,
      "loss": 2.8048,
      "step": 199371
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.97052264213562,
      "learning_rate": 2.6455589507932185e-05,
      "loss": 2.9874,
      "step": 199372
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8688583374023438,
      "learning_rate": 2.6453909940613526e-05,
      "loss": 3.0178,
      "step": 199373
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9318718910217285,
      "learning_rate": 2.645223042415232e-05,
      "loss": 2.9805,
      "step": 199374
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.388885498046875,
      "learning_rate": 2.645055095854873e-05,
      "loss": 2.8833,
      "step": 199375
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1485893726348877,
      "learning_rate": 2.6448871543803196e-05,
      "loss": 3.0867,
      "step": 199376
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.418130397796631,
      "learning_rate": 2.6447192179915942e-05,
      "loss": 2.8817,
      "step": 199377
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.056313991546631,
      "learning_rate": 2.644551286688741e-05,
      "loss": 2.5762,
      "step": 199378
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3707778453826904,
      "learning_rate": 2.6443833604717722e-05,
      "loss": 3.0545,
      "step": 199379
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9811296463012695,
      "learning_rate": 2.6442154393407356e-05,
      "loss": 2.8757,
      "step": 199380
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.830782175064087,
      "learning_rate": 2.6440475232956504e-05,
      "loss": 3.0357,
      "step": 199381
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7896950244903564,
      "learning_rate": 2.643879612336557e-05,
      "loss": 2.9456,
      "step": 199382
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.522444725036621,
      "learning_rate": 2.643711706463475e-05,
      "loss": 3.0603,
      "step": 199383
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7798874378204346,
      "learning_rate": 2.643543805676458e-05,
      "loss": 3.0584,
      "step": 199384
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.139756202697754,
      "learning_rate": 2.6433759099755127e-05,
      "loss": 2.8441,
      "step": 199385
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.698943614959717,
      "learning_rate": 2.6432080193606818e-05,
      "loss": 2.8142,
      "step": 199386
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6376326084136963,
      "learning_rate": 2.6430401338319928e-05,
      "loss": 2.9364,
      "step": 199387
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.577911853790283,
      "learning_rate": 2.642872253389482e-05,
      "loss": 3.2305,
      "step": 199388
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.89859938621521,
      "learning_rate": 2.6427043780331726e-05,
      "loss": 2.8651,
      "step": 199389
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.128812789916992,
      "learning_rate": 2.6425365077631078e-05,
      "loss": 3.0856,
      "step": 199390
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.506603956222534,
      "learning_rate": 2.6423686425793113e-05,
      "loss": 2.91,
      "step": 199391
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.120952606201172,
      "learning_rate": 2.6422007824818127e-05,
      "loss": 3.0228,
      "step": 199392
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7288553714752197,
      "learning_rate": 2.642032927470642e-05,
      "loss": 2.7655,
      "step": 199393
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.697861671447754,
      "learning_rate": 2.6418650775458394e-05,
      "loss": 2.7098,
      "step": 199394
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.822576999664307,
      "learning_rate": 2.641697232707425e-05,
      "loss": 2.8691,
      "step": 199395
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.761263370513916,
      "learning_rate": 2.6415293929554416e-05,
      "loss": 3.0003,
      "step": 199396
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5489652156829834,
      "learning_rate": 2.6413615582899128e-05,
      "loss": 2.9744,
      "step": 199397
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6486153602600098,
      "learning_rate": 2.6411937287108653e-05,
      "loss": 3.0406,
      "step": 199398
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1002631187438965,
      "learning_rate": 2.6410259042183425e-05,
      "loss": 2.9164,
      "step": 199399
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.987778663635254,
      "learning_rate": 2.6408580848123707e-05,
      "loss": 2.7219,
      "step": 199400
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.063538074493408,
      "learning_rate": 2.6406902704929735e-05,
      "loss": 3.1645,
      "step": 199401
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9858720302581787,
      "learning_rate": 2.6405224612601937e-05,
      "loss": 3.0039,
      "step": 199402
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.192396879196167,
      "learning_rate": 2.640354657114059e-05,
      "loss": 3.0132,
      "step": 199403
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.711317539215088,
      "learning_rate": 2.6401868580545914e-05,
      "loss": 3.1245,
      "step": 199404
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7333855628967285,
      "learning_rate": 2.6400190640818352e-05,
      "loss": 2.7836,
      "step": 199405
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3873391151428223,
      "learning_rate": 2.639851275195817e-05,
      "loss": 2.9173,
      "step": 199406
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2053701877593994,
      "learning_rate": 2.6396834913965593e-05,
      "loss": 2.8726,
      "step": 199407
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.975402593612671,
      "learning_rate": 2.6395157126841093e-05,
      "loss": 3.0406,
      "step": 199408
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5586440563201904,
      "learning_rate": 2.639347939058487e-05,
      "loss": 2.708,
      "step": 199409
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6723716259002686,
      "learning_rate": 2.6391801705197226e-05,
      "loss": 2.7989,
      "step": 199410
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.026214122772217,
      "learning_rate": 2.6390124070678552e-05,
      "loss": 3.0065,
      "step": 199411
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8257768154144287,
      "learning_rate": 2.638844648702906e-05,
      "loss": 2.8928,
      "step": 199412
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9867706298828125,
      "learning_rate": 2.6386768954249204e-05,
      "loss": 2.8969,
      "step": 199413
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6463143825531006,
      "learning_rate": 2.6385091472339193e-05,
      "loss": 2.9188,
      "step": 199414
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.9475717544555664,
      "learning_rate": 2.6383414041299355e-05,
      "loss": 3.0237,
      "step": 199415
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.270663738250732,
      "learning_rate": 2.638173666112996e-05,
      "loss": 2.8255,
      "step": 199416
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.268179178237915,
      "learning_rate": 2.6380059331831404e-05,
      "loss": 2.9375,
      "step": 199417
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.879865884780884,
      "learning_rate": 2.6378382053403925e-05,
      "loss": 3.1297,
      "step": 199418
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3471899032592773,
      "learning_rate": 2.6376704825847917e-05,
      "loss": 2.9617,
      "step": 199419
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.705752372741699,
      "learning_rate": 2.637502764916365e-05,
      "loss": 2.8474,
      "step": 199420
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.079511880874634,
      "learning_rate": 2.6373350523351423e-05,
      "loss": 2.6306,
      "step": 199421
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.337210178375244,
      "learning_rate": 2.63716734484115e-05,
      "loss": 3.0,
      "step": 199422
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.496659278869629,
      "learning_rate": 2.636999642434432e-05,
      "loss": 3.0455,
      "step": 199423
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9560446739196777,
      "learning_rate": 2.6368319451150044e-05,
      "loss": 3.1407,
      "step": 199424
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.503509283065796,
      "learning_rate": 2.636664252882914e-05,
      "loss": 3.101,
      "step": 199425
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7439074516296387,
      "learning_rate": 2.636496565738181e-05,
      "loss": 2.8206,
      "step": 199426
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.124856472015381,
      "learning_rate": 2.6363288836808415e-05,
      "loss": 3.1189,
      "step": 199427
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.812361717224121,
      "learning_rate": 2.6361612067109195e-05,
      "loss": 3.0049,
      "step": 199428
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.212015151977539,
      "learning_rate": 2.6359935348284577e-05,
      "loss": 2.6456,
      "step": 199429
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2097244262695312,
      "learning_rate": 2.6358258680334766e-05,
      "loss": 2.8521,
      "step": 199430
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9293112754821777,
      "learning_rate": 2.6356582063260156e-05,
      "loss": 2.7733,
      "step": 199431
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6302249431610107,
      "learning_rate": 2.6354905497061052e-05,
      "loss": 3.1487,
      "step": 199432
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2020442485809326,
      "learning_rate": 2.6353228981737685e-05,
      "loss": 2.7011,
      "step": 199433
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8033196926116943,
      "learning_rate": 2.635155251729042e-05,
      "loss": 2.9094,
      "step": 199434
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.649383068084717,
      "learning_rate": 2.6349876103719592e-05,
      "loss": 2.7248,
      "step": 199435
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.616107702255249,
      "learning_rate": 2.6348199741025432e-05,
      "loss": 2.9386,
      "step": 199436
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.96697998046875,
      "learning_rate": 2.6346523429208344e-05,
      "loss": 2.6525,
      "step": 199437
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.4257097244262695,
      "learning_rate": 2.6344847168268656e-05,
      "loss": 2.833,
      "step": 199438
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.154496669769287,
      "learning_rate": 2.634317095820657e-05,
      "loss": 2.9129,
      "step": 199439
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5404624938964844,
      "learning_rate": 2.6341494799022424e-05,
      "loss": 2.923,
      "step": 199440
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.491950273513794,
      "learning_rate": 2.633981869071664e-05,
      "loss": 3.1787,
      "step": 199441
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2633824348449707,
      "learning_rate": 2.633814263328936e-05,
      "loss": 3.1201,
      "step": 199442
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.075460433959961,
      "learning_rate": 2.6336466626741048e-05,
      "loss": 3.1687,
      "step": 199443
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7895636558532715,
      "learning_rate": 2.6334790671071905e-05,
      "loss": 2.9339,
      "step": 199444
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.052685022354126,
      "learning_rate": 2.633311476628239e-05,
      "loss": 2.8466,
      "step": 199445
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.876917600631714,
      "learning_rate": 2.6331438912372615e-05,
      "loss": 2.6201,
      "step": 199446
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1832027435302734,
      "learning_rate": 2.6329763109343038e-05,
      "loss": 2.9764,
      "step": 199447
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.313498020172119,
      "learning_rate": 2.6328087357193862e-05,
      "loss": 2.852,
      "step": 199448
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9392037391662598,
      "learning_rate": 2.6326411655925515e-05,
      "loss": 2.8288,
      "step": 199449
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7032649517059326,
      "learning_rate": 2.6324736005538238e-05,
      "loss": 3.0144,
      "step": 199450
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0555577278137207,
      "learning_rate": 2.6323060406032425e-05,
      "loss": 2.8141,
      "step": 199451
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7275514602661133,
      "learning_rate": 2.632138485740821e-05,
      "loss": 2.8997,
      "step": 199452
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.333308219909668,
      "learning_rate": 2.6319709359666096e-05,
      "loss": 2.9278,
      "step": 199453
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.492138147354126,
      "learning_rate": 2.6318033912806246e-05,
      "loss": 2.7858,
      "step": 199454
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8274714946746826,
      "learning_rate": 2.6316358516829095e-05,
      "loss": 3.1488,
      "step": 199455
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5086159706115723,
      "learning_rate": 2.6314683171734874e-05,
      "loss": 2.7093,
      "step": 199456
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0320804119110107,
      "learning_rate": 2.6313007877523985e-05,
      "loss": 2.979,
      "step": 199457
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.733543872833252,
      "learning_rate": 2.631133263419659e-05,
      "loss": 2.9231,
      "step": 199458
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0263359546661377,
      "learning_rate": 2.630965744175313e-05,
      "loss": 3.0691,
      "step": 199459
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.568838119506836,
      "learning_rate": 2.630798230019383e-05,
      "loss": 2.9931,
      "step": 199460
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0087785720825195,
      "learning_rate": 2.6306307209519063e-05,
      "loss": 2.8226,
      "step": 199461
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.893289089202881,
      "learning_rate": 2.630463216972909e-05,
      "loss": 2.9005,
      "step": 199462
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2726924419403076,
      "learning_rate": 2.630295718082438e-05,
      "loss": 2.7182,
      "step": 199463
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4598922729492188,
      "learning_rate": 2.6301282242804965e-05,
      "loss": 2.9976,
      "step": 199464
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8806333541870117,
      "learning_rate": 2.629960735567138e-05,
      "loss": 2.9277,
      "step": 199465
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9315474033355713,
      "learning_rate": 2.6297932519423824e-05,
      "loss": 3.1956,
      "step": 199466
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3285462856292725,
      "learning_rate": 2.62962577340627e-05,
      "loss": 2.7998,
      "step": 199467
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.16977858543396,
      "learning_rate": 2.6294582999588197e-05,
      "loss": 2.8105,
      "step": 199468
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.986351490020752,
      "learning_rate": 2.6292908316000825e-05,
      "loss": 2.9262,
      "step": 199469
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1390492916107178,
      "learning_rate": 2.629123368330065e-05,
      "loss": 3.0327,
      "step": 199470
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3158366680145264,
      "learning_rate": 2.628955910148817e-05,
      "loss": 2.7098,
      "step": 199471
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.48286509513855,
      "learning_rate": 2.6287884570563544e-05,
      "loss": 2.8388,
      "step": 199472
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.415252447128296,
      "learning_rate": 2.628621009052725e-05,
      "loss": 3.1843,
      "step": 199473
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2297635078430176,
      "learning_rate": 2.6284535661379447e-05,
      "loss": 3.1033,
      "step": 199474
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.955811023712158,
      "learning_rate": 2.628286128312057e-05,
      "loss": 2.9445,
      "step": 199475
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1274964809417725,
      "learning_rate": 2.6281186955750887e-05,
      "loss": 2.93,
      "step": 199476
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.137744903564453,
      "learning_rate": 2.6279512679270663e-05,
      "loss": 3.022,
      "step": 199477
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5385472774505615,
      "learning_rate": 2.6277838453680233e-05,
      "loss": 2.9398,
      "step": 199478
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0283071994781494,
      "learning_rate": 2.627616427897996e-05,
      "loss": 2.9446,
      "step": 199479
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1189124584198,
      "learning_rate": 2.6274490155170046e-05,
      "loss": 2.9837,
      "step": 199480
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6045563220977783,
      "learning_rate": 2.627281608225096e-05,
      "loss": 2.9967,
      "step": 199481
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.75577712059021,
      "learning_rate": 2.6271142060222895e-05,
      "loss": 2.811,
      "step": 199482
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6192333698272705,
      "learning_rate": 2.6269468089086188e-05,
      "loss": 3.0656,
      "step": 199483
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.790942668914795,
      "learning_rate": 2.6267794168841107e-05,
      "loss": 2.969,
      "step": 199484
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.866118907928467,
      "learning_rate": 2.626612029948808e-05,
      "loss": 2.9325,
      "step": 199485
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.09733247756958,
      "learning_rate": 2.6264446481027278e-05,
      "loss": 3.0441,
      "step": 199486
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0836312770843506,
      "learning_rate": 2.6262772713459133e-05,
      "loss": 2.8379,
      "step": 199487
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9886581897735596,
      "learning_rate": 2.6261098996783913e-05,
      "loss": 2.8802,
      "step": 199488
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.856907367706299,
      "learning_rate": 2.625942533100188e-05,
      "loss": 2.7924,
      "step": 199489
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.323045253753662,
      "learning_rate": 2.6257751716113438e-05,
      "loss": 2.892,
      "step": 199490
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.9766173362731934,
      "learning_rate": 2.625607815211882e-05,
      "loss": 2.8496,
      "step": 199491
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.89615535736084,
      "learning_rate": 2.6254404639018323e-05,
      "loss": 3.1114,
      "step": 199492
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.067318916320801,
      "learning_rate": 2.625273117681238e-05,
      "loss": 2.803,
      "step": 199493
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.146420478820801,
      "learning_rate": 2.6251057765501194e-05,
      "loss": 2.6526,
      "step": 199494
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.3452045917510986,
      "learning_rate": 2.624938440508506e-05,
      "loss": 3.0536,
      "step": 199495
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1498968601226807,
      "learning_rate": 2.6247711095564384e-05,
      "loss": 2.9582,
      "step": 199496
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.61889386177063,
      "learning_rate": 2.624603783693936e-05,
      "loss": 2.7317,
      "step": 199497
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9019877910614014,
      "learning_rate": 2.6244364629210423e-05,
      "loss": 3.0046,
      "step": 199498
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3663408756256104,
      "learning_rate": 2.624269147237784e-05,
      "loss": 2.9189,
      "step": 199499
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.340575218200684,
      "learning_rate": 2.6241018366441914e-05,
      "loss": 2.8931,
      "step": 199500
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7761330604553223,
      "learning_rate": 2.6239345311402872e-05,
      "loss": 3.0315,
      "step": 199501
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4564523696899414,
      "learning_rate": 2.6237672307261183e-05,
      "loss": 2.9287,
      "step": 199502
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.863328456878662,
      "learning_rate": 2.6235999354017012e-05,
      "loss": 2.7401,
      "step": 199503
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.433235168457031,
      "learning_rate": 2.6234326451670797e-05,
      "loss": 2.8228,
      "step": 199504
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.96144962310791,
      "learning_rate": 2.6232653600222764e-05,
      "loss": 2.9341,
      "step": 199505
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.011256694793701,
      "learning_rate": 2.6230980799673284e-05,
      "loss": 2.864,
      "step": 199506
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.193904399871826,
      "learning_rate": 2.6229308050022556e-05,
      "loss": 2.9129,
      "step": 199507
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7214882373809814,
      "learning_rate": 2.6227635351271016e-05,
      "loss": 3.0251,
      "step": 199508
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9326324462890625,
      "learning_rate": 2.622596270341889e-05,
      "loss": 2.9609,
      "step": 199509
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3154044151306152,
      "learning_rate": 2.6224290106466585e-05,
      "loss": 3.0254,
      "step": 199510
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.738015174865723,
      "learning_rate": 2.6222617560414262e-05,
      "loss": 3.0671,
      "step": 199511
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4605014324188232,
      "learning_rate": 2.6220945065262457e-05,
      "loss": 2.86,
      "step": 199512
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.826732873916626,
      "learning_rate": 2.6219272621011233e-05,
      "loss": 2.9401,
      "step": 199513
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1858699321746826,
      "learning_rate": 2.621760022766106e-05,
      "loss": 2.8507,
      "step": 199514
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1618661880493164,
      "learning_rate": 2.6215927885212173e-05,
      "loss": 2.6971,
      "step": 199515
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3981773853302,
      "learning_rate": 2.6214255593664967e-05,
      "loss": 2.885,
      "step": 199516
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5863523483276367,
      "learning_rate": 2.6212583353019613e-05,
      "loss": 2.9479,
      "step": 199517
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6069512367248535,
      "learning_rate": 2.621091116327664e-05,
      "loss": 2.7065,
      "step": 199518
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2322707176208496,
      "learning_rate": 2.6209239024436114e-05,
      "loss": 2.9922,
      "step": 199519
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5928752422332764,
      "learning_rate": 2.6207566936498502e-05,
      "loss": 2.7996,
      "step": 199520
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.129162311553955,
      "learning_rate": 2.6205894899464042e-05,
      "loss": 3.0686,
      "step": 199521
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0156896114349365,
      "learning_rate": 2.6204222913333095e-05,
      "loss": 3.0813,
      "step": 199522
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3482367992401123,
      "learning_rate": 2.620255097810593e-05,
      "loss": 2.9606,
      "step": 199523
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9591500759124756,
      "learning_rate": 2.6200879093782977e-05,
      "loss": 2.9956,
      "step": 199524
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8398115634918213,
      "learning_rate": 2.6199207260364342e-05,
      "loss": 3.0358,
      "step": 199525
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.433978796005249,
      "learning_rate": 2.6197535477850483e-05,
      "loss": 2.9235,
      "step": 199526
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.633384704589844,
      "learning_rate": 2.6195863746241607e-05,
      "loss": 2.7307,
      "step": 199527
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.413680076599121,
      "learning_rate": 2.6194192065538177e-05,
      "loss": 2.8929,
      "step": 199528
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.696220636367798,
      "learning_rate": 2.6192520435740326e-05,
      "loss": 3.0122,
      "step": 199529
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.405590772628784,
      "learning_rate": 2.619084885684859e-05,
      "loss": 2.9687,
      "step": 199530
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.786412000656128,
      "learning_rate": 2.6189177328863032e-05,
      "loss": 2.8742,
      "step": 199531
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9871063232421875,
      "learning_rate": 2.6187505851784118e-05,
      "loss": 2.9412,
      "step": 199532
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9066927433013916,
      "learning_rate": 2.6185834425612052e-05,
      "loss": 3.0671,
      "step": 199533
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3254706859588623,
      "learning_rate": 2.6184163050347294e-05,
      "loss": 2.7698,
      "step": 199534
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.898952007293701,
      "learning_rate": 2.6182491725989985e-05,
      "loss": 2.8813,
      "step": 199535
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2316203117370605,
      "learning_rate": 2.618082045254065e-05,
      "loss": 2.6921,
      "step": 199536
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5334646701812744,
      "learning_rate": 2.6179149229999365e-05,
      "loss": 2.7082,
      "step": 199537
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2242050170898438,
      "learning_rate": 2.617747805836655e-05,
      "loss": 3.0209,
      "step": 199538
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5957112312316895,
      "learning_rate": 2.617580693764252e-05,
      "loss": 2.8519,
      "step": 199539
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.624960422515869,
      "learning_rate": 2.6174135867827596e-05,
      "loss": 3.0037,
      "step": 199540
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.984287977218628,
      "learning_rate": 2.6172464848922016e-05,
      "loss": 3.0502,
      "step": 199541
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.9587926864624023,
      "learning_rate": 2.6170793880926243e-05,
      "loss": 2.7877,
      "step": 199542
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.419682264328003,
      "learning_rate": 2.6169122963840417e-05,
      "loss": 3.0038,
      "step": 199543
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.9081146717071533,
      "learning_rate": 2.616745209766493e-05,
      "loss": 2.8146,
      "step": 199544
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.560749053955078,
      "learning_rate": 2.6165781282400057e-05,
      "loss": 2.824,
      "step": 199545
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0686514377593994,
      "learning_rate": 2.6164110518046187e-05,
      "loss": 2.8068,
      "step": 199546
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9724557399749756,
      "learning_rate": 2.6162439804603497e-05,
      "loss": 2.866,
      "step": 199547
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.7104310989379883,
      "learning_rate": 2.616076914207251e-05,
      "loss": 2.7352,
      "step": 199548
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2122933864593506,
      "learning_rate": 2.6159098530453305e-05,
      "loss": 2.9656,
      "step": 199549
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.404068946838379,
      "learning_rate": 2.6157427969746336e-05,
      "loss": 2.8962,
      "step": 199550
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.443047523498535,
      "learning_rate": 2.615575745995181e-05,
      "loss": 2.7856,
      "step": 199551
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9679274559020996,
      "learning_rate": 2.6154087001070157e-05,
      "loss": 3.0102,
      "step": 199552
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.548056125640869,
      "learning_rate": 2.6152416593101578e-05,
      "loss": 2.9547,
      "step": 199553
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.171959400177002,
      "learning_rate": 2.6150746236046537e-05,
      "loss": 3.008,
      "step": 199554
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.666349172592163,
      "learning_rate": 2.6149075929905138e-05,
      "loss": 2.9073,
      "step": 199555
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.974801778793335,
      "learning_rate": 2.6147405674677846e-05,
      "loss": 2.7909,
      "step": 199556
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.207174777984619,
      "learning_rate": 2.6145735470364858e-05,
      "loss": 2.9774,
      "step": 199557
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4262702465057373,
      "learning_rate": 2.614406531696661e-05,
      "loss": 2.8413,
      "step": 199558
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4417803287506104,
      "learning_rate": 2.6142395214483303e-05,
      "loss": 2.9645,
      "step": 199559
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.97249436378479,
      "learning_rate": 2.6140725162915332e-05,
      "loss": 2.7921,
      "step": 199560
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6254727840423584,
      "learning_rate": 2.6139055162263e-05,
      "loss": 2.9276,
      "step": 199561
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6302661895751953,
      "learning_rate": 2.6137385212526574e-05,
      "loss": 3.0253,
      "step": 199562
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.926319122314453,
      "learning_rate": 2.6135715313706318e-05,
      "loss": 2.8972,
      "step": 199563
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9942450523376465,
      "learning_rate": 2.6134045465802632e-05,
      "loss": 2.9052,
      "step": 199564
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1472134590148926,
      "learning_rate": 2.6132375668815784e-05,
      "loss": 3.0227,
      "step": 199565
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5116851329803467,
      "learning_rate": 2.613070592274611e-05,
      "loss": 2.7038,
      "step": 199566
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.097808837890625,
      "learning_rate": 2.6129036227593936e-05,
      "loss": 2.913,
      "step": 199567
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.783562660217285,
      "learning_rate": 2.6127366583359533e-05,
      "loss": 3.0785,
      "step": 199568
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6581621170043945,
      "learning_rate": 2.61256969900432e-05,
      "loss": 3.0216,
      "step": 199569
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9159955978393555,
      "learning_rate": 2.6124027447645267e-05,
      "loss": 2.7738,
      "step": 199570
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.856049060821533,
      "learning_rate": 2.612235795616604e-05,
      "loss": 2.8366,
      "step": 199571
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8771393299102783,
      "learning_rate": 2.612068851560588e-05,
      "loss": 2.9012,
      "step": 199572
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.407616138458252,
      "learning_rate": 2.6119019125965025e-05,
      "loss": 3.0327,
      "step": 199573
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.885331153869629,
      "learning_rate": 2.61173497872438e-05,
      "loss": 3.0297,
      "step": 199574
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.858135461807251,
      "learning_rate": 2.611568049944255e-05,
      "loss": 2.8954,
      "step": 199575
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.945777654647827,
      "learning_rate": 2.6114011262561597e-05,
      "loss": 2.8349,
      "step": 199576
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2919797897338867,
      "learning_rate": 2.6112342076601146e-05,
      "loss": 2.7702,
      "step": 199577
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2644460201263428,
      "learning_rate": 2.611067294156163e-05,
      "loss": 2.6598,
      "step": 199578
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7710821628570557,
      "learning_rate": 2.6109003857443313e-05,
      "loss": 2.7952,
      "step": 199579
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.455019950866699,
      "learning_rate": 2.6107334824246463e-05,
      "loss": 3.0951,
      "step": 199580
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.996227979660034,
      "learning_rate": 2.610566584197148e-05,
      "loss": 2.9381,
      "step": 199581
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.7382826805114746,
      "learning_rate": 2.6103996910618598e-05,
      "loss": 3.1295,
      "step": 199582
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6738362312316895,
      "learning_rate": 2.6102328030188114e-05,
      "loss": 2.7875,
      "step": 199583
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.7108161449432373,
      "learning_rate": 2.6100659200680462e-05,
      "loss": 3.2043,
      "step": 199584
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.4094672203063965,
      "learning_rate": 2.609899042209581e-05,
      "loss": 2.8198,
      "step": 199585
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8793625831604004,
      "learning_rate": 2.6097321694434526e-05,
      "loss": 2.9005,
      "step": 199586
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.977328300476074,
      "learning_rate": 2.6095653017696937e-05,
      "loss": 2.7076,
      "step": 199587
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7687275409698486,
      "learning_rate": 2.6093984391883317e-05,
      "loss": 3.1124,
      "step": 199588
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.813171148300171,
      "learning_rate": 2.6092315816994023e-05,
      "loss": 2.9215,
      "step": 199589
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0558667182922363,
      "learning_rate": 2.6090647293029333e-05,
      "loss": 2.9065,
      "step": 199590
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1979382038116455,
      "learning_rate": 2.608897881998957e-05,
      "loss": 2.8454,
      "step": 199591
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.120467185974121,
      "learning_rate": 2.6087310397875005e-05,
      "loss": 2.9389,
      "step": 199592
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3641550540924072,
      "learning_rate": 2.6085642026686005e-05,
      "loss": 2.9562,
      "step": 199593
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.340427875518799,
      "learning_rate": 2.60839737064228e-05,
      "loss": 3.233,
      "step": 199594
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0965285301208496,
      "learning_rate": 2.6082305437085826e-05,
      "loss": 2.7345,
      "step": 199595
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.539809465408325,
      "learning_rate": 2.608063721867525e-05,
      "loss": 2.8305,
      "step": 199596
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9835822582244873,
      "learning_rate": 2.60789690511916e-05,
      "loss": 3.0561,
      "step": 199597
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5613768100738525,
      "learning_rate": 2.6077300934634882e-05,
      "loss": 2.9425,
      "step": 199598
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2077720165252686,
      "learning_rate": 2.607563286900566e-05,
      "loss": 2.6959,
      "step": 199599
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.967196464538574,
      "learning_rate": 2.6073964854304064e-05,
      "loss": 3.1051,
      "step": 199600
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3936192989349365,
      "learning_rate": 2.6072296890530563e-05,
      "loss": 2.8766,
      "step": 199601
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.349691152572632,
      "learning_rate": 2.6070628977685327e-05,
      "loss": 2.7654,
      "step": 199602
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.710332155227661,
      "learning_rate": 2.606896111576885e-05,
      "loss": 2.9107,
      "step": 199603
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.826533317565918,
      "learning_rate": 2.6067293304781202e-05,
      "loss": 3.0109,
      "step": 199604
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8290627002716064,
      "learning_rate": 2.606562554472288e-05,
      "loss": 2.8317,
      "step": 199605
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8779408931732178,
      "learning_rate": 2.6063957835594085e-05,
      "loss": 2.8282,
      "step": 199606
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.584533929824829,
      "learning_rate": 2.6062290177395217e-05,
      "loss": 2.991,
      "step": 199607
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2029101848602295,
      "learning_rate": 2.606062257012648e-05,
      "loss": 2.849,
      "step": 199608
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4242923259735107,
      "learning_rate": 2.605895501378833e-05,
      "loss": 2.8483,
      "step": 199609
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0007243156433105,
      "learning_rate": 2.6057287508380908e-05,
      "loss": 2.6855,
      "step": 199610
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0217673778533936,
      "learning_rate": 2.605562005390468e-05,
      "loss": 2.8503,
      "step": 199611
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.747832775115967,
      "learning_rate": 2.6053952650359812e-05,
      "loss": 2.8211,
      "step": 199612
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.975984573364258,
      "learning_rate": 2.6052285297746735e-05,
      "loss": 2.8952,
      "step": 199613
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5685014724731445,
      "learning_rate": 2.6050617996065647e-05,
      "loss": 2.8792,
      "step": 199614
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.548482894897461,
      "learning_rate": 2.6048950745317054e-05,
      "loss": 2.9456,
      "step": 199615
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9212021827697754,
      "learning_rate": 2.6047283545500986e-05,
      "loss": 2.8988,
      "step": 199616
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.7271173000335693,
      "learning_rate": 2.6045616396617974e-05,
      "loss": 2.9927,
      "step": 199617
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8785550594329834,
      "learning_rate": 2.6043949298668222e-05,
      "loss": 2.8712,
      "step": 199618
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8493709564208984,
      "learning_rate": 2.604228225165209e-05,
      "loss": 3.294,
      "step": 199619
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6641197204589844,
      "learning_rate": 2.604061525556982e-05,
      "loss": 2.7348,
      "step": 199620
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1981680393218994,
      "learning_rate": 2.6038948310421902e-05,
      "loss": 2.6056,
      "step": 199621
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.639659881591797,
      "learning_rate": 2.6037281416208413e-05,
      "loss": 3.0222,
      "step": 199622
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8697402477264404,
      "learning_rate": 2.6035614572929776e-05,
      "loss": 2.6952,
      "step": 199623
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8848347663879395,
      "learning_rate": 2.6033947780586262e-05,
      "loss": 3.0161,
      "step": 199624
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.285275936126709,
      "learning_rate": 2.6032281039178272e-05,
      "loss": 3.101,
      "step": 199625
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8815743923187256,
      "learning_rate": 2.603061434870597e-05,
      "loss": 3.122,
      "step": 199626
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2054648399353027,
      "learning_rate": 2.6028947709169888e-05,
      "loss": 2.7641,
      "step": 199627
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.4537744522094727,
      "learning_rate": 2.6027281120570063e-05,
      "loss": 3.0264,
      "step": 199628
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.115659236907959,
      "learning_rate": 2.6025614582906995e-05,
      "loss": 2.9116,
      "step": 199629
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2292003631591797,
      "learning_rate": 2.6023948096180914e-05,
      "loss": 3.0135,
      "step": 199630
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3149499893188477,
      "learning_rate": 2.6022281660392152e-05,
      "loss": 2.8461,
      "step": 199631
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7957985401153564,
      "learning_rate": 2.602061527554101e-05,
      "loss": 3.0696,
      "step": 199632
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.630967617034912,
      "learning_rate": 2.6018948941627893e-05,
      "loss": 2.7627,
      "step": 199633
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6698076725006104,
      "learning_rate": 2.6017282658652928e-05,
      "loss": 2.8379,
      "step": 199634
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.838250160217285,
      "learning_rate": 2.601561642661658e-05,
      "loss": 2.7068,
      "step": 199635
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4566001892089844,
      "learning_rate": 2.6013950245519054e-05,
      "loss": 2.7921,
      "step": 199636
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.275193452835083,
      "learning_rate": 2.601228411536075e-05,
      "loss": 2.8057,
      "step": 199637
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0632572174072266,
      "learning_rate": 2.6010618036141863e-05,
      "loss": 2.8865,
      "step": 199638
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7148401737213135,
      "learning_rate": 2.6008952007862895e-05,
      "loss": 2.7661,
      "step": 199639
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7728025913238525,
      "learning_rate": 2.6007286030523943e-05,
      "loss": 3.1113,
      "step": 199640
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.693134069442749,
      "learning_rate": 2.6005620104125447e-05,
      "loss": 2.8821,
      "step": 199641
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9079999923706055,
      "learning_rate": 2.60039542286676e-05,
      "loss": 2.8756,
      "step": 199642
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7605104446411133,
      "learning_rate": 2.6002288404150872e-05,
      "loss": 2.8838,
      "step": 199643
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9625511169433594,
      "learning_rate": 2.600062263057543e-05,
      "loss": 3.0197,
      "step": 199644
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.327354907989502,
      "learning_rate": 2.5998956907941703e-05,
      "loss": 2.9575,
      "step": 199645
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1062004566192627,
      "learning_rate": 2.5997291236249962e-05,
      "loss": 2.8009,
      "step": 199646
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9425647258758545,
      "learning_rate": 2.599562561550047e-05,
      "loss": 2.8374,
      "step": 199647
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.577755928039551,
      "learning_rate": 2.5993960045693497e-05,
      "loss": 2.903,
      "step": 199648
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7093143463134766,
      "learning_rate": 2.5992294526829504e-05,
      "loss": 2.7957,
      "step": 199649
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9055705070495605,
      "learning_rate": 2.5990629058908662e-05,
      "loss": 3.2869,
      "step": 199650
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.878166437149048,
      "learning_rate": 2.598896364193137e-05,
      "loss": 2.7251,
      "step": 199651
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.841862201690674,
      "learning_rate": 2.5987298275897895e-05,
      "loss": 2.6888,
      "step": 199652
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8564083576202393,
      "learning_rate": 2.5985632960808566e-05,
      "loss": 3.0582,
      "step": 199653
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1030702590942383,
      "learning_rate": 2.598396769666362e-05,
      "loss": 2.964,
      "step": 199654
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4651472568511963,
      "learning_rate": 2.598230248346349e-05,
      "loss": 3.2777,
      "step": 199655
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.817284107208252,
      "learning_rate": 2.5980637321208376e-05,
      "loss": 3.0046,
      "step": 199656
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8481850624084473,
      "learning_rate": 2.5978972209898674e-05,
      "loss": 2.92,
      "step": 199657
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8906476497650146,
      "learning_rate": 2.5977307149534653e-05,
      "loss": 2.9077,
      "step": 199658
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.184530735015869,
      "learning_rate": 2.597564214011658e-05,
      "loss": 2.8832,
      "step": 199659
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0143234729766846,
      "learning_rate": 2.5973977181644856e-05,
      "loss": 3.1316,
      "step": 199660
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.771556854248047,
      "learning_rate": 2.5972312274119745e-05,
      "loss": 3.0903,
      "step": 199661
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.4895427227020264,
      "learning_rate": 2.597064741754148e-05,
      "loss": 2.9402,
      "step": 199662
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.70778751373291,
      "learning_rate": 2.596898261191053e-05,
      "loss": 3.0405,
      "step": 199663
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4599609375,
      "learning_rate": 2.5967317857227088e-05,
      "loss": 2.8568,
      "step": 199664
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6844370365142822,
      "learning_rate": 2.5965653153491463e-05,
      "loss": 2.9466,
      "step": 199665
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9000015258789062,
      "learning_rate": 2.5963988500704046e-05,
      "loss": 2.7874,
      "step": 199666
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.790598154067993,
      "learning_rate": 2.5962323898865113e-05,
      "loss": 3.0794,
      "step": 199667
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6916799545288086,
      "learning_rate": 2.5960659347974855e-05,
      "loss": 2.7698,
      "step": 199668
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.213606357574463,
      "learning_rate": 2.5958994848033775e-05,
      "loss": 2.7814,
      "step": 199669
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2805588245391846,
      "learning_rate": 2.5957330399042075e-05,
      "loss": 2.9586,
      "step": 199670
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.632507801055908,
      "learning_rate": 2.5955666001000054e-05,
      "loss": 2.8902,
      "step": 199671
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.310548782348633,
      "learning_rate": 2.5954001653908075e-05,
      "loss": 2.9998,
      "step": 199672
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.463754415512085,
      "learning_rate": 2.595233735776637e-05,
      "loss": 2.8422,
      "step": 199673
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.010796308517456,
      "learning_rate": 2.595067311257535e-05,
      "loss": 2.8879,
      "step": 199674
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9825384616851807,
      "learning_rate": 2.5949008918335266e-05,
      "loss": 3.048,
      "step": 199675
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.951873302459717,
      "learning_rate": 2.5947344775046465e-05,
      "loss": 2.9262,
      "step": 199676
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0730109214782715,
      "learning_rate": 2.5945680682709135e-05,
      "loss": 2.9357,
      "step": 199677
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5665318965911865,
      "learning_rate": 2.594401664132375e-05,
      "loss": 3.014,
      "step": 199678
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0746471881866455,
      "learning_rate": 2.5942352650890508e-05,
      "loss": 2.9813,
      "step": 199679
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.038207530975342,
      "learning_rate": 2.5940688711409773e-05,
      "loss": 3.0104,
      "step": 199680
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.673902988433838,
      "learning_rate": 2.5939024822881814e-05,
      "loss": 2.9745,
      "step": 199681
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.742006301879883,
      "learning_rate": 2.593736098530703e-05,
      "loss": 2.981,
      "step": 199682
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2620162963867188,
      "learning_rate": 2.593569719868559e-05,
      "loss": 2.8776,
      "step": 199683
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.908874034881592,
      "learning_rate": 2.5934033463017923e-05,
      "loss": 3.1076,
      "step": 199684
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.149894952774048,
      "learning_rate": 2.593236977830426e-05,
      "loss": 3.0637,
      "step": 199685
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.380686044692993,
      "learning_rate": 2.593070614454498e-05,
      "loss": 2.9137,
      "step": 199686
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1734249591827393,
      "learning_rate": 2.5929042561740297e-05,
      "loss": 2.9318,
      "step": 199687
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.878587245941162,
      "learning_rate": 2.5927379029890693e-05,
      "loss": 3.0192,
      "step": 199688
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.717445135116577,
      "learning_rate": 2.592571554899623e-05,
      "loss": 2.7547,
      "step": 199689
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5250818729400635,
      "learning_rate": 2.5924052119057437e-05,
      "loss": 2.7775,
      "step": 199690
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8564367294311523,
      "learning_rate": 2.592238874007445e-05,
      "loss": 2.6559,
      "step": 199691
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.534329414367676,
      "learning_rate": 2.5920725412047772e-05,
      "loss": 2.7338,
      "step": 199692
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.648273229598999,
      "learning_rate": 2.59190621349775e-05,
      "loss": 2.7032,
      "step": 199693
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.302567720413208,
      "learning_rate": 2.5917398908864162e-05,
      "loss": 2.8981,
      "step": 199694
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.392559766769409,
      "learning_rate": 2.5915735733707866e-05,
      "loss": 2.9563,
      "step": 199695
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6794025897979736,
      "learning_rate": 2.591407260950904e-05,
      "loss": 2.7633,
      "step": 199696
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.52150559425354,
      "learning_rate": 2.591240953626792e-05,
      "loss": 2.6906,
      "step": 199697
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6341516971588135,
      "learning_rate": 2.5910746513984937e-05,
      "loss": 3.0996,
      "step": 199698
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0967326164245605,
      "learning_rate": 2.5909083542660225e-05,
      "loss": 2.8081,
      "step": 199699
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8018763065338135,
      "learning_rate": 2.5907420622294317e-05,
      "loss": 2.9269,
      "step": 199700
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.043616771697998,
      "learning_rate": 2.590575775288728e-05,
      "loss": 2.6957,
      "step": 199701
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.041626930236816,
      "learning_rate": 2.590409493443958e-05,
      "loss": 2.8519,
      "step": 199702
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5497238636016846,
      "learning_rate": 2.5902432166951446e-05,
      "loss": 3.0245,
      "step": 199703
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0694048404693604,
      "learning_rate": 2.5900769450423288e-05,
      "loss": 2.8329,
      "step": 199704
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0454771518707275,
      "learning_rate": 2.589910678485526e-05,
      "loss": 2.8822,
      "step": 199705
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.774698495864868,
      "learning_rate": 2.5897444170247904e-05,
      "loss": 2.7627,
      "step": 199706
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9733500480651855,
      "learning_rate": 2.589578160660125e-05,
      "loss": 3.0146,
      "step": 199707
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.132122039794922,
      "learning_rate": 2.5894119093915834e-05,
      "loss": 2.7081,
      "step": 199708
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7601583003997803,
      "learning_rate": 2.589245663219178e-05,
      "loss": 2.7899,
      "step": 199709
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8397459983825684,
      "learning_rate": 2.5890794221429567e-05,
      "loss": 2.9598,
      "step": 199710
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0460610389709473,
      "learning_rate": 2.5889131861629386e-05,
      "loss": 3.0823,
      "step": 199711
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.2445807456970215,
      "learning_rate": 2.588746955279164e-05,
      "loss": 3.1048,
      "step": 199712
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.133216619491577,
      "learning_rate": 2.588580729491656e-05,
      "loss": 3.1197,
      "step": 199713
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7578165531158447,
      "learning_rate": 2.5884145088004516e-05,
      "loss": 2.9163,
      "step": 199714
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8986055850982666,
      "learning_rate": 2.5882482932055705e-05,
      "loss": 2.9663,
      "step": 199715
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9996140003204346,
      "learning_rate": 2.5880820827070592e-05,
      "loss": 2.8505,
      "step": 199716
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0184977054595947,
      "learning_rate": 2.5879158773049345e-05,
      "loss": 2.9584,
      "step": 199717
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.175091028213501,
      "learning_rate": 2.58774967699924e-05,
      "loss": 2.9602,
      "step": 199718
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1623141765594482,
      "learning_rate": 2.5875834817899988e-05,
      "loss": 2.845,
      "step": 199719
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0158467292785645,
      "learning_rate": 2.587417291677244e-05,
      "loss": 2.7141,
      "step": 199720
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.671459436416626,
      "learning_rate": 2.587251106660999e-05,
      "loss": 2.7309,
      "step": 199721
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.53191614151001,
      "learning_rate": 2.5870849267413076e-05,
      "loss": 2.8015,
      "step": 199722
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1514713764190674,
      "learning_rate": 2.586918751918192e-05,
      "loss": 3.1823,
      "step": 199723
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.504537582397461,
      "learning_rate": 2.586752582191687e-05,
      "loss": 3.1956,
      "step": 199724
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8457775115966797,
      "learning_rate": 2.5865864175618245e-05,
      "loss": 2.936,
      "step": 199725
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7844603061676025,
      "learning_rate": 2.586420258028632e-05,
      "loss": 2.9015,
      "step": 199726
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8844003677368164,
      "learning_rate": 2.5862541035921357e-05,
      "loss": 2.8448,
      "step": 199727
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.973442792892456,
      "learning_rate": 2.5860879542523794e-05,
      "loss": 2.9352,
      "step": 199728
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.956289291381836,
      "learning_rate": 2.5859218100093792e-05,
      "loss": 2.8349,
      "step": 199729
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0391805171966553,
      "learning_rate": 2.585755670863182e-05,
      "loss": 3.1042,
      "step": 199730
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.847289562225342,
      "learning_rate": 2.585589536813808e-05,
      "loss": 2.5292,
      "step": 199731
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.500383138656616,
      "learning_rate": 2.58542340786129e-05,
      "loss": 2.9359,
      "step": 199732
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.831063985824585,
      "learning_rate": 2.585257284005655e-05,
      "loss": 2.9296,
      "step": 199733
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.720309257507324,
      "learning_rate": 2.5850911652469432e-05,
      "loss": 2.6688,
      "step": 199734
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.540600061416626,
      "learning_rate": 2.5849250515851772e-05,
      "loss": 3.0919,
      "step": 199735
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.75248384475708,
      "learning_rate": 2.584758943020394e-05,
      "loss": 2.9271,
      "step": 199736
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3457679748535156,
      "learning_rate": 2.5845928395526205e-05,
      "loss": 3.0397,
      "step": 199737
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9231598377227783,
      "learning_rate": 2.584426741181893e-05,
      "loss": 2.9615,
      "step": 199738
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.033708095550537,
      "learning_rate": 2.5842606479082284e-05,
      "loss": 2.7421,
      "step": 199739
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.553312301635742,
      "learning_rate": 2.584094559731673e-05,
      "loss": 2.8444,
      "step": 199740
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.002342939376831,
      "learning_rate": 2.5839284766522504e-05,
      "loss": 2.9386,
      "step": 199741
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.771597385406494,
      "learning_rate": 2.583762398669994e-05,
      "loss": 2.601,
      "step": 199742
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.280054807662964,
      "learning_rate": 2.583596325784937e-05,
      "loss": 2.8025,
      "step": 199743
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.001233100891113,
      "learning_rate": 2.583430257997099e-05,
      "loss": 3.0098,
      "step": 199744
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3456504344940186,
      "learning_rate": 2.583264195306527e-05,
      "loss": 2.8375,
      "step": 199745
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.149609565734863,
      "learning_rate": 2.583098137713241e-05,
      "loss": 3.028,
      "step": 199746
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4679720401763916,
      "learning_rate": 2.5829320852172707e-05,
      "loss": 2.7983,
      "step": 199747
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.98898458480835,
      "learning_rate": 2.5827660378186564e-05,
      "loss": 2.7517,
      "step": 199748
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5024962425231934,
      "learning_rate": 2.5825999955174213e-05,
      "loss": 2.7,
      "step": 199749
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2116808891296387,
      "learning_rate": 2.5824339583135956e-05,
      "loss": 3.3034,
      "step": 199750
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8020808696746826,
      "learning_rate": 2.5822679262072155e-05,
      "loss": 2.8958,
      "step": 199751
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4466214179992676,
      "learning_rate": 2.5821018991983112e-05,
      "loss": 2.9316,
      "step": 199752
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.779886484146118,
      "learning_rate": 2.5819358772869057e-05,
      "loss": 2.704,
      "step": 199753
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5871009826660156,
      "learning_rate": 2.581769860473043e-05,
      "loss": 2.8392,
      "step": 199754
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8346173763275146,
      "learning_rate": 2.5816038487567457e-05,
      "loss": 2.8107,
      "step": 199755
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.461318016052246,
      "learning_rate": 2.581437842138041e-05,
      "loss": 3.079,
      "step": 199756
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6729509830474854,
      "learning_rate": 2.5812718406169686e-05,
      "loss": 3.0398,
      "step": 199757
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.728243589401245,
      "learning_rate": 2.5811058441935516e-05,
      "loss": 2.8396,
      "step": 199758
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.054378032684326,
      "learning_rate": 2.5809398528678306e-05,
      "loss": 2.7453,
      "step": 199759
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7120985984802246,
      "learning_rate": 2.580773866639828e-05,
      "loss": 2.8086,
      "step": 199760
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6534974575042725,
      "learning_rate": 2.580607885509578e-05,
      "loss": 3.0327,
      "step": 199761
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.514742851257324,
      "learning_rate": 2.5804419094771035e-05,
      "loss": 2.7246,
      "step": 199762
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9860687255859375,
      "learning_rate": 2.580275938542451e-05,
      "loss": 2.4768,
      "step": 199763
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7710208892822266,
      "learning_rate": 2.580109972705634e-05,
      "loss": 2.913,
      "step": 199764
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.038276433944702,
      "learning_rate": 2.5799440119667024e-05,
      "loss": 2.8007,
      "step": 199765
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8223111629486084,
      "learning_rate": 2.579778056325673e-05,
      "loss": 2.7233,
      "step": 199766
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.965278148651123,
      "learning_rate": 2.5796121057825824e-05,
      "loss": 3.0585,
      "step": 199767
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.41910719871521,
      "learning_rate": 2.5794461603374506e-05,
      "loss": 2.9559,
      "step": 199768
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.748917579650879,
      "learning_rate": 2.5792802199903272e-05,
      "loss": 2.7328,
      "step": 199769
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.196388006210327,
      "learning_rate": 2.579114284741226e-05,
      "loss": 2.8774,
      "step": 199770
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6527597904205322,
      "learning_rate": 2.57894835459019e-05,
      "loss": 2.9252,
      "step": 199771
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3045809268951416,
      "learning_rate": 2.5787824295372396e-05,
      "loss": 2.7257,
      "step": 199772
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0172369480133057,
      "learning_rate": 2.578616509582421e-05,
      "loss": 2.8992,
      "step": 199773
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.681972026824951,
      "learning_rate": 2.578450594725747e-05,
      "loss": 2.7723,
      "step": 199774
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5220139026641846,
      "learning_rate": 2.5782846849672623e-05,
      "loss": 2.8749,
      "step": 199775
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8296008110046387,
      "learning_rate": 2.5781187803069824e-05,
      "loss": 2.8892,
      "step": 199776
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.776655912399292,
      "learning_rate": 2.577952880744958e-05,
      "loss": 3.0797,
      "step": 199777
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.505212306976318,
      "learning_rate": 2.5777869862812017e-05,
      "loss": 2.9753,
      "step": 199778
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.971465826034546,
      "learning_rate": 2.577621096915761e-05,
      "loss": 2.9179,
      "step": 199779
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.898043155670166,
      "learning_rate": 2.5774552126486547e-05,
      "loss": 2.9025,
      "step": 199780
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.664434909820557,
      "learning_rate": 2.577289333479917e-05,
      "loss": 2.9172,
      "step": 199781
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.4700067043304443,
      "learning_rate": 2.5771234594095748e-05,
      "loss": 2.9869,
      "step": 199782
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5797524452209473,
      "learning_rate": 2.576957590437667e-05,
      "loss": 2.897,
      "step": 199783
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6166112422943115,
      "learning_rate": 2.576791726564218e-05,
      "loss": 2.7472,
      "step": 199784
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.639127731323242,
      "learning_rate": 2.5766258677892637e-05,
      "loss": 2.947,
      "step": 199785
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6789348125457764,
      "learning_rate": 2.5764600141128344e-05,
      "loss": 3.0442,
      "step": 199786
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.606802225112915,
      "learning_rate": 2.5762941655349567e-05,
      "loss": 3.0411,
      "step": 199787
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7399260997772217,
      "learning_rate": 2.5761283220556606e-05,
      "loss": 2.9476,
      "step": 199788
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9348912239074707,
      "learning_rate": 2.5759624836749827e-05,
      "loss": 2.5847,
      "step": 199789
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.89985728263855,
      "learning_rate": 2.5757966503929496e-05,
      "loss": 3.0011,
      "step": 199790
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3868138790130615,
      "learning_rate": 2.5756308222095947e-05,
      "loss": 2.9279,
      "step": 199791
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7430810928344727,
      "learning_rate": 2.575464999124951e-05,
      "loss": 3.0788,
      "step": 199792
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2274649143218994,
      "learning_rate": 2.5752991811390422e-05,
      "loss": 2.8393,
      "step": 199793
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.458296298980713,
      "learning_rate": 2.5751333682519015e-05,
      "loss": 2.7504,
      "step": 199794
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4864799976348877,
      "learning_rate": 2.574967560463562e-05,
      "loss": 2.9427,
      "step": 199795
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2945024967193604,
      "learning_rate": 2.5748017577740544e-05,
      "loss": 2.8134,
      "step": 199796
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.476886510848999,
      "learning_rate": 2.574635960183411e-05,
      "loss": 2.9584,
      "step": 199797
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4967031478881836,
      "learning_rate": 2.5744701676916592e-05,
      "loss": 2.6364,
      "step": 199798
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.376466989517212,
      "learning_rate": 2.5743043802988316e-05,
      "loss": 2.9312,
      "step": 199799
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0096821784973145,
      "learning_rate": 2.5741385980049557e-05,
      "loss": 3.0262,
      "step": 199800
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.797978162765503,
      "learning_rate": 2.5739728208100674e-05,
      "loss": 2.6596,
      "step": 199801
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.049569845199585,
      "learning_rate": 2.5738070487141904e-05,
      "loss": 2.759,
      "step": 199802
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.831651449203491,
      "learning_rate": 2.573641281717368e-05,
      "loss": 2.8375,
      "step": 199803
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7657673358917236,
      "learning_rate": 2.57347551981962e-05,
      "loss": 3.034,
      "step": 199804
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.707636594772339,
      "learning_rate": 2.573309763020983e-05,
      "loss": 3.0228,
      "step": 199805
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9433481693267822,
      "learning_rate": 2.573144011321481e-05,
      "loss": 2.7348,
      "step": 199806
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0611326694488525,
      "learning_rate": 2.572978264721153e-05,
      "loss": 3.0418,
      "step": 199807
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.679438829421997,
      "learning_rate": 2.572812523220019e-05,
      "loss": 2.9489,
      "step": 199808
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6164278984069824,
      "learning_rate": 2.5726467868181268e-05,
      "loss": 3.0557,
      "step": 199809
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6446781158447266,
      "learning_rate": 2.572481055515495e-05,
      "loss": 3.1371,
      "step": 199810
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4035239219665527,
      "learning_rate": 2.5723153293121544e-05,
      "loss": 2.7325,
      "step": 199811
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7710349559783936,
      "learning_rate": 2.572149608208135e-05,
      "loss": 2.8968,
      "step": 199812
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7784299850463867,
      "learning_rate": 2.5719838922034765e-05,
      "loss": 2.8673,
      "step": 199813
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7992208003997803,
      "learning_rate": 2.5718181812981988e-05,
      "loss": 3.0379,
      "step": 199814
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.007051467895508,
      "learning_rate": 2.571652475492342e-05,
      "loss": 3.0015,
      "step": 199815
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.97220516204834,
      "learning_rate": 2.5714867747859324e-05,
      "loss": 2.8856,
      "step": 199816
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.8228073120117188,
      "learning_rate": 2.5713210791790008e-05,
      "loss": 2.9982,
      "step": 199817
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8644328117370605,
      "learning_rate": 2.5711553886715764e-05,
      "loss": 2.8407,
      "step": 199818
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4342875480651855,
      "learning_rate": 2.570989703263693e-05,
      "loss": 2.961,
      "step": 199819
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9486477375030518,
      "learning_rate": 2.5708240229553767e-05,
      "loss": 2.8712,
      "step": 199820
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.307739496231079,
      "learning_rate": 2.570658347746668e-05,
      "loss": 2.6351,
      "step": 199821
    },
    {
      "epoch": 2.6,
      "grad_norm": 7.299400806427002,
      "learning_rate": 2.5704926776375935e-05,
      "loss": 2.6804,
      "step": 199822
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6654791831970215,
      "learning_rate": 2.5703270126281794e-05,
      "loss": 2.9131,
      "step": 199823
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.087664842605591,
      "learning_rate": 2.5701613527184528e-05,
      "loss": 2.9565,
      "step": 199824
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1349964141845703,
      "learning_rate": 2.5699956979084602e-05,
      "loss": 3.0822,
      "step": 199825
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.479134559631348,
      "learning_rate": 2.5698300481982148e-05,
      "loss": 2.9624,
      "step": 199826
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7767257690429688,
      "learning_rate": 2.56966440358776e-05,
      "loss": 2.896,
      "step": 199827
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7916975021362305,
      "learning_rate": 2.5694987640771258e-05,
      "loss": 2.8708,
      "step": 199828
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.316237688064575,
      "learning_rate": 2.5693331296663388e-05,
      "loss": 2.9315,
      "step": 199829
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.081085443496704,
      "learning_rate": 2.5691675003554257e-05,
      "loss": 2.9034,
      "step": 199830
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.20282244682312,
      "learning_rate": 2.5690018761444265e-05,
      "loss": 2.9235,
      "step": 199831
    },
    {
      "epoch": 2.6,
      "grad_norm": 5.0090012550354,
      "learning_rate": 2.5688362570333608e-05,
      "loss": 2.8424,
      "step": 199832
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.829612970352173,
      "learning_rate": 2.5686706430222724e-05,
      "loss": 3.0257,
      "step": 199833
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4269344806671143,
      "learning_rate": 2.568505034111188e-05,
      "loss": 3.0186,
      "step": 199834
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5500757694244385,
      "learning_rate": 2.568339430300127e-05,
      "loss": 2.833,
      "step": 199835
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.834188222885132,
      "learning_rate": 2.56817383158914e-05,
      "loss": 2.8162,
      "step": 199836
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3026809692382812,
      "learning_rate": 2.568008237978243e-05,
      "loss": 2.9324,
      "step": 199837
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.834766387939453,
      "learning_rate": 2.5678426494674664e-05,
      "loss": 3.0318,
      "step": 199838
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.316725969314575,
      "learning_rate": 2.5676770660568536e-05,
      "loss": 2.7714,
      "step": 199839
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.80372953414917,
      "learning_rate": 2.567511487746424e-05,
      "loss": 3.0131,
      "step": 199840
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.78401517868042,
      "learning_rate": 2.5673459145362084e-05,
      "loss": 2.9143,
      "step": 199841
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.057591199874878,
      "learning_rate": 2.5671803464262464e-05,
      "loss": 2.6675,
      "step": 199842
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8776307106018066,
      "learning_rate": 2.567014783416561e-05,
      "loss": 2.9926,
      "step": 199843
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0247766971588135,
      "learning_rate": 2.566849225507186e-05,
      "loss": 3.0088,
      "step": 199844
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9278907775878906,
      "learning_rate": 2.5666836726981543e-05,
      "loss": 2.6516,
      "step": 199845
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9616124629974365,
      "learning_rate": 2.5665181249894863e-05,
      "loss": 2.8781,
      "step": 199846
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.161311149597168,
      "learning_rate": 2.5663525823812282e-05,
      "loss": 2.9137,
      "step": 199847
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.875312566757202,
      "learning_rate": 2.5661870448734e-05,
      "loss": 2.9555,
      "step": 199848
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3771562576293945,
      "learning_rate": 2.5660215124660322e-05,
      "loss": 3.0924,
      "step": 199849
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.915600299835205,
      "learning_rate": 2.565855985159164e-05,
      "loss": 2.91,
      "step": 199850
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.072901248931885,
      "learning_rate": 2.565690462952823e-05,
      "loss": 2.7747,
      "step": 199851
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.8944764137268066,
      "learning_rate": 2.5655249458470283e-05,
      "loss": 2.8364,
      "step": 199852
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.314220905303955,
      "learning_rate": 2.5653594338418304e-05,
      "loss": 3.167,
      "step": 199853
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.400442123413086,
      "learning_rate": 2.5651939269372457e-05,
      "loss": 3.1407,
      "step": 199854
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2960071563720703,
      "learning_rate": 2.5650284251333077e-05,
      "loss": 3.0912,
      "step": 199855
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.7894577980041504,
      "learning_rate": 2.5648629284300527e-05,
      "loss": 2.7851,
      "step": 199856
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.099092483520508,
      "learning_rate": 2.564697436827501e-05,
      "loss": 2.8379,
      "step": 199857
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9688591957092285,
      "learning_rate": 2.5645319503256957e-05,
      "loss": 2.7694,
      "step": 199858
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.558554172515869,
      "learning_rate": 2.5643664689246635e-05,
      "loss": 2.8143,
      "step": 199859
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7594082355499268,
      "learning_rate": 2.564200992624431e-05,
      "loss": 3.0429,
      "step": 199860
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1051061153411865,
      "learning_rate": 2.5640355214250285e-05,
      "loss": 2.7476,
      "step": 199861
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1330227851867676,
      "learning_rate": 2.5638700553264956e-05,
      "loss": 3.066,
      "step": 199862
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6302762031555176,
      "learning_rate": 2.5637045943288492e-05,
      "loss": 3.0995,
      "step": 199863
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8652892112731934,
      "learning_rate": 2.5635391384321356e-05,
      "loss": 2.9958,
      "step": 199864
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.222926616668701,
      "learning_rate": 2.5633736876363753e-05,
      "loss": 2.985,
      "step": 199865
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.652744770050049,
      "learning_rate": 2.5632082419416044e-05,
      "loss": 3.1834,
      "step": 199866
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.666415214538574,
      "learning_rate": 2.5630428013478433e-05,
      "loss": 2.9633,
      "step": 199867
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.332239866256714,
      "learning_rate": 2.5628773658551383e-05,
      "loss": 2.809,
      "step": 199868
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2835211753845215,
      "learning_rate": 2.562711935463503e-05,
      "loss": 3.2305,
      "step": 199869
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1998586654663086,
      "learning_rate": 2.5625465101729868e-05,
      "loss": 2.9839,
      "step": 199870
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.244452714920044,
      "learning_rate": 2.5623810899836105e-05,
      "loss": 2.857,
      "step": 199871
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9203333854675293,
      "learning_rate": 2.5622156748954036e-05,
      "loss": 2.9427,
      "step": 199872
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3023340702056885,
      "learning_rate": 2.5620502649083928e-05,
      "loss": 2.9273,
      "step": 199873
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0957999229431152,
      "learning_rate": 2.561884860022625e-05,
      "loss": 3.1357,
      "step": 199874
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.417935371398926,
      "learning_rate": 2.5617194602381097e-05,
      "loss": 3.228,
      "step": 199875
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4554603099823,
      "learning_rate": 2.561554065554897e-05,
      "loss": 2.8814,
      "step": 199876
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.58864164352417,
      "learning_rate": 2.5613886759730106e-05,
      "loss": 2.6767,
      "step": 199877
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.128844738006592,
      "learning_rate": 2.561223291492477e-05,
      "loss": 2.928,
      "step": 199878
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.893799066543579,
      "learning_rate": 2.5610579121133258e-05,
      "loss": 3.1493,
      "step": 199879
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1349973678588867,
      "learning_rate": 2.560892537835597e-05,
      "loss": 3.1686,
      "step": 199880
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.9550955295562744,
      "learning_rate": 2.560727168659311e-05,
      "loss": 3.0962,
      "step": 199881
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2756435871124268,
      "learning_rate": 2.560561804584511e-05,
      "loss": 2.8032,
      "step": 199882
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.57073974609375,
      "learning_rate": 2.560396445611217e-05,
      "loss": 3.0902,
      "step": 199883
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7425427436828613,
      "learning_rate": 2.5602310917394654e-05,
      "loss": 3.0005,
      "step": 199884
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.081085205078125,
      "learning_rate": 2.5600657429692793e-05,
      "loss": 2.9103,
      "step": 199885
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.428075790405273,
      "learning_rate": 2.5599003993006995e-05,
      "loss": 2.7234,
      "step": 199886
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1843459606170654,
      "learning_rate": 2.5597350607337487e-05,
      "loss": 2.8396,
      "step": 199887
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.351480722427368,
      "learning_rate": 2.5595697272684634e-05,
      "loss": 2.8879,
      "step": 199888
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1292951107025146,
      "learning_rate": 2.559404398904874e-05,
      "loss": 2.9634,
      "step": 199889
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.936591625213623,
      "learning_rate": 2.5592390756430104e-05,
      "loss": 2.7478,
      "step": 199890
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.976928949356079,
      "learning_rate": 2.5590737574828955e-05,
      "loss": 2.8713,
      "step": 199891
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.026477575302124,
      "learning_rate": 2.5589084444245733e-05,
      "loss": 2.9828,
      "step": 199892
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5444867610931396,
      "learning_rate": 2.55874313646806e-05,
      "loss": 3.1851,
      "step": 199893
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.023273944854736,
      "learning_rate": 2.5585778336134022e-05,
      "loss": 2.8317,
      "step": 199894
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.138317108154297,
      "learning_rate": 2.5584125358606234e-05,
      "loss": 3.0655,
      "step": 199895
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8150060176849365,
      "learning_rate": 2.55824724320975e-05,
      "loss": 2.9428,
      "step": 199896
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.214094877243042,
      "learning_rate": 2.5580819556608158e-05,
      "loss": 2.8772,
      "step": 199897
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9155988693237305,
      "learning_rate": 2.5579166732138533e-05,
      "loss": 3.2102,
      "step": 199898
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.581216812133789,
      "learning_rate": 2.55775139586889e-05,
      "loss": 3.0139,
      "step": 199899
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0695528984069824,
      "learning_rate": 2.557586123625962e-05,
      "loss": 2.9773,
      "step": 199900
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0054776668548584,
      "learning_rate": 2.5574208564850963e-05,
      "loss": 2.7546,
      "step": 199901
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.158038139343262,
      "learning_rate": 2.5572555944463258e-05,
      "loss": 2.7749,
      "step": 199902
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8758201599121094,
      "learning_rate": 2.557090337509674e-05,
      "loss": 3.0164,
      "step": 199903
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.199357032775879,
      "learning_rate": 2.556925085675181e-05,
      "loss": 2.8935,
      "step": 199904
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0529396533966064,
      "learning_rate": 2.55675983894287e-05,
      "loss": 2.8334,
      "step": 199905
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.624840259552002,
      "learning_rate": 2.556594597312781e-05,
      "loss": 3.0781,
      "step": 199906
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3939049243927,
      "learning_rate": 2.5564293607849372e-05,
      "loss": 2.9639,
      "step": 199907
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.904214382171631,
      "learning_rate": 2.5562641293593722e-05,
      "loss": 2.851,
      "step": 199908
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5367801189422607,
      "learning_rate": 2.556098903036109e-05,
      "loss": 3.0486,
      "step": 199909
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.580331563949585,
      "learning_rate": 2.5559336818151942e-05,
      "loss": 2.9385,
      "step": 199910
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.634157657623291,
      "learning_rate": 2.5557684656966415e-05,
      "loss": 3.0165,
      "step": 199911
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.944695234298706,
      "learning_rate": 2.555603254680494e-05,
      "loss": 2.8698,
      "step": 199912
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.276723623275757,
      "learning_rate": 2.5554380487667746e-05,
      "loss": 3.0393,
      "step": 199913
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8931894302368164,
      "learning_rate": 2.555272847955524e-05,
      "loss": 2.9106,
      "step": 199914
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0835330486297607,
      "learning_rate": 2.5551076522467585e-05,
      "loss": 2.8882,
      "step": 199915
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0500950813293457,
      "learning_rate": 2.5549424616405244e-05,
      "loss": 3.2642,
      "step": 199916
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.3590192794799805,
      "learning_rate": 2.5547772761368358e-05,
      "loss": 3.0334,
      "step": 199917
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.808772325515747,
      "learning_rate": 2.5546120957357387e-05,
      "loss": 3.0232,
      "step": 199918
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.420752763748169,
      "learning_rate": 2.5544469204372497e-05,
      "loss": 2.9787,
      "step": 199919
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2207295894622803,
      "learning_rate": 2.554281750241416e-05,
      "loss": 2.9586,
      "step": 199920
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8691649436950684,
      "learning_rate": 2.554116585148257e-05,
      "loss": 2.8809,
      "step": 199921
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7267751693725586,
      "learning_rate": 2.5539514251578065e-05,
      "loss": 3.0151,
      "step": 199922
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.789924383163452,
      "learning_rate": 2.5537862702700906e-05,
      "loss": 2.9391,
      "step": 199923
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6693115234375,
      "learning_rate": 2.5536211204851464e-05,
      "loss": 2.793,
      "step": 199924
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6930127143859863,
      "learning_rate": 2.5534559758030004e-05,
      "loss": 2.9992,
      "step": 199925
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.213820457458496,
      "learning_rate": 2.5532908362236858e-05,
      "loss": 2.8439,
      "step": 199926
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.658902883529663,
      "learning_rate": 2.553125701747236e-05,
      "loss": 2.7956,
      "step": 199927
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1310956478118896,
      "learning_rate": 2.552960572373678e-05,
      "loss": 3.0847,
      "step": 199928
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.679647445678711,
      "learning_rate": 2.5527954481030344e-05,
      "loss": 2.819,
      "step": 199929
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.208733320236206,
      "learning_rate": 2.5526303289353524e-05,
      "loss": 3.1186,
      "step": 199930
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.661820888519287,
      "learning_rate": 2.552465214870648e-05,
      "loss": 3.0973,
      "step": 199931
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2492024898529053,
      "learning_rate": 2.5523001059089654e-05,
      "loss": 2.9767,
      "step": 199932
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2357640266418457,
      "learning_rate": 2.5521350020503272e-05,
      "loss": 2.9382,
      "step": 199933
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.775278329849243,
      "learning_rate": 2.5519699032947606e-05,
      "loss": 2.9581,
      "step": 199934
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7991044521331787,
      "learning_rate": 2.551804809642305e-05,
      "loss": 3.0966,
      "step": 199935
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7978484630584717,
      "learning_rate": 2.5516397210929873e-05,
      "loss": 2.9859,
      "step": 199936
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0109689235687256,
      "learning_rate": 2.551474637646834e-05,
      "loss": 2.9669,
      "step": 199937
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0567431449890137,
      "learning_rate": 2.5513095593038823e-05,
      "loss": 3.0261,
      "step": 199938
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0092718601226807,
      "learning_rate": 2.5511444860641617e-05,
      "loss": 2.7598,
      "step": 199939
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.631617307662964,
      "learning_rate": 2.5509794179276988e-05,
      "loss": 2.638,
      "step": 199940
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2093653678894043,
      "learning_rate": 2.550814354894527e-05,
      "loss": 2.8281,
      "step": 199941
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7092714309692383,
      "learning_rate": 2.5506492969646762e-05,
      "loss": 3.189,
      "step": 199942
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.681267738342285,
      "learning_rate": 2.55048424413818e-05,
      "loss": 2.7276,
      "step": 199943
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.437459945678711,
      "learning_rate": 2.550319196415068e-05,
      "loss": 2.988,
      "step": 199944
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6602938175201416,
      "learning_rate": 2.5501541537953707e-05,
      "loss": 3.0402,
      "step": 199945
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.881408929824829,
      "learning_rate": 2.5499891162791142e-05,
      "loss": 2.9386,
      "step": 199946
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0415008068084717,
      "learning_rate": 2.5498240838663352e-05,
      "loss": 3.0783,
      "step": 199947
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4155147075653076,
      "learning_rate": 2.5496590565570575e-05,
      "loss": 2.9476,
      "step": 199948
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.226710319519043,
      "learning_rate": 2.5494940343513204e-05,
      "loss": 3.0245,
      "step": 199949
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9529755115509033,
      "learning_rate": 2.5493290172491542e-05,
      "loss": 2.7722,
      "step": 199950
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.777036666870117,
      "learning_rate": 2.5491640052505825e-05,
      "loss": 3.0597,
      "step": 199951
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.359402656555176,
      "learning_rate": 2.5489989983556347e-05,
      "loss": 2.9128,
      "step": 199952
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7607545852661133,
      "learning_rate": 2.548833996564351e-05,
      "loss": 3.0142,
      "step": 199953
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8846089839935303,
      "learning_rate": 2.548668999876755e-05,
      "loss": 3.1128,
      "step": 199954
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4312331676483154,
      "learning_rate": 2.5485040082928832e-05,
      "loss": 2.9897,
      "step": 199955
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9973931312561035,
      "learning_rate": 2.548339021812762e-05,
      "loss": 3.0364,
      "step": 199956
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.396657705307007,
      "learning_rate": 2.5481740404364215e-05,
      "loss": 2.9221,
      "step": 199957
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.761286735534668,
      "learning_rate": 2.5480090641638916e-05,
      "loss": 3.1414,
      "step": 199958
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5584733486175537,
      "learning_rate": 2.547844092995206e-05,
      "loss": 2.9737,
      "step": 199959
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2436976432800293,
      "learning_rate": 2.5476791269303943e-05,
      "loss": 3.0352,
      "step": 199960
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.700836181640625,
      "learning_rate": 2.54751416596949e-05,
      "loss": 2.9106,
      "step": 199961
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7259247303009033,
      "learning_rate": 2.547349210112519e-05,
      "loss": 2.9308,
      "step": 199962
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9952094554901123,
      "learning_rate": 2.547184259359516e-05,
      "loss": 2.6708,
      "step": 199963
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.057391405105591,
      "learning_rate": 2.5470193137105032e-05,
      "loss": 2.9521,
      "step": 199964
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1681761741638184,
      "learning_rate": 2.5468543731655245e-05,
      "loss": 3.0782,
      "step": 199965
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9926440715789795,
      "learning_rate": 2.5466894377245993e-05,
      "loss": 2.6965,
      "step": 199966
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0199108123779297,
      "learning_rate": 2.5465245073877683e-05,
      "loss": 2.8558,
      "step": 199967
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.155822515487671,
      "learning_rate": 2.546359582155054e-05,
      "loss": 2.994,
      "step": 199968
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.386004447937012,
      "learning_rate": 2.5461946620264907e-05,
      "loss": 3.133,
      "step": 199969
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1487622261047363,
      "learning_rate": 2.546029747002104e-05,
      "loss": 3.1742,
      "step": 199970
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.715290069580078,
      "learning_rate": 2.5458648370819313e-05,
      "loss": 3.0032,
      "step": 199971
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.085464954376221,
      "learning_rate": 2.5456999322659953e-05,
      "loss": 3.0398,
      "step": 199972
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5538744926452637,
      "learning_rate": 2.5455350325543398e-05,
      "loss": 2.7798,
      "step": 199973
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.553976058959961,
      "learning_rate": 2.5453701379469882e-05,
      "loss": 2.8524,
      "step": 199974
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.8756868839263916,
      "learning_rate": 2.5452052484439666e-05,
      "loss": 2.7621,
      "step": 199975
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6985416412353516,
      "learning_rate": 2.5450403640453088e-05,
      "loss": 3.3558,
      "step": 199976
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.981255531311035,
      "learning_rate": 2.5448754847510478e-05,
      "loss": 2.7926,
      "step": 199977
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8429489135742188,
      "learning_rate": 2.5447106105612102e-05,
      "loss": 2.902,
      "step": 199978
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8303651809692383,
      "learning_rate": 2.544545741475833e-05,
      "loss": 2.8414,
      "step": 199979
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6972413063049316,
      "learning_rate": 2.544380877494936e-05,
      "loss": 2.8429,
      "step": 199980
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7609968185424805,
      "learning_rate": 2.5442160186185688e-05,
      "loss": 3.0266,
      "step": 199981
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.872870683670044,
      "learning_rate": 2.544051164846742e-05,
      "loss": 2.8515,
      "step": 199982
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.7306461334228516,
      "learning_rate": 2.5438863161794985e-05,
      "loss": 2.8441,
      "step": 199983
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.4628608226776123,
      "learning_rate": 2.5437214726168585e-05,
      "loss": 3.1079,
      "step": 199984
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.945265293121338,
      "learning_rate": 2.543556634158862e-05,
      "loss": 2.6705,
      "step": 199985
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7325422763824463,
      "learning_rate": 2.5433918008055354e-05,
      "loss": 2.8466,
      "step": 199986
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2717814445495605,
      "learning_rate": 2.5432269725569187e-05,
      "loss": 3.0699,
      "step": 199987
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.872992515563965,
      "learning_rate": 2.5430621494130254e-05,
      "loss": 2.8048,
      "step": 199988
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2543299198150635,
      "learning_rate": 2.5428973313738987e-05,
      "loss": 2.8304,
      "step": 199989
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.314470052719116,
      "learning_rate": 2.5427325184395618e-05,
      "loss": 2.9521,
      "step": 199990
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.732766628265381,
      "learning_rate": 2.542567710610055e-05,
      "loss": 3.0505,
      "step": 199991
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1081509590148926,
      "learning_rate": 2.5424029078853948e-05,
      "loss": 2.9816,
      "step": 199992
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9943361282348633,
      "learning_rate": 2.542238110265634e-05,
      "loss": 2.8953,
      "step": 199993
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.798191547393799,
      "learning_rate": 2.5420733177507767e-05,
      "loss": 2.6088,
      "step": 199994
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1782703399658203,
      "learning_rate": 2.5419085303408727e-05,
      "loss": 3.0215,
      "step": 199995
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8756072521209717,
      "learning_rate": 2.5417437480359414e-05,
      "loss": 2.745,
      "step": 199996
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.407423496246338,
      "learning_rate": 2.5415789708360235e-05,
      "loss": 2.9958,
      "step": 199997
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.76879620552063,
      "learning_rate": 2.5414141987411386e-05,
      "loss": 3.0155,
      "step": 199998
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.63527512550354,
      "learning_rate": 2.541249431751333e-05,
      "loss": 2.8602,
      "step": 199999
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.695570707321167,
      "learning_rate": 2.5410846698666175e-05,
      "loss": 3.0501,
      "step": 200000
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6691579818725586,
      "learning_rate": 2.5409199130870383e-05,
      "loss": 3.0526,
      "step": 200001
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.7584304809570312,
      "learning_rate": 2.5407551614126154e-05,
      "loss": 2.8754,
      "step": 200002
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.855536460876465,
      "learning_rate": 2.5405904148433885e-05,
      "loss": 2.9903,
      "step": 200003
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1330740451812744,
      "learning_rate": 2.5404256733793783e-05,
      "loss": 2.6963,
      "step": 200004
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9520139694213867,
      "learning_rate": 2.5402609370206274e-05,
      "loss": 3.0189,
      "step": 200005
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4964160919189453,
      "learning_rate": 2.5400962057671626e-05,
      "loss": 3.0061,
      "step": 200006
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3878843784332275,
      "learning_rate": 2.5399314796190107e-05,
      "loss": 2.8016,
      "step": 200007
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6840627193450928,
      "learning_rate": 2.539766758576198e-05,
      "loss": 3.1027,
      "step": 200008
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.589339256286621,
      "learning_rate": 2.5396020426387655e-05,
      "loss": 2.9735,
      "step": 200009
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.914052963256836,
      "learning_rate": 2.539437331806735e-05,
      "loss": 2.991,
      "step": 200010
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.643599033355713,
      "learning_rate": 2.5392726260801478e-05,
      "loss": 3.0511,
      "step": 200011
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.846269369125366,
      "learning_rate": 2.5391079254590264e-05,
      "loss": 2.8133,
      "step": 200012
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.60610294342041,
      "learning_rate": 2.5389432299434043e-05,
      "loss": 3.0831,
      "step": 200013
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.067342519760132,
      "learning_rate": 2.5387785395333048e-05,
      "loss": 2.8678,
      "step": 200014
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2349021434783936,
      "learning_rate": 2.5386138542287716e-05,
      "loss": 2.862,
      "step": 200015
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.271237850189209,
      "learning_rate": 2.538449174029824e-05,
      "loss": 2.7218,
      "step": 200016
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8726162910461426,
      "learning_rate": 2.5382844989364993e-05,
      "loss": 3.2399,
      "step": 200017
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.482987403869629,
      "learning_rate": 2.538119828948827e-05,
      "loss": 3.0351,
      "step": 200018
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.521911144256592,
      "learning_rate": 2.5379551640668338e-05,
      "loss": 2.9902,
      "step": 200019
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0661914348602295,
      "learning_rate": 2.5377905042905566e-05,
      "loss": 2.9388,
      "step": 200020
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0902533531188965,
      "learning_rate": 2.537625849620022e-05,
      "loss": 2.6692,
      "step": 200021
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.015352725982666,
      "learning_rate": 2.537461200055253e-05,
      "loss": 2.8657,
      "step": 200022
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9501895904541016,
      "learning_rate": 2.5372965555962997e-05,
      "loss": 2.9664,
      "step": 200023
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6998093128204346,
      "learning_rate": 2.537131916243176e-05,
      "loss": 2.8793,
      "step": 200024
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4576144218444824,
      "learning_rate": 2.5369672819959174e-05,
      "loss": 2.9679,
      "step": 200025
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.4112460613250732,
      "learning_rate": 2.536802652854555e-05,
      "loss": 2.6192,
      "step": 200026
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.400909423828125,
      "learning_rate": 2.5366380288191244e-05,
      "loss": 3.0419,
      "step": 200027
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.6419742107391357,
      "learning_rate": 2.5364734098896434e-05,
      "loss": 2.8058,
      "step": 200028
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.380690574645996,
      "learning_rate": 2.5363087960661542e-05,
      "loss": 2.9104,
      "step": 200029
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1670846939086914,
      "learning_rate": 2.5361441873486877e-05,
      "loss": 2.8605,
      "step": 200030
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.194209575653076,
      "learning_rate": 2.5359795837372632e-05,
      "loss": 2.781,
      "step": 200031
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.004855155944824,
      "learning_rate": 2.535814985231921e-05,
      "loss": 3.001,
      "step": 200032
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.358757734298706,
      "learning_rate": 2.5356503918326875e-05,
      "loss": 2.826,
      "step": 200033
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6982078552246094,
      "learning_rate": 2.5354858035395998e-05,
      "loss": 3.0167,
      "step": 200034
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.492417812347412,
      "learning_rate": 2.535321220352684e-05,
      "loss": 2.9342,
      "step": 200035
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3471527099609375,
      "learning_rate": 2.535156642271967e-05,
      "loss": 2.9998,
      "step": 200036
    },
    {
      "epoch": 2.6,
      "grad_norm": 6.639121055603027,
      "learning_rate": 2.5349920692974823e-05,
      "loss": 2.9615,
      "step": 200037
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6059505939483643,
      "learning_rate": 2.5348275014292628e-05,
      "loss": 2.9992,
      "step": 200038
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.291708469390869,
      "learning_rate": 2.5346629386673324e-05,
      "loss": 2.7759,
      "step": 200039
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.079052925109863,
      "learning_rate": 2.5344983810117336e-05,
      "loss": 2.8287,
      "step": 200040
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6365573406219482,
      "learning_rate": 2.5343338284624902e-05,
      "loss": 3.0436,
      "step": 200041
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.323050022125244,
      "learning_rate": 2.5341692810196324e-05,
      "loss": 2.9182,
      "step": 200042
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.6823415756225586,
      "learning_rate": 2.534004738683183e-05,
      "loss": 2.825,
      "step": 200043
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.5647971630096436,
      "learning_rate": 2.533840201453189e-05,
      "loss": 2.8616,
      "step": 200044
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.012956619262695,
      "learning_rate": 2.5336756693296634e-05,
      "loss": 3.1174,
      "step": 200045
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.3198444843292236,
      "learning_rate": 2.5335111423126565e-05,
      "loss": 3.1203,
      "step": 200046
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.163032293319702,
      "learning_rate": 2.533346620402178e-05,
      "loss": 2.9609,
      "step": 200047
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.398282766342163,
      "learning_rate": 2.5331821035982845e-05,
      "loss": 2.9306,
      "step": 200048
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9008586406707764,
      "learning_rate": 2.5330175919009767e-05,
      "loss": 2.9772,
      "step": 200049
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.104825019836426,
      "learning_rate": 2.532853085310307e-05,
      "loss": 2.8137,
      "step": 200050
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.8249073028564453,
      "learning_rate": 2.532688583826289e-05,
      "loss": 2.9152,
      "step": 200051
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.70032000541687,
      "learning_rate": 2.5325240874489726e-05,
      "loss": 3.0221,
      "step": 200052
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.114471197128296,
      "learning_rate": 2.5323595961783715e-05,
      "loss": 2.7293,
      "step": 200053
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0338470935821533,
      "learning_rate": 2.532195110014532e-05,
      "loss": 3.0003,
      "step": 200054
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.0846192836761475,
      "learning_rate": 2.5320306289574678e-05,
      "loss": 2.7831,
      "step": 200055
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.5163912773132324,
      "learning_rate": 2.5318661530072214e-05,
      "loss": 2.95,
      "step": 200056
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1632301807403564,
      "learning_rate": 2.531701682163817e-05,
      "loss": 3.056,
      "step": 200057
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.019805908203125,
      "learning_rate": 2.5315372164272907e-05,
      "loss": 2.9904,
      "step": 200058
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.804518222808838,
      "learning_rate": 2.531372755797666e-05,
      "loss": 3.0683,
      "step": 200059
    },
    {
      "epoch": 2.6,
      "grad_norm": 2.9678471088409424,
      "learning_rate": 2.531208300274986e-05,
      "loss": 2.7061,
      "step": 200060
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.1880440711975098,
      "learning_rate": 2.5310438498592644e-05,
      "loss": 2.8698,
      "step": 200061
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.25839900970459,
      "learning_rate": 2.5308794045505443e-05,
      "loss": 3.0618,
      "step": 200062
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.2255356311798096,
      "learning_rate": 2.5307149643488488e-05,
      "loss": 3.1299,
      "step": 200063
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.31549072265625,
      "learning_rate": 2.530550529254215e-05,
      "loss": 2.9178,
      "step": 200064
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.8160171508789062,
      "learning_rate": 2.5303860992666656e-05,
      "loss": 2.7259,
      "step": 200065
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.634091377258301,
      "learning_rate": 2.530221674386248e-05,
      "loss": 3.0804,
      "step": 200066
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9688684940338135,
      "learning_rate": 2.5300572546129683e-05,
      "loss": 2.7021,
      "step": 200067
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.82717227935791,
      "learning_rate": 2.5298928399468764e-05,
      "loss": 2.5517,
      "step": 200068
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.738774538040161,
      "learning_rate": 2.5297284303879896e-05,
      "loss": 3.1479,
      "step": 200069
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.999690294265747,
      "learning_rate": 2.5295640259363503e-05,
      "loss": 2.7804,
      "step": 200070
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4765357971191406,
      "learning_rate": 2.529399626591979e-05,
      "loss": 2.9389,
      "step": 200071
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.202023983001709,
      "learning_rate": 2.5292352323549192e-05,
      "loss": 2.7229,
      "step": 200072
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9461426734924316,
      "learning_rate": 2.529070843225184e-05,
      "loss": 3.0186,
      "step": 200073
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8375790119171143,
      "learning_rate": 2.5289064592028196e-05,
      "loss": 2.7264,
      "step": 200074
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2620480060577393,
      "learning_rate": 2.5287420802878434e-05,
      "loss": 2.8617,
      "step": 200075
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8609189987182617,
      "learning_rate": 2.5285777064802982e-05,
      "loss": 3.0128,
      "step": 200076
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2082302570343018,
      "learning_rate": 2.5284133377802042e-05,
      "loss": 2.8577,
      "step": 200077
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9725093841552734,
      "learning_rate": 2.5282489741876044e-05,
      "loss": 2.6974,
      "step": 200078
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.9334912300109863,
      "learning_rate": 2.528084615702516e-05,
      "loss": 2.756,
      "step": 200079
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.786287307739258,
      "learning_rate": 2.5279202623249782e-05,
      "loss": 3.0521,
      "step": 200080
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.596863269805908,
      "learning_rate": 2.527755914055012e-05,
      "loss": 2.771,
      "step": 200081
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0768423080444336,
      "learning_rate": 2.5275915708926598e-05,
      "loss": 2.8703,
      "step": 200082
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0023200511932373,
      "learning_rate": 2.527427232837942e-05,
      "loss": 3.0062,
      "step": 200083
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9273130893707275,
      "learning_rate": 2.5272628998909084e-05,
      "loss": 3.0812,
      "step": 200084
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7751848697662354,
      "learning_rate": 2.527098572051559e-05,
      "loss": 2.9589,
      "step": 200085
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6184115409851074,
      "learning_rate": 2.5269342493199506e-05,
      "loss": 2.9082,
      "step": 200086
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.814053535461426,
      "learning_rate": 2.5267699316960965e-05,
      "loss": 2.6674,
      "step": 200087
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1277644634246826,
      "learning_rate": 2.52660561918004e-05,
      "loss": 3.0798,
      "step": 200088
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8297488689422607,
      "learning_rate": 2.5264413117717973e-05,
      "loss": 3.045,
      "step": 200089
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6601169109344482,
      "learning_rate": 2.5262770094714224e-05,
      "loss": 2.82,
      "step": 200090
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.923762798309326,
      "learning_rate": 2.5261127122789215e-05,
      "loss": 2.9439,
      "step": 200091
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.070117950439453,
      "learning_rate": 2.5259484201943382e-05,
      "loss": 2.8084,
      "step": 200092
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.109785795211792,
      "learning_rate": 2.525784133217692e-05,
      "loss": 2.7411,
      "step": 200093
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7023186683654785,
      "learning_rate": 2.52561985134903e-05,
      "loss": 2.8665,
      "step": 200094
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.146393060684204,
      "learning_rate": 2.5254555745883685e-05,
      "loss": 2.83,
      "step": 200095
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.192880868911743,
      "learning_rate": 2.525291302935748e-05,
      "loss": 2.8987,
      "step": 200096
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9195828437805176,
      "learning_rate": 2.5251270363911945e-05,
      "loss": 2.9519,
      "step": 200097
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2538981437683105,
      "learning_rate": 2.5249627749547387e-05,
      "loss": 3.087,
      "step": 200098
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.037095785140991,
      "learning_rate": 2.524798518626403e-05,
      "loss": 3.0103,
      "step": 200099
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2255208492279053,
      "learning_rate": 2.524634267406235e-05,
      "loss": 3.0403,
      "step": 200100
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6784653663635254,
      "learning_rate": 2.524470021294247e-05,
      "loss": 3.0864,
      "step": 200101
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4181904792785645,
      "learning_rate": 2.5243057802904863e-05,
      "loss": 3.1344,
      "step": 200102
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.868236780166626,
      "learning_rate": 2.5241415443949764e-05,
      "loss": 3.0005,
      "step": 200103
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6181888580322266,
      "learning_rate": 2.5239773136077433e-05,
      "loss": 3.0705,
      "step": 200104
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5530412197113037,
      "learning_rate": 2.5238130879288243e-05,
      "loss": 3.1371,
      "step": 200105
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.710554838180542,
      "learning_rate": 2.5236488673582455e-05,
      "loss": 2.8876,
      "step": 200106
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3734681606292725,
      "learning_rate": 2.5234846518960373e-05,
      "loss": 2.9207,
      "step": 200107
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.957479476928711,
      "learning_rate": 2.5233204415422357e-05,
      "loss": 2.5164,
      "step": 200108
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6172776222229004,
      "learning_rate": 2.5231562362968682e-05,
      "loss": 3.0776,
      "step": 200109
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2606053352355957,
      "learning_rate": 2.5229920361599577e-05,
      "loss": 3.0615,
      "step": 200110
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.910128116607666,
      "learning_rate": 2.5228278411315472e-05,
      "loss": 3.2596,
      "step": 200111
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.070194721221924,
      "learning_rate": 2.5226636512116637e-05,
      "loss": 2.8194,
      "step": 200112
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.227100372314453,
      "learning_rate": 2.5224994664003273e-05,
      "loss": 2.8743,
      "step": 200113
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1549665927886963,
      "learning_rate": 2.5223352866975842e-05,
      "loss": 2.9894,
      "step": 200114
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.917386770248413,
      "learning_rate": 2.5221711121034584e-05,
      "loss": 2.8942,
      "step": 200115
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.239875793457031,
      "learning_rate": 2.5220069426179723e-05,
      "loss": 2.9179,
      "step": 200116
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.651047945022583,
      "learning_rate": 2.52184277824117e-05,
      "loss": 2.96,
      "step": 200117
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9652795791625977,
      "learning_rate": 2.521678618973071e-05,
      "loss": 2.9661,
      "step": 200118
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.119193077087402,
      "learning_rate": 2.5215144648137152e-05,
      "loss": 3.0776,
      "step": 200119
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.743829727172852,
      "learning_rate": 2.52135031576313e-05,
      "loss": 2.9189,
      "step": 200120
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8115150928497314,
      "learning_rate": 2.5211861718213444e-05,
      "loss": 2.8938,
      "step": 200121
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.9200170040130615,
      "learning_rate": 2.5210220329883825e-05,
      "loss": 3.0324,
      "step": 200122
    },
    {
      "epoch": 2.61,
      "grad_norm": 6.49386739730835,
      "learning_rate": 2.520857899264287e-05,
      "loss": 2.8435,
      "step": 200123
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.344212293624878,
      "learning_rate": 2.5206937706490783e-05,
      "loss": 2.8845,
      "step": 200124
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.492511749267578,
      "learning_rate": 2.5205296471427926e-05,
      "loss": 2.9127,
      "step": 200125
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2233173847198486,
      "learning_rate": 2.520365528745457e-05,
      "loss": 3.0395,
      "step": 200126
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.287645101547241,
      "learning_rate": 2.5202014154571148e-05,
      "loss": 3.078,
      "step": 200127
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.8028483390808105,
      "learning_rate": 2.5200373072777758e-05,
      "loss": 2.9166,
      "step": 200128
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.910712957382202,
      "learning_rate": 2.5198732042074865e-05,
      "loss": 2.8771,
      "step": 200129
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.573756217956543,
      "learning_rate": 2.5197091062462637e-05,
      "loss": 2.7294,
      "step": 200130
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0918264389038086,
      "learning_rate": 2.5195450133941543e-05,
      "loss": 3.0137,
      "step": 200131
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.664123058319092,
      "learning_rate": 2.519380925651171e-05,
      "loss": 2.882,
      "step": 200132
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.17067551612854,
      "learning_rate": 2.5192168430173676e-05,
      "loss": 2.8733,
      "step": 200133
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.89961314201355,
      "learning_rate": 2.5190527654927473e-05,
      "loss": 2.9403,
      "step": 200134
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7272651195526123,
      "learning_rate": 2.51888869307736e-05,
      "loss": 2.8324,
      "step": 200135
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.560145378112793,
      "learning_rate": 2.518724625771226e-05,
      "loss": 3.0504,
      "step": 200136
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.8628804683685303,
      "learning_rate": 2.5185605635743843e-05,
      "loss": 2.9933,
      "step": 200137
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1519343852996826,
      "learning_rate": 2.5183965064868527e-05,
      "loss": 3.0034,
      "step": 200138
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.061715841293335,
      "learning_rate": 2.518232454508684e-05,
      "loss": 2.8862,
      "step": 200139
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.704394578933716,
      "learning_rate": 2.518068407639885e-05,
      "loss": 2.7798,
      "step": 200140
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4823007583618164,
      "learning_rate": 2.5179043658804986e-05,
      "loss": 2.9386,
      "step": 200141
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.625624656677246,
      "learning_rate": 2.517740329230549e-05,
      "loss": 2.9263,
      "step": 200142
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.035011291503906,
      "learning_rate": 2.5175762976900716e-05,
      "loss": 2.8341,
      "step": 200143
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4571688175201416,
      "learning_rate": 2.517412271259094e-05,
      "loss": 2.8644,
      "step": 200144
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8438241481781006,
      "learning_rate": 2.517248249937659e-05,
      "loss": 2.708,
      "step": 200145
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3459994792938232,
      "learning_rate": 2.5170842337257736e-05,
      "loss": 2.8786,
      "step": 200146
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.402810573577881,
      "learning_rate": 2.5169202226234875e-05,
      "loss": 2.5291,
      "step": 200147
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.495603561401367,
      "learning_rate": 2.516756216630821e-05,
      "loss": 3.007,
      "step": 200148
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.298093557357788,
      "learning_rate": 2.51659221574781e-05,
      "loss": 2.9039,
      "step": 200149
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.105618476867676,
      "learning_rate": 2.5164282199744822e-05,
      "loss": 2.8653,
      "step": 200150
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6973607540130615,
      "learning_rate": 2.516264229310877e-05,
      "loss": 2.6349,
      "step": 200151
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7822935581207275,
      "learning_rate": 2.5161002437570076e-05,
      "loss": 3.0037,
      "step": 200152
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.663783550262451,
      "learning_rate": 2.5159362633129177e-05,
      "loss": 2.7542,
      "step": 200153
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3195056915283203,
      "learning_rate": 2.51577228797863e-05,
      "loss": 2.6734,
      "step": 200154
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.060446739196777,
      "learning_rate": 2.515608317754185e-05,
      "loss": 3.0152,
      "step": 200155
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5106472969055176,
      "learning_rate": 2.5154443526395996e-05,
      "loss": 2.749,
      "step": 200156
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8487112522125244,
      "learning_rate": 2.5152803926349264e-05,
      "loss": 3.1616,
      "step": 200157
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.950518846511841,
      "learning_rate": 2.5151164377401654e-05,
      "loss": 3.064,
      "step": 200158
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6933302879333496,
      "learning_rate": 2.514952487955374e-05,
      "loss": 2.9588,
      "step": 200159
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.949770927429199,
      "learning_rate": 2.5147885432805615e-05,
      "loss": 2.6829,
      "step": 200160
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1708498001098633,
      "learning_rate": 2.514624603715778e-05,
      "loss": 2.8806,
      "step": 200161
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9142870903015137,
      "learning_rate": 2.514460669261037e-05,
      "loss": 2.9772,
      "step": 200162
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6331870555877686,
      "learning_rate": 2.5142967399163883e-05,
      "loss": 3.1498,
      "step": 200163
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.185058355331421,
      "learning_rate": 2.5141328156818385e-05,
      "loss": 2.9137,
      "step": 200164
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4703075885772705,
      "learning_rate": 2.5139688965574378e-05,
      "loss": 3.0863,
      "step": 200165
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.725624084472656,
      "learning_rate": 2.5138049825432028e-05,
      "loss": 2.8406,
      "step": 200166
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8368427753448486,
      "learning_rate": 2.513641073639173e-05,
      "loss": 2.8794,
      "step": 200167
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9593863487243652,
      "learning_rate": 2.5134771698453725e-05,
      "loss": 2.9137,
      "step": 200168
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2270376682281494,
      "learning_rate": 2.5133132711618477e-05,
      "loss": 2.946,
      "step": 200169
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.737907886505127,
      "learning_rate": 2.5131493775886048e-05,
      "loss": 2.948,
      "step": 200170
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.540558338165283,
      "learning_rate": 2.512985489125694e-05,
      "loss": 3.077,
      "step": 200171
    },
    {
      "epoch": 2.61,
      "grad_norm": 5.2872161865234375,
      "learning_rate": 2.5128216057731287e-05,
      "loss": 3.0818,
      "step": 200172
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1212985515594482,
      "learning_rate": 2.512657727530959e-05,
      "loss": 3.0079,
      "step": 200173
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8672640323638916,
      "learning_rate": 2.5124938543991946e-05,
      "loss": 3.0542,
      "step": 200174
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.540356397628784,
      "learning_rate": 2.5123299863778888e-05,
      "loss": 2.9675,
      "step": 200175
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7297792434692383,
      "learning_rate": 2.512166123467052e-05,
      "loss": 3.1029,
      "step": 200176
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.927684307098389,
      "learning_rate": 2.512002265666724e-05,
      "loss": 2.9101,
      "step": 200177
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6571571826934814,
      "learning_rate": 2.511838412976931e-05,
      "loss": 2.9467,
      "step": 200178
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.715747594833374,
      "learning_rate": 2.51167456539771e-05,
      "loss": 2.9385,
      "step": 200179
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5821690559387207,
      "learning_rate": 2.511510722929081e-05,
      "loss": 3.1686,
      "step": 200180
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9604685306549072,
      "learning_rate": 2.5113468855710873e-05,
      "loss": 2.7993,
      "step": 200181
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7863545417785645,
      "learning_rate": 2.5111830533237555e-05,
      "loss": 3.06,
      "step": 200182
    },
    {
      "epoch": 2.61,
      "grad_norm": 5.046694278717041,
      "learning_rate": 2.5110192261871122e-05,
      "loss": 3.0463,
      "step": 200183
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9977011680603027,
      "learning_rate": 2.5108554041611838e-05,
      "loss": 2.8003,
      "step": 200184
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.198289394378662,
      "learning_rate": 2.510691587246011e-05,
      "loss": 3.2111,
      "step": 200185
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.897507905960083,
      "learning_rate": 2.510527775441613e-05,
      "loss": 3.031,
      "step": 200186
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6649022102355957,
      "learning_rate": 2.5103639687480337e-05,
      "loss": 3.0315,
      "step": 200187
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0570497512817383,
      "learning_rate": 2.510200167165296e-05,
      "loss": 3.1011,
      "step": 200188
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6101889610290527,
      "learning_rate": 2.5100363706934267e-05,
      "loss": 2.9512,
      "step": 200189
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7863574028015137,
      "learning_rate": 2.5098725793324657e-05,
      "loss": 2.9285,
      "step": 200190
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.631519317626953,
      "learning_rate": 2.50970879308244e-05,
      "loss": 2.8379,
      "step": 200191
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1428864002227783,
      "learning_rate": 2.5095450119433692e-05,
      "loss": 2.8902,
      "step": 200192
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.55546236038208,
      "learning_rate": 2.5093812359152998e-05,
      "loss": 3.1182,
      "step": 200193
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9718234539031982,
      "learning_rate": 2.5092174649982556e-05,
      "loss": 2.6623,
      "step": 200194
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0984113216400146,
      "learning_rate": 2.5090536991922627e-05,
      "loss": 2.9495,
      "step": 200195
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.7260308265686035,
      "learning_rate": 2.5088899384973615e-05,
      "loss": 2.8353,
      "step": 200196
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3529701232910156,
      "learning_rate": 2.508726182913575e-05,
      "loss": 3.215,
      "step": 200197
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6966519355773926,
      "learning_rate": 2.508562432440927e-05,
      "loss": 2.8066,
      "step": 200198
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.143592834472656,
      "learning_rate": 2.508398687079467e-05,
      "loss": 2.8437,
      "step": 200199
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.105013847351074,
      "learning_rate": 2.508234946829212e-05,
      "loss": 2.9342,
      "step": 200200
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9414608478546143,
      "learning_rate": 2.508071211690188e-05,
      "loss": 3.0508,
      "step": 200201
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.804710626602173,
      "learning_rate": 2.507907481662439e-05,
      "loss": 2.8858,
      "step": 200202
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1805076599121094,
      "learning_rate": 2.5077437567459846e-05,
      "loss": 3.068,
      "step": 200203
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.546322822570801,
      "learning_rate": 2.507580036940865e-05,
      "loss": 2.8433,
      "step": 200204
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9902303218841553,
      "learning_rate": 2.5074163222471066e-05,
      "loss": 2.8389,
      "step": 200205
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7305004596710205,
      "learning_rate": 2.5072526126647362e-05,
      "loss": 2.8964,
      "step": 200206
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0822370052337646,
      "learning_rate": 2.5070889081937807e-05,
      "loss": 3.0105,
      "step": 200207
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0582451820373535,
      "learning_rate": 2.506925208834283e-05,
      "loss": 3.0484,
      "step": 200208
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.139063596725464,
      "learning_rate": 2.5067615145862595e-05,
      "loss": 2.9511,
      "step": 200209
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.546513795852661,
      "learning_rate": 2.5065978254497575e-05,
      "loss": 2.9531,
      "step": 200210
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3787002563476562,
      "learning_rate": 2.5064341414247936e-05,
      "loss": 2.7008,
      "step": 200211
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.379748582839966,
      "learning_rate": 2.506270462511404e-05,
      "loss": 3.1211,
      "step": 200212
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9189181327819824,
      "learning_rate": 2.506106788709612e-05,
      "loss": 2.8006,
      "step": 200213
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.979299306869507,
      "learning_rate": 2.505943120019461e-05,
      "loss": 2.9879,
      "step": 200214
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.532263994216919,
      "learning_rate": 2.5057794564409684e-05,
      "loss": 3.169,
      "step": 200215
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.471564531326294,
      "learning_rate": 2.505615797974173e-05,
      "loss": 2.9272,
      "step": 200216
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.635260105133057,
      "learning_rate": 2.5054521446191023e-05,
      "loss": 2.9528,
      "step": 200217
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6173863410949707,
      "learning_rate": 2.5052884963757924e-05,
      "loss": 3.1355,
      "step": 200218
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4785513877868652,
      "learning_rate": 2.5051248532442603e-05,
      "loss": 2.9505,
      "step": 200219
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.156355381011963,
      "learning_rate": 2.504961215224549e-05,
      "loss": 2.8347,
      "step": 200220
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.385754108428955,
      "learning_rate": 2.504797582316682e-05,
      "loss": 3.0443,
      "step": 200221
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9780545234680176,
      "learning_rate": 2.5046339545206927e-05,
      "loss": 2.758,
      "step": 200222
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8899331092834473,
      "learning_rate": 2.5044703318366077e-05,
      "loss": 2.7109,
      "step": 200223
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.866196393966675,
      "learning_rate": 2.5043067142644737e-05,
      "loss": 2.7565,
      "step": 200224
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.142791271209717,
      "learning_rate": 2.5041431018042935e-05,
      "loss": 3.0389,
      "step": 200225
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0560929775238037,
      "learning_rate": 2.503979494456121e-05,
      "loss": 3.2711,
      "step": 200226
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.417686939239502,
      "learning_rate": 2.503815892219969e-05,
      "loss": 2.6798,
      "step": 200227
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4896066188812256,
      "learning_rate": 2.503652295095885e-05,
      "loss": 2.593,
      "step": 200228
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.8246850967407227,
      "learning_rate": 2.5034887030838848e-05,
      "loss": 3.0309,
      "step": 200229
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9417152404785156,
      "learning_rate": 2.503325116184015e-05,
      "loss": 3.0946,
      "step": 200230
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9655189514160156,
      "learning_rate": 2.5031615343962862e-05,
      "loss": 2.9296,
      "step": 200231
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7122223377227783,
      "learning_rate": 2.502997957720745e-05,
      "loss": 3.0121,
      "step": 200232
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9490699768066406,
      "learning_rate": 2.5028343861574074e-05,
      "loss": 2.6931,
      "step": 200233
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.356889247894287,
      "learning_rate": 2.502670819706317e-05,
      "loss": 2.9983,
      "step": 200234
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.880323886871338,
      "learning_rate": 2.5025072583674976e-05,
      "loss": 2.9739,
      "step": 200235
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5732221603393555,
      "learning_rate": 2.5023437021409886e-05,
      "loss": 2.8013,
      "step": 200236
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5825204849243164,
      "learning_rate": 2.5021801510268037e-05,
      "loss": 2.8683,
      "step": 200237
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.737083911895752,
      "learning_rate": 2.502016605024989e-05,
      "loss": 2.9591,
      "step": 200238
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.965672492980957,
      "learning_rate": 2.5018530641355617e-05,
      "loss": 2.9189,
      "step": 200239
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4576632976531982,
      "learning_rate": 2.501689528358565e-05,
      "loss": 2.8595,
      "step": 200240
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7939443588256836,
      "learning_rate": 2.5015259976940183e-05,
      "loss": 2.7547,
      "step": 200241
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.125498056411743,
      "learning_rate": 2.5013624721419655e-05,
      "loss": 2.945,
      "step": 200242
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.178396463394165,
      "learning_rate": 2.5011989517024202e-05,
      "loss": 2.9674,
      "step": 200243
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.651850938796997,
      "learning_rate": 2.5010354363754248e-05,
      "loss": 3.0295,
      "step": 200244
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7097549438476562,
      "learning_rate": 2.500871926161e-05,
      "loss": 2.9752,
      "step": 200245
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.622072696685791,
      "learning_rate": 2.500708421059189e-05,
      "loss": 2.8404,
      "step": 200246
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1588358879089355,
      "learning_rate": 2.5005449210700113e-05,
      "loss": 2.8548,
      "step": 200247
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.255980968475342,
      "learning_rate": 2.500381426193504e-05,
      "loss": 2.9143,
      "step": 200248
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.434009313583374,
      "learning_rate": 2.500217936429697e-05,
      "loss": 3.2591,
      "step": 200249
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5556962490081787,
      "learning_rate": 2.500054451778617e-05,
      "loss": 3.0012,
      "step": 200250
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.716193675994873,
      "learning_rate": 2.4998909722402905e-05,
      "loss": 2.6447,
      "step": 200251
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5768070220947266,
      "learning_rate": 2.4997274978147607e-05,
      "loss": 2.6638,
      "step": 200252
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.352144956588745,
      "learning_rate": 2.4995640285020445e-05,
      "loss": 3.0078,
      "step": 200253
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.711831569671631,
      "learning_rate": 2.4994005643021818e-05,
      "loss": 2.7386,
      "step": 200254
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0319366455078125,
      "learning_rate": 2.4992371052152027e-05,
      "loss": 2.9031,
      "step": 200255
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.088181495666504,
      "learning_rate": 2.4990736512411334e-05,
      "loss": 2.9968,
      "step": 200256
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6814825534820557,
      "learning_rate": 2.498910202379998e-05,
      "loss": 2.9105,
      "step": 200257
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.976247549057007,
      "learning_rate": 2.4987467586318423e-05,
      "loss": 2.7564,
      "step": 200258
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3919968605041504,
      "learning_rate": 2.498583319996683e-05,
      "loss": 2.7792,
      "step": 200259
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.823129415512085,
      "learning_rate": 2.4984198864745607e-05,
      "loss": 2.8574,
      "step": 200260
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7124431133270264,
      "learning_rate": 2.498256458065502e-05,
      "loss": 2.8752,
      "step": 200261
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.488988161087036,
      "learning_rate": 2.498093034769536e-05,
      "loss": 2.926,
      "step": 200262
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.293877601623535,
      "learning_rate": 2.4979296165866868e-05,
      "loss": 3.1756,
      "step": 200263
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.434776544570923,
      "learning_rate": 2.497766203517001e-05,
      "loss": 2.7075,
      "step": 200264
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3886656761169434,
      "learning_rate": 2.4976027955604915e-05,
      "loss": 2.9859,
      "step": 200265
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3416242599487305,
      "learning_rate": 2.4974393927172022e-05,
      "loss": 2.8019,
      "step": 200266
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1220219135284424,
      "learning_rate": 2.497275994987159e-05,
      "loss": 2.954,
      "step": 200267
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.064377784729004,
      "learning_rate": 2.4971126023703892e-05,
      "loss": 3.0128,
      "step": 200268
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.2700114250183105,
      "learning_rate": 2.4969492148669224e-05,
      "loss": 2.7586,
      "step": 200269
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1612229347229004,
      "learning_rate": 2.4967858324767987e-05,
      "loss": 2.6985,
      "step": 200270
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8368098735809326,
      "learning_rate": 2.4966224552000313e-05,
      "loss": 3.0386,
      "step": 200271
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6863012313842773,
      "learning_rate": 2.4964590830366704e-05,
      "loss": 3.1191,
      "step": 200272
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9369735717773438,
      "learning_rate": 2.496295715986736e-05,
      "loss": 3.0275,
      "step": 200273
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6958446502685547,
      "learning_rate": 2.4961323540502576e-05,
      "loss": 3.0924,
      "step": 200274
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.813638210296631,
      "learning_rate": 2.4959689972272655e-05,
      "loss": 2.8234,
      "step": 200275
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1791505813598633,
      "learning_rate": 2.4958056455177932e-05,
      "loss": 2.9483,
      "step": 200276
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6456997394561768,
      "learning_rate": 2.4956422989218673e-05,
      "loss": 3.1048,
      "step": 200277
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4981157779693604,
      "learning_rate": 2.495478957439524e-05,
      "loss": 2.9467,
      "step": 200278
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9281129837036133,
      "learning_rate": 2.4953156210707936e-05,
      "loss": 2.9644,
      "step": 200279
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.722219228744507,
      "learning_rate": 2.4951522898156963e-05,
      "loss": 2.9029,
      "step": 200280
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9948508739471436,
      "learning_rate": 2.4949889636742716e-05,
      "loss": 2.8582,
      "step": 200281
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8596622943878174,
      "learning_rate": 2.494825642646553e-05,
      "loss": 2.7906,
      "step": 200282
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5251901149749756,
      "learning_rate": 2.4946623267325572e-05,
      "loss": 3.0023,
      "step": 200283
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.15692663192749,
      "learning_rate": 2.4944990159323274e-05,
      "loss": 2.768,
      "step": 200284
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0962610244750977,
      "learning_rate": 2.4943357102458906e-05,
      "loss": 2.876,
      "step": 200285
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.756690263748169,
      "learning_rate": 2.4941724096732695e-05,
      "loss": 2.8952,
      "step": 200286
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.802274703979492,
      "learning_rate": 2.494009114214508e-05,
      "loss": 3.0201,
      "step": 200287
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3937084674835205,
      "learning_rate": 2.4938458238696223e-05,
      "loss": 2.8811,
      "step": 200288
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7488834857940674,
      "learning_rate": 2.4936825386386595e-05,
      "loss": 2.8191,
      "step": 200289
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.841111898422241,
      "learning_rate": 2.4935192585216357e-05,
      "loss": 2.7865,
      "step": 200290
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.696471929550171,
      "learning_rate": 2.493355983518588e-05,
      "loss": 2.883,
      "step": 200291
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.293057918548584,
      "learning_rate": 2.4931927136295392e-05,
      "loss": 2.8752,
      "step": 200292
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6898062229156494,
      "learning_rate": 2.4930294488545298e-05,
      "loss": 3.0033,
      "step": 200293
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9485127925872803,
      "learning_rate": 2.4928661891935798e-05,
      "loss": 2.8754,
      "step": 200294
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.8162946701049805,
      "learning_rate": 2.492702934646732e-05,
      "loss": 3.1179,
      "step": 200295
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2393596172332764,
      "learning_rate": 2.4925396852140067e-05,
      "loss": 3.1797,
      "step": 200296
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.558427095413208,
      "learning_rate": 2.4923764408954407e-05,
      "loss": 2.9308,
      "step": 200297
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6659343242645264,
      "learning_rate": 2.4922132016910536e-05,
      "loss": 2.9467,
      "step": 200298
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2291762828826904,
      "learning_rate": 2.492049967600892e-05,
      "loss": 2.9116,
      "step": 200299
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.332818031311035,
      "learning_rate": 2.4918867386249696e-05,
      "loss": 2.8223,
      "step": 200300
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.866590738296509,
      "learning_rate": 2.4917235147633297e-05,
      "loss": 2.736,
      "step": 200301
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.423372268676758,
      "learning_rate": 2.491560296015995e-05,
      "loss": 3.2018,
      "step": 200302
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.678858995437622,
      "learning_rate": 2.4913970823830066e-05,
      "loss": 2.8174,
      "step": 200303
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.541516065597534,
      "learning_rate": 2.4912338738643768e-05,
      "loss": 2.9133,
      "step": 200304
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4159343242645264,
      "learning_rate": 2.4910706704601523e-05,
      "loss": 3.1239,
      "step": 200305
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0988240242004395,
      "learning_rate": 2.4909074721703503e-05,
      "loss": 3.0363,
      "step": 200306
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7104146480560303,
      "learning_rate": 2.490744278995014e-05,
      "loss": 2.7936,
      "step": 200307
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.3564064502716064,
      "learning_rate": 2.4905810909341628e-05,
      "loss": 2.7955,
      "step": 200308
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6079885959625244,
      "learning_rate": 2.4904179079878406e-05,
      "loss": 2.8039,
      "step": 200309
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.169043779373169,
      "learning_rate": 2.4902547301560605e-05,
      "loss": 2.9406,
      "step": 200310
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2033143043518066,
      "learning_rate": 2.490091557438866e-05,
      "loss": 2.8194,
      "step": 200311
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2622900009155273,
      "learning_rate": 2.4899283898362765e-05,
      "loss": 2.9691,
      "step": 200312
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2243173122406006,
      "learning_rate": 2.4897652273483326e-05,
      "loss": 3.0787,
      "step": 200313
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6322317123413086,
      "learning_rate": 2.4896020699750575e-05,
      "loss": 3.037,
      "step": 200314
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.978705406188965,
      "learning_rate": 2.489438917716491e-05,
      "loss": 2.9625,
      "step": 200315
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.600696325302124,
      "learning_rate": 2.4892757705726563e-05,
      "loss": 2.8062,
      "step": 200316
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8345842361450195,
      "learning_rate": 2.4891126285435837e-05,
      "loss": 2.864,
      "step": 200317
    },
    {
      "epoch": 2.61,
      "grad_norm": 5.86718225479126,
      "learning_rate": 2.4889494916292995e-05,
      "loss": 3.0165,
      "step": 200318
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.065755844116211,
      "learning_rate": 2.488786359829844e-05,
      "loss": 2.8901,
      "step": 200319
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.463878870010376,
      "learning_rate": 2.488623233145237e-05,
      "loss": 3.164,
      "step": 200320
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1025214195251465,
      "learning_rate": 2.488460111575519e-05,
      "loss": 2.8234,
      "step": 200321
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8360178470611572,
      "learning_rate": 2.4882969951207155e-05,
      "loss": 2.772,
      "step": 200322
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.215061902999878,
      "learning_rate": 2.4881338837808575e-05,
      "loss": 2.9687,
      "step": 200323
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.585542917251587,
      "learning_rate": 2.4879707775559708e-05,
      "loss": 2.8058,
      "step": 200324
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.815180778503418,
      "learning_rate": 2.487807676446093e-05,
      "loss": 3.1702,
      "step": 200325
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8674938678741455,
      "learning_rate": 2.4876445804512467e-05,
      "loss": 3.0055,
      "step": 200326
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7229082584381104,
      "learning_rate": 2.4874814895714722e-05,
      "loss": 2.7232,
      "step": 200327
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.263042449951172,
      "learning_rate": 2.4873184038067962e-05,
      "loss": 2.9371,
      "step": 200328
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7979350090026855,
      "learning_rate": 2.4871553231572417e-05,
      "loss": 3.025,
      "step": 200329
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8138017654418945,
      "learning_rate": 2.4869922476228422e-05,
      "loss": 2.9216,
      "step": 200330
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.398193597793579,
      "learning_rate": 2.4868291772036375e-05,
      "loss": 2.761,
      "step": 200331
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.827267646789551,
      "learning_rate": 2.4866661118996413e-05,
      "loss": 2.7746,
      "step": 200332
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2858095169067383,
      "learning_rate": 2.486503051710903e-05,
      "loss": 3.0476,
      "step": 200333
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.782491683959961,
      "learning_rate": 2.4863399966374398e-05,
      "loss": 2.8617,
      "step": 200334
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.020876169204712,
      "learning_rate": 2.4861769466792845e-05,
      "loss": 3.0121,
      "step": 200335
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7606139183044434,
      "learning_rate": 2.4860139018364643e-05,
      "loss": 2.6194,
      "step": 200336
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.642388105392456,
      "learning_rate": 2.485850862109019e-05,
      "loss": 3.1788,
      "step": 200337
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3382716178894043,
      "learning_rate": 2.4856878274969682e-05,
      "loss": 3.0246,
      "step": 200338
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.761195421218872,
      "learning_rate": 2.4855247980003558e-05,
      "loss": 2.7974,
      "step": 200339
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8953850269317627,
      "learning_rate": 2.4853617736191976e-05,
      "loss": 2.8953,
      "step": 200340
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2649052143096924,
      "learning_rate": 2.4851987543535346e-05,
      "loss": 3.081,
      "step": 200341
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0298995971679688,
      "learning_rate": 2.485035740203386e-05,
      "loss": 2.902,
      "step": 200342
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.208730459213257,
      "learning_rate": 2.4848727311687923e-05,
      "loss": 2.9214,
      "step": 200343
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6918654441833496,
      "learning_rate": 2.4847097272497763e-05,
      "loss": 3.0027,
      "step": 200344
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5325264930725098,
      "learning_rate": 2.4845467284463782e-05,
      "loss": 2.8599,
      "step": 200345
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.147488594055176,
      "learning_rate": 2.4843837347586216e-05,
      "loss": 2.9708,
      "step": 200346
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.390319347381592,
      "learning_rate": 2.4842207461865358e-05,
      "loss": 2.7495,
      "step": 200347
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1468801498413086,
      "learning_rate": 2.4840577627301482e-05,
      "loss": 2.7093,
      "step": 200348
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4199070930480957,
      "learning_rate": 2.4838947843895018e-05,
      "loss": 2.8003,
      "step": 200349
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4110939502716064,
      "learning_rate": 2.4837318111646098e-05,
      "loss": 3.0858,
      "step": 200350
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.015519142150879,
      "learning_rate": 2.483568843055519e-05,
      "loss": 2.8558,
      "step": 200351
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.984811782836914,
      "learning_rate": 2.4834058800622526e-05,
      "loss": 3.099,
      "step": 200352
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.168032646179199,
      "learning_rate": 2.483242922184837e-05,
      "loss": 3.0577,
      "step": 200353
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0604798793792725,
      "learning_rate": 2.4830799694233062e-05,
      "loss": 3.0513,
      "step": 200354
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.281156063079834,
      "learning_rate": 2.4829170217776894e-05,
      "loss": 2.9917,
      "step": 200355
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8915493488311768,
      "learning_rate": 2.482754079248017e-05,
      "loss": 2.6472,
      "step": 200356
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7813384532928467,
      "learning_rate": 2.4825911418343226e-05,
      "loss": 2.8805,
      "step": 200357
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.01415753364563,
      "learning_rate": 2.4824282095366355e-05,
      "loss": 2.7116,
      "step": 200358
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7306082248687744,
      "learning_rate": 2.4822652823549827e-05,
      "loss": 2.9983,
      "step": 200359
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9820923805236816,
      "learning_rate": 2.482102360289394e-05,
      "loss": 2.8594,
      "step": 200360
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2124838829040527,
      "learning_rate": 2.4819394433399063e-05,
      "loss": 2.8821,
      "step": 200361
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.800743579864502,
      "learning_rate": 2.4817765315065396e-05,
      "loss": 2.8753,
      "step": 200362
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.059379577636719,
      "learning_rate": 2.4816136247893336e-05,
      "loss": 3.0436,
      "step": 200363
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.888230323791504,
      "learning_rate": 2.481450723188315e-05,
      "loss": 2.8416,
      "step": 200364
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8419501781463623,
      "learning_rate": 2.4812878267035106e-05,
      "loss": 3.1787,
      "step": 200365
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.383279323577881,
      "learning_rate": 2.4811249353349604e-05,
      "loss": 3.0582,
      "step": 200366
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8320000171661377,
      "learning_rate": 2.4809620490826877e-05,
      "loss": 2.882,
      "step": 200367
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.862569570541382,
      "learning_rate": 2.4807991679467186e-05,
      "loss": 2.8559,
      "step": 200368
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.259950876235962,
      "learning_rate": 2.480636291927094e-05,
      "loss": 2.8487,
      "step": 200369
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.112492084503174,
      "learning_rate": 2.4804734210238364e-05,
      "loss": 2.9401,
      "step": 200370
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.616637706756592,
      "learning_rate": 2.480310555236976e-05,
      "loss": 2.9967,
      "step": 200371
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.072359561920166,
      "learning_rate": 2.4801476945665466e-05,
      "loss": 2.9568,
      "step": 200372
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.784601211547852,
      "learning_rate": 2.479984839012581e-05,
      "loss": 2.9224,
      "step": 200373
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7641124725341797,
      "learning_rate": 2.479821988575099e-05,
      "loss": 2.8206,
      "step": 200374
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4646494388580322,
      "learning_rate": 2.4796591432541445e-05,
      "loss": 3.0141,
      "step": 200375
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9123668670654297,
      "learning_rate": 2.479496303049737e-05,
      "loss": 2.9818,
      "step": 200376
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7880401611328125,
      "learning_rate": 2.479333467961907e-05,
      "loss": 2.8408,
      "step": 200377
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.283172369003296,
      "learning_rate": 2.4791706379906974e-05,
      "loss": 2.8771,
      "step": 200378
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.558694839477539,
      "learning_rate": 2.4790078131361212e-05,
      "loss": 2.8712,
      "step": 200379
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.466714859008789,
      "learning_rate": 2.478844993398219e-05,
      "loss": 2.8847,
      "step": 200380
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0856926441192627,
      "learning_rate": 2.478682178777024e-05,
      "loss": 3.0316,
      "step": 200381
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.433445453643799,
      "learning_rate": 2.4785193692725525e-05,
      "loss": 2.8989,
      "step": 200382
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5886166095733643,
      "learning_rate": 2.4783565648848514e-05,
      "loss": 3.0813,
      "step": 200383
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9365718364715576,
      "learning_rate": 2.4781937656139438e-05,
      "loss": 2.9399,
      "step": 200384
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6623945236206055,
      "learning_rate": 2.4780309714598535e-05,
      "loss": 3.2049,
      "step": 200385
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.713444232940674,
      "learning_rate": 2.477868182422623e-05,
      "loss": 2.8541,
      "step": 200386
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.741137981414795,
      "learning_rate": 2.47770539850227e-05,
      "loss": 2.6999,
      "step": 200387
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.792252540588379,
      "learning_rate": 2.4775426196988335e-05,
      "loss": 2.9103,
      "step": 200388
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.826746702194214,
      "learning_rate": 2.477379846012344e-05,
      "loss": 2.8776,
      "step": 200389
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6889944076538086,
      "learning_rate": 2.4772170774428313e-05,
      "loss": 3.0521,
      "step": 200390
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.393014430999756,
      "learning_rate": 2.4770543139903155e-05,
      "loss": 2.9308,
      "step": 200391
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.907555341720581,
      "learning_rate": 2.4768915556548396e-05,
      "loss": 2.8206,
      "step": 200392
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.603041648864746,
      "learning_rate": 2.4767288024364242e-05,
      "loss": 2.6196,
      "step": 200393
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1145968437194824,
      "learning_rate": 2.4765660543351117e-05,
      "loss": 2.7793,
      "step": 200394
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0452966690063477,
      "learning_rate": 2.476403311350923e-05,
      "loss": 3.0483,
      "step": 200395
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.607660293579102,
      "learning_rate": 2.476240573483891e-05,
      "loss": 2.9779,
      "step": 200396
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9295473098754883,
      "learning_rate": 2.476077840734042e-05,
      "loss": 2.9074,
      "step": 200397
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.742406129837036,
      "learning_rate": 2.475915113101413e-05,
      "loss": 3.1381,
      "step": 200398
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0537519454956055,
      "learning_rate": 2.4757523905860277e-05,
      "loss": 3.082,
      "step": 200399
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8250091075897217,
      "learning_rate": 2.475589673187922e-05,
      "loss": 2.9142,
      "step": 200400
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.614637851715088,
      "learning_rate": 2.4754269609071265e-05,
      "loss": 2.8994,
      "step": 200401
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4420361518859863,
      "learning_rate": 2.475264253743664e-05,
      "loss": 3.0422,
      "step": 200402
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.733515977859497,
      "learning_rate": 2.4751015516975682e-05,
      "loss": 2.6029,
      "step": 200403
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.344407320022583,
      "learning_rate": 2.4749388547688754e-05,
      "loss": 2.9056,
      "step": 200404
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5777926445007324,
      "learning_rate": 2.4747761629576056e-05,
      "loss": 2.8947,
      "step": 200405
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.060197353363037,
      "learning_rate": 2.4746134762637993e-05,
      "loss": 3.0207,
      "step": 200406
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.316246032714844,
      "learning_rate": 2.4744507946874824e-05,
      "loss": 2.9548,
      "step": 200407
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.181672096252441,
      "learning_rate": 2.4742881182286855e-05,
      "loss": 3.0646,
      "step": 200408
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.989886999130249,
      "learning_rate": 2.4741254468874316e-05,
      "loss": 3.0503,
      "step": 200409
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.290639877319336,
      "learning_rate": 2.4739627806637607e-05,
      "loss": 3.035,
      "step": 200410
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.035994052886963,
      "learning_rate": 2.4738001195576996e-05,
      "loss": 3.0851,
      "step": 200411
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.491481304168701,
      "learning_rate": 2.473637463569278e-05,
      "loss": 2.7709,
      "step": 200412
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.694685935974121,
      "learning_rate": 2.4734748126985292e-05,
      "loss": 3.0664,
      "step": 200413
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9335899353027344,
      "learning_rate": 2.4733121669454837e-05,
      "loss": 2.9484,
      "step": 200414
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3223700523376465,
      "learning_rate": 2.4731495263101608e-05,
      "loss": 2.7285,
      "step": 200415
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.649905204772949,
      "learning_rate": 2.4729868907926042e-05,
      "loss": 2.9204,
      "step": 200416
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.713367462158203,
      "learning_rate": 2.4728242603928372e-05,
      "loss": 2.7956,
      "step": 200417
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.780596971511841,
      "learning_rate": 2.472661635110893e-05,
      "loss": 2.9457,
      "step": 200418
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.726529598236084,
      "learning_rate": 2.4724990149468017e-05,
      "loss": 2.828,
      "step": 200419
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2140250205993652,
      "learning_rate": 2.4723363999005928e-05,
      "loss": 2.7753,
      "step": 200420
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.686065196990967,
      "learning_rate": 2.4721737899722905e-05,
      "loss": 3.2058,
      "step": 200421
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.232999801635742,
      "learning_rate": 2.4720111851619374e-05,
      "loss": 2.7254,
      "step": 200422
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4633920192718506,
      "learning_rate": 2.47184858546955e-05,
      "loss": 2.8421,
      "step": 200423
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.412598133087158,
      "learning_rate": 2.4716859908951725e-05,
      "loss": 2.8885,
      "step": 200424
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7337894439697266,
      "learning_rate": 2.4715234014388274e-05,
      "loss": 2.9197,
      "step": 200425
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5475611686706543,
      "learning_rate": 2.471360817100545e-05,
      "loss": 3.0805,
      "step": 200426
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7385804653167725,
      "learning_rate": 2.471198237880352e-05,
      "loss": 3.1127,
      "step": 200427
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7539634704589844,
      "learning_rate": 2.471035663778288e-05,
      "loss": 2.9961,
      "step": 200428
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.729757785797119,
      "learning_rate": 2.4708730947943734e-05,
      "loss": 2.9228,
      "step": 200429
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3705334663391113,
      "learning_rate": 2.4707105309286477e-05,
      "loss": 3.0001,
      "step": 200430
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.580981731414795,
      "learning_rate": 2.470547972181135e-05,
      "loss": 2.8968,
      "step": 200431
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.051076889038086,
      "learning_rate": 2.4703854185518677e-05,
      "loss": 2.7374,
      "step": 200432
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.527600049972534,
      "learning_rate": 2.4702228700408734e-05,
      "loss": 2.9005,
      "step": 200433
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2678701877593994,
      "learning_rate": 2.4700603266481878e-05,
      "loss": 2.8171,
      "step": 200434
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6378142833709717,
      "learning_rate": 2.4698977883738313e-05,
      "loss": 2.8331,
      "step": 200435
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2887208461761475,
      "learning_rate": 2.469735255217844e-05,
      "loss": 2.9015,
      "step": 200436
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.567046880722046,
      "learning_rate": 2.4695727271802558e-05,
      "loss": 2.8071,
      "step": 200437
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3013076782226562,
      "learning_rate": 2.469410204261093e-05,
      "loss": 2.8265,
      "step": 200438
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5323991775512695,
      "learning_rate": 2.4692476864603828e-05,
      "loss": 3.2043,
      "step": 200439
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7141013145446777,
      "learning_rate": 2.4690851737781615e-05,
      "loss": 3.1936,
      "step": 200440
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2401371002197266,
      "learning_rate": 2.4689226662144522e-05,
      "loss": 2.9358,
      "step": 200441
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.073779821395874,
      "learning_rate": 2.4687601637692956e-05,
      "loss": 3.0511,
      "step": 200442
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.121181488037109,
      "learning_rate": 2.4685976664427177e-05,
      "loss": 2.9539,
      "step": 200443
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9523565769195557,
      "learning_rate": 2.4684351742347454e-05,
      "loss": 2.8607,
      "step": 200444
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1406445503234863,
      "learning_rate": 2.468272687145405e-05,
      "loss": 2.983,
      "step": 200445
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6559276580810547,
      "learning_rate": 2.4681102051747403e-05,
      "loss": 2.822,
      "step": 200446
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1240084171295166,
      "learning_rate": 2.4679477283227645e-05,
      "loss": 2.9716,
      "step": 200447
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.512563943862915,
      "learning_rate": 2.467785256589524e-05,
      "loss": 3.097,
      "step": 200448
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3936305046081543,
      "learning_rate": 2.467622789975039e-05,
      "loss": 3.26,
      "step": 200449
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.014749050140381,
      "learning_rate": 2.467460328479346e-05,
      "loss": 2.9389,
      "step": 200450
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.716580867767334,
      "learning_rate": 2.4672978721024717e-05,
      "loss": 2.9571,
      "step": 200451
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0051276683807373,
      "learning_rate": 2.467135420844446e-05,
      "loss": 3.0625,
      "step": 200452
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2114920616149902,
      "learning_rate": 2.4669729747052957e-05,
      "loss": 2.8453,
      "step": 200453
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3217244148254395,
      "learning_rate": 2.4668105336850575e-05,
      "loss": 2.8661,
      "step": 200454
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.530855894088745,
      "learning_rate": 2.4666480977837576e-05,
      "loss": 2.8663,
      "step": 200455
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1093242168426514,
      "learning_rate": 2.4664856670014298e-05,
      "loss": 3.0493,
      "step": 200456
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4913032054901123,
      "learning_rate": 2.466323241338101e-05,
      "loss": 3.0287,
      "step": 200457
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.77762770652771,
      "learning_rate": 2.4661608207938067e-05,
      "loss": 2.918,
      "step": 200458
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.127392292022705,
      "learning_rate": 2.4659984053685643e-05,
      "loss": 3.0165,
      "step": 200459
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9943695068359375,
      "learning_rate": 2.4658359950624174e-05,
      "loss": 2.9599,
      "step": 200460
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7070424556732178,
      "learning_rate": 2.4656735898753887e-05,
      "loss": 3.1002,
      "step": 200461
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5275673866271973,
      "learning_rate": 2.4655111898075153e-05,
      "loss": 2.9283,
      "step": 200462
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5430872440338135,
      "learning_rate": 2.46534879485882e-05,
      "loss": 2.886,
      "step": 200463
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.191770076751709,
      "learning_rate": 2.465186405029337e-05,
      "loss": 2.8521,
      "step": 200464
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0904452800750732,
      "learning_rate": 2.4650240203190952e-05,
      "loss": 3.021,
      "step": 200465
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8225457668304443,
      "learning_rate": 2.4648616407281285e-05,
      "loss": 3.0096,
      "step": 200466
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6615028381347656,
      "learning_rate": 2.4646992662564568e-05,
      "loss": 2.9936,
      "step": 200467
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7080090045928955,
      "learning_rate": 2.4645368969041235e-05,
      "loss": 2.7245,
      "step": 200468
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.776268482208252,
      "learning_rate": 2.4643745326711517e-05,
      "loss": 2.7833,
      "step": 200469
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.248558282852173,
      "learning_rate": 2.4642121735575683e-05,
      "loss": 3.1972,
      "step": 200470
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.766409397125244,
      "learning_rate": 2.4640498195634128e-05,
      "loss": 3.0105,
      "step": 200471
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3666574954986572,
      "learning_rate": 2.4638874706887092e-05,
      "loss": 2.8422,
      "step": 200472
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9046435356140137,
      "learning_rate": 2.4637251269334835e-05,
      "loss": 2.9514,
      "step": 200473
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6149849891662598,
      "learning_rate": 2.4635627882977792e-05,
      "loss": 3.0756,
      "step": 200474
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9207851886749268,
      "learning_rate": 2.4634004547816132e-05,
      "loss": 3.0327,
      "step": 200475
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3158206939697266,
      "learning_rate": 2.4632381263850187e-05,
      "loss": 3.1561,
      "step": 200476
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.898014545440674,
      "learning_rate": 2.4630758031080356e-05,
      "loss": 3.0534,
      "step": 200477
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.805159330368042,
      "learning_rate": 2.4629134849506772e-05,
      "loss": 2.7258,
      "step": 200478
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8475632667541504,
      "learning_rate": 2.46275117191299e-05,
      "loss": 2.7743,
      "step": 200479
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1933722496032715,
      "learning_rate": 2.4625888639949975e-05,
      "loss": 3.0427,
      "step": 200480
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7809886932373047,
      "learning_rate": 2.4624265611967296e-05,
      "loss": 3.0926,
      "step": 200481
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.233412742614746,
      "learning_rate": 2.4622642635182098e-05,
      "loss": 2.9979,
      "step": 200482
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.500934600830078,
      "learning_rate": 2.4621019709594813e-05,
      "loss": 2.9501,
      "step": 200483
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3379464149475098,
      "learning_rate": 2.461939683520561e-05,
      "loss": 2.9079,
      "step": 200484
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8750696182250977,
      "learning_rate": 2.4617774012014914e-05,
      "loss": 2.9793,
      "step": 200485
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7410128116607666,
      "learning_rate": 2.4616151240022997e-05,
      "loss": 2.9345,
      "step": 200486
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0502917766571045,
      "learning_rate": 2.4614528519230125e-05,
      "loss": 3.0609,
      "step": 200487
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.00966215133667,
      "learning_rate": 2.4612905849636533e-05,
      "loss": 2.8967,
      "step": 200488
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.721722364425659,
      "learning_rate": 2.4611283231242684e-05,
      "loss": 3.1084,
      "step": 200489
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4947962760925293,
      "learning_rate": 2.4609660664048714e-05,
      "loss": 3.1305,
      "step": 200490
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5287084579467773,
      "learning_rate": 2.4608038148055087e-05,
      "loss": 2.7774,
      "step": 200491
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.266946315765381,
      "learning_rate": 2.4606415683262003e-05,
      "loss": 2.9947,
      "step": 200492
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4320011138916016,
      "learning_rate": 2.4604793269669798e-05,
      "loss": 2.9183,
      "step": 200493
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5771610736846924,
      "learning_rate": 2.4603170907278735e-05,
      "loss": 2.7093,
      "step": 200494
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4112045764923096,
      "learning_rate": 2.460154859608915e-05,
      "loss": 2.7231,
      "step": 200495
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5165061950683594,
      "learning_rate": 2.4599926336101304e-05,
      "loss": 2.9677,
      "step": 200496
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.917867660522461,
      "learning_rate": 2.459830412731557e-05,
      "loss": 2.8955,
      "step": 200497
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.963437795639038,
      "learning_rate": 2.459668196973221e-05,
      "loss": 3.1683,
      "step": 200498
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3768439292907715,
      "learning_rate": 2.459505986335153e-05,
      "loss": 3.0153,
      "step": 200499
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8960256576538086,
      "learning_rate": 2.4593437808173788e-05,
      "loss": 2.8567,
      "step": 200500
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.326025485992432,
      "learning_rate": 2.4591815804199357e-05,
      "loss": 2.9469,
      "step": 200501
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8054418563842773,
      "learning_rate": 2.4590193851428463e-05,
      "loss": 3.1105,
      "step": 200502
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.085690975189209,
      "learning_rate": 2.458857194986151e-05,
      "loss": 2.848,
      "step": 200503
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8164291381835938,
      "learning_rate": 2.4586950099498738e-05,
      "loss": 2.9578,
      "step": 200504
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9383997917175293,
      "learning_rate": 2.4585328300340434e-05,
      "loss": 2.9465,
      "step": 200505
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.856576681137085,
      "learning_rate": 2.458370655238687e-05,
      "loss": 2.9421,
      "step": 200506
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.713453769683838,
      "learning_rate": 2.4582084855638452e-05,
      "loss": 2.7228,
      "step": 200507
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6464760303497314,
      "learning_rate": 2.458046321009537e-05,
      "loss": 2.8234,
      "step": 200508
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.135721206665039,
      "learning_rate": 2.4578841615758026e-05,
      "loss": 2.7289,
      "step": 200509
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.766464948654175,
      "learning_rate": 2.4577220072626625e-05,
      "loss": 3.1436,
      "step": 200510
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.507132053375244,
      "learning_rate": 2.4575598580701628e-05,
      "loss": 2.9821,
      "step": 200511
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9727604389190674,
      "learning_rate": 2.4573977139983137e-05,
      "loss": 2.8654,
      "step": 200512
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2994019985198975,
      "learning_rate": 2.457235575047155e-05,
      "loss": 3.0189,
      "step": 200513
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.633495330810547,
      "learning_rate": 2.4570734412167138e-05,
      "loss": 3.1475,
      "step": 200514
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5083582401275635,
      "learning_rate": 2.4569113125070294e-05,
      "loss": 3.2048,
      "step": 200515
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.314382314682007,
      "learning_rate": 2.4567491889181192e-05,
      "loss": 3.0767,
      "step": 200516
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0523149967193604,
      "learning_rate": 2.4565870704500256e-05,
      "loss": 2.9057,
      "step": 200517
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.8038291931152344,
      "learning_rate": 2.456424957102766e-05,
      "loss": 3.0455,
      "step": 200518
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7649803161621094,
      "learning_rate": 2.4562628488763802e-05,
      "loss": 3.0198,
      "step": 200519
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.041783571243286,
      "learning_rate": 2.4561007457708913e-05,
      "loss": 2.8957,
      "step": 200520
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.483628749847412,
      "learning_rate": 2.4559386477863364e-05,
      "loss": 2.9363,
      "step": 200521
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.677877187728882,
      "learning_rate": 2.4557765549227417e-05,
      "loss": 3.0772,
      "step": 200522
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7472894191741943,
      "learning_rate": 2.4556144671801438e-05,
      "loss": 3.0524,
      "step": 200523
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.246340036392212,
      "learning_rate": 2.4554523845585595e-05,
      "loss": 3.029,
      "step": 200524
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7346084117889404,
      "learning_rate": 2.455290307058029e-05,
      "loss": 2.9138,
      "step": 200525
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9601898193359375,
      "learning_rate": 2.455128234678575e-05,
      "loss": 2.8105,
      "step": 200526
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6239469051361084,
      "learning_rate": 2.4549661674202413e-05,
      "loss": 2.8683,
      "step": 200527
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3724658489227295,
      "learning_rate": 2.4548041052830416e-05,
      "loss": 2.9563,
      "step": 200528
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.149314880371094,
      "learning_rate": 2.4546420482670247e-05,
      "loss": 2.9029,
      "step": 200529
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.112488269805908,
      "learning_rate": 2.4544799963721985e-05,
      "loss": 2.8082,
      "step": 200530
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.234353542327881,
      "learning_rate": 2.454317949598612e-05,
      "loss": 2.6975,
      "step": 200531
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0358335971832275,
      "learning_rate": 2.454155907946279e-05,
      "loss": 3.1963,
      "step": 200532
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8051509857177734,
      "learning_rate": 2.4539938714152464e-05,
      "loss": 2.8489,
      "step": 200533
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.583890676498413,
      "learning_rate": 2.4538318400055302e-05,
      "loss": 2.9244,
      "step": 200534
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9755704402923584,
      "learning_rate": 2.4536698137171774e-05,
      "loss": 2.8008,
      "step": 200535
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.710758686065674,
      "learning_rate": 2.4535077925501946e-05,
      "loss": 2.918,
      "step": 200536
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0213115215301514,
      "learning_rate": 2.453345776504635e-05,
      "loss": 2.878,
      "step": 200537
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9718024730682373,
      "learning_rate": 2.4531837655805087e-05,
      "loss": 3.2337,
      "step": 200538
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7521302700042725,
      "learning_rate": 2.4530217597778623e-05,
      "loss": 2.9528,
      "step": 200539
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8624091148376465,
      "learning_rate": 2.4528597590967124e-05,
      "loss": 2.8857,
      "step": 200540
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5797359943389893,
      "learning_rate": 2.4526977635371026e-05,
      "loss": 2.9369,
      "step": 200541
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.618980884552002,
      "learning_rate": 2.4525357730990558e-05,
      "loss": 3.0743,
      "step": 200542
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9289133548736572,
      "learning_rate": 2.4523737877826056e-05,
      "loss": 3.0154,
      "step": 200543
    },
    {
      "epoch": 2.61,
      "grad_norm": 5.33729362487793,
      "learning_rate": 2.4522118075877685e-05,
      "loss": 3.0216,
      "step": 200544
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.196376085281372,
      "learning_rate": 2.4520498325145944e-05,
      "loss": 3.0,
      "step": 200545
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2839694023132324,
      "learning_rate": 2.4518878625630966e-05,
      "loss": 3.2115,
      "step": 200546
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.715529441833496,
      "learning_rate": 2.4517258977333186e-05,
      "loss": 2.9494,
      "step": 200547
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.199824333190918,
      "learning_rate": 2.4515639380252873e-05,
      "loss": 3.1799,
      "step": 200548
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6170201301574707,
      "learning_rate": 2.451401983439022e-05,
      "loss": 2.9259,
      "step": 200549
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.078129529953003,
      "learning_rate": 2.4512400339745697e-05,
      "loss": 2.9058,
      "step": 200550
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.354710578918457,
      "learning_rate": 2.451078089631947e-05,
      "loss": 3.01,
      "step": 200551
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1231698989868164,
      "learning_rate": 2.4509161504111874e-05,
      "loss": 3.0844,
      "step": 200552
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.95117449760437,
      "learning_rate": 2.4507542163123272e-05,
      "loss": 3.0003,
      "step": 200553
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1566250324249268,
      "learning_rate": 2.4505922873353902e-05,
      "loss": 2.8807,
      "step": 200554
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6522276401519775,
      "learning_rate": 2.450430363480406e-05,
      "loss": 3.0117,
      "step": 200555
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.148104190826416,
      "learning_rate": 2.4502684447474107e-05,
      "loss": 2.7532,
      "step": 200556
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5324535369873047,
      "learning_rate": 2.4501065311364286e-05,
      "loss": 3.0406,
      "step": 200557
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.920070171356201,
      "learning_rate": 2.4499446226474862e-05,
      "loss": 2.7686,
      "step": 200558
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.987553119659424,
      "learning_rate": 2.4497827192806263e-05,
      "loss": 2.9558,
      "step": 200559
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2402472496032715,
      "learning_rate": 2.4496208210358725e-05,
      "loss": 2.7608,
      "step": 200560
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.350177049636841,
      "learning_rate": 2.4494589279132482e-05,
      "loss": 2.8735,
      "step": 200561
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1057138442993164,
      "learning_rate": 2.4492970399127964e-05,
      "loss": 2.9532,
      "step": 200562
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7025113105773926,
      "learning_rate": 2.449135157034531e-05,
      "loss": 2.8107,
      "step": 200563
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8391425609588623,
      "learning_rate": 2.448973279278501e-05,
      "loss": 2.8133,
      "step": 200564
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.755171775817871,
      "learning_rate": 2.448811406644724e-05,
      "loss": 3.004,
      "step": 200565
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.025411605834961,
      "learning_rate": 2.4486495391332328e-05,
      "loss": 2.852,
      "step": 200566
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.092951536178589,
      "learning_rate": 2.4484876767440542e-05,
      "loss": 2.8531,
      "step": 200567
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1805579662323,
      "learning_rate": 2.448325819477228e-05,
      "loss": 2.9358,
      "step": 200568
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1185216903686523,
      "learning_rate": 2.4481639673327713e-05,
      "loss": 2.849,
      "step": 200569
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1137776374816895,
      "learning_rate": 2.448002120310727e-05,
      "loss": 3.0305,
      "step": 200570
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5451815128326416,
      "learning_rate": 2.4478402784111184e-05,
      "loss": 2.9274,
      "step": 200571
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.888896942138672,
      "learning_rate": 2.4476784416339755e-05,
      "loss": 3.118,
      "step": 200572
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8842666149139404,
      "learning_rate": 2.4475166099793254e-05,
      "loss": 3.1223,
      "step": 200573
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.532602310180664,
      "learning_rate": 2.4473547834472073e-05,
      "loss": 2.854,
      "step": 200574
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.18328857421875,
      "learning_rate": 2.4471929620376385e-05,
      "loss": 2.8335,
      "step": 200575
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.881834030151367,
      "learning_rate": 2.4470311457506654e-05,
      "loss": 2.9873,
      "step": 200576
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.732187271118164,
      "learning_rate": 2.446869334586301e-05,
      "loss": 3.0822,
      "step": 200577
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.330097198486328,
      "learning_rate": 2.446707528544596e-05,
      "loss": 3.1466,
      "step": 200578
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.288824081420898,
      "learning_rate": 2.446545727625556e-05,
      "loss": 2.8159,
      "step": 200579
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.893857717514038,
      "learning_rate": 2.4463839318292323e-05,
      "loss": 2.753,
      "step": 200580
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.910522699356079,
      "learning_rate": 2.446222141155637e-05,
      "loss": 3.1209,
      "step": 200581
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.059088706970215,
      "learning_rate": 2.4460603556048174e-05,
      "loss": 2.9105,
      "step": 200582
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6893086433410645,
      "learning_rate": 2.4458985751767866e-05,
      "loss": 2.7534,
      "step": 200583
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4877235889434814,
      "learning_rate": 2.445736799871595e-05,
      "loss": 2.9355,
      "step": 200584
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1361851692199707,
      "learning_rate": 2.445575029689252e-05,
      "loss": 3.1434,
      "step": 200585
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7958641052246094,
      "learning_rate": 2.445413264629801e-05,
      "loss": 2.6575,
      "step": 200586
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7372350692749023,
      "learning_rate": 2.445251504693262e-05,
      "loss": 3.0321,
      "step": 200587
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.114838600158691,
      "learning_rate": 2.4450897498796783e-05,
      "loss": 2.9387,
      "step": 200588
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.968517780303955,
      "learning_rate": 2.4449280001890637e-05,
      "loss": 2.8936,
      "step": 200589
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1795654296875,
      "learning_rate": 2.4447662556214707e-05,
      "loss": 2.8121,
      "step": 200590
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.760526418685913,
      "learning_rate": 2.4446045161769035e-05,
      "loss": 2.9251,
      "step": 200591
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.669630765914917,
      "learning_rate": 2.4444427818554114e-05,
      "loss": 2.9,
      "step": 200592
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8324267864227295,
      "learning_rate": 2.444281052657011e-05,
      "loss": 2.9191,
      "step": 200593
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.869550943374634,
      "learning_rate": 2.444119328581746e-05,
      "loss": 2.667,
      "step": 200594
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7088623046875,
      "learning_rate": 2.4439576096296333e-05,
      "loss": 3.0958,
      "step": 200595
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.8274765014648438,
      "learning_rate": 2.4437958958007186e-05,
      "loss": 2.9185,
      "step": 200596
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.664187431335449,
      "learning_rate": 2.4436341870950126e-05,
      "loss": 2.9852,
      "step": 200597
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.731578826904297,
      "learning_rate": 2.4434724835125586e-05,
      "loss": 2.9365,
      "step": 200598
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6825435161590576,
      "learning_rate": 2.4433107850533796e-05,
      "loss": 2.9473,
      "step": 200599
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.074846029281616,
      "learning_rate": 2.4431490917175157e-05,
      "loss": 2.8808,
      "step": 200600
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.844982624053955,
      "learning_rate": 2.4429874035049834e-05,
      "loss": 3.0716,
      "step": 200601
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.948822021484375,
      "learning_rate": 2.44282572041583e-05,
      "loss": 2.815,
      "step": 200602
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8117287158966064,
      "learning_rate": 2.4426640424500675e-05,
      "loss": 2.8102,
      "step": 200603
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.895202875137329,
      "learning_rate": 2.4425023696077372e-05,
      "loss": 2.8975,
      "step": 200604
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.90173077583313,
      "learning_rate": 2.4423407018888618e-05,
      "loss": 2.8199,
      "step": 200605
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.9802417755126953,
      "learning_rate": 2.4421790392934783e-05,
      "loss": 2.8897,
      "step": 200606
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0018060207366943,
      "learning_rate": 2.4420173818216127e-05,
      "loss": 2.8124,
      "step": 200607
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.715822696685791,
      "learning_rate": 2.441855729473302e-05,
      "loss": 2.8338,
      "step": 200608
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6862101554870605,
      "learning_rate": 2.441694082248563e-05,
      "loss": 3.2157,
      "step": 200609
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2009544372558594,
      "learning_rate": 2.4415324401474388e-05,
      "loss": 2.9695,
      "step": 200610
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.98085355758667,
      "learning_rate": 2.4413708031699464e-05,
      "loss": 2.85,
      "step": 200611
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.357461929321289,
      "learning_rate": 2.441209171316132e-05,
      "loss": 2.9244,
      "step": 200612
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.910677671432495,
      "learning_rate": 2.441047544586009e-05,
      "loss": 2.9589,
      "step": 200613
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9243805408477783,
      "learning_rate": 2.4408859229796275e-05,
      "loss": 3.211,
      "step": 200614
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.665191411972046,
      "learning_rate": 2.4407243064969938e-05,
      "loss": 2.8504,
      "step": 200615
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5468688011169434,
      "learning_rate": 2.4405626951381552e-05,
      "loss": 2.9887,
      "step": 200616
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5532350540161133,
      "learning_rate": 2.440401088903131e-05,
      "loss": 2.834,
      "step": 200617
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.065830230712891,
      "learning_rate": 2.4402394877919616e-05,
      "loss": 2.7409,
      "step": 200618
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.748072862625122,
      "learning_rate": 2.440077891804667e-05,
      "loss": 2.8622,
      "step": 200619
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.418036460876465,
      "learning_rate": 2.4399163009412935e-05,
      "loss": 2.7669,
      "step": 200620
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.9435064792633057,
      "learning_rate": 2.439754715201848e-05,
      "loss": 2.9678,
      "step": 200621
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5866928100585938,
      "learning_rate": 2.4395931345863806e-05,
      "loss": 3.0959,
      "step": 200622
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3689470291137695,
      "learning_rate": 2.439431559094904e-05,
      "loss": 2.8655,
      "step": 200623
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.53961181640625,
      "learning_rate": 2.4392699887274624e-05,
      "loss": 2.8988,
      "step": 200624
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.367769241333008,
      "learning_rate": 2.4391084234840786e-05,
      "loss": 2.9523,
      "step": 200625
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.867208242416382,
      "learning_rate": 2.4389468633647892e-05,
      "loss": 2.7342,
      "step": 200626
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8233718872070312,
      "learning_rate": 2.4387853083696175e-05,
      "loss": 2.8059,
      "step": 200627
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0804412364959717,
      "learning_rate": 2.438623758498597e-05,
      "loss": 2.9423,
      "step": 200628
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.628530979156494,
      "learning_rate": 2.438462213751754e-05,
      "loss": 2.8895,
      "step": 200629
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8016791343688965,
      "learning_rate": 2.438300674129122e-05,
      "loss": 3.0597,
      "step": 200630
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.577202558517456,
      "learning_rate": 2.438139139630728e-05,
      "loss": 2.8977,
      "step": 200631
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.479123830795288,
      "learning_rate": 2.4379776102566086e-05,
      "loss": 3.0784,
      "step": 200632
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5912277698516846,
      "learning_rate": 2.4378160860067897e-05,
      "loss": 2.6282,
      "step": 200633
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3554556369781494,
      "learning_rate": 2.4376545668812952e-05,
      "loss": 3.1673,
      "step": 200634
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9833879470825195,
      "learning_rate": 2.4374930528801684e-05,
      "loss": 3.1769,
      "step": 200635
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1459908485412598,
      "learning_rate": 2.437331544003429e-05,
      "loss": 2.9824,
      "step": 200636
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.469744920730591,
      "learning_rate": 2.437170040251104e-05,
      "loss": 2.9519,
      "step": 200637
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4652481079101562,
      "learning_rate": 2.4370085416232364e-05,
      "loss": 3.0147,
      "step": 200638
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1633899211883545,
      "learning_rate": 2.43684704811985e-05,
      "loss": 2.8224,
      "step": 200639
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7847812175750732,
      "learning_rate": 2.4366855597409674e-05,
      "loss": 3.0461,
      "step": 200640
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0269203186035156,
      "learning_rate": 2.4365240764866324e-05,
      "loss": 2.9329,
      "step": 200641
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6280345916748047,
      "learning_rate": 2.436362598356868e-05,
      "loss": 2.9588,
      "step": 200642
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6476070880889893,
      "learning_rate": 2.4362011253516976e-05,
      "loss": 2.9293,
      "step": 200643
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9166929721832275,
      "learning_rate": 2.4360396574711615e-05,
      "loss": 2.9578,
      "step": 200644
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.739550828933716,
      "learning_rate": 2.435878194715286e-05,
      "loss": 2.9394,
      "step": 200645
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.200024127960205,
      "learning_rate": 2.435716737084098e-05,
      "loss": 2.9116,
      "step": 200646
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.832711935043335,
      "learning_rate": 2.435555284577634e-05,
      "loss": 2.976,
      "step": 200647
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4855964183807373,
      "learning_rate": 2.435393837195917e-05,
      "loss": 3.0794,
      "step": 200648
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.641134738922119,
      "learning_rate": 2.4352323949389842e-05,
      "loss": 2.7307,
      "step": 200649
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6347949504852295,
      "learning_rate": 2.435070957806865e-05,
      "loss": 2.8163,
      "step": 200650
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.254483938217163,
      "learning_rate": 2.434909525799583e-05,
      "loss": 3.1994,
      "step": 200651
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.783879041671753,
      "learning_rate": 2.4347480989171685e-05,
      "loss": 2.6586,
      "step": 200652
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9689908027648926,
      "learning_rate": 2.434586677159658e-05,
      "loss": 2.7997,
      "step": 200653
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7838454246520996,
      "learning_rate": 2.4344252605270742e-05,
      "loss": 2.684,
      "step": 200654
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.712376117706299,
      "learning_rate": 2.4342638490194575e-05,
      "loss": 3.1835,
      "step": 200655
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8633275032043457,
      "learning_rate": 2.4341024426368283e-05,
      "loss": 2.8487,
      "step": 200656
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7050352096557617,
      "learning_rate": 2.4339410413792227e-05,
      "loss": 2.9751,
      "step": 200657
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.323237657546997,
      "learning_rate": 2.4337796452466608e-05,
      "loss": 2.7497,
      "step": 200658
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.583338499069214,
      "learning_rate": 2.433618254239186e-05,
      "loss": 2.7574,
      "step": 200659
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5940003395080566,
      "learning_rate": 2.4334568683568177e-05,
      "loss": 2.7097,
      "step": 200660
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9909157752990723,
      "learning_rate": 2.4332954875995934e-05,
      "loss": 2.9502,
      "step": 200661
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7338976860046387,
      "learning_rate": 2.433134111967536e-05,
      "loss": 2.9339,
      "step": 200662
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4159395694732666,
      "learning_rate": 2.4329727414606892e-05,
      "loss": 2.8998,
      "step": 200663
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2330830097198486,
      "learning_rate": 2.4328113760790625e-05,
      "loss": 2.9995,
      "step": 200664
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4978601932525635,
      "learning_rate": 2.4326500158226993e-05,
      "loss": 2.8361,
      "step": 200665
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.609365940093994,
      "learning_rate": 2.432488660691626e-05,
      "loss": 2.9444,
      "step": 200666
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5577447414398193,
      "learning_rate": 2.4323273106858764e-05,
      "loss": 2.9111,
      "step": 200667
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8391895294189453,
      "learning_rate": 2.432165965805474e-05,
      "loss": 2.9263,
      "step": 200668
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0089809894561768,
      "learning_rate": 2.432004626050461e-05,
      "loss": 2.8741,
      "step": 200669
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.534250497817993,
      "learning_rate": 2.4318432914208486e-05,
      "loss": 2.8245,
      "step": 200670
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0205206871032715,
      "learning_rate": 2.4316819619166793e-05,
      "loss": 3.1729,
      "step": 200671
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.453896522521973,
      "learning_rate": 2.43152063753798e-05,
      "loss": 2.8662,
      "step": 200672
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7355756759643555,
      "learning_rate": 2.4313593182847845e-05,
      "loss": 2.9126,
      "step": 200673
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5160863399505615,
      "learning_rate": 2.4311980041571156e-05,
      "loss": 2.6246,
      "step": 200674
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9730918407440186,
      "learning_rate": 2.4310366951550163e-05,
      "loss": 3.1275,
      "step": 200675
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9898815155029297,
      "learning_rate": 2.4308753912784972e-05,
      "loss": 2.8053,
      "step": 200676
    },
    {
      "epoch": 2.61,
      "grad_norm": 5.514517307281494,
      "learning_rate": 2.430714092527608e-05,
      "loss": 2.8366,
      "step": 200677
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9632482528686523,
      "learning_rate": 2.4305527989023588e-05,
      "loss": 2.8487,
      "step": 200678
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.470062494277954,
      "learning_rate": 2.4303915104027993e-05,
      "loss": 2.7779,
      "step": 200679
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4230687618255615,
      "learning_rate": 2.430230227028943e-05,
      "loss": 2.9684,
      "step": 200680
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9537177085876465,
      "learning_rate": 2.43006894878084e-05,
      "loss": 2.8914,
      "step": 200681
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.784433364868164,
      "learning_rate": 2.4299076756584933e-05,
      "loss": 2.9083,
      "step": 200682
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7609646320343018,
      "learning_rate": 2.4297464076619565e-05,
      "loss": 2.9757,
      "step": 200683
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1063337326049805,
      "learning_rate": 2.429585144791243e-05,
      "loss": 2.8689,
      "step": 200684
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.150981903076172,
      "learning_rate": 2.4294238870463955e-05,
      "loss": 2.9559,
      "step": 200685
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7307050228118896,
      "learning_rate": 2.4292626344274345e-05,
      "loss": 3.0567,
      "step": 200686
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5593631267547607,
      "learning_rate": 2.4291013869344033e-05,
      "loss": 2.728,
      "step": 200687
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.865628957748413,
      "learning_rate": 2.428940144567312e-05,
      "loss": 2.9587,
      "step": 200688
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.642108201980591,
      "learning_rate": 2.4287789073262066e-05,
      "loss": 2.8642,
      "step": 200689
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6463875770568848,
      "learning_rate": 2.4286176752111077e-05,
      "loss": 2.9777,
      "step": 200690
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8547232151031494,
      "learning_rate": 2.4284564482220515e-05,
      "loss": 3.1949,
      "step": 200691
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.045164108276367,
      "learning_rate": 2.4282952263590615e-05,
      "loss": 2.8739,
      "step": 200692
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.71964168548584,
      "learning_rate": 2.4281340096221845e-05,
      "loss": 2.7687,
      "step": 200693
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9892988204956055,
      "learning_rate": 2.427972798011424e-05,
      "loss": 2.6217,
      "step": 200694
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0085277557373047,
      "learning_rate": 2.4278115915268327e-05,
      "loss": 2.8613,
      "step": 200695
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.5353312492370605,
      "learning_rate": 2.427650390168424e-05,
      "loss": 3.1898,
      "step": 200696
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6499178409576416,
      "learning_rate": 2.4274891939362417e-05,
      "loss": 2.7282,
      "step": 200697
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1863439083099365,
      "learning_rate": 2.4273280028303056e-05,
      "loss": 2.9745,
      "step": 200698
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.774568557739258,
      "learning_rate": 2.4271668168506586e-05,
      "loss": 2.9574,
      "step": 200699
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.18693470954895,
      "learning_rate": 2.4270056359973112e-05,
      "loss": 2.7876,
      "step": 200700
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9914608001708984,
      "learning_rate": 2.4268444602703095e-05,
      "loss": 2.8269,
      "step": 200701
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.931525707244873,
      "learning_rate": 2.426683289669671e-05,
      "loss": 2.6924,
      "step": 200702
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.89827036857605,
      "learning_rate": 2.4265221241954413e-05,
      "loss": 2.8803,
      "step": 200703
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.707528591156006,
      "learning_rate": 2.4263609638476343e-05,
      "loss": 3.0499,
      "step": 200704
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.01306414604187,
      "learning_rate": 2.4261998086262967e-05,
      "loss": 3.0623,
      "step": 200705
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.165286540985107,
      "learning_rate": 2.4260386585314418e-05,
      "loss": 2.77,
      "step": 200706
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7050185203552246,
      "learning_rate": 2.4258775135631094e-05,
      "loss": 2.7578,
      "step": 200707
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0855324268341064,
      "learning_rate": 2.4257163737213226e-05,
      "loss": 2.918,
      "step": 200708
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.75140905380249,
      "learning_rate": 2.4255552390061184e-05,
      "loss": 3.0322,
      "step": 200709
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0103254318237305,
      "learning_rate": 2.4253941094175234e-05,
      "loss": 2.903,
      "step": 200710
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7955620288848877,
      "learning_rate": 2.4252329849555707e-05,
      "loss": 2.9049,
      "step": 200711
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.592407703399658,
      "learning_rate": 2.4250718656202873e-05,
      "loss": 2.7795,
      "step": 200712
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0720701217651367,
      "learning_rate": 2.424910751411703e-05,
      "loss": 2.9833,
      "step": 200713
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1199021339416504,
      "learning_rate": 2.424749642329844e-05,
      "loss": 2.9238,
      "step": 200714
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.306633472442627,
      "learning_rate": 2.424588538374751e-05,
      "loss": 2.9137,
      "step": 200715
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9004268646240234,
      "learning_rate": 2.4244274395464402e-05,
      "loss": 3.3774,
      "step": 200716
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0979673862457275,
      "learning_rate": 2.4242663458449552e-05,
      "loss": 2.8791,
      "step": 200717
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.576559543609619,
      "learning_rate": 2.4241052572703224e-05,
      "loss": 2.8809,
      "step": 200718
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.026811122894287,
      "learning_rate": 2.4239441738225652e-05,
      "loss": 3.0472,
      "step": 200719
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.1721954345703125,
      "learning_rate": 2.4237830955017135e-05,
      "loss": 3.0727,
      "step": 200720
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.911104440689087,
      "learning_rate": 2.423622022307804e-05,
      "loss": 2.9568,
      "step": 200721
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.814629316329956,
      "learning_rate": 2.42346095424086e-05,
      "loss": 2.8654,
      "step": 200722
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.2195587158203125,
      "learning_rate": 2.4232998913009215e-05,
      "loss": 2.9417,
      "step": 200723
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9269375801086426,
      "learning_rate": 2.423138833488012e-05,
      "loss": 2.9451,
      "step": 200724
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6509804725646973,
      "learning_rate": 2.4229777808021544e-05,
      "loss": 2.9395,
      "step": 200725
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.663918972015381,
      "learning_rate": 2.4228167332433922e-05,
      "loss": 3.0635,
      "step": 200726
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.006660461425781,
      "learning_rate": 2.4226556908117524e-05,
      "loss": 2.7691,
      "step": 200727
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0003902912139893,
      "learning_rate": 2.422494653507251e-05,
      "loss": 3.0397,
      "step": 200728
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.562542676925659,
      "learning_rate": 2.422333621329935e-05,
      "loss": 3.1166,
      "step": 200729
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.597893476486206,
      "learning_rate": 2.4221725942798276e-05,
      "loss": 2.8267,
      "step": 200730
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4293460845947266,
      "learning_rate": 2.4220115723569557e-05,
      "loss": 3.0532,
      "step": 200731
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.6126937866210938,
      "learning_rate": 2.4218505555613554e-05,
      "loss": 3.0067,
      "step": 200732
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7989256381988525,
      "learning_rate": 2.4216895438930506e-05,
      "loss": 2.7827,
      "step": 200733
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5007686614990234,
      "learning_rate": 2.4215285373520775e-05,
      "loss": 3.1118,
      "step": 200734
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.529320478439331,
      "learning_rate": 2.421367535938463e-05,
      "loss": 2.6935,
      "step": 200735
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6352481842041016,
      "learning_rate": 2.4212065396522373e-05,
      "loss": 2.8401,
      "step": 200736
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.634911060333252,
      "learning_rate": 2.4210455484934265e-05,
      "loss": 2.8485,
      "step": 200737
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.15110182762146,
      "learning_rate": 2.4208845624620677e-05,
      "loss": 2.78,
      "step": 200738
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.9590189456939697,
      "learning_rate": 2.4207235815581804e-05,
      "loss": 2.8313,
      "step": 200739
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.781994104385376,
      "learning_rate": 2.4205626057818085e-05,
      "loss": 3.0016,
      "step": 200740
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.764214515686035,
      "learning_rate": 2.4204016351329745e-05,
      "loss": 2.7388,
      "step": 200741
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9315547943115234,
      "learning_rate": 2.420240669611706e-05,
      "loss": 2.9719,
      "step": 200742
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.902292490005493,
      "learning_rate": 2.4200797092180324e-05,
      "loss": 2.9779,
      "step": 200743
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.235943555831909,
      "learning_rate": 2.4199187539519904e-05,
      "loss": 2.6994,
      "step": 200744
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.358522891998291,
      "learning_rate": 2.4197578038136036e-05,
      "loss": 2.9996,
      "step": 200745
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.767794132232666,
      "learning_rate": 2.419596858802908e-05,
      "loss": 3.0605,
      "step": 200746
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0963985919952393,
      "learning_rate": 2.419435918919924e-05,
      "loss": 3.1462,
      "step": 200747
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3032479286193848,
      "learning_rate": 2.4192749841646987e-05,
      "loss": 3.0158,
      "step": 200748
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.995131731033325,
      "learning_rate": 2.4191140545372377e-05,
      "loss": 2.9743,
      "step": 200749
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.234084129333496,
      "learning_rate": 2.418953130037592e-05,
      "loss": 2.9306,
      "step": 200750
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4036355018615723,
      "learning_rate": 2.4187922106657775e-05,
      "loss": 2.9607,
      "step": 200751
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1249771118164062,
      "learning_rate": 2.418631296421838e-05,
      "loss": 2.9894,
      "step": 200752
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.540771484375,
      "learning_rate": 2.4184703873057866e-05,
      "loss": 3.057,
      "step": 200753
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.918233871459961,
      "learning_rate": 2.4183094833176765e-05,
      "loss": 2.8249,
      "step": 200754
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7082571983337402,
      "learning_rate": 2.4181485844575076e-05,
      "loss": 2.9495,
      "step": 200755
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.424765110015869,
      "learning_rate": 2.4179876907253337e-05,
      "loss": 2.7545,
      "step": 200756
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0890004634857178,
      "learning_rate": 2.4178268021211746e-05,
      "loss": 2.848,
      "step": 200757
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.649277925491333,
      "learning_rate": 2.417665918645063e-05,
      "loss": 2.9656,
      "step": 200758
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8502302169799805,
      "learning_rate": 2.417505040297023e-05,
      "loss": 2.623,
      "step": 200759
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9583072662353516,
      "learning_rate": 2.417344167077101e-05,
      "loss": 2.893,
      "step": 200760
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0769026279449463,
      "learning_rate": 2.4171832989853035e-05,
      "loss": 2.5868,
      "step": 200761
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5006866455078125,
      "learning_rate": 2.4170224360216807e-05,
      "loss": 3.1535,
      "step": 200762
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.510672092437744,
      "learning_rate": 2.4168615781862455e-05,
      "loss": 2.6893,
      "step": 200763
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9043800830841064,
      "learning_rate": 2.4167007254790448e-05,
      "loss": 2.5447,
      "step": 200764
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.5713822841644287,
      "learning_rate": 2.4165398779000923e-05,
      "loss": 3.0298,
      "step": 200765
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.314457893371582,
      "learning_rate": 2.416379035449437e-05,
      "loss": 3.0079,
      "step": 200766
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.4554433822631836,
      "learning_rate": 2.4162181981270866e-05,
      "loss": 3.102,
      "step": 200767
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.470982074737549,
      "learning_rate": 2.4160573659330873e-05,
      "loss": 2.8457,
      "step": 200768
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3899502754211426,
      "learning_rate": 2.4158965388674588e-05,
      "loss": 2.8509,
      "step": 200769
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8966739177703857,
      "learning_rate": 2.4157357169302417e-05,
      "loss": 2.7235,
      "step": 200770
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9776806831359863,
      "learning_rate": 2.4155749001214554e-05,
      "loss": 2.9519,
      "step": 200771
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.078871250152588,
      "learning_rate": 2.4154140884411433e-05,
      "loss": 2.7063,
      "step": 200772
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2728936672210693,
      "learning_rate": 2.4152532818893155e-05,
      "loss": 2.8139,
      "step": 200773
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8454318046569824,
      "learning_rate": 2.4150924804660187e-05,
      "loss": 3.0446,
      "step": 200774
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.095121383666992,
      "learning_rate": 2.414931684171273e-05,
      "loss": 2.943,
      "step": 200775
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5886547565460205,
      "learning_rate": 2.4147708930051146e-05,
      "loss": 2.9687,
      "step": 200776
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.443376302719116,
      "learning_rate": 2.4146101069675672e-05,
      "loss": 3.1295,
      "step": 200777
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6016104221343994,
      "learning_rate": 2.4144493260586705e-05,
      "loss": 2.9979,
      "step": 200778
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0490386486053467,
      "learning_rate": 2.414288550278448e-05,
      "loss": 2.9713,
      "step": 200779
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.980342149734497,
      "learning_rate": 2.41412777962693e-05,
      "loss": 2.9151,
      "step": 200780
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6743974685668945,
      "learning_rate": 2.413967014104139e-05,
      "loss": 2.9747,
      "step": 200781
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8869826793670654,
      "learning_rate": 2.413806253710122e-05,
      "loss": 2.8245,
      "step": 200782
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1992321014404297,
      "learning_rate": 2.4136454984448894e-05,
      "loss": 2.9348,
      "step": 200783
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.57749342918396,
      "learning_rate": 2.4134847483084875e-05,
      "loss": 2.9287,
      "step": 200784
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.672652244567871,
      "learning_rate": 2.4133240033009393e-05,
      "loss": 2.9787,
      "step": 200785
    },
    {
      "epoch": 2.61,
      "grad_norm": 4.834356784820557,
      "learning_rate": 2.4131632634222786e-05,
      "loss": 2.6079,
      "step": 200786
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2009994983673096,
      "learning_rate": 2.413002528672522e-05,
      "loss": 3.0548,
      "step": 200787
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1573195457458496,
      "learning_rate": 2.4128417990517158e-05,
      "loss": 2.7524,
      "step": 200788
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0068187713623047,
      "learning_rate": 2.4126810745598768e-05,
      "loss": 2.5668,
      "step": 200789
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.9348654747009277,
      "learning_rate": 2.4125203551970486e-05,
      "loss": 2.9213,
      "step": 200790
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0526156425476074,
      "learning_rate": 2.4123596409632505e-05,
      "loss": 2.9013,
      "step": 200791
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.17018723487854,
      "learning_rate": 2.4121989318585165e-05,
      "loss": 3.0975,
      "step": 200792
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.2577412128448486,
      "learning_rate": 2.4120382278828733e-05,
      "loss": 2.7499,
      "step": 200793
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6519105434417725,
      "learning_rate": 2.4118775290363533e-05,
      "loss": 3.0467,
      "step": 200794
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3124072551727295,
      "learning_rate": 2.411716835318984e-05,
      "loss": 2.8973,
      "step": 200795
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.596815824508667,
      "learning_rate": 2.411556146730802e-05,
      "loss": 2.8294,
      "step": 200796
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1869943141937256,
      "learning_rate": 2.4113954632718335e-05,
      "loss": 2.5308,
      "step": 200797
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.538331985473633,
      "learning_rate": 2.4112347849421053e-05,
      "loss": 2.8486,
      "step": 200798
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8837943077087402,
      "learning_rate": 2.4110741117416444e-05,
      "loss": 2.8084,
      "step": 200799
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.4358816146850586,
      "learning_rate": 2.4109134436704935e-05,
      "loss": 2.8606,
      "step": 200800
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.279014825820923,
      "learning_rate": 2.4107527807286665e-05,
      "loss": 2.617,
      "step": 200801
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.658911943435669,
      "learning_rate": 2.4105921229162063e-05,
      "loss": 2.9072,
      "step": 200802
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.84001088142395,
      "learning_rate": 2.4104314702331396e-05,
      "loss": 3.1332,
      "step": 200803
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.711024284362793,
      "learning_rate": 2.4102708226794932e-05,
      "loss": 2.9136,
      "step": 200804
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1548097133636475,
      "learning_rate": 2.4101101802552935e-05,
      "loss": 3.0184,
      "step": 200805
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.920269250869751,
      "learning_rate": 2.409949542960581e-05,
      "loss": 2.652,
      "step": 200806
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0651330947875977,
      "learning_rate": 2.409788910795375e-05,
      "loss": 2.8354,
      "step": 200807
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.377938985824585,
      "learning_rate": 2.4096282837597125e-05,
      "loss": 2.8307,
      "step": 200808
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8716952800750732,
      "learning_rate": 2.4094676618536234e-05,
      "loss": 2.7333,
      "step": 200809
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0902082920074463,
      "learning_rate": 2.409307045077128e-05,
      "loss": 2.6439,
      "step": 200810
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.7018063068389893,
      "learning_rate": 2.409146433430269e-05,
      "loss": 2.8012,
      "step": 200811
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.018298387527466,
      "learning_rate": 2.4089858269130736e-05,
      "loss": 3.1739,
      "step": 200812
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.994356155395508,
      "learning_rate": 2.408825225525558e-05,
      "loss": 2.7339,
      "step": 200813
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.3345260620117188,
      "learning_rate": 2.4086646292677723e-05,
      "loss": 3.1786,
      "step": 200814
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9115047454833984,
      "learning_rate": 2.4085040381397337e-05,
      "loss": 3.036,
      "step": 200815
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.9173355102539062,
      "learning_rate": 2.4083434521414712e-05,
      "loss": 3.1102,
      "step": 200816
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.9610366821289062,
      "learning_rate": 2.4081828712730256e-05,
      "loss": 2.8299,
      "step": 200817
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.919086217880249,
      "learning_rate": 2.4080222955344165e-05,
      "loss": 2.7737,
      "step": 200818
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7704944610595703,
      "learning_rate": 2.4078617249256738e-05,
      "loss": 3.0599,
      "step": 200819
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.5477616786956787,
      "learning_rate": 2.4077011594468343e-05,
      "loss": 3.0315,
      "step": 200820
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.930292844772339,
      "learning_rate": 2.4075405990979248e-05,
      "loss": 3.0015,
      "step": 200821
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.7232043743133545,
      "learning_rate": 2.407380043878968e-05,
      "loss": 2.8936,
      "step": 200822
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0316028594970703,
      "learning_rate": 2.4072194937900046e-05,
      "loss": 2.8024,
      "step": 200823
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.6481027603149414,
      "learning_rate": 2.4070589488310577e-05,
      "loss": 2.7514,
      "step": 200824
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.880826711654663,
      "learning_rate": 2.4068984090021604e-05,
      "loss": 2.6893,
      "step": 200825
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0570626258850098,
      "learning_rate": 2.4067378743033462e-05,
      "loss": 2.923,
      "step": 200826
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.8522958755493164,
      "learning_rate": 2.4065773447346382e-05,
      "loss": 3.0275,
      "step": 200827
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.1561312675476074,
      "learning_rate": 2.4064168202960632e-05,
      "loss": 2.8304,
      "step": 200828
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.722811698913574,
      "learning_rate": 2.4062563009876612e-05,
      "loss": 2.9855,
      "step": 200829
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.851083278656006,
      "learning_rate": 2.406095786809452e-05,
      "loss": 2.9842,
      "step": 200830
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.194589853286743,
      "learning_rate": 2.405935277761476e-05,
      "loss": 2.84,
      "step": 200831
    },
    {
      "epoch": 2.61,
      "grad_norm": 2.397873878479004,
      "learning_rate": 2.4057747738437493e-05,
      "loss": 2.8742,
      "step": 200832
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.053708791732788,
      "learning_rate": 2.4056142750563257e-05,
      "loss": 2.8501,
      "step": 200833
    },
    {
      "epoch": 2.61,
      "grad_norm": 3.0552377700805664,
      "learning_rate": 2.4054537813992048e-05,
      "loss": 2.9467,
      "step": 200834
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6300692558288574,
      "learning_rate": 2.4052932928724368e-05,
      "loss": 2.959,
      "step": 200835
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6628713607788086,
      "learning_rate": 2.4051328094760413e-05,
      "loss": 2.81,
      "step": 200836
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5365405082702637,
      "learning_rate": 2.404972331210059e-05,
      "loss": 2.6932,
      "step": 200837
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7163901329040527,
      "learning_rate": 2.4048118580745057e-05,
      "loss": 2.9512,
      "step": 200838
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.568202495574951,
      "learning_rate": 2.404651390069432e-05,
      "loss": 3.1683,
      "step": 200839
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.891981601715088,
      "learning_rate": 2.404490927194841e-05,
      "loss": 2.8617,
      "step": 200840
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7641525268554688,
      "learning_rate": 2.4043304694507824e-05,
      "loss": 3.1264,
      "step": 200841
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6463401317596436,
      "learning_rate": 2.4041700168372735e-05,
      "loss": 3.2454,
      "step": 200842
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6722910404205322,
      "learning_rate": 2.404009569354357e-05,
      "loss": 2.4707,
      "step": 200843
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.20297384262085,
      "learning_rate": 2.40384912700205e-05,
      "loss": 2.9246,
      "step": 200844
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.214380979537964,
      "learning_rate": 2.403688689780395e-05,
      "loss": 2.9358,
      "step": 200845
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.729893684387207,
      "learning_rate": 2.403528257689413e-05,
      "loss": 3.0463,
      "step": 200846
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2100744247436523,
      "learning_rate": 2.403367830729137e-05,
      "loss": 3.149,
      "step": 200847
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.863231897354126,
      "learning_rate": 2.4032074088995934e-05,
      "loss": 2.8402,
      "step": 200848
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7498700618743896,
      "learning_rate": 2.4030469922008155e-05,
      "loss": 2.8716,
      "step": 200849
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1384899616241455,
      "learning_rate": 2.4028865806328302e-05,
      "loss": 2.8484,
      "step": 200850
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7474560737609863,
      "learning_rate": 2.4027261741956704e-05,
      "loss": 2.8114,
      "step": 200851
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9751687049865723,
      "learning_rate": 2.4025657728893666e-05,
      "loss": 3.1698,
      "step": 200852
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5408883094787598,
      "learning_rate": 2.4024053767139482e-05,
      "loss": 2.9373,
      "step": 200853
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5660042762756348,
      "learning_rate": 2.402244985669436e-05,
      "loss": 2.9376,
      "step": 200854
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5594403743743896,
      "learning_rate": 2.4020845997558758e-05,
      "loss": 2.8565,
      "step": 200855
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.818004846572876,
      "learning_rate": 2.4019242189732812e-05,
      "loss": 2.9386,
      "step": 200856
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3167519569396973,
      "learning_rate": 2.401763843321696e-05,
      "loss": 2.7666,
      "step": 200857
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5731635093688965,
      "learning_rate": 2.4016034728011457e-05,
      "loss": 2.9297,
      "step": 200858
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6042275428771973,
      "learning_rate": 2.4014431074116548e-05,
      "loss": 2.9498,
      "step": 200859
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5528037548065186,
      "learning_rate": 2.4012827471532524e-05,
      "loss": 2.922,
      "step": 200860
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.649355173110962,
      "learning_rate": 2.4011223920259794e-05,
      "loss": 2.8103,
      "step": 200861
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6086480617523193,
      "learning_rate": 2.4009620420298513e-05,
      "loss": 2.6505,
      "step": 200862
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1149682998657227,
      "learning_rate": 2.4008016971649124e-05,
      "loss": 3.1341,
      "step": 200863
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.858790636062622,
      "learning_rate": 2.400641357431182e-05,
      "loss": 3.0311,
      "step": 200864
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7516915798187256,
      "learning_rate": 2.4004810228286975e-05,
      "loss": 2.9759,
      "step": 200865
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.872206211090088,
      "learning_rate": 2.4003206933574782e-05,
      "loss": 3.1676,
      "step": 200866
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.056623697280884,
      "learning_rate": 2.400160369017564e-05,
      "loss": 2.8723,
      "step": 200867
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9298255443573,
      "learning_rate": 2.400000049808979e-05,
      "loss": 3.0516,
      "step": 200868
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1734673976898193,
      "learning_rate": 2.3998397357317554e-05,
      "loss": 2.7723,
      "step": 200869
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.964118719100952,
      "learning_rate": 2.3996794267859276e-05,
      "loss": 2.852,
      "step": 200870
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.015564441680908,
      "learning_rate": 2.3995191229715182e-05,
      "loss": 2.6491,
      "step": 200871
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1480565071105957,
      "learning_rate": 2.399358824288551e-05,
      "loss": 3.028,
      "step": 200872
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6442623138427734,
      "learning_rate": 2.399198530737072e-05,
      "loss": 2.7854,
      "step": 200873
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.071913957595825,
      "learning_rate": 2.3990382423170983e-05,
      "loss": 3.0404,
      "step": 200874
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7518506050109863,
      "learning_rate": 2.39887795902867e-05,
      "loss": 2.9523,
      "step": 200875
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.378582000732422,
      "learning_rate": 2.39871768087181e-05,
      "loss": 3.1549,
      "step": 200876
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4656319618225098,
      "learning_rate": 2.3985574078465485e-05,
      "loss": 2.807,
      "step": 200877
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.961073160171509,
      "learning_rate": 2.3983971399529124e-05,
      "loss": 2.9377,
      "step": 200878
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3470122814178467,
      "learning_rate": 2.398236877190941e-05,
      "loss": 2.6315,
      "step": 200879
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7546138763427734,
      "learning_rate": 2.3980766195606548e-05,
      "loss": 2.7806,
      "step": 200880
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.692723274230957,
      "learning_rate": 2.397916367062087e-05,
      "loss": 2.9996,
      "step": 200881
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4463586807250977,
      "learning_rate": 2.397756119695271e-05,
      "loss": 2.9881,
      "step": 200882
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8101377487182617,
      "learning_rate": 2.3975958774602334e-05,
      "loss": 2.848,
      "step": 200883
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.004397392272949,
      "learning_rate": 2.397435640356997e-05,
      "loss": 2.9025,
      "step": 200884
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8302040100097656,
      "learning_rate": 2.397275408385606e-05,
      "loss": 2.9206,
      "step": 200885
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.975515127182007,
      "learning_rate": 2.3971151815460764e-05,
      "loss": 2.8616,
      "step": 200886
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.935147762298584,
      "learning_rate": 2.3969549598384486e-05,
      "loss": 3.1342,
      "step": 200887
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8447651863098145,
      "learning_rate": 2.396794743262749e-05,
      "loss": 2.9086,
      "step": 200888
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.938873529434204,
      "learning_rate": 2.396634531819004e-05,
      "loss": 2.9628,
      "step": 200889
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1214423179626465,
      "learning_rate": 2.3964743255072438e-05,
      "loss": 3.0487,
      "step": 200890
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6914639472961426,
      "learning_rate": 2.3963141243275018e-05,
      "loss": 2.6025,
      "step": 200891
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.233673334121704,
      "learning_rate": 2.3961539282798047e-05,
      "loss": 2.9275,
      "step": 200892
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.501466751098633,
      "learning_rate": 2.3959937373641858e-05,
      "loss": 2.9326,
      "step": 200893
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9916625022888184,
      "learning_rate": 2.3958335515806713e-05,
      "loss": 2.9945,
      "step": 200894
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.037473201751709,
      "learning_rate": 2.395673370929292e-05,
      "loss": 2.9303,
      "step": 200895
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4860901832580566,
      "learning_rate": 2.3955131954100804e-05,
      "loss": 3.16,
      "step": 200896
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6947083473205566,
      "learning_rate": 2.3953530250230635e-05,
      "loss": 2.9211,
      "step": 200897
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.361845016479492,
      "learning_rate": 2.395192859768268e-05,
      "loss": 2.9,
      "step": 200898
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9491560459136963,
      "learning_rate": 2.3950326996457302e-05,
      "loss": 2.9066,
      "step": 200899
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.180647611618042,
      "learning_rate": 2.3948725446554806e-05,
      "loss": 2.8093,
      "step": 200900
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.813218116760254,
      "learning_rate": 2.3947123947975354e-05,
      "loss": 2.8143,
      "step": 200901
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.553239107131958,
      "learning_rate": 2.394552250071945e-05,
      "loss": 2.8726,
      "step": 200902
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1606996059417725,
      "learning_rate": 2.3943921104787222e-05,
      "loss": 2.797,
      "step": 200903
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7483301162719727,
      "learning_rate": 2.3942319760179007e-05,
      "loss": 2.9391,
      "step": 200904
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.95587158203125,
      "learning_rate": 2.3940718466895205e-05,
      "loss": 3.143,
      "step": 200905
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.059251546859741,
      "learning_rate": 2.3939117224936012e-05,
      "loss": 3.087,
      "step": 200906
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.025972843170166,
      "learning_rate": 2.3937516034301696e-05,
      "loss": 2.9269,
      "step": 200907
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0415141582489014,
      "learning_rate": 2.3935914894992658e-05,
      "loss": 2.871,
      "step": 200908
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.683232069015503,
      "learning_rate": 2.3934313807009066e-05,
      "loss": 3.0187,
      "step": 200909
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.167560577392578,
      "learning_rate": 2.393271277035138e-05,
      "loss": 3.018,
      "step": 200910
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.963747262954712,
      "learning_rate": 2.3931111785019807e-05,
      "loss": 2.8528,
      "step": 200911
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7777154445648193,
      "learning_rate": 2.392951085101461e-05,
      "loss": 3.0243,
      "step": 200912
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7919037342071533,
      "learning_rate": 2.3927909968336156e-05,
      "loss": 2.9985,
      "step": 200913
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.12518048286438,
      "learning_rate": 2.3926309136984746e-05,
      "loss": 2.9704,
      "step": 200914
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4028661251068115,
      "learning_rate": 2.3924708356960575e-05,
      "loss": 3.0774,
      "step": 200915
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5610268115997314,
      "learning_rate": 2.392310762826405e-05,
      "loss": 2.906,
      "step": 200916
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0313985347747803,
      "learning_rate": 2.3921506950895464e-05,
      "loss": 2.9154,
      "step": 200917
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.890533208847046,
      "learning_rate": 2.3919906324854988e-05,
      "loss": 2.8212,
      "step": 200918
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.8922319412231445,
      "learning_rate": 2.391830575014312e-05,
      "loss": 2.9976,
      "step": 200919
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7103254795074463,
      "learning_rate": 2.391670522675999e-05,
      "loss": 3.0774,
      "step": 200920
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.669921875,
      "learning_rate": 2.3915104754705938e-05,
      "loss": 2.9302,
      "step": 200921
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.066277503967285,
      "learning_rate": 2.3913504333981325e-05,
      "loss": 2.7733,
      "step": 200922
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.348179340362549,
      "learning_rate": 2.3911903964586354e-05,
      "loss": 3.1012,
      "step": 200923
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.575319766998291,
      "learning_rate": 2.3910303646521423e-05,
      "loss": 2.9047,
      "step": 200924
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.590548515319824,
      "learning_rate": 2.3908703379786797e-05,
      "loss": 2.5728,
      "step": 200925
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.078907012939453,
      "learning_rate": 2.3907103164382713e-05,
      "loss": 3.0023,
      "step": 200926
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4507815837860107,
      "learning_rate": 2.39055030003095e-05,
      "loss": 3.1488,
      "step": 200927
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0633575916290283,
      "learning_rate": 2.3903902887567494e-05,
      "loss": 2.6899,
      "step": 200928
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0108745098114014,
      "learning_rate": 2.3902302826156928e-05,
      "loss": 2.8726,
      "step": 200929
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2178289890289307,
      "learning_rate": 2.3900702816078164e-05,
      "loss": 3.1138,
      "step": 200930
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.567631959915161,
      "learning_rate": 2.3899102857331477e-05,
      "loss": 3.1208,
      "step": 200931
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.110943078994751,
      "learning_rate": 2.389750294991716e-05,
      "loss": 2.9435,
      "step": 200932
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.157998561859131,
      "learning_rate": 2.3895903093835476e-05,
      "loss": 3.0432,
      "step": 200933
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0314924716949463,
      "learning_rate": 2.389430328908677e-05,
      "loss": 2.9646,
      "step": 200934
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4820687770843506,
      "learning_rate": 2.3892703535671297e-05,
      "loss": 2.9874,
      "step": 200935
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.643810272216797,
      "learning_rate": 2.3891103833589464e-05,
      "loss": 2.8549,
      "step": 200936
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.791489601135254,
      "learning_rate": 2.3889504182841433e-05,
      "loss": 2.8523,
      "step": 200937
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.771366596221924,
      "learning_rate": 2.3887904583427576e-05,
      "loss": 3.2496,
      "step": 200938
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6733345985412598,
      "learning_rate": 2.388630503534812e-05,
      "loss": 2.968,
      "step": 200939
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.856130361557007,
      "learning_rate": 2.3884705538603466e-05,
      "loss": 3.1048,
      "step": 200940
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.246650218963623,
      "learning_rate": 2.3883106093193816e-05,
      "loss": 2.8991,
      "step": 200941
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.541433811187744,
      "learning_rate": 2.3881506699119536e-05,
      "loss": 2.7993,
      "step": 200942
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9700212478637695,
      "learning_rate": 2.3879907356380924e-05,
      "loss": 2.8852,
      "step": 200943
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4120330810546875,
      "learning_rate": 2.3878308064978215e-05,
      "loss": 2.7769,
      "step": 200944
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8889386653900146,
      "learning_rate": 2.3876708824911706e-05,
      "loss": 2.9229,
      "step": 200945
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.129274368286133,
      "learning_rate": 2.38751096361818e-05,
      "loss": 2.416,
      "step": 200946
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.970287322998047,
      "learning_rate": 2.387351049878866e-05,
      "loss": 3.1292,
      "step": 200947
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7716355323791504,
      "learning_rate": 2.387191141273269e-05,
      "loss": 2.7586,
      "step": 200948
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5805225372314453,
      "learning_rate": 2.387031237801412e-05,
      "loss": 3.0018,
      "step": 200949
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4387707710266113,
      "learning_rate": 2.386871339463332e-05,
      "loss": 3.0118,
      "step": 200950
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.789637804031372,
      "learning_rate": 2.3867114462590455e-05,
      "loss": 2.8857,
      "step": 200951
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9472005367279053,
      "learning_rate": 2.386551558188595e-05,
      "loss": 2.6725,
      "step": 200952
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2046544551849365,
      "learning_rate": 2.3863916752520053e-05,
      "loss": 2.9218,
      "step": 200953
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.698793411254883,
      "learning_rate": 2.3862317974493083e-05,
      "loss": 2.9823,
      "step": 200954
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.18166446685791,
      "learning_rate": 2.3860719247805315e-05,
      "loss": 2.9394,
      "step": 200955
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.13338565826416,
      "learning_rate": 2.385912057245708e-05,
      "loss": 3.0125,
      "step": 200956
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8042075634002686,
      "learning_rate": 2.3857521948448575e-05,
      "loss": 2.9342,
      "step": 200957
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.9458937644958496,
      "learning_rate": 2.3855923375780207e-05,
      "loss": 2.8795,
      "step": 200958
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5034501552581787,
      "learning_rate": 2.38543248544522e-05,
      "loss": 2.8024,
      "step": 200959
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.475409507751465,
      "learning_rate": 2.385272638446496e-05,
      "loss": 3.0467,
      "step": 200960
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5491814613342285,
      "learning_rate": 2.3851127965818683e-05,
      "loss": 2.9735,
      "step": 200961
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8138351440429688,
      "learning_rate": 2.3849529598513706e-05,
      "loss": 2.8721,
      "step": 200962
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6493113040924072,
      "learning_rate": 2.384793128255026e-05,
      "loss": 2.8356,
      "step": 200963
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9375126361846924,
      "learning_rate": 2.3846333017928742e-05,
      "loss": 3.0832,
      "step": 200964
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2339928150177,
      "learning_rate": 2.384473480464936e-05,
      "loss": 2.725,
      "step": 200965
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6682331562042236,
      "learning_rate": 2.3843136642712467e-05,
      "loss": 2.9385,
      "step": 200966
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.241703987121582,
      "learning_rate": 2.384153853211841e-05,
      "loss": 3.0764,
      "step": 200967
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0238585472106934,
      "learning_rate": 2.383994047286738e-05,
      "loss": 3.2217,
      "step": 200968
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6775083541870117,
      "learning_rate": 2.383834246495968e-05,
      "loss": 3.1848,
      "step": 200969
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2138381004333496,
      "learning_rate": 2.383674450839571e-05,
      "loss": 2.7814,
      "step": 200970
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0183353424072266,
      "learning_rate": 2.3835146603175635e-05,
      "loss": 2.7598,
      "step": 200971
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4974236488342285,
      "learning_rate": 2.383354874929989e-05,
      "loss": 2.9456,
      "step": 200972
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.388735294342041,
      "learning_rate": 2.3831950946768673e-05,
      "loss": 2.8867,
      "step": 200973
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.981661319732666,
      "learning_rate": 2.383035319558232e-05,
      "loss": 2.7823,
      "step": 200974
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8564233779907227,
      "learning_rate": 2.382875549574106e-05,
      "loss": 2.8674,
      "step": 200975
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.948965311050415,
      "learning_rate": 2.3827157847245326e-05,
      "loss": 2.922,
      "step": 200976
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.225147008895874,
      "learning_rate": 2.3825560250095255e-05,
      "loss": 2.9589,
      "step": 200977
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5032777786254883,
      "learning_rate": 2.382396270429131e-05,
      "loss": 2.9891,
      "step": 200978
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.098110198974609,
      "learning_rate": 2.3822365209833627e-05,
      "loss": 3.0455,
      "step": 200979
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.0373311042785645,
      "learning_rate": 2.382076776672267e-05,
      "loss": 2.9126,
      "step": 200980
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.311394214630127,
      "learning_rate": 2.3819170374958574e-05,
      "loss": 2.975,
      "step": 200981
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1526668071746826,
      "learning_rate": 2.3817573034541736e-05,
      "loss": 2.8314,
      "step": 200982
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.574688673019409,
      "learning_rate": 2.3815975745472394e-05,
      "loss": 3.0162,
      "step": 200983
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.890690565109253,
      "learning_rate": 2.381437850775091e-05,
      "loss": 2.8094,
      "step": 200984
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8869335651397705,
      "learning_rate": 2.381278132137748e-05,
      "loss": 2.7648,
      "step": 200985
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.195643186569214,
      "learning_rate": 2.381118418635255e-05,
      "loss": 2.8685,
      "step": 200986
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3996386528015137,
      "learning_rate": 2.3809587102676342e-05,
      "loss": 2.9834,
      "step": 200987
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7071452140808105,
      "learning_rate": 2.3807990070349124e-05,
      "loss": 3.0626,
      "step": 200988
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6592695713043213,
      "learning_rate": 2.380639308937117e-05,
      "loss": 2.7597,
      "step": 200989
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8385610580444336,
      "learning_rate": 2.3804796159742833e-05,
      "loss": 3.0132,
      "step": 200990
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.008063793182373,
      "learning_rate": 2.380319928146439e-05,
      "loss": 2.9819,
      "step": 200991
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.78275203704834,
      "learning_rate": 2.3801602454536207e-05,
      "loss": 2.9336,
      "step": 200992
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.464087963104248,
      "learning_rate": 2.380000567895848e-05,
      "loss": 3.1179,
      "step": 200993
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6776747703552246,
      "learning_rate": 2.3798408954731507e-05,
      "loss": 3.2951,
      "step": 200994
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8442270755767822,
      "learning_rate": 2.3796812281855694e-05,
      "loss": 2.8092,
      "step": 200995
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7789390087127686,
      "learning_rate": 2.379521566033127e-05,
      "loss": 3.0124,
      "step": 200996
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.726041316986084,
      "learning_rate": 2.3793619090158465e-05,
      "loss": 2.7878,
      "step": 200997
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.738579273223877,
      "learning_rate": 2.3792022571337688e-05,
      "loss": 2.8196,
      "step": 200998
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.9543402194976807,
      "learning_rate": 2.3790426103869197e-05,
      "loss": 2.921,
      "step": 200999
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.710463285446167,
      "learning_rate": 2.3788829687753197e-05,
      "loss": 2.9724,
      "step": 201000
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0155904293060303,
      "learning_rate": 2.378723332299015e-05,
      "loss": 2.9133,
      "step": 201001
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8545658588409424,
      "learning_rate": 2.378563700958026e-05,
      "loss": 2.8176,
      "step": 201002
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8722476959228516,
      "learning_rate": 2.3784040747523792e-05,
      "loss": 3.0092,
      "step": 201003
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9764773845672607,
      "learning_rate": 2.3782444536821145e-05,
      "loss": 3.0511,
      "step": 201004
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0842702388763428,
      "learning_rate": 2.3780848377472517e-05,
      "loss": 3.0906,
      "step": 201005
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.477693796157837,
      "learning_rate": 2.377925226947821e-05,
      "loss": 3.1446,
      "step": 201006
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.155566930770874,
      "learning_rate": 2.3777656212838625e-05,
      "loss": 2.8704,
      "step": 201007
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2541749477386475,
      "learning_rate": 2.3776060207553926e-05,
      "loss": 2.9289,
      "step": 201008
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.051473617553711,
      "learning_rate": 2.3774464253624513e-05,
      "loss": 3.0707,
      "step": 201009
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2885639667510986,
      "learning_rate": 2.3772868351050657e-05,
      "loss": 2.908,
      "step": 201010
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1925296783447266,
      "learning_rate": 2.3771272499832618e-05,
      "loss": 2.666,
      "step": 201011
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.909817695617676,
      "learning_rate": 2.3769676699970695e-05,
      "loss": 2.8074,
      "step": 201012
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.809281587600708,
      "learning_rate": 2.376808095146523e-05,
      "loss": 2.8327,
      "step": 201013
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3184773921966553,
      "learning_rate": 2.3766485254316448e-05,
      "loss": 2.9487,
      "step": 201014
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7909677028656006,
      "learning_rate": 2.376488960852472e-05,
      "loss": 3.1433,
      "step": 201015
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7396624088287354,
      "learning_rate": 2.376329401409034e-05,
      "loss": 3.0862,
      "step": 201016
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.983654260635376,
      "learning_rate": 2.376169847101358e-05,
      "loss": 3.0042,
      "step": 201017
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2185373306274414,
      "learning_rate": 2.376010297929467e-05,
      "loss": 2.9214,
      "step": 201018
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.532005548477173,
      "learning_rate": 2.3758507538934012e-05,
      "loss": 3.075,
      "step": 201019
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7471301555633545,
      "learning_rate": 2.375691214993184e-05,
      "loss": 2.8427,
      "step": 201020
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.728208303451538,
      "learning_rate": 2.3755316812288516e-05,
      "loss": 2.8579,
      "step": 201021
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.736492872238159,
      "learning_rate": 2.3753721526004275e-05,
      "loss": 2.7672,
      "step": 201022
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1732301712036133,
      "learning_rate": 2.3752126291079454e-05,
      "loss": 2.8674,
      "step": 201023
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3222737312316895,
      "learning_rate": 2.3750531107514248e-05,
      "loss": 2.8969,
      "step": 201024
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.533686637878418,
      "learning_rate": 2.3748935975309126e-05,
      "loss": 2.8054,
      "step": 201025
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.946838140487671,
      "learning_rate": 2.374734089446422e-05,
      "loss": 2.9251,
      "step": 201026
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6203155517578125,
      "learning_rate": 2.3745745864979927e-05,
      "loss": 2.71,
      "step": 201027
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.045100688934326,
      "learning_rate": 2.374415088685655e-05,
      "loss": 3.2087,
      "step": 201028
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.815810203552246,
      "learning_rate": 2.374255596009432e-05,
      "loss": 3.0881,
      "step": 201029
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.861301898956299,
      "learning_rate": 2.374096108469351e-05,
      "loss": 2.8726,
      "step": 201030
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8050291538238525,
      "learning_rate": 2.3739366260654547e-05,
      "loss": 3.0049,
      "step": 201031
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.915544271469116,
      "learning_rate": 2.3737771487977598e-05,
      "loss": 2.886,
      "step": 201032
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3197357654571533,
      "learning_rate": 2.3736176766663028e-05,
      "loss": 3.1667,
      "step": 201033
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.496619701385498,
      "learning_rate": 2.3734582096711175e-05,
      "loss": 3.0509,
      "step": 201034
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6741912364959717,
      "learning_rate": 2.3732987478122234e-05,
      "loss": 3.0313,
      "step": 201035
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.055239677429199,
      "learning_rate": 2.3731392910896507e-05,
      "loss": 2.9068,
      "step": 201036
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.64345121383667,
      "learning_rate": 2.3729798395034393e-05,
      "loss": 2.6994,
      "step": 201037
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9667141437530518,
      "learning_rate": 2.372820393053606e-05,
      "loss": 2.9802,
      "step": 201038
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2800698280334473,
      "learning_rate": 2.3726609517401906e-05,
      "loss": 2.7901,
      "step": 201039
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8365392684936523,
      "learning_rate": 2.37250151556322e-05,
      "loss": 3.0547,
      "step": 201040
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.989705801010132,
      "learning_rate": 2.3723420845227238e-05,
      "loss": 3.1229,
      "step": 201041
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.308156967163086,
      "learning_rate": 2.3721826586187252e-05,
      "loss": 2.9279,
      "step": 201042
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0665924549102783,
      "learning_rate": 2.372023237851265e-05,
      "loss": 3.1573,
      "step": 201043
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.256396770477295,
      "learning_rate": 2.3718638222203624e-05,
      "loss": 2.7188,
      "step": 201044
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7469382286071777,
      "learning_rate": 2.371704411726054e-05,
      "loss": 2.9409,
      "step": 201045
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.903885841369629,
      "learning_rate": 2.371545006368364e-05,
      "loss": 2.9782,
      "step": 201046
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.821649551391602,
      "learning_rate": 2.371385606147338e-05,
      "loss": 2.9754,
      "step": 201047
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.008420944213867,
      "learning_rate": 2.3712262110629797e-05,
      "loss": 3.0984,
      "step": 201048
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4912912845611572,
      "learning_rate": 2.3710668211153393e-05,
      "loss": 2.6856,
      "step": 201049
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2382853031158447,
      "learning_rate": 2.37090743630443e-05,
      "loss": 2.917,
      "step": 201050
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7519869804382324,
      "learning_rate": 2.3707480566302984e-05,
      "loss": 2.8898,
      "step": 201051
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0797224044799805,
      "learning_rate": 2.370588682092961e-05,
      "loss": 2.8463,
      "step": 201052
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9490432739257812,
      "learning_rate": 2.3704293126924645e-05,
      "loss": 2.9815,
      "step": 201053
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2566449642181396,
      "learning_rate": 2.3702699484288157e-05,
      "loss": 2.797,
      "step": 201054
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.163573265075684,
      "learning_rate": 2.3701105893020578e-05,
      "loss": 3.2223,
      "step": 201055
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.251040458679199,
      "learning_rate": 2.3699512353122142e-05,
      "loss": 2.9431,
      "step": 201056
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.780590295791626,
      "learning_rate": 2.3697918864593246e-05,
      "loss": 2.9866,
      "step": 201057
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.597046136856079,
      "learning_rate": 2.369632542743406e-05,
      "loss": 2.6391,
      "step": 201058
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7949702739715576,
      "learning_rate": 2.369473204164505e-05,
      "loss": 2.8131,
      "step": 201059
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.323460578918457,
      "learning_rate": 2.3693138707226313e-05,
      "loss": 2.8325,
      "step": 201060
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8544862270355225,
      "learning_rate": 2.3691545424178282e-05,
      "loss": 3.0532,
      "step": 201061
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5728511810302734,
      "learning_rate": 2.3689952192501126e-05,
      "loss": 2.9754,
      "step": 201062
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6378846168518066,
      "learning_rate": 2.3688359012195312e-05,
      "loss": 2.7863,
      "step": 201063
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.243192195892334,
      "learning_rate": 2.3686765883260973e-05,
      "loss": 3.0145,
      "step": 201064
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.93986177444458,
      "learning_rate": 2.3685172805698605e-05,
      "loss": 3.04,
      "step": 201065
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3070876598358154,
      "learning_rate": 2.3683579779508278e-05,
      "loss": 2.9438,
      "step": 201066
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1349925994873047,
      "learning_rate": 2.3681986804690424e-05,
      "loss": 3.0524,
      "step": 201067
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0157272815704346,
      "learning_rate": 2.368039388124524e-05,
      "loss": 2.8998,
      "step": 201068
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5659940242767334,
      "learning_rate": 2.3678801009173164e-05,
      "loss": 2.7104,
      "step": 201069
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7497429847717285,
      "learning_rate": 2.367720818847436e-05,
      "loss": 2.8232,
      "step": 201070
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5332157611846924,
      "learning_rate": 2.367561541914923e-05,
      "loss": 2.9029,
      "step": 201071
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5800459384918213,
      "learning_rate": 2.3674022701198005e-05,
      "loss": 2.8507,
      "step": 201072
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.544617652893066,
      "learning_rate": 2.3672430034621014e-05,
      "loss": 2.9254,
      "step": 201073
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0263559818267822,
      "learning_rate": 2.3670837419418466e-05,
      "loss": 2.7812,
      "step": 201074
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6588971614837646,
      "learning_rate": 2.3669244855590786e-05,
      "loss": 3.0065,
      "step": 201075
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1001813411712646,
      "learning_rate": 2.3667652343138176e-05,
      "loss": 2.9595,
      "step": 201076
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8388421535491943,
      "learning_rate": 2.3666059882060973e-05,
      "loss": 2.9449,
      "step": 201077
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.987898111343384,
      "learning_rate": 2.3664467472359504e-05,
      "loss": 2.7957,
      "step": 201078
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.212965965270996,
      "learning_rate": 2.3662875114033974e-05,
      "loss": 2.9913,
      "step": 201079
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0656161308288574,
      "learning_rate": 2.3661282807084746e-05,
      "loss": 2.8066,
      "step": 201080
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4037744998931885,
      "learning_rate": 2.3659690551512157e-05,
      "loss": 2.9501,
      "step": 201081
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6596927642822266,
      "learning_rate": 2.3658098347316367e-05,
      "loss": 2.8956,
      "step": 201082
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.21579647064209,
      "learning_rate": 2.365650619449778e-05,
      "loss": 2.7394,
      "step": 201083
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.729443073272705,
      "learning_rate": 2.3654914093056697e-05,
      "loss": 3.013,
      "step": 201084
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9455783367156982,
      "learning_rate": 2.365332204299332e-05,
      "loss": 2.9167,
      "step": 201085
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6828439235687256,
      "learning_rate": 2.365173004430807e-05,
      "loss": 2.9575,
      "step": 201086
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.899951696395874,
      "learning_rate": 2.365013809700116e-05,
      "loss": 2.7255,
      "step": 201087
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2723352909088135,
      "learning_rate": 2.364854620107288e-05,
      "loss": 2.849,
      "step": 201088
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.106790781021118,
      "learning_rate": 2.3646954356523574e-05,
      "loss": 2.8655,
      "step": 201089
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.955594539642334,
      "learning_rate": 2.364536256335353e-05,
      "loss": 2.9034,
      "step": 201090
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.1690263748168945,
      "learning_rate": 2.3643770821562992e-05,
      "loss": 2.9873,
      "step": 201091
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0151500701904297,
      "learning_rate": 2.3642179131152316e-05,
      "loss": 2.83,
      "step": 201092
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.88907790184021,
      "learning_rate": 2.3640587492121744e-05,
      "loss": 2.9547,
      "step": 201093
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.830606698989868,
      "learning_rate": 2.3638995904471635e-05,
      "loss": 3.0022,
      "step": 201094
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.738481044769287,
      "learning_rate": 2.3637404368202294e-05,
      "loss": 3.0287,
      "step": 201095
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.147216558456421,
      "learning_rate": 2.3635812883313955e-05,
      "loss": 3.0241,
      "step": 201096
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5767946243286133,
      "learning_rate": 2.3634221449806878e-05,
      "loss": 2.9305,
      "step": 201097
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8906984329223633,
      "learning_rate": 2.363263006768147e-05,
      "loss": 2.7315,
      "step": 201098
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.301193952560425,
      "learning_rate": 2.3631038736937924e-05,
      "loss": 2.9219,
      "step": 201099
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.875502109527588,
      "learning_rate": 2.362944745757661e-05,
      "loss": 2.9303,
      "step": 201100
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0250306129455566,
      "learning_rate": 2.362785622959783e-05,
      "loss": 2.929,
      "step": 201101
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.636812210083008,
      "learning_rate": 2.362626505300185e-05,
      "loss": 2.8055,
      "step": 201102
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0887982845306396,
      "learning_rate": 2.3624673927788896e-05,
      "loss": 3.0332,
      "step": 201103
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.584707021713257,
      "learning_rate": 2.3623082853959374e-05,
      "loss": 2.9888,
      "step": 201104
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7059712409973145,
      "learning_rate": 2.3621491831513518e-05,
      "loss": 3.0477,
      "step": 201105
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.945786237716675,
      "learning_rate": 2.361990086045169e-05,
      "loss": 2.9794,
      "step": 201106
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.109590530395508,
      "learning_rate": 2.3618309940774126e-05,
      "loss": 2.98,
      "step": 201107
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.501284599304199,
      "learning_rate": 2.3616719072481127e-05,
      "loss": 2.965,
      "step": 201108
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8892385959625244,
      "learning_rate": 2.3615128255572958e-05,
      "loss": 2.9119,
      "step": 201109
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4843902587890625,
      "learning_rate": 2.3613537490050015e-05,
      "loss": 2.9584,
      "step": 201110
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7411141395568848,
      "learning_rate": 2.3611946775912472e-05,
      "loss": 2.9552,
      "step": 201111
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7027769088745117,
      "learning_rate": 2.361035611316072e-05,
      "loss": 2.9481,
      "step": 201112
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.054984092712402,
      "learning_rate": 2.3608765501795e-05,
      "loss": 2.7641,
      "step": 201113
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3935885429382324,
      "learning_rate": 2.360717494181571e-05,
      "loss": 2.9341,
      "step": 201114
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6786208152770996,
      "learning_rate": 2.3605584433222945e-05,
      "loss": 2.8605,
      "step": 201115
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.943060874938965,
      "learning_rate": 2.360399397601721e-05,
      "loss": 2.7048,
      "step": 201116
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.092914581298828,
      "learning_rate": 2.3602403570198637e-05,
      "loss": 2.8045,
      "step": 201117
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.863762378692627,
      "learning_rate": 2.3600813215767655e-05,
      "loss": 2.9581,
      "step": 201118
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.43220591545105,
      "learning_rate": 2.3599222912724434e-05,
      "loss": 2.869,
      "step": 201119
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3156023025512695,
      "learning_rate": 2.3597632661069442e-05,
      "loss": 2.9489,
      "step": 201120
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9006550312042236,
      "learning_rate": 2.3596042460802777e-05,
      "loss": 3.0631,
      "step": 201121
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.735544443130493,
      "learning_rate": 2.359445231192487e-05,
      "loss": 2.8389,
      "step": 201122
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.935335636138916,
      "learning_rate": 2.3592862214435927e-05,
      "loss": 3.1727,
      "step": 201123
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.544893980026245,
      "learning_rate": 2.3591272168336305e-05,
      "loss": 2.8353,
      "step": 201124
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.72109842300415,
      "learning_rate": 2.358968217362628e-05,
      "loss": 2.7551,
      "step": 201125
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0292670726776123,
      "learning_rate": 2.358809223030621e-05,
      "loss": 3.233,
      "step": 201126
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6613738536834717,
      "learning_rate": 2.3586502338376266e-05,
      "loss": 2.9667,
      "step": 201127
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5400068759918213,
      "learning_rate": 2.358491249783685e-05,
      "loss": 2.7865,
      "step": 201128
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.406623125076294,
      "learning_rate": 2.3583322708688156e-05,
      "loss": 2.6376,
      "step": 201129
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8948352336883545,
      "learning_rate": 2.358173297093059e-05,
      "loss": 2.9801,
      "step": 201130
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7205862998962402,
      "learning_rate": 2.3580143284564346e-05,
      "loss": 2.7093,
      "step": 201131
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.302338123321533,
      "learning_rate": 2.3578553649589892e-05,
      "loss": 2.9557,
      "step": 201132
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.356311082839966,
      "learning_rate": 2.3576964066007265e-05,
      "loss": 2.9819,
      "step": 201133
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1831796169281006,
      "learning_rate": 2.3575374533816992e-05,
      "loss": 2.874,
      "step": 201134
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.032841205596924,
      "learning_rate": 2.3573785053019177e-05,
      "loss": 2.5793,
      "step": 201135
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9531807899475098,
      "learning_rate": 2.3572195623614286e-05,
      "loss": 2.8436,
      "step": 201136
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.928339958190918,
      "learning_rate": 2.357060624560252e-05,
      "loss": 2.8803,
      "step": 201137
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7235121726989746,
      "learning_rate": 2.3569016918984274e-05,
      "loss": 2.6551,
      "step": 201138
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8945112228393555,
      "learning_rate": 2.356742764375965e-05,
      "loss": 2.9737,
      "step": 201139
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7036426067352295,
      "learning_rate": 2.3565838419929118e-05,
      "loss": 2.7878,
      "step": 201140
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3862271308898926,
      "learning_rate": 2.356424924749287e-05,
      "loss": 2.7324,
      "step": 201141
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7333767414093018,
      "learning_rate": 2.3562660126451317e-05,
      "loss": 2.987,
      "step": 201142
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.189426422119141,
      "learning_rate": 2.3561071056804616e-05,
      "loss": 3.0157,
      "step": 201143
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8543474674224854,
      "learning_rate": 2.35594820385532e-05,
      "loss": 2.8135,
      "step": 201144
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2694435119628906,
      "learning_rate": 2.3557893071697244e-05,
      "loss": 2.7546,
      "step": 201145
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8961801528930664,
      "learning_rate": 2.3556304156237105e-05,
      "loss": 2.9267,
      "step": 201146
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4214670658111572,
      "learning_rate": 2.3554715292173056e-05,
      "loss": 2.8403,
      "step": 201147
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4706764221191406,
      "learning_rate": 2.3553126479505424e-05,
      "loss": 2.9507,
      "step": 201148
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.840766668319702,
      "learning_rate": 2.3551537718234446e-05,
      "loss": 3.1074,
      "step": 201149
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6189868450164795,
      "learning_rate": 2.3549949008360558e-05,
      "loss": 3.1229,
      "step": 201150
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7050294876098633,
      "learning_rate": 2.3548360349883822e-05,
      "loss": 2.966,
      "step": 201151
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.671982526779175,
      "learning_rate": 2.3546771742804737e-05,
      "loss": 2.8716,
      "step": 201152
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7003164291381836,
      "learning_rate": 2.3545183187123472e-05,
      "loss": 2.7493,
      "step": 201153
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0048532485961914,
      "learning_rate": 2.3543594682840428e-05,
      "loss": 2.9343,
      "step": 201154
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.769131660461426,
      "learning_rate": 2.35420062299558e-05,
      "loss": 2.8878,
      "step": 201155
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7741072177886963,
      "learning_rate": 2.3540417828469992e-05,
      "loss": 2.8517,
      "step": 201156
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.957608699798584,
      "learning_rate": 2.3538829478383238e-05,
      "loss": 3.0857,
      "step": 201157
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6589064598083496,
      "learning_rate": 2.3537241179695832e-05,
      "loss": 2.8506,
      "step": 201158
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.83024263381958,
      "learning_rate": 2.353565293240801e-05,
      "loss": 2.7123,
      "step": 201159
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0705301761627197,
      "learning_rate": 2.3534064736520176e-05,
      "loss": 3.236,
      "step": 201160
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.800661563873291,
      "learning_rate": 2.3532476592032557e-05,
      "loss": 2.9031,
      "step": 201161
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.776732921600342,
      "learning_rate": 2.353088849894549e-05,
      "loss": 2.7605,
      "step": 201162
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.408409833908081,
      "learning_rate": 2.3529300457259236e-05,
      "loss": 2.8979,
      "step": 201163
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9642598628997803,
      "learning_rate": 2.3527712466974137e-05,
      "loss": 2.957,
      "step": 201164
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7994282245635986,
      "learning_rate": 2.3526124528090383e-05,
      "loss": 2.9816,
      "step": 201165
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.101372718811035,
      "learning_rate": 2.3524536640608416e-05,
      "loss": 2.8583,
      "step": 201166
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.687563419342041,
      "learning_rate": 2.3522948804528396e-05,
      "loss": 3.1263,
      "step": 201167
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1195454597473145,
      "learning_rate": 2.3521361019850727e-05,
      "loss": 2.9951,
      "step": 201168
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2573282718658447,
      "learning_rate": 2.3519773286575638e-05,
      "loss": 2.708,
      "step": 201169
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.260882377624512,
      "learning_rate": 2.351818560470343e-05,
      "loss": 2.6661,
      "step": 201170
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1591877937316895,
      "learning_rate": 2.3516597974234443e-05,
      "loss": 2.8576,
      "step": 201171
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.963322401046753,
      "learning_rate": 2.3515010395168932e-05,
      "loss": 3.296,
      "step": 201172
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.934724807739258,
      "learning_rate": 2.3513422867507137e-05,
      "loss": 3.0248,
      "step": 201173
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.91349720954895,
      "learning_rate": 2.351183539124949e-05,
      "loss": 2.9701,
      "step": 201174
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9838857650756836,
      "learning_rate": 2.351024796639619e-05,
      "loss": 2.9631,
      "step": 201175
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0147464275360107,
      "learning_rate": 2.3508660592947537e-05,
      "loss": 2.8409,
      "step": 201176
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9872989654541016,
      "learning_rate": 2.3507073270903864e-05,
      "loss": 2.788,
      "step": 201177
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.308422088623047,
      "learning_rate": 2.3505486000265405e-05,
      "loss": 2.9907,
      "step": 201178
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.706611394882202,
      "learning_rate": 2.350389878103256e-05,
      "loss": 2.7467,
      "step": 201179
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9833595752716064,
      "learning_rate": 2.3502311613205528e-05,
      "loss": 2.8667,
      "step": 201180
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.922088146209717,
      "learning_rate": 2.350072449678464e-05,
      "loss": 2.9122,
      "step": 201181
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.008402109146118,
      "learning_rate": 2.3499137431770164e-05,
      "loss": 3.0785,
      "step": 201182
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.670461654663086,
      "learning_rate": 2.349755041816247e-05,
      "loss": 2.836,
      "step": 201183
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.649014472961426,
      "learning_rate": 2.349596345596172e-05,
      "loss": 3.315,
      "step": 201184
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.481976270675659,
      "learning_rate": 2.3494376545168346e-05,
      "loss": 2.9964,
      "step": 201185
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0305702686309814,
      "learning_rate": 2.3492789685782587e-05,
      "loss": 3.0687,
      "step": 201186
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.246020317077637,
      "learning_rate": 2.3491202877804772e-05,
      "loss": 2.9852,
      "step": 201187
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9114038944244385,
      "learning_rate": 2.3489616121235067e-05,
      "loss": 2.7469,
      "step": 201188
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5590386390686035,
      "learning_rate": 2.348802941607394e-05,
      "loss": 2.958,
      "step": 201189
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.006096124649048,
      "learning_rate": 2.3486442762321556e-05,
      "loss": 2.9081,
      "step": 201190
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8270835876464844,
      "learning_rate": 2.348485615997828e-05,
      "loss": 2.8712,
      "step": 201191
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8462769985198975,
      "learning_rate": 2.3483269609044354e-05,
      "loss": 3.0826,
      "step": 201192
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7312798500061035,
      "learning_rate": 2.3481683109520235e-05,
      "loss": 2.9328,
      "step": 201193
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3317747116088867,
      "learning_rate": 2.3480096661405956e-05,
      "loss": 2.9569,
      "step": 201194
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.3321139812469482,
      "learning_rate": 2.347851026470202e-05,
      "loss": 2.9434,
      "step": 201195
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1446456909179688,
      "learning_rate": 2.3476923919408597e-05,
      "loss": 2.8831,
      "step": 201196
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7620489597320557,
      "learning_rate": 2.347533762552608e-05,
      "loss": 2.9048,
      "step": 201197
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0816290378570557,
      "learning_rate": 2.3473751383054673e-05,
      "loss": 3.0093,
      "step": 201198
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.885996103286743,
      "learning_rate": 2.3472165191994806e-05,
      "loss": 3.1538,
      "step": 201199
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6213672161102295,
      "learning_rate": 2.3470579052346583e-05,
      "loss": 3.0666,
      "step": 201200
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9819209575653076,
      "learning_rate": 2.3468992964110434e-05,
      "loss": 2.9371,
      "step": 201201
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9269349575042725,
      "learning_rate": 2.3467406927286593e-05,
      "loss": 2.7811,
      "step": 201202
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6397106647491455,
      "learning_rate": 2.3465820941875422e-05,
      "loss": 2.8895,
      "step": 201203
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7479264736175537,
      "learning_rate": 2.346423500787713e-05,
      "loss": 2.8521,
      "step": 201204
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.173984050750732,
      "learning_rate": 2.3462649125292177e-05,
      "loss": 3.2053,
      "step": 201205
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.633021593093872,
      "learning_rate": 2.3461063294120597e-05,
      "loss": 3.0199,
      "step": 201206
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.572617530822754,
      "learning_rate": 2.345947751436289e-05,
      "loss": 3.1269,
      "step": 201207
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.785790205001831,
      "learning_rate": 2.3457891786019256e-05,
      "loss": 2.9917,
      "step": 201208
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.071802854537964,
      "learning_rate": 2.3456306109090062e-05,
      "loss": 3.2218,
      "step": 201209
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.947455644607544,
      "learning_rate": 2.3454720483575472e-05,
      "loss": 2.8794,
      "step": 201210
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.42844557762146,
      "learning_rate": 2.3453134909476023e-05,
      "loss": 3.0693,
      "step": 201211
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5393855571746826,
      "learning_rate": 2.345154938679171e-05,
      "loss": 2.9899,
      "step": 201212
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7300093173980713,
      "learning_rate": 2.3449963915523072e-05,
      "loss": 2.8543,
      "step": 201213
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.13097882270813,
      "learning_rate": 2.344837849567024e-05,
      "loss": 2.9499,
      "step": 201214
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.724487066268921,
      "learning_rate": 2.344679312723361e-05,
      "loss": 2.7726,
      "step": 201215
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5961503982543945,
      "learning_rate": 2.3445207810213383e-05,
      "loss": 2.9186,
      "step": 201216
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.915672779083252,
      "learning_rate": 2.344362254461003e-05,
      "loss": 3.0581,
      "step": 201217
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.830021858215332,
      "learning_rate": 2.3442037330423648e-05,
      "loss": 2.8769,
      "step": 201218
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4550116062164307,
      "learning_rate": 2.3440452167654633e-05,
      "loss": 3.0239,
      "step": 201219
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.665142774581909,
      "learning_rate": 2.343886705630319e-05,
      "loss": 2.9443,
      "step": 201220
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.69351053237915,
      "learning_rate": 2.3437281996369784e-05,
      "loss": 2.8631,
      "step": 201221
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.3788628578186035,
      "learning_rate": 2.343569698785451e-05,
      "loss": 3.2371,
      "step": 201222
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7295844554901123,
      "learning_rate": 2.343411203075788e-05,
      "loss": 2.8302,
      "step": 201223
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.855830192565918,
      "learning_rate": 2.343252712507998e-05,
      "loss": 2.9783,
      "step": 201224
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6606805324554443,
      "learning_rate": 2.343094227082122e-05,
      "loss": 3.1753,
      "step": 201225
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8731930255889893,
      "learning_rate": 2.3429357467981857e-05,
      "loss": 2.6837,
      "step": 201226
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9093475341796875,
      "learning_rate": 2.34277727165622e-05,
      "loss": 3.2432,
      "step": 201227
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4351489543914795,
      "learning_rate": 2.342618801656251e-05,
      "loss": 2.7596,
      "step": 201228
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7797958850860596,
      "learning_rate": 2.342460336798322e-05,
      "loss": 2.8936,
      "step": 201229
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.182978391647339,
      "learning_rate": 2.3423018770824433e-05,
      "loss": 2.7775,
      "step": 201230
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0292561054229736,
      "learning_rate": 2.3421434225086543e-05,
      "loss": 2.9177,
      "step": 201231
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1210317611694336,
      "learning_rate": 2.341984973076979e-05,
      "loss": 2.9328,
      "step": 201232
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8518013954162598,
      "learning_rate": 2.3418265287874572e-05,
      "loss": 3.1439,
      "step": 201233
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7525017261505127,
      "learning_rate": 2.341668089640105e-05,
      "loss": 2.8871,
      "step": 201234
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.130876064300537,
      "learning_rate": 2.34150965563497e-05,
      "loss": 3.0423,
      "step": 201235
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.923011302947998,
      "learning_rate": 2.3413512267720614e-05,
      "loss": 2.899,
      "step": 201236
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7096171379089355,
      "learning_rate": 2.3411928030514194e-05,
      "loss": 2.7841,
      "step": 201237
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.742269992828369,
      "learning_rate": 2.3410343844730704e-05,
      "loss": 2.7749,
      "step": 201238
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.242398738861084,
      "learning_rate": 2.3408759710370483e-05,
      "loss": 2.908,
      "step": 201239
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.942739248275757,
      "learning_rate": 2.3407175627433727e-05,
      "loss": 3.0915,
      "step": 201240
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.829489231109619,
      "learning_rate": 2.340559159592087e-05,
      "loss": 2.9873,
      "step": 201241
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.633805274963379,
      "learning_rate": 2.3404007615832144e-05,
      "loss": 2.6861,
      "step": 201242
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9903390407562256,
      "learning_rate": 2.3402423687167816e-05,
      "loss": 2.8288,
      "step": 201243
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.332801103591919,
      "learning_rate": 2.3400839809928186e-05,
      "loss": 2.9146,
      "step": 201244
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9404854774475098,
      "learning_rate": 2.3399255984113553e-05,
      "loss": 2.9639,
      "step": 201245
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.008755207061768,
      "learning_rate": 2.3397672209724217e-05,
      "loss": 2.8978,
      "step": 201246
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.629824161529541,
      "learning_rate": 2.3396088486760512e-05,
      "loss": 3.0254,
      "step": 201247
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8198859691619873,
      "learning_rate": 2.339450481522267e-05,
      "loss": 2.7614,
      "step": 201248
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.981541395187378,
      "learning_rate": 2.3392921195111026e-05,
      "loss": 2.8391,
      "step": 201249
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8563036918640137,
      "learning_rate": 2.339133762642581e-05,
      "loss": 3.1954,
      "step": 201250
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.521127939224243,
      "learning_rate": 2.3389754109167424e-05,
      "loss": 2.9221,
      "step": 201251
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3125274181365967,
      "learning_rate": 2.3388170643336067e-05,
      "loss": 2.8662,
      "step": 201252
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7562172412872314,
      "learning_rate": 2.3386587228932106e-05,
      "loss": 3.0113,
      "step": 201253
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6605618000030518,
      "learning_rate": 2.3385003865955777e-05,
      "loss": 2.9748,
      "step": 201254
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.63417387008667,
      "learning_rate": 2.3383420554407372e-05,
      "loss": 2.5864,
      "step": 201255
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.512075424194336,
      "learning_rate": 2.3381837294287264e-05,
      "loss": 3.0621,
      "step": 201256
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9038186073303223,
      "learning_rate": 2.3380254085595686e-05,
      "loss": 2.8967,
      "step": 201257
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.548168420791626,
      "learning_rate": 2.3378670928332865e-05,
      "loss": 2.9697,
      "step": 201258
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1410651206970215,
      "learning_rate": 2.3377087822499273e-05,
      "loss": 2.8508,
      "step": 201259
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9030141830444336,
      "learning_rate": 2.3375504768095078e-05,
      "loss": 3.1833,
      "step": 201260
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.955688238143921,
      "learning_rate": 2.337392176512054e-05,
      "loss": 2.9952,
      "step": 201261
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.351241111755371,
      "learning_rate": 2.3372338813576064e-05,
      "loss": 3.1804,
      "step": 201262
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.603466749191284,
      "learning_rate": 2.337075591346188e-05,
      "loss": 2.8838,
      "step": 201263
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9589457511901855,
      "learning_rate": 2.3369173064778257e-05,
      "loss": 2.9222,
      "step": 201264
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1661267280578613,
      "learning_rate": 2.3367590267525594e-05,
      "loss": 2.8674,
      "step": 201265
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6685352325439453,
      "learning_rate": 2.3366007521704088e-05,
      "loss": 3.0304,
      "step": 201266
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.7016777992248535,
      "learning_rate": 2.3364424827314042e-05,
      "loss": 2.9897,
      "step": 201267
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1371560096740723,
      "learning_rate": 2.336284218435579e-05,
      "loss": 2.8462,
      "step": 201268
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2202742099761963,
      "learning_rate": 2.3361259592829595e-05,
      "loss": 2.7906,
      "step": 201269
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2177939414978027,
      "learning_rate": 2.3359677052735793e-05,
      "loss": 2.8414,
      "step": 201270
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.643589973449707,
      "learning_rate": 2.335809456407465e-05,
      "loss": 2.7471,
      "step": 201271
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.491529941558838,
      "learning_rate": 2.3356512126846462e-05,
      "loss": 2.8952,
      "step": 201272
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.441495418548584,
      "learning_rate": 2.3354929741051465e-05,
      "loss": 3.0581,
      "step": 201273
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.088477373123169,
      "learning_rate": 2.335334740669006e-05,
      "loss": 2.7845,
      "step": 201274
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.110346555709839,
      "learning_rate": 2.3351765123762444e-05,
      "loss": 2.8468,
      "step": 201275
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7698798179626465,
      "learning_rate": 2.335018289226899e-05,
      "loss": 2.9412,
      "step": 201276
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1474921703338623,
      "learning_rate": 2.334860071220992e-05,
      "loss": 3.2121,
      "step": 201277
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7609686851501465,
      "learning_rate": 2.3347018583585675e-05,
      "loss": 2.9035,
      "step": 201278
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.67598819732666,
      "learning_rate": 2.334543650639632e-05,
      "loss": 2.9957,
      "step": 201279
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0044374465942383,
      "learning_rate": 2.3343854480642354e-05,
      "loss": 2.9785,
      "step": 201280
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.889500141143799,
      "learning_rate": 2.334227250632391e-05,
      "loss": 2.9312,
      "step": 201281
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5000195503234863,
      "learning_rate": 2.3340690583441425e-05,
      "loss": 3.0579,
      "step": 201282
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1602604389190674,
      "learning_rate": 2.333910871199506e-05,
      "loss": 2.8134,
      "step": 201283
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.577054262161255,
      "learning_rate": 2.3337526891985314e-05,
      "loss": 3.239,
      "step": 201284
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4548940658569336,
      "learning_rate": 2.3335945123412224e-05,
      "loss": 3.054,
      "step": 201285
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.974213123321533,
      "learning_rate": 2.3334363406276223e-05,
      "loss": 2.8146,
      "step": 201286
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.090423345565796,
      "learning_rate": 2.3332781740577578e-05,
      "loss": 2.9614,
      "step": 201287
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.969352960586548,
      "learning_rate": 2.333120012631662e-05,
      "loss": 2.7905,
      "step": 201288
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.89164662361145,
      "learning_rate": 2.3329618563493578e-05,
      "loss": 2.9995,
      "step": 201289
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1979846954345703,
      "learning_rate": 2.3328037052108893e-05,
      "loss": 2.7334,
      "step": 201290
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.86979341506958,
      "learning_rate": 2.332645559216263e-05,
      "loss": 3.1517,
      "step": 201291
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.924959897994995,
      "learning_rate": 2.3324874183655252e-05,
      "loss": 2.8491,
      "step": 201292
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.145613431930542,
      "learning_rate": 2.332329282658696e-05,
      "loss": 2.9716,
      "step": 201293
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2922348976135254,
      "learning_rate": 2.3321711520958118e-05,
      "loss": 2.7001,
      "step": 201294
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.140692949295044,
      "learning_rate": 2.3320130266768967e-05,
      "loss": 2.7733,
      "step": 201295
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0379011631011963,
      "learning_rate": 2.3318549064019897e-05,
      "loss": 3.1323,
      "step": 201296
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.098193407058716,
      "learning_rate": 2.3316967912711083e-05,
      "loss": 2.9511,
      "step": 201297
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.014437675476074,
      "learning_rate": 2.3315386812842885e-05,
      "loss": 2.8208,
      "step": 201298
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.204014778137207,
      "learning_rate": 2.3313805764415506e-05,
      "loss": 2.6797,
      "step": 201299
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4291632175445557,
      "learning_rate": 2.331222476742941e-05,
      "loss": 2.9497,
      "step": 201300
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1541450023651123,
      "learning_rate": 2.33106438218847e-05,
      "loss": 2.7927,
      "step": 201301
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.983217239379883,
      "learning_rate": 2.330906292778191e-05,
      "loss": 2.9581,
      "step": 201302
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1366119384765625,
      "learning_rate": 2.3307482085121066e-05,
      "loss": 2.6679,
      "step": 201303
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.233605861663818,
      "learning_rate": 2.330590129390264e-05,
      "loss": 2.8652,
      "step": 201304
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.366121292114258,
      "learning_rate": 2.3304320554126797e-05,
      "loss": 2.9176,
      "step": 201305
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0473361015319824,
      "learning_rate": 2.3302739865793975e-05,
      "loss": 2.8893,
      "step": 201306
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.593486785888672,
      "learning_rate": 2.3301159228904333e-05,
      "loss": 2.9613,
      "step": 201307
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.657626152038574,
      "learning_rate": 2.329957864345834e-05,
      "loss": 2.9139,
      "step": 201308
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.896101713180542,
      "learning_rate": 2.3297998109456095e-05,
      "loss": 3.1565,
      "step": 201309
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7858006954193115,
      "learning_rate": 2.3296417626898e-05,
      "loss": 2.6221,
      "step": 201310
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1738994121551514,
      "learning_rate": 2.3294837195784287e-05,
      "loss": 2.8812,
      "step": 201311
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.75176739692688,
      "learning_rate": 2.3293256816115326e-05,
      "loss": 2.8566,
      "step": 201312
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2475273609161377,
      "learning_rate": 2.329167648789131e-05,
      "loss": 2.9629,
      "step": 201313
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7645740509033203,
      "learning_rate": 2.3290096211112674e-05,
      "loss": 2.8816,
      "step": 201314
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8578639030456543,
      "learning_rate": 2.328851598577962e-05,
      "loss": 2.8419,
      "step": 201315
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.977374792098999,
      "learning_rate": 2.328693581189245e-05,
      "loss": 2.9334,
      "step": 201316
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5557522773742676,
      "learning_rate": 2.3285355689451425e-05,
      "loss": 2.712,
      "step": 201317
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1091275215148926,
      "learning_rate": 2.3283775618456912e-05,
      "loss": 2.8511,
      "step": 201318
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9008865356445312,
      "learning_rate": 2.3282195598909114e-05,
      "loss": 2.9443,
      "step": 201319
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5361344814300537,
      "learning_rate": 2.3280615630808465e-05,
      "loss": 2.9139,
      "step": 201320
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0666096210479736,
      "learning_rate": 2.3279035714155126e-05,
      "loss": 2.7019,
      "step": 201321
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4776971340179443,
      "learning_rate": 2.3277455848949467e-05,
      "loss": 2.8772,
      "step": 201322
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.764549255371094,
      "learning_rate": 2.3275876035191688e-05,
      "loss": 2.7517,
      "step": 201323
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.664288282394409,
      "learning_rate": 2.327429627288222e-05,
      "loss": 2.794,
      "step": 201324
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.795281171798706,
      "learning_rate": 2.32727165620212e-05,
      "loss": 2.9253,
      "step": 201325
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.040964126586914,
      "learning_rate": 2.327113690260909e-05,
      "loss": 2.8319,
      "step": 201326
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.741276502609253,
      "learning_rate": 2.3269557294646056e-05,
      "loss": 2.9592,
      "step": 201327
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9465627670288086,
      "learning_rate": 2.326797773813247e-05,
      "loss": 3.028,
      "step": 201328
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8950533866882324,
      "learning_rate": 2.3266398233068528e-05,
      "loss": 2.9482,
      "step": 201329
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8704326152801514,
      "learning_rate": 2.3264818779454664e-05,
      "loss": 2.7456,
      "step": 201330
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8731162548065186,
      "learning_rate": 2.326323937729101e-05,
      "loss": 3.1188,
      "step": 201331
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.127368450164795,
      "learning_rate": 2.3261660026578e-05,
      "loss": 2.876,
      "step": 201332
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8340001106262207,
      "learning_rate": 2.3260080727315865e-05,
      "loss": 3.1173,
      "step": 201333
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5583302974700928,
      "learning_rate": 2.3258501479504908e-05,
      "loss": 2.8805,
      "step": 201334
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6496798992156982,
      "learning_rate": 2.3256922283145395e-05,
      "loss": 2.9736,
      "step": 201335
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.673487424850464,
      "learning_rate": 2.325534313823766e-05,
      "loss": 2.8772,
      "step": 201336
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.551271677017212,
      "learning_rate": 2.3253764044781963e-05,
      "loss": 2.9904,
      "step": 201337
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.104970455169678,
      "learning_rate": 2.3252185002778644e-05,
      "loss": 2.6843,
      "step": 201338
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.238595962524414,
      "learning_rate": 2.3250606012227968e-05,
      "loss": 2.6694,
      "step": 201339
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1288046836853027,
      "learning_rate": 2.32490270731302e-05,
      "loss": 2.9737,
      "step": 201340
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0437240600585938,
      "learning_rate": 2.3247448185485672e-05,
      "loss": 2.8421,
      "step": 201341
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7034173011779785,
      "learning_rate": 2.324586934929469e-05,
      "loss": 2.6942,
      "step": 201342
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.362248182296753,
      "learning_rate": 2.3244290564557478e-05,
      "loss": 2.9455,
      "step": 201343
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.488879919052124,
      "learning_rate": 2.3242711831274442e-05,
      "loss": 2.9033,
      "step": 201344
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.19158935546875,
      "learning_rate": 2.3241133149445777e-05,
      "loss": 2.8536,
      "step": 201345
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.834319829940796,
      "learning_rate": 2.323955451907179e-05,
      "loss": 2.9303,
      "step": 201346
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.18575382232666,
      "learning_rate": 2.323797594015281e-05,
      "loss": 2.895,
      "step": 201347
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.966221332550049,
      "learning_rate": 2.3236397412689134e-05,
      "loss": 2.9604,
      "step": 201348
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.797708511352539,
      "learning_rate": 2.3234818936681e-05,
      "loss": 2.6983,
      "step": 201349
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.566718578338623,
      "learning_rate": 2.323324051212877e-05,
      "loss": 2.868,
      "step": 201350
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4327986240386963,
      "learning_rate": 2.3231662139032716e-05,
      "loss": 2.7288,
      "step": 201351
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.5465636253356934,
      "learning_rate": 2.323008381739303e-05,
      "loss": 2.9496,
      "step": 201352
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.337139368057251,
      "learning_rate": 2.3228505547210186e-05,
      "loss": 3.0307,
      "step": 201353
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.822253704071045,
      "learning_rate": 2.3226927328484347e-05,
      "loss": 3.0597,
      "step": 201354
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1813175678253174,
      "learning_rate": 2.322534916121588e-05,
      "loss": 2.7756,
      "step": 201355
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9399571418762207,
      "learning_rate": 2.3223771045405015e-05,
      "loss": 2.9053,
      "step": 201356
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8109309673309326,
      "learning_rate": 2.3222192981052124e-05,
      "loss": 3.2259,
      "step": 201357
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.250786304473877,
      "learning_rate": 2.322061496815737e-05,
      "loss": 2.9434,
      "step": 201358
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4805359840393066,
      "learning_rate": 2.3219037006721186e-05,
      "loss": 2.7635,
      "step": 201359
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.3457794189453125,
      "learning_rate": 2.3217459096743774e-05,
      "loss": 2.8024,
      "step": 201360
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9226694107055664,
      "learning_rate": 2.3215881238225496e-05,
      "loss": 2.8965,
      "step": 201361
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.156057834625244,
      "learning_rate": 2.3214303431166625e-05,
      "loss": 2.7614,
      "step": 201362
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.529271602630615,
      "learning_rate": 2.3212725675567425e-05,
      "loss": 2.8726,
      "step": 201363
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.879068613052368,
      "learning_rate": 2.3211147971428156e-05,
      "loss": 3.1343,
      "step": 201364
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.848815441131592,
      "learning_rate": 2.3209570318749194e-05,
      "loss": 2.8552,
      "step": 201365
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7292613983154297,
      "learning_rate": 2.3207992717530765e-05,
      "loss": 2.9381,
      "step": 201366
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8906917572021484,
      "learning_rate": 2.320641516777324e-05,
      "loss": 2.9987,
      "step": 201367
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0865821838378906,
      "learning_rate": 2.320483766947685e-05,
      "loss": 2.8794,
      "step": 201368
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.846055507659912,
      "learning_rate": 2.3203260222641962e-05,
      "loss": 3.2501,
      "step": 201369
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0934314727783203,
      "learning_rate": 2.320168282726874e-05,
      "loss": 2.854,
      "step": 201370
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.825124740600586,
      "learning_rate": 2.3200105483357588e-05,
      "loss": 3.1048,
      "step": 201371
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.308091402053833,
      "learning_rate": 2.3198528190908705e-05,
      "loss": 3.245,
      "step": 201372
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.567852020263672,
      "learning_rate": 2.3196950949922488e-05,
      "loss": 2.688,
      "step": 201373
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3657710552215576,
      "learning_rate": 2.319537376039914e-05,
      "loss": 3.0583,
      "step": 201374
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4544668197631836,
      "learning_rate": 2.3193796622339122e-05,
      "loss": 2.7586,
      "step": 201375
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.624877452850342,
      "learning_rate": 2.3192219535742474e-05,
      "loss": 3.0344,
      "step": 201376
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7473714351654053,
      "learning_rate": 2.3190642500609658e-05,
      "loss": 2.9841,
      "step": 201377
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2729384899139404,
      "learning_rate": 2.3189065516940907e-05,
      "loss": 3.0418,
      "step": 201378
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.867372989654541,
      "learning_rate": 2.318748858473656e-05,
      "loss": 3.1662,
      "step": 201379
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.751539468765259,
      "learning_rate": 2.318591170399684e-05,
      "loss": 2.8905,
      "step": 201380
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0299408435821533,
      "learning_rate": 2.3184334874722153e-05,
      "loss": 2.7118,
      "step": 201381
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.27006196975708,
      "learning_rate": 2.31827580969127e-05,
      "loss": 3.4066,
      "step": 201382
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7586874961853027,
      "learning_rate": 2.3181181370568812e-05,
      "loss": 2.9045,
      "step": 201383
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.802495241165161,
      "learning_rate": 2.317960469569069e-05,
      "loss": 2.9445,
      "step": 201384
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.912393093109131,
      "learning_rate": 2.31780280722788e-05,
      "loss": 2.9509,
      "step": 201385
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9740889072418213,
      "learning_rate": 2.3176451500333238e-05,
      "loss": 3.0982,
      "step": 201386
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8642327785491943,
      "learning_rate": 2.3174874979854476e-05,
      "loss": 3.0249,
      "step": 201387
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2373688220977783,
      "learning_rate": 2.3173298510842743e-05,
      "loss": 2.8856,
      "step": 201388
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6653451919555664,
      "learning_rate": 2.3171722093298305e-05,
      "loss": 2.9983,
      "step": 201389
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1084084510803223,
      "learning_rate": 2.3170145727221435e-05,
      "loss": 2.9751,
      "step": 201390
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8208112716674805,
      "learning_rate": 2.3168569412612492e-05,
      "loss": 3.022,
      "step": 201391
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.84385347366333,
      "learning_rate": 2.316699314947168e-05,
      "loss": 2.8513,
      "step": 201392
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0965378284454346,
      "learning_rate": 2.316541693779943e-05,
      "loss": 3.2885,
      "step": 201393
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2938504219055176,
      "learning_rate": 2.3163840777595944e-05,
      "loss": 3.0062,
      "step": 201394
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.403174638748169,
      "learning_rate": 2.316226466886152e-05,
      "loss": 3.0965,
      "step": 201395
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.917449951171875,
      "learning_rate": 2.316068861159639e-05,
      "loss": 2.9979,
      "step": 201396
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.924539804458618,
      "learning_rate": 2.315911260580099e-05,
      "loss": 2.7197,
      "step": 201397
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.859485626220703,
      "learning_rate": 2.315753665147545e-05,
      "loss": 2.8812,
      "step": 201398
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.968920946121216,
      "learning_rate": 2.315596074862024e-05,
      "loss": 2.8408,
      "step": 201399
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.674583673477173,
      "learning_rate": 2.3154384897235556e-05,
      "loss": 2.9301,
      "step": 201400
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.755737781524658,
      "learning_rate": 2.3152809097321667e-05,
      "loss": 2.9445,
      "step": 201401
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8390679359436035,
      "learning_rate": 2.315123334887887e-05,
      "loss": 2.9759,
      "step": 201402
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1514790058135986,
      "learning_rate": 2.314965765190754e-05,
      "loss": 2.8819,
      "step": 201403
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7710978984832764,
      "learning_rate": 2.3148082006407866e-05,
      "loss": 2.9524,
      "step": 201404
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4316365718841553,
      "learning_rate": 2.314650641238025e-05,
      "loss": 3.0689,
      "step": 201405
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8389573097229004,
      "learning_rate": 2.3144930869824894e-05,
      "loss": 2.903,
      "step": 201406
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1404316425323486,
      "learning_rate": 2.3143355378742135e-05,
      "loss": 2.745,
      "step": 201407
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7498350143432617,
      "learning_rate": 2.31417799391322e-05,
      "loss": 2.7893,
      "step": 201408
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8271236419677734,
      "learning_rate": 2.3140204550995488e-05,
      "loss": 2.8742,
      "step": 201409
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.035122871398926,
      "learning_rate": 2.3138629214332172e-05,
      "loss": 2.9273,
      "step": 201410
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.320256233215332,
      "learning_rate": 2.313705392914268e-05,
      "loss": 2.8939,
      "step": 201411
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6611061096191406,
      "learning_rate": 2.3135478695427245e-05,
      "loss": 2.8266,
      "step": 201412
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1752777099609375,
      "learning_rate": 2.3133903513186136e-05,
      "loss": 2.7281,
      "step": 201413
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.158635139465332,
      "learning_rate": 2.313232838241962e-05,
      "loss": 2.9342,
      "step": 201414
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7212960720062256,
      "learning_rate": 2.3130753303128057e-05,
      "loss": 3.0841,
      "step": 201415
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6302664279937744,
      "learning_rate": 2.312917827531169e-05,
      "loss": 3.059,
      "step": 201416
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8721365928649902,
      "learning_rate": 2.3127603298970877e-05,
      "loss": 2.9232,
      "step": 201417
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.861649513244629,
      "learning_rate": 2.3126028374105855e-05,
      "loss": 2.9155,
      "step": 201418
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6773324012756348,
      "learning_rate": 2.3124453500716923e-05,
      "loss": 2.9184,
      "step": 201419
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.1419477462768555,
      "learning_rate": 2.312287867880438e-05,
      "loss": 3.0337,
      "step": 201420
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9931046962738037,
      "learning_rate": 2.3121303908368527e-05,
      "loss": 2.7279,
      "step": 201421
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4100141525268555,
      "learning_rate": 2.31197291894096e-05,
      "loss": 2.8267,
      "step": 201422
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0336456298828125,
      "learning_rate": 2.3118154521928023e-05,
      "loss": 2.8967,
      "step": 201423
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2823691368103027,
      "learning_rate": 2.311657990592397e-05,
      "loss": 2.9978,
      "step": 201424
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8798916339874268,
      "learning_rate": 2.3115005341397807e-05,
      "loss": 3.1999,
      "step": 201425
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.024385929107666,
      "learning_rate": 2.311343082834973e-05,
      "loss": 2.7844,
      "step": 201426
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1667165756225586,
      "learning_rate": 2.3111856366780114e-05,
      "loss": 2.8034,
      "step": 201427
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.924875259399414,
      "learning_rate": 2.3110281956689215e-05,
      "loss": 3.0682,
      "step": 201428
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.087451934814453,
      "learning_rate": 2.310870759807737e-05,
      "loss": 2.9393,
      "step": 201429
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.510979652404785,
      "learning_rate": 2.310713329094488e-05,
      "loss": 2.8934,
      "step": 201430
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8935277462005615,
      "learning_rate": 2.3105559035291908e-05,
      "loss": 2.7979,
      "step": 201431
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.939028263092041,
      "learning_rate": 2.3103984831118893e-05,
      "loss": 2.8016,
      "step": 201432
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7149980068206787,
      "learning_rate": 2.3102410678426097e-05,
      "loss": 2.9159,
      "step": 201433
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8253400325775146,
      "learning_rate": 2.310083657721372e-05,
      "loss": 2.7705,
      "step": 201434
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1102638244628906,
      "learning_rate": 2.30992625274822e-05,
      "loss": 2.6664,
      "step": 201435
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.189864158630371,
      "learning_rate": 2.309768852923173e-05,
      "loss": 2.8102,
      "step": 201436
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0921120643615723,
      "learning_rate": 2.3096114582462578e-05,
      "loss": 2.8701,
      "step": 201437
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.780369997024536,
      "learning_rate": 2.3094540687175143e-05,
      "loss": 2.9627,
      "step": 201438
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.818807363510132,
      "learning_rate": 2.3092966843369632e-05,
      "loss": 3.083,
      "step": 201439
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0801610946655273,
      "learning_rate": 2.309139305104637e-05,
      "loss": 2.7652,
      "step": 201440
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.3873164653778076,
      "learning_rate": 2.308981931020566e-05,
      "loss": 2.7696,
      "step": 201441
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9960668087005615,
      "learning_rate": 2.30882456208478e-05,
      "loss": 2.8111,
      "step": 201442
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.599503517150879,
      "learning_rate": 2.308667198297299e-05,
      "loss": 2.9169,
      "step": 201443
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1916418075561523,
      "learning_rate": 2.308509839658167e-05,
      "loss": 2.9305,
      "step": 201444
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7059550285339355,
      "learning_rate": 2.3083524861673996e-05,
      "loss": 2.8051,
      "step": 201445
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.894859790802002,
      "learning_rate": 2.308195137825034e-05,
      "loss": 3.0426,
      "step": 201446
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.808180809020996,
      "learning_rate": 2.3080377946311003e-05,
      "loss": 2.9904,
      "step": 201447
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.923161268234253,
      "learning_rate": 2.3078804565856214e-05,
      "loss": 2.7718,
      "step": 201448
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8483333587646484,
      "learning_rate": 2.307723123688634e-05,
      "loss": 3.0857,
      "step": 201449
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.181800603866577,
      "learning_rate": 2.307565795940165e-05,
      "loss": 2.8855,
      "step": 201450
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.751708745956421,
      "learning_rate": 2.3074084733402342e-05,
      "loss": 3.0299,
      "step": 201451
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9073774814605713,
      "learning_rate": 2.3072511558888852e-05,
      "loss": 3.0516,
      "step": 201452
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1104624271392822,
      "learning_rate": 2.3070938435861376e-05,
      "loss": 2.9544,
      "step": 201453
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6694719791412354,
      "learning_rate": 2.306936536432028e-05,
      "loss": 2.9946,
      "step": 201454
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.840662956237793,
      "learning_rate": 2.3067792344265802e-05,
      "loss": 3.1464,
      "step": 201455
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6288387775421143,
      "learning_rate": 2.306621937569827e-05,
      "loss": 3.0032,
      "step": 201456
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1456687450408936,
      "learning_rate": 2.3064646458617887e-05,
      "loss": 2.9867,
      "step": 201457
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.103497266769409,
      "learning_rate": 2.3063073593025082e-05,
      "loss": 2.8406,
      "step": 201458
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9452643394470215,
      "learning_rate": 2.306150077892003e-05,
      "loss": 2.8407,
      "step": 201459
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.437523603439331,
      "learning_rate": 2.3059928016303086e-05,
      "loss": 2.8477,
      "step": 201460
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5938720703125,
      "learning_rate": 2.305835530517456e-05,
      "loss": 2.8858,
      "step": 201461
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.032766819000244,
      "learning_rate": 2.3056782645534712e-05,
      "loss": 2.8969,
      "step": 201462
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1016111373901367,
      "learning_rate": 2.3055210037383775e-05,
      "loss": 2.8145,
      "step": 201463
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4636361598968506,
      "learning_rate": 2.3053637480722155e-05,
      "loss": 2.9518,
      "step": 201464
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1841912269592285,
      "learning_rate": 2.3052064975550045e-05,
      "loss": 2.7744,
      "step": 201465
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.685025691986084,
      "learning_rate": 2.3050492521867848e-05,
      "loss": 2.8147,
      "step": 201466
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.879221200942993,
      "learning_rate": 2.3048920119675795e-05,
      "loss": 2.9668,
      "step": 201467
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7931392192840576,
      "learning_rate": 2.3047347768974156e-05,
      "loss": 2.8048,
      "step": 201468
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0344691276550293,
      "learning_rate": 2.3045775469763196e-05,
      "loss": 2.7544,
      "step": 201469
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9188497066497803,
      "learning_rate": 2.304420322204331e-05,
      "loss": 2.8202,
      "step": 201470
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.933903932571411,
      "learning_rate": 2.3042631025814674e-05,
      "loss": 3.06,
      "step": 201471
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.780677556991577,
      "learning_rate": 2.304105888107768e-05,
      "loss": 2.8599,
      "step": 201472
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.9523978233337402,
      "learning_rate": 2.3039486787832595e-05,
      "loss": 2.9427,
      "step": 201473
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9314379692077637,
      "learning_rate": 2.3037914746079722e-05,
      "loss": 2.8928,
      "step": 201474
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.901991844177246,
      "learning_rate": 2.303634275581926e-05,
      "loss": 2.7517,
      "step": 201475
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5108909606933594,
      "learning_rate": 2.3034770817051606e-05,
      "loss": 3.1032,
      "step": 201476
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7340281009674072,
      "learning_rate": 2.3033198929776963e-05,
      "loss": 2.9357,
      "step": 201477
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.48850417137146,
      "learning_rate": 2.3031627093995763e-05,
      "loss": 3.0801,
      "step": 201478
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.616351366043091,
      "learning_rate": 2.3030055309708173e-05,
      "loss": 2.9764,
      "step": 201479
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8122546672821045,
      "learning_rate": 2.302848357691456e-05,
      "loss": 2.8429,
      "step": 201480
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8788669109344482,
      "learning_rate": 2.3026911895615086e-05,
      "loss": 3.0659,
      "step": 201481
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0436148643493652,
      "learning_rate": 2.3025340265810222e-05,
      "loss": 2.8001,
      "step": 201482
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2862234115600586,
      "learning_rate": 2.3023768687500133e-05,
      "loss": 2.9826,
      "step": 201483
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5986380577087402,
      "learning_rate": 2.3022197160685186e-05,
      "loss": 2.8971,
      "step": 201484
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.98945951461792,
      "learning_rate": 2.3020625685365613e-05,
      "loss": 2.9677,
      "step": 201485
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6177175045013428,
      "learning_rate": 2.301905426154178e-05,
      "loss": 2.9175,
      "step": 201486
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.77841854095459,
      "learning_rate": 2.3017482889213855e-05,
      "loss": 3.0605,
      "step": 201487
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.247488021850586,
      "learning_rate": 2.3015911568382273e-05,
      "loss": 2.6623,
      "step": 201488
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.711183547973633,
      "learning_rate": 2.3014340299047195e-05,
      "loss": 2.9663,
      "step": 201489
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2290475368499756,
      "learning_rate": 2.301276908120906e-05,
      "loss": 2.884,
      "step": 201490
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.965947151184082,
      "learning_rate": 2.301119791486806e-05,
      "loss": 3.1392,
      "step": 201491
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.6836929321289062,
      "learning_rate": 2.3009626800024504e-05,
      "loss": 2.9464,
      "step": 201492
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1059067249298096,
      "learning_rate": 2.3008055736678653e-05,
      "loss": 2.929,
      "step": 201493
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.958343029022217,
      "learning_rate": 2.3006484724830877e-05,
      "loss": 2.8394,
      "step": 201494
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.9136197566986084,
      "learning_rate": 2.300491376448137e-05,
      "loss": 2.9,
      "step": 201495
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7793052196502686,
      "learning_rate": 2.3003342855630503e-05,
      "loss": 2.8047,
      "step": 201496
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0391905307769775,
      "learning_rate": 2.3001771998278572e-05,
      "loss": 2.9713,
      "step": 201497
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3092973232269287,
      "learning_rate": 2.3000201192425848e-05,
      "loss": 3.0924,
      "step": 201498
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0140111446380615,
      "learning_rate": 2.299863043807253e-05,
      "loss": 2.8813,
      "step": 201499
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.801558494567871,
      "learning_rate": 2.299705973521908e-05,
      "loss": 2.8129,
      "step": 201500
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2769153118133545,
      "learning_rate": 2.2995489083865637e-05,
      "loss": 3.0026,
      "step": 201501
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.2960245609283447,
      "learning_rate": 2.299391848401263e-05,
      "loss": 3.1361,
      "step": 201502
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3552744388580322,
      "learning_rate": 2.299234793566026e-05,
      "loss": 2.9254,
      "step": 201503
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.087237358093262,
      "learning_rate": 2.299077743880886e-05,
      "loss": 2.8833,
      "step": 201504
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.195734739303589,
      "learning_rate": 2.2989206993458665e-05,
      "loss": 2.9134,
      "step": 201505
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.801161766052246,
      "learning_rate": 2.2987636599610004e-05,
      "loss": 3.0104,
      "step": 201506
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5556294918060303,
      "learning_rate": 2.298606625726318e-05,
      "loss": 2.7711,
      "step": 201507
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.751861810684204,
      "learning_rate": 2.2984495966418493e-05,
      "loss": 2.6343,
      "step": 201508
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8191027641296387,
      "learning_rate": 2.2982925727076207e-05,
      "loss": 2.8688,
      "step": 201509
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7239255905151367,
      "learning_rate": 2.2981355539236658e-05,
      "loss": 2.9891,
      "step": 201510
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9375317096710205,
      "learning_rate": 2.297978540290001e-05,
      "loss": 3.027,
      "step": 201511
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.989945650100708,
      "learning_rate": 2.2978215318066727e-05,
      "loss": 2.8798,
      "step": 201512
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.813094139099121,
      "learning_rate": 2.2976645284736982e-05,
      "loss": 2.8712,
      "step": 201513
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0461580753326416,
      "learning_rate": 2.297507530291114e-05,
      "loss": 2.8491,
      "step": 201514
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9313511848449707,
      "learning_rate": 2.297350537258943e-05,
      "loss": 2.7843,
      "step": 201515
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.948392868041992,
      "learning_rate": 2.297193549377222e-05,
      "loss": 2.8066,
      "step": 201516
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.521016836166382,
      "learning_rate": 2.2970365666459743e-05,
      "loss": 2.8378,
      "step": 201517
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.919663190841675,
      "learning_rate": 2.2968795890652304e-05,
      "loss": 2.9179,
      "step": 201518
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.5108656883239746,
      "learning_rate": 2.296722616635016e-05,
      "loss": 2.8745,
      "step": 201519
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.304842233657837,
      "learning_rate": 2.2965656493553653e-05,
      "loss": 3.1418,
      "step": 201520
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7375035285949707,
      "learning_rate": 2.2964086872263044e-05,
      "loss": 2.7365,
      "step": 201521
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7754929065704346,
      "learning_rate": 2.29625173024787e-05,
      "loss": 3.1239,
      "step": 201522
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.592226505279541,
      "learning_rate": 2.2960947784200823e-05,
      "loss": 2.9599,
      "step": 201523
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1222877502441406,
      "learning_rate": 2.2959378317429712e-05,
      "loss": 3.1148,
      "step": 201524
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6006391048431396,
      "learning_rate": 2.2957808902165696e-05,
      "loss": 2.9938,
      "step": 201525
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1330947875976562,
      "learning_rate": 2.2956239538409082e-05,
      "loss": 2.6519,
      "step": 201526
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.379913091659546,
      "learning_rate": 2.2954670226160067e-05,
      "loss": 2.8322,
      "step": 201527
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9394633769989014,
      "learning_rate": 2.295310096541908e-05,
      "loss": 2.9346,
      "step": 201528
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7436461448669434,
      "learning_rate": 2.2951531756186326e-05,
      "loss": 2.9905,
      "step": 201529
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.747668981552124,
      "learning_rate": 2.2949962598462067e-05,
      "loss": 2.9518,
      "step": 201530
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.390989303588867,
      "learning_rate": 2.2948393492246675e-05,
      "loss": 2.9131,
      "step": 201531
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.166602611541748,
      "learning_rate": 2.2946824437540413e-05,
      "loss": 2.7444,
      "step": 201532
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.032843828201294,
      "learning_rate": 2.2945255434343546e-05,
      "loss": 2.7428,
      "step": 201533
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8143460750579834,
      "learning_rate": 2.294368648265641e-05,
      "loss": 3.0744,
      "step": 201534
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.206155300140381,
      "learning_rate": 2.294211758247927e-05,
      "loss": 2.8471,
      "step": 201535
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1585211753845215,
      "learning_rate": 2.2940548733812357e-05,
      "loss": 2.9118,
      "step": 201536
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.861281633377075,
      "learning_rate": 2.293897993665611e-05,
      "loss": 2.9717,
      "step": 201537
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8212220668792725,
      "learning_rate": 2.2937411191010658e-05,
      "loss": 3.0197,
      "step": 201538
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.4831693172454834,
      "learning_rate": 2.293584249687643e-05,
      "loss": 2.8038,
      "step": 201539
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.4694416522979736,
      "learning_rate": 2.293427385425367e-05,
      "loss": 2.9646,
      "step": 201540
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.025873899459839,
      "learning_rate": 2.2932705263142635e-05,
      "loss": 2.777,
      "step": 201541
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.8336071968078613,
      "learning_rate": 2.293113672354363e-05,
      "loss": 3.2681,
      "step": 201542
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9038660526275635,
      "learning_rate": 2.2929568235456986e-05,
      "loss": 2.8965,
      "step": 201543
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.045841693878174,
      "learning_rate": 2.29279997988829e-05,
      "loss": 2.7163,
      "step": 201544
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0741825103759766,
      "learning_rate": 2.2926431413821778e-05,
      "loss": 2.8561,
      "step": 201545
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3117446899414062,
      "learning_rate": 2.2924863080273882e-05,
      "loss": 2.8776,
      "step": 201546
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8311848640441895,
      "learning_rate": 2.2923294798239477e-05,
      "loss": 2.8849,
      "step": 201547
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1853713989257812,
      "learning_rate": 2.29217265677188e-05,
      "loss": 2.844,
      "step": 201548
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9152157306671143,
      "learning_rate": 2.2920158388712282e-05,
      "loss": 2.9897,
      "step": 201549
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0157597064971924,
      "learning_rate": 2.291859026122006e-05,
      "loss": 2.9472,
      "step": 201550
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8538169860839844,
      "learning_rate": 2.2917022185242563e-05,
      "loss": 2.7327,
      "step": 201551
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0952041149139404,
      "learning_rate": 2.291545416078002e-05,
      "loss": 2.7898,
      "step": 201552
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.261627197265625,
      "learning_rate": 2.291388618783274e-05,
      "loss": 2.8906,
      "step": 201553
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7573206424713135,
      "learning_rate": 2.291231826640092e-05,
      "loss": 2.8845,
      "step": 201554
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1269094944000244,
      "learning_rate": 2.2910750396485024e-05,
      "loss": 3.0397,
      "step": 201555
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.545013427734375,
      "learning_rate": 2.290918257808515e-05,
      "loss": 3.031,
      "step": 201556
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.513119697570801,
      "learning_rate": 2.290761481120177e-05,
      "loss": 2.9825,
      "step": 201557
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0728535652160645,
      "learning_rate": 2.290604709583508e-05,
      "loss": 3.0378,
      "step": 201558
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.266770362854004,
      "learning_rate": 2.290447943198541e-05,
      "loss": 2.9408,
      "step": 201559
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3985235691070557,
      "learning_rate": 2.290291181965297e-05,
      "loss": 3.06,
      "step": 201560
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.808043956756592,
      "learning_rate": 2.2901344258838118e-05,
      "loss": 2.7837,
      "step": 201561
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9775259494781494,
      "learning_rate": 2.289977674954112e-05,
      "loss": 2.7896,
      "step": 201562
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7662124633789062,
      "learning_rate": 2.2898209291762347e-05,
      "loss": 2.9128,
      "step": 201563
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.907741069793701,
      "learning_rate": 2.2896641885501998e-05,
      "loss": 2.9842,
      "step": 201564
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.7882494926452637,
      "learning_rate": 2.289507453076044e-05,
      "loss": 2.8899,
      "step": 201565
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.891228675842285,
      "learning_rate": 2.2893507227537833e-05,
      "loss": 2.8691,
      "step": 201566
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.7478432655334473,
      "learning_rate": 2.2891939975834583e-05,
      "loss": 3.0539,
      "step": 201567
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1325509548187256,
      "learning_rate": 2.2890372775650957e-05,
      "loss": 2.7533,
      "step": 201568
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.501669406890869,
      "learning_rate": 2.288880562698725e-05,
      "loss": 2.8554,
      "step": 201569
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.9521772861480713,
      "learning_rate": 2.2887238529843765e-05,
      "loss": 3.0032,
      "step": 201570
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0419137477874756,
      "learning_rate": 2.2885671484220768e-05,
      "loss": 2.8963,
      "step": 201571
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.314566135406494,
      "learning_rate": 2.2884104490118492e-05,
      "loss": 3.1274,
      "step": 201572
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.813610792160034,
      "learning_rate": 2.2882537547537372e-05,
      "loss": 3.0837,
      "step": 201573
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.608610153198242,
      "learning_rate": 2.288097065647754e-05,
      "loss": 2.8744,
      "step": 201574
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.95438814163208,
      "learning_rate": 2.2879403816939423e-05,
      "loss": 2.9383,
      "step": 201575
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.174489974975586,
      "learning_rate": 2.2877837028923262e-05,
      "loss": 2.9393,
      "step": 201576
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.654524803161621,
      "learning_rate": 2.287627029242932e-05,
      "loss": 2.9236,
      "step": 201577
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.057027816772461,
      "learning_rate": 2.28747036074579e-05,
      "loss": 3.162,
      "step": 201578
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.64003849029541,
      "learning_rate": 2.287313697400933e-05,
      "loss": 2.958,
      "step": 201579
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.547917366027832,
      "learning_rate": 2.2871570392083817e-05,
      "loss": 2.9801,
      "step": 201580
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6060001850128174,
      "learning_rate": 2.2870003861681784e-05,
      "loss": 3.0592,
      "step": 201581
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.220126628875732,
      "learning_rate": 2.286843738280337e-05,
      "loss": 2.903,
      "step": 201582
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.1527528762817383,
      "learning_rate": 2.286687095544908e-05,
      "loss": 2.9162,
      "step": 201583
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.003061532974243,
      "learning_rate": 2.2865304579618937e-05,
      "loss": 2.9331,
      "step": 201584
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.319171667098999,
      "learning_rate": 2.2863738255313447e-05,
      "loss": 2.8981,
      "step": 201585
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8432962894439697,
      "learning_rate": 2.286217198253274e-05,
      "loss": 3.2026,
      "step": 201586
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.282011032104492,
      "learning_rate": 2.286060576127725e-05,
      "loss": 2.9218,
      "step": 201587
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.498518466949463,
      "learning_rate": 2.285903959154718e-05,
      "loss": 2.8832,
      "step": 201588
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.799196720123291,
      "learning_rate": 2.285747347334289e-05,
      "loss": 2.7081,
      "step": 201589
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.341496467590332,
      "learning_rate": 2.2855907406664585e-05,
      "loss": 3.0168,
      "step": 201590
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.6917262077331543,
      "learning_rate": 2.28543413915126e-05,
      "loss": 2.9869,
      "step": 201591
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.976717233657837,
      "learning_rate": 2.2852775427887195e-05,
      "loss": 2.9628,
      "step": 201592
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.0755937099456787,
      "learning_rate": 2.2851209515788736e-05,
      "loss": 2.8189,
      "step": 201593
    },
    {
      "epoch": 2.62,
      "grad_norm": 4.063088893890381,
      "learning_rate": 2.284964365521743e-05,
      "loss": 3.091,
      "step": 201594
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.214923143386841,
      "learning_rate": 2.2848077846173706e-05,
      "loss": 2.8726,
      "step": 201595
    },
    {
      "epoch": 2.62,
      "grad_norm": 5.853288650512695,
      "learning_rate": 2.2846512088657666e-05,
      "loss": 2.9359,
      "step": 201596
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.8774709701538086,
      "learning_rate": 2.2844946382669704e-05,
      "loss": 2.963,
      "step": 201597
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.391500473022461,
      "learning_rate": 2.284338072821006e-05,
      "loss": 2.9429,
      "step": 201598
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.808635711669922,
      "learning_rate": 2.2841815125279127e-05,
      "loss": 2.8104,
      "step": 201599
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.3541083335876465,
      "learning_rate": 2.284024957387708e-05,
      "loss": 3.0266,
      "step": 201600
    },
    {
      "epoch": 2.62,
      "grad_norm": 3.392979145050049,
      "learning_rate": 2.283868407400431e-05,
      "loss": 2.827,
      "step": 201601
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.993114471435547,
      "learning_rate": 2.2837118625661054e-05,
      "loss": 2.8673,
      "step": 201602
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6776843070983887,
      "learning_rate": 2.283555322884765e-05,
      "loss": 2.7631,
      "step": 201603
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0673129558563232,
      "learning_rate": 2.283398788356425e-05,
      "loss": 2.8367,
      "step": 201604
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.701566457748413,
      "learning_rate": 2.2832422589811307e-05,
      "loss": 2.8539,
      "step": 201605
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6371636390686035,
      "learning_rate": 2.2830857347589004e-05,
      "loss": 2.6491,
      "step": 201606
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.598435163497925,
      "learning_rate": 2.2829292156897715e-05,
      "loss": 2.7672,
      "step": 201607
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4789772033691406,
      "learning_rate": 2.2827727017737706e-05,
      "loss": 2.8303,
      "step": 201608
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3870084285736084,
      "learning_rate": 2.2826161930109243e-05,
      "loss": 2.8378,
      "step": 201609
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.077806234359741,
      "learning_rate": 2.2824596894012594e-05,
      "loss": 2.9922,
      "step": 201610
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9049482345581055,
      "learning_rate": 2.2823031909448118e-05,
      "loss": 2.8801,
      "step": 201611
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.103248596191406,
      "learning_rate": 2.2821466976416026e-05,
      "loss": 2.8157,
      "step": 201612
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.067970275878906,
      "learning_rate": 2.281990209491671e-05,
      "loss": 2.9187,
      "step": 201613
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1317598819732666,
      "learning_rate": 2.2818337264950404e-05,
      "loss": 2.979,
      "step": 201614
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.506908893585205,
      "learning_rate": 2.2816772486517342e-05,
      "loss": 2.7948,
      "step": 201615
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4485340118408203,
      "learning_rate": 2.281520775961796e-05,
      "loss": 2.7427,
      "step": 201616
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7499639987945557,
      "learning_rate": 2.281364308425242e-05,
      "loss": 3.0495,
      "step": 201617
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.923675060272217,
      "learning_rate": 2.2812078460421023e-05,
      "loss": 2.9094,
      "step": 201618
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9977102279663086,
      "learning_rate": 2.281051388812414e-05,
      "loss": 3.1511,
      "step": 201619
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6025688648223877,
      "learning_rate": 2.280894936736203e-05,
      "loss": 2.8752,
      "step": 201620
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.777930974960327,
      "learning_rate": 2.2807384898134896e-05,
      "loss": 2.7902,
      "step": 201621
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.070300817489624,
      "learning_rate": 2.2805820480443172e-05,
      "loss": 2.8378,
      "step": 201622
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.601316452026367,
      "learning_rate": 2.2804256114287022e-05,
      "loss": 3.0196,
      "step": 201623
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.977477550506592,
      "learning_rate": 2.280269179966685e-05,
      "loss": 2.9548,
      "step": 201624
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7371859550476074,
      "learning_rate": 2.2801127536582854e-05,
      "loss": 3.0567,
      "step": 201625
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0260872840881348,
      "learning_rate": 2.27995633250354e-05,
      "loss": 2.8817,
      "step": 201626
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6922430992126465,
      "learning_rate": 2.2797999165024683e-05,
      "loss": 2.9905,
      "step": 201627
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9850950241088867,
      "learning_rate": 2.2796435056551077e-05,
      "loss": 3.0421,
      "step": 201628
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6487348079681396,
      "learning_rate": 2.279487099961481e-05,
      "loss": 3.0307,
      "step": 201629
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1395719051361084,
      "learning_rate": 2.2793306994216286e-05,
      "loss": 2.8605,
      "step": 201630
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0099494457244873,
      "learning_rate": 2.2791743040355704e-05,
      "loss": 3.0018,
      "step": 201631
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.131730556488037,
      "learning_rate": 2.279017913803336e-05,
      "loss": 3.0339,
      "step": 201632
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.857200860977173,
      "learning_rate": 2.278861528724949e-05,
      "loss": 2.9984,
      "step": 201633
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1847028732299805,
      "learning_rate": 2.2787051488004528e-05,
      "loss": 3.0411,
      "step": 201634
    },
    {
      "epoch": 2.63,
      "grad_norm": 5.092190742492676,
      "learning_rate": 2.2785487740298602e-05,
      "loss": 2.5393,
      "step": 201635
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.914492607116699,
      "learning_rate": 2.2783924044132185e-05,
      "loss": 3.0292,
      "step": 201636
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.514922857284546,
      "learning_rate": 2.278236039950544e-05,
      "loss": 2.9522,
      "step": 201637
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4250028133392334,
      "learning_rate": 2.2780796806418666e-05,
      "loss": 2.8853,
      "step": 201638
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8811891078948975,
      "learning_rate": 2.2779233264872167e-05,
      "loss": 2.8052,
      "step": 201639
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.777726411819458,
      "learning_rate": 2.277766977486627e-05,
      "loss": 2.6438,
      "step": 201640
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8970425128936768,
      "learning_rate": 2.2776106336401178e-05,
      "loss": 2.8341,
      "step": 201641
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8421454429626465,
      "learning_rate": 2.2774542949477325e-05,
      "loss": 3.1795,
      "step": 201642
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4359891414642334,
      "learning_rate": 2.2772979614094878e-05,
      "loss": 2.9079,
      "step": 201643
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1694345474243164,
      "learning_rate": 2.27714163302542e-05,
      "loss": 2.9868,
      "step": 201644
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.682723045349121,
      "learning_rate": 2.276985309795546e-05,
      "loss": 2.9655,
      "step": 201645
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.330230712890625,
      "learning_rate": 2.2768289917199124e-05,
      "loss": 2.6999,
      "step": 201646
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5558369159698486,
      "learning_rate": 2.2766726787985323e-05,
      "loss": 2.5302,
      "step": 201647
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.538701057434082,
      "learning_rate": 2.2765163710314493e-05,
      "loss": 2.7802,
      "step": 201648
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1300504207611084,
      "learning_rate": 2.27636006841868e-05,
      "loss": 2.7193,
      "step": 201649
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8826961517333984,
      "learning_rate": 2.2762037709602676e-05,
      "loss": 3.1258,
      "step": 201650
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0878682136535645,
      "learning_rate": 2.276047478656222e-05,
      "loss": 3.0065,
      "step": 201651
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4307126998901367,
      "learning_rate": 2.2758911915065868e-05,
      "loss": 2.7873,
      "step": 201652
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.757089614868164,
      "learning_rate": 2.2757349095113818e-05,
      "loss": 3.1403,
      "step": 201653
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.718174695968628,
      "learning_rate": 2.275578632670647e-05,
      "loss": 2.8313,
      "step": 201654
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.593827962875366,
      "learning_rate": 2.2754223609844026e-05,
      "loss": 2.8646,
      "step": 201655
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6097025871276855,
      "learning_rate": 2.275266094452688e-05,
      "loss": 2.9875,
      "step": 201656
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9646451473236084,
      "learning_rate": 2.2751098330755135e-05,
      "loss": 2.8688,
      "step": 201657
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.659958839416504,
      "learning_rate": 2.2749535768529258e-05,
      "loss": 2.8322,
      "step": 201658
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1739253997802734,
      "learning_rate": 2.274797325784945e-05,
      "loss": 3.0061,
      "step": 201659
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.987278938293457,
      "learning_rate": 2.274641079871604e-05,
      "loss": 2.9671,
      "step": 201660
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.302267074584961,
      "learning_rate": 2.2744848391129265e-05,
      "loss": 3.1792,
      "step": 201661
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0171313285827637,
      "learning_rate": 2.274328603508959e-05,
      "loss": 2.694,
      "step": 201662
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2848923206329346,
      "learning_rate": 2.274172373059705e-05,
      "loss": 3.0077,
      "step": 201663
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.114898204803467,
      "learning_rate": 2.2740161477652073e-05,
      "loss": 2.7164,
      "step": 201664
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2665326595306396,
      "learning_rate": 2.273859927625493e-05,
      "loss": 2.9535,
      "step": 201665
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2548983097076416,
      "learning_rate": 2.2737037126405956e-05,
      "loss": 3.1548,
      "step": 201666
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.256443977355957,
      "learning_rate": 2.2735475028105344e-05,
      "loss": 2.8714,
      "step": 201667
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.740550994873047,
      "learning_rate": 2.273391298135353e-05,
      "loss": 3.0158,
      "step": 201668
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.777205467224121,
      "learning_rate": 2.2732350986150617e-05,
      "loss": 2.5574,
      "step": 201669
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.669058084487915,
      "learning_rate": 2.2730789042497065e-05,
      "loss": 2.8347,
      "step": 201670
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.85793137550354,
      "learning_rate": 2.2729227150393014e-05,
      "loss": 2.9988,
      "step": 201671
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.876570224761963,
      "learning_rate": 2.2727665309838892e-05,
      "loss": 2.9307,
      "step": 201672
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.697387456893921,
      "learning_rate": 2.2726103520834905e-05,
      "loss": 3.0553,
      "step": 201673
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1849114894866943,
      "learning_rate": 2.2724541783381445e-05,
      "loss": 2.5997,
      "step": 201674
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.9107933044433594,
      "learning_rate": 2.272298009747865e-05,
      "loss": 3.0149,
      "step": 201675
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8379220962524414,
      "learning_rate": 2.2721418463126917e-05,
      "loss": 2.9642,
      "step": 201676
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.532092809677124,
      "learning_rate": 2.2719856880326447e-05,
      "loss": 2.5922,
      "step": 201677
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9474740028381348,
      "learning_rate": 2.2718295349077675e-05,
      "loss": 3.1472,
      "step": 201678
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6057684421539307,
      "learning_rate": 2.271673386938073e-05,
      "loss": 2.7672,
      "step": 201679
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3646891117095947,
      "learning_rate": 2.2715172441236083e-05,
      "loss": 2.6991,
      "step": 201680
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8472609519958496,
      "learning_rate": 2.27136110646438e-05,
      "loss": 2.671,
      "step": 201681
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4905359745025635,
      "learning_rate": 2.2712049739604378e-05,
      "loss": 2.7601,
      "step": 201682
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.518173694610596,
      "learning_rate": 2.2710488466117948e-05,
      "loss": 2.8413,
      "step": 201683
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.039905071258545,
      "learning_rate": 2.2708927244184917e-05,
      "loss": 2.8533,
      "step": 201684
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.871000051498413,
      "learning_rate": 2.2707366073805476e-05,
      "loss": 2.8986,
      "step": 201685
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.00604248046875,
      "learning_rate": 2.2705804954980066e-05,
      "loss": 2.8236,
      "step": 201686
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7070720195770264,
      "learning_rate": 2.270424388770885e-05,
      "loss": 3.0731,
      "step": 201687
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.489845037460327,
      "learning_rate": 2.2702682871992128e-05,
      "loss": 2.7024,
      "step": 201688
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8342630863189697,
      "learning_rate": 2.27011219078302e-05,
      "loss": 2.9972,
      "step": 201689
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.021332263946533,
      "learning_rate": 2.2699560995223397e-05,
      "loss": 2.9826,
      "step": 201690
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.57442307472229,
      "learning_rate": 2.269800013417192e-05,
      "loss": 3.208,
      "step": 201691
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7754454612731934,
      "learning_rate": 2.2696439324676174e-05,
      "loss": 3.0604,
      "step": 201692
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.918484926223755,
      "learning_rate": 2.2694878566736418e-05,
      "loss": 2.916,
      "step": 201693
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.107912063598633,
      "learning_rate": 2.2693317860352888e-05,
      "loss": 3.0938,
      "step": 201694
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.896237850189209,
      "learning_rate": 2.2691757205525884e-05,
      "loss": 2.682,
      "step": 201695
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6901602745056152,
      "learning_rate": 2.269019660225574e-05,
      "loss": 2.954,
      "step": 201696
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7372562885284424,
      "learning_rate": 2.2688636050542686e-05,
      "loss": 3.2412,
      "step": 201697
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.635648727416992,
      "learning_rate": 2.2687075550387125e-05,
      "loss": 2.979,
      "step": 201698
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.807323694229126,
      "learning_rate": 2.268551510178922e-05,
      "loss": 2.8916,
      "step": 201699
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.002138614654541,
      "learning_rate": 2.2683954704749307e-05,
      "loss": 3.1228,
      "step": 201700
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.43880033493042,
      "learning_rate": 2.268239435926772e-05,
      "loss": 2.9997,
      "step": 201701
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6714890003204346,
      "learning_rate": 2.268083406534469e-05,
      "loss": 2.8465,
      "step": 201702
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.940557956695557,
      "learning_rate": 2.2679273822980482e-05,
      "loss": 2.9261,
      "step": 201703
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9694900512695312,
      "learning_rate": 2.26777136321755e-05,
      "loss": 2.879,
      "step": 201704
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7337560653686523,
      "learning_rate": 2.2676153492929973e-05,
      "loss": 3.1177,
      "step": 201705
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0656886100769043,
      "learning_rate": 2.26745934052441e-05,
      "loss": 3.1566,
      "step": 201706
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8950724601745605,
      "learning_rate": 2.2673033369118322e-05,
      "loss": 3.2049,
      "step": 201707
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0009987354278564,
      "learning_rate": 2.2671473384552864e-05,
      "loss": 2.8163,
      "step": 201708
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.76602840423584,
      "learning_rate": 2.2669913451547962e-05,
      "loss": 2.9421,
      "step": 201709
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4505515098571777,
      "learning_rate": 2.2668353570104015e-05,
      "loss": 2.984,
      "step": 201710
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0830917358398438,
      "learning_rate": 2.2666793740221258e-05,
      "loss": 2.8136,
      "step": 201711
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.61780047416687,
      "learning_rate": 2.266523396189992e-05,
      "loss": 2.7418,
      "step": 201712
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5934574604034424,
      "learning_rate": 2.2663674235140405e-05,
      "loss": 3.0322,
      "step": 201713
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6243486404418945,
      "learning_rate": 2.266211455994288e-05,
      "loss": 3.0231,
      "step": 201714
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.063267469406128,
      "learning_rate": 2.2660554936307772e-05,
      "loss": 2.7345,
      "step": 201715
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1050422191619873,
      "learning_rate": 2.2658995364235288e-05,
      "loss": 2.9919,
      "step": 201716
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9128289222717285,
      "learning_rate": 2.2657435843725758e-05,
      "loss": 2.9981,
      "step": 201717
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.138835906982422,
      "learning_rate": 2.265587637477938e-05,
      "loss": 2.7983,
      "step": 201718
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7639365196228027,
      "learning_rate": 2.2654316957396558e-05,
      "loss": 2.8343,
      "step": 201719
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5856735706329346,
      "learning_rate": 2.2652757591577487e-05,
      "loss": 3.1224,
      "step": 201720
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6383235454559326,
      "learning_rate": 2.265119827732257e-05,
      "loss": 2.8968,
      "step": 201721
    },
    {
      "epoch": 2.63,
      "grad_norm": 5.222764015197754,
      "learning_rate": 2.2649639014631936e-05,
      "loss": 2.8758,
      "step": 201722
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9938411712646484,
      "learning_rate": 2.2648079803506093e-05,
      "loss": 3.0635,
      "step": 201723
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0550403594970703,
      "learning_rate": 2.264652064394513e-05,
      "loss": 2.7746,
      "step": 201724
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6698062419891357,
      "learning_rate": 2.2644961535949426e-05,
      "loss": 2.8573,
      "step": 201725
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.375600814819336,
      "learning_rate": 2.2643402479519236e-05,
      "loss": 2.9849,
      "step": 201726
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.631326675415039,
      "learning_rate": 2.264184347465493e-05,
      "loss": 3.0626,
      "step": 201727
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.763749599456787,
      "learning_rate": 2.2640284521356677e-05,
      "loss": 2.9797,
      "step": 201728
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7963967323303223,
      "learning_rate": 2.263872561962491e-05,
      "loss": 2.814,
      "step": 201729
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7726728916168213,
      "learning_rate": 2.2637166769459757e-05,
      "loss": 2.8737,
      "step": 201730
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.038109064102173,
      "learning_rate": 2.263560797086166e-05,
      "loss": 2.8792,
      "step": 201731
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.97975754737854,
      "learning_rate": 2.2634049223830775e-05,
      "loss": 2.8373,
      "step": 201732
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0855679512023926,
      "learning_rate": 2.2632490528367507e-05,
      "loss": 2.9763,
      "step": 201733
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.267936944961548,
      "learning_rate": 2.2630931884472026e-05,
      "loss": 2.8516,
      "step": 201734
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.685422897338867,
      "learning_rate": 2.262937329214479e-05,
      "loss": 2.4675,
      "step": 201735
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.240331172943115,
      "learning_rate": 2.2627814751385908e-05,
      "loss": 2.7928,
      "step": 201736
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8120553493499756,
      "learning_rate": 2.2626256262195807e-05,
      "loss": 2.8517,
      "step": 201737
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6686017513275146,
      "learning_rate": 2.2624697824574657e-05,
      "loss": 2.859,
      "step": 201738
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9175314903259277,
      "learning_rate": 2.2623139438522886e-05,
      "loss": 3.0212,
      "step": 201739
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0823256969451904,
      "learning_rate": 2.2621581104040632e-05,
      "loss": 2.8971,
      "step": 201740
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8471410274505615,
      "learning_rate": 2.262002282112836e-05,
      "loss": 2.8763,
      "step": 201741
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.798689365386963,
      "learning_rate": 2.2618464589786167e-05,
      "loss": 2.8122,
      "step": 201742
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.716245174407959,
      "learning_rate": 2.261690641001449e-05,
      "loss": 2.862,
      "step": 201743
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6096224784851074,
      "learning_rate": 2.2615348281813528e-05,
      "loss": 2.8722,
      "step": 201744
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.857571840286255,
      "learning_rate": 2.2613790205183645e-05,
      "loss": 2.83,
      "step": 201745
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9322474002838135,
      "learning_rate": 2.2612232180125044e-05,
      "loss": 2.8858,
      "step": 201746
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0641701221466064,
      "learning_rate": 2.261067420663816e-05,
      "loss": 3.1605,
      "step": 201747
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0226855278015137,
      "learning_rate": 2.2609116284723116e-05,
      "loss": 3.041,
      "step": 201748
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8317348957061768,
      "learning_rate": 2.260755841438029e-05,
      "loss": 3.1696,
      "step": 201749
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.033337116241455,
      "learning_rate": 2.260600059560991e-05,
      "loss": 2.9297,
      "step": 201750
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9733028411865234,
      "learning_rate": 2.2604442828412373e-05,
      "loss": 2.8192,
      "step": 201751
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.035797119140625,
      "learning_rate": 2.260288511278785e-05,
      "loss": 2.9381,
      "step": 201752
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8479437828063965,
      "learning_rate": 2.2601327448736806e-05,
      "loss": 3.0218,
      "step": 201753
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.697385311126709,
      "learning_rate": 2.2599769836259273e-05,
      "loss": 2.8688,
      "step": 201754
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.433286428451538,
      "learning_rate": 2.2598212275355753e-05,
      "loss": 2.8941,
      "step": 201755
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.829232931137085,
      "learning_rate": 2.2596654766026413e-05,
      "loss": 2.8042,
      "step": 201756
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.188732624053955,
      "learning_rate": 2.259509730827165e-05,
      "loss": 2.9928,
      "step": 201757
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8311519622802734,
      "learning_rate": 2.259353990209163e-05,
      "loss": 2.8359,
      "step": 201758
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.767637014389038,
      "learning_rate": 2.259198254748682e-05,
      "loss": 2.921,
      "step": 201759
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9436309337615967,
      "learning_rate": 2.2590425244457288e-05,
      "loss": 2.836,
      "step": 201760
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.311607599258423,
      "learning_rate": 2.2588867993003466e-05,
      "loss": 3.0834,
      "step": 201761
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6506543159484863,
      "learning_rate": 2.258731079312559e-05,
      "loss": 3.0191,
      "step": 201762
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.514153003692627,
      "learning_rate": 2.258575364482399e-05,
      "loss": 2.785,
      "step": 201763
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0401039123535156,
      "learning_rate": 2.2584196548098898e-05,
      "loss": 2.8623,
      "step": 201764
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9726195335388184,
      "learning_rate": 2.258263950295075e-05,
      "loss": 2.8015,
      "step": 201765
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.700532913208008,
      "learning_rate": 2.258108250937961e-05,
      "loss": 2.9994,
      "step": 201766
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7817225456237793,
      "learning_rate": 2.2579525567385946e-05,
      "loss": 2.8671,
      "step": 201767
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6368744373321533,
      "learning_rate": 2.2577968676969926e-05,
      "loss": 2.8943,
      "step": 201768
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.270751953125,
      "learning_rate": 2.257641183813198e-05,
      "loss": 2.9673,
      "step": 201769
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4060566425323486,
      "learning_rate": 2.257485505087221e-05,
      "loss": 2.8027,
      "step": 201770
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6241676807403564,
      "learning_rate": 2.2573298315191145e-05,
      "loss": 2.9397,
      "step": 201771
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.796290397644043,
      "learning_rate": 2.2571741631088858e-05,
      "loss": 2.9889,
      "step": 201772
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.893899917602539,
      "learning_rate": 2.2570184998565745e-05,
      "loss": 2.9127,
      "step": 201773
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.90938138961792,
      "learning_rate": 2.2568628417622005e-05,
      "loss": 2.7879,
      "step": 201774
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.140493869781494,
      "learning_rate": 2.256707188825807e-05,
      "loss": 2.855,
      "step": 201775
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7840232849121094,
      "learning_rate": 2.256551541047411e-05,
      "loss": 2.5924,
      "step": 201776
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.683507204055786,
      "learning_rate": 2.256395898427049e-05,
      "loss": 2.9494,
      "step": 201777
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.323509931564331,
      "learning_rate": 2.256240260964748e-05,
      "loss": 3.0986,
      "step": 201778
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.574988603591919,
      "learning_rate": 2.256084628660534e-05,
      "loss": 2.8348,
      "step": 201779
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.004779577255249,
      "learning_rate": 2.2559290015144338e-05,
      "loss": 2.7623,
      "step": 201780
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.951636552810669,
      "learning_rate": 2.2557733795264844e-05,
      "loss": 2.8207,
      "step": 201781
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.5207695960998535,
      "learning_rate": 2.2556177626967055e-05,
      "loss": 2.9769,
      "step": 201782
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5394890308380127,
      "learning_rate": 2.255462151025137e-05,
      "loss": 2.7156,
      "step": 201783
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.997708320617676,
      "learning_rate": 2.2553065445118023e-05,
      "loss": 2.9952,
      "step": 201784
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8058624267578125,
      "learning_rate": 2.2551509431567217e-05,
      "loss": 2.7258,
      "step": 201785
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.015002489089966,
      "learning_rate": 2.2549953469599414e-05,
      "loss": 3.0906,
      "step": 201786
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7636942863464355,
      "learning_rate": 2.254839755921478e-05,
      "loss": 2.5933,
      "step": 201787
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.450306415557861,
      "learning_rate": 2.254684170041359e-05,
      "loss": 2.8307,
      "step": 201788
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.274598121643066,
      "learning_rate": 2.254528589319623e-05,
      "loss": 2.8813,
      "step": 201789
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5865025520324707,
      "learning_rate": 2.2543730137562943e-05,
      "loss": 2.9633,
      "step": 201790
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.443284511566162,
      "learning_rate": 2.254217443351396e-05,
      "loss": 2.7908,
      "step": 201791
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5838799476623535,
      "learning_rate": 2.254061878104968e-05,
      "loss": 2.8294,
      "step": 201792
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9048328399658203,
      "learning_rate": 2.2539063180170336e-05,
      "loss": 3.0961,
      "step": 201793
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.131981134414673,
      "learning_rate": 2.2537507630876195e-05,
      "loss": 2.8528,
      "step": 201794
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3058600425720215,
      "learning_rate": 2.253595213316759e-05,
      "loss": 3.2885,
      "step": 201795
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.221524953842163,
      "learning_rate": 2.2534396687044786e-05,
      "loss": 3.0858,
      "step": 201796
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.612452507019043,
      "learning_rate": 2.253284129250802e-05,
      "loss": 2.8683,
      "step": 201797
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8806636333465576,
      "learning_rate": 2.2531285949557688e-05,
      "loss": 2.9101,
      "step": 201798
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5704030990600586,
      "learning_rate": 2.252973065819399e-05,
      "loss": 2.9512,
      "step": 201799
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2224745750427246,
      "learning_rate": 2.2528175418417295e-05,
      "loss": 3.0006,
      "step": 201800
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.8960161209106445,
      "learning_rate": 2.2526620230227865e-05,
      "loss": 3.0059,
      "step": 201801
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.384271144866943,
      "learning_rate": 2.252506509362597e-05,
      "loss": 2.9195,
      "step": 201802
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0838208198547363,
      "learning_rate": 2.2523510008611844e-05,
      "loss": 2.8237,
      "step": 201803
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.695051431655884,
      "learning_rate": 2.2521954975185884e-05,
      "loss": 2.9758,
      "step": 201804
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9001619815826416,
      "learning_rate": 2.2520399993348293e-05,
      "loss": 3.0171,
      "step": 201805
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.645054578781128,
      "learning_rate": 2.2518845063099433e-05,
      "loss": 2.842,
      "step": 201806
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8215157985687256,
      "learning_rate": 2.2517290184439542e-05,
      "loss": 2.8422,
      "step": 201807
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.931119441986084,
      "learning_rate": 2.2515735357368948e-05,
      "loss": 2.7667,
      "step": 201808
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7943708896636963,
      "learning_rate": 2.2514180581887885e-05,
      "loss": 2.7476,
      "step": 201809
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6848649978637695,
      "learning_rate": 2.251262585799669e-05,
      "loss": 2.7368,
      "step": 201810
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.201671838760376,
      "learning_rate": 2.2511071185695595e-05,
      "loss": 2.95,
      "step": 201811
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.002824306488037,
      "learning_rate": 2.2509516564984998e-05,
      "loss": 2.7453,
      "step": 201812
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.011589288711548,
      "learning_rate": 2.250796199586503e-05,
      "loss": 2.9515,
      "step": 201813
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8299412727355957,
      "learning_rate": 2.2506407478336196e-05,
      "loss": 2.6551,
      "step": 201814
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2453839778900146,
      "learning_rate": 2.250485301239856e-05,
      "loss": 3.0257,
      "step": 201815
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0300323963165283,
      "learning_rate": 2.2503298598052555e-05,
      "loss": 2.9714,
      "step": 201816
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0429294109344482,
      "learning_rate": 2.250174423529838e-05,
      "loss": 2.9275,
      "step": 201817
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1056149005889893,
      "learning_rate": 2.2500189924136437e-05,
      "loss": 3.2253,
      "step": 201818
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9690582752227783,
      "learning_rate": 2.2498635664566855e-05,
      "loss": 2.8296,
      "step": 201819
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4901113510131836,
      "learning_rate": 2.249708145659014e-05,
      "loss": 2.9341,
      "step": 201820
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5573835372924805,
      "learning_rate": 2.249552730020635e-05,
      "loss": 2.991,
      "step": 201821
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.086647033691406,
      "learning_rate": 2.2493973195415927e-05,
      "loss": 2.6996,
      "step": 201822
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.632228136062622,
      "learning_rate": 2.2492419142219065e-05,
      "loss": 2.9885,
      "step": 201823
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.015775442123413,
      "learning_rate": 2.2490865140616166e-05,
      "loss": 3.0052,
      "step": 201824
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.903609275817871,
      "learning_rate": 2.2489311190607396e-05,
      "loss": 2.9653,
      "step": 201825
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6984665393829346,
      "learning_rate": 2.2487757292193188e-05,
      "loss": 2.9276,
      "step": 201826
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2092740535736084,
      "learning_rate": 2.2486203445373674e-05,
      "loss": 3.0776,
      "step": 201827
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7813940048217773,
      "learning_rate": 2.248464965014922e-05,
      "loss": 2.8474,
      "step": 201828
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5849926471710205,
      "learning_rate": 2.24830959065201e-05,
      "loss": 3.1603,
      "step": 201829
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.262059211730957,
      "learning_rate": 2.2481542214486638e-05,
      "loss": 2.7907,
      "step": 201830
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1466739177703857,
      "learning_rate": 2.2479988574049033e-05,
      "loss": 3.0275,
      "step": 201831
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8772380352020264,
      "learning_rate": 2.247843498520776e-05,
      "loss": 2.9271,
      "step": 201832
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.711812734603882,
      "learning_rate": 2.247688144796288e-05,
      "loss": 2.8712,
      "step": 201833
    },
    {
      "epoch": 2.63,
      "grad_norm": 6.59433650970459,
      "learning_rate": 2.2475327962314828e-05,
      "loss": 2.8281,
      "step": 201834
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.932614803314209,
      "learning_rate": 2.24737745282638e-05,
      "loss": 2.9661,
      "step": 201835
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6695733070373535,
      "learning_rate": 2.24722211458102e-05,
      "loss": 3.0136,
      "step": 201836
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.349015474319458,
      "learning_rate": 2.247066781495419e-05,
      "loss": 2.7787,
      "step": 201837
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.110485792160034,
      "learning_rate": 2.2469114535696243e-05,
      "loss": 3.1103,
      "step": 201838
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8445401191711426,
      "learning_rate": 2.2467561308036387e-05,
      "loss": 2.9068,
      "step": 201839
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.244894027709961,
      "learning_rate": 2.2466008131975157e-05,
      "loss": 2.8852,
      "step": 201840
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.3449277877807617,
      "learning_rate": 2.246445500751265e-05,
      "loss": 2.8325,
      "step": 201841
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.166027545928955,
      "learning_rate": 2.246290193464927e-05,
      "loss": 2.9538,
      "step": 201842
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.787421703338623,
      "learning_rate": 2.2461348913385245e-05,
      "loss": 2.718,
      "step": 201843
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.361938953399658,
      "learning_rate": 2.2459795943721014e-05,
      "loss": 2.7905,
      "step": 201844
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.068848609924316,
      "learning_rate": 2.2458243025656606e-05,
      "loss": 2.8251,
      "step": 201845
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5583112239837646,
      "learning_rate": 2.2456690159192525e-05,
      "loss": 2.8343,
      "step": 201846
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2548296451568604,
      "learning_rate": 2.2455137344328935e-05,
      "loss": 3.1088,
      "step": 201847
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.061039447784424,
      "learning_rate": 2.2453584581066232e-05,
      "loss": 2.8461,
      "step": 201848
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9134533405303955,
      "learning_rate": 2.2452031869404584e-05,
      "loss": 2.9109,
      "step": 201849
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.930939197540283,
      "learning_rate": 2.2450479209344397e-05,
      "loss": 2.856,
      "step": 201850
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2191648483276367,
      "learning_rate": 2.2448926600885897e-05,
      "loss": 2.8728,
      "step": 201851
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.784876585006714,
      "learning_rate": 2.2447374044029386e-05,
      "loss": 2.8746,
      "step": 201852
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8351781368255615,
      "learning_rate": 2.2445821538775098e-05,
      "loss": 2.9216,
      "step": 201853
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.691446304321289,
      "learning_rate": 2.2444269085123403e-05,
      "loss": 2.8756,
      "step": 201854
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.845973491668701,
      "learning_rate": 2.2442716683074523e-05,
      "loss": 3.0133,
      "step": 201855
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.035479784011841,
      "learning_rate": 2.2441164332628835e-05,
      "loss": 2.9107,
      "step": 201856
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.944972276687622,
      "learning_rate": 2.243961203378657e-05,
      "loss": 3.0083,
      "step": 201857
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0870683193206787,
      "learning_rate": 2.2438059786548024e-05,
      "loss": 2.9002,
      "step": 201858
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3171794414520264,
      "learning_rate": 2.2436507590913434e-05,
      "loss": 2.9204,
      "step": 201859
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.986078977584839,
      "learning_rate": 2.2434955446883197e-05,
      "loss": 2.684,
      "step": 201860
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.439124345779419,
      "learning_rate": 2.2433403354457446e-05,
      "loss": 2.7705,
      "step": 201861
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.963637351989746,
      "learning_rate": 2.243185131363665e-05,
      "loss": 2.731,
      "step": 201862
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.447005271911621,
      "learning_rate": 2.243029932442101e-05,
      "loss": 2.9089,
      "step": 201863
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7582883834838867,
      "learning_rate": 2.242874738681082e-05,
      "loss": 2.9183,
      "step": 201864
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5276894569396973,
      "learning_rate": 2.2427195500806316e-05,
      "loss": 2.9694,
      "step": 201865
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.334979295730591,
      "learning_rate": 2.2425643666407866e-05,
      "loss": 3.1758,
      "step": 201866
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8671162128448486,
      "learning_rate": 2.2424091883615704e-05,
      "loss": 2.9884,
      "step": 201867
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6238656044006348,
      "learning_rate": 2.242254015243019e-05,
      "loss": 3.0312,
      "step": 201868
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.333559513092041,
      "learning_rate": 2.2420988472851564e-05,
      "loss": 2.8024,
      "step": 201869
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1587579250335693,
      "learning_rate": 2.241943684488009e-05,
      "loss": 2.7075,
      "step": 201870
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.798414945602417,
      "learning_rate": 2.2417885268516068e-05,
      "loss": 3.0809,
      "step": 201871
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.983067512512207,
      "learning_rate": 2.241633374375983e-05,
      "loss": 2.9618,
      "step": 201872
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6792168617248535,
      "learning_rate": 2.2414782270611575e-05,
      "loss": 3.0652,
      "step": 201873
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6296908855438232,
      "learning_rate": 2.2413230849071738e-05,
      "loss": 3.0048,
      "step": 201874
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.93347430229187,
      "learning_rate": 2.2411679479140487e-05,
      "loss": 3.0096,
      "step": 201875
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2882401943206787,
      "learning_rate": 2.241012816081812e-05,
      "loss": 2.749,
      "step": 201876
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.308896541595459,
      "learning_rate": 2.240857689410497e-05,
      "loss": 2.9596,
      "step": 201877
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.879570722579956,
      "learning_rate": 2.2407025679001332e-05,
      "loss": 2.8817,
      "step": 201878
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4951117038726807,
      "learning_rate": 2.2405474515507416e-05,
      "loss": 2.8435,
      "step": 201879
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7704427242279053,
      "learning_rate": 2.240392340362358e-05,
      "loss": 3.0297,
      "step": 201880
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.674635887145996,
      "learning_rate": 2.240237234335013e-05,
      "loss": 3.0552,
      "step": 201881
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4861087799072266,
      "learning_rate": 2.240082133468726e-05,
      "loss": 2.8482,
      "step": 201882
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.919931650161743,
      "learning_rate": 2.239927037763537e-05,
      "loss": 2.954,
      "step": 201883
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8247499465942383,
      "learning_rate": 2.239771947219463e-05,
      "loss": 2.9035,
      "step": 201884
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.355367422103882,
      "learning_rate": 2.2396168618365473e-05,
      "loss": 2.8938,
      "step": 201885
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.749128818511963,
      "learning_rate": 2.2394617816148097e-05,
      "loss": 2.9729,
      "step": 201886
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.253251552581787,
      "learning_rate": 2.239306706554277e-05,
      "loss": 2.9168,
      "step": 201887
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.029516220092773,
      "learning_rate": 2.239151636654979e-05,
      "loss": 3.1067,
      "step": 201888
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.836455821990967,
      "learning_rate": 2.2389965719169524e-05,
      "loss": 2.979,
      "step": 201889
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.007378578186035,
      "learning_rate": 2.2388415123402138e-05,
      "loss": 2.8334,
      "step": 201890
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8039462566375732,
      "learning_rate": 2.2386864579248064e-05,
      "loss": 2.7689,
      "step": 201891
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.768306255340576,
      "learning_rate": 2.2385314086707472e-05,
      "loss": 2.8408,
      "step": 201892
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.705890655517578,
      "learning_rate": 2.2383763645780727e-05,
      "loss": 3.1416,
      "step": 201893
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.558218955993652,
      "learning_rate": 2.2382213256467996e-05,
      "loss": 2.8273,
      "step": 201894
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.056087017059326,
      "learning_rate": 2.2380662918769743e-05,
      "loss": 2.8667,
      "step": 201895
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.611967086791992,
      "learning_rate": 2.237911263268607e-05,
      "loss": 2.8765,
      "step": 201896
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3112967014312744,
      "learning_rate": 2.2377562398217443e-05,
      "loss": 2.9433,
      "step": 201897
    },
    {
      "epoch": 2.63,
      "grad_norm": 5.316897869110107,
      "learning_rate": 2.2376012215363992e-05,
      "loss": 2.8095,
      "step": 201898
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8455283641815186,
      "learning_rate": 2.237446208412622e-05,
      "loss": 2.7066,
      "step": 201899
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7140254974365234,
      "learning_rate": 2.2372912004504128e-05,
      "loss": 2.9006,
      "step": 201900
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9136886596679688,
      "learning_rate": 2.2371361976498248e-05,
      "loss": 2.8587,
      "step": 201901
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.721987247467041,
      "learning_rate": 2.2369812000108678e-05,
      "loss": 3.0429,
      "step": 201902
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7962334156036377,
      "learning_rate": 2.2368262075335886e-05,
      "loss": 3.2164,
      "step": 201903
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.383350133895874,
      "learning_rate": 2.2366712202180005e-05,
      "loss": 3.2886,
      "step": 201904
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.781447172164917,
      "learning_rate": 2.23651623806415e-05,
      "loss": 3.139,
      "step": 201905
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.742401361465454,
      "learning_rate": 2.2363612610720472e-05,
      "loss": 2.8812,
      "step": 201906
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.611783266067505,
      "learning_rate": 2.236206289241732e-05,
      "loss": 2.8396,
      "step": 201907
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5897984504699707,
      "learning_rate": 2.2360513225732247e-05,
      "loss": 2.6593,
      "step": 201908
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.345947742462158,
      "learning_rate": 2.2358963610665648e-05,
      "loss": 2.7819,
      "step": 201909
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.717813491821289,
      "learning_rate": 2.235741404721769e-05,
      "loss": 2.8346,
      "step": 201910
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4664738178253174,
      "learning_rate": 2.2355864535388877e-05,
      "loss": 3.1737,
      "step": 201911
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6677138805389404,
      "learning_rate": 2.2354315075179207e-05,
      "loss": 2.9872,
      "step": 201912
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.072934150695801,
      "learning_rate": 2.2352765666589178e-05,
      "loss": 3.0492,
      "step": 201913
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.208463430404663,
      "learning_rate": 2.235121630961896e-05,
      "loss": 2.9205,
      "step": 201914
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4534592628479004,
      "learning_rate": 2.2349667004268945e-05,
      "loss": 3.0733,
      "step": 201915
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1376657485961914,
      "learning_rate": 2.2348117750539306e-05,
      "loss": 2.9691,
      "step": 201916
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7874157428741455,
      "learning_rate": 2.2346568548430445e-05,
      "loss": 2.7419,
      "step": 201917
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2508814334869385,
      "learning_rate": 2.234501939794262e-05,
      "loss": 3.0447,
      "step": 201918
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5695738792419434,
      "learning_rate": 2.2343470299076073e-05,
      "loss": 2.8834,
      "step": 201919
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9018523693084717,
      "learning_rate": 2.2341921251831062e-05,
      "loss": 2.9457,
      "step": 201920
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.255429267883301,
      "learning_rate": 2.2340372256207995e-05,
      "loss": 2.7979,
      "step": 201921
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7252039909362793,
      "learning_rate": 2.2338823312207034e-05,
      "loss": 2.804,
      "step": 201922
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1534929275512695,
      "learning_rate": 2.233727441982861e-05,
      "loss": 2.9627,
      "step": 201923
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8369531631469727,
      "learning_rate": 2.2335725579072894e-05,
      "loss": 3.0116,
      "step": 201924
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7369394302368164,
      "learning_rate": 2.2334176789940218e-05,
      "loss": 2.9282,
      "step": 201925
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.021775484085083,
      "learning_rate": 2.233262805243081e-05,
      "loss": 2.8969,
      "step": 201926
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.000967979431152,
      "learning_rate": 2.2331079366545047e-05,
      "loss": 3.0634,
      "step": 201927
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.142760753631592,
      "learning_rate": 2.232953073228312e-05,
      "loss": 2.9287,
      "step": 201928
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.249950885772705,
      "learning_rate": 2.232798214964546e-05,
      "loss": 2.9683,
      "step": 201929
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0550925731658936,
      "learning_rate": 2.2326433618632245e-05,
      "loss": 3.0336,
      "step": 201930
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2796690464019775,
      "learning_rate": 2.2324885139243796e-05,
      "loss": 2.8639,
      "step": 201931
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6980369091033936,
      "learning_rate": 2.2323336711480322e-05,
      "loss": 2.8617,
      "step": 201932
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9203875064849854,
      "learning_rate": 2.232178833534225e-05,
      "loss": 2.5179,
      "step": 201933
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0145695209503174,
      "learning_rate": 2.232024001082975e-05,
      "loss": 2.9817,
      "step": 201934
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2525527477264404,
      "learning_rate": 2.2318691737943217e-05,
      "loss": 2.9403,
      "step": 201935
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.363689661026001,
      "learning_rate": 2.2317143516682855e-05,
      "loss": 2.8793,
      "step": 201936
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.312636613845825,
      "learning_rate": 2.2315595347049e-05,
      "loss": 2.9066,
      "step": 201937
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7840397357940674,
      "learning_rate": 2.231404722904184e-05,
      "loss": 2.8445,
      "step": 201938
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.568462371826172,
      "learning_rate": 2.2312499162661823e-05,
      "loss": 2.8153,
      "step": 201939
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.764162302017212,
      "learning_rate": 2.2310951147909106e-05,
      "loss": 2.9291,
      "step": 201940
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.944730758666992,
      "learning_rate": 2.2309403184784058e-05,
      "loss": 3.0384,
      "step": 201941
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8037877082824707,
      "learning_rate": 2.230785527328691e-05,
      "loss": 3.1759,
      "step": 201942
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5408923625946045,
      "learning_rate": 2.2306307413418e-05,
      "loss": 2.9467,
      "step": 201943
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4784862995147705,
      "learning_rate": 2.2304759605177558e-05,
      "loss": 2.973,
      "step": 201944
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.68381667137146,
      "learning_rate": 2.2303211848565915e-05,
      "loss": 2.8339,
      "step": 201945
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.216569423675537,
      "learning_rate": 2.2301664143583342e-05,
      "loss": 2.5489,
      "step": 201946
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.122488260269165,
      "learning_rate": 2.2300116490230134e-05,
      "loss": 2.6996,
      "step": 201947
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9554965496063232,
      "learning_rate": 2.2298568888506597e-05,
      "loss": 2.8845,
      "step": 201948
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.779799461364746,
      "learning_rate": 2.2297021338413025e-05,
      "loss": 2.8576,
      "step": 201949
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.847688913345337,
      "learning_rate": 2.2295473839949583e-05,
      "loss": 2.9181,
      "step": 201950
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5055742263793945,
      "learning_rate": 2.2293926393116747e-05,
      "loss": 2.8932,
      "step": 201951
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8967745304107666,
      "learning_rate": 2.2292378997914608e-05,
      "loss": 3.019,
      "step": 201952
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.977639675140381,
      "learning_rate": 2.2290831654343667e-05,
      "loss": 2.9215,
      "step": 201953
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9782700538635254,
      "learning_rate": 2.228928436240406e-05,
      "loss": 3.0166,
      "step": 201954
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.492169141769409,
      "learning_rate": 2.2287737122096118e-05,
      "loss": 3.0146,
      "step": 201955
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.9696462154388428,
      "learning_rate": 2.2286189933420106e-05,
      "loss": 2.7663,
      "step": 201956
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.275614023208618,
      "learning_rate": 2.228464279637636e-05,
      "loss": 2.9817,
      "step": 201957
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7189950942993164,
      "learning_rate": 2.2283095710965117e-05,
      "loss": 2.8095,
      "step": 201958
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9183902740478516,
      "learning_rate": 2.22815486771867e-05,
      "loss": 2.891,
      "step": 201959
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1996021270751953,
      "learning_rate": 2.2280001695041416e-05,
      "loss": 3.091,
      "step": 201960
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6727404594421387,
      "learning_rate": 2.2278454764529463e-05,
      "loss": 2.8428,
      "step": 201961
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.859881639480591,
      "learning_rate": 2.2276907885651242e-05,
      "loss": 2.9774,
      "step": 201962
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.15767765045166,
      "learning_rate": 2.227536105840698e-05,
      "loss": 3.0309,
      "step": 201963
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8993935585021973,
      "learning_rate": 2.227381428279692e-05,
      "loss": 3.0465,
      "step": 201964
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.391235589981079,
      "learning_rate": 2.2272267558821456e-05,
      "loss": 2.9811,
      "step": 201965
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.370980978012085,
      "learning_rate": 2.2270720886480818e-05,
      "loss": 3.0126,
      "step": 201966
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6535961627960205,
      "learning_rate": 2.2269174265775248e-05,
      "loss": 3.0092,
      "step": 201967
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.769265651702881,
      "learning_rate": 2.226762769670514e-05,
      "loss": 3.0503,
      "step": 201968
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8463289737701416,
      "learning_rate": 2.2266081179270656e-05,
      "loss": 2.9582,
      "step": 201969
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.659288167953491,
      "learning_rate": 2.2264534713472206e-05,
      "loss": 2.8997,
      "step": 201970
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2466344833374023,
      "learning_rate": 2.2262988299310013e-05,
      "loss": 2.6653,
      "step": 201971
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8514347076416016,
      "learning_rate": 2.2261441936784384e-05,
      "loss": 2.8991,
      "step": 201972
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0995874404907227,
      "learning_rate": 2.225989562589555e-05,
      "loss": 2.9871,
      "step": 201973
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.059081554412842,
      "learning_rate": 2.225834936664388e-05,
      "loss": 2.7897,
      "step": 201974
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.9276700019836426,
      "learning_rate": 2.22568031590296e-05,
      "loss": 2.9869,
      "step": 201975
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0771870613098145,
      "learning_rate": 2.225525700305305e-05,
      "loss": 3.0036,
      "step": 201976
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.793138027191162,
      "learning_rate": 2.2253710898714494e-05,
      "loss": 2.9836,
      "step": 201977
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6777498722076416,
      "learning_rate": 2.2252164846014165e-05,
      "loss": 3.0634,
      "step": 201978
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6500680446624756,
      "learning_rate": 2.225061884495246e-05,
      "loss": 3.0527,
      "step": 201979
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8640668392181396,
      "learning_rate": 2.2249072895529585e-05,
      "loss": 3.0725,
      "step": 201980
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6608989238739014,
      "learning_rate": 2.2247526997745836e-05,
      "loss": 2.7713,
      "step": 201981
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7715797424316406,
      "learning_rate": 2.2245981151601545e-05,
      "loss": 2.9345,
      "step": 201982
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.670734405517578,
      "learning_rate": 2.2244435357096913e-05,
      "loss": 3.1175,
      "step": 201983
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.119232177734375,
      "learning_rate": 2.224288961423234e-05,
      "loss": 3.0944,
      "step": 201984
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0017712116241455,
      "learning_rate": 2.224134392300806e-05,
      "loss": 2.9543,
      "step": 201985
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.640840530395508,
      "learning_rate": 2.223979828342437e-05,
      "loss": 2.9997,
      "step": 201986
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7114288806915283,
      "learning_rate": 2.2238252695481472e-05,
      "loss": 2.8494,
      "step": 201987
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7928366661071777,
      "learning_rate": 2.2236707159179767e-05,
      "loss": 2.9261,
      "step": 201988
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.930048704147339,
      "learning_rate": 2.2235161674519485e-05,
      "loss": 3.0115,
      "step": 201989
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.817270517349243,
      "learning_rate": 2.2233616241500962e-05,
      "loss": 2.6499,
      "step": 201990
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2955052852630615,
      "learning_rate": 2.2232070860124463e-05,
      "loss": 2.9129,
      "step": 201991
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1001579761505127,
      "learning_rate": 2.2230525530390254e-05,
      "loss": 3.1576,
      "step": 201992
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0967764854431152,
      "learning_rate": 2.2228980252298567e-05,
      "loss": 2.8433,
      "step": 201993
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0005812644958496,
      "learning_rate": 2.222743502584984e-05,
      "loss": 3.0631,
      "step": 201994
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.884183406829834,
      "learning_rate": 2.2225889851044197e-05,
      "loss": 2.7551,
      "step": 201995
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7679688930511475,
      "learning_rate": 2.2224344727882083e-05,
      "loss": 3.2408,
      "step": 201996
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5301218032836914,
      "learning_rate": 2.2222799656363687e-05,
      "loss": 2.8275,
      "step": 201997
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0960302352905273,
      "learning_rate": 2.2221254636489316e-05,
      "loss": 2.7398,
      "step": 201998
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.827247142791748,
      "learning_rate": 2.2219709668259232e-05,
      "loss": 2.8466,
      "step": 201999
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.783639669418335,
      "learning_rate": 2.2218164751673773e-05,
      "loss": 3.0864,
      "step": 202000
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.271869421005249,
      "learning_rate": 2.2216619886733133e-05,
      "loss": 3.0341,
      "step": 202001
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0210299491882324,
      "learning_rate": 2.2215075073437748e-05,
      "loss": 2.8914,
      "step": 202002
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3408267498016357,
      "learning_rate": 2.221353031178782e-05,
      "loss": 2.5936,
      "step": 202003
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0440080165863037,
      "learning_rate": 2.2211985601783643e-05,
      "loss": 2.9908,
      "step": 202004
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6742265224456787,
      "learning_rate": 2.2210440943425455e-05,
      "loss": 3.1522,
      "step": 202005
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5375242233276367,
      "learning_rate": 2.220889633671362e-05,
      "loss": 2.7185,
      "step": 202006
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.5754475593566895,
      "learning_rate": 2.2207351781648342e-05,
      "loss": 2.8227,
      "step": 202007
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.631469488143921,
      "learning_rate": 2.2205807278230014e-05,
      "loss": 2.9158,
      "step": 202008
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.877805709838867,
      "learning_rate": 2.2204262826458906e-05,
      "loss": 2.7447,
      "step": 202009
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.822781562805176,
      "learning_rate": 2.220271842633522e-05,
      "loss": 2.6987,
      "step": 202010
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.694267749786377,
      "learning_rate": 2.2201174077859284e-05,
      "loss": 2.8405,
      "step": 202011
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.379110336303711,
      "learning_rate": 2.2199629781031435e-05,
      "loss": 2.8392,
      "step": 202012
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.690910816192627,
      "learning_rate": 2.219808553585184e-05,
      "loss": 3.1074,
      "step": 202013
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.477272987365723,
      "learning_rate": 2.2196541342320927e-05,
      "loss": 3.052,
      "step": 202014
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.269002914428711,
      "learning_rate": 2.2194997200438934e-05,
      "loss": 2.7672,
      "step": 202015
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.937861680984497,
      "learning_rate": 2.2193453110206127e-05,
      "loss": 2.6464,
      "step": 202016
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.687612771987915,
      "learning_rate": 2.219190907162277e-05,
      "loss": 2.7247,
      "step": 202017
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2325639724731445,
      "learning_rate": 2.2190365084689198e-05,
      "loss": 2.8931,
      "step": 202018
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6044974327087402,
      "learning_rate": 2.2188821149405645e-05,
      "loss": 2.7585,
      "step": 202019
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2833049297332764,
      "learning_rate": 2.2187277265772475e-05,
      "loss": 3.2134,
      "step": 202020
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9863274097442627,
      "learning_rate": 2.2185733433789954e-05,
      "loss": 2.9755,
      "step": 202021
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.787670373916626,
      "learning_rate": 2.218418965345835e-05,
      "loss": 2.975,
      "step": 202022
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9959585666656494,
      "learning_rate": 2.2182645924777897e-05,
      "loss": 3.0617,
      "step": 202023
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6404473781585693,
      "learning_rate": 2.218110224774896e-05,
      "loss": 3.116,
      "step": 202024
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8318939208984375,
      "learning_rate": 2.2179558622371775e-05,
      "loss": 2.8304,
      "step": 202025
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.978832244873047,
      "learning_rate": 2.2178015048646703e-05,
      "loss": 3.0189,
      "step": 202026
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.841472864151001,
      "learning_rate": 2.217647152657398e-05,
      "loss": 2.9106,
      "step": 202027
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.079745054244995,
      "learning_rate": 2.2174928056153873e-05,
      "loss": 2.9296,
      "step": 202028
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.691455602645874,
      "learning_rate": 2.2173384637386682e-05,
      "loss": 3.0225,
      "step": 202029
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8849470615386963,
      "learning_rate": 2.2171841270272705e-05,
      "loss": 2.7783,
      "step": 202030
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.36518931388855,
      "learning_rate": 2.217029795481221e-05,
      "loss": 2.9177,
      "step": 202031
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.247648239135742,
      "learning_rate": 2.2168754691005563e-05,
      "loss": 3.0732,
      "step": 202032
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.965343952178955,
      "learning_rate": 2.2167211478852963e-05,
      "loss": 3.0002,
      "step": 202033
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.840500831604004,
      "learning_rate": 2.2165668318354714e-05,
      "loss": 3.0311,
      "step": 202034
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.716468095779419,
      "learning_rate": 2.2164125209511074e-05,
      "loss": 3.0247,
      "step": 202035
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.844444513320923,
      "learning_rate": 2.216258215232245e-05,
      "loss": 2.7543,
      "step": 202036
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.335268020629883,
      "learning_rate": 2.216103914678894e-05,
      "loss": 2.8027,
      "step": 202037
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.858643054962158,
      "learning_rate": 2.215949619291104e-05,
      "loss": 2.866,
      "step": 202038
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3217666149139404,
      "learning_rate": 2.215795329068889e-05,
      "loss": 2.8539,
      "step": 202039
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7295985221862793,
      "learning_rate": 2.215641044012285e-05,
      "loss": 2.8522,
      "step": 202040
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.857733726501465,
      "learning_rate": 2.2154867641213126e-05,
      "loss": 2.7625,
      "step": 202041
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.569504976272583,
      "learning_rate": 2.215332489396008e-05,
      "loss": 2.6858,
      "step": 202042
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.735257387161255,
      "learning_rate": 2.2151782198363943e-05,
      "loss": 2.8331,
      "step": 202043
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0720736980438232,
      "learning_rate": 2.2150239554425087e-05,
      "loss": 2.8319,
      "step": 202044
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.541607618331909,
      "learning_rate": 2.2148696962143708e-05,
      "loss": 2.8475,
      "step": 202045
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.615506649017334,
      "learning_rate": 2.2147154421520174e-05,
      "loss": 3.1859,
      "step": 202046
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.994642734527588,
      "learning_rate": 2.214561193255472e-05,
      "loss": 2.8325,
      "step": 202047
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6755008697509766,
      "learning_rate": 2.214406949524764e-05,
      "loss": 2.7411,
      "step": 202048
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8145127296447754,
      "learning_rate": 2.2142527109599173e-05,
      "loss": 2.9686,
      "step": 202049
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8614912033081055,
      "learning_rate": 2.2140984775609716e-05,
      "loss": 2.7907,
      "step": 202050
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.251136541366577,
      "learning_rate": 2.2139442493279437e-05,
      "loss": 3.1001,
      "step": 202051
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.81885027885437,
      "learning_rate": 2.2137900262608732e-05,
      "loss": 2.8363,
      "step": 202052
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5702438354492188,
      "learning_rate": 2.2136358083597838e-05,
      "loss": 2.9179,
      "step": 202053
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0874407291412354,
      "learning_rate": 2.2134815956247053e-05,
      "loss": 3.043,
      "step": 202054
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8864502906799316,
      "learning_rate": 2.213327388055658e-05,
      "loss": 2.8866,
      "step": 202055
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.804965019226074,
      "learning_rate": 2.2131731856526847e-05,
      "loss": 2.845,
      "step": 202056
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.417953968048096,
      "learning_rate": 2.2130189884157988e-05,
      "loss": 2.9458,
      "step": 202057
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5500898361206055,
      "learning_rate": 2.2128647963450442e-05,
      "loss": 2.9907,
      "step": 202058
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0854642391204834,
      "learning_rate": 2.2127106094404434e-05,
      "loss": 2.9324,
      "step": 202059
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8884623050689697,
      "learning_rate": 2.212556427702017e-05,
      "loss": 3.0488,
      "step": 202060
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9891254901885986,
      "learning_rate": 2.212402251129808e-05,
      "loss": 2.8172,
      "step": 202061
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1402175426483154,
      "learning_rate": 2.2122480797238363e-05,
      "loss": 2.9002,
      "step": 202062
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1436569690704346,
      "learning_rate": 2.2120939134841286e-05,
      "loss": 2.6767,
      "step": 202063
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.210393905639648,
      "learning_rate": 2.211939752410722e-05,
      "loss": 2.9142,
      "step": 202064
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.576505422592163,
      "learning_rate": 2.211785596503639e-05,
      "loss": 2.9802,
      "step": 202065
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.037302255630493,
      "learning_rate": 2.211631445762907e-05,
      "loss": 2.6113,
      "step": 202066
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8617563247680664,
      "learning_rate": 2.211477300188559e-05,
      "loss": 2.7125,
      "step": 202067
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.049463987350464,
      "learning_rate": 2.2113231597806213e-05,
      "loss": 3.213,
      "step": 202068
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1298460960388184,
      "learning_rate": 2.2111690245391245e-05,
      "loss": 2.9458,
      "step": 202069
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7256808280944824,
      "learning_rate": 2.211014894464098e-05,
      "loss": 2.805,
      "step": 202070
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9899210929870605,
      "learning_rate": 2.210860769555566e-05,
      "loss": 2.912,
      "step": 202071
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.997211217880249,
      "learning_rate": 2.2107066498135574e-05,
      "loss": 2.9372,
      "step": 202072
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.903306722640991,
      "learning_rate": 2.210552535238106e-05,
      "loss": 2.9906,
      "step": 202073
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.333557605743408,
      "learning_rate": 2.2103984258292316e-05,
      "loss": 3.1315,
      "step": 202074
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7042624950408936,
      "learning_rate": 2.2102443215869746e-05,
      "loss": 2.8224,
      "step": 202075
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.124936103820801,
      "learning_rate": 2.210090222511358e-05,
      "loss": 2.9554,
      "step": 202076
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6935856342315674,
      "learning_rate": 2.209936128602412e-05,
      "loss": 3.1236,
      "step": 202077
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.578127861022949,
      "learning_rate": 2.2097820398601564e-05,
      "loss": 2.9763,
      "step": 202078
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1467831134796143,
      "learning_rate": 2.2096279562846342e-05,
      "loss": 2.9816,
      "step": 202079
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1328299045562744,
      "learning_rate": 2.2094738778758593e-05,
      "loss": 2.9046,
      "step": 202080
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.568215847015381,
      "learning_rate": 2.209319804633871e-05,
      "loss": 3.1667,
      "step": 202081
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.695970058441162,
      "learning_rate": 2.2091657365587002e-05,
      "loss": 2.7265,
      "step": 202082
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0476443767547607,
      "learning_rate": 2.209011673650366e-05,
      "loss": 2.8408,
      "step": 202083
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.641819477081299,
      "learning_rate": 2.2088576159088954e-05,
      "loss": 2.8962,
      "step": 202084
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7889270782470703,
      "learning_rate": 2.208703563334332e-05,
      "loss": 2.8471,
      "step": 202085
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.65979266166687,
      "learning_rate": 2.2085495159266853e-05,
      "loss": 2.769,
      "step": 202086
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5588395595550537,
      "learning_rate": 2.208395473686002e-05,
      "loss": 2.7343,
      "step": 202087
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9331934452056885,
      "learning_rate": 2.2082414366123024e-05,
      "loss": 2.6782,
      "step": 202088
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.339862585067749,
      "learning_rate": 2.2080874047056163e-05,
      "loss": 2.7921,
      "step": 202089
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.076906442642212,
      "learning_rate": 2.2079333779659636e-05,
      "loss": 3.1447,
      "step": 202090
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8393709659576416,
      "learning_rate": 2.2077793563933877e-05,
      "loss": 2.8237,
      "step": 202091
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4072515964508057,
      "learning_rate": 2.2076253399879052e-05,
      "loss": 2.8671,
      "step": 202092
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.94149112701416,
      "learning_rate": 2.2074713287495528e-05,
      "loss": 2.8728,
      "step": 202093
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.140921115875244,
      "learning_rate": 2.2073173226783568e-05,
      "loss": 2.789,
      "step": 202094
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3278815746307373,
      "learning_rate": 2.2071633217743477e-05,
      "loss": 2.8986,
      "step": 202095
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.214179277420044,
      "learning_rate": 2.2070093260375422e-05,
      "loss": 2.7843,
      "step": 202096
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4996252059936523,
      "learning_rate": 2.2068553354679863e-05,
      "loss": 3.0481,
      "step": 202097
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.589812994003296,
      "learning_rate": 2.206701350065697e-05,
      "loss": 2.9176,
      "step": 202098
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0495078563690186,
      "learning_rate": 2.2065473698307077e-05,
      "loss": 2.8846,
      "step": 202099
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6247782707214355,
      "learning_rate": 2.2063933947630484e-05,
      "loss": 2.9209,
      "step": 202100
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.995063304901123,
      "learning_rate": 2.2062394248627458e-05,
      "loss": 2.9841,
      "step": 202101
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.77687406539917,
      "learning_rate": 2.2060854601298194e-05,
      "loss": 3.0512,
      "step": 202102
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.908397912979126,
      "learning_rate": 2.2059315005643162e-05,
      "loss": 2.9635,
      "step": 202103
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.306814670562744,
      "learning_rate": 2.2057775461662463e-05,
      "loss": 2.9071,
      "step": 202104
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6008946895599365,
      "learning_rate": 2.205623596935653e-05,
      "loss": 2.739,
      "step": 202105
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2460739612579346,
      "learning_rate": 2.2054696528725592e-05,
      "loss": 2.826,
      "step": 202106
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8842639923095703,
      "learning_rate": 2.205315713976995e-05,
      "loss": 2.7163,
      "step": 202107
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7616848945617676,
      "learning_rate": 2.2051617802489772e-05,
      "loss": 2.8605,
      "step": 202108
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6268727779388428,
      "learning_rate": 2.205007851688556e-05,
      "loss": 2.753,
      "step": 202109
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6950876712799072,
      "learning_rate": 2.2048539282957378e-05,
      "loss": 2.5864,
      "step": 202110
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.987797975540161,
      "learning_rate": 2.2047000100705693e-05,
      "loss": 2.8375,
      "step": 202111
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4526124000549316,
      "learning_rate": 2.2045460970130668e-05,
      "loss": 2.981,
      "step": 202112
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6176111698150635,
      "learning_rate": 2.204392189123274e-05,
      "loss": 2.6298,
      "step": 202113
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.968173027038574,
      "learning_rate": 2.204238286401201e-05,
      "loss": 2.9932,
      "step": 202114
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8273580074310303,
      "learning_rate": 2.2040843888468872e-05,
      "loss": 2.9091,
      "step": 202115
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.207338333129883,
      "learning_rate": 2.2039304964603565e-05,
      "loss": 2.8346,
      "step": 202116
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0895442962646484,
      "learning_rate": 2.203776609241642e-05,
      "loss": 3.0257,
      "step": 202117
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2961506843566895,
      "learning_rate": 2.203622727190767e-05,
      "loss": 3.1732,
      "step": 202118
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9425597190856934,
      "learning_rate": 2.2034688503077714e-05,
      "loss": 2.8777,
      "step": 202119
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2939655780792236,
      "learning_rate": 2.203314978592665e-05,
      "loss": 3.0136,
      "step": 202120
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6161723136901855,
      "learning_rate": 2.203161112045495e-05,
      "loss": 3.2563,
      "step": 202121
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8316562175750732,
      "learning_rate": 2.2030072506662743e-05,
      "loss": 3.0064,
      "step": 202122
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.110945701599121,
      "learning_rate": 2.2028533944550463e-05,
      "loss": 2.756,
      "step": 202123
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.170874834060669,
      "learning_rate": 2.2026995434118277e-05,
      "loss": 3.1171,
      "step": 202124
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2345468997955322,
      "learning_rate": 2.202545697536662e-05,
      "loss": 2.9701,
      "step": 202125
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.531275987625122,
      "learning_rate": 2.202391856829555e-05,
      "loss": 2.6446,
      "step": 202126
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.009629964828491,
      "learning_rate": 2.2022380212905543e-05,
      "loss": 2.8704,
      "step": 202127
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3052608966827393,
      "learning_rate": 2.2020841909196797e-05,
      "loss": 2.8651,
      "step": 202128
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0655457973480225,
      "learning_rate": 2.201930365716964e-05,
      "loss": 2.7616,
      "step": 202129
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.011796474456787,
      "learning_rate": 2.201776545682431e-05,
      "loss": 3.1354,
      "step": 202130
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1031548976898193,
      "learning_rate": 2.2016227308161172e-05,
      "loss": 2.7795,
      "step": 202131
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3205838203430176,
      "learning_rate": 2.201468921118049e-05,
      "loss": 2.855,
      "step": 202132
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.859135866165161,
      "learning_rate": 2.2013151165882503e-05,
      "loss": 2.8671,
      "step": 202133
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.198065757751465,
      "learning_rate": 2.201161317226744e-05,
      "loss": 2.8889,
      "step": 202134
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.059540033340454,
      "learning_rate": 2.2010075230335766e-05,
      "loss": 3.1696,
      "step": 202135
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8339457511901855,
      "learning_rate": 2.2008537340087583e-05,
      "loss": 2.9798,
      "step": 202136
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3469791412353516,
      "learning_rate": 2.2006999501523324e-05,
      "loss": 2.9736,
      "step": 202137
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.747708320617676,
      "learning_rate": 2.200546171464319e-05,
      "loss": 2.9918,
      "step": 202138
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.062051773071289,
      "learning_rate": 2.200392397944751e-05,
      "loss": 2.8553,
      "step": 202139
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7848825454711914,
      "learning_rate": 2.2002386295936493e-05,
      "loss": 3.1737,
      "step": 202140
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4324140548706055,
      "learning_rate": 2.2000848664110527e-05,
      "loss": 2.9319,
      "step": 202141
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.52093243598938,
      "learning_rate": 2.1999311083969816e-05,
      "loss": 2.8627,
      "step": 202142
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.820894718170166,
      "learning_rate": 2.199777355551473e-05,
      "loss": 2.8277,
      "step": 202143
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1345980167388916,
      "learning_rate": 2.19962360787455e-05,
      "loss": 2.7325,
      "step": 202144
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.665435552597046,
      "learning_rate": 2.199469865366236e-05,
      "loss": 2.7129,
      "step": 202145
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.109712600708008,
      "learning_rate": 2.1993161280265704e-05,
      "loss": 2.8554,
      "step": 202146
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.222432613372803,
      "learning_rate": 2.199162395855577e-05,
      "loss": 2.8497,
      "step": 202147
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4718873500823975,
      "learning_rate": 2.199008668853276e-05,
      "loss": 3.0442,
      "step": 202148
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.947300434112549,
      "learning_rate": 2.1988549470197136e-05,
      "loss": 2.8045,
      "step": 202149
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.373490333557129,
      "learning_rate": 2.1987012303549067e-05,
      "loss": 2.9529,
      "step": 202150
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.697221279144287,
      "learning_rate": 2.198547518858882e-05,
      "loss": 2.9686,
      "step": 202151
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.970970392227173,
      "learning_rate": 2.1983938125316758e-05,
      "loss": 2.7739,
      "step": 202152
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1809487342834473,
      "learning_rate": 2.198240111373315e-05,
      "loss": 3.0169,
      "step": 202153
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1302247047424316,
      "learning_rate": 2.1980864153838195e-05,
      "loss": 2.7971,
      "step": 202154
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0140552520751953,
      "learning_rate": 2.1979327245632296e-05,
      "loss": 2.9505,
      "step": 202155
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1792025566101074,
      "learning_rate": 2.1977790389115712e-05,
      "loss": 3.0901,
      "step": 202156
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8863003253936768,
      "learning_rate": 2.1976253584288616e-05,
      "loss": 2.9276,
      "step": 202157
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3561055660247803,
      "learning_rate": 2.1974716831151475e-05,
      "loss": 3.0848,
      "step": 202158
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.010354995727539,
      "learning_rate": 2.1973180129704383e-05,
      "loss": 2.7373,
      "step": 202159
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7671432495117188,
      "learning_rate": 2.197164347994781e-05,
      "loss": 3.064,
      "step": 202160
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.354825735092163,
      "learning_rate": 2.1970106881881954e-05,
      "loss": 3.0387,
      "step": 202161
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.581106662750244,
      "learning_rate": 2.1968570335507084e-05,
      "loss": 2.9672,
      "step": 202162
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3209939002990723,
      "learning_rate": 2.1967033840823466e-05,
      "loss": 2.914,
      "step": 202163
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4365124702453613,
      "learning_rate": 2.19654973978315e-05,
      "loss": 2.7763,
      "step": 202164
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.91070818901062,
      "learning_rate": 2.1963961006531317e-05,
      "loss": 2.7594,
      "step": 202165
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.267686367034912,
      "learning_rate": 2.1962424666923352e-05,
      "loss": 2.8257,
      "step": 202166
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7622146606445312,
      "learning_rate": 2.19608883790078e-05,
      "loss": 2.8366,
      "step": 202167
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9020559787750244,
      "learning_rate": 2.1959352142784968e-05,
      "loss": 3.1695,
      "step": 202168
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.683952808380127,
      "learning_rate": 2.1957815958255088e-05,
      "loss": 2.8349,
      "step": 202169
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.3911547660827637,
      "learning_rate": 2.1956279825418555e-05,
      "loss": 3.0156,
      "step": 202170
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7351558208465576,
      "learning_rate": 2.195474374427557e-05,
      "loss": 3.012,
      "step": 202171
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.092310905456543,
      "learning_rate": 2.195320771482647e-05,
      "loss": 2.9192,
      "step": 202172
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.382595539093018,
      "learning_rate": 2.195167173707152e-05,
      "loss": 3.0037,
      "step": 202173
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.9849071502685547,
      "learning_rate": 2.1950135811011015e-05,
      "loss": 2.9354,
      "step": 202174
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9187240600585938,
      "learning_rate": 2.1948599936645163e-05,
      "loss": 2.8969,
      "step": 202175
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9348645210266113,
      "learning_rate": 2.194706411397439e-05,
      "loss": 3.2011,
      "step": 202176
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.508350849151611,
      "learning_rate": 2.194552834299883e-05,
      "loss": 2.7607,
      "step": 202177
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7143514156341553,
      "learning_rate": 2.1943992623718887e-05,
      "loss": 2.8517,
      "step": 202178
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7876574993133545,
      "learning_rate": 2.194245695613479e-05,
      "loss": 2.7909,
      "step": 202179
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9862983226776123,
      "learning_rate": 2.194092134024691e-05,
      "loss": 3.0313,
      "step": 202180
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0093157291412354,
      "learning_rate": 2.1939385776055375e-05,
      "loss": 2.728,
      "step": 202181
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7094407081604004,
      "learning_rate": 2.193785026356062e-05,
      "loss": 2.9427,
      "step": 202182
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.513212203979492,
      "learning_rate": 2.1936314802762777e-05,
      "loss": 2.7458,
      "step": 202183
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1452338695526123,
      "learning_rate": 2.1934779393662318e-05,
      "loss": 2.8555,
      "step": 202184
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.621232032775879,
      "learning_rate": 2.1933244036259335e-05,
      "loss": 2.8454,
      "step": 202185
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6151413917541504,
      "learning_rate": 2.1931708730554364e-05,
      "loss": 2.9483,
      "step": 202186
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1647164821624756,
      "learning_rate": 2.193017347654741e-05,
      "loss": 2.8097,
      "step": 202187
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7775959968566895,
      "learning_rate": 2.192863827423893e-05,
      "loss": 3.0156,
      "step": 202188
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.004683017730713,
      "learning_rate": 2.192710312362913e-05,
      "loss": 2.6701,
      "step": 202189
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.959196090698242,
      "learning_rate": 2.1925568024718344e-05,
      "loss": 2.8549,
      "step": 202190
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4739983081817627,
      "learning_rate": 2.1924032977506832e-05,
      "loss": 2.8296,
      "step": 202191
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0408942699432373,
      "learning_rate": 2.192249798199497e-05,
      "loss": 3.031,
      "step": 202192
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6950314044952393,
      "learning_rate": 2.192096303818288e-05,
      "loss": 2.8428,
      "step": 202193
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8749101161956787,
      "learning_rate": 2.1919428146070973e-05,
      "loss": 3.0236,
      "step": 202194
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6917409896850586,
      "learning_rate": 2.1917893305659472e-05,
      "loss": 3.1426,
      "step": 202195
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6215567588806152,
      "learning_rate": 2.1916358516948684e-05,
      "loss": 2.9464,
      "step": 202196
    },
    {
      "epoch": 2.63,
      "grad_norm": 5.24146842956543,
      "learning_rate": 2.1914823779938873e-05,
      "loss": 2.6017,
      "step": 202197
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.730201005935669,
      "learning_rate": 2.191328909463047e-05,
      "loss": 3.0603,
      "step": 202198
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4508790969848633,
      "learning_rate": 2.191175446102348e-05,
      "loss": 2.8551,
      "step": 202199
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4026846885681152,
      "learning_rate": 2.1910219879118428e-05,
      "loss": 3.0176,
      "step": 202200
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.065735101699829,
      "learning_rate": 2.1908685348915454e-05,
      "loss": 2.9796,
      "step": 202201
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.7624218463897705,
      "learning_rate": 2.1907150870414993e-05,
      "loss": 2.9729,
      "step": 202202
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9746148586273193,
      "learning_rate": 2.1905616443617134e-05,
      "loss": 3.0219,
      "step": 202203
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1482012271881104,
      "learning_rate": 2.1904082068522387e-05,
      "loss": 2.9891,
      "step": 202204
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.021580696105957,
      "learning_rate": 2.190254774513085e-05,
      "loss": 2.9844,
      "step": 202205
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9683070182800293,
      "learning_rate": 2.1901013473442918e-05,
      "loss": 2.8169,
      "step": 202206
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.530653238296509,
      "learning_rate": 2.189947925345876e-05,
      "loss": 3.0165,
      "step": 202207
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5520288944244385,
      "learning_rate": 2.1897945085178815e-05,
      "loss": 3.1048,
      "step": 202208
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2305476665496826,
      "learning_rate": 2.189641096860324e-05,
      "loss": 2.6745,
      "step": 202209
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.735452651977539,
      "learning_rate": 2.1894876903732473e-05,
      "loss": 3.0121,
      "step": 202210
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7807161808013916,
      "learning_rate": 2.189334289056658e-05,
      "loss": 2.8858,
      "step": 202211
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5904786586761475,
      "learning_rate": 2.1891808929106026e-05,
      "loss": 2.9417,
      "step": 202212
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0410044193267822,
      "learning_rate": 2.1890275019350977e-05,
      "loss": 2.7378,
      "step": 202213
    },
    {
      "epoch": 2.63,
      "grad_norm": 5.345383644104004,
      "learning_rate": 2.1888741161301838e-05,
      "loss": 3.0988,
      "step": 202214
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.017027854919434,
      "learning_rate": 2.188720735495877e-05,
      "loss": 3.0524,
      "step": 202215
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.281674861907959,
      "learning_rate": 2.1885673600322207e-05,
      "loss": 3.0056,
      "step": 202216
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.985193967819214,
      "learning_rate": 2.1884139897392284e-05,
      "loss": 2.6877,
      "step": 202217
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8124985694885254,
      "learning_rate": 2.1882606246169366e-05,
      "loss": 3.0779,
      "step": 202218
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.825396776199341,
      "learning_rate": 2.1881072646653686e-05,
      "loss": 2.8821,
      "step": 202219
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7888550758361816,
      "learning_rate": 2.187953909884561e-05,
      "loss": 2.9616,
      "step": 202220
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4738688468933105,
      "learning_rate": 2.1878005602745342e-05,
      "loss": 2.6759,
      "step": 202221
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5172994136810303,
      "learning_rate": 2.1876472158353243e-05,
      "loss": 2.9422,
      "step": 202222
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.969393253326416,
      "learning_rate": 2.1874938765669548e-05,
      "loss": 3.0225,
      "step": 202223
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.303886890411377,
      "learning_rate": 2.1873405424694558e-05,
      "loss": 2.821,
      "step": 202224
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.013854503631592,
      "learning_rate": 2.1871872135428503e-05,
      "loss": 2.9126,
      "step": 202225
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0102803707122803,
      "learning_rate": 2.1870338897871787e-05,
      "loss": 2.8281,
      "step": 202226
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2420759201049805,
      "learning_rate": 2.186880571202454e-05,
      "loss": 3.1041,
      "step": 202227
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.587265729904175,
      "learning_rate": 2.1867272577887195e-05,
      "loss": 2.9395,
      "step": 202228
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2295563220977783,
      "learning_rate": 2.186573949545999e-05,
      "loss": 2.7063,
      "step": 202229
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6779110431671143,
      "learning_rate": 2.186420646474315e-05,
      "loss": 3.0066,
      "step": 202230
    },
    {
      "epoch": 2.63,
      "grad_norm": 5.323492527008057,
      "learning_rate": 2.1862673485737013e-05,
      "loss": 2.7745,
      "step": 202231
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.130479335784912,
      "learning_rate": 2.1861140558441882e-05,
      "loss": 2.9133,
      "step": 202232
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.993586301803589,
      "learning_rate": 2.185960768285798e-05,
      "loss": 3.0779,
      "step": 202233
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.9205446243286133,
      "learning_rate": 2.185807485898565e-05,
      "loss": 3.0476,
      "step": 202234
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.4366183280944824,
      "learning_rate": 2.1856542086825158e-05,
      "loss": 2.8294,
      "step": 202235
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9240283966064453,
      "learning_rate": 2.185500936637673e-05,
      "loss": 3.0416,
      "step": 202236
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.341963291168213,
      "learning_rate": 2.185347669764077e-05,
      "loss": 2.9022,
      "step": 202237
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.974379777908325,
      "learning_rate": 2.1851944080617513e-05,
      "loss": 2.8838,
      "step": 202238
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.707841634750366,
      "learning_rate": 2.1850411515307154e-05,
      "loss": 2.9779,
      "step": 202239
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4823107719421387,
      "learning_rate": 2.184887900171013e-05,
      "loss": 3.0092,
      "step": 202240
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.732119560241699,
      "learning_rate": 2.184734653982664e-05,
      "loss": 2.9549,
      "step": 202241
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.766958475112915,
      "learning_rate": 2.1845814129656914e-05,
      "loss": 2.9191,
      "step": 202242
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.207908630371094,
      "learning_rate": 2.1844281771201355e-05,
      "loss": 3.0168,
      "step": 202243
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0199506282806396,
      "learning_rate": 2.184274946446013e-05,
      "loss": 2.8561,
      "step": 202244
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.967945098876953,
      "learning_rate": 2.184121720943367e-05,
      "loss": 3.0473,
      "step": 202245
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4964168071746826,
      "learning_rate": 2.1839685006122177e-05,
      "loss": 3.1727,
      "step": 202246
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2859246730804443,
      "learning_rate": 2.1838152854525915e-05,
      "loss": 2.6975,
      "step": 202247
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7237796783447266,
      "learning_rate": 2.183662075464515e-05,
      "loss": 2.6823,
      "step": 202248
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9052700996398926,
      "learning_rate": 2.183508870648025e-05,
      "loss": 2.862,
      "step": 202249
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9057412147521973,
      "learning_rate": 2.183355671003142e-05,
      "loss": 2.8947,
      "step": 202250
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.880136013031006,
      "learning_rate": 2.183202476529905e-05,
      "loss": 2.9888,
      "step": 202251
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.099888324737549,
      "learning_rate": 2.1830492872283344e-05,
      "loss": 2.9636,
      "step": 202252
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5673727989196777,
      "learning_rate": 2.182896103098457e-05,
      "loss": 2.9364,
      "step": 202253
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6159958839416504,
      "learning_rate": 2.1827429241403028e-05,
      "loss": 2.9772,
      "step": 202254
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1002700328826904,
      "learning_rate": 2.1825897503539046e-05,
      "loss": 2.7281,
      "step": 202255
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.470649242401123,
      "learning_rate": 2.1824365817392864e-05,
      "loss": 3.0884,
      "step": 202256
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.945504903793335,
      "learning_rate": 2.182283418296481e-05,
      "loss": 3.1922,
      "step": 202257
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.141811847686768,
      "learning_rate": 2.1821302600255085e-05,
      "loss": 3.0554,
      "step": 202258
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9971776008605957,
      "learning_rate": 2.181977106926416e-05,
      "loss": 2.799,
      "step": 202259
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.110999584197998,
      "learning_rate": 2.181823958999206e-05,
      "loss": 2.7312,
      "step": 202260
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9501523971557617,
      "learning_rate": 2.1816708162439255e-05,
      "loss": 2.9946,
      "step": 202261
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.5976369380950928,
      "learning_rate": 2.1815176786605913e-05,
      "loss": 3.08,
      "step": 202262
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6399059295654297,
      "learning_rate": 2.1813645462492468e-05,
      "loss": 2.8563,
      "step": 202263
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6256442070007324,
      "learning_rate": 2.181211419009905e-05,
      "loss": 2.9786,
      "step": 202264
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0623621940612793,
      "learning_rate": 2.1810582969426093e-05,
      "loss": 3.0171,
      "step": 202265
    },
    {
      "epoch": 2.63,
      "grad_norm": 5.2101545333862305,
      "learning_rate": 2.1809051800473732e-05,
      "loss": 2.8831,
      "step": 202266
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2553837299346924,
      "learning_rate": 2.180752068324233e-05,
      "loss": 2.8917,
      "step": 202267
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8865880966186523,
      "learning_rate": 2.1805989617732154e-05,
      "loss": 3.0238,
      "step": 202268
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.889486789703369,
      "learning_rate": 2.180445860394351e-05,
      "loss": 3.0064,
      "step": 202269
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.92240309715271,
      "learning_rate": 2.1802927641876656e-05,
      "loss": 2.9078,
      "step": 202270
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1244957447052,
      "learning_rate": 2.180139673153196e-05,
      "loss": 2.9665,
      "step": 202271
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.062422037124634,
      "learning_rate": 2.1799865872909528e-05,
      "loss": 3.0363,
      "step": 202272
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7652902603149414,
      "learning_rate": 2.179833506600982e-05,
      "loss": 2.8077,
      "step": 202273
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7733120918273926,
      "learning_rate": 2.1796804310833006e-05,
      "loss": 3.0913,
      "step": 202274
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.666017770767212,
      "learning_rate": 2.1795273607379483e-05,
      "loss": 3.0453,
      "step": 202275
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9264888763427734,
      "learning_rate": 2.1793742955649384e-05,
      "loss": 3.0,
      "step": 202276
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7229065895080566,
      "learning_rate": 2.1792212355643213e-05,
      "loss": 3.1583,
      "step": 202277
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8034722805023193,
      "learning_rate": 2.1790681807361e-05,
      "loss": 2.8234,
      "step": 202278
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7440192699432373,
      "learning_rate": 2.178915131080321e-05,
      "loss": 3.1918,
      "step": 202279
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1049845218658447,
      "learning_rate": 2.178762086597001e-05,
      "loss": 3.1843,
      "step": 202280
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.508997917175293,
      "learning_rate": 2.178609047286177e-05,
      "loss": 2.8017,
      "step": 202281
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.989208221435547,
      "learning_rate": 2.1784560131478723e-05,
      "loss": 3.0999,
      "step": 202282
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5250208377838135,
      "learning_rate": 2.1783029841821296e-05,
      "loss": 3.089,
      "step": 202283
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0018718242645264,
      "learning_rate": 2.178149960388953e-05,
      "loss": 2.9929,
      "step": 202284
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1421353816986084,
      "learning_rate": 2.1779969417683886e-05,
      "loss": 2.8953,
      "step": 202285
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2103967666625977,
      "learning_rate": 2.1778439283204563e-05,
      "loss": 2.8254,
      "step": 202286
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0332720279693604,
      "learning_rate": 2.1776909200451932e-05,
      "loss": 3.0669,
      "step": 202287
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.165379762649536,
      "learning_rate": 2.1775379169426153e-05,
      "loss": 2.8758,
      "step": 202288
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1187870502471924,
      "learning_rate": 2.17738491901277e-05,
      "loss": 2.9659,
      "step": 202289
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.096634864807129,
      "learning_rate": 2.1772319262556636e-05,
      "loss": 2.9611,
      "step": 202290
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9226152896881104,
      "learning_rate": 2.1770789386713395e-05,
      "loss": 2.822,
      "step": 202291
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.657474994659424,
      "learning_rate": 2.1769259562598175e-05,
      "loss": 2.9674,
      "step": 202292
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.836833953857422,
      "learning_rate": 2.1767729790211342e-05,
      "loss": 2.8994,
      "step": 202293
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.064999580383301,
      "learning_rate": 2.1766200069553096e-05,
      "loss": 3.1333,
      "step": 202294
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.04174542427063,
      "learning_rate": 2.1764670400623873e-05,
      "loss": 2.9436,
      "step": 202295
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8873820304870605,
      "learning_rate": 2.176314078342374e-05,
      "loss": 2.8587,
      "step": 202296
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.170466184616089,
      "learning_rate": 2.1761611217953123e-05,
      "loss": 2.8185,
      "step": 202297
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.876339912414551,
      "learning_rate": 2.1760081704212262e-05,
      "loss": 2.8288,
      "step": 202298
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8119866847991943,
      "learning_rate": 2.175855224220149e-05,
      "loss": 2.9504,
      "step": 202299
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7617247104644775,
      "learning_rate": 2.1757022831921e-05,
      "loss": 3.1901,
      "step": 202300
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.097612380981445,
      "learning_rate": 2.1755493473371233e-05,
      "loss": 2.8971,
      "step": 202301
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.842636823654175,
      "learning_rate": 2.175396416655225e-05,
      "loss": 2.9108,
      "step": 202302
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.542714834213257,
      "learning_rate": 2.1752434911464556e-05,
      "loss": 2.8356,
      "step": 202303
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.3426926136016846,
      "learning_rate": 2.1750905708108247e-05,
      "loss": 2.9613,
      "step": 202304
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.667551279067993,
      "learning_rate": 2.1749376556483756e-05,
      "loss": 3.0281,
      "step": 202305
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9227426052093506,
      "learning_rate": 2.1747847456591285e-05,
      "loss": 2.8482,
      "step": 202306
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7045857906341553,
      "learning_rate": 2.1746318408431164e-05,
      "loss": 2.7819,
      "step": 202307
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9607503414154053,
      "learning_rate": 2.174478941200366e-05,
      "loss": 2.8427,
      "step": 202308
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.859307289123535,
      "learning_rate": 2.1743260467309042e-05,
      "loss": 2.9546,
      "step": 202309
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8458149433135986,
      "learning_rate": 2.1741731574347577e-05,
      "loss": 2.9355,
      "step": 202310
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1451609134674072,
      "learning_rate": 2.1740202733119594e-05,
      "loss": 2.9793,
      "step": 202311
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.115119457244873,
      "learning_rate": 2.1738673943625327e-05,
      "loss": 2.9239,
      "step": 202312
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6494741439819336,
      "learning_rate": 2.1737145205865146e-05,
      "loss": 2.729,
      "step": 202313
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.707517385482788,
      "learning_rate": 2.1735616519839284e-05,
      "loss": 3.0881,
      "step": 202314
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8595337867736816,
      "learning_rate": 2.1734087885548034e-05,
      "loss": 2.8768,
      "step": 202315
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.136657238006592,
      "learning_rate": 2.1732559302991604e-05,
      "loss": 3.1063,
      "step": 202316
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.688699722290039,
      "learning_rate": 2.1731030772170388e-05,
      "loss": 2.8184,
      "step": 202317
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.334773540496826,
      "learning_rate": 2.172950229308459e-05,
      "loss": 2.7527,
      "step": 202318
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.2342047691345215,
      "learning_rate": 2.1727973865734573e-05,
      "loss": 2.9298,
      "step": 202319
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.27868914604187,
      "learning_rate": 2.1726445490120604e-05,
      "loss": 2.7341,
      "step": 202320
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.9577016830444336,
      "learning_rate": 2.172491716624285e-05,
      "loss": 3.1354,
      "step": 202321
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.910360336303711,
      "learning_rate": 2.1723388894101745e-05,
      "loss": 3.0937,
      "step": 202322
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.0241546630859375,
      "learning_rate": 2.172186067369752e-05,
      "loss": 2.8578,
      "step": 202323
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.692650079727173,
      "learning_rate": 2.1720332505030412e-05,
      "loss": 2.9934,
      "step": 202324
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.036747455596924,
      "learning_rate": 2.171880438810082e-05,
      "loss": 2.8403,
      "step": 202325
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.675320625305176,
      "learning_rate": 2.1717276322908906e-05,
      "loss": 3.3074,
      "step": 202326
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.394556999206543,
      "learning_rate": 2.1715748309455004e-05,
      "loss": 3.167,
      "step": 202327
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.681432008743286,
      "learning_rate": 2.171422034773942e-05,
      "loss": 3.0448,
      "step": 202328
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.657907485961914,
      "learning_rate": 2.1712692437762346e-05,
      "loss": 3.0786,
      "step": 202329
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9691317081451416,
      "learning_rate": 2.171116457952422e-05,
      "loss": 3.0563,
      "step": 202330
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6486129760742188,
      "learning_rate": 2.1709636773025207e-05,
      "loss": 2.856,
      "step": 202331
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.619901180267334,
      "learning_rate": 2.1708109018265674e-05,
      "loss": 2.9595,
      "step": 202332
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2474381923675537,
      "learning_rate": 2.1706581315245752e-05,
      "loss": 2.9799,
      "step": 202333
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.3638901710510254,
      "learning_rate": 2.170505366396591e-05,
      "loss": 3.0103,
      "step": 202334
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0417702198028564,
      "learning_rate": 2.1703526064426313e-05,
      "loss": 2.8944,
      "step": 202335
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9173429012298584,
      "learning_rate": 2.1701998516627327e-05,
      "loss": 2.7579,
      "step": 202336
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8477234840393066,
      "learning_rate": 2.170047102056919e-05,
      "loss": 3.0772,
      "step": 202337
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.6073906421661377,
      "learning_rate": 2.1698943576252158e-05,
      "loss": 2.8683,
      "step": 202338
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.963433265686035,
      "learning_rate": 2.169741618367654e-05,
      "loss": 3.0255,
      "step": 202339
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.27869987487793,
      "learning_rate": 2.1695888842842635e-05,
      "loss": 3.0055,
      "step": 202340
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.350273609161377,
      "learning_rate": 2.1694361553750707e-05,
      "loss": 3.1534,
      "step": 202341
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0402512550354004,
      "learning_rate": 2.1692834316401087e-05,
      "loss": 2.8653,
      "step": 202342
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8040096759796143,
      "learning_rate": 2.1691307130793945e-05,
      "loss": 2.923,
      "step": 202343
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.383333444595337,
      "learning_rate": 2.168977999692978e-05,
      "loss": 3.0952,
      "step": 202344
    },
    {
      "epoch": 2.63,
      "grad_norm": 4.132096767425537,
      "learning_rate": 2.1688252914808623e-05,
      "loss": 2.8689,
      "step": 202345
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7962305545806885,
      "learning_rate": 2.168672588443091e-05,
      "loss": 3.1028,
      "step": 202346
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0980136394500732,
      "learning_rate": 2.1685198905796876e-05,
      "loss": 2.9862,
      "step": 202347
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.8521881103515625,
      "learning_rate": 2.1683671978906847e-05,
      "loss": 2.8181,
      "step": 202348
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.7214434146881104,
      "learning_rate": 2.1682145103761028e-05,
      "loss": 2.9608,
      "step": 202349
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2106096744537354,
      "learning_rate": 2.1680618280359818e-05,
      "loss": 2.8267,
      "step": 202350
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.9547743797302246,
      "learning_rate": 2.1679091508703383e-05,
      "loss": 3.2218,
      "step": 202351
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.591740131378174,
      "learning_rate": 2.167756478879209e-05,
      "loss": 3.0793,
      "step": 202352
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.033811569213867,
      "learning_rate": 2.167603812062614e-05,
      "loss": 3.0192,
      "step": 202353
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1967313289642334,
      "learning_rate": 2.1674511504205927e-05,
      "loss": 2.7674,
      "step": 202354
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.968811273574829,
      "learning_rate": 2.1672984939531622e-05,
      "loss": 3.0073,
      "step": 202355
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.254408121109009,
      "learning_rate": 2.167145842660366e-05,
      "loss": 2.6997,
      "step": 202356
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0646119117736816,
      "learning_rate": 2.1669931965422104e-05,
      "loss": 2.7957,
      "step": 202357
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.1207919120788574,
      "learning_rate": 2.1668405555987457e-05,
      "loss": 2.6976,
      "step": 202358
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.681689739227295,
      "learning_rate": 2.1666879198299846e-05,
      "loss": 2.9987,
      "step": 202359
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5828001499176025,
      "learning_rate": 2.1665352892359645e-05,
      "loss": 3.0189,
      "step": 202360
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.176772117614746,
      "learning_rate": 2.1663826638167083e-05,
      "loss": 2.7702,
      "step": 202361
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8373546600341797,
      "learning_rate": 2.1662300435722524e-05,
      "loss": 2.9984,
      "step": 202362
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.4414713382720947,
      "learning_rate": 2.166077428502614e-05,
      "loss": 2.8972,
      "step": 202363
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.752201795578003,
      "learning_rate": 2.1659248186078328e-05,
      "loss": 2.9973,
      "step": 202364
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.0020153522491455,
      "learning_rate": 2.1657722138879252e-05,
      "loss": 2.864,
      "step": 202365
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.28251051902771,
      "learning_rate": 2.1656196143429282e-05,
      "loss": 2.7193,
      "step": 202366
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.2228643894195557,
      "learning_rate": 2.1654670199728654e-05,
      "loss": 3.0492,
      "step": 202367
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.8371002674102783,
      "learning_rate": 2.165314430777779e-05,
      "loss": 3.22,
      "step": 202368
    },
    {
      "epoch": 2.63,
      "grad_norm": 2.5865652561187744,
      "learning_rate": 2.1651618467576736e-05,
      "loss": 2.8731,
      "step": 202369
    },
    {
      "epoch": 2.63,
      "grad_norm": 3.6497907638549805,
      "learning_rate": 2.1650092679125985e-05,
      "loss": 3.2422,
      "step": 202370
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9758591651916504,
      "learning_rate": 2.1648566942425637e-05,
      "loss": 2.6744,
      "step": 202371
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5720322132110596,
      "learning_rate": 2.1647041257476163e-05,
      "loss": 2.8254,
      "step": 202372
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.008615732192993,
      "learning_rate": 2.1645515624277688e-05,
      "loss": 3.0431,
      "step": 202373
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.014953136444092,
      "learning_rate": 2.1643990042830683e-05,
      "loss": 2.8905,
      "step": 202374
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.631932258605957,
      "learning_rate": 2.1642464513135215e-05,
      "loss": 3.029,
      "step": 202375
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.053633689880371,
      "learning_rate": 2.1640939035191717e-05,
      "loss": 2.7841,
      "step": 202376
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8197977542877197,
      "learning_rate": 2.1639413609000355e-05,
      "loss": 2.8594,
      "step": 202377
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.944016456604004,
      "learning_rate": 2.1637888234561528e-05,
      "loss": 2.6708,
      "step": 202378
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.4131693840026855,
      "learning_rate": 2.1636362911875438e-05,
      "loss": 3.1645,
      "step": 202379
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8983962535858154,
      "learning_rate": 2.1634837640942448e-05,
      "loss": 2.8395,
      "step": 202380
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8085134029388428,
      "learning_rate": 2.1633312421762795e-05,
      "loss": 2.6897,
      "step": 202381
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.894530773162842,
      "learning_rate": 2.1631787254336773e-05,
      "loss": 3.0757,
      "step": 202382
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3435752391815186,
      "learning_rate": 2.163026213866459e-05,
      "loss": 3.1511,
      "step": 202383
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.297091007232666,
      "learning_rate": 2.1628737074746673e-05,
      "loss": 2.8946,
      "step": 202384
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6351823806762695,
      "learning_rate": 2.1627212062583156e-05,
      "loss": 2.7785,
      "step": 202385
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0955681800842285,
      "learning_rate": 2.1625687102174437e-05,
      "loss": 2.9509,
      "step": 202386
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.085735321044922,
      "learning_rate": 2.162416219352079e-05,
      "loss": 2.8382,
      "step": 202387
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.696584463119507,
      "learning_rate": 2.162263733662244e-05,
      "loss": 2.9881,
      "step": 202388
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.883949041366577,
      "learning_rate": 2.1621112531479657e-05,
      "loss": 3.0745,
      "step": 202389
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.834773302078247,
      "learning_rate": 2.1619587778092807e-05,
      "loss": 2.9803,
      "step": 202390
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.879650115966797,
      "learning_rate": 2.161806307646209e-05,
      "loss": 2.9368,
      "step": 202391
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.073188543319702,
      "learning_rate": 2.1616538426587872e-05,
      "loss": 2.8824,
      "step": 202392
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.210137844085693,
      "learning_rate": 2.1615013828470384e-05,
      "loss": 2.928,
      "step": 202393
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8425450325012207,
      "learning_rate": 2.161348928210993e-05,
      "loss": 3.0073,
      "step": 202394
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.65893292427063,
      "learning_rate": 2.1611964787506742e-05,
      "loss": 2.8715,
      "step": 202395
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.782177448272705,
      "learning_rate": 2.1610440344661184e-05,
      "loss": 2.7175,
      "step": 202396
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.697246551513672,
      "learning_rate": 2.160891595357346e-05,
      "loss": 2.9305,
      "step": 202397
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0284810066223145,
      "learning_rate": 2.160739161424393e-05,
      "loss": 2.9882,
      "step": 202398
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8372156620025635,
      "learning_rate": 2.160586732667283e-05,
      "loss": 2.8642,
      "step": 202399
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1921186447143555,
      "learning_rate": 2.16043430908605e-05,
      "loss": 2.8931,
      "step": 202400
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.3188323974609375,
      "learning_rate": 2.1602818906807095e-05,
      "loss": 2.698,
      "step": 202401
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.044778347015381,
      "learning_rate": 2.160129477451302e-05,
      "loss": 2.7857,
      "step": 202402
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5629353523254395,
      "learning_rate": 2.159977069397848e-05,
      "loss": 2.8537,
      "step": 202403
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9375739097595215,
      "learning_rate": 2.1598246665203866e-05,
      "loss": 2.7349,
      "step": 202404
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6446034908294678,
      "learning_rate": 2.1596722688189383e-05,
      "loss": 2.9058,
      "step": 202405
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.956735134124756,
      "learning_rate": 2.159519876293526e-05,
      "loss": 2.7922,
      "step": 202406
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4210543632507324,
      "learning_rate": 2.1593674889441903e-05,
      "loss": 3.1832,
      "step": 202407
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.242925643920898,
      "learning_rate": 2.159215106770954e-05,
      "loss": 2.8291,
      "step": 202408
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5862090587615967,
      "learning_rate": 2.1590627297738407e-05,
      "loss": 3.0052,
      "step": 202409
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.443824529647827,
      "learning_rate": 2.15891035795289e-05,
      "loss": 2.8197,
      "step": 202410
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.3617753982543945,
      "learning_rate": 2.1587579913081187e-05,
      "loss": 2.9274,
      "step": 202411
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.785752058029175,
      "learning_rate": 2.158605629839557e-05,
      "loss": 3.0687,
      "step": 202412
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3254408836364746,
      "learning_rate": 2.1584532735472414e-05,
      "loss": 2.8964,
      "step": 202413
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.841867446899414,
      "learning_rate": 2.158300922431192e-05,
      "loss": 3.0532,
      "step": 202414
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.051966428756714,
      "learning_rate": 2.158148576491442e-05,
      "loss": 3.1805,
      "step": 202415
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8386940956115723,
      "learning_rate": 2.1579962357280177e-05,
      "loss": 3.0488,
      "step": 202416
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.7492780685424805,
      "learning_rate": 2.1578439001409497e-05,
      "loss": 2.9261,
      "step": 202417
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.745605230331421,
      "learning_rate": 2.157691569730258e-05,
      "loss": 2.894,
      "step": 202418
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7835657596588135,
      "learning_rate": 2.1575392444959816e-05,
      "loss": 2.7781,
      "step": 202419
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.514347791671753,
      "learning_rate": 2.157386924438138e-05,
      "loss": 3.0416,
      "step": 202420
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5297513008117676,
      "learning_rate": 2.157234609556767e-05,
      "loss": 2.9327,
      "step": 202421
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3468434810638428,
      "learning_rate": 2.1570822998518956e-05,
      "loss": 2.8489,
      "step": 202422
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.67881441116333,
      "learning_rate": 2.156929995323543e-05,
      "loss": 3.001,
      "step": 202423
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.588066339492798,
      "learning_rate": 2.1567776959717397e-05,
      "loss": 2.8532,
      "step": 202424
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.700704574584961,
      "learning_rate": 2.1566254017965225e-05,
      "loss": 2.8535,
      "step": 202425
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.005380868911743,
      "learning_rate": 2.1564731127979108e-05,
      "loss": 2.88,
      "step": 202426
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.195880651473999,
      "learning_rate": 2.1563208289759382e-05,
      "loss": 2.8447,
      "step": 202427
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.755810022354126,
      "learning_rate": 2.1561685503306282e-05,
      "loss": 3.0703,
      "step": 202428
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.481984853744507,
      "learning_rate": 2.1560162768620204e-05,
      "loss": 3.0135,
      "step": 202429
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7621264457702637,
      "learning_rate": 2.1558640085701252e-05,
      "loss": 2.9215,
      "step": 202430
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.876891851425171,
      "learning_rate": 2.1557117454549888e-05,
      "loss": 3.0446,
      "step": 202431
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7531607151031494,
      "learning_rate": 2.1555594875166216e-05,
      "loss": 2.9807,
      "step": 202432
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.829360008239746,
      "learning_rate": 2.15540723475507e-05,
      "loss": 3.0349,
      "step": 202433
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.816714286804199,
      "learning_rate": 2.1552549871703474e-05,
      "loss": 2.877,
      "step": 202434
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8426859378814697,
      "learning_rate": 2.1551027447624967e-05,
      "loss": 2.815,
      "step": 202435
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8041365146636963,
      "learning_rate": 2.1549505075315288e-05,
      "loss": 3.0054,
      "step": 202436
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7523295879364014,
      "learning_rate": 2.1547982754774862e-05,
      "loss": 3.0719,
      "step": 202437
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.21389102935791,
      "learning_rate": 2.154646048600389e-05,
      "loss": 2.8104,
      "step": 202438
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2469422817230225,
      "learning_rate": 2.1544938269002744e-05,
      "loss": 2.7711,
      "step": 202439
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.600497007369995,
      "learning_rate": 2.1543416103771583e-05,
      "loss": 3.2461,
      "step": 202440
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7265679836273193,
      "learning_rate": 2.1541893990310842e-05,
      "loss": 3.0257,
      "step": 202441
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7979226112365723,
      "learning_rate": 2.1540371928620626e-05,
      "loss": 2.931,
      "step": 202442
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8051998615264893,
      "learning_rate": 2.1538849918701396e-05,
      "loss": 2.979,
      "step": 202443
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.412013053894043,
      "learning_rate": 2.1537327960553253e-05,
      "loss": 2.9531,
      "step": 202444
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.028032064437866,
      "learning_rate": 2.1535806054176662e-05,
      "loss": 2.6928,
      "step": 202445
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.937053680419922,
      "learning_rate": 2.153428419957176e-05,
      "loss": 3.2201,
      "step": 202446
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7321207523345947,
      "learning_rate": 2.153276239673891e-05,
      "loss": 2.9746,
      "step": 202447
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.877384901046753,
      "learning_rate": 2.1531240645678417e-05,
      "loss": 2.8807,
      "step": 202448
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.572361946105957,
      "learning_rate": 2.1529718946390506e-05,
      "loss": 2.8894,
      "step": 202449
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.256481885910034,
      "learning_rate": 2.152819729887545e-05,
      "loss": 2.8973,
      "step": 202450
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7362771034240723,
      "learning_rate": 2.152667570313358e-05,
      "loss": 3.0829,
      "step": 202451
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.00040864944458,
      "learning_rate": 2.1525154159165127e-05,
      "loss": 2.8219,
      "step": 202452
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7552311420440674,
      "learning_rate": 2.1523632666970425e-05,
      "loss": 3.123,
      "step": 202453
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.398310422897339,
      "learning_rate": 2.1522111226549745e-05,
      "loss": 2.89,
      "step": 202454
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.077277421951294,
      "learning_rate": 2.152058983790338e-05,
      "loss": 2.9493,
      "step": 202455
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.502406597137451,
      "learning_rate": 2.1519068501031533e-05,
      "loss": 2.7552,
      "step": 202456
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.142808198928833,
      "learning_rate": 2.1517547215934604e-05,
      "loss": 3.038,
      "step": 202457
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7776856422424316,
      "learning_rate": 2.151602598261276e-05,
      "loss": 2.8919,
      "step": 202458
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9214839935302734,
      "learning_rate": 2.15145048010664e-05,
      "loss": 2.7866,
      "step": 202459
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.16198468208313,
      "learning_rate": 2.1512983671295758e-05,
      "loss": 3.0884,
      "step": 202460
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9062840938568115,
      "learning_rate": 2.15114625933011e-05,
      "loss": 2.8282,
      "step": 202461
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0295181274414062,
      "learning_rate": 2.1509941567082688e-05,
      "loss": 2.8315,
      "step": 202462
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8367576599121094,
      "learning_rate": 2.1508420592640863e-05,
      "loss": 3.0376,
      "step": 202463
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.300689697265625,
      "learning_rate": 2.150689966997582e-05,
      "loss": 3.1815,
      "step": 202464
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8288326263427734,
      "learning_rate": 2.1505378799087957e-05,
      "loss": 2.6388,
      "step": 202465
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.812748908996582,
      "learning_rate": 2.150385797997751e-05,
      "loss": 2.9944,
      "step": 202466
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9160850048065186,
      "learning_rate": 2.150233721264475e-05,
      "loss": 2.9467,
      "step": 202467
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.295670986175537,
      "learning_rate": 2.1500816497089935e-05,
      "loss": 2.6339,
      "step": 202468
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1224732398986816,
      "learning_rate": 2.14992958333134e-05,
      "loss": 2.8544,
      "step": 202469
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.728161334991455,
      "learning_rate": 2.149777522131535e-05,
      "loss": 2.7882,
      "step": 202470
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.167912006378174,
      "learning_rate": 2.1496254661096178e-05,
      "loss": 3.0053,
      "step": 202471
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4220006465911865,
      "learning_rate": 2.1494734152656122e-05,
      "loss": 3.0405,
      "step": 202472
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6970930099487305,
      "learning_rate": 2.1493213695995414e-05,
      "loss": 2.851,
      "step": 202473
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1484835147857666,
      "learning_rate": 2.1491693291114352e-05,
      "loss": 2.8572,
      "step": 202474
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.612730026245117,
      "learning_rate": 2.1490172938013305e-05,
      "loss": 2.9898,
      "step": 202475
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0275633335113525,
      "learning_rate": 2.148865263669244e-05,
      "loss": 2.9523,
      "step": 202476
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.980149269104004,
      "learning_rate": 2.1487132387152116e-05,
      "loss": 2.7953,
      "step": 202477
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1399710178375244,
      "learning_rate": 2.148561218939261e-05,
      "loss": 2.8547,
      "step": 202478
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.731663227081299,
      "learning_rate": 2.148409204341418e-05,
      "loss": 2.8053,
      "step": 202479
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.791881561279297,
      "learning_rate": 2.1482571949217065e-05,
      "loss": 2.9004,
      "step": 202480
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7218105792999268,
      "learning_rate": 2.1481051906801628e-05,
      "loss": 3.1108,
      "step": 202481
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0203232765197754,
      "learning_rate": 2.1479531916168103e-05,
      "loss": 2.877,
      "step": 202482
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.836925983428955,
      "learning_rate": 2.1478011977316823e-05,
      "loss": 2.8517,
      "step": 202483
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.723353624343872,
      "learning_rate": 2.1476492090248055e-05,
      "loss": 3.1901,
      "step": 202484
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4474222660064697,
      "learning_rate": 2.147497225496203e-05,
      "loss": 3.1356,
      "step": 202485
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1383447647094727,
      "learning_rate": 2.1473452471459052e-05,
      "loss": 2.873,
      "step": 202486
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5296237468719482,
      "learning_rate": 2.147193273973945e-05,
      "loss": 2.6991,
      "step": 202487
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3376705646514893,
      "learning_rate": 2.1470413059803425e-05,
      "loss": 2.8952,
      "step": 202488
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.595754623413086,
      "learning_rate": 2.1468893431651345e-05,
      "loss": 2.7928,
      "step": 202489
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0241634845733643,
      "learning_rate": 2.1467373855283475e-05,
      "loss": 2.782,
      "step": 202490
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8569815158843994,
      "learning_rate": 2.1465854330700018e-05,
      "loss": 2.7366,
      "step": 202491
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6388368606567383,
      "learning_rate": 2.1464334857901366e-05,
      "loss": 2.9019,
      "step": 202492
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5617892742156982,
      "learning_rate": 2.1462815436887758e-05,
      "loss": 2.739,
      "step": 202493
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.428093671798706,
      "learning_rate": 2.146129606765943e-05,
      "loss": 2.8203,
      "step": 202494
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.58056902885437,
      "learning_rate": 2.145977675021674e-05,
      "loss": 2.7871,
      "step": 202495
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8553719520568848,
      "learning_rate": 2.1458257484559926e-05,
      "loss": 2.6632,
      "step": 202496
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.760636329650879,
      "learning_rate": 2.1456738270689254e-05,
      "loss": 2.9323,
      "step": 202497
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6729445457458496,
      "learning_rate": 2.145521910860506e-05,
      "loss": 2.8989,
      "step": 202498
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.200782537460327,
      "learning_rate": 2.1453699998307605e-05,
      "loss": 3.0345,
      "step": 202499
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.918523073196411,
      "learning_rate": 2.1452180939797126e-05,
      "loss": 2.9201,
      "step": 202500
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9206714630126953,
      "learning_rate": 2.1450661933073986e-05,
      "loss": 3.0933,
      "step": 202501
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.555898427963257,
      "learning_rate": 2.1449142978138424e-05,
      "loss": 3.0458,
      "step": 202502
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.975111722946167,
      "learning_rate": 2.14476240749907e-05,
      "loss": 3.0092,
      "step": 202503
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1570043563842773,
      "learning_rate": 2.144610522363115e-05,
      "loss": 3.1489,
      "step": 202504
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1331071853637695,
      "learning_rate": 2.1444586424059974e-05,
      "loss": 2.8826,
      "step": 202505
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.531715393066406,
      "learning_rate": 2.144306767627757e-05,
      "loss": 2.975,
      "step": 202506
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5300066471099854,
      "learning_rate": 2.144154898028414e-05,
      "loss": 3.0978,
      "step": 202507
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3065154552459717,
      "learning_rate": 2.1440030336079983e-05,
      "loss": 2.9755,
      "step": 202508
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4475693702697754,
      "learning_rate": 2.143851174366533e-05,
      "loss": 3.0793,
      "step": 202509
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8642232418060303,
      "learning_rate": 2.1436993203040586e-05,
      "loss": 2.957,
      "step": 202510
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.562204599380493,
      "learning_rate": 2.1435474714205913e-05,
      "loss": 2.9511,
      "step": 202511
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7660229206085205,
      "learning_rate": 2.143395627716168e-05,
      "loss": 2.973,
      "step": 202512
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.619654417037964,
      "learning_rate": 2.143243789190808e-05,
      "loss": 2.625,
      "step": 202513
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4870052337646484,
      "learning_rate": 2.1430919558445526e-05,
      "loss": 3.0087,
      "step": 202514
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2572407722473145,
      "learning_rate": 2.1429401276774204e-05,
      "loss": 2.5335,
      "step": 202515
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6189022064208984,
      "learning_rate": 2.142788304689439e-05,
      "loss": 3.1629,
      "step": 202516
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9692623615264893,
      "learning_rate": 2.1426364868806377e-05,
      "loss": 2.9153,
      "step": 202517
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0697314739227295,
      "learning_rate": 2.1424846742510506e-05,
      "loss": 2.9942,
      "step": 202518
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0238101482391357,
      "learning_rate": 2.1423328668006935e-05,
      "loss": 3.2612,
      "step": 202519
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5320723056793213,
      "learning_rate": 2.14218106452961e-05,
      "loss": 2.574,
      "step": 202520
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0670785903930664,
      "learning_rate": 2.1420292674378204e-05,
      "loss": 2.8483,
      "step": 202521
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.020242691040039,
      "learning_rate": 2.1418774755253544e-05,
      "loss": 2.8195,
      "step": 202522
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.087625980377197,
      "learning_rate": 2.141725688792235e-05,
      "loss": 2.9205,
      "step": 202523
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1401100158691406,
      "learning_rate": 2.1415739072384965e-05,
      "loss": 3.0245,
      "step": 202524
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.054558038711548,
      "learning_rate": 2.1414221308641645e-05,
      "loss": 2.9471,
      "step": 202525
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.276871681213379,
      "learning_rate": 2.1412703596692692e-05,
      "loss": 2.9391,
      "step": 202526
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.818162679672241,
      "learning_rate": 2.1411185936538378e-05,
      "loss": 2.7922,
      "step": 202527
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.089096784591675,
      "learning_rate": 2.1409668328178997e-05,
      "loss": 2.8388,
      "step": 202528
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4228692054748535,
      "learning_rate": 2.1408150771614786e-05,
      "loss": 2.6626,
      "step": 202529
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2622532844543457,
      "learning_rate": 2.1406633266846074e-05,
      "loss": 2.8471,
      "step": 202530
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4544119834899902,
      "learning_rate": 2.14051158138731e-05,
      "loss": 3.026,
      "step": 202531
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.210902214050293,
      "learning_rate": 2.140359841269622e-05,
      "loss": 2.8049,
      "step": 202532
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1011929512023926,
      "learning_rate": 2.1402081063315646e-05,
      "loss": 2.7781,
      "step": 202533
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8846027851104736,
      "learning_rate": 2.1400563765731705e-05,
      "loss": 2.9689,
      "step": 202534
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.862952709197998,
      "learning_rate": 2.1399046519944597e-05,
      "loss": 2.9095,
      "step": 202535
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4478225708007812,
      "learning_rate": 2.139752932595472e-05,
      "loss": 2.9029,
      "step": 202536
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1999869346618652,
      "learning_rate": 2.139601218376228e-05,
      "loss": 2.8943,
      "step": 202537
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.854379653930664,
      "learning_rate": 2.1394495093367604e-05,
      "loss": 3.0629,
      "step": 202538
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.848750114440918,
      "learning_rate": 2.139297805477096e-05,
      "loss": 2.8034,
      "step": 202539
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8467745780944824,
      "learning_rate": 2.1391461067972615e-05,
      "loss": 2.9323,
      "step": 202540
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5190589427948,
      "learning_rate": 2.1389944132972802e-05,
      "loss": 2.9904,
      "step": 202541
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7172505855560303,
      "learning_rate": 2.138842724977189e-05,
      "loss": 2.9482,
      "step": 202542
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.894763469696045,
      "learning_rate": 2.1386910418370106e-05,
      "loss": 2.7925,
      "step": 202543
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0636978149414062,
      "learning_rate": 2.138539363876779e-05,
      "loss": 2.967,
      "step": 202544
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9801528453826904,
      "learning_rate": 2.1383876910965203e-05,
      "loss": 2.9229,
      "step": 202545
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.84523868560791,
      "learning_rate": 2.1382360234962614e-05,
      "loss": 2.914,
      "step": 202546
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8954896926879883,
      "learning_rate": 2.1380843610760223e-05,
      "loss": 2.8186,
      "step": 202547
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.449765205383301,
      "learning_rate": 2.1379327038358464e-05,
      "loss": 3.0361,
      "step": 202548
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3852481842041016,
      "learning_rate": 2.13778105177575e-05,
      "loss": 3.1015,
      "step": 202549
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.149745464324951,
      "learning_rate": 2.1376294048957733e-05,
      "loss": 3.2991,
      "step": 202550
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6578774452209473,
      "learning_rate": 2.1374777631959326e-05,
      "loss": 2.968,
      "step": 202551
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0269014835357666,
      "learning_rate": 2.1373261266762653e-05,
      "loss": 2.8341,
      "step": 202552
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.645505905151367,
      "learning_rate": 2.1371744953367876e-05,
      "loss": 2.8663,
      "step": 202553
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2047669887542725,
      "learning_rate": 2.1370228691775393e-05,
      "loss": 3.0252,
      "step": 202554
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.492962598800659,
      "learning_rate": 2.1368712481985407e-05,
      "loss": 2.8668,
      "step": 202555
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1675736904144287,
      "learning_rate": 2.136719632399828e-05,
      "loss": 3.062,
      "step": 202556
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9681475162506104,
      "learning_rate": 2.136568021781425e-05,
      "loss": 2.8381,
      "step": 202557
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.123518943786621,
      "learning_rate": 2.1364164163433584e-05,
      "loss": 2.9909,
      "step": 202558
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3970282077789307,
      "learning_rate": 2.1362648160856577e-05,
      "loss": 3.1309,
      "step": 202559
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.32283878326416,
      "learning_rate": 2.1361132210083533e-05,
      "loss": 2.8657,
      "step": 202560
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0822083950042725,
      "learning_rate": 2.1359616311114648e-05,
      "loss": 3.1949,
      "step": 202561
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9195001125335693,
      "learning_rate": 2.1358100463950355e-05,
      "loss": 2.901,
      "step": 202562
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.0877685546875,
      "learning_rate": 2.1356584668590825e-05,
      "loss": 2.9517,
      "step": 202563
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.151029348373413,
      "learning_rate": 2.1355068925036354e-05,
      "loss": 2.894,
      "step": 202564
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.034119129180908,
      "learning_rate": 2.1353553233287212e-05,
      "loss": 2.9613,
      "step": 202565
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.272700309753418,
      "learning_rate": 2.1352037593343763e-05,
      "loss": 2.7671,
      "step": 202566
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7790162563323975,
      "learning_rate": 2.135052200520617e-05,
      "loss": 3.0229,
      "step": 202567
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7902379035949707,
      "learning_rate": 2.1349006468874808e-05,
      "loss": 3.101,
      "step": 202568
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7165029048919678,
      "learning_rate": 2.1347490984349935e-05,
      "loss": 3.0766,
      "step": 202569
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.822480201721191,
      "learning_rate": 2.1345975551631823e-05,
      "loss": 2.8529,
      "step": 202570
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.068572521209717,
      "learning_rate": 2.1344460170720733e-05,
      "loss": 2.9373,
      "step": 202571
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.387751340866089,
      "learning_rate": 2.134294484161697e-05,
      "loss": 2.9309,
      "step": 202572
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7031736373901367,
      "learning_rate": 2.13414295643208e-05,
      "loss": 3.2003,
      "step": 202573
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.122025489807129,
      "learning_rate": 2.1339914338832553e-05,
      "loss": 2.888,
      "step": 202574
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.788184881210327,
      "learning_rate": 2.1338399165152464e-05,
      "loss": 2.8443,
      "step": 202575
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0653493404388428,
      "learning_rate": 2.13368840432808e-05,
      "loss": 3.0064,
      "step": 202576
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6768269538879395,
      "learning_rate": 2.1335368973217927e-05,
      "loss": 2.9174,
      "step": 202577
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.96630597114563,
      "learning_rate": 2.133385395496404e-05,
      "loss": 2.8269,
      "step": 202578
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8232152462005615,
      "learning_rate": 2.1332338988519416e-05,
      "loss": 3.3337,
      "step": 202579
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.584571361541748,
      "learning_rate": 2.1330824073884412e-05,
      "loss": 2.7822,
      "step": 202580
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.776024103164673,
      "learning_rate": 2.1329309211059232e-05,
      "loss": 2.8597,
      "step": 202581
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5766379833221436,
      "learning_rate": 2.1327794400044208e-05,
      "loss": 2.7311,
      "step": 202582
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.078206777572632,
      "learning_rate": 2.1326279640839638e-05,
      "loss": 3.0713,
      "step": 202583
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8677847385406494,
      "learning_rate": 2.1324764933445793e-05,
      "loss": 2.9361,
      "step": 202584
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.916865110397339,
      "learning_rate": 2.1323250277862836e-05,
      "loss": 3.106,
      "step": 202585
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8371052742004395,
      "learning_rate": 2.1321735674091234e-05,
      "loss": 3.1074,
      "step": 202586
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5483829975128174,
      "learning_rate": 2.132022112213112e-05,
      "loss": 2.8221,
      "step": 202587
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8126907348632812,
      "learning_rate": 2.1318706621982896e-05,
      "loss": 3.0174,
      "step": 202588
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.102719306945801,
      "learning_rate": 2.1317192173646756e-05,
      "loss": 2.9677,
      "step": 202589
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.712885618209839,
      "learning_rate": 2.1315677777122975e-05,
      "loss": 2.6674,
      "step": 202590
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.305541515350342,
      "learning_rate": 2.1314163432411945e-05,
      "loss": 3.1624,
      "step": 202591
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.934577703475952,
      "learning_rate": 2.131264913951384e-05,
      "loss": 3.0676,
      "step": 202592
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6889007091522217,
      "learning_rate": 2.131113489842895e-05,
      "loss": 2.9049,
      "step": 202593
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.465633869171143,
      "learning_rate": 2.1309620709157615e-05,
      "loss": 2.7669,
      "step": 202594
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.360736608505249,
      "learning_rate": 2.1308106571700066e-05,
      "loss": 3.0633,
      "step": 202595
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8358352184295654,
      "learning_rate": 2.1306592486056574e-05,
      "loss": 2.8768,
      "step": 202596
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7302420139312744,
      "learning_rate": 2.13050784522275e-05,
      "loss": 3.1973,
      "step": 202597
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6454272270202637,
      "learning_rate": 2.1303564470213073e-05,
      "loss": 2.9529,
      "step": 202598
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5017168521881104,
      "learning_rate": 2.1302050540013503e-05,
      "loss": 3.0339,
      "step": 202599
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.453965663909912,
      "learning_rate": 2.1300536661629218e-05,
      "loss": 2.9164,
      "step": 202600
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.120061159133911,
      "learning_rate": 2.1299022835060388e-05,
      "loss": 2.7441,
      "step": 202601
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0914769172668457,
      "learning_rate": 2.1297509060307304e-05,
      "loss": 2.6972,
      "step": 202602
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.804081916809082,
      "learning_rate": 2.1295995337370343e-05,
      "loss": 2.8965,
      "step": 202603
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7464215755462646,
      "learning_rate": 2.129448166624963e-05,
      "loss": 2.9368,
      "step": 202604
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3647053241729736,
      "learning_rate": 2.1292968046945602e-05,
      "loss": 2.731,
      "step": 202605
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.391883373260498,
      "learning_rate": 2.1291454479458492e-05,
      "loss": 2.9193,
      "step": 202606
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.314692497253418,
      "learning_rate": 2.128994096378853e-05,
      "loss": 2.9484,
      "step": 202607
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.65582013130188,
      "learning_rate": 2.128842749993599e-05,
      "loss": 2.7666,
      "step": 202608
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.282792806625366,
      "learning_rate": 2.1286914087901264e-05,
      "loss": 2.8654,
      "step": 202609
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.042100429534912,
      "learning_rate": 2.128540072768449e-05,
      "loss": 2.7674,
      "step": 202610
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2403438091278076,
      "learning_rate": 2.128388741928606e-05,
      "loss": 2.8633,
      "step": 202611
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3680217266082764,
      "learning_rate": 2.1282374162706217e-05,
      "loss": 3.0619,
      "step": 202612
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.95007586479187,
      "learning_rate": 2.1280860957945256e-05,
      "loss": 2.8582,
      "step": 202613
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4444515705108643,
      "learning_rate": 2.127934780500341e-05,
      "loss": 2.6649,
      "step": 202614
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2112045288085938,
      "learning_rate": 2.1277834703881013e-05,
      "loss": 3.1744,
      "step": 202615
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8947575092315674,
      "learning_rate": 2.12763216545783e-05,
      "loss": 2.9502,
      "step": 202616
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.461981773376465,
      "learning_rate": 2.127480865709563e-05,
      "loss": 2.8836,
      "step": 202617
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.889524221420288,
      "learning_rate": 2.1273295711433213e-05,
      "loss": 2.825,
      "step": 202618
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.731184244155884,
      "learning_rate": 2.1271782817591376e-05,
      "loss": 2.8411,
      "step": 202619
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3517954349517822,
      "learning_rate": 2.127026997557032e-05,
      "loss": 2.9551,
      "step": 202620
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7508583068847656,
      "learning_rate": 2.126875718537041e-05,
      "loss": 3.0408,
      "step": 202621
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6695752143859863,
      "learning_rate": 2.1267244446991882e-05,
      "loss": 3.1851,
      "step": 202622
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0442070960998535,
      "learning_rate": 2.1265731760435067e-05,
      "loss": 3.0915,
      "step": 202623
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.599900960922241,
      "learning_rate": 2.12642191257002e-05,
      "loss": 2.889,
      "step": 202624
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0172884464263916,
      "learning_rate": 2.1262706542787612e-05,
      "loss": 2.9009,
      "step": 202625
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.008955240249634,
      "learning_rate": 2.126119401169747e-05,
      "loss": 2.9647,
      "step": 202626
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.593021869659424,
      "learning_rate": 2.1259681532430207e-05,
      "loss": 2.9808,
      "step": 202627
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1252670288085938,
      "learning_rate": 2.1258169104985957e-05,
      "loss": 2.944,
      "step": 202628
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7056801319122314,
      "learning_rate": 2.1256656729365118e-05,
      "loss": 2.811,
      "step": 202629
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6148159503936768,
      "learning_rate": 2.125514440556796e-05,
      "loss": 2.7589,
      "step": 202630
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.697117328643799,
      "learning_rate": 2.125363213359471e-05,
      "loss": 2.8304,
      "step": 202631
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6387462615966797,
      "learning_rate": 2.125211991344564e-05,
      "loss": 3.0009,
      "step": 202632
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.917424201965332,
      "learning_rate": 2.1250607745121086e-05,
      "loss": 2.559,
      "step": 202633
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3999552726745605,
      "learning_rate": 2.1249095628621273e-05,
      "loss": 2.9829,
      "step": 202634
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2153379917144775,
      "learning_rate": 2.1247583563946567e-05,
      "loss": 2.8183,
      "step": 202635
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.801596164703369,
      "learning_rate": 2.124607155109721e-05,
      "loss": 2.9534,
      "step": 202636
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9761719703674316,
      "learning_rate": 2.124455959007343e-05,
      "loss": 3.1651,
      "step": 202637
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9434468746185303,
      "learning_rate": 2.1243047680875524e-05,
      "loss": 3.1266,
      "step": 202638
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8395073413848877,
      "learning_rate": 2.1241535823503865e-05,
      "loss": 2.9019,
      "step": 202639
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1996867656707764,
      "learning_rate": 2.1240024017958578e-05,
      "loss": 3.0154,
      "step": 202640
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.911175489425659,
      "learning_rate": 2.1238512264240103e-05,
      "loss": 2.765,
      "step": 202641
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8149006366729736,
      "learning_rate": 2.123700056234867e-05,
      "loss": 2.8837,
      "step": 202642
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4927892684936523,
      "learning_rate": 2.1235488912284514e-05,
      "loss": 2.6853,
      "step": 202643
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.07629132270813,
      "learning_rate": 2.12339773140479e-05,
      "loss": 3.1343,
      "step": 202644
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8324837684631348,
      "learning_rate": 2.1232465767639195e-05,
      "loss": 3.0063,
      "step": 202645
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8514859676361084,
      "learning_rate": 2.12309542730586e-05,
      "loss": 3.1106,
      "step": 202646
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.302537202835083,
      "learning_rate": 2.1229442830306474e-05,
      "loss": 2.7897,
      "step": 202647
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8318636417388916,
      "learning_rate": 2.1227931439382995e-05,
      "loss": 3.1266,
      "step": 202648
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.423534631729126,
      "learning_rate": 2.122642010028862e-05,
      "loss": 3.1325,
      "step": 202649
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3429455757141113,
      "learning_rate": 2.122490881302342e-05,
      "loss": 2.8825,
      "step": 202650
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6467971801757812,
      "learning_rate": 2.12233975775878e-05,
      "loss": 2.9243,
      "step": 202651
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.903071165084839,
      "learning_rate": 2.122188639398198e-05,
      "loss": 2.8636,
      "step": 202652
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.697984457015991,
      "learning_rate": 2.1220375262206336e-05,
      "loss": 3.0547,
      "step": 202653
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7544655799865723,
      "learning_rate": 2.1218864182260998e-05,
      "loss": 2.8838,
      "step": 202654
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.896631956100464,
      "learning_rate": 2.1217353154146467e-05,
      "loss": 2.9282,
      "step": 202655
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.846524715423584,
      "learning_rate": 2.1215842177862807e-05,
      "loss": 2.8918,
      "step": 202656
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.372828960418701,
      "learning_rate": 2.1214331253410387e-05,
      "loss": 3.1272,
      "step": 202657
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.026761770248413,
      "learning_rate": 2.1212820380789475e-05,
      "loss": 2.9679,
      "step": 202658
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.163658142089844,
      "learning_rate": 2.12113095600004e-05,
      "loss": 2.935,
      "step": 202659
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8671042919158936,
      "learning_rate": 2.1209798791043364e-05,
      "loss": 2.9979,
      "step": 202660
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.361285448074341,
      "learning_rate": 2.1208288073918768e-05,
      "loss": 2.9955,
      "step": 202661
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7791624069213867,
      "learning_rate": 2.1206777408626707e-05,
      "loss": 3.0695,
      "step": 202662
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.155667781829834,
      "learning_rate": 2.120526679516765e-05,
      "loss": 2.9955,
      "step": 202663
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.495174884796143,
      "learning_rate": 2.1203756233541737e-05,
      "loss": 2.9522,
      "step": 202664
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8596620559692383,
      "learning_rate": 2.1202245723749356e-05,
      "loss": 2.9319,
      "step": 202665
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9120187759399414,
      "learning_rate": 2.120073526579068e-05,
      "loss": 2.9807,
      "step": 202666
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6814115047454834,
      "learning_rate": 2.119922485966611e-05,
      "loss": 3.1515,
      "step": 202667
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.287193775177002,
      "learning_rate": 2.1197714505375872e-05,
      "loss": 2.6506,
      "step": 202668
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9169695377349854,
      "learning_rate": 2.119620420292024e-05,
      "loss": 2.8646,
      "step": 202669
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4298675060272217,
      "learning_rate": 2.1194693952299413e-05,
      "loss": 2.9969,
      "step": 202670
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.922882318496704,
      "learning_rate": 2.1193183753513855e-05,
      "loss": 2.8975,
      "step": 202671
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.256626605987549,
      "learning_rate": 2.1191673606563663e-05,
      "loss": 2.9291,
      "step": 202672
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0191216468811035,
      "learning_rate": 2.1190163511449276e-05,
      "loss": 2.9442,
      "step": 202673
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7873735427856445,
      "learning_rate": 2.118865346817089e-05,
      "loss": 2.9279,
      "step": 202674
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.212471961975098,
      "learning_rate": 2.118714347672874e-05,
      "loss": 3.1017,
      "step": 202675
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0334692001342773,
      "learning_rate": 2.1185633537123226e-05,
      "loss": 3.0223,
      "step": 202676
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.960963249206543,
      "learning_rate": 2.1184123649354545e-05,
      "loss": 2.8773,
      "step": 202677
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.081363677978516,
      "learning_rate": 2.1182613813422966e-05,
      "loss": 2.8253,
      "step": 202678
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.909113883972168,
      "learning_rate": 2.1181104029328855e-05,
      "loss": 2.6134,
      "step": 202679
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6098737716674805,
      "learning_rate": 2.117959429707241e-05,
      "loss": 3.2498,
      "step": 202680
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7148993015289307,
      "learning_rate": 2.1178084616653934e-05,
      "loss": 2.8695,
      "step": 202681
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.524951696395874,
      "learning_rate": 2.1176574988073723e-05,
      "loss": 3.1112,
      "step": 202682
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6017754077911377,
      "learning_rate": 2.1175065411332082e-05,
      "loss": 2.7159,
      "step": 202683
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5653178691864014,
      "learning_rate": 2.1173555886429205e-05,
      "loss": 3.0277,
      "step": 202684
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7877445220947266,
      "learning_rate": 2.1172046413365464e-05,
      "loss": 2.934,
      "step": 202685
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5960378646850586,
      "learning_rate": 2.1170536992141118e-05,
      "loss": 2.9371,
      "step": 202686
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.194649696350098,
      "learning_rate": 2.1169027622756374e-05,
      "loss": 3.0263,
      "step": 202687
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.35685133934021,
      "learning_rate": 2.116751830521163e-05,
      "loss": 2.7043,
      "step": 202688
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1560752391815186,
      "learning_rate": 2.1166009039507048e-05,
      "loss": 3.1369,
      "step": 202689
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8098011016845703,
      "learning_rate": 2.1164499825643032e-05,
      "loss": 3.2349,
      "step": 202690
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.31321382522583,
      "learning_rate": 2.1162990663619784e-05,
      "loss": 2.9127,
      "step": 202691
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.621953248977661,
      "learning_rate": 2.1161481553437598e-05,
      "loss": 2.845,
      "step": 202692
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.923011302947998,
      "learning_rate": 2.115997249509671e-05,
      "loss": 2.8926,
      "step": 202693
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.741377592086792,
      "learning_rate": 2.1158463488597487e-05,
      "loss": 2.8158,
      "step": 202694
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2466440200805664,
      "learning_rate": 2.115695453394013e-05,
      "loss": 3.0064,
      "step": 202695
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7694883346557617,
      "learning_rate": 2.1155445631125002e-05,
      "loss": 3.0847,
      "step": 202696
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.07968807220459,
      "learning_rate": 2.115393678015237e-05,
      "loss": 2.8642,
      "step": 202697
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.9710700511932373,
      "learning_rate": 2.1152427981022434e-05,
      "loss": 2.9676,
      "step": 202698
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.704660415649414,
      "learning_rate": 2.115091923373553e-05,
      "loss": 2.9006,
      "step": 202699
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6961560249328613,
      "learning_rate": 2.1149410538291954e-05,
      "loss": 3.1441,
      "step": 202700
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.879916191101074,
      "learning_rate": 2.114790189469191e-05,
      "loss": 2.9044,
      "step": 202701
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7718117237091064,
      "learning_rate": 2.1146393302935793e-05,
      "loss": 2.7477,
      "step": 202702
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1066243648529053,
      "learning_rate": 2.1144884763023838e-05,
      "loss": 2.9218,
      "step": 202703
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7550835609436035,
      "learning_rate": 2.1143376274956282e-05,
      "loss": 2.9649,
      "step": 202704
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9682672023773193,
      "learning_rate": 2.1141867838733418e-05,
      "loss": 2.8801,
      "step": 202705
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0594465732574463,
      "learning_rate": 2.114035945435558e-05,
      "loss": 3.0504,
      "step": 202706
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.932337760925293,
      "learning_rate": 2.1138851121822977e-05,
      "loss": 2.8812,
      "step": 202707
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.294376850128174,
      "learning_rate": 2.1137342841135928e-05,
      "loss": 2.9774,
      "step": 202708
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.048117160797119,
      "learning_rate": 2.1135834612294776e-05,
      "loss": 2.992,
      "step": 202709
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9789068698883057,
      "learning_rate": 2.1134326435299688e-05,
      "loss": 3.0509,
      "step": 202710
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.211421489715576,
      "learning_rate": 2.1132818310150957e-05,
      "loss": 2.9995,
      "step": 202711
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.957420825958252,
      "learning_rate": 2.113131023684895e-05,
      "loss": 2.9184,
      "step": 202712
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2423713207244873,
      "learning_rate": 2.1129802215393876e-05,
      "loss": 3.0363,
      "step": 202713
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.183907508850098,
      "learning_rate": 2.112829424578606e-05,
      "loss": 2.8754,
      "step": 202714
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0272605419158936,
      "learning_rate": 2.1126786328025703e-05,
      "loss": 3.0234,
      "step": 202715
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.865583658218384,
      "learning_rate": 2.1125278462113236e-05,
      "loss": 2.8604,
      "step": 202716
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0063486099243164,
      "learning_rate": 2.1123770648048764e-05,
      "loss": 3.0421,
      "step": 202717
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1290032863616943,
      "learning_rate": 2.1122262885832686e-05,
      "loss": 2.8184,
      "step": 202718
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.742112636566162,
      "learning_rate": 2.1120755175465165e-05,
      "loss": 2.6493,
      "step": 202719
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6312568187713623,
      "learning_rate": 2.111924751694667e-05,
      "loss": 2.8934,
      "step": 202720
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.081343173980713,
      "learning_rate": 2.1117739910277265e-05,
      "loss": 2.737,
      "step": 202721
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0091915130615234,
      "learning_rate": 2.1116232355457485e-05,
      "loss": 2.8006,
      "step": 202722
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.434244394302368,
      "learning_rate": 2.111472485248733e-05,
      "loss": 3.1224,
      "step": 202723
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.960402727127075,
      "learning_rate": 2.1113217401367266e-05,
      "loss": 2.9109,
      "step": 202724
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8701748847961426,
      "learning_rate": 2.111171000209746e-05,
      "loss": 3.0064,
      "step": 202725
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6619110107421875,
      "learning_rate": 2.111020265467831e-05,
      "loss": 2.9772,
      "step": 202726
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6679277420043945,
      "learning_rate": 2.1108695359110016e-05,
      "loss": 3.054,
      "step": 202727
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7486090660095215,
      "learning_rate": 2.110718811539295e-05,
      "loss": 2.8525,
      "step": 202728
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4737191200256348,
      "learning_rate": 2.1105680923527235e-05,
      "loss": 3.0798,
      "step": 202729
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4796555042266846,
      "learning_rate": 2.110417378351328e-05,
      "loss": 2.915,
      "step": 202730
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.499641180038452,
      "learning_rate": 2.110266669535128e-05,
      "loss": 3.1897,
      "step": 202731
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.802056312561035,
      "learning_rate": 2.11011596590416e-05,
      "loss": 2.9078,
      "step": 202732
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2152724266052246,
      "learning_rate": 2.1099652674584445e-05,
      "loss": 3.0219,
      "step": 202733
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.905902862548828,
      "learning_rate": 2.109814574198021e-05,
      "loss": 3.023,
      "step": 202734
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.962202787399292,
      "learning_rate": 2.1096638861228997e-05,
      "loss": 2.8469,
      "step": 202735
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.844867706298828,
      "learning_rate": 2.1095132032331242e-05,
      "loss": 3.1341,
      "step": 202736
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.840625524520874,
      "learning_rate": 2.1093625255287107e-05,
      "loss": 2.9554,
      "step": 202737
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8337507247924805,
      "learning_rate": 2.109211853009699e-05,
      "loss": 2.6684,
      "step": 202738
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2683162689208984,
      "learning_rate": 2.1090611856761064e-05,
      "loss": 2.8487,
      "step": 202739
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.859440326690674,
      "learning_rate": 2.1089105235279757e-05,
      "loss": 2.9292,
      "step": 202740
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5872628688812256,
      "learning_rate": 2.108759866565314e-05,
      "loss": 3.1934,
      "step": 202741
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3465731143951416,
      "learning_rate": 2.1086092147881672e-05,
      "loss": 2.9791,
      "step": 202742
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3122000694274902,
      "learning_rate": 2.1084585681965494e-05,
      "loss": 3.1645,
      "step": 202743
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4281938076019287,
      "learning_rate": 2.1083079267905033e-05,
      "loss": 2.8792,
      "step": 202744
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5928266048431396,
      "learning_rate": 2.108157290570043e-05,
      "loss": 2.8244,
      "step": 202745
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4381253719329834,
      "learning_rate": 2.1080066595352108e-05,
      "loss": 2.8509,
      "step": 202746
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.099085330963135,
      "learning_rate": 2.1078560336860174e-05,
      "loss": 2.9305,
      "step": 202747
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8874287605285645,
      "learning_rate": 2.1077054130225057e-05,
      "loss": 2.8593,
      "step": 202748
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0956907272338867,
      "learning_rate": 2.107554797544696e-05,
      "loss": 2.8392,
      "step": 202749
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1375038623809814,
      "learning_rate": 2.1074041872526183e-05,
      "loss": 2.5335,
      "step": 202750
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.754352569580078,
      "learning_rate": 2.107253582146299e-05,
      "loss": 2.7169,
      "step": 202751
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1185531616210938,
      "learning_rate": 2.1071029822257713e-05,
      "loss": 2.9661,
      "step": 202752
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.829589605331421,
      "learning_rate": 2.106952387491062e-05,
      "loss": 2.8524,
      "step": 202753
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.282124996185303,
      "learning_rate": 2.1068017979421946e-05,
      "loss": 2.9714,
      "step": 202754
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.845764398574829,
      "learning_rate": 2.1066512135791923e-05,
      "loss": 3.2423,
      "step": 202755
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0743041038513184,
      "learning_rate": 2.1065006344020984e-05,
      "loss": 3.141,
      "step": 202756
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.98380708694458,
      "learning_rate": 2.1063500604109264e-05,
      "loss": 2.9037,
      "step": 202757
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7055180072784424,
      "learning_rate": 2.1061994916057157e-05,
      "loss": 3.0949,
      "step": 202758
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5829362869262695,
      "learning_rate": 2.10604892798649e-05,
      "loss": 2.8834,
      "step": 202759
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4836370944976807,
      "learning_rate": 2.105898369553276e-05,
      "loss": 2.891,
      "step": 202760
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.507309675216675,
      "learning_rate": 2.1057478163060936e-05,
      "loss": 2.9923,
      "step": 202761
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6247949600219727,
      "learning_rate": 2.1055972682449895e-05,
      "loss": 2.8963,
      "step": 202762
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.888838291168213,
      "learning_rate": 2.1054467253699737e-05,
      "loss": 2.9249,
      "step": 202763
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0981860160827637,
      "learning_rate": 2.1052961876810858e-05,
      "loss": 2.8741,
      "step": 202764
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0555782318115234,
      "learning_rate": 2.105145655178353e-05,
      "loss": 2.951,
      "step": 202765
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6698567867279053,
      "learning_rate": 2.1049951278617914e-05,
      "loss": 3.0837,
      "step": 202766
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5950281620025635,
      "learning_rate": 2.104844605731445e-05,
      "loss": 2.7166,
      "step": 202767
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.09890079498291,
      "learning_rate": 2.104694088787333e-05,
      "loss": 3.1916,
      "step": 202768
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4302518367767334,
      "learning_rate": 2.1045435770294828e-05,
      "loss": 2.8319,
      "step": 202769
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.3479087352752686,
      "learning_rate": 2.104393070457927e-05,
      "loss": 2.9911,
      "step": 202770
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1401920318603516,
      "learning_rate": 2.1042425690726927e-05,
      "loss": 3.0016,
      "step": 202771
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1503098011016846,
      "learning_rate": 2.1040920728737998e-05,
      "loss": 2.5736,
      "step": 202772
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.686466932296753,
      "learning_rate": 2.1039415818612913e-05,
      "loss": 2.8791,
      "step": 202773
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7903833389282227,
      "learning_rate": 2.1037910960351778e-05,
      "loss": 2.8532,
      "step": 202774
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2059319019317627,
      "learning_rate": 2.103640615395502e-05,
      "loss": 2.8951,
      "step": 202775
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.757319211959839,
      "learning_rate": 2.1034901399422876e-05,
      "loss": 3.0151,
      "step": 202776
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7293357849121094,
      "learning_rate": 2.1033396696755577e-05,
      "loss": 2.7881,
      "step": 202777
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8579394817352295,
      "learning_rate": 2.1031892045953424e-05,
      "loss": 2.6388,
      "step": 202778
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9141180515289307,
      "learning_rate": 2.1030387447016717e-05,
      "loss": 2.7329,
      "step": 202779
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7922472953796387,
      "learning_rate": 2.1028882899945722e-05,
      "loss": 2.8876,
      "step": 202780
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.778531074523926,
      "learning_rate": 2.1027378404740736e-05,
      "loss": 2.8645,
      "step": 202781
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.851077079772949,
      "learning_rate": 2.102587396140203e-05,
      "loss": 2.8224,
      "step": 202782
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.019796133041382,
      "learning_rate": 2.10243695699299e-05,
      "loss": 2.9069,
      "step": 202783
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.724452018737793,
      "learning_rate": 2.1022865230324548e-05,
      "loss": 2.9796,
      "step": 202784
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6656856536865234,
      "learning_rate": 2.1021360942586374e-05,
      "loss": 2.9233,
      "step": 202785
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6943938732147217,
      "learning_rate": 2.101985670671551e-05,
      "loss": 2.973,
      "step": 202786
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9989216327667236,
      "learning_rate": 2.101835252271239e-05,
      "loss": 2.9655,
      "step": 202787
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7069318294525146,
      "learning_rate": 2.1016848390577178e-05,
      "loss": 2.9434,
      "step": 202788
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1505870819091797,
      "learning_rate": 2.1015344310310312e-05,
      "loss": 2.8194,
      "step": 202789
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8665826320648193,
      "learning_rate": 2.1013840281911855e-05,
      "loss": 2.906,
      "step": 202790
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1933095455169678,
      "learning_rate": 2.1012336305382205e-05,
      "loss": 3.0301,
      "step": 202791
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9056599140167236,
      "learning_rate": 2.101083238072163e-05,
      "loss": 2.8862,
      "step": 202792
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9083662033081055,
      "learning_rate": 2.1009328507930434e-05,
      "loss": 2.9466,
      "step": 202793
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2848799228668213,
      "learning_rate": 2.100782468700881e-05,
      "loss": 3.1118,
      "step": 202794
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.762136459350586,
      "learning_rate": 2.10063209179572e-05,
      "loss": 2.7628,
      "step": 202795
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7569565773010254,
      "learning_rate": 2.100481720077569e-05,
      "loss": 2.7988,
      "step": 202796
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.73805570602417,
      "learning_rate": 2.1003313535464693e-05,
      "loss": 3.1636,
      "step": 202797
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.397972822189331,
      "learning_rate": 2.1001809922024403e-05,
      "loss": 3.1263,
      "step": 202798
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2017805576324463,
      "learning_rate": 2.100030636045519e-05,
      "loss": 3.1598,
      "step": 202799
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.510042190551758,
      "learning_rate": 2.0998802850757244e-05,
      "loss": 3.0904,
      "step": 202800
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.180656671524048,
      "learning_rate": 2.0997299392930978e-05,
      "loss": 2.8074,
      "step": 202801
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8543810844421387,
      "learning_rate": 2.099579598697648e-05,
      "loss": 2.675,
      "step": 202802
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0910685062408447,
      "learning_rate": 2.0994292632894193e-05,
      "loss": 3.2222,
      "step": 202803
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9356791973114014,
      "learning_rate": 2.0992789330684278e-05,
      "loss": 2.8199,
      "step": 202804
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.91009259223938,
      "learning_rate": 2.0991286080347135e-05,
      "loss": 2.9583,
      "step": 202805
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5102157592773438,
      "learning_rate": 2.09897828818829e-05,
      "loss": 2.9792,
      "step": 202806
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8473103046417236,
      "learning_rate": 2.0988279735292035e-05,
      "loss": 3.039,
      "step": 202807
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7465455532073975,
      "learning_rate": 2.0986776640574644e-05,
      "loss": 2.7237,
      "step": 202808
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.9259424209594727,
      "learning_rate": 2.098527359773109e-05,
      "loss": 3.1119,
      "step": 202809
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.545783519744873,
      "learning_rate": 2.0983770606761608e-05,
      "loss": 2.9939,
      "step": 202810
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.376763343811035,
      "learning_rate": 2.0982267667666562e-05,
      "loss": 2.9333,
      "step": 202811
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6122636795043945,
      "learning_rate": 2.0980764780446124e-05,
      "loss": 3.1329,
      "step": 202812
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7099218368530273,
      "learning_rate": 2.0979261945100722e-05,
      "loss": 2.8655,
      "step": 202813
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4905529022216797,
      "learning_rate": 2.097775916163046e-05,
      "loss": 3.1305,
      "step": 202814
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8392984867095947,
      "learning_rate": 2.097625643003573e-05,
      "loss": 3.1486,
      "step": 202815
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.824622631072998,
      "learning_rate": 2.0974753750316743e-05,
      "loss": 3.0993,
      "step": 202816
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.385873556137085,
      "learning_rate": 2.0973251122473887e-05,
      "loss": 3.1956,
      "step": 202817
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.765371799468994,
      "learning_rate": 2.0971748546507307e-05,
      "loss": 2.7709,
      "step": 202818
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7905142307281494,
      "learning_rate": 2.0970246022417425e-05,
      "loss": 2.8608,
      "step": 202819
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3500137329101562,
      "learning_rate": 2.0968743550204348e-05,
      "loss": 2.8524,
      "step": 202820
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6817612648010254,
      "learning_rate": 2.0967241129868505e-05,
      "loss": 2.702,
      "step": 202821
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.602081775665283,
      "learning_rate": 2.096573876141007e-05,
      "loss": 2.9627,
      "step": 202822
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.923830986022949,
      "learning_rate": 2.096423644482943e-05,
      "loss": 3.0504,
      "step": 202823
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6141159534454346,
      "learning_rate": 2.0962734180126728e-05,
      "loss": 2.8794,
      "step": 202824
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.310454368591309,
      "learning_rate": 2.096123196730246e-05,
      "loss": 2.7702,
      "step": 202825
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4678847789764404,
      "learning_rate": 2.0959729806356627e-05,
      "loss": 2.8079,
      "step": 202826
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.24411678314209,
      "learning_rate": 2.0958227697289732e-05,
      "loss": 2.9743,
      "step": 202827
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.426490545272827,
      "learning_rate": 2.09567256401019e-05,
      "loss": 2.803,
      "step": 202828
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.91910719871521,
      "learning_rate": 2.095522363479354e-05,
      "loss": 3.0705,
      "step": 202829
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4919376373291016,
      "learning_rate": 2.095372168136481e-05,
      "loss": 3.0907,
      "step": 202830
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.975917339324951,
      "learning_rate": 2.0952219779816147e-05,
      "loss": 3.0132,
      "step": 202831
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1946346759796143,
      "learning_rate": 2.0950717930147654e-05,
      "loss": 2.8406,
      "step": 202832
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1802499294281006,
      "learning_rate": 2.0949216132359726e-05,
      "loss": 2.7201,
      "step": 202833
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6067235469818115,
      "learning_rate": 2.0947714386452563e-05,
      "loss": 2.9876,
      "step": 202834
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.322033166885376,
      "learning_rate": 2.0946212692426533e-05,
      "loss": 2.7469,
      "step": 202835
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5659000873565674,
      "learning_rate": 2.0944711050281803e-05,
      "loss": 2.7889,
      "step": 202836
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.9120967388153076,
      "learning_rate": 2.0943209460018807e-05,
      "loss": 2.599,
      "step": 202837
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3286304473876953,
      "learning_rate": 2.0941707921637707e-05,
      "loss": 2.982,
      "step": 202838
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.422241449356079,
      "learning_rate": 2.0940206435138805e-05,
      "loss": 3.0579,
      "step": 202839
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.662109136581421,
      "learning_rate": 2.0938705000522372e-05,
      "loss": 2.8408,
      "step": 202840
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6567277908325195,
      "learning_rate": 2.09372036177887e-05,
      "loss": 2.9236,
      "step": 202841
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.325143575668335,
      "learning_rate": 2.093570228693806e-05,
      "loss": 2.801,
      "step": 202842
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9062743186950684,
      "learning_rate": 2.0934201007970786e-05,
      "loss": 2.9528,
      "step": 202843
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9526281356811523,
      "learning_rate": 2.093269978088711e-05,
      "loss": 3.0873,
      "step": 202844
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4799764156341553,
      "learning_rate": 2.0931198605687292e-05,
      "loss": 2.7408,
      "step": 202845
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9203879833221436,
      "learning_rate": 2.092969748237161e-05,
      "loss": 3.1261,
      "step": 202846
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.999901056289673,
      "learning_rate": 2.092819641094039e-05,
      "loss": 2.9531,
      "step": 202847
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8854830265045166,
      "learning_rate": 2.0926695391393868e-05,
      "loss": 2.8891,
      "step": 202848
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.359117269515991,
      "learning_rate": 2.0925194423732373e-05,
      "loss": 2.9759,
      "step": 202849
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9489829540252686,
      "learning_rate": 2.092369350795614e-05,
      "loss": 2.7579,
      "step": 202850
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.26659893989563,
      "learning_rate": 2.0922192644065438e-05,
      "loss": 3.0699,
      "step": 202851
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.051619529724121,
      "learning_rate": 2.09206918320606e-05,
      "loss": 2.9747,
      "step": 202852
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.503784418106079,
      "learning_rate": 2.0919191071941856e-05,
      "loss": 2.7381,
      "step": 202853
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2560038566589355,
      "learning_rate": 2.091769036370947e-05,
      "loss": 2.9131,
      "step": 202854
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.73656964302063,
      "learning_rate": 2.091618970736382e-05,
      "loss": 3.141,
      "step": 202855
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6328325271606445,
      "learning_rate": 2.0914689102905092e-05,
      "loss": 2.9734,
      "step": 202856
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.918712854385376,
      "learning_rate": 2.091318855033356e-05,
      "loss": 3.0256,
      "step": 202857
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6695785522460938,
      "learning_rate": 2.0911688049649587e-05,
      "loss": 2.8426,
      "step": 202858
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0806326866149902,
      "learning_rate": 2.0910187600853344e-05,
      "loss": 2.8173,
      "step": 202859
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.817267417907715,
      "learning_rate": 2.0908687203945194e-05,
      "loss": 3.0183,
      "step": 202860
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7670395374298096,
      "learning_rate": 2.0907186858925405e-05,
      "loss": 3.0049,
      "step": 202861
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3385415077209473,
      "learning_rate": 2.0905686565794245e-05,
      "loss": 2.9769,
      "step": 202862
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.84668231010437,
      "learning_rate": 2.090418632455194e-05,
      "loss": 2.758,
      "step": 202863
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8015873432159424,
      "learning_rate": 2.0902686135198866e-05,
      "loss": 3.0125,
      "step": 202864
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.856213331222534,
      "learning_rate": 2.090118599773518e-05,
      "loss": 2.8699,
      "step": 202865
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.578497886657715,
      "learning_rate": 2.0899685912161292e-05,
      "loss": 2.9797,
      "step": 202866
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8190512657165527,
      "learning_rate": 2.0898185878477424e-05,
      "loss": 2.813,
      "step": 202867
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.611783027648926,
      "learning_rate": 2.0896685896683852e-05,
      "loss": 2.7014,
      "step": 202868
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.432622194290161,
      "learning_rate": 2.0895185966780802e-05,
      "loss": 3.0122,
      "step": 202869
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.247283935546875,
      "learning_rate": 2.0893686088768648e-05,
      "loss": 2.8696,
      "step": 202870
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8572402000427246,
      "learning_rate": 2.089218626264758e-05,
      "loss": 3.0072,
      "step": 202871
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.043562173843384,
      "learning_rate": 2.0890686488417972e-05,
      "loss": 2.7545,
      "step": 202872
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.82588791847229,
      "learning_rate": 2.0889186766080025e-05,
      "loss": 2.8768,
      "step": 202873
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9837234020233154,
      "learning_rate": 2.0887687095634097e-05,
      "loss": 2.9685,
      "step": 202874
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.10044527053833,
      "learning_rate": 2.088618747708036e-05,
      "loss": 2.7652,
      "step": 202875
    },
    {
      "epoch": 2.64,
      "grad_norm": 5.569416522979736,
      "learning_rate": 2.0884687910419185e-05,
      "loss": 2.9953,
      "step": 202876
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7290127277374268,
      "learning_rate": 2.0883188395650764e-05,
      "loss": 3.1204,
      "step": 202877
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.0594563484191895,
      "learning_rate": 2.0881688932775498e-05,
      "loss": 3.0877,
      "step": 202878
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.439703941345215,
      "learning_rate": 2.0880189521793522e-05,
      "loss": 3.1499,
      "step": 202879
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7089433670043945,
      "learning_rate": 2.08786901627053e-05,
      "loss": 2.8938,
      "step": 202880
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3832733631134033,
      "learning_rate": 2.0877190855510907e-05,
      "loss": 2.7464,
      "step": 202881
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.847989559173584,
      "learning_rate": 2.087569160021073e-05,
      "loss": 2.9561,
      "step": 202882
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8748836517333984,
      "learning_rate": 2.087419239680501e-05,
      "loss": 2.9739,
      "step": 202883
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.084156036376953,
      "learning_rate": 2.0872693245294082e-05,
      "loss": 2.9366,
      "step": 202884
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5532939434051514,
      "learning_rate": 2.087119414567814e-05,
      "loss": 2.9199,
      "step": 202885
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.813253164291382,
      "learning_rate": 2.0869695097957616e-05,
      "loss": 2.958,
      "step": 202886
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3243021965026855,
      "learning_rate": 2.0868196102132584e-05,
      "loss": 2.9932,
      "step": 202887
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.022849082946777,
      "learning_rate": 2.086669715820347e-05,
      "loss": 2.9375,
      "step": 202888
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5261480808258057,
      "learning_rate": 2.0865198266170447e-05,
      "loss": 2.9343,
      "step": 202889
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3096301555633545,
      "learning_rate": 2.0863699426033942e-05,
      "loss": 2.7828,
      "step": 202890
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.243556022644043,
      "learning_rate": 2.086220063779406e-05,
      "loss": 3.0434,
      "step": 202891
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8387339115142822,
      "learning_rate": 2.086070190145126e-05,
      "loss": 2.873,
      "step": 202892
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.053387641906738,
      "learning_rate": 2.085920321700565e-05,
      "loss": 2.7339,
      "step": 202893
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.869046211242676,
      "learning_rate": 2.0857704584457626e-05,
      "loss": 2.9066,
      "step": 202894
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3116986751556396,
      "learning_rate": 2.085620600380735e-05,
      "loss": 2.9047,
      "step": 202895
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5069363117218018,
      "learning_rate": 2.085470747505523e-05,
      "loss": 2.8436,
      "step": 202896
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1620078086853027,
      "learning_rate": 2.0853208998201465e-05,
      "loss": 2.8419,
      "step": 202897
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3106954097747803,
      "learning_rate": 2.085171057324645e-05,
      "loss": 2.964,
      "step": 202898
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1981067657470703,
      "learning_rate": 2.0850212200190252e-05,
      "loss": 2.9676,
      "step": 202899
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.864657402038574,
      "learning_rate": 2.0848713879033342e-05,
      "loss": 2.9756,
      "step": 202900
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.575949192047119,
      "learning_rate": 2.084721560977588e-05,
      "loss": 2.9165,
      "step": 202901
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.81581449508667,
      "learning_rate": 2.0845717392418204e-05,
      "loss": 3.0447,
      "step": 202902
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.621646404266357,
      "learning_rate": 2.0844219226960545e-05,
      "loss": 3.0811,
      "step": 202903
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.194213390350342,
      "learning_rate": 2.084272111340334e-05,
      "loss": 2.7572,
      "step": 202904
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.525343179702759,
      "learning_rate": 2.0841223051746614e-05,
      "loss": 2.9502,
      "step": 202905
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4187514781951904,
      "learning_rate": 2.083972504199084e-05,
      "loss": 3.0558,
      "step": 202906
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6445884704589844,
      "learning_rate": 2.0838227084136183e-05,
      "loss": 2.855,
      "step": 202907
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7420241832733154,
      "learning_rate": 2.083672917818301e-05,
      "loss": 2.9313,
      "step": 202908
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.530008554458618,
      "learning_rate": 2.0835231324131484e-05,
      "loss": 2.7286,
      "step": 202909
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.519619941711426,
      "learning_rate": 2.0833733521982075e-05,
      "loss": 2.7156,
      "step": 202910
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7929625511169434,
      "learning_rate": 2.0832235771734885e-05,
      "loss": 2.9377,
      "step": 202911
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6028525829315186,
      "learning_rate": 2.0830738073390242e-05,
      "loss": 2.9868,
      "step": 202912
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.312750816345215,
      "learning_rate": 2.0829240426948414e-05,
      "loss": 3.2273,
      "step": 202913
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7150375843048096,
      "learning_rate": 2.0827742832409766e-05,
      "loss": 3.0964,
      "step": 202914
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8589584827423096,
      "learning_rate": 2.0826245289774434e-05,
      "loss": 3.0432,
      "step": 202915
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.886869430541992,
      "learning_rate": 2.0824747799042818e-05,
      "loss": 3.0739,
      "step": 202916
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.676069498062134,
      "learning_rate": 2.082325036021518e-05,
      "loss": 2.9831,
      "step": 202917
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1255853176116943,
      "learning_rate": 2.082175297329176e-05,
      "loss": 2.9821,
      "step": 202918
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6355233192443848,
      "learning_rate": 2.082025563827279e-05,
      "loss": 3.0098,
      "step": 202919
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8157403469085693,
      "learning_rate": 2.081875835515866e-05,
      "loss": 2.5722,
      "step": 202920
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8549208641052246,
      "learning_rate": 2.0817261123949514e-05,
      "loss": 3.0017,
      "step": 202921
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.401711940765381,
      "learning_rate": 2.081576394464578e-05,
      "loss": 2.8297,
      "step": 202922
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3382654190063477,
      "learning_rate": 2.0814266817247693e-05,
      "loss": 2.7217,
      "step": 202923
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.82275128364563,
      "learning_rate": 2.0812769741755453e-05,
      "loss": 3.1166,
      "step": 202924
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6359784603118896,
      "learning_rate": 2.0811272718169392e-05,
      "loss": 2.7535,
      "step": 202925
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.060511589050293,
      "learning_rate": 2.0809775746489808e-05,
      "loss": 2.992,
      "step": 202926
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9600143432617188,
      "learning_rate": 2.0808278826716908e-05,
      "loss": 2.9547,
      "step": 202927
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.666585922241211,
      "learning_rate": 2.0806781958851047e-05,
      "loss": 2.909,
      "step": 202928
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2735140323638916,
      "learning_rate": 2.08052851428925e-05,
      "loss": 2.8682,
      "step": 202929
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.438750743865967,
      "learning_rate": 2.0803788378841536e-05,
      "loss": 2.8278,
      "step": 202930
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.625157594680786,
      "learning_rate": 2.0802291666698346e-05,
      "loss": 3.0091,
      "step": 202931
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7636001110076904,
      "learning_rate": 2.0800795006463334e-05,
      "loss": 2.9257,
      "step": 202932
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.137577056884766,
      "learning_rate": 2.0799298398136667e-05,
      "loss": 3.0023,
      "step": 202933
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.701814651489258,
      "learning_rate": 2.0797801841718743e-05,
      "loss": 3.1457,
      "step": 202934
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9168546199798584,
      "learning_rate": 2.0796305337209762e-05,
      "loss": 2.8224,
      "step": 202935
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8003571033477783,
      "learning_rate": 2.0794808884609993e-05,
      "loss": 2.9469,
      "step": 202936
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.4553701877593994,
      "learning_rate": 2.0793312483919768e-05,
      "loss": 2.9858,
      "step": 202937
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.502896785736084,
      "learning_rate": 2.079181613513935e-05,
      "loss": 2.604,
      "step": 202938
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2963955402374268,
      "learning_rate": 2.0790319838268943e-05,
      "loss": 2.8938,
      "step": 202939
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9692468643188477,
      "learning_rate": 2.0788823593308944e-05,
      "loss": 3.0515,
      "step": 202940
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9126646518707275,
      "learning_rate": 2.0787327400259558e-05,
      "loss": 2.8024,
      "step": 202941
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6093032360076904,
      "learning_rate": 2.0785831259121045e-05,
      "loss": 2.8595,
      "step": 202942
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2950057983398438,
      "learning_rate": 2.078433516989377e-05,
      "loss": 2.7015,
      "step": 202943
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1178905963897705,
      "learning_rate": 2.0782839132577943e-05,
      "loss": 3.0831,
      "step": 202944
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8659493923187256,
      "learning_rate": 2.0781343147173822e-05,
      "loss": 2.9373,
      "step": 202945
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6186277866363525,
      "learning_rate": 2.0779847213681743e-05,
      "loss": 2.7655,
      "step": 202946
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.414426565170288,
      "learning_rate": 2.077835133210197e-05,
      "loss": 2.9712,
      "step": 202947
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8542776107788086,
      "learning_rate": 2.077685550243474e-05,
      "loss": 2.7893,
      "step": 202948
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.708024024963379,
      "learning_rate": 2.077535972468042e-05,
      "loss": 2.7059,
      "step": 202949
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.670659303665161,
      "learning_rate": 2.0773863998839168e-05,
      "loss": 3.0086,
      "step": 202950
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7468035221099854,
      "learning_rate": 2.077236832491136e-05,
      "loss": 3.0789,
      "step": 202951
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.022383451461792,
      "learning_rate": 2.0770872702897255e-05,
      "loss": 2.6336,
      "step": 202952
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.277524948120117,
      "learning_rate": 2.0769377132797126e-05,
      "loss": 2.8567,
      "step": 202953
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0638506412506104,
      "learning_rate": 2.0767881614611172e-05,
      "loss": 2.7971,
      "step": 202954
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9350357055664062,
      "learning_rate": 2.0766386148339786e-05,
      "loss": 2.938,
      "step": 202955
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7902815341949463,
      "learning_rate": 2.0764890733983174e-05,
      "loss": 2.8729,
      "step": 202956
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.776268720626831,
      "learning_rate": 2.076339537154167e-05,
      "loss": 2.935,
      "step": 202957
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7110443115234375,
      "learning_rate": 2.0761900061015468e-05,
      "loss": 2.8789,
      "step": 202958
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.775709867477417,
      "learning_rate": 2.0760404802405007e-05,
      "loss": 2.8121,
      "step": 202959
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2726454734802246,
      "learning_rate": 2.075890959571038e-05,
      "loss": 2.801,
      "step": 202960
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.501743793487549,
      "learning_rate": 2.0757414440931963e-05,
      "loss": 2.8828,
      "step": 202961
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7676503658294678,
      "learning_rate": 2.0755919338069983e-05,
      "loss": 3.249,
      "step": 202962
    },
    {
      "epoch": 2.64,
      "grad_norm": 5.3666887283325195,
      "learning_rate": 2.0754424287124804e-05,
      "loss": 2.9841,
      "step": 202963
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5810840129852295,
      "learning_rate": 2.0752929288096565e-05,
      "loss": 2.8311,
      "step": 202964
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1320838928222656,
      "learning_rate": 2.0751434340985763e-05,
      "loss": 3.0386,
      "step": 202965
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.792973756790161,
      "learning_rate": 2.074993944579243e-05,
      "loss": 2.898,
      "step": 202966
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.033783435821533,
      "learning_rate": 2.0748444602516968e-05,
      "loss": 2.9803,
      "step": 202967
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.86785626411438,
      "learning_rate": 2.0746949811159642e-05,
      "loss": 2.6027,
      "step": 202968
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7444331645965576,
      "learning_rate": 2.074545507172075e-05,
      "loss": 2.9103,
      "step": 202969
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7454750537872314,
      "learning_rate": 2.0743960384200532e-05,
      "loss": 2.8186,
      "step": 202970
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.056692600250244,
      "learning_rate": 2.0742465748599348e-05,
      "loss": 2.8656,
      "step": 202971
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.966905117034912,
      "learning_rate": 2.074097116491733e-05,
      "loss": 2.8649,
      "step": 202972
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3754076957702637,
      "learning_rate": 2.0739476633154883e-05,
      "loss": 2.8397,
      "step": 202973
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.770233631134033,
      "learning_rate": 2.0737982153312172e-05,
      "loss": 3.2515,
      "step": 202974
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.692329168319702,
      "learning_rate": 2.0736487725389595e-05,
      "loss": 2.922,
      "step": 202975
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.9177420139312744,
      "learning_rate": 2.0734993349387353e-05,
      "loss": 2.9851,
      "step": 202976
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7791402339935303,
      "learning_rate": 2.0733499025305812e-05,
      "loss": 2.8673,
      "step": 202977
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.643563747406006,
      "learning_rate": 2.0732004753145103e-05,
      "loss": 3.1426,
      "step": 202978
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.768141984939575,
      "learning_rate": 2.0730510532905664e-05,
      "loss": 2.7325,
      "step": 202979
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.772672653198242,
      "learning_rate": 2.0729016364587593e-05,
      "loss": 3.0347,
      "step": 202980
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7633142471313477,
      "learning_rate": 2.072752224819135e-05,
      "loss": 2.6526,
      "step": 202981
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5607120990753174,
      "learning_rate": 2.072602818371708e-05,
      "loss": 2.7325,
      "step": 202982
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.260629653930664,
      "learning_rate": 2.0724534171165173e-05,
      "loss": 2.9341,
      "step": 202983
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.763497829437256,
      "learning_rate": 2.0723040210535833e-05,
      "loss": 2.9037,
      "step": 202984
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.28511381149292,
      "learning_rate": 2.0721546301829363e-05,
      "loss": 3.0357,
      "step": 202985
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4938111305236816,
      "learning_rate": 2.0720052445045954e-05,
      "loss": 2.8146,
      "step": 202986
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1872174739837646,
      "learning_rate": 2.071855864018601e-05,
      "loss": 2.9629,
      "step": 202987
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.584019422531128,
      "learning_rate": 2.0717064887249736e-05,
      "loss": 2.6821,
      "step": 202988
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5254621505737305,
      "learning_rate": 2.0715571186237455e-05,
      "loss": 2.8354,
      "step": 202989
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.846461296081543,
      "learning_rate": 2.0714077537149443e-05,
      "loss": 2.7997,
      "step": 202990
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.612269878387451,
      "learning_rate": 2.071258393998596e-05,
      "loss": 3.1334,
      "step": 202991
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.081021308898926,
      "learning_rate": 2.0711090394747208e-05,
      "loss": 2.9754,
      "step": 202992
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5045266151428223,
      "learning_rate": 2.070959690143359e-05,
      "loss": 2.8277,
      "step": 202993
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.035470485687256,
      "learning_rate": 2.07081034600453e-05,
      "loss": 3.0606,
      "step": 202994
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.61708927154541,
      "learning_rate": 2.0706610070582673e-05,
      "loss": 3.0463,
      "step": 202995
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.07380747795105,
      "learning_rate": 2.0705116733045945e-05,
      "loss": 3.0687,
      "step": 202996
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8492214679718018,
      "learning_rate": 2.0703623447435446e-05,
      "loss": 3.0294,
      "step": 202997
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1093227863311768,
      "learning_rate": 2.0702130213751344e-05,
      "loss": 2.8934,
      "step": 202998
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.431824207305908,
      "learning_rate": 2.0700637031994038e-05,
      "loss": 3.0075,
      "step": 202999
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5210177898406982,
      "learning_rate": 2.0699143902163727e-05,
      "loss": 3.1559,
      "step": 203000
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0491724014282227,
      "learning_rate": 2.0697650824260748e-05,
      "loss": 2.6984,
      "step": 203001
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.039121150970459,
      "learning_rate": 2.069615779828533e-05,
      "loss": 2.9716,
      "step": 203002
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5980911254882812,
      "learning_rate": 2.0694664824237805e-05,
      "loss": 2.8786,
      "step": 203003
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8767194747924805,
      "learning_rate": 2.0693171902118343e-05,
      "loss": 3.0491,
      "step": 203004
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7918076515197754,
      "learning_rate": 2.0691679031927345e-05,
      "loss": 2.969,
      "step": 203005
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2107632160186768,
      "learning_rate": 2.0690186213665005e-05,
      "loss": 3.0687,
      "step": 203006
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2466647624969482,
      "learning_rate": 2.068869344733166e-05,
      "loss": 2.8741,
      "step": 203007
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.080179214477539,
      "learning_rate": 2.0687200732927544e-05,
      "loss": 3.2266,
      "step": 203008
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.645524263381958,
      "learning_rate": 2.0685708070452988e-05,
      "loss": 3.2167,
      "step": 203009
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7510359287261963,
      "learning_rate": 2.068421545990816e-05,
      "loss": 2.7505,
      "step": 203010
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.086885690689087,
      "learning_rate": 2.068272290129346e-05,
      "loss": 2.9351,
      "step": 203011
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2701923847198486,
      "learning_rate": 2.0681230394609082e-05,
      "loss": 3.0231,
      "step": 203012
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8752224445343018,
      "learning_rate": 2.0679737939855367e-05,
      "loss": 2.7623,
      "step": 203013
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.816251754760742,
      "learning_rate": 2.0678245537032544e-05,
      "loss": 2.7924,
      "step": 203014
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7490460872650146,
      "learning_rate": 2.0676753186140948e-05,
      "loss": 2.8324,
      "step": 203015
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.256533145904541,
      "learning_rate": 2.0675260887180746e-05,
      "loss": 2.9444,
      "step": 203016
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.735901355743408,
      "learning_rate": 2.0673768640152334e-05,
      "loss": 2.938,
      "step": 203017
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7535266876220703,
      "learning_rate": 2.067227644505588e-05,
      "loss": 2.9713,
      "step": 203018
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.915235996246338,
      "learning_rate": 2.0670784301891817e-05,
      "loss": 2.8829,
      "step": 203019
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1097512245178223,
      "learning_rate": 2.0669292210660282e-05,
      "loss": 2.8792,
      "step": 203020
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.943882703781128,
      "learning_rate": 2.066780017136157e-05,
      "loss": 3.0103,
      "step": 203021
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6927452087402344,
      "learning_rate": 2.0666308183996018e-05,
      "loss": 3.0148,
      "step": 203022
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.322328805923462,
      "learning_rate": 2.0664816248563888e-05,
      "loss": 2.8168,
      "step": 203023
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6238555908203125,
      "learning_rate": 2.066332436506538e-05,
      "loss": 2.9207,
      "step": 203024
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7187206745147705,
      "learning_rate": 2.06618325335009e-05,
      "loss": 2.9656,
      "step": 203025
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7927117347717285,
      "learning_rate": 2.066034075387064e-05,
      "loss": 3.1365,
      "step": 203026
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.897963047027588,
      "learning_rate": 2.0658849026174873e-05,
      "loss": 3.1841,
      "step": 203027
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.427865743637085,
      "learning_rate": 2.0657357350413928e-05,
      "loss": 2.9335,
      "step": 203028
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4024569988250732,
      "learning_rate": 2.065586572658807e-05,
      "loss": 3.0774,
      "step": 203029
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.877213478088379,
      "learning_rate": 2.065437415469747e-05,
      "loss": 2.8761,
      "step": 203030
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.911808729171753,
      "learning_rate": 2.0652882634742595e-05,
      "loss": 3.0239,
      "step": 203031
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1477344036102295,
      "learning_rate": 2.0651391166723574e-05,
      "loss": 2.7828,
      "step": 203032
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3667569160461426,
      "learning_rate": 2.0649899750640707e-05,
      "loss": 2.6735,
      "step": 203033
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.671419382095337,
      "learning_rate": 2.0648408386494363e-05,
      "loss": 2.7176,
      "step": 203034
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6751790046691895,
      "learning_rate": 2.0646917074284675e-05,
      "loss": 2.9532,
      "step": 203035
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7419238090515137,
      "learning_rate": 2.0645425814012075e-05,
      "loss": 2.8412,
      "step": 203036
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8080272674560547,
      "learning_rate": 2.0643934605676727e-05,
      "loss": 2.8073,
      "step": 203037
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.085860013961792,
      "learning_rate": 2.0642443449278966e-05,
      "loss": 2.7726,
      "step": 203038
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.167215347290039,
      "learning_rate": 2.064095234481903e-05,
      "loss": 2.9748,
      "step": 203039
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.777064323425293,
      "learning_rate": 2.063946129229721e-05,
      "loss": 3.1467,
      "step": 203040
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.4847657680511475,
      "learning_rate": 2.063797029171378e-05,
      "loss": 3.1867,
      "step": 203041
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.191643476486206,
      "learning_rate": 2.0636479343069036e-05,
      "loss": 2.7489,
      "step": 203042
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.392611265182495,
      "learning_rate": 2.0634988446363275e-05,
      "loss": 2.6556,
      "step": 203043
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6737582683563232,
      "learning_rate": 2.0633497601596738e-05,
      "loss": 2.8889,
      "step": 203044
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.603052854537964,
      "learning_rate": 2.0632006808769618e-05,
      "loss": 2.7672,
      "step": 203045
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5158302783966064,
      "learning_rate": 2.0630516067882385e-05,
      "loss": 2.7469,
      "step": 203046
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7314977645874023,
      "learning_rate": 2.0629025378935138e-05,
      "loss": 3.0031,
      "step": 203047
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7515149116516113,
      "learning_rate": 2.0627534741928276e-05,
      "loss": 3.0176,
      "step": 203048
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.056854248046875,
      "learning_rate": 2.0626044156861997e-05,
      "loss": 3.0465,
      "step": 203049
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1391594409942627,
      "learning_rate": 2.0624553623736638e-05,
      "loss": 2.8821,
      "step": 203050
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0719268321990967,
      "learning_rate": 2.0623063142552465e-05,
      "loss": 3.0103,
      "step": 203051
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.730261325836182,
      "learning_rate": 2.0621572713309707e-05,
      "loss": 2.851,
      "step": 203052
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0931830406188965,
      "learning_rate": 2.062008233600867e-05,
      "loss": 3.045,
      "step": 203053
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.124462842941284,
      "learning_rate": 2.0618592010649648e-05,
      "loss": 2.9626,
      "step": 203054
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.603280544281006,
      "learning_rate": 2.061710173723288e-05,
      "loss": 2.9021,
      "step": 203055
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.233598470687866,
      "learning_rate": 2.061561151575869e-05,
      "loss": 3.0057,
      "step": 203056
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8217153549194336,
      "learning_rate": 2.0614121346227354e-05,
      "loss": 2.9601,
      "step": 203057
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.944960594177246,
      "learning_rate": 2.0612631228639097e-05,
      "loss": 2.9914,
      "step": 203058
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.059562921524048,
      "learning_rate": 2.0611141162994193e-05,
      "loss": 3.0713,
      "step": 203059
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7125744819641113,
      "learning_rate": 2.0609651149293006e-05,
      "loss": 2.7981,
      "step": 203060
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7467896938323975,
      "learning_rate": 2.06081611875357e-05,
      "loss": 3.1505,
      "step": 203061
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.562919616699219,
      "learning_rate": 2.0606671277722674e-05,
      "loss": 3.0484,
      "step": 203062
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.340885639190674,
      "learning_rate": 2.0605181419854132e-05,
      "loss": 3.0148,
      "step": 203063
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8957180976867676,
      "learning_rate": 2.0603691613930374e-05,
      "loss": 2.8453,
      "step": 203064
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2705271244049072,
      "learning_rate": 2.0602201859951596e-05,
      "loss": 2.9932,
      "step": 203065
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.741612672805786,
      "learning_rate": 2.0600712157918197e-05,
      "loss": 3.0271,
      "step": 203066
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6096742153167725,
      "learning_rate": 2.0599222507830348e-05,
      "loss": 2.9279,
      "step": 203067
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.694906711578369,
      "learning_rate": 2.0597732909688447e-05,
      "loss": 2.8681,
      "step": 203068
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6573128700256348,
      "learning_rate": 2.0596243363492692e-05,
      "loss": 3.0793,
      "step": 203069
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5838913917541504,
      "learning_rate": 2.0594753869243353e-05,
      "loss": 2.8032,
      "step": 203070
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7441160678863525,
      "learning_rate": 2.0593264426940692e-05,
      "loss": 2.9214,
      "step": 203071
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.577718734741211,
      "learning_rate": 2.0591775036585047e-05,
      "loss": 3.0472,
      "step": 203072
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.825448751449585,
      "learning_rate": 2.0590285698176645e-05,
      "loss": 3.2149,
      "step": 203073
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5953660011291504,
      "learning_rate": 2.0588796411715825e-05,
      "loss": 2.8976,
      "step": 203074
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.994076728820801,
      "learning_rate": 2.0587307177202783e-05,
      "loss": 2.784,
      "step": 203075
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.440624713897705,
      "learning_rate": 2.0585817994637886e-05,
      "loss": 3.0237,
      "step": 203076
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7946159839630127,
      "learning_rate": 2.0584328864021272e-05,
      "loss": 2.9353,
      "step": 203077
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6542091369628906,
      "learning_rate": 2.0582839785353366e-05,
      "loss": 2.8306,
      "step": 203078
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.897824764251709,
      "learning_rate": 2.0581350758634375e-05,
      "loss": 3.0801,
      "step": 203079
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2598793506622314,
      "learning_rate": 2.0579861783864594e-05,
      "loss": 3.0571,
      "step": 203080
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0444350242614746,
      "learning_rate": 2.057837286104429e-05,
      "loss": 2.8541,
      "step": 203081
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.551821708679199,
      "learning_rate": 2.0576883990173766e-05,
      "loss": 2.9969,
      "step": 203082
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.976747512817383,
      "learning_rate": 2.0575395171253185e-05,
      "loss": 2.8166,
      "step": 203083
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5225915908813477,
      "learning_rate": 2.057390640428298e-05,
      "loss": 2.993,
      "step": 203084
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.841390371322632,
      "learning_rate": 2.057241768926332e-05,
      "loss": 3.1834,
      "step": 203085
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.6223554611206055,
      "learning_rate": 2.057092902619457e-05,
      "loss": 3.0184,
      "step": 203086
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2751944065093994,
      "learning_rate": 2.0569440415076965e-05,
      "loss": 2.9747,
      "step": 203087
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.721482276916504,
      "learning_rate": 2.056795185591077e-05,
      "loss": 2.8961,
      "step": 203088
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.905425786972046,
      "learning_rate": 2.0566463348696216e-05,
      "loss": 2.947,
      "step": 203089
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0637614727020264,
      "learning_rate": 2.0564974893433672e-05,
      "loss": 2.9844,
      "step": 203090
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.194528579711914,
      "learning_rate": 2.056348649012334e-05,
      "loss": 2.9096,
      "step": 203091
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8277487754821777,
      "learning_rate": 2.0561998138765578e-05,
      "loss": 2.9759,
      "step": 203092
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.534287929534912,
      "learning_rate": 2.0560509839360596e-05,
      "loss": 2.8556,
      "step": 203093
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.495739698410034,
      "learning_rate": 2.0559021591908685e-05,
      "loss": 2.7794,
      "step": 203094
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8116443157196045,
      "learning_rate": 2.055753339641012e-05,
      "loss": 2.9244,
      "step": 203095
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5012807846069336,
      "learning_rate": 2.0556045252865194e-05,
      "loss": 2.7009,
      "step": 203096
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7080838680267334,
      "learning_rate": 2.0554557161274142e-05,
      "loss": 2.9823,
      "step": 203097
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.578303575515747,
      "learning_rate": 2.0553069121637333e-05,
      "loss": 3.0205,
      "step": 203098
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1724212169647217,
      "learning_rate": 2.0551581133954964e-05,
      "loss": 3.0123,
      "step": 203099
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5773396492004395,
      "learning_rate": 2.0550093198227335e-05,
      "loss": 2.9276,
      "step": 203100
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.5430517196655273,
      "learning_rate": 2.0548605314454682e-05,
      "loss": 2.9462,
      "step": 203101
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.041877269744873,
      "learning_rate": 2.0547117482637366e-05,
      "loss": 3.0952,
      "step": 203102
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1359870433807373,
      "learning_rate": 2.0545629702775557e-05,
      "loss": 2.9132,
      "step": 203103
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6718485355377197,
      "learning_rate": 2.054414197486962e-05,
      "loss": 2.9754,
      "step": 203104
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7160205841064453,
      "learning_rate": 2.0542654298919824e-05,
      "loss": 2.8761,
      "step": 203105
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.086747884750366,
      "learning_rate": 2.0541166674926436e-05,
      "loss": 2.9828,
      "step": 203106
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.100755453109741,
      "learning_rate": 2.053967910288965e-05,
      "loss": 2.8574,
      "step": 203107
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.7750797271728516,
      "learning_rate": 2.053819158280987e-05,
      "loss": 3.0168,
      "step": 203108
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.807060480117798,
      "learning_rate": 2.0536704114687262e-05,
      "loss": 3.042,
      "step": 203109
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3998992443084717,
      "learning_rate": 2.0535216698522227e-05,
      "loss": 2.7692,
      "step": 203110
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6585564613342285,
      "learning_rate": 2.0533729334314962e-05,
      "loss": 2.9028,
      "step": 203111
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7206735610961914,
      "learning_rate": 2.053224202206567e-05,
      "loss": 2.8801,
      "step": 203112
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.5299131870269775,
      "learning_rate": 2.0530754761774782e-05,
      "loss": 3.0112,
      "step": 203113
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6569838523864746,
      "learning_rate": 2.052926755344253e-05,
      "loss": 2.6856,
      "step": 203114
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.245396614074707,
      "learning_rate": 2.052778039706908e-05,
      "loss": 2.9242,
      "step": 203115
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.7784857749938965,
      "learning_rate": 2.0526293292654838e-05,
      "loss": 2.8612,
      "step": 203116
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0497958660125732,
      "learning_rate": 2.0524806240199997e-05,
      "loss": 3.0266,
      "step": 203117
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.213472366333008,
      "learning_rate": 2.0523319239704926e-05,
      "loss": 3.03,
      "step": 203118
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.258094549179077,
      "learning_rate": 2.0521832291169827e-05,
      "loss": 2.9265,
      "step": 203119
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.3002874851226807,
      "learning_rate": 2.052034539459496e-05,
      "loss": 2.7699,
      "step": 203120
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8586723804473877,
      "learning_rate": 2.0518858549980667e-05,
      "loss": 3.0824,
      "step": 203121
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.957428455352783,
      "learning_rate": 2.0517371757327206e-05,
      "loss": 3.033,
      "step": 203122
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0180013179779053,
      "learning_rate": 2.0515885016634782e-05,
      "loss": 2.8307,
      "step": 203123
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1374318599700928,
      "learning_rate": 2.051439832790379e-05,
      "loss": 2.9778,
      "step": 203124
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.228597640991211,
      "learning_rate": 2.051291169113447e-05,
      "loss": 2.8745,
      "step": 203125
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.72054123878479,
      "learning_rate": 2.0511425106326985e-05,
      "loss": 2.7904,
      "step": 203126
    },
    {
      "epoch": 2.64,
      "grad_norm": 4.114144802093506,
      "learning_rate": 2.0509938573481765e-05,
      "loss": 2.9491,
      "step": 203127
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.060392379760742,
      "learning_rate": 2.050845209259905e-05,
      "loss": 3.0192,
      "step": 203128
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.799459218978882,
      "learning_rate": 2.0506965663678998e-05,
      "loss": 2.7423,
      "step": 203129
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8157548904418945,
      "learning_rate": 2.050547928672205e-05,
      "loss": 2.9539,
      "step": 203130
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1234793663024902,
      "learning_rate": 2.0503992961728398e-05,
      "loss": 2.8762,
      "step": 203131
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.8089518547058105,
      "learning_rate": 2.0502506688698283e-05,
      "loss": 2.9663,
      "step": 203132
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.0354526042938232,
      "learning_rate": 2.05010204676321e-05,
      "loss": 2.6967,
      "step": 203133
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.6426126956939697,
      "learning_rate": 2.0499534298529985e-05,
      "loss": 3.0225,
      "step": 203134
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.767404079437256,
      "learning_rate": 2.0498048181392333e-05,
      "loss": 2.8328,
      "step": 203135
    },
    {
      "epoch": 2.64,
      "grad_norm": 2.8281874656677246,
      "learning_rate": 2.049656211621935e-05,
      "loss": 2.8646,
      "step": 203136
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.289898157119751,
      "learning_rate": 2.0495076103011364e-05,
      "loss": 2.6602,
      "step": 203137
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.1191868782043457,
      "learning_rate": 2.0493590141768547e-05,
      "loss": 3.1989,
      "step": 203138
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9397335052490234,
      "learning_rate": 2.0492104232491324e-05,
      "loss": 2.9559,
      "step": 203139
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4141271114349365,
      "learning_rate": 2.0490618375179836e-05,
      "loss": 2.8271,
      "step": 203140
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.76096773147583,
      "learning_rate": 2.0489132569834443e-05,
      "loss": 3.0166,
      "step": 203141
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7137932777404785,
      "learning_rate": 2.0487646816455416e-05,
      "loss": 3.1087,
      "step": 203142
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8488669395446777,
      "learning_rate": 2.048616111504302e-05,
      "loss": 2.9631,
      "step": 203143
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.092823505401611,
      "learning_rate": 2.0484675465597456e-05,
      "loss": 3.103,
      "step": 203144
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.169562816619873,
      "learning_rate": 2.048318986811912e-05,
      "loss": 2.8152,
      "step": 203145
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6793205738067627,
      "learning_rate": 2.0481704322608183e-05,
      "loss": 2.9195,
      "step": 203146
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.81581974029541,
      "learning_rate": 2.0480218829065044e-05,
      "loss": 2.9127,
      "step": 203147
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.168088912963867,
      "learning_rate": 2.0478733387489897e-05,
      "loss": 2.8691,
      "step": 203148
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1516244411468506,
      "learning_rate": 2.0477247997883016e-05,
      "loss": 3.0868,
      "step": 203149
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.611083745956421,
      "learning_rate": 2.0475762660244667e-05,
      "loss": 2.9305,
      "step": 203150
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.371683597564697,
      "learning_rate": 2.0474277374575176e-05,
      "loss": 2.9222,
      "step": 203151
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.041093111038208,
      "learning_rate": 2.047279214087475e-05,
      "loss": 2.8416,
      "step": 203152
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.026683807373047,
      "learning_rate": 2.0471306959143753e-05,
      "loss": 3.0095,
      "step": 203153
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2239856719970703,
      "learning_rate": 2.046982182938245e-05,
      "loss": 2.7913,
      "step": 203154
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6585452556610107,
      "learning_rate": 2.046833675159104e-05,
      "loss": 2.8362,
      "step": 203155
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9602746963500977,
      "learning_rate": 2.0466851725769828e-05,
      "loss": 2.7781,
      "step": 203156
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.02201509475708,
      "learning_rate": 2.0465366751919143e-05,
      "loss": 3.1904,
      "step": 203157
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0511422157287598,
      "learning_rate": 2.0463881830039153e-05,
      "loss": 2.849,
      "step": 203158
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3109781742095947,
      "learning_rate": 2.0462396960130255e-05,
      "loss": 3.0135,
      "step": 203159
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.812246084213257,
      "learning_rate": 2.0460912142192687e-05,
      "loss": 2.9586,
      "step": 203160
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7613604068756104,
      "learning_rate": 2.0459427376226712e-05,
      "loss": 2.8693,
      "step": 203161
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.133129358291626,
      "learning_rate": 2.045794266223256e-05,
      "loss": 3.1484,
      "step": 203162
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.632750988006592,
      "learning_rate": 2.0456458000210608e-05,
      "loss": 2.8089,
      "step": 203163
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.792393684387207,
      "learning_rate": 2.045497339016101e-05,
      "loss": 2.9602,
      "step": 203164
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7730207443237305,
      "learning_rate": 2.0453488832084175e-05,
      "loss": 3.0888,
      "step": 203165
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9556257724761963,
      "learning_rate": 2.0452004325980333e-05,
      "loss": 2.7811,
      "step": 203166
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1175942420959473,
      "learning_rate": 2.0450519871849714e-05,
      "loss": 2.7817,
      "step": 203167
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.706289768218994,
      "learning_rate": 2.0449035469692555e-05,
      "loss": 2.7875,
      "step": 203168
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0534965991973877,
      "learning_rate": 2.0447551119509254e-05,
      "loss": 2.938,
      "step": 203169
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7853453159332275,
      "learning_rate": 2.0446066821300013e-05,
      "loss": 2.6927,
      "step": 203170
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2350709438323975,
      "learning_rate": 2.0444582575065127e-05,
      "loss": 3.092,
      "step": 203171
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.909209728240967,
      "learning_rate": 2.04430983808049e-05,
      "loss": 3.0277,
      "step": 203172
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.959815740585327,
      "learning_rate": 2.0441614238519564e-05,
      "loss": 2.9567,
      "step": 203173
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.9352316856384277,
      "learning_rate": 2.044013014820939e-05,
      "loss": 3.0679,
      "step": 203174
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.395261526107788,
      "learning_rate": 2.0438646109874702e-05,
      "loss": 2.8252,
      "step": 203175
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5124545097351074,
      "learning_rate": 2.043716212351567e-05,
      "loss": 2.9815,
      "step": 203176
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2111165523529053,
      "learning_rate": 2.043567818913273e-05,
      "loss": 3.0513,
      "step": 203177
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.158815383911133,
      "learning_rate": 2.0434194306726082e-05,
      "loss": 2.8329,
      "step": 203178
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9353222846984863,
      "learning_rate": 2.0432710476295987e-05,
      "loss": 3.1428,
      "step": 203179
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.196807384490967,
      "learning_rate": 2.0431226697842652e-05,
      "loss": 2.9912,
      "step": 203180
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.258699893951416,
      "learning_rate": 2.04297429713665e-05,
      "loss": 3.1128,
      "step": 203181
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.191892623901367,
      "learning_rate": 2.0428259296867677e-05,
      "loss": 2.9037,
      "step": 203182
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1211185455322266,
      "learning_rate": 2.0426775674346572e-05,
      "loss": 2.9864,
      "step": 203183
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.036421537399292,
      "learning_rate": 2.042529210380336e-05,
      "loss": 2.88,
      "step": 203184
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.07492208480835,
      "learning_rate": 2.0423808585238466e-05,
      "loss": 3.062,
      "step": 203185
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.45147442817688,
      "learning_rate": 2.042232511865196e-05,
      "loss": 3.0624,
      "step": 203186
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.785888433456421,
      "learning_rate": 2.0420841704044277e-05,
      "loss": 2.8836,
      "step": 203187
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5825881958007812,
      "learning_rate": 2.041935834141558e-05,
      "loss": 3.0479,
      "step": 203188
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2214772701263428,
      "learning_rate": 2.0417875030766274e-05,
      "loss": 3.255,
      "step": 203189
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.251427173614502,
      "learning_rate": 2.0416391772096486e-05,
      "loss": 2.8598,
      "step": 203190
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.719731569290161,
      "learning_rate": 2.041490856540665e-05,
      "loss": 2.7706,
      "step": 203191
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0599381923675537,
      "learning_rate": 2.0413425410696903e-05,
      "loss": 2.6951,
      "step": 203192
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.074083089828491,
      "learning_rate": 2.0411942307967612e-05,
      "loss": 2.9695,
      "step": 203193
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.22577166557312,
      "learning_rate": 2.041045925721897e-05,
      "loss": 2.9198,
      "step": 203194
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.567969560623169,
      "learning_rate": 2.040897625845135e-05,
      "loss": 3.0373,
      "step": 203195
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.847485065460205,
      "learning_rate": 2.0407493311664914e-05,
      "loss": 2.8594,
      "step": 203196
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6252384185791016,
      "learning_rate": 2.0406010416860063e-05,
      "loss": 3.0451,
      "step": 203197
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.05001163482666,
      "learning_rate": 2.0404527574037032e-05,
      "loss": 3.0632,
      "step": 203198
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9791946411132812,
      "learning_rate": 2.0403044783196053e-05,
      "loss": 2.8586,
      "step": 203199
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.3933372497558594,
      "learning_rate": 2.040156204433736e-05,
      "loss": 3.3578,
      "step": 203200
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.755885601043701,
      "learning_rate": 2.0400079357461386e-05,
      "loss": 3.0321,
      "step": 203201
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.062587261199951,
      "learning_rate": 2.039859672256823e-05,
      "loss": 2.8295,
      "step": 203202
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5790963172912598,
      "learning_rate": 2.039711413965832e-05,
      "loss": 2.9033,
      "step": 203203
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.617393732070923,
      "learning_rate": 2.0395631608731866e-05,
      "loss": 2.9715,
      "step": 203204
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7466726303100586,
      "learning_rate": 2.0394149129789126e-05,
      "loss": 3.0893,
      "step": 203205
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0063724517822266,
      "learning_rate": 2.0392666702830374e-05,
      "loss": 3.1419,
      "step": 203206
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.890770435333252,
      "learning_rate": 2.0391184327855935e-05,
      "loss": 2.9676,
      "step": 203207
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.886357069015503,
      "learning_rate": 2.0389702004866016e-05,
      "loss": 2.8441,
      "step": 203208
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6842432022094727,
      "learning_rate": 2.0388219733860944e-05,
      "loss": 2.8433,
      "step": 203209
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.774437665939331,
      "learning_rate": 2.0386737514840988e-05,
      "loss": 3.0084,
      "step": 203210
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.994856595993042,
      "learning_rate": 2.0385255347806385e-05,
      "loss": 2.8299,
      "step": 203211
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.729984760284424,
      "learning_rate": 2.0383773232757494e-05,
      "loss": 2.7508,
      "step": 203212
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.748966932296753,
      "learning_rate": 2.038229116969452e-05,
      "loss": 2.6757,
      "step": 203213
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2519118785858154,
      "learning_rate": 2.038080915861773e-05,
      "loss": 3.083,
      "step": 203214
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.539062738418579,
      "learning_rate": 2.037932719952745e-05,
      "loss": 2.9229,
      "step": 203215
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.015249490737915,
      "learning_rate": 2.0377845292423956e-05,
      "loss": 2.7763,
      "step": 203216
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8521687984466553,
      "learning_rate": 2.0376363437307443e-05,
      "loss": 3.1444,
      "step": 203217
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.278036594390869,
      "learning_rate": 2.0374881634178275e-05,
      "loss": 3.1449,
      "step": 203218
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0934176445007324,
      "learning_rate": 2.0373399883036656e-05,
      "loss": 3.1278,
      "step": 203219
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.9659059047698975,
      "learning_rate": 2.037191818388295e-05,
      "loss": 2.7026,
      "step": 203220
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3109230995178223,
      "learning_rate": 2.037043653671736e-05,
      "loss": 2.6126,
      "step": 203221
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5503108501434326,
      "learning_rate": 2.0368954941540216e-05,
      "loss": 2.6777,
      "step": 203222
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6716055870056152,
      "learning_rate": 2.0367473398351684e-05,
      "loss": 3.0373,
      "step": 203223
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4798412322998047,
      "learning_rate": 2.03659919071522e-05,
      "loss": 2.9386,
      "step": 203224
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3350062370300293,
      "learning_rate": 2.0364510467941897e-05,
      "loss": 2.7864,
      "step": 203225
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.863905429840088,
      "learning_rate": 2.036302908072114e-05,
      "loss": 2.9213,
      "step": 203226
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.353760242462158,
      "learning_rate": 2.0361547745490194e-05,
      "loss": 2.8154,
      "step": 203227
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7052814960479736,
      "learning_rate": 2.0360066462249324e-05,
      "loss": 2.8349,
      "step": 203228
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7732319831848145,
      "learning_rate": 2.0358585230998737e-05,
      "loss": 2.8839,
      "step": 203229
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.750673770904541,
      "learning_rate": 2.035710405173879e-05,
      "loss": 2.8772,
      "step": 203230
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.327606678009033,
      "learning_rate": 2.0355622924469728e-05,
      "loss": 2.8938,
      "step": 203231
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6985514163970947,
      "learning_rate": 2.0354141849191842e-05,
      "loss": 2.7396,
      "step": 203232
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7321367263793945,
      "learning_rate": 2.035266082590543e-05,
      "loss": 2.7504,
      "step": 203233
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.724731683731079,
      "learning_rate": 2.0351179854610733e-05,
      "loss": 2.9143,
      "step": 203234
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6766629219055176,
      "learning_rate": 2.0349698935307977e-05,
      "loss": 2.8911,
      "step": 203235
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.093942403793335,
      "learning_rate": 2.0348218067997534e-05,
      "loss": 3.1223,
      "step": 203236
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.715512990951538,
      "learning_rate": 2.0346737252679602e-05,
      "loss": 3.0904,
      "step": 203237
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8764469623565674,
      "learning_rate": 2.034525648935451e-05,
      "loss": 2.9294,
      "step": 203238
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9891083240509033,
      "learning_rate": 2.0343775778022566e-05,
      "loss": 2.8899,
      "step": 203239
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.885607957839966,
      "learning_rate": 2.034229511868396e-05,
      "loss": 2.8772,
      "step": 203240
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.00410795211792,
      "learning_rate": 2.0340814511338932e-05,
      "loss": 2.9494,
      "step": 203241
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9703750610351562,
      "learning_rate": 2.0339333955987914e-05,
      "loss": 2.8653,
      "step": 203242
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.811119318008423,
      "learning_rate": 2.0337853452631004e-05,
      "loss": 3.0438,
      "step": 203243
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.661799669265747,
      "learning_rate": 2.0336373001268635e-05,
      "loss": 2.95,
      "step": 203244
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.069542169570923,
      "learning_rate": 2.0334892601901008e-05,
      "loss": 2.9017,
      "step": 203245
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0066721439361572,
      "learning_rate": 2.0333412254528424e-05,
      "loss": 2.8688,
      "step": 203246
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6277108192443848,
      "learning_rate": 2.0331931959151082e-05,
      "loss": 3.0386,
      "step": 203247
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9167051315307617,
      "learning_rate": 2.0330451715769346e-05,
      "loss": 2.8163,
      "step": 203248
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6678850650787354,
      "learning_rate": 2.032897152438342e-05,
      "loss": 2.6538,
      "step": 203249
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0054571628570557,
      "learning_rate": 2.0327491384993666e-05,
      "loss": 2.8341,
      "step": 203250
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.766038179397583,
      "learning_rate": 2.0326011297600254e-05,
      "loss": 2.9704,
      "step": 203251
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3922278881073,
      "learning_rate": 2.0324531262203612e-05,
      "loss": 3.1596,
      "step": 203252
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1695544719696045,
      "learning_rate": 2.0323051278803847e-05,
      "loss": 2.8152,
      "step": 203253
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1248059272766113,
      "learning_rate": 2.0321571347401323e-05,
      "loss": 2.9814,
      "step": 203254
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3380537033081055,
      "learning_rate": 2.032009146799627e-05,
      "loss": 3.0928,
      "step": 203255
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.701427936553955,
      "learning_rate": 2.031861164058902e-05,
      "loss": 3.1718,
      "step": 203256
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.173478603363037,
      "learning_rate": 2.0317131865179815e-05,
      "loss": 2.6706,
      "step": 203257
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.336087465286255,
      "learning_rate": 2.031565214176898e-05,
      "loss": 3.0519,
      "step": 203258
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4577746391296387,
      "learning_rate": 2.0314172470356684e-05,
      "loss": 2.8938,
      "step": 203259
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.8847403526306152,
      "learning_rate": 2.031269285094329e-05,
      "loss": 2.9922,
      "step": 203260
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8246426582336426,
      "learning_rate": 2.0311213283529004e-05,
      "loss": 3.1254,
      "step": 203261
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.656367540359497,
      "learning_rate": 2.030973376811419e-05,
      "loss": 2.8647,
      "step": 203262
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2049813270568848,
      "learning_rate": 2.0308254304699013e-05,
      "loss": 3.0607,
      "step": 203263
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1970436573028564,
      "learning_rate": 2.030677489328394e-05,
      "loss": 2.8416,
      "step": 203264
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5461394786834717,
      "learning_rate": 2.0305295533869003e-05,
      "loss": 2.8215,
      "step": 203265
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.979357957839966,
      "learning_rate": 2.030381622645464e-05,
      "loss": 2.8139,
      "step": 203266
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.851317882537842,
      "learning_rate": 2.0302336971041012e-05,
      "loss": 2.7518,
      "step": 203267
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.672849416732788,
      "learning_rate": 2.0300857767628555e-05,
      "loss": 2.9772,
      "step": 203268
    },
    {
      "epoch": 2.65,
      "grad_norm": 6.185529708862305,
      "learning_rate": 2.0299378616217364e-05,
      "loss": 3.0174,
      "step": 203269
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.229614019393921,
      "learning_rate": 2.029789951680788e-05,
      "loss": 2.9017,
      "step": 203270
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.149674415588379,
      "learning_rate": 2.029642046940023e-05,
      "loss": 2.8778,
      "step": 203271
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8930516242980957,
      "learning_rate": 2.0294941473994785e-05,
      "loss": 2.9731,
      "step": 203272
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.689054250717163,
      "learning_rate": 2.0293462530591776e-05,
      "loss": 2.8316,
      "step": 203273
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.020840644836426,
      "learning_rate": 2.02919836391915e-05,
      "loss": 2.9714,
      "step": 203274
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.201979637145996,
      "learning_rate": 2.0290504799794193e-05,
      "loss": 2.6535,
      "step": 203275
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.952786445617676,
      "learning_rate": 2.0289026012400257e-05,
      "loss": 3.1109,
      "step": 203276
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.144796133041382,
      "learning_rate": 2.0287547277009752e-05,
      "loss": 2.7881,
      "step": 203277
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.860513925552368,
      "learning_rate": 2.028606859362315e-05,
      "loss": 2.5593,
      "step": 203278
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.881350517272949,
      "learning_rate": 2.0284589962240584e-05,
      "loss": 2.8861,
      "step": 203279
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.375284433364868,
      "learning_rate": 2.028311138286245e-05,
      "loss": 3.0026,
      "step": 203280
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7072043418884277,
      "learning_rate": 2.0281632855488918e-05,
      "loss": 2.976,
      "step": 203281
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1932733058929443,
      "learning_rate": 2.028015438012035e-05,
      "loss": 2.7244,
      "step": 203282
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8618693351745605,
      "learning_rate": 2.0278675956757017e-05,
      "loss": 3.0468,
      "step": 203283
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.214909791946411,
      "learning_rate": 2.0277197585399118e-05,
      "loss": 3.0388,
      "step": 203284
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5727624893188477,
      "learning_rate": 2.0275719266046918e-05,
      "loss": 2.8814,
      "step": 203285
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.442669630050659,
      "learning_rate": 2.027424099870082e-05,
      "loss": 2.9154,
      "step": 203286
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.899019241333008,
      "learning_rate": 2.027276278336095e-05,
      "loss": 2.8072,
      "step": 203287
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.4053192138671875,
      "learning_rate": 2.0271284620027717e-05,
      "loss": 2.7582,
      "step": 203288
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.106527328491211,
      "learning_rate": 2.0269806508701313e-05,
      "loss": 3.0954,
      "step": 203289
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.192476749420166,
      "learning_rate": 2.0268328449382045e-05,
      "loss": 2.8727,
      "step": 203290
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.416983604431152,
      "learning_rate": 2.0266850442070138e-05,
      "loss": 2.7913,
      "step": 203291
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8577871322631836,
      "learning_rate": 2.026537248676593e-05,
      "loss": 2.6978,
      "step": 203292
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8107266426086426,
      "learning_rate": 2.026389458346962e-05,
      "loss": 2.8754,
      "step": 203293
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.037057638168335,
      "learning_rate": 2.0262416732181573e-05,
      "loss": 3.0621,
      "step": 203294
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.553598165512085,
      "learning_rate": 2.026093893290206e-05,
      "loss": 3.0739,
      "step": 203295
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3524744510650635,
      "learning_rate": 2.0259461185631243e-05,
      "loss": 2.8163,
      "step": 203296
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.390180826187134,
      "learning_rate": 2.0257983490369523e-05,
      "loss": 2.9309,
      "step": 203297
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.114105224609375,
      "learning_rate": 2.0256505847117134e-05,
      "loss": 2.7874,
      "step": 203298
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.2936906814575195,
      "learning_rate": 2.0255028255874306e-05,
      "loss": 2.9732,
      "step": 203299
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.897472620010376,
      "learning_rate": 2.0253550716641375e-05,
      "loss": 3.0945,
      "step": 203300
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.024528980255127,
      "learning_rate": 2.0252073229418608e-05,
      "loss": 2.8755,
      "step": 203301
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.565846920013428,
      "learning_rate": 2.025059579420617e-05,
      "loss": 2.7442,
      "step": 203302
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.715718984603882,
      "learning_rate": 2.0249118411004528e-05,
      "loss": 2.9801,
      "step": 203303
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.666008234024048,
      "learning_rate": 2.024764107981378e-05,
      "loss": 3.0057,
      "step": 203304
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.98628830909729,
      "learning_rate": 2.024616380063433e-05,
      "loss": 2.8584,
      "step": 203305
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.922769069671631,
      "learning_rate": 2.0244686573466406e-05,
      "loss": 2.9401,
      "step": 203306
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.828484058380127,
      "learning_rate": 2.0243209398310244e-05,
      "loss": 2.8048,
      "step": 203307
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.108362913131714,
      "learning_rate": 2.0241732275166145e-05,
      "loss": 3.0217,
      "step": 203308
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6931822299957275,
      "learning_rate": 2.0240255204034404e-05,
      "loss": 2.9092,
      "step": 203309
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1987674236297607,
      "learning_rate": 2.023877818491526e-05,
      "loss": 3.0399,
      "step": 203310
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3019485473632812,
      "learning_rate": 2.0237301217809043e-05,
      "loss": 3.12,
      "step": 203311
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.88393497467041,
      "learning_rate": 2.0235824302715985e-05,
      "loss": 2.5818,
      "step": 203312
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1452670097351074,
      "learning_rate": 2.0234347439636357e-05,
      "loss": 2.8954,
      "step": 203313
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9310643672943115,
      "learning_rate": 2.023287062857042e-05,
      "loss": 2.9617,
      "step": 203314
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6191182136535645,
      "learning_rate": 2.023139386951851e-05,
      "loss": 2.7704,
      "step": 203315
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8626418113708496,
      "learning_rate": 2.0229917162480825e-05,
      "loss": 2.8473,
      "step": 203316
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.786848783493042,
      "learning_rate": 2.0228440507457734e-05,
      "loss": 2.9685,
      "step": 203317
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.4724788665771484,
      "learning_rate": 2.02269639044494e-05,
      "loss": 2.8348,
      "step": 203318
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6780316829681396,
      "learning_rate": 2.022548735345626e-05,
      "loss": 2.9889,
      "step": 203319
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1615726947784424,
      "learning_rate": 2.022401085447838e-05,
      "loss": 2.869,
      "step": 203320
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.9567244052886963,
      "learning_rate": 2.022253440751619e-05,
      "loss": 2.5493,
      "step": 203321
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7250399589538574,
      "learning_rate": 2.0221058012569857e-05,
      "loss": 3.0075,
      "step": 203322
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.919830322265625,
      "learning_rate": 2.0219581669639752e-05,
      "loss": 2.6399,
      "step": 203323
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8201184272766113,
      "learning_rate": 2.021810537872607e-05,
      "loss": 3.153,
      "step": 203324
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.629079818725586,
      "learning_rate": 2.021662913982921e-05,
      "loss": 3.0305,
      "step": 203325
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9080450534820557,
      "learning_rate": 2.0215152952949275e-05,
      "loss": 3.0201,
      "step": 203326
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7251787185668945,
      "learning_rate": 2.0213676818086667e-05,
      "loss": 2.9456,
      "step": 203327
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4188859462738037,
      "learning_rate": 2.021220073524158e-05,
      "loss": 2.9199,
      "step": 203328
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8509888648986816,
      "learning_rate": 2.021072470441435e-05,
      "loss": 2.7175,
      "step": 203329
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.8413710594177246,
      "learning_rate": 2.020924872560521e-05,
      "loss": 2.6872,
      "step": 203330
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0947868824005127,
      "learning_rate": 2.0207772798814526e-05,
      "loss": 3.155,
      "step": 203331
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.624440908432007,
      "learning_rate": 2.0206296924042397e-05,
      "loss": 3.0185,
      "step": 203332
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.582669734954834,
      "learning_rate": 2.0204821101289258e-05,
      "loss": 2.894,
      "step": 203333
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0035431385040283,
      "learning_rate": 2.0203345330555275e-05,
      "loss": 2.8243,
      "step": 203334
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1009750366210938,
      "learning_rate": 2.0201869611840815e-05,
      "loss": 2.6714,
      "step": 203335
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9829347133636475,
      "learning_rate": 2.0200393945146076e-05,
      "loss": 2.9668,
      "step": 203336
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.249166488647461,
      "learning_rate": 2.019891833047146e-05,
      "loss": 2.7788,
      "step": 203337
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6798460483551025,
      "learning_rate": 2.0197442767817063e-05,
      "loss": 3.0738,
      "step": 203338
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6827874183654785,
      "learning_rate": 2.0195967257183253e-05,
      "loss": 3.0359,
      "step": 203339
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6761441230773926,
      "learning_rate": 2.0194491798570265e-05,
      "loss": 2.7173,
      "step": 203340
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8924827575683594,
      "learning_rate": 2.0193016391978467e-05,
      "loss": 2.9976,
      "step": 203341
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.716573715209961,
      "learning_rate": 2.0191541037407987e-05,
      "loss": 2.8481,
      "step": 203342
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.903735160827637,
      "learning_rate": 2.0190065734859296e-05,
      "loss": 2.9825,
      "step": 203343
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.077417850494385,
      "learning_rate": 2.0188590484332458e-05,
      "loss": 3.1588,
      "step": 203344
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.465627431869507,
      "learning_rate": 2.0187115285827903e-05,
      "loss": 2.9316,
      "step": 203345
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.31160831451416,
      "learning_rate": 2.018564013934577e-05,
      "loss": 2.9908,
      "step": 203346
    },
    {
      "epoch": 2.65,
      "grad_norm": 5.402170658111572,
      "learning_rate": 2.018416504488649e-05,
      "loss": 2.8007,
      "step": 203347
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7271969318389893,
      "learning_rate": 2.0182690002450163e-05,
      "loss": 2.997,
      "step": 203348
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.170017957687378,
      "learning_rate": 2.0181215012037287e-05,
      "loss": 3.1552,
      "step": 203349
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6517539024353027,
      "learning_rate": 2.0179740073647897e-05,
      "loss": 2.8532,
      "step": 203350
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6955723762512207,
      "learning_rate": 2.0178265187282427e-05,
      "loss": 2.9138,
      "step": 203351
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.814990520477295,
      "learning_rate": 2.0176790352941074e-05,
      "loss": 3.0419,
      "step": 203352
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7907280921936035,
      "learning_rate": 2.017531557062414e-05,
      "loss": 3.0049,
      "step": 203353
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.790163993835449,
      "learning_rate": 2.017384084033189e-05,
      "loss": 2.7942,
      "step": 203354
    },
    {
      "epoch": 2.65,
      "grad_norm": 6.04389762878418,
      "learning_rate": 2.0172366162064657e-05,
      "loss": 3.0728,
      "step": 203355
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8593075275421143,
      "learning_rate": 2.017089153582261e-05,
      "loss": 2.9865,
      "step": 203356
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4665794372558594,
      "learning_rate": 2.016941696160611e-05,
      "loss": 2.887,
      "step": 203357
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0738563537597656,
      "learning_rate": 2.016794243941533e-05,
      "loss": 3.0233,
      "step": 203358
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.890432834625244,
      "learning_rate": 2.0166467969250698e-05,
      "loss": 2.9447,
      "step": 203359
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9252612590789795,
      "learning_rate": 2.016499355111232e-05,
      "loss": 2.9408,
      "step": 203360
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.436657905578613,
      "learning_rate": 2.0163519185000655e-05,
      "loss": 2.7718,
      "step": 203361
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.962035894393921,
      "learning_rate": 2.0162044870915805e-05,
      "loss": 2.8491,
      "step": 203362
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.500274181365967,
      "learning_rate": 2.016057060885814e-05,
      "loss": 2.8959,
      "step": 203363
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.968904733657837,
      "learning_rate": 2.0159096398827856e-05,
      "loss": 2.774,
      "step": 203364
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.655702590942383,
      "learning_rate": 2.0157622240825323e-05,
      "loss": 2.9492,
      "step": 203365
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7803890705108643,
      "learning_rate": 2.0156148134850735e-05,
      "loss": 3.0023,
      "step": 203366
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1843862533569336,
      "learning_rate": 2.0154674080904465e-05,
      "loss": 2.9978,
      "step": 203367
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.118581771850586,
      "learning_rate": 2.0153200078986675e-05,
      "loss": 2.9249,
      "step": 203368
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.237884759902954,
      "learning_rate": 2.01517261290977e-05,
      "loss": 2.9251,
      "step": 203369
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.142056465148926,
      "learning_rate": 2.0150252231237773e-05,
      "loss": 2.9761,
      "step": 203370
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.530941963195801,
      "learning_rate": 2.0148778385407227e-05,
      "loss": 2.7265,
      "step": 203371
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.4495134353637695,
      "learning_rate": 2.0147304591606293e-05,
      "loss": 2.9845,
      "step": 203372
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.685763359069824,
      "learning_rate": 2.0145830849835276e-05,
      "loss": 2.8865,
      "step": 203373
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8426997661590576,
      "learning_rate": 2.0144357160094437e-05,
      "loss": 2.932,
      "step": 203374
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1451141834259033,
      "learning_rate": 2.0142883522384046e-05,
      "loss": 3.072,
      "step": 203375
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.006654977798462,
      "learning_rate": 2.0141409936704332e-05,
      "loss": 3.0181,
      "step": 203376
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.853045701980591,
      "learning_rate": 2.0139936403055666e-05,
      "loss": 3.068,
      "step": 203377
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9402239322662354,
      "learning_rate": 2.0138462921438215e-05,
      "loss": 2.9252,
      "step": 203378
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.146289110183716,
      "learning_rate": 2.013698949185234e-05,
      "loss": 2.7874,
      "step": 203379
    },
    {
      "epoch": 2.65,
      "grad_norm": 5.491210460662842,
      "learning_rate": 2.0135516114298278e-05,
      "loss": 3.0125,
      "step": 203380
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5156006813049316,
      "learning_rate": 2.0134042788776293e-05,
      "loss": 2.6816,
      "step": 203381
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.304670810699463,
      "learning_rate": 2.013256951528669e-05,
      "loss": 2.9161,
      "step": 203382
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8228352069854736,
      "learning_rate": 2.013109629382973e-05,
      "loss": 2.9402,
      "step": 203383
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.515255928039551,
      "learning_rate": 2.0129623124405647e-05,
      "loss": 2.773,
      "step": 203384
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8406224250793457,
      "learning_rate": 2.0128150007014776e-05,
      "loss": 3.1309,
      "step": 203385
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2917189598083496,
      "learning_rate": 2.0126676941657382e-05,
      "loss": 3.0889,
      "step": 203386
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.846465587615967,
      "learning_rate": 2.01252039283337e-05,
      "loss": 3.0075,
      "step": 203387
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.758479595184326,
      "learning_rate": 2.0123730967044028e-05,
      "loss": 2.7234,
      "step": 203388
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.517460584640503,
      "learning_rate": 2.0122258057788664e-05,
      "loss": 2.8906,
      "step": 203389
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.906724691390991,
      "learning_rate": 2.012078520056778e-05,
      "loss": 2.7329,
      "step": 203390
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8272829055786133,
      "learning_rate": 2.0119312395381805e-05,
      "loss": 3.0942,
      "step": 203391
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.352482557296753,
      "learning_rate": 2.0117839642230903e-05,
      "loss": 2.7843,
      "step": 203392
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9069724082946777,
      "learning_rate": 2.0116366941115346e-05,
      "loss": 3.0079,
      "step": 203393
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9295384883880615,
      "learning_rate": 2.01148942920355e-05,
      "loss": 3.0789,
      "step": 203394
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0737481117248535,
      "learning_rate": 2.0113421694991526e-05,
      "loss": 2.9824,
      "step": 203395
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1105146408081055,
      "learning_rate": 2.0111949149983765e-05,
      "loss": 2.9823,
      "step": 203396
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.978360176086426,
      "learning_rate": 2.0110476657012508e-05,
      "loss": 2.884,
      "step": 203397
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7639594078063965,
      "learning_rate": 2.0109004216077994e-05,
      "loss": 2.7225,
      "step": 203398
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.939092636108398,
      "learning_rate": 2.0107531827180455e-05,
      "loss": 2.8591,
      "step": 203399
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3312909603118896,
      "learning_rate": 2.010605949032026e-05,
      "loss": 2.8846,
      "step": 203400
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.303709030151367,
      "learning_rate": 2.0104587205497567e-05,
      "loss": 2.9806,
      "step": 203401
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9922444820404053,
      "learning_rate": 2.0103114972712754e-05,
      "loss": 2.9986,
      "step": 203402
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5225160121917725,
      "learning_rate": 2.010164279196601e-05,
      "loss": 2.8242,
      "step": 203403
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.427929401397705,
      "learning_rate": 2.0100170663257774e-05,
      "loss": 2.9838,
      "step": 203404
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9257924556732178,
      "learning_rate": 2.0098698586588113e-05,
      "loss": 2.8726,
      "step": 203405
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.009205341339111,
      "learning_rate": 2.009722656195739e-05,
      "loss": 2.9721,
      "step": 203406
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1027629375457764,
      "learning_rate": 2.0095754589365876e-05,
      "loss": 3.0089,
      "step": 203407
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.681152820587158,
      "learning_rate": 2.0094282668813867e-05,
      "loss": 3.0894,
      "step": 203408
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3625552654266357,
      "learning_rate": 2.0092810800301563e-05,
      "loss": 2.9173,
      "step": 203409
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.264521837234497,
      "learning_rate": 2.0091338983829398e-05,
      "loss": 2.7967,
      "step": 203410
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.127281665802002,
      "learning_rate": 2.0089867219397436e-05,
      "loss": 2.7438,
      "step": 203411
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.807410717010498,
      "learning_rate": 2.0088395507006084e-05,
      "loss": 3.1341,
      "step": 203412
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6360177993774414,
      "learning_rate": 2.0086923846655535e-05,
      "loss": 2.8704,
      "step": 203413
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.216660022735596,
      "learning_rate": 2.008545223834619e-05,
      "loss": 2.9513,
      "step": 203414
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.998570442199707,
      "learning_rate": 2.008398068207818e-05,
      "loss": 2.7826,
      "step": 203415
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.249147891998291,
      "learning_rate": 2.0082509177851912e-05,
      "loss": 2.9831,
      "step": 203416
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3190982341766357,
      "learning_rate": 2.0081037725667516e-05,
      "loss": 2.8056,
      "step": 203417
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4568567276000977,
      "learning_rate": 2.0079566325525387e-05,
      "loss": 2.8033,
      "step": 203418
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5239689350128174,
      "learning_rate": 2.0078094977425728e-05,
      "loss": 2.731,
      "step": 203419
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.114817142486572,
      "learning_rate": 2.007662368136884e-05,
      "loss": 3.0145,
      "step": 203420
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1758787631988525,
      "learning_rate": 2.0075152437354958e-05,
      "loss": 2.9757,
      "step": 203421
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9756364822387695,
      "learning_rate": 2.0073681245384508e-05,
      "loss": 2.9921,
      "step": 203422
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.835468053817749,
      "learning_rate": 2.007221010545753e-05,
      "loss": 3.0453,
      "step": 203423
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0329697132110596,
      "learning_rate": 2.0070739017574454e-05,
      "loss": 2.8141,
      "step": 203424
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2554121017456055,
      "learning_rate": 2.0069267981735483e-05,
      "loss": 2.7144,
      "step": 203425
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5157995223999023,
      "learning_rate": 2.0067796997940944e-05,
      "loss": 2.8618,
      "step": 203426
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.782515287399292,
      "learning_rate": 2.006632606619104e-05,
      "loss": 2.9739,
      "step": 203427
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.875866413116455,
      "learning_rate": 2.0064855186486208e-05,
      "loss": 3.0937,
      "step": 203428
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4206676483154297,
      "learning_rate": 2.006338435882647e-05,
      "loss": 2.9845,
      "step": 203429
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.116554021835327,
      "learning_rate": 2.006191358321231e-05,
      "loss": 2.8685,
      "step": 203430
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3688955307006836,
      "learning_rate": 2.0060442859643875e-05,
      "loss": 2.6228,
      "step": 203431
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.953245162963867,
      "learning_rate": 2.0058972188121514e-05,
      "loss": 3.0386,
      "step": 203432
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9894509315490723,
      "learning_rate": 2.005750156864545e-05,
      "loss": 3.1688,
      "step": 203433
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9592068195343018,
      "learning_rate": 2.005603100121609e-05,
      "loss": 2.961,
      "step": 203434
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.668454647064209,
      "learning_rate": 2.005456048583346e-05,
      "loss": 2.8258,
      "step": 203435
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.500570774078369,
      "learning_rate": 2.0053090022498065e-05,
      "loss": 3.0159,
      "step": 203436
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.207962989807129,
      "learning_rate": 2.005161961121e-05,
      "loss": 2.9147,
      "step": 203437
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2128190994262695,
      "learning_rate": 2.0050149251969703e-05,
      "loss": 3.0131,
      "step": 203438
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.448420763015747,
      "learning_rate": 2.0048678944777273e-05,
      "loss": 2.5389,
      "step": 203439
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.566390037536621,
      "learning_rate": 2.004720868963321e-05,
      "loss": 2.9441,
      "step": 203440
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4500844478607178,
      "learning_rate": 2.0045738486537576e-05,
      "loss": 2.9419,
      "step": 203441
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.236558198928833,
      "learning_rate": 2.0044268335490743e-05,
      "loss": 2.8524,
      "step": 203442
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.064495086669922,
      "learning_rate": 2.004279823649291e-05,
      "loss": 2.9787,
      "step": 203443
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7489395141601562,
      "learning_rate": 2.004132818954447e-05,
      "loss": 2.7586,
      "step": 203444
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5918221473693848,
      "learning_rate": 2.0039858194645596e-05,
      "loss": 2.9557,
      "step": 203445
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.813542604446411,
      "learning_rate": 2.0038388251796622e-05,
      "loss": 3.3106,
      "step": 203446
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2456889152526855,
      "learning_rate": 2.0036918360997813e-05,
      "loss": 3.0949,
      "step": 203447
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1000986099243164,
      "learning_rate": 2.0035448522249398e-05,
      "loss": 2.946,
      "step": 203448
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6752266883850098,
      "learning_rate": 2.003397873555165e-05,
      "loss": 2.9294,
      "step": 203449
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5085718631744385,
      "learning_rate": 2.003250900090493e-05,
      "loss": 2.7255,
      "step": 203450
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7819368839263916,
      "learning_rate": 2.0031039318309373e-05,
      "loss": 3.0696,
      "step": 203451
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5413925647735596,
      "learning_rate": 2.002956968776538e-05,
      "loss": 2.8103,
      "step": 203452
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2474524974823,
      "learning_rate": 2.0028100109273216e-05,
      "loss": 3.1646,
      "step": 203453
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8421411514282227,
      "learning_rate": 2.0026630582833082e-05,
      "loss": 3.0919,
      "step": 203454
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7000179290771484,
      "learning_rate": 2.002516110844524e-05,
      "loss": 3.0569,
      "step": 203455
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9621949195861816,
      "learning_rate": 2.002369168611003e-05,
      "loss": 2.9273,
      "step": 203456
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8973915576934814,
      "learning_rate": 2.0022222315827686e-05,
      "loss": 3.0125,
      "step": 203457
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.821021795272827,
      "learning_rate": 2.0020752997598534e-05,
      "loss": 3.0237,
      "step": 203458
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7453744411468506,
      "learning_rate": 2.001928373142281e-05,
      "loss": 3.2586,
      "step": 203459
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.762484550476074,
      "learning_rate": 2.001781451730078e-05,
      "loss": 3.1677,
      "step": 203460
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.541724920272827,
      "learning_rate": 2.0016345355232675e-05,
      "loss": 2.8387,
      "step": 203461
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2751588821411133,
      "learning_rate": 2.001487624521887e-05,
      "loss": 2.7247,
      "step": 203462
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.702082633972168,
      "learning_rate": 2.0013407187259557e-05,
      "loss": 3.1502,
      "step": 203463
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.9776771068573,
      "learning_rate": 2.0011938181355037e-05,
      "loss": 2.784,
      "step": 203464
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.455216646194458,
      "learning_rate": 2.001046922750561e-05,
      "loss": 2.9846,
      "step": 203465
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8371734619140625,
      "learning_rate": 2.0009000325711478e-05,
      "loss": 3.0323,
      "step": 203466
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8951592445373535,
      "learning_rate": 2.0007531475973003e-05,
      "loss": 2.9627,
      "step": 203467
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5445353984832764,
      "learning_rate": 2.0006062678290426e-05,
      "loss": 2.7023,
      "step": 203468
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.162710428237915,
      "learning_rate": 2.000459393266394e-05,
      "loss": 3.0107,
      "step": 203469
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.597792625427246,
      "learning_rate": 2.000312523909394e-05,
      "loss": 2.9402,
      "step": 203470
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6900298595428467,
      "learning_rate": 2.000165659758064e-05,
      "loss": 2.6488,
      "step": 203471
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.281181573867798,
      "learning_rate": 2.000018800812426e-05,
      "loss": 2.9274,
      "step": 203472
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9303321838378906,
      "learning_rate": 1.9998719470725212e-05,
      "loss": 2.7781,
      "step": 203473
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7414119243621826,
      "learning_rate": 1.9997250985383652e-05,
      "loss": 2.8599,
      "step": 203474
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.447397470474243,
      "learning_rate": 1.999578255209985e-05,
      "loss": 2.901,
      "step": 203475
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.8266472816467285,
      "learning_rate": 1.999431417087417e-05,
      "loss": 2.9746,
      "step": 203476
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.146918773651123,
      "learning_rate": 1.999284584170685e-05,
      "loss": 2.8627,
      "step": 203477
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6594555377960205,
      "learning_rate": 1.9991377564598054e-05,
      "loss": 2.9153,
      "step": 203478
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.694288969039917,
      "learning_rate": 1.998990933954825e-05,
      "loss": 2.9646,
      "step": 203479
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1016199588775635,
      "learning_rate": 1.99884411665575e-05,
      "loss": 2.7351,
      "step": 203480
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0017852783203125,
      "learning_rate": 1.9986973045626276e-05,
      "loss": 3.0453,
      "step": 203481
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0460262298583984,
      "learning_rate": 1.998550497675474e-05,
      "loss": 2.8931,
      "step": 203482
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.915126323699951,
      "learning_rate": 1.9984036959943194e-05,
      "loss": 2.8579,
      "step": 203483
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3835699558258057,
      "learning_rate": 1.998256899519184e-05,
      "loss": 2.701,
      "step": 203484
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.058000087738037,
      "learning_rate": 1.9981101082501074e-05,
      "loss": 2.7565,
      "step": 203485
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.961975336074829,
      "learning_rate": 1.997963322187106e-05,
      "loss": 3.0448,
      "step": 203486
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.516782283782959,
      "learning_rate": 1.9978165413302173e-05,
      "loss": 2.8143,
      "step": 203487
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7651760578155518,
      "learning_rate": 1.9976697656794605e-05,
      "loss": 2.9676,
      "step": 203488
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.263376235961914,
      "learning_rate": 1.997522995234866e-05,
      "loss": 2.9908,
      "step": 203489
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0504567623138428,
      "learning_rate": 1.997376229996457e-05,
      "loss": 2.9007,
      "step": 203490
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8340461254119873,
      "learning_rate": 1.9972294699642698e-05,
      "loss": 2.8604,
      "step": 203491
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5524799823760986,
      "learning_rate": 1.9970827151383184e-05,
      "loss": 2.9858,
      "step": 203492
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7534186840057373,
      "learning_rate": 1.9969359655186456e-05,
      "loss": 2.8836,
      "step": 203493
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0311660766601562,
      "learning_rate": 1.996789221105265e-05,
      "loss": 2.9941,
      "step": 203494
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2101972103118896,
      "learning_rate": 1.9966424818982196e-05,
      "loss": 2.9606,
      "step": 203495
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5746140480041504,
      "learning_rate": 1.9964957478975195e-05,
      "loss": 2.8144,
      "step": 203496
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8673832416534424,
      "learning_rate": 1.9963490191032017e-05,
      "loss": 2.8184,
      "step": 203497
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9223906993865967,
      "learning_rate": 1.9962022955152857e-05,
      "loss": 2.9704,
      "step": 203498
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6755306720733643,
      "learning_rate": 1.9960555771338116e-05,
      "loss": 3.155,
      "step": 203499
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6097447872161865,
      "learning_rate": 1.9959088639587928e-05,
      "loss": 3.1394,
      "step": 203500
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.906911611557007,
      "learning_rate": 1.9957621559902725e-05,
      "loss": 2.9797,
      "step": 203501
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5984394550323486,
      "learning_rate": 1.995615453228261e-05,
      "loss": 2.7064,
      "step": 203502
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8237674236297607,
      "learning_rate": 1.9954687556727978e-05,
      "loss": 2.8335,
      "step": 203503
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.824026107788086,
      "learning_rate": 1.9953220633238998e-05,
      "loss": 2.8505,
      "step": 203504
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3589696884155273,
      "learning_rate": 1.9951753761816035e-05,
      "loss": 2.9235,
      "step": 203505
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8428943157196045,
      "learning_rate": 1.9950286942459292e-05,
      "loss": 2.7367,
      "step": 203506
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.625659465789795,
      "learning_rate": 1.9948820175169167e-05,
      "loss": 2.9747,
      "step": 203507
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.972776174545288,
      "learning_rate": 1.9947353459945758e-05,
      "loss": 3.0382,
      "step": 203508
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9158477783203125,
      "learning_rate": 1.9945886796789467e-05,
      "loss": 2.7398,
      "step": 203509
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8850879669189453,
      "learning_rate": 1.9944420185700494e-05,
      "loss": 2.8344,
      "step": 203510
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.987157106399536,
      "learning_rate": 1.9942953626679135e-05,
      "loss": 2.8156,
      "step": 203511
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7994890213012695,
      "learning_rate": 1.9941487119725664e-05,
      "loss": 2.8246,
      "step": 203512
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.706849575042725,
      "learning_rate": 1.9940020664840406e-05,
      "loss": 2.8968,
      "step": 203513
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6587560176849365,
      "learning_rate": 1.9938554262023564e-05,
      "loss": 3.0234,
      "step": 203514
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0329673290252686,
      "learning_rate": 1.993708791127544e-05,
      "loss": 2.7013,
      "step": 203515
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9908576011657715,
      "learning_rate": 1.9935621612596264e-05,
      "loss": 3.0746,
      "step": 203516
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.775477170944214,
      "learning_rate": 1.9934155365986372e-05,
      "loss": 3.047,
      "step": 203517
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.652660608291626,
      "learning_rate": 1.993268917144596e-05,
      "loss": 2.884,
      "step": 203518
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.821784019470215,
      "learning_rate": 1.9931223028975397e-05,
      "loss": 3.05,
      "step": 203519
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8992254734039307,
      "learning_rate": 1.9929756938574915e-05,
      "loss": 2.672,
      "step": 203520
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.861539840698242,
      "learning_rate": 1.992829090024475e-05,
      "loss": 3.0676,
      "step": 203521
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0666863918304443,
      "learning_rate": 1.9926824913985195e-05,
      "loss": 2.8997,
      "step": 203522
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2280209064483643,
      "learning_rate": 1.992535897979656e-05,
      "loss": 3.0039,
      "step": 203523
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7843172550201416,
      "learning_rate": 1.9923893097679035e-05,
      "loss": 2.9279,
      "step": 203524
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.968837022781372,
      "learning_rate": 1.992242726763299e-05,
      "loss": 2.8127,
      "step": 203525
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.974200963973999,
      "learning_rate": 1.992096148965866e-05,
      "loss": 3.1067,
      "step": 203526
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4529216289520264,
      "learning_rate": 1.991949576375631e-05,
      "loss": 2.602,
      "step": 203527
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3989574909210205,
      "learning_rate": 1.991803008992614e-05,
      "loss": 2.8004,
      "step": 203528
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.869227886199951,
      "learning_rate": 1.9916564468168584e-05,
      "loss": 2.9617,
      "step": 203529
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.076035976409912,
      "learning_rate": 1.991509889848377e-05,
      "loss": 2.6061,
      "step": 203530
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.852592706680298,
      "learning_rate": 1.991363338087204e-05,
      "loss": 2.9277,
      "step": 203531
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8550288677215576,
      "learning_rate": 1.9912167915333688e-05,
      "loss": 2.9066,
      "step": 203532
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.218034029006958,
      "learning_rate": 1.9910702501868947e-05,
      "loss": 2.6809,
      "step": 203533
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.882455825805664,
      "learning_rate": 1.990923714047802e-05,
      "loss": 2.6379,
      "step": 203534
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6260483264923096,
      "learning_rate": 1.9907771831161335e-05,
      "loss": 2.8953,
      "step": 203535
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.70206880569458,
      "learning_rate": 1.9906306573919028e-05,
      "loss": 2.9258,
      "step": 203536
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8868463039398193,
      "learning_rate": 1.9904841368751435e-05,
      "loss": 2.9092,
      "step": 203537
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7264938354492188,
      "learning_rate": 1.990337621565885e-05,
      "loss": 2.9152,
      "step": 203538
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9433107376098633,
      "learning_rate": 1.990191111464151e-05,
      "loss": 2.8747,
      "step": 203539
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.429748058319092,
      "learning_rate": 1.990044606569965e-05,
      "loss": 3.0836,
      "step": 203540
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.253906488418579,
      "learning_rate": 1.989898106883363e-05,
      "loss": 2.7838,
      "step": 203541
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.549066066741943,
      "learning_rate": 1.9897516124043656e-05,
      "loss": 2.9025,
      "step": 203542
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.85014271736145,
      "learning_rate": 1.9896051231330024e-05,
      "loss": 2.9078,
      "step": 203543
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9807636737823486,
      "learning_rate": 1.9894586390693035e-05,
      "loss": 2.7625,
      "step": 203544
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.963113784790039,
      "learning_rate": 1.9893121602132922e-05,
      "loss": 2.9876,
      "step": 203545
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5259273052215576,
      "learning_rate": 1.9891656865649885e-05,
      "loss": 3.1016,
      "step": 203546
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8256280422210693,
      "learning_rate": 1.989019218124436e-05,
      "loss": 2.8047,
      "step": 203547
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4200549125671387,
      "learning_rate": 1.988872754891647e-05,
      "loss": 2.9207,
      "step": 203548
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7989978790283203,
      "learning_rate": 1.988726296866663e-05,
      "loss": 2.956,
      "step": 203549
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.048147201538086,
      "learning_rate": 1.9885798440494995e-05,
      "loss": 2.9793,
      "step": 203550
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1965742111206055,
      "learning_rate": 1.98843339644019e-05,
      "loss": 2.8002,
      "step": 203551
    },
    {
      "epoch": 2.65,
      "grad_norm": 6.62954044342041,
      "learning_rate": 1.9882869540387546e-05,
      "loss": 3.0157,
      "step": 203552
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.644838333129883,
      "learning_rate": 1.98814051684523e-05,
      "loss": 2.9426,
      "step": 203553
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9839367866516113,
      "learning_rate": 1.987994084859633e-05,
      "loss": 2.8179,
      "step": 203554
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2294788360595703,
      "learning_rate": 1.9878476580820037e-05,
      "loss": 3.1357,
      "step": 203555
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7453439235687256,
      "learning_rate": 1.9877012365123614e-05,
      "loss": 2.8063,
      "step": 203556
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9382283687591553,
      "learning_rate": 1.9875548201507296e-05,
      "loss": 3.0236,
      "step": 203557
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2032413482666016,
      "learning_rate": 1.9874084089971453e-05,
      "loss": 2.7582,
      "step": 203558
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6393182277679443,
      "learning_rate": 1.9872620030516285e-05,
      "loss": 3.1878,
      "step": 203559
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.658729076385498,
      "learning_rate": 1.9871156023142055e-05,
      "loss": 3.0855,
      "step": 203560
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.249978542327881,
      "learning_rate": 1.9869692067849096e-05,
      "loss": 2.7514,
      "step": 203561
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.631964921951294,
      "learning_rate": 1.986822816463768e-05,
      "loss": 2.8962,
      "step": 203562
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.803373336791992,
      "learning_rate": 1.9866764313507967e-05,
      "loss": 2.9475,
      "step": 203563
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.323331832885742,
      "learning_rate": 1.986530051446036e-05,
      "loss": 2.9811,
      "step": 203564
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6719067096710205,
      "learning_rate": 1.9863836767495056e-05,
      "loss": 3.104,
      "step": 203565
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5501537322998047,
      "learning_rate": 1.986237307261239e-05,
      "loss": 2.927,
      "step": 203566
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9468812942504883,
      "learning_rate": 1.9860909429812598e-05,
      "loss": 3.3407,
      "step": 203567
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5777812004089355,
      "learning_rate": 1.9859445839095945e-05,
      "loss": 2.9265,
      "step": 203568
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8331003189086914,
      "learning_rate": 1.985798230046266e-05,
      "loss": 2.8448,
      "step": 203569
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9693615436553955,
      "learning_rate": 1.9856518813913113e-05,
      "loss": 3.1728,
      "step": 203570
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3288915157318115,
      "learning_rate": 1.9855055379447503e-05,
      "loss": 3.0507,
      "step": 203571
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.31929349899292,
      "learning_rate": 1.9853591997066166e-05,
      "loss": 2.9878,
      "step": 203572
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0552167892456055,
      "learning_rate": 1.9852128666769295e-05,
      "loss": 3.0593,
      "step": 203573
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.224219560623169,
      "learning_rate": 1.985066538855723e-05,
      "loss": 3.0682,
      "step": 203574
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.087062120437622,
      "learning_rate": 1.9849202162430167e-05,
      "loss": 3.0782,
      "step": 203575
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5001275539398193,
      "learning_rate": 1.9847738988388472e-05,
      "loss": 2.618,
      "step": 203576
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.54571270942688,
      "learning_rate": 1.9846275866432348e-05,
      "loss": 2.8261,
      "step": 203577
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7493834495544434,
      "learning_rate": 1.9844812796562092e-05,
      "loss": 3.0008,
      "step": 203578
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7254748344421387,
      "learning_rate": 1.9843349778777973e-05,
      "loss": 2.7507,
      "step": 203579
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3573708534240723,
      "learning_rate": 1.9841886813080288e-05,
      "loss": 2.7301,
      "step": 203580
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7140228748321533,
      "learning_rate": 1.984042389946927e-05,
      "loss": 3.0105,
      "step": 203581
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4654622077941895,
      "learning_rate": 1.9838961037945224e-05,
      "loss": 2.9521,
      "step": 203582
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6230239868164062,
      "learning_rate": 1.9837498228508343e-05,
      "loss": 2.9892,
      "step": 203583
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8043699264526367,
      "learning_rate": 1.9836035471159028e-05,
      "loss": 3.1654,
      "step": 203584
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5562987327575684,
      "learning_rate": 1.9834572765897418e-05,
      "loss": 3.2122,
      "step": 203585
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4760403633117676,
      "learning_rate": 1.9833110112723906e-05,
      "loss": 3.0738,
      "step": 203586
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7338428497314453,
      "learning_rate": 1.9831647511638692e-05,
      "loss": 3.0289,
      "step": 203587
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5342609882354736,
      "learning_rate": 1.983018496264208e-05,
      "loss": 3.1173,
      "step": 203588
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8075594902038574,
      "learning_rate": 1.9828722465734272e-05,
      "loss": 2.6892,
      "step": 203589
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8794617652893066,
      "learning_rate": 1.9827260020915625e-05,
      "loss": 2.9118,
      "step": 203590
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.479950904846191,
      "learning_rate": 1.9825797628186347e-05,
      "loss": 2.8318,
      "step": 203591
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0671048164367676,
      "learning_rate": 1.98243352875468e-05,
      "loss": 3.123,
      "step": 203592
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3614156246185303,
      "learning_rate": 1.9822872998997187e-05,
      "loss": 3.098,
      "step": 203593
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4960575103759766,
      "learning_rate": 1.982141076253777e-05,
      "loss": 3.0207,
      "step": 203594
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1431491374969482,
      "learning_rate": 1.981994857816882e-05,
      "loss": 2.5461,
      "step": 203595
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.239891529083252,
      "learning_rate": 1.9818486445890668e-05,
      "loss": 2.95,
      "step": 203596
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2878329753875732,
      "learning_rate": 1.9817024365703516e-05,
      "loss": 3.0996,
      "step": 203597
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0294833183288574,
      "learning_rate": 1.981556233760769e-05,
      "loss": 2.7194,
      "step": 203598
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.207965850830078,
      "learning_rate": 1.9814100361603468e-05,
      "loss": 2.9059,
      "step": 203599
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.065775156021118,
      "learning_rate": 1.9812638437691077e-05,
      "loss": 2.8891,
      "step": 203600
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8090672492980957,
      "learning_rate": 1.9811176565870747e-05,
      "loss": 2.8603,
      "step": 203601
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.796297073364258,
      "learning_rate": 1.980971474614288e-05,
      "loss": 3.1086,
      "step": 203602
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.464491367340088,
      "learning_rate": 1.980825297850761e-05,
      "loss": 3.0503,
      "step": 203603
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.4960811138153076,
      "learning_rate": 1.980679126296534e-05,
      "loss": 2.9583,
      "step": 203604
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8072073459625244,
      "learning_rate": 1.9805329599516263e-05,
      "loss": 2.8065,
      "step": 203605
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2122251987457275,
      "learning_rate": 1.9803867988160683e-05,
      "loss": 2.8239,
      "step": 203606
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0267858505249023,
      "learning_rate": 1.9802406428898763e-05,
      "loss": 2.9705,
      "step": 203607
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6242542266845703,
      "learning_rate": 1.9800944921730942e-05,
      "loss": 2.6274,
      "step": 203608
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.571274995803833,
      "learning_rate": 1.9799483466657386e-05,
      "loss": 2.8421,
      "step": 203609
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6971473693847656,
      "learning_rate": 1.9798022063678422e-05,
      "loss": 2.825,
      "step": 203610
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.68410587310791,
      "learning_rate": 1.9796560712794285e-05,
      "loss": 2.9245,
      "step": 203611
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1548373699188232,
      "learning_rate": 1.9795099414005276e-05,
      "loss": 2.9973,
      "step": 203612
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1977176666259766,
      "learning_rate": 1.9793638167311566e-05,
      "loss": 3.0993,
      "step": 203613
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.736994743347168,
      "learning_rate": 1.979217697271358e-05,
      "loss": 3.0143,
      "step": 203614
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.533647060394287,
      "learning_rate": 1.979071583021149e-05,
      "loss": 3.0871,
      "step": 203615
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7806835174560547,
      "learning_rate": 1.9789254739805628e-05,
      "loss": 2.8871,
      "step": 203616
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6237144470214844,
      "learning_rate": 1.9787793701496225e-05,
      "loss": 3.0337,
      "step": 203617
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6073827743530273,
      "learning_rate": 1.9786332715283547e-05,
      "loss": 2.868,
      "step": 203618
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8385584354400635,
      "learning_rate": 1.9784871781167867e-05,
      "loss": 2.9657,
      "step": 203619
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.343275308609009,
      "learning_rate": 1.9783410899149477e-05,
      "loss": 2.7075,
      "step": 203620
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.489234685897827,
      "learning_rate": 1.9781950069228614e-05,
      "loss": 2.8125,
      "step": 203621
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.589045524597168,
      "learning_rate": 1.9780489291405644e-05,
      "loss": 2.6322,
      "step": 203622
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2774853706359863,
      "learning_rate": 1.9779028565680733e-05,
      "loss": 3.2124,
      "step": 203623
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.899627923965454,
      "learning_rate": 1.9777567892054214e-05,
      "loss": 3.0882,
      "step": 203624
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8591885566711426,
      "learning_rate": 1.9776107270526255e-05,
      "loss": 2.9804,
      "step": 203625
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.697528600692749,
      "learning_rate": 1.9774646701097286e-05,
      "loss": 2.8888,
      "step": 203626
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.989922523498535,
      "learning_rate": 1.977318618376744e-05,
      "loss": 2.9912,
      "step": 203627
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.807992935180664,
      "learning_rate": 1.9771725718537125e-05,
      "loss": 2.7035,
      "step": 203628
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6830852031707764,
      "learning_rate": 1.9770265305406498e-05,
      "loss": 3.0,
      "step": 203629
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0286474227905273,
      "learning_rate": 1.9768804944375892e-05,
      "loss": 2.7497,
      "step": 203630
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.765005588531494,
      "learning_rate": 1.9767344635445482e-05,
      "loss": 2.9946,
      "step": 203631
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.742309808731079,
      "learning_rate": 1.9765884378615694e-05,
      "loss": 3.1524,
      "step": 203632
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1247146129608154,
      "learning_rate": 1.9764424173886627e-05,
      "loss": 2.9755,
      "step": 203633
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.082242965698242,
      "learning_rate": 1.976296402125872e-05,
      "loss": 2.8509,
      "step": 203634
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.323167324066162,
      "learning_rate": 1.9761503920732168e-05,
      "loss": 3.2261,
      "step": 203635
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.111097812652588,
      "learning_rate": 1.976004387230724e-05,
      "loss": 3.1209,
      "step": 203636
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.347667932510376,
      "learning_rate": 1.975858387598417e-05,
      "loss": 2.9531,
      "step": 203637
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0283701419830322,
      "learning_rate": 1.975712393176332e-05,
      "loss": 2.886,
      "step": 203638
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2814207077026367,
      "learning_rate": 1.9755664039644824e-05,
      "loss": 2.8446,
      "step": 203639
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0801923274993896,
      "learning_rate": 1.975420419962912e-05,
      "loss": 2.8325,
      "step": 203640
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0189359188079834,
      "learning_rate": 1.9752744411716402e-05,
      "loss": 3.0231,
      "step": 203641
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.825920581817627,
      "learning_rate": 1.9751284675906876e-05,
      "loss": 3.0721,
      "step": 203642
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.679074764251709,
      "learning_rate": 1.9749824992200934e-05,
      "loss": 3.2832,
      "step": 203643
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.030540943145752,
      "learning_rate": 1.9748365360598785e-05,
      "loss": 2.9661,
      "step": 203644
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.4333882331848145,
      "learning_rate": 1.9746905781100654e-05,
      "loss": 2.9785,
      "step": 203645
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9951529502868652,
      "learning_rate": 1.9745446253706908e-05,
      "loss": 2.8763,
      "step": 203646
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.512037515640259,
      "learning_rate": 1.9743986778417754e-05,
      "loss": 2.8762,
      "step": 203647
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8543386459350586,
      "learning_rate": 1.9742527355233516e-05,
      "loss": 2.8054,
      "step": 203648
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7212085723876953,
      "learning_rate": 1.9741067984154435e-05,
      "loss": 3.0293,
      "step": 203649
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6501102447509766,
      "learning_rate": 1.973960866518074e-05,
      "loss": 2.7636,
      "step": 203650
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.746381998062134,
      "learning_rate": 1.973814939831273e-05,
      "loss": 2.9692,
      "step": 203651
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.458955764770508,
      "learning_rate": 1.973669018355074e-05,
      "loss": 2.8449,
      "step": 203652
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.186094045639038,
      "learning_rate": 1.9735231020894936e-05,
      "loss": 3.0462,
      "step": 203653
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.1472907066345215,
      "learning_rate": 1.9733771910345687e-05,
      "loss": 2.843,
      "step": 203654
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.676211357116699,
      "learning_rate": 1.973231285190322e-05,
      "loss": 2.8594,
      "step": 203655
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.965266704559326,
      "learning_rate": 1.9730853845567773e-05,
      "loss": 2.878,
      "step": 203656
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.354788303375244,
      "learning_rate": 1.9729394891339677e-05,
      "loss": 2.9208,
      "step": 203657
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8424551486968994,
      "learning_rate": 1.97279359892192e-05,
      "loss": 2.7672,
      "step": 203658
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.64212703704834,
      "learning_rate": 1.972647713920651e-05,
      "loss": 2.8572,
      "step": 203659
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.04044771194458,
      "learning_rate": 1.9725018341302034e-05,
      "loss": 3.0393,
      "step": 203660
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9811062812805176,
      "learning_rate": 1.9723559595505978e-05,
      "loss": 3.1189,
      "step": 203661
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9893054962158203,
      "learning_rate": 1.9722100901818505e-05,
      "loss": 2.9106,
      "step": 203662
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.292354106903076,
      "learning_rate": 1.9720642260240082e-05,
      "loss": 2.9797,
      "step": 203663
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0824122428894043,
      "learning_rate": 1.971918367077081e-05,
      "loss": 3.1725,
      "step": 203664
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8482699394226074,
      "learning_rate": 1.971772513341109e-05,
      "loss": 2.8039,
      "step": 203665
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1070382595062256,
      "learning_rate": 1.9716266648161117e-05,
      "loss": 2.7573,
      "step": 203666
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1023359298706055,
      "learning_rate": 1.9714808215021192e-05,
      "loss": 2.7826,
      "step": 203667
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2374751567840576,
      "learning_rate": 1.9713349833991554e-05,
      "loss": 2.9856,
      "step": 203668
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4925005435943604,
      "learning_rate": 1.9711891505072498e-05,
      "loss": 2.9048,
      "step": 203669
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.237380027770996,
      "learning_rate": 1.9710433228264287e-05,
      "loss": 2.9311,
      "step": 203670
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7650883197784424,
      "learning_rate": 1.970897500356723e-05,
      "loss": 2.6737,
      "step": 203671
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.207690715789795,
      "learning_rate": 1.9707516830981552e-05,
      "loss": 3.1315,
      "step": 203672
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.774953603744507,
      "learning_rate": 1.9706058710507522e-05,
      "loss": 3.0058,
      "step": 203673
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.697406053543091,
      "learning_rate": 1.9704600642145406e-05,
      "loss": 2.7729,
      "step": 203674
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.2191996574401855,
      "learning_rate": 1.970314262589554e-05,
      "loss": 2.8449,
      "step": 203675
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.021881580352783,
      "learning_rate": 1.970168466175812e-05,
      "loss": 2.939,
      "step": 203676
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.750812292098999,
      "learning_rate": 1.9700226749733482e-05,
      "loss": 2.7753,
      "step": 203677
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.414184093475342,
      "learning_rate": 1.9698768889821858e-05,
      "loss": 2.8643,
      "step": 203678
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2115635871887207,
      "learning_rate": 1.9697311082023515e-05,
      "loss": 2.837,
      "step": 203679
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8268728256225586,
      "learning_rate": 1.9695853326338684e-05,
      "loss": 2.9101,
      "step": 203680
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5290191173553467,
      "learning_rate": 1.9694395622767766e-05,
      "loss": 3.1581,
      "step": 203681
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4031801223754883,
      "learning_rate": 1.969293797131086e-05,
      "loss": 2.9021,
      "step": 203682
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0025761127471924,
      "learning_rate": 1.9691480371968405e-05,
      "loss": 2.9555,
      "step": 203683
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.804250478744507,
      "learning_rate": 1.969002282474059e-05,
      "loss": 3.0361,
      "step": 203684
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9140419960021973,
      "learning_rate": 1.9688565329627694e-05,
      "loss": 2.9568,
      "step": 203685
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2989606857299805,
      "learning_rate": 1.9687107886629938e-05,
      "loss": 2.7822,
      "step": 203686
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0536081790924072,
      "learning_rate": 1.9685650495747662e-05,
      "loss": 2.8732,
      "step": 203687
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9845516681671143,
      "learning_rate": 1.96841931569811e-05,
      "loss": 3.0166,
      "step": 203688
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.78676438331604,
      "learning_rate": 1.9682735870330547e-05,
      "loss": 2.6533,
      "step": 203689
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0796616077423096,
      "learning_rate": 1.9681278635796304e-05,
      "loss": 3.0561,
      "step": 203690
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.172093629837036,
      "learning_rate": 1.9679821453378574e-05,
      "loss": 3.0345,
      "step": 203691
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7142953872680664,
      "learning_rate": 1.9678364323077624e-05,
      "loss": 2.9314,
      "step": 203692
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.873621702194214,
      "learning_rate": 1.9676907244893814e-05,
      "loss": 3.0345,
      "step": 203693
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5897865295410156,
      "learning_rate": 1.9675450218827314e-05,
      "loss": 3.0509,
      "step": 203694
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1714906692504883,
      "learning_rate": 1.967399324487846e-05,
      "loss": 2.8029,
      "step": 203695
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5393030643463135,
      "learning_rate": 1.9672536323047518e-05,
      "loss": 3.1601,
      "step": 203696
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.560640573501587,
      "learning_rate": 1.967107945333475e-05,
      "loss": 2.9389,
      "step": 203697
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3710811138153076,
      "learning_rate": 1.9669622635740357e-05,
      "loss": 2.5747,
      "step": 203698
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.372276544570923,
      "learning_rate": 1.966816587026474e-05,
      "loss": 3.1148,
      "step": 203699
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.696950674057007,
      "learning_rate": 1.9666709156908033e-05,
      "loss": 2.9752,
      "step": 203700
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.071106433868408,
      "learning_rate": 1.9665252495670635e-05,
      "loss": 2.9105,
      "step": 203701
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.758225440979004,
      "learning_rate": 1.9663795886552746e-05,
      "loss": 2.8532,
      "step": 203702
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9885010719299316,
      "learning_rate": 1.9662339329554665e-05,
      "loss": 2.7568,
      "step": 203703
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.044262409210205,
      "learning_rate": 1.9660882824676594e-05,
      "loss": 3.1484,
      "step": 203704
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.891197443008423,
      "learning_rate": 1.9659426371918897e-05,
      "loss": 2.8844,
      "step": 203705
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.88777756690979,
      "learning_rate": 1.9657969971281773e-05,
      "loss": 3.0986,
      "step": 203706
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9745705127716064,
      "learning_rate": 1.9656513622765558e-05,
      "loss": 3.0818,
      "step": 203707
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5397396087646484,
      "learning_rate": 1.9655057326370482e-05,
      "loss": 2.9818,
      "step": 203708
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0399723052978516,
      "learning_rate": 1.965360108209685e-05,
      "loss": 2.9923,
      "step": 203709
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.751271963119507,
      "learning_rate": 1.9652144889944854e-05,
      "loss": 3.065,
      "step": 203710
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9640748500823975,
      "learning_rate": 1.9650688749914835e-05,
      "loss": 3.0212,
      "step": 203711
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.629143476486206,
      "learning_rate": 1.964923266200702e-05,
      "loss": 2.8625,
      "step": 203712
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2297439575195312,
      "learning_rate": 1.9647776626221744e-05,
      "loss": 2.8512,
      "step": 203713
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8335211277008057,
      "learning_rate": 1.964632064255921e-05,
      "loss": 3.1781,
      "step": 203714
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7227494716644287,
      "learning_rate": 1.9644864711019782e-05,
      "loss": 2.8905,
      "step": 203715
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.078352928161621,
      "learning_rate": 1.9643408831603592e-05,
      "loss": 2.9486,
      "step": 203716
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.280308961868286,
      "learning_rate": 1.9641953004311005e-05,
      "loss": 3.2114,
      "step": 203717
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.952284574508667,
      "learning_rate": 1.964049722914226e-05,
      "loss": 2.9746,
      "step": 203718
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5112133026123047,
      "learning_rate": 1.963904150609765e-05,
      "loss": 2.8222,
      "step": 203719
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9648969173431396,
      "learning_rate": 1.9637585835177415e-05,
      "loss": 2.8816,
      "step": 203720
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6640985012054443,
      "learning_rate": 1.963613021638195e-05,
      "loss": 3.0309,
      "step": 203721
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8182320594787598,
      "learning_rate": 1.9634674649711292e-05,
      "loss": 2.9408,
      "step": 203722
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2236080169677734,
      "learning_rate": 1.96332191351659e-05,
      "loss": 2.941,
      "step": 203723
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.124671697616577,
      "learning_rate": 1.9631763672745947e-05,
      "loss": 3.0731,
      "step": 203724
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.879488945007324,
      "learning_rate": 1.9630308262451765e-05,
      "loss": 2.9452,
      "step": 203725
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2876532077789307,
      "learning_rate": 1.962885290428359e-05,
      "loss": 2.7765,
      "step": 203726
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.122692584991455,
      "learning_rate": 1.9627397598241745e-05,
      "loss": 2.95,
      "step": 203727
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6575968265533447,
      "learning_rate": 1.962594234432644e-05,
      "loss": 2.8482,
      "step": 203728
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5708861351013184,
      "learning_rate": 1.9624487142537972e-05,
      "loss": 3.1008,
      "step": 203729
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5553019046783447,
      "learning_rate": 1.9623031992876537e-05,
      "loss": 2.842,
      "step": 203730
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9339239597320557,
      "learning_rate": 1.962157689534254e-05,
      "loss": 2.9948,
      "step": 203731
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8675856590270996,
      "learning_rate": 1.9620121849936142e-05,
      "loss": 3.0333,
      "step": 203732
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8659274578094482,
      "learning_rate": 1.9618666856657718e-05,
      "loss": 3.0919,
      "step": 203733
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.9219000339508057,
      "learning_rate": 1.9617211915507425e-05,
      "loss": 2.7916,
      "step": 203734
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.36730694770813,
      "learning_rate": 1.9615757026485635e-05,
      "loss": 3.1027,
      "step": 203735
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.678845167160034,
      "learning_rate": 1.961430218959251e-05,
      "loss": 2.784,
      "step": 203736
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.693908929824829,
      "learning_rate": 1.961284740482839e-05,
      "loss": 3.0334,
      "step": 203737
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8341896533966064,
      "learning_rate": 1.9611392672193537e-05,
      "loss": 2.9978,
      "step": 203738
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.711426019668579,
      "learning_rate": 1.9609937991688253e-05,
      "loss": 2.9159,
      "step": 203739
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.750267267227173,
      "learning_rate": 1.9608483363312767e-05,
      "loss": 2.8766,
      "step": 203740
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1591579914093018,
      "learning_rate": 1.960702878706728e-05,
      "loss": 2.8687,
      "step": 203741
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1138198375701904,
      "learning_rate": 1.9605574262952227e-05,
      "loss": 2.8421,
      "step": 203742
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1325557231903076,
      "learning_rate": 1.9604119790967775e-05,
      "loss": 2.7995,
      "step": 203743
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1421444416046143,
      "learning_rate": 1.9602665371114156e-05,
      "loss": 2.7915,
      "step": 203744
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.893143892288208,
      "learning_rate": 1.960121100339177e-05,
      "loss": 2.7468,
      "step": 203745
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.697594404220581,
      "learning_rate": 1.9599756687800783e-05,
      "loss": 2.9611,
      "step": 203746
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9689223766326904,
      "learning_rate": 1.959830242434146e-05,
      "loss": 2.7586,
      "step": 203747
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.809391498565674,
      "learning_rate": 1.959684821301414e-05,
      "loss": 2.8463,
      "step": 203748
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8198704719543457,
      "learning_rate": 1.9595394053819046e-05,
      "loss": 2.9933,
      "step": 203749
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4701647758483887,
      "learning_rate": 1.9593939946756486e-05,
      "loss": 2.9557,
      "step": 203750
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7953858375549316,
      "learning_rate": 1.9592485891826694e-05,
      "loss": 2.8558,
      "step": 203751
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8466579914093018,
      "learning_rate": 1.9591031889029963e-05,
      "loss": 3.0564,
      "step": 203752
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0616776943206787,
      "learning_rate": 1.9589577938366497e-05,
      "loss": 2.7512,
      "step": 203753
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5436487197875977,
      "learning_rate": 1.9588124039836693e-05,
      "loss": 2.8012,
      "step": 203754
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9603564739227295,
      "learning_rate": 1.9586670193440656e-05,
      "loss": 3.1088,
      "step": 203755
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0137860774993896,
      "learning_rate": 1.958521639917885e-05,
      "loss": 2.8607,
      "step": 203756
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4841091632843018,
      "learning_rate": 1.9583762657051406e-05,
      "loss": 2.7959,
      "step": 203757
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4893267154693604,
      "learning_rate": 1.9582308967058657e-05,
      "loss": 2.8593,
      "step": 203758
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3435041904449463,
      "learning_rate": 1.9580855329200806e-05,
      "loss": 2.9484,
      "step": 203759
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4218761920928955,
      "learning_rate": 1.9579401743478186e-05,
      "loss": 3.0092,
      "step": 203760
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5565805435180664,
      "learning_rate": 1.9577948209891026e-05,
      "loss": 2.9279,
      "step": 203761
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.852386713027954,
      "learning_rate": 1.9576494728439663e-05,
      "loss": 3.0254,
      "step": 203762
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2525975704193115,
      "learning_rate": 1.957504129912433e-05,
      "loss": 3.0361,
      "step": 203763
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8241233825683594,
      "learning_rate": 1.957358792194529e-05,
      "loss": 2.8802,
      "step": 203764
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.887568712234497,
      "learning_rate": 1.9572134596902744e-05,
      "loss": 3.1532,
      "step": 203765
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6640470027923584,
      "learning_rate": 1.9570681323997096e-05,
      "loss": 2.7756,
      "step": 203766
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.4695935249328613,
      "learning_rate": 1.9569228103228507e-05,
      "loss": 2.9805,
      "step": 203767
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.845693826675415,
      "learning_rate": 1.9567774934597314e-05,
      "loss": 2.9647,
      "step": 203768
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.754897356033325,
      "learning_rate": 1.956632181810378e-05,
      "loss": 3.0976,
      "step": 203769
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.9574341773986816,
      "learning_rate": 1.9564868753748174e-05,
      "loss": 2.8358,
      "step": 203770
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.359738349914551,
      "learning_rate": 1.95634157415307e-05,
      "loss": 3.0175,
      "step": 203771
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2434587478637695,
      "learning_rate": 1.9561962781451712e-05,
      "loss": 2.8683,
      "step": 203772
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6259171962738037,
      "learning_rate": 1.9560509873511422e-05,
      "loss": 3.2355,
      "step": 203773
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9916622638702393,
      "learning_rate": 1.9559057017710157e-05,
      "loss": 2.9007,
      "step": 203774
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.072277307510376,
      "learning_rate": 1.9557604214048185e-05,
      "loss": 2.8045,
      "step": 203775
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1124610900878906,
      "learning_rate": 1.9556151462525705e-05,
      "loss": 2.6405,
      "step": 203776
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.881054162979126,
      "learning_rate": 1.9554698763143016e-05,
      "loss": 2.954,
      "step": 203777
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.791785478591919,
      "learning_rate": 1.9553246115900423e-05,
      "loss": 2.8701,
      "step": 203778
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8274178504943848,
      "learning_rate": 1.9551793520798152e-05,
      "loss": 3.1506,
      "step": 203779
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.095118999481201,
      "learning_rate": 1.9550340977836543e-05,
      "loss": 3.0121,
      "step": 203780
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8668787479400635,
      "learning_rate": 1.9548888487015758e-05,
      "loss": 2.9043,
      "step": 203781
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.363297462463379,
      "learning_rate": 1.954743604833623e-05,
      "loss": 2.8229,
      "step": 203782
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4380381107330322,
      "learning_rate": 1.954598366179806e-05,
      "loss": 2.8537,
      "step": 203783
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.693941354751587,
      "learning_rate": 1.954453132740158e-05,
      "loss": 3.0228,
      "step": 203784
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.822190999984741,
      "learning_rate": 1.9543079045147058e-05,
      "loss": 2.8852,
      "step": 203785
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.194037914276123,
      "learning_rate": 1.9541626815034794e-05,
      "loss": 2.8491,
      "step": 203786
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1170754432678223,
      "learning_rate": 1.954017463706502e-05,
      "loss": 2.9206,
      "step": 203787
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.031951904296875,
      "learning_rate": 1.9538722511238102e-05,
      "loss": 3.0462,
      "step": 203788
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6363894939422607,
      "learning_rate": 1.9537270437554108e-05,
      "loss": 2.8367,
      "step": 203789
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1303000450134277,
      "learning_rate": 1.9535818416013506e-05,
      "loss": 2.9027,
      "step": 203790
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.864097833633423,
      "learning_rate": 1.953436644661642e-05,
      "loss": 3.0564,
      "step": 203791
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.469337224960327,
      "learning_rate": 1.9532914529363265e-05,
      "loss": 2.5645,
      "step": 203792
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3613476753234863,
      "learning_rate": 1.9531462664254195e-05,
      "loss": 3.061,
      "step": 203793
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0534346103668213,
      "learning_rate": 1.9530010851289578e-05,
      "loss": 2.837,
      "step": 203794
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7840123176574707,
      "learning_rate": 1.9528559090469552e-05,
      "loss": 2.7074,
      "step": 203795
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5467207431793213,
      "learning_rate": 1.9527107381794515e-05,
      "loss": 2.8555,
      "step": 203796
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1709656715393066,
      "learning_rate": 1.95256557252646e-05,
      "loss": 2.9328,
      "step": 203797
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.134681463241577,
      "learning_rate": 1.9524204120880237e-05,
      "loss": 2.7889,
      "step": 203798
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9261481761932373,
      "learning_rate": 1.9522752568641563e-05,
      "loss": 2.836,
      "step": 203799
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.560844898223877,
      "learning_rate": 1.9521301068549013e-05,
      "loss": 2.751,
      "step": 203800
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7065885066986084,
      "learning_rate": 1.9519849620602646e-05,
      "loss": 3.0142,
      "step": 203801
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6784019470214844,
      "learning_rate": 1.9518398224802867e-05,
      "loss": 2.9429,
      "step": 203802
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7941277027130127,
      "learning_rate": 1.9516946881149874e-05,
      "loss": 2.9298,
      "step": 203803
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.121817588806152,
      "learning_rate": 1.9515495589644035e-05,
      "loss": 2.9117,
      "step": 203804
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8713176250457764,
      "learning_rate": 1.9514044350285485e-05,
      "loss": 2.9482,
      "step": 203805
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3887503147125244,
      "learning_rate": 1.9512593163074687e-05,
      "loss": 3.0205,
      "step": 203806
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0682880878448486,
      "learning_rate": 1.9511142028011707e-05,
      "loss": 2.8823,
      "step": 203807
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.938105344772339,
      "learning_rate": 1.9509690945096913e-05,
      "loss": 2.8867,
      "step": 203808
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4663233757019043,
      "learning_rate": 1.9508239914330536e-05,
      "loss": 2.9175,
      "step": 203809
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8541064262390137,
      "learning_rate": 1.9506788935712914e-05,
      "loss": 2.7488,
      "step": 203810
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7327892780303955,
      "learning_rate": 1.950533800924424e-05,
      "loss": 2.9811,
      "step": 203811
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8397092819213867,
      "learning_rate": 1.950388713492489e-05,
      "loss": 2.872,
      "step": 203812
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7888684272766113,
      "learning_rate": 1.9502436312754986e-05,
      "loss": 2.9486,
      "step": 203813
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.317256212234497,
      "learning_rate": 1.9500985542734905e-05,
      "loss": 2.795,
      "step": 203814
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2411441802978516,
      "learning_rate": 1.949953482486487e-05,
      "loss": 2.901,
      "step": 203815
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2081298828125,
      "learning_rate": 1.949808415914519e-05,
      "loss": 2.8761,
      "step": 203816
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7896311283111572,
      "learning_rate": 1.949663354557606e-05,
      "loss": 2.987,
      "step": 203817
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8785548210144043,
      "learning_rate": 1.949518298415784e-05,
      "loss": 2.9187,
      "step": 203818
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.799121141433716,
      "learning_rate": 1.9493732474890777e-05,
      "loss": 2.9921,
      "step": 203819
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.004082441329956,
      "learning_rate": 1.949228201777513e-05,
      "loss": 3.0588,
      "step": 203820
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0534474849700928,
      "learning_rate": 1.9490831612811096e-05,
      "loss": 2.8588,
      "step": 203821
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.584822416305542,
      "learning_rate": 1.9489381259999082e-05,
      "loss": 3.1058,
      "step": 203822
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.0921525955200195,
      "learning_rate": 1.948793095933925e-05,
      "loss": 2.8123,
      "step": 203823
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.755134344100952,
      "learning_rate": 1.94864807108319e-05,
      "loss": 3.2434,
      "step": 203824
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.064174175262451,
      "learning_rate": 1.9485030514477363e-05,
      "loss": 2.8746,
      "step": 203825
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.296313762664795,
      "learning_rate": 1.9483580370275777e-05,
      "loss": 2.8979,
      "step": 203826
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8139548301696777,
      "learning_rate": 1.9482130278227538e-05,
      "loss": 2.8935,
      "step": 203827
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.821742057800293,
      "learning_rate": 1.948068023833288e-05,
      "loss": 3.0132,
      "step": 203828
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.278045654296875,
      "learning_rate": 1.9479230250592004e-05,
      "loss": 3.0785,
      "step": 203829
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5960946083068848,
      "learning_rate": 1.9477780315005275e-05,
      "loss": 2.9031,
      "step": 203830
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.801786422729492,
      "learning_rate": 1.947633043157293e-05,
      "loss": 3.1836,
      "step": 203831
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7861673831939697,
      "learning_rate": 1.947488060029516e-05,
      "loss": 2.7847,
      "step": 203832
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.968474864959717,
      "learning_rate": 1.9473430821172375e-05,
      "loss": 2.996,
      "step": 203833
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.517517328262329,
      "learning_rate": 1.947198109420477e-05,
      "loss": 3.0785,
      "step": 203834
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.741823673248291,
      "learning_rate": 1.9470531419392576e-05,
      "loss": 2.8408,
      "step": 203835
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.8477020263671875,
      "learning_rate": 1.9469081796736162e-05,
      "loss": 2.7626,
      "step": 203836
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.386762619018555,
      "learning_rate": 1.946763222623573e-05,
      "loss": 2.9834,
      "step": 203837
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1634857654571533,
      "learning_rate": 1.946618270789151e-05,
      "loss": 2.8812,
      "step": 203838
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5944907665252686,
      "learning_rate": 1.9464733241703867e-05,
      "loss": 2.5918,
      "step": 203839
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.573406457901001,
      "learning_rate": 1.9463283827673004e-05,
      "loss": 3.1272,
      "step": 203840
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6740033626556396,
      "learning_rate": 1.946183446579922e-05,
      "loss": 2.7556,
      "step": 203841
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.001783609390259,
      "learning_rate": 1.9460385156082815e-05,
      "loss": 2.596,
      "step": 203842
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.093364715576172,
      "learning_rate": 1.9458935898523985e-05,
      "loss": 2.9588,
      "step": 203843
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.081232786178589,
      "learning_rate": 1.9457486693123005e-05,
      "loss": 2.8611,
      "step": 203844
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.921682357788086,
      "learning_rate": 1.9456037539880233e-05,
      "loss": 2.7654,
      "step": 203845
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6283252239227295,
      "learning_rate": 1.9454588438795805e-05,
      "loss": 2.8393,
      "step": 203846
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.41282320022583,
      "learning_rate": 1.945313938987012e-05,
      "loss": 2.8317,
      "step": 203847
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.74482798576355,
      "learning_rate": 1.9451690393103347e-05,
      "loss": 2.8448,
      "step": 203848
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1680703163146973,
      "learning_rate": 1.9450241448495886e-05,
      "loss": 2.9784,
      "step": 203849
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3970701694488525,
      "learning_rate": 1.9448792556047864e-05,
      "loss": 2.8174,
      "step": 203850
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4347543716430664,
      "learning_rate": 1.9447343715759622e-05,
      "loss": 2.8375,
      "step": 203851
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3631534576416016,
      "learning_rate": 1.9445894927631354e-05,
      "loss": 2.7926,
      "step": 203852
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.530945301055908,
      "learning_rate": 1.9444446191663467e-05,
      "loss": 2.5631,
      "step": 203853
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.714099407196045,
      "learning_rate": 1.944299750785612e-05,
      "loss": 2.8325,
      "step": 203854
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.8094000816345215,
      "learning_rate": 1.944154887620968e-05,
      "loss": 2.8391,
      "step": 203855
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.243976593017578,
      "learning_rate": 1.9440100296724248e-05,
      "loss": 2.8429,
      "step": 203856
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.040198802947998,
      "learning_rate": 1.943865176940026e-05,
      "loss": 3.0123,
      "step": 203857
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9835495948791504,
      "learning_rate": 1.943720329423788e-05,
      "loss": 2.9036,
      "step": 203858
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7429885864257812,
      "learning_rate": 1.9435754871237475e-05,
      "loss": 2.657,
      "step": 203859
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7522733211517334,
      "learning_rate": 1.943430650039918e-05,
      "loss": 2.8821,
      "step": 203860
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7680954933166504,
      "learning_rate": 1.9432858181723455e-05,
      "loss": 3.0013,
      "step": 203861
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.569960594177246,
      "learning_rate": 1.943140991521037e-05,
      "loss": 2.8319,
      "step": 203862
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3515658378601074,
      "learning_rate": 1.942996170086033e-05,
      "loss": 2.8816,
      "step": 203863
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.637049674987793,
      "learning_rate": 1.9428513538673497e-05,
      "loss": 2.7161,
      "step": 203864
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.390050172805786,
      "learning_rate": 1.942706542865027e-05,
      "loss": 2.7529,
      "step": 203865
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6550917625427246,
      "learning_rate": 1.942561737079078e-05,
      "loss": 3.0297,
      "step": 203866
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2382164001464844,
      "learning_rate": 1.9424169365095464e-05,
      "loss": 2.5012,
      "step": 203867
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.6262996196746826,
      "learning_rate": 1.9422721411564392e-05,
      "loss": 3.1579,
      "step": 203868
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.019648790359497,
      "learning_rate": 1.9421273510197986e-05,
      "loss": 2.9155,
      "step": 203869
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.9412379264831543,
      "learning_rate": 1.9419825660996423e-05,
      "loss": 2.661,
      "step": 203870
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.7521934509277344,
      "learning_rate": 1.9418377863960034e-05,
      "loss": 2.8195,
      "step": 203871
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9079818725585938,
      "learning_rate": 1.9416930119089047e-05,
      "loss": 2.656,
      "step": 203872
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.5753607749938965,
      "learning_rate": 1.9415482426383834e-05,
      "loss": 3.096,
      "step": 203873
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.952366590499878,
      "learning_rate": 1.9414034785844457e-05,
      "loss": 2.772,
      "step": 203874
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.4244375228881836,
      "learning_rate": 1.9412587197471385e-05,
      "loss": 2.9533,
      "step": 203875
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.8462581634521484,
      "learning_rate": 1.9411139661264752e-05,
      "loss": 2.7982,
      "step": 203876
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.281149387359619,
      "learning_rate": 1.9409692177224923e-05,
      "loss": 2.7167,
      "step": 203877
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.794520139694214,
      "learning_rate": 1.94082447453521e-05,
      "loss": 3.2064,
      "step": 203878
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.950251817703247,
      "learning_rate": 1.9406797365646675e-05,
      "loss": 2.8888,
      "step": 203879
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.824707269668579,
      "learning_rate": 1.9405350038108726e-05,
      "loss": 3.0577,
      "step": 203880
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.388906478881836,
      "learning_rate": 1.9403902762738642e-05,
      "loss": 2.9636,
      "step": 203881
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.221564292907715,
      "learning_rate": 1.9402455539536633e-05,
      "loss": 2.8846,
      "step": 203882
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.045224666595459,
      "learning_rate": 1.9401008368503057e-05,
      "loss": 2.912,
      "step": 203883
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.6212778091430664,
      "learning_rate": 1.9399561249638085e-05,
      "loss": 2.8574,
      "step": 203884
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2877373695373535,
      "learning_rate": 1.9398114182942147e-05,
      "loss": 2.8778,
      "step": 203885
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.706430435180664,
      "learning_rate": 1.939666716841528e-05,
      "loss": 2.647,
      "step": 203886
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.846926212310791,
      "learning_rate": 1.9395220206057917e-05,
      "loss": 2.9876,
      "step": 203887
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2343592643737793,
      "learning_rate": 1.9393773295870218e-05,
      "loss": 2.9526,
      "step": 203888
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.1683037281036377,
      "learning_rate": 1.939232643785259e-05,
      "loss": 3.1214,
      "step": 203889
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.498265504837036,
      "learning_rate": 1.9390879632005163e-05,
      "loss": 2.8356,
      "step": 203890
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7966620922088623,
      "learning_rate": 1.938943287832837e-05,
      "loss": 2.7262,
      "step": 203891
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.72729754447937,
      "learning_rate": 1.9387986176822278e-05,
      "loss": 3.1265,
      "step": 203892
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.616974115371704,
      "learning_rate": 1.938653952748729e-05,
      "loss": 2.8434,
      "step": 203893
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.664449691772461,
      "learning_rate": 1.9385092930323598e-05,
      "loss": 3.0009,
      "step": 203894
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9380693435668945,
      "learning_rate": 1.9383646385331576e-05,
      "loss": 2.7175,
      "step": 203895
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.688119649887085,
      "learning_rate": 1.9382199892511386e-05,
      "loss": 2.7022,
      "step": 203896
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.2351393699645996,
      "learning_rate": 1.9380753451863428e-05,
      "loss": 2.7516,
      "step": 203897
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.995027542114258,
      "learning_rate": 1.9379307063387773e-05,
      "loss": 2.6449,
      "step": 203898
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.205315113067627,
      "learning_rate": 1.9377860727084883e-05,
      "loss": 2.9703,
      "step": 203899
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.7043356895446777,
      "learning_rate": 1.9376414442954892e-05,
      "loss": 2.7841,
      "step": 203900
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.5391407012939453,
      "learning_rate": 1.9374968210998166e-05,
      "loss": 2.869,
      "step": 203901
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.623048305511475,
      "learning_rate": 1.9373522031214872e-05,
      "loss": 2.975,
      "step": 203902
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.9039347171783447,
      "learning_rate": 1.937207590360538e-05,
      "loss": 3.0039,
      "step": 203903
    },
    {
      "epoch": 2.65,
      "grad_norm": 2.987635850906372,
      "learning_rate": 1.9370629828169913e-05,
      "loss": 2.9602,
      "step": 203904
    },
    {
      "epoch": 2.65,
      "grad_norm": 4.159180164337158,
      "learning_rate": 1.9369183804908783e-05,
      "loss": 2.8091,
      "step": 203905
    },
    {
      "epoch": 2.65,
      "grad_norm": 3.3087284564971924,
      "learning_rate": 1.936773783382215e-05,
      "loss": 2.893,
      "step": 203906
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.175635814666748,
      "learning_rate": 1.936629191491038e-05,
      "loss": 2.9503,
      "step": 203907
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7824134826660156,
      "learning_rate": 1.9364846048173678e-05,
      "loss": 2.9646,
      "step": 203908
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.314715623855591,
      "learning_rate": 1.9363400233612403e-05,
      "loss": 3.01,
      "step": 203909
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.560253143310547,
      "learning_rate": 1.936195447122676e-05,
      "loss": 3.0119,
      "step": 203910
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.905634880065918,
      "learning_rate": 1.9360508761017013e-05,
      "loss": 3.0709,
      "step": 203911
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0022940635681152,
      "learning_rate": 1.9359063102983463e-05,
      "loss": 2.8739,
      "step": 203912
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.827855348587036,
      "learning_rate": 1.9357617497126343e-05,
      "loss": 2.8068,
      "step": 203913
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.563688278198242,
      "learning_rate": 1.935617194344592e-05,
      "loss": 3.057,
      "step": 203914
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.717496633529663,
      "learning_rate": 1.9354726441942524e-05,
      "loss": 2.917,
      "step": 203915
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.482485294342041,
      "learning_rate": 1.935328099261636e-05,
      "loss": 2.8373,
      "step": 203916
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.775722026824951,
      "learning_rate": 1.9351835595467725e-05,
      "loss": 2.7982,
      "step": 203917
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.745687246322632,
      "learning_rate": 1.935039025049688e-05,
      "loss": 3.1402,
      "step": 203918
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9068033695220947,
      "learning_rate": 1.9348944957704104e-05,
      "loss": 2.9185,
      "step": 203919
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9146833419799805,
      "learning_rate": 1.934749971708962e-05,
      "loss": 3.1132,
      "step": 203920
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.931166648864746,
      "learning_rate": 1.9346054528653766e-05,
      "loss": 2.8771,
      "step": 203921
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8748958110809326,
      "learning_rate": 1.9344609392396804e-05,
      "loss": 2.8901,
      "step": 203922
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2837865352630615,
      "learning_rate": 1.9343164308318905e-05,
      "loss": 2.6392,
      "step": 203923
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.492300271987915,
      "learning_rate": 1.9341719276420464e-05,
      "loss": 2.8699,
      "step": 203924
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8406755924224854,
      "learning_rate": 1.9340274296701653e-05,
      "loss": 3.0433,
      "step": 203925
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7635016441345215,
      "learning_rate": 1.9338829369162835e-05,
      "loss": 3.0353,
      "step": 203926
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.879659414291382,
      "learning_rate": 1.9337384493804208e-05,
      "loss": 2.9646,
      "step": 203927
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8687236309051514,
      "learning_rate": 1.9335939670626077e-05,
      "loss": 2.9083,
      "step": 203928
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0216290950775146,
      "learning_rate": 1.9334494899628638e-05,
      "loss": 2.5315,
      "step": 203929
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5964770317077637,
      "learning_rate": 1.933305018081226e-05,
      "loss": 2.9021,
      "step": 203930
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9924018383026123,
      "learning_rate": 1.9331605514177107e-05,
      "loss": 3.0552,
      "step": 203931
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.787992477416992,
      "learning_rate": 1.9330160899723578e-05,
      "loss": 2.9031,
      "step": 203932
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0650906562805176,
      "learning_rate": 1.9328716337451845e-05,
      "loss": 2.8141,
      "step": 203933
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.861114740371704,
      "learning_rate": 1.9327271827362234e-05,
      "loss": 3.0286,
      "step": 203934
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5426552295684814,
      "learning_rate": 1.9325827369454915e-05,
      "loss": 3.0008,
      "step": 203935
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.047684669494629,
      "learning_rate": 1.9324382963730257e-05,
      "loss": 2.8629,
      "step": 203936
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7330398559570312,
      "learning_rate": 1.9322938610188486e-05,
      "loss": 2.8605,
      "step": 203937
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.862586498260498,
      "learning_rate": 1.9321494308829878e-05,
      "loss": 2.859,
      "step": 203938
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.942440986633301,
      "learning_rate": 1.932005005965469e-05,
      "loss": 2.9487,
      "step": 203939
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.939152240753174,
      "learning_rate": 1.9318605862663296e-05,
      "loss": 2.8992,
      "step": 203940
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.968310594558716,
      "learning_rate": 1.9317161717855756e-05,
      "loss": 2.79,
      "step": 203941
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8828139305114746,
      "learning_rate": 1.9315717625232507e-05,
      "loss": 2.8419,
      "step": 203942
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8389601707458496,
      "learning_rate": 1.9314273584793717e-05,
      "loss": 3.2206,
      "step": 203943
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.638833522796631,
      "learning_rate": 1.9312829596539747e-05,
      "loss": 2.8685,
      "step": 203944
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.002365827560425,
      "learning_rate": 1.93113856604708e-05,
      "loss": 2.9413,
      "step": 203945
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.051863670349121,
      "learning_rate": 1.9309941776587245e-05,
      "loss": 2.6374,
      "step": 203946
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.189948558807373,
      "learning_rate": 1.9308497944889178e-05,
      "loss": 2.8043,
      "step": 203947
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.233849048614502,
      "learning_rate": 1.9307054165377e-05,
      "loss": 3.0615,
      "step": 203948
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9137063026428223,
      "learning_rate": 1.930561043805088e-05,
      "loss": 3.0164,
      "step": 203949
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.616455554962158,
      "learning_rate": 1.930416676291121e-05,
      "loss": 2.9923,
      "step": 203950
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0453708171844482,
      "learning_rate": 1.930272313995813e-05,
      "loss": 2.9227,
      "step": 203951
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0158209800720215,
      "learning_rate": 1.9301279569192073e-05,
      "loss": 3.0072,
      "step": 203952
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3245720863342285,
      "learning_rate": 1.929983605061314e-05,
      "loss": 2.7578,
      "step": 203953
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.329272747039795,
      "learning_rate": 1.929839258422169e-05,
      "loss": 2.936,
      "step": 203954
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5312063694000244,
      "learning_rate": 1.9296949170017895e-05,
      "loss": 2.791,
      "step": 203955
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8611695766448975,
      "learning_rate": 1.9295505808002186e-05,
      "loss": 2.8571,
      "step": 203956
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.920898675918579,
      "learning_rate": 1.9294062498174667e-05,
      "loss": 3.1351,
      "step": 203957
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.562652826309204,
      "learning_rate": 1.92926192405358e-05,
      "loss": 2.7643,
      "step": 203958
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.840930461883545,
      "learning_rate": 1.929117603508562e-05,
      "loss": 2.6424,
      "step": 203959
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3806371688842773,
      "learning_rate": 1.928973288182456e-05,
      "loss": 2.9871,
      "step": 203960
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1902248859405518,
      "learning_rate": 1.9288289780752785e-05,
      "loss": 2.9919,
      "step": 203961
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4181387424468994,
      "learning_rate": 1.9286846731870665e-05,
      "loss": 2.7416,
      "step": 203962
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.878901243209839,
      "learning_rate": 1.9285403735178362e-05,
      "loss": 2.9392,
      "step": 203963
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.768150806427002,
      "learning_rate": 1.9283960790676312e-05,
      "loss": 3.2057,
      "step": 203964
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.411201238632202,
      "learning_rate": 1.928251789836458e-05,
      "loss": 3.078,
      "step": 203965
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.92708683013916,
      "learning_rate": 1.9281075058243567e-05,
      "loss": 2.7903,
      "step": 203966
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.24387788772583,
      "learning_rate": 1.927963227031347e-05,
      "loss": 2.6967,
      "step": 203967
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4373300075531006,
      "learning_rate": 1.9278189534574627e-05,
      "loss": 2.9391,
      "step": 203968
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.816699266433716,
      "learning_rate": 1.9276746851027203e-05,
      "loss": 3.0014,
      "step": 203969
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1262497901916504,
      "learning_rate": 1.9275304219671628e-05,
      "loss": 2.8756,
      "step": 203970
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.963256359100342,
      "learning_rate": 1.9273861640508004e-05,
      "loss": 2.9876,
      "step": 203971
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.880418300628662,
      "learning_rate": 1.9272419113536697e-05,
      "loss": 2.9878,
      "step": 203972
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4076685905456543,
      "learning_rate": 1.9270976638757906e-05,
      "loss": 3.0943,
      "step": 203973
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.901137351989746,
      "learning_rate": 1.926953421617197e-05,
      "loss": 2.8708,
      "step": 203974
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9272258281707764,
      "learning_rate": 1.926809184577911e-05,
      "loss": 3.0166,
      "step": 203975
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.674095630645752,
      "learning_rate": 1.926664952757967e-05,
      "loss": 3.1641,
      "step": 203976
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.037537097930908,
      "learning_rate": 1.9265207261573778e-05,
      "loss": 3.1115,
      "step": 203977
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3857929706573486,
      "learning_rate": 1.9263765047761838e-05,
      "loss": 3.1554,
      "step": 203978
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5944759845733643,
      "learning_rate": 1.926232288614401e-05,
      "loss": 3.0381,
      "step": 203979
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6155858039855957,
      "learning_rate": 1.9260880776720667e-05,
      "loss": 2.7683,
      "step": 203980
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.138791084289551,
      "learning_rate": 1.9259438719491972e-05,
      "loss": 2.9151,
      "step": 203981
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2022182941436768,
      "learning_rate": 1.9257996714458325e-05,
      "loss": 2.9756,
      "step": 203982
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.264843463897705,
      "learning_rate": 1.9256554761619892e-05,
      "loss": 3.0085,
      "step": 203983
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0381686687469482,
      "learning_rate": 1.925511286097694e-05,
      "loss": 3.0368,
      "step": 203984
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5965685844421387,
      "learning_rate": 1.9253671012529737e-05,
      "loss": 2.9045,
      "step": 203985
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6495115756988525,
      "learning_rate": 1.9252229216278646e-05,
      "loss": 2.926,
      "step": 203986
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.137406826019287,
      "learning_rate": 1.925078747222377e-05,
      "loss": 2.9339,
      "step": 203987
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6678597927093506,
      "learning_rate": 1.9249345780365544e-05,
      "loss": 2.6832,
      "step": 203988
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8129992485046387,
      "learning_rate": 1.9247904140704163e-05,
      "loss": 3.1306,
      "step": 203989
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.697547197341919,
      "learning_rate": 1.9246462553239927e-05,
      "loss": 2.8382,
      "step": 203990
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.834566593170166,
      "learning_rate": 1.9245021017972972e-05,
      "loss": 2.8372,
      "step": 203991
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7283551692962646,
      "learning_rate": 1.9243579534903765e-05,
      "loss": 2.8815,
      "step": 203992
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1426544189453125,
      "learning_rate": 1.92421381040324e-05,
      "loss": 2.8923,
      "step": 203993
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0354669094085693,
      "learning_rate": 1.924069672535925e-05,
      "loss": 2.6215,
      "step": 203994
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2566590309143066,
      "learning_rate": 1.923925539888458e-05,
      "loss": 2.8284,
      "step": 203995
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6880791187286377,
      "learning_rate": 1.9237814124608652e-05,
      "loss": 2.793,
      "step": 203996
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8093550205230713,
      "learning_rate": 1.9236372902531638e-05,
      "loss": 2.8578,
      "step": 203997
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7663652896881104,
      "learning_rate": 1.9234931732653934e-05,
      "loss": 3.1731,
      "step": 203998
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.952439069747925,
      "learning_rate": 1.9233490614975712e-05,
      "loss": 2.8995,
      "step": 203999
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1131670475006104,
      "learning_rate": 1.923204954949733e-05,
      "loss": 2.8012,
      "step": 204000
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8689236640930176,
      "learning_rate": 1.9230608536218995e-05,
      "loss": 2.8261,
      "step": 204001
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.106968402862549,
      "learning_rate": 1.922916757514097e-05,
      "loss": 3.0395,
      "step": 204002
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6286652088165283,
      "learning_rate": 1.9227726666263553e-05,
      "loss": 2.9252,
      "step": 204003
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1143832206726074,
      "learning_rate": 1.922628580958705e-05,
      "loss": 2.7018,
      "step": 204004
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.971565008163452,
      "learning_rate": 1.922484500511159e-05,
      "loss": 3.0514,
      "step": 204005
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.362733840942383,
      "learning_rate": 1.9223404252837604e-05,
      "loss": 3.0275,
      "step": 204006
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.78128719329834,
      "learning_rate": 1.9221963552765298e-05,
      "loss": 2.9814,
      "step": 204007
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.460613965988159,
      "learning_rate": 1.9220522904894862e-05,
      "loss": 3.0794,
      "step": 204008
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6055359840393066,
      "learning_rate": 1.9219082309226675e-05,
      "loss": 2.8788,
      "step": 204009
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7036197185516357,
      "learning_rate": 1.9217641765760928e-05,
      "loss": 3.0212,
      "step": 204010
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6924233436584473,
      "learning_rate": 1.9216201274497954e-05,
      "loss": 2.8035,
      "step": 204011
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7481677532196045,
      "learning_rate": 1.9214760835437993e-05,
      "loss": 2.9023,
      "step": 204012
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.954204797744751,
      "learning_rate": 1.9213320448581303e-05,
      "loss": 3.1062,
      "step": 204013
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.645620822906494,
      "learning_rate": 1.9211880113928124e-05,
      "loss": 2.9223,
      "step": 204014
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6374053955078125,
      "learning_rate": 1.9210439831478785e-05,
      "loss": 2.7621,
      "step": 204015
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.289991617202759,
      "learning_rate": 1.920899960123349e-05,
      "loss": 2.864,
      "step": 204016
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.175166368484497,
      "learning_rate": 1.92075594231926e-05,
      "loss": 2.818,
      "step": 204017
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2235729694366455,
      "learning_rate": 1.9206119297356314e-05,
      "loss": 2.8824,
      "step": 204018
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4833881855010986,
      "learning_rate": 1.9204679223724907e-05,
      "loss": 2.8036,
      "step": 204019
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8587870597839355,
      "learning_rate": 1.9203239202298603e-05,
      "loss": 3.1377,
      "step": 204020
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7871625423431396,
      "learning_rate": 1.9201799233077775e-05,
      "loss": 2.7568,
      "step": 204021
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8924427032470703,
      "learning_rate": 1.9200359316062553e-05,
      "loss": 2.7752,
      "step": 204022
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8224236965179443,
      "learning_rate": 1.919891945125337e-05,
      "loss": 3.0211,
      "step": 204023
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.075859308242798,
      "learning_rate": 1.919747963865036e-05,
      "loss": 2.9978,
      "step": 204024
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.792249917984009,
      "learning_rate": 1.919603987825392e-05,
      "loss": 2.8919,
      "step": 204025
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.117459535598755,
      "learning_rate": 1.919460017006412e-05,
      "loss": 3.2264,
      "step": 204026
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.967499017715454,
      "learning_rate": 1.9193160514081428e-05,
      "loss": 2.8379,
      "step": 204027
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2454335689544678,
      "learning_rate": 1.9191720910305976e-05,
      "loss": 2.8542,
      "step": 204028
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.492380380630493,
      "learning_rate": 1.9190281358738124e-05,
      "loss": 3.1717,
      "step": 204029
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7031898498535156,
      "learning_rate": 1.9188841859378046e-05,
      "loss": 3.2151,
      "step": 204030
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9883229732513428,
      "learning_rate": 1.9187402412226172e-05,
      "loss": 2.8145,
      "step": 204031
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0165598392486572,
      "learning_rate": 1.918596301728257e-05,
      "loss": 2.878,
      "step": 204032
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5733392238616943,
      "learning_rate": 1.9184523674547636e-05,
      "loss": 3.0974,
      "step": 204033
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.00658917427063,
      "learning_rate": 1.9183084384021542e-05,
      "loss": 3.0002,
      "step": 204034
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.906536102294922,
      "learning_rate": 1.9181645145704682e-05,
      "loss": 2.8465,
      "step": 204035
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.139960765838623,
      "learning_rate": 1.9180205959597195e-05,
      "loss": 3.0096,
      "step": 204036
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.213592290878296,
      "learning_rate": 1.9178766825699478e-05,
      "loss": 2.9058,
      "step": 204037
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.02482271194458,
      "learning_rate": 1.917732774401166e-05,
      "loss": 3.0172,
      "step": 204038
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8491458892822266,
      "learning_rate": 1.917588871453415e-05,
      "loss": 3.0308,
      "step": 204039
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.64975643157959,
      "learning_rate": 1.917444973726704e-05,
      "loss": 3.1635,
      "step": 204040
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2627811431884766,
      "learning_rate": 1.9173010812210798e-05,
      "loss": 3.0573,
      "step": 204041
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.618513584136963,
      "learning_rate": 1.9171571939365527e-05,
      "loss": 2.8014,
      "step": 204042
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.065920829772949,
      "learning_rate": 1.9170133118731656e-05,
      "loss": 2.811,
      "step": 204043
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.739793300628662,
      "learning_rate": 1.9168694350309254e-05,
      "loss": 2.8997,
      "step": 204044
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6970512866973877,
      "learning_rate": 1.9167255634098756e-05,
      "loss": 2.8128,
      "step": 204045
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6006617546081543,
      "learning_rate": 1.916581697010032e-05,
      "loss": 2.7918,
      "step": 204046
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2598445415496826,
      "learning_rate": 1.9164378358314292e-05,
      "loss": 2.9356,
      "step": 204047
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.82562255859375,
      "learning_rate": 1.916293979874086e-05,
      "loss": 2.789,
      "step": 204048
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9383833408355713,
      "learning_rate": 1.91615012913804e-05,
      "loss": 2.9627,
      "step": 204049
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3130900859832764,
      "learning_rate": 1.9160062836233137e-05,
      "loss": 2.9155,
      "step": 204050
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7936737537384033,
      "learning_rate": 1.9158624433299276e-05,
      "loss": 3.0404,
      "step": 204051
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.247488498687744,
      "learning_rate": 1.915718608257911e-05,
      "loss": 2.7611,
      "step": 204052
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.116589069366455,
      "learning_rate": 1.915574778407295e-05,
      "loss": 2.8167,
      "step": 204053
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.486689567565918,
      "learning_rate": 1.915430953778102e-05,
      "loss": 2.7131,
      "step": 204054
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.071193218231201,
      "learning_rate": 1.9152871343703623e-05,
      "loss": 2.6265,
      "step": 204055
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2872347831726074,
      "learning_rate": 1.9151433201841027e-05,
      "loss": 2.9875,
      "step": 204056
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6990227699279785,
      "learning_rate": 1.9149995112193462e-05,
      "loss": 2.9903,
      "step": 204057
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.263092279434204,
      "learning_rate": 1.9148557074761195e-05,
      "loss": 3.018,
      "step": 204058
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9475150108337402,
      "learning_rate": 1.914711908954456e-05,
      "loss": 2.693,
      "step": 204059
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2179477214813232,
      "learning_rate": 1.914568115654369e-05,
      "loss": 2.9808,
      "step": 204060
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.497220516204834,
      "learning_rate": 1.914424327575902e-05,
      "loss": 2.6882,
      "step": 204061
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.689483404159546,
      "learning_rate": 1.9142805447190712e-05,
      "loss": 2.7958,
      "step": 204062
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5310287475585938,
      "learning_rate": 1.9141367670839102e-05,
      "loss": 2.7721,
      "step": 204063
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9111719131469727,
      "learning_rate": 1.9139929946704326e-05,
      "loss": 2.9197,
      "step": 204064
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.390587091445923,
      "learning_rate": 1.913849227478681e-05,
      "loss": 3.047,
      "step": 204065
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0061354637145996,
      "learning_rate": 1.9137054655086693e-05,
      "loss": 2.9953,
      "step": 204066
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.056192636489868,
      "learning_rate": 1.913561708760434e-05,
      "loss": 2.8714,
      "step": 204067
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.7375621795654297,
      "learning_rate": 1.913417957233998e-05,
      "loss": 2.9387,
      "step": 204068
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.904449939727783,
      "learning_rate": 1.9132742109293887e-05,
      "loss": 2.9006,
      "step": 204069
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9644927978515625,
      "learning_rate": 1.9131304698466254e-05,
      "loss": 2.9378,
      "step": 204070
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9003829956054688,
      "learning_rate": 1.9129867339857487e-05,
      "loss": 2.839,
      "step": 204071
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3696560859680176,
      "learning_rate": 1.9128430033467712e-05,
      "loss": 2.8814,
      "step": 204072
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.087311029434204,
      "learning_rate": 1.91269927792973e-05,
      "loss": 2.8669,
      "step": 204073
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9081287384033203,
      "learning_rate": 1.9125555577346485e-05,
      "loss": 2.8774,
      "step": 204074
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.091860055923462,
      "learning_rate": 1.9124118427615566e-05,
      "loss": 3.0574,
      "step": 204075
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1422853469848633,
      "learning_rate": 1.9122681330104705e-05,
      "loss": 2.9345,
      "step": 204076
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.902141571044922,
      "learning_rate": 1.9121244284814276e-05,
      "loss": 2.7413,
      "step": 204077
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9906227588653564,
      "learning_rate": 1.911980729174447e-05,
      "loss": 2.9014,
      "step": 204078
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.479944944381714,
      "learning_rate": 1.911837035089566e-05,
      "loss": 3.0313,
      "step": 204079
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8576102256774902,
      "learning_rate": 1.9116933462268014e-05,
      "loss": 2.9153,
      "step": 204080
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.131329298019409,
      "learning_rate": 1.911549662586186e-05,
      "loss": 2.8089,
      "step": 204081
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.74275803565979,
      "learning_rate": 1.9114059841677366e-05,
      "loss": 2.7516,
      "step": 204082
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1278698444366455,
      "learning_rate": 1.911262310971493e-05,
      "loss": 2.9859,
      "step": 204083
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2680249214172363,
      "learning_rate": 1.9111186429974723e-05,
      "loss": 2.7391,
      "step": 204084
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8365495204925537,
      "learning_rate": 1.910974980245711e-05,
      "loss": 2.9323,
      "step": 204085
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2165560722351074,
      "learning_rate": 1.9108313227162253e-05,
      "loss": 2.8513,
      "step": 204086
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.79746150970459,
      "learning_rate": 1.9106876704090424e-05,
      "loss": 2.505,
      "step": 204087
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2256507873535156,
      "learning_rate": 1.910544023324202e-05,
      "loss": 3.1397,
      "step": 204088
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.414689779281616,
      "learning_rate": 1.9104003814617176e-05,
      "loss": 2.7954,
      "step": 204089
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.902998924255371,
      "learning_rate": 1.9102567448216156e-05,
      "loss": 2.7493,
      "step": 204090
    },
    {
      "epoch": 2.66,
      "grad_norm": 5.4860944747924805,
      "learning_rate": 1.9101131134039326e-05,
      "loss": 2.9868,
      "step": 204091
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.48354172706604,
      "learning_rate": 1.909969487208689e-05,
      "loss": 2.9118,
      "step": 204092
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.278195381164551,
      "learning_rate": 1.909825866235908e-05,
      "loss": 2.9551,
      "step": 204093
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.705249071121216,
      "learning_rate": 1.9096822504856258e-05,
      "loss": 2.9265,
      "step": 204094
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.367218494415283,
      "learning_rate": 1.9095386399578626e-05,
      "loss": 2.9565,
      "step": 204095
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.104838848114014,
      "learning_rate": 1.9093950346526454e-05,
      "loss": 3.0338,
      "step": 204096
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2995011806488037,
      "learning_rate": 1.909251434570004e-05,
      "loss": 2.8633,
      "step": 204097
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.145646095275879,
      "learning_rate": 1.9091078397099614e-05,
      "loss": 3.0428,
      "step": 204098
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8586583137512207,
      "learning_rate": 1.9089642500725444e-05,
      "loss": 3.2042,
      "step": 204099
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7492895126342773,
      "learning_rate": 1.908820665657783e-05,
      "loss": 3.2073,
      "step": 204100
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.393961191177368,
      "learning_rate": 1.9086770864656975e-05,
      "loss": 2.9923,
      "step": 204101
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.435208320617676,
      "learning_rate": 1.9085335124963276e-05,
      "loss": 2.8301,
      "step": 204102
    },
    {
      "epoch": 2.66,
      "grad_norm": 6.275005340576172,
      "learning_rate": 1.9083899437496864e-05,
      "loss": 2.9387,
      "step": 204103
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8693270683288574,
      "learning_rate": 1.908246380225811e-05,
      "loss": 2.9052,
      "step": 204104
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8170223236083984,
      "learning_rate": 1.9081028219247142e-05,
      "loss": 3.0153,
      "step": 204105
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9933221340179443,
      "learning_rate": 1.9079592688464396e-05,
      "loss": 2.9582,
      "step": 204106
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.448991537094116,
      "learning_rate": 1.9078157209909972e-05,
      "loss": 3.0361,
      "step": 204107
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.663498640060425,
      "learning_rate": 1.907672178358427e-05,
      "loss": 2.7831,
      "step": 204108
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0836734771728516,
      "learning_rate": 1.907528640948749e-05,
      "loss": 3.0764,
      "step": 204109
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1729118824005127,
      "learning_rate": 1.9073851087619995e-05,
      "loss": 3.0158,
      "step": 204110
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8174753189086914,
      "learning_rate": 1.907241581798189e-05,
      "loss": 2.6659,
      "step": 204111
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.649129629135132,
      "learning_rate": 1.9070980600573573e-05,
      "loss": 2.7958,
      "step": 204112
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8022046089172363,
      "learning_rate": 1.9069545435395207e-05,
      "loss": 2.6302,
      "step": 204113
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5823068618774414,
      "learning_rate": 1.9068110322447163e-05,
      "loss": 2.9993,
      "step": 204114
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.464427947998047,
      "learning_rate": 1.9066675261729603e-05,
      "loss": 2.8713,
      "step": 204115
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9302194118499756,
      "learning_rate": 1.906524025324293e-05,
      "loss": 3.0401,
      "step": 204116
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.88348650932312,
      "learning_rate": 1.906380529698731e-05,
      "loss": 2.9589,
      "step": 204117
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0987918376922607,
      "learning_rate": 1.906237039296301e-05,
      "loss": 2.8813,
      "step": 204118
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6784520149230957,
      "learning_rate": 1.9060935541170297e-05,
      "loss": 2.9499,
      "step": 204119
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8484530448913574,
      "learning_rate": 1.90595007416095e-05,
      "loss": 3.0237,
      "step": 204120
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1309263706207275,
      "learning_rate": 1.9058065994280823e-05,
      "loss": 2.871,
      "step": 204121
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.540304660797119,
      "learning_rate": 1.9056631299184568e-05,
      "loss": 2.8132,
      "step": 204122
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.752893924713135,
      "learning_rate": 1.905519665632099e-05,
      "loss": 3.0956,
      "step": 204123
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.733380079269409,
      "learning_rate": 1.905376206569037e-05,
      "loss": 3.0004,
      "step": 204124
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0046544075012207,
      "learning_rate": 1.9052327527292932e-05,
      "loss": 2.9229,
      "step": 204125
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1433706283569336,
      "learning_rate": 1.9050893041128977e-05,
      "loss": 3.1401,
      "step": 204126
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4359545707702637,
      "learning_rate": 1.9049458607198743e-05,
      "loss": 2.9882,
      "step": 204127
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5479214191436768,
      "learning_rate": 1.9048024225502556e-05,
      "loss": 2.9485,
      "step": 204128
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8669962882995605,
      "learning_rate": 1.904658989604062e-05,
      "loss": 3.0593,
      "step": 204129
    },
    {
      "epoch": 2.66,
      "grad_norm": 5.405270099639893,
      "learning_rate": 1.9045155618813268e-05,
      "loss": 2.8403,
      "step": 204130
    },
    {
      "epoch": 2.66,
      "grad_norm": 6.5863037109375,
      "learning_rate": 1.9043721393820667e-05,
      "loss": 2.633,
      "step": 204131
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.520605564117432,
      "learning_rate": 1.904228722106318e-05,
      "loss": 2.849,
      "step": 204132
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5479934215545654,
      "learning_rate": 1.9040853100540975e-05,
      "loss": 2.885,
      "step": 204133
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5117363929748535,
      "learning_rate": 1.9039419032254423e-05,
      "loss": 2.7611,
      "step": 204134
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.909515857696533,
      "learning_rate": 1.9037985016203783e-05,
      "loss": 2.9801,
      "step": 204135
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2907845973968506,
      "learning_rate": 1.9036551052389258e-05,
      "loss": 3.0301,
      "step": 204136
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5636181831359863,
      "learning_rate": 1.903511714081112e-05,
      "loss": 2.8642,
      "step": 204137
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.894414186477661,
      "learning_rate": 1.903368328146966e-05,
      "loss": 2.9056,
      "step": 204138
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6365904808044434,
      "learning_rate": 1.9032249474365147e-05,
      "loss": 2.9715,
      "step": 204139
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.700772762298584,
      "learning_rate": 1.9030815719497848e-05,
      "loss": 2.7541,
      "step": 204140
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6325292587280273,
      "learning_rate": 1.9029382016868033e-05,
      "loss": 2.7105,
      "step": 204141
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.562906503677368,
      "learning_rate": 1.9027948366475966e-05,
      "loss": 2.8633,
      "step": 204142
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.744861364364624,
      "learning_rate": 1.9026514768321875e-05,
      "loss": 3.1459,
      "step": 204143
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.954632520675659,
      "learning_rate": 1.90250812224061e-05,
      "loss": 2.9936,
      "step": 204144
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9539523124694824,
      "learning_rate": 1.9023647728728808e-05,
      "loss": 3.0425,
      "step": 204145
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.164323568344116,
      "learning_rate": 1.902221428729036e-05,
      "loss": 2.7388,
      "step": 204146
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.732795476913452,
      "learning_rate": 1.9020780898090993e-05,
      "loss": 2.7205,
      "step": 204147
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.755207061767578,
      "learning_rate": 1.901934756113097e-05,
      "loss": 2.5249,
      "step": 204148
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3466501235961914,
      "learning_rate": 1.9017914276410528e-05,
      "loss": 2.7642,
      "step": 204149
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.98075008392334,
      "learning_rate": 1.901648104393e-05,
      "loss": 3.0893,
      "step": 204150
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.665841817855835,
      "learning_rate": 1.9015047863689547e-05,
      "loss": 3.0569,
      "step": 204151
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.39564847946167,
      "learning_rate": 1.9013614735689543e-05,
      "loss": 2.8876,
      "step": 204152
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7297372817993164,
      "learning_rate": 1.901218165993025e-05,
      "loss": 2.9851,
      "step": 204153
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.423252820968628,
      "learning_rate": 1.9010748636411864e-05,
      "loss": 2.7564,
      "step": 204154
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.817667245864868,
      "learning_rate": 1.9009315665134626e-05,
      "loss": 2.8774,
      "step": 204155
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.297849178314209,
      "learning_rate": 1.9007882746098935e-05,
      "loss": 3.1397,
      "step": 204156
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6145272254943848,
      "learning_rate": 1.900644987930492e-05,
      "loss": 3.0926,
      "step": 204157
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.800820827484131,
      "learning_rate": 1.900501706475298e-05,
      "loss": 2.9872,
      "step": 204158
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7223763465881348,
      "learning_rate": 1.9003584302443287e-05,
      "loss": 3.0319,
      "step": 204159
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2827813625335693,
      "learning_rate": 1.9002151592376136e-05,
      "loss": 3.0172,
      "step": 204160
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7857964038848877,
      "learning_rate": 1.9000718934551727e-05,
      "loss": 3.1806,
      "step": 204161
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.893911361694336,
      "learning_rate": 1.8999286328970464e-05,
      "loss": 2.8417,
      "step": 204162
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6405680179595947,
      "learning_rate": 1.8997853775632476e-05,
      "loss": 2.9365,
      "step": 204163
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1008291244506836,
      "learning_rate": 1.8996421274538165e-05,
      "loss": 2.8541,
      "step": 204164
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.092857599258423,
      "learning_rate": 1.8994988825687697e-05,
      "loss": 3.171,
      "step": 204165
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6618924140930176,
      "learning_rate": 1.899355642908137e-05,
      "loss": 2.9484,
      "step": 204166
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4969003200531006,
      "learning_rate": 1.8992124084719385e-05,
      "loss": 2.7528,
      "step": 204167
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8336806297302246,
      "learning_rate": 1.8990691792602142e-05,
      "loss": 3.0583,
      "step": 204168
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2981479167938232,
      "learning_rate": 1.8989259552729773e-05,
      "loss": 2.897,
      "step": 204169
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.804431200027466,
      "learning_rate": 1.8987827365102647e-05,
      "loss": 3.0422,
      "step": 204170
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0240068435668945,
      "learning_rate": 1.8986395229720995e-05,
      "loss": 2.9801,
      "step": 204171
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.431358814239502,
      "learning_rate": 1.898496314658502e-05,
      "loss": 2.8802,
      "step": 204172
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.853440999984741,
      "learning_rate": 1.8983531115695116e-05,
      "loss": 3.0575,
      "step": 204173
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9027953147888184,
      "learning_rate": 1.8982099137051455e-05,
      "loss": 2.788,
      "step": 204174
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.149360418319702,
      "learning_rate": 1.8980667210654298e-05,
      "loss": 2.9732,
      "step": 204175
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.7407259941101074,
      "learning_rate": 1.8979235336503983e-05,
      "loss": 2.9746,
      "step": 204176
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.368725299835205,
      "learning_rate": 1.897780351460071e-05,
      "loss": 3.0104,
      "step": 204177
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.031871795654297,
      "learning_rate": 1.897637174494474e-05,
      "loss": 2.8461,
      "step": 204178
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9320006370544434,
      "learning_rate": 1.897494002753641e-05,
      "loss": 2.8955,
      "step": 204179
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.050879955291748,
      "learning_rate": 1.8973508362375954e-05,
      "loss": 2.9096,
      "step": 204180
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6818978786468506,
      "learning_rate": 1.8972076749463573e-05,
      "loss": 2.878,
      "step": 204181
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6765990257263184,
      "learning_rate": 1.8970645188799626e-05,
      "loss": 2.7801,
      "step": 204182
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.850921869277954,
      "learning_rate": 1.8969213680384287e-05,
      "loss": 2.822,
      "step": 204183
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.847740411758423,
      "learning_rate": 1.8967782224217955e-05,
      "loss": 2.8084,
      "step": 204184
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5912210941314697,
      "learning_rate": 1.896635082030079e-05,
      "loss": 3.0021,
      "step": 204185
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8791098594665527,
      "learning_rate": 1.8964919468633034e-05,
      "loss": 2.6859,
      "step": 204186
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.89640212059021,
      "learning_rate": 1.8963488169215078e-05,
      "loss": 2.9931,
      "step": 204187
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6950831413269043,
      "learning_rate": 1.89620569220471e-05,
      "loss": 2.7947,
      "step": 204188
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.212404727935791,
      "learning_rate": 1.8960625727129353e-05,
      "loss": 2.9632,
      "step": 204189
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.365182876586914,
      "learning_rate": 1.8959194584462146e-05,
      "loss": 3.042,
      "step": 204190
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5591671466827393,
      "learning_rate": 1.895776349404574e-05,
      "loss": 2.8259,
      "step": 204191
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2320034503936768,
      "learning_rate": 1.895633245588034e-05,
      "loss": 2.8723,
      "step": 204192
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9319379329681396,
      "learning_rate": 1.895490146996631e-05,
      "loss": 2.9597,
      "step": 204193
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6045918464660645,
      "learning_rate": 1.895347053630385e-05,
      "loss": 3.0547,
      "step": 204194
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.661920070648193,
      "learning_rate": 1.8952039654893258e-05,
      "loss": 2.5699,
      "step": 204195
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.854962110519409,
      "learning_rate": 1.89506088257348e-05,
      "loss": 2.8458,
      "step": 204196
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0448648929595947,
      "learning_rate": 1.894917804882875e-05,
      "loss": 2.9771,
      "step": 204197
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.584613561630249,
      "learning_rate": 1.8947747324175267e-05,
      "loss": 2.7417,
      "step": 204198
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8579201698303223,
      "learning_rate": 1.8946316651774786e-05,
      "loss": 2.9094,
      "step": 204199
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.025601863861084,
      "learning_rate": 1.8944886031627404e-05,
      "loss": 2.8553,
      "step": 204200
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.64394474029541,
      "learning_rate": 1.894345546373356e-05,
      "loss": 2.7116,
      "step": 204201
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.640115737915039,
      "learning_rate": 1.8942024948093415e-05,
      "loss": 2.8101,
      "step": 204202
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.150033473968506,
      "learning_rate": 1.8940594484707238e-05,
      "loss": 3.0338,
      "step": 204203
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9701058864593506,
      "learning_rate": 1.8939164073575296e-05,
      "loss": 2.9242,
      "step": 204204
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3234939575195312,
      "learning_rate": 1.893773371469789e-05,
      "loss": 2.785,
      "step": 204205
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.901026725769043,
      "learning_rate": 1.8936303408075214e-05,
      "loss": 2.9115,
      "step": 204206
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0217764377593994,
      "learning_rate": 1.893487315370764e-05,
      "loss": 2.8622,
      "step": 204207
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5697147846221924,
      "learning_rate": 1.8933442951595403e-05,
      "loss": 2.8198,
      "step": 204208
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.540600538253784,
      "learning_rate": 1.8932012801738694e-05,
      "loss": 2.7708,
      "step": 204209
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9398491382598877,
      "learning_rate": 1.893058270413782e-05,
      "loss": 2.8037,
      "step": 204210
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.855165719985962,
      "learning_rate": 1.8929152658793112e-05,
      "loss": 2.7654,
      "step": 204211
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.950232744216919,
      "learning_rate": 1.8927722665704703e-05,
      "loss": 3.0644,
      "step": 204212
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.822563886642456,
      "learning_rate": 1.8926292724872992e-05,
      "loss": 3.0185,
      "step": 204213
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.136317014694214,
      "learning_rate": 1.8924862836298182e-05,
      "loss": 2.9596,
      "step": 204214
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.902472734451294,
      "learning_rate": 1.892343299998057e-05,
      "loss": 2.8748,
      "step": 204215
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4075968265533447,
      "learning_rate": 1.8922003215920324e-05,
      "loss": 2.8471,
      "step": 204216
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.926820755004883,
      "learning_rate": 1.892057348411784e-05,
      "loss": 3.1165,
      "step": 204217
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.727975606918335,
      "learning_rate": 1.891914380457329e-05,
      "loss": 3.0206,
      "step": 204218
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0342180728912354,
      "learning_rate": 1.8917714177287037e-05,
      "loss": 2.9972,
      "step": 204219
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1798644065856934,
      "learning_rate": 1.8916284602259247e-05,
      "loss": 2.8507,
      "step": 204220
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0825583934783936,
      "learning_rate": 1.8914855079490255e-05,
      "loss": 3.2418,
      "step": 204221
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7302753925323486,
      "learning_rate": 1.8913425608980225e-05,
      "loss": 3.054,
      "step": 204222
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.618480682373047,
      "learning_rate": 1.891199619072956e-05,
      "loss": 2.7987,
      "step": 204223
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.059180736541748,
      "learning_rate": 1.8910566824738425e-05,
      "loss": 2.9811,
      "step": 204224
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.752500295639038,
      "learning_rate": 1.8909137511007157e-05,
      "loss": 2.7349,
      "step": 204225
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7438533306121826,
      "learning_rate": 1.890770824953598e-05,
      "loss": 2.8545,
      "step": 204226
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2457830905914307,
      "learning_rate": 1.8906279040325167e-05,
      "loss": 3.0261,
      "step": 204227
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.233562469482422,
      "learning_rate": 1.890484988337495e-05,
      "loss": 3.0694,
      "step": 204228
    },
    {
      "epoch": 2.66,
      "grad_norm": 5.3874335289001465,
      "learning_rate": 1.8903420778685663e-05,
      "loss": 3.0514,
      "step": 204229
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.207594871520996,
      "learning_rate": 1.8901991726257472e-05,
      "loss": 2.9119,
      "step": 204230
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.477199077606201,
      "learning_rate": 1.8900562726090774e-05,
      "loss": 3.0593,
      "step": 204231
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6934142112731934,
      "learning_rate": 1.889913377818577e-05,
      "loss": 3.1218,
      "step": 204232
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.91904616355896,
      "learning_rate": 1.889770488254273e-05,
      "loss": 2.9253,
      "step": 204233
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.75253963470459,
      "learning_rate": 1.8896276039161852e-05,
      "loss": 2.9593,
      "step": 204234
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.492884635925293,
      "learning_rate": 1.8894847248043498e-05,
      "loss": 3.0817,
      "step": 204235
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4216649532318115,
      "learning_rate": 1.8893418509187873e-05,
      "loss": 2.6005,
      "step": 204236
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3446578979492188,
      "learning_rate": 1.889198982259531e-05,
      "loss": 2.8791,
      "step": 204237
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3627073764801025,
      "learning_rate": 1.889056118826604e-05,
      "loss": 2.6505,
      "step": 204238
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5693631172180176,
      "learning_rate": 1.8889132606200297e-05,
      "loss": 2.821,
      "step": 204239
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.31475567817688,
      "learning_rate": 1.8887704076398346e-05,
      "loss": 2.9982,
      "step": 204240
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.00801944732666,
      "learning_rate": 1.888627559886049e-05,
      "loss": 2.9213,
      "step": 204241
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.186203718185425,
      "learning_rate": 1.888484717358696e-05,
      "loss": 2.9375,
      "step": 204242
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6516990661621094,
      "learning_rate": 1.8883418800578122e-05,
      "loss": 2.9872,
      "step": 204243
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.180262565612793,
      "learning_rate": 1.8881990479834108e-05,
      "loss": 2.7639,
      "step": 204244
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.855895519256592,
      "learning_rate": 1.8880562211355255e-05,
      "loss": 3.024,
      "step": 204245
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2470338344573975,
      "learning_rate": 1.887913399514179e-05,
      "loss": 2.9343,
      "step": 204246
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0440948009490967,
      "learning_rate": 1.887770583119402e-05,
      "loss": 2.7637,
      "step": 204247
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0864293575286865,
      "learning_rate": 1.8876277719512145e-05,
      "loss": 3.1173,
      "step": 204248
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.167819976806641,
      "learning_rate": 1.887484966009656e-05,
      "loss": 2.7021,
      "step": 204249
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4028854370117188,
      "learning_rate": 1.8873421652947364e-05,
      "loss": 2.9435,
      "step": 204250
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8195998668670654,
      "learning_rate": 1.887199369806499e-05,
      "loss": 3.1617,
      "step": 204251
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.918388605117798,
      "learning_rate": 1.8870565795449545e-05,
      "loss": 2.953,
      "step": 204252
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8066325187683105,
      "learning_rate": 1.886913794510142e-05,
      "loss": 2.8488,
      "step": 204253
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8021533489227295,
      "learning_rate": 1.8867710147020786e-05,
      "loss": 2.7502,
      "step": 204254
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5800435543060303,
      "learning_rate": 1.8866282401207977e-05,
      "loss": 3.0041,
      "step": 204255
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9946348667144775,
      "learning_rate": 1.886485470766319e-05,
      "loss": 3.1076,
      "step": 204256
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.582185983657837,
      "learning_rate": 1.8863427066386827e-05,
      "loss": 2.9987,
      "step": 204257
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9227657318115234,
      "learning_rate": 1.8861999477378988e-05,
      "loss": 2.9241,
      "step": 204258
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3922247886657715,
      "learning_rate": 1.8860571940640033e-05,
      "loss": 3.0204,
      "step": 204259
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.077348232269287,
      "learning_rate": 1.885914445617017e-05,
      "loss": 2.8657,
      "step": 204260
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.804427146911621,
      "learning_rate": 1.8857717023969766e-05,
      "loss": 2.8327,
      "step": 204261
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8061928749084473,
      "learning_rate": 1.8856289644038948e-05,
      "loss": 2.9282,
      "step": 204262
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9575846195220947,
      "learning_rate": 1.8854862316378082e-05,
      "loss": 3.0174,
      "step": 204263
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.68101167678833,
      "learning_rate": 1.885343504098744e-05,
      "loss": 2.9485,
      "step": 204264
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3992574214935303,
      "learning_rate": 1.8852007817867222e-05,
      "loss": 2.9063,
      "step": 204265
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3987343311309814,
      "learning_rate": 1.8850580647017688e-05,
      "loss": 2.9893,
      "step": 204266
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.905093193054199,
      "learning_rate": 1.8849153528439177e-05,
      "loss": 3.0912,
      "step": 204267
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.709876775741577,
      "learning_rate": 1.8847726462131886e-05,
      "loss": 2.7572,
      "step": 204268
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1765213012695312,
      "learning_rate": 1.8846299448096147e-05,
      "loss": 2.71,
      "step": 204269
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.207495927810669,
      "learning_rate": 1.8844872486332196e-05,
      "loss": 2.9432,
      "step": 204270
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.008265972137451,
      "learning_rate": 1.884344557684023e-05,
      "loss": 2.8799,
      "step": 204271
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.087785243988037,
      "learning_rate": 1.884201871962062e-05,
      "loss": 2.9211,
      "step": 204272
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.067854404449463,
      "learning_rate": 1.8840591914673596e-05,
      "loss": 3.0596,
      "step": 204273
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9039981365203857,
      "learning_rate": 1.883916516199936e-05,
      "loss": 2.9067,
      "step": 204274
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.254517078399658,
      "learning_rate": 1.8837738461598273e-05,
      "loss": 2.6823,
      "step": 204275
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.156219720840454,
      "learning_rate": 1.8836311813470572e-05,
      "loss": 2.7133,
      "step": 204276
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.526766061782837,
      "learning_rate": 1.8834885217616456e-05,
      "loss": 2.978,
      "step": 204277
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.844470500946045,
      "learning_rate": 1.8833458674036294e-05,
      "loss": 2.9748,
      "step": 204278
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.767683267593384,
      "learning_rate": 1.8832032182730285e-05,
      "loss": 2.8012,
      "step": 204279
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4471046924591064,
      "learning_rate": 1.883060574369869e-05,
      "loss": 2.8651,
      "step": 204280
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8380866050720215,
      "learning_rate": 1.8829179356941814e-05,
      "loss": 2.9742,
      "step": 204281
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6748104095458984,
      "learning_rate": 1.8827753022459923e-05,
      "loss": 2.9658,
      "step": 204282
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8763437271118164,
      "learning_rate": 1.8826326740253183e-05,
      "loss": 2.8412,
      "step": 204283
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0801239013671875,
      "learning_rate": 1.8824900510321996e-05,
      "loss": 2.9229,
      "step": 204284
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.44915246963501,
      "learning_rate": 1.882347433266652e-05,
      "loss": 2.6069,
      "step": 204285
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8123958110809326,
      "learning_rate": 1.882204820728713e-05,
      "loss": 3.0614,
      "step": 204286
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9237942695617676,
      "learning_rate": 1.8820622134184026e-05,
      "loss": 2.9015,
      "step": 204287
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.497004508972168,
      "learning_rate": 1.8819196113357437e-05,
      "loss": 3.0291,
      "step": 204288
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8320181369781494,
      "learning_rate": 1.8817770144807664e-05,
      "loss": 2.9451,
      "step": 204289
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.268984794616699,
      "learning_rate": 1.8816344228535007e-05,
      "loss": 3.0338,
      "step": 204290
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6263232231140137,
      "learning_rate": 1.8814918364539668e-05,
      "loss": 2.9633,
      "step": 204291
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7668912410736084,
      "learning_rate": 1.8813492552821972e-05,
      "loss": 2.9289,
      "step": 204292
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1212007999420166,
      "learning_rate": 1.8812066793382164e-05,
      "loss": 2.9451,
      "step": 204293
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8479244709014893,
      "learning_rate": 1.8810641086220468e-05,
      "loss": 3.0604,
      "step": 204294
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.819121360778809,
      "learning_rate": 1.8809215431337187e-05,
      "loss": 2.8428,
      "step": 204295
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.921064853668213,
      "learning_rate": 1.880778982873259e-05,
      "loss": 2.9297,
      "step": 204296
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.085115909576416,
      "learning_rate": 1.8806364278406904e-05,
      "loss": 2.9229,
      "step": 204297
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2867467403411865,
      "learning_rate": 1.8804938780360467e-05,
      "loss": 3.035,
      "step": 204298
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.801396369934082,
      "learning_rate": 1.880351333459351e-05,
      "loss": 2.8537,
      "step": 204299
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3628668785095215,
      "learning_rate": 1.8802087941106268e-05,
      "loss": 2.6008,
      "step": 204300
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6575570106506348,
      "learning_rate": 1.8800662599898976e-05,
      "loss": 2.8029,
      "step": 204301
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.098204135894775,
      "learning_rate": 1.8799237310972025e-05,
      "loss": 3.1091,
      "step": 204302
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8536860942840576,
      "learning_rate": 1.8797812074325526e-05,
      "loss": 2.9453,
      "step": 204303
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8692593574523926,
      "learning_rate": 1.879638688995987e-05,
      "loss": 2.9874,
      "step": 204304
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.873878002166748,
      "learning_rate": 1.8794961757875295e-05,
      "loss": 2.8573,
      "step": 204305
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0606465339660645,
      "learning_rate": 1.879353667807203e-05,
      "loss": 2.8027,
      "step": 204306
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.624161720275879,
      "learning_rate": 1.8792111650550312e-05,
      "loss": 2.8612,
      "step": 204307
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8166587352752686,
      "learning_rate": 1.8790686675310507e-05,
      "loss": 2.9674,
      "step": 204308
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.450174570083618,
      "learning_rate": 1.8789261752352746e-05,
      "loss": 2.8092,
      "step": 204309
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6377689838409424,
      "learning_rate": 1.878783688167743e-05,
      "loss": 2.7758,
      "step": 204310
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.345933675765991,
      "learning_rate": 1.8786412063284796e-05,
      "loss": 3.0811,
      "step": 204311
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.408721446990967,
      "learning_rate": 1.8784987297175037e-05,
      "loss": 3.0064,
      "step": 204312
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6159350872039795,
      "learning_rate": 1.8783562583348423e-05,
      "loss": 3.1409,
      "step": 204313
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2685840129852295,
      "learning_rate": 1.8782137921805284e-05,
      "loss": 2.8252,
      "step": 204314
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8835253715515137,
      "learning_rate": 1.8780713312545827e-05,
      "loss": 2.8918,
      "step": 204315
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1881284713745117,
      "learning_rate": 1.8779288755570376e-05,
      "loss": 3.0318,
      "step": 204316
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2913033962249756,
      "learning_rate": 1.8777864250879138e-05,
      "loss": 2.943,
      "step": 204317
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6456451416015625,
      "learning_rate": 1.8776439798472476e-05,
      "loss": 2.8783,
      "step": 204318
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.009401559829712,
      "learning_rate": 1.8775015398350525e-05,
      "loss": 2.75,
      "step": 204319
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8881783485412598,
      "learning_rate": 1.8773591050513614e-05,
      "loss": 3.1091,
      "step": 204320
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.531630754470825,
      "learning_rate": 1.8772166754961946e-05,
      "loss": 3.0853,
      "step": 204321
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0686848163604736,
      "learning_rate": 1.8770742511695925e-05,
      "loss": 3.0021,
      "step": 204322
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9365832805633545,
      "learning_rate": 1.8769318320715675e-05,
      "loss": 3.0416,
      "step": 204323
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6019351482391357,
      "learning_rate": 1.8767894182021604e-05,
      "loss": 2.7261,
      "step": 204324
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1953091621398926,
      "learning_rate": 1.8766470095613806e-05,
      "loss": 2.9091,
      "step": 204325
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.245447158813477,
      "learning_rate": 1.876504606149265e-05,
      "loss": 2.9991,
      "step": 204326
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.908155679702759,
      "learning_rate": 1.8763622079658368e-05,
      "loss": 2.8025,
      "step": 204327
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.112647771835327,
      "learning_rate": 1.8762198150111263e-05,
      "loss": 2.8321,
      "step": 204328
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.950181484222412,
      "learning_rate": 1.8760774272851532e-05,
      "loss": 2.7264,
      "step": 204329
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.895613193511963,
      "learning_rate": 1.8759350447879572e-05,
      "loss": 2.9035,
      "step": 204330
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9426968097686768,
      "learning_rate": 1.8757926675195454e-05,
      "loss": 3.1128,
      "step": 204331
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.098942279815674,
      "learning_rate": 1.8756502954799612e-05,
      "loss": 2.8327,
      "step": 204332
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4494946002960205,
      "learning_rate": 1.8755079286692208e-05,
      "loss": 2.9739,
      "step": 204333
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.953730344772339,
      "learning_rate": 1.8753655670873547e-05,
      "loss": 2.7751,
      "step": 204334
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6540660858154297,
      "learning_rate": 1.875223210734389e-05,
      "loss": 2.9107,
      "step": 204335
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6565983295440674,
      "learning_rate": 1.8750808596103572e-05,
      "loss": 2.9464,
      "step": 204336
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2952449321746826,
      "learning_rate": 1.8749385137152696e-05,
      "loss": 2.7793,
      "step": 204337
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.189927816390991,
      "learning_rate": 1.8747961730491657e-05,
      "loss": 2.943,
      "step": 204338
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.646055221557617,
      "learning_rate": 1.8746538376120655e-05,
      "loss": 2.8368,
      "step": 204339
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.49785852432251,
      "learning_rate": 1.874511507404003e-05,
      "loss": 2.8654,
      "step": 204340
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.960979461669922,
      "learning_rate": 1.874369182424994e-05,
      "loss": 2.7767,
      "step": 204341
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.69482946395874,
      "learning_rate": 1.874226862675079e-05,
      "loss": 2.7802,
      "step": 204342
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.15348219871521,
      "learning_rate": 1.8740845481542677e-05,
      "loss": 2.5965,
      "step": 204343
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5616207122802734,
      "learning_rate": 1.8739422388626003e-05,
      "loss": 2.8554,
      "step": 204344
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.7087111473083496,
      "learning_rate": 1.873799934800093e-05,
      "loss": 2.8326,
      "step": 204345
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.7798471450805664,
      "learning_rate": 1.8736576359667798e-05,
      "loss": 2.8294,
      "step": 204346
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9585227966308594,
      "learning_rate": 1.8735153423626836e-05,
      "loss": 2.7486,
      "step": 204347
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1327972412109375,
      "learning_rate": 1.8733730539878345e-05,
      "loss": 3.0074,
      "step": 204348
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.238454818725586,
      "learning_rate": 1.8732307708422557e-05,
      "loss": 2.9099,
      "step": 204349
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7776756286621094,
      "learning_rate": 1.8730884929259738e-05,
      "loss": 2.9454,
      "step": 204350
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8342995643615723,
      "learning_rate": 1.872946220239012e-05,
      "loss": 2.7806,
      "step": 204351
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.167174816131592,
      "learning_rate": 1.8728039527814044e-05,
      "loss": 2.7774,
      "step": 204352
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9870920181274414,
      "learning_rate": 1.87266169055317e-05,
      "loss": 3.0444,
      "step": 204353
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7643420696258545,
      "learning_rate": 1.8725194335543427e-05,
      "loss": 3.0277,
      "step": 204354
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.071299076080322,
      "learning_rate": 1.8723771817849453e-05,
      "loss": 2.8783,
      "step": 204355
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.716529369354248,
      "learning_rate": 1.8722349352450016e-05,
      "loss": 2.9934,
      "step": 204356
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.047351121902466,
      "learning_rate": 1.8720926939345417e-05,
      "loss": 3.0377,
      "step": 204357
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0732085704803467,
      "learning_rate": 1.8719504578535914e-05,
      "loss": 2.733,
      "step": 204358
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.165952205657959,
      "learning_rate": 1.8718082270021717e-05,
      "loss": 3.0282,
      "step": 204359
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.708975076675415,
      "learning_rate": 1.8716660013803185e-05,
      "loss": 2.879,
      "step": 204360
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4117822647094727,
      "learning_rate": 1.8715237809880555e-05,
      "loss": 2.9076,
      "step": 204361
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2564709186553955,
      "learning_rate": 1.8713815658253994e-05,
      "loss": 2.8452,
      "step": 204362
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5533480644226074,
      "learning_rate": 1.87123935589239e-05,
      "loss": 2.9075,
      "step": 204363
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8519678115844727,
      "learning_rate": 1.8710971511890506e-05,
      "loss": 2.7044,
      "step": 204364
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.763097047805786,
      "learning_rate": 1.8709549517153975e-05,
      "loss": 2.8058,
      "step": 204365
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0108816623687744,
      "learning_rate": 1.8708127574714716e-05,
      "loss": 2.8894,
      "step": 204366
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3167660236358643,
      "learning_rate": 1.870670568457292e-05,
      "loss": 3.1741,
      "step": 204367
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9220428466796875,
      "learning_rate": 1.870528384672879e-05,
      "loss": 3.1961,
      "step": 204368
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7765326499938965,
      "learning_rate": 1.870386206118273e-05,
      "loss": 2.8107,
      "step": 204369
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0449013710021973,
      "learning_rate": 1.87024403279349e-05,
      "loss": 2.7099,
      "step": 204370
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.338259696960449,
      "learning_rate": 1.87010186469856e-05,
      "loss": 3.0868,
      "step": 204371
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7672500610351562,
      "learning_rate": 1.86995970183351e-05,
      "loss": 2.7527,
      "step": 204372
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.236642837524414,
      "learning_rate": 1.8698175441983664e-05,
      "loss": 3.2178,
      "step": 204373
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.201954364776611,
      "learning_rate": 1.8696753917931528e-05,
      "loss": 3.0873,
      "step": 204374
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0343329906463623,
      "learning_rate": 1.869533244617899e-05,
      "loss": 2.8922,
      "step": 204375
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.516972541809082,
      "learning_rate": 1.869391102672625e-05,
      "loss": 3.0396,
      "step": 204376
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.814925193786621,
      "learning_rate": 1.8692489659573674e-05,
      "loss": 3.0742,
      "step": 204377
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.044833660125732,
      "learning_rate": 1.8691068344721495e-05,
      "loss": 2.9656,
      "step": 204378
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9913103580474854,
      "learning_rate": 1.8689647082169945e-05,
      "loss": 2.9024,
      "step": 204379
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0048632621765137,
      "learning_rate": 1.8688225871919226e-05,
      "loss": 2.9148,
      "step": 204380
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0471458435058594,
      "learning_rate": 1.8686804713969773e-05,
      "loss": 3.0417,
      "step": 204381
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.51718807220459,
      "learning_rate": 1.868538360832168e-05,
      "loss": 2.7902,
      "step": 204382
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0889079570770264,
      "learning_rate": 1.8683962554975318e-05,
      "loss": 2.9763,
      "step": 204383
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.873596429824829,
      "learning_rate": 1.8682541553930886e-05,
      "loss": 2.7829,
      "step": 204384
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.403393268585205,
      "learning_rate": 1.868112060518875e-05,
      "loss": 2.9376,
      "step": 204385
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7605373859405518,
      "learning_rate": 1.867969970874904e-05,
      "loss": 3.0543,
      "step": 204386
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.263394594192505,
      "learning_rate": 1.8678278864612127e-05,
      "loss": 2.848,
      "step": 204387
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.253028154373169,
      "learning_rate": 1.8676858072778178e-05,
      "loss": 3.0229,
      "step": 204388
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6033823490142822,
      "learning_rate": 1.8675437333247554e-05,
      "loss": 3.0165,
      "step": 204389
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2670841217041016,
      "learning_rate": 1.867401664602043e-05,
      "loss": 3.0339,
      "step": 204390
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.866697072982788,
      "learning_rate": 1.867259601109723e-05,
      "loss": 2.9281,
      "step": 204391
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.082139015197754,
      "learning_rate": 1.8671175428477993e-05,
      "loss": 2.971,
      "step": 204392
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.050870418548584,
      "learning_rate": 1.866975489816315e-05,
      "loss": 2.9985,
      "step": 204393
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3218932151794434,
      "learning_rate": 1.8668334420152864e-05,
      "loss": 3.0998,
      "step": 204394
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.237558126449585,
      "learning_rate": 1.8666913994447473e-05,
      "loss": 3.0309,
      "step": 204395
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9534928798675537,
      "learning_rate": 1.866549362104718e-05,
      "loss": 2.9355,
      "step": 204396
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1405029296875,
      "learning_rate": 1.8664073299952377e-05,
      "loss": 2.959,
      "step": 204397
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.831794023513794,
      "learning_rate": 1.8662653031163133e-05,
      "loss": 2.7332,
      "step": 204398
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8986823558807373,
      "learning_rate": 1.8661232814679885e-05,
      "loss": 2.9009,
      "step": 204399
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9424803256988525,
      "learning_rate": 1.865981265050276e-05,
      "loss": 2.9336,
      "step": 204400
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.461301565170288,
      "learning_rate": 1.8658392538632126e-05,
      "loss": 3.0409,
      "step": 204401
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.706203937530518,
      "learning_rate": 1.8656972479068155e-05,
      "loss": 2.5876,
      "step": 204402
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6254680156707764,
      "learning_rate": 1.8655552471811274e-05,
      "loss": 2.9293,
      "step": 204403
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1279659271240234,
      "learning_rate": 1.8654132516861553e-05,
      "loss": 3.304,
      "step": 204404
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.544295310974121,
      "learning_rate": 1.8652712614219357e-05,
      "loss": 3.2078,
      "step": 204405
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5353000164031982,
      "learning_rate": 1.8651292763884917e-05,
      "loss": 2.8622,
      "step": 204406
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.7674310207366943,
      "learning_rate": 1.8649872965858537e-05,
      "loss": 2.7623,
      "step": 204407
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.874941825866699,
      "learning_rate": 1.8648453220140415e-05,
      "loss": 2.8009,
      "step": 204408
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7407143115997314,
      "learning_rate": 1.8647033526730948e-05,
      "loss": 3.0508,
      "step": 204409
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3197646141052246,
      "learning_rate": 1.8645613885630208e-05,
      "loss": 2.8451,
      "step": 204410
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0584146976470947,
      "learning_rate": 1.864419429683862e-05,
      "loss": 2.8402,
      "step": 204411
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7438929080963135,
      "learning_rate": 1.8642774760356326e-05,
      "loss": 2.8548,
      "step": 204412
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8423831462860107,
      "learning_rate": 1.8641355276183723e-05,
      "loss": 3.059,
      "step": 204413
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8394484519958496,
      "learning_rate": 1.863993584432094e-05,
      "loss": 3.0951,
      "step": 204414
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.342165946960449,
      "learning_rate": 1.8638516464768382e-05,
      "loss": 2.843,
      "step": 204415
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8428099155426025,
      "learning_rate": 1.863709713752618e-05,
      "loss": 3.1377,
      "step": 204416
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8208580017089844,
      "learning_rate": 1.8635677862594666e-05,
      "loss": 2.6308,
      "step": 204417
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8467040061950684,
      "learning_rate": 1.8634258639974043e-05,
      "loss": 3.1171,
      "step": 204418
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.099231004714966,
      "learning_rate": 1.8632839469664708e-05,
      "loss": 3.227,
      "step": 204419
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0653090476989746,
      "learning_rate": 1.863142035166676e-05,
      "loss": 3.0484,
      "step": 204420
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0430192947387695,
      "learning_rate": 1.8630001285980633e-05,
      "loss": 2.7916,
      "step": 204421
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2301666736602783,
      "learning_rate": 1.8628582272606428e-05,
      "loss": 3.0411,
      "step": 204422
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.081028938293457,
      "learning_rate": 1.8627163311544514e-05,
      "loss": 3.1295,
      "step": 204423
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7374892234802246,
      "learning_rate": 1.862574440279505e-05,
      "loss": 2.856,
      "step": 204424
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5812277793884277,
      "learning_rate": 1.8624325546358442e-05,
      "loss": 2.6159,
      "step": 204425
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.868771553039551,
      "learning_rate": 1.8622906742234857e-05,
      "loss": 3.1977,
      "step": 204426
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0227878093719482,
      "learning_rate": 1.8621487990424653e-05,
      "loss": 3.011,
      "step": 204427
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7715485095977783,
      "learning_rate": 1.862006929092794e-05,
      "loss": 2.9185,
      "step": 204428
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.927523612976074,
      "learning_rate": 1.8618650643745113e-05,
      "loss": 2.9585,
      "step": 204429
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4722819328308105,
      "learning_rate": 1.861723204887634e-05,
      "loss": 2.8841,
      "step": 204430
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.061432123184204,
      "learning_rate": 1.8615813506321985e-05,
      "loss": 2.7728,
      "step": 204431
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1498219966888428,
      "learning_rate": 1.8614395016082217e-05,
      "loss": 2.6789,
      "step": 204432
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.009746551513672,
      "learning_rate": 1.8612976578157402e-05,
      "loss": 2.8851,
      "step": 204433
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5793824195861816,
      "learning_rate": 1.8611558192547738e-05,
      "loss": 2.8399,
      "step": 204434
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7179410457611084,
      "learning_rate": 1.861013985925349e-05,
      "loss": 2.8582,
      "step": 204435
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.457786798477173,
      "learning_rate": 1.86087215782749e-05,
      "loss": 3.0396,
      "step": 204436
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4706978797912598,
      "learning_rate": 1.860730334961229e-05,
      "loss": 2.919,
      "step": 204437
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9392571449279785,
      "learning_rate": 1.8605885173265834e-05,
      "loss": 2.9697,
      "step": 204438
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6810710430145264,
      "learning_rate": 1.8604467049235925e-05,
      "loss": 2.9228,
      "step": 204439
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.151244878768921,
      "learning_rate": 1.860304897752274e-05,
      "loss": 3.031,
      "step": 204440
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4780666828155518,
      "learning_rate": 1.8601630958126567e-05,
      "loss": 2.9685,
      "step": 204441
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.959728240966797,
      "learning_rate": 1.860021299104765e-05,
      "loss": 2.8706,
      "step": 204442
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.635075569152832,
      "learning_rate": 1.8598795076286276e-05,
      "loss": 2.7248,
      "step": 204443
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.438154458999634,
      "learning_rate": 1.8597377213842656e-05,
      "loss": 3.1603,
      "step": 204444
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.4386467933654785,
      "learning_rate": 1.859595940371712e-05,
      "loss": 2.8853,
      "step": 204445
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.327601909637451,
      "learning_rate": 1.8594541645909933e-05,
      "loss": 2.7421,
      "step": 204446
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6184966564178467,
      "learning_rate": 1.8593123940421294e-05,
      "loss": 2.804,
      "step": 204447
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.119755744934082,
      "learning_rate": 1.859170628725154e-05,
      "loss": 2.9615,
      "step": 204448
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.808849811553955,
      "learning_rate": 1.8590288686400902e-05,
      "loss": 2.7053,
      "step": 204449
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0831832885742188,
      "learning_rate": 1.8588871137869576e-05,
      "loss": 3.0248,
      "step": 204450
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.987384796142578,
      "learning_rate": 1.8587453641657967e-05,
      "loss": 2.9231,
      "step": 204451
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8180959224700928,
      "learning_rate": 1.858603619776624e-05,
      "loss": 2.7275,
      "step": 204452
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0732269287109375,
      "learning_rate": 1.858461880619466e-05,
      "loss": 2.945,
      "step": 204453
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8926923274993896,
      "learning_rate": 1.8583201466943532e-05,
      "loss": 3.0827,
      "step": 204454
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.105501651763916,
      "learning_rate": 1.858178418001308e-05,
      "loss": 3.0773,
      "step": 204455
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.976536750793457,
      "learning_rate": 1.8580366945403613e-05,
      "loss": 3.1151,
      "step": 204456
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9224631786346436,
      "learning_rate": 1.8578949763115357e-05,
      "loss": 2.9398,
      "step": 204457
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2821202278137207,
      "learning_rate": 1.8577532633148618e-05,
      "loss": 2.7544,
      "step": 204458
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0599524974823,
      "learning_rate": 1.8576115555503557e-05,
      "loss": 3.0057,
      "step": 204459
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.035757064819336,
      "learning_rate": 1.8574698530180542e-05,
      "loss": 2.9981,
      "step": 204460
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.9343416690826416,
      "learning_rate": 1.8573281557179776e-05,
      "loss": 2.7187,
      "step": 204461
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.724618434906006,
      "learning_rate": 1.857186463650162e-05,
      "loss": 3.0329,
      "step": 204462
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8592138290405273,
      "learning_rate": 1.8570447768146248e-05,
      "loss": 2.8968,
      "step": 204463
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3293802738189697,
      "learning_rate": 1.8569030952113917e-05,
      "loss": 3.0064,
      "step": 204464
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8832180500030518,
      "learning_rate": 1.85676141884049e-05,
      "loss": 2.9066,
      "step": 204465
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9588751792907715,
      "learning_rate": 1.8566197477019496e-05,
      "loss": 2.897,
      "step": 204466
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8359999656677246,
      "learning_rate": 1.8564780817957936e-05,
      "loss": 3.0255,
      "step": 204467
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.840446710586548,
      "learning_rate": 1.856336421122052e-05,
      "loss": 2.811,
      "step": 204468
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8586583137512207,
      "learning_rate": 1.856194765680745e-05,
      "loss": 2.8488,
      "step": 204469
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6981468200683594,
      "learning_rate": 1.8560531154719093e-05,
      "loss": 2.659,
      "step": 204470
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.268089771270752,
      "learning_rate": 1.855911470495558e-05,
      "loss": 2.8546,
      "step": 204471
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2941336631774902,
      "learning_rate": 1.8557698307517278e-05,
      "loss": 2.8242,
      "step": 204472
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.955185890197754,
      "learning_rate": 1.8556281962404383e-05,
      "loss": 2.9371,
      "step": 204473
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1367146968841553,
      "learning_rate": 1.8554865669617202e-05,
      "loss": 2.642,
      "step": 204474
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.748601913452148,
      "learning_rate": 1.8553449429155963e-05,
      "loss": 2.7993,
      "step": 204475
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9601614475250244,
      "learning_rate": 1.8552033241021035e-05,
      "loss": 2.9979,
      "step": 204476
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1186771392822266,
      "learning_rate": 1.855061710521252e-05,
      "loss": 2.8674,
      "step": 204477
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3692405223846436,
      "learning_rate": 1.8549201021730774e-05,
      "loss": 2.7708,
      "step": 204478
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7400214672088623,
      "learning_rate": 1.8547784990576007e-05,
      "loss": 2.7205,
      "step": 204479
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.899470567703247,
      "learning_rate": 1.8546369011748585e-05,
      "loss": 2.9483,
      "step": 204480
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.138801574707031,
      "learning_rate": 1.8544953085248636e-05,
      "loss": 2.8461,
      "step": 204481
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.79724383354187,
      "learning_rate": 1.8543537211076596e-05,
      "loss": 2.7464,
      "step": 204482
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9027233123779297,
      "learning_rate": 1.8542121389232535e-05,
      "loss": 3.0608,
      "step": 204483
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6926069259643555,
      "learning_rate": 1.8540705619716844e-05,
      "loss": 3.1934,
      "step": 204484
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5952248573303223,
      "learning_rate": 1.853928990252973e-05,
      "loss": 3.1004,
      "step": 204485
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3229148387908936,
      "learning_rate": 1.8537874237671492e-05,
      "loss": 3.0993,
      "step": 204486
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.938754081726074,
      "learning_rate": 1.8536458625142326e-05,
      "loss": 2.8495,
      "step": 204487
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4973459243774414,
      "learning_rate": 1.853504306494267e-05,
      "loss": 3.0258,
      "step": 204488
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.806309700012207,
      "learning_rate": 1.853362755707255e-05,
      "loss": 2.9026,
      "step": 204489
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9793405532836914,
      "learning_rate": 1.8532212101532374e-05,
      "loss": 3.1765,
      "step": 204490
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.138742208480835,
      "learning_rate": 1.8530796698322335e-05,
      "loss": 3.1925,
      "step": 204491
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2660796642303467,
      "learning_rate": 1.8529381347442805e-05,
      "loss": 2.9119,
      "step": 204492
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.767223596572876,
      "learning_rate": 1.8527966048893916e-05,
      "loss": 3.0854,
      "step": 204493
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.765967845916748,
      "learning_rate": 1.8526550802676064e-05,
      "loss": 3.1745,
      "step": 204494
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.51357102394104,
      "learning_rate": 1.8525135608789355e-05,
      "loss": 2.8133,
      "step": 204495
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6170575618743896,
      "learning_rate": 1.8523720467234183e-05,
      "loss": 2.9433,
      "step": 204496
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9597301483154297,
      "learning_rate": 1.8522305378010747e-05,
      "loss": 2.9079,
      "step": 204497
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1408145427703857,
      "learning_rate": 1.8520890341119355e-05,
      "loss": 2.9756,
      "step": 204498
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8141489028930664,
      "learning_rate": 1.85194753565602e-05,
      "loss": 2.9861,
      "step": 204499
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.318871259689331,
      "learning_rate": 1.8518060424333646e-05,
      "loss": 2.9012,
      "step": 204500
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6202428340911865,
      "learning_rate": 1.8516645544439834e-05,
      "loss": 3.0461,
      "step": 204501
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1377012729644775,
      "learning_rate": 1.851523071687916e-05,
      "loss": 2.9911,
      "step": 204502
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0154082775115967,
      "learning_rate": 1.851381594165172e-05,
      "loss": 3.107,
      "step": 204503
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.870875835418701,
      "learning_rate": 1.8512401218757956e-05,
      "loss": 2.5994,
      "step": 204504
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.833282947540283,
      "learning_rate": 1.8510986548197993e-05,
      "loss": 2.7223,
      "step": 204505
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.606968402862549,
      "learning_rate": 1.8509571929972266e-05,
      "loss": 3.0921,
      "step": 204506
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.251757860183716,
      "learning_rate": 1.850815736408081e-05,
      "loss": 3.0974,
      "step": 204507
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9784016609191895,
      "learning_rate": 1.850674285052406e-05,
      "loss": 2.7491,
      "step": 204508
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7410104274749756,
      "learning_rate": 1.8505328389302143e-05,
      "loss": 3.0746,
      "step": 204509
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2990968227386475,
      "learning_rate": 1.8503913980415462e-05,
      "loss": 2.927,
      "step": 204510
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9844062328338623,
      "learning_rate": 1.8502499623864186e-05,
      "loss": 2.8437,
      "step": 204511
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7775650024414062,
      "learning_rate": 1.8501085319648713e-05,
      "loss": 2.7961,
      "step": 204512
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6516897678375244,
      "learning_rate": 1.8499671067769072e-05,
      "loss": 2.6416,
      "step": 204513
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5833029747009277,
      "learning_rate": 1.8498256868225703e-05,
      "loss": 3.0738,
      "step": 204514
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8048477172851562,
      "learning_rate": 1.8496842721018802e-05,
      "loss": 2.9562,
      "step": 204515
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0732715129852295,
      "learning_rate": 1.8495428626148702e-05,
      "loss": 2.6399,
      "step": 204516
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0064175128936768,
      "learning_rate": 1.8494014583615534e-05,
      "loss": 2.6213,
      "step": 204517
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.034449815750122,
      "learning_rate": 1.8492600593419705e-05,
      "loss": 2.8138,
      "step": 204518
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7104296684265137,
      "learning_rate": 1.849118665556144e-05,
      "loss": 2.8897,
      "step": 204519
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.043153762817383,
      "learning_rate": 1.8489772770040946e-05,
      "loss": 2.8064,
      "step": 204520
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7819080352783203,
      "learning_rate": 1.8488358936858483e-05,
      "loss": 3.0065,
      "step": 204521
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9594180583953857,
      "learning_rate": 1.848694515601439e-05,
      "loss": 3.1544,
      "step": 204522
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6708030700683594,
      "learning_rate": 1.8485531427508827e-05,
      "loss": 3.089,
      "step": 204523
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7299699783325195,
      "learning_rate": 1.8484117751342197e-05,
      "loss": 3.1787,
      "step": 204524
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.439545154571533,
      "learning_rate": 1.848270412751467e-05,
      "loss": 2.9304,
      "step": 204525
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8669652938842773,
      "learning_rate": 1.8481290556026506e-05,
      "loss": 2.8605,
      "step": 204526
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.272872447967529,
      "learning_rate": 1.8479877036877944e-05,
      "loss": 3.0364,
      "step": 204527
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.134982109069824,
      "learning_rate": 1.8478463570069347e-05,
      "loss": 2.9725,
      "step": 204528
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9675180912017822,
      "learning_rate": 1.847705015560088e-05,
      "loss": 2.7278,
      "step": 204529
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.296109437942505,
      "learning_rate": 1.8475636793472847e-05,
      "loss": 2.8827,
      "step": 204530
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6861960887908936,
      "learning_rate": 1.8474223483685547e-05,
      "loss": 2.8636,
      "step": 204531
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.843395948410034,
      "learning_rate": 1.8472810226239144e-05,
      "loss": 2.9343,
      "step": 204532
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.341317653656006,
      "learning_rate": 1.8471397021134003e-05,
      "loss": 2.9876,
      "step": 204533
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1942296028137207,
      "learning_rate": 1.8469983868370362e-05,
      "loss": 3.0238,
      "step": 204534
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.063077211380005,
      "learning_rate": 1.8468570767948387e-05,
      "loss": 2.8728,
      "step": 204535
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3966362476348877,
      "learning_rate": 1.8467157719868508e-05,
      "loss": 2.8451,
      "step": 204536
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0324277877807617,
      "learning_rate": 1.846574472413086e-05,
      "loss": 3.0821,
      "step": 204537
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7023065090179443,
      "learning_rate": 1.846433178073571e-05,
      "loss": 3.0945,
      "step": 204538
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9894626140594482,
      "learning_rate": 1.846291888968342e-05,
      "loss": 2.9068,
      "step": 204539
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6603753566741943,
      "learning_rate": 1.846150605097416e-05,
      "loss": 2.8751,
      "step": 204540
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.970402717590332,
      "learning_rate": 1.8460093264608167e-05,
      "loss": 2.7387,
      "step": 204541
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.522814989089966,
      "learning_rate": 1.8458680530585835e-05,
      "loss": 2.9527,
      "step": 204542
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.47312593460083,
      "learning_rate": 1.845726784890733e-05,
      "loss": 3.182,
      "step": 204543
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1617672443389893,
      "learning_rate": 1.845585521957289e-05,
      "loss": 3.025,
      "step": 204544
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.491276979446411,
      "learning_rate": 1.845444264258288e-05,
      "loss": 2.8985,
      "step": 204545
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.602350950241089,
      "learning_rate": 1.8453030117937462e-05,
      "loss": 2.9076,
      "step": 204546
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.722992420196533,
      "learning_rate": 1.845161764563694e-05,
      "loss": 2.9602,
      "step": 204547
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9687952995300293,
      "learning_rate": 1.8450205225681614e-05,
      "loss": 2.58,
      "step": 204548
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8699185848236084,
      "learning_rate": 1.8448792858071714e-05,
      "loss": 3.053,
      "step": 204549
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1396596431732178,
      "learning_rate": 1.8447380542807445e-05,
      "loss": 3.0135,
      "step": 204550
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.843496322631836,
      "learning_rate": 1.8445968279889168e-05,
      "loss": 2.7233,
      "step": 204551
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.036736011505127,
      "learning_rate": 1.8444556069317052e-05,
      "loss": 3.065,
      "step": 204552
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5713424682617188,
      "learning_rate": 1.844314391109146e-05,
      "loss": 3.0496,
      "step": 204553
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.86960768699646,
      "learning_rate": 1.8441731805212533e-05,
      "loss": 2.7246,
      "step": 204554
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9909749031066895,
      "learning_rate": 1.8440319751680733e-05,
      "loss": 3.2276,
      "step": 204555
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.693908214569092,
      "learning_rate": 1.8438907750496057e-05,
      "loss": 2.9402,
      "step": 204556
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8629631996154785,
      "learning_rate": 1.8437495801658975e-05,
      "loss": 2.845,
      "step": 204557
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0459485054016113,
      "learning_rate": 1.8436083905169617e-05,
      "loss": 3.0302,
      "step": 204558
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.617741107940674,
      "learning_rate": 1.8434672061028387e-05,
      "loss": 3.0302,
      "step": 204559
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.090256452560425,
      "learning_rate": 1.8433260269235384e-05,
      "loss": 2.9531,
      "step": 204560
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.654019594192505,
      "learning_rate": 1.843184852979107e-05,
      "loss": 2.9712,
      "step": 204561
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8641741275787354,
      "learning_rate": 1.8430436842695485e-05,
      "loss": 2.9303,
      "step": 204562
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.201322078704834,
      "learning_rate": 1.8429025207949022e-05,
      "loss": 3.1345,
      "step": 204563
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.023163080215454,
      "learning_rate": 1.842761362555192e-05,
      "loss": 2.7537,
      "step": 204564
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.841308832168579,
      "learning_rate": 1.8426202095504437e-05,
      "loss": 3.3227,
      "step": 204565
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.093918800354004,
      "learning_rate": 1.8424790617806817e-05,
      "loss": 2.8267,
      "step": 204566
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.487144947052002,
      "learning_rate": 1.8423379192459455e-05,
      "loss": 3.0314,
      "step": 204567
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.160928726196289,
      "learning_rate": 1.842196781946238e-05,
      "loss": 2.7531,
      "step": 204568
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1175057888031006,
      "learning_rate": 1.842055649881603e-05,
      "loss": 3.1303,
      "step": 204569
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6788153648376465,
      "learning_rate": 1.841914523052057e-05,
      "loss": 2.8815,
      "step": 204570
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.038607120513916,
      "learning_rate": 1.8417734014576334e-05,
      "loss": 3.0306,
      "step": 204571
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4295709133148193,
      "learning_rate": 1.841632285098352e-05,
      "loss": 2.9274,
      "step": 204572
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2495675086975098,
      "learning_rate": 1.8414911739742532e-05,
      "loss": 2.987,
      "step": 204573
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3967602252960205,
      "learning_rate": 1.8413500680853434e-05,
      "loss": 2.7948,
      "step": 204574
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5630578994750977,
      "learning_rate": 1.841208967431662e-05,
      "loss": 3.1021,
      "step": 204575
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.36879825592041,
      "learning_rate": 1.8410678720132266e-05,
      "loss": 2.5958,
      "step": 204576
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.670393705368042,
      "learning_rate": 1.8409267818300732e-05,
      "loss": 2.8627,
      "step": 204577
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5721991062164307,
      "learning_rate": 1.8407856968822187e-05,
      "loss": 2.7083,
      "step": 204578
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7789204120635986,
      "learning_rate": 1.840644617169703e-05,
      "loss": 3.1681,
      "step": 204579
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8749468326568604,
      "learning_rate": 1.840503542692533e-05,
      "loss": 3.0739,
      "step": 204580
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.483283042907715,
      "learning_rate": 1.8403624734507517e-05,
      "loss": 2.9322,
      "step": 204581
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6401515007019043,
      "learning_rate": 1.8402214094443723e-05,
      "loss": 3.0424,
      "step": 204582
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.575593948364258,
      "learning_rate": 1.840080350673432e-05,
      "loss": 2.9585,
      "step": 204583
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8381683826446533,
      "learning_rate": 1.8399392971379468e-05,
      "loss": 2.9898,
      "step": 204584
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8800055980682373,
      "learning_rate": 1.8397982488379536e-05,
      "loss": 3.1471,
      "step": 204585
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5766024589538574,
      "learning_rate": 1.839657205773476e-05,
      "loss": 2.8882,
      "step": 204586
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.850322723388672,
      "learning_rate": 1.8395161679445337e-05,
      "loss": 2.8294,
      "step": 204587
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.553144693374634,
      "learning_rate": 1.839375135351153e-05,
      "loss": 2.842,
      "step": 204588
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7020950317382812,
      "learning_rate": 1.8392341079933715e-05,
      "loss": 3.1576,
      "step": 204589
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9681777954101562,
      "learning_rate": 1.8390930858712016e-05,
      "loss": 3.137,
      "step": 204590
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9596831798553467,
      "learning_rate": 1.8389520689846802e-05,
      "loss": 2.7275,
      "step": 204591
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7590808868408203,
      "learning_rate": 1.8388110573338277e-05,
      "loss": 3.1198,
      "step": 204592
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0492289066314697,
      "learning_rate": 1.8386700509186736e-05,
      "loss": 2.6115,
      "step": 204593
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8401784896850586,
      "learning_rate": 1.838529049739238e-05,
      "loss": 2.908,
      "step": 204594
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.916517972946167,
      "learning_rate": 1.8383880537955575e-05,
      "loss": 2.8441,
      "step": 204595
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6224169731140137,
      "learning_rate": 1.8382470630876455e-05,
      "loss": 3.0233,
      "step": 204596
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.002372741699219,
      "learning_rate": 1.8381060776155388e-05,
      "loss": 2.8602,
      "step": 204597
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.739689588546753,
      "learning_rate": 1.8379650973792603e-05,
      "loss": 2.7974,
      "step": 204598
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8536860942840576,
      "learning_rate": 1.837824122378837e-05,
      "loss": 2.8629,
      "step": 204599
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7537269592285156,
      "learning_rate": 1.837683152614289e-05,
      "loss": 2.8758,
      "step": 204600
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0607573986053467,
      "learning_rate": 1.8375421880856522e-05,
      "loss": 3.1869,
      "step": 204601
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7349131107330322,
      "learning_rate": 1.8374012287929406e-05,
      "loss": 3.0578,
      "step": 204602
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.4951539039611816,
      "learning_rate": 1.8372602747361943e-05,
      "loss": 2.8922,
      "step": 204603
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.914842128753662,
      "learning_rate": 1.8371193259154327e-05,
      "loss": 2.9807,
      "step": 204604
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0121610164642334,
      "learning_rate": 1.8369783823306828e-05,
      "loss": 2.7945,
      "step": 204605
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1657626628875732,
      "learning_rate": 1.8368374439819677e-05,
      "loss": 3.0673,
      "step": 204606
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6379282474517822,
      "learning_rate": 1.836696510869318e-05,
      "loss": 3.1206,
      "step": 204607
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.631101369857788,
      "learning_rate": 1.8365555829927526e-05,
      "loss": 2.9861,
      "step": 204608
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.700357675552368,
      "learning_rate": 1.836414660352309e-05,
      "loss": 2.8262,
      "step": 204609
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7547833919525146,
      "learning_rate": 1.8362737429480067e-05,
      "loss": 2.849,
      "step": 204610
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.6155316829681396,
      "learning_rate": 1.836132830779873e-05,
      "loss": 2.6297,
      "step": 204611
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0321075916290283,
      "learning_rate": 1.8359919238479303e-05,
      "loss": 2.7276,
      "step": 204612
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.8892059326171875,
      "learning_rate": 1.8358510221522126e-05,
      "loss": 3.0001,
      "step": 204613
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.618105888366699,
      "learning_rate": 1.8357101256927363e-05,
      "loss": 2.6845,
      "step": 204614
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8546462059020996,
      "learning_rate": 1.835569234469538e-05,
      "loss": 2.9534,
      "step": 204615
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.979473829269409,
      "learning_rate": 1.8354283484826414e-05,
      "loss": 3.0294,
      "step": 204616
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3927102088928223,
      "learning_rate": 1.8352874677320627e-05,
      "loss": 2.994,
      "step": 204617
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.644078493118286,
      "learning_rate": 1.835146592217842e-05,
      "loss": 3.1546,
      "step": 204618
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1341795921325684,
      "learning_rate": 1.8350057219399993e-05,
      "loss": 2.8938,
      "step": 204619
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9645261764526367,
      "learning_rate": 1.8348648568985547e-05,
      "loss": 3.0592,
      "step": 204620
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.655484199523926,
      "learning_rate": 1.8347239970935446e-05,
      "loss": 3.0422,
      "step": 204621
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7329609394073486,
      "learning_rate": 1.8345831425249924e-05,
      "loss": 2.8721,
      "step": 204622
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.210775852203369,
      "learning_rate": 1.8344422931929183e-05,
      "loss": 2.6056,
      "step": 204623
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0256917476654053,
      "learning_rate": 1.834301449097355e-05,
      "loss": 2.9269,
      "step": 204624
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2948291301727295,
      "learning_rate": 1.83416061023833e-05,
      "loss": 3.0477,
      "step": 204625
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3514773845672607,
      "learning_rate": 1.8340197766158627e-05,
      "loss": 2.8319,
      "step": 204626
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8467154502868652,
      "learning_rate": 1.8338789482299833e-05,
      "loss": 3.0461,
      "step": 204627
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8446953296661377,
      "learning_rate": 1.833738125080718e-05,
      "loss": 2.9414,
      "step": 204628
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2723746299743652,
      "learning_rate": 1.833597307168091e-05,
      "loss": 2.6762,
      "step": 204629
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.087451219558716,
      "learning_rate": 1.8334564944921317e-05,
      "loss": 2.7547,
      "step": 204630
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.058605194091797,
      "learning_rate": 1.833315687052863e-05,
      "loss": 2.648,
      "step": 204631
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7897634506225586,
      "learning_rate": 1.833174884850316e-05,
      "loss": 3.0163,
      "step": 204632
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.161134958267212,
      "learning_rate": 1.8330340878845095e-05,
      "loss": 2.9442,
      "step": 204633
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.179018259048462,
      "learning_rate": 1.8328932961554777e-05,
      "loss": 3.038,
      "step": 204634
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9181149005889893,
      "learning_rate": 1.8327525096632367e-05,
      "loss": 2.8673,
      "step": 204635
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.950894594192505,
      "learning_rate": 1.832611728407827e-05,
      "loss": 3.0803,
      "step": 204636
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1447479724884033,
      "learning_rate": 1.8324709523892577e-05,
      "loss": 3.03,
      "step": 204637
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0106613636016846,
      "learning_rate": 1.8323301816075696e-05,
      "loss": 2.8637,
      "step": 204638
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0799331665039062,
      "learning_rate": 1.832189416062776e-05,
      "loss": 3.0405,
      "step": 204639
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0845320224761963,
      "learning_rate": 1.832048655754923e-05,
      "loss": 2.9896,
      "step": 204640
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0895392894744873,
      "learning_rate": 1.831907900684014e-05,
      "loss": 2.8671,
      "step": 204641
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.981900215148926,
      "learning_rate": 1.8317671508500865e-05,
      "loss": 2.8373,
      "step": 204642
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.743311882019043,
      "learning_rate": 1.831626406253166e-05,
      "loss": 2.8689,
      "step": 204643
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6546831130981445,
      "learning_rate": 1.8314856668932763e-05,
      "loss": 2.6425,
      "step": 204644
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8763060569763184,
      "learning_rate": 1.8313449327704444e-05,
      "loss": 3.0194,
      "step": 204645
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.5967278480529785,
      "learning_rate": 1.8312042038847064e-05,
      "loss": 3.0204,
      "step": 204646
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.354801893234253,
      "learning_rate": 1.831063480236069e-05,
      "loss": 2.9681,
      "step": 204647
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8667168617248535,
      "learning_rate": 1.8309227618245724e-05,
      "loss": 3.0217,
      "step": 204648
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2774622440338135,
      "learning_rate": 1.8307820486502368e-05,
      "loss": 2.8793,
      "step": 204649
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.360393762588501,
      "learning_rate": 1.830641340713095e-05,
      "loss": 2.835,
      "step": 204650
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.186978816986084,
      "learning_rate": 1.8305006380131637e-05,
      "loss": 2.7521,
      "step": 204651
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9664177894592285,
      "learning_rate": 1.8303599405504765e-05,
      "loss": 2.773,
      "step": 204652
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.5094339847564697,
      "learning_rate": 1.83021924832506e-05,
      "loss": 2.864,
      "step": 204653
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.041982650756836,
      "learning_rate": 1.830078561336934e-05,
      "loss": 2.9196,
      "step": 204654
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.8056998252868652,
      "learning_rate": 1.8299378795861254e-05,
      "loss": 3.0533,
      "step": 204655
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.116213798522949,
      "learning_rate": 1.8297972030726703e-05,
      "loss": 2.6293,
      "step": 204656
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.910928249359131,
      "learning_rate": 1.8296565317965795e-05,
      "loss": 2.8655,
      "step": 204657
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.641422271728516,
      "learning_rate": 1.8295158657578923e-05,
      "loss": 3.1523,
      "step": 204658
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.072474956512451,
      "learning_rate": 1.8293752049566325e-05,
      "loss": 3.0162,
      "step": 204659
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.0735738277435303,
      "learning_rate": 1.8292345493928196e-05,
      "loss": 2.8249,
      "step": 204660
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.3729143142700195,
      "learning_rate": 1.8290938990664838e-05,
      "loss": 3.0234,
      "step": 204661
    },
    {
      "epoch": 2.66,
      "grad_norm": 5.126671314239502,
      "learning_rate": 1.828953253977652e-05,
      "loss": 3.2288,
      "step": 204662
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.9825594425201416,
      "learning_rate": 1.828812614126347e-05,
      "loss": 2.8188,
      "step": 204663
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.2788360118865967,
      "learning_rate": 1.8286719795125993e-05,
      "loss": 2.8086,
      "step": 204664
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.044992446899414,
      "learning_rate": 1.8285313501364353e-05,
      "loss": 2.8674,
      "step": 204665
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.3707969188690186,
      "learning_rate": 1.8283907259978815e-05,
      "loss": 2.9712,
      "step": 204666
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.1164333820343018,
      "learning_rate": 1.8282501070969546e-05,
      "loss": 3.0732,
      "step": 204667
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.7619636058807373,
      "learning_rate": 1.8281094934336915e-05,
      "loss": 3.0069,
      "step": 204668
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.6624197959899902,
      "learning_rate": 1.8279688850081122e-05,
      "loss": 2.9354,
      "step": 204669
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.8790574073791504,
      "learning_rate": 1.8278282818202495e-05,
      "loss": 2.7136,
      "step": 204670
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.7129688262939453,
      "learning_rate": 1.827687683870124e-05,
      "loss": 3.0688,
      "step": 204671
    },
    {
      "epoch": 2.66,
      "grad_norm": 2.906013011932373,
      "learning_rate": 1.8275470911577616e-05,
      "loss": 3.0849,
      "step": 204672
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.14432692527771,
      "learning_rate": 1.82740650368319e-05,
      "loss": 2.7485,
      "step": 204673
    },
    {
      "epoch": 2.66,
      "grad_norm": 3.602727174758911,
      "learning_rate": 1.8272659214464348e-05,
      "loss": 2.8267,
      "step": 204674
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6877052783966064,
      "learning_rate": 1.827125344447523e-05,
      "loss": 2.7924,
      "step": 204675
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.260662794113159,
      "learning_rate": 1.8269847726864818e-05,
      "loss": 2.7511,
      "step": 204676
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.345282793045044,
      "learning_rate": 1.826844206163337e-05,
      "loss": 3.1159,
      "step": 204677
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.286907196044922,
      "learning_rate": 1.8267036448781123e-05,
      "loss": 3.279,
      "step": 204678
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9517529010772705,
      "learning_rate": 1.8265630888308313e-05,
      "loss": 2.7933,
      "step": 204679
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4018468856811523,
      "learning_rate": 1.82642253802153e-05,
      "loss": 2.7992,
      "step": 204680
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0076546669006348,
      "learning_rate": 1.8262819924502224e-05,
      "loss": 2.9972,
      "step": 204681
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4909465312957764,
      "learning_rate": 1.8261414521169482e-05,
      "loss": 3.0238,
      "step": 204682
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.65657377243042,
      "learning_rate": 1.8260009170217237e-05,
      "loss": 2.9634,
      "step": 204683
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.050499439239502,
      "learning_rate": 1.825860387164576e-05,
      "loss": 2.9491,
      "step": 204684
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.056149482727051,
      "learning_rate": 1.8257198625455315e-05,
      "loss": 3.105,
      "step": 204685
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0183963775634766,
      "learning_rate": 1.8255793431646204e-05,
      "loss": 2.814,
      "step": 204686
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.484210252761841,
      "learning_rate": 1.825438829021859e-05,
      "loss": 2.8105,
      "step": 204687
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.095074415206909,
      "learning_rate": 1.8252983201172876e-05,
      "loss": 2.8915,
      "step": 204688
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7735135555267334,
      "learning_rate": 1.825157816450926e-05,
      "loss": 2.8346,
      "step": 204689
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.990687370300293,
      "learning_rate": 1.8250173180227977e-05,
      "loss": 3.235,
      "step": 204690
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3187544345855713,
      "learning_rate": 1.824876824832926e-05,
      "loss": 2.9231,
      "step": 204691
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9923994541168213,
      "learning_rate": 1.8247363368813473e-05,
      "loss": 3.1155,
      "step": 204692
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2321407794952393,
      "learning_rate": 1.824595854168078e-05,
      "loss": 2.996,
      "step": 204693
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7858617305755615,
      "learning_rate": 1.824455376693149e-05,
      "loss": 2.9834,
      "step": 204694
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9579806327819824,
      "learning_rate": 1.8243149044565895e-05,
      "loss": 2.8989,
      "step": 204695
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8104665279388428,
      "learning_rate": 1.82417443745842e-05,
      "loss": 3.0733,
      "step": 204696
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.655914545059204,
      "learning_rate": 1.8240339756986632e-05,
      "loss": 2.7968,
      "step": 204697
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.831813097000122,
      "learning_rate": 1.8238935191773563e-05,
      "loss": 3.0179,
      "step": 204698
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5511536598205566,
      "learning_rate": 1.8237530678945156e-05,
      "loss": 2.9719,
      "step": 204699
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.103703737258911,
      "learning_rate": 1.8236126218501745e-05,
      "loss": 2.668,
      "step": 204700
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.493454933166504,
      "learning_rate": 1.823472181044353e-05,
      "loss": 3.0577,
      "step": 204701
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.684955596923828,
      "learning_rate": 1.8233317454770844e-05,
      "loss": 2.9865,
      "step": 204702
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6419849395751953,
      "learning_rate": 1.8231913151483823e-05,
      "loss": 2.8543,
      "step": 204703
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5666403770446777,
      "learning_rate": 1.8230508900582895e-05,
      "loss": 2.7322,
      "step": 204704
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5299439430236816,
      "learning_rate": 1.8229104702068163e-05,
      "loss": 2.7718,
      "step": 204705
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.525855302810669,
      "learning_rate": 1.8227700555939994e-05,
      "loss": 3.1416,
      "step": 204706
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.882646322250366,
      "learning_rate": 1.822629646219862e-05,
      "loss": 3.0791,
      "step": 204707
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9165563583374023,
      "learning_rate": 1.822489242084424e-05,
      "loss": 2.8052,
      "step": 204708
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.659393787384033,
      "learning_rate": 1.8223488431877254e-05,
      "loss": 2.7616,
      "step": 204709
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.299797773361206,
      "learning_rate": 1.822208449529783e-05,
      "loss": 2.9387,
      "step": 204710
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4173786640167236,
      "learning_rate": 1.8220680611106165e-05,
      "loss": 2.8943,
      "step": 204711
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8526272773742676,
      "learning_rate": 1.821927677930266e-05,
      "loss": 2.9842,
      "step": 204712
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.896446466445923,
      "learning_rate": 1.8217872999887517e-05,
      "loss": 3.0135,
      "step": 204713
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2981414794921875,
      "learning_rate": 1.8216469272860933e-05,
      "loss": 2.6448,
      "step": 204714
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.744150400161743,
      "learning_rate": 1.821506559822328e-05,
      "loss": 3.0296,
      "step": 204715
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.483236312866211,
      "learning_rate": 1.8213661975974714e-05,
      "loss": 2.7215,
      "step": 204716
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5934083461761475,
      "learning_rate": 1.8212258406115578e-05,
      "loss": 2.7264,
      "step": 204717
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.15755295753479,
      "learning_rate": 1.821085488864613e-05,
      "loss": 3.2268,
      "step": 204718
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6465275287628174,
      "learning_rate": 1.8209451423566545e-05,
      "loss": 2.9215,
      "step": 204719
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.747403860092163,
      "learning_rate": 1.8208048010877184e-05,
      "loss": 3.1675,
      "step": 204720
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8634228706359863,
      "learning_rate": 1.820664465057825e-05,
      "loss": 3.027,
      "step": 204721
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9503817558288574,
      "learning_rate": 1.8205241342670006e-05,
      "loss": 2.8774,
      "step": 204722
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8723855018615723,
      "learning_rate": 1.8203838087152755e-05,
      "loss": 3.0283,
      "step": 204723
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.959015369415283,
      "learning_rate": 1.8202434884026762e-05,
      "loss": 2.7619,
      "step": 204724
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.990358829498291,
      "learning_rate": 1.820103173329216e-05,
      "loss": 3.1289,
      "step": 204725
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.249796390533447,
      "learning_rate": 1.8199628634949383e-05,
      "loss": 2.7852,
      "step": 204726
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.182631492614746,
      "learning_rate": 1.8198225588998593e-05,
      "loss": 3.1257,
      "step": 204727
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8082849979400635,
      "learning_rate": 1.8196822595440064e-05,
      "loss": 3.0486,
      "step": 204728
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.25046968460083,
      "learning_rate": 1.819541965427409e-05,
      "loss": 3.2542,
      "step": 204729
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4108002185821533,
      "learning_rate": 1.819401676550084e-05,
      "loss": 2.7239,
      "step": 204730
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6545939445495605,
      "learning_rate": 1.8192613929120714e-05,
      "loss": 2.9412,
      "step": 204731
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.880829334259033,
      "learning_rate": 1.819121114513391e-05,
      "loss": 3.0334,
      "step": 204732
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8577916622161865,
      "learning_rate": 1.8189808413540663e-05,
      "loss": 2.9839,
      "step": 204733
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.011627674102783,
      "learning_rate": 1.818840573434117e-05,
      "loss": 2.8759,
      "step": 204734
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8420095443725586,
      "learning_rate": 1.818700310753587e-05,
      "loss": 2.8316,
      "step": 204735
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1090047359466553,
      "learning_rate": 1.8185600533124855e-05,
      "loss": 2.7915,
      "step": 204736
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.816542148590088,
      "learning_rate": 1.8184198011108497e-05,
      "loss": 2.7167,
      "step": 204737
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.386453151702881,
      "learning_rate": 1.818279554148706e-05,
      "loss": 2.9576,
      "step": 204738
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4953715801239014,
      "learning_rate": 1.8181393124260712e-05,
      "loss": 2.7732,
      "step": 204739
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.668916702270508,
      "learning_rate": 1.8179990759429718e-05,
      "loss": 2.65,
      "step": 204740
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6154656410217285,
      "learning_rate": 1.8178588446994448e-05,
      "loss": 3.0937,
      "step": 204741
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.296322822570801,
      "learning_rate": 1.817718618695506e-05,
      "loss": 2.8793,
      "step": 204742
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.82270884513855,
      "learning_rate": 1.8175783979311897e-05,
      "loss": 2.8904,
      "step": 204743
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0203182697296143,
      "learning_rate": 1.8174381824065156e-05,
      "loss": 3.0384,
      "step": 204744
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.420743703842163,
      "learning_rate": 1.817297972121513e-05,
      "loss": 2.8932,
      "step": 204745
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9316439628601074,
      "learning_rate": 1.8171577670762028e-05,
      "loss": 3.1195,
      "step": 204746
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.804533004760742,
      "learning_rate": 1.8170175672706176e-05,
      "loss": 2.7691,
      "step": 204747
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.712921619415283,
      "learning_rate": 1.816877372704778e-05,
      "loss": 2.94,
      "step": 204748
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.80139684677124,
      "learning_rate": 1.816737183378717e-05,
      "loss": 2.8748,
      "step": 204749
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0873537063598633,
      "learning_rate": 1.8165969992924578e-05,
      "loss": 2.7943,
      "step": 204750
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7004146575927734,
      "learning_rate": 1.8164568204460206e-05,
      "loss": 3.0946,
      "step": 204751
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8025963306427,
      "learning_rate": 1.8163166468394352e-05,
      "loss": 2.9538,
      "step": 204752
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5897114276885986,
      "learning_rate": 1.816176478472735e-05,
      "loss": 3.112,
      "step": 204753
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.195030450820923,
      "learning_rate": 1.8160363153459302e-05,
      "loss": 2.8022,
      "step": 204754
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7306854724884033,
      "learning_rate": 1.8158961574590634e-05,
      "loss": 2.9374,
      "step": 204755
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.052785873413086,
      "learning_rate": 1.8157560048121556e-05,
      "loss": 2.8849,
      "step": 204756
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4396307468414307,
      "learning_rate": 1.815615857405226e-05,
      "loss": 2.8707,
      "step": 204757
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.441352605819702,
      "learning_rate": 1.8154757152383047e-05,
      "loss": 2.6943,
      "step": 204758
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.671710252761841,
      "learning_rate": 1.815335578311422e-05,
      "loss": 2.8328,
      "step": 204759
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7804481983184814,
      "learning_rate": 1.8151954466245944e-05,
      "loss": 2.7667,
      "step": 204760
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.437275409698486,
      "learning_rate": 1.8150553201778617e-05,
      "loss": 2.9003,
      "step": 204761
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.012364149093628,
      "learning_rate": 1.8149151989712406e-05,
      "loss": 2.867,
      "step": 204762
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.098618745803833,
      "learning_rate": 1.814775083004758e-05,
      "loss": 2.8818,
      "step": 204763
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.153857469558716,
      "learning_rate": 1.814634972278437e-05,
      "loss": 3.1285,
      "step": 204764
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3052117824554443,
      "learning_rate": 1.814494866792311e-05,
      "loss": 2.8539,
      "step": 204765
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7847466468811035,
      "learning_rate": 1.8143547665463997e-05,
      "loss": 2.7589,
      "step": 204766
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3936617374420166,
      "learning_rate": 1.8142146715407334e-05,
      "loss": 2.7128,
      "step": 204767
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.937950849533081,
      "learning_rate": 1.8140745817753387e-05,
      "loss": 3.1975,
      "step": 204768
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9398841857910156,
      "learning_rate": 1.813934497250239e-05,
      "loss": 2.8707,
      "step": 204769
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.3905439376831055,
      "learning_rate": 1.813794417965457e-05,
      "loss": 3.1138,
      "step": 204770
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3979806900024414,
      "learning_rate": 1.8136543439210272e-05,
      "loss": 2.869,
      "step": 204771
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.726250171661377,
      "learning_rate": 1.8135142751169685e-05,
      "loss": 2.9187,
      "step": 204772
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.737654209136963,
      "learning_rate": 1.8133742115533112e-05,
      "loss": 3.1031,
      "step": 204773
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7397773265838623,
      "learning_rate": 1.813234153230082e-05,
      "loss": 3.0844,
      "step": 204774
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5713508129119873,
      "learning_rate": 1.813094100147301e-05,
      "loss": 2.882,
      "step": 204775
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.10385799407959,
      "learning_rate": 1.8129540523049946e-05,
      "loss": 3.1754,
      "step": 204776
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6065351963043213,
      "learning_rate": 1.8128140097031994e-05,
      "loss": 3.0422,
      "step": 204777
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3583924770355225,
      "learning_rate": 1.812673972341926e-05,
      "loss": 3.1374,
      "step": 204778
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7135422229766846,
      "learning_rate": 1.812533940221217e-05,
      "loss": 2.9607,
      "step": 204779
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3715837001800537,
      "learning_rate": 1.8123939133410858e-05,
      "loss": 2.7857,
      "step": 204780
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.939404249191284,
      "learning_rate": 1.8122538917015662e-05,
      "loss": 2.6947,
      "step": 204781
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1693313121795654,
      "learning_rate": 1.8121138753026743e-05,
      "loss": 2.8526,
      "step": 204782
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0940780639648438,
      "learning_rate": 1.811973864144447e-05,
      "loss": 2.7624,
      "step": 204783
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.085660696029663,
      "learning_rate": 1.811833858226904e-05,
      "loss": 2.8673,
      "step": 204784
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.021320343017578,
      "learning_rate": 1.8116938575500727e-05,
      "loss": 2.6935,
      "step": 204785
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5287468433380127,
      "learning_rate": 1.811553862113979e-05,
      "loss": 3.1062,
      "step": 204786
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.015672445297241,
      "learning_rate": 1.8114138719186565e-05,
      "loss": 2.9206,
      "step": 204787
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0400021076202393,
      "learning_rate": 1.8112738869641186e-05,
      "loss": 2.7065,
      "step": 204788
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1437220573425293,
      "learning_rate": 1.8111339072503985e-05,
      "loss": 2.7169,
      "step": 204789
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.935906410217285,
      "learning_rate": 1.8109939327775158e-05,
      "loss": 2.9942,
      "step": 204790
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8937184810638428,
      "learning_rate": 1.8108539635455075e-05,
      "loss": 2.8622,
      "step": 204791
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7435739040374756,
      "learning_rate": 1.8107139995543872e-05,
      "loss": 2.7017,
      "step": 204792
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9350204467773438,
      "learning_rate": 1.8105740408041945e-05,
      "loss": 2.9633,
      "step": 204793
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.885824680328369,
      "learning_rate": 1.8104340872949463e-05,
      "loss": 2.8691,
      "step": 204794
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8040237426757812,
      "learning_rate": 1.8102941390266722e-05,
      "loss": 3.0646,
      "step": 204795
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8978915214538574,
      "learning_rate": 1.8101541959993893e-05,
      "loss": 2.9732,
      "step": 204796
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.327989101409912,
      "learning_rate": 1.8100142582131373e-05,
      "loss": 3.0273,
      "step": 204797
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1085476875305176,
      "learning_rate": 1.8098743256679326e-05,
      "loss": 2.8609,
      "step": 204798
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7430126667022705,
      "learning_rate": 1.809734398363806e-05,
      "loss": 2.7254,
      "step": 204799
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.664698600769043,
      "learning_rate": 1.809594476300783e-05,
      "loss": 2.6352,
      "step": 204800
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.298163890838623,
      "learning_rate": 1.8094545594788844e-05,
      "loss": 2.7921,
      "step": 204801
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.322756767272949,
      "learning_rate": 1.8093146478981435e-05,
      "loss": 2.9082,
      "step": 204802
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8559482097625732,
      "learning_rate": 1.8091747415585867e-05,
      "loss": 2.9193,
      "step": 204803
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.955156087875366,
      "learning_rate": 1.8090348404602272e-05,
      "loss": 2.8183,
      "step": 204804
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3498945236206055,
      "learning_rate": 1.8088949446031087e-05,
      "loss": 3.0273,
      "step": 204805
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5265719890594482,
      "learning_rate": 1.8087550539872474e-05,
      "loss": 2.8998,
      "step": 204806
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.838792324066162,
      "learning_rate": 1.8086151686126637e-05,
      "loss": 2.7857,
      "step": 204807
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9238874912261963,
      "learning_rate": 1.8084752884793973e-05,
      "loss": 3.0991,
      "step": 204808
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.2154340744018555,
      "learning_rate": 1.8083354135874684e-05,
      "loss": 2.9945,
      "step": 204809
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9462640285491943,
      "learning_rate": 1.8081955439368967e-05,
      "loss": 2.8362,
      "step": 204810
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.853377342224121,
      "learning_rate": 1.8080556795277192e-05,
      "loss": 2.9885,
      "step": 204811
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.027331829071045,
      "learning_rate": 1.8079158203599554e-05,
      "loss": 2.9881,
      "step": 204812
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1453328132629395,
      "learning_rate": 1.8077759664336254e-05,
      "loss": 2.9679,
      "step": 204813
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7230746746063232,
      "learning_rate": 1.8076361177487696e-05,
      "loss": 2.6864,
      "step": 204814
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.899622678756714,
      "learning_rate": 1.807496274305401e-05,
      "loss": 2.8293,
      "step": 204815
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8514516353607178,
      "learning_rate": 1.8073564361035563e-05,
      "loss": 2.7452,
      "step": 204816
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4893317222595215,
      "learning_rate": 1.8072166031432556e-05,
      "loss": 2.7202,
      "step": 204817
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.684586524963379,
      "learning_rate": 1.807076775424525e-05,
      "loss": 2.7046,
      "step": 204818
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.225839853286743,
      "learning_rate": 1.8069369529473853e-05,
      "loss": 3.0561,
      "step": 204819
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9718098640441895,
      "learning_rate": 1.806797135711876e-05,
      "loss": 2.9899,
      "step": 204820
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.766286849975586,
      "learning_rate": 1.806657323718007e-05,
      "loss": 2.8808,
      "step": 204821
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8031296730041504,
      "learning_rate": 1.8065175169658185e-05,
      "loss": 3.0846,
      "step": 204822
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.447136402130127,
      "learning_rate": 1.8063777154553337e-05,
      "loss": 3.1271,
      "step": 204823
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2900023460388184,
      "learning_rate": 1.8062379191865727e-05,
      "loss": 2.8793,
      "step": 204824
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5674924850463867,
      "learning_rate": 1.806098128159559e-05,
      "loss": 2.8093,
      "step": 204825
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8621091842651367,
      "learning_rate": 1.8059583423743285e-05,
      "loss": 2.9043,
      "step": 204826
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.872725248336792,
      "learning_rate": 1.8058185618309018e-05,
      "loss": 2.6068,
      "step": 204827
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.293100118637085,
      "learning_rate": 1.8056787865293054e-05,
      "loss": 2.6805,
      "step": 204828
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7150957584381104,
      "learning_rate": 1.8055390164695693e-05,
      "loss": 3.0542,
      "step": 204829
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8599870204925537,
      "learning_rate": 1.8053992516517135e-05,
      "loss": 3.0849,
      "step": 204830
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.2074971199035645,
      "learning_rate": 1.805259492075761e-05,
      "loss": 2.8028,
      "step": 204831
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8002517223358154,
      "learning_rate": 1.805119737741749e-05,
      "loss": 2.9326,
      "step": 204832
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.28717303276062,
      "learning_rate": 1.804979988649691e-05,
      "loss": 2.7629,
      "step": 204833
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4050204753875732,
      "learning_rate": 1.8048402447996258e-05,
      "loss": 2.9713,
      "step": 204834
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.613941192626953,
      "learning_rate": 1.8047005061915742e-05,
      "loss": 3.0741,
      "step": 204835
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0437607765197754,
      "learning_rate": 1.8045607728255595e-05,
      "loss": 3.124,
      "step": 204836
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6889564990997314,
      "learning_rate": 1.8044210447016017e-05,
      "loss": 2.9779,
      "step": 204837
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1303272247314453,
      "learning_rate": 1.8042813218197437e-05,
      "loss": 2.893,
      "step": 204838
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.614706039428711,
      "learning_rate": 1.804141604179996e-05,
      "loss": 2.8756,
      "step": 204839
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9051930904388428,
      "learning_rate": 1.8040018917823952e-05,
      "loss": 3.0898,
      "step": 204840
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.566561222076416,
      "learning_rate": 1.803862184626961e-05,
      "loss": 2.8644,
      "step": 204841
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.228177785873413,
      "learning_rate": 1.803722482713723e-05,
      "loss": 2.8747,
      "step": 204842
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.305917263031006,
      "learning_rate": 1.803582786042699e-05,
      "loss": 2.9616,
      "step": 204843
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2860543727874756,
      "learning_rate": 1.803443094613928e-05,
      "loss": 2.9309,
      "step": 204844
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8964922428131104,
      "learning_rate": 1.80330340842742e-05,
      "loss": 3.1278,
      "step": 204845
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3777434825897217,
      "learning_rate": 1.803163727483219e-05,
      "loss": 2.8089,
      "step": 204846
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.744638442993164,
      "learning_rate": 1.803024051781341e-05,
      "loss": 2.7968,
      "step": 204847
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.589355707168579,
      "learning_rate": 1.802884381321813e-05,
      "loss": 2.8071,
      "step": 204848
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9607150554656982,
      "learning_rate": 1.8027447161046582e-05,
      "loss": 2.7498,
      "step": 204849
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.60398530960083,
      "learning_rate": 1.8026050561299064e-05,
      "loss": 2.9227,
      "step": 204850
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.071012258529663,
      "learning_rate": 1.8024654013975814e-05,
      "loss": 3.05,
      "step": 204851
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.047466516494751,
      "learning_rate": 1.8023257519077128e-05,
      "loss": 2.7665,
      "step": 204852
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.786041259765625,
      "learning_rate": 1.8021861076603204e-05,
      "loss": 2.7412,
      "step": 204853
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4418528079986572,
      "learning_rate": 1.8020464686554447e-05,
      "loss": 2.8288,
      "step": 204854
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5654666423797607,
      "learning_rate": 1.8019068348930887e-05,
      "loss": 2.9667,
      "step": 204855
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.122089385986328,
      "learning_rate": 1.8017672063732957e-05,
      "loss": 2.7548,
      "step": 204856
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.698340892791748,
      "learning_rate": 1.8016275830960826e-05,
      "loss": 2.8197,
      "step": 204857
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.047755002975464,
      "learning_rate": 1.8014879650614855e-05,
      "loss": 3.218,
      "step": 204858
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8455607891082764,
      "learning_rate": 1.801348352269518e-05,
      "loss": 2.8884,
      "step": 204859
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9224467277526855,
      "learning_rate": 1.8012087447202206e-05,
      "loss": 2.7027,
      "step": 204860
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.647365093231201,
      "learning_rate": 1.8010691424136026e-05,
      "loss": 2.9476,
      "step": 204861
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0956242084503174,
      "learning_rate": 1.800929545349701e-05,
      "loss": 2.4938,
      "step": 204862
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0169777870178223,
      "learning_rate": 1.8007899535285352e-05,
      "loss": 2.932,
      "step": 204863
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.638839244842529,
      "learning_rate": 1.8006503669501393e-05,
      "loss": 2.9045,
      "step": 204864
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.93485426902771,
      "learning_rate": 1.8005107856145294e-05,
      "loss": 2.7141,
      "step": 204865
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.28755521774292,
      "learning_rate": 1.8003712095217492e-05,
      "loss": 3.0538,
      "step": 204866
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9731853008270264,
      "learning_rate": 1.8002316386718017e-05,
      "loss": 3.1764,
      "step": 204867
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.781724214553833,
      "learning_rate": 1.8000920730647273e-05,
      "loss": 2.9565,
      "step": 204868
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.82761549949646,
      "learning_rate": 1.7999525127005422e-05,
      "loss": 3.1229,
      "step": 204869
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.926741361618042,
      "learning_rate": 1.7998129575792864e-05,
      "loss": 2.75,
      "step": 204870
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9275150299072266,
      "learning_rate": 1.79967340770097e-05,
      "loss": 2.7577,
      "step": 204871
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7711427211761475,
      "learning_rate": 1.7995338630656362e-05,
      "loss": 3.1175,
      "step": 204872
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5374081134796143,
      "learning_rate": 1.799394323673292e-05,
      "loss": 2.9483,
      "step": 204873
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.415443181991577,
      "learning_rate": 1.7992547895239808e-05,
      "loss": 3.07,
      "step": 204874
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.354182243347168,
      "learning_rate": 1.7991152606177116e-05,
      "loss": 2.7515,
      "step": 204875
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3593199253082275,
      "learning_rate": 1.7989757369545256e-05,
      "loss": 2.9375,
      "step": 204876
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.927375316619873,
      "learning_rate": 1.7988362185344383e-05,
      "loss": 2.7359,
      "step": 204877
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4856505393981934,
      "learning_rate": 1.798696705357484e-05,
      "loss": 2.9683,
      "step": 204878
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.588568687438965,
      "learning_rate": 1.798557197423682e-05,
      "loss": 2.8095,
      "step": 204879
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9370381832122803,
      "learning_rate": 1.798417694733063e-05,
      "loss": 3.0658,
      "step": 204880
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6703155040740967,
      "learning_rate": 1.7982781972856464e-05,
      "loss": 2.6973,
      "step": 204881
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4761831760406494,
      "learning_rate": 1.7981387050814657e-05,
      "loss": 3.1575,
      "step": 204882
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.301955223083496,
      "learning_rate": 1.797999218120537e-05,
      "loss": 2.8383,
      "step": 204883
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2477827072143555,
      "learning_rate": 1.7978597364029014e-05,
      "loss": 3.0369,
      "step": 204884
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.336346387863159,
      "learning_rate": 1.797720259928571e-05,
      "loss": 2.8447,
      "step": 204885
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.26053524017334,
      "learning_rate": 1.7975807886975803e-05,
      "loss": 2.5463,
      "step": 204886
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.333338737487793,
      "learning_rate": 1.797441322709948e-05,
      "loss": 2.7772,
      "step": 204887
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8771467208862305,
      "learning_rate": 1.7973018619657054e-05,
      "loss": 2.9104,
      "step": 204888
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.710594654083252,
      "learning_rate": 1.7971624064648748e-05,
      "loss": 2.7681,
      "step": 204889
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.591925859451294,
      "learning_rate": 1.7970229562074867e-05,
      "loss": 2.9904,
      "step": 204890
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.850280284881592,
      "learning_rate": 1.7968835111935643e-05,
      "loss": 2.8241,
      "step": 204891
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.015555381774902,
      "learning_rate": 1.7967440714231306e-05,
      "loss": 3.0132,
      "step": 204892
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.15047550201416,
      "learning_rate": 1.796604636896216e-05,
      "loss": 2.8374,
      "step": 204893
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0037906169891357,
      "learning_rate": 1.796465207612847e-05,
      "loss": 2.6219,
      "step": 204894
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.296233892440796,
      "learning_rate": 1.7963257835730438e-05,
      "loss": 2.9034,
      "step": 204895
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.197561740875244,
      "learning_rate": 1.7961863647768393e-05,
      "loss": 2.9398,
      "step": 204896
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.373042583465576,
      "learning_rate": 1.796046951224257e-05,
      "loss": 2.7528,
      "step": 204897
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.860191583633423,
      "learning_rate": 1.795907542915317e-05,
      "loss": 3.0105,
      "step": 204898
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6181998252868652,
      "learning_rate": 1.7957681398500555e-05,
      "loss": 2.9416,
      "step": 204899
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7647483348846436,
      "learning_rate": 1.7956287420284864e-05,
      "loss": 3.0981,
      "step": 204900
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.721778392791748,
      "learning_rate": 1.7954893494506495e-05,
      "loss": 2.9012,
      "step": 204901
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.769152879714966,
      "learning_rate": 1.7953499621165612e-05,
      "loss": 2.9093,
      "step": 204902
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4680845737457275,
      "learning_rate": 1.7952105800262516e-05,
      "loss": 2.8266,
      "step": 204903
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0126729011535645,
      "learning_rate": 1.795071203179741e-05,
      "loss": 2.9404,
      "step": 204904
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.23665189743042,
      "learning_rate": 1.7949318315770624e-05,
      "loss": 2.7103,
      "step": 204905
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9109272956848145,
      "learning_rate": 1.7947924652182322e-05,
      "loss": 2.962,
      "step": 204906
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.355355739593506,
      "learning_rate": 1.7946531041032874e-05,
      "loss": 3.1112,
      "step": 204907
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5025954246520996,
      "learning_rate": 1.7945137482322515e-05,
      "loss": 3.1443,
      "step": 204908
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9961180686950684,
      "learning_rate": 1.794374397605147e-05,
      "loss": 2.8088,
      "step": 204909
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.881418228149414,
      "learning_rate": 1.7942350522219952e-05,
      "loss": 2.9014,
      "step": 204910
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.0329203605651855,
      "learning_rate": 1.7940957120828314e-05,
      "loss": 2.8827,
      "step": 204911
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.094662666320801,
      "learning_rate": 1.7939563771876763e-05,
      "loss": 2.8167,
      "step": 204912
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0017545223236084,
      "learning_rate": 1.7938170475365598e-05,
      "loss": 2.6449,
      "step": 204913
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.782335042953491,
      "learning_rate": 1.7936777231294985e-05,
      "loss": 3.0531,
      "step": 204914
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.681199789047241,
      "learning_rate": 1.7935384039665358e-05,
      "loss": 2.8085,
      "step": 204915
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0393128395080566,
      "learning_rate": 1.793399090047678e-05,
      "loss": 2.8459,
      "step": 204916
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7630083560943604,
      "learning_rate": 1.7932597813729655e-05,
      "loss": 2.9644,
      "step": 204917
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.575164318084717,
      "learning_rate": 1.7931204779424112e-05,
      "loss": 3.0835,
      "step": 204918
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4709999561309814,
      "learning_rate": 1.7929811797560557e-05,
      "loss": 2.7529,
      "step": 204919
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5363645553588867,
      "learning_rate": 1.7928418868139116e-05,
      "loss": 2.9424,
      "step": 204920
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7339673042297363,
      "learning_rate": 1.7927025991160192e-05,
      "loss": 2.8729,
      "step": 204921
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.110722780227661,
      "learning_rate": 1.7925633166623853e-05,
      "loss": 2.9302,
      "step": 204922
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.777677059173584,
      "learning_rate": 1.792424039453053e-05,
      "loss": 2.8143,
      "step": 204923
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.703693389892578,
      "learning_rate": 1.7922847674880357e-05,
      "loss": 3.1412,
      "step": 204924
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.660291910171509,
      "learning_rate": 1.7921455007673703e-05,
      "loss": 2.915,
      "step": 204925
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6948866844177246,
      "learning_rate": 1.7920062392910728e-05,
      "loss": 2.9041,
      "step": 204926
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.069429874420166,
      "learning_rate": 1.7918669830591835e-05,
      "loss": 3.0435,
      "step": 204927
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0756614208221436,
      "learning_rate": 1.7917277320717095e-05,
      "loss": 3.0787,
      "step": 204928
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0537586212158203,
      "learning_rate": 1.79158848632869e-05,
      "loss": 2.8077,
      "step": 204929
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.919667959213257,
      "learning_rate": 1.791449245830142e-05,
      "loss": 2.974,
      "step": 204930
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.02524995803833,
      "learning_rate": 1.7913100105761025e-05,
      "loss": 3.0559,
      "step": 204931
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.522183895111084,
      "learning_rate": 1.7911707805665843e-05,
      "loss": 3.0294,
      "step": 204932
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3867757320404053,
      "learning_rate": 1.7910315558016308e-05,
      "loss": 2.8103,
      "step": 204933
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7332050800323486,
      "learning_rate": 1.7908923362812455e-05,
      "loss": 2.9106,
      "step": 204934
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0535030364990234,
      "learning_rate": 1.7907531220054717e-05,
      "loss": 3.1128,
      "step": 204935
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7133123874664307,
      "learning_rate": 1.7906139129743257e-05,
      "loss": 2.8782,
      "step": 204936
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.456446647644043,
      "learning_rate": 1.790474709187838e-05,
      "loss": 3.1197,
      "step": 204937
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7579777240753174,
      "learning_rate": 1.7903355106460314e-05,
      "loss": 3.183,
      "step": 204938
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6934850215911865,
      "learning_rate": 1.7901963173489432e-05,
      "loss": 2.9812,
      "step": 204939
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6308505535125732,
      "learning_rate": 1.7900571292965827e-05,
      "loss": 3.0038,
      "step": 204940
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6035799980163574,
      "learning_rate": 1.7899179464889868e-05,
      "loss": 2.9472,
      "step": 204941
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.222855806350708,
      "learning_rate": 1.7897787689261722e-05,
      "loss": 2.7457,
      "step": 204942
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.55004620552063,
      "learning_rate": 1.7896395966081724e-05,
      "loss": 2.8841,
      "step": 204943
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.957994222640991,
      "learning_rate": 1.78950042953501e-05,
      "loss": 2.852,
      "step": 204944
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.268784046173096,
      "learning_rate": 1.7893612677067195e-05,
      "loss": 2.8292,
      "step": 204945
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9359676837921143,
      "learning_rate": 1.7892221111233097e-05,
      "loss": 2.7657,
      "step": 204946
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0439271926879883,
      "learning_rate": 1.789082959784821e-05,
      "loss": 2.7947,
      "step": 204947
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7302095890045166,
      "learning_rate": 1.7889438136912704e-05,
      "loss": 3.0639,
      "step": 204948
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.684997320175171,
      "learning_rate": 1.788804672842694e-05,
      "loss": 2.9094,
      "step": 204949
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.37751841545105,
      "learning_rate": 1.7886655372391025e-05,
      "loss": 2.8108,
      "step": 204950
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.45869517326355,
      "learning_rate": 1.788526406880545e-05,
      "loss": 3.2945,
      "step": 204951
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9778647422790527,
      "learning_rate": 1.7883872817670188e-05,
      "loss": 3.2506,
      "step": 204952
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.924487829208374,
      "learning_rate": 1.78824816189857e-05,
      "loss": 3.0617,
      "step": 204953
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0501904487609863,
      "learning_rate": 1.7881090472752158e-05,
      "loss": 3.0519,
      "step": 204954
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0521271228790283,
      "learning_rate": 1.787969937896989e-05,
      "loss": 2.9086,
      "step": 204955
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8615190982818604,
      "learning_rate": 1.7878308337639035e-05,
      "loss": 2.866,
      "step": 204956
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4317033290863037,
      "learning_rate": 1.7876917348760054e-05,
      "loss": 2.8945,
      "step": 204957
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.22894024848938,
      "learning_rate": 1.787552641233295e-05,
      "loss": 2.7906,
      "step": 204958
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.325624704360962,
      "learning_rate": 1.787413552835819e-05,
      "loss": 2.8232,
      "step": 204959
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.95086932182312,
      "learning_rate": 1.7872744696835906e-05,
      "loss": 3.0011,
      "step": 204960
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9004952907562256,
      "learning_rate": 1.787135391776643e-05,
      "loss": 3.0239,
      "step": 204961
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6420891284942627,
      "learning_rate": 1.786996319114996e-05,
      "loss": 2.9996,
      "step": 204962
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7522456645965576,
      "learning_rate": 1.7868572516986834e-05,
      "loss": 3.1026,
      "step": 204963
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.804893970489502,
      "learning_rate": 1.7867181895277283e-05,
      "loss": 2.7465,
      "step": 204964
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.022249698638916,
      "learning_rate": 1.7865791326021538e-05,
      "loss": 3.2347,
      "step": 204965
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.802639961242676,
      "learning_rate": 1.7864400809219802e-05,
      "loss": 2.774,
      "step": 204966
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.873298168182373,
      "learning_rate": 1.7863010344872475e-05,
      "loss": 3.0298,
      "step": 204967
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0564796924591064,
      "learning_rate": 1.7861619932979687e-05,
      "loss": 2.8879,
      "step": 204968
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.815145492553711,
      "learning_rate": 1.7860229573541772e-05,
      "loss": 3.0162,
      "step": 204969
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7334110736846924,
      "learning_rate": 1.7858839266559e-05,
      "loss": 3.2218,
      "step": 204970
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.222320318222046,
      "learning_rate": 1.7857449012031565e-05,
      "loss": 3.127,
      "step": 204971
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8511409759521484,
      "learning_rate": 1.785605880995974e-05,
      "loss": 3.0837,
      "step": 204972
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.855717182159424,
      "learning_rate": 1.785466866034382e-05,
      "loss": 3.0288,
      "step": 204973
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.032273769378662,
      "learning_rate": 1.7853278563184004e-05,
      "loss": 2.9926,
      "step": 204974
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.732893228530884,
      "learning_rate": 1.785188851848063e-05,
      "loss": 2.9687,
      "step": 204975
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.570044994354248,
      "learning_rate": 1.7850498526233925e-05,
      "loss": 2.9644,
      "step": 204976
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.104253053665161,
      "learning_rate": 1.7849108586444095e-05,
      "loss": 2.9096,
      "step": 204977
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0353167057037354,
      "learning_rate": 1.7847718699111468e-05,
      "loss": 3.0234,
      "step": 204978
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8667263984680176,
      "learning_rate": 1.7846328864236314e-05,
      "loss": 2.9354,
      "step": 204979
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.025444507598877,
      "learning_rate": 1.7844939081818764e-05,
      "loss": 2.9046,
      "step": 204980
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.679769277572632,
      "learning_rate": 1.7843549351859255e-05,
      "loss": 2.8432,
      "step": 204981
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4783406257629395,
      "learning_rate": 1.7842159674357913e-05,
      "loss": 2.7857,
      "step": 204982
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.615506649017334,
      "learning_rate": 1.7840770049315046e-05,
      "loss": 2.9202,
      "step": 204983
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.578629732131958,
      "learning_rate": 1.7839380476730915e-05,
      "loss": 2.976,
      "step": 204984
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.367931842803955,
      "learning_rate": 1.7837990956605753e-05,
      "loss": 2.9592,
      "step": 204985
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2391679286956787,
      "learning_rate": 1.783660148893983e-05,
      "loss": 3.0306,
      "step": 204986
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.634003639221191,
      "learning_rate": 1.783521207373344e-05,
      "loss": 3.1159,
      "step": 204987
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.484740972518921,
      "learning_rate": 1.783382271098679e-05,
      "loss": 2.8571,
      "step": 204988
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.504514694213867,
      "learning_rate": 1.7832433400700143e-05,
      "loss": 2.948,
      "step": 204989
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.009936809539795,
      "learning_rate": 1.7831044142873797e-05,
      "loss": 2.9822,
      "step": 204990
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5769567489624023,
      "learning_rate": 1.7829654937507952e-05,
      "loss": 2.922,
      "step": 204991
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3620476722717285,
      "learning_rate": 1.7828265784602947e-05,
      "loss": 2.8696,
      "step": 204992
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.110417127609253,
      "learning_rate": 1.7826876684158975e-05,
      "loss": 2.9539,
      "step": 204993
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.272366046905518,
      "learning_rate": 1.7825487636176338e-05,
      "loss": 2.7954,
      "step": 204994
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5085535049438477,
      "learning_rate": 1.7824098640655205e-05,
      "loss": 3.1192,
      "step": 204995
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.253415822982788,
      "learning_rate": 1.7822709697595972e-05,
      "loss": 2.912,
      "step": 204996
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2356674671173096,
      "learning_rate": 1.782132080699874e-05,
      "loss": 3.0224,
      "step": 204997
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.868166446685791,
      "learning_rate": 1.781993196886391e-05,
      "loss": 2.9546,
      "step": 204998
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.703760862350464,
      "learning_rate": 1.7818543183191647e-05,
      "loss": 3.0251,
      "step": 204999
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1762747764587402,
      "learning_rate": 1.781715444998235e-05,
      "loss": 3.1304,
      "step": 205000
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.580296277999878,
      "learning_rate": 1.781576576923606e-05,
      "loss": 2.8301,
      "step": 205001
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.447402238845825,
      "learning_rate": 1.781437714095316e-05,
      "loss": 2.8753,
      "step": 205002
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.632810354232788,
      "learning_rate": 1.7812988565133902e-05,
      "loss": 3.2638,
      "step": 205003
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7855348587036133,
      "learning_rate": 1.781160004177854e-05,
      "loss": 2.8959,
      "step": 205004
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.793362617492676,
      "learning_rate": 1.7810211570887312e-05,
      "loss": 2.7397,
      "step": 205005
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5866756439208984,
      "learning_rate": 1.780882315246058e-05,
      "loss": 2.8215,
      "step": 205006
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.11535382270813,
      "learning_rate": 1.7807434786498387e-05,
      "loss": 3.0168,
      "step": 205007
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6721537113189697,
      "learning_rate": 1.7806046473001224e-05,
      "loss": 2.9196,
      "step": 205008
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.855895519256592,
      "learning_rate": 1.780465821196916e-05,
      "loss": 2.8672,
      "step": 205009
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.943805456161499,
      "learning_rate": 1.7803270003402593e-05,
      "loss": 2.7254,
      "step": 205010
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8137147426605225,
      "learning_rate": 1.7801881847301658e-05,
      "loss": 2.8579,
      "step": 205011
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.797577381134033,
      "learning_rate": 1.7800493743666787e-05,
      "loss": 2.9026,
      "step": 205012
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3767011165618896,
      "learning_rate": 1.779910569249805e-05,
      "loss": 2.7231,
      "step": 205013
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9190218448638916,
      "learning_rate": 1.779771769379581e-05,
      "loss": 2.8213,
      "step": 205014
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.861909866333008,
      "learning_rate": 1.7796329747560268e-05,
      "loss": 2.8624,
      "step": 205015
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.108506917953491,
      "learning_rate": 1.779494185379179e-05,
      "loss": 2.665,
      "step": 205016
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0938796997070312,
      "learning_rate": 1.7793554012490473e-05,
      "loss": 2.7413,
      "step": 205017
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.813960075378418,
      "learning_rate": 1.7792166223656755e-05,
      "loss": 2.8259,
      "step": 205018
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.570643663406372,
      "learning_rate": 1.7790778487290735e-05,
      "loss": 2.8262,
      "step": 205019
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0161221027374268,
      "learning_rate": 1.7789390803392745e-05,
      "loss": 2.8178,
      "step": 205020
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.998572826385498,
      "learning_rate": 1.7788003171963018e-05,
      "loss": 3.0205,
      "step": 205021
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.679415225982666,
      "learning_rate": 1.7786615593001886e-05,
      "loss": 3.0206,
      "step": 205022
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7873668670654297,
      "learning_rate": 1.778522806650948e-05,
      "loss": 2.8748,
      "step": 205023
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2401583194732666,
      "learning_rate": 1.7783840592486208e-05,
      "loss": 2.8951,
      "step": 205024
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1264777183532715,
      "learning_rate": 1.7782453170932166e-05,
      "loss": 2.8234,
      "step": 205025
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.199199914932251,
      "learning_rate": 1.778106580184775e-05,
      "loss": 3.0014,
      "step": 205026
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6665539741516113,
      "learning_rate": 1.777967848523313e-05,
      "loss": 3.029,
      "step": 205027
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8249258995056152,
      "learning_rate": 1.7778291221088604e-05,
      "loss": 2.7027,
      "step": 205028
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8178257942199707,
      "learning_rate": 1.7776904009414405e-05,
      "loss": 2.9549,
      "step": 205029
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.524252891540527,
      "learning_rate": 1.777551685021087e-05,
      "loss": 2.7268,
      "step": 205030
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7382993698120117,
      "learning_rate": 1.7774129743478127e-05,
      "loss": 2.9922,
      "step": 205031
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.706991672515869,
      "learning_rate": 1.777274268921651e-05,
      "loss": 2.7889,
      "step": 205032
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.752620220184326,
      "learning_rate": 1.777135568742626e-05,
      "loss": 2.8018,
      "step": 205033
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9713215827941895,
      "learning_rate": 1.7769968738107665e-05,
      "loss": 2.9959,
      "step": 205034
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.203216314315796,
      "learning_rate": 1.7768581841260932e-05,
      "loss": 2.7886,
      "step": 205035
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7151474952697754,
      "learning_rate": 1.7767194996886457e-05,
      "loss": 2.9732,
      "step": 205036
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2578229904174805,
      "learning_rate": 1.7765808204984244e-05,
      "loss": 3.0946,
      "step": 205037
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.526090621948242,
      "learning_rate": 1.776442146555479e-05,
      "loss": 2.8844,
      "step": 205038
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.155634641647339,
      "learning_rate": 1.7763034778598196e-05,
      "loss": 2.9292,
      "step": 205039
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.104454755783081,
      "learning_rate": 1.7761648144114793e-05,
      "loss": 2.9307,
      "step": 205040
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.078941345214844,
      "learning_rate": 1.7760261562104816e-05,
      "loss": 2.7224,
      "step": 205041
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9363465309143066,
      "learning_rate": 1.7758875032568632e-05,
      "loss": 2.9865,
      "step": 205042
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.765800714492798,
      "learning_rate": 1.7757488555506305e-05,
      "loss": 2.8821,
      "step": 205043
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9942800998687744,
      "learning_rate": 1.7756102130918205e-05,
      "loss": 2.9652,
      "step": 205044
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.526254415512085,
      "learning_rate": 1.775471575880456e-05,
      "loss": 2.9373,
      "step": 205045
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6253647804260254,
      "learning_rate": 1.7753329439165676e-05,
      "loss": 2.9005,
      "step": 205046
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1968727111816406,
      "learning_rate": 1.775194317200175e-05,
      "loss": 2.9885,
      "step": 205047
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.016934633255005,
      "learning_rate": 1.7750556957313078e-05,
      "loss": 2.8258,
      "step": 205048
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.995241641998291,
      "learning_rate": 1.7749170795099898e-05,
      "loss": 3.0018,
      "step": 205049
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.526822805404663,
      "learning_rate": 1.774778468536251e-05,
      "loss": 2.8945,
      "step": 205050
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8829801082611084,
      "learning_rate": 1.7746398628101078e-05,
      "loss": 2.8812,
      "step": 205051
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.428621768951416,
      "learning_rate": 1.7745012623315967e-05,
      "loss": 3.1217,
      "step": 205052
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7739779949188232,
      "learning_rate": 1.774362667100735e-05,
      "loss": 2.9561,
      "step": 205053
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9820549488067627,
      "learning_rate": 1.7742240771175553e-05,
      "loss": 2.6937,
      "step": 205054
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6746742725372314,
      "learning_rate": 1.774085492382078e-05,
      "loss": 2.8604,
      "step": 205055
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4282145500183105,
      "learning_rate": 1.7739469128943328e-05,
      "loss": 2.9117,
      "step": 205056
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.205329656600952,
      "learning_rate": 1.77380833865434e-05,
      "loss": 2.7027,
      "step": 205057
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.598597288131714,
      "learning_rate": 1.7736697696621327e-05,
      "loss": 3.0821,
      "step": 205058
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.827852487564087,
      "learning_rate": 1.773531205917731e-05,
      "loss": 3.0049,
      "step": 205059
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.600229263305664,
      "learning_rate": 1.7733926474211647e-05,
      "loss": 2.9829,
      "step": 205060
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.189622163772583,
      "learning_rate": 1.7732540941724573e-05,
      "loss": 2.8373,
      "step": 205061
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7126524448394775,
      "learning_rate": 1.773115546171632e-05,
      "loss": 3.0283,
      "step": 205062
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.503082513809204,
      "learning_rate": 1.7729770034187184e-05,
      "loss": 2.9551,
      "step": 205063
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3067305088043213,
      "learning_rate": 1.772838465913744e-05,
      "loss": 2.9265,
      "step": 205064
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2852697372436523,
      "learning_rate": 1.7726999336567283e-05,
      "loss": 3.0675,
      "step": 205065
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.392563819885254,
      "learning_rate": 1.7725614066477013e-05,
      "loss": 2.9203,
      "step": 205066
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7862465381622314,
      "learning_rate": 1.7724228848866894e-05,
      "loss": 3.017,
      "step": 205067
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.669175863265991,
      "learning_rate": 1.772284368373713e-05,
      "loss": 3.0498,
      "step": 205068
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3030478954315186,
      "learning_rate": 1.7721458571088087e-05,
      "loss": 2.9298,
      "step": 205069
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9004809856414795,
      "learning_rate": 1.772007351091993e-05,
      "loss": 2.9723,
      "step": 205070
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6588809490203857,
      "learning_rate": 1.7718688503232893e-05,
      "loss": 2.9991,
      "step": 205071
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5748565196990967,
      "learning_rate": 1.771730354802734e-05,
      "loss": 3.0185,
      "step": 205072
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2437498569488525,
      "learning_rate": 1.771591864530344e-05,
      "loss": 2.5205,
      "step": 205073
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.901510715484619,
      "learning_rate": 1.771453379506146e-05,
      "loss": 2.7799,
      "step": 205074
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.847273826599121,
      "learning_rate": 1.7713148997301698e-05,
      "loss": 2.9388,
      "step": 205075
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.170339584350586,
      "learning_rate": 1.771176425202435e-05,
      "loss": 2.8214,
      "step": 205076
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.55926513671875,
      "learning_rate": 1.771037955922979e-05,
      "loss": 2.8626,
      "step": 205077
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6282763481140137,
      "learning_rate": 1.7708994918918183e-05,
      "loss": 3.0143,
      "step": 205078
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7631185054779053,
      "learning_rate": 1.7707610331089794e-05,
      "loss": 2.9764,
      "step": 205079
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.962573289871216,
      "learning_rate": 1.7706225795744823e-05,
      "loss": 2.9603,
      "step": 205080
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9309608936309814,
      "learning_rate": 1.7704841312883667e-05,
      "loss": 3.0315,
      "step": 205081
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0975382328033447,
      "learning_rate": 1.770345688250646e-05,
      "loss": 2.763,
      "step": 205082
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6103720664978027,
      "learning_rate": 1.7702072504613574e-05,
      "loss": 3.0108,
      "step": 205083
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4911513328552246,
      "learning_rate": 1.7700688179205136e-05,
      "loss": 3.1501,
      "step": 205084
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7837774753570557,
      "learning_rate": 1.7699303906281547e-05,
      "loss": 2.8788,
      "step": 205085
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0411994457244873,
      "learning_rate": 1.769791968584291e-05,
      "loss": 3.0693,
      "step": 205086
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9437379837036133,
      "learning_rate": 1.7696535517889586e-05,
      "loss": 2.9964,
      "step": 205087
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.773341655731201,
      "learning_rate": 1.769515140242178e-05,
      "loss": 2.7053,
      "step": 205088
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5441927909851074,
      "learning_rate": 1.7693767339439824e-05,
      "loss": 2.9098,
      "step": 205089
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9279778003692627,
      "learning_rate": 1.7692383328943883e-05,
      "loss": 2.8782,
      "step": 205090
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.809645175933838,
      "learning_rate": 1.7690999370934323e-05,
      "loss": 2.7612,
      "step": 205091
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1514410972595215,
      "learning_rate": 1.7689615465411243e-05,
      "loss": 2.9336,
      "step": 205092
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9321422576904297,
      "learning_rate": 1.7688231612375083e-05,
      "loss": 2.7712,
      "step": 205093
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8822460174560547,
      "learning_rate": 1.768684781182593e-05,
      "loss": 2.7594,
      "step": 205094
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8041720390319824,
      "learning_rate": 1.7685464063764166e-05,
      "loss": 2.9186,
      "step": 205095
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.870018720626831,
      "learning_rate": 1.7684080368189946e-05,
      "loss": 2.8965,
      "step": 205096
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7689785957336426,
      "learning_rate": 1.7682696725103708e-05,
      "loss": 2.7587,
      "step": 205097
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1172966957092285,
      "learning_rate": 1.7681313134505483e-05,
      "loss": 2.804,
      "step": 205098
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8918569087982178,
      "learning_rate": 1.767992959639567e-05,
      "loss": 2.9595,
      "step": 205099
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.16176438331604,
      "learning_rate": 1.767854611077444e-05,
      "loss": 2.8435,
      "step": 205100
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9715287685394287,
      "learning_rate": 1.7677162677642153e-05,
      "loss": 2.8933,
      "step": 205101
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8449225425720215,
      "learning_rate": 1.7675779296998947e-05,
      "loss": 2.89,
      "step": 205102
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.195143699645996,
      "learning_rate": 1.7674395968845258e-05,
      "loss": 2.8799,
      "step": 205103
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9930174350738525,
      "learning_rate": 1.767301269318111e-05,
      "loss": 2.923,
      "step": 205104
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.69242000579834,
      "learning_rate": 1.7671629470006942e-05,
      "loss": 2.9375,
      "step": 205105
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.727571964263916,
      "learning_rate": 1.7670246299322888e-05,
      "loss": 2.8632,
      "step": 205106
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.065289258956909,
      "learning_rate": 1.7668863181129344e-05,
      "loss": 3.0052,
      "step": 205107
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.902097702026367,
      "learning_rate": 1.7667480115426415e-05,
      "loss": 3.1869,
      "step": 205108
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5680642127990723,
      "learning_rate": 1.7666097102214527e-05,
      "loss": 2.7088,
      "step": 205109
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5236384868621826,
      "learning_rate": 1.7664714141493752e-05,
      "loss": 2.7363,
      "step": 205110
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1508851051330566,
      "learning_rate": 1.7663331233264456e-05,
      "loss": 2.7827,
      "step": 205111
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.029345989227295,
      "learning_rate": 1.766194837752687e-05,
      "loss": 2.6334,
      "step": 205112
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.4310526847839355,
      "learning_rate": 1.7660565574281294e-05,
      "loss": 2.7219,
      "step": 205113
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.087364912033081,
      "learning_rate": 1.7659182823527894e-05,
      "loss": 2.9771,
      "step": 205114
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8893823623657227,
      "learning_rate": 1.7657800125267007e-05,
      "loss": 2.9596,
      "step": 205115
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.33231520652771,
      "learning_rate": 1.7656417479498896e-05,
      "loss": 3.0123,
      "step": 205116
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.267084121704102,
      "learning_rate": 1.7655034886223795e-05,
      "loss": 2.8332,
      "step": 205117
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.74448299407959,
      "learning_rate": 1.765365234544187e-05,
      "loss": 2.8917,
      "step": 205118
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8856465816497803,
      "learning_rate": 1.7652269857153523e-05,
      "loss": 3.0567,
      "step": 205119
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.673672914505005,
      "learning_rate": 1.7650887421358917e-05,
      "loss": 2.7834,
      "step": 205120
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.481752395629883,
      "learning_rate": 1.7649505038058354e-05,
      "loss": 2.936,
      "step": 205121
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9884605407714844,
      "learning_rate": 1.76481227072521e-05,
      "loss": 2.8212,
      "step": 205122
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.081697463989258,
      "learning_rate": 1.7646740428940386e-05,
      "loss": 3.1053,
      "step": 205123
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8785507678985596,
      "learning_rate": 1.7645358203123448e-05,
      "loss": 2.7466,
      "step": 205124
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3796980381011963,
      "learning_rate": 1.7643976029801587e-05,
      "loss": 2.6864,
      "step": 205125
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0662178993225098,
      "learning_rate": 1.7642593908975e-05,
      "loss": 2.7445,
      "step": 205126
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2112364768981934,
      "learning_rate": 1.7641211840644055e-05,
      "loss": 2.7325,
      "step": 205127
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.589247941970825,
      "learning_rate": 1.7639829824808916e-05,
      "loss": 3.0621,
      "step": 205128
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0704944133758545,
      "learning_rate": 1.763844786146985e-05,
      "loss": 3.1567,
      "step": 205129
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9077112674713135,
      "learning_rate": 1.7637065950627095e-05,
      "loss": 2.9376,
      "step": 205130
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.750671863555908,
      "learning_rate": 1.7635684092280978e-05,
      "loss": 3.095,
      "step": 205131
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.392664670944214,
      "learning_rate": 1.763430228643167e-05,
      "loss": 3.0169,
      "step": 205132
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7412960529327393,
      "learning_rate": 1.763292053307953e-05,
      "loss": 2.7963,
      "step": 205133
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.707170009613037,
      "learning_rate": 1.7631538832224768e-05,
      "loss": 2.9192,
      "step": 205134
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4389657974243164,
      "learning_rate": 1.7630157183867643e-05,
      "loss": 2.7758,
      "step": 205135
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.135287284851074,
      "learning_rate": 1.7628775588008325e-05,
      "loss": 3.0402,
      "step": 205136
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7607223987579346,
      "learning_rate": 1.7627394044647215e-05,
      "loss": 2.9137,
      "step": 205137
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.3659653663635254,
      "learning_rate": 1.7626012553784442e-05,
      "loss": 2.8119,
      "step": 205138
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.0392560958862305,
      "learning_rate": 1.7624631115420374e-05,
      "loss": 2.945,
      "step": 205139
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3530845642089844,
      "learning_rate": 1.7623249729555212e-05,
      "loss": 2.946,
      "step": 205140
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.729508399963379,
      "learning_rate": 1.762186839618922e-05,
      "loss": 2.9985,
      "step": 205141
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.856278896331787,
      "learning_rate": 1.7620487115322634e-05,
      "loss": 2.9941,
      "step": 205142
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.783961534500122,
      "learning_rate": 1.7619105886955754e-05,
      "loss": 3.1741,
      "step": 205143
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9422764778137207,
      "learning_rate": 1.7617724711088745e-05,
      "loss": 2.881,
      "step": 205144
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7187280654907227,
      "learning_rate": 1.7616343587722003e-05,
      "loss": 2.8657,
      "step": 205145
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.939136505126953,
      "learning_rate": 1.7614962516855702e-05,
      "loss": 3.0185,
      "step": 205146
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8084867000579834,
      "learning_rate": 1.7613581498490136e-05,
      "loss": 3.146,
      "step": 205147
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.474863290786743,
      "learning_rate": 1.761220053262544e-05,
      "loss": 3.0328,
      "step": 205148
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.489572525024414,
      "learning_rate": 1.761081961926205e-05,
      "loss": 2.9257,
      "step": 205149
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8840954303741455,
      "learning_rate": 1.7609438758400096e-05,
      "loss": 3.0033,
      "step": 205150
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.5422492027282715,
      "learning_rate": 1.760805795003991e-05,
      "loss": 2.7393,
      "step": 205151
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.851214647293091,
      "learning_rate": 1.760667719418173e-05,
      "loss": 3.0019,
      "step": 205152
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3674416542053223,
      "learning_rate": 1.7605296490825748e-05,
      "loss": 2.8284,
      "step": 205153
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.113015651702881,
      "learning_rate": 1.7603915839972305e-05,
      "loss": 3.0695,
      "step": 205154
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.590050220489502,
      "learning_rate": 1.760253524162163e-05,
      "loss": 3.0858,
      "step": 205155
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.130478620529175,
      "learning_rate": 1.760115469577392e-05,
      "loss": 2.9774,
      "step": 205156
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5088603496551514,
      "learning_rate": 1.759977420242955e-05,
      "loss": 2.8959,
      "step": 205157
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5665085315704346,
      "learning_rate": 1.7598393761588714e-05,
      "loss": 2.8534,
      "step": 205158
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3901922702789307,
      "learning_rate": 1.759701337325161e-05,
      "loss": 3.0188,
      "step": 205159
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2227425575256348,
      "learning_rate": 1.759563303741861e-05,
      "loss": 2.6849,
      "step": 205160
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.076960325241089,
      "learning_rate": 1.7594252754089842e-05,
      "loss": 2.9738,
      "step": 205161
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5324931144714355,
      "learning_rate": 1.7592872523265712e-05,
      "loss": 2.9976,
      "step": 205162
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3235363960266113,
      "learning_rate": 1.759149234494638e-05,
      "loss": 2.9028,
      "step": 205163
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9719619750976562,
      "learning_rate": 1.759011221913211e-05,
      "loss": 2.9961,
      "step": 205164
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.73675799369812,
      "learning_rate": 1.7588732145823146e-05,
      "loss": 3.0633,
      "step": 205165
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.194411277770996,
      "learning_rate": 1.7587352125019782e-05,
      "loss": 2.8976,
      "step": 205166
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.677304744720459,
      "learning_rate": 1.758597215672225e-05,
      "loss": 3.0974,
      "step": 205167
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9276134967803955,
      "learning_rate": 1.758459224093085e-05,
      "loss": 2.7594,
      "step": 205168
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.964601755142212,
      "learning_rate": 1.7583212377645785e-05,
      "loss": 2.8555,
      "step": 205169
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1068952083587646,
      "learning_rate": 1.758183256686735e-05,
      "loss": 2.8202,
      "step": 205170
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6439731121063232,
      "learning_rate": 1.7580452808595745e-05,
      "loss": 2.832,
      "step": 205171
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3299152851104736,
      "learning_rate": 1.757907310283131e-05,
      "loss": 3.0984,
      "step": 205172
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.557929515838623,
      "learning_rate": 1.7577693449574202e-05,
      "loss": 2.9178,
      "step": 205173
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8278238773345947,
      "learning_rate": 1.7576313848824763e-05,
      "loss": 2.5244,
      "step": 205174
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8332748413085938,
      "learning_rate": 1.7574934300583187e-05,
      "loss": 2.8812,
      "step": 205175
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9880027770996094,
      "learning_rate": 1.7573554804849878e-05,
      "loss": 2.7114,
      "step": 205176
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.862572431564331,
      "learning_rate": 1.7572175361624863e-05,
      "loss": 2.9581,
      "step": 205177
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3441579341888428,
      "learning_rate": 1.7570795970908545e-05,
      "loss": 3.1165,
      "step": 205178
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.329756259918213,
      "learning_rate": 1.7569416632701128e-05,
      "loss": 2.7154,
      "step": 205179
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.572481155395508,
      "learning_rate": 1.7568037347002906e-05,
      "loss": 2.8254,
      "step": 205180
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9632010459899902,
      "learning_rate": 1.756665811381408e-05,
      "loss": 2.964,
      "step": 205181
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0901260375976562,
      "learning_rate": 1.7565278933134984e-05,
      "loss": 2.898,
      "step": 205182
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3414664268493652,
      "learning_rate": 1.7563899804965852e-05,
      "loss": 3.2196,
      "step": 205183
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7302112579345703,
      "learning_rate": 1.7562520729306916e-05,
      "loss": 3.07,
      "step": 205184
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.583189487457275,
      "learning_rate": 1.7561141706158377e-05,
      "loss": 2.6278,
      "step": 205185
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7955169677734375,
      "learning_rate": 1.7559762735520633e-05,
      "loss": 2.7975,
      "step": 205186
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2157034873962402,
      "learning_rate": 1.7558383817393785e-05,
      "loss": 2.9368,
      "step": 205187
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5145020484924316,
      "learning_rate": 1.755700495177823e-05,
      "loss": 2.9208,
      "step": 205188
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4981071949005127,
      "learning_rate": 1.755562613867414e-05,
      "loss": 2.9201,
      "step": 205189
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9667370319366455,
      "learning_rate": 1.755424737808181e-05,
      "loss": 3.0064,
      "step": 205190
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9023020267486572,
      "learning_rate": 1.7552868670001406e-05,
      "loss": 2.9392,
      "step": 205191
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9189696311950684,
      "learning_rate": 1.7551490014433335e-05,
      "loss": 2.8517,
      "step": 205192
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.324703216552734,
      "learning_rate": 1.755011141137769e-05,
      "loss": 3.1147,
      "step": 205193
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0677125453948975,
      "learning_rate": 1.754873286083487e-05,
      "loss": 3.1165,
      "step": 205194
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1659393310546875,
      "learning_rate": 1.7547354362805077e-05,
      "loss": 2.8715,
      "step": 205195
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.92118501663208,
      "learning_rate": 1.754597591728858e-05,
      "loss": 2.8381,
      "step": 205196
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8625190258026123,
      "learning_rate": 1.754459752428554e-05,
      "loss": 2.8455,
      "step": 205197
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.43255615234375,
      "learning_rate": 1.7543219183796365e-05,
      "loss": 3.0275,
      "step": 205198
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.668980836868286,
      "learning_rate": 1.754184089582118e-05,
      "loss": 2.7538,
      "step": 205199
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1390764713287354,
      "learning_rate": 1.7540462660360355e-05,
      "loss": 3.3638,
      "step": 205200
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.109910488128662,
      "learning_rate": 1.7539084477414057e-05,
      "loss": 2.8005,
      "step": 205201
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7199511528015137,
      "learning_rate": 1.7537706346982615e-05,
      "loss": 2.9243,
      "step": 205202
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.975739479064941,
      "learning_rate": 1.753632826906617e-05,
      "loss": 2.7891,
      "step": 205203
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3219587802886963,
      "learning_rate": 1.7534950243665115e-05,
      "loss": 3.2993,
      "step": 205204
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7967448234558105,
      "learning_rate": 1.7533572270779582e-05,
      "loss": 3.0217,
      "step": 205205
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.77207612991333,
      "learning_rate": 1.7532194350409946e-05,
      "loss": 2.8444,
      "step": 205206
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.846926689147949,
      "learning_rate": 1.753081648255643e-05,
      "loss": 2.7784,
      "step": 205207
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4093542098999023,
      "learning_rate": 1.7529438667219243e-05,
      "loss": 3.0683,
      "step": 205208
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7203145027160645,
      "learning_rate": 1.7528060904398612e-05,
      "loss": 2.9703,
      "step": 205209
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.533236026763916,
      "learning_rate": 1.7526683194094902e-05,
      "loss": 2.9517,
      "step": 205210
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8118162155151367,
      "learning_rate": 1.7525305536308286e-05,
      "loss": 2.576,
      "step": 205211
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.832115650177002,
      "learning_rate": 1.752392793103906e-05,
      "loss": 2.892,
      "step": 205212
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.102675437927246,
      "learning_rate": 1.7522550378287493e-05,
      "loss": 3.0394,
      "step": 205213
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.674076795578003,
      "learning_rate": 1.7521172878053812e-05,
      "loss": 2.9358,
      "step": 205214
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.180903434753418,
      "learning_rate": 1.7519795430338224e-05,
      "loss": 3.0544,
      "step": 205215
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.197037220001221,
      "learning_rate": 1.751841803514109e-05,
      "loss": 2.7726,
      "step": 205216
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1535696983337402,
      "learning_rate": 1.7517040692462548e-05,
      "loss": 2.6349,
      "step": 205217
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1471002101898193,
      "learning_rate": 1.7515663402302992e-05,
      "loss": 2.9099,
      "step": 205218
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.779423236846924,
      "learning_rate": 1.7514286164662593e-05,
      "loss": 3.043,
      "step": 205219
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7923619747161865,
      "learning_rate": 1.7512908979541618e-05,
      "loss": 2.7187,
      "step": 205220
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.872931957244873,
      "learning_rate": 1.7511531846940297e-05,
      "loss": 3.1458,
      "step": 205221
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.039419174194336,
      "learning_rate": 1.7510154766858963e-05,
      "loss": 2.7435,
      "step": 205222
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9349119663238525,
      "learning_rate": 1.7508777739297754e-05,
      "loss": 2.7717,
      "step": 205223
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.544982671737671,
      "learning_rate": 1.7507400764257064e-05,
      "loss": 3.0846,
      "step": 205224
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.141146421432495,
      "learning_rate": 1.750602384173706e-05,
      "loss": 2.9317,
      "step": 205225
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.793919801712036,
      "learning_rate": 1.7504646971738045e-05,
      "loss": 2.9998,
      "step": 205226
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8402373790740967,
      "learning_rate": 1.7503270154260184e-05,
      "loss": 2.9031,
      "step": 205227
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.806330680847168,
      "learning_rate": 1.7501893389303844e-05,
      "loss": 2.8994,
      "step": 205228
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.802543878555298,
      "learning_rate": 1.750051667686919e-05,
      "loss": 2.7781,
      "step": 205229
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7467617988586426,
      "learning_rate": 1.749914001695656e-05,
      "loss": 2.9542,
      "step": 205230
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.92370343208313,
      "learning_rate": 1.749776340956618e-05,
      "loss": 2.9062,
      "step": 205231
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9806344509124756,
      "learning_rate": 1.7496386854698286e-05,
      "loss": 2.9991,
      "step": 205232
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.421903371810913,
      "learning_rate": 1.7495010352353145e-05,
      "loss": 2.9799,
      "step": 205233
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6287403106689453,
      "learning_rate": 1.7493633902531022e-05,
      "loss": 2.9244,
      "step": 205234
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.779482126235962,
      "learning_rate": 1.749225750523212e-05,
      "loss": 2.8652,
      "step": 205235
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0831704139709473,
      "learning_rate": 1.7490881160456806e-05,
      "loss": 2.8939,
      "step": 205236
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9847445487976074,
      "learning_rate": 1.748950486820524e-05,
      "loss": 2.8969,
      "step": 205237
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.513845682144165,
      "learning_rate": 1.7488128628477694e-05,
      "loss": 2.9548,
      "step": 205238
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.8152384757995605,
      "learning_rate": 1.7486752441274464e-05,
      "loss": 2.7498,
      "step": 205239
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.881631851196289,
      "learning_rate": 1.7485376306595787e-05,
      "loss": 3.0255,
      "step": 205240
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3306567668914795,
      "learning_rate": 1.7484000224441865e-05,
      "loss": 3.0605,
      "step": 205241
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9529778957366943,
      "learning_rate": 1.748262419481302e-05,
      "loss": 2.914,
      "step": 205242
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.319955587387085,
      "learning_rate": 1.748124821770953e-05,
      "loss": 3.0449,
      "step": 205243
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.202380418777466,
      "learning_rate": 1.7479872293131525e-05,
      "loss": 2.9831,
      "step": 205244
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9049265384674072,
      "learning_rate": 1.7478496421079436e-05,
      "loss": 2.7715,
      "step": 205245
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2888240814208984,
      "learning_rate": 1.747712060155333e-05,
      "loss": 3.04,
      "step": 205246
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0217936038970947,
      "learning_rate": 1.747574483455364e-05,
      "loss": 2.913,
      "step": 205247
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5618534088134766,
      "learning_rate": 1.7474369120080532e-05,
      "loss": 2.8007,
      "step": 205248
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0629031658172607,
      "learning_rate": 1.7472993458134244e-05,
      "loss": 2.8337,
      "step": 205249
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.687208414077759,
      "learning_rate": 1.747161784871507e-05,
      "loss": 3.0091,
      "step": 205250
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3411571979522705,
      "learning_rate": 1.7470242291823276e-05,
      "loss": 2.7502,
      "step": 205251
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3077828884124756,
      "learning_rate": 1.746886678745907e-05,
      "loss": 2.8666,
      "step": 205252
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1343655586242676,
      "learning_rate": 1.746749133562274e-05,
      "loss": 2.868,
      "step": 205253
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9355320930480957,
      "learning_rate": 1.7466115936314563e-05,
      "loss": 2.9505,
      "step": 205254
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5479094982147217,
      "learning_rate": 1.746474058953473e-05,
      "loss": 2.928,
      "step": 205255
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6566474437713623,
      "learning_rate": 1.746336529528355e-05,
      "loss": 2.8386,
      "step": 205256
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1793439388275146,
      "learning_rate": 1.746199005356128e-05,
      "loss": 2.8703,
      "step": 205257
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8895163536071777,
      "learning_rate": 1.746061486436813e-05,
      "loss": 3.0603,
      "step": 205258
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.471540927886963,
      "learning_rate": 1.745923972770442e-05,
      "loss": 2.9791,
      "step": 205259
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.173313617706299,
      "learning_rate": 1.7457864643570296e-05,
      "loss": 2.93,
      "step": 205260
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.235766410827637,
      "learning_rate": 1.7456489611966184e-05,
      "loss": 2.8397,
      "step": 205261
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.8801188468933105,
      "learning_rate": 1.745511463289222e-05,
      "loss": 2.8918,
      "step": 205262
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8767902851104736,
      "learning_rate": 1.7453739706348668e-05,
      "loss": 2.9474,
      "step": 205263
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.839505195617676,
      "learning_rate": 1.7452364832335764e-05,
      "loss": 2.7946,
      "step": 205264
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7666468620300293,
      "learning_rate": 1.7450990010853872e-05,
      "loss": 2.9793,
      "step": 205265
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3566131591796875,
      "learning_rate": 1.7449615241903093e-05,
      "loss": 3.212,
      "step": 205266
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2729084491729736,
      "learning_rate": 1.7448240525483824e-05,
      "loss": 2.9452,
      "step": 205267
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7875874042510986,
      "learning_rate": 1.744686586159627e-05,
      "loss": 2.7385,
      "step": 205268
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4790713787078857,
      "learning_rate": 1.744549125024066e-05,
      "loss": 3.1591,
      "step": 205269
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3478691577911377,
      "learning_rate": 1.744411669141723e-05,
      "loss": 2.8006,
      "step": 205270
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3545384407043457,
      "learning_rate": 1.744274218512631e-05,
      "loss": 2.8121,
      "step": 205271
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.697282552719116,
      "learning_rate": 1.7441367731368073e-05,
      "loss": 2.7295,
      "step": 205272
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.011159896850586,
      "learning_rate": 1.7439993330142875e-05,
      "loss": 2.8369,
      "step": 205273
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.26422119140625,
      "learning_rate": 1.743861898145089e-05,
      "loss": 2.8869,
      "step": 205274
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8927855491638184,
      "learning_rate": 1.7437244685292416e-05,
      "loss": 2.3453,
      "step": 205275
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9369680881500244,
      "learning_rate": 1.7435870441667653e-05,
      "loss": 2.9056,
      "step": 205276
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9589593410491943,
      "learning_rate": 1.7434496250576967e-05,
      "loss": 2.7399,
      "step": 205277
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8211472034454346,
      "learning_rate": 1.7433122112020457e-05,
      "loss": 2.9714,
      "step": 205278
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1645898818969727,
      "learning_rate": 1.7431748025998527e-05,
      "loss": 2.7873,
      "step": 205279
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4144089221954346,
      "learning_rate": 1.7430373992511338e-05,
      "loss": 2.9579,
      "step": 205280
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.970742702484131,
      "learning_rate": 1.7429000011559223e-05,
      "loss": 2.9673,
      "step": 205281
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.943146228790283,
      "learning_rate": 1.742762608314232e-05,
      "loss": 2.8207,
      "step": 205282
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.920503854751587,
      "learning_rate": 1.7426252207260995e-05,
      "loss": 2.7733,
      "step": 205283
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8051154613494873,
      "learning_rate": 1.742487838391544e-05,
      "loss": 2.943,
      "step": 205284
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.502049446105957,
      "learning_rate": 1.7423504613105965e-05,
      "loss": 2.9982,
      "step": 205285
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4212729930877686,
      "learning_rate": 1.7422130894832798e-05,
      "loss": 2.9916,
      "step": 205286
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.567837715148926,
      "learning_rate": 1.742075722909617e-05,
      "loss": 3.2351,
      "step": 205287
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.132368564605713,
      "learning_rate": 1.7419383615896353e-05,
      "loss": 2.9815,
      "step": 205288
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0777766704559326,
      "learning_rate": 1.741801005523361e-05,
      "loss": 2.7817,
      "step": 205289
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0152368545532227,
      "learning_rate": 1.7416636547108176e-05,
      "loss": 2.9348,
      "step": 205290
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.041798114776611,
      "learning_rate": 1.741526309152035e-05,
      "loss": 2.7504,
      "step": 205291
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1451876163482666,
      "learning_rate": 1.741388968847036e-05,
      "loss": 2.9199,
      "step": 205292
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7530500888824463,
      "learning_rate": 1.7412516337958482e-05,
      "loss": 2.9962,
      "step": 205293
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1928186416625977,
      "learning_rate": 1.741114303998491e-05,
      "loss": 3.0435,
      "step": 205294
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.687955617904663,
      "learning_rate": 1.740976979454998e-05,
      "loss": 3.1201,
      "step": 205295
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9908900260925293,
      "learning_rate": 1.7408396601653852e-05,
      "loss": 3.0857,
      "step": 205296
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8435616493225098,
      "learning_rate": 1.7407023461296865e-05,
      "loss": 2.7768,
      "step": 205297
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.889291286468506,
      "learning_rate": 1.7405650373479285e-05,
      "loss": 3.0846,
      "step": 205298
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.907026529312134,
      "learning_rate": 1.7404277338201312e-05,
      "loss": 2.7584,
      "step": 205299
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.154909133911133,
      "learning_rate": 1.7402904355463177e-05,
      "loss": 2.781,
      "step": 205300
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.102607250213623,
      "learning_rate": 1.7401531425265215e-05,
      "loss": 2.77,
      "step": 205301
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6632628440856934,
      "learning_rate": 1.740015854760759e-05,
      "loss": 2.8767,
      "step": 205302
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.91914439201355,
      "learning_rate": 1.739878572249067e-05,
      "loss": 2.8028,
      "step": 205303
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0839264392852783,
      "learning_rate": 1.7397412949914657e-05,
      "loss": 2.9051,
      "step": 205304
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.824596881866455,
      "learning_rate": 1.7396040229879782e-05,
      "loss": 2.6379,
      "step": 205305
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9248788356781006,
      "learning_rate": 1.7394667562386276e-05,
      "loss": 3.0464,
      "step": 205306
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5692520141601562,
      "learning_rate": 1.739329494743448e-05,
      "loss": 3.1865,
      "step": 205307
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.833527088165283,
      "learning_rate": 1.739192238502458e-05,
      "loss": 3.0262,
      "step": 205308
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.526959180831909,
      "learning_rate": 1.739054987515689e-05,
      "loss": 2.9352,
      "step": 205309
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.317925453186035,
      "learning_rate": 1.7389177417831602e-05,
      "loss": 2.9892,
      "step": 205310
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9025042057037354,
      "learning_rate": 1.738780501304905e-05,
      "loss": 2.77,
      "step": 205311
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0322530269622803,
      "learning_rate": 1.738643266080937e-05,
      "loss": 2.793,
      "step": 205312
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.496135950088501,
      "learning_rate": 1.7385060361112925e-05,
      "loss": 2.7127,
      "step": 205313
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4582486152648926,
      "learning_rate": 1.7383688113959882e-05,
      "loss": 2.9867,
      "step": 205314
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.95546293258667,
      "learning_rate": 1.738231591935061e-05,
      "loss": 2.96,
      "step": 205315
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8383188247680664,
      "learning_rate": 1.7380943777285238e-05,
      "loss": 3.0143,
      "step": 205316
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0520596504211426,
      "learning_rate": 1.7379571687764203e-05,
      "loss": 2.9249,
      "step": 205317
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.551529407501221,
      "learning_rate": 1.737819965078754e-05,
      "loss": 3.0928,
      "step": 205318
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.986975908279419,
      "learning_rate": 1.7376827666355642e-05,
      "loss": 2.7901,
      "step": 205319
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.045161247253418,
      "learning_rate": 1.737545573446868e-05,
      "loss": 3.0964,
      "step": 205320
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.963376045227051,
      "learning_rate": 1.7374083855126984e-05,
      "loss": 2.9159,
      "step": 205321
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.022243022918701,
      "learning_rate": 1.737271202833076e-05,
      "loss": 2.8843,
      "step": 205322
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.369067430496216,
      "learning_rate": 1.7371340254080333e-05,
      "loss": 2.8938,
      "step": 205323
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.795055866241455,
      "learning_rate": 1.7369968532375878e-05,
      "loss": 2.9386,
      "step": 205324
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.390366315841675,
      "learning_rate": 1.736859686321772e-05,
      "loss": 2.9885,
      "step": 205325
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9083971977233887,
      "learning_rate": 1.7367225246606e-05,
      "loss": 2.7622,
      "step": 205326
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.942897081375122,
      "learning_rate": 1.7365853682541108e-05,
      "loss": 2.8525,
      "step": 205327
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.801448345184326,
      "learning_rate": 1.7364482171023186e-05,
      "loss": 2.8081,
      "step": 205328
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.174791097640991,
      "learning_rate": 1.7363110712052596e-05,
      "loss": 2.8577,
      "step": 205329
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6978919506073,
      "learning_rate": 1.7361739305629542e-05,
      "loss": 2.8905,
      "step": 205330
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.112117290496826,
      "learning_rate": 1.736036795175425e-05,
      "loss": 2.9035,
      "step": 205331
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0371580123901367,
      "learning_rate": 1.735899665042699e-05,
      "loss": 3.01,
      "step": 205332
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0784060955047607,
      "learning_rate": 1.7357625401648034e-05,
      "loss": 2.9859,
      "step": 205333
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0641286373138428,
      "learning_rate": 1.735625420541761e-05,
      "loss": 2.9477,
      "step": 205334
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.885852813720703,
      "learning_rate": 1.7354883061736046e-05,
      "loss": 3.1534,
      "step": 205335
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.812682867050171,
      "learning_rate": 1.735351197060355e-05,
      "loss": 2.8079,
      "step": 205336
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0408358573913574,
      "learning_rate": 1.7352140932020285e-05,
      "loss": 2.8862,
      "step": 205337
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0867345333099365,
      "learning_rate": 1.7350769945986685e-05,
      "loss": 3.0058,
      "step": 205338
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.202683925628662,
      "learning_rate": 1.7349399012502885e-05,
      "loss": 2.9121,
      "step": 205339
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.5647106170654297,
      "learning_rate": 1.7348028131569113e-05,
      "loss": 3.1965,
      "step": 205340
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9184553623199463,
      "learning_rate": 1.734665730318574e-05,
      "loss": 2.8875,
      "step": 205341
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2173125743865967,
      "learning_rate": 1.7345286527352965e-05,
      "loss": 2.9355,
      "step": 205342
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.852459669113159,
      "learning_rate": 1.7343915804070985e-05,
      "loss": 2.9532,
      "step": 205343
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.936582565307617,
      "learning_rate": 1.734254513334017e-05,
      "loss": 3.1506,
      "step": 205344
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.850139617919922,
      "learning_rate": 1.734117451516065e-05,
      "loss": 2.9821,
      "step": 205345
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7369680404663086,
      "learning_rate": 1.733980394953276e-05,
      "loss": 2.8598,
      "step": 205346
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6478185653686523,
      "learning_rate": 1.7338433436456766e-05,
      "loss": 2.7023,
      "step": 205347
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7941980361938477,
      "learning_rate": 1.7337062975932903e-05,
      "loss": 2.7733,
      "step": 205348
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0446865558624268,
      "learning_rate": 1.7335692567961333e-05,
      "loss": 2.9935,
      "step": 205349
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.63055157661438,
      "learning_rate": 1.7334322212542462e-05,
      "loss": 2.8611,
      "step": 205350
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.296886682510376,
      "learning_rate": 1.733295190967642e-05,
      "loss": 2.8939,
      "step": 205351
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7858641147613525,
      "learning_rate": 1.733158165936357e-05,
      "loss": 3.0982,
      "step": 205352
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.608149766921997,
      "learning_rate": 1.7330211461604115e-05,
      "loss": 2.8276,
      "step": 205353
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3143460750579834,
      "learning_rate": 1.7328841316398324e-05,
      "loss": 2.855,
      "step": 205354
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.092575550079346,
      "learning_rate": 1.7327471223746393e-05,
      "loss": 2.7847,
      "step": 205355
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6462135314941406,
      "learning_rate": 1.7326101183648655e-05,
      "loss": 2.7233,
      "step": 205356
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0769639015197754,
      "learning_rate": 1.732473119610528e-05,
      "loss": 2.8373,
      "step": 205357
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.456894636154175,
      "learning_rate": 1.7323361261116596e-05,
      "loss": 2.9436,
      "step": 205358
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.334116220474243,
      "learning_rate": 1.7321991378682877e-05,
      "loss": 2.9871,
      "step": 205359
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8727011680603027,
      "learning_rate": 1.7320621548804314e-05,
      "loss": 2.8497,
      "step": 205360
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.685307264328003,
      "learning_rate": 1.7319251771481147e-05,
      "loss": 2.9689,
      "step": 205361
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2575297355651855,
      "learning_rate": 1.7317882046713704e-05,
      "loss": 2.8048,
      "step": 205362
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.992607355117798,
      "learning_rate": 1.7316512374502155e-05,
      "loss": 3.0124,
      "step": 205363
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.0340704917907715,
      "learning_rate": 1.7315142754846866e-05,
      "loss": 3.0064,
      "step": 205364
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.008622646331787,
      "learning_rate": 1.7313773187748003e-05,
      "loss": 3.1918,
      "step": 205365
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.231931686401367,
      "learning_rate": 1.7312403673205864e-05,
      "loss": 2.91,
      "step": 205366
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.179864406585693,
      "learning_rate": 1.7311034211220653e-05,
      "loss": 2.7417,
      "step": 205367
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6214730739593506,
      "learning_rate": 1.7309664801792666e-05,
      "loss": 2.7519,
      "step": 205368
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.995328664779663,
      "learning_rate": 1.730829544492214e-05,
      "loss": 2.8376,
      "step": 205369
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6771583557128906,
      "learning_rate": 1.7306926140609366e-05,
      "loss": 3.2093,
      "step": 205370
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2312819957733154,
      "learning_rate": 1.7305556888854554e-05,
      "loss": 3.1408,
      "step": 205371
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.912043809890747,
      "learning_rate": 1.7304187689658e-05,
      "loss": 3.0573,
      "step": 205372
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.3466715812683105,
      "learning_rate": 1.730281854301987e-05,
      "loss": 2.7858,
      "step": 205373
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.2212395668029785,
      "learning_rate": 1.7301449448940565e-05,
      "loss": 2.7741,
      "step": 205374
    },
    {
      "epoch": 2.67,
      "grad_norm": 5.565932750701904,
      "learning_rate": 1.730008040742018e-05,
      "loss": 3.0032,
      "step": 205375
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0855250358581543,
      "learning_rate": 1.7298711418459088e-05,
      "loss": 2.8286,
      "step": 205376
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.646909236907959,
      "learning_rate": 1.729734248205752e-05,
      "loss": 2.9496,
      "step": 205377
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.326716184616089,
      "learning_rate": 1.7295973598215672e-05,
      "loss": 3.0439,
      "step": 205378
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8503220081329346,
      "learning_rate": 1.7294604766933852e-05,
      "loss": 2.9477,
      "step": 205379
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.4554309844970703,
      "learning_rate": 1.7293235988212318e-05,
      "loss": 2.814,
      "step": 205380
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.2201027870178223,
      "learning_rate": 1.729186726205124e-05,
      "loss": 2.9584,
      "step": 205381
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.700525999069214,
      "learning_rate": 1.7290498588451018e-05,
      "loss": 3.0199,
      "step": 205382
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.485116481781006,
      "learning_rate": 1.7289129967411785e-05,
      "loss": 2.6589,
      "step": 205383
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0977537631988525,
      "learning_rate": 1.7287761398933908e-05,
      "loss": 2.8417,
      "step": 205384
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.253549575805664,
      "learning_rate": 1.7286392883017484e-05,
      "loss": 2.9205,
      "step": 205385
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4526591300964355,
      "learning_rate": 1.7285024419662918e-05,
      "loss": 2.8403,
      "step": 205386
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0731115341186523,
      "learning_rate": 1.728365600887037e-05,
      "loss": 2.7657,
      "step": 205387
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7492892742156982,
      "learning_rate": 1.728228765064015e-05,
      "loss": 2.7869,
      "step": 205388
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.560835838317871,
      "learning_rate": 1.7280919344972445e-05,
      "loss": 3.0055,
      "step": 205389
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6456525325775146,
      "learning_rate": 1.7279551091867627e-05,
      "loss": 2.9557,
      "step": 205390
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.6972033977508545,
      "learning_rate": 1.7278182891325832e-05,
      "loss": 2.9552,
      "step": 205391
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.658073902130127,
      "learning_rate": 1.7276814743347355e-05,
      "loss": 3.0371,
      "step": 205392
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.992992877960205,
      "learning_rate": 1.7275446647932435e-05,
      "loss": 2.7279,
      "step": 205393
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.967991352081299,
      "learning_rate": 1.72740786050814e-05,
      "loss": 2.6591,
      "step": 205394
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9100277423858643,
      "learning_rate": 1.7272710614794384e-05,
      "loss": 2.8522,
      "step": 205395
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.836381435394287,
      "learning_rate": 1.727134267707182e-05,
      "loss": 3.1092,
      "step": 205396
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.917773962020874,
      "learning_rate": 1.7269974791913743e-05,
      "loss": 2.7544,
      "step": 205397
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.567488193511963,
      "learning_rate": 1.7268606959320587e-05,
      "loss": 2.9472,
      "step": 205398
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9810800552368164,
      "learning_rate": 1.7267239179292446e-05,
      "loss": 3.0293,
      "step": 205399
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.8311147689819336,
      "learning_rate": 1.7265871451829726e-05,
      "loss": 2.8158,
      "step": 205400
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.362943649291992,
      "learning_rate": 1.726450377693259e-05,
      "loss": 2.8918,
      "step": 205401
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.124356985092163,
      "learning_rate": 1.7263136154601408e-05,
      "loss": 2.9407,
      "step": 205402
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.4052035808563232,
      "learning_rate": 1.7261768584836243e-05,
      "loss": 2.9756,
      "step": 205403
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.338345766067505,
      "learning_rate": 1.7260401067637496e-05,
      "loss": 2.8386,
      "step": 205404
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.797839641571045,
      "learning_rate": 1.7259033603005336e-05,
      "loss": 2.9548,
      "step": 205405
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1533050537109375,
      "learning_rate": 1.725766619094009e-05,
      "loss": 2.6753,
      "step": 205406
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0701794624328613,
      "learning_rate": 1.725629883144196e-05,
      "loss": 3.1074,
      "step": 205407
    },
    {
      "epoch": 2.67,
      "grad_norm": 4.846527099609375,
      "learning_rate": 1.7254931524511252e-05,
      "loss": 2.9519,
      "step": 205408
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.619936466217041,
      "learning_rate": 1.725356427014819e-05,
      "loss": 2.9077,
      "step": 205409
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1512675285339355,
      "learning_rate": 1.7252197068353013e-05,
      "loss": 2.9987,
      "step": 205410
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.121666669845581,
      "learning_rate": 1.7250829919125985e-05,
      "loss": 2.7645,
      "step": 205411
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.9751458168029785,
      "learning_rate": 1.7249462822467373e-05,
      "loss": 2.8335,
      "step": 205412
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.1855108737945557,
      "learning_rate": 1.7248095778377415e-05,
      "loss": 3.0038,
      "step": 205413
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.0263631343841553,
      "learning_rate": 1.7246728786856368e-05,
      "loss": 2.8826,
      "step": 205414
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.714776039123535,
      "learning_rate": 1.7245361847904537e-05,
      "loss": 3.1034,
      "step": 205415
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7879374027252197,
      "learning_rate": 1.724399496152209e-05,
      "loss": 2.9732,
      "step": 205416
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.83927059173584,
      "learning_rate": 1.7242628127709322e-05,
      "loss": 2.8072,
      "step": 205417
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.834275007247925,
      "learning_rate": 1.7241261346466505e-05,
      "loss": 2.8954,
      "step": 205418
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.977208375930786,
      "learning_rate": 1.7239894617793835e-05,
      "loss": 2.78,
      "step": 205419
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7787036895751953,
      "learning_rate": 1.723852794169165e-05,
      "loss": 2.7067,
      "step": 205420
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.982903480529785,
      "learning_rate": 1.7237161318160142e-05,
      "loss": 2.9701,
      "step": 205421
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7862131595611572,
      "learning_rate": 1.7235794747199583e-05,
      "loss": 2.8707,
      "step": 205422
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9577577114105225,
      "learning_rate": 1.723442822881024e-05,
      "loss": 2.9701,
      "step": 205423
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.7068753242492676,
      "learning_rate": 1.7233061762992374e-05,
      "loss": 2.8471,
      "step": 205424
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.938445806503296,
      "learning_rate": 1.7231695349746154e-05,
      "loss": 2.734,
      "step": 205425
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.6128487586975098,
      "learning_rate": 1.723032898907195e-05,
      "loss": 3.1078,
      "step": 205426
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.163942813873291,
      "learning_rate": 1.722896268096996e-05,
      "loss": 2.7931,
      "step": 205427
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.5013699531555176,
      "learning_rate": 1.7227596425440414e-05,
      "loss": 2.8892,
      "step": 205428
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.225809335708618,
      "learning_rate": 1.7226230222483615e-05,
      "loss": 2.7837,
      "step": 205429
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7897541522979736,
      "learning_rate": 1.722486407209983e-05,
      "loss": 2.9212,
      "step": 205430
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7599198818206787,
      "learning_rate": 1.7223497974289223e-05,
      "loss": 2.9412,
      "step": 205431
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.894561290740967,
      "learning_rate": 1.722213192905213e-05,
      "loss": 3.3222,
      "step": 205432
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.928091287612915,
      "learning_rate": 1.7220765936388815e-05,
      "loss": 2.6571,
      "step": 205433
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9908370971679688,
      "learning_rate": 1.7219399996299445e-05,
      "loss": 3.0313,
      "step": 205434
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.454153299331665,
      "learning_rate": 1.721803410878435e-05,
      "loss": 2.9548,
      "step": 205435
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.952545642852783,
      "learning_rate": 1.7216668273843736e-05,
      "loss": 2.839,
      "step": 205436
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.882694721221924,
      "learning_rate": 1.72153024914779e-05,
      "loss": 2.9078,
      "step": 205437
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.194810152053833,
      "learning_rate": 1.7213936761687107e-05,
      "loss": 3.1009,
      "step": 205438
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.062126398086548,
      "learning_rate": 1.721257108447156e-05,
      "loss": 3.0267,
      "step": 205439
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.7796521186828613,
      "learning_rate": 1.721120545983149e-05,
      "loss": 3.0386,
      "step": 205440
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.9385972023010254,
      "learning_rate": 1.7209839887767262e-05,
      "loss": 3.2161,
      "step": 205441
    },
    {
      "epoch": 2.67,
      "grad_norm": 3.685373544692993,
      "learning_rate": 1.7208474368279014e-05,
      "loss": 3.0651,
      "step": 205442
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.900700092315674,
      "learning_rate": 1.7207108901367074e-05,
      "loss": 3.0321,
      "step": 205443
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.706834077835083,
      "learning_rate": 1.7205743487031674e-05,
      "loss": 2.8753,
      "step": 205444
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4752697944641113,
      "learning_rate": 1.7204378125273087e-05,
      "loss": 3.0465,
      "step": 205445
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.217372179031372,
      "learning_rate": 1.7203012816091478e-05,
      "loss": 2.9437,
      "step": 205446
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.631340503692627,
      "learning_rate": 1.720164755948721e-05,
      "loss": 2.8731,
      "step": 205447
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6978068351745605,
      "learning_rate": 1.720028235546045e-05,
      "loss": 2.9182,
      "step": 205448
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.826453447341919,
      "learning_rate": 1.7198917204011564e-05,
      "loss": 3.0542,
      "step": 205449
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.842677354812622,
      "learning_rate": 1.7197552105140656e-05,
      "loss": 2.7405,
      "step": 205450
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.588299036026001,
      "learning_rate": 1.7196187058848188e-05,
      "loss": 2.7635,
      "step": 205451
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9507248401641846,
      "learning_rate": 1.7194822065134195e-05,
      "loss": 2.768,
      "step": 205452
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.639615058898926,
      "learning_rate": 1.7193457123999043e-05,
      "loss": 2.9898,
      "step": 205453
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9218192100524902,
      "learning_rate": 1.7192092235442934e-05,
      "loss": 2.8767,
      "step": 205454
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8441274166107178,
      "learning_rate": 1.7190727399466198e-05,
      "loss": 3.0136,
      "step": 205455
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.196305513381958,
      "learning_rate": 1.7189362616069002e-05,
      "loss": 2.8353,
      "step": 205456
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8453590869903564,
      "learning_rate": 1.7187997885251715e-05,
      "loss": 2.7065,
      "step": 205457
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9613113403320312,
      "learning_rate": 1.718663320701443e-05,
      "loss": 3.0238,
      "step": 205458
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.887852430343628,
      "learning_rate": 1.718526858135756e-05,
      "loss": 2.9101,
      "step": 205459
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1968414783477783,
      "learning_rate": 1.7183904008281225e-05,
      "loss": 2.7568,
      "step": 205460
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5773890018463135,
      "learning_rate": 1.7182539487785763e-05,
      "loss": 3.0395,
      "step": 205461
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.51582407951355,
      "learning_rate": 1.7181175019871406e-05,
      "loss": 2.8631,
      "step": 205462
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.446058750152588,
      "learning_rate": 1.7179810604538458e-05,
      "loss": 2.9618,
      "step": 205463
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9074716567993164,
      "learning_rate": 1.7178446241787046e-05,
      "loss": 2.6442,
      "step": 205464
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6043829917907715,
      "learning_rate": 1.7177081931617542e-05,
      "loss": 3.1475,
      "step": 205465
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2176918983459473,
      "learning_rate": 1.7175717674030143e-05,
      "loss": 3.2699,
      "step": 205466
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7310006618499756,
      "learning_rate": 1.7174353469025115e-05,
      "loss": 2.5735,
      "step": 205467
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.663602352142334,
      "learning_rate": 1.717298931660269e-05,
      "loss": 2.8129,
      "step": 205468
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7140116691589355,
      "learning_rate": 1.717162521676324e-05,
      "loss": 2.8792,
      "step": 205469
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7892653942108154,
      "learning_rate": 1.7170261169506828e-05,
      "loss": 2.9824,
      "step": 205470
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1565732955932617,
      "learning_rate": 1.716889717483385e-05,
      "loss": 3.0163,
      "step": 205471
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6172118186950684,
      "learning_rate": 1.716753323274448e-05,
      "loss": 2.6793,
      "step": 205472
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.625911235809326,
      "learning_rate": 1.716616934323901e-05,
      "loss": 2.9791,
      "step": 205473
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.847329616546631,
      "learning_rate": 1.716480550631768e-05,
      "loss": 2.9566,
      "step": 205474
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.581332206726074,
      "learning_rate": 1.716344172198082e-05,
      "loss": 3.1243,
      "step": 205475
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.840442180633545,
      "learning_rate": 1.716207799022853e-05,
      "loss": 2.8924,
      "step": 205476
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.680833339691162,
      "learning_rate": 1.7160714311061206e-05,
      "loss": 3.0517,
      "step": 205477
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2500529289245605,
      "learning_rate": 1.715935068447899e-05,
      "loss": 2.7768,
      "step": 205478
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2866172790527344,
      "learning_rate": 1.715798711048224e-05,
      "loss": 2.9288,
      "step": 205479
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.747796058654785,
      "learning_rate": 1.7156623589071127e-05,
      "loss": 2.8656,
      "step": 205480
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7117795944213867,
      "learning_rate": 1.7155260120245985e-05,
      "loss": 2.9399,
      "step": 205481
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8247640132904053,
      "learning_rate": 1.7153896704006976e-05,
      "loss": 3.0042,
      "step": 205482
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.889164686203003,
      "learning_rate": 1.7152533340354402e-05,
      "loss": 2.7892,
      "step": 205483
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.072603702545166,
      "learning_rate": 1.7151170029288496e-05,
      "loss": 2.646,
      "step": 205484
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3034005165100098,
      "learning_rate": 1.714980677080956e-05,
      "loss": 2.911,
      "step": 205485
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.709014654159546,
      "learning_rate": 1.7148443564917756e-05,
      "loss": 2.8159,
      "step": 205486
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3590407371520996,
      "learning_rate": 1.7147080411613522e-05,
      "loss": 2.6959,
      "step": 205487
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.9727988243103027,
      "learning_rate": 1.7145717310896857e-05,
      "loss": 3.106,
      "step": 205488
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2098989486694336,
      "learning_rate": 1.7144354262768223e-05,
      "loss": 3.023,
      "step": 205489
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1671018600463867,
      "learning_rate": 1.7142991267227725e-05,
      "loss": 2.835,
      "step": 205490
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2981903553009033,
      "learning_rate": 1.7141628324275724e-05,
      "loss": 2.9158,
      "step": 205491
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.2752275466918945,
      "learning_rate": 1.7140265433912394e-05,
      "loss": 2.9065,
      "step": 205492
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.81482195854187,
      "learning_rate": 1.7138902596138127e-05,
      "loss": 2.671,
      "step": 205493
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8613836765289307,
      "learning_rate": 1.7137539810952962e-05,
      "loss": 2.7675,
      "step": 205494
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.41645884513855,
      "learning_rate": 1.7136177078357362e-05,
      "loss": 2.759,
      "step": 205495
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8297619819641113,
      "learning_rate": 1.713481439835139e-05,
      "loss": 2.8121,
      "step": 205496
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.554431438446045,
      "learning_rate": 1.7133451770935457e-05,
      "loss": 2.9568,
      "step": 205497
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0288355350494385,
      "learning_rate": 1.713208919610972e-05,
      "loss": 2.7673,
      "step": 205498
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0648159980773926,
      "learning_rate": 1.7130726673874516e-05,
      "loss": 2.7798,
      "step": 205499
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.623504161834717,
      "learning_rate": 1.7129364204230046e-05,
      "loss": 2.9657,
      "step": 205500
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.978187322616577,
      "learning_rate": 1.7128001787176536e-05,
      "loss": 3.0722,
      "step": 205501
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.956946611404419,
      "learning_rate": 1.7126639422714262e-05,
      "loss": 2.7872,
      "step": 205502
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8460536003112793,
      "learning_rate": 1.7125277110843515e-05,
      "loss": 2.9826,
      "step": 205503
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6583058834075928,
      "learning_rate": 1.7123914851564467e-05,
      "loss": 3.0038,
      "step": 205504
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7950479984283447,
      "learning_rate": 1.7122552644877485e-05,
      "loss": 2.8916,
      "step": 205505
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0118203163146973,
      "learning_rate": 1.7121190490782766e-05,
      "loss": 2.8401,
      "step": 205506
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.677700996398926,
      "learning_rate": 1.711982838928051e-05,
      "loss": 2.9227,
      "step": 205507
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1927497386932373,
      "learning_rate": 1.711846634037105e-05,
      "loss": 2.9122,
      "step": 205508
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.9343743324279785,
      "learning_rate": 1.711710434405459e-05,
      "loss": 2.9477,
      "step": 205509
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7894484996795654,
      "learning_rate": 1.711574240033139e-05,
      "loss": 2.8268,
      "step": 205510
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.71441912651062,
      "learning_rate": 1.711438050920172e-05,
      "loss": 2.9204,
      "step": 205511
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.291451930999756,
      "learning_rate": 1.7113018670665878e-05,
      "loss": 3.0595,
      "step": 205512
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8460068702697754,
      "learning_rate": 1.711165688472397e-05,
      "loss": 2.9539,
      "step": 205513
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1193344593048096,
      "learning_rate": 1.711029515137642e-05,
      "loss": 2.932,
      "step": 205514
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.266869068145752,
      "learning_rate": 1.71089334706234e-05,
      "loss": 2.8406,
      "step": 205515
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.124148845672607,
      "learning_rate": 1.7107571842465108e-05,
      "loss": 2.9482,
      "step": 205516
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8179361820220947,
      "learning_rate": 1.710621026690194e-05,
      "loss": 3.1951,
      "step": 205517
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7535877227783203,
      "learning_rate": 1.7104848743934042e-05,
      "loss": 2.764,
      "step": 205518
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.773367166519165,
      "learning_rate": 1.7103487273561634e-05,
      "loss": 3.0528,
      "step": 205519
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2441911697387695,
      "learning_rate": 1.7102125855785086e-05,
      "loss": 2.9596,
      "step": 205520
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6764938831329346,
      "learning_rate": 1.710076449060457e-05,
      "loss": 2.7513,
      "step": 205521
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.864166736602783,
      "learning_rate": 1.7099403178020375e-05,
      "loss": 3.0552,
      "step": 205522
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.629662036895752,
      "learning_rate": 1.7098041918032747e-05,
      "loss": 2.9238,
      "step": 205523
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.46663761138916,
      "learning_rate": 1.709668071064194e-05,
      "loss": 2.9861,
      "step": 205524
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5003740787506104,
      "learning_rate": 1.7095319555848164e-05,
      "loss": 3.0865,
      "step": 205525
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.066056251525879,
      "learning_rate": 1.7093958453651745e-05,
      "loss": 3.1264,
      "step": 205526
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7660253047943115,
      "learning_rate": 1.7092597404052854e-05,
      "loss": 2.9319,
      "step": 205527
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.455606460571289,
      "learning_rate": 1.7091236407051856e-05,
      "loss": 2.8594,
      "step": 205528
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.325823783874512,
      "learning_rate": 1.708987546264885e-05,
      "loss": 2.7986,
      "step": 205529
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.211556911468506,
      "learning_rate": 1.70885145708443e-05,
      "loss": 3.1799,
      "step": 205530
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.085153341293335,
      "learning_rate": 1.7087153731638216e-05,
      "loss": 2.7313,
      "step": 205531
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.928107500076294,
      "learning_rate": 1.708579294503105e-05,
      "loss": 2.7195,
      "step": 205532
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.153881072998047,
      "learning_rate": 1.7084432211022913e-05,
      "loss": 3.1012,
      "step": 205533
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7161197662353516,
      "learning_rate": 1.7083071529614167e-05,
      "loss": 2.6803,
      "step": 205534
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.996429920196533,
      "learning_rate": 1.708171090080498e-05,
      "loss": 2.9785,
      "step": 205535
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.04748010635376,
      "learning_rate": 1.7080350324595714e-05,
      "loss": 2.6547,
      "step": 205536
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.181976556777954,
      "learning_rate": 1.7078989800986475e-05,
      "loss": 3.0782,
      "step": 205537
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.960779905319214,
      "learning_rate": 1.7077629329977626e-05,
      "loss": 3.1853,
      "step": 205538
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7004127502441406,
      "learning_rate": 1.7076268911569367e-05,
      "loss": 2.9744,
      "step": 205539
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.093534231185913,
      "learning_rate": 1.7074908545762e-05,
      "loss": 2.8566,
      "step": 205540
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6476833820343018,
      "learning_rate": 1.707354823255569e-05,
      "loss": 2.8904,
      "step": 205541
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.9428157806396484,
      "learning_rate": 1.707218797195087e-05,
      "loss": 2.8815,
      "step": 205542
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0197696685791016,
      "learning_rate": 1.7070827763947536e-05,
      "loss": 3.0267,
      "step": 205543
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8333868980407715,
      "learning_rate": 1.7069467608546162e-05,
      "loss": 2.8717,
      "step": 205544
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.563382148742676,
      "learning_rate": 1.706810750574684e-05,
      "loss": 2.807,
      "step": 205545
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1406891345977783,
      "learning_rate": 1.7066747455549978e-05,
      "loss": 2.8162,
      "step": 205546
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.4872541427612305,
      "learning_rate": 1.706538745795567e-05,
      "loss": 2.7448,
      "step": 205547
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.77736234664917,
      "learning_rate": 1.706402751296435e-05,
      "loss": 2.8062,
      "step": 205548
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.817429542541504,
      "learning_rate": 1.7062667620576088e-05,
      "loss": 2.8809,
      "step": 205549
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.774336814880371,
      "learning_rate": 1.7061307780791245e-05,
      "loss": 2.8989,
      "step": 205550
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.579522132873535,
      "learning_rate": 1.705994799360999e-05,
      "loss": 3.0087,
      "step": 205551
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.560729742050171,
      "learning_rate": 1.7058588259032725e-05,
      "loss": 2.643,
      "step": 205552
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3868789672851562,
      "learning_rate": 1.705722857705951e-05,
      "loss": 2.7313,
      "step": 205553
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4474868774414062,
      "learning_rate": 1.7055868947690822e-05,
      "loss": 2.9108,
      "step": 205554
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.982907295227051,
      "learning_rate": 1.7054509370926684e-05,
      "loss": 2.8077,
      "step": 205555
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2344186305999756,
      "learning_rate": 1.70531498467675e-05,
      "loss": 2.9301,
      "step": 205556
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5455069541931152,
      "learning_rate": 1.7051790375213438e-05,
      "loss": 2.7145,
      "step": 205557
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2661287784576416,
      "learning_rate": 1.7050430956264826e-05,
      "loss": 2.6079,
      "step": 205558
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.653838634490967,
      "learning_rate": 1.7049071589921835e-05,
      "loss": 2.7877,
      "step": 205559
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9961864948272705,
      "learning_rate": 1.7047712276184866e-05,
      "loss": 2.7347,
      "step": 205560
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.096999168395996,
      "learning_rate": 1.704635301505395e-05,
      "loss": 2.8399,
      "step": 205561
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.882601261138916,
      "learning_rate": 1.7044993806529517e-05,
      "loss": 3.0608,
      "step": 205562
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.891406536102295,
      "learning_rate": 1.7043634650611736e-05,
      "loss": 3.0301,
      "step": 205563
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2821929454803467,
      "learning_rate": 1.704227554730091e-05,
      "loss": 2.8046,
      "step": 205564
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9537084102630615,
      "learning_rate": 1.7040916496597235e-05,
      "loss": 2.8284,
      "step": 205565
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.214036703109741,
      "learning_rate": 1.7039557498501076e-05,
      "loss": 2.6606,
      "step": 205566
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.028394937515259,
      "learning_rate": 1.7038198553012504e-05,
      "loss": 2.7389,
      "step": 205567
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.134878158569336,
      "learning_rate": 1.7036839660131918e-05,
      "loss": 3.0249,
      "step": 205568
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9695160388946533,
      "learning_rate": 1.7035480819859515e-05,
      "loss": 2.9786,
      "step": 205569
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7516062259674072,
      "learning_rate": 1.7034122032195563e-05,
      "loss": 2.8075,
      "step": 205570
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.440035581588745,
      "learning_rate": 1.7032763297140262e-05,
      "loss": 3.0838,
      "step": 205571
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.717743158340454,
      "learning_rate": 1.7031404614694045e-05,
      "loss": 2.9824,
      "step": 205572
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.980856418609619,
      "learning_rate": 1.703004598485691e-05,
      "loss": 3.1242,
      "step": 205573
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.756772041320801,
      "learning_rate": 1.702868740762926e-05,
      "loss": 2.8742,
      "step": 205574
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.197723150253296,
      "learning_rate": 1.7027328883011294e-05,
      "loss": 2.8851,
      "step": 205575
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7987565994262695,
      "learning_rate": 1.702597041100334e-05,
      "loss": 2.6961,
      "step": 205576
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8156485557556152,
      "learning_rate": 1.702461199160554e-05,
      "loss": 2.8591,
      "step": 205577
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.553788900375366,
      "learning_rate": 1.7023253624818324e-05,
      "loss": 2.9011,
      "step": 205578
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9992499351501465,
      "learning_rate": 1.702189531064172e-05,
      "loss": 3.0364,
      "step": 205579
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.314229965209961,
      "learning_rate": 1.7020537049076098e-05,
      "loss": 2.9919,
      "step": 205580
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.6908037662506104,
      "learning_rate": 1.7019178840121695e-05,
      "loss": 2.9057,
      "step": 205581
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0196287631988525,
      "learning_rate": 1.7017820683778805e-05,
      "loss": 2.8595,
      "step": 205582
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.86098575592041,
      "learning_rate": 1.7016462580047595e-05,
      "loss": 2.8613,
      "step": 205583
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4453325271606445,
      "learning_rate": 1.7015104528928436e-05,
      "loss": 2.9767,
      "step": 205584
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.213256597518921,
      "learning_rate": 1.701374653042149e-05,
      "loss": 3.1345,
      "step": 205585
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1625657081604004,
      "learning_rate": 1.7012388584527026e-05,
      "loss": 2.931,
      "step": 205586
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.965233325958252,
      "learning_rate": 1.701103069124524e-05,
      "loss": 2.8544,
      "step": 205587
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1690609455108643,
      "learning_rate": 1.7009672850576507e-05,
      "loss": 2.9318,
      "step": 205588
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7918245792388916,
      "learning_rate": 1.7008315062520985e-05,
      "loss": 2.8508,
      "step": 205589
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.995863199234009,
      "learning_rate": 1.700695732707901e-05,
      "loss": 2.8747,
      "step": 205590
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.70829439163208,
      "learning_rate": 1.7005599644250745e-05,
      "loss": 2.9468,
      "step": 205591
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.476195812225342,
      "learning_rate": 1.7004242014036495e-05,
      "loss": 3.068,
      "step": 205592
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0523228645324707,
      "learning_rate": 1.7002884436436493e-05,
      "loss": 2.7765,
      "step": 205593
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0544779300689697,
      "learning_rate": 1.7001526911451003e-05,
      "loss": 2.9698,
      "step": 205594
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2387800216674805,
      "learning_rate": 1.7000169439080225e-05,
      "loss": 2.6634,
      "step": 205595
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0239250659942627,
      "learning_rate": 1.6998812019324525e-05,
      "loss": 2.9662,
      "step": 205596
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.737837553024292,
      "learning_rate": 1.6997454652184072e-05,
      "loss": 2.8448,
      "step": 205597
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.929624080657959,
      "learning_rate": 1.6996097337659096e-05,
      "loss": 2.8334,
      "step": 205598
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.963226079940796,
      "learning_rate": 1.6994740075749904e-05,
      "loss": 3.0066,
      "step": 205599
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.090765953063965,
      "learning_rate": 1.699338286645675e-05,
      "loss": 2.701,
      "step": 205600
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.823622226715088,
      "learning_rate": 1.6992025709779845e-05,
      "loss": 2.9397,
      "step": 205601
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6086480617523193,
      "learning_rate": 1.6990668605719483e-05,
      "loss": 2.715,
      "step": 205602
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.502938270568848,
      "learning_rate": 1.698931155427593e-05,
      "loss": 3.1107,
      "step": 205603
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8823797702789307,
      "learning_rate": 1.6987954555449324e-05,
      "loss": 2.7518,
      "step": 205604
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.662842035293579,
      "learning_rate": 1.698659760924006e-05,
      "loss": 2.9811,
      "step": 205605
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.860008955001831,
      "learning_rate": 1.698524071564831e-05,
      "loss": 3.0822,
      "step": 205606
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.38376784324646,
      "learning_rate": 1.6983883874674365e-05,
      "loss": 3.1113,
      "step": 205607
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0310842990875244,
      "learning_rate": 1.698252708631843e-05,
      "loss": 3.1066,
      "step": 205608
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.205080986022949,
      "learning_rate": 1.6981170350580842e-05,
      "loss": 3.1956,
      "step": 205609
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4783437252044678,
      "learning_rate": 1.6979813667461727e-05,
      "loss": 3.166,
      "step": 205610
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.916354179382324,
      "learning_rate": 1.6978457036961424e-05,
      "loss": 3.0128,
      "step": 205611
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.004446506500244,
      "learning_rate": 1.6977100459080162e-05,
      "loss": 3.0011,
      "step": 205612
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0755956172943115,
      "learning_rate": 1.697574393381824e-05,
      "loss": 2.9717,
      "step": 205613
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6371521949768066,
      "learning_rate": 1.6974387461175866e-05,
      "loss": 3.0132,
      "step": 205614
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.524627685546875,
      "learning_rate": 1.6973031041153295e-05,
      "loss": 2.9765,
      "step": 205615
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.152794599533081,
      "learning_rate": 1.6971674673750702e-05,
      "loss": 2.9117,
      "step": 205616
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9491302967071533,
      "learning_rate": 1.6970318358968516e-05,
      "loss": 2.9555,
      "step": 205617
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7999703884124756,
      "learning_rate": 1.6968962096806805e-05,
      "loss": 2.7312,
      "step": 205618
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.529378414154053,
      "learning_rate": 1.6967605887265966e-05,
      "loss": 2.9891,
      "step": 205619
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.683480739593506,
      "learning_rate": 1.696624973034617e-05,
      "loss": 2.8613,
      "step": 205620
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2420198917388916,
      "learning_rate": 1.696489362604775e-05,
      "loss": 2.7864,
      "step": 205621
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.772749185562134,
      "learning_rate": 1.6963537574370835e-05,
      "loss": 3.0636,
      "step": 205622
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.682614326477051,
      "learning_rate": 1.696218157531576e-05,
      "loss": 3.1334,
      "step": 205623
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8702242374420166,
      "learning_rate": 1.6960825628882722e-05,
      "loss": 2.921,
      "step": 205624
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6853246688842773,
      "learning_rate": 1.6959469735072062e-05,
      "loss": 3.0698,
      "step": 205625
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9924697875976562,
      "learning_rate": 1.6958113893883907e-05,
      "loss": 3.008,
      "step": 205626
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1196460723876953,
      "learning_rate": 1.695675810531869e-05,
      "loss": 2.8244,
      "step": 205627
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.929075002670288,
      "learning_rate": 1.695540236937648e-05,
      "loss": 2.9243,
      "step": 205628
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0727128982543945,
      "learning_rate": 1.6954046686057642e-05,
      "loss": 2.7214,
      "step": 205629
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.132172584533691,
      "learning_rate": 1.695269105536231e-05,
      "loss": 3.1162,
      "step": 205630
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.103815793991089,
      "learning_rate": 1.6951335477290916e-05,
      "loss": 2.891,
      "step": 205631
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8722217082977295,
      "learning_rate": 1.6949979951843528e-05,
      "loss": 2.814,
      "step": 205632
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0759642124176025,
      "learning_rate": 1.694862447902058e-05,
      "loss": 2.8604,
      "step": 205633
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.477935791015625,
      "learning_rate": 1.6947269058822133e-05,
      "loss": 3.0333,
      "step": 205634
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.930910587310791,
      "learning_rate": 1.694591369124859e-05,
      "loss": 2.923,
      "step": 205635
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.6146693229675293,
      "learning_rate": 1.6944558376300088e-05,
      "loss": 2.8927,
      "step": 205636
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8090829849243164,
      "learning_rate": 1.694320311397699e-05,
      "loss": 3.0029,
      "step": 205637
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.145432233810425,
      "learning_rate": 1.6941847904279428e-05,
      "loss": 3.1089,
      "step": 205638
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6195123195648193,
      "learning_rate": 1.6940492747207834e-05,
      "loss": 2.9279,
      "step": 205639
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.6231021881103516,
      "learning_rate": 1.6939137642762248e-05,
      "loss": 2.6911,
      "step": 205640
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.308223247528076,
      "learning_rate": 1.6937782590943062e-05,
      "loss": 2.7603,
      "step": 205641
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.825998544692993,
      "learning_rate": 1.6936427591750444e-05,
      "loss": 3.0017,
      "step": 205642
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7832539081573486,
      "learning_rate": 1.693507264518473e-05,
      "loss": 3.092,
      "step": 205643
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.131199836730957,
      "learning_rate": 1.693371775124609e-05,
      "loss": 2.8947,
      "step": 205644
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.045294761657715,
      "learning_rate": 1.693236290993488e-05,
      "loss": 2.8297,
      "step": 205645
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6862173080444336,
      "learning_rate": 1.6931008121251245e-05,
      "loss": 2.9478,
      "step": 205646
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.128213405609131,
      "learning_rate": 1.6929653385195473e-05,
      "loss": 2.9453,
      "step": 205647
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.963919162750244,
      "learning_rate": 1.6928298701767806e-05,
      "loss": 2.7827,
      "step": 205648
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.856724500656128,
      "learning_rate": 1.692694407096854e-05,
      "loss": 2.9315,
      "step": 205649
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0924201011657715,
      "learning_rate": 1.6925589492797876e-05,
      "loss": 2.7543,
      "step": 205650
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4732704162597656,
      "learning_rate": 1.692423496725611e-05,
      "loss": 2.7325,
      "step": 205651
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9736850261688232,
      "learning_rate": 1.6922880494343484e-05,
      "loss": 2.8132,
      "step": 205652
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.647345781326294,
      "learning_rate": 1.6921526074060253e-05,
      "loss": 2.9105,
      "step": 205653
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1589772701263428,
      "learning_rate": 1.6920171706406594e-05,
      "loss": 3.0126,
      "step": 205654
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.221339464187622,
      "learning_rate": 1.6918817391382867e-05,
      "loss": 3.1905,
      "step": 205655
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7908194065093994,
      "learning_rate": 1.6917463128989207e-05,
      "loss": 3.0678,
      "step": 205656
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.4979090690612793,
      "learning_rate": 1.6916108919226013e-05,
      "loss": 2.8305,
      "step": 205657
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.535585403442383,
      "learning_rate": 1.691475476209342e-05,
      "loss": 2.8462,
      "step": 205658
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.114227056503296,
      "learning_rate": 1.6913400657591758e-05,
      "loss": 3.1011,
      "step": 205659
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8543388843536377,
      "learning_rate": 1.6912046605721164e-05,
      "loss": 3.0478,
      "step": 205660
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.371095657348633,
      "learning_rate": 1.6910692606482033e-05,
      "loss": 2.8846,
      "step": 205661
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7369892597198486,
      "learning_rate": 1.6909338659874473e-05,
      "loss": 2.9564,
      "step": 205662
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.680799961090088,
      "learning_rate": 1.6907984765898874e-05,
      "loss": 2.9986,
      "step": 205663
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.09608793258667,
      "learning_rate": 1.6906630924555443e-05,
      "loss": 2.644,
      "step": 205664
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9842288494110107,
      "learning_rate": 1.6905277135844375e-05,
      "loss": 2.9711,
      "step": 205665
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5567526817321777,
      "learning_rate": 1.690392339976594e-05,
      "loss": 3.1361,
      "step": 205666
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0747740268707275,
      "learning_rate": 1.6902569716320468e-05,
      "loss": 2.7255,
      "step": 205667
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9725186824798584,
      "learning_rate": 1.690121608550806e-05,
      "loss": 2.9094,
      "step": 205668
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2084968090057373,
      "learning_rate": 1.6899862507329154e-05,
      "loss": 3.1288,
      "step": 205669
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.255850076675415,
      "learning_rate": 1.6898508981783874e-05,
      "loss": 3.185,
      "step": 205670
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.792110443115234,
      "learning_rate": 1.6897155508872528e-05,
      "loss": 2.998,
      "step": 205671
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.544780969619751,
      "learning_rate": 1.689580208859528e-05,
      "loss": 3.0045,
      "step": 205672
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5159194469451904,
      "learning_rate": 1.6894448720952493e-05,
      "loss": 2.9019,
      "step": 205673
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9537248611450195,
      "learning_rate": 1.6893095405944334e-05,
      "loss": 2.8743,
      "step": 205674
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7103044986724854,
      "learning_rate": 1.689174214357114e-05,
      "loss": 2.7348,
      "step": 205675
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.552274703979492,
      "learning_rate": 1.689038893383311e-05,
      "loss": 2.8408,
      "step": 205676
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.770294427871704,
      "learning_rate": 1.688903577673051e-05,
      "loss": 2.9201,
      "step": 205677
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.814330816268921,
      "learning_rate": 1.6887682672263536e-05,
      "loss": 3.0532,
      "step": 205678
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5016398429870605,
      "learning_rate": 1.6886329620432525e-05,
      "loss": 2.7163,
      "step": 205679
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.889845848083496,
      "learning_rate": 1.6884976621237646e-05,
      "loss": 2.8671,
      "step": 205680
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.354909896850586,
      "learning_rate": 1.6883623674679257e-05,
      "loss": 2.9254,
      "step": 205681
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.079299211502075,
      "learning_rate": 1.6882270780757502e-05,
      "loss": 3.0958,
      "step": 205682
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.64103627204895,
      "learning_rate": 1.688091793947267e-05,
      "loss": 2.7437,
      "step": 205683
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.709623098373413,
      "learning_rate": 1.687956515082507e-05,
      "loss": 2.9468,
      "step": 205684
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.193480968475342,
      "learning_rate": 1.6878212414814863e-05,
      "loss": 2.897,
      "step": 205685
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9232254028320312,
      "learning_rate": 1.6876859731442317e-05,
      "loss": 2.8877,
      "step": 205686
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9851460456848145,
      "learning_rate": 1.6875507100707762e-05,
      "loss": 2.9554,
      "step": 205687
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.989607810974121,
      "learning_rate": 1.6874154522611406e-05,
      "loss": 3.117,
      "step": 205688
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8625876903533936,
      "learning_rate": 1.6872801997153408e-05,
      "loss": 2.9197,
      "step": 205689
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.557729721069336,
      "learning_rate": 1.687144952433417e-05,
      "loss": 2.9432,
      "step": 205690
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.853151321411133,
      "learning_rate": 1.6870097104153822e-05,
      "loss": 3.0445,
      "step": 205691
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9288973808288574,
      "learning_rate": 1.6868744736612706e-05,
      "loss": 2.8287,
      "step": 205692
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.581880807876587,
      "learning_rate": 1.6867392421711045e-05,
      "loss": 3.1381,
      "step": 205693
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3155736923217773,
      "learning_rate": 1.686604015944908e-05,
      "loss": 3.2902,
      "step": 205694
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1874425411224365,
      "learning_rate": 1.6864687949827005e-05,
      "loss": 3.2824,
      "step": 205695
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0449156761169434,
      "learning_rate": 1.686333579284519e-05,
      "loss": 3.1419,
      "step": 205696
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8156898021698,
      "learning_rate": 1.6861983688503768e-05,
      "loss": 2.8135,
      "step": 205697
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2056665420532227,
      "learning_rate": 1.68606316368031e-05,
      "loss": 2.8405,
      "step": 205698
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.744403123855591,
      "learning_rate": 1.6859279637743394e-05,
      "loss": 2.8353,
      "step": 205699
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.748690128326416,
      "learning_rate": 1.685792769132488e-05,
      "loss": 2.73,
      "step": 205700
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.987069606781006,
      "learning_rate": 1.6856575797547757e-05,
      "loss": 2.6014,
      "step": 205701
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.458736896514893,
      "learning_rate": 1.6855223956412423e-05,
      "loss": 2.8029,
      "step": 205702
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.004026174545288,
      "learning_rate": 1.685387216791898e-05,
      "loss": 2.8524,
      "step": 205703
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.739286422729492,
      "learning_rate": 1.6852520432067796e-05,
      "loss": 2.9466,
      "step": 205704
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.03873610496521,
      "learning_rate": 1.6851168748859035e-05,
      "loss": 2.9128,
      "step": 205705
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.973998785018921,
      "learning_rate": 1.6849817118293063e-05,
      "loss": 3.1253,
      "step": 205706
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.743987798690796,
      "learning_rate": 1.6848465540369982e-05,
      "loss": 3.0845,
      "step": 205707
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8625006675720215,
      "learning_rate": 1.6847114015090158e-05,
      "loss": 2.9492,
      "step": 205708
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.28312349319458,
      "learning_rate": 1.6845762542453757e-05,
      "loss": 2.9546,
      "step": 205709
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.370271921157837,
      "learning_rate": 1.684441112246111e-05,
      "loss": 3.0149,
      "step": 205710
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7656776905059814,
      "learning_rate": 1.6843059755112386e-05,
      "loss": 2.693,
      "step": 205711
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1836628913879395,
      "learning_rate": 1.6841708440407985e-05,
      "loss": 3.1166,
      "step": 205712
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7870559692382812,
      "learning_rate": 1.6840357178347976e-05,
      "loss": 2.8378,
      "step": 205713
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8536672592163086,
      "learning_rate": 1.6839005968932684e-05,
      "loss": 3.049,
      "step": 205714
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5109901428222656,
      "learning_rate": 1.6837654812162384e-05,
      "loss": 3.0439,
      "step": 205715
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5876970291137695,
      "learning_rate": 1.6836303708037302e-05,
      "loss": 3.4717,
      "step": 205716
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8485875129699707,
      "learning_rate": 1.6834952656557676e-05,
      "loss": 2.8996,
      "step": 205717
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.317915201187134,
      "learning_rate": 1.683360165772384e-05,
      "loss": 2.7348,
      "step": 205718
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0110504627227783,
      "learning_rate": 1.6832250711535954e-05,
      "loss": 3.0274,
      "step": 205719
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.019721746444702,
      "learning_rate": 1.6830899817994292e-05,
      "loss": 3.1094,
      "step": 205720
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2082200050354004,
      "learning_rate": 1.6829548977099085e-05,
      "loss": 2.9263,
      "step": 205721
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8291096687316895,
      "learning_rate": 1.682819818885066e-05,
      "loss": 3.1035,
      "step": 205722
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4842326641082764,
      "learning_rate": 1.6826847453249158e-05,
      "loss": 2.8967,
      "step": 205723
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8838417530059814,
      "learning_rate": 1.6825496770294943e-05,
      "loss": 2.8589,
      "step": 205724
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.03863263130188,
      "learning_rate": 1.6824146139988217e-05,
      "loss": 3.0751,
      "step": 205725
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2248926162719727,
      "learning_rate": 1.6822795562329206e-05,
      "loss": 3.0668,
      "step": 205726
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.000643253326416,
      "learning_rate": 1.682144503731815e-05,
      "loss": 2.8262,
      "step": 205727
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.267277717590332,
      "learning_rate": 1.682009456495538e-05,
      "loss": 2.8186,
      "step": 205728
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.393918752670288,
      "learning_rate": 1.6818744145241057e-05,
      "loss": 2.9685,
      "step": 205729
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.830112934112549,
      "learning_rate": 1.6817393778175525e-05,
      "loss": 2.9197,
      "step": 205730
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7745068073272705,
      "learning_rate": 1.6816043463758976e-05,
      "loss": 2.8796,
      "step": 205731
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.444883108139038,
      "learning_rate": 1.681469320199168e-05,
      "loss": 3.0343,
      "step": 205732
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1302990913391113,
      "learning_rate": 1.68133429928738e-05,
      "loss": 2.7053,
      "step": 205733
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.679258108139038,
      "learning_rate": 1.6811992836405742e-05,
      "loss": 2.6713,
      "step": 205734
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9400951862335205,
      "learning_rate": 1.681064273258763e-05,
      "loss": 2.9215,
      "step": 205735
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5487282276153564,
      "learning_rate": 1.6809292681419805e-05,
      "loss": 2.9798,
      "step": 205736
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8559069633483887,
      "learning_rate": 1.68079426829025e-05,
      "loss": 3.013,
      "step": 205737
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4596712589263916,
      "learning_rate": 1.6806592737035908e-05,
      "loss": 3.1788,
      "step": 205738
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.989743232727051,
      "learning_rate": 1.6805242843820298e-05,
      "loss": 3.0835,
      "step": 205739
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.647064208984375,
      "learning_rate": 1.6803893003255975e-05,
      "loss": 2.7848,
      "step": 205740
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.590651512145996,
      "learning_rate": 1.6802543215343102e-05,
      "loss": 2.9586,
      "step": 205741
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9597177505493164,
      "learning_rate": 1.6801193480082044e-05,
      "loss": 2.9069,
      "step": 205742
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.160280227661133,
      "learning_rate": 1.6799843797472967e-05,
      "loss": 2.8738,
      "step": 205743
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.222465753555298,
      "learning_rate": 1.6798494167516173e-05,
      "loss": 3.0779,
      "step": 205744
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8036346435546875,
      "learning_rate": 1.6797144590211832e-05,
      "loss": 3.0483,
      "step": 205745
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5008511543273926,
      "learning_rate": 1.6795795065560268e-05,
      "loss": 2.7731,
      "step": 205746
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7318339347839355,
      "learning_rate": 1.6794445593561688e-05,
      "loss": 2.7839,
      "step": 205747
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7569756507873535,
      "learning_rate": 1.679309617421639e-05,
      "loss": 2.8287,
      "step": 205748
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.10473370552063,
      "learning_rate": 1.6791746807524643e-05,
      "loss": 2.979,
      "step": 205749
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8602182865142822,
      "learning_rate": 1.679039749348664e-05,
      "loss": 3.0534,
      "step": 205750
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1183011531829834,
      "learning_rate": 1.6789048232102585e-05,
      "loss": 3.1213,
      "step": 205751
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.227433443069458,
      "learning_rate": 1.6787699023372847e-05,
      "loss": 2.9504,
      "step": 205752
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5265166759490967,
      "learning_rate": 1.6786349867297587e-05,
      "loss": 2.9805,
      "step": 205753
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.983837842941284,
      "learning_rate": 1.678500076387711e-05,
      "loss": 2.897,
      "step": 205754
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.97829270362854,
      "learning_rate": 1.6783651713111677e-05,
      "loss": 2.9525,
      "step": 205755
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7193872928619385,
      "learning_rate": 1.6782302715001496e-05,
      "loss": 2.9341,
      "step": 205756
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0144474506378174,
      "learning_rate": 1.6780953769546755e-05,
      "loss": 2.8187,
      "step": 205757
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.001378297805786,
      "learning_rate": 1.6779604876747865e-05,
      "loss": 2.93,
      "step": 205758
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.882889986038208,
      "learning_rate": 1.6778256036604954e-05,
      "loss": 3.2177,
      "step": 205759
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.281339406967163,
      "learning_rate": 1.677690724911832e-05,
      "loss": 2.6886,
      "step": 205760
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.287858009338379,
      "learning_rate": 1.6775558514288235e-05,
      "loss": 2.9347,
      "step": 205761
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.968682289123535,
      "learning_rate": 1.6774209832114926e-05,
      "loss": 2.9144,
      "step": 205762
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9665400981903076,
      "learning_rate": 1.6772861202598565e-05,
      "loss": 2.9669,
      "step": 205763
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2570667266845703,
      "learning_rate": 1.677151262573955e-05,
      "loss": 3.1758,
      "step": 205764
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.968087673187256,
      "learning_rate": 1.6770164101537975e-05,
      "loss": 2.8552,
      "step": 205765
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.2695722579956055,
      "learning_rate": 1.6768815629994248e-05,
      "loss": 2.9762,
      "step": 205766
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.927863597869873,
      "learning_rate": 1.6767467211108533e-05,
      "loss": 2.81,
      "step": 205767
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.654845952987671,
      "learning_rate": 1.676611884488106e-05,
      "loss": 3.0211,
      "step": 205768
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3127896785736084,
      "learning_rate": 1.6764770531312133e-05,
      "loss": 2.8627,
      "step": 205769
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9419620037078857,
      "learning_rate": 1.6763422270401983e-05,
      "loss": 2.8153,
      "step": 205770
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.6014647483825684,
      "learning_rate": 1.676207406215084e-05,
      "loss": 2.8033,
      "step": 205771
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.86387300491333,
      "learning_rate": 1.6760725906559013e-05,
      "loss": 2.766,
      "step": 205772
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4029664993286133,
      "learning_rate": 1.6759377803626694e-05,
      "loss": 2.9504,
      "step": 205773
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.061033248901367,
      "learning_rate": 1.675802975335412e-05,
      "loss": 2.6895,
      "step": 205774
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.028489828109741,
      "learning_rate": 1.675668175574162e-05,
      "loss": 2.8041,
      "step": 205775
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.769674301147461,
      "learning_rate": 1.6755333810789396e-05,
      "loss": 2.923,
      "step": 205776
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0643012523651123,
      "learning_rate": 1.675398591849768e-05,
      "loss": 2.91,
      "step": 205777
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.169419765472412,
      "learning_rate": 1.6752638078866777e-05,
      "loss": 2.8526,
      "step": 205778
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.568013906478882,
      "learning_rate": 1.675129029189688e-05,
      "loss": 2.9758,
      "step": 205779
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9809396266937256,
      "learning_rate": 1.674994255758826e-05,
      "loss": 2.9414,
      "step": 205780
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.740354299545288,
      "learning_rate": 1.674859487594118e-05,
      "loss": 2.8744,
      "step": 205781
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3316845893859863,
      "learning_rate": 1.674724724695584e-05,
      "loss": 2.8516,
      "step": 205782
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8215787410736084,
      "learning_rate": 1.6745899670632612e-05,
      "loss": 2.9625,
      "step": 205783
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7293365001678467,
      "learning_rate": 1.6744552146971656e-05,
      "loss": 2.643,
      "step": 205784
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8605849742889404,
      "learning_rate": 1.6743204675973177e-05,
      "loss": 2.8422,
      "step": 205785
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.841310977935791,
      "learning_rate": 1.6741857257637538e-05,
      "loss": 2.8405,
      "step": 205786
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.876795768737793,
      "learning_rate": 1.6740509891964902e-05,
      "loss": 2.981,
      "step": 205787
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.980163335800171,
      "learning_rate": 1.6739162578955544e-05,
      "loss": 2.7176,
      "step": 205788
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3572804927825928,
      "learning_rate": 1.673781531860976e-05,
      "loss": 2.8824,
      "step": 205789
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.93422532081604,
      "learning_rate": 1.6736468110927713e-05,
      "loss": 2.7087,
      "step": 205790
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.94753360748291,
      "learning_rate": 1.6735120955909776e-05,
      "loss": 3.0298,
      "step": 205791
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.846759557723999,
      "learning_rate": 1.6733773853556075e-05,
      "loss": 2.8042,
      "step": 205792
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.415839433670044,
      "learning_rate": 1.673242680386695e-05,
      "loss": 3.0013,
      "step": 205793
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5358989238739014,
      "learning_rate": 1.673107980684256e-05,
      "loss": 2.7239,
      "step": 205794
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.396459579467773,
      "learning_rate": 1.6729732862483246e-05,
      "loss": 2.7081,
      "step": 205795
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2602124214172363,
      "learning_rate": 1.6728385970789204e-05,
      "loss": 2.8247,
      "step": 205796
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.868042469024658,
      "learning_rate": 1.6727039131760733e-05,
      "loss": 2.9778,
      "step": 205797
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9863033294677734,
      "learning_rate": 1.6725692345398033e-05,
      "loss": 3.0246,
      "step": 205798
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.817125082015991,
      "learning_rate": 1.6724345611701406e-05,
      "loss": 2.8213,
      "step": 205799
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.194007635116577,
      "learning_rate": 1.6722998930670983e-05,
      "loss": 3.0211,
      "step": 205800
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.975449323654175,
      "learning_rate": 1.67216523023072e-05,
      "loss": 2.71,
      "step": 205801
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.746062994003296,
      "learning_rate": 1.6720305726610117e-05,
      "loss": 2.8754,
      "step": 205802
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.316997766494751,
      "learning_rate": 1.671895920358014e-05,
      "loss": 2.8942,
      "step": 205803
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.012424468994141,
      "learning_rate": 1.6717612733217466e-05,
      "loss": 3.034,
      "step": 205804
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.00846791267395,
      "learning_rate": 1.6716266315522332e-05,
      "loss": 2.7823,
      "step": 205805
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5951406955718994,
      "learning_rate": 1.6714919950494932e-05,
      "loss": 3.0088,
      "step": 205806
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.885739326477051,
      "learning_rate": 1.6713573638135637e-05,
      "loss": 3.0717,
      "step": 205807
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7812910079956055,
      "learning_rate": 1.6712227378444576e-05,
      "loss": 2.8487,
      "step": 205808
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.627685785293579,
      "learning_rate": 1.671088117142212e-05,
      "loss": 2.8152,
      "step": 205809
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.805161476135254,
      "learning_rate": 1.670953501706843e-05,
      "loss": 2.8255,
      "step": 205810
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.718977928161621,
      "learning_rate": 1.6708188915383814e-05,
      "loss": 3.0613,
      "step": 205811
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.767296314239502,
      "learning_rate": 1.670684286636843e-05,
      "loss": 2.6285,
      "step": 205812
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.04449725151062,
      "learning_rate": 1.6705496870022648e-05,
      "loss": 2.7871,
      "step": 205813
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3514325618743896,
      "learning_rate": 1.6704150926346604e-05,
      "loss": 2.7223,
      "step": 205814
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9315881729125977,
      "learning_rate": 1.670280503534066e-05,
      "loss": 2.9578,
      "step": 205815
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0237650871276855,
      "learning_rate": 1.6701459197005017e-05,
      "loss": 2.8804,
      "step": 205816
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4988062381744385,
      "learning_rate": 1.6700113411339943e-05,
      "loss": 2.8797,
      "step": 205817
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.864304542541504,
      "learning_rate": 1.669876767834557e-05,
      "loss": 3.0608,
      "step": 205818
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.780580997467041,
      "learning_rate": 1.669742199802233e-05,
      "loss": 2.898,
      "step": 205819
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2268683910369873,
      "learning_rate": 1.6696076370370325e-05,
      "loss": 3.0899,
      "step": 205820
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.456416130065918,
      "learning_rate": 1.669473079538992e-05,
      "loss": 2.8788,
      "step": 205821
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0252702236175537,
      "learning_rate": 1.6693385273081285e-05,
      "loss": 2.8323,
      "step": 205822
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.006657123565674,
      "learning_rate": 1.6692039803444712e-05,
      "loss": 2.739,
      "step": 205823
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.699472188949585,
      "learning_rate": 1.6690694386480408e-05,
      "loss": 2.8469,
      "step": 205824
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5295755863189697,
      "learning_rate": 1.6689349022188703e-05,
      "loss": 2.9721,
      "step": 205825
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3596813678741455,
      "learning_rate": 1.668800371056973e-05,
      "loss": 3.1216,
      "step": 205826
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7104384899139404,
      "learning_rate": 1.668665845162386e-05,
      "loss": 2.9671,
      "step": 205827
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.1869611740112305,
      "learning_rate": 1.6685313245351286e-05,
      "loss": 3.0249,
      "step": 205828
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.167410373687744,
      "learning_rate": 1.6683968091752243e-05,
      "loss": 2.9463,
      "step": 205829
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.9442005157470703,
      "learning_rate": 1.6682622990826967e-05,
      "loss": 3.1153,
      "step": 205830
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.984168291091919,
      "learning_rate": 1.6681277942575786e-05,
      "loss": 2.8152,
      "step": 205831
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.771543025970459,
      "learning_rate": 1.667993294699884e-05,
      "loss": 2.9192,
      "step": 205832
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.720284938812256,
      "learning_rate": 1.6678588004096526e-05,
      "loss": 2.7836,
      "step": 205833
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.330404043197632,
      "learning_rate": 1.6677243113868978e-05,
      "loss": 2.8095,
      "step": 205834
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6909799575805664,
      "learning_rate": 1.667589827631649e-05,
      "loss": 3.04,
      "step": 205835
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.707491874694824,
      "learning_rate": 1.667455349143927e-05,
      "loss": 2.6603,
      "step": 205836
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.344297170639038,
      "learning_rate": 1.667320875923761e-05,
      "loss": 2.8125,
      "step": 205837
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.156790256500244,
      "learning_rate": 1.6671864079711716e-05,
      "loss": 3.1582,
      "step": 205838
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.72628116607666,
      "learning_rate": 1.6670519452861918e-05,
      "loss": 2.9442,
      "step": 205839
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8004770278930664,
      "learning_rate": 1.666917487868845e-05,
      "loss": 2.6759,
      "step": 205840
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.768395185470581,
      "learning_rate": 1.6667830357191482e-05,
      "loss": 2.9024,
      "step": 205841
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.9082276821136475,
      "learning_rate": 1.6666485888371305e-05,
      "loss": 2.9074,
      "step": 205842
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1738569736480713,
      "learning_rate": 1.6665141472228193e-05,
      "loss": 2.8683,
      "step": 205843
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.525615930557251,
      "learning_rate": 1.6663797108762344e-05,
      "loss": 2.828,
      "step": 205844
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.598161220550537,
      "learning_rate": 1.666245279797409e-05,
      "loss": 2.9428,
      "step": 205845
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3612420558929443,
      "learning_rate": 1.6661108539863632e-05,
      "loss": 3.1376,
      "step": 205846
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.761425018310547,
      "learning_rate": 1.6659764334431234e-05,
      "loss": 2.8,
      "step": 205847
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.2048516273498535,
      "learning_rate": 1.6658420181677103e-05,
      "loss": 2.9014,
      "step": 205848
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.322801113128662,
      "learning_rate": 1.665707608160153e-05,
      "loss": 2.9346,
      "step": 205849
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.006791591644287,
      "learning_rate": 1.6655732034204716e-05,
      "loss": 2.9241,
      "step": 205850
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.607017993927002,
      "learning_rate": 1.6654388039487e-05,
      "loss": 2.8629,
      "step": 205851
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.9240710735321045,
      "learning_rate": 1.6653044097448544e-05,
      "loss": 2.8138,
      "step": 205852
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.580235242843628,
      "learning_rate": 1.665170020808968e-05,
      "loss": 3.092,
      "step": 205853
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.744089365005493,
      "learning_rate": 1.665035637141061e-05,
      "loss": 3.0747,
      "step": 205854
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1343820095062256,
      "learning_rate": 1.66490125874116e-05,
      "loss": 2.8455,
      "step": 205855
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7864608764648438,
      "learning_rate": 1.664766885609282e-05,
      "loss": 3.0622,
      "step": 205856
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4959146976470947,
      "learning_rate": 1.6646325177454634e-05,
      "loss": 3.0474,
      "step": 205857
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.187406301498413,
      "learning_rate": 1.6644981551497205e-05,
      "loss": 2.8204,
      "step": 205858
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2359471321105957,
      "learning_rate": 1.664363797822087e-05,
      "loss": 2.8055,
      "step": 205859
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.133066177368164,
      "learning_rate": 1.6642294457625828e-05,
      "loss": 2.7087,
      "step": 205860
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5592925548553467,
      "learning_rate": 1.6640950989712342e-05,
      "loss": 2.8535,
      "step": 205861
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9296915531158447,
      "learning_rate": 1.6639607574480616e-05,
      "loss": 3.0538,
      "step": 205862
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8838887214660645,
      "learning_rate": 1.663826421193095e-05,
      "loss": 3.1,
      "step": 205863
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.960590362548828,
      "learning_rate": 1.6636920902063578e-05,
      "loss": 2.9468,
      "step": 205864
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.767298460006714,
      "learning_rate": 1.663557764487876e-05,
      "loss": 2.8654,
      "step": 205865
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.4375360012054443,
      "learning_rate": 1.6634234440376736e-05,
      "loss": 2.9221,
      "step": 205866
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.737422466278076,
      "learning_rate": 1.663289128855774e-05,
      "loss": 2.9437,
      "step": 205867
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1434435844421387,
      "learning_rate": 1.6631548189422062e-05,
      "loss": 2.8841,
      "step": 205868
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7609143257141113,
      "learning_rate": 1.663020514296991e-05,
      "loss": 3.0731,
      "step": 205869
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.048393726348877,
      "learning_rate": 1.662886214920155e-05,
      "loss": 3.0954,
      "step": 205870
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.780014991760254,
      "learning_rate": 1.6627519208117247e-05,
      "loss": 3.1705,
      "step": 205871
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.307361602783203,
      "learning_rate": 1.6626176319717232e-05,
      "loss": 2.9944,
      "step": 205872
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9372661113739014,
      "learning_rate": 1.6624833484001742e-05,
      "loss": 3.0731,
      "step": 205873
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6493425369262695,
      "learning_rate": 1.6623490700971077e-05,
      "loss": 2.8045,
      "step": 205874
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8709681034088135,
      "learning_rate": 1.6622147970625466e-05,
      "loss": 2.8143,
      "step": 205875
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0774917602539062,
      "learning_rate": 1.6620805292965077e-05,
      "loss": 2.8783,
      "step": 205876
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0333478450775146,
      "learning_rate": 1.661946266799028e-05,
      "loss": 3.0903,
      "step": 205877
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6913130283355713,
      "learning_rate": 1.6618120095701303e-05,
      "loss": 2.9209,
      "step": 205878
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.786210536956787,
      "learning_rate": 1.6616777576098282e-05,
      "loss": 2.9374,
      "step": 205879
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.479105234146118,
      "learning_rate": 1.6615435109181618e-05,
      "loss": 2.7301,
      "step": 205880
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.911850929260254,
      "learning_rate": 1.6614092694951443e-05,
      "loss": 2.858,
      "step": 205881
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.265277862548828,
      "learning_rate": 1.661275033340812e-05,
      "loss": 3.1809,
      "step": 205882
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5222647190093994,
      "learning_rate": 1.661140802455182e-05,
      "loss": 2.8466,
      "step": 205883
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9515435695648193,
      "learning_rate": 1.6610065768382807e-05,
      "loss": 2.9544,
      "step": 205884
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.77740740776062,
      "learning_rate": 1.6608723564901284e-05,
      "loss": 2.899,
      "step": 205885
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9685347080230713,
      "learning_rate": 1.6607381414107612e-05,
      "loss": 2.9033,
      "step": 205886
    },
    {
      "epoch": 2.68,
      "grad_norm": 7.7228922843933105,
      "learning_rate": 1.6606039316001897e-05,
      "loss": 2.9368,
      "step": 205887
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8543646335601807,
      "learning_rate": 1.6604697270584565e-05,
      "loss": 2.9044,
      "step": 205888
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0455784797668457,
      "learning_rate": 1.6603355277855722e-05,
      "loss": 2.8654,
      "step": 205889
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.34407901763916,
      "learning_rate": 1.66020133378157e-05,
      "loss": 2.8767,
      "step": 205890
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3669707775115967,
      "learning_rate": 1.660067145046463e-05,
      "loss": 2.9761,
      "step": 205891
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.556377410888672,
      "learning_rate": 1.6599329615802915e-05,
      "loss": 2.6795,
      "step": 205892
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8951902389526367,
      "learning_rate": 1.6597987833830682e-05,
      "loss": 2.934,
      "step": 205893
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0351836681365967,
      "learning_rate": 1.6596646104548306e-05,
      "loss": 2.8687,
      "step": 205894
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8721001148223877,
      "learning_rate": 1.6595304427955913e-05,
      "loss": 2.9047,
      "step": 205895
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1938393115997314,
      "learning_rate": 1.6593962804053838e-05,
      "loss": 2.8087,
      "step": 205896
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9103524684906006,
      "learning_rate": 1.659262123284225e-05,
      "loss": 2.9178,
      "step": 205897
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.807342767715454,
      "learning_rate": 1.6591279714321483e-05,
      "loss": 2.9085,
      "step": 205898
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.41395378112793,
      "learning_rate": 1.6589938248491697e-05,
      "loss": 3.1769,
      "step": 205899
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.508414268493652,
      "learning_rate": 1.658859683535323e-05,
      "loss": 2.7798,
      "step": 205900
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.830425977706909,
      "learning_rate": 1.6587255474906315e-05,
      "loss": 2.7644,
      "step": 205901
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0373451709747314,
      "learning_rate": 1.658591416715115e-05,
      "loss": 2.838,
      "step": 205902
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.905647039413452,
      "learning_rate": 1.6584572912088002e-05,
      "loss": 3.1364,
      "step": 205903
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1345393657684326,
      "learning_rate": 1.658323170971714e-05,
      "loss": 2.9365,
      "step": 205904
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7639658451080322,
      "learning_rate": 1.658189056003879e-05,
      "loss": 2.9948,
      "step": 205905
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7131764888763428,
      "learning_rate": 1.6580549463053226e-05,
      "loss": 2.7742,
      "step": 205906
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.828714609146118,
      "learning_rate": 1.6579208418760713e-05,
      "loss": 3.0755,
      "step": 205907
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.837301015853882,
      "learning_rate": 1.6577867427161486e-05,
      "loss": 2.8378,
      "step": 205908
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.165076494216919,
      "learning_rate": 1.6576526488255704e-05,
      "loss": 2.6764,
      "step": 205909
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8257901668548584,
      "learning_rate": 1.657518560204377e-05,
      "loss": 2.9137,
      "step": 205910
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0087950229644775,
      "learning_rate": 1.6573844768525825e-05,
      "loss": 2.7038,
      "step": 205911
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7032511234283447,
      "learning_rate": 1.6572503987702158e-05,
      "loss": 2.9194,
      "step": 205912
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2643003463745117,
      "learning_rate": 1.657116325957304e-05,
      "loss": 2.9481,
      "step": 205913
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.156522750854492,
      "learning_rate": 1.6569822584138703e-05,
      "loss": 2.8481,
      "step": 205914
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8195061683654785,
      "learning_rate": 1.6568481961399316e-05,
      "loss": 2.9273,
      "step": 205915
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.714841842651367,
      "learning_rate": 1.6567141391355244e-05,
      "loss": 2.972,
      "step": 205916
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8480851650238037,
      "learning_rate": 1.656580087400665e-05,
      "loss": 2.7972,
      "step": 205917
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.6911263465881348,
      "learning_rate": 1.6564460409353874e-05,
      "loss": 2.6005,
      "step": 205918
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2814533710479736,
      "learning_rate": 1.656311999739708e-05,
      "loss": 3.2051,
      "step": 205919
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9517204761505127,
      "learning_rate": 1.656177963813663e-05,
      "loss": 2.8182,
      "step": 205920
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9716567993164062,
      "learning_rate": 1.6560439331572594e-05,
      "loss": 2.8636,
      "step": 205921
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.062077045440674,
      "learning_rate": 1.6559099077705372e-05,
      "loss": 2.9741,
      "step": 205922
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.310049533843994,
      "learning_rate": 1.6557758876535133e-05,
      "loss": 3.1806,
      "step": 205923
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6324386596679688,
      "learning_rate": 1.6556418728062205e-05,
      "loss": 3.0123,
      "step": 205924
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.194699287414551,
      "learning_rate": 1.6555078632286724e-05,
      "loss": 2.9971,
      "step": 205925
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0401782989501953,
      "learning_rate": 1.655373858920912e-05,
      "loss": 3.1539,
      "step": 205926
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7233145236968994,
      "learning_rate": 1.65523985988294e-05,
      "loss": 2.7669,
      "step": 205927
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2044761180877686,
      "learning_rate": 1.655105866114802e-05,
      "loss": 2.8165,
      "step": 205928
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.86295223236084,
      "learning_rate": 1.654971877616509e-05,
      "loss": 3.0493,
      "step": 205929
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0509254932403564,
      "learning_rate": 1.654837894388097e-05,
      "loss": 2.906,
      "step": 205930
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.14072847366333,
      "learning_rate": 1.6547039164295795e-05,
      "loss": 3.0061,
      "step": 205931
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9565377235412598,
      "learning_rate": 1.6545699437409966e-05,
      "loss": 2.7182,
      "step": 205932
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4534714221954346,
      "learning_rate": 1.654435976322358e-05,
      "loss": 2.8137,
      "step": 205933
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.162935495376587,
      "learning_rate": 1.6543020141736973e-05,
      "loss": 2.7706,
      "step": 205934
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7743992805480957,
      "learning_rate": 1.654168057295031e-05,
      "loss": 2.9072,
      "step": 205935
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.57893967628479,
      "learning_rate": 1.6540341056863958e-05,
      "loss": 2.7492,
      "step": 205936
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5518059730529785,
      "learning_rate": 1.6539001593478085e-05,
      "loss": 3.0652,
      "step": 205937
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2951536178588867,
      "learning_rate": 1.6537662182793022e-05,
      "loss": 3.1194,
      "step": 205938
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.694607734680176,
      "learning_rate": 1.6536322824808866e-05,
      "loss": 2.7623,
      "step": 205939
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.625854015350342,
      "learning_rate": 1.6534983519526025e-05,
      "loss": 2.7924,
      "step": 205940
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.981485366821289,
      "learning_rate": 1.6533644266944623e-05,
      "loss": 2.7936,
      "step": 205941
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.165473461151123,
      "learning_rate": 1.653230506706503e-05,
      "loss": 2.9673,
      "step": 205942
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4397647380828857,
      "learning_rate": 1.653096591988735e-05,
      "loss": 3.0099,
      "step": 205943
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.314805030822754,
      "learning_rate": 1.652962682541198e-05,
      "loss": 3.11,
      "step": 205944
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.149362087249756,
      "learning_rate": 1.6528287783639114e-05,
      "loss": 3.0868,
      "step": 205945
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7006335258483887,
      "learning_rate": 1.6526948794568994e-05,
      "loss": 2.7734,
      "step": 205946
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1300711631774902,
      "learning_rate": 1.6525609858201816e-05,
      "loss": 2.9168,
      "step": 205947
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3167765140533447,
      "learning_rate": 1.6524270974537913e-05,
      "loss": 2.9359,
      "step": 205948
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8311569690704346,
      "learning_rate": 1.652293214357745e-05,
      "loss": 2.9674,
      "step": 205949
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.0448737144470215,
      "learning_rate": 1.6521593365320762e-05,
      "loss": 2.7147,
      "step": 205950
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.2294464111328125,
      "learning_rate": 1.6520254639768084e-05,
      "loss": 2.8503,
      "step": 205951
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.161298990249634,
      "learning_rate": 1.6518915966919578e-05,
      "loss": 3.0483,
      "step": 205952
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8593926429748535,
      "learning_rate": 1.6517577346775613e-05,
      "loss": 3.0338,
      "step": 205953
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.01932430267334,
      "learning_rate": 1.6516238779336387e-05,
      "loss": 2.9214,
      "step": 205954
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0462327003479004,
      "learning_rate": 1.6514900264602105e-05,
      "loss": 2.8161,
      "step": 205955
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.767122745513916,
      "learning_rate": 1.6513561802573063e-05,
      "loss": 2.634,
      "step": 205956
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1060791015625,
      "learning_rate": 1.6512223393249524e-05,
      "loss": 2.8755,
      "step": 205957
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4147591590881348,
      "learning_rate": 1.6510885036631695e-05,
      "loss": 3.0784,
      "step": 205958
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9001314640045166,
      "learning_rate": 1.6509546732719835e-05,
      "loss": 2.9137,
      "step": 205959
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3124172687530518,
      "learning_rate": 1.650820848151425e-05,
      "loss": 2.9098,
      "step": 205960
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.795518636703491,
      "learning_rate": 1.6506870283015074e-05,
      "loss": 2.7205,
      "step": 205961
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.359863519668579,
      "learning_rate": 1.6505532137222665e-05,
      "loss": 2.8047,
      "step": 205962
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6900434494018555,
      "learning_rate": 1.650419404413723e-05,
      "loss": 3.1227,
      "step": 205963
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8899354934692383,
      "learning_rate": 1.650285600375897e-05,
      "loss": 2.8462,
      "step": 205964
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7497525215148926,
      "learning_rate": 1.6501518016088243e-05,
      "loss": 2.868,
      "step": 205965
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5399484634399414,
      "learning_rate": 1.6500180081125157e-05,
      "loss": 2.8845,
      "step": 205966
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4866137504577637,
      "learning_rate": 1.649884219887011e-05,
      "loss": 3.0738,
      "step": 205967
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6616673469543457,
      "learning_rate": 1.6497504369323265e-05,
      "loss": 2.7292,
      "step": 205968
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.602733850479126,
      "learning_rate": 1.6496166592484893e-05,
      "loss": 2.6841,
      "step": 205969
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2796807289123535,
      "learning_rate": 1.6494828868355192e-05,
      "loss": 2.7362,
      "step": 205970
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9128997325897217,
      "learning_rate": 1.6493491196934495e-05,
      "loss": 2.9623,
      "step": 205971
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.96608567237854,
      "learning_rate": 1.6492153578222965e-05,
      "loss": 2.7902,
      "step": 205972
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.955230474472046,
      "learning_rate": 1.6490816012220942e-05,
      "loss": 2.8746,
      "step": 205973
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.953855991363525,
      "learning_rate": 1.6489478498928656e-05,
      "loss": 2.827,
      "step": 205974
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.194094657897949,
      "learning_rate": 1.6488141038346304e-05,
      "loss": 2.8986,
      "step": 205975
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.981804609298706,
      "learning_rate": 1.6486803630474088e-05,
      "loss": 2.9869,
      "step": 205976
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3168840408325195,
      "learning_rate": 1.648546627531241e-05,
      "loss": 3.0578,
      "step": 205977
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3799922466278076,
      "learning_rate": 1.6484128972861366e-05,
      "loss": 3.0256,
      "step": 205978
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.961517333984375,
      "learning_rate": 1.6482791723121324e-05,
      "loss": 3.1592,
      "step": 205979
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.991565227508545,
      "learning_rate": 1.6481454526092484e-05,
      "loss": 2.8159,
      "step": 205980
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5949153900146484,
      "learning_rate": 1.648011738177508e-05,
      "loss": 2.9885,
      "step": 205981
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7796034812927246,
      "learning_rate": 1.6478780290169348e-05,
      "loss": 3.004,
      "step": 205982
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.73745059967041,
      "learning_rate": 1.6477443251275614e-05,
      "loss": 2.7326,
      "step": 205983
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.236912250518799,
      "learning_rate": 1.6476106265094014e-05,
      "loss": 2.8835,
      "step": 205984
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.17480206489563,
      "learning_rate": 1.6474769331624914e-05,
      "loss": 3.0446,
      "step": 205985
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.597062826156616,
      "learning_rate": 1.647343245086845e-05,
      "loss": 2.9479,
      "step": 205986
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5041697025299072,
      "learning_rate": 1.647209562282502e-05,
      "loss": 2.6291,
      "step": 205987
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6936147212982178,
      "learning_rate": 1.6470758847494692e-05,
      "loss": 3.1117,
      "step": 205988
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.94979190826416,
      "learning_rate": 1.6469422124877863e-05,
      "loss": 3.2358,
      "step": 205989
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0470383167266846,
      "learning_rate": 1.6468085454974665e-05,
      "loss": 3.0774,
      "step": 205990
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6488523483276367,
      "learning_rate": 1.646674883778544e-05,
      "loss": 3.0083,
      "step": 205991
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.742832660675049,
      "learning_rate": 1.6465412273310342e-05,
      "loss": 2.8728,
      "step": 205992
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1626486778259277,
      "learning_rate": 1.6464075761549778e-05,
      "loss": 2.9676,
      "step": 205993
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.601280689239502,
      "learning_rate": 1.6462739302503815e-05,
      "loss": 2.798,
      "step": 205994
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7820544242858887,
      "learning_rate": 1.6461402896172815e-05,
      "loss": 2.8416,
      "step": 205995
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3680455684661865,
      "learning_rate": 1.6460066542556948e-05,
      "loss": 2.7892,
      "step": 205996
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8997089862823486,
      "learning_rate": 1.645873024165655e-05,
      "loss": 2.8557,
      "step": 205997
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.53302001953125,
      "learning_rate": 1.6457393993471778e-05,
      "loss": 2.7642,
      "step": 205998
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0989718437194824,
      "learning_rate": 1.645605779800304e-05,
      "loss": 2.9477,
      "step": 205999
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.608837842941284,
      "learning_rate": 1.645472165525037e-05,
      "loss": 2.8869,
      "step": 206000
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9163591861724854,
      "learning_rate": 1.6453385565214162e-05,
      "loss": 2.6881,
      "step": 206001
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.362959861755371,
      "learning_rate": 1.6452049527894584e-05,
      "loss": 2.9444,
      "step": 206002
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5877819061279297,
      "learning_rate": 1.6450713543291937e-05,
      "loss": 2.9214,
      "step": 206003
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1577343940734863,
      "learning_rate": 1.6449377611406456e-05,
      "loss": 3.05,
      "step": 206004
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1709911823272705,
      "learning_rate": 1.6448041732238436e-05,
      "loss": 3.0242,
      "step": 206005
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1122286319732666,
      "learning_rate": 1.6446705905788014e-05,
      "loss": 2.8046,
      "step": 206006
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.707583427429199,
      "learning_rate": 1.644537013205556e-05,
      "loss": 3.0371,
      "step": 206007
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.600820541381836,
      "learning_rate": 1.6444034411041196e-05,
      "loss": 2.9104,
      "step": 206008
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9532485008239746,
      "learning_rate": 1.6442698742745264e-05,
      "loss": 3.098,
      "step": 206009
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.6343908309936523,
      "learning_rate": 1.6441363127167994e-05,
      "loss": 2.7959,
      "step": 206010
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.036259651184082,
      "learning_rate": 1.644002756430969e-05,
      "loss": 2.9555,
      "step": 206011
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7510125637054443,
      "learning_rate": 1.6438692054170444e-05,
      "loss": 2.8593,
      "step": 206012
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.616091012954712,
      "learning_rate": 1.6437356596750628e-05,
      "loss": 2.86,
      "step": 206013
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.156010627746582,
      "learning_rate": 1.643602119205044e-05,
      "loss": 2.988,
      "step": 206014
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1136958599090576,
      "learning_rate": 1.6434685840070183e-05,
      "loss": 3.137,
      "step": 206015
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.977590560913086,
      "learning_rate": 1.643335054081002e-05,
      "loss": 2.9192,
      "step": 206016
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6902084350585938,
      "learning_rate": 1.643201529427035e-05,
      "loss": 2.702,
      "step": 206017
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.881054401397705,
      "learning_rate": 1.6430680100451244e-05,
      "loss": 2.9545,
      "step": 206018
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7761054039001465,
      "learning_rate": 1.642934495935303e-05,
      "loss": 3.0525,
      "step": 206019
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4328107833862305,
      "learning_rate": 1.6428009870975943e-05,
      "loss": 3.0937,
      "step": 206020
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.015742540359497,
      "learning_rate": 1.6426674835320286e-05,
      "loss": 3.0751,
      "step": 206021
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.268019914627075,
      "learning_rate": 1.6425339852386223e-05,
      "loss": 2.9402,
      "step": 206022
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8627729415893555,
      "learning_rate": 1.6424004922174116e-05,
      "loss": 2.9386,
      "step": 206023
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.236950635910034,
      "learning_rate": 1.642267004468404e-05,
      "loss": 2.6898,
      "step": 206024
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0467724800109863,
      "learning_rate": 1.642133521991642e-05,
      "loss": 2.5295,
      "step": 206025
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9795358180999756,
      "learning_rate": 1.6420000447871363e-05,
      "loss": 2.8955,
      "step": 206026
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7113609313964844,
      "learning_rate": 1.6418665728549196e-05,
      "loss": 2.7764,
      "step": 206027
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0441219806671143,
      "learning_rate": 1.6417331061950158e-05,
      "loss": 2.8046,
      "step": 206028
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.422272682189941,
      "learning_rate": 1.641599644807451e-05,
      "loss": 3.0551,
      "step": 206029
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6036994457244873,
      "learning_rate": 1.6414661886922487e-05,
      "loss": 3.0293,
      "step": 206030
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3416342735290527,
      "learning_rate": 1.6413327378494323e-05,
      "loss": 3.046,
      "step": 206031
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.06876540184021,
      "learning_rate": 1.6411992922790252e-05,
      "loss": 2.6915,
      "step": 206032
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0774948596954346,
      "learning_rate": 1.641065851981057e-05,
      "loss": 2.8754,
      "step": 206033
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.954526424407959,
      "learning_rate": 1.6409324169555448e-05,
      "loss": 2.8321,
      "step": 206034
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.910661220550537,
      "learning_rate": 1.640798987202525e-05,
      "loss": 2.8213,
      "step": 206035
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6108202934265137,
      "learning_rate": 1.6406655627220146e-05,
      "loss": 2.8322,
      "step": 206036
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9568636417388916,
      "learning_rate": 1.640532143514043e-05,
      "loss": 2.7272,
      "step": 206037
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.587495803833008,
      "learning_rate": 1.6403987295786235e-05,
      "loss": 2.8303,
      "step": 206038
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.022303581237793,
      "learning_rate": 1.6402653209157933e-05,
      "loss": 2.96,
      "step": 206039
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9169862270355225,
      "learning_rate": 1.640131917525572e-05,
      "loss": 2.9479,
      "step": 206040
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1475346088409424,
      "learning_rate": 1.6399985194079868e-05,
      "loss": 2.8414,
      "step": 206041
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.660414457321167,
      "learning_rate": 1.6398651265630635e-05,
      "loss": 2.9723,
      "step": 206042
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.920936346054077,
      "learning_rate": 1.6397317389908195e-05,
      "loss": 2.9463,
      "step": 206043
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.7622928619384766,
      "learning_rate": 1.6395983566912908e-05,
      "loss": 3.0777,
      "step": 206044
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2444145679473877,
      "learning_rate": 1.6394649796644944e-05,
      "loss": 2.9176,
      "step": 206045
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1038572788238525,
      "learning_rate": 1.63933160791045e-05,
      "loss": 3.2528,
      "step": 206046
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.715482711791992,
      "learning_rate": 1.6391982414291982e-05,
      "loss": 2.9097,
      "step": 206047
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.311758041381836,
      "learning_rate": 1.6390648802207516e-05,
      "loss": 3.1046,
      "step": 206048
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7482409477233887,
      "learning_rate": 1.6389315242851343e-05,
      "loss": 2.948,
      "step": 206049
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.680248260498047,
      "learning_rate": 1.6387981736223786e-05,
      "loss": 2.5877,
      "step": 206050
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7660582065582275,
      "learning_rate": 1.638664828232502e-05,
      "loss": 3.0604,
      "step": 206051
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.104556083679199,
      "learning_rate": 1.6385314881155377e-05,
      "loss": 2.6674,
      "step": 206052
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.055870771408081,
      "learning_rate": 1.6383981532715086e-05,
      "loss": 3.2299,
      "step": 206053
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7291951179504395,
      "learning_rate": 1.6382648237004314e-05,
      "loss": 3.0636,
      "step": 206054
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9590952396392822,
      "learning_rate": 1.6381314994023366e-05,
      "loss": 2.9717,
      "step": 206055
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1298201084136963,
      "learning_rate": 1.6379981803772502e-05,
      "loss": 2.964,
      "step": 206056
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.633958101272583,
      "learning_rate": 1.6378648666251892e-05,
      "loss": 2.8024,
      "step": 206057
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.787248611450195,
      "learning_rate": 1.6377315581461936e-05,
      "loss": 2.8312,
      "step": 206058
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.619079828262329,
      "learning_rate": 1.6375982549402766e-05,
      "loss": 2.9016,
      "step": 206059
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2599995136260986,
      "learning_rate": 1.637464957007465e-05,
      "loss": 2.8038,
      "step": 206060
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.128323793411255,
      "learning_rate": 1.6373316643477786e-05,
      "loss": 2.7334,
      "step": 206061
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.6921491622924805,
      "learning_rate": 1.637198376961254e-05,
      "loss": 3.2029,
      "step": 206062
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.093466281890869,
      "learning_rate": 1.6370650948479046e-05,
      "loss": 2.9174,
      "step": 206063
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3380026817321777,
      "learning_rate": 1.636931818007764e-05,
      "loss": 2.8505,
      "step": 206064
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.330498695373535,
      "learning_rate": 1.6367985464408516e-05,
      "loss": 2.7659,
      "step": 206065
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.4658613204956055,
      "learning_rate": 1.636665280147198e-05,
      "loss": 2.9099,
      "step": 206066
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.932124614715576,
      "learning_rate": 1.6365320191268193e-05,
      "loss": 2.7794,
      "step": 206067
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.175070762634277,
      "learning_rate": 1.636398763379746e-05,
      "loss": 2.8331,
      "step": 206068
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1038498878479004,
      "learning_rate": 1.6362655129059976e-05,
      "loss": 2.9722,
      "step": 206069
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7436654567718506,
      "learning_rate": 1.6361322677056076e-05,
      "loss": 2.8315,
      "step": 206070
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.955381393432617,
      "learning_rate": 1.6359990277785927e-05,
      "loss": 2.882,
      "step": 206071
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.348684549331665,
      "learning_rate": 1.6358657931249898e-05,
      "loss": 2.9832,
      "step": 206072
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.695214748382568,
      "learning_rate": 1.635732563744805e-05,
      "loss": 2.9325,
      "step": 206073
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.750974416732788,
      "learning_rate": 1.6355993396380785e-05,
      "loss": 2.7338,
      "step": 206074
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1219093799591064,
      "learning_rate": 1.6354661208048237e-05,
      "loss": 2.8223,
      "step": 206075
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.15999698638916,
      "learning_rate": 1.635332907245077e-05,
      "loss": 3.1209,
      "step": 206076
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0503323078155518,
      "learning_rate": 1.6351996989588522e-05,
      "loss": 2.9514,
      "step": 206077
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.82285475730896,
      "learning_rate": 1.6350664959461857e-05,
      "loss": 2.8399,
      "step": 206078
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.909877300262451,
      "learning_rate": 1.6349332982070908e-05,
      "loss": 3.1825,
      "step": 206079
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1648452281951904,
      "learning_rate": 1.6348001057416006e-05,
      "loss": 2.8075,
      "step": 206080
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8982317447662354,
      "learning_rate": 1.6346669185497284e-05,
      "loss": 3.0307,
      "step": 206081
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8659920692443848,
      "learning_rate": 1.6345337366315148e-05,
      "loss": 2.7964,
      "step": 206082
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2506885528564453,
      "learning_rate": 1.6344005599869724e-05,
      "loss": 2.7173,
      "step": 206083
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.578397035598755,
      "learning_rate": 1.6342673886161385e-05,
      "loss": 2.831,
      "step": 206084
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.316812515258789,
      "learning_rate": 1.6341342225190225e-05,
      "loss": 2.9166,
      "step": 206085
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9210216999053955,
      "learning_rate": 1.634001061695661e-05,
      "loss": 2.7684,
      "step": 206086
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5419983863830566,
      "learning_rate": 1.633867906146068e-05,
      "loss": 2.9878,
      "step": 206087
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.977050542831421,
      "learning_rate": 1.6337347558702795e-05,
      "loss": 3.0611,
      "step": 206088
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0425565242767334,
      "learning_rate": 1.6336016108683093e-05,
      "loss": 2.9327,
      "step": 206089
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.639960527420044,
      "learning_rate": 1.6334684711401967e-05,
      "loss": 2.7342,
      "step": 206090
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.740839719772339,
      "learning_rate": 1.6333353366859524e-05,
      "loss": 2.943,
      "step": 206091
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.697070598602295,
      "learning_rate": 1.6332022075056063e-05,
      "loss": 2.8738,
      "step": 206092
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.68027400970459,
      "learning_rate": 1.6330690835991844e-05,
      "loss": 3.0313,
      "step": 206093
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.535377025604248,
      "learning_rate": 1.6329359649667106e-05,
      "loss": 2.815,
      "step": 206094
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5897040367126465,
      "learning_rate": 1.6328028516082047e-05,
      "loss": 2.7741,
      "step": 206095
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.3874967098236084,
      "learning_rate": 1.6326697435237068e-05,
      "loss": 2.6868,
      "step": 206096
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1119630336761475,
      "learning_rate": 1.6325366407132236e-05,
      "loss": 2.5472,
      "step": 206097
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.903200626373291,
      "learning_rate": 1.6324035431767914e-05,
      "loss": 2.9949,
      "step": 206098
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.600933074951172,
      "learning_rate": 1.6322704509144237e-05,
      "loss": 2.9809,
      "step": 206099
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4412729740142822,
      "learning_rate": 1.632137363926157e-05,
      "loss": 3.1237,
      "step": 206100
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7831594944000244,
      "learning_rate": 1.6320042822120117e-05,
      "loss": 2.816,
      "step": 206101
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.896631956100464,
      "learning_rate": 1.6318712057720172e-05,
      "loss": 2.9306,
      "step": 206102
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.929443120956421,
      "learning_rate": 1.631738134606184e-05,
      "loss": 2.8818,
      "step": 206103
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.125140428543091,
      "learning_rate": 1.6316050687145522e-05,
      "loss": 2.9078,
      "step": 206104
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.650723457336426,
      "learning_rate": 1.6314720080971345e-05,
      "loss": 2.9775,
      "step": 206105
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1941161155700684,
      "learning_rate": 1.6313389527539677e-05,
      "loss": 2.9974,
      "step": 206106
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6625912189483643,
      "learning_rate": 1.6312059026850653e-05,
      "loss": 2.9229,
      "step": 206107
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.554485559463501,
      "learning_rate": 1.6310728578904674e-05,
      "loss": 3.2789,
      "step": 206108
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.889324188232422,
      "learning_rate": 1.6309398183701773e-05,
      "loss": 3.0421,
      "step": 206109
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4799699783325195,
      "learning_rate": 1.630806784124238e-05,
      "loss": 2.6852,
      "step": 206110
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1652636528015137,
      "learning_rate": 1.6306737551526627e-05,
      "loss": 2.8995,
      "step": 206111
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0385212898254395,
      "learning_rate": 1.630540731455482e-05,
      "loss": 2.8699,
      "step": 206112
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.139636993408203,
      "learning_rate": 1.6304077130327185e-05,
      "loss": 3.0313,
      "step": 206113
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.9570071697235107,
      "learning_rate": 1.6302746998843997e-05,
      "loss": 2.8806,
      "step": 206114
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8687233924865723,
      "learning_rate": 1.630141692010548e-05,
      "loss": 2.8885,
      "step": 206115
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.372502565383911,
      "learning_rate": 1.6300086894111908e-05,
      "loss": 2.7593,
      "step": 206116
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.854548215866089,
      "learning_rate": 1.6298756920863443e-05,
      "loss": 3.1837,
      "step": 206117
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.855966567993164,
      "learning_rate": 1.629742700036045e-05,
      "loss": 3.005,
      "step": 206118
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.929063320159912,
      "learning_rate": 1.629609713260307e-05,
      "loss": 2.8086,
      "step": 206119
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0379390716552734,
      "learning_rate": 1.629476731759166e-05,
      "loss": 2.9721,
      "step": 206120
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.891535997390747,
      "learning_rate": 1.6293437555326395e-05,
      "loss": 2.8007,
      "step": 206121
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.075308084487915,
      "learning_rate": 1.6292107845807533e-05,
      "loss": 2.9201,
      "step": 206122
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9141271114349365,
      "learning_rate": 1.629077818903528e-05,
      "loss": 2.7784,
      "step": 206123
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6171329021453857,
      "learning_rate": 1.6289448585009968e-05,
      "loss": 2.8715,
      "step": 206124
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8585402965545654,
      "learning_rate": 1.6288119033731762e-05,
      "loss": 2.8202,
      "step": 206125
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.709463596343994,
      "learning_rate": 1.6286789535200994e-05,
      "loss": 3.0724,
      "step": 206126
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.107933521270752,
      "learning_rate": 1.628546008941787e-05,
      "loss": 2.8682,
      "step": 206127
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6245570182800293,
      "learning_rate": 1.628413069638258e-05,
      "loss": 2.9653,
      "step": 206128
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8848845958709717,
      "learning_rate": 1.6282801356095465e-05,
      "loss": 3.1326,
      "step": 206129
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0371780395507812,
      "learning_rate": 1.6281472068556756e-05,
      "loss": 2.82,
      "step": 206130
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2014052867889404,
      "learning_rate": 1.6280142833766617e-05,
      "loss": 2.9145,
      "step": 206131
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0161688327789307,
      "learning_rate": 1.627881365172542e-05,
      "loss": 2.9986,
      "step": 206132
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2989554405212402,
      "learning_rate": 1.6277484522433328e-05,
      "loss": 2.8466,
      "step": 206133
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.014392137527466,
      "learning_rate": 1.627615544589057e-05,
      "loss": 2.9043,
      "step": 206134
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.335280418395996,
      "learning_rate": 1.6274826422097487e-05,
      "loss": 2.9398,
      "step": 206135
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.966172456741333,
      "learning_rate": 1.6273497451054207e-05,
      "loss": 2.8099,
      "step": 206136
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.811882257461548,
      "learning_rate": 1.6272168532761066e-05,
      "loss": 3.0204,
      "step": 206137
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.748046636581421,
      "learning_rate": 1.627083966721833e-05,
      "loss": 2.9247,
      "step": 206138
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.4577674865722656,
      "learning_rate": 1.626951085442616e-05,
      "loss": 2.8511,
      "step": 206139
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0833892822265625,
      "learning_rate": 1.626818209438483e-05,
      "loss": 2.9008,
      "step": 206140
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.634955406188965,
      "learning_rate": 1.6266853387094636e-05,
      "loss": 2.8429,
      "step": 206141
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9785385131835938,
      "learning_rate": 1.6265524732555714e-05,
      "loss": 2.9838,
      "step": 206142
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.471742868423462,
      "learning_rate": 1.6264196130768458e-05,
      "loss": 2.6071,
      "step": 206143
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.796973466873169,
      "learning_rate": 1.6262867581733042e-05,
      "loss": 2.9902,
      "step": 206144
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.165872573852539,
      "learning_rate": 1.6261539085449727e-05,
      "loss": 2.8262,
      "step": 206145
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.116748094558716,
      "learning_rate": 1.626021064191868e-05,
      "loss": 2.6828,
      "step": 206146
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.699963331222534,
      "learning_rate": 1.6258882251140304e-05,
      "loss": 3.0373,
      "step": 206147
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.204859495162964,
      "learning_rate": 1.6257553913114663e-05,
      "loss": 2.7994,
      "step": 206148
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.110795736312866,
      "learning_rate": 1.6256225627842156e-05,
      "loss": 2.834,
      "step": 206149
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9042670726776123,
      "learning_rate": 1.6254897395322952e-05,
      "loss": 3.0353,
      "step": 206150
    },
    {
      "epoch": 2.68,
      "grad_norm": 5.632496356964111,
      "learning_rate": 1.625356921555738e-05,
      "loss": 2.5173,
      "step": 206151
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.988647222518921,
      "learning_rate": 1.6252241088545547e-05,
      "loss": 3.1891,
      "step": 206152
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2444980144500732,
      "learning_rate": 1.6250913014287846e-05,
      "loss": 3.08,
      "step": 206153
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1586947441101074,
      "learning_rate": 1.624958499278438e-05,
      "loss": 2.9947,
      "step": 206154
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.092658281326294,
      "learning_rate": 1.6248257024035516e-05,
      "loss": 2.9597,
      "step": 206155
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4582152366638184,
      "learning_rate": 1.624692910804142e-05,
      "loss": 2.9442,
      "step": 206156
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8544015884399414,
      "learning_rate": 1.6245601244802485e-05,
      "loss": 2.9855,
      "step": 206157
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9591941833496094,
      "learning_rate": 1.624427343431872e-05,
      "loss": 2.9531,
      "step": 206158
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8697876930236816,
      "learning_rate": 1.6242945676590592e-05,
      "loss": 2.8475,
      "step": 206159
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5197317600250244,
      "learning_rate": 1.6241617971618192e-05,
      "loss": 2.7545,
      "step": 206160
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7021310329437256,
      "learning_rate": 1.624029031940186e-05,
      "loss": 3.0799,
      "step": 206161
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4615070819854736,
      "learning_rate": 1.6238962719941794e-05,
      "loss": 2.9897,
      "step": 206162
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.8436660766601562,
      "learning_rate": 1.6237635173238327e-05,
      "loss": 2.7929,
      "step": 206163
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.2379937171936035,
      "learning_rate": 1.6236307679291562e-05,
      "loss": 3.045,
      "step": 206164
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.106440305709839,
      "learning_rate": 1.6234980238101857e-05,
      "loss": 3.0007,
      "step": 206165
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.783344268798828,
      "learning_rate": 1.623365284966942e-05,
      "loss": 2.9279,
      "step": 206166
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1396572589874268,
      "learning_rate": 1.6232325513994516e-05,
      "loss": 3.1351,
      "step": 206167
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7326135635375977,
      "learning_rate": 1.6230998231077343e-05,
      "loss": 3.0231,
      "step": 206168
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.528449296951294,
      "learning_rate": 1.6229671000918264e-05,
      "loss": 2.8248,
      "step": 206169
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1544172763824463,
      "learning_rate": 1.6228343823517354e-05,
      "loss": 3.1395,
      "step": 206170
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7228658199310303,
      "learning_rate": 1.6227016698875005e-05,
      "loss": 3.1365,
      "step": 206171
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.3257534503936768,
      "learning_rate": 1.622568962699139e-05,
      "loss": 3.0434,
      "step": 206172
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.244501829147339,
      "learning_rate": 1.622436260786677e-05,
      "loss": 2.886,
      "step": 206173
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0897674560546875,
      "learning_rate": 1.622303564150138e-05,
      "loss": 3.0658,
      "step": 206174
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.994739294052124,
      "learning_rate": 1.6221708727895587e-05,
      "loss": 2.8133,
      "step": 206175
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5351433753967285,
      "learning_rate": 1.6220381867049426e-05,
      "loss": 2.8316,
      "step": 206176
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2110884189605713,
      "learning_rate": 1.6219055058963294e-05,
      "loss": 2.8451,
      "step": 206177
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.827594757080078,
      "learning_rate": 1.621772830363739e-05,
      "loss": 2.8056,
      "step": 206178
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7500014305114746,
      "learning_rate": 1.6216401601071983e-05,
      "loss": 2.8016,
      "step": 206179
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9252312183380127,
      "learning_rate": 1.621507495126724e-05,
      "loss": 2.924,
      "step": 206180
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.5670039653778076,
      "learning_rate": 1.6213748354223586e-05,
      "loss": 2.9416,
      "step": 206181
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.201100826263428,
      "learning_rate": 1.6212421809941068e-05,
      "loss": 2.8843,
      "step": 206182
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.2981162071228027,
      "learning_rate": 1.6211095318420042e-05,
      "loss": 2.8695,
      "step": 206183
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7913529872894287,
      "learning_rate": 1.6209768879660678e-05,
      "loss": 2.7292,
      "step": 206184
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.888824701309204,
      "learning_rate": 1.620844249366334e-05,
      "loss": 2.8915,
      "step": 206185
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.868962526321411,
      "learning_rate": 1.620711616042817e-05,
      "loss": 2.7044,
      "step": 206186
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7966716289520264,
      "learning_rate": 1.6205789879955488e-05,
      "loss": 2.5388,
      "step": 206187
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.476975202560425,
      "learning_rate": 1.6204463652245503e-05,
      "loss": 2.942,
      "step": 206188
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.114582061767578,
      "learning_rate": 1.6203137477298477e-05,
      "loss": 2.8786,
      "step": 206189
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.7209620475769043,
      "learning_rate": 1.6201811355114614e-05,
      "loss": 2.9497,
      "step": 206190
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1137797832489014,
      "learning_rate": 1.620048528569421e-05,
      "loss": 2.6507,
      "step": 206191
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0765650272369385,
      "learning_rate": 1.6199159269037464e-05,
      "loss": 3.0557,
      "step": 206192
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.704152822494507,
      "learning_rate": 1.619783330514468e-05,
      "loss": 2.768,
      "step": 206193
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8501787185668945,
      "learning_rate": 1.619650739401609e-05,
      "loss": 2.9658,
      "step": 206194
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.404197931289673,
      "learning_rate": 1.619518153565189e-05,
      "loss": 2.8366,
      "step": 206195
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9479598999023438,
      "learning_rate": 1.6193855730052352e-05,
      "loss": 3.016,
      "step": 206196
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.504349708557129,
      "learning_rate": 1.619252997721777e-05,
      "loss": 2.9718,
      "step": 206197
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.5210447311401367,
      "learning_rate": 1.619120427714832e-05,
      "loss": 2.7943,
      "step": 206198
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.4891357421875,
      "learning_rate": 1.6189878629844323e-05,
      "loss": 2.9002,
      "step": 206199
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1778876781463623,
      "learning_rate": 1.6188553035305984e-05,
      "loss": 2.9407,
      "step": 206200
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.1276259422302246,
      "learning_rate": 1.618722749353354e-05,
      "loss": 3.0703,
      "step": 206201
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.852452516555786,
      "learning_rate": 1.6185902004527217e-05,
      "loss": 2.8036,
      "step": 206202
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.8489890098571777,
      "learning_rate": 1.6184576568287322e-05,
      "loss": 2.7898,
      "step": 206203
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0195975303649902,
      "learning_rate": 1.618325118481405e-05,
      "loss": 2.8613,
      "step": 206204
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.6668965816497803,
      "learning_rate": 1.6181925854107702e-05,
      "loss": 3.0499,
      "step": 206205
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.959165573120117,
      "learning_rate": 1.618060057616848e-05,
      "loss": 3.0961,
      "step": 206206
    },
    {
      "epoch": 2.68,
      "grad_norm": 2.9246251583099365,
      "learning_rate": 1.6179275350996644e-05,
      "loss": 2.8406,
      "step": 206207
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.0058553218841553,
      "learning_rate": 1.6177950178592435e-05,
      "loss": 3.0266,
      "step": 206208
    },
    {
      "epoch": 2.68,
      "grad_norm": 4.629800319671631,
      "learning_rate": 1.6176625058956084e-05,
      "loss": 2.8933,
      "step": 206209
    },
    {
      "epoch": 2.68,
      "grad_norm": 3.074636936187744,
      "learning_rate": 1.6175299992087852e-05,
      "loss": 3.1337,
      "step": 206210
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9230575561523438,
      "learning_rate": 1.6173974977988012e-05,
      "loss": 2.9339,
      "step": 206211
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9737720489501953,
      "learning_rate": 1.6172650016656796e-05,
      "loss": 3.1202,
      "step": 206212
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9384305477142334,
      "learning_rate": 1.61713251080944e-05,
      "loss": 2.9065,
      "step": 206213
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.094606637954712,
      "learning_rate": 1.6170000252301163e-05,
      "loss": 2.9182,
      "step": 206214
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.293426275253296,
      "learning_rate": 1.6168675449277278e-05,
      "loss": 2.7506,
      "step": 206215
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.949902057647705,
      "learning_rate": 1.6167350699022952e-05,
      "loss": 3.2425,
      "step": 206216
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.504648208618164,
      "learning_rate": 1.6166026001538512e-05,
      "loss": 2.9063,
      "step": 206217
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8546807765960693,
      "learning_rate": 1.6164701356824163e-05,
      "loss": 3.0695,
      "step": 206218
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.729224920272827,
      "learning_rate": 1.6163376764880133e-05,
      "loss": 2.6167,
      "step": 206219
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.765753984451294,
      "learning_rate": 1.616205222570669e-05,
      "loss": 3.018,
      "step": 206220
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.915823221206665,
      "learning_rate": 1.6160727739304102e-05,
      "loss": 2.8294,
      "step": 206221
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8349621295928955,
      "learning_rate": 1.6159403305672535e-05,
      "loss": 2.7591,
      "step": 206222
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.244553804397583,
      "learning_rate": 1.6158078924812358e-05,
      "loss": 2.7339,
      "step": 206223
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6181061267852783,
      "learning_rate": 1.6156754596723766e-05,
      "loss": 2.8487,
      "step": 206224
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.995260000228882,
      "learning_rate": 1.6155430321406926e-05,
      "loss": 2.9923,
      "step": 206225
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8769969940185547,
      "learning_rate": 1.6154106098862174e-05,
      "loss": 2.7903,
      "step": 206226
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.673189878463745,
      "learning_rate": 1.6152781929089708e-05,
      "loss": 2.7569,
      "step": 206227
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.056356906890869,
      "learning_rate": 1.615145781208983e-05,
      "loss": 2.8633,
      "step": 206228
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9949071407318115,
      "learning_rate": 1.615013374786277e-05,
      "loss": 2.8073,
      "step": 206229
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.706796169281006,
      "learning_rate": 1.6148809736408762e-05,
      "loss": 3.0105,
      "step": 206230
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8695197105407715,
      "learning_rate": 1.6147485777727976e-05,
      "loss": 2.9506,
      "step": 206231
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9866180419921875,
      "learning_rate": 1.6146161871820805e-05,
      "loss": 3.0898,
      "step": 206232
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.043543815612793,
      "learning_rate": 1.6144838018687355e-05,
      "loss": 2.9928,
      "step": 206233
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.079841136932373,
      "learning_rate": 1.614351421832799e-05,
      "loss": 3.0133,
      "step": 206234
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.023054599761963,
      "learning_rate": 1.614219047074288e-05,
      "loss": 3.0033,
      "step": 206235
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.832883834838867,
      "learning_rate": 1.6140866775932348e-05,
      "loss": 2.7621,
      "step": 206236
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.297615051269531,
      "learning_rate": 1.6139543133896537e-05,
      "loss": 3.0184,
      "step": 206237
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5908541679382324,
      "learning_rate": 1.6138219544635744e-05,
      "loss": 3.0179,
      "step": 206238
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.961063861846924,
      "learning_rate": 1.6136896008150202e-05,
      "loss": 3.0642,
      "step": 206239
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0551979541778564,
      "learning_rate": 1.613557252444021e-05,
      "loss": 3.0266,
      "step": 206240
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7713937759399414,
      "learning_rate": 1.6134249093505935e-05,
      "loss": 2.9797,
      "step": 206241
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.87779426574707,
      "learning_rate": 1.613292571534771e-05,
      "loss": 3.0591,
      "step": 206242
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.262588024139404,
      "learning_rate": 1.6131602389965704e-05,
      "loss": 2.6473,
      "step": 206243
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7364323139190674,
      "learning_rate": 1.613027911736018e-05,
      "loss": 2.9697,
      "step": 206244
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3320839405059814,
      "learning_rate": 1.612895589753137e-05,
      "loss": 2.8995,
      "step": 206245
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.099388837814331,
      "learning_rate": 1.6127632730479613e-05,
      "loss": 2.9993,
      "step": 206246
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1407625675201416,
      "learning_rate": 1.6126309616205034e-05,
      "loss": 2.7848,
      "step": 206247
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.889679431915283,
      "learning_rate": 1.6124986554708007e-05,
      "loss": 2.6457,
      "step": 206248
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3776283264160156,
      "learning_rate": 1.6123663545988596e-05,
      "loss": 2.9092,
      "step": 206249
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9614741802215576,
      "learning_rate": 1.6122340590047233e-05,
      "loss": 2.8921,
      "step": 206250
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.585246562957764,
      "learning_rate": 1.6121017686884052e-05,
      "loss": 2.8312,
      "step": 206251
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3499410152435303,
      "learning_rate": 1.611969483649935e-05,
      "loss": 2.8227,
      "step": 206252
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.767108201980591,
      "learning_rate": 1.61183720388933e-05,
      "loss": 3.0528,
      "step": 206253
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.896972894668579,
      "learning_rate": 1.6117049294066264e-05,
      "loss": 3.1263,
      "step": 206254
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8197712898254395,
      "learning_rate": 1.6115726602018442e-05,
      "loss": 2.712,
      "step": 206255
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.694739580154419,
      "learning_rate": 1.6114403962750032e-05,
      "loss": 3.0434,
      "step": 206256
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.854884624481201,
      "learning_rate": 1.6113081376261306e-05,
      "loss": 2.9788,
      "step": 206257
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.675652503967285,
      "learning_rate": 1.6111758842552525e-05,
      "loss": 2.977,
      "step": 206258
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8934714794158936,
      "learning_rate": 1.6110436361623924e-05,
      "loss": 2.9345,
      "step": 206259
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.903036594390869,
      "learning_rate": 1.610911393347577e-05,
      "loss": 2.7503,
      "step": 206260
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2261602878570557,
      "learning_rate": 1.6107791558108295e-05,
      "loss": 2.769,
      "step": 206261
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.998936653137207,
      "learning_rate": 1.6106469235521734e-05,
      "loss": 2.731,
      "step": 206262
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1606409549713135,
      "learning_rate": 1.6105146965716285e-05,
      "loss": 3.0499,
      "step": 206263
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.788653612136841,
      "learning_rate": 1.610382474869232e-05,
      "loss": 3.242,
      "step": 206264
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.4636056423187256,
      "learning_rate": 1.610250258444996e-05,
      "loss": 2.6751,
      "step": 206265
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8692331314086914,
      "learning_rate": 1.610118047298955e-05,
      "loss": 2.881,
      "step": 206266
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.732372760772705,
      "learning_rate": 1.6099858414311285e-05,
      "loss": 2.7615,
      "step": 206267
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8190107345581055,
      "learning_rate": 1.6098536408415398e-05,
      "loss": 2.6794,
      "step": 206268
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7249338626861572,
      "learning_rate": 1.609721445530212e-05,
      "loss": 3.0794,
      "step": 206269
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9345359802246094,
      "learning_rate": 1.609589255497179e-05,
      "loss": 2.674,
      "step": 206270
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.838691234588623,
      "learning_rate": 1.6094570707424538e-05,
      "loss": 2.6761,
      "step": 206271
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.059756278991699,
      "learning_rate": 1.6093248912660696e-05,
      "loss": 3.0559,
      "step": 206272
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7292258739471436,
      "learning_rate": 1.6091927170680498e-05,
      "loss": 2.8167,
      "step": 206273
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9599874019622803,
      "learning_rate": 1.609060548148414e-05,
      "loss": 3.0124,
      "step": 206274
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6590678691864014,
      "learning_rate": 1.60892838450719e-05,
      "loss": 2.9141,
      "step": 206275
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.407320499420166,
      "learning_rate": 1.6087962261444033e-05,
      "loss": 2.9951,
      "step": 206276
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8771512508392334,
      "learning_rate": 1.608664073060071e-05,
      "loss": 3.0028,
      "step": 206277
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.338733673095703,
      "learning_rate": 1.6085319252542326e-05,
      "loss": 2.7686,
      "step": 206278
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.731882333755493,
      "learning_rate": 1.6083997827269025e-05,
      "loss": 2.9799,
      "step": 206279
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0075790882110596,
      "learning_rate": 1.608267645478106e-05,
      "loss": 2.947,
      "step": 206280
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.012261390686035,
      "learning_rate": 1.6081355135078643e-05,
      "loss": 2.7768,
      "step": 206281
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5498783588409424,
      "learning_rate": 1.60800338681621e-05,
      "loss": 2.749,
      "step": 206282
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.443068027496338,
      "learning_rate": 1.6078712654031634e-05,
      "loss": 3.0184,
      "step": 206283
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.601207733154297,
      "learning_rate": 1.6077391492687507e-05,
      "loss": 2.9125,
      "step": 206284
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.662294864654541,
      "learning_rate": 1.6076070384129924e-05,
      "loss": 3.1048,
      "step": 206285
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.897258996963501,
      "learning_rate": 1.6074749328359215e-05,
      "loss": 3.005,
      "step": 206286
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.094923257827759,
      "learning_rate": 1.607342832537548e-05,
      "loss": 2.7721,
      "step": 206287
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1251413822174072,
      "learning_rate": 1.6072107375179123e-05,
      "loss": 2.8076,
      "step": 206288
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.263153553009033,
      "learning_rate": 1.607078647777027e-05,
      "loss": 2.8192,
      "step": 206289
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.41556453704834,
      "learning_rate": 1.6069465633149258e-05,
      "loss": 3.0009,
      "step": 206290
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3387844562530518,
      "learning_rate": 1.6068144841316322e-05,
      "loss": 2.7585,
      "step": 206291
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0502166748046875,
      "learning_rate": 1.6066824102271624e-05,
      "loss": 2.9933,
      "step": 206292
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.236359119415283,
      "learning_rate": 1.6065503416015467e-05,
      "loss": 3.1187,
      "step": 206293
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2994072437286377,
      "learning_rate": 1.6064182782548117e-05,
      "loss": 2.7521,
      "step": 206294
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.311023712158203,
      "learning_rate": 1.606286220186974e-05,
      "loss": 2.9319,
      "step": 206295
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4395766258239746,
      "learning_rate": 1.6061541673980705e-05,
      "loss": 2.9985,
      "step": 206296
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.492482900619507,
      "learning_rate": 1.6060221198881174e-05,
      "loss": 2.6495,
      "step": 206297
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.471036195755005,
      "learning_rate": 1.605890077657135e-05,
      "loss": 3.2316,
      "step": 206298
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6857612133026123,
      "learning_rate": 1.6057580407051595e-05,
      "loss": 2.9631,
      "step": 206299
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.898857355117798,
      "learning_rate": 1.6056260090322116e-05,
      "loss": 2.9927,
      "step": 206300
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.827486515045166,
      "learning_rate": 1.6054939826383075e-05,
      "loss": 2.796,
      "step": 206301
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.807358980178833,
      "learning_rate": 1.6053619615234802e-05,
      "loss": 2.9397,
      "step": 206302
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1856346130371094,
      "learning_rate": 1.6052299456877536e-05,
      "loss": 3.0257,
      "step": 206303
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.019537925720215,
      "learning_rate": 1.6050979351311477e-05,
      "loss": 2.9044,
      "step": 206304
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.002514362335205,
      "learning_rate": 1.6049659298536953e-05,
      "loss": 3.1402,
      "step": 206305
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4407999515533447,
      "learning_rate": 1.6048339298554137e-05,
      "loss": 2.7031,
      "step": 206306
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1478075981140137,
      "learning_rate": 1.6047019351363255e-05,
      "loss": 2.6928,
      "step": 206307
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1163485050201416,
      "learning_rate": 1.6045699456964644e-05,
      "loss": 3.0453,
      "step": 206308
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9014945030212402,
      "learning_rate": 1.604437961535847e-05,
      "loss": 2.9558,
      "step": 206309
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.352739334106445,
      "learning_rate": 1.6043059826545003e-05,
      "loss": 2.9909,
      "step": 206310
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.953770160675049,
      "learning_rate": 1.6041740090524504e-05,
      "loss": 2.9771,
      "step": 206311
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6379735469818115,
      "learning_rate": 1.6040420407297173e-05,
      "loss": 2.8514,
      "step": 206312
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.976151466369629,
      "learning_rate": 1.6039100776863346e-05,
      "loss": 3.167,
      "step": 206313
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.4276022911071777,
      "learning_rate": 1.6037781199223188e-05,
      "loss": 3.1043,
      "step": 206314
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8284387588500977,
      "learning_rate": 1.6036461674376965e-05,
      "loss": 2.9148,
      "step": 206315
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8849270343780518,
      "learning_rate": 1.6035142202324912e-05,
      "loss": 3.0389,
      "step": 206316
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7575860023498535,
      "learning_rate": 1.6033822783067295e-05,
      "loss": 3.0505,
      "step": 206317
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.014874219894409,
      "learning_rate": 1.6032503416604313e-05,
      "loss": 2.6904,
      "step": 206318
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8487095832824707,
      "learning_rate": 1.6031184102936302e-05,
      "loss": 3.0893,
      "step": 206319
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.685575008392334,
      "learning_rate": 1.6029864842063454e-05,
      "loss": 2.7664,
      "step": 206320
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.683072090148926,
      "learning_rate": 1.6028545633985977e-05,
      "loss": 3.0351,
      "step": 206321
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.543950319290161,
      "learning_rate": 1.602722647870417e-05,
      "loss": 2.6744,
      "step": 206322
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.351283311843872,
      "learning_rate": 1.6025907376218294e-05,
      "loss": 2.8683,
      "step": 206323
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3473169803619385,
      "learning_rate": 1.602458832652852e-05,
      "loss": 2.9069,
      "step": 206324
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5508365631103516,
      "learning_rate": 1.6023269329635148e-05,
      "loss": 2.7446,
      "step": 206325
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0026700496673584,
      "learning_rate": 1.6021950385538372e-05,
      "loss": 3.1115,
      "step": 206326
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7730462551116943,
      "learning_rate": 1.6020631494238533e-05,
      "loss": 3.0544,
      "step": 206327
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0458590984344482,
      "learning_rate": 1.601931265573583e-05,
      "loss": 2.8773,
      "step": 206328
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.809816837310791,
      "learning_rate": 1.6017993870030487e-05,
      "loss": 3.1025,
      "step": 206329
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9484763145446777,
      "learning_rate": 1.6016675137122715e-05,
      "loss": 2.6591,
      "step": 206330
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7745308876037598,
      "learning_rate": 1.601535645701284e-05,
      "loss": 2.9811,
      "step": 206331
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8696908950805664,
      "learning_rate": 1.601403782970103e-05,
      "loss": 2.8208,
      "step": 206332
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1634697914123535,
      "learning_rate": 1.6012719255187624e-05,
      "loss": 2.7334,
      "step": 206333
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6864559650421143,
      "learning_rate": 1.6011400733472813e-05,
      "loss": 2.9341,
      "step": 206334
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0613417625427246,
      "learning_rate": 1.6010082264556835e-05,
      "loss": 3.0258,
      "step": 206335
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7707293033599854,
      "learning_rate": 1.6008763848439886e-05,
      "loss": 3.111,
      "step": 206336
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9638609886169434,
      "learning_rate": 1.600744548512234e-05,
      "loss": 2.8285,
      "step": 206337
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.478405475616455,
      "learning_rate": 1.6006127174604322e-05,
      "loss": 3.0953,
      "step": 206338
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8791866302490234,
      "learning_rate": 1.600480891688617e-05,
      "loss": 3.0025,
      "step": 206339
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6878886222839355,
      "learning_rate": 1.600349071196808e-05,
      "loss": 2.8614,
      "step": 206340
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2794365882873535,
      "learning_rate": 1.600217255985029e-05,
      "loss": 2.8769,
      "step": 206341
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.869950294494629,
      "learning_rate": 1.6000854460533032e-05,
      "loss": 2.9747,
      "step": 206342
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0190865993499756,
      "learning_rate": 1.59995364140166e-05,
      "loss": 2.7136,
      "step": 206343
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0641403198242188,
      "learning_rate": 1.5998218420301202e-05,
      "loss": 2.9543,
      "step": 206344
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7706117630004883,
      "learning_rate": 1.5996900479387098e-05,
      "loss": 2.8383,
      "step": 206345
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3582258224487305,
      "learning_rate": 1.5995582591274558e-05,
      "loss": 2.8946,
      "step": 206346
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9220566749572754,
      "learning_rate": 1.5994264755963816e-05,
      "loss": 2.9917,
      "step": 206347
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7259156703948975,
      "learning_rate": 1.5992946973455034e-05,
      "loss": 2.8348,
      "step": 206348
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5827901363372803,
      "learning_rate": 1.599162924374855e-05,
      "loss": 3.0918,
      "step": 206349
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.43178129196167,
      "learning_rate": 1.599031156684456e-05,
      "loss": 3.0494,
      "step": 206350
    },
    {
      "epoch": 2.69,
      "grad_norm": 5.04799747467041,
      "learning_rate": 1.59889939427434e-05,
      "loss": 2.7136,
      "step": 206351
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8120899200439453,
      "learning_rate": 1.59876763714452e-05,
      "loss": 2.8305,
      "step": 206352
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8981809616088867,
      "learning_rate": 1.5986358852950297e-05,
      "loss": 2.7817,
      "step": 206353
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8932881355285645,
      "learning_rate": 1.5985041387258824e-05,
      "loss": 2.9269,
      "step": 206354
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.177155017852783,
      "learning_rate": 1.5983723974371144e-05,
      "loss": 2.7588,
      "step": 206355
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.003968238830566,
      "learning_rate": 1.598240661428739e-05,
      "loss": 2.7884,
      "step": 206356
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.821483612060547,
      "learning_rate": 1.5981089307007934e-05,
      "loss": 2.9251,
      "step": 206357
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.022428035736084,
      "learning_rate": 1.5979772052532935e-05,
      "loss": 2.9948,
      "step": 206358
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.177909851074219,
      "learning_rate": 1.5978454850862665e-05,
      "loss": 2.9438,
      "step": 206359
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.022862434387207,
      "learning_rate": 1.5977137701997323e-05,
      "loss": 3.0578,
      "step": 206360
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8518824577331543,
      "learning_rate": 1.5975820605937238e-05,
      "loss": 2.9924,
      "step": 206361
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.55749773979187,
      "learning_rate": 1.597450356268255e-05,
      "loss": 2.822,
      "step": 206362
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.450472354888916,
      "learning_rate": 1.597318657223362e-05,
      "loss": 2.9733,
      "step": 206363
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4711616039276123,
      "learning_rate": 1.597186963459065e-05,
      "loss": 3.0428,
      "step": 206364
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.212686061859131,
      "learning_rate": 1.597055274975384e-05,
      "loss": 3.1686,
      "step": 206365
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0069758892059326,
      "learning_rate": 1.5969235917723456e-05,
      "loss": 3.1646,
      "step": 206366
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.697866439819336,
      "learning_rate": 1.5967919138499762e-05,
      "loss": 2.9781,
      "step": 206367
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9345128536224365,
      "learning_rate": 1.596660241208296e-05,
      "loss": 2.7525,
      "step": 206368
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.013727188110352,
      "learning_rate": 1.5965285738473386e-05,
      "loss": 2.9398,
      "step": 206369
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.789811134338379,
      "learning_rate": 1.5963969117671206e-05,
      "loss": 2.9285,
      "step": 206370
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3741495609283447,
      "learning_rate": 1.5962652549676713e-05,
      "loss": 2.8543,
      "step": 206371
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.473728656768799,
      "learning_rate": 1.5961336034490048e-05,
      "loss": 2.9834,
      "step": 206372
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5706160068511963,
      "learning_rate": 1.5960019572111604e-05,
      "loss": 2.9546,
      "step": 206373
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.657438278198242,
      "learning_rate": 1.5958703162541487e-05,
      "loss": 2.7651,
      "step": 206374
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.517107009887695,
      "learning_rate": 1.5957386805780094e-05,
      "loss": 3.008,
      "step": 206375
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5663564205169678,
      "learning_rate": 1.5956070501827555e-05,
      "loss": 3.0022,
      "step": 206376
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5972797870635986,
      "learning_rate": 1.5954754250684143e-05,
      "loss": 2.8112,
      "step": 206377
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.678717613220215,
      "learning_rate": 1.5953438052350054e-05,
      "loss": 2.7705,
      "step": 206378
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.037706136703491,
      "learning_rate": 1.595212190682562e-05,
      "loss": 2.9656,
      "step": 206379
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1516530513763428,
      "learning_rate": 1.5950805814111013e-05,
      "loss": 2.834,
      "step": 206380
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.238757610321045,
      "learning_rate": 1.5949489774206593e-05,
      "loss": 2.8377,
      "step": 206381
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.353245258331299,
      "learning_rate": 1.5948173787112427e-05,
      "loss": 2.7672,
      "step": 206382
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8197104930877686,
      "learning_rate": 1.5946857852828987e-05,
      "loss": 3.136,
      "step": 206383
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.171900272369385,
      "learning_rate": 1.594554197135627e-05,
      "loss": 2.7828,
      "step": 206384
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7672195434570312,
      "learning_rate": 1.5944226142694704e-05,
      "loss": 2.7705,
      "step": 206385
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.920368194580078,
      "learning_rate": 1.5942910366844396e-05,
      "loss": 3.1491,
      "step": 206386
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.295419692993164,
      "learning_rate": 1.5941594643805745e-05,
      "loss": 3.0199,
      "step": 206387
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1694071292877197,
      "learning_rate": 1.5940278973578847e-05,
      "loss": 2.982,
      "step": 206388
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.715160846710205,
      "learning_rate": 1.593896335616407e-05,
      "loss": 2.7565,
      "step": 206389
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.385136365890503,
      "learning_rate": 1.593764779156158e-05,
      "loss": 2.7015,
      "step": 206390
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6710586547851562,
      "learning_rate": 1.5936332279771648e-05,
      "loss": 3.0616,
      "step": 206391
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0525310039520264,
      "learning_rate": 1.59350168207945e-05,
      "loss": 3.1736,
      "step": 206392
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.07289457321167,
      "learning_rate": 1.5933701414630406e-05,
      "loss": 2.9224,
      "step": 206393
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3112850189208984,
      "learning_rate": 1.5932386061279566e-05,
      "loss": 2.8949,
      "step": 206394
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.8540515899658203,
      "learning_rate": 1.593107076074228e-05,
      "loss": 2.5718,
      "step": 206395
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6540677547454834,
      "learning_rate": 1.5929755513018815e-05,
      "loss": 2.9793,
      "step": 206396
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6855742931365967,
      "learning_rate": 1.5928440318109303e-05,
      "loss": 3.0247,
      "step": 206397
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.2991509437561035,
      "learning_rate": 1.5927125176014077e-05,
      "loss": 2.7995,
      "step": 206398
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3628408908843994,
      "learning_rate": 1.5925810086733402e-05,
      "loss": 2.9605,
      "step": 206399
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0218026638031006,
      "learning_rate": 1.5924495050267417e-05,
      "loss": 2.9495,
      "step": 206400
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.584928274154663,
      "learning_rate": 1.592318006661648e-05,
      "loss": 2.7935,
      "step": 206401
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.819196939468384,
      "learning_rate": 1.5921865135780766e-05,
      "loss": 2.9906,
      "step": 206402
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9812750816345215,
      "learning_rate": 1.5920550257760533e-05,
      "loss": 2.8747,
      "step": 206403
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.048112392425537,
      "learning_rate": 1.5919235432556054e-05,
      "loss": 3.0207,
      "step": 206404
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.37481427192688,
      "learning_rate": 1.591792066016756e-05,
      "loss": 2.8543,
      "step": 206405
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2988319396972656,
      "learning_rate": 1.5916605940595217e-05,
      "loss": 2.8947,
      "step": 206406
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6091973781585693,
      "learning_rate": 1.5915291273839425e-05,
      "loss": 2.6792,
      "step": 206407
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1592352390289307,
      "learning_rate": 1.5913976659900318e-05,
      "loss": 2.83,
      "step": 206408
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8009488582611084,
      "learning_rate": 1.5912662098778096e-05,
      "loss": 3.1322,
      "step": 206409
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.538213014602661,
      "learning_rate": 1.5911347590473156e-05,
      "loss": 3.0375,
      "step": 206410
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.858731746673584,
      "learning_rate": 1.5910033134985602e-05,
      "loss": 2.9807,
      "step": 206411
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.148118257522583,
      "learning_rate": 1.5908718732315794e-05,
      "loss": 2.7971,
      "step": 206412
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.644343376159668,
      "learning_rate": 1.5907404382463908e-05,
      "loss": 3.0013,
      "step": 206413
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.031014919281006,
      "learning_rate": 1.5906090085430167e-05,
      "loss": 2.9099,
      "step": 206414
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.22128963470459,
      "learning_rate": 1.5904775841214846e-05,
      "loss": 2.7522,
      "step": 206415
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9091408252716064,
      "learning_rate": 1.5903461649818206e-05,
      "loss": 2.7732,
      "step": 206416
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3094773292541504,
      "learning_rate": 1.5902147511240447e-05,
      "loss": 3.1516,
      "step": 206417
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.881082534790039,
      "learning_rate": 1.5900833425481907e-05,
      "loss": 2.9671,
      "step": 206418
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.628788948059082,
      "learning_rate": 1.5899519392542716e-05,
      "loss": 2.9112,
      "step": 206419
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7837231159210205,
      "learning_rate": 1.5898205412423205e-05,
      "loss": 2.8971,
      "step": 206420
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.910043478012085,
      "learning_rate": 1.589689148512351e-05,
      "loss": 2.7851,
      "step": 206421
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4675238132476807,
      "learning_rate": 1.5895577610644027e-05,
      "loss": 3.1817,
      "step": 206422
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9062728881835938,
      "learning_rate": 1.5894263788984827e-05,
      "loss": 3.1144,
      "step": 206423
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9215950965881348,
      "learning_rate": 1.589295002014631e-05,
      "loss": 2.8756,
      "step": 206424
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1549599170684814,
      "learning_rate": 1.5891636304128674e-05,
      "loss": 2.8687,
      "step": 206425
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0542213916778564,
      "learning_rate": 1.5890322640932118e-05,
      "loss": 2.9485,
      "step": 206426
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1450676918029785,
      "learning_rate": 1.588900903055691e-05,
      "loss": 2.9452,
      "step": 206427
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.349330425262451,
      "learning_rate": 1.5887695473003313e-05,
      "loss": 2.9849,
      "step": 206428
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4843859672546387,
      "learning_rate": 1.58863819682715e-05,
      "loss": 2.9125,
      "step": 206429
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1210291385650635,
      "learning_rate": 1.5885068516361832e-05,
      "loss": 3.2111,
      "step": 206430
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4593465328216553,
      "learning_rate": 1.588375511727451e-05,
      "loss": 3.0241,
      "step": 206431
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8196089267730713,
      "learning_rate": 1.5882441771009736e-05,
      "loss": 2.8998,
      "step": 206432
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7181010246276855,
      "learning_rate": 1.588112847756774e-05,
      "loss": 2.8731,
      "step": 206433
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0198302268981934,
      "learning_rate": 1.5879815236948855e-05,
      "loss": 2.9303,
      "step": 206434
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1284401416778564,
      "learning_rate": 1.587850204915322e-05,
      "loss": 3.2122,
      "step": 206435
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.522338390350342,
      "learning_rate": 1.5877188914181194e-05,
      "loss": 3.1402,
      "step": 206436
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.459068298339844,
      "learning_rate": 1.5875875832032946e-05,
      "loss": 2.9315,
      "step": 206437
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.632286310195923,
      "learning_rate": 1.5874562802708745e-05,
      "loss": 3.0622,
      "step": 206438
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.872126817703247,
      "learning_rate": 1.5873249826208788e-05,
      "loss": 2.8633,
      "step": 206439
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4018471240997314,
      "learning_rate": 1.5871936902533376e-05,
      "loss": 2.9105,
      "step": 206440
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2592899799346924,
      "learning_rate": 1.587062403168271e-05,
      "loss": 2.7221,
      "step": 206441
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6840972900390625,
      "learning_rate": 1.586931121365712e-05,
      "loss": 2.7039,
      "step": 206442
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3810200691223145,
      "learning_rate": 1.5867998448456775e-05,
      "loss": 3.0308,
      "step": 206443
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.13643479347229,
      "learning_rate": 1.5866685736081908e-05,
      "loss": 2.6391,
      "step": 206444
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8218331336975098,
      "learning_rate": 1.5865373076532783e-05,
      "loss": 2.9526,
      "step": 206445
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.780118703842163,
      "learning_rate": 1.586406046980967e-05,
      "loss": 2.6458,
      "step": 206446
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.679931163787842,
      "learning_rate": 1.5862747915912733e-05,
      "loss": 2.8566,
      "step": 206447
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2378547191619873,
      "learning_rate": 1.5861435414842337e-05,
      "loss": 2.9814,
      "step": 206448
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6966028213500977,
      "learning_rate": 1.586012296659862e-05,
      "loss": 3.1453,
      "step": 206449
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0283803939819336,
      "learning_rate": 1.585881057118198e-05,
      "loss": 2.8934,
      "step": 206450
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.806516408920288,
      "learning_rate": 1.5857498228592413e-05,
      "loss": 2.7565,
      "step": 206451
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.427886486053467,
      "learning_rate": 1.585618593883039e-05,
      "loss": 3.1038,
      "step": 206452
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.731597900390625,
      "learning_rate": 1.585487370189601e-05,
      "loss": 3.0788,
      "step": 206453
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7969932556152344,
      "learning_rate": 1.5853561517789604e-05,
      "loss": 2.8895,
      "step": 206454
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5324573516845703,
      "learning_rate": 1.585224938651134e-05,
      "loss": 2.8055,
      "step": 206455
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.3489205837249756,
      "learning_rate": 1.5850937308061586e-05,
      "loss": 2.9309,
      "step": 206456
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8688671588897705,
      "learning_rate": 1.5849625282440436e-05,
      "loss": 2.8652,
      "step": 206457
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.637017250061035,
      "learning_rate": 1.5848313309648263e-05,
      "loss": 2.9497,
      "step": 206458
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8945460319519043,
      "learning_rate": 1.5847001389685167e-05,
      "loss": 3.0131,
      "step": 206459
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.473201751708984,
      "learning_rate": 1.5845689522551542e-05,
      "loss": 3.0487,
      "step": 206460
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6669814586639404,
      "learning_rate": 1.584437770824756e-05,
      "loss": 2.8465,
      "step": 206461
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.516916513442993,
      "learning_rate": 1.5843065946773515e-05,
      "loss": 2.9226,
      "step": 206462
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7970809936523438,
      "learning_rate": 1.5841754238129512e-05,
      "loss": 2.7471,
      "step": 206463
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.941291570663452,
      "learning_rate": 1.5840442582315947e-05,
      "loss": 3.0279,
      "step": 206464
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.3578100204467773,
      "learning_rate": 1.5839130979332993e-05,
      "loss": 2.9031,
      "step": 206465
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1044163703918457,
      "learning_rate": 1.583781942918091e-05,
      "loss": 2.9561,
      "step": 206466
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.88850474357605,
      "learning_rate": 1.583650793185993e-05,
      "loss": 2.8263,
      "step": 206467
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.210970878601074,
      "learning_rate": 1.5835196487370394e-05,
      "loss": 3.2022,
      "step": 206468
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6347405910491943,
      "learning_rate": 1.583388509571236e-05,
      "loss": 2.9845,
      "step": 206469
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.850882053375244,
      "learning_rate": 1.5832573756886203e-05,
      "loss": 2.8208,
      "step": 206470
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.212031602859497,
      "learning_rate": 1.583126247089208e-05,
      "loss": 3.2009,
      "step": 206471
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.106110095977783,
      "learning_rate": 1.5829951237730366e-05,
      "loss": 2.7926,
      "step": 206472
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7757232189178467,
      "learning_rate": 1.582864005740119e-05,
      "loss": 2.9008,
      "step": 206473
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.149977445602417,
      "learning_rate": 1.582732892990485e-05,
      "loss": 3.1018,
      "step": 206474
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9339888095855713,
      "learning_rate": 1.5826017855241578e-05,
      "loss": 2.9993,
      "step": 206475
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6689822673797607,
      "learning_rate": 1.5824706833411615e-05,
      "loss": 3.1167,
      "step": 206476
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.290292263031006,
      "learning_rate": 1.5823395864415156e-05,
      "loss": 2.6789,
      "step": 206477
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6306254863739014,
      "learning_rate": 1.5822084948252566e-05,
      "loss": 2.8692,
      "step": 206478
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.059128284454346,
      "learning_rate": 1.582077408492395e-05,
      "loss": 2.8935,
      "step": 206479
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1722426414489746,
      "learning_rate": 1.5819463274429634e-05,
      "loss": 2.9233,
      "step": 206480
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.278212070465088,
      "learning_rate": 1.5818152516769887e-05,
      "loss": 2.9886,
      "step": 206481
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0689897537231445,
      "learning_rate": 1.581684181194488e-05,
      "loss": 3.2777,
      "step": 206482
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0596370697021484,
      "learning_rate": 1.5815531159954873e-05,
      "loss": 2.8472,
      "step": 206483
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7083821296691895,
      "learning_rate": 1.5814220560800138e-05,
      "loss": 3.0336,
      "step": 206484
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6327908039093018,
      "learning_rate": 1.581291001448087e-05,
      "loss": 2.828,
      "step": 206485
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5457875728607178,
      "learning_rate": 1.5811599520997376e-05,
      "loss": 2.7599,
      "step": 206486
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8684439659118652,
      "learning_rate": 1.5810289080349915e-05,
      "loss": 2.7452,
      "step": 206487
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2384023666381836,
      "learning_rate": 1.5808978692538587e-05,
      "loss": 2.8067,
      "step": 206488
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6460893154144287,
      "learning_rate": 1.5807668357563795e-05,
      "loss": 2.7899,
      "step": 206489
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7151708602905273,
      "learning_rate": 1.580635807542574e-05,
      "loss": 2.6875,
      "step": 206490
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.995392322540283,
      "learning_rate": 1.5805047846124586e-05,
      "loss": 2.8388,
      "step": 206491
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.677229881286621,
      "learning_rate": 1.5803737669660698e-05,
      "loss": 2.7508,
      "step": 206492
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.362384796142578,
      "learning_rate": 1.5802427546034247e-05,
      "loss": 2.8964,
      "step": 206493
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4323909282684326,
      "learning_rate": 1.580111747524546e-05,
      "loss": 2.8992,
      "step": 206494
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9817237854003906,
      "learning_rate": 1.5799807457294643e-05,
      "loss": 3.1099,
      "step": 206495
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1608641147613525,
      "learning_rate": 1.579849749218196e-05,
      "loss": 3.2924,
      "step": 206496
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4905357360839844,
      "learning_rate": 1.5797187579907744e-05,
      "loss": 2.9651,
      "step": 206497
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.179934501647949,
      "learning_rate": 1.5795877720472194e-05,
      "loss": 3.0389,
      "step": 206498
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.619330883026123,
      "learning_rate": 1.579456791387558e-05,
      "loss": 2.9372,
      "step": 206499
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7333011627197266,
      "learning_rate": 1.5793258160118062e-05,
      "loss": 2.9761,
      "step": 206500
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2866342067718506,
      "learning_rate": 1.5791948459199978e-05,
      "loss": 2.8027,
      "step": 206501
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7348523139953613,
      "learning_rate": 1.5790638811121493e-05,
      "loss": 3.007,
      "step": 206502
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.015439987182617,
      "learning_rate": 1.578932921588294e-05,
      "loss": 3.0143,
      "step": 206503
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9003262519836426,
      "learning_rate": 1.578801967348452e-05,
      "loss": 2.7461,
      "step": 206504
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9114205837249756,
      "learning_rate": 1.5786710183926465e-05,
      "loss": 2.9655,
      "step": 206505
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1844587326049805,
      "learning_rate": 1.5785400747208976e-05,
      "loss": 2.9955,
      "step": 206506
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.107504367828369,
      "learning_rate": 1.5784091363332386e-05,
      "loss": 2.8543,
      "step": 206507
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1328582763671875,
      "learning_rate": 1.5782782032296893e-05,
      "loss": 3.006,
      "step": 206508
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5553882122039795,
      "learning_rate": 1.578147275410273e-05,
      "loss": 2.7975,
      "step": 206509
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.515329360961914,
      "learning_rate": 1.57801635287502e-05,
      "loss": 2.9006,
      "step": 206510
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.502887487411499,
      "learning_rate": 1.57788543562395e-05,
      "loss": 2.9656,
      "step": 206511
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6846842765808105,
      "learning_rate": 1.5777545236570833e-05,
      "loss": 3.0217,
      "step": 206512
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2581489086151123,
      "learning_rate": 1.5776236169744493e-05,
      "loss": 3.1064,
      "step": 206513
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.45931339263916,
      "learning_rate": 1.577492715576072e-05,
      "loss": 2.8758,
      "step": 206514
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.321141004562378,
      "learning_rate": 1.5773618194619775e-05,
      "loss": 2.8087,
      "step": 206515
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.966188907623291,
      "learning_rate": 1.5772309286321828e-05,
      "loss": 2.7318,
      "step": 206516
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1192548274993896,
      "learning_rate": 1.5771000430867275e-05,
      "loss": 2.7525,
      "step": 206517
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1572647094726562,
      "learning_rate": 1.5769691628256153e-05,
      "loss": 2.7899,
      "step": 206518
    },
    {
      "epoch": 2.69,
      "grad_norm": 6.063955307006836,
      "learning_rate": 1.5768382878488895e-05,
      "loss": 2.7743,
      "step": 206519
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2646002769470215,
      "learning_rate": 1.5767074181565564e-05,
      "loss": 3.0172,
      "step": 206520
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.35099458694458,
      "learning_rate": 1.5765765537486564e-05,
      "loss": 2.6584,
      "step": 206521
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.255934000015259,
      "learning_rate": 1.5764456946252057e-05,
      "loss": 2.9439,
      "step": 206522
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1382598876953125,
      "learning_rate": 1.5763148407862345e-05,
      "loss": 3.0779,
      "step": 206523
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2457475662231445,
      "learning_rate": 1.5761839922317566e-05,
      "loss": 2.8266,
      "step": 206524
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5730488300323486,
      "learning_rate": 1.576053148961808e-05,
      "loss": 3.0594,
      "step": 206525
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3829736709594727,
      "learning_rate": 1.5759223109764017e-05,
      "loss": 3.1374,
      "step": 206526
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.039534091949463,
      "learning_rate": 1.5757914782755722e-05,
      "loss": 2.9547,
      "step": 206527
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.837066411972046,
      "learning_rate": 1.575660650859335e-05,
      "loss": 2.9145,
      "step": 206528
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.550208806991577,
      "learning_rate": 1.575529828727731e-05,
      "loss": 2.923,
      "step": 206529
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.587380886077881,
      "learning_rate": 1.575399011880759e-05,
      "loss": 2.8135,
      "step": 206530
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4537532329559326,
      "learning_rate": 1.5752682003184637e-05,
      "loss": 2.9864,
      "step": 206531
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.200777530670166,
      "learning_rate": 1.5751373940408574e-05,
      "loss": 2.9492,
      "step": 206532
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.619673013687134,
      "learning_rate": 1.575006593047977e-05,
      "loss": 2.8499,
      "step": 206533
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2350661754608154,
      "learning_rate": 1.574875797339833e-05,
      "loss": 2.8459,
      "step": 206534
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9804418087005615,
      "learning_rate": 1.5747450069164646e-05,
      "loss": 2.9313,
      "step": 206535
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.982469081878662,
      "learning_rate": 1.574614221777879e-05,
      "loss": 2.7748,
      "step": 206536
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.945343017578125,
      "learning_rate": 1.5744834419241127e-05,
      "loss": 2.8722,
      "step": 206537
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5341150760650635,
      "learning_rate": 1.574352667355182e-05,
      "loss": 2.9637,
      "step": 206538
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9028074741363525,
      "learning_rate": 1.5742218980711208e-05,
      "loss": 2.904,
      "step": 206539
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1253740787506104,
      "learning_rate": 1.5740911340719454e-05,
      "loss": 2.8424,
      "step": 206540
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.63946270942688,
      "learning_rate": 1.573960375357689e-05,
      "loss": 2.6906,
      "step": 206541
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2380833625793457,
      "learning_rate": 1.5738296219283654e-05,
      "loss": 3.0007,
      "step": 206542
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.394498825073242,
      "learning_rate": 1.573698873784004e-05,
      "loss": 2.8718,
      "step": 206543
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.553166151046753,
      "learning_rate": 1.573568130924625e-05,
      "loss": 3.0143,
      "step": 206544
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.680102586746216,
      "learning_rate": 1.573437393350262e-05,
      "loss": 3.0699,
      "step": 206545
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.694495439529419,
      "learning_rate": 1.573306661060928e-05,
      "loss": 2.9877,
      "step": 206546
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5347061157226562,
      "learning_rate": 1.573175934056663e-05,
      "loss": 3.0547,
      "step": 206547
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.196931838989258,
      "learning_rate": 1.5730452123374702e-05,
      "loss": 3.14,
      "step": 206548
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.43212890625,
      "learning_rate": 1.57291449590339e-05,
      "loss": 2.915,
      "step": 206549
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.4611642360687256,
      "learning_rate": 1.572783784754439e-05,
      "loss": 3.2855,
      "step": 206550
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4657204151153564,
      "learning_rate": 1.5726530788906465e-05,
      "loss": 2.7962,
      "step": 206551
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.681861639022827,
      "learning_rate": 1.57252237831203e-05,
      "loss": 2.7344,
      "step": 206552
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7558083534240723,
      "learning_rate": 1.572391683018629e-05,
      "loss": 3.0212,
      "step": 206553
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.548600196838379,
      "learning_rate": 1.572260993010447e-05,
      "loss": 3.062,
      "step": 206554
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.204806327819824,
      "learning_rate": 1.5721303082875204e-05,
      "loss": 3.0494,
      "step": 206555
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5684304237365723,
      "learning_rate": 1.5719996288498694e-05,
      "loss": 2.8449,
      "step": 206556
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.985365867614746,
      "learning_rate": 1.5718689546975238e-05,
      "loss": 3.0401,
      "step": 206557
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6893203258514404,
      "learning_rate": 1.5717382858305005e-05,
      "loss": 2.7701,
      "step": 206558
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.997314214706421,
      "learning_rate": 1.5716076222488326e-05,
      "loss": 2.7919,
      "step": 206559
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5336966514587402,
      "learning_rate": 1.5714769639525406e-05,
      "loss": 3.004,
      "step": 206560
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8931658267974854,
      "learning_rate": 1.5713463109416436e-05,
      "loss": 2.7343,
      "step": 206561
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.522563934326172,
      "learning_rate": 1.5712156632161686e-05,
      "loss": 2.9867,
      "step": 206562
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.969961166381836,
      "learning_rate": 1.571085020776146e-05,
      "loss": 2.9972,
      "step": 206563
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.369992733001709,
      "learning_rate": 1.5709543836215886e-05,
      "loss": 2.9675,
      "step": 206564
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.906461000442505,
      "learning_rate": 1.5708237517525334e-05,
      "loss": 3.0787,
      "step": 206565
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.784146547317505,
      "learning_rate": 1.570693125169e-05,
      "loss": 3.1437,
      "step": 206566
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7852704524993896,
      "learning_rate": 1.570562503871009e-05,
      "loss": 2.9515,
      "step": 206567
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.787794351577759,
      "learning_rate": 1.570431887858583e-05,
      "loss": 2.9203,
      "step": 206568
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5788016319274902,
      "learning_rate": 1.5703012771317558e-05,
      "loss": 3.0205,
      "step": 206569
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.377027988433838,
      "learning_rate": 1.570170671690544e-05,
      "loss": 2.61,
      "step": 206570
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.900736093521118,
      "learning_rate": 1.5700400715349735e-05,
      "loss": 2.7469,
      "step": 206571
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.536837100982666,
      "learning_rate": 1.569909476665072e-05,
      "loss": 3.1483,
      "step": 206572
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.028221368789673,
      "learning_rate": 1.569778887080856e-05,
      "loss": 3.0564,
      "step": 206573
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.91178560256958,
      "learning_rate": 1.5696483027823614e-05,
      "loss": 3.1111,
      "step": 206574
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.710314989089966,
      "learning_rate": 1.569517723769602e-05,
      "loss": 2.9775,
      "step": 206575
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2351081371307373,
      "learning_rate": 1.569387150042605e-05,
      "loss": 2.8684,
      "step": 206576
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2007555961608887,
      "learning_rate": 1.569256581601399e-05,
      "loss": 2.8437,
      "step": 206577
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8056743144989014,
      "learning_rate": 1.569126018446002e-05,
      "loss": 2.9597,
      "step": 206578
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8401472568511963,
      "learning_rate": 1.56899546057644e-05,
      "loss": 2.9405,
      "step": 206579
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.291466236114502,
      "learning_rate": 1.568864907992743e-05,
      "loss": 2.7677,
      "step": 206580
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.081178188323975,
      "learning_rate": 1.5687343606949243e-05,
      "loss": 2.6914,
      "step": 206581
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8867664337158203,
      "learning_rate": 1.568603818683021e-05,
      "loss": 2.842,
      "step": 206582
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0080463886260986,
      "learning_rate": 1.568473281957049e-05,
      "loss": 2.7741,
      "step": 206583
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6643643379211426,
      "learning_rate": 1.5683427505170355e-05,
      "loss": 2.7558,
      "step": 206584
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.736478567123413,
      "learning_rate": 1.568212224362997e-05,
      "loss": 2.8865,
      "step": 206585
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.033508539199829,
      "learning_rate": 1.5680817034949732e-05,
      "loss": 2.7662,
      "step": 206586
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8296210765838623,
      "learning_rate": 1.5679511879129714e-05,
      "loss": 3.0893,
      "step": 206587
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.236292839050293,
      "learning_rate": 1.567820677617031e-05,
      "loss": 2.862,
      "step": 206588
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.770954132080078,
      "learning_rate": 1.5676901726071687e-05,
      "loss": 2.7178,
      "step": 206589
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6709084510803223,
      "learning_rate": 1.5675596728834082e-05,
      "loss": 3.0576,
      "step": 206590
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1681888103485107,
      "learning_rate": 1.5674291784457726e-05,
      "loss": 2.9205,
      "step": 206591
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6486122608184814,
      "learning_rate": 1.5672986892942918e-05,
      "loss": 3.0394,
      "step": 206592
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.89744234085083,
      "learning_rate": 1.5671682054289857e-05,
      "loss": 2.8391,
      "step": 206593
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4134278297424316,
      "learning_rate": 1.5670377268498814e-05,
      "loss": 2.9482,
      "step": 206594
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7761123180389404,
      "learning_rate": 1.566907253556995e-05,
      "loss": 3.1826,
      "step": 206595
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0607986450195312,
      "learning_rate": 1.56677678555037e-05,
      "loss": 3.0858,
      "step": 206596
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1245970726013184,
      "learning_rate": 1.5666463228300064e-05,
      "loss": 3.1086,
      "step": 206597
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.372372627258301,
      "learning_rate": 1.5665158653959477e-05,
      "loss": 2.8526,
      "step": 206598
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2372183799743652,
      "learning_rate": 1.5663854132482033e-05,
      "loss": 3.0155,
      "step": 206599
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7285614013671875,
      "learning_rate": 1.5662549663868075e-05,
      "loss": 2.7466,
      "step": 206600
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.293426513671875,
      "learning_rate": 1.566124524811779e-05,
      "loss": 3.0671,
      "step": 206601
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.92195200920105,
      "learning_rate": 1.565994088523156e-05,
      "loss": 2.6745,
      "step": 206602
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8037095069885254,
      "learning_rate": 1.5658636575209373e-05,
      "loss": 2.8954,
      "step": 206603
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.44952392578125,
      "learning_rate": 1.5657332318051698e-05,
      "loss": 2.897,
      "step": 206604
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.185019016265869,
      "learning_rate": 1.5656028113758633e-05,
      "loss": 2.5411,
      "step": 206605
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.960000514984131,
      "learning_rate": 1.5654723962330517e-05,
      "loss": 2.9563,
      "step": 206606
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1133604049682617,
      "learning_rate": 1.5653419863767547e-05,
      "loss": 2.9816,
      "step": 206607
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4028539657592773,
      "learning_rate": 1.565211581807002e-05,
      "loss": 2.8229,
      "step": 206608
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7269763946533203,
      "learning_rate": 1.5650811825238074e-05,
      "loss": 2.981,
      "step": 206609
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2502071857452393,
      "learning_rate": 1.564950788527204e-05,
      "loss": 2.7554,
      "step": 206610
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6259653568267822,
      "learning_rate": 1.564820399817208e-05,
      "loss": 2.8316,
      "step": 206611
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.681623697280884,
      "learning_rate": 1.5646900163938535e-05,
      "loss": 2.957,
      "step": 206612
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8782594203948975,
      "learning_rate": 1.564559638257157e-05,
      "loss": 2.8134,
      "step": 206613
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7773118019104004,
      "learning_rate": 1.564429265407151e-05,
      "loss": 2.9396,
      "step": 206614
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.661525249481201,
      "learning_rate": 1.5642988978438498e-05,
      "loss": 3.1375,
      "step": 206615
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.225048065185547,
      "learning_rate": 1.564168535567283e-05,
      "loss": 2.8733,
      "step": 206616
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.481645107269287,
      "learning_rate": 1.5640381785774703e-05,
      "loss": 2.8779,
      "step": 206617
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8617610931396484,
      "learning_rate": 1.5639078268744452e-05,
      "loss": 2.6827,
      "step": 206618
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1020870208740234,
      "learning_rate": 1.5637774804582214e-05,
      "loss": 3.0154,
      "step": 206619
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9567489624023438,
      "learning_rate": 1.5636471393288384e-05,
      "loss": 2.8979,
      "step": 206620
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7050769329071045,
      "learning_rate": 1.5635168034863e-05,
      "loss": 2.5906,
      "step": 206621
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.151874303817749,
      "learning_rate": 1.5633864729306455e-05,
      "loss": 2.959,
      "step": 206622
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.765533924102783,
      "learning_rate": 1.5632561476618887e-05,
      "loss": 3.1373,
      "step": 206623
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9244589805603027,
      "learning_rate": 1.563125827680063e-05,
      "loss": 2.8931,
      "step": 206624
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.128937005996704,
      "learning_rate": 1.562995512985188e-05,
      "loss": 3.0756,
      "step": 206625
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.78202486038208,
      "learning_rate": 1.5628652035772936e-05,
      "loss": 3.0994,
      "step": 206626
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9760313034057617,
      "learning_rate": 1.562734899456394e-05,
      "loss": 3.0202,
      "step": 206627
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.279510498046875,
      "learning_rate": 1.5626046006225213e-05,
      "loss": 2.7247,
      "step": 206628
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7543931007385254,
      "learning_rate": 1.5624743070756962e-05,
      "loss": 2.7096,
      "step": 206629
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2042746543884277,
      "learning_rate": 1.562344018815945e-05,
      "loss": 2.8852,
      "step": 206630
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1463518142700195,
      "learning_rate": 1.5622137358432853e-05,
      "loss": 2.9505,
      "step": 206631
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8106796741485596,
      "learning_rate": 1.562083458157759e-05,
      "loss": 3.0625,
      "step": 206632
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.692246913909912,
      "learning_rate": 1.561953185759367e-05,
      "loss": 3.0731,
      "step": 206633
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9817492961883545,
      "learning_rate": 1.561822918648149e-05,
      "loss": 2.8719,
      "step": 206634
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7065956592559814,
      "learning_rate": 1.5616926568241218e-05,
      "loss": 3.1571,
      "step": 206635
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.87813401222229,
      "learning_rate": 1.5615624002873182e-05,
      "loss": 2.95,
      "step": 206636
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8524584770202637,
      "learning_rate": 1.5614321490377522e-05,
      "loss": 2.9182,
      "step": 206637
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4741647243499756,
      "learning_rate": 1.5613019030754604e-05,
      "loss": 3.0769,
      "step": 206638
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.685058355331421,
      "learning_rate": 1.561171662400449e-05,
      "loss": 2.7698,
      "step": 206639
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.084964275360107,
      "learning_rate": 1.561041427012758e-05,
      "loss": 2.9687,
      "step": 206640
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.024991512298584,
      "learning_rate": 1.5609111969124045e-05,
      "loss": 2.6502,
      "step": 206641
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.118410348892212,
      "learning_rate": 1.560780972099418e-05,
      "loss": 2.762,
      "step": 206642
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7100348472595215,
      "learning_rate": 1.560650752573812e-05,
      "loss": 2.8767,
      "step": 206643
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.173964738845825,
      "learning_rate": 1.5605205383356302e-05,
      "loss": 3.0155,
      "step": 206644
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2570998668670654,
      "learning_rate": 1.560390329384872e-05,
      "loss": 2.6767,
      "step": 206645
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7095417976379395,
      "learning_rate": 1.5602601257215806e-05,
      "loss": 3.1138,
      "step": 206646
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1079094409942627,
      "learning_rate": 1.5601299273457702e-05,
      "loss": 2.9304,
      "step": 206647
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8550071716308594,
      "learning_rate": 1.5599997342574698e-05,
      "loss": 2.9053,
      "step": 206648
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5480775833129883,
      "learning_rate": 1.5598695464567e-05,
      "loss": 2.993,
      "step": 206649
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.269329786300659,
      "learning_rate": 1.5597393639434908e-05,
      "loss": 2.7634,
      "step": 206650
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8973922729492188,
      "learning_rate": 1.559609186717865e-05,
      "loss": 2.8551,
      "step": 206651
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2508082389831543,
      "learning_rate": 1.559479014779843e-05,
      "loss": 2.6989,
      "step": 206652
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.547166585922241,
      "learning_rate": 1.559348848129448e-05,
      "loss": 3.1682,
      "step": 206653
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1662282943725586,
      "learning_rate": 1.55921868676671e-05,
      "loss": 2.6975,
      "step": 206654
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7251620292663574,
      "learning_rate": 1.5590885306916455e-05,
      "loss": 2.62,
      "step": 206655
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6219534873962402,
      "learning_rate": 1.558958379904288e-05,
      "loss": 3.1274,
      "step": 206656
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6595590114593506,
      "learning_rate": 1.5588282344046543e-05,
      "loss": 2.9539,
      "step": 206657
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0300028324127197,
      "learning_rate": 1.5586980941927705e-05,
      "loss": 2.9695,
      "step": 206658
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.284022092819214,
      "learning_rate": 1.558567959268664e-05,
      "loss": 2.8391,
      "step": 206659
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.139038324356079,
      "learning_rate": 1.5584378296323573e-05,
      "loss": 2.8657,
      "step": 206660
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0421993732452393,
      "learning_rate": 1.5583077052838712e-05,
      "loss": 2.9309,
      "step": 206661
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.882369041442871,
      "learning_rate": 1.558177586223235e-05,
      "loss": 2.9656,
      "step": 206662
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9628658294677734,
      "learning_rate": 1.5580474724504722e-05,
      "loss": 2.7677,
      "step": 206663
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8139138221740723,
      "learning_rate": 1.5579173639655996e-05,
      "loss": 2.8013,
      "step": 206664
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.077960729598999,
      "learning_rate": 1.5577872607686504e-05,
      "loss": 2.9123,
      "step": 206665
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.204045534133911,
      "learning_rate": 1.557657162859648e-05,
      "loss": 2.8456,
      "step": 206666
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.890585422515869,
      "learning_rate": 1.5575270702386057e-05,
      "loss": 2.7981,
      "step": 206667
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.343661069869995,
      "learning_rate": 1.5573969829055634e-05,
      "loss": 2.8373,
      "step": 206668
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.197231769561768,
      "learning_rate": 1.5572669008605376e-05,
      "loss": 3.0056,
      "step": 206669
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9011688232421875,
      "learning_rate": 1.5571368241035487e-05,
      "loss": 2.8189,
      "step": 206670
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6891825199127197,
      "learning_rate": 1.5570067526346295e-05,
      "loss": 2.7553,
      "step": 206671
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6837198734283447,
      "learning_rate": 1.556876686453794e-05,
      "loss": 2.7935,
      "step": 206672
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6313626766204834,
      "learning_rate": 1.556746625561075e-05,
      "loss": 2.9478,
      "step": 206673
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.12904953956604,
      "learning_rate": 1.556616569956496e-05,
      "loss": 2.7551,
      "step": 206674
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.819365978240967,
      "learning_rate": 1.55648651964008e-05,
      "loss": 2.8959,
      "step": 206675
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3062469959259033,
      "learning_rate": 1.556356474611844e-05,
      "loss": 2.9655,
      "step": 206676
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0557796955108643,
      "learning_rate": 1.5562264348718213e-05,
      "loss": 3.0226,
      "step": 206677
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.619448661804199,
      "learning_rate": 1.5560964004200315e-05,
      "loss": 2.8943,
      "step": 206678
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9662985801696777,
      "learning_rate": 1.555966371256502e-05,
      "loss": 2.9859,
      "step": 206679
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7041921615600586,
      "learning_rate": 1.555836347381255e-05,
      "loss": 3.1067,
      "step": 206680
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8745031356811523,
      "learning_rate": 1.5557063287943185e-05,
      "loss": 2.7598,
      "step": 206681
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.415538787841797,
      "learning_rate": 1.5555763154957078e-05,
      "loss": 2.726,
      "step": 206682
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4147164821624756,
      "learning_rate": 1.5554463074854572e-05,
      "loss": 3.1914,
      "step": 206683
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6818764209747314,
      "learning_rate": 1.5553163047635796e-05,
      "loss": 3.1162,
      "step": 206684
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7863974571228027,
      "learning_rate": 1.5551863073301084e-05,
      "loss": 2.829,
      "step": 206685
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.589449882507324,
      "learning_rate": 1.5550563151850638e-05,
      "loss": 2.9471,
      "step": 206686
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8749098777770996,
      "learning_rate": 1.5549263283284785e-05,
      "loss": 2.7506,
      "step": 206687
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.133273601531982,
      "learning_rate": 1.554796346760363e-05,
      "loss": 2.5968,
      "step": 206688
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.240656614303589,
      "learning_rate": 1.5546663704807506e-05,
      "loss": 2.9504,
      "step": 206689
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.104037284851074,
      "learning_rate": 1.5545363994896576e-05,
      "loss": 2.8477,
      "step": 206690
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.099461078643799,
      "learning_rate": 1.5544064337871177e-05,
      "loss": 2.9362,
      "step": 206691
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.169532537460327,
      "learning_rate": 1.5542764733731473e-05,
      "loss": 3.1047,
      "step": 206692
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.876668930053711,
      "learning_rate": 1.5541465182477796e-05,
      "loss": 3.0104,
      "step": 206693
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4341845512390137,
      "learning_rate": 1.5540165684110285e-05,
      "loss": 2.9057,
      "step": 206694
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7072794437408447,
      "learning_rate": 1.5538866238629234e-05,
      "loss": 3.1556,
      "step": 206695
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.800140380859375,
      "learning_rate": 1.5537566846034877e-05,
      "loss": 3.2342,
      "step": 206696
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.765333890914917,
      "learning_rate": 1.5536267506327448e-05,
      "loss": 2.8626,
      "step": 206697
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.82838773727417,
      "learning_rate": 1.553496821950718e-05,
      "loss": 2.9246,
      "step": 206698
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2152650356292725,
      "learning_rate": 1.553366898557441e-05,
      "loss": 2.8627,
      "step": 206699
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.707826852798462,
      "learning_rate": 1.5532369804529233e-05,
      "loss": 2.8403,
      "step": 206700
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2590725421905518,
      "learning_rate": 1.5531070676371982e-05,
      "loss": 2.8041,
      "step": 206701
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6940131187438965,
      "learning_rate": 1.552977160110286e-05,
      "loss": 2.9159,
      "step": 206702
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.363790988922119,
      "learning_rate": 1.5528472578722128e-05,
      "loss": 3.0608,
      "step": 206703
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.616159677505493,
      "learning_rate": 1.552717360922999e-05,
      "loss": 2.9369,
      "step": 206704
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9165077209472656,
      "learning_rate": 1.5525874692626815e-05,
      "loss": 3.0405,
      "step": 206705
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.184690237045288,
      "learning_rate": 1.5524575828912665e-05,
      "loss": 2.6883,
      "step": 206706
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7610297203063965,
      "learning_rate": 1.552327701808791e-05,
      "loss": 3.1974,
      "step": 206707
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0835344791412354,
      "learning_rate": 1.5521978260152713e-05,
      "loss": 3.0198,
      "step": 206708
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6115565299987793,
      "learning_rate": 1.5520679555107373e-05,
      "loss": 3.0419,
      "step": 206709
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.570336103439331,
      "learning_rate": 1.551938090295206e-05,
      "loss": 3.0973,
      "step": 206710
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3958821296691895,
      "learning_rate": 1.5518082303687174e-05,
      "loss": 3.0548,
      "step": 206711
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.190974473953247,
      "learning_rate": 1.5516783757312745e-05,
      "loss": 3.0207,
      "step": 206712
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5904717445373535,
      "learning_rate": 1.5515485263829174e-05,
      "loss": 3.1326,
      "step": 206713
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4107720851898193,
      "learning_rate": 1.5514186823236563e-05,
      "loss": 2.8162,
      "step": 206714
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.876063108444214,
      "learning_rate": 1.5512888435535308e-05,
      "loss": 2.7769,
      "step": 206715
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5601563453674316,
      "learning_rate": 1.5511590100725545e-05,
      "loss": 3.075,
      "step": 206716
    },
    {
      "epoch": 2.69,
      "grad_norm": 5.237724304199219,
      "learning_rate": 1.5510291818807575e-05,
      "loss": 2.7605,
      "step": 206717
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5128214359283447,
      "learning_rate": 1.5508993589781625e-05,
      "loss": 3.108,
      "step": 206718
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0666379928588867,
      "learning_rate": 1.5507695413647903e-05,
      "loss": 3.0855,
      "step": 206719
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.468519687652588,
      "learning_rate": 1.5506397290406635e-05,
      "loss": 3.0053,
      "step": 206720
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.201946496963501,
      "learning_rate": 1.550509922005816e-05,
      "loss": 2.8335,
      "step": 206721
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0639214515686035,
      "learning_rate": 1.550380120260257e-05,
      "loss": 3.2073,
      "step": 206722
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2376670837402344,
      "learning_rate": 1.5502503238040274e-05,
      "loss": 2.7927,
      "step": 206723
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4158387184143066,
      "learning_rate": 1.5501205326371434e-05,
      "loss": 2.9027,
      "step": 206724
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9312355518341064,
      "learning_rate": 1.5499907467596285e-05,
      "loss": 2.8338,
      "step": 206725
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.036721706390381,
      "learning_rate": 1.5498609661715022e-05,
      "loss": 2.9271,
      "step": 206726
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9726991653442383,
      "learning_rate": 1.5497311908727983e-05,
      "loss": 2.941,
      "step": 206727
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.662160634994507,
      "learning_rate": 1.5496014208635298e-05,
      "loss": 2.7741,
      "step": 206728
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6462483406066895,
      "learning_rate": 1.549471656143737e-05,
      "loss": 2.6938,
      "step": 206729
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.06477689743042,
      "learning_rate": 1.5493418967134297e-05,
      "loss": 2.7009,
      "step": 206730
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8858163356781006,
      "learning_rate": 1.5492121425726377e-05,
      "loss": 3.0317,
      "step": 206731
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0679099559783936,
      "learning_rate": 1.5490823937213815e-05,
      "loss": 2.7596,
      "step": 206732
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.65531849861145,
      "learning_rate": 1.548952650159694e-05,
      "loss": 2.753,
      "step": 206733
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4568426609039307,
      "learning_rate": 1.548822911887585e-05,
      "loss": 2.9101,
      "step": 206734
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.008267879486084,
      "learning_rate": 1.5486931789050917e-05,
      "loss": 2.9956,
      "step": 206735
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.946331262588501,
      "learning_rate": 1.548563451212237e-05,
      "loss": 2.8285,
      "step": 206736
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.985461711883545,
      "learning_rate": 1.5484337288090377e-05,
      "loss": 3.0664,
      "step": 206737
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.782166004180908,
      "learning_rate": 1.548304011695517e-05,
      "loss": 2.8501,
      "step": 206738
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9321439266204834,
      "learning_rate": 1.5481742998717117e-05,
      "loss": 2.798,
      "step": 206739
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7456142902374268,
      "learning_rate": 1.5480445933376286e-05,
      "loss": 3.0677,
      "step": 206740
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.120396137237549,
      "learning_rate": 1.5479148920933103e-05,
      "loss": 2.8526,
      "step": 206741
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.74519944190979,
      "learning_rate": 1.5477851961387676e-05,
      "loss": 2.6844,
      "step": 206742
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7588682174682617,
      "learning_rate": 1.547655505474027e-05,
      "loss": 2.7639,
      "step": 206743
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2884740829467773,
      "learning_rate": 1.547525820099118e-05,
      "loss": 3.0641,
      "step": 206744
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9013895988464355,
      "learning_rate": 1.5473961400140612e-05,
      "loss": 3.0659,
      "step": 206745
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2299630641937256,
      "learning_rate": 1.547266465218876e-05,
      "loss": 2.8454,
      "step": 206746
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0103378295898438,
      "learning_rate": 1.547136795713596e-05,
      "loss": 3.1568,
      "step": 206747
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8534109592437744,
      "learning_rate": 1.5470071314982378e-05,
      "loss": 2.9398,
      "step": 206748
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9045839309692383,
      "learning_rate": 1.5468774725728284e-05,
      "loss": 2.7723,
      "step": 206749
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.85721492767334,
      "learning_rate": 1.5467478189373904e-05,
      "loss": 3.0442,
      "step": 206750
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.250293493270874,
      "learning_rate": 1.5466181705919544e-05,
      "loss": 3.0455,
      "step": 206751
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9481866359710693,
      "learning_rate": 1.5464885275365302e-05,
      "loss": 3.1579,
      "step": 206752
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.984339952468872,
      "learning_rate": 1.5463588897711575e-05,
      "loss": 2.9286,
      "step": 206753
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.8629562854766846,
      "learning_rate": 1.5462292572958535e-05,
      "loss": 2.8993,
      "step": 206754
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5469167232513428,
      "learning_rate": 1.5460996301106378e-05,
      "loss": 2.9762,
      "step": 206755
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3655946254730225,
      "learning_rate": 1.5459700082155467e-05,
      "loss": 2.8778,
      "step": 206756
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9854090213775635,
      "learning_rate": 1.5458403916105878e-05,
      "loss": 2.9383,
      "step": 206757
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.215416431427002,
      "learning_rate": 1.5457107802958002e-05,
      "loss": 3.0557,
      "step": 206758
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6199848651885986,
      "learning_rate": 1.5455811742712043e-05,
      "loss": 3.0169,
      "step": 206759
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7036521434783936,
      "learning_rate": 1.54545157353682e-05,
      "loss": 2.9162,
      "step": 206760
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7120821475982666,
      "learning_rate": 1.5453219780926706e-05,
      "loss": 3.1243,
      "step": 206761
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.687680721282959,
      "learning_rate": 1.545192387938786e-05,
      "loss": 2.7867,
      "step": 206762
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0332751274108887,
      "learning_rate": 1.5450628030751833e-05,
      "loss": 2.824,
      "step": 206763
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0923805236816406,
      "learning_rate": 1.5449332235018953e-05,
      "loss": 2.8591,
      "step": 206764
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.804556131362915,
      "learning_rate": 1.5448036492189386e-05,
      "loss": 3.0396,
      "step": 206765
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.772364377975464,
      "learning_rate": 1.5446740802263436e-05,
      "loss": 2.8427,
      "step": 206766
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.405102491378784,
      "learning_rate": 1.5445445165241233e-05,
      "loss": 3.0272,
      "step": 206767
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7970974445343018,
      "learning_rate": 1.5444149581123177e-05,
      "loss": 3.0826,
      "step": 206768
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.10148286819458,
      "learning_rate": 1.5442854049909338e-05,
      "loss": 2.8959,
      "step": 206769
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.132046937942505,
      "learning_rate": 1.544155857160011e-05,
      "loss": 2.8803,
      "step": 206770
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3804643154144287,
      "learning_rate": 1.544026314619563e-05,
      "loss": 2.9603,
      "step": 206771
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5969772338867188,
      "learning_rate": 1.5438967773696264e-05,
      "loss": 2.8812,
      "step": 206772
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.98342227935791,
      "learning_rate": 1.5437672454102047e-05,
      "loss": 2.8355,
      "step": 206773
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9261722564697266,
      "learning_rate": 1.5436377187413406e-05,
      "loss": 3.1359,
      "step": 206774
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.755066156387329,
      "learning_rate": 1.543508197363048e-05,
      "loss": 2.877,
      "step": 206775
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2473018169403076,
      "learning_rate": 1.5433786812753567e-05,
      "loss": 2.9173,
      "step": 206776
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.766679048538208,
      "learning_rate": 1.5432491704782868e-05,
      "loss": 3.3369,
      "step": 206777
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6394360065460205,
      "learning_rate": 1.543119664971868e-05,
      "loss": 2.9837,
      "step": 206778
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9673917293548584,
      "learning_rate": 1.5429901647561173e-05,
      "loss": 2.8458,
      "step": 206779
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.566124677658081,
      "learning_rate": 1.542860669831064e-05,
      "loss": 3.2247,
      "step": 206780
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.333517551422119,
      "learning_rate": 1.5427311801967258e-05,
      "loss": 2.7432,
      "step": 206781
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2191827297210693,
      "learning_rate": 1.5426016958531318e-05,
      "loss": 2.9452,
      "step": 206782
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6779181957244873,
      "learning_rate": 1.5424722168003058e-05,
      "loss": 2.8153,
      "step": 206783
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7365877628326416,
      "learning_rate": 1.542342743038274e-05,
      "loss": 2.7943,
      "step": 206784
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.897523880004883,
      "learning_rate": 1.5422132745670568e-05,
      "loss": 2.8938,
      "step": 206785
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7795138359069824,
      "learning_rate": 1.5420838113866806e-05,
      "loss": 2.8771,
      "step": 206786
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.910214424133301,
      "learning_rate": 1.5419543534971655e-05,
      "loss": 2.8998,
      "step": 206787
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3592398166656494,
      "learning_rate": 1.541824900898542e-05,
      "loss": 2.6947,
      "step": 206788
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.651634931564331,
      "learning_rate": 1.541695453590822e-05,
      "loss": 3.1154,
      "step": 206789
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.922510862350464,
      "learning_rate": 1.541566011574047e-05,
      "loss": 2.802,
      "step": 206790
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3628270626068115,
      "learning_rate": 1.5414365748482294e-05,
      "loss": 2.8669,
      "step": 206791
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.164057731628418,
      "learning_rate": 1.541307143413396e-05,
      "loss": 2.8817,
      "step": 206792
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6424407958984375,
      "learning_rate": 1.5411777172695673e-05,
      "loss": 2.8526,
      "step": 206793
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8270721435546875,
      "learning_rate": 1.541048296416776e-05,
      "loss": 2.7815,
      "step": 206794
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.090730667114258,
      "learning_rate": 1.5409188808550355e-05,
      "loss": 2.9304,
      "step": 206795
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.014397621154785,
      "learning_rate": 1.5407894705843826e-05,
      "loss": 2.7179,
      "step": 206796
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.678611993789673,
      "learning_rate": 1.5406600656048308e-05,
      "loss": 3.0103,
      "step": 206797
    },
    {
      "epoch": 2.69,
      "grad_norm": 6.536431789398193,
      "learning_rate": 1.5405306659164095e-05,
      "loss": 2.6743,
      "step": 206798
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4155852794647217,
      "learning_rate": 1.5404012715191327e-05,
      "loss": 2.7267,
      "step": 206799
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8356106281280518,
      "learning_rate": 1.54027188241304e-05,
      "loss": 2.6259,
      "step": 206800
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0279505252838135,
      "learning_rate": 1.540142498598145e-05,
      "loss": 2.7749,
      "step": 206801
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.400691032409668,
      "learning_rate": 1.540013120074477e-05,
      "loss": 3.003,
      "step": 206802
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0133309364318848,
      "learning_rate": 1.5398837468420567e-05,
      "loss": 2.8169,
      "step": 206803
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.692385673522949,
      "learning_rate": 1.5397543789009104e-05,
      "loss": 2.8145,
      "step": 206804
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7700324058532715,
      "learning_rate": 1.5396250162510583e-05,
      "loss": 2.8724,
      "step": 206805
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6820037364959717,
      "learning_rate": 1.5394956588925302e-05,
      "loss": 2.9044,
      "step": 206806
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6893324851989746,
      "learning_rate": 1.5393663068253427e-05,
      "loss": 3.0788,
      "step": 206807
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.125010967254639,
      "learning_rate": 1.5392369600495257e-05,
      "loss": 2.9086,
      "step": 206808
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1288068294525146,
      "learning_rate": 1.5391076185651062e-05,
      "loss": 2.9488,
      "step": 206809
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8656883239746094,
      "learning_rate": 1.538978282372101e-05,
      "loss": 2.7894,
      "step": 206810
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8065154552459717,
      "learning_rate": 1.538848951470536e-05,
      "loss": 2.8089,
      "step": 206811
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8281610012054443,
      "learning_rate": 1.5387196258604383e-05,
      "loss": 2.8866,
      "step": 206812
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.8971657752990723,
      "learning_rate": 1.5385903055418248e-05,
      "loss": 3.1332,
      "step": 206813
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5944015979766846,
      "learning_rate": 1.538460990514728e-05,
      "loss": 2.7502,
      "step": 206814
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8178484439849854,
      "learning_rate": 1.5383316807791723e-05,
      "loss": 2.8832,
      "step": 206815
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9651331901550293,
      "learning_rate": 1.5382023763351736e-05,
      "loss": 2.8857,
      "step": 206816
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.286790609359741,
      "learning_rate": 1.5380730771827588e-05,
      "loss": 3.0837,
      "step": 206817
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.892569065093994,
      "learning_rate": 1.5379437833219576e-05,
      "loss": 2.8385,
      "step": 206818
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.373255729675293,
      "learning_rate": 1.537814494752787e-05,
      "loss": 3.2445,
      "step": 206819
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8204615116119385,
      "learning_rate": 1.5376852114752768e-05,
      "loss": 2.7767,
      "step": 206820
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.624631404876709,
      "learning_rate": 1.5375559334894473e-05,
      "loss": 3.1664,
      "step": 206821
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.728513240814209,
      "learning_rate": 1.5374266607953246e-05,
      "loss": 2.7683,
      "step": 206822
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1205992698669434,
      "learning_rate": 1.5372973933929257e-05,
      "loss": 2.7374,
      "step": 206823
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.509354591369629,
      "learning_rate": 1.537168131282287e-05,
      "loss": 2.8492,
      "step": 206824
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1852333545684814,
      "learning_rate": 1.5370388744634187e-05,
      "loss": 2.8669,
      "step": 206825
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7685818672180176,
      "learning_rate": 1.5369096229363607e-05,
      "loss": 2.7881,
      "step": 206826
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.956186056137085,
      "learning_rate": 1.5367803767011265e-05,
      "loss": 2.6758,
      "step": 206827
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.731778383255005,
      "learning_rate": 1.5366511357577394e-05,
      "loss": 2.8566,
      "step": 206828
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5174739360809326,
      "learning_rate": 1.5365219001062256e-05,
      "loss": 2.7375,
      "step": 206829
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.204287052154541,
      "learning_rate": 1.536392669746612e-05,
      "loss": 2.9,
      "step": 206830
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.86787486076355,
      "learning_rate": 1.536263444678916e-05,
      "loss": 3.0243,
      "step": 206831
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.889495849609375,
      "learning_rate": 1.5361342249031728e-05,
      "loss": 3.1721,
      "step": 206832
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.801830291748047,
      "learning_rate": 1.5360050104193965e-05,
      "loss": 2.895,
      "step": 206833
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.668809175491333,
      "learning_rate": 1.5358758012276107e-05,
      "loss": 2.9796,
      "step": 206834
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.842564821243286,
      "learning_rate": 1.5357465973278482e-05,
      "loss": 2.9528,
      "step": 206835
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.761399269104004,
      "learning_rate": 1.535617398720126e-05,
      "loss": 3.0591,
      "step": 206836
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.53444242477417,
      "learning_rate": 1.535488205404467e-05,
      "loss": 2.8033,
      "step": 206837
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.987848997116089,
      "learning_rate": 1.5353590173809017e-05,
      "loss": 2.9116,
      "step": 206838
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2082502841949463,
      "learning_rate": 1.5352298346494497e-05,
      "loss": 2.8665,
      "step": 206839
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8943700790405273,
      "learning_rate": 1.5351006572101342e-05,
      "loss": 2.8324,
      "step": 206840
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8538200855255127,
      "learning_rate": 1.5349714850629823e-05,
      "loss": 3.0508,
      "step": 206841
    },
    {
      "epoch": 2.69,
      "grad_norm": 7.8611249923706055,
      "learning_rate": 1.5348423182080137e-05,
      "loss": 2.7643,
      "step": 206842
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.458451509475708,
      "learning_rate": 1.5347131566452588e-05,
      "loss": 3.0411,
      "step": 206843
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0650534629821777,
      "learning_rate": 1.5345840003747366e-05,
      "loss": 2.906,
      "step": 206844
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.717337131500244,
      "learning_rate": 1.534454849396475e-05,
      "loss": 2.9814,
      "step": 206845
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.4984993934631348,
      "learning_rate": 1.5343257037104893e-05,
      "loss": 2.7983,
      "step": 206846
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1616432666778564,
      "learning_rate": 1.5341965633168174e-05,
      "loss": 2.7713,
      "step": 206847
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7863545417785645,
      "learning_rate": 1.5340674282154687e-05,
      "loss": 2.9764,
      "step": 206848
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.967749834060669,
      "learning_rate": 1.5339382984064797e-05,
      "loss": 3.1992,
      "step": 206849
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.983579635620117,
      "learning_rate": 1.533809173889867e-05,
      "loss": 2.7546,
      "step": 206850
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.034574508666992,
      "learning_rate": 1.5336800546656547e-05,
      "loss": 2.7317,
      "step": 206851
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8958046436309814,
      "learning_rate": 1.533550940733872e-05,
      "loss": 2.887,
      "step": 206852
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9701380729675293,
      "learning_rate": 1.533421832094539e-05,
      "loss": 3.1658,
      "step": 206853
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.893557071685791,
      "learning_rate": 1.533292728747676e-05,
      "loss": 2.9631,
      "step": 206854
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.824155569076538,
      "learning_rate": 1.533163630693316e-05,
      "loss": 2.9414,
      "step": 206855
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0928547382354736,
      "learning_rate": 1.5330345379314724e-05,
      "loss": 2.839,
      "step": 206856
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0538036823272705,
      "learning_rate": 1.532905450462182e-05,
      "loss": 3.05,
      "step": 206857
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.171204090118408,
      "learning_rate": 1.532776368285458e-05,
      "loss": 2.9451,
      "step": 206858
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2482855319976807,
      "learning_rate": 1.532647291401333e-05,
      "loss": 2.7272,
      "step": 206859
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.686635732650757,
      "learning_rate": 1.5325182198098185e-05,
      "loss": 2.9629,
      "step": 206860
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4106428623199463,
      "learning_rate": 1.53238915351095e-05,
      "loss": 2.9739,
      "step": 206861
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.772444725036621,
      "learning_rate": 1.5322600925047478e-05,
      "loss": 3.1511,
      "step": 206862
    },
    {
      "epoch": 2.69,
      "grad_norm": 5.18928337097168,
      "learning_rate": 1.5321310367912355e-05,
      "loss": 2.935,
      "step": 206863
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0007307529449463,
      "learning_rate": 1.5320019863704393e-05,
      "loss": 2.7543,
      "step": 206864
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9808061122894287,
      "learning_rate": 1.5318729412423824e-05,
      "loss": 2.7439,
      "step": 206865
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.023231029510498,
      "learning_rate": 1.531743901407082e-05,
      "loss": 2.9712,
      "step": 206866
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0116777420043945,
      "learning_rate": 1.531614866864571e-05,
      "loss": 3.1305,
      "step": 206867
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8724522590637207,
      "learning_rate": 1.5314858376148697e-05,
      "loss": 2.8761,
      "step": 206868
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.140816688537598,
      "learning_rate": 1.5313568136580045e-05,
      "loss": 2.9856,
      "step": 206869
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.160635232925415,
      "learning_rate": 1.5312277949939955e-05,
      "loss": 2.8866,
      "step": 206870
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0869064331054688,
      "learning_rate": 1.5310987816228694e-05,
      "loss": 3.0541,
      "step": 206871
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.932814359664917,
      "learning_rate": 1.5309697735446492e-05,
      "loss": 2.8944,
      "step": 206872
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0670692920684814,
      "learning_rate": 1.5308407707593584e-05,
      "loss": 2.7693,
      "step": 206873
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7247555255889893,
      "learning_rate": 1.5307117732670205e-05,
      "loss": 2.6604,
      "step": 206874
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.190916061401367,
      "learning_rate": 1.530582781067662e-05,
      "loss": 2.8041,
      "step": 206875
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.302579402923584,
      "learning_rate": 1.5304537941613094e-05,
      "loss": 2.8713,
      "step": 206876
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2549986839294434,
      "learning_rate": 1.5303248125479794e-05,
      "loss": 2.9779,
      "step": 206877
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8550078868865967,
      "learning_rate": 1.530195836227699e-05,
      "loss": 2.8053,
      "step": 206878
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.260037422180176,
      "learning_rate": 1.5300668652004943e-05,
      "loss": 2.8255,
      "step": 206879
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5896360874176025,
      "learning_rate": 1.5299378994663825e-05,
      "loss": 2.8842,
      "step": 206880
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7530927658081055,
      "learning_rate": 1.5298089390253966e-05,
      "loss": 3.1331,
      "step": 206881
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8743488788604736,
      "learning_rate": 1.52967998387756e-05,
      "loss": 3.0608,
      "step": 206882
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.020732879638672,
      "learning_rate": 1.529551034022889e-05,
      "loss": 2.9291,
      "step": 206883
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3401331901550293,
      "learning_rate": 1.5294220894614106e-05,
      "loss": 2.7801,
      "step": 206884
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.704969644546509,
      "learning_rate": 1.529293150193155e-05,
      "loss": 2.909,
      "step": 206885
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.535243034362793,
      "learning_rate": 1.529164216218135e-05,
      "loss": 3.0802,
      "step": 206886
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.007688999176025,
      "learning_rate": 1.5290352875363842e-05,
      "loss": 2.6501,
      "step": 206887
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9303903579711914,
      "learning_rate": 1.528906364147926e-05,
      "loss": 2.5785,
      "step": 206888
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.819638967514038,
      "learning_rate": 1.52877744605278e-05,
      "loss": 2.8266,
      "step": 206889
    },
    {
      "epoch": 2.69,
      "grad_norm": 5.108663082122803,
      "learning_rate": 1.5286485332509667e-05,
      "loss": 2.8105,
      "step": 206890
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.962184429168701,
      "learning_rate": 1.5285196257425225e-05,
      "loss": 2.8564,
      "step": 206891
    },
    {
      "epoch": 2.69,
      "grad_norm": 5.089627265930176,
      "learning_rate": 1.5283907235274572e-05,
      "loss": 2.8747,
      "step": 206892
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.8737430572509766,
      "learning_rate": 1.5282618266058044e-05,
      "loss": 2.9777,
      "step": 206893
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.208202600479126,
      "learning_rate": 1.5281329349775874e-05,
      "loss": 3.0615,
      "step": 206894
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.999375343322754,
      "learning_rate": 1.528004048642829e-05,
      "loss": 2.8869,
      "step": 206895
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7455620765686035,
      "learning_rate": 1.5278751676015467e-05,
      "loss": 2.8746,
      "step": 206896
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9969801902770996,
      "learning_rate": 1.527746291853773e-05,
      "loss": 2.9612,
      "step": 206897
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.751620054244995,
      "learning_rate": 1.527617421399525e-05,
      "loss": 2.9545,
      "step": 206898
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.471359729766846,
      "learning_rate": 1.527488556238833e-05,
      "loss": 3.2029,
      "step": 206899
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.776449203491211,
      "learning_rate": 1.5273596963717195e-05,
      "loss": 3.0228,
      "step": 206900
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7924013137817383,
      "learning_rate": 1.5272308417982084e-05,
      "loss": 2.7076,
      "step": 206901
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4120936393737793,
      "learning_rate": 1.527101992518319e-05,
      "loss": 3.1017,
      "step": 206902
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.165055990219116,
      "learning_rate": 1.5269731485320824e-05,
      "loss": 3.0446,
      "step": 206903
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.953688859939575,
      "learning_rate": 1.5268443098395144e-05,
      "loss": 2.9798,
      "step": 206904
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0866386890411377,
      "learning_rate": 1.526715476440645e-05,
      "loss": 2.9231,
      "step": 206905
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.258949041366577,
      "learning_rate": 1.5265866483355017e-05,
      "loss": 2.8285,
      "step": 206906
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.329959392547607,
      "learning_rate": 1.5264578255241e-05,
      "loss": 2.8272,
      "step": 206907
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9690804481506348,
      "learning_rate": 1.526329008006464e-05,
      "loss": 2.6374,
      "step": 206908
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1754536628723145,
      "learning_rate": 1.5262001957826232e-05,
      "loss": 2.9787,
      "step": 206909
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.7895967960357666,
      "learning_rate": 1.5260713888525978e-05,
      "loss": 2.9273,
      "step": 206910
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.140190362930298,
      "learning_rate": 1.5259425872164178e-05,
      "loss": 2.8357,
      "step": 206911
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.330901622772217,
      "learning_rate": 1.5258137908740997e-05,
      "loss": 2.8315,
      "step": 206912
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6570374965667725,
      "learning_rate": 1.5256849998256738e-05,
      "loss": 2.777,
      "step": 206913
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7965774536132812,
      "learning_rate": 1.525556214071153e-05,
      "loss": 2.9294,
      "step": 206914
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.525541067123413,
      "learning_rate": 1.5254274336105743e-05,
      "loss": 2.5782,
      "step": 206915
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7403926849365234,
      "learning_rate": 1.5252986584439508e-05,
      "loss": 3.1485,
      "step": 206916
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2044050693511963,
      "learning_rate": 1.5251698885713193e-05,
      "loss": 2.832,
      "step": 206917
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.04695725440979,
      "learning_rate": 1.5250411239926896e-05,
      "loss": 2.9193,
      "step": 206918
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.567096471786499,
      "learning_rate": 1.5249123647080951e-05,
      "loss": 2.9187,
      "step": 206919
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.182866096496582,
      "learning_rate": 1.5247836107175592e-05,
      "loss": 2.8183,
      "step": 206920
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.752305746078491,
      "learning_rate": 1.5246548620211053e-05,
      "loss": 2.8207,
      "step": 206921
    },
    {
      "epoch": 2.69,
      "grad_norm": 5.099448204040527,
      "learning_rate": 1.5245261186187464e-05,
      "loss": 3.0883,
      "step": 206922
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6263904571533203,
      "learning_rate": 1.5243973805105226e-05,
      "loss": 2.7863,
      "step": 206923
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8069570064544678,
      "learning_rate": 1.5242686476964472e-05,
      "loss": 3.1783,
      "step": 206924
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7772722244262695,
      "learning_rate": 1.5241399201765503e-05,
      "loss": 2.6296,
      "step": 206925
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5248327255249023,
      "learning_rate": 1.5240111979508551e-05,
      "loss": 2.8111,
      "step": 206926
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6863033771514893,
      "learning_rate": 1.5238824810193817e-05,
      "loss": 2.7423,
      "step": 206927
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.88175630569458,
      "learning_rate": 1.5237537693821533e-05,
      "loss": 2.9071,
      "step": 206928
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.379101276397705,
      "learning_rate": 1.5236250630392033e-05,
      "loss": 3.0736,
      "step": 206929
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.3894057273864746,
      "learning_rate": 1.5234963619905416e-05,
      "loss": 3.1281,
      "step": 206930
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8653695583343506,
      "learning_rate": 1.5233676662362015e-05,
      "loss": 3.0655,
      "step": 206931
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.633495569229126,
      "learning_rate": 1.5232389757762098e-05,
      "loss": 2.8478,
      "step": 206932
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.099481105804443,
      "learning_rate": 1.5231102906105797e-05,
      "loss": 3.0325,
      "step": 206933
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.535003423690796,
      "learning_rate": 1.5229816107393444e-05,
      "loss": 3.1259,
      "step": 206934
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8856732845306396,
      "learning_rate": 1.522852936162524e-05,
      "loss": 3.0593,
      "step": 206935
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8886420726776123,
      "learning_rate": 1.5227242668801388e-05,
      "loss": 2.8942,
      "step": 206936
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6562743186950684,
      "learning_rate": 1.5225956028922215e-05,
      "loss": 2.6995,
      "step": 206937
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.5548946857452393,
      "learning_rate": 1.522466944198789e-05,
      "loss": 2.9366,
      "step": 206938
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.38633394241333,
      "learning_rate": 1.522338290799865e-05,
      "loss": 2.9645,
      "step": 206939
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8821427822113037,
      "learning_rate": 1.5222096426954821e-05,
      "loss": 2.7142,
      "step": 206940
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7701423168182373,
      "learning_rate": 1.522080999885651e-05,
      "loss": 2.9804,
      "step": 206941
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.778217315673828,
      "learning_rate": 1.5219523623704078e-05,
      "loss": 2.8674,
      "step": 206942
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.262819290161133,
      "learning_rate": 1.5218237301497694e-05,
      "loss": 2.8274,
      "step": 206943
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9040191173553467,
      "learning_rate": 1.5216951032237657e-05,
      "loss": 3.1842,
      "step": 206944
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7610056400299072,
      "learning_rate": 1.5215664815924101e-05,
      "loss": 2.9422,
      "step": 206945
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1874747276306152,
      "learning_rate": 1.5214378652557357e-05,
      "loss": 2.8869,
      "step": 206946
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.247725963592529,
      "learning_rate": 1.5213092542137595e-05,
      "loss": 2.8397,
      "step": 206947
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9101579189300537,
      "learning_rate": 1.5211806484665146e-05,
      "loss": 2.8329,
      "step": 206948
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.6893153190612793,
      "learning_rate": 1.5210520480140209e-05,
      "loss": 3.0818,
      "step": 206949
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0740814208984375,
      "learning_rate": 1.5209234528562985e-05,
      "loss": 3.0472,
      "step": 206950
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.90124249458313,
      "learning_rate": 1.5207948629933709e-05,
      "loss": 2.8031,
      "step": 206951
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.2000577449798584,
      "learning_rate": 1.520666278425271e-05,
      "loss": 3.0165,
      "step": 206952
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.066749334335327,
      "learning_rate": 1.5205376991520123e-05,
      "loss": 2.8556,
      "step": 206953
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.6878819465637207,
      "learning_rate": 1.520409125173625e-05,
      "loss": 2.5868,
      "step": 206954
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.943671703338623,
      "learning_rate": 1.520280556490132e-05,
      "loss": 2.9966,
      "step": 206955
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4452691078186035,
      "learning_rate": 1.5201519931015604e-05,
      "loss": 2.7168,
      "step": 206956
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.969041109085083,
      "learning_rate": 1.5200234350079198e-05,
      "loss": 3.1543,
      "step": 206957
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.9278512001037598,
      "learning_rate": 1.5198948822092539e-05,
      "loss": 2.9397,
      "step": 206958
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.075637102127075,
      "learning_rate": 1.519766334705569e-05,
      "loss": 2.9032,
      "step": 206959
    },
    {
      "epoch": 2.69,
      "grad_norm": 4.2599101066589355,
      "learning_rate": 1.519637792496905e-05,
      "loss": 2.7889,
      "step": 206960
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9397084712982178,
      "learning_rate": 1.5195092555832755e-05,
      "loss": 3.0644,
      "step": 206961
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.8360302448272705,
      "learning_rate": 1.5193807239647072e-05,
      "loss": 2.8381,
      "step": 206962
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0115013122558594,
      "learning_rate": 1.5192521976412198e-05,
      "loss": 2.8611,
      "step": 206963
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0974271297454834,
      "learning_rate": 1.5191236766128434e-05,
      "loss": 2.8782,
      "step": 206964
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.620727062225342,
      "learning_rate": 1.518995160879598e-05,
      "loss": 2.8729,
      "step": 206965
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.4923713207244873,
      "learning_rate": 1.5188666504415104e-05,
      "loss": 2.9075,
      "step": 206966
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.5254967212677,
      "learning_rate": 1.5187381452986069e-05,
      "loss": 2.596,
      "step": 206967
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.608006477355957,
      "learning_rate": 1.5186096454509044e-05,
      "loss": 3.0448,
      "step": 206968
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.7610902786254883,
      "learning_rate": 1.5184811508984262e-05,
      "loss": 2.8038,
      "step": 206969
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.119685411453247,
      "learning_rate": 1.5183526616412057e-05,
      "loss": 3.02,
      "step": 206970
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.9199976921081543,
      "learning_rate": 1.518224177679256e-05,
      "loss": 3.0203,
      "step": 206971
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.129558801651001,
      "learning_rate": 1.5180956990126103e-05,
      "loss": 2.9382,
      "step": 206972
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.685248374938965,
      "learning_rate": 1.5179672256412889e-05,
      "loss": 2.9999,
      "step": 206973
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.649428606033325,
      "learning_rate": 1.517838757565315e-05,
      "loss": 2.9787,
      "step": 206974
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.89805006980896,
      "learning_rate": 1.5177102947847086e-05,
      "loss": 2.9671,
      "step": 206975
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.0085031986236572,
      "learning_rate": 1.5175818372994998e-05,
      "loss": 2.8221,
      "step": 206976
    },
    {
      "epoch": 2.69,
      "grad_norm": 2.992448091506958,
      "learning_rate": 1.5174533851097081e-05,
      "loss": 3.1201,
      "step": 206977
    },
    {
      "epoch": 2.69,
      "grad_norm": 3.1394827365875244,
      "learning_rate": 1.5173249382153606e-05,
      "loss": 2.8101,
      "step": 206978
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4704983234405518,
      "learning_rate": 1.517196496616484e-05,
      "loss": 2.7888,
      "step": 206979
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2146739959716797,
      "learning_rate": 1.5170680603130947e-05,
      "loss": 3.0046,
      "step": 206980
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.738213300704956,
      "learning_rate": 1.5169396293052193e-05,
      "loss": 2.9272,
      "step": 206981
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7503137588500977,
      "learning_rate": 1.5168112035928848e-05,
      "loss": 2.7632,
      "step": 206982
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3556690216064453,
      "learning_rate": 1.5166827831761074e-05,
      "loss": 2.6712,
      "step": 206983
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2671620845794678,
      "learning_rate": 1.5165543680549208e-05,
      "loss": 3.1355,
      "step": 206984
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9415390491485596,
      "learning_rate": 1.5164259582293414e-05,
      "loss": 2.9711,
      "step": 206985
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.497551441192627,
      "learning_rate": 1.5162975536994059e-05,
      "loss": 2.841,
      "step": 206986
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5286943912506104,
      "learning_rate": 1.5161691544651178e-05,
      "loss": 2.892,
      "step": 206987
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.04923152923584,
      "learning_rate": 1.5160407605265167e-05,
      "loss": 2.9359,
      "step": 206988
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.959095001220703,
      "learning_rate": 1.5159123718836164e-05,
      "loss": 2.8971,
      "step": 206989
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2904202938079834,
      "learning_rate": 1.5157839885364497e-05,
      "loss": 2.6614,
      "step": 206990
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5999577045440674,
      "learning_rate": 1.5156556104850337e-05,
      "loss": 3.006,
      "step": 206991
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0346999168395996,
      "learning_rate": 1.5155272377294015e-05,
      "loss": 3.0608,
      "step": 206992
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.186345100402832,
      "learning_rate": 1.515398870269563e-05,
      "loss": 3.0461,
      "step": 206993
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3683578968048096,
      "learning_rate": 1.5152705081055517e-05,
      "loss": 2.8459,
      "step": 206994
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3336613178253174,
      "learning_rate": 1.5151421512373873e-05,
      "loss": 2.891,
      "step": 206995
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3207271099090576,
      "learning_rate": 1.5150137996651001e-05,
      "loss": 2.8692,
      "step": 206996
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.609661817550659,
      "learning_rate": 1.5148854533887066e-05,
      "loss": 2.9208,
      "step": 206997
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.990373134613037,
      "learning_rate": 1.5147571124082403e-05,
      "loss": 2.7436,
      "step": 206998
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.554884672164917,
      "learning_rate": 1.5146287767237076e-05,
      "loss": 2.6194,
      "step": 206999
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.234867811203003,
      "learning_rate": 1.5145004463351517e-05,
      "loss": 2.7746,
      "step": 207000
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.838777542114258,
      "learning_rate": 1.5143721212425797e-05,
      "loss": 2.8268,
      "step": 207001
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.573204278945923,
      "learning_rate": 1.5142438014460313e-05,
      "loss": 2.9521,
      "step": 207002
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.956087827682495,
      "learning_rate": 1.5141154869455164e-05,
      "loss": 2.919,
      "step": 207003
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6319782733917236,
      "learning_rate": 1.5139871777410684e-05,
      "loss": 2.9731,
      "step": 207004
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7434117794036865,
      "learning_rate": 1.5138588738327107e-05,
      "loss": 3.0577,
      "step": 207005
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6435446739196777,
      "learning_rate": 1.5137305752204631e-05,
      "loss": 2.8394,
      "step": 207006
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.943848133087158,
      "learning_rate": 1.5136022819043459e-05,
      "loss": 2.8915,
      "step": 207007
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0189831256866455,
      "learning_rate": 1.513473993884392e-05,
      "loss": 2.8452,
      "step": 207008
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5796892642974854,
      "learning_rate": 1.5133457111606185e-05,
      "loss": 2.9078,
      "step": 207009
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0878007411956787,
      "learning_rate": 1.5132174337330549e-05,
      "loss": 2.9913,
      "step": 207010
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9830567836761475,
      "learning_rate": 1.5130891616017216e-05,
      "loss": 2.9539,
      "step": 207011
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.731980323791504,
      "learning_rate": 1.5129608947666416e-05,
      "loss": 3.0773,
      "step": 207012
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2673606872558594,
      "learning_rate": 1.5128326332278384e-05,
      "loss": 2.9569,
      "step": 207013
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.7997331619262695,
      "learning_rate": 1.5127043769853386e-05,
      "loss": 2.9631,
      "step": 207014
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.8962788581848145,
      "learning_rate": 1.5125761260391623e-05,
      "loss": 3.147,
      "step": 207015
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6206915378570557,
      "learning_rate": 1.5124478803893426e-05,
      "loss": 2.7962,
      "step": 207016
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.650507926940918,
      "learning_rate": 1.5123196400358928e-05,
      "loss": 2.957,
      "step": 207017
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.012951374053955,
      "learning_rate": 1.5121914049788365e-05,
      "loss": 2.8304,
      "step": 207018
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5517311096191406,
      "learning_rate": 1.5120631752182066e-05,
      "loss": 2.829,
      "step": 207019
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.114957094192505,
      "learning_rate": 1.5119349507540235e-05,
      "loss": 2.978,
      "step": 207020
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.145038604736328,
      "learning_rate": 1.5118067315863036e-05,
      "loss": 2.8805,
      "step": 207021
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.502117872238159,
      "learning_rate": 1.5116785177150803e-05,
      "loss": 3.1274,
      "step": 207022
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3221516609191895,
      "learning_rate": 1.5115503091403736e-05,
      "loss": 2.7948,
      "step": 207023
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.454249620437622,
      "learning_rate": 1.5114221058622033e-05,
      "loss": 2.9306,
      "step": 207024
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.782855749130249,
      "learning_rate": 1.511293907880603e-05,
      "loss": 2.9272,
      "step": 207025
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.986643075942993,
      "learning_rate": 1.5111657151955858e-05,
      "loss": 3.2193,
      "step": 207026
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6693925857543945,
      "learning_rate": 1.5110375278071852e-05,
      "loss": 3.0629,
      "step": 207027
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7373716831207275,
      "learning_rate": 1.5109093457154208e-05,
      "loss": 2.9918,
      "step": 207028
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0636234283447266,
      "learning_rate": 1.5107811689203165e-05,
      "loss": 2.9118,
      "step": 207029
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4459660053253174,
      "learning_rate": 1.5106529974218918e-05,
      "loss": 2.9839,
      "step": 207030
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6517226696014404,
      "learning_rate": 1.5105248312201767e-05,
      "loss": 2.9171,
      "step": 207031
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2783920764923096,
      "learning_rate": 1.5103966703151882e-05,
      "loss": 2.7288,
      "step": 207032
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.132514476776123,
      "learning_rate": 1.5102685147069593e-05,
      "loss": 2.8863,
      "step": 207033
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.131659507751465,
      "learning_rate": 1.5101403643955135e-05,
      "loss": 2.9195,
      "step": 207034
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8980209827423096,
      "learning_rate": 1.510012219380864e-05,
      "loss": 3.2302,
      "step": 207035
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2588846683502197,
      "learning_rate": 1.5098840796630407e-05,
      "loss": 2.8735,
      "step": 207036
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8317837715148926,
      "learning_rate": 1.5097559452420704e-05,
      "loss": 2.7682,
      "step": 207037
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1818418502807617,
      "learning_rate": 1.509627816117973e-05,
      "loss": 2.7345,
      "step": 207038
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8983664512634277,
      "learning_rate": 1.5094996922907754e-05,
      "loss": 3.0134,
      "step": 207039
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6157803535461426,
      "learning_rate": 1.5093715737605005e-05,
      "loss": 3.0896,
      "step": 207040
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6258797645568848,
      "learning_rate": 1.5092434605271686e-05,
      "loss": 2.7007,
      "step": 207041
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.769486665725708,
      "learning_rate": 1.5091153525908061e-05,
      "loss": 2.9442,
      "step": 207042
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6084821224212646,
      "learning_rate": 1.5089872499514366e-05,
      "loss": 2.9183,
      "step": 207043
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8124189376831055,
      "learning_rate": 1.5088591526090832e-05,
      "loss": 2.8966,
      "step": 207044
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7810444831848145,
      "learning_rate": 1.5087310605637759e-05,
      "loss": 3.0726,
      "step": 207045
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.179429769515991,
      "learning_rate": 1.5086029738155314e-05,
      "loss": 2.7573,
      "step": 207046
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3407530784606934,
      "learning_rate": 1.5084748923643731e-05,
      "loss": 2.7999,
      "step": 207047
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0448551177978516,
      "learning_rate": 1.5083468162103274e-05,
      "loss": 2.9013,
      "step": 207048
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.087118148803711,
      "learning_rate": 1.5082187453534178e-05,
      "loss": 2.7787,
      "step": 207049
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.524833917617798,
      "learning_rate": 1.5080906797936676e-05,
      "loss": 2.6989,
      "step": 207050
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.321234703063965,
      "learning_rate": 1.5079626195311034e-05,
      "loss": 2.8609,
      "step": 207051
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9277777671813965,
      "learning_rate": 1.5078345645657419e-05,
      "loss": 3.1497,
      "step": 207052
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0651497840881348,
      "learning_rate": 1.507706514897623e-05,
      "loss": 2.7477,
      "step": 207053
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.136993646621704,
      "learning_rate": 1.5075784705267469e-05,
      "loss": 2.8352,
      "step": 207054
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3834760189056396,
      "learning_rate": 1.5074504314531565e-05,
      "loss": 3.1595,
      "step": 207055
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.562584400177002,
      "learning_rate": 1.5073223976768655e-05,
      "loss": 2.9332,
      "step": 207056
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8153951168060303,
      "learning_rate": 1.5071943691979038e-05,
      "loss": 3.0536,
      "step": 207057
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0142087936401367,
      "learning_rate": 1.507066346016288e-05,
      "loss": 2.8226,
      "step": 207058
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7459216117858887,
      "learning_rate": 1.5069383281320579e-05,
      "loss": 3.1882,
      "step": 207059
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6431705951690674,
      "learning_rate": 1.5068103155452139e-05,
      "loss": 2.817,
      "step": 207060
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.223071813583374,
      "learning_rate": 1.5066823082557988e-05,
      "loss": 2.6346,
      "step": 207061
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.235238552093506,
      "learning_rate": 1.506554306263823e-05,
      "loss": 3.034,
      "step": 207062
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.674619674682617,
      "learning_rate": 1.5064263095693196e-05,
      "loss": 2.9367,
      "step": 207063
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.191042423248291,
      "learning_rate": 1.5062983181723087e-05,
      "loss": 2.5269,
      "step": 207064
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6765241622924805,
      "learning_rate": 1.5061703320728202e-05,
      "loss": 2.7026,
      "step": 207065
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.849656105041504,
      "learning_rate": 1.5060423512708642e-05,
      "loss": 2.9927,
      "step": 207066
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0507662296295166,
      "learning_rate": 1.5059143757664804e-05,
      "loss": 2.7279,
      "step": 207067
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.138270139694214,
      "learning_rate": 1.5057864055596791e-05,
      "loss": 3.163,
      "step": 207068
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2193849086761475,
      "learning_rate": 1.5056584406504935e-05,
      "loss": 3.1525,
      "step": 207069
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4228262901306152,
      "learning_rate": 1.5055304810389367e-05,
      "loss": 2.9949,
      "step": 207070
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.0519843101501465,
      "learning_rate": 1.5054025267250525e-05,
      "loss": 2.9697,
      "step": 207071
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.945516586303711,
      "learning_rate": 1.5052745777088404e-05,
      "loss": 2.9393,
      "step": 207072
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9839141368865967,
      "learning_rate": 1.505146633990344e-05,
      "loss": 3.0127,
      "step": 207073
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.795923948287964,
      "learning_rate": 1.5050186955695698e-05,
      "loss": 2.9502,
      "step": 207074
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.673154830932617,
      "learning_rate": 1.5048907624465579e-05,
      "loss": 2.9382,
      "step": 207075
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8259146213531494,
      "learning_rate": 1.504762834621318e-05,
      "loss": 2.7324,
      "step": 207076
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.924776554107666,
      "learning_rate": 1.5046349120938906e-05,
      "loss": 3.0137,
      "step": 207077
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0633702278137207,
      "learning_rate": 1.5045069948642818e-05,
      "loss": 2.832,
      "step": 207078
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.8555638790130615,
      "learning_rate": 1.5043790829325253e-05,
      "loss": 2.8812,
      "step": 207079
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9187068939208984,
      "learning_rate": 1.5042511762986408e-05,
      "loss": 2.8507,
      "step": 207080
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.061040163040161,
      "learning_rate": 1.5041232749626586e-05,
      "loss": 2.9172,
      "step": 207081
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8129398822784424,
      "learning_rate": 1.5039953789245918e-05,
      "loss": 3.1297,
      "step": 207082
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.152867794036865,
      "learning_rate": 1.503867488184477e-05,
      "loss": 2.9714,
      "step": 207083
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.580085277557373,
      "learning_rate": 1.5037396027423242e-05,
      "loss": 3.1292,
      "step": 207084
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.801149368286133,
      "learning_rate": 1.503611722598167e-05,
      "loss": 3.2475,
      "step": 207085
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6209769248962402,
      "learning_rate": 1.503483847752025e-05,
      "loss": 3.0911,
      "step": 207086
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6427338123321533,
      "learning_rate": 1.5033559782039251e-05,
      "loss": 2.8588,
      "step": 207087
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9412317276000977,
      "learning_rate": 1.5032281139538871e-05,
      "loss": 2.8566,
      "step": 207088
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5054638385772705,
      "learning_rate": 1.5031002550019444e-05,
      "loss": 2.9225,
      "step": 207089
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.284285068511963,
      "learning_rate": 1.5029724013481038e-05,
      "loss": 2.8884,
      "step": 207090
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0283522605895996,
      "learning_rate": 1.5028445529924049e-05,
      "loss": 2.7233,
      "step": 207091
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.8710856437683105,
      "learning_rate": 1.5027167099348581e-05,
      "loss": 2.9329,
      "step": 207092
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7365989685058594,
      "learning_rate": 1.5025888721754997e-05,
      "loss": 3.005,
      "step": 207093
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1850244998931885,
      "learning_rate": 1.5024610397143433e-05,
      "loss": 2.9196,
      "step": 207094
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9888722896575928,
      "learning_rate": 1.502333212551422e-05,
      "loss": 2.788,
      "step": 207095
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.725590229034424,
      "learning_rate": 1.5022053906867559e-05,
      "loss": 3.1387,
      "step": 207096
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5564959049224854,
      "learning_rate": 1.5020775741203684e-05,
      "loss": 3.1639,
      "step": 207097
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.952087879180908,
      "learning_rate": 1.501949762852276e-05,
      "loss": 3.0728,
      "step": 207098
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9552764892578125,
      "learning_rate": 1.5018219568825118e-05,
      "loss": 2.8775,
      "step": 207099
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.555180311203003,
      "learning_rate": 1.5016941562110963e-05,
      "loss": 3.2395,
      "step": 207100
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.796811819076538,
      "learning_rate": 1.5015663608380557e-05,
      "loss": 3.0961,
      "step": 207101
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9001996517181396,
      "learning_rate": 1.5014385707634135e-05,
      "loss": 2.7762,
      "step": 207102
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8640475273132324,
      "learning_rate": 1.5013107859871864e-05,
      "loss": 2.7099,
      "step": 207103
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3355696201324463,
      "learning_rate": 1.501183006509411e-05,
      "loss": 3.0672,
      "step": 207104
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9647624492645264,
      "learning_rate": 1.5010552323301006e-05,
      "loss": 2.9068,
      "step": 207105
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.104684829711914,
      "learning_rate": 1.5009274634492785e-05,
      "loss": 3.137,
      "step": 207106
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1104609966278076,
      "learning_rate": 1.5007996998669746e-05,
      "loss": 3.061,
      "step": 207107
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.502051591873169,
      "learning_rate": 1.5006719415832125e-05,
      "loss": 2.8222,
      "step": 207108
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.581660509109497,
      "learning_rate": 1.5005441885980085e-05,
      "loss": 2.9398,
      "step": 207109
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1647698879241943,
      "learning_rate": 1.5004164409113928e-05,
      "loss": 2.9733,
      "step": 207110
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4383416175842285,
      "learning_rate": 1.500288698523392e-05,
      "loss": 2.8159,
      "step": 207111
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1452243328094482,
      "learning_rate": 1.5001609614340193e-05,
      "loss": 3.0337,
      "step": 207112
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.609611749649048,
      "learning_rate": 1.5000332296433082e-05,
      "loss": 2.7694,
      "step": 207113
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.289782762527466,
      "learning_rate": 1.4999055031512785e-05,
      "loss": 2.9318,
      "step": 207114
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3128011226654053,
      "learning_rate": 1.4997777819579504e-05,
      "loss": 2.9627,
      "step": 207115
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4322831630706787,
      "learning_rate": 1.499650066063357e-05,
      "loss": 2.7765,
      "step": 207116
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2916882038116455,
      "learning_rate": 1.4995223554675117e-05,
      "loss": 2.8512,
      "step": 207117
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2125015258789062,
      "learning_rate": 1.4993946501704479e-05,
      "loss": 3.1049,
      "step": 207118
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0632290840148926,
      "learning_rate": 1.4992669501721855e-05,
      "loss": 2.9299,
      "step": 207119
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.812066078186035,
      "learning_rate": 1.4991392554727444e-05,
      "loss": 3.0094,
      "step": 207120
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9445724487304688,
      "learning_rate": 1.499011566072148e-05,
      "loss": 2.8426,
      "step": 207121
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.329418659210205,
      "learning_rate": 1.4988838819704297e-05,
      "loss": 2.7418,
      "step": 207122
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.951014757156372,
      "learning_rate": 1.4987562031676027e-05,
      "loss": 3.0978,
      "step": 207123
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1637704372406006,
      "learning_rate": 1.4986285296636969e-05,
      "loss": 2.9393,
      "step": 207124
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.047703266143799,
      "learning_rate": 1.4985008614587291e-05,
      "loss": 2.7974,
      "step": 207125
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.011615753173828,
      "learning_rate": 1.4983731985527391e-05,
      "loss": 2.8063,
      "step": 207126
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4008376598358154,
      "learning_rate": 1.4982455409457305e-05,
      "loss": 2.8276,
      "step": 207127
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5060875415802,
      "learning_rate": 1.4981178886377399e-05,
      "loss": 2.8311,
      "step": 207128
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.126058578491211,
      "learning_rate": 1.4979902416287836e-05,
      "loss": 2.9932,
      "step": 207129
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0157079696655273,
      "learning_rate": 1.497862599918892e-05,
      "loss": 3.0324,
      "step": 207130
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.327256679534912,
      "learning_rate": 1.4977349635080815e-05,
      "loss": 2.7328,
      "step": 207131
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.671489953994751,
      "learning_rate": 1.4976073323963888e-05,
      "loss": 3.1255,
      "step": 207132
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5438623428344727,
      "learning_rate": 1.4974797065838207e-05,
      "loss": 2.9952,
      "step": 207133
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.906902551651001,
      "learning_rate": 1.4973520860704137e-05,
      "loss": 2.8791,
      "step": 207134
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8448727130889893,
      "learning_rate": 1.4972244708561843e-05,
      "loss": 3.083,
      "step": 207135
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6065306663513184,
      "learning_rate": 1.4970968609411593e-05,
      "loss": 3.0722,
      "step": 207136
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0585649013519287,
      "learning_rate": 1.4969692563253621e-05,
      "loss": 2.7385,
      "step": 207137
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.569532871246338,
      "learning_rate": 1.4968416570088226e-05,
      "loss": 2.9738,
      "step": 207138
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5581254959106445,
      "learning_rate": 1.4967140629915474e-05,
      "loss": 3.115,
      "step": 207139
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8587355613708496,
      "learning_rate": 1.49658647427358e-05,
      "loss": 3.0474,
      "step": 207140
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.338386297225952,
      "learning_rate": 1.4964588908549269e-05,
      "loss": 2.9997,
      "step": 207141
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.157014846801758,
      "learning_rate": 1.496331312735628e-05,
      "loss": 3.0663,
      "step": 207142
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8966076374053955,
      "learning_rate": 1.4962037399156934e-05,
      "loss": 2.9741,
      "step": 207143
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.619657516479492,
      "learning_rate": 1.4960761723951597e-05,
      "loss": 2.9755,
      "step": 207144
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9483094215393066,
      "learning_rate": 1.495948610174037e-05,
      "loss": 2.976,
      "step": 207145
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1627159118652344,
      "learning_rate": 1.4958210532523584e-05,
      "loss": 2.9194,
      "step": 207146
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3608052730560303,
      "learning_rate": 1.4956935016301408e-05,
      "loss": 2.9209,
      "step": 207147
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7163608074188232,
      "learning_rate": 1.495565955307414e-05,
      "loss": 3.0786,
      "step": 207148
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9808261394500732,
      "learning_rate": 1.495438414284198e-05,
      "loss": 2.7012,
      "step": 207149
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.926354169845581,
      "learning_rate": 1.4953108785605262e-05,
      "loss": 2.7883,
      "step": 207150
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.939624547958374,
      "learning_rate": 1.495183348136405e-05,
      "loss": 2.8223,
      "step": 207151
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.797765016555786,
      "learning_rate": 1.4950558230118715e-05,
      "loss": 3.0416,
      "step": 207152
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2365646362304688,
      "learning_rate": 1.494928303186942e-05,
      "loss": 2.9342,
      "step": 207153
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9787516593933105,
      "learning_rate": 1.4948007886616498e-05,
      "loss": 3.3192,
      "step": 207154
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.223733901977539,
      "learning_rate": 1.494673279436005e-05,
      "loss": 2.9316,
      "step": 207155
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0217151641845703,
      "learning_rate": 1.4945457755100443e-05,
      "loss": 3.0818,
      "step": 207156
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1260623931884766,
      "learning_rate": 1.4944182768837809e-05,
      "loss": 3.1791,
      "step": 207157
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.573007822036743,
      "learning_rate": 1.4942907835572481e-05,
      "loss": 2.9465,
      "step": 207158
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7542691230773926,
      "learning_rate": 1.4941632955304595e-05,
      "loss": 2.8061,
      "step": 207159
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4581480026245117,
      "learning_rate": 1.494035812803448e-05,
      "loss": 2.8666,
      "step": 207160
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.138106107711792,
      "learning_rate": 1.4939083353762271e-05,
      "loss": 2.9727,
      "step": 207161
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.359314441680908,
      "learning_rate": 1.4937808632488368e-05,
      "loss": 2.9,
      "step": 207162
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7630887031555176,
      "learning_rate": 1.4936533964212837e-05,
      "loss": 3.0839,
      "step": 207163
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0183920860290527,
      "learning_rate": 1.4935259348936012e-05,
      "loss": 2.8087,
      "step": 207164
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.2340826988220215,
      "learning_rate": 1.493398478665806e-05,
      "loss": 2.7503,
      "step": 207165
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.897493839263916,
      "learning_rate": 1.4932710277379312e-05,
      "loss": 2.911,
      "step": 207166
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5039212703704834,
      "learning_rate": 1.4931435821099902e-05,
      "loss": 2.843,
      "step": 207167
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.259338617324829,
      "learning_rate": 1.493016141782023e-05,
      "loss": 2.8906,
      "step": 207168
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.957547187805176,
      "learning_rate": 1.4928887067540296e-05,
      "loss": 3.1074,
      "step": 207169
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4258692264556885,
      "learning_rate": 1.4927612770260532e-05,
      "loss": 3.0206,
      "step": 207170
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6727001667022705,
      "learning_rate": 1.4926338525981074e-05,
      "loss": 2.858,
      "step": 207171
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3967936038970947,
      "learning_rate": 1.492506433470222e-05,
      "loss": 2.7399,
      "step": 207172
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.015881061553955,
      "learning_rate": 1.4923790196424135e-05,
      "loss": 3.016,
      "step": 207173
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.726407766342163,
      "learning_rate": 1.4922516111147154e-05,
      "loss": 3.1558,
      "step": 207174
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8191657066345215,
      "learning_rate": 1.4921242078871409e-05,
      "loss": 2.9875,
      "step": 207175
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1939761638641357,
      "learning_rate": 1.4919968099597234e-05,
      "loss": 3.0122,
      "step": 207176
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7865500450134277,
      "learning_rate": 1.4918694173324763e-05,
      "loss": 2.8927,
      "step": 207177
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5837104320526123,
      "learning_rate": 1.4917420300054329e-05,
      "loss": 3.0162,
      "step": 207178
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9020631313323975,
      "learning_rate": 1.4916146479786096e-05,
      "loss": 2.9607,
      "step": 207179
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4970905780792236,
      "learning_rate": 1.4914872712520365e-05,
      "loss": 3.123,
      "step": 207180
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.823171615600586,
      "learning_rate": 1.4913598998257336e-05,
      "loss": 2.7514,
      "step": 207181
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7356131076812744,
      "learning_rate": 1.4912325336997243e-05,
      "loss": 2.8839,
      "step": 207182
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6380252838134766,
      "learning_rate": 1.491105172874032e-05,
      "loss": 2.7288,
      "step": 207183
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.023714780807495,
      "learning_rate": 1.490977817348683e-05,
      "loss": 3.0256,
      "step": 207184
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.620962381362915,
      "learning_rate": 1.4908504671236943e-05,
      "loss": 2.8635,
      "step": 207185
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.437532663345337,
      "learning_rate": 1.4907231221991023e-05,
      "loss": 3.0816,
      "step": 207186
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.744687557220459,
      "learning_rate": 1.4905957825749205e-05,
      "loss": 2.6536,
      "step": 207187
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2268500328063965,
      "learning_rate": 1.490468448251172e-05,
      "loss": 2.8631,
      "step": 207188
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.943007230758667,
      "learning_rate": 1.490341119227887e-05,
      "loss": 3.0809,
      "step": 207189
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.135852813720703,
      "learning_rate": 1.4902137955050853e-05,
      "loss": 2.8917,
      "step": 207190
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.887752056121826,
      "learning_rate": 1.490086477082787e-05,
      "loss": 2.8613,
      "step": 207191
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.16105055809021,
      "learning_rate": 1.4899591639610253e-05,
      "loss": 3.003,
      "step": 207192
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8226442337036133,
      "learning_rate": 1.489831856139817e-05,
      "loss": 2.9993,
      "step": 207193
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.584595203399658,
      "learning_rate": 1.4897045536191854e-05,
      "loss": 2.669,
      "step": 207194
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5297024250030518,
      "learning_rate": 1.4895772563991571e-05,
      "loss": 2.8344,
      "step": 207195
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6933064460754395,
      "learning_rate": 1.4894499644797553e-05,
      "loss": 2.7571,
      "step": 207196
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0606911182403564,
      "learning_rate": 1.4893226778610001e-05,
      "loss": 2.8577,
      "step": 207197
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.037320137023926,
      "learning_rate": 1.4891953965429216e-05,
      "loss": 2.7774,
      "step": 207198
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0279266834259033,
      "learning_rate": 1.4890681205255395e-05,
      "loss": 3.0664,
      "step": 207199
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.693192481994629,
      "learning_rate": 1.488940849808874e-05,
      "loss": 2.8176,
      "step": 207200
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.055842399597168,
      "learning_rate": 1.4888135843929549e-05,
      "loss": 2.9235,
      "step": 207201
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.631178140640259,
      "learning_rate": 1.4886863242778025e-05,
      "loss": 2.7185,
      "step": 207202
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.967498540878296,
      "learning_rate": 1.488559069463443e-05,
      "loss": 2.8386,
      "step": 207203
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.482344388961792,
      "learning_rate": 1.4884318199499002e-05,
      "loss": 2.7116,
      "step": 207204
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6005773544311523,
      "learning_rate": 1.488304575737197e-05,
      "loss": 2.9156,
      "step": 207205
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.87723708152771,
      "learning_rate": 1.4881773368253503e-05,
      "loss": 2.7682,
      "step": 207206
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.099846839904785,
      "learning_rate": 1.4880501032143932e-05,
      "loss": 2.9565,
      "step": 207207
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3477559089660645,
      "learning_rate": 1.4879228749043425e-05,
      "loss": 2.8782,
      "step": 207208
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3987972736358643,
      "learning_rate": 1.4877956518952282e-05,
      "loss": 3.044,
      "step": 207209
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.612229108810425,
      "learning_rate": 1.4876684341870736e-05,
      "loss": 3.0352,
      "step": 207210
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1291074752807617,
      "learning_rate": 1.4875412217798955e-05,
      "loss": 3.0987,
      "step": 207211
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2016897201538086,
      "learning_rate": 1.4874140146737201e-05,
      "loss": 2.7782,
      "step": 207212
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1817209720611572,
      "learning_rate": 1.4872868128685777e-05,
      "loss": 3.0812,
      "step": 207213
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.974142074584961,
      "learning_rate": 1.4871596163644816e-05,
      "loss": 2.8097,
      "step": 207214
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7155346870422363,
      "learning_rate": 1.4870324251614652e-05,
      "loss": 3.0178,
      "step": 207215
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3800549507141113,
      "learning_rate": 1.4869052392595448e-05,
      "loss": 2.9842,
      "step": 207216
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1184170246124268,
      "learning_rate": 1.4867780586587542e-05,
      "loss": 2.7026,
      "step": 207217
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.675581932067871,
      "learning_rate": 1.4866508833590996e-05,
      "loss": 3.0755,
      "step": 207218
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9459924697875977,
      "learning_rate": 1.486523713360621e-05,
      "loss": 2.8523,
      "step": 207219
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.731759548187256,
      "learning_rate": 1.4863965486633323e-05,
      "loss": 2.7722,
      "step": 207220
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1161909103393555,
      "learning_rate": 1.4862693892672627e-05,
      "loss": 3.0874,
      "step": 207221
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9243175983428955,
      "learning_rate": 1.4861422351724328e-05,
      "loss": 2.9568,
      "step": 207222
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6701042652130127,
      "learning_rate": 1.4860150863788723e-05,
      "loss": 3.1021,
      "step": 207223
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.944528102874756,
      "learning_rate": 1.4858879428865944e-05,
      "loss": 2.8437,
      "step": 207224
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.961730718612671,
      "learning_rate": 1.4857608046956293e-05,
      "loss": 2.654,
      "step": 207225
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.992316484451294,
      "learning_rate": 1.4856336718059969e-05,
      "loss": 2.8149,
      "step": 207226
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8022518157958984,
      "learning_rate": 1.4855065442177272e-05,
      "loss": 2.8846,
      "step": 207227
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0306646823883057,
      "learning_rate": 1.4853794219308369e-05,
      "loss": 3.0281,
      "step": 207228
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6451587677001953,
      "learning_rate": 1.4852523049453624e-05,
      "loss": 2.6708,
      "step": 207229
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.214350700378418,
      "learning_rate": 1.4851251932613072e-05,
      "loss": 2.7903,
      "step": 207230
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8160645961761475,
      "learning_rate": 1.4849980868787081e-05,
      "loss": 2.8836,
      "step": 207231
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6239004135131836,
      "learning_rate": 1.484870985797585e-05,
      "loss": 3.161,
      "step": 207232
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2555367946624756,
      "learning_rate": 1.4847438900179642e-05,
      "loss": 2.9422,
      "step": 207233
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9196009635925293,
      "learning_rate": 1.484616799539866e-05,
      "loss": 2.9127,
      "step": 207234
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.35258412361145,
      "learning_rate": 1.4844897143633238e-05,
      "loss": 3.1251,
      "step": 207235
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.137922525405884,
      "learning_rate": 1.4843626344883441e-05,
      "loss": 2.9144,
      "step": 207236
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.036952257156372,
      "learning_rate": 1.4842355599149635e-05,
      "loss": 2.813,
      "step": 207237
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.159700393676758,
      "learning_rate": 1.4841084906431989e-05,
      "loss": 3.0463,
      "step": 207238
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7870543003082275,
      "learning_rate": 1.4839814266730798e-05,
      "loss": 2.8392,
      "step": 207239
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7565677165985107,
      "learning_rate": 1.4838543680046234e-05,
      "loss": 2.8381,
      "step": 207240
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.856051445007324,
      "learning_rate": 1.483727314637866e-05,
      "loss": 2.7272,
      "step": 207241
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.689331531524658,
      "learning_rate": 1.4836002665728108e-05,
      "loss": 2.7508,
      "step": 207242
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3815064430236816,
      "learning_rate": 1.4834732238094981e-05,
      "loss": 2.988,
      "step": 207243
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.972379446029663,
      "learning_rate": 1.4833461863479445e-05,
      "loss": 3.0817,
      "step": 207244
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.934140205383301,
      "learning_rate": 1.4832191541881766e-05,
      "loss": 2.7945,
      "step": 207245
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.525068521499634,
      "learning_rate": 1.4830921273302143e-05,
      "loss": 2.8574,
      "step": 207246
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2928194999694824,
      "learning_rate": 1.4829651057740876e-05,
      "loss": 2.9611,
      "step": 207247
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1557857990264893,
      "learning_rate": 1.4828380895198099e-05,
      "loss": 3.0078,
      "step": 207248
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.599994659423828,
      "learning_rate": 1.4827110785674178e-05,
      "loss": 2.9517,
      "step": 207249
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.249446153640747,
      "learning_rate": 1.4825840729169215e-05,
      "loss": 2.8029,
      "step": 207250
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3594558238983154,
      "learning_rate": 1.4824570725683538e-05,
      "loss": 3.1104,
      "step": 207251
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3930351734161377,
      "learning_rate": 1.4823300775217351e-05,
      "loss": 3.03,
      "step": 207252
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0255942344665527,
      "learning_rate": 1.482203087777092e-05,
      "loss": 2.9699,
      "step": 207253
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2436769008636475,
      "learning_rate": 1.4820761033344442e-05,
      "loss": 2.8551,
      "step": 207254
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.585879325866699,
      "learning_rate": 1.4819491241938186e-05,
      "loss": 3.067,
      "step": 207255
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.675133466720581,
      "learning_rate": 1.481822150355232e-05,
      "loss": 2.9637,
      "step": 207256
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0297200679779053,
      "learning_rate": 1.4816951818187172e-05,
      "loss": 3.058,
      "step": 207257
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.814958095550537,
      "learning_rate": 1.4815682185842881e-05,
      "loss": 2.8468,
      "step": 207258
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.690748453140259,
      "learning_rate": 1.481441260651981e-05,
      "loss": 2.9224,
      "step": 207259
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.978227376937866,
      "learning_rate": 1.4813143080218092e-05,
      "loss": 3.0346,
      "step": 207260
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5101356506347656,
      "learning_rate": 1.481187360693803e-05,
      "loss": 3.0106,
      "step": 207261
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.155707836151123,
      "learning_rate": 1.4810604186679754e-05,
      "loss": 2.8881,
      "step": 207262
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.624201536178589,
      "learning_rate": 1.480933481944363e-05,
      "loss": 2.9433,
      "step": 207263
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1409058570861816,
      "learning_rate": 1.480806550522976e-05,
      "loss": 3.3124,
      "step": 207264
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8828213214874268,
      "learning_rate": 1.4806796244038511e-05,
      "loss": 3.1385,
      "step": 207265
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5251941680908203,
      "learning_rate": 1.480552703587008e-05,
      "loss": 3.0488,
      "step": 207266
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8416175842285156,
      "learning_rate": 1.4804257880724668e-05,
      "loss": 2.9161,
      "step": 207267
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7972192764282227,
      "learning_rate": 1.480298877860251e-05,
      "loss": 2.8509,
      "step": 207268
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6705379486083984,
      "learning_rate": 1.480171972950387e-05,
      "loss": 2.8176,
      "step": 207269
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8417086601257324,
      "learning_rate": 1.4800450733428948e-05,
      "loss": 2.9141,
      "step": 207270
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7977585792541504,
      "learning_rate": 1.4799181790378046e-05,
      "loss": 2.6358,
      "step": 207271
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.579695701599121,
      "learning_rate": 1.4797912900351361e-05,
      "loss": 3.0685,
      "step": 207272
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0343809127807617,
      "learning_rate": 1.4796644063349129e-05,
      "loss": 3.0635,
      "step": 207273
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0272576808929443,
      "learning_rate": 1.4795375279371513e-05,
      "loss": 2.8167,
      "step": 207274
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6963977813720703,
      "learning_rate": 1.4794106548418916e-05,
      "loss": 2.5571,
      "step": 207275
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8416762351989746,
      "learning_rate": 1.4792837870491402e-05,
      "loss": 3.0647,
      "step": 207276
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.673710584640503,
      "learning_rate": 1.4791569245589308e-05,
      "loss": 2.866,
      "step": 207277
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6353490352630615,
      "learning_rate": 1.4790300673712896e-05,
      "loss": 2.8982,
      "step": 207278
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.920792579650879,
      "learning_rate": 1.4789032154862267e-05,
      "loss": 2.6962,
      "step": 207279
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.018310070037842,
      "learning_rate": 1.478776368903779e-05,
      "loss": 3.0172,
      "step": 207280
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3389174938201904,
      "learning_rate": 1.4786495276239663e-05,
      "loss": 2.6295,
      "step": 207281
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.917332887649536,
      "learning_rate": 1.4785226916468051e-05,
      "loss": 3.0117,
      "step": 207282
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7565877437591553,
      "learning_rate": 1.4783958609723322e-05,
      "loss": 2.9294,
      "step": 207283
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6731069087982178,
      "learning_rate": 1.4782690356005611e-05,
      "loss": 2.8888,
      "step": 207284
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1331236362457275,
      "learning_rate": 1.4781422155315148e-05,
      "loss": 2.9472,
      "step": 207285
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.802002429962158,
      "learning_rate": 1.4780154007652267e-05,
      "loss": 2.9733,
      "step": 207286
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.134941816329956,
      "learning_rate": 1.4778885913017069e-05,
      "loss": 3.1794,
      "step": 207287
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.971283197402954,
      "learning_rate": 1.4777617871409919e-05,
      "loss": 2.8609,
      "step": 207288
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9863529205322266,
      "learning_rate": 1.4776349882830984e-05,
      "loss": 2.7591,
      "step": 207289
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.467592239379883,
      "learning_rate": 1.4775081947280498e-05,
      "loss": 2.978,
      "step": 207290
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.822934865951538,
      "learning_rate": 1.477381406475866e-05,
      "loss": 2.9376,
      "step": 207291
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3402762413024902,
      "learning_rate": 1.4772546235265836e-05,
      "loss": 2.8867,
      "step": 207292
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.01731538772583,
      "learning_rate": 1.4771278458802094e-05,
      "loss": 2.696,
      "step": 207293
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.832508087158203,
      "learning_rate": 1.4770010735367831e-05,
      "loss": 2.774,
      "step": 207294
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1567330360412598,
      "learning_rate": 1.4768743064963184e-05,
      "loss": 2.884,
      "step": 207295
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.718940258026123,
      "learning_rate": 1.476747544758845e-05,
      "loss": 3.1628,
      "step": 207296
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.11700701713562,
      "learning_rate": 1.476620788324373e-05,
      "loss": 2.9215,
      "step": 207297
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.218705177307129,
      "learning_rate": 1.4764940371929458e-05,
      "loss": 2.9248,
      "step": 207298
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.244257926940918,
      "learning_rate": 1.4763672913645664e-05,
      "loss": 2.972,
      "step": 207299
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.888601541519165,
      "learning_rate": 1.4762405508392783e-05,
      "loss": 2.732,
      "step": 207300
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.562328338623047,
      "learning_rate": 1.4761138156170882e-05,
      "loss": 2.8843,
      "step": 207301
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.273446798324585,
      "learning_rate": 1.475987085698036e-05,
      "loss": 2.9205,
      "step": 207302
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9885013103485107,
      "learning_rate": 1.4758603610821284e-05,
      "loss": 2.7495,
      "step": 207303
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1149742603302,
      "learning_rate": 1.4757336417693988e-05,
      "loss": 3.0459,
      "step": 207304
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.757849931716919,
      "learning_rate": 1.475606927759867e-05,
      "loss": 3.1861,
      "step": 207305
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4976413249969482,
      "learning_rate": 1.475480219053563e-05,
      "loss": 2.7914,
      "step": 207306
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6368954181671143,
      "learning_rate": 1.4753535156505003e-05,
      "loss": 2.8606,
      "step": 207307
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3344674110412598,
      "learning_rate": 1.4752268175507153e-05,
      "loss": 2.7623,
      "step": 207308
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7933123111724854,
      "learning_rate": 1.475100124754215e-05,
      "loss": 2.9513,
      "step": 207309
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7582273483276367,
      "learning_rate": 1.4749734372610388e-05,
      "loss": 2.724,
      "step": 207310
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6767239570617676,
      "learning_rate": 1.4748467550711973e-05,
      "loss": 3.0896,
      "step": 207311
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0865068435668945,
      "learning_rate": 1.4747200781847269e-05,
      "loss": 2.8574,
      "step": 207312
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.562502384185791,
      "learning_rate": 1.4745934066016374e-05,
      "loss": 2.8464,
      "step": 207313
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.479405164718628,
      "learning_rate": 1.4744667403219724e-05,
      "loss": 3.0436,
      "step": 207314
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.2886083126068115,
      "learning_rate": 1.4743400793457283e-05,
      "loss": 2.9513,
      "step": 207315
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9466395378112793,
      "learning_rate": 1.4742134236729519e-05,
      "loss": 2.9952,
      "step": 207316
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1065571308135986,
      "learning_rate": 1.4740867733036498e-05,
      "loss": 2.96,
      "step": 207317
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.659358263015747,
      "learning_rate": 1.4739601282378589e-05,
      "loss": 2.8046,
      "step": 207318
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9790639877319336,
      "learning_rate": 1.4738334884755953e-05,
      "loss": 2.8763,
      "step": 207319
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2061538696289062,
      "learning_rate": 1.473706854016886e-05,
      "loss": 2.9219,
      "step": 207320
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.918891429901123,
      "learning_rate": 1.4735802248617546e-05,
      "loss": 2.7835,
      "step": 207321
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.33945894241333,
      "learning_rate": 1.4734536010102204e-05,
      "loss": 2.9293,
      "step": 207322
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.828606128692627,
      "learning_rate": 1.4733269824623073e-05,
      "loss": 2.8136,
      "step": 207323
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.581076145172119,
      "learning_rate": 1.473200369218045e-05,
      "loss": 2.6641,
      "step": 207324
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6654951572418213,
      "learning_rate": 1.47307376127745e-05,
      "loss": 2.7822,
      "step": 207325
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9629809856414795,
      "learning_rate": 1.4729471586405528e-05,
      "loss": 2.7933,
      "step": 207326
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7813899517059326,
      "learning_rate": 1.4728205613073729e-05,
      "loss": 2.8325,
      "step": 207327
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7184510231018066,
      "learning_rate": 1.4726939692779338e-05,
      "loss": 2.9802,
      "step": 207328
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.626856803894043,
      "learning_rate": 1.4725673825522588e-05,
      "loss": 2.7431,
      "step": 207329
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.394580364227295,
      "learning_rate": 1.4724408011303712e-05,
      "loss": 3.1707,
      "step": 207330
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9545981884002686,
      "learning_rate": 1.4723142250122944e-05,
      "loss": 2.8674,
      "step": 207331
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.187463283538818,
      "learning_rate": 1.4721876541980548e-05,
      "loss": 2.808,
      "step": 207332
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4659225940704346,
      "learning_rate": 1.472061088687676e-05,
      "loss": 3.0908,
      "step": 207333
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.665670394897461,
      "learning_rate": 1.4719345284811779e-05,
      "loss": 2.8697,
      "step": 207334
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.137026309967041,
      "learning_rate": 1.4718079735785837e-05,
      "loss": 2.9634,
      "step": 207335
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9783387184143066,
      "learning_rate": 1.4716814239799201e-05,
      "loss": 3.0344,
      "step": 207336
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7848050594329834,
      "learning_rate": 1.4715548796852072e-05,
      "loss": 3.0916,
      "step": 207337
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.120384693145752,
      "learning_rate": 1.471428340694475e-05,
      "loss": 3.0109,
      "step": 207338
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.22282600402832,
      "learning_rate": 1.4713018070077432e-05,
      "loss": 2.4763,
      "step": 207339
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8315391540527344,
      "learning_rate": 1.471175278625032e-05,
      "loss": 2.8362,
      "step": 207340
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9041965007781982,
      "learning_rate": 1.4710487555463678e-05,
      "loss": 2.9458,
      "step": 207341
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.178799867630005,
      "learning_rate": 1.4709222377717744e-05,
      "loss": 2.8176,
      "step": 207342
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.077118158340454,
      "learning_rate": 1.4707957253012747e-05,
      "loss": 3.0695,
      "step": 207343
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.8658034801483154,
      "learning_rate": 1.4706692181348923e-05,
      "loss": 3.0179,
      "step": 207344
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.08595609664917,
      "learning_rate": 1.4705427162726536e-05,
      "loss": 2.8337,
      "step": 207345
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6305642127990723,
      "learning_rate": 1.4704162197145819e-05,
      "loss": 3.2345,
      "step": 207346
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.704588890075684,
      "learning_rate": 1.4702897284606907e-05,
      "loss": 2.8789,
      "step": 207347
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.28944730758667,
      "learning_rate": 1.4701632425110165e-05,
      "loss": 2.9072,
      "step": 207348
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3698577880859375,
      "learning_rate": 1.4700367618655728e-05,
      "loss": 3.0928,
      "step": 207349
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1141605377197266,
      "learning_rate": 1.4699102865243929e-05,
      "loss": 2.9481,
      "step": 207350
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.958919048309326,
      "learning_rate": 1.4697838164874964e-05,
      "loss": 2.926,
      "step": 207351
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.346930980682373,
      "learning_rate": 1.4696573517549037e-05,
      "loss": 2.66,
      "step": 207352
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6043765544891357,
      "learning_rate": 1.4695308923266346e-05,
      "loss": 3.0168,
      "step": 207353
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0785458087921143,
      "learning_rate": 1.4694044382027259e-05,
      "loss": 2.9558,
      "step": 207354
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4119346141815186,
      "learning_rate": 1.4692779893831874e-05,
      "loss": 2.6475,
      "step": 207355
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.237135410308838,
      "learning_rate": 1.4691515458680526e-05,
      "loss": 3.168,
      "step": 207356
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.809889793395996,
      "learning_rate": 1.4690251076573411e-05,
      "loss": 2.7109,
      "step": 207357
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9132003784179688,
      "learning_rate": 1.4688986747510767e-05,
      "loss": 2.7765,
      "step": 207358
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.029384136199951,
      "learning_rate": 1.4687722471492791e-05,
      "loss": 2.9604,
      "step": 207359
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4240097999572754,
      "learning_rate": 1.4686458248519783e-05,
      "loss": 2.6805,
      "step": 207360
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8964526653289795,
      "learning_rate": 1.4685194078591911e-05,
      "loss": 2.8801,
      "step": 207361
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7232093811035156,
      "learning_rate": 1.4683929961709507e-05,
      "loss": 2.8483,
      "step": 207362
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.97760272026062,
      "learning_rate": 1.4682665897872736e-05,
      "loss": 2.9542,
      "step": 207363
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5965280532836914,
      "learning_rate": 1.4681401887081767e-05,
      "loss": 2.9123,
      "step": 207364
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5499887466430664,
      "learning_rate": 1.4680137929336999e-05,
      "loss": 2.9707,
      "step": 207365
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.941058874130249,
      "learning_rate": 1.4678874024638565e-05,
      "loss": 3.1973,
      "step": 207366
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.911867618560791,
      "learning_rate": 1.4677610172986665e-05,
      "loss": 2.9038,
      "step": 207367
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7974231243133545,
      "learning_rate": 1.4676346374381632e-05,
      "loss": 2.7958,
      "step": 207368
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.722743511199951,
      "learning_rate": 1.4675082628823632e-05,
      "loss": 2.923,
      "step": 207369
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.976304292678833,
      "learning_rate": 1.4673818936312897e-05,
      "loss": 3.1158,
      "step": 207370
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.306018352508545,
      "learning_rate": 1.4672555296849731e-05,
      "loss": 3.078,
      "step": 207371
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1625759601593018,
      "learning_rate": 1.4671291710434329e-05,
      "loss": 2.8915,
      "step": 207372
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.793159008026123,
      "learning_rate": 1.467002817706686e-05,
      "loss": 2.8689,
      "step": 207373
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2053496837615967,
      "learning_rate": 1.466876469674766e-05,
      "loss": 2.9616,
      "step": 207374
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7794392108917236,
      "learning_rate": 1.4667501269476955e-05,
      "loss": 2.9548,
      "step": 207375
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8961563110351562,
      "learning_rate": 1.4666237895254851e-05,
      "loss": 2.793,
      "step": 207376
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2796618938446045,
      "learning_rate": 1.4664974574081777e-05,
      "loss": 2.6689,
      "step": 207377
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1189544200897217,
      "learning_rate": 1.4663711305957804e-05,
      "loss": 2.9301,
      "step": 207378
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.770273208618164,
      "learning_rate": 1.466244809088326e-05,
      "loss": 2.925,
      "step": 207379
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.893380641937256,
      "learning_rate": 1.4661184928858382e-05,
      "loss": 3.1583,
      "step": 207380
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.019094944000244,
      "learning_rate": 1.4659921819883336e-05,
      "loss": 2.8691,
      "step": 207381
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.972921133041382,
      "learning_rate": 1.4658658763958386e-05,
      "loss": 2.6877,
      "step": 207382
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.840027332305908,
      "learning_rate": 1.4657395761083802e-05,
      "loss": 2.6633,
      "step": 207383
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.905097246170044,
      "learning_rate": 1.4656132811259779e-05,
      "loss": 3.0772,
      "step": 207384
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4952127933502197,
      "learning_rate": 1.4654869914486588e-05,
      "loss": 2.9144,
      "step": 207385
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.168980121612549,
      "learning_rate": 1.4653607070764394e-05,
      "loss": 2.8639,
      "step": 207386
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6566596031188965,
      "learning_rate": 1.4652344280093564e-05,
      "loss": 2.9721,
      "step": 207387
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.78615140914917,
      "learning_rate": 1.4651081542474197e-05,
      "loss": 2.914,
      "step": 207388
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.77044939994812,
      "learning_rate": 1.4649818857906625e-05,
      "loss": 2.9282,
      "step": 207389
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.200899600982666,
      "learning_rate": 1.464855622639095e-05,
      "loss": 2.9094,
      "step": 207390
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.967090129852295,
      "learning_rate": 1.4647293647927572e-05,
      "loss": 2.8975,
      "step": 207391
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9725656509399414,
      "learning_rate": 1.4646031122516589e-05,
      "loss": 2.8151,
      "step": 207392
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0356285572052,
      "learning_rate": 1.4644768650158367e-05,
      "loss": 2.8317,
      "step": 207393
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.677349090576172,
      "learning_rate": 1.4643506230853042e-05,
      "loss": 2.9888,
      "step": 207394
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5104117393493652,
      "learning_rate": 1.4642243864600878e-05,
      "loss": 3.1047,
      "step": 207395
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.643044948577881,
      "learning_rate": 1.4640981551402076e-05,
      "loss": 2.9517,
      "step": 207396
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0301332473754883,
      "learning_rate": 1.4639719291256934e-05,
      "loss": 2.838,
      "step": 207397
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1423850059509277,
      "learning_rate": 1.463845708416559e-05,
      "loss": 2.9173,
      "step": 207398
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3226897716522217,
      "learning_rate": 1.4637194930128438e-05,
      "loss": 2.9636,
      "step": 207399
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1363210678100586,
      "learning_rate": 1.463593282914558e-05,
      "loss": 2.6664,
      "step": 207400
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8337454795837402,
      "learning_rate": 1.4634670781217283e-05,
      "loss": 2.9074,
      "step": 207401
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.817793130874634,
      "learning_rate": 1.4633408786343782e-05,
      "loss": 2.9644,
      "step": 207402
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.513712167739868,
      "learning_rate": 1.463214684452534e-05,
      "loss": 2.9959,
      "step": 207403
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.329937696456909,
      "learning_rate": 1.4630884955762124e-05,
      "loss": 2.9967,
      "step": 207404
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0454916954040527,
      "learning_rate": 1.4629623120054434e-05,
      "loss": 2.782,
      "step": 207405
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7038514614105225,
      "learning_rate": 1.4628361337402505e-05,
      "loss": 3.1527,
      "step": 207406
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.774714708328247,
      "learning_rate": 1.462709960780657e-05,
      "loss": 2.8804,
      "step": 207407
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8768112659454346,
      "learning_rate": 1.4625837931266794e-05,
      "loss": 3.021,
      "step": 207408
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1218109130859375,
      "learning_rate": 1.4624576307783475e-05,
      "loss": 2.85,
      "step": 207409
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.729637622833252,
      "learning_rate": 1.4623314737356817e-05,
      "loss": 3.1745,
      "step": 207410
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5055630207061768,
      "learning_rate": 1.4622053219987118e-05,
      "loss": 3.1118,
      "step": 207411
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.316819190979004,
      "learning_rate": 1.4620791755674543e-05,
      "loss": 2.9847,
      "step": 207412
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4278881549835205,
      "learning_rate": 1.4619530344419361e-05,
      "loss": 2.9061,
      "step": 207413
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8426764011383057,
      "learning_rate": 1.4618268986221737e-05,
      "loss": 2.7933,
      "step": 207414
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3176276683807373,
      "learning_rate": 1.4617007681082039e-05,
      "loss": 2.9369,
      "step": 207415
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7085769176483154,
      "learning_rate": 1.4615746429000364e-05,
      "loss": 3.0132,
      "step": 207416
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3532845973968506,
      "learning_rate": 1.4614485229977046e-05,
      "loss": 2.9746,
      "step": 207417
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5758562088012695,
      "learning_rate": 1.4613224084012287e-05,
      "loss": 2.7582,
      "step": 207418
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8870582580566406,
      "learning_rate": 1.4611962991106351e-05,
      "loss": 2.8336,
      "step": 207419
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.358841896057129,
      "learning_rate": 1.461070195125934e-05,
      "loss": 2.8626,
      "step": 207420
    },
    {
      "epoch": 2.7,
      "grad_norm": 6.850244045257568,
      "learning_rate": 1.4609440964471652e-05,
      "loss": 2.7308,
      "step": 207421
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.896582841873169,
      "learning_rate": 1.4608180030743422e-05,
      "loss": 3.0948,
      "step": 207422
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.91386079788208,
      "learning_rate": 1.4606919150074981e-05,
      "loss": 3.0534,
      "step": 207423
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.838163375854492,
      "learning_rate": 1.4605658322466463e-05,
      "loss": 2.7358,
      "step": 207424
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.094754219055176,
      "learning_rate": 1.4604397547918135e-05,
      "loss": 2.8974,
      "step": 207425
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.171185255050659,
      "learning_rate": 1.4603136826430229e-05,
      "loss": 3.0026,
      "step": 207426
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5870931148529053,
      "learning_rate": 1.4601876158003011e-05,
      "loss": 2.88,
      "step": 207427
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.981140375137329,
      "learning_rate": 1.4600615542636651e-05,
      "loss": 2.6901,
      "step": 207428
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9443752765655518,
      "learning_rate": 1.459935498033148e-05,
      "loss": 2.9676,
      "step": 207429
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9880754947662354,
      "learning_rate": 1.4598094471087662e-05,
      "loss": 2.9581,
      "step": 207430
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4135308265686035,
      "learning_rate": 1.4596834014905434e-05,
      "loss": 3.0065,
      "step": 207431
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.548064947128296,
      "learning_rate": 1.4595573611785027e-05,
      "loss": 2.8018,
      "step": 207432
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.857898235321045,
      "learning_rate": 1.4594313261726708e-05,
      "loss": 3.0338,
      "step": 207433
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3345658779144287,
      "learning_rate": 1.4593052964730645e-05,
      "loss": 2.8291,
      "step": 207434
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6354787349700928,
      "learning_rate": 1.4591792720797202e-05,
      "loss": 3.0037,
      "step": 207435
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.651134967803955,
      "learning_rate": 1.459053252992648e-05,
      "loss": 2.9961,
      "step": 207436
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.947967767715454,
      "learning_rate": 1.4589272392118812e-05,
      "loss": 3.0822,
      "step": 207437
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.611903667449951,
      "learning_rate": 1.458801230737433e-05,
      "loss": 3.0039,
      "step": 207438
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.927816390991211,
      "learning_rate": 1.4586752275693369e-05,
      "loss": 2.9898,
      "step": 207439
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5990002155303955,
      "learning_rate": 1.4585492297076062e-05,
      "loss": 3.0666,
      "step": 207440
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9823710918426514,
      "learning_rate": 1.4584232371522741e-05,
      "loss": 2.9056,
      "step": 207441
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.916465997695923,
      "learning_rate": 1.4582972499033607e-05,
      "loss": 2.9319,
      "step": 207442
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.515040397644043,
      "learning_rate": 1.4581712679608892e-05,
      "loss": 2.8591,
      "step": 207443
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9806153774261475,
      "learning_rate": 1.4580452913248763e-05,
      "loss": 3.1403,
      "step": 207444
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.820020914077759,
      "learning_rate": 1.4579193199953588e-05,
      "loss": 2.9174,
      "step": 207445
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1398568153381348,
      "learning_rate": 1.4577933539723463e-05,
      "loss": 2.8288,
      "step": 207446
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7718796730041504,
      "learning_rate": 1.4576673932558725e-05,
      "loss": 2.7276,
      "step": 207447
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.573662042617798,
      "learning_rate": 1.4575414378459604e-05,
      "loss": 2.8502,
      "step": 207448
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9208555221557617,
      "learning_rate": 1.4574154877426237e-05,
      "loss": 2.8176,
      "step": 207449
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9015085697174072,
      "learning_rate": 1.4572895429458952e-05,
      "loss": 2.9937,
      "step": 207450
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9333057403564453,
      "learning_rate": 1.4571636034557987e-05,
      "loss": 3.085,
      "step": 207451
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3853657245635986,
      "learning_rate": 1.4570376692723473e-05,
      "loss": 2.7598,
      "step": 207452
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3846096992492676,
      "learning_rate": 1.4569117403955776e-05,
      "loss": 2.7653,
      "step": 207453
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4856808185577393,
      "learning_rate": 1.4567858168254997e-05,
      "loss": 3.2119,
      "step": 207454
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.066601037979126,
      "learning_rate": 1.4566598985621503e-05,
      "loss": 2.9825,
      "step": 207455
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.894960641860962,
      "learning_rate": 1.4565339856055458e-05,
      "loss": 2.9368,
      "step": 207456
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6836888790130615,
      "learning_rate": 1.4564080779557097e-05,
      "loss": 2.9365,
      "step": 207457
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.033038854598999,
      "learning_rate": 1.4562821756126652e-05,
      "loss": 3.0097,
      "step": 207458
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.659456253051758,
      "learning_rate": 1.4561562785764392e-05,
      "loss": 3.1618,
      "step": 207459
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5994489192962646,
      "learning_rate": 1.456030386847048e-05,
      "loss": 2.9682,
      "step": 207460
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.484236717224121,
      "learning_rate": 1.4559045004245218e-05,
      "loss": 2.7359,
      "step": 207461
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3690285682678223,
      "learning_rate": 1.4557786193088838e-05,
      "loss": 3.1096,
      "step": 207462
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.251753091812134,
      "learning_rate": 1.4556527435001508e-05,
      "loss": 2.8243,
      "step": 207463
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9084296226501465,
      "learning_rate": 1.455526872998356e-05,
      "loss": 3.1154,
      "step": 207464
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6516971588134766,
      "learning_rate": 1.455401007803516e-05,
      "loss": 2.9551,
      "step": 207465
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.025938034057617,
      "learning_rate": 1.4552751479156543e-05,
      "loss": 3.15,
      "step": 207466
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2144665718078613,
      "learning_rate": 1.4551492933347974e-05,
      "loss": 2.9687,
      "step": 207467
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8350257873535156,
      "learning_rate": 1.4550234440609686e-05,
      "loss": 2.8998,
      "step": 207468
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7685177326202393,
      "learning_rate": 1.4548976000941848e-05,
      "loss": 2.9408,
      "step": 207469
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.843456745147705,
      "learning_rate": 1.454771761434479e-05,
      "loss": 3.0627,
      "step": 207470
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4028303623199463,
      "learning_rate": 1.4546459280818646e-05,
      "loss": 2.8948,
      "step": 207471
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.090713977813721,
      "learning_rate": 1.4545201000363748e-05,
      "loss": 3.0469,
      "step": 207472
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8505375385284424,
      "learning_rate": 1.4543942772980299e-05,
      "loss": 3.1271,
      "step": 207473
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.368894577026367,
      "learning_rate": 1.4542684598668498e-05,
      "loss": 3.2169,
      "step": 207474
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0859928131103516,
      "learning_rate": 1.4541426477428575e-05,
      "loss": 2.9042,
      "step": 207475
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.857301712036133,
      "learning_rate": 1.4540168409260832e-05,
      "loss": 3.1488,
      "step": 207476
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.566404104232788,
      "learning_rate": 1.4538910394165404e-05,
      "loss": 3.0953,
      "step": 207477
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.658710479736328,
      "learning_rate": 1.4537652432142622e-05,
      "loss": 2.8116,
      "step": 207478
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.778172492980957,
      "learning_rate": 1.4536394523192686e-05,
      "loss": 3.1225,
      "step": 207479
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5737156867980957,
      "learning_rate": 1.4535136667315828e-05,
      "loss": 2.9284,
      "step": 207480
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5423636436462402,
      "learning_rate": 1.453387886451225e-05,
      "loss": 2.8101,
      "step": 207481
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.82035756111145,
      "learning_rate": 1.4532621114782217e-05,
      "loss": 3.0189,
      "step": 207482
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8258936405181885,
      "learning_rate": 1.4531363418125963e-05,
      "loss": 3.0346,
      "step": 207483
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.813570499420166,
      "learning_rate": 1.453010577454372e-05,
      "loss": 2.9248,
      "step": 207484
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3026046752929688,
      "learning_rate": 1.4528848184035724e-05,
      "loss": 2.8847,
      "step": 207485
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.221439838409424,
      "learning_rate": 1.4527590646602205e-05,
      "loss": 3.0564,
      "step": 207486
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.186933994293213,
      "learning_rate": 1.4526333162243364e-05,
      "loss": 2.979,
      "step": 207487
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9819889068603516,
      "learning_rate": 1.4525075730959502e-05,
      "loss": 2.7697,
      "step": 207488
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.760061502456665,
      "learning_rate": 1.4523818352750783e-05,
      "loss": 2.9888,
      "step": 207489
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1650519371032715,
      "learning_rate": 1.4522561027617508e-05,
      "loss": 2.9759,
      "step": 207490
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5809216499328613,
      "learning_rate": 1.4521303755559876e-05,
      "loss": 2.892,
      "step": 207491
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.859705924987793,
      "learning_rate": 1.4520046536578122e-05,
      "loss": 2.8473,
      "step": 207492
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.8684933185577393,
      "learning_rate": 1.4518789370672445e-05,
      "loss": 2.8516,
      "step": 207493
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.202843189239502,
      "learning_rate": 1.4517532257843179e-05,
      "loss": 3.0178,
      "step": 207494
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.527035713195801,
      "learning_rate": 1.4516275198090422e-05,
      "loss": 2.9727,
      "step": 207495
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.323974847793579,
      "learning_rate": 1.4515018191414506e-05,
      "loss": 2.8261,
      "step": 207496
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.493795394897461,
      "learning_rate": 1.4513761237815669e-05,
      "loss": 2.6927,
      "step": 207497
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.1975250244140625,
      "learning_rate": 1.4512504337294106e-05,
      "loss": 3.114,
      "step": 207498
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0778210163116455,
      "learning_rate": 1.4511247489849986e-05,
      "loss": 2.9662,
      "step": 207499
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5763094425201416,
      "learning_rate": 1.4509990695483675e-05,
      "loss": 2.7716,
      "step": 207500
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9188692569732666,
      "learning_rate": 1.450873395419534e-05,
      "loss": 2.9638,
      "step": 207501
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.020547389984131,
      "learning_rate": 1.4507477265985212e-05,
      "loss": 2.7995,
      "step": 207502
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.923116683959961,
      "learning_rate": 1.450622063085356e-05,
      "loss": 3.2049,
      "step": 207503
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.090682029724121,
      "learning_rate": 1.4504964048800583e-05,
      "loss": 2.9942,
      "step": 207504
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.370591640472412,
      "learning_rate": 1.450370751982648e-05,
      "loss": 2.8743,
      "step": 207505
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.789135694503784,
      "learning_rate": 1.4502451043931552e-05,
      "loss": 2.9993,
      "step": 207506
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.809412717819214,
      "learning_rate": 1.4501194621115997e-05,
      "loss": 2.9791,
      "step": 207507
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6733551025390625,
      "learning_rate": 1.4499938251380084e-05,
      "loss": 2.9936,
      "step": 207508
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.049424648284912,
      "learning_rate": 1.449868193472401e-05,
      "loss": 2.976,
      "step": 207509
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8799588680267334,
      "learning_rate": 1.4497425671148044e-05,
      "loss": 2.9943,
      "step": 207510
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9880800247192383,
      "learning_rate": 1.4496169460652352e-05,
      "loss": 2.9586,
      "step": 207511
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1397130489349365,
      "learning_rate": 1.4494913303237232e-05,
      "loss": 2.8952,
      "step": 207512
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9513978958129883,
      "learning_rate": 1.4493657198902886e-05,
      "loss": 2.8527,
      "step": 207513
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.540665864944458,
      "learning_rate": 1.4492401147649579e-05,
      "loss": 2.8698,
      "step": 207514
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.202643632888794,
      "learning_rate": 1.4491145149477513e-05,
      "loss": 2.9874,
      "step": 207515
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.002603769302368,
      "learning_rate": 1.4489889204386951e-05,
      "loss": 2.8682,
      "step": 207516
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9503748416900635,
      "learning_rate": 1.4488633312378062e-05,
      "loss": 3.0452,
      "step": 207517
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.875572443008423,
      "learning_rate": 1.4487377473451178e-05,
      "loss": 2.8877,
      "step": 207518
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.771710157394409,
      "learning_rate": 1.44861216876064e-05,
      "loss": 2.925,
      "step": 207519
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6302859783172607,
      "learning_rate": 1.4484865954844128e-05,
      "loss": 2.9261,
      "step": 207520
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0774149894714355,
      "learning_rate": 1.4483610275164459e-05,
      "loss": 2.8552,
      "step": 207521
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.165914535522461,
      "learning_rate": 1.4482354648567728e-05,
      "loss": 2.7265,
      "step": 207522
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.696155548095703,
      "learning_rate": 1.448109907505407e-05,
      "loss": 2.8075,
      "step": 207523
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1326334476470947,
      "learning_rate": 1.4479843554623783e-05,
      "loss": 2.8203,
      "step": 207524
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9766621589660645,
      "learning_rate": 1.4478588087277065e-05,
      "loss": 3.2563,
      "step": 207525
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1528263092041016,
      "learning_rate": 1.4477332673014186e-05,
      "loss": 3.0417,
      "step": 207526
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.197577714920044,
      "learning_rate": 1.4476077311835343e-05,
      "loss": 3.0091,
      "step": 207527
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8559181690216064,
      "learning_rate": 1.4474822003740837e-05,
      "loss": 2.9853,
      "step": 207528
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6182596683502197,
      "learning_rate": 1.4473566748730803e-05,
      "loss": 2.878,
      "step": 207529
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6932761669158936,
      "learning_rate": 1.4472311546805536e-05,
      "loss": 2.9104,
      "step": 207530
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.415564775466919,
      "learning_rate": 1.4471056397965209e-05,
      "loss": 2.7892,
      "step": 207531
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8261611461639404,
      "learning_rate": 1.4469801302210183e-05,
      "loss": 2.7925,
      "step": 207532
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8525984287261963,
      "learning_rate": 1.4468546259540525e-05,
      "loss": 2.883,
      "step": 207533
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7791848182678223,
      "learning_rate": 1.4467291269956671e-05,
      "loss": 2.8266,
      "step": 207534
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8526439666748047,
      "learning_rate": 1.4466036333458652e-05,
      "loss": 3.234,
      "step": 207535
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7899787425994873,
      "learning_rate": 1.4464781450046803e-05,
      "loss": 2.7777,
      "step": 207536
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.10919189453125,
      "learning_rate": 1.4463526619721321e-05,
      "loss": 2.741,
      "step": 207537
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7802798748016357,
      "learning_rate": 1.446227184248251e-05,
      "loss": 2.958,
      "step": 207538
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6843881607055664,
      "learning_rate": 1.4461017118330498e-05,
      "loss": 2.8613,
      "step": 207539
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.672236919403076,
      "learning_rate": 1.4459762447265588e-05,
      "loss": 3.1418,
      "step": 207540
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1603055000305176,
      "learning_rate": 1.4458507829288045e-05,
      "loss": 2.9074,
      "step": 207541
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8002724647521973,
      "learning_rate": 1.4457253264398005e-05,
      "loss": 2.9602,
      "step": 207542
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.860135078430176,
      "learning_rate": 1.4455998752595765e-05,
      "loss": 2.789,
      "step": 207543
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.871638536453247,
      "learning_rate": 1.445474429388156e-05,
      "loss": 2.7988,
      "step": 207544
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.907778739929199,
      "learning_rate": 1.4453489888255554e-05,
      "loss": 2.9579,
      "step": 207545
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.454355239868164,
      "learning_rate": 1.4452235535718082e-05,
      "loss": 2.953,
      "step": 207546
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0841522216796875,
      "learning_rate": 1.4450981236269343e-05,
      "loss": 2.9182,
      "step": 207547
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.704430341720581,
      "learning_rate": 1.4449726989909505e-05,
      "loss": 3.1252,
      "step": 207548
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8028812408447266,
      "learning_rate": 1.4448472796638899e-05,
      "loss": 3.0346,
      "step": 207549
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6843206882476807,
      "learning_rate": 1.4447218656457726e-05,
      "loss": 2.9102,
      "step": 207550
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7630560398101807,
      "learning_rate": 1.444596456936612e-05,
      "loss": 3.1714,
      "step": 207551
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1971254348754883,
      "learning_rate": 1.4444710535364479e-05,
      "loss": 2.8506,
      "step": 207552
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1761300563812256,
      "learning_rate": 1.4443456554452937e-05,
      "loss": 2.7785,
      "step": 207553
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.780987501144409,
      "learning_rate": 1.4442202626631728e-05,
      "loss": 2.8211,
      "step": 207554
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0296473503112793,
      "learning_rate": 1.4440948751901116e-05,
      "loss": 3.1064,
      "step": 207555
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5006520748138428,
      "learning_rate": 1.443969493026137e-05,
      "loss": 2.9967,
      "step": 207556
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7396862506866455,
      "learning_rate": 1.4438441161712589e-05,
      "loss": 2.8379,
      "step": 207557
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.080838680267334,
      "learning_rate": 1.4437187446255138e-05,
      "loss": 3.1322,
      "step": 207558
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.445004940032959,
      "learning_rate": 1.443593378388922e-05,
      "loss": 2.7656,
      "step": 207559
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.814332962036133,
      "learning_rate": 1.4434680174614998e-05,
      "loss": 3.3337,
      "step": 207560
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0053484439849854,
      "learning_rate": 1.4433426618432808e-05,
      "loss": 2.5981,
      "step": 207561
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8787074089050293,
      "learning_rate": 1.4432173115342782e-05,
      "loss": 2.9672,
      "step": 207562
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.159559488296509,
      "learning_rate": 1.4430919665345254e-05,
      "loss": 2.8605,
      "step": 207563
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2024710178375244,
      "learning_rate": 1.442966626844042e-05,
      "loss": 3.0393,
      "step": 207564
    },
    {
      "epoch": 2.7,
      "grad_norm": 6.976889133453369,
      "learning_rate": 1.4428412924628485e-05,
      "loss": 2.8995,
      "step": 207565
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.774049758911133,
      "learning_rate": 1.4427159633909679e-05,
      "loss": 2.8386,
      "step": 207566
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0182883739471436,
      "learning_rate": 1.442590639628427e-05,
      "loss": 2.6858,
      "step": 207567
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.043398380279541,
      "learning_rate": 1.4424653211752457e-05,
      "loss": 3.0853,
      "step": 207568
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.042184352874756,
      "learning_rate": 1.4423400080314507e-05,
      "loss": 2.928,
      "step": 207569
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.124005079269409,
      "learning_rate": 1.4422147001970652e-05,
      "loss": 2.9095,
      "step": 207570
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9021332263946533,
      "learning_rate": 1.4420893976721127e-05,
      "loss": 2.9131,
      "step": 207571
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.426795482635498,
      "learning_rate": 1.4419641004566096e-05,
      "loss": 2.7886,
      "step": 207572
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.954617738723755,
      "learning_rate": 1.4418388085505861e-05,
      "loss": 2.9914,
      "step": 207573
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7088303565979004,
      "learning_rate": 1.4417135219540622e-05,
      "loss": 2.7745,
      "step": 207574
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.3775289058685303,
      "learning_rate": 1.4415882406670675e-05,
      "loss": 3.0866,
      "step": 207575
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7763988971710205,
      "learning_rate": 1.4414629646896192e-05,
      "loss": 2.9386,
      "step": 207576
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7796504497528076,
      "learning_rate": 1.4413376940217436e-05,
      "loss": 2.7825,
      "step": 207577
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.078003406524658,
      "learning_rate": 1.4412124286634575e-05,
      "loss": 3.0913,
      "step": 207578
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3411643505096436,
      "learning_rate": 1.4410871686147908e-05,
      "loss": 3.0165,
      "step": 207579
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5578572750091553,
      "learning_rate": 1.4409619138757633e-05,
      "loss": 2.7852,
      "step": 207580
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.992283344268799,
      "learning_rate": 1.4408366644464053e-05,
      "loss": 3.0147,
      "step": 207581
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.515491485595703,
      "learning_rate": 1.4407114203267334e-05,
      "loss": 2.7956,
      "step": 207582
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.174915313720703,
      "learning_rate": 1.4405861815167707e-05,
      "loss": 2.9968,
      "step": 207583
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8449058532714844,
      "learning_rate": 1.4404609480165408e-05,
      "loss": 2.9563,
      "step": 207584
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7937421798706055,
      "learning_rate": 1.4403357198260701e-05,
      "loss": 2.9007,
      "step": 207585
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.206721305847168,
      "learning_rate": 1.4402104969453787e-05,
      "loss": 2.8707,
      "step": 207586
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.886786460876465,
      "learning_rate": 1.4400852793744932e-05,
      "loss": 2.7021,
      "step": 207587
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.170703172683716,
      "learning_rate": 1.4399600671134337e-05,
      "loss": 2.7688,
      "step": 207588
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9432384967803955,
      "learning_rate": 1.43983486016223e-05,
      "loss": 2.958,
      "step": 207589
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6042635440826416,
      "learning_rate": 1.4397096585208923e-05,
      "loss": 3.1267,
      "step": 207590
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.951630115509033,
      "learning_rate": 1.439584462189457e-05,
      "loss": 2.9977,
      "step": 207591
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7006986141204834,
      "learning_rate": 1.4394592711679375e-05,
      "loss": 2.7629,
      "step": 207592
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.559696912765503,
      "learning_rate": 1.439334085456364e-05,
      "loss": 3.0082,
      "step": 207593
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.96797776222229,
      "learning_rate": 1.4392089050547562e-05,
      "loss": 3.2294,
      "step": 207594
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.326904535293579,
      "learning_rate": 1.4390837299631441e-05,
      "loss": 2.8052,
      "step": 207595
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1718926429748535,
      "learning_rate": 1.4389585601815378e-05,
      "loss": 2.8525,
      "step": 207596
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6248111724853516,
      "learning_rate": 1.438833395709974e-05,
      "loss": 2.8759,
      "step": 207597
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.66283917427063,
      "learning_rate": 1.438708236548466e-05,
      "loss": 2.9911,
      "step": 207598
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8320491313934326,
      "learning_rate": 1.4385830826970434e-05,
      "loss": 3.1781,
      "step": 207599
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6660680770874023,
      "learning_rate": 1.4384579341557234e-05,
      "loss": 3.1054,
      "step": 207600
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6160340309143066,
      "learning_rate": 1.4383327909245424e-05,
      "loss": 3.0477,
      "step": 207601
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5871880054473877,
      "learning_rate": 1.438207653003507e-05,
      "loss": 3.0101,
      "step": 207602
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.988760232925415,
      "learning_rate": 1.4380825203926505e-05,
      "loss": 2.9242,
      "step": 207603
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3819336891174316,
      "learning_rate": 1.4379573930919929e-05,
      "loss": 2.7815,
      "step": 207604
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2567203044891357,
      "learning_rate": 1.4378322711015578e-05,
      "loss": 3.0105,
      "step": 207605
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.206817865371704,
      "learning_rate": 1.437707154421368e-05,
      "loss": 2.8805,
      "step": 207606
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0695066452026367,
      "learning_rate": 1.437582043051454e-05,
      "loss": 3.2088,
      "step": 207607
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1370439529418945,
      "learning_rate": 1.4374569369918255e-05,
      "loss": 2.9847,
      "step": 207608
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7470204830169678,
      "learning_rate": 1.4373318362425157e-05,
      "loss": 2.9464,
      "step": 207609
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.881173849105835,
      "learning_rate": 1.4372067408035414e-05,
      "loss": 3.1082,
      "step": 207610
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.906550884246826,
      "learning_rate": 1.4370816506749328e-05,
      "loss": 2.8302,
      "step": 207611
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4664416313171387,
      "learning_rate": 1.436956565856706e-05,
      "loss": 2.9711,
      "step": 207612
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.0024285316467285,
      "learning_rate": 1.4368314863488984e-05,
      "loss": 2.9272,
      "step": 207613
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9629428386688232,
      "learning_rate": 1.4367064121515126e-05,
      "loss": 2.9972,
      "step": 207614
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1692309379577637,
      "learning_rate": 1.4365813432645889e-05,
      "loss": 3.0469,
      "step": 207615
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.069809675216675,
      "learning_rate": 1.4364562796881374e-05,
      "loss": 2.7905,
      "step": 207616
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9387800693511963,
      "learning_rate": 1.4363312214221945e-05,
      "loss": 3.0021,
      "step": 207617
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.18904709815979,
      "learning_rate": 1.4362061684667703e-05,
      "loss": 2.8345,
      "step": 207618
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9324934482574463,
      "learning_rate": 1.4360811208219047e-05,
      "loss": 2.7705,
      "step": 207619
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6322360038757324,
      "learning_rate": 1.4359560784876012e-05,
      "loss": 2.9523,
      "step": 207620
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5357439517974854,
      "learning_rate": 1.4358310414638962e-05,
      "loss": 2.7138,
      "step": 207621
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3391757011413574,
      "learning_rate": 1.4357060097508067e-05,
      "loss": 2.764,
      "step": 207622
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.594059944152832,
      "learning_rate": 1.4355809833483622e-05,
      "loss": 2.9261,
      "step": 207623
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.345881462097168,
      "learning_rate": 1.4354559622565765e-05,
      "loss": 2.8761,
      "step": 207624
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.569481134414673,
      "learning_rate": 1.4353309464754859e-05,
      "loss": 3.0962,
      "step": 207625
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.343883514404297,
      "learning_rate": 1.4352059360051038e-05,
      "loss": 2.81,
      "step": 207626
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.902324914932251,
      "learning_rate": 1.4350809308454569e-05,
      "loss": 2.8969,
      "step": 207627
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6122372150421143,
      "learning_rate": 1.4349559309965653e-05,
      "loss": 2.9917,
      "step": 207628
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8719582557678223,
      "learning_rate": 1.4348309364584587e-05,
      "loss": 2.8764,
      "step": 207629
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.985355854034424,
      "learning_rate": 1.4347059472311505e-05,
      "loss": 2.7552,
      "step": 207630
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.005220890045166,
      "learning_rate": 1.4345809633146744e-05,
      "loss": 2.9608,
      "step": 207631
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0325889587402344,
      "learning_rate": 1.4344559847090464e-05,
      "loss": 3.1362,
      "step": 207632
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5917582511901855,
      "learning_rate": 1.4343310114142903e-05,
      "loss": 2.9025,
      "step": 207633
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9166557788848877,
      "learning_rate": 1.4342060434304358e-05,
      "loss": 2.886,
      "step": 207634
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.078555107116699,
      "learning_rate": 1.4340810807575031e-05,
      "loss": 2.8318,
      "step": 207635
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.015156269073486,
      "learning_rate": 1.4339561233955088e-05,
      "loss": 2.7929,
      "step": 207636
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.6020705699920654,
      "learning_rate": 1.4338311713444828e-05,
      "loss": 2.8944,
      "step": 207637
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2548842430114746,
      "learning_rate": 1.4337062246044484e-05,
      "loss": 2.8903,
      "step": 207638
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.806452989578247,
      "learning_rate": 1.4335812831754223e-05,
      "loss": 2.9224,
      "step": 207639
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7580361366271973,
      "learning_rate": 1.4334563470574411e-05,
      "loss": 2.7238,
      "step": 207640
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1733956336975098,
      "learning_rate": 1.433331416250515e-05,
      "loss": 2.7295,
      "step": 207641
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.193403720855713,
      "learning_rate": 1.4332064907546703e-05,
      "loss": 2.8443,
      "step": 207642
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.213205337524414,
      "learning_rate": 1.4330815705699338e-05,
      "loss": 3.1726,
      "step": 207643
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.073646068572998,
      "learning_rate": 1.4329566556963256e-05,
      "loss": 2.8413,
      "step": 207644
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0857489109039307,
      "learning_rate": 1.432831746133869e-05,
      "loss": 3.045,
      "step": 207645
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.032609701156616,
      "learning_rate": 1.4327068418825905e-05,
      "loss": 2.8702,
      "step": 207646
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1422181129455566,
      "learning_rate": 1.43258194294251e-05,
      "loss": 2.8086,
      "step": 207647
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.039325714111328,
      "learning_rate": 1.4324570493136512e-05,
      "loss": 2.916,
      "step": 207648
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1067049503326416,
      "learning_rate": 1.4323321609960404e-05,
      "loss": 3.1131,
      "step": 207649
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0587339401245117,
      "learning_rate": 1.4322072779896977e-05,
      "loss": 2.9538,
      "step": 207650
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.83388614654541,
      "learning_rate": 1.4320824002946429e-05,
      "loss": 3.0333,
      "step": 207651
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.127617597579956,
      "learning_rate": 1.4319575279109097e-05,
      "loss": 3.0274,
      "step": 207652
    },
    {
      "epoch": 2.7,
      "grad_norm": 6.146833896636963,
      "learning_rate": 1.4318326608385078e-05,
      "loss": 2.9659,
      "step": 207653
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.936985969543457,
      "learning_rate": 1.431707799077474e-05,
      "loss": 2.8542,
      "step": 207654
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2293434143066406,
      "learning_rate": 1.4315829426278213e-05,
      "loss": 3.0249,
      "step": 207655
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9719057083129883,
      "learning_rate": 1.4314580914895801e-05,
      "loss": 3.1964,
      "step": 207656
    },
    {
      "epoch": 2.7,
      "grad_norm": 8.649859428405762,
      "learning_rate": 1.4313332456627635e-05,
      "loss": 2.7803,
      "step": 207657
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.172537326812744,
      "learning_rate": 1.4312084051474081e-05,
      "loss": 2.9935,
      "step": 207658
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.397221565246582,
      "learning_rate": 1.4310835699435241e-05,
      "loss": 3.0785,
      "step": 207659
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7326853275299072,
      "learning_rate": 1.4309587400511447e-05,
      "loss": 2.9755,
      "step": 207660
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.684576988220215,
      "learning_rate": 1.4308339154702863e-05,
      "loss": 2.9465,
      "step": 207661
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.933974266052246,
      "learning_rate": 1.430709096200986e-05,
      "loss": 3.2021,
      "step": 207662
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9498634338378906,
      "learning_rate": 1.4305842822432433e-05,
      "loss": 2.8363,
      "step": 207663
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.101820230484009,
      "learning_rate": 1.430459473597102e-05,
      "loss": 2.8239,
      "step": 207664
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.193875312805176,
      "learning_rate": 1.4303346702625717e-05,
      "loss": 2.9096,
      "step": 207665
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5910215377807617,
      "learning_rate": 1.430209872239686e-05,
      "loss": 3.0892,
      "step": 207666
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4291512966156006,
      "learning_rate": 1.430085079528458e-05,
      "loss": 2.8075,
      "step": 207667
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.401989459991455,
      "learning_rate": 1.4299602921289244e-05,
      "loss": 2.7162,
      "step": 207668
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.5640053749084473,
      "learning_rate": 1.4298355100410952e-05,
      "loss": 2.9716,
      "step": 207669
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1045010089874268,
      "learning_rate": 1.4297107332650004e-05,
      "loss": 2.9169,
      "step": 207670
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.984462261199951,
      "learning_rate": 1.42958596180066e-05,
      "loss": 2.7656,
      "step": 207671
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9530913829803467,
      "learning_rate": 1.4294611956481006e-05,
      "loss": 2.6516,
      "step": 207672
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7335147857666016,
      "learning_rate": 1.4293364348073422e-05,
      "loss": 2.9517,
      "step": 207673
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.828958749771118,
      "learning_rate": 1.4292116792784147e-05,
      "loss": 3.0791,
      "step": 207674
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.952083110809326,
      "learning_rate": 1.4290869290613315e-05,
      "loss": 2.9052,
      "step": 207675
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.67753267288208,
      "learning_rate": 1.4289621841561227e-05,
      "loss": 2.8836,
      "step": 207676
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.4329967498779297,
      "learning_rate": 1.4288374445628048e-05,
      "loss": 2.9516,
      "step": 207677
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0291037559509277,
      "learning_rate": 1.4287127102814079e-05,
      "loss": 2.7361,
      "step": 207678
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8151562213897705,
      "learning_rate": 1.4285879813119484e-05,
      "loss": 2.8449,
      "step": 207679
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.694140672683716,
      "learning_rate": 1.428463257654463e-05,
      "loss": 2.8752,
      "step": 207680
    },
    {
      "epoch": 2.7,
      "grad_norm": 6.213022708892822,
      "learning_rate": 1.4283385393089587e-05,
      "loss": 2.9535,
      "step": 207681
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2000834941864014,
      "learning_rate": 1.4282138262754683e-05,
      "loss": 2.8907,
      "step": 207682
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.077998638153076,
      "learning_rate": 1.4280891185540088e-05,
      "loss": 2.8743,
      "step": 207683
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.688549518585205,
      "learning_rate": 1.4279644161446135e-05,
      "loss": 2.9043,
      "step": 207684
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.857393264770508,
      "learning_rate": 1.4278397190472923e-05,
      "loss": 2.8405,
      "step": 207685
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0362436771392822,
      "learning_rate": 1.4277150272620818e-05,
      "loss": 2.818,
      "step": 207686
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2022998332977295,
      "learning_rate": 1.4275903407889922e-05,
      "loss": 2.8411,
      "step": 207687
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7555489540100098,
      "learning_rate": 1.4274656596280565e-05,
      "loss": 2.6571,
      "step": 207688
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.304696083068848,
      "learning_rate": 1.4273409837792883e-05,
      "loss": 2.8661,
      "step": 207689
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.742342233657837,
      "learning_rate": 1.427216313242724e-05,
      "loss": 3.0506,
      "step": 207690
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5264081954956055,
      "learning_rate": 1.4270916480183736e-05,
      "loss": 2.9773,
      "step": 207691
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.635730028152466,
      "learning_rate": 1.426966988106274e-05,
      "loss": 2.7374,
      "step": 207692
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5110387802124023,
      "learning_rate": 1.426842333506435e-05,
      "loss": 3.1006,
      "step": 207693
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.072798013687134,
      "learning_rate": 1.4267176842188865e-05,
      "loss": 2.9783,
      "step": 207694
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.312268257141113,
      "learning_rate": 1.4265930402436487e-05,
      "loss": 2.7834,
      "step": 207695
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1007320880889893,
      "learning_rate": 1.4264684015807482e-05,
      "loss": 2.9186,
      "step": 207696
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0305027961730957,
      "learning_rate": 1.4263437682302048e-05,
      "loss": 2.7457,
      "step": 207697
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.386472463607788,
      "learning_rate": 1.4262191401920519e-05,
      "loss": 2.9905,
      "step": 207698
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.92744779586792,
      "learning_rate": 1.426094517466293e-05,
      "loss": 3.1366,
      "step": 207699
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.236557722091675,
      "learning_rate": 1.4259699000529711e-05,
      "loss": 2.9101,
      "step": 207700
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.669159412384033,
      "learning_rate": 1.4258452879520933e-05,
      "loss": 2.8349,
      "step": 207701
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8563737869262695,
      "learning_rate": 1.4257206811636956e-05,
      "loss": 3.0744,
      "step": 207702
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.710724115371704,
      "learning_rate": 1.4255960796877918e-05,
      "loss": 2.9439,
      "step": 207703
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8169851303100586,
      "learning_rate": 1.4254714835244152e-05,
      "loss": 2.9631,
      "step": 207704
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.336691379547119,
      "learning_rate": 1.4253468926735757e-05,
      "loss": 2.9805,
      "step": 207705
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.9839653968811035,
      "learning_rate": 1.4252223071353097e-05,
      "loss": 2.7806,
      "step": 207706
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3566977977752686,
      "learning_rate": 1.4250977269096275e-05,
      "loss": 2.8468,
      "step": 207707
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1801955699920654,
      "learning_rate": 1.4249731519965657e-05,
      "loss": 2.9285,
      "step": 207708
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2564802169799805,
      "learning_rate": 1.4248485823961342e-05,
      "loss": 3.1605,
      "step": 207709
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0708675384521484,
      "learning_rate": 1.4247240181083697e-05,
      "loss": 2.8843,
      "step": 207710
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.7340164184570312,
      "learning_rate": 1.4245994591332854e-05,
      "loss": 2.7212,
      "step": 207711
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.579054117202759,
      "learning_rate": 1.4244749054709082e-05,
      "loss": 3.0811,
      "step": 207712
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.2154455184936523,
      "learning_rate": 1.4243503571212579e-05,
      "loss": 3.0065,
      "step": 207713
    },
    {
      "epoch": 2.7,
      "grad_norm": 4.803467273712158,
      "learning_rate": 1.4242258140843643e-05,
      "loss": 2.7994,
      "step": 207714
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0856800079345703,
      "learning_rate": 1.424101276360241e-05,
      "loss": 2.9427,
      "step": 207715
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.817319869995117,
      "learning_rate": 1.4239767439489213e-05,
      "loss": 3.2582,
      "step": 207716
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0085184574127197,
      "learning_rate": 1.4238522168504218e-05,
      "loss": 3.0798,
      "step": 207717
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0756123065948486,
      "learning_rate": 1.4237276950647724e-05,
      "loss": 2.7907,
      "step": 207718
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.552002191543579,
      "learning_rate": 1.4236031785919832e-05,
      "loss": 2.7969,
      "step": 207719
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6772804260253906,
      "learning_rate": 1.4234786674320908e-05,
      "loss": 2.8987,
      "step": 207720
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.18184232711792,
      "learning_rate": 1.4233541615851085e-05,
      "loss": 2.9984,
      "step": 207721
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.963632345199585,
      "learning_rate": 1.4232296610510696e-05,
      "loss": 2.7763,
      "step": 207722
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9411613941192627,
      "learning_rate": 1.4231051658299908e-05,
      "loss": 3.0261,
      "step": 207723
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.69978404045105,
      "learning_rate": 1.4229806759218921e-05,
      "loss": 3.039,
      "step": 207724
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7549734115600586,
      "learning_rate": 1.4228561913268065e-05,
      "loss": 2.8957,
      "step": 207725
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.073214054107666,
      "learning_rate": 1.4227317120447479e-05,
      "loss": 3.0462,
      "step": 207726
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.0092482566833496,
      "learning_rate": 1.4226072380757425e-05,
      "loss": 2.834,
      "step": 207727
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.768338918685913,
      "learning_rate": 1.4224827694198171e-05,
      "loss": 3.0636,
      "step": 207728
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.060708522796631,
      "learning_rate": 1.4223583060769883e-05,
      "loss": 3.3828,
      "step": 207729
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.844688653945923,
      "learning_rate": 1.4222338480472827e-05,
      "loss": 3.0382,
      "step": 207730
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.6803362369537354,
      "learning_rate": 1.4221093953307238e-05,
      "loss": 2.9426,
      "step": 207731
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4826672077178955,
      "learning_rate": 1.4219849479273349e-05,
      "loss": 2.842,
      "step": 207732
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8996872901916504,
      "learning_rate": 1.421860505837139e-05,
      "loss": 2.7295,
      "step": 207733
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.1347873210906982,
      "learning_rate": 1.4217360690601597e-05,
      "loss": 2.7082,
      "step": 207734
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.742642402648926,
      "learning_rate": 1.421611637596417e-05,
      "loss": 2.6204,
      "step": 207735
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.918726921081543,
      "learning_rate": 1.4214872114459342e-05,
      "loss": 3.0573,
      "step": 207736
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.3565473556518555,
      "learning_rate": 1.421362790608741e-05,
      "loss": 3.1871,
      "step": 207737
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.441415548324585,
      "learning_rate": 1.421238375084851e-05,
      "loss": 2.8599,
      "step": 207738
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9871363639831543,
      "learning_rate": 1.4211139648742941e-05,
      "loss": 2.9224,
      "step": 207739
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.8866353034973145,
      "learning_rate": 1.4209895599770937e-05,
      "loss": 2.7758,
      "step": 207740
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.5411510467529297,
      "learning_rate": 1.420865160393273e-05,
      "loss": 2.8412,
      "step": 207741
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4410409927368164,
      "learning_rate": 1.4207407661228454e-05,
      "loss": 3.1311,
      "step": 207742
    },
    {
      "epoch": 2.7,
      "grad_norm": 3.4081387519836426,
      "learning_rate": 1.4206163771658474e-05,
      "loss": 2.6545,
      "step": 207743
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.9829201698303223,
      "learning_rate": 1.4204919935222925e-05,
      "loss": 2.946,
      "step": 207744
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.670527219772339,
      "learning_rate": 1.4203676151922105e-05,
      "loss": 2.8359,
      "step": 207745
    },
    {
      "epoch": 2.7,
      "grad_norm": 2.7731363773345947,
      "learning_rate": 1.4202432421756182e-05,
      "loss": 2.9385,
      "step": 207746
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.780723810195923,
      "learning_rate": 1.4201188744725488e-05,
      "loss": 2.7484,
      "step": 207747
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5046887397766113,
      "learning_rate": 1.4199945120830125e-05,
      "loss": 2.8021,
      "step": 207748
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.784517288208008,
      "learning_rate": 1.4198701550070424e-05,
      "loss": 2.9047,
      "step": 207749
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0284178256988525,
      "learning_rate": 1.4197458032446518e-05,
      "loss": 3.0012,
      "step": 207750
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.771836996078491,
      "learning_rate": 1.4196214567958774e-05,
      "loss": 2.969,
      "step": 207751
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2311911582946777,
      "learning_rate": 1.4194971156607293e-05,
      "loss": 2.7489,
      "step": 207752
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.023809909820557,
      "learning_rate": 1.4193727798392441e-05,
      "loss": 2.8331,
      "step": 207753
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6222012042999268,
      "learning_rate": 1.4192484493314282e-05,
      "loss": 2.9712,
      "step": 207754
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7713537216186523,
      "learning_rate": 1.4191241241373186e-05,
      "loss": 2.8501,
      "step": 207755
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.222638130187988,
      "learning_rate": 1.4189998042569283e-05,
      "loss": 3.1054,
      "step": 207756
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3337979316711426,
      "learning_rate": 1.418875489690291e-05,
      "loss": 3.1078,
      "step": 207757
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.70634388923645,
      "learning_rate": 1.4187511804374196e-05,
      "loss": 2.8034,
      "step": 207758
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.753594160079956,
      "learning_rate": 1.4186268764983511e-05,
      "loss": 3.1695,
      "step": 207759
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.017274856567383,
      "learning_rate": 1.4185025778730886e-05,
      "loss": 2.9308,
      "step": 207760
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5175044536590576,
      "learning_rate": 1.418378284561672e-05,
      "loss": 2.6875,
      "step": 207761
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2883381843566895,
      "learning_rate": 1.4182539965641148e-05,
      "loss": 3.0256,
      "step": 207762
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.950287342071533,
      "learning_rate": 1.4181297138804471e-05,
      "loss": 2.8171,
      "step": 207763
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.941277503967285,
      "learning_rate": 1.4180054365106819e-05,
      "loss": 2.9087,
      "step": 207764
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3356823921203613,
      "learning_rate": 1.4178811644548594e-05,
      "loss": 2.8822,
      "step": 207765
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.396846294403076,
      "learning_rate": 1.4177568977129861e-05,
      "loss": 2.7897,
      "step": 207766
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2517566680908203,
      "learning_rate": 1.417632636285092e-05,
      "loss": 3.1668,
      "step": 207767
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.151130199432373,
      "learning_rate": 1.4175083801711973e-05,
      "loss": 2.7875,
      "step": 207768
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.901723861694336,
      "learning_rate": 1.4173841293713316e-05,
      "loss": 2.7689,
      "step": 207769
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7895562648773193,
      "learning_rate": 1.4172598838855086e-05,
      "loss": 2.9324,
      "step": 207770
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.966844081878662,
      "learning_rate": 1.4171356437137682e-05,
      "loss": 3.051,
      "step": 207771
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0136892795562744,
      "learning_rate": 1.4170114088561101e-05,
      "loss": 3.3725,
      "step": 207772
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.068337917327881,
      "learning_rate": 1.4168871793125714e-05,
      "loss": 3.0519,
      "step": 207773
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.654372453689575,
      "learning_rate": 1.4167629550831716e-05,
      "loss": 2.8665,
      "step": 207774
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1166112422943115,
      "learning_rate": 1.416638736167941e-05,
      "loss": 2.803,
      "step": 207775
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.489145040512085,
      "learning_rate": 1.4165145225668895e-05,
      "loss": 3.0867,
      "step": 207776
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9888148307800293,
      "learning_rate": 1.416390314280057e-05,
      "loss": 2.8199,
      "step": 207777
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6767351627349854,
      "learning_rate": 1.4162661113074503e-05,
      "loss": 2.9837,
      "step": 207778
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.54012393951416,
      "learning_rate": 1.4161419136490992e-05,
      "loss": 3.0445,
      "step": 207779
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3559162616729736,
      "learning_rate": 1.4160177213050273e-05,
      "loss": 2.9744,
      "step": 207780
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.147423028945923,
      "learning_rate": 1.4158935342752575e-05,
      "loss": 2.9451,
      "step": 207781
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.880105495452881,
      "learning_rate": 1.4157693525598102e-05,
      "loss": 3.0114,
      "step": 207782
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8197009563446045,
      "learning_rate": 1.4156451761587218e-05,
      "loss": 2.9941,
      "step": 207783
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6143670082092285,
      "learning_rate": 1.4155210050719923e-05,
      "loss": 2.691,
      "step": 207784
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.803828477859497,
      "learning_rate": 1.4153968392996651e-05,
      "loss": 2.8583,
      "step": 207785
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.69736909866333,
      "learning_rate": 1.4152726788417468e-05,
      "loss": 2.8515,
      "step": 207786
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.439876556396484,
      "learning_rate": 1.415148523698274e-05,
      "loss": 2.9619,
      "step": 207787
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.327713966369629,
      "learning_rate": 1.4150243738692635e-05,
      "loss": 2.916,
      "step": 207788
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8944807052612305,
      "learning_rate": 1.4149002293547418e-05,
      "loss": 2.8682,
      "step": 207789
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2235045433044434,
      "learning_rate": 1.4147760901547289e-05,
      "loss": 3.2504,
      "step": 207790
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.743771553039551,
      "learning_rate": 1.4146519562692482e-05,
      "loss": 2.934,
      "step": 207791
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5956015586853027,
      "learning_rate": 1.414527827698323e-05,
      "loss": 2.9182,
      "step": 207792
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7376837730407715,
      "learning_rate": 1.4144037044419765e-05,
      "loss": 2.8609,
      "step": 207793
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6723365783691406,
      "learning_rate": 1.4142795865002322e-05,
      "loss": 2.8279,
      "step": 207794
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.78145694732666,
      "learning_rate": 1.4141554738731131e-05,
      "loss": 2.8811,
      "step": 207795
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.210836887359619,
      "learning_rate": 1.4140313665606428e-05,
      "loss": 2.9564,
      "step": 207796
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8332738876342773,
      "learning_rate": 1.4139072645628446e-05,
      "loss": 2.917,
      "step": 207797
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7317087650299072,
      "learning_rate": 1.413783167879735e-05,
      "loss": 2.8071,
      "step": 207798
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.349138021469116,
      "learning_rate": 1.4136590765113476e-05,
      "loss": 2.6426,
      "step": 207799
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.007798433303833,
      "learning_rate": 1.4135349904576987e-05,
      "loss": 2.9224,
      "step": 207800
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9020073413848877,
      "learning_rate": 1.413410909718815e-05,
      "loss": 2.9917,
      "step": 207801
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8670620918273926,
      "learning_rate": 1.4132868342947202e-05,
      "loss": 2.8891,
      "step": 207802
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9643990993499756,
      "learning_rate": 1.4131627641854304e-05,
      "loss": 2.7287,
      "step": 207803
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3840012550354004,
      "learning_rate": 1.4130386993909725e-05,
      "loss": 2.8194,
      "step": 207804
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3833413124084473,
      "learning_rate": 1.4129146399113733e-05,
      "loss": 3.018,
      "step": 207805
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.546213388442993,
      "learning_rate": 1.4127905857466493e-05,
      "loss": 2.8265,
      "step": 207806
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9896655082702637,
      "learning_rate": 1.4126665368968337e-05,
      "loss": 3.1628,
      "step": 207807
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1731958389282227,
      "learning_rate": 1.41254249336194e-05,
      "loss": 2.9082,
      "step": 207808
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0333285331726074,
      "learning_rate": 1.4124184551419915e-05,
      "loss": 2.7392,
      "step": 207809
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.612761974334717,
      "learning_rate": 1.4122944222370181e-05,
      "loss": 2.9103,
      "step": 207810
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.685330629348755,
      "learning_rate": 1.4121703946470364e-05,
      "loss": 2.8281,
      "step": 207811
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9534707069396973,
      "learning_rate": 1.4120463723720698e-05,
      "loss": 3.0949,
      "step": 207812
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.861687421798706,
      "learning_rate": 1.4119223554121483e-05,
      "loss": 2.7474,
      "step": 207813
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.9686903953552246,
      "learning_rate": 1.4117983437672886e-05,
      "loss": 2.8192,
      "step": 207814
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8377702236175537,
      "learning_rate": 1.4116743374375106e-05,
      "loss": 2.7875,
      "step": 207815
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.71927547454834,
      "learning_rate": 1.4115503364228476e-05,
      "loss": 2.9294,
      "step": 207816
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.935445785522461,
      "learning_rate": 1.4114263407233162e-05,
      "loss": 2.8747,
      "step": 207817
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2818117141723633,
      "learning_rate": 1.4113023503389365e-05,
      "loss": 2.868,
      "step": 207818
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.332172155380249,
      "learning_rate": 1.4111783652697383e-05,
      "loss": 2.8644,
      "step": 207819
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0116360187530518,
      "learning_rate": 1.4110543855157453e-05,
      "loss": 2.9999,
      "step": 207820
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.81422758102417,
      "learning_rate": 1.4109304110769704e-05,
      "loss": 2.8604,
      "step": 207821
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1412150859832764,
      "learning_rate": 1.4108064419534471e-05,
      "loss": 3.0749,
      "step": 207822
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9015235900878906,
      "learning_rate": 1.4106824781451886e-05,
      "loss": 2.963,
      "step": 207823
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.143399477005005,
      "learning_rate": 1.4105585196522318e-05,
      "loss": 2.8132,
      "step": 207824
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.017204761505127,
      "learning_rate": 1.4104345664745897e-05,
      "loss": 2.6953,
      "step": 207825
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1007370948791504,
      "learning_rate": 1.410310618612286e-05,
      "loss": 2.9226,
      "step": 207826
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.615100383758545,
      "learning_rate": 1.4101866760653435e-05,
      "loss": 3.0884,
      "step": 207827
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.794801712036133,
      "learning_rate": 1.4100627388337925e-05,
      "loss": 2.8751,
      "step": 207828
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7405474185943604,
      "learning_rate": 1.4099388069176432e-05,
      "loss": 2.9963,
      "step": 207829
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7317349910736084,
      "learning_rate": 1.4098148803169318e-05,
      "loss": 2.9992,
      "step": 207830
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.193561315536499,
      "learning_rate": 1.4096909590316719e-05,
      "loss": 2.9005,
      "step": 207831
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4466466903686523,
      "learning_rate": 1.4095670430618966e-05,
      "loss": 3.0846,
      "step": 207832
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.883330821990967,
      "learning_rate": 1.4094431324076127e-05,
      "loss": 2.9881,
      "step": 207833
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8809754848480225,
      "learning_rate": 1.4093192270688603e-05,
      "loss": 2.9361,
      "step": 207834
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.663250207901001,
      "learning_rate": 1.4091953270456524e-05,
      "loss": 2.7884,
      "step": 207835
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0480005741119385,
      "learning_rate": 1.409071432338016e-05,
      "loss": 2.7781,
      "step": 207836
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9988718032836914,
      "learning_rate": 1.4089475429459673e-05,
      "loss": 2.6923,
      "step": 207837
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.617410182952881,
      "learning_rate": 1.4088236588695468e-05,
      "loss": 2.976,
      "step": 207838
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.721713542938232,
      "learning_rate": 1.4086997801087574e-05,
      "loss": 3.1441,
      "step": 207839
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.937130928039551,
      "learning_rate": 1.4085759066636327e-05,
      "loss": 2.825,
      "step": 207840
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.569761276245117,
      "learning_rate": 1.4084520385341891e-05,
      "loss": 2.8644,
      "step": 207841
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.934870958328247,
      "learning_rate": 1.4083281757204567e-05,
      "loss": 2.6532,
      "step": 207842
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7508599758148193,
      "learning_rate": 1.4082043182224555e-05,
      "loss": 2.8692,
      "step": 207843
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.424405336380005,
      "learning_rate": 1.4080804660402156e-05,
      "loss": 2.9911,
      "step": 207844
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9526681900024414,
      "learning_rate": 1.4079566191737434e-05,
      "loss": 2.8905,
      "step": 207845
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3265268802642822,
      "learning_rate": 1.407832777623079e-05,
      "loss": 2.8972,
      "step": 207846
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.298943281173706,
      "learning_rate": 1.4077089413882325e-05,
      "loss": 3.0301,
      "step": 207847
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.750319719314575,
      "learning_rate": 1.4075851104692337e-05,
      "loss": 2.9205,
      "step": 207848
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4465038776397705,
      "learning_rate": 1.407461284866106e-05,
      "loss": 2.89,
      "step": 207849
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.172318935394287,
      "learning_rate": 1.4073374645788726e-05,
      "loss": 2.753,
      "step": 207850
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6946685314178467,
      "learning_rate": 1.4072136496075537e-05,
      "loss": 2.8039,
      "step": 207851
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0462582111358643,
      "learning_rate": 1.4070898399521723e-05,
      "loss": 2.7426,
      "step": 207852
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8915083408355713,
      "learning_rate": 1.406966035612752e-05,
      "loss": 3.1288,
      "step": 207853
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1064682006835938,
      "learning_rate": 1.4068422365893194e-05,
      "loss": 3.0761,
      "step": 207854
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.825111150741577,
      "learning_rate": 1.4067184428818911e-05,
      "loss": 2.7137,
      "step": 207855
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.943035364151001,
      "learning_rate": 1.4065946544904971e-05,
      "loss": 2.7551,
      "step": 207856
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.14201021194458,
      "learning_rate": 1.4064708714151573e-05,
      "loss": 3.2078,
      "step": 207857
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8943803310394287,
      "learning_rate": 1.4063470936558919e-05,
      "loss": 2.8255,
      "step": 207858
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0050578117370605,
      "learning_rate": 1.406223321212724e-05,
      "loss": 2.8867,
      "step": 207859
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.228882074356079,
      "learning_rate": 1.4060995540856835e-05,
      "loss": 2.8359,
      "step": 207860
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1580095291137695,
      "learning_rate": 1.4059757922747838e-05,
      "loss": 2.833,
      "step": 207861
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.998640298843384,
      "learning_rate": 1.4058520357800585e-05,
      "loss": 3.2122,
      "step": 207862
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8393328189849854,
      "learning_rate": 1.4057282846015238e-05,
      "loss": 2.7192,
      "step": 207863
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5315163135528564,
      "learning_rate": 1.4056045387392035e-05,
      "loss": 3.0592,
      "step": 207864
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.958935499191284,
      "learning_rate": 1.4054807981931171e-05,
      "loss": 2.9538,
      "step": 207865
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.269789695739746,
      "learning_rate": 1.4053570629632981e-05,
      "loss": 3.1038,
      "step": 207866
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.940208673477173,
      "learning_rate": 1.4052333330497566e-05,
      "loss": 3.1464,
      "step": 207867
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5427234172821045,
      "learning_rate": 1.405109608452526e-05,
      "loss": 2.9139,
      "step": 207868
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.148324966430664,
      "learning_rate": 1.4049858891716259e-05,
      "loss": 2.866,
      "step": 207869
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.183916091918945,
      "learning_rate": 1.4048621752070798e-05,
      "loss": 2.8452,
      "step": 207870
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.755216360092163,
      "learning_rate": 1.4047384665589045e-05,
      "loss": 2.7435,
      "step": 207871
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0208728313446045,
      "learning_rate": 1.4046147632271299e-05,
      "loss": 3.0705,
      "step": 207872
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.12847900390625,
      "learning_rate": 1.4044910652117758e-05,
      "loss": 2.8774,
      "step": 207873
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8367538452148438,
      "learning_rate": 1.4043673725128723e-05,
      "loss": 2.9629,
      "step": 207874
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.531081438064575,
      "learning_rate": 1.404243685130433e-05,
      "loss": 2.8033,
      "step": 207875
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.843646764755249,
      "learning_rate": 1.4041200030644873e-05,
      "loss": 2.6667,
      "step": 207876
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7444703578948975,
      "learning_rate": 1.4039963263150489e-05,
      "loss": 2.7146,
      "step": 207877
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.467240571975708,
      "learning_rate": 1.4038726548821543e-05,
      "loss": 2.9284,
      "step": 207878
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5682973861694336,
      "learning_rate": 1.4037489887658137e-05,
      "loss": 2.9127,
      "step": 207879
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2157530784606934,
      "learning_rate": 1.4036253279660604e-05,
      "loss": 2.8708,
      "step": 207880
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6791324615478516,
      "learning_rate": 1.4035016724829108e-05,
      "loss": 2.6032,
      "step": 207881
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0413076877593994,
      "learning_rate": 1.4033780223163948e-05,
      "loss": 2.9756,
      "step": 207882
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.732060670852661,
      "learning_rate": 1.4032543774665228e-05,
      "loss": 2.8828,
      "step": 207883
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6688249111175537,
      "learning_rate": 1.4031307379333312e-05,
      "loss": 3.011,
      "step": 207884
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.492790699005127,
      "learning_rate": 1.4030071037168333e-05,
      "loss": 2.5698,
      "step": 207885
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7116456031799316,
      "learning_rate": 1.4028834748170592e-05,
      "loss": 2.6426,
      "step": 207886
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.956587076187134,
      "learning_rate": 1.4027598512340288e-05,
      "loss": 2.8361,
      "step": 207887
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.924220085144043,
      "learning_rate": 1.4026362329677654e-05,
      "loss": 2.8588,
      "step": 207888
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8885600566864014,
      "learning_rate": 1.402512620018289e-05,
      "loss": 2.9289,
      "step": 207889
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7202816009521484,
      "learning_rate": 1.4023890123856296e-05,
      "loss": 3.0593,
      "step": 207890
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5247671604156494,
      "learning_rate": 1.4022654100698006e-05,
      "loss": 3.0829,
      "step": 207891
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.726393938064575,
      "learning_rate": 1.4021418130708351e-05,
      "loss": 3.0061,
      "step": 207892
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7107882499694824,
      "learning_rate": 1.4020182213887499e-05,
      "loss": 3.0852,
      "step": 207893
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8741416931152344,
      "learning_rate": 1.4018946350235648e-05,
      "loss": 2.7786,
      "step": 207894
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.552422046661377,
      "learning_rate": 1.4017710539753135e-05,
      "loss": 2.9996,
      "step": 207895
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6631317138671875,
      "learning_rate": 1.4016474782440123e-05,
      "loss": 2.9615,
      "step": 207896
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6785812377929688,
      "learning_rate": 1.4015239078296814e-05,
      "loss": 2.6413,
      "step": 207897
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.996124267578125,
      "learning_rate": 1.4014003427323473e-05,
      "loss": 3.0789,
      "step": 207898
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6537535190582275,
      "learning_rate": 1.4012767829520365e-05,
      "loss": 2.844,
      "step": 207899
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2643325328826904,
      "learning_rate": 1.4011532284887628e-05,
      "loss": 3.0559,
      "step": 207900
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7034709453582764,
      "learning_rate": 1.401029679342559e-05,
      "loss": 2.6882,
      "step": 207901
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6931586265563965,
      "learning_rate": 1.400906135513442e-05,
      "loss": 3.044,
      "step": 207902
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.928931474685669,
      "learning_rate": 1.4007825970014352e-05,
      "loss": 2.9194,
      "step": 207903
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7163171768188477,
      "learning_rate": 1.4006590638065652e-05,
      "loss": 2.8881,
      "step": 207904
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5006725788116455,
      "learning_rate": 1.4005355359288517e-05,
      "loss": 3.0349,
      "step": 207905
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7630789279937744,
      "learning_rate": 1.4004120133683183e-05,
      "loss": 2.878,
      "step": 207906
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2176547050476074,
      "learning_rate": 1.4002884961249883e-05,
      "loss": 2.9564,
      "step": 207907
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2831461429595947,
      "learning_rate": 1.4001649841988816e-05,
      "loss": 3.0015,
      "step": 207908
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1846635341644287,
      "learning_rate": 1.4000414775900281e-05,
      "loss": 2.9874,
      "step": 207909
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7138924598693848,
      "learning_rate": 1.399917976298448e-05,
      "loss": 2.9724,
      "step": 207910
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1188924312591553,
      "learning_rate": 1.3997944803241612e-05,
      "loss": 2.7867,
      "step": 207911
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.8703150749206543,
      "learning_rate": 1.3996709896671876e-05,
      "loss": 2.9092,
      "step": 207912
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.937586784362793,
      "learning_rate": 1.3995475043275606e-05,
      "loss": 2.9812,
      "step": 207913
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.884239673614502,
      "learning_rate": 1.3994240243052935e-05,
      "loss": 2.8703,
      "step": 207914
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.2554755210876465,
      "learning_rate": 1.3993005496004195e-05,
      "loss": 2.9654,
      "step": 207915
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1359620094299316,
      "learning_rate": 1.3991770802129488e-05,
      "loss": 3.1627,
      "step": 207916
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.666250228881836,
      "learning_rate": 1.3990536161429144e-05,
      "loss": 2.6812,
      "step": 207917
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2045860290527344,
      "learning_rate": 1.3989301573903366e-05,
      "loss": 2.9112,
      "step": 207918
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.085076332092285,
      "learning_rate": 1.3988067039552387e-05,
      "loss": 2.7918,
      "step": 207919
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.698782205581665,
      "learning_rate": 1.398683255837637e-05,
      "loss": 3.0227,
      "step": 207920
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.350834608078003,
      "learning_rate": 1.3985598130375653e-05,
      "loss": 2.8272,
      "step": 207921
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8230934143066406,
      "learning_rate": 1.3984363755550365e-05,
      "loss": 2.5709,
      "step": 207922
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.736558198928833,
      "learning_rate": 1.3983129433900809e-05,
      "loss": 2.5844,
      "step": 207923
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2255985736846924,
      "learning_rate": 1.3981895165427216e-05,
      "loss": 2.9284,
      "step": 207924
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6996383666992188,
      "learning_rate": 1.3980660950129785e-05,
      "loss": 2.7882,
      "step": 207925
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5482983589172363,
      "learning_rate": 1.3979426788008686e-05,
      "loss": 3.0188,
      "step": 207926
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7400124073028564,
      "learning_rate": 1.3978192679064248e-05,
      "loss": 2.8325,
      "step": 207927
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.777056932449341,
      "learning_rate": 1.3976958623296642e-05,
      "loss": 2.9744,
      "step": 207928
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.4676008224487305,
      "learning_rate": 1.3975724620706164e-05,
      "loss": 3.1068,
      "step": 207929
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7076797485351562,
      "learning_rate": 1.3974490671292981e-05,
      "loss": 2.7367,
      "step": 207930
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1560027599334717,
      "learning_rate": 1.3973256775057363e-05,
      "loss": 2.8708,
      "step": 207931
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6346397399902344,
      "learning_rate": 1.3972022931999438e-05,
      "loss": 2.9162,
      "step": 207932
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9839184284210205,
      "learning_rate": 1.3970789142119576e-05,
      "loss": 2.9884,
      "step": 207933
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4036617279052734,
      "learning_rate": 1.396955540541791e-05,
      "loss": 2.9606,
      "step": 207934
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3164420127868652,
      "learning_rate": 1.3968321721894738e-05,
      "loss": 2.7946,
      "step": 207935
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1315531730651855,
      "learning_rate": 1.3967088091550227e-05,
      "loss": 2.8387,
      "step": 207936
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5467159748077393,
      "learning_rate": 1.3965854514384678e-05,
      "loss": 2.8795,
      "step": 207937
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.576066732406616,
      "learning_rate": 1.396462099039819e-05,
      "loss": 2.8767,
      "step": 207938
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.746041774749756,
      "learning_rate": 1.3963387519591162e-05,
      "loss": 2.7608,
      "step": 207939
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.429779529571533,
      "learning_rate": 1.3962154101963664e-05,
      "loss": 2.9817,
      "step": 207940
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.8410470485687256,
      "learning_rate": 1.3960920737516056e-05,
      "loss": 2.8171,
      "step": 207941
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0790531635284424,
      "learning_rate": 1.3959687426248511e-05,
      "loss": 2.886,
      "step": 207942
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5224878787994385,
      "learning_rate": 1.3958454168161293e-05,
      "loss": 3.0467,
      "step": 207943
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.8936479091644287,
      "learning_rate": 1.3957220963254501e-05,
      "loss": 3.0867,
      "step": 207944
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.098292827606201,
      "learning_rate": 1.3955987811528536e-05,
      "loss": 2.907,
      "step": 207945
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.317586660385132,
      "learning_rate": 1.3954754712983497e-05,
      "loss": 2.7833,
      "step": 207946
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6660993099212646,
      "learning_rate": 1.3953521667619716e-05,
      "loss": 2.9113,
      "step": 207947
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1673920154571533,
      "learning_rate": 1.3952288675437395e-05,
      "loss": 2.9693,
      "step": 207948
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.34956955909729,
      "learning_rate": 1.39510557364367e-05,
      "loss": 2.8115,
      "step": 207949
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.727705001831055,
      "learning_rate": 1.3949822850617897e-05,
      "loss": 3.2314,
      "step": 207950
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9995439052581787,
      "learning_rate": 1.3948590017981254e-05,
      "loss": 2.9307,
      "step": 207951
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.250119686126709,
      "learning_rate": 1.3947357238526935e-05,
      "loss": 2.8681,
      "step": 207952
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1257145404815674,
      "learning_rate": 1.3946124512255209e-05,
      "loss": 2.9313,
      "step": 207953
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.352208137512207,
      "learning_rate": 1.3944891839166338e-05,
      "loss": 3.0924,
      "step": 207954
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5377140045166016,
      "learning_rate": 1.3943659219260495e-05,
      "loss": 3.1899,
      "step": 207955
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.013465404510498,
      "learning_rate": 1.3942426652537875e-05,
      "loss": 3.031,
      "step": 207956
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.300328493118286,
      "learning_rate": 1.3941194138998813e-05,
      "loss": 2.8522,
      "step": 207957
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.566767692565918,
      "learning_rate": 1.3939961678643441e-05,
      "loss": 2.9821,
      "step": 207958
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7203001976013184,
      "learning_rate": 1.393872927147206e-05,
      "loss": 2.8153,
      "step": 207959
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.865917921066284,
      "learning_rate": 1.393749691748487e-05,
      "loss": 2.8334,
      "step": 207960
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7776145935058594,
      "learning_rate": 1.3936264616682102e-05,
      "loss": 3.0217,
      "step": 207961
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9838223457336426,
      "learning_rate": 1.3935032369063926e-05,
      "loss": 2.6874,
      "step": 207962
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6414031982421875,
      "learning_rate": 1.3933800174630704e-05,
      "loss": 2.6729,
      "step": 207963
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.962215900421143,
      "learning_rate": 1.3932568033382541e-05,
      "loss": 2.9745,
      "step": 207964
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2655084133148193,
      "learning_rate": 1.3931335945319732e-05,
      "loss": 2.6142,
      "step": 207965
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.654101610183716,
      "learning_rate": 1.3930103910442481e-05,
      "loss": 2.789,
      "step": 207966
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.66374397277832,
      "learning_rate": 1.3928871928751051e-05,
      "loss": 2.8378,
      "step": 207967
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.255650281906128,
      "learning_rate": 1.3927640000245577e-05,
      "loss": 2.7784,
      "step": 207968
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.31387186050415,
      "learning_rate": 1.3926408124926424e-05,
      "loss": 2.8154,
      "step": 207969
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7030959129333496,
      "learning_rate": 1.3925176302793695e-05,
      "loss": 2.6747,
      "step": 207970
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9330146312713623,
      "learning_rate": 1.392394453384772e-05,
      "loss": 2.794,
      "step": 207971
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0183868408203125,
      "learning_rate": 1.3922712818088699e-05,
      "loss": 2.8178,
      "step": 207972
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2557272911071777,
      "learning_rate": 1.3921481155516833e-05,
      "loss": 2.9106,
      "step": 207973
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.291186809539795,
      "learning_rate": 1.3920249546132323e-05,
      "loss": 2.8965,
      "step": 207974
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.068101406097412,
      "learning_rate": 1.3919017989935466e-05,
      "loss": 2.9496,
      "step": 207975
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4336633682250977,
      "learning_rate": 1.391778648692643e-05,
      "loss": 2.85,
      "step": 207976
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.767644166946411,
      "learning_rate": 1.3916555037105515e-05,
      "loss": 3.0132,
      "step": 207977
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4019153118133545,
      "learning_rate": 1.391532364047292e-05,
      "loss": 2.9864,
      "step": 207978
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.365483283996582,
      "learning_rate": 1.3914092297028879e-05,
      "loss": 2.8348,
      "step": 207979
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9034922122955322,
      "learning_rate": 1.3912861006773557e-05,
      "loss": 3.0199,
      "step": 207980
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1529178619384766,
      "learning_rate": 1.3911629769707255e-05,
      "loss": 3.0137,
      "step": 207981
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9383597373962402,
      "learning_rate": 1.3910398585830173e-05,
      "loss": 3.0523,
      "step": 207982
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3391690254211426,
      "learning_rate": 1.3909167455142578e-05,
      "loss": 2.8833,
      "step": 207983
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7305169105529785,
      "learning_rate": 1.3907936377644635e-05,
      "loss": 2.9616,
      "step": 207984
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.91947340965271,
      "learning_rate": 1.3906705353336611e-05,
      "loss": 3.0327,
      "step": 207985
    },
    {
      "epoch": 2.71,
      "grad_norm": 7.92901611328125,
      "learning_rate": 1.3905474382218774e-05,
      "loss": 2.9335,
      "step": 207986
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.740389585494995,
      "learning_rate": 1.3904243464291287e-05,
      "loss": 2.9319,
      "step": 207987
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.084928512573242,
      "learning_rate": 1.3903012599554386e-05,
      "loss": 3.1567,
      "step": 207988
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.242330074310303,
      "learning_rate": 1.3901781788008337e-05,
      "loss": 3.1913,
      "step": 207989
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.052183151245117,
      "learning_rate": 1.3900551029653307e-05,
      "loss": 2.7104,
      "step": 207990
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.994370222091675,
      "learning_rate": 1.3899320324489593e-05,
      "loss": 3.0382,
      "step": 207991
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.655890464782715,
      "learning_rate": 1.3898089672517399e-05,
      "loss": 2.8995,
      "step": 207992
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.085003852844238,
      "learning_rate": 1.3896859073736922e-05,
      "loss": 2.9459,
      "step": 207993
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.927396535873413,
      "learning_rate": 1.3895628528148461e-05,
      "loss": 2.7346,
      "step": 207994
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.781485080718994,
      "learning_rate": 1.3894398035752219e-05,
      "loss": 3.0109,
      "step": 207995
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.198025941848755,
      "learning_rate": 1.3893167596548328e-05,
      "loss": 2.8623,
      "step": 207996
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.705868721008301,
      "learning_rate": 1.3891937210537186e-05,
      "loss": 2.8731,
      "step": 207997
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9390869140625,
      "learning_rate": 1.3890706877718893e-05,
      "loss": 2.8561,
      "step": 207998
    },
    {
      "epoch": 2.71,
      "grad_norm": 6.15828275680542,
      "learning_rate": 1.3889476598093685e-05,
      "loss": 2.6635,
      "step": 207999
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.326401472091675,
      "learning_rate": 1.3888246371661894e-05,
      "loss": 2.9229,
      "step": 208000
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0049901008605957,
      "learning_rate": 1.3887016198423651e-05,
      "loss": 2.9509,
      "step": 208001
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.845959424972534,
      "learning_rate": 1.3885786078379191e-05,
      "loss": 2.9418,
      "step": 208002
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.630744218826294,
      "learning_rate": 1.388455601152878e-05,
      "loss": 3.0617,
      "step": 208003
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8272769451141357,
      "learning_rate": 1.3883325997872652e-05,
      "loss": 2.8054,
      "step": 208004
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5433349609375,
      "learning_rate": 1.388209603741094e-05,
      "loss": 3.022,
      "step": 208005
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.312394618988037,
      "learning_rate": 1.3880866130144042e-05,
      "loss": 3.0373,
      "step": 208006
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.690598249435425,
      "learning_rate": 1.3879636276071992e-05,
      "loss": 3.2266,
      "step": 208007
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.876774549484253,
      "learning_rate": 1.3878406475195191e-05,
      "loss": 3.0632,
      "step": 208008
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7961843013763428,
      "learning_rate": 1.3877176727513806e-05,
      "loss": 2.7958,
      "step": 208009
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.01094388961792,
      "learning_rate": 1.3875947033028034e-05,
      "loss": 2.7755,
      "step": 208010
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9818918704986572,
      "learning_rate": 1.3874717391738111e-05,
      "loss": 3.079,
      "step": 208011
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.372875213623047,
      "learning_rate": 1.38734878036443e-05,
      "loss": 2.9196,
      "step": 208012
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.908280372619629,
      "learning_rate": 1.3872258268746772e-05,
      "loss": 3.0571,
      "step": 208013
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.040753126144409,
      "learning_rate": 1.3871028787045823e-05,
      "loss": 3.087,
      "step": 208014
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5100483894348145,
      "learning_rate": 1.3869799358541655e-05,
      "loss": 3.0205,
      "step": 208015
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4192287921905518,
      "learning_rate": 1.3868569983234502e-05,
      "loss": 2.7744,
      "step": 208016
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.913935422897339,
      "learning_rate": 1.3867340661124526e-05,
      "loss": 2.7972,
      "step": 208017
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9493730068206787,
      "learning_rate": 1.3866111392212065e-05,
      "loss": 2.8129,
      "step": 208018
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0097551345825195,
      "learning_rate": 1.3864882176497249e-05,
      "loss": 2.9232,
      "step": 208019
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3094801902770996,
      "learning_rate": 1.3863653013980413e-05,
      "loss": 2.9082,
      "step": 208020
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.889846086502075,
      "learning_rate": 1.386242390466169e-05,
      "loss": 2.7664,
      "step": 208021
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4190263748168945,
      "learning_rate": 1.386119484854138e-05,
      "loss": 2.8905,
      "step": 208022
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.781000852584839,
      "learning_rate": 1.3859965845619614e-05,
      "loss": 2.8304,
      "step": 208023
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8085062503814697,
      "learning_rate": 1.3858736895896726e-05,
      "loss": 2.79,
      "step": 208024
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1571545600891113,
      "learning_rate": 1.3857507999372853e-05,
      "loss": 2.8446,
      "step": 208025
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.117249011993408,
      "learning_rate": 1.3856279156048322e-05,
      "loss": 2.8647,
      "step": 208026
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.157919406890869,
      "learning_rate": 1.3855050365923304e-05,
      "loss": 2.9369,
      "step": 208027
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1199676990509033,
      "learning_rate": 1.385382162899803e-05,
      "loss": 2.8537,
      "step": 208028
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.835233449935913,
      "learning_rate": 1.3852592945272701e-05,
      "loss": 2.8855,
      "step": 208029
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.169582843780518,
      "learning_rate": 1.3851364314747615e-05,
      "loss": 2.8276,
      "step": 208030
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9925169944763184,
      "learning_rate": 1.3850135737422907e-05,
      "loss": 3.0979,
      "step": 208031
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.63690447807312,
      "learning_rate": 1.384890721329891e-05,
      "loss": 2.9665,
      "step": 208032
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.857572317123413,
      "learning_rate": 1.384767874237579e-05,
      "loss": 2.8447,
      "step": 208033
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0758042335510254,
      "learning_rate": 1.3846450324653812e-05,
      "loss": 2.9309,
      "step": 208034
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.837533950805664,
      "learning_rate": 1.3845221960133145e-05,
      "loss": 2.8954,
      "step": 208035
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8454153537750244,
      "learning_rate": 1.3843993648814055e-05,
      "loss": 3.0234,
      "step": 208036
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2126784324645996,
      "learning_rate": 1.3842765390696774e-05,
      "loss": 2.8577,
      "step": 208037
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.458705186843872,
      "learning_rate": 1.3841537185781538e-05,
      "loss": 2.8514,
      "step": 208038
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8851189613342285,
      "learning_rate": 1.3840309034068575e-05,
      "loss": 2.805,
      "step": 208039
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7450613975524902,
      "learning_rate": 1.3839080935558088e-05,
      "loss": 3.004,
      "step": 208040
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1092166900634766,
      "learning_rate": 1.3837852890250278e-05,
      "loss": 3.0946,
      "step": 208041
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8722949028015137,
      "learning_rate": 1.3836624898145442e-05,
      "loss": 3.1642,
      "step": 208042
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1591663360595703,
      "learning_rate": 1.383539695924375e-05,
      "loss": 3.2034,
      "step": 208043
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.012394905090332,
      "learning_rate": 1.3834169073545498e-05,
      "loss": 2.8854,
      "step": 208044
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.139239549636841,
      "learning_rate": 1.3832941241050887e-05,
      "loss": 2.8815,
      "step": 208045
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.306884765625,
      "learning_rate": 1.3831713461760152e-05,
      "loss": 2.9434,
      "step": 208046
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9287314414978027,
      "learning_rate": 1.3830485735673424e-05,
      "loss": 2.8758,
      "step": 208047
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9534664154052734,
      "learning_rate": 1.382925806279107e-05,
      "loss": 2.8859,
      "step": 208048
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.491807222366333,
      "learning_rate": 1.3828030443113192e-05,
      "loss": 2.834,
      "step": 208049
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2515621185302734,
      "learning_rate": 1.3826802876640152e-05,
      "loss": 2.8699,
      "step": 208050
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4289560317993164,
      "learning_rate": 1.3825575363372088e-05,
      "loss": 2.8085,
      "step": 208051
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6759719848632812,
      "learning_rate": 1.3824347903309296e-05,
      "loss": 2.9456,
      "step": 208052
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8118410110473633,
      "learning_rate": 1.3823120496451912e-05,
      "loss": 2.9115,
      "step": 208053
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0787315368652344,
      "learning_rate": 1.3821893142800234e-05,
      "loss": 3.073,
      "step": 208054
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1159467697143555,
      "learning_rate": 1.3820665842354427e-05,
      "loss": 2.8276,
      "step": 208055
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9572012424468994,
      "learning_rate": 1.3819438595114796e-05,
      "loss": 2.8106,
      "step": 208056
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9795339107513428,
      "learning_rate": 1.3818211401081503e-05,
      "loss": 2.8074,
      "step": 208057
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.653444528579712,
      "learning_rate": 1.3816984260254882e-05,
      "loss": 2.8756,
      "step": 208058
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2605226039886475,
      "learning_rate": 1.3815757172635e-05,
      "loss": 2.594,
      "step": 208059
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.724553108215332,
      "learning_rate": 1.3814530138222224e-05,
      "loss": 2.9878,
      "step": 208060
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.766651153564453,
      "learning_rate": 1.3813303157016653e-05,
      "loss": 2.8289,
      "step": 208061
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.441455841064453,
      "learning_rate": 1.3812076229018687e-05,
      "loss": 3.0763,
      "step": 208062
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.415865659713745,
      "learning_rate": 1.3810849354228393e-05,
      "loss": 3.033,
      "step": 208063
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4169561862945557,
      "learning_rate": 1.3809622532646136e-05,
      "loss": 2.7632,
      "step": 208064
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1380960941314697,
      "learning_rate": 1.3808395764271985e-05,
      "loss": 2.6957,
      "step": 208065
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.896221160888672,
      "learning_rate": 1.3807169049106304e-05,
      "loss": 2.9705,
      "step": 208066
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.692896842956543,
      "learning_rate": 1.380594238714926e-05,
      "loss": 2.9776,
      "step": 208067
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.019085645675659,
      "learning_rate": 1.3804715778401087e-05,
      "loss": 2.9842,
      "step": 208068
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9958293437957764,
      "learning_rate": 1.3803489222862018e-05,
      "loss": 2.8373,
      "step": 208069
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1797091960906982,
      "learning_rate": 1.380226272053232e-05,
      "loss": 2.9493,
      "step": 208070
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.580584764480591,
      "learning_rate": 1.3801036271412157e-05,
      "loss": 2.963,
      "step": 208071
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4441113471984863,
      "learning_rate": 1.37998098755018e-05,
      "loss": 3.2328,
      "step": 208072
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.113813638687134,
      "learning_rate": 1.379858353280141e-05,
      "loss": 2.8452,
      "step": 208073
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.64414381980896,
      "learning_rate": 1.3797357243311324e-05,
      "loss": 2.9226,
      "step": 208074
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.029170274734497,
      "learning_rate": 1.3796131007031674e-05,
      "loss": 2.8482,
      "step": 208075
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5822222232818604,
      "learning_rate": 1.379490482396276e-05,
      "loss": 2.9542,
      "step": 208076
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9794187545776367,
      "learning_rate": 1.3793678694104748e-05,
      "loss": 2.8679,
      "step": 208077
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3305418491363525,
      "learning_rate": 1.3792452617457905e-05,
      "loss": 2.9329,
      "step": 208078
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8325889110565186,
      "learning_rate": 1.3791226594022465e-05,
      "loss": 2.8339,
      "step": 208079
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9440627098083496,
      "learning_rate": 1.3790000623798625e-05,
      "loss": 3.0701,
      "step": 208080
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.602114677429199,
      "learning_rate": 1.3788774706786587e-05,
      "loss": 3.0227,
      "step": 208081
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7901413440704346,
      "learning_rate": 1.3787548842986652e-05,
      "loss": 2.9614,
      "step": 208082
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.003478527069092,
      "learning_rate": 1.378632303239905e-05,
      "loss": 2.9919,
      "step": 208083
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8106281757354736,
      "learning_rate": 1.3785097275023915e-05,
      "loss": 2.9194,
      "step": 208084
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0201354026794434,
      "learning_rate": 1.3783871570861549e-05,
      "loss": 3.1103,
      "step": 208085
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6343445777893066,
      "learning_rate": 1.3782645919912183e-05,
      "loss": 3.0488,
      "step": 208086
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.952301025390625,
      "learning_rate": 1.3781420322175984e-05,
      "loss": 2.9351,
      "step": 208087
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7354540824890137,
      "learning_rate": 1.3780194777653286e-05,
      "loss": 2.9469,
      "step": 208088
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.925258159637451,
      "learning_rate": 1.3778969286344221e-05,
      "loss": 2.8295,
      "step": 208089
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.962702751159668,
      "learning_rate": 1.3777743848249024e-05,
      "loss": 2.8119,
      "step": 208090
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9186344146728516,
      "learning_rate": 1.3776518463367992e-05,
      "loss": 2.9331,
      "step": 208091
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3397295475006104,
      "learning_rate": 1.377529313170126e-05,
      "loss": 2.8316,
      "step": 208092
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0582938194274902,
      "learning_rate": 1.3774067853249126e-05,
      "loss": 3.0823,
      "step": 208093
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.379038095474243,
      "learning_rate": 1.3772842628011826e-05,
      "loss": 3.1079,
      "step": 208094
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.040856122970581,
      "learning_rate": 1.3771617455989525e-05,
      "loss": 2.9259,
      "step": 208095
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.124521493911743,
      "learning_rate": 1.3770392337182489e-05,
      "loss": 3.1386,
      "step": 208096
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.185235023498535,
      "learning_rate": 1.3769167271590953e-05,
      "loss": 2.9703,
      "step": 208097
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9280872344970703,
      "learning_rate": 1.3767942259215081e-05,
      "loss": 2.8031,
      "step": 208098
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6793081760406494,
      "learning_rate": 1.3766717300055207e-05,
      "loss": 2.9175,
      "step": 208099
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8981728553771973,
      "learning_rate": 1.37654923941115e-05,
      "loss": 2.8662,
      "step": 208100
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.714592218399048,
      "learning_rate": 1.3764267541384189e-05,
      "loss": 2.9771,
      "step": 208101
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9811487197875977,
      "learning_rate": 1.3763042741873475e-05,
      "loss": 2.7953,
      "step": 208102
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.87045955657959,
      "learning_rate": 1.3761817995579627e-05,
      "loss": 3.0125,
      "step": 208103
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.057690382003784,
      "learning_rate": 1.3760593302502842e-05,
      "loss": 2.9831,
      "step": 208104
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.783881425857544,
      "learning_rate": 1.3759368662643422e-05,
      "loss": 2.7784,
      "step": 208105
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.65617299079895,
      "learning_rate": 1.3758144076001498e-05,
      "loss": 2.8665,
      "step": 208106
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6652700901031494,
      "learning_rate": 1.3756919542577372e-05,
      "loss": 3.0597,
      "step": 208107
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7664475440979004,
      "learning_rate": 1.3755695062371175e-05,
      "loss": 2.7924,
      "step": 208108
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.001725435256958,
      "learning_rate": 1.375447063538324e-05,
      "loss": 2.8112,
      "step": 208109
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9576375484466553,
      "learning_rate": 1.3753246261613737e-05,
      "loss": 2.8245,
      "step": 208110
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.707613706588745,
      "learning_rate": 1.3752021941062929e-05,
      "loss": 3.0825,
      "step": 208111
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3123960494995117,
      "learning_rate": 1.3750797673731018e-05,
      "loss": 3.0357,
      "step": 208112
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0238327980041504,
      "learning_rate": 1.3749573459618235e-05,
      "loss": 3.133,
      "step": 208113
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.963243007659912,
      "learning_rate": 1.374834929872478e-05,
      "loss": 2.7313,
      "step": 208114
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.615537405014038,
      "learning_rate": 1.3747125191050955e-05,
      "loss": 3.064,
      "step": 208115
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.974160671234131,
      "learning_rate": 1.3745901136596893e-05,
      "loss": 2.7254,
      "step": 208116
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.8017654418945312,
      "learning_rate": 1.3744677135362925e-05,
      "loss": 2.9358,
      "step": 208117
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6680541038513184,
      "learning_rate": 1.3743453187349185e-05,
      "loss": 3.1668,
      "step": 208118
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5537257194519043,
      "learning_rate": 1.3742229292556006e-05,
      "loss": 2.8352,
      "step": 208119
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5650739669799805,
      "learning_rate": 1.3741005450983455e-05,
      "loss": 3.074,
      "step": 208120
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1667284965515137,
      "learning_rate": 1.3739781662631933e-05,
      "loss": 2.6578,
      "step": 208121
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.715686321258545,
      "learning_rate": 1.3738557927501536e-05,
      "loss": 2.8926,
      "step": 208122
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8232407569885254,
      "learning_rate": 1.3737334245592568e-05,
      "loss": 2.9313,
      "step": 208123
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.005512237548828,
      "learning_rate": 1.3736110616905227e-05,
      "loss": 2.7734,
      "step": 208124
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.693314790725708,
      "learning_rate": 1.373488704143978e-05,
      "loss": 2.8501,
      "step": 208125
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9362871646881104,
      "learning_rate": 1.3733663519196391e-05,
      "loss": 2.822,
      "step": 208126
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8417322635650635,
      "learning_rate": 1.373244005017533e-05,
      "loss": 2.811,
      "step": 208127
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2295820713043213,
      "learning_rate": 1.3731216634376763e-05,
      "loss": 2.9384,
      "step": 208128
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3197085857391357,
      "learning_rate": 1.3729993271801021e-05,
      "loss": 3.2557,
      "step": 208129
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.074936866760254,
      "learning_rate": 1.3728769962448239e-05,
      "loss": 3.0692,
      "step": 208130
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8362998962402344,
      "learning_rate": 1.3727546706318748e-05,
      "loss": 2.8686,
      "step": 208131
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0304830074310303,
      "learning_rate": 1.372632350341265e-05,
      "loss": 2.916,
      "step": 208132
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.915717363357544,
      "learning_rate": 1.3725100353730246e-05,
      "loss": 2.8445,
      "step": 208133
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8349452018737793,
      "learning_rate": 1.372387725727173e-05,
      "loss": 2.9873,
      "step": 208134
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.517759084701538,
      "learning_rate": 1.3722654214037376e-05,
      "loss": 3.1053,
      "step": 208135
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.57836651802063,
      "learning_rate": 1.3721431224027379e-05,
      "loss": 2.9139,
      "step": 208136
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.475428819656372,
      "learning_rate": 1.3720208287242008e-05,
      "loss": 2.9991,
      "step": 208137
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.729550838470459,
      "learning_rate": 1.3718985403681392e-05,
      "loss": 2.9544,
      "step": 208138
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.089506149291992,
      "learning_rate": 1.3717762573345869e-05,
      "loss": 3.0034,
      "step": 208139
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.9099414348602295,
      "learning_rate": 1.371653979623557e-05,
      "loss": 2.8678,
      "step": 208140
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.205313205718994,
      "learning_rate": 1.3715317072350795e-05,
      "loss": 2.8275,
      "step": 208141
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9263947010040283,
      "learning_rate": 1.3714094401691745e-05,
      "loss": 3.0021,
      "step": 208142
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.924544095993042,
      "learning_rate": 1.3712871784258683e-05,
      "loss": 2.9402,
      "step": 208143
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1293797492980957,
      "learning_rate": 1.3711649220051746e-05,
      "loss": 3.1009,
      "step": 208144
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.336568593978882,
      "learning_rate": 1.3710426709071265e-05,
      "loss": 3.1362,
      "step": 208145
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.830120801925659,
      "learning_rate": 1.3709204251317374e-05,
      "loss": 3.0316,
      "step": 208146
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6338627338409424,
      "learning_rate": 1.3707981846790372e-05,
      "loss": 2.8752,
      "step": 208147
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4377124309539795,
      "learning_rate": 1.370675949549046e-05,
      "loss": 2.9124,
      "step": 208148
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8080739974975586,
      "learning_rate": 1.3705537197417904e-05,
      "loss": 2.9895,
      "step": 208149
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.377220869064331,
      "learning_rate": 1.3704314952572838e-05,
      "loss": 2.7124,
      "step": 208150
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.621739625930786,
      "learning_rate": 1.370309276095556e-05,
      "loss": 2.8871,
      "step": 208151
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.380521774291992,
      "learning_rate": 1.3701870622566268e-05,
      "loss": 3.0711,
      "step": 208152
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.827084541320801,
      "learning_rate": 1.3700648537405202e-05,
      "loss": 2.9488,
      "step": 208153
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4839890003204346,
      "learning_rate": 1.3699426505472589e-05,
      "loss": 2.8055,
      "step": 208154
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3156936168670654,
      "learning_rate": 1.3698204526768697e-05,
      "loss": 2.8903,
      "step": 208155
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.192439317703247,
      "learning_rate": 1.3696982601293694e-05,
      "loss": 2.8159,
      "step": 208156
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0368804931640625,
      "learning_rate": 1.3695760729047811e-05,
      "loss": 3.1801,
      "step": 208157
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.88887619972229,
      "learning_rate": 1.3694538910031283e-05,
      "loss": 2.6525,
      "step": 208158
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4051778316497803,
      "learning_rate": 1.3693317144244376e-05,
      "loss": 2.855,
      "step": 208159
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7853238582611084,
      "learning_rate": 1.3692095431687255e-05,
      "loss": 2.9559,
      "step": 208160
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7863447666168213,
      "learning_rate": 1.3690873772360189e-05,
      "loss": 3.0655,
      "step": 208161
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.804456949234009,
      "learning_rate": 1.368965216626341e-05,
      "loss": 2.7639,
      "step": 208162
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.816450357437134,
      "learning_rate": 1.368843061339715e-05,
      "loss": 3.0605,
      "step": 208163
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.069568157196045,
      "learning_rate": 1.3687209113761544e-05,
      "loss": 2.8073,
      "step": 208164
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1592941284179688,
      "learning_rate": 1.3685987667356958e-05,
      "loss": 2.9901,
      "step": 208165
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0668561458587646,
      "learning_rate": 1.368476627418349e-05,
      "loss": 3.0734,
      "step": 208166
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5328752994537354,
      "learning_rate": 1.3683544934241475e-05,
      "loss": 2.8583,
      "step": 208167
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3188669681549072,
      "learning_rate": 1.368232364753108e-05,
      "loss": 2.8199,
      "step": 208168
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.683241367340088,
      "learning_rate": 1.3681102414052536e-05,
      "loss": 2.8499,
      "step": 208169
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.029655456542969,
      "learning_rate": 1.3679881233806078e-05,
      "loss": 3.0804,
      "step": 208170
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.642188787460327,
      "learning_rate": 1.3678660106791971e-05,
      "loss": 2.8053,
      "step": 208171
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3566484451293945,
      "learning_rate": 1.3677439033010352e-05,
      "loss": 2.6782,
      "step": 208172
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1543655395507812,
      "learning_rate": 1.3676218012461548e-05,
      "loss": 2.9242,
      "step": 208173
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7190847396850586,
      "learning_rate": 1.367499704514573e-05,
      "loss": 2.8549,
      "step": 208174
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.702310562133789,
      "learning_rate": 1.3673776131063097e-05,
      "loss": 2.8956,
      "step": 208175
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5473644733428955,
      "learning_rate": 1.367255527021398e-05,
      "loss": 3.0301,
      "step": 208176
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6940176486968994,
      "learning_rate": 1.3671334462598449e-05,
      "loss": 3.0034,
      "step": 208177
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.458106756210327,
      "learning_rate": 1.36701137082169e-05,
      "loss": 2.8426,
      "step": 208178
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2446155548095703,
      "learning_rate": 1.366889300706947e-05,
      "loss": 2.6743,
      "step": 208179
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.659367084503174,
      "learning_rate": 1.366767235915639e-05,
      "loss": 2.9237,
      "step": 208180
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1030936241149902,
      "learning_rate": 1.3666451764477892e-05,
      "loss": 2.8116,
      "step": 208181
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.370668411254883,
      "learning_rate": 1.3665231223034213e-05,
      "loss": 2.867,
      "step": 208182
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.089431285858154,
      "learning_rate": 1.3664010734825514e-05,
      "loss": 3.0929,
      "step": 208183
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.656769275665283,
      "learning_rate": 1.3662790299852167e-05,
      "loss": 2.9264,
      "step": 208184
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0758605003356934,
      "learning_rate": 1.3661569918114268e-05,
      "loss": 2.9222,
      "step": 208185
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.039869785308838,
      "learning_rate": 1.3660349589612119e-05,
      "loss": 2.9673,
      "step": 208186
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.044703006744385,
      "learning_rate": 1.3659129314345884e-05,
      "loss": 2.9357,
      "step": 208187
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8281736373901367,
      "learning_rate": 1.3657909092315834e-05,
      "loss": 2.8331,
      "step": 208188
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.696438789367676,
      "learning_rate": 1.3656688923522163e-05,
      "loss": 2.8754,
      "step": 208189
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.43994140625,
      "learning_rate": 1.3655468807965142e-05,
      "loss": 2.905,
      "step": 208190
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5985348224639893,
      "learning_rate": 1.3654248745644935e-05,
      "loss": 2.9977,
      "step": 208191
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.3667261600494385,
      "learning_rate": 1.365302873656191e-05,
      "loss": 2.8487,
      "step": 208192
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.215017795562744,
      "learning_rate": 1.36518087807161e-05,
      "loss": 2.5299,
      "step": 208193
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.243361473083496,
      "learning_rate": 1.3650588878107838e-05,
      "loss": 2.9015,
      "step": 208194
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.771836280822754,
      "learning_rate": 1.3649369028737323e-05,
      "loss": 2.898,
      "step": 208195
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7877438068389893,
      "learning_rate": 1.3648149232604854e-05,
      "loss": 3.0022,
      "step": 208196
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2284300327301025,
      "learning_rate": 1.3646929489710534e-05,
      "loss": 2.8119,
      "step": 208197
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.647035598754883,
      "learning_rate": 1.3645709800054726e-05,
      "loss": 2.8674,
      "step": 208198
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8109514713287354,
      "learning_rate": 1.3644490163637533e-05,
      "loss": 2.8273,
      "step": 208199
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8820419311523438,
      "learning_rate": 1.3643270580459253e-05,
      "loss": 2.8353,
      "step": 208200
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.998361587524414,
      "learning_rate": 1.3642051050520052e-05,
      "loss": 2.8027,
      "step": 208201
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7487103939056396,
      "learning_rate": 1.3640831573820266e-05,
      "loss": 2.8795,
      "step": 208202
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8433501720428467,
      "learning_rate": 1.3639612150359992e-05,
      "loss": 2.9265,
      "step": 208203
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8720040321350098,
      "learning_rate": 1.3638392780139595e-05,
      "loss": 2.8504,
      "step": 208204
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6195759773254395,
      "learning_rate": 1.3637173463159179e-05,
      "loss": 2.7964,
      "step": 208205
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.143124580383301,
      "learning_rate": 1.363595419941904e-05,
      "loss": 2.7818,
      "step": 208206
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.699059009552002,
      "learning_rate": 1.3634734988919316e-05,
      "loss": 2.6655,
      "step": 208207
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8115315437316895,
      "learning_rate": 1.3633515831660368e-05,
      "loss": 2.4481,
      "step": 208208
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.988342761993408,
      "learning_rate": 1.36322967276423e-05,
      "loss": 2.684,
      "step": 208209
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8710732460021973,
      "learning_rate": 1.363107767686551e-05,
      "loss": 3.0666,
      "step": 208210
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.957270622253418,
      "learning_rate": 1.3629858679329997e-05,
      "loss": 2.8978,
      "step": 208211
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.653573989868164,
      "learning_rate": 1.3628639735036129e-05,
      "loss": 2.8832,
      "step": 208212
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.649415969848633,
      "learning_rate": 1.3627420843984072e-05,
      "loss": 3.144,
      "step": 208213
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6722466945648193,
      "learning_rate": 1.3626202006174158e-05,
      "loss": 3.0686,
      "step": 208214
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0581095218658447,
      "learning_rate": 1.3624983221606456e-05,
      "loss": 3.1618,
      "step": 208215
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1084237098693848,
      "learning_rate": 1.3623764490281364e-05,
      "loss": 2.7365,
      "step": 208216
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.490905523300171,
      "learning_rate": 1.3622545812198948e-05,
      "loss": 2.9917,
      "step": 208217
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.23157000541687,
      "learning_rate": 1.3621327187359543e-05,
      "loss": 2.997,
      "step": 208218
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.185063600540161,
      "learning_rate": 1.3620108615763314e-05,
      "loss": 3.0768,
      "step": 208219
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6624741554260254,
      "learning_rate": 1.3618890097410529e-05,
      "loss": 3.0072,
      "step": 208220
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9013001918792725,
      "learning_rate": 1.3617671632301353e-05,
      "loss": 2.9174,
      "step": 208221
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.328360557556152,
      "learning_rate": 1.3616453220436153e-05,
      "loss": 3.0935,
      "step": 208222
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.193850040435791,
      "learning_rate": 1.3615234861814995e-05,
      "loss": 2.8581,
      "step": 208223
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7208781242370605,
      "learning_rate": 1.3614016556438179e-05,
      "loss": 3.0982,
      "step": 208224
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.103463172912598,
      "learning_rate": 1.3612798304305906e-05,
      "loss": 2.8743,
      "step": 208225
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.3434367179870605,
      "learning_rate": 1.3611580105418474e-05,
      "loss": 2.9308,
      "step": 208226
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6204354763031006,
      "learning_rate": 1.3610361959775985e-05,
      "loss": 3.0756,
      "step": 208227
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5638952255249023,
      "learning_rate": 1.3609143867378835e-05,
      "loss": 2.8416,
      "step": 208228
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8008193969726562,
      "learning_rate": 1.3607925828227061e-05,
      "loss": 2.9597,
      "step": 208229
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7348010540008545,
      "learning_rate": 1.3606707842320997e-05,
      "loss": 2.8707,
      "step": 208230
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.130096197128296,
      "learning_rate": 1.3605489909660837e-05,
      "loss": 2.8744,
      "step": 208231
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.644963026046753,
      "learning_rate": 1.3604272030246855e-05,
      "loss": 2.8522,
      "step": 208232
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.484006404876709,
      "learning_rate": 1.3603054204079212e-05,
      "loss": 2.9967,
      "step": 208233
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5190582275390625,
      "learning_rate": 1.3601836431158241e-05,
      "loss": 2.9968,
      "step": 208234
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.86991810798645,
      "learning_rate": 1.3600618711484046e-05,
      "loss": 2.8936,
      "step": 208235
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7128474712371826,
      "learning_rate": 1.3599401045056923e-05,
      "loss": 3.0741,
      "step": 208236
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.009969711303711,
      "learning_rate": 1.3598183431877007e-05,
      "loss": 3.0231,
      "step": 208237
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.3186116218566895,
      "learning_rate": 1.3596965871944665e-05,
      "loss": 3.0382,
      "step": 208238
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.668118476867676,
      "learning_rate": 1.3595748365259996e-05,
      "loss": 3.0557,
      "step": 208239
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9223501682281494,
      "learning_rate": 1.3594530911823331e-05,
      "loss": 2.8882,
      "step": 208240
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0542101860046387,
      "learning_rate": 1.3593313511634873e-05,
      "loss": 2.9596,
      "step": 208241
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.694913864135742,
      "learning_rate": 1.3592096164694788e-05,
      "loss": 2.8748,
      "step": 208242
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6141343116760254,
      "learning_rate": 1.3590878871003341e-05,
      "loss": 2.7799,
      "step": 208243
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8667516708374023,
      "learning_rate": 1.3589661630560767e-05,
      "loss": 3.0524,
      "step": 208244
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4549732208251953,
      "learning_rate": 1.358844444336723e-05,
      "loss": 2.9464,
      "step": 208245
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.401555299758911,
      "learning_rate": 1.3587227309423065e-05,
      "loss": 3.0069,
      "step": 208246
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.3316168785095215,
      "learning_rate": 1.358601022872844e-05,
      "loss": 2.6674,
      "step": 208247
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.390169620513916,
      "learning_rate": 1.3584793201283583e-05,
      "loss": 2.8416,
      "step": 208248
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.093430995941162,
      "learning_rate": 1.3583576227088667e-05,
      "loss": 2.8195,
      "step": 208249
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.965398073196411,
      "learning_rate": 1.358235930614402e-05,
      "loss": 3.2049,
      "step": 208250
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7380778789520264,
      "learning_rate": 1.3581142438449777e-05,
      "loss": 2.8995,
      "step": 208251
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.9997432231903076,
      "learning_rate": 1.3579925624006238e-05,
      "loss": 2.9329,
      "step": 208252
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0274288654327393,
      "learning_rate": 1.3578708862813636e-05,
      "loss": 3.035,
      "step": 208253
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7342259883880615,
      "learning_rate": 1.3577492154872071e-05,
      "loss": 2.689,
      "step": 208254
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1430611610412598,
      "learning_rate": 1.3576275500181911e-05,
      "loss": 2.9206,
      "step": 208255
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.591005563735962,
      "learning_rate": 1.3575058898743351e-05,
      "loss": 2.8543,
      "step": 208256
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.535383701324463,
      "learning_rate": 1.357384235055653e-05,
      "loss": 2.7273,
      "step": 208257
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3803393840789795,
      "learning_rate": 1.3572625855621778e-05,
      "loss": 2.9439,
      "step": 208258
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2965331077575684,
      "learning_rate": 1.3571409413939294e-05,
      "loss": 3.1846,
      "step": 208259
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6280601024627686,
      "learning_rate": 1.3570193025509246e-05,
      "loss": 2.978,
      "step": 208260
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.639181137084961,
      "learning_rate": 1.3568976690331934e-05,
      "loss": 3.0639,
      "step": 208261
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.776200771331787,
      "learning_rate": 1.3567760408407591e-05,
      "loss": 2.9394,
      "step": 208262
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.397775173187256,
      "learning_rate": 1.356654417973635e-05,
      "loss": 3.0288,
      "step": 208263
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.330947160720825,
      "learning_rate": 1.356532800431851e-05,
      "loss": 2.9003,
      "step": 208264
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.374562978744507,
      "learning_rate": 1.3564111882154306e-05,
      "loss": 2.8292,
      "step": 208265
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6970577239990234,
      "learning_rate": 1.3562895813243902e-05,
      "loss": 3.0964,
      "step": 208266
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.623046398162842,
      "learning_rate": 1.3561679797587599e-05,
      "loss": 2.9349,
      "step": 208267
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9105024337768555,
      "learning_rate": 1.3560463835185532e-05,
      "loss": 2.7378,
      "step": 208268
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.191889762878418,
      "learning_rate": 1.355924792603803e-05,
      "loss": 3.015,
      "step": 208269
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8437743186950684,
      "learning_rate": 1.3558032070145298e-05,
      "loss": 2.9678,
      "step": 208270
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.059046506881714,
      "learning_rate": 1.3556816267507498e-05,
      "loss": 3.001,
      "step": 208271
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1152639389038086,
      "learning_rate": 1.3555600518124864e-05,
      "loss": 2.8552,
      "step": 208272
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.974977970123291,
      "learning_rate": 1.3554384821997699e-05,
      "loss": 2.7421,
      "step": 208273
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4896554946899414,
      "learning_rate": 1.3553169179126133e-05,
      "loss": 2.9742,
      "step": 208274
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6832058429718018,
      "learning_rate": 1.3551953589510501e-05,
      "loss": 2.7893,
      "step": 208275
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.192810535430908,
      "learning_rate": 1.35507380531509e-05,
      "loss": 2.7101,
      "step": 208276
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5876667499542236,
      "learning_rate": 1.3549522570047732e-05,
      "loss": 2.6524,
      "step": 208277
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.061279296875,
      "learning_rate": 1.3548307140200998e-05,
      "loss": 2.8238,
      "step": 208278
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8801047801971436,
      "learning_rate": 1.3547091763611095e-05,
      "loss": 3.0078,
      "step": 208279
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5347952842712402,
      "learning_rate": 1.3545876440278159e-05,
      "loss": 2.8975,
      "step": 208280
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9726781845092773,
      "learning_rate": 1.354466117020252e-05,
      "loss": 2.803,
      "step": 208281
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.060201168060303,
      "learning_rate": 1.3543445953384247e-05,
      "loss": 2.7591,
      "step": 208282
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.729689836502075,
      "learning_rate": 1.3542230789823772e-05,
      "loss": 2.7889,
      "step": 208283
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.551323413848877,
      "learning_rate": 1.3541015679521095e-05,
      "loss": 2.8113,
      "step": 208284
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.916984796524048,
      "learning_rate": 1.3539800622476616e-05,
      "loss": 2.9456,
      "step": 208285
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.740514039993286,
      "learning_rate": 1.3538585618690434e-05,
      "loss": 2.9446,
      "step": 208286
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7475287914276123,
      "learning_rate": 1.3537370668162884e-05,
      "loss": 2.74,
      "step": 208287
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.590656042098999,
      "learning_rate": 1.3536155770894097e-05,
      "loss": 2.9623,
      "step": 208288
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.60176157951355,
      "learning_rate": 1.3534940926884442e-05,
      "loss": 2.8889,
      "step": 208289
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3000681400299072,
      "learning_rate": 1.3533726136133982e-05,
      "loss": 2.9404,
      "step": 208290
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7897822856903076,
      "learning_rate": 1.353251139864302e-05,
      "loss": 2.7885,
      "step": 208291
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.06461501121521,
      "learning_rate": 1.353129671441172e-05,
      "loss": 2.8027,
      "step": 208292
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.667445659637451,
      "learning_rate": 1.3530082083440452e-05,
      "loss": 3.0004,
      "step": 208293
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6390368938446045,
      "learning_rate": 1.3528867505729246e-05,
      "loss": 3.0631,
      "step": 208294
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8762121200561523,
      "learning_rate": 1.352765298127857e-05,
      "loss": 2.803,
      "step": 208295
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.840679883956909,
      "learning_rate": 1.352643851008839e-05,
      "loss": 3.1362,
      "step": 208296
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.483524799346924,
      "learning_rate": 1.3525224092159104e-05,
      "loss": 3.0035,
      "step": 208297
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.217207431793213,
      "learning_rate": 1.3524009727490848e-05,
      "loss": 2.8751,
      "step": 208298
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.850362539291382,
      "learning_rate": 1.352279541608392e-05,
      "loss": 2.9566,
      "step": 208299
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.934417486190796,
      "learning_rate": 1.3521581157938488e-05,
      "loss": 2.8731,
      "step": 208300
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9613442420959473,
      "learning_rate": 1.3520366953054884e-05,
      "loss": 2.988,
      "step": 208301
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.692112445831299,
      "learning_rate": 1.3519152801433142e-05,
      "loss": 2.8772,
      "step": 208302
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1615583896636963,
      "learning_rate": 1.351793870307366e-05,
      "loss": 2.6604,
      "step": 208303
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.868220806121826,
      "learning_rate": 1.3516724657976574e-05,
      "loss": 2.7287,
      "step": 208304
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2320661544799805,
      "learning_rate": 1.3515510666142148e-05,
      "loss": 2.9957,
      "step": 208305
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.806084632873535,
      "learning_rate": 1.3514296727570584e-05,
      "loss": 2.8412,
      "step": 208306
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7982096672058105,
      "learning_rate": 1.351308284226218e-05,
      "loss": 2.9103,
      "step": 208307
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.425614833831787,
      "learning_rate": 1.3511869010217002e-05,
      "loss": 2.8441,
      "step": 208308
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.142979145050049,
      "learning_rate": 1.3510655231435452e-05,
      "loss": 2.9844,
      "step": 208309
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.578911781311035,
      "learning_rate": 1.3509441505917628e-05,
      "loss": 2.7973,
      "step": 208310
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2316219806671143,
      "learning_rate": 1.3508227833663831e-05,
      "loss": 2.9938,
      "step": 208311
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.897657871246338,
      "learning_rate": 1.350701421467426e-05,
      "loss": 2.8815,
      "step": 208312
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4295825958251953,
      "learning_rate": 1.3505800648949184e-05,
      "loss": 3.133,
      "step": 208313
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.605707883834839,
      "learning_rate": 1.3504587136488731e-05,
      "loss": 2.8522,
      "step": 208314
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.907655954360962,
      "learning_rate": 1.3503373677293206e-05,
      "loss": 2.8518,
      "step": 208315
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.482146978378296,
      "learning_rate": 1.3502160271362771e-05,
      "loss": 2.942,
      "step": 208316
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1366069316864014,
      "learning_rate": 1.3500946918697764e-05,
      "loss": 2.9061,
      "step": 208317
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.85390567779541,
      "learning_rate": 1.349973361929828e-05,
      "loss": 3.0084,
      "step": 208318
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7487759590148926,
      "learning_rate": 1.3498520373164623e-05,
      "loss": 2.9064,
      "step": 208319
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7814464569091797,
      "learning_rate": 1.3497307180297024e-05,
      "loss": 2.7511,
      "step": 208320
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6745402812957764,
      "learning_rate": 1.3496094040695682e-05,
      "loss": 2.7339,
      "step": 208321
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1059482097625732,
      "learning_rate": 1.3494880954360765e-05,
      "loss": 3.1151,
      "step": 208322
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7839198112487793,
      "learning_rate": 1.3493667921292606e-05,
      "loss": 2.7884,
      "step": 208323
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.018681764602661,
      "learning_rate": 1.349245494149137e-05,
      "loss": 3.0111,
      "step": 208324
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.856795072555542,
      "learning_rate": 1.3491242014957293e-05,
      "loss": 2.848,
      "step": 208325
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.747008800506592,
      "learning_rate": 1.3490029141690639e-05,
      "loss": 2.8776,
      "step": 208326
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.283998727798462,
      "learning_rate": 1.3488816321691576e-05,
      "loss": 2.9005,
      "step": 208327
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3860182762145996,
      "learning_rate": 1.3487603554960303e-05,
      "loss": 2.954,
      "step": 208328
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.494631290435791,
      "learning_rate": 1.3486390841497153e-05,
      "loss": 2.7651,
      "step": 208329
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0048747062683105,
      "learning_rate": 1.3485178181302258e-05,
      "loss": 2.9488,
      "step": 208330
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.766122817993164,
      "learning_rate": 1.3483965574375888e-05,
      "loss": 2.7185,
      "step": 208331
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7561838626861572,
      "learning_rate": 1.3482753020718274e-05,
      "loss": 2.8458,
      "step": 208332
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2325401306152344,
      "learning_rate": 1.3481540520329614e-05,
      "loss": 2.6649,
      "step": 208333
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.230968475341797,
      "learning_rate": 1.3480328073210112e-05,
      "loss": 3.1566,
      "step": 208334
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.144193410873413,
      "learning_rate": 1.3479115679360064e-05,
      "loss": 3.0666,
      "step": 208335
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6715404987335205,
      "learning_rate": 1.3477903338779638e-05,
      "loss": 2.8712,
      "step": 208336
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.219943046569824,
      "learning_rate": 1.3476691051469101e-05,
      "loss": 3.0428,
      "step": 208337
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.958366870880127,
      "learning_rate": 1.3475478817428653e-05,
      "loss": 2.9657,
      "step": 208338
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1506059169769287,
      "learning_rate": 1.347426663665846e-05,
      "loss": 2.9604,
      "step": 208339
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.051729917526245,
      "learning_rate": 1.3473054509158886e-05,
      "loss": 2.9605,
      "step": 208340
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.9027864933013916,
      "learning_rate": 1.3471842434930068e-05,
      "loss": 2.8084,
      "step": 208341
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.097314357757568,
      "learning_rate": 1.3470630413972206e-05,
      "loss": 2.9343,
      "step": 208342
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.012929677963257,
      "learning_rate": 1.3469418446285596e-05,
      "loss": 3.1426,
      "step": 208343
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9659223556518555,
      "learning_rate": 1.3468206531870406e-05,
      "loss": 2.9316,
      "step": 208344
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8658647537231445,
      "learning_rate": 1.3466994670726871e-05,
      "loss": 2.8572,
      "step": 208345
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.656851053237915,
      "learning_rate": 1.346578286285529e-05,
      "loss": 2.9651,
      "step": 208346
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1748881340026855,
      "learning_rate": 1.3464571108255795e-05,
      "loss": 2.9317,
      "step": 208347
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6997928619384766,
      "learning_rate": 1.346335940692862e-05,
      "loss": 3.011,
      "step": 208348
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2133824825286865,
      "learning_rate": 1.3462147758874064e-05,
      "loss": 2.9148,
      "step": 208349
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.425529956817627,
      "learning_rate": 1.3460936164092295e-05,
      "loss": 3.1103,
      "step": 208350
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.166559934616089,
      "learning_rate": 1.3459724622583512e-05,
      "loss": 2.9453,
      "step": 208351
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9727962017059326,
      "learning_rate": 1.3458513134347981e-05,
      "loss": 2.8051,
      "step": 208352
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7652597427368164,
      "learning_rate": 1.3457301699385936e-05,
      "loss": 2.8305,
      "step": 208353
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.120241165161133,
      "learning_rate": 1.3456090317697577e-05,
      "loss": 2.6752,
      "step": 208354
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0164849758148193,
      "learning_rate": 1.345487898928317e-05,
      "loss": 2.8006,
      "step": 208355
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9622154235839844,
      "learning_rate": 1.3453667714142913e-05,
      "loss": 2.7701,
      "step": 208356
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.02951979637146,
      "learning_rate": 1.3452456492276975e-05,
      "loss": 2.846,
      "step": 208357
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.069554567337036,
      "learning_rate": 1.3451245323685656e-05,
      "loss": 2.8649,
      "step": 208358
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.300534963607788,
      "learning_rate": 1.3450034208369154e-05,
      "loss": 3.1493,
      "step": 208359
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.846703052520752,
      "learning_rate": 1.3448823146327735e-05,
      "loss": 2.8704,
      "step": 208360
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.200251817703247,
      "learning_rate": 1.3447612137561536e-05,
      "loss": 2.9186,
      "step": 208361
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2260420322418213,
      "learning_rate": 1.3446401182070921e-05,
      "loss": 2.5972,
      "step": 208362
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.66196870803833,
      "learning_rate": 1.3445190279855922e-05,
      "loss": 2.8976,
      "step": 208363
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9282073974609375,
      "learning_rate": 1.344397943091694e-05,
      "loss": 2.9818,
      "step": 208364
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8478844165802,
      "learning_rate": 1.3442768635254109e-05,
      "loss": 2.843,
      "step": 208365
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.420923709869385,
      "learning_rate": 1.3441557892867693e-05,
      "loss": 2.8054,
      "step": 208366
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.876729965209961,
      "learning_rate": 1.3440347203757863e-05,
      "loss": 2.8096,
      "step": 208367
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.522031307220459,
      "learning_rate": 1.3439136567924946e-05,
      "loss": 2.7795,
      "step": 208368
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6712284088134766,
      "learning_rate": 1.3437925985369047e-05,
      "loss": 2.9663,
      "step": 208369
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1101930141448975,
      "learning_rate": 1.3436715456090463e-05,
      "loss": 2.7764,
      "step": 208370
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4330296516418457,
      "learning_rate": 1.3435504980089395e-05,
      "loss": 2.8238,
      "step": 208371
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0210819244384766,
      "learning_rate": 1.343429455736611e-05,
      "loss": 2.9817,
      "step": 208372
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.245039701461792,
      "learning_rate": 1.3433084187920739e-05,
      "loss": 2.8448,
      "step": 208373
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.686924934387207,
      "learning_rate": 1.3431873871753652e-05,
      "loss": 2.8075,
      "step": 208374
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1753532886505127,
      "learning_rate": 1.3430663608864911e-05,
      "loss": 3.0163,
      "step": 208375
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.768420457839966,
      "learning_rate": 1.3429453399254852e-05,
      "loss": 3.0228,
      "step": 208376
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.5295426845550537,
      "learning_rate": 1.3428243242923642e-05,
      "loss": 2.7299,
      "step": 208377
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.02427077293396,
      "learning_rate": 1.342703313987158e-05,
      "loss": 2.9112,
      "step": 208378
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0290818214416504,
      "learning_rate": 1.3425823090098763e-05,
      "loss": 2.7773,
      "step": 208379
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.609884023666382,
      "learning_rate": 1.3424613093605597e-05,
      "loss": 2.8928,
      "step": 208380
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.135972738265991,
      "learning_rate": 1.3423403150392142e-05,
      "loss": 3.1112,
      "step": 208381
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7706708908081055,
      "learning_rate": 1.3422193260458702e-05,
      "loss": 2.7104,
      "step": 208382
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.912213087081909,
      "learning_rate": 1.3420983423805475e-05,
      "loss": 2.9438,
      "step": 208383
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.008450508117676,
      "learning_rate": 1.3419773640432697e-05,
      "loss": 3.0112,
      "step": 208384
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3446948528289795,
      "learning_rate": 1.3418563910340596e-05,
      "loss": 3.0339,
      "step": 208385
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.2580084800720215,
      "learning_rate": 1.341735423352941e-05,
      "loss": 2.9625,
      "step": 208386
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7793922424316406,
      "learning_rate": 1.3416144609999336e-05,
      "loss": 2.9755,
      "step": 208387
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.016415596008301,
      "learning_rate": 1.3414935039750607e-05,
      "loss": 2.9676,
      "step": 208388
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7895827293395996,
      "learning_rate": 1.3413725522783424e-05,
      "loss": 2.7144,
      "step": 208389
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.904928684234619,
      "learning_rate": 1.3412516059098088e-05,
      "loss": 3.1018,
      "step": 208390
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9016292095184326,
      "learning_rate": 1.341130664869473e-05,
      "loss": 2.8852,
      "step": 208391
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2970712184906006,
      "learning_rate": 1.341009729157365e-05,
      "loss": 2.5476,
      "step": 208392
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9639124870300293,
      "learning_rate": 1.3408887987735051e-05,
      "loss": 3.0148,
      "step": 208393
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5933778285980225,
      "learning_rate": 1.340767873717916e-05,
      "loss": 2.9342,
      "step": 208394
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7742249965667725,
      "learning_rate": 1.340646953990615e-05,
      "loss": 3.0714,
      "step": 208395
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0119080543518066,
      "learning_rate": 1.3405260395916318e-05,
      "loss": 3.0649,
      "step": 208396
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.4874207973480225,
      "learning_rate": 1.3404051305209829e-05,
      "loss": 2.8178,
      "step": 208397
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7913572788238525,
      "learning_rate": 1.340284226778695e-05,
      "loss": 2.8423,
      "step": 208398
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0851006507873535,
      "learning_rate": 1.3401633283647916e-05,
      "loss": 3.0713,
      "step": 208399
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.177868604660034,
      "learning_rate": 1.3400424352792926e-05,
      "loss": 2.857,
      "step": 208400
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.964087963104248,
      "learning_rate": 1.3399215475222148e-05,
      "loss": 2.6933,
      "step": 208401
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.180525064468384,
      "learning_rate": 1.339800665093591e-05,
      "loss": 2.8351,
      "step": 208402
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8163177967071533,
      "learning_rate": 1.3396797879934384e-05,
      "loss": 2.8908,
      "step": 208403
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3558309078216553,
      "learning_rate": 1.3395589162217801e-05,
      "loss": 2.9057,
      "step": 208404
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5136592388153076,
      "learning_rate": 1.3394380497786428e-05,
      "loss": 2.9581,
      "step": 208405
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.3690671920776367,
      "learning_rate": 1.339317188664043e-05,
      "loss": 2.7625,
      "step": 208406
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.105971097946167,
      "learning_rate": 1.339196332878001e-05,
      "loss": 2.8344,
      "step": 208407
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7753756046295166,
      "learning_rate": 1.3390754824205463e-05,
      "loss": 2.9534,
      "step": 208408
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8124308586120605,
      "learning_rate": 1.338954637291696e-05,
      "loss": 2.5798,
      "step": 208409
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.594219923019409,
      "learning_rate": 1.3388337974914798e-05,
      "loss": 2.7721,
      "step": 208410
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.529064416885376,
      "learning_rate": 1.3387129630199145e-05,
      "loss": 2.9691,
      "step": 208411
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.289201021194458,
      "learning_rate": 1.3385921338770233e-05,
      "loss": 2.6359,
      "step": 208412
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5749783515930176,
      "learning_rate": 1.338471310062823e-05,
      "loss": 3.0514,
      "step": 208413
    },
    {
      "epoch": 2.71,
      "grad_norm": 5.246588230133057,
      "learning_rate": 1.3383504915773502e-05,
      "loss": 2.9631,
      "step": 208414
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0282442569732666,
      "learning_rate": 1.3382296784206114e-05,
      "loss": 2.8936,
      "step": 208415
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.080141544342041,
      "learning_rate": 1.3381088705926435e-05,
      "loss": 2.7046,
      "step": 208416
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.076967477798462,
      "learning_rate": 1.3379880680934596e-05,
      "loss": 2.6907,
      "step": 208417
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.6324329376220703,
      "learning_rate": 1.3378672709230864e-05,
      "loss": 3.0617,
      "step": 208418
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.759127378463745,
      "learning_rate": 1.3377464790815406e-05,
      "loss": 3.019,
      "step": 208419
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.811009407043457,
      "learning_rate": 1.3376256925688522e-05,
      "loss": 3.0965,
      "step": 208420
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.49955153465271,
      "learning_rate": 1.3375049113850344e-05,
      "loss": 2.6666,
      "step": 208421
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.668842315673828,
      "learning_rate": 1.337384135530124e-05,
      "loss": 2.8949,
      "step": 208422
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8409807682037354,
      "learning_rate": 1.3372633650041308e-05,
      "loss": 2.8013,
      "step": 208423
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.428269863128662,
      "learning_rate": 1.337142599807085e-05,
      "loss": 2.922,
      "step": 208424
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.698105812072754,
      "learning_rate": 1.3370218399389998e-05,
      "loss": 2.8145,
      "step": 208425
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5151963233947754,
      "learning_rate": 1.3369010853999052e-05,
      "loss": 3.0805,
      "step": 208426
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.90445613861084,
      "learning_rate": 1.336780336189821e-05,
      "loss": 2.9404,
      "step": 208427
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9993457794189453,
      "learning_rate": 1.3366595923087708e-05,
      "loss": 3.1171,
      "step": 208428
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1065993309020996,
      "learning_rate": 1.3365388537567811e-05,
      "loss": 2.7912,
      "step": 208429
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.624363660812378,
      "learning_rate": 1.336418120533862e-05,
      "loss": 2.7737,
      "step": 208430
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.552464723587036,
      "learning_rate": 1.33629739264005e-05,
      "loss": 3.0358,
      "step": 208431
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7415926456451416,
      "learning_rate": 1.3361766700753618e-05,
      "loss": 2.9588,
      "step": 208432
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2541451454162598,
      "learning_rate": 1.336055952839814e-05,
      "loss": 2.8132,
      "step": 208433
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.4151487350463867,
      "learning_rate": 1.3359352409334401e-05,
      "loss": 2.8194,
      "step": 208434
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2212917804718018,
      "learning_rate": 1.3358145343562531e-05,
      "loss": 2.7897,
      "step": 208435
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7918479442596436,
      "learning_rate": 1.33569383310828e-05,
      "loss": 2.9959,
      "step": 208436
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.3080532550811768,
      "learning_rate": 1.335573137189544e-05,
      "loss": 2.9264,
      "step": 208437
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7282378673553467,
      "learning_rate": 1.3354524466000615e-05,
      "loss": 2.7527,
      "step": 208438
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3919317722320557,
      "learning_rate": 1.3353317613398661e-05,
      "loss": 2.7163,
      "step": 208439
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.674055576324463,
      "learning_rate": 1.3352110814089711e-05,
      "loss": 3.0132,
      "step": 208440
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8340561389923096,
      "learning_rate": 1.3350904068074031e-05,
      "loss": 2.8124,
      "step": 208441
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1222457885742188,
      "learning_rate": 1.3349697375351753e-05,
      "loss": 3.0379,
      "step": 208442
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.562579870223999,
      "learning_rate": 1.3348490735923245e-05,
      "loss": 2.8214,
      "step": 208443
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.414031505584717,
      "learning_rate": 1.334728414978864e-05,
      "loss": 2.9341,
      "step": 208444
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.521566390991211,
      "learning_rate": 1.3346077616948204e-05,
      "loss": 2.7253,
      "step": 208445
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1492984294891357,
      "learning_rate": 1.3344871137402136e-05,
      "loss": 2.9509,
      "step": 208446
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.76641845703125,
      "learning_rate": 1.3343664711150703e-05,
      "loss": 2.8339,
      "step": 208447
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9770734310150146,
      "learning_rate": 1.3342458338194006e-05,
      "loss": 3.0483,
      "step": 208448
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7728543281555176,
      "learning_rate": 1.3341252018532445e-05,
      "loss": 3.0245,
      "step": 208449
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8304953575134277,
      "learning_rate": 1.3340045752166085e-05,
      "loss": 2.9067,
      "step": 208450
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.107597589492798,
      "learning_rate": 1.3338839539095258e-05,
      "loss": 2.7769,
      "step": 208451
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7827913761138916,
      "learning_rate": 1.3337633379320134e-05,
      "loss": 2.8412,
      "step": 208452
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.511141300201416,
      "learning_rate": 1.3336427272840976e-05,
      "loss": 2.6961,
      "step": 208453
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.041888952255249,
      "learning_rate": 1.3335221219658022e-05,
      "loss": 2.6398,
      "step": 208454
    },
    {
      "epoch": 2.71,
      "grad_norm": 4.406855583190918,
      "learning_rate": 1.3334015219771432e-05,
      "loss": 2.7034,
      "step": 208455
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.3433291912078857,
      "learning_rate": 1.333280927318141e-05,
      "loss": 2.8608,
      "step": 208456
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0188090801239014,
      "learning_rate": 1.333160337988829e-05,
      "loss": 2.9153,
      "step": 208457
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6745450496673584,
      "learning_rate": 1.3330397539892169e-05,
      "loss": 3.0861,
      "step": 208458
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.298746347427368,
      "learning_rate": 1.3329191753193413e-05,
      "loss": 2.8331,
      "step": 208459
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.249117136001587,
      "learning_rate": 1.3327986019792158e-05,
      "loss": 2.7851,
      "step": 208460
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.768357276916504,
      "learning_rate": 1.3326780339688636e-05,
      "loss": 3.0126,
      "step": 208461
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.468656539916992,
      "learning_rate": 1.3325574712883048e-05,
      "loss": 3.1185,
      "step": 208462
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.5725018978118896,
      "learning_rate": 1.332436913937569e-05,
      "loss": 3.027,
      "step": 208463
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0184326171875,
      "learning_rate": 1.3323163619166699e-05,
      "loss": 2.8064,
      "step": 208464
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9482789039611816,
      "learning_rate": 1.3321958152256373e-05,
      "loss": 2.8854,
      "step": 208465
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7779786586761475,
      "learning_rate": 1.3320752738644914e-05,
      "loss": 2.7955,
      "step": 208466
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.976123094558716,
      "learning_rate": 1.3319547378332518e-05,
      "loss": 2.8298,
      "step": 208467
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2146594524383545,
      "learning_rate": 1.3318342071319421e-05,
      "loss": 2.9588,
      "step": 208468
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.1832621097564697,
      "learning_rate": 1.331713681760589e-05,
      "loss": 2.8012,
      "step": 208469
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7922489643096924,
      "learning_rate": 1.3315931617192055e-05,
      "loss": 2.8504,
      "step": 208470
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.264254093170166,
      "learning_rate": 1.3314726470078252e-05,
      "loss": 2.9607,
      "step": 208471
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.4196603298187256,
      "learning_rate": 1.3313521376264647e-05,
      "loss": 2.9919,
      "step": 208472
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.668034315109253,
      "learning_rate": 1.3312316335751472e-05,
      "loss": 2.9761,
      "step": 208473
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.964059591293335,
      "learning_rate": 1.3311111348538927e-05,
      "loss": 2.786,
      "step": 208474
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.7604286670684814,
      "learning_rate": 1.330990641462728e-05,
      "loss": 2.7914,
      "step": 208475
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0066773891448975,
      "learning_rate": 1.3308701534016697e-05,
      "loss": 3.0404,
      "step": 208476
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2462613582611084,
      "learning_rate": 1.3307496706707477e-05,
      "loss": 3.0287,
      "step": 208477
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9900736808776855,
      "learning_rate": 1.330629193269982e-05,
      "loss": 3.225,
      "step": 208478
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6842424869537354,
      "learning_rate": 1.3305087211993926e-05,
      "loss": 3.1379,
      "step": 208479
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.568089485168457,
      "learning_rate": 1.3303882544589995e-05,
      "loss": 2.9293,
      "step": 208480
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.932673931121826,
      "learning_rate": 1.3302677930488292e-05,
      "loss": 2.8761,
      "step": 208481
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6799516677856445,
      "learning_rate": 1.330147336968902e-05,
      "loss": 3.0585,
      "step": 208482
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9255597591400146,
      "learning_rate": 1.3300268862192443e-05,
      "loss": 3.0356,
      "step": 208483
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.094452142715454,
      "learning_rate": 1.3299064407998762e-05,
      "loss": 2.8532,
      "step": 208484
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.47320818901062,
      "learning_rate": 1.3297860007108208e-05,
      "loss": 3.0363,
      "step": 208485
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.048840284347534,
      "learning_rate": 1.329665565952095e-05,
      "loss": 2.8785,
      "step": 208486
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.16733717918396,
      "learning_rate": 1.3295451365237286e-05,
      "loss": 3.0093,
      "step": 208487
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2317192554473877,
      "learning_rate": 1.3294247124257384e-05,
      "loss": 3.0006,
      "step": 208488
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.164475440979004,
      "learning_rate": 1.329304293658151e-05,
      "loss": 3.0431,
      "step": 208489
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8233208656311035,
      "learning_rate": 1.3291838802209898e-05,
      "loss": 2.9345,
      "step": 208490
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.7800724506378174,
      "learning_rate": 1.3290634721142746e-05,
      "loss": 3.0633,
      "step": 208491
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.6663033962249756,
      "learning_rate": 1.328943069338022e-05,
      "loss": 2.7783,
      "step": 208492
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.713682174682617,
      "learning_rate": 1.3288226718922623e-05,
      "loss": 3.0082,
      "step": 208493
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.827561378479004,
      "learning_rate": 1.3287022797770153e-05,
      "loss": 3.133,
      "step": 208494
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.445218086242676,
      "learning_rate": 1.3285818929923075e-05,
      "loss": 2.7078,
      "step": 208495
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0166175365448,
      "learning_rate": 1.3284615115381558e-05,
      "loss": 2.8304,
      "step": 208496
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8023746013641357,
      "learning_rate": 1.3283411354145835e-05,
      "loss": 2.9754,
      "step": 208497
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.787360906600952,
      "learning_rate": 1.3282207646216103e-05,
      "loss": 3.0624,
      "step": 208498
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.2089285850524902,
      "learning_rate": 1.3281003991592698e-05,
      "loss": 2.9754,
      "step": 208499
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.827094793319702,
      "learning_rate": 1.3279800390275685e-05,
      "loss": 2.9335,
      "step": 208500
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8216822147369385,
      "learning_rate": 1.3278596842265432e-05,
      "loss": 3.0406,
      "step": 208501
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8098676204681396,
      "learning_rate": 1.327739334756207e-05,
      "loss": 2.7147,
      "step": 208502
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.0029475688934326,
      "learning_rate": 1.3276189906165901e-05,
      "loss": 2.7475,
      "step": 208503
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9705374240875244,
      "learning_rate": 1.3274986518077024e-05,
      "loss": 2.9769,
      "step": 208504
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.319600820541382,
      "learning_rate": 1.3273783183295772e-05,
      "loss": 2.8806,
      "step": 208505
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.353209972381592,
      "learning_rate": 1.3272579901822311e-05,
      "loss": 2.9385,
      "step": 208506
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.9281933307647705,
      "learning_rate": 1.3271376673656942e-05,
      "loss": 3.0275,
      "step": 208507
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.391331434249878,
      "learning_rate": 1.327017349879983e-05,
      "loss": 2.8159,
      "step": 208508
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.857802152633667,
      "learning_rate": 1.326897037725121e-05,
      "loss": 2.8286,
      "step": 208509
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.743206024169922,
      "learning_rate": 1.3267767309011246e-05,
      "loss": 2.8698,
      "step": 208510
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.409921169281006,
      "learning_rate": 1.3266564294080274e-05,
      "loss": 2.9975,
      "step": 208511
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.436769485473633,
      "learning_rate": 1.326536133245839e-05,
      "loss": 2.8676,
      "step": 208512
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.8740170001983643,
      "learning_rate": 1.3264158424145966e-05,
      "loss": 3.0149,
      "step": 208513
    },
    {
      "epoch": 2.71,
      "grad_norm": 3.210238456726074,
      "learning_rate": 1.326295556914313e-05,
      "loss": 3.1246,
      "step": 208514
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4826455116271973,
      "learning_rate": 1.3261752767450085e-05,
      "loss": 2.9726,
      "step": 208515
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.793788194656372,
      "learning_rate": 1.3260550019067128e-05,
      "loss": 2.8734,
      "step": 208516
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.077423095703125,
      "learning_rate": 1.325934732399443e-05,
      "loss": 2.7578,
      "step": 208517
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8370707035064697,
      "learning_rate": 1.325814468223222e-05,
      "loss": 2.6438,
      "step": 208518
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.14497447013855,
      "learning_rate": 1.3256942093780764e-05,
      "loss": 2.9265,
      "step": 208519
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.24263596534729,
      "learning_rate": 1.32557395586402e-05,
      "loss": 2.7086,
      "step": 208520
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3319129943847656,
      "learning_rate": 1.325453707681089e-05,
      "loss": 2.8008,
      "step": 208521
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.743234634399414,
      "learning_rate": 1.3253334648292935e-05,
      "loss": 2.8557,
      "step": 208522
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.593437671661377,
      "learning_rate": 1.3252132273086569e-05,
      "loss": 2.9931,
      "step": 208523
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.097371816635132,
      "learning_rate": 1.325092995119209e-05,
      "loss": 2.9382,
      "step": 208524
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5229079723358154,
      "learning_rate": 1.3249727682609668e-05,
      "loss": 2.7656,
      "step": 208525
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.868211030960083,
      "learning_rate": 1.3248525467339499e-05,
      "loss": 2.9495,
      "step": 208526
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6431849002838135,
      "learning_rate": 1.3247323305381885e-05,
      "loss": 2.8921,
      "step": 208527
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.810823678970337,
      "learning_rate": 1.3246121196736991e-05,
      "loss": 2.8817,
      "step": 208528
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8414323329925537,
      "learning_rate": 1.324491914140502e-05,
      "loss": 3.037,
      "step": 208529
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7524235248565674,
      "learning_rate": 1.3243717139386268e-05,
      "loss": 2.8062,
      "step": 208530
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2200028896331787,
      "learning_rate": 1.3242515190680935e-05,
      "loss": 2.8753,
      "step": 208531
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.673243999481201,
      "learning_rate": 1.324131329528919e-05,
      "loss": 2.7032,
      "step": 208532
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4764621257781982,
      "learning_rate": 1.3240111453211333e-05,
      "loss": 3.0284,
      "step": 208533
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.84822940826416,
      "learning_rate": 1.3238909664447561e-05,
      "loss": 2.8296,
      "step": 208534
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0324268341064453,
      "learning_rate": 1.3237707928998043e-05,
      "loss": 2.7414,
      "step": 208535
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6222879886627197,
      "learning_rate": 1.323650624686311e-05,
      "loss": 2.7246,
      "step": 208536
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6919186115264893,
      "learning_rate": 1.323530461804283e-05,
      "loss": 2.9893,
      "step": 208537
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.246553897857666,
      "learning_rate": 1.3234103042537602e-05,
      "loss": 3.0952,
      "step": 208538
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.9287643432617188,
      "learning_rate": 1.323290152034756e-05,
      "loss": 3.0478,
      "step": 208539
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8218913078308105,
      "learning_rate": 1.3231700051472938e-05,
      "loss": 2.945,
      "step": 208540
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.898075103759766,
      "learning_rate": 1.32304986359139e-05,
      "loss": 2.8852,
      "step": 208541
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.109637498855591,
      "learning_rate": 1.3229297273670781e-05,
      "loss": 2.7865,
      "step": 208542
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4837708473205566,
      "learning_rate": 1.3228095964743713e-05,
      "loss": 2.9328,
      "step": 208543
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.727124214172363,
      "learning_rate": 1.3226894709132962e-05,
      "loss": 2.6262,
      "step": 208544
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.095952987670898,
      "learning_rate": 1.3225693506838797e-05,
      "loss": 3.0237,
      "step": 208545
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.271681070327759,
      "learning_rate": 1.3224492357861349e-05,
      "loss": 2.914,
      "step": 208546
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.216287851333618,
      "learning_rate": 1.3223291262200853e-05,
      "loss": 2.9918,
      "step": 208547
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.896280288696289,
      "learning_rate": 1.3222090219857606e-05,
      "loss": 2.8575,
      "step": 208548
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.930940628051758,
      "learning_rate": 1.3220889230831744e-05,
      "loss": 3.0924,
      "step": 208549
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.083808183670044,
      "learning_rate": 1.3219688295123565e-05,
      "loss": 3.1774,
      "step": 208550
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.761774778366089,
      "learning_rate": 1.3218487412733236e-05,
      "loss": 2.8399,
      "step": 208551
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7548086643218994,
      "learning_rate": 1.3217286583661057e-05,
      "loss": 2.7672,
      "step": 208552
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.901249885559082,
      "learning_rate": 1.321608580790713e-05,
      "loss": 2.7897,
      "step": 208553
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6023190021514893,
      "learning_rate": 1.3214885085471782e-05,
      "loss": 2.8939,
      "step": 208554
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.33981990814209,
      "learning_rate": 1.3213684416355152e-05,
      "loss": 2.8126,
      "step": 208555
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6910502910614014,
      "learning_rate": 1.3212483800557572e-05,
      "loss": 3.149,
      "step": 208556
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7463271617889404,
      "learning_rate": 1.3211283238079206e-05,
      "loss": 2.9705,
      "step": 208557
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0916552543640137,
      "learning_rate": 1.3210082728920257e-05,
      "loss": 2.9599,
      "step": 208558
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.654383897781372,
      "learning_rate": 1.3208882273080922e-05,
      "loss": 2.974,
      "step": 208559
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.027120351791382,
      "learning_rate": 1.3207681870561537e-05,
      "loss": 2.9817,
      "step": 208560
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5748987197875977,
      "learning_rate": 1.32064815213622e-05,
      "loss": 3.2034,
      "step": 208561
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9211761951446533,
      "learning_rate": 1.3205281225483244e-05,
      "loss": 2.9948,
      "step": 208562
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.021576404571533,
      "learning_rate": 1.3204080982924836e-05,
      "loss": 3.2245,
      "step": 208563
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.533313274383545,
      "learning_rate": 1.320288079368721e-05,
      "loss": 3.0588,
      "step": 208564
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4481167793273926,
      "learning_rate": 1.320168065777053e-05,
      "loss": 2.7554,
      "step": 208565
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7334132194519043,
      "learning_rate": 1.3200480575175099e-05,
      "loss": 2.8837,
      "step": 208566
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.818054437637329,
      "learning_rate": 1.319928054590108e-05,
      "loss": 2.9247,
      "step": 208567
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.972214698791504,
      "learning_rate": 1.3198080569948777e-05,
      "loss": 3.3311,
      "step": 208568
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.016176223754883,
      "learning_rate": 1.3196880647318387e-05,
      "loss": 3.1558,
      "step": 208569
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.578738212585449,
      "learning_rate": 1.3195680778010076e-05,
      "loss": 2.9449,
      "step": 208570
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8890604972839355,
      "learning_rate": 1.319448096202408e-05,
      "loss": 2.9131,
      "step": 208571
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8949520587921143,
      "learning_rate": 1.3193281199360662e-05,
      "loss": 3.029,
      "step": 208572
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.380641222000122,
      "learning_rate": 1.3192081490020024e-05,
      "loss": 2.9883,
      "step": 208573
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.908137798309326,
      "learning_rate": 1.3190881834002398e-05,
      "loss": 3.0541,
      "step": 208574
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5552890300750732,
      "learning_rate": 1.3189682231308019e-05,
      "loss": 3.0377,
      "step": 208575
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8810296058654785,
      "learning_rate": 1.3188482681937085e-05,
      "loss": 2.8274,
      "step": 208576
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9749677181243896,
      "learning_rate": 1.3187283185889763e-05,
      "loss": 2.842,
      "step": 208577
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.946901559829712,
      "learning_rate": 1.318608374316642e-05,
      "loss": 2.9958,
      "step": 208578
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.817941427230835,
      "learning_rate": 1.3184884353767123e-05,
      "loss": 2.6411,
      "step": 208579
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.213914632797241,
      "learning_rate": 1.3183685017692236e-05,
      "loss": 2.7347,
      "step": 208580
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.515493154525757,
      "learning_rate": 1.3182485734941894e-05,
      "loss": 3.0071,
      "step": 208581
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.287764072418213,
      "learning_rate": 1.3181286505516365e-05,
      "loss": 2.752,
      "step": 208582
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.246257781982422,
      "learning_rate": 1.3180087329415811e-05,
      "loss": 3.0649,
      "step": 208583
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4687201976776123,
      "learning_rate": 1.3178888206640503e-05,
      "loss": 2.7464,
      "step": 208584
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5255467891693115,
      "learning_rate": 1.3177689137190639e-05,
      "loss": 3.0155,
      "step": 208585
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6866908073425293,
      "learning_rate": 1.3176490121066484e-05,
      "loss": 3.0009,
      "step": 208586
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.011725664138794,
      "learning_rate": 1.3175291158268209e-05,
      "loss": 2.7672,
      "step": 208587
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8919878005981445,
      "learning_rate": 1.3174092248796143e-05,
      "loss": 2.6625,
      "step": 208588
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0427825450897217,
      "learning_rate": 1.3172893392650318e-05,
      "loss": 2.9039,
      "step": 208589
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6264965534210205,
      "learning_rate": 1.3171694589831139e-05,
      "loss": 2.8416,
      "step": 208590
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.065746307373047,
      "learning_rate": 1.3170495840338702e-05,
      "loss": 3.0415,
      "step": 208591
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8024017810821533,
      "learning_rate": 1.316929714417334e-05,
      "loss": 2.9596,
      "step": 208592
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4083328247070312,
      "learning_rate": 1.3168098501335155e-05,
      "loss": 2.7015,
      "step": 208593
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.809556007385254,
      "learning_rate": 1.3166899911824513e-05,
      "loss": 2.825,
      "step": 208594
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7598726749420166,
      "learning_rate": 1.3165701375641514e-05,
      "loss": 2.9823,
      "step": 208595
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.636768102645874,
      "learning_rate": 1.316450289278642e-05,
      "loss": 2.9284,
      "step": 208596
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4682159423828125,
      "learning_rate": 1.3163304463259438e-05,
      "loss": 2.674,
      "step": 208597
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0554940700531006,
      "learning_rate": 1.3162106087060864e-05,
      "loss": 2.808,
      "step": 208598
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1540896892547607,
      "learning_rate": 1.3160907764190832e-05,
      "loss": 2.912,
      "step": 208599
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1282761096954346,
      "learning_rate": 1.3159709494649606e-05,
      "loss": 3.1113,
      "step": 208600
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8650407791137695,
      "learning_rate": 1.3158511278437456e-05,
      "loss": 3.0556,
      "step": 208601
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6961536407470703,
      "learning_rate": 1.3157313115554512e-05,
      "loss": 2.8639,
      "step": 208602
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.70051646232605,
      "learning_rate": 1.3156115006001012e-05,
      "loss": 2.7945,
      "step": 208603
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.575279712677002,
      "learning_rate": 1.315491694977725e-05,
      "loss": 3.0597,
      "step": 208604
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8691697120666504,
      "learning_rate": 1.3153718946883363e-05,
      "loss": 2.9676,
      "step": 208605
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2597203254699707,
      "learning_rate": 1.315252099731965e-05,
      "loss": 2.9478,
      "step": 208606
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9772884845733643,
      "learning_rate": 1.3151323101086276e-05,
      "loss": 3.1072,
      "step": 208607
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0388143062591553,
      "learning_rate": 1.3150125258183508e-05,
      "loss": 2.593,
      "step": 208608
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7313404083251953,
      "learning_rate": 1.3148927468611514e-05,
      "loss": 2.8823,
      "step": 208609
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.938911199569702,
      "learning_rate": 1.3147729732370594e-05,
      "loss": 2.9136,
      "step": 208610
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.450495958328247,
      "learning_rate": 1.3146532049460879e-05,
      "loss": 3.1103,
      "step": 208611
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.900620698928833,
      "learning_rate": 1.3145334419882669e-05,
      "loss": 3.0475,
      "step": 208612
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6155054569244385,
      "learning_rate": 1.3144136843636132e-05,
      "loss": 2.8828,
      "step": 208613
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.829141616821289,
      "learning_rate": 1.3142939320721501e-05,
      "loss": 2.8577,
      "step": 208614
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6434295177459717,
      "learning_rate": 1.3141741851139075e-05,
      "loss": 3.2216,
      "step": 208615
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.04024338722229,
      "learning_rate": 1.3140544434888988e-05,
      "loss": 2.9044,
      "step": 208616
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7219910621643066,
      "learning_rate": 1.313934707197144e-05,
      "loss": 2.8947,
      "step": 208617
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2666618824005127,
      "learning_rate": 1.3138149762386762e-05,
      "loss": 2.8205,
      "step": 208618
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1486501693725586,
      "learning_rate": 1.3136952506135124e-05,
      "loss": 2.8018,
      "step": 208619
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8141121864318848,
      "learning_rate": 1.3135755303216688e-05,
      "loss": 3.0089,
      "step": 208620
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.95607328414917,
      "learning_rate": 1.3134558153631758e-05,
      "loss": 2.8268,
      "step": 208621
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8881895542144775,
      "learning_rate": 1.3133361057380497e-05,
      "loss": 3.0798,
      "step": 208622
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1904356479644775,
      "learning_rate": 1.3132164014463209e-05,
      "loss": 2.8862,
      "step": 208623
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3124845027923584,
      "learning_rate": 1.3130967024880056e-05,
      "loss": 3.0472,
      "step": 208624
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9270241260528564,
      "learning_rate": 1.3129770088631275e-05,
      "loss": 2.9845,
      "step": 208625
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4782469272613525,
      "learning_rate": 1.3128573205717063e-05,
      "loss": 3.0969,
      "step": 208626
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0502970218658447,
      "learning_rate": 1.3127376376137687e-05,
      "loss": 3.0286,
      "step": 208627
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6684184074401855,
      "learning_rate": 1.3126179599893283e-05,
      "loss": 2.696,
      "step": 208628
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6419198513031006,
      "learning_rate": 1.3124982876984213e-05,
      "loss": 2.7416,
      "step": 208629
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4955599308013916,
      "learning_rate": 1.3123786207410615e-05,
      "loss": 3.1301,
      "step": 208630
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9952335357666016,
      "learning_rate": 1.3122589591172717e-05,
      "loss": 2.7949,
      "step": 208631
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.153153419494629,
      "learning_rate": 1.3121393028270721e-05,
      "loss": 3.1158,
      "step": 208632
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.925535202026367,
      "learning_rate": 1.3120196518704895e-05,
      "loss": 2.8812,
      "step": 208633
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0987305641174316,
      "learning_rate": 1.3119000062475405e-05,
      "loss": 2.7306,
      "step": 208634
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.688552141189575,
      "learning_rate": 1.3117803659582548e-05,
      "loss": 2.8787,
      "step": 208635
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3857085704803467,
      "learning_rate": 1.3116607310026495e-05,
      "loss": 2.9725,
      "step": 208636
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8517403602600098,
      "learning_rate": 1.3115411013807476e-05,
      "loss": 2.8894,
      "step": 208637
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8049135208129883,
      "learning_rate": 1.311421477092569e-05,
      "loss": 2.913,
      "step": 208638
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2381479740142822,
      "learning_rate": 1.311301858138144e-05,
      "loss": 2.9223,
      "step": 208639
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.942826986312866,
      "learning_rate": 1.3111822445174824e-05,
      "loss": 2.9864,
      "step": 208640
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6664412021636963,
      "learning_rate": 1.311062636230621e-05,
      "loss": 2.644,
      "step": 208641
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6010196208953857,
      "learning_rate": 1.3109430332775729e-05,
      "loss": 2.821,
      "step": 208642
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.626542806625366,
      "learning_rate": 1.3108234356583614e-05,
      "loss": 2.7149,
      "step": 208643
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.74128794670105,
      "learning_rate": 1.3107038433730065e-05,
      "loss": 3.3048,
      "step": 208644
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7872724533081055,
      "learning_rate": 1.3105842564215386e-05,
      "loss": 2.5725,
      "step": 208645
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.638430118560791,
      "learning_rate": 1.310464674803967e-05,
      "loss": 3.1866,
      "step": 208646
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.454589605331421,
      "learning_rate": 1.310345098520329e-05,
      "loss": 2.958,
      "step": 208647
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8117640018463135,
      "learning_rate": 1.3102255275706375e-05,
      "loss": 2.8252,
      "step": 208648
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9108264446258545,
      "learning_rate": 1.310105961954916e-05,
      "loss": 2.8641,
      "step": 208649
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6829121112823486,
      "learning_rate": 1.3099864016731843e-05,
      "loss": 3.1575,
      "step": 208650
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.144886016845703,
      "learning_rate": 1.3098668467254725e-05,
      "loss": 2.9935,
      "step": 208651
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5383572578430176,
      "learning_rate": 1.3097472971117939e-05,
      "loss": 2.9484,
      "step": 208652
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7318146228790283,
      "learning_rate": 1.3096277528321785e-05,
      "loss": 3.1206,
      "step": 208653
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6871676445007324,
      "learning_rate": 1.3095082138866397e-05,
      "loss": 2.9033,
      "step": 208654
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.607458591461182,
      "learning_rate": 1.309388680275214e-05,
      "loss": 2.9176,
      "step": 208655
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.910653591156006,
      "learning_rate": 1.3092691519979082e-05,
      "loss": 2.5353,
      "step": 208656
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.218418836593628,
      "learning_rate": 1.3091496290547521e-05,
      "loss": 2.9324,
      "step": 208657
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.671517848968506,
      "learning_rate": 1.3090301114457624e-05,
      "loss": 2.9638,
      "step": 208658
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8504819869995117,
      "learning_rate": 1.3089105991709726e-05,
      "loss": 3.1077,
      "step": 208659
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1229922771453857,
      "learning_rate": 1.3087910922303923e-05,
      "loss": 2.7565,
      "step": 208660
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.772937059402466,
      "learning_rate": 1.3086715906240553e-05,
      "loss": 3.2139,
      "step": 208661
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2702457904815674,
      "learning_rate": 1.3085520943519744e-05,
      "loss": 2.7205,
      "step": 208662
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.548344135284424,
      "learning_rate": 1.3084326034141768e-05,
      "loss": 3.1111,
      "step": 208663
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2957282066345215,
      "learning_rate": 1.3083131178106787e-05,
      "loss": 2.9473,
      "step": 208664
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.335864782333374,
      "learning_rate": 1.3081936375415103e-05,
      "loss": 3.023,
      "step": 208665
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6213600635528564,
      "learning_rate": 1.3080741626066882e-05,
      "loss": 2.7769,
      "step": 208666
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.309539556503296,
      "learning_rate": 1.3079546930062423e-05,
      "loss": 3.1537,
      "step": 208667
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6332225799560547,
      "learning_rate": 1.3078352287401828e-05,
      "loss": 2.8992,
      "step": 208668
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.542618989944458,
      "learning_rate": 1.3077157698085427e-05,
      "loss": 2.7846,
      "step": 208669
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.729006052017212,
      "learning_rate": 1.3075963162113357e-05,
      "loss": 3.0847,
      "step": 208670
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6246135234832764,
      "learning_rate": 1.3074768679485913e-05,
      "loss": 2.9641,
      "step": 208671
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5517215728759766,
      "learning_rate": 1.3073574250203233e-05,
      "loss": 3.0044,
      "step": 208672
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9168288707733154,
      "learning_rate": 1.3072379874265681e-05,
      "loss": 2.8359,
      "step": 208673
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7131831645965576,
      "learning_rate": 1.3071185551673324e-05,
      "loss": 3.0818,
      "step": 208674
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1672449111938477,
      "learning_rate": 1.3069991282426494e-05,
      "loss": 2.8441,
      "step": 208675
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2803335189819336,
      "learning_rate": 1.3068797066525295e-05,
      "loss": 2.9749,
      "step": 208676
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.362034320831299,
      "learning_rate": 1.3067602903970087e-05,
      "loss": 3.1314,
      "step": 208677
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.801574468612671,
      "learning_rate": 1.3066408794761007e-05,
      "loss": 3.1111,
      "step": 208678
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.192080974578857,
      "learning_rate": 1.3065214738898354e-05,
      "loss": 2.9021,
      "step": 208679
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0565361976623535,
      "learning_rate": 1.3064020736382196e-05,
      "loss": 3.1102,
      "step": 208680
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3049850463867188,
      "learning_rate": 1.3062826787212932e-05,
      "loss": 2.9017,
      "step": 208681
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.205421209335327,
      "learning_rate": 1.3061632891390627e-05,
      "loss": 2.9645,
      "step": 208682
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4610729217529297,
      "learning_rate": 1.3060439048915649e-05,
      "loss": 3.0446,
      "step": 208683
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0447819232940674,
      "learning_rate": 1.3059245259788098e-05,
      "loss": 2.866,
      "step": 208684
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6521549224853516,
      "learning_rate": 1.3058051524008305e-05,
      "loss": 2.6561,
      "step": 208685
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1755475997924805,
      "learning_rate": 1.305685784157644e-05,
      "loss": 2.8887,
      "step": 208686
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.173964500427246,
      "learning_rate": 1.30556642124927e-05,
      "loss": 2.9664,
      "step": 208687
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.354224443435669,
      "learning_rate": 1.3054470636757285e-05,
      "loss": 2.5373,
      "step": 208688
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.26432466506958,
      "learning_rate": 1.305327711437053e-05,
      "loss": 3.0131,
      "step": 208689
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4776546955108643,
      "learning_rate": 1.30520836453325e-05,
      "loss": 2.9154,
      "step": 208690
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8745839595794678,
      "learning_rate": 1.3050890229643595e-05,
      "loss": 3.0005,
      "step": 208691
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.184821605682373,
      "learning_rate": 1.3049696867303917e-05,
      "loss": 2.5826,
      "step": 208692
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.082557201385498,
      "learning_rate": 1.3048503558313728e-05,
      "loss": 3.0369,
      "step": 208693
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.899705410003662,
      "learning_rate": 1.3047310302673197e-05,
      "loss": 2.9504,
      "step": 208694
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1342804431915283,
      "learning_rate": 1.3046117100382625e-05,
      "loss": 2.7986,
      "step": 208695
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6266748905181885,
      "learning_rate": 1.3044923951442144e-05,
      "loss": 3.0593,
      "step": 208696
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9311718940734863,
      "learning_rate": 1.3043730855852085e-05,
      "loss": 2.8891,
      "step": 208697
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.520297050476074,
      "learning_rate": 1.3042537813612619e-05,
      "loss": 2.7714,
      "step": 208698
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8640542030334473,
      "learning_rate": 1.3041344824723909e-05,
      "loss": 2.9856,
      "step": 208699
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.112513303756714,
      "learning_rate": 1.304015188918629e-05,
      "loss": 2.9594,
      "step": 208700
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.704671859741211,
      "learning_rate": 1.3038959006999893e-05,
      "loss": 3.0991,
      "step": 208701
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.738429546356201,
      "learning_rate": 1.3037766178164955e-05,
      "loss": 2.8826,
      "step": 208702
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.1094560623168945,
      "learning_rate": 1.3036573402681738e-05,
      "loss": 2.7624,
      "step": 208703
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5122103691101074,
      "learning_rate": 1.3035380680550445e-05,
      "loss": 3.0896,
      "step": 208704
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1123859882354736,
      "learning_rate": 1.303418801177124e-05,
      "loss": 3.0234,
      "step": 208705
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.21087908744812,
      "learning_rate": 1.303299539634446e-05,
      "loss": 3.0412,
      "step": 208706
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.030996561050415,
      "learning_rate": 1.3031802834270233e-05,
      "loss": 2.8852,
      "step": 208707
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.742223024368286,
      "learning_rate": 1.3030610325548796e-05,
      "loss": 3.0008,
      "step": 208708
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0616934299468994,
      "learning_rate": 1.3029417870180414e-05,
      "loss": 2.9705,
      "step": 208709
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.01717209815979,
      "learning_rate": 1.3028225468165288e-05,
      "loss": 3.0293,
      "step": 208710
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8416595458984375,
      "learning_rate": 1.3027033119503583e-05,
      "loss": 2.8636,
      "step": 208711
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1059861183166504,
      "learning_rate": 1.30258408241956e-05,
      "loss": 2.989,
      "step": 208712
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9969890117645264,
      "learning_rate": 1.3024648582241504e-05,
      "loss": 2.8703,
      "step": 208713
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9255316257476807,
      "learning_rate": 1.3023456393641596e-05,
      "loss": 2.9793,
      "step": 208714
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.053858518600464,
      "learning_rate": 1.3022264258396043e-05,
      "loss": 3.0005,
      "step": 208715
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2844460010528564,
      "learning_rate": 1.3021072176505042e-05,
      "loss": 2.9754,
      "step": 208716
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.083897352218628,
      "learning_rate": 1.301988014796883e-05,
      "loss": 2.8535,
      "step": 208717
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.921454668045044,
      "learning_rate": 1.3018688172787638e-05,
      "loss": 2.7975,
      "step": 208718
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.195446014404297,
      "learning_rate": 1.3017496250961701e-05,
      "loss": 3.1096,
      "step": 208719
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.904146909713745,
      "learning_rate": 1.3016304382491216e-05,
      "loss": 2.7916,
      "step": 208720
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.9833285808563232,
      "learning_rate": 1.3015112567376419e-05,
      "loss": 2.7424,
      "step": 208721
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1481893062591553,
      "learning_rate": 1.3013920805617606e-05,
      "loss": 2.8231,
      "step": 208722
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0974223613739014,
      "learning_rate": 1.3012729097214814e-05,
      "loss": 3.1141,
      "step": 208723
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.216738224029541,
      "learning_rate": 1.3011537442168441e-05,
      "loss": 2.8701,
      "step": 208724
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4519715309143066,
      "learning_rate": 1.3010345840478587e-05,
      "loss": 2.8899,
      "step": 208725
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7811203002929688,
      "learning_rate": 1.3009154292145585e-05,
      "loss": 2.8901,
      "step": 208726
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.130079746246338,
      "learning_rate": 1.3007962797169535e-05,
      "loss": 2.9396,
      "step": 208727
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0578742027282715,
      "learning_rate": 1.3006771355550805e-05,
      "loss": 3.0025,
      "step": 208728
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9091808795928955,
      "learning_rate": 1.300557996728946e-05,
      "loss": 3.0057,
      "step": 208729
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.289585590362549,
      "learning_rate": 1.3004388632385831e-05,
      "loss": 3.1087,
      "step": 208730
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1402275562286377,
      "learning_rate": 1.300319735084009e-05,
      "loss": 2.9344,
      "step": 208731
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7468936443328857,
      "learning_rate": 1.3002006122652465e-05,
      "loss": 2.9082,
      "step": 208732
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9676520824432373,
      "learning_rate": 1.3000814947823191e-05,
      "loss": 2.9306,
      "step": 208733
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7506446838378906,
      "learning_rate": 1.2999623826352534e-05,
      "loss": 2.9339,
      "step": 208734
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6278693675994873,
      "learning_rate": 1.2998432758240596e-05,
      "loss": 2.7123,
      "step": 208735
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3309824466705322,
      "learning_rate": 1.2997241743487708e-05,
      "loss": 3.106,
      "step": 208736
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.264040231704712,
      "learning_rate": 1.2996050782094003e-05,
      "loss": 2.8863,
      "step": 208737
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5960333347320557,
      "learning_rate": 1.2994859874059783e-05,
      "loss": 2.8078,
      "step": 208738
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.256941556930542,
      "learning_rate": 1.299366901938521e-05,
      "loss": 2.7804,
      "step": 208739
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5847795009613037,
      "learning_rate": 1.299247821807059e-05,
      "loss": 2.9921,
      "step": 208740
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7859530448913574,
      "learning_rate": 1.2991287470116018e-05,
      "loss": 3.149,
      "step": 208741
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8293871879577637,
      "learning_rate": 1.299009677552183e-05,
      "loss": 3.017,
      "step": 208742
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.379676342010498,
      "learning_rate": 1.2988906134288158e-05,
      "loss": 2.6872,
      "step": 208743
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0221073627471924,
      "learning_rate": 1.29877155464153e-05,
      "loss": 2.9756,
      "step": 208744
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.399829149246216,
      "learning_rate": 1.2986525011903425e-05,
      "loss": 2.9014,
      "step": 208745
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.0056681632995605,
      "learning_rate": 1.2985334530752834e-05,
      "loss": 3.1512,
      "step": 208746
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.141270160675049,
      "learning_rate": 1.2984144102963623e-05,
      "loss": 2.8896,
      "step": 208747
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6225361824035645,
      "learning_rate": 1.2982953728536095e-05,
      "loss": 2.9454,
      "step": 208748
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6926753520965576,
      "learning_rate": 1.2981763407470414e-05,
      "loss": 3.065,
      "step": 208749
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8090736865997314,
      "learning_rate": 1.2980573139766915e-05,
      "loss": 3.0574,
      "step": 208750
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8028833866119385,
      "learning_rate": 1.2979382925425662e-05,
      "loss": 2.9204,
      "step": 208751
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6437594890594482,
      "learning_rate": 1.2978192764447059e-05,
      "loss": 2.7995,
      "step": 208752
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.012260913848877,
      "learning_rate": 1.297700265683117e-05,
      "loss": 2.7063,
      "step": 208753
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9546215534210205,
      "learning_rate": 1.2975812602578295e-05,
      "loss": 3.0951,
      "step": 208754
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.091308116912842,
      "learning_rate": 1.2974622601688566e-05,
      "loss": 3.0684,
      "step": 208755
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7553329467773438,
      "learning_rate": 1.2973432654162352e-05,
      "loss": 2.6063,
      "step": 208756
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.803509473800659,
      "learning_rate": 1.2972242759999752e-05,
      "loss": 3.0119,
      "step": 208757
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2483251094818115,
      "learning_rate": 1.2971052919201097e-05,
      "loss": 2.8486,
      "step": 208758
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.240556001663208,
      "learning_rate": 1.2969863131766456e-05,
      "loss": 2.8199,
      "step": 208759
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2176101207733154,
      "learning_rate": 1.2968673397696195e-05,
      "loss": 2.8448,
      "step": 208760
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.694807291030884,
      "learning_rate": 1.2967483716990412e-05,
      "loss": 2.8985,
      "step": 208761
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6882078647613525,
      "learning_rate": 1.2966294089649442e-05,
      "loss": 2.9063,
      "step": 208762
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.714735269546509,
      "learning_rate": 1.296510451567342e-05,
      "loss": 3.0906,
      "step": 208763
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.296889305114746,
      "learning_rate": 1.2963914995062674e-05,
      "loss": 2.8996,
      "step": 208764
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9554882049560547,
      "learning_rate": 1.2962725527817308e-05,
      "loss": 3.1807,
      "step": 208765
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3200478553771973,
      "learning_rate": 1.2961536113937588e-05,
      "loss": 3.0795,
      "step": 208766
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2979743480682373,
      "learning_rate": 1.2960346753423712e-05,
      "loss": 2.9399,
      "step": 208767
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7896816730499268,
      "learning_rate": 1.295915744627598e-05,
      "loss": 2.7757,
      "step": 208768
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.134124517440796,
      "learning_rate": 1.2957968192494495e-05,
      "loss": 2.7428,
      "step": 208769
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6535322666168213,
      "learning_rate": 1.2956778992079619e-05,
      "loss": 2.8202,
      "step": 208770
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.729510545730591,
      "learning_rate": 1.2955589845031422e-05,
      "loss": 2.8024,
      "step": 208771
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4693338871002197,
      "learning_rate": 1.2954400751350268e-05,
      "loss": 2.5325,
      "step": 208772
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.855154514312744,
      "learning_rate": 1.295321171103626e-05,
      "loss": 2.7829,
      "step": 208773
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.928335666656494,
      "learning_rate": 1.2952022724089695e-05,
      "loss": 3.0404,
      "step": 208774
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8540496826171875,
      "learning_rate": 1.2950833790510706e-05,
      "loss": 2.9838,
      "step": 208775
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.882709503173828,
      "learning_rate": 1.2949644910299662e-05,
      "loss": 2.9675,
      "step": 208776
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.907402276992798,
      "learning_rate": 1.294845608345666e-05,
      "loss": 2.9924,
      "step": 208777
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8295083045959473,
      "learning_rate": 1.2947267309981968e-05,
      "loss": 3.0744,
      "step": 208778
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5291502475738525,
      "learning_rate": 1.2946078589875753e-05,
      "loss": 3.1953,
      "step": 208779
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.886228084564209,
      "learning_rate": 1.2944889923138346e-05,
      "loss": 2.9117,
      "step": 208780
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0971839427948,
      "learning_rate": 1.294370130976985e-05,
      "loss": 2.9006,
      "step": 208781
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5520873069763184,
      "learning_rate": 1.294251274977056e-05,
      "loss": 2.9165,
      "step": 208782
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.258006572723389,
      "learning_rate": 1.2941324243140682e-05,
      "loss": 3.0381,
      "step": 208783
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7308685779571533,
      "learning_rate": 1.2940135789880413e-05,
      "loss": 2.9844,
      "step": 208784
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5654757022857666,
      "learning_rate": 1.2938947389990018e-05,
      "loss": 2.9667,
      "step": 208785
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8796727657318115,
      "learning_rate": 1.2937759043469698e-05,
      "loss": 3.0158,
      "step": 208786
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.93681263923645,
      "learning_rate": 1.2936570750319618e-05,
      "loss": 2.9353,
      "step": 208787
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8812179565429688,
      "learning_rate": 1.2935382510540082e-05,
      "loss": 2.8415,
      "step": 208788
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7234959602355957,
      "learning_rate": 1.2934194324131287e-05,
      "loss": 2.8657,
      "step": 208789
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2186338901519775,
      "learning_rate": 1.2933006191093399e-05,
      "loss": 2.7374,
      "step": 208790
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.977043628692627,
      "learning_rate": 1.2931818111426718e-05,
      "loss": 2.9833,
      "step": 208791
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1141250133514404,
      "learning_rate": 1.2930630085131444e-05,
      "loss": 2.7509,
      "step": 208792
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.14720344543457,
      "learning_rate": 1.2929442112207744e-05,
      "loss": 2.6467,
      "step": 208793
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.006688356399536,
      "learning_rate": 1.2928254192655918e-05,
      "loss": 3.1372,
      "step": 208794
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3594138622283936,
      "learning_rate": 1.2927066326476133e-05,
      "loss": 2.9533,
      "step": 208795
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1332595348358154,
      "learning_rate": 1.2925878513668619e-05,
      "loss": 3.0133,
      "step": 208796
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9529922008514404,
      "learning_rate": 1.2924690754233613e-05,
      "loss": 2.9872,
      "step": 208797
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5582735538482666,
      "learning_rate": 1.2923503048171313e-05,
      "loss": 3.0536,
      "step": 208798
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.249816656112671,
      "learning_rate": 1.2922315395481952e-05,
      "loss": 2.7941,
      "step": 208799
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.864391565322876,
      "learning_rate": 1.2921127796165798e-05,
      "loss": 2.8713,
      "step": 208800
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.853392124176025,
      "learning_rate": 1.2919940250222981e-05,
      "loss": 2.7016,
      "step": 208801
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7627925872802734,
      "learning_rate": 1.2918752757653739e-05,
      "loss": 2.8787,
      "step": 208802
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2221343517303467,
      "learning_rate": 1.2917565318458366e-05,
      "loss": 2.7047,
      "step": 208803
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0724310874938965,
      "learning_rate": 1.2916377932637001e-05,
      "loss": 3.0191,
      "step": 208804
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9961538314819336,
      "learning_rate": 1.2915190600189939e-05,
      "loss": 2.901,
      "step": 208805
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4657416343688965,
      "learning_rate": 1.291400332111735e-05,
      "loss": 2.9925,
      "step": 208806
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.104015350341797,
      "learning_rate": 1.2912816095419498e-05,
      "loss": 2.9299,
      "step": 208807
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8335378170013428,
      "learning_rate": 1.2911628923096518e-05,
      "loss": 2.9814,
      "step": 208808
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.663029193878174,
      "learning_rate": 1.2910441804148707e-05,
      "loss": 2.8989,
      "step": 208809
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9320383071899414,
      "learning_rate": 1.290925473857627e-05,
      "loss": 3.0714,
      "step": 208810
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.867093086242676,
      "learning_rate": 1.2908067726379401e-05,
      "loss": 2.7268,
      "step": 208811
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.924516201019287,
      "learning_rate": 1.2906880767558336e-05,
      "loss": 3.0346,
      "step": 208812
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.833415985107422,
      "learning_rate": 1.2905693862113376e-05,
      "loss": 2.933,
      "step": 208813
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.328399419784546,
      "learning_rate": 1.2904507010044585e-05,
      "loss": 2.9422,
      "step": 208814
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0313520431518555,
      "learning_rate": 1.290332021135233e-05,
      "loss": 3.0023,
      "step": 208815
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6108779907226562,
      "learning_rate": 1.2902133466036679e-05,
      "loss": 2.915,
      "step": 208816
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.88828444480896,
      "learning_rate": 1.2900946774098031e-05,
      "loss": 2.9135,
      "step": 208817
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0051960945129395,
      "learning_rate": 1.2899760135536453e-05,
      "loss": 2.8566,
      "step": 208818
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.172947406768799,
      "learning_rate": 1.2898573550352276e-05,
      "loss": 3.1143,
      "step": 208819
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8094308376312256,
      "learning_rate": 1.2897387018545635e-05,
      "loss": 2.791,
      "step": 208820
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9835691452026367,
      "learning_rate": 1.289620054011683e-05,
      "loss": 2.8595,
      "step": 208821
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7480554580688477,
      "learning_rate": 1.2895014115065993e-05,
      "loss": 2.814,
      "step": 208822
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.092357635498047,
      "learning_rate": 1.2893827743393425e-05,
      "loss": 2.7521,
      "step": 208823
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7114555835723877,
      "learning_rate": 1.2892641425099293e-05,
      "loss": 2.8815,
      "step": 208824
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8676509857177734,
      "learning_rate": 1.2891455160183895e-05,
      "loss": 3.2012,
      "step": 208825
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8964626789093018,
      "learning_rate": 1.2890268948647331e-05,
      "loss": 3.1515,
      "step": 208826
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.360832929611206,
      "learning_rate": 1.2889082790489902e-05,
      "loss": 2.8488,
      "step": 208827
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.912449598312378,
      "learning_rate": 1.2887896685711807e-05,
      "loss": 2.8,
      "step": 208828
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.580623149871826,
      "learning_rate": 1.288671063431328e-05,
      "loss": 2.9224,
      "step": 208829
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.283172369003296,
      "learning_rate": 1.2885524636294519e-05,
      "loss": 3.0254,
      "step": 208830
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.990437030792236,
      "learning_rate": 1.2884338691655794e-05,
      "loss": 2.7929,
      "step": 208831
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.016942262649536,
      "learning_rate": 1.2883152800397267e-05,
      "loss": 3.0105,
      "step": 208832
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1700358390808105,
      "learning_rate": 1.2881966962519174e-05,
      "loss": 2.6919,
      "step": 208833
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.952995777130127,
      "learning_rate": 1.2880781178021748e-05,
      "loss": 2.8769,
      "step": 208834
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4057998657226562,
      "learning_rate": 1.2879595446905223e-05,
      "loss": 2.8755,
      "step": 208835
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.9077253341674805,
      "learning_rate": 1.2878409769169762e-05,
      "loss": 2.9819,
      "step": 208836
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9221339225769043,
      "learning_rate": 1.2877224144815702e-05,
      "loss": 3.0361,
      "step": 208837
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5706913471221924,
      "learning_rate": 1.2876038573843106e-05,
      "loss": 2.8668,
      "step": 208838
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9951980113983154,
      "learning_rate": 1.287485305625231e-05,
      "loss": 3.1088,
      "step": 208839
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.14609432220459,
      "learning_rate": 1.287366759204348e-05,
      "loss": 2.9122,
      "step": 208840
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.594503879547119,
      "learning_rate": 1.2872482181216848e-05,
      "loss": 3.1505,
      "step": 208841
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.489123582839966,
      "learning_rate": 1.2871296823772647e-05,
      "loss": 2.9009,
      "step": 208842
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.771153211593628,
      "learning_rate": 1.2870111519711147e-05,
      "loss": 2.7726,
      "step": 208843
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.188451051712036,
      "learning_rate": 1.2868926269032442e-05,
      "loss": 2.6578,
      "step": 208844
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9123170375823975,
      "learning_rate": 1.286774107173687e-05,
      "loss": 2.6077,
      "step": 208845
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.025789737701416,
      "learning_rate": 1.2866555927824563e-05,
      "loss": 2.9762,
      "step": 208846
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8826370239257812,
      "learning_rate": 1.2865370837295819e-05,
      "loss": 2.8874,
      "step": 208847
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.099414825439453,
      "learning_rate": 1.2864185800150805e-05,
      "loss": 2.8588,
      "step": 208848
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.898533582687378,
      "learning_rate": 1.2863000816389791e-05,
      "loss": 2.8405,
      "step": 208849
    },
    {
      "epoch": 2.72,
      "grad_norm": 5.619033336639404,
      "learning_rate": 1.2861815886012938e-05,
      "loss": 2.8385,
      "step": 208850
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1912119388580322,
      "learning_rate": 1.2860631009020484e-05,
      "loss": 2.67,
      "step": 208851
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7398836612701416,
      "learning_rate": 1.2859446185412658e-05,
      "loss": 2.7432,
      "step": 208852
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8264477252960205,
      "learning_rate": 1.2858261415189696e-05,
      "loss": 2.6163,
      "step": 208853
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3253490924835205,
      "learning_rate": 1.2857076698351797e-05,
      "loss": 2.9696,
      "step": 208854
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2457563877105713,
      "learning_rate": 1.2855892034899195e-05,
      "loss": 2.9216,
      "step": 208855
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.893960952758789,
      "learning_rate": 1.2854707424832122e-05,
      "loss": 2.9854,
      "step": 208856
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.364837169647217,
      "learning_rate": 1.2853522868150746e-05,
      "loss": 2.8987,
      "step": 208857
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.850174903869629,
      "learning_rate": 1.285233836485533e-05,
      "loss": 2.9263,
      "step": 208858
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.335693836212158,
      "learning_rate": 1.2851153914946077e-05,
      "loss": 2.8584,
      "step": 208859
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.903733491897583,
      "learning_rate": 1.284996951842322e-05,
      "loss": 3.1158,
      "step": 208860
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.102733850479126,
      "learning_rate": 1.284878517528699e-05,
      "loss": 2.9222,
      "step": 208861
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.028149127960205,
      "learning_rate": 1.284760088553759e-05,
      "loss": 2.996,
      "step": 208862
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.16379714012146,
      "learning_rate": 1.2846416649175217e-05,
      "loss": 2.7497,
      "step": 208863
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0157008171081543,
      "learning_rate": 1.2845232466200107e-05,
      "loss": 2.9689,
      "step": 208864
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.99832820892334,
      "learning_rate": 1.2844048336612522e-05,
      "loss": 2.8237,
      "step": 208865
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0499424934387207,
      "learning_rate": 1.2842864260412633e-05,
      "loss": 2.8324,
      "step": 208866
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.173588991165161,
      "learning_rate": 1.2841680237600671e-05,
      "loss": 2.9291,
      "step": 208867
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9554951190948486,
      "learning_rate": 1.2840496268176903e-05,
      "loss": 2.9147,
      "step": 208868
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8105127811431885,
      "learning_rate": 1.2839312352141461e-05,
      "loss": 3.0844,
      "step": 208869
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9188432693481445,
      "learning_rate": 1.2838128489494614e-05,
      "loss": 2.9281,
      "step": 208870
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7597954273223877,
      "learning_rate": 1.2836944680236594e-05,
      "loss": 2.8809,
      "step": 208871
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0506057739257812,
      "learning_rate": 1.28357609243676e-05,
      "loss": 2.9524,
      "step": 208872
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8854472637176514,
      "learning_rate": 1.2834577221887864e-05,
      "loss": 2.6944,
      "step": 208873
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1533567905426025,
      "learning_rate": 1.2833393572797624e-05,
      "loss": 2.8081,
      "step": 208874
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5033864974975586,
      "learning_rate": 1.2832209977097008e-05,
      "loss": 2.8496,
      "step": 208875
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.801835536956787,
      "learning_rate": 1.2831026434786351e-05,
      "loss": 2.7704,
      "step": 208876
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.400744438171387,
      "learning_rate": 1.2829842945865853e-05,
      "loss": 3.062,
      "step": 208877
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.64276123046875,
      "learning_rate": 1.2828659510335647e-05,
      "loss": 3.0033,
      "step": 208878
    },
    {
      "epoch": 2.72,
      "grad_norm": 5.008631706237793,
      "learning_rate": 1.2827476128196068e-05,
      "loss": 3.1203,
      "step": 208879
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7033395767211914,
      "learning_rate": 1.282629279944728e-05,
      "loss": 2.8789,
      "step": 208880
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4595024585723877,
      "learning_rate": 1.2825109524089449e-05,
      "loss": 2.914,
      "step": 208881
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1763994693756104,
      "learning_rate": 1.282392630212291e-05,
      "loss": 2.9012,
      "step": 208882
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8223185539245605,
      "learning_rate": 1.2822743133547764e-05,
      "loss": 2.8337,
      "step": 208883
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5557241439819336,
      "learning_rate": 1.282156001836434e-05,
      "loss": 3.1174,
      "step": 208884
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.800804615020752,
      "learning_rate": 1.282037695657281e-05,
      "loss": 3.1699,
      "step": 208885
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6810548305511475,
      "learning_rate": 1.2819193948173401e-05,
      "loss": 2.622,
      "step": 208886
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0731992721557617,
      "learning_rate": 1.2818010993166284e-05,
      "loss": 3.3189,
      "step": 208887
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.025010347366333,
      "learning_rate": 1.2816828091551723e-05,
      "loss": 2.9392,
      "step": 208888
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7976186275482178,
      "learning_rate": 1.2815645243329953e-05,
      "loss": 2.719,
      "step": 208889
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9570109844207764,
      "learning_rate": 1.2814462448501172e-05,
      "loss": 2.9345,
      "step": 208890
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.906989097595215,
      "learning_rate": 1.2813279707065615e-05,
      "loss": 2.6375,
      "step": 208891
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.911242961883545,
      "learning_rate": 1.2812097019023481e-05,
      "loss": 2.9398,
      "step": 208892
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2480876445770264,
      "learning_rate": 1.2810914384375004e-05,
      "loss": 3.2765,
      "step": 208893
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8315229415893555,
      "learning_rate": 1.2809731803120383e-05,
      "loss": 2.8544,
      "step": 208894
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5542166233062744,
      "learning_rate": 1.280854927525985e-05,
      "loss": 2.7559,
      "step": 208895
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2243778705596924,
      "learning_rate": 1.280736680079364e-05,
      "loss": 2.7628,
      "step": 208896
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5084457397460938,
      "learning_rate": 1.280618437972195e-05,
      "loss": 2.7927,
      "step": 208897
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8594613075256348,
      "learning_rate": 1.2805002012045085e-05,
      "loss": 2.595,
      "step": 208898
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2434604167938232,
      "learning_rate": 1.2803819697763107e-05,
      "loss": 2.9316,
      "step": 208899
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7725486755371094,
      "learning_rate": 1.2802637436876351e-05,
      "loss": 2.5967,
      "step": 208900
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9920780658721924,
      "learning_rate": 1.2801455229384982e-05,
      "loss": 2.972,
      "step": 208901
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7017199993133545,
      "learning_rate": 1.2800273075289268e-05,
      "loss": 2.9642,
      "step": 208902
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8847248554229736,
      "learning_rate": 1.2799090974589377e-05,
      "loss": 2.9681,
      "step": 208903
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3568878173828125,
      "learning_rate": 1.2797908927285638e-05,
      "loss": 2.8296,
      "step": 208904
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2501296997070312,
      "learning_rate": 1.2796726933378087e-05,
      "loss": 2.8008,
      "step": 208905
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8126444816589355,
      "learning_rate": 1.2795544992867123e-05,
      "loss": 3.028,
      "step": 208906
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.190958261489868,
      "learning_rate": 1.2794363105752813e-05,
      "loss": 2.932,
      "step": 208907
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.262302875518799,
      "learning_rate": 1.2793181272035525e-05,
      "loss": 3.0398,
      "step": 208908
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9926912784576416,
      "learning_rate": 1.2791999491715355e-05,
      "loss": 2.835,
      "step": 208909
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6828885078430176,
      "learning_rate": 1.2790817764792637e-05,
      "loss": 2.864,
      "step": 208910
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.889993190765381,
      "learning_rate": 1.2789636091267441e-05,
      "loss": 3.0817,
      "step": 208911
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7713496685028076,
      "learning_rate": 1.278845447114013e-05,
      "loss": 2.5244,
      "step": 208912
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5747148990631104,
      "learning_rate": 1.278727290441084e-05,
      "loss": 2.8141,
      "step": 208913
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.485560178756714,
      "learning_rate": 1.2786091391079833e-05,
      "loss": 3.0276,
      "step": 208914
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0706779956817627,
      "learning_rate": 1.278490993114728e-05,
      "loss": 3.1291,
      "step": 208915
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.127476930618286,
      "learning_rate": 1.2783728524613513e-05,
      "loss": 2.685,
      "step": 208916
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.53348445892334,
      "learning_rate": 1.278254717147863e-05,
      "loss": 2.9131,
      "step": 208917
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.873556613922119,
      "learning_rate": 1.27813658717429e-05,
      "loss": 2.9593,
      "step": 208918
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1593286991119385,
      "learning_rate": 1.2780184625406487e-05,
      "loss": 2.8712,
      "step": 208919
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.646831750869751,
      "learning_rate": 1.2779003432469692e-05,
      "loss": 2.7671,
      "step": 208920
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.916560649871826,
      "learning_rate": 1.2777822292932682e-05,
      "loss": 2.9152,
      "step": 208921
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2872910499572754,
      "learning_rate": 1.2776641206795757e-05,
      "loss": 2.9703,
      "step": 208922
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1400063037872314,
      "learning_rate": 1.2775460174059049e-05,
      "loss": 3.0151,
      "step": 208923
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.9367330074310303,
      "learning_rate": 1.2774279194722791e-05,
      "loss": 2.9984,
      "step": 208924
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.155776262283325,
      "learning_rate": 1.2773098268787186e-05,
      "loss": 2.8616,
      "step": 208925
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9924044609069824,
      "learning_rate": 1.277191739625253e-05,
      "loss": 2.8729,
      "step": 208926
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.340724468231201,
      "learning_rate": 1.2770736577118956e-05,
      "loss": 2.9976,
      "step": 208927
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6558997631073,
      "learning_rate": 1.2769555811386768e-05,
      "loss": 2.8014,
      "step": 208928
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2829339504241943,
      "learning_rate": 1.276837509905616e-05,
      "loss": 2.8059,
      "step": 208929
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.645188331604004,
      "learning_rate": 1.2767194440127304e-05,
      "loss": 3.0516,
      "step": 208930
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0049386024475098,
      "learning_rate": 1.276601383460043e-05,
      "loss": 3.1815,
      "step": 208931
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8690826892852783,
      "learning_rate": 1.2764833282475773e-05,
      "loss": 2.825,
      "step": 208932
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9358503818511963,
      "learning_rate": 1.2763652783753564e-05,
      "loss": 2.8986,
      "step": 208933
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.288632869720459,
      "learning_rate": 1.2762472338434037e-05,
      "loss": 2.8945,
      "step": 208934
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0856335163116455,
      "learning_rate": 1.2761291946517393e-05,
      "loss": 2.6254,
      "step": 208935
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6227288246154785,
      "learning_rate": 1.2760111608003831e-05,
      "loss": 2.8662,
      "step": 208936
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8313379287719727,
      "learning_rate": 1.2758931322893551e-05,
      "loss": 3.1502,
      "step": 208937
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.166863441467285,
      "learning_rate": 1.2757751091186852e-05,
      "loss": 2.7725,
      "step": 208938
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0327343940734863,
      "learning_rate": 1.2756570912883867e-05,
      "loss": 2.8469,
      "step": 208939
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.507925271987915,
      "learning_rate": 1.2755390787984899e-05,
      "loss": 2.7913,
      "step": 208940
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.958750009536743,
      "learning_rate": 1.2754210716490143e-05,
      "loss": 2.886,
      "step": 208941
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0600664615631104,
      "learning_rate": 1.2753030698399768e-05,
      "loss": 2.9629,
      "step": 208942
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.338660955429077,
      "learning_rate": 1.2751850733714009e-05,
      "loss": 2.7675,
      "step": 208943
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0576868057250977,
      "learning_rate": 1.275067082243313e-05,
      "loss": 2.7746,
      "step": 208944
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6735808849334717,
      "learning_rate": 1.2749490964557296e-05,
      "loss": 2.821,
      "step": 208945
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.359400510787964,
      "learning_rate": 1.2748311160086811e-05,
      "loss": 3.2274,
      "step": 208946
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.152003288269043,
      "learning_rate": 1.2747131409021805e-05,
      "loss": 2.926,
      "step": 208947
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2255449295043945,
      "learning_rate": 1.2745951711362546e-05,
      "loss": 2.9453,
      "step": 208948
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.032687187194824,
      "learning_rate": 1.27447720671092e-05,
      "loss": 3.111,
      "step": 208949
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.252138376235962,
      "learning_rate": 1.2743592476262033e-05,
      "loss": 2.8601,
      "step": 208950
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.712494134902954,
      "learning_rate": 1.2742412938821245e-05,
      "loss": 2.9954,
      "step": 208951
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.8741326332092285,
      "learning_rate": 1.2741233454787103e-05,
      "loss": 2.8106,
      "step": 208952
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.602220058441162,
      "learning_rate": 1.2740054024159774e-05,
      "loss": 2.8061,
      "step": 208953
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.2771477699279785,
      "learning_rate": 1.2738874646939489e-05,
      "loss": 3.0334,
      "step": 208954
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.752696990966797,
      "learning_rate": 1.2737695323126452e-05,
      "loss": 2.9501,
      "step": 208955
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8023252487182617,
      "learning_rate": 1.2736516052720924e-05,
      "loss": 3.0608,
      "step": 208956
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.016674280166626,
      "learning_rate": 1.2735336835723075e-05,
      "loss": 2.7234,
      "step": 208957
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.112208366394043,
      "learning_rate": 1.2734157672133171e-05,
      "loss": 2.969,
      "step": 208958
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0457491874694824,
      "learning_rate": 1.2732978561951413e-05,
      "loss": 2.8598,
      "step": 208959
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4319303035736084,
      "learning_rate": 1.2731799505177998e-05,
      "loss": 2.744,
      "step": 208960
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8305630683898926,
      "learning_rate": 1.273062050181316e-05,
      "loss": 2.7488,
      "step": 208961
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4536147117614746,
      "learning_rate": 1.2729441551857167e-05,
      "loss": 2.8791,
      "step": 208962
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.721432685852051,
      "learning_rate": 1.2728262655310117e-05,
      "loss": 3.0278,
      "step": 208963
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7087244987487793,
      "learning_rate": 1.2727083812172379e-05,
      "loss": 2.8626,
      "step": 208964
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2789700031280518,
      "learning_rate": 1.2725905022444083e-05,
      "loss": 2.8226,
      "step": 208965
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9275479316711426,
      "learning_rate": 1.272472628612543e-05,
      "loss": 3.0552,
      "step": 208966
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.999192237854004,
      "learning_rate": 1.2723547603216688e-05,
      "loss": 2.8015,
      "step": 208967
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.295152187347412,
      "learning_rate": 1.2722368973718056e-05,
      "loss": 2.7112,
      "step": 208968
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0529253482818604,
      "learning_rate": 1.2721190397629799e-05,
      "loss": 3.1639,
      "step": 208969
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7571959495544434,
      "learning_rate": 1.2720011874952051e-05,
      "loss": 2.7459,
      "step": 208970
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.664126396179199,
      "learning_rate": 1.2718833405685113e-05,
      "loss": 2.8859,
      "step": 208971
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.801252603530884,
      "learning_rate": 1.2717654989829119e-05,
      "loss": 3.0665,
      "step": 208972
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8768746852874756,
      "learning_rate": 1.2716476627384365e-05,
      "loss": 2.964,
      "step": 208973
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.424745798110962,
      "learning_rate": 1.2715298318351019e-05,
      "loss": 2.9052,
      "step": 208974
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4547455310821533,
      "learning_rate": 1.271412006272935e-05,
      "loss": 2.9992,
      "step": 208975
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.897629499435425,
      "learning_rate": 1.2712941860519554e-05,
      "loss": 2.6977,
      "step": 208976
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8126988410949707,
      "learning_rate": 1.2711763711721868e-05,
      "loss": 2.8588,
      "step": 208977
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.632209062576294,
      "learning_rate": 1.2710585616336421e-05,
      "loss": 2.9434,
      "step": 208978
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0206398963928223,
      "learning_rate": 1.2709407574363551e-05,
      "loss": 2.7311,
      "step": 208979
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.292367696762085,
      "learning_rate": 1.2708229585803354e-05,
      "loss": 2.9858,
      "step": 208980
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4103808403015137,
      "learning_rate": 1.2707051650656197e-05,
      "loss": 3.144,
      "step": 208981
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5921919345855713,
      "learning_rate": 1.2705873768922148e-05,
      "loss": 2.9766,
      "step": 208982
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9872279167175293,
      "learning_rate": 1.2704695940601606e-05,
      "loss": 3.0433,
      "step": 208983
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9234087467193604,
      "learning_rate": 1.2703518165694604e-05,
      "loss": 2.8956,
      "step": 208984
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.685453414916992,
      "learning_rate": 1.2702340444201475e-05,
      "loss": 2.8913,
      "step": 208985
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.950319528579712,
      "learning_rate": 1.2701162776122353e-05,
      "loss": 2.9488,
      "step": 208986
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8205156326293945,
      "learning_rate": 1.269998516145757e-05,
      "loss": 2.8815,
      "step": 208987
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0624983310699463,
      "learning_rate": 1.2698807600207228e-05,
      "loss": 3.0569,
      "step": 208988
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.051000118255615,
      "learning_rate": 1.2697630092371657e-05,
      "loss": 2.8261,
      "step": 208989
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0681896209716797,
      "learning_rate": 1.2696452637950994e-05,
      "loss": 2.9601,
      "step": 208990
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0142571926116943,
      "learning_rate": 1.2695275236945502e-05,
      "loss": 3.1078,
      "step": 208991
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5105700492858887,
      "learning_rate": 1.2694097889355348e-05,
      "loss": 2.759,
      "step": 208992
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9953436851501465,
      "learning_rate": 1.26929205951808e-05,
      "loss": 2.9424,
      "step": 208993
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8498003482818604,
      "learning_rate": 1.2691743354422057e-05,
      "loss": 2.8728,
      "step": 208994
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.790658473968506,
      "learning_rate": 1.2690566167079352e-05,
      "loss": 3.0494,
      "step": 208995
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7260677814483643,
      "learning_rate": 1.2689389033152886e-05,
      "loss": 2.7958,
      "step": 208996
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8748843669891357,
      "learning_rate": 1.2688211952642924e-05,
      "loss": 2.6737,
      "step": 208997
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1631081104278564,
      "learning_rate": 1.2687034925549566e-05,
      "loss": 2.8574,
      "step": 208998
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5198850631713867,
      "learning_rate": 1.2685857951873179e-05,
      "loss": 2.9734,
      "step": 208999
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.690988540649414,
      "learning_rate": 1.2684681031613864e-05,
      "loss": 2.9038,
      "step": 209000
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.048403739929199,
      "learning_rate": 1.2683504164771951e-05,
      "loss": 2.7804,
      "step": 209001
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.274735927581787,
      "learning_rate": 1.2682327351347577e-05,
      "loss": 3.1699,
      "step": 209002
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.026392936706543,
      "learning_rate": 1.2681150591340971e-05,
      "loss": 3.032,
      "step": 209003
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7208590507507324,
      "learning_rate": 1.2679973884752337e-05,
      "loss": 2.6855,
      "step": 209004
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7289929389953613,
      "learning_rate": 1.2678797231581972e-05,
      "loss": 2.9687,
      "step": 209005
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9611992835998535,
      "learning_rate": 1.2677620631829977e-05,
      "loss": 3.1317,
      "step": 209006
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6313207149505615,
      "learning_rate": 1.2676444085496685e-05,
      "loss": 2.6867,
      "step": 209007
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2680575847625732,
      "learning_rate": 1.2675267592582295e-05,
      "loss": 2.9084,
      "step": 209008
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4530766010284424,
      "learning_rate": 1.2674091153086973e-05,
      "loss": 2.8284,
      "step": 209009
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4970335960388184,
      "learning_rate": 1.2672914767010889e-05,
      "loss": 2.9632,
      "step": 209010
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.295602321624756,
      "learning_rate": 1.2671738434354406e-05,
      "loss": 2.787,
      "step": 209011
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0858397483825684,
      "learning_rate": 1.2670562155117624e-05,
      "loss": 2.8049,
      "step": 209012
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.035219192504883,
      "learning_rate": 1.266938592930088e-05,
      "loss": 2.7474,
      "step": 209013
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8199079036712646,
      "learning_rate": 1.2668209756904269e-05,
      "loss": 2.8893,
      "step": 209014
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.9106905460357666,
      "learning_rate": 1.2667033637928093e-05,
      "loss": 2.9031,
      "step": 209015
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.6612346172332764,
      "learning_rate": 1.2665857572372484e-05,
      "loss": 3.0169,
      "step": 209016
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5653395652770996,
      "learning_rate": 1.2664681560237777e-05,
      "loss": 2.6829,
      "step": 209017
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8138787746429443,
      "learning_rate": 1.2663505601524071e-05,
      "loss": 2.7045,
      "step": 209018
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9107446670532227,
      "learning_rate": 1.2662329696231699e-05,
      "loss": 2.8904,
      "step": 209019
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8550913333892822,
      "learning_rate": 1.2661153844360827e-05,
      "loss": 2.7712,
      "step": 209020
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.304946184158325,
      "learning_rate": 1.2659978045911656e-05,
      "loss": 3.1581,
      "step": 209021
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8964109420776367,
      "learning_rate": 1.2658802300884352e-05,
      "loss": 2.8602,
      "step": 209022
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4030561447143555,
      "learning_rate": 1.2657626609279281e-05,
      "loss": 3.1386,
      "step": 209023
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4614455699920654,
      "learning_rate": 1.2656450971096543e-05,
      "loss": 3.1106,
      "step": 209024
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.115987539291382,
      "learning_rate": 1.2655275386336405e-05,
      "loss": 2.8769,
      "step": 209025
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.295161008834839,
      "learning_rate": 1.2654099854999101e-05,
      "loss": 2.9537,
      "step": 209026
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0819621086120605,
      "learning_rate": 1.2652924377084795e-05,
      "loss": 2.9006,
      "step": 209027
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9368250370025635,
      "learning_rate": 1.2651748952593721e-05,
      "loss": 2.9764,
      "step": 209028
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.977459669113159,
      "learning_rate": 1.2650573581526147e-05,
      "loss": 3.0804,
      "step": 209029
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5960066318511963,
      "learning_rate": 1.2649398263882204e-05,
      "loss": 3.0378,
      "step": 209030
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5937535762786865,
      "learning_rate": 1.2648222999662227e-05,
      "loss": 2.8167,
      "step": 209031
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5030930042266846,
      "learning_rate": 1.2647047788866349e-05,
      "loss": 2.7096,
      "step": 209032
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2757859230041504,
      "learning_rate": 1.2645872631494803e-05,
      "loss": 2.8249,
      "step": 209033
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.945387363433838,
      "learning_rate": 1.2644697527547787e-05,
      "loss": 3.0062,
      "step": 209034
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8677263259887695,
      "learning_rate": 1.2643522477025604e-05,
      "loss": 2.7936,
      "step": 209035
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1017813682556152,
      "learning_rate": 1.2642347479928349e-05,
      "loss": 3.2258,
      "step": 209036
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4071407318115234,
      "learning_rate": 1.2641172536256327e-05,
      "loss": 2.8757,
      "step": 209037
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7871203422546387,
      "learning_rate": 1.263999764600977e-05,
      "loss": 2.9442,
      "step": 209038
    },
    {
      "epoch": 2.72,
      "grad_norm": 5.1202216148376465,
      "learning_rate": 1.2638822809188842e-05,
      "loss": 2.8747,
      "step": 209039
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3292596340179443,
      "learning_rate": 1.2637648025793744e-05,
      "loss": 2.9475,
      "step": 209040
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.710275650024414,
      "learning_rate": 1.2636473295824779e-05,
      "loss": 2.7386,
      "step": 209041
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5723602771759033,
      "learning_rate": 1.2635298619282075e-05,
      "loss": 2.9474,
      "step": 209042
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.857706308364868,
      "learning_rate": 1.2634123996165935e-05,
      "loss": 3.0051,
      "step": 209043
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.54492449760437,
      "learning_rate": 1.2632949426476557e-05,
      "loss": 2.8982,
      "step": 209044
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.954678535461426,
      "learning_rate": 1.2631774910214077e-05,
      "loss": 3.1612,
      "step": 209045
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1182518005371094,
      "learning_rate": 1.2630600447378792e-05,
      "loss": 2.6776,
      "step": 209046
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.971564292907715,
      "learning_rate": 1.2629426037970936e-05,
      "loss": 2.8637,
      "step": 209047
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.067749500274658,
      "learning_rate": 1.2628251681990643e-05,
      "loss": 3.0564,
      "step": 209048
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2416529655456543,
      "learning_rate": 1.262707737943821e-05,
      "loss": 2.8168,
      "step": 209049
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.636326313018799,
      "learning_rate": 1.262590313031384e-05,
      "loss": 3.0078,
      "step": 209050
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4849791526794434,
      "learning_rate": 1.26247289346177e-05,
      "loss": 2.914,
      "step": 209051
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0973873138427734,
      "learning_rate": 1.2623554792350087e-05,
      "loss": 2.852,
      "step": 209052
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.118551731109619,
      "learning_rate": 1.262238070351117e-05,
      "loss": 2.9028,
      "step": 209053
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.050102472305298,
      "learning_rate": 1.2621206668101147e-05,
      "loss": 2.8443,
      "step": 209054
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5319831371307373,
      "learning_rate": 1.2620032686120318e-05,
      "loss": 3.2334,
      "step": 209055
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.035703182220459,
      "learning_rate": 1.2618858757568818e-05,
      "loss": 2.8603,
      "step": 209056
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.075465679168701,
      "learning_rate": 1.261768488244691e-05,
      "loss": 2.6911,
      "step": 209057
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.134434938430786,
      "learning_rate": 1.2616511060754797e-05,
      "loss": 2.7238,
      "step": 209058
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6562340259552,
      "learning_rate": 1.2615337292492678e-05,
      "loss": 2.9899,
      "step": 209059
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0628318786621094,
      "learning_rate": 1.261416357766082e-05,
      "loss": 2.9247,
      "step": 209060
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.0102386474609375,
      "learning_rate": 1.261298991625942e-05,
      "loss": 2.9303,
      "step": 209061
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8501455783843994,
      "learning_rate": 1.2611816308288646e-05,
      "loss": 3.1523,
      "step": 209062
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.873061180114746,
      "learning_rate": 1.2610642753748801e-05,
      "loss": 2.9391,
      "step": 209063
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8030359745025635,
      "learning_rate": 1.2609469252640047e-05,
      "loss": 2.8385,
      "step": 209064
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1439871788024902,
      "learning_rate": 1.2608295804962587e-05,
      "loss": 2.8732,
      "step": 209065
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4649250507354736,
      "learning_rate": 1.2607122410716718e-05,
      "loss": 3.0633,
      "step": 209066
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.741469144821167,
      "learning_rate": 1.2605949069902576e-05,
      "loss": 2.7183,
      "step": 209067
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7559309005737305,
      "learning_rate": 1.2604775782520426e-05,
      "loss": 3.1326,
      "step": 209068
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2232797145843506,
      "learning_rate": 1.2603602548570502e-05,
      "loss": 2.9091,
      "step": 209069
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.799797296524048,
      "learning_rate": 1.2602429368053003e-05,
      "loss": 2.9324,
      "step": 209070
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9443187713623047,
      "learning_rate": 1.260125624096806e-05,
      "loss": 2.838,
      "step": 209071
    },
    {
      "epoch": 2.72,
      "grad_norm": 5.2333478927612305,
      "learning_rate": 1.2600083167316044e-05,
      "loss": 3.0081,
      "step": 209072
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6373438835144043,
      "learning_rate": 1.2598910147097019e-05,
      "loss": 3.0861,
      "step": 209073
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9738574028015137,
      "learning_rate": 1.2597737180311352e-05,
      "loss": 3.119,
      "step": 209074
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.779370069503784,
      "learning_rate": 1.2596564266959175e-05,
      "loss": 2.775,
      "step": 209075
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.734487295150757,
      "learning_rate": 1.2595391407040722e-05,
      "loss": 2.9085,
      "step": 209076
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.706157684326172,
      "learning_rate": 1.2594218600556193e-05,
      "loss": 2.6412,
      "step": 209077
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.034475564956665,
      "learning_rate": 1.2593045847505855e-05,
      "loss": 2.9465,
      "step": 209078
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7613956928253174,
      "learning_rate": 1.2591873147889842e-05,
      "loss": 2.988,
      "step": 209079
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0199077129364014,
      "learning_rate": 1.2590700501708451e-05,
      "loss": 2.8465,
      "step": 209080
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3567028045654297,
      "learning_rate": 1.2589527908961916e-05,
      "loss": 2.9219,
      "step": 209081
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1567630767822266,
      "learning_rate": 1.2588355369650372e-05,
      "loss": 2.8639,
      "step": 209082
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.782299518585205,
      "learning_rate": 1.258718288377405e-05,
      "loss": 2.9911,
      "step": 209083
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8989970684051514,
      "learning_rate": 1.2586010451333251e-05,
      "loss": 3.0963,
      "step": 209084
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.948923110961914,
      "learning_rate": 1.2584838072328107e-05,
      "loss": 2.6779,
      "step": 209085
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.751020908355713,
      "learning_rate": 1.2583665746758887e-05,
      "loss": 2.776,
      "step": 209086
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5840842723846436,
      "learning_rate": 1.2582493474625787e-05,
      "loss": 3.162,
      "step": 209087
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9958200454711914,
      "learning_rate": 1.2581321255929045e-05,
      "loss": 2.9312,
      "step": 209088
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.411324501037598,
      "learning_rate": 1.258014909066879e-05,
      "loss": 3.1956,
      "step": 209089
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.163996458053589,
      "learning_rate": 1.2578976978845357e-05,
      "loss": 2.69,
      "step": 209090
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.821923017501831,
      "learning_rate": 1.257780492045891e-05,
      "loss": 2.7361,
      "step": 209091
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0275609493255615,
      "learning_rate": 1.2576632915509688e-05,
      "loss": 2.9748,
      "step": 209092
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.627115249633789,
      "learning_rate": 1.2575460963997886e-05,
      "loss": 2.6857,
      "step": 209093
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3662264347076416,
      "learning_rate": 1.257428906592377e-05,
      "loss": 2.9575,
      "step": 209094
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.70453143119812,
      "learning_rate": 1.2573117221287444e-05,
      "loss": 2.9032,
      "step": 209095
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.033996343612671,
      "learning_rate": 1.257194543008927e-05,
      "loss": 2.6362,
      "step": 209096
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.125532388687134,
      "learning_rate": 1.2570773692329316e-05,
      "loss": 2.88,
      "step": 209097
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5146102905273438,
      "learning_rate": 1.256960200800795e-05,
      "loss": 2.7824,
      "step": 209098
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0000088214874268,
      "learning_rate": 1.2568430377125304e-05,
      "loss": 2.9287,
      "step": 209099
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0485761165618896,
      "learning_rate": 1.2567258799681645e-05,
      "loss": 3.0104,
      "step": 209100
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.538811683654785,
      "learning_rate": 1.2566087275677106e-05,
      "loss": 2.952,
      "step": 209101
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.040029764175415,
      "learning_rate": 1.2564915805111986e-05,
      "loss": 2.7529,
      "step": 209102
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.888136625289917,
      "learning_rate": 1.2563744387986418e-05,
      "loss": 2.9814,
      "step": 209103
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.862063407897949,
      "learning_rate": 1.2562573024300737e-05,
      "loss": 2.9129,
      "step": 209104
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7964260578155518,
      "learning_rate": 1.2561401714055108e-05,
      "loss": 2.9105,
      "step": 209105
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7518720626831055,
      "learning_rate": 1.2560230457249731e-05,
      "loss": 2.8126,
      "step": 209106
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.906986713409424,
      "learning_rate": 1.2559059253884774e-05,
      "loss": 2.987,
      "step": 209107
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.066797733306885,
      "learning_rate": 1.2557888103960568e-05,
      "loss": 2.8869,
      "step": 209108
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7780139446258545,
      "learning_rate": 1.2556717007477245e-05,
      "loss": 2.9682,
      "step": 209109
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8867809772491455,
      "learning_rate": 1.2555545964435077e-05,
      "loss": 2.8667,
      "step": 209110
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.268043279647827,
      "learning_rate": 1.2554374974834292e-05,
      "loss": 2.9523,
      "step": 209111
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2526473999023438,
      "learning_rate": 1.2553204038675058e-05,
      "loss": 2.7429,
      "step": 209112
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5596423149108887,
      "learning_rate": 1.2552033155957542e-05,
      "loss": 2.8793,
      "step": 209113
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5352623462677,
      "learning_rate": 1.255086232668211e-05,
      "loss": 2.8182,
      "step": 209114
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4422993659973145,
      "learning_rate": 1.2549691550848829e-05,
      "loss": 3.1099,
      "step": 209115
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4364356994628906,
      "learning_rate": 1.2548520828458031e-05,
      "loss": 3.024,
      "step": 209116
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8208088874816895,
      "learning_rate": 1.2547350159509917e-05,
      "loss": 2.8895,
      "step": 209117
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.230325222015381,
      "learning_rate": 1.2546179544004653e-05,
      "loss": 2.7653,
      "step": 209118
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8494136333465576,
      "learning_rate": 1.2545008981942439e-05,
      "loss": 2.8392,
      "step": 209119
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5945427417755127,
      "learning_rate": 1.2543838473323575e-05,
      "loss": 3.0149,
      "step": 209120
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7406022548675537,
      "learning_rate": 1.2542668018148194e-05,
      "loss": 3.0494,
      "step": 209121
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.370669364929199,
      "learning_rate": 1.2541497616416596e-05,
      "loss": 2.8022,
      "step": 209122
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.872814416885376,
      "learning_rate": 1.2540327268128947e-05,
      "loss": 2.8159,
      "step": 209123
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2276837825775146,
      "learning_rate": 1.2539156973285513e-05,
      "loss": 2.9013,
      "step": 209124
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9888148307800293,
      "learning_rate": 1.2537986731886429e-05,
      "loss": 2.9625,
      "step": 209125
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9815707206726074,
      "learning_rate": 1.2536816543931994e-05,
      "loss": 3.023,
      "step": 209126
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.784550428390503,
      "learning_rate": 1.253564640942234e-05,
      "loss": 2.9964,
      "step": 209127
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9558534622192383,
      "learning_rate": 1.2534476328357802e-05,
      "loss": 2.9115,
      "step": 209128
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.449272394180298,
      "learning_rate": 1.2533306300738444e-05,
      "loss": 2.8838,
      "step": 209129
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.146942138671875,
      "learning_rate": 1.2532136326564667e-05,
      "loss": 2.9756,
      "step": 209130
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.250354051589966,
      "learning_rate": 1.253096640583654e-05,
      "loss": 3.1135,
      "step": 209131
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.812617063522339,
      "learning_rate": 1.252979653855436e-05,
      "loss": 2.8209,
      "step": 209132
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9872002601623535,
      "learning_rate": 1.2528626724718293e-05,
      "loss": 3.0935,
      "step": 209133
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3796794414520264,
      "learning_rate": 1.2527456964328575e-05,
      "loss": 3.3044,
      "step": 209134
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8921875953674316,
      "learning_rate": 1.2526287257385437e-05,
      "loss": 2.9217,
      "step": 209135
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7963707447052,
      "learning_rate": 1.2525117603889112e-05,
      "loss": 2.9638,
      "step": 209136
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.099156379699707,
      "learning_rate": 1.2523948003839768e-05,
      "loss": 3.2242,
      "step": 209137
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.819922924041748,
      "learning_rate": 1.252277845723767e-05,
      "loss": 3.0984,
      "step": 209138
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0619301795959473,
      "learning_rate": 1.2521608964082986e-05,
      "loss": 2.8714,
      "step": 209139
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0369110107421875,
      "learning_rate": 1.252043952437598e-05,
      "loss": 2.961,
      "step": 209140
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8414576053619385,
      "learning_rate": 1.2519270138116822e-05,
      "loss": 2.9204,
      "step": 209141
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1601614952087402,
      "learning_rate": 1.2518100805305808e-05,
      "loss": 3.0938,
      "step": 209142
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.939246416091919,
      "learning_rate": 1.2516931525943108e-05,
      "loss": 2.9843,
      "step": 209143
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8842551708221436,
      "learning_rate": 1.2515762300028886e-05,
      "loss": 3.0677,
      "step": 209144
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8228118419647217,
      "learning_rate": 1.2514593127563444e-05,
      "loss": 2.9291,
      "step": 209145
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.635624885559082,
      "learning_rate": 1.251342400854698e-05,
      "loss": 3.1286,
      "step": 209146
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.111806869506836,
      "learning_rate": 1.2512254942979626e-05,
      "loss": 3.2828,
      "step": 209147
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.873157501220703,
      "learning_rate": 1.2511085930861753e-05,
      "loss": 2.6875,
      "step": 209148
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.487147331237793,
      "learning_rate": 1.2509916972193457e-05,
      "loss": 2.9885,
      "step": 209149
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.5049712657928467,
      "learning_rate": 1.2508748066974972e-05,
      "loss": 3.0687,
      "step": 209150
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.07944393157959,
      "learning_rate": 1.2507579215206597e-05,
      "loss": 2.7102,
      "step": 209151
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.213667392730713,
      "learning_rate": 1.2506410416888468e-05,
      "loss": 2.9333,
      "step": 209152
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7719390392303467,
      "learning_rate": 1.2505241672020782e-05,
      "loss": 3.1952,
      "step": 209153
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8880081176757812,
      "learning_rate": 1.250407298060384e-05,
      "loss": 2.7975,
      "step": 209154
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.896113872528076,
      "learning_rate": 1.250290434263781e-05,
      "loss": 2.9966,
      "step": 209155
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7762527465820312,
      "learning_rate": 1.2501735758122889e-05,
      "loss": 2.9779,
      "step": 209156
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.945868968963623,
      "learning_rate": 1.2500567227059377e-05,
      "loss": 2.8828,
      "step": 209157
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.059671640396118,
      "learning_rate": 1.2499398749447376e-05,
      "loss": 2.9124,
      "step": 209158
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8251445293426514,
      "learning_rate": 1.2498230325287217e-05,
      "loss": 2.9936,
      "step": 209159
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.419421434402466,
      "learning_rate": 1.2497061954579036e-05,
      "loss": 2.991,
      "step": 209160
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8141913414001465,
      "learning_rate": 1.2495893637323096e-05,
      "loss": 3.0087,
      "step": 209161
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.710894823074341,
      "learning_rate": 1.2494725373519565e-05,
      "loss": 2.8627,
      "step": 209162
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8189404010772705,
      "learning_rate": 1.2493557163168743e-05,
      "loss": 2.5925,
      "step": 209163
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.228736162185669,
      "learning_rate": 1.2492389006270731e-05,
      "loss": 2.6932,
      "step": 209164
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.869472026824951,
      "learning_rate": 1.249122090282586e-05,
      "loss": 2.8231,
      "step": 209165
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7953832149505615,
      "learning_rate": 1.2490052852834298e-05,
      "loss": 2.9997,
      "step": 209166
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.804231882095337,
      "learning_rate": 1.2488884856296245e-05,
      "loss": 2.9928,
      "step": 209167
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6421806812286377,
      "learning_rate": 1.248771691321193e-05,
      "loss": 3.0152,
      "step": 209168
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.525928497314453,
      "learning_rate": 1.2486549023581594e-05,
      "loss": 2.7148,
      "step": 209169
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8958117961883545,
      "learning_rate": 1.2485381187405396e-05,
      "loss": 3.034,
      "step": 209170
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.038172721862793,
      "learning_rate": 1.248421340468364e-05,
      "loss": 2.9511,
      "step": 209171
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.843122720718384,
      "learning_rate": 1.2483045675416492e-05,
      "loss": 3.0281,
      "step": 209172
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.90262508392334,
      "learning_rate": 1.248187799960415e-05,
      "loss": 2.9374,
      "step": 209173
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.946254014968872,
      "learning_rate": 1.248071037724685e-05,
      "loss": 3.2245,
      "step": 209174
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.666769027709961,
      "learning_rate": 1.2479542808344822e-05,
      "loss": 2.9581,
      "step": 209175
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.009748935699463,
      "learning_rate": 1.2478375292898268e-05,
      "loss": 2.7216,
      "step": 209176
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8519704341888428,
      "learning_rate": 1.2477207830907421e-05,
      "loss": 2.7297,
      "step": 209177
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9202098846435547,
      "learning_rate": 1.2476040422372513e-05,
      "loss": 2.9871,
      "step": 209178
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.889143228530884,
      "learning_rate": 1.2474873067293745e-05,
      "loss": 2.9561,
      "step": 209179
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5002875328063965,
      "learning_rate": 1.247370576567125e-05,
      "loss": 2.9024,
      "step": 209180
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9643068313598633,
      "learning_rate": 1.2472538517505393e-05,
      "loss": 2.9157,
      "step": 209181
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.095577716827393,
      "learning_rate": 1.2471371322796241e-05,
      "loss": 3.146,
      "step": 209182
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9455583095550537,
      "learning_rate": 1.2470204181544164e-05,
      "loss": 2.8134,
      "step": 209183
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.792163848876953,
      "learning_rate": 1.246903709374929e-05,
      "loss": 3.2545,
      "step": 209184
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.933534622192383,
      "learning_rate": 1.2467870059411856e-05,
      "loss": 2.9184,
      "step": 209185
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4167351722717285,
      "learning_rate": 1.2466703078532025e-05,
      "loss": 2.744,
      "step": 209186
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.728898286819458,
      "learning_rate": 1.24655361511101e-05,
      "loss": 2.9697,
      "step": 209187
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7076871395111084,
      "learning_rate": 1.2464369277146213e-05,
      "loss": 3.0017,
      "step": 209188
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1075031757354736,
      "learning_rate": 1.2463202456640698e-05,
      "loss": 2.9824,
      "step": 209189
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.184776544570923,
      "learning_rate": 1.246203568959362e-05,
      "loss": 2.8959,
      "step": 209190
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.713404893875122,
      "learning_rate": 1.2460868976005377e-05,
      "loss": 2.8865,
      "step": 209191
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8644533157348633,
      "learning_rate": 1.2459702315876009e-05,
      "loss": 2.8203,
      "step": 209192
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.58353853225708,
      "learning_rate": 1.2458535709205842e-05,
      "loss": 2.9149,
      "step": 209193
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9279356002807617,
      "learning_rate": 1.2457369155995045e-05,
      "loss": 2.713,
      "step": 209194
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.978658437728882,
      "learning_rate": 1.2456202656243853e-05,
      "loss": 2.7506,
      "step": 209195
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.093729019165039,
      "learning_rate": 1.2455036209952463e-05,
      "loss": 2.8062,
      "step": 209196
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.066951274871826,
      "learning_rate": 1.245386981712121e-05,
      "loss": 2.884,
      "step": 209197
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.029223442077637,
      "learning_rate": 1.2452703477750093e-05,
      "loss": 2.6672,
      "step": 209198
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.181081771850586,
      "learning_rate": 1.245153719183951e-05,
      "loss": 2.9184,
      "step": 209199
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.123152494430542,
      "learning_rate": 1.2450370959389567e-05,
      "loss": 2.8921,
      "step": 209200
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.954982042312622,
      "learning_rate": 1.2449204780400557e-05,
      "loss": 2.7965,
      "step": 209201
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.928327798843384,
      "learning_rate": 1.244803865487265e-05,
      "loss": 2.9555,
      "step": 209202
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.85087513923645,
      "learning_rate": 1.2446872582806145e-05,
      "loss": 2.9841,
      "step": 209203
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1620240211486816,
      "learning_rate": 1.2445706564201107e-05,
      "loss": 2.9662,
      "step": 209204
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9155426025390625,
      "learning_rate": 1.2444540599057872e-05,
      "loss": 2.7818,
      "step": 209205
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.980210304260254,
      "learning_rate": 1.2443374687376606e-05,
      "loss": 2.7622,
      "step": 209206
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.582463026046753,
      "learning_rate": 1.2442208829157607e-05,
      "loss": 2.9176,
      "step": 209207
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7367124557495117,
      "learning_rate": 1.2441043024400943e-05,
      "loss": 2.9375,
      "step": 209208
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0180282592773438,
      "learning_rate": 1.2439877273107013e-05,
      "loss": 2.6681,
      "step": 209209
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8485429286956787,
      "learning_rate": 1.2438711575275884e-05,
      "loss": 2.9005,
      "step": 209210
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8432493209838867,
      "learning_rate": 1.2437545930907822e-05,
      "loss": 2.8291,
      "step": 209211
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2015249729156494,
      "learning_rate": 1.243638034000306e-05,
      "loss": 2.9069,
      "step": 209212
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6740620136260986,
      "learning_rate": 1.2435214802561799e-05,
      "loss": 2.8591,
      "step": 209213
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.883643388748169,
      "learning_rate": 1.2434049318584239e-05,
      "loss": 2.7034,
      "step": 209214
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8314852714538574,
      "learning_rate": 1.243288388807071e-05,
      "loss": 2.8406,
      "step": 209215
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5524609088897705,
      "learning_rate": 1.2431718511021215e-05,
      "loss": 2.8666,
      "step": 209216
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0311853885650635,
      "learning_rate": 1.2430553187436154e-05,
      "loss": 2.6598,
      "step": 209217
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.621984481811523,
      "learning_rate": 1.2429387917315658e-05,
      "loss": 3.0114,
      "step": 209218
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6176462173461914,
      "learning_rate": 1.2428222700659995e-05,
      "loss": 3.063,
      "step": 209219
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8898909091949463,
      "learning_rate": 1.2427057537469331e-05,
      "loss": 2.8837,
      "step": 209220
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.439689874649048,
      "learning_rate": 1.24258924277439e-05,
      "loss": 2.9852,
      "step": 209221
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.151895046234131,
      "learning_rate": 1.2424727371483967e-05,
      "loss": 2.8371,
      "step": 209222
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.036844253540039,
      "learning_rate": 1.24235623686897e-05,
      "loss": 2.8668,
      "step": 209223
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.892491340637207,
      "learning_rate": 1.2422397419361264e-05,
      "loss": 3.2138,
      "step": 209224
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.279392242431641,
      "learning_rate": 1.2421232523498958e-05,
      "loss": 3.0128,
      "step": 209225
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.539371490478516,
      "learning_rate": 1.2420067681102952e-05,
      "loss": 3.1059,
      "step": 209226
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0576188564300537,
      "learning_rate": 1.2418902892173543e-05,
      "loss": 2.839,
      "step": 209227
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.154324054718018,
      "learning_rate": 1.2417738156710865e-05,
      "loss": 2.7882,
      "step": 209228
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9140522480010986,
      "learning_rate": 1.241657347471512e-05,
      "loss": 3.1492,
      "step": 209229
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8322103023529053,
      "learning_rate": 1.2415408846186604e-05,
      "loss": 2.8706,
      "step": 209230
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2349627017974854,
      "learning_rate": 1.2414244271125518e-05,
      "loss": 2.6611,
      "step": 209231
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9711642265319824,
      "learning_rate": 1.2413079749531995e-05,
      "loss": 2.9931,
      "step": 209232
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.872635841369629,
      "learning_rate": 1.2411915281406337e-05,
      "loss": 2.6603,
      "step": 209233
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.944913148880005,
      "learning_rate": 1.2410750866748742e-05,
      "loss": 2.9403,
      "step": 209234
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.898995876312256,
      "learning_rate": 1.2409586505559377e-05,
      "loss": 2.8737,
      "step": 209235
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8382110595703125,
      "learning_rate": 1.2408422197838541e-05,
      "loss": 3.0875,
      "step": 209236
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9097611904144287,
      "learning_rate": 1.2407257943586402e-05,
      "loss": 2.8902,
      "step": 209237
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.901296615600586,
      "learning_rate": 1.2406093742803125e-05,
      "loss": 2.9494,
      "step": 209238
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6540520191192627,
      "learning_rate": 1.2404929595489044e-05,
      "loss": 3.0863,
      "step": 209239
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.135925054550171,
      "learning_rate": 1.2403765501644325e-05,
      "loss": 2.8502,
      "step": 209240
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9501538276672363,
      "learning_rate": 1.2402601461269135e-05,
      "loss": 2.8083,
      "step": 209241
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.4960272312164307,
      "learning_rate": 1.240143747436374e-05,
      "loss": 3.0228,
      "step": 209242
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.3158209323883057,
      "learning_rate": 1.240027354092834e-05,
      "loss": 2.6216,
      "step": 209243
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.188537359237671,
      "learning_rate": 1.2399109660963169e-05,
      "loss": 2.8821,
      "step": 209244
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0515756607055664,
      "learning_rate": 1.2397945834468459e-05,
      "loss": 2.8047,
      "step": 209245
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.146451711654663,
      "learning_rate": 1.2396782061444377e-05,
      "loss": 2.913,
      "step": 209246
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.7451820373535156,
      "learning_rate": 1.2395618341891156e-05,
      "loss": 2.9624,
      "step": 209247
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.868828773498535,
      "learning_rate": 1.2394454675809029e-05,
      "loss": 2.9202,
      "step": 209248
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.624647617340088,
      "learning_rate": 1.2393291063198162e-05,
      "loss": 2.8547,
      "step": 209249
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.5802299976348877,
      "learning_rate": 1.2392127504058858e-05,
      "loss": 2.7799,
      "step": 209250
    },
    {
      "epoch": 2.72,
      "grad_norm": 5.389827728271484,
      "learning_rate": 1.239096399839128e-05,
      "loss": 2.9751,
      "step": 209251
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7087812423706055,
      "learning_rate": 1.238980054619566e-05,
      "loss": 3.0163,
      "step": 209252
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2693428993225098,
      "learning_rate": 1.238863714747217e-05,
      "loss": 3.036,
      "step": 209253
    },
    {
      "epoch": 2.72,
      "grad_norm": 4.19282865524292,
      "learning_rate": 1.2387473802221104e-05,
      "loss": 2.7886,
      "step": 209254
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.8365042209625244,
      "learning_rate": 1.2386310510442565e-05,
      "loss": 2.8258,
      "step": 209255
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.836498260498047,
      "learning_rate": 1.238514727213692e-05,
      "loss": 2.8501,
      "step": 209256
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.611954927444458,
      "learning_rate": 1.2383984087304233e-05,
      "loss": 2.9281,
      "step": 209257
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0328457355499268,
      "learning_rate": 1.2382820955944871e-05,
      "loss": 2.923,
      "step": 209258
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.187727928161621,
      "learning_rate": 1.2381657878058937e-05,
      "loss": 2.9836,
      "step": 209259
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1218605041503906,
      "learning_rate": 1.2380494853646662e-05,
      "loss": 2.7768,
      "step": 209260
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.9804141521453857,
      "learning_rate": 1.2379331882708276e-05,
      "loss": 3.2488,
      "step": 209261
    },
    {
      "epoch": 2.72,
      "grad_norm": 5.030287265777588,
      "learning_rate": 1.237816896524405e-05,
      "loss": 2.8221,
      "step": 209262
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2743308544158936,
      "learning_rate": 1.2377006101254083e-05,
      "loss": 2.9204,
      "step": 209263
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.4051320552825928,
      "learning_rate": 1.2375843290738774e-05,
      "loss": 2.7648,
      "step": 209264
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7520570755004883,
      "learning_rate": 1.2374680533698122e-05,
      "loss": 2.7755,
      "step": 209265
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0991601943969727,
      "learning_rate": 1.2373517830132496e-05,
      "loss": 2.8968,
      "step": 209266
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.827219009399414,
      "learning_rate": 1.2372355180042025e-05,
      "loss": 2.9068,
      "step": 209267
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.8805344104766846,
      "learning_rate": 1.2371192583426981e-05,
      "loss": 3.1823,
      "step": 209268
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.86031174659729,
      "learning_rate": 1.2370030040287527e-05,
      "loss": 3.117,
      "step": 209269
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.2971036434173584,
      "learning_rate": 1.2368867550623995e-05,
      "loss": 2.9485,
      "step": 209270
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7644379138946533,
      "learning_rate": 1.2367705114436454e-05,
      "loss": 2.6876,
      "step": 209271
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1594245433807373,
      "learning_rate": 1.2366542731725238e-05,
      "loss": 2.9683,
      "step": 209272
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.812117576599121,
      "learning_rate": 1.2365380402490443e-05,
      "loss": 3.0009,
      "step": 209273
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.1323065757751465,
      "learning_rate": 1.2364218126732406e-05,
      "loss": 2.9068,
      "step": 209274
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.818476438522339,
      "learning_rate": 1.2363055904451258e-05,
      "loss": 2.7596,
      "step": 209275
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.7000463008880615,
      "learning_rate": 1.2361893735647298e-05,
      "loss": 2.8119,
      "step": 209276
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.0680673122406006,
      "learning_rate": 1.236073162032063e-05,
      "loss": 2.8362,
      "step": 209277
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.25270676612854,
      "learning_rate": 1.2359569558471583e-05,
      "loss": 2.7467,
      "step": 209278
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.996521234512329,
      "learning_rate": 1.2358407550100258e-05,
      "loss": 2.819,
      "step": 209279
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.6490767002105713,
      "learning_rate": 1.2357245595206988e-05,
      "loss": 3.0581,
      "step": 209280
    },
    {
      "epoch": 2.72,
      "grad_norm": 3.268911838531494,
      "learning_rate": 1.2356083693791907e-05,
      "loss": 2.7308,
      "step": 209281
    },
    {
      "epoch": 2.72,
      "grad_norm": 2.9379096031188965,
      "learning_rate": 1.2354921845855314e-05,
      "loss": 2.7176,
      "step": 209282
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0254385471343994,
      "learning_rate": 1.235376005139731e-05,
      "loss": 2.6588,
      "step": 209283
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.500823497772217,
      "learning_rate": 1.2352598310418228e-05,
      "loss": 2.9158,
      "step": 209284
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.763010263442993,
      "learning_rate": 1.2351436622918165e-05,
      "loss": 2.7505,
      "step": 209285
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4454147815704346,
      "learning_rate": 1.2350274988897423e-05,
      "loss": 3.0118,
      "step": 209286
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.818885087966919,
      "learning_rate": 1.234911340835617e-05,
      "loss": 2.7935,
      "step": 209287
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2959470748901367,
      "learning_rate": 1.2347951881294737e-05,
      "loss": 2.7923,
      "step": 209288
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3989102840423584,
      "learning_rate": 1.2346790407713159e-05,
      "loss": 2.816,
      "step": 209289
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9971024990081787,
      "learning_rate": 1.23456289876118e-05,
      "loss": 2.8939,
      "step": 209290
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.962019443511963,
      "learning_rate": 1.2344467620990762e-05,
      "loss": 3.051,
      "step": 209291
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.618741273880005,
      "learning_rate": 1.2343306307850342e-05,
      "loss": 2.8382,
      "step": 209292
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.541456937789917,
      "learning_rate": 1.2342145048190711e-05,
      "loss": 2.89,
      "step": 209293
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3643956184387207,
      "learning_rate": 1.2340983842012198e-05,
      "loss": 2.5952,
      "step": 209294
    },
    {
      "epoch": 2.73,
      "grad_norm": 5.688719749450684,
      "learning_rate": 1.2339822689314804e-05,
      "loss": 3.0544,
      "step": 209295
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.514181137084961,
      "learning_rate": 1.233866159009893e-05,
      "loss": 2.9229,
      "step": 209296
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9772162437438965,
      "learning_rate": 1.2337500544364709e-05,
      "loss": 2.8737,
      "step": 209297
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.39388370513916,
      "learning_rate": 1.2336339552112407e-05,
      "loss": 2.8905,
      "step": 209298
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.821235418319702,
      "learning_rate": 1.2335178613342156e-05,
      "loss": 2.9234,
      "step": 209299
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3052523136138916,
      "learning_rate": 1.2334017728054324e-05,
      "loss": 2.9513,
      "step": 209300
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2858219146728516,
      "learning_rate": 1.233285689624891e-05,
      "loss": 2.7598,
      "step": 209301
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8160016536712646,
      "learning_rate": 1.2331696117926315e-05,
      "loss": 3.0255,
      "step": 209302
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6473515033721924,
      "learning_rate": 1.2330535393086672e-05,
      "loss": 3.005,
      "step": 209303
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.037757635116577,
      "learning_rate": 1.2329374721730211e-05,
      "loss": 2.6803,
      "step": 209304
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.879948139190674,
      "learning_rate": 1.2328214103857137e-05,
      "loss": 2.8953,
      "step": 209305
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7270584106445312,
      "learning_rate": 1.2327053539467712e-05,
      "loss": 3.0945,
      "step": 209306
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.142935037612915,
      "learning_rate": 1.2325893028562106e-05,
      "loss": 2.8231,
      "step": 209307
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.65189528465271,
      "learning_rate": 1.232473257114055e-05,
      "loss": 2.9248,
      "step": 209308
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4114365577697754,
      "learning_rate": 1.2323572167203211e-05,
      "loss": 2.7562,
      "step": 209309
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9736320972442627,
      "learning_rate": 1.232241181675042e-05,
      "loss": 3.0528,
      "step": 209310
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.62062931060791,
      "learning_rate": 1.2321251519782249e-05,
      "loss": 2.8462,
      "step": 209311
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4410669803619385,
      "learning_rate": 1.232009127629906e-05,
      "loss": 2.5632,
      "step": 209312
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3548343181610107,
      "learning_rate": 1.2318931086300954e-05,
      "loss": 3.0149,
      "step": 209313
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.080589294433594,
      "learning_rate": 1.231777094978823e-05,
      "loss": 2.7392,
      "step": 209314
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.330909490585327,
      "learning_rate": 1.2316610866760989e-05,
      "loss": 2.9382,
      "step": 209315
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9571025371551514,
      "learning_rate": 1.2315450837219599e-05,
      "loss": 2.8238,
      "step": 209316
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8193418979644775,
      "learning_rate": 1.2314290861164123e-05,
      "loss": 2.7434,
      "step": 209317
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7935991287231445,
      "learning_rate": 1.2313130938594896e-05,
      "loss": 2.7676,
      "step": 209318
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.327026844024658,
      "learning_rate": 1.2311971069512117e-05,
      "loss": 2.896,
      "step": 209319
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1086642742156982,
      "learning_rate": 1.231081125391592e-05,
      "loss": 3.0171,
      "step": 209320
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1803269386291504,
      "learning_rate": 1.2309651491806605e-05,
      "loss": 3.0274,
      "step": 209321
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6368138790130615,
      "learning_rate": 1.2308491783184338e-05,
      "loss": 2.8459,
      "step": 209322
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.053776741027832,
      "learning_rate": 1.2307332128049353e-05,
      "loss": 2.7441,
      "step": 209323
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.665593147277832,
      "learning_rate": 1.230617252640188e-05,
      "loss": 2.7894,
      "step": 209324
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5831966400146484,
      "learning_rate": 1.2305012978242123e-05,
      "loss": 3.2313,
      "step": 209325
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1393871307373047,
      "learning_rate": 1.2303853483570248e-05,
      "loss": 2.8288,
      "step": 209326
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9718406200408936,
      "learning_rate": 1.2302694042386552e-05,
      "loss": 2.7335,
      "step": 209327
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.348245859146118,
      "learning_rate": 1.2301534654691203e-05,
      "loss": 2.6944,
      "step": 209328
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.4387030601501465,
      "learning_rate": 1.2300375320484468e-05,
      "loss": 3.0788,
      "step": 209329
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.130213499069214,
      "learning_rate": 1.2299216039766512e-05,
      "loss": 2.7596,
      "step": 209330
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.743612766265869,
      "learning_rate": 1.2298056812537539e-05,
      "loss": 3.1589,
      "step": 209331
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0337634086608887,
      "learning_rate": 1.2296897638797776e-05,
      "loss": 3.0993,
      "step": 209332
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.747091054916382,
      "learning_rate": 1.2295738518547492e-05,
      "loss": 2.9554,
      "step": 209333
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.055638313293457,
      "learning_rate": 1.2294579451786823e-05,
      "loss": 3.2537,
      "step": 209334
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.039607048034668,
      "learning_rate": 1.2293420438516033e-05,
      "loss": 2.8942,
      "step": 209335
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.345289707183838,
      "learning_rate": 1.2292261478735355e-05,
      "loss": 2.8985,
      "step": 209336
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0535731315612793,
      "learning_rate": 1.2291102572444988e-05,
      "loss": 2.8756,
      "step": 209337
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2495603561401367,
      "learning_rate": 1.2289943719645069e-05,
      "loss": 2.9428,
      "step": 209338
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.701634168624878,
      "learning_rate": 1.2288784920335925e-05,
      "loss": 2.9979,
      "step": 209339
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2930774688720703,
      "learning_rate": 1.2287626174517695e-05,
      "loss": 2.8924,
      "step": 209340
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.413602113723755,
      "learning_rate": 1.2286467482190676e-05,
      "loss": 2.9439,
      "step": 209341
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.107513427734375,
      "learning_rate": 1.2285308843354969e-05,
      "loss": 2.6614,
      "step": 209342
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.669614315032959,
      "learning_rate": 1.2284150258010938e-05,
      "loss": 2.8136,
      "step": 209343
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9844038486480713,
      "learning_rate": 1.2282991726158653e-05,
      "loss": 2.8196,
      "step": 209344
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.306447744369507,
      "learning_rate": 1.2281833247798411e-05,
      "loss": 2.861,
      "step": 209345
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.06125807762146,
      "learning_rate": 1.228067482293038e-05,
      "loss": 2.7703,
      "step": 209346
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.035667896270752,
      "learning_rate": 1.227951645155486e-05,
      "loss": 2.7146,
      "step": 209347
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2030160427093506,
      "learning_rate": 1.2278358133671951e-05,
      "loss": 2.8176,
      "step": 209348
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.007967233657837,
      "learning_rate": 1.2277199869281984e-05,
      "loss": 2.8755,
      "step": 209349
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9892866611480713,
      "learning_rate": 1.227604165838506e-05,
      "loss": 2.8194,
      "step": 209350
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.115436553955078,
      "learning_rate": 1.227488350098148e-05,
      "loss": 3.0064,
      "step": 209351
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.968759775161743,
      "learning_rate": 1.227372539707141e-05,
      "loss": 2.6503,
      "step": 209352
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.165693521499634,
      "learning_rate": 1.2272567346655115e-05,
      "loss": 2.9568,
      "step": 209353
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.227185010910034,
      "learning_rate": 1.227140934973273e-05,
      "loss": 2.656,
      "step": 209354
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1000301837921143,
      "learning_rate": 1.2270251406304621e-05,
      "loss": 3.0616,
      "step": 209355
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.576888084411621,
      "learning_rate": 1.226909351637082e-05,
      "loss": 2.9298,
      "step": 209356
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.757887601852417,
      "learning_rate": 1.2267935679931628e-05,
      "loss": 2.9863,
      "step": 209357
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7879974842071533,
      "learning_rate": 1.2266777896987279e-05,
      "loss": 3.0313,
      "step": 209358
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7756476402282715,
      "learning_rate": 1.226562016753797e-05,
      "loss": 2.8784,
      "step": 209359
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6980903148651123,
      "learning_rate": 1.226446249158387e-05,
      "loss": 2.9718,
      "step": 209360
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8570761680603027,
      "learning_rate": 1.2263304869125346e-05,
      "loss": 2.8371,
      "step": 209361
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8752222061157227,
      "learning_rate": 1.2262147300162396e-05,
      "loss": 2.9093,
      "step": 209362
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.947908878326416,
      "learning_rate": 1.2260989784695385e-05,
      "loss": 2.87,
      "step": 209363
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1143739223480225,
      "learning_rate": 1.2259832322724483e-05,
      "loss": 3.0069,
      "step": 209364
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.767761468887329,
      "learning_rate": 1.2258674914249922e-05,
      "loss": 2.8186,
      "step": 209365
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7735166549682617,
      "learning_rate": 1.2257517559271868e-05,
      "loss": 2.9868,
      "step": 209366
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8505942821502686,
      "learning_rate": 1.2256360257790655e-05,
      "loss": 2.9655,
      "step": 209367
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.765280246734619,
      "learning_rate": 1.2255203009806313e-05,
      "loss": 3.139,
      "step": 209368
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0643439292907715,
      "learning_rate": 1.2254045815319247e-05,
      "loss": 3.0727,
      "step": 209369
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.988217353820801,
      "learning_rate": 1.2252888674329519e-05,
      "loss": 2.7481,
      "step": 209370
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5666425228118896,
      "learning_rate": 1.2251731586837433e-05,
      "loss": 2.7858,
      "step": 209371
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9082984924316406,
      "learning_rate": 1.2250574552843185e-05,
      "loss": 3.0122,
      "step": 209372
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.408740520477295,
      "learning_rate": 1.2249417572347042e-05,
      "loss": 2.8543,
      "step": 209373
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6911139488220215,
      "learning_rate": 1.2248260645349073e-05,
      "loss": 3.1237,
      "step": 209374
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3776657581329346,
      "learning_rate": 1.2247103771849642e-05,
      "loss": 2.9904,
      "step": 209375
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0710084438323975,
      "learning_rate": 1.2245946951848851e-05,
      "loss": 2.9905,
      "step": 209376
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.14919114112854,
      "learning_rate": 1.224479018534703e-05,
      "loss": 3.0096,
      "step": 209377
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.183894634246826,
      "learning_rate": 1.2243633472344284e-05,
      "loss": 2.9555,
      "step": 209378
    },
    {
      "epoch": 2.73,
      "grad_norm": 5.363419532775879,
      "learning_rate": 1.224247681284094e-05,
      "loss": 2.8735,
      "step": 209379
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.0465087890625,
      "learning_rate": 1.2241320206837101e-05,
      "loss": 2.6612,
      "step": 209380
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.190631866455078,
      "learning_rate": 1.2240163654333068e-05,
      "loss": 2.7014,
      "step": 209381
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.778672218322754,
      "learning_rate": 1.2239007155328973e-05,
      "loss": 3.034,
      "step": 209382
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8095130920410156,
      "learning_rate": 1.2237850709825114e-05,
      "loss": 2.8989,
      "step": 209383
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9452571868896484,
      "learning_rate": 1.2236694317821627e-05,
      "loss": 3.0752,
      "step": 209384
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8373892307281494,
      "learning_rate": 1.223553797931881e-05,
      "loss": 2.8454,
      "step": 209385
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.377755641937256,
      "learning_rate": 1.2234381694316863e-05,
      "loss": 3.006,
      "step": 209386
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2149672508239746,
      "learning_rate": 1.2233225462815955e-05,
      "loss": 2.8313,
      "step": 209387
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.113189220428467,
      "learning_rate": 1.2232069284816282e-05,
      "loss": 2.8572,
      "step": 209388
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.845665216445923,
      "learning_rate": 1.2230913160318145e-05,
      "loss": 3.0801,
      "step": 209389
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5883917808532715,
      "learning_rate": 1.222975708932168e-05,
      "loss": 2.8627,
      "step": 209390
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9694228172302246,
      "learning_rate": 1.2228601071827182e-05,
      "loss": 2.9897,
      "step": 209391
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.525015115737915,
      "learning_rate": 1.2227445107834788e-05,
      "loss": 2.7748,
      "step": 209392
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.002614974975586,
      "learning_rate": 1.2226289197344763e-05,
      "loss": 2.7416,
      "step": 209393
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.68385648727417,
      "learning_rate": 1.2225133340357274e-05,
      "loss": 3.0807,
      "step": 209394
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.708171844482422,
      "learning_rate": 1.2223977536872586e-05,
      "loss": 2.8972,
      "step": 209395
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.366738796234131,
      "learning_rate": 1.2222821786890869e-05,
      "loss": 2.9554,
      "step": 209396
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0290677547454834,
      "learning_rate": 1.2221666090412385e-05,
      "loss": 2.7311,
      "step": 209397
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.001997232437134,
      "learning_rate": 1.2220510447437338e-05,
      "loss": 2.9414,
      "step": 209398
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.02020788192749,
      "learning_rate": 1.2219354857965924e-05,
      "loss": 2.9049,
      "step": 209399
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.301517963409424,
      "learning_rate": 1.2218199321998345e-05,
      "loss": 2.9943,
      "step": 209400
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.854144811630249,
      "learning_rate": 1.2217043839534835e-05,
      "loss": 2.8858,
      "step": 209401
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9726059436798096,
      "learning_rate": 1.2215888410575592e-05,
      "loss": 2.9601,
      "step": 209402
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9831700325012207,
      "learning_rate": 1.2214733035120883e-05,
      "loss": 2.9319,
      "step": 209403
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.015702486038208,
      "learning_rate": 1.2213577713170908e-05,
      "loss": 2.8289,
      "step": 209404
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8372912406921387,
      "learning_rate": 1.22124224447258e-05,
      "loss": 2.9473,
      "step": 209405
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.90264892578125,
      "learning_rate": 1.221126722978586e-05,
      "loss": 2.9302,
      "step": 209406
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9316742420196533,
      "learning_rate": 1.2210112068351318e-05,
      "loss": 2.9253,
      "step": 209407
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6813392639160156,
      "learning_rate": 1.2208956960422277e-05,
      "loss": 2.8745,
      "step": 209408
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8612189292907715,
      "learning_rate": 1.2207801905999071e-05,
      "loss": 2.9785,
      "step": 209409
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4179248809814453,
      "learning_rate": 1.2206646905081864e-05,
      "loss": 3.0305,
      "step": 209410
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8161985874176025,
      "learning_rate": 1.2205491957670855e-05,
      "loss": 2.8986,
      "step": 209411
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6941773891448975,
      "learning_rate": 1.2204337063766279e-05,
      "loss": 3.1073,
      "step": 209412
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7436416149139404,
      "learning_rate": 1.2203182223368336e-05,
      "loss": 2.8497,
      "step": 209413
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0088016986846924,
      "learning_rate": 1.220202743647729e-05,
      "loss": 2.6898,
      "step": 209414
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.169511556625366,
      "learning_rate": 1.2200872703093312e-05,
      "loss": 2.869,
      "step": 209415
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.011918783187866,
      "learning_rate": 1.2199718023216631e-05,
      "loss": 2.9695,
      "step": 209416
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.771444320678711,
      "learning_rate": 1.2198563396847417e-05,
      "loss": 3.0861,
      "step": 209417
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1776790618896484,
      "learning_rate": 1.2197408823985966e-05,
      "loss": 2.838,
      "step": 209418
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0165815353393555,
      "learning_rate": 1.2196254304632414e-05,
      "loss": 3.195,
      "step": 209419
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.081414222717285,
      "learning_rate": 1.219509983878706e-05,
      "loss": 3.0209,
      "step": 209420
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8397305011749268,
      "learning_rate": 1.2193945426450035e-05,
      "loss": 2.9747,
      "step": 209421
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.884674310684204,
      "learning_rate": 1.219279106762161e-05,
      "loss": 2.9061,
      "step": 209422
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5472192764282227,
      "learning_rate": 1.2191636762301914e-05,
      "loss": 2.9799,
      "step": 209423
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9214746952056885,
      "learning_rate": 1.2190482510491284e-05,
      "loss": 2.7382,
      "step": 209424
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7340853214263916,
      "learning_rate": 1.218932831218985e-05,
      "loss": 2.8861,
      "step": 209425
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.965656042098999,
      "learning_rate": 1.218817416739788e-05,
      "loss": 2.9473,
      "step": 209426
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.856590747833252,
      "learning_rate": 1.218702007611554e-05,
      "loss": 2.9226,
      "step": 209427
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6756091117858887,
      "learning_rate": 1.2185866038343094e-05,
      "loss": 2.8495,
      "step": 209428
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.639942169189453,
      "learning_rate": 1.2184712054080714e-05,
      "loss": 2.7474,
      "step": 209429
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.127669095993042,
      "learning_rate": 1.2183558123328629e-05,
      "loss": 3.046,
      "step": 209430
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.504377841949463,
      "learning_rate": 1.2182404246087008e-05,
      "loss": 2.9291,
      "step": 209431
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.337038040161133,
      "learning_rate": 1.2181250422356182e-05,
      "loss": 2.7723,
      "step": 209432
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.109614133834839,
      "learning_rate": 1.2180096652136217e-05,
      "loss": 2.9781,
      "step": 209433
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.020437479019165,
      "learning_rate": 1.2178942935427516e-05,
      "loss": 2.7799,
      "step": 209434
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9264426231384277,
      "learning_rate": 1.2177789272230076e-05,
      "loss": 2.8525,
      "step": 209435
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.583488941192627,
      "learning_rate": 1.2176635662544266e-05,
      "loss": 2.9755,
      "step": 209436
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0136518478393555,
      "learning_rate": 1.217548210637025e-05,
      "loss": 2.8276,
      "step": 209437
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4058709144592285,
      "learning_rate": 1.2174328603708228e-05,
      "loss": 2.9313,
      "step": 209438
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9636714458465576,
      "learning_rate": 1.2173175154558434e-05,
      "loss": 2.9577,
      "step": 209439
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.672240734100342,
      "learning_rate": 1.2172021758921135e-05,
      "loss": 2.8939,
      "step": 209440
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.830160140991211,
      "learning_rate": 1.217086841679643e-05,
      "loss": 2.7677,
      "step": 209441
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6197545528411865,
      "learning_rate": 1.216971512818462e-05,
      "loss": 2.7486,
      "step": 209442
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.17546010017395,
      "learning_rate": 1.2168561893085838e-05,
      "loss": 3.1436,
      "step": 209443
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.275998115539551,
      "learning_rate": 1.2167408711500415e-05,
      "loss": 2.8343,
      "step": 209444
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.226234197616577,
      "learning_rate": 1.2166255583428485e-05,
      "loss": 3.0892,
      "step": 209445
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5311882495880127,
      "learning_rate": 1.2165102508870317e-05,
      "loss": 3.1916,
      "step": 209446
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.137425661087036,
      "learning_rate": 1.2163949487826042e-05,
      "loss": 2.761,
      "step": 209447
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9175753593444824,
      "learning_rate": 1.2162796520295926e-05,
      "loss": 2.9752,
      "step": 209448
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.261936187744141,
      "learning_rate": 1.216164360628017e-05,
      "loss": 2.7789,
      "step": 209449
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2238380908966064,
      "learning_rate": 1.216049074577904e-05,
      "loss": 2.9388,
      "step": 209450
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1743812561035156,
      "learning_rate": 1.2159337938792669e-05,
      "loss": 2.8104,
      "step": 209451
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.707500457763672,
      "learning_rate": 1.2158185185321323e-05,
      "loss": 3.0256,
      "step": 209452
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1620774269104004,
      "learning_rate": 1.2157032485365237e-05,
      "loss": 3.0345,
      "step": 209453
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.785358428955078,
      "learning_rate": 1.2155879838924576e-05,
      "loss": 2.8487,
      "step": 209454
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3416688442230225,
      "learning_rate": 1.215472724599954e-05,
      "loss": 2.8122,
      "step": 209455
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.994898557662964,
      "learning_rate": 1.2153574706590396e-05,
      "loss": 3.1727,
      "step": 209456
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8828067779541016,
      "learning_rate": 1.215242222069731e-05,
      "loss": 2.8374,
      "step": 209457
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7926735877990723,
      "learning_rate": 1.215126978832055e-05,
      "loss": 3.082,
      "step": 209458
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.898737668991089,
      "learning_rate": 1.2150117409460313e-05,
      "loss": 2.9545,
      "step": 209459
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.992401599884033,
      "learning_rate": 1.2148965084116802e-05,
      "loss": 2.7469,
      "step": 209460
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0767982006073,
      "learning_rate": 1.214781281229018e-05,
      "loss": 2.906,
      "step": 209461
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4123122692108154,
      "learning_rate": 1.214666059398075e-05,
      "loss": 2.5773,
      "step": 209462
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.168997287750244,
      "learning_rate": 1.2145508429188677e-05,
      "loss": 2.851,
      "step": 209463
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2613425254821777,
      "learning_rate": 1.2144356317914194e-05,
      "loss": 2.9621,
      "step": 209464
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1458587646484375,
      "learning_rate": 1.2143204260157535e-05,
      "loss": 3.2947,
      "step": 209465
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.885462522506714,
      "learning_rate": 1.21420522559189e-05,
      "loss": 2.983,
      "step": 209466
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7600631713867188,
      "learning_rate": 1.214090030519842e-05,
      "loss": 2.7869,
      "step": 209467
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.125685214996338,
      "learning_rate": 1.2139748407996431e-05,
      "loss": 2.8443,
      "step": 209468
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.011979103088379,
      "learning_rate": 1.2138596564313063e-05,
      "loss": 3.0237,
      "step": 209469
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5306215286254883,
      "learning_rate": 1.2137444774148587e-05,
      "loss": 2.9174,
      "step": 209470
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.077745199203491,
      "learning_rate": 1.2136293037503197e-05,
      "loss": 2.8135,
      "step": 209471
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6462020874023438,
      "learning_rate": 1.2135141354377132e-05,
      "loss": 2.7968,
      "step": 209472
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9684088230133057,
      "learning_rate": 1.213398972477052e-05,
      "loss": 2.9131,
      "step": 209473
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6599888801574707,
      "learning_rate": 1.2132838148683665e-05,
      "loss": 2.9222,
      "step": 209474
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6747422218322754,
      "learning_rate": 1.2131686626116732e-05,
      "loss": 3.1553,
      "step": 209475
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.898847818374634,
      "learning_rate": 1.2130535157069954e-05,
      "loss": 2.8773,
      "step": 209476
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0276920795440674,
      "learning_rate": 1.2129383741543564e-05,
      "loss": 3.0925,
      "step": 209477
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.982576847076416,
      "learning_rate": 1.2128232379537762e-05,
      "loss": 2.6945,
      "step": 209478
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.998548746109009,
      "learning_rate": 1.2127081071052713e-05,
      "loss": 3.0927,
      "step": 209479
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8314101696014404,
      "learning_rate": 1.2125929816088719e-05,
      "loss": 2.9928,
      "step": 209480
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0648443698883057,
      "learning_rate": 1.212477861464588e-05,
      "loss": 2.9319,
      "step": 209481
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.3824968338012695,
      "learning_rate": 1.2123627466724562e-05,
      "loss": 2.8357,
      "step": 209482
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0718390941619873,
      "learning_rate": 1.2122476372324862e-05,
      "loss": 3.0757,
      "step": 209483
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.32277774810791,
      "learning_rate": 1.2121325331447052e-05,
      "loss": 3.0177,
      "step": 209484
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0550241470336914,
      "learning_rate": 1.212017434409126e-05,
      "loss": 2.8134,
      "step": 209485
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9636640548706055,
      "learning_rate": 1.2119023410257823e-05,
      "loss": 2.984,
      "step": 209486
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8281443119049072,
      "learning_rate": 1.2117872529946837e-05,
      "loss": 2.7549,
      "step": 209487
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0815324783325195,
      "learning_rate": 1.2116721703158605e-05,
      "loss": 2.9945,
      "step": 209488
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4966588020324707,
      "learning_rate": 1.2115570929893326e-05,
      "loss": 2.8437,
      "step": 209489
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7742209434509277,
      "learning_rate": 1.2114420210151166e-05,
      "loss": 2.7687,
      "step": 209490
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.773967981338501,
      "learning_rate": 1.2113269543932392e-05,
      "loss": 2.8543,
      "step": 209491
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4327378273010254,
      "learning_rate": 1.211211893123717e-05,
      "loss": 3.0572,
      "step": 209492
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.240530967712402,
      "learning_rate": 1.2110968372065733e-05,
      "loss": 2.846,
      "step": 209493
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7650411128997803,
      "learning_rate": 1.2109817866418347e-05,
      "loss": 2.9551,
      "step": 209494
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5999984741210938,
      "learning_rate": 1.2108667414295181e-05,
      "loss": 3.0823,
      "step": 209495
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.682706832885742,
      "learning_rate": 1.21075170156964e-05,
      "loss": 3.0844,
      "step": 209496
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.435798168182373,
      "learning_rate": 1.2106366670622304e-05,
      "loss": 2.9847,
      "step": 209497
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.521058082580566,
      "learning_rate": 1.2105216379073057e-05,
      "loss": 2.7818,
      "step": 209498
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.855194330215454,
      "learning_rate": 1.2104066141048863e-05,
      "loss": 2.7697,
      "step": 209499
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.359858751296997,
      "learning_rate": 1.2102915956549986e-05,
      "loss": 3.2925,
      "step": 209500
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.943406105041504,
      "learning_rate": 1.2101765825576592e-05,
      "loss": 2.9209,
      "step": 209501
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.900813579559326,
      "learning_rate": 1.2100615748128916e-05,
      "loss": 2.8261,
      "step": 209502
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.482095241546631,
      "learning_rate": 1.209946572420719e-05,
      "loss": 2.9956,
      "step": 209503
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9050824642181396,
      "learning_rate": 1.2098315753811549e-05,
      "loss": 3.0262,
      "step": 209504
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9062135219573975,
      "learning_rate": 1.2097165836942358e-05,
      "loss": 2.9982,
      "step": 209505
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.83854341506958,
      "learning_rate": 1.2096015973599682e-05,
      "loss": 3.1131,
      "step": 209506
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.082840442657471,
      "learning_rate": 1.2094866163783822e-05,
      "loss": 3.005,
      "step": 209507
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.109609127044678,
      "learning_rate": 1.2093716407494913e-05,
      "loss": 2.8569,
      "step": 209508
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.997622013092041,
      "learning_rate": 1.2092566704733252e-05,
      "loss": 2.866,
      "step": 209509
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.730174541473389,
      "learning_rate": 1.2091417055499009e-05,
      "loss": 3.036,
      "step": 209510
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2028005123138428,
      "learning_rate": 1.2090267459792413e-05,
      "loss": 3.1035,
      "step": 209511
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9783785343170166,
      "learning_rate": 1.2089117917613634e-05,
      "loss": 2.8907,
      "step": 209512
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7902326583862305,
      "learning_rate": 1.2087968428963001e-05,
      "loss": 2.9309,
      "step": 209513
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2731199264526367,
      "learning_rate": 1.2086818993840586e-05,
      "loss": 3.0473,
      "step": 209514
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2376484870910645,
      "learning_rate": 1.2085669612246685e-05,
      "loss": 2.9336,
      "step": 209515
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.93105411529541,
      "learning_rate": 1.2084520284181465e-05,
      "loss": 3.1076,
      "step": 209516
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8920435905456543,
      "learning_rate": 1.2083371009645226e-05,
      "loss": 2.8118,
      "step": 209517
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4368200302124023,
      "learning_rate": 1.2082221788638035e-05,
      "loss": 3.0379,
      "step": 209518
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.128420829772949,
      "learning_rate": 1.2081072621160292e-05,
      "loss": 2.8479,
      "step": 209519
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.974336862564087,
      "learning_rate": 1.2079923507212064e-05,
      "loss": 2.852,
      "step": 209520
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.806814670562744,
      "learning_rate": 1.2078774446793648e-05,
      "loss": 2.9758,
      "step": 209521
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.854048252105713,
      "learning_rate": 1.2077625439905148e-05,
      "loss": 2.731,
      "step": 209522
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8846049308776855,
      "learning_rate": 1.2076476486546926e-05,
      "loss": 3.0042,
      "step": 209523
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9289088249206543,
      "learning_rate": 1.2075327586719053e-05,
      "loss": 2.7937,
      "step": 209524
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0331549644470215,
      "learning_rate": 1.2074178740421859e-05,
      "loss": 3.0119,
      "step": 209525
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1702044010162354,
      "learning_rate": 1.2073029947655544e-05,
      "loss": 2.7427,
      "step": 209526
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.993966817855835,
      "learning_rate": 1.2071881208420243e-05,
      "loss": 2.8813,
      "step": 209527
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.609407424926758,
      "learning_rate": 1.2070732522716187e-05,
      "loss": 2.8913,
      "step": 209528
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6425962448120117,
      "learning_rate": 1.2069583890543677e-05,
      "loss": 2.9918,
      "step": 209529
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.770021438598633,
      "learning_rate": 1.2068435311902814e-05,
      "loss": 3.3842,
      "step": 209530
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.62265682220459,
      "learning_rate": 1.2067286786793894e-05,
      "loss": 2.8961,
      "step": 209531
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1949987411499023,
      "learning_rate": 1.2066138315217121e-05,
      "loss": 3.0857,
      "step": 209532
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.228348970413208,
      "learning_rate": 1.2064989897172662e-05,
      "loss": 2.8039,
      "step": 209533
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8782505989074707,
      "learning_rate": 1.2063841532660745e-05,
      "loss": 2.9584,
      "step": 209534
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.860788345336914,
      "learning_rate": 1.2062693221681607e-05,
      "loss": 3.024,
      "step": 209535
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5886659622192383,
      "learning_rate": 1.2061544964235414e-05,
      "loss": 2.8487,
      "step": 209536
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.8411219120025635,
      "learning_rate": 1.2060396760322465e-05,
      "loss": 2.9722,
      "step": 209537
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.712584972381592,
      "learning_rate": 1.2059248609942929e-05,
      "loss": 2.9615,
      "step": 209538
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4951529502868652,
      "learning_rate": 1.2058100513097002e-05,
      "loss": 2.8612,
      "step": 209539
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3715243339538574,
      "learning_rate": 1.2056952469784886e-05,
      "loss": 2.8175,
      "step": 209540
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4723007678985596,
      "learning_rate": 1.2055804480006847e-05,
      "loss": 3.1387,
      "step": 209541
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9425368309020996,
      "learning_rate": 1.2054656543763019e-05,
      "loss": 2.7849,
      "step": 209542
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.026298761367798,
      "learning_rate": 1.2053508661053734e-05,
      "loss": 2.7352,
      "step": 209543
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9234001636505127,
      "learning_rate": 1.2052360831879093e-05,
      "loss": 2.8986,
      "step": 209544
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9876620769500732,
      "learning_rate": 1.2051213056239394e-05,
      "loss": 2.9255,
      "step": 209545
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0347275733947754,
      "learning_rate": 1.2050065334134739e-05,
      "loss": 3.1678,
      "step": 209546
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6845672130584717,
      "learning_rate": 1.2048917665565494e-05,
      "loss": 2.9902,
      "step": 209547
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4997541904449463,
      "learning_rate": 1.2047770050531724e-05,
      "loss": 2.7922,
      "step": 209548
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9537930488586426,
      "learning_rate": 1.2046622489033731e-05,
      "loss": 3.0479,
      "step": 209549
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8183746337890625,
      "learning_rate": 1.2045474981071746e-05,
      "loss": 2.8076,
      "step": 209550
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7818644046783447,
      "learning_rate": 1.2044327526645903e-05,
      "loss": 2.8959,
      "step": 209551
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8554368019104004,
      "learning_rate": 1.2043180125756436e-05,
      "loss": 2.774,
      "step": 209552
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0393307209014893,
      "learning_rate": 1.204203277840361e-05,
      "loss": 3.1096,
      "step": 209553
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.922083854675293,
      "learning_rate": 1.2040885484587592e-05,
      "loss": 2.9766,
      "step": 209554
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3976361751556396,
      "learning_rate": 1.2039738244308617e-05,
      "loss": 3.1855,
      "step": 209555
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.945141077041626,
      "learning_rate": 1.2038591057566916e-05,
      "loss": 2.9665,
      "step": 209556
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.321431875228882,
      "learning_rate": 1.2037443924362654e-05,
      "loss": 2.9119,
      "step": 209557
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5367324352264404,
      "learning_rate": 1.2036296844696037e-05,
      "loss": 2.9287,
      "step": 209558
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8736412525177,
      "learning_rate": 1.2035149818567358e-05,
      "loss": 2.9652,
      "step": 209559
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0985355377197266,
      "learning_rate": 1.203400284597672e-05,
      "loss": 2.8859,
      "step": 209560
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9945895671844482,
      "learning_rate": 1.2032855926924422e-05,
      "loss": 2.9748,
      "step": 209561
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.805196762084961,
      "learning_rate": 1.2031709061410698e-05,
      "loss": 2.9024,
      "step": 209562
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0534844398498535,
      "learning_rate": 1.2030562249435683e-05,
      "loss": 2.8271,
      "step": 209563
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1315243244171143,
      "learning_rate": 1.2029415490999605e-05,
      "loss": 3.0073,
      "step": 209564
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3156955242156982,
      "learning_rate": 1.2028268786102702e-05,
      "loss": 2.7865,
      "step": 209565
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.529444932937622,
      "learning_rate": 1.202712213474517e-05,
      "loss": 2.7963,
      "step": 209566
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.3864333629608154,
      "learning_rate": 1.2025975536927246e-05,
      "loss": 2.9898,
      "step": 209567
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2788283824920654,
      "learning_rate": 1.202482899264916e-05,
      "loss": 2.7779,
      "step": 209568
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9184935092926025,
      "learning_rate": 1.2023682501911081e-05,
      "loss": 3.0425,
      "step": 209569
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5865907669067383,
      "learning_rate": 1.2022536064713206e-05,
      "loss": 2.9318,
      "step": 209570
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2971503734588623,
      "learning_rate": 1.2021389681055804e-05,
      "loss": 2.864,
      "step": 209571
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7975289821624756,
      "learning_rate": 1.2020243350939008e-05,
      "loss": 2.9557,
      "step": 209572
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.739609956741333,
      "learning_rate": 1.2019097074363148e-05,
      "loss": 2.9647,
      "step": 209573
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9167771339416504,
      "learning_rate": 1.2017950851328362e-05,
      "loss": 3.0857,
      "step": 209574
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.438371419906616,
      "learning_rate": 1.2016804681834847e-05,
      "loss": 2.9051,
      "step": 209575
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8356356620788574,
      "learning_rate": 1.2015658565882869e-05,
      "loss": 3.0347,
      "step": 209576
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.205599069595337,
      "learning_rate": 1.2014512503472595e-05,
      "loss": 3.1299,
      "step": 209577
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5063154697418213,
      "learning_rate": 1.201336649460426e-05,
      "loss": 2.8212,
      "step": 209578
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0222418308258057,
      "learning_rate": 1.2012220539278094e-05,
      "loss": 2.9601,
      "step": 209579
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9667868614196777,
      "learning_rate": 1.20110746374943e-05,
      "loss": 2.7783,
      "step": 209580
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.889221668243408,
      "learning_rate": 1.2009928789253042e-05,
      "loss": 2.8884,
      "step": 209581
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.106365203857422,
      "learning_rate": 1.2008782994554623e-05,
      "loss": 3.1774,
      "step": 209582
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8371667861938477,
      "learning_rate": 1.2007637253399204e-05,
      "loss": 2.8962,
      "step": 209583
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.933032274246216,
      "learning_rate": 1.2006491565786958e-05,
      "loss": 2.8886,
      "step": 209584
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7117841243743896,
      "learning_rate": 1.200534593171818e-05,
      "loss": 2.9951,
      "step": 209585
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.710721492767334,
      "learning_rate": 1.2004200351193005e-05,
      "loss": 3.1307,
      "step": 209586
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0718939304351807,
      "learning_rate": 1.2003054824211733e-05,
      "loss": 3.0437,
      "step": 209587
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.985377311706543,
      "learning_rate": 1.200190935077453e-05,
      "loss": 2.8113,
      "step": 209588
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6930577754974365,
      "learning_rate": 1.2000763930881563e-05,
      "loss": 2.9087,
      "step": 209589
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4251620769500732,
      "learning_rate": 1.1999618564533131e-05,
      "loss": 2.7727,
      "step": 209590
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.374633312225342,
      "learning_rate": 1.1998473251729401e-05,
      "loss": 3.1235,
      "step": 209591
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.77860951423645,
      "learning_rate": 1.1997327992470573e-05,
      "loss": 3.1358,
      "step": 209592
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7331089973449707,
      "learning_rate": 1.1996182786756881e-05,
      "loss": 3.0034,
      "step": 209593
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8056228160858154,
      "learning_rate": 1.1995037634588555e-05,
      "loss": 3.0619,
      "step": 209594
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6548707485198975,
      "learning_rate": 1.1993892535965766e-05,
      "loss": 2.7448,
      "step": 209595
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0526821613311768,
      "learning_rate": 1.1992747490888776e-05,
      "loss": 2.9076,
      "step": 209596
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9379639625549316,
      "learning_rate": 1.1991602499357755e-05,
      "loss": 2.907,
      "step": 209597
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9262876510620117,
      "learning_rate": 1.19904575613729e-05,
      "loss": 3.026,
      "step": 209598
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.009605646133423,
      "learning_rate": 1.1989312676934481e-05,
      "loss": 2.8481,
      "step": 209599
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8452069759368896,
      "learning_rate": 1.1988167846042728e-05,
      "loss": 2.9997,
      "step": 209600
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.943237066268921,
      "learning_rate": 1.1987023068697743e-05,
      "loss": 3.0616,
      "step": 209601
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.266308307647705,
      "learning_rate": 1.1985878344899824e-05,
      "loss": 3.1009,
      "step": 209602
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4683687686920166,
      "learning_rate": 1.1984733674649172e-05,
      "loss": 2.7086,
      "step": 209603
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5011518001556396,
      "learning_rate": 1.1983589057946018e-05,
      "loss": 2.8347,
      "step": 209604
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.294318199157715,
      "learning_rate": 1.1982444494790533e-05,
      "loss": 2.862,
      "step": 209605
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6744065284729004,
      "learning_rate": 1.198129998518298e-05,
      "loss": 2.8775,
      "step": 209606
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8615918159484863,
      "learning_rate": 1.1980155529123492e-05,
      "loss": 2.756,
      "step": 209607
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7296788692474365,
      "learning_rate": 1.1979011126612337e-05,
      "loss": 3.0396,
      "step": 209608
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1185548305511475,
      "learning_rate": 1.1977866777649713e-05,
      "loss": 2.8873,
      "step": 209609
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.120877742767334,
      "learning_rate": 1.197672248223589e-05,
      "loss": 2.852,
      "step": 209610
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2869019508361816,
      "learning_rate": 1.1975578240370997e-05,
      "loss": 2.8334,
      "step": 209611
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6414506435394287,
      "learning_rate": 1.1974434052055304e-05,
      "loss": 3.0477,
      "step": 209612
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.319870948791504,
      "learning_rate": 1.1973289917288942e-05,
      "loss": 3.0308,
      "step": 209613
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9178638458251953,
      "learning_rate": 1.1972145836072244e-05,
      "loss": 2.7878,
      "step": 209614
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2106103897094727,
      "learning_rate": 1.1971001808405311e-05,
      "loss": 2.7881,
      "step": 209615
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7805449962615967,
      "learning_rate": 1.1969857834288443e-05,
      "loss": 2.7931,
      "step": 209616
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7816946506500244,
      "learning_rate": 1.1968713913721838e-05,
      "loss": 2.8216,
      "step": 209617
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.060532569885254,
      "learning_rate": 1.1967570046705666e-05,
      "loss": 2.9298,
      "step": 209618
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2911622524261475,
      "learning_rate": 1.1966426233240123e-05,
      "loss": 2.7797,
      "step": 209619
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.67153000831604,
      "learning_rate": 1.1965282473325477e-05,
      "loss": 3.0216,
      "step": 209620
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.412914276123047,
      "learning_rate": 1.1964138766961895e-05,
      "loss": 2.8221,
      "step": 209621
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1429319381713867,
      "learning_rate": 1.1962995114149675e-05,
      "loss": 2.6699,
      "step": 209622
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3815758228302,
      "learning_rate": 1.1961851514888954e-05,
      "loss": 3.062,
      "step": 209623
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3698112964630127,
      "learning_rate": 1.1960707969179962e-05,
      "loss": 2.8923,
      "step": 209624
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8050527572631836,
      "learning_rate": 1.1959564477022897e-05,
      "loss": 3.0559,
      "step": 209625
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2547683715820312,
      "learning_rate": 1.1958421038417997e-05,
      "loss": 3.1043,
      "step": 209626
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3324952125549316,
      "learning_rate": 1.1957277653365427e-05,
      "loss": 2.779,
      "step": 209627
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2065603733062744,
      "learning_rate": 1.1956134321865485e-05,
      "loss": 2.7614,
      "step": 209628
    },
    {
      "epoch": 2.73,
      "grad_norm": 5.1294684410095215,
      "learning_rate": 1.1954991043918338e-05,
      "loss": 2.7879,
      "step": 209629
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.177877187728882,
      "learning_rate": 1.1953847819524188e-05,
      "loss": 2.7606,
      "step": 209630
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.593034029006958,
      "learning_rate": 1.1952704648683231e-05,
      "loss": 2.8813,
      "step": 209631
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.774437427520752,
      "learning_rate": 1.1951561531395704e-05,
      "loss": 3.1002,
      "step": 209632
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6247856616973877,
      "learning_rate": 1.1950418467661804e-05,
      "loss": 3.0245,
      "step": 209633
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9960010051727295,
      "learning_rate": 1.19492754574818e-05,
      "loss": 2.8035,
      "step": 209634
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7322328090667725,
      "learning_rate": 1.1948132500855856e-05,
      "loss": 2.8576,
      "step": 209635
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.535468816757202,
      "learning_rate": 1.1946989597784206e-05,
      "loss": 2.9567,
      "step": 209636
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.021387815475464,
      "learning_rate": 1.1945846748267018e-05,
      "loss": 2.6381,
      "step": 209637
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8531203269958496,
      "learning_rate": 1.1944703952304558e-05,
      "loss": 2.7152,
      "step": 209638
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.419199466705322,
      "learning_rate": 1.1943561209896957e-05,
      "loss": 2.868,
      "step": 209639
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.062203884124756,
      "learning_rate": 1.194241852104455e-05,
      "loss": 2.766,
      "step": 209640
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5275113582611084,
      "learning_rate": 1.194127588574747e-05,
      "loss": 2.7698,
      "step": 209641
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1564395427703857,
      "learning_rate": 1.194013330400595e-05,
      "loss": 3.0071,
      "step": 209642
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1467807292938232,
      "learning_rate": 1.1938990775820156e-05,
      "loss": 3.1446,
      "step": 209643
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8100709915161133,
      "learning_rate": 1.1937848301190389e-05,
      "loss": 2.9933,
      "step": 209644
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9108359813690186,
      "learning_rate": 1.1936705880116749e-05,
      "loss": 2.9968,
      "step": 209645
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2082579135894775,
      "learning_rate": 1.1935563512599566e-05,
      "loss": 2.865,
      "step": 209646
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.766708850860596,
      "learning_rate": 1.193442119863901e-05,
      "loss": 2.853,
      "step": 209647
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.230851173400879,
      "learning_rate": 1.193327893823528e-05,
      "loss": 2.9874,
      "step": 209648
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.858651876449585,
      "learning_rate": 1.1932136731388542e-05,
      "loss": 2.9449,
      "step": 209649
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8530099391937256,
      "learning_rate": 1.193099457809913e-05,
      "loss": 3.0372,
      "step": 209650
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6261494159698486,
      "learning_rate": 1.1929852478367108e-05,
      "loss": 2.8525,
      "step": 209651
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.502223491668701,
      "learning_rate": 1.1928710432192812e-05,
      "loss": 2.8241,
      "step": 209652
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4464964866638184,
      "learning_rate": 1.1927568439576374e-05,
      "loss": 2.9654,
      "step": 209653
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.71525764465332,
      "learning_rate": 1.1926426500518093e-05,
      "loss": 2.9749,
      "step": 209654
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.82513427734375,
      "learning_rate": 1.1925284615018072e-05,
      "loss": 2.7463,
      "step": 209655
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.975947618484497,
      "learning_rate": 1.1924142783076606e-05,
      "loss": 2.9313,
      "step": 209656
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7881085872650146,
      "learning_rate": 1.1923001004693866e-05,
      "loss": 2.859,
      "step": 209657
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4488747119903564,
      "learning_rate": 1.1921859279870084e-05,
      "loss": 2.7345,
      "step": 209658
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.219435930252075,
      "learning_rate": 1.1920717608605457e-05,
      "loss": 2.7662,
      "step": 209659
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6960673332214355,
      "learning_rate": 1.1919575990900253e-05,
      "loss": 2.8083,
      "step": 209660
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.796471118927002,
      "learning_rate": 1.1918434426754608e-05,
      "loss": 2.8138,
      "step": 209661
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1116790771484375,
      "learning_rate": 1.1917292916168752e-05,
      "loss": 2.753,
      "step": 209662
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.294232130050659,
      "learning_rate": 1.1916151459142887e-05,
      "loss": 2.847,
      "step": 209663
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.973250389099121,
      "learning_rate": 1.191501005567731e-05,
      "loss": 2.8716,
      "step": 209664
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7879865169525146,
      "learning_rate": 1.1913868705772089e-05,
      "loss": 2.888,
      "step": 209665
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.071331739425659,
      "learning_rate": 1.191272740942759e-05,
      "loss": 3.1226,
      "step": 209666
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2107553482055664,
      "learning_rate": 1.1911586166643949e-05,
      "loss": 2.8951,
      "step": 209667
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6156842708587646,
      "learning_rate": 1.1910444977421363e-05,
      "loss": 2.7288,
      "step": 209668
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.761415958404541,
      "learning_rate": 1.1909303841760031e-05,
      "loss": 2.9002,
      "step": 209669
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.564387798309326,
      "learning_rate": 1.1908162759660255e-05,
      "loss": 3.1842,
      "step": 209670
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9078876972198486,
      "learning_rate": 1.1907021731122135e-05,
      "loss": 2.9639,
      "step": 209671
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.010576248168945,
      "learning_rate": 1.1905880756145969e-05,
      "loss": 2.9964,
      "step": 209672
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8699920177459717,
      "learning_rate": 1.1904739834731958e-05,
      "loss": 2.8403,
      "step": 209673
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.925722599029541,
      "learning_rate": 1.1903598966880234e-05,
      "loss": 2.741,
      "step": 209674
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.858497381210327,
      "learning_rate": 1.1902458152591132e-05,
      "loss": 2.7027,
      "step": 209675
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7206954956054688,
      "learning_rate": 1.1901317391864784e-05,
      "loss": 3.063,
      "step": 209676
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.07940411567688,
      "learning_rate": 1.190017668470139e-05,
      "loss": 2.9145,
      "step": 209677
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.99642276763916,
      "learning_rate": 1.1899036031101216e-05,
      "loss": 3.187,
      "step": 209678
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.042525291442871,
      "learning_rate": 1.1897895431064464e-05,
      "loss": 3.0074,
      "step": 209679
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7162811756134033,
      "learning_rate": 1.1896754884591298e-05,
      "loss": 2.9754,
      "step": 209680
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5748648643493652,
      "learning_rate": 1.1895614391681984e-05,
      "loss": 2.9252,
      "step": 209681
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.07705020904541,
      "learning_rate": 1.1894473952336724e-05,
      "loss": 2.6409,
      "step": 209682
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.1366424560546875,
      "learning_rate": 1.1893333566555685e-05,
      "loss": 2.9344,
      "step": 209683
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8500664234161377,
      "learning_rate": 1.1892193234339131e-05,
      "loss": 3.0888,
      "step": 209684
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.709259033203125,
      "learning_rate": 1.1891052955687297e-05,
      "loss": 3.1216,
      "step": 209685
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.592073440551758,
      "learning_rate": 1.1889912730600282e-05,
      "loss": 2.9244,
      "step": 209686
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9139533042907715,
      "learning_rate": 1.1888772559078419e-05,
      "loss": 2.8297,
      "step": 209687
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5671818256378174,
      "learning_rate": 1.1887632441121842e-05,
      "loss": 3.0036,
      "step": 209688
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3310258388519287,
      "learning_rate": 1.1886492376730816e-05,
      "loss": 2.9422,
      "step": 209689
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.036656141281128,
      "learning_rate": 1.1885352365905543e-05,
      "loss": 2.8734,
      "step": 209690
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7965927124023438,
      "learning_rate": 1.1884212408646187e-05,
      "loss": 2.929,
      "step": 209691
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0263102054595947,
      "learning_rate": 1.1883072504952984e-05,
      "loss": 2.8985,
      "step": 209692
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.388587236404419,
      "learning_rate": 1.1881932654826199e-05,
      "loss": 2.9979,
      "step": 209693
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7961692810058594,
      "learning_rate": 1.1880792858265964e-05,
      "loss": 2.9905,
      "step": 209694
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4777324199676514,
      "learning_rate": 1.1879653115272547e-05,
      "loss": 2.931,
      "step": 209695
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.145825147628784,
      "learning_rate": 1.187851342584618e-05,
      "loss": 2.7867,
      "step": 209696
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2359635829925537,
      "learning_rate": 1.1877373789987e-05,
      "loss": 2.9017,
      "step": 209697
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3077993392944336,
      "learning_rate": 1.1876234207695234e-05,
      "loss": 2.7956,
      "step": 209698
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5609307289123535,
      "learning_rate": 1.1875094678971152e-05,
      "loss": 2.8609,
      "step": 209699
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.354723930358887,
      "learning_rate": 1.187395520381489e-05,
      "loss": 2.747,
      "step": 209700
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.913560628890991,
      "learning_rate": 1.1872815782226707e-05,
      "loss": 2.9297,
      "step": 209701
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8407232761383057,
      "learning_rate": 1.1871676414206842e-05,
      "loss": 2.9751,
      "step": 209702
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9165213108062744,
      "learning_rate": 1.1870537099755462e-05,
      "loss": 2.9941,
      "step": 209703
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.849419593811035,
      "learning_rate": 1.1869397838872763e-05,
      "loss": 2.6979,
      "step": 209704
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.464124917984009,
      "learning_rate": 1.186825863155898e-05,
      "loss": 2.799,
      "step": 209705
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9159884452819824,
      "learning_rate": 1.1867119477814313e-05,
      "loss": 2.898,
      "step": 209706
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.670872926712036,
      "learning_rate": 1.1865980377639028e-05,
      "loss": 2.7714,
      "step": 209707
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9262053966522217,
      "learning_rate": 1.1864841331033292e-05,
      "loss": 2.9054,
      "step": 209708
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.595762014389038,
      "learning_rate": 1.1863702337997339e-05,
      "loss": 3.1303,
      "step": 209709
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5281810760498047,
      "learning_rate": 1.18625633985313e-05,
      "loss": 2.8978,
      "step": 209710
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.601127862930298,
      "learning_rate": 1.1861424512635509e-05,
      "loss": 2.6417,
      "step": 209711
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.841531991958618,
      "learning_rate": 1.1860285680310066e-05,
      "loss": 3.0026,
      "step": 209712
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.637637138366699,
      "learning_rate": 1.1859146901555273e-05,
      "loss": 2.9998,
      "step": 209713
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9705758094787598,
      "learning_rate": 1.1858008176371292e-05,
      "loss": 2.8875,
      "step": 209714
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0105690956115723,
      "learning_rate": 1.185686950475836e-05,
      "loss": 3.2456,
      "step": 209715
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.132967233657837,
      "learning_rate": 1.1855730886716642e-05,
      "loss": 3.1803,
      "step": 209716
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5005640983581543,
      "learning_rate": 1.1854592322246437e-05,
      "loss": 2.9999,
      "step": 209717
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.80745792388916,
      "learning_rate": 1.1853453811347812e-05,
      "loss": 2.9624,
      "step": 209718
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8117668628692627,
      "learning_rate": 1.1852315354021169e-05,
      "loss": 2.9971,
      "step": 209719
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6142282485961914,
      "learning_rate": 1.1851176950266538e-05,
      "loss": 2.9647,
      "step": 209720
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5892534255981445,
      "learning_rate": 1.1850038600084289e-05,
      "loss": 2.9193,
      "step": 209721
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.523117780685425,
      "learning_rate": 1.1848900303474518e-05,
      "loss": 2.9491,
      "step": 209722
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7617251873016357,
      "learning_rate": 1.1847762060437493e-05,
      "loss": 2.8819,
      "step": 209723
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9257640838623047,
      "learning_rate": 1.1846623870973348e-05,
      "loss": 3.0412,
      "step": 209724
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.760486364364624,
      "learning_rate": 1.1845485735082416e-05,
      "loss": 2.8543,
      "step": 209725
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.677527904510498,
      "learning_rate": 1.1844347652764797e-05,
      "loss": 3.0669,
      "step": 209726
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.055450916290283,
      "learning_rate": 1.1843209624020855e-05,
      "loss": 3.008,
      "step": 209727
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.421358585357666,
      "learning_rate": 1.1842071648850592e-05,
      "loss": 3.0097,
      "step": 209728
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1938459873199463,
      "learning_rate": 1.1840933727254376e-05,
      "loss": 2.831,
      "step": 209729
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.8389949798583984,
      "learning_rate": 1.1839795859232337e-05,
      "loss": 2.7745,
      "step": 209730
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5991666316986084,
      "learning_rate": 1.1838658044784777e-05,
      "loss": 2.7624,
      "step": 209731
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4948925971984863,
      "learning_rate": 1.183752028391176e-05,
      "loss": 2.9238,
      "step": 209732
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0674116611480713,
      "learning_rate": 1.1836382576613723e-05,
      "loss": 3.0125,
      "step": 209733
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.861093759536743,
      "learning_rate": 1.183524492289063e-05,
      "loss": 2.8546,
      "step": 209734
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.704828977584839,
      "learning_rate": 1.1834107322742848e-05,
      "loss": 3.0348,
      "step": 209735
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.897031784057617,
      "learning_rate": 1.1832969776170508e-05,
      "loss": 2.7853,
      "step": 209736
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9997730255126953,
      "learning_rate": 1.1831832283173882e-05,
      "loss": 3.2024,
      "step": 209737
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.371114492416382,
      "learning_rate": 1.1830694843753131e-05,
      "loss": 2.6451,
      "step": 209738
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5052478313446045,
      "learning_rate": 1.182955745790859e-05,
      "loss": 3.0356,
      "step": 209739
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.291595697402954,
      "learning_rate": 1.1828420125640292e-05,
      "loss": 3.1006,
      "step": 209740
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7467381954193115,
      "learning_rate": 1.1827282846948539e-05,
      "loss": 2.9114,
      "step": 209741
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.472447156906128,
      "learning_rate": 1.1826145621833527e-05,
      "loss": 3.0659,
      "step": 209742
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.851206064224243,
      "learning_rate": 1.1825008450295493e-05,
      "loss": 3.1073,
      "step": 209743
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.8112826347351074,
      "learning_rate": 1.18238713323346e-05,
      "loss": 2.9418,
      "step": 209744
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6565024852752686,
      "learning_rate": 1.182273426795115e-05,
      "loss": 3.0864,
      "step": 209745
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7599399089813232,
      "learning_rate": 1.1821597257145243e-05,
      "loss": 2.9491,
      "step": 209746
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1470587253570557,
      "learning_rate": 1.1820460299917179e-05,
      "loss": 2.8777,
      "step": 209747
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8409976959228516,
      "learning_rate": 1.1819323396267054e-05,
      "loss": 2.7233,
      "step": 209748
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.568732976913452,
      "learning_rate": 1.181818654619524e-05,
      "loss": 2.9657,
      "step": 209749
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.002730369567871,
      "learning_rate": 1.1817049749701802e-05,
      "loss": 2.8657,
      "step": 209750
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6148812770843506,
      "learning_rate": 1.181591300678707e-05,
      "loss": 2.8931,
      "step": 209751
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3431031703948975,
      "learning_rate": 1.181477631745118e-05,
      "loss": 2.8233,
      "step": 209752
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6029887199401855,
      "learning_rate": 1.1813639681694364e-05,
      "loss": 2.9927,
      "step": 209753
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.226104974746704,
      "learning_rate": 1.1812503099516824e-05,
      "loss": 2.9439,
      "step": 209754
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.156461715698242,
      "learning_rate": 1.1811366570918789e-05,
      "loss": 2.9206,
      "step": 209755
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.617988348007202,
      "learning_rate": 1.181023009590043e-05,
      "loss": 2.841,
      "step": 209756
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.819523572921753,
      "learning_rate": 1.1809093674462011e-05,
      "loss": 2.8163,
      "step": 209757
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7523343563079834,
      "learning_rate": 1.1807957306603733e-05,
      "loss": 2.7745,
      "step": 209758
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.396304130554199,
      "learning_rate": 1.1806820992325827e-05,
      "loss": 2.7909,
      "step": 209759
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.391249656677246,
      "learning_rate": 1.1805684731628395e-05,
      "loss": 2.8813,
      "step": 209760
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4784481525421143,
      "learning_rate": 1.180454852451177e-05,
      "loss": 2.9965,
      "step": 209761
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.8376994132995605,
      "learning_rate": 1.1803412370976084e-05,
      "loss": 2.8298,
      "step": 209762
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0661795139312744,
      "learning_rate": 1.1802276271021637e-05,
      "loss": 2.8782,
      "step": 209763
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2079250812530518,
      "learning_rate": 1.1801140224648564e-05,
      "loss": 3.0037,
      "step": 209764
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.271730899810791,
      "learning_rate": 1.1800004231857097e-05,
      "loss": 3.108,
      "step": 209765
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5288476943969727,
      "learning_rate": 1.1798868292647468e-05,
      "loss": 2.9309,
      "step": 209766
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7474377155303955,
      "learning_rate": 1.1797732407019844e-05,
      "loss": 2.9023,
      "step": 209767
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.671884298324585,
      "learning_rate": 1.179659657497446e-05,
      "loss": 2.7842,
      "step": 209768
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7803759574890137,
      "learning_rate": 1.1795460796511547e-05,
      "loss": 2.8142,
      "step": 209769
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.911168098449707,
      "learning_rate": 1.1794325071631305e-05,
      "loss": 2.8086,
      "step": 209770
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.959665060043335,
      "learning_rate": 1.1793189400333903e-05,
      "loss": 2.883,
      "step": 209771
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0603740215301514,
      "learning_rate": 1.1792053782619603e-05,
      "loss": 2.8456,
      "step": 209772
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.060123920440674,
      "learning_rate": 1.179091821848861e-05,
      "loss": 2.9927,
      "step": 209773
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.710552930831909,
      "learning_rate": 1.178978270794112e-05,
      "loss": 2.9235,
      "step": 209774
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.912823438644409,
      "learning_rate": 1.1788647250977368e-05,
      "loss": 2.9747,
      "step": 209775
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0425500869750977,
      "learning_rate": 1.1787511847597553e-05,
      "loss": 3.0329,
      "step": 209776
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.205292224884033,
      "learning_rate": 1.1786376497801807e-05,
      "loss": 2.9362,
      "step": 209777
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.792266845703125,
      "learning_rate": 1.1785241201590502e-05,
      "loss": 3.0457,
      "step": 209778
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9319536685943604,
      "learning_rate": 1.1784105958963696e-05,
      "loss": 2.7481,
      "step": 209779
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.041165828704834,
      "learning_rate": 1.1782970769921695e-05,
      "loss": 2.9416,
      "step": 209780
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.843963861465454,
      "learning_rate": 1.1781835634464698e-05,
      "loss": 2.6665,
      "step": 209781
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.441319704055786,
      "learning_rate": 1.178070055259287e-05,
      "loss": 2.7224,
      "step": 209782
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6177098751068115,
      "learning_rate": 1.1779565524306444e-05,
      "loss": 2.7938,
      "step": 209783
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6314046382904053,
      "learning_rate": 1.1778430549605655e-05,
      "loss": 2.9237,
      "step": 209784
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.322286605834961,
      "learning_rate": 1.1777295628490669e-05,
      "loss": 3.1462,
      "step": 209785
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2142508029937744,
      "learning_rate": 1.1776160760961784e-05,
      "loss": 2.9589,
      "step": 209786
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.884159803390503,
      "learning_rate": 1.1775025947019068e-05,
      "loss": 3.1023,
      "step": 209787
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2114665508270264,
      "learning_rate": 1.177389118666292e-05,
      "loss": 2.8076,
      "step": 209788
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9015371799468994,
      "learning_rate": 1.1772756479893375e-05,
      "loss": 3.2188,
      "step": 209789
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.824242115020752,
      "learning_rate": 1.1771621826710731e-05,
      "loss": 2.9298,
      "step": 209790
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.71064829826355,
      "learning_rate": 1.1770487227115156e-05,
      "loss": 2.8574,
      "step": 209791
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.638348340988159,
      "learning_rate": 1.1769352681106946e-05,
      "loss": 2.922,
      "step": 209792
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.742506742477417,
      "learning_rate": 1.1768218188686174e-05,
      "loss": 2.9863,
      "step": 209793
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.034202814102173,
      "learning_rate": 1.1767083749853235e-05,
      "loss": 2.8973,
      "step": 209794
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.132704257965088,
      "learning_rate": 1.1765949364608162e-05,
      "loss": 2.5238,
      "step": 209795
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.092813730239868,
      "learning_rate": 1.1764815032951258e-05,
      "loss": 2.9173,
      "step": 209796
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5739452838897705,
      "learning_rate": 1.1763680754882721e-05,
      "loss": 3.0119,
      "step": 209797
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.277298450469971,
      "learning_rate": 1.176254653040275e-05,
      "loss": 2.9829,
      "step": 209798
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.957106351852417,
      "learning_rate": 1.1761412359511545e-05,
      "loss": 2.9413,
      "step": 209799
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9638519287109375,
      "learning_rate": 1.1760278242209408e-05,
      "loss": 2.6288,
      "step": 209800
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.884246587753296,
      "learning_rate": 1.1759144178496404e-05,
      "loss": 2.7618,
      "step": 209801
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0292115211486816,
      "learning_rate": 1.1758010168372834e-05,
      "loss": 2.9455,
      "step": 209802
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1774115562438965,
      "learning_rate": 1.1756876211838861e-05,
      "loss": 3.0176,
      "step": 209803
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.861868381500244,
      "learning_rate": 1.1755742308894755e-05,
      "loss": 2.6909,
      "step": 209804
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7272534370422363,
      "learning_rate": 1.1754608459540683e-05,
      "loss": 2.9055,
      "step": 209805
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3510541915893555,
      "learning_rate": 1.1753474663776941e-05,
      "loss": 3.1649,
      "step": 209806
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9610488414764404,
      "learning_rate": 1.1752340921603597e-05,
      "loss": 3.1188,
      "step": 209807
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5274031162261963,
      "learning_rate": 1.1751207233020954e-05,
      "loss": 2.9652,
      "step": 209808
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.020252227783203,
      "learning_rate": 1.1750073598029175e-05,
      "loss": 2.885,
      "step": 209809
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.686371326446533,
      "learning_rate": 1.1748940016628527e-05,
      "loss": 2.7411,
      "step": 209810
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.70306396484375,
      "learning_rate": 1.1747806488819178e-05,
      "loss": 3.039,
      "step": 209811
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3378794193267822,
      "learning_rate": 1.1746673014601393e-05,
      "loss": 3.2098,
      "step": 209812
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0165703296661377,
      "learning_rate": 1.1745539593975306e-05,
      "loss": 2.728,
      "step": 209813
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.414414882659912,
      "learning_rate": 1.1744406226941182e-05,
      "loss": 2.8059,
      "step": 209814
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.192638874053955,
      "learning_rate": 1.174327291349919e-05,
      "loss": 2.94,
      "step": 209815
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7547264099121094,
      "learning_rate": 1.1742139653649563e-05,
      "loss": 3.1336,
      "step": 209816
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8369200229644775,
      "learning_rate": 1.1741006447392531e-05,
      "loss": 2.8012,
      "step": 209817
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.7810750007629395,
      "learning_rate": 1.173987329472833e-05,
      "loss": 2.7682,
      "step": 209818
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.683472156524658,
      "learning_rate": 1.173874019565706e-05,
      "loss": 3.1649,
      "step": 209819
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7800421714782715,
      "learning_rate": 1.1737607150179018e-05,
      "loss": 2.7418,
      "step": 209820
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.345541477203369,
      "learning_rate": 1.1736474158294373e-05,
      "loss": 2.7775,
      "step": 209821
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2060658931732178,
      "learning_rate": 1.173534122000339e-05,
      "loss": 3.1514,
      "step": 209822
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6980526447296143,
      "learning_rate": 1.173420833530624e-05,
      "loss": 2.7609,
      "step": 209823
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7839884757995605,
      "learning_rate": 1.1733075504203182e-05,
      "loss": 2.7116,
      "step": 209824
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.763624429702759,
      "learning_rate": 1.1731942726694321e-05,
      "loss": 2.9262,
      "step": 209825
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1641433238983154,
      "learning_rate": 1.173081000277999e-05,
      "loss": 3.0464,
      "step": 209826
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9616787433624268,
      "learning_rate": 1.1729677332460285e-05,
      "loss": 2.8665,
      "step": 209827
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.456374168395996,
      "learning_rate": 1.1728544715735544e-05,
      "loss": 2.7642,
      "step": 209828
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0092833042144775,
      "learning_rate": 1.172741215260583e-05,
      "loss": 2.9415,
      "step": 209829
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9647061824798584,
      "learning_rate": 1.1726279643071512e-05,
      "loss": 3.1183,
      "step": 209830
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0393145084381104,
      "learning_rate": 1.1725147187132655e-05,
      "loss": 3.0741,
      "step": 209831
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.748037099838257,
      "learning_rate": 1.1724014784789593e-05,
      "loss": 2.9594,
      "step": 209832
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.141923666000366,
      "learning_rate": 1.1722882436042424e-05,
      "loss": 3.0261,
      "step": 209833
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7086498737335205,
      "learning_rate": 1.1721750140891417e-05,
      "loss": 3.0053,
      "step": 209834
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1798813343048096,
      "learning_rate": 1.172061789933677e-05,
      "loss": 2.6749,
      "step": 209835
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.74240779876709,
      "learning_rate": 1.1719485711378751e-05,
      "loss": 2.73,
      "step": 209836
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.391355037689209,
      "learning_rate": 1.1718353577017525e-05,
      "loss": 2.9092,
      "step": 209837
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.407015800476074,
      "learning_rate": 1.1717221496253259e-05,
      "loss": 2.7072,
      "step": 209838
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.587824821472168,
      "learning_rate": 1.171608946908622e-05,
      "loss": 3.1089,
      "step": 209839
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5715432167053223,
      "learning_rate": 1.1714957495516608e-05,
      "loss": 2.9759,
      "step": 209840
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9725494384765625,
      "learning_rate": 1.171382557554459e-05,
      "loss": 2.9031,
      "step": 209841
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6378700733184814,
      "learning_rate": 1.1712693709170429e-05,
      "loss": 3.0074,
      "step": 209842
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.77341628074646,
      "learning_rate": 1.1711561896394361e-05,
      "loss": 3.0491,
      "step": 209843
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.746220588684082,
      "learning_rate": 1.171043013721652e-05,
      "loss": 3.0115,
      "step": 209844
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1234207153320312,
      "learning_rate": 1.1709298431637137e-05,
      "loss": 2.856,
      "step": 209845
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7764759063720703,
      "learning_rate": 1.170816677965648e-05,
      "loss": 2.97,
      "step": 209846
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.224433183670044,
      "learning_rate": 1.1707035181274682e-05,
      "loss": 2.9139,
      "step": 209847
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8557512760162354,
      "learning_rate": 1.170590363649201e-05,
      "loss": 3.0351,
      "step": 209848
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6553499698638916,
      "learning_rate": 1.170477214530866e-05,
      "loss": 2.9834,
      "step": 209849
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.720583915710449,
      "learning_rate": 1.1703640707724771e-05,
      "loss": 2.8979,
      "step": 209850
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.227630138397217,
      "learning_rate": 1.1702509323740671e-05,
      "loss": 2.7745,
      "step": 209851
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1505250930786133,
      "learning_rate": 1.1701377993356532e-05,
      "loss": 2.8619,
      "step": 209852
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.753351926803589,
      "learning_rate": 1.1700246716572514e-05,
      "loss": 2.7588,
      "step": 209853
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.281282663345337,
      "learning_rate": 1.169911549338889e-05,
      "loss": 2.9454,
      "step": 209854
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2691361904144287,
      "learning_rate": 1.169798432380582e-05,
      "loss": 3.0176,
      "step": 209855
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.0286641120910645,
      "learning_rate": 1.1696853207823543e-05,
      "loss": 2.9613,
      "step": 209856
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.231226921081543,
      "learning_rate": 1.1695722145442254e-05,
      "loss": 2.7717,
      "step": 209857
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.14772629737854,
      "learning_rate": 1.1694591136662158e-05,
      "loss": 2.9056,
      "step": 209858
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.980487823486328,
      "learning_rate": 1.1693460181483515e-05,
      "loss": 2.8992,
      "step": 209859
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7347052097320557,
      "learning_rate": 1.1692329279906531e-05,
      "loss": 2.8445,
      "step": 209860
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.444092273712158,
      "learning_rate": 1.1691198431931336e-05,
      "loss": 2.7785,
      "step": 209861
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.771589517593384,
      "learning_rate": 1.1690067637558165e-05,
      "loss": 2.9075,
      "step": 209862
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.936997652053833,
      "learning_rate": 1.1688936896787315e-05,
      "loss": 2.8347,
      "step": 209863
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4684391021728516,
      "learning_rate": 1.1687806209618889e-05,
      "loss": 2.89,
      "step": 209864
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7596209049224854,
      "learning_rate": 1.1686675576053151e-05,
      "loss": 2.9389,
      "step": 209865
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.944226026535034,
      "learning_rate": 1.1685544996090335e-05,
      "loss": 2.8841,
      "step": 209866
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.940502882003784,
      "learning_rate": 1.1684414469730608e-05,
      "loss": 2.885,
      "step": 209867
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.072493314743042,
      "learning_rate": 1.1683283996974136e-05,
      "loss": 3.0057,
      "step": 209868
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0624074935913086,
      "learning_rate": 1.1682153577821251e-05,
      "loss": 2.8373,
      "step": 209869
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.379194498062134,
      "learning_rate": 1.1681023212272023e-05,
      "loss": 3.1369,
      "step": 209870
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.875659704208374,
      "learning_rate": 1.1679892900326815e-05,
      "loss": 3.2128,
      "step": 209871
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.885457754135132,
      "learning_rate": 1.1678762641985695e-05,
      "loss": 3.086,
      "step": 209872
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4406566619873047,
      "learning_rate": 1.1677632437248996e-05,
      "loss": 3.1065,
      "step": 209873
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6898932456970215,
      "learning_rate": 1.1676502286116818e-05,
      "loss": 2.809,
      "step": 209874
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.617562294006348,
      "learning_rate": 1.167537218858946e-05,
      "loss": 2.8569,
      "step": 209875
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2562742233276367,
      "learning_rate": 1.1674242144667023e-05,
      "loss": 3.1236,
      "step": 209876
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.045689105987549,
      "learning_rate": 1.167311215434984e-05,
      "loss": 3.0347,
      "step": 209877
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4033560752868652,
      "learning_rate": 1.1671982217638043e-05,
      "loss": 3.0125,
      "step": 209878
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.501595973968506,
      "learning_rate": 1.1670852334531933e-05,
      "loss": 2.9919,
      "step": 209879
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3649566173553467,
      "learning_rate": 1.1669722505031575e-05,
      "loss": 3.1482,
      "step": 209880
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9177606105804443,
      "learning_rate": 1.1668592729137305e-05,
      "loss": 2.9906,
      "step": 209881
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0869994163513184,
      "learning_rate": 1.1667463006849254e-05,
      "loss": 2.8517,
      "step": 209882
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7305469512939453,
      "learning_rate": 1.1666333338167688e-05,
      "loss": 2.8021,
      "step": 209883
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8946588039398193,
      "learning_rate": 1.1665203723092775e-05,
      "loss": 2.8091,
      "step": 209884
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5061588287353516,
      "learning_rate": 1.1664074161624814e-05,
      "loss": 2.8118,
      "step": 209885
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3061535358428955,
      "learning_rate": 1.1662944653763873e-05,
      "loss": 2.8713,
      "step": 209886
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.854620933532715,
      "learning_rate": 1.1661815199510249e-05,
      "loss": 3.3049,
      "step": 209887
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6287713050842285,
      "learning_rate": 1.166068579886411e-05,
      "loss": 2.9342,
      "step": 209888
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8100109100341797,
      "learning_rate": 1.1659556451825725e-05,
      "loss": 2.8721,
      "step": 209889
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7834208011627197,
      "learning_rate": 1.1658427158395223e-05,
      "loss": 2.6527,
      "step": 209890
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.078801155090332,
      "learning_rate": 1.1657297918572972e-05,
      "loss": 3.2,
      "step": 209891
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.3322224617004395,
      "learning_rate": 1.1656168732358973e-05,
      "loss": 2.9748,
      "step": 209892
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.219882011413574,
      "learning_rate": 1.1655039599753556e-05,
      "loss": 2.6674,
      "step": 209893
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9639687538146973,
      "learning_rate": 1.1653910520756926e-05,
      "loss": 2.8502,
      "step": 209894
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.567568063735962,
      "learning_rate": 1.1652781495369278e-05,
      "loss": 3.0181,
      "step": 209895
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.13991117477417,
      "learning_rate": 1.1651652523590781e-05,
      "loss": 3.0855,
      "step": 209896
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.007145881652832,
      "learning_rate": 1.1650523605421769e-05,
      "loss": 2.9657,
      "step": 209897
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.933244228363037,
      "learning_rate": 1.1649394740862273e-05,
      "loss": 3.0025,
      "step": 209898
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.03682279586792,
      "learning_rate": 1.1648265929912659e-05,
      "loss": 2.7828,
      "step": 209899
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.180401563644409,
      "learning_rate": 1.164713717257303e-05,
      "loss": 2.9884,
      "step": 209900
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.581144332885742,
      "learning_rate": 1.1646008468843682e-05,
      "loss": 2.9447,
      "step": 209901
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.528623580932617,
      "learning_rate": 1.164487981872475e-05,
      "loss": 2.8779,
      "step": 209902
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8054137229919434,
      "learning_rate": 1.1643751222216535e-05,
      "loss": 2.7433,
      "step": 209903
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8984222412109375,
      "learning_rate": 1.1642622679319136e-05,
      "loss": 2.6266,
      "step": 209904
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.775592803955078,
      "learning_rate": 1.1641494190032852e-05,
      "loss": 2.9356,
      "step": 209905
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.806103467941284,
      "learning_rate": 1.1640365754357816e-05,
      "loss": 2.9301,
      "step": 209906
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7698209285736084,
      "learning_rate": 1.1639237372294297e-05,
      "loss": 2.7531,
      "step": 209907
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7297310829162598,
      "learning_rate": 1.1638109043842458e-05,
      "loss": 2.8377,
      "step": 209908
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.927298069000244,
      "learning_rate": 1.1636980769002635e-05,
      "loss": 2.8616,
      "step": 209909
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.756854772567749,
      "learning_rate": 1.163585254777486e-05,
      "loss": 3.0873,
      "step": 209910
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6256189346313477,
      "learning_rate": 1.1634724380159433e-05,
      "loss": 2.9632,
      "step": 209911
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0818393230438232,
      "learning_rate": 1.1633596266156553e-05,
      "loss": 3.0322,
      "step": 209912
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0097947120666504,
      "learning_rate": 1.1632468205766454e-05,
      "loss": 2.8118,
      "step": 209913
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5966405868530273,
      "learning_rate": 1.163134019898927e-05,
      "loss": 2.8648,
      "step": 209914
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.138274908065796,
      "learning_rate": 1.1630212245825365e-05,
      "loss": 3.1101,
      "step": 209915
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.659313678741455,
      "learning_rate": 1.1629084346274741e-05,
      "loss": 2.812,
      "step": 209916
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9569432735443115,
      "learning_rate": 1.1627956500337765e-05,
      "loss": 2.7816,
      "step": 209917
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.888671875,
      "learning_rate": 1.1626828708014568e-05,
      "loss": 2.8132,
      "step": 209918
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.808117628097534,
      "learning_rate": 1.1625700969305419e-05,
      "loss": 2.8155,
      "step": 209919
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1360867023468018,
      "learning_rate": 1.1624573284210448e-05,
      "loss": 2.9411,
      "step": 209920
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0952043533325195,
      "learning_rate": 1.1623445652729956e-05,
      "loss": 2.7696,
      "step": 209921
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.320636034011841,
      "learning_rate": 1.1622318074864112e-05,
      "loss": 2.8575,
      "step": 209922
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0649688243865967,
      "learning_rate": 1.1621190550613113e-05,
      "loss": 2.9177,
      "step": 209923
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6166839599609375,
      "learning_rate": 1.162006307997716e-05,
      "loss": 3.1596,
      "step": 209924
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7271511554718018,
      "learning_rate": 1.1618935662956519e-05,
      "loss": 3.1127,
      "step": 209925
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.89583683013916,
      "learning_rate": 1.161780829955129e-05,
      "loss": 2.9104,
      "step": 209926
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.391633987426758,
      "learning_rate": 1.161668098976184e-05,
      "loss": 3.2182,
      "step": 209927
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.831834554672241,
      "learning_rate": 1.1615553733588267e-05,
      "loss": 3.0573,
      "step": 209928
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.335365056991577,
      "learning_rate": 1.1614426531030773e-05,
      "loss": 2.9152,
      "step": 209929
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3447892665863037,
      "learning_rate": 1.1613299382089625e-05,
      "loss": 3.0929,
      "step": 209930
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.647042989730835,
      "learning_rate": 1.161217228676502e-05,
      "loss": 2.8449,
      "step": 209931
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.110736846923828,
      "learning_rate": 1.1611045245057093e-05,
      "loss": 3.1842,
      "step": 209932
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1479861736297607,
      "learning_rate": 1.1609918256966177e-05,
      "loss": 2.6896,
      "step": 209933
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5352494716644287,
      "learning_rate": 1.160879132249244e-05,
      "loss": 2.825,
      "step": 209934
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.04752516746521,
      "learning_rate": 1.1607664441636012e-05,
      "loss": 2.796,
      "step": 209935
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5266151428222656,
      "learning_rate": 1.1606537614397194e-05,
      "loss": 2.9558,
      "step": 209936
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7162933349609375,
      "learning_rate": 1.1605410840776186e-05,
      "loss": 2.9535,
      "step": 209937
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.466492176055908,
      "learning_rate": 1.1604284120773122e-05,
      "loss": 2.8979,
      "step": 209938
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7071077823638916,
      "learning_rate": 1.16031574543883e-05,
      "loss": 2.9391,
      "step": 209939
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.22216796875,
      "learning_rate": 1.160203084162189e-05,
      "loss": 2.8707,
      "step": 209940
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.965701103210449,
      "learning_rate": 1.1600904282474089e-05,
      "loss": 2.9694,
      "step": 209941
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7226881980895996,
      "learning_rate": 1.1599777776945163e-05,
      "loss": 3.0065,
      "step": 209942
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0027613639831543,
      "learning_rate": 1.159865132503528e-05,
      "loss": 2.6232,
      "step": 209943
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0295767784118652,
      "learning_rate": 1.1597524926744605e-05,
      "loss": 2.5757,
      "step": 209944
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8614163398742676,
      "learning_rate": 1.1596398582073441e-05,
      "loss": 2.8889,
      "step": 209945
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.584373950958252,
      "learning_rate": 1.1595272291021917e-05,
      "loss": 2.7702,
      "step": 209946
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.896915912628174,
      "learning_rate": 1.1594146053590269e-05,
      "loss": 2.9417,
      "step": 209947
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3105711936950684,
      "learning_rate": 1.1593019869778763e-05,
      "loss": 2.7533,
      "step": 209948
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7629730701446533,
      "learning_rate": 1.15918937395875e-05,
      "loss": 2.7231,
      "step": 209949
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.5757806301116943,
      "learning_rate": 1.1590767663016809e-05,
      "loss": 3.2036,
      "step": 209950
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.000018835067749,
      "learning_rate": 1.1589641640066793e-05,
      "loss": 2.8191,
      "step": 209951
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.063032388687134,
      "learning_rate": 1.1588515670737753e-05,
      "loss": 2.9465,
      "step": 209952
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3439218997955322,
      "learning_rate": 1.1587389755029786e-05,
      "loss": 2.7581,
      "step": 209953
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9310951232910156,
      "learning_rate": 1.1586263892943226e-05,
      "loss": 2.818,
      "step": 209954
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8901114463806152,
      "learning_rate": 1.1585138084478175e-05,
      "loss": 3.0484,
      "step": 209955
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6799752712249756,
      "learning_rate": 1.1584012329634929e-05,
      "loss": 3.2134,
      "step": 209956
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7820334434509277,
      "learning_rate": 1.1582886628413624e-05,
      "loss": 2.5881,
      "step": 209957
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.888484239578247,
      "learning_rate": 1.1581760980814558e-05,
      "loss": 2.9693,
      "step": 209958
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1672537326812744,
      "learning_rate": 1.1580635386837832e-05,
      "loss": 3.1414,
      "step": 209959
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6176400184631348,
      "learning_rate": 1.1579509846483747e-05,
      "loss": 2.914,
      "step": 209960
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0068933963775635,
      "learning_rate": 1.1578384359752435e-05,
      "loss": 2.824,
      "step": 209961
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.366321563720703,
      "learning_rate": 1.1577258926644195e-05,
      "loss": 2.9099,
      "step": 209962
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.951268196105957,
      "learning_rate": 1.1576133547159128e-05,
      "loss": 2.6967,
      "step": 209963
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.299600601196289,
      "learning_rate": 1.1575008221297598e-05,
      "loss": 3.0975,
      "step": 209964
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1279244422912598,
      "learning_rate": 1.1573882949059643e-05,
      "loss": 2.8252,
      "step": 209965
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.278303384780884,
      "learning_rate": 1.1572757730445558e-05,
      "loss": 2.7971,
      "step": 209966
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.252617597579956,
      "learning_rate": 1.1571632565455547e-05,
      "loss": 3.0062,
      "step": 209967
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7836062908172607,
      "learning_rate": 1.1570507454089806e-05,
      "loss": 2.889,
      "step": 209968
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.322446346282959,
      "learning_rate": 1.1569382396348536e-05,
      "loss": 2.9829,
      "step": 209969
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4421708583831787,
      "learning_rate": 1.1568257392232038e-05,
      "loss": 3.0594,
      "step": 209970
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.855282783508301,
      "learning_rate": 1.1567132441740378e-05,
      "loss": 2.6434,
      "step": 209971
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6993844509124756,
      "learning_rate": 1.1566007544873857e-05,
      "loss": 2.8933,
      "step": 209972
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.164905071258545,
      "learning_rate": 1.1564882701632638e-05,
      "loss": 2.9253,
      "step": 209973
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.417161703109741,
      "learning_rate": 1.1563757912016991e-05,
      "loss": 2.8766,
      "step": 209974
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.392566204071045,
      "learning_rate": 1.1562633176027015e-05,
      "loss": 2.9748,
      "step": 209975
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8318028450012207,
      "learning_rate": 1.1561508493663107e-05,
      "loss": 2.8169,
      "step": 209976
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.757692813873291,
      "learning_rate": 1.1560383864925272e-05,
      "loss": 3.0218,
      "step": 209977
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0895988941192627,
      "learning_rate": 1.1559259289813838e-05,
      "loss": 3.1578,
      "step": 209978
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.4692153930664062,
      "learning_rate": 1.1558134768328941e-05,
      "loss": 2.9627,
      "step": 209979
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8119726181030273,
      "learning_rate": 1.1557010300470849e-05,
      "loss": 2.8543,
      "step": 209980
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.615446090698242,
      "learning_rate": 1.1555885886239757e-05,
      "loss": 2.7625,
      "step": 209981
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1579928398132324,
      "learning_rate": 1.1554761525635902e-05,
      "loss": 3.0595,
      "step": 209982
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.826662540435791,
      "learning_rate": 1.1553637218659418e-05,
      "loss": 2.8586,
      "step": 209983
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.138852119445801,
      "learning_rate": 1.1552512965310601e-05,
      "loss": 2.8452,
      "step": 209984
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.848518133163452,
      "learning_rate": 1.1551388765589553e-05,
      "loss": 2.9376,
      "step": 209985
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.023667812347412,
      "learning_rate": 1.1550264619496608e-05,
      "loss": 2.7863,
      "step": 209986
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8034679889678955,
      "learning_rate": 1.1549140527031864e-05,
      "loss": 2.6309,
      "step": 209987
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2464423179626465,
      "learning_rate": 1.1548016488195622e-05,
      "loss": 2.957,
      "step": 209988
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2648913860321045,
      "learning_rate": 1.1546892502988048e-05,
      "loss": 2.8549,
      "step": 209989
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.655395984649658,
      "learning_rate": 1.1545768571409342e-05,
      "loss": 3.0835,
      "step": 209990
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6598405838012695,
      "learning_rate": 1.1544644693459704e-05,
      "loss": 2.8389,
      "step": 209991
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.685292959213257,
      "learning_rate": 1.15435208691394e-05,
      "loss": 2.8419,
      "step": 209992
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.089372158050537,
      "learning_rate": 1.154239709844853e-05,
      "loss": 3.0594,
      "step": 209993
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.231144905090332,
      "learning_rate": 1.1541273381387428e-05,
      "loss": 3.113,
      "step": 209994
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3294734954833984,
      "learning_rate": 1.154014971795626e-05,
      "loss": 2.8647,
      "step": 209995
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8119468688964844,
      "learning_rate": 1.1539026108155225e-05,
      "loss": 3.0207,
      "step": 209996
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.933371067047119,
      "learning_rate": 1.1537902551984491e-05,
      "loss": 2.8629,
      "step": 209997
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.488382816314697,
      "learning_rate": 1.1536779049444322e-05,
      "loss": 2.9744,
      "step": 209998
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8041412830352783,
      "learning_rate": 1.1535655600534888e-05,
      "loss": 2.7035,
      "step": 209999
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.830029249191284,
      "learning_rate": 1.1534532205256453e-05,
      "loss": 2.7426,
      "step": 210000
    },
    {
      "epoch": 2.73,
      "grad_norm": 5.1992011070251465,
      "learning_rate": 1.1533408863609217e-05,
      "loss": 2.8116,
      "step": 210001
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.383119583129883,
      "learning_rate": 1.1532285575593347e-05,
      "loss": 2.9592,
      "step": 210002
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.656026840209961,
      "learning_rate": 1.153116234120901e-05,
      "loss": 2.9799,
      "step": 210003
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.927448272705078,
      "learning_rate": 1.153003916045654e-05,
      "loss": 2.7147,
      "step": 210004
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.576519250869751,
      "learning_rate": 1.1528916033336033e-05,
      "loss": 2.9756,
      "step": 210005
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.650904893875122,
      "learning_rate": 1.1527792959847793e-05,
      "loss": 2.8615,
      "step": 210006
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9481186866760254,
      "learning_rate": 1.1526669939991984e-05,
      "loss": 3.2662,
      "step": 210007
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.7258260250091553,
      "learning_rate": 1.1525546973768806e-05,
      "loss": 2.8065,
      "step": 210008
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0741419792175293,
      "learning_rate": 1.1524424061178428e-05,
      "loss": 2.9867,
      "step": 210009
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5207748413085938,
      "learning_rate": 1.1523301202221147e-05,
      "loss": 3.0799,
      "step": 210010
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6535701751708984,
      "learning_rate": 1.1522178396897097e-05,
      "loss": 2.8818,
      "step": 210011
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0875513553619385,
      "learning_rate": 1.1521055645206579e-05,
      "loss": 3.007,
      "step": 210012
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1792337894439697,
      "learning_rate": 1.1519932947149723e-05,
      "loss": 2.9188,
      "step": 210013
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.9718663692474365,
      "learning_rate": 1.1518810302726732e-05,
      "loss": 2.9561,
      "step": 210014
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.0003116130828857,
      "learning_rate": 1.1517687711937839e-05,
      "loss": 2.9522,
      "step": 210015
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6780664920806885,
      "learning_rate": 1.1516565174783277e-05,
      "loss": 3.0667,
      "step": 210016
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6777985095977783,
      "learning_rate": 1.1515442691263176e-05,
      "loss": 2.914,
      "step": 210017
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2065024375915527,
      "learning_rate": 1.1514320261377875e-05,
      "loss": 3.1233,
      "step": 210018
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.792344808578491,
      "learning_rate": 1.1513197885127468e-05,
      "loss": 3.0538,
      "step": 210019
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.58309006690979,
      "learning_rate": 1.1512075562512191e-05,
      "loss": 2.7794,
      "step": 210020
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4815549850463867,
      "learning_rate": 1.151095329353231e-05,
      "loss": 2.9866,
      "step": 210021
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6622726917266846,
      "learning_rate": 1.1509831078187959e-05,
      "loss": 2.9596,
      "step": 210022
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.853410243988037,
      "learning_rate": 1.150870891647937e-05,
      "loss": 3.0935,
      "step": 210023
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.581580638885498,
      "learning_rate": 1.1507586808406778e-05,
      "loss": 2.8564,
      "step": 210024
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3727362155914307,
      "learning_rate": 1.1506464753970346e-05,
      "loss": 3.1102,
      "step": 210025
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.4943716526031494,
      "learning_rate": 1.150534275317031e-05,
      "loss": 3.1135,
      "step": 210026
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5716888904571533,
      "learning_rate": 1.1504220806006904e-05,
      "loss": 2.9034,
      "step": 210027
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9352259635925293,
      "learning_rate": 1.1503098912480291e-05,
      "loss": 2.7769,
      "step": 210028
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.3082494735717773,
      "learning_rate": 1.1501977072590674e-05,
      "loss": 2.917,
      "step": 210029
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.646378993988037,
      "learning_rate": 1.1500855286338318e-05,
      "loss": 3.0961,
      "step": 210030
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.1066620349884033,
      "learning_rate": 1.1499733553723423e-05,
      "loss": 2.8594,
      "step": 210031
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.8509163856506348,
      "learning_rate": 1.1498611874746123e-05,
      "loss": 2.8936,
      "step": 210032
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7839231491088867,
      "learning_rate": 1.1497490249406682e-05,
      "loss": 3.0487,
      "step": 210033
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.5126149654388428,
      "learning_rate": 1.1496368677705303e-05,
      "loss": 3.0301,
      "step": 210034
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6641745567321777,
      "learning_rate": 1.1495247159642217e-05,
      "loss": 2.8397,
      "step": 210035
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.6867520809173584,
      "learning_rate": 1.1494125695217627e-05,
      "loss": 2.8216,
      "step": 210036
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.811866044998169,
      "learning_rate": 1.1493004284431694e-05,
      "loss": 3.2266,
      "step": 210037
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.6084847450256348,
      "learning_rate": 1.1491882927284657e-05,
      "loss": 2.7486,
      "step": 210038
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7353217601776123,
      "learning_rate": 1.1490761623776745e-05,
      "loss": 3.0716,
      "step": 210039
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.237523078918457,
      "learning_rate": 1.1489640373908094e-05,
      "loss": 3.1349,
      "step": 210040
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.2373368740081787,
      "learning_rate": 1.1488519177679034e-05,
      "loss": 3.0462,
      "step": 210041
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.727362871170044,
      "learning_rate": 1.1487398035089667e-05,
      "loss": 2.9371,
      "step": 210042
    },
    {
      "epoch": 2.73,
      "grad_norm": 3.431004524230957,
      "learning_rate": 1.148627694614026e-05,
      "loss": 2.7699,
      "step": 210043
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.787379503250122,
      "learning_rate": 1.148515591083098e-05,
      "loss": 2.8898,
      "step": 210044
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9989655017852783,
      "learning_rate": 1.1484034929162089e-05,
      "loss": 2.9787,
      "step": 210045
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9721925258636475,
      "learning_rate": 1.1482914001133692e-05,
      "loss": 2.8443,
      "step": 210046
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9034106731414795,
      "learning_rate": 1.1481793126746119e-05,
      "loss": 2.9333,
      "step": 210047
    },
    {
      "epoch": 2.73,
      "grad_norm": 4.24781608581543,
      "learning_rate": 1.1480672305999505e-05,
      "loss": 3.0083,
      "step": 210048
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.7793190479278564,
      "learning_rate": 1.1479551538894149e-05,
      "loss": 2.6579,
      "step": 210049
    },
    {
      "epoch": 2.73,
      "grad_norm": 2.9606335163116455,
      "learning_rate": 1.1478430825430118e-05,
      "loss": 2.889,
      "step": 210050
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.932129144668579,
      "learning_rate": 1.1477310165607711e-05,
      "loss": 3.0466,
      "step": 210051
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5648393630981445,
      "learning_rate": 1.1476189559427096e-05,
      "loss": 3.0365,
      "step": 210052
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.897599935531616,
      "learning_rate": 1.1475069006888538e-05,
      "loss": 2.9949,
      "step": 210053
    },
    {
      "epoch": 2.74,
      "grad_norm": 6.407894611358643,
      "learning_rate": 1.147394850799217e-05,
      "loss": 2.6995,
      "step": 210054
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8889200687408447,
      "learning_rate": 1.1472828062738293e-05,
      "loss": 2.8913,
      "step": 210055
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.572368621826172,
      "learning_rate": 1.147170767112704e-05,
      "loss": 2.8147,
      "step": 210056
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7008750438690186,
      "learning_rate": 1.1470587333158676e-05,
      "loss": 3.0334,
      "step": 210057
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5342233180999756,
      "learning_rate": 1.1469467048833303e-05,
      "loss": 3.048,
      "step": 210058
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2176754474639893,
      "learning_rate": 1.1468346818151252e-05,
      "loss": 2.6268,
      "step": 210059
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.742518663406372,
      "learning_rate": 1.1467226641112658e-05,
      "loss": 2.6323,
      "step": 210060
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.033437728881836,
      "learning_rate": 1.1466106517717788e-05,
      "loss": 2.8637,
      "step": 210061
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.253774404525757,
      "learning_rate": 1.1464986447966806e-05,
      "loss": 3.1702,
      "step": 210062
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.930795431137085,
      "learning_rate": 1.1463866431859913e-05,
      "loss": 2.8111,
      "step": 210063
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.540419101715088,
      "learning_rate": 1.146274646939731e-05,
      "loss": 3.0398,
      "step": 210064
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6721816062927246,
      "learning_rate": 1.1461626560579296e-05,
      "loss": 3.0346,
      "step": 210065
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.558006525039673,
      "learning_rate": 1.1460506705405936e-05,
      "loss": 2.5881,
      "step": 210066
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.609438896179199,
      "learning_rate": 1.1459386903877565e-05,
      "loss": 3.1171,
      "step": 210067
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8706071376800537,
      "learning_rate": 1.145826715599435e-05,
      "loss": 2.9478,
      "step": 210068
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5526108741760254,
      "learning_rate": 1.1457147461756489e-05,
      "loss": 3.1539,
      "step": 210069
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8274989128112793,
      "learning_rate": 1.1456027821164148e-05,
      "loss": 3.0441,
      "step": 210070
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.751300573348999,
      "learning_rate": 1.1454908234217596e-05,
      "loss": 3.0734,
      "step": 210071
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.50978684425354,
      "learning_rate": 1.1453788700917e-05,
      "loss": 2.9219,
      "step": 210072
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.773989200592041,
      "learning_rate": 1.1452669221262656e-05,
      "loss": 2.9432,
      "step": 210073
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.380751609802246,
      "learning_rate": 1.1451549795254666e-05,
      "loss": 2.8623,
      "step": 210074
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.20658016204834,
      "learning_rate": 1.1450430422893298e-05,
      "loss": 2.9582,
      "step": 210075
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.257235527038574,
      "learning_rate": 1.1449311104178716e-05,
      "loss": 2.8979,
      "step": 210076
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3945415019989014,
      "learning_rate": 1.1448191839111153e-05,
      "loss": 2.8678,
      "step": 210077
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.162485361099243,
      "learning_rate": 1.1447072627690812e-05,
      "loss": 3.0868,
      "step": 210078
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.386986017227173,
      "learning_rate": 1.1445953469917957e-05,
      "loss": 2.9437,
      "step": 210079
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0643887519836426,
      "learning_rate": 1.144483436579272e-05,
      "loss": 2.9874,
      "step": 210080
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.431013584136963,
      "learning_rate": 1.1443715315315338e-05,
      "loss": 2.9422,
      "step": 210081
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.706037759780884,
      "learning_rate": 1.1442596318486008e-05,
      "loss": 2.8617,
      "step": 210082
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.830052137374878,
      "learning_rate": 1.1441477375304931e-05,
      "loss": 2.8928,
      "step": 210083
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8921594619750977,
      "learning_rate": 1.1440358485772338e-05,
      "loss": 2.8152,
      "step": 210084
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9605724811553955,
      "learning_rate": 1.1439239649888432e-05,
      "loss": 2.7241,
      "step": 210085
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.023019552230835,
      "learning_rate": 1.1438120867653444e-05,
      "loss": 2.8506,
      "step": 210086
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.204075574874878,
      "learning_rate": 1.1437002139067575e-05,
      "loss": 3.0155,
      "step": 210087
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.07730770111084,
      "learning_rate": 1.1435883464130924e-05,
      "loss": 3.0755,
      "step": 210088
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9903564453125,
      "learning_rate": 1.1434764842843857e-05,
      "loss": 2.6027,
      "step": 210089
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.882303476333618,
      "learning_rate": 1.1433646275206476e-05,
      "loss": 2.9605,
      "step": 210090
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.256937026977539,
      "learning_rate": 1.1432527761219079e-05,
      "loss": 2.8129,
      "step": 210091
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2639901638031006,
      "learning_rate": 1.14314093008818e-05,
      "loss": 2.9217,
      "step": 210092
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.640775203704834,
      "learning_rate": 1.1430290894194872e-05,
      "loss": 2.9688,
      "step": 210093
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1984384059906006,
      "learning_rate": 1.142917254115846e-05,
      "loss": 2.7723,
      "step": 210094
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7975974082946777,
      "learning_rate": 1.1428054241772865e-05,
      "loss": 2.7784,
      "step": 210095
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.733353853225708,
      "learning_rate": 1.1426935996038189e-05,
      "loss": 2.8973,
      "step": 210096
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.349637508392334,
      "learning_rate": 1.1425817803954728e-05,
      "loss": 2.9795,
      "step": 210097
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1162478923797607,
      "learning_rate": 1.1424699665522685e-05,
      "loss": 3.0252,
      "step": 210098
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9236204624176025,
      "learning_rate": 1.1423581580742192e-05,
      "loss": 2.6848,
      "step": 210099
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.485712766647339,
      "learning_rate": 1.1422463549613515e-05,
      "loss": 3.1738,
      "step": 210100
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4457314014434814,
      "learning_rate": 1.1421345572136853e-05,
      "loss": 2.8979,
      "step": 210101
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7500972747802734,
      "learning_rate": 1.1420227648312374e-05,
      "loss": 2.9946,
      "step": 210102
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1894609928131104,
      "learning_rate": 1.1419109778140346e-05,
      "loss": 2.9487,
      "step": 210103
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8239872455596924,
      "learning_rate": 1.1417991961620998e-05,
      "loss": 3.1759,
      "step": 210104
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.210017204284668,
      "learning_rate": 1.1416874198754466e-05,
      "loss": 2.7279,
      "step": 210105
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.725250005722046,
      "learning_rate": 1.141575648954095e-05,
      "loss": 3.1627,
      "step": 210106
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.640850067138672,
      "learning_rate": 1.1414638833980716e-05,
      "loss": 2.8203,
      "step": 210107
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9378180503845215,
      "learning_rate": 1.141352123207393e-05,
      "loss": 2.7609,
      "step": 210108
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.850252151489258,
      "learning_rate": 1.1412403683820826e-05,
      "loss": 3.1187,
      "step": 210109
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.261517286300659,
      "learning_rate": 1.1411286189221636e-05,
      "loss": 3.0615,
      "step": 210110
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.779130458831787,
      "learning_rate": 1.1410168748276494e-05,
      "loss": 2.9978,
      "step": 210111
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1073029041290283,
      "learning_rate": 1.1409051360985665e-05,
      "loss": 2.7857,
      "step": 210112
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.70379638671875,
      "learning_rate": 1.1407934027349353e-05,
      "loss": 2.9253,
      "step": 210113
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7562620639801025,
      "learning_rate": 1.140681674736772e-05,
      "loss": 3.0897,
      "step": 210114
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.946729898452759,
      "learning_rate": 1.1405699521041068e-05,
      "loss": 3.0477,
      "step": 210115
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0701870918273926,
      "learning_rate": 1.1404582348369496e-05,
      "loss": 2.9098,
      "step": 210116
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4970288276672363,
      "learning_rate": 1.140346522935327e-05,
      "loss": 3.1023,
      "step": 210117
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7452962398529053,
      "learning_rate": 1.1402348163992593e-05,
      "loss": 3.0921,
      "step": 210118
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6814959049224854,
      "learning_rate": 1.1401231152287627e-05,
      "loss": 3.0287,
      "step": 210119
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9534354209899902,
      "learning_rate": 1.1400114194238674e-05,
      "loss": 3.0943,
      "step": 210120
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8227241039276123,
      "learning_rate": 1.1398997289845902e-05,
      "loss": 2.9911,
      "step": 210121
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.772143840789795,
      "learning_rate": 1.1397880439109442e-05,
      "loss": 2.9371,
      "step": 210122
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0014426708221436,
      "learning_rate": 1.1396763642029594e-05,
      "loss": 2.682,
      "step": 210123
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.4806528091430664,
      "learning_rate": 1.1395646898606558e-05,
      "loss": 2.9014,
      "step": 210124
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.670499324798584,
      "learning_rate": 1.1394530208840469e-05,
      "loss": 2.7759,
      "step": 210125
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0194036960601807,
      "learning_rate": 1.1393413572731624e-05,
      "loss": 2.9948,
      "step": 210126
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.424020290374756,
      "learning_rate": 1.1392296990280191e-05,
      "loss": 2.7542,
      "step": 210127
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.051222085952759,
      "learning_rate": 1.1391180461486337e-05,
      "loss": 2.626,
      "step": 210128
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8891119956970215,
      "learning_rate": 1.1390063986350362e-05,
      "loss": 2.9252,
      "step": 210129
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1459555625915527,
      "learning_rate": 1.1388947564872397e-05,
      "loss": 2.7944,
      "step": 210130
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.824258327484131,
      "learning_rate": 1.1387831197052643e-05,
      "loss": 2.7575,
      "step": 210131
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.760063409805298,
      "learning_rate": 1.1386714882891402e-05,
      "loss": 2.715,
      "step": 210132
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7841811180114746,
      "learning_rate": 1.1385598622388769e-05,
      "loss": 2.7364,
      "step": 210133
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9519286155700684,
      "learning_rate": 1.1384482415545016e-05,
      "loss": 2.6749,
      "step": 210134
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5253000259399414,
      "learning_rate": 1.1383366262360372e-05,
      "loss": 3.0439,
      "step": 210135
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6513030529022217,
      "learning_rate": 1.1382250162834972e-05,
      "loss": 2.7025,
      "step": 210136
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8050310611724854,
      "learning_rate": 1.1381134116969048e-05,
      "loss": 2.8191,
      "step": 210137
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.999912738800049,
      "learning_rate": 1.1380018124762868e-05,
      "loss": 3.037,
      "step": 210138
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6716625690460205,
      "learning_rate": 1.1378902186216532e-05,
      "loss": 2.7947,
      "step": 210139
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.265838623046875,
      "learning_rate": 1.1377786301330338e-05,
      "loss": 3.0051,
      "step": 210140
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5098843574523926,
      "learning_rate": 1.1376670470104455e-05,
      "loss": 2.7094,
      "step": 210141
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.700978994369507,
      "learning_rate": 1.1375554692539112e-05,
      "loss": 2.7887,
      "step": 210142
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.046104907989502,
      "learning_rate": 1.137443896863448e-05,
      "loss": 2.8486,
      "step": 210143
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0611765384674072,
      "learning_rate": 1.1373323298390825e-05,
      "loss": 3.0721,
      "step": 210144
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1176750659942627,
      "learning_rate": 1.1372207681808243e-05,
      "loss": 2.8848,
      "step": 210145
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.989914894104004,
      "learning_rate": 1.1371092118887071e-05,
      "loss": 2.8807,
      "step": 210146
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3730905055999756,
      "learning_rate": 1.1369976609627473e-05,
      "loss": 2.8702,
      "step": 210147
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.861954689025879,
      "learning_rate": 1.1368861154029652e-05,
      "loss": 2.8167,
      "step": 210148
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2494680881500244,
      "learning_rate": 1.1367745752093738e-05,
      "loss": 2.9169,
      "step": 210149
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.742790937423706,
      "learning_rate": 1.1366630403820065e-05,
      "loss": 2.9565,
      "step": 210150
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6253325939178467,
      "learning_rate": 1.1365515109208733e-05,
      "loss": 2.9771,
      "step": 210151
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.293806552886963,
      "learning_rate": 1.1364399868260077e-05,
      "loss": 2.685,
      "step": 210152
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.961305856704712,
      "learning_rate": 1.1363284680974194e-05,
      "loss": 3.0065,
      "step": 210153
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8849711418151855,
      "learning_rate": 1.1362169547351318e-05,
      "loss": 2.8907,
      "step": 210154
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.838811159133911,
      "learning_rate": 1.136105446739165e-05,
      "loss": 3.1643,
      "step": 210155
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.68356990814209,
      "learning_rate": 1.1359939441095422e-05,
      "loss": 2.7956,
      "step": 210156
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.61907696723938,
      "learning_rate": 1.1358824468462835e-05,
      "loss": 3.0342,
      "step": 210157
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7471723556518555,
      "learning_rate": 1.1357709549494087e-05,
      "loss": 2.9754,
      "step": 210158
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1219305992126465,
      "learning_rate": 1.1356594684189412e-05,
      "loss": 3.1083,
      "step": 210159
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.894257068634033,
      "learning_rate": 1.1355479872548979e-05,
      "loss": 2.9415,
      "step": 210160
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9005484580993652,
      "learning_rate": 1.1354365114572983e-05,
      "loss": 2.9495,
      "step": 210161
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0020151138305664,
      "learning_rate": 1.1353250410261694e-05,
      "loss": 3.04,
      "step": 210162
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3322019577026367,
      "learning_rate": 1.1352135759615244e-05,
      "loss": 2.8631,
      "step": 210163
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8528833389282227,
      "learning_rate": 1.13510211626339e-05,
      "loss": 3.0063,
      "step": 210164
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.524561643600464,
      "learning_rate": 1.1349906619317895e-05,
      "loss": 2.8508,
      "step": 210165
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.909912109375,
      "learning_rate": 1.1348792129667361e-05,
      "loss": 2.8853,
      "step": 210166
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6080808639526367,
      "learning_rate": 1.13476776936825e-05,
      "loss": 2.8371,
      "step": 210167
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.613460063934326,
      "learning_rate": 1.1346563311363577e-05,
      "loss": 3.2835,
      "step": 210168
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2958366870880127,
      "learning_rate": 1.1345448982710759e-05,
      "loss": 2.8356,
      "step": 210169
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8313815593719482,
      "learning_rate": 1.1344334707724312e-05,
      "loss": 2.859,
      "step": 210170
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8867552280426025,
      "learning_rate": 1.1343220486404402e-05,
      "loss": 3.1725,
      "step": 210171
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.341459035873413,
      "learning_rate": 1.1342106318751231e-05,
      "loss": 2.9033,
      "step": 210172
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4757699966430664,
      "learning_rate": 1.1340992204764965e-05,
      "loss": 2.9781,
      "step": 210173
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.08463191986084,
      "learning_rate": 1.1339878144445902e-05,
      "loss": 2.7102,
      "step": 210174
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.933363914489746,
      "learning_rate": 1.1338764137794177e-05,
      "loss": 2.9846,
      "step": 210175
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.487682580947876,
      "learning_rate": 1.1337650184810054e-05,
      "loss": 2.7384,
      "step": 210176
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4364495277404785,
      "learning_rate": 1.1336536285493702e-05,
      "loss": 2.7651,
      "step": 210177
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.932753801345825,
      "learning_rate": 1.1335422439845355e-05,
      "loss": 3.0112,
      "step": 210178
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3466567993164062,
      "learning_rate": 1.1334308647865143e-05,
      "loss": 2.9375,
      "step": 210179
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.034014940261841,
      "learning_rate": 1.13331949095534e-05,
      "loss": 2.9472,
      "step": 210180
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.131221771240234,
      "learning_rate": 1.1332081224910194e-05,
      "loss": 2.8152,
      "step": 210181
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9631614685058594,
      "learning_rate": 1.1330967593935857e-05,
      "loss": 2.8757,
      "step": 210182
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1724727153778076,
      "learning_rate": 1.1329854016630557e-05,
      "loss": 3.0105,
      "step": 210183
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3576183319091797,
      "learning_rate": 1.132874049299446e-05,
      "loss": 3.1865,
      "step": 210184
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.574091911315918,
      "learning_rate": 1.1327627023027763e-05,
      "loss": 2.8219,
      "step": 210185
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1733760833740234,
      "learning_rate": 1.1326513606730769e-05,
      "loss": 2.9231,
      "step": 210186
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.200228452682495,
      "learning_rate": 1.1325400244103577e-05,
      "loss": 3.0633,
      "step": 210187
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.009608030319214,
      "learning_rate": 1.1324286935146454e-05,
      "loss": 2.9673,
      "step": 210188
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1045584678649902,
      "learning_rate": 1.1323173679859599e-05,
      "loss": 2.969,
      "step": 210189
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.135606527328491,
      "learning_rate": 1.1322060478243278e-05,
      "loss": 3.055,
      "step": 210190
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.749490976333618,
      "learning_rate": 1.1320947330297525e-05,
      "loss": 2.8765,
      "step": 210191
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.92777943611145,
      "learning_rate": 1.131983423602274e-05,
      "loss": 2.9034,
      "step": 210192
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.344656467437744,
      "learning_rate": 1.131872119541899e-05,
      "loss": 2.9018,
      "step": 210193
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.564290761947632,
      "learning_rate": 1.1317608208486572e-05,
      "loss": 2.841,
      "step": 210194
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0365917682647705,
      "learning_rate": 1.1316495275225623e-05,
      "loss": 2.945,
      "step": 210195
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.733048677444458,
      "learning_rate": 1.1315382395636441e-05,
      "loss": 2.7413,
      "step": 210196
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.168444871902466,
      "learning_rate": 1.131426956971916e-05,
      "loss": 2.7968,
      "step": 210197
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0165297985076904,
      "learning_rate": 1.1313156797474044e-05,
      "loss": 2.8287,
      "step": 210198
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.770569324493408,
      "learning_rate": 1.1312044078901195e-05,
      "loss": 2.7544,
      "step": 210199
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6622066497802734,
      "learning_rate": 1.1310931414000912e-05,
      "loss": 2.927,
      "step": 210200
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7800188064575195,
      "learning_rate": 1.130981880277333e-05,
      "loss": 2.8171,
      "step": 210201
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2669856548309326,
      "learning_rate": 1.130870624521878e-05,
      "loss": 2.8028,
      "step": 210202
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.697148323059082,
      "learning_rate": 1.1307593741337361e-05,
      "loss": 2.9002,
      "step": 210203
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.66799259185791,
      "learning_rate": 1.130648129112931e-05,
      "loss": 2.8467,
      "step": 210204
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.052138328552246,
      "learning_rate": 1.1305368894594824e-05,
      "loss": 3.1727,
      "step": 210205
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6040639877319336,
      "learning_rate": 1.1304256551734137e-05,
      "loss": 2.8746,
      "step": 210206
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8937904834747314,
      "learning_rate": 1.1303144262547414e-05,
      "loss": 3.1072,
      "step": 210207
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.261199951171875,
      "learning_rate": 1.130203202703489e-05,
      "loss": 2.5387,
      "step": 210208
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1245157718658447,
      "learning_rate": 1.13009198451968e-05,
      "loss": 3.0354,
      "step": 210209
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.510627031326294,
      "learning_rate": 1.1299807717033272e-05,
      "loss": 2.701,
      "step": 210210
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1042418479919434,
      "learning_rate": 1.1298695642544608e-05,
      "loss": 2.8007,
      "step": 210211
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6996877193450928,
      "learning_rate": 1.1297583621730944e-05,
      "loss": 3.07,
      "step": 210212
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.002408742904663,
      "learning_rate": 1.1296471654592476e-05,
      "loss": 3.0168,
      "step": 210213
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.794382333755493,
      "learning_rate": 1.1295359741129473e-05,
      "loss": 2.9026,
      "step": 210214
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0335826873779297,
      "learning_rate": 1.1294247881342133e-05,
      "loss": 2.9194,
      "step": 210215
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2579410076141357,
      "learning_rate": 1.129313607523059e-05,
      "loss": 3.0269,
      "step": 210216
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3198771476745605,
      "learning_rate": 1.129202432279518e-05,
      "loss": 3.0373,
      "step": 210217
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.715663909912109,
      "learning_rate": 1.129091262403593e-05,
      "loss": 2.8572,
      "step": 210218
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9503538608551025,
      "learning_rate": 1.1289800978953245e-05,
      "loss": 3.1101,
      "step": 210219
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8354814052581787,
      "learning_rate": 1.128868938754719e-05,
      "loss": 2.8784,
      "step": 210220
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.428255081176758,
      "learning_rate": 1.1287577849818063e-05,
      "loss": 3.0368,
      "step": 210221
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.608064889907837,
      "learning_rate": 1.1286466365765933e-05,
      "loss": 2.7424,
      "step": 210222
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8056881427764893,
      "learning_rate": 1.1285354935391167e-05,
      "loss": 2.8426,
      "step": 210223
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5469589233398438,
      "learning_rate": 1.1284243558693895e-05,
      "loss": 2.8761,
      "step": 210224
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2970123291015625,
      "learning_rate": 1.1283132235674318e-05,
      "loss": 2.9119,
      "step": 210225
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.19069242477417,
      "learning_rate": 1.1282020966332672e-05,
      "loss": 2.8214,
      "step": 210226
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7320799827575684,
      "learning_rate": 1.1280909750669153e-05,
      "loss": 2.9895,
      "step": 210227
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.64121150970459,
      "learning_rate": 1.127979858868393e-05,
      "loss": 3.1711,
      "step": 210228
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8303534984588623,
      "learning_rate": 1.1278687480377269e-05,
      "loss": 2.8988,
      "step": 210229
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.059424638748169,
      "learning_rate": 1.1277576425749335e-05,
      "loss": 3.1904,
      "step": 210230
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.070995807647705,
      "learning_rate": 1.1276465424800363e-05,
      "loss": 2.707,
      "step": 210231
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5542752742767334,
      "learning_rate": 1.1275354477530551e-05,
      "loss": 2.9004,
      "step": 210232
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.291276454925537,
      "learning_rate": 1.1274243583940101e-05,
      "loss": 3.1812,
      "step": 210233
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0115559101104736,
      "learning_rate": 1.1273132744029212e-05,
      "loss": 3.0511,
      "step": 210234
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.894584894180298,
      "learning_rate": 1.1272021957798083e-05,
      "loss": 2.9314,
      "step": 210235
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.86903977394104,
      "learning_rate": 1.1270911225246948e-05,
      "loss": 2.7991,
      "step": 210236
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.676072835922241,
      "learning_rate": 1.1269800546376007e-05,
      "loss": 3.1143,
      "step": 210237
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.841355800628662,
      "learning_rate": 1.1268689921185458e-05,
      "loss": 3.1055,
      "step": 210238
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.708580732345581,
      "learning_rate": 1.1267579349675537e-05,
      "loss": 3.0153,
      "step": 210239
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2305033206939697,
      "learning_rate": 1.1266468831846343e-05,
      "loss": 2.847,
      "step": 210240
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.121825933456421,
      "learning_rate": 1.1265358367698241e-05,
      "loss": 2.9312,
      "step": 210241
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0615482330322266,
      "learning_rate": 1.1264247957231298e-05,
      "loss": 2.8443,
      "step": 210242
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.779421806335449,
      "learning_rate": 1.1263137600445849e-05,
      "loss": 2.5675,
      "step": 210243
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.899336814880371,
      "learning_rate": 1.126202729734199e-05,
      "loss": 2.808,
      "step": 210244
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8451390266418457,
      "learning_rate": 1.1260917047919994e-05,
      "loss": 2.9453,
      "step": 210245
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.922152042388916,
      "learning_rate": 1.125980685218002e-05,
      "loss": 2.8341,
      "step": 210246
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1132943630218506,
      "learning_rate": 1.1258696710122306e-05,
      "loss": 2.8171,
      "step": 210247
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.679074764251709,
      "learning_rate": 1.1257586621747017e-05,
      "loss": 2.8004,
      "step": 210248
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4240918159484863,
      "learning_rate": 1.1256476587054452e-05,
      "loss": 2.6604,
      "step": 210249
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.149245023727417,
      "learning_rate": 1.1255366606044746e-05,
      "loss": 3.1194,
      "step": 210250
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.653411865234375,
      "learning_rate": 1.1254256678718098e-05,
      "loss": 3.0901,
      "step": 210251
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.933394432067871,
      "learning_rate": 1.1253146805074741e-05,
      "loss": 2.7844,
      "step": 210252
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3548049926757812,
      "learning_rate": 1.1252036985114876e-05,
      "loss": 2.9543,
      "step": 210253
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7681374549865723,
      "learning_rate": 1.1250927218838702e-05,
      "loss": 2.9317,
      "step": 210254
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5133213996887207,
      "learning_rate": 1.124981750624645e-05,
      "loss": 3.1062,
      "step": 210255
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2403125762939453,
      "learning_rate": 1.1248707847338256e-05,
      "loss": 2.8794,
      "step": 210256
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1559393405914307,
      "learning_rate": 1.1247598242114453e-05,
      "loss": 2.883,
      "step": 210257
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.059966802597046,
      "learning_rate": 1.1246488690575107e-05,
      "loss": 2.9164,
      "step": 210258
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.618664026260376,
      "learning_rate": 1.124537919272055e-05,
      "loss": 2.9695,
      "step": 210259
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.140476703643799,
      "learning_rate": 1.1244269748550883e-05,
      "loss": 2.9228,
      "step": 210260
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9838624000549316,
      "learning_rate": 1.1243160358066372e-05,
      "loss": 2.7458,
      "step": 210261
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.838879346847534,
      "learning_rate": 1.1242051021267184e-05,
      "loss": 2.8761,
      "step": 210262
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.540426731109619,
      "learning_rate": 1.124094173815362e-05,
      "loss": 3.0946,
      "step": 210263
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.567561388015747,
      "learning_rate": 1.1239832508725743e-05,
      "loss": 2.8393,
      "step": 210264
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.122368812561035,
      "learning_rate": 1.1238723332983857e-05,
      "loss": 2.7781,
      "step": 210265
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.210391044616699,
      "learning_rate": 1.1237614210928125e-05,
      "loss": 3.0538,
      "step": 210266
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8372435569763184,
      "learning_rate": 1.1236505142558783e-05,
      "loss": 3.1896,
      "step": 210267
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.765606641769409,
      "learning_rate": 1.1235396127876029e-05,
      "loss": 2.7915,
      "step": 210268
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5754165649414062,
      "learning_rate": 1.1234287166880096e-05,
      "loss": 2.8111,
      "step": 210269
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.324531316757202,
      "learning_rate": 1.1233178259571118e-05,
      "loss": 2.836,
      "step": 210270
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1033365726470947,
      "learning_rate": 1.1232069405949362e-05,
      "loss": 2.9912,
      "step": 210271
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1272199153900146,
      "learning_rate": 1.1230960606014994e-05,
      "loss": 2.8929,
      "step": 210272
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.835803985595703,
      "learning_rate": 1.1229851859768247e-05,
      "loss": 2.9341,
      "step": 210273
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8597896099090576,
      "learning_rate": 1.1228743167209319e-05,
      "loss": 3.0607,
      "step": 210274
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8853278160095215,
      "learning_rate": 1.122763452833848e-05,
      "loss": 2.6125,
      "step": 210275
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.0720391273498535,
      "learning_rate": 1.1226525943155795e-05,
      "loss": 2.8135,
      "step": 210276
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0681538581848145,
      "learning_rate": 1.1225417411661596e-05,
      "loss": 2.6574,
      "step": 210277
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5103821754455566,
      "learning_rate": 1.1224308933856019e-05,
      "loss": 2.9224,
      "step": 210278
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.802443504333496,
      "learning_rate": 1.1223200509739294e-05,
      "loss": 2.8914,
      "step": 210279
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4471731185913086,
      "learning_rate": 1.1222092139311623e-05,
      "loss": 2.9484,
      "step": 210280
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7505271434783936,
      "learning_rate": 1.1220983822573238e-05,
      "loss": 3.0164,
      "step": 210281
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.210981845855713,
      "learning_rate": 1.1219875559524338e-05,
      "loss": 2.7397,
      "step": 210282
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1738333702087402,
      "learning_rate": 1.1218767350165092e-05,
      "loss": 3.0612,
      "step": 210283
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8908843994140625,
      "learning_rate": 1.1217659194495699e-05,
      "loss": 2.8472,
      "step": 210284
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.493685722351074,
      "learning_rate": 1.1216551092516424e-05,
      "loss": 2.8679,
      "step": 210285
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2922565937042236,
      "learning_rate": 1.1215443044227434e-05,
      "loss": 3.0218,
      "step": 210286
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3751208782196045,
      "learning_rate": 1.1214335049628964e-05,
      "loss": 2.8717,
      "step": 210287
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2555091381073,
      "learning_rate": 1.1213227108721178e-05,
      "loss": 2.8519,
      "step": 210288
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3358006477355957,
      "learning_rate": 1.1212119221504346e-05,
      "loss": 3.0495,
      "step": 210289
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9833972454071045,
      "learning_rate": 1.1211011387978564e-05,
      "loss": 2.9094,
      "step": 210290
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2025539875030518,
      "learning_rate": 1.1209903608144166e-05,
      "loss": 2.9904,
      "step": 210291
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.036173105239868,
      "learning_rate": 1.1208795882001254e-05,
      "loss": 2.8648,
      "step": 210292
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4925806522369385,
      "learning_rate": 1.1207688209550091e-05,
      "loss": 2.8363,
      "step": 210293
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.259352445602417,
      "learning_rate": 1.1206580590790914e-05,
      "loss": 2.8051,
      "step": 210294
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.011476755142212,
      "learning_rate": 1.120547302572382e-05,
      "loss": 2.8696,
      "step": 210295
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6057982444763184,
      "learning_rate": 1.1204365514349112e-05,
      "loss": 2.6666,
      "step": 210296
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.131502628326416,
      "learning_rate": 1.1203258056666985e-05,
      "loss": 2.7224,
      "step": 210297
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1997337341308594,
      "learning_rate": 1.1202150652677577e-05,
      "loss": 2.967,
      "step": 210298
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7504501342773438,
      "learning_rate": 1.1201043302381153e-05,
      "loss": 3.1051,
      "step": 210299
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9117891788482666,
      "learning_rate": 1.1199936005777943e-05,
      "loss": 2.8002,
      "step": 210300
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5949835777282715,
      "learning_rate": 1.1198828762868084e-05,
      "loss": 3.0222,
      "step": 210301
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.526261568069458,
      "learning_rate": 1.1197721573651809e-05,
      "loss": 2.7358,
      "step": 210302
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.398737668991089,
      "learning_rate": 1.1196614438129314e-05,
      "loss": 2.6397,
      "step": 210303
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8393192291259766,
      "learning_rate": 1.1195507356300871e-05,
      "loss": 2.8871,
      "step": 210304
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8114097118377686,
      "learning_rate": 1.1194400328166609e-05,
      "loss": 2.9497,
      "step": 210305
    },
    {
      "epoch": 2.74,
      "grad_norm": 5.641390800476074,
      "learning_rate": 1.1193293353726796e-05,
      "loss": 2.9068,
      "step": 210306
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9825825691223145,
      "learning_rate": 1.1192186432981531e-05,
      "loss": 2.6913,
      "step": 210307
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3239095211029053,
      "learning_rate": 1.119107956593115e-05,
      "loss": 3.1064,
      "step": 210308
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.184819221496582,
      "learning_rate": 1.118997275257575e-05,
      "loss": 2.7686,
      "step": 210309
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.644153118133545,
      "learning_rate": 1.1188865992915629e-05,
      "loss": 2.8647,
      "step": 210310
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5964839458465576,
      "learning_rate": 1.1187759286950926e-05,
      "loss": 2.9529,
      "step": 210311
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.9852185249328613,
      "learning_rate": 1.1186652634681904e-05,
      "loss": 3.0451,
      "step": 210312
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7071735858917236,
      "learning_rate": 1.1185546036108661e-05,
      "loss": 2.9303,
      "step": 210313
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8540780544281006,
      "learning_rate": 1.1184439491231534e-05,
      "loss": 2.879,
      "step": 210314
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6032590866088867,
      "learning_rate": 1.1183333000050653e-05,
      "loss": 3.0269,
      "step": 210315
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.289303779602051,
      "learning_rate": 1.1182226562566255e-05,
      "loss": 2.6933,
      "step": 210316
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0626096725463867,
      "learning_rate": 1.1181120178778535e-05,
      "loss": 2.7325,
      "step": 210317
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.034860372543335,
      "learning_rate": 1.1180013848687696e-05,
      "loss": 2.6763,
      "step": 210318
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.322516441345215,
      "learning_rate": 1.1178907572293938e-05,
      "loss": 2.68,
      "step": 210319
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6489365100860596,
      "learning_rate": 1.117780134959746e-05,
      "loss": 2.9808,
      "step": 210320
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.633103370666504,
      "learning_rate": 1.1176695180598494e-05,
      "loss": 3.129,
      "step": 210321
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.848555564880371,
      "learning_rate": 1.1175589065297241e-05,
      "loss": 2.6657,
      "step": 210322
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2478723526000977,
      "learning_rate": 1.1174483003693834e-05,
      "loss": 2.7569,
      "step": 210323
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0240490436553955,
      "learning_rate": 1.1173376995788675e-05,
      "loss": 2.8585,
      "step": 210324
    },
    {
      "epoch": 2.74,
      "grad_norm": 5.164907455444336,
      "learning_rate": 1.1172271041581727e-05,
      "loss": 2.9655,
      "step": 210325
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2144882678985596,
      "learning_rate": 1.1171165141073358e-05,
      "loss": 3.1461,
      "step": 210326
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0924065113067627,
      "learning_rate": 1.1170059294263667e-05,
      "loss": 2.819,
      "step": 210327
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4384090900421143,
      "learning_rate": 1.1168953501152955e-05,
      "loss": 2.9602,
      "step": 210328
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.799255609512329,
      "learning_rate": 1.1167847761741355e-05,
      "loss": 2.9428,
      "step": 210329
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9474053382873535,
      "learning_rate": 1.1166742076029167e-05,
      "loss": 2.8837,
      "step": 210330
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.49377179145813,
      "learning_rate": 1.116563644401649e-05,
      "loss": 2.9403,
      "step": 210331
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5845625400543213,
      "learning_rate": 1.1164530865703591e-05,
      "loss": 3.0343,
      "step": 210332
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7950122356414795,
      "learning_rate": 1.1163425341090604e-05,
      "loss": 2.862,
      "step": 210333
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7930591106414795,
      "learning_rate": 1.116231987017786e-05,
      "loss": 2.9519,
      "step": 210334
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.507133960723877,
      "learning_rate": 1.1161214452965428e-05,
      "loss": 2.8434,
      "step": 210335
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.0351881980896,
      "learning_rate": 1.1160109089453672e-05,
      "loss": 2.7901,
      "step": 210336
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1604864597320557,
      "learning_rate": 1.1159003779642627e-05,
      "loss": 2.807,
      "step": 210337
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.392202615737915,
      "learning_rate": 1.1157898523532594e-05,
      "loss": 2.8176,
      "step": 210338
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8573431968688965,
      "learning_rate": 1.1156793321123737e-05,
      "loss": 2.8309,
      "step": 210339
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8502707481384277,
      "learning_rate": 1.1155688172416322e-05,
      "loss": 3.022,
      "step": 210340
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5816402435302734,
      "learning_rate": 1.1154583077410483e-05,
      "loss": 2.9756,
      "step": 210341
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1169004440307617,
      "learning_rate": 1.1153478036106522e-05,
      "loss": 3.1594,
      "step": 210342
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6499927043914795,
      "learning_rate": 1.1152373048504537e-05,
      "loss": 3.0164,
      "step": 210343
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.333425760269165,
      "learning_rate": 1.1151268114604795e-05,
      "loss": 2.8661,
      "step": 210344
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5207138061523438,
      "learning_rate": 1.1150163234407428e-05,
      "loss": 3.228,
      "step": 210345
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9211440086364746,
      "learning_rate": 1.114905840791277e-05,
      "loss": 2.9046,
      "step": 210346
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4237329959869385,
      "learning_rate": 1.1147953635120888e-05,
      "loss": 3.1212,
      "step": 210347
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8993139266967773,
      "learning_rate": 1.1146848916032147e-05,
      "loss": 3.1495,
      "step": 210348
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6038625240325928,
      "learning_rate": 1.1145744250646583e-05,
      "loss": 2.7324,
      "step": 210349
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5753424167633057,
      "learning_rate": 1.1144639638964493e-05,
      "loss": 3.0257,
      "step": 210350
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.4113619327545166,
      "learning_rate": 1.1143535080986043e-05,
      "loss": 2.766,
      "step": 210351
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2345471382141113,
      "learning_rate": 1.1142430576711504e-05,
      "loss": 3.0392,
      "step": 210352
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.206648826599121,
      "learning_rate": 1.1141326126141004e-05,
      "loss": 2.8981,
      "step": 210353
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.684492349624634,
      "learning_rate": 1.114022172927488e-05,
      "loss": 2.9288,
      "step": 210354
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.351029634475708,
      "learning_rate": 1.1139117386113128e-05,
      "loss": 2.9801,
      "step": 210355
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.843414068222046,
      "learning_rate": 1.1138013096656117e-05,
      "loss": 2.9725,
      "step": 210356
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9663777351379395,
      "learning_rate": 1.1136908860903981e-05,
      "loss": 2.9979,
      "step": 210357
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.302976608276367,
      "learning_rate": 1.1135804678856953e-05,
      "loss": 2.816,
      "step": 210358
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7663111686706543,
      "learning_rate": 1.113470055051523e-05,
      "loss": 2.8681,
      "step": 210359
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.841012716293335,
      "learning_rate": 1.1133596475879048e-05,
      "loss": 3.0305,
      "step": 210360
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.564678192138672,
      "learning_rate": 1.1132492454948538e-05,
      "loss": 2.8875,
      "step": 210361
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.129364013671875,
      "learning_rate": 1.1131388487724002e-05,
      "loss": 2.8703,
      "step": 210362
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0761990547180176,
      "learning_rate": 1.113028457420554e-05,
      "loss": 2.8072,
      "step": 210363
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.956564903259277,
      "learning_rate": 1.1129180714393448e-05,
      "loss": 2.9745,
      "step": 210364
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.9592156410217285,
      "learning_rate": 1.1128076908287863e-05,
      "loss": 2.9693,
      "step": 210365
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7239415645599365,
      "learning_rate": 1.1126973155889085e-05,
      "loss": 2.9354,
      "step": 210366
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5090415477752686,
      "learning_rate": 1.1125869457197178e-05,
      "loss": 2.9417,
      "step": 210367
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.435596466064453,
      "learning_rate": 1.1124765812212476e-05,
      "loss": 2.8681,
      "step": 210368
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.120234727859497,
      "learning_rate": 1.112366222093508e-05,
      "loss": 2.9059,
      "step": 210369
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0314126014709473,
      "learning_rate": 1.112255868336529e-05,
      "loss": 2.8543,
      "step": 210370
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3747448921203613,
      "learning_rate": 1.1121455199503237e-05,
      "loss": 2.9044,
      "step": 210371
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.859299898147583,
      "learning_rate": 1.1120351769349223e-05,
      "loss": 2.9139,
      "step": 210372
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6933510303497314,
      "learning_rate": 1.1119248392903346e-05,
      "loss": 2.9943,
      "step": 210373
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.200066089630127,
      "learning_rate": 1.1118145070165874e-05,
      "loss": 3.0915,
      "step": 210374
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8547141551971436,
      "learning_rate": 1.1117041801136939e-05,
      "loss": 2.9481,
      "step": 210375
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.615807056427002,
      "learning_rate": 1.1115938585816842e-05,
      "loss": 2.9043,
      "step": 210376
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7351717948913574,
      "learning_rate": 1.1114835424205714e-05,
      "loss": 2.9448,
      "step": 210377
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.494471311569214,
      "learning_rate": 1.1113732316303825e-05,
      "loss": 2.9795,
      "step": 210378
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5895819664001465,
      "learning_rate": 1.1112629262111373e-05,
      "loss": 3.1369,
      "step": 210379
    },
    {
      "epoch": 2.74,
      "grad_norm": 6.165869235992432,
      "learning_rate": 1.1111526261628457e-05,
      "loss": 2.7775,
      "step": 210380
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.403738021850586,
      "learning_rate": 1.111042331485541e-05,
      "loss": 2.6833,
      "step": 210381
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9799883365631104,
      "learning_rate": 1.1109320421792401e-05,
      "loss": 3.0017,
      "step": 210382
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.385793685913086,
      "learning_rate": 1.1108217582439593e-05,
      "loss": 2.8589,
      "step": 210383
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.025592803955078,
      "learning_rate": 1.1107114796797257e-05,
      "loss": 2.8009,
      "step": 210384
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.212686777114868,
      "learning_rate": 1.1106012064865522e-05,
      "loss": 2.7331,
      "step": 210385
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5083539485931396,
      "learning_rate": 1.1104909386644622e-05,
      "loss": 2.9338,
      "step": 210386
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.940567970275879,
      "learning_rate": 1.1103806762134827e-05,
      "loss": 3.0282,
      "step": 210387
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.805781841278076,
      "learning_rate": 1.1102704191336265e-05,
      "loss": 2.8118,
      "step": 210388
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.983480453491211,
      "learning_rate": 1.1101601674249139e-05,
      "loss": 3.2165,
      "step": 210389
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.105175256729126,
      "learning_rate": 1.1100499210873715e-05,
      "loss": 2.709,
      "step": 210390
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.874629497528076,
      "learning_rate": 1.1099396801210159e-05,
      "loss": 3.034,
      "step": 210391
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0351316928863525,
      "learning_rate": 1.1098294445258637e-05,
      "loss": 2.9105,
      "step": 210392
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.641083240509033,
      "learning_rate": 1.1097192143019418e-05,
      "loss": 2.8734,
      "step": 210393
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.256697177886963,
      "learning_rate": 1.1096089894492666e-05,
      "loss": 2.9805,
      "step": 210394
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6443803310394287,
      "learning_rate": 1.1094987699678648e-05,
      "loss": 2.9578,
      "step": 210395
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1294424533843994,
      "learning_rate": 1.1093885558577498e-05,
      "loss": 3.0085,
      "step": 210396
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.870042562484741,
      "learning_rate": 1.1092783471189482e-05,
      "loss": 3.0135,
      "step": 210397
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.022899150848389,
      "learning_rate": 1.1091681437514698e-05,
      "loss": 2.8397,
      "step": 210398
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.807159185409546,
      "learning_rate": 1.1090579457553484e-05,
      "loss": 3.0288,
      "step": 210399
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.30845046043396,
      "learning_rate": 1.1089477531305968e-05,
      "loss": 3.1481,
      "step": 210400
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.4414052963256836,
      "learning_rate": 1.1088375658772386e-05,
      "loss": 3.0451,
      "step": 210401
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0926196575164795,
      "learning_rate": 1.108727383995287e-05,
      "loss": 2.8435,
      "step": 210402
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3297348022460938,
      "learning_rate": 1.1086172074847788e-05,
      "loss": 2.9849,
      "step": 210403
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.396200180053711,
      "learning_rate": 1.1085070363457172e-05,
      "loss": 2.9042,
      "step": 210404
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.983382225036621,
      "learning_rate": 1.1083968705781321e-05,
      "loss": 2.7887,
      "step": 210405
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4345223903656006,
      "learning_rate": 1.108286710182037e-05,
      "loss": 3.1948,
      "step": 210406
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.693937063217163,
      "learning_rate": 1.1081765551574584e-05,
      "loss": 2.9508,
      "step": 210407
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9428393840789795,
      "learning_rate": 1.1080664055044164e-05,
      "loss": 2.8666,
      "step": 210408
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2322487831115723,
      "learning_rate": 1.1079562612229342e-05,
      "loss": 2.9031,
      "step": 210409
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9440762996673584,
      "learning_rate": 1.1078461223130219e-05,
      "loss": 2.7975,
      "step": 210410
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2860424518585205,
      "learning_rate": 1.1077359887747095e-05,
      "loss": 2.8863,
      "step": 210411
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8373255729675293,
      "learning_rate": 1.1076258606080101e-05,
      "loss": 2.8105,
      "step": 210412
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.051762104034424,
      "learning_rate": 1.107515737812954e-05,
      "loss": 3.1261,
      "step": 210413
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.159269332885742,
      "learning_rate": 1.1074056203895509e-05,
      "loss": 3.0751,
      "step": 210414
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1264357566833496,
      "learning_rate": 1.1072955083378343e-05,
      "loss": 2.8641,
      "step": 210415
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8225629329681396,
      "learning_rate": 1.1071854016578107e-05,
      "loss": 3.0222,
      "step": 210416
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.345660448074341,
      "learning_rate": 1.1070753003495102e-05,
      "loss": 2.8405,
      "step": 210417
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.252211809158325,
      "learning_rate": 1.1069652044129462e-05,
      "loss": 2.9397,
      "step": 210418
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.040721416473389,
      "learning_rate": 1.1068551138481452e-05,
      "loss": 3.0178,
      "step": 210419
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4158530235290527,
      "learning_rate": 1.1067450286551206e-05,
      "loss": 2.8863,
      "step": 210420
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.140117883682251,
      "learning_rate": 1.1066349488339055e-05,
      "loss": 2.8954,
      "step": 210421
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.592808246612549,
      "learning_rate": 1.1065248743845068e-05,
      "loss": 3.0286,
      "step": 210422
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6145975589752197,
      "learning_rate": 1.1064148053069544e-05,
      "loss": 2.9147,
      "step": 210423
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5289628505706787,
      "learning_rate": 1.1063047416012583e-05,
      "loss": 2.9076,
      "step": 210424
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.604609489440918,
      "learning_rate": 1.1061946832674518e-05,
      "loss": 2.6409,
      "step": 210425
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0919723510742188,
      "learning_rate": 1.1060846303055448e-05,
      "loss": 2.9718,
      "step": 210426
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9964818954467773,
      "learning_rate": 1.1059745827155674e-05,
      "loss": 2.9696,
      "step": 210427
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0436036586761475,
      "learning_rate": 1.1058645404975298e-05,
      "loss": 2.7021,
      "step": 210428
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2385318279266357,
      "learning_rate": 1.1057545036514615e-05,
      "loss": 2.8887,
      "step": 210429
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.52133846282959,
      "learning_rate": 1.1056444721773727e-05,
      "loss": 2.842,
      "step": 210430
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.03256893157959,
      "learning_rate": 1.1055344460752968e-05,
      "loss": 2.8559,
      "step": 210431
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.896343231201172,
      "learning_rate": 1.1054244253452404e-05,
      "loss": 2.8294,
      "step": 210432
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3406660556793213,
      "learning_rate": 1.1053144099872403e-05,
      "loss": 2.7307,
      "step": 210433
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6532440185546875,
      "learning_rate": 1.1052044000013028e-05,
      "loss": 2.9811,
      "step": 210434
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3869874477386475,
      "learning_rate": 1.1050943953874514e-05,
      "loss": 3.0159,
      "step": 210435
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6512656211853027,
      "learning_rate": 1.1049843961457095e-05,
      "loss": 3.0605,
      "step": 210436
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4875645637512207,
      "learning_rate": 1.104874402276097e-05,
      "loss": 2.7345,
      "step": 210437
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7077226638793945,
      "learning_rate": 1.1047644137786305e-05,
      "loss": 2.7132,
      "step": 210438
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4494872093200684,
      "learning_rate": 1.1046544306533433e-05,
      "loss": 2.82,
      "step": 210439
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.721832036972046,
      "learning_rate": 1.1045444529002389e-05,
      "loss": 2.7155,
      "step": 210440
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.250243663787842,
      "learning_rate": 1.1044344805193473e-05,
      "loss": 2.9567,
      "step": 210441
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.133359909057617,
      "learning_rate": 1.1043245135106815e-05,
      "loss": 2.8243,
      "step": 210442
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0050270557403564,
      "learning_rate": 1.1042145518742718e-05,
      "loss": 2.909,
      "step": 210443
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.092240333557129,
      "learning_rate": 1.1041045956101314e-05,
      "loss": 2.7755,
      "step": 210444
    },
    {
      "epoch": 2.74,
      "grad_norm": 5.040396213531494,
      "learning_rate": 1.1039946447182901e-05,
      "loss": 2.958,
      "step": 210445
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.218371868133545,
      "learning_rate": 1.1038846991987548e-05,
      "loss": 2.9509,
      "step": 210446
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5047447681427,
      "learning_rate": 1.1037747590515588e-05,
      "loss": 3.0478,
      "step": 210447
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.014443874359131,
      "learning_rate": 1.1036648242767088e-05,
      "loss": 2.9512,
      "step": 210448
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6649527549743652,
      "learning_rate": 1.103554894874238e-05,
      "loss": 2.8301,
      "step": 210449
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2066080570220947,
      "learning_rate": 1.1034449708441596e-05,
      "loss": 3.0621,
      "step": 210450
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9707794189453125,
      "learning_rate": 1.1033350521865037e-05,
      "loss": 2.86,
      "step": 210451
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4818034172058105,
      "learning_rate": 1.1032251389012736e-05,
      "loss": 2.9192,
      "step": 210452
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1860523223876953,
      "learning_rate": 1.1031152309885028e-05,
      "loss": 3.0326,
      "step": 210453
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0537936687469482,
      "learning_rate": 1.1030053284482076e-05,
      "loss": 2.7909,
      "step": 210454
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.592611074447632,
      "learning_rate": 1.1028954312804117e-05,
      "loss": 3.0768,
      "step": 210455
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2303452491760254,
      "learning_rate": 1.1027855394851281e-05,
      "loss": 2.9121,
      "step": 210456
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0139784812927246,
      "learning_rate": 1.102675653062387e-05,
      "loss": 3.0751,
      "step": 210457
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8563663959503174,
      "learning_rate": 1.1025657720122017e-05,
      "loss": 2.9344,
      "step": 210458
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6812663078308105,
      "learning_rate": 1.1024558963345986e-05,
      "loss": 2.7754,
      "step": 210459
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.115922451019287,
      "learning_rate": 1.102346026029588e-05,
      "loss": 2.9374,
      "step": 210460
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8082692623138428,
      "learning_rate": 1.102236161097203e-05,
      "loss": 2.9773,
      "step": 210461
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.992049217224121,
      "learning_rate": 1.1021263015374505e-05,
      "loss": 2.8517,
      "step": 210462
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.745636224746704,
      "learning_rate": 1.1020164473503667e-05,
      "loss": 2.7892,
      "step": 210463
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.64190936088562,
      "learning_rate": 1.1019065985359587e-05,
      "loss": 2.7012,
      "step": 210464
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.252542734146118,
      "learning_rate": 1.1017967550942497e-05,
      "loss": 2.8233,
      "step": 210465
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0209648609161377,
      "learning_rate": 1.1016869170252662e-05,
      "loss": 2.9782,
      "step": 210466
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.211076498031616,
      "learning_rate": 1.1015770843290251e-05,
      "loss": 2.9397,
      "step": 210467
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9462127685546875,
      "learning_rate": 1.1014672570055426e-05,
      "loss": 3.0111,
      "step": 210468
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.047988176345825,
      "learning_rate": 1.1013574350548459e-05,
      "loss": 2.8025,
      "step": 210469
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.878105401992798,
      "learning_rate": 1.1012476184769547e-05,
      "loss": 3.0142,
      "step": 210470
    },
    {
      "epoch": 2.74,
      "grad_norm": 6.292731761932373,
      "learning_rate": 1.101137807271879e-05,
      "loss": 3.0061,
      "step": 210471
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.033071279525757,
      "learning_rate": 1.1010280014396555e-05,
      "loss": 2.8666,
      "step": 210472
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3080992698669434,
      "learning_rate": 1.1009182009802941e-05,
      "loss": 2.967,
      "step": 210473
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7585394382476807,
      "learning_rate": 1.1008084058938116e-05,
      "loss": 2.9373,
      "step": 210474
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.44504451751709,
      "learning_rate": 1.1006986161802412e-05,
      "loss": 2.9177,
      "step": 210475
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7627370357513428,
      "learning_rate": 1.1005888318395961e-05,
      "loss": 2.8635,
      "step": 210476
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1184802055358887,
      "learning_rate": 1.1004790528718932e-05,
      "loss": 2.9748,
      "step": 210477
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8483057022094727,
      "learning_rate": 1.100369279277159e-05,
      "loss": 2.9893,
      "step": 210478
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.454437732696533,
      "learning_rate": 1.1002595110554102e-05,
      "loss": 2.8143,
      "step": 210479
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.316384792327881,
      "learning_rate": 1.1001497482066701e-05,
      "loss": 3.2754,
      "step": 210480
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7740535736083984,
      "learning_rate": 1.1000399907309587e-05,
      "loss": 2.9894,
      "step": 210481
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.842898368835449,
      "learning_rate": 1.099930238628296e-05,
      "loss": 2.782,
      "step": 210482
    },
    {
      "epoch": 2.74,
      "grad_norm": 6.086693286895752,
      "learning_rate": 1.0998204918986985e-05,
      "loss": 2.6303,
      "step": 210483
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.807345390319824,
      "learning_rate": 1.0997107505421931e-05,
      "loss": 2.8959,
      "step": 210484
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2775421142578125,
      "learning_rate": 1.099601014558793e-05,
      "loss": 2.7021,
      "step": 210485
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0329601764678955,
      "learning_rate": 1.0994912839485248e-05,
      "loss": 2.9868,
      "step": 210486
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.26594614982605,
      "learning_rate": 1.0993815587114085e-05,
      "loss": 3.2078,
      "step": 210487
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.488959312438965,
      "learning_rate": 1.0992718388474609e-05,
      "loss": 2.8541,
      "step": 210488
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1617939472198486,
      "learning_rate": 1.099162124356705e-05,
      "loss": 2.7957,
      "step": 210489
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8572704792022705,
      "learning_rate": 1.0990524152391612e-05,
      "loss": 2.9469,
      "step": 210490
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8002781867980957,
      "learning_rate": 1.0989427114948423e-05,
      "loss": 2.4614,
      "step": 210491
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.770915985107422,
      "learning_rate": 1.0988330131237821e-05,
      "loss": 2.8213,
      "step": 210492
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1006624698638916,
      "learning_rate": 1.0987233201259937e-05,
      "loss": 2.9633,
      "step": 210493
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2222399711608887,
      "learning_rate": 1.0986136325015005e-05,
      "loss": 2.9481,
      "step": 210494
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.333690881729126,
      "learning_rate": 1.0985039502503157e-05,
      "loss": 2.7832,
      "step": 210495
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.959747791290283,
      "learning_rate": 1.0983942733724692e-05,
      "loss": 2.8001,
      "step": 210496
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.155602216720581,
      "learning_rate": 1.0982846018679714e-05,
      "loss": 2.8946,
      "step": 210497
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.739124298095703,
      "learning_rate": 1.0981749357368519e-05,
      "loss": 2.81,
      "step": 210498
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.064151287078857,
      "learning_rate": 1.098065274979124e-05,
      "loss": 2.8032,
      "step": 210499
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1769943237304688,
      "learning_rate": 1.0979556195948181e-05,
      "loss": 2.8178,
      "step": 210500
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.779252290725708,
      "learning_rate": 1.0978459695839404e-05,
      "loss": 2.7777,
      "step": 210501
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6466493606567383,
      "learning_rate": 1.0977363249465243e-05,
      "loss": 2.9066,
      "step": 210502
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6462395191192627,
      "learning_rate": 1.0976266856825799e-05,
      "loss": 2.7415,
      "step": 210503
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.170274019241333,
      "learning_rate": 1.0975170517921339e-05,
      "loss": 2.9838,
      "step": 210504
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9305975437164307,
      "learning_rate": 1.0974074232752027e-05,
      "loss": 2.9915,
      "step": 210505
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.054734706878662,
      "learning_rate": 1.0972978001318168e-05,
      "loss": 2.9942,
      "step": 210506
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.006821632385254,
      "learning_rate": 1.0971881823619822e-05,
      "loss": 2.9084,
      "step": 210507
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.074448585510254,
      "learning_rate": 1.0970785699657292e-05,
      "loss": 2.7765,
      "step": 210508
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6297123432159424,
      "learning_rate": 1.0969689629430711e-05,
      "loss": 2.8447,
      "step": 210509
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.64473032951355,
      "learning_rate": 1.0968593612940347e-05,
      "loss": 3.0304,
      "step": 210510
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1112585067749023,
      "learning_rate": 1.096749765018633e-05,
      "loss": 2.9179,
      "step": 210511
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.261718273162842,
      "learning_rate": 1.0966401741168996e-05,
      "loss": 2.806,
      "step": 210512
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.405665159225464,
      "learning_rate": 1.0965305885888409e-05,
      "loss": 3.18,
      "step": 210513
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4649922847747803,
      "learning_rate": 1.0964210084344837e-05,
      "loss": 3.0124,
      "step": 210514
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.025632619857788,
      "learning_rate": 1.096311433653848e-05,
      "loss": 2.9319,
      "step": 210515
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.635009288787842,
      "learning_rate": 1.0962018642469539e-05,
      "loss": 2.9867,
      "step": 210516
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.124417543411255,
      "learning_rate": 1.0960923002138177e-05,
      "loss": 2.8874,
      "step": 210517
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9065356254577637,
      "learning_rate": 1.095982741554473e-05,
      "loss": 2.8832,
      "step": 210518
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7431390285491943,
      "learning_rate": 1.0958731882689198e-05,
      "loss": 2.9695,
      "step": 210519
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8935184478759766,
      "learning_rate": 1.0957636403571946e-05,
      "loss": 3.0685,
      "step": 210520
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1243653297424316,
      "learning_rate": 1.0956540978193108e-05,
      "loss": 3.0133,
      "step": 210521
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0777814388275146,
      "learning_rate": 1.0955445606552916e-05,
      "loss": 2.9377,
      "step": 210522
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6030001640319824,
      "learning_rate": 1.095435028865157e-05,
      "loss": 2.9935,
      "step": 210523
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8985512256622314,
      "learning_rate": 1.0953255024489272e-05,
      "loss": 3.1828,
      "step": 210524
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.689296007156372,
      "learning_rate": 1.095215981406622e-05,
      "loss": 2.9924,
      "step": 210525
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.546440601348877,
      "learning_rate": 1.0951064657382613e-05,
      "loss": 2.8378,
      "step": 210526
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4806923866271973,
      "learning_rate": 1.094996955443862e-05,
      "loss": 2.9737,
      "step": 210527
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0410497188568115,
      "learning_rate": 1.094887450523454e-05,
      "loss": 2.9247,
      "step": 210528
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6620707511901855,
      "learning_rate": 1.0947779509770472e-05,
      "loss": 2.9107,
      "step": 210529
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8727498054504395,
      "learning_rate": 1.0946684568046682e-05,
      "loss": 2.7994,
      "step": 210530
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8664603233337402,
      "learning_rate": 1.0945589680063372e-05,
      "loss": 2.9504,
      "step": 210531
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.711873769760132,
      "learning_rate": 1.0944494845820772e-05,
      "loss": 3.0237,
      "step": 210532
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.25482439994812,
      "learning_rate": 1.0943400065318952e-05,
      "loss": 2.8608,
      "step": 210533
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0792932510375977,
      "learning_rate": 1.0942305338558277e-05,
      "loss": 3.0225,
      "step": 210534
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.867400646209717,
      "learning_rate": 1.0941210665538847e-05,
      "loss": 2.8897,
      "step": 210535
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.675704002380371,
      "learning_rate": 1.0940116046260961e-05,
      "loss": 3.0403,
      "step": 210536
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4349002838134766,
      "learning_rate": 1.0939021480724719e-05,
      "loss": 3.1619,
      "step": 210537
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.220888376235962,
      "learning_rate": 1.0937926968930389e-05,
      "loss": 3.1077,
      "step": 210538
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4102542400360107,
      "learning_rate": 1.0936832510878135e-05,
      "loss": 2.9788,
      "step": 210539
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.642646312713623,
      "learning_rate": 1.0935738106568193e-05,
      "loss": 2.8566,
      "step": 210540
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.118435859680176,
      "learning_rate": 1.0934643756000727e-05,
      "loss": 2.9625,
      "step": 210541
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0365121364593506,
      "learning_rate": 1.0933549459176005e-05,
      "loss": 2.8611,
      "step": 210542
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2855982780456543,
      "learning_rate": 1.0932455216094194e-05,
      "loss": 3.1148,
      "step": 210543
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1738293170928955,
      "learning_rate": 1.0931361026755492e-05,
      "loss": 2.9393,
      "step": 210544
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6384713649749756,
      "learning_rate": 1.0930266891160067e-05,
      "loss": 3.1006,
      "step": 210545
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7020230293273926,
      "learning_rate": 1.0929172809308218e-05,
      "loss": 3.0337,
      "step": 210546
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.19242787361145,
      "learning_rate": 1.0928078781200045e-05,
      "loss": 3.1331,
      "step": 210547
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3421504497528076,
      "learning_rate": 1.0926984806835815e-05,
      "loss": 2.8391,
      "step": 210548
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.295325994491577,
      "learning_rate": 1.092589088621576e-05,
      "loss": 3.0172,
      "step": 210549
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7639386653900146,
      "learning_rate": 1.092479701933998e-05,
      "loss": 2.7813,
      "step": 210550
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3416619300842285,
      "learning_rate": 1.0923703206208745e-05,
      "loss": 3.189,
      "step": 210551
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.456082820892334,
      "learning_rate": 1.0922609446822284e-05,
      "loss": 2.9595,
      "step": 210552
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8544223308563232,
      "learning_rate": 1.0921515741180698e-05,
      "loss": 2.7209,
      "step": 210553
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.618788957595825,
      "learning_rate": 1.092042208928432e-05,
      "loss": 2.9376,
      "step": 210554
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.764563798904419,
      "learning_rate": 1.0919328491133283e-05,
      "loss": 2.8645,
      "step": 210555
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0817975997924805,
      "learning_rate": 1.0918234946727788e-05,
      "loss": 3.0348,
      "step": 210556
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0053389072418213,
      "learning_rate": 1.0917141456068068e-05,
      "loss": 2.7985,
      "step": 210557
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.950498342514038,
      "learning_rate": 1.0916048019154288e-05,
      "loss": 2.9677,
      "step": 210558
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9697647094726562,
      "learning_rate": 1.091495463598665e-05,
      "loss": 2.9311,
      "step": 210559
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.296889543533325,
      "learning_rate": 1.0913861306565419e-05,
      "loss": 2.9759,
      "step": 210560
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0922887325286865,
      "learning_rate": 1.0912768030890762e-05,
      "loss": 3.046,
      "step": 210561
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6592612266540527,
      "learning_rate": 1.0911674808962845e-05,
      "loss": 2.9112,
      "step": 210562
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.336557626724243,
      "learning_rate": 1.0910581640781934e-05,
      "loss": 3.0405,
      "step": 210563
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.609515428543091,
      "learning_rate": 1.0909488526348164e-05,
      "loss": 2.8606,
      "step": 210564
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7535159587860107,
      "learning_rate": 1.0908395465661835e-05,
      "loss": 2.8796,
      "step": 210565
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.868006706237793,
      "learning_rate": 1.0907302458723078e-05,
      "loss": 2.7907,
      "step": 210566
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8963851928710938,
      "learning_rate": 1.0906209505532092e-05,
      "loss": 2.9407,
      "step": 210567
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.672471284866333,
      "learning_rate": 1.0905116606089081e-05,
      "loss": 2.9568,
      "step": 210568
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9124672412872314,
      "learning_rate": 1.090402376039431e-05,
      "loss": 3.0061,
      "step": 210569
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.731064558029175,
      "learning_rate": 1.0902930968447875e-05,
      "loss": 2.9102,
      "step": 210570
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.403186321258545,
      "learning_rate": 1.0901838230250081e-05,
      "loss": 2.864,
      "step": 210571
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.718604564666748,
      "learning_rate": 1.0900745545801126e-05,
      "loss": 2.8708,
      "step": 210572
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.210279941558838,
      "learning_rate": 1.0899652915101142e-05,
      "loss": 3.0682,
      "step": 210573
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.876094341278076,
      "learning_rate": 1.0898560338150364e-05,
      "loss": 2.6352,
      "step": 210574
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8980612754821777,
      "learning_rate": 1.0897467814949023e-05,
      "loss": 3.0249,
      "step": 210575
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.618715763092041,
      "learning_rate": 1.0896375345497254e-05,
      "loss": 2.896,
      "step": 210576
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0220792293548584,
      "learning_rate": 1.0895282929795357e-05,
      "loss": 3.0443,
      "step": 210577
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.8039021492004395,
      "learning_rate": 1.089419056784343e-05,
      "loss": 2.6287,
      "step": 210578
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.960486650466919,
      "learning_rate": 1.0893098259641841e-05,
      "loss": 2.9203,
      "step": 210579
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8024039268493652,
      "learning_rate": 1.0892006005190557e-05,
      "loss": 2.935,
      "step": 210580
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.4981610774993896,
      "learning_rate": 1.0890913804489975e-05,
      "loss": 2.6908,
      "step": 210581
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9091343879699707,
      "learning_rate": 1.0889821657540199e-05,
      "loss": 2.9896,
      "step": 210582
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.372066020965576,
      "learning_rate": 1.0888729564341492e-05,
      "loss": 3.0393,
      "step": 210583
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2506892681121826,
      "learning_rate": 1.0887637524893988e-05,
      "loss": 2.7395,
      "step": 210584
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.901996612548828,
      "learning_rate": 1.0886545539197989e-05,
      "loss": 2.9142,
      "step": 210585
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3058691024780273,
      "learning_rate": 1.088545360725359e-05,
      "loss": 2.8173,
      "step": 210586
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.1880879402160645,
      "learning_rate": 1.0884361729061063e-05,
      "loss": 3.0344,
      "step": 210587
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.771091938018799,
      "learning_rate": 1.0883269904620572e-05,
      "loss": 2.9272,
      "step": 210588
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.344024181365967,
      "learning_rate": 1.088217813393235e-05,
      "loss": 3.1141,
      "step": 210589
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.659867286682129,
      "learning_rate": 1.0881086416996564e-05,
      "loss": 3.2424,
      "step": 210590
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7141666412353516,
      "learning_rate": 1.0879994753813481e-05,
      "loss": 2.955,
      "step": 210591
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.455115556716919,
      "learning_rate": 1.0878903144383266e-05,
      "loss": 2.9286,
      "step": 210592
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9342563152313232,
      "learning_rate": 1.0877811588706121e-05,
      "loss": 3.1043,
      "step": 210593
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8725943565368652,
      "learning_rate": 1.0876720086782208e-05,
      "loss": 2.8558,
      "step": 210594
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.324498414993286,
      "learning_rate": 1.08756286386118e-05,
      "loss": 2.8637,
      "step": 210595
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.174621105194092,
      "learning_rate": 1.0874537244195025e-05,
      "loss": 2.9623,
      "step": 210596
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9823906421661377,
      "learning_rate": 1.0873445903532186e-05,
      "loss": 2.9577,
      "step": 210597
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7523765563964844,
      "learning_rate": 1.0872354616623447e-05,
      "loss": 2.8203,
      "step": 210598
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.703892469406128,
      "learning_rate": 1.0871263383468975e-05,
      "loss": 3.0612,
      "step": 210599
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9908995628356934,
      "learning_rate": 1.087017220406894e-05,
      "loss": 2.8859,
      "step": 210600
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3212223052978516,
      "learning_rate": 1.0869081078423669e-05,
      "loss": 2.9317,
      "step": 210601
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9712417125701904,
      "learning_rate": 1.0867990006533234e-05,
      "loss": 2.8493,
      "step": 210602
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.857895851135254,
      "learning_rate": 1.0866898988397932e-05,
      "loss": 2.7967,
      "step": 210603
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0379161834716797,
      "learning_rate": 1.086580802401793e-05,
      "loss": 2.6521,
      "step": 210604
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.201221466064453,
      "learning_rate": 1.0864717113393462e-05,
      "loss": 2.7848,
      "step": 210605
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9249444007873535,
      "learning_rate": 1.0863626256524628e-05,
      "loss": 3.0416,
      "step": 210606
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.909637451171875,
      "learning_rate": 1.0862535453411758e-05,
      "loss": 3.0432,
      "step": 210607
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0602431297302246,
      "learning_rate": 1.0861444704054956e-05,
      "loss": 2.9692,
      "step": 210608
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.467689514160156,
      "learning_rate": 1.086035400845452e-05,
      "loss": 2.967,
      "step": 210609
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.740255355834961,
      "learning_rate": 1.0859263366610583e-05,
      "loss": 2.7073,
      "step": 210610
    },
    {
      "epoch": 2.74,
      "grad_norm": 5.045081615447998,
      "learning_rate": 1.0858172778523378e-05,
      "loss": 2.6075,
      "step": 210611
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8129708766937256,
      "learning_rate": 1.0857082244193072e-05,
      "loss": 2.6021,
      "step": 210612
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.845926523208618,
      "learning_rate": 1.0855991763619898e-05,
      "loss": 3.2499,
      "step": 210613
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.161382675170898,
      "learning_rate": 1.0854901336804057e-05,
      "loss": 3.0617,
      "step": 210614
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2711360454559326,
      "learning_rate": 1.0853810963745746e-05,
      "loss": 2.9226,
      "step": 210615
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.994267225265503,
      "learning_rate": 1.08527206444452e-05,
      "loss": 2.8023,
      "step": 210616
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2983014583587646,
      "learning_rate": 1.0851630378902554e-05,
      "loss": 2.9978,
      "step": 210617
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9687771797180176,
      "learning_rate": 1.0850540167118039e-05,
      "loss": 2.8686,
      "step": 210618
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0315561294555664,
      "learning_rate": 1.084945000909192e-05,
      "loss": 2.9048,
      "step": 210619
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9817447662353516,
      "learning_rate": 1.0848359904824266e-05,
      "loss": 3.0082,
      "step": 210620
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.386829376220703,
      "learning_rate": 1.0847269854315444e-05,
      "loss": 2.9215,
      "step": 210621
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0405220985412598,
      "learning_rate": 1.0846179857565552e-05,
      "loss": 2.7121,
      "step": 210622
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.370765209197998,
      "learning_rate": 1.0845089914574789e-05,
      "loss": 3.1014,
      "step": 210623
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0667426586151123,
      "learning_rate": 1.0844000025343358e-05,
      "loss": 2.7793,
      "step": 210624
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.934727430343628,
      "learning_rate": 1.0842910189871556e-05,
      "loss": 2.812,
      "step": 210625
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2780227661132812,
      "learning_rate": 1.0841820408159451e-05,
      "loss": 2.787,
      "step": 210626
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3509814739227295,
      "learning_rate": 1.0840730680207343e-05,
      "loss": 2.8347,
      "step": 210627
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8465328216552734,
      "learning_rate": 1.0839641006015398e-05,
      "loss": 2.7104,
      "step": 210628
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2043850421905518,
      "learning_rate": 1.0838551385583849e-05,
      "loss": 3.0491,
      "step": 210629
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3661420345306396,
      "learning_rate": 1.0837461818912796e-05,
      "loss": 2.9433,
      "step": 210630
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.07439923286438,
      "learning_rate": 1.0836372306002572e-05,
      "loss": 2.9775,
      "step": 210631
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.624502420425415,
      "learning_rate": 1.083528284685331e-05,
      "loss": 2.9468,
      "step": 210632
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5256752967834473,
      "learning_rate": 1.0834193441465244e-05,
      "loss": 3.092,
      "step": 210633
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4768142700195312,
      "learning_rate": 1.0833104089838541e-05,
      "loss": 2.7693,
      "step": 210634
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0146870613098145,
      "learning_rate": 1.0832014791973465e-05,
      "loss": 2.6766,
      "step": 210635
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7143304347991943,
      "learning_rate": 1.0830925547870118e-05,
      "loss": 2.8366,
      "step": 210636
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7052483558654785,
      "learning_rate": 1.0829836357528798e-05,
      "loss": 2.9174,
      "step": 210637
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.846468210220337,
      "learning_rate": 1.0828747220949607e-05,
      "loss": 2.9525,
      "step": 210638
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9165079593658447,
      "learning_rate": 1.0827658138132878e-05,
      "loss": 2.9494,
      "step": 210639
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.042201280593872,
      "learning_rate": 1.0826569109078742e-05,
      "loss": 2.9062,
      "step": 210640
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8046162128448486,
      "learning_rate": 1.0825480133787367e-05,
      "loss": 2.5476,
      "step": 210641
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5604419708251953,
      "learning_rate": 1.0824391212259021e-05,
      "loss": 3.1816,
      "step": 210642
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4777042865753174,
      "learning_rate": 1.0823302344493901e-05,
      "loss": 2.9934,
      "step": 210643
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.900611639022827,
      "learning_rate": 1.082221353049214e-05,
      "loss": 2.7943,
      "step": 210644
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1201250553131104,
      "learning_rate": 1.0821124770254008e-05,
      "loss": 2.8783,
      "step": 210645
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.049434185028076,
      "learning_rate": 1.0820036063779702e-05,
      "loss": 2.893,
      "step": 210646
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9059414863586426,
      "learning_rate": 1.081894741106939e-05,
      "loss": 2.8663,
      "step": 210647
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7771761417388916,
      "learning_rate": 1.0817858812123303e-05,
      "loss": 2.8076,
      "step": 210648
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8093106746673584,
      "learning_rate": 1.0816770266941643e-05,
      "loss": 2.8745,
      "step": 210649
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.791456460952759,
      "learning_rate": 1.0815681775524576e-05,
      "loss": 3.2091,
      "step": 210650
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4021947383880615,
      "learning_rate": 1.0814593337872368e-05,
      "loss": 2.886,
      "step": 210651
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.751359224319458,
      "learning_rate": 1.0813504953985153e-05,
      "loss": 3.1581,
      "step": 210652
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6765835285186768,
      "learning_rate": 1.0812416623863163e-05,
      "loss": 2.7576,
      "step": 210653
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.722128391265869,
      "learning_rate": 1.0811328347506631e-05,
      "loss": 2.7267,
      "step": 210654
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8618109226226807,
      "learning_rate": 1.0810240124915726e-05,
      "loss": 2.8278,
      "step": 210655
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.551753282546997,
      "learning_rate": 1.0809151956090644e-05,
      "loss": 3.1153,
      "step": 210656
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7974417209625244,
      "learning_rate": 1.0808063841031622e-05,
      "loss": 2.8026,
      "step": 210657
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.1859517097473145,
      "learning_rate": 1.0806975779738824e-05,
      "loss": 3.005,
      "step": 210658
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5195860862731934,
      "learning_rate": 1.0805887772212484e-05,
      "loss": 2.9241,
      "step": 210659
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9436941146850586,
      "learning_rate": 1.0804799818452769e-05,
      "loss": 2.8457,
      "step": 210660
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6047377586364746,
      "learning_rate": 1.080371191845988e-05,
      "loss": 2.8535,
      "step": 210661
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.489654779434204,
      "learning_rate": 1.0802624072234078e-05,
      "loss": 2.8734,
      "step": 210662
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8011953830718994,
      "learning_rate": 1.0801536279775503e-05,
      "loss": 2.9422,
      "step": 210663
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.046900987625122,
      "learning_rate": 1.0800448541084417e-05,
      "loss": 3.025,
      "step": 210664
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1041626930236816,
      "learning_rate": 1.0799360856160989e-05,
      "loss": 2.721,
      "step": 210665
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.818687915802002,
      "learning_rate": 1.0798273225005382e-05,
      "loss": 3.0316,
      "step": 210666
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6419334411621094,
      "learning_rate": 1.0797185647617835e-05,
      "loss": 2.9835,
      "step": 210667
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6016533374786377,
      "learning_rate": 1.0796098123998576e-05,
      "loss": 2.9931,
      "step": 210668
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7250659465789795,
      "learning_rate": 1.079501065414774e-05,
      "loss": 2.8082,
      "step": 210669
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.307111978530884,
      "learning_rate": 1.0793923238065626e-05,
      "loss": 2.806,
      "step": 210670
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.861204147338867,
      "learning_rate": 1.079283587575237e-05,
      "loss": 2.917,
      "step": 210671
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8589420318603516,
      "learning_rate": 1.079174856720817e-05,
      "loss": 2.8297,
      "step": 210672
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7832283973693848,
      "learning_rate": 1.0790661312433224e-05,
      "loss": 3.1525,
      "step": 210673
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.769517421722412,
      "learning_rate": 1.0789574111427768e-05,
      "loss": 2.9117,
      "step": 210674
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1558260917663574,
      "learning_rate": 1.0788486964191966e-05,
      "loss": 2.9827,
      "step": 210675
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.972668170928955,
      "learning_rate": 1.0787399870726088e-05,
      "loss": 2.7658,
      "step": 210676
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.57568359375,
      "learning_rate": 1.0786312831030298e-05,
      "loss": 2.9634,
      "step": 210677
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7357499599456787,
      "learning_rate": 1.0785225845104761e-05,
      "loss": 2.8433,
      "step": 210678
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.497464418411255,
      "learning_rate": 1.0784138912949713e-05,
      "loss": 2.9478,
      "step": 210679
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.324237823486328,
      "learning_rate": 1.0783052034565353e-05,
      "loss": 2.7658,
      "step": 210680
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.048780918121338,
      "learning_rate": 1.0781965209951882e-05,
      "loss": 2.8243,
      "step": 210681
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2036166191101074,
      "learning_rate": 1.0780878439109497e-05,
      "loss": 3.2288,
      "step": 210682
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.19669246673584,
      "learning_rate": 1.0779791722038433e-05,
      "loss": 2.8628,
      "step": 210683
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.527425765991211,
      "learning_rate": 1.0778705058738856e-05,
      "loss": 2.9233,
      "step": 210684
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.4897193908691406,
      "learning_rate": 1.0777618449210968e-05,
      "loss": 2.7676,
      "step": 210685
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7833406925201416,
      "learning_rate": 1.0776531893454999e-05,
      "loss": 2.8996,
      "step": 210686
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.326314687728882,
      "learning_rate": 1.0775445391471083e-05,
      "loss": 2.9373,
      "step": 210687
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0436134338378906,
      "learning_rate": 1.077435894325952e-05,
      "loss": 3.0194,
      "step": 210688
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.641386032104492,
      "learning_rate": 1.0773272548820444e-05,
      "loss": 3.0204,
      "step": 210689
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.424593925476074,
      "learning_rate": 1.0772186208154122e-05,
      "loss": 2.929,
      "step": 210690
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9711272716522217,
      "learning_rate": 1.0771099921260618e-05,
      "loss": 2.7721,
      "step": 210691
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7849388122558594,
      "learning_rate": 1.0770013688140299e-05,
      "loss": 2.8916,
      "step": 210692
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.621918201446533,
      "learning_rate": 1.0768927508793235e-05,
      "loss": 2.8525,
      "step": 210693
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9629077911376953,
      "learning_rate": 1.0767841383219755e-05,
      "loss": 2.8391,
      "step": 210694
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7882351875305176,
      "learning_rate": 1.076675531141996e-05,
      "loss": 2.8168,
      "step": 210695
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.961106061935425,
      "learning_rate": 1.076566929339412e-05,
      "loss": 2.7166,
      "step": 210696
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6681783199310303,
      "learning_rate": 1.076458332914233e-05,
      "loss": 2.8651,
      "step": 210697
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4546597003936768,
      "learning_rate": 1.0763497418664924e-05,
      "loss": 3.0626,
      "step": 210698
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.971641540527344,
      "learning_rate": 1.076241156196197e-05,
      "loss": 2.7685,
      "step": 210699
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7666680812835693,
      "learning_rate": 1.0761325759033834e-05,
      "loss": 3.0277,
      "step": 210700
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9175314903259277,
      "learning_rate": 1.0760240009880584e-05,
      "loss": 2.7754,
      "step": 210701
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6505115032196045,
      "learning_rate": 1.0759154314502482e-05,
      "loss": 3.0557,
      "step": 210702
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.778101921081543,
      "learning_rate": 1.0758068672899667e-05,
      "loss": 3.0648,
      "step": 210703
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.973679780960083,
      "learning_rate": 1.0756983085072434e-05,
      "loss": 2.9132,
      "step": 210704
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.984071731567383,
      "learning_rate": 1.075589755102092e-05,
      "loss": 2.7139,
      "step": 210705
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.76870059967041,
      "learning_rate": 1.0754812070745356e-05,
      "loss": 2.9405,
      "step": 210706
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.4615836143493652,
      "learning_rate": 1.075372664424594e-05,
      "loss": 3.0175,
      "step": 210707
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.169267416000366,
      "learning_rate": 1.0752641271522843e-05,
      "loss": 3.1231,
      "step": 210708
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9421091079711914,
      "learning_rate": 1.0751555952576296e-05,
      "loss": 2.9454,
      "step": 210709
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4765725135803223,
      "learning_rate": 1.0750470687406498e-05,
      "loss": 2.9189,
      "step": 210710
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8982579708099365,
      "learning_rate": 1.0749385476013617e-05,
      "loss": 3.0955,
      "step": 210711
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.981306552886963,
      "learning_rate": 1.0748300318397918e-05,
      "loss": 2.9727,
      "step": 210712
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1414847373962402,
      "learning_rate": 1.07472152145596e-05,
      "loss": 2.9186,
      "step": 210713
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5517122745513916,
      "learning_rate": 1.07461301644988e-05,
      "loss": 2.7597,
      "step": 210714
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5983335971832275,
      "learning_rate": 1.0745045168215749e-05,
      "loss": 2.9167,
      "step": 210715
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.499145984649658,
      "learning_rate": 1.074396022571068e-05,
      "loss": 3.0323,
      "step": 210716
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2473580837249756,
      "learning_rate": 1.0742875336983725e-05,
      "loss": 2.845,
      "step": 210717
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.911412477493286,
      "learning_rate": 1.0741790502035153e-05,
      "loss": 3.0034,
      "step": 210718
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0542168617248535,
      "learning_rate": 1.0740705720865129e-05,
      "loss": 2.8986,
      "step": 210719
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.625155210494995,
      "learning_rate": 1.0739620993473918e-05,
      "loss": 2.9843,
      "step": 210720
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3393962383270264,
      "learning_rate": 1.0738536319861624e-05,
      "loss": 2.7877,
      "step": 210721
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8296432495117188,
      "learning_rate": 1.073745170002851e-05,
      "loss": 2.8544,
      "step": 210722
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9570021629333496,
      "learning_rate": 1.0736367133974743e-05,
      "loss": 2.9776,
      "step": 210723
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.529386520385742,
      "learning_rate": 1.0735282621700592e-05,
      "loss": 3.2032,
      "step": 210724
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.533568859100342,
      "learning_rate": 1.0734198163206153e-05,
      "loss": 2.8324,
      "step": 210725
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.380091667175293,
      "learning_rate": 1.0733113758491729e-05,
      "loss": 2.7751,
      "step": 210726
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.409620761871338,
      "learning_rate": 1.0732029407557486e-05,
      "loss": 2.9582,
      "step": 210727
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.536560535430908,
      "learning_rate": 1.0730945110403622e-05,
      "loss": 2.8277,
      "step": 210728
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0507333278656006,
      "learning_rate": 1.0729860867030271e-05,
      "loss": 3.0012,
      "step": 210729
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7585997581481934,
      "learning_rate": 1.0728776677437767e-05,
      "loss": 3.0055,
      "step": 210730
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.335784435272217,
      "learning_rate": 1.072769254162621e-05,
      "loss": 2.7405,
      "step": 210731
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7101593017578125,
      "learning_rate": 1.0726608459595832e-05,
      "loss": 2.8612,
      "step": 210732
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0332539081573486,
      "learning_rate": 1.0725524431346866e-05,
      "loss": 2.7386,
      "step": 210733
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.5722804069519043,
      "learning_rate": 1.072444045687948e-05,
      "loss": 2.8665,
      "step": 210734
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7038938999176025,
      "learning_rate": 1.0723356536193873e-05,
      "loss": 3.0034,
      "step": 210735
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.840332269668579,
      "learning_rate": 1.0722272669290277e-05,
      "loss": 2.9742,
      "step": 210736
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8320236206054688,
      "learning_rate": 1.0721188856168794e-05,
      "loss": 2.9177,
      "step": 210737
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.883007049560547,
      "learning_rate": 1.0720105096829789e-05,
      "loss": 2.887,
      "step": 210738
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7501258850097656,
      "learning_rate": 1.0719021391273364e-05,
      "loss": 2.83,
      "step": 210739
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2967324256896973,
      "learning_rate": 1.0717937739499683e-05,
      "loss": 3.1254,
      "step": 210740
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0057485103607178,
      "learning_rate": 1.0716854141509046e-05,
      "loss": 2.897,
      "step": 210741
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9678757190704346,
      "learning_rate": 1.0715770597301588e-05,
      "loss": 2.9848,
      "step": 210742
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.144039630889893,
      "learning_rate": 1.0714687106877506e-05,
      "loss": 2.8405,
      "step": 210743
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.883530855178833,
      "learning_rate": 1.071360367023707e-05,
      "loss": 2.7838,
      "step": 210744
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.9997291564941406,
      "learning_rate": 1.0712520287380443e-05,
      "loss": 2.7101,
      "step": 210745
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.893557548522949,
      "learning_rate": 1.0711436958307763e-05,
      "loss": 2.7333,
      "step": 210746
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.674381732940674,
      "learning_rate": 1.0710353683019323e-05,
      "loss": 2.7672,
      "step": 210747
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8672597408294678,
      "learning_rate": 1.0709270461515263e-05,
      "loss": 3.1333,
      "step": 210748
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8695199489593506,
      "learning_rate": 1.0708187293795844e-05,
      "loss": 2.9959,
      "step": 210749
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.923232316970825,
      "learning_rate": 1.0707104179861236e-05,
      "loss": 2.7906,
      "step": 210750
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.008594274520874,
      "learning_rate": 1.0706021119711639e-05,
      "loss": 2.8879,
      "step": 210751
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.961824893951416,
      "learning_rate": 1.0704938113347217e-05,
      "loss": 2.9372,
      "step": 210752
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.093932867050171,
      "learning_rate": 1.0703855160768237e-05,
      "loss": 2.8757,
      "step": 210753
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.382981777191162,
      "learning_rate": 1.0702772261974834e-05,
      "loss": 3.0591,
      "step": 210754
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0192997455596924,
      "learning_rate": 1.0701689416967274e-05,
      "loss": 2.7887,
      "step": 210755
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.820298910140991,
      "learning_rate": 1.0700606625745756e-05,
      "loss": 2.9199,
      "step": 210756
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1154303550720215,
      "learning_rate": 1.0699523888310446e-05,
      "loss": 2.9345,
      "step": 210757
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.950392723083496,
      "learning_rate": 1.0698441204661511e-05,
      "loss": 2.8981,
      "step": 210758
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7894654273986816,
      "learning_rate": 1.069735857479922e-05,
      "loss": 3.1864,
      "step": 210759
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1656558513641357,
      "learning_rate": 1.0696275998723735e-05,
      "loss": 3.3013,
      "step": 210760
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.91791033744812,
      "learning_rate": 1.0695193476435326e-05,
      "loss": 3.0358,
      "step": 210761
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7782716751098633,
      "learning_rate": 1.0694111007934125e-05,
      "loss": 2.8561,
      "step": 210762
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.774667978286743,
      "learning_rate": 1.0693028593220332e-05,
      "loss": 3.0522,
      "step": 210763
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.90091609954834,
      "learning_rate": 1.0691946232294147e-05,
      "loss": 2.8138,
      "step": 210764
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0148754119873047,
      "learning_rate": 1.0690863925155834e-05,
      "loss": 2.8305,
      "step": 210765
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.477362871170044,
      "learning_rate": 1.0689781671805497e-05,
      "loss": 2.9582,
      "step": 210766
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.878572940826416,
      "learning_rate": 1.0688699472243433e-05,
      "loss": 2.8039,
      "step": 210767
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6459269523620605,
      "learning_rate": 1.068761732646981e-05,
      "loss": 2.8666,
      "step": 210768
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.672407865524292,
      "learning_rate": 1.0686535234484828e-05,
      "loss": 3.0018,
      "step": 210769
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6669230461120605,
      "learning_rate": 1.0685453196288618e-05,
      "loss": 2.7964,
      "step": 210770
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.945574998855591,
      "learning_rate": 1.0684371211881481e-05,
      "loss": 2.8669,
      "step": 210771
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.574023485183716,
      "learning_rate": 1.0683289281263585e-05,
      "loss": 2.8361,
      "step": 210772
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.411684274673462,
      "learning_rate": 1.0682207404435129e-05,
      "loss": 2.8838,
      "step": 210773
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2506039142608643,
      "learning_rate": 1.068112558139631e-05,
      "loss": 2.8406,
      "step": 210774
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.7407565116882324,
      "learning_rate": 1.0680043812147332e-05,
      "loss": 3.0108,
      "step": 210775
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.146814823150635,
      "learning_rate": 1.0678962096688359e-05,
      "loss": 2.8969,
      "step": 210776
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4316232204437256,
      "learning_rate": 1.0677880435019693e-05,
      "loss": 3.0395,
      "step": 210777
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.092121124267578,
      "learning_rate": 1.0676798827141397e-05,
      "loss": 2.9543,
      "step": 210778
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6908981800079346,
      "learning_rate": 1.0675717273053808e-05,
      "loss": 2.9834,
      "step": 210779
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.863267660140991,
      "learning_rate": 1.0674635772757057e-05,
      "loss": 2.8795,
      "step": 210780
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.119161605834961,
      "learning_rate": 1.0673554326251343e-05,
      "loss": 3.0703,
      "step": 210781
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.5759239196777344,
      "learning_rate": 1.0672472933536836e-05,
      "loss": 2.9536,
      "step": 210782
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.699418306350708,
      "learning_rate": 1.0671391594613832e-05,
      "loss": 2.8261,
      "step": 210783
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4335758686065674,
      "learning_rate": 1.0670310309482433e-05,
      "loss": 2.8657,
      "step": 210784
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8369171619415283,
      "learning_rate": 1.0669229078142905e-05,
      "loss": 2.8213,
      "step": 210785
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7823379039764404,
      "learning_rate": 1.0668147900595413e-05,
      "loss": 3.0622,
      "step": 210786
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.77631950378418,
      "learning_rate": 1.0667066776840227e-05,
      "loss": 3.0015,
      "step": 210787
    },
    {
      "epoch": 2.74,
      "grad_norm": 4.105210781097412,
      "learning_rate": 1.0665985706877444e-05,
      "loss": 2.8933,
      "step": 210788
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.959333896636963,
      "learning_rate": 1.0664904690707365e-05,
      "loss": 2.9145,
      "step": 210789
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0024125576019287,
      "learning_rate": 1.066382372833009e-05,
      "loss": 2.9742,
      "step": 210790
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0807597637176514,
      "learning_rate": 1.0662742819745885e-05,
      "loss": 2.829,
      "step": 210791
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7475016117095947,
      "learning_rate": 1.0661661964954916e-05,
      "loss": 2.8133,
      "step": 210792
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.7638187408447266,
      "learning_rate": 1.0660581163957483e-05,
      "loss": 2.9194,
      "step": 210793
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6284022331237793,
      "learning_rate": 1.0659500416753653e-05,
      "loss": 2.6744,
      "step": 210794
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.1910629272460938,
      "learning_rate": 1.0658419723343692e-05,
      "loss": 3.0481,
      "step": 210795
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.6729893684387207,
      "learning_rate": 1.0657339083727767e-05,
      "loss": 3.1342,
      "step": 210796
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.8173904418945312,
      "learning_rate": 1.0656258497906144e-05,
      "loss": 2.8328,
      "step": 210797
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.080334424972534,
      "learning_rate": 1.0655177965878959e-05,
      "loss": 2.9909,
      "step": 210798
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.381213903427124,
      "learning_rate": 1.0654097487646507e-05,
      "loss": 2.9111,
      "step": 210799
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.633988857269287,
      "learning_rate": 1.0653017063208823e-05,
      "loss": 2.9782,
      "step": 210800
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.81231951713562,
      "learning_rate": 1.0651936692566243e-05,
      "loss": 2.9761,
      "step": 210801
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.6147806644439697,
      "learning_rate": 1.0650856375718931e-05,
      "loss": 3.0736,
      "step": 210802
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.932084321975708,
      "learning_rate": 1.0649776112667085e-05,
      "loss": 2.7725,
      "step": 210803
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.0334973335266113,
      "learning_rate": 1.064869590341091e-05,
      "loss": 2.7848,
      "step": 210804
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3377671241760254,
      "learning_rate": 1.0647615747950634e-05,
      "loss": 2.9116,
      "step": 210805
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.233254909515381,
      "learning_rate": 1.0646535646286359e-05,
      "loss": 3.0009,
      "step": 210806
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.986872673034668,
      "learning_rate": 1.0645455598418418e-05,
      "loss": 2.8885,
      "step": 210807
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.2813425064086914,
      "learning_rate": 1.0644375604346911e-05,
      "loss": 2.8939,
      "step": 210808
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.296934127807617,
      "learning_rate": 1.0643295664072104e-05,
      "loss": 3.0608,
      "step": 210809
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.8804726600646973,
      "learning_rate": 1.0642215777594132e-05,
      "loss": 3.1441,
      "step": 210810
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.913722515106201,
      "learning_rate": 1.0641135944913293e-05,
      "loss": 3.0768,
      "step": 210811
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.58396315574646,
      "learning_rate": 1.0640056166029687e-05,
      "loss": 3.1195,
      "step": 210812
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.673572540283203,
      "learning_rate": 1.063897644094358e-05,
      "loss": 3.002,
      "step": 210813
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.778082847595215,
      "learning_rate": 1.0637896769655108e-05,
      "loss": 2.8664,
      "step": 210814
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.4838788509368896,
      "learning_rate": 1.0636817152164534e-05,
      "loss": 2.8662,
      "step": 210815
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3744077682495117,
      "learning_rate": 1.0635737588472027e-05,
      "loss": 2.6534,
      "step": 210816
    },
    {
      "epoch": 2.74,
      "grad_norm": 2.835177183151245,
      "learning_rate": 1.0634658078577819e-05,
      "loss": 2.7136,
      "step": 210817
    },
    {
      "epoch": 2.74,
      "grad_norm": 3.3142662048339844,
      "learning_rate": 1.0633578622482108e-05,
      "loss": 2.9177,
      "step": 210818
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9839236736297607,
      "learning_rate": 1.0632499220185065e-05,
      "loss": 2.891,
      "step": 210819
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.975703477859497,
      "learning_rate": 1.0631419871686852e-05,
      "loss": 3.0306,
      "step": 210820
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9471611976623535,
      "learning_rate": 1.0630340576987773e-05,
      "loss": 3.014,
      "step": 210821
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3162574768066406,
      "learning_rate": 1.0629261336087957e-05,
      "loss": 2.669,
      "step": 210822
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6562130451202393,
      "learning_rate": 1.062818214898764e-05,
      "loss": 3.0199,
      "step": 210823
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2247660160064697,
      "learning_rate": 1.0627103015687022e-05,
      "loss": 2.871,
      "step": 210824
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0472891330718994,
      "learning_rate": 1.0626023936186234e-05,
      "loss": 2.8894,
      "step": 210825
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5245866775512695,
      "learning_rate": 1.0624944910485577e-05,
      "loss": 2.7805,
      "step": 210826
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.84911847114563,
      "learning_rate": 1.0623865938585185e-05,
      "loss": 2.8561,
      "step": 210827
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.498304843902588,
      "learning_rate": 1.0622787020485257e-05,
      "loss": 3.2279,
      "step": 210828
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.246580123901367,
      "learning_rate": 1.0621708156186059e-05,
      "loss": 2.9012,
      "step": 210829
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.015756607055664,
      "learning_rate": 1.0620629345687726e-05,
      "loss": 2.8119,
      "step": 210830
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7431323528289795,
      "learning_rate": 1.0619550588990456e-05,
      "loss": 2.9035,
      "step": 210831
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.70773983001709,
      "learning_rate": 1.0618471886094514e-05,
      "loss": 2.8283,
      "step": 210832
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.769779920578003,
      "learning_rate": 1.0617393237000071e-05,
      "loss": 2.9953,
      "step": 210833
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.712186336517334,
      "learning_rate": 1.0616314641707257e-05,
      "loss": 2.8198,
      "step": 210834
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7096786499023438,
      "learning_rate": 1.0615236100216373e-05,
      "loss": 2.9006,
      "step": 210835
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.125018358230591,
      "learning_rate": 1.0614157612527585e-05,
      "loss": 2.8519,
      "step": 210836
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.982818126678467,
      "learning_rate": 1.0613079178641059e-05,
      "loss": 2.8373,
      "step": 210837
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0427896976470947,
      "learning_rate": 1.0612000798557063e-05,
      "loss": 2.7511,
      "step": 210838
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.855343818664551,
      "learning_rate": 1.061092247227573e-05,
      "loss": 3.1769,
      "step": 210839
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.269927978515625,
      "learning_rate": 1.0609844199797291e-05,
      "loss": 3.0674,
      "step": 210840
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9238836765289307,
      "learning_rate": 1.0608765981121981e-05,
      "loss": 2.8676,
      "step": 210841
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.390497922897339,
      "learning_rate": 1.0607687816249933e-05,
      "loss": 2.8646,
      "step": 210842
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.127413272857666,
      "learning_rate": 1.0606609705181347e-05,
      "loss": 2.9997,
      "step": 210843
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8805770874023438,
      "learning_rate": 1.0605531647916521e-05,
      "loss": 2.8178,
      "step": 210844
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8875181674957275,
      "learning_rate": 1.0604453644455524e-05,
      "loss": 2.8352,
      "step": 210845
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7048633098602295,
      "learning_rate": 1.0603375694798656e-05,
      "loss": 2.8642,
      "step": 210846
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2060813903808594,
      "learning_rate": 1.0602297798946113e-05,
      "loss": 3.0874,
      "step": 210847
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.800093173980713,
      "learning_rate": 1.0601219956898033e-05,
      "loss": 3.0306,
      "step": 210848
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.327798366546631,
      "learning_rate": 1.0600142168654612e-05,
      "loss": 3.0564,
      "step": 210849
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.7496185302734375,
      "learning_rate": 1.0599064434216153e-05,
      "loss": 2.901,
      "step": 210850
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6547648906707764,
      "learning_rate": 1.0597986753582754e-05,
      "loss": 3.1516,
      "step": 210851
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8571414947509766,
      "learning_rate": 1.0596909126754648e-05,
      "loss": 3.0184,
      "step": 210852
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8679275512695312,
      "learning_rate": 1.0595831553732037e-05,
      "loss": 3.0162,
      "step": 210853
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.264155149459839,
      "learning_rate": 1.0594754034515185e-05,
      "loss": 2.8803,
      "step": 210854
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6400179862976074,
      "learning_rate": 1.0593676569104159e-05,
      "loss": 2.8781,
      "step": 210855
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5704691410064697,
      "learning_rate": 1.0592599157499293e-05,
      "loss": 3.0219,
      "step": 210856
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.981375217437744,
      "learning_rate": 1.0591521799700652e-05,
      "loss": 2.8777,
      "step": 210857
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.935127019882202,
      "learning_rate": 1.0590444495708571e-05,
      "loss": 2.8688,
      "step": 210858
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.0481343269348145,
      "learning_rate": 1.058936724552315e-05,
      "loss": 2.7181,
      "step": 210859
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7280280590057373,
      "learning_rate": 1.0588290049144688e-05,
      "loss": 2.857,
      "step": 210860
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.763702630996704,
      "learning_rate": 1.058721290657325e-05,
      "loss": 2.8923,
      "step": 210861
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8657872676849365,
      "learning_rate": 1.0586135817809171e-05,
      "loss": 2.84,
      "step": 210862
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7400596141815186,
      "learning_rate": 1.0585058782852519e-05,
      "loss": 3.0297,
      "step": 210863
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.37799072265625,
      "learning_rate": 1.0583981801703656e-05,
      "loss": 2.8629,
      "step": 210864
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5828187465667725,
      "learning_rate": 1.0582904874362619e-05,
      "loss": 2.7533,
      "step": 210865
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.172959566116333,
      "learning_rate": 1.0581828000829773e-05,
      "loss": 2.836,
      "step": 210866
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2839534282684326,
      "learning_rate": 1.0580751181105151e-05,
      "loss": 2.8618,
      "step": 210867
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6648497581481934,
      "learning_rate": 1.0579674415189054e-05,
      "loss": 2.963,
      "step": 210868
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.778754711151123,
      "learning_rate": 1.0578597703081649e-05,
      "loss": 3.0987,
      "step": 210869
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3865714073181152,
      "learning_rate": 1.0577521044783166e-05,
      "loss": 2.7777,
      "step": 210870
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6522510051727295,
      "learning_rate": 1.0576444440293741e-05,
      "loss": 3.0346,
      "step": 210871
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3862884044647217,
      "learning_rate": 1.0575367889613707e-05,
      "loss": 2.9169,
      "step": 210872
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5475642681121826,
      "learning_rate": 1.0574291392743094e-05,
      "loss": 3.1091,
      "step": 210873
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.450819969177246,
      "learning_rate": 1.0573214949682208e-05,
      "loss": 3.1468,
      "step": 210874
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.526731014251709,
      "learning_rate": 1.0572138560431208e-05,
      "loss": 2.9009,
      "step": 210875
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.801485300064087,
      "learning_rate": 1.0571062224990334e-05,
      "loss": 2.8445,
      "step": 210876
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8370635509490967,
      "learning_rate": 1.056998594335975e-05,
      "loss": 2.8822,
      "step": 210877
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9906723499298096,
      "learning_rate": 1.056890971553972e-05,
      "loss": 3.0643,
      "step": 210878
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5916383266448975,
      "learning_rate": 1.0567833541530312e-05,
      "loss": 3.101,
      "step": 210879
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.85422945022583,
      "learning_rate": 1.0566757421331861e-05,
      "loss": 2.7878,
      "step": 210880
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.264900207519531,
      "learning_rate": 1.05656813549445e-05,
      "loss": 2.9925,
      "step": 210881
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.354823589324951,
      "learning_rate": 1.0564605342368426e-05,
      "loss": 2.9256,
      "step": 210882
    },
    {
      "epoch": 2.75,
      "grad_norm": 5.03398323059082,
      "learning_rate": 1.0563529383603841e-05,
      "loss": 2.8705,
      "step": 210883
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.836615562438965,
      "learning_rate": 1.0562453478651045e-05,
      "loss": 2.6521,
      "step": 210884
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.916210889816284,
      "learning_rate": 1.056137762751007e-05,
      "loss": 2.747,
      "step": 210885
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.651451826095581,
      "learning_rate": 1.0560301830181251e-05,
      "loss": 2.9646,
      "step": 210886
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.480724573135376,
      "learning_rate": 1.0559226086664684e-05,
      "loss": 3.0357,
      "step": 210887
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.040677309036255,
      "learning_rate": 1.055815039696064e-05,
      "loss": 3.1043,
      "step": 210888
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.950932264328003,
      "learning_rate": 1.0557074761069284e-05,
      "loss": 2.9231,
      "step": 210889
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.620617628097534,
      "learning_rate": 1.0555999178990915e-05,
      "loss": 2.8213,
      "step": 210890
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1408932209014893,
      "learning_rate": 1.0554923650725566e-05,
      "loss": 2.7163,
      "step": 210891
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6857759952545166,
      "learning_rate": 1.0553848176273538e-05,
      "loss": 2.7691,
      "step": 210892
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.844923496246338,
      "learning_rate": 1.0552772755634997e-05,
      "loss": 2.7506,
      "step": 210893
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8376305103302,
      "learning_rate": 1.0551697388810176e-05,
      "loss": 2.8385,
      "step": 210894
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.769732713699341,
      "learning_rate": 1.0550622075799242e-05,
      "loss": 2.9023,
      "step": 210895
    },
    {
      "epoch": 2.75,
      "grad_norm": 5.574662685394287,
      "learning_rate": 1.0549546816602494e-05,
      "loss": 2.8577,
      "step": 210896
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0706658363342285,
      "learning_rate": 1.0548471611219933e-05,
      "loss": 3.0499,
      "step": 210897
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2087676525115967,
      "learning_rate": 1.0547396459651957e-05,
      "loss": 2.975,
      "step": 210898
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.667313814163208,
      "learning_rate": 1.0546321361898635e-05,
      "loss": 2.9532,
      "step": 210899
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.299463748931885,
      "learning_rate": 1.0545246317960232e-05,
      "loss": 2.9625,
      "step": 210900
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.746635675430298,
      "learning_rate": 1.0544171327836915e-05,
      "loss": 2.9489,
      "step": 210901
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8496949672698975,
      "learning_rate": 1.0543096391528916e-05,
      "loss": 3.0208,
      "step": 210902
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4470832347869873,
      "learning_rate": 1.0542021509036436e-05,
      "loss": 2.9989,
      "step": 210903
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.758028984069824,
      "learning_rate": 1.0540946680359674e-05,
      "loss": 2.709,
      "step": 210904
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7332515716552734,
      "learning_rate": 1.0539871905498764e-05,
      "loss": 2.8062,
      "step": 210905
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.959359884262085,
      "learning_rate": 1.0538797184453974e-05,
      "loss": 3.0364,
      "step": 210906
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9046664237976074,
      "learning_rate": 1.0537722517225466e-05,
      "loss": 2.9521,
      "step": 210907
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.007267475128174,
      "learning_rate": 1.0536647903813511e-05,
      "loss": 2.7771,
      "step": 210908
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.815574884414673,
      "learning_rate": 1.0535573344218239e-05,
      "loss": 2.8461,
      "step": 210909
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5634031295776367,
      "learning_rate": 1.053449883843982e-05,
      "loss": 3.2032,
      "step": 210910
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.135275363922119,
      "learning_rate": 1.053342438647855e-05,
      "loss": 2.8354,
      "step": 210911
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0860490798950195,
      "learning_rate": 1.0532349988334599e-05,
      "loss": 2.8309,
      "step": 210912
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3033335208892822,
      "learning_rate": 1.0531275644008064e-05,
      "loss": 2.733,
      "step": 210913
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9668400287628174,
      "learning_rate": 1.0530201353499313e-05,
      "loss": 2.9665,
      "step": 210914
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.195746898651123,
      "learning_rate": 1.0529127116808445e-05,
      "loss": 2.8851,
      "step": 210915
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.144319772720337,
      "learning_rate": 1.0528052933935626e-05,
      "loss": 2.7729,
      "step": 210916
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0726263523101807,
      "learning_rate": 1.0526978804881158e-05,
      "loss": 3.219,
      "step": 210917
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.72928786277771,
      "learning_rate": 1.0525904729645207e-05,
      "loss": 3.0948,
      "step": 210918
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8145222663879395,
      "learning_rate": 1.0524830708227871e-05,
      "loss": 2.9857,
      "step": 210919
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5491702556610107,
      "learning_rate": 1.052375674062952e-05,
      "loss": 2.9487,
      "step": 210920
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.890770196914673,
      "learning_rate": 1.0522682826850215e-05,
      "loss": 2.6218,
      "step": 210921
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.2777910232543945,
      "learning_rate": 1.0521608966890226e-05,
      "loss": 2.9045,
      "step": 210922
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1351940631866455,
      "learning_rate": 1.052053516074972e-05,
      "loss": 2.8741,
      "step": 210923
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3641700744628906,
      "learning_rate": 1.0519461408428897e-05,
      "loss": 2.8955,
      "step": 210924
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1503067016601562,
      "learning_rate": 1.0518387709928022e-05,
      "loss": 2.8603,
      "step": 210925
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.052302837371826,
      "learning_rate": 1.051731406524723e-05,
      "loss": 3.0006,
      "step": 210926
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.602444648742676,
      "learning_rate": 1.051624047438675e-05,
      "loss": 2.9229,
      "step": 210927
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2330193519592285,
      "learning_rate": 1.0515166937346686e-05,
      "loss": 2.8937,
      "step": 210928
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.978647470474243,
      "learning_rate": 1.0514093454127403e-05,
      "loss": 2.9244,
      "step": 210929
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.697962760925293,
      "learning_rate": 1.0513020024728936e-05,
      "loss": 2.9199,
      "step": 210930
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.44059681892395,
      "learning_rate": 1.0511946649151615e-05,
      "loss": 2.6735,
      "step": 210931
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5678939819335938,
      "learning_rate": 1.0510873327395608e-05,
      "loss": 2.8201,
      "step": 210932
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9284825325012207,
      "learning_rate": 1.0509800059461082e-05,
      "loss": 2.7527,
      "step": 210933
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9541618824005127,
      "learning_rate": 1.0508726845348203e-05,
      "loss": 2.8877,
      "step": 210934
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5164332389831543,
      "learning_rate": 1.0507653685057238e-05,
      "loss": 2.9,
      "step": 210935
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7518463134765625,
      "learning_rate": 1.0506580578588352e-05,
      "loss": 3.2313,
      "step": 210936
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8796591758728027,
      "learning_rate": 1.050550752594178e-05,
      "loss": 2.9786,
      "step": 210937
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5272128582000732,
      "learning_rate": 1.0504434527117689e-05,
      "loss": 2.9951,
      "step": 210938
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7790424823760986,
      "learning_rate": 1.0503361582116342e-05,
      "loss": 3.06,
      "step": 210939
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.025517463684082,
      "learning_rate": 1.050228869093781e-05,
      "loss": 3.2196,
      "step": 210940
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.790241003036499,
      "learning_rate": 1.050121585358239e-05,
      "loss": 2.8892,
      "step": 210941
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.899319887161255,
      "learning_rate": 1.0500143070050249e-05,
      "loss": 3.0153,
      "step": 210942
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5512826442718506,
      "learning_rate": 1.049907034034162e-05,
      "loss": 3.013,
      "step": 210943
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0329675674438477,
      "learning_rate": 1.049799766445667e-05,
      "loss": 2.6382,
      "step": 210944
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1239609718322754,
      "learning_rate": 1.0496925042395632e-05,
      "loss": 2.6374,
      "step": 210945
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7033915519714355,
      "learning_rate": 1.049585247415864e-05,
      "loss": 2.8807,
      "step": 210946
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8726019859313965,
      "learning_rate": 1.0494779959745959e-05,
      "loss": 3.0731,
      "step": 210947
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4472103118896484,
      "learning_rate": 1.0493707499157722e-05,
      "loss": 2.8821,
      "step": 210948
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8118748664855957,
      "learning_rate": 1.0492635092394231e-05,
      "loss": 2.9573,
      "step": 210949
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.430664539337158,
      "learning_rate": 1.049156273945555e-05,
      "loss": 3.0686,
      "step": 210950
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.858572006225586,
      "learning_rate": 1.0490490440342048e-05,
      "loss": 2.959,
      "step": 210951
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2338292598724365,
      "learning_rate": 1.0489418195053755e-05,
      "loss": 2.9567,
      "step": 210952
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.391713857650757,
      "learning_rate": 1.0488346003590974e-05,
      "loss": 3.0907,
      "step": 210953
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.074634552001953,
      "learning_rate": 1.0487273865953839e-05,
      "loss": 2.8797,
      "step": 210954
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2013919353485107,
      "learning_rate": 1.0486201782142645e-05,
      "loss": 3.0402,
      "step": 210955
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.932119846343994,
      "learning_rate": 1.0485129752157461e-05,
      "loss": 2.881,
      "step": 210956
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6976826190948486,
      "learning_rate": 1.0484057775998622e-05,
      "loss": 3.1016,
      "step": 210957
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4107468128204346,
      "learning_rate": 1.0482985853666227e-05,
      "loss": 2.9878,
      "step": 210958
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6735613346099854,
      "learning_rate": 1.0481913985160539e-05,
      "loss": 2.8309,
      "step": 210959
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1907918453216553,
      "learning_rate": 1.0480842170481662e-05,
      "loss": 2.7257,
      "step": 210960
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.212280511856079,
      "learning_rate": 1.0479770409629928e-05,
      "loss": 2.6038,
      "step": 210961
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0863475799560547,
      "learning_rate": 1.0478698702605403e-05,
      "loss": 2.9476,
      "step": 210962
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5584537982940674,
      "learning_rate": 1.0477627049408455e-05,
      "loss": 3.0164,
      "step": 210963
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7989184856414795,
      "learning_rate": 1.0476555450039114e-05,
      "loss": 3.0242,
      "step": 210964
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.076582431793213,
      "learning_rate": 1.047548390449765e-05,
      "loss": 2.7511,
      "step": 210965
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6899030208587646,
      "learning_rate": 1.0474412412784262e-05,
      "loss": 2.7211,
      "step": 210966
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0135138034820557,
      "learning_rate": 1.0473340974899147e-05,
      "loss": 2.799,
      "step": 210967
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.376920461654663,
      "learning_rate": 1.0472269590842476e-05,
      "loss": 2.9442,
      "step": 210968
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.096247673034668,
      "learning_rate": 1.0471198260614543e-05,
      "loss": 3.0813,
      "step": 210969
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3461856842041016,
      "learning_rate": 1.0470126984215421e-05,
      "loss": 2.7975,
      "step": 210970
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7344729900360107,
      "learning_rate": 1.0469055761645405e-05,
      "loss": 2.9027,
      "step": 210971
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9169485569000244,
      "learning_rate": 1.04679845929046e-05,
      "loss": 3.0662,
      "step": 210972
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.657244920730591,
      "learning_rate": 1.0466913477993334e-05,
      "loss": 3.201,
      "step": 210973
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2340166568756104,
      "learning_rate": 1.0465842416911674e-05,
      "loss": 3.1433,
      "step": 210974
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0976147651672363,
      "learning_rate": 1.0464771409659956e-05,
      "loss": 2.827,
      "step": 210975
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.92669939994812,
      "learning_rate": 1.0463700456238211e-05,
      "loss": 2.7767,
      "step": 210976
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1170012950897217,
      "learning_rate": 1.0462629556646807e-05,
      "loss": 3.0579,
      "step": 210977
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5176382064819336,
      "learning_rate": 1.0461558710885776e-05,
      "loss": 2.9797,
      "step": 210978
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.261390686035156,
      "learning_rate": 1.0460487918955484e-05,
      "loss": 3.074,
      "step": 210979
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.943621873855591,
      "learning_rate": 1.0459417180856e-05,
      "loss": 2.8778,
      "step": 210980
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2776408195495605,
      "learning_rate": 1.0458346496587655e-05,
      "loss": 2.9612,
      "step": 210981
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.699827194213867,
      "learning_rate": 1.0457275866150483e-05,
      "loss": 2.847,
      "step": 210982
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4835288524627686,
      "learning_rate": 1.0456205289544817e-05,
      "loss": 2.9346,
      "step": 210983
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.3852016925811768,
      "learning_rate": 1.045513476677079e-05,
      "loss": 2.865,
      "step": 210984
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5719337463378906,
      "learning_rate": 1.0454064297828635e-05,
      "loss": 2.9921,
      "step": 210985
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9876811504364014,
      "learning_rate": 1.0452993882718485e-05,
      "loss": 2.7511,
      "step": 210986
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.458592176437378,
      "learning_rate": 1.0451923521440642e-05,
      "loss": 2.9821,
      "step": 210987
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.756736993789673,
      "learning_rate": 1.045085321399527e-05,
      "loss": 2.9554,
      "step": 210988
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8734724521636963,
      "learning_rate": 1.0449782960382536e-05,
      "loss": 3.0164,
      "step": 210989
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7332777976989746,
      "learning_rate": 1.0448712760602606e-05,
      "loss": 2.997,
      "step": 210990
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.078063726425171,
      "learning_rate": 1.044764261465575e-05,
      "loss": 2.7451,
      "step": 210991
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.145718574523926,
      "learning_rate": 1.044657252254213e-05,
      "loss": 2.9559,
      "step": 210992
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8972115516662598,
      "learning_rate": 1.044550248426198e-05,
      "loss": 2.74,
      "step": 210993
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0814898014068604,
      "learning_rate": 1.0444432499815503e-05,
      "loss": 2.8901,
      "step": 210994
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.257929801940918,
      "learning_rate": 1.0443362569202863e-05,
      "loss": 3.2361,
      "step": 210995
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9128613471984863,
      "learning_rate": 1.0442292692424226e-05,
      "loss": 2.6927,
      "step": 210996
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7797751426696777,
      "learning_rate": 1.044122286947986e-05,
      "loss": 2.7506,
      "step": 210997
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0164859294891357,
      "learning_rate": 1.0440153100369896e-05,
      "loss": 2.915,
      "step": 210998
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.122666597366333,
      "learning_rate": 1.0439083385094637e-05,
      "loss": 2.8545,
      "step": 210999
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6309375762939453,
      "learning_rate": 1.0438013723654182e-05,
      "loss": 3.1253,
      "step": 211000
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2805778980255127,
      "learning_rate": 1.0436944116048762e-05,
      "loss": 2.8734,
      "step": 211001
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8248181343078613,
      "learning_rate": 1.0435874562278578e-05,
      "loss": 2.9633,
      "step": 211002
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1447434425354004,
      "learning_rate": 1.0434805062343865e-05,
      "loss": 3.077,
      "step": 211003
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.548731803894043,
      "learning_rate": 1.0433735616244754e-05,
      "loss": 2.7314,
      "step": 211004
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.03399395942688,
      "learning_rate": 1.0432666223981511e-05,
      "loss": 3.1152,
      "step": 211005
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8730804920196533,
      "learning_rate": 1.0431596885554272e-05,
      "loss": 2.9023,
      "step": 211006
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5704617500305176,
      "learning_rate": 1.0430527600963268e-05,
      "loss": 2.9057,
      "step": 211007
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.266426086425781,
      "learning_rate": 1.0429458370208698e-05,
      "loss": 2.6179,
      "step": 211008
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2823853492736816,
      "learning_rate": 1.0428389193290731e-05,
      "loss": 2.8557,
      "step": 211009
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.071152687072754,
      "learning_rate": 1.0427320070209633e-05,
      "loss": 2.9299,
      "step": 211010
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3974082469940186,
      "learning_rate": 1.0426251000965568e-05,
      "loss": 2.915,
      "step": 211011
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.424530029296875,
      "learning_rate": 1.042518198555874e-05,
      "loss": 3.0537,
      "step": 211012
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.161063194274902,
      "learning_rate": 1.0424113023989277e-05,
      "loss": 2.7563,
      "step": 211013
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.046921730041504,
      "learning_rate": 1.0423044116257485e-05,
      "loss": 2.9581,
      "step": 211014
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.313530206680298,
      "learning_rate": 1.0421975262363457e-05,
      "loss": 2.9375,
      "step": 211015
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0357401371002197,
      "learning_rate": 1.0420906462307532e-05,
      "loss": 2.9126,
      "step": 211016
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.174254894256592,
      "learning_rate": 1.0419837716089774e-05,
      "loss": 2.9812,
      "step": 211017
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.423401355743408,
      "learning_rate": 1.0418769023710483e-05,
      "loss": 2.8809,
      "step": 211018
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.517929553985596,
      "learning_rate": 1.0417700385169758e-05,
      "loss": 2.9151,
      "step": 211019
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.87515115737915,
      "learning_rate": 1.0416631800467868e-05,
      "loss": 2.8688,
      "step": 211020
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.049389123916626,
      "learning_rate": 1.0415563269604944e-05,
      "loss": 2.8975,
      "step": 211021
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6889116764068604,
      "learning_rate": 1.0414494792581318e-05,
      "loss": 2.7815,
      "step": 211022
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.125176191329956,
      "learning_rate": 1.0413426369397027e-05,
      "loss": 2.8784,
      "step": 211023
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.711984395980835,
      "learning_rate": 1.0412358000052434e-05,
      "loss": 3.0006,
      "step": 211024
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.488387107849121,
      "learning_rate": 1.0411289684547574e-05,
      "loss": 2.9647,
      "step": 211025
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.899960517883301,
      "learning_rate": 1.0410221422882747e-05,
      "loss": 2.8427,
      "step": 211026
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.301514625549316,
      "learning_rate": 1.040915321505812e-05,
      "loss": 2.8704,
      "step": 211027
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8098504543304443,
      "learning_rate": 1.0408085061073923e-05,
      "loss": 2.8001,
      "step": 211028
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6284050941467285,
      "learning_rate": 1.0407016960930292e-05,
      "loss": 2.7917,
      "step": 211029
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.355191469192505,
      "learning_rate": 1.0405948914627526e-05,
      "loss": 2.8476,
      "step": 211030
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0332882404327393,
      "learning_rate": 1.0404880922165692e-05,
      "loss": 2.8883,
      "step": 211031
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.916895627975464,
      "learning_rate": 1.040381298354509e-05,
      "loss": 2.9341,
      "step": 211032
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8231894969940186,
      "learning_rate": 1.0402745098765852e-05,
      "loss": 3.0291,
      "step": 211033
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0237789154052734,
      "learning_rate": 1.0401677267828246e-05,
      "loss": 2.721,
      "step": 211034
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1246631145477295,
      "learning_rate": 1.0400609490732403e-05,
      "loss": 3.1325,
      "step": 211035
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.994384527206421,
      "learning_rate": 1.0399541767478625e-05,
      "loss": 2.9063,
      "step": 211036
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.070632219314575,
      "learning_rate": 1.0398474098066945e-05,
      "loss": 3.2071,
      "step": 211037
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3117129802703857,
      "learning_rate": 1.0397406482497727e-05,
      "loss": 2.9084,
      "step": 211038
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.922586679458618,
      "learning_rate": 1.0396338920771041e-05,
      "loss": 2.921,
      "step": 211039
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3108503818511963,
      "learning_rate": 1.0395271412887185e-05,
      "loss": 2.8855,
      "step": 211040
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.760671138763428,
      "learning_rate": 1.0394203958846291e-05,
      "loss": 2.9297,
      "step": 211041
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.992335796356201,
      "learning_rate": 1.039313655864863e-05,
      "loss": 3.0022,
      "step": 211042
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5532500743865967,
      "learning_rate": 1.0392069212294295e-05,
      "loss": 2.8154,
      "step": 211043
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5806009769439697,
      "learning_rate": 1.0391001919783559e-05,
      "loss": 2.98,
      "step": 211044
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.714081287384033,
      "learning_rate": 1.0389934681116586e-05,
      "loss": 2.9317,
      "step": 211045
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8785359859466553,
      "learning_rate": 1.0388867496293607e-05,
      "loss": 2.7848,
      "step": 211046
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.902616262435913,
      "learning_rate": 1.038780036531479e-05,
      "loss": 3.0182,
      "step": 211047
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.835364818572998,
      "learning_rate": 1.0386733288180404e-05,
      "loss": 3.0986,
      "step": 211048
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3625872135162354,
      "learning_rate": 1.0385666264890513e-05,
      "loss": 2.7579,
      "step": 211049
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7551331520080566,
      "learning_rate": 1.038459929544545e-05,
      "loss": 3.014,
      "step": 211050
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4447970390319824,
      "learning_rate": 1.0383532379845316e-05,
      "loss": 2.9861,
      "step": 211051
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7071707248687744,
      "learning_rate": 1.0382465518090377e-05,
      "loss": 3.1523,
      "step": 211052
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8441321849823,
      "learning_rate": 1.03813987101808e-05,
      "loss": 2.895,
      "step": 211053
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.059516429901123,
      "learning_rate": 1.0380331956116784e-05,
      "loss": 2.7789,
      "step": 211054
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5637524127960205,
      "learning_rate": 1.0379265255898562e-05,
      "loss": 2.8727,
      "step": 211055
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.829331159591675,
      "learning_rate": 1.0378198609526267e-05,
      "loss": 3.0585,
      "step": 211056
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0877597332000732,
      "learning_rate": 1.0377132017000134e-05,
      "loss": 3.3238,
      "step": 211057
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.755169630050659,
      "learning_rate": 1.037606547832036e-05,
      "loss": 2.9239,
      "step": 211058
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6027297973632812,
      "learning_rate": 1.0374998993487117e-05,
      "loss": 2.9029,
      "step": 211059
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3633880615234375,
      "learning_rate": 1.0373932562500663e-05,
      "loss": 2.6842,
      "step": 211060
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.722540855407715,
      "learning_rate": 1.0372866185361172e-05,
      "loss": 2.8735,
      "step": 211061
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5130085945129395,
      "learning_rate": 1.0371799862068841e-05,
      "loss": 2.9645,
      "step": 211062
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.797619342803955,
      "learning_rate": 1.0370733592623804e-05,
      "loss": 3.0091,
      "step": 211063
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.604445219039917,
      "learning_rate": 1.0369667377026359e-05,
      "loss": 2.7426,
      "step": 211064
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1512844562530518,
      "learning_rate": 1.036860121527664e-05,
      "loss": 2.9898,
      "step": 211065
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8967628479003906,
      "learning_rate": 1.036753510737488e-05,
      "loss": 2.9346,
      "step": 211066
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5812344551086426,
      "learning_rate": 1.036646905332128e-05,
      "loss": 2.985,
      "step": 211067
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7497758865356445,
      "learning_rate": 1.0365403053116005e-05,
      "loss": 2.9427,
      "step": 211068
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.487539529800415,
      "learning_rate": 1.0364337106759224e-05,
      "loss": 2.9884,
      "step": 211069
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8777518272399902,
      "learning_rate": 1.0363271214251235e-05,
      "loss": 2.9209,
      "step": 211070
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.990224838256836,
      "learning_rate": 1.0362205375592136e-05,
      "loss": 3.0998,
      "step": 211071
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.678849697113037,
      "learning_rate": 1.0361139590782197e-05,
      "loss": 2.9629,
      "step": 211072
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.126784086227417,
      "learning_rate": 1.0360073859821615e-05,
      "loss": 3.0875,
      "step": 211073
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.666378974914551,
      "learning_rate": 1.0359008182710526e-05,
      "loss": 2.9514,
      "step": 211074
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.633887767791748,
      "learning_rate": 1.0357942559449162e-05,
      "loss": 3.0884,
      "step": 211075
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.742527961730957,
      "learning_rate": 1.0356876990037755e-05,
      "loss": 2.902,
      "step": 211076
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5728800296783447,
      "learning_rate": 1.0355811474476439e-05,
      "loss": 3.0393,
      "step": 211077
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3466222286224365,
      "learning_rate": 1.035474601276548e-05,
      "loss": 2.9674,
      "step": 211078
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.559011936187744,
      "learning_rate": 1.0353680604905012e-05,
      "loss": 2.9295,
      "step": 211079
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9069595336914062,
      "learning_rate": 1.0352615250895302e-05,
      "loss": 2.8549,
      "step": 211080
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.812277317047119,
      "learning_rate": 1.0351549950736448e-05,
      "loss": 3.0295,
      "step": 211081
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.754441261291504,
      "learning_rate": 1.0350484704428752e-05,
      "loss": 2.9575,
      "step": 211082
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8969202041625977,
      "learning_rate": 1.0349419511972312e-05,
      "loss": 2.8605,
      "step": 211083
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9170339107513428,
      "learning_rate": 1.0348354373367463e-05,
      "loss": 2.9813,
      "step": 211084
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3184757232666016,
      "learning_rate": 1.0347289288614301e-05,
      "loss": 3.1514,
      "step": 211085
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6082043647766113,
      "learning_rate": 1.0346224257712998e-05,
      "loss": 2.847,
      "step": 211086
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1852481365203857,
      "learning_rate": 1.034515928066385e-05,
      "loss": 3.3145,
      "step": 211087
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4793848991394043,
      "learning_rate": 1.0344094357466993e-05,
      "loss": 2.9385,
      "step": 211088
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1551625728607178,
      "learning_rate": 1.0343029488122589e-05,
      "loss": 2.8985,
      "step": 211089
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4495723247528076,
      "learning_rate": 1.0341964672630942e-05,
      "loss": 2.9222,
      "step": 211090
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.123070240020752,
      "learning_rate": 1.0340899910992185e-05,
      "loss": 2.912,
      "step": 211091
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6506059169769287,
      "learning_rate": 1.0339835203206482e-05,
      "loss": 2.9407,
      "step": 211092
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4302287101745605,
      "learning_rate": 1.0338770549274134e-05,
      "loss": 2.9446,
      "step": 211093
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.769298076629639,
      "learning_rate": 1.033770594919524e-05,
      "loss": 2.9201,
      "step": 211094
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9656991958618164,
      "learning_rate": 1.0336641402970003e-05,
      "loss": 3.0372,
      "step": 211095
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1870503425598145,
      "learning_rate": 1.033557691059872e-05,
      "loss": 2.8299,
      "step": 211096
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1171603202819824,
      "learning_rate": 1.033451247208149e-05,
      "loss": 2.7206,
      "step": 211097
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.842470407485962,
      "learning_rate": 1.0333448087418516e-05,
      "loss": 2.8975,
      "step": 211098
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7298784255981445,
      "learning_rate": 1.0332383756610064e-05,
      "loss": 3.0093,
      "step": 211099
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4436769485473633,
      "learning_rate": 1.0331319479656231e-05,
      "loss": 3.018,
      "step": 211100
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.973583698272705,
      "learning_rate": 1.0330255256557318e-05,
      "loss": 3.0221,
      "step": 211101
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8099567890167236,
      "learning_rate": 1.0329191087313493e-05,
      "loss": 2.9628,
      "step": 211102
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.458592414855957,
      "learning_rate": 1.0328126971924955e-05,
      "loss": 2.7583,
      "step": 211103
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2483599185943604,
      "learning_rate": 1.0327062910391804e-05,
      "loss": 2.6595,
      "step": 211104
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6583316326141357,
      "learning_rate": 1.0325998902714405e-05,
      "loss": 2.8381,
      "step": 211105
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.156782865524292,
      "learning_rate": 1.0324934948892794e-05,
      "loss": 2.9987,
      "step": 211106
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6750779151916504,
      "learning_rate": 1.0323871048927302e-05,
      "loss": 2.8187,
      "step": 211107
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.24348783493042,
      "learning_rate": 1.0322807202818029e-05,
      "loss": 2.7036,
      "step": 211108
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9270410537719727,
      "learning_rate": 1.0321743410565275e-05,
      "loss": 2.9258,
      "step": 211109
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8397605419158936,
      "learning_rate": 1.0320679672169142e-05,
      "loss": 2.7511,
      "step": 211110
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.269120931625366,
      "learning_rate": 1.0319615987629859e-05,
      "loss": 2.9157,
      "step": 211111
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9818596839904785,
      "learning_rate": 1.0318552356947595e-05,
      "loss": 2.6834,
      "step": 211112
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.265012741088867,
      "learning_rate": 1.0317488780122651e-05,
      "loss": 2.8278,
      "step": 211113
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.719557285308838,
      "learning_rate": 1.0316425257155091e-05,
      "loss": 2.7564,
      "step": 211114
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4001073837280273,
      "learning_rate": 1.031536178804525e-05,
      "loss": 2.7889,
      "step": 211115
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.039656639099121,
      "learning_rate": 1.0314298372793161e-05,
      "loss": 3.1106,
      "step": 211116
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2352163791656494,
      "learning_rate": 1.031323501139919e-05,
      "loss": 3.1145,
      "step": 211117
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.905423879623413,
      "learning_rate": 1.031217170386337e-05,
      "loss": 3.063,
      "step": 211118
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5785839557647705,
      "learning_rate": 1.0311108450186067e-05,
      "loss": 3.0272,
      "step": 211119
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.448935031890869,
      "learning_rate": 1.031004525036735e-05,
      "loss": 3.1025,
      "step": 211120
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.70585298538208,
      "learning_rate": 1.0308982104407481e-05,
      "loss": 2.9136,
      "step": 211121
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9819881916046143,
      "learning_rate": 1.0307919012306664e-05,
      "loss": 2.8123,
      "step": 211122
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.209219217300415,
      "learning_rate": 1.0306855974065065e-05,
      "loss": 2.993,
      "step": 211123
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.362579584121704,
      "learning_rate": 1.0305792989682849e-05,
      "loss": 2.9504,
      "step": 211124
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9824633598327637,
      "learning_rate": 1.0304730059160283e-05,
      "loss": 2.952,
      "step": 211125
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.126450538635254,
      "learning_rate": 1.0303667182497532e-05,
      "loss": 2.6588,
      "step": 211126
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.219058036804199,
      "learning_rate": 1.0302604359694799e-05,
      "loss": 2.7459,
      "step": 211127
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.085451126098633,
      "learning_rate": 1.0301541590752282e-05,
      "loss": 2.9419,
      "step": 211128
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9517643451690674,
      "learning_rate": 1.0300478875670181e-05,
      "loss": 2.9957,
      "step": 211129
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6220099925994873,
      "learning_rate": 1.0299416214448663e-05,
      "loss": 2.9504,
      "step": 211130
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6025078296661377,
      "learning_rate": 1.0298353607087994e-05,
      "loss": 3.077,
      "step": 211131
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.518381118774414,
      "learning_rate": 1.0297291053588308e-05,
      "loss": 3.0448,
      "step": 211132
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9857661724090576,
      "learning_rate": 1.0296228553949837e-05,
      "loss": 3.015,
      "step": 211133
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4350368976593018,
      "learning_rate": 1.0295166108172748e-05,
      "loss": 2.9626,
      "step": 211134
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1144189834594727,
      "learning_rate": 1.0294103716257274e-05,
      "loss": 3.1328,
      "step": 211135
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.050902366638184,
      "learning_rate": 1.0293041378203548e-05,
      "loss": 3.0177,
      "step": 211136
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8869619369506836,
      "learning_rate": 1.029197909401187e-05,
      "loss": 2.9303,
      "step": 211137
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7807774543762207,
      "learning_rate": 1.029091686368234e-05,
      "loss": 2.9574,
      "step": 211138
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7229726314544678,
      "learning_rate": 1.0289854687215227e-05,
      "loss": 2.9087,
      "step": 211139
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.231987714767456,
      "learning_rate": 1.0288792564610726e-05,
      "loss": 2.8681,
      "step": 211140
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.757662296295166,
      "learning_rate": 1.0287730495868973e-05,
      "loss": 2.711,
      "step": 211141
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.7531204223632812,
      "learning_rate": 1.0286668480990168e-05,
      "loss": 2.8389,
      "step": 211142
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0925512313842773,
      "learning_rate": 1.0285606519974576e-05,
      "loss": 3.0666,
      "step": 211143
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.641702890396118,
      "learning_rate": 1.0284544612822332e-05,
      "loss": 2.9879,
      "step": 211144
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0180506706237793,
      "learning_rate": 1.0283482759533702e-05,
      "loss": 3.1663,
      "step": 211145
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5788211822509766,
      "learning_rate": 1.0282420960108816e-05,
      "loss": 2.8789,
      "step": 211146
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.020329475402832,
      "learning_rate": 1.0281359214547913e-05,
      "loss": 2.9001,
      "step": 211147
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.101048231124878,
      "learning_rate": 1.0280297522851155e-05,
      "loss": 3.0461,
      "step": 211148
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.343682050704956,
      "learning_rate": 1.0279235885018777e-05,
      "loss": 2.899,
      "step": 211149
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7745511531829834,
      "learning_rate": 1.0278174301050912e-05,
      "loss": 2.9061,
      "step": 211150
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7824292182922363,
      "learning_rate": 1.027711277094786e-05,
      "loss": 2.8144,
      "step": 211151
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.185850143432617,
      "learning_rate": 1.0276051294709753e-05,
      "loss": 2.8977,
      "step": 211152
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4391613006591797,
      "learning_rate": 1.0274989872336792e-05,
      "loss": 3.1365,
      "step": 211153
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.3675684928894043,
      "learning_rate": 1.0273928503829143e-05,
      "loss": 3.1225,
      "step": 211154
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7191100120544434,
      "learning_rate": 1.0272867189187073e-05,
      "loss": 2.9524,
      "step": 211155
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6213266849517822,
      "learning_rate": 1.0271805928410715e-05,
      "loss": 2.9798,
      "step": 211156
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.222893476486206,
      "learning_rate": 1.0270744721500335e-05,
      "loss": 2.954,
      "step": 211157
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.383841037750244,
      "learning_rate": 1.0269683568456099e-05,
      "loss": 3.0126,
      "step": 211158
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0812156200408936,
      "learning_rate": 1.0268622469278176e-05,
      "loss": 2.9321,
      "step": 211159
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.055572032928467,
      "learning_rate": 1.0267561423966797e-05,
      "loss": 2.8335,
      "step": 211160
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.305068016052246,
      "learning_rate": 1.0266500432522129e-05,
      "loss": 3.0793,
      "step": 211161
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.643674850463867,
      "learning_rate": 1.0265439494944372e-05,
      "loss": 2.8724,
      "step": 211162
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2149202823638916,
      "learning_rate": 1.0264378611233792e-05,
      "loss": 2.7377,
      "step": 211163
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.960373878479004,
      "learning_rate": 1.0263317781390523e-05,
      "loss": 2.8537,
      "step": 211164
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6192307472229004,
      "learning_rate": 1.0262257005414764e-05,
      "loss": 3.0298,
      "step": 211165
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7911126613616943,
      "learning_rate": 1.0261196283306683e-05,
      "loss": 3.0368,
      "step": 211166
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3189680576324463,
      "learning_rate": 1.0260135615066545e-05,
      "loss": 2.8671,
      "step": 211167
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2265524864196777,
      "learning_rate": 1.0259075000694516e-05,
      "loss": 2.5734,
      "step": 211168
    },
    {
      "epoch": 2.75,
      "grad_norm": 7.259756088256836,
      "learning_rate": 1.0258014440190799e-05,
      "loss": 2.8116,
      "step": 211169
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.102618455886841,
      "learning_rate": 1.0256953933555589e-05,
      "loss": 2.9754,
      "step": 211170
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0238847732543945,
      "learning_rate": 1.0255893480789057e-05,
      "loss": 2.9074,
      "step": 211171
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.633312463760376,
      "learning_rate": 1.0254833081891434e-05,
      "loss": 2.7833,
      "step": 211172
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.831381320953369,
      "learning_rate": 1.025377273686292e-05,
      "loss": 3.0279,
      "step": 211173
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.720750331878662,
      "learning_rate": 1.025271244570368e-05,
      "loss": 3.3643,
      "step": 211174
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8892154693603516,
      "learning_rate": 1.0251652208413952e-05,
      "loss": 2.9277,
      "step": 211175
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9236676692962646,
      "learning_rate": 1.0250592024993897e-05,
      "loss": 2.8322,
      "step": 211176
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.149353265762329,
      "learning_rate": 1.0249531895443718e-05,
      "loss": 2.8018,
      "step": 211177
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.131603717803955,
      "learning_rate": 1.0248471819763649e-05,
      "loss": 2.6641,
      "step": 211178
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9303078651428223,
      "learning_rate": 1.0247411797953852e-05,
      "loss": 2.8141,
      "step": 211179
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8188564777374268,
      "learning_rate": 1.0246351830014498e-05,
      "loss": 3.0483,
      "step": 211180
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4195027351379395,
      "learning_rate": 1.0245291915945852e-05,
      "loss": 2.9938,
      "step": 211181
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1273200511932373,
      "learning_rate": 1.024423205574808e-05,
      "loss": 3.0531,
      "step": 211182
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.326340675354004,
      "learning_rate": 1.0243172249421349e-05,
      "loss": 3.0117,
      "step": 211183
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.78828763961792,
      "learning_rate": 1.0242112496965893e-05,
      "loss": 2.7003,
      "step": 211184
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.249326229095459,
      "learning_rate": 1.0241052798381877e-05,
      "loss": 2.8329,
      "step": 211185
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.908188581466675,
      "learning_rate": 1.0239993153669535e-05,
      "loss": 2.8609,
      "step": 211186
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.08593487739563,
      "learning_rate": 1.0238933562829066e-05,
      "loss": 3.0739,
      "step": 211187
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0630502700805664,
      "learning_rate": 1.0237874025860603e-05,
      "loss": 2.8103,
      "step": 211188
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5381784439086914,
      "learning_rate": 1.0236814542764449e-05,
      "loss": 3.0297,
      "step": 211189
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1164488792419434,
      "learning_rate": 1.0235755113540733e-05,
      "loss": 2.9737,
      "step": 211190
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4190797805786133,
      "learning_rate": 1.0234695738189591e-05,
      "loss": 2.792,
      "step": 211191
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.009983539581299,
      "learning_rate": 1.0233636416711355e-05,
      "loss": 2.8901,
      "step": 211192
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0433685779571533,
      "learning_rate": 1.0232577149106125e-05,
      "loss": 2.9893,
      "step": 211193
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0276730060577393,
      "learning_rate": 1.0231517935374133e-05,
      "loss": 2.8176,
      "step": 211194
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9688472747802734,
      "learning_rate": 1.0230458775515616e-05,
      "loss": 3.2567,
      "step": 211195
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.244236946105957,
      "learning_rate": 1.0229399669530703e-05,
      "loss": 2.9104,
      "step": 211196
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.816202402114868,
      "learning_rate": 1.0228340617419562e-05,
      "loss": 2.9457,
      "step": 211197
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.952559232711792,
      "learning_rate": 1.0227281619182493e-05,
      "loss": 2.7824,
      "step": 211198
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2440879344940186,
      "learning_rate": 1.0226222674819628e-05,
      "loss": 3.1217,
      "step": 211199
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.05815052986145,
      "learning_rate": 1.022516378433117e-05,
      "loss": 2.7171,
      "step": 211200
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0059900283813477,
      "learning_rate": 1.0224104947717348e-05,
      "loss": 2.8963,
      "step": 211201
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.168517589569092,
      "learning_rate": 1.0223046164978333e-05,
      "loss": 3.0681,
      "step": 211202
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6885623931884766,
      "learning_rate": 1.0221987436114287e-05,
      "loss": 3.0122,
      "step": 211203
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1388320922851562,
      "learning_rate": 1.0220928761125479e-05,
      "loss": 2.692,
      "step": 211204
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.733494281768799,
      "learning_rate": 1.0219870140012044e-05,
      "loss": 2.9436,
      "step": 211205
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8023386001586914,
      "learning_rate": 1.0218811572774244e-05,
      "loss": 2.7626,
      "step": 211206
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.582595109939575,
      "learning_rate": 1.0217753059412214e-05,
      "loss": 3.0043,
      "step": 211207
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.252678394317627,
      "learning_rate": 1.021669459992619e-05,
      "loss": 2.9475,
      "step": 211208
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.514967441558838,
      "learning_rate": 1.0215636194316335e-05,
      "loss": 2.8062,
      "step": 211209
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5177721977233887,
      "learning_rate": 1.0214577842582849e-05,
      "loss": 3.094,
      "step": 211210
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0223822593688965,
      "learning_rate": 1.0213519544725935e-05,
      "loss": 2.9647,
      "step": 211211
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.135199785232544,
      "learning_rate": 1.0212461300745856e-05,
      "loss": 2.9199,
      "step": 211212
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2255706787109375,
      "learning_rate": 1.0211403110642713e-05,
      "loss": 3.011,
      "step": 211213
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.85970401763916,
      "learning_rate": 1.0210344974416773e-05,
      "loss": 2.768,
      "step": 211214
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4091389179229736,
      "learning_rate": 1.0209286892068136e-05,
      "loss": 2.9536,
      "step": 211215
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.563469171524048,
      "learning_rate": 1.0208228863597102e-05,
      "loss": 2.8169,
      "step": 211216
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6167562007904053,
      "learning_rate": 1.0207170889003802e-05,
      "loss": 2.7873,
      "step": 211217
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2997593879699707,
      "learning_rate": 1.0206112968288505e-05,
      "loss": 2.943,
      "step": 211218
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9947493076324463,
      "learning_rate": 1.0205055101451342e-05,
      "loss": 3.1307,
      "step": 211219
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.819499969482422,
      "learning_rate": 1.0203997288492548e-05,
      "loss": 3.0491,
      "step": 211220
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5193288326263428,
      "learning_rate": 1.0202939529412257e-05,
      "loss": 3.1647,
      "step": 211221
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.63273286819458,
      "learning_rate": 1.0201881824210733e-05,
      "loss": 3.0332,
      "step": 211222
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3862597942352295,
      "learning_rate": 1.0200824172888145e-05,
      "loss": 2.9012,
      "step": 211223
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9110567569732666,
      "learning_rate": 1.019976657544469e-05,
      "loss": 2.9805,
      "step": 211224
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.220431327819824,
      "learning_rate": 1.0198709031880603e-05,
      "loss": 3.0482,
      "step": 211225
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.035626173019409,
      "learning_rate": 1.0197651542196017e-05,
      "loss": 3.097,
      "step": 211226
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.693638324737549,
      "learning_rate": 1.0196594106391131e-05,
      "loss": 2.9606,
      "step": 211227
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7874791622161865,
      "learning_rate": 1.0195536724466214e-05,
      "loss": 2.9862,
      "step": 211228
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9536147117614746,
      "learning_rate": 1.0194479396421395e-05,
      "loss": 2.8576,
      "step": 211229
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0175774097442627,
      "learning_rate": 1.0193422122256912e-05,
      "loss": 2.6967,
      "step": 211230
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2241179943084717,
      "learning_rate": 1.0192364901972926e-05,
      "loss": 2.8961,
      "step": 211231
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.980008363723755,
      "learning_rate": 1.0191307735569675e-05,
      "loss": 2.865,
      "step": 211232
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9287893772125244,
      "learning_rate": 1.0190250623047291e-05,
      "loss": 2.8696,
      "step": 211233
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7942938804626465,
      "learning_rate": 1.018919356440604e-05,
      "loss": 2.7827,
      "step": 211234
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.066739082336426,
      "learning_rate": 1.0188136559646054e-05,
      "loss": 3.0786,
      "step": 211235
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7103257179260254,
      "learning_rate": 1.01870796087676e-05,
      "loss": 2.6665,
      "step": 211236
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.9009552001953125,
      "learning_rate": 1.0186022711770848e-05,
      "loss": 2.9216,
      "step": 211237
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.313692331314087,
      "learning_rate": 1.018496586865596e-05,
      "loss": 3.0733,
      "step": 211238
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6956076622009277,
      "learning_rate": 1.0183909079423136e-05,
      "loss": 2.8746,
      "step": 211239
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6200528144836426,
      "learning_rate": 1.0182852344072644e-05,
      "loss": 2.6935,
      "step": 211240
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0963003635406494,
      "learning_rate": 1.0181795662604585e-05,
      "loss": 2.6276,
      "step": 211241
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.647179365158081,
      "learning_rate": 1.0180739035019226e-05,
      "loss": 2.958,
      "step": 211242
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.084770679473877,
      "learning_rate": 1.017968246131673e-05,
      "loss": 2.7274,
      "step": 211243
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.963447332382202,
      "learning_rate": 1.0178625941497332e-05,
      "loss": 2.9236,
      "step": 211244
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8205299377441406,
      "learning_rate": 1.0177569475561131e-05,
      "loss": 2.8614,
      "step": 211245
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.271627187728882,
      "learning_rate": 1.0176513063508462e-05,
      "loss": 2.682,
      "step": 211246
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.615107297897339,
      "learning_rate": 1.0175456705339391e-05,
      "loss": 2.9121,
      "step": 211247
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.297959566116333,
      "learning_rate": 1.0174400401054217e-05,
      "loss": 3.2365,
      "step": 211248
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9651551246643066,
      "learning_rate": 1.0173344150653073e-05,
      "loss": 2.9277,
      "step": 211249
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2668275833129883,
      "learning_rate": 1.0172287954136193e-05,
      "loss": 3.0939,
      "step": 211250
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.852773904800415,
      "learning_rate": 1.017123181150371e-05,
      "loss": 2.8852,
      "step": 211251
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.753394842147827,
      "learning_rate": 1.0170175722755924e-05,
      "loss": 2.909,
      "step": 211252
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8921802043914795,
      "learning_rate": 1.0169119687892935e-05,
      "loss": 2.8303,
      "step": 211253
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.555522918701172,
      "learning_rate": 1.0168063706914976e-05,
      "loss": 2.8675,
      "step": 211254
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.9624335765838623,
      "learning_rate": 1.0167007779822245e-05,
      "loss": 2.8585,
      "step": 211255
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.684678792953491,
      "learning_rate": 1.0165951906615012e-05,
      "loss": 2.8122,
      "step": 211256
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.962001323699951,
      "learning_rate": 1.0164896087293305e-05,
      "loss": 2.7241,
      "step": 211257
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.611318826675415,
      "learning_rate": 1.0163840321857463e-05,
      "loss": 2.9276,
      "step": 211258
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1007702350616455,
      "learning_rate": 1.0162784610307584e-05,
      "loss": 2.89,
      "step": 211259
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9187726974487305,
      "learning_rate": 1.0161728952643966e-05,
      "loss": 2.9922,
      "step": 211260
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8405649662017822,
      "learning_rate": 1.0160673348866744e-05,
      "loss": 3.0484,
      "step": 211261
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.693852663040161,
      "learning_rate": 1.0159617798976116e-05,
      "loss": 2.9576,
      "step": 211262
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.227372884750366,
      "learning_rate": 1.0158562302972285e-05,
      "loss": 2.9489,
      "step": 211263
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.385690689086914,
      "learning_rate": 1.015750686085548e-05,
      "loss": 2.9657,
      "step": 211264
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7495267391204834,
      "learning_rate": 1.0156451472625803e-05,
      "loss": 2.6573,
      "step": 211265
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8646557331085205,
      "learning_rate": 1.0155396138283557e-05,
      "loss": 2.9982,
      "step": 211266
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8631703853607178,
      "learning_rate": 1.0154340857828868e-05,
      "loss": 2.9438,
      "step": 211267
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0400242805480957,
      "learning_rate": 1.015328563126201e-05,
      "loss": 2.8724,
      "step": 211268
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8539509773254395,
      "learning_rate": 1.0152230458583076e-05,
      "loss": 3.1676,
      "step": 211269
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.615002155303955,
      "learning_rate": 1.0151175339792305e-05,
      "loss": 2.7664,
      "step": 211270
    },
    {
      "epoch": 2.75,
      "grad_norm": 8.003350257873535,
      "learning_rate": 1.0150120274889962e-05,
      "loss": 3.2028,
      "step": 211271
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4473471641540527,
      "learning_rate": 1.0149065263876144e-05,
      "loss": 3.0161,
      "step": 211272
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.411430835723877,
      "learning_rate": 1.0148010306751087e-05,
      "loss": 3.0006,
      "step": 211273
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.763491153717041,
      "learning_rate": 1.014695540351499e-05,
      "loss": 3.0363,
      "step": 211274
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.21767258644104,
      "learning_rate": 1.0145900554168052e-05,
      "loss": 3.0113,
      "step": 211275
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.3591508865356445,
      "learning_rate": 1.014484575871044e-05,
      "loss": 3.0367,
      "step": 211276
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.219341278076172,
      "learning_rate": 1.0143791017142388e-05,
      "loss": 2.9596,
      "step": 211277
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.04157018661499,
      "learning_rate": 1.0142736329464096e-05,
      "loss": 2.9791,
      "step": 211278
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8462958335876465,
      "learning_rate": 1.0141681695675696e-05,
      "loss": 2.6979,
      "step": 211279
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7168829441070557,
      "learning_rate": 1.014062711577749e-05,
      "loss": 2.9075,
      "step": 211280
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.19977068901062,
      "learning_rate": 1.0139572589769573e-05,
      "loss": 2.6486,
      "step": 211281
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.691274642944336,
      "learning_rate": 1.013851811765215e-05,
      "loss": 2.7193,
      "step": 211282
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.00518798828125,
      "learning_rate": 1.013746369942552e-05,
      "loss": 3.0169,
      "step": 211283
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7455997467041016,
      "learning_rate": 1.0136409335089745e-05,
      "loss": 2.7405,
      "step": 211284
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5508933067321777,
      "learning_rate": 1.0135355024645098e-05,
      "loss": 2.8717,
      "step": 211285
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9678022861480713,
      "learning_rate": 1.0134300768091808e-05,
      "loss": 3.0224,
      "step": 211286
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2063937187194824,
      "learning_rate": 1.0133246565429975e-05,
      "loss": 2.821,
      "step": 211287
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0492019653320312,
      "learning_rate": 1.0132192416659834e-05,
      "loss": 2.8067,
      "step": 211288
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.754361867904663,
      "learning_rate": 1.0131138321781617e-05,
      "loss": 2.7899,
      "step": 211289
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8641576766967773,
      "learning_rate": 1.0130084280795492e-05,
      "loss": 2.9885,
      "step": 211290
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.938480854034424,
      "learning_rate": 1.0129030293701657e-05,
      "loss": 2.7047,
      "step": 211291
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.774325132369995,
      "learning_rate": 1.0127976360500312e-05,
      "loss": 2.9893,
      "step": 211292
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.658663749694824,
      "learning_rate": 1.0126922481191624e-05,
      "loss": 3.0944,
      "step": 211293
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.625936269760132,
      "learning_rate": 1.0125868655775827e-05,
      "loss": 3.213,
      "step": 211294
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.911773443222046,
      "learning_rate": 1.0124814884253084e-05,
      "loss": 2.7068,
      "step": 211295
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1676106452941895,
      "learning_rate": 1.0123761166623633e-05,
      "loss": 3.0587,
      "step": 211296
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.060122013092041,
      "learning_rate": 1.012270750288764e-05,
      "loss": 3.0374,
      "step": 211297
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9177470207214355,
      "learning_rate": 1.0121653893045334e-05,
      "loss": 3.0778,
      "step": 211298
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1936683654785156,
      "learning_rate": 1.012060033709685e-05,
      "loss": 2.822,
      "step": 211299
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.349144220352173,
      "learning_rate": 1.0119546835042425e-05,
      "loss": 2.7786,
      "step": 211300
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8114118576049805,
      "learning_rate": 1.0118493386882254e-05,
      "loss": 3.0919,
      "step": 211301
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2411692142486572,
      "learning_rate": 1.0117439992616505e-05,
      "loss": 3.1231,
      "step": 211302
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9447593688964844,
      "learning_rate": 1.0116386652245412e-05,
      "loss": 2.9815,
      "step": 211303
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2758407592773438,
      "learning_rate": 1.0115333365769174e-05,
      "loss": 3.0196,
      "step": 211304
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8706395626068115,
      "learning_rate": 1.0114280133187958e-05,
      "loss": 2.9667,
      "step": 211305
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9335439205169678,
      "learning_rate": 1.0113226954501929e-05,
      "loss": 2.8476,
      "step": 211306
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0533125400543213,
      "learning_rate": 1.011217382971139e-05,
      "loss": 3.0728,
      "step": 211307
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.290602445602417,
      "learning_rate": 1.0111120758816404e-05,
      "loss": 2.9512,
      "step": 211308
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2445900440216064,
      "learning_rate": 1.0110067741817274e-05,
      "loss": 2.9512,
      "step": 211309
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.239586114883423,
      "learning_rate": 1.0109014778714164e-05,
      "loss": 2.9214,
      "step": 211310
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1477251052856445,
      "learning_rate": 1.0107961869507242e-05,
      "loss": 2.9616,
      "step": 211311
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.3268327713012695,
      "learning_rate": 1.0106909014196707e-05,
      "loss": 2.8087,
      "step": 211312
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7689170837402344,
      "learning_rate": 1.0105856212782793e-05,
      "loss": 2.9556,
      "step": 211313
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1475541591644287,
      "learning_rate": 1.0104803465265632e-05,
      "loss": 2.9889,
      "step": 211314
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3853211402893066,
      "learning_rate": 1.0103750771645492e-05,
      "loss": 3.0688,
      "step": 211315
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.533942699432373,
      "learning_rate": 1.0102698131922571e-05,
      "loss": 2.6816,
      "step": 211316
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.924006938934326,
      "learning_rate": 1.0101645546097003e-05,
      "loss": 2.73,
      "step": 211317
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.871805191040039,
      "learning_rate": 1.010059301416899e-05,
      "loss": 2.7977,
      "step": 211318
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7651124000549316,
      "learning_rate": 1.0099540536138762e-05,
      "loss": 3.079,
      "step": 211319
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7218809127807617,
      "learning_rate": 1.0098488112006486e-05,
      "loss": 2.8708,
      "step": 211320
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5919268131256104,
      "learning_rate": 1.0097435741772397e-05,
      "loss": 2.6099,
      "step": 211321
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9913716316223145,
      "learning_rate": 1.0096383425436627e-05,
      "loss": 2.8424,
      "step": 211322
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.972241163253784,
      "learning_rate": 1.009533116299951e-05,
      "loss": 2.9361,
      "step": 211323
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.360447883605957,
      "learning_rate": 1.0094278954461044e-05,
      "loss": 2.8292,
      "step": 211324
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.860356330871582,
      "learning_rate": 1.0093226799821563e-05,
      "loss": 2.7775,
      "step": 211325
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.817573070526123,
      "learning_rate": 1.0092174699081201e-05,
      "loss": 2.9788,
      "step": 211326
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9086859226226807,
      "learning_rate": 1.0091122652240224e-05,
      "loss": 3.0285,
      "step": 211327
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6962528228759766,
      "learning_rate": 1.0090070659298699e-05,
      "loss": 2.9522,
      "step": 211328
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9573707580566406,
      "learning_rate": 1.0089018720257025e-05,
      "loss": 2.9172,
      "step": 211329
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.783107280731201,
      "learning_rate": 1.008796683511517e-05,
      "loss": 2.9943,
      "step": 211330
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5971670150756836,
      "learning_rate": 1.0086915003873464e-05,
      "loss": 2.7051,
      "step": 211331
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7979938983917236,
      "learning_rate": 1.0085863226532075e-05,
      "loss": 2.8902,
      "step": 211332
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.141219139099121,
      "learning_rate": 1.0084811503091206e-05,
      "loss": 3.0254,
      "step": 211333
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.590970754623413,
      "learning_rate": 1.008375983355102e-05,
      "loss": 2.8482,
      "step": 211334
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9862730503082275,
      "learning_rate": 1.0082708217911783e-05,
      "loss": 2.9481,
      "step": 211335
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1637070178985596,
      "learning_rate": 1.0081656656173598e-05,
      "loss": 2.705,
      "step": 211336
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.125094413757324,
      "learning_rate": 1.008060514833673e-05,
      "loss": 3.194,
      "step": 211337
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8314719200134277,
      "learning_rate": 1.007955369440131e-05,
      "loss": 3.1189,
      "step": 211338
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0427844524383545,
      "learning_rate": 1.0078502294367607e-05,
      "loss": 2.88,
      "step": 211339
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.031195640563965,
      "learning_rate": 1.0077450948235754e-05,
      "loss": 2.8896,
      "step": 211340
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6928703784942627,
      "learning_rate": 1.0076399656006051e-05,
      "loss": 2.8175,
      "step": 211341
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.752866744995117,
      "learning_rate": 1.0075348417678565e-05,
      "loss": 3.0383,
      "step": 211342
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7969865798950195,
      "learning_rate": 1.0074297233253525e-05,
      "loss": 2.7045,
      "step": 211343
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.67733097076416,
      "learning_rate": 1.0073246102731169e-05,
      "loss": 3.0416,
      "step": 211344
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.217266798019409,
      "learning_rate": 1.0072195026111662e-05,
      "loss": 2.8028,
      "step": 211345
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0626139640808105,
      "learning_rate": 1.0071144003395204e-05,
      "loss": 2.7822,
      "step": 211346
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0176708698272705,
      "learning_rate": 1.0070093034581995e-05,
      "loss": 2.8658,
      "step": 211347
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.860560417175293,
      "learning_rate": 1.0069042119672265e-05,
      "loss": 2.7527,
      "step": 211348
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7204580307006836,
      "learning_rate": 1.0067991258666152e-05,
      "loss": 3.076,
      "step": 211349
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.826371192932129,
      "learning_rate": 1.0066940451563853e-05,
      "loss": 2.9004,
      "step": 211350
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.96450662612915,
      "learning_rate": 1.0065889698365603e-05,
      "loss": 2.9866,
      "step": 211351
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.030752658843994,
      "learning_rate": 1.0064838999071534e-05,
      "loss": 2.9205,
      "step": 211352
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.15291166305542,
      "learning_rate": 1.0063788353681945e-05,
      "loss": 3.0722,
      "step": 211353
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.892123699188232,
      "learning_rate": 1.0062737762196937e-05,
      "loss": 3.0507,
      "step": 211354
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1742570400238037,
      "learning_rate": 1.006168722461671e-05,
      "loss": 2.7945,
      "step": 211355
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8157410621643066,
      "learning_rate": 1.0060636740941564e-05,
      "loss": 2.8247,
      "step": 211356
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.896415948867798,
      "learning_rate": 1.0059586311171564e-05,
      "loss": 2.5995,
      "step": 211357
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1375179290771484,
      "learning_rate": 1.0058535935306977e-05,
      "loss": 2.789,
      "step": 211358
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7922356128692627,
      "learning_rate": 1.005748561334797e-05,
      "loss": 2.8052,
      "step": 211359
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.714456081390381,
      "learning_rate": 1.0056435345294778e-05,
      "loss": 2.8613,
      "step": 211360
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8092174530029297,
      "learning_rate": 1.005538513114753e-05,
      "loss": 3.0597,
      "step": 211361
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7947869300842285,
      "learning_rate": 1.0054334970906497e-05,
      "loss": 3.1054,
      "step": 211362
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.841498613357544,
      "learning_rate": 1.0053284864571808e-05,
      "loss": 2.8891,
      "step": 211363
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6476633548736572,
      "learning_rate": 1.0052234812143667e-05,
      "loss": 2.9227,
      "step": 211364
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2114219665527344,
      "learning_rate": 1.0051184813622337e-05,
      "loss": 2.9373,
      "step": 211365
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.697153329849243,
      "learning_rate": 1.0050134869007954e-05,
      "loss": 2.7614,
      "step": 211366
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.90093469619751,
      "learning_rate": 1.0049084978300714e-05,
      "loss": 3.0742,
      "step": 211367
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8525919914245605,
      "learning_rate": 1.0048035141500821e-05,
      "loss": 2.9752,
      "step": 211368
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3748857975006104,
      "learning_rate": 1.004698535860844e-05,
      "loss": 2.9648,
      "step": 211369
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1220521926879883,
      "learning_rate": 1.0045935629623836e-05,
      "loss": 2.7566,
      "step": 211370
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.899153232574463,
      "learning_rate": 1.0044885954547176e-05,
      "loss": 2.9209,
      "step": 211371
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.375924587249756,
      "learning_rate": 1.0043836333378663e-05,
      "loss": 2.8306,
      "step": 211372
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7209181785583496,
      "learning_rate": 1.0042786766118393e-05,
      "loss": 2.9269,
      "step": 211373
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0313005447387695,
      "learning_rate": 1.0041737252766734e-05,
      "loss": 3.0233,
      "step": 211374
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8122520446777344,
      "learning_rate": 1.0040687793323721e-05,
      "loss": 2.9634,
      "step": 211375
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8414268493652344,
      "learning_rate": 1.003963838778965e-05,
      "loss": 2.8795,
      "step": 211376
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3595869541168213,
      "learning_rate": 1.003858903616469e-05,
      "loss": 2.8556,
      "step": 211377
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7689695358276367,
      "learning_rate": 1.0037539738449007e-05,
      "loss": 3.0314,
      "step": 211378
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.754880905151367,
      "learning_rate": 1.0036490494642801e-05,
      "loss": 2.9099,
      "step": 211379
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5179357528686523,
      "learning_rate": 1.0035441304746338e-05,
      "loss": 3.1072,
      "step": 211380
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1238315105438232,
      "learning_rate": 1.003439216875972e-05,
      "loss": 2.9788,
      "step": 211381
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.067626476287842,
      "learning_rate": 1.0033343086683209e-05,
      "loss": 3.0371,
      "step": 211382
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5829479694366455,
      "learning_rate": 1.0032294058516977e-05,
      "loss": 2.9014,
      "step": 211383
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.006023406982422,
      "learning_rate": 1.0031245084261185e-05,
      "loss": 2.9657,
      "step": 211384
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2156009674072266,
      "learning_rate": 1.0030196163916071e-05,
      "loss": 2.9314,
      "step": 211385
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.96222186088562,
      "learning_rate": 1.0029147297481798e-05,
      "loss": 2.7484,
      "step": 211386
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.901317596435547,
      "learning_rate": 1.0028098484958601e-05,
      "loss": 2.8522,
      "step": 211387
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1335597038269043,
      "learning_rate": 1.0027049726346647e-05,
      "loss": 2.6852,
      "step": 211388
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5214409828186035,
      "learning_rate": 1.0026001021646135e-05,
      "loss": 3.133,
      "step": 211389
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9217538833618164,
      "learning_rate": 1.0024952370857298e-05,
      "loss": 2.8803,
      "step": 211390
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.489790678024292,
      "learning_rate": 1.0023903773980268e-05,
      "loss": 2.87,
      "step": 211391
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1112020015716553,
      "learning_rate": 1.002285523101528e-05,
      "loss": 3.0437,
      "step": 211392
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2221388816833496,
      "learning_rate": 1.0021806741962468e-05,
      "loss": 2.9103,
      "step": 211393
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4182891845703125,
      "learning_rate": 1.002075830682213e-05,
      "loss": 2.8896,
      "step": 211394
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.204334259033203,
      "learning_rate": 1.0019709925594365e-05,
      "loss": 2.8083,
      "step": 211395
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9216809272766113,
      "learning_rate": 1.001866159827951e-05,
      "loss": 2.6934,
      "step": 211396
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.291529417037964,
      "learning_rate": 1.001761332487756e-05,
      "loss": 2.9073,
      "step": 211397
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9953455924987793,
      "learning_rate": 1.001656510538882e-05,
      "loss": 3.1563,
      "step": 211398
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4193832874298096,
      "learning_rate": 1.0015516939813483e-05,
      "loss": 2.8949,
      "step": 211399
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5047571659088135,
      "learning_rate": 1.0014468828151756e-05,
      "loss": 2.934,
      "step": 211400
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9598188400268555,
      "learning_rate": 1.0013420770403768e-05,
      "loss": 2.829,
      "step": 211401
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9483468532562256,
      "learning_rate": 1.0012372766569855e-05,
      "loss": 3.0595,
      "step": 211402
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9329376220703125,
      "learning_rate": 1.0011324816650012e-05,
      "loss": 2.9339,
      "step": 211403
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.067795753479004,
      "learning_rate": 1.0010276920644578e-05,
      "loss": 2.884,
      "step": 211404
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8104734420776367,
      "learning_rate": 1.0009229078553681e-05,
      "loss": 2.7181,
      "step": 211405
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2452073097229004,
      "learning_rate": 1.0008181290377592e-05,
      "loss": 2.8007,
      "step": 211406
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.152985095977783,
      "learning_rate": 1.0007133556116408e-05,
      "loss": 2.7604,
      "step": 211407
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.897822141647339,
      "learning_rate": 1.000608587577043e-05,
      "loss": 2.9177,
      "step": 211408
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.771486282348633,
      "learning_rate": 1.0005038249339758e-05,
      "loss": 2.7577,
      "step": 211409
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.280355930328369,
      "learning_rate": 1.0003990676824625e-05,
      "loss": 2.9768,
      "step": 211410
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7834091186523438,
      "learning_rate": 1.0002943158225197e-05,
      "loss": 3.0128,
      "step": 211411
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9798624515533447,
      "learning_rate": 1.0001895693541739e-05,
      "loss": 2.863,
      "step": 211412
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.378150463104248,
      "learning_rate": 1.0000848282774388e-05,
      "loss": 2.9306,
      "step": 211413
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3734726905822754,
      "learning_rate": 9.999800925923374e-06,
      "loss": 2.9121,
      "step": 211414
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.798015594482422,
      "learning_rate": 9.998753622988831e-06,
      "loss": 3.0918,
      "step": 211415
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3545191287994385,
      "learning_rate": 9.997706373971027e-06,
      "loss": 3.0509,
      "step": 211416
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6913061141967773,
      "learning_rate": 9.996659178870092e-06,
      "loss": 2.7573,
      "step": 211417
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0376899242401123,
      "learning_rate": 9.995612037686296e-06,
      "loss": 2.9385,
      "step": 211418
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.7146074771881104,
      "learning_rate": 9.994564950419736e-06,
      "loss": 3.021,
      "step": 211419
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.030031681060791,
      "learning_rate": 9.993517917070715e-06,
      "loss": 3.192,
      "step": 211420
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1405789852142334,
      "learning_rate": 9.992470937639329e-06,
      "loss": 2.9223,
      "step": 211421
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0052740573883057,
      "learning_rate": 9.991424012125849e-06,
      "loss": 2.8866,
      "step": 211422
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.7680962085723877,
      "learning_rate": 9.990377140530403e-06,
      "loss": 2.851,
      "step": 211423
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9699859619140625,
      "learning_rate": 9.989330322853262e-06,
      "loss": 2.8295,
      "step": 211424
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.943607807159424,
      "learning_rate": 9.988283559094523e-06,
      "loss": 2.9605,
      "step": 211425
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.042257308959961,
      "learning_rate": 9.987236849254522e-06,
      "loss": 3.112,
      "step": 211426
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5780599117279053,
      "learning_rate": 9.986190193333289e-06,
      "loss": 3.1,
      "step": 211427
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.084012985229492,
      "learning_rate": 9.985143591331124e-06,
      "loss": 2.8471,
      "step": 211428
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.613785743713379,
      "learning_rate": 9.984097043248196e-06,
      "loss": 2.8807,
      "step": 211429
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.582608699798584,
      "learning_rate": 9.983050549084703e-06,
      "loss": 2.8503,
      "step": 211430
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.905245065689087,
      "learning_rate": 9.982004108840813e-06,
      "loss": 2.6896,
      "step": 211431
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4343042373657227,
      "learning_rate": 9.980957722516758e-06,
      "loss": 2.7937,
      "step": 211432
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.790067195892334,
      "learning_rate": 9.979911390112738e-06,
      "loss": 2.7796,
      "step": 211433
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8538625240325928,
      "learning_rate": 9.97886511162892e-06,
      "loss": 2.9775,
      "step": 211434
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5630738735198975,
      "learning_rate": 9.977818887065504e-06,
      "loss": 3.0787,
      "step": 211435
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.841496467590332,
      "learning_rate": 9.976772716422687e-06,
      "loss": 2.7608,
      "step": 211436
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0942466259002686,
      "learning_rate": 9.975726599700607e-06,
      "loss": 2.9719,
      "step": 211437
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6089606285095215,
      "learning_rate": 9.974680536899592e-06,
      "loss": 2.8747,
      "step": 211438
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.904153823852539,
      "learning_rate": 9.973634528019746e-06,
      "loss": 2.7964,
      "step": 211439
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.815678596496582,
      "learning_rate": 9.97258857306127e-06,
      "loss": 2.9249,
      "step": 211440
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5382561683654785,
      "learning_rate": 9.971542672024323e-06,
      "loss": 2.8774,
      "step": 211441
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0406901836395264,
      "learning_rate": 9.97049682490918e-06,
      "loss": 2.9068,
      "step": 211442
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5515363216400146,
      "learning_rate": 9.969451031715969e-06,
      "loss": 2.7678,
      "step": 211443
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.905292272567749,
      "learning_rate": 9.968405292444925e-06,
      "loss": 2.8797,
      "step": 211444
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.780812978744507,
      "learning_rate": 9.967359607096249e-06,
      "loss": 2.8296,
      "step": 211445
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.797823190689087,
      "learning_rate": 9.966313975670071e-06,
      "loss": 2.9916,
      "step": 211446
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.715937614440918,
      "learning_rate": 9.965268398166692e-06,
      "loss": 2.9465,
      "step": 211447
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.06048321723938,
      "learning_rate": 9.964222874586215e-06,
      "loss": 3.0209,
      "step": 211448
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.065938711166382,
      "learning_rate": 9.963177404928836e-06,
      "loss": 3.2027,
      "step": 211449
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3918204307556152,
      "learning_rate": 9.96213198919482e-06,
      "loss": 2.8449,
      "step": 211450
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2112061977386475,
      "learning_rate": 9.961086627384307e-06,
      "loss": 3.1732,
      "step": 211451
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9969944953918457,
      "learning_rate": 9.960041319497458e-06,
      "loss": 2.9442,
      "step": 211452
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.178007125854492,
      "learning_rate": 9.958996065534575e-06,
      "loss": 2.9919,
      "step": 211453
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6528947353363037,
      "learning_rate": 9.957950865495723e-06,
      "loss": 2.8319,
      "step": 211454
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.541445732116699,
      "learning_rate": 9.956905719381236e-06,
      "loss": 3.1112,
      "step": 211455
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.559765577316284,
      "learning_rate": 9.955860627191214e-06,
      "loss": 2.9267,
      "step": 211456
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.09971284866333,
      "learning_rate": 9.954815588925858e-06,
      "loss": 3.0183,
      "step": 211457
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7170217037200928,
      "learning_rate": 9.953770604585364e-06,
      "loss": 3.2253,
      "step": 211458
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.422919511795044,
      "learning_rate": 9.952725674169971e-06,
      "loss": 2.8584,
      "step": 211459
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5822083950042725,
      "learning_rate": 9.951680797679807e-06,
      "loss": 2.7493,
      "step": 211460
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.247802495956421,
      "learning_rate": 9.95063597511514e-06,
      "loss": 2.7692,
      "step": 211461
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9183409214019775,
      "learning_rate": 9.949591206476105e-06,
      "loss": 2.6652,
      "step": 211462
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0699095726013184,
      "learning_rate": 9.948546491762932e-06,
      "loss": 3.1366,
      "step": 211463
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.5560836791992188,
      "learning_rate": 9.947501830975758e-06,
      "loss": 2.9734,
      "step": 211464
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4382104873657227,
      "learning_rate": 9.946457224114846e-06,
      "loss": 2.8126,
      "step": 211465
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8457846641540527,
      "learning_rate": 9.945412671180363e-06,
      "loss": 2.8303,
      "step": 211466
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1080071926116943,
      "learning_rate": 9.944368172172513e-06,
      "loss": 2.9717,
      "step": 211467
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.198732376098633,
      "learning_rate": 9.943323727091423e-06,
      "loss": 2.7848,
      "step": 211468
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.118914604187012,
      "learning_rate": 9.942279335937431e-06,
      "loss": 3.1366,
      "step": 211469
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8241987228393555,
      "learning_rate": 9.9412349987106e-06,
      "loss": 2.6682,
      "step": 211470
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.088679313659668,
      "learning_rate": 9.940190715411167e-06,
      "loss": 2.7564,
      "step": 211471
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0073275566101074,
      "learning_rate": 9.939146486039328e-06,
      "loss": 3.0234,
      "step": 211472
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6968140602111816,
      "learning_rate": 9.938102310595285e-06,
      "loss": 2.8151,
      "step": 211473
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8958115577697754,
      "learning_rate": 9.937058189079206e-06,
      "loss": 2.7355,
      "step": 211474
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.768664598464966,
      "learning_rate": 9.936014121491353e-06,
      "loss": 2.8301,
      "step": 211475
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.016573667526245,
      "learning_rate": 9.93497010783183e-06,
      "loss": 2.8259,
      "step": 211476
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8786847591400146,
      "learning_rate": 9.933926148100868e-06,
      "loss": 3.08,
      "step": 211477
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0788490772247314,
      "learning_rate": 9.932882242298667e-06,
      "loss": 2.9966,
      "step": 211478
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9400010108947754,
      "learning_rate": 9.931838390425428e-06,
      "loss": 3.0255,
      "step": 211479
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.962675094604492,
      "learning_rate": 9.930794592481317e-06,
      "loss": 2.8012,
      "step": 211480
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.8186237812042236,
      "learning_rate": 9.9297508484666e-06,
      "loss": 3.1451,
      "step": 211481
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9879233837127686,
      "learning_rate": 9.928707158381344e-06,
      "loss": 3.1689,
      "step": 211482
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.201331377029419,
      "learning_rate": 9.927663522225882e-06,
      "loss": 3.1451,
      "step": 211483
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.999145746231079,
      "learning_rate": 9.92661994000028e-06,
      "loss": 2.8958,
      "step": 211484
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.717329978942871,
      "learning_rate": 9.92557641170484e-06,
      "loss": 3.0321,
      "step": 211485
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0640006065368652,
      "learning_rate": 9.92453293733969e-06,
      "loss": 2.9708,
      "step": 211486
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.90958309173584,
      "learning_rate": 9.923489516905103e-06,
      "loss": 2.8194,
      "step": 211487
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3638885021209717,
      "learning_rate": 9.922446150401143e-06,
      "loss": 3.1794,
      "step": 211488
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7030651569366455,
      "learning_rate": 9.921402837828108e-06,
      "loss": 3.107,
      "step": 211489
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.824599027633667,
      "learning_rate": 9.920359579186132e-06,
      "loss": 2.9239,
      "step": 211490
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.303230047225952,
      "learning_rate": 9.91931637447545e-06,
      "loss": 2.848,
      "step": 211491
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6902883052825928,
      "learning_rate": 9.918273223696227e-06,
      "loss": 2.6824,
      "step": 211492
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.4812512397766113,
      "learning_rate": 9.917230126848763e-06,
      "loss": 2.8325,
      "step": 211493
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.237348794937134,
      "learning_rate": 9.916187083933058e-06,
      "loss": 2.8841,
      "step": 211494
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3987088203430176,
      "learning_rate": 9.915144094949445e-06,
      "loss": 3.1281,
      "step": 211495
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.571664810180664,
      "learning_rate": 9.91410115989809e-06,
      "loss": 2.8631,
      "step": 211496
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9363272190093994,
      "learning_rate": 9.913058278779162e-06,
      "loss": 2.6625,
      "step": 211497
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3387789726257324,
      "learning_rate": 9.912015451592858e-06,
      "loss": 2.7977,
      "step": 211498
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5175530910491943,
      "learning_rate": 9.910972678339479e-06,
      "loss": 2.9143,
      "step": 211499
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9714584350585938,
      "learning_rate": 9.909929959019025e-06,
      "loss": 2.9366,
      "step": 211500
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6220438480377197,
      "learning_rate": 9.908887293631829e-06,
      "loss": 3.1237,
      "step": 211501
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.275540351867676,
      "learning_rate": 9.907844682178024e-06,
      "loss": 2.9793,
      "step": 211502
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6646764278411865,
      "learning_rate": 9.906802124657842e-06,
      "loss": 3.0205,
      "step": 211503
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.09539794921875,
      "learning_rate": 9.905759621071452e-06,
      "loss": 2.9639,
      "step": 211504
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6872496604919434,
      "learning_rate": 9.90471717141912e-06,
      "loss": 2.9356,
      "step": 211505
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8830530643463135,
      "learning_rate": 9.90367477570091e-06,
      "loss": 2.8983,
      "step": 211506
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.995898723602295,
      "learning_rate": 9.902632433917091e-06,
      "loss": 3.0778,
      "step": 211507
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.823164463043213,
      "learning_rate": 9.901590146067828e-06,
      "loss": 3.1053,
      "step": 211508
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.160893440246582,
      "learning_rate": 9.900547912153389e-06,
      "loss": 2.7859,
      "step": 211509
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.643043279647827,
      "learning_rate": 9.899505732173873e-06,
      "loss": 2.8273,
      "step": 211510
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.857041835784912,
      "learning_rate": 9.898463606129581e-06,
      "loss": 3.0246,
      "step": 211511
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3896636962890625,
      "learning_rate": 9.897421534020545e-06,
      "loss": 2.8646,
      "step": 211512
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.096012115478516,
      "learning_rate": 9.89637951584713e-06,
      "loss": 2.929,
      "step": 211513
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9300529956817627,
      "learning_rate": 9.895337551609373e-06,
      "loss": 3.1438,
      "step": 211514
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0576064586639404,
      "learning_rate": 9.894295641307637e-06,
      "loss": 3.0788,
      "step": 211515
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.475897789001465,
      "learning_rate": 9.893253784941956e-06,
      "loss": 3.0201,
      "step": 211516
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.097346305847168,
      "learning_rate": 9.892211982512632e-06,
      "loss": 3.2645,
      "step": 211517
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6572105884552,
      "learning_rate": 9.89117023401983e-06,
      "loss": 3.0886,
      "step": 211518
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.710634231567383,
      "learning_rate": 9.890128539463748e-06,
      "loss": 2.9559,
      "step": 211519
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.932614803314209,
      "learning_rate": 9.889086898844523e-06,
      "loss": 2.7818,
      "step": 211520
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9559342861175537,
      "learning_rate": 9.888045312162418e-06,
      "loss": 3.0524,
      "step": 211521
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.834792137145996,
      "learning_rate": 9.887003779417568e-06,
      "loss": 2.9266,
      "step": 211522
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.950544834136963,
      "learning_rate": 9.885962300610273e-06,
      "loss": 2.9499,
      "step": 211523
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.710911750793457,
      "learning_rate": 9.884920875740599e-06,
      "loss": 3.1095,
      "step": 211524
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0640993118286133,
      "learning_rate": 9.883879504808812e-06,
      "loss": 2.8214,
      "step": 211525
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.6998233795166016,
      "learning_rate": 9.882838187815079e-06,
      "loss": 2.8495,
      "step": 211526
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.283290147781372,
      "learning_rate": 9.881796924759599e-06,
      "loss": 2.9739,
      "step": 211527
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3093457221984863,
      "learning_rate": 9.880755715642574e-06,
      "loss": 2.9337,
      "step": 211528
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9854419231414795,
      "learning_rate": 9.879714560464203e-06,
      "loss": 2.9668,
      "step": 211529
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.916189670562744,
      "learning_rate": 9.878673459224684e-06,
      "loss": 3.0486,
      "step": 211530
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8737683296203613,
      "learning_rate": 9.877632411924153e-06,
      "loss": 2.8451,
      "step": 211531
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7645115852355957,
      "learning_rate": 9.876591418562874e-06,
      "loss": 2.8162,
      "step": 211532
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.252913236618042,
      "learning_rate": 9.875550479141014e-06,
      "loss": 2.9127,
      "step": 211533
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1367480754852295,
      "learning_rate": 9.874509593658741e-06,
      "loss": 3.455,
      "step": 211534
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.658601760864258,
      "learning_rate": 9.873468762116287e-06,
      "loss": 2.5578,
      "step": 211535
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.391289234161377,
      "learning_rate": 9.872427984513853e-06,
      "loss": 2.934,
      "step": 211536
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.3839025497436523,
      "learning_rate": 9.871387260851571e-06,
      "loss": 2.9983,
      "step": 211537
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9287424087524414,
      "learning_rate": 9.870346591129708e-06,
      "loss": 2.8202,
      "step": 211538
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.908696413040161,
      "learning_rate": 9.86930597534843e-06,
      "loss": 3.1354,
      "step": 211539
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6618869304656982,
      "learning_rate": 9.868265413507903e-06,
      "loss": 2.8913,
      "step": 211540
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.176250696182251,
      "learning_rate": 9.867224905608362e-06,
      "loss": 3.0612,
      "step": 211541
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.512568950653076,
      "learning_rate": 9.866184451649972e-06,
      "loss": 2.7677,
      "step": 211542
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.395358085632324,
      "learning_rate": 9.865144051632901e-06,
      "loss": 2.7498,
      "step": 211543
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5432088375091553,
      "learning_rate": 9.864103705557414e-06,
      "loss": 2.9084,
      "step": 211544
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.823920488357544,
      "learning_rate": 9.863063413423643e-06,
      "loss": 3.013,
      "step": 211545
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7424376010894775,
      "learning_rate": 9.862023175231826e-06,
      "loss": 3.056,
      "step": 211546
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.529834270477295,
      "learning_rate": 9.860982990982158e-06,
      "loss": 2.967,
      "step": 211547
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9660120010375977,
      "learning_rate": 9.859942860674774e-06,
      "loss": 2.8744,
      "step": 211548
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.5661044120788574,
      "learning_rate": 9.858902784309907e-06,
      "loss": 2.9803,
      "step": 211549
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1494507789611816,
      "learning_rate": 9.857862761887757e-06,
      "loss": 3.0851,
      "step": 211550
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.866062641143799,
      "learning_rate": 9.856822793408459e-06,
      "loss": 2.8201,
      "step": 211551
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.399410724639893,
      "learning_rate": 9.85578287887231e-06,
      "loss": 2.7471,
      "step": 211552
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.498115301132202,
      "learning_rate": 9.85474301827941e-06,
      "loss": 2.9701,
      "step": 211553
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.7485463619232178,
      "learning_rate": 9.853703211630059e-06,
      "loss": 2.7656,
      "step": 211554
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2444467544555664,
      "learning_rate": 9.852663458924326e-06,
      "loss": 2.7277,
      "step": 211555
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.8086225986480713,
      "learning_rate": 9.851623760162475e-06,
      "loss": 2.7539,
      "step": 211556
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.456507682800293,
      "learning_rate": 9.850584115344672e-06,
      "loss": 2.8579,
      "step": 211557
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4977080821990967,
      "learning_rate": 9.849544524471154e-06,
      "loss": 2.8603,
      "step": 211558
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9529707431793213,
      "learning_rate": 9.848504987542017e-06,
      "loss": 2.9069,
      "step": 211559
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4397921562194824,
      "learning_rate": 9.847465504557628e-06,
      "loss": 2.9696,
      "step": 211560
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.443345546722412,
      "learning_rate": 9.846426075517989e-06,
      "loss": 2.9554,
      "step": 211561
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0117480754852295,
      "learning_rate": 9.845386700423396e-06,
      "loss": 2.865,
      "step": 211562
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.150974750518799,
      "learning_rate": 9.844347379273986e-06,
      "loss": 3.009,
      "step": 211563
    },
    {
      "epoch": 2.75,
      "grad_norm": 4.0815510749816895,
      "learning_rate": 9.843308112070026e-06,
      "loss": 2.7775,
      "step": 211564
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9178755283355713,
      "learning_rate": 9.842268898811678e-06,
      "loss": 3.1368,
      "step": 211565
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.040825605392456,
      "learning_rate": 9.841229739499146e-06,
      "loss": 2.8327,
      "step": 211566
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1574788093566895,
      "learning_rate": 9.840190634132561e-06,
      "loss": 2.8701,
      "step": 211567
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.961016893386841,
      "learning_rate": 9.839151582712191e-06,
      "loss": 2.7625,
      "step": 211568
    },
    {
      "epoch": 2.75,
      "grad_norm": 5.212052345275879,
      "learning_rate": 9.838112585238166e-06,
      "loss": 2.6265,
      "step": 211569
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.4113755226135254,
      "learning_rate": 9.837073641710757e-06,
      "loss": 2.9189,
      "step": 211570
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9736483097076416,
      "learning_rate": 9.836034752130061e-06,
      "loss": 2.9789,
      "step": 211571
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.669867515563965,
      "learning_rate": 9.834995916496413e-06,
      "loss": 3.0006,
      "step": 211572
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.2941477298736572,
      "learning_rate": 9.833957134809844e-06,
      "loss": 2.8592,
      "step": 211573
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.689671039581299,
      "learning_rate": 9.832918407070656e-06,
      "loss": 3.0246,
      "step": 211574
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.9911251068115234,
      "learning_rate": 9.831879733278947e-06,
      "loss": 2.788,
      "step": 211575
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.892698049545288,
      "learning_rate": 9.830841113435018e-06,
      "loss": 2.9591,
      "step": 211576
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.071092128753662,
      "learning_rate": 9.829802547539001e-06,
      "loss": 2.8701,
      "step": 211577
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.6470437049865723,
      "learning_rate": 9.82876403559113e-06,
      "loss": 2.5546,
      "step": 211578
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0441715717315674,
      "learning_rate": 9.827725577591539e-06,
      "loss": 2.7959,
      "step": 211579
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.183797836303711,
      "learning_rate": 9.826687173540459e-06,
      "loss": 2.8982,
      "step": 211580
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.1895668506622314,
      "learning_rate": 9.82564882343806e-06,
      "loss": 2.6138,
      "step": 211581
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.604950428009033,
      "learning_rate": 9.82461052728457e-06,
      "loss": 3.0148,
      "step": 211582
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0355939865112305,
      "learning_rate": 9.823572285080128e-06,
      "loss": 2.9296,
      "step": 211583
    },
    {
      "epoch": 2.75,
      "grad_norm": 2.99971342086792,
      "learning_rate": 9.82253409682503e-06,
      "loss": 2.8854,
      "step": 211584
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.748732805252075,
      "learning_rate": 9.821495962519344e-06,
      "loss": 2.9372,
      "step": 211585
    },
    {
      "epoch": 2.75,
      "grad_norm": 3.0494272708892822,
      "learning_rate": 9.820457882163335e-06,
      "loss": 3.0243,
      "step": 211586
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.067512273788452,
      "learning_rate": 9.819419855757137e-06,
      "loss": 2.8557,
      "step": 211587
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.390969753265381,
      "learning_rate": 9.818381883301052e-06,
      "loss": 3.2808,
      "step": 211588
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.985393524169922,
      "learning_rate": 9.817343964795177e-06,
      "loss": 2.7204,
      "step": 211589
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9322547912597656,
      "learning_rate": 9.816306100239745e-06,
      "loss": 2.9757,
      "step": 211590
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.374577522277832,
      "learning_rate": 9.815268289634925e-06,
      "loss": 3.003,
      "step": 211591
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8727304935455322,
      "learning_rate": 9.814230532980949e-06,
      "loss": 2.8005,
      "step": 211592
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2058353424072266,
      "learning_rate": 9.813192830277949e-06,
      "loss": 3.1004,
      "step": 211593
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7068300247192383,
      "learning_rate": 9.812155181526194e-06,
      "loss": 2.978,
      "step": 211594
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.74765682220459,
      "learning_rate": 9.811117586725814e-06,
      "loss": 2.8957,
      "step": 211595
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.920210123062134,
      "learning_rate": 9.810080045877044e-06,
      "loss": 2.8116,
      "step": 211596
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1706278324127197,
      "learning_rate": 9.809042558980051e-06,
      "loss": 2.463,
      "step": 211597
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.930697202682495,
      "learning_rate": 9.808005126035035e-06,
      "loss": 3.135,
      "step": 211598
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.11079478263855,
      "learning_rate": 9.80696774704216e-06,
      "loss": 3.1082,
      "step": 211599
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.659513473510742,
      "learning_rate": 9.805930422001695e-06,
      "loss": 2.8433,
      "step": 211600
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.151344060897827,
      "learning_rate": 9.80489315091374e-06,
      "loss": 3.0454,
      "step": 211601
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1915109157562256,
      "learning_rate": 9.80385593377856e-06,
      "loss": 2.8504,
      "step": 211602
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.798799991607666,
      "learning_rate": 9.802818770596321e-06,
      "loss": 3.0779,
      "step": 211603
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.249568462371826,
      "learning_rate": 9.801781661367225e-06,
      "loss": 2.8822,
      "step": 211604
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0475234985351562,
      "learning_rate": 9.800744606091404e-06,
      "loss": 2.9568,
      "step": 211605
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.937828302383423,
      "learning_rate": 9.799707604769159e-06,
      "loss": 2.958,
      "step": 211606
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0782530307769775,
      "learning_rate": 9.798670657400586e-06,
      "loss": 2.7933,
      "step": 211607
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0821712017059326,
      "learning_rate": 9.797633763985923e-06,
      "loss": 3.0465,
      "step": 211608
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9291701316833496,
      "learning_rate": 9.796596924525402e-06,
      "loss": 2.9291,
      "step": 211609
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.812016248703003,
      "learning_rate": 9.795560139019154e-06,
      "loss": 2.9815,
      "step": 211610
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.953986167907715,
      "learning_rate": 9.794523407467348e-06,
      "loss": 2.9924,
      "step": 211611
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.048609972000122,
      "learning_rate": 9.793486729870281e-06,
      "loss": 2.8334,
      "step": 211612
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7787859439849854,
      "learning_rate": 9.792450106228023e-06,
      "loss": 2.6238,
      "step": 211613
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.338385581970215,
      "learning_rate": 9.791413536540838e-06,
      "loss": 2.9533,
      "step": 211614
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.754516839981079,
      "learning_rate": 9.79037702080896e-06,
      "loss": 2.861,
      "step": 211615
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0449867248535156,
      "learning_rate": 9.789340559032454e-06,
      "loss": 2.8771,
      "step": 211616
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.703780174255371,
      "learning_rate": 9.788304151211656e-06,
      "loss": 3.0018,
      "step": 211617
    },
    {
      "epoch": 2.76,
      "grad_norm": 5.535952568054199,
      "learning_rate": 9.787267797346665e-06,
      "loss": 2.7579,
      "step": 211618
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.887390375137329,
      "learning_rate": 9.78623149743768e-06,
      "loss": 2.7204,
      "step": 211619
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.680299997329712,
      "learning_rate": 9.785195251484935e-06,
      "loss": 2.9896,
      "step": 211620
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.127687454223633,
      "learning_rate": 9.784159059488594e-06,
      "loss": 2.8051,
      "step": 211621
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5300610065460205,
      "learning_rate": 9.783122921448827e-06,
      "loss": 2.7976,
      "step": 211622
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.967794418334961,
      "learning_rate": 9.782086837365898e-06,
      "loss": 2.9028,
      "step": 211623
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8232288360595703,
      "learning_rate": 9.781050807239943e-06,
      "loss": 3.1322,
      "step": 211624
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6953721046447754,
      "learning_rate": 9.78001483107116e-06,
      "loss": 2.7815,
      "step": 211625
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.795290470123291,
      "learning_rate": 9.77897890885978e-06,
      "loss": 2.7271,
      "step": 211626
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6952409744262695,
      "learning_rate": 9.77794304060594e-06,
      "loss": 2.887,
      "step": 211627
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5162315368652344,
      "learning_rate": 9.776907226309838e-06,
      "loss": 2.9923,
      "step": 211628
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7726316452026367,
      "learning_rate": 9.77587146597174e-06,
      "loss": 3.0179,
      "step": 211629
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6415634155273438,
      "learning_rate": 9.774835759591714e-06,
      "loss": 3.0094,
      "step": 211630
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.862423896789551,
      "learning_rate": 9.773800107170093e-06,
      "loss": 2.9216,
      "step": 211631
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.303417682647705,
      "learning_rate": 9.772764508706975e-06,
      "loss": 2.9535,
      "step": 211632
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.067113161087036,
      "learning_rate": 9.771728964202596e-06,
      "loss": 3.1496,
      "step": 211633
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8986399173736572,
      "learning_rate": 9.770693473657121e-06,
      "loss": 2.8753,
      "step": 211634
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8466672897338867,
      "learning_rate": 9.769658037070748e-06,
      "loss": 2.6826,
      "step": 211635
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7485859394073486,
      "learning_rate": 9.768622654443647e-06,
      "loss": 2.9243,
      "step": 211636
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2705368995666504,
      "learning_rate": 9.767587325776082e-06,
      "loss": 2.8369,
      "step": 211637
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9454081058502197,
      "learning_rate": 9.766552051068154e-06,
      "loss": 3.2151,
      "step": 211638
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0282702445983887,
      "learning_rate": 9.765516830320164e-06,
      "loss": 2.972,
      "step": 211639
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.735313892364502,
      "learning_rate": 9.764481663532208e-06,
      "loss": 2.8506,
      "step": 211640
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.142056465148926,
      "learning_rate": 9.763446550704523e-06,
      "loss": 2.8507,
      "step": 211641
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0506508350372314,
      "learning_rate": 9.762411491837274e-06,
      "loss": 2.9426,
      "step": 211642
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.567676305770874,
      "learning_rate": 9.761376486930695e-06,
      "loss": 3.0565,
      "step": 211643
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.941606283187866,
      "learning_rate": 9.760341535984916e-06,
      "loss": 2.8454,
      "step": 211644
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8985891342163086,
      "learning_rate": 9.759306639000242e-06,
      "loss": 3.0293,
      "step": 211645
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8062796592712402,
      "learning_rate": 9.758271795976702e-06,
      "loss": 2.9024,
      "step": 211646
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0893912315368652,
      "learning_rate": 9.757237006914631e-06,
      "loss": 2.945,
      "step": 211647
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0078847408294678,
      "learning_rate": 9.756202271814162e-06,
      "loss": 2.8971,
      "step": 211648
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7903172969818115,
      "learning_rate": 9.755167590675494e-06,
      "loss": 2.904,
      "step": 211649
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.589315414428711,
      "learning_rate": 9.754132963498795e-06,
      "loss": 2.8911,
      "step": 211650
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.748814105987549,
      "learning_rate": 9.753098390284331e-06,
      "loss": 2.9781,
      "step": 211651
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.42756986618042,
      "learning_rate": 9.752063871032201e-06,
      "loss": 2.7845,
      "step": 211652
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.3208136558532715,
      "learning_rate": 9.751029405742673e-06,
      "loss": 3.0696,
      "step": 211653
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3286736011505127,
      "learning_rate": 9.749994994415843e-06,
      "loss": 2.9988,
      "step": 211654
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.000095844268799,
      "learning_rate": 9.74896063705205e-06,
      "loss": 3.0092,
      "step": 211655
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0465590953826904,
      "learning_rate": 9.747926333651324e-06,
      "loss": 2.7791,
      "step": 211656
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9493587017059326,
      "learning_rate": 9.746892084213997e-06,
      "loss": 2.8292,
      "step": 211657
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.813964366912842,
      "learning_rate": 9.745857888740206e-06,
      "loss": 2.9741,
      "step": 211658
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.898716926574707,
      "learning_rate": 9.744823747230147e-06,
      "loss": 3.0044,
      "step": 211659
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2230653762817383,
      "learning_rate": 9.743789659683953e-06,
      "loss": 2.8005,
      "step": 211660
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.018934965133667,
      "learning_rate": 9.742755626101895e-06,
      "loss": 3.1086,
      "step": 211661
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.972034215927124,
      "learning_rate": 9.741721646484102e-06,
      "loss": 2.7382,
      "step": 211662
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2839367389678955,
      "learning_rate": 9.740687720830843e-06,
      "loss": 2.9711,
      "step": 211663
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.267332077026367,
      "learning_rate": 9.739653849142282e-06,
      "loss": 2.8025,
      "step": 211664
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.531792402267456,
      "learning_rate": 9.738620031418586e-06,
      "loss": 3.0102,
      "step": 211665
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4461662769317627,
      "learning_rate": 9.737586267659924e-06,
      "loss": 2.8439,
      "step": 211666
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6410329341888428,
      "learning_rate": 9.736552557866561e-06,
      "loss": 3.1495,
      "step": 211667
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.070061683654785,
      "learning_rate": 9.735518902038631e-06,
      "loss": 2.9562,
      "step": 211668
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3296267986297607,
      "learning_rate": 9.734485300176364e-06,
      "loss": 2.743,
      "step": 211669
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.350268602371216,
      "learning_rate": 9.73345175227993e-06,
      "loss": 2.749,
      "step": 211670
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8122940063476562,
      "learning_rate": 9.732418258349561e-06,
      "loss": 2.575,
      "step": 211671
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7230262756347656,
      "learning_rate": 9.731384818385357e-06,
      "loss": 2.8949,
      "step": 211672
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.057241439819336,
      "learning_rate": 9.730351432387584e-06,
      "loss": 2.8543,
      "step": 211673
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.02085280418396,
      "learning_rate": 9.729318100356409e-06,
      "loss": 3.0744,
      "step": 211674
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.334771156311035,
      "learning_rate": 9.728284822292064e-06,
      "loss": 3.0757,
      "step": 211675
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.023219347000122,
      "learning_rate": 9.727251598194685e-06,
      "loss": 2.6092,
      "step": 211676
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.396031141281128,
      "learning_rate": 9.726218428064537e-06,
      "loss": 3.1969,
      "step": 211677
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.980891704559326,
      "learning_rate": 9.725185311901684e-06,
      "loss": 2.9751,
      "step": 211678
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.4531259536743164,
      "learning_rate": 9.724152249706463e-06,
      "loss": 2.687,
      "step": 211679
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.36120343208313,
      "learning_rate": 9.723119241478938e-06,
      "loss": 3.1104,
      "step": 211680
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.347170114517212,
      "learning_rate": 9.722086287219445e-06,
      "loss": 2.702,
      "step": 211681
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2996013164520264,
      "learning_rate": 9.721053386928046e-06,
      "loss": 2.5645,
      "step": 211682
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2657549381256104,
      "learning_rate": 9.72002054060501e-06,
      "loss": 2.9113,
      "step": 211683
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8549840450286865,
      "learning_rate": 9.718987748250473e-06,
      "loss": 2.8736,
      "step": 211684
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7861900329589844,
      "learning_rate": 9.717955009864664e-06,
      "loss": 2.9841,
      "step": 211685
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8523268699645996,
      "learning_rate": 9.716922325447751e-06,
      "loss": 2.8317,
      "step": 211686
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.834179401397705,
      "learning_rate": 9.715889694999967e-06,
      "loss": 3.0217,
      "step": 211687
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0939695835113525,
      "learning_rate": 9.71485711852148e-06,
      "loss": 2.6412,
      "step": 211688
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7143468856811523,
      "learning_rate": 9.713824596012488e-06,
      "loss": 2.9636,
      "step": 211689
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.836097002029419,
      "learning_rate": 9.712792127473123e-06,
      "loss": 2.6902,
      "step": 211690
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5955448150634766,
      "learning_rate": 9.711759712903688e-06,
      "loss": 2.8606,
      "step": 211691
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.272556781768799,
      "learning_rate": 9.710727352304282e-06,
      "loss": 2.8706,
      "step": 211692
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0025055408477783,
      "learning_rate": 9.709695045675136e-06,
      "loss": 2.9186,
      "step": 211693
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.325007915496826,
      "learning_rate": 9.708662793016486e-06,
      "loss": 2.9288,
      "step": 211694
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7331230640411377,
      "learning_rate": 9.70763059432843e-06,
      "loss": 2.7991,
      "step": 211695
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9500906467437744,
      "learning_rate": 9.706598449611203e-06,
      "loss": 3.2148,
      "step": 211696
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.365946054458618,
      "learning_rate": 9.705566358865003e-06,
      "loss": 2.8029,
      "step": 211697
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1265270709991455,
      "learning_rate": 9.704534322089997e-06,
      "loss": 2.9838,
      "step": 211698
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.269181728363037,
      "learning_rate": 9.70350233928645e-06,
      "loss": 2.8847,
      "step": 211699
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8718419075012207,
      "learning_rate": 9.702470410454465e-06,
      "loss": 3.0737,
      "step": 211700
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8231406211853027,
      "learning_rate": 9.701438535594309e-06,
      "loss": 2.8806,
      "step": 211701
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.698075294494629,
      "learning_rate": 9.700406714706076e-06,
      "loss": 2.9615,
      "step": 211702
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1323795318603516,
      "learning_rate": 9.69937494779004e-06,
      "loss": 3.0725,
      "step": 211703
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.625366687774658,
      "learning_rate": 9.698343234846362e-06,
      "loss": 3.1177,
      "step": 211704
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.535310983657837,
      "learning_rate": 9.697311575875278e-06,
      "loss": 2.7189,
      "step": 211705
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.835031509399414,
      "learning_rate": 9.69627997087692e-06,
      "loss": 2.8151,
      "step": 211706
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.346057891845703,
      "learning_rate": 9.695248419851487e-06,
      "loss": 2.8863,
      "step": 211707
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5401580333709717,
      "learning_rate": 9.694216922799214e-06,
      "loss": 2.9502,
      "step": 211708
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5954604148864746,
      "learning_rate": 9.693185479720267e-06,
      "loss": 2.8164,
      "step": 211709
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6954026222229004,
      "learning_rate": 9.692154090614811e-06,
      "loss": 2.9771,
      "step": 211710
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8558478355407715,
      "learning_rate": 9.691122755483083e-06,
      "loss": 2.7951,
      "step": 211711
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.189250946044922,
      "learning_rate": 9.690091474325278e-06,
      "loss": 3.1031,
      "step": 211712
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9661977291107178,
      "learning_rate": 9.689060247141533e-06,
      "loss": 2.9166,
      "step": 211713
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3847572803497314,
      "learning_rate": 9.68802907393208e-06,
      "loss": 2.9662,
      "step": 211714
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7764153480529785,
      "learning_rate": 9.686997954697085e-06,
      "loss": 2.9335,
      "step": 211715
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7446112632751465,
      "learning_rate": 9.68596688943678e-06,
      "loss": 2.9601,
      "step": 211716
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0598878860473633,
      "learning_rate": 9.684935878151334e-06,
      "loss": 2.7324,
      "step": 211717
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.220554828643799,
      "learning_rate": 9.683904920840945e-06,
      "loss": 2.8132,
      "step": 211718
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7729740142822266,
      "learning_rate": 9.682874017505749e-06,
      "loss": 3.0455,
      "step": 211719
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.018190860748291,
      "learning_rate": 9.681843168146042e-06,
      "loss": 2.9752,
      "step": 211720
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.834545612335205,
      "learning_rate": 9.680812372761926e-06,
      "loss": 2.9796,
      "step": 211721
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.890059232711792,
      "learning_rate": 9.679781631353634e-06,
      "loss": 2.6132,
      "step": 211722
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.946549892425537,
      "learning_rate": 9.678750943921398e-06,
      "loss": 2.7778,
      "step": 211723
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.908780813217163,
      "learning_rate": 9.677720310465287e-06,
      "loss": 2.6137,
      "step": 211724
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1393139362335205,
      "learning_rate": 9.676689730985632e-06,
      "loss": 3.2383,
      "step": 211725
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1901090145111084,
      "learning_rate": 9.675659205482533e-06,
      "loss": 2.9668,
      "step": 211726
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7078232765197754,
      "learning_rate": 9.67462873395619e-06,
      "loss": 2.9843,
      "step": 211727
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6204347610473633,
      "learning_rate": 9.67359831640684e-06,
      "loss": 2.8494,
      "step": 211728
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.288516044616699,
      "learning_rate": 9.672567952834609e-06,
      "loss": 2.752,
      "step": 211729
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5681545734405518,
      "learning_rate": 9.671537643239768e-06,
      "loss": 3.0879,
      "step": 211730
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8206121921539307,
      "learning_rate": 9.670507387622484e-06,
      "loss": 2.659,
      "step": 211731
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1784870624542236,
      "learning_rate": 9.66947718598292e-06,
      "loss": 2.9781,
      "step": 211732
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8985774517059326,
      "learning_rate": 9.668447038321247e-06,
      "loss": 2.8867,
      "step": 211733
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.500943422317505,
      "learning_rate": 9.66741694463773e-06,
      "loss": 3.1581,
      "step": 211734
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.305941343307495,
      "learning_rate": 9.666386904932467e-06,
      "loss": 2.7361,
      "step": 211735
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1651599407196045,
      "learning_rate": 9.665356919205759e-06,
      "loss": 2.5201,
      "step": 211736
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1645350456237793,
      "learning_rate": 9.664326987457738e-06,
      "loss": 3.0485,
      "step": 211737
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.918278932571411,
      "learning_rate": 9.663297109688606e-06,
      "loss": 2.8194,
      "step": 211738
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5893890857696533,
      "learning_rate": 9.662267285898496e-06,
      "loss": 2.7593,
      "step": 211739
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.810127019882202,
      "learning_rate": 9.661237516087706e-06,
      "loss": 2.9917,
      "step": 211740
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0801424980163574,
      "learning_rate": 9.660207800256303e-06,
      "loss": 2.9192,
      "step": 211741
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7993428707122803,
      "learning_rate": 9.659178138404622e-06,
      "loss": 2.939,
      "step": 211742
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.54392147064209,
      "learning_rate": 9.65814853053276e-06,
      "loss": 3.0183,
      "step": 211743
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8990375995635986,
      "learning_rate": 9.657118976640922e-06,
      "loss": 3.0267,
      "step": 211744
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.375483512878418,
      "learning_rate": 9.656089476729301e-06,
      "loss": 2.575,
      "step": 211745
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4051148891448975,
      "learning_rate": 9.6550600307981e-06,
      "loss": 3.0965,
      "step": 211746
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.498897075653076,
      "learning_rate": 9.654030638847487e-06,
      "loss": 2.9994,
      "step": 211747
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6212849617004395,
      "learning_rate": 9.653001300877694e-06,
      "loss": 2.6954,
      "step": 211748
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4581432342529297,
      "learning_rate": 9.651972016888887e-06,
      "loss": 2.921,
      "step": 211749
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8279802799224854,
      "learning_rate": 9.650942786881266e-06,
      "loss": 2.7812,
      "step": 211750
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.114319324493408,
      "learning_rate": 9.649913610854965e-06,
      "loss": 2.6908,
      "step": 211751
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2021644115448,
      "learning_rate": 9.648884488810282e-06,
      "loss": 2.9267,
      "step": 211752
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.054687738418579,
      "learning_rate": 9.647855420747286e-06,
      "loss": 2.9922,
      "step": 211753
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.69844651222229,
      "learning_rate": 9.646826406666308e-06,
      "loss": 2.9137,
      "step": 211754
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4876599311828613,
      "learning_rate": 9.645797446567449e-06,
      "loss": 3.0619,
      "step": 211755
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.725283622741699,
      "learning_rate": 9.644768540450909e-06,
      "loss": 3.0083,
      "step": 211756
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.275268077850342,
      "learning_rate": 9.643739688316854e-06,
      "loss": 2.9451,
      "step": 211757
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0317442417144775,
      "learning_rate": 9.64271089016555e-06,
      "loss": 2.9835,
      "step": 211758
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9443109035491943,
      "learning_rate": 9.641682145997132e-06,
      "loss": 2.9501,
      "step": 211759
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9511594772338867,
      "learning_rate": 9.640653455811831e-06,
      "loss": 2.7154,
      "step": 211760
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9529175758361816,
      "learning_rate": 9.639624819609782e-06,
      "loss": 2.7663,
      "step": 211761
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.20269513130188,
      "learning_rate": 9.63859623739125e-06,
      "loss": 3.0733,
      "step": 211762
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2366840839385986,
      "learning_rate": 9.637567709156335e-06,
      "loss": 2.8916,
      "step": 211763
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.251430034637451,
      "learning_rate": 9.636539234905304e-06,
      "loss": 2.8593,
      "step": 211764
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.316985845565796,
      "learning_rate": 9.635510814638293e-06,
      "loss": 3.2226,
      "step": 211765
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9008612632751465,
      "learning_rate": 9.63448244835553e-06,
      "loss": 2.7049,
      "step": 211766
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3725106716156006,
      "learning_rate": 9.633454136057218e-06,
      "loss": 2.8948,
      "step": 211767
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1117422580718994,
      "learning_rate": 9.632425877743522e-06,
      "loss": 3.0033,
      "step": 211768
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.358786106109619,
      "learning_rate": 9.631397673414643e-06,
      "loss": 2.9975,
      "step": 211769
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.820512533187866,
      "learning_rate": 9.630369523070747e-06,
      "loss": 3.1138,
      "step": 211770
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1582367420196533,
      "learning_rate": 9.629341426712034e-06,
      "loss": 3.0495,
      "step": 211771
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1617887020111084,
      "learning_rate": 9.62831338433877e-06,
      "loss": 2.8878,
      "step": 211772
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.716653823852539,
      "learning_rate": 9.627285395951056e-06,
      "loss": 3.4422,
      "step": 211773
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.252819061279297,
      "learning_rate": 9.626257461549092e-06,
      "loss": 2.7335,
      "step": 211774
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.286973476409912,
      "learning_rate": 9.625229581133076e-06,
      "loss": 2.9276,
      "step": 211775
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.632314682006836,
      "learning_rate": 9.624201754703276e-06,
      "loss": 2.8674,
      "step": 211776
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.075932741165161,
      "learning_rate": 9.623173982259723e-06,
      "loss": 2.9515,
      "step": 211777
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.160888671875,
      "learning_rate": 9.622146263802789e-06,
      "loss": 2.6593,
      "step": 211778
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.69012713432312,
      "learning_rate": 9.621118599332534e-06,
      "loss": 2.8255,
      "step": 211779
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1314404010772705,
      "learning_rate": 9.620090988849228e-06,
      "loss": 2.8641,
      "step": 211780
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.633904457092285,
      "learning_rate": 9.619063432353002e-06,
      "loss": 2.9016,
      "step": 211781
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8839058876037598,
      "learning_rate": 9.618035929844091e-06,
      "loss": 2.885,
      "step": 211782
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2286734580993652,
      "learning_rate": 9.61700848132263e-06,
      "loss": 3.0512,
      "step": 211783
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.418846368789673,
      "learning_rate": 9.615981086788882e-06,
      "loss": 2.8732,
      "step": 211784
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9153571128845215,
      "learning_rate": 9.614953746243015e-06,
      "loss": 2.908,
      "step": 211785
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.91786527633667,
      "learning_rate": 9.613926459685195e-06,
      "loss": 2.6581,
      "step": 211786
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7943289279937744,
      "learning_rate": 9.61289922711559e-06,
      "loss": 2.9307,
      "step": 211787
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.867922067642212,
      "learning_rate": 9.611872048534497e-06,
      "loss": 2.8846,
      "step": 211788
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3043301105499268,
      "learning_rate": 9.610844923941951e-06,
      "loss": 2.9268,
      "step": 211789
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.075892448425293,
      "learning_rate": 9.60981785333832e-06,
      "loss": 2.8597,
      "step": 211790
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3706278800964355,
      "learning_rate": 9.608790836723634e-06,
      "loss": 2.8504,
      "step": 211791
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.712615489959717,
      "learning_rate": 9.607763874098196e-06,
      "loss": 2.888,
      "step": 211792
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.595940351486206,
      "learning_rate": 9.606736965462169e-06,
      "loss": 2.8993,
      "step": 211793
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.180868625640869,
      "learning_rate": 9.605710110815724e-06,
      "loss": 3.2107,
      "step": 211794
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9501731395721436,
      "learning_rate": 9.604683310159023e-06,
      "loss": 2.9281,
      "step": 211795
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.885451555252075,
      "learning_rate": 9.603656563492335e-06,
      "loss": 3.0308,
      "step": 211796
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1899662017822266,
      "learning_rate": 9.602629870815758e-06,
      "loss": 2.8117,
      "step": 211797
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.029784917831421,
      "learning_rate": 9.601603232129596e-06,
      "loss": 2.8006,
      "step": 211798
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7303640842437744,
      "learning_rate": 9.600576647433945e-06,
      "loss": 2.9529,
      "step": 211799
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.37908935546875,
      "learning_rate": 9.599550116729004e-06,
      "loss": 2.8786,
      "step": 211800
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9312469959259033,
      "learning_rate": 9.598523640015043e-06,
      "loss": 2.9853,
      "step": 211801
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.033881187438965,
      "learning_rate": 9.597497217292194e-06,
      "loss": 3.0758,
      "step": 211802
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4564871788024902,
      "learning_rate": 9.59647084856059e-06,
      "loss": 2.8148,
      "step": 211803
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5131185054779053,
      "learning_rate": 9.59544453382053e-06,
      "loss": 2.9734,
      "step": 211804
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9422452449798584,
      "learning_rate": 9.59441827307218e-06,
      "loss": 3.0817,
      "step": 211805
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0414326190948486,
      "learning_rate": 9.593392066315675e-06,
      "loss": 3.0152,
      "step": 211806
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0487663745880127,
      "learning_rate": 9.592365913551248e-06,
      "loss": 2.8735,
      "step": 211807
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8582773208618164,
      "learning_rate": 9.591339814779097e-06,
      "loss": 2.9519,
      "step": 211808
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.629023313522339,
      "learning_rate": 9.590313769999359e-06,
      "loss": 2.8553,
      "step": 211809
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9853508472442627,
      "learning_rate": 9.589287779212296e-06,
      "loss": 2.8361,
      "step": 211810
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.9834048748016357,
      "learning_rate": 9.588261842418077e-06,
      "loss": 3.0574,
      "step": 211811
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0799498558044434,
      "learning_rate": 9.587235959616834e-06,
      "loss": 2.8765,
      "step": 211812
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.416346788406372,
      "learning_rate": 9.586210130808835e-06,
      "loss": 2.6236,
      "step": 211813
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.300047874450684,
      "learning_rate": 9.585184355994213e-06,
      "loss": 3.0407,
      "step": 211814
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.97155499458313,
      "learning_rate": 9.584158635173233e-06,
      "loss": 2.7976,
      "step": 211815
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.835188388824463,
      "learning_rate": 9.58313296834603e-06,
      "loss": 2.8985,
      "step": 211816
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0028746128082275,
      "learning_rate": 9.582107355512803e-06,
      "loss": 2.9056,
      "step": 211817
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.158036231994629,
      "learning_rate": 9.581081796673718e-06,
      "loss": 2.7944,
      "step": 211818
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.71297287940979,
      "learning_rate": 9.580056291829042e-06,
      "loss": 3.1091,
      "step": 211819
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9332292079925537,
      "learning_rate": 9.579030840978842e-06,
      "loss": 2.6683,
      "step": 211820
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7955269813537598,
      "learning_rate": 9.57800544412345e-06,
      "loss": 2.9713,
      "step": 211821
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.146813154220581,
      "learning_rate": 9.576980101262965e-06,
      "loss": 2.5128,
      "step": 211822
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6333839893341064,
      "learning_rate": 9.575954812397623e-06,
      "loss": 3.2686,
      "step": 211823
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2829928398132324,
      "learning_rate": 9.574929577527556e-06,
      "loss": 2.9401,
      "step": 211824
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.874560594558716,
      "learning_rate": 9.573904396652999e-06,
      "loss": 2.9695,
      "step": 211825
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.958026647567749,
      "learning_rate": 9.572879269774148e-06,
      "loss": 3.172,
      "step": 211826
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9839136600494385,
      "learning_rate": 9.57185419689117e-06,
      "loss": 2.9237,
      "step": 211827
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.382765769958496,
      "learning_rate": 9.570829178004302e-06,
      "loss": 2.8798,
      "step": 211828
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5793330669403076,
      "learning_rate": 9.569804213113675e-06,
      "loss": 2.8559,
      "step": 211829
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0757412910461426,
      "learning_rate": 9.568779302219453e-06,
      "loss": 3.0328,
      "step": 211830
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.033810615539551,
      "learning_rate": 9.56775444532194e-06,
      "loss": 2.9696,
      "step": 211831
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.928001880645752,
      "learning_rate": 9.566729642421233e-06,
      "loss": 2.9253,
      "step": 211832
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2440545558929443,
      "learning_rate": 9.565704893517568e-06,
      "loss": 2.8275,
      "step": 211833
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0605664253234863,
      "learning_rate": 9.56468019861114e-06,
      "loss": 2.835,
      "step": 211834
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6071372032165527,
      "learning_rate": 9.563655557702122e-06,
      "loss": 2.7845,
      "step": 211835
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5415074825286865,
      "learning_rate": 9.562630970790642e-06,
      "loss": 2.8612,
      "step": 211836
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5821003913879395,
      "learning_rate": 9.561606437877e-06,
      "loss": 3.0797,
      "step": 211837
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.70595121383667,
      "learning_rate": 9.560581958961334e-06,
      "loss": 2.9432,
      "step": 211838
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1211512088775635,
      "learning_rate": 9.559557534043838e-06,
      "loss": 2.8295,
      "step": 211839
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.961045503616333,
      "learning_rate": 9.558533163124716e-06,
      "loss": 2.5477,
      "step": 211840
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0528459548950195,
      "learning_rate": 9.557508846204132e-06,
      "loss": 2.8934,
      "step": 211841
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7955117225646973,
      "learning_rate": 9.556484583282253e-06,
      "loss": 3.0701,
      "step": 211842
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1829147338867188,
      "learning_rate": 9.555460374359348e-06,
      "loss": 3.0674,
      "step": 211843
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0952041149139404,
      "learning_rate": 9.554436219435547e-06,
      "loss": 2.8759,
      "step": 211844
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.700491189956665,
      "learning_rate": 9.55341211851105e-06,
      "loss": 2.6944,
      "step": 211845
    },
    {
      "epoch": 2.76,
      "grad_norm": 5.061702728271484,
      "learning_rate": 9.552388071586092e-06,
      "loss": 2.7434,
      "step": 211846
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.927501678466797,
      "learning_rate": 9.551364078660839e-06,
      "loss": 2.8712,
      "step": 211847
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8141510486602783,
      "learning_rate": 9.550340139735391e-06,
      "loss": 3.0495,
      "step": 211848
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0062716007232666,
      "learning_rate": 9.54931625481008e-06,
      "loss": 2.8581,
      "step": 211849
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.587582588195801,
      "learning_rate": 9.548292423885007e-06,
      "loss": 3.0246,
      "step": 211850
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5958824157714844,
      "learning_rate": 9.547268646960404e-06,
      "loss": 3.0893,
      "step": 211851
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4059979915618896,
      "learning_rate": 9.546244924036472e-06,
      "loss": 2.8014,
      "step": 211852
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3072056770324707,
      "learning_rate": 9.545221255113344e-06,
      "loss": 2.938,
      "step": 211853
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1022439002990723,
      "learning_rate": 9.54419764019122e-06,
      "loss": 2.6858,
      "step": 211854
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.709951400756836,
      "learning_rate": 9.543174079270365e-06,
      "loss": 2.8042,
      "step": 211855
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.075673580169678,
      "learning_rate": 9.542150572350882e-06,
      "loss": 2.7321,
      "step": 211856
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5987703800201416,
      "learning_rate": 9.541127119433e-06,
      "loss": 2.77,
      "step": 211857
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1332404613494873,
      "learning_rate": 9.54010372051689e-06,
      "loss": 2.6737,
      "step": 211858
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.833225727081299,
      "learning_rate": 9.539080375602815e-06,
      "loss": 3.0614,
      "step": 211859
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9829397201538086,
      "learning_rate": 9.538057084690842e-06,
      "loss": 3.1002,
      "step": 211860
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.030841827392578,
      "learning_rate": 9.537033847781273e-06,
      "loss": 2.7408,
      "step": 211861
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9624898433685303,
      "learning_rate": 9.536010664874238e-06,
      "loss": 2.996,
      "step": 211862
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.320880651473999,
      "learning_rate": 9.53498753596994e-06,
      "loss": 3.1428,
      "step": 211863
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.949273109436035,
      "learning_rate": 9.533964461068545e-06,
      "loss": 2.9706,
      "step": 211864
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7963614463806152,
      "learning_rate": 9.53294144017035e-06,
      "loss": 2.9317,
      "step": 211865
    },
    {
      "epoch": 2.76,
      "grad_norm": 5.960292816162109,
      "learning_rate": 9.531918473275357e-06,
      "loss": 3.0327,
      "step": 211866
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.969186305999756,
      "learning_rate": 9.530895560383933e-06,
      "loss": 2.8736,
      "step": 211867
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.892155170440674,
      "learning_rate": 9.529872701496177e-06,
      "loss": 3.0034,
      "step": 211868
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.252654790878296,
      "learning_rate": 9.52884989661229e-06,
      "loss": 3.1166,
      "step": 211869
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4780938625335693,
      "learning_rate": 9.52782714573247e-06,
      "loss": 3.1374,
      "step": 211870
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.013585090637207,
      "learning_rate": 9.526804448856983e-06,
      "loss": 2.8172,
      "step": 211871
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.856189250946045,
      "learning_rate": 9.525781805985865e-06,
      "loss": 2.9426,
      "step": 211872
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2258095741271973,
      "learning_rate": 9.524759217119415e-06,
      "loss": 2.9042,
      "step": 211873
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3016319274902344,
      "learning_rate": 9.523736682257765e-06,
      "loss": 2.7163,
      "step": 211874
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.79841685295105,
      "learning_rate": 9.522714201401183e-06,
      "loss": 3.0853,
      "step": 211875
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.426616907119751,
      "learning_rate": 9.521691774549767e-06,
      "loss": 3.0085,
      "step": 211876
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.228281259536743,
      "learning_rate": 9.520669401703785e-06,
      "loss": 2.9248,
      "step": 211877
    },
    {
      "epoch": 2.76,
      "grad_norm": 5.426458358764648,
      "learning_rate": 9.519647082863402e-06,
      "loss": 3.1126,
      "step": 211878
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3624532222747803,
      "learning_rate": 9.518624818028785e-06,
      "loss": 2.9882,
      "step": 211879
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.66850209236145,
      "learning_rate": 9.517602607200104e-06,
      "loss": 3.1281,
      "step": 211880
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3704824447631836,
      "learning_rate": 9.516580450377654e-06,
      "loss": 3.0256,
      "step": 211881
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.126136302947998,
      "learning_rate": 9.51555834756147e-06,
      "loss": 3.0259,
      "step": 211882
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5723836421966553,
      "learning_rate": 9.514536298751885e-06,
      "loss": 2.7221,
      "step": 211883
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2900099754333496,
      "learning_rate": 9.513514303949033e-06,
      "loss": 2.9159,
      "step": 211884
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9619696140289307,
      "learning_rate": 9.512492363153112e-06,
      "loss": 2.9257,
      "step": 211885
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7357723712921143,
      "learning_rate": 9.511470476364258e-06,
      "loss": 2.8435,
      "step": 211886
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.911344528198242,
      "learning_rate": 9.510448643582736e-06,
      "loss": 2.8132,
      "step": 211887
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2099645137786865,
      "learning_rate": 9.509426864808678e-06,
      "loss": 2.9499,
      "step": 211888
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.234773635864258,
      "learning_rate": 9.50840514004232e-06,
      "loss": 2.8661,
      "step": 211889
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8674561977386475,
      "learning_rate": 9.507383469283858e-06,
      "loss": 2.74,
      "step": 211890
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7823853492736816,
      "learning_rate": 9.506361852533396e-06,
      "loss": 3.0663,
      "step": 211891
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.734666347503662,
      "learning_rate": 9.50534028979123e-06,
      "loss": 2.9524,
      "step": 211892
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8924267292022705,
      "learning_rate": 9.504318781057496e-06,
      "loss": 3.0193,
      "step": 211893
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8824594020843506,
      "learning_rate": 9.50329732633236e-06,
      "loss": 2.6412,
      "step": 211894
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.884098529815674,
      "learning_rate": 9.502275925616087e-06,
      "loss": 3.1441,
      "step": 211895
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3046250343322754,
      "learning_rate": 9.501254578908846e-06,
      "loss": 2.9337,
      "step": 211896
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9064431190490723,
      "learning_rate": 9.500233286210735e-06,
      "loss": 2.9401,
      "step": 211897
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7023837566375732,
      "learning_rate": 9.499212047522053e-06,
      "loss": 3.0198,
      "step": 211898
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5693440437316895,
      "learning_rate": 9.498190862842937e-06,
      "loss": 2.7267,
      "step": 211899
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.17667818069458,
      "learning_rate": 9.497169732173616e-06,
      "loss": 2.8694,
      "step": 211900
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.057992458343506,
      "learning_rate": 9.496148655514258e-06,
      "loss": 3.0418,
      "step": 211901
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.158108711242676,
      "learning_rate": 9.49512763286503e-06,
      "loss": 3.0156,
      "step": 211902
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7198874950408936,
      "learning_rate": 9.494106664226131e-06,
      "loss": 2.9599,
      "step": 211903
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.903273582458496,
      "learning_rate": 9.493085749597795e-06,
      "loss": 3.0565,
      "step": 211904
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.2715630531311035,
      "learning_rate": 9.492064888980123e-06,
      "loss": 3.064,
      "step": 211905
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6311745643615723,
      "learning_rate": 9.491044082373412e-06,
      "loss": 2.9647,
      "step": 211906
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1732330322265625,
      "learning_rate": 9.490023329777796e-06,
      "loss": 3.0721,
      "step": 211907
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.258053779602051,
      "learning_rate": 9.489002631193476e-06,
      "loss": 3.0462,
      "step": 211908
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.452786445617676,
      "learning_rate": 9.487981986620585e-06,
      "loss": 2.9002,
      "step": 211909
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1300578117370605,
      "learning_rate": 9.48696139605939e-06,
      "loss": 2.7781,
      "step": 211910
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0567026138305664,
      "learning_rate": 9.485940859510055e-06,
      "loss": 2.982,
      "step": 211911
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5935592651367188,
      "learning_rate": 9.48492037697275e-06,
      "loss": 3.1651,
      "step": 211912
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.196521759033203,
      "learning_rate": 9.483899948447738e-06,
      "loss": 2.7596,
      "step": 211913
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3491568565368652,
      "learning_rate": 9.48287957393512e-06,
      "loss": 2.7964,
      "step": 211914
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0370585918426514,
      "learning_rate": 9.481859253435064e-06,
      "loss": 2.9873,
      "step": 211915
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5132696628570557,
      "learning_rate": 9.480838986947902e-06,
      "loss": 2.9354,
      "step": 211916
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6667745113372803,
      "learning_rate": 9.479818774473668e-06,
      "loss": 3.0549,
      "step": 211917
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.235240936279297,
      "learning_rate": 9.478798616012662e-06,
      "loss": 2.8887,
      "step": 211918
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.9635021686553955,
      "learning_rate": 9.477778511565016e-06,
      "loss": 2.606,
      "step": 211919
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5659375190734863,
      "learning_rate": 9.476758461130963e-06,
      "loss": 3.2355,
      "step": 211920
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0849597454071045,
      "learning_rate": 9.475738464710603e-06,
      "loss": 2.8694,
      "step": 211921
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.233905076980591,
      "learning_rate": 9.474718522304236e-06,
      "loss": 2.8248,
      "step": 211922
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.506655216217041,
      "learning_rate": 9.473698633911963e-06,
      "loss": 2.9131,
      "step": 211923
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.258643388748169,
      "learning_rate": 9.47267879953405e-06,
      "loss": 2.7889,
      "step": 211924
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5906522274017334,
      "learning_rate": 9.47165901917063e-06,
      "loss": 2.8332,
      "step": 211925
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2012557983398438,
      "learning_rate": 9.470639292821935e-06,
      "loss": 2.8544,
      "step": 211926
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9511992931365967,
      "learning_rate": 9.4696196204881e-06,
      "loss": 2.9343,
      "step": 211927
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.101264715194702,
      "learning_rate": 9.468600002169391e-06,
      "loss": 3.0288,
      "step": 211928
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.511573076248169,
      "learning_rate": 9.467580437865907e-06,
      "loss": 2.8123,
      "step": 211929
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.607169151306152,
      "learning_rate": 9.466560927577916e-06,
      "loss": 3.0235,
      "step": 211930
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.339585781097412,
      "learning_rate": 9.465541471305549e-06,
      "loss": 2.992,
      "step": 211931
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.114953994750977,
      "learning_rate": 9.464522069049075e-06,
      "loss": 3.3377,
      "step": 211932
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.712184190750122,
      "learning_rate": 9.463502720808591e-06,
      "loss": 2.8195,
      "step": 211933
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.999616861343384,
      "learning_rate": 9.462483426584334e-06,
      "loss": 2.8871,
      "step": 211934
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.918546676635742,
      "learning_rate": 9.461464186376467e-06,
      "loss": 3.021,
      "step": 211935
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.891335964202881,
      "learning_rate": 9.46044500018519e-06,
      "loss": 3.0155,
      "step": 211936
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5277483463287354,
      "learning_rate": 9.459425868010705e-06,
      "loss": 3.0876,
      "step": 211937
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0229227542877197,
      "learning_rate": 9.458406789853279e-06,
      "loss": 3.0145,
      "step": 211938
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5020713806152344,
      "learning_rate": 9.457387765712942e-06,
      "loss": 2.7762,
      "step": 211939
    },
    {
      "epoch": 2.76,
      "grad_norm": 5.178700923919678,
      "learning_rate": 9.456368795589964e-06,
      "loss": 3.0833,
      "step": 211940
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9580607414245605,
      "learning_rate": 9.455349879484541e-06,
      "loss": 2.523,
      "step": 211941
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.631335973739624,
      "learning_rate": 9.454331017396844e-06,
      "loss": 2.8107,
      "step": 211942
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1861424446105957,
      "learning_rate": 9.453312209327069e-06,
      "loss": 2.7694,
      "step": 211943
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.058753490447998,
      "learning_rate": 9.45229345527545e-06,
      "loss": 2.9121,
      "step": 211944
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8867814540863037,
      "learning_rate": 9.451274755242088e-06,
      "loss": 3.063,
      "step": 211945
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.323951482772827,
      "learning_rate": 9.45025610922725e-06,
      "loss": 2.9517,
      "step": 211946
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0717270374298096,
      "learning_rate": 9.449237517231068e-06,
      "loss": 2.8996,
      "step": 211947
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4375858306884766,
      "learning_rate": 9.448218979253774e-06,
      "loss": 2.8895,
      "step": 211948
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.832179307937622,
      "learning_rate": 9.447200495295504e-06,
      "loss": 3.0008,
      "step": 211949
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5956931114196777,
      "learning_rate": 9.446182065356556e-06,
      "loss": 3.0461,
      "step": 211950
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0637547969818115,
      "learning_rate": 9.445163689436997e-06,
      "loss": 2.9755,
      "step": 211951
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.695469856262207,
      "learning_rate": 9.444145367537092e-06,
      "loss": 2.9301,
      "step": 211952
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.073472261428833,
      "learning_rate": 9.443127099656945e-06,
      "loss": 2.8553,
      "step": 211953
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3007988929748535,
      "learning_rate": 9.442108885796884e-06,
      "loss": 2.7821,
      "step": 211954
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0858521461486816,
      "learning_rate": 9.441090725956946e-06,
      "loss": 2.8726,
      "step": 211955
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.753307342529297,
      "learning_rate": 9.440072620137496e-06,
      "loss": 2.926,
      "step": 211956
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7405929565429688,
      "learning_rate": 9.439054568338533e-06,
      "loss": 2.7389,
      "step": 211957
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0244832038879395,
      "learning_rate": 9.438036570560358e-06,
      "loss": 2.9438,
      "step": 211958
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6501376628875732,
      "learning_rate": 9.437018626803106e-06,
      "loss": 2.7863,
      "step": 211959
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.166027545928955,
      "learning_rate": 9.43600073706704e-06,
      "loss": 3.3471,
      "step": 211960
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1314539909362793,
      "learning_rate": 9.434982901352295e-06,
      "loss": 2.9646,
      "step": 211961
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.852454423904419,
      "learning_rate": 9.433965119659071e-06,
      "loss": 2.93,
      "step": 211962
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.988893508911133,
      "learning_rate": 9.432947391987567e-06,
      "loss": 3.0665,
      "step": 211963
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5227324962615967,
      "learning_rate": 9.431929718337983e-06,
      "loss": 3.0517,
      "step": 211964
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.348228693008423,
      "learning_rate": 9.430912098710419e-06,
      "loss": 2.886,
      "step": 211965
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.137565851211548,
      "learning_rate": 9.429894533105176e-06,
      "loss": 3.056,
      "step": 211966
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.261920928955078,
      "learning_rate": 9.428877021522385e-06,
      "loss": 2.8934,
      "step": 211967
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7150442600250244,
      "learning_rate": 9.427859563962281e-06,
      "loss": 2.9468,
      "step": 211968
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9983251094818115,
      "learning_rate": 9.42684216042503e-06,
      "loss": 2.7739,
      "step": 211969
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7307662963867188,
      "learning_rate": 9.425824810910798e-06,
      "loss": 3.0835,
      "step": 211970
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8214354515075684,
      "learning_rate": 9.424807515419752e-06,
      "loss": 2.7409,
      "step": 211971
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.789144992828369,
      "learning_rate": 9.423790273952158e-06,
      "loss": 2.7271,
      "step": 211972
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.706758975982666,
      "learning_rate": 9.422773086508151e-06,
      "loss": 2.7623,
      "step": 211973
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1522700786590576,
      "learning_rate": 9.421755953087929e-06,
      "loss": 2.8097,
      "step": 211974
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8456711769104004,
      "learning_rate": 9.420738873691724e-06,
      "loss": 3.1128,
      "step": 211975
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0398595333099365,
      "learning_rate": 9.41972184831964e-06,
      "loss": 2.5978,
      "step": 211976
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.823509931564331,
      "learning_rate": 9.418704876971972e-06,
      "loss": 2.788,
      "step": 211977
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.324493408203125,
      "learning_rate": 9.417687959648823e-06,
      "loss": 2.911,
      "step": 211978
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.4915401935577393,
      "learning_rate": 9.416671096350392e-06,
      "loss": 2.8568,
      "step": 211979
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.17502760887146,
      "learning_rate": 9.415654287076912e-06,
      "loss": 2.8579,
      "step": 211980
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3827223777770996,
      "learning_rate": 9.414637531828551e-06,
      "loss": 2.9353,
      "step": 211981
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6579105854034424,
      "learning_rate": 9.413620830605472e-06,
      "loss": 3.1605,
      "step": 211982
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.745004177093506,
      "learning_rate": 9.41260418340788e-06,
      "loss": 2.9307,
      "step": 211983
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1052281856536865,
      "learning_rate": 9.411587590236003e-06,
      "loss": 3.201,
      "step": 211984
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.616387367248535,
      "learning_rate": 9.410571051089976e-06,
      "loss": 2.9585,
      "step": 211985
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8403728008270264,
      "learning_rate": 9.409554565970002e-06,
      "loss": 2.7863,
      "step": 211986
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2303409576416016,
      "learning_rate": 9.40853813487631e-06,
      "loss": 3.0321,
      "step": 211987
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.902693271636963,
      "learning_rate": 9.407521757809e-06,
      "loss": 2.964,
      "step": 211988
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7910315990448,
      "learning_rate": 9.406505434768374e-06,
      "loss": 3.0446,
      "step": 211989
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.930731773376465,
      "learning_rate": 9.405489165754532e-06,
      "loss": 2.8529,
      "step": 211990
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.011050224304199,
      "learning_rate": 9.404472950767705e-06,
      "loss": 2.83,
      "step": 211991
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.945465087890625,
      "learning_rate": 9.403456789808095e-06,
      "loss": 3.0416,
      "step": 211992
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6667027473449707,
      "learning_rate": 9.402440682875834e-06,
      "loss": 3.0799,
      "step": 211993
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.291205644607544,
      "learning_rate": 9.401424629971154e-06,
      "loss": 2.8237,
      "step": 211994
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1107661724090576,
      "learning_rate": 9.400408631094258e-06,
      "loss": 2.9068,
      "step": 211995
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7808189392089844,
      "learning_rate": 9.399392686245243e-06,
      "loss": 2.9445,
      "step": 211996
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.139274835586548,
      "learning_rate": 9.398376795424445e-06,
      "loss": 2.8512,
      "step": 211997
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4122273921966553,
      "learning_rate": 9.397360958631894e-06,
      "loss": 2.8734,
      "step": 211998
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.706913948059082,
      "learning_rate": 9.396345175867958e-06,
      "loss": 3.0315,
      "step": 211999
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7729263305664062,
      "learning_rate": 9.395329447132671e-06,
      "loss": 2.8537,
      "step": 212000
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5881009101867676,
      "learning_rate": 9.394313772426299e-06,
      "loss": 2.9078,
      "step": 212001
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.86690616607666,
      "learning_rate": 9.393298151748973e-06,
      "loss": 3.012,
      "step": 212002
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2325096130371094,
      "learning_rate": 9.39228258510093e-06,
      "loss": 2.8127,
      "step": 212003
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.35172176361084,
      "learning_rate": 9.391267072482333e-06,
      "loss": 2.9672,
      "step": 212004
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5996906757354736,
      "learning_rate": 9.39025161389345e-06,
      "loss": 2.9274,
      "step": 212005
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.353217363357544,
      "learning_rate": 9.38923620933435e-06,
      "loss": 2.9378,
      "step": 212006
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8746206760406494,
      "learning_rate": 9.388220858805295e-06,
      "loss": 2.8639,
      "step": 212007
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9288887977600098,
      "learning_rate": 9.387205562306455e-06,
      "loss": 2.8694,
      "step": 212008
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9280200004577637,
      "learning_rate": 9.386190319838027e-06,
      "loss": 3.0391,
      "step": 212009
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9313251972198486,
      "learning_rate": 9.385175131400146e-06,
      "loss": 2.8894,
      "step": 212010
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3853039741516113,
      "learning_rate": 9.384159996993112e-06,
      "loss": 2.9893,
      "step": 212011
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.81610107421875,
      "learning_rate": 9.383144916616992e-06,
      "loss": 3.0299,
      "step": 212012
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2172434329986572,
      "learning_rate": 9.382129890272082e-06,
      "loss": 3.0197,
      "step": 212013
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.186770439147949,
      "learning_rate": 9.381114917958488e-06,
      "loss": 2.8516,
      "step": 212014
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2014517784118652,
      "learning_rate": 9.380099999676439e-06,
      "loss": 2.9415,
      "step": 212015
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.032519578933716,
      "learning_rate": 9.379085135426101e-06,
      "loss": 2.9801,
      "step": 212016
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2251062393188477,
      "learning_rate": 9.37807032520771e-06,
      "loss": 2.8284,
      "step": 212017
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5612072944641113,
      "learning_rate": 9.377055569021397e-06,
      "loss": 2.793,
      "step": 212018
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3052802085876465,
      "learning_rate": 9.376040866867395e-06,
      "loss": 2.7364,
      "step": 212019
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.012888193130493,
      "learning_rate": 9.375026218745807e-06,
      "loss": 2.7167,
      "step": 212020
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.835465669631958,
      "learning_rate": 9.374011624656963e-06,
      "loss": 3.14,
      "step": 212021
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.171985149383545,
      "learning_rate": 9.37299708460093e-06,
      "loss": 3.1529,
      "step": 212022
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2356159687042236,
      "learning_rate": 9.37198259857801e-06,
      "loss": 3.0072,
      "step": 212023
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.081089496612549,
      "learning_rate": 9.370968166588267e-06,
      "loss": 2.9843,
      "step": 212024
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7928061485290527,
      "learning_rate": 9.369953788631967e-06,
      "loss": 2.9461,
      "step": 212025
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.885528326034546,
      "learning_rate": 9.368939464709247e-06,
      "loss": 2.7528,
      "step": 212026
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7370152473449707,
      "learning_rate": 9.367925194820336e-06,
      "loss": 2.9143,
      "step": 212027
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9443092346191406,
      "learning_rate": 9.366910978965436e-06,
      "loss": 2.792,
      "step": 212028
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1078319549560547,
      "learning_rate": 9.365896817144746e-06,
      "loss": 2.7926,
      "step": 212029
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.705850124359131,
      "learning_rate": 9.364882709358335e-06,
      "loss": 3.0339,
      "step": 212030
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.828880548477173,
      "learning_rate": 9.363868655606565e-06,
      "loss": 3.1202,
      "step": 212031
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.859713077545166,
      "learning_rate": 9.362854655889474e-06,
      "loss": 3.1994,
      "step": 212032
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.76248836517334,
      "learning_rate": 9.361840710207324e-06,
      "loss": 3.205,
      "step": 212033
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9768898487091064,
      "learning_rate": 9.360826818560285e-06,
      "loss": 2.8111,
      "step": 212034
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.4801828861236572,
      "learning_rate": 9.359812980948622e-06,
      "loss": 2.7794,
      "step": 212035
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.079185962677002,
      "learning_rate": 9.358799197372402e-06,
      "loss": 2.7713,
      "step": 212036
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.306605339050293,
      "learning_rate": 9.357785467831892e-06,
      "loss": 3.1895,
      "step": 212037
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8249902725219727,
      "learning_rate": 9.356771792327222e-06,
      "loss": 2.9857,
      "step": 212038
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9365265369415283,
      "learning_rate": 9.355758170858629e-06,
      "loss": 3.0454,
      "step": 212039
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.985389471054077,
      "learning_rate": 9.35474460342628e-06,
      "loss": 3.078,
      "step": 212040
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1258983612060547,
      "learning_rate": 9.353731090030437e-06,
      "loss": 2.8941,
      "step": 212041
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5418355464935303,
      "learning_rate": 9.352717630671136e-06,
      "loss": 2.8953,
      "step": 212042
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9303441047668457,
      "learning_rate": 9.351704225348711e-06,
      "loss": 2.9658,
      "step": 212043
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8044962882995605,
      "learning_rate": 9.350690874063227e-06,
      "loss": 3.0217,
      "step": 212044
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.8687005043029785,
      "learning_rate": 9.349677576814985e-06,
      "loss": 2.6227,
      "step": 212045
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5105345249176025,
      "learning_rate": 9.348664333604083e-06,
      "loss": 2.8804,
      "step": 212046
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7890853881835938,
      "learning_rate": 9.347651144430823e-06,
      "loss": 2.9167,
      "step": 212047
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8889896869659424,
      "learning_rate": 9.346638009295238e-06,
      "loss": 2.9279,
      "step": 212048
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.846132278442383,
      "learning_rate": 9.34562492819766e-06,
      "loss": 2.7584,
      "step": 212049
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.545302152633667,
      "learning_rate": 9.344611901138188e-06,
      "loss": 3.0753,
      "step": 212050
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5648751258850098,
      "learning_rate": 9.343598928117058e-06,
      "loss": 2.8763,
      "step": 212051
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3636884689331055,
      "learning_rate": 9.342586009134401e-06,
      "loss": 2.8883,
      "step": 212052
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6699793338775635,
      "learning_rate": 9.341573144190485e-06,
      "loss": 2.6265,
      "step": 212053
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8115031719207764,
      "learning_rate": 9.34056033328544e-06,
      "loss": 2.8623,
      "step": 212054
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.762411117553711,
      "learning_rate": 9.339547576419471e-06,
      "loss": 2.5576,
      "step": 212055
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.110748767852783,
      "learning_rate": 9.338534873592773e-06,
      "loss": 3.0027,
      "step": 212056
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8872883319854736,
      "learning_rate": 9.337522224805516e-06,
      "loss": 2.9487,
      "step": 212057
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3221871852874756,
      "learning_rate": 9.336509630057898e-06,
      "loss": 3.0691,
      "step": 212058
    },
    {
      "epoch": 2.76,
      "grad_norm": 5.596153259277344,
      "learning_rate": 9.335497089350118e-06,
      "loss": 2.9022,
      "step": 212059
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9790308475494385,
      "learning_rate": 9.33448460268238e-06,
      "loss": 3.1666,
      "step": 212060
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.717440128326416,
      "learning_rate": 9.333472170054812e-06,
      "loss": 2.8048,
      "step": 212061
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.112441301345825,
      "learning_rate": 9.33245979146765e-06,
      "loss": 2.8656,
      "step": 212062
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5472991466522217,
      "learning_rate": 9.331447466921093e-06,
      "loss": 2.7223,
      "step": 212063
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1028811931610107,
      "learning_rate": 9.330435196415276e-06,
      "loss": 3.1027,
      "step": 212064
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.136490821838379,
      "learning_rate": 9.329422979950429e-06,
      "loss": 3.0096,
      "step": 212065
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.301664352416992,
      "learning_rate": 9.328410817526722e-06,
      "loss": 2.759,
      "step": 212066
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1432666778564453,
      "learning_rate": 9.327398709144351e-06,
      "loss": 2.8252,
      "step": 212067
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8155815601348877,
      "learning_rate": 9.326386654803519e-06,
      "loss": 2.9709,
      "step": 212068
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.938199520111084,
      "learning_rate": 9.325374654504392e-06,
      "loss": 2.9469,
      "step": 212069
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7634716033935547,
      "learning_rate": 9.324362708247135e-06,
      "loss": 2.8568,
      "step": 212070
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.262273073196411,
      "learning_rate": 9.323350816032015e-06,
      "loss": 2.9086,
      "step": 212071
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.726616621017456,
      "learning_rate": 9.322338977859167e-06,
      "loss": 2.8136,
      "step": 212072
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.689819574356079,
      "learning_rate": 9.321327193728723e-06,
      "loss": 3.2484,
      "step": 212073
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.673593521118164,
      "learning_rate": 9.320315463640982e-06,
      "loss": 2.7855,
      "step": 212074
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.9226937294006348,
      "learning_rate": 9.319303787596077e-06,
      "loss": 2.9728,
      "step": 212075
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.294133186340332,
      "learning_rate": 9.31829216559421e-06,
      "loss": 2.8658,
      "step": 212076
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.208181619644165,
      "learning_rate": 9.317280597635546e-06,
      "loss": 3.0773,
      "step": 212077
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0601346492767334,
      "learning_rate": 9.316269083720285e-06,
      "loss": 2.8465,
      "step": 212078
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.097801446914673,
      "learning_rate": 9.315257623848593e-06,
      "loss": 2.9207,
      "step": 212079
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7046520709991455,
      "learning_rate": 9.314246218020738e-06,
      "loss": 3.0557,
      "step": 212080
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7838144302368164,
      "learning_rate": 9.313234866236786e-06,
      "loss": 2.8163,
      "step": 212081
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1615235805511475,
      "learning_rate": 9.312223568497035e-06,
      "loss": 2.8185,
      "step": 212082
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9848291873931885,
      "learning_rate": 9.311212324801586e-06,
      "loss": 2.6539,
      "step": 212083
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.47996187210083,
      "learning_rate": 9.310201135150775e-06,
      "loss": 2.7192,
      "step": 212084
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4160406589508057,
      "learning_rate": 9.309189999544564e-06,
      "loss": 3.073,
      "step": 212085
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7737860679626465,
      "learning_rate": 9.308178917983322e-06,
      "loss": 2.8694,
      "step": 212086
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.139688730239868,
      "learning_rate": 9.30716789046715e-06,
      "loss": 2.9182,
      "step": 212087
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0159871578216553,
      "learning_rate": 9.306156916996277e-06,
      "loss": 2.8414,
      "step": 212088
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.760370969772339,
      "learning_rate": 9.305145997570873e-06,
      "loss": 2.8717,
      "step": 212089
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.124927043914795,
      "learning_rate": 9.30413513219117e-06,
      "loss": 3.0854,
      "step": 212090
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7615950107574463,
      "learning_rate": 9.30312432085727e-06,
      "loss": 2.8042,
      "step": 212091
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.853518009185791,
      "learning_rate": 9.302113563569435e-06,
      "loss": 2.781,
      "step": 212092
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7122833728790283,
      "learning_rate": 9.30110286032777e-06,
      "loss": 2.8361,
      "step": 212093
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.230013847351074,
      "learning_rate": 9.30009221113257e-06,
      "loss": 2.829,
      "step": 212094
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2944862842559814,
      "learning_rate": 9.299081615983938e-06,
      "loss": 3.0017,
      "step": 212095
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5453219413757324,
      "learning_rate": 9.298071074882141e-06,
      "loss": 2.7529,
      "step": 212096
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.871567487716675,
      "learning_rate": 9.297060587827276e-06,
      "loss": 2.9085,
      "step": 212097
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7720072269439697,
      "learning_rate": 9.296050154819578e-06,
      "loss": 2.5933,
      "step": 212098
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8336493968963623,
      "learning_rate": 9.295039775859215e-06,
      "loss": 2.9557,
      "step": 212099
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7549307346343994,
      "learning_rate": 9.29402945094645e-06,
      "loss": 3.0295,
      "step": 212100
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5817368030548096,
      "learning_rate": 9.293019180081352e-06,
      "loss": 2.9874,
      "step": 212101
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0379416942596436,
      "learning_rate": 9.292008963264252e-06,
      "loss": 2.9974,
      "step": 212102
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6583564281463623,
      "learning_rate": 9.290998800495154e-06,
      "loss": 2.8469,
      "step": 212103
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3087656497955322,
      "learning_rate": 9.28998869177442e-06,
      "loss": 2.933,
      "step": 212104
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.88327693939209,
      "learning_rate": 9.288978637102118e-06,
      "loss": 2.8259,
      "step": 212105
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0078392028808594,
      "learning_rate": 9.287968636478516e-06,
      "loss": 2.9503,
      "step": 212106
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.706280708312988,
      "learning_rate": 9.286958689903712e-06,
      "loss": 3.219,
      "step": 212107
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.676661491394043,
      "learning_rate": 9.28594879737804e-06,
      "loss": 2.9558,
      "step": 212108
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4421803951263428,
      "learning_rate": 9.284938958901533e-06,
      "loss": 2.5427,
      "step": 212109
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.015995025634766,
      "learning_rate": 9.283929174474457e-06,
      "loss": 2.8508,
      "step": 212110
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.831294059753418,
      "learning_rate": 9.282919444096981e-06,
      "loss": 3.0437,
      "step": 212111
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.070268154144287,
      "learning_rate": 9.281909767769335e-06,
      "loss": 2.7895,
      "step": 212112
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0581307411193848,
      "learning_rate": 9.280900145491622e-06,
      "loss": 2.7864,
      "step": 212113
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5951309204101562,
      "learning_rate": 9.279890577264138e-06,
      "loss": 3.0381,
      "step": 212114
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.927098274230957,
      "learning_rate": 9.278881063086919e-06,
      "loss": 2.9181,
      "step": 212115
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9352850914001465,
      "learning_rate": 9.27787160296033e-06,
      "loss": 3.0593,
      "step": 212116
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7418806552886963,
      "learning_rate": 9.276862196884405e-06,
      "loss": 3.1018,
      "step": 212117
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5880024433135986,
      "learning_rate": 9.275852844859445e-06,
      "loss": 2.9981,
      "step": 212118
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9763762950897217,
      "learning_rate": 9.274843546885546e-06,
      "loss": 2.8729,
      "step": 212119
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8910508155822754,
      "learning_rate": 9.273834302963013e-06,
      "loss": 3.1198,
      "step": 212120
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.944340944290161,
      "learning_rate": 9.272825113091908e-06,
      "loss": 2.8798,
      "step": 212121
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.842052459716797,
      "learning_rate": 9.2718159772725e-06,
      "loss": 2.741,
      "step": 212122
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.010941505432129,
      "learning_rate": 9.270806895504923e-06,
      "loss": 3.0646,
      "step": 212123
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.706916093826294,
      "learning_rate": 9.269797867789407e-06,
      "loss": 2.8669,
      "step": 212124
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.603586196899414,
      "learning_rate": 9.26878889412609e-06,
      "loss": 2.8595,
      "step": 212125
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.566366195678711,
      "learning_rate": 9.267779974515233e-06,
      "loss": 2.7547,
      "step": 212126
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.863010883331299,
      "learning_rate": 9.266771108957006e-06,
      "loss": 2.8861,
      "step": 212127
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.738659143447876,
      "learning_rate": 9.26576229745154e-06,
      "loss": 2.7915,
      "step": 212128
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.17576003074646,
      "learning_rate": 9.264753539999036e-06,
      "loss": 3.0182,
      "step": 212129
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.896169662475586,
      "learning_rate": 9.263744836599728e-06,
      "loss": 3.2019,
      "step": 212130
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.031362295150757,
      "learning_rate": 9.26273618725375e-06,
      "loss": 3.0665,
      "step": 212131
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.571887254714966,
      "learning_rate": 9.261727591961366e-06,
      "loss": 2.7363,
      "step": 212132
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.64866304397583,
      "learning_rate": 9.26071905072271e-06,
      "loss": 2.9472,
      "step": 212133
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.992985248565674,
      "learning_rate": 9.259710563537948e-06,
      "loss": 2.7887,
      "step": 212134
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6150949001312256,
      "learning_rate": 9.25870213040728e-06,
      "loss": 2.8803,
      "step": 212135
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.750917911529541,
      "learning_rate": 9.257693751330942e-06,
      "loss": 3.0241,
      "step": 212136
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.592837333679199,
      "learning_rate": 9.256685426309063e-06,
      "loss": 2.8797,
      "step": 212137
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.691577196121216,
      "learning_rate": 9.255677155341879e-06,
      "loss": 2.9336,
      "step": 212138
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2355642318725586,
      "learning_rate": 9.254668938429554e-06,
      "loss": 3.2082,
      "step": 212139
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.087308168411255,
      "learning_rate": 9.253660775572259e-06,
      "loss": 2.9082,
      "step": 212140
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8368496894836426,
      "learning_rate": 9.252652666770188e-06,
      "loss": 2.842,
      "step": 212141
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4405086040496826,
      "learning_rate": 9.251644612023546e-06,
      "loss": 2.9408,
      "step": 212142
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3779873847961426,
      "learning_rate": 9.250636611332495e-06,
      "loss": 2.8779,
      "step": 212143
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5673153400421143,
      "learning_rate": 9.249628664697273e-06,
      "loss": 2.7711,
      "step": 212144
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.809704542160034,
      "learning_rate": 9.248620772118043e-06,
      "loss": 2.7801,
      "step": 212145
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.475586414337158,
      "learning_rate": 9.24761293359494e-06,
      "loss": 2.8276,
      "step": 212146
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9440865516662598,
      "learning_rate": 9.246605149128195e-06,
      "loss": 2.6776,
      "step": 212147
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0644962787628174,
      "learning_rate": 9.245597418718042e-06,
      "loss": 3.0411,
      "step": 212148
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.65730357170105,
      "learning_rate": 9.24458974236455e-06,
      "loss": 3.0676,
      "step": 212149
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6798484325408936,
      "learning_rate": 9.24358212006805e-06,
      "loss": 2.9096,
      "step": 212150
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3843331336975098,
      "learning_rate": 9.242574551828608e-06,
      "loss": 2.8197,
      "step": 212151
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.149627923965454,
      "learning_rate": 9.241567037646458e-06,
      "loss": 3.0086,
      "step": 212152
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6709344387054443,
      "learning_rate": 9.240559577521833e-06,
      "loss": 3.2055,
      "step": 212153
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8813509941101074,
      "learning_rate": 9.239552171454835e-06,
      "loss": 2.8619,
      "step": 212154
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.645700454711914,
      "learning_rate": 9.238544819445692e-06,
      "loss": 3.1506,
      "step": 212155
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.436516523361206,
      "learning_rate": 9.23753752149461e-06,
      "loss": 3.0287,
      "step": 212156
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8112173080444336,
      "learning_rate": 9.236530277601783e-06,
      "loss": 2.9413,
      "step": 212157
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.597799777984619,
      "learning_rate": 9.235523087767316e-06,
      "loss": 2.84,
      "step": 212158
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.96692156791687,
      "learning_rate": 9.234515951991506e-06,
      "loss": 3.0043,
      "step": 212159
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.211155414581299,
      "learning_rate": 9.233508870274453e-06,
      "loss": 3.0353,
      "step": 212160
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0186610221862793,
      "learning_rate": 9.232501842616392e-06,
      "loss": 3.0257,
      "step": 212161
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3402187824249268,
      "learning_rate": 9.23149486901752e-06,
      "loss": 2.7538,
      "step": 212162
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1026434898376465,
      "learning_rate": 9.23048794947797e-06,
      "loss": 3.0256,
      "step": 212163
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7130470275878906,
      "learning_rate": 9.229481083997947e-06,
      "loss": 3.0174,
      "step": 212164
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.070540189743042,
      "learning_rate": 9.228474272577713e-06,
      "loss": 2.9211,
      "step": 212165
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.220778226852417,
      "learning_rate": 9.227467515217335e-06,
      "loss": 3.1354,
      "step": 212166
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0771806240081787,
      "learning_rate": 9.22646081191708e-06,
      "loss": 2.6309,
      "step": 212167
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.926422119140625,
      "learning_rate": 9.225454162677115e-06,
      "loss": 2.9448,
      "step": 212168
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6019606590270996,
      "learning_rate": 9.224447567497639e-06,
      "loss": 2.8388,
      "step": 212169
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.166835069656372,
      "learning_rate": 9.22344102637882e-06,
      "loss": 2.8915,
      "step": 212170
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8527235984802246,
      "learning_rate": 9.222434539320856e-06,
      "loss": 2.6481,
      "step": 212171
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0170528888702393,
      "learning_rate": 9.22142810632388e-06,
      "loss": 2.7754,
      "step": 212172
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0731515884399414,
      "learning_rate": 9.220421727388195e-06,
      "loss": 2.7201,
      "step": 212173
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.928746223449707,
      "learning_rate": 9.219415402513897e-06,
      "loss": 3.0547,
      "step": 212174
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.097641706466675,
      "learning_rate": 9.21840913170122e-06,
      "loss": 2.9878,
      "step": 212175
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7691614627838135,
      "learning_rate": 9.2174029149503e-06,
      "loss": 2.8716,
      "step": 212176
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.430217742919922,
      "learning_rate": 9.216396752261368e-06,
      "loss": 2.8619,
      "step": 212177
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7975375652313232,
      "learning_rate": 9.215390643634591e-06,
      "loss": 3.0367,
      "step": 212178
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7559196949005127,
      "learning_rate": 9.214384589070167e-06,
      "loss": 3.0065,
      "step": 212179
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.22487211227417,
      "learning_rate": 9.213378588568266e-06,
      "loss": 2.7311,
      "step": 212180
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0938146114349365,
      "learning_rate": 9.212372642129118e-06,
      "loss": 3.0726,
      "step": 212181
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0372817516326904,
      "learning_rate": 9.211366749752858e-06,
      "loss": 2.9225,
      "step": 212182
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8573966026306152,
      "learning_rate": 9.210360911439686e-06,
      "loss": 2.7828,
      "step": 212183
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1074776649475098,
      "learning_rate": 9.209355127189799e-06,
      "loss": 3.068,
      "step": 212184
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8026123046875,
      "learning_rate": 9.208349397003367e-06,
      "loss": 2.8982,
      "step": 212185
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1152584552764893,
      "learning_rate": 9.207343720880622e-06,
      "loss": 2.8741,
      "step": 212186
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5604779720306396,
      "learning_rate": 9.206338098821697e-06,
      "loss": 2.5834,
      "step": 212187
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7337799072265625,
      "learning_rate": 9.205332530826826e-06,
      "loss": 2.6859,
      "step": 212188
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3227767944335938,
      "learning_rate": 9.204327016896174e-06,
      "loss": 2.8977,
      "step": 212189
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6527187824249268,
      "learning_rate": 9.203321557029908e-06,
      "loss": 3.0624,
      "step": 212190
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.958477020263672,
      "learning_rate": 9.202316151228262e-06,
      "loss": 2.9731,
      "step": 212191
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.022897958755493,
      "learning_rate": 9.201310799491334e-06,
      "loss": 2.8879,
      "step": 212192
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.472590446472168,
      "learning_rate": 9.200305501819427e-06,
      "loss": 2.9265,
      "step": 212193
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7109129428863525,
      "learning_rate": 9.199300258212671e-06,
      "loss": 2.6983,
      "step": 212194
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.627049446105957,
      "learning_rate": 9.19829506867127e-06,
      "loss": 2.8596,
      "step": 212195
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0982465744018555,
      "learning_rate": 9.19728993319535e-06,
      "loss": 2.8027,
      "step": 212196
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.810678482055664,
      "learning_rate": 9.196284851785151e-06,
      "loss": 3.0595,
      "step": 212197
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.545910120010376,
      "learning_rate": 9.195279824440838e-06,
      "loss": 2.9902,
      "step": 212198
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.798431158065796,
      "learning_rate": 9.194274851162641e-06,
      "loss": 2.9381,
      "step": 212199
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8634042739868164,
      "learning_rate": 9.19326993195073e-06,
      "loss": 2.8277,
      "step": 212200
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.292872667312622,
      "learning_rate": 9.192265066805271e-06,
      "loss": 2.7741,
      "step": 212201
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.415409803390503,
      "learning_rate": 9.19126025572643e-06,
      "loss": 2.8908,
      "step": 212202
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8035566806793213,
      "learning_rate": 9.190255498714472e-06,
      "loss": 2.8648,
      "step": 212203
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.100160598754883,
      "learning_rate": 9.189250795769465e-06,
      "loss": 2.7514,
      "step": 212204
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3913354873657227,
      "learning_rate": 9.188246146891709e-06,
      "loss": 2.8055,
      "step": 212205
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9372291564941406,
      "learning_rate": 9.18724155208137e-06,
      "loss": 2.9622,
      "step": 212206
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.761990785598755,
      "learning_rate": 9.186237011338583e-06,
      "loss": 2.7996,
      "step": 212207
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6993541717529297,
      "learning_rate": 9.185232524663543e-06,
      "loss": 2.905,
      "step": 212208
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.082738161087036,
      "learning_rate": 9.184228092056522e-06,
      "loss": 2.8238,
      "step": 212209
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.520789623260498,
      "learning_rate": 9.183223713517585e-06,
      "loss": 2.8702,
      "step": 212210
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.261606454849243,
      "learning_rate": 9.182219389046997e-06,
      "loss": 2.9179,
      "step": 212211
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.866452932357788,
      "learning_rate": 9.181215118644925e-06,
      "loss": 2.5434,
      "step": 212212
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8902573585510254,
      "learning_rate": 9.180210902311569e-06,
      "loss": 2.7393,
      "step": 212213
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9593794345855713,
      "learning_rate": 9.179206740047063e-06,
      "loss": 2.9848,
      "step": 212214
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.759042978286743,
      "learning_rate": 9.178202631851673e-06,
      "loss": 2.7761,
      "step": 212215
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5628082752227783,
      "learning_rate": 9.177198577725497e-06,
      "loss": 2.9584,
      "step": 212216
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.819535255432129,
      "learning_rate": 9.176194577668805e-06,
      "loss": 3.0912,
      "step": 212217
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.055168867111206,
      "learning_rate": 9.175190631681762e-06,
      "loss": 2.6932,
      "step": 212218
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.663566827774048,
      "learning_rate": 9.1741867397645e-06,
      "loss": 2.7471,
      "step": 212219
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8534843921661377,
      "learning_rate": 9.173182901917253e-06,
      "loss": 2.9035,
      "step": 212220
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2146260738372803,
      "learning_rate": 9.172179118140221e-06,
      "loss": 2.7916,
      "step": 212221
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7839388847351074,
      "learning_rate": 9.171175388433537e-06,
      "loss": 2.7413,
      "step": 212222
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.523324728012085,
      "learning_rate": 9.17017171279747e-06,
      "loss": 3.0292,
      "step": 212223
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.288311004638672,
      "learning_rate": 9.169168091232148e-06,
      "loss": 3.0882,
      "step": 212224
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8413727283477783,
      "learning_rate": 9.168164523737776e-06,
      "loss": 2.6794,
      "step": 212225
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8798985481262207,
      "learning_rate": 9.167161010314484e-06,
      "loss": 3.1104,
      "step": 212226
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.4613564014434814,
      "learning_rate": 9.166157550962538e-06,
      "loss": 2.9599,
      "step": 212227
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.272724151611328,
      "learning_rate": 9.165154145682075e-06,
      "loss": 2.7114,
      "step": 212228
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8962600231170654,
      "learning_rate": 9.164150794473324e-06,
      "loss": 2.6934,
      "step": 212229
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.548370838165283,
      "learning_rate": 9.163147497336454e-06,
      "loss": 2.8323,
      "step": 212230
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3102505207061768,
      "learning_rate": 9.16214425427163e-06,
      "loss": 3.1563,
      "step": 212231
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.677158832550049,
      "learning_rate": 9.16114106527902e-06,
      "loss": 2.7872,
      "step": 212232
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2241153717041016,
      "learning_rate": 9.16013793035889e-06,
      "loss": 3.0628,
      "step": 212233
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8624229431152344,
      "learning_rate": 9.15913484951134e-06,
      "loss": 2.9852,
      "step": 212234
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.218843460083008,
      "learning_rate": 9.158131822736637e-06,
      "loss": 2.8202,
      "step": 212235
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.743095636367798,
      "learning_rate": 9.157128850034945e-06,
      "loss": 3.0165,
      "step": 212236
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5648069381713867,
      "learning_rate": 9.156125931406365e-06,
      "loss": 3.0602,
      "step": 212237
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7990987300872803,
      "learning_rate": 9.1551230668512e-06,
      "loss": 3.1346,
      "step": 212238
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.051666736602783,
      "learning_rate": 9.154120256369579e-06,
      "loss": 3.0183,
      "step": 212239
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3978917598724365,
      "learning_rate": 9.15311749996167e-06,
      "loss": 3.0385,
      "step": 212240
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7999768257141113,
      "learning_rate": 9.152114797627741e-06,
      "loss": 2.9876,
      "step": 212241
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.7993388175964355,
      "learning_rate": 9.15111214936789e-06,
      "loss": 2.8507,
      "step": 212242
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0004146099090576,
      "learning_rate": 9.150109555182316e-06,
      "loss": 2.9624,
      "step": 212243
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.042285919189453,
      "learning_rate": 9.149107015071255e-06,
      "loss": 2.804,
      "step": 212244
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1875927448272705,
      "learning_rate": 9.148104529034872e-06,
      "loss": 2.8921,
      "step": 212245
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6482415199279785,
      "learning_rate": 9.147102097073333e-06,
      "loss": 3.2525,
      "step": 212246
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.74186372756958,
      "learning_rate": 9.146099719186838e-06,
      "loss": 2.6127,
      "step": 212247
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4879870414733887,
      "learning_rate": 9.14509739537559e-06,
      "loss": 3.1331,
      "step": 212248
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9091575145721436,
      "learning_rate": 9.14409512563975e-06,
      "loss": 2.8054,
      "step": 212249
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4558475017547607,
      "learning_rate": 9.143092909979488e-06,
      "loss": 2.9033,
      "step": 212250
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1227524280548096,
      "learning_rate": 9.142090748395037e-06,
      "loss": 3.0215,
      "step": 212251
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8498871326446533,
      "learning_rate": 9.141088640886563e-06,
      "loss": 3.0903,
      "step": 212252
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.262373685836792,
      "learning_rate": 9.140086587454265e-06,
      "loss": 3.1761,
      "step": 212253
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.277374267578125,
      "learning_rate": 9.139084588098277e-06,
      "loss": 2.7851,
      "step": 212254
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9641454219818115,
      "learning_rate": 9.138082642818868e-06,
      "loss": 2.9129,
      "step": 212255
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.682076930999756,
      "learning_rate": 9.137080751616166e-06,
      "loss": 3.0328,
      "step": 212256
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.116761207580566,
      "learning_rate": 9.136078914490341e-06,
      "loss": 2.8407,
      "step": 212257
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8505663871765137,
      "learning_rate": 9.135077131441626e-06,
      "loss": 2.7424,
      "step": 212258
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.630201578140259,
      "learning_rate": 9.134075402470186e-06,
      "loss": 2.4786,
      "step": 212259
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.152806282043457,
      "learning_rate": 9.133073727576223e-06,
      "loss": 2.7721,
      "step": 212260
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.4125187397003174,
      "learning_rate": 9.132072106759903e-06,
      "loss": 3.0146,
      "step": 212261
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.895941734313965,
      "learning_rate": 9.131070540021457e-06,
      "loss": 3.0846,
      "step": 212262
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9534592628479004,
      "learning_rate": 9.130069027360986e-06,
      "loss": 2.8268,
      "step": 212263
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1882400512695312,
      "learning_rate": 9.129067568778758e-06,
      "loss": 3.1203,
      "step": 212264
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7379863262176514,
      "learning_rate": 9.128066164274905e-06,
      "loss": 3.0451,
      "step": 212265
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2163760662078857,
      "learning_rate": 9.127064813849628e-06,
      "loss": 3.1089,
      "step": 212266
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4114530086517334,
      "learning_rate": 9.126063517503157e-06,
      "loss": 3.0289,
      "step": 212267
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.151329278945923,
      "learning_rate": 9.125062275235629e-06,
      "loss": 2.7762,
      "step": 212268
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.829284191131592,
      "learning_rate": 9.124061087047241e-06,
      "loss": 3.0508,
      "step": 212269
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.841097116470337,
      "learning_rate": 9.123059952938161e-06,
      "loss": 3.0128,
      "step": 212270
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.338514804840088,
      "learning_rate": 9.122058872908588e-06,
      "loss": 2.9188,
      "step": 212271
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0988097190856934,
      "learning_rate": 9.121057846958756e-06,
      "loss": 2.962,
      "step": 212272
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0863215923309326,
      "learning_rate": 9.1200568750888e-06,
      "loss": 3.1318,
      "step": 212273
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4147861003875732,
      "learning_rate": 9.119055957298914e-06,
      "loss": 2.8809,
      "step": 212274
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3410933017730713,
      "learning_rate": 9.118055093589272e-06,
      "loss": 2.9608,
      "step": 212275
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.016949653625488,
      "learning_rate": 9.117054283960101e-06,
      "loss": 3.008,
      "step": 212276
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.797438383102417,
      "learning_rate": 9.116053528411505e-06,
      "loss": 2.8941,
      "step": 212277
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.733494520187378,
      "learning_rate": 9.115052826943781e-06,
      "loss": 2.8429,
      "step": 212278
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8917810916900635,
      "learning_rate": 9.114052179557063e-06,
      "loss": 3.0484,
      "step": 212279
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8813846111297607,
      "learning_rate": 9.113051586251518e-06,
      "loss": 2.7787,
      "step": 212280
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.757382869720459,
      "learning_rate": 9.112051047027313e-06,
      "loss": 2.894,
      "step": 212281
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.370164394378662,
      "learning_rate": 9.111050561884714e-06,
      "loss": 3.1406,
      "step": 212282
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.062441349029541,
      "learning_rate": 9.110050130823821e-06,
      "loss": 2.9717,
      "step": 212283
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.185250759124756,
      "learning_rate": 9.1090497538449e-06,
      "loss": 2.8634,
      "step": 212284
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.908863067626953,
      "learning_rate": 9.108049430948084e-06,
      "loss": 2.8066,
      "step": 212285
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7350966930389404,
      "learning_rate": 9.107049162133573e-06,
      "loss": 2.8933,
      "step": 212286
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1189136505126953,
      "learning_rate": 9.106048947401534e-06,
      "loss": 3.0576,
      "step": 212287
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6353354454040527,
      "learning_rate": 9.1050487867522e-06,
      "loss": 3.1132,
      "step": 212288
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4354169368743896,
      "learning_rate": 9.104048680185704e-06,
      "loss": 2.907,
      "step": 212289
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.345741033554077,
      "learning_rate": 9.103048627702248e-06,
      "loss": 3.0342,
      "step": 212290
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.77463698387146,
      "learning_rate": 9.10204862930206e-06,
      "loss": 3.0534,
      "step": 212291
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.555967330932617,
      "learning_rate": 9.101048684985313e-06,
      "loss": 2.8157,
      "step": 212292
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.943464756011963,
      "learning_rate": 9.100048794752102e-06,
      "loss": 2.7994,
      "step": 212293
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0871493816375732,
      "learning_rate": 9.099048958602728e-06,
      "loss": 2.9083,
      "step": 212294
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2557871341705322,
      "learning_rate": 9.098049176537291e-06,
      "loss": 2.8288,
      "step": 212295
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2731029987335205,
      "learning_rate": 9.097049448556092e-06,
      "loss": 2.8195,
      "step": 212296
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.362657308578491,
      "learning_rate": 9.096049774659198e-06,
      "loss": 2.9373,
      "step": 212297
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8211607933044434,
      "learning_rate": 9.095050154846839e-06,
      "loss": 2.8737,
      "step": 212298
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3079090118408203,
      "learning_rate": 9.094050589119184e-06,
      "loss": 2.8441,
      "step": 212299
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.865070104598999,
      "learning_rate": 9.093051077476466e-06,
      "loss": 2.6469,
      "step": 212300
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.6207950115203857,
      "learning_rate": 9.092051619918816e-06,
      "loss": 2.8634,
      "step": 212301
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7714076042175293,
      "learning_rate": 9.09105221644647e-06,
      "loss": 3.0707,
      "step": 212302
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7886457443237305,
      "learning_rate": 9.090052867059595e-06,
      "loss": 2.7566,
      "step": 212303
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1899309158325195,
      "learning_rate": 9.089053571758387e-06,
      "loss": 2.906,
      "step": 212304
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.595430850982666,
      "learning_rate": 9.088054330542949e-06,
      "loss": 3.0835,
      "step": 212305
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2990148067474365,
      "learning_rate": 9.08705514341358e-06,
      "loss": 2.8688,
      "step": 212306
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.8100860118865967,
      "learning_rate": 9.086056010370379e-06,
      "loss": 2.8183,
      "step": 212307
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8415188789367676,
      "learning_rate": 9.085056931413614e-06,
      "loss": 2.9987,
      "step": 212308
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.598430871963501,
      "learning_rate": 9.084057906543418e-06,
      "loss": 2.9313,
      "step": 212309
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.6900622844696045,
      "learning_rate": 9.08305893575999e-06,
      "loss": 2.9061,
      "step": 212310
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.835174083709717,
      "learning_rate": 9.082060019063498e-06,
      "loss": 2.8514,
      "step": 212311
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7127652168273926,
      "learning_rate": 9.08106115645414e-06,
      "loss": 3.0905,
      "step": 212312
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.97680926322937,
      "learning_rate": 9.080062347932116e-06,
      "loss": 3.1092,
      "step": 212313
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.405144691467285,
      "learning_rate": 9.079063593497593e-06,
      "loss": 2.6964,
      "step": 212314
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7277791500091553,
      "learning_rate": 9.078064893150771e-06,
      "loss": 3.1883,
      "step": 212315
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.281630039215088,
      "learning_rate": 9.07706624689185e-06,
      "loss": 3.0034,
      "step": 212316
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.077000617980957,
      "learning_rate": 9.076067654720965e-06,
      "loss": 2.7649,
      "step": 212317
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.045781135559082,
      "learning_rate": 9.075069116638345e-06,
      "loss": 3.1027,
      "step": 212318
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.907283067703247,
      "learning_rate": 9.074070632644126e-06,
      "loss": 2.7368,
      "step": 212319
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.9426891803741455,
      "learning_rate": 9.073072202738574e-06,
      "loss": 2.8954,
      "step": 212320
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.572206974029541,
      "learning_rate": 9.072073826921788e-06,
      "loss": 2.9329,
      "step": 212321
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3481736183166504,
      "learning_rate": 9.071075505194003e-06,
      "loss": 2.8293,
      "step": 212322
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.7341012954711914,
      "learning_rate": 9.070077237555418e-06,
      "loss": 2.819,
      "step": 212323
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.4333248138427734,
      "learning_rate": 9.069079024006232e-06,
      "loss": 3.0657,
      "step": 212324
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1115524768829346,
      "learning_rate": 9.068080864546511e-06,
      "loss": 2.911,
      "step": 212325
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.933797836303711,
      "learning_rate": 9.067082759176592e-06,
      "loss": 2.7145,
      "step": 212326
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.000117540359497,
      "learning_rate": 9.066084707896538e-06,
      "loss": 2.9504,
      "step": 212327
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5537681579589844,
      "learning_rate": 9.065086710706648e-06,
      "loss": 2.6605,
      "step": 212328
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3021535873413086,
      "learning_rate": 9.064088767607024e-06,
      "loss": 2.8667,
      "step": 212329
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.457719564437866,
      "learning_rate": 9.0630908785979e-06,
      "loss": 2.755,
      "step": 212330
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.2487263679504395,
      "learning_rate": 9.062093043679409e-06,
      "loss": 2.9776,
      "step": 212331
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5029261112213135,
      "learning_rate": 9.061095262851781e-06,
      "loss": 2.6075,
      "step": 212332
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8015449047088623,
      "learning_rate": 9.060097536115152e-06,
      "loss": 3.0289,
      "step": 212333
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.811337471008301,
      "learning_rate": 9.059099863469788e-06,
      "loss": 2.9402,
      "step": 212334
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.9826836585998535,
      "learning_rate": 9.058102244915822e-06,
      "loss": 2.7745,
      "step": 212335
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.291876792907715,
      "learning_rate": 9.05710468045342e-06,
      "loss": 3.0908,
      "step": 212336
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.5191805362701416,
      "learning_rate": 9.056107170082815e-06,
      "loss": 3.0346,
      "step": 212337
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.019691228866577,
      "learning_rate": 9.05510971380421e-06,
      "loss": 2.8387,
      "step": 212338
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.073840618133545,
      "learning_rate": 9.054112311617668e-06,
      "loss": 2.9654,
      "step": 212339
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.223073720932007,
      "learning_rate": 9.053114963523523e-06,
      "loss": 2.9928,
      "step": 212340
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.1488349437713623,
      "learning_rate": 9.052117669521875e-06,
      "loss": 2.9214,
      "step": 212341
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.969785213470459,
      "learning_rate": 9.051120429612924e-06,
      "loss": 2.8743,
      "step": 212342
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.98616099357605,
      "learning_rate": 9.05012324379687e-06,
      "loss": 2.8917,
      "step": 212343
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.449812412261963,
      "learning_rate": 9.04912611207388e-06,
      "loss": 2.9862,
      "step": 212344
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.8553836345672607,
      "learning_rate": 9.048129034444151e-06,
      "loss": 3.0352,
      "step": 212345
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.044293165206909,
      "learning_rate": 9.047132010907886e-06,
      "loss": 3.1542,
      "step": 212346
    },
    {
      "epoch": 2.76,
      "grad_norm": 4.3206915855407715,
      "learning_rate": 9.046135041465252e-06,
      "loss": 2.8351,
      "step": 212347
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0047991275787354,
      "learning_rate": 9.045138126116414e-06,
      "loss": 2.7555,
      "step": 212348
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.5540897846221924,
      "learning_rate": 9.044141264861605e-06,
      "loss": 2.8523,
      "step": 212349
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.087592363357544,
      "learning_rate": 9.043144457700925e-06,
      "loss": 2.7052,
      "step": 212350
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0154342651367188,
      "learning_rate": 9.042147704634672e-06,
      "loss": 2.7396,
      "step": 212351
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.4524903297424316,
      "learning_rate": 9.041151005662983e-06,
      "loss": 2.8478,
      "step": 212352
    },
    {
      "epoch": 2.76,
      "grad_norm": 2.799551248550415,
      "learning_rate": 9.040154360786023e-06,
      "loss": 2.9449,
      "step": 212353
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.0803298950195312,
      "learning_rate": 9.039157770003958e-06,
      "loss": 2.9547,
      "step": 212354
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7852346897125244,
      "learning_rate": 9.03816123331702e-06,
      "loss": 2.9971,
      "step": 212355
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.193986654281616,
      "learning_rate": 9.03716475072538e-06,
      "loss": 2.873,
      "step": 212356
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5519161224365234,
      "learning_rate": 9.036168322229232e-06,
      "loss": 2.9219,
      "step": 212357
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8417859077453613,
      "learning_rate": 9.035171947828779e-06,
      "loss": 3.0814,
      "step": 212358
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.888822317123413,
      "learning_rate": 9.034175627524155e-06,
      "loss": 3.0821,
      "step": 212359
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9451966285705566,
      "learning_rate": 9.033179361315557e-06,
      "loss": 2.9429,
      "step": 212360
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.246434450149536,
      "learning_rate": 9.032183149203187e-06,
      "loss": 2.8856,
      "step": 212361
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6986608505249023,
      "learning_rate": 9.031186991187212e-06,
      "loss": 3.2444,
      "step": 212362
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.466464042663574,
      "learning_rate": 9.030190887267862e-06,
      "loss": 2.8865,
      "step": 212363
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.731356143951416,
      "learning_rate": 9.029194837445309e-06,
      "loss": 3.0798,
      "step": 212364
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6355080604553223,
      "learning_rate": 9.028198841719681e-06,
      "loss": 2.9048,
      "step": 212365
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1176280975341797,
      "learning_rate": 9.027202900091212e-06,
      "loss": 2.9151,
      "step": 212366
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5838515758514404,
      "learning_rate": 9.026207012560105e-06,
      "loss": 2.7995,
      "step": 212367
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6023757457733154,
      "learning_rate": 9.025211179126457e-06,
      "loss": 2.9169,
      "step": 212368
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9651877880096436,
      "learning_rate": 9.02421539979057e-06,
      "loss": 2.853,
      "step": 212369
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.548260450363159,
      "learning_rate": 9.023219674552572e-06,
      "loss": 2.8362,
      "step": 212370
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1407814025878906,
      "learning_rate": 9.02222400341267e-06,
      "loss": 2.796,
      "step": 212371
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9663047790527344,
      "learning_rate": 9.02122838637096e-06,
      "loss": 2.8841,
      "step": 212372
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7739288806915283,
      "learning_rate": 9.020232823427743e-06,
      "loss": 2.8824,
      "step": 212373
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7670414447784424,
      "learning_rate": 9.019237314583117e-06,
      "loss": 2.9407,
      "step": 212374
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5135393142700195,
      "learning_rate": 9.01824185983735e-06,
      "loss": 2.8377,
      "step": 212375
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.709803342819214,
      "learning_rate": 9.017246459190608e-06,
      "loss": 2.9093,
      "step": 212376
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.916414499282837,
      "learning_rate": 9.016251112643025e-06,
      "loss": 2.7082,
      "step": 212377
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6894876956939697,
      "learning_rate": 9.015255820194766e-06,
      "loss": 3.0631,
      "step": 212378
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.840954542160034,
      "learning_rate": 9.014260581846134e-06,
      "loss": 3.0618,
      "step": 212379
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.972520589828491,
      "learning_rate": 9.01326539759719e-06,
      "loss": 3.1615,
      "step": 212380
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8056232929229736,
      "learning_rate": 9.012270267448207e-06,
      "loss": 2.9757,
      "step": 212381
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.880537986755371,
      "learning_rate": 9.011275191399315e-06,
      "loss": 2.7733,
      "step": 212382
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9952774047851562,
      "learning_rate": 9.010280169450745e-06,
      "loss": 2.8421,
      "step": 212383
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.59963059425354,
      "learning_rate": 9.009285201602635e-06,
      "loss": 3.0014,
      "step": 212384
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.919285535812378,
      "learning_rate": 9.008290287855213e-06,
      "loss": 2.7499,
      "step": 212385
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0853755474090576,
      "learning_rate": 9.007295428208583e-06,
      "loss": 2.9647,
      "step": 212386
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.656991481781006,
      "learning_rate": 9.006300622663043e-06,
      "loss": 3.0854,
      "step": 212387
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.948007583618164,
      "learning_rate": 9.005305871218693e-06,
      "loss": 2.9363,
      "step": 212388
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7420430183410645,
      "learning_rate": 9.004311173875833e-06,
      "loss": 3.042,
      "step": 212389
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6907577514648438,
      "learning_rate": 9.003316530634497e-06,
      "loss": 3.0959,
      "step": 212390
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0352115631103516,
      "learning_rate": 9.00232194149495e-06,
      "loss": 2.7214,
      "step": 212391
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.587944984436035,
      "learning_rate": 9.001327406457325e-06,
      "loss": 2.9794,
      "step": 212392
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.777700185775757,
      "learning_rate": 9.000332925521892e-06,
      "loss": 2.7365,
      "step": 212393
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3104381561279297,
      "learning_rate": 8.999338498688746e-06,
      "loss": 2.8166,
      "step": 212394
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.503411054611206,
      "learning_rate": 8.99834412595819e-06,
      "loss": 2.8619,
      "step": 212395
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7401514053344727,
      "learning_rate": 8.997349807330289e-06,
      "loss": 2.9087,
      "step": 212396
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7569997310638428,
      "learning_rate": 8.996355542805278e-06,
      "loss": 2.9626,
      "step": 212397
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.715658664703369,
      "learning_rate": 8.995361332383322e-06,
      "loss": 3.1303,
      "step": 212398
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.825673818588257,
      "learning_rate": 8.994367176064654e-06,
      "loss": 2.9271,
      "step": 212399
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5118560791015625,
      "learning_rate": 8.993373073849374e-06,
      "loss": 2.6028,
      "step": 212400
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.653959035873413,
      "learning_rate": 8.992379025737817e-06,
      "loss": 2.8371,
      "step": 212401
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9751107692718506,
      "learning_rate": 8.99138503172998e-06,
      "loss": 3.0144,
      "step": 212402
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8226308822631836,
      "learning_rate": 8.990391091826166e-06,
      "loss": 2.9248,
      "step": 212403
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5723400115966797,
      "learning_rate": 8.989397206026538e-06,
      "loss": 2.9881,
      "step": 212404
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.261744737625122,
      "learning_rate": 8.988403374331266e-06,
      "loss": 3.0662,
      "step": 212405
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1252989768981934,
      "learning_rate": 8.987409596740513e-06,
      "loss": 3.0165,
      "step": 212406
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4591434001922607,
      "learning_rate": 8.986415873254548e-06,
      "loss": 2.8453,
      "step": 212407
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.307286024093628,
      "learning_rate": 8.985422203873472e-06,
      "loss": 2.7534,
      "step": 212408
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3524956703186035,
      "learning_rate": 8.984428588597515e-06,
      "loss": 3.1336,
      "step": 212409
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.046180486679077,
      "learning_rate": 8.983435027426844e-06,
      "loss": 2.8466,
      "step": 212410
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2982101440429688,
      "learning_rate": 8.982441520361628e-06,
      "loss": 3.2274,
      "step": 212411
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2172791957855225,
      "learning_rate": 8.981448067402064e-06,
      "loss": 3.2173,
      "step": 212412
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7670509815216064,
      "learning_rate": 8.980454668548386e-06,
      "loss": 2.9105,
      "step": 212413
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.543433666229248,
      "learning_rate": 8.979461323800697e-06,
      "loss": 2.8523,
      "step": 212414
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.520235776901245,
      "learning_rate": 8.97846803315926e-06,
      "loss": 2.8305,
      "step": 212415
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.652818441390991,
      "learning_rate": 8.977474796624174e-06,
      "loss": 2.8633,
      "step": 212416
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.944272041320801,
      "learning_rate": 8.976481614195675e-06,
      "loss": 2.8147,
      "step": 212417
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.898364543914795,
      "learning_rate": 8.975488485873927e-06,
      "loss": 2.8386,
      "step": 212418
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5861005783081055,
      "learning_rate": 8.974495411659166e-06,
      "loss": 2.7768,
      "step": 212419
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.677121877670288,
      "learning_rate": 8.973502391551556e-06,
      "loss": 2.9898,
      "step": 212420
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.968034505844116,
      "learning_rate": 8.9725094255512e-06,
      "loss": 2.9562,
      "step": 212421
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9921391010284424,
      "learning_rate": 8.971516513658395e-06,
      "loss": 3.0995,
      "step": 212422
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7831013202667236,
      "learning_rate": 8.970523655873274e-06,
      "loss": 3.0793,
      "step": 212423
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.113631010055542,
      "learning_rate": 8.969530852196006e-06,
      "loss": 2.932,
      "step": 212424
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9836459159851074,
      "learning_rate": 8.968538102626821e-06,
      "loss": 2.7724,
      "step": 212425
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8012583255767822,
      "learning_rate": 8.967545407165888e-06,
      "loss": 2.7677,
      "step": 212426
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5339066982269287,
      "learning_rate": 8.96655276581334e-06,
      "loss": 3.1487,
      "step": 212427
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0023114681243896,
      "learning_rate": 8.965560178569441e-06,
      "loss": 2.9285,
      "step": 212428
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.861564874649048,
      "learning_rate": 8.964567645434329e-06,
      "loss": 2.8262,
      "step": 212429
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.759248733520508,
      "learning_rate": 8.963575166408166e-06,
      "loss": 2.8023,
      "step": 212430
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2042508125305176,
      "learning_rate": 8.962582741491187e-06,
      "loss": 3.0209,
      "step": 212431
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5007054805755615,
      "learning_rate": 8.961590370683591e-06,
      "loss": 3.0485,
      "step": 212432
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.771095037460327,
      "learning_rate": 8.960598053985479e-06,
      "loss": 2.804,
      "step": 212433
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7132508754730225,
      "learning_rate": 8.959605791397118e-06,
      "loss": 2.7201,
      "step": 212434
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7258925437927246,
      "learning_rate": 8.958613582918638e-06,
      "loss": 2.9549,
      "step": 212435
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7556238174438477,
      "learning_rate": 8.957621428550244e-06,
      "loss": 2.7437,
      "step": 212436
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6806693077087402,
      "learning_rate": 8.956629328292165e-06,
      "loss": 2.968,
      "step": 212437
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.370973825454712,
      "learning_rate": 8.955637282144534e-06,
      "loss": 2.907,
      "step": 212438
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.145120859146118,
      "learning_rate": 8.954645290107487e-06,
      "loss": 2.7466,
      "step": 212439
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.798093795776367,
      "learning_rate": 8.953653352181322e-06,
      "loss": 2.9426,
      "step": 212440
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.549351215362549,
      "learning_rate": 8.952661468366107e-06,
      "loss": 3.0737,
      "step": 212441
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.981597900390625,
      "learning_rate": 8.95166963866214e-06,
      "loss": 2.8869,
      "step": 212442
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2246291637420654,
      "learning_rate": 8.950677863069555e-06,
      "loss": 2.7844,
      "step": 212443
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.914966106414795,
      "learning_rate": 8.94968614158852e-06,
      "loss": 2.7159,
      "step": 212444
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8019680976867676,
      "learning_rate": 8.948694474219198e-06,
      "loss": 2.9628,
      "step": 212445
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.996769905090332,
      "learning_rate": 8.947702860961858e-06,
      "loss": 3.0177,
      "step": 212446
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.720872640609741,
      "learning_rate": 8.9467113018166e-06,
      "loss": 2.936,
      "step": 212447
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8999805450439453,
      "learning_rate": 8.945719796783656e-06,
      "loss": 2.9569,
      "step": 212448
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1250483989715576,
      "learning_rate": 8.944728345863228e-06,
      "loss": 3.031,
      "step": 212449
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8230907917022705,
      "learning_rate": 8.943736949055447e-06,
      "loss": 2.8345,
      "step": 212450
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9288926124572754,
      "learning_rate": 8.94274560636048e-06,
      "loss": 2.857,
      "step": 212451
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0643253326416016,
      "learning_rate": 8.941754317778593e-06,
      "loss": 2.8514,
      "step": 212452
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.6782307624816895,
      "learning_rate": 8.940763083309888e-06,
      "loss": 2.8666,
      "step": 212453
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.469494581222534,
      "learning_rate": 8.93977190295463e-06,
      "loss": 2.6618,
      "step": 212454
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.431957960128784,
      "learning_rate": 8.938780776712918e-06,
      "loss": 3.1659,
      "step": 212455
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.938204288482666,
      "learning_rate": 8.937789704585085e-06,
      "loss": 2.8435,
      "step": 212456
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2177536487579346,
      "learning_rate": 8.9367986865711e-06,
      "loss": 2.8441,
      "step": 212457
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3518688678741455,
      "learning_rate": 8.935807722671295e-06,
      "loss": 3.043,
      "step": 212458
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.386110544204712,
      "learning_rate": 8.934816812885803e-06,
      "loss": 2.8534,
      "step": 212459
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9065043926239014,
      "learning_rate": 8.933825957214857e-06,
      "loss": 3.1537,
      "step": 212460
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.901266574859619,
      "learning_rate": 8.932835155658558e-06,
      "loss": 3.2662,
      "step": 212461
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4982001781463623,
      "learning_rate": 8.931844408217238e-06,
      "loss": 3.1835,
      "step": 212462
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8035683631896973,
      "learning_rate": 8.930853714890896e-06,
      "loss": 2.647,
      "step": 212463
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6051318645477295,
      "learning_rate": 8.929863075679834e-06,
      "loss": 2.7851,
      "step": 212464
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.956718683242798,
      "learning_rate": 8.92887249058415e-06,
      "loss": 2.9381,
      "step": 212465
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7489395141601562,
      "learning_rate": 8.927881959604144e-06,
      "loss": 2.9418,
      "step": 212466
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.662200927734375,
      "learning_rate": 8.926891482739884e-06,
      "loss": 2.9819,
      "step": 212467
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.21494460105896,
      "learning_rate": 8.925901059991703e-06,
      "loss": 3.0592,
      "step": 212468
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.879047393798828,
      "learning_rate": 8.924910691359632e-06,
      "loss": 2.9491,
      "step": 212469
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7851366996765137,
      "learning_rate": 8.923920376843907e-06,
      "loss": 2.9059,
      "step": 212470
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9490230083465576,
      "learning_rate": 8.922930116444727e-06,
      "loss": 2.7711,
      "step": 212471
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.633457660675049,
      "learning_rate": 8.921939910162257e-06,
      "loss": 2.8894,
      "step": 212472
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.947511911392212,
      "learning_rate": 8.920949757996698e-06,
      "loss": 3.1302,
      "step": 212473
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4178199768066406,
      "learning_rate": 8.919959659948284e-06,
      "loss": 2.9416,
      "step": 212474
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.975630283355713,
      "learning_rate": 8.91896961601708e-06,
      "loss": 3.0897,
      "step": 212475
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7611961364746094,
      "learning_rate": 8.917979626203353e-06,
      "loss": 3.0073,
      "step": 212476
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7081189155578613,
      "learning_rate": 8.91698969050727e-06,
      "loss": 2.925,
      "step": 212477
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.754585027694702,
      "learning_rate": 8.915999808928997e-06,
      "loss": 2.9142,
      "step": 212478
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8239505290985107,
      "learning_rate": 8.915009981468734e-06,
      "loss": 2.953,
      "step": 212479
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.590214252471924,
      "learning_rate": 8.914020208126715e-06,
      "loss": 2.686,
      "step": 212480
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3751559257507324,
      "learning_rate": 8.91303048890304e-06,
      "loss": 3.1154,
      "step": 212481
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2152059078216553,
      "learning_rate": 8.912040823797939e-06,
      "loss": 2.9703,
      "step": 212482
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5136780738830566,
      "learning_rate": 8.911051212811549e-06,
      "loss": 2.757,
      "step": 212483
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6287105083465576,
      "learning_rate": 8.910061655944101e-06,
      "loss": 3.1332,
      "step": 212484
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.423537492752075,
      "learning_rate": 8.909072153195762e-06,
      "loss": 2.7686,
      "step": 212485
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6388421058654785,
      "learning_rate": 8.908082704566799e-06,
      "loss": 2.8304,
      "step": 212486
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0917134284973145,
      "learning_rate": 8.907093310057212e-06,
      "loss": 2.8717,
      "step": 212487
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.240891933441162,
      "learning_rate": 8.906103969667366e-06,
      "loss": 3.0522,
      "step": 212488
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.646148443222046,
      "learning_rate": 8.905114683397297e-06,
      "loss": 3.0046,
      "step": 212489
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.0638837814331055,
      "learning_rate": 8.904125451247335e-06,
      "loss": 2.7627,
      "step": 212490
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6741819381713867,
      "learning_rate": 8.903136273217548e-06,
      "loss": 2.9081,
      "step": 212491
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.84698224067688,
      "learning_rate": 8.902147149308203e-06,
      "loss": 2.9075,
      "step": 212492
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9460182189941406,
      "learning_rate": 8.9011580795194e-06,
      "loss": 2.7916,
      "step": 212493
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9413657188415527,
      "learning_rate": 8.900169063851404e-06,
      "loss": 2.8233,
      "step": 212494
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.537766933441162,
      "learning_rate": 8.899180102304316e-06,
      "loss": 2.8236,
      "step": 212495
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0454390048980713,
      "learning_rate": 8.898191194878401e-06,
      "loss": 2.8545,
      "step": 212496
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7717833518981934,
      "learning_rate": 8.897202341573762e-06,
      "loss": 2.8155,
      "step": 212497
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9895687103271484,
      "learning_rate": 8.896213542390662e-06,
      "loss": 2.9671,
      "step": 212498
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.34671950340271,
      "learning_rate": 8.89522479732927e-06,
      "loss": 2.6628,
      "step": 212499
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.887698173522949,
      "learning_rate": 8.894236106389751e-06,
      "loss": 2.9534,
      "step": 212500
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1336922645568848,
      "learning_rate": 8.89324746957224e-06,
      "loss": 2.8861,
      "step": 212501
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5124013423919678,
      "learning_rate": 8.892258886877002e-06,
      "loss": 3.0599,
      "step": 212502
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.699812889099121,
      "learning_rate": 8.89127035830417e-06,
      "loss": 3.1803,
      "step": 212503
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5016934871673584,
      "learning_rate": 8.890281883853978e-06,
      "loss": 3.043,
      "step": 212504
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5024259090423584,
      "learning_rate": 8.889293463526558e-06,
      "loss": 2.911,
      "step": 212505
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1714205741882324,
      "learning_rate": 8.888305097322113e-06,
      "loss": 2.928,
      "step": 212506
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.911653757095337,
      "learning_rate": 8.88731678524084e-06,
      "loss": 3.0397,
      "step": 212507
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.566397190093994,
      "learning_rate": 8.886328527282904e-06,
      "loss": 2.8693,
      "step": 212508
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.747248888015747,
      "learning_rate": 8.885340323448476e-06,
      "loss": 2.9139,
      "step": 212509
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.797173023223877,
      "learning_rate": 8.884352173737785e-06,
      "loss": 2.8266,
      "step": 212510
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.647740602493286,
      "learning_rate": 8.883364078151e-06,
      "loss": 2.961,
      "step": 212511
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3520941734313965,
      "learning_rate": 8.882376036688222e-06,
      "loss": 3.0557,
      "step": 212512
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.70958948135376,
      "learning_rate": 8.881388049349781e-06,
      "loss": 2.9945,
      "step": 212513
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.5787458419799805,
      "learning_rate": 8.880400116135778e-06,
      "loss": 2.7565,
      "step": 212514
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.087782859802246,
      "learning_rate": 8.879412237046346e-06,
      "loss": 2.8384,
      "step": 212515
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8317689895629883,
      "learning_rate": 8.878424412081787e-06,
      "loss": 2.9363,
      "step": 212516
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2925543785095215,
      "learning_rate": 8.877436641242197e-06,
      "loss": 3.1826,
      "step": 212517
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0683858394622803,
      "learning_rate": 8.87644892452778e-06,
      "loss": 2.6571,
      "step": 212518
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4315185546875,
      "learning_rate": 8.875461261938732e-06,
      "loss": 2.9943,
      "step": 212519
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.145545482635498,
      "learning_rate": 8.874473653475223e-06,
      "loss": 2.8622,
      "step": 212520
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0338211059570312,
      "learning_rate": 8.873486099137484e-06,
      "loss": 2.6516,
      "step": 212521
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0985376834869385,
      "learning_rate": 8.872498598925648e-06,
      "loss": 3.2574,
      "step": 212522
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3436391353607178,
      "learning_rate": 8.871511152839883e-06,
      "loss": 3.0345,
      "step": 212523
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6858084201812744,
      "learning_rate": 8.870523760880388e-06,
      "loss": 3.1571,
      "step": 212524
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0721731185913086,
      "learning_rate": 8.869536423047396e-06,
      "loss": 2.7041,
      "step": 212525
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5395781993865967,
      "learning_rate": 8.868549139341042e-06,
      "loss": 2.6486,
      "step": 212526
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0837299823760986,
      "learning_rate": 8.867561909761522e-06,
      "loss": 2.7597,
      "step": 212527
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7886335849761963,
      "learning_rate": 8.866574734308974e-06,
      "loss": 2.9047,
      "step": 212528
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.689213991165161,
      "learning_rate": 8.865587612983727e-06,
      "loss": 3.0807,
      "step": 212529
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.014816999435425,
      "learning_rate": 8.864600545785783e-06,
      "loss": 2.9722,
      "step": 212530
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8050754070281982,
      "learning_rate": 8.863613532715441e-06,
      "loss": 2.4995,
      "step": 212531
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.945082187652588,
      "learning_rate": 8.862626573772802e-06,
      "loss": 3.0641,
      "step": 212532
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.960002899169922,
      "learning_rate": 8.861639668958131e-06,
      "loss": 2.9622,
      "step": 212533
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9719667434692383,
      "learning_rate": 8.860652818271562e-06,
      "loss": 2.6954,
      "step": 212534
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.390720844268799,
      "learning_rate": 8.85966602171333e-06,
      "loss": 2.9905,
      "step": 212535
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5597944259643555,
      "learning_rate": 8.858679279283532e-06,
      "loss": 2.8093,
      "step": 212536
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6320459842681885,
      "learning_rate": 8.857692590982434e-06,
      "loss": 2.6761,
      "step": 212537
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6915934085845947,
      "learning_rate": 8.85670595681014e-06,
      "loss": 2.776,
      "step": 212538
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.475663900375366,
      "learning_rate": 8.855719376766946e-06,
      "loss": 2.826,
      "step": 212539
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7103614807128906,
      "learning_rate": 8.85473285085292e-06,
      "loss": 2.6868,
      "step": 212540
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.872772693634033,
      "learning_rate": 8.853746379068361e-06,
      "loss": 2.9205,
      "step": 212541
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.166900396347046,
      "learning_rate": 8.852759961413302e-06,
      "loss": 2.7986,
      "step": 212542
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.952228546142578,
      "learning_rate": 8.851773597888079e-06,
      "loss": 2.8022,
      "step": 212543
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6474289894104004,
      "learning_rate": 8.850787288492755e-06,
      "loss": 3.0263,
      "step": 212544
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.014404058456421,
      "learning_rate": 8.849801033227599e-06,
      "loss": 2.9227,
      "step": 212545
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.836427688598633,
      "learning_rate": 8.84881483209271e-06,
      "loss": 3.0082,
      "step": 212546
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.298673152923584,
      "learning_rate": 8.84782868508842e-06,
      "loss": 3.0144,
      "step": 212547
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2068777084350586,
      "learning_rate": 8.846842592214731e-06,
      "loss": 2.8356,
      "step": 212548
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3790762424468994,
      "learning_rate": 8.845856553471942e-06,
      "loss": 3.1445,
      "step": 212549
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8163974285125732,
      "learning_rate": 8.844870568860185e-06,
      "loss": 2.9674,
      "step": 212550
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.878533363342285,
      "learning_rate": 8.843884638379662e-06,
      "loss": 3.0426,
      "step": 212551
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.367910385131836,
      "learning_rate": 8.84289876203057e-06,
      "loss": 2.8855,
      "step": 212552
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6491992473602295,
      "learning_rate": 8.841912939813111e-06,
      "loss": 2.9094,
      "step": 212553
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.337197780609131,
      "learning_rate": 8.840927171727386e-06,
      "loss": 2.9739,
      "step": 212554
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3719398975372314,
      "learning_rate": 8.839941457773658e-06,
      "loss": 2.8966,
      "step": 212555
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.777557611465454,
      "learning_rate": 8.838955797952063e-06,
      "loss": 3.1148,
      "step": 212556
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.740217924118042,
      "learning_rate": 8.8379701922628e-06,
      "loss": 2.9665,
      "step": 212557
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.116999387741089,
      "learning_rate": 8.836984640706035e-06,
      "loss": 2.7466,
      "step": 212558
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.980530261993408,
      "learning_rate": 8.83599914328207e-06,
      "loss": 3.0609,
      "step": 212559
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.49006724357605,
      "learning_rate": 8.835013699990867e-06,
      "loss": 2.858,
      "step": 212560
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.881577253341675,
      "learning_rate": 8.834028310832798e-06,
      "loss": 2.7985,
      "step": 212561
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4138150215148926,
      "learning_rate": 8.833042975807957e-06,
      "loss": 2.9107,
      "step": 212562
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4157631397247314,
      "learning_rate": 8.83205769491655e-06,
      "loss": 2.8345,
      "step": 212563
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.135573148727417,
      "learning_rate": 8.83107246815874e-06,
      "loss": 3.0982,
      "step": 212564
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8199923038482666,
      "learning_rate": 8.830087295534826e-06,
      "loss": 2.7473,
      "step": 212565
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.980957508087158,
      "learning_rate": 8.829102177044777e-06,
      "loss": 2.9063,
      "step": 212566
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2610785961151123,
      "learning_rate": 8.82811711268896e-06,
      "loss": 3.1076,
      "step": 212567
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.773549795150757,
      "learning_rate": 8.827132102467471e-06,
      "loss": 2.6659,
      "step": 212568
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0145182609558105,
      "learning_rate": 8.826147146380514e-06,
      "loss": 2.8887,
      "step": 212569
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0386669635772705,
      "learning_rate": 8.825162244428253e-06,
      "loss": 2.7321,
      "step": 212570
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6429569721221924,
      "learning_rate": 8.824177396610955e-06,
      "loss": 2.9274,
      "step": 212571
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4121809005737305,
      "learning_rate": 8.823192602928686e-06,
      "loss": 2.967,
      "step": 212572
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.276606559753418,
      "learning_rate": 8.822207863381714e-06,
      "loss": 3.0343,
      "step": 212573
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4164717197418213,
      "learning_rate": 8.821223177970137e-06,
      "loss": 3.0697,
      "step": 212574
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.679567575454712,
      "learning_rate": 8.820238546694225e-06,
      "loss": 2.7553,
      "step": 212575
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1608664989471436,
      "learning_rate": 8.819253969554108e-06,
      "loss": 3.1043,
      "step": 212576
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2819530963897705,
      "learning_rate": 8.818269446550052e-06,
      "loss": 2.8399,
      "step": 212577
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.707996129989624,
      "learning_rate": 8.817284977682127e-06,
      "loss": 2.9439,
      "step": 212578
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.35390043258667,
      "learning_rate": 8.816300562950562e-06,
      "loss": 2.7787,
      "step": 212579
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8841421604156494,
      "learning_rate": 8.815316202355526e-06,
      "loss": 2.9867,
      "step": 212580
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5162065029144287,
      "learning_rate": 8.814331895897252e-06,
      "loss": 2.8288,
      "step": 212581
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5340828895568848,
      "learning_rate": 8.813347643575875e-06,
      "loss": 2.8549,
      "step": 212582
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6198928356170654,
      "learning_rate": 8.81236344539159e-06,
      "loss": 3.0876,
      "step": 212583
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7539868354797363,
      "learning_rate": 8.811379301344601e-06,
      "loss": 2.8592,
      "step": 212584
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.614522933959961,
      "learning_rate": 8.810395211435073e-06,
      "loss": 2.7135,
      "step": 212585
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.908933401107788,
      "learning_rate": 8.809411175663172e-06,
      "loss": 2.8539,
      "step": 212586
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9746243953704834,
      "learning_rate": 8.808427194029133e-06,
      "loss": 2.8091,
      "step": 212587
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8320484161376953,
      "learning_rate": 8.807443266533055e-06,
      "loss": 2.7666,
      "step": 212588
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7879669666290283,
      "learning_rate": 8.806459393175202e-06,
      "loss": 2.5636,
      "step": 212589
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.855703592300415,
      "learning_rate": 8.80547557395571e-06,
      "loss": 2.8056,
      "step": 212590
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5197532176971436,
      "learning_rate": 8.804491808874813e-06,
      "loss": 2.9043,
      "step": 212591
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5214879512786865,
      "learning_rate": 8.803508097932577e-06,
      "loss": 2.8135,
      "step": 212592
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.39174222946167,
      "learning_rate": 8.802524441129333e-06,
      "loss": 3.1231,
      "step": 212593
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0632553100585938,
      "learning_rate": 8.801540838465182e-06,
      "loss": 2.9166,
      "step": 212594
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.820054769515991,
      "learning_rate": 8.800557289940324e-06,
      "loss": 2.9548,
      "step": 212595
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.77532696723938,
      "learning_rate": 8.799573795554925e-06,
      "loss": 3.0034,
      "step": 212596
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1977643966674805,
      "learning_rate": 8.798590355309188e-06,
      "loss": 3.01,
      "step": 212597
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1235804557800293,
      "learning_rate": 8.797606969203308e-06,
      "loss": 2.7645,
      "step": 212598
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9456732273101807,
      "learning_rate": 8.79662363723742e-06,
      "loss": 3.093,
      "step": 212599
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9279282093048096,
      "learning_rate": 8.795640359411726e-06,
      "loss": 2.8274,
      "step": 212600
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.838358163833618,
      "learning_rate": 8.794657135726457e-06,
      "loss": 3.1199,
      "step": 212601
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.504692792892456,
      "learning_rate": 8.79367396618178e-06,
      "loss": 2.8117,
      "step": 212602
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9626777172088623,
      "learning_rate": 8.792690850777794e-06,
      "loss": 2.8046,
      "step": 212603
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.613499879837036,
      "learning_rate": 8.791707789514768e-06,
      "loss": 3.058,
      "step": 212604
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.147379398345947,
      "learning_rate": 8.790724782392866e-06,
      "loss": 2.8771,
      "step": 212605
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.060810089111328,
      "learning_rate": 8.789741829412255e-06,
      "loss": 2.984,
      "step": 212606
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.71506929397583,
      "learning_rate": 8.78875893057317e-06,
      "loss": 3.1052,
      "step": 212607
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.756474494934082,
      "learning_rate": 8.787776085875709e-06,
      "loss": 2.6845,
      "step": 212608
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.419560194015503,
      "learning_rate": 8.786793295320105e-06,
      "loss": 2.9972,
      "step": 212609
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.645057439804077,
      "learning_rate": 8.785810558906525e-06,
      "loss": 2.7664,
      "step": 212610
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1943235397338867,
      "learning_rate": 8.78482787663517e-06,
      "loss": 3.0769,
      "step": 212611
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0573861598968506,
      "learning_rate": 8.783845248506204e-06,
      "loss": 2.7778,
      "step": 212612
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.682892084121704,
      "learning_rate": 8.78286267451983e-06,
      "loss": 3.0405,
      "step": 212613
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9439165592193604,
      "learning_rate": 8.781880154676246e-06,
      "loss": 3.0254,
      "step": 212614
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0036513805389404,
      "learning_rate": 8.780897688975552e-06,
      "loss": 2.7911,
      "step": 212615
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1056854724884033,
      "learning_rate": 8.779915277418015e-06,
      "loss": 2.9883,
      "step": 212616
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.593198537826538,
      "learning_rate": 8.778932920003768e-06,
      "loss": 2.9539,
      "step": 212617
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.088672161102295,
      "learning_rate": 8.777950616733042e-06,
      "loss": 2.7251,
      "step": 212618
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8551180362701416,
      "learning_rate": 8.776968367605975e-06,
      "loss": 2.7846,
      "step": 212619
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1665680408477783,
      "learning_rate": 8.775986172622796e-06,
      "loss": 3.3461,
      "step": 212620
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6453540325164795,
      "learning_rate": 8.775004031783606e-06,
      "loss": 3.0025,
      "step": 212621
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.118638515472412,
      "learning_rate": 8.774021945088705e-06,
      "loss": 2.8201,
      "step": 212622
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.859480381011963,
      "learning_rate": 8.77303991253816e-06,
      "loss": 3.0342,
      "step": 212623
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.886245012283325,
      "learning_rate": 8.772057934132204e-06,
      "loss": 2.7004,
      "step": 212624
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.446127414703369,
      "learning_rate": 8.771076009871036e-06,
      "loss": 2.8735,
      "step": 212625
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8503990173339844,
      "learning_rate": 8.770094139754857e-06,
      "loss": 2.8248,
      "step": 212626
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.195112466812134,
      "learning_rate": 8.769112323783767e-06,
      "loss": 3.1415,
      "step": 212627
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.810530662536621,
      "learning_rate": 8.768130561957997e-06,
      "loss": 2.7162,
      "step": 212628
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.450358867645264,
      "learning_rate": 8.76714885427775e-06,
      "loss": 2.8684,
      "step": 212629
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.112612247467041,
      "learning_rate": 8.76616720074319e-06,
      "loss": 3.0427,
      "step": 212630
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7215802669525146,
      "learning_rate": 8.765185601354452e-06,
      "loss": 2.7924,
      "step": 212631
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.734686851501465,
      "learning_rate": 8.764204056111834e-06,
      "loss": 3.0186,
      "step": 212632
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.96970796585083,
      "learning_rate": 8.763222565015404e-06,
      "loss": 2.7198,
      "step": 212633
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2152199745178223,
      "learning_rate": 8.762241128065394e-06,
      "loss": 2.9343,
      "step": 212634
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.900394916534424,
      "learning_rate": 8.761259745261972e-06,
      "loss": 2.9749,
      "step": 212635
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.56097412109375,
      "learning_rate": 8.760278416605338e-06,
      "loss": 3.2162,
      "step": 212636
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.895996332168579,
      "learning_rate": 8.759297142095656e-06,
      "loss": 2.9832,
      "step": 212637
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5242207050323486,
      "learning_rate": 8.75831592173316e-06,
      "loss": 3.1928,
      "step": 212638
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2340335845947266,
      "learning_rate": 8.757334755517952e-06,
      "loss": 2.9309,
      "step": 212639
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.69828462600708,
      "learning_rate": 8.756353643450265e-06,
      "loss": 2.8254,
      "step": 212640
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.205235004425049,
      "learning_rate": 8.755372585530262e-06,
      "loss": 2.9902,
      "step": 212641
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7462828159332275,
      "learning_rate": 8.754391581758146e-06,
      "loss": 2.9067,
      "step": 212642
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.771108627319336,
      "learning_rate": 8.75341063213405e-06,
      "loss": 2.7727,
      "step": 212643
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.670583486557007,
      "learning_rate": 8.752429736658273e-06,
      "loss": 2.9229,
      "step": 212644
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.189723014831543,
      "learning_rate": 8.751448895330847e-06,
      "loss": 2.8792,
      "step": 212645
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.75221848487854,
      "learning_rate": 8.750468108152043e-06,
      "loss": 2.8992,
      "step": 212646
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.626322031021118,
      "learning_rate": 8.749487375121989e-06,
      "loss": 3.174,
      "step": 212647
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.362510919570923,
      "learning_rate": 8.748506696240953e-06,
      "loss": 2.9949,
      "step": 212648
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4047627449035645,
      "learning_rate": 8.747526071509037e-06,
      "loss": 2.9297,
      "step": 212649
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8127763271331787,
      "learning_rate": 8.746545500926504e-06,
      "loss": 3.1575,
      "step": 212650
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.359273910522461,
      "learning_rate": 8.745564984493424e-06,
      "loss": 2.7552,
      "step": 212651
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3720908164978027,
      "learning_rate": 8.744584522210097e-06,
      "loss": 3.009,
      "step": 212652
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9564015865325928,
      "learning_rate": 8.743604114076586e-06,
      "loss": 2.9094,
      "step": 212653
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.786752939224243,
      "learning_rate": 8.742623760093192e-06,
      "loss": 2.8555,
      "step": 212654
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.036379814147949,
      "learning_rate": 8.741643460260018e-06,
      "loss": 3.1298,
      "step": 212655
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7224652767181396,
      "learning_rate": 8.740663214577259e-06,
      "loss": 3.0074,
      "step": 212656
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8571155071258545,
      "learning_rate": 8.739683023045151e-06,
      "loss": 2.9062,
      "step": 212657
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.710416555404663,
      "learning_rate": 8.738702885663796e-06,
      "loss": 2.895,
      "step": 212658
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.472341775894165,
      "learning_rate": 8.737722802433423e-06,
      "loss": 2.9595,
      "step": 212659
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.030623197555542,
      "learning_rate": 8.7367427733542e-06,
      "loss": 2.8618,
      "step": 212660
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.969172239303589,
      "learning_rate": 8.735762798426327e-06,
      "loss": 2.7585,
      "step": 212661
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.634871006011963,
      "learning_rate": 8.73478287764997e-06,
      "loss": 3.147,
      "step": 212662
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.332463026046753,
      "learning_rate": 8.73380301102533e-06,
      "loss": 2.8849,
      "step": 212663
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8027701377868652,
      "learning_rate": 8.732823198552575e-06,
      "loss": 2.9244,
      "step": 212664
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.501058578491211,
      "learning_rate": 8.731843440231835e-06,
      "loss": 2.9639,
      "step": 212665
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.534780263900757,
      "learning_rate": 8.730863736063376e-06,
      "loss": 3.1838,
      "step": 212666
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.072892189025879,
      "learning_rate": 8.729884086047333e-06,
      "loss": 2.886,
      "step": 212667
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8843650817871094,
      "learning_rate": 8.728904490183941e-06,
      "loss": 3.0585,
      "step": 212668
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3850691318511963,
      "learning_rate": 8.72792494847333e-06,
      "loss": 2.9651,
      "step": 212669
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7596325874328613,
      "learning_rate": 8.726945460915701e-06,
      "loss": 2.7929,
      "step": 212670
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.039034843444824,
      "learning_rate": 8.725966027511189e-06,
      "loss": 2.7566,
      "step": 212671
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5988497734069824,
      "learning_rate": 8.724986648260058e-06,
      "loss": 3.0272,
      "step": 212672
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6648619174957275,
      "learning_rate": 8.724007323162408e-06,
      "loss": 3.1082,
      "step": 212673
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.9127869606018066,
      "learning_rate": 8.723028052218506e-06,
      "loss": 2.9702,
      "step": 212674
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.459588527679443,
      "learning_rate": 8.722048835428486e-06,
      "loss": 2.7609,
      "step": 212675
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.9248569011688232,
      "learning_rate": 8.721069672792546e-06,
      "loss": 2.8831,
      "step": 212676
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8882532119750977,
      "learning_rate": 8.720090564310823e-06,
      "loss": 2.8636,
      "step": 212677
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.764698028564453,
      "learning_rate": 8.719111509983578e-06,
      "loss": 3.1854,
      "step": 212678
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7089457511901855,
      "learning_rate": 8.718132509810882e-06,
      "loss": 3.0733,
      "step": 212679
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4983322620391846,
      "learning_rate": 8.717153563793034e-06,
      "loss": 2.7487,
      "step": 212680
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7713048458099365,
      "learning_rate": 8.716174671930165e-06,
      "loss": 3.1276,
      "step": 212681
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8815040588378906,
      "learning_rate": 8.715195834222443e-06,
      "loss": 2.9273,
      "step": 212682
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.385864496231079,
      "learning_rate": 8.714217050670068e-06,
      "loss": 2.8955,
      "step": 212683
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7985363006591797,
      "learning_rate": 8.713238321273241e-06,
      "loss": 3.048,
      "step": 212684
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8107712268829346,
      "learning_rate": 8.71225964603206e-06,
      "loss": 2.8804,
      "step": 212685
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.750535488128662,
      "learning_rate": 8.711281024946825e-06,
      "loss": 2.9515,
      "step": 212686
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.118945598602295,
      "learning_rate": 8.710302458017637e-06,
      "loss": 2.8184,
      "step": 212687
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9298205375671387,
      "learning_rate": 8.709323945244696e-06,
      "loss": 2.8143,
      "step": 212688
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4340062141418457,
      "learning_rate": 8.708345486628232e-06,
      "loss": 3.0491,
      "step": 212689
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.680821180343628,
      "learning_rate": 8.707367082168315e-06,
      "loss": 2.9414,
      "step": 212690
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.669222354888916,
      "learning_rate": 8.706388731865244e-06,
      "loss": 3.0564,
      "step": 212691
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3976244926452637,
      "learning_rate": 8.705410435719185e-06,
      "loss": 2.9458,
      "step": 212692
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0241456031799316,
      "learning_rate": 8.704432193730237e-06,
      "loss": 2.8999,
      "step": 212693
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2480051517486572,
      "learning_rate": 8.703454005898636e-06,
      "loss": 2.8743,
      "step": 212694
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8273391723632812,
      "learning_rate": 8.70247587222458e-06,
      "loss": 2.9925,
      "step": 212695
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9399759769439697,
      "learning_rate": 8.701497792708235e-06,
      "loss": 2.7442,
      "step": 212696
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.532670497894287,
      "learning_rate": 8.700519767349767e-06,
      "loss": 2.9412,
      "step": 212697
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3302001953125,
      "learning_rate": 8.699541796149378e-06,
      "loss": 3.0897,
      "step": 212698
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.041386604309082,
      "learning_rate": 8.698563879107268e-06,
      "loss": 3.1288,
      "step": 212699
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7151687145233154,
      "learning_rate": 8.697586016223535e-06,
      "loss": 2.889,
      "step": 212700
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7776708602905273,
      "learning_rate": 8.696608207498479e-06,
      "loss": 2.9392,
      "step": 212701
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.922123908996582,
      "learning_rate": 8.695630452932167e-06,
      "loss": 3.0815,
      "step": 212702
    },
    {
      "epoch": 2.77,
      "grad_norm": 6.061809539794922,
      "learning_rate": 8.694652752524867e-06,
      "loss": 2.633,
      "step": 212703
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3830647468566895,
      "learning_rate": 8.69367510627671e-06,
      "loss": 2.8309,
      "step": 212704
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2999279499053955,
      "learning_rate": 8.692697514187963e-06,
      "loss": 2.8154,
      "step": 212705
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.61659836769104,
      "learning_rate": 8.691719976258659e-06,
      "loss": 2.8162,
      "step": 212706
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.882523775100708,
      "learning_rate": 8.690742492489133e-06,
      "loss": 2.9885,
      "step": 212707
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9701008796691895,
      "learning_rate": 8.689765062879417e-06,
      "loss": 2.8531,
      "step": 212708
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.282100677490234,
      "learning_rate": 8.688787687429843e-06,
      "loss": 3.3985,
      "step": 212709
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7614669799804688,
      "learning_rate": 8.68781036614048e-06,
      "loss": 2.9957,
      "step": 212710
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.208961248397827,
      "learning_rate": 8.686833099011592e-06,
      "loss": 2.8626,
      "step": 212711
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.244481086730957,
      "learning_rate": 8.68585588604328e-06,
      "loss": 2.8464,
      "step": 212712
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.926204204559326,
      "learning_rate": 8.68487872723581e-06,
      "loss": 3.0076,
      "step": 212713
    },
    {
      "epoch": 2.77,
      "grad_norm": 5.480041027069092,
      "learning_rate": 8.683901622589251e-06,
      "loss": 2.7482,
      "step": 212714
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4873766899108887,
      "learning_rate": 8.682924572103934e-06,
      "loss": 2.9324,
      "step": 212715
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.959615468978882,
      "learning_rate": 8.681947575779891e-06,
      "loss": 3.2061,
      "step": 212716
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8674492835998535,
      "learning_rate": 8.680970633617457e-06,
      "loss": 2.6363,
      "step": 212717
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1957688331604004,
      "learning_rate": 8.679993745616631e-06,
      "loss": 2.957,
      "step": 212718
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6603190898895264,
      "learning_rate": 8.679016911777747e-06,
      "loss": 2.8121,
      "step": 212719
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.262862205505371,
      "learning_rate": 8.678040132100905e-06,
      "loss": 2.8321,
      "step": 212720
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9597291946411133,
      "learning_rate": 8.677063406586371e-06,
      "loss": 3.1929,
      "step": 212721
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8371095657348633,
      "learning_rate": 8.676086735234212e-06,
      "loss": 3.0133,
      "step": 212722
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0523674488067627,
      "learning_rate": 8.675110118044692e-06,
      "loss": 3.008,
      "step": 212723
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0059919357299805,
      "learning_rate": 8.674133555017982e-06,
      "loss": 2.5746,
      "step": 212724
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.880615711212158,
      "learning_rate": 8.673157046154243e-06,
      "loss": 2.7835,
      "step": 212725
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.308285713195801,
      "learning_rate": 8.672180591453615e-06,
      "loss": 2.9172,
      "step": 212726
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.852468490600586,
      "learning_rate": 8.671204190916392e-06,
      "loss": 3.0504,
      "step": 212727
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2365050315856934,
      "learning_rate": 8.670227844542643e-06,
      "loss": 2.5487,
      "step": 212728
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.222225666046143,
      "learning_rate": 8.669251552332634e-06,
      "loss": 3.0981,
      "step": 212729
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9477427005767822,
      "learning_rate": 8.6682753142865e-06,
      "loss": 3.1014,
      "step": 212730
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6422266960144043,
      "learning_rate": 8.667299130404437e-06,
      "loss": 3.0389,
      "step": 212731
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5532755851745605,
      "learning_rate": 8.666323000686615e-06,
      "loss": 2.8089,
      "step": 212732
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.062370777130127,
      "learning_rate": 8.665346925133231e-06,
      "loss": 2.8318,
      "step": 212733
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.615143299102783,
      "learning_rate": 8.664370903744421e-06,
      "loss": 2.9893,
      "step": 212734
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.260377407073975,
      "learning_rate": 8.663394936520451e-06,
      "loss": 2.9111,
      "step": 212735
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8335909843444824,
      "learning_rate": 8.662419023461453e-06,
      "loss": 2.7981,
      "step": 212736
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2458584308624268,
      "learning_rate": 8.661443164567594e-06,
      "loss": 3.0508,
      "step": 212737
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0723466873168945,
      "learning_rate": 8.660467359839074e-06,
      "loss": 2.991,
      "step": 212738
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6462457180023193,
      "learning_rate": 8.659491609276058e-06,
      "loss": 2.9372,
      "step": 212739
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.508410930633545,
      "learning_rate": 8.65851591287875e-06,
      "loss": 2.7425,
      "step": 212740
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.136150360107422,
      "learning_rate": 8.657540270647344e-06,
      "loss": 2.8361,
      "step": 212741
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.608537197113037,
      "learning_rate": 8.65656468258198e-06,
      "loss": 3.0568,
      "step": 212742
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5870721340179443,
      "learning_rate": 8.655589148682884e-06,
      "loss": 2.7617,
      "step": 212743
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5634634494781494,
      "learning_rate": 8.654613668950195e-06,
      "loss": 2.9208,
      "step": 212744
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9165608882904053,
      "learning_rate": 8.65363824338411e-06,
      "loss": 2.9743,
      "step": 212745
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5664596557617188,
      "learning_rate": 8.652662871984795e-06,
      "loss": 2.7656,
      "step": 212746
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.014900207519531,
      "learning_rate": 8.651687554752484e-06,
      "loss": 2.9306,
      "step": 212747
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.488016605377197,
      "learning_rate": 8.650712291687345e-06,
      "loss": 2.854,
      "step": 212748
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7904250621795654,
      "learning_rate": 8.64973708278951e-06,
      "loss": 3.0615,
      "step": 212749
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4203221797943115,
      "learning_rate": 8.648761928059144e-06,
      "loss": 3.0299,
      "step": 212750
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.396756410598755,
      "learning_rate": 8.64778682749655e-06,
      "loss": 2.9278,
      "step": 212751
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.173081874847412,
      "learning_rate": 8.64681178110176e-06,
      "loss": 2.9635,
      "step": 212752
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1047816276550293,
      "learning_rate": 8.645836788875072e-06,
      "loss": 3.0037,
      "step": 212753
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3651933670043945,
      "learning_rate": 8.64486185081662e-06,
      "loss": 2.9986,
      "step": 212754
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2197508811950684,
      "learning_rate": 8.643886966926606e-06,
      "loss": 2.96,
      "step": 212755
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.157710075378418,
      "learning_rate": 8.642912137205127e-06,
      "loss": 2.699,
      "step": 212756
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7411022186279297,
      "learning_rate": 8.641937361652484e-06,
      "loss": 2.8808,
      "step": 212757
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.43604040145874,
      "learning_rate": 8.640962640268778e-06,
      "loss": 3.0003,
      "step": 212758
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.586933612823486,
      "learning_rate": 8.639987973054241e-06,
      "loss": 3.0787,
      "step": 212759
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7597527503967285,
      "learning_rate": 8.639013360009039e-06,
      "loss": 2.9145,
      "step": 212760
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.397444725036621,
      "learning_rate": 8.63803880113334e-06,
      "loss": 2.8679,
      "step": 212761
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.69067120552063,
      "learning_rate": 8.637064296427276e-06,
      "loss": 2.7657,
      "step": 212762
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7827672958374023,
      "learning_rate": 8.636089845891147e-06,
      "loss": 3.015,
      "step": 212763
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.427699089050293,
      "learning_rate": 8.63511544952502e-06,
      "loss": 2.802,
      "step": 212764
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.202023029327393,
      "learning_rate": 8.634141107329162e-06,
      "loss": 2.7646,
      "step": 212765
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7305946350097656,
      "learning_rate": 8.633166819303705e-06,
      "loss": 2.8516,
      "step": 212766
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.110148906707764,
      "learning_rate": 8.632192585448815e-06,
      "loss": 2.791,
      "step": 212767
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2080957889556885,
      "learning_rate": 8.631218405764729e-06,
      "loss": 3.0182,
      "step": 212768
    },
    {
      "epoch": 2.77,
      "grad_norm": 6.017460346221924,
      "learning_rate": 8.630244280251607e-06,
      "loss": 2.8554,
      "step": 212769
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.194176435470581,
      "learning_rate": 8.629270208909589e-06,
      "loss": 2.8185,
      "step": 212770
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6642086505889893,
      "learning_rate": 8.628296191738937e-06,
      "loss": 2.8895,
      "step": 212771
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6125104427337646,
      "learning_rate": 8.627322228739753e-06,
      "loss": 2.9441,
      "step": 212772
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.187983989715576,
      "learning_rate": 8.626348319912235e-06,
      "loss": 2.7994,
      "step": 212773
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1660406589508057,
      "learning_rate": 8.625374465256618e-06,
      "loss": 3.0083,
      "step": 212774
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.058682680130005,
      "learning_rate": 8.624400664773034e-06,
      "loss": 2.9989,
      "step": 212775
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.195446729660034,
      "learning_rate": 8.62342691846165e-06,
      "loss": 2.7842,
      "step": 212776
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7891993522644043,
      "learning_rate": 8.622453226322701e-06,
      "loss": 2.83,
      "step": 212777
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8318755626678467,
      "learning_rate": 8.62147958835635e-06,
      "loss": 3.2042,
      "step": 212778
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9023070335388184,
      "learning_rate": 8.6205060045627e-06,
      "loss": 2.8896,
      "step": 212779
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.955080032348633,
      "learning_rate": 8.619532474942048e-06,
      "loss": 2.95,
      "step": 212780
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.16619610786438,
      "learning_rate": 8.618558999494496e-06,
      "loss": 3.07,
      "step": 212781
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.666358232498169,
      "learning_rate": 8.61758557822031e-06,
      "loss": 2.6353,
      "step": 212782
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.554464340209961,
      "learning_rate": 8.616612211119589e-06,
      "loss": 2.8689,
      "step": 212783
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.9252965450286865,
      "learning_rate": 8.615638898192534e-06,
      "loss": 2.6884,
      "step": 212784
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8186676502227783,
      "learning_rate": 8.61466563943931e-06,
      "loss": 3.062,
      "step": 212785
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.543889284133911,
      "learning_rate": 8.613692434860154e-06,
      "loss": 2.8688,
      "step": 212786
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9625020027160645,
      "learning_rate": 8.612719284455161e-06,
      "loss": 2.8152,
      "step": 212787
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.821887731552124,
      "learning_rate": 8.611746188224633e-06,
      "loss": 2.9581,
      "step": 212788
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.105274200439453,
      "learning_rate": 8.610773146168603e-06,
      "loss": 2.8503,
      "step": 212789
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9022037982940674,
      "learning_rate": 8.609800158287405e-06,
      "loss": 2.8896,
      "step": 212790
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.357903003692627,
      "learning_rate": 8.608827224581138e-06,
      "loss": 2.8399,
      "step": 212791
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1038269996643066,
      "learning_rate": 8.607854345049969e-06,
      "loss": 2.998,
      "step": 212792
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5017197132110596,
      "learning_rate": 8.606881519694065e-06,
      "loss": 2.7512,
      "step": 212793
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6471824645996094,
      "learning_rate": 8.60590874851369e-06,
      "loss": 2.7525,
      "step": 212794
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.219334602355957,
      "learning_rate": 8.604936031508946e-06,
      "loss": 2.9961,
      "step": 212795
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.090029239654541,
      "learning_rate": 8.6039633686801e-06,
      "loss": 2.7435,
      "step": 212796
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0130767822265625,
      "learning_rate": 8.60299076002725e-06,
      "loss": 2.8901,
      "step": 212797
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1733365058898926,
      "learning_rate": 8.602018205550598e-06,
      "loss": 2.8008,
      "step": 212798
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2088334560394287,
      "learning_rate": 8.601045705250309e-06,
      "loss": 2.8367,
      "step": 212799
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.539853096008301,
      "learning_rate": 8.600073259126617e-06,
      "loss": 3.0933,
      "step": 212800
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.66774845123291,
      "learning_rate": 8.599100867179653e-06,
      "loss": 2.817,
      "step": 212801
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6352524757385254,
      "learning_rate": 8.59812852940962e-06,
      "loss": 2.8832,
      "step": 212802
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1505558490753174,
      "learning_rate": 8.597156245816716e-06,
      "loss": 2.9645,
      "step": 212803
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.42313289642334,
      "learning_rate": 8.59618401640111e-06,
      "loss": 2.9889,
      "step": 212804
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8161323070526123,
      "learning_rate": 8.59521184116293e-06,
      "loss": 2.9851,
      "step": 212805
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.813643217086792,
      "learning_rate": 8.594239720102414e-06,
      "loss": 2.9154,
      "step": 212806
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.901905059814453,
      "learning_rate": 8.593267653219726e-06,
      "loss": 2.9491,
      "step": 212807
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6806602478027344,
      "learning_rate": 8.592295640515068e-06,
      "loss": 2.8477,
      "step": 212808
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5229392051696777,
      "learning_rate": 8.591323681988604e-06,
      "loss": 3.0081,
      "step": 212809
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0939841270446777,
      "learning_rate": 8.590351777640536e-06,
      "loss": 2.8575,
      "step": 212810
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7912204265594482,
      "learning_rate": 8.589379927470962e-06,
      "loss": 2.9194,
      "step": 212811
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.818939685821533,
      "learning_rate": 8.588408131480151e-06,
      "loss": 2.7944,
      "step": 212812
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.166365385055542,
      "learning_rate": 8.587436389668234e-06,
      "loss": 2.9499,
      "step": 212813
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.330988883972168,
      "learning_rate": 8.586464702035479e-06,
      "loss": 2.7993,
      "step": 212814
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.998793840408325,
      "learning_rate": 8.58549306858195e-06,
      "loss": 2.8691,
      "step": 212815
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.589700937271118,
      "learning_rate": 8.584521489307916e-06,
      "loss": 2.791,
      "step": 212816
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3992912769317627,
      "learning_rate": 8.583549964213476e-06,
      "loss": 2.89,
      "step": 212817
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2324838638305664,
      "learning_rate": 8.582578493298864e-06,
      "loss": 2.7591,
      "step": 212818
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.119492530822754,
      "learning_rate": 8.581607076564245e-06,
      "loss": 2.9665,
      "step": 212819
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6509041786193848,
      "learning_rate": 8.58063571400982e-06,
      "loss": 2.657,
      "step": 212820
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1165664196014404,
      "learning_rate": 8.57966440563579e-06,
      "loss": 2.9599,
      "step": 212821
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2728054523468018,
      "learning_rate": 8.578693151442251e-06,
      "loss": 3.1967,
      "step": 212822
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1612203121185303,
      "learning_rate": 8.57772195142944e-06,
      "loss": 3.186,
      "step": 212823
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.530186653137207,
      "learning_rate": 8.576750805597555e-06,
      "loss": 2.9187,
      "step": 212824
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5941648483276367,
      "learning_rate": 8.575779713946728e-06,
      "loss": 3.2048,
      "step": 212825
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1548190116882324,
      "learning_rate": 8.574808676477197e-06,
      "loss": 2.702,
      "step": 212826
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.092984914779663,
      "learning_rate": 8.57383769318909e-06,
      "loss": 2.8456,
      "step": 212827
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6462535858154297,
      "learning_rate": 8.572866764082608e-06,
      "loss": 2.9493,
      "step": 212828
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.674077033996582,
      "learning_rate": 8.57189588915792e-06,
      "loss": 2.8418,
      "step": 212829
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6536552906036377,
      "learning_rate": 8.570925068415223e-06,
      "loss": 2.7382,
      "step": 212830
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.756383180618286,
      "learning_rate": 8.569954301854687e-06,
      "loss": 3.0472,
      "step": 212831
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.962022304534912,
      "learning_rate": 8.568983589476509e-06,
      "loss": 2.8067,
      "step": 212832
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0726144313812256,
      "learning_rate": 8.568012931280888e-06,
      "loss": 3.1122,
      "step": 212833
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9665284156799316,
      "learning_rate": 8.567042327267958e-06,
      "loss": 2.8018,
      "step": 212834
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0033464431762695,
      "learning_rate": 8.566071777437888e-06,
      "loss": 2.9639,
      "step": 212835
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.101099967956543,
      "learning_rate": 8.565101281790909e-06,
      "loss": 2.9422,
      "step": 212836
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.821326732635498,
      "learning_rate": 8.564130840327122e-06,
      "loss": 2.8891,
      "step": 212837
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.9976162910461426,
      "learning_rate": 8.563160453046824e-06,
      "loss": 2.8506,
      "step": 212838
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.492586135864258,
      "learning_rate": 8.562190119950152e-06,
      "loss": 2.9646,
      "step": 212839
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.551827430725098,
      "learning_rate": 8.561219841037237e-06,
      "loss": 3.0115,
      "step": 212840
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6619746685028076,
      "learning_rate": 8.56024961630828e-06,
      "loss": 2.906,
      "step": 212841
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1231908798217773,
      "learning_rate": 8.559279445763511e-06,
      "loss": 2.6848,
      "step": 212842
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6165273189544678,
      "learning_rate": 8.558309329403035e-06,
      "loss": 2.8054,
      "step": 212843
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8324501514434814,
      "learning_rate": 8.557339267227115e-06,
      "loss": 2.8839,
      "step": 212844
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.014397144317627,
      "learning_rate": 8.556369259235852e-06,
      "loss": 2.6681,
      "step": 212845
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1434364318847656,
      "learning_rate": 8.555399305429512e-06,
      "loss": 2.7657,
      "step": 212846
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.839953660964966,
      "learning_rate": 8.55442940580816e-06,
      "loss": 2.896,
      "step": 212847
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0681872367858887,
      "learning_rate": 8.553459560372066e-06,
      "loss": 2.7399,
      "step": 212848
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3371524810791016,
      "learning_rate": 8.552489769121362e-06,
      "loss": 2.9831,
      "step": 212849
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3193790912628174,
      "learning_rate": 8.551520032056281e-06,
      "loss": 2.6123,
      "step": 212850
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.541259765625,
      "learning_rate": 8.550550349176989e-06,
      "loss": 2.8749,
      "step": 212851
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0074195861816406,
      "learning_rate": 8.549580720483618e-06,
      "loss": 2.9027,
      "step": 212852
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8221428394317627,
      "learning_rate": 8.548611145976403e-06,
      "loss": 2.7849,
      "step": 212853
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0225841999053955,
      "learning_rate": 8.547641625655477e-06,
      "loss": 3.0228,
      "step": 212854
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.623412847518921,
      "learning_rate": 8.54667215952104e-06,
      "loss": 3.0558,
      "step": 212855
    },
    {
      "epoch": 2.77,
      "grad_norm": 5.658309459686279,
      "learning_rate": 8.545702747573325e-06,
      "loss": 2.8468,
      "step": 212856
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2978336811065674,
      "learning_rate": 8.544733389812431e-06,
      "loss": 2.8746,
      "step": 212857
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.2559661865234375,
      "learning_rate": 8.543764086238592e-06,
      "loss": 2.6751,
      "step": 212858
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.963449716567993,
      "learning_rate": 8.542794836851975e-06,
      "loss": 2.9505,
      "step": 212859
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.013880729675293,
      "learning_rate": 8.541825641652745e-06,
      "loss": 2.935,
      "step": 212860
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4959354400634766,
      "learning_rate": 8.54085650064107e-06,
      "loss": 3.0017,
      "step": 212861
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4867804050445557,
      "learning_rate": 8.539887413817147e-06,
      "loss": 3.0074,
      "step": 212862
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8102943897247314,
      "learning_rate": 8.53891838118118e-06,
      "loss": 3.2515,
      "step": 212863
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6119658946990967,
      "learning_rate": 8.537949402733335e-06,
      "loss": 2.9757,
      "step": 212864
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.900393009185791,
      "learning_rate": 8.53698047847381e-06,
      "loss": 2.6923,
      "step": 212865
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8008813858032227,
      "learning_rate": 8.536011608402705e-06,
      "loss": 3.2309,
      "step": 212866
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1543195247650146,
      "learning_rate": 8.535042792520286e-06,
      "loss": 3.0913,
      "step": 212867
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0336177349090576,
      "learning_rate": 8.534074030826721e-06,
      "loss": 2.8779,
      "step": 212868
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5424232482910156,
      "learning_rate": 8.533105323322142e-06,
      "loss": 3.0234,
      "step": 212869
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9731688499450684,
      "learning_rate": 8.532136670006784e-06,
      "loss": 2.7544,
      "step": 212870
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.2056074142456055,
      "learning_rate": 8.531168070880811e-06,
      "loss": 2.9307,
      "step": 212871
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.981341600418091,
      "learning_rate": 8.530199525944359e-06,
      "loss": 2.6898,
      "step": 212872
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.138756275177002,
      "learning_rate": 8.529231035197659e-06,
      "loss": 2.7349,
      "step": 212873
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6260910034179688,
      "learning_rate": 8.528262598640912e-06,
      "loss": 2.8482,
      "step": 212874
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6113178730010986,
      "learning_rate": 8.527294216274216e-06,
      "loss": 3.0098,
      "step": 212875
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8895390033721924,
      "learning_rate": 8.526325888097808e-06,
      "loss": 2.9482,
      "step": 212876
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.012928009033203,
      "learning_rate": 8.525357614111882e-06,
      "loss": 2.842,
      "step": 212877
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.274221897125244,
      "learning_rate": 8.524389394316577e-06,
      "loss": 2.8518,
      "step": 212878
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9509079456329346,
      "learning_rate": 8.52342122871209e-06,
      "loss": 2.9787,
      "step": 212879
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3026845455169678,
      "learning_rate": 8.52245311729859e-06,
      "loss": 2.9855,
      "step": 212880
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1235296726226807,
      "learning_rate": 8.521485060076271e-06,
      "loss": 2.8989,
      "step": 212881
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.546818733215332,
      "learning_rate": 8.52051705704534e-06,
      "loss": 2.6939,
      "step": 212882
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.862394332885742,
      "learning_rate": 8.519549108205926e-06,
      "loss": 2.9065,
      "step": 212883
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7942962646484375,
      "learning_rate": 8.518581213558229e-06,
      "loss": 2.986,
      "step": 212884
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.8778717517852783,
      "learning_rate": 8.51761337310245e-06,
      "loss": 2.8694,
      "step": 212885
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8633129596710205,
      "learning_rate": 8.516645586838689e-06,
      "loss": 2.756,
      "step": 212886
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9599411487579346,
      "learning_rate": 8.515677854767245e-06,
      "loss": 3.0481,
      "step": 212887
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2600061893463135,
      "learning_rate": 8.514710176888217e-06,
      "loss": 2.8459,
      "step": 212888
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.095755100250244,
      "learning_rate": 8.51374255320184e-06,
      "loss": 2.7561,
      "step": 212889
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.165433645248413,
      "learning_rate": 8.512774983708215e-06,
      "loss": 2.8209,
      "step": 212890
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8492982387542725,
      "learning_rate": 8.511807468407573e-06,
      "loss": 2.8224,
      "step": 212891
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.033116579055786,
      "learning_rate": 8.51084000730008e-06,
      "loss": 2.9749,
      "step": 212892
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8258376121520996,
      "learning_rate": 8.50987260038597e-06,
      "loss": 2.9175,
      "step": 212893
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0910539627075195,
      "learning_rate": 8.508905247665344e-06,
      "loss": 2.9769,
      "step": 212894
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.24165415763855,
      "learning_rate": 8.507937949138432e-06,
      "loss": 2.7846,
      "step": 212895
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8035619258880615,
      "learning_rate": 8.506970704805371e-06,
      "loss": 2.9128,
      "step": 212896
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1716232299804688,
      "learning_rate": 8.506003514666393e-06,
      "loss": 3.0104,
      "step": 212897
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.258387565612793,
      "learning_rate": 8.50503637872163e-06,
      "loss": 2.985,
      "step": 212898
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.84846568107605,
      "learning_rate": 8.504069296971283e-06,
      "loss": 2.7907,
      "step": 212899
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.982722043991089,
      "learning_rate": 8.503102269415551e-06,
      "loss": 2.9642,
      "step": 212900
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9533863067626953,
      "learning_rate": 8.502135296054602e-06,
      "loss": 3.0883,
      "step": 212901
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7380383014678955,
      "learning_rate": 8.501168376888567e-06,
      "loss": 2.9337,
      "step": 212902
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.762270212173462,
      "learning_rate": 8.500201511917714e-06,
      "loss": 2.8368,
      "step": 212903
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7421956062316895,
      "learning_rate": 8.499234701142143e-06,
      "loss": 2.951,
      "step": 212904
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5867297649383545,
      "learning_rate": 8.498267944562087e-06,
      "loss": 3.2001,
      "step": 212905
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.071450710296631,
      "learning_rate": 8.497301242177713e-06,
      "loss": 2.798,
      "step": 212906
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.167980432510376,
      "learning_rate": 8.496334593989184e-06,
      "loss": 2.9209,
      "step": 212907
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8414387702941895,
      "learning_rate": 8.495367999996638e-06,
      "loss": 3.1022,
      "step": 212908
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.41310977935791,
      "learning_rate": 8.494401460200373e-06,
      "loss": 3.1331,
      "step": 212909
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.816988229751587,
      "learning_rate": 8.493434974600455e-06,
      "loss": 2.8061,
      "step": 212910
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1979081630706787,
      "learning_rate": 8.49246854319715e-06,
      "loss": 2.7671,
      "step": 212911
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.816462516784668,
      "learning_rate": 8.491502165990594e-06,
      "loss": 2.9356,
      "step": 212912
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7550668716430664,
      "learning_rate": 8.49053584298095e-06,
      "loss": 2.9847,
      "step": 212913
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.941415309906006,
      "learning_rate": 8.48956957416842e-06,
      "loss": 2.962,
      "step": 212914
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.775279998779297,
      "learning_rate": 8.488603359553203e-06,
      "loss": 2.9741,
      "step": 212915
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9828410148620605,
      "learning_rate": 8.4876371991354e-06,
      "loss": 2.9177,
      "step": 212916
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9216196537017822,
      "learning_rate": 8.48667109291531e-06,
      "loss": 3.0296,
      "step": 212917
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2445685863494873,
      "learning_rate": 8.485705040893032e-06,
      "loss": 2.8747,
      "step": 212918
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.201341152191162,
      "learning_rate": 8.484739043068766e-06,
      "loss": 2.8567,
      "step": 212919
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.991950750350952,
      "learning_rate": 8.48377309944268e-06,
      "loss": 2.8193,
      "step": 212920
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5328516960144043,
      "learning_rate": 8.482807210014974e-06,
      "loss": 2.7811,
      "step": 212921
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9292819499969482,
      "learning_rate": 8.48184137478578e-06,
      "loss": 2.9878,
      "step": 212922
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.116424560546875,
      "learning_rate": 8.480875593755365e-06,
      "loss": 2.8501,
      "step": 212923
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8128206729888916,
      "learning_rate": 8.479909866923828e-06,
      "loss": 2.6376,
      "step": 212924
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4162254333496094,
      "learning_rate": 8.478944194291436e-06,
      "loss": 2.9865,
      "step": 212925
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.288431167602539,
      "learning_rate": 8.477978575858225e-06,
      "loss": 2.6321,
      "step": 212926
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.113699436187744,
      "learning_rate": 8.477013011624524e-06,
      "loss": 2.9026,
      "step": 212927
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6266188621520996,
      "learning_rate": 8.476047501590399e-06,
      "loss": 2.7279,
      "step": 212928
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.852945566177368,
      "learning_rate": 8.47508204575612e-06,
      "loss": 2.91,
      "step": 212929
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9478979110717773,
      "learning_rate": 8.474116644121786e-06,
      "loss": 2.9927,
      "step": 212930
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8434579372406006,
      "learning_rate": 8.473151296687698e-06,
      "loss": 2.9437,
      "step": 212931
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.876542806625366,
      "learning_rate": 8.472186003453851e-06,
      "loss": 3.1848,
      "step": 212932
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.133692979812622,
      "learning_rate": 8.471220764420617e-06,
      "loss": 2.9846,
      "step": 212933
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4533851146698,
      "learning_rate": 8.470255579588026e-06,
      "loss": 2.9137,
      "step": 212934
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.763885498046875,
      "learning_rate": 8.469290448956346e-06,
      "loss": 3.0759,
      "step": 212935
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8673455715179443,
      "learning_rate": 8.468325372525675e-06,
      "loss": 3.1689,
      "step": 212936
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8901870250701904,
      "learning_rate": 8.467360350296349e-06,
      "loss": 2.9588,
      "step": 212937
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1146411895751953,
      "learning_rate": 8.466395382268365e-06,
      "loss": 2.9079,
      "step": 212938
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.216986656188965,
      "learning_rate": 8.465430468441992e-06,
      "loss": 2.8223,
      "step": 212939
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.889174461364746,
      "learning_rate": 8.464465608817395e-06,
      "loss": 2.8024,
      "step": 212940
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9686501026153564,
      "learning_rate": 8.463500803394773e-06,
      "loss": 3.0077,
      "step": 212941
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9900264739990234,
      "learning_rate": 8.462536052174262e-06,
      "loss": 2.9903,
      "step": 212942
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.926408290863037,
      "learning_rate": 8.461571355156095e-06,
      "loss": 3.1727,
      "step": 212943
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2595810890197754,
      "learning_rate": 8.460606712340434e-06,
      "loss": 2.8607,
      "step": 212944
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.987727403640747,
      "learning_rate": 8.45964212372745e-06,
      "loss": 2.9494,
      "step": 212945
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7825663089752197,
      "learning_rate": 8.45867758931731e-06,
      "loss": 2.922,
      "step": 212946
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.229763984680176,
      "learning_rate": 8.45771310911021e-06,
      "loss": 2.8117,
      "step": 212947
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.982694387435913,
      "learning_rate": 8.456748683106284e-06,
      "loss": 2.894,
      "step": 212948
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4105312824249268,
      "learning_rate": 8.455784311305802e-06,
      "loss": 2.6666,
      "step": 212949
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0476741790771484,
      "learning_rate": 8.454819993708895e-06,
      "loss": 2.8887,
      "step": 212950
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4962308406829834,
      "learning_rate": 8.453855730315696e-06,
      "loss": 3.124,
      "step": 212951
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7758548259735107,
      "learning_rate": 8.45289152112647e-06,
      "loss": 2.9567,
      "step": 212952
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.100630044937134,
      "learning_rate": 8.451927366141354e-06,
      "loss": 3.0032,
      "step": 212953
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4180853366851807,
      "learning_rate": 8.450963265360477e-06,
      "loss": 3.13,
      "step": 212954
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7547709941864014,
      "learning_rate": 8.449999218784142e-06,
      "loss": 2.8513,
      "step": 212955
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8567440509796143,
      "learning_rate": 8.449035226412416e-06,
      "loss": 3.0705,
      "step": 212956
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.808133840560913,
      "learning_rate": 8.448071288245495e-06,
      "loss": 2.9743,
      "step": 212957
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6885130405426025,
      "learning_rate": 8.447107404283615e-06,
      "loss": 2.956,
      "step": 212958
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.866142511367798,
      "learning_rate": 8.446143574526942e-06,
      "loss": 2.7705,
      "step": 212959
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.340306520462036,
      "learning_rate": 8.445179798975577e-06,
      "loss": 2.9755,
      "step": 212960
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4146509170532227,
      "learning_rate": 8.444216077629817e-06,
      "loss": 2.9404,
      "step": 212961
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1671242713928223,
      "learning_rate": 8.443252410489732e-06,
      "loss": 2.8181,
      "step": 212962
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.266002893447876,
      "learning_rate": 8.442288797555552e-06,
      "loss": 2.9485,
      "step": 212963
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7549312114715576,
      "learning_rate": 8.441325238827479e-06,
      "loss": 2.826,
      "step": 212964
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2150449752807617,
      "learning_rate": 8.440361734305645e-06,
      "loss": 2.6893,
      "step": 212965
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.092515468597412,
      "learning_rate": 8.439398283990285e-06,
      "loss": 2.8401,
      "step": 212966
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8484206199645996,
      "learning_rate": 8.43843488788153e-06,
      "loss": 2.7905,
      "step": 212967
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.364448070526123,
      "learning_rate": 8.43747154597958e-06,
      "loss": 3.1429,
      "step": 212968
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3355600833892822,
      "learning_rate": 8.436508258284569e-06,
      "loss": 3.0131,
      "step": 212969
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1890811920166016,
      "learning_rate": 8.435545024796763e-06,
      "loss": 2.7937,
      "step": 212970
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7718613147735596,
      "learning_rate": 8.43458184551623e-06,
      "loss": 3.0581,
      "step": 212971
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6221933364868164,
      "learning_rate": 8.43361872044327e-06,
      "loss": 2.7977,
      "step": 212972
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8711180686950684,
      "learning_rate": 8.432655649578014e-06,
      "loss": 2.9599,
      "step": 212973
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.986924171447754,
      "learning_rate": 8.431692632920595e-06,
      "loss": 2.9269,
      "step": 212974
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.96010684967041,
      "learning_rate": 8.430729670471214e-06,
      "loss": 2.7319,
      "step": 212975
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.654716968536377,
      "learning_rate": 8.429766762230106e-06,
      "loss": 3.0159,
      "step": 212976
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.4184176921844482,
      "learning_rate": 8.428803908197369e-06,
      "loss": 3.0059,
      "step": 212977
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8400156497955322,
      "learning_rate": 8.427841108373268e-06,
      "loss": 2.9991,
      "step": 212978
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9719293117523193,
      "learning_rate": 8.426878362757905e-06,
      "loss": 2.639,
      "step": 212979
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7910029888153076,
      "learning_rate": 8.425915671351513e-06,
      "loss": 2.8485,
      "step": 212980
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9543943405151367,
      "learning_rate": 8.42495303415419e-06,
      "loss": 2.9871,
      "step": 212981
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.863706111907959,
      "learning_rate": 8.42399045116624e-06,
      "loss": 2.7895,
      "step": 212982
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.836477518081665,
      "learning_rate": 8.423027922387727e-06,
      "loss": 3.0327,
      "step": 212983
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.596877336502075,
      "learning_rate": 8.422065447818882e-06,
      "loss": 2.6121,
      "step": 212984
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2772061824798584,
      "learning_rate": 8.421103027459908e-06,
      "loss": 2.9049,
      "step": 212985
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.03614616394043,
      "learning_rate": 8.420140661310937e-06,
      "loss": 2.7846,
      "step": 212986
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5734455585479736,
      "learning_rate": 8.419178349372168e-06,
      "loss": 3.1349,
      "step": 212987
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8147928714752197,
      "learning_rate": 8.41821609164377e-06,
      "loss": 2.9636,
      "step": 212988
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8737754821777344,
      "learning_rate": 8.417253888125908e-06,
      "loss": 2.8477,
      "step": 212989
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.08573055267334,
      "learning_rate": 8.416291738818848e-06,
      "loss": 2.8785,
      "step": 212990
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8609731197357178,
      "learning_rate": 8.415329643722624e-06,
      "loss": 3.0338,
      "step": 212991
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5935921669006348,
      "learning_rate": 8.414367602837602e-06,
      "loss": 3.0423,
      "step": 212992
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0135645866394043,
      "learning_rate": 8.413405616163748e-06,
      "loss": 2.8835,
      "step": 212993
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.935535192489624,
      "learning_rate": 8.412443683701397e-06,
      "loss": 2.9417,
      "step": 212994
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1118640899658203,
      "learning_rate": 8.411481805450648e-06,
      "loss": 2.9409,
      "step": 212995
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.333873748779297,
      "learning_rate": 8.410519981411735e-06,
      "loss": 2.6993,
      "step": 212996
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.962721586227417,
      "learning_rate": 8.409558211584788e-06,
      "loss": 3.1044,
      "step": 212997
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7235708236694336,
      "learning_rate": 8.408596495970044e-06,
      "loss": 3.1608,
      "step": 212998
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0745880603790283,
      "learning_rate": 8.4076348345676e-06,
      "loss": 3.0807,
      "step": 212999
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.176443338394165,
      "learning_rate": 8.406673227377724e-06,
      "loss": 2.9461,
      "step": 213000
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.59858775138855,
      "learning_rate": 8.405711674400517e-06,
      "loss": 3.1203,
      "step": 213001
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.309695243835449,
      "learning_rate": 8.40475017563621e-06,
      "loss": 2.7048,
      "step": 213002
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1766157150268555,
      "learning_rate": 8.403788731084938e-06,
      "loss": 2.7506,
      "step": 213003
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.740870237350464,
      "learning_rate": 8.402827340746998e-06,
      "loss": 3.1276,
      "step": 213004
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3290207386016846,
      "learning_rate": 8.401866004622393e-06,
      "loss": 2.9125,
      "step": 213005
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0629053115844727,
      "learning_rate": 8.400904722711388e-06,
      "loss": 3.0402,
      "step": 213006
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6758005619049072,
      "learning_rate": 8.39994349501415e-06,
      "loss": 2.8997,
      "step": 213007
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7573277950286865,
      "learning_rate": 8.398982321530911e-06,
      "loss": 2.7974,
      "step": 213008
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0989081859588623,
      "learning_rate": 8.398021202261773e-06,
      "loss": 2.7636,
      "step": 213009
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7674062252044678,
      "learning_rate": 8.397060137207001e-06,
      "loss": 2.8308,
      "step": 213010
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3115365505218506,
      "learning_rate": 8.39609912636663e-06,
      "loss": 2.8541,
      "step": 213011
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.691127777099609,
      "learning_rate": 8.395138169740989e-06,
      "loss": 2.8694,
      "step": 213012
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9183199405670166,
      "learning_rate": 8.394177267330182e-06,
      "loss": 2.7837,
      "step": 213013
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.988497018814087,
      "learning_rate": 8.393216419134408e-06,
      "loss": 2.6515,
      "step": 213014
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.872560501098633,
      "learning_rate": 8.392255625153798e-06,
      "loss": 2.7299,
      "step": 213015
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.922729969024658,
      "learning_rate": 8.391294885388655e-06,
      "loss": 2.7126,
      "step": 213016
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.731814384460449,
      "learning_rate": 8.39033419983901e-06,
      "loss": 2.7712,
      "step": 213017
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.230189085006714,
      "learning_rate": 8.389373568505132e-06,
      "loss": 3.1909,
      "step": 213018
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8480825424194336,
      "learning_rate": 8.38841299138715e-06,
      "loss": 2.7395,
      "step": 213019
    },
    {
      "epoch": 2.77,
      "grad_norm": 7.096922397613525,
      "learning_rate": 8.387452468485301e-06,
      "loss": 3.063,
      "step": 213020
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.378162384033203,
      "learning_rate": 8.386491999799683e-06,
      "loss": 2.9629,
      "step": 213021
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9172589778900146,
      "learning_rate": 8.385531585330596e-06,
      "loss": 2.8559,
      "step": 213022
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.771738290786743,
      "learning_rate": 8.384571225078074e-06,
      "loss": 3.0218,
      "step": 213023
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.126356840133667,
      "learning_rate": 8.383610919042416e-06,
      "loss": 2.9566,
      "step": 213024
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2518317699432373,
      "learning_rate": 8.382650667223689e-06,
      "loss": 3.0351,
      "step": 213025
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.677294969558716,
      "learning_rate": 8.38169046962216e-06,
      "loss": 2.7495,
      "step": 213026
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1393284797668457,
      "learning_rate": 8.380730326237994e-06,
      "loss": 2.8801,
      "step": 213027
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8114731311798096,
      "learning_rate": 8.379770237071325e-06,
      "loss": 2.5993,
      "step": 213028
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.788102388381958,
      "learning_rate": 8.378810202122421e-06,
      "loss": 2.7133,
      "step": 213029
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0632309913635254,
      "learning_rate": 8.377850221391347e-06,
      "loss": 3.0065,
      "step": 213030
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9917595386505127,
      "learning_rate": 8.376890294878336e-06,
      "loss": 2.9081,
      "step": 213031
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8517889976501465,
      "learning_rate": 8.375930422583588e-06,
      "loss": 3.1626,
      "step": 213032
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4155876636505127,
      "learning_rate": 8.374970604507236e-06,
      "loss": 2.7842,
      "step": 213033
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7237303256988525,
      "learning_rate": 8.374010840649514e-06,
      "loss": 2.899,
      "step": 213034
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7004382610321045,
      "learning_rate": 8.373051131010556e-06,
      "loss": 3.0237,
      "step": 213035
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.984111785888672,
      "learning_rate": 8.37209147559056e-06,
      "loss": 3.0972,
      "step": 213036
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.089485168457031,
      "learning_rate": 8.371131874389658e-06,
      "loss": 3.0919,
      "step": 213037
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7295398712158203,
      "learning_rate": 8.37017232740812e-06,
      "loss": 2.9682,
      "step": 213038
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.167370319366455,
      "learning_rate": 8.369212834646011e-06,
      "loss": 2.8692,
      "step": 213039
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6015024185180664,
      "learning_rate": 8.36825339610363e-06,
      "loss": 2.9501,
      "step": 213040
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.370760202407837,
      "learning_rate": 8.367294011781079e-06,
      "loss": 2.7703,
      "step": 213041
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.642439126968384,
      "learning_rate": 8.366334681678522e-06,
      "loss": 2.7103,
      "step": 213042
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2805182933807373,
      "learning_rate": 8.365375405796193e-06,
      "loss": 2.9276,
      "step": 213043
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.861226797103882,
      "learning_rate": 8.364416184134259e-06,
      "loss": 3.2008,
      "step": 213044
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.716395378112793,
      "learning_rate": 8.363457016692854e-06,
      "loss": 2.9212,
      "step": 213045
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.74573016166687,
      "learning_rate": 8.36249790347221e-06,
      "loss": 3.0273,
      "step": 213046
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.402799606323242,
      "learning_rate": 8.361538844472492e-06,
      "loss": 2.9101,
      "step": 213047
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.547229290008545,
      "learning_rate": 8.360579839693837e-06,
      "loss": 3.1445,
      "step": 213048
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8888587951660156,
      "learning_rate": 8.359620889136475e-06,
      "loss": 3.0415,
      "step": 213049
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.473824977874756,
      "learning_rate": 8.358661992800541e-06,
      "loss": 2.733,
      "step": 213050
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.862800359725952,
      "learning_rate": 8.357703150686268e-06,
      "loss": 2.9759,
      "step": 213051
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.128770351409912,
      "learning_rate": 8.356744362793787e-06,
      "loss": 2.7872,
      "step": 213052
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8645851612091064,
      "learning_rate": 8.355785629123335e-06,
      "loss": 3.2043,
      "step": 213053
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.005899667739868,
      "learning_rate": 8.354826949674975e-06,
      "loss": 2.6484,
      "step": 213054
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.658839702606201,
      "learning_rate": 8.353868324449008e-06,
      "loss": 2.7752,
      "step": 213055
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.5117645263671875,
      "learning_rate": 8.352909753445536e-06,
      "loss": 2.9867,
      "step": 213056
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7625420093536377,
      "learning_rate": 8.351951236664756e-06,
      "loss": 2.8318,
      "step": 213057
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6208531856536865,
      "learning_rate": 8.350992774106902e-06,
      "loss": 2.9655,
      "step": 213058
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1773886680603027,
      "learning_rate": 8.350034365772074e-06,
      "loss": 2.9928,
      "step": 213059
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.792858362197876,
      "learning_rate": 8.349076011660471e-06,
      "loss": 3.1119,
      "step": 213060
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.501500129699707,
      "learning_rate": 8.348117711772296e-06,
      "loss": 2.7598,
      "step": 213061
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.727426767349243,
      "learning_rate": 8.347159466107679e-06,
      "loss": 3.0635,
      "step": 213062
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.768275737762451,
      "learning_rate": 8.346201274666887e-06,
      "loss": 2.8685,
      "step": 213063
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.360703945159912,
      "learning_rate": 8.345243137449987e-06,
      "loss": 2.9837,
      "step": 213064
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9494755268096924,
      "learning_rate": 8.344285054457278e-06,
      "loss": 3.0869,
      "step": 213065
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.845618486404419,
      "learning_rate": 8.343327025688796e-06,
      "loss": 2.9315,
      "step": 213066
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0484023094177246,
      "learning_rate": 8.342369051144837e-06,
      "loss": 3.1214,
      "step": 213067
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2307705879211426,
      "learning_rate": 8.341411130825504e-06,
      "loss": 2.8964,
      "step": 213068
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1662967205047607,
      "learning_rate": 8.340453264731029e-06,
      "loss": 2.9973,
      "step": 213069
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.423539638519287,
      "learning_rate": 8.339495452861578e-06,
      "loss": 2.8407,
      "step": 213070
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.145169496536255,
      "learning_rate": 8.338537695217351e-06,
      "loss": 2.9794,
      "step": 213071
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.315138578414917,
      "learning_rate": 8.33757999179845e-06,
      "loss": 2.9248,
      "step": 213072
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4634885787963867,
      "learning_rate": 8.336622342605104e-06,
      "loss": 2.8557,
      "step": 213073
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3766956329345703,
      "learning_rate": 8.33566474763745e-06,
      "loss": 2.8248,
      "step": 213074
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.935779333114624,
      "learning_rate": 8.334707206895752e-06,
      "loss": 3.0117,
      "step": 213075
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.054750442504883,
      "learning_rate": 8.333749720380112e-06,
      "loss": 3.14,
      "step": 213076
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.521556854248047,
      "learning_rate": 8.332792288090795e-06,
      "loss": 2.9761,
      "step": 213077
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6470701694488525,
      "learning_rate": 8.331834910027868e-06,
      "loss": 3.1471,
      "step": 213078
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.6007566452026367,
      "learning_rate": 8.330877586191564e-06,
      "loss": 3.1126,
      "step": 213079
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9633371829986572,
      "learning_rate": 8.329920316582018e-06,
      "loss": 2.7959,
      "step": 213080
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8124232292175293,
      "learning_rate": 8.328963101199493e-06,
      "loss": 2.8048,
      "step": 213081
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.1040847301483154,
      "learning_rate": 8.328005940044091e-06,
      "loss": 2.8505,
      "step": 213082
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8531413078308105,
      "learning_rate": 8.327048833116079e-06,
      "loss": 3.0341,
      "step": 213083
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.57051682472229,
      "learning_rate": 8.32609178041549e-06,
      "loss": 2.7547,
      "step": 213084
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.165900707244873,
      "learning_rate": 8.325134781942655e-06,
      "loss": 3.0642,
      "step": 213085
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3854150772094727,
      "learning_rate": 8.32417783769761e-06,
      "loss": 2.8954,
      "step": 213086
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.6663665771484375,
      "learning_rate": 8.323220947680687e-06,
      "loss": 2.852,
      "step": 213087
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.721426248550415,
      "learning_rate": 8.322264111891919e-06,
      "loss": 2.8037,
      "step": 213088
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.127027988433838,
      "learning_rate": 8.321307330331606e-06,
      "loss": 2.8503,
      "step": 213089
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.768270254135132,
      "learning_rate": 8.320350602999815e-06,
      "loss": 2.9271,
      "step": 213090
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.3193368911743164,
      "learning_rate": 8.319393929896811e-06,
      "loss": 2.8853,
      "step": 213091
    },
    {
      "epoch": 2.77,
      "grad_norm": 5.337150573730469,
      "learning_rate": 8.318437311022696e-06,
      "loss": 2.9755,
      "step": 213092
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.236323833465576,
      "learning_rate": 8.317480746377737e-06,
      "loss": 2.6968,
      "step": 213093
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8901851177215576,
      "learning_rate": 8.31652423596203e-06,
      "loss": 2.8061,
      "step": 213094
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2257113456726074,
      "learning_rate": 8.315567779775845e-06,
      "loss": 2.7875,
      "step": 213095
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2202694416046143,
      "learning_rate": 8.314611377819246e-06,
      "loss": 3.154,
      "step": 213096
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.0749142169952393,
      "learning_rate": 8.31365503009247e-06,
      "loss": 3.1126,
      "step": 213097
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.911905288696289,
      "learning_rate": 8.31269873659568e-06,
      "loss": 2.9381,
      "step": 213098
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.163991928100586,
      "learning_rate": 8.31174249732911e-06,
      "loss": 2.9099,
      "step": 213099
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8893237113952637,
      "learning_rate": 8.31078631229286e-06,
      "loss": 2.8351,
      "step": 213100
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7226290702819824,
      "learning_rate": 8.309830181487199e-06,
      "loss": 2.7056,
      "step": 213101
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.973374366760254,
      "learning_rate": 8.308874104912156e-06,
      "loss": 2.8722,
      "step": 213102
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9763503074645996,
      "learning_rate": 8.307918082568065e-06,
      "loss": 2.7687,
      "step": 213103
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9769015312194824,
      "learning_rate": 8.306962114454996e-06,
      "loss": 2.7517,
      "step": 213104
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.4283480644226074,
      "learning_rate": 8.306006200573178e-06,
      "loss": 2.9445,
      "step": 213105
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2021710872650146,
      "learning_rate": 8.305050340922782e-06,
      "loss": 2.9046,
      "step": 213106
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.125636339187622,
      "learning_rate": 8.304094535504035e-06,
      "loss": 2.8469,
      "step": 213107
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.843048095703125,
      "learning_rate": 8.303138784317009e-06,
      "loss": 2.9634,
      "step": 213108
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.575886249542236,
      "learning_rate": 8.302183087361935e-06,
      "loss": 2.5991,
      "step": 213109
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7400288581848145,
      "learning_rate": 8.30122744463898e-06,
      "loss": 2.7318,
      "step": 213110
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.107917547225952,
      "learning_rate": 8.300271856148377e-06,
      "loss": 3.0198,
      "step": 213111
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.7073371410369873,
      "learning_rate": 8.299316321890225e-06,
      "loss": 3.0464,
      "step": 213112
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.74060320854187,
      "learning_rate": 8.29836084186476e-06,
      "loss": 3.0428,
      "step": 213113
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9544849395751953,
      "learning_rate": 8.297405416072111e-06,
      "loss": 2.8583,
      "step": 213114
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.954158306121826,
      "learning_rate": 8.296450044512516e-06,
      "loss": 3.0269,
      "step": 213115
    },
    {
      "epoch": 2.77,
      "grad_norm": 4.719544887542725,
      "learning_rate": 8.29549472718607e-06,
      "loss": 2.9151,
      "step": 213116
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.7440998554229736,
      "learning_rate": 8.294539464093041e-06,
      "loss": 3.1141,
      "step": 213117
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.5964767932891846,
      "learning_rate": 8.293584255233532e-06,
      "loss": 2.7867,
      "step": 213118
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.9630613327026367,
      "learning_rate": 8.292629100607773e-06,
      "loss": 2.9144,
      "step": 213119
    },
    {
      "epoch": 2.77,
      "grad_norm": 2.8370578289031982,
      "learning_rate": 8.29167400021593e-06,
      "loss": 2.8916,
      "step": 213120
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.236514091491699,
      "learning_rate": 8.290718954058173e-06,
      "loss": 2.9849,
      "step": 213121
    },
    {
      "epoch": 2.77,
      "grad_norm": 3.2330760955810547,
      "learning_rate": 8.289763962134632e-06,
      "loss": 2.7758,
      "step": 213122
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1377835273742676,
      "learning_rate": 8.288809024445575e-06,
      "loss": 3.0077,
      "step": 213123
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.102155923843384,
      "learning_rate": 8.287854140991102e-06,
      "loss": 2.8106,
      "step": 213124
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.235518217086792,
      "learning_rate": 8.286899311771445e-06,
      "loss": 3.0366,
      "step": 213125
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.717971086502075,
      "learning_rate": 8.285944536786771e-06,
      "loss": 3.1763,
      "step": 213126
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.618943691253662,
      "learning_rate": 8.284989816037212e-06,
      "loss": 3.0459,
      "step": 213127
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.9825007915496826,
      "learning_rate": 8.284035149523006e-06,
      "loss": 2.8578,
      "step": 213128
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0416336059570312,
      "learning_rate": 8.283080537244313e-06,
      "loss": 2.9645,
      "step": 213129
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.007915496826172,
      "learning_rate": 8.282125979201237e-06,
      "loss": 2.8084,
      "step": 213130
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0982553958892822,
      "learning_rate": 8.281171475394077e-06,
      "loss": 3.0526,
      "step": 213131
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.982506275177002,
      "learning_rate": 8.280217025822966e-06,
      "loss": 2.7843,
      "step": 213132
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0700206756591797,
      "learning_rate": 8.279262630488037e-06,
      "loss": 2.9387,
      "step": 213133
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.873835325241089,
      "learning_rate": 8.278308289389523e-06,
      "loss": 3.0025,
      "step": 213134
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.258291482925415,
      "learning_rate": 8.277354002527559e-06,
      "loss": 3.0535,
      "step": 213135
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.742830276489258,
      "learning_rate": 8.276399769902342e-06,
      "loss": 3.0022,
      "step": 213136
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6942615509033203,
      "learning_rate": 8.275445591514073e-06,
      "loss": 3.0026,
      "step": 213137
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7024128437042236,
      "learning_rate": 8.274491467362887e-06,
      "loss": 2.965,
      "step": 213138
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9321248531341553,
      "learning_rate": 8.273537397448981e-06,
      "loss": 2.8688,
      "step": 213139
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5730631351470947,
      "learning_rate": 8.272583381772525e-06,
      "loss": 2.8167,
      "step": 213140
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.709456443786621,
      "learning_rate": 8.271629420333681e-06,
      "loss": 3.0,
      "step": 213141
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.729128360748291,
      "learning_rate": 8.270675513132685e-06,
      "loss": 2.8218,
      "step": 213142
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.889101982116699,
      "learning_rate": 8.269721660169704e-06,
      "loss": 2.7995,
      "step": 213143
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2400357723236084,
      "learning_rate": 8.26876786144487e-06,
      "loss": 2.8256,
      "step": 213144
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.847184181213379,
      "learning_rate": 8.267814116958315e-06,
      "loss": 2.9117,
      "step": 213145
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0402743816375732,
      "learning_rate": 8.266860426710343e-06,
      "loss": 3.0969,
      "step": 213146
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.830803155899048,
      "learning_rate": 8.26590679070105e-06,
      "loss": 3.1535,
      "step": 213147
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8326873779296875,
      "learning_rate": 8.264953208930635e-06,
      "loss": 2.8432,
      "step": 213148
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.429291248321533,
      "learning_rate": 8.263999681399235e-06,
      "loss": 2.7957,
      "step": 213149
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5460150241851807,
      "learning_rate": 8.26304620810715e-06,
      "loss": 3.0344,
      "step": 213150
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6450700759887695,
      "learning_rate": 8.26209278905441e-06,
      "loss": 2.8878,
      "step": 213151
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.301217555999756,
      "learning_rate": 8.261139424241281e-06,
      "loss": 3.0627,
      "step": 213152
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.361891984939575,
      "learning_rate": 8.260186113667865e-06,
      "loss": 3.0539,
      "step": 213153
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.255591869354248,
      "learning_rate": 8.25923285733443e-06,
      "loss": 2.9689,
      "step": 213154
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.469449520111084,
      "learning_rate": 8.258279655241106e-06,
      "loss": 2.8015,
      "step": 213155
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.251530885696411,
      "learning_rate": 8.257326507388096e-06,
      "loss": 2.8261,
      "step": 213156
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.676656723022461,
      "learning_rate": 8.25637341377553e-06,
      "loss": 3.2928,
      "step": 213157
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7959635257720947,
      "learning_rate": 8.25542037440361e-06,
      "loss": 2.8289,
      "step": 213158
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7436411380767822,
      "learning_rate": 8.2544673892725e-06,
      "loss": 2.913,
      "step": 213159
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8493871688842773,
      "learning_rate": 8.253514458382404e-06,
      "loss": 2.8674,
      "step": 213160
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.123641014099121,
      "learning_rate": 8.252561581733486e-06,
      "loss": 3.0618,
      "step": 213161
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7505147457122803,
      "learning_rate": 8.251608759325978e-06,
      "loss": 3.1019,
      "step": 213162
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0024991035461426,
      "learning_rate": 8.250655991159915e-06,
      "loss": 2.7341,
      "step": 213163
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6355412006378174,
      "learning_rate": 8.24970327723563e-06,
      "loss": 2.8627,
      "step": 213164
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0121958255767822,
      "learning_rate": 8.248750617553158e-06,
      "loss": 2.7685,
      "step": 213165
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8859293460845947,
      "learning_rate": 8.247798012112827e-06,
      "loss": 2.6004,
      "step": 213166
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.935952663421631,
      "learning_rate": 8.246845460914676e-06,
      "loss": 3.0338,
      "step": 213167
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.819465398788452,
      "learning_rate": 8.245892963959034e-06,
      "loss": 2.977,
      "step": 213168
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.813636302947998,
      "learning_rate": 8.244940521245903e-06,
      "loss": 3.1679,
      "step": 213169
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.140817403793335,
      "learning_rate": 8.24398813277558e-06,
      "loss": 3.1002,
      "step": 213170
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.735186815261841,
      "learning_rate": 8.24303579854817e-06,
      "loss": 2.8226,
      "step": 213171
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7140495777130127,
      "learning_rate": 8.242083518563935e-06,
      "loss": 3.0581,
      "step": 213172
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.054290294647217,
      "learning_rate": 8.241131292822945e-06,
      "loss": 2.9504,
      "step": 213173
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.218937635421753,
      "learning_rate": 8.24017912132553e-06,
      "loss": 3.0319,
      "step": 213174
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3277058601379395,
      "learning_rate": 8.239227004071692e-06,
      "loss": 2.9687,
      "step": 213175
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7669146060943604,
      "learning_rate": 8.238274941061728e-06,
      "loss": 2.8924,
      "step": 213176
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3972408771514893,
      "learning_rate": 8.237322932295743e-06,
      "loss": 2.8914,
      "step": 213177
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7773940563201904,
      "learning_rate": 8.236370977773966e-06,
      "loss": 3.0113,
      "step": 213178
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0333878993988037,
      "learning_rate": 8.23541907749653e-06,
      "loss": 3.1812,
      "step": 213179
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8264803886413574,
      "learning_rate": 8.234467231463705e-06,
      "loss": 2.9633,
      "step": 213180
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9059364795684814,
      "learning_rate": 8.233515439675553e-06,
      "loss": 2.889,
      "step": 213181
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.797880172729492,
      "learning_rate": 8.23256370213231e-06,
      "loss": 3.3026,
      "step": 213182
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0504982471466064,
      "learning_rate": 8.23161201883411e-06,
      "loss": 2.8184,
      "step": 213183
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0059406757354736,
      "learning_rate": 8.230660389781185e-06,
      "loss": 2.8037,
      "step": 213184
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.827415943145752,
      "learning_rate": 8.229708814973667e-06,
      "loss": 3.2728,
      "step": 213185
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6027235984802246,
      "learning_rate": 8.228757294411826e-06,
      "loss": 2.7618,
      "step": 213186
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7534821033477783,
      "learning_rate": 8.22780582809569e-06,
      "loss": 2.9588,
      "step": 213187
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7650609016418457,
      "learning_rate": 8.226854416025563e-06,
      "loss": 2.8586,
      "step": 213188
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.841688394546509,
      "learning_rate": 8.225903058201511e-06,
      "loss": 3.0518,
      "step": 213189
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2072041034698486,
      "learning_rate": 8.224951754623832e-06,
      "loss": 2.707,
      "step": 213190
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2016141414642334,
      "learning_rate": 8.224000505292593e-06,
      "loss": 2.8984,
      "step": 213191
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2965636253356934,
      "learning_rate": 8.223049310208029e-06,
      "loss": 2.9353,
      "step": 213192
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.68625020980835,
      "learning_rate": 8.22209816937034e-06,
      "loss": 2.9699,
      "step": 213193
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0322914123535156,
      "learning_rate": 8.221147082779656e-06,
      "loss": 3.0118,
      "step": 213194
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7415411472320557,
      "learning_rate": 8.220196050436145e-06,
      "loss": 3.112,
      "step": 213195
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.692605495452881,
      "learning_rate": 8.219245072340041e-06,
      "loss": 3.0199,
      "step": 213196
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.600238800048828,
      "learning_rate": 8.218294148491478e-06,
      "loss": 2.8633,
      "step": 213197
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.432855129241943,
      "learning_rate": 8.21734327889062e-06,
      "loss": 2.9446,
      "step": 213198
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0610930919647217,
      "learning_rate": 8.216392463537703e-06,
      "loss": 2.9086,
      "step": 213199
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.865773916244507,
      "learning_rate": 8.215441702432856e-06,
      "loss": 3.1217,
      "step": 213200
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5015923976898193,
      "learning_rate": 8.214490995576251e-06,
      "loss": 3.0526,
      "step": 213201
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.524324655532837,
      "learning_rate": 8.213540342968084e-06,
      "loss": 3.0483,
      "step": 213202
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.88075590133667,
      "learning_rate": 8.212589744608522e-06,
      "loss": 3.2117,
      "step": 213203
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6707265377044678,
      "learning_rate": 8.211639200497767e-06,
      "loss": 3.1469,
      "step": 213204
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.623101234436035,
      "learning_rate": 8.210688710635948e-06,
      "loss": 3.0688,
      "step": 213205
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0701968669891357,
      "learning_rate": 8.209738275023303e-06,
      "loss": 2.6224,
      "step": 213206
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2277886867523193,
      "learning_rate": 8.208787893659962e-06,
      "loss": 2.7586,
      "step": 213207
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.751382827758789,
      "learning_rate": 8.207837566546094e-06,
      "loss": 3.0895,
      "step": 213208
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.206192970275879,
      "learning_rate": 8.206887293681897e-06,
      "loss": 2.9832,
      "step": 213209
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.852325916290283,
      "learning_rate": 8.205937075067604e-06,
      "loss": 3.0109,
      "step": 213210
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.486583709716797,
      "learning_rate": 8.204986910703283e-06,
      "loss": 3.0487,
      "step": 213211
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5944676399230957,
      "learning_rate": 8.204036800589165e-06,
      "loss": 3.0852,
      "step": 213212
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5160882472991943,
      "learning_rate": 8.203086744725451e-06,
      "loss": 2.8371,
      "step": 213213
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0817692279815674,
      "learning_rate": 8.202136743112275e-06,
      "loss": 2.8361,
      "step": 213214
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3558902740478516,
      "learning_rate": 8.201186795749836e-06,
      "loss": 2.916,
      "step": 213215
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8108325004577637,
      "learning_rate": 8.200236902638302e-06,
      "loss": 3.0965,
      "step": 213216
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.025892734527588,
      "learning_rate": 8.19928706377787e-06,
      "loss": 2.8594,
      "step": 213217
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.413330078125,
      "learning_rate": 8.198337279168643e-06,
      "loss": 3.0179,
      "step": 213218
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.844026565551758,
      "learning_rate": 8.197387548810919e-06,
      "loss": 2.7815,
      "step": 213219
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9833600521087646,
      "learning_rate": 8.196437872704764e-06,
      "loss": 2.7561,
      "step": 213220
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9711074829101562,
      "learning_rate": 8.195488250850414e-06,
      "loss": 2.7501,
      "step": 213221
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3032360076904297,
      "learning_rate": 8.19453868324803e-06,
      "loss": 2.9213,
      "step": 213222
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1198408603668213,
      "learning_rate": 8.193589169897818e-06,
      "loss": 2.9356,
      "step": 213223
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.697516679763794,
      "learning_rate": 8.192639710799875e-06,
      "loss": 2.8761,
      "step": 213224
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0236785411834717,
      "learning_rate": 8.191690305954468e-06,
      "loss": 2.7345,
      "step": 213225
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.558763265609741,
      "learning_rate": 8.190740955361697e-06,
      "loss": 2.9383,
      "step": 213226
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.606618881225586,
      "learning_rate": 8.189791659021794e-06,
      "loss": 3.1344,
      "step": 213227
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.939096212387085,
      "learning_rate": 8.188842416934926e-06,
      "loss": 2.8567,
      "step": 213228
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8351457118988037,
      "learning_rate": 8.187893229101261e-06,
      "loss": 2.7669,
      "step": 213229
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2989468574523926,
      "learning_rate": 8.186944095520964e-06,
      "loss": 3.2402,
      "step": 213230
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.865297317504883,
      "learning_rate": 8.185995016194235e-06,
      "loss": 3.0221,
      "step": 213231
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8775811195373535,
      "learning_rate": 8.185045991121209e-06,
      "loss": 2.7297,
      "step": 213232
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.461383819580078,
      "learning_rate": 8.184097020302116e-06,
      "loss": 3.0187,
      "step": 213233
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2582767009735107,
      "learning_rate": 8.183148103737092e-06,
      "loss": 2.6878,
      "step": 213234
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1984000205993652,
      "learning_rate": 8.182199241426401e-06,
      "loss": 3.0608,
      "step": 213235
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.838324546813965,
      "learning_rate": 8.181250433370046e-06,
      "loss": 2.9059,
      "step": 213236
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.480717658996582,
      "learning_rate": 8.180301679568357e-06,
      "loss": 2.9045,
      "step": 213237
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9995005130767822,
      "learning_rate": 8.179352980021437e-06,
      "loss": 3.073,
      "step": 213238
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.909821033477783,
      "learning_rate": 8.178404334729517e-06,
      "loss": 2.9335,
      "step": 213239
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0368309020996094,
      "learning_rate": 8.177455743692696e-06,
      "loss": 3.0299,
      "step": 213240
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.987724304199219,
      "learning_rate": 8.176507206911243e-06,
      "loss": 2.9279,
      "step": 213241
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.760868549346924,
      "learning_rate": 8.175558724385257e-06,
      "loss": 3.0439,
      "step": 213242
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7400431632995605,
      "learning_rate": 8.17461029611497e-06,
      "loss": 2.7179,
      "step": 213243
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.765122890472412,
      "learning_rate": 8.173661922100483e-06,
      "loss": 3.0646,
      "step": 213244
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7640151977539062,
      "learning_rate": 8.172713602342062e-06,
      "loss": 2.8444,
      "step": 213245
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5530776977539062,
      "learning_rate": 8.171765336839775e-06,
      "loss": 2.891,
      "step": 213246
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.664579153060913,
      "learning_rate": 8.170817125593988e-06,
      "loss": 2.9854,
      "step": 213247
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9401845932006836,
      "learning_rate": 8.169868968604664e-06,
      "loss": 2.7322,
      "step": 213248
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6177988052368164,
      "learning_rate": 8.168920865872075e-06,
      "loss": 2.6402,
      "step": 213249
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.680166244506836,
      "learning_rate": 8.167972817396385e-06,
      "loss": 2.9125,
      "step": 213250
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7807745933532715,
      "learning_rate": 8.167024823177825e-06,
      "loss": 2.8503,
      "step": 213251
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.300295829772949,
      "learning_rate": 8.166076883216466e-06,
      "loss": 2.9207,
      "step": 213252
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.54895281791687,
      "learning_rate": 8.165128997512637e-06,
      "loss": 2.6859,
      "step": 213253
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.195648193359375,
      "learning_rate": 8.16418116606634e-06,
      "loss": 2.8248,
      "step": 213254
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.284163475036621,
      "learning_rate": 8.163233388877844e-06,
      "loss": 2.9384,
      "step": 213255
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5318422317504883,
      "learning_rate": 8.16228566594731e-06,
      "loss": 3.1405,
      "step": 213256
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7364258766174316,
      "learning_rate": 8.161337997274909e-06,
      "loss": 2.735,
      "step": 213257
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7006406784057617,
      "learning_rate": 8.160390382860837e-06,
      "loss": 2.9637,
      "step": 213258
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6385440826416016,
      "learning_rate": 8.159442822705265e-06,
      "loss": 3.0927,
      "step": 213259
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.739280939102173,
      "learning_rate": 8.158495316808355e-06,
      "loss": 2.8071,
      "step": 213260
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5202248096466064,
      "learning_rate": 8.157547865170311e-06,
      "loss": 3.0654,
      "step": 213261
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1307528018951416,
      "learning_rate": 8.15660046779123e-06,
      "loss": 2.8603,
      "step": 213262
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2594192028045654,
      "learning_rate": 8.155653124671414e-06,
      "loss": 3.2442,
      "step": 213263
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4802091121673584,
      "learning_rate": 8.154705835810893e-06,
      "loss": 2.78,
      "step": 213264
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.465707302093506,
      "learning_rate": 8.153758601209969e-06,
      "loss": 2.8908,
      "step": 213265
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.4943299293518066,
      "learning_rate": 8.15281142086881e-06,
      "loss": 2.678,
      "step": 213266
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1998636722564697,
      "learning_rate": 8.151864294787514e-06,
      "loss": 3.0275,
      "step": 213267
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9499642848968506,
      "learning_rate": 8.150917222966246e-06,
      "loss": 2.9804,
      "step": 213268
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.773836612701416,
      "learning_rate": 8.14997020540531e-06,
      "loss": 2.9134,
      "step": 213269
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0444858074188232,
      "learning_rate": 8.149023242104734e-06,
      "loss": 2.9387,
      "step": 213270
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1535069942474365,
      "learning_rate": 8.148076333064824e-06,
      "loss": 2.8096,
      "step": 213271
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.66630220413208,
      "learning_rate": 8.147129478285674e-06,
      "loss": 2.8952,
      "step": 213272
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6838021278381348,
      "learning_rate": 8.146182677767488e-06,
      "loss": 2.7205,
      "step": 213273
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.736111879348755,
      "learning_rate": 8.14523593151043e-06,
      "loss": 3.1722,
      "step": 213274
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.371243476867676,
      "learning_rate": 8.144289239514667e-06,
      "loss": 2.8233,
      "step": 213275
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.907472848892212,
      "learning_rate": 8.1433426017804e-06,
      "loss": 3.3052,
      "step": 213276
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8145592212677,
      "learning_rate": 8.142396018307795e-06,
      "loss": 2.8786,
      "step": 213277
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.564436912536621,
      "learning_rate": 8.141449489097051e-06,
      "loss": 3.1452,
      "step": 213278
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0729923248291016,
      "learning_rate": 8.140503014148302e-06,
      "loss": 2.9534,
      "step": 213279
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1776230335235596,
      "learning_rate": 8.139556593461715e-06,
      "loss": 2.928,
      "step": 213280
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6963040828704834,
      "learning_rate": 8.138610227037523e-06,
      "loss": 2.7447,
      "step": 213281
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.934617519378662,
      "learning_rate": 8.137663914875859e-06,
      "loss": 2.9882,
      "step": 213282
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.9620230197906494,
      "learning_rate": 8.13671765697692e-06,
      "loss": 2.8195,
      "step": 213283
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.015635013580322,
      "learning_rate": 8.135771453340912e-06,
      "loss": 2.8044,
      "step": 213284
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.952065944671631,
      "learning_rate": 8.13482530396793e-06,
      "loss": 2.4106,
      "step": 213285
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.964876890182495,
      "learning_rate": 8.133879208858207e-06,
      "loss": 2.8751,
      "step": 213286
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.188545227050781,
      "learning_rate": 8.132933168011912e-06,
      "loss": 2.928,
      "step": 213287
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.63189435005188,
      "learning_rate": 8.131987181429177e-06,
      "loss": 2.9472,
      "step": 213288
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0956122875213623,
      "learning_rate": 8.131041249110238e-06,
      "loss": 2.9995,
      "step": 213289
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.672802448272705,
      "learning_rate": 8.130095371055256e-06,
      "loss": 2.844,
      "step": 213290
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3224434852600098,
      "learning_rate": 8.129149547264401e-06,
      "loss": 2.9653,
      "step": 213291
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1830568313598633,
      "learning_rate": 8.12820377773784e-06,
      "loss": 3.0374,
      "step": 213292
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9882354736328125,
      "learning_rate": 8.127258062475738e-06,
      "loss": 2.94,
      "step": 213293
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.588549852371216,
      "learning_rate": 8.126312401478296e-06,
      "loss": 2.798,
      "step": 213294
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8585634231567383,
      "learning_rate": 8.125366794745714e-06,
      "loss": 2.824,
      "step": 213295
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8818745613098145,
      "learning_rate": 8.124421242278124e-06,
      "loss": 2.8583,
      "step": 213296
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3327829837799072,
      "learning_rate": 8.123475744075658e-06,
      "loss": 2.8247,
      "step": 213297
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9385838508605957,
      "learning_rate": 8.12253030013862e-06,
      "loss": 3.0488,
      "step": 213298
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.649118185043335,
      "learning_rate": 8.121584910467106e-06,
      "loss": 3.115,
      "step": 213299
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7053756713867188,
      "learning_rate": 8.120639575061249e-06,
      "loss": 2.9138,
      "step": 213300
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.396796941757202,
      "learning_rate": 8.11969429392132e-06,
      "loss": 2.8463,
      "step": 213301
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6531667709350586,
      "learning_rate": 8.118749067047414e-06,
      "loss": 2.7454,
      "step": 213302
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7589471340179443,
      "learning_rate": 8.117803894439767e-06,
      "loss": 3.2028,
      "step": 213303
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3911936283111572,
      "learning_rate": 8.116858776098512e-06,
      "loss": 2.9457,
      "step": 213304
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0566043853759766,
      "learning_rate": 8.11591371202388e-06,
      "loss": 2.9786,
      "step": 213305
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.942924737930298,
      "learning_rate": 8.114968702215973e-06,
      "loss": 2.8288,
      "step": 213306
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.545517921447754,
      "learning_rate": 8.114023746675025e-06,
      "loss": 2.8524,
      "step": 213307
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1460318565368652,
      "learning_rate": 8.113078845401167e-06,
      "loss": 2.8727,
      "step": 213308
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8143532276153564,
      "learning_rate": 8.1121339983946e-06,
      "loss": 2.7068,
      "step": 213309
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8721096515655518,
      "learning_rate": 8.111189205655521e-06,
      "loss": 2.9119,
      "step": 213310
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3618369102478027,
      "learning_rate": 8.110244467184035e-06,
      "loss": 2.9506,
      "step": 213311
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.9785549640655518,
      "learning_rate": 8.109299782980405e-06,
      "loss": 2.6946,
      "step": 213312
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.839277744293213,
      "learning_rate": 8.108355153044766e-06,
      "loss": 2.9324,
      "step": 213313
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0605156421661377,
      "learning_rate": 8.107410577377315e-06,
      "loss": 2.7023,
      "step": 213314
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7988052368164062,
      "learning_rate": 8.106466055978156e-06,
      "loss": 2.946,
      "step": 213315
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.882444381713867,
      "learning_rate": 8.10552158884752e-06,
      "loss": 2.8198,
      "step": 213316
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5338499546051025,
      "learning_rate": 8.104577175985571e-06,
      "loss": 2.9435,
      "step": 213317
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.56919527053833,
      "learning_rate": 8.103632817392546e-06,
      "loss": 2.9758,
      "step": 213318
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.774343490600586,
      "learning_rate": 8.102688513068544e-06,
      "loss": 3.0497,
      "step": 213319
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.149180889129639,
      "learning_rate": 8.101744263013765e-06,
      "loss": 2.9924,
      "step": 213320
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.671973943710327,
      "learning_rate": 8.100800067228341e-06,
      "loss": 2.9181,
      "step": 213321
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8676185607910156,
      "learning_rate": 8.099855925712507e-06,
      "loss": 2.9206,
      "step": 213322
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.15325927734375,
      "learning_rate": 8.098911838466427e-06,
      "loss": 2.7773,
      "step": 213323
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1370513439178467,
      "learning_rate": 8.097967805490268e-06,
      "loss": 3.0434,
      "step": 213324
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.533592700958252,
      "learning_rate": 8.0970238267842e-06,
      "loss": 3.0979,
      "step": 213325
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.192244529724121,
      "learning_rate": 8.096079902348418e-06,
      "loss": 2.9215,
      "step": 213326
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1001136302948,
      "learning_rate": 8.095136032183091e-06,
      "loss": 2.9984,
      "step": 213327
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.243997812271118,
      "learning_rate": 8.094192216288387e-06,
      "loss": 2.7809,
      "step": 213328
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.293252944946289,
      "learning_rate": 8.093248454664469e-06,
      "loss": 2.8658,
      "step": 213329
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.604154348373413,
      "learning_rate": 8.09230474731154e-06,
      "loss": 3.0264,
      "step": 213330
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.937373399734497,
      "learning_rate": 8.091361094229732e-06,
      "loss": 2.9036,
      "step": 213331
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9265975952148438,
      "learning_rate": 8.090417495419277e-06,
      "loss": 2.8212,
      "step": 213332
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.043397903442383,
      "learning_rate": 8.08947395088031e-06,
      "loss": 2.9303,
      "step": 213333
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.095027923583984,
      "learning_rate": 8.088530460613063e-06,
      "loss": 2.9607,
      "step": 213334
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7508721351623535,
      "learning_rate": 8.087587024617603e-06,
      "loss": 2.9813,
      "step": 213335
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9830141067504883,
      "learning_rate": 8.086643642894198e-06,
      "loss": 2.8354,
      "step": 213336
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.241929054260254,
      "learning_rate": 8.085700315442977e-06,
      "loss": 2.8475,
      "step": 213337
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.895284414291382,
      "learning_rate": 8.084757042264178e-06,
      "loss": 3.1543,
      "step": 213338
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0200469493865967,
      "learning_rate": 8.083813823357932e-06,
      "loss": 3.0195,
      "step": 213339
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.395112991333008,
      "learning_rate": 8.082870658724406e-06,
      "loss": 2.7823,
      "step": 213340
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.51003360748291,
      "learning_rate": 8.081927548363765e-06,
      "loss": 3.0016,
      "step": 213341
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3863985538482666,
      "learning_rate": 8.08098449227621e-06,
      "loss": 2.9051,
      "step": 213342
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.70570969581604,
      "learning_rate": 8.080041490461908e-06,
      "loss": 2.9366,
      "step": 213343
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.822999954223633,
      "learning_rate": 8.079098542921026e-06,
      "loss": 2.7576,
      "step": 213344
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.170243740081787,
      "learning_rate": 8.078155649653795e-06,
      "loss": 2.9509,
      "step": 213345
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0558958053588867,
      "learning_rate": 8.077212810660316e-06,
      "loss": 2.8329,
      "step": 213346
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.977527141571045,
      "learning_rate": 8.07627002594079e-06,
      "loss": 3.062,
      "step": 213347
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.322460174560547,
      "learning_rate": 8.075327295495415e-06,
      "loss": 3.0461,
      "step": 213348
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8306221961975098,
      "learning_rate": 8.074384619324292e-06,
      "loss": 2.7383,
      "step": 213349
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9502925872802734,
      "learning_rate": 8.073441997427721e-06,
      "loss": 2.9876,
      "step": 213350
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1258649826049805,
      "learning_rate": 8.072499429805768e-06,
      "loss": 3.0036,
      "step": 213351
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.268842697143555,
      "learning_rate": 8.071556916458667e-06,
      "loss": 2.9118,
      "step": 213352
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6766488552093506,
      "learning_rate": 8.070614457386548e-06,
      "loss": 2.9034,
      "step": 213353
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.362442493438721,
      "learning_rate": 8.069672052589648e-06,
      "loss": 2.8316,
      "step": 213354
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0967838764190674,
      "learning_rate": 8.068729702068067e-06,
      "loss": 2.8688,
      "step": 213355
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8741090297698975,
      "learning_rate": 8.067787405822035e-06,
      "loss": 2.9321,
      "step": 213356
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9369616508483887,
      "learning_rate": 8.066845163851754e-06,
      "loss": 2.9767,
      "step": 213357
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.017679214477539,
      "learning_rate": 8.065902976157324e-06,
      "loss": 2.7773,
      "step": 213358
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.327785015106201,
      "learning_rate": 8.064960842738943e-06,
      "loss": 2.7342,
      "step": 213359
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9786722660064697,
      "learning_rate": 8.064018763596814e-06,
      "loss": 3.0661,
      "step": 213360
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8742940425872803,
      "learning_rate": 8.063076738731067e-06,
      "loss": 2.9214,
      "step": 213361
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8092446327209473,
      "learning_rate": 8.062134768141903e-06,
      "loss": 3.0497,
      "step": 213362
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7084622383117676,
      "learning_rate": 8.061192851829556e-06,
      "loss": 2.8259,
      "step": 213363
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7449750900268555,
      "learning_rate": 8.060250989794092e-06,
      "loss": 2.7143,
      "step": 213364
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.8572611808776855,
      "learning_rate": 8.059309182035744e-06,
      "loss": 2.8531,
      "step": 213365
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6608991622924805,
      "learning_rate": 8.058367428554712e-06,
      "loss": 3.0169,
      "step": 213366
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4927213191986084,
      "learning_rate": 8.057425729351097e-06,
      "loss": 2.5989,
      "step": 213367
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5338377952575684,
      "learning_rate": 8.05648408442513e-06,
      "loss": 2.7612,
      "step": 213368
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.432744026184082,
      "learning_rate": 8.055542493777012e-06,
      "loss": 3.0335,
      "step": 213369
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7146880626678467,
      "learning_rate": 8.054600957406843e-06,
      "loss": 2.8335,
      "step": 213370
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.945584774017334,
      "learning_rate": 8.053659475314822e-06,
      "loss": 2.8805,
      "step": 213371
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4509871006011963,
      "learning_rate": 8.052718047501183e-06,
      "loss": 2.785,
      "step": 213372
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0475854873657227,
      "learning_rate": 8.051776673965992e-06,
      "loss": 2.7872,
      "step": 213373
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2184433937072754,
      "learning_rate": 8.050835354709551e-06,
      "loss": 2.9458,
      "step": 213374
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9990389347076416,
      "learning_rate": 8.049894089731924e-06,
      "loss": 2.8734,
      "step": 213375
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9107582569122314,
      "learning_rate": 8.048952879033377e-06,
      "loss": 2.9772,
      "step": 213376
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6463968753814697,
      "learning_rate": 8.048011722613978e-06,
      "loss": 2.6892,
      "step": 213377
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0588431358337402,
      "learning_rate": 8.047070620474027e-06,
      "loss": 2.9521,
      "step": 213378
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.999201536178589,
      "learning_rate": 8.046129572613592e-06,
      "loss": 2.8068,
      "step": 213379
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.026900053024292,
      "learning_rate": 8.045188579032901e-06,
      "loss": 3.0097,
      "step": 213380
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.126291275024414,
      "learning_rate": 8.04424763973216e-06,
      "loss": 3.0835,
      "step": 213381
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.796957492828369,
      "learning_rate": 8.043306754711465e-06,
      "loss": 2.9444,
      "step": 213382
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.172244071960449,
      "learning_rate": 8.042365923971051e-06,
      "loss": 3.0244,
      "step": 213383
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2187607288360596,
      "learning_rate": 8.04142514751105e-06,
      "loss": 2.9741,
      "step": 213384
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.200247049331665,
      "learning_rate": 8.040484425331662e-06,
      "loss": 2.8001,
      "step": 213385
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5024893283843994,
      "learning_rate": 8.039543757433086e-06,
      "loss": 2.9522,
      "step": 213386
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.527048110961914,
      "learning_rate": 8.038603143815459e-06,
      "loss": 2.9492,
      "step": 213387
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.947326183319092,
      "learning_rate": 8.037662584478943e-06,
      "loss": 2.8747,
      "step": 213388
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.030158042907715,
      "learning_rate": 8.03672207942374e-06,
      "loss": 2.899,
      "step": 213389
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3256144523620605,
      "learning_rate": 8.03578162865005e-06,
      "loss": 2.9744,
      "step": 213390
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.964505672454834,
      "learning_rate": 8.034841232157974e-06,
      "loss": 2.7846,
      "step": 213391
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.260022163391113,
      "learning_rate": 8.033900889947776e-06,
      "loss": 2.7411,
      "step": 213392
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.009443759918213,
      "learning_rate": 8.032960602019556e-06,
      "loss": 2.9305,
      "step": 213393
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5822579860687256,
      "learning_rate": 8.032020368373548e-06,
      "loss": 2.7904,
      "step": 213394
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8971965312957764,
      "learning_rate": 8.031080189009919e-06,
      "loss": 2.8691,
      "step": 213395
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.445988178253174,
      "learning_rate": 8.030140063928769e-06,
      "loss": 3.2022,
      "step": 213396
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.778409719467163,
      "learning_rate": 8.029199993130364e-06,
      "loss": 2.861,
      "step": 213397
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3941469192504883,
      "learning_rate": 8.028259976614836e-06,
      "loss": 3.0263,
      "step": 213398
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8699896335601807,
      "learning_rate": 8.027320014382321e-06,
      "loss": 2.86,
      "step": 213399
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8066422939300537,
      "learning_rate": 8.026380106433117e-06,
      "loss": 2.7583,
      "step": 213400
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.4810056686401367,
      "learning_rate": 8.02544025276729e-06,
      "loss": 2.8706,
      "step": 213401
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4212136268615723,
      "learning_rate": 8.024500453385008e-06,
      "loss": 2.9253,
      "step": 213402
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.883967161178589,
      "learning_rate": 8.023560708286502e-06,
      "loss": 2.9447,
      "step": 213403
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.516265869140625,
      "learning_rate": 8.022621017471943e-06,
      "loss": 2.8733,
      "step": 213404
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.093418598175049,
      "learning_rate": 8.021681380941492e-06,
      "loss": 2.9033,
      "step": 213405
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.14790415763855,
      "learning_rate": 8.020741798695318e-06,
      "loss": 2.9636,
      "step": 213406
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5865275859832764,
      "learning_rate": 8.019802270733588e-06,
      "loss": 3.0581,
      "step": 213407
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.25484037399292,
      "learning_rate": 8.018862797056468e-06,
      "loss": 2.9021,
      "step": 213408
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7064201831817627,
      "learning_rate": 8.017923377664193e-06,
      "loss": 3.0147,
      "step": 213409
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8232576847076416,
      "learning_rate": 8.016984012556893e-06,
      "loss": 3.0915,
      "step": 213410
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5265512466430664,
      "learning_rate": 8.016044701734736e-06,
      "loss": 2.8367,
      "step": 213411
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8277270793914795,
      "learning_rate": 8.015105445197923e-06,
      "loss": 2.8103,
      "step": 213412
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.764897108078003,
      "learning_rate": 8.014166242946617e-06,
      "loss": 2.885,
      "step": 213413
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9914655685424805,
      "learning_rate": 8.013227094980956e-06,
      "loss": 3.0573,
      "step": 213414
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7756128311157227,
      "learning_rate": 8.01228800130117e-06,
      "loss": 2.6594,
      "step": 213415
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0755248069763184,
      "learning_rate": 8.011348961907393e-06,
      "loss": 3.022,
      "step": 213416
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.947763204574585,
      "learning_rate": 8.010409976799825e-06,
      "loss": 2.9043,
      "step": 213417
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7053637504577637,
      "learning_rate": 8.009471045978665e-06,
      "loss": 2.7855,
      "step": 213418
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9900758266448975,
      "learning_rate": 8.008532169444015e-06,
      "loss": 2.84,
      "step": 213419
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5810320377349854,
      "learning_rate": 8.007593347196107e-06,
      "loss": 2.8559,
      "step": 213420
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4341344833374023,
      "learning_rate": 8.006654579235105e-06,
      "loss": 2.755,
      "step": 213421
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.803496837615967,
      "learning_rate": 8.005715865561147e-06,
      "loss": 2.856,
      "step": 213422
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1877803802490234,
      "learning_rate": 8.004777206174462e-06,
      "loss": 2.8988,
      "step": 213423
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7341904640197754,
      "learning_rate": 8.003838601075218e-06,
      "loss": 2.7615,
      "step": 213424
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.497431993484497,
      "learning_rate": 8.002900050263583e-06,
      "loss": 2.9991,
      "step": 213425
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9878029823303223,
      "learning_rate": 8.001961553739655e-06,
      "loss": 3.1259,
      "step": 213426
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.798618793487549,
      "learning_rate": 8.001023111503735e-06,
      "loss": 3.1742,
      "step": 213427
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5365710258483887,
      "learning_rate": 8.000084723555889e-06,
      "loss": 3.0914,
      "step": 213428
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.841780185699463,
      "learning_rate": 7.999146389896383e-06,
      "loss": 2.6822,
      "step": 213429
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.855631113052368,
      "learning_rate": 7.998208110525351e-06,
      "loss": 2.8486,
      "step": 213430
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8133723735809326,
      "learning_rate": 7.997269885442926e-06,
      "loss": 3.0438,
      "step": 213431
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5631954669952393,
      "learning_rate": 7.99633171464934e-06,
      "loss": 2.9556,
      "step": 213432
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.011296033859253,
      "learning_rate": 7.995393598144729e-06,
      "loss": 3.1703,
      "step": 213433
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.738278865814209,
      "learning_rate": 7.994455535929289e-06,
      "loss": 2.7687,
      "step": 213434
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.886329412460327,
      "learning_rate": 7.993517528003224e-06,
      "loss": 2.8871,
      "step": 213435
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6654515266418457,
      "learning_rate": 7.992579574366665e-06,
      "loss": 2.9697,
      "step": 213436
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.559119462966919,
      "learning_rate": 7.99164167501981e-06,
      "loss": 2.6639,
      "step": 213437
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1168205738067627,
      "learning_rate": 7.990703829962763e-06,
      "loss": 3.0432,
      "step": 213438
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4143807888031006,
      "learning_rate": 7.989766039195822e-06,
      "loss": 2.879,
      "step": 213439
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.880601644515991,
      "learning_rate": 7.988828302719052e-06,
      "loss": 2.8631,
      "step": 213440
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6809701919555664,
      "learning_rate": 7.98789062053269e-06,
      "loss": 3.0763,
      "step": 213441
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6986489295959473,
      "learning_rate": 7.986952992636896e-06,
      "loss": 2.7113,
      "step": 213442
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1546239852905273,
      "learning_rate": 7.986015419031843e-06,
      "loss": 2.8679,
      "step": 213443
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8807053565979004,
      "learning_rate": 7.985077899717663e-06,
      "loss": 3.073,
      "step": 213444
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8609673976898193,
      "learning_rate": 7.98414043469462e-06,
      "loss": 2.8394,
      "step": 213445
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.55045223236084,
      "learning_rate": 7.98320302396278e-06,
      "loss": 3.0523,
      "step": 213446
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0638206005096436,
      "learning_rate": 7.982265667522414e-06,
      "loss": 2.9842,
      "step": 213447
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.19484543800354,
      "learning_rate": 7.981328365373685e-06,
      "loss": 2.8303,
      "step": 213448
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0944130420684814,
      "learning_rate": 7.980391117516693e-06,
      "loss": 3.1406,
      "step": 213449
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5170931816101074,
      "learning_rate": 7.979453923951672e-06,
      "loss": 2.988,
      "step": 213450
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9145030975341797,
      "learning_rate": 7.97851678467879e-06,
      "loss": 3.1847,
      "step": 213451
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6420722007751465,
      "learning_rate": 7.977579699698179e-06,
      "loss": 3.0666,
      "step": 213452
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4393584728240967,
      "learning_rate": 7.97664266901007e-06,
      "loss": 2.8378,
      "step": 213453
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6949093341827393,
      "learning_rate": 7.97570569261463e-06,
      "loss": 2.7257,
      "step": 213454
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8622238636016846,
      "learning_rate": 7.97476877051203e-06,
      "loss": 3.0152,
      "step": 213455
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.898780345916748,
      "learning_rate": 7.973831902702398e-06,
      "loss": 3.0849,
      "step": 213456
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7063610553741455,
      "learning_rate": 7.972895089185972e-06,
      "loss": 2.7946,
      "step": 213457
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1714091300964355,
      "learning_rate": 7.971958329962846e-06,
      "loss": 2.9687,
      "step": 213458
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1020777225494385,
      "learning_rate": 7.971021625033292e-06,
      "loss": 2.8766,
      "step": 213459
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.02335524559021,
      "learning_rate": 7.970084974397439e-06,
      "loss": 2.9139,
      "step": 213460
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.148800849914551,
      "learning_rate": 7.96914837805549e-06,
      "loss": 2.7736,
      "step": 213461
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8795862197875977,
      "learning_rate": 7.96821183600751e-06,
      "loss": 2.911,
      "step": 213462
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.906114339828491,
      "learning_rate": 7.967275348253799e-06,
      "loss": 2.821,
      "step": 213463
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.908416986465454,
      "learning_rate": 7.966338914794457e-06,
      "loss": 3.2943,
      "step": 213464
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3329923152923584,
      "learning_rate": 7.965402535629716e-06,
      "loss": 2.854,
      "step": 213465
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.963843584060669,
      "learning_rate": 7.96446621075968e-06,
      "loss": 2.7571,
      "step": 213466
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.48417067527771,
      "learning_rate": 7.963529940184644e-06,
      "loss": 3.1841,
      "step": 213467
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5396311283111572,
      "learning_rate": 7.96259372390461e-06,
      "loss": 2.7373,
      "step": 213468
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3596107959747314,
      "learning_rate": 7.961657561919876e-06,
      "loss": 2.7801,
      "step": 213469
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1742441654205322,
      "learning_rate": 7.960721454230578e-06,
      "loss": 2.7962,
      "step": 213470
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9649736881256104,
      "learning_rate": 7.959785400836916e-06,
      "loss": 2.9803,
      "step": 213471
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.927025079727173,
      "learning_rate": 7.958849401739021e-06,
      "loss": 2.8414,
      "step": 213472
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.154551982879639,
      "learning_rate": 7.957913456937093e-06,
      "loss": 2.7649,
      "step": 213473
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.928943634033203,
      "learning_rate": 7.9569775664313e-06,
      "loss": 3.022,
      "step": 213474
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1569693088531494,
      "learning_rate": 7.95604173022184e-06,
      "loss": 3.0209,
      "step": 213475
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0103838443756104,
      "learning_rate": 7.955105948308816e-06,
      "loss": 2.8312,
      "step": 213476
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.97414493560791,
      "learning_rate": 7.95417022069249e-06,
      "loss": 2.9337,
      "step": 213477
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5845515727996826,
      "learning_rate": 7.953234547372967e-06,
      "loss": 2.9188,
      "step": 213478
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0340664386749268,
      "learning_rate": 7.952298928350476e-06,
      "loss": 2.8427,
      "step": 213479
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9325618743896484,
      "learning_rate": 7.951363363625185e-06,
      "loss": 2.9335,
      "step": 213480
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5192790031433105,
      "learning_rate": 7.950427853197228e-06,
      "loss": 3.035,
      "step": 213481
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7140870094299316,
      "learning_rate": 7.949492397066771e-06,
      "loss": 2.853,
      "step": 213482
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7745251655578613,
      "learning_rate": 7.948556995234045e-06,
      "loss": 3.0957,
      "step": 213483
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5924901962280273,
      "learning_rate": 7.947621647699187e-06,
      "loss": 2.8991,
      "step": 213484
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.84199595451355,
      "learning_rate": 7.946686354462395e-06,
      "loss": 3.0027,
      "step": 213485
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8490469455718994,
      "learning_rate": 7.945751115523802e-06,
      "loss": 3.0095,
      "step": 213486
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7350287437438965,
      "learning_rate": 7.944815930883608e-06,
      "loss": 2.8198,
      "step": 213487
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.546455144882202,
      "learning_rate": 7.943880800542013e-06,
      "loss": 3.1244,
      "step": 213488
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1812326908111572,
      "learning_rate": 7.94294572449915e-06,
      "loss": 3.0838,
      "step": 213489
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9079060554504395,
      "learning_rate": 7.942010702755186e-06,
      "loss": 2.9235,
      "step": 213490
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4358298778533936,
      "learning_rate": 7.941075735310354e-06,
      "loss": 2.7602,
      "step": 213491
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1770310401916504,
      "learning_rate": 7.940140822164753e-06,
      "loss": 3.0761,
      "step": 213492
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8273990154266357,
      "learning_rate": 7.939205963318618e-06,
      "loss": 2.667,
      "step": 213493
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1903457641601562,
      "learning_rate": 7.938271158772081e-06,
      "loss": 3.0752,
      "step": 213494
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3300209045410156,
      "learning_rate": 7.937336408525308e-06,
      "loss": 3.1056,
      "step": 213495
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1401376724243164,
      "learning_rate": 7.936401712578532e-06,
      "loss": 2.7737,
      "step": 213496
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.983321189880371,
      "learning_rate": 7.935467070931922e-06,
      "loss": 3.1159,
      "step": 213497
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8435542583465576,
      "learning_rate": 7.934532483585576e-06,
      "loss": 2.8449,
      "step": 213498
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7364883422851562,
      "learning_rate": 7.933597950539728e-06,
      "loss": 2.9064,
      "step": 213499
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.738043785095215,
      "learning_rate": 7.932663471794543e-06,
      "loss": 2.7855,
      "step": 213500
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.633838176727295,
      "learning_rate": 7.931729047350155e-06,
      "loss": 2.9442,
      "step": 213501
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.499699115753174,
      "learning_rate": 7.930794677206798e-06,
      "loss": 2.7804,
      "step": 213502
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0276904106140137,
      "learning_rate": 7.929860361364637e-06,
      "loss": 3.1112,
      "step": 213503
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0654735565185547,
      "learning_rate": 7.928926099823808e-06,
      "loss": 2.852,
      "step": 213504
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.975066661834717,
      "learning_rate": 7.927991892584507e-06,
      "loss": 2.9262,
      "step": 213505
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.707207441329956,
      "learning_rate": 7.927057739646903e-06,
      "loss": 2.8593,
      "step": 213506
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.771080255508423,
      "learning_rate": 7.926123641011161e-06,
      "loss": 2.933,
      "step": 213507
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5737929344177246,
      "learning_rate": 7.925189596677484e-06,
      "loss": 2.9638,
      "step": 213508
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.06329607963562,
      "learning_rate": 7.924255606646035e-06,
      "loss": 3.1794,
      "step": 213509
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.023585796356201,
      "learning_rate": 7.923321670916983e-06,
      "loss": 2.8314,
      "step": 213510
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9878499507904053,
      "learning_rate": 7.922387789490459e-06,
      "loss": 2.8347,
      "step": 213511
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.844259262084961,
      "learning_rate": 7.92145396236673e-06,
      "loss": 3.1723,
      "step": 213512
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.459141731262207,
      "learning_rate": 7.920520189545865e-06,
      "loss": 2.5713,
      "step": 213513
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2612600326538086,
      "learning_rate": 7.919586471028128e-06,
      "loss": 2.8862,
      "step": 213514
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0682895183563232,
      "learning_rate": 7.918652806813619e-06,
      "loss": 3.1914,
      "step": 213515
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0986289978027344,
      "learning_rate": 7.917719196902572e-06,
      "loss": 2.7559,
      "step": 213516
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8826024532318115,
      "learning_rate": 7.91678564129512e-06,
      "loss": 2.6804,
      "step": 213517
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.063660144805908,
      "learning_rate": 7.915852139991463e-06,
      "loss": 2.8363,
      "step": 213518
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7170982360839844,
      "learning_rate": 7.914918692991734e-06,
      "loss": 2.8574,
      "step": 213519
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1285946369171143,
      "learning_rate": 7.913985300296166e-06,
      "loss": 2.6193,
      "step": 213520
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.22113037109375,
      "learning_rate": 7.913051961904926e-06,
      "loss": 3.1512,
      "step": 213521
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.325955867767334,
      "learning_rate": 7.912118677818147e-06,
      "loss": 2.8092,
      "step": 213522
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9645559787750244,
      "learning_rate": 7.911185448035962e-06,
      "loss": 3.0587,
      "step": 213523
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.75909161567688,
      "learning_rate": 7.91025227255867e-06,
      "loss": 2.7262,
      "step": 213524
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.8418447971343994,
      "learning_rate": 7.909319151386307e-06,
      "loss": 2.9076,
      "step": 213525
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3352787494659424,
      "learning_rate": 7.908386084519169e-06,
      "loss": 3.1088,
      "step": 213526
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1938908100128174,
      "learning_rate": 7.90745307195736e-06,
      "loss": 2.9339,
      "step": 213527
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7315475940704346,
      "learning_rate": 7.90652011370111e-06,
      "loss": 3.282,
      "step": 213528
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8759331703186035,
      "learning_rate": 7.905587209750486e-06,
      "loss": 3.1028,
      "step": 213529
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7202954292297363,
      "learning_rate": 7.904654360105756e-06,
      "loss": 2.7749,
      "step": 213530
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2911601066589355,
      "learning_rate": 7.903721564767052e-06,
      "loss": 2.9749,
      "step": 213531
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8122293949127197,
      "learning_rate": 7.902788823734573e-06,
      "loss": 2.882,
      "step": 213532
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.094616413116455,
      "learning_rate": 7.901856137008456e-06,
      "loss": 2.8259,
      "step": 213533
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.592543125152588,
      "learning_rate": 7.900923504588964e-06,
      "loss": 2.8676,
      "step": 213534
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6430983543395996,
      "learning_rate": 7.89999092647613e-06,
      "loss": 3.0328,
      "step": 213535
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.152369499206543,
      "learning_rate": 7.899058402670255e-06,
      "loss": 2.8773,
      "step": 213536
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0814263820648193,
      "learning_rate": 7.898125933171407e-06,
      "loss": 3.1886,
      "step": 213537
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3880615234375,
      "learning_rate": 7.89719351797985e-06,
      "loss": 3.1046,
      "step": 213538
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0410656929016113,
      "learning_rate": 7.896261157095685e-06,
      "loss": 2.8798,
      "step": 213539
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2311782836914062,
      "learning_rate": 7.895328850519178e-06,
      "loss": 3.0066,
      "step": 213540
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.776766538619995,
      "learning_rate": 7.894396598250396e-06,
      "loss": 2.8846,
      "step": 213541
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.439380407333374,
      "learning_rate": 7.893464400289573e-06,
      "loss": 2.9699,
      "step": 213542
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5508313179016113,
      "learning_rate": 7.892532256636841e-06,
      "loss": 2.9989,
      "step": 213543
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.098684310913086,
      "learning_rate": 7.891600167292433e-06,
      "loss": 3.167,
      "step": 213544
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8152308464050293,
      "learning_rate": 7.890668132256483e-06,
      "loss": 3.0537,
      "step": 213545
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.87292742729187,
      "learning_rate": 7.889736151529224e-06,
      "loss": 3.1428,
      "step": 213546
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6059396266937256,
      "learning_rate": 7.88880422511069e-06,
      "loss": 2.7793,
      "step": 213547
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.454310417175293,
      "learning_rate": 7.887872353001179e-06,
      "loss": 2.7759,
      "step": 213548
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.506795883178711,
      "learning_rate": 7.886940535200826e-06,
      "loss": 2.909,
      "step": 213549
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.56445574760437,
      "learning_rate": 7.886008771709796e-06,
      "loss": 2.9312,
      "step": 213550
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8711752891540527,
      "learning_rate": 7.885077062528256e-06,
      "loss": 3.2267,
      "step": 213551
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.727238178253174,
      "learning_rate": 7.884145407656472e-06,
      "loss": 2.9051,
      "step": 213552
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5633888244628906,
      "learning_rate": 7.883213807094446e-06,
      "loss": 2.9395,
      "step": 213553
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2056326866149902,
      "learning_rate": 7.88228226084251e-06,
      "loss": 2.6783,
      "step": 213554
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3912646770477295,
      "learning_rate": 7.88135076890073e-06,
      "loss": 2.9135,
      "step": 213555
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.670714855194092,
      "learning_rate": 7.88041933126934e-06,
      "loss": 3.2319,
      "step": 213556
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.4718337059020996,
      "learning_rate": 7.87948794794847e-06,
      "loss": 3.0042,
      "step": 213557
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8753373622894287,
      "learning_rate": 7.878556618938326e-06,
      "loss": 2.8847,
      "step": 213558
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1749064922332764,
      "learning_rate": 7.877625344239103e-06,
      "loss": 2.7546,
      "step": 213559
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.138845443725586,
      "learning_rate": 7.876694123850936e-06,
      "loss": 2.9371,
      "step": 213560
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.06464147567749,
      "learning_rate": 7.87576295777399e-06,
      "loss": 2.8738,
      "step": 213561
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7056782245635986,
      "learning_rate": 7.874831846008467e-06,
      "loss": 2.8967,
      "step": 213562
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7279903888702393,
      "learning_rate": 7.8739007885545e-06,
      "loss": 3.1347,
      "step": 213563
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.005404472351074,
      "learning_rate": 7.87296978541232e-06,
      "loss": 2.9888,
      "step": 213564
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.780040979385376,
      "learning_rate": 7.872038836582062e-06,
      "loss": 2.9046,
      "step": 213565
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.636528253555298,
      "learning_rate": 7.871107942063892e-06,
      "loss": 3.0245,
      "step": 213566
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.122723340988159,
      "learning_rate": 7.87017710185801e-06,
      "loss": 2.7905,
      "step": 213567
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.952054977416992,
      "learning_rate": 7.869246315964584e-06,
      "loss": 2.8799,
      "step": 213568
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.598085880279541,
      "learning_rate": 7.868315584383744e-06,
      "loss": 3.0498,
      "step": 213569
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3556482791900635,
      "learning_rate": 7.867384907115727e-06,
      "loss": 2.9388,
      "step": 213570
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.688836097717285,
      "learning_rate": 7.866454284160695e-06,
      "loss": 2.65,
      "step": 213571
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.008800745010376,
      "learning_rate": 7.865523715518752e-06,
      "loss": 2.8678,
      "step": 213572
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3539276123046875,
      "learning_rate": 7.864593201190161e-06,
      "loss": 2.864,
      "step": 213573
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.65116548538208,
      "learning_rate": 7.863662741175059e-06,
      "loss": 2.946,
      "step": 213574
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8589625358581543,
      "learning_rate": 7.862732335473576e-06,
      "loss": 2.9735,
      "step": 213575
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.483313083648682,
      "learning_rate": 7.861801984085981e-06,
      "loss": 2.9102,
      "step": 213576
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.527331352233887,
      "learning_rate": 7.86087168701237e-06,
      "loss": 2.8848,
      "step": 213577
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0710949897766113,
      "learning_rate": 7.859941444252915e-06,
      "loss": 2.7423,
      "step": 213578
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0698184967041016,
      "learning_rate": 7.859011255807845e-06,
      "loss": 2.7419,
      "step": 213579
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.735067367553711,
      "learning_rate": 7.858081121677262e-06,
      "loss": 2.8078,
      "step": 213580
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.799565076828003,
      "learning_rate": 7.857151041861397e-06,
      "loss": 3.0133,
      "step": 213581
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.762787342071533,
      "learning_rate": 7.85622101636042e-06,
      "loss": 2.8661,
      "step": 213582
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1826109886169434,
      "learning_rate": 7.855291045174494e-06,
      "loss": 2.6761,
      "step": 213583
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.003375768661499,
      "learning_rate": 7.854361128303722e-06,
      "loss": 3.0289,
      "step": 213584
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0979344844818115,
      "learning_rate": 7.8534312657484e-06,
      "loss": 3.1479,
      "step": 213585
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5638394355773926,
      "learning_rate": 7.852501457508598e-06,
      "loss": 3.0704,
      "step": 213586
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.514636278152466,
      "learning_rate": 7.851571703584547e-06,
      "loss": 3.0386,
      "step": 213587
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.12724232673645,
      "learning_rate": 7.850642003976415e-06,
      "loss": 2.9756,
      "step": 213588
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.908524513244629,
      "learning_rate": 7.849712358684368e-06,
      "loss": 2.8367,
      "step": 213589
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1894030570983887,
      "learning_rate": 7.84878276770854e-06,
      "loss": 2.7248,
      "step": 213590
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.310347080230713,
      "learning_rate": 7.847853231049161e-06,
      "loss": 2.7802,
      "step": 213591
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3620660305023193,
      "learning_rate": 7.846923748706368e-06,
      "loss": 2.8177,
      "step": 213592
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1428465843200684,
      "learning_rate": 7.84599432068036e-06,
      "loss": 2.9258,
      "step": 213593
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.66079044342041,
      "learning_rate": 7.845064946971268e-06,
      "loss": 2.7264,
      "step": 213594
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.700373649597168,
      "learning_rate": 7.844135627579363e-06,
      "loss": 2.819,
      "step": 213595
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7271995544433594,
      "learning_rate": 7.843206362504705e-06,
      "loss": 2.8821,
      "step": 213596
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.065476179122925,
      "learning_rate": 7.8422771517475e-06,
      "loss": 2.9852,
      "step": 213597
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9965555667877197,
      "learning_rate": 7.841347995307944e-06,
      "loss": 3.0312,
      "step": 213598
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.980647087097168,
      "learning_rate": 7.840418893186173e-06,
      "loss": 2.8734,
      "step": 213599
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.296156644821167,
      "learning_rate": 7.839489845382385e-06,
      "loss": 2.8115,
      "step": 213600
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2764697074890137,
      "learning_rate": 7.838560851896814e-06,
      "loss": 2.7174,
      "step": 213601
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9068894386291504,
      "learning_rate": 7.837631912729525e-06,
      "loss": 2.8071,
      "step": 213602
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7792534828186035,
      "learning_rate": 7.83670302788072e-06,
      "loss": 3.0501,
      "step": 213603
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0058438777923584,
      "learning_rate": 7.835774197350597e-06,
      "loss": 3.0036,
      "step": 213604
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9240405559539795,
      "learning_rate": 7.834845421139324e-06,
      "loss": 3.0548,
      "step": 213605
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.912621259689331,
      "learning_rate": 7.833916699247033e-06,
      "loss": 3.1162,
      "step": 213606
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9015817642211914,
      "learning_rate": 7.832988031674026e-06,
      "loss": 2.6713,
      "step": 213607
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.800597906112671,
      "learning_rate": 7.8320594184203e-06,
      "loss": 2.8745,
      "step": 213608
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7849605083465576,
      "learning_rate": 7.831130859486123e-06,
      "loss": 2.819,
      "step": 213609
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.740205764770508,
      "learning_rate": 7.830202354871662e-06,
      "loss": 3.0319,
      "step": 213610
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.853351593017578,
      "learning_rate": 7.829273904577082e-06,
      "loss": 2.9893,
      "step": 213611
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2185771465301514,
      "learning_rate": 7.828345508602518e-06,
      "loss": 3.0963,
      "step": 213612
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7814364433288574,
      "learning_rate": 7.827417166948268e-06,
      "loss": 3.0052,
      "step": 213613
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0626041889190674,
      "learning_rate": 7.826488879614335e-06,
      "loss": 2.9367,
      "step": 213614
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0597968101501465,
      "learning_rate": 7.825560646601014e-06,
      "loss": 3.1686,
      "step": 213615
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.511859655380249,
      "learning_rate": 7.824632467908409e-06,
      "loss": 3.096,
      "step": 213616
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0019948482513428,
      "learning_rate": 7.823704343536752e-06,
      "loss": 3.0621,
      "step": 213617
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.116127967834473,
      "learning_rate": 7.822776273486142e-06,
      "loss": 3.0374,
      "step": 213618
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.655320405960083,
      "learning_rate": 7.821848257756847e-06,
      "loss": 3.1908,
      "step": 213619
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.705815315246582,
      "learning_rate": 7.820920296348966e-06,
      "loss": 3.1071,
      "step": 213620
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7586669921875,
      "learning_rate": 7.819992389262664e-06,
      "loss": 2.8629,
      "step": 213621
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.897791624069214,
      "learning_rate": 7.819064536498144e-06,
      "loss": 2.7861,
      "step": 213622
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1392457485198975,
      "learning_rate": 7.818136738055603e-06,
      "loss": 3.1699,
      "step": 213623
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.186701774597168,
      "learning_rate": 7.817208993935142e-06,
      "loss": 2.6698,
      "step": 213624
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0971972942352295,
      "learning_rate": 7.816281304137063e-06,
      "loss": 3.2651,
      "step": 213625
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.968353509902954,
      "learning_rate": 7.815353668661394e-06,
      "loss": 2.9764,
      "step": 213626
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.799175977706909,
      "learning_rate": 7.814426087508373e-06,
      "loss": 2.6889,
      "step": 213627
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.984527349472046,
      "learning_rate": 7.813498560678133e-06,
      "loss": 2.8353,
      "step": 213628
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6163430213928223,
      "learning_rate": 7.812571088170905e-06,
      "loss": 2.9748,
      "step": 213629
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7981297969818115,
      "learning_rate": 7.811643669986823e-06,
      "loss": 2.8154,
      "step": 213630
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4620602130889893,
      "learning_rate": 7.810716306126152e-06,
      "loss": 2.8318,
      "step": 213631
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4340109825134277,
      "learning_rate": 7.809788996588894e-06,
      "loss": 3.0076,
      "step": 213632
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.474409580230713,
      "learning_rate": 7.808861741375349e-06,
      "loss": 2.7308,
      "step": 213633
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.844005823135376,
      "learning_rate": 7.807934540485616e-06,
      "loss": 2.9831,
      "step": 213634
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8706936836242676,
      "learning_rate": 7.807007393919929e-06,
      "loss": 3.044,
      "step": 213635
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1722426414489746,
      "learning_rate": 7.80608030167842e-06,
      "loss": 2.9746,
      "step": 213636
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8320560455322266,
      "learning_rate": 7.80515326376132e-06,
      "loss": 2.6448,
      "step": 213637
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.461228847503662,
      "learning_rate": 7.804226280168734e-06,
      "loss": 2.8727,
      "step": 213638
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7038047313690186,
      "learning_rate": 7.80329935090086e-06,
      "loss": 2.9398,
      "step": 213639
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.084369421005249,
      "learning_rate": 7.80237247595783e-06,
      "loss": 2.8745,
      "step": 213640
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0544471740722656,
      "learning_rate": 7.80144565533991e-06,
      "loss": 2.9639,
      "step": 213641
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9746816158294678,
      "learning_rate": 7.800518889047169e-06,
      "loss": 3.133,
      "step": 213642
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.556479454040527,
      "learning_rate": 7.799592177079872e-06,
      "loss": 2.7817,
      "step": 213643
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9468939304351807,
      "learning_rate": 7.798665519438118e-06,
      "loss": 2.8703,
      "step": 213644
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0167953968048096,
      "learning_rate": 7.79773891612211e-06,
      "loss": 2.7407,
      "step": 213645
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.034743309020996,
      "learning_rate": 7.79681236713201e-06,
      "loss": 3.1047,
      "step": 213646
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.693624973297119,
      "learning_rate": 7.795885872468022e-06,
      "loss": 2.938,
      "step": 213647
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.498771905899048,
      "learning_rate": 7.794959432130277e-06,
      "loss": 2.8094,
      "step": 213648
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.671658515930176,
      "learning_rate": 7.794033046118974e-06,
      "loss": 2.8606,
      "step": 213649
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9185256958007812,
      "learning_rate": 7.793106714434283e-06,
      "loss": 2.9113,
      "step": 213650
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.751359462738037,
      "learning_rate": 7.792180437076367e-06,
      "loss": 3.085,
      "step": 213651
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.234623432159424,
      "learning_rate": 7.791254214045362e-06,
      "loss": 2.7295,
      "step": 213652
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.15960431098938,
      "learning_rate": 7.790328045341532e-06,
      "loss": 2.8097,
      "step": 213653
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8850655555725098,
      "learning_rate": 7.789401930964945e-06,
      "loss": 3.0526,
      "step": 213654
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.733175039291382,
      "learning_rate": 7.788475870915866e-06,
      "loss": 2.9946,
      "step": 213655
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.153122901916504,
      "learning_rate": 7.787549865194432e-06,
      "loss": 2.9358,
      "step": 213656
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.408086061477661,
      "learning_rate": 7.78662391380077e-06,
      "loss": 2.9424,
      "step": 213657
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1399569511413574,
      "learning_rate": 7.78569801673512e-06,
      "loss": 2.9932,
      "step": 213658
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2323412895202637,
      "learning_rate": 7.78477217399761e-06,
      "loss": 3.2302,
      "step": 213659
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.821756362915039,
      "learning_rate": 7.783846385588411e-06,
      "loss": 2.862,
      "step": 213660
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7379379272460938,
      "learning_rate": 7.78292065150775e-06,
      "loss": 3.0452,
      "step": 213661
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0790812969207764,
      "learning_rate": 7.781994971755767e-06,
      "loss": 2.939,
      "step": 213662
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.406033515930176,
      "learning_rate": 7.781069346332557e-06,
      "loss": 2.7501,
      "step": 213663
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.954277515411377,
      "learning_rate": 7.780143775238424e-06,
      "loss": 2.7183,
      "step": 213664
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1607747077941895,
      "learning_rate": 7.779218258473463e-06,
      "loss": 2.7487,
      "step": 213665
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.370709180831909,
      "learning_rate": 7.778292796037843e-06,
      "loss": 2.8766,
      "step": 213666
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9100496768951416,
      "learning_rate": 7.777367387931799e-06,
      "loss": 3.0558,
      "step": 213667
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.868203639984131,
      "learning_rate": 7.776442034155428e-06,
      "loss": 2.963,
      "step": 213668
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5188677310943604,
      "learning_rate": 7.77551673470893e-06,
      "loss": 2.6973,
      "step": 213669
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.676220655441284,
      "learning_rate": 7.774591489592474e-06,
      "loss": 2.6777,
      "step": 213670
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.149765729904175,
      "learning_rate": 7.773666298806226e-06,
      "loss": 2.7924,
      "step": 213671
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.841343641281128,
      "learning_rate": 7.772741162350415e-06,
      "loss": 3.1634,
      "step": 213672
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1160590648651123,
      "learning_rate": 7.771816080225146e-06,
      "loss": 2.9498,
      "step": 213673
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4008255004882812,
      "learning_rate": 7.770891052430617e-06,
      "loss": 3.0823,
      "step": 213674
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3942453861236572,
      "learning_rate": 7.769966078966961e-06,
      "loss": 2.9847,
      "step": 213675
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.267937183380127,
      "learning_rate": 7.769041159834443e-06,
      "loss": 2.8091,
      "step": 213676
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8717970848083496,
      "learning_rate": 7.768116295033134e-06,
      "loss": 2.651,
      "step": 213677
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.543339967727661,
      "learning_rate": 7.767191484563262e-06,
      "loss": 2.8655,
      "step": 213678
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.723940849304199,
      "learning_rate": 7.766266728424964e-06,
      "loss": 2.5819,
      "step": 213679
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8919413089752197,
      "learning_rate": 7.765342026618503e-06,
      "loss": 2.811,
      "step": 213680
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9075963497161865,
      "learning_rate": 7.764417379143917e-06,
      "loss": 2.8798,
      "step": 213681
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.925746202468872,
      "learning_rate": 7.763492786001436e-06,
      "loss": 2.8498,
      "step": 213682
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.9476077556610107,
      "learning_rate": 7.762568247191258e-06,
      "loss": 2.8614,
      "step": 213683
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9283833503723145,
      "learning_rate": 7.761643762713555e-06,
      "loss": 2.9192,
      "step": 213684
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3004963397979736,
      "learning_rate": 7.760719332568422e-06,
      "loss": 2.8845,
      "step": 213685
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6273510456085205,
      "learning_rate": 7.75979495675616e-06,
      "loss": 3.2287,
      "step": 213686
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0882203578948975,
      "learning_rate": 7.758870635276837e-06,
      "loss": 3.0892,
      "step": 213687
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.131540536880493,
      "learning_rate": 7.757946368130653e-06,
      "loss": 2.9354,
      "step": 213688
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.826951742172241,
      "learning_rate": 7.75702215531777e-06,
      "loss": 2.8297,
      "step": 213689
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.7483983039855957,
      "learning_rate": 7.756097996838395e-06,
      "loss": 2.9427,
      "step": 213690
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2993836402893066,
      "learning_rate": 7.755173892692656e-06,
      "loss": 2.8845,
      "step": 213691
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.271793842315674,
      "learning_rate": 7.754249842880788e-06,
      "loss": 2.8883,
      "step": 213692
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.218923568725586,
      "learning_rate": 7.753325847402891e-06,
      "loss": 2.7906,
      "step": 213693
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4249496459960938,
      "learning_rate": 7.752401906259165e-06,
      "loss": 3.0697,
      "step": 213694
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.142642021179199,
      "learning_rate": 7.751478019449776e-06,
      "loss": 2.9008,
      "step": 213695
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8339056968688965,
      "learning_rate": 7.750554186974923e-06,
      "loss": 2.8527,
      "step": 213696
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6946163177490234,
      "learning_rate": 7.74963040883474e-06,
      "loss": 3.0376,
      "step": 213697
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.785679817199707,
      "learning_rate": 7.74870668502946e-06,
      "loss": 2.9677,
      "step": 213698
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9857306480407715,
      "learning_rate": 7.747783015559183e-06,
      "loss": 2.7543,
      "step": 213699
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7353851795196533,
      "learning_rate": 7.74685940042411e-06,
      "loss": 2.9082,
      "step": 213700
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0329785346984863,
      "learning_rate": 7.745935839624406e-06,
      "loss": 2.8677,
      "step": 213701
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.682933807373047,
      "learning_rate": 7.74501233316024e-06,
      "loss": 3.0525,
      "step": 213702
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0669825077056885,
      "learning_rate": 7.744088881031808e-06,
      "loss": 3.1201,
      "step": 213703
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7662527561187744,
      "learning_rate": 7.743165483239311e-06,
      "loss": 2.7466,
      "step": 213704
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1729366779327393,
      "learning_rate": 7.742242139782784e-06,
      "loss": 2.9048,
      "step": 213705
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.111870288848877,
      "learning_rate": 7.741318850662559e-06,
      "loss": 2.9558,
      "step": 213706
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1738593578338623,
      "learning_rate": 7.740395615878702e-06,
      "loss": 3.0502,
      "step": 213707
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7197680473327637,
      "learning_rate": 7.739472435431448e-06,
      "loss": 2.9437,
      "step": 213708
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.780649423599243,
      "learning_rate": 7.73854930932093e-06,
      "loss": 2.8209,
      "step": 213709
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0718507766723633,
      "learning_rate": 7.737626237547378e-06,
      "loss": 2.9787,
      "step": 213710
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.12785005569458,
      "learning_rate": 7.736703220110862e-06,
      "loss": 2.805,
      "step": 213711
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.946321487426758,
      "learning_rate": 7.735780257011614e-06,
      "loss": 2.8727,
      "step": 213712
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.214392900466919,
      "learning_rate": 7.7348573482498e-06,
      "loss": 2.8415,
      "step": 213713
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8099050521850586,
      "learning_rate": 7.733934493825623e-06,
      "loss": 2.8672,
      "step": 213714
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.057926893234253,
      "learning_rate": 7.733011693739178e-06,
      "loss": 2.9838,
      "step": 213715
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.127638339996338,
      "learning_rate": 7.732088947990766e-06,
      "loss": 3.1014,
      "step": 213716
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9506516456604004,
      "learning_rate": 7.73116625658039e-06,
      "loss": 2.9871,
      "step": 213717
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0798797607421875,
      "learning_rate": 7.730243619508348e-06,
      "loss": 3.2611,
      "step": 213718
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.628385543823242,
      "learning_rate": 7.729321036774738e-06,
      "loss": 2.9618,
      "step": 213719
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9946401119232178,
      "learning_rate": 7.728398508379797e-06,
      "loss": 2.7048,
      "step": 213720
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.916506290435791,
      "learning_rate": 7.727476034323655e-06,
      "loss": 2.79,
      "step": 213721
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8490400314331055,
      "learning_rate": 7.72655361460648e-06,
      "loss": 2.8031,
      "step": 213722
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.79670786857605,
      "learning_rate": 7.725631249228504e-06,
      "loss": 2.9109,
      "step": 213723
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8301358222961426,
      "learning_rate": 7.724708938189828e-06,
      "loss": 2.8286,
      "step": 213724
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1847355365753174,
      "learning_rate": 7.723786681490585e-06,
      "loss": 2.8544,
      "step": 213725
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7835168838500977,
      "learning_rate": 7.722864479131075e-06,
      "loss": 3.1464,
      "step": 213726
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.916895627975464,
      "learning_rate": 7.721942331111364e-06,
      "loss": 3.1236,
      "step": 213727
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9803357124328613,
      "learning_rate": 7.721020237431686e-06,
      "loss": 2.7749,
      "step": 213728
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2737839221954346,
      "learning_rate": 7.720098198092172e-06,
      "loss": 2.9051,
      "step": 213729
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8654375076293945,
      "learning_rate": 7.719176213093059e-06,
      "loss": 3.1009,
      "step": 213730
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0982701778411865,
      "learning_rate": 7.718254282434377e-06,
      "loss": 2.86,
      "step": 213731
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3592002391815186,
      "learning_rate": 7.717332406116461e-06,
      "loss": 2.9596,
      "step": 213732
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.279613733291626,
      "learning_rate": 7.716410584139376e-06,
      "loss": 2.8074,
      "step": 213733
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5659735202789307,
      "learning_rate": 7.715488816503357e-06,
      "loss": 2.8529,
      "step": 213734
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5573318004608154,
      "learning_rate": 7.714567103208536e-06,
      "loss": 2.8897,
      "step": 213735
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3797430992126465,
      "learning_rate": 7.713645444255079e-06,
      "loss": 3.0201,
      "step": 213736
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.160229682922363,
      "learning_rate": 7.712723839643153e-06,
      "loss": 2.7417,
      "step": 213737
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.9190826416015625,
      "learning_rate": 7.711802289372992e-06,
      "loss": 2.9038,
      "step": 213738
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4428749084472656,
      "learning_rate": 7.710880793444696e-06,
      "loss": 2.8186,
      "step": 213739
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7686750888824463,
      "learning_rate": 7.709959351858464e-06,
      "loss": 2.8344,
      "step": 213740
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0472049713134766,
      "learning_rate": 7.709037964614495e-06,
      "loss": 3.0924,
      "step": 213741
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4847748279571533,
      "learning_rate": 7.708116631712924e-06,
      "loss": 2.7572,
      "step": 213742
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.402539014816284,
      "learning_rate": 7.707195353153917e-06,
      "loss": 3.1448,
      "step": 213743
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.97438645362854,
      "learning_rate": 7.706274128937673e-06,
      "loss": 2.7451,
      "step": 213744
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7703375816345215,
      "learning_rate": 7.705352959064326e-06,
      "loss": 2.8668,
      "step": 213745
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.226207733154297,
      "learning_rate": 7.704431843534108e-06,
      "loss": 3.0754,
      "step": 213746
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.012889862060547,
      "learning_rate": 7.703510782347156e-06,
      "loss": 2.8332,
      "step": 213747
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.555646896362305,
      "learning_rate": 7.702589775503598e-06,
      "loss": 2.6931,
      "step": 213748
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.303422212600708,
      "learning_rate": 7.70166882300367e-06,
      "loss": 2.6597,
      "step": 213749
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.991541862487793,
      "learning_rate": 7.700747924847539e-06,
      "loss": 2.7904,
      "step": 213750
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.737102746963501,
      "learning_rate": 7.699827081035304e-06,
      "loss": 3.0545,
      "step": 213751
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.828585147857666,
      "learning_rate": 7.698906291567231e-06,
      "loss": 2.7064,
      "step": 213752
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.571359157562256,
      "learning_rate": 7.697985556443453e-06,
      "loss": 2.8419,
      "step": 213753
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6896932125091553,
      "learning_rate": 7.697064875664105e-06,
      "loss": 2.8933,
      "step": 213754
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.245628833770752,
      "learning_rate": 7.696144249229419e-06,
      "loss": 2.6942,
      "step": 213755
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9305243492126465,
      "learning_rate": 7.695223677139495e-06,
      "loss": 2.9883,
      "step": 213756
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6059048175811768,
      "learning_rate": 7.694303159394599e-06,
      "loss": 2.7525,
      "step": 213757
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.217546224594116,
      "learning_rate": 7.693382695994832e-06,
      "loss": 3.1299,
      "step": 213758
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4023702144622803,
      "learning_rate": 7.692462286940392e-06,
      "loss": 2.7037,
      "step": 213759
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5348093509674072,
      "learning_rate": 7.691541932231415e-06,
      "loss": 2.9159,
      "step": 213760
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9386539459228516,
      "learning_rate": 7.6906216318681e-06,
      "loss": 2.9514,
      "step": 213761
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2370715141296387,
      "learning_rate": 7.68970138585061e-06,
      "loss": 2.806,
      "step": 213762
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.0085978507995605,
      "learning_rate": 7.68878119417915e-06,
      "loss": 2.9103,
      "step": 213763
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.850414276123047,
      "learning_rate": 7.68786105685385e-06,
      "loss": 2.9427,
      "step": 213764
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8139712810516357,
      "learning_rate": 7.686940973874912e-06,
      "loss": 2.7094,
      "step": 213765
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.066450357437134,
      "learning_rate": 7.686020945242466e-06,
      "loss": 3.1208,
      "step": 213766
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9600138664245605,
      "learning_rate": 7.685100970956714e-06,
      "loss": 2.8169,
      "step": 213767
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.118562936782837,
      "learning_rate": 7.684181051017824e-06,
      "loss": 2.9927,
      "step": 213768
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9237794876098633,
      "learning_rate": 7.68326118542596e-06,
      "loss": 2.7278,
      "step": 213769
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3549013137817383,
      "learning_rate": 7.682341374181256e-06,
      "loss": 3.204,
      "step": 213770
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7975194454193115,
      "learning_rate": 7.681421617284012e-06,
      "loss": 2.8302,
      "step": 213771
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0549557209014893,
      "learning_rate": 7.680501914734227e-06,
      "loss": 2.9085,
      "step": 213772
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6258835792541504,
      "learning_rate": 7.679582266532203e-06,
      "loss": 2.9713,
      "step": 213773
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8558526039123535,
      "learning_rate": 7.678662672678038e-06,
      "loss": 2.7222,
      "step": 213774
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2239186763763428,
      "learning_rate": 7.677743133171931e-06,
      "loss": 2.9586,
      "step": 213775
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0173304080963135,
      "learning_rate": 7.676823648014052e-06,
      "loss": 2.7656,
      "step": 213776
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.106106996536255,
      "learning_rate": 7.675904217204597e-06,
      "loss": 2.7224,
      "step": 213777
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6240406036376953,
      "learning_rate": 7.67498484074367e-06,
      "loss": 2.6779,
      "step": 213778
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3832130432128906,
      "learning_rate": 7.6740655186315e-06,
      "loss": 2.7026,
      "step": 213779
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5051286220550537,
      "learning_rate": 7.673146250868223e-06,
      "loss": 2.6485,
      "step": 213780
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.870974063873291,
      "learning_rate": 7.67222703745407e-06,
      "loss": 2.9056,
      "step": 213781
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5815789699554443,
      "learning_rate": 7.67130787838911e-06,
      "loss": 2.9016,
      "step": 213782
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0179755687713623,
      "learning_rate": 7.670388773673642e-06,
      "loss": 3.2927,
      "step": 213783
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9750866889953613,
      "learning_rate": 7.669469723307697e-06,
      "loss": 2.869,
      "step": 213784
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9049148559570312,
      "learning_rate": 7.668550727291545e-06,
      "loss": 3.0412,
      "step": 213785
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9817678928375244,
      "learning_rate": 7.667631785625317e-06,
      "loss": 3.1324,
      "step": 213786
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.763221263885498,
      "learning_rate": 7.666712898309213e-06,
      "loss": 2.7337,
      "step": 213787
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.383512020111084,
      "learning_rate": 7.665794065343334e-06,
      "loss": 2.943,
      "step": 213788
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.613042116165161,
      "learning_rate": 7.664875286727978e-06,
      "loss": 2.9109,
      "step": 213789
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.714552879333496,
      "learning_rate": 7.663956562463214e-06,
      "loss": 2.8734,
      "step": 213790
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5381250381469727,
      "learning_rate": 7.663037892549239e-06,
      "loss": 2.6003,
      "step": 213791
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9142909049987793,
      "learning_rate": 7.66211927698619e-06,
      "loss": 2.7565,
      "step": 213792
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.778245687484741,
      "learning_rate": 7.661200715774296e-06,
      "loss": 3.0203,
      "step": 213793
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1210601329803467,
      "learning_rate": 7.660282208913693e-06,
      "loss": 2.9397,
      "step": 213794
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0380916595458984,
      "learning_rate": 7.65936375640458e-06,
      "loss": 2.755,
      "step": 213795
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8912651538848877,
      "learning_rate": 7.65844535824709e-06,
      "loss": 2.6931,
      "step": 213796
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7479569911956787,
      "learning_rate": 7.657527014441456e-06,
      "loss": 3.0567,
      "step": 213797
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0800235271453857,
      "learning_rate": 7.656608724987745e-06,
      "loss": 2.982,
      "step": 213798
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9355409145355225,
      "learning_rate": 7.655690489886224e-06,
      "loss": 2.7275,
      "step": 213799
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.631296396255493,
      "learning_rate": 7.654772309136992e-06,
      "loss": 2.8663,
      "step": 213800
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7123820781707764,
      "learning_rate": 7.653854182740315e-06,
      "loss": 2.7203,
      "step": 213801
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.698132276535034,
      "learning_rate": 7.652936110696296e-06,
      "loss": 2.8919,
      "step": 213802
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5533037185668945,
      "learning_rate": 7.652018093005097e-06,
      "loss": 2.8276,
      "step": 213803
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.39699125289917,
      "learning_rate": 7.651100129666887e-06,
      "loss": 3.1261,
      "step": 213804
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1375715732574463,
      "learning_rate": 7.650182220681867e-06,
      "loss": 3.1201,
      "step": 213805
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3028337955474854,
      "learning_rate": 7.6492643660502e-06,
      "loss": 2.7054,
      "step": 213806
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7210099697113037,
      "learning_rate": 7.648346565772057e-06,
      "loss": 2.8482,
      "step": 213807
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.696066379547119,
      "learning_rate": 7.647428819847634e-06,
      "loss": 2.7723,
      "step": 213808
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.707916021347046,
      "learning_rate": 7.646511128277033e-06,
      "loss": 3.0766,
      "step": 213809
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9704301357269287,
      "learning_rate": 7.645593491060454e-06,
      "loss": 3.0465,
      "step": 213810
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.062589168548584,
      "learning_rate": 7.644675908198128e-06,
      "loss": 2.9735,
      "step": 213811
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3402483463287354,
      "learning_rate": 7.643758379690123e-06,
      "loss": 2.8866,
      "step": 213812
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5068423748016357,
      "learning_rate": 7.642840905536673e-06,
      "loss": 3.0403,
      "step": 213813
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3712799549102783,
      "learning_rate": 7.641923485737977e-06,
      "loss": 2.7815,
      "step": 213814
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.2211594581604,
      "learning_rate": 7.641006120294135e-06,
      "loss": 2.9231,
      "step": 213815
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3317840099334717,
      "learning_rate": 7.640088809205347e-06,
      "loss": 2.9478,
      "step": 213816
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.543153762817383,
      "learning_rate": 7.639171552471778e-06,
      "loss": 2.7864,
      "step": 213817
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9074292182922363,
      "learning_rate": 7.638254350093632e-06,
      "loss": 2.8864,
      "step": 213818
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6857075691223145,
      "learning_rate": 7.637337202071036e-06,
      "loss": 2.9519,
      "step": 213819
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0746684074401855,
      "learning_rate": 7.636420108404195e-06,
      "loss": 2.9682,
      "step": 213820
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0743253231048584,
      "learning_rate": 7.635503069093241e-06,
      "loss": 2.742,
      "step": 213821
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0531063079833984,
      "learning_rate": 7.634586084138373e-06,
      "loss": 2.8269,
      "step": 213822
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.326371669769287,
      "learning_rate": 7.633669153539756e-06,
      "loss": 3.077,
      "step": 213823
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.101045846939087,
      "learning_rate": 7.632752277297528e-06,
      "loss": 3.0873,
      "step": 213824
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9224369525909424,
      "learning_rate": 7.63183545541195e-06,
      "loss": 2.8127,
      "step": 213825
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.889571189880371,
      "learning_rate": 7.630918687883093e-06,
      "loss": 2.8566,
      "step": 213826
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.041294813156128,
      "learning_rate": 7.630001974711186e-06,
      "loss": 2.8741,
      "step": 213827
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7209315299987793,
      "learning_rate": 7.629085315896366e-06,
      "loss": 2.89,
      "step": 213828
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0935850143432617,
      "learning_rate": 7.6281687114388314e-06,
      "loss": 2.9399,
      "step": 213829
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.961472988128662,
      "learning_rate": 7.627252161338715e-06,
      "loss": 2.6105,
      "step": 213830
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9247024059295654,
      "learning_rate": 7.626335665596217e-06,
      "loss": 2.5791,
      "step": 213831
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.805312156677246,
      "learning_rate": 7.625419224211537e-06,
      "loss": 3.165,
      "step": 213832
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.980809450149536,
      "learning_rate": 7.624502837184743e-06,
      "loss": 2.9654,
      "step": 213833
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8710851669311523,
      "learning_rate": 7.623586504516132e-06,
      "loss": 3.0499,
      "step": 213834
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8204782009124756,
      "learning_rate": 7.622670226205807e-06,
      "loss": 3.2022,
      "step": 213835
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.1373136043548584,
      "learning_rate": 7.621754002253932e-06,
      "loss": 2.7984,
      "step": 213836
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.464203119277954,
      "learning_rate": 7.6208378326607085e-06,
      "loss": 2.9094,
      "step": 213837
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.332636594772339,
      "learning_rate": 7.619921717426303e-06,
      "loss": 2.9069,
      "step": 213838
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.880692481994629,
      "learning_rate": 7.619005656550814e-06,
      "loss": 2.8062,
      "step": 213839
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8112916946411133,
      "learning_rate": 7.618089650034509e-06,
      "loss": 3.1565,
      "step": 213840
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.9222681522369385,
      "learning_rate": 7.617173697877521e-06,
      "loss": 2.8738,
      "step": 213841
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.305450201034546,
      "learning_rate": 7.616257800080017e-06,
      "loss": 2.7593,
      "step": 213842
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.744044780731201,
      "learning_rate": 7.615341956642196e-06,
      "loss": 2.7702,
      "step": 213843
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0831642150878906,
      "learning_rate": 7.614426167564158e-06,
      "loss": 2.7361,
      "step": 213844
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7742788791656494,
      "learning_rate": 7.613510432846137e-06,
      "loss": 3.0349,
      "step": 213845
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.8610494136810303,
      "learning_rate": 7.6125947524882994e-06,
      "loss": 2.7225,
      "step": 213846
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.955695152282715,
      "learning_rate": 7.611679126490744e-06,
      "loss": 2.9344,
      "step": 213847
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0930697917938232,
      "learning_rate": 7.610763554853738e-06,
      "loss": 2.9339,
      "step": 213848
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2469654083251953,
      "learning_rate": 7.609848037577415e-06,
      "loss": 3.0686,
      "step": 213849
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.36224365234375,
      "learning_rate": 7.608932574661941e-06,
      "loss": 2.8663,
      "step": 213850
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.880694627761841,
      "learning_rate": 7.608017166107483e-06,
      "loss": 2.9103,
      "step": 213851
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8180694580078125,
      "learning_rate": 7.607101811914207e-06,
      "loss": 2.9995,
      "step": 213852
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0953567028045654,
      "learning_rate": 7.606186512082279e-06,
      "loss": 3.0894,
      "step": 213853
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.6195068359375,
      "learning_rate": 7.605271266611901e-06,
      "loss": 2.7749,
      "step": 213854
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7642030715942383,
      "learning_rate": 7.604356075503204e-06,
      "loss": 3.1596,
      "step": 213855
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5125696659088135,
      "learning_rate": 7.6034409387563885e-06,
      "loss": 3.0584,
      "step": 213856
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.9345669746398926,
      "learning_rate": 7.6025258563716216e-06,
      "loss": 2.7792,
      "step": 213857
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.935749053955078,
      "learning_rate": 7.601610828349069e-06,
      "loss": 2.833,
      "step": 213858
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3851675987243652,
      "learning_rate": 7.600695854688865e-06,
      "loss": 2.9359,
      "step": 213859
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.941732406616211,
      "learning_rate": 7.599780935391242e-06,
      "loss": 2.9524,
      "step": 213860
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.860905885696411,
      "learning_rate": 7.5988660704563e-06,
      "loss": 2.8394,
      "step": 213861
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.430540084838867,
      "learning_rate": 7.597951259884272e-06,
      "loss": 3.2834,
      "step": 213862
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.8569560050964355,
      "learning_rate": 7.597036503675324e-06,
      "loss": 2.8594,
      "step": 213863
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.736088752746582,
      "learning_rate": 7.5961218018295915e-06,
      "loss": 2.7429,
      "step": 213864
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.9731459617614746,
      "learning_rate": 7.595207154347238e-06,
      "loss": 2.8253,
      "step": 213865
    },
    {
      "epoch": 2.78,
      "grad_norm": 5.306157112121582,
      "learning_rate": 7.594292561228466e-06,
      "loss": 3.0907,
      "step": 213866
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7912299633026123,
      "learning_rate": 7.59337802247344e-06,
      "loss": 2.821,
      "step": 213867
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0780529975891113,
      "learning_rate": 7.592463538082328e-06,
      "loss": 2.673,
      "step": 213868
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0309081077575684,
      "learning_rate": 7.591549108055328e-06,
      "loss": 2.8835,
      "step": 213869
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2433462142944336,
      "learning_rate": 7.590634732392542e-06,
      "loss": 2.9752,
      "step": 213870
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.314683437347412,
      "learning_rate": 7.589720411094169e-06,
      "loss": 2.9678,
      "step": 213871
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.414335250854492,
      "learning_rate": 7.588806144160409e-06,
      "loss": 3.1048,
      "step": 213872
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.847989559173584,
      "learning_rate": 7.587891931591361e-06,
      "loss": 3.0313,
      "step": 213873
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.0104472637176514,
      "learning_rate": 7.586977773387292e-06,
      "loss": 2.8769,
      "step": 213874
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.788864850997925,
      "learning_rate": 7.5860636695483364e-06,
      "loss": 2.9799,
      "step": 213875
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.192749261856079,
      "learning_rate": 7.585149620074627e-06,
      "loss": 2.8113,
      "step": 213876
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.846933603286743,
      "learning_rate": 7.584235624966328e-06,
      "loss": 3.0424,
      "step": 213877
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.2157645225524902,
      "learning_rate": 7.583321684223708e-06,
      "loss": 2.7896,
      "step": 213878
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.3640143871307373,
      "learning_rate": 7.5824077978468015e-06,
      "loss": 3.0812,
      "step": 213879
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.5632612705230713,
      "learning_rate": 7.581493965835872e-06,
      "loss": 2.8394,
      "step": 213880
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.4485106468200684,
      "learning_rate": 7.580580188191088e-06,
      "loss": 2.6705,
      "step": 213881
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.702071189880371,
      "learning_rate": 7.579666464912581e-06,
      "loss": 2.9873,
      "step": 213882
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.951475143432617,
      "learning_rate": 7.578752796000487e-06,
      "loss": 3.1207,
      "step": 213883
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.752903699874878,
      "learning_rate": 7.5778391814550695e-06,
      "loss": 2.8449,
      "step": 213884
    },
    {
      "epoch": 2.78,
      "grad_norm": 3.215675115585327,
      "learning_rate": 7.576925621276431e-06,
      "loss": 2.9285,
      "step": 213885
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6984424591064453,
      "learning_rate": 7.576012115464769e-06,
      "loss": 2.789,
      "step": 213886
    },
    {
      "epoch": 2.78,
      "grad_norm": 4.0965189933776855,
      "learning_rate": 7.575098664020285e-06,
      "loss": 3.0317,
      "step": 213887
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.7663238048553467,
      "learning_rate": 7.574185266943078e-06,
      "loss": 3.0237,
      "step": 213888
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.6502389907836914,
      "learning_rate": 7.5732719242333154e-06,
      "loss": 2.9883,
      "step": 213889
    },
    {
      "epoch": 2.78,
      "grad_norm": 2.5724480152130127,
      "learning_rate": 7.572358635891262e-06,
      "loss": 2.8986,
      "step": 213890
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3595035076141357,
      "learning_rate": 7.571445401916954e-06,
      "loss": 3.1682,
      "step": 213891
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0469396114349365,
      "learning_rate": 7.570532222310688e-06,
      "loss": 2.9428,
      "step": 213892
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.687983274459839,
      "learning_rate": 7.569619097072599e-06,
      "loss": 2.8749,
      "step": 213893
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.614577293395996,
      "learning_rate": 7.56870602620282e-06,
      "loss": 2.9903,
      "step": 213894
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0821495056152344,
      "learning_rate": 7.567793009701517e-06,
      "loss": 3.0343,
      "step": 213895
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.037511825561523,
      "learning_rate": 7.566880047568891e-06,
      "loss": 2.7584,
      "step": 213896
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0190131664276123,
      "learning_rate": 7.565967139805074e-06,
      "loss": 2.9471,
      "step": 213897
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2609715461730957,
      "learning_rate": 7.5650542864102996e-06,
      "loss": 3.0954,
      "step": 213898
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7251222133636475,
      "learning_rate": 7.564141487384701e-06,
      "loss": 2.8203,
      "step": 213899
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.858829975128174,
      "learning_rate": 7.563228742728477e-06,
      "loss": 2.9594,
      "step": 213900
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.201434135437012,
      "learning_rate": 7.562316052441697e-06,
      "loss": 2.7103,
      "step": 213901
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5973494052886963,
      "learning_rate": 7.561403416524659e-06,
      "loss": 2.9329,
      "step": 213902
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.908719301223755,
      "learning_rate": 7.560490834977429e-06,
      "loss": 3.3235,
      "step": 213903
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7372004985809326,
      "learning_rate": 7.559578307800274e-06,
      "loss": 2.9626,
      "step": 213904
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.242983102798462,
      "learning_rate": 7.558665834993294e-06,
      "loss": 3.137,
      "step": 213905
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8762667179107666,
      "learning_rate": 7.557753416556689e-06,
      "loss": 2.8941,
      "step": 213906
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0885040760040283,
      "learning_rate": 7.556841052490592e-06,
      "loss": 3.1483,
      "step": 213907
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8382768630981445,
      "learning_rate": 7.555928742795236e-06,
      "loss": 2.8698,
      "step": 213908
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1247854232788086,
      "learning_rate": 7.5550164874706884e-06,
      "loss": 2.8914,
      "step": 213909
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.637054920196533,
      "learning_rate": 7.554104286517248e-06,
      "loss": 3.2197,
      "step": 213910
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1777541637420654,
      "learning_rate": 7.553192139935016e-06,
      "loss": 2.8682,
      "step": 213911
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.531435489654541,
      "learning_rate": 7.552280047724157e-06,
      "loss": 2.9948,
      "step": 213912
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6781373023986816,
      "learning_rate": 7.551368009884806e-06,
      "loss": 2.827,
      "step": 213913
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.965362787246704,
      "learning_rate": 7.5504560264172284e-06,
      "loss": 2.9289,
      "step": 213914
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8314578533172607,
      "learning_rate": 7.549544097321492e-06,
      "loss": 3.1414,
      "step": 213915
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.324103593826294,
      "learning_rate": 7.5486322225978615e-06,
      "loss": 2.746,
      "step": 213916
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7314488887786865,
      "learning_rate": 7.547720402246437e-06,
      "loss": 2.7713,
      "step": 213917
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9505367279052734,
      "learning_rate": 7.546808636267421e-06,
      "loss": 3.0489,
      "step": 213918
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.107224941253662,
      "learning_rate": 7.5458969246609765e-06,
      "loss": 3.0421,
      "step": 213919
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.538688898086548,
      "learning_rate": 7.544985267427273e-06,
      "loss": 2.8275,
      "step": 213920
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8820877075195312,
      "learning_rate": 7.544073664566441e-06,
      "loss": 2.9973,
      "step": 213921
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4277503490448,
      "learning_rate": 7.543162116078749e-06,
      "loss": 3.1273,
      "step": 213922
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9984118938446045,
      "learning_rate": 7.542250621964229e-06,
      "loss": 2.845,
      "step": 213923
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3799238204956055,
      "learning_rate": 7.541339182223183e-06,
      "loss": 3.0082,
      "step": 213924
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9538817405700684,
      "learning_rate": 7.540427796855708e-06,
      "loss": 2.8619,
      "step": 213925
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3536832332611084,
      "learning_rate": 7.539516465862005e-06,
      "loss": 2.887,
      "step": 213926
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1454567909240723,
      "learning_rate": 7.538605189242175e-06,
      "loss": 2.9344,
      "step": 213927
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2939329147338867,
      "learning_rate": 7.537693966996483e-06,
      "loss": 2.7277,
      "step": 213928
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.177951335906982,
      "learning_rate": 7.536782799125029e-06,
      "loss": 3.0567,
      "step": 213929
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7339465618133545,
      "learning_rate": 7.535871685628048e-06,
      "loss": 2.6564,
      "step": 213930
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.900123119354248,
      "learning_rate": 7.534960626505637e-06,
      "loss": 3.0549,
      "step": 213931
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2290985584259033,
      "learning_rate": 7.5340496217579985e-06,
      "loss": 2.9054,
      "step": 213932
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1624279022216797,
      "learning_rate": 7.533138671385297e-06,
      "loss": 3.0606,
      "step": 213933
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.846611261367798,
      "learning_rate": 7.532227775387734e-06,
      "loss": 2.7752,
      "step": 213934
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8044655323028564,
      "learning_rate": 7.5313169337654075e-06,
      "loss": 3.0989,
      "step": 213935
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.175238847732544,
      "learning_rate": 7.530406146518586e-06,
      "loss": 2.7806,
      "step": 213936
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0459625720977783,
      "learning_rate": 7.5294954136473355e-06,
      "loss": 2.9561,
      "step": 213937
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8154194355010986,
      "learning_rate": 7.528584735151888e-06,
      "loss": 2.9832,
      "step": 213938
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2382659912109375,
      "learning_rate": 7.5276741110324114e-06,
      "loss": 3.0017,
      "step": 213939
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3906025886535645,
      "learning_rate": 7.5267635412890045e-06,
      "loss": 2.7974,
      "step": 213940
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.585892915725708,
      "learning_rate": 7.525853025921969e-06,
      "loss": 2.9872,
      "step": 213941
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8475475311279297,
      "learning_rate": 7.524942564931369e-06,
      "loss": 3.0664,
      "step": 213942
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5917916297912598,
      "learning_rate": 7.5240321583174055e-06,
      "loss": 2.9191,
      "step": 213943
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.330232620239258,
      "learning_rate": 7.523121806080212e-06,
      "loss": 2.8797,
      "step": 213944
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1274776458740234,
      "learning_rate": 7.5222115082200214e-06,
      "loss": 2.8357,
      "step": 213945
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.045252561569214,
      "learning_rate": 7.521301264736968e-06,
      "loss": 3.0956,
      "step": 213946
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7816927433013916,
      "learning_rate": 7.520391075631216e-06,
      "loss": 2.6813,
      "step": 213947
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8691811561584473,
      "learning_rate": 7.519480940902966e-06,
      "loss": 2.9733,
      "step": 213948
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.106729030609131,
      "learning_rate": 7.518570860552387e-06,
      "loss": 3.005,
      "step": 213949
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.190643548965454,
      "learning_rate": 7.5176608345795756e-06,
      "loss": 2.6585,
      "step": 213950
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3185033798217773,
      "learning_rate": 7.516750862984766e-06,
      "loss": 2.8648,
      "step": 213951
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.261697292327881,
      "learning_rate": 7.515840945768093e-06,
      "loss": 2.8703,
      "step": 213952
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.438533067703247,
      "learning_rate": 7.514931082929787e-06,
      "loss": 2.9098,
      "step": 213953
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.103410005569458,
      "learning_rate": 7.514021274469983e-06,
      "loss": 3.0004,
      "step": 213954
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2068653106689453,
      "learning_rate": 7.513111520388815e-06,
      "loss": 2.983,
      "step": 213955
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.292880058288574,
      "learning_rate": 7.512201820686481e-06,
      "loss": 3.0482,
      "step": 213956
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.337765693664551,
      "learning_rate": 7.511292175363148e-06,
      "loss": 2.9966,
      "step": 213957
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.190756320953369,
      "learning_rate": 7.510382584418984e-06,
      "loss": 2.7568,
      "step": 213958
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4942569732666016,
      "learning_rate": 7.509473047854186e-06,
      "loss": 2.8211,
      "step": 213959
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8368263244628906,
      "learning_rate": 7.50856356566889e-06,
      "loss": 2.9465,
      "step": 213960
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.807736396789551,
      "learning_rate": 7.507654137863261e-06,
      "loss": 2.8079,
      "step": 213961
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9577243328094482,
      "learning_rate": 7.506744764437467e-06,
      "loss": 2.6941,
      "step": 213962
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.019848108291626,
      "learning_rate": 7.505835445391739e-06,
      "loss": 2.8017,
      "step": 213963
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7392148971557617,
      "learning_rate": 7.504926180726145e-06,
      "loss": 2.775,
      "step": 213964
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.235353469848633,
      "learning_rate": 7.504016970440952e-06,
      "loss": 2.9593,
      "step": 213965
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9229636192321777,
      "learning_rate": 7.503107814536257e-06,
      "loss": 2.9421,
      "step": 213966
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9298441410064697,
      "learning_rate": 7.502198713012264e-06,
      "loss": 2.9991,
      "step": 213967
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9210705757141113,
      "learning_rate": 7.501289665869104e-06,
      "loss": 2.9037,
      "step": 213968
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5552966594696045,
      "learning_rate": 7.500380673107009e-06,
      "loss": 2.9359,
      "step": 213969
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.974113941192627,
      "learning_rate": 7.499471734726081e-06,
      "loss": 3.0587,
      "step": 213970
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7438080310821533,
      "learning_rate": 7.498562850726586e-06,
      "loss": 2.994,
      "step": 213971
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.679018974304199,
      "learning_rate": 7.49765402110859e-06,
      "loss": 2.8164,
      "step": 213972
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.501908302307129,
      "learning_rate": 7.4967452458723265e-06,
      "loss": 2.9387,
      "step": 213973
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.77020263671875,
      "learning_rate": 7.495836525017895e-06,
      "loss": 3.1421,
      "step": 213974
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.099616050720215,
      "learning_rate": 7.49492785854553e-06,
      "loss": 2.7967,
      "step": 213975
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2537026405334473,
      "learning_rate": 7.494019246455363e-06,
      "loss": 2.6508,
      "step": 213976
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6422007083892822,
      "learning_rate": 7.493110688747595e-06,
      "loss": 2.6956,
      "step": 213977
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8235161304473877,
      "learning_rate": 7.492202185422391e-06,
      "loss": 2.9387,
      "step": 213978
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.971395969390869,
      "learning_rate": 7.4912937364799194e-06,
      "loss": 2.9219,
      "step": 213979
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.722712755203247,
      "learning_rate": 7.49038534192028e-06,
      "loss": 2.9192,
      "step": 213980
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0184359550476074,
      "learning_rate": 7.489477001743738e-06,
      "loss": 3.0116,
      "step": 213981
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9540507793426514,
      "learning_rate": 7.488568715950427e-06,
      "loss": 2.9838,
      "step": 213982
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.861912250518799,
      "learning_rate": 7.487660484540514e-06,
      "loss": 2.8271,
      "step": 213983
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4783215522766113,
      "learning_rate": 7.486752307514166e-06,
      "loss": 2.8917,
      "step": 213984
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.428138256072998,
      "learning_rate": 7.4858441848715475e-06,
      "loss": 2.7352,
      "step": 213985
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6682209968566895,
      "learning_rate": 7.484936116612828e-06,
      "loss": 2.9639,
      "step": 213986
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9908697605133057,
      "learning_rate": 7.484028102738171e-06,
      "loss": 2.9677,
      "step": 213987
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.945061445236206,
      "learning_rate": 7.483120143247745e-06,
      "loss": 3.0257,
      "step": 213988
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.254753589630127,
      "learning_rate": 7.482212238141783e-06,
      "loss": 2.9365,
      "step": 213989
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.620849132537842,
      "learning_rate": 7.481304387420317e-06,
      "loss": 2.9826,
      "step": 213990
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5201222896575928,
      "learning_rate": 7.480396591083715e-06,
      "loss": 3.1982,
      "step": 213991
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.87137508392334,
      "learning_rate": 7.479488849131943e-06,
      "loss": 2.8401,
      "step": 213992
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2248687744140625,
      "learning_rate": 7.478581161565267e-06,
      "loss": 2.7192,
      "step": 213993
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.4419713020324707,
      "learning_rate": 7.47767352838382e-06,
      "loss": 2.7219,
      "step": 213994
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4722487926483154,
      "learning_rate": 7.476765949587871e-06,
      "loss": 2.854,
      "step": 213995
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1486153602600098,
      "learning_rate": 7.475858425177417e-06,
      "loss": 3.0279,
      "step": 213996
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9120466709136963,
      "learning_rate": 7.4749509551528256e-06,
      "loss": 2.6785,
      "step": 213997
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6791231632232666,
      "learning_rate": 7.474043539514096e-06,
      "loss": 3.124,
      "step": 213998
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2236666679382324,
      "learning_rate": 7.473136178261496e-06,
      "loss": 2.8835,
      "step": 213999
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8132126331329346,
      "learning_rate": 7.472228871395125e-06,
      "loss": 3.0402,
      "step": 214000
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.674006223678589,
      "learning_rate": 7.471321618915216e-06,
      "loss": 3.0158,
      "step": 214001
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.865415573120117,
      "learning_rate": 7.470414420821868e-06,
      "loss": 2.8646,
      "step": 214002
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0405659675598145,
      "learning_rate": 7.46950727711535e-06,
      "loss": 2.9314,
      "step": 214003
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4094018936157227,
      "learning_rate": 7.468600187795759e-06,
      "loss": 2.8916,
      "step": 214004
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1300485134124756,
      "learning_rate": 7.467693152863296e-06,
      "loss": 2.8956,
      "step": 214005
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.165499210357666,
      "learning_rate": 7.466786172318062e-06,
      "loss": 2.789,
      "step": 214006
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9010567665100098,
      "learning_rate": 7.465879246160289e-06,
      "loss": 2.8862,
      "step": 214007
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0789763927459717,
      "learning_rate": 7.46497237439011e-06,
      "loss": 3.0602,
      "step": 214008
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.978318929672241,
      "learning_rate": 7.464065557007759e-06,
      "loss": 2.9021,
      "step": 214009
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.980901002883911,
      "learning_rate": 7.463158794013369e-06,
      "loss": 2.8006,
      "step": 214010
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0032694339752197,
      "learning_rate": 7.4622520854070724e-06,
      "loss": 3.0663,
      "step": 214011
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.483346939086914,
      "learning_rate": 7.461345431189037e-06,
      "loss": 2.8588,
      "step": 214012
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.77217173576355,
      "learning_rate": 7.460438831359494e-06,
      "loss": 3.0966,
      "step": 214013
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9280617237091064,
      "learning_rate": 7.459532285918579e-06,
      "loss": 2.7098,
      "step": 214014
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.15860652923584,
      "learning_rate": 7.458625794866457e-06,
      "loss": 2.8802,
      "step": 214015
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8777761459350586,
      "learning_rate": 7.4577193582032954e-06,
      "loss": 2.8598,
      "step": 214016
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6189045906066895,
      "learning_rate": 7.456812975929261e-06,
      "loss": 2.8634,
      "step": 214017
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.98700213432312,
      "learning_rate": 7.455906648044519e-06,
      "loss": 3.0477,
      "step": 214018
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8366589546203613,
      "learning_rate": 7.455000374549269e-06,
      "loss": 2.9355,
      "step": 214019
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.629528522491455,
      "learning_rate": 7.454094155443646e-06,
      "loss": 2.772,
      "step": 214020
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0648648738861084,
      "learning_rate": 7.4531879907278155e-06,
      "loss": 2.9442,
      "step": 214021
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.090914487838745,
      "learning_rate": 7.452281880402011e-06,
      "loss": 2.7118,
      "step": 214022
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9796979427337646,
      "learning_rate": 7.451375824466266e-06,
      "loss": 2.8174,
      "step": 214023
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.838052749633789,
      "learning_rate": 7.450469822920913e-06,
      "loss": 2.9915,
      "step": 214024
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.93900728225708,
      "learning_rate": 7.449563875765985e-06,
      "loss": 2.9848,
      "step": 214025
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.562474250793457,
      "learning_rate": 7.448657983001715e-06,
      "loss": 2.7813,
      "step": 214026
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6735999584198,
      "learning_rate": 7.4477521446283054e-06,
      "loss": 2.7764,
      "step": 214027
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.498835325241089,
      "learning_rate": 7.4468463606458525e-06,
      "loss": 3.0435,
      "step": 214028
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9977309703826904,
      "learning_rate": 7.445940631054526e-06,
      "loss": 2.9756,
      "step": 214029
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.848055839538574,
      "learning_rate": 7.445034955854556e-06,
      "loss": 3.0594,
      "step": 214030
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.976217269897461,
      "learning_rate": 7.444129335046045e-06,
      "loss": 2.6519,
      "step": 214031
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2595646381378174,
      "learning_rate": 7.443223768629225e-06,
      "loss": 2.9879,
      "step": 214032
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.456538200378418,
      "learning_rate": 7.442318256604229e-06,
      "loss": 2.8381,
      "step": 214033
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7890894412994385,
      "learning_rate": 7.4414127989712246e-06,
      "loss": 2.8948,
      "step": 214034
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1662023067474365,
      "learning_rate": 7.440507395730344e-06,
      "loss": 2.9839,
      "step": 214035
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.274280548095703,
      "learning_rate": 7.439602046881854e-06,
      "loss": 2.4519,
      "step": 214036
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8740296363830566,
      "learning_rate": 7.438696752425821e-06,
      "loss": 2.9094,
      "step": 214037
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3924131393432617,
      "learning_rate": 7.437791512362478e-06,
      "loss": 2.9449,
      "step": 214038
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8982584476470947,
      "learning_rate": 7.436886326691993e-06,
      "loss": 2.8951,
      "step": 214039
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1002919673919678,
      "learning_rate": 7.435981195414497e-06,
      "loss": 2.8683,
      "step": 214040
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3403491973876953,
      "learning_rate": 7.435076118530159e-06,
      "loss": 3.0159,
      "step": 214041
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7199764251708984,
      "learning_rate": 7.434171096039176e-06,
      "loss": 2.9363,
      "step": 214042
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.041602849960327,
      "learning_rate": 7.433266127941684e-06,
      "loss": 2.8395,
      "step": 214043
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1967034339904785,
      "learning_rate": 7.432361214237881e-06,
      "loss": 2.897,
      "step": 214044
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.41351318359375,
      "learning_rate": 7.431456354927934e-06,
      "loss": 2.7291,
      "step": 214045
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8745996952056885,
      "learning_rate": 7.4305515500120095e-06,
      "loss": 2.9629,
      "step": 214046
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5105419158935547,
      "learning_rate": 7.429646799490241e-06,
      "loss": 2.8318,
      "step": 214047
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9134366512298584,
      "learning_rate": 7.428742103362861e-06,
      "loss": 2.8528,
      "step": 214048
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.014385223388672,
      "learning_rate": 7.427837461629971e-06,
      "loss": 2.9054,
      "step": 214049
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7733876705169678,
      "learning_rate": 7.4269328742917686e-06,
      "loss": 3.1017,
      "step": 214050
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.882054090499878,
      "learning_rate": 7.426028341348456e-06,
      "loss": 2.803,
      "step": 214051
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8626370429992676,
      "learning_rate": 7.425123862800131e-06,
      "loss": 2.7145,
      "step": 214052
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.19544792175293,
      "learning_rate": 7.424219438646995e-06,
      "loss": 2.5599,
      "step": 214053
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.405071973800659,
      "learning_rate": 7.423315068889246e-06,
      "loss": 3.0595,
      "step": 214054
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.3858137130737305,
      "learning_rate": 7.4224107535269864e-06,
      "loss": 3.1216,
      "step": 214055
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7113442420959473,
      "learning_rate": 7.42150649256048e-06,
      "loss": 2.8982,
      "step": 214056
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.176295280456543,
      "learning_rate": 7.4206022859897625e-06,
      "loss": 3.0778,
      "step": 214057
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0900306701660156,
      "learning_rate": 7.419698133815166e-06,
      "loss": 2.8458,
      "step": 214058
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.504835367202759,
      "learning_rate": 7.418794036036724e-06,
      "loss": 2.732,
      "step": 214059
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6124861240386963,
      "learning_rate": 7.417889992654668e-06,
      "loss": 2.8464,
      "step": 214060
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9459683895111084,
      "learning_rate": 7.416986003669101e-06,
      "loss": 2.8239,
      "step": 214061
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9136059284210205,
      "learning_rate": 7.416082069080287e-06,
      "loss": 2.8103,
      "step": 214062
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0401813983917236,
      "learning_rate": 7.4151781888883265e-06,
      "loss": 3.2125,
      "step": 214063
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1662702560424805,
      "learning_rate": 7.414274363093453e-06,
      "loss": 2.8934,
      "step": 214064
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6159164905548096,
      "learning_rate": 7.413370591695733e-06,
      "loss": 3.0544,
      "step": 214065
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0990569591522217,
      "learning_rate": 7.412466874695399e-06,
      "loss": 2.9399,
      "step": 214066
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3877711296081543,
      "learning_rate": 7.411563212092619e-06,
      "loss": 2.9408,
      "step": 214067
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0822834968566895,
      "learning_rate": 7.4106596038875585e-06,
      "loss": 2.7964,
      "step": 214068
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0504722595214844,
      "learning_rate": 7.409756050080351e-06,
      "loss": 2.9324,
      "step": 214069
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7510528564453125,
      "learning_rate": 7.408852550671263e-06,
      "loss": 2.667,
      "step": 214070
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.967078447341919,
      "learning_rate": 7.407949105660327e-06,
      "loss": 2.8753,
      "step": 214071
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8216216564178467,
      "learning_rate": 7.40704571504781e-06,
      "loss": 2.8143,
      "step": 214072
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6025426387786865,
      "learning_rate": 7.406142378833813e-06,
      "loss": 2.9847,
      "step": 214073
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.662548065185547,
      "learning_rate": 7.4052390970185675e-06,
      "loss": 2.7929,
      "step": 214074
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7117843627929688,
      "learning_rate": 7.4043358696021735e-06,
      "loss": 3.0883,
      "step": 214075
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.958350658416748,
      "learning_rate": 7.403432696584899e-06,
      "loss": 2.9794,
      "step": 214076
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0309228897094727,
      "learning_rate": 7.402529577966776e-06,
      "loss": 2.7297,
      "step": 214077
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.561047077178955,
      "learning_rate": 7.401626513748105e-06,
      "loss": 2.8344,
      "step": 214078
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.579962968826294,
      "learning_rate": 7.4007235039289516e-06,
      "loss": 2.7106,
      "step": 214079
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.408576488494873,
      "learning_rate": 7.39982054850955e-06,
      "loss": 2.9371,
      "step": 214080
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.786203145980835,
      "learning_rate": 7.398917647490033e-06,
      "loss": 3.0434,
      "step": 214081
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.67673921585083,
      "learning_rate": 7.3980148008706335e-06,
      "loss": 2.8968,
      "step": 214082
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7952921390533447,
      "learning_rate": 7.397112008651385e-06,
      "loss": 2.8008,
      "step": 214083
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.231311321258545,
      "learning_rate": 7.396209270832587e-06,
      "loss": 2.8372,
      "step": 214084
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5069997310638428,
      "learning_rate": 7.39530658741434e-06,
      "loss": 3.0276,
      "step": 214085
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3937647342681885,
      "learning_rate": 7.394403958396844e-06,
      "loss": 2.9256,
      "step": 214086
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.277012586593628,
      "learning_rate": 7.393501383780232e-06,
      "loss": 2.9014,
      "step": 214087
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.077239513397217,
      "learning_rate": 7.392598863564736e-06,
      "loss": 2.74,
      "step": 214088
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.010197877883911,
      "learning_rate": 7.391696397750424e-06,
      "loss": 2.9,
      "step": 214089
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.695488691329956,
      "learning_rate": 7.390793986337529e-06,
      "loss": 2.897,
      "step": 214090
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.825147867202759,
      "learning_rate": 7.389891629326216e-06,
      "loss": 2.8288,
      "step": 214091
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.721745014190674,
      "learning_rate": 7.388989326716655e-06,
      "loss": 2.8123,
      "step": 214092
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5634825229644775,
      "learning_rate": 7.388087078508975e-06,
      "loss": 2.961,
      "step": 214093
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.174431800842285,
      "learning_rate": 7.387184884703412e-06,
      "loss": 2.9616,
      "step": 214094
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9235687255859375,
      "learning_rate": 7.386282745300098e-06,
      "loss": 2.8706,
      "step": 214095
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.956892251968384,
      "learning_rate": 7.385380660299167e-06,
      "loss": 2.9688,
      "step": 214096
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5572900772094727,
      "learning_rate": 7.384478629700819e-06,
      "loss": 3.1106,
      "step": 214097
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7161712646484375,
      "learning_rate": 7.3835766535052524e-06,
      "loss": 3.1604,
      "step": 214098
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.222395420074463,
      "learning_rate": 7.382674731712535e-06,
      "loss": 3.0396,
      "step": 214099
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7788805961608887,
      "learning_rate": 7.381772864322966e-06,
      "loss": 2.9864,
      "step": 214100
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9070229530334473,
      "learning_rate": 7.380871051336646e-06,
      "loss": 2.9247,
      "step": 214101
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.80706524848938,
      "learning_rate": 7.3799692927537065e-06,
      "loss": 2.8447,
      "step": 214102
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9478042125701904,
      "learning_rate": 7.379067588574383e-06,
      "loss": 2.5651,
      "step": 214103
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.953063488006592,
      "learning_rate": 7.378165938798808e-06,
      "loss": 3.0286,
      "step": 214104
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.80639910697937,
      "learning_rate": 7.377264343427147e-06,
      "loss": 3.0016,
      "step": 214105
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.714282989501953,
      "learning_rate": 7.376362802459601e-06,
      "loss": 2.9573,
      "step": 214106
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8687562942504883,
      "learning_rate": 7.375461315896303e-06,
      "loss": 2.7638,
      "step": 214107
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.967515230178833,
      "learning_rate": 7.374559883737418e-06,
      "loss": 2.9043,
      "step": 214108
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.78951096534729,
      "learning_rate": 7.3736585059831155e-06,
      "loss": 2.9837,
      "step": 214109
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2908999919891357,
      "learning_rate": 7.372757182633626e-06,
      "loss": 2.8806,
      "step": 214110
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8942198753356934,
      "learning_rate": 7.371855913688984e-06,
      "loss": 3.0185,
      "step": 214111
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5763347148895264,
      "learning_rate": 7.370954699149523e-06,
      "loss": 2.6101,
      "step": 214112
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3049261569976807,
      "learning_rate": 7.370053539015275e-06,
      "loss": 2.7438,
      "step": 214113
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1593708992004395,
      "learning_rate": 7.3691524332864405e-06,
      "loss": 3.0529,
      "step": 214114
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.185459613800049,
      "learning_rate": 7.368251381963253e-06,
      "loss": 2.7785,
      "step": 214115
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0161054134368896,
      "learning_rate": 7.367350385045779e-06,
      "loss": 2.9205,
      "step": 214116
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.879861831665039,
      "learning_rate": 7.366449442534284e-06,
      "loss": 2.9889,
      "step": 214117
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0326931476593018,
      "learning_rate": 7.365548554428902e-06,
      "loss": 2.9566,
      "step": 214118
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.016671657562256,
      "learning_rate": 7.364647720729766e-06,
      "loss": 2.8582,
      "step": 214119
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.647376537322998,
      "learning_rate": 7.363746941437043e-06,
      "loss": 2.9463,
      "step": 214120
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.525083065032959,
      "learning_rate": 7.362846216550966e-06,
      "loss": 2.8775,
      "step": 214121
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6661882400512695,
      "learning_rate": 7.361945546071602e-06,
      "loss": 2.7975,
      "step": 214122
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7532596588134766,
      "learning_rate": 7.361044929999216e-06,
      "loss": 2.8086,
      "step": 214123
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4873032569885254,
      "learning_rate": 7.360144368333909e-06,
      "loss": 2.9702,
      "step": 214124
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4116454124450684,
      "learning_rate": 7.359243861075947e-06,
      "loss": 2.995,
      "step": 214125
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6928839683532715,
      "learning_rate": 7.358343408225364e-06,
      "loss": 2.8863,
      "step": 214126
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0652880668640137,
      "learning_rate": 7.357443009782393e-06,
      "loss": 2.9037,
      "step": 214127
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9418039321899414,
      "learning_rate": 7.356542665747201e-06,
      "loss": 2.932,
      "step": 214128
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6326723098754883,
      "learning_rate": 7.3556423761199525e-06,
      "loss": 3.1753,
      "step": 214129
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6817784309387207,
      "learning_rate": 7.354742140900815e-06,
      "loss": 3.0611,
      "step": 214130
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.833108901977539,
      "learning_rate": 7.35384196008999e-06,
      "loss": 2.773,
      "step": 214131
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9946129322052,
      "learning_rate": 7.3529418336875425e-06,
      "loss": 3.1095,
      "step": 214132
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.117853164672852,
      "learning_rate": 7.352041761693772e-06,
      "loss": 2.7931,
      "step": 214133
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.698817253112793,
      "learning_rate": 7.351141744108713e-06,
      "loss": 2.9754,
      "step": 214134
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4377572536468506,
      "learning_rate": 7.3502417809326645e-06,
      "loss": 2.7353,
      "step": 214135
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.401736259460449,
      "learning_rate": 7.349341872165693e-06,
      "loss": 3.1728,
      "step": 214136
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0447263717651367,
      "learning_rate": 7.348442017808031e-06,
      "loss": 2.9992,
      "step": 214137
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7859256267547607,
      "learning_rate": 7.347542217859781e-06,
      "loss": 3.1718,
      "step": 214138
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0237927436828613,
      "learning_rate": 7.346642472321207e-06,
      "loss": 2.958,
      "step": 214139
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.320333957672119,
      "learning_rate": 7.345742781192343e-06,
      "loss": 2.8429,
      "step": 214140
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5835120677948,
      "learning_rate": 7.344843144473489e-06,
      "loss": 3.0465,
      "step": 214141
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.982008934020996,
      "learning_rate": 7.343943562164711e-06,
      "loss": 3.0768,
      "step": 214142
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7722907066345215,
      "learning_rate": 7.343044034266244e-06,
      "loss": 3.0256,
      "step": 214143
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8793888092041016,
      "learning_rate": 7.342144560778218e-06,
      "loss": 2.9146,
      "step": 214144
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9130094051361084,
      "learning_rate": 7.341245141700835e-06,
      "loss": 3.0665,
      "step": 214145
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1340036392211914,
      "learning_rate": 7.340345777034196e-06,
      "loss": 2.8611,
      "step": 214146
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6432738304138184,
      "learning_rate": 7.339446466778531e-06,
      "loss": 2.7699,
      "step": 214147
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9882473945617676,
      "learning_rate": 7.338547210933976e-06,
      "loss": 3.004,
      "step": 214148
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8877604007720947,
      "learning_rate": 7.337648009500763e-06,
      "loss": 2.9588,
      "step": 214149
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2477080821990967,
      "learning_rate": 7.336748862478958e-06,
      "loss": 2.8085,
      "step": 214150
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9809372425079346,
      "learning_rate": 7.3358497698687625e-06,
      "loss": 2.7395,
      "step": 214151
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.635221004486084,
      "learning_rate": 7.334950731670375e-06,
      "loss": 2.9585,
      "step": 214152
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7415013313293457,
      "learning_rate": 7.334051747883962e-06,
      "loss": 3.1435,
      "step": 214153
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5885133743286133,
      "learning_rate": 7.333152818509624e-06,
      "loss": 2.8197,
      "step": 214154
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.990528106689453,
      "learning_rate": 7.332253943547661e-06,
      "loss": 3.0131,
      "step": 214155
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.525545597076416,
      "learning_rate": 7.331355122998106e-06,
      "loss": 3.0279,
      "step": 214156
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5755436420440674,
      "learning_rate": 7.330456356861158e-06,
      "loss": 2.8219,
      "step": 214157
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0416877269744873,
      "learning_rate": 7.329557645137018e-06,
      "loss": 2.9341,
      "step": 214158
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1847009658813477,
      "learning_rate": 7.328658987825853e-06,
      "loss": 3.0815,
      "step": 214159
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.059065818786621,
      "learning_rate": 7.327760384927794e-06,
      "loss": 3.2249,
      "step": 214160
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.295987129211426,
      "learning_rate": 7.326861836443076e-06,
      "loss": 2.8258,
      "step": 214161
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2597296237945557,
      "learning_rate": 7.325963342371765e-06,
      "loss": 2.8523,
      "step": 214162
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.075347661972046,
      "learning_rate": 7.325064902714095e-06,
      "loss": 2.8072,
      "step": 214163
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2193610668182373,
      "learning_rate": 7.324166517470232e-06,
      "loss": 2.8276,
      "step": 214164
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.593080759048462,
      "learning_rate": 7.323268186640307e-06,
      "loss": 2.8713,
      "step": 214165
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.676835536956787,
      "learning_rate": 7.322369910224524e-06,
      "loss": 2.7525,
      "step": 214166
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9662282466888428,
      "learning_rate": 7.32147168822308e-06,
      "loss": 2.916,
      "step": 214167
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5199785232543945,
      "learning_rate": 7.320573520636042e-06,
      "loss": 2.8941,
      "step": 214168
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1770613193511963,
      "learning_rate": 7.319675407463644e-06,
      "loss": 2.7948,
      "step": 214169
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0455613136291504,
      "learning_rate": 7.318777348706051e-06,
      "loss": 2.9071,
      "step": 214170
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5373430252075195,
      "learning_rate": 7.317879344363431e-06,
      "loss": 2.9147,
      "step": 214171
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.691253900527954,
      "learning_rate": 7.316981394435917e-06,
      "loss": 3.0377,
      "step": 214172
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.981750249862671,
      "learning_rate": 7.316083498923741e-06,
      "loss": 2.9305,
      "step": 214173
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2514255046844482,
      "learning_rate": 7.3151856578270055e-06,
      "loss": 3.0285,
      "step": 214174
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4276978969573975,
      "learning_rate": 7.3142878711459075e-06,
      "loss": 2.864,
      "step": 214175
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.434941530227661,
      "learning_rate": 7.313390138880582e-06,
      "loss": 2.9444,
      "step": 214176
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.559028387069702,
      "learning_rate": 7.312492461031261e-06,
      "loss": 2.9637,
      "step": 214177
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0349979400634766,
      "learning_rate": 7.3115948375980445e-06,
      "loss": 3.0786,
      "step": 214178
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8504457473754883,
      "learning_rate": 7.310697268581134e-06,
      "loss": 2.8422,
      "step": 214179
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9318432807922363,
      "learning_rate": 7.309799753980694e-06,
      "loss": 3.0465,
      "step": 214180
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.809199333190918,
      "learning_rate": 7.308902293796925e-06,
      "loss": 2.764,
      "step": 214181
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.530555248260498,
      "learning_rate": 7.3080048880298944e-06,
      "loss": 2.8614,
      "step": 214182
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9144599437713623,
      "learning_rate": 7.307107536679868e-06,
      "loss": 2.9309,
      "step": 214183
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6472434997558594,
      "learning_rate": 7.306210239746946e-06,
      "loss": 2.8258,
      "step": 214184
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2756073474884033,
      "learning_rate": 7.305312997231327e-06,
      "loss": 2.9803,
      "step": 214185
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.804492712020874,
      "learning_rate": 7.304415809133213e-06,
      "loss": 2.7086,
      "step": 214186
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7683191299438477,
      "learning_rate": 7.3035186754526685e-06,
      "loss": 2.9388,
      "step": 214187
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9978535175323486,
      "learning_rate": 7.302621596189995e-06,
      "loss": 2.8229,
      "step": 214188
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.162600517272949,
      "learning_rate": 7.301724571345258e-06,
      "loss": 2.8633,
      "step": 214189
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0980265140533447,
      "learning_rate": 7.300827600918657e-06,
      "loss": 3.1904,
      "step": 214190
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3111624717712402,
      "learning_rate": 7.29993068491036e-06,
      "loss": 2.663,
      "step": 214191
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9000656604766846,
      "learning_rate": 7.299033823320533e-06,
      "loss": 3.1499,
      "step": 214192
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6078085899353027,
      "learning_rate": 7.298137016149308e-06,
      "loss": 2.7168,
      "step": 214193
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3018758296966553,
      "learning_rate": 7.29724026339692e-06,
      "loss": 3.019,
      "step": 214194
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.83992338180542,
      "learning_rate": 7.296343565063534e-06,
      "loss": 2.929,
      "step": 214195
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9298293590545654,
      "learning_rate": 7.2954469211492175e-06,
      "loss": 2.7519,
      "step": 214196
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1724371910095215,
      "learning_rate": 7.294550331654236e-06,
      "loss": 2.7541,
      "step": 214197
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5599658489227295,
      "learning_rate": 7.293653796578725e-06,
      "loss": 3.091,
      "step": 214198
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7938787937164307,
      "learning_rate": 7.292757315922815e-06,
      "loss": 2.9273,
      "step": 214199
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1698038578033447,
      "learning_rate": 7.29186088968674e-06,
      "loss": 2.9666,
      "step": 214200
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.963783025741577,
      "learning_rate": 7.290964517870601e-06,
      "loss": 2.9564,
      "step": 214201
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3892526626586914,
      "learning_rate": 7.29006820047463e-06,
      "loss": 2.8161,
      "step": 214202
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.884148120880127,
      "learning_rate": 7.289171937498961e-06,
      "loss": 2.9567,
      "step": 214203
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1831562519073486,
      "learning_rate": 7.28827572894376e-06,
      "loss": 2.8774,
      "step": 214204
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.860564947128296,
      "learning_rate": 7.28737957480916e-06,
      "loss": 2.999,
      "step": 214205
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1830451488494873,
      "learning_rate": 7.2864834750953945e-06,
      "loss": 3.1719,
      "step": 214206
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7018542289733887,
      "learning_rate": 7.285587429802564e-06,
      "loss": 3.0245,
      "step": 214207
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8401567935943604,
      "learning_rate": 7.284691438930901e-06,
      "loss": 2.8533,
      "step": 214208
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.421133518218994,
      "learning_rate": 7.283795502480538e-06,
      "loss": 2.8768,
      "step": 214209
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.746101140975952,
      "learning_rate": 7.282899620451643e-06,
      "loss": 2.9926,
      "step": 214210
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.224274158477783,
      "learning_rate": 7.282003792844349e-06,
      "loss": 3.1055,
      "step": 214211
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.4892544746398926,
      "learning_rate": 7.281108019658921e-06,
      "loss": 2.9994,
      "step": 214212
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.017782688140869,
      "learning_rate": 7.280212300895394e-06,
      "loss": 2.8703,
      "step": 214213
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3790180683135986,
      "learning_rate": 7.279316636554034e-06,
      "loss": 2.8746,
      "step": 214214
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4549295902252197,
      "learning_rate": 7.27842102663494e-06,
      "loss": 2.9793,
      "step": 214215
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8236963748931885,
      "learning_rate": 7.277525471138379e-06,
      "loss": 2.9039,
      "step": 214216
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.145266056060791,
      "learning_rate": 7.276629970064385e-06,
      "loss": 2.9148,
      "step": 214217
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9278039932250977,
      "learning_rate": 7.275734523413257e-06,
      "loss": 2.7258,
      "step": 214218
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3137054443359375,
      "learning_rate": 7.274839131185028e-06,
      "loss": 2.7834,
      "step": 214219
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.178583145141602,
      "learning_rate": 7.273943793379966e-06,
      "loss": 2.9962,
      "step": 214220
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2432281970977783,
      "learning_rate": 7.273048509998169e-06,
      "loss": 3.1499,
      "step": 214221
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7781450748443604,
      "learning_rate": 7.272153281039938e-06,
      "loss": 2.8297,
      "step": 214222
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3533213138580322,
      "learning_rate": 7.27125810650524e-06,
      "loss": 3.2029,
      "step": 214223
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.982059955596924,
      "learning_rate": 7.2703629863943735e-06,
      "loss": 3.1264,
      "step": 214224
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.184755325317383,
      "learning_rate": 7.2694679207074725e-06,
      "loss": 2.8278,
      "step": 214225
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6272265911102295,
      "learning_rate": 7.268572909444703e-06,
      "loss": 2.7501,
      "step": 214226
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5393340587615967,
      "learning_rate": 7.267677952606199e-06,
      "loss": 2.8874,
      "step": 214227
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.586646318435669,
      "learning_rate": 7.266783050192226e-06,
      "loss": 2.9458,
      "step": 214228
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9363291263580322,
      "learning_rate": 7.265888202202819e-06,
      "loss": 2.7256,
      "step": 214229
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1159956455230713,
      "learning_rate": 7.2649934086382755e-06,
      "loss": 2.8261,
      "step": 214230
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.019911766052246,
      "learning_rate": 7.26409866949863e-06,
      "loss": 2.96,
      "step": 214231
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2997660636901855,
      "learning_rate": 7.2632039847841495e-06,
      "loss": 2.8821,
      "step": 214232
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.891535758972168,
      "learning_rate": 7.262309354494933e-06,
      "loss": 2.8535,
      "step": 214233
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.922363519668579,
      "learning_rate": 7.261414778631247e-06,
      "loss": 2.7963,
      "step": 214234
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6829276084899902,
      "learning_rate": 7.2605202571931255e-06,
      "loss": 2.9925,
      "step": 214235
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.318142890930176,
      "learning_rate": 7.259625790180834e-06,
      "loss": 2.81,
      "step": 214236
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.984739303588867,
      "learning_rate": 7.258731377594473e-06,
      "loss": 3.0229,
      "step": 214237
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8183276653289795,
      "learning_rate": 7.257837019434242e-06,
      "loss": 3.1326,
      "step": 214238
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.494211196899414,
      "learning_rate": 7.256942715700309e-06,
      "loss": 3.0464,
      "step": 214239
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.936880350112915,
      "learning_rate": 7.2560484663929055e-06,
      "loss": 2.6971,
      "step": 214240
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.893303632736206,
      "learning_rate": 7.255154271512032e-06,
      "loss": 2.9965,
      "step": 214241
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.629049062728882,
      "learning_rate": 7.2542601310580205e-06,
      "loss": 2.8741,
      "step": 214242
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.093587875366211,
      "learning_rate": 7.253366045030906e-06,
      "loss": 2.8891,
      "step": 214243
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9749398231506348,
      "learning_rate": 7.252472013430955e-06,
      "loss": 2.6578,
      "step": 214244
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.972954034805298,
      "learning_rate": 7.251578036258265e-06,
      "loss": 2.8665,
      "step": 214245
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3425378799438477,
      "learning_rate": 7.250684113513106e-06,
      "loss": 2.9934,
      "step": 214246
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1771185398101807,
      "learning_rate": 7.2497902451955084e-06,
      "loss": 3.1215,
      "step": 214247
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7060155868530273,
      "learning_rate": 7.2488964313057065e-06,
      "loss": 2.9682,
      "step": 214248
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.740389823913574,
      "learning_rate": 7.248002671843867e-06,
      "loss": 2.9632,
      "step": 214249
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.839271068572998,
      "learning_rate": 7.247108966810156e-06,
      "loss": 2.9922,
      "step": 214250
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.861905574798584,
      "learning_rate": 7.246215316204707e-06,
      "loss": 2.8678,
      "step": 214251
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.567927598953247,
      "learning_rate": 7.245321720027752e-06,
      "loss": 2.9544,
      "step": 214252
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0887234210968018,
      "learning_rate": 7.244428178279393e-06,
      "loss": 3.2158,
      "step": 214253
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.680041790008545,
      "learning_rate": 7.243534690959829e-06,
      "loss": 2.8338,
      "step": 214254
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9523978233337402,
      "learning_rate": 7.242641258069193e-06,
      "loss": 3.0634,
      "step": 214255
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1475138664245605,
      "learning_rate": 7.2417478796077175e-06,
      "loss": 2.9774,
      "step": 214256
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.99418306350708,
      "learning_rate": 7.240854555575504e-06,
      "loss": 2.8884,
      "step": 214257
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3471474647521973,
      "learning_rate": 7.239961285972751e-06,
      "loss": 2.7605,
      "step": 214258
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8410940170288086,
      "learning_rate": 7.239068070799625e-06,
      "loss": 2.8584,
      "step": 214259
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.794182300567627,
      "learning_rate": 7.23817491005626e-06,
      "loss": 2.8487,
      "step": 214260
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0028510093688965,
      "learning_rate": 7.237281803742856e-06,
      "loss": 2.9179,
      "step": 214261
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7551424503326416,
      "learning_rate": 7.236388751859579e-06,
      "loss": 2.8643,
      "step": 214262
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.929995536804199,
      "learning_rate": 7.2354957544065284e-06,
      "loss": 2.631,
      "step": 214263
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.681778907775879,
      "learning_rate": 7.234602811384005e-06,
      "loss": 2.8559,
      "step": 214264
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2210052013397217,
      "learning_rate": 7.233709922792041e-06,
      "loss": 2.9822,
      "step": 214265
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2151410579681396,
      "learning_rate": 7.232817088630905e-06,
      "loss": 3.1748,
      "step": 214266
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.505537986755371,
      "learning_rate": 7.231924308900661e-06,
      "loss": 2.7737,
      "step": 214267
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.293940305709839,
      "learning_rate": 7.231031583601543e-06,
      "loss": 3.0403,
      "step": 214268
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5753746032714844,
      "learning_rate": 7.230138912733719e-06,
      "loss": 2.8745,
      "step": 214269
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.774513006210327,
      "learning_rate": 7.22924629629732e-06,
      "loss": 3.0268,
      "step": 214270
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.708080768585205,
      "learning_rate": 7.228353734292547e-06,
      "loss": 3.0979,
      "step": 214271
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.740206718444824,
      "learning_rate": 7.227461226719567e-06,
      "loss": 2.7954,
      "step": 214272
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8706166744232178,
      "learning_rate": 7.226568773578478e-06,
      "loss": 2.704,
      "step": 214273
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.750138998031616,
      "learning_rate": 7.225676374869549e-06,
      "loss": 2.8308,
      "step": 214274
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.112674713134766,
      "learning_rate": 7.224784030592845e-06,
      "loss": 2.6797,
      "step": 214275
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9603395462036133,
      "learning_rate": 7.2238917407486e-06,
      "loss": 2.9464,
      "step": 214276
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7933712005615234,
      "learning_rate": 7.22299950533698e-06,
      "loss": 2.85,
      "step": 214277
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.756787061691284,
      "learning_rate": 7.222107324358084e-06,
      "loss": 2.97,
      "step": 214278
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6551764011383057,
      "learning_rate": 7.2212151978121805e-06,
      "loss": 2.7599,
      "step": 214279
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.060519218444824,
      "learning_rate": 7.220323125699368e-06,
      "loss": 2.8212,
      "step": 214280
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6684229373931885,
      "learning_rate": 7.2194311080197795e-06,
      "loss": 2.8277,
      "step": 214281
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1075477600097656,
      "learning_rate": 7.2185391447736826e-06,
      "loss": 3.0057,
      "step": 214282
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9768307209014893,
      "learning_rate": 7.217647235961144e-06,
      "loss": 2.9619,
      "step": 214283
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2170469760894775,
      "learning_rate": 7.216755381582395e-06,
      "loss": 2.7716,
      "step": 214284
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.187514305114746,
      "learning_rate": 7.215863581637571e-06,
      "loss": 2.7545,
      "step": 214285
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.364888668060303,
      "learning_rate": 7.214971836126804e-06,
      "loss": 2.7531,
      "step": 214286
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6275551319122314,
      "learning_rate": 7.214080145050361e-06,
      "loss": 2.8273,
      "step": 214287
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.4755725860595703,
      "learning_rate": 7.213188508408341e-06,
      "loss": 3.2135,
      "step": 214288
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.907069683074951,
      "learning_rate": 7.212296926200911e-06,
      "loss": 2.7874,
      "step": 214289
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.087329149246216,
      "learning_rate": 7.211405398428205e-06,
      "loss": 2.9665,
      "step": 214290
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6905009746551514,
      "learning_rate": 7.210513925090456e-06,
      "loss": 2.7227,
      "step": 214291
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.878122091293335,
      "learning_rate": 7.209622506187795e-06,
      "loss": 2.8442,
      "step": 214292
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4600741863250732,
      "learning_rate": 7.208731141720392e-06,
      "loss": 2.8962,
      "step": 214293
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3568553924560547,
      "learning_rate": 7.207839831688445e-06,
      "loss": 2.9671,
      "step": 214294
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.950995445251465,
      "learning_rate": 7.2069485760920534e-06,
      "loss": 2.8843,
      "step": 214295
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1379737854003906,
      "learning_rate": 7.206057374931384e-06,
      "loss": 2.9703,
      "step": 214296
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.020106554031372,
      "learning_rate": 7.205166228206705e-06,
      "loss": 2.993,
      "step": 214297
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0951216220855713,
      "learning_rate": 7.204275135918047e-06,
      "loss": 3.0335,
      "step": 214298
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.842020273208618,
      "learning_rate": 7.203384098065679e-06,
      "loss": 2.7596,
      "step": 214299
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.621711254119873,
      "learning_rate": 7.202493114649699e-06,
      "loss": 2.9896,
      "step": 214300
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5987799167633057,
      "learning_rate": 7.2016021856703745e-06,
      "loss": 2.8787,
      "step": 214301
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.061199903488159,
      "learning_rate": 7.200711311127738e-06,
      "loss": 3.1004,
      "step": 214302
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.740446090698242,
      "learning_rate": 7.199820491022024e-06,
      "loss": 2.8774,
      "step": 214303
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.974400281906128,
      "learning_rate": 7.198929725353397e-06,
      "loss": 2.8432,
      "step": 214304
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7729156017303467,
      "learning_rate": 7.198039014122026e-06,
      "loss": 3.1454,
      "step": 214305
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0422706604003906,
      "learning_rate": 7.197148357328042e-06,
      "loss": 2.9043,
      "step": 214306
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.406264543533325,
      "learning_rate": 7.19625775497168e-06,
      "loss": 2.8438,
      "step": 214307
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9697999954223633,
      "learning_rate": 7.195367207053005e-06,
      "loss": 3.037,
      "step": 214308
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.666260242462158,
      "learning_rate": 7.1944767135722834e-06,
      "loss": 2.9588,
      "step": 214309
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8411707878112793,
      "learning_rate": 7.193586274529584e-06,
      "loss": 2.7738,
      "step": 214310
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.767897129058838,
      "learning_rate": 7.1926958899251705e-06,
      "loss": 3.091,
      "step": 214311
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3616721630096436,
      "learning_rate": 7.191805559759112e-06,
      "loss": 2.8223,
      "step": 214312
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7350571155548096,
      "learning_rate": 7.190915284031706e-06,
      "loss": 2.5759,
      "step": 214313
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9520092010498047,
      "learning_rate": 7.190025062742955e-06,
      "loss": 2.9268,
      "step": 214314
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.557007074356079,
      "learning_rate": 7.189134895893156e-06,
      "loss": 2.9307,
      "step": 214315
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5348503589630127,
      "learning_rate": 7.188244783482377e-06,
      "loss": 2.8201,
      "step": 214316
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6526262760162354,
      "learning_rate": 7.187354725510885e-06,
      "loss": 2.9434,
      "step": 214317
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.070983409881592,
      "learning_rate": 7.1864647219787465e-06,
      "loss": 3.231,
      "step": 214318
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8893401622772217,
      "learning_rate": 7.185574772886227e-06,
      "loss": 3.0412,
      "step": 214319
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1016647815704346,
      "learning_rate": 7.1846848782333936e-06,
      "loss": 2.8816,
      "step": 214320
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.085078239440918,
      "learning_rate": 7.183795038020479e-06,
      "loss": 2.9032,
      "step": 214321
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.228482484817505,
      "learning_rate": 7.182905252247584e-06,
      "loss": 2.9391,
      "step": 214322
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2988688945770264,
      "learning_rate": 7.182015520914941e-06,
      "loss": 3.0232,
      "step": 214323
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.768076181411743,
      "learning_rate": 7.18112584402265e-06,
      "loss": 3.2142,
      "step": 214324
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7201411724090576,
      "learning_rate": 7.1802362215709784e-06,
      "loss": 3.0383,
      "step": 214325
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.658560037612915,
      "learning_rate": 7.179346653559992e-06,
      "loss": 2.8044,
      "step": 214326
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1050217151641846,
      "learning_rate": 7.178457139989924e-06,
      "loss": 2.9068,
      "step": 214327
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.944952964782715,
      "learning_rate": 7.177567680860874e-06,
      "loss": 2.9174,
      "step": 214328
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8203201293945312,
      "learning_rate": 7.176678276173043e-06,
      "loss": 3.2208,
      "step": 214329
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.50321102142334,
      "learning_rate": 7.175788925926595e-06,
      "loss": 2.9143,
      "step": 214330
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.903271436691284,
      "learning_rate": 7.174899630121733e-06,
      "loss": 2.829,
      "step": 214331
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.060837745666504,
      "learning_rate": 7.1740103887585555e-06,
      "loss": 2.8512,
      "step": 214332
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1633379459381104,
      "learning_rate": 7.173121201837262e-06,
      "loss": 2.8812,
      "step": 214333
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0182912349700928,
      "learning_rate": 7.172232069357986e-06,
      "loss": 3.0053,
      "step": 214334
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.957852602005005,
      "learning_rate": 7.171342991320961e-06,
      "loss": 3.1139,
      "step": 214335
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6032299995422363,
      "learning_rate": 7.1704539677262865e-06,
      "loss": 2.932,
      "step": 214336
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.009645700454712,
      "learning_rate": 7.169564998574162e-06,
      "loss": 2.8377,
      "step": 214337
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9206700325012207,
      "learning_rate": 7.168676083864755e-06,
      "loss": 2.9307,
      "step": 214338
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.145817518234253,
      "learning_rate": 7.1677872235981985e-06,
      "loss": 2.9895,
      "step": 214339
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9168953895568848,
      "learning_rate": 7.166898417774658e-06,
      "loss": 3.0349,
      "step": 214340
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.855515480041504,
      "learning_rate": 7.166009666394368e-06,
      "loss": 2.8161,
      "step": 214341
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1562633514404297,
      "learning_rate": 7.165120969457394e-06,
      "loss": 2.7664,
      "step": 214342
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.865933656692505,
      "learning_rate": 7.16423232696397e-06,
      "loss": 2.7408,
      "step": 214343
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0049448013305664,
      "learning_rate": 7.1633437389142625e-06,
      "loss": 2.9896,
      "step": 214344
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.074430465698242,
      "learning_rate": 7.162455205308437e-06,
      "loss": 2.9012,
      "step": 214345
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8725790977478027,
      "learning_rate": 7.161566726146561e-06,
      "loss": 2.826,
      "step": 214346
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.319537401199341,
      "learning_rate": 7.160678301428935e-06,
      "loss": 3.0397,
      "step": 214347
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7276039123535156,
      "learning_rate": 7.159789931155657e-06,
      "loss": 2.8846,
      "step": 214348
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4101712703704834,
      "learning_rate": 7.158901615326895e-06,
      "loss": 3.0632,
      "step": 214349
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9414515495300293,
      "learning_rate": 7.158013353942849e-06,
      "loss": 2.6278,
      "step": 214350
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7718324661254883,
      "learning_rate": 7.15712514700365e-06,
      "loss": 3.22,
      "step": 214351
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9103286266326904,
      "learning_rate": 7.156236994509435e-06,
      "loss": 3.0418,
      "step": 214352
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.761711597442627,
      "learning_rate": 7.1553488964604e-06,
      "loss": 2.8734,
      "step": 214353
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.864583730697632,
      "learning_rate": 7.154460852856713e-06,
      "loss": 3.2735,
      "step": 214354
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.106863498687744,
      "learning_rate": 7.153572863698576e-06,
      "loss": 3.0151,
      "step": 214355
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.339901924133301,
      "learning_rate": 7.152684928986119e-06,
      "loss": 2.935,
      "step": 214356
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.080021381378174,
      "learning_rate": 7.15179704871951e-06,
      "loss": 2.6951,
      "step": 214357
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.964228391647339,
      "learning_rate": 7.150909222898849e-06,
      "loss": 3.0264,
      "step": 214358
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9156932830810547,
      "learning_rate": 7.150021451524435e-06,
      "loss": 3.0016,
      "step": 214359
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9508683681488037,
      "learning_rate": 7.1491337345963015e-06,
      "loss": 3.1287,
      "step": 214360
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6770801544189453,
      "learning_rate": 7.148246072114716e-06,
      "loss": 2.9009,
      "step": 214361
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1991186141967773,
      "learning_rate": 7.1473584640797774e-06,
      "loss": 2.6524,
      "step": 214362
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.936885118484497,
      "learning_rate": 7.1464709104916525e-06,
      "loss": 3.0173,
      "step": 214363
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.023564338684082,
      "learning_rate": 7.145583411350575e-06,
      "loss": 2.8646,
      "step": 214364
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8535115718841553,
      "learning_rate": 7.144695966656644e-06,
      "loss": 3.0372,
      "step": 214365
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.544168472290039,
      "learning_rate": 7.143808576409993e-06,
      "loss": 2.9798,
      "step": 214366
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6575963497161865,
      "learning_rate": 7.1429212406108884e-06,
      "loss": 2.847,
      "step": 214367
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6702628135681152,
      "learning_rate": 7.142033959259464e-06,
      "loss": 2.7504,
      "step": 214368
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0761778354644775,
      "learning_rate": 7.141146732355818e-06,
      "loss": 2.9976,
      "step": 214369
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.547722816467285,
      "learning_rate": 7.1402595599001855e-06,
      "loss": 2.6992,
      "step": 214370
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5428225994110107,
      "learning_rate": 7.139372441892699e-06,
      "loss": 3.1215,
      "step": 214371
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6148841381073,
      "learning_rate": 7.138485378333492e-06,
      "loss": 2.8798,
      "step": 214372
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.86999773979187,
      "learning_rate": 7.13759836922283e-06,
      "loss": 2.88,
      "step": 214373
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.268643379211426,
      "learning_rate": 7.136711414560814e-06,
      "loss": 2.5812,
      "step": 214374
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.4097132682800293,
      "learning_rate": 7.135824514347544e-06,
      "loss": 3.0539,
      "step": 214375
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8698694705963135,
      "learning_rate": 7.1349376685833185e-06,
      "loss": 2.9155,
      "step": 214376
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.083817481994629,
      "learning_rate": 7.134050877268205e-06,
      "loss": 2.9591,
      "step": 214377
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7597460746765137,
      "learning_rate": 7.133164140402403e-06,
      "loss": 3.0939,
      "step": 214378
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7628591060638428,
      "learning_rate": 7.132277457986113e-06,
      "loss": 2.7306,
      "step": 214379
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.123288631439209,
      "learning_rate": 7.131390830019434e-06,
      "loss": 2.8459,
      "step": 214380
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.771059989929199,
      "learning_rate": 7.1305042565025336e-06,
      "loss": 3.0414,
      "step": 214381
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1616337299346924,
      "learning_rate": 7.129617737435644e-06,
      "loss": 3.0421,
      "step": 214382
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8282699584960938,
      "learning_rate": 7.128731272818833e-06,
      "loss": 3.0563,
      "step": 214383
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.776657819747925,
      "learning_rate": 7.127844862652365e-06,
      "loss": 3.2001,
      "step": 214384
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8940587043762207,
      "learning_rate": 7.126958506936309e-06,
      "loss": 2.7517,
      "step": 214385
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9446418285369873,
      "learning_rate": 7.126072205670963e-06,
      "loss": 2.9646,
      "step": 214386
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.497333526611328,
      "learning_rate": 7.1251859588563275e-06,
      "loss": 2.7103,
      "step": 214387
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9238598346710205,
      "learning_rate": 7.124299766492703e-06,
      "loss": 2.9477,
      "step": 214388
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.914421558380127,
      "learning_rate": 7.123413628580155e-06,
      "loss": 3.0554,
      "step": 214389
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.713435649871826,
      "learning_rate": 7.12252754511895e-06,
      "loss": 2.6908,
      "step": 214390
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9774482250213623,
      "learning_rate": 7.121641516109122e-06,
      "loss": 2.8867,
      "step": 214391
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5202977657318115,
      "learning_rate": 7.120755541550971e-06,
      "loss": 3.2998,
      "step": 214392
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7527377605438232,
      "learning_rate": 7.119869621444596e-06,
      "loss": 2.7466,
      "step": 214393
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.715172529220581,
      "learning_rate": 7.1189837557901305e-06,
      "loss": 3.0049,
      "step": 214394
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6503896713256836,
      "learning_rate": 7.1180979445877755e-06,
      "loss": 2.953,
      "step": 214395
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.078463554382324,
      "learning_rate": 7.117212187837729e-06,
      "loss": 2.8752,
      "step": 214396
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9135663509368896,
      "learning_rate": 7.116326485540091e-06,
      "loss": 3.0113,
      "step": 214397
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.914003610610962,
      "learning_rate": 7.115440837695064e-06,
      "loss": 2.9817,
      "step": 214398
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.192445755004883,
      "learning_rate": 7.114555244302844e-06,
      "loss": 2.8919,
      "step": 214399
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4996182918548584,
      "learning_rate": 7.113669705363501e-06,
      "loss": 2.7802,
      "step": 214400
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.22938871383667,
      "learning_rate": 7.112784220877266e-06,
      "loss": 2.9673,
      "step": 214401
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3953349590301514,
      "learning_rate": 7.111898790844306e-06,
      "loss": 3.0202,
      "step": 214402
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0166728496551514,
      "learning_rate": 7.111013415264755e-06,
      "loss": 2.9605,
      "step": 214403
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4416940212249756,
      "learning_rate": 7.110128094138845e-06,
      "loss": 2.8735,
      "step": 214404
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.824016571044922,
      "learning_rate": 7.109242827466643e-06,
      "loss": 3.0073,
      "step": 214405
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2943098545074463,
      "learning_rate": 7.108357615248384e-06,
      "loss": 2.7094,
      "step": 214406
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.897128582000732,
      "learning_rate": 7.1074724574841976e-06,
      "loss": 2.7111,
      "step": 214407
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.50655460357666,
      "learning_rate": 7.106587354174287e-06,
      "loss": 3.1034,
      "step": 214408
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4519217014312744,
      "learning_rate": 7.10570230531875e-06,
      "loss": 2.7999,
      "step": 214409
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8341455459594727,
      "learning_rate": 7.104817310917821e-06,
      "loss": 2.8329,
      "step": 214410
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.799638271331787,
      "learning_rate": 7.1039323709716325e-06,
      "loss": 2.8755,
      "step": 214411
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7314019203186035,
      "learning_rate": 7.103047485480351e-06,
      "loss": 2.8468,
      "step": 214412
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0990004539489746,
      "learning_rate": 7.10216265444411e-06,
      "loss": 3.0309,
      "step": 214413
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3066256046295166,
      "learning_rate": 7.101277877863143e-06,
      "loss": 3.0585,
      "step": 214414
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1164753437042236,
      "learning_rate": 7.100393155737549e-06,
      "loss": 2.8896,
      "step": 214415
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.096804141998291,
      "learning_rate": 7.099508488067529e-06,
      "loss": 2.6267,
      "step": 214416
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9858086109161377,
      "learning_rate": 7.098623874853282e-06,
      "loss": 2.7888,
      "step": 214417
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7759079933166504,
      "learning_rate": 7.097739316094875e-06,
      "loss": 2.9586,
      "step": 214418
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2227609157562256,
      "learning_rate": 7.09685481179254e-06,
      "loss": 2.711,
      "step": 214419
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3608510494232178,
      "learning_rate": 7.095970361946446e-06,
      "loss": 3.0093,
      "step": 214420
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5207090377807617,
      "learning_rate": 7.095085966556724e-06,
      "loss": 3.0865,
      "step": 214421
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2202343940734863,
      "learning_rate": 7.094201625623575e-06,
      "loss": 2.982,
      "step": 214422
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.067485809326172,
      "learning_rate": 7.093317339147131e-06,
      "loss": 2.8273,
      "step": 214423
    },
    {
      "epoch": 2.79,
      "grad_norm": 5.091028213500977,
      "learning_rate": 7.09243310712756e-06,
      "loss": 2.9372,
      "step": 214424
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.565178394317627,
      "learning_rate": 7.0915489295650274e-06,
      "loss": 2.9742,
      "step": 214425
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9812188148498535,
      "learning_rate": 7.090664806459734e-06,
      "loss": 2.9166,
      "step": 214426
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.249810218811035,
      "learning_rate": 7.089780737811779e-06,
      "loss": 2.9398,
      "step": 214427
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1302425861358643,
      "learning_rate": 7.088896723621395e-06,
      "loss": 3.0907,
      "step": 214428
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8084144592285156,
      "learning_rate": 7.0880127638887175e-06,
      "loss": 2.7214,
      "step": 214429
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0210824012756348,
      "learning_rate": 7.0871288586138775e-06,
      "loss": 3.1375,
      "step": 214430
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0646755695343018,
      "learning_rate": 7.086245007797075e-06,
      "loss": 3.0982,
      "step": 214431
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6145644187927246,
      "learning_rate": 7.0853612114384784e-06,
      "loss": 3.0066,
      "step": 214432
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.809645891189575,
      "learning_rate": 7.084477469538219e-06,
      "loss": 3.0799,
      "step": 214433
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7826573848724365,
      "learning_rate": 7.08359378209653e-06,
      "loss": 2.9635,
      "step": 214434
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.812742233276367,
      "learning_rate": 7.082710149113513e-06,
      "loss": 2.8796,
      "step": 214435
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3750157356262207,
      "learning_rate": 7.0818265705893665e-06,
      "loss": 3.1173,
      "step": 214436
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2681422233581543,
      "learning_rate": 7.0809430465241905e-06,
      "loss": 2.6722,
      "step": 214437
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9533708095550537,
      "learning_rate": 7.080059576918218e-06,
      "loss": 2.8042,
      "step": 214438
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2831852436065674,
      "learning_rate": 7.079176161771583e-06,
      "loss": 3.1864,
      "step": 214439
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.776860475540161,
      "learning_rate": 7.078292801084484e-06,
      "loss": 3.1471,
      "step": 214440
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.437412738800049,
      "learning_rate": 7.077409494857056e-06,
      "loss": 2.824,
      "step": 214441
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5077714920043945,
      "learning_rate": 7.076526243089464e-06,
      "loss": 3.1262,
      "step": 214442
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3722076416015625,
      "learning_rate": 7.075643045781843e-06,
      "loss": 3.0177,
      "step": 214443
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6750869750976562,
      "learning_rate": 7.074759902934424e-06,
      "loss": 2.9893,
      "step": 214444
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8200016021728516,
      "learning_rate": 7.073876814547308e-06,
      "loss": 3.0198,
      "step": 214445
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7895498275756836,
      "learning_rate": 7.072993780620728e-06,
      "loss": 3.0191,
      "step": 214446
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.970743417739868,
      "learning_rate": 7.072110801154784e-06,
      "loss": 2.925,
      "step": 214447
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1497695446014404,
      "learning_rate": 7.071227876149644e-06,
      "loss": 2.8731,
      "step": 214448
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3325023651123047,
      "learning_rate": 7.0703450056055375e-06,
      "loss": 3.1408,
      "step": 214449
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.870340585708618,
      "learning_rate": 7.0694621895225674e-06,
      "loss": 2.9892,
      "step": 214450
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.000796318054199,
      "learning_rate": 7.0685794279009e-06,
      "loss": 2.8358,
      "step": 214451
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.941277265548706,
      "learning_rate": 7.067696720740735e-06,
      "loss": 2.7562,
      "step": 214452
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.30108380317688,
      "learning_rate": 7.066814068042204e-06,
      "loss": 2.8401,
      "step": 214453
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0953948497772217,
      "learning_rate": 7.065931469805441e-06,
      "loss": 2.8072,
      "step": 214454
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3845632076263428,
      "learning_rate": 7.065048926030714e-06,
      "loss": 2.8398,
      "step": 214455
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.769131898880005,
      "learning_rate": 7.064166436718088e-06,
      "loss": 2.7939,
      "step": 214456
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7137720584869385,
      "learning_rate": 7.063284001867764e-06,
      "loss": 2.8955,
      "step": 214457
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.643122434616089,
      "learning_rate": 7.062401621479907e-06,
      "loss": 3.1029,
      "step": 214458
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8304080963134766,
      "learning_rate": 7.061519295554685e-06,
      "loss": 2.8612,
      "step": 214459
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.747553586959839,
      "learning_rate": 7.060637024092264e-06,
      "loss": 2.837,
      "step": 214460
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5670158863067627,
      "learning_rate": 7.05975480709281e-06,
      "loss": 3.1115,
      "step": 214461
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9399633407592773,
      "learning_rate": 7.058872644556423e-06,
      "loss": 2.8145,
      "step": 214462
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.618252754211426,
      "learning_rate": 7.057990536483371e-06,
      "loss": 3.1148,
      "step": 214463
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3823132514953613,
      "learning_rate": 7.0571084828737854e-06,
      "loss": 2.8876,
      "step": 214464
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.781140089035034,
      "learning_rate": 7.056226483727767e-06,
      "loss": 3.0732,
      "step": 214465
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9510648250579834,
      "learning_rate": 7.055344539045549e-06,
      "loss": 2.8504,
      "step": 214466
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9246864318847656,
      "learning_rate": 7.054462648827264e-06,
      "loss": 3.1416,
      "step": 214467
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7075753211975098,
      "learning_rate": 7.053580813073079e-06,
      "loss": 2.615,
      "step": 214468
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.063521385192871,
      "learning_rate": 7.05269903178316e-06,
      "loss": 2.8559,
      "step": 214469
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2534339427948,
      "learning_rate": 7.051817304957674e-06,
      "loss": 2.8223,
      "step": 214470
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.341724872589111,
      "learning_rate": 7.050935632596788e-06,
      "loss": 2.8497,
      "step": 214471
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8245232105255127,
      "learning_rate": 7.050054014700701e-06,
      "loss": 3.0855,
      "step": 214472
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9318978786468506,
      "learning_rate": 7.0491724512695135e-06,
      "loss": 2.7949,
      "step": 214473
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6801352500915527,
      "learning_rate": 7.048290942303391e-06,
      "loss": 2.9529,
      "step": 214474
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.885281562805176,
      "learning_rate": 7.047409487802536e-06,
      "loss": 3.0906,
      "step": 214475
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8967456817626953,
      "learning_rate": 7.046528087767078e-06,
      "loss": 2.8894,
      "step": 214476
    },
    {
      "epoch": 2.79,
      "grad_norm": 5.5382080078125,
      "learning_rate": 7.045646742197253e-06,
      "loss": 2.9536,
      "step": 214477
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.161396026611328,
      "learning_rate": 7.044765451093159e-06,
      "loss": 2.7724,
      "step": 214478
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.081566333770752,
      "learning_rate": 7.043884214454964e-06,
      "loss": 2.9178,
      "step": 214479
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.71315598487854,
      "learning_rate": 7.043003032282802e-06,
      "loss": 3.0769,
      "step": 214480
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.128268718719482,
      "learning_rate": 7.042121904576903e-06,
      "loss": 2.9849,
      "step": 214481
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8997695446014404,
      "learning_rate": 7.041240831337403e-06,
      "loss": 3.111,
      "step": 214482
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.845271348953247,
      "learning_rate": 7.040359812564467e-06,
      "loss": 2.952,
      "step": 214483
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.602935791015625,
      "learning_rate": 7.0394788482582955e-06,
      "loss": 2.9771,
      "step": 214484
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4748342037200928,
      "learning_rate": 7.038597938418988e-06,
      "loss": 3.0425,
      "step": 214485
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3888144493103027,
      "learning_rate": 7.037717083046712e-06,
      "loss": 2.7371,
      "step": 214486
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.320127487182617,
      "learning_rate": 7.0368362821416665e-06,
      "loss": 3.053,
      "step": 214487
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.810009002685547,
      "learning_rate": 7.035955535703985e-06,
      "loss": 3.0023,
      "step": 214488
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.108599662780762,
      "learning_rate": 7.035074843733901e-06,
      "loss": 2.8242,
      "step": 214489
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.985140562057495,
      "learning_rate": 7.03419420623148e-06,
      "loss": 2.9421,
      "step": 214490
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.971813201904297,
      "learning_rate": 7.03331362319699e-06,
      "loss": 2.6869,
      "step": 214491
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8508856296539307,
      "learning_rate": 7.032433094630463e-06,
      "loss": 2.9118,
      "step": 214492
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.004234790802002,
      "learning_rate": 7.031552620532166e-06,
      "loss": 2.7877,
      "step": 214493
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.72184157371521,
      "learning_rate": 7.030672200902232e-06,
      "loss": 2.9652,
      "step": 214494
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6511285305023193,
      "learning_rate": 7.029791835740861e-06,
      "loss": 3.035,
      "step": 214495
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7871170043945312,
      "learning_rate": 7.028911525048153e-06,
      "loss": 2.9228,
      "step": 214496
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8351261615753174,
      "learning_rate": 7.028031268824308e-06,
      "loss": 2.8921,
      "step": 214497
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.821296453475952,
      "learning_rate": 7.027151067069458e-06,
      "loss": 2.7207,
      "step": 214498
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.842258930206299,
      "learning_rate": 7.026270919783805e-06,
      "loss": 2.9648,
      "step": 214499
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5971620082855225,
      "learning_rate": 7.025390826967481e-06,
      "loss": 2.9496,
      "step": 214500
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.61975359916687,
      "learning_rate": 7.0245107886207184e-06,
      "loss": 2.7567,
      "step": 214501
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.192840099334717,
      "learning_rate": 7.023630804743618e-06,
      "loss": 2.7849,
      "step": 214502
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.66473126411438,
      "learning_rate": 7.022750875336347e-06,
      "loss": 2.9409,
      "step": 214503
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0156242847442627,
      "learning_rate": 7.0218710003990375e-06,
      "loss": 2.9119,
      "step": 214504
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0078823566436768,
      "learning_rate": 7.020991179931923e-06,
      "loss": 2.5943,
      "step": 214505
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9778871536254883,
      "learning_rate": 7.020111413935137e-06,
      "loss": 2.7668,
      "step": 214506
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.956472158432007,
      "learning_rate": 7.0192317024088455e-06,
      "loss": 2.8062,
      "step": 214507
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.11952543258667,
      "learning_rate": 7.018352045353215e-06,
      "loss": 3.2925,
      "step": 214508
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2672231197357178,
      "learning_rate": 7.017472442768379e-06,
      "loss": 3.0247,
      "step": 214509
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.732599973678589,
      "learning_rate": 7.016592894654538e-06,
      "loss": 3.0322,
      "step": 214510
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0922183990478516,
      "learning_rate": 7.015713401011858e-06,
      "loss": 2.8078,
      "step": 214511
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.360914707183838,
      "learning_rate": 7.014833961840438e-06,
      "loss": 2.8651,
      "step": 214512
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3693220615386963,
      "learning_rate": 7.013954577140546e-06,
      "loss": 2.8695,
      "step": 214513
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.687304973602295,
      "learning_rate": 7.013075246912281e-06,
      "loss": 2.8022,
      "step": 214514
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0804283618927,
      "learning_rate": 7.0121959711558094e-06,
      "loss": 2.6375,
      "step": 214515
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9566471576690674,
      "learning_rate": 7.0113167498712985e-06,
      "loss": 2.8881,
      "step": 214516
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.083138465881348,
      "learning_rate": 7.010437583058914e-06,
      "loss": 3.1513,
      "step": 214517
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7057056427001953,
      "learning_rate": 7.0095584707187906e-06,
      "loss": 2.9748,
      "step": 214518
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.0832343101501465,
      "learning_rate": 7.0086794128511595e-06,
      "loss": 2.9442,
      "step": 214519
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.658669948577881,
      "learning_rate": 7.0078004094561545e-06,
      "loss": 2.9912,
      "step": 214520
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.943249225616455,
      "learning_rate": 7.0069214605339096e-06,
      "loss": 2.8331,
      "step": 214521
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.649912118911743,
      "learning_rate": 7.006042566084591e-06,
      "loss": 2.7964,
      "step": 214522
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.982689619064331,
      "learning_rate": 7.0051637261084314e-06,
      "loss": 2.9671,
      "step": 214523
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.890761137008667,
      "learning_rate": 7.004284940605464e-06,
      "loss": 3.0147,
      "step": 214524
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.174023151397705,
      "learning_rate": 7.0034062095759906e-06,
      "loss": 3.0581,
      "step": 214525
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3561131954193115,
      "learning_rate": 7.002527533020108e-06,
      "loss": 2.9471,
      "step": 214526
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2975902557373047,
      "learning_rate": 7.0016489109380185e-06,
      "loss": 3.0001,
      "step": 214527
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.959125280380249,
      "learning_rate": 7.000770343329787e-06,
      "loss": 2.8531,
      "step": 214528
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.083035945892334,
      "learning_rate": 6.999891830195681e-06,
      "loss": 3.1009,
      "step": 214529
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8782150745391846,
      "learning_rate": 6.9990133715358e-06,
      "loss": 2.9912,
      "step": 214530
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6654775142669678,
      "learning_rate": 6.998134967350344e-06,
      "loss": 2.8018,
      "step": 214531
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9505066871643066,
      "learning_rate": 6.997256617639446e-06,
      "loss": 3.0466,
      "step": 214532
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8100714683532715,
      "learning_rate": 6.996378322403373e-06,
      "loss": 2.8103,
      "step": 214533
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3335273265838623,
      "learning_rate": 6.995500081642125e-06,
      "loss": 2.8077,
      "step": 214534
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0153326988220215,
      "learning_rate": 6.9946218953559675e-06,
      "loss": 2.968,
      "step": 214535
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.136584997177124,
      "learning_rate": 6.993743763545001e-06,
      "loss": 2.7475,
      "step": 214536
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.516600608825684,
      "learning_rate": 6.992865686209492e-06,
      "loss": 2.9708,
      "step": 214537
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.918165445327759,
      "learning_rate": 6.991987663349474e-06,
      "loss": 3.1088,
      "step": 214538
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7827858924865723,
      "learning_rate": 6.991109694965247e-06,
      "loss": 3.1588,
      "step": 214539
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.946462631225586,
      "learning_rate": 6.990231781056876e-06,
      "loss": 2.881,
      "step": 214540
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0337419509887695,
      "learning_rate": 6.989353921624563e-06,
      "loss": 3.0626,
      "step": 214541
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.761359691619873,
      "learning_rate": 6.988476116668407e-06,
      "loss": 3.0033,
      "step": 214542
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0580968856811523,
      "learning_rate": 6.987598366188673e-06,
      "loss": 3.1988,
      "step": 214543
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.07961106300354,
      "learning_rate": 6.986720670185464e-06,
      "loss": 2.9949,
      "step": 214544
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.232954502105713,
      "learning_rate": 6.985843028658944e-06,
      "loss": 2.8366,
      "step": 214545
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.473200559616089,
      "learning_rate": 6.9849654416093135e-06,
      "loss": 2.7056,
      "step": 214546
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.830726385116577,
      "learning_rate": 6.984087909036673e-06,
      "loss": 3.0876,
      "step": 214547
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.058142185211182,
      "learning_rate": 6.9832104309412555e-06,
      "loss": 3.0067,
      "step": 214548
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.407104969024658,
      "learning_rate": 6.982333007323193e-06,
      "loss": 2.6503,
      "step": 214549
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8362388610839844,
      "learning_rate": 6.981455638182587e-06,
      "loss": 3.0468,
      "step": 214550
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.879756212234497,
      "learning_rate": 6.980578323519737e-06,
      "loss": 2.9804,
      "step": 214551
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8480422496795654,
      "learning_rate": 6.979701063334708e-06,
      "loss": 2.9594,
      "step": 214552
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.826828718185425,
      "learning_rate": 6.978823857627636e-06,
      "loss": 2.8921,
      "step": 214553
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9356772899627686,
      "learning_rate": 6.977946706398785e-06,
      "loss": 3.0012,
      "step": 214554
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.778324604034424,
      "learning_rate": 6.977069609648256e-06,
      "loss": 2.7748,
      "step": 214555
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1896791458129883,
      "learning_rate": 6.976192567376182e-06,
      "loss": 3.0078,
      "step": 214556
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7207589149475098,
      "learning_rate": 6.975315579582797e-06,
      "loss": 2.8965,
      "step": 214557
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8127243518829346,
      "learning_rate": 6.974438646268232e-06,
      "loss": 2.9484,
      "step": 214558
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.86628794670105,
      "learning_rate": 6.9735617674326224e-06,
      "loss": 2.7654,
      "step": 214559
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2607948780059814,
      "learning_rate": 6.972684943076201e-06,
      "loss": 2.958,
      "step": 214560
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6878108978271484,
      "learning_rate": 6.971808173199067e-06,
      "loss": 2.826,
      "step": 214561
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1912529468536377,
      "learning_rate": 6.970931457801421e-06,
      "loss": 2.9081,
      "step": 214562
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.720468044281006,
      "learning_rate": 6.9700547968833955e-06,
      "loss": 2.9448,
      "step": 214563
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.863718271255493,
      "learning_rate": 6.96917819044519e-06,
      "loss": 3.0032,
      "step": 214564
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.211541175842285,
      "learning_rate": 6.968301638486906e-06,
      "loss": 3.1857,
      "step": 214565
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8980491161346436,
      "learning_rate": 6.967425141008775e-06,
      "loss": 2.9267,
      "step": 214566
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6535556316375732,
      "learning_rate": 6.966548698010898e-06,
      "loss": 2.8972,
      "step": 214567
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7125046253204346,
      "learning_rate": 6.965672309493509e-06,
      "loss": 2.8608,
      "step": 214568
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7078051567077637,
      "learning_rate": 6.964795975456738e-06,
      "loss": 2.8831,
      "step": 214569
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.069157361984253,
      "learning_rate": 6.963919695900722e-06,
      "loss": 2.8789,
      "step": 214570
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.234078884124756,
      "learning_rate": 6.963043470825625e-06,
      "loss": 3.0182,
      "step": 214571
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.872986078262329,
      "learning_rate": 6.96216730023168e-06,
      "loss": 2.9413,
      "step": 214572
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.15865159034729,
      "learning_rate": 6.9612911841189556e-06,
      "loss": 3.2884,
      "step": 214573
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2530605792999268,
      "learning_rate": 6.960415122487683e-06,
      "loss": 3.0456,
      "step": 214574
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.8439459800720215,
      "learning_rate": 6.959539115337997e-06,
      "loss": 2.939,
      "step": 214575
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.004761219024658,
      "learning_rate": 6.958663162670064e-06,
      "loss": 3.0535,
      "step": 214576
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6091315746307373,
      "learning_rate": 6.957787264484016e-06,
      "loss": 2.9267,
      "step": 214577
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8112850189208984,
      "learning_rate": 6.956911420780087e-06,
      "loss": 2.6412,
      "step": 214578
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8577449321746826,
      "learning_rate": 6.956035631558376e-06,
      "loss": 2.8739,
      "step": 214579
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.11246395111084,
      "learning_rate": 6.955159896819084e-06,
      "loss": 3.0084,
      "step": 214580
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7227165699005127,
      "learning_rate": 6.954284216562378e-06,
      "loss": 2.8142,
      "step": 214581
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3940205574035645,
      "learning_rate": 6.95340859078839e-06,
      "loss": 2.6123,
      "step": 214582
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8026673793792725,
      "learning_rate": 6.952533019497253e-06,
      "loss": 2.9344,
      "step": 214583
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.3358988761901855,
      "learning_rate": 6.951657502689234e-06,
      "loss": 2.8748,
      "step": 214584
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0407848358154297,
      "learning_rate": 6.950782040364367e-06,
      "loss": 2.7111,
      "step": 214585
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.603139877319336,
      "learning_rate": 6.949906632522917e-06,
      "loss": 2.8087,
      "step": 214586
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.468701124191284,
      "learning_rate": 6.9490312791650515e-06,
      "loss": 2.776,
      "step": 214587
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8982598781585693,
      "learning_rate": 6.948155980290837e-06,
      "loss": 2.778,
      "step": 214588
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4192936420440674,
      "learning_rate": 6.947280735900507e-06,
      "loss": 2.8673,
      "step": 214589
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.7131762504577637,
      "learning_rate": 6.946405545994227e-06,
      "loss": 2.9674,
      "step": 214590
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2417750358581543,
      "learning_rate": 6.945530410572098e-06,
      "loss": 3.1042,
      "step": 214591
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2031641006469727,
      "learning_rate": 6.944655329634352e-06,
      "loss": 3.0199,
      "step": 214592
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9096782207489014,
      "learning_rate": 6.943780303181123e-06,
      "loss": 2.8584,
      "step": 214593
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.888154983520508,
      "learning_rate": 6.942905331212611e-06,
      "loss": 2.6625,
      "step": 214594
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.6618380546569824,
      "learning_rate": 6.942030413728917e-06,
      "loss": 3.0488,
      "step": 214595
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5569007396698,
      "learning_rate": 6.941155550730238e-06,
      "loss": 2.8565,
      "step": 214596
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.098182439804077,
      "learning_rate": 6.940280742216708e-06,
      "loss": 2.7875,
      "step": 214597
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.07084321975708,
      "learning_rate": 6.9394059881885285e-06,
      "loss": 2.7614,
      "step": 214598
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7752039432525635,
      "learning_rate": 6.938531288645832e-06,
      "loss": 3.1204,
      "step": 214599
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.969160556793213,
      "learning_rate": 6.937656643588851e-06,
      "loss": 3.0374,
      "step": 214600
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6650750637054443,
      "learning_rate": 6.93678205301762e-06,
      "loss": 2.8602,
      "step": 214601
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0367376804351807,
      "learning_rate": 6.935907516932404e-06,
      "loss": 2.9615,
      "step": 214602
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1189558506011963,
      "learning_rate": 6.935033035333304e-06,
      "loss": 3.0631,
      "step": 214603
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.875969171524048,
      "learning_rate": 6.934158608220552e-06,
      "loss": 2.738,
      "step": 214604
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9974822998046875,
      "learning_rate": 6.933284235594249e-06,
      "loss": 2.8172,
      "step": 214605
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8719100952148438,
      "learning_rate": 6.932409917454629e-06,
      "loss": 2.8071,
      "step": 214606
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0801048278808594,
      "learning_rate": 6.931535653801757e-06,
      "loss": 2.936,
      "step": 214607
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5227949619293213,
      "learning_rate": 6.930661444635866e-06,
      "loss": 3.1665,
      "step": 214608
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9397830963134766,
      "learning_rate": 6.929787289957056e-06,
      "loss": 2.6472,
      "step": 214609
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.344919443130493,
      "learning_rate": 6.928913189765595e-06,
      "loss": 2.9503,
      "step": 214610
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9992353916168213,
      "learning_rate": 6.928039144061515e-06,
      "loss": 3.1686,
      "step": 214611
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.122443675994873,
      "learning_rate": 6.927165152845115e-06,
      "loss": 3.0478,
      "step": 214612
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8817005157470703,
      "learning_rate": 6.926291216116431e-06,
      "loss": 2.9556,
      "step": 214613
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5406391620635986,
      "learning_rate": 6.925417333875727e-06,
      "loss": 3.0049,
      "step": 214614
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6822071075439453,
      "learning_rate": 6.92454350612307e-06,
      "loss": 2.9874,
      "step": 214615
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.196462392807007,
      "learning_rate": 6.923669732858694e-06,
      "loss": 2.8168,
      "step": 214616
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6893837451934814,
      "learning_rate": 6.9227960140827324e-06,
      "loss": 3.0422,
      "step": 214617
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.7776854038238525,
      "learning_rate": 6.921922349795417e-06,
      "loss": 3.032,
      "step": 214618
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.788801908493042,
      "learning_rate": 6.921048739996749e-06,
      "loss": 2.9497,
      "step": 214619
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.0469751358032227,
      "learning_rate": 6.92017518468706e-06,
      "loss": 2.8301,
      "step": 214620
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.211895704269409,
      "learning_rate": 6.919301683866418e-06,
      "loss": 2.7673,
      "step": 214621
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2417521476745605,
      "learning_rate": 6.918428237535023e-06,
      "loss": 2.8149,
      "step": 214622
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.152972221374512,
      "learning_rate": 6.917554845692974e-06,
      "loss": 3.0104,
      "step": 214623
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.4665331840515137,
      "learning_rate": 6.916681508340538e-06,
      "loss": 2.7803,
      "step": 214624
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.158597230911255,
      "learning_rate": 6.915808225477814e-06,
      "loss": 3.0693,
      "step": 214625
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.822068214416504,
      "learning_rate": 6.9149349971050036e-06,
      "loss": 3.0475,
      "step": 214626
    },
    {
      "epoch": 2.79,
      "grad_norm": 4.145296573638916,
      "learning_rate": 6.914061823222172e-06,
      "loss": 2.9375,
      "step": 214627
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.877492904663086,
      "learning_rate": 6.913188703829586e-06,
      "loss": 2.8608,
      "step": 214628
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.619941473007202,
      "learning_rate": 6.912315638927346e-06,
      "loss": 2.9773,
      "step": 214629
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.787954330444336,
      "learning_rate": 6.911442628515684e-06,
      "loss": 2.9704,
      "step": 214630
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8192856311798096,
      "learning_rate": 6.9105696725947015e-06,
      "loss": 2.913,
      "step": 214631
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.340096950531006,
      "learning_rate": 6.909696771164564e-06,
      "loss": 2.8436,
      "step": 214632
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.2269649505615234,
      "learning_rate": 6.908823924225438e-06,
      "loss": 2.9024,
      "step": 214633
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.1740667819976807,
      "learning_rate": 6.9079511317775225e-06,
      "loss": 2.8265,
      "step": 214634
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8670926094055176,
      "learning_rate": 6.90707839382092e-06,
      "loss": 2.7059,
      "step": 214635
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.943009853363037,
      "learning_rate": 6.906205710355861e-06,
      "loss": 3.0329,
      "step": 214636
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.5212996006011963,
      "learning_rate": 6.905333081382447e-06,
      "loss": 2.9362,
      "step": 214637
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.243530750274658,
      "learning_rate": 6.904460506900844e-06,
      "loss": 2.8566,
      "step": 214638
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8187150955200195,
      "learning_rate": 6.903587986911285e-06,
      "loss": 2.9945,
      "step": 214639
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.673339605331421,
      "learning_rate": 6.902715521413837e-06,
      "loss": 3.133,
      "step": 214640
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.416323661804199,
      "learning_rate": 6.901843110408734e-06,
      "loss": 3.021,
      "step": 214641
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6832616329193115,
      "learning_rate": 6.900970753896106e-06,
      "loss": 2.798,
      "step": 214642
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.5080676078796387,
      "learning_rate": 6.900098451876124e-06,
      "loss": 2.8052,
      "step": 214643
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.135529041290283,
      "learning_rate": 6.899226204348918e-06,
      "loss": 3.0699,
      "step": 214644
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6294920444488525,
      "learning_rate": 6.898354011314689e-06,
      "loss": 3.1456,
      "step": 214645
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.263517379760742,
      "learning_rate": 6.897481872773603e-06,
      "loss": 3.1017,
      "step": 214646
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.201838493347168,
      "learning_rate": 6.896609788725793e-06,
      "loss": 3.1667,
      "step": 214647
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.749218225479126,
      "learning_rate": 6.89573775917146e-06,
      "loss": 2.9459,
      "step": 214648
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6850292682647705,
      "learning_rate": 6.894865784110737e-06,
      "loss": 2.9154,
      "step": 214649
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.9450817108154297,
      "learning_rate": 6.893993863543756e-06,
      "loss": 2.8482,
      "step": 214650
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.752211809158325,
      "learning_rate": 6.893121997470752e-06,
      "loss": 2.957,
      "step": 214651
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.268282413482666,
      "learning_rate": 6.892250185891823e-06,
      "loss": 3.1482,
      "step": 214652
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.9090049266815186,
      "learning_rate": 6.891378428807171e-06,
      "loss": 2.9656,
      "step": 214653
    },
    {
      "epoch": 2.79,
      "grad_norm": 3.013648748397827,
      "learning_rate": 6.89050672621696e-06,
      "loss": 3.0713,
      "step": 214654
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.75950288772583,
      "learning_rate": 6.889635078121325e-06,
      "loss": 2.722,
      "step": 214655
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.8746414184570312,
      "learning_rate": 6.888763484520432e-06,
      "loss": 2.8752,
      "step": 214656
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.773423910140991,
      "learning_rate": 6.887891945414481e-06,
      "loss": 3.2239,
      "step": 214657
    },
    {
      "epoch": 2.79,
      "grad_norm": 2.6610233783721924,
      "learning_rate": 6.887020460803538e-06,
      "loss": 2.9052,
      "step": 214658
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.659522771835327,
      "learning_rate": 6.8861490306879045e-06,
      "loss": 2.8473,
      "step": 214659
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.015995025634766,
      "learning_rate": 6.885277655067612e-06,
      "loss": 2.9312,
      "step": 214660
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8501572608947754,
      "learning_rate": 6.884406333942927e-06,
      "loss": 2.8762,
      "step": 214661
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8882808685302734,
      "learning_rate": 6.883535067313917e-06,
      "loss": 2.9294,
      "step": 214662
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6990113258361816,
      "learning_rate": 6.8826638551808475e-06,
      "loss": 2.8952,
      "step": 214663
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.223100185394287,
      "learning_rate": 6.881792697543753e-06,
      "loss": 2.7336,
      "step": 214664
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7353179454803467,
      "learning_rate": 6.8809215944029325e-06,
      "loss": 2.8372,
      "step": 214665
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1616568565368652,
      "learning_rate": 6.88005054575842e-06,
      "loss": 2.979,
      "step": 214666
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7777559757232666,
      "learning_rate": 6.879179551610514e-06,
      "loss": 2.6652,
      "step": 214667
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9396471977233887,
      "learning_rate": 6.878308611959249e-06,
      "loss": 2.802,
      "step": 214668
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1827104091644287,
      "learning_rate": 6.877437726804858e-06,
      "loss": 2.7487,
      "step": 214669
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8434298038482666,
      "learning_rate": 6.876566896147439e-06,
      "loss": 2.9265,
      "step": 214670
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.539957284927368,
      "learning_rate": 6.875696119987262e-06,
      "loss": 2.8297,
      "step": 214671
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9187161922454834,
      "learning_rate": 6.8748253983243906e-06,
      "loss": 2.9482,
      "step": 214672
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9290833473205566,
      "learning_rate": 6.873954731159059e-06,
      "loss": 2.9988,
      "step": 214673
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3112645149230957,
      "learning_rate": 6.873084118491335e-06,
      "loss": 3.0455,
      "step": 214674
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4981555938720703,
      "learning_rate": 6.872213560321482e-06,
      "loss": 2.9812,
      "step": 214675
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0641841888427734,
      "learning_rate": 6.871343056649603e-06,
      "loss": 2.8145,
      "step": 214676
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.457218885421753,
      "learning_rate": 6.870472607475863e-06,
      "loss": 2.8272,
      "step": 214677
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.933422565460205,
      "learning_rate": 6.8696022128004294e-06,
      "loss": 3.0398,
      "step": 214678
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.258001327514648,
      "learning_rate": 6.868731872623534e-06,
      "loss": 3.1213,
      "step": 214679
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.247753143310547,
      "learning_rate": 6.867861586945211e-06,
      "loss": 2.9448,
      "step": 214680
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.455832004547119,
      "learning_rate": 6.866991355765728e-06,
      "loss": 3.0125,
      "step": 214681
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.719886541366577,
      "learning_rate": 6.866121179085149e-06,
      "loss": 2.7738,
      "step": 214682
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.474879026412964,
      "learning_rate": 6.865251056903742e-06,
      "loss": 2.8687,
      "step": 214683
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.749912977218628,
      "learning_rate": 6.864380989221574e-06,
      "loss": 2.9453,
      "step": 214684
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.961458206176758,
      "learning_rate": 6.863510976038911e-06,
      "loss": 2.9082,
      "step": 214685
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.781872272491455,
      "learning_rate": 6.862641017355819e-06,
      "loss": 2.8934,
      "step": 214686
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.817399501800537,
      "learning_rate": 6.861771113172498e-06,
      "loss": 2.938,
      "step": 214687
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1517174243927,
      "learning_rate": 6.860901263489083e-06,
      "loss": 3.0752,
      "step": 214688
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.1929144859313965,
      "learning_rate": 6.860031468305804e-06,
      "loss": 2.9682,
      "step": 214689
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.819767475128174,
      "learning_rate": 6.85916172762273e-06,
      "loss": 2.8746,
      "step": 214690
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7464118003845215,
      "learning_rate": 6.858292041440161e-06,
      "loss": 3.0569,
      "step": 214691
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.346515417098999,
      "learning_rate": 6.857422409758062e-06,
      "loss": 2.9745,
      "step": 214692
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.906346559524536,
      "learning_rate": 6.856552832576767e-06,
      "loss": 2.7917,
      "step": 214693
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.009124994277954,
      "learning_rate": 6.855683309896343e-06,
      "loss": 2.9549,
      "step": 214694
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.11545467376709,
      "learning_rate": 6.8548138417169885e-06,
      "loss": 2.9711,
      "step": 214695
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3633933067321777,
      "learning_rate": 6.853944428038838e-06,
      "loss": 2.9113,
      "step": 214696
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.576822280883789,
      "learning_rate": 6.853075068862124e-06,
      "loss": 2.7495,
      "step": 214697
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.044785022735596,
      "learning_rate": 6.8522057641869134e-06,
      "loss": 2.6568,
      "step": 214698
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.783590316772461,
      "learning_rate": 6.851336514013439e-06,
      "loss": 3.0,
      "step": 214699
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.261408567428589,
      "learning_rate": 6.850467318341802e-06,
      "loss": 2.7948,
      "step": 214700
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6473140716552734,
      "learning_rate": 6.849598177172233e-06,
      "loss": 2.9634,
      "step": 214701
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.303182363510132,
      "learning_rate": 6.848729090504801e-06,
      "loss": 3.0792,
      "step": 214702
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4196841716766357,
      "learning_rate": 6.847860058339805e-06,
      "loss": 2.8199,
      "step": 214703
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9614083766937256,
      "learning_rate": 6.846991080677278e-06,
      "loss": 2.7708,
      "step": 214704
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.972951650619507,
      "learning_rate": 6.846122157517453e-06,
      "loss": 2.8839,
      "step": 214705
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.980203866958618,
      "learning_rate": 6.845253288860431e-06,
      "loss": 2.6577,
      "step": 214706
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0288095474243164,
      "learning_rate": 6.8443844747064105e-06,
      "loss": 2.7922,
      "step": 214707
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.136870861053467,
      "learning_rate": 6.843515715055559e-06,
      "loss": 2.5867,
      "step": 214708
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.884227752685547,
      "learning_rate": 6.842647009908075e-06,
      "loss": 2.6739,
      "step": 214709
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.521061897277832,
      "learning_rate": 6.841778359264027e-06,
      "loss": 2.9509,
      "step": 214710
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.698875665664673,
      "learning_rate": 6.840909763123681e-06,
      "loss": 2.8724,
      "step": 214711
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.108177423477173,
      "learning_rate": 6.8400412214870694e-06,
      "loss": 2.8199,
      "step": 214712
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.701862096786499,
      "learning_rate": 6.839172734354492e-06,
      "loss": 3.1156,
      "step": 214713
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8233554363250732,
      "learning_rate": 6.838304301725983e-06,
      "loss": 2.957,
      "step": 214714
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.601968288421631,
      "learning_rate": 6.837435923601808e-06,
      "loss": 2.8544,
      "step": 214715
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6620795726776123,
      "learning_rate": 6.8365675999821015e-06,
      "loss": 2.8352,
      "step": 214716
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9307103157043457,
      "learning_rate": 6.835699330866995e-06,
      "loss": 2.9417,
      "step": 214717
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.090564250946045,
      "learning_rate": 6.834831116256656e-06,
      "loss": 3.0469,
      "step": 214718
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4571921825408936,
      "learning_rate": 6.833962956151284e-06,
      "loss": 3.0939,
      "step": 214719
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1026854515075684,
      "learning_rate": 6.833094850550946e-06,
      "loss": 2.8714,
      "step": 214720
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9476373195648193,
      "learning_rate": 6.8322267994559425e-06,
      "loss": 2.9456,
      "step": 214721
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8317222595214844,
      "learning_rate": 6.831358802866338e-06,
      "loss": 2.7827,
      "step": 214722
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0701398849487305,
      "learning_rate": 6.8304908607823005e-06,
      "loss": 2.6917,
      "step": 214723
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.915686845779419,
      "learning_rate": 6.82962297320403e-06,
      "loss": 2.9759,
      "step": 214724
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.107727289199829,
      "learning_rate": 6.828755140131659e-06,
      "loss": 3.1114,
      "step": 214725
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7128746509552,
      "learning_rate": 6.827887361565354e-06,
      "loss": 2.9519,
      "step": 214726
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.763883113861084,
      "learning_rate": 6.827019637505282e-06,
      "loss": 3.0813,
      "step": 214727
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.556440353393555,
      "learning_rate": 6.82615196795161e-06,
      "loss": 3.1569,
      "step": 214728
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8998072147369385,
      "learning_rate": 6.82528435290447e-06,
      "loss": 3.2148,
      "step": 214729
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1690659523010254,
      "learning_rate": 6.8244167923640626e-06,
      "loss": 2.8842,
      "step": 214730
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2953813076019287,
      "learning_rate": 6.823549286330487e-06,
      "loss": 2.7613,
      "step": 214731
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1057064533233643,
      "learning_rate": 6.822681834804011e-06,
      "loss": 2.8286,
      "step": 214732
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.933739423751831,
      "learning_rate": 6.821814437784701e-06,
      "loss": 3.0385,
      "step": 214733
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.129387617111206,
      "learning_rate": 6.820947095272755e-06,
      "loss": 3.0656,
      "step": 214734
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2322397232055664,
      "learning_rate": 6.820079807268308e-06,
      "loss": 2.7402,
      "step": 214735
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4421310424804688,
      "learning_rate": 6.819212573771593e-06,
      "loss": 2.6847,
      "step": 214736
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0567734241485596,
      "learning_rate": 6.818345394782676e-06,
      "loss": 3.1854,
      "step": 214737
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7746052742004395,
      "learning_rate": 6.817478270301757e-06,
      "loss": 2.6141,
      "step": 214738
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.511784076690674,
      "learning_rate": 6.816611200329036e-06,
      "loss": 2.7589,
      "step": 214739
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8634486198425293,
      "learning_rate": 6.815744184864646e-06,
      "loss": 3.0603,
      "step": 214740
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.742711305618286,
      "learning_rate": 6.814877223908721e-06,
      "loss": 3.0284,
      "step": 214741
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6401541233062744,
      "learning_rate": 6.8140103174614605e-06,
      "loss": 2.9816,
      "step": 214742
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.129866123199463,
      "learning_rate": 6.813143465522964e-06,
      "loss": 2.7444,
      "step": 214743
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2698612213134766,
      "learning_rate": 6.812276668093497e-06,
      "loss": 2.7961,
      "step": 214744
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.941299915313721,
      "learning_rate": 6.811409925173095e-06,
      "loss": 2.832,
      "step": 214745
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.782867670059204,
      "learning_rate": 6.810543236762089e-06,
      "loss": 2.9823,
      "step": 214746
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.855583667755127,
      "learning_rate": 6.809676602860448e-06,
      "loss": 3.0063,
      "step": 214747
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1856417655944824,
      "learning_rate": 6.80881002346847e-06,
      "loss": 2.7866,
      "step": 214748
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6108219623565674,
      "learning_rate": 6.807943498586221e-06,
      "loss": 3.0409,
      "step": 214749
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6329407691955566,
      "learning_rate": 6.8070770282139365e-06,
      "loss": 2.847,
      "step": 214750
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1055984497070312,
      "learning_rate": 6.806210612351748e-06,
      "loss": 3.0545,
      "step": 214751
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2622222900390625,
      "learning_rate": 6.8053442509998556e-06,
      "loss": 2.7846,
      "step": 214752
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.918083906173706,
      "learning_rate": 6.804477944158326e-06,
      "loss": 2.7783,
      "step": 214753
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4496757984161377,
      "learning_rate": 6.803611691827426e-06,
      "loss": 2.6184,
      "step": 214754
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4765357971191406,
      "learning_rate": 6.802745494007222e-06,
      "loss": 3.0624,
      "step": 214755
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6516809463500977,
      "learning_rate": 6.8018793506979804e-06,
      "loss": 2.8991,
      "step": 214756
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.12522029876709,
      "learning_rate": 6.801013261899735e-06,
      "loss": 2.7784,
      "step": 214757
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8407464027404785,
      "learning_rate": 6.800147227612818e-06,
      "loss": 3.0324,
      "step": 214758
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.712428092956543,
      "learning_rate": 6.799281247837196e-06,
      "loss": 2.9961,
      "step": 214759
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.841466188430786,
      "learning_rate": 6.79841532257317e-06,
      "loss": 2.808,
      "step": 214760
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.418928384780884,
      "learning_rate": 6.797549451820805e-06,
      "loss": 2.8867,
      "step": 214761
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.191854953765869,
      "learning_rate": 6.796683635580336e-06,
      "loss": 3.1561,
      "step": 214762
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.811086654663086,
      "learning_rate": 6.795817873851894e-06,
      "loss": 2.9507,
      "step": 214763
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9766392707824707,
      "learning_rate": 6.794952166635681e-06,
      "loss": 3.106,
      "step": 214764
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6937294006347656,
      "learning_rate": 6.794086513931762e-06,
      "loss": 2.905,
      "step": 214765
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4946203231811523,
      "learning_rate": 6.793220915740405e-06,
      "loss": 2.86,
      "step": 214766
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9482109546661377,
      "learning_rate": 6.792355372061675e-06,
      "loss": 2.9167,
      "step": 214767
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7909724712371826,
      "learning_rate": 6.7914898828958395e-06,
      "loss": 3.0563,
      "step": 214768
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.244842052459717,
      "learning_rate": 6.790624448242932e-06,
      "loss": 2.8709,
      "step": 214769
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.245469570159912,
      "learning_rate": 6.789759068103285e-06,
      "loss": 2.9489,
      "step": 214770
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8430628776550293,
      "learning_rate": 6.788893742476864e-06,
      "loss": 2.8254,
      "step": 214771
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.924330472946167,
      "learning_rate": 6.7880284713639376e-06,
      "loss": 2.8783,
      "step": 214772
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.082672595977783,
      "learning_rate": 6.787163254764672e-06,
      "loss": 2.9578,
      "step": 214773
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.645033597946167,
      "learning_rate": 6.7862980926791985e-06,
      "loss": 2.8206,
      "step": 214774
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9423699378967285,
      "learning_rate": 6.785432985107686e-06,
      "loss": 2.9405,
      "step": 214775
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9833085536956787,
      "learning_rate": 6.784567932050334e-06,
      "loss": 2.7642,
      "step": 214776
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9141476154327393,
      "learning_rate": 6.783702933507207e-06,
      "loss": 2.6844,
      "step": 214777
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8068597316741943,
      "learning_rate": 6.78283798947854e-06,
      "loss": 2.8647,
      "step": 214778
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.902583122253418,
      "learning_rate": 6.781973099964466e-06,
      "loss": 2.8171,
      "step": 214779
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.741703510284424,
      "learning_rate": 6.781108264965218e-06,
      "loss": 3.0769,
      "step": 214780
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7057735919952393,
      "learning_rate": 6.78024348448083e-06,
      "loss": 3.1447,
      "step": 214781
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.26475191116333,
      "learning_rate": 6.7793787585116e-06,
      "loss": 3.0103,
      "step": 214782
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.780780553817749,
      "learning_rate": 6.778514087057563e-06,
      "loss": 2.8883,
      "step": 214783
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.420347690582275,
      "learning_rate": 6.777649470118951e-06,
      "loss": 2.8329,
      "step": 214784
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4983267784118652,
      "learning_rate": 6.776784907695864e-06,
      "loss": 2.9763,
      "step": 214785
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8059170246124268,
      "learning_rate": 6.7759203997885705e-06,
      "loss": 2.5319,
      "step": 214786
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.108046770095825,
      "learning_rate": 6.775055946397135e-06,
      "loss": 2.9177,
      "step": 214787
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0443665981292725,
      "learning_rate": 6.77419154752179e-06,
      "loss": 3.1145,
      "step": 214788
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7255470752716064,
      "learning_rate": 6.773327203162638e-06,
      "loss": 2.869,
      "step": 214789
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8859663009643555,
      "learning_rate": 6.772462913319843e-06,
      "loss": 2.9042,
      "step": 214790
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8105647563934326,
      "learning_rate": 6.771598677993573e-06,
      "loss": 3.025,
      "step": 214791
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0127806663513184,
      "learning_rate": 6.7707344971840276e-06,
      "loss": 3.0009,
      "step": 214792
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2144696712493896,
      "learning_rate": 6.769870370891306e-06,
      "loss": 3.0072,
      "step": 214793
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9435229301452637,
      "learning_rate": 6.769006299115642e-06,
      "loss": 2.9054,
      "step": 214794
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7128496170043945,
      "learning_rate": 6.768142281857136e-06,
      "loss": 3.0781,
      "step": 214795
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.943289041519165,
      "learning_rate": 6.767278319115987e-06,
      "loss": 3.2247,
      "step": 214796
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4300050735473633,
      "learning_rate": 6.7664144108922955e-06,
      "loss": 3.0577,
      "step": 214797
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9927854537963867,
      "learning_rate": 6.765550557186294e-06,
      "loss": 3.0511,
      "step": 214798
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.840761423110962,
      "learning_rate": 6.764686757998083e-06,
      "loss": 2.75,
      "step": 214799
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.575441598892212,
      "learning_rate": 6.763823013327896e-06,
      "loss": 2.8842,
      "step": 214800
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7997424602508545,
      "learning_rate": 6.7629593231758316e-06,
      "loss": 2.9883,
      "step": 214801
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.624037981033325,
      "learning_rate": 6.762095687542057e-06,
      "loss": 2.7672,
      "step": 214802
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.403519868850708,
      "learning_rate": 6.761232106426739e-06,
      "loss": 3.0279,
      "step": 214803
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2680790424346924,
      "learning_rate": 6.760368579830044e-06,
      "loss": 2.8534,
      "step": 214804
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.097271680831909,
      "learning_rate": 6.759505107752139e-06,
      "loss": 2.9681,
      "step": 214805
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.805294990539551,
      "learning_rate": 6.75864169019319e-06,
      "loss": 2.9544,
      "step": 214806
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.262641191482544,
      "learning_rate": 6.75777832715333e-06,
      "loss": 2.7761,
      "step": 214807
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.0545454025268555,
      "learning_rate": 6.756915018632725e-06,
      "loss": 3.0108,
      "step": 214808
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.880326271057129,
      "learning_rate": 6.756051764631576e-06,
      "loss": 2.6907,
      "step": 214809
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7962522506713867,
      "learning_rate": 6.7551885651499825e-06,
      "loss": 2.9092,
      "step": 214810
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7243905067443848,
      "learning_rate": 6.754325420188145e-06,
      "loss": 2.8558,
      "step": 214811
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0972676277160645,
      "learning_rate": 6.753462329746229e-06,
      "loss": 2.7479,
      "step": 214812
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.228827714920044,
      "learning_rate": 6.7525992938243674e-06,
      "loss": 3.1416,
      "step": 214813
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.577852249145508,
      "learning_rate": 6.751736312422695e-06,
      "loss": 3.2544,
      "step": 214814
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3349497318267822,
      "learning_rate": 6.750873385541478e-06,
      "loss": 2.8576,
      "step": 214815
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1869070529937744,
      "learning_rate": 6.7500105131807815e-06,
      "loss": 2.9825,
      "step": 214816
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6892154216766357,
      "learning_rate": 6.7491476953407734e-06,
      "loss": 2.8856,
      "step": 214817
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.81966233253479,
      "learning_rate": 6.748284932021653e-06,
      "loss": 2.8647,
      "step": 214818
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.5246005058288574,
      "learning_rate": 6.747422223223586e-06,
      "loss": 2.9542,
      "step": 214819
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.893069267272949,
      "learning_rate": 6.746559568946641e-06,
      "loss": 2.9073,
      "step": 214820
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8203253746032715,
      "learning_rate": 6.745696969191117e-06,
      "loss": 2.6706,
      "step": 214821
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.014004945755005,
      "learning_rate": 6.744834423957046e-06,
      "loss": 2.5494,
      "step": 214822
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.007477045059204,
      "learning_rate": 6.743971933244696e-06,
      "loss": 2.7882,
      "step": 214823
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.388258457183838,
      "learning_rate": 6.743109497054167e-06,
      "loss": 2.673,
      "step": 214824
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7475051879882812,
      "learning_rate": 6.742247115385624e-06,
      "loss": 2.9801,
      "step": 214825
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3116180896759033,
      "learning_rate": 6.741384788239201e-06,
      "loss": 2.7796,
      "step": 214826
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7222111225128174,
      "learning_rate": 6.740522515615132e-06,
      "loss": 2.8639,
      "step": 214827
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.13034725189209,
      "learning_rate": 6.739660297513483e-06,
      "loss": 2.9406,
      "step": 214828
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8463187217712402,
      "learning_rate": 6.73879813393452e-06,
      "loss": 2.9391,
      "step": 214829
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2025792598724365,
      "learning_rate": 6.73793602487831e-06,
      "loss": 2.9892,
      "step": 214830
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9681787490844727,
      "learning_rate": 6.737073970345119e-06,
      "loss": 3.1121,
      "step": 214831
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7727506160736084,
      "learning_rate": 6.736211970334981e-06,
      "loss": 3.0062,
      "step": 214832
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1331026554107666,
      "learning_rate": 6.735350024848163e-06,
      "loss": 2.7671,
      "step": 214833
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8929667472839355,
      "learning_rate": 6.73448813388473e-06,
      "loss": 2.8691,
      "step": 214834
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5222008228302,
      "learning_rate": 6.733626297444916e-06,
      "loss": 2.7945,
      "step": 214835
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7320899963378906,
      "learning_rate": 6.732764515528822e-06,
      "loss": 3.1146,
      "step": 214836
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2642152309417725,
      "learning_rate": 6.731902788136711e-06,
      "loss": 2.8481,
      "step": 214837
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.141226053237915,
      "learning_rate": 6.73104111526862e-06,
      "loss": 3.073,
      "step": 214838
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.686885118484497,
      "learning_rate": 6.730179496924815e-06,
      "loss": 2.984,
      "step": 214839
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7395682334899902,
      "learning_rate": 6.7293179331053606e-06,
      "loss": 2.8698,
      "step": 214840
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.844698429107666,
      "learning_rate": 6.728456423810458e-06,
      "loss": 2.946,
      "step": 214841
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2123773097991943,
      "learning_rate": 6.727594969040273e-06,
      "loss": 3.0447,
      "step": 214842
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.739309072494507,
      "learning_rate": 6.72673356879504e-06,
      "loss": 2.8626,
      "step": 214843
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7795441150665283,
      "learning_rate": 6.725872223074757e-06,
      "loss": 2.9543,
      "step": 214844
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.123460531234741,
      "learning_rate": 6.725010931879693e-06,
      "loss": 2.9015,
      "step": 214845
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.613792896270752,
      "learning_rate": 6.7241496952099796e-06,
      "loss": 2.8941,
      "step": 214846
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.5075185298919678,
      "learning_rate": 6.723288513065817e-06,
      "loss": 2.9426,
      "step": 214847
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.514209747314453,
      "learning_rate": 6.722427385447271e-06,
      "loss": 3.062,
      "step": 214848
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.770784854888916,
      "learning_rate": 6.7215663123546425e-06,
      "loss": 2.8077,
      "step": 214849
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.700507640838623,
      "learning_rate": 6.7207052937879316e-06,
      "loss": 3.1489,
      "step": 214850
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5279548168182373,
      "learning_rate": 6.719844329747437e-06,
      "loss": 2.8502,
      "step": 214851
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.885164260864258,
      "learning_rate": 6.718983420233192e-06,
      "loss": 2.884,
      "step": 214852
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5928807258605957,
      "learning_rate": 6.718122565245499e-06,
      "loss": 2.7448,
      "step": 214853
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0979623794555664,
      "learning_rate": 6.717261764784387e-06,
      "loss": 3.0794,
      "step": 214854
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.086642265319824,
      "learning_rate": 6.716401018850126e-06,
      "loss": 2.8911,
      "step": 214855
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.4395272731781006,
      "learning_rate": 6.715540327442781e-06,
      "loss": 2.7609,
      "step": 214856
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.050086498260498,
      "learning_rate": 6.714679690562586e-06,
      "loss": 2.8746,
      "step": 214857
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6549534797668457,
      "learning_rate": 6.713819108209606e-06,
      "loss": 3.0148,
      "step": 214858
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6986501216888428,
      "learning_rate": 6.712958580384109e-06,
      "loss": 2.9315,
      "step": 214859
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9329452514648438,
      "learning_rate": 6.712098107086195e-06,
      "loss": 3.1081,
      "step": 214860
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2158098220825195,
      "learning_rate": 6.7112376883160624e-06,
      "loss": 3.1117,
      "step": 214861
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.693053722381592,
      "learning_rate": 6.710377324073846e-06,
      "loss": 2.8294,
      "step": 214862
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.657170057296753,
      "learning_rate": 6.709517014359678e-06,
      "loss": 2.8302,
      "step": 214863
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6224169731140137,
      "learning_rate": 6.708656759173758e-06,
      "loss": 2.8134,
      "step": 214864
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9243338108062744,
      "learning_rate": 6.707796558516254e-06,
      "loss": 2.8846,
      "step": 214865
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0410914421081543,
      "learning_rate": 6.706936412387265e-06,
      "loss": 2.9685,
      "step": 214866
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.517169237136841,
      "learning_rate": 6.706076320787024e-06,
      "loss": 2.8776,
      "step": 214867
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8065829277038574,
      "learning_rate": 6.7052162837156646e-06,
      "loss": 2.8686,
      "step": 214868
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1543216705322266,
      "learning_rate": 6.704356301173352e-06,
      "loss": 2.7543,
      "step": 214869
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.021472930908203,
      "learning_rate": 6.703496373160189e-06,
      "loss": 2.8988,
      "step": 214870
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4678308963775635,
      "learning_rate": 6.702636499676406e-06,
      "loss": 2.7431,
      "step": 214871
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.980382204055786,
      "learning_rate": 6.701776680722137e-06,
      "loss": 2.9759,
      "step": 214872
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.792358875274658,
      "learning_rate": 6.70091691629755e-06,
      "loss": 2.9129,
      "step": 214873
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.385923385620117,
      "learning_rate": 6.7000572064028095e-06,
      "loss": 2.8964,
      "step": 214874
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6024231910705566,
      "learning_rate": 6.699197551038082e-06,
      "loss": 2.6732,
      "step": 214875
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.036027431488037,
      "learning_rate": 6.698337950203436e-06,
      "loss": 2.9294,
      "step": 214876
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.233412265777588,
      "learning_rate": 6.69747840389917e-06,
      "loss": 2.9833,
      "step": 214877
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2735769748687744,
      "learning_rate": 6.696618912125351e-06,
      "loss": 2.9912,
      "step": 214878
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.160879135131836,
      "learning_rate": 6.6957594748821785e-06,
      "loss": 2.8691,
      "step": 214879
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.861528158187866,
      "learning_rate": 6.694900092169819e-06,
      "loss": 2.8214,
      "step": 214880
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8441321849823,
      "learning_rate": 6.6940407639884065e-06,
      "loss": 3.0733,
      "step": 214881
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.020808219909668,
      "learning_rate": 6.693181490338073e-06,
      "loss": 2.7435,
      "step": 214882
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8133597373962402,
      "learning_rate": 6.692322271219052e-06,
      "loss": 3.0695,
      "step": 214883
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8582632541656494,
      "learning_rate": 6.691463106631411e-06,
      "loss": 3.0736,
      "step": 214884
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6504952907562256,
      "learning_rate": 6.690603996575416e-06,
      "loss": 3.1205,
      "step": 214885
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0972306728363037,
      "learning_rate": 6.6897449410511675e-06,
      "loss": 2.8904,
      "step": 214886
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2885923385620117,
      "learning_rate": 6.6888859400588305e-06,
      "loss": 3.0319,
      "step": 214887
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9863228797912598,
      "learning_rate": 6.688026993598572e-06,
      "loss": 3.0394,
      "step": 214888
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.920952558517456,
      "learning_rate": 6.687168101670526e-06,
      "loss": 2.9586,
      "step": 214889
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.953869581222534,
      "learning_rate": 6.686309264274858e-06,
      "loss": 2.9826,
      "step": 214890
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1883366107940674,
      "learning_rate": 6.685450481411769e-06,
      "loss": 2.9082,
      "step": 214891
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.991396903991699,
      "learning_rate": 6.684591753081426e-06,
      "loss": 2.7835,
      "step": 214892
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.168939590454102,
      "learning_rate": 6.683733079283893e-06,
      "loss": 2.7928,
      "step": 214893
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.800015449523926,
      "learning_rate": 6.682874460019405e-06,
      "loss": 2.8882,
      "step": 214894
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9450323581695557,
      "learning_rate": 6.682015895288129e-06,
      "loss": 2.9889,
      "step": 214895
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.067138671875,
      "learning_rate": 6.681157385090164e-06,
      "loss": 2.9231,
      "step": 214896
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.237443685531616,
      "learning_rate": 6.6802989294257425e-06,
      "loss": 2.7206,
      "step": 214897
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3467323780059814,
      "learning_rate": 6.679440528294999e-06,
      "loss": 2.7104,
      "step": 214898
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.3208343982696533,
      "learning_rate": 6.6785821816980665e-06,
      "loss": 2.9266,
      "step": 214899
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9953110218048096,
      "learning_rate": 6.677723889635111e-06,
      "loss": 3.0118,
      "step": 214900
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.911797046661377,
      "learning_rate": 6.676865652106334e-06,
      "loss": 2.9216,
      "step": 214901
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.981109142303467,
      "learning_rate": 6.676007469111833e-06,
      "loss": 3.1854,
      "step": 214902
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8055248260498047,
      "learning_rate": 6.6751493406518085e-06,
      "loss": 2.8889,
      "step": 214903
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8182740211486816,
      "learning_rate": 6.674291266726428e-06,
      "loss": 2.8941,
      "step": 214904
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0582056045532227,
      "learning_rate": 6.673433247335824e-06,
      "loss": 2.709,
      "step": 214905
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.4627022743225098,
      "learning_rate": 6.672575282480164e-06,
      "loss": 2.7742,
      "step": 214906
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9175543785095215,
      "learning_rate": 6.671717372159579e-06,
      "loss": 3.068,
      "step": 214907
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.339338779449463,
      "learning_rate": 6.670859516374305e-06,
      "loss": 2.9563,
      "step": 214908
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.737386465072632,
      "learning_rate": 6.67000171512444e-06,
      "loss": 3.0305,
      "step": 214909
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6961283683776855,
      "learning_rate": 6.669143968410185e-06,
      "loss": 2.828,
      "step": 214910
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.903629779815674,
      "learning_rate": 6.668286276231605e-06,
      "loss": 3.1766,
      "step": 214911
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.968895196914673,
      "learning_rate": 6.667428638589001e-06,
      "loss": 3.028,
      "step": 214912
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.654136896133423,
      "learning_rate": 6.666571055482406e-06,
      "loss": 2.9317,
      "step": 214913
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6907737255096436,
      "learning_rate": 6.6657135269120534e-06,
      "loss": 2.819,
      "step": 214914
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3898990154266357,
      "learning_rate": 6.6648560528780426e-06,
      "loss": 2.8206,
      "step": 214915
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3421387672424316,
      "learning_rate": 6.663998633380674e-06,
      "loss": 2.9564,
      "step": 214916
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2881178855895996,
      "learning_rate": 6.663141268419914e-06,
      "loss": 2.8066,
      "step": 214917
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.240711212158203,
      "learning_rate": 6.6622839579960615e-06,
      "loss": 3.0065,
      "step": 214918
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.898268461227417,
      "learning_rate": 6.661426702109185e-06,
      "loss": 2.8313,
      "step": 214919
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7332401275634766,
      "learning_rate": 6.660569500759516e-06,
      "loss": 2.8257,
      "step": 214920
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.763904571533203,
      "learning_rate": 6.659712353947155e-06,
      "loss": 2.8582,
      "step": 214921
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.773343563079834,
      "learning_rate": 6.658855261672336e-06,
      "loss": 2.6807,
      "step": 214922
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.479677677154541,
      "learning_rate": 6.657998223935124e-06,
      "loss": 2.8509,
      "step": 214923
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8077492713928223,
      "learning_rate": 6.657141240735786e-06,
      "loss": 3.1073,
      "step": 214924
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7642369270324707,
      "learning_rate": 6.656284312074356e-06,
      "loss": 3.0943,
      "step": 214925
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8951430320739746,
      "learning_rate": 6.6554274379510996e-06,
      "loss": 3.0603,
      "step": 214926
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.803764820098877,
      "learning_rate": 6.654570618366117e-06,
      "loss": 2.5864,
      "step": 214927
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2119028568267822,
      "learning_rate": 6.653713853319609e-06,
      "loss": 2.7224,
      "step": 214928
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.057539463043213,
      "learning_rate": 6.6528571428117075e-06,
      "loss": 2.8603,
      "step": 214929
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3239309787750244,
      "learning_rate": 6.6520004868425795e-06,
      "loss": 2.9026,
      "step": 214930
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.605437994003296,
      "learning_rate": 6.651143885412358e-06,
      "loss": 2.9078,
      "step": 214931
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.931239128112793,
      "learning_rate": 6.650287338521276e-06,
      "loss": 2.7752,
      "step": 214932
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0858688354492188,
      "learning_rate": 6.649430846169368e-06,
      "loss": 2.7575,
      "step": 214933
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0611443519592285,
      "learning_rate": 6.6485744083569326e-06,
      "loss": 3.123,
      "step": 214934
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6250438690185547,
      "learning_rate": 6.64771802508407e-06,
      "loss": 2.9659,
      "step": 214935
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0342674255371094,
      "learning_rate": 6.646861696350914e-06,
      "loss": 2.8439,
      "step": 214936
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.4579648971557617,
      "learning_rate": 6.646005422157596e-06,
      "loss": 3.1809,
      "step": 214937
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.939148187637329,
      "learning_rate": 6.645149202504385e-06,
      "loss": 2.7819,
      "step": 214938
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3922176361083984,
      "learning_rate": 6.644293037391346e-06,
      "loss": 2.7017,
      "step": 214939
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7349693775177,
      "learning_rate": 6.6434369268186795e-06,
      "loss": 2.9826,
      "step": 214940
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4385359287261963,
      "learning_rate": 6.642580870786551e-06,
      "loss": 2.9029,
      "step": 214941
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1989879608154297,
      "learning_rate": 6.641724869295095e-06,
      "loss": 3.0168,
      "step": 214942
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.098829746246338,
      "learning_rate": 6.640868922344478e-06,
      "loss": 3.0194,
      "step": 214943
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8882555961608887,
      "learning_rate": 6.640013029934865e-06,
      "loss": 2.8341,
      "step": 214944
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7724616527557373,
      "learning_rate": 6.639157192066391e-06,
      "loss": 2.8654,
      "step": 214945
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.956702470779419,
      "learning_rate": 6.638301408739255e-06,
      "loss": 2.949,
      "step": 214946
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.270568370819092,
      "learning_rate": 6.63744567995359e-06,
      "loss": 3.1725,
      "step": 214947
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8072550296783447,
      "learning_rate": 6.636590005709597e-06,
      "loss": 2.8482,
      "step": 214948
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9884915351867676,
      "learning_rate": 6.635734386007341e-06,
      "loss": 3.0265,
      "step": 214949
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.778838634490967,
      "learning_rate": 6.634878820847056e-06,
      "loss": 3.028,
      "step": 214950
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0097615718841553,
      "learning_rate": 6.6340233102288755e-06,
      "loss": 2.7883,
      "step": 214951
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.321122407913208,
      "learning_rate": 6.633167854152999e-06,
      "loss": 3.0022,
      "step": 214952
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3160369396209717,
      "learning_rate": 6.6323124526195596e-06,
      "loss": 2.9928,
      "step": 214953
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.237527847290039,
      "learning_rate": 6.63145710562869e-06,
      "loss": 2.9403,
      "step": 214954
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.035203218460083,
      "learning_rate": 6.630601813180558e-06,
      "loss": 2.8916,
      "step": 214955
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.325079917907715,
      "learning_rate": 6.629746575275363e-06,
      "loss": 2.7373,
      "step": 214956
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5837626457214355,
      "learning_rate": 6.628891391913205e-06,
      "loss": 2.9571,
      "step": 214957
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.352598190307617,
      "learning_rate": 6.628036263094316e-06,
      "loss": 3.0453,
      "step": 214958
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.947831869125366,
      "learning_rate": 6.627181188818798e-06,
      "loss": 2.9505,
      "step": 214959
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8968029022216797,
      "learning_rate": 6.626326169086815e-06,
      "loss": 2.7671,
      "step": 214960
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7599549293518066,
      "learning_rate": 6.625471203898502e-06,
      "loss": 2.9572,
      "step": 214961
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.837094306945801,
      "learning_rate": 6.624616293254093e-06,
      "loss": 2.8638,
      "step": 214962
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9912476539611816,
      "learning_rate": 6.6237614371536854e-06,
      "loss": 2.9161,
      "step": 214963
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.836697816848755,
      "learning_rate": 6.62290663559748e-06,
      "loss": 3.0663,
      "step": 214964
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5539844036102295,
      "learning_rate": 6.622051888585611e-06,
      "loss": 3.0794,
      "step": 214965
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1956727504730225,
      "learning_rate": 6.621197196118244e-06,
      "loss": 2.9887,
      "step": 214966
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.5897843837738037,
      "learning_rate": 6.6203425581955125e-06,
      "loss": 2.8943,
      "step": 214967
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3255553245544434,
      "learning_rate": 6.619487974817617e-06,
      "loss": 3.0158,
      "step": 214968
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.208543300628662,
      "learning_rate": 6.618633445984689e-06,
      "loss": 3.2445,
      "step": 214969
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8943231105804443,
      "learning_rate": 6.617778971696897e-06,
      "loss": 2.9267,
      "step": 214970
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1359543800354004,
      "learning_rate": 6.616924551954405e-06,
      "loss": 3.102,
      "step": 214971
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4943597316741943,
      "learning_rate": 6.616070186757383e-06,
      "loss": 2.9585,
      "step": 214972
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8555045127868652,
      "learning_rate": 6.6152158761058946e-06,
      "loss": 2.7354,
      "step": 214973
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0997109413146973,
      "learning_rate": 6.614361620000241e-06,
      "loss": 2.7862,
      "step": 214974
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.139389991760254,
      "learning_rate": 6.613507418440489e-06,
      "loss": 2.7122,
      "step": 214975
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8719184398651123,
      "learning_rate": 6.6126532714268375e-06,
      "loss": 3.1868,
      "step": 214976
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.499518871307373,
      "learning_rate": 6.611799178959454e-06,
      "loss": 3.1682,
      "step": 214977
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7941648960113525,
      "learning_rate": 6.61094514103847e-06,
      "loss": 2.7628,
      "step": 214978
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.536444664001465,
      "learning_rate": 6.610091157663988e-06,
      "loss": 2.8607,
      "step": 214979
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9132418632507324,
      "learning_rate": 6.609237228836306e-06,
      "loss": 3.0665,
      "step": 214980
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2101967334747314,
      "learning_rate": 6.608383354555425e-06,
      "loss": 2.9482,
      "step": 214981
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.851804733276367,
      "learning_rate": 6.607529534821643e-06,
      "loss": 3.0647,
      "step": 214982
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.343533992767334,
      "learning_rate": 6.6066757696350616e-06,
      "loss": 2.7426,
      "step": 214983
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.263010263442993,
      "learning_rate": 6.605822058995813e-06,
      "loss": 2.908,
      "step": 214984
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6587727069854736,
      "learning_rate": 6.604968402904098e-06,
      "loss": 3.0027,
      "step": 214985
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.098181962966919,
      "learning_rate": 6.60411480136005e-06,
      "loss": 3.0514,
      "step": 214986
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6024560928344727,
      "learning_rate": 6.603261254363801e-06,
      "loss": 2.7209,
      "step": 214987
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0596699714660645,
      "learning_rate": 6.602407761915584e-06,
      "loss": 2.8631,
      "step": 214988
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1116111278533936,
      "learning_rate": 6.601554324015534e-06,
      "loss": 3.0041,
      "step": 214989
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8385162353515625,
      "learning_rate": 6.60070094066375e-06,
      "loss": 2.8479,
      "step": 214990
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8390121459960938,
      "learning_rate": 6.599847611860465e-06,
      "loss": 2.8903,
      "step": 214991
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5527262687683105,
      "learning_rate": 6.5989943376057784e-06,
      "loss": 2.8077,
      "step": 214992
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9351096153259277,
      "learning_rate": 6.598141117899891e-06,
      "loss": 3.1318,
      "step": 214993
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.696061372756958,
      "learning_rate": 6.597287952742969e-06,
      "loss": 3.01,
      "step": 214994
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.579559803009033,
      "learning_rate": 6.596434842135112e-06,
      "loss": 3.014,
      "step": 214995
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9925177097320557,
      "learning_rate": 6.595581786076554e-06,
      "loss": 2.8253,
      "step": 214996
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6892480850219727,
      "learning_rate": 6.5947287845673934e-06,
      "loss": 3.0605,
      "step": 214997
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.855567693710327,
      "learning_rate": 6.593875837607799e-06,
      "loss": 3.0019,
      "step": 214998
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8200900554656982,
      "learning_rate": 6.593022945197968e-06,
      "loss": 2.9353,
      "step": 214999
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9277896881103516,
      "learning_rate": 6.592170107338035e-06,
      "loss": 2.7761,
      "step": 215000
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8030405044555664,
      "learning_rate": 6.591317324028134e-06,
      "loss": 2.8247,
      "step": 215001
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.679102659225464,
      "learning_rate": 6.590464595268463e-06,
      "loss": 2.8219,
      "step": 215002
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.861727237701416,
      "learning_rate": 6.589611921059157e-06,
      "loss": 2.8465,
      "step": 215003
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.657719850540161,
      "learning_rate": 6.588759301400348e-06,
      "loss": 3.2013,
      "step": 215004
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.813418388366699,
      "learning_rate": 6.58790673629227e-06,
      "loss": 2.7472,
      "step": 215005
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.604340553283691,
      "learning_rate": 6.5870542257350224e-06,
      "loss": 2.9864,
      "step": 215006
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.03718900680542,
      "learning_rate": 6.586201769728805e-06,
      "loss": 3.0694,
      "step": 215007
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.820712089538574,
      "learning_rate": 6.585349368273718e-06,
      "loss": 2.6658,
      "step": 215008
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.092726469039917,
      "learning_rate": 6.5844970213699945e-06,
      "loss": 2.8393,
      "step": 215009
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2469401359558105,
      "learning_rate": 6.583644729017701e-06,
      "loss": 2.8995,
      "step": 215010
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.79512619972229,
      "learning_rate": 6.582792491217071e-06,
      "loss": 2.9981,
      "step": 215011
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.834491729736328,
      "learning_rate": 6.581940307968203e-06,
      "loss": 3.017,
      "step": 215012
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.62038516998291,
      "learning_rate": 6.581088179271332e-06,
      "loss": 2.9683,
      "step": 215013
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.571977138519287,
      "learning_rate": 6.580236105126591e-06,
      "loss": 2.7591,
      "step": 215014
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8199455738067627,
      "learning_rate": 6.579384085534112e-06,
      "loss": 2.9901,
      "step": 215015
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6610186100006104,
      "learning_rate": 6.578532120494029e-06,
      "loss": 3.0164,
      "step": 215016
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6420774459838867,
      "learning_rate": 6.577680210006575e-06,
      "loss": 3.0331,
      "step": 215017
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3259570598602295,
      "learning_rate": 6.576828354071817e-06,
      "loss": 3.0782,
      "step": 215018
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.394348621368408,
      "learning_rate": 6.575976552690021e-06,
      "loss": 2.8954,
      "step": 215019
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3265397548675537,
      "learning_rate": 6.575124805861287e-06,
      "loss": 2.8156,
      "step": 215020
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.781118154525757,
      "learning_rate": 6.574273113585749e-06,
      "loss": 2.9531,
      "step": 215021
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3675973415374756,
      "learning_rate": 6.573421475863605e-06,
      "loss": 2.9984,
      "step": 215022
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8857128620147705,
      "learning_rate": 6.57256989269499e-06,
      "loss": 2.8913,
      "step": 215023
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.244701385498047,
      "learning_rate": 6.571718364080069e-06,
      "loss": 3.0586,
      "step": 215024
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.410031795501709,
      "learning_rate": 6.570866890019011e-06,
      "loss": 3.1092,
      "step": 215025
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6599338054656982,
      "learning_rate": 6.57001547051198e-06,
      "loss": 3.1991,
      "step": 215026
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.260613441467285,
      "learning_rate": 6.569164105559144e-06,
      "loss": 2.7215,
      "step": 215027
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.5623600482940674,
      "learning_rate": 6.568312795160569e-06,
      "loss": 2.733,
      "step": 215028
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7342264652252197,
      "learning_rate": 6.5674615393165546e-06,
      "loss": 3.1479,
      "step": 215029
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4756112098693848,
      "learning_rate": 6.566610338027134e-06,
      "loss": 3.1209,
      "step": 215030
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4132649898529053,
      "learning_rate": 6.565759191292541e-06,
      "loss": 2.8958,
      "step": 215031
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.01741886138916,
      "learning_rate": 6.564908099112942e-06,
      "loss": 2.9522,
      "step": 215032
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4686601161956787,
      "learning_rate": 6.564057061488437e-06,
      "loss": 3.2018,
      "step": 215033
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.151740789413452,
      "learning_rate": 6.563206078419192e-06,
      "loss": 2.9324,
      "step": 215034
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3526175022125244,
      "learning_rate": 6.562355149905441e-06,
      "loss": 2.8288,
      "step": 215035
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1346349716186523,
      "learning_rate": 6.561504275947216e-06,
      "loss": 3.0156,
      "step": 215036
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.909769296646118,
      "learning_rate": 6.560653456544818e-06,
      "loss": 2.767,
      "step": 215037
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.322082757949829,
      "learning_rate": 6.5598026916982795e-06,
      "loss": 2.7693,
      "step": 215038
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.908958911895752,
      "learning_rate": 6.558951981407867e-06,
      "loss": 3.1201,
      "step": 215039
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2321419715881348,
      "learning_rate": 6.558101325673614e-06,
      "loss": 2.9514,
      "step": 215040
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.006071090698242,
      "learning_rate": 6.557250724495822e-06,
      "loss": 3.0083,
      "step": 215041
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.368912935256958,
      "learning_rate": 6.556400177874521e-06,
      "loss": 2.7903,
      "step": 215042
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9896795749664307,
      "learning_rate": 6.555549685809946e-06,
      "loss": 2.865,
      "step": 215043
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0322506427764893,
      "learning_rate": 6.554699248302231e-06,
      "loss": 2.9072,
      "step": 215044
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.542889595031738,
      "learning_rate": 6.553848865351541e-06,
      "loss": 2.686,
      "step": 215045
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.9126715660095215,
      "learning_rate": 6.55299853695801e-06,
      "loss": 2.8915,
      "step": 215046
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.624063491821289,
      "learning_rate": 6.552148263121837e-06,
      "loss": 2.9714,
      "step": 215047
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2981324195861816,
      "learning_rate": 6.551298043843156e-06,
      "loss": 2.8828,
      "step": 215048
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0073673725128174,
      "learning_rate": 6.550447879122133e-06,
      "loss": 2.9693,
      "step": 215049
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7481484413146973,
      "learning_rate": 6.5495977689589365e-06,
      "loss": 3.081,
      "step": 215050
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9584085941314697,
      "learning_rate": 6.548747713353664e-06,
      "loss": 2.9689,
      "step": 215051
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.1493659019470215,
      "learning_rate": 6.547897712306549e-06,
      "loss": 2.584,
      "step": 215052
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7232766151428223,
      "learning_rate": 6.547047765817725e-06,
      "loss": 2.9111,
      "step": 215053
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.996037483215332,
      "learning_rate": 6.546197873887294e-06,
      "loss": 2.7967,
      "step": 215054
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7269692420959473,
      "learning_rate": 6.545348036515519e-06,
      "loss": 2.9324,
      "step": 215055
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.13346004486084,
      "learning_rate": 6.544498253702501e-06,
      "loss": 2.8533,
      "step": 215056
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9561874866485596,
      "learning_rate": 6.543648525448375e-06,
      "loss": 2.7114,
      "step": 215057
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.013881206512451,
      "learning_rate": 6.542798851753306e-06,
      "loss": 2.8361,
      "step": 215058
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.012673854827881,
      "learning_rate": 6.5419492326175275e-06,
      "loss": 3.1045,
      "step": 215059
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1711277961730957,
      "learning_rate": 6.541099668041072e-06,
      "loss": 2.7113,
      "step": 215060
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.834172010421753,
      "learning_rate": 6.540250158024241e-06,
      "loss": 3.1411,
      "step": 215061
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3001770973205566,
      "learning_rate": 6.5394007025670325e-06,
      "loss": 2.9565,
      "step": 215062
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2234814167022705,
      "learning_rate": 6.538551301669781e-06,
      "loss": 2.812,
      "step": 215063
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9668595790863037,
      "learning_rate": 6.537701955332486e-06,
      "loss": 2.8523,
      "step": 215064
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4476406574249268,
      "learning_rate": 6.536852663555414e-06,
      "loss": 2.939,
      "step": 215065
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1069302558898926,
      "learning_rate": 6.536003426338632e-06,
      "loss": 3.1014,
      "step": 215066
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2987220287323,
      "learning_rate": 6.535154243682406e-06,
      "loss": 3.1653,
      "step": 215067
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.861771583557129,
      "learning_rate": 6.534305115586769e-06,
      "loss": 3.0008,
      "step": 215068
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.077420711517334,
      "learning_rate": 6.533456042051987e-06,
      "loss": 2.8787,
      "step": 215069
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.386082649230957,
      "learning_rate": 6.532607023078162e-06,
      "loss": 2.8718,
      "step": 215070
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1374058723449707,
      "learning_rate": 6.531758058665493e-06,
      "loss": 2.6109,
      "step": 215071
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.368405342102051,
      "learning_rate": 6.5309091488140784e-06,
      "loss": 2.9206,
      "step": 215072
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.707058906555176,
      "learning_rate": 6.53006029352412e-06,
      "loss": 3.0389,
      "step": 215073
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3541831970214844,
      "learning_rate": 6.529211492795716e-06,
      "loss": 2.7921,
      "step": 215074
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.095895290374756,
      "learning_rate": 6.528362746629135e-06,
      "loss": 3.0152,
      "step": 215075
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5224063396453857,
      "learning_rate": 6.527514055024475e-06,
      "loss": 2.8096,
      "step": 215076
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.992180109024048,
      "learning_rate": 6.526665417981836e-06,
      "loss": 2.965,
      "step": 215077
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.784914493560791,
      "learning_rate": 6.525816835501452e-06,
      "loss": 2.8657,
      "step": 215078
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.070695400238037,
      "learning_rate": 6.524968307583489e-06,
      "loss": 2.9394,
      "step": 215079
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.868189573287964,
      "learning_rate": 6.524119834228014e-06,
      "loss": 2.9261,
      "step": 215080
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9318296909332275,
      "learning_rate": 6.523271415435294e-06,
      "loss": 2.7413,
      "step": 215081
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.586381673812866,
      "learning_rate": 6.522423051205428e-06,
      "loss": 2.8008,
      "step": 215082
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.970674514770508,
      "learning_rate": 6.521574741538582e-06,
      "loss": 2.7414,
      "step": 215083
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.472537040710449,
      "learning_rate": 6.520726486434924e-06,
      "loss": 2.9266,
      "step": 215084
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.359623670578003,
      "learning_rate": 6.519878285894586e-06,
      "loss": 2.8944,
      "step": 215085
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.903872489929199,
      "learning_rate": 6.5190301399177356e-06,
      "loss": 2.9467,
      "step": 215086
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3765459060668945,
      "learning_rate": 6.518182048504572e-06,
      "loss": 2.9797,
      "step": 215087
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.823148727416992,
      "learning_rate": 6.517334011655195e-06,
      "loss": 2.9365,
      "step": 215088
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.080021858215332,
      "learning_rate": 6.516486029369772e-06,
      "loss": 2.9821,
      "step": 215089
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.46799373626709,
      "learning_rate": 6.515638101648502e-06,
      "loss": 3.0473,
      "step": 215090
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.142559051513672,
      "learning_rate": 6.5147902284914845e-06,
      "loss": 2.8898,
      "step": 215091
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.757441997528076,
      "learning_rate": 6.51394240989892e-06,
      "loss": 2.9881,
      "step": 215092
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2819435596466064,
      "learning_rate": 6.5130946458709755e-06,
      "loss": 2.9177,
      "step": 215093
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.526094913482666,
      "learning_rate": 6.51224693640775e-06,
      "loss": 3.0188,
      "step": 215094
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1771271228790283,
      "learning_rate": 6.511399281509444e-06,
      "loss": 2.5539,
      "step": 215095
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6120128631591797,
      "learning_rate": 6.510551681176257e-06,
      "loss": 2.9486,
      "step": 215096
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0657784938812256,
      "learning_rate": 6.509704135408222e-06,
      "loss": 3.0926,
      "step": 215097
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.939591407775879,
      "learning_rate": 6.50885664420564e-06,
      "loss": 2.939,
      "step": 215098
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.800084590911865,
      "learning_rate": 6.508009207568576e-06,
      "loss": 3.0391,
      "step": 215099
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.027423143386841,
      "learning_rate": 6.507161825497231e-06,
      "loss": 3.0299,
      "step": 215100
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.021411418914795,
      "learning_rate": 6.506314497991705e-06,
      "loss": 2.9208,
      "step": 215101
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.824777841567993,
      "learning_rate": 6.5054672250522296e-06,
      "loss": 3.0064,
      "step": 215102
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.458261489868164,
      "learning_rate": 6.5046200066788735e-06,
      "loss": 2.6814,
      "step": 215103
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.675975799560547,
      "learning_rate": 6.503772842871902e-06,
      "loss": 2.8592,
      "step": 215104
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.978576898574829,
      "learning_rate": 6.502925733631414e-06,
      "loss": 2.8529,
      "step": 215105
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2538344860076904,
      "learning_rate": 6.502078678957578e-06,
      "loss": 3.153,
      "step": 215106
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.624110221862793,
      "learning_rate": 6.5012316788505266e-06,
      "loss": 2.8923,
      "step": 215107
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9903013706207275,
      "learning_rate": 6.50038473331046e-06,
      "loss": 2.9972,
      "step": 215108
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7861223220825195,
      "learning_rate": 6.499537842337477e-06,
      "loss": 2.9433,
      "step": 215109
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7765235900878906,
      "learning_rate": 6.498691005931777e-06,
      "loss": 2.9613,
      "step": 215110
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1663906574249268,
      "learning_rate": 6.497844224093563e-06,
      "loss": 3.0525,
      "step": 215111
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8149306774139404,
      "learning_rate": 6.496997496822898e-06,
      "loss": 3.1719,
      "step": 215112
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.027686357498169,
      "learning_rate": 6.49615082411995e-06,
      "loss": 3.1707,
      "step": 215113
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7579681873321533,
      "learning_rate": 6.495304205984952e-06,
      "loss": 2.9905,
      "step": 215114
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.134282350540161,
      "learning_rate": 6.494457642418005e-06,
      "loss": 2.985,
      "step": 215115
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6728591918945312,
      "learning_rate": 6.493611133419274e-06,
      "loss": 2.7906,
      "step": 215116
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1922309398651123,
      "learning_rate": 6.49276467898896e-06,
      "loss": 3.1397,
      "step": 215117
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.918764352798462,
      "learning_rate": 6.491918279127128e-06,
      "loss": 2.8892,
      "step": 215118
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1200144290924072,
      "learning_rate": 6.491071933834013e-06,
      "loss": 3.1644,
      "step": 215119
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.193796157836914,
      "learning_rate": 6.490225643109748e-06,
      "loss": 2.9958,
      "step": 215120
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6959891319274902,
      "learning_rate": 6.489379406954465e-06,
      "loss": 3.1385,
      "step": 215121
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8080453872680664,
      "learning_rate": 6.488533225368364e-06,
      "loss": 3.1387,
      "step": 215122
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3425235748291016,
      "learning_rate": 6.48768709835158e-06,
      "loss": 3.0487,
      "step": 215123
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.180938959121704,
      "learning_rate": 6.4868410259043436e-06,
      "loss": 2.9839,
      "step": 215124
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.967771291732788,
      "learning_rate": 6.485995008026657e-06,
      "loss": 2.9552,
      "step": 215125
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.796783685684204,
      "learning_rate": 6.485149044718785e-06,
      "loss": 3.008,
      "step": 215126
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9538609981536865,
      "learning_rate": 6.4843031359808616e-06,
      "loss": 2.6679,
      "step": 215127
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.284999132156372,
      "learning_rate": 6.483457281813054e-06,
      "loss": 2.8006,
      "step": 215128
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2402288913726807,
      "learning_rate": 6.482611482215527e-06,
      "loss": 2.8067,
      "step": 215129
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7983148097991943,
      "learning_rate": 6.481765737188449e-06,
      "loss": 2.8117,
      "step": 215130
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.961508274078369,
      "learning_rate": 6.480920046731886e-06,
      "loss": 2.7841,
      "step": 215131
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9580135345458984,
      "learning_rate": 6.480074410846103e-06,
      "loss": 2.8982,
      "step": 215132
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9953670501708984,
      "learning_rate": 6.479228829531169e-06,
      "loss": 2.7861,
      "step": 215133
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.813833713531494,
      "learning_rate": 6.478383302787316e-06,
      "loss": 2.9792,
      "step": 215134
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2473011016845703,
      "learning_rate": 6.477537830614676e-06,
      "loss": 2.7328,
      "step": 215135
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8499679565429688,
      "learning_rate": 6.4766924130134175e-06,
      "loss": 2.8496,
      "step": 215136
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.480189800262451,
      "learning_rate": 6.4758470499836735e-06,
      "loss": 2.9247,
      "step": 215137
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7539544105529785,
      "learning_rate": 6.4750017415256095e-06,
      "loss": 2.7509,
      "step": 215138
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.088993787765503,
      "learning_rate": 6.474156487639359e-06,
      "loss": 2.9161,
      "step": 215139
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.985698699951172,
      "learning_rate": 6.473311288325122e-06,
      "loss": 3.1014,
      "step": 215140
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.980874538421631,
      "learning_rate": 6.472466143583033e-06,
      "loss": 2.9068,
      "step": 215141
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.527862310409546,
      "learning_rate": 6.4716210534132894e-06,
      "loss": 2.9016,
      "step": 215142
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7909505367279053,
      "learning_rate": 6.470776017815959e-06,
      "loss": 3.0415,
      "step": 215143
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7957582473754883,
      "learning_rate": 6.469931036791276e-06,
      "loss": 2.8805,
      "step": 215144
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.330869674682617,
      "learning_rate": 6.469086110339339e-06,
      "loss": 3.1389,
      "step": 215145
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.922797679901123,
      "learning_rate": 6.468241238460381e-06,
      "loss": 2.9537,
      "step": 215146
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8494906425476074,
      "learning_rate": 6.467396421154503e-06,
      "loss": 2.8552,
      "step": 215147
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.379833698272705,
      "learning_rate": 6.4665516584219035e-06,
      "loss": 2.9079,
      "step": 215148
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.675631046295166,
      "learning_rate": 6.465706950262683e-06,
      "loss": 3.0432,
      "step": 215149
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.708942413330078,
      "learning_rate": 6.464862296677042e-06,
      "loss": 2.8102,
      "step": 215150
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1481642723083496,
      "learning_rate": 6.4640176976651136e-06,
      "loss": 2.9229,
      "step": 215151
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4679946899414062,
      "learning_rate": 6.463173153227064e-06,
      "loss": 2.7804,
      "step": 215152
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1844751834869385,
      "learning_rate": 6.462328663363059e-06,
      "loss": 3.0847,
      "step": 215153
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.238990306854248,
      "learning_rate": 6.461484228073233e-06,
      "loss": 2.8882,
      "step": 215154
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.235738754272461,
      "learning_rate": 6.460639847357785e-06,
      "loss": 2.7704,
      "step": 215155
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8988945484161377,
      "learning_rate": 6.459795521216849e-06,
      "loss": 2.837,
      "step": 215156
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2303626537323,
      "learning_rate": 6.458951249650557e-06,
      "loss": 2.9043,
      "step": 215157
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8141651153564453,
      "learning_rate": 6.4581070326590765e-06,
      "loss": 3.0171,
      "step": 215158
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.011162281036377,
      "learning_rate": 6.457262870242574e-06,
      "loss": 2.6882,
      "step": 215159
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.501204013824463,
      "learning_rate": 6.4564187624012495e-06,
      "loss": 2.9255,
      "step": 215160
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.982808828353882,
      "learning_rate": 6.455574709135203e-06,
      "loss": 2.6892,
      "step": 215161
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6369080543518066,
      "learning_rate": 6.4547307104446e-06,
      "loss": 3.0717,
      "step": 215162
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9765524864196777,
      "learning_rate": 6.453886766329574e-06,
      "loss": 2.8378,
      "step": 215163
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9665286540985107,
      "learning_rate": 6.453042876790327e-06,
      "loss": 3.0204,
      "step": 215164
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2704319953918457,
      "learning_rate": 6.452199041826989e-06,
      "loss": 2.9289,
      "step": 215165
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.119842290878296,
      "learning_rate": 6.451355261439761e-06,
      "loss": 2.9083,
      "step": 215166
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.436338424682617,
      "learning_rate": 6.4505115356287775e-06,
      "loss": 3.0589,
      "step": 215167
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6957080364227295,
      "learning_rate": 6.449667864394137e-06,
      "loss": 2.8907,
      "step": 215168
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.778541088104248,
      "learning_rate": 6.448824247736073e-06,
      "loss": 2.7934,
      "step": 215169
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.270040273666382,
      "learning_rate": 6.447980685654719e-06,
      "loss": 2.9935,
      "step": 215170
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8197622299194336,
      "learning_rate": 6.447137178150208e-06,
      "loss": 2.9212,
      "step": 215171
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9684853553771973,
      "learning_rate": 6.44629372522274e-06,
      "loss": 2.6986,
      "step": 215172
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.289006471633911,
      "learning_rate": 6.445450326872448e-06,
      "loss": 2.8995,
      "step": 215173
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.603593587875366,
      "learning_rate": 6.444606983099432e-06,
      "loss": 2.7149,
      "step": 215174
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9720659255981445,
      "learning_rate": 6.443763693903958e-06,
      "loss": 3.0145,
      "step": 215175
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1071841716766357,
      "learning_rate": 6.442920459286094e-06,
      "loss": 3.0447,
      "step": 215176
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1654484272003174,
      "learning_rate": 6.4420772792460715e-06,
      "loss": 3.1196,
      "step": 215177
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.787294864654541,
      "learning_rate": 6.441234153783992e-06,
      "loss": 2.9003,
      "step": 215178
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9366557598114014,
      "learning_rate": 6.440391082900054e-06,
      "loss": 3.1585,
      "step": 215179
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7421107292175293,
      "learning_rate": 6.4395480665943575e-06,
      "loss": 2.9047,
      "step": 215180
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9190337657928467,
      "learning_rate": 6.438705104867103e-06,
      "loss": 2.91,
      "step": 215181
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.397761344909668,
      "learning_rate": 6.4378621977183905e-06,
      "loss": 2.9965,
      "step": 215182
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7162160873413086,
      "learning_rate": 6.437019345148486e-06,
      "loss": 3.1171,
      "step": 215183
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5999460220336914,
      "learning_rate": 6.436176547157457e-06,
      "loss": 2.9029,
      "step": 215184
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.163699150085449,
      "learning_rate": 6.4353338037454686e-06,
      "loss": 2.8466,
      "step": 215185
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.122974395751953,
      "learning_rate": 6.434491114912688e-06,
      "loss": 2.8442,
      "step": 215186
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.836263418197632,
      "learning_rate": 6.4336484806593146e-06,
      "loss": 3.0396,
      "step": 215187
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8338589668273926,
      "learning_rate": 6.432805900985416e-06,
      "loss": 2.8683,
      "step": 215188
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.816157341003418,
      "learning_rate": 6.4319633758912585e-06,
      "loss": 2.7097,
      "step": 215189
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8085780143737793,
      "learning_rate": 6.431120905376874e-06,
      "loss": 2.943,
      "step": 215190
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6883952617645264,
      "learning_rate": 6.430278489442564e-06,
      "loss": 3.07,
      "step": 215191
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.264910936355591,
      "learning_rate": 6.429436128088361e-06,
      "loss": 2.5829,
      "step": 215192
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9048032760620117,
      "learning_rate": 6.428593821314498e-06,
      "loss": 2.8635,
      "step": 215193
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8920836448669434,
      "learning_rate": 6.427751569121042e-06,
      "loss": 2.6825,
      "step": 215194
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.5294864177703857,
      "learning_rate": 6.42690937150826e-06,
      "loss": 2.6953,
      "step": 215195
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.027357816696167,
      "learning_rate": 6.4260672284762175e-06,
      "loss": 2.7812,
      "step": 215196
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3354804515838623,
      "learning_rate": 6.425225140025181e-06,
      "loss": 2.82,
      "step": 215197
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.379390239715576,
      "learning_rate": 6.424383106155151e-06,
      "loss": 2.9544,
      "step": 215198
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0320169925689697,
      "learning_rate": 6.423541126866427e-06,
      "loss": 3.0213,
      "step": 215199
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7688732147216797,
      "learning_rate": 6.422699202159076e-06,
      "loss": 2.796,
      "step": 215200
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.378812551498413,
      "learning_rate": 6.421857332033298e-06,
      "loss": 3.0479,
      "step": 215201
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7482473850250244,
      "learning_rate": 6.421015516489225e-06,
      "loss": 2.838,
      "step": 215202
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9795405864715576,
      "learning_rate": 6.420173755527091e-06,
      "loss": 3.0135,
      "step": 215203
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4496264457702637,
      "learning_rate": 6.41933204914693e-06,
      "loss": 3.0338,
      "step": 215204
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.848487615585327,
      "learning_rate": 6.4184903973489744e-06,
      "loss": 2.7216,
      "step": 215205
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6334805488586426,
      "learning_rate": 6.417648800133324e-06,
      "loss": 2.9677,
      "step": 215206
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.308712959289551,
      "learning_rate": 6.416807257500212e-06,
      "loss": 3.0533,
      "step": 215207
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0536768436431885,
      "learning_rate": 6.415965769449738e-06,
      "loss": 2.9184,
      "step": 215208
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.314561367034912,
      "learning_rate": 6.415124335982136e-06,
      "loss": 2.9773,
      "step": 215209
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6517083644866943,
      "learning_rate": 6.414282957097439e-06,
      "loss": 2.8804,
      "step": 215210
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.206512212753296,
      "learning_rate": 6.4134416327959125e-06,
      "loss": 3.0165,
      "step": 215211
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.450913190841675,
      "learning_rate": 6.412600363077625e-06,
      "loss": 2.8468,
      "step": 215212
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.10264253616333,
      "learning_rate": 6.411759147942807e-06,
      "loss": 3.0667,
      "step": 215213
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9160983562469482,
      "learning_rate": 6.410917987391561e-06,
      "loss": 3.1304,
      "step": 215214
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2324705123901367,
      "learning_rate": 6.41007688142412e-06,
      "loss": 2.9431,
      "step": 215215
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.540348529815674,
      "learning_rate": 6.409235830040515e-06,
      "loss": 2.9727,
      "step": 215216
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.250375986099243,
      "learning_rate": 6.408394833241015e-06,
      "loss": 2.7666,
      "step": 215217
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0577597618103027,
      "learning_rate": 6.4075538910257185e-06,
      "loss": 2.9795,
      "step": 215218
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.075835943222046,
      "learning_rate": 6.406713003394826e-06,
      "loss": 2.8346,
      "step": 215219
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.955937147140503,
      "learning_rate": 6.405872170348436e-06,
      "loss": 3.128,
      "step": 215220
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4036178588867188,
      "learning_rate": 6.405031391886783e-06,
      "loss": 2.7926,
      "step": 215221
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3693153858184814,
      "learning_rate": 6.404190668009934e-06,
      "loss": 2.8703,
      "step": 215222
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0965144634246826,
      "learning_rate": 6.403349998718122e-06,
      "loss": 2.9188,
      "step": 215223
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.993349552154541,
      "learning_rate": 6.4025093840114115e-06,
      "loss": 2.9542,
      "step": 215224
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.7247872352600098,
      "learning_rate": 6.401668823890072e-06,
      "loss": 2.812,
      "step": 215225
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.066866159439087,
      "learning_rate": 6.4008283183541676e-06,
      "loss": 2.8083,
      "step": 215226
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9116551876068115,
      "learning_rate": 6.399987867403966e-06,
      "loss": 2.912,
      "step": 215227
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.945303440093994,
      "learning_rate": 6.399147471039468e-06,
      "loss": 2.663,
      "step": 215228
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.761953830718994,
      "learning_rate": 6.398307129260938e-06,
      "loss": 2.8377,
      "step": 215229
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1854536533355713,
      "learning_rate": 6.3974668420684775e-06,
      "loss": 2.9756,
      "step": 215230
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4613723754882812,
      "learning_rate": 6.396626609462318e-06,
      "loss": 2.9885,
      "step": 215231
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0401172637939453,
      "learning_rate": 6.395786431442529e-06,
      "loss": 2.9898,
      "step": 215232
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1527156829833984,
      "learning_rate": 6.394946308009341e-06,
      "loss": 2.7836,
      "step": 215233
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.208805561065674,
      "learning_rate": 6.394106239162855e-06,
      "loss": 2.7048,
      "step": 215234
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.4924912452697754,
      "learning_rate": 6.3932662249032375e-06,
      "loss": 3.2216,
      "step": 215235
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.5594499111175537,
      "learning_rate": 6.3924262652306546e-06,
      "loss": 2.9758,
      "step": 215236
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1619958877563477,
      "learning_rate": 6.391586360145273e-06,
      "loss": 2.9818,
      "step": 215237
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.371967315673828,
      "learning_rate": 6.390746509647227e-06,
      "loss": 3.0893,
      "step": 215238
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0411429405212402,
      "learning_rate": 6.389906713736714e-06,
      "loss": 2.9122,
      "step": 215239
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8945164680480957,
      "learning_rate": 6.389066972413837e-06,
      "loss": 2.9003,
      "step": 215240
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6353161334991455,
      "learning_rate": 6.3882272856787934e-06,
      "loss": 3.0034,
      "step": 215241
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.231884002685547,
      "learning_rate": 6.3873876535316835e-06,
      "loss": 2.7148,
      "step": 215242
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.149838924407959,
      "learning_rate": 6.386548075972742e-06,
      "loss": 3.2159,
      "step": 215243
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1749672889709473,
      "learning_rate": 6.3857085530020334e-06,
      "loss": 3.0091,
      "step": 215244
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.459068536758423,
      "learning_rate": 6.384869084619792e-06,
      "loss": 2.7829,
      "step": 215245
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3588242530822754,
      "learning_rate": 6.3840296708261515e-06,
      "loss": 2.9345,
      "step": 215246
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9254987239837646,
      "learning_rate": 6.383190311621278e-06,
      "loss": 2.7782,
      "step": 215247
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.661881685256958,
      "learning_rate": 6.382351007005271e-06,
      "loss": 3.03,
      "step": 215248
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.30257248878479,
      "learning_rate": 6.381511756978363e-06,
      "loss": 3.0377,
      "step": 215249
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.4874041080474854,
      "learning_rate": 6.380672561540623e-06,
      "loss": 2.9309,
      "step": 215250
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9092254638671875,
      "learning_rate": 6.379833420692316e-06,
      "loss": 2.9683,
      "step": 215251
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.557812213897705,
      "learning_rate": 6.3789943344335405e-06,
      "loss": 3.0197,
      "step": 215252
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7687039375305176,
      "learning_rate": 6.378155302764398e-06,
      "loss": 3.1535,
      "step": 215253
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1184072494506836,
      "learning_rate": 6.377316325685155e-06,
      "loss": 2.6621,
      "step": 215254
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0148556232452393,
      "learning_rate": 6.376477403195879e-06,
      "loss": 2.9188,
      "step": 215255
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.068844795227051,
      "learning_rate": 6.3756385352967674e-06,
      "loss": 2.9295,
      "step": 215256
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8910844326019287,
      "learning_rate": 6.374799721987955e-06,
      "loss": 2.9546,
      "step": 215257
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.236044883728027,
      "learning_rate": 6.373960963269642e-06,
      "loss": 2.7713,
      "step": 215258
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.201256513595581,
      "learning_rate": 6.3731222591419275e-06,
      "loss": 2.8354,
      "step": 215259
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5292861461639404,
      "learning_rate": 6.372283609605011e-06,
      "loss": 3.0072,
      "step": 215260
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.733292818069458,
      "learning_rate": 6.371445014659027e-06,
      "loss": 2.7369,
      "step": 215261
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1805598735809326,
      "learning_rate": 6.370606474304108e-06,
      "loss": 2.988,
      "step": 215262
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.814652442932129,
      "learning_rate": 6.369767988540453e-06,
      "loss": 3.0856,
      "step": 215263
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8295111656188965,
      "learning_rate": 6.368929557368196e-06,
      "loss": 2.8704,
      "step": 215264
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9856653213500977,
      "learning_rate": 6.368091180787471e-06,
      "loss": 2.828,
      "step": 215265
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6800572872161865,
      "learning_rate": 6.3672528587985106e-06,
      "loss": 3.2278,
      "step": 215266
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.980184555053711,
      "learning_rate": 6.366414591401381e-06,
      "loss": 2.9477,
      "step": 215267
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.601961374282837,
      "learning_rate": 6.365576378596316e-06,
      "loss": 3.1108,
      "step": 215268
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.546908140182495,
      "learning_rate": 6.364738220383414e-06,
      "loss": 2.8143,
      "step": 215269
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3828179836273193,
      "learning_rate": 6.363900116762877e-06,
      "loss": 2.7687,
      "step": 215270
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4996695518493652,
      "learning_rate": 6.36306206773477e-06,
      "loss": 2.9454,
      "step": 215271
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2986204624176025,
      "learning_rate": 6.36222407329936e-06,
      "loss": 2.7487,
      "step": 215272
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.652207374572754,
      "learning_rate": 6.3613861334567475e-06,
      "loss": 2.807,
      "step": 215273
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1031150817871094,
      "learning_rate": 6.360548248207098e-06,
      "loss": 3.0433,
      "step": 215274
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.676478385925293,
      "learning_rate": 6.3597104175505454e-06,
      "loss": 3.0455,
      "step": 215275
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.039616584777832,
      "learning_rate": 6.358872641487289e-06,
      "loss": 3.0815,
      "step": 215276
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8893706798553467,
      "learning_rate": 6.358034920017463e-06,
      "loss": 2.8299,
      "step": 215277
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.324702739715576,
      "learning_rate": 6.3571972531411995e-06,
      "loss": 2.9005,
      "step": 215278
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9068992137908936,
      "learning_rate": 6.356359640858666e-06,
      "loss": 2.7756,
      "step": 215279
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0456509590148926,
      "learning_rate": 6.3555220831700615e-06,
      "loss": 2.9738,
      "step": 215280
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.857220411300659,
      "learning_rate": 6.354684580075487e-06,
      "loss": 2.6944,
      "step": 215281
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4529476165771484,
      "learning_rate": 6.3538471315751735e-06,
      "loss": 2.8232,
      "step": 215282
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.580003023147583,
      "learning_rate": 6.353009737669157e-06,
      "loss": 2.6857,
      "step": 215283
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.886530637741089,
      "learning_rate": 6.352172398357702e-06,
      "loss": 3.061,
      "step": 215284
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.130990743637085,
      "learning_rate": 6.3513351136408764e-06,
      "loss": 2.6835,
      "step": 215285
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.798663377761841,
      "learning_rate": 6.350497883518912e-06,
      "loss": 2.9413,
      "step": 215286
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.779758930206299,
      "learning_rate": 6.3496607079919106e-06,
      "loss": 3.1074,
      "step": 215287
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7628257274627686,
      "learning_rate": 6.348823587060103e-06,
      "loss": 2.7658,
      "step": 215288
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.919830560684204,
      "learning_rate": 6.347986520723525e-06,
      "loss": 2.8595,
      "step": 215289
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.175067901611328,
      "learning_rate": 6.347149508982441e-06,
      "loss": 3.0524,
      "step": 215290
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.128572940826416,
      "learning_rate": 6.346312551836952e-06,
      "loss": 3.1004,
      "step": 215291
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.898711681365967,
      "learning_rate": 6.3454756492872235e-06,
      "loss": 3.0124,
      "step": 215292
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2241225242614746,
      "learning_rate": 6.34463880133339e-06,
      "loss": 2.9265,
      "step": 215293
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6144394874572754,
      "learning_rate": 6.343802007975718e-06,
      "loss": 2.9643,
      "step": 215294
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9843125343322754,
      "learning_rate": 6.342965269214173e-06,
      "loss": 2.928,
      "step": 215295
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1375412940979004,
      "learning_rate": 6.342128585049089e-06,
      "loss": 2.8255,
      "step": 215296
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.311746835708618,
      "learning_rate": 6.341291955480498e-06,
      "loss": 2.6836,
      "step": 215297
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.4905290603637695,
      "learning_rate": 6.340455380508636e-06,
      "loss": 3.186,
      "step": 215298
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7209677696228027,
      "learning_rate": 6.3396188601336e-06,
      "loss": 2.976,
      "step": 215299
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.752363920211792,
      "learning_rate": 6.338782394355591e-06,
      "loss": 2.8292,
      "step": 215300
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9466230869293213,
      "learning_rate": 6.337945983174742e-06,
      "loss": 2.9071,
      "step": 215301
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.732337713241577,
      "learning_rate": 6.3371096265912195e-06,
      "loss": 2.8957,
      "step": 215302
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.917752265930176,
      "learning_rate": 6.336273324605124e-06,
      "loss": 2.9537,
      "step": 215303
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3577656745910645,
      "learning_rate": 6.335437077216721e-06,
      "loss": 2.6356,
      "step": 215304
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8800370693206787,
      "learning_rate": 6.3346008844260446e-06,
      "loss": 2.9491,
      "step": 215305
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6077663898468018,
      "learning_rate": 6.333764746233394e-06,
      "loss": 3.0178,
      "step": 215306
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0166590213775635,
      "learning_rate": 6.3329286626387695e-06,
      "loss": 2.9028,
      "step": 215307
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.450237274169922,
      "learning_rate": 6.332092633642438e-06,
      "loss": 2.9948,
      "step": 215308
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.6942801475524902,
      "learning_rate": 6.331256659244466e-06,
      "loss": 3.069,
      "step": 215309
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8704259395599365,
      "learning_rate": 6.330420739445086e-06,
      "loss": 2.9214,
      "step": 215310
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.299403429031372,
      "learning_rate": 6.329584874244431e-06,
      "loss": 2.9797,
      "step": 215311
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8135740756988525,
      "learning_rate": 6.328749063642668e-06,
      "loss": 2.7179,
      "step": 215312
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8665075302124023,
      "learning_rate": 6.327913307639898e-06,
      "loss": 2.7421,
      "step": 215313
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.033006191253662,
      "learning_rate": 6.327077606236319e-06,
      "loss": 3.0071,
      "step": 215314
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.689587116241455,
      "learning_rate": 6.3262419594320655e-06,
      "loss": 3.037,
      "step": 215315
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.164687395095825,
      "learning_rate": 6.325406367227337e-06,
      "loss": 2.8079,
      "step": 215316
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.098560094833374,
      "learning_rate": 6.324570829622233e-06,
      "loss": 2.8601,
      "step": 215317
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9992923736572266,
      "learning_rate": 6.323735346616954e-06,
      "loss": 3.1873,
      "step": 215318
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.078089475631714,
      "learning_rate": 6.322899918211633e-06,
      "loss": 2.8656,
      "step": 215319
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6602909564971924,
      "learning_rate": 6.322064544406435e-06,
      "loss": 2.8625,
      "step": 215320
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.907686233520508,
      "learning_rate": 6.321229225201463e-06,
      "loss": 2.8082,
      "step": 215321
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9006755352020264,
      "learning_rate": 6.320393960596981e-06,
      "loss": 3.0314,
      "step": 215322
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0811409950256348,
      "learning_rate": 6.319558750593023e-06,
      "loss": 2.9384,
      "step": 215323
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3221046924591064,
      "learning_rate": 6.318723595189856e-06,
      "loss": 2.9048,
      "step": 215324
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4380741119384766,
      "learning_rate": 6.3178884943875795e-06,
      "loss": 2.7942,
      "step": 215325
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.781745672225952,
      "learning_rate": 6.317053448186327e-06,
      "loss": 2.9526,
      "step": 215326
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9107582569122314,
      "learning_rate": 6.3162184565862635e-06,
      "loss": 2.741,
      "step": 215327
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.633829355239868,
      "learning_rate": 6.315383519587592e-06,
      "loss": 2.7288,
      "step": 215328
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7892396450042725,
      "learning_rate": 6.314548637190408e-06,
      "loss": 2.9215,
      "step": 215329
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.169539451599121,
      "learning_rate": 6.313713809394915e-06,
      "loss": 3.0453,
      "step": 215330
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8119711875915527,
      "learning_rate": 6.312879036201246e-06,
      "loss": 2.7256,
      "step": 215331
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2510833740234375,
      "learning_rate": 6.312044317609566e-06,
      "loss": 2.8503,
      "step": 215332
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.971392869949341,
      "learning_rate": 6.311209653619976e-06,
      "loss": 2.7641,
      "step": 215333
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9525110721588135,
      "learning_rate": 6.310375044232707e-06,
      "loss": 2.8767,
      "step": 215334
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.693396806716919,
      "learning_rate": 6.309540489447862e-06,
      "loss": 3.0726,
      "step": 215335
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.754859209060669,
      "learning_rate": 6.308705989265639e-06,
      "loss": 2.9231,
      "step": 215336
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.9464991092681885,
      "learning_rate": 6.307871543686171e-06,
      "loss": 2.7727,
      "step": 215337
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6367135047912598,
      "learning_rate": 6.307037152709593e-06,
      "loss": 2.7534,
      "step": 215338
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.58077073097229,
      "learning_rate": 6.3062028163361034e-06,
      "loss": 3.1156,
      "step": 215339
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0933399200439453,
      "learning_rate": 6.305368534565835e-06,
      "loss": 2.7268,
      "step": 215340
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.5188348293304443,
      "learning_rate": 6.304534307398923e-06,
      "loss": 3.0127,
      "step": 215341
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.20463490486145,
      "learning_rate": 6.303700134835565e-06,
      "loss": 3.0466,
      "step": 215342
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5259416103363037,
      "learning_rate": 6.302866016875896e-06,
      "loss": 2.9299,
      "step": 215343
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5289268493652344,
      "learning_rate": 6.302031953520015e-06,
      "loss": 2.8514,
      "step": 215344
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3199398517608643,
      "learning_rate": 6.301197944768188e-06,
      "loss": 2.9237,
      "step": 215345
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.683645486831665,
      "learning_rate": 6.300363990620516e-06,
      "loss": 2.8695,
      "step": 215346
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7077882289886475,
      "learning_rate": 6.299530091077098e-06,
      "loss": 2.8655,
      "step": 215347
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0924575328826904,
      "learning_rate": 6.2986962461381685e-06,
      "loss": 2.9089,
      "step": 215348
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.119699478149414,
      "learning_rate": 6.297862455803893e-06,
      "loss": 2.9001,
      "step": 215349
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.177563190460205,
      "learning_rate": 6.297028720074337e-06,
      "loss": 2.9761,
      "step": 215350
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5676276683807373,
      "learning_rate": 6.2961950389497365e-06,
      "loss": 2.9437,
      "step": 215351
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.824920177459717,
      "learning_rate": 6.295361412430189e-06,
      "loss": 2.853,
      "step": 215352
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.221869945526123,
      "learning_rate": 6.294527840515895e-06,
      "loss": 2.9174,
      "step": 215353
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8250577449798584,
      "learning_rate": 6.293694323206988e-06,
      "loss": 2.749,
      "step": 215354
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2822961807250977,
      "learning_rate": 6.292860860503634e-06,
      "loss": 2.8443,
      "step": 215355
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7066664695739746,
      "learning_rate": 6.292027452405968e-06,
      "loss": 2.9859,
      "step": 215356
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.139667510986328,
      "learning_rate": 6.291194098914187e-06,
      "loss": 3.1631,
      "step": 215357
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.847748041152954,
      "learning_rate": 6.2903608000283934e-06,
      "loss": 3.025,
      "step": 215358
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2192142009735107,
      "learning_rate": 6.289527555748752e-06,
      "loss": 2.9411,
      "step": 215359
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.266606569290161,
      "learning_rate": 6.288694366075464e-06,
      "loss": 3.168,
      "step": 215360
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0158748626708984,
      "learning_rate": 6.287861231008662e-06,
      "loss": 2.8404,
      "step": 215361
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.000802516937256,
      "learning_rate": 6.287028150548445e-06,
      "loss": 3.0608,
      "step": 215362
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2395365238189697,
      "learning_rate": 6.286195124695048e-06,
      "loss": 2.9363,
      "step": 215363
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.669355630874634,
      "learning_rate": 6.28536215344857e-06,
      "loss": 2.7604,
      "step": 215364
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2825608253479004,
      "learning_rate": 6.284529236809211e-06,
      "loss": 2.8623,
      "step": 215365
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.951606512069702,
      "learning_rate": 6.28369637477707e-06,
      "loss": 2.883,
      "step": 215366
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1156442165374756,
      "learning_rate": 6.282863567352381e-06,
      "loss": 2.6996,
      "step": 215367
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.213817596435547,
      "learning_rate": 6.282030814535211e-06,
      "loss": 2.9386,
      "step": 215368
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6969103813171387,
      "learning_rate": 6.281198116325758e-06,
      "loss": 2.7423,
      "step": 215369
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1287717819213867,
      "learning_rate": 6.280365472724191e-06,
      "loss": 2.7269,
      "step": 215370
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.02672815322876,
      "learning_rate": 6.279532883730643e-06,
      "loss": 2.7289,
      "step": 215371
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.04586124420166,
      "learning_rate": 6.278700349345245e-06,
      "loss": 2.9693,
      "step": 215372
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.893653392791748,
      "learning_rate": 6.277867869568265e-06,
      "loss": 2.9007,
      "step": 215373
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0269312858581543,
      "learning_rate": 6.277035444399703e-06,
      "loss": 2.9387,
      "step": 215374
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.260026931762695,
      "learning_rate": 6.276203073839791e-06,
      "loss": 2.7482,
      "step": 215375
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1746456623077393,
      "learning_rate": 6.275370757888698e-06,
      "loss": 2.9222,
      "step": 215376
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.4057087898254395,
      "learning_rate": 6.274538496546555e-06,
      "loss": 2.9252,
      "step": 215377
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.738689661026001,
      "learning_rate": 6.273706289813496e-06,
      "loss": 2.9523,
      "step": 215378
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.253016233444214,
      "learning_rate": 6.272874137689754e-06,
      "loss": 3.0331,
      "step": 215379
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1032607555389404,
      "learning_rate": 6.272042040175362e-06,
      "loss": 2.8208,
      "step": 215380
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9203925132751465,
      "learning_rate": 6.271209997270588e-06,
      "loss": 3.0221,
      "step": 215381
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6817569732666016,
      "learning_rate": 6.2703780089754965e-06,
      "loss": 2.6892,
      "step": 215382
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2785556316375732,
      "learning_rate": 6.269546075290322e-06,
      "loss": 3.2005,
      "step": 215383
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7042739391326904,
      "learning_rate": 6.268714196215163e-06,
      "loss": 2.9587,
      "step": 215384
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1793973445892334,
      "learning_rate": 6.267882371750222e-06,
      "loss": 2.9869,
      "step": 215385
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.2899982929229736,
      "learning_rate": 6.267050601895595e-06,
      "loss": 2.9423,
      "step": 215386
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5267741680145264,
      "learning_rate": 6.2662188866514864e-06,
      "loss": 2.7889,
      "step": 215387
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.550248384475708,
      "learning_rate": 6.265387226018026e-06,
      "loss": 2.9279,
      "step": 215388
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.119490385055542,
      "learning_rate": 6.264555619995382e-06,
      "loss": 2.796,
      "step": 215389
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.4717957973480225,
      "learning_rate": 6.2637240685836865e-06,
      "loss": 2.8669,
      "step": 215390
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.974090576171875,
      "learning_rate": 6.2628925717831405e-06,
      "loss": 2.707,
      "step": 215391
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9520506858825684,
      "learning_rate": 6.262061129593843e-06,
      "loss": 2.8423,
      "step": 215392
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.657407760620117,
      "learning_rate": 6.261229742015994e-06,
      "loss": 2.9765,
      "step": 215393
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3084514141082764,
      "learning_rate": 6.260398409049694e-06,
      "loss": 2.9509,
      "step": 215394
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.143777847290039,
      "learning_rate": 6.259567130695142e-06,
      "loss": 2.8805,
      "step": 215395
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.93873929977417,
      "learning_rate": 6.258735906952472e-06,
      "loss": 2.8362,
      "step": 215396
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.898319721221924,
      "learning_rate": 6.257904737821884e-06,
      "loss": 3.112,
      "step": 215397
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.8232080936431885,
      "learning_rate": 6.257073623303477e-06,
      "loss": 2.7977,
      "step": 215398
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.5209193229675293,
      "learning_rate": 6.256242563397451e-06,
      "loss": 3.0083,
      "step": 215399
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.774087905883789,
      "learning_rate": 6.255411558103873e-06,
      "loss": 3.0549,
      "step": 215400
    },
    {
      "epoch": 2.8,
      "grad_norm": 5.499231338500977,
      "learning_rate": 6.25458060742301e-06,
      "loss": 2.9442,
      "step": 215401
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.9748919010162354,
      "learning_rate": 6.2537497113549275e-06,
      "loss": 3.0784,
      "step": 215402
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1329123973846436,
      "learning_rate": 6.252918869899826e-06,
      "loss": 3.0989,
      "step": 215403
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.949214458465576,
      "learning_rate": 6.252088083057905e-06,
      "loss": 2.8278,
      "step": 215404
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.2212443351745605,
      "learning_rate": 6.251257350829231e-06,
      "loss": 2.8978,
      "step": 215405
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.3235385417938232,
      "learning_rate": 6.250426673213971e-06,
      "loss": 2.942,
      "step": 215406
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6974687576293945,
      "learning_rate": 6.249596050212324e-06,
      "loss": 2.796,
      "step": 215407
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.134861707687378,
      "learning_rate": 6.248765481824392e-06,
      "loss": 2.9709,
      "step": 215408
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.311431646347046,
      "learning_rate": 6.247934968050372e-06,
      "loss": 2.9565,
      "step": 215409
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.740541934967041,
      "learning_rate": 6.247104508890433e-06,
      "loss": 2.9283,
      "step": 215410
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.019996166229248,
      "learning_rate": 6.246274104344706e-06,
      "loss": 3.0689,
      "step": 215411
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0610923767089844,
      "learning_rate": 6.245443754413293e-06,
      "loss": 2.7763,
      "step": 215412
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8465471267700195,
      "learning_rate": 6.2446134590964255e-06,
      "loss": 2.9849,
      "step": 215413
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.689596176147461,
      "learning_rate": 6.243783218394238e-06,
      "loss": 2.7569,
      "step": 215414
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.662473678588867,
      "learning_rate": 6.242953032306863e-06,
      "loss": 2.696,
      "step": 215415
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.6542365550994873,
      "learning_rate": 6.242122900834501e-06,
      "loss": 2.8296,
      "step": 215416
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.610299825668335,
      "learning_rate": 6.2412928239772505e-06,
      "loss": 2.8311,
      "step": 215417
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.0680582523345947,
      "learning_rate": 6.2404628017352465e-06,
      "loss": 2.8698,
      "step": 215418
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.1975927352905273,
      "learning_rate": 6.239632834108755e-06,
      "loss": 2.9995,
      "step": 215419
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.103712320327759,
      "learning_rate": 6.238802921097807e-06,
      "loss": 2.7549,
      "step": 215420
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.100937843322754,
      "learning_rate": 6.237973062702639e-06,
      "loss": 3.0679,
      "step": 215421
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.227548837661743,
      "learning_rate": 6.237143258923383e-06,
      "loss": 2.9872,
      "step": 215422
    },
    {
      "epoch": 2.8,
      "grad_norm": 4.1970014572143555,
      "learning_rate": 6.236313509760171e-06,
      "loss": 2.9003,
      "step": 215423
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.89194393157959,
      "learning_rate": 6.235483815213171e-06,
      "loss": 3.0506,
      "step": 215424
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.929938554763794,
      "learning_rate": 6.23465417528255e-06,
      "loss": 2.9504,
      "step": 215425
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.8028199672698975,
      "learning_rate": 6.2338245899684395e-06,
      "loss": 2.712,
      "step": 215426
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.786241292953491,
      "learning_rate": 6.232995059271007e-06,
      "loss": 2.8748,
      "step": 215427
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4580202102661133,
      "learning_rate": 6.2321655831904185e-06,
      "loss": 2.7469,
      "step": 215428
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.800949811935425,
      "learning_rate": 6.231336161726808e-06,
      "loss": 2.7976,
      "step": 215429
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.205967426300049,
      "learning_rate": 6.2305067948803414e-06,
      "loss": 3.0668,
      "step": 215430
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.3531100749969482,
      "learning_rate": 6.2296774826511855e-06,
      "loss": 3.1034,
      "step": 215431
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9211161136627197,
      "learning_rate": 6.22884822503944e-06,
      "loss": 3.033,
      "step": 215432
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.697983741760254,
      "learning_rate": 6.228019022045338e-06,
      "loss": 2.9466,
      "step": 215433
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.299379587173462,
      "learning_rate": 6.227189873668981e-06,
      "loss": 3.0376,
      "step": 215434
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.951873302459717,
      "learning_rate": 6.2263607799105e-06,
      "loss": 2.9731,
      "step": 215435
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0861785411834717,
      "learning_rate": 6.2255317407700956e-06,
      "loss": 2.614,
      "step": 215436
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.656639814376831,
      "learning_rate": 6.224702756247934e-06,
      "loss": 2.9687,
      "step": 215437
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.90818452835083,
      "learning_rate": 6.22387382634415e-06,
      "loss": 2.9412,
      "step": 215438
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7752718925476074,
      "learning_rate": 6.223044951058875e-06,
      "loss": 2.9204,
      "step": 215439
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0544538497924805,
      "learning_rate": 6.222216130392277e-06,
      "loss": 2.6672,
      "step": 215440
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.398451089859009,
      "learning_rate": 6.221387364344521e-06,
      "loss": 3.1251,
      "step": 215441
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9737367630004883,
      "learning_rate": 6.2205586529157745e-06,
      "loss": 2.9196,
      "step": 215442
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.805863857269287,
      "learning_rate": 6.219729996106137e-06,
      "loss": 3.0438,
      "step": 215443
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.0710954666137695,
      "learning_rate": 6.21890139391581e-06,
      "loss": 2.7542,
      "step": 215444
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.648406744003296,
      "learning_rate": 6.218072846344957e-06,
      "loss": 2.9727,
      "step": 215445
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2355010509490967,
      "learning_rate": 6.217244353393713e-06,
      "loss": 3.0508,
      "step": 215446
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.598430871963501,
      "learning_rate": 6.216415915062179e-06,
      "loss": 3.0066,
      "step": 215447
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.228632926940918,
      "learning_rate": 6.215587531350585e-06,
      "loss": 2.9022,
      "step": 215448
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4876761436462402,
      "learning_rate": 6.214759202259034e-06,
      "loss": 2.8982,
      "step": 215449
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.007455587387085,
      "learning_rate": 6.213930927787758e-06,
      "loss": 2.8676,
      "step": 215450
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.258554458618164,
      "learning_rate": 6.213102707936823e-06,
      "loss": 2.7067,
      "step": 215451
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9714174270629883,
      "learning_rate": 6.212274542706464e-06,
      "loss": 2.8426,
      "step": 215452
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8709068298339844,
      "learning_rate": 6.211446432096712e-06,
      "loss": 2.7014,
      "step": 215453
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.943305253982544,
      "learning_rate": 6.210618376107868e-06,
      "loss": 2.8857,
      "step": 215454
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8297181129455566,
      "learning_rate": 6.209790374739965e-06,
      "loss": 2.7793,
      "step": 215455
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2657065391540527,
      "learning_rate": 6.208962427993236e-06,
      "loss": 2.9413,
      "step": 215456
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.093690872192383,
      "learning_rate": 6.208134535867781e-06,
      "loss": 2.5573,
      "step": 215457
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9756104946136475,
      "learning_rate": 6.207306698363801e-06,
      "loss": 2.8682,
      "step": 215458
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.305614948272705,
      "learning_rate": 6.206478915481428e-06,
      "loss": 2.7549,
      "step": 215459
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.4854726791381836,
      "learning_rate": 6.205651187220828e-06,
      "loss": 2.8277,
      "step": 215460
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.989471435546875,
      "learning_rate": 6.2048235135821026e-06,
      "loss": 2.9022,
      "step": 215461
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9726505279541016,
      "learning_rate": 6.203995894565483e-06,
      "loss": 3.0345,
      "step": 215462
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.57721209526062,
      "learning_rate": 6.203168330171071e-06,
      "loss": 2.7898,
      "step": 215463
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7086799144744873,
      "learning_rate": 6.202340820399032e-06,
      "loss": 2.7186,
      "step": 215464
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3945488929748535,
      "learning_rate": 6.201513365249566e-06,
      "loss": 2.808,
      "step": 215465
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0846307277679443,
      "learning_rate": 6.20068596472274e-06,
      "loss": 2.9058,
      "step": 215466
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.025197982788086,
      "learning_rate": 6.199858618818754e-06,
      "loss": 2.8928,
      "step": 215467
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.855437994003296,
      "learning_rate": 6.199031327537774e-06,
      "loss": 2.9664,
      "step": 215468
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.750694990158081,
      "learning_rate": 6.198204090879932e-06,
      "loss": 2.8975,
      "step": 215469
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.769355058670044,
      "learning_rate": 6.197376908845397e-06,
      "loss": 2.6478,
      "step": 215470
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8457045555114746,
      "learning_rate": 6.196549781434301e-06,
      "loss": 3.141,
      "step": 215471
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.729719638824463,
      "learning_rate": 6.195722708646844e-06,
      "loss": 3.1028,
      "step": 215472
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6922788619995117,
      "learning_rate": 6.194895690483126e-06,
      "loss": 3.0675,
      "step": 215473
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0668790340423584,
      "learning_rate": 6.1940687269433135e-06,
      "loss": 2.9156,
      "step": 215474
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.522761583328247,
      "learning_rate": 6.193241818027572e-06,
      "loss": 3.099,
      "step": 215475
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3127830028533936,
      "learning_rate": 6.1924149637360695e-06,
      "loss": 3.2572,
      "step": 215476
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.526132345199585,
      "learning_rate": 6.191588164068939e-06,
      "loss": 2.8272,
      "step": 215477
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9661223888397217,
      "learning_rate": 6.190761419026347e-06,
      "loss": 2.8847,
      "step": 215478
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.511791229248047,
      "learning_rate": 6.189934728608392e-06,
      "loss": 3.2298,
      "step": 215479
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.750681161880493,
      "learning_rate": 6.189108092815343e-06,
      "loss": 2.7897,
      "step": 215480
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.3910164833068848,
      "learning_rate": 6.188281511647231e-06,
      "loss": 2.9719,
      "step": 215481
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.367262363433838,
      "learning_rate": 6.18745498510429e-06,
      "loss": 2.9723,
      "step": 215482
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.786935806274414,
      "learning_rate": 6.18662851318662e-06,
      "loss": 2.791,
      "step": 215483
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.714487314224243,
      "learning_rate": 6.185802095894454e-06,
      "loss": 2.9364,
      "step": 215484
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.445629119873047,
      "learning_rate": 6.184975733227826e-06,
      "loss": 2.9877,
      "step": 215485
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.664231300354004,
      "learning_rate": 6.184149425187002e-06,
      "loss": 2.9607,
      "step": 215486
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.559894323348999,
      "learning_rate": 6.183323171772081e-06,
      "loss": 3.1705,
      "step": 215487
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.027559757232666,
      "learning_rate": 6.182496972983231e-06,
      "loss": 2.8702,
      "step": 215488
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.105900287628174,
      "learning_rate": 6.1816708288206175e-06,
      "loss": 2.9061,
      "step": 215489
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1387031078338623,
      "learning_rate": 6.1808447392843404e-06,
      "loss": 2.8453,
      "step": 215490
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.02793288230896,
      "learning_rate": 6.1800187043746005e-06,
      "loss": 2.9295,
      "step": 215491
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.840679883956909,
      "learning_rate": 6.179192724091564e-06,
      "loss": 2.5711,
      "step": 215492
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.888817548751831,
      "learning_rate": 6.17836679843533e-06,
      "loss": 2.9207,
      "step": 215493
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.123250722885132,
      "learning_rate": 6.177540927406099e-06,
      "loss": 2.7045,
      "step": 215494
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2534101009368896,
      "learning_rate": 6.176715111004005e-06,
      "loss": 2.7099,
      "step": 215495
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.371542453765869,
      "learning_rate": 6.175889349229246e-06,
      "loss": 2.84,
      "step": 215496
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.68198299407959,
      "learning_rate": 6.175063642081857e-06,
      "loss": 2.8016,
      "step": 215497
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.684595823287964,
      "learning_rate": 6.1742379895621365e-06,
      "loss": 2.8343,
      "step": 215498
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.818342924118042,
      "learning_rate": 6.173412391670118e-06,
      "loss": 2.6724,
      "step": 215499
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.732546329498291,
      "learning_rate": 6.172586848406069e-06,
      "loss": 2.9721,
      "step": 215500
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7213497161865234,
      "learning_rate": 6.1717613597700555e-06,
      "loss": 2.7682,
      "step": 215501
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4267730712890625,
      "learning_rate": 6.1709359257622435e-06,
      "loss": 2.9131,
      "step": 215502
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.703697443008423,
      "learning_rate": 6.170110546382801e-06,
      "loss": 2.8972,
      "step": 215503
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7230305671691895,
      "learning_rate": 6.1692852216319256e-06,
      "loss": 2.9884,
      "step": 215504
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8834784030914307,
      "learning_rate": 6.1684599515096525e-06,
      "loss": 2.8933,
      "step": 215505
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.954641580581665,
      "learning_rate": 6.167634736016281e-06,
      "loss": 2.7895,
      "step": 215506
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.625854969024658,
      "learning_rate": 6.166809575151876e-06,
      "loss": 3.0613,
      "step": 215507
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.8497636318206787,
      "learning_rate": 6.165984468916573e-06,
      "loss": 2.8798,
      "step": 215508
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1730778217315674,
      "learning_rate": 6.165159417310573e-06,
      "loss": 3.0855,
      "step": 215509
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8497366905212402,
      "learning_rate": 6.164334420334038e-06,
      "loss": 2.7738,
      "step": 215510
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9980690479278564,
      "learning_rate": 6.163509477987072e-06,
      "loss": 3.2091,
      "step": 215511
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.702119827270508,
      "learning_rate": 6.162684590269873e-06,
      "loss": 2.8316,
      "step": 215512
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2169172763824463,
      "learning_rate": 6.161859757182574e-06,
      "loss": 2.7765,
      "step": 215513
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.249130964279175,
      "learning_rate": 6.1610349787253435e-06,
      "loss": 3.0057,
      "step": 215514
    },
    {
      "epoch": 2.81,
      "grad_norm": 6.830628395080566,
      "learning_rate": 6.160210254898312e-06,
      "loss": 2.8908,
      "step": 215515
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9222028255462646,
      "learning_rate": 6.159385585701649e-06,
      "loss": 2.9689,
      "step": 215516
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.810206651687622,
      "learning_rate": 6.158560971135484e-06,
      "loss": 3.0586,
      "step": 215517
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0083882808685303,
      "learning_rate": 6.1577364112000205e-06,
      "loss": 2.7673,
      "step": 215518
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.980790853500366,
      "learning_rate": 6.156911905895357e-06,
      "loss": 2.9069,
      "step": 215519
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5320467948913574,
      "learning_rate": 6.1560874552216585e-06,
      "loss": 2.9567,
      "step": 215520
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.031191349029541,
      "learning_rate": 6.155263059179127e-06,
      "loss": 2.9346,
      "step": 215521
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.4966700077056885,
      "learning_rate": 6.154438717767829e-06,
      "loss": 2.8341,
      "step": 215522
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.496222972869873,
      "learning_rate": 6.15361443098803e-06,
      "loss": 2.8993,
      "step": 215523
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.02217173576355,
      "learning_rate": 6.152790198839797e-06,
      "loss": 2.8808,
      "step": 215524
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1019227504730225,
      "learning_rate": 6.1519660213232625e-06,
      "loss": 3.0755,
      "step": 215525
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.03315806388855,
      "learning_rate": 6.151141898438694e-06,
      "loss": 2.8262,
      "step": 215526
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.398939847946167,
      "learning_rate": 6.150317830186124e-06,
      "loss": 2.9674,
      "step": 215527
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5907771587371826,
      "learning_rate": 6.149493816565787e-06,
      "loss": 2.8227,
      "step": 215528
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6191904544830322,
      "learning_rate": 6.148669857577782e-06,
      "loss": 2.9635,
      "step": 215529
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.220836877822876,
      "learning_rate": 6.147845953222308e-06,
      "loss": 2.8397,
      "step": 215530
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.4487497806549072,
      "learning_rate": 6.147022103499466e-06,
      "loss": 2.6626,
      "step": 215531
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6189253330230713,
      "learning_rate": 6.146198308409489e-06,
      "loss": 3.0587,
      "step": 215532
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0386440753936768,
      "learning_rate": 6.145374567952444e-06,
      "loss": 3.0983,
      "step": 215533
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2298402786254883,
      "learning_rate": 6.1445508821285295e-06,
      "loss": 2.9054,
      "step": 215534
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5203471183776855,
      "learning_rate": 6.1437272509378795e-06,
      "loss": 2.6951,
      "step": 215535
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.529581546783447,
      "learning_rate": 6.142903674380661e-06,
      "loss": 2.7801,
      "step": 215536
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0232815742492676,
      "learning_rate": 6.14208015245704e-06,
      "loss": 3.1615,
      "step": 215537
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1109211444854736,
      "learning_rate": 6.14125668516715e-06,
      "loss": 2.8529,
      "step": 215538
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6598668098449707,
      "learning_rate": 6.140433272511158e-06,
      "loss": 2.8495,
      "step": 215539
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.409569263458252,
      "learning_rate": 6.139609914489163e-06,
      "loss": 2.8309,
      "step": 215540
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.174553632736206,
      "learning_rate": 6.1387866111013985e-06,
      "loss": 2.8573,
      "step": 215541
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2224693298339844,
      "learning_rate": 6.137963362347964e-06,
      "loss": 3.1147,
      "step": 215542
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3016366958618164,
      "learning_rate": 6.137140168229027e-06,
      "loss": 2.7304,
      "step": 215543
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8779308795928955,
      "learning_rate": 6.136317028744752e-06,
      "loss": 2.8355,
      "step": 215544
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8010759353637695,
      "learning_rate": 6.1354939438953084e-06,
      "loss": 3.115,
      "step": 215545
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.312047958374023,
      "learning_rate": 6.134670913680762e-06,
      "loss": 3.0873,
      "step": 215546
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.068943738937378,
      "learning_rate": 6.133847938101377e-06,
      "loss": 2.8537,
      "step": 215547
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.315138578414917,
      "learning_rate": 6.133025017157223e-06,
      "loss": 3.2238,
      "step": 215548
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.3633177280426025,
      "learning_rate": 6.132202150848531e-06,
      "loss": 2.878,
      "step": 215549
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8417274951934814,
      "learning_rate": 6.131379339175402e-06,
      "loss": 2.8889,
      "step": 215550
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.856801986694336,
      "learning_rate": 6.130556582137969e-06,
      "loss": 2.9026,
      "step": 215551
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7750985622406006,
      "learning_rate": 6.129733879736398e-06,
      "loss": 2.76,
      "step": 215552
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.584200382232666,
      "learning_rate": 6.12891123197089e-06,
      "loss": 2.5681,
      "step": 215553
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.369924783706665,
      "learning_rate": 6.128088638841544e-06,
      "loss": 2.8516,
      "step": 215554
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.723766326904297,
      "learning_rate": 6.12726610034856e-06,
      "loss": 3.0737,
      "step": 215555
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7862839698791504,
      "learning_rate": 6.126443616492071e-06,
      "loss": 3.0476,
      "step": 215556
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8062832355499268,
      "learning_rate": 6.125621187272212e-06,
      "loss": 2.9148,
      "step": 215557
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.804121732711792,
      "learning_rate": 6.1247988126891135e-06,
      "loss": 2.9675,
      "step": 215558
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9608311653137207,
      "learning_rate": 6.1239764927430105e-06,
      "loss": 2.8727,
      "step": 215559
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8825623989105225,
      "learning_rate": 6.123154227433968e-06,
      "loss": 2.9601,
      "step": 215560
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.847433090209961,
      "learning_rate": 6.122332016762188e-06,
      "loss": 2.758,
      "step": 215561
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0967772006988525,
      "learning_rate": 6.1215098607278354e-06,
      "loss": 2.943,
      "step": 215562
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.776034355163574,
      "learning_rate": 6.120687759331044e-06,
      "loss": 3.0376,
      "step": 215563
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1434543132781982,
      "learning_rate": 6.119865712571914e-06,
      "loss": 2.9459,
      "step": 215564
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.673027276992798,
      "learning_rate": 6.119043720450678e-06,
      "loss": 2.9931,
      "step": 215565
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.26137375831604,
      "learning_rate": 6.11822178296747e-06,
      "loss": 2.8462,
      "step": 215566
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.757296323776245,
      "learning_rate": 6.117399900122422e-06,
      "loss": 2.7322,
      "step": 215567
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.261261224746704,
      "learning_rate": 6.116578071915701e-06,
      "loss": 2.9287,
      "step": 215568
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9229159355163574,
      "learning_rate": 6.115756298347474e-06,
      "loss": 3.1879,
      "step": 215569
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.584373950958252,
      "learning_rate": 6.11493457941784e-06,
      "loss": 2.8825,
      "step": 215570
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.164163112640381,
      "learning_rate": 6.114112915127001e-06,
      "loss": 2.9355,
      "step": 215571
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.9727866649627686,
      "learning_rate": 6.113291305475088e-06,
      "loss": 2.959,
      "step": 215572
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.232572078704834,
      "learning_rate": 6.1124697504622675e-06,
      "loss": 2.7359,
      "step": 215573
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.902031183242798,
      "learning_rate": 6.111648250088708e-06,
      "loss": 2.9282,
      "step": 215574
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.266157388687134,
      "learning_rate": 6.11082680435454e-06,
      "loss": 2.9749,
      "step": 215575
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8077595233917236,
      "learning_rate": 6.110005413259866e-06,
      "loss": 3.016,
      "step": 215576
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6169674396514893,
      "learning_rate": 6.109184076804952e-06,
      "loss": 2.9473,
      "step": 215577
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6884074211120605,
      "learning_rate": 6.108362794989829e-06,
      "loss": 2.8794,
      "step": 215578
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.204587459564209,
      "learning_rate": 6.107541567814766e-06,
      "loss": 3.0464,
      "step": 215579
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1478071212768555,
      "learning_rate": 6.106720395279829e-06,
      "loss": 2.9697,
      "step": 215580
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.284679412841797,
      "learning_rate": 6.105899277385218e-06,
      "loss": 2.9088,
      "step": 215581
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0138275623321533,
      "learning_rate": 6.105078214131032e-06,
      "loss": 2.8401,
      "step": 215582
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6523706912994385,
      "learning_rate": 6.104257205517471e-06,
      "loss": 3.1043,
      "step": 215583
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6736040115356445,
      "learning_rate": 6.103436251544669e-06,
      "loss": 2.8248,
      "step": 215584
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6742262840270996,
      "learning_rate": 6.102615352212825e-06,
      "loss": 2.7975,
      "step": 215585
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.853276252746582,
      "learning_rate": 6.10179450752204e-06,
      "loss": 2.8317,
      "step": 215586
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0706560611724854,
      "learning_rate": 6.100973717472479e-06,
      "loss": 2.9136,
      "step": 215587
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1502792835235596,
      "learning_rate": 6.100152982064277e-06,
      "loss": 2.9328,
      "step": 215588
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.49018669128418,
      "learning_rate": 6.0993323012976325e-06,
      "loss": 3.1279,
      "step": 215589
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6560797691345215,
      "learning_rate": 6.098511675172613e-06,
      "loss": 2.7936,
      "step": 215590
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.239378452301025,
      "learning_rate": 6.097691103689484e-06,
      "loss": 2.6773,
      "step": 215591
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7151906490325928,
      "learning_rate": 6.096870586848313e-06,
      "loss": 3.095,
      "step": 215592
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2385940551757812,
      "learning_rate": 6.096050124649332e-06,
      "loss": 2.9629,
      "step": 215593
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4161901473999023,
      "learning_rate": 6.095229717092575e-06,
      "loss": 3.039,
      "step": 215594
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.659125566482544,
      "learning_rate": 6.09440936417831e-06,
      "loss": 2.9626,
      "step": 215595
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0795581340789795,
      "learning_rate": 6.093589065906601e-06,
      "loss": 2.8967,
      "step": 215596
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6927645206451416,
      "learning_rate": 6.092768822277649e-06,
      "loss": 2.8226,
      "step": 215597
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9761900901794434,
      "learning_rate": 6.091948633291621e-06,
      "loss": 3.0651,
      "step": 215598
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.359603404998779,
      "learning_rate": 6.09112849894865e-06,
      "loss": 2.9271,
      "step": 215599
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.056251049041748,
      "learning_rate": 6.0903084192488685e-06,
      "loss": 2.9736,
      "step": 215600
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0102803707122803,
      "learning_rate": 6.089488394192476e-06,
      "loss": 3.0196,
      "step": 215601
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7167181968688965,
      "learning_rate": 6.0886684237795415e-06,
      "loss": 2.8043,
      "step": 215602
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.647484064102173,
      "learning_rate": 6.087848508010329e-06,
      "loss": 3.0433,
      "step": 215603
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.963195562362671,
      "learning_rate": 6.087028646884873e-06,
      "loss": 2.8495,
      "step": 215604
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.8617632389068604,
      "learning_rate": 6.086208840403439e-06,
      "loss": 2.9712,
      "step": 215605
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.881152391433716,
      "learning_rate": 6.0853890885661285e-06,
      "loss": 3.0395,
      "step": 215606
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7225797176361084,
      "learning_rate": 6.084569391373073e-06,
      "loss": 3.017,
      "step": 215607
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0472464561462402,
      "learning_rate": 6.08374974882444e-06,
      "loss": 2.8036,
      "step": 215608
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.740293025970459,
      "learning_rate": 6.082930160920396e-06,
      "loss": 3.1069,
      "step": 215609
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4155685901641846,
      "learning_rate": 6.082110627661074e-06,
      "loss": 3.168,
      "step": 215610
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.298002243041992,
      "learning_rate": 6.081291149046641e-06,
      "loss": 2.9503,
      "step": 215611
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6221377849578857,
      "learning_rate": 6.080471725077262e-06,
      "loss": 2.9923,
      "step": 215612
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.677964448928833,
      "learning_rate": 6.07965235575304e-06,
      "loss": 2.8774,
      "step": 215613
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0028610229492188,
      "learning_rate": 6.078833041074171e-06,
      "loss": 2.9173,
      "step": 215614
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1462929248809814,
      "learning_rate": 6.078013781040791e-06,
      "loss": 2.8964,
      "step": 215615
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6951920986175537,
      "learning_rate": 6.077194575653066e-06,
      "loss": 2.8202,
      "step": 215616
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9076104164123535,
      "learning_rate": 6.0763754249111285e-06,
      "loss": 2.8456,
      "step": 215617
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2844512462615967,
      "learning_rate": 6.075556328815145e-06,
      "loss": 3.0052,
      "step": 215618
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.925015449523926,
      "learning_rate": 6.07473728736525e-06,
      "loss": 2.8208,
      "step": 215619
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.161062002182007,
      "learning_rate": 6.0739183005616424e-06,
      "loss": 3.129,
      "step": 215620
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.916851043701172,
      "learning_rate": 6.0730993684044216e-06,
      "loss": 2.8864,
      "step": 215621
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3530235290527344,
      "learning_rate": 6.072280490893755e-06,
      "loss": 2.8767,
      "step": 215622
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0342817306518555,
      "learning_rate": 6.071461668029809e-06,
      "loss": 2.9682,
      "step": 215623
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6272380352020264,
      "learning_rate": 6.07064289981275e-06,
      "loss": 2.7372,
      "step": 215624
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.024108648300171,
      "learning_rate": 6.069824186242678e-06,
      "loss": 2.748,
      "step": 215625
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0757269859313965,
      "learning_rate": 6.069005527319759e-06,
      "loss": 2.9297,
      "step": 215626
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.412140130996704,
      "learning_rate": 6.068186923044161e-06,
      "loss": 3.0645,
      "step": 215627
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.817033529281616,
      "learning_rate": 6.067368373416081e-06,
      "loss": 2.9925,
      "step": 215628
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0335488319396973,
      "learning_rate": 6.066549878435589e-06,
      "loss": 2.9135,
      "step": 215629
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7590253353118896,
      "learning_rate": 6.065731438102916e-06,
      "loss": 3.0457,
      "step": 215630
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.101836919784546,
      "learning_rate": 6.064913052418097e-06,
      "loss": 2.8445,
      "step": 215631
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.658440113067627,
      "learning_rate": 6.06409472138143e-06,
      "loss": 2.7604,
      "step": 215632
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.291341781616211,
      "learning_rate": 6.0632764449929485e-06,
      "loss": 2.9038,
      "step": 215633
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8768258094787598,
      "learning_rate": 6.0624582232528865e-06,
      "loss": 2.7939,
      "step": 215634
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1507058143615723,
      "learning_rate": 6.061640056161343e-06,
      "loss": 2.9502,
      "step": 215635
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5599896907806396,
      "learning_rate": 6.060821943718519e-06,
      "loss": 3.1224,
      "step": 215636
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9756062030792236,
      "learning_rate": 6.0600038859244806e-06,
      "loss": 3.0058,
      "step": 215637
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.334946870803833,
      "learning_rate": 6.059185882779494e-06,
      "loss": 2.821,
      "step": 215638
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7537717819213867,
      "learning_rate": 6.058367934283592e-06,
      "loss": 2.7644,
      "step": 215639
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.215747594833374,
      "learning_rate": 6.057550040437042e-06,
      "loss": 2.8185,
      "step": 215640
    },
    {
      "epoch": 2.81,
      "grad_norm": 5.789622783660889,
      "learning_rate": 6.056732201239911e-06,
      "loss": 3.053,
      "step": 215641
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2320950031280518,
      "learning_rate": 6.0559144166923974e-06,
      "loss": 2.9635,
      "step": 215642
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.839017868041992,
      "learning_rate": 6.055096686794636e-06,
      "loss": 3.0062,
      "step": 215643
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0121002197265625,
      "learning_rate": 6.054279011546792e-06,
      "loss": 2.959,
      "step": 215644
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8481452465057373,
      "learning_rate": 6.053461390948966e-06,
      "loss": 3.0401,
      "step": 215645
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1382319927215576,
      "learning_rate": 6.052643825001391e-06,
      "loss": 2.7469,
      "step": 215646
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8087611198425293,
      "learning_rate": 6.051826313704167e-06,
      "loss": 3.0965,
      "step": 215647
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.752983570098877,
      "learning_rate": 6.05100885705746e-06,
      "loss": 2.9252,
      "step": 215648
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6671884059906006,
      "learning_rate": 6.050191455061404e-06,
      "loss": 3.0445,
      "step": 215649
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.094587802886963,
      "learning_rate": 6.049374107716165e-06,
      "loss": 2.8866,
      "step": 215650
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.098379135131836,
      "learning_rate": 6.04855681502191e-06,
      "loss": 2.9239,
      "step": 215651
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.222726345062256,
      "learning_rate": 6.047739576978772e-06,
      "loss": 2.7435,
      "step": 215652
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6989288330078125,
      "learning_rate": 6.0469223935869175e-06,
      "loss": 2.9148,
      "step": 215653
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.476048707962036,
      "learning_rate": 6.046105264846479e-06,
      "loss": 3.1735,
      "step": 215654
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.918013095855713,
      "learning_rate": 6.0452881907576245e-06,
      "loss": 2.8738,
      "step": 215655
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.759446859359741,
      "learning_rate": 6.044471171320486e-06,
      "loss": 2.7545,
      "step": 215656
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0232906341552734,
      "learning_rate": 6.043654206535231e-06,
      "loss": 2.8034,
      "step": 215657
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6882739067077637,
      "learning_rate": 6.042837296402025e-06,
      "loss": 2.915,
      "step": 215658
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0567848682403564,
      "learning_rate": 6.042020440920969e-06,
      "loss": 2.856,
      "step": 215659
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.594625949859619,
      "learning_rate": 6.0412036400923295e-06,
      "loss": 2.8274,
      "step": 215660
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.967924118041992,
      "learning_rate": 6.040386893916105e-06,
      "loss": 3.0034,
      "step": 215661
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.020247220993042,
      "learning_rate": 6.039570202392563e-06,
      "loss": 3.0368,
      "step": 215662
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6897075176239014,
      "learning_rate": 6.03875356552177e-06,
      "loss": 2.7879,
      "step": 215663
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6175906658172607,
      "learning_rate": 6.037936983303959e-06,
      "loss": 2.715,
      "step": 215664
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.228100538253784,
      "learning_rate": 6.0371204557391976e-06,
      "loss": 2.9837,
      "step": 215665
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.125905990600586,
      "learning_rate": 6.036303982827751e-06,
      "loss": 2.6332,
      "step": 215666
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.134307622909546,
      "learning_rate": 6.035487564569652e-06,
      "loss": 2.6432,
      "step": 215667
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5370724201202393,
      "learning_rate": 6.034671200965135e-06,
      "loss": 2.6768,
      "step": 215668
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8529481887817383,
      "learning_rate": 6.033854892014301e-06,
      "loss": 2.6122,
      "step": 215669
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8646535873413086,
      "learning_rate": 6.033038637717347e-06,
      "loss": 2.7696,
      "step": 215670
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3406710624694824,
      "learning_rate": 6.0322224380743745e-06,
      "loss": 2.9364,
      "step": 215671
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7385385036468506,
      "learning_rate": 6.031406293085617e-06,
      "loss": 2.7963,
      "step": 215672
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.547210693359375,
      "learning_rate": 6.030590202751107e-06,
      "loss": 2.8908,
      "step": 215673
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5728166103363037,
      "learning_rate": 6.029774167071077e-06,
      "loss": 2.9763,
      "step": 215674
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.184123992919922,
      "learning_rate": 6.02895818604563e-06,
      "loss": 3.0712,
      "step": 215675
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1906867027282715,
      "learning_rate": 6.0281422596749955e-06,
      "loss": 3.0693,
      "step": 215676
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.21769380569458,
      "learning_rate": 6.027326387959241e-06,
      "loss": 3.0766,
      "step": 215677
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.852574110031128,
      "learning_rate": 6.026510570898635e-06,
      "loss": 2.9095,
      "step": 215678
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.8548130989074707,
      "learning_rate": 6.025694808493175e-06,
      "loss": 2.7563,
      "step": 215679
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.520224094390869,
      "learning_rate": 6.024879100743097e-06,
      "loss": 2.9943,
      "step": 215680
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8580150604248047,
      "learning_rate": 6.0240634476485305e-06,
      "loss": 2.906,
      "step": 215681
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.254042387008667,
      "learning_rate": 6.023247849209678e-06,
      "loss": 2.8579,
      "step": 215682
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1520907878875732,
      "learning_rate": 6.0224323054266385e-06,
      "loss": 2.8839,
      "step": 215683
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.031602382659912,
      "learning_rate": 6.0216168162995796e-06,
      "loss": 3.1593,
      "step": 215684
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0526492595672607,
      "learning_rate": 6.020801381828666e-06,
      "loss": 2.9783,
      "step": 215685
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.601922035217285,
      "learning_rate": 6.019986002013999e-06,
      "loss": 3.1304,
      "step": 215686
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9546921253204346,
      "learning_rate": 6.0191706768557775e-06,
      "loss": 2.8628,
      "step": 215687
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.021681308746338,
      "learning_rate": 6.018355406354169e-06,
      "loss": 2.8617,
      "step": 215688
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8592593669891357,
      "learning_rate": 6.01754019050924e-06,
      "loss": 3.0665,
      "step": 215689
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4731545448303223,
      "learning_rate": 6.016725029321256e-06,
      "loss": 3.0193,
      "step": 215690
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.053978681564331,
      "learning_rate": 6.015909922790285e-06,
      "loss": 2.8156,
      "step": 215691
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2810235023498535,
      "learning_rate": 6.015094870916526e-06,
      "loss": 3.0278,
      "step": 215692
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.9072458744049072,
      "learning_rate": 6.014279873700079e-06,
      "loss": 2.8954,
      "step": 215693
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7959675788879395,
      "learning_rate": 6.013464931141177e-06,
      "loss": 2.7785,
      "step": 215694
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4826886653900146,
      "learning_rate": 6.012650043239853e-06,
      "loss": 3.028,
      "step": 215695
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.06561017036438,
      "learning_rate": 6.011835209996374e-06,
      "loss": 2.7407,
      "step": 215696
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6180293560028076,
      "learning_rate": 6.0110204314108404e-06,
      "loss": 2.7868,
      "step": 215697
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1272330284118652,
      "learning_rate": 6.010205707483384e-06,
      "loss": 2.9911,
      "step": 215698
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.562084913253784,
      "learning_rate": 6.0093910382142065e-06,
      "loss": 2.8455,
      "step": 215699
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8271126747131348,
      "learning_rate": 6.00857642360344e-06,
      "loss": 3.1802,
      "step": 215700
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3380212783813477,
      "learning_rate": 6.007761863651183e-06,
      "loss": 2.8221,
      "step": 215701
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5352203845977783,
      "learning_rate": 6.0069473583576725e-06,
      "loss": 2.8274,
      "step": 215702
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0315117835998535,
      "learning_rate": 6.0061329077230045e-06,
      "loss": 2.9089,
      "step": 215703
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.300218343734741,
      "learning_rate": 6.005318511747348e-06,
      "loss": 2.8426,
      "step": 215704
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2558934688568115,
      "learning_rate": 6.004504170430868e-06,
      "loss": 3.0483,
      "step": 215705
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.423489093780518,
      "learning_rate": 6.003689883773699e-06,
      "loss": 2.8996,
      "step": 215706
    },
    {
      "epoch": 2.81,
      "grad_norm": 5.550427436828613,
      "learning_rate": 6.00287565177594e-06,
      "loss": 2.8222,
      "step": 215707
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.6384148597717285,
      "learning_rate": 6.002061474437858e-06,
      "loss": 3.0051,
      "step": 215708
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6689090728759766,
      "learning_rate": 6.00124735175952e-06,
      "loss": 2.7543,
      "step": 215709
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0380053520202637,
      "learning_rate": 6.000433283741091e-06,
      "loss": 3.1347,
      "step": 215710
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8722007274627686,
      "learning_rate": 5.999619270382772e-06,
      "loss": 3.0426,
      "step": 215711
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9696693420410156,
      "learning_rate": 5.998805311684596e-06,
      "loss": 2.7627,
      "step": 215712
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7673988342285156,
      "learning_rate": 5.997991407646863e-06,
      "loss": 2.8247,
      "step": 215713
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8224902153015137,
      "learning_rate": 5.9971775582696394e-06,
      "loss": 3.2013,
      "step": 215714
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7633848190307617,
      "learning_rate": 5.996363763553091e-06,
      "loss": 2.8153,
      "step": 215715
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.9506704807281494,
      "learning_rate": 5.99555002349732e-06,
      "loss": 2.6632,
      "step": 215716
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3027751445770264,
      "learning_rate": 5.99473633810259e-06,
      "loss": 2.8592,
      "step": 215717
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.207562208175659,
      "learning_rate": 5.993922707368937e-06,
      "loss": 2.9383,
      "step": 215718
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.847728967666626,
      "learning_rate": 5.993109131296592e-06,
      "loss": 2.9184,
      "step": 215719
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.51377534866333,
      "learning_rate": 5.992295609885689e-06,
      "loss": 2.8305,
      "step": 215720
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0395395755767822,
      "learning_rate": 5.9914821431363616e-06,
      "loss": 3.2072,
      "step": 215721
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.083176136016846,
      "learning_rate": 5.990668731048742e-06,
      "loss": 2.893,
      "step": 215722
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6713600158691406,
      "learning_rate": 5.9898553736230315e-06,
      "loss": 3.0199,
      "step": 215723
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.005858898162842,
      "learning_rate": 5.989042070859329e-06,
      "loss": 2.7235,
      "step": 215724
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7077622413635254,
      "learning_rate": 5.988228822757835e-06,
      "loss": 3.0371,
      "step": 215725
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8640897274017334,
      "learning_rate": 5.987415629318681e-06,
      "loss": 3.0416,
      "step": 215726
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0597176551818848,
      "learning_rate": 5.986602490542037e-06,
      "loss": 2.9914,
      "step": 215727
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1958324909210205,
      "learning_rate": 5.985789406427998e-06,
      "loss": 2.7176,
      "step": 215728
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.796729564666748,
      "learning_rate": 5.984976376976769e-06,
      "loss": 2.8885,
      "step": 215729
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9914660453796387,
      "learning_rate": 5.9841634021884464e-06,
      "loss": 2.6943,
      "step": 215730
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.944578170776367,
      "learning_rate": 5.983350482063265e-06,
      "loss": 2.9579,
      "step": 215731
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4986255168914795,
      "learning_rate": 5.982537616601291e-06,
      "loss": 2.8219,
      "step": 215732
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8195462226867676,
      "learning_rate": 5.981724805802757e-06,
      "loss": 2.751,
      "step": 215733
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9737398624420166,
      "learning_rate": 5.98091204966773e-06,
      "loss": 2.9244,
      "step": 215734
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7518961429595947,
      "learning_rate": 5.980099348196443e-06,
      "loss": 2.992,
      "step": 215735
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.188398838043213,
      "learning_rate": 5.979286701388963e-06,
      "loss": 2.997,
      "step": 215736
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.562366008758545,
      "learning_rate": 5.978474109245523e-06,
      "loss": 3.0721,
      "step": 215737
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.514788866043091,
      "learning_rate": 5.9776615717661904e-06,
      "loss": 2.9435,
      "step": 215738
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.713988780975342,
      "learning_rate": 5.976849088951196e-06,
      "loss": 2.9163,
      "step": 215739
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.612389087677002,
      "learning_rate": 5.976036660800643e-06,
      "loss": 2.8834,
      "step": 215740
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5897674560546875,
      "learning_rate": 5.975224287314695e-06,
      "loss": 3.0875,
      "step": 215741
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7381527423858643,
      "learning_rate": 5.974411968493487e-06,
      "loss": 2.9628,
      "step": 215742
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.416896343231201,
      "learning_rate": 5.973599704337218e-06,
      "loss": 2.6367,
      "step": 215743
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1763153076171875,
      "learning_rate": 5.972787494845954e-06,
      "loss": 2.6994,
      "step": 215744
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2178335189819336,
      "learning_rate": 5.971975340019996e-06,
      "loss": 3.0967,
      "step": 215745
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.197140693664551,
      "learning_rate": 5.971163239859311e-06,
      "loss": 2.8105,
      "step": 215746
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7833163738250732,
      "learning_rate": 5.970351194364164e-06,
      "loss": 2.7969,
      "step": 215747
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9851205348968506,
      "learning_rate": 5.969539203534657e-06,
      "loss": 2.7319,
      "step": 215748
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8428919315338135,
      "learning_rate": 5.968727267371021e-06,
      "loss": 2.8296,
      "step": 215749
    },
    {
      "epoch": 2.81,
      "grad_norm": 5.609823226928711,
      "learning_rate": 5.9679153858732896e-06,
      "loss": 2.8439,
      "step": 215750
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.269726276397705,
      "learning_rate": 5.967103559041731e-06,
      "loss": 2.9905,
      "step": 215751
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.797895669937134,
      "learning_rate": 5.966291786876376e-06,
      "loss": 3.0688,
      "step": 215752
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.003814220428467,
      "learning_rate": 5.965480069377459e-06,
      "loss": 2.8938,
      "step": 215753
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0002987384796143,
      "learning_rate": 5.964668406545115e-06,
      "loss": 2.9051,
      "step": 215754
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5089051723480225,
      "learning_rate": 5.963856798379507e-06,
      "loss": 3.0995,
      "step": 215755
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.602093458175659,
      "learning_rate": 5.963045244880738e-06,
      "loss": 2.7533,
      "step": 215756
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5150959491729736,
      "learning_rate": 5.962233746049006e-06,
      "loss": 2.6585,
      "step": 215757
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.701558828353882,
      "learning_rate": 5.961422301884444e-06,
      "loss": 2.9859,
      "step": 215758
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5680816173553467,
      "learning_rate": 5.96061091238722e-06,
      "loss": 2.9339,
      "step": 215759
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1606101989746094,
      "learning_rate": 5.959799577557434e-06,
      "loss": 3.0676,
      "step": 215760
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.695136070251465,
      "learning_rate": 5.958988297395284e-06,
      "loss": 2.8396,
      "step": 215761
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9301252365112305,
      "learning_rate": 5.958177071900905e-06,
      "loss": 2.9146,
      "step": 215762
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.501483917236328,
      "learning_rate": 5.9573659010744955e-06,
      "loss": 2.9973,
      "step": 215763
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.154322624206543,
      "learning_rate": 5.95655478491609e-06,
      "loss": 3.0126,
      "step": 215764
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.702871561050415,
      "learning_rate": 5.955743723425954e-06,
      "loss": 2.8483,
      "step": 215765
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2406811714172363,
      "learning_rate": 5.954932716604189e-06,
      "loss": 3.1526,
      "step": 215766
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0011072158813477,
      "learning_rate": 5.954121764450959e-06,
      "loss": 2.9048,
      "step": 215767
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.28259539604187,
      "learning_rate": 5.953310866966365e-06,
      "loss": 2.7166,
      "step": 215768
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.4173190593719482,
      "learning_rate": 5.952500024150675e-06,
      "loss": 2.9361,
      "step": 215769
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.995692253112793,
      "learning_rate": 5.9516892360039205e-06,
      "loss": 2.9294,
      "step": 215770
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.860668420791626,
      "learning_rate": 5.950878502526302e-06,
      "loss": 2.9265,
      "step": 215771
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.065035820007324,
      "learning_rate": 5.950067823717953e-06,
      "loss": 2.8818,
      "step": 215772
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9131338596343994,
      "learning_rate": 5.949257199579072e-06,
      "loss": 3.1133,
      "step": 215773
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7869343757629395,
      "learning_rate": 5.948446630109727e-06,
      "loss": 3.0596,
      "step": 215774
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0857443809509277,
      "learning_rate": 5.947636115310151e-06,
      "loss": 2.9844,
      "step": 215775
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.132944345474243,
      "learning_rate": 5.9468256551804764e-06,
      "loss": 2.7419,
      "step": 215776
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6173651218414307,
      "learning_rate": 5.946015249720837e-06,
      "loss": 2.8768,
      "step": 215777
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.975334882736206,
      "learning_rate": 5.945204898931333e-06,
      "loss": 3.055,
      "step": 215778
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.650200843811035,
      "learning_rate": 5.944394602812197e-06,
      "loss": 2.7556,
      "step": 215779
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9473471641540527,
      "learning_rate": 5.943584361363529e-06,
      "loss": 3.0053,
      "step": 215780
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5390233993530273,
      "learning_rate": 5.942774174585529e-06,
      "loss": 2.8487,
      "step": 215781
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.791882276535034,
      "learning_rate": 5.941964042478331e-06,
      "loss": 2.8955,
      "step": 215782
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0735650062561035,
      "learning_rate": 5.941153965042034e-06,
      "loss": 3.0162,
      "step": 215783
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.560912609100342,
      "learning_rate": 5.9403439422768375e-06,
      "loss": 2.706,
      "step": 215784
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7684271335601807,
      "learning_rate": 5.9395339741829086e-06,
      "loss": 2.9202,
      "step": 215785
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.975177764892578,
      "learning_rate": 5.938724060760314e-06,
      "loss": 3.0169,
      "step": 215786
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.48565411567688,
      "learning_rate": 5.93791420200932e-06,
      "loss": 2.7127,
      "step": 215787
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.323322296142578,
      "learning_rate": 5.937104397929992e-06,
      "loss": 3.0538,
      "step": 215788
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2930474281311035,
      "learning_rate": 5.936294648522499e-06,
      "loss": 3.0286,
      "step": 215789
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.91964054107666,
      "learning_rate": 5.935484953787039e-06,
      "loss": 2.9036,
      "step": 215790
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9645302295684814,
      "learning_rate": 5.93467531372368e-06,
      "loss": 2.5602,
      "step": 215791
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9674410820007324,
      "learning_rate": 5.933865728332621e-06,
      "loss": 2.8489,
      "step": 215792
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3912861347198486,
      "learning_rate": 5.933056197614028e-06,
      "loss": 2.9366,
      "step": 215793
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0214436054229736,
      "learning_rate": 5.932246721568035e-06,
      "loss": 2.7677,
      "step": 215794
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.340027093887329,
      "learning_rate": 5.931437300194741e-06,
      "loss": 2.7317,
      "step": 215795
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.047107219696045,
      "learning_rate": 5.930627933494381e-06,
      "loss": 2.8921,
      "step": 215796
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.369422197341919,
      "learning_rate": 5.929818621467053e-06,
      "loss": 3.0931,
      "step": 215797
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0470612049102783,
      "learning_rate": 5.929009364112958e-06,
      "loss": 2.9082,
      "step": 215798
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8664286136627197,
      "learning_rate": 5.928200161432195e-06,
      "loss": 3.0816,
      "step": 215799
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0245602130889893,
      "learning_rate": 5.9273910134249316e-06,
      "loss": 2.8285,
      "step": 215800
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9759082794189453,
      "learning_rate": 5.926581920091267e-06,
      "loss": 3.2298,
      "step": 215801
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.948490619659424,
      "learning_rate": 5.925772881431468e-06,
      "loss": 2.8155,
      "step": 215802
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.730440616607666,
      "learning_rate": 5.924963897445567e-06,
      "loss": 3.2629,
      "step": 215803
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.693477630615234,
      "learning_rate": 5.924154968133798e-06,
      "loss": 2.6375,
      "step": 215804
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1534054279327393,
      "learning_rate": 5.923346093496262e-06,
      "loss": 2.967,
      "step": 215805
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1173365116119385,
      "learning_rate": 5.92253727353319e-06,
      "loss": 2.8476,
      "step": 215806
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.603877544403076,
      "learning_rate": 5.921728508244583e-06,
      "loss": 2.9725,
      "step": 215807
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0121212005615234,
      "learning_rate": 5.920919797630741e-06,
      "loss": 2.8082,
      "step": 215808
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.211634874343872,
      "learning_rate": 5.920111141691697e-06,
      "loss": 3.092,
      "step": 215809
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.789074420928955,
      "learning_rate": 5.919302540427684e-06,
      "loss": 2.8059,
      "step": 215810
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5831494331359863,
      "learning_rate": 5.918493993838802e-06,
      "loss": 2.6817,
      "step": 215811
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0245726108551025,
      "learning_rate": 5.917685501925284e-06,
      "loss": 2.7072,
      "step": 215812
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.642697334289551,
      "learning_rate": 5.916877064687165e-06,
      "loss": 3.0721,
      "step": 215813
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.998302936553955,
      "learning_rate": 5.916068682124675e-06,
      "loss": 2.8268,
      "step": 215814
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.076395273208618,
      "learning_rate": 5.915260354237916e-06,
      "loss": 2.8169,
      "step": 215815
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8016037940979004,
      "learning_rate": 5.914452081027088e-06,
      "loss": 2.7839,
      "step": 215816
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7402801513671875,
      "learning_rate": 5.913643862492256e-06,
      "loss": 3.0753,
      "step": 215817
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.600383758544922,
      "learning_rate": 5.912835698633722e-06,
      "loss": 2.8241,
      "step": 215818
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.908745527267456,
      "learning_rate": 5.912027589451485e-06,
      "loss": 2.9783,
      "step": 215819
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6396636962890625,
      "learning_rate": 5.911219534945777e-06,
      "loss": 3.1047,
      "step": 215820
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.851677656173706,
      "learning_rate": 5.9104115351166655e-06,
      "loss": 3.0207,
      "step": 215821
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.34187650680542,
      "learning_rate": 5.909603589964418e-06,
      "loss": 2.807,
      "step": 215822
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.50640606880188,
      "learning_rate": 5.9087956994890995e-06,
      "loss": 2.6567,
      "step": 215823
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9155404567718506,
      "learning_rate": 5.907987863690944e-06,
      "loss": 2.9694,
      "step": 215824
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4304323196411133,
      "learning_rate": 5.907180082569984e-06,
      "loss": 2.8713,
      "step": 215825
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9226908683776855,
      "learning_rate": 5.906372356126454e-06,
      "loss": 3.0975,
      "step": 215826
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8028483390808105,
      "learning_rate": 5.905564684360453e-06,
      "loss": 2.8964,
      "step": 215827
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7660892009735107,
      "learning_rate": 5.904757067272181e-06,
      "loss": 2.8171,
      "step": 215828
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.068905830383301,
      "learning_rate": 5.903949504861772e-06,
      "loss": 2.9904,
      "step": 215829
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.79814076423645,
      "learning_rate": 5.90314199712939e-06,
      "loss": 2.7568,
      "step": 215830
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8443546295166016,
      "learning_rate": 5.902334544075138e-06,
      "loss": 2.7205,
      "step": 215831
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.760507345199585,
      "learning_rate": 5.901527145699181e-06,
      "loss": 3.1447,
      "step": 215832
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5193846225738525,
      "learning_rate": 5.900719802001685e-06,
      "loss": 2.7672,
      "step": 215833
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8489246368408203,
      "learning_rate": 5.899912512982819e-06,
      "loss": 2.9045,
      "step": 215834
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.11448335647583,
      "learning_rate": 5.899105278642679e-06,
      "loss": 2.9268,
      "step": 215835
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8872759342193604,
      "learning_rate": 5.898298098981502e-06,
      "loss": 2.8974,
      "step": 215836
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.035841226577759,
      "learning_rate": 5.897490973999352e-06,
      "loss": 2.9375,
      "step": 215837
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3323302268981934,
      "learning_rate": 5.896683903696398e-06,
      "loss": 3.1067,
      "step": 215838
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2389590740203857,
      "learning_rate": 5.895876888072804e-06,
      "loss": 2.9349,
      "step": 215839
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8685083389282227,
      "learning_rate": 5.8950699271287375e-06,
      "loss": 2.9007,
      "step": 215840
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.047891855239868,
      "learning_rate": 5.894263020864299e-06,
      "loss": 2.6096,
      "step": 215841
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.720400333404541,
      "learning_rate": 5.89345616927972e-06,
      "loss": 3.1404,
      "step": 215842
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8438117504119873,
      "learning_rate": 5.892649372375036e-06,
      "loss": 2.7841,
      "step": 215843
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.017352819442749,
      "learning_rate": 5.891842630150512e-06,
      "loss": 2.6958,
      "step": 215844
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1296823024749756,
      "learning_rate": 5.891035942606182e-06,
      "loss": 2.8813,
      "step": 215845
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.165684700012207,
      "learning_rate": 5.890229309742311e-06,
      "loss": 3.0552,
      "step": 215846
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1936490535736084,
      "learning_rate": 5.889422731559001e-06,
      "loss": 2.9488,
      "step": 215847
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9042844772338867,
      "learning_rate": 5.8886162080564185e-06,
      "loss": 2.9041,
      "step": 215848
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.708123207092285,
      "learning_rate": 5.887809739234628e-06,
      "loss": 2.9437,
      "step": 215849
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.380005359649658,
      "learning_rate": 5.887003325093898e-06,
      "loss": 2.9672,
      "step": 215850
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7199056148529053,
      "learning_rate": 5.886196965634293e-06,
      "loss": 3.1306,
      "step": 215851
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.720381736755371,
      "learning_rate": 5.885390660856015e-06,
      "loss": 2.7769,
      "step": 215852
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.220548629760742,
      "learning_rate": 5.884584410759163e-06,
      "loss": 2.8391,
      "step": 215853
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6830251216888428,
      "learning_rate": 5.8837782153440035e-06,
      "loss": 2.8992,
      "step": 215854
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.919119358062744,
      "learning_rate": 5.882972074610504e-06,
      "loss": 2.8667,
      "step": 215855
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.688190221786499,
      "learning_rate": 5.882165988558962e-06,
      "loss": 3.1558,
      "step": 215856
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7743356227874756,
      "learning_rate": 5.881359957189446e-06,
      "loss": 2.9153,
      "step": 215857
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0749831199645996,
      "learning_rate": 5.880553980502189e-06,
      "loss": 3.0192,
      "step": 215858
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.779054880142212,
      "learning_rate": 5.879748058497225e-06,
      "loss": 2.6071,
      "step": 215859
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6511588096618652,
      "learning_rate": 5.878942191174818e-06,
      "loss": 2.9741,
      "step": 215860
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0176842212677,
      "learning_rate": 5.87813637853507e-06,
      "loss": 2.7904,
      "step": 215861
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.526947021484375,
      "learning_rate": 5.877330620578114e-06,
      "loss": 2.8978,
      "step": 215862
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5791544914245605,
      "learning_rate": 5.8765249173040815e-06,
      "loss": 2.7646,
      "step": 215863
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.123521327972412,
      "learning_rate": 5.875719268713208e-06,
      "loss": 3.2405,
      "step": 215864
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.051056385040283,
      "learning_rate": 5.8749136748055255e-06,
      "loss": 2.7117,
      "step": 215865
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.167560338973999,
      "learning_rate": 5.8741081355813015e-06,
      "loss": 3.1252,
      "step": 215866
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.214694023132324,
      "learning_rate": 5.8733026510406344e-06,
      "loss": 2.8124,
      "step": 215867
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8856136798858643,
      "learning_rate": 5.872497221183659e-06,
      "loss": 3.0838,
      "step": 215868
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9832499027252197,
      "learning_rate": 5.87169184601054e-06,
      "loss": 3.0014,
      "step": 215869
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1774685382843018,
      "learning_rate": 5.8708865255214124e-06,
      "loss": 2.9645,
      "step": 215870
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.59177827835083,
      "learning_rate": 5.8700812597164415e-06,
      "loss": 2.924,
      "step": 215871
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8985602855682373,
      "learning_rate": 5.869276048595795e-06,
      "loss": 3.2443,
      "step": 215872
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9607162475585938,
      "learning_rate": 5.868470892159604e-06,
      "loss": 3.0126,
      "step": 215873
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.339073896408081,
      "learning_rate": 5.867665790407971e-06,
      "loss": 2.8716,
      "step": 215874
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.677030563354492,
      "learning_rate": 5.866860743341128e-06,
      "loss": 2.8144,
      "step": 215875
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3005754947662354,
      "learning_rate": 5.866055750959209e-06,
      "loss": 3.0688,
      "step": 215876
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.304868698120117,
      "learning_rate": 5.865250813262279e-06,
      "loss": 2.7652,
      "step": 215877
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.723085880279541,
      "learning_rate": 5.864445930250605e-06,
      "loss": 2.9236,
      "step": 215878
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7354304790496826,
      "learning_rate": 5.863641101924288e-06,
      "loss": 2.814,
      "step": 215879
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.87296199798584,
      "learning_rate": 5.862836328283427e-06,
      "loss": 3.0125,
      "step": 215880
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9471964836120605,
      "learning_rate": 5.862031609328255e-06,
      "loss": 2.969,
      "step": 215881
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8575470447540283,
      "learning_rate": 5.861226945058839e-06,
      "loss": 2.9442,
      "step": 215882
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.156808853149414,
      "learning_rate": 5.860422335475412e-06,
      "loss": 3.0388,
      "step": 215883
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.258610725402832,
      "learning_rate": 5.859617780578074e-06,
      "loss": 2.986,
      "step": 215884
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9790782928466797,
      "learning_rate": 5.858813280366992e-06,
      "loss": 2.8842,
      "step": 215885
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.054480791091919,
      "learning_rate": 5.858008834842298e-06,
      "loss": 3.148,
      "step": 215886
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3002240657806396,
      "learning_rate": 5.85720444400416e-06,
      "loss": 2.9811,
      "step": 215887
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0126073360443115,
      "learning_rate": 5.85640010785271e-06,
      "loss": 2.8057,
      "step": 215888
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9654924869537354,
      "learning_rate": 5.855595826388115e-06,
      "loss": 2.73,
      "step": 215889
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6490468978881836,
      "learning_rate": 5.854791599610542e-06,
      "loss": 2.9748,
      "step": 215890
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.149024724960327,
      "learning_rate": 5.8539874275200905e-06,
      "loss": 2.9919,
      "step": 215891
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4579710960388184,
      "learning_rate": 5.853183310116927e-06,
      "loss": 3.0269,
      "step": 215892
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5926618576049805,
      "learning_rate": 5.852379247401217e-06,
      "loss": 2.878,
      "step": 215893
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.292328119277954,
      "learning_rate": 5.8515752393730965e-06,
      "loss": 2.8821,
      "step": 215894
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.747880220413208,
      "learning_rate": 5.8507712860327295e-06,
      "loss": 2.838,
      "step": 215895
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8032431602478027,
      "learning_rate": 5.84996738738025e-06,
      "loss": 3.0563,
      "step": 215896
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.138862133026123,
      "learning_rate": 5.849163543415858e-06,
      "loss": 2.9539,
      "step": 215897
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.776607036590576,
      "learning_rate": 5.848359754139587e-06,
      "loss": 3.0671,
      "step": 215898
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7834718227386475,
      "learning_rate": 5.8475560195517026e-06,
      "loss": 2.8539,
      "step": 215899
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1594460010528564,
      "learning_rate": 5.846752339652272e-06,
      "loss": 2.8343,
      "step": 215900
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1151809692382812,
      "learning_rate": 5.845948714441528e-06,
      "loss": 2.8753,
      "step": 215901
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.060398578643799,
      "learning_rate": 5.845145143919538e-06,
      "loss": 2.6177,
      "step": 215902
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.154340982437134,
      "learning_rate": 5.844341628086535e-06,
      "loss": 2.9218,
      "step": 215903
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.899040937423706,
      "learning_rate": 5.843538166942585e-06,
      "loss": 2.7203,
      "step": 215904
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4812123775482178,
      "learning_rate": 5.842734760487888e-06,
      "loss": 2.9971,
      "step": 215905
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.711498260498047,
      "learning_rate": 5.841931408722578e-06,
      "loss": 2.7255,
      "step": 215906
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.85526442527771,
      "learning_rate": 5.841128111646787e-06,
      "loss": 2.8724,
      "step": 215907
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.018009901046753,
      "learning_rate": 5.840324869260682e-06,
      "loss": 3.1473,
      "step": 215908
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0855965614318848,
      "learning_rate": 5.839521681564463e-06,
      "loss": 2.8449,
      "step": 215909
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.196836471557617,
      "learning_rate": 5.838718548558197e-06,
      "loss": 2.925,
      "step": 215910
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2005374431610107,
      "learning_rate": 5.837915470242049e-06,
      "loss": 2.837,
      "step": 215911
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.630565881729126,
      "learning_rate": 5.837112446616188e-06,
      "loss": 2.77,
      "step": 215912
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.855963706970215,
      "learning_rate": 5.836309477680778e-06,
      "loss": 3.0409,
      "step": 215913
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8822808265686035,
      "learning_rate": 5.835506563435921e-06,
      "loss": 2.8682,
      "step": 215914
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.617403268814087,
      "learning_rate": 5.834703703881882e-06,
      "loss": 2.9941,
      "step": 215915
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.023606300354004,
      "learning_rate": 5.833900899018629e-06,
      "loss": 2.8356,
      "step": 215916
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.344707012176514,
      "learning_rate": 5.83309814884646e-06,
      "loss": 2.5256,
      "step": 215917
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.416633129119873,
      "learning_rate": 5.8322954533654434e-06,
      "loss": 2.9173,
      "step": 215918
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.734847068786621,
      "learning_rate": 5.831492812575744e-06,
      "loss": 2.9298,
      "step": 215919
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6354665756225586,
      "learning_rate": 5.83069022647753e-06,
      "loss": 2.9884,
      "step": 215920
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.070128917694092,
      "learning_rate": 5.829887695071001e-06,
      "loss": 2.7547,
      "step": 215921
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.330465316772461,
      "learning_rate": 5.8290852183561885e-06,
      "loss": 3.0303,
      "step": 215922
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0744643211364746,
      "learning_rate": 5.828282796333328e-06,
      "loss": 3.1456,
      "step": 215923
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1581308841705322,
      "learning_rate": 5.827480429002518e-06,
      "loss": 3.0486,
      "step": 215924
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.085771322250366,
      "learning_rate": 5.8266781163639585e-06,
      "loss": 2.8498,
      "step": 215925
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.014779806137085,
      "learning_rate": 5.82587585841775e-06,
      "loss": 2.9238,
      "step": 215926
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.782632827758789,
      "learning_rate": 5.825073655164059e-06,
      "loss": 2.6664,
      "step": 215927
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.751025438308716,
      "learning_rate": 5.824271506603084e-06,
      "loss": 3.0947,
      "step": 215928
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.406500339508057,
      "learning_rate": 5.823469412734893e-06,
      "loss": 2.8278,
      "step": 215929
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.628466844558716,
      "learning_rate": 5.822667373559653e-06,
      "loss": 2.9815,
      "step": 215930
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.256601572036743,
      "learning_rate": 5.821865389077596e-06,
      "loss": 2.8588,
      "step": 215931
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.190944194793701,
      "learning_rate": 5.821063459288755e-06,
      "loss": 2.8954,
      "step": 215932
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.752224445343018,
      "learning_rate": 5.820261584193331e-06,
      "loss": 2.5838,
      "step": 215933
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.241358995437622,
      "learning_rate": 5.819459763791523e-06,
      "loss": 2.7412,
      "step": 215934
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.5422210693359375,
      "learning_rate": 5.818657998083398e-06,
      "loss": 2.8874,
      "step": 215935
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.306887149810791,
      "learning_rate": 5.817856287069122e-06,
      "loss": 3.0919,
      "step": 215936
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.926346778869629,
      "learning_rate": 5.817054630748863e-06,
      "loss": 2.9283,
      "step": 215937
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.20601224899292,
      "learning_rate": 5.816253029122786e-06,
      "loss": 3.0934,
      "step": 215938
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3798906803131104,
      "learning_rate": 5.815451482191025e-06,
      "loss": 2.7683,
      "step": 215939
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.343158483505249,
      "learning_rate": 5.814649989953713e-06,
      "loss": 2.7474,
      "step": 215940
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.487873554229736,
      "learning_rate": 5.813848552411016e-06,
      "loss": 2.9609,
      "step": 215941
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1495227813720703,
      "learning_rate": 5.813047169563067e-06,
      "loss": 2.7861,
      "step": 215942
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9131722450256348,
      "learning_rate": 5.812245841410035e-06,
      "loss": 3.0156,
      "step": 215943
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.017017126083374,
      "learning_rate": 5.811444567952017e-06,
      "loss": 2.8632,
      "step": 215944
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.996330499649048,
      "learning_rate": 5.810643349189281e-06,
      "loss": 3.0141,
      "step": 215945
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9707930088043213,
      "learning_rate": 5.80984218512186e-06,
      "loss": 3.1547,
      "step": 215946
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9938364028930664,
      "learning_rate": 5.809041075749954e-06,
      "loss": 2.7657,
      "step": 215947
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1328296661376953,
      "learning_rate": 5.808240021073662e-06,
      "loss": 2.878,
      "step": 215948
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7719805240631104,
      "learning_rate": 5.807439021093219e-06,
      "loss": 3.1287,
      "step": 215949
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6371476650238037,
      "learning_rate": 5.806638075808689e-06,
      "loss": 2.8745,
      "step": 215950
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.145855188369751,
      "learning_rate": 5.805837185220274e-06,
      "loss": 2.9882,
      "step": 215951
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.960052251815796,
      "learning_rate": 5.8050363493281405e-06,
      "loss": 2.8224,
      "step": 215952
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6633598804473877,
      "learning_rate": 5.804235568132387e-06,
      "loss": 2.6519,
      "step": 215953
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0057313442230225,
      "learning_rate": 5.803434841633148e-06,
      "loss": 2.9816,
      "step": 215954
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.82680082321167,
      "learning_rate": 5.8026341698306224e-06,
      "loss": 2.9756,
      "step": 215955
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0155341625213623,
      "learning_rate": 5.80183355272491e-06,
      "loss": 2.8287,
      "step": 215956
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.750932216644287,
      "learning_rate": 5.801032990316245e-06,
      "loss": 2.9094,
      "step": 215957
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5771899223327637,
      "learning_rate": 5.800232482604694e-06,
      "loss": 2.988,
      "step": 215958
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.779210329055786,
      "learning_rate": 5.7994320295904226e-06,
      "loss": 2.6268,
      "step": 215959
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6846251487731934,
      "learning_rate": 5.798631631273598e-06,
      "loss": 2.998,
      "step": 215960
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.86647367477417,
      "learning_rate": 5.797831287654387e-06,
      "loss": 2.8945,
      "step": 215961
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.609375476837158,
      "learning_rate": 5.797030998732888e-06,
      "loss": 2.8679,
      "step": 215962
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.337031126022339,
      "learning_rate": 5.796230764509269e-06,
      "loss": 2.8805,
      "step": 215963
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8982532024383545,
      "learning_rate": 5.795430584983729e-06,
      "loss": 2.9328,
      "step": 215964
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.005079507827759,
      "learning_rate": 5.794630460156302e-06,
      "loss": 3.0694,
      "step": 215965
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.864169120788574,
      "learning_rate": 5.793830390027254e-06,
      "loss": 2.7181,
      "step": 215966
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.8001463413238525,
      "learning_rate": 5.793030374596652e-06,
      "loss": 2.9798,
      "step": 215967
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.833794355392456,
      "learning_rate": 5.7922304138647295e-06,
      "loss": 3.0711,
      "step": 215968
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.738565683364868,
      "learning_rate": 5.791430507831552e-06,
      "loss": 3.0533,
      "step": 215969
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4219610691070557,
      "learning_rate": 5.79063065649732e-06,
      "loss": 3.0976,
      "step": 215970
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9727783203125,
      "learning_rate": 5.7898308598621324e-06,
      "loss": 2.7704,
      "step": 215971
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2868592739105225,
      "learning_rate": 5.789031117926191e-06,
      "loss": 2.9648,
      "step": 215972
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.610506057739258,
      "learning_rate": 5.788231430689594e-06,
      "loss": 2.7838,
      "step": 215973
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5684595108032227,
      "learning_rate": 5.787431798152575e-06,
      "loss": 2.5797,
      "step": 215974
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1128854751586914,
      "learning_rate": 5.7866322203152015e-06,
      "loss": 3.0261,
      "step": 215975
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7659857273101807,
      "learning_rate": 5.785832697177639e-06,
      "loss": 3.0068,
      "step": 215976
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5651514530181885,
      "learning_rate": 5.785033228740022e-06,
      "loss": 2.795,
      "step": 215977
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.051511287689209,
      "learning_rate": 5.784233815002548e-06,
      "loss": 3.0462,
      "step": 215978
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.8914294242858887,
      "learning_rate": 5.783434455965319e-06,
      "loss": 2.9589,
      "step": 215979
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.978123426437378,
      "learning_rate": 5.7826351516285345e-06,
      "loss": 2.9509,
      "step": 215980
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7837846279144287,
      "learning_rate": 5.78183590199226e-06,
      "loss": 3.039,
      "step": 215981
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0194196701049805,
      "learning_rate": 5.7810367070567636e-06,
      "loss": 2.6327,
      "step": 215982
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.845639228820801,
      "learning_rate": 5.780237566822077e-06,
      "loss": 2.78,
      "step": 215983
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.439605236053467,
      "learning_rate": 5.7794384812884344e-06,
      "loss": 2.7892,
      "step": 215984
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4103968143463135,
      "learning_rate": 5.778639450455902e-06,
      "loss": 2.7059,
      "step": 215985
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5280447006225586,
      "learning_rate": 5.777840474324713e-06,
      "loss": 2.9213,
      "step": 215986
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0238258838653564,
      "learning_rate": 5.777041552894967e-06,
      "loss": 3.2749,
      "step": 215987
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4428060054779053,
      "learning_rate": 5.776242686166832e-06,
      "loss": 2.731,
      "step": 215988
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5218679904937744,
      "learning_rate": 5.775443874140439e-06,
      "loss": 2.8382,
      "step": 215989
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.9478979110717773,
      "learning_rate": 5.7746451168159565e-06,
      "loss": 2.9535,
      "step": 215990
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.044447422027588,
      "learning_rate": 5.773846414193484e-06,
      "loss": 2.5886,
      "step": 215991
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.780311107635498,
      "learning_rate": 5.773047766273253e-06,
      "loss": 2.9449,
      "step": 215992
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.616978168487549,
      "learning_rate": 5.7722491730552985e-06,
      "loss": 2.768,
      "step": 215993
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6440699100494385,
      "learning_rate": 5.77145063453992e-06,
      "loss": 3.1328,
      "step": 215994
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.852123975753784,
      "learning_rate": 5.770652150727151e-06,
      "loss": 2.7803,
      "step": 215995
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8320281505584717,
      "learning_rate": 5.769853721617157e-06,
      "loss": 2.8439,
      "step": 215996
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0235846042633057,
      "learning_rate": 5.769055347210105e-06,
      "loss": 2.9854,
      "step": 215997
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2342700958251953,
      "learning_rate": 5.768257027506163e-06,
      "loss": 2.8495,
      "step": 215998
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9531407356262207,
      "learning_rate": 5.7674587625053944e-06,
      "loss": 3.1337,
      "step": 215999
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.55430006980896,
      "learning_rate": 5.766660552208069e-06,
      "loss": 3.1272,
      "step": 216000
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.193495512008667,
      "learning_rate": 5.765862396614285e-06,
      "loss": 3.0695,
      "step": 216001
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4107401371002197,
      "learning_rate": 5.765064295724142e-06,
      "loss": 3.0466,
      "step": 216002
    },
    {
      "epoch": 2.81,
      "grad_norm": 5.842474460601807,
      "learning_rate": 5.764266249537841e-06,
      "loss": 2.7989,
      "step": 216003
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.393872022628784,
      "learning_rate": 5.763468258055514e-06,
      "loss": 2.8222,
      "step": 216004
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4162094593048096,
      "learning_rate": 5.762670321277296e-06,
      "loss": 2.8634,
      "step": 216005
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7075231075286865,
      "learning_rate": 5.761872439203386e-06,
      "loss": 2.8924,
      "step": 216006
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7995829582214355,
      "learning_rate": 5.761074611833882e-06,
      "loss": 2.9285,
      "step": 216007
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.099555492401123,
      "learning_rate": 5.7602768391689865e-06,
      "loss": 2.9404,
      "step": 216008
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0205814838409424,
      "learning_rate": 5.759479121208732e-06,
      "loss": 2.9299,
      "step": 216009
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.95888352394104,
      "learning_rate": 5.758681457953418e-06,
      "loss": 2.9526,
      "step": 216010
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.510139226913452,
      "learning_rate": 5.757883849403044e-06,
      "loss": 2.7218,
      "step": 216011
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8475332260131836,
      "learning_rate": 5.757086295557911e-06,
      "loss": 2.9968,
      "step": 216012
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.219366073608398,
      "learning_rate": 5.756288796418051e-06,
      "loss": 2.9397,
      "step": 216013
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0002713203430176,
      "learning_rate": 5.755491351983665e-06,
      "loss": 2.7468,
      "step": 216014
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7532923221588135,
      "learning_rate": 5.754693962254853e-06,
      "loss": 2.9691,
      "step": 216015
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2107231616973877,
      "learning_rate": 5.753896627231846e-06,
      "loss": 3.0608,
      "step": 216016
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9482860565185547,
      "learning_rate": 5.753099346914713e-06,
      "loss": 3.149,
      "step": 216017
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.640436887741089,
      "learning_rate": 5.75230212130362e-06,
      "loss": 2.9882,
      "step": 216018
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2721481323242188,
      "learning_rate": 5.7515049503987665e-06,
      "loss": 2.8165,
      "step": 216019
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.953472137451172,
      "learning_rate": 5.750707834200252e-06,
      "loss": 2.9843,
      "step": 216020
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9551308155059814,
      "learning_rate": 5.7499107727082105e-06,
      "loss": 2.7215,
      "step": 216021
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9643971920013428,
      "learning_rate": 5.749113765922841e-06,
      "loss": 2.7396,
      "step": 216022
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3673958778381348,
      "learning_rate": 5.748316813844245e-06,
      "loss": 2.7979,
      "step": 216023
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8711535930633545,
      "learning_rate": 5.74751991647262e-06,
      "loss": 2.9371,
      "step": 216024
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2047064304351807,
      "learning_rate": 5.746723073808068e-06,
      "loss": 2.9685,
      "step": 216025
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6299257278442383,
      "learning_rate": 5.7459262858507886e-06,
      "loss": 2.8819,
      "step": 216026
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.95879864692688,
      "learning_rate": 5.745129552600847e-06,
      "loss": 2.7259,
      "step": 216027
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2670488357543945,
      "learning_rate": 5.744332874058443e-06,
      "loss": 2.952,
      "step": 216028
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.00772762298584,
      "learning_rate": 5.743536250223746e-06,
      "loss": 2.9686,
      "step": 216029
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2519423961639404,
      "learning_rate": 5.742739681096853e-06,
      "loss": 2.8289,
      "step": 216030
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.63608455657959,
      "learning_rate": 5.7419431666779645e-06,
      "loss": 2.9375,
      "step": 216031
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6306753158569336,
      "learning_rate": 5.741146706967215e-06,
      "loss": 2.7695,
      "step": 216032
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1984243392944336,
      "learning_rate": 5.740350301964702e-06,
      "loss": 2.7866,
      "step": 216033
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5601301193237305,
      "learning_rate": 5.739553951670628e-06,
      "loss": 2.8266,
      "step": 216034
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8536951541900635,
      "learning_rate": 5.738757656085125e-06,
      "loss": 2.9077,
      "step": 216035
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5719735622406006,
      "learning_rate": 5.7379614152083585e-06,
      "loss": 3.0289,
      "step": 216036
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8219213485717773,
      "learning_rate": 5.7371652290404635e-06,
      "loss": 2.8457,
      "step": 216037
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8154940605163574,
      "learning_rate": 5.736369097581573e-06,
      "loss": 3.0076,
      "step": 216038
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4372940063476562,
      "learning_rate": 5.735573020831818e-06,
      "loss": 2.9252,
      "step": 216039
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.786165952682495,
      "learning_rate": 5.734776998791402e-06,
      "loss": 2.7513,
      "step": 216040
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.766166925430298,
      "learning_rate": 5.733981031460422e-06,
      "loss": 3.018,
      "step": 216041
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8886961936950684,
      "learning_rate": 5.7331851188390785e-06,
      "loss": 3.0701,
      "step": 216042
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.816998243331909,
      "learning_rate": 5.732389260927506e-06,
      "loss": 2.7599,
      "step": 216043
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1134519577026367,
      "learning_rate": 5.73159345772577e-06,
      "loss": 2.8965,
      "step": 216044
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9836230278015137,
      "learning_rate": 5.730797709234136e-06,
      "loss": 2.8226,
      "step": 216045
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.877012014389038,
      "learning_rate": 5.730002015452706e-06,
      "loss": 2.9665,
      "step": 216046
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4112043380737305,
      "learning_rate": 5.7292063763816125e-06,
      "loss": 2.8921,
      "step": 216047
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0160775184631348,
      "learning_rate": 5.728410792021021e-06,
      "loss": 2.9765,
      "step": 216048
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8864452838897705,
      "learning_rate": 5.727615262371066e-06,
      "loss": 2.7064,
      "step": 216049
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6891934871673584,
      "learning_rate": 5.72681978743188e-06,
      "loss": 2.9813,
      "step": 216050
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3907968997955322,
      "learning_rate": 5.726024367203663e-06,
      "loss": 2.902,
      "step": 216051
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.281841278076172,
      "learning_rate": 5.7252290016865154e-06,
      "loss": 3.0731,
      "step": 216052
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6915719509124756,
      "learning_rate": 5.724433690880603e-06,
      "loss": 3.032,
      "step": 216053
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.060373306274414,
      "learning_rate": 5.723638434786093e-06,
      "loss": 2.6842,
      "step": 216054
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9054861068725586,
      "learning_rate": 5.722843233403118e-06,
      "loss": 2.7689,
      "step": 216055
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.904191255569458,
      "learning_rate": 5.722048086731779e-06,
      "loss": 2.9476,
      "step": 216056
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5446395874023438,
      "learning_rate": 5.721252994772307e-06,
      "loss": 3.0114,
      "step": 216057
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.802180290222168,
      "learning_rate": 5.720457957524771e-06,
      "loss": 2.9191,
      "step": 216058
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0577855110168457,
      "learning_rate": 5.719662974989403e-06,
      "loss": 2.965,
      "step": 216059
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7705440521240234,
      "learning_rate": 5.718868047166269e-06,
      "loss": 2.9212,
      "step": 216060
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1881253719329834,
      "learning_rate": 5.718073174055537e-06,
      "loss": 3.0261,
      "step": 216061
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.17149019241333,
      "learning_rate": 5.71727835565744e-06,
      "loss": 2.8824,
      "step": 216062
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2727625370025635,
      "learning_rate": 5.71648359197201e-06,
      "loss": 2.9016,
      "step": 216063
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0548317432403564,
      "learning_rate": 5.7156888829994145e-06,
      "loss": 2.9934,
      "step": 216064
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.430422067642212,
      "learning_rate": 5.714894228739886e-06,
      "loss": 2.9867,
      "step": 216065
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.898758888244629,
      "learning_rate": 5.714099629193458e-06,
      "loss": 3.1866,
      "step": 216066
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9679248332977295,
      "learning_rate": 5.7133050843603645e-06,
      "loss": 2.9993,
      "step": 216067
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.955718517303467,
      "learning_rate": 5.712510594240738e-06,
      "loss": 2.971,
      "step": 216068
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.114727258682251,
      "learning_rate": 5.711716158834678e-06,
      "loss": 2.7737,
      "step": 216069
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1013147830963135,
      "learning_rate": 5.710921778142385e-06,
      "loss": 2.9576,
      "step": 216070
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2142977714538574,
      "learning_rate": 5.710127452163993e-06,
      "loss": 2.9656,
      "step": 216071
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9128637313842773,
      "learning_rate": 5.7093331808996e-06,
      "loss": 3.0949,
      "step": 216072
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1805644035339355,
      "learning_rate": 5.708538964349441e-06,
      "loss": 2.9712,
      "step": 216073
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0256898403167725,
      "learning_rate": 5.707744802513614e-06,
      "loss": 2.9552,
      "step": 216074
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0095529556274414,
      "learning_rate": 5.706950695392287e-06,
      "loss": 2.8757,
      "step": 216075
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1594607830047607,
      "learning_rate": 5.70615664298556e-06,
      "loss": 2.8712,
      "step": 216076
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2693300247192383,
      "learning_rate": 5.705362645293632e-06,
      "loss": 2.9513,
      "step": 216077
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.2534990310668945,
      "learning_rate": 5.704568702316603e-06,
      "loss": 3.0071,
      "step": 216078
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6066489219665527,
      "learning_rate": 5.703774814054673e-06,
      "loss": 2.9961,
      "step": 216079
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.05147123336792,
      "learning_rate": 5.702980980507976e-06,
      "loss": 2.9007,
      "step": 216080
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0633745193481445,
      "learning_rate": 5.7021872016766445e-06,
      "loss": 2.8884,
      "step": 216081
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.72019362449646,
      "learning_rate": 5.701393477560811e-06,
      "loss": 2.7542,
      "step": 216082
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.552712917327881,
      "learning_rate": 5.700599808160677e-06,
      "loss": 2.965,
      "step": 216083
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7203619480133057,
      "learning_rate": 5.699806193476309e-06,
      "loss": 2.8576,
      "step": 216084
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0103936195373535,
      "learning_rate": 5.699012633507938e-06,
      "loss": 2.9931,
      "step": 216085
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.497694253921509,
      "learning_rate": 5.698219128255699e-06,
      "loss": 3.2132,
      "step": 216086
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.8665666580200195,
      "learning_rate": 5.6974256777196914e-06,
      "loss": 2.8235,
      "step": 216087
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.099989891052246,
      "learning_rate": 5.696632281900082e-06,
      "loss": 2.8075,
      "step": 216088
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2901577949523926,
      "learning_rate": 5.695838940797038e-06,
      "loss": 2.7368,
      "step": 216089
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7131102085113525,
      "learning_rate": 5.695045654410657e-06,
      "loss": 2.9344,
      "step": 216090
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7171878814697266,
      "learning_rate": 5.694252422741141e-06,
      "loss": 2.9932,
      "step": 216091
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.772982597351074,
      "learning_rate": 5.693459245788656e-06,
      "loss": 3.0234,
      "step": 216092
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0921075344085693,
      "learning_rate": 5.692666123553269e-06,
      "loss": 3.0069,
      "step": 216093
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2184200286865234,
      "learning_rate": 5.691873056035179e-06,
      "loss": 2.7081,
      "step": 216094
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.15681529045105,
      "learning_rate": 5.691080043234519e-06,
      "loss": 3.0422,
      "step": 216095
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.152979850769043,
      "learning_rate": 5.690287085151457e-06,
      "loss": 2.967,
      "step": 216096
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.033125877380371,
      "learning_rate": 5.689494181786125e-06,
      "loss": 2.7803,
      "step": 216097
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3618526458740234,
      "learning_rate": 5.68870133313869e-06,
      "loss": 2.6955,
      "step": 216098
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3880865573883057,
      "learning_rate": 5.687908539209252e-06,
      "loss": 2.9028,
      "step": 216099
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9847006797790527,
      "learning_rate": 5.687115799997976e-06,
      "loss": 2.7981,
      "step": 216100
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7729363441467285,
      "learning_rate": 5.686323115505065e-06,
      "loss": 2.8114,
      "step": 216101
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.388890504837036,
      "learning_rate": 5.685530485730582e-06,
      "loss": 2.9241,
      "step": 216102
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.3733625411987305,
      "learning_rate": 5.68473791067473e-06,
      "loss": 2.9017,
      "step": 216103
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.655838966369629,
      "learning_rate": 5.6839453903376405e-06,
      "loss": 3.1977,
      "step": 216104
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9372568130493164,
      "learning_rate": 5.683152924719481e-06,
      "loss": 3.1449,
      "step": 216105
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1411924362182617,
      "learning_rate": 5.68236051382035e-06,
      "loss": 3.1527,
      "step": 216106
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.92852783203125,
      "learning_rate": 5.681568157640448e-06,
      "loss": 2.7254,
      "step": 216107
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9731087684631348,
      "learning_rate": 5.6807758561798424e-06,
      "loss": 3.2918,
      "step": 216108
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.000533103942871,
      "learning_rate": 5.679983609438799e-06,
      "loss": 3.0401,
      "step": 216109
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2103145122528076,
      "learning_rate": 5.679191417417383e-06,
      "loss": 3.1123,
      "step": 216110
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.664292812347412,
      "learning_rate": 5.6783992801157645e-06,
      "loss": 2.7686,
      "step": 216111
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.053966522216797,
      "learning_rate": 5.677607197534073e-06,
      "loss": 3.0015,
      "step": 216112
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2101478576660156,
      "learning_rate": 5.676815169672477e-06,
      "loss": 3.3408,
      "step": 216113
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.6323347091674805,
      "learning_rate": 5.67602319653111e-06,
      "loss": 2.8249,
      "step": 216114
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.157320261001587,
      "learning_rate": 5.675231278110138e-06,
      "loss": 2.9265,
      "step": 216115
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5883841514587402,
      "learning_rate": 5.674439414409726e-06,
      "loss": 2.8247,
      "step": 216116
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7525248527526855,
      "learning_rate": 5.673647605429943e-06,
      "loss": 3.0143,
      "step": 216117
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.305104732513428,
      "learning_rate": 5.6728558511709876e-06,
      "loss": 2.7328,
      "step": 216118
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0232889652252197,
      "learning_rate": 5.672064151633027e-06,
      "loss": 3.0637,
      "step": 216119
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7791285514831543,
      "learning_rate": 5.6712725068161605e-06,
      "loss": 3.0126,
      "step": 216120
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7396044731140137,
      "learning_rate": 5.670480916720588e-06,
      "loss": 2.8169,
      "step": 216121
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0662147998809814,
      "learning_rate": 5.669689381346443e-06,
      "loss": 2.8618,
      "step": 216122
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.32193660736084,
      "learning_rate": 5.6688979006938255e-06,
      "loss": 2.9658,
      "step": 216123
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3375260829925537,
      "learning_rate": 5.668106474762901e-06,
      "loss": 2.9618,
      "step": 216124
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.81044602394104,
      "learning_rate": 5.667315103553871e-06,
      "loss": 2.9347,
      "step": 216125
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.5402863025665283,
      "learning_rate": 5.666523787066801e-06,
      "loss": 3.1011,
      "step": 216126
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.447840690612793,
      "learning_rate": 5.665732525301925e-06,
      "loss": 2.527,
      "step": 216127
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7925610542297363,
      "learning_rate": 5.664941318259308e-06,
      "loss": 2.9194,
      "step": 216128
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8758959770202637,
      "learning_rate": 5.664150165939152e-06,
      "loss": 2.8769,
      "step": 216129
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2000482082366943,
      "learning_rate": 5.663359068341589e-06,
      "loss": 2.9051,
      "step": 216130
    },
    {
      "epoch": 2.81,
      "grad_norm": 5.0282182693481445,
      "learning_rate": 5.662568025466785e-06,
      "loss": 2.9584,
      "step": 216131
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8057429790496826,
      "learning_rate": 5.661777037314808e-06,
      "loss": 3.0675,
      "step": 216132
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0880258083343506,
      "learning_rate": 5.6609861038859225e-06,
      "loss": 3.1003,
      "step": 216133
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3244521617889404,
      "learning_rate": 5.660195225180164e-06,
      "loss": 3.291,
      "step": 216134
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.6052966117858887,
      "learning_rate": 5.659404401197765e-06,
      "loss": 2.9779,
      "step": 216135
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.59118390083313,
      "learning_rate": 5.6586136319388246e-06,
      "loss": 3.0149,
      "step": 216136
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.147528648376465,
      "learning_rate": 5.65782291740351e-06,
      "loss": 3.0608,
      "step": 216137
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.159541606903076,
      "learning_rate": 5.657032257591954e-06,
      "loss": 3.0645,
      "step": 216138
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.769848585128784,
      "learning_rate": 5.656241652504323e-06,
      "loss": 2.8863,
      "step": 216139
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.958003044128418,
      "learning_rate": 5.655451102140751e-06,
      "loss": 2.9751,
      "step": 216140
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.260512351989746,
      "learning_rate": 5.654660606501371e-06,
      "loss": 3.1316,
      "step": 216141
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.7499146461486816,
      "learning_rate": 5.6538701655863826e-06,
      "loss": 2.7158,
      "step": 216142
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.451087236404419,
      "learning_rate": 5.653079779395853e-06,
      "loss": 2.8791,
      "step": 216143
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.677973985671997,
      "learning_rate": 5.652289447930014e-06,
      "loss": 3.1554,
      "step": 216144
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.119213342666626,
      "learning_rate": 5.651499171188934e-06,
      "loss": 3.115,
      "step": 216145
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0023365020751953,
      "learning_rate": 5.650708949172811e-06,
      "loss": 3.052,
      "step": 216146
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8635730743408203,
      "learning_rate": 5.649918781881779e-06,
      "loss": 3.0235,
      "step": 216147
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.449030876159668,
      "learning_rate": 5.6491286693160055e-06,
      "loss": 2.7733,
      "step": 216148
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3416659832000732,
      "learning_rate": 5.648338611475556e-06,
      "loss": 2.8965,
      "step": 216149
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.206819772720337,
      "learning_rate": 5.647548608360697e-06,
      "loss": 2.885,
      "step": 216150
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.727266788482666,
      "learning_rate": 5.646758659971495e-06,
      "loss": 2.9615,
      "step": 216151
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2237415313720703,
      "learning_rate": 5.645968766308085e-06,
      "loss": 2.8413,
      "step": 216152
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.4704747200012207,
      "learning_rate": 5.645178927370697e-06,
      "loss": 2.8324,
      "step": 216153
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1256632804870605,
      "learning_rate": 5.6443891431594e-06,
      "loss": 2.6654,
      "step": 216154
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3426356315612793,
      "learning_rate": 5.64359941367436e-06,
      "loss": 3.0045,
      "step": 216155
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.473752021789551,
      "learning_rate": 5.642809738915743e-06,
      "loss": 2.9171,
      "step": 216156
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.2252118587493896,
      "learning_rate": 5.642020118883683e-06,
      "loss": 2.924,
      "step": 216157
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.307096242904663,
      "learning_rate": 5.641230553578313e-06,
      "loss": 3.0191,
      "step": 216158
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.187657594680786,
      "learning_rate": 5.6404410429998324e-06,
      "loss": 3.0279,
      "step": 216159
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0163769721984863,
      "learning_rate": 5.639651587148342e-06,
      "loss": 2.8896,
      "step": 216160
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.059086322784424,
      "learning_rate": 5.638862186023974e-06,
      "loss": 2.9858,
      "step": 216161
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.958901882171631,
      "learning_rate": 5.638072839626895e-06,
      "loss": 2.8677,
      "step": 216162
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0321521759033203,
      "learning_rate": 5.637283547957272e-06,
      "loss": 3.0084,
      "step": 216163
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.4133176803588867,
      "learning_rate": 5.636494311015205e-06,
      "loss": 3.0953,
      "step": 216164
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.204055070877075,
      "learning_rate": 5.635705128800927e-06,
      "loss": 3.1311,
      "step": 216165
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9999454021453857,
      "learning_rate": 5.634916001314504e-06,
      "loss": 2.9367,
      "step": 216166
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8820810317993164,
      "learning_rate": 5.634126928556071e-06,
      "loss": 2.7169,
      "step": 216167
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1383166313171387,
      "learning_rate": 5.633337910525859e-06,
      "loss": 2.8671,
      "step": 216168
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8132646083831787,
      "learning_rate": 5.632548947223903e-06,
      "loss": 3.0279,
      "step": 216169
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.070801019668579,
      "learning_rate": 5.631760038650468e-06,
      "loss": 2.715,
      "step": 216170
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.3701584339141846,
      "learning_rate": 5.630971184805655e-06,
      "loss": 2.9347,
      "step": 216171
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9050867557525635,
      "learning_rate": 5.6301823856895634e-06,
      "loss": 2.7525,
      "step": 216172
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.215994358062744,
      "learning_rate": 5.629393641302393e-06,
      "loss": 2.8617,
      "step": 216173
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.808429479598999,
      "learning_rate": 5.6286049516442775e-06,
      "loss": 3.0577,
      "step": 216174
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.478884696960449,
      "learning_rate": 5.62781631671535e-06,
      "loss": 3.1918,
      "step": 216175
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.9132542610168457,
      "learning_rate": 5.6270277365157765e-06,
      "loss": 3.2373,
      "step": 216176
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.625021457672119,
      "learning_rate": 5.626239211045691e-06,
      "loss": 2.9225,
      "step": 216177
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.26200532913208,
      "learning_rate": 5.62545074030526e-06,
      "loss": 2.9256,
      "step": 216178
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.102154731750488,
      "learning_rate": 5.624662324294582e-06,
      "loss": 3.1208,
      "step": 216179
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.8131356239318848,
      "learning_rate": 5.6238739630138585e-06,
      "loss": 3.0883,
      "step": 216180
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.786485433578491,
      "learning_rate": 5.623085656463222e-06,
      "loss": 3.0233,
      "step": 216181
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.891171932220459,
      "learning_rate": 5.622297404642806e-06,
      "loss": 2.8395,
      "step": 216182
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1188371181488037,
      "learning_rate": 5.6215092075527436e-06,
      "loss": 2.8281,
      "step": 216183
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5490002632141113,
      "learning_rate": 5.620721065193234e-06,
      "loss": 2.9352,
      "step": 216184
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.990783452987671,
      "learning_rate": 5.619932977564346e-06,
      "loss": 2.9787,
      "step": 216185
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.647834539413452,
      "learning_rate": 5.61914494466631e-06,
      "loss": 3.0358,
      "step": 216186
    },
    {
      "epoch": 2.81,
      "grad_norm": 4.62269401550293,
      "learning_rate": 5.618356966499193e-06,
      "loss": 3.0079,
      "step": 216187
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.133060932159424,
      "learning_rate": 5.617569043063197e-06,
      "loss": 2.9467,
      "step": 216188
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.1446070671081543,
      "learning_rate": 5.616781174358453e-06,
      "loss": 3.0144,
      "step": 216189
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.5479187965393066,
      "learning_rate": 5.6159933603851295e-06,
      "loss": 2.9352,
      "step": 216190
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.242349624633789,
      "learning_rate": 5.615205601143291e-06,
      "loss": 3.1881,
      "step": 216191
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.7173757553100586,
      "learning_rate": 5.614417896633205e-06,
      "loss": 2.8078,
      "step": 216192
    },
    {
      "epoch": 2.81,
      "grad_norm": 3.0936269760131836,
      "learning_rate": 5.613630246854906e-06,
      "loss": 2.8448,
      "step": 216193
    },
    {
      "epoch": 2.81,
      "grad_norm": 2.8232035636901855,
      "learning_rate": 5.612842651808625e-06,
      "loss": 2.7484,
      "step": 216194
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7487025260925293,
      "learning_rate": 5.612055111494429e-06,
      "loss": 2.8437,
      "step": 216195
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.274951219558716,
      "learning_rate": 5.611267625912585e-06,
      "loss": 3.0633,
      "step": 216196
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7327404022216797,
      "learning_rate": 5.610480195063094e-06,
      "loss": 2.8622,
      "step": 216197
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8588149547576904,
      "learning_rate": 5.609692818946221e-06,
      "loss": 3.0517,
      "step": 216198
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.379481315612793,
      "learning_rate": 5.608905497562033e-06,
      "loss": 2.8452,
      "step": 216199
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3176512718200684,
      "learning_rate": 5.60811823091073e-06,
      "loss": 3.0299,
      "step": 216200
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7127463817596436,
      "learning_rate": 5.607331018992378e-06,
      "loss": 2.9505,
      "step": 216201
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.731084108352661,
      "learning_rate": 5.606543861807278e-06,
      "loss": 2.9271,
      "step": 216202
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2076621055603027,
      "learning_rate": 5.605756759355396e-06,
      "loss": 2.843,
      "step": 216203
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5423166751861572,
      "learning_rate": 5.604969711636997e-06,
      "loss": 2.9631,
      "step": 216204
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2500429153442383,
      "learning_rate": 5.60418271865215e-06,
      "loss": 2.8325,
      "step": 216205
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.024470567703247,
      "learning_rate": 5.603395780401088e-06,
      "loss": 2.6217,
      "step": 216206
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.650874137878418,
      "learning_rate": 5.602608896883876e-06,
      "loss": 2.6928,
      "step": 216207
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7398436069488525,
      "learning_rate": 5.601822068100747e-06,
      "loss": 2.9287,
      "step": 216208
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.092848777770996,
      "learning_rate": 5.60103529405177e-06,
      "loss": 2.9743,
      "step": 216209
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8793160915374756,
      "learning_rate": 5.60024857473711e-06,
      "loss": 2.8545,
      "step": 216210
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8523383140563965,
      "learning_rate": 5.5994619101569e-06,
      "loss": 3.0438,
      "step": 216211
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5046443939208984,
      "learning_rate": 5.59867530031134e-06,
      "loss": 3.0822,
      "step": 216212
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.120997905731201,
      "learning_rate": 5.5978887452005295e-06,
      "loss": 2.9489,
      "step": 216213
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.49802565574646,
      "learning_rate": 5.597102244824669e-06,
      "loss": 2.736,
      "step": 216214
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8300650119781494,
      "learning_rate": 5.596315799183826e-06,
      "loss": 3.0412,
      "step": 216215
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8859245777130127,
      "learning_rate": 5.595529408278199e-06,
      "loss": 2.8509,
      "step": 216216
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.486968517303467,
      "learning_rate": 5.594743072107888e-06,
      "loss": 2.8938,
      "step": 216217
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1609551906585693,
      "learning_rate": 5.593956790673093e-06,
      "loss": 3.0641,
      "step": 216218
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7910923957824707,
      "learning_rate": 5.593170563973947e-06,
      "loss": 2.9496,
      "step": 216219
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9619038105010986,
      "learning_rate": 5.592384392010585e-06,
      "loss": 2.7174,
      "step": 216220
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.724855899810791,
      "learning_rate": 5.591598274783171e-06,
      "loss": 2.9689,
      "step": 216221
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2148003578186035,
      "learning_rate": 5.590812212291807e-06,
      "loss": 2.7799,
      "step": 216222
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0364973545074463,
      "learning_rate": 5.590026204536658e-06,
      "loss": 3.1504,
      "step": 216223
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6256895065307617,
      "learning_rate": 5.589240251517924e-06,
      "loss": 3.0126,
      "step": 216224
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8384273052215576,
      "learning_rate": 5.5884543532356715e-06,
      "loss": 2.8522,
      "step": 216225
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8370015621185303,
      "learning_rate": 5.587668509690102e-06,
      "loss": 2.9929,
      "step": 216226
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6808505058288574,
      "learning_rate": 5.586882720881347e-06,
      "loss": 3.0599,
      "step": 216227
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2243599891662598,
      "learning_rate": 5.586096986809507e-06,
      "loss": 3.1104,
      "step": 216228
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5068421363830566,
      "learning_rate": 5.5853113074748155e-06,
      "loss": 3.0006,
      "step": 216229
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.666964530944824,
      "learning_rate": 5.584525682877372e-06,
      "loss": 2.8277,
      "step": 216230
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3883440494537354,
      "learning_rate": 5.58374011301731e-06,
      "loss": 2.8129,
      "step": 216231
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0290279388427734,
      "learning_rate": 5.5829545978947955e-06,
      "loss": 2.7623,
      "step": 216232
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9827141761779785,
      "learning_rate": 5.582169137509962e-06,
      "loss": 3.0661,
      "step": 216233
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.119168758392334,
      "learning_rate": 5.581383731862943e-06,
      "loss": 2.8863,
      "step": 216234
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.437281847000122,
      "learning_rate": 5.580598380953905e-06,
      "loss": 2.8106,
      "step": 216235
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8033688068389893,
      "learning_rate": 5.579813084783013e-06,
      "loss": 2.9656,
      "step": 216236
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.389286994934082,
      "learning_rate": 5.57902784335037e-06,
      "loss": 2.9779,
      "step": 216237
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4648656845092773,
      "learning_rate": 5.5782426566561735e-06,
      "loss": 3.0277,
      "step": 216238
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.131197690963745,
      "learning_rate": 5.577457524700523e-06,
      "loss": 2.7351,
      "step": 216239
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.570544481277466,
      "learning_rate": 5.576672447483554e-06,
      "loss": 2.8883,
      "step": 216240
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.860004186630249,
      "learning_rate": 5.575887425005465e-06,
      "loss": 2.8822,
      "step": 216241
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.445862293243408,
      "learning_rate": 5.575102457266356e-06,
      "loss": 2.6963,
      "step": 216242
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5381855964660645,
      "learning_rate": 5.574317544266427e-06,
      "loss": 2.9563,
      "step": 216243
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.9313437938690186,
      "learning_rate": 5.573532686005777e-06,
      "loss": 3.1498,
      "step": 216244
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8329217433929443,
      "learning_rate": 5.572747882484574e-06,
      "loss": 2.939,
      "step": 216245
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.97646164894104,
      "learning_rate": 5.57196313370295e-06,
      "loss": 2.6903,
      "step": 216246
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.111905813217163,
      "learning_rate": 5.571178439661039e-06,
      "loss": 3.1044,
      "step": 216247
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8465921878814697,
      "learning_rate": 5.570393800359008e-06,
      "loss": 3.0641,
      "step": 216248
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8330583572387695,
      "learning_rate": 5.569609215797022e-06,
      "loss": 2.8445,
      "step": 216249
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8345022201538086,
      "learning_rate": 5.568824685975182e-06,
      "loss": 2.7103,
      "step": 216250
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.648658514022827,
      "learning_rate": 5.568040210893654e-06,
      "loss": 2.9016,
      "step": 216251
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6676034927368164,
      "learning_rate": 5.567255790552605e-06,
      "loss": 2.9608,
      "step": 216252
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7036571502685547,
      "learning_rate": 5.566471424952135e-06,
      "loss": 2.8509,
      "step": 216253
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.780092239379883,
      "learning_rate": 5.56568711409241e-06,
      "loss": 2.8297,
      "step": 216254
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.452221155166626,
      "learning_rate": 5.564902857973597e-06,
      "loss": 3.0211,
      "step": 216255
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.937546730041504,
      "learning_rate": 5.564118656595862e-06,
      "loss": 2.9084,
      "step": 216256
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.785297155380249,
      "learning_rate": 5.563334509959272e-06,
      "loss": 3.1332,
      "step": 216257
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9710638523101807,
      "learning_rate": 5.562550418064027e-06,
      "loss": 2.7216,
      "step": 216258
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8054568767547607,
      "learning_rate": 5.56176638091026e-06,
      "loss": 2.8658,
      "step": 216259
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9726924896240234,
      "learning_rate": 5.560982398498104e-06,
      "loss": 2.9647,
      "step": 216260
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1660594940185547,
      "learning_rate": 5.560198470827759e-06,
      "loss": 2.7286,
      "step": 216261
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.840519666671753,
      "learning_rate": 5.5594145978992584e-06,
      "loss": 2.8604,
      "step": 216262
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.0152907371521,
      "learning_rate": 5.558630779712902e-06,
      "loss": 2.7036,
      "step": 216263
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.067422389984131,
      "learning_rate": 5.557847016268724e-06,
      "loss": 2.6731,
      "step": 216264
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.736379861831665,
      "learning_rate": 5.557063307566889e-06,
      "loss": 3.1948,
      "step": 216265
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.486467123031616,
      "learning_rate": 5.556279653607565e-06,
      "loss": 2.9007,
      "step": 216266
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7723312377929688,
      "learning_rate": 5.555496054390884e-06,
      "loss": 2.7027,
      "step": 216267
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6638479232788086,
      "learning_rate": 5.554712509916981e-06,
      "loss": 2.8726,
      "step": 216268
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.862797975540161,
      "learning_rate": 5.553929020186054e-06,
      "loss": 3.0583,
      "step": 216269
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2695865631103516,
      "learning_rate": 5.553145585198171e-06,
      "loss": 2.9933,
      "step": 216270
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.076448440551758,
      "learning_rate": 5.552362204953531e-06,
      "loss": 2.8355,
      "step": 216271
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.943539619445801,
      "learning_rate": 5.551578879452268e-06,
      "loss": 2.6551,
      "step": 216272
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0114328861236572,
      "learning_rate": 5.550795608694514e-06,
      "loss": 2.9645,
      "step": 216273
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2854535579681396,
      "learning_rate": 5.550012392680436e-06,
      "loss": 2.7781,
      "step": 216274
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0517637729644775,
      "learning_rate": 5.549229231410168e-06,
      "loss": 2.8413,
      "step": 216275
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7591969966888428,
      "learning_rate": 5.548446124883843e-06,
      "loss": 2.9772,
      "step": 216276
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.752976179122925,
      "learning_rate": 5.5476630731016605e-06,
      "loss": 3.088,
      "step": 216277
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9165878295898438,
      "learning_rate": 5.546880076063653e-06,
      "loss": 2.6484,
      "step": 216278
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.850447177886963,
      "learning_rate": 5.546097133770089e-06,
      "loss": 3.0041,
      "step": 216279
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8806917667388916,
      "learning_rate": 5.545314246221067e-06,
      "loss": 2.8389,
      "step": 216280
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3127665519714355,
      "learning_rate": 5.544531413416719e-06,
      "loss": 2.8545,
      "step": 216281
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9738616943359375,
      "learning_rate": 5.5437486353572144e-06,
      "loss": 2.837,
      "step": 216282
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6825599670410156,
      "learning_rate": 5.542965912042652e-06,
      "loss": 2.937,
      "step": 216283
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6731491088867188,
      "learning_rate": 5.542183243473231e-06,
      "loss": 2.8792,
      "step": 216284
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5382590293884277,
      "learning_rate": 5.541400629649084e-06,
      "loss": 3.008,
      "step": 216285
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9919753074645996,
      "learning_rate": 5.540618070570313e-06,
      "loss": 2.6558,
      "step": 216286
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.178452253341675,
      "learning_rate": 5.539835566237149e-06,
      "loss": 2.8558,
      "step": 216287
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1997251510620117,
      "learning_rate": 5.539053116649661e-06,
      "loss": 2.8375,
      "step": 216288
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.413832426071167,
      "learning_rate": 5.5382707218080465e-06,
      "loss": 3.2337,
      "step": 216289
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.695028781890869,
      "learning_rate": 5.537488381712374e-06,
      "loss": 2.8881,
      "step": 216290
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6221206188201904,
      "learning_rate": 5.536706096362875e-06,
      "loss": 2.9079,
      "step": 216291
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.692012310028076,
      "learning_rate": 5.535923865759651e-06,
      "loss": 2.8548,
      "step": 216292
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8567912578582764,
      "learning_rate": 5.5351416899029e-06,
      "loss": 3.0732,
      "step": 216293
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.61863374710083,
      "learning_rate": 5.534359568792657e-06,
      "loss": 2.9189,
      "step": 216294
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.940134048461914,
      "learning_rate": 5.533577502429187e-06,
      "loss": 2.7362,
      "step": 216295
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6089131832122803,
      "learning_rate": 5.532795490812525e-06,
      "loss": 3.2651,
      "step": 216296
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.178501844406128,
      "learning_rate": 5.532013533942936e-06,
      "loss": 2.8719,
      "step": 216297
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.390615701675415,
      "learning_rate": 5.5312316318204545e-06,
      "loss": 2.5706,
      "step": 216298
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.924281120300293,
      "learning_rate": 5.530449784445345e-06,
      "loss": 2.9121,
      "step": 216299
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5495684146881104,
      "learning_rate": 5.52966799181761e-06,
      "loss": 3.0458,
      "step": 216300
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9141087532043457,
      "learning_rate": 5.528886253937514e-06,
      "loss": 2.943,
      "step": 216301
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.289758682250977,
      "learning_rate": 5.528104570805125e-06,
      "loss": 2.7729,
      "step": 216302
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6571714878082275,
      "learning_rate": 5.5273229424206425e-06,
      "loss": 2.8762,
      "step": 216303
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.66877818107605,
      "learning_rate": 5.526541368784165e-06,
      "loss": 3.0412,
      "step": 216304
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7410049438476562,
      "learning_rate": 5.525759849895894e-06,
      "loss": 2.9263,
      "step": 216305
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9479281902313232,
      "learning_rate": 5.524978385755929e-06,
      "loss": 3.1474,
      "step": 216306
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0922977924346924,
      "learning_rate": 5.524196976364436e-06,
      "loss": 2.7428,
      "step": 216307
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.682197332382202,
      "learning_rate": 5.523415621721549e-06,
      "loss": 2.7478,
      "step": 216308
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.731271743774414,
      "learning_rate": 5.522634321827402e-06,
      "loss": 3.0835,
      "step": 216309
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1255440711975098,
      "learning_rate": 5.521853076682159e-06,
      "loss": 3.1768,
      "step": 216310
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.135277271270752,
      "learning_rate": 5.521071886285988e-06,
      "loss": 2.8594,
      "step": 216311
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4274580478668213,
      "learning_rate": 5.520290750638989e-06,
      "loss": 2.8553,
      "step": 216312
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.960306406021118,
      "learning_rate": 5.519509669741362e-06,
      "loss": 2.9266,
      "step": 216313
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.439409017562866,
      "learning_rate": 5.518728643593173e-06,
      "loss": 2.8186,
      "step": 216314
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3185415267944336,
      "learning_rate": 5.517947672194622e-06,
      "loss": 2.8049,
      "step": 216315
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3417282104492188,
      "learning_rate": 5.5171667555458095e-06,
      "loss": 3.1684,
      "step": 216316
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.276263475418091,
      "learning_rate": 5.516385893646969e-06,
      "loss": 2.99,
      "step": 216317
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.07928466796875,
      "learning_rate": 5.515605086498198e-06,
      "loss": 3.0469,
      "step": 216318
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9797582626342773,
      "learning_rate": 5.514824334099566e-06,
      "loss": 2.8083,
      "step": 216319
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.836559772491455,
      "learning_rate": 5.514043636451338e-06,
      "loss": 3.0423,
      "step": 216320
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8948185443878174,
      "learning_rate": 5.513262993553613e-06,
      "loss": 2.8313,
      "step": 216321
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3270249366760254,
      "learning_rate": 5.512482405406493e-06,
      "loss": 2.942,
      "step": 216322
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3415069580078125,
      "learning_rate": 5.51170187201021e-06,
      "loss": 3.1583,
      "step": 216323
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.946758508682251,
      "learning_rate": 5.510921393364831e-06,
      "loss": 3.0263,
      "step": 216324
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.795614719390869,
      "learning_rate": 5.510140969470522e-06,
      "loss": 3.0917,
      "step": 216325
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.531607151031494,
      "learning_rate": 5.509360600327484e-06,
      "loss": 2.7939,
      "step": 216326
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.765639305114746,
      "learning_rate": 5.508580285935748e-06,
      "loss": 2.6393,
      "step": 216327
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6991021633148193,
      "learning_rate": 5.507800026295583e-06,
      "loss": 3.0438,
      "step": 216328
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7006874084472656,
      "learning_rate": 5.507019821407055e-06,
      "loss": 3.0772,
      "step": 216329
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0667877197265625,
      "learning_rate": 5.506239671270329e-06,
      "loss": 3.0415,
      "step": 216330
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6546573638916016,
      "learning_rate": 5.50545957588554e-06,
      "loss": 3.0018,
      "step": 216331
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1664774417877197,
      "learning_rate": 5.504679535252854e-06,
      "loss": 3.1227,
      "step": 216332
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1543397903442383,
      "learning_rate": 5.503899549372404e-06,
      "loss": 2.7987,
      "step": 216333
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.642183780670166,
      "learning_rate": 5.503119618244356e-06,
      "loss": 2.8489,
      "step": 216334
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.296931028366089,
      "learning_rate": 5.502339741868844e-06,
      "loss": 2.7987,
      "step": 216335
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8785617351531982,
      "learning_rate": 5.5015599202460015e-06,
      "loss": 2.7346,
      "step": 216336
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9199154376983643,
      "learning_rate": 5.500780153375928e-06,
      "loss": 3.2576,
      "step": 216337
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4258615970611572,
      "learning_rate": 5.50000044125889e-06,
      "loss": 2.7368,
      "step": 216338
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2252588272094727,
      "learning_rate": 5.499220783894886e-06,
      "loss": 2.9843,
      "step": 216339
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.879467725753784,
      "learning_rate": 5.498441181284185e-06,
      "loss": 3.0695,
      "step": 216340
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9915425777435303,
      "learning_rate": 5.497661633426853e-06,
      "loss": 3.1454,
      "step": 216341
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8773083686828613,
      "learning_rate": 5.496882140323123e-06,
      "loss": 2.9046,
      "step": 216342
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.658029317855835,
      "learning_rate": 5.496102701973027e-06,
      "loss": 2.8239,
      "step": 216343
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.939500570297241,
      "learning_rate": 5.495323318376799e-06,
      "loss": 2.7977,
      "step": 216344
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.749441146850586,
      "learning_rate": 5.494543989534506e-06,
      "loss": 2.8853,
      "step": 216345
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.061659097671509,
      "learning_rate": 5.49376471544638e-06,
      "loss": 2.992,
      "step": 216346
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.093799114227295,
      "learning_rate": 5.49298549611249e-06,
      "loss": 3.1697,
      "step": 216347
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1542186737060547,
      "learning_rate": 5.4922063315330665e-06,
      "loss": 3.1767,
      "step": 216348
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.249599933624268,
      "learning_rate": 5.491427221708144e-06,
      "loss": 2.8828,
      "step": 216349
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9336516857147217,
      "learning_rate": 5.4906481666379564e-06,
      "loss": 2.8103,
      "step": 216350
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.815201997756958,
      "learning_rate": 5.4898691663226026e-06,
      "loss": 2.7426,
      "step": 216351
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.6385273933410645,
      "learning_rate": 5.4890902207622485e-06,
      "loss": 2.8906,
      "step": 216352
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0738167762756348,
      "learning_rate": 5.488311329956996e-06,
      "loss": 2.8495,
      "step": 216353
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3467931747436523,
      "learning_rate": 5.48753249390711e-06,
      "loss": 3.1459,
      "step": 216354
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.915503740310669,
      "learning_rate": 5.486753712612557e-06,
      "loss": 3.0141,
      "step": 216355
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5744736194610596,
      "learning_rate": 5.485974986073638e-06,
      "loss": 3.0778,
      "step": 216356
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9852561950683594,
      "learning_rate": 5.485196314290385e-06,
      "loss": 2.999,
      "step": 216357
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.281878709793091,
      "learning_rate": 5.484417697263033e-06,
      "loss": 2.8741,
      "step": 216358
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.709071636199951,
      "learning_rate": 5.48363913499168e-06,
      "loss": 2.956,
      "step": 216359
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.694356918334961,
      "learning_rate": 5.482860627476493e-06,
      "loss": 2.8151,
      "step": 216360
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.637300968170166,
      "learning_rate": 5.482082174717573e-06,
      "loss": 2.9464,
      "step": 216361
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.490901470184326,
      "learning_rate": 5.481303776715118e-06,
      "loss": 2.9086,
      "step": 216362
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.726457357406616,
      "learning_rate": 5.48052543346923e-06,
      "loss": 2.9966,
      "step": 216363
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8066861629486084,
      "learning_rate": 5.479747144980073e-06,
      "loss": 2.8184,
      "step": 216364
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.841984510421753,
      "learning_rate": 5.478968911247783e-06,
      "loss": 3.0364,
      "step": 216365
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.420267105102539,
      "learning_rate": 5.4781907322725584e-06,
      "loss": 2.8449,
      "step": 216366
    },
    {
      "epoch": 2.82,
      "grad_norm": 5.045916557312012,
      "learning_rate": 5.477412608054432e-06,
      "loss": 2.9148,
      "step": 216367
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3874573707580566,
      "learning_rate": 5.476634538593639e-06,
      "loss": 2.7417,
      "step": 216368
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.112595081329346,
      "learning_rate": 5.47585652389031e-06,
      "loss": 2.8159,
      "step": 216369
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9572513103485107,
      "learning_rate": 5.4750785639445795e-06,
      "loss": 2.8747,
      "step": 216370
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.07096004486084,
      "learning_rate": 5.474300658756548e-06,
      "loss": 2.9106,
      "step": 216371
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.165902853012085,
      "learning_rate": 5.47352280832648e-06,
      "loss": 2.8481,
      "step": 216372
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5922768115997314,
      "learning_rate": 5.472745012654378e-06,
      "loss": 2.9345,
      "step": 216373
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6641979217529297,
      "learning_rate": 5.471967271740507e-06,
      "loss": 3.1048,
      "step": 216374
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.642390489578247,
      "learning_rate": 5.471189585584901e-06,
      "loss": 3.0608,
      "step": 216375
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.758924722671509,
      "learning_rate": 5.470411954187792e-06,
      "loss": 3.039,
      "step": 216376
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8817641735076904,
      "learning_rate": 5.469634377549281e-06,
      "loss": 2.8132,
      "step": 216377
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.946155071258545,
      "learning_rate": 5.468856855669568e-06,
      "loss": 2.7836,
      "step": 216378
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8883705139160156,
      "learning_rate": 5.4680793885487185e-06,
      "loss": 2.8482,
      "step": 216379
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1198883056640625,
      "learning_rate": 5.4673019761869e-06,
      "loss": 2.9946,
      "step": 216380
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5978078842163086,
      "learning_rate": 5.4665246185842785e-06,
      "loss": 2.8678,
      "step": 216381
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5460143089294434,
      "learning_rate": 5.465747315740987e-06,
      "loss": 3.0348,
      "step": 216382
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.93544864654541,
      "learning_rate": 5.46497006765716e-06,
      "loss": 2.9087,
      "step": 216383
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9454431533813477,
      "learning_rate": 5.464192874333029e-06,
      "loss": 2.9979,
      "step": 216384
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.675966739654541,
      "learning_rate": 5.463415735768595e-06,
      "loss": 2.7564,
      "step": 216385
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.169238567352295,
      "learning_rate": 5.462638651964091e-06,
      "loss": 2.8985,
      "step": 216386
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.090035915374756,
      "learning_rate": 5.461861622919617e-06,
      "loss": 2.8749,
      "step": 216387
    },
    {
      "epoch": 2.82,
      "grad_norm": 6.386701583862305,
      "learning_rate": 5.4610846486353724e-06,
      "loss": 3.1557,
      "step": 216388
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.987077474594116,
      "learning_rate": 5.460307729111457e-06,
      "loss": 2.6933,
      "step": 216389
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8122129440307617,
      "learning_rate": 5.459530864348038e-06,
      "loss": 3.1695,
      "step": 216390
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1951167583465576,
      "learning_rate": 5.458754054345249e-06,
      "loss": 2.9616,
      "step": 216391
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.729523181915283,
      "learning_rate": 5.457977299103256e-06,
      "loss": 2.8796,
      "step": 216392
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8270950317382812,
      "learning_rate": 5.457200598622158e-06,
      "loss": 3.1066,
      "step": 216393
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.4411492347717285,
      "learning_rate": 5.456423952902156e-06,
      "loss": 2.7499,
      "step": 216394
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2336063385009766,
      "learning_rate": 5.455647361943316e-06,
      "loss": 2.9782,
      "step": 216395
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.107495307922363,
      "learning_rate": 5.454870825745871e-06,
      "loss": 2.9358,
      "step": 216396
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.041996479034424,
      "learning_rate": 5.454094344309922e-06,
      "loss": 2.8097,
      "step": 216397
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.564452648162842,
      "learning_rate": 5.453317917635636e-06,
      "loss": 2.9697,
      "step": 216398
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6069517135620117,
      "learning_rate": 5.452541545723077e-06,
      "loss": 2.895,
      "step": 216399
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.697335720062256,
      "learning_rate": 5.4517652285725135e-06,
      "loss": 2.999,
      "step": 216400
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1585023403167725,
      "learning_rate": 5.450988966183978e-06,
      "loss": 3.0192,
      "step": 216401
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3592870235443115,
      "learning_rate": 5.45021275855767e-06,
      "loss": 2.9595,
      "step": 216402
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4469540119171143,
      "learning_rate": 5.449436605693758e-06,
      "loss": 2.9286,
      "step": 216403
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.278163194656372,
      "learning_rate": 5.4486605075923395e-06,
      "loss": 2.9267,
      "step": 216404
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1689436435699463,
      "learning_rate": 5.4478844642535825e-06,
      "loss": 2.9534,
      "step": 216405
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.770446538925171,
      "learning_rate": 5.44710847567762e-06,
      "loss": 2.808,
      "step": 216406
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1555533409118652,
      "learning_rate": 5.446332541864584e-06,
      "loss": 2.8646,
      "step": 216407
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9580817222595215,
      "learning_rate": 5.445556662814643e-06,
      "loss": 3.1182,
      "step": 216408
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9413437843322754,
      "learning_rate": 5.4447808385279625e-06,
      "loss": 3.1245,
      "step": 216409
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6263864040374756,
      "learning_rate": 5.444005069004609e-06,
      "loss": 2.9927,
      "step": 216410
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.467259407043457,
      "learning_rate": 5.443229354244816e-06,
      "loss": 2.7751,
      "step": 216411
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6054329872131348,
      "learning_rate": 5.44245369424865e-06,
      "loss": 3.0501,
      "step": 216412
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9895007610321045,
      "learning_rate": 5.44167808901631e-06,
      "loss": 2.7736,
      "step": 216413
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7248191833496094,
      "learning_rate": 5.440902538547931e-06,
      "loss": 3.0137,
      "step": 216414
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6704885959625244,
      "learning_rate": 5.440127042843645e-06,
      "loss": 3.0414,
      "step": 216415
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.846282482147217,
      "learning_rate": 5.439351601903585e-06,
      "loss": 2.8613,
      "step": 216416
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.276205062866211,
      "learning_rate": 5.4385762157279525e-06,
      "loss": 3.0933,
      "step": 216417
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.664470672607422,
      "learning_rate": 5.4378008843167785e-06,
      "loss": 2.8726,
      "step": 216418
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.051969289779663,
      "learning_rate": 5.437025607670331e-06,
      "loss": 2.8768,
      "step": 216419
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7090396881103516,
      "learning_rate": 5.4362503857887095e-06,
      "loss": 2.8636,
      "step": 216420
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.128753662109375,
      "learning_rate": 5.435475218672014e-06,
      "loss": 2.8181,
      "step": 216421
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6156961917877197,
      "learning_rate": 5.434700106320444e-06,
      "loss": 2.9399,
      "step": 216422
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.224200487136841,
      "learning_rate": 5.4339250487341006e-06,
      "loss": 2.9697,
      "step": 216423
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.121746063232422,
      "learning_rate": 5.433150045913148e-06,
      "loss": 2.808,
      "step": 216424
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7305853366851807,
      "learning_rate": 5.432375097857788e-06,
      "loss": 3.0238,
      "step": 216425
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5954484939575195,
      "learning_rate": 5.431600204568054e-06,
      "loss": 3.1043,
      "step": 216426
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2123546600341797,
      "learning_rate": 5.430825366044178e-06,
      "loss": 2.9321,
      "step": 216427
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.033446788787842,
      "learning_rate": 5.43005058228626e-06,
      "loss": 3.0577,
      "step": 216428
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0211849212646484,
      "learning_rate": 5.429275853294468e-06,
      "loss": 2.9514,
      "step": 216429
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.866823196411133,
      "learning_rate": 5.4285011790689e-06,
      "loss": 3.0028,
      "step": 216430
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.073420524597168,
      "learning_rate": 5.427726559609757e-06,
      "loss": 3.264,
      "step": 216431
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3976657390594482,
      "learning_rate": 5.426951994917139e-06,
      "loss": 3.0626,
      "step": 216432
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.233717918395996,
      "learning_rate": 5.426177484991278e-06,
      "loss": 2.9233,
      "step": 216433
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.811910390853882,
      "learning_rate": 5.425403029832176e-06,
      "loss": 2.9585,
      "step": 216434
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.803571939468384,
      "learning_rate": 5.424628629440097e-06,
      "loss": 2.8613,
      "step": 216435
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.895282745361328,
      "learning_rate": 5.42385428381511e-06,
      "loss": 2.8638,
      "step": 216436
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.006760835647583,
      "learning_rate": 5.423079992957413e-06,
      "loss": 2.9918,
      "step": 216437
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9347903728485107,
      "learning_rate": 5.422305756867107e-06,
      "loss": 2.8391,
      "step": 216438
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8561737537384033,
      "learning_rate": 5.421531575544391e-06,
      "loss": 2.9105,
      "step": 216439
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.307326078414917,
      "learning_rate": 5.4207574489893324e-06,
      "loss": 2.938,
      "step": 216440
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.082915306091309,
      "learning_rate": 5.419983377202131e-06,
      "loss": 3.1211,
      "step": 216441
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.014162540435791,
      "learning_rate": 5.419209360182886e-06,
      "loss": 2.8338,
      "step": 216442
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8682901859283447,
      "learning_rate": 5.418435397931797e-06,
      "loss": 2.8559,
      "step": 216443
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4572551250457764,
      "learning_rate": 5.417661490448966e-06,
      "loss": 2.7185,
      "step": 216444
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4267566204071045,
      "learning_rate": 5.416887637734624e-06,
      "loss": 2.9856,
      "step": 216445
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.844395399093628,
      "learning_rate": 5.416113839788772e-06,
      "loss": 2.9077,
      "step": 216446
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.026766300201416,
      "learning_rate": 5.415340096611642e-06,
      "loss": 2.8342,
      "step": 216447
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.213531970977783,
      "learning_rate": 5.414566408203336e-06,
      "loss": 2.8935,
      "step": 216448
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0636157989501953,
      "learning_rate": 5.413792774564052e-06,
      "loss": 3.002,
      "step": 216449
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.645188570022583,
      "learning_rate": 5.413019195693891e-06,
      "loss": 2.6476,
      "step": 216450
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0674800872802734,
      "learning_rate": 5.412245671593052e-06,
      "loss": 2.8956,
      "step": 216451
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.56300687789917,
      "learning_rate": 5.411472202261602e-06,
      "loss": 2.8608,
      "step": 216452
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7405455112457275,
      "learning_rate": 5.410698787699707e-06,
      "loss": 2.8158,
      "step": 216453
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5865936279296875,
      "learning_rate": 5.409925427907535e-06,
      "loss": 2.8127,
      "step": 216454
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.636211633682251,
      "learning_rate": 5.4091521228852185e-06,
      "loss": 2.8299,
      "step": 216455
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.263002872467041,
      "learning_rate": 5.4083788726328905e-06,
      "loss": 2.982,
      "step": 216456
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1629672050476074,
      "learning_rate": 5.4076056771507505e-06,
      "loss": 2.9608,
      "step": 216457
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.162012815475464,
      "learning_rate": 5.4068325364388655e-06,
      "loss": 2.8961,
      "step": 216458
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.754993438720703,
      "learning_rate": 5.406059450497402e-06,
      "loss": 2.7912,
      "step": 216459
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7895777225494385,
      "learning_rate": 5.405286419326527e-06,
      "loss": 2.8329,
      "step": 216460
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.925553321838379,
      "learning_rate": 5.404513442926373e-06,
      "loss": 2.9109,
      "step": 216461
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.558683395385742,
      "learning_rate": 5.403740521297073e-06,
      "loss": 2.8967,
      "step": 216462
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.702486276626587,
      "learning_rate": 5.402967654438761e-06,
      "loss": 2.8932,
      "step": 216463
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.081509113311768,
      "learning_rate": 5.402194842351637e-06,
      "loss": 2.9286,
      "step": 216464
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.108642339706421,
      "learning_rate": 5.401422085035801e-06,
      "loss": 2.9368,
      "step": 216465
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.810178756713867,
      "learning_rate": 5.400649382491384e-06,
      "loss": 3.1212,
      "step": 216466
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6223080158233643,
      "learning_rate": 5.399876734718555e-06,
      "loss": 3.2197,
      "step": 216467
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.992128610610962,
      "learning_rate": 5.399104141717414e-06,
      "loss": 2.9733,
      "step": 216468
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2715916633605957,
      "learning_rate": 5.3983316034881925e-06,
      "loss": 2.6636,
      "step": 216469
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8245291709899902,
      "learning_rate": 5.397559120030959e-06,
      "loss": 2.8771,
      "step": 216470
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.4052228927612305,
      "learning_rate": 5.396786691345911e-06,
      "loss": 2.8816,
      "step": 216471
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8405516147613525,
      "learning_rate": 5.396014317433117e-06,
      "loss": 2.7013,
      "step": 216472
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7044260501861572,
      "learning_rate": 5.395241998292777e-06,
      "loss": 2.7748,
      "step": 216473
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.108299732208252,
      "learning_rate": 5.394469733925022e-06,
      "loss": 2.9814,
      "step": 216474
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1726338863372803,
      "learning_rate": 5.3936975243299876e-06,
      "loss": 3.1647,
      "step": 216475
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9724807739257812,
      "learning_rate": 5.392925369507872e-06,
      "loss": 2.8556,
      "step": 216476
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2400829792022705,
      "learning_rate": 5.392153269458743e-06,
      "loss": 2.9367,
      "step": 216477
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.059631824493408,
      "learning_rate": 5.391381224182767e-06,
      "loss": 2.7289,
      "step": 216478
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.032064199447632,
      "learning_rate": 5.3906092336801095e-06,
      "loss": 2.8405,
      "step": 216479
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.985156297683716,
      "learning_rate": 5.389837297950872e-06,
      "loss": 2.8352,
      "step": 216480
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.311541795730591,
      "learning_rate": 5.389065416995219e-06,
      "loss": 3.0094,
      "step": 216481
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3515288829803467,
      "learning_rate": 5.388293590813353e-06,
      "loss": 2.9015,
      "step": 216482
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.451927423477173,
      "learning_rate": 5.3875218194053384e-06,
      "loss": 2.8018,
      "step": 216483
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.048664569854736,
      "learning_rate": 5.3867501027713085e-06,
      "loss": 2.9899,
      "step": 216484
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.247762441635132,
      "learning_rate": 5.385978440911498e-06,
      "loss": 2.9344,
      "step": 216485
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8245370388031006,
      "learning_rate": 5.385206833825939e-06,
      "loss": 2.9901,
      "step": 216486
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1278398036956787,
      "learning_rate": 5.384435281514899e-06,
      "loss": 3.1582,
      "step": 216487
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6073286533355713,
      "learning_rate": 5.383663783978409e-06,
      "loss": 2.8601,
      "step": 216488
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.587869167327881,
      "learning_rate": 5.382892341216672e-06,
      "loss": 3.1259,
      "step": 216489
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7577409744262695,
      "learning_rate": 5.3821209532298184e-06,
      "loss": 3.0784,
      "step": 216490
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7758660316467285,
      "learning_rate": 5.381349620017983e-06,
      "loss": 2.8015,
      "step": 216491
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.543940544128418,
      "learning_rate": 5.380578341581298e-06,
      "loss": 3.0058,
      "step": 216492
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1409547328948975,
      "learning_rate": 5.379807117919965e-06,
      "loss": 2.9541,
      "step": 216493
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2794058322906494,
      "learning_rate": 5.379035949034049e-06,
      "loss": 2.8192,
      "step": 216494
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.029825210571289,
      "learning_rate": 5.378264834923751e-06,
      "loss": 2.728,
      "step": 216495
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.772470235824585,
      "learning_rate": 5.3774937755892035e-06,
      "loss": 2.7625,
      "step": 216496
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1359124183654785,
      "learning_rate": 5.376722771030539e-06,
      "loss": 2.9488,
      "step": 216497
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0549156665802,
      "learning_rate": 5.375951821247859e-06,
      "loss": 2.9364,
      "step": 216498
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2092814445495605,
      "learning_rate": 5.375180926241396e-06,
      "loss": 2.8091,
      "step": 216499
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.820199728012085,
      "learning_rate": 5.3744100860112496e-06,
      "loss": 3.0051,
      "step": 216500
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.253993511199951,
      "learning_rate": 5.373639300557553e-06,
      "loss": 2.8455,
      "step": 216501
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.103675127029419,
      "learning_rate": 5.3728685698804396e-06,
      "loss": 3.0066,
      "step": 216502
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3211209774017334,
      "learning_rate": 5.372097893980076e-06,
      "loss": 3.0684,
      "step": 216503
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9563965797424316,
      "learning_rate": 5.371327272856629e-06,
      "loss": 2.9263,
      "step": 216504
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0460383892059326,
      "learning_rate": 5.370556706510199e-06,
      "loss": 2.8456,
      "step": 216505
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9098918437957764,
      "learning_rate": 5.369786194940951e-06,
      "loss": 2.8964,
      "step": 216506
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.752021074295044,
      "learning_rate": 5.369015738149018e-06,
      "loss": 2.8433,
      "step": 216507
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.879636526107788,
      "learning_rate": 5.368245336134536e-06,
      "loss": 2.9877,
      "step": 216508
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6981053352355957,
      "learning_rate": 5.367474988897636e-06,
      "loss": 2.9139,
      "step": 216509
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2672393321990967,
      "learning_rate": 5.36670469643855e-06,
      "loss": 3.1239,
      "step": 216510
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.9778857231140137,
      "learning_rate": 5.365934458757281e-06,
      "loss": 2.8217,
      "step": 216511
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1469101905822754,
      "learning_rate": 5.365164275854128e-06,
      "loss": 2.7588,
      "step": 216512
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.718235731124878,
      "learning_rate": 5.364394147729089e-06,
      "loss": 3.1941,
      "step": 216513
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.389554262161255,
      "learning_rate": 5.3636240743824e-06,
      "loss": 2.8625,
      "step": 216514
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.128277540206909,
      "learning_rate": 5.3628540558141586e-06,
      "loss": 2.83,
      "step": 216515
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.035374641418457,
      "learning_rate": 5.362084092024532e-06,
      "loss": 2.9908,
      "step": 216516
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.712907075881958,
      "learning_rate": 5.36131418301362e-06,
      "loss": 3.2127,
      "step": 216517
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3507473468780518,
      "learning_rate": 5.360544328781691e-06,
      "loss": 2.703,
      "step": 216518
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.474593162536621,
      "learning_rate": 5.359774529328708e-06,
      "loss": 2.763,
      "step": 216519
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.831122636795044,
      "learning_rate": 5.359004784654941e-06,
      "loss": 2.8465,
      "step": 216520
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0684316158294678,
      "learning_rate": 5.358235094760488e-06,
      "loss": 2.8811,
      "step": 216521
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5767688751220703,
      "learning_rate": 5.3574654596455155e-06,
      "loss": 3.1355,
      "step": 216522
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0052342414855957,
      "learning_rate": 5.356695879310124e-06,
      "loss": 2.7214,
      "step": 216523
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7464652061462402,
      "learning_rate": 5.355926353754547e-06,
      "loss": 2.9699,
      "step": 216524
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0424489974975586,
      "learning_rate": 5.355156882978817e-06,
      "loss": 2.9704,
      "step": 216525
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5134544372558594,
      "learning_rate": 5.354387466983134e-06,
      "loss": 2.7534,
      "step": 216526
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.691526412963867,
      "learning_rate": 5.353618105767599e-06,
      "loss": 2.8689,
      "step": 216527
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.140164375305176,
      "learning_rate": 5.352848799332443e-06,
      "loss": 2.8449,
      "step": 216528
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7793049812316895,
      "learning_rate": 5.352079547677701e-06,
      "loss": 3.0987,
      "step": 216529
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.461388111114502,
      "learning_rate": 5.351310350803606e-06,
      "loss": 2.716,
      "step": 216530
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7456562519073486,
      "learning_rate": 5.350541208710291e-06,
      "loss": 2.8305,
      "step": 216531
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.578198194503784,
      "learning_rate": 5.349772121397822e-06,
      "loss": 2.9052,
      "step": 216532
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.506499767303467,
      "learning_rate": 5.3490030888664e-06,
      "loss": 2.6921,
      "step": 216533
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.665621519088745,
      "learning_rate": 5.34823411111619e-06,
      "loss": 2.7791,
      "step": 216534
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.577831268310547,
      "learning_rate": 5.34746518814726e-06,
      "loss": 2.9236,
      "step": 216535
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8341686725616455,
      "learning_rate": 5.346696319959842e-06,
      "loss": 2.6453,
      "step": 216536
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.99605655670166,
      "learning_rate": 5.345927506554004e-06,
      "loss": 3.0473,
      "step": 216537
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8903958797454834,
      "learning_rate": 5.345158747929945e-06,
      "loss": 2.963,
      "step": 216538
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5520169734954834,
      "learning_rate": 5.344390044087765e-06,
      "loss": 3.0101,
      "step": 216539
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.728489875793457,
      "learning_rate": 5.343621395027631e-06,
      "loss": 2.7675,
      "step": 216540
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6429390907287598,
      "learning_rate": 5.342852800749675e-06,
      "loss": 3.1843,
      "step": 216541
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.166968822479248,
      "learning_rate": 5.342084261254065e-06,
      "loss": 2.8686,
      "step": 216542
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6869428157806396,
      "learning_rate": 5.3413157765409e-06,
      "loss": 2.7422,
      "step": 216543
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.992155075073242,
      "learning_rate": 5.340547346610347e-06,
      "loss": 2.9449,
      "step": 216544
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9278013706207275,
      "learning_rate": 5.339778971462539e-06,
      "loss": 2.6541,
      "step": 216545
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4227356910705566,
      "learning_rate": 5.339010651097675e-06,
      "loss": 2.9298,
      "step": 216546
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.796729803085327,
      "learning_rate": 5.33824238551579e-06,
      "loss": 3.1759,
      "step": 216547
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.303858518600464,
      "learning_rate": 5.337474174717149e-06,
      "loss": 2.9017,
      "step": 216548
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.847902774810791,
      "learning_rate": 5.336706018701786e-06,
      "loss": 3.0394,
      "step": 216549
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.778383255004883,
      "learning_rate": 5.335937917469935e-06,
      "loss": 2.9016,
      "step": 216550
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2754225730895996,
      "learning_rate": 5.335169871021661e-06,
      "loss": 2.8716,
      "step": 216551
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2418394088745117,
      "learning_rate": 5.334401879357131e-06,
      "loss": 2.8438,
      "step": 216552
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.213111639022827,
      "learning_rate": 5.333633942476512e-06,
      "loss": 2.8943,
      "step": 216553
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6316347122192383,
      "learning_rate": 5.332866060379937e-06,
      "loss": 2.6628,
      "step": 216554
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.595733404159546,
      "learning_rate": 5.332098233067572e-06,
      "loss": 2.9056,
      "step": 216555
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5539391040802,
      "learning_rate": 5.331330460539485e-06,
      "loss": 2.6136,
      "step": 216556
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1497457027435303,
      "learning_rate": 5.330562742795874e-06,
      "loss": 3.0183,
      "step": 216557
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.270203113555908,
      "learning_rate": 5.329795079836907e-06,
      "loss": 2.7982,
      "step": 216558
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.205068588256836,
      "learning_rate": 5.32902747166265e-06,
      "loss": 2.6803,
      "step": 216559
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.560713052749634,
      "learning_rate": 5.328259918273337e-06,
      "loss": 3.1911,
      "step": 216560
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9563682079315186,
      "learning_rate": 5.327492419669032e-06,
      "loss": 3.0177,
      "step": 216561
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.032902956008911,
      "learning_rate": 5.326724975849905e-06,
      "loss": 2.8142,
      "step": 216562
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.755908250808716,
      "learning_rate": 5.325957586816121e-06,
      "loss": 2.9325,
      "step": 216563
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2350194454193115,
      "learning_rate": 5.325190252567779e-06,
      "loss": 2.8412,
      "step": 216564
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.208385944366455,
      "learning_rate": 5.324422973105047e-06,
      "loss": 3.1383,
      "step": 216565
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.798474073410034,
      "learning_rate": 5.323655748428091e-06,
      "loss": 2.9203,
      "step": 216566
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7568588256835938,
      "learning_rate": 5.322888578537044e-06,
      "loss": 2.75,
      "step": 216567
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8927879333496094,
      "learning_rate": 5.322121463432005e-06,
      "loss": 2.8389,
      "step": 216568
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8424792289733887,
      "learning_rate": 5.321354403113143e-06,
      "loss": 2.868,
      "step": 216569
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.98089599609375,
      "learning_rate": 5.320587397580623e-06,
      "loss": 2.869,
      "step": 216570
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.947758674621582,
      "learning_rate": 5.319820446834544e-06,
      "loss": 2.8116,
      "step": 216571
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9291181564331055,
      "learning_rate": 5.319053550875074e-06,
      "loss": 2.6503,
      "step": 216572
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8548004627227783,
      "learning_rate": 5.3182867097023796e-06,
      "loss": 2.7642,
      "step": 216573
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.95632266998291,
      "learning_rate": 5.31751992331656e-06,
      "loss": 2.9959,
      "step": 216574
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.598299503326416,
      "learning_rate": 5.316753191717782e-06,
      "loss": 2.9304,
      "step": 216575
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3167049884796143,
      "learning_rate": 5.315986514906178e-06,
      "loss": 2.7345,
      "step": 216576
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4256606101989746,
      "learning_rate": 5.315219892881883e-06,
      "loss": 2.9723,
      "step": 216577
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.26291561126709,
      "learning_rate": 5.314453325645096e-06,
      "loss": 2.7373,
      "step": 216578
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4201321601867676,
      "learning_rate": 5.313686813195883e-06,
      "loss": 3.0866,
      "step": 216579
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8007054328918457,
      "learning_rate": 5.3129203555344115e-06,
      "loss": 2.9786,
      "step": 216580
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.523162364959717,
      "learning_rate": 5.312153952660847e-06,
      "loss": 2.813,
      "step": 216581
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.501751661300659,
      "learning_rate": 5.3113876045753234e-06,
      "loss": 3.018,
      "step": 216582
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5687882900238037,
      "learning_rate": 5.310621311277941e-06,
      "loss": 2.7745,
      "step": 216583
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.907330274581909,
      "learning_rate": 5.309855072768898e-06,
      "loss": 2.8558,
      "step": 216584
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.603388786315918,
      "learning_rate": 5.3090888890483295e-06,
      "loss": 3.1612,
      "step": 216585
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6093955039978027,
      "learning_rate": 5.308322760116335e-06,
      "loss": 2.8639,
      "step": 216586
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.742244005203247,
      "learning_rate": 5.307556685973113e-06,
      "loss": 3.1086,
      "step": 216587
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.336148738861084,
      "learning_rate": 5.306790666618732e-06,
      "loss": 2.8145,
      "step": 216588
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.981592893600464,
      "learning_rate": 5.306024702053424e-06,
      "loss": 2.7292,
      "step": 216589
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9446496963500977,
      "learning_rate": 5.305258792277289e-06,
      "loss": 3.022,
      "step": 216590
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0674970149993896,
      "learning_rate": 5.304492937290461e-06,
      "loss": 2.6079,
      "step": 216591
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9228363037109375,
      "learning_rate": 5.303727137093072e-06,
      "loss": 3.2378,
      "step": 216592
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.564011335372925,
      "learning_rate": 5.3029613916853235e-06,
      "loss": 2.8023,
      "step": 216593
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4091687202453613,
      "learning_rate": 5.30219570106728e-06,
      "loss": 3.0567,
      "step": 216594
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.999807834625244,
      "learning_rate": 5.301430065239143e-06,
      "loss": 2.8873,
      "step": 216595
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.964944362640381,
      "learning_rate": 5.300664484201045e-06,
      "loss": 2.7964,
      "step": 216596
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7474288940429688,
      "learning_rate": 5.299898957953053e-06,
      "loss": 2.8415,
      "step": 216597
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9487924575805664,
      "learning_rate": 5.2991334864954325e-06,
      "loss": 3.0152,
      "step": 216598
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.215423583984375,
      "learning_rate": 5.298368069828252e-06,
      "loss": 2.9862,
      "step": 216599
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8945472240448,
      "learning_rate": 5.297602707951676e-06,
      "loss": 3.0495,
      "step": 216600
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.134800672531128,
      "learning_rate": 5.296837400865806e-06,
      "loss": 2.8604,
      "step": 216601
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.140443801879883,
      "learning_rate": 5.29607214857084e-06,
      "loss": 3.0991,
      "step": 216602
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8998782634735107,
      "learning_rate": 5.295306951066913e-06,
      "loss": 3.0798,
      "step": 216603
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7739005088806152,
      "learning_rate": 5.294541808354125e-06,
      "loss": 3.1188,
      "step": 216604
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7006826400756836,
      "learning_rate": 5.293776720432674e-06,
      "loss": 2.9659,
      "step": 216605
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.482715368270874,
      "learning_rate": 5.2930116873026285e-06,
      "loss": 2.7681,
      "step": 216606
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7824015617370605,
      "learning_rate": 5.29224670896422e-06,
      "loss": 2.7978,
      "step": 216607
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.000244140625,
      "learning_rate": 5.291481785417517e-06,
      "loss": 3.0333,
      "step": 216608
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1689910888671875,
      "learning_rate": 5.290716916662685e-06,
      "loss": 2.869,
      "step": 216609
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.839421272277832,
      "learning_rate": 5.28995210269989e-06,
      "loss": 2.9783,
      "step": 216610
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.972391128540039,
      "learning_rate": 5.2891873435292665e-06,
      "loss": 2.7246,
      "step": 216611
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.00176739692688,
      "learning_rate": 5.288422639150913e-06,
      "loss": 2.6162,
      "step": 216612
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1827874183654785,
      "learning_rate": 5.2876579895650304e-06,
      "loss": 2.8034,
      "step": 216613
    },
    {
      "epoch": 2.82,
      "grad_norm": 5.141982555389404,
      "learning_rate": 5.286893394771719e-06,
      "loss": 2.9306,
      "step": 216614
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.09682297706604,
      "learning_rate": 5.286128854771177e-06,
      "loss": 2.89,
      "step": 216615
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.9058470726013184,
      "learning_rate": 5.285364369563472e-06,
      "loss": 2.8339,
      "step": 216616
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0397653579711914,
      "learning_rate": 5.2845999391488034e-06,
      "loss": 2.9114,
      "step": 216617
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6569104194641113,
      "learning_rate": 5.283835563527239e-06,
      "loss": 3.0028,
      "step": 216618
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7724506855010986,
      "learning_rate": 5.2830712426990105e-06,
      "loss": 2.8314,
      "step": 216619
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7043466567993164,
      "learning_rate": 5.282306976664219e-06,
      "loss": 2.873,
      "step": 216620
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9766998291015625,
      "learning_rate": 5.2815427654230305e-06,
      "loss": 2.9222,
      "step": 216621
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.217817544937134,
      "learning_rate": 5.280778608975545e-06,
      "loss": 2.896,
      "step": 216622
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6689934730529785,
      "learning_rate": 5.280014507321928e-06,
      "loss": 2.8813,
      "step": 216623
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9149224758148193,
      "learning_rate": 5.279250460462314e-06,
      "loss": 2.7521,
      "step": 216624
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7214810848236084,
      "learning_rate": 5.278486468396837e-06,
      "loss": 2.998,
      "step": 216625
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.852926254272461,
      "learning_rate": 5.277722531125661e-06,
      "loss": 2.8276,
      "step": 216626
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.40887713432312,
      "learning_rate": 5.276958648648955e-06,
      "loss": 2.6963,
      "step": 216627
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.270474910736084,
      "learning_rate": 5.276194820966784e-06,
      "loss": 2.9447,
      "step": 216628
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5895450115203857,
      "learning_rate": 5.275431048079382e-06,
      "loss": 2.9678,
      "step": 216629
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8758935928344727,
      "learning_rate": 5.2746673299867816e-06,
      "loss": 3.0411,
      "step": 216630
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8694400787353516,
      "learning_rate": 5.273903666689216e-06,
      "loss": 2.9816,
      "step": 216631
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4228737354278564,
      "learning_rate": 5.273140058186754e-06,
      "loss": 2.6045,
      "step": 216632
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.9927985668182373,
      "learning_rate": 5.272376504479625e-06,
      "loss": 2.7708,
      "step": 216633
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8124213218688965,
      "learning_rate": 5.2716130055679315e-06,
      "loss": 2.9199,
      "step": 216634
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2657952308654785,
      "learning_rate": 5.270849561451807e-06,
      "loss": 2.8091,
      "step": 216635
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.186009645462036,
      "learning_rate": 5.270086172131349e-06,
      "loss": 3.0592,
      "step": 216636
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9432060718536377,
      "learning_rate": 5.2693228376067935e-06,
      "loss": 2.9302,
      "step": 216637
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.436638832092285,
      "learning_rate": 5.268559557878205e-06,
      "loss": 2.9039,
      "step": 216638
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6960537433624268,
      "learning_rate": 5.267796332945751e-06,
      "loss": 2.7739,
      "step": 216639
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1993284225463867,
      "learning_rate": 5.267033162809597e-06,
      "loss": 3.1215,
      "step": 216640
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.950778007507324,
      "learning_rate": 5.266270047469878e-06,
      "loss": 2.84,
      "step": 216641
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.789161205291748,
      "learning_rate": 5.265506986926693e-06,
      "loss": 2.8406,
      "step": 216642
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1222591400146484,
      "learning_rate": 5.264743981180209e-06,
      "loss": 2.6959,
      "step": 216643
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.432323694229126,
      "learning_rate": 5.263981030230591e-06,
      "loss": 2.8728,
      "step": 216644
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7150979042053223,
      "learning_rate": 5.263218134077973e-06,
      "loss": 2.782,
      "step": 216645
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.166473627090454,
      "learning_rate": 5.2624552927224565e-06,
      "loss": 3.0636,
      "step": 216646
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5448508262634277,
      "learning_rate": 5.261692506164239e-06,
      "loss": 2.7241,
      "step": 216647
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.022751808166504,
      "learning_rate": 5.260929774403422e-06,
      "loss": 2.7597,
      "step": 216648
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9379117488861084,
      "learning_rate": 5.260167097440171e-06,
      "loss": 2.7612,
      "step": 216649
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.20143723487854,
      "learning_rate": 5.259404475274587e-06,
      "loss": 3.0813,
      "step": 216650
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.679013252258301,
      "learning_rate": 5.258641907906902e-06,
      "loss": 2.889,
      "step": 216651
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7858147621154785,
      "learning_rate": 5.2578793953371835e-06,
      "loss": 2.7869,
      "step": 216652
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0295684337615967,
      "learning_rate": 5.257116937565564e-06,
      "loss": 2.9048,
      "step": 216653
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8569302558898926,
      "learning_rate": 5.2563545345922105e-06,
      "loss": 2.9944,
      "step": 216654
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7268693447113037,
      "learning_rate": 5.25559218641729e-06,
      "loss": 2.8446,
      "step": 216655
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.593958854675293,
      "learning_rate": 5.2548298930408685e-06,
      "loss": 2.8513,
      "step": 216656
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.974544048309326,
      "learning_rate": 5.254067654463179e-06,
      "loss": 2.902,
      "step": 216657
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7464630603790283,
      "learning_rate": 5.253305470684288e-06,
      "loss": 3.1872,
      "step": 216658
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.060288906097412,
      "learning_rate": 5.2525433417044295e-06,
      "loss": 2.8445,
      "step": 216659
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9216439723968506,
      "learning_rate": 5.251781267523636e-06,
      "loss": 2.9812,
      "step": 216660
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.610827684402466,
      "learning_rate": 5.251019248142141e-06,
      "loss": 2.8305,
      "step": 216661
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.239633798599243,
      "learning_rate": 5.250257283560011e-06,
      "loss": 2.713,
      "step": 216662
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.078263282775879,
      "learning_rate": 5.249495373777446e-06,
      "loss": 3.0187,
      "step": 216663
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8592560291290283,
      "learning_rate": 5.248733518794546e-06,
      "loss": 2.9136,
      "step": 216664
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7183151245117188,
      "learning_rate": 5.247971718611477e-06,
      "loss": 2.7438,
      "step": 216665
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.307743549346924,
      "learning_rate": 5.247209973228372e-06,
      "loss": 2.83,
      "step": 216666
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5399272441864014,
      "learning_rate": 5.246448282645399e-06,
      "loss": 2.7562,
      "step": 216667
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.743533134460449,
      "learning_rate": 5.245686646862624e-06,
      "loss": 2.955,
      "step": 216668
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6789729595184326,
      "learning_rate": 5.244925065880279e-06,
      "loss": 3.0568,
      "step": 216669
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.708702802658081,
      "learning_rate": 5.244163539698432e-06,
      "loss": 2.8895,
      "step": 216670
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8150551319122314,
      "learning_rate": 5.2434020683172815e-06,
      "loss": 2.7967,
      "step": 216671
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.152975082397461,
      "learning_rate": 5.2426406517369625e-06,
      "loss": 2.8354,
      "step": 216672
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0893213748931885,
      "learning_rate": 5.241879289957574e-06,
      "loss": 2.9723,
      "step": 216673
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.52939510345459,
      "learning_rate": 5.241117982979315e-06,
      "loss": 2.9001,
      "step": 216674
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7151379585266113,
      "learning_rate": 5.240356730802286e-06,
      "loss": 2.9655,
      "step": 216675
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9255518913269043,
      "learning_rate": 5.239595533426588e-06,
      "loss": 3.1021,
      "step": 216676
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.80780291557312,
      "learning_rate": 5.238834390852486e-06,
      "loss": 2.9415,
      "step": 216677
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6476850509643555,
      "learning_rate": 5.238073303080015e-06,
      "loss": 2.9187,
      "step": 216678
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.328684329986572,
      "learning_rate": 5.23731227010934e-06,
      "loss": 3.0812,
      "step": 216679
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5613651275634766,
      "learning_rate": 5.236551291940627e-06,
      "loss": 2.898,
      "step": 216680
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7710630893707275,
      "learning_rate": 5.235790368574011e-06,
      "loss": 2.9561,
      "step": 216681
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.852215051651001,
      "learning_rate": 5.235029500009591e-06,
      "loss": 3.0031,
      "step": 216682
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.750849962234497,
      "learning_rate": 5.2342686862476006e-06,
      "loss": 2.8567,
      "step": 216683
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2806737422943115,
      "learning_rate": 5.233507927288072e-06,
      "loss": 2.8345,
      "step": 216684
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.03963565826416,
      "learning_rate": 5.232747223131206e-06,
      "loss": 2.9234,
      "step": 216685
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5137429237365723,
      "learning_rate": 5.231986573777169e-06,
      "loss": 2.9009,
      "step": 216686
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5245158672332764,
      "learning_rate": 5.231225979226028e-06,
      "loss": 2.8306,
      "step": 216687
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.414673328399658,
      "learning_rate": 5.230465439477982e-06,
      "loss": 3.1041,
      "step": 216688
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.217257261276245,
      "learning_rate": 5.229704954533198e-06,
      "loss": 2.8066,
      "step": 216689
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8185484409332275,
      "learning_rate": 5.228944524391743e-06,
      "loss": 3.1291,
      "step": 216690
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.849769115447998,
      "learning_rate": 5.2281841490537825e-06,
      "loss": 2.8764,
      "step": 216691
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1540839672088623,
      "learning_rate": 5.22742382851945e-06,
      "loss": 2.7223,
      "step": 216692
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0748403072357178,
      "learning_rate": 5.226663562788913e-06,
      "loss": 2.9303,
      "step": 216693
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5774261951446533,
      "learning_rate": 5.225903351862337e-06,
      "loss": 2.936,
      "step": 216694
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7144110202789307,
      "learning_rate": 5.2251431957398226e-06,
      "loss": 2.8642,
      "step": 216695
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.160801887512207,
      "learning_rate": 5.2243830944215024e-06,
      "loss": 2.8333,
      "step": 216696
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.151946544647217,
      "learning_rate": 5.223623047907544e-06,
      "loss": 3.1394,
      "step": 216697
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8540873527526855,
      "learning_rate": 5.222863056198079e-06,
      "loss": 3.0011,
      "step": 216698
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.829066753387451,
      "learning_rate": 5.2221031192932084e-06,
      "loss": 3.0732,
      "step": 216699
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6389694213867188,
      "learning_rate": 5.221343237193165e-06,
      "loss": 2.6793,
      "step": 216700
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0211727619171143,
      "learning_rate": 5.220583409898016e-06,
      "loss": 2.6774,
      "step": 216701
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.573509454727173,
      "learning_rate": 5.219823637407927e-06,
      "loss": 3.2074,
      "step": 216702
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.995246171951294,
      "learning_rate": 5.219063919723032e-06,
      "loss": 2.7261,
      "step": 216703
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.991270065307617,
      "learning_rate": 5.218304256843497e-06,
      "loss": 2.9101,
      "step": 216704
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.530940055847168,
      "learning_rate": 5.2175446487694225e-06,
      "loss": 3.0674,
      "step": 216705
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7920658588409424,
      "learning_rate": 5.216785095500975e-06,
      "loss": 2.8878,
      "step": 216706
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7197325229644775,
      "learning_rate": 5.216025597038287e-06,
      "loss": 2.7887,
      "step": 216707
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0885629653930664,
      "learning_rate": 5.215266153381525e-06,
      "loss": 2.8889,
      "step": 216708
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.87880277633667,
      "learning_rate": 5.21450676453079e-06,
      "loss": 2.9555,
      "step": 216709
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2865240573883057,
      "learning_rate": 5.213747430486248e-06,
      "loss": 3.1402,
      "step": 216710
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.740394115447998,
      "learning_rate": 5.212988151248032e-06,
      "loss": 2.632,
      "step": 216711
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.927090883255005,
      "learning_rate": 5.2122289268163085e-06,
      "loss": 2.7516,
      "step": 216712
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.569735050201416,
      "learning_rate": 5.211469757191178e-06,
      "loss": 3.054,
      "step": 216713
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6999621391296387,
      "learning_rate": 5.2107106423728065e-06,
      "loss": 2.8418,
      "step": 216714
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.959817886352539,
      "learning_rate": 5.2099515823612934e-06,
      "loss": 2.8709,
      "step": 216715
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6221764087677,
      "learning_rate": 5.2091925771568736e-06,
      "loss": 3.0353,
      "step": 216716
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7320382595062256,
      "learning_rate": 5.208433626759578e-06,
      "loss": 2.919,
      "step": 216717
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7884068489074707,
      "learning_rate": 5.207674731169609e-06,
      "loss": 2.9996,
      "step": 216718
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.824221134185791,
      "learning_rate": 5.206915890387131e-06,
      "loss": 2.8377,
      "step": 216719
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2448837757110596,
      "learning_rate": 5.2061571044122455e-06,
      "loss": 3.0898,
      "step": 216720
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0024733543395996,
      "learning_rate": 5.205398373245051e-06,
      "loss": 2.8551,
      "step": 216721
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.222256898880005,
      "learning_rate": 5.2046396968857814e-06,
      "loss": 2.94,
      "step": 216722
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.313281536102295,
      "learning_rate": 5.203881075334504e-06,
      "loss": 3.047,
      "step": 216723
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5749778747558594,
      "learning_rate": 5.203122508591417e-06,
      "loss": 2.9992,
      "step": 216724
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.908738851547241,
      "learning_rate": 5.202363996656622e-06,
      "loss": 2.6529,
      "step": 216725
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.594468593597412,
      "learning_rate": 5.201605539530318e-06,
      "loss": 2.7324,
      "step": 216726
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.745896339416504,
      "learning_rate": 5.200847137212538e-06,
      "loss": 2.6038,
      "step": 216727
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8478426933288574,
      "learning_rate": 5.200088789703516e-06,
      "loss": 2.9316,
      "step": 216728
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0766162872314453,
      "learning_rate": 5.199330497003351e-06,
      "loss": 2.9757,
      "step": 216729
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7236135005950928,
      "learning_rate": 5.198572259112177e-06,
      "loss": 2.91,
      "step": 216730
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.169917345046997,
      "learning_rate": 5.19781407603016e-06,
      "loss": 2.8264,
      "step": 216731
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4105021953582764,
      "learning_rate": 5.1970559477575e-06,
      "loss": 2.9436,
      "step": 216732
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.032083511352539,
      "learning_rate": 5.196297874294198e-06,
      "loss": 2.8031,
      "step": 216733
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7404520511627197,
      "learning_rate": 5.195539855640485e-06,
      "loss": 2.9841,
      "step": 216734
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8371036052703857,
      "learning_rate": 5.194781891796496e-06,
      "loss": 3.1261,
      "step": 216735
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0825037956237793,
      "learning_rate": 5.194023982762363e-06,
      "loss": 2.7479,
      "step": 216736
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2057132720947266,
      "learning_rate": 5.193266128538187e-06,
      "loss": 3.0217,
      "step": 216737
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.981295347213745,
      "learning_rate": 5.192508329124234e-06,
      "loss": 2.7981,
      "step": 216738
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9101665019989014,
      "learning_rate": 5.191750584520471e-06,
      "loss": 2.8874,
      "step": 216739
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2193102836608887,
      "learning_rate": 5.190992894727197e-06,
      "loss": 2.7261,
      "step": 216740
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.044473886489868,
      "learning_rate": 5.190235259744413e-06,
      "loss": 3.1027,
      "step": 216741
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7861244678497314,
      "learning_rate": 5.189477679572385e-06,
      "loss": 3.1784,
      "step": 216742
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3699123859405518,
      "learning_rate": 5.188720154211146e-06,
      "loss": 2.88,
      "step": 216743
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.23382306098938,
      "learning_rate": 5.187962683660962e-06,
      "loss": 2.9416,
      "step": 216744
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.650465488433838,
      "learning_rate": 5.187205267921834e-06,
      "loss": 2.7839,
      "step": 216745
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.641679525375366,
      "learning_rate": 5.186447906993996e-06,
      "loss": 2.756,
      "step": 216746
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.9860734939575195,
      "learning_rate": 5.1856906008775456e-06,
      "loss": 3.0271,
      "step": 216747
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.511812925338745,
      "learning_rate": 5.184933349572684e-06,
      "loss": 3.0017,
      "step": 216748
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.166710376739502,
      "learning_rate": 5.1841761530794445e-06,
      "loss": 2.9578,
      "step": 216749
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6069483757019043,
      "learning_rate": 5.1834190113980934e-06,
      "loss": 2.9442,
      "step": 216750
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.74765944480896,
      "learning_rate": 5.182661924528664e-06,
      "loss": 2.8768,
      "step": 216751
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.61899471282959,
      "learning_rate": 5.181904892471389e-06,
      "loss": 2.9758,
      "step": 216752
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.674551248550415,
      "learning_rate": 5.181147915226302e-06,
      "loss": 2.9084,
      "step": 216753
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.247685194015503,
      "learning_rate": 5.180390992793671e-06,
      "loss": 2.8405,
      "step": 216754
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.4850032329559326,
      "learning_rate": 5.179634125173493e-06,
      "loss": 2.9852,
      "step": 216755
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8435540199279785,
      "learning_rate": 5.17887731236607e-06,
      "loss": 2.7719,
      "step": 216756
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.8483526706695557,
      "learning_rate": 5.178120554371401e-06,
      "loss": 2.6816,
      "step": 216757
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9354000091552734,
      "learning_rate": 5.17736385118972e-06,
      "loss": 3.0851,
      "step": 216758
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1243112087249756,
      "learning_rate": 5.176607202821093e-06,
      "loss": 2.9399,
      "step": 216759
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.873891830444336,
      "learning_rate": 5.175850609265753e-06,
      "loss": 3.0128,
      "step": 216760
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.518482208251953,
      "learning_rate": 5.175094070523733e-06,
      "loss": 2.8387,
      "step": 216761
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.302000522613525,
      "learning_rate": 5.174337586595267e-06,
      "loss": 2.9597,
      "step": 216762
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1499452590942383,
      "learning_rate": 5.173581157480455e-06,
      "loss": 2.93,
      "step": 216763
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.739135980606079,
      "learning_rate": 5.172824783179397e-06,
      "loss": 3.0518,
      "step": 216764
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.940443754196167,
      "learning_rate": 5.1720684636923246e-06,
      "loss": 2.5868,
      "step": 216765
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.950198173522949,
      "learning_rate": 5.171312199019306e-06,
      "loss": 3.0592,
      "step": 216766
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2951807975769043,
      "learning_rate": 5.170555989160507e-06,
      "loss": 2.6771,
      "step": 216767
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.848498582839966,
      "learning_rate": 5.169799834116062e-06,
      "loss": 2.9657,
      "step": 216768
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.769639730453491,
      "learning_rate": 5.1690437338861355e-06,
      "loss": 2.6779,
      "step": 216769
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.064558267593384,
      "learning_rate": 5.168287688470829e-06,
      "loss": 3.1606,
      "step": 216770
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.437018394470215,
      "learning_rate": 5.167531697870309e-06,
      "loss": 2.8533,
      "step": 216771
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2650952339172363,
      "learning_rate": 5.166775762084707e-06,
      "loss": 2.9148,
      "step": 216772
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.843954086303711,
      "learning_rate": 5.166019881114192e-06,
      "loss": 2.9041,
      "step": 216773
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.108522891998291,
      "learning_rate": 5.165264054958862e-06,
      "loss": 2.8698,
      "step": 216774
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9144856929779053,
      "learning_rate": 5.1645082836188846e-06,
      "loss": 2.822,
      "step": 216775
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.774686574935913,
      "learning_rate": 5.1637525670943595e-06,
      "loss": 3.0692,
      "step": 216776
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7566728591918945,
      "learning_rate": 5.162996905385486e-06,
      "loss": 2.8812,
      "step": 216777
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.92901349067688,
      "learning_rate": 5.162241298492364e-06,
      "loss": 2.8917,
      "step": 216778
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9593505859375,
      "learning_rate": 5.1614857464151616e-06,
      "loss": 2.9253,
      "step": 216779
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.096989870071411,
      "learning_rate": 5.16073024915401e-06,
      "loss": 2.665,
      "step": 216780
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.562107801437378,
      "learning_rate": 5.159974806709044e-06,
      "loss": 2.8285,
      "step": 216781
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5618085861206055,
      "learning_rate": 5.159219419080363e-06,
      "loss": 2.945,
      "step": 216782
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0986411571502686,
      "learning_rate": 5.158464086268199e-06,
      "loss": 2.9298,
      "step": 216783
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8210408687591553,
      "learning_rate": 5.1577088082725874e-06,
      "loss": 2.892,
      "step": 216784
    },
    {
      "epoch": 2.82,
      "grad_norm": 5.124683856964111,
      "learning_rate": 5.156953585093759e-06,
      "loss": 2.915,
      "step": 216785
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.2414655685424805,
      "learning_rate": 5.156198416731816e-06,
      "loss": 2.9553,
      "step": 216786
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.450742244720459,
      "learning_rate": 5.155443303186924e-06,
      "loss": 2.6909,
      "step": 216787
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2586872577667236,
      "learning_rate": 5.154688244459149e-06,
      "loss": 2.8468,
      "step": 216788
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.9533448219299316,
      "learning_rate": 5.153933240548724e-06,
      "loss": 2.7835,
      "step": 216789
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7471749782562256,
      "learning_rate": 5.1531782914557175e-06,
      "loss": 2.937,
      "step": 216790
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7270750999450684,
      "learning_rate": 5.1524233971803274e-06,
      "loss": 2.8735,
      "step": 216791
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.971670627593994,
      "learning_rate": 5.151668557722655e-06,
      "loss": 3.0232,
      "step": 216792
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9085676670074463,
      "learning_rate": 5.150913773082898e-06,
      "loss": 3.0228,
      "step": 216793
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.661120653152466,
      "learning_rate": 5.150159043261093e-06,
      "loss": 2.9849,
      "step": 216794
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7294294834136963,
      "learning_rate": 5.14940436825747e-06,
      "loss": 3.0143,
      "step": 216795
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8412182331085205,
      "learning_rate": 5.1486497480720975e-06,
      "loss": 3.0835,
      "step": 216796
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.946397066116333,
      "learning_rate": 5.147895182705175e-06,
      "loss": 2.8572,
      "step": 216797
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.095125198364258,
      "learning_rate": 5.147140672156835e-06,
      "loss": 2.9852,
      "step": 216798
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8850839138031006,
      "learning_rate": 5.1463862164272455e-06,
      "loss": 2.7893,
      "step": 216799
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0646541118621826,
      "learning_rate": 5.145631815516438e-06,
      "loss": 3.0107,
      "step": 216800
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.77512788772583,
      "learning_rate": 5.1448774694246794e-06,
      "loss": 3.0692,
      "step": 216801
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.242164134979248,
      "learning_rate": 5.144123178152004e-06,
      "loss": 2.9327,
      "step": 216802
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9520132541656494,
      "learning_rate": 5.143368941698644e-06,
      "loss": 3.1157,
      "step": 216803
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8591222763061523,
      "learning_rate": 5.1426147600646674e-06,
      "loss": 2.7274,
      "step": 216804
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.754262924194336,
      "learning_rate": 5.1418606332502725e-06,
      "loss": 3.0132,
      "step": 216805
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.700604200363159,
      "learning_rate": 5.141106561255559e-06,
      "loss": 3.0254,
      "step": 216806
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0490410327911377,
      "learning_rate": 5.140352544080662e-06,
      "loss": 2.7023,
      "step": 216807
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8748180866241455,
      "learning_rate": 5.139598581725746e-06,
      "loss": 3.0373,
      "step": 216808
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0193405151367188,
      "learning_rate": 5.138844674190979e-06,
      "loss": 3.1336,
      "step": 216809
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3825862407684326,
      "learning_rate": 5.138090821476426e-06,
      "loss": 3.0299,
      "step": 216810
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.827233076095581,
      "learning_rate": 5.137337023582322e-06,
      "loss": 3.1903,
      "step": 216811
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.580139636993408,
      "learning_rate": 5.136583280508666e-06,
      "loss": 3.0106,
      "step": 216812
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1430652141571045,
      "learning_rate": 5.135829592255758e-06,
      "loss": 2.9765,
      "step": 216813
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9973058700561523,
      "learning_rate": 5.1350759588236315e-06,
      "loss": 2.9626,
      "step": 216814
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.096475839614868,
      "learning_rate": 5.134322380212485e-06,
      "loss": 2.8437,
      "step": 216815
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.065546989440918,
      "learning_rate": 5.133568856422387e-06,
      "loss": 3.1665,
      "step": 216816
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.116546154022217,
      "learning_rate": 5.132815387453604e-06,
      "loss": 3.0651,
      "step": 216817
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0765464305877686,
      "learning_rate": 5.132061973306134e-06,
      "loss": 2.844,
      "step": 216818
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8576009273529053,
      "learning_rate": 5.131308613980178e-06,
      "loss": 2.7366,
      "step": 216819
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.734056234359741,
      "learning_rate": 5.130555309475903e-06,
      "loss": 2.8597,
      "step": 216820
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4475464820861816,
      "learning_rate": 5.1298020597934085e-06,
      "loss": 2.8967,
      "step": 216821
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.647279739379883,
      "learning_rate": 5.129048864932828e-06,
      "loss": 2.9837,
      "step": 216822
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.135715961456299,
      "learning_rate": 5.128295724894394e-06,
      "loss": 2.8478,
      "step": 216823
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.788280487060547,
      "learning_rate": 5.1275426396781064e-06,
      "loss": 2.9423,
      "step": 216824
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9412341117858887,
      "learning_rate": 5.126789609284232e-06,
      "loss": 2.9198,
      "step": 216825
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.743195056915283,
      "learning_rate": 5.126036633712804e-06,
      "loss": 2.8115,
      "step": 216826
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8825838565826416,
      "learning_rate": 5.1252837129640235e-06,
      "loss": 2.887,
      "step": 216827
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8962390422821045,
      "learning_rate": 5.124530847038022e-06,
      "loss": 3.0958,
      "step": 216828
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.789010524749756,
      "learning_rate": 5.123778035934967e-06,
      "loss": 2.7554,
      "step": 216829
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6205527782440186,
      "learning_rate": 5.123025279654924e-06,
      "loss": 2.6841,
      "step": 216830
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7438912391662598,
      "learning_rate": 5.122272578198128e-06,
      "loss": 2.9144,
      "step": 216831
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6939921379089355,
      "learning_rate": 5.1215199315646106e-06,
      "loss": 3.0313,
      "step": 216832
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.95070219039917,
      "learning_rate": 5.120767339754606e-06,
      "loss": 2.9644,
      "step": 216833
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0764307975769043,
      "learning_rate": 5.1200148027681795e-06,
      "loss": 2.8739,
      "step": 216834
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.716546058654785,
      "learning_rate": 5.119262320605533e-06,
      "loss": 2.8926,
      "step": 216835
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.942336082458496,
      "learning_rate": 5.118509893266798e-06,
      "loss": 2.8926,
      "step": 216836
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5693209171295166,
      "learning_rate": 5.117757520752108e-06,
      "loss": 2.8285,
      "step": 216837
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1397008895874023,
      "learning_rate": 5.117005203061531e-06,
      "loss": 2.9653,
      "step": 216838
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0240161418914795,
      "learning_rate": 5.116252940195331e-06,
      "loss": 2.8976,
      "step": 216839
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.5243711471557617,
      "learning_rate": 5.1155007321535435e-06,
      "loss": 2.8736,
      "step": 216840
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8732049465179443,
      "learning_rate": 5.114748578936367e-06,
      "loss": 3.0746,
      "step": 216841
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.760660409927368,
      "learning_rate": 5.113996480543936e-06,
      "loss": 3.0251,
      "step": 216842
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.136353015899658,
      "learning_rate": 5.113244436976383e-06,
      "loss": 2.8671,
      "step": 216843
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.70465350151062,
      "learning_rate": 5.112492448233807e-06,
      "loss": 3.06,
      "step": 216844
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1931395530700684,
      "learning_rate": 5.111740514316442e-06,
      "loss": 2.9014,
      "step": 216845
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.188192129135132,
      "learning_rate": 5.1109886352243225e-06,
      "loss": 2.9445,
      "step": 216846
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9400649070739746,
      "learning_rate": 5.110236810957646e-06,
      "loss": 2.9519,
      "step": 216847
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.286972761154175,
      "learning_rate": 5.109485041516548e-06,
      "loss": 2.8813,
      "step": 216848
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7627203464508057,
      "learning_rate": 5.10873332690116e-06,
      "loss": 3.102,
      "step": 216849
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.490736484527588,
      "learning_rate": 5.107981667111649e-06,
      "loss": 2.9619,
      "step": 216850
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2243494987487793,
      "learning_rate": 5.1072300621481156e-06,
      "loss": 2.9549,
      "step": 216851
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7880492210388184,
      "learning_rate": 5.106478512010692e-06,
      "loss": 3.0691,
      "step": 216852
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.723057746887207,
      "learning_rate": 5.105727016699546e-06,
      "loss": 2.6859,
      "step": 216853
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.862567663192749,
      "learning_rate": 5.104975576214843e-06,
      "loss": 2.8657,
      "step": 216854
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1277549266815186,
      "learning_rate": 5.10422419055665e-06,
      "loss": 2.8827,
      "step": 216855
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6071770191192627,
      "learning_rate": 5.103472859725199e-06,
      "loss": 2.846,
      "step": 216856
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3659462928771973,
      "learning_rate": 5.102721583720526e-06,
      "loss": 3.0743,
      "step": 216857
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7194271087646484,
      "learning_rate": 5.101970362542862e-06,
      "loss": 3.0706,
      "step": 216858
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0425057411193848,
      "learning_rate": 5.101219196192308e-06,
      "loss": 2.9943,
      "step": 216859
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9466233253479004,
      "learning_rate": 5.100468084668996e-06,
      "loss": 2.9063,
      "step": 216860
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8387134075164795,
      "learning_rate": 5.09971702797306e-06,
      "loss": 2.9479,
      "step": 216861
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.664400339126587,
      "learning_rate": 5.098966026104667e-06,
      "loss": 2.7454,
      "step": 216862
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8441572189331055,
      "learning_rate": 5.098215079063916e-06,
      "loss": 2.9617,
      "step": 216863
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2307796478271484,
      "learning_rate": 5.097464186851008e-06,
      "loss": 2.7552,
      "step": 216864
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2671992778778076,
      "learning_rate": 5.096713349466042e-06,
      "loss": 2.9807,
      "step": 216865
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.80588436126709,
      "learning_rate": 5.095962566909184e-06,
      "loss": 3.2814,
      "step": 216866
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.192438840866089,
      "learning_rate": 5.095211839180502e-06,
      "loss": 3.0046,
      "step": 216867
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.800553798675537,
      "learning_rate": 5.094461166280228e-06,
      "loss": 2.7272,
      "step": 216868
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.071089267730713,
      "learning_rate": 5.093710548208429e-06,
      "loss": 2.9002,
      "step": 216869
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3787546157836914,
      "learning_rate": 5.092959984965273e-06,
      "loss": 2.8058,
      "step": 216870
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6642212867736816,
      "learning_rate": 5.092209476550924e-06,
      "loss": 2.908,
      "step": 216871
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4315218925476074,
      "learning_rate": 5.091459022965516e-06,
      "loss": 2.9981,
      "step": 216872
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0420451164245605,
      "learning_rate": 5.0907086242091496e-06,
      "loss": 2.9722,
      "step": 216873
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0758092403411865,
      "learning_rate": 5.0899582802819915e-06,
      "loss": 3.0139,
      "step": 216874
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.609204053878784,
      "learning_rate": 5.089207991184174e-06,
      "loss": 2.785,
      "step": 216875
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.171147346496582,
      "learning_rate": 5.088457756915832e-06,
      "loss": 2.9512,
      "step": 216876
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0472378730773926,
      "learning_rate": 5.087707577477129e-06,
      "loss": 3.0175,
      "step": 216877
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.970095157623291,
      "learning_rate": 5.086957452868201e-06,
      "loss": 2.9664,
      "step": 216878
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.810765027999878,
      "learning_rate": 5.086207383089147e-06,
      "loss": 2.8084,
      "step": 216879
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.671886920928955,
      "learning_rate": 5.085457368140133e-06,
      "loss": 3.0554,
      "step": 216880
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.071322441101074,
      "learning_rate": 5.084707408021293e-06,
      "loss": 3.0773,
      "step": 216881
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2343902587890625,
      "learning_rate": 5.083957502732827e-06,
      "loss": 2.864,
      "step": 216882
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0076992511749268,
      "learning_rate": 5.0832076522747674e-06,
      "loss": 2.7243,
      "step": 216883
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0177347660064697,
      "learning_rate": 5.082457856647349e-06,
      "loss": 2.9059,
      "step": 216884
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.04738712310791,
      "learning_rate": 5.081708115850602e-06,
      "loss": 3.0733,
      "step": 216885
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.870266914367676,
      "learning_rate": 5.080958429884796e-06,
      "loss": 2.7753,
      "step": 216886
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1581783294677734,
      "learning_rate": 5.080208798749963e-06,
      "loss": 2.857,
      "step": 216887
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3831980228424072,
      "learning_rate": 5.079459222446302e-06,
      "loss": 3.1243,
      "step": 216888
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1494123935699463,
      "learning_rate": 5.078709700973948e-06,
      "loss": 2.8685,
      "step": 216889
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9955670833587646,
      "learning_rate": 5.077960234333034e-06,
      "loss": 2.7668,
      "step": 216890
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8874378204345703,
      "learning_rate": 5.0772108225236586e-06,
      "loss": 3.0498,
      "step": 216891
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.943779468536377,
      "learning_rate": 5.076461465546056e-06,
      "loss": 2.7701,
      "step": 216892
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.076399087905884,
      "learning_rate": 5.075712163400225e-06,
      "loss": 2.8095,
      "step": 216893
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7584872245788574,
      "learning_rate": 5.074962916086467e-06,
      "loss": 2.9134,
      "step": 216894
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.890096426010132,
      "learning_rate": 5.074213723604781e-06,
      "loss": 3.0148,
      "step": 216895
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.697263240814209,
      "learning_rate": 5.073464585955433e-06,
      "loss": 2.8541,
      "step": 216896
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8093605041503906,
      "learning_rate": 5.072715503138424e-06,
      "loss": 3.1926,
      "step": 216897
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.450000286102295,
      "learning_rate": 5.0719664751539876e-06,
      "loss": 2.8037,
      "step": 216898
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.364875555038452,
      "learning_rate": 5.071217502002223e-06,
      "loss": 2.8568,
      "step": 216899
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2734200954437256,
      "learning_rate": 5.0704685836833295e-06,
      "loss": 2.6831,
      "step": 216900
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.742591381072998,
      "learning_rate": 5.069719720197374e-06,
      "loss": 2.939,
      "step": 216901
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9518682956695557,
      "learning_rate": 5.068970911544557e-06,
      "loss": 2.8739,
      "step": 216902
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.3721134662628174,
      "learning_rate": 5.068222157724944e-06,
      "loss": 3.0227,
      "step": 216903
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6614606380462646,
      "learning_rate": 5.067473458738736e-06,
      "loss": 2.8507,
      "step": 216904
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7252094745635986,
      "learning_rate": 5.066724814586031e-06,
      "loss": 3.0092,
      "step": 216905
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.859992504119873,
      "learning_rate": 5.065976225267032e-06,
      "loss": 2.9339,
      "step": 216906
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8076961040496826,
      "learning_rate": 5.06522769078177e-06,
      "loss": 2.7696,
      "step": 216907
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0526809692382812,
      "learning_rate": 5.064479211130545e-06,
      "loss": 2.7003,
      "step": 216908
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1671085357666016,
      "learning_rate": 5.063730786313325e-06,
      "loss": 2.7818,
      "step": 216909
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.241861581802368,
      "learning_rate": 5.062982416330341e-06,
      "loss": 2.7919,
      "step": 216910
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2976598739624023,
      "learning_rate": 5.062234101181695e-06,
      "loss": 3.0325,
      "step": 216911
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6396238803863525,
      "learning_rate": 5.061485840867585e-06,
      "loss": 2.8144,
      "step": 216912
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.744455575942993,
      "learning_rate": 5.060737635388079e-06,
      "loss": 2.5604,
      "step": 216913
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1379382610321045,
      "learning_rate": 5.05998948474341e-06,
      "loss": 2.97,
      "step": 216914
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.3349556922912598,
      "learning_rate": 5.05924138893361e-06,
      "loss": 3.0267,
      "step": 216915
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.6326844692230225,
      "learning_rate": 5.058493347958881e-06,
      "loss": 2.9554,
      "step": 216916
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2281665802001953,
      "learning_rate": 5.057745361819321e-06,
      "loss": 2.9404,
      "step": 216917
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0237503051757812,
      "learning_rate": 5.056997430515097e-06,
      "loss": 2.8468,
      "step": 216918
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7397191524505615,
      "learning_rate": 5.056249554046344e-06,
      "loss": 2.9567,
      "step": 216919
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1248726844787598,
      "learning_rate": 5.055501732413225e-06,
      "loss": 2.9417,
      "step": 216920
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2370190620422363,
      "learning_rate": 5.054753965615843e-06,
      "loss": 2.9044,
      "step": 216921
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.996907949447632,
      "learning_rate": 5.054006253654363e-06,
      "loss": 2.7754,
      "step": 216922
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.072728157043457,
      "learning_rate": 5.053258596528886e-06,
      "loss": 3.0516,
      "step": 216923
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.026043653488159,
      "learning_rate": 5.052510994239611e-06,
      "loss": 2.9705,
      "step": 216924
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4089064598083496,
      "learning_rate": 5.051763446786572e-06,
      "loss": 3.008,
      "step": 216925
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.2877185344696045,
      "learning_rate": 5.051015954170035e-06,
      "loss": 3.0463,
      "step": 216926
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.707935094833374,
      "learning_rate": 5.050268516390099e-06,
      "loss": 2.7729,
      "step": 216927
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.782289981842041,
      "learning_rate": 5.049521133446866e-06,
      "loss": 2.7223,
      "step": 216928
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6444473266601562,
      "learning_rate": 5.048773805340467e-06,
      "loss": 2.9262,
      "step": 216929
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8040637969970703,
      "learning_rate": 5.048026532071103e-06,
      "loss": 2.8918,
      "step": 216930
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.825639486312866,
      "learning_rate": 5.047279313638841e-06,
      "loss": 2.6904,
      "step": 216931
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8966963291168213,
      "learning_rate": 5.04653215004388e-06,
      "loss": 2.789,
      "step": 216932
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.0488405227661133,
      "learning_rate": 5.045785041286354e-06,
      "loss": 2.9361,
      "step": 216933
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.440274715423584,
      "learning_rate": 5.04503798736633e-06,
      "loss": 2.8143,
      "step": 216934
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.9514639377593994,
      "learning_rate": 5.044290988284072e-06,
      "loss": 2.9405,
      "step": 216935
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4011566638946533,
      "learning_rate": 5.043544044039616e-06,
      "loss": 2.9047,
      "step": 216936
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.36045503616333,
      "learning_rate": 5.0427971546330935e-06,
      "loss": 2.6028,
      "step": 216937
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.870492696762085,
      "learning_rate": 5.0420503200647385e-06,
      "loss": 3.3359,
      "step": 216938
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.6793580055236816,
      "learning_rate": 5.041303540334618e-06,
      "loss": 2.8607,
      "step": 216939
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7980880737304688,
      "learning_rate": 5.040556815442865e-06,
      "loss": 2.7451,
      "step": 216940
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.911921977996826,
      "learning_rate": 5.039810145389678e-06,
      "loss": 3.0027,
      "step": 216941
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.312638759613037,
      "learning_rate": 5.039063530175158e-06,
      "loss": 2.8525,
      "step": 216942
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.5689849853515625,
      "learning_rate": 5.038316969799405e-06,
      "loss": 2.9981,
      "step": 216943
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.769087076187134,
      "learning_rate": 5.0375704642626195e-06,
      "loss": 2.8717,
      "step": 216944
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.457486152648926,
      "learning_rate": 5.036824013564933e-06,
      "loss": 2.8992,
      "step": 216945
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.7998948097229004,
      "learning_rate": 5.036077617706446e-06,
      "loss": 2.8076,
      "step": 216946
    },
    {
      "epoch": 2.82,
      "grad_norm": 4.497455596923828,
      "learning_rate": 5.035331276687327e-06,
      "loss": 2.8823,
      "step": 216947
    },
    {
      "epoch": 2.82,
      "grad_norm": 5.559205532073975,
      "learning_rate": 5.034584990507673e-06,
      "loss": 2.9085,
      "step": 216948
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.4629154205322266,
      "learning_rate": 5.033838759167719e-06,
      "loss": 2.7762,
      "step": 216949
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.104860544204712,
      "learning_rate": 5.033092582667497e-06,
      "loss": 2.8858,
      "step": 216950
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1738014221191406,
      "learning_rate": 5.032346461007242e-06,
      "loss": 2.9978,
      "step": 216951
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.511944532394409,
      "learning_rate": 5.031600394186986e-06,
      "loss": 2.6722,
      "step": 216952
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1569972038269043,
      "learning_rate": 5.030854382206961e-06,
      "loss": 3.0505,
      "step": 216953
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.120206356048584,
      "learning_rate": 5.0301084250672365e-06,
      "loss": 2.9501,
      "step": 216954
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8434243202209473,
      "learning_rate": 5.029362522767977e-06,
      "loss": 2.8489,
      "step": 216955
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.041776180267334,
      "learning_rate": 5.028616675309349e-06,
      "loss": 2.9606,
      "step": 216956
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.075284242630005,
      "learning_rate": 5.027870882691487e-06,
      "loss": 2.9428,
      "step": 216957
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.118906021118164,
      "learning_rate": 5.02712514491449e-06,
      "loss": 3.0223,
      "step": 216958
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.605104446411133,
      "learning_rate": 5.0263794619785244e-06,
      "loss": 2.7094,
      "step": 216959
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.8246870040893555,
      "learning_rate": 5.025633833883724e-06,
      "loss": 2.6072,
      "step": 216960
    },
    {
      "epoch": 2.82,
      "grad_norm": 2.604726552963257,
      "learning_rate": 5.024888260630222e-06,
      "loss": 3.1815,
      "step": 216961
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.1400113105773926,
      "learning_rate": 5.024142742218151e-06,
      "loss": 2.8422,
      "step": 216962
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4006316661834717,
      "learning_rate": 5.023397278647678e-06,
      "loss": 2.8606,
      "step": 216963
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0692646503448486,
      "learning_rate": 5.022651869918904e-06,
      "loss": 2.9096,
      "step": 216964
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.822117567062378,
      "learning_rate": 5.021906516032026e-06,
      "loss": 2.7709,
      "step": 216965
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9271042346954346,
      "learning_rate": 5.02116121698708e-06,
      "loss": 2.8754,
      "step": 216966
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.862457513809204,
      "learning_rate": 5.020415972784331e-06,
      "loss": 2.9967,
      "step": 216967
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.583390951156616,
      "learning_rate": 5.019670783423813e-06,
      "loss": 3.0697,
      "step": 216968
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.958207845687866,
      "learning_rate": 5.018925648905758e-06,
      "loss": 2.9935,
      "step": 216969
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9917032718658447,
      "learning_rate": 5.018180569230201e-06,
      "loss": 3.0976,
      "step": 216970
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.148226022720337,
      "learning_rate": 5.017435544397341e-06,
      "loss": 2.7882,
      "step": 216971
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6285243034362793,
      "learning_rate": 5.0166905744073115e-06,
      "loss": 2.921,
      "step": 216972
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6095011234283447,
      "learning_rate": 5.015945659260279e-06,
      "loss": 2.7574,
      "step": 216973
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.823840618133545,
      "learning_rate": 5.015200798956276e-06,
      "loss": 2.9281,
      "step": 216974
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0358617305755615,
      "learning_rate": 5.014455993495603e-06,
      "loss": 2.9558,
      "step": 216975
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.051340103149414,
      "learning_rate": 5.01371124287826e-06,
      "loss": 2.7815,
      "step": 216976
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9650871753692627,
      "learning_rate": 5.012966547104447e-06,
      "loss": 2.7724,
      "step": 216977
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7597439289093018,
      "learning_rate": 5.012221906174296e-06,
      "loss": 2.8653,
      "step": 216978
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.021038293838501,
      "learning_rate": 5.0114773200879425e-06,
      "loss": 2.8668,
      "step": 216979
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.766772747039795,
      "learning_rate": 5.010732788845484e-06,
      "loss": 3.0587,
      "step": 216980
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9143476486206055,
      "learning_rate": 5.009988312447189e-06,
      "loss": 2.9195,
      "step": 216981
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5374388694763184,
      "learning_rate": 5.009243890893022e-06,
      "loss": 2.7727,
      "step": 216982
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9550621509552,
      "learning_rate": 5.008499524183251e-06,
      "loss": 2.8267,
      "step": 216983
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.721790313720703,
      "learning_rate": 5.0077552123179434e-06,
      "loss": 2.9381,
      "step": 216984
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7331066131591797,
      "learning_rate": 5.007010955297263e-06,
      "loss": 3.0279,
      "step": 216985
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7231879234313965,
      "learning_rate": 5.006266753121347e-06,
      "loss": 2.9934,
      "step": 216986
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.828848123550415,
      "learning_rate": 5.0055226057903575e-06,
      "loss": 2.7556,
      "step": 216987
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.046008348464966,
      "learning_rate": 5.004778513304397e-06,
      "loss": 2.8846,
      "step": 216988
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5855565071105957,
      "learning_rate": 5.004034475663598e-06,
      "loss": 3.1443,
      "step": 216989
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.860837936401367,
      "learning_rate": 5.003290492868128e-06,
      "loss": 3.047,
      "step": 216990
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9927737712860107,
      "learning_rate": 5.002546564918119e-06,
      "loss": 3.1431,
      "step": 216991
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.766597270965576,
      "learning_rate": 5.001802691813705e-06,
      "loss": 3.0367,
      "step": 216992
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.806403875350952,
      "learning_rate": 5.00105887355502e-06,
      "loss": 2.8811,
      "step": 216993
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2950336933135986,
      "learning_rate": 5.000315110142228e-06,
      "loss": 2.8756,
      "step": 216994
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.908839464187622,
      "learning_rate": 4.9995714015754304e-06,
      "loss": 2.8077,
      "step": 216995
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.55753755569458,
      "learning_rate": 4.9988277478547616e-06,
      "loss": 3.0935,
      "step": 216996
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9314143657684326,
      "learning_rate": 4.998084148980419e-06,
      "loss": 3.0611,
      "step": 216997
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9629199504852295,
      "learning_rate": 4.9973406049524715e-06,
      "loss": 2.647,
      "step": 216998
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.68007755279541,
      "learning_rate": 4.996597115771084e-06,
      "loss": 2.9745,
      "step": 216999
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6560568809509277,
      "learning_rate": 4.9958536814364235e-06,
      "loss": 2.8924,
      "step": 217000
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1371774673461914,
      "learning_rate": 4.99511030194859e-06,
      "loss": 3.0801,
      "step": 217001
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2207818031311035,
      "learning_rate": 4.994366977307718e-06,
      "loss": 3.0738,
      "step": 217002
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.238750457763672,
      "learning_rate": 4.9936237075140055e-06,
      "loss": 2.7946,
      "step": 217003
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.559671640396118,
      "learning_rate": 4.992880492567486e-06,
      "loss": 2.8296,
      "step": 217004
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.003183603286743,
      "learning_rate": 4.9921373324683935e-06,
      "loss": 2.9499,
      "step": 217005
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0444912910461426,
      "learning_rate": 4.9913942272168604e-06,
      "loss": 2.9968,
      "step": 217006
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5888571739196777,
      "learning_rate": 4.9906511768129545e-06,
      "loss": 2.8774,
      "step": 217007
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7761330604553223,
      "learning_rate": 4.989908181256841e-06,
      "loss": 2.6031,
      "step": 217008
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.098515510559082,
      "learning_rate": 4.98916524054872e-06,
      "loss": 2.9991,
      "step": 217009
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6842567920684814,
      "learning_rate": 4.988422354688626e-06,
      "loss": 2.8107,
      "step": 217010
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.061335325241089,
      "learning_rate": 4.987679523676824e-06,
      "loss": 2.7865,
      "step": 217011
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7145981788635254,
      "learning_rate": 4.986936747513348e-06,
      "loss": 2.7366,
      "step": 217012
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4755189418792725,
      "learning_rate": 4.986194026198365e-06,
      "loss": 2.9473,
      "step": 217013
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7777090072631836,
      "learning_rate": 4.985451359732007e-06,
      "loss": 2.7756,
      "step": 217014
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.642728090286255,
      "learning_rate": 4.984708748114441e-06,
      "loss": 3.139,
      "step": 217015
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0209224224090576,
      "learning_rate": 4.983966191345734e-06,
      "loss": 3.0008,
      "step": 217016
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.814998149871826,
      "learning_rate": 4.983223689426152e-06,
      "loss": 2.7799,
      "step": 217017
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.590381622314453,
      "learning_rate": 4.982481242355729e-06,
      "loss": 3.1077,
      "step": 217018
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7511136531829834,
      "learning_rate": 4.981738850134598e-06,
      "loss": 2.9531,
      "step": 217019
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0061275959014893,
      "learning_rate": 4.9809965127629585e-06,
      "loss": 2.9959,
      "step": 217020
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0219979286193848,
      "learning_rate": 4.980254230240943e-06,
      "loss": 2.7246,
      "step": 217021
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9613959789276123,
      "learning_rate": 4.97951200256862e-06,
      "loss": 2.8889,
      "step": 217022
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8954365253448486,
      "learning_rate": 4.978769829746221e-06,
      "loss": 3.0257,
      "step": 217023
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.363477945327759,
      "learning_rate": 4.978027711773814e-06,
      "loss": 2.8754,
      "step": 217024
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.86448073387146,
      "learning_rate": 4.977285648651563e-06,
      "loss": 2.9658,
      "step": 217025
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.883145570755005,
      "learning_rate": 4.976543640379604e-06,
      "loss": 2.9734,
      "step": 217026
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.13769268989563,
      "learning_rate": 4.975801686958103e-06,
      "loss": 3.1035,
      "step": 217027
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1035075187683105,
      "learning_rate": 4.975059788387126e-06,
      "loss": 2.8495,
      "step": 217028
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.47322154045105,
      "learning_rate": 4.974317944666872e-06,
      "loss": 2.9823,
      "step": 217029
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.719709873199463,
      "learning_rate": 4.9735761557974766e-06,
      "loss": 3.0929,
      "step": 217030
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2712242603302,
      "learning_rate": 4.972834421779004e-06,
      "loss": 2.9947,
      "step": 217031
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6399598121643066,
      "learning_rate": 4.972092742611722e-06,
      "loss": 2.9221,
      "step": 217032
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.403430461883545,
      "learning_rate": 4.97135111829563e-06,
      "loss": 2.9692,
      "step": 217033
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.916661024093628,
      "learning_rate": 4.970609548830995e-06,
      "loss": 2.8719,
      "step": 217034
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.381242513656616,
      "learning_rate": 4.9698680342178835e-06,
      "loss": 2.9389,
      "step": 217035
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7960550785064697,
      "learning_rate": 4.9691265744564276e-06,
      "loss": 3.2173,
      "step": 217036
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3056509494781494,
      "learning_rate": 4.968385169546763e-06,
      "loss": 2.862,
      "step": 217037
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4738855361938477,
      "learning_rate": 4.9676438194890534e-06,
      "loss": 2.959,
      "step": 217038
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.139408588409424,
      "learning_rate": 4.966902524283433e-06,
      "loss": 2.9396,
      "step": 217039
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0800294876098633,
      "learning_rate": 4.9661612839300365e-06,
      "loss": 2.8324,
      "step": 217040
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1455724239349365,
      "learning_rate": 4.96542009842903e-06,
      "loss": 2.9022,
      "step": 217041
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.212533712387085,
      "learning_rate": 4.964678967780478e-06,
      "loss": 3.1416,
      "step": 217042
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1737008094787598,
      "learning_rate": 4.963937891984582e-06,
      "loss": 2.8612,
      "step": 217043
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5589821338653564,
      "learning_rate": 4.9631968710414415e-06,
      "loss": 3.1845,
      "step": 217044
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3055648803710938,
      "learning_rate": 4.9624559049512236e-06,
      "loss": 2.7825,
      "step": 217045
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.151925563812256,
      "learning_rate": 4.961714993714061e-06,
      "loss": 2.7963,
      "step": 217046
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.2903964519500732,
      "learning_rate": 4.960974137330054e-06,
      "loss": 2.9129,
      "step": 217047
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7011966705322266,
      "learning_rate": 4.960233335799435e-06,
      "loss": 2.9483,
      "step": 217048
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4166722297668457,
      "learning_rate": 4.959492589122205e-06,
      "loss": 2.6338,
      "step": 217049
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.035128355026245,
      "learning_rate": 4.958751897298629e-06,
      "loss": 2.7391,
      "step": 217050
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0473103523254395,
      "learning_rate": 4.958011260328742e-06,
      "loss": 2.999,
      "step": 217051
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7142457962036133,
      "learning_rate": 4.957270678212777e-06,
      "loss": 2.8055,
      "step": 217052
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9181509017944336,
      "learning_rate": 4.956530150950799e-06,
      "loss": 2.8646,
      "step": 217053
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.088160514831543,
      "learning_rate": 4.955789678543009e-06,
      "loss": 3.1458,
      "step": 217054
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.764308452606201,
      "learning_rate": 4.955049260989441e-06,
      "loss": 2.8599,
      "step": 217055
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9445362091064453,
      "learning_rate": 4.95430889829036e-06,
      "loss": 2.7631,
      "step": 217056
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.761103391647339,
      "learning_rate": 4.9535685904458e-06,
      "loss": 3.112,
      "step": 217057
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.719362258911133,
      "learning_rate": 4.95282833745596e-06,
      "loss": 2.9736,
      "step": 217058
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.895052194595337,
      "learning_rate": 4.952088139320942e-06,
      "loss": 2.6413,
      "step": 217059
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.167654514312744,
      "learning_rate": 4.951347996040911e-06,
      "loss": 2.5087,
      "step": 217060
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.580850839614868,
      "learning_rate": 4.950607907616e-06,
      "loss": 2.9701,
      "step": 217061
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2740726470947266,
      "learning_rate": 4.949867874046343e-06,
      "loss": 3.2019,
      "step": 217062
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.8520169258117676,
      "learning_rate": 4.949127895332072e-06,
      "loss": 2.9701,
      "step": 217063
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4957807064056396,
      "learning_rate": 4.9483879714733224e-06,
      "loss": 2.6197,
      "step": 217064
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.603257179260254,
      "learning_rate": 4.947648102470225e-06,
      "loss": 3.2805,
      "step": 217065
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4757485389709473,
      "learning_rate": 4.946908288322949e-06,
      "loss": 2.9925,
      "step": 217066
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7532689571380615,
      "learning_rate": 4.946168529031625e-06,
      "loss": 2.9643,
      "step": 217067
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.886075019836426,
      "learning_rate": 4.945428824596353e-06,
      "loss": 3.0988,
      "step": 217068
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8944952487945557,
      "learning_rate": 4.944689175017302e-06,
      "loss": 3.0889,
      "step": 217069
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.86134934425354,
      "learning_rate": 4.943949580294604e-06,
      "loss": 2.9561,
      "step": 217070
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.823077917098999,
      "learning_rate": 4.9432100404283585e-06,
      "loss": 2.8693,
      "step": 217071
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.947293281555176,
      "learning_rate": 4.942470555418798e-06,
      "loss": 2.7008,
      "step": 217072
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.719727039337158,
      "learning_rate": 4.941731125265991e-06,
      "loss": 2.8258,
      "step": 217073
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8024508953094482,
      "learning_rate": 4.940991749970069e-06,
      "loss": 3.0843,
      "step": 217074
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.368987798690796,
      "learning_rate": 4.9402524295312e-06,
      "loss": 2.8771,
      "step": 217075
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9292919635772705,
      "learning_rate": 4.939513163949482e-06,
      "loss": 3.027,
      "step": 217076
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6862616539001465,
      "learning_rate": 4.938773953225084e-06,
      "loss": 2.8282,
      "step": 217077
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7527546882629395,
      "learning_rate": 4.93803479735817e-06,
      "loss": 2.7866,
      "step": 217078
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7911150455474854,
      "learning_rate": 4.937295696348809e-06,
      "loss": 2.8159,
      "step": 217079
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.604886531829834,
      "learning_rate": 4.936556650197199e-06,
      "loss": 2.9214,
      "step": 217080
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7137835025787354,
      "learning_rate": 4.93581765890344e-06,
      "loss": 2.993,
      "step": 217081
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7277209758758545,
      "learning_rate": 4.9350787224677e-06,
      "loss": 2.7138,
      "step": 217082
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.2172698974609375,
      "learning_rate": 4.934339840890078e-06,
      "loss": 2.8443,
      "step": 217083
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2190685272216797,
      "learning_rate": 4.933601014170741e-06,
      "loss": 2.9599,
      "step": 217084
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0618765354156494,
      "learning_rate": 4.932862242309821e-06,
      "loss": 2.9881,
      "step": 217085
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.231998920440674,
      "learning_rate": 4.932123525307452e-06,
      "loss": 2.954,
      "step": 217086
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7179789543151855,
      "learning_rate": 4.931384863163735e-06,
      "loss": 2.6985,
      "step": 217087
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8246634006500244,
      "learning_rate": 4.9306462558789005e-06,
      "loss": 2.8355,
      "step": 217088
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.563607692718506,
      "learning_rate": 4.9299077034529844e-06,
      "loss": 2.958,
      "step": 217089
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.786900281906128,
      "learning_rate": 4.9291692058861855e-06,
      "loss": 3.0759,
      "step": 217090
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9306092262268066,
      "learning_rate": 4.928430763178637e-06,
      "loss": 3.0168,
      "step": 217091
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6635868549346924,
      "learning_rate": 4.927692375330439e-06,
      "loss": 2.8661,
      "step": 217092
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.009495735168457,
      "learning_rate": 4.926954042341758e-06,
      "loss": 3.2871,
      "step": 217093
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.910949230194092,
      "learning_rate": 4.926215764212727e-06,
      "loss": 2.9544,
      "step": 217094
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.246493101119995,
      "learning_rate": 4.925477540943479e-06,
      "loss": 2.9778,
      "step": 217095
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3176915645599365,
      "learning_rate": 4.924739372534181e-06,
      "loss": 3.1348,
      "step": 217096
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3276991844177246,
      "learning_rate": 4.924001258984933e-06,
      "loss": 2.8657,
      "step": 217097
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8633511066436768,
      "learning_rate": 4.923263200295868e-06,
      "loss": 2.6968,
      "step": 217098
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9920578002929688,
      "learning_rate": 4.922525196467153e-06,
      "loss": 2.9544,
      "step": 217099
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9589974880218506,
      "learning_rate": 4.92178724749892e-06,
      "loss": 2.8279,
      "step": 217100
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0613162517547607,
      "learning_rate": 4.9210493533912375e-06,
      "loss": 2.8636,
      "step": 217101
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.973806858062744,
      "learning_rate": 4.92031151414437e-06,
      "loss": 3.1278,
      "step": 217102
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1706230640411377,
      "learning_rate": 4.919573729758353e-06,
      "loss": 3.1332,
      "step": 217103
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.63657283782959,
      "learning_rate": 4.918836000233383e-06,
      "loss": 3.0048,
      "step": 217104
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.871227502822876,
      "learning_rate": 4.918098325569531e-06,
      "loss": 2.6287,
      "step": 217105
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6691408157348633,
      "learning_rate": 4.917360705767026e-06,
      "loss": 2.6924,
      "step": 217106
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.118391513824463,
      "learning_rate": 4.916623140825904e-06,
      "loss": 2.9331,
      "step": 217107
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.180630683898926,
      "learning_rate": 4.9158856307463965e-06,
      "loss": 3.0351,
      "step": 217108
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6934640407562256,
      "learning_rate": 4.915148175528571e-06,
      "loss": 3.2456,
      "step": 217109
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5071139335632324,
      "learning_rate": 4.9144107751725615e-06,
      "loss": 2.8018,
      "step": 217110
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7931177616119385,
      "learning_rate": 4.9136734296786e-06,
      "loss": 2.8431,
      "step": 217111
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1773860454559326,
      "learning_rate": 4.912936139046719e-06,
      "loss": 2.8489,
      "step": 217112
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.897972822189331,
      "learning_rate": 4.912198903277087e-06,
      "loss": 3.0117,
      "step": 217113
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.838035821914673,
      "learning_rate": 4.911461722369836e-06,
      "loss": 3.1837,
      "step": 217114
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.113079786300659,
      "learning_rate": 4.910724596325166e-06,
      "loss": 2.8491,
      "step": 217115
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9440529346466064,
      "learning_rate": 4.909987525143111e-06,
      "loss": 2.8129,
      "step": 217116
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5086991786956787,
      "learning_rate": 4.90925050882387e-06,
      "loss": 3.0336,
      "step": 217117
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8307244777679443,
      "learning_rate": 4.9085135473675765e-06,
      "loss": 2.6692,
      "step": 217118
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.879791736602783,
      "learning_rate": 4.907776640774363e-06,
      "loss": 3.0145,
      "step": 217119
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5764920711517334,
      "learning_rate": 4.907039789044365e-06,
      "loss": 3.012,
      "step": 217120
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.549791097640991,
      "learning_rate": 4.906302992177746e-06,
      "loss": 2.7874,
      "step": 217121
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3759515285491943,
      "learning_rate": 4.9055662501745755e-06,
      "loss": 2.8791,
      "step": 217122
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0388846397399902,
      "learning_rate": 4.904829563035051e-06,
      "loss": 2.9373,
      "step": 217123
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.604382038116455,
      "learning_rate": 4.90409293075924e-06,
      "loss": 3.0682,
      "step": 217124
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5641801357269287,
      "learning_rate": 4.9033563533473766e-06,
      "loss": 2.8662,
      "step": 217125
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.476861000061035,
      "learning_rate": 4.902619830799559e-06,
      "loss": 2.8976,
      "step": 217126
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9565272331237793,
      "learning_rate": 4.901883363115888e-06,
      "loss": 3.0565,
      "step": 217127
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.263259172439575,
      "learning_rate": 4.901146950296531e-06,
      "loss": 2.6462,
      "step": 217128
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.729029893875122,
      "learning_rate": 4.9004105923416524e-06,
      "loss": 2.8727,
      "step": 217129
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0728917121887207,
      "learning_rate": 4.899674289251321e-06,
      "loss": 2.9794,
      "step": 217130
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8647539615631104,
      "learning_rate": 4.8989380410257016e-06,
      "loss": 3.059,
      "step": 217131
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3877451419830322,
      "learning_rate": 4.898201847664962e-06,
      "loss": 2.9592,
      "step": 217132
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.871549129486084,
      "learning_rate": 4.897465709169235e-06,
      "loss": 3.2153,
      "step": 217133
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6052191257476807,
      "learning_rate": 4.89672962553862e-06,
      "loss": 2.9761,
      "step": 217134
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.657717227935791,
      "learning_rate": 4.895993596773285e-06,
      "loss": 2.7945,
      "step": 217135
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.77675461769104,
      "learning_rate": 4.8952576228733276e-06,
      "loss": 2.9277,
      "step": 217136
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9287357330322266,
      "learning_rate": 4.894521703838916e-06,
      "loss": 2.7084,
      "step": 217137
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8050806522369385,
      "learning_rate": 4.893785839670183e-06,
      "loss": 2.9212,
      "step": 217138
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.385298252105713,
      "learning_rate": 4.8930500303672955e-06,
      "loss": 2.9789,
      "step": 217139
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8624958992004395,
      "learning_rate": 4.892314275930354e-06,
      "loss": 2.9724,
      "step": 217140
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.048515558242798,
      "learning_rate": 4.891578576359489e-06,
      "loss": 2.9831,
      "step": 217141
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6104836463928223,
      "learning_rate": 4.890842931654837e-06,
      "loss": 2.9338,
      "step": 217142
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.933706521987915,
      "learning_rate": 4.890107341816596e-06,
      "loss": 2.9777,
      "step": 217143
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1731350421905518,
      "learning_rate": 4.8893718068448e-06,
      "loss": 2.8545,
      "step": 217144
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1394996643066406,
      "learning_rate": 4.888636326739681e-06,
      "loss": 2.884,
      "step": 217145
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7557716369628906,
      "learning_rate": 4.887900901501307e-06,
      "loss": 2.9908,
      "step": 217146
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.308537483215332,
      "learning_rate": 4.887165531129877e-06,
      "loss": 2.9167,
      "step": 217147
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.117815971374512,
      "learning_rate": 4.886430215625459e-06,
      "loss": 2.9639,
      "step": 217148
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.485461711883545,
      "learning_rate": 4.885694954988217e-06,
      "loss": 2.9065,
      "step": 217149
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5361568927764893,
      "learning_rate": 4.8849597492183204e-06,
      "loss": 2.9528,
      "step": 217150
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8526618480682373,
      "learning_rate": 4.8842245983158665e-06,
      "loss": 3.1492,
      "step": 217151
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.21748423576355,
      "learning_rate": 4.883489502281024e-06,
      "loss": 3.056,
      "step": 217152
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.786638021469116,
      "learning_rate": 4.882754461113891e-06,
      "loss": 2.973,
      "step": 217153
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1278862953186035,
      "learning_rate": 4.882019474814636e-06,
      "loss": 3.0515,
      "step": 217154
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.793855905532837,
      "learning_rate": 4.8812845433833905e-06,
      "loss": 3.073,
      "step": 217155
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9736580848693848,
      "learning_rate": 4.8805496668202545e-06,
      "loss": 3.0295,
      "step": 217156
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9986095428466797,
      "learning_rate": 4.879814845125429e-06,
      "loss": 3.179,
      "step": 217157
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.137518882751465,
      "learning_rate": 4.879080078299014e-06,
      "loss": 2.7416,
      "step": 217158
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.037134885787964,
      "learning_rate": 4.878345366341141e-06,
      "loss": 2.7907,
      "step": 217159
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1542654037475586,
      "learning_rate": 4.877610709251944e-06,
      "loss": 2.9889,
      "step": 217160
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1517882347106934,
      "learning_rate": 4.876876107031591e-06,
      "loss": 2.9216,
      "step": 217161
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8831539154052734,
      "learning_rate": 4.8761415596801805e-06,
      "loss": 2.9078,
      "step": 217162
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.877492666244507,
      "learning_rate": 4.875407067197878e-06,
      "loss": 2.862,
      "step": 217163
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6932177543640137,
      "learning_rate": 4.874672629584786e-06,
      "loss": 2.9953,
      "step": 217164
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2326040267944336,
      "learning_rate": 4.873938246841103e-06,
      "loss": 3.011,
      "step": 217165
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.832498550415039,
      "learning_rate": 4.873203918966895e-06,
      "loss": 2.9165,
      "step": 217166
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6964683532714844,
      "learning_rate": 4.872469645962329e-06,
      "loss": 2.9815,
      "step": 217167
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.698392152786255,
      "learning_rate": 4.871735427827539e-06,
      "loss": 3.0356,
      "step": 217168
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3551478385925293,
      "learning_rate": 4.871001264562691e-06,
      "loss": 2.7885,
      "step": 217169
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.58170485496521,
      "learning_rate": 4.870267156167884e-06,
      "loss": 2.8505,
      "step": 217170
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.838667154312134,
      "learning_rate": 4.869533102643253e-06,
      "loss": 2.9937,
      "step": 217171
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8141884803771973,
      "learning_rate": 4.868799103988963e-06,
      "loss": 2.8519,
      "step": 217172
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.329981803894043,
      "learning_rate": 4.868065160205115e-06,
      "loss": 2.818,
      "step": 217173
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1665685176849365,
      "learning_rate": 4.8673312712918744e-06,
      "loss": 2.7389,
      "step": 217174
    },
    {
      "epoch": 2.83,
      "grad_norm": 5.35186767578125,
      "learning_rate": 4.866597437249376e-06,
      "loss": 2.767,
      "step": 217175
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.897559881210327,
      "learning_rate": 4.865863658077751e-06,
      "loss": 2.8914,
      "step": 217176
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.102841377258301,
      "learning_rate": 4.865129933777101e-06,
      "loss": 3.032,
      "step": 217177
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.412808656692505,
      "learning_rate": 4.864396264347625e-06,
      "loss": 3.1535,
      "step": 217178
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2655162811279297,
      "learning_rate": 4.863662649789424e-06,
      "loss": 2.9058,
      "step": 217179
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.047356367111206,
      "learning_rate": 4.86292909010263e-06,
      "loss": 3.02,
      "step": 217180
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.860865354537964,
      "learning_rate": 4.8621955852873764e-06,
      "loss": 2.689,
      "step": 217181
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.626311779022217,
      "learning_rate": 4.8614621353438635e-06,
      "loss": 2.5996,
      "step": 217182
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7692031860351562,
      "learning_rate": 4.860728740272124e-06,
      "loss": 2.8711,
      "step": 217183
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.697148084640503,
      "learning_rate": 4.859995400072358e-06,
      "loss": 2.8829,
      "step": 217184
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.81880259513855,
      "learning_rate": 4.8592621147447e-06,
      "loss": 2.8055,
      "step": 217185
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1401407718658447,
      "learning_rate": 4.858528884289248e-06,
      "loss": 2.9066,
      "step": 217186
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.129685163497925,
      "learning_rate": 4.857795708706202e-06,
      "loss": 2.8422,
      "step": 217187
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.66711163520813,
      "learning_rate": 4.857062587995664e-06,
      "loss": 3.1662,
      "step": 217188
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9676074981689453,
      "learning_rate": 4.856329522157765e-06,
      "loss": 2.9277,
      "step": 217189
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4834227561950684,
      "learning_rate": 4.8555965111926385e-06,
      "loss": 2.8538,
      "step": 217190
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9166626930236816,
      "learning_rate": 4.854863555100419e-06,
      "loss": 2.7568,
      "step": 217191
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.658992290496826,
      "learning_rate": 4.854130653881239e-06,
      "loss": 2.895,
      "step": 217192
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9237077236175537,
      "learning_rate": 4.853397807535298e-06,
      "loss": 2.9601,
      "step": 217193
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8442773818969727,
      "learning_rate": 4.852665016062629e-06,
      "loss": 2.6742,
      "step": 217194
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9492788314819336,
      "learning_rate": 4.851932279463433e-06,
      "loss": 3.0607,
      "step": 217195
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6693029403686523,
      "learning_rate": 4.851199597737876e-06,
      "loss": 3.0782,
      "step": 217196
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0724496841430664,
      "learning_rate": 4.850466970886024e-06,
      "loss": 2.9152,
      "step": 217197
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8380000591278076,
      "learning_rate": 4.849734398908012e-06,
      "loss": 3.21,
      "step": 217198
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3556711673736572,
      "learning_rate": 4.8490018818040376e-06,
      "loss": 2.9648,
      "step": 217199
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6646883487701416,
      "learning_rate": 4.848269419574202e-06,
      "loss": 2.7177,
      "step": 217200
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8748281002044678,
      "learning_rate": 4.8475370122186384e-06,
      "loss": 2.925,
      "step": 217201
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8355088233947754,
      "learning_rate": 4.846804659737513e-06,
      "loss": 2.7696,
      "step": 217202
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.754805564880371,
      "learning_rate": 4.846072362130926e-06,
      "loss": 3.0925,
      "step": 217203
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.453875780105591,
      "learning_rate": 4.8453401193990105e-06,
      "loss": 2.9606,
      "step": 217204
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.364269256591797,
      "learning_rate": 4.844607931541933e-06,
      "loss": 2.8467,
      "step": 217205
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6156094074249268,
      "learning_rate": 4.843875798559793e-06,
      "loss": 2.9952,
      "step": 217206
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4228179454803467,
      "learning_rate": 4.843143720452758e-06,
      "loss": 3.0022,
      "step": 217207
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0108017921447754,
      "learning_rate": 4.84241169722096e-06,
      "loss": 2.8082,
      "step": 217208
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9821159839630127,
      "learning_rate": 4.8416797288645e-06,
      "loss": 2.9473,
      "step": 217209
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.751486301422119,
      "learning_rate": 4.840947815383611e-06,
      "loss": 3.2087,
      "step": 217210
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4797987937927246,
      "learning_rate": 4.840215956778326e-06,
      "loss": 2.9587,
      "step": 217211
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6441285610198975,
      "learning_rate": 4.8394841530487785e-06,
      "loss": 2.7802,
      "step": 217212
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.328988552093506,
      "learning_rate": 4.838752404195201e-06,
      "loss": 3.0677,
      "step": 217213
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.825174331665039,
      "learning_rate": 4.838020710217627e-06,
      "loss": 2.648,
      "step": 217214
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.649995803833008,
      "learning_rate": 4.837289071116257e-06,
      "loss": 2.906,
      "step": 217215
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6372461318969727,
      "learning_rate": 4.836557486891224e-06,
      "loss": 3.1933,
      "step": 217216
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.335933208465576,
      "learning_rate": 4.835825957542594e-06,
      "loss": 3.0171,
      "step": 217217
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.444714069366455,
      "learning_rate": 4.8350944830706e-06,
      "loss": 2.9311,
      "step": 217218
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4292943477630615,
      "learning_rate": 4.834363063475344e-06,
      "loss": 2.5222,
      "step": 217219
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.02968692779541,
      "learning_rate": 4.833631698756923e-06,
      "loss": 2.9302,
      "step": 217220
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.647855043411255,
      "learning_rate": 4.832900388915506e-06,
      "loss": 2.9547,
      "step": 217221
    },
    {
      "epoch": 2.83,
      "grad_norm": 5.016505241394043,
      "learning_rate": 4.832169133951225e-06,
      "loss": 2.7112,
      "step": 217222
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1372711658477783,
      "learning_rate": 4.831437933864213e-06,
      "loss": 2.8953,
      "step": 217223
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0519211292266846,
      "learning_rate": 4.830706788654604e-06,
      "loss": 2.9162,
      "step": 217224
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9373159408569336,
      "learning_rate": 4.829975698322564e-06,
      "loss": 2.8364,
      "step": 217225
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3300225734710693,
      "learning_rate": 4.8292446628681926e-06,
      "loss": 2.9334,
      "step": 217226
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1885876655578613,
      "learning_rate": 4.828513682291624e-06,
      "loss": 3.1532,
      "step": 217227
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.418927192687988,
      "learning_rate": 4.82778275659299e-06,
      "loss": 2.8797,
      "step": 217228
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.007892370223999,
      "learning_rate": 4.827051885772459e-06,
      "loss": 3.0893,
      "step": 217229
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7488279342651367,
      "learning_rate": 4.826321069830163e-06,
      "loss": 3.0707,
      "step": 217230
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6596856117248535,
      "learning_rate": 4.825590308766236e-06,
      "loss": 2.767,
      "step": 217231
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9357047080993652,
      "learning_rate": 4.8248596025808104e-06,
      "loss": 2.9376,
      "step": 217232
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4409775733947754,
      "learning_rate": 4.824128951273953e-06,
      "loss": 3.0091,
      "step": 217233
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.912928581237793,
      "learning_rate": 4.823398354845931e-06,
      "loss": 2.8802,
      "step": 217234
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.210711717605591,
      "learning_rate": 4.822667813296743e-06,
      "loss": 2.7344,
      "step": 217235
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.196732759475708,
      "learning_rate": 4.821937326626657e-06,
      "loss": 2.9866,
      "step": 217236
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.705153703689575,
      "learning_rate": 4.821206894835738e-06,
      "loss": 2.9652,
      "step": 217237
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.054656505584717,
      "learning_rate": 4.820476517924088e-06,
      "loss": 2.9432,
      "step": 217238
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.344193696975708,
      "learning_rate": 4.819746195891905e-06,
      "loss": 2.8547,
      "step": 217239
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.721409320831299,
      "learning_rate": 4.819015928739323e-06,
      "loss": 2.9142,
      "step": 217240
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2894654273986816,
      "learning_rate": 4.818285716466408e-06,
      "loss": 2.9161,
      "step": 217241
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.806229591369629,
      "learning_rate": 4.817555559073394e-06,
      "loss": 2.9759,
      "step": 217242
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9246768951416016,
      "learning_rate": 4.816825456560347e-06,
      "loss": 2.5926,
      "step": 217243
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6693050861358643,
      "learning_rate": 4.816095408927434e-06,
      "loss": 2.8212,
      "step": 217244
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8079090118408203,
      "learning_rate": 4.815365416174754e-06,
      "loss": 2.8448,
      "step": 217245
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4152679443359375,
      "learning_rate": 4.814635478302509e-06,
      "loss": 2.9517,
      "step": 217246
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9548003673553467,
      "learning_rate": 4.813905595310763e-06,
      "loss": 2.9517,
      "step": 217247
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4124221801757812,
      "learning_rate": 4.8131757671997175e-06,
      "loss": 2.8959,
      "step": 217248
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.67121958732605,
      "learning_rate": 4.812445993969438e-06,
      "loss": 2.9012,
      "step": 217249
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.880746603012085,
      "learning_rate": 4.811716275620125e-06,
      "loss": 3.0149,
      "step": 217250
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.774734020233154,
      "learning_rate": 4.810986612151879e-06,
      "loss": 3.0546,
      "step": 217251
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.61672306060791,
      "learning_rate": 4.810257003564832e-06,
      "loss": 2.8083,
      "step": 217252
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.353445291519165,
      "learning_rate": 4.809527449859119e-06,
      "loss": 3.1359,
      "step": 217253
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.170445203781128,
      "learning_rate": 4.808797951034904e-06,
      "loss": 3.1034,
      "step": 217254
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.375417947769165,
      "learning_rate": 4.808068507092322e-06,
      "loss": 3.0196,
      "step": 217255
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8721024990081787,
      "learning_rate": 4.807339118031506e-06,
      "loss": 2.9781,
      "step": 217256
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.582536458969116,
      "learning_rate": 4.806609783852522e-06,
      "loss": 2.9675,
      "step": 217257
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9101319313049316,
      "learning_rate": 4.805880504555604e-06,
      "loss": 2.8015,
      "step": 217258
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.017585039138794,
      "learning_rate": 4.805151280140818e-06,
      "loss": 2.9321,
      "step": 217259
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.281223773956299,
      "learning_rate": 4.804422110608331e-06,
      "loss": 3.0768,
      "step": 217260
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8305423259735107,
      "learning_rate": 4.803692995958275e-06,
      "loss": 2.8079,
      "step": 217261
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.836071729660034,
      "learning_rate": 4.802963936190818e-06,
      "loss": 2.6261,
      "step": 217262
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.265049457550049,
      "learning_rate": 4.802234931306026e-06,
      "loss": 2.9268,
      "step": 217263
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8443896770477295,
      "learning_rate": 4.8015059813040654e-06,
      "loss": 2.9818,
      "step": 217264
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.895695924758911,
      "learning_rate": 4.80077708618507e-06,
      "loss": 2.7368,
      "step": 217265
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.808010101318359,
      "learning_rate": 4.8000482459492395e-06,
      "loss": 2.8778,
      "step": 217266
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0707104206085205,
      "learning_rate": 4.799319460596574e-06,
      "loss": 3.1279,
      "step": 217267
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.028724431991577,
      "learning_rate": 4.798590730127372e-06,
      "loss": 2.7576,
      "step": 217268
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.254830837249756,
      "learning_rate": 4.797862054541634e-06,
      "loss": 2.9317,
      "step": 217269
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4468698501586914,
      "learning_rate": 4.797133433839562e-06,
      "loss": 2.5887,
      "step": 217270
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9694788455963135,
      "learning_rate": 4.796404868021253e-06,
      "loss": 2.7762,
      "step": 217271
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.599210500717163,
      "learning_rate": 4.795676357086875e-06,
      "loss": 3.034,
      "step": 217272
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1726276874542236,
      "learning_rate": 4.794947901036561e-06,
      "loss": 2.8265,
      "step": 217273
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8843095302581787,
      "learning_rate": 4.794219499870444e-06,
      "loss": 2.9842,
      "step": 217274
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.81562876701355,
      "learning_rate": 4.793491153588658e-06,
      "loss": 2.8534,
      "step": 217275
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.518498659133911,
      "learning_rate": 4.792762862191302e-06,
      "loss": 3.212,
      "step": 217276
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.760725498199463,
      "learning_rate": 4.792034625678576e-06,
      "loss": 2.9928,
      "step": 217277
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5451958179473877,
      "learning_rate": 4.791306444050547e-06,
      "loss": 3.1362,
      "step": 217278
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8313238620758057,
      "learning_rate": 4.790578317307414e-06,
      "loss": 2.6816,
      "step": 217279
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8507962226867676,
      "learning_rate": 4.7898502454492785e-06,
      "loss": 2.7863,
      "step": 217280
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6049513816833496,
      "learning_rate": 4.7891222284763054e-06,
      "loss": 3.1428,
      "step": 217281
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.821610927581787,
      "learning_rate": 4.788394266388596e-06,
      "loss": 2.9932,
      "step": 217282
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1684327125549316,
      "learning_rate": 4.7876663591862485e-06,
      "loss": 2.6994,
      "step": 217283
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.309224843978882,
      "learning_rate": 4.7869385068694975e-06,
      "loss": 2.8938,
      "step": 217284
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9457995891571045,
      "learning_rate": 4.78621070943841e-06,
      "loss": 2.8723,
      "step": 217285
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7034478187561035,
      "learning_rate": 4.78548296689315e-06,
      "loss": 2.9001,
      "step": 217286
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7923054695129395,
      "learning_rate": 4.7847552792338535e-06,
      "loss": 2.8661,
      "step": 217287
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0102744102478027,
      "learning_rate": 4.78402764646062e-06,
      "loss": 2.8539,
      "step": 217288
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.582047462463379,
      "learning_rate": 4.783300068573614e-06,
      "loss": 3.0953,
      "step": 217289
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1487655639648438,
      "learning_rate": 4.782572545572972e-06,
      "loss": 2.9499,
      "step": 217290
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8732199668884277,
      "learning_rate": 4.78184507745879e-06,
      "loss": 2.8883,
      "step": 217291
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.374051332473755,
      "learning_rate": 4.781117664231271e-06,
      "loss": 2.6851,
      "step": 217292
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1482222080230713,
      "learning_rate": 4.7803903058905136e-06,
      "loss": 2.8806,
      "step": 217293
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.078317403793335,
      "learning_rate": 4.779663002436618e-06,
      "loss": 2.8707,
      "step": 217294
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1148171424865723,
      "learning_rate": 4.778935753869784e-06,
      "loss": 3.0455,
      "step": 217295
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.929126501083374,
      "learning_rate": 4.7782085601901445e-06,
      "loss": 3.1975,
      "step": 217296
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.147649049758911,
      "learning_rate": 4.7774814213977665e-06,
      "loss": 2.9464,
      "step": 217297
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6565890312194824,
      "learning_rate": 4.77675433749285e-06,
      "loss": 2.9368,
      "step": 217298
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.259164333343506,
      "learning_rate": 4.776027308475494e-06,
      "loss": 2.8642,
      "step": 217299
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.564913272857666,
      "learning_rate": 4.775300334345833e-06,
      "loss": 3.0202,
      "step": 217300
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.943061590194702,
      "learning_rate": 4.774573415104066e-06,
      "loss": 2.9297,
      "step": 217301
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.587221622467041,
      "learning_rate": 4.773846550750227e-06,
      "loss": 3.1529,
      "step": 217302
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8250677585601807,
      "learning_rate": 4.773119741284548e-06,
      "loss": 2.8118,
      "step": 217303
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5885651111602783,
      "learning_rate": 4.772392986707096e-06,
      "loss": 2.7332,
      "step": 217304
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6528754234313965,
      "learning_rate": 4.771666287018039e-06,
      "loss": 2.977,
      "step": 217305
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.394028663635254,
      "learning_rate": 4.770939642217475e-06,
      "loss": 2.8404,
      "step": 217306
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9792299270629883,
      "learning_rate": 4.770213052305604e-06,
      "loss": 2.8395,
      "step": 217307
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0455760955810547,
      "learning_rate": 4.769486517282462e-06,
      "loss": 2.8161,
      "step": 217308
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5451624393463135,
      "learning_rate": 4.768760037148312e-06,
      "loss": 2.8802,
      "step": 217309
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0727102756500244,
      "learning_rate": 4.768033611903188e-06,
      "loss": 3.1134,
      "step": 217310
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2220497131347656,
      "learning_rate": 4.767307241547291e-06,
      "loss": 2.78,
      "step": 217311
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5314393043518066,
      "learning_rate": 4.766580926080687e-06,
      "loss": 3.0895,
      "step": 217312
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.371680498123169,
      "learning_rate": 4.765854665503577e-06,
      "loss": 2.9699,
      "step": 217313
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1785683631896973,
      "learning_rate": 4.765128459816026e-06,
      "loss": 2.9032,
      "step": 217314
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.658783435821533,
      "learning_rate": 4.7644023090182335e-06,
      "loss": 3.0672,
      "step": 217315
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7451417446136475,
      "learning_rate": 4.763676213110334e-06,
      "loss": 3.1521,
      "step": 217316
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2699263095855713,
      "learning_rate": 4.762950172092428e-06,
      "loss": 2.9451,
      "step": 217317
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.171177864074707,
      "learning_rate": 4.762224185964647e-06,
      "loss": 2.9477,
      "step": 217318
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8073458671569824,
      "learning_rate": 4.761498254727158e-06,
      "loss": 2.8644,
      "step": 217319
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.188645601272583,
      "learning_rate": 4.760772378380063e-06,
      "loss": 2.7217,
      "step": 217320
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7422707080841064,
      "learning_rate": 4.760046556923524e-06,
      "loss": 2.7808,
      "step": 217321
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.771772623062134,
      "learning_rate": 4.759320790357679e-06,
      "loss": 3.2878,
      "step": 217322
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2193024158477783,
      "learning_rate": 4.758595078682658e-06,
      "loss": 2.6861,
      "step": 217323
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.667585611343384,
      "learning_rate": 4.757869421898531e-06,
      "loss": 2.5018,
      "step": 217324
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9372665882110596,
      "learning_rate": 4.757143820005527e-06,
      "loss": 2.9037,
      "step": 217325
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2223830223083496,
      "learning_rate": 4.756418273003748e-06,
      "loss": 3.0519,
      "step": 217326
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8798797130584717,
      "learning_rate": 4.755692780893294e-06,
      "loss": 2.6838,
      "step": 217327
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8255789279937744,
      "learning_rate": 4.754967343674365e-06,
      "loss": 3.1012,
      "step": 217328
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5908443927764893,
      "learning_rate": 4.754241961347061e-06,
      "loss": 2.8841,
      "step": 217329
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9759726524353027,
      "learning_rate": 4.75351663391148e-06,
      "loss": 2.8986,
      "step": 217330
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5852601528167725,
      "learning_rate": 4.752791361367825e-06,
      "loss": 2.966,
      "step": 217331
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.051055669784546,
      "learning_rate": 4.752066143716193e-06,
      "loss": 2.9962,
      "step": 217332
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.636444568634033,
      "learning_rate": 4.751340980956719e-06,
      "loss": 3.0819,
      "step": 217333
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7802188396453857,
      "learning_rate": 4.750615873089536e-06,
      "loss": 2.9585,
      "step": 217334
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1173837184906006,
      "learning_rate": 4.7498908201148435e-06,
      "loss": 2.8407,
      "step": 217335
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8479700088500977,
      "learning_rate": 4.749165822032641e-06,
      "loss": 2.8326,
      "step": 217336
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.516374111175537,
      "learning_rate": 4.748440878843196e-06,
      "loss": 2.9643,
      "step": 217337
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.410723924636841,
      "learning_rate": 4.747715990546541e-06,
      "loss": 2.9108,
      "step": 217338
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.872018337249756,
      "learning_rate": 4.74699115714291e-06,
      "loss": 3.0586,
      "step": 217339
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6149661540985107,
      "learning_rate": 4.746266378632335e-06,
      "loss": 3.0762,
      "step": 217340
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.820963144302368,
      "learning_rate": 4.745541655015051e-06,
      "loss": 2.8748,
      "step": 217341
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7126381397247314,
      "learning_rate": 4.744816986291122e-06,
      "loss": 2.6699,
      "step": 217342
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.827126979827881,
      "learning_rate": 4.744092372460717e-06,
      "loss": 3.3168,
      "step": 217343
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.540468692779541,
      "learning_rate": 4.743367813523935e-06,
      "loss": 2.7967,
      "step": 217344
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.79782772064209,
      "learning_rate": 4.742643309480942e-06,
      "loss": 2.7474,
      "step": 217345
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6556520462036133,
      "learning_rate": 4.741918860331872e-06,
      "loss": 3.1766,
      "step": 217346
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6215178966522217,
      "learning_rate": 4.7411944660768586e-06,
      "loss": 3.0254,
      "step": 217347
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7258682250976562,
      "learning_rate": 4.740470126716034e-06,
      "loss": 2.7557,
      "step": 217348
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8952465057373047,
      "learning_rate": 4.739745842249498e-06,
      "loss": 2.6884,
      "step": 217349
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.836658000946045,
      "learning_rate": 4.739021612677452e-06,
      "loss": 2.6647,
      "step": 217350
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.397493362426758,
      "learning_rate": 4.7382974379999604e-06,
      "loss": 3.0296,
      "step": 217351
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.703646659851074,
      "learning_rate": 4.737573318217225e-06,
      "loss": 2.9589,
      "step": 217352
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.138770818710327,
      "learning_rate": 4.736849253329344e-06,
      "loss": 3.1331,
      "step": 217353
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.769059181213379,
      "learning_rate": 4.736125243336453e-06,
      "loss": 3.2047,
      "step": 217354
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9898884296417236,
      "learning_rate": 4.735401288238683e-06,
      "loss": 2.9685,
      "step": 217355
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3854329586029053,
      "learning_rate": 4.734677388036168e-06,
      "loss": 2.9138,
      "step": 217356
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9832775592803955,
      "learning_rate": 4.7339535427290744e-06,
      "loss": 2.7869,
      "step": 217357
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1997721195220947,
      "learning_rate": 4.733229752317469e-06,
      "loss": 2.8985,
      "step": 217358
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.128183364868164,
      "learning_rate": 4.732506016801585e-06,
      "loss": 3.002,
      "step": 217359
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2336084842681885,
      "learning_rate": 4.731782336181455e-06,
      "loss": 2.7823,
      "step": 217360
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8951869010925293,
      "learning_rate": 4.731058710457314e-06,
      "loss": 2.8962,
      "step": 217361
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6090946197509766,
      "learning_rate": 4.730335139629193e-06,
      "loss": 3.0522,
      "step": 217362
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0513083934783936,
      "learning_rate": 4.7296116236972935e-06,
      "loss": 3.0548,
      "step": 217363
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5718514919281006,
      "learning_rate": 4.728888162661715e-06,
      "loss": 2.9768,
      "step": 217364
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7064766883850098,
      "learning_rate": 4.728164756522623e-06,
      "loss": 2.7103,
      "step": 217365
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9854726791381836,
      "learning_rate": 4.727441405280153e-06,
      "loss": 2.6166,
      "step": 217366
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3961853981018066,
      "learning_rate": 4.726718108934435e-06,
      "loss": 2.7377,
      "step": 217367
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0939035415649414,
      "learning_rate": 4.725994867485538e-06,
      "loss": 2.9799,
      "step": 217368
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0571775436401367,
      "learning_rate": 4.7252716809336955e-06,
      "loss": 2.9577,
      "step": 217369
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.079669952392578,
      "learning_rate": 4.724548549279006e-06,
      "loss": 3.0595,
      "step": 217370
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0558648109436035,
      "learning_rate": 4.7238254725215695e-06,
      "loss": 2.8623,
      "step": 217371
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5067903995513916,
      "learning_rate": 4.723102450661587e-06,
      "loss": 2.7806,
      "step": 217372
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.392831802368164,
      "learning_rate": 4.722379483699123e-06,
      "loss": 2.9933,
      "step": 217373
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1123099327087402,
      "learning_rate": 4.7216565716343135e-06,
      "loss": 2.9156,
      "step": 217374
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.789996385574341,
      "learning_rate": 4.7209337144673895e-06,
      "loss": 3.0279,
      "step": 217375
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.70725154876709,
      "learning_rate": 4.720210912198352e-06,
      "loss": 2.8416,
      "step": 217376
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.809225559234619,
      "learning_rate": 4.7194881648274344e-06,
      "loss": 2.8044,
      "step": 217377
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.031851291656494,
      "learning_rate": 4.7187654723547685e-06,
      "loss": 3.0254,
      "step": 217378
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5415139198303223,
      "learning_rate": 4.718042834780389e-06,
      "loss": 2.7328,
      "step": 217379
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8007259368896484,
      "learning_rate": 4.717320252104562e-06,
      "loss": 2.5955,
      "step": 217380
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6174678802490234,
      "learning_rate": 4.71659772432732e-06,
      "loss": 2.9736,
      "step": 217381
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.678557872772217,
      "learning_rate": 4.715875251448831e-06,
      "loss": 3.1534,
      "step": 217382
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.728029727935791,
      "learning_rate": 4.71515283346926e-06,
      "loss": 3.1157,
      "step": 217383
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7054498195648193,
      "learning_rate": 4.7144304703887414e-06,
      "loss": 2.8703,
      "step": 217384
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.526734828948975,
      "learning_rate": 4.713708162207308e-06,
      "loss": 2.8701,
      "step": 217385
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0583553314208984,
      "learning_rate": 4.7129859089252265e-06,
      "loss": 2.9926,
      "step": 217386
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8382513523101807,
      "learning_rate": 4.712263710542563e-06,
      "loss": 2.7756,
      "step": 217387
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.875336170196533,
      "learning_rate": 4.711541567059452e-06,
      "loss": 3.0748,
      "step": 217388
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.681034564971924,
      "learning_rate": 4.7108194784760575e-06,
      "loss": 3.1114,
      "step": 217389
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5472469329833984,
      "learning_rate": 4.710097444792516e-06,
      "loss": 2.9023,
      "step": 217390
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0891900062561035,
      "learning_rate": 4.709375466008891e-06,
      "loss": 2.8783,
      "step": 217391
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0638628005981445,
      "learning_rate": 4.708653542125384e-06,
      "loss": 2.8126,
      "step": 217392
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8335280418395996,
      "learning_rate": 4.707931673142096e-06,
      "loss": 2.8043,
      "step": 217393
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.576658248901367,
      "learning_rate": 4.7072098590592244e-06,
      "loss": 2.9294,
      "step": 217394
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.008465528488159,
      "learning_rate": 4.706488099876837e-06,
      "loss": 2.9109,
      "step": 217395
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8666443824768066,
      "learning_rate": 4.705766395595101e-06,
      "loss": 2.751,
      "step": 217396
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8504717350006104,
      "learning_rate": 4.705044746214082e-06,
      "loss": 2.9716,
      "step": 217397
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.712991714477539,
      "learning_rate": 4.704323151734013e-06,
      "loss": 2.8477,
      "step": 217398
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.155935764312744,
      "learning_rate": 4.703601612154961e-06,
      "loss": 2.7879,
      "step": 217399
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.333277225494385,
      "learning_rate": 4.702880127477127e-06,
      "loss": 3.1012,
      "step": 217400
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7711846828460693,
      "learning_rate": 4.702158697700542e-06,
      "loss": 2.9059,
      "step": 217401
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7577638626098633,
      "learning_rate": 4.701437322825474e-06,
      "loss": 2.755,
      "step": 217402
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.667121648788452,
      "learning_rate": 4.700716002851923e-06,
      "loss": 3.1591,
      "step": 217403
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.685776710510254,
      "learning_rate": 4.699994737780121e-06,
      "loss": 2.7265,
      "step": 217404
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.124981164932251,
      "learning_rate": 4.699273527610104e-06,
      "loss": 2.9233,
      "step": 217405
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8007876873016357,
      "learning_rate": 4.698552372342135e-06,
      "loss": 2.9038,
      "step": 217406
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.849457025527954,
      "learning_rate": 4.697831271976215e-06,
      "loss": 2.9502,
      "step": 217407
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.332317352294922,
      "learning_rate": 4.697110226512612e-06,
      "loss": 2.9769,
      "step": 217408
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.495368242263794,
      "learning_rate": 4.696389235951359e-06,
      "loss": 2.8566,
      "step": 217409
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.507298469543457,
      "learning_rate": 4.695668300292621e-06,
      "loss": 2.7304,
      "step": 217410
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8009655475616455,
      "learning_rate": 4.694947419536533e-06,
      "loss": 3.0184,
      "step": 217411
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2178142070770264,
      "learning_rate": 4.694226593683225e-06,
      "loss": 3.1334,
      "step": 217412
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0897819995880127,
      "learning_rate": 4.6935058227328014e-06,
      "loss": 3.0682,
      "step": 217413
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.983119010925293,
      "learning_rate": 4.692785106685526e-06,
      "loss": 2.8547,
      "step": 217414
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8435471057891846,
      "learning_rate": 4.692064445541333e-06,
      "loss": 3.0776,
      "step": 217415
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.310896873474121,
      "learning_rate": 4.691343839300521e-06,
      "loss": 2.6696,
      "step": 217416
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.208106279373169,
      "learning_rate": 4.690623287963125e-06,
      "loss": 2.8444,
      "step": 217417
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.444044828414917,
      "learning_rate": 4.689902791529343e-06,
      "loss": 2.8592,
      "step": 217418
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6332995891571045,
      "learning_rate": 4.689182349999276e-06,
      "loss": 2.5881,
      "step": 217419
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.052372455596924,
      "learning_rate": 4.688461963373091e-06,
      "loss": 3.2228,
      "step": 217420
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4192652702331543,
      "learning_rate": 4.687741631650854e-06,
      "loss": 3.023,
      "step": 217421
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0160834789276123,
      "learning_rate": 4.687021354832732e-06,
      "loss": 3.1637,
      "step": 217422
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5122809410095215,
      "learning_rate": 4.686301132918891e-06,
      "loss": 3.0763,
      "step": 217423
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.54166841506958,
      "learning_rate": 4.6855809659094304e-06,
      "loss": 2.9394,
      "step": 217424
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.768629789352417,
      "learning_rate": 4.684860853804484e-06,
      "loss": 2.7474,
      "step": 217425
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0201516151428223,
      "learning_rate": 4.684140796604252e-06,
      "loss": 3.0088,
      "step": 217426
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0092926025390625,
      "learning_rate": 4.683420794308768e-06,
      "loss": 2.8399,
      "step": 217427
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.109593629837036,
      "learning_rate": 4.68270084691823e-06,
      "loss": 2.8922,
      "step": 217428
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3555145263671875,
      "learning_rate": 4.681980954432707e-06,
      "loss": 3.0111,
      "step": 217429
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9161717891693115,
      "learning_rate": 4.681261116852431e-06,
      "loss": 2.873,
      "step": 217430
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.734086513519287,
      "learning_rate": 4.6805413341774675e-06,
      "loss": 2.9523,
      "step": 217431
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0182735919952393,
      "learning_rate": 4.679821606407986e-06,
      "loss": 2.9724,
      "step": 217432
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4215662479400635,
      "learning_rate": 4.679101933544049e-06,
      "loss": 2.8489,
      "step": 217433
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5823559761047363,
      "learning_rate": 4.678382315585894e-06,
      "loss": 2.9396,
      "step": 217434
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.962045907974243,
      "learning_rate": 4.677662752533551e-06,
      "loss": 2.8427,
      "step": 217435
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7500321865081787,
      "learning_rate": 4.676943244387255e-06,
      "loss": 2.8746,
      "step": 217436
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.893362045288086,
      "learning_rate": 4.676223791147038e-06,
      "loss": 2.8724,
      "step": 217437
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0853655338287354,
      "learning_rate": 4.675504392813134e-06,
      "loss": 3.0648,
      "step": 217438
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.460878610610962,
      "learning_rate": 4.67478504938561e-06,
      "loss": 2.7274,
      "step": 217439
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.594783067703247,
      "learning_rate": 4.674065760864631e-06,
      "loss": 2.8988,
      "step": 217440
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8624181747436523,
      "learning_rate": 4.673346527250266e-06,
      "loss": 2.8863,
      "step": 217441
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7437214851379395,
      "learning_rate": 4.672627348542745e-06,
      "loss": 2.9556,
      "step": 217442
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8914809226989746,
      "learning_rate": 4.671908224742138e-06,
      "loss": 3.0744,
      "step": 217443
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6832382678985596,
      "learning_rate": 4.671189155848642e-06,
      "loss": 3.0499,
      "step": 217444
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.129235029220581,
      "learning_rate": 4.670470141862326e-06,
      "loss": 2.747,
      "step": 217445
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.228903293609619,
      "learning_rate": 4.6697511827833216e-06,
      "loss": 2.9324,
      "step": 217446
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1029059886932373,
      "learning_rate": 4.669032278611795e-06,
      "loss": 2.7827,
      "step": 217447
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.122779369354248,
      "learning_rate": 4.668313429347881e-06,
      "loss": 3.0298,
      "step": 217448
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3923704624176025,
      "learning_rate": 4.667594634991678e-06,
      "loss": 3.0086,
      "step": 217449
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.832672595977783,
      "learning_rate": 4.666875895543387e-06,
      "loss": 2.7652,
      "step": 217450
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8381731510162354,
      "learning_rate": 4.666157211003041e-06,
      "loss": 3.0852,
      "step": 217451
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2221920490264893,
      "learning_rate": 4.665438581370873e-06,
      "loss": 2.9362,
      "step": 217452
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.75822114944458,
      "learning_rate": 4.664720006646949e-06,
      "loss": 3.0944,
      "step": 217453
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.820241928100586,
      "learning_rate": 4.66400148683147e-06,
      "loss": 3.1162,
      "step": 217454
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.602813243865967,
      "learning_rate": 4.663283021924469e-06,
      "loss": 2.952,
      "step": 217455
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9320366382598877,
      "learning_rate": 4.6625646119261785e-06,
      "loss": 2.9716,
      "step": 217456
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7977871894836426,
      "learning_rate": 4.661846256836698e-06,
      "loss": 2.8596,
      "step": 217457
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.33962082862854,
      "learning_rate": 4.661127956656163e-06,
      "loss": 2.8195,
      "step": 217458
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0809292793273926,
      "learning_rate": 4.660409711384638e-06,
      "loss": 2.9299,
      "step": 217459
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1891939640045166,
      "learning_rate": 4.659691521022391e-06,
      "loss": 2.932,
      "step": 217460
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.669524908065796,
      "learning_rate": 4.65897338556942e-06,
      "loss": 2.8053,
      "step": 217461
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9275577068328857,
      "learning_rate": 4.65825530502596e-06,
      "loss": 2.9425,
      "step": 217462
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.383867025375366,
      "learning_rate": 4.6575372793921094e-06,
      "loss": 2.8872,
      "step": 217463
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2433011531829834,
      "learning_rate": 4.6568193086679695e-06,
      "loss": 2.8595,
      "step": 217464
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1070847511291504,
      "learning_rate": 4.656101392853706e-06,
      "loss": 2.8726,
      "step": 217465
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1919493675231934,
      "learning_rate": 4.655383531949486e-06,
      "loss": 2.8959,
      "step": 217466
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9531917572021484,
      "learning_rate": 4.654665725955342e-06,
      "loss": 2.8913,
      "step": 217467
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.676522731781006,
      "learning_rate": 4.653947974871508e-06,
      "loss": 3.0372,
      "step": 217468
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5048084259033203,
      "learning_rate": 4.653230278698117e-06,
      "loss": 3.05,
      "step": 217469
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.032353639602661,
      "learning_rate": 4.652512637435202e-06,
      "loss": 2.6154,
      "step": 217470
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5449652671813965,
      "learning_rate": 4.651795051082996e-06,
      "loss": 2.8482,
      "step": 217471
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.034705877304077,
      "learning_rate": 4.651077519641599e-06,
      "loss": 3.0567,
      "step": 217472
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.26879620552063,
      "learning_rate": 4.6503600431111125e-06,
      "loss": 2.8677,
      "step": 217473
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6819002628326416,
      "learning_rate": 4.649642621491701e-06,
      "loss": 2.8096,
      "step": 217474
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0964767932891846,
      "learning_rate": 4.648925254783531e-06,
      "loss": 2.8805,
      "step": 217475
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2664146423339844,
      "learning_rate": 4.648207942986671e-06,
      "loss": 2.8867,
      "step": 217476
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9855129718780518,
      "learning_rate": 4.647490686101285e-06,
      "loss": 2.7491,
      "step": 217477
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9913549423217773,
      "learning_rate": 4.646773484127508e-06,
      "loss": 2.6926,
      "step": 217478
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7002267837524414,
      "learning_rate": 4.646056337065474e-06,
      "loss": 3.0683,
      "step": 217479
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.008344888687134,
      "learning_rate": 4.645339244915347e-06,
      "loss": 2.9914,
      "step": 217480
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7620809078216553,
      "learning_rate": 4.644622207677195e-06,
      "loss": 3.1957,
      "step": 217481
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.85549259185791,
      "learning_rate": 4.643905225351152e-06,
      "loss": 2.8204,
      "step": 217482
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9581918716430664,
      "learning_rate": 4.64318829793745e-06,
      "loss": 2.9428,
      "step": 217483
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.511012315750122,
      "learning_rate": 4.64247142543609e-06,
      "loss": 3.0274,
      "step": 217484
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.987596273422241,
      "learning_rate": 4.6417546078473035e-06,
      "loss": 2.9024,
      "step": 217485
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7792656421661377,
      "learning_rate": 4.641037845171191e-06,
      "loss": 2.7592,
      "step": 217486
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.976477861404419,
      "learning_rate": 4.640321137407887e-06,
      "loss": 3.0539,
      "step": 217487
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.318284749984741,
      "learning_rate": 4.639604484557491e-06,
      "loss": 2.857,
      "step": 217488
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.510624885559082,
      "learning_rate": 4.638887886620202e-06,
      "loss": 2.8967,
      "step": 217489
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2455484867095947,
      "learning_rate": 4.63817134359612e-06,
      "loss": 2.8264,
      "step": 217490
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.952332019805908,
      "learning_rate": 4.637454855485345e-06,
      "loss": 2.7961,
      "step": 217491
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3277218341827393,
      "learning_rate": 4.6367384222880774e-06,
      "loss": 2.8726,
      "step": 217492
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.799715757369995,
      "learning_rate": 4.636022044004417e-06,
      "loss": 2.9148,
      "step": 217493
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9646458625793457,
      "learning_rate": 4.635305720634463e-06,
      "loss": 2.7982,
      "step": 217494
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8547685146331787,
      "learning_rate": 4.634589452178416e-06,
      "loss": 2.8096,
      "step": 217495
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.890533447265625,
      "learning_rate": 4.633873238636343e-06,
      "loss": 2.7587,
      "step": 217496
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8991031646728516,
      "learning_rate": 4.6331570800084095e-06,
      "loss": 2.8274,
      "step": 217497
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5614006519317627,
      "learning_rate": 4.632440976294749e-06,
      "loss": 2.9932,
      "step": 217498
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.632280349731445,
      "learning_rate": 4.631724927495561e-06,
      "loss": 2.7633,
      "step": 217499
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.370171546936035,
      "learning_rate": 4.6310089336108136e-06,
      "loss": 2.8408,
      "step": 217500
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.541499376296997,
      "learning_rate": 4.630292994640805e-06,
      "loss": 2.9218,
      "step": 217501
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7868151664733887,
      "learning_rate": 4.629577110585536e-06,
      "loss": 3.0724,
      "step": 217502
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.106355905532837,
      "learning_rate": 4.628861281445273e-06,
      "loss": 2.8967,
      "step": 217503
    },
    {
      "epoch": 2.83,
      "grad_norm": 5.602034091949463,
      "learning_rate": 4.628145507220016e-06,
      "loss": 2.7956,
      "step": 217504
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4899721145629883,
      "learning_rate": 4.627429787910031e-06,
      "loss": 2.9673,
      "step": 217505
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6521079540252686,
      "learning_rate": 4.6267141235153516e-06,
      "loss": 3.0486,
      "step": 217506
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.804966688156128,
      "learning_rate": 4.625998514036144e-06,
      "loss": 2.9457,
      "step": 217507
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.212773561477661,
      "learning_rate": 4.625282959472509e-06,
      "loss": 3.0404,
      "step": 217508
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9795262813568115,
      "learning_rate": 4.624567459824646e-06,
      "loss": 2.8945,
      "step": 217509
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7168238162994385,
      "learning_rate": 4.623852015092621e-06,
      "loss": 2.8635,
      "step": 217510
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1187634468078613,
      "learning_rate": 4.623136625276636e-06,
      "loss": 2.7908,
      "step": 217511
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1897339820861816,
      "learning_rate": 4.622421290376754e-06,
      "loss": 2.8874,
      "step": 217512
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1763124465942383,
      "learning_rate": 4.621706010393145e-06,
      "loss": 2.9356,
      "step": 217513
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.720020055770874,
      "learning_rate": 4.62099078532594e-06,
      "loss": 3.0045,
      "step": 217514
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.411280393600464,
      "learning_rate": 4.620275615175273e-06,
      "loss": 3.0691,
      "step": 217515
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.157625675201416,
      "learning_rate": 4.619560499941244e-06,
      "loss": 3.2811,
      "step": 217516
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.594101667404175,
      "learning_rate": 4.618845439624086e-06,
      "loss": 2.9659,
      "step": 217517
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.621258020401001,
      "learning_rate": 4.618130434223799e-06,
      "loss": 2.9148,
      "step": 217518
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2216622829437256,
      "learning_rate": 4.617415483740583e-06,
      "loss": 2.8685,
      "step": 217519
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.881051778793335,
      "learning_rate": 4.616700588174571e-06,
      "loss": 3.0675,
      "step": 217520
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.473895311355591,
      "learning_rate": 4.615985747525897e-06,
      "loss": 2.7989,
      "step": 217521
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.004757881164551,
      "learning_rate": 4.615270961794659e-06,
      "loss": 2.7236,
      "step": 217522
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.548264980316162,
      "learning_rate": 4.6145562309810925e-06,
      "loss": 3.0253,
      "step": 217523
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.036767959594727,
      "learning_rate": 4.613841555085162e-06,
      "loss": 2.8535,
      "step": 217524
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.736053943634033,
      "learning_rate": 4.61312693410717e-06,
      "loss": 2.994,
      "step": 217525
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.773790121078491,
      "learning_rate": 4.612412368047114e-06,
      "loss": 3.0252,
      "step": 217526
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.043060779571533,
      "learning_rate": 4.611697856905228e-06,
      "loss": 2.9883,
      "step": 217527
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.000860691070557,
      "learning_rate": 4.610983400681578e-06,
      "loss": 3.121,
      "step": 217528
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0586724281311035,
      "learning_rate": 4.610268999376332e-06,
      "loss": 2.9424,
      "step": 217529
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.063774824142456,
      "learning_rate": 4.609554652989622e-06,
      "loss": 3.1576,
      "step": 217530
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.516724109649658,
      "learning_rate": 4.608840361521582e-06,
      "loss": 2.8883,
      "step": 217531
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9537713527679443,
      "learning_rate": 4.608126124972311e-06,
      "loss": 2.8627,
      "step": 217532
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1574299335479736,
      "learning_rate": 4.607411943341977e-06,
      "loss": 2.9042,
      "step": 217533
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6722536087036133,
      "learning_rate": 4.606697816630678e-06,
      "loss": 2.9508,
      "step": 217534
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5507500171661377,
      "learning_rate": 4.605983744838582e-06,
      "loss": 2.9147,
      "step": 217535
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.912095546722412,
      "learning_rate": 4.6052697279658214e-06,
      "loss": 2.988,
      "step": 217536
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4820938110351562,
      "learning_rate": 4.60455576601253e-06,
      "loss": 2.789,
      "step": 217537
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7692482471466064,
      "learning_rate": 4.603841858978774e-06,
      "loss": 2.7849,
      "step": 217538
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1945509910583496,
      "learning_rate": 4.603128006864787e-06,
      "loss": 2.9642,
      "step": 217539
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7066245079040527,
      "learning_rate": 4.602414209670635e-06,
      "loss": 2.6508,
      "step": 217540
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8071556091308594,
      "learning_rate": 4.6017004673964856e-06,
      "loss": 2.7429,
      "step": 217541
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.841926097869873,
      "learning_rate": 4.600986780042437e-06,
      "loss": 3.0635,
      "step": 217542
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7625038623809814,
      "learning_rate": 4.6002731476086575e-06,
      "loss": 2.9854,
      "step": 217543
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1212096214294434,
      "learning_rate": 4.599559570095246e-06,
      "loss": 3.0246,
      "step": 217544
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2336530685424805,
      "learning_rate": 4.598846047502369e-06,
      "loss": 2.7284,
      "step": 217545
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.848210334777832,
      "learning_rate": 4.598132579830127e-06,
      "loss": 2.788,
      "step": 217546
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9620494842529297,
      "learning_rate": 4.597419167078687e-06,
      "loss": 3.0307,
      "step": 217547
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.6059491634368896,
      "learning_rate": 4.59670580924818e-06,
      "loss": 2.9877,
      "step": 217548
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.703064441680908,
      "learning_rate": 4.5959925063386745e-06,
      "loss": 2.9653,
      "step": 217549
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8038880825042725,
      "learning_rate": 4.595279258350371e-06,
      "loss": 2.792,
      "step": 217550
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7279388904571533,
      "learning_rate": 4.5945660652834e-06,
      "loss": 3.0794,
      "step": 217551
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.213087320327759,
      "learning_rate": 4.593852927137831e-06,
      "loss": 3.056,
      "step": 217552
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.976996898651123,
      "learning_rate": 4.593139843913862e-06,
      "loss": 2.9208,
      "step": 217553
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4657070636749268,
      "learning_rate": 4.592426815611627e-06,
      "loss": 3.0323,
      "step": 217554
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9635396003723145,
      "learning_rate": 4.591713842231226e-06,
      "loss": 2.8369,
      "step": 217555
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9527339935302734,
      "learning_rate": 4.591000923772792e-06,
      "loss": 3.2004,
      "step": 217556
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.445077657699585,
      "learning_rate": 4.590288060236458e-06,
      "loss": 2.7571,
      "step": 217557
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6644160747528076,
      "learning_rate": 4.589575251622391e-06,
      "loss": 2.6342,
      "step": 217558
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.939986228942871,
      "learning_rate": 4.58886249793069e-06,
      "loss": 3.1133,
      "step": 217559
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0518224239349365,
      "learning_rate": 4.5881497991614895e-06,
      "loss": 3.2162,
      "step": 217560
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9215316772460938,
      "learning_rate": 4.5874371553149215e-06,
      "loss": 3.2001,
      "step": 217561
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.795905113220215,
      "learning_rate": 4.58672456639112e-06,
      "loss": 2.8837,
      "step": 217562
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.053293228149414,
      "learning_rate": 4.586012032390218e-06,
      "loss": 2.8561,
      "step": 217563
    },
    {
      "epoch": 2.83,
      "grad_norm": 5.458466053009033,
      "learning_rate": 4.585299553312383e-06,
      "loss": 2.6717,
      "step": 217564
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9187300205230713,
      "learning_rate": 4.584587129157713e-06,
      "loss": 3.0844,
      "step": 217565
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.780333995819092,
      "learning_rate": 4.583874759926342e-06,
      "loss": 3.0473,
      "step": 217566
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7784996032714844,
      "learning_rate": 4.583162445618405e-06,
      "loss": 3.1195,
      "step": 217567
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8901760578155518,
      "learning_rate": 4.582450186234033e-06,
      "loss": 2.7667,
      "step": 217568
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8603262901306152,
      "learning_rate": 4.581737981773326e-06,
      "loss": 2.8361,
      "step": 217569
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4844307899475098,
      "learning_rate": 4.581025832236484e-06,
      "loss": 2.8714,
      "step": 217570
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.757328510284424,
      "learning_rate": 4.580313737623609e-06,
      "loss": 2.7474,
      "step": 217571
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.560068130493164,
      "learning_rate": 4.579601697934831e-06,
      "loss": 2.9536,
      "step": 217572
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.776858329772949,
      "learning_rate": 4.578889713170286e-06,
      "loss": 3.0007,
      "step": 217573
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2059743404388428,
      "learning_rate": 4.578177783330073e-06,
      "loss": 2.842,
      "step": 217574
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9527604579925537,
      "learning_rate": 4.577465908414357e-06,
      "loss": 3.182,
      "step": 217575
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9382333755493164,
      "learning_rate": 4.576754088423307e-06,
      "loss": 2.847,
      "step": 217576
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6497344970703125,
      "learning_rate": 4.576042323356954e-06,
      "loss": 2.9755,
      "step": 217577
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8994593620300293,
      "learning_rate": 4.5753306132155665e-06,
      "loss": 2.8237,
      "step": 217578
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.007044553756714,
      "learning_rate": 4.574618957999143e-06,
      "loss": 2.951,
      "step": 217579
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4091591835021973,
      "learning_rate": 4.573907357707885e-06,
      "loss": 2.8292,
      "step": 217580
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0835793018341064,
      "learning_rate": 4.573195812341924e-06,
      "loss": 2.9211,
      "step": 217581
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.545929431915283,
      "learning_rate": 4.572484321901393e-06,
      "loss": 2.7782,
      "step": 217582
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.683101177215576,
      "learning_rate": 4.57177288638636e-06,
      "loss": 2.9184,
      "step": 217583
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.503056764602661,
      "learning_rate": 4.571061505797091e-06,
      "loss": 2.9928,
      "step": 217584
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.775322914123535,
      "learning_rate": 4.570350180133586e-06,
      "loss": 2.8918,
      "step": 217585
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.010619878768921,
      "learning_rate": 4.569638909396045e-06,
      "loss": 2.8412,
      "step": 217586
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0151922702789307,
      "learning_rate": 4.568927693584568e-06,
      "loss": 2.8742,
      "step": 217587
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3035755157470703,
      "learning_rate": 4.5682165326993206e-06,
      "loss": 2.8125,
      "step": 217588
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.510925531387329,
      "learning_rate": 4.567505426740403e-06,
      "loss": 3.0223,
      "step": 217589
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.72357439994812,
      "learning_rate": 4.566794375708016e-06,
      "loss": 2.8821,
      "step": 217590
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8494043350219727,
      "learning_rate": 4.566083379602192e-06,
      "loss": 3.1579,
      "step": 217591
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8589234352111816,
      "learning_rate": 4.565372438423099e-06,
      "loss": 3.0576,
      "step": 217592
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3150134086608887,
      "learning_rate": 4.564661552170901e-06,
      "loss": 2.7845,
      "step": 217593
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0917985439300537,
      "learning_rate": 4.5639507208456995e-06,
      "loss": 2.7764,
      "step": 217594
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3457229137420654,
      "learning_rate": 4.563239944447628e-06,
      "loss": 2.9284,
      "step": 217595
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2190370559692383,
      "learning_rate": 4.562529222976852e-06,
      "loss": 2.9424,
      "step": 217596
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1617774963378906,
      "learning_rate": 4.561818556433472e-06,
      "loss": 2.7591,
      "step": 217597
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.782747507095337,
      "learning_rate": 4.561107944817622e-06,
      "loss": 2.9877,
      "step": 217598
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.867434024810791,
      "learning_rate": 4.560397388129433e-06,
      "loss": 3.4105,
      "step": 217599
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.8511483669281006,
      "learning_rate": 4.559686886369074e-06,
      "loss": 2.7616,
      "step": 217600
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.8150813579559326,
      "learning_rate": 4.558976439536577e-06,
      "loss": 2.8793,
      "step": 217601
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.065016746520996,
      "learning_rate": 4.558266047632209e-06,
      "loss": 2.8905,
      "step": 217602
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8427398204803467,
      "learning_rate": 4.557555710656035e-06,
      "loss": 2.8913,
      "step": 217603
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6709799766540527,
      "learning_rate": 4.556845428608158e-06,
      "loss": 2.9438,
      "step": 217604
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5881478786468506,
      "learning_rate": 4.556135201488742e-06,
      "loss": 3.125,
      "step": 217605
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.046040058135986,
      "learning_rate": 4.5554250292979215e-06,
      "loss": 2.7112,
      "step": 217606
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3889472484588623,
      "learning_rate": 4.554714912035828e-06,
      "loss": 2.8218,
      "step": 217607
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8819632530212402,
      "learning_rate": 4.554004849702597e-06,
      "loss": 2.8471,
      "step": 217608
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1619112491607666,
      "learning_rate": 4.55329484229836e-06,
      "loss": 2.6774,
      "step": 217609
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8679981231689453,
      "learning_rate": 4.552584889823219e-06,
      "loss": 2.8932,
      "step": 217610
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.280011177062988,
      "learning_rate": 4.5518749922773376e-06,
      "loss": 2.9872,
      "step": 217611
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7946040630340576,
      "learning_rate": 4.551165149660851e-06,
      "loss": 2.7924,
      "step": 217612
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0090456008911133,
      "learning_rate": 4.550455361973859e-06,
      "loss": 2.916,
      "step": 217613
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9834089279174805,
      "learning_rate": 4.549745629216528e-06,
      "loss": 2.8637,
      "step": 217614
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9505515098571777,
      "learning_rate": 4.549035951388991e-06,
      "loss": 2.7625,
      "step": 217615
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7269201278686523,
      "learning_rate": 4.548326328491348e-06,
      "loss": 2.8854,
      "step": 217616
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9716954231262207,
      "learning_rate": 4.547616760523732e-06,
      "loss": 3.3633,
      "step": 217617
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7706503868103027,
      "learning_rate": 4.546907247486309e-06,
      "loss": 2.9046,
      "step": 217618
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9436614513397217,
      "learning_rate": 4.54619778937918e-06,
      "loss": 2.7947,
      "step": 217619
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7072858810424805,
      "learning_rate": 4.545488386202512e-06,
      "loss": 2.7618,
      "step": 217620
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.114223003387451,
      "learning_rate": 4.544779037956403e-06,
      "loss": 2.6457,
      "step": 217621
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.122922897338867,
      "learning_rate": 4.544069744641022e-06,
      "loss": 3.0856,
      "step": 217622
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.0283379554748535,
      "learning_rate": 4.5433605062564325e-06,
      "loss": 2.6866,
      "step": 217623
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.321338415145874,
      "learning_rate": 4.542651322802837e-06,
      "loss": 2.7603,
      "step": 217624
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5671184062957764,
      "learning_rate": 4.541942194280335e-06,
      "loss": 2.98,
      "step": 217625
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.777339458465576,
      "learning_rate": 4.5412331206890584e-06,
      "loss": 2.7893,
      "step": 217626
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.089303016662598,
      "learning_rate": 4.540524102029142e-06,
      "loss": 3.1028,
      "step": 217627
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4566357135772705,
      "learning_rate": 4.539815138300751e-06,
      "loss": 2.891,
      "step": 217628
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8975894451141357,
      "learning_rate": 4.539106229503919e-06,
      "loss": 2.8518,
      "step": 217629
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.086496591567993,
      "learning_rate": 4.5383973756389135e-06,
      "loss": 3.0725,
      "step": 217630
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.693861722946167,
      "learning_rate": 4.537688576705733e-06,
      "loss": 2.8575,
      "step": 217631
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1109516620635986,
      "learning_rate": 4.5369798327046124e-06,
      "loss": 3.0481,
      "step": 217632
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.839998483657837,
      "learning_rate": 4.53627114363565e-06,
      "loss": 2.8378,
      "step": 217633
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2128090858459473,
      "learning_rate": 4.53556250949898e-06,
      "loss": 2.8435,
      "step": 217634
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3068430423736572,
      "learning_rate": 4.534853930294702e-06,
      "loss": 2.7998,
      "step": 217635
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.8424651622772217,
      "learning_rate": 4.534145406022982e-06,
      "loss": 2.7922,
      "step": 217636
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8710644245147705,
      "learning_rate": 4.5334369366839205e-06,
      "loss": 2.7322,
      "step": 217637
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.863623857498169,
      "learning_rate": 4.532728522277684e-06,
      "loss": 2.8304,
      "step": 217638
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8945579528808594,
      "learning_rate": 4.532020162804406e-06,
      "loss": 2.8011,
      "step": 217639
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8693745136260986,
      "learning_rate": 4.531311858264186e-06,
      "loss": 2.7835,
      "step": 217640
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9530844688415527,
      "learning_rate": 4.530603608657158e-06,
      "loss": 2.8286,
      "step": 217641
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.158731460571289,
      "learning_rate": 4.529895413983519e-06,
      "loss": 2.897,
      "step": 217642
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7942235469818115,
      "learning_rate": 4.529187274243307e-06,
      "loss": 2.9484,
      "step": 217643
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4244542121887207,
      "learning_rate": 4.528479189436718e-06,
      "loss": 2.7445,
      "step": 217644
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.146484613418579,
      "learning_rate": 4.527771159563853e-06,
      "loss": 2.8936,
      "step": 217645
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.098489284515381,
      "learning_rate": 4.527063184624813e-06,
      "loss": 2.766,
      "step": 217646
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3427951335906982,
      "learning_rate": 4.52635526461983e-06,
      "loss": 2.8768,
      "step": 217647
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7587859630584717,
      "learning_rate": 4.5256473995489705e-06,
      "loss": 2.9889,
      "step": 217648
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.119793653488159,
      "learning_rate": 4.524939589412336e-06,
      "loss": 2.7607,
      "step": 217649
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.9036848545074463,
      "learning_rate": 4.524231834210123e-06,
      "loss": 2.8474,
      "step": 217650
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.826021671295166,
      "learning_rate": 4.523524133942435e-06,
      "loss": 2.8761,
      "step": 217651
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.678374767303467,
      "learning_rate": 4.5228164886093706e-06,
      "loss": 2.7025,
      "step": 217652
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6735942363739014,
      "learning_rate": 4.522108898211097e-06,
      "loss": 2.7303,
      "step": 217653
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.225375175476074,
      "learning_rate": 4.5214013627477455e-06,
      "loss": 2.9555,
      "step": 217654
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2077555656433105,
      "learning_rate": 4.52069388221945e-06,
      "loss": 2.8515,
      "step": 217655
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.839489459991455,
      "learning_rate": 4.519986456626345e-06,
      "loss": 3.0726,
      "step": 217656
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.264003038406372,
      "learning_rate": 4.519279085968564e-06,
      "loss": 2.9818,
      "step": 217657
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9645156860351562,
      "learning_rate": 4.518571770246171e-06,
      "loss": 3.1163,
      "step": 217658
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.779313802719116,
      "learning_rate": 4.517864509459401e-06,
      "loss": 3.0482,
      "step": 217659
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8635547161102295,
      "learning_rate": 4.517157303608321e-06,
      "loss": 3.0277,
      "step": 217660
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.883700370788574,
      "learning_rate": 4.516450152693063e-06,
      "loss": 2.8216,
      "step": 217661
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.539287567138672,
      "learning_rate": 4.515743056713794e-06,
      "loss": 2.9147,
      "step": 217662
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9513392448425293,
      "learning_rate": 4.5150360156706145e-06,
      "loss": 2.8743,
      "step": 217663
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.1910266876220703,
      "learning_rate": 4.5143290295636906e-06,
      "loss": 2.8328,
      "step": 217664
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.5157053470611572,
      "learning_rate": 4.513622098393121e-06,
      "loss": 2.9985,
      "step": 217665
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.393196105957031,
      "learning_rate": 4.512915222159041e-06,
      "loss": 2.9545,
      "step": 217666
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.835407018661499,
      "learning_rate": 4.512208400861617e-06,
      "loss": 3.2226,
      "step": 217667
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.059570550918579,
      "learning_rate": 4.51150163450088e-06,
      "loss": 3.1007,
      "step": 217668
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.781982421875,
      "learning_rate": 4.510794923077099e-06,
      "loss": 2.9948,
      "step": 217669
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2209389209747314,
      "learning_rate": 4.5100882665903394e-06,
      "loss": 2.8591,
      "step": 217670
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.481196403503418,
      "learning_rate": 4.509381665040701e-06,
      "loss": 2.6412,
      "step": 217671
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.183252334594727,
      "learning_rate": 4.508675118428351e-06,
      "loss": 2.8221,
      "step": 217672
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7147865295410156,
      "learning_rate": 4.5079686267534225e-06,
      "loss": 3.0262,
      "step": 217673
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7404415607452393,
      "learning_rate": 4.507262190016014e-06,
      "loss": 2.8512,
      "step": 217674
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.082714796066284,
      "learning_rate": 4.506555808216328e-06,
      "loss": 2.9886,
      "step": 217675
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9169373512268066,
      "learning_rate": 4.505849481354429e-06,
      "loss": 2.9589,
      "step": 217676
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7959978580474854,
      "learning_rate": 4.505143209430484e-06,
      "loss": 2.8553,
      "step": 217677
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2090277671813965,
      "learning_rate": 4.504436992444593e-06,
      "loss": 3.1515,
      "step": 217678
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.827038049697876,
      "learning_rate": 4.503730830396923e-06,
      "loss": 2.7355,
      "step": 217679
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0701937675476074,
      "learning_rate": 4.5030247232875405e-06,
      "loss": 3.203,
      "step": 217680
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.961066484451294,
      "learning_rate": 4.5023186711166785e-06,
      "loss": 2.8793,
      "step": 217681
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.85844087600708,
      "learning_rate": 4.501612673884403e-06,
      "loss": 2.741,
      "step": 217682
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.978257179260254,
      "learning_rate": 4.500906731590847e-06,
      "loss": 3.1048,
      "step": 217683
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7977499961853027,
      "learning_rate": 4.500200844236146e-06,
      "loss": 2.9113,
      "step": 217684
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.00423002243042,
      "learning_rate": 4.4994950118204646e-06,
      "loss": 2.9276,
      "step": 217685
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9710845947265625,
      "learning_rate": 4.498789234343836e-06,
      "loss": 3.1467,
      "step": 217686
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7719218730926514,
      "learning_rate": 4.498083511806527e-06,
      "loss": 2.8973,
      "step": 217687
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.4705514907836914,
      "learning_rate": 4.497377844208572e-06,
      "loss": 2.8216,
      "step": 217688
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2053134441375732,
      "learning_rate": 4.496672231550169e-06,
      "loss": 3.0108,
      "step": 217689
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9163498878479004,
      "learning_rate": 4.495966673831353e-06,
      "loss": 3.0287,
      "step": 217690
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.8772377967834473,
      "learning_rate": 4.495261171052356e-06,
      "loss": 2.9827,
      "step": 217691
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.789971113204956,
      "learning_rate": 4.4945557232132445e-06,
      "loss": 3.0473,
      "step": 217692
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6266019344329834,
      "learning_rate": 4.493850330314186e-06,
      "loss": 2.9623,
      "step": 217693
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7387866973876953,
      "learning_rate": 4.493144992355313e-06,
      "loss": 2.692,
      "step": 217694
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3062684535980225,
      "learning_rate": 4.4924397093367595e-06,
      "loss": 3.2121,
      "step": 217695
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.0614848136901855,
      "learning_rate": 4.491734481258591e-06,
      "loss": 2.7754,
      "step": 217696
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.77065372467041,
      "learning_rate": 4.491029308121008e-06,
      "loss": 3.1295,
      "step": 217697
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.5716795921325684,
      "learning_rate": 4.490324189924111e-06,
      "loss": 2.8216,
      "step": 217698
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.6821327209472656,
      "learning_rate": 4.4896191266680315e-06,
      "loss": 2.8106,
      "step": 217699
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4424924850463867,
      "learning_rate": 4.4889141183529375e-06,
      "loss": 2.8262,
      "step": 217700
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.900681972503662,
      "learning_rate": 4.488209164978929e-06,
      "loss": 2.8503,
      "step": 217701
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9255099296569824,
      "learning_rate": 4.487504266546138e-06,
      "loss": 3.0668,
      "step": 217702
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9191324710845947,
      "learning_rate": 4.486799423054699e-06,
      "loss": 2.896,
      "step": 217703
    },
    {
      "epoch": 2.83,
      "grad_norm": 4.219748020172119,
      "learning_rate": 4.486094634504711e-06,
      "loss": 2.8009,
      "step": 217704
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.841433525085449,
      "learning_rate": 4.485389900896375e-06,
      "loss": 2.9777,
      "step": 217705
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0930604934692383,
      "learning_rate": 4.4846852222297894e-06,
      "loss": 2.957,
      "step": 217706
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.3871195316314697,
      "learning_rate": 4.483980598505055e-06,
      "loss": 3.0636,
      "step": 217707
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.199862241744995,
      "learning_rate": 4.483276029722305e-06,
      "loss": 3.0891,
      "step": 217708
    },
    {
      "epoch": 2.83,
      "grad_norm": 5.052175521850586,
      "learning_rate": 4.482571515881739e-06,
      "loss": 2.7487,
      "step": 217709
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.463740348815918,
      "learning_rate": 4.481867056983424e-06,
      "loss": 2.7624,
      "step": 217710
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.10408353805542,
      "learning_rate": 4.4811626530274925e-06,
      "loss": 2.9922,
      "step": 217711
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.0351040363311768,
      "learning_rate": 4.480458304014112e-06,
      "loss": 2.8117,
      "step": 217712
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.071377754211426,
      "learning_rate": 4.4797540099434145e-06,
      "loss": 2.8785,
      "step": 217713
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.7294676303863525,
      "learning_rate": 4.479049770815435e-06,
      "loss": 3.0473,
      "step": 217714
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.595381259918213,
      "learning_rate": 4.478345586630438e-06,
      "loss": 2.6557,
      "step": 217715
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7613918781280518,
      "learning_rate": 4.477641457388459e-06,
      "loss": 2.9381,
      "step": 217716
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.2143771648406982,
      "learning_rate": 4.476937383089696e-06,
      "loss": 3.0294,
      "step": 217717
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.092243194580078,
      "learning_rate": 4.476233363734249e-06,
      "loss": 3.123,
      "step": 217718
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.4293644428253174,
      "learning_rate": 4.475529399322253e-06,
      "loss": 2.9455,
      "step": 217719
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.821504831314087,
      "learning_rate": 4.474825489853806e-06,
      "loss": 2.6326,
      "step": 217720
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.418524742126465,
      "learning_rate": 4.474121635329109e-06,
      "loss": 3.2499,
      "step": 217721
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.7420437335968018,
      "learning_rate": 4.473417835748194e-06,
      "loss": 2.842,
      "step": 217722
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.9287281036376953,
      "learning_rate": 4.472714091111296e-06,
      "loss": 2.7423,
      "step": 217723
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.131366014480591,
      "learning_rate": 4.47201040141848e-06,
      "loss": 2.657,
      "step": 217724
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.957504987716675,
      "learning_rate": 4.4713067666698794e-06,
      "loss": 2.701,
      "step": 217725
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.480440378189087,
      "learning_rate": 4.470603186865662e-06,
      "loss": 3.1521,
      "step": 217726
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.386488199234009,
      "learning_rate": 4.469899662005961e-06,
      "loss": 3.1432,
      "step": 217727
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.903123617172241,
      "learning_rate": 4.46919619209084e-06,
      "loss": 2.9044,
      "step": 217728
    },
    {
      "epoch": 2.83,
      "grad_norm": 3.840449094772339,
      "learning_rate": 4.468492777120502e-06,
      "loss": 2.7195,
      "step": 217729
    },
    {
      "epoch": 2.83,
      "grad_norm": 2.287923574447632,
      "learning_rate": 4.467789417095013e-06,
      "loss": 2.628,
      "step": 217730
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8958873748779297,
      "learning_rate": 4.467086112014573e-06,
      "loss": 2.8839,
      "step": 217731
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7493200302124023,
      "learning_rate": 4.466382861879281e-06,
      "loss": 2.9195,
      "step": 217732
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.371676206588745,
      "learning_rate": 4.4656796666892705e-06,
      "loss": 2.7921,
      "step": 217733
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.741048574447632,
      "learning_rate": 4.464976526444642e-06,
      "loss": 2.9585,
      "step": 217734
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.358222961425781,
      "learning_rate": 4.464273441145593e-06,
      "loss": 3.1377,
      "step": 217735
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8378307819366455,
      "learning_rate": 4.463570410792161e-06,
      "loss": 2.8766,
      "step": 217736
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.789776086807251,
      "learning_rate": 4.4628674353845765e-06,
      "loss": 2.9456,
      "step": 217737
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.02348518371582,
      "learning_rate": 4.462164514922939e-06,
      "loss": 2.7641,
      "step": 217738
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1156585216522217,
      "learning_rate": 4.461461649407317e-06,
      "loss": 2.9265,
      "step": 217739
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.129355430603027,
      "learning_rate": 4.4607588388379075e-06,
      "loss": 2.9632,
      "step": 217740
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.79681134223938,
      "learning_rate": 4.460056083214813e-06,
      "loss": 2.8455,
      "step": 217741
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.641206741333008,
      "learning_rate": 4.459353382538166e-06,
      "loss": 2.8041,
      "step": 217742
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2585480213165283,
      "learning_rate": 4.458650736808134e-06,
      "loss": 2.9386,
      "step": 217743
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1356778144836426,
      "learning_rate": 4.457948146024815e-06,
      "loss": 2.6866,
      "step": 217744
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8650147914886475,
      "learning_rate": 4.457245610188309e-06,
      "loss": 3.0161,
      "step": 217745
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8919460773468018,
      "learning_rate": 4.456543129298784e-06,
      "loss": 2.8687,
      "step": 217746
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.185470104217529,
      "learning_rate": 4.455840703356373e-06,
      "loss": 2.7754,
      "step": 217747
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9313716888427734,
      "learning_rate": 4.4551383323612075e-06,
      "loss": 2.8988,
      "step": 217748
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9969146251678467,
      "learning_rate": 4.454436016313423e-06,
      "loss": 2.6893,
      "step": 217749
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.888136386871338,
      "learning_rate": 4.4537337552131516e-06,
      "loss": 2.9871,
      "step": 217750
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.583608388900757,
      "learning_rate": 4.45303154906046e-06,
      "loss": 2.6185,
      "step": 217751
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.810260772705078,
      "learning_rate": 4.452329397855548e-06,
      "loss": 3.0231,
      "step": 217752
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.96124529838562,
      "learning_rate": 4.451627301598515e-06,
      "loss": 2.9423,
      "step": 217753
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2201881408691406,
      "learning_rate": 4.450925260289528e-06,
      "loss": 2.7857,
      "step": 217754
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5789902210235596,
      "learning_rate": 4.450223273928688e-06,
      "loss": 2.9871,
      "step": 217755
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9029626846313477,
      "learning_rate": 4.44952134251616e-06,
      "loss": 2.9128,
      "step": 217756
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.124418258666992,
      "learning_rate": 4.448819466051978e-06,
      "loss": 2.8783,
      "step": 217757
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9445149898529053,
      "learning_rate": 4.448117644536375e-06,
      "loss": 2.9081,
      "step": 217758
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.166090250015259,
      "learning_rate": 4.447415877969451e-06,
      "loss": 2.9567,
      "step": 217759
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.337902307510376,
      "learning_rate": 4.446714166351306e-06,
      "loss": 3.1137,
      "step": 217760
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.352545738220215,
      "learning_rate": 4.44601250968214e-06,
      "loss": 2.8512,
      "step": 217761
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.919487237930298,
      "learning_rate": 4.445310907962019e-06,
      "loss": 2.9082,
      "step": 217762
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8647847175598145,
      "learning_rate": 4.444609361191076e-06,
      "loss": 3.0271,
      "step": 217763
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.066711902618408,
      "learning_rate": 4.443907869369479e-06,
      "loss": 2.7514,
      "step": 217764
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.9239540100097656,
      "learning_rate": 4.443206432497326e-06,
      "loss": 3.0711,
      "step": 217765
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7582297325134277,
      "learning_rate": 4.442505050574751e-06,
      "loss": 3.2034,
      "step": 217766
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8248422145843506,
      "learning_rate": 4.441803723601922e-06,
      "loss": 3.0401,
      "step": 217767
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.095364809036255,
      "learning_rate": 4.441102451578937e-06,
      "loss": 2.963,
      "step": 217768
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0482194423675537,
      "learning_rate": 4.440401234505897e-06,
      "loss": 2.7136,
      "step": 217769
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7276408672332764,
      "learning_rate": 4.4397000723830015e-06,
      "loss": 3.0907,
      "step": 217770
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.571225166320801,
      "learning_rate": 4.438998965210316e-06,
      "loss": 2.9316,
      "step": 217771
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5682928562164307,
      "learning_rate": 4.438297912988009e-06,
      "loss": 3.1701,
      "step": 217772
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.53780460357666,
      "learning_rate": 4.4375969157162125e-06,
      "loss": 2.9725,
      "step": 217773
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8876500129699707,
      "learning_rate": 4.436895973395061e-06,
      "loss": 2.8429,
      "step": 217774
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6670961380004883,
      "learning_rate": 4.436195086024652e-06,
      "loss": 3.1755,
      "step": 217775
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.764111042022705,
      "learning_rate": 4.435494253605121e-06,
      "loss": 2.8895,
      "step": 217776
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.290555238723755,
      "learning_rate": 4.4347934761366e-06,
      "loss": 3.0692,
      "step": 217777
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1384706497192383,
      "learning_rate": 4.434092753619256e-06,
      "loss": 2.7891,
      "step": 217778
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8883979320526123,
      "learning_rate": 4.4333920860532225e-06,
      "loss": 3.0657,
      "step": 217779
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.57038950920105,
      "learning_rate": 4.4326914734385655e-06,
      "loss": 2.8563,
      "step": 217780
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.849335193634033,
      "learning_rate": 4.431990915775418e-06,
      "loss": 3.0874,
      "step": 217781
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1382267475128174,
      "learning_rate": 4.431290413064015e-06,
      "loss": 2.9623,
      "step": 217782
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7796480655670166,
      "learning_rate": 4.4305899653043545e-06,
      "loss": 3.1116,
      "step": 217783
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9763107299804688,
      "learning_rate": 4.429889572496636e-06,
      "loss": 2.8228,
      "step": 217784
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.111694812774658,
      "learning_rate": 4.429189234640995e-06,
      "loss": 2.9294,
      "step": 217785
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6891205310821533,
      "learning_rate": 4.4284889517375636e-06,
      "loss": 2.926,
      "step": 217786
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.744206428527832,
      "learning_rate": 4.4277887237864075e-06,
      "loss": 2.9101,
      "step": 217787
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.822730541229248,
      "learning_rate": 4.427088550787761e-06,
      "loss": 2.7733,
      "step": 217788
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.4792346954345703,
      "learning_rate": 4.426388432741656e-06,
      "loss": 3.0853,
      "step": 217789
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.750098705291748,
      "learning_rate": 4.425688369648295e-06,
      "loss": 3.0852,
      "step": 217790
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.145772695541382,
      "learning_rate": 4.424988361507742e-06,
      "loss": 2.6202,
      "step": 217791
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9312572479248047,
      "learning_rate": 4.4242884083201975e-06,
      "loss": 2.691,
      "step": 217792
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8934519290924072,
      "learning_rate": 4.423588510085729e-06,
      "loss": 2.8699,
      "step": 217793
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7175793647766113,
      "learning_rate": 4.422888666804536e-06,
      "loss": 2.9864,
      "step": 217794
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8028202056884766,
      "learning_rate": 4.422188878476651e-06,
      "loss": 3.0034,
      "step": 217795
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.514803171157837,
      "learning_rate": 4.421489145102308e-06,
      "loss": 3.0166,
      "step": 217796
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.362825632095337,
      "learning_rate": 4.42078946668154e-06,
      "loss": 2.7047,
      "step": 217797
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9540035724639893,
      "learning_rate": 4.4200898432146135e-06,
      "loss": 3.09,
      "step": 217798
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9362311363220215,
      "learning_rate": 4.419390274701495e-06,
      "loss": 2.9935,
      "step": 217799
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8273274898529053,
      "learning_rate": 4.418690761142418e-06,
      "loss": 2.7694,
      "step": 217800
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.177642583847046,
      "learning_rate": 4.417991302537449e-06,
      "loss": 2.6851,
      "step": 217801
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5834717750549316,
      "learning_rate": 4.417291898886821e-06,
      "loss": 3.0457,
      "step": 217802
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9884884357452393,
      "learning_rate": 4.416592550190534e-06,
      "loss": 2.8018,
      "step": 217803
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.695584774017334,
      "learning_rate": 4.415893256448855e-06,
      "loss": 2.9551,
      "step": 217804
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.436805486679077,
      "learning_rate": 4.415194017661783e-06,
      "loss": 2.9678,
      "step": 217805
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.109053611755371,
      "learning_rate": 4.414494833829518e-06,
      "loss": 3.027,
      "step": 217806
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.84499454498291,
      "learning_rate": 4.413795704952161e-06,
      "loss": 2.9232,
      "step": 217807
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8711438179016113,
      "learning_rate": 4.413096631029878e-06,
      "loss": 2.9978,
      "step": 217808
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0638344287872314,
      "learning_rate": 4.412397612062768e-06,
      "loss": 2.9046,
      "step": 217809
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.600135087966919,
      "learning_rate": 4.411698648050999e-06,
      "loss": 2.9806,
      "step": 217810
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9911999702453613,
      "learning_rate": 4.410999738994669e-06,
      "loss": 2.8205,
      "step": 217811
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6352627277374268,
      "learning_rate": 4.410300884893914e-06,
      "loss": 3.0051,
      "step": 217812
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7957847118377686,
      "learning_rate": 4.409602085748831e-06,
      "loss": 2.8868,
      "step": 217813
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5474390983581543,
      "learning_rate": 4.408903341559622e-06,
      "loss": 3.0619,
      "step": 217814
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8149571418762207,
      "learning_rate": 4.408204652326352e-06,
      "loss": 3.2003,
      "step": 217815
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6945483684539795,
      "learning_rate": 4.407506018049189e-06,
      "loss": 2.974,
      "step": 217816
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.902097225189209,
      "learning_rate": 4.406807438728266e-06,
      "loss": 2.9672,
      "step": 217817
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3645241260528564,
      "learning_rate": 4.406108914363682e-06,
      "loss": 2.7643,
      "step": 217818
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.292954206466675,
      "learning_rate": 4.4054104449555705e-06,
      "loss": 2.5998,
      "step": 217819
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.267381191253662,
      "learning_rate": 4.404712030504098e-06,
      "loss": 3.0696,
      "step": 217820
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.132916212081909,
      "learning_rate": 4.4040136710093324e-06,
      "loss": 3.1516,
      "step": 217821
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0534849166870117,
      "learning_rate": 4.403315366471471e-06,
      "loss": 2.7785,
      "step": 217822
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2653861045837402,
      "learning_rate": 4.402617116890617e-06,
      "loss": 2.9956,
      "step": 217823
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.974600315093994,
      "learning_rate": 4.401918922266867e-06,
      "loss": 2.9661,
      "step": 217824
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.987339735031128,
      "learning_rate": 4.40122078260039e-06,
      "loss": 2.9768,
      "step": 217825
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.990999698638916,
      "learning_rate": 4.400522697891318e-06,
      "loss": 3.0785,
      "step": 217826
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2174603939056396,
      "learning_rate": 4.399824668139751e-06,
      "loss": 2.7699,
      "step": 217827
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.802276611328125,
      "learning_rate": 4.3991266933458555e-06,
      "loss": 3.0995,
      "step": 217828
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.9022178649902344,
      "learning_rate": 4.398428773509766e-06,
      "loss": 3.2106,
      "step": 217829
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.968418836593628,
      "learning_rate": 4.397730908631547e-06,
      "loss": 2.8021,
      "step": 217830
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.97455096244812,
      "learning_rate": 4.397033098711367e-06,
      "loss": 2.7973,
      "step": 217831
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.122039794921875,
      "learning_rate": 4.396335343749391e-06,
      "loss": 2.8757,
      "step": 217832
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0621979236602783,
      "learning_rate": 4.3956376437456865e-06,
      "loss": 2.6398,
      "step": 217833
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1826868057250977,
      "learning_rate": 4.39493999870042e-06,
      "loss": 2.9027,
      "step": 217834
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3891518115997314,
      "learning_rate": 4.394242408613724e-06,
      "loss": 2.868,
      "step": 217835
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3624868392944336,
      "learning_rate": 4.3935448734857e-06,
      "loss": 3.0524,
      "step": 217836
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0805413722991943,
      "learning_rate": 4.392847393316512e-06,
      "loss": 2.878,
      "step": 217837
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8067786693573,
      "learning_rate": 4.392149968106262e-06,
      "loss": 2.8211,
      "step": 217838
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8000552654266357,
      "learning_rate": 4.391452597855116e-06,
      "loss": 2.7937,
      "step": 217839
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.051274299621582,
      "learning_rate": 4.390755282563174e-06,
      "loss": 3.0639,
      "step": 217840
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7481744289398193,
      "learning_rate": 4.390058022230569e-06,
      "loss": 2.5581,
      "step": 217841
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.970285654067993,
      "learning_rate": 4.3893608168574015e-06,
      "loss": 2.8927,
      "step": 217842
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.437464952468872,
      "learning_rate": 4.38866366644387e-06,
      "loss": 2.664,
      "step": 217843
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9930715560913086,
      "learning_rate": 4.387966570990009e-06,
      "loss": 3.0193,
      "step": 217844
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3227059841156006,
      "learning_rate": 4.387269530496085e-06,
      "loss": 2.8525,
      "step": 217845
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.217242479324341,
      "learning_rate": 4.386572544962097e-06,
      "loss": 3.1705,
      "step": 217846
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9471938610076904,
      "learning_rate": 4.385875614388246e-06,
      "loss": 2.6949,
      "step": 217847
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.851564407348633,
      "learning_rate": 4.385178738774631e-06,
      "loss": 2.875,
      "step": 217848
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.596890449523926,
      "learning_rate": 4.384481918121385e-06,
      "loss": 2.8671,
      "step": 217849
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.864666223526001,
      "learning_rate": 4.3837851524286425e-06,
      "loss": 3.0847,
      "step": 217850
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8017830848693848,
      "learning_rate": 4.383088441696536e-06,
      "loss": 2.9321,
      "step": 217851
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.607110023498535,
      "learning_rate": 4.382391785925199e-06,
      "loss": 3.0802,
      "step": 217852
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.340484142303467,
      "learning_rate": 4.381695185114764e-06,
      "loss": 3.0155,
      "step": 217853
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.508976697921753,
      "learning_rate": 4.380998639265332e-06,
      "loss": 2.6396,
      "step": 217854
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.880431652069092,
      "learning_rate": 4.380302148377102e-06,
      "loss": 2.7094,
      "step": 217855
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8340883255004883,
      "learning_rate": 4.379605712450074e-06,
      "loss": 2.8472,
      "step": 217856
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.210341930389404,
      "learning_rate": 4.3789093314845145e-06,
      "loss": 2.751,
      "step": 217857
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.188415050506592,
      "learning_rate": 4.378213005480491e-06,
      "loss": 2.9734,
      "step": 217858
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5082221031188965,
      "learning_rate": 4.377516734438135e-06,
      "loss": 2.8266,
      "step": 217859
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0980398654937744,
      "learning_rate": 4.376820518357582e-06,
      "loss": 2.9537,
      "step": 217860
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.064154624938965,
      "learning_rate": 4.376124357238931e-06,
      "loss": 3.1412,
      "step": 217861
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.570394992828369,
      "learning_rate": 4.375428251082347e-06,
      "loss": 2.9118,
      "step": 217862
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.035487413406372,
      "learning_rate": 4.374732199887964e-06,
      "loss": 2.989,
      "step": 217863
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0246171951293945,
      "learning_rate": 4.374036203655884e-06,
      "loss": 2.8619,
      "step": 217864
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.215824604034424,
      "learning_rate": 4.3733402623863045e-06,
      "loss": 3.0137,
      "step": 217865
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.19966459274292,
      "learning_rate": 4.37264437607926e-06,
      "loss": 2.6882,
      "step": 217866
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.171405553817749,
      "learning_rate": 4.371948544734916e-06,
      "loss": 2.8604,
      "step": 217867
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.513641357421875,
      "learning_rate": 4.371252768353406e-06,
      "loss": 2.9563,
      "step": 217868
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.736140251159668,
      "learning_rate": 4.370557046934864e-06,
      "loss": 2.7777,
      "step": 217869
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.094874143600464,
      "learning_rate": 4.369861380479423e-06,
      "loss": 2.8281,
      "step": 217870
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8394503593444824,
      "learning_rate": 4.369165768987215e-06,
      "loss": 2.7431,
      "step": 217871
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.323380708694458,
      "learning_rate": 4.368470212458341e-06,
      "loss": 2.5778,
      "step": 217872
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.827284574508667,
      "learning_rate": 4.367774710892968e-06,
      "loss": 2.9003,
      "step": 217873
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9627304077148438,
      "learning_rate": 4.367079264291162e-06,
      "loss": 2.4941,
      "step": 217874
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8257908821105957,
      "learning_rate": 4.366383872653123e-06,
      "loss": 2.8575,
      "step": 217875
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.960097312927246,
      "learning_rate": 4.365688535978951e-06,
      "loss": 2.881,
      "step": 217876
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.238508701324463,
      "learning_rate": 4.364993254268812e-06,
      "loss": 2.935,
      "step": 217877
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9178500175476074,
      "learning_rate": 4.364298027522772e-06,
      "loss": 2.8343,
      "step": 217878
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.817735195159912,
      "learning_rate": 4.3636028557409996e-06,
      "loss": 2.7557,
      "step": 217879
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3281679153442383,
      "learning_rate": 4.362907738923593e-06,
      "loss": 2.9413,
      "step": 217880
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2876675128936768,
      "learning_rate": 4.36221267707072e-06,
      "loss": 2.7622,
      "step": 217881
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.750927448272705,
      "learning_rate": 4.361517670182479e-06,
      "loss": 3.0324,
      "step": 217882
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1946728229522705,
      "learning_rate": 4.360822718259038e-06,
      "loss": 2.9527,
      "step": 217883
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0373852252960205,
      "learning_rate": 4.360127821300463e-06,
      "loss": 3.2112,
      "step": 217884
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.823201894760132,
      "learning_rate": 4.359432979306954e-06,
      "loss": 2.9728,
      "step": 217885
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.748990535736084,
      "learning_rate": 4.358738192278577e-06,
      "loss": 3.0922,
      "step": 217886
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6143369674682617,
      "learning_rate": 4.358043460215532e-06,
      "loss": 3.0132,
      "step": 217887
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7637221813201904,
      "learning_rate": 4.357348783117853e-06,
      "loss": 2.9564,
      "step": 217888
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2385284900665283,
      "learning_rate": 4.3566541609858065e-06,
      "loss": 3.0168,
      "step": 217889
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3304734230041504,
      "learning_rate": 4.3559595938193584e-06,
      "loss": 3.042,
      "step": 217890
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3131721019744873,
      "learning_rate": 4.355265081618742e-06,
      "loss": 2.9418,
      "step": 217891
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.330852508544922,
      "learning_rate": 4.354570624384057e-06,
      "loss": 2.7937,
      "step": 217892
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0696167945861816,
      "learning_rate": 4.353876222115471e-06,
      "loss": 2.8735,
      "step": 217893
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3949170112609863,
      "learning_rate": 4.35318187481305e-06,
      "loss": 3.0001,
      "step": 217894
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.738132953643799,
      "learning_rate": 4.35248758247696e-06,
      "loss": 3.0412,
      "step": 217895
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.333777904510498,
      "learning_rate": 4.351793345107335e-06,
      "loss": 3.0624,
      "step": 217896
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.612783908843994,
      "learning_rate": 4.351099162704275e-06,
      "loss": 2.7782,
      "step": 217897
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.918105363845825,
      "learning_rate": 4.350405035267912e-06,
      "loss": 3.1768,
      "step": 217898
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7372443675994873,
      "learning_rate": 4.349710962798414e-06,
      "loss": 2.8841,
      "step": 217899
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7146127223968506,
      "learning_rate": 4.34901694529588e-06,
      "loss": 2.9458,
      "step": 217900
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6893508434295654,
      "learning_rate": 4.348322982760443e-06,
      "loss": 2.722,
      "step": 217901
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0186784267425537,
      "learning_rate": 4.347629075192272e-06,
      "loss": 2.8937,
      "step": 217902
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.816521644592285,
      "learning_rate": 4.34693522259143e-06,
      "loss": 3.0683,
      "step": 217903
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4845430850982666,
      "learning_rate": 4.346241424958052e-06,
      "loss": 3.1155,
      "step": 217904
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.208190679550171,
      "learning_rate": 4.345547682292305e-06,
      "loss": 2.9439,
      "step": 217905
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0498857498168945,
      "learning_rate": 4.344853994594289e-06,
      "loss": 2.7036,
      "step": 217906
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9798128604888916,
      "learning_rate": 4.344160361864135e-06,
      "loss": 2.8041,
      "step": 217907
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.804234743118286,
      "learning_rate": 4.343466784102012e-06,
      "loss": 2.8033,
      "step": 217908
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3852903842926025,
      "learning_rate": 4.342773261308019e-06,
      "loss": 2.9081,
      "step": 217909
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1876633167266846,
      "learning_rate": 4.342079793482289e-06,
      "loss": 2.7833,
      "step": 217910
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.888477087020874,
      "learning_rate": 4.341386380624923e-06,
      "loss": 3.0256,
      "step": 217911
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.237637042999268,
      "learning_rate": 4.340693022736086e-06,
      "loss": 3.0386,
      "step": 217912
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.270439863204956,
      "learning_rate": 4.3399997198159116e-06,
      "loss": 3.065,
      "step": 217913
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1112148761749268,
      "learning_rate": 4.339306471864501e-06,
      "loss": 3.0779,
      "step": 217914
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7402169704437256,
      "learning_rate": 4.338613278881953e-06,
      "loss": 2.9494,
      "step": 217915
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0052404403686523,
      "learning_rate": 4.3379201408685e-06,
      "loss": 3.0921,
      "step": 217916
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9152848720550537,
      "learning_rate": 4.337227057824177e-06,
      "loss": 2.8534,
      "step": 217917
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1598081588745117,
      "learning_rate": 4.33653402974915e-06,
      "loss": 2.9472,
      "step": 217918
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9702837467193604,
      "learning_rate": 4.335841056643552e-06,
      "loss": 2.9185,
      "step": 217919
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.229133605957031,
      "learning_rate": 4.335148138507516e-06,
      "loss": 3.0631,
      "step": 217920
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.606882333755493,
      "learning_rate": 4.334455275341109e-06,
      "loss": 2.9396,
      "step": 217921
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9558608531951904,
      "learning_rate": 4.33376246714453e-06,
      "loss": 3.0766,
      "step": 217922
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.346170663833618,
      "learning_rate": 4.33306971391788e-06,
      "loss": 2.9345,
      "step": 217923
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.77670955657959,
      "learning_rate": 4.332377015661326e-06,
      "loss": 2.8129,
      "step": 217924
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.252434730529785,
      "learning_rate": 4.331684372374933e-06,
      "loss": 2.9028,
      "step": 217925
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.395644187927246,
      "learning_rate": 4.330991784058868e-06,
      "loss": 3.0863,
      "step": 217926
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.525035858154297,
      "learning_rate": 4.330299250713265e-06,
      "loss": 2.9576,
      "step": 217927
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9086344242095947,
      "learning_rate": 4.329606772338223e-06,
      "loss": 2.9402,
      "step": 217928
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8478622436523438,
      "learning_rate": 4.328914348933876e-06,
      "loss": 2.7465,
      "step": 217929
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.786079168319702,
      "learning_rate": 4.328221980500391e-06,
      "loss": 3.0403,
      "step": 217930
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3692848682403564,
      "learning_rate": 4.3275296670378656e-06,
      "loss": 2.902,
      "step": 217931
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.808866024017334,
      "learning_rate": 4.326837408546468e-06,
      "loss": 3.0881,
      "step": 217932
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.903031826019287,
      "learning_rate": 4.326145205026232e-06,
      "loss": 2.929,
      "step": 217933
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8112707138061523,
      "learning_rate": 4.325453056477357e-06,
      "loss": 3.1044,
      "step": 217934
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.878351926803589,
      "learning_rate": 4.324760962899976e-06,
      "loss": 3.0807,
      "step": 217935
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8872621059417725,
      "learning_rate": 4.324068924294221e-06,
      "loss": 2.7162,
      "step": 217936
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0576729774475098,
      "learning_rate": 4.323376940660161e-06,
      "loss": 2.898,
      "step": 217937
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.152287244796753,
      "learning_rate": 4.3226850119980285e-06,
      "loss": 2.9225,
      "step": 217938
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4207406044006348,
      "learning_rate": 4.3219931383078225e-06,
      "loss": 2.9824,
      "step": 217939
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0674386024475098,
      "learning_rate": 4.321301319589776e-06,
      "loss": 3.0149,
      "step": 217940
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.079531669616699,
      "learning_rate": 4.320609555843957e-06,
      "loss": 2.9958,
      "step": 217941
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0588200092315674,
      "learning_rate": 4.319917847070565e-06,
      "loss": 2.9649,
      "step": 217942
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.153470993041992,
      "learning_rate": 4.3192261932696315e-06,
      "loss": 2.7619,
      "step": 217943
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0312976837158203,
      "learning_rate": 4.3185345944413916e-06,
      "loss": 2.7907,
      "step": 217944
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3170411586761475,
      "learning_rate": 4.317843050585845e-06,
      "loss": 2.9317,
      "step": 217945
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.448837995529175,
      "learning_rate": 4.317151561703258e-06,
      "loss": 3.1051,
      "step": 217946
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4089577198028564,
      "learning_rate": 4.316460127793631e-06,
      "loss": 2.7663,
      "step": 217947
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.734714984893799,
      "learning_rate": 4.315768748857196e-06,
      "loss": 2.8751,
      "step": 217948
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2659502029418945,
      "learning_rate": 4.31507742489402e-06,
      "loss": 2.7391,
      "step": 217949
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2943432331085205,
      "learning_rate": 4.314386155904304e-06,
      "loss": 2.9373,
      "step": 217950
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.980790853500366,
      "learning_rate": 4.313694941888079e-06,
      "loss": 2.9123,
      "step": 217951
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3416643142700195,
      "learning_rate": 4.313003782845548e-06,
      "loss": 2.713,
      "step": 217952
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.957921028137207,
      "learning_rate": 4.312312678776775e-06,
      "loss": 2.9531,
      "step": 217953
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3366405963897705,
      "learning_rate": 4.3116216296819606e-06,
      "loss": 2.802,
      "step": 217954
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9940574169158936,
      "learning_rate": 4.310930635561138e-06,
      "loss": 3.1011,
      "step": 217955
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.044396162033081,
      "learning_rate": 4.310239696414575e-06,
      "loss": 2.7851,
      "step": 217956
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3353633880615234,
      "learning_rate": 4.30954881224227e-06,
      "loss": 2.9991,
      "step": 217957
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6878926753997803,
      "learning_rate": 4.308857983044423e-06,
      "loss": 3.025,
      "step": 217958
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.827160358428955,
      "learning_rate": 4.3081672088211e-06,
      "loss": 2.7936,
      "step": 217959
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.003957986831665,
      "learning_rate": 4.307476489572503e-06,
      "loss": 2.8042,
      "step": 217960
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9009459018707275,
      "learning_rate": 4.3067858252987305e-06,
      "loss": 3.0087,
      "step": 217961
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7751028537750244,
      "learning_rate": 4.306095215999916e-06,
      "loss": 2.9201,
      "step": 217962
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.800997495651245,
      "learning_rate": 4.3054046616761595e-06,
      "loss": 2.725,
      "step": 217963
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5584187507629395,
      "learning_rate": 4.304714162327627e-06,
      "loss": 2.9712,
      "step": 217964
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.35321044921875,
      "learning_rate": 4.304023717954419e-06,
      "loss": 2.8102,
      "step": 217965
    },
    {
      "epoch": 2.84,
      "grad_norm": 8.039137840270996,
      "learning_rate": 4.303333328556668e-06,
      "loss": 3.0707,
      "step": 217966
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9817512035369873,
      "learning_rate": 4.302642994134509e-06,
      "loss": 2.8632,
      "step": 217967
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.019904613494873,
      "learning_rate": 4.301952714688106e-06,
      "loss": 2.9742,
      "step": 217968
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1884634494781494,
      "learning_rate": 4.301262490217494e-06,
      "loss": 2.7977,
      "step": 217969
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4417290687561035,
      "learning_rate": 4.300572320722906e-06,
      "loss": 2.9986,
      "step": 217970
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7336206436157227,
      "learning_rate": 4.299882206204408e-06,
      "loss": 2.9526,
      "step": 217971
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.949716091156006,
      "learning_rate": 4.299192146662134e-06,
      "loss": 2.6802,
      "step": 217972
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6736013889312744,
      "learning_rate": 4.298502142096216e-06,
      "loss": 2.9759,
      "step": 217973
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1750597953796387,
      "learning_rate": 4.297812192506855e-06,
      "loss": 2.6265,
      "step": 217974
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4439613819122314,
      "learning_rate": 4.297122297894018e-06,
      "loss": 2.932,
      "step": 217975
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.58591890335083,
      "learning_rate": 4.2964324582580036e-06,
      "loss": 2.9666,
      "step": 217976
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8484013080596924,
      "learning_rate": 4.295742673598812e-06,
      "loss": 3.085,
      "step": 217977
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.158600091934204,
      "learning_rate": 4.295052943916644e-06,
      "loss": 3.0508,
      "step": 217978
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.199578285217285,
      "learning_rate": 4.2943632692115985e-06,
      "loss": 2.8322,
      "step": 217979
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4776833057403564,
      "learning_rate": 4.293673649483842e-06,
      "loss": 2.9655,
      "step": 217980
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.903764486312866,
      "learning_rate": 4.292984084733442e-06,
      "loss": 3.0335,
      "step": 217981
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.934152126312256,
      "learning_rate": 4.2922945749605646e-06,
      "loss": 3.0628,
      "step": 217982
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9105679988861084,
      "learning_rate": 4.291605120165309e-06,
      "loss": 2.6901,
      "step": 217983
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9618630409240723,
      "learning_rate": 4.290915720347876e-06,
      "loss": 3.0132,
      "step": 217984
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.322566270828247,
      "learning_rate": 4.290226375508299e-06,
      "loss": 2.7941,
      "step": 217985
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.752540111541748,
      "learning_rate": 4.289537085646777e-06,
      "loss": 3.0941,
      "step": 217986
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8544907569885254,
      "learning_rate": 4.2888478507634105e-06,
      "loss": 3.0555,
      "step": 217987
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.388739585876465,
      "learning_rate": 4.2881586708583325e-06,
      "loss": 2.925,
      "step": 217988
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.3651123046875,
      "learning_rate": 4.287469545931643e-06,
      "loss": 2.8211,
      "step": 217989
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.180518627166748,
      "learning_rate": 4.286780475983509e-06,
      "loss": 2.7976,
      "step": 217990
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5511655807495117,
      "learning_rate": 4.286091461014063e-06,
      "loss": 2.9225,
      "step": 217991
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8384177684783936,
      "learning_rate": 4.285402501023405e-06,
      "loss": 2.8314,
      "step": 217992
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7560853958129883,
      "learning_rate": 4.284713596011668e-06,
      "loss": 2.7605,
      "step": 217993
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.192636013031006,
      "learning_rate": 4.2840247459789866e-06,
      "loss": 2.9454,
      "step": 217994
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.776567220687866,
      "learning_rate": 4.283335950925459e-06,
      "loss": 2.8652,
      "step": 217995
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.797508478164673,
      "learning_rate": 4.282647210851287e-06,
      "loss": 2.9984,
      "step": 217996
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2903544902801514,
      "learning_rate": 4.281958525756501e-06,
      "loss": 2.9071,
      "step": 217997
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7220351696014404,
      "learning_rate": 4.2812698956413375e-06,
      "loss": 2.9153,
      "step": 217998
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9396636486053467,
      "learning_rate": 4.280581320505827e-06,
      "loss": 2.929,
      "step": 217999
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.35512375831604,
      "learning_rate": 4.279892800350171e-06,
      "loss": 2.7054,
      "step": 218000
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.328979969024658,
      "learning_rate": 4.279204335174435e-06,
      "loss": 2.8686,
      "step": 218001
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.907893180847168,
      "learning_rate": 4.278515924978821e-06,
      "loss": 3.1603,
      "step": 218002
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.107917785644531,
      "learning_rate": 4.277827569763359e-06,
      "loss": 3.094,
      "step": 218003
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2054755687713623,
      "learning_rate": 4.277139269528251e-06,
      "loss": 2.9286,
      "step": 218004
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.021770000457764,
      "learning_rate": 4.2764510242736305e-06,
      "loss": 2.9863,
      "step": 218005
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.946178913116455,
      "learning_rate": 4.275762833999563e-06,
      "loss": 2.8415,
      "step": 218006
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.8136062622070312,
      "learning_rate": 4.275074698706216e-06,
      "loss": 2.975,
      "step": 218007
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.29254150390625,
      "learning_rate": 4.274386618393722e-06,
      "loss": 3.009,
      "step": 218008
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.713442325592041,
      "learning_rate": 4.2736985930622134e-06,
      "loss": 2.8591,
      "step": 218009
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7574822902679443,
      "learning_rate": 4.2730106227118255e-06,
      "loss": 2.9113,
      "step": 218010
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.076813220977783,
      "learning_rate": 4.272322707342624e-06,
      "loss": 2.9276,
      "step": 218011
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.018082618713379,
      "learning_rate": 4.271634846954808e-06,
      "loss": 2.8419,
      "step": 218012
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8571908473968506,
      "learning_rate": 4.2709470415484444e-06,
      "loss": 3.0792,
      "step": 218013
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5174412727355957,
      "learning_rate": 4.270259291123734e-06,
      "loss": 2.7332,
      "step": 218014
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8083975315093994,
      "learning_rate": 4.269571595680743e-06,
      "loss": 2.9418,
      "step": 218015
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.42029070854187,
      "learning_rate": 4.268883955219638e-06,
      "loss": 3.053,
      "step": 218016
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.956000566482544,
      "learning_rate": 4.268196369740518e-06,
      "loss": 2.6714,
      "step": 218017
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.695582866668701,
      "learning_rate": 4.267508839243517e-06,
      "loss": 2.6853,
      "step": 218018
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.3879075050354,
      "learning_rate": 4.266821363728768e-06,
      "loss": 2.8197,
      "step": 218019
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4220166206359863,
      "learning_rate": 4.266133943196404e-06,
      "loss": 3.1263,
      "step": 218020
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7368855476379395,
      "learning_rate": 4.265446577646559e-06,
      "loss": 2.802,
      "step": 218021
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.68178391456604,
      "learning_rate": 4.264759267079298e-06,
      "loss": 2.9702,
      "step": 218022
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.884273052215576,
      "learning_rate": 4.26407201149489e-06,
      "loss": 2.8895,
      "step": 218023
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.173367977142334,
      "learning_rate": 4.2633848108933e-06,
      "loss": 3.0984,
      "step": 218024
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.173182725906372,
      "learning_rate": 4.2626976652747616e-06,
      "loss": 2.9093,
      "step": 218025
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.938513994216919,
      "learning_rate": 4.262010574639374e-06,
      "loss": 3.0144,
      "step": 218026
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7413933277130127,
      "learning_rate": 4.261323538987238e-06,
      "loss": 3.1815,
      "step": 218027
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.4190428256988525,
      "learning_rate": 4.26063655831852e-06,
      "loss": 2.9551,
      "step": 218028
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.965433120727539,
      "learning_rate": 4.259949632633353e-06,
      "loss": 2.9608,
      "step": 218029
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.888794422149658,
      "learning_rate": 4.259262761931803e-06,
      "loss": 3.0051,
      "step": 218030
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.328287124633789,
      "learning_rate": 4.2585759462140714e-06,
      "loss": 3.1,
      "step": 218031
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.578481674194336,
      "learning_rate": 4.257889185480223e-06,
      "loss": 2.795,
      "step": 218032
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.788740873336792,
      "learning_rate": 4.257202479730426e-06,
      "loss": 2.8,
      "step": 218033
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6139018535614014,
      "learning_rate": 4.256515828964779e-06,
      "loss": 2.9788,
      "step": 218034
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9887614250183105,
      "learning_rate": 4.255829233183483e-06,
      "loss": 2.7554,
      "step": 218035
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7310895919799805,
      "learning_rate": 4.25514269238657e-06,
      "loss": 3.11,
      "step": 218036
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.375269889831543,
      "learning_rate": 4.254456206574241e-06,
      "loss": 2.884,
      "step": 218037
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.53342342376709,
      "learning_rate": 4.253769775746529e-06,
      "loss": 3.1497,
      "step": 218038
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.86287522315979,
      "learning_rate": 4.253083399903701e-06,
      "loss": 2.852,
      "step": 218039
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2573745250701904,
      "learning_rate": 4.252397079045721e-06,
      "loss": 2.9349,
      "step": 218040
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.894979238510132,
      "learning_rate": 4.251710813172893e-06,
      "loss": 2.7897,
      "step": 218041
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7918834686279297,
      "learning_rate": 4.2510246022852135e-06,
      "loss": 2.9243,
      "step": 218042
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1318395137786865,
      "learning_rate": 4.25033844638285e-06,
      "loss": 2.9408,
      "step": 218043
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.455512762069702,
      "learning_rate": 4.249652345465904e-06,
      "loss": 2.7872,
      "step": 218044
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9199252128601074,
      "learning_rate": 4.248966299534573e-06,
      "loss": 2.8786,
      "step": 218045
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.735917806625366,
      "learning_rate": 4.248280308588925e-06,
      "loss": 3.2292,
      "step": 218046
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.052842855453491,
      "learning_rate": 4.247594372629126e-06,
      "loss": 2.9913,
      "step": 218047
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7609333992004395,
      "learning_rate": 4.246908491655243e-06,
      "loss": 2.8908,
      "step": 218048
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6311538219451904,
      "learning_rate": 4.246222665667476e-06,
      "loss": 3.0012,
      "step": 218049
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.152778387069702,
      "learning_rate": 4.24553689466589e-06,
      "loss": 2.5686,
      "step": 218050
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.768859624862671,
      "learning_rate": 4.244851178650655e-06,
      "loss": 3.1014,
      "step": 218051
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2614970207214355,
      "learning_rate": 4.2441655176219e-06,
      "loss": 3.0326,
      "step": 218052
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.597771644592285,
      "learning_rate": 4.243479911579728e-06,
      "loss": 2.8587,
      "step": 218053
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.0486955642700195,
      "learning_rate": 4.242794360524271e-06,
      "loss": 2.8139,
      "step": 218054
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2333719730377197,
      "learning_rate": 4.242108864455663e-06,
      "loss": 2.9698,
      "step": 218055
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.13728928565979,
      "learning_rate": 4.241423423374035e-06,
      "loss": 2.8082,
      "step": 218056
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.854703664779663,
      "learning_rate": 4.24073803727949e-06,
      "loss": 3.0995,
      "step": 218057
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.716015338897705,
      "learning_rate": 4.240052706172192e-06,
      "loss": 2.893,
      "step": 218058
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.22651743888855,
      "learning_rate": 4.239367430052243e-06,
      "loss": 3.0738,
      "step": 218059
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6385657787323,
      "learning_rate": 4.238682208919775e-06,
      "loss": 3.1014,
      "step": 218060
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.633613109588623,
      "learning_rate": 4.237997042774955e-06,
      "loss": 3.0592,
      "step": 218061
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.133124351501465,
      "learning_rate": 4.237311931617815e-06,
      "loss": 2.8026,
      "step": 218062
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7480740547180176,
      "learning_rate": 4.23662687544859e-06,
      "loss": 2.8428,
      "step": 218063
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.137256622314453,
      "learning_rate": 4.235941874267312e-06,
      "loss": 3.0797,
      "step": 218064
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7626373767852783,
      "learning_rate": 4.235256928074216e-06,
      "loss": 2.6328,
      "step": 218065
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.973466634750366,
      "learning_rate": 4.234572036869333e-06,
      "loss": 2.9493,
      "step": 218066
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7035207748413086,
      "learning_rate": 4.23388720065283e-06,
      "loss": 2.5987,
      "step": 218067
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5045337677001953,
      "learning_rate": 4.233202419424841e-06,
      "loss": 2.878,
      "step": 218068
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.117428779602051,
      "learning_rate": 4.232517693185467e-06,
      "loss": 2.5746,
      "step": 218069
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1300735473632812,
      "learning_rate": 4.231833021934838e-06,
      "loss": 2.9137,
      "step": 218070
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4005489349365234,
      "learning_rate": 4.231148405673124e-06,
      "loss": 2.8739,
      "step": 218071
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.883497953414917,
      "learning_rate": 4.230463844400422e-06,
      "loss": 3.0877,
      "step": 218072
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.927901029586792,
      "learning_rate": 4.229779338116868e-06,
      "loss": 2.8424,
      "step": 218073
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.4834957122802734,
      "learning_rate": 4.229094886822526e-06,
      "loss": 2.879,
      "step": 218074
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.479642152786255,
      "learning_rate": 4.228410490517631e-06,
      "loss": 2.8563,
      "step": 218075
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.024536609649658,
      "learning_rate": 4.227726149202215e-06,
      "loss": 2.8228,
      "step": 218076
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.867295265197754,
      "learning_rate": 4.227041862876512e-06,
      "loss": 2.6794,
      "step": 218077
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.406421661376953,
      "learning_rate": 4.226357631540555e-06,
      "loss": 2.906,
      "step": 218078
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8759348392486572,
      "learning_rate": 4.225673455194478e-06,
      "loss": 2.8656,
      "step": 218079
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.4544172286987305,
      "learning_rate": 4.224989333838447e-06,
      "loss": 2.6979,
      "step": 218080
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0364882946014404,
      "learning_rate": 4.2243052674725945e-06,
      "loss": 2.7578,
      "step": 218081
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9438912868499756,
      "learning_rate": 4.2236212560969875e-06,
      "loss": 3.1726,
      "step": 218082
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0323987007141113,
      "learning_rate": 4.222937299711793e-06,
      "loss": 2.9832,
      "step": 218083
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1259429454803467,
      "learning_rate": 4.2222533983171765e-06,
      "loss": 2.8822,
      "step": 218084
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.783132314682007,
      "learning_rate": 4.221569551913206e-06,
      "loss": 2.5733,
      "step": 218085
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9122579097747803,
      "learning_rate": 4.220885760500014e-06,
      "loss": 2.9797,
      "step": 218086
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6766490936279297,
      "learning_rate": 4.220202024077768e-06,
      "loss": 3.0111,
      "step": 218087
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.511618137359619,
      "learning_rate": 4.219518342646566e-06,
      "loss": 2.7943,
      "step": 218088
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7895772457122803,
      "learning_rate": 4.2188347162065095e-06,
      "loss": 2.983,
      "step": 218089
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.966517210006714,
      "learning_rate": 4.2181511447577975e-06,
      "loss": 2.8775,
      "step": 218090
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.761761426925659,
      "learning_rate": 4.217467628300497e-06,
      "loss": 3.1139,
      "step": 218091
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.906891345977783,
      "learning_rate": 4.216784166834741e-06,
      "loss": 2.9712,
      "step": 218092
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.042191982269287,
      "learning_rate": 4.216100760360697e-06,
      "loss": 3.0276,
      "step": 218093
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0455925464630127,
      "learning_rate": 4.21541740887843e-06,
      "loss": 2.8565,
      "step": 218094
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.707427740097046,
      "learning_rate": 4.214734112388108e-06,
      "loss": 3.0521,
      "step": 218095
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.143675804138184,
      "learning_rate": 4.214050870889862e-06,
      "loss": 2.8174,
      "step": 218096
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1829285621643066,
      "learning_rate": 4.213367684383795e-06,
      "loss": 2.8007,
      "step": 218097
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9918675422668457,
      "learning_rate": 4.212684552870072e-06,
      "loss": 2.9418,
      "step": 218098
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7136993408203125,
      "learning_rate": 4.2120014763487585e-06,
      "loss": 2.8697,
      "step": 218099
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7776224613189697,
      "learning_rate": 4.211318454820023e-06,
      "loss": 3.0334,
      "step": 218100
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4504146575927734,
      "learning_rate": 4.210635488283997e-06,
      "loss": 3.0549,
      "step": 218101
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1617579460144043,
      "learning_rate": 4.209952576740816e-06,
      "loss": 2.8896,
      "step": 218102
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0735654830932617,
      "learning_rate": 4.209269720190544e-06,
      "loss": 2.9637,
      "step": 218103
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.507356643676758,
      "learning_rate": 4.208586918633416e-06,
      "loss": 2.5729,
      "step": 218104
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0457284450531006,
      "learning_rate": 4.207904172069432e-06,
      "loss": 2.8842,
      "step": 218105
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.147688627243042,
      "learning_rate": 4.207221480498824e-06,
      "loss": 3.0131,
      "step": 218106
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2290022373199463,
      "learning_rate": 4.206538843921625e-06,
      "loss": 2.8181,
      "step": 218107
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8608155250549316,
      "learning_rate": 4.205856262338103e-06,
      "loss": 2.8204,
      "step": 218108
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.828355073928833,
      "learning_rate": 4.205173735748224e-06,
      "loss": 2.9137,
      "step": 218109
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.823354482650757,
      "learning_rate": 4.204491264152221e-06,
      "loss": 2.8248,
      "step": 218110
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1921825408935547,
      "learning_rate": 4.2038088475501275e-06,
      "loss": 2.785,
      "step": 218111
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.42397403717041,
      "learning_rate": 4.20312648594221e-06,
      "loss": 2.9257,
      "step": 218112
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6636786460876465,
      "learning_rate": 4.2024441793284345e-06,
      "loss": 2.7787,
      "step": 218113
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.950068235397339,
      "learning_rate": 4.201761927709102e-06,
      "loss": 2.6178,
      "step": 218114
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.847041368484497,
      "learning_rate": 4.201079731084178e-06,
      "loss": 2.904,
      "step": 218115
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2574808597564697,
      "learning_rate": 4.200397589453863e-06,
      "loss": 3.012,
      "step": 218116
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.187544107437134,
      "learning_rate": 4.199715502818291e-06,
      "loss": 2.9293,
      "step": 218117
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8765904903411865,
      "learning_rate": 4.199033471177593e-06,
      "loss": 2.8885,
      "step": 218118
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.308244228363037,
      "learning_rate": 4.1983514945318375e-06,
      "loss": 2.7763,
      "step": 218119
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4101388454437256,
      "learning_rate": 4.197669572881224e-06,
      "loss": 2.8833,
      "step": 218120
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.963141441345215,
      "learning_rate": 4.196987706225852e-06,
      "loss": 2.91,
      "step": 218121
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.546673059463501,
      "learning_rate": 4.196305894565821e-06,
      "loss": 2.7466,
      "step": 218122
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1290063858032227,
      "learning_rate": 4.195624137901266e-06,
      "loss": 3.0526,
      "step": 218123
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.351294755935669,
      "learning_rate": 4.194942436232352e-06,
      "loss": 2.9554,
      "step": 218124
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.861396074295044,
      "learning_rate": 4.194260789559179e-06,
      "loss": 2.7571,
      "step": 218125
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.448906421661377,
      "learning_rate": 4.19357919788188e-06,
      "loss": 2.8005,
      "step": 218126
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1167800426483154,
      "learning_rate": 4.192897661200589e-06,
      "loss": 3.1752,
      "step": 218127
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.442983865737915,
      "learning_rate": 4.192216179515407e-06,
      "loss": 2.8929,
      "step": 218128
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0871424674987793,
      "learning_rate": 4.191534752826464e-06,
      "loss": 2.657,
      "step": 218129
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.662879228591919,
      "learning_rate": 4.19085338113393e-06,
      "loss": 3.1193,
      "step": 218130
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.610769033432007,
      "learning_rate": 4.190172064437869e-06,
      "loss": 2.913,
      "step": 218131
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7655797004699707,
      "learning_rate": 4.189490802738449e-06,
      "loss": 2.8264,
      "step": 218132
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9093360900878906,
      "learning_rate": 4.1888095960358025e-06,
      "loss": 3.015,
      "step": 218133
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.070204496383667,
      "learning_rate": 4.18812844433003e-06,
      "loss": 2.9521,
      "step": 218134
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9678120613098145,
      "learning_rate": 4.187447347621265e-06,
      "loss": 2.9135,
      "step": 218135
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8661959171295166,
      "learning_rate": 4.186766305909639e-06,
      "loss": 3.0178,
      "step": 218136
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7855212688446045,
      "learning_rate": 4.186085319195287e-06,
      "loss": 2.793,
      "step": 218137
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9742352962493896,
      "learning_rate": 4.185404387478308e-06,
      "loss": 3.2009,
      "step": 218138
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0071046352386475,
      "learning_rate": 4.184723510758869e-06,
      "loss": 2.8145,
      "step": 218139
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0782110691070557,
      "learning_rate": 4.184042689037071e-06,
      "loss": 2.8212,
      "step": 218140
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5166432857513428,
      "learning_rate": 4.183361922313011e-06,
      "loss": 3.0445,
      "step": 218141
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7994070053100586,
      "learning_rate": 4.1826812105868575e-06,
      "loss": 3.0908,
      "step": 218142
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3292391300201416,
      "learning_rate": 4.182000553858745e-06,
      "loss": 3.1879,
      "step": 218143
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7267093658447266,
      "learning_rate": 4.18131995212877e-06,
      "loss": 3.1025,
      "step": 218144
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.690682888031006,
      "learning_rate": 4.180639405397102e-06,
      "loss": 2.9163,
      "step": 218145
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0546514987945557,
      "learning_rate": 4.1799589136638055e-06,
      "loss": 2.9029,
      "step": 218146
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3704617023468018,
      "learning_rate": 4.179278476929049e-06,
      "loss": 2.8564,
      "step": 218147
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.819840669631958,
      "learning_rate": 4.178598095192931e-06,
      "loss": 3.1268,
      "step": 218148
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8069660663604736,
      "learning_rate": 4.1779177684556185e-06,
      "loss": 3.0117,
      "step": 218149
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.831728935241699,
      "learning_rate": 4.177237496717212e-06,
      "loss": 3.0628,
      "step": 218150
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.149643659591675,
      "learning_rate": 4.176557279977843e-06,
      "loss": 2.9044,
      "step": 218151
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.379171371459961,
      "learning_rate": 4.175877118237614e-06,
      "loss": 2.5795,
      "step": 218152
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6775176525115967,
      "learning_rate": 4.17519701149669e-06,
      "loss": 2.899,
      "step": 218153
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3661117553710938,
      "learning_rate": 4.1745169597551695e-06,
      "loss": 2.9396,
      "step": 218154
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.075209140777588,
      "learning_rate": 4.173836963013188e-06,
      "loss": 2.6928,
      "step": 218155
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9244918823242188,
      "learning_rate": 4.173157021270912e-06,
      "loss": 2.7775,
      "step": 218156
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7701871395111084,
      "learning_rate": 4.172477134528407e-06,
      "loss": 2.887,
      "step": 218157
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9918622970581055,
      "learning_rate": 4.17179730278584e-06,
      "loss": 2.9592,
      "step": 218158
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.125657081604004,
      "learning_rate": 4.1711175260432775e-06,
      "loss": 3.0344,
      "step": 218159
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.543729543685913,
      "learning_rate": 4.170437804300919e-06,
      "loss": 3.0209,
      "step": 218160
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0641543865203857,
      "learning_rate": 4.169758137558832e-06,
      "loss": 2.8732,
      "step": 218161
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.103832483291626,
      "learning_rate": 4.169078525817215e-06,
      "loss": 2.9442,
      "step": 218162
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2824149131774902,
      "learning_rate": 4.168398969076137e-06,
      "loss": 3.1497,
      "step": 218163
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.800496816635132,
      "learning_rate": 4.167719467335729e-06,
      "loss": 2.9054,
      "step": 218164
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0278191566467285,
      "learning_rate": 4.167040020596091e-06,
      "loss": 2.8954,
      "step": 218165
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.102980852127075,
      "learning_rate": 4.166360628857457e-06,
      "loss": 2.8273,
      "step": 218166
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.334868907928467,
      "learning_rate": 4.1656812921198265e-06,
      "loss": 2.9218,
      "step": 218167
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1581454277038574,
      "learning_rate": 4.1650020103834e-06,
      "loss": 3.0817,
      "step": 218168
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.510409116744995,
      "learning_rate": 4.164322783648277e-06,
      "loss": 2.9471,
      "step": 218169
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.947190761566162,
      "learning_rate": 4.163643611914591e-06,
      "loss": 2.8433,
      "step": 218170
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0230185985565186,
      "learning_rate": 4.162964495182475e-06,
      "loss": 2.9597,
      "step": 218171
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.662313938140869,
      "learning_rate": 4.162285433452061e-06,
      "loss": 2.8032,
      "step": 218172
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3635616302490234,
      "learning_rate": 4.161606426723418e-06,
      "loss": 3.0491,
      "step": 218173
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.338022232055664,
      "learning_rate": 4.160927474996745e-06,
      "loss": 2.8177,
      "step": 218174
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.902541160583496,
      "learning_rate": 4.160248578272141e-06,
      "loss": 3.0302,
      "step": 218175
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.095108985900879,
      "learning_rate": 4.159569736549739e-06,
      "loss": 2.9723,
      "step": 218176
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4072771072387695,
      "learning_rate": 4.158890949829641e-06,
      "loss": 2.786,
      "step": 218177
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3683505058288574,
      "learning_rate": 4.158212218112011e-06,
      "loss": 2.8811,
      "step": 218178
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6072165966033936,
      "learning_rate": 4.157533541396918e-06,
      "loss": 2.7349,
      "step": 218179
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8906679153442383,
      "learning_rate": 4.156854919684561e-06,
      "loss": 3.0516,
      "step": 218180
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7217369079589844,
      "learning_rate": 4.156176352975038e-06,
      "loss": 2.8568,
      "step": 218181
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2045211791992188,
      "learning_rate": 4.155497841268418e-06,
      "loss": 2.776,
      "step": 218182
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7025606632232666,
      "learning_rate": 4.1548193845649e-06,
      "loss": 3.0314,
      "step": 218183
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0285613536834717,
      "learning_rate": 4.154140982864585e-06,
      "loss": 2.8617,
      "step": 218184
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.009284019470215,
      "learning_rate": 4.153462636167604e-06,
      "loss": 2.8904,
      "step": 218185
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.282299280166626,
      "learning_rate": 4.152784344474091e-06,
      "loss": 2.9375,
      "step": 218186
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.171095371246338,
      "learning_rate": 4.152106107784147e-06,
      "loss": 2.8786,
      "step": 218187
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9050889015197754,
      "learning_rate": 4.151427926097873e-06,
      "loss": 2.833,
      "step": 218188
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.048158884048462,
      "learning_rate": 4.150749799415498e-06,
      "loss": 2.7751,
      "step": 218189
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6636743545532227,
      "learning_rate": 4.150071727737025e-06,
      "loss": 2.9775,
      "step": 218190
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7251577377319336,
      "learning_rate": 4.149393711062687e-06,
      "loss": 2.726,
      "step": 218191
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.884979486465454,
      "learning_rate": 4.148715749392517e-06,
      "loss": 3.0147,
      "step": 218192
    },
    {
      "epoch": 2.84,
      "grad_norm": 5.638628959655762,
      "learning_rate": 4.148037842726715e-06,
      "loss": 2.9386,
      "step": 218193
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.86089825630188,
      "learning_rate": 4.14735999106538e-06,
      "loss": 3.0447,
      "step": 218194
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7987282276153564,
      "learning_rate": 4.146682194408646e-06,
      "loss": 2.8594,
      "step": 218195
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.485827445983887,
      "learning_rate": 4.146004452756579e-06,
      "loss": 2.8481,
      "step": 218196
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.563547372817993,
      "learning_rate": 4.145326766109414e-06,
      "loss": 2.9239,
      "step": 218197
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8360328674316406,
      "learning_rate": 4.144649134467149e-06,
      "loss": 2.9327,
      "step": 218198
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8210928440093994,
      "learning_rate": 4.1439715578300505e-06,
      "loss": 2.9218,
      "step": 218199
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0360913276672363,
      "learning_rate": 4.143294036198119e-06,
      "loss": 3.0707,
      "step": 218200
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5865790843963623,
      "learning_rate": 4.142616569571588e-06,
      "loss": 2.9051,
      "step": 218201
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5992560386657715,
      "learning_rate": 4.141939157950458e-06,
      "loss": 2.8136,
      "step": 218202
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7984390258789062,
      "learning_rate": 4.141261801334994e-06,
      "loss": 2.8279,
      "step": 218203
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.964052438735962,
      "learning_rate": 4.140584499725197e-06,
      "loss": 2.7992,
      "step": 218204
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.98380970954895,
      "learning_rate": 4.139907253121299e-06,
      "loss": 2.9098,
      "step": 218205
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2844936847686768,
      "learning_rate": 4.139230061523369e-06,
      "loss": 2.9742,
      "step": 218206
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9330883026123047,
      "learning_rate": 4.1385529249315046e-06,
      "loss": 3.1432,
      "step": 218207
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2350106239318848,
      "learning_rate": 4.137875843345906e-06,
      "loss": 3.1128,
      "step": 218208
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.586448907852173,
      "learning_rate": 4.13719881676664e-06,
      "loss": 2.9771,
      "step": 218209
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8264219760894775,
      "learning_rate": 4.136521845193841e-06,
      "loss": 2.74,
      "step": 218210
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.927384614944458,
      "learning_rate": 4.135844928627674e-06,
      "loss": 2.9027,
      "step": 218211
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.4988536834716797,
      "learning_rate": 4.13516806706824e-06,
      "loss": 2.8432,
      "step": 218212
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.531405210494995,
      "learning_rate": 4.134491260515671e-06,
      "loss": 2.9356,
      "step": 218213
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.819550037384033,
      "learning_rate": 4.133814508970035e-06,
      "loss": 3.0361,
      "step": 218214
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6494431495666504,
      "learning_rate": 4.133137812431564e-06,
      "loss": 3.1105,
      "step": 218215
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9090170860290527,
      "learning_rate": 4.132461170900292e-06,
      "loss": 2.9791,
      "step": 218216
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8991806507110596,
      "learning_rate": 4.131784584376385e-06,
      "loss": 2.9485,
      "step": 218217
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.71036434173584,
      "learning_rate": 4.1311080528599774e-06,
      "loss": 3.0362,
      "step": 218218
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0275540351867676,
      "learning_rate": 4.130431576351201e-06,
      "loss": 2.7277,
      "step": 218219
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6673920154571533,
      "learning_rate": 4.1297551548501225e-06,
      "loss": 2.6897,
      "step": 218220
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1440038681030273,
      "learning_rate": 4.12907878835691e-06,
      "loss": 2.9804,
      "step": 218221
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.8499972820281982,
      "learning_rate": 4.128402476871695e-06,
      "loss": 2.877,
      "step": 218222
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1579208374023438,
      "learning_rate": 4.127726220394612e-06,
      "loss": 3.0129,
      "step": 218223
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2155094146728516,
      "learning_rate": 4.127050018925759e-06,
      "loss": 2.8752,
      "step": 218224
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3844690322875977,
      "learning_rate": 4.126373872465272e-06,
      "loss": 2.5939,
      "step": 218225
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5873420238494873,
      "learning_rate": 4.125697781013282e-06,
      "loss": 2.8162,
      "step": 218226
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.9385859966278076,
      "learning_rate": 4.125021744569889e-06,
      "loss": 2.8839,
      "step": 218227
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.480234146118164,
      "learning_rate": 4.1243457631352616e-06,
      "loss": 3.0928,
      "step": 218228
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8113951683044434,
      "learning_rate": 4.123669836709498e-06,
      "loss": 2.8362,
      "step": 218229
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.840216875076294,
      "learning_rate": 4.122993965292731e-06,
      "loss": 2.8912,
      "step": 218230
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.785710334777832,
      "learning_rate": 4.122318148885096e-06,
      "loss": 2.9539,
      "step": 218231
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.497175931930542,
      "learning_rate": 4.121642387486657e-06,
      "loss": 2.855,
      "step": 218232
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.752884864807129,
      "learning_rate": 4.120966681097648e-06,
      "loss": 2.8725,
      "step": 218233
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.224386692047119,
      "learning_rate": 4.120291029718103e-06,
      "loss": 3.1421,
      "step": 218234
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4900522232055664,
      "learning_rate": 4.119615433348189e-06,
      "loss": 2.7711,
      "step": 218235
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.792732000350952,
      "learning_rate": 4.118939891988038e-06,
      "loss": 2.5582,
      "step": 218236
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8590939044952393,
      "learning_rate": 4.11826440563775e-06,
      "loss": 2.9563,
      "step": 218237
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7308731079101562,
      "learning_rate": 4.117588974297426e-06,
      "loss": 2.7473,
      "step": 218238
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1402149200439453,
      "learning_rate": 4.116913597967298e-06,
      "loss": 2.9863,
      "step": 218239
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.25288987159729,
      "learning_rate": 4.116238276647365e-06,
      "loss": 2.8004,
      "step": 218240
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.688890218734741,
      "learning_rate": 4.11556301033783e-06,
      "loss": 3.0802,
      "step": 218241
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4811089038848877,
      "learning_rate": 4.11488779903879e-06,
      "loss": 2.9863,
      "step": 218242
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8907480239868164,
      "learning_rate": 4.114212642750414e-06,
      "loss": 2.8209,
      "step": 218243
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.698798179626465,
      "learning_rate": 4.1135375414727335e-06,
      "loss": 2.8669,
      "step": 218244
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.905954599380493,
      "learning_rate": 4.1128624952059815e-06,
      "loss": 2.7421,
      "step": 218245
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.979395627975464,
      "learning_rate": 4.1121875039501925e-06,
      "loss": 3.0224,
      "step": 218246
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.354973554611206,
      "learning_rate": 4.111512567705533e-06,
      "loss": 2.974,
      "step": 218247
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.645453691482544,
      "learning_rate": 4.1108376864721686e-06,
      "loss": 3.2419,
      "step": 218248
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6348624229431152,
      "learning_rate": 4.110162860250166e-06,
      "loss": 2.95,
      "step": 218249
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.122586727142334,
      "learning_rate": 4.109488089039659e-06,
      "loss": 3.1196,
      "step": 218250
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.685884475708008,
      "learning_rate": 4.108813372840813e-06,
      "loss": 2.8921,
      "step": 218251
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0627644062042236,
      "learning_rate": 4.108138711653664e-06,
      "loss": 3.0066,
      "step": 218252
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6411731243133545,
      "learning_rate": 4.107464105478442e-06,
      "loss": 2.9548,
      "step": 218253
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.995913028717041,
      "learning_rate": 4.106789554315248e-06,
      "loss": 3.0729,
      "step": 218254
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.080900192260742,
      "learning_rate": 4.10611505816415e-06,
      "loss": 2.832,
      "step": 218255
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.993563175201416,
      "learning_rate": 4.105440617025313e-06,
      "loss": 2.9866,
      "step": 218256
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0907976627349854,
      "learning_rate": 4.10476623089887e-06,
      "loss": 2.7294,
      "step": 218257
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0648183822631836,
      "learning_rate": 4.104091899784922e-06,
      "loss": 3.063,
      "step": 218258
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0009257793426514,
      "learning_rate": 4.103417623683636e-06,
      "loss": 2.9561,
      "step": 218259
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.705294609069824,
      "learning_rate": 4.1027434025950764e-06,
      "loss": 2.7945,
      "step": 218260
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.621399402618408,
      "learning_rate": 4.102069236519445e-06,
      "loss": 2.9406,
      "step": 218261
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8774094581604004,
      "learning_rate": 4.101395125456808e-06,
      "loss": 2.839,
      "step": 218262
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.581833839416504,
      "learning_rate": 4.100721069407298e-06,
      "loss": 2.7951,
      "step": 218263
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.675957441329956,
      "learning_rate": 4.1000470683710485e-06,
      "loss": 2.9983,
      "step": 218264
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.376115083694458,
      "learning_rate": 4.099373122348193e-06,
      "loss": 2.7784,
      "step": 218265
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5680625438690186,
      "learning_rate": 4.0986992313388315e-06,
      "loss": 2.7318,
      "step": 218266
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2874176502227783,
      "learning_rate": 4.09802539534313e-06,
      "loss": 2.9041,
      "step": 218267
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.284508228302002,
      "learning_rate": 4.097351614361188e-06,
      "loss": 2.7603,
      "step": 218268
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9455416202545166,
      "learning_rate": 4.0966778883931075e-06,
      "loss": 2.8337,
      "step": 218269
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.068408250808716,
      "learning_rate": 4.096004217439086e-06,
      "loss": 2.7105,
      "step": 218270
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9150173664093018,
      "learning_rate": 4.095330601499158e-06,
      "loss": 3.2371,
      "step": 218271
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8338606357574463,
      "learning_rate": 4.0946570405735234e-06,
      "loss": 2.8708,
      "step": 218272
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1476023197174072,
      "learning_rate": 4.093983534662248e-06,
      "loss": 2.8937,
      "step": 218273
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5199716091156006,
      "learning_rate": 4.093310083765533e-06,
      "loss": 2.8663,
      "step": 218274
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4134762287139893,
      "learning_rate": 4.092636687883411e-06,
      "loss": 2.9596,
      "step": 218275
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7930922508239746,
      "learning_rate": 4.091963347016081e-06,
      "loss": 2.7972,
      "step": 218276
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3709752559661865,
      "learning_rate": 4.091290061163644e-06,
      "loss": 2.8984,
      "step": 218277
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5526185035705566,
      "learning_rate": 4.090616830326199e-06,
      "loss": 2.9182,
      "step": 218278
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1951587200164795,
      "learning_rate": 4.089943654503913e-06,
      "loss": 2.6226,
      "step": 218279
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4542739391326904,
      "learning_rate": 4.089270533696887e-06,
      "loss": 2.8301,
      "step": 218280
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9445319175720215,
      "learning_rate": 4.088597467905252e-06,
      "loss": 2.8631,
      "step": 218281
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6139135360717773,
      "learning_rate": 4.08792445712911e-06,
      "loss": 3.2449,
      "step": 218282
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1763579845428467,
      "learning_rate": 4.087251501368627e-06,
      "loss": 3.0,
      "step": 218283
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1531922817230225,
      "learning_rate": 4.086578600623936e-06,
      "loss": 3.0367,
      "step": 218284
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0017216205596924,
      "learning_rate": 4.0859057548951025e-06,
      "loss": 2.8864,
      "step": 218285
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.363523244857788,
      "learning_rate": 4.085232964182294e-06,
      "loss": 2.9681,
      "step": 218286
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.702152729034424,
      "learning_rate": 4.084560228485612e-06,
      "loss": 2.8121,
      "step": 218287
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.865624189376831,
      "learning_rate": 4.08388754780522e-06,
      "loss": 2.929,
      "step": 218288
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3548057079315186,
      "learning_rate": 4.0832149221412206e-06,
      "loss": 2.7677,
      "step": 218289
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1215288639068604,
      "learning_rate": 4.082542351493712e-06,
      "loss": 2.741,
      "step": 218290
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.737948179244995,
      "learning_rate": 4.081869835862861e-06,
      "loss": 2.9715,
      "step": 218291
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8386178016662598,
      "learning_rate": 4.081197375248801e-06,
      "loss": 2.8166,
      "step": 218292
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.332709550857544,
      "learning_rate": 4.0805249696516e-06,
      "loss": 2.9676,
      "step": 218293
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9954423904418945,
      "learning_rate": 4.079852619071422e-06,
      "loss": 2.8821,
      "step": 218294
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5850327014923096,
      "learning_rate": 4.0791803235083686e-06,
      "loss": 2.8641,
      "step": 218295
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.129518508911133,
      "learning_rate": 4.078508082962606e-06,
      "loss": 2.8094,
      "step": 218296
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.252725601196289,
      "learning_rate": 4.077835897434234e-06,
      "loss": 3.2417,
      "step": 218297
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.273390054702759,
      "learning_rate": 4.077163766923386e-06,
      "loss": 3.126,
      "step": 218298
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.180619239807129,
      "learning_rate": 4.076491691430128e-06,
      "loss": 2.8719,
      "step": 218299
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5529844760894775,
      "learning_rate": 4.075819670954694e-06,
      "loss": 2.6862,
      "step": 218300
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.942052125930786,
      "learning_rate": 4.075147705497117e-06,
      "loss": 2.8493,
      "step": 218301
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.755903959274292,
      "learning_rate": 4.074475795057564e-06,
      "loss": 3.0689,
      "step": 218302
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6804966926574707,
      "learning_rate": 4.0738039396361665e-06,
      "loss": 2.9914,
      "step": 218303
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.038278102874756,
      "learning_rate": 4.073132139233026e-06,
      "loss": 2.9615,
      "step": 218304
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1361472606658936,
      "learning_rate": 4.072460393848276e-06,
      "loss": 2.7577,
      "step": 218305
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1528170108795166,
      "learning_rate": 4.071788703482048e-06,
      "loss": 2.9278,
      "step": 218306
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.012481212615967,
      "learning_rate": 4.071117068134411e-06,
      "loss": 2.8673,
      "step": 218307
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1744225025177,
      "learning_rate": 4.070445487805596e-06,
      "loss": 2.8465,
      "step": 218308
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.8083131313323975,
      "learning_rate": 4.069773962495671e-06,
      "loss": 2.833,
      "step": 218309
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.567878484725952,
      "learning_rate": 4.0691024922047675e-06,
      "loss": 2.8465,
      "step": 218310
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.4791436195373535,
      "learning_rate": 4.068431076932955e-06,
      "loss": 2.7928,
      "step": 218311
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.649179697036743,
      "learning_rate": 4.06775971668043e-06,
      "loss": 2.6922,
      "step": 218312
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.967465400695801,
      "learning_rate": 4.067088411447294e-06,
      "loss": 3.0834,
      "step": 218313
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1689627170562744,
      "learning_rate": 4.066417161233682e-06,
      "loss": 2.8754,
      "step": 218314
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.281054735183716,
      "learning_rate": 4.065745966039724e-06,
      "loss": 3.1341,
      "step": 218315
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.381655693054199,
      "learning_rate": 4.065074825865522e-06,
      "loss": 2.7988,
      "step": 218316
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5163028240203857,
      "learning_rate": 4.064403740711175e-06,
      "loss": 2.963,
      "step": 218317
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.04411506652832,
      "learning_rate": 4.063732710576884e-06,
      "loss": 2.8317,
      "step": 218318
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.355226516723633,
      "learning_rate": 4.063061735462681e-06,
      "loss": 3.0526,
      "step": 218319
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0959603786468506,
      "learning_rate": 4.062390815368799e-06,
      "loss": 2.6755,
      "step": 218320
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9955828189849854,
      "learning_rate": 4.0617199502952725e-06,
      "loss": 2.6449,
      "step": 218321
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.166015625,
      "learning_rate": 4.061049140242267e-06,
      "loss": 2.8447,
      "step": 218322
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.646507740020752,
      "learning_rate": 4.060378385209884e-06,
      "loss": 3.0026,
      "step": 218323
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6454219818115234,
      "learning_rate": 4.059707685198288e-06,
      "loss": 2.9447,
      "step": 218324
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4284582138061523,
      "learning_rate": 4.059037040207547e-06,
      "loss": 2.9174,
      "step": 218325
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.713850975036621,
      "learning_rate": 4.05836645023786e-06,
      "loss": 2.9522,
      "step": 218326
    },
    {
      "epoch": 2.84,
      "grad_norm": 6.033685684204102,
      "learning_rate": 4.057695915289261e-06,
      "loss": 3.1244,
      "step": 218327
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.917271137237549,
      "learning_rate": 4.057025435361982e-06,
      "loss": 2.7956,
      "step": 218328
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8297832012176514,
      "learning_rate": 4.056355010456025e-06,
      "loss": 2.9121,
      "step": 218329
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.042797565460205,
      "learning_rate": 4.055684640571621e-06,
      "loss": 2.7261,
      "step": 218330
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1344351768493652,
      "learning_rate": 4.055014325708805e-06,
      "loss": 3.1297,
      "step": 218331
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8846211433410645,
      "learning_rate": 4.054344065867809e-06,
      "loss": 2.7645,
      "step": 218332
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2265307903289795,
      "learning_rate": 4.053673861048634e-06,
      "loss": 2.9142,
      "step": 218333
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.772152900695801,
      "learning_rate": 4.0530037112515455e-06,
      "loss": 2.9257,
      "step": 218334
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.856344223022461,
      "learning_rate": 4.052333616476544e-06,
      "loss": 2.9068,
      "step": 218335
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7656116485595703,
      "learning_rate": 4.051663576723796e-06,
      "loss": 3.1585,
      "step": 218336
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4317469596862793,
      "learning_rate": 4.050993591993434e-06,
      "loss": 3.1308,
      "step": 218337
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.002882719039917,
      "learning_rate": 4.05032366228556e-06,
      "loss": 2.8319,
      "step": 218338
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.037283420562744,
      "learning_rate": 4.049653787600338e-06,
      "loss": 2.9464,
      "step": 218339
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0692532062530518,
      "learning_rate": 4.048983967937902e-06,
      "loss": 2.895,
      "step": 218340
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6040756702423096,
      "learning_rate": 4.048314203298286e-06,
      "loss": 2.7054,
      "step": 218341
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.514104127883911,
      "learning_rate": 4.047644493681723e-06,
      "loss": 2.767,
      "step": 218342
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6804864406585693,
      "learning_rate": 4.04697483908828e-06,
      "loss": 2.8858,
      "step": 218343
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.984172821044922,
      "learning_rate": 4.046305239518088e-06,
      "loss": 3.0785,
      "step": 218344
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7418813705444336,
      "learning_rate": 4.0456356949712494e-06,
      "loss": 2.99,
      "step": 218345
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.093597650527954,
      "learning_rate": 4.04496620544793e-06,
      "loss": 3.0259,
      "step": 218346
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2087512016296387,
      "learning_rate": 4.044296770948263e-06,
      "loss": 3.1884,
      "step": 218347
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.894301652908325,
      "learning_rate": 4.043627391472315e-06,
      "loss": 2.9642,
      "step": 218348
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7044425010681152,
      "learning_rate": 4.042958067020252e-06,
      "loss": 2.998,
      "step": 218349
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9760706424713135,
      "learning_rate": 4.042288797592208e-06,
      "loss": 2.9171,
      "step": 218350
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3653762340545654,
      "learning_rate": 4.041619583188249e-06,
      "loss": 2.9591,
      "step": 218351
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.990999698638916,
      "learning_rate": 4.040950423808575e-06,
      "loss": 3.0646,
      "step": 218352
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.597024917602539,
      "learning_rate": 4.040281319453254e-06,
      "loss": 2.9311,
      "step": 218353
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5719969272613525,
      "learning_rate": 4.03961227012245e-06,
      "loss": 3.1706,
      "step": 218354
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8412625789642334,
      "learning_rate": 4.0389432758162644e-06,
      "loss": 2.9077,
      "step": 218355
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5110368728637695,
      "learning_rate": 4.03827433653483e-06,
      "loss": 2.9318,
      "step": 218356
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5982859134674072,
      "learning_rate": 4.037605452278248e-06,
      "loss": 2.7671,
      "step": 218357
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.456526041030884,
      "learning_rate": 4.036936623046649e-06,
      "loss": 2.9869,
      "step": 218358
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.788839340209961,
      "learning_rate": 4.0362678488402025e-06,
      "loss": 2.7504,
      "step": 218359
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0812976360321045,
      "learning_rate": 4.0355991296590065e-06,
      "loss": 2.9792,
      "step": 218360
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3256685733795166,
      "learning_rate": 4.034930465503161e-06,
      "loss": 2.9105,
      "step": 218361
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2789011001586914,
      "learning_rate": 4.0342618563728006e-06,
      "loss": 2.977,
      "step": 218362
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9574391841888428,
      "learning_rate": 4.0335933022680566e-06,
      "loss": 2.9147,
      "step": 218363
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.123518228530884,
      "learning_rate": 4.032924803189097e-06,
      "loss": 2.9278,
      "step": 218364
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.111069917678833,
      "learning_rate": 4.032256359135988e-06,
      "loss": 2.781,
      "step": 218365
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9274842739105225,
      "learning_rate": 4.031587970108829e-06,
      "loss": 2.9465,
      "step": 218366
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9459798336029053,
      "learning_rate": 4.030919636107821e-06,
      "loss": 2.7617,
      "step": 218367
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2318739891052246,
      "learning_rate": 4.03025135713303e-06,
      "loss": 3.063,
      "step": 218368
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.753972291946411,
      "learning_rate": 4.029583133184622e-06,
      "loss": 2.8534,
      "step": 218369
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.586164951324463,
      "learning_rate": 4.028914964262697e-06,
      "loss": 2.9026,
      "step": 218370
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.045182943344116,
      "learning_rate": 4.02824685036739e-06,
      "loss": 2.8293,
      "step": 218371
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6203620433807373,
      "learning_rate": 4.027578791498797e-06,
      "loss": 2.6668,
      "step": 218372
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.532289505004883,
      "learning_rate": 4.026910787657056e-06,
      "loss": 3.2411,
      "step": 218373
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.49320650100708,
      "learning_rate": 4.0262428388423305e-06,
      "loss": 3.0064,
      "step": 218374
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.914902448654175,
      "learning_rate": 4.025574945054688e-06,
      "loss": 3.0239,
      "step": 218375
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.036972761154175,
      "learning_rate": 4.024907106294295e-06,
      "loss": 2.8742,
      "step": 218376
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4759693145751953,
      "learning_rate": 4.024239322561284e-06,
      "loss": 2.9368,
      "step": 218377
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.494001865386963,
      "learning_rate": 4.02357159385569e-06,
      "loss": 2.7136,
      "step": 218378
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.895308017730713,
      "learning_rate": 4.022903920177745e-06,
      "loss": 3.1196,
      "step": 218379
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.231800079345703,
      "learning_rate": 4.022236301527515e-06,
      "loss": 2.9482,
      "step": 218380
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.874128818511963,
      "learning_rate": 4.0215687379051675e-06,
      "loss": 2.9039,
      "step": 218381
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.454481840133667,
      "learning_rate": 4.020901229310769e-06,
      "loss": 2.6073,
      "step": 218382
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.759395122528076,
      "learning_rate": 4.0202337757444855e-06,
      "loss": 3.0626,
      "step": 218383
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.9715473651885986,
      "learning_rate": 4.019566377206385e-06,
      "loss": 2.9611,
      "step": 218384
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.7718942165374756,
      "learning_rate": 4.018899033696699e-06,
      "loss": 3.0021,
      "step": 218385
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2853126525878906,
      "learning_rate": 4.018231745215428e-06,
      "loss": 2.9744,
      "step": 218386
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7251501083374023,
      "learning_rate": 4.017564511762805e-06,
      "loss": 2.8531,
      "step": 218387
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0953927040100098,
      "learning_rate": 4.016897333338864e-06,
      "loss": 2.7618,
      "step": 218388
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.009960412979126,
      "learning_rate": 4.016230209943805e-06,
      "loss": 3.0396,
      "step": 218389
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6946847438812256,
      "learning_rate": 4.0155631415776936e-06,
      "loss": 2.9637,
      "step": 218390
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2353649139404297,
      "learning_rate": 4.014896128240697e-06,
      "loss": 2.8198,
      "step": 218391
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7374627590179443,
      "learning_rate": 4.014229169932881e-06,
      "loss": 2.6865,
      "step": 218392
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.33206844329834,
      "learning_rate": 4.0135622666544135e-06,
      "loss": 2.5737,
      "step": 218393
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.598128080368042,
      "learning_rate": 4.012895418405427e-06,
      "loss": 2.9235,
      "step": 218394
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.720902681350708,
      "learning_rate": 4.012228625186054e-06,
      "loss": 2.8459,
      "step": 218395
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6290664672851562,
      "learning_rate": 4.011561886996361e-06,
      "loss": 2.8201,
      "step": 218396
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.606869697570801,
      "learning_rate": 4.0108952038365175e-06,
      "loss": 2.7606,
      "step": 218397
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3747341632843018,
      "learning_rate": 4.01022857570662e-06,
      "loss": 2.9652,
      "step": 218398
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.88861346244812,
      "learning_rate": 4.009562002606836e-06,
      "loss": 2.9977,
      "step": 218399
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.829982042312622,
      "learning_rate": 4.008895484537233e-06,
      "loss": 2.8081,
      "step": 218400
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.20241641998291,
      "learning_rate": 4.008229021498011e-06,
      "loss": 2.9116,
      "step": 218401
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3298773765563965,
      "learning_rate": 4.0075626134891685e-06,
      "loss": 2.9412,
      "step": 218402
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4611904621124268,
      "learning_rate": 4.006896260510972e-06,
      "loss": 2.8543,
      "step": 218403
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8375720977783203,
      "learning_rate": 4.006229962563456e-06,
      "loss": 2.9353,
      "step": 218404
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9480481147766113,
      "learning_rate": 4.005563719646787e-06,
      "loss": 3.1081,
      "step": 218405
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.940429925918579,
      "learning_rate": 4.004897531761031e-06,
      "loss": 3.0491,
      "step": 218406
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.249119281768799,
      "learning_rate": 4.004231398906421e-06,
      "loss": 2.9952,
      "step": 218407
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.9227514266967773,
      "learning_rate": 4.0035653210829245e-06,
      "loss": 2.7361,
      "step": 218408
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.374826669692993,
      "learning_rate": 4.002899298290807e-06,
      "loss": 2.961,
      "step": 218409
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7261338233947754,
      "learning_rate": 4.002233330530136e-06,
      "loss": 2.8082,
      "step": 218410
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.801232099533081,
      "learning_rate": 4.001567417801044e-06,
      "loss": 2.9735,
      "step": 218411
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5707709789276123,
      "learning_rate": 4.000901560103598e-06,
      "loss": 2.8374,
      "step": 218412
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8304402828216553,
      "learning_rate": 4.000235757438031e-06,
      "loss": 2.7732,
      "step": 218413
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.3083064556121826,
      "learning_rate": 3.999570009804376e-06,
      "loss": 2.6878,
      "step": 218414
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.795019626617432,
      "learning_rate": 3.9989043172028e-06,
      "loss": 2.6698,
      "step": 218415
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.29364275932312,
      "learning_rate": 3.998238679633403e-06,
      "loss": 2.7061,
      "step": 218416
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0142199993133545,
      "learning_rate": 3.9975730970963515e-06,
      "loss": 3.1083,
      "step": 218417
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.005974769592285,
      "learning_rate": 3.996907569591678e-06,
      "loss": 3.0103,
      "step": 218418
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9349942207336426,
      "learning_rate": 3.996242097119651e-06,
      "loss": 2.6382,
      "step": 218419
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.883554697036743,
      "learning_rate": 3.9955766796802345e-06,
      "loss": 2.8012,
      "step": 218420
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.075228452682495,
      "learning_rate": 3.9949113172736636e-06,
      "loss": 2.9952,
      "step": 218421
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.561128854751587,
      "learning_rate": 3.994246009900037e-06,
      "loss": 2.7503,
      "step": 218422
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9572043418884277,
      "learning_rate": 3.993580757559456e-06,
      "loss": 2.8968,
      "step": 218423
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8867902755737305,
      "learning_rate": 3.9929155602520194e-06,
      "loss": 2.9177,
      "step": 218424
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.879533529281616,
      "learning_rate": 3.9922504179779604e-06,
      "loss": 2.9633,
      "step": 218425
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.775454044342041,
      "learning_rate": 3.9915853307372795e-06,
      "loss": 2.8941,
      "step": 218426
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.159114122390747,
      "learning_rate": 3.990920298530143e-06,
      "loss": 2.9046,
      "step": 218427
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6044416427612305,
      "learning_rate": 3.990255321356684e-06,
      "loss": 2.8481,
      "step": 218428
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.81894850730896,
      "learning_rate": 3.989590399217069e-06,
      "loss": 2.7906,
      "step": 218429
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9878945350646973,
      "learning_rate": 3.988925532111331e-06,
      "loss": 2.7366,
      "step": 218430
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0771777629852295,
      "learning_rate": 3.988260720039638e-06,
      "loss": 2.9829,
      "step": 218431
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.966740846633911,
      "learning_rate": 3.9875959630021544e-06,
      "loss": 3.0894,
      "step": 218432
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7413361072540283,
      "learning_rate": 3.986931260998949e-06,
      "loss": 2.8302,
      "step": 218433
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.175175905227661,
      "learning_rate": 3.98626661403012e-06,
      "loss": 2.9579,
      "step": 218434
    },
    {
      "epoch": 2.84,
      "grad_norm": 5.684237480163574,
      "learning_rate": 3.9856020220959014e-06,
      "loss": 2.997,
      "step": 218435
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4959447383880615,
      "learning_rate": 3.984937485196293e-06,
      "loss": 2.9316,
      "step": 218436
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1987390518188477,
      "learning_rate": 3.984273003331495e-06,
      "loss": 2.8887,
      "step": 218437
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8194892406463623,
      "learning_rate": 3.983608576501607e-06,
      "loss": 2.9447,
      "step": 218438
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.898693084716797,
      "learning_rate": 3.982944204706762e-06,
      "loss": 3.0085,
      "step": 218439
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7802224159240723,
      "learning_rate": 3.982279887947026e-06,
      "loss": 2.8364,
      "step": 218440
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.9670467376708984,
      "learning_rate": 3.981615626222634e-06,
      "loss": 2.998,
      "step": 218441
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.295449733734131,
      "learning_rate": 3.980951419533618e-06,
      "loss": 2.9193,
      "step": 218442
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.883709669113159,
      "learning_rate": 3.980287267880145e-06,
      "loss": 3.0039,
      "step": 218443
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.593867778778076,
      "learning_rate": 3.979623171262314e-06,
      "loss": 2.8608,
      "step": 218444
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.123826265335083,
      "learning_rate": 3.97895912968026e-06,
      "loss": 2.9565,
      "step": 218445
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.832881450653076,
      "learning_rate": 3.978295143134081e-06,
      "loss": 2.9892,
      "step": 218446
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.781235694885254,
      "learning_rate": 3.9776312116239775e-06,
      "loss": 2.9018,
      "step": 218447
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.521986722946167,
      "learning_rate": 3.976967335149983e-06,
      "loss": 2.8517,
      "step": 218448
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.98555326461792,
      "learning_rate": 3.976303513712265e-06,
      "loss": 3.1575,
      "step": 218449
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4831671714782715,
      "learning_rate": 3.975639747310955e-06,
      "loss": 2.9442,
      "step": 218450
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.014625072479248,
      "learning_rate": 3.97497603594612e-06,
      "loss": 2.84,
      "step": 218451
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8192214965820312,
      "learning_rate": 3.974312379617961e-06,
      "loss": 2.8191,
      "step": 218452
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5875754356384277,
      "learning_rate": 3.973648778326544e-06,
      "loss": 2.9187,
      "step": 218453
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.4926960468292236,
      "learning_rate": 3.972985232072035e-06,
      "loss": 2.973,
      "step": 218454
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0670435428619385,
      "learning_rate": 3.972321740854534e-06,
      "loss": 2.8666,
      "step": 218455
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6927263736724854,
      "learning_rate": 3.971658304674175e-06,
      "loss": 2.8163,
      "step": 218456
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7378389835357666,
      "learning_rate": 3.970994923531057e-06,
      "loss": 2.9244,
      "step": 218457
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.843940019607544,
      "learning_rate": 3.970331597425314e-06,
      "loss": 3.004,
      "step": 218458
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.172292709350586,
      "learning_rate": 3.9696683263570786e-06,
      "loss": 2.9587,
      "step": 218459
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.5558788776397705,
      "learning_rate": 3.969005110326484e-06,
      "loss": 2.7464,
      "step": 218460
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.232112407684326,
      "learning_rate": 3.968341949333631e-06,
      "loss": 2.9761,
      "step": 218461
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.222745656967163,
      "learning_rate": 3.967678843378652e-06,
      "loss": 3.1203,
      "step": 218462
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.949699640274048,
      "learning_rate": 3.967015792461647e-06,
      "loss": 3.0044,
      "step": 218463
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.085092067718506,
      "learning_rate": 3.966352796582783e-06,
      "loss": 2.9143,
      "step": 218464
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.6890041828155518,
      "learning_rate": 3.96568985574216e-06,
      "loss": 2.8502,
      "step": 218465
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7136237621307373,
      "learning_rate": 3.965026969939911e-06,
      "loss": 2.9424,
      "step": 218466
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.646665096282959,
      "learning_rate": 3.964364139176135e-06,
      "loss": 2.7865,
      "step": 218467
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.823960542678833,
      "learning_rate": 3.963701363450999e-06,
      "loss": 2.783,
      "step": 218468
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1004045009613037,
      "learning_rate": 3.963038642764571e-06,
      "loss": 2.8378,
      "step": 218469
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.9043171405792236,
      "learning_rate": 3.962375977117016e-06,
      "loss": 2.8981,
      "step": 218470
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.01198673248291,
      "learning_rate": 3.961713366508401e-06,
      "loss": 3.0276,
      "step": 218471
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0273478031158447,
      "learning_rate": 3.961050810938959e-06,
      "loss": 2.743,
      "step": 218472
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0260369777679443,
      "learning_rate": 3.960388310408691e-06,
      "loss": 2.9033,
      "step": 218473
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.89833402633667,
      "learning_rate": 3.959725864917829e-06,
      "loss": 3.053,
      "step": 218474
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.279292583465576,
      "learning_rate": 3.959063474466406e-06,
      "loss": 2.7826,
      "step": 218475
    },
    {
      "epoch": 2.84,
      "grad_norm": 4.40233039855957,
      "learning_rate": 3.95840113905459e-06,
      "loss": 2.9597,
      "step": 218476
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.4064323902130127,
      "learning_rate": 3.95773885868248e-06,
      "loss": 2.917,
      "step": 218477
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.997072458267212,
      "learning_rate": 3.957076633350242e-06,
      "loss": 2.7907,
      "step": 218478
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.6236417293548584,
      "learning_rate": 3.956414463057944e-06,
      "loss": 2.9405,
      "step": 218479
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0311508178710938,
      "learning_rate": 3.955752347805785e-06,
      "loss": 2.7484,
      "step": 218480
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.839892864227295,
      "learning_rate": 3.9550902875937985e-06,
      "loss": 2.9161,
      "step": 218481
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.570335626602173,
      "learning_rate": 3.954428282422151e-06,
      "loss": 2.7306,
      "step": 218482
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.4784464836120605,
      "learning_rate": 3.953766332290942e-06,
      "loss": 3.0118,
      "step": 218483
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.0291833877563477,
      "learning_rate": 3.953104437200372e-06,
      "loss": 2.9004,
      "step": 218484
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.315304756164551,
      "learning_rate": 3.9524425971504406e-06,
      "loss": 3.0305,
      "step": 218485
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8535609245300293,
      "learning_rate": 3.951780812141414e-06,
      "loss": 3.0696,
      "step": 218486
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.954225778579712,
      "learning_rate": 3.951119082173293e-06,
      "loss": 2.9195,
      "step": 218487
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.903848886489868,
      "learning_rate": 3.950457407246277e-06,
      "loss": 2.5665,
      "step": 218488
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.5037992000579834,
      "learning_rate": 3.949795787360433e-06,
      "loss": 2.8747,
      "step": 218489
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.118114948272705,
      "learning_rate": 3.949134222515893e-06,
      "loss": 2.9369,
      "step": 218490
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.7877790927886963,
      "learning_rate": 3.9484727127128245e-06,
      "loss": 3.1832,
      "step": 218491
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8518474102020264,
      "learning_rate": 3.947811257951361e-06,
      "loss": 2.7895,
      "step": 218492
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.52803373336792,
      "learning_rate": 3.947149858231536e-06,
      "loss": 2.7326,
      "step": 218493
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.1280136108398438,
      "learning_rate": 3.946488513553514e-06,
      "loss": 2.9808,
      "step": 218494
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.054286479949951,
      "learning_rate": 3.9458272239174304e-06,
      "loss": 2.8445,
      "step": 218495
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.853668451309204,
      "learning_rate": 3.945165989323451e-06,
      "loss": 2.8335,
      "step": 218496
    },
    {
      "epoch": 2.84,
      "grad_norm": 3.2552778720855713,
      "learning_rate": 3.944504809771609e-06,
      "loss": 2.8142,
      "step": 218497
    },
    {
      "epoch": 2.84,
      "grad_norm": 2.8100905418395996,
      "learning_rate": 3.943843685262138e-06,
      "loss": 2.7023,
      "step": 218498
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.64361834526062,
      "learning_rate": 3.943182615795004e-06,
      "loss": 2.6953,
      "step": 218499
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.379722595214844,
      "learning_rate": 3.942521601370474e-06,
      "loss": 2.8922,
      "step": 218500
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3582139015197754,
      "learning_rate": 3.941860641988615e-06,
      "loss": 2.8227,
      "step": 218501
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1738133430480957,
      "learning_rate": 3.941199737649559e-06,
      "loss": 2.9158,
      "step": 218502
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0980846881866455,
      "learning_rate": 3.940538888353373e-06,
      "loss": 2.932,
      "step": 218503
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8850817680358887,
      "learning_rate": 3.939878094100291e-06,
      "loss": 2.8445,
      "step": 218504
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5405502319335938,
      "learning_rate": 3.939217354890345e-06,
      "loss": 2.9196,
      "step": 218505
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.167494535446167,
      "learning_rate": 3.938556670723703e-06,
      "loss": 2.7991,
      "step": 218506
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6737618446350098,
      "learning_rate": 3.93789604160043e-06,
      "loss": 3.0016,
      "step": 218507
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8325328826904297,
      "learning_rate": 3.937235467520727e-06,
      "loss": 3.0981,
      "step": 218508
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.216000556945801,
      "learning_rate": 3.936574948484661e-06,
      "loss": 2.9731,
      "step": 218509
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.829481363296509,
      "learning_rate": 3.9359144844924305e-06,
      "loss": 3.1417,
      "step": 218510
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.879462718963623,
      "learning_rate": 3.935254075544036e-06,
      "loss": 2.9446,
      "step": 218511
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1016507148742676,
      "learning_rate": 3.9345937216397114e-06,
      "loss": 2.8923,
      "step": 218512
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1100597381591797,
      "learning_rate": 3.93393342277949e-06,
      "loss": 2.9059,
      "step": 218513
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.79845929145813,
      "learning_rate": 3.9332731789635694e-06,
      "loss": 3.0358,
      "step": 218514
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7441928386688232,
      "learning_rate": 3.932612990192019e-06,
      "loss": 2.8952,
      "step": 218515
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5570125579833984,
      "learning_rate": 3.931952856465004e-06,
      "loss": 2.9314,
      "step": 218516
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.765037775039673,
      "learning_rate": 3.931292777782624e-06,
      "loss": 2.8609,
      "step": 218517
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.962928533554077,
      "learning_rate": 3.930632754145013e-06,
      "loss": 2.8979,
      "step": 218518
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8684372901916504,
      "learning_rate": 3.929972785552271e-06,
      "loss": 2.9469,
      "step": 218519
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.043890953063965,
      "learning_rate": 3.929312872004564e-06,
      "loss": 2.9025,
      "step": 218520
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.11336088180542,
      "learning_rate": 3.928653013501959e-06,
      "loss": 2.983,
      "step": 218521
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.496880054473877,
      "learning_rate": 3.927993210044622e-06,
      "loss": 2.7708,
      "step": 218522
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.897780418395996,
      "learning_rate": 3.927333461632654e-06,
      "loss": 2.9962,
      "step": 218523
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.097292423248291,
      "learning_rate": 3.926673768266187e-06,
      "loss": 2.8529,
      "step": 218524
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5409317016601562,
      "learning_rate": 3.926014129945321e-06,
      "loss": 2.9006,
      "step": 218525
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.696922779083252,
      "learning_rate": 3.92535454667019e-06,
      "loss": 2.7423,
      "step": 218526
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.8410348892211914,
      "learning_rate": 3.92469501844096e-06,
      "loss": 2.7485,
      "step": 218527
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5763070583343506,
      "learning_rate": 3.924035545257698e-06,
      "loss": 3.2571,
      "step": 218528
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9027867317199707,
      "learning_rate": 3.92337612712057e-06,
      "loss": 2.7869,
      "step": 218529
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.225752353668213,
      "learning_rate": 3.922716764029643e-06,
      "loss": 2.944,
      "step": 218530
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9978692531585693,
      "learning_rate": 3.922057455985084e-06,
      "loss": 2.8529,
      "step": 218531
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7095940113067627,
      "learning_rate": 3.921398202986992e-06,
      "loss": 2.8713,
      "step": 218532
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1773295402526855,
      "learning_rate": 3.9207390050355e-06,
      "loss": 2.9283,
      "step": 218533
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.268502712249756,
      "learning_rate": 3.920079862130776e-06,
      "loss": 2.9031,
      "step": 218534
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.385829448699951,
      "learning_rate": 3.9194207742728525e-06,
      "loss": 2.7806,
      "step": 218535
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.246650457382202,
      "learning_rate": 3.918761741461929e-06,
      "loss": 2.8546,
      "step": 218536
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.612513542175293,
      "learning_rate": 3.918102763698072e-06,
      "loss": 2.9596,
      "step": 218537
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0216031074523926,
      "learning_rate": 3.917443840981449e-06,
      "loss": 2.9493,
      "step": 218538
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9062747955322266,
      "learning_rate": 3.916784973312126e-06,
      "loss": 2.7837,
      "step": 218539
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.86614727973938,
      "learning_rate": 3.916126160690303e-06,
      "loss": 3.0156,
      "step": 218540
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.3925673961639404,
      "learning_rate": 3.915467403116046e-06,
      "loss": 3.0328,
      "step": 218541
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9391140937805176,
      "learning_rate": 3.914808700589489e-06,
      "loss": 2.7551,
      "step": 218542
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6049628257751465,
      "learning_rate": 3.914150053110765e-06,
      "loss": 2.7456,
      "step": 218543
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.716442346572876,
      "learning_rate": 3.913491460679974e-06,
      "loss": 3.1711,
      "step": 218544
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0496773719787598,
      "learning_rate": 3.9128329232972486e-06,
      "loss": 2.9551,
      "step": 218545
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5367050170898438,
      "learning_rate": 3.912174440962756e-06,
      "loss": 3.0434,
      "step": 218546
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.464890241622925,
      "learning_rate": 3.9115160136765634e-06,
      "loss": 2.7561,
      "step": 218547
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0522642135620117,
      "learning_rate": 3.910857641438769e-06,
      "loss": 2.7928,
      "step": 218548
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7212674617767334,
      "learning_rate": 3.910199324249574e-06,
      "loss": 3.0398,
      "step": 218549
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7709991931915283,
      "learning_rate": 3.909541062109045e-06,
      "loss": 3.1091,
      "step": 218550
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.192424297332764,
      "learning_rate": 3.908882855017348e-06,
      "loss": 2.8384,
      "step": 218551
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5652780532836914,
      "learning_rate": 3.90822470297455e-06,
      "loss": 3.0589,
      "step": 218552
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.02482533454895,
      "learning_rate": 3.907566605980849e-06,
      "loss": 2.9226,
      "step": 218553
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.801201820373535,
      "learning_rate": 3.906908564036249e-06,
      "loss": 3.0302,
      "step": 218554
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.685251235961914,
      "learning_rate": 3.906250577141012e-06,
      "loss": 3.0842,
      "step": 218555
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.985473394393921,
      "learning_rate": 3.905592645295142e-06,
      "loss": 2.8396,
      "step": 218556
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.283489465713501,
      "learning_rate": 3.904934768498835e-06,
      "loss": 2.8599,
      "step": 218557
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.817495346069336,
      "learning_rate": 3.90427694675216e-06,
      "loss": 3.0673,
      "step": 218558
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.823375701904297,
      "learning_rate": 3.903619180055351e-06,
      "loss": 2.8293,
      "step": 218559
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.686156749725342,
      "learning_rate": 3.9029614684083725e-06,
      "loss": 2.8298,
      "step": 218560
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7628042697906494,
      "learning_rate": 3.902303811811458e-06,
      "loss": 2.9942,
      "step": 218561
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5268006324768066,
      "learning_rate": 3.901646210264642e-06,
      "loss": 3.1262,
      "step": 218562
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4515652656555176,
      "learning_rate": 3.900988663768156e-06,
      "loss": 2.8671,
      "step": 218563
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7843639850616455,
      "learning_rate": 3.900331172322036e-06,
      "loss": 2.9699,
      "step": 218564
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.785398006439209,
      "learning_rate": 3.899673735926478e-06,
      "loss": 2.9585,
      "step": 218565
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1597988605499268,
      "learning_rate": 3.899016354581486e-06,
      "loss": 2.7942,
      "step": 218566
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2384798526763916,
      "learning_rate": 3.898359028287324e-06,
      "loss": 3.0009,
      "step": 218567
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.826788902282715,
      "learning_rate": 3.897701757043992e-06,
      "loss": 3.0597,
      "step": 218568
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.616260528564453,
      "learning_rate": 3.89704454085169e-06,
      "loss": 2.6675,
      "step": 218569
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4665467739105225,
      "learning_rate": 3.89638737971052e-06,
      "loss": 2.9913,
      "step": 218570
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9673123359680176,
      "learning_rate": 3.8957302736206454e-06,
      "loss": 2.8212,
      "step": 218571
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3144302368164062,
      "learning_rate": 3.895073222582068e-06,
      "loss": 2.767,
      "step": 218572
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.016171932220459,
      "learning_rate": 3.894416226595021e-06,
      "loss": 3.0787,
      "step": 218573
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2710559368133545,
      "learning_rate": 3.8937592856595705e-06,
      "loss": 2.6056,
      "step": 218574
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.917569160461426,
      "learning_rate": 3.893102399775882e-06,
      "loss": 3.1327,
      "step": 218575
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0398969650268555,
      "learning_rate": 3.892445568944058e-06,
      "loss": 3.0842,
      "step": 218576
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8894171714782715,
      "learning_rate": 3.89178879316423e-06,
      "loss": 3.0429,
      "step": 218577
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.927460193634033,
      "learning_rate": 3.891132072436464e-06,
      "loss": 2.6879,
      "step": 218578
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.755887985229492,
      "learning_rate": 3.890475406760962e-06,
      "loss": 3.0364,
      "step": 218579
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.967745065689087,
      "learning_rate": 3.889818796137789e-06,
      "loss": 2.9691,
      "step": 218580
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.967468500137329,
      "learning_rate": 3.889162240567112e-06,
      "loss": 2.6414,
      "step": 218581
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9066219329833984,
      "learning_rate": 3.888505740048997e-06,
      "loss": 2.9988,
      "step": 218582
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.10718035697937,
      "learning_rate": 3.887849294583644e-06,
      "loss": 3.1212,
      "step": 218583
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.323216676712036,
      "learning_rate": 3.887192904171088e-06,
      "loss": 2.8492,
      "step": 218584
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0590004920959473,
      "learning_rate": 3.886536568811494e-06,
      "loss": 3.1432,
      "step": 218585
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0023434162139893,
      "learning_rate": 3.885880288504994e-06,
      "loss": 2.7083,
      "step": 218586
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.050109624862671,
      "learning_rate": 3.885224063251691e-06,
      "loss": 2.7192,
      "step": 218587
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.73620867729187,
      "learning_rate": 3.884567893051715e-06,
      "loss": 2.8616,
      "step": 218588
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.615326404571533,
      "learning_rate": 3.883911777905235e-06,
      "loss": 2.8419,
      "step": 218589
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0858426094055176,
      "learning_rate": 3.88325571781225e-06,
      "loss": 2.9568,
      "step": 218590
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.992011070251465,
      "learning_rate": 3.8825997127730265e-06,
      "loss": 2.9115,
      "step": 218591
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.643122673034668,
      "learning_rate": 3.881943762787565e-06,
      "loss": 2.9665,
      "step": 218592
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9472508430480957,
      "learning_rate": 3.881287867856065e-06,
      "loss": 2.7121,
      "step": 218593
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2703287601470947,
      "learning_rate": 3.8806320279785915e-06,
      "loss": 2.7936,
      "step": 218594
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2421131134033203,
      "learning_rate": 3.879976243155347e-06,
      "loss": 2.8617,
      "step": 218595
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9106733798980713,
      "learning_rate": 3.8793205133863635e-06,
      "loss": 3.0276,
      "step": 218596
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.002049207687378,
      "learning_rate": 3.8786648386718415e-06,
      "loss": 2.8105,
      "step": 218597
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.317997455596924,
      "learning_rate": 3.878009219011813e-06,
      "loss": 2.9532,
      "step": 218598
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4588756561279297,
      "learning_rate": 3.8773536544065124e-06,
      "loss": 2.7822,
      "step": 218599
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6918742656707764,
      "learning_rate": 3.876698144855938e-06,
      "loss": 2.6989,
      "step": 218600
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0402941703796387,
      "learning_rate": 3.876042690360326e-06,
      "loss": 2.8539,
      "step": 218601
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7610490322113037,
      "learning_rate": 3.87538729091974e-06,
      "loss": 3.15,
      "step": 218602
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9406182765960693,
      "learning_rate": 3.874731946534315e-06,
      "loss": 2.9493,
      "step": 218603
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6979904174804688,
      "learning_rate": 3.87407665720415e-06,
      "loss": 3.0466,
      "step": 218604
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.671827793121338,
      "learning_rate": 3.873421422929379e-06,
      "loss": 3.02,
      "step": 218605
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.458711624145508,
      "learning_rate": 3.872766243710135e-06,
      "loss": 3.004,
      "step": 218606
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2519478797912598,
      "learning_rate": 3.8721111195465175e-06,
      "loss": 2.8574,
      "step": 218607
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1090588569641113,
      "learning_rate": 3.8714560504386925e-06,
      "loss": 2.9642,
      "step": 218608
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7436509132385254,
      "learning_rate": 3.870801036386761e-06,
      "loss": 2.9187,
      "step": 218609
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.316619634628296,
      "learning_rate": 3.870146077390823e-06,
      "loss": 2.7951,
      "step": 218610
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6504359245300293,
      "learning_rate": 3.869491173451011e-06,
      "loss": 2.8206,
      "step": 218611
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0018720626831055,
      "learning_rate": 3.868836324567426e-06,
      "loss": 2.7335,
      "step": 218612
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.483854293823242,
      "learning_rate": 3.868181530740266e-06,
      "loss": 2.8984,
      "step": 218613
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.218195676803589,
      "learning_rate": 3.8675267919695665e-06,
      "loss": 2.8957,
      "step": 218614
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1925833225250244,
      "learning_rate": 3.8668721082554925e-06,
      "loss": 3.0285,
      "step": 218615
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.177008867263794,
      "learning_rate": 3.866217479598144e-06,
      "loss": 2.7376,
      "step": 218616
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9498298168182373,
      "learning_rate": 3.865562905997687e-06,
      "loss": 2.9914,
      "step": 218617
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.140537738800049,
      "learning_rate": 3.864908387454157e-06,
      "loss": 3.129,
      "step": 218618
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7667124271392822,
      "learning_rate": 3.864253923967786e-06,
      "loss": 2.8414,
      "step": 218619
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0875508785247803,
      "learning_rate": 3.86359951553864e-06,
      "loss": 2.9802,
      "step": 218620
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5781242847442627,
      "learning_rate": 3.862945162166786e-06,
      "loss": 2.9541,
      "step": 218621
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.778703451156616,
      "learning_rate": 3.862290863852457e-06,
      "loss": 3.1618,
      "step": 218622
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4073362350463867,
      "learning_rate": 3.861636620595688e-06,
      "loss": 2.913,
      "step": 218623
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.875931978225708,
      "learning_rate": 3.860982432396642e-06,
      "loss": 2.8846,
      "step": 218624
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2580435276031494,
      "learning_rate": 3.8603282992554215e-06,
      "loss": 3.0363,
      "step": 218625
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9892876148223877,
      "learning_rate": 3.859674221172193e-06,
      "loss": 2.6858,
      "step": 218626
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.715092182159424,
      "learning_rate": 3.8590201981469895e-06,
      "loss": 2.9553,
      "step": 218627
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9464499950408936,
      "learning_rate": 3.85836623018001e-06,
      "loss": 2.8548,
      "step": 218628
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.856252908706665,
      "learning_rate": 3.857712317271322e-06,
      "loss": 2.9797,
      "step": 218629
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.766461133956909,
      "learning_rate": 3.857058459421125e-06,
      "loss": 2.7421,
      "step": 218630
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.569324493408203,
      "learning_rate": 3.856404656629453e-06,
      "loss": 2.6875,
      "step": 218631
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6330583095550537,
      "learning_rate": 3.8557509088964715e-06,
      "loss": 2.743,
      "step": 218632
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.695768356323242,
      "learning_rate": 3.855097216222314e-06,
      "loss": 2.6882,
      "step": 218633
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5362555980682373,
      "learning_rate": 3.854443578607081e-06,
      "loss": 3.0369,
      "step": 218634
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5230319499969482,
      "learning_rate": 3.853789996050871e-06,
      "loss": 2.9294,
      "step": 218635
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.618696689605713,
      "learning_rate": 3.853136468553852e-06,
      "loss": 3.1739,
      "step": 218636
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6664042472839355,
      "learning_rate": 3.852482996116091e-06,
      "loss": 3.0678,
      "step": 218637
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4209489822387695,
      "learning_rate": 3.8518295787378195e-06,
      "loss": 3.0278,
      "step": 218638
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5409560203552246,
      "learning_rate": 3.851176216419005e-06,
      "loss": 2.8991,
      "step": 218639
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.698641061782837,
      "learning_rate": 3.850522909159881e-06,
      "loss": 3.0451,
      "step": 218640
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0589540004730225,
      "learning_rate": 3.849869656960547e-06,
      "loss": 2.9492,
      "step": 218641
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.854395866394043,
      "learning_rate": 3.849216459821103e-06,
      "loss": 3.0251,
      "step": 218642
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.83608341217041,
      "learning_rate": 3.848563317741649e-06,
      "loss": 2.8409,
      "step": 218643
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.781534194946289,
      "learning_rate": 3.8479102307223845e-06,
      "loss": 2.7832,
      "step": 218644
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8347136974334717,
      "learning_rate": 3.847257198763376e-06,
      "loss": 3.054,
      "step": 218645
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.023648262023926,
      "learning_rate": 3.846604221864724e-06,
      "loss": 2.7936,
      "step": 218646
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2166121006011963,
      "learning_rate": 3.845951300026595e-06,
      "loss": 2.9747,
      "step": 218647
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0828378200531006,
      "learning_rate": 3.845298433249089e-06,
      "loss": 2.9747,
      "step": 218648
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7740705013275146,
      "learning_rate": 3.844645621532339e-06,
      "loss": 2.7025,
      "step": 218649
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9319570064544678,
      "learning_rate": 3.843992864876477e-06,
      "loss": 2.8689,
      "step": 218650
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.684329032897949,
      "learning_rate": 3.843340163281572e-06,
      "loss": 2.8599,
      "step": 218651
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1086409091949463,
      "learning_rate": 3.842687516747822e-06,
      "loss": 2.9248,
      "step": 218652
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.792705535888672,
      "learning_rate": 3.842034925275261e-06,
      "loss": 2.8843,
      "step": 218653
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7808632850646973,
      "learning_rate": 3.841382388864089e-06,
      "loss": 2.8204,
      "step": 218654
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.505018949508667,
      "learning_rate": 3.840729907514339e-06,
      "loss": 2.9649,
      "step": 218655
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.796234607696533,
      "learning_rate": 3.840077481226278e-06,
      "loss": 2.8755,
      "step": 218656
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.9732093811035156,
      "learning_rate": 3.839425109999872e-06,
      "loss": 2.9005,
      "step": 218657
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.952936887741089,
      "learning_rate": 3.8387727938353205e-06,
      "loss": 2.9748,
      "step": 218658
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.482672929763794,
      "learning_rate": 3.838120532732725e-06,
      "loss": 2.7366,
      "step": 218659
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.742321014404297,
      "learning_rate": 3.83746832669225e-06,
      "loss": 2.7595,
      "step": 218660
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7205746173858643,
      "learning_rate": 3.83681617571393e-06,
      "loss": 3.0387,
      "step": 218661
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.191460609436035,
      "learning_rate": 3.836164079797966e-06,
      "loss": 2.8434,
      "step": 218662
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0775022506713867,
      "learning_rate": 3.835512038944455e-06,
      "loss": 3.2506,
      "step": 218663
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.37361741065979,
      "learning_rate": 3.834860053153499e-06,
      "loss": 3.2301,
      "step": 218664
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1878905296325684,
      "learning_rate": 3.834208122425231e-06,
      "loss": 2.8815,
      "step": 218665
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.685241937637329,
      "learning_rate": 3.833556246759783e-06,
      "loss": 2.7332,
      "step": 218666
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.647906541824341,
      "learning_rate": 3.8329044261572575e-06,
      "loss": 2.7536,
      "step": 218667
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3063957691192627,
      "learning_rate": 3.8322526606178186e-06,
      "loss": 3.0608,
      "step": 218668
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.165109157562256,
      "learning_rate": 3.831600950141533e-06,
      "loss": 3.2,
      "step": 218669
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.100446939468384,
      "learning_rate": 3.830949294728536e-06,
      "loss": 3.0346,
      "step": 218670
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2093446254730225,
      "learning_rate": 3.830297694378925e-06,
      "loss": 2.6424,
      "step": 218671
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.746337890625,
      "learning_rate": 3.829646149092902e-06,
      "loss": 3.0613,
      "step": 218672
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.122159242630005,
      "learning_rate": 3.828994658870532e-06,
      "loss": 2.8863,
      "step": 218673
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.599179744720459,
      "learning_rate": 3.828343223711916e-06,
      "loss": 2.894,
      "step": 218674
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2923054695129395,
      "learning_rate": 3.82769184361722e-06,
      "loss": 2.8005,
      "step": 218675
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.163843870162964,
      "learning_rate": 3.827040518586577e-06,
      "loss": 3.0634,
      "step": 218676
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.433023691177368,
      "learning_rate": 3.826389248620021e-06,
      "loss": 2.8453,
      "step": 218677
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.661052703857422,
      "learning_rate": 3.825738033717751e-06,
      "loss": 2.9568,
      "step": 218678
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4037537574768066,
      "learning_rate": 3.8250868738798675e-06,
      "loss": 2.9348,
      "step": 218679
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.471158742904663,
      "learning_rate": 3.824435769106504e-06,
      "loss": 2.8969,
      "step": 218680
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7947609424591064,
      "learning_rate": 3.82378471939776e-06,
      "loss": 3.1648,
      "step": 218681
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.997803211212158,
      "learning_rate": 3.8231337247537685e-06,
      "loss": 3.119,
      "step": 218682
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.600217819213867,
      "learning_rate": 3.82248278517463e-06,
      "loss": 3.0355,
      "step": 218683
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9389333724975586,
      "learning_rate": 3.82183190066051e-06,
      "loss": 2.945,
      "step": 218684
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.596038818359375,
      "learning_rate": 3.821181071211477e-06,
      "loss": 2.7293,
      "step": 218685
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7391953468322754,
      "learning_rate": 3.820530296827695e-06,
      "loss": 2.9291,
      "step": 218686
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.026512622833252,
      "learning_rate": 3.819879577509266e-06,
      "loss": 3.0071,
      "step": 218687
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.670273780822754,
      "learning_rate": 3.819228913256322e-06,
      "loss": 3.1124,
      "step": 218688
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.14198899269104,
      "learning_rate": 3.818578304068964e-06,
      "loss": 2.9662,
      "step": 218689
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.837251663208008,
      "learning_rate": 3.817927749947325e-06,
      "loss": 3.0805,
      "step": 218690
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4239063262939453,
      "learning_rate": 3.817277250891504e-06,
      "loss": 2.782,
      "step": 218691
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.773632526397705,
      "learning_rate": 3.8166268069016344e-06,
      "loss": 2.9332,
      "step": 218692
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.789802074432373,
      "learning_rate": 3.815976417977884e-06,
      "loss": 2.8362,
      "step": 218693
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.217313051223755,
      "learning_rate": 3.815326084120318e-06,
      "loss": 2.9536,
      "step": 218694
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.82061767578125,
      "learning_rate": 3.8146758053290705e-06,
      "loss": 2.6901,
      "step": 218695
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.639327049255371,
      "learning_rate": 3.8140255816042743e-06,
      "loss": 3.0312,
      "step": 218696
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3968751430511475,
      "learning_rate": 3.813375412946029e-06,
      "loss": 2.9427,
      "step": 218697
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9219167232513428,
      "learning_rate": 3.812725299354469e-06,
      "loss": 2.9182,
      "step": 218698
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1293747425079346,
      "learning_rate": 3.8120752408297262e-06,
      "loss": 2.8571,
      "step": 218699
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1569557189941406,
      "learning_rate": 3.8114252373719013e-06,
      "loss": 2.8598,
      "step": 218700
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7934701442718506,
      "learning_rate": 3.8107752889811273e-06,
      "loss": 2.9352,
      "step": 218701
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8557217121124268,
      "learning_rate": 3.8101253956575373e-06,
      "loss": 2.5965,
      "step": 218702
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.76865816116333,
      "learning_rate": 3.8094755574011983e-06,
      "loss": 3.0117,
      "step": 218703
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5130040645599365,
      "learning_rate": 3.808825774212276e-06,
      "loss": 2.842,
      "step": 218704
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9870893955230713,
      "learning_rate": 3.808176046090905e-06,
      "loss": 2.8562,
      "step": 218705
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0802998542785645,
      "learning_rate": 3.807526373037184e-06,
      "loss": 3.0506,
      "step": 218706
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.688602924346924,
      "learning_rate": 3.8068767550512137e-06,
      "loss": 2.9546,
      "step": 218707
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7563371658325195,
      "learning_rate": 3.80622719213316e-06,
      "loss": 2.9418,
      "step": 218708
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.217844009399414,
      "learning_rate": 3.80557768428309e-06,
      "loss": 2.888,
      "step": 218709
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9180989265441895,
      "learning_rate": 3.804928231501203e-06,
      "loss": 2.8576,
      "step": 218710
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.863661289215088,
      "learning_rate": 3.804278833787533e-06,
      "loss": 3.087,
      "step": 218711
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.429983615875244,
      "learning_rate": 3.803629491142246e-06,
      "loss": 3.2109,
      "step": 218712
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4427053928375244,
      "learning_rate": 3.802980203565442e-06,
      "loss": 3.0695,
      "step": 218713
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5756406784057617,
      "learning_rate": 3.8023309710572546e-06,
      "loss": 2.7641,
      "step": 218714
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.9669134616851807,
      "learning_rate": 3.8016817936178503e-06,
      "loss": 2.8226,
      "step": 218715
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.101975202560425,
      "learning_rate": 3.8010326712472618e-06,
      "loss": 3.1523,
      "step": 218716
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7845895290374756,
      "learning_rate": 3.8003836039456894e-06,
      "loss": 2.9894,
      "step": 218717
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.8260393142700195,
      "learning_rate": 3.799734591713166e-06,
      "loss": 2.9483,
      "step": 218718
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7036218643188477,
      "learning_rate": 3.799085634549892e-06,
      "loss": 3.0957,
      "step": 218719
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8539700508117676,
      "learning_rate": 3.7984367324559673e-06,
      "loss": 2.9951,
      "step": 218720
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2769720554351807,
      "learning_rate": 3.7977878854314916e-06,
      "loss": 3.0049,
      "step": 218721
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8401150703430176,
      "learning_rate": 3.7971390934765978e-06,
      "loss": 2.8862,
      "step": 218722
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8454086780548096,
      "learning_rate": 3.7964903565914197e-06,
      "loss": 2.9053,
      "step": 218723
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.8519208431243896,
      "learning_rate": 3.7958416747760566e-06,
      "loss": 2.8347,
      "step": 218724
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5710244178771973,
      "learning_rate": 3.7951930480306426e-06,
      "loss": 2.4844,
      "step": 218725
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5860559940338135,
      "learning_rate": 3.7945444763552768e-06,
      "loss": 2.7469,
      "step": 218726
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.789731025695801,
      "learning_rate": 3.793895959750093e-06,
      "loss": 2.9867,
      "step": 218727
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6928839683532715,
      "learning_rate": 3.793247498215224e-06,
      "loss": 2.9955,
      "step": 218728
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0478904247283936,
      "learning_rate": 3.7925990917508033e-06,
      "loss": 2.9568,
      "step": 218729
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.77790904045105,
      "learning_rate": 3.7919507403569303e-06,
      "loss": 3.0319,
      "step": 218730
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8992350101470947,
      "learning_rate": 3.791302444033706e-06,
      "loss": 3.0856,
      "step": 218731
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.855687141418457,
      "learning_rate": 3.7906542027812626e-06,
      "loss": 3.1191,
      "step": 218732
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.482142686843872,
      "learning_rate": 3.7900060165997336e-06,
      "loss": 2.9575,
      "step": 218733
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.699988842010498,
      "learning_rate": 3.7893578854892192e-06,
      "loss": 3.1332,
      "step": 218734
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6410014629364014,
      "learning_rate": 3.7887098094498856e-06,
      "loss": 2.8358,
      "step": 218735
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.112291097640991,
      "learning_rate": 3.788061788481833e-06,
      "loss": 2.9636,
      "step": 218736
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.606956958770752,
      "learning_rate": 3.787413822585161e-06,
      "loss": 2.8727,
      "step": 218737
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.164689064025879,
      "learning_rate": 3.7867659117599703e-06,
      "loss": 2.9216,
      "step": 218738
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.959900379180908,
      "learning_rate": 3.7861180560064263e-06,
      "loss": 3.0893,
      "step": 218739
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7723371982574463,
      "learning_rate": 3.78547025532463e-06,
      "loss": 2.8882,
      "step": 218740
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.407198905944824,
      "learning_rate": 3.7848225097147132e-06,
      "loss": 3.1047,
      "step": 218741
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6350271701812744,
      "learning_rate": 3.7841748191768104e-06,
      "loss": 2.8574,
      "step": 218742
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0520501136779785,
      "learning_rate": 3.7835271837109883e-06,
      "loss": 3.0329,
      "step": 218743
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0309996604919434,
      "learning_rate": 3.7828796033174125e-06,
      "loss": 2.9118,
      "step": 218744
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1989052295684814,
      "learning_rate": 3.7822320779961834e-06,
      "loss": 2.9375,
      "step": 218745
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2999424934387207,
      "learning_rate": 3.7815846077474343e-06,
      "loss": 2.9974,
      "step": 218746
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6590147018432617,
      "learning_rate": 3.780937192571298e-06,
      "loss": 2.9484,
      "step": 218747
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3778445720672607,
      "learning_rate": 3.780289832467842e-06,
      "loss": 2.9332,
      "step": 218748
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7607975006103516,
      "learning_rate": 3.7796425274372655e-06,
      "loss": 2.8969,
      "step": 218749
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1444103717803955,
      "learning_rate": 3.7789952774796018e-06,
      "loss": 2.9109,
      "step": 218750
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8573760986328125,
      "learning_rate": 3.778348082595051e-06,
      "loss": 2.8993,
      "step": 218751
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.4614956378936768,
      "learning_rate": 3.7777009427836457e-06,
      "loss": 2.8152,
      "step": 218752
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1164968013763428,
      "learning_rate": 3.777053858045587e-06,
      "loss": 2.8023,
      "step": 218753
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0497305393218994,
      "learning_rate": 3.7764068283809736e-06,
      "loss": 2.7998,
      "step": 218754
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.700709342956543,
      "learning_rate": 3.775759853789939e-06,
      "loss": 2.8342,
      "step": 218755
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5521812438964844,
      "learning_rate": 3.7751129342725504e-06,
      "loss": 2.7671,
      "step": 218756
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.3869757652282715,
      "learning_rate": 3.7744660698289743e-06,
      "loss": 3.342,
      "step": 218757
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6102001667022705,
      "learning_rate": 3.7738192604592767e-06,
      "loss": 3.0069,
      "step": 218758
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9854705333709717,
      "learning_rate": 3.7731725061636575e-06,
      "loss": 3.0259,
      "step": 218759
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4458091259002686,
      "learning_rate": 3.772525806942217e-06,
      "loss": 2.8806,
      "step": 218760
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.818983793258667,
      "learning_rate": 3.7718791627950216e-06,
      "loss": 2.915,
      "step": 218761
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9222218990325928,
      "learning_rate": 3.7712325737222047e-06,
      "loss": 3.0645,
      "step": 218762
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8255226612091064,
      "learning_rate": 3.770586039723966e-06,
      "loss": 3.0544,
      "step": 218763
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.598158121109009,
      "learning_rate": 3.769939560800306e-06,
      "loss": 2.9385,
      "step": 218764
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9482128620147705,
      "learning_rate": 3.7692931369514566e-06,
      "loss": 2.8368,
      "step": 218765
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6604764461517334,
      "learning_rate": 3.768646768177452e-06,
      "loss": 3.0873,
      "step": 218766
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.178884744644165,
      "learning_rate": 3.7680004544784926e-06,
      "loss": 2.6918,
      "step": 218767
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2583365440368652,
      "learning_rate": 3.7673541958546106e-06,
      "loss": 2.8066,
      "step": 218768
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.180842638015747,
      "learning_rate": 3.766707992305973e-06,
      "loss": 2.6923,
      "step": 218769
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.717874526977539,
      "learning_rate": 3.7660618438326796e-06,
      "loss": 2.8195,
      "step": 218770
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.471464157104492,
      "learning_rate": 3.765415750434897e-06,
      "loss": 2.7226,
      "step": 218771
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5009217262268066,
      "learning_rate": 3.764769712112725e-06,
      "loss": 2.6825,
      "step": 218772
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4004881381988525,
      "learning_rate": 3.764123728866264e-06,
      "loss": 3.0041,
      "step": 218773
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.686534881591797,
      "learning_rate": 3.7634778006956465e-06,
      "loss": 2.8181,
      "step": 218774
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.755491256713867,
      "learning_rate": 3.7628319276009734e-06,
      "loss": 2.9713,
      "step": 218775
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.627119302749634,
      "learning_rate": 3.7621861095823767e-06,
      "loss": 2.8678,
      "step": 218776
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.894495964050293,
      "learning_rate": 3.761540346640024e-06,
      "loss": 3.0552,
      "step": 218777
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.095337390899658,
      "learning_rate": 3.7608946387739815e-06,
      "loss": 3.0952,
      "step": 218778
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.884305953979492,
      "learning_rate": 3.7602489859843486e-06,
      "loss": 3.0483,
      "step": 218779
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.339759111404419,
      "learning_rate": 3.7596033882712928e-06,
      "loss": 3.0192,
      "step": 218780
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.947044610977173,
      "learning_rate": 3.7589578456349132e-06,
      "loss": 3.0559,
      "step": 218781
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.804687976837158,
      "learning_rate": 3.758312358075344e-06,
      "loss": 2.9171,
      "step": 218782
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.587963819503784,
      "learning_rate": 3.757666925592717e-06,
      "loss": 2.9925,
      "step": 218783
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.488481044769287,
      "learning_rate": 3.7570215481871003e-06,
      "loss": 2.8604,
      "step": 218784
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1913888454437256,
      "learning_rate": 3.7563762258586595e-06,
      "loss": 2.905,
      "step": 218785
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.072279691696167,
      "learning_rate": 3.7557309586074947e-06,
      "loss": 3.078,
      "step": 218786
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.809113025665283,
      "learning_rate": 3.7550857464337393e-06,
      "loss": 3.1019,
      "step": 218787
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.355632781982422,
      "learning_rate": 3.75444058933746e-06,
      "loss": 2.7717,
      "step": 218788
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6378912925720215,
      "learning_rate": 3.753795487318889e-06,
      "loss": 2.8426,
      "step": 218789
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.212228536605835,
      "learning_rate": 3.753150440378061e-06,
      "loss": 3.2921,
      "step": 218790
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4370810985565186,
      "learning_rate": 3.752505448515075e-06,
      "loss": 3.0537,
      "step": 218791
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.086824655532837,
      "learning_rate": 3.7518605117301314e-06,
      "loss": 2.839,
      "step": 218792
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0201170444488525,
      "learning_rate": 3.7512156300232967e-06,
      "loss": 3.0491,
      "step": 218793
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4166100025177,
      "learning_rate": 3.7505708033947036e-06,
      "loss": 2.8659,
      "step": 218794
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.011507272720337,
      "learning_rate": 3.749926031844486e-06,
      "loss": 3.0104,
      "step": 218795
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1720659732818604,
      "learning_rate": 3.7492813153727097e-06,
      "loss": 3.0966,
      "step": 218796
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.101961374282837,
      "learning_rate": 3.7486366539795753e-06,
      "loss": 3.0049,
      "step": 218797
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3238446712493896,
      "learning_rate": 3.747992047665149e-06,
      "loss": 3.0186,
      "step": 218798
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.805873155593872,
      "learning_rate": 3.7473474964295647e-06,
      "loss": 2.9288,
      "step": 218799
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3082399368286133,
      "learning_rate": 3.7467030002729216e-06,
      "loss": 3.0018,
      "step": 218800
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.509263038635254,
      "learning_rate": 3.746058559195386e-06,
      "loss": 2.8524,
      "step": 218801
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.145102024078369,
      "learning_rate": 3.7454141731970256e-06,
      "loss": 2.9078,
      "step": 218802
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.570815086364746,
      "learning_rate": 3.7447698422780057e-06,
      "loss": 2.7468,
      "step": 218803
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.350131034851074,
      "learning_rate": 3.7441255664384273e-06,
      "loss": 2.9786,
      "step": 218804
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1857335567474365,
      "learning_rate": 3.7434813456783898e-06,
      "loss": 2.8847,
      "step": 218805
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0614511966705322,
      "learning_rate": 3.742837179998026e-06,
      "loss": 3.0668,
      "step": 218806
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9595043659210205,
      "learning_rate": 3.7421930693975033e-06,
      "loss": 2.7244,
      "step": 218807
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7012486457824707,
      "learning_rate": 3.7415490138768543e-06,
      "loss": 3.0216,
      "step": 218808
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1866629123687744,
      "learning_rate": 3.740905013436246e-06,
      "loss": 2.9441,
      "step": 218809
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.726268291473389,
      "learning_rate": 3.740261068075812e-06,
      "loss": 2.8571,
      "step": 218810
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0546090602874756,
      "learning_rate": 3.7396171777956508e-06,
      "loss": 2.7383,
      "step": 218811
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7175276279449463,
      "learning_rate": 3.7389733425958968e-06,
      "loss": 2.9624,
      "step": 218812
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3275530338287354,
      "learning_rate": 3.7383295624766497e-06,
      "loss": 2.9288,
      "step": 218813
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.215602874755859,
      "learning_rate": 3.7376858374380427e-06,
      "loss": 3.0102,
      "step": 218814
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6930806636810303,
      "learning_rate": 3.7370421674802085e-06,
      "loss": 2.8925,
      "step": 218815
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.957261800765991,
      "learning_rate": 3.7363985526032147e-06,
      "loss": 3.0551,
      "step": 218816
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6275999546051025,
      "learning_rate": 3.735754992807227e-06,
      "loss": 2.8953,
      "step": 218817
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.726417064666748,
      "learning_rate": 3.7351114880923793e-06,
      "loss": 2.8589,
      "step": 218818
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.029538869857788,
      "learning_rate": 3.7344680384587377e-06,
      "loss": 3.025,
      "step": 218819
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.046489715576172,
      "learning_rate": 3.733824643906469e-06,
      "loss": 2.7587,
      "step": 218820
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8800175189971924,
      "learning_rate": 3.7331813044356727e-06,
      "loss": 2.765,
      "step": 218821
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.115079164505005,
      "learning_rate": 3.7325380200464827e-06,
      "loss": 2.978,
      "step": 218822
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2948713302612305,
      "learning_rate": 3.7318947907389983e-06,
      "loss": 2.9757,
      "step": 218823
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6586575508117676,
      "learning_rate": 3.7312516165133197e-06,
      "loss": 3.0128,
      "step": 218824
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.059786558151245,
      "learning_rate": 3.730608497369614e-06,
      "loss": 3.1249,
      "step": 218825
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4998161792755127,
      "learning_rate": 3.7299654333079796e-06,
      "loss": 3.1137,
      "step": 218826
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.234455585479736,
      "learning_rate": 3.7293224243285514e-06,
      "loss": 3.0651,
      "step": 218827
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2848405838012695,
      "learning_rate": 3.7286794704314284e-06,
      "loss": 2.8332,
      "step": 218828
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7300009727478027,
      "learning_rate": 3.7280365716167106e-06,
      "loss": 2.9943,
      "step": 218829
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.522156000137329,
      "learning_rate": 3.727393727884598e-06,
      "loss": 2.8161,
      "step": 218830
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2449796199798584,
      "learning_rate": 3.7267509392350902e-06,
      "loss": 2.9815,
      "step": 218831
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1902072429656982,
      "learning_rate": 3.726108205668421e-06,
      "loss": 2.9477,
      "step": 218832
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.908906936645508,
      "learning_rate": 3.7254655271846567e-06,
      "loss": 2.9323,
      "step": 218833
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5964066982269287,
      "learning_rate": 3.7248229037838973e-06,
      "loss": 2.7493,
      "step": 218834
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2265267372131348,
      "learning_rate": 3.7241803354662757e-06,
      "loss": 3.1358,
      "step": 218835
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7713515758514404,
      "learning_rate": 3.723537822231959e-06,
      "loss": 2.8819,
      "step": 218836
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9928770065307617,
      "learning_rate": 3.72289536408098e-06,
      "loss": 3.0951,
      "step": 218837
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.061657428741455,
      "learning_rate": 3.722252961013572e-06,
      "loss": 2.8966,
      "step": 218838
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8673598766326904,
      "learning_rate": 3.7216106130297685e-06,
      "loss": 2.99,
      "step": 218839
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7484991550445557,
      "learning_rate": 3.7209683201297027e-06,
      "loss": 2.8811,
      "step": 218840
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7296817302703857,
      "learning_rate": 3.720326082313474e-06,
      "loss": 2.7873,
      "step": 218841
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8589413166046143,
      "learning_rate": 3.7196838995812495e-06,
      "loss": 2.757,
      "step": 218842
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.219616413116455,
      "learning_rate": 3.719041771933129e-06,
      "loss": 2.8581,
      "step": 218843
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.097914695739746,
      "learning_rate": 3.7183996993692455e-06,
      "loss": 2.8483,
      "step": 218844
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1437878608703613,
      "learning_rate": 3.717757681889699e-06,
      "loss": 2.817,
      "step": 218845
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0208852291107178,
      "learning_rate": 3.717115719494623e-06,
      "loss": 2.6573,
      "step": 218846
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9394712448120117,
      "learning_rate": 3.716473812184084e-06,
      "loss": 3.1239,
      "step": 218847
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.874506950378418,
      "learning_rate": 3.715831959958282e-06,
      "loss": 3.0261,
      "step": 218848
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.148885726928711,
      "learning_rate": 3.715190162817283e-06,
      "loss": 2.988,
      "step": 218849
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.735819101333618,
      "learning_rate": 3.714548420761254e-06,
      "loss": 2.8266,
      "step": 218850
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0673346519470215,
      "learning_rate": 3.7139067337902617e-06,
      "loss": 3.0036,
      "step": 218851
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.037518262863159,
      "learning_rate": 3.713265101904439e-06,
      "loss": 2.9899,
      "step": 218852
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8046069145202637,
      "learning_rate": 3.7126235251039194e-06,
      "loss": 3.095,
      "step": 218853
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7617454528808594,
      "learning_rate": 3.7119820033888028e-06,
      "loss": 2.7616,
      "step": 218854
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.713709592819214,
      "learning_rate": 3.711340536759222e-06,
      "loss": 2.8478,
      "step": 218855
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6427783966064453,
      "learning_rate": 3.710699125215344e-06,
      "loss": 2.8799,
      "step": 218856
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0147807598114014,
      "learning_rate": 3.710057768757202e-06,
      "loss": 2.8042,
      "step": 218857
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8987083435058594,
      "learning_rate": 3.7094164673849626e-06,
      "loss": 2.7831,
      "step": 218858
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.302743673324585,
      "learning_rate": 3.7087752210987253e-06,
      "loss": 3.1,
      "step": 218859
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.271141290664673,
      "learning_rate": 3.708134029898624e-06,
      "loss": 2.9585,
      "step": 218860
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9174389839172363,
      "learning_rate": 3.7074928937847583e-06,
      "loss": 2.7212,
      "step": 218861
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4562981128692627,
      "learning_rate": 3.706851812757294e-06,
      "loss": 3.0922,
      "step": 218862
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.876631021499634,
      "learning_rate": 3.7062107868162993e-06,
      "loss": 3.2369,
      "step": 218863
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.820314645767212,
      "learning_rate": 3.7055698159619398e-06,
      "loss": 2.826,
      "step": 218864
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5559656620025635,
      "learning_rate": 3.7049289001942485e-06,
      "loss": 2.9129,
      "step": 218865
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.916294813156128,
      "learning_rate": 3.704288039513459e-06,
      "loss": 2.8693,
      "step": 218866
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9539248943328857,
      "learning_rate": 3.7036472339196043e-06,
      "loss": 2.7851,
      "step": 218867
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.961904287338257,
      "learning_rate": 3.7030064834128513e-06,
      "loss": 2.9088,
      "step": 218868
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7221903800964355,
      "learning_rate": 3.7023657879933e-06,
      "loss": 2.8447,
      "step": 218869
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.789788246154785,
      "learning_rate": 3.7017251476610833e-06,
      "loss": 2.8722,
      "step": 218870
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.9318060874938965,
      "learning_rate": 3.701084562416301e-06,
      "loss": 3.2963,
      "step": 218871
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3343284130096436,
      "learning_rate": 3.7004440322590534e-06,
      "loss": 2.8319,
      "step": 218872
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7260665893554688,
      "learning_rate": 3.699803557189507e-06,
      "loss": 2.7435,
      "step": 218873
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.035595417022705,
      "learning_rate": 3.699163137207761e-06,
      "loss": 2.8358,
      "step": 218874
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.982414960861206,
      "learning_rate": 3.6985227723139166e-06,
      "loss": 2.7658,
      "step": 218875
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.422783374786377,
      "learning_rate": 3.697882462508139e-06,
      "loss": 2.8246,
      "step": 218876
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.8874130249023438,
      "learning_rate": 3.6972422077905293e-06,
      "loss": 3.1267,
      "step": 218877
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.62994384765625,
      "learning_rate": 3.6966020081611535e-06,
      "loss": 2.7648,
      "step": 218878
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8565049171447754,
      "learning_rate": 3.695961863620178e-06,
      "loss": 2.8838,
      "step": 218879
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0979692935943604,
      "learning_rate": 3.6953217741677697e-06,
      "loss": 2.8126,
      "step": 218880
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9955010414123535,
      "learning_rate": 3.6946817398039286e-06,
      "loss": 2.8024,
      "step": 218881
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.4964513778686523,
      "learning_rate": 3.6940417605288543e-06,
      "loss": 2.9133,
      "step": 218882
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.697604179382324,
      "learning_rate": 3.69340183634268e-06,
      "loss": 2.7062,
      "step": 218883
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.597630739212036,
      "learning_rate": 3.692761967245472e-06,
      "loss": 2.8601,
      "step": 218884
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8761024475097656,
      "learning_rate": 3.6921221532373644e-06,
      "loss": 3.0296,
      "step": 218885
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0839414596557617,
      "learning_rate": 3.6914823943185234e-06,
      "loss": 2.9322,
      "step": 218886
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9367194175720215,
      "learning_rate": 3.690842690488982e-06,
      "loss": 2.8558,
      "step": 218887
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8314852714538574,
      "learning_rate": 3.6902030417489404e-06,
      "loss": 2.9282,
      "step": 218888
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.348020076751709,
      "learning_rate": 3.689563448098465e-06,
      "loss": 2.9886,
      "step": 218889
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2885591983795166,
      "learning_rate": 3.6889239095376886e-06,
      "loss": 2.8209,
      "step": 218890
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4588403701782227,
      "learning_rate": 3.6882844260667788e-06,
      "loss": 2.6579,
      "step": 218891
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1404120922088623,
      "learning_rate": 3.687644997685768e-06,
      "loss": 2.9805,
      "step": 218892
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.319519281387329,
      "learning_rate": 3.6870056243948233e-06,
      "loss": 2.7335,
      "step": 218893
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.056959390640259,
      "learning_rate": 3.686366306194044e-06,
      "loss": 3.0079,
      "step": 218894
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.572456121444702,
      "learning_rate": 3.685727043083597e-06,
      "loss": 2.8938,
      "step": 218895
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.9721362590789795,
      "learning_rate": 3.685087835063549e-06,
      "loss": 2.7811,
      "step": 218896
    },
    {
      "epoch": 2.85,
      "grad_norm": 5.678075313568115,
      "learning_rate": 3.6844486821340337e-06,
      "loss": 3.0012,
      "step": 218897
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7003986835479736,
      "learning_rate": 3.6838095842951497e-06,
      "loss": 2.7297,
      "step": 218898
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7213377952575684,
      "learning_rate": 3.6831705415470646e-06,
      "loss": 2.579,
      "step": 218899
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.831313371658325,
      "learning_rate": 3.682531553889878e-06,
      "loss": 2.9187,
      "step": 218900
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2904889583587646,
      "learning_rate": 3.6818926213237234e-06,
      "loss": 3.1253,
      "step": 218901
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.150115966796875,
      "learning_rate": 3.6812537438486332e-06,
      "loss": 2.852,
      "step": 218902
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8896408081054688,
      "learning_rate": 3.6806149214648417e-06,
      "loss": 2.9416,
      "step": 218903
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1860642433166504,
      "learning_rate": 3.679976154172415e-06,
      "loss": 2.9383,
      "step": 218904
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.134125232696533,
      "learning_rate": 3.6793374419714526e-06,
      "loss": 2.9832,
      "step": 218905
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7617502212524414,
      "learning_rate": 3.678698784862122e-06,
      "loss": 3.0227,
      "step": 218906
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.590514898300171,
      "learning_rate": 3.678060182844489e-06,
      "loss": 2.9064,
      "step": 218907
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.785106658935547,
      "learning_rate": 3.6774216359187203e-06,
      "loss": 2.917,
      "step": 218908
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3704848289489746,
      "learning_rate": 3.676783144084883e-06,
      "loss": 2.9175,
      "step": 218909
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2589111328125,
      "learning_rate": 3.6761447073431427e-06,
      "loss": 2.7032,
      "step": 218910
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.402677297592163,
      "learning_rate": 3.6755063256936003e-06,
      "loss": 2.9679,
      "step": 218911
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.251434803009033,
      "learning_rate": 3.674867999136388e-06,
      "loss": 2.9881,
      "step": 218912
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.804152250289917,
      "learning_rate": 3.674229727671607e-06,
      "loss": 2.8792,
      "step": 218913
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.811335563659668,
      "learning_rate": 3.6735915112993564e-06,
      "loss": 2.9903,
      "step": 218914
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.384012222290039,
      "learning_rate": 3.6729533500198026e-06,
      "loss": 2.9299,
      "step": 218915
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.845710515975952,
      "learning_rate": 3.6723152438330128e-06,
      "loss": 3.0374,
      "step": 218916
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.433448314666748,
      "learning_rate": 3.671677192739153e-06,
      "loss": 2.8318,
      "step": 218917
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0400304794311523,
      "learning_rate": 3.6710391967383234e-06,
      "loss": 2.9815,
      "step": 218918
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5818588733673096,
      "learning_rate": 3.6704012558306573e-06,
      "loss": 2.8755,
      "step": 218919
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8048899173736572,
      "learning_rate": 3.669763370016221e-06,
      "loss": 2.9164,
      "step": 218920
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1290040016174316,
      "learning_rate": 3.6691255392951814e-06,
      "loss": 2.7773,
      "step": 218921
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.818279266357422,
      "learning_rate": 3.668487763667638e-06,
      "loss": 3.2289,
      "step": 218922
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6790318489074707,
      "learning_rate": 3.6678500431337577e-06,
      "loss": 2.9043,
      "step": 218923
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.8995237350463867,
      "learning_rate": 3.6672123776935736e-06,
      "loss": 2.8656,
      "step": 218924
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9249379634857178,
      "learning_rate": 3.6665747673472854e-06,
      "loss": 2.7851,
      "step": 218925
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0378427505493164,
      "learning_rate": 3.665937212094927e-06,
      "loss": 2.9563,
      "step": 218926
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.605933904647827,
      "learning_rate": 3.665299711936698e-06,
      "loss": 2.8056,
      "step": 218927
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.246037244796753,
      "learning_rate": 3.6646622668726645e-06,
      "loss": 2.9543,
      "step": 218928
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7910170555114746,
      "learning_rate": 3.6640248769029935e-06,
      "loss": 2.9684,
      "step": 218929
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7772669792175293,
      "learning_rate": 3.663387542027718e-06,
      "loss": 2.9732,
      "step": 218930
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7324094772338867,
      "learning_rate": 3.662750262247105e-06,
      "loss": 2.9834,
      "step": 218931
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.998778820037842,
      "learning_rate": 3.6621130375610876e-06,
      "loss": 2.8419,
      "step": 218932
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8811497688293457,
      "learning_rate": 3.6614758679699317e-06,
      "loss": 2.7889,
      "step": 218933
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.589357376098633,
      "learning_rate": 3.6608387534736714e-06,
      "loss": 2.6643,
      "step": 218934
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.518651008605957,
      "learning_rate": 3.660201694072473e-06,
      "loss": 2.7089,
      "step": 218935
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7501702308654785,
      "learning_rate": 3.6595646897664367e-06,
      "loss": 2.9674,
      "step": 218936
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.79634428024292,
      "learning_rate": 3.6589277405556948e-06,
      "loss": 2.9253,
      "step": 218937
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.007436513900757,
      "learning_rate": 3.658290846440315e-06,
      "loss": 2.6571,
      "step": 218938
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.785236120223999,
      "learning_rate": 3.6576540074204962e-06,
      "loss": 2.7522,
      "step": 218939
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5425589084625244,
      "learning_rate": 3.6570172234962726e-06,
      "loss": 2.9633,
      "step": 218940
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7933669090270996,
      "learning_rate": 3.65638049466781e-06,
      "loss": 2.9725,
      "step": 218941
    },
    {
      "epoch": 2.85,
      "grad_norm": 5.0709123611450195,
      "learning_rate": 3.655743820935242e-06,
      "loss": 2.876,
      "step": 218942
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6352057456970215,
      "learning_rate": 3.655107202298668e-06,
      "loss": 2.9971,
      "step": 218943
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.054966688156128,
      "learning_rate": 3.654470638758189e-06,
      "loss": 2.8062,
      "step": 218944
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.235506057739258,
      "learning_rate": 3.6538341303139375e-06,
      "loss": 2.9955,
      "step": 218945
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8964431285858154,
      "learning_rate": 3.653197676966013e-06,
      "loss": 2.786,
      "step": 218946
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.039241313934326,
      "learning_rate": 3.652561278714583e-06,
      "loss": 3.0983,
      "step": 218947
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.638561487197876,
      "learning_rate": 3.6519249355597135e-06,
      "loss": 3.0684,
      "step": 218948
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.905512809753418,
      "learning_rate": 3.651288647501571e-06,
      "loss": 2.9629,
      "step": 218949
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8875527381896973,
      "learning_rate": 3.650652414540223e-06,
      "loss": 3.0076,
      "step": 218950
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.4165587425231934,
      "learning_rate": 3.6500162366758347e-06,
      "loss": 2.8905,
      "step": 218951
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.168621301651001,
      "learning_rate": 3.6493801139084732e-06,
      "loss": 2.7629,
      "step": 218952
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.270627975463867,
      "learning_rate": 3.6487440462383388e-06,
      "loss": 2.7905,
      "step": 218953
    },
    {
      "epoch": 2.85,
      "grad_norm": 6.9374566078186035,
      "learning_rate": 3.6481080336654313e-06,
      "loss": 2.795,
      "step": 218954
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.722018003463745,
      "learning_rate": 3.6474720761899833e-06,
      "loss": 2.8505,
      "step": 218955
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.588064432144165,
      "learning_rate": 3.646836173812062e-06,
      "loss": 2.846,
      "step": 218956
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2231173515319824,
      "learning_rate": 3.6462003265317673e-06,
      "loss": 2.7978,
      "step": 218957
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.576231002807617,
      "learning_rate": 3.6455645343492323e-06,
      "loss": 3.15,
      "step": 218958
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.951169490814209,
      "learning_rate": 3.64492879726459e-06,
      "loss": 2.9264,
      "step": 218959
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8572840690612793,
      "learning_rate": 3.644293115277941e-06,
      "loss": 2.8635,
      "step": 218960
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.768695116043091,
      "learning_rate": 3.6436574883894178e-06,
      "loss": 2.9277,
      "step": 218961
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.652170419692993,
      "learning_rate": 3.643021916599154e-06,
      "loss": 2.9363,
      "step": 218962
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.917421579360962,
      "learning_rate": 3.642386399907249e-06,
      "loss": 2.8957,
      "step": 218963
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.499692678451538,
      "learning_rate": 3.64175093831377e-06,
      "loss": 3.001,
      "step": 218964
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.8518333435058594,
      "learning_rate": 3.64111553181895e-06,
      "loss": 2.877,
      "step": 218965
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.500410795211792,
      "learning_rate": 3.640480180422789e-06,
      "loss": 2.8457,
      "step": 218966
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.441272020339966,
      "learning_rate": 3.6398448841254537e-06,
      "loss": 2.9683,
      "step": 218967
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9209272861480713,
      "learning_rate": 3.63920964292711e-06,
      "loss": 2.8034,
      "step": 218968
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.128758668899536,
      "learning_rate": 3.638574456827792e-06,
      "loss": 2.9111,
      "step": 218969
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.814889907836914,
      "learning_rate": 3.6379393258276657e-06,
      "loss": 2.8195,
      "step": 218970
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.788966655731201,
      "learning_rate": 3.6373042499268645e-06,
      "loss": 3.1026,
      "step": 218971
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.9219088554382324,
      "learning_rate": 3.6366692291254552e-06,
      "loss": 2.975,
      "step": 218972
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.007097005844116,
      "learning_rate": 3.636034263423604e-06,
      "loss": 3.0729,
      "step": 218973
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.905745267868042,
      "learning_rate": 3.6353993528213777e-06,
      "loss": 2.817,
      "step": 218974
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.030949354171753,
      "learning_rate": 3.634764497318943e-06,
      "loss": 2.772,
      "step": 218975
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6737260818481445,
      "learning_rate": 3.6341296969163992e-06,
      "loss": 2.774,
      "step": 218976
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.927734375,
      "learning_rate": 3.63349495161388e-06,
      "loss": 2.9647,
      "step": 218977
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4158871173858643,
      "learning_rate": 3.6328602614114523e-06,
      "loss": 2.8625,
      "step": 218978
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.014338493347168,
      "learning_rate": 3.6322256263093154e-06,
      "loss": 2.9496,
      "step": 218979
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.817553997039795,
      "learning_rate": 3.631591046307536e-06,
      "loss": 2.9337,
      "step": 218980
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.043078660964966,
      "learning_rate": 3.6309565214062143e-06,
      "loss": 3.0527,
      "step": 218981
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8039793968200684,
      "learning_rate": 3.630322051605483e-06,
      "loss": 2.8761,
      "step": 218982
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2742085456848145,
      "learning_rate": 3.6296876369055093e-06,
      "loss": 2.9666,
      "step": 218983
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9522268772125244,
      "learning_rate": 3.6290532773063264e-06,
      "loss": 3.0809,
      "step": 218984
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8694629669189453,
      "learning_rate": 3.6284189728081334e-06,
      "loss": 3.1256,
      "step": 218985
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.164968252182007,
      "learning_rate": 3.627784723410998e-06,
      "loss": 2.9915,
      "step": 218986
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8257858753204346,
      "learning_rate": 3.6271505291150527e-06,
      "loss": 2.9502,
      "step": 218987
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0145318508148193,
      "learning_rate": 3.6265163899203974e-06,
      "loss": 3.2608,
      "step": 218988
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.884694814682007,
      "learning_rate": 3.6258823058271657e-06,
      "loss": 2.6842,
      "step": 218989
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5932459831237793,
      "learning_rate": 3.625248276835524e-06,
      "loss": 2.9486,
      "step": 218990
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6255292892456055,
      "learning_rate": 3.6246143029455056e-06,
      "loss": 2.9825,
      "step": 218991
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8555195331573486,
      "learning_rate": 3.6239803841572766e-06,
      "loss": 2.7363,
      "step": 218992
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4237310886383057,
      "learning_rate": 3.623346520470938e-06,
      "loss": 3.0321,
      "step": 218993
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.394318103790283,
      "learning_rate": 3.6227127118866217e-06,
      "loss": 2.897,
      "step": 218994
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.002418041229248,
      "learning_rate": 3.6220789584044287e-06,
      "loss": 3.0171,
      "step": 218995
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7918591499328613,
      "learning_rate": 3.6214452600244916e-06,
      "loss": 3.0649,
      "step": 218996
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.123649835586548,
      "learning_rate": 3.62081161674691e-06,
      "loss": 2.9089,
      "step": 218997
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3428597450256348,
      "learning_rate": 3.620178028571852e-06,
      "loss": 2.9159,
      "step": 218998
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8203930854797363,
      "learning_rate": 3.619544495499349e-06,
      "loss": 2.9727,
      "step": 218999
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5486197471618652,
      "learning_rate": 3.618911017529602e-06,
      "loss": 2.9606,
      "step": 219000
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2825119495391846,
      "learning_rate": 3.6182775946626775e-06,
      "loss": 2.8582,
      "step": 219001
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8419318199157715,
      "learning_rate": 3.6176442268987083e-06,
      "loss": 2.621,
      "step": 219002
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1900885105133057,
      "learning_rate": 3.6170109142377946e-06,
      "loss": 2.862,
      "step": 219003
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.773768424987793,
      "learning_rate": 3.616377656680136e-06,
      "loss": 2.9075,
      "step": 219004
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8870205879211426,
      "learning_rate": 3.615744454225733e-06,
      "loss": 2.9143,
      "step": 219005
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9580485820770264,
      "learning_rate": 3.615111306874785e-06,
      "loss": 2.7583,
      "step": 219006
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.542088747024536,
      "learning_rate": 3.6144782146273586e-06,
      "loss": 2.7818,
      "step": 219007
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6129283905029297,
      "learning_rate": 3.613845177483621e-06,
      "loss": 3.1372,
      "step": 219008
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0758485794067383,
      "learning_rate": 3.6132121954436376e-06,
      "loss": 3.0767,
      "step": 219009
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.782797336578369,
      "learning_rate": 3.612579268507576e-06,
      "loss": 3.0809,
      "step": 219010
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8612842559814453,
      "learning_rate": 3.6119463966755023e-06,
      "loss": 2.8572,
      "step": 219011
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5995025634765625,
      "learning_rate": 3.6113135799475832e-06,
      "loss": 2.9079,
      "step": 219012
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1944451332092285,
      "learning_rate": 3.610680818323919e-06,
      "loss": 2.7535,
      "step": 219013
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6036365032196045,
      "learning_rate": 3.6100481118046088e-06,
      "loss": 2.7346,
      "step": 219014
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3275747299194336,
      "learning_rate": 3.6094154603897862e-06,
      "loss": 2.9255,
      "step": 219015
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.033552885055542,
      "learning_rate": 3.608782864079618e-06,
      "loss": 3.132,
      "step": 219016
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.113917350769043,
      "learning_rate": 3.6081503228741037e-06,
      "loss": 3.0988,
      "step": 219017
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.558687686920166,
      "learning_rate": 3.607517836773477e-06,
      "loss": 2.8947,
      "step": 219018
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6068930625915527,
      "learning_rate": 3.6068854057777707e-06,
      "loss": 2.8656,
      "step": 219019
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.739171266555786,
      "learning_rate": 3.6062530298871517e-06,
      "loss": 2.9472,
      "step": 219020
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3147342205047607,
      "learning_rate": 3.6056207091017197e-06,
      "loss": 2.7069,
      "step": 219021
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2575197219848633,
      "learning_rate": 3.6049884434216415e-06,
      "loss": 2.8519,
      "step": 219022
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.442094087600708,
      "learning_rate": 3.60435623284695e-06,
      "loss": 2.9197,
      "step": 219023
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0105526447296143,
      "learning_rate": 3.603724077377812e-06,
      "loss": 3.0896,
      "step": 219024
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.891756772994995,
      "learning_rate": 3.6030919770143274e-06,
      "loss": 2.7964,
      "step": 219025
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2826426029205322,
      "learning_rate": 3.6024599317566294e-06,
      "loss": 2.6797,
      "step": 219026
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.052730083465576,
      "learning_rate": 3.601827941604818e-06,
      "loss": 2.8826,
      "step": 219027
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.088449001312256,
      "learning_rate": 3.6011960065590596e-06,
      "loss": 2.9368,
      "step": 219028
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0547351837158203,
      "learning_rate": 3.6005641266193873e-06,
      "loss": 2.9553,
      "step": 219029
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1418168544769287,
      "learning_rate": 3.5999323017860015e-06,
      "loss": 2.7011,
      "step": 219030
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.859524965286255,
      "learning_rate": 3.599300532058935e-06,
      "loss": 3.1645,
      "step": 219031
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.367608070373535,
      "learning_rate": 3.598668817438388e-06,
      "loss": 2.608,
      "step": 219032
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.074260711669922,
      "learning_rate": 3.5980371579244603e-06,
      "loss": 2.8581,
      "step": 219033
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7586278915405273,
      "learning_rate": 3.597405553517252e-06,
      "loss": 2.6354,
      "step": 219034
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9784655570983887,
      "learning_rate": 3.596774004216829e-06,
      "loss": 2.8532,
      "step": 219035
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3926808834075928,
      "learning_rate": 3.5961425100234256e-06,
      "loss": 2.9239,
      "step": 219036
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.640434741973877,
      "learning_rate": 3.5955110709370406e-06,
      "loss": 2.7866,
      "step": 219037
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1924211978912354,
      "learning_rate": 3.5948796869578743e-06,
      "loss": 2.8791,
      "step": 219038
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.800248622894287,
      "learning_rate": 3.594248358085994e-06,
      "loss": 2.8891,
      "step": 219039
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2733447551727295,
      "learning_rate": 3.593617084321565e-06,
      "loss": 3.0468,
      "step": 219040
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.103595018386841,
      "learning_rate": 3.592985865664655e-06,
      "loss": 3.0205,
      "step": 219041
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2473058700561523,
      "learning_rate": 3.5923547021154297e-06,
      "loss": 2.7708,
      "step": 219042
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.122128009796143,
      "learning_rate": 3.5917235936739564e-06,
      "loss": 2.7015,
      "step": 219043
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2182629108428955,
      "learning_rate": 3.5910925403404012e-06,
      "loss": 3.0834,
      "step": 219044
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.012618064880371,
      "learning_rate": 3.5904615421147973e-06,
      "loss": 2.8407,
      "step": 219045
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1651365756988525,
      "learning_rate": 3.589830598997412e-06,
      "loss": 2.9313,
      "step": 219046
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.830181837081909,
      "learning_rate": 3.5891997109882108e-06,
      "loss": 3.2794,
      "step": 219047
    },
    {
      "epoch": 2.85,
      "grad_norm": 5.097375869750977,
      "learning_rate": 3.5885688780873945e-06,
      "loss": 2.7812,
      "step": 219048
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.130106210708618,
      "learning_rate": 3.5879381002950293e-06,
      "loss": 3.0079,
      "step": 219049
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.9973561763763428,
      "learning_rate": 3.5873073776113146e-06,
      "loss": 2.9706,
      "step": 219050
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.04814338684082,
      "learning_rate": 3.5866767100362516e-06,
      "loss": 2.9107,
      "step": 219051
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0778207778930664,
      "learning_rate": 3.586046097570072e-06,
      "loss": 3.1857,
      "step": 219052
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3589515686035156,
      "learning_rate": 3.5854155402128103e-06,
      "loss": 2.8965,
      "step": 219053
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5686981678009033,
      "learning_rate": 3.5847850379646327e-06,
      "loss": 2.6865,
      "step": 219054
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.4110565185546875,
      "learning_rate": 3.5841545908256387e-06,
      "loss": 2.88,
      "step": 219055
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4451773166656494,
      "learning_rate": 3.5835241987959285e-06,
      "loss": 3.0014,
      "step": 219056
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.553954839706421,
      "learning_rate": 3.5828938618756352e-06,
      "loss": 2.5489,
      "step": 219057
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.034208297729492,
      "learning_rate": 3.582263580064926e-06,
      "loss": 2.8829,
      "step": 219058
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1103005409240723,
      "learning_rate": 3.581633353363833e-06,
      "loss": 2.7483,
      "step": 219059
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0679030418395996,
      "learning_rate": 3.5810031817724903e-06,
      "loss": 3.0416,
      "step": 219060
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.344881534576416,
      "learning_rate": 3.5803730652910643e-06,
      "loss": 2.7928,
      "step": 219061
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5336813926696777,
      "learning_rate": 3.579743003919655e-06,
      "loss": 3.0136,
      "step": 219062
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3273236751556396,
      "learning_rate": 3.5791129976583286e-06,
      "loss": 2.7966,
      "step": 219063
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.326160430908203,
      "learning_rate": 3.5784830465072523e-06,
      "loss": 3.2037,
      "step": 219064
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.842665672302246,
      "learning_rate": 3.577853150466525e-06,
      "loss": 2.9314,
      "step": 219065
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1546518802642822,
      "learning_rate": 3.577223309536281e-06,
      "loss": 3.0448,
      "step": 219066
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9036004543304443,
      "learning_rate": 3.57659352371662e-06,
      "loss": 2.7928,
      "step": 219067
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.633397340774536,
      "learning_rate": 3.575963793007708e-06,
      "loss": 3.0574,
      "step": 219068
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.239088773727417,
      "learning_rate": 3.575334117409545e-06,
      "loss": 2.7367,
      "step": 219069
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.585050344467163,
      "learning_rate": 3.5747044969223647e-06,
      "loss": 2.8909,
      "step": 219070
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0827274322509766,
      "learning_rate": 3.574074931546267e-06,
      "loss": 2.4918,
      "step": 219071
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2121176719665527,
      "learning_rate": 3.5734454212812847e-06,
      "loss": 2.8289,
      "step": 219072
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.158449649810791,
      "learning_rate": 3.5728159661276513e-06,
      "loss": 2.9976,
      "step": 219073
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.50780987739563,
      "learning_rate": 3.5721865660854e-06,
      "loss": 2.6635,
      "step": 219074
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7488136291503906,
      "learning_rate": 3.571557221154664e-06,
      "loss": 2.7451,
      "step": 219075
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.786565065383911,
      "learning_rate": 3.57092793133561e-06,
      "loss": 3.093,
      "step": 219076
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6938560009002686,
      "learning_rate": 3.570298696628271e-06,
      "loss": 2.7612,
      "step": 219077
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3261289596557617,
      "learning_rate": 3.569669517032814e-06,
      "loss": 2.8925,
      "step": 219078
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9778616428375244,
      "learning_rate": 3.5690403925493716e-06,
      "loss": 2.966,
      "step": 219079
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.96881103515625,
      "learning_rate": 3.5684113231780443e-06,
      "loss": 2.6894,
      "step": 219080
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3288915157318115,
      "learning_rate": 3.567782308918932e-06,
      "loss": 2.7299,
      "step": 219081
    },
    {
      "epoch": 2.85,
      "grad_norm": 5.129125595092773,
      "learning_rate": 3.567153349772134e-06,
      "loss": 3.012,
      "step": 219082
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.3374807834625244,
      "learning_rate": 3.566524445737884e-06,
      "loss": 2.801,
      "step": 219083
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2002174854278564,
      "learning_rate": 3.5658955968161486e-06,
      "loss": 3.0212,
      "step": 219084
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.008665084838867,
      "learning_rate": 3.5652668030070943e-06,
      "loss": 2.9703,
      "step": 219085
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.363987922668457,
      "learning_rate": 3.5646380643108875e-06,
      "loss": 2.9755,
      "step": 219086
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.413681983947754,
      "learning_rate": 3.5640093807275948e-06,
      "loss": 2.9866,
      "step": 219087
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2453131675720215,
      "learning_rate": 3.56338075225735e-06,
      "loss": 2.9081,
      "step": 219088
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2453606128692627,
      "learning_rate": 3.5627521789002854e-06,
      "loss": 2.9414,
      "step": 219089
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9705803394317627,
      "learning_rate": 3.5621236606564683e-06,
      "loss": 2.9792,
      "step": 219090
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.965019702911377,
      "learning_rate": 3.5614951975260985e-06,
      "loss": 2.9195,
      "step": 219091
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4548959732055664,
      "learning_rate": 3.560866789509176e-06,
      "loss": 3.0439,
      "step": 219092
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8325839042663574,
      "learning_rate": 3.5602384366059334e-06,
      "loss": 2.7818,
      "step": 219093
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.392062187194824,
      "learning_rate": 3.5596101388164046e-06,
      "loss": 3.0441,
      "step": 219094
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0568132400512695,
      "learning_rate": 3.558981896140789e-06,
      "loss": 2.8027,
      "step": 219095
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.049830198287964,
      "learning_rate": 3.55835370857912e-06,
      "loss": 2.8348,
      "step": 219096
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.75614333152771,
      "learning_rate": 3.557725576131565e-06,
      "loss": 2.9163,
      "step": 219097
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.300577163696289,
      "learning_rate": 3.5570974987981895e-06,
      "loss": 2.9789,
      "step": 219098
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.505239248275757,
      "learning_rate": 3.5564694765791935e-06,
      "loss": 2.9648,
      "step": 219099
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.83117938041687,
      "learning_rate": 3.555841509474611e-06,
      "loss": 2.9869,
      "step": 219100
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2545628547668457,
      "learning_rate": 3.5552135974846074e-06,
      "loss": 2.5621,
      "step": 219101
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.075247287750244,
      "learning_rate": 3.5545857406092837e-06,
      "loss": 2.7754,
      "step": 219102
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.781735420227051,
      "learning_rate": 3.5539579388487726e-06,
      "loss": 3.0475,
      "step": 219103
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0804574489593506,
      "learning_rate": 3.553330192203141e-06,
      "loss": 2.8432,
      "step": 219104
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.817155599594116,
      "learning_rate": 3.5527025006725552e-06,
      "loss": 3.0158,
      "step": 219105
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0897538661956787,
      "learning_rate": 3.552074864257115e-06,
      "loss": 2.5825,
      "step": 219106
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3633334636688232,
      "learning_rate": 3.5514472829569873e-06,
      "loss": 3.009,
      "step": 219107
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.094508409500122,
      "learning_rate": 3.550819756772172e-06,
      "loss": 3.1774,
      "step": 219108
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.465493679046631,
      "learning_rate": 3.550192285702902e-06,
      "loss": 3.0342,
      "step": 219109
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6624937057495117,
      "learning_rate": 3.5495648697492106e-06,
      "loss": 2.8766,
      "step": 219110
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7259254455566406,
      "learning_rate": 3.548937508911265e-06,
      "loss": 2.842,
      "step": 219111
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.58402943611145,
      "learning_rate": 3.5483102031891643e-06,
      "loss": 3.1163,
      "step": 219112
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3976385593414307,
      "learning_rate": 3.5476829525830754e-06,
      "loss": 2.9584,
      "step": 219113
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0615596771240234,
      "learning_rate": 3.5470557570930315e-06,
      "loss": 2.9176,
      "step": 219114
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.37829852104187,
      "learning_rate": 3.546428616719166e-06,
      "loss": 3.275,
      "step": 219115
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7522716522216797,
      "learning_rate": 3.545801531461645e-06,
      "loss": 2.7652,
      "step": 219116
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.384071111679077,
      "learning_rate": 3.5451745013205356e-06,
      "loss": 2.9963,
      "step": 219117
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4474925994873047,
      "learning_rate": 3.544547526295971e-06,
      "loss": 3.1025,
      "step": 219118
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0676441192626953,
      "learning_rate": 3.5439206063881178e-06,
      "loss": 2.9001,
      "step": 219119
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0749711990356445,
      "learning_rate": 3.543293741597009e-06,
      "loss": 2.8701,
      "step": 219120
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.0846428871154785,
      "learning_rate": 3.5426669319228106e-06,
      "loss": 2.7576,
      "step": 219121
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.916344165802002,
      "learning_rate": 3.5420401773655905e-06,
      "loss": 2.6457,
      "step": 219122
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.660757064819336,
      "learning_rate": 3.5414134779255476e-06,
      "loss": 2.7874,
      "step": 219123
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.684223175048828,
      "learning_rate": 3.5407868336027156e-06,
      "loss": 2.9957,
      "step": 219124
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.275637149810791,
      "learning_rate": 3.5401602443972942e-06,
      "loss": 2.9526,
      "step": 219125
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.206658124923706,
      "learning_rate": 3.5395337103093168e-06,
      "loss": 2.9758,
      "step": 219126
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.979128360748291,
      "learning_rate": 3.5389072313389497e-06,
      "loss": 2.9187,
      "step": 219127
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6669700145721436,
      "learning_rate": 3.53828080748626e-06,
      "loss": 2.8248,
      "step": 219128
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7143585681915283,
      "learning_rate": 3.537654438751447e-06,
      "loss": 3.0412,
      "step": 219129
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.88785457611084,
      "learning_rate": 3.5370281251345444e-06,
      "loss": 2.9532,
      "step": 219130
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9706263542175293,
      "learning_rate": 3.5364018666357185e-06,
      "loss": 3.0168,
      "step": 219131
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.88690185546875,
      "learning_rate": 3.535775663255069e-06,
      "loss": 2.8153,
      "step": 219132
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8583390712738037,
      "learning_rate": 3.53514951499273e-06,
      "loss": 2.9455,
      "step": 219133
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1426517963409424,
      "learning_rate": 3.5345234218488006e-06,
      "loss": 2.9922,
      "step": 219134
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.670358419418335,
      "learning_rate": 3.533897383823381e-06,
      "loss": 3.0921,
      "step": 219135
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.8829357624053955,
      "learning_rate": 3.533271400916604e-06,
      "loss": 2.9388,
      "step": 219136
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6398041248321533,
      "learning_rate": 3.5326454731286367e-06,
      "loss": 2.933,
      "step": 219137
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.363115310668945,
      "learning_rate": 3.5320196004595126e-06,
      "loss": 3.0065,
      "step": 219138
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4347736835479736,
      "learning_rate": 3.5313937829093973e-06,
      "loss": 2.7834,
      "step": 219139
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.65195631980896,
      "learning_rate": 3.5307680204783582e-06,
      "loss": 2.815,
      "step": 219140
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.901876449584961,
      "learning_rate": 3.5301423131665618e-06,
      "loss": 2.8491,
      "step": 219141
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.04994797706604,
      "learning_rate": 3.5295166609741075e-06,
      "loss": 3.0614,
      "step": 219142
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.278707981109619,
      "learning_rate": 3.5288910639010958e-06,
      "loss": 2.782,
      "step": 219143
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.896851062774658,
      "learning_rate": 3.528265521947693e-06,
      "loss": 2.7633,
      "step": 219144
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.265068292617798,
      "learning_rate": 3.5276400351139657e-06,
      "loss": 3.0138,
      "step": 219145
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.634371757507324,
      "learning_rate": 3.527014603400047e-06,
      "loss": 2.8573,
      "step": 219146
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2641704082489014,
      "learning_rate": 3.52638922680607e-06,
      "loss": 2.9743,
      "step": 219147
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.995466947555542,
      "learning_rate": 3.5257639053321017e-06,
      "loss": 2.9905,
      "step": 219148
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0261433124542236,
      "learning_rate": 3.5251386389783088e-06,
      "loss": 2.8033,
      "step": 219149
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.695617198944092,
      "learning_rate": 3.5245134277447906e-06,
      "loss": 2.922,
      "step": 219150
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8701629638671875,
      "learning_rate": 3.5238882716316475e-06,
      "loss": 2.9647,
      "step": 219151
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.707064628601074,
      "learning_rate": 3.523263170639046e-06,
      "loss": 2.9778,
      "step": 219152
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.545536518096924,
      "learning_rate": 3.522638124767052e-06,
      "loss": 2.9496,
      "step": 219153
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8242886066436768,
      "learning_rate": 3.5220131340157666e-06,
      "loss": 3.0102,
      "step": 219154
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.3507442474365234,
      "learning_rate": 3.5213881983853556e-06,
      "loss": 2.8322,
      "step": 219155
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.279433012008667,
      "learning_rate": 3.5207633178759185e-06,
      "loss": 2.7875,
      "step": 219156
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0475916862487793,
      "learning_rate": 3.5201384924875562e-06,
      "loss": 2.9997,
      "step": 219157
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.927819013595581,
      "learning_rate": 3.5195137222204017e-06,
      "loss": 2.7324,
      "step": 219158
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6155049800872803,
      "learning_rate": 3.5188890070745545e-06,
      "loss": 3.0439,
      "step": 219159
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2578539848327637,
      "learning_rate": 3.518264347050148e-06,
      "loss": 3.0196,
      "step": 219160
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.979081630706787,
      "learning_rate": 3.5176397421473155e-06,
      "loss": 2.6859,
      "step": 219161
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0455801486968994,
      "learning_rate": 3.517015192366157e-06,
      "loss": 3.0348,
      "step": 219162
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.173274517059326,
      "learning_rate": 3.5163906977067392e-06,
      "loss": 2.7658,
      "step": 219163
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6495509147644043,
      "learning_rate": 3.5157662581692614e-06,
      "loss": 3.1967,
      "step": 219164
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.479189157485962,
      "learning_rate": 3.5151418737537574e-06,
      "loss": 2.7779,
      "step": 219165
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7713067531585693,
      "learning_rate": 3.5145175444603934e-06,
      "loss": 3.0635,
      "step": 219166
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.97012996673584,
      "learning_rate": 3.513893270289303e-06,
      "loss": 2.7684,
      "step": 219167
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5430636405944824,
      "learning_rate": 3.5132690512405857e-06,
      "loss": 3.0329,
      "step": 219168
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.432154655456543,
      "learning_rate": 3.512644887314309e-06,
      "loss": 2.6229,
      "step": 219169
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.94111967086792,
      "learning_rate": 3.512020778510638e-06,
      "loss": 3.2947,
      "step": 219170
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0020878314971924,
      "learning_rate": 3.5113967248296738e-06,
      "loss": 2.871,
      "step": 219171
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6374945640563965,
      "learning_rate": 3.5107727262715823e-06,
      "loss": 3.0619,
      "step": 219172
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.809948444366455,
      "learning_rate": 3.5101487828363974e-06,
      "loss": 2.9641,
      "step": 219173
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.285311698913574,
      "learning_rate": 3.509524894524285e-06,
      "loss": 2.8338,
      "step": 219174
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8116531372070312,
      "learning_rate": 3.5089010613353453e-06,
      "loss": 2.8702,
      "step": 219175
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.668971300125122,
      "learning_rate": 3.508277283269678e-06,
      "loss": 2.9985,
      "step": 219176
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.917956590652466,
      "learning_rate": 3.5076535603274502e-06,
      "loss": 2.8423,
      "step": 219177
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8187499046325684,
      "learning_rate": 3.507029892508728e-06,
      "loss": 2.832,
      "step": 219178
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7136268615722656,
      "learning_rate": 3.506406279813645e-06,
      "loss": 2.9189,
      "step": 219179
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.27329421043396,
      "learning_rate": 3.505782722242334e-06,
      "loss": 2.8678,
      "step": 219180
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.078566074371338,
      "learning_rate": 3.505159219794862e-06,
      "loss": 3.0948,
      "step": 219181
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.882248640060425,
      "learning_rate": 3.5045357724714283e-06,
      "loss": 2.9661,
      "step": 219182
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4844210147857666,
      "learning_rate": 3.5039123802720336e-06,
      "loss": 2.6394,
      "step": 219183
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1792149543762207,
      "learning_rate": 3.5032890431969107e-06,
      "loss": 2.9655,
      "step": 219184
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.930338144302368,
      "learning_rate": 3.502665761246093e-06,
      "loss": 3.1015,
      "step": 219185
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0463085174560547,
      "learning_rate": 3.5020425344197468e-06,
      "loss": 2.9195,
      "step": 219186
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.824489116668701,
      "learning_rate": 3.501419362717972e-06,
      "loss": 3.0193,
      "step": 219187
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9601399898529053,
      "learning_rate": 3.500796246140869e-06,
      "loss": 2.9303,
      "step": 219188
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.80342698097229,
      "learning_rate": 3.500173184688537e-06,
      "loss": 2.8282,
      "step": 219189
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.687417507171631,
      "learning_rate": 3.4995501783611768e-06,
      "loss": 2.7013,
      "step": 219190
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.661069869995117,
      "learning_rate": 3.4989272271587876e-06,
      "loss": 2.9215,
      "step": 219191
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.483827829360962,
      "learning_rate": 3.498304331081603e-06,
      "loss": 2.8993,
      "step": 219192
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1289422512054443,
      "learning_rate": 3.4976814901296223e-06,
      "loss": 2.8997,
      "step": 219193
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8528621196746826,
      "learning_rate": 3.4970587043030795e-06,
      "loss": 3.0412,
      "step": 219194
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.17922043800354,
      "learning_rate": 3.496435973601974e-06,
      "loss": 2.8218,
      "step": 219195
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7617440223693848,
      "learning_rate": 3.4958132980265064e-06,
      "loss": 2.7379,
      "step": 219196
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.710852861404419,
      "learning_rate": 3.4951906775767426e-06,
      "loss": 2.8735,
      "step": 219197
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9401211738586426,
      "learning_rate": 3.494568112252849e-06,
      "loss": 2.6263,
      "step": 219198
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7728209495544434,
      "learning_rate": 3.493945602054926e-06,
      "loss": 2.8818,
      "step": 219199
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.562856674194336,
      "learning_rate": 3.49332314698304e-06,
      "loss": 2.7702,
      "step": 219200
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.00803804397583,
      "learning_rate": 3.492700747037358e-06,
      "loss": 2.8695,
      "step": 219201
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2280023097991943,
      "learning_rate": 3.4920784022179795e-06,
      "loss": 2.7792,
      "step": 219202
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7746644020080566,
      "learning_rate": 3.491456112525004e-06,
      "loss": 3.0071,
      "step": 219203
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8412091732025146,
      "learning_rate": 3.4908338779585652e-06,
      "loss": 2.8525,
      "step": 219204
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.926083564758301,
      "learning_rate": 3.4902116985187966e-06,
      "loss": 2.8924,
      "step": 219205
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0452704429626465,
      "learning_rate": 3.4895895742057978e-06,
      "loss": 2.8852,
      "step": 219206
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.1405222415924072,
      "learning_rate": 3.4889675050196687e-06,
      "loss": 2.9817,
      "step": 219207
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8230984210968018,
      "learning_rate": 3.4883454909605423e-06,
      "loss": 2.9812,
      "step": 219208
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8730850219726562,
      "learning_rate": 3.487723532028519e-06,
      "loss": 2.8591,
      "step": 219209
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.9399876594543457,
      "learning_rate": 3.487101628223732e-06,
      "loss": 2.8804,
      "step": 219210
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8704702854156494,
      "learning_rate": 3.486479779546314e-06,
      "loss": 2.9185,
      "step": 219211
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.072999954223633,
      "learning_rate": 3.485857985996332e-06,
      "loss": 2.897,
      "step": 219212
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6269359588623047,
      "learning_rate": 3.4852362475739194e-06,
      "loss": 2.6758,
      "step": 219213
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.961625337600708,
      "learning_rate": 3.484614564279209e-06,
      "loss": 2.8772,
      "step": 219214
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.9796650409698486,
      "learning_rate": 3.4839929361122677e-06,
      "loss": 2.8913,
      "step": 219215
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6069393157958984,
      "learning_rate": 3.483371363073295e-06,
      "loss": 3.0156,
      "step": 219216
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.746946334838867,
      "learning_rate": 3.482749845162358e-06,
      "loss": 2.9192,
      "step": 219217
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8975095748901367,
      "learning_rate": 3.4821283823795897e-06,
      "loss": 2.7025,
      "step": 219218
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.861051082611084,
      "learning_rate": 3.481506974725057e-06,
      "loss": 2.7651,
      "step": 219219
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.847327709197998,
      "learning_rate": 3.480885622198926e-06,
      "loss": 2.7918,
      "step": 219220
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.716675281524658,
      "learning_rate": 3.480264324801263e-06,
      "loss": 2.6882,
      "step": 219221
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8350048065185547,
      "learning_rate": 3.4796430825322686e-06,
      "loss": 2.8234,
      "step": 219222
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.009530782699585,
      "learning_rate": 3.479021895391976e-06,
      "loss": 3.1323,
      "step": 219223
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6452419757843018,
      "learning_rate": 3.478400763380551e-06,
      "loss": 2.9954,
      "step": 219224
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.506452798843384,
      "learning_rate": 3.477779686498061e-06,
      "loss": 2.8927,
      "step": 219225
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6780965328216553,
      "learning_rate": 3.4771586647446727e-06,
      "loss": 2.7668,
      "step": 219226
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6634137630462646,
      "learning_rate": 3.4765376981204517e-06,
      "loss": 2.8399,
      "step": 219227
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.935114860534668,
      "learning_rate": 3.4759167866255653e-06,
      "loss": 3.0408,
      "step": 219228
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8923633098602295,
      "learning_rate": 3.47529593026008e-06,
      "loss": 2.9813,
      "step": 219229
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8409488201141357,
      "learning_rate": 3.4746751290241627e-06,
      "loss": 2.9992,
      "step": 219230
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6255569458007812,
      "learning_rate": 3.474054382917879e-06,
      "loss": 2.7021,
      "step": 219231
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.5506560802459717,
      "learning_rate": 3.473433691941363e-06,
      "loss": 2.9694,
      "step": 219232
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.7468035221099854,
      "learning_rate": 3.4728130560947478e-06,
      "loss": 2.7399,
      "step": 219233
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.6459460258483887,
      "learning_rate": 3.4721924753781328e-06,
      "loss": 3.1058,
      "step": 219234
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8723318576812744,
      "learning_rate": 3.4715719497916185e-06,
      "loss": 2.9722,
      "step": 219235
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.815462827682495,
      "learning_rate": 3.4709514793353375e-06,
      "loss": 2.9946,
      "step": 219236
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.26735782623291,
      "learning_rate": 3.470331064009424e-06,
      "loss": 3.0098,
      "step": 219237
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.871253252029419,
      "learning_rate": 3.469710703813977e-06,
      "loss": 2.9929,
      "step": 219238
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.814448356628418,
      "learning_rate": 3.4690903987490635e-06,
      "loss": 2.7238,
      "step": 219239
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8626809120178223,
      "learning_rate": 3.4684701488148834e-06,
      "loss": 2.9939,
      "step": 219240
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.347040891647339,
      "learning_rate": 3.467849954011537e-06,
      "loss": 3.0247,
      "step": 219241
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.4632961750030518,
      "learning_rate": 3.4672298143390562e-06,
      "loss": 2.9409,
      "step": 219242
    },
    {
      "epoch": 2.85,
      "grad_norm": 4.4382405281066895,
      "learning_rate": 3.4666097297976425e-06,
      "loss": 2.861,
      "step": 219243
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.7391767501831055,
      "learning_rate": 3.4659897003873616e-06,
      "loss": 2.7183,
      "step": 219244
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.5481536388397217,
      "learning_rate": 3.46536972610838e-06,
      "loss": 3.0219,
      "step": 219245
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.245975971221924,
      "learning_rate": 3.464749806960798e-06,
      "loss": 3.1741,
      "step": 219246
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.231812000274658,
      "learning_rate": 3.4641299429446823e-06,
      "loss": 3.0185,
      "step": 219247
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.490586280822754,
      "learning_rate": 3.463510134060199e-06,
      "loss": 3.017,
      "step": 219248
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8644824028015137,
      "learning_rate": 3.4628903803074147e-06,
      "loss": 2.9116,
      "step": 219249
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0718209743499756,
      "learning_rate": 3.4622706816864966e-06,
      "loss": 3.1451,
      "step": 219250
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2901227474212646,
      "learning_rate": 3.461651038197577e-06,
      "loss": 2.9407,
      "step": 219251
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0446765422821045,
      "learning_rate": 3.46103144984069e-06,
      "loss": 2.6439,
      "step": 219252
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.04067063331604,
      "learning_rate": 3.4604119166160017e-06,
      "loss": 2.9818,
      "step": 219253
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0554709434509277,
      "learning_rate": 3.4597924385236453e-06,
      "loss": 2.8607,
      "step": 219254
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0113131999969482,
      "learning_rate": 3.4591730155636876e-06,
      "loss": 3.0546,
      "step": 219255
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0505027770996094,
      "learning_rate": 3.4585536477362617e-06,
      "loss": 2.7333,
      "step": 219256
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8116586208343506,
      "learning_rate": 3.4579343350415013e-06,
      "loss": 2.8407,
      "step": 219257
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.146256446838379,
      "learning_rate": 3.457315077479472e-06,
      "loss": 2.8131,
      "step": 219258
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8119235038757324,
      "learning_rate": 3.456695875050408e-06,
      "loss": 2.8526,
      "step": 219259
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.6526997089385986,
      "learning_rate": 3.4560767277542754e-06,
      "loss": 2.7942,
      "step": 219260
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.210587978363037,
      "learning_rate": 3.4554576355912743e-06,
      "loss": 2.7277,
      "step": 219261
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.2629637718200684,
      "learning_rate": 3.4548385985615045e-06,
      "loss": 2.9853,
      "step": 219262
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.0241403579711914,
      "learning_rate": 3.454219616665066e-06,
      "loss": 2.7302,
      "step": 219263
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.698213815689087,
      "learning_rate": 3.453600689902092e-06,
      "loss": 2.9778,
      "step": 219264
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.970735549926758,
      "learning_rate": 3.452981818272682e-06,
      "loss": 2.8033,
      "step": 219265
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8032820224761963,
      "learning_rate": 3.4523630017770033e-06,
      "loss": 3.0337,
      "step": 219266
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1457126140594482,
      "learning_rate": 3.4517442404150884e-06,
      "loss": 2.9667,
      "step": 219267
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1108388900756836,
      "learning_rate": 3.4511255341870714e-06,
      "loss": 3.2241,
      "step": 219268
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.107548713684082,
      "learning_rate": 3.4505068830931515e-06,
      "loss": 2.9694,
      "step": 219269
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.670041084289551,
      "learning_rate": 3.4498882871333287e-06,
      "loss": 2.7888,
      "step": 219270
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.941390037536621,
      "learning_rate": 3.44926974630777e-06,
      "loss": 2.8612,
      "step": 219271
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.784742593765259,
      "learning_rate": 3.4486512606166416e-06,
      "loss": 2.8169,
      "step": 219272
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0062994956970215,
      "learning_rate": 3.448032830059977e-06,
      "loss": 2.9505,
      "step": 219273
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.9846243858337402,
      "learning_rate": 3.447414454637876e-06,
      "loss": 2.9501,
      "step": 219274
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.32025146484375,
      "learning_rate": 3.446796134350571e-06,
      "loss": 3.1337,
      "step": 219275
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6874701976776123,
      "learning_rate": 3.44617786919803e-06,
      "loss": 2.8068,
      "step": 219276
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3960912227630615,
      "learning_rate": 3.4455596591805192e-06,
      "loss": 2.861,
      "step": 219277
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5736606121063232,
      "learning_rate": 3.444941504298038e-06,
      "loss": 2.8983,
      "step": 219278
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1091339588165283,
      "learning_rate": 3.4443234045507527e-06,
      "loss": 2.9119,
      "step": 219279
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2042529582977295,
      "learning_rate": 3.443705359938731e-06,
      "loss": 2.7631,
      "step": 219280
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1899256706237793,
      "learning_rate": 3.443087370462172e-06,
      "loss": 3.1005,
      "step": 219281
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.515848398208618,
      "learning_rate": 3.442469436121076e-06,
      "loss": 2.9461,
      "step": 219282
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6518430709838867,
      "learning_rate": 3.4418515569156757e-06,
      "loss": 2.7121,
      "step": 219283
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.646970272064209,
      "learning_rate": 3.4412337328460383e-06,
      "loss": 2.8948,
      "step": 219284
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4284143447875977,
      "learning_rate": 3.440615963912263e-06,
      "loss": 2.8452,
      "step": 219285
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.028388500213623,
      "learning_rate": 3.4399982501144506e-06,
      "loss": 2.8287,
      "step": 219286
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1693692207336426,
      "learning_rate": 3.439380591452767e-06,
      "loss": 2.7505,
      "step": 219287
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9467923641204834,
      "learning_rate": 3.4387629879272793e-06,
      "loss": 2.8136,
      "step": 219288
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9982852935791016,
      "learning_rate": 3.4381454395381534e-06,
      "loss": 2.7976,
      "step": 219289
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.790184259414673,
      "learning_rate": 3.4375279462854565e-06,
      "loss": 3.019,
      "step": 219290
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6841790676116943,
      "learning_rate": 3.4369105081693215e-06,
      "loss": 3.196,
      "step": 219291
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0994365215301514,
      "learning_rate": 3.4362931251898484e-06,
      "loss": 3.023,
      "step": 219292
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.754300594329834,
      "learning_rate": 3.4356757973472036e-06,
      "loss": 3.0033,
      "step": 219293
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.991658926010132,
      "learning_rate": 3.435058524641421e-06,
      "loss": 3.0817,
      "step": 219294
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0023696422576904,
      "learning_rate": 3.4344413070726662e-06,
      "loss": 2.8174,
      "step": 219295
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8856709003448486,
      "learning_rate": 3.433824144641073e-06,
      "loss": 2.8894,
      "step": 219296
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0123391151428223,
      "learning_rate": 3.4332070373467414e-06,
      "loss": 2.9247,
      "step": 219297
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7577877044677734,
      "learning_rate": 3.4325899851897375e-06,
      "loss": 2.7803,
      "step": 219298
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1807312965393066,
      "learning_rate": 3.4319729881702284e-06,
      "loss": 2.7793,
      "step": 219299
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9689624309539795,
      "learning_rate": 3.431356046288314e-06,
      "loss": 2.846,
      "step": 219300
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.101071357727051,
      "learning_rate": 3.4307391595440936e-06,
      "loss": 2.9901,
      "step": 219301
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8322384357452393,
      "learning_rate": 3.4301223279377343e-06,
      "loss": 3.0146,
      "step": 219302
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1996054649353027,
      "learning_rate": 3.4295055514692692e-06,
      "loss": 2.9793,
      "step": 219303
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7027037143707275,
      "learning_rate": 3.428888830138865e-06,
      "loss": 2.9176,
      "step": 219304
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.037013292312622,
      "learning_rate": 3.428272163946655e-06,
      "loss": 2.931,
      "step": 219305
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0111641883850098,
      "learning_rate": 3.4276555528927053e-06,
      "loss": 3.0785,
      "step": 219306
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0604727268218994,
      "learning_rate": 3.427038996977183e-06,
      "loss": 2.8908,
      "step": 219307
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.27976131439209,
      "learning_rate": 3.426422496200154e-06,
      "loss": 3.1443,
      "step": 219308
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.241213083267212,
      "learning_rate": 3.4258060505617523e-06,
      "loss": 3.0479,
      "step": 219309
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.226390838623047,
      "learning_rate": 3.425189660062078e-06,
      "loss": 3.2267,
      "step": 219310
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8858838081359863,
      "learning_rate": 3.424573324701263e-06,
      "loss": 2.9974,
      "step": 219311
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7265329360961914,
      "learning_rate": 3.4239570444794086e-06,
      "loss": 2.8755,
      "step": 219312
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.035802841186523,
      "learning_rate": 3.423340819396647e-06,
      "loss": 2.8966,
      "step": 219313
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1419403553009033,
      "learning_rate": 3.4227246494531125e-06,
      "loss": 3.0773,
      "step": 219314
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5048024654388428,
      "learning_rate": 3.422108534648871e-06,
      "loss": 3.0945,
      "step": 219315
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.937879800796509,
      "learning_rate": 3.4214924749840556e-06,
      "loss": 3.0663,
      "step": 219316
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0852458477020264,
      "learning_rate": 3.4208764704587998e-06,
      "loss": 2.8267,
      "step": 219317
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4690377712249756,
      "learning_rate": 3.4202605210731705e-06,
      "loss": 3.0111,
      "step": 219318
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.984874725341797,
      "learning_rate": 3.4196446268273336e-06,
      "loss": 2.8487,
      "step": 219319
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4813482761383057,
      "learning_rate": 3.4190287877213897e-06,
      "loss": 2.9787,
      "step": 219320
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.687556505203247,
      "learning_rate": 3.4184130037554047e-06,
      "loss": 2.6816,
      "step": 219321
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.898526906967163,
      "learning_rate": 3.417797274929579e-06,
      "loss": 2.8532,
      "step": 219322
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9721548557281494,
      "learning_rate": 3.417181601243979e-06,
      "loss": 3.0823,
      "step": 219323
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.717700242996216,
      "learning_rate": 3.4165659826987044e-06,
      "loss": 2.5725,
      "step": 219324
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.934293031692505,
      "learning_rate": 3.4159504192938893e-06,
      "loss": 3.0235,
      "step": 219325
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.256476402282715,
      "learning_rate": 3.415334911029666e-06,
      "loss": 3.026,
      "step": 219326
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7020580768585205,
      "learning_rate": 3.4147194579061345e-06,
      "loss": 2.8148,
      "step": 219327
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2258522510528564,
      "learning_rate": 3.4141040599233947e-06,
      "loss": 2.9652,
      "step": 219328
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8443660736083984,
      "learning_rate": 3.4134887170815808e-06,
      "loss": 2.7083,
      "step": 219329
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.83888578414917,
      "learning_rate": 3.412873429380758e-06,
      "loss": 3.0481,
      "step": 219330
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9210145473480225,
      "learning_rate": 3.412258196821127e-06,
      "loss": 2.9288,
      "step": 219331
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.577268600463867,
      "learning_rate": 3.411643019402721e-06,
      "loss": 3.1079,
      "step": 219332
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.036959648132324,
      "learning_rate": 3.4110278971257064e-06,
      "loss": 2.9568,
      "step": 219333
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.1937031745910645,
      "learning_rate": 3.4104128299901835e-06,
      "loss": 2.8875,
      "step": 219334
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8539557456970215,
      "learning_rate": 3.4097978179962513e-06,
      "loss": 3.0367,
      "step": 219335
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5018885135650635,
      "learning_rate": 3.409182861144044e-06,
      "loss": 2.7672,
      "step": 219336
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0340425968170166,
      "learning_rate": 3.408567959433661e-06,
      "loss": 3.0359,
      "step": 219337
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.28808331489563,
      "learning_rate": 3.407953112865236e-06,
      "loss": 2.8829,
      "step": 219338
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9743540287017822,
      "learning_rate": 3.4073383214388685e-06,
      "loss": 2.9703,
      "step": 219339
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5357515811920166,
      "learning_rate": 3.4067235851546913e-06,
      "loss": 2.9307,
      "step": 219340
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7040717601776123,
      "learning_rate": 3.406108904012772e-06,
      "loss": 2.9791,
      "step": 219341
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.938673973083496,
      "learning_rate": 3.4054942780132764e-06,
      "loss": 2.8773,
      "step": 219342
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8440685272216797,
      "learning_rate": 3.4048797071562716e-06,
      "loss": 2.8571,
      "step": 219343
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.923252582550049,
      "learning_rate": 3.404265191441924e-06,
      "loss": 3.0123,
      "step": 219344
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9707589149475098,
      "learning_rate": 3.4036507308703e-06,
      "loss": 2.9775,
      "step": 219345
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2856972217559814,
      "learning_rate": 3.403036325441566e-06,
      "loss": 2.9836,
      "step": 219346
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.048366069793701,
      "learning_rate": 3.4024219751557893e-06,
      "loss": 2.8517,
      "step": 219347
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.937856674194336,
      "learning_rate": 3.4018076800131024e-06,
      "loss": 2.7359,
      "step": 219348
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5008509159088135,
      "learning_rate": 3.4011934400136055e-06,
      "loss": 2.7921,
      "step": 219349
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.217618465423584,
      "learning_rate": 3.400579255157465e-06,
      "loss": 2.8357,
      "step": 219350
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.939039707183838,
      "learning_rate": 3.399965125444715e-06,
      "loss": 2.9745,
      "step": 219351
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5031611919403076,
      "learning_rate": 3.3993510508755205e-06,
      "loss": 2.8259,
      "step": 219352
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0547122955322266,
      "learning_rate": 3.398737031449983e-06,
      "loss": 2.8463,
      "step": 219353
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5609354972839355,
      "learning_rate": 3.3981230671682348e-06,
      "loss": 2.9824,
      "step": 219354
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.196072578430176,
      "learning_rate": 3.3975091580303425e-06,
      "loss": 2.8421,
      "step": 219355
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7771589756011963,
      "learning_rate": 3.3968953040365066e-06,
      "loss": 2.995,
      "step": 219356
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.975869655609131,
      "learning_rate": 3.39628150518676e-06,
      "loss": 3.1177,
      "step": 219357
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.9350945949554443,
      "learning_rate": 3.3956677614812356e-06,
      "loss": 2.7341,
      "step": 219358
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.486982822418213,
      "learning_rate": 3.395054072920034e-06,
      "loss": 3.0581,
      "step": 219359
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8551297187805176,
      "learning_rate": 3.394440439503321e-06,
      "loss": 2.9337,
      "step": 219360
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.248029947280884,
      "learning_rate": 3.3938268612311637e-06,
      "loss": 2.8591,
      "step": 219361
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7247657775878906,
      "learning_rate": 3.3932133381037284e-06,
      "loss": 2.8197,
      "step": 219362
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.564646005630493,
      "learning_rate": 3.392599870121082e-06,
      "loss": 2.9936,
      "step": 219363
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.038264274597168,
      "learning_rate": 3.3919864572833242e-06,
      "loss": 2.7813,
      "step": 219364
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.228867769241333,
      "learning_rate": 3.3913730995905884e-06,
      "loss": 2.8282,
      "step": 219365
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2715566158294678,
      "learning_rate": 3.390759797043041e-06,
      "loss": 3.0705,
      "step": 219366
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.621685743331909,
      "learning_rate": 3.3901465496407153e-06,
      "loss": 3.0903,
      "step": 219367
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8245785236358643,
      "learning_rate": 3.3895333573837446e-06,
      "loss": 2.8585,
      "step": 219368
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0950005054473877,
      "learning_rate": 3.388920220272295e-06,
      "loss": 3.1691,
      "step": 219369
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9258813858032227,
      "learning_rate": 3.388307138306434e-06,
      "loss": 3.0529,
      "step": 219370
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2130749225616455,
      "learning_rate": 3.3876941114862944e-06,
      "loss": 3.2077,
      "step": 219371
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2695207595825195,
      "learning_rate": 3.3870811398119755e-06,
      "loss": 2.9094,
      "step": 219372
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.660540819168091,
      "learning_rate": 3.3864682232835783e-06,
      "loss": 2.9017,
      "step": 219373
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3544363975524902,
      "learning_rate": 3.3858553619012352e-06,
      "loss": 2.7892,
      "step": 219374
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.146939277648926,
      "learning_rate": 3.3852425556651132e-06,
      "loss": 2.835,
      "step": 219375
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.129351854324341,
      "learning_rate": 3.384629804575212e-06,
      "loss": 2.9879,
      "step": 219376
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.296523094177246,
      "learning_rate": 3.3840171086317315e-06,
      "loss": 2.892,
      "step": 219377
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.26395845413208,
      "learning_rate": 3.3834044678347716e-06,
      "loss": 3.0766,
      "step": 219378
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5143158435821533,
      "learning_rate": 3.382791882184399e-06,
      "loss": 2.8507,
      "step": 219379
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6915464401245117,
      "learning_rate": 3.382179351680814e-06,
      "loss": 3.023,
      "step": 219380
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4452788829803467,
      "learning_rate": 3.381566876324049e-06,
      "loss": 3.0273,
      "step": 219381
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3751893043518066,
      "learning_rate": 3.380954456114271e-06,
      "loss": 2.8046,
      "step": 219382
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.098400831222534,
      "learning_rate": 3.3803420910515466e-06,
      "loss": 2.8279,
      "step": 219383
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.719247341156006,
      "learning_rate": 3.3797297811360423e-06,
      "loss": 2.9773,
      "step": 219384
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.704723596572876,
      "learning_rate": 3.379117526367825e-06,
      "loss": 3.1504,
      "step": 219385
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.281680107116699,
      "learning_rate": 3.3785053267470276e-06,
      "loss": 3.0567,
      "step": 219386
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.6741209030151367,
      "learning_rate": 3.377893182273783e-06,
      "loss": 3.0642,
      "step": 219387
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.690195083618164,
      "learning_rate": 3.377281092948192e-06,
      "loss": 2.9234,
      "step": 219388
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8932154178619385,
      "learning_rate": 3.3766690587703206e-06,
      "loss": 2.7706,
      "step": 219389
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7937417030334473,
      "learning_rate": 3.3760570797403686e-06,
      "loss": 2.6672,
      "step": 219390
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0323119163513184,
      "learning_rate": 3.3754451558583695e-06,
      "loss": 2.956,
      "step": 219391
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2942442893981934,
      "learning_rate": 3.374833287124523e-06,
      "loss": 2.9723,
      "step": 219392
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.113556146621704,
      "learning_rate": 3.374221473538863e-06,
      "loss": 2.8984,
      "step": 219393
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.96278977394104,
      "learning_rate": 3.373609715101555e-06,
      "loss": 2.9986,
      "step": 219394
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1595993041992188,
      "learning_rate": 3.3729980118126665e-06,
      "loss": 3.0968,
      "step": 219395
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.6668612957000732,
      "learning_rate": 3.37238636367233e-06,
      "loss": 3.1179,
      "step": 219396
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.361046314239502,
      "learning_rate": 3.3717747706806797e-06,
      "loss": 2.835,
      "step": 219397
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.619248628616333,
      "learning_rate": 3.3711632328378478e-06,
      "loss": 2.8747,
      "step": 219398
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0260519981384277,
      "learning_rate": 3.370551750143868e-06,
      "loss": 2.9017,
      "step": 219399
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8357348442077637,
      "learning_rate": 3.369940322598941e-06,
      "loss": 3.0835,
      "step": 219400
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.299689292907715,
      "learning_rate": 3.3693289502030984e-06,
      "loss": 3.0372,
      "step": 219401
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.986778974533081,
      "learning_rate": 3.3687176329565412e-06,
      "loss": 3.0159,
      "step": 219402
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8408474922180176,
      "learning_rate": 3.3681063708593027e-06,
      "loss": 2.9451,
      "step": 219403
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8581273555755615,
      "learning_rate": 3.3674951639115488e-06,
      "loss": 2.918,
      "step": 219404
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.873180389404297,
      "learning_rate": 3.36688401211338e-06,
      "loss": 2.8977,
      "step": 219405
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.344799041748047,
      "learning_rate": 3.366272915464896e-06,
      "loss": 2.9123,
      "step": 219406
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.249240398406982,
      "learning_rate": 3.36566187396623e-06,
      "loss": 2.8054,
      "step": 219407
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9510104656219482,
      "learning_rate": 3.3650508876175154e-06,
      "loss": 2.7223,
      "step": 219408
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8537845611572266,
      "learning_rate": 3.364439956418785e-06,
      "loss": 3.0164,
      "step": 219409
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0080318450927734,
      "learning_rate": 3.3638290803702393e-06,
      "loss": 3.002,
      "step": 219410
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6709322929382324,
      "learning_rate": 3.3632182594719114e-06,
      "loss": 2.7466,
      "step": 219411
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.908064126968384,
      "learning_rate": 3.362607493724001e-06,
      "loss": 2.9883,
      "step": 219412
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.482480525970459,
      "learning_rate": 3.3619967831265747e-06,
      "loss": 2.895,
      "step": 219413
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9912312030792236,
      "learning_rate": 3.361386127679766e-06,
      "loss": 2.8105,
      "step": 219414
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.65300989151001,
      "learning_rate": 3.360775527383641e-06,
      "loss": 2.8447,
      "step": 219415
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.986452341079712,
      "learning_rate": 3.3601649822384002e-06,
      "loss": 2.9782,
      "step": 219416
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.290958881378174,
      "learning_rate": 3.359554492244043e-06,
      "loss": 3.1157,
      "step": 219417
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.896028757095337,
      "learning_rate": 3.3589440574007696e-06,
      "loss": 2.9823,
      "step": 219418
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0763676166534424,
      "learning_rate": 3.35833367770868e-06,
      "loss": 3.0108,
      "step": 219419
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8897132873535156,
      "learning_rate": 3.357723353167874e-06,
      "loss": 2.9651,
      "step": 219420
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.789889097213745,
      "learning_rate": 3.357113083778451e-06,
      "loss": 2.8199,
      "step": 219421
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9824278354644775,
      "learning_rate": 3.3565028695405782e-06,
      "loss": 3.0086,
      "step": 219422
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8567440509796143,
      "learning_rate": 3.3558927104542887e-06,
      "loss": 2.9401,
      "step": 219423
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.244978427886963,
      "learning_rate": 3.3552826065197493e-06,
      "loss": 2.731,
      "step": 219424
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.108617067337036,
      "learning_rate": 3.3546725577370594e-06,
      "loss": 2.9696,
      "step": 219425
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9910809993743896,
      "learning_rate": 3.354062564106319e-06,
      "loss": 2.9614,
      "step": 219426
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5397064685821533,
      "learning_rate": 3.353452625627695e-06,
      "loss": 2.7137,
      "step": 219427
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.878196954727173,
      "learning_rate": 3.3528427423012538e-06,
      "loss": 2.8845,
      "step": 219428
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6719272136688232,
      "learning_rate": 3.352232914127095e-06,
      "loss": 2.9781,
      "step": 219429
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.950634479522705,
      "learning_rate": 3.3516231411053864e-06,
      "loss": 3.0139,
      "step": 219430
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.819828510284424,
      "learning_rate": 3.3510134232362263e-06,
      "loss": 3.0211,
      "step": 219431
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.339995861053467,
      "learning_rate": 3.3504037605196486e-06,
      "loss": 3.0616,
      "step": 219432
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8621935844421387,
      "learning_rate": 3.349794152955887e-06,
      "loss": 3.0687,
      "step": 219433
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0142035484313965,
      "learning_rate": 3.349184600544974e-06,
      "loss": 2.8074,
      "step": 219434
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.648306369781494,
      "learning_rate": 3.3485751032870434e-06,
      "loss": 2.7176,
      "step": 219435
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6437244415283203,
      "learning_rate": 3.347965661182228e-06,
      "loss": 3.0487,
      "step": 219436
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.80629301071167,
      "learning_rate": 3.347356274230628e-06,
      "loss": 2.986,
      "step": 219437
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.214053153991699,
      "learning_rate": 3.3467469424323433e-06,
      "loss": 3.0132,
      "step": 219438
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0121078491210938,
      "learning_rate": 3.346137665787507e-06,
      "loss": 2.8263,
      "step": 219439
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.089341163635254,
      "learning_rate": 3.345528444296186e-06,
      "loss": 2.8577,
      "step": 219440
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.758406162261963,
      "learning_rate": 3.34491927795858e-06,
      "loss": 2.8747,
      "step": 219441
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.6696012020111084,
      "learning_rate": 3.344310166774722e-06,
      "loss": 3.2422,
      "step": 219442
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.003296375274658,
      "learning_rate": 3.3437011107447786e-06,
      "loss": 2.7818,
      "step": 219443
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2248306274414062,
      "learning_rate": 3.343092109868817e-06,
      "loss": 2.8219,
      "step": 219444
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.395402431488037,
      "learning_rate": 3.342483164147003e-06,
      "loss": 2.845,
      "step": 219445
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.940316915512085,
      "learning_rate": 3.3418742735794035e-06,
      "loss": 2.9848,
      "step": 219446
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.975478410720825,
      "learning_rate": 3.3412654381661517e-06,
      "loss": 2.9212,
      "step": 219447
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7035739421844482,
      "learning_rate": 3.340656657907348e-06,
      "loss": 2.5236,
      "step": 219448
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.051968812942505,
      "learning_rate": 3.3400479328031584e-06,
      "loss": 2.8904,
      "step": 219449
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7457058429718018,
      "learning_rate": 3.339439262853616e-06,
      "loss": 3.0366,
      "step": 219450
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0099070072174072,
      "learning_rate": 3.3388306480588543e-06,
      "loss": 3.0753,
      "step": 219451
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7987728118896484,
      "learning_rate": 3.33822208841904e-06,
      "loss": 2.771,
      "step": 219452
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.59468412399292,
      "learning_rate": 3.33761358393424e-06,
      "loss": 2.8245,
      "step": 219453
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.913682460784912,
      "learning_rate": 3.337005134604587e-06,
      "loss": 2.9643,
      "step": 219454
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9364590644836426,
      "learning_rate": 3.3363967404301805e-06,
      "loss": 2.8554,
      "step": 219455
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3107283115386963,
      "learning_rate": 3.3357884014111215e-06,
      "loss": 3.0219,
      "step": 219456
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.407656669616699,
      "learning_rate": 3.3351801175475425e-06,
      "loss": 3.0786,
      "step": 219457
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.330300807952881,
      "learning_rate": 3.3345718888395434e-06,
      "loss": 2.7353,
      "step": 219458
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.206604242324829,
      "learning_rate": 3.3339637152872576e-06,
      "loss": 2.8194,
      "step": 219459
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.978438138961792,
      "learning_rate": 3.3333555968908186e-06,
      "loss": 3.1194,
      "step": 219460
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7627439498901367,
      "learning_rate": 3.332747533650293e-06,
      "loss": 2.7735,
      "step": 219461
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2001216411590576,
      "learning_rate": 3.3321395255657803e-06,
      "loss": 3.1631,
      "step": 219462
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.000009298324585,
      "learning_rate": 3.331531572637447e-06,
      "loss": 2.8316,
      "step": 219463
    },
    {
      "epoch": 2.86,
      "grad_norm": 5.428502559661865,
      "learning_rate": 3.33092367486536e-06,
      "loss": 2.8813,
      "step": 219464
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.075563669204712,
      "learning_rate": 3.3303158322496858e-06,
      "loss": 2.8157,
      "step": 219465
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1203136444091797,
      "learning_rate": 3.329708044790491e-06,
      "loss": 2.7921,
      "step": 219466
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4882466793060303,
      "learning_rate": 3.329100312487942e-06,
      "loss": 2.8192,
      "step": 219467
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7627980709075928,
      "learning_rate": 3.328492635342039e-06,
      "loss": 2.8435,
      "step": 219468
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.039216995239258,
      "learning_rate": 3.3278850133530155e-06,
      "loss": 2.638,
      "step": 219469
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0535876750946045,
      "learning_rate": 3.327277446520937e-06,
      "loss": 3.1152,
      "step": 219470
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.196566343307495,
      "learning_rate": 3.3266699348459045e-06,
      "loss": 2.7371,
      "step": 219471
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.697054624557495,
      "learning_rate": 3.326062478328051e-06,
      "loss": 2.7934,
      "step": 219472
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.488893985748291,
      "learning_rate": 3.3254550769675093e-06,
      "loss": 2.8763,
      "step": 219473
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9591197967529297,
      "learning_rate": 3.324847730764313e-06,
      "loss": 3.1235,
      "step": 219474
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7985124588012695,
      "learning_rate": 3.324240439718662e-06,
      "loss": 2.9823,
      "step": 219475
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.97871994972229,
      "learning_rate": 3.323633203830622e-06,
      "loss": 2.8824,
      "step": 219476
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0586462020874023,
      "learning_rate": 3.323026023100328e-06,
      "loss": 2.9603,
      "step": 219477
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5307953357696533,
      "learning_rate": 3.3224188975278455e-06,
      "loss": 3.07,
      "step": 219478
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4806125164031982,
      "learning_rate": 3.3218118271134075e-06,
      "loss": 2.843,
      "step": 219479
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.313094139099121,
      "learning_rate": 3.321204811856981e-06,
      "loss": 2.9448,
      "step": 219480
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.143383502960205,
      "learning_rate": 3.320597851758766e-06,
      "loss": 2.847,
      "step": 219481
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.108880043029785,
      "learning_rate": 3.3199909468188292e-06,
      "loss": 2.7931,
      "step": 219482
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.499337911605835,
      "learning_rate": 3.319384097037303e-06,
      "loss": 3.04,
      "step": 219483
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.787003755569458,
      "learning_rate": 3.3187773024143214e-06,
      "loss": 2.6538,
      "step": 219484
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8173301219940186,
      "learning_rate": 3.3181705629500177e-06,
      "loss": 3.0379,
      "step": 219485
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.493272066116333,
      "learning_rate": 3.3175638786443916e-06,
      "loss": 2.6188,
      "step": 219486
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7105906009674072,
      "learning_rate": 3.316957249497676e-06,
      "loss": 2.9691,
      "step": 219487
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7147486209869385,
      "learning_rate": 3.316350675509938e-06,
      "loss": 3.0062,
      "step": 219488
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.628251791000366,
      "learning_rate": 3.315744156681277e-06,
      "loss": 2.8305,
      "step": 219489
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.983157157897949,
      "learning_rate": 3.3151376930118268e-06,
      "loss": 2.8396,
      "step": 219490
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1580121517181396,
      "learning_rate": 3.3145312845017202e-06,
      "loss": 3.1106,
      "step": 219491
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.882580041885376,
      "learning_rate": 3.313924931150991e-06,
      "loss": 2.8305,
      "step": 219492
    },
    {
      "epoch": 2.86,
      "grad_norm": 5.48677396774292,
      "learning_rate": 3.313318632959838e-06,
      "loss": 2.9435,
      "step": 219493
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.282621145248413,
      "learning_rate": 3.312712389928296e-06,
      "loss": 2.7823,
      "step": 219494
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9561455249786377,
      "learning_rate": 3.3121062020565634e-06,
      "loss": 2.9674,
      "step": 219495
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9916045665740967,
      "learning_rate": 3.3115000693447082e-06,
      "loss": 3.1317,
      "step": 219496
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.454718828201294,
      "learning_rate": 3.310893991792829e-06,
      "loss": 3.0275,
      "step": 219497
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.728177309036255,
      "learning_rate": 3.3102879694010597e-06,
      "loss": 2.9259,
      "step": 219498
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2999181747436523,
      "learning_rate": 3.3096820021695336e-06,
      "loss": 2.8087,
      "step": 219499
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.031301498413086,
      "learning_rate": 3.309076090098284e-06,
      "loss": 2.9153,
      "step": 219500
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.656299352645874,
      "learning_rate": 3.3084702331875433e-06,
      "loss": 2.8425,
      "step": 219501
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.1078643798828125,
      "learning_rate": 3.3078644314373125e-06,
      "loss": 2.9901,
      "step": 219502
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.411736249923706,
      "learning_rate": 3.3072586848477577e-06,
      "loss": 2.7971,
      "step": 219503
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.169280767440796,
      "learning_rate": 3.306652993418979e-06,
      "loss": 2.7762,
      "step": 219504
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5911948680877686,
      "learning_rate": 3.3060473571511093e-06,
      "loss": 2.7803,
      "step": 219505
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.970012903213501,
      "learning_rate": 3.3054417760442487e-06,
      "loss": 3.0174,
      "step": 219506
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.271545886993408,
      "learning_rate": 3.304836250098497e-06,
      "loss": 2.834,
      "step": 219507
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.092414379119873,
      "learning_rate": 3.304230779313988e-06,
      "loss": 2.6619,
      "step": 219508
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.095334053039551,
      "learning_rate": 3.3036253636908204e-06,
      "loss": 2.7866,
      "step": 219509
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.377675771713257,
      "learning_rate": 3.3030200032290955e-06,
      "loss": 2.696,
      "step": 219510
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2781670093536377,
      "learning_rate": 3.3024146979289455e-06,
      "loss": 2.5731,
      "step": 219511
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.602332353591919,
      "learning_rate": 3.301809447790471e-06,
      "loss": 2.6515,
      "step": 219512
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.419668674468994,
      "learning_rate": 3.301204252813805e-06,
      "loss": 2.8807,
      "step": 219513
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1016509532928467,
      "learning_rate": 3.300599112999014e-06,
      "loss": 2.901,
      "step": 219514
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.086549997329712,
      "learning_rate": 3.2999940283462977e-06,
      "loss": 3.0473,
      "step": 219515
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4642293453216553,
      "learning_rate": 3.29938899885569e-06,
      "loss": 2.9324,
      "step": 219516
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.652980089187622,
      "learning_rate": 3.29878402452729e-06,
      "loss": 2.9865,
      "step": 219517
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2336840629577637,
      "learning_rate": 3.2981791053612985e-06,
      "loss": 2.9814,
      "step": 219518
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.931119918823242,
      "learning_rate": 3.297574241357748e-06,
      "loss": 2.8597,
      "step": 219519
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.990139961242676,
      "learning_rate": 3.296969432516772e-06,
      "loss": 3.151,
      "step": 219520
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.733386993408203,
      "learning_rate": 3.296364678838503e-06,
      "loss": 3.1098,
      "step": 219521
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0605368614196777,
      "learning_rate": 3.295759980323076e-06,
      "loss": 2.7455,
      "step": 219522
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.451284170150757,
      "learning_rate": 3.295155336970523e-06,
      "loss": 2.8755,
      "step": 219523
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.335350513458252,
      "learning_rate": 3.294550748781011e-06,
      "loss": 2.8199,
      "step": 219524
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.801835060119629,
      "learning_rate": 3.293946215754606e-06,
      "loss": 2.8832,
      "step": 219525
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.986776113510132,
      "learning_rate": 3.2933417378915085e-06,
      "loss": 2.9747,
      "step": 219526
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9705100059509277,
      "learning_rate": 3.2927373151917515e-06,
      "loss": 3.134,
      "step": 219527
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8294427394866943,
      "learning_rate": 3.292132947655535e-06,
      "loss": 2.9551,
      "step": 219528
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.403282880783081,
      "learning_rate": 3.2915286352828586e-06,
      "loss": 2.6674,
      "step": 219529
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.771183729171753,
      "learning_rate": 3.290924378073889e-06,
      "loss": 2.8215,
      "step": 219530
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.767232656478882,
      "learning_rate": 3.2903201760287267e-06,
      "loss": 3.0001,
      "step": 219531
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.9114763736724854,
      "learning_rate": 3.2897160291475045e-06,
      "loss": 3.1296,
      "step": 219532
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0708303451538086,
      "learning_rate": 3.289111937430322e-06,
      "loss": 2.9888,
      "step": 219533
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.65973162651062,
      "learning_rate": 3.2885079008773464e-06,
      "loss": 2.8003,
      "step": 219534
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5114309787750244,
      "learning_rate": 3.287903919488577e-06,
      "loss": 3.1606,
      "step": 219535
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.751917839050293,
      "learning_rate": 3.2872999932642143e-06,
      "loss": 2.6299,
      "step": 219536
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.882831335067749,
      "learning_rate": 3.286696122204324e-06,
      "loss": 2.9703,
      "step": 219537
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5865070819854736,
      "learning_rate": 3.2860923063090404e-06,
      "loss": 2.8921,
      "step": 219538
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7832698822021484,
      "learning_rate": 3.2854885455784964e-06,
      "loss": 3.1735,
      "step": 219539
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3000245094299316,
      "learning_rate": 3.284884840012791e-06,
      "loss": 2.8536,
      "step": 219540
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8550705909729004,
      "learning_rate": 3.2842811896119926e-06,
      "loss": 3.0947,
      "step": 219541
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3245396614074707,
      "learning_rate": 3.283677594376233e-06,
      "loss": 2.8699,
      "step": 219542
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.672999382019043,
      "learning_rate": 3.283074054305679e-06,
      "loss": 2.9132,
      "step": 219543
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.529428005218506,
      "learning_rate": 3.282470569400364e-06,
      "loss": 2.9032,
      "step": 219544
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2599854469299316,
      "learning_rate": 3.2818671396604547e-06,
      "loss": 2.9696,
      "step": 219545
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.267054557800293,
      "learning_rate": 3.281263765086084e-06,
      "loss": 2.9168,
      "step": 219546
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6716206073760986,
      "learning_rate": 3.2806604456772856e-06,
      "loss": 2.7489,
      "step": 219547
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.530813455581665,
      "learning_rate": 3.2800571814342257e-06,
      "loss": 2.8581,
      "step": 219548
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4291207790374756,
      "learning_rate": 3.2794539723570045e-06,
      "loss": 3.0582,
      "step": 219549
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5017247200012207,
      "learning_rate": 3.278850818445722e-06,
      "loss": 3.4034,
      "step": 219550
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8922576904296875,
      "learning_rate": 3.278247719700511e-06,
      "loss": 2.9049,
      "step": 219551
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9862608909606934,
      "learning_rate": 3.277644676121505e-06,
      "loss": 2.8241,
      "step": 219552
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5942630767822266,
      "learning_rate": 3.277041687708737e-06,
      "loss": 2.9861,
      "step": 219553
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.8463172912597656,
      "learning_rate": 3.276438754462407e-06,
      "loss": 2.9471,
      "step": 219554
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.590446949005127,
      "learning_rate": 3.2758358763825486e-06,
      "loss": 2.7457,
      "step": 219555
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1159026622772217,
      "learning_rate": 3.275233053469328e-06,
      "loss": 2.8702,
      "step": 219556
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.081087112426758,
      "learning_rate": 3.2746302857228457e-06,
      "loss": 2.8668,
      "step": 219557
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8774335384368896,
      "learning_rate": 3.274027573143234e-06,
      "loss": 2.8943,
      "step": 219558
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.007089138031006,
      "learning_rate": 3.27342491573056e-06,
      "loss": 3.2179,
      "step": 219559
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.504835605621338,
      "learning_rate": 3.2728223134849575e-06,
      "loss": 2.9482,
      "step": 219560
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0967917442321777,
      "learning_rate": 3.272219766406525e-06,
      "loss": 2.7538,
      "step": 219561
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.953352928161621,
      "learning_rate": 3.2716172744953973e-06,
      "loss": 2.9556,
      "step": 219562
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.571734666824341,
      "learning_rate": 3.271014837751673e-06,
      "loss": 2.8017,
      "step": 219563
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2649738788604736,
      "learning_rate": 3.2704124561755196e-06,
      "loss": 2.668,
      "step": 219564
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.883810520172119,
      "learning_rate": 3.2698101297669365e-06,
      "loss": 3.118,
      "step": 219565
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.411673069000244,
      "learning_rate": 3.269207858526124e-06,
      "loss": 3.1414,
      "step": 219566
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7815017700195312,
      "learning_rate": 3.268605642453148e-06,
      "loss": 3.0004,
      "step": 219567
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.853708267211914,
      "learning_rate": 3.2680034815481427e-06,
      "loss": 3.039,
      "step": 219568
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.9613025188446045,
      "learning_rate": 3.2674013758112406e-06,
      "loss": 2.9512,
      "step": 219569
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9790594577789307,
      "learning_rate": 3.2667993252425417e-06,
      "loss": 2.962,
      "step": 219570
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7557430267333984,
      "learning_rate": 3.2661973298420796e-06,
      "loss": 2.8728,
      "step": 219571
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.71034836769104,
      "learning_rate": 3.265595389610087e-06,
      "loss": 2.8069,
      "step": 219572
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.637502431869507,
      "learning_rate": 3.2649935045465982e-06,
      "loss": 2.983,
      "step": 219573
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7273614406585693,
      "learning_rate": 3.2643916746517784e-06,
      "loss": 2.9365,
      "step": 219574
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.175086736679077,
      "learning_rate": 3.2637898999256616e-06,
      "loss": 2.8572,
      "step": 219575
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.782334089279175,
      "learning_rate": 3.2631881803684477e-06,
      "loss": 2.8937,
      "step": 219576
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2250914573669434,
      "learning_rate": 3.2625865159802033e-06,
      "loss": 3.0994,
      "step": 219577
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.785412073135376,
      "learning_rate": 3.261984906761028e-06,
      "loss": 3.1763,
      "step": 219578
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1634111404418945,
      "learning_rate": 3.2613833527110553e-06,
      "loss": 2.8318,
      "step": 219579
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.271604537963867,
      "learning_rate": 3.2607818538303853e-06,
      "loss": 2.8565,
      "step": 219580
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.879617929458618,
      "learning_rate": 3.260180410119151e-06,
      "loss": 2.9772,
      "step": 219581
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7780802249908447,
      "learning_rate": 3.259579021577452e-06,
      "loss": 2.9502,
      "step": 219582
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.315906047821045,
      "learning_rate": 3.2589776882053886e-06,
      "loss": 2.9814,
      "step": 219583
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1436235904693604,
      "learning_rate": 3.258376410003094e-06,
      "loss": 2.8996,
      "step": 219584
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.356940507888794,
      "learning_rate": 3.257775186970668e-06,
      "loss": 2.7871,
      "step": 219585
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8177478313446045,
      "learning_rate": 3.257174019108211e-06,
      "loss": 3.0195,
      "step": 219586
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.8477630615234375,
      "learning_rate": 3.256572906415855e-06,
      "loss": 2.7037,
      "step": 219587
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9816415309906006,
      "learning_rate": 3.2559718488937013e-06,
      "loss": 2.9041,
      "step": 219588
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.065617799758911,
      "learning_rate": 3.2553708465418827e-06,
      "loss": 3.0912,
      "step": 219589
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.620443105697632,
      "learning_rate": 3.2547698993604654e-06,
      "loss": 3.0635,
      "step": 219590
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.686903715133667,
      "learning_rate": 3.2541690073496163e-06,
      "loss": 3.0626,
      "step": 219591
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.746890068054199,
      "learning_rate": 3.253568170509402e-06,
      "loss": 3.0602,
      "step": 219592
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.9336936473846436,
      "learning_rate": 3.252967388839922e-06,
      "loss": 2.944,
      "step": 219593
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0044426918029785,
      "learning_rate": 3.2523666623413437e-06,
      "loss": 2.8511,
      "step": 219594
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.1522393226623535,
      "learning_rate": 3.2517659910137663e-06,
      "loss": 2.8652,
      "step": 219595
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.8519179821014404,
      "learning_rate": 3.2511653748572565e-06,
      "loss": 2.8366,
      "step": 219596
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.629286050796509,
      "learning_rate": 3.250564813871981e-06,
      "loss": 2.9528,
      "step": 219597
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9836087226867676,
      "learning_rate": 3.2499643080580395e-06,
      "loss": 2.8873,
      "step": 219598
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.206118106842041,
      "learning_rate": 3.249363857415499e-06,
      "loss": 2.9744,
      "step": 219599
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.763838529586792,
      "learning_rate": 3.2487634619445256e-06,
      "loss": 2.923,
      "step": 219600
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.209467887878418,
      "learning_rate": 3.2481631216451863e-06,
      "loss": 2.6851,
      "step": 219601
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5293591022491455,
      "learning_rate": 3.247562836517614e-06,
      "loss": 2.8103,
      "step": 219602
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.351762294769287,
      "learning_rate": 3.2469626065619425e-06,
      "loss": 2.9162,
      "step": 219603
    },
    {
      "epoch": 2.86,
      "grad_norm": 5.258582592010498,
      "learning_rate": 3.246362431778238e-06,
      "loss": 2.9243,
      "step": 219604
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9321045875549316,
      "learning_rate": 3.2457623121666664e-06,
      "loss": 2.9219,
      "step": 219605
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.038398504257202,
      "learning_rate": 3.2451622477272953e-06,
      "loss": 2.9164,
      "step": 219606
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.597015619277954,
      "learning_rate": 3.2445622384602576e-06,
      "loss": 2.929,
      "step": 219607
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.167938232421875,
      "learning_rate": 3.2439622843656533e-06,
      "loss": 2.8052,
      "step": 219608
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.788282871246338,
      "learning_rate": 3.2433623854436153e-06,
      "loss": 3.0477,
      "step": 219609
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6290442943573,
      "learning_rate": 3.242762541694177e-06,
      "loss": 2.7867,
      "step": 219610
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.828878879547119,
      "learning_rate": 3.242162753117572e-06,
      "loss": 2.9965,
      "step": 219611
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.586808919906616,
      "learning_rate": 3.241563019713833e-06,
      "loss": 2.7891,
      "step": 219612
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2502686977386475,
      "learning_rate": 3.2409633414830937e-06,
      "loss": 2.7294,
      "step": 219613
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.556196451187134,
      "learning_rate": 3.240363718425454e-06,
      "loss": 2.7354,
      "step": 219614
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.033993721008301,
      "learning_rate": 3.2397641505410466e-06,
      "loss": 2.938,
      "step": 219615
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.680063009262085,
      "learning_rate": 3.2391646378299383e-06,
      "loss": 2.9726,
      "step": 219616
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1337602138519287,
      "learning_rate": 3.2385651802922964e-06,
      "loss": 2.9918,
      "step": 219617
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.247974395751953,
      "learning_rate": 3.2379657779281863e-06,
      "loss": 2.8442,
      "step": 219618
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2321863174438477,
      "learning_rate": 3.2373664307377755e-06,
      "loss": 3.0507,
      "step": 219619
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.608981132507324,
      "learning_rate": 3.236767138721097e-06,
      "loss": 3.0692,
      "step": 219620
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.576279401779175,
      "learning_rate": 3.2361679018783172e-06,
      "loss": 3.2563,
      "step": 219621
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.286478519439697,
      "learning_rate": 3.235568720209536e-06,
      "loss": 2.7375,
      "step": 219622
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.185621738433838,
      "learning_rate": 3.2349695937148866e-06,
      "loss": 3.1141,
      "step": 219623
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9774467945098877,
      "learning_rate": 3.234370522394436e-06,
      "loss": 2.853,
      "step": 219624
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7584710121154785,
      "learning_rate": 3.2337715062483505e-06,
      "loss": 2.8038,
      "step": 219625
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.847687005996704,
      "learning_rate": 3.233172545276663e-06,
      "loss": 2.7156,
      "step": 219626
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.944458484649658,
      "learning_rate": 3.232573639479541e-06,
      "loss": 2.8619,
      "step": 219627
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.732409715652466,
      "learning_rate": 3.2319747888570836e-06,
      "loss": 3.0364,
      "step": 219628
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.173290252685547,
      "learning_rate": 3.231375993409424e-06,
      "loss": 2.8331,
      "step": 219629
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0195472240448,
      "learning_rate": 3.230777253136663e-06,
      "loss": 2.9528,
      "step": 219630
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.735365390777588,
      "learning_rate": 3.2301785680388993e-06,
      "loss": 2.8197,
      "step": 219631
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.903101682662964,
      "learning_rate": 3.2295799381162335e-06,
      "loss": 2.8674,
      "step": 219632
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.6513824462890625,
      "learning_rate": 3.2289813633687657e-06,
      "loss": 2.8913,
      "step": 219633
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.81368350982666,
      "learning_rate": 3.228382843796662e-06,
      "loss": 2.9391,
      "step": 219634
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4098687171936035,
      "learning_rate": 3.2277843793999893e-06,
      "loss": 3.0526,
      "step": 219635
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8421976566314697,
      "learning_rate": 3.2271859701788806e-06,
      "loss": 2.9564,
      "step": 219636
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.844210147857666,
      "learning_rate": 3.2265876161334692e-06,
      "loss": 3.1121,
      "step": 219637
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8520586490631104,
      "learning_rate": 3.2259893172637883e-06,
      "loss": 2.9304,
      "step": 219638
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8126330375671387,
      "learning_rate": 3.2253910735700385e-06,
      "loss": 2.7802,
      "step": 219639
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3063900470733643,
      "learning_rate": 3.224792885052252e-06,
      "loss": 2.7539,
      "step": 219640
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.249319553375244,
      "learning_rate": 3.2241947517106294e-06,
      "loss": 2.8237,
      "step": 219641
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.152449607849121,
      "learning_rate": 3.2235966735451704e-06,
      "loss": 2.8467,
      "step": 219642
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4741005897521973,
      "learning_rate": 3.222998650556108e-06,
      "loss": 2.981,
      "step": 219643
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1823654174804688,
      "learning_rate": 3.2224006827434425e-06,
      "loss": 2.8988,
      "step": 219644
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.329941749572754,
      "learning_rate": 3.2218027701073736e-06,
      "loss": 3.0781,
      "step": 219645
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5912368297576904,
      "learning_rate": 3.2212049126479342e-06,
      "loss": 2.7879,
      "step": 219646
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6581642627716064,
      "learning_rate": 3.220607110365292e-06,
      "loss": 3.2189,
      "step": 219647
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.048551082611084,
      "learning_rate": 3.220009363259546e-06,
      "loss": 2.8674,
      "step": 219648
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.995048999786377,
      "learning_rate": 3.219411671330829e-06,
      "loss": 3.039,
      "step": 219649
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2769815921783447,
      "learning_rate": 3.2188140345791756e-06,
      "loss": 3.0137,
      "step": 219650
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.839057207107544,
      "learning_rate": 3.218216453004785e-06,
      "loss": 3.146,
      "step": 219651
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0083203315734863,
      "learning_rate": 3.21761892660769e-06,
      "loss": 3.0153,
      "step": 219652
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.425293445587158,
      "learning_rate": 3.217021455388058e-06,
      "loss": 3.0389,
      "step": 219653
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1961939334869385,
      "learning_rate": 3.2164240393459883e-06,
      "loss": 2.913,
      "step": 219654
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.449937105178833,
      "learning_rate": 3.2158266784816147e-06,
      "loss": 3.019,
      "step": 219655
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.369680643081665,
      "learning_rate": 3.215229372795003e-06,
      "loss": 2.8055,
      "step": 219656
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.832768678665161,
      "learning_rate": 3.2146321222862535e-06,
      "loss": 2.8184,
      "step": 219657
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.941671848297119,
      "learning_rate": 3.214034926955533e-06,
      "loss": 3.1648,
      "step": 219658
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.920689582824707,
      "learning_rate": 3.213437786802908e-06,
      "loss": 2.8688,
      "step": 219659
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8633365631103516,
      "learning_rate": 3.212840701828512e-06,
      "loss": 2.894,
      "step": 219660
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9434800148010254,
      "learning_rate": 3.2122436720324773e-06,
      "loss": 2.8612,
      "step": 219661
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.143402576446533,
      "learning_rate": 3.2116466974148714e-06,
      "loss": 2.8289,
      "step": 219662
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.959796667098999,
      "learning_rate": 3.211049777975827e-06,
      "loss": 2.9358,
      "step": 219663
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2495992183685303,
      "learning_rate": 3.2104529137154445e-06,
      "loss": 3.1598,
      "step": 219664
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.568481206893921,
      "learning_rate": 3.2098561046338568e-06,
      "loss": 2.9533,
      "step": 219665
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.473353624343872,
      "learning_rate": 3.2092593507311305e-06,
      "loss": 3.1202,
      "step": 219666
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8147776126861572,
      "learning_rate": 3.208662652007432e-06,
      "loss": 2.99,
      "step": 219667
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2492127418518066,
      "learning_rate": 3.2080660084628283e-06,
      "loss": 2.8062,
      "step": 219668
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.212110996246338,
      "learning_rate": 3.2074694200974527e-06,
      "loss": 3.1142,
      "step": 219669
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7221217155456543,
      "learning_rate": 3.2068728869114048e-06,
      "loss": 3.045,
      "step": 219670
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.405919551849365,
      "learning_rate": 3.2062764089048176e-06,
      "loss": 2.784,
      "step": 219671
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3506252765655518,
      "learning_rate": 3.205679986077758e-06,
      "loss": 2.9644,
      "step": 219672
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9122097492218018,
      "learning_rate": 3.205083618430393e-06,
      "loss": 3.0249,
      "step": 219673
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2766857147216797,
      "learning_rate": 3.2044873059627885e-06,
      "loss": 2.8225,
      "step": 219674
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.148294687271118,
      "learning_rate": 3.203891048675111e-06,
      "loss": 2.6067,
      "step": 219675
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.884486675262451,
      "learning_rate": 3.2032948465673613e-06,
      "loss": 2.8969,
      "step": 219676
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9073781967163086,
      "learning_rate": 3.202698699639772e-06,
      "loss": 3.0244,
      "step": 219677
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8380815982818604,
      "learning_rate": 3.2021026078923764e-06,
      "loss": 2.8685,
      "step": 219678
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.45234751701355,
      "learning_rate": 3.2015065713253407e-06,
      "loss": 2.9003,
      "step": 219679
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.501371145248413,
      "learning_rate": 3.2009105899387657e-06,
      "loss": 2.967,
      "step": 219680
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7187857627868652,
      "learning_rate": 3.200314663732684e-06,
      "loss": 3.0564,
      "step": 219681
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.368532657623291,
      "learning_rate": 3.1997187927072954e-06,
      "loss": 2.9862,
      "step": 219682
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8249435424804688,
      "learning_rate": 3.1991229768627004e-06,
      "loss": 2.7814,
      "step": 219683
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.798635482788086,
      "learning_rate": 3.198527216198965e-06,
      "loss": 2.8567,
      "step": 219684
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1774191856384277,
      "learning_rate": 3.1979315107162228e-06,
      "loss": 2.9096,
      "step": 219685
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6454734802246094,
      "learning_rate": 3.1973358604146073e-06,
      "loss": 3.0082,
      "step": 219686
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.010967254638672,
      "learning_rate": 3.1967402652941844e-06,
      "loss": 2.6099,
      "step": 219687
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6757211685180664,
      "learning_rate": 3.196144725355121e-06,
      "loss": 2.9802,
      "step": 219688
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.657609224319458,
      "learning_rate": 3.1955492405974835e-06,
      "loss": 2.785,
      "step": 219689
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4973700046539307,
      "learning_rate": 3.1949538110214055e-06,
      "loss": 2.9319,
      "step": 219690
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.886833667755127,
      "learning_rate": 3.1943584366269537e-06,
      "loss": 2.728,
      "step": 219691
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8600990772247314,
      "learning_rate": 3.1937631174143276e-06,
      "loss": 3.0438,
      "step": 219692
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1184117794036865,
      "learning_rate": 3.193167853383527e-06,
      "loss": 3.0447,
      "step": 219693
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.064575672149658,
      "learning_rate": 3.1925726445347522e-06,
      "loss": 2.861,
      "step": 219694
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.293241024017334,
      "learning_rate": 3.1919774908680694e-06,
      "loss": 2.8701,
      "step": 219695
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.373385429382324,
      "learning_rate": 3.1913823923836125e-06,
      "loss": 3.0067,
      "step": 219696
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1726326942443848,
      "learning_rate": 3.1907873490814805e-06,
      "loss": 2.8463,
      "step": 219697
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.790987014770508,
      "learning_rate": 3.190192360961774e-06,
      "loss": 3.0985,
      "step": 219698
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8841817378997803,
      "learning_rate": 3.1895974280245927e-06,
      "loss": 2.9932,
      "step": 219699
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.738088607788086,
      "learning_rate": 3.189002550270103e-06,
      "loss": 2.8435,
      "step": 219700
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8734095096588135,
      "learning_rate": 3.1884077276983723e-06,
      "loss": 2.7703,
      "step": 219701
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.831411600112915,
      "learning_rate": 3.1878129603095327e-06,
      "loss": 2.77,
      "step": 219702
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7224061489105225,
      "learning_rate": 3.1872182481036513e-06,
      "loss": 2.8599,
      "step": 219703
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9950973987579346,
      "learning_rate": 3.1866235910808944e-06,
      "loss": 2.8129,
      "step": 219704
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.129857301712036,
      "learning_rate": 3.1860289892413295e-06,
      "loss": 2.8748,
      "step": 219705
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5641226768493652,
      "learning_rate": 3.1854344425850887e-06,
      "loss": 2.9179,
      "step": 219706
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.924384593963623,
      "learning_rate": 3.1848399511122723e-06,
      "loss": 2.8604,
      "step": 219707
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.087778091430664,
      "learning_rate": 3.184245514823014e-06,
      "loss": 2.8498,
      "step": 219708
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0598740577697754,
      "learning_rate": 3.18365113371738e-06,
      "loss": 3.0936,
      "step": 219709
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1200332641601562,
      "learning_rate": 3.1830568077955365e-06,
      "loss": 2.843,
      "step": 219710
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6847009658813477,
      "learning_rate": 3.1824625370575508e-06,
      "loss": 2.7352,
      "step": 219711
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.529994010925293,
      "learning_rate": 3.1818683215035557e-06,
      "loss": 2.9486,
      "step": 219712
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1623687744140625,
      "learning_rate": 3.1812741611336513e-06,
      "loss": 2.7359,
      "step": 219713
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7044198513031006,
      "learning_rate": 3.180680055947937e-06,
      "loss": 3.0224,
      "step": 219714
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6689112186431885,
      "learning_rate": 3.1800860059465473e-06,
      "loss": 2.9324,
      "step": 219715
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.662122964859009,
      "learning_rate": 3.179492011129614e-06,
      "loss": 2.8243,
      "step": 219716
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9488720893859863,
      "learning_rate": 3.178898071497171e-06,
      "loss": 2.664,
      "step": 219717
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9004881381988525,
      "learning_rate": 3.178304187049385e-06,
      "loss": 3.2538,
      "step": 219718
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0782153606414795,
      "learning_rate": 3.177710357786356e-06,
      "loss": 2.7969,
      "step": 219719
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.441313743591309,
      "learning_rate": 3.1771165837082167e-06,
      "loss": 2.9349,
      "step": 219720
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.652179718017578,
      "learning_rate": 3.176522864815001e-06,
      "loss": 2.9138,
      "step": 219721
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.000033378601074,
      "learning_rate": 3.1759292011069416e-06,
      "loss": 3.0431,
      "step": 219722
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1606948375701904,
      "learning_rate": 3.1753355925840384e-06,
      "loss": 2.4995,
      "step": 219723
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.183960199356079,
      "learning_rate": 3.1747420392464586e-06,
      "loss": 3.0496,
      "step": 219724
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.532095432281494,
      "learning_rate": 3.174148541094268e-06,
      "loss": 3.0483,
      "step": 219725
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4840352535247803,
      "learning_rate": 3.173555098127634e-06,
      "loss": 3.1161,
      "step": 219726
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4553980827331543,
      "learning_rate": 3.172961710346622e-06,
      "loss": 3.1129,
      "step": 219727
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7415614128112793,
      "learning_rate": 3.1723683777514e-06,
      "loss": 2.9746,
      "step": 219728
    },
    {
      "epoch": 2.86,
      "grad_norm": 5.679669380187988,
      "learning_rate": 3.171775100342e-06,
      "loss": 2.7614,
      "step": 219729
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5323612689971924,
      "learning_rate": 3.171181878118589e-06,
      "loss": 2.9831,
      "step": 219730
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4650847911834717,
      "learning_rate": 3.1705887110812343e-06,
      "loss": 2.867,
      "step": 219731
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8688037395477295,
      "learning_rate": 3.1699955992300685e-06,
      "loss": 2.9869,
      "step": 219732
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8052783012390137,
      "learning_rate": 3.169402542565225e-06,
      "loss": 2.9725,
      "step": 219733
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7418549060821533,
      "learning_rate": 3.168809541086803e-06,
      "loss": 3.0592,
      "step": 219734
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8572068214416504,
      "learning_rate": 3.16821659479487e-06,
      "loss": 2.8388,
      "step": 219735
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.346763610839844,
      "learning_rate": 3.1676237036895925e-06,
      "loss": 2.8383,
      "step": 219736
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.150550365447998,
      "learning_rate": 3.1670308677710365e-06,
      "loss": 3.0831,
      "step": 219737
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1436476707458496,
      "learning_rate": 3.166438087039369e-06,
      "loss": 2.7849,
      "step": 219738
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.6310315132141113,
      "learning_rate": 3.165845361494623e-06,
      "loss": 2.792,
      "step": 219739
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9778778553009033,
      "learning_rate": 3.1652526911369657e-06,
      "loss": 2.9716,
      "step": 219740
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5025341510772705,
      "learning_rate": 3.1646600759664964e-06,
      "loss": 3.0449,
      "step": 219741
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.782926082611084,
      "learning_rate": 3.164067515983315e-06,
      "loss": 2.9565,
      "step": 219742
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7259418964385986,
      "learning_rate": 3.1634750111875216e-06,
      "loss": 3.0996,
      "step": 219743
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.801480293273926,
      "learning_rate": 3.162882561579283e-06,
      "loss": 2.9652,
      "step": 219744
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0859367847442627,
      "learning_rate": 3.162290167158632e-06,
      "loss": 2.7999,
      "step": 219745
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.111928939819336,
      "learning_rate": 3.161697827925702e-06,
      "loss": 2.8234,
      "step": 219746
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.819974660873413,
      "learning_rate": 3.1611055438806597e-06,
      "loss": 2.7685,
      "step": 219747
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.690875768661499,
      "learning_rate": 3.160513315023572e-06,
      "loss": 2.9452,
      "step": 219748
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1013591289520264,
      "learning_rate": 3.1599211413545044e-06,
      "loss": 2.9868,
      "step": 219749
    },
    {
      "epoch": 2.86,
      "grad_norm": 5.868384838104248,
      "learning_rate": 3.1593290228736247e-06,
      "loss": 2.9451,
      "step": 219750
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7298688888549805,
      "learning_rate": 3.158736959581032e-06,
      "loss": 2.7984,
      "step": 219751
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7941479682922363,
      "learning_rate": 3.1581449514768266e-06,
      "loss": 3.0008,
      "step": 219752
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7604331970214844,
      "learning_rate": 3.1575529985611416e-06,
      "loss": 2.7942,
      "step": 219753
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.655358076095581,
      "learning_rate": 3.1569611008340767e-06,
      "loss": 2.9985,
      "step": 219754
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0853021144866943,
      "learning_rate": 3.1563692582957323e-06,
      "loss": 3.1658,
      "step": 219755
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9197540283203125,
      "learning_rate": 3.155777470946208e-06,
      "loss": 3.0148,
      "step": 219756
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0791821479797363,
      "learning_rate": 3.155185738785637e-06,
      "loss": 2.9023,
      "step": 219757
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.863611936569214,
      "learning_rate": 3.1545940618141196e-06,
      "loss": 2.7864,
      "step": 219758
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3420286178588867,
      "learning_rate": 3.1540024400317554e-06,
      "loss": 2.779,
      "step": 219759
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.140033721923828,
      "learning_rate": 3.1534108734386776e-06,
      "loss": 2.816,
      "step": 219760
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6222424507141113,
      "learning_rate": 3.152819362034986e-06,
      "loss": 2.916,
      "step": 219761
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.770876169204712,
      "learning_rate": 3.1522279058207813e-06,
      "loss": 2.726,
      "step": 219762
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4482452869415283,
      "learning_rate": 3.151636504796162e-06,
      "loss": 2.769,
      "step": 219763
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6980440616607666,
      "learning_rate": 3.151045158961296e-06,
      "loss": 3.1123,
      "step": 219764
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.659438133239746,
      "learning_rate": 3.1504538683162496e-06,
      "loss": 3.1059,
      "step": 219765
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.9882259368896484,
      "learning_rate": 3.149862632861122e-06,
      "loss": 3.0559,
      "step": 219766
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.396454334259033,
      "learning_rate": 3.149271452596047e-06,
      "loss": 3.1683,
      "step": 219767
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.022535562515259,
      "learning_rate": 3.1486803275211247e-06,
      "loss": 2.8798,
      "step": 219768
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0309653282165527,
      "learning_rate": 3.1480892576364215e-06,
      "loss": 3.1671,
      "step": 219769
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0673487186431885,
      "learning_rate": 3.14749824294217e-06,
      "loss": 2.7781,
      "step": 219770
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6590077877044678,
      "learning_rate": 3.1469072834383715e-06,
      "loss": 2.9664,
      "step": 219771
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9125490188598633,
      "learning_rate": 3.1463163791251246e-06,
      "loss": 2.7985,
      "step": 219772
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.92631196975708,
      "learning_rate": 3.1457255300026295e-06,
      "loss": 2.839,
      "step": 219773
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.038121223449707,
      "learning_rate": 3.1451347360709534e-06,
      "loss": 3.0033,
      "step": 219774
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.363067388534546,
      "learning_rate": 3.1445439973301624e-06,
      "loss": 2.9663,
      "step": 219775
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.869434118270874,
      "learning_rate": 3.143953313780423e-06,
      "loss": 3.087,
      "step": 219776
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.959127426147461,
      "learning_rate": 3.1433626854218354e-06,
      "loss": 2.9724,
      "step": 219777
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9375357627868652,
      "learning_rate": 3.142772112254466e-06,
      "loss": 2.9127,
      "step": 219778
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.842003583908081,
      "learning_rate": 3.1421815942784812e-06,
      "loss": 3.0439,
      "step": 219779
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8469367027282715,
      "learning_rate": 3.1415911314939814e-06,
      "loss": 2.8136,
      "step": 219780
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0767745971679688,
      "learning_rate": 3.141000723901066e-06,
      "loss": 2.959,
      "step": 219781
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1975464820861816,
      "learning_rate": 3.140410371499835e-06,
      "loss": 2.726,
      "step": 219782
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5684008598327637,
      "learning_rate": 3.139820074290389e-06,
      "loss": 2.7696,
      "step": 219783
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6887662410736084,
      "learning_rate": 3.1392298322728604e-06,
      "loss": 2.7874,
      "step": 219784
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.715191125869751,
      "learning_rate": 3.1386396454473495e-06,
      "loss": 3.0253,
      "step": 219785
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2530384063720703,
      "learning_rate": 3.138049513813956e-06,
      "loss": 2.8712,
      "step": 219786
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.1683759689331055,
      "learning_rate": 3.1374594373728467e-06,
      "loss": 2.704,
      "step": 219787
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3203799724578857,
      "learning_rate": 3.1368694161240548e-06,
      "loss": 2.9407,
      "step": 219788
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8181045055389404,
      "learning_rate": 3.1362794500677467e-06,
      "loss": 2.7991,
      "step": 219789
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5380897521972656,
      "learning_rate": 3.1356895392039894e-06,
      "loss": 3.0281,
      "step": 219790
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9258968830108643,
      "learning_rate": 3.1350996835329155e-06,
      "loss": 2.9071,
      "step": 219791
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9175941944122314,
      "learning_rate": 3.1345098830546258e-06,
      "loss": 2.7564,
      "step": 219792
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9232561588287354,
      "learning_rate": 3.1339201377692193e-06,
      "loss": 2.9375,
      "step": 219793
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.319213628768921,
      "learning_rate": 3.1333304476768295e-06,
      "loss": 2.8989,
      "step": 219794
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.644416093826294,
      "learning_rate": 3.13274081277759e-06,
      "loss": 3.0112,
      "step": 219795
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.521792411804199,
      "learning_rate": 3.132151233071567e-06,
      "loss": 2.6898,
      "step": 219796
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1552038192749023,
      "learning_rate": 3.1315617085588607e-06,
      "loss": 2.64,
      "step": 219797
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.910335063934326,
      "learning_rate": 3.130972239239604e-06,
      "loss": 3.0306,
      "step": 219798
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.000602960586548,
      "learning_rate": 3.1303828251139306e-06,
      "loss": 2.8291,
      "step": 219799
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7431352138519287,
      "learning_rate": 3.1297934661818735e-06,
      "loss": 2.8732,
      "step": 219800
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4160804748535156,
      "learning_rate": 3.1292041624436328e-06,
      "loss": 2.7953,
      "step": 219801
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.059591770172119,
      "learning_rate": 3.1286149138992743e-06,
      "loss": 3.101,
      "step": 219802
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7148847579956055,
      "learning_rate": 3.128025720548899e-06,
      "loss": 2.5273,
      "step": 219803
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7988522052764893,
      "learning_rate": 3.1274365823926064e-06,
      "loss": 2.9717,
      "step": 219804
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7321090698242188,
      "learning_rate": 3.1268474994305624e-06,
      "loss": 2.6899,
      "step": 219805
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.110276222229004,
      "learning_rate": 3.1262584716628014e-06,
      "loss": 3.0062,
      "step": 219806
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.667262554168701,
      "learning_rate": 3.1256694990895225e-06,
      "loss": 2.9715,
      "step": 219807
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.724642515182495,
      "learning_rate": 3.1250805817107594e-06,
      "loss": 3.1396,
      "step": 219808
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3577592372894287,
      "learning_rate": 3.124491719526645e-06,
      "loss": 2.8851,
      "step": 219809
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.877084255218506,
      "learning_rate": 3.1239029125373126e-06,
      "loss": 2.9919,
      "step": 219810
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.974045991897583,
      "learning_rate": 3.123314160742829e-06,
      "loss": 2.6771,
      "step": 219811
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8576834201812744,
      "learning_rate": 3.1227254641433275e-06,
      "loss": 2.7603,
      "step": 219812
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.446723461151123,
      "learning_rate": 3.1221368227389076e-06,
      "loss": 3.0714,
      "step": 219813
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9272711277008057,
      "learning_rate": 3.121548236529703e-06,
      "loss": 2.7736,
      "step": 219814
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.016751289367676,
      "learning_rate": 3.120959705515813e-06,
      "loss": 2.7063,
      "step": 219815
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0946147441864014,
      "learning_rate": 3.120371229697338e-06,
      "loss": 3.008,
      "step": 219816
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.865917444229126,
      "learning_rate": 3.1197828090743784e-06,
      "loss": 2.9313,
      "step": 219817
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1169419288635254,
      "learning_rate": 3.119194443647033e-06,
      "loss": 2.8052,
      "step": 219818
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.547407388687134,
      "learning_rate": 3.118606133415469e-06,
      "loss": 3.0138,
      "step": 219819
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1993279457092285,
      "learning_rate": 3.1180178783797527e-06,
      "loss": 2.6881,
      "step": 219820
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.62874436378479,
      "learning_rate": 3.1174296785400177e-06,
      "loss": 2.8746,
      "step": 219821
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9277682304382324,
      "learning_rate": 3.1168415338963304e-06,
      "loss": 2.9518,
      "step": 219822
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7170310020446777,
      "learning_rate": 3.116253444448824e-06,
      "loss": 2.8179,
      "step": 219823
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.288397789001465,
      "learning_rate": 3.1156654101975986e-06,
      "loss": 2.8427,
      "step": 219824
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4627416133880615,
      "learning_rate": 3.1150774311428208e-06,
      "loss": 2.789,
      "step": 219825
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.783581256866455,
      "learning_rate": 3.114489507284523e-06,
      "loss": 3.2663,
      "step": 219826
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.774858236312866,
      "learning_rate": 3.113901638622873e-06,
      "loss": 3.0617,
      "step": 219827
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.863848924636841,
      "learning_rate": 3.113313825157904e-06,
      "loss": 2.6278,
      "step": 219828
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0987746715545654,
      "learning_rate": 3.1127260668898146e-06,
      "loss": 3.025,
      "step": 219829
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1037347316741943,
      "learning_rate": 3.1121383638186393e-06,
      "loss": 2.8466,
      "step": 219830
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.479903221130371,
      "learning_rate": 3.1115507159445443e-06,
      "loss": 3.0642,
      "step": 219831
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.119138717651367,
      "learning_rate": 3.110963123267629e-06,
      "loss": 2.907,
      "step": 219832
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.60058856010437,
      "learning_rate": 3.110375585787994e-06,
      "loss": 2.9407,
      "step": 219833
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4979910850524902,
      "learning_rate": 3.1097881035057058e-06,
      "loss": 2.7967,
      "step": 219834
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1786258220672607,
      "learning_rate": 3.109200676420931e-06,
      "loss": 2.6374,
      "step": 219835
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.036893844604492,
      "learning_rate": 3.108613304533769e-06,
      "loss": 2.9032,
      "step": 219836
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7762820720672607,
      "learning_rate": 3.1080259878443204e-06,
      "loss": 2.6847,
      "step": 219837
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.1235198974609375,
      "learning_rate": 3.1074387263526844e-06,
      "loss": 2.8338,
      "step": 219838
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5118038654327393,
      "learning_rate": 3.106851520058995e-06,
      "loss": 2.7815,
      "step": 219839
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2222096920013428,
      "learning_rate": 3.106264368963318e-06,
      "loss": 2.8881,
      "step": 219840
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1553330421447754,
      "learning_rate": 3.1056772730658206e-06,
      "loss": 2.9317,
      "step": 219841
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.66652512550354,
      "learning_rate": 3.105090232366536e-06,
      "loss": 3.1499,
      "step": 219842
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.866879940032959,
      "learning_rate": 3.104503246865664e-06,
      "loss": 2.7624,
      "step": 219843
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0072519779205322,
      "learning_rate": 3.103916316563271e-06,
      "loss": 2.8997,
      "step": 219844
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.524890899658203,
      "learning_rate": 3.103329441459457e-06,
      "loss": 2.9384,
      "step": 219845
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0921552181243896,
      "learning_rate": 3.1027426215543215e-06,
      "loss": 2.9413,
      "step": 219846
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2189126014709473,
      "learning_rate": 3.1021558568479987e-06,
      "loss": 2.7772,
      "step": 219847
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.703130006790161,
      "learning_rate": 3.101569147340588e-06,
      "loss": 3.1762,
      "step": 219848
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.035093307495117,
      "learning_rate": 3.1009824930321893e-06,
      "loss": 2.6263,
      "step": 219849
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9687232971191406,
      "learning_rate": 3.100395893922936e-06,
      "loss": 2.8764,
      "step": 219850
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.031618595123291,
      "learning_rate": 3.099809350012927e-06,
      "loss": 2.9031,
      "step": 219851
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8077685832977295,
      "learning_rate": 3.099222861302264e-06,
      "loss": 2.7749,
      "step": 219852
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4457848072052,
      "learning_rate": 3.098636427791079e-06,
      "loss": 3.0374,
      "step": 219853
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.811140537261963,
      "learning_rate": 3.098050049479406e-06,
      "loss": 2.8474,
      "step": 219854
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3626692295074463,
      "learning_rate": 3.097463726367477e-06,
      "loss": 3.0248,
      "step": 219855
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5008273124694824,
      "learning_rate": 3.0968774584552935e-06,
      "loss": 3.2786,
      "step": 219856
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.082909345626831,
      "learning_rate": 3.096291245743021e-06,
      "loss": 3.0383,
      "step": 219857
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7527687549591064,
      "learning_rate": 3.095705088230727e-06,
      "loss": 2.8264,
      "step": 219858
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.842834234237671,
      "learning_rate": 3.0951189859185767e-06,
      "loss": 3.07,
      "step": 219859
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.757333517074585,
      "learning_rate": 3.0945329388066375e-06,
      "loss": 2.7787,
      "step": 219860
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7971038818359375,
      "learning_rate": 3.0939469468950427e-06,
      "loss": 2.9108,
      "step": 219861
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.827605724334717,
      "learning_rate": 3.0933610101838592e-06,
      "loss": 3.0569,
      "step": 219862
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8352558612823486,
      "learning_rate": 3.0927751286732525e-06,
      "loss": 2.6888,
      "step": 219863
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1181254386901855,
      "learning_rate": 3.0921893023632905e-06,
      "loss": 3.1363,
      "step": 219864
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8054850101470947,
      "learning_rate": 3.091603531254072e-06,
      "loss": 2.9396,
      "step": 219865
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.289275646209717,
      "learning_rate": 3.0910178153457644e-06,
      "loss": 3.093,
      "step": 219866
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.703780174255371,
      "learning_rate": 3.0904321546384338e-06,
      "loss": 2.8963,
      "step": 219867
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.684258460998535,
      "learning_rate": 3.08984654913218e-06,
      "loss": 2.9263,
      "step": 219868
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.873779535293579,
      "learning_rate": 3.0892609988271366e-06,
      "loss": 2.8753,
      "step": 219869
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.362187623977661,
      "learning_rate": 3.0886755037234033e-06,
      "loss": 2.949,
      "step": 219870
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7406833171844482,
      "learning_rate": 3.0880900638210805e-06,
      "loss": 2.7899,
      "step": 219871
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.996906280517578,
      "learning_rate": 3.087504679120334e-06,
      "loss": 2.728,
      "step": 219872
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.162069797515869,
      "learning_rate": 3.0869193496211973e-06,
      "loss": 2.7137,
      "step": 219873
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4561874866485596,
      "learning_rate": 3.0863340753237708e-06,
      "loss": 2.915,
      "step": 219874
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.421175479888916,
      "learning_rate": 3.0857488562282205e-06,
      "loss": 3.0434,
      "step": 219875
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.499448299407959,
      "learning_rate": 3.0851636923346802e-06,
      "loss": 2.9476,
      "step": 219876
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1673977375030518,
      "learning_rate": 3.084578583643149e-06,
      "loss": 2.8871,
      "step": 219877
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.8248610496520996,
      "learning_rate": 3.0839935301538276e-06,
      "loss": 3.1488,
      "step": 219878
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.834353446960449,
      "learning_rate": 3.083408531866782e-06,
      "loss": 2.9772,
      "step": 219879
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.222917318344116,
      "learning_rate": 3.0828235887821464e-06,
      "loss": 2.9763,
      "step": 219880
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0279526710510254,
      "learning_rate": 3.0822387009000526e-06,
      "loss": 2.8608,
      "step": 219881
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.200726270675659,
      "learning_rate": 3.0816538682205348e-06,
      "loss": 2.8699,
      "step": 219882
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.374818801879883,
      "learning_rate": 3.081069090743726e-06,
      "loss": 2.8959,
      "step": 219883
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6876001358032227,
      "learning_rate": 3.0804843684697933e-06,
      "loss": 2.7497,
      "step": 219884
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.541896343231201,
      "learning_rate": 3.079899701398769e-06,
      "loss": 3.067,
      "step": 219885
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.938570976257324,
      "learning_rate": 3.0793150895308206e-06,
      "loss": 2.9652,
      "step": 219886
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.64927077293396,
      "learning_rate": 3.078730532866047e-06,
      "loss": 2.7326,
      "step": 219887
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.18672251701355,
      "learning_rate": 3.078146031404516e-06,
      "loss": 2.9128,
      "step": 219888
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.307525157928467,
      "learning_rate": 3.07756158514636e-06,
      "loss": 3.0575,
      "step": 219889
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6877357959747314,
      "learning_rate": 3.0769771940916786e-06,
      "loss": 2.9066,
      "step": 219890
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.0077714920043945,
      "learning_rate": 3.076392858240606e-06,
      "loss": 2.7725,
      "step": 219891
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.434415817260742,
      "learning_rate": 3.075808577593242e-06,
      "loss": 2.8673,
      "step": 219892
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9696404933929443,
      "learning_rate": 3.075224352149719e-06,
      "loss": 3.1677,
      "step": 219893
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7289586067199707,
      "learning_rate": 3.074640181910071e-06,
      "loss": 2.9979,
      "step": 219894
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2721729278564453,
      "learning_rate": 3.074056066874464e-06,
      "loss": 3.0733,
      "step": 219895
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.31626033782959,
      "learning_rate": 3.0734720070429984e-06,
      "loss": 2.8505,
      "step": 219896
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0010952949523926,
      "learning_rate": 3.0728880024157743e-06,
      "loss": 2.9382,
      "step": 219897
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.183661460876465,
      "learning_rate": 3.0723040529929243e-06,
      "loss": 2.9275,
      "step": 219898
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9017059803009033,
      "learning_rate": 3.0717201587745153e-06,
      "loss": 2.6386,
      "step": 219899
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6428544521331787,
      "learning_rate": 3.071136319760714e-06,
      "loss": 2.8461,
      "step": 219900
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.967597246170044,
      "learning_rate": 3.0705525359515536e-06,
      "loss": 2.9561,
      "step": 219901
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.308147430419922,
      "learning_rate": 3.0699688073472008e-06,
      "loss": 2.8911,
      "step": 219902
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.563873767852783,
      "learning_rate": 3.0693851339477214e-06,
      "loss": 3.0346,
      "step": 219903
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.813079357147217,
      "learning_rate": 3.0688015157532497e-06,
      "loss": 2.8419,
      "step": 219904
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2155401706695557,
      "learning_rate": 3.0682179527639183e-06,
      "loss": 2.9941,
      "step": 219905
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9007277488708496,
      "learning_rate": 3.0676344449798275e-06,
      "loss": 2.8001,
      "step": 219906
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.5209193229675293,
      "learning_rate": 3.0670509924010433e-06,
      "loss": 2.8679,
      "step": 219907
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.036289691925049,
      "learning_rate": 3.0664675950276997e-06,
      "loss": 2.6704,
      "step": 219908
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0074641704559326,
      "learning_rate": 3.0658842528598958e-06,
      "loss": 2.8123,
      "step": 219909
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.042323350906372,
      "learning_rate": 3.0653009658977657e-06,
      "loss": 2.9393,
      "step": 219910
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.634704113006592,
      "learning_rate": 3.0647177341413755e-06,
      "loss": 2.5572,
      "step": 219911
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2451770305633545,
      "learning_rate": 3.0641345575908914e-06,
      "loss": 2.7264,
      "step": 219912
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.853346347808838,
      "learning_rate": 3.0635514362463475e-06,
      "loss": 2.7382,
      "step": 219913
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4780232906341553,
      "learning_rate": 3.0629683701079433e-06,
      "loss": 2.6121,
      "step": 219914
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8601889610290527,
      "learning_rate": 3.062385359175712e-06,
      "loss": 2.9302,
      "step": 219915
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2950315475463867,
      "learning_rate": 3.0618024034497868e-06,
      "loss": 3.0892,
      "step": 219916
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.351493835449219,
      "learning_rate": 3.061219502930301e-06,
      "loss": 2.9032,
      "step": 219917
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.5544540882110596,
      "learning_rate": 3.0606366576173213e-06,
      "loss": 3.0705,
      "step": 219918
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.576946973800659,
      "learning_rate": 3.060053867510981e-06,
      "loss": 2.7649,
      "step": 219919
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9001970291137695,
      "learning_rate": 3.0594711326113795e-06,
      "loss": 2.8497,
      "step": 219920
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.606123924255371,
      "learning_rate": 3.0588884529186175e-06,
      "loss": 2.6916,
      "step": 219921
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.506075620651245,
      "learning_rate": 3.058305828432828e-06,
      "loss": 2.648,
      "step": 219922
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.233080863952637,
      "learning_rate": 3.05772325915411e-06,
      "loss": 2.8526,
      "step": 219923
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1335041522979736,
      "learning_rate": 3.0571407450825313e-06,
      "loss": 3.0097,
      "step": 219924
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6744022369384766,
      "learning_rate": 3.056558286218258e-06,
      "loss": 3.0242,
      "step": 219925
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.6532561779022217,
      "learning_rate": 3.05597588256139e-06,
      "loss": 3.1436,
      "step": 219926
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7159292697906494,
      "learning_rate": 3.055393534111994e-06,
      "loss": 3.1109,
      "step": 219927
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.676602602005005,
      "learning_rate": 3.0548112408702362e-06,
      "loss": 3.0906,
      "step": 219928
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9131128787994385,
      "learning_rate": 3.0542290028361506e-06,
      "loss": 3.3005,
      "step": 219929
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.014864921569824,
      "learning_rate": 3.0536468200099364e-06,
      "loss": 3.1887,
      "step": 219930
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.193342685699463,
      "learning_rate": 3.053064692391627e-06,
      "loss": 2.9877,
      "step": 219931
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8250370025634766,
      "learning_rate": 3.0524826199813556e-06,
      "loss": 2.8276,
      "step": 219932
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.8564953804016113,
      "learning_rate": 3.0519006027792557e-06,
      "loss": 3.0863,
      "step": 219933
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8472683429718018,
      "learning_rate": 3.051318640785394e-06,
      "loss": 2.7849,
      "step": 219934
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0197343826293945,
      "learning_rate": 3.0507367339999034e-06,
      "loss": 2.853,
      "step": 219935
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.820375919342041,
      "learning_rate": 3.050154882422884e-06,
      "loss": 2.7948,
      "step": 219936
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.597365140914917,
      "learning_rate": 3.0495730860544354e-06,
      "loss": 2.7641,
      "step": 219937
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7456681728363037,
      "learning_rate": 3.0489913448947246e-06,
      "loss": 2.9147,
      "step": 219938
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.401218891143799,
      "learning_rate": 3.0484096589437514e-06,
      "loss": 3.2044,
      "step": 219939
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0486042499542236,
      "learning_rate": 3.0478280282017153e-06,
      "loss": 2.8655,
      "step": 219940
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8218798637390137,
      "learning_rate": 3.0472464526686836e-06,
      "loss": 2.7337,
      "step": 219941
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7490079402923584,
      "learning_rate": 3.046664932344789e-06,
      "loss": 2.9016,
      "step": 219942
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.726909875869751,
      "learning_rate": 3.0460834672300984e-06,
      "loss": 2.8463,
      "step": 219943
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.614316463470459,
      "learning_rate": 3.045502057324778e-06,
      "loss": 2.9149,
      "step": 219944
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7612781524658203,
      "learning_rate": 3.044920702628861e-06,
      "loss": 2.9342,
      "step": 219945
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.553086042404175,
      "learning_rate": 3.044339403142548e-06,
      "loss": 2.848,
      "step": 219946
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7839229106903076,
      "learning_rate": 3.0437581588658723e-06,
      "loss": 3.1165,
      "step": 219947
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1926589012145996,
      "learning_rate": 3.043176969798966e-06,
      "loss": 2.899,
      "step": 219948
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8945326805114746,
      "learning_rate": 3.0425958359419635e-06,
      "loss": 2.7483,
      "step": 219949
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1118404865264893,
      "learning_rate": 3.0420147572948974e-06,
      "loss": 3.2387,
      "step": 219950
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.323115587234497,
      "learning_rate": 3.0414337338579674e-06,
      "loss": 2.7278,
      "step": 219951
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.7119007110595703,
      "learning_rate": 3.0408527656312408e-06,
      "loss": 2.7902,
      "step": 219952
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.781956672668457,
      "learning_rate": 3.040271852614784e-06,
      "loss": 2.8622,
      "step": 219953
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0814871788024902,
      "learning_rate": 3.0396909948087966e-06,
      "loss": 2.7967,
      "step": 219954
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.252094268798828,
      "learning_rate": 3.039110192213312e-06,
      "loss": 2.7717,
      "step": 219955
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.092802047729492,
      "learning_rate": 3.0385294448284637e-06,
      "loss": 3.0117,
      "step": 219956
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0636162757873535,
      "learning_rate": 3.037948752654351e-06,
      "loss": 3.0306,
      "step": 219957
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3515419960021973,
      "learning_rate": 3.0373681156911077e-06,
      "loss": 3.1525,
      "step": 219958
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.931230306625366,
      "learning_rate": 3.0367875339388004e-06,
      "loss": 3.1336,
      "step": 219959
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.317194938659668,
      "learning_rate": 3.036207007397595e-06,
      "loss": 2.9488,
      "step": 219960
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7371468544006348,
      "learning_rate": 3.0356265360675593e-06,
      "loss": 2.8468,
      "step": 219961
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.071264266967773,
      "learning_rate": 3.0350461199487585e-06,
      "loss": 2.8741,
      "step": 219962
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0660786628723145,
      "learning_rate": 3.0344657590413936e-06,
      "loss": 2.6351,
      "step": 219963
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7980024814605713,
      "learning_rate": 3.033885453345497e-06,
      "loss": 2.8938,
      "step": 219964
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.349742889404297,
      "learning_rate": 3.033305202861236e-06,
      "loss": 2.928,
      "step": 219965
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8992998600006104,
      "learning_rate": 3.032725007588677e-06,
      "loss": 2.8681,
      "step": 219966
    },
    {
      "epoch": 2.86,
      "grad_norm": 5.482680797576904,
      "learning_rate": 3.0321448675279526e-06,
      "loss": 2.8719,
      "step": 219967
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2741858959198,
      "learning_rate": 3.03156478267913e-06,
      "loss": 2.7757,
      "step": 219968
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.700167655944824,
      "learning_rate": 3.0309847530423757e-06,
      "loss": 2.7846,
      "step": 219969
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8544890880584717,
      "learning_rate": 3.0304047786177233e-06,
      "loss": 2.8031,
      "step": 219970
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6669328212738037,
      "learning_rate": 3.029824859405372e-06,
      "loss": 3.0228,
      "step": 219971
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7462315559387207,
      "learning_rate": 3.0292449954053557e-06,
      "loss": 3.0727,
      "step": 219972
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.300561904907227,
      "learning_rate": 3.0286651866178403e-06,
      "loss": 2.7911,
      "step": 219973
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1024155616760254,
      "learning_rate": 3.0280854330428593e-06,
      "loss": 2.7878,
      "step": 219974
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0900588035583496,
      "learning_rate": 3.0275057346805796e-06,
      "loss": 2.975,
      "step": 219975
    },
    {
      "epoch": 2.86,
      "grad_norm": 5.218975067138672,
      "learning_rate": 3.0269260915310677e-06,
      "loss": 2.9503,
      "step": 219976
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.909668207168579,
      "learning_rate": 3.02634650359449e-06,
      "loss": 2.7964,
      "step": 219977
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.993447780609131,
      "learning_rate": 3.0257669708709465e-06,
      "loss": 2.7846,
      "step": 219978
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.812835216522217,
      "learning_rate": 3.0251874933604702e-06,
      "loss": 2.6948,
      "step": 219979
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2165753841400146,
      "learning_rate": 3.024608071063228e-06,
      "loss": 2.967,
      "step": 219980
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.838759660720825,
      "learning_rate": 3.02402870397932e-06,
      "loss": 2.9246,
      "step": 219981
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.898756980895996,
      "learning_rate": 3.0234493921088456e-06,
      "loss": 3.1364,
      "step": 219982
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7416157722473145,
      "learning_rate": 3.022870135451938e-06,
      "loss": 3.1286,
      "step": 219983
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8746328353881836,
      "learning_rate": 3.022290934008664e-06,
      "loss": 2.7278,
      "step": 219984
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9435441493988037,
      "learning_rate": 3.0217117877791575e-06,
      "loss": 2.9266,
      "step": 219985
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.208514451980591,
      "learning_rate": 3.0211326967635174e-06,
      "loss": 3.0329,
      "step": 219986
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.026606559753418,
      "learning_rate": 3.0205536609618775e-06,
      "loss": 3.0214,
      "step": 219987
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.6447112560272217,
      "learning_rate": 3.0199746803742708e-06,
      "loss": 2.8918,
      "step": 219988
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.957451105117798,
      "learning_rate": 3.0193957550008972e-06,
      "loss": 3.0622,
      "step": 219989
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8729074001312256,
      "learning_rate": 3.0188168848418237e-06,
      "loss": 2.8827,
      "step": 219990
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.083895683288574,
      "learning_rate": 3.01823806989715e-06,
      "loss": 2.824,
      "step": 219991
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0083189010620117,
      "learning_rate": 3.017659310167009e-06,
      "loss": 2.9059,
      "step": 219992
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.23158597946167,
      "learning_rate": 3.017080605651467e-06,
      "loss": 2.7591,
      "step": 219993
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8542520999908447,
      "learning_rate": 3.0165019563506587e-06,
      "loss": 2.7494,
      "step": 219994
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0498876571655273,
      "learning_rate": 3.0159233622646826e-06,
      "loss": 3.1098,
      "step": 219995
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.4242818355560303,
      "learning_rate": 3.0153448233936728e-06,
      "loss": 3.1487,
      "step": 219996
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0297367572784424,
      "learning_rate": 3.0147663397377287e-06,
      "loss": 2.7597,
      "step": 219997
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3709025382995605,
      "learning_rate": 3.014187911296917e-06,
      "loss": 3.099,
      "step": 219998
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.127424716949463,
      "learning_rate": 3.013609538071371e-06,
      "loss": 2.9316,
      "step": 219999
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0490896701812744,
      "learning_rate": 3.0130312200611905e-06,
      "loss": 2.6191,
      "step": 220000
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.998563528060913,
      "learning_rate": 3.0124529572665425e-06,
      "loss": 2.7668,
      "step": 220001
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.4636082649230957,
      "learning_rate": 3.0118747496874262e-06,
      "loss": 2.7336,
      "step": 220002
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.812485933303833,
      "learning_rate": 3.0112965973240754e-06,
      "loss": 2.981,
      "step": 220003
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9619336128234863,
      "learning_rate": 3.01071850017649e-06,
      "loss": 2.835,
      "step": 220004
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7680118083953857,
      "learning_rate": 3.010140458244803e-06,
      "loss": 2.9708,
      "step": 220005
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.691303014755249,
      "learning_rate": 3.0095624715291477e-06,
      "loss": 3.1366,
      "step": 220006
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2222001552581787,
      "learning_rate": 3.0089845400296573e-06,
      "loss": 3.0527,
      "step": 220007
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1495490074157715,
      "learning_rate": 3.0084066637463656e-06,
      "loss": 2.9424,
      "step": 220008
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.422001600265503,
      "learning_rate": 3.007828842679438e-06,
      "loss": 2.5941,
      "step": 220009
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0786774158477783,
      "learning_rate": 3.0072510768289424e-06,
      "loss": 2.7604,
      "step": 220010
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.944492816925049,
      "learning_rate": 3.0066733661950117e-06,
      "loss": 2.8336,
      "step": 220011
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9187164306640625,
      "learning_rate": 3.006095710777745e-06,
      "loss": 2.8933,
      "step": 220012
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1645772457122803,
      "learning_rate": 3.005518110577243e-06,
      "loss": 2.7851,
      "step": 220013
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.8152730464935303,
      "learning_rate": 3.0049405655936387e-06,
      "loss": 2.805,
      "step": 220014
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.834520101547241,
      "learning_rate": 3.004363075827032e-06,
      "loss": 2.6789,
      "step": 220015
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.3167881965637207,
      "learning_rate": 3.0037856412774895e-06,
      "loss": 2.9688,
      "step": 220016
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1381354331970215,
      "learning_rate": 3.003208261945178e-06,
      "loss": 2.8712,
      "step": 220017
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.087353229522705,
      "learning_rate": 3.002630937830164e-06,
      "loss": 2.83,
      "step": 220018
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.661789894104004,
      "learning_rate": 3.002053668932547e-06,
      "loss": 3.0957,
      "step": 220019
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.772310495376587,
      "learning_rate": 3.0014764552524605e-06,
      "loss": 2.9592,
      "step": 220020
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.82639479637146,
      "learning_rate": 3.000899296790038e-06,
      "loss": 2.9409,
      "step": 220021
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.0388829708099365,
      "learning_rate": 3.000322193545346e-06,
      "loss": 3.1514,
      "step": 220022
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.6657612323760986,
      "learning_rate": 2.999745145518484e-06,
      "loss": 2.9664,
      "step": 220023
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1102871894836426,
      "learning_rate": 2.999168152709586e-06,
      "loss": 3.1076,
      "step": 220024
    },
    {
      "epoch": 2.86,
      "grad_norm": 4.343254566192627,
      "learning_rate": 2.9985912151187506e-06,
      "loss": 3.0545,
      "step": 220025
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.1507015228271484,
      "learning_rate": 2.998014332746046e-06,
      "loss": 2.891,
      "step": 220026
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2551629543304443,
      "learning_rate": 2.997437505591671e-06,
      "loss": 3.0071,
      "step": 220027
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.738624334335327,
      "learning_rate": 2.9968607336556592e-06,
      "loss": 2.7996,
      "step": 220028
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.560581684112549,
      "learning_rate": 2.996284016938144e-06,
      "loss": 3.0082,
      "step": 220029
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.2001211643218994,
      "learning_rate": 2.9957073554391917e-06,
      "loss": 2.988,
      "step": 220030
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7113516330718994,
      "learning_rate": 2.9951307491589694e-06,
      "loss": 3.0389,
      "step": 220031
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.9338502883911133,
      "learning_rate": 2.994554198097543e-06,
      "loss": 2.9406,
      "step": 220032
    },
    {
      "epoch": 2.86,
      "grad_norm": 3.9717178344726562,
      "learning_rate": 2.993977702255046e-06,
      "loss": 2.7876,
      "step": 220033
    },
    {
      "epoch": 2.86,
      "grad_norm": 2.7132503986358643,
      "learning_rate": 2.9934012616316115e-06,
      "loss": 2.937,
      "step": 220034
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5414538383483887,
      "learning_rate": 2.99282487622724e-06,
      "loss": 2.9575,
      "step": 220035
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.16787576675415,
      "learning_rate": 2.992248546042164e-06,
      "loss": 3.0856,
      "step": 220036
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.062311887741089,
      "learning_rate": 2.991672271076417e-06,
      "loss": 2.7552,
      "step": 220037
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.465834379196167,
      "learning_rate": 2.991096051330133e-06,
      "loss": 2.9618,
      "step": 220038
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8711137771606445,
      "learning_rate": 2.9905198868034107e-06,
      "loss": 2.7916,
      "step": 220039
    },
    {
      "epoch": 2.87,
      "grad_norm": 5.7144341468811035,
      "learning_rate": 2.989943777496351e-06,
      "loss": 2.7611,
      "step": 220040
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7511425018310547,
      "learning_rate": 2.989367723409053e-06,
      "loss": 2.8792,
      "step": 220041
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7374703884124756,
      "learning_rate": 2.9887917245416506e-06,
      "loss": 3.0106,
      "step": 220042
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.42634654045105,
      "learning_rate": 2.9882157808942433e-06,
      "loss": 3.0229,
      "step": 220043
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.200705051422119,
      "learning_rate": 2.9876398924668975e-06,
      "loss": 2.8929,
      "step": 220044
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.882084608078003,
      "learning_rate": 2.9870640592598137e-06,
      "loss": 2.8793,
      "step": 220045
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.820697784423828,
      "learning_rate": 2.9864882812730253e-06,
      "loss": 3.0814,
      "step": 220046
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.888949394226074,
      "learning_rate": 2.9859125585066313e-06,
      "loss": 3.0145,
      "step": 220047
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.769540309906006,
      "learning_rate": 2.9853368909607656e-06,
      "loss": 2.938,
      "step": 220048
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8419642448425293,
      "learning_rate": 2.984761278635528e-06,
      "loss": 2.6419,
      "step": 220049
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9840123653411865,
      "learning_rate": 2.9841857215310515e-06,
      "loss": 2.7388,
      "step": 220050
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.934506416320801,
      "learning_rate": 2.9836102196474364e-06,
      "loss": 2.763,
      "step": 220051
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.643193483352661,
      "learning_rate": 2.983034772984749e-06,
      "loss": 2.5759,
      "step": 220052
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1033856868743896,
      "learning_rate": 2.982459381543123e-06,
      "loss": 2.7991,
      "step": 220053
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0669898986816406,
      "learning_rate": 2.981884045322658e-06,
      "loss": 2.8083,
      "step": 220054
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.009125232696533,
      "learning_rate": 2.9813087643234867e-06,
      "loss": 2.7943,
      "step": 220055
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.002513885498047,
      "learning_rate": 2.9807335385456764e-06,
      "loss": 2.8982,
      "step": 220056
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8809735774993896,
      "learning_rate": 2.9801583679893605e-06,
      "loss": 2.761,
      "step": 220057
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7756075859069824,
      "learning_rate": 2.9795832526546714e-06,
      "loss": 2.9508,
      "step": 220058
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6855568885803223,
      "learning_rate": 2.979008192541643e-06,
      "loss": 2.6447,
      "step": 220059
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6846888065338135,
      "learning_rate": 2.978433187650442e-06,
      "loss": 3.0672,
      "step": 220060
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.017754316329956,
      "learning_rate": 2.9778582379811343e-06,
      "loss": 2.8328,
      "step": 220061
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1174607276916504,
      "learning_rate": 2.977283343533887e-06,
      "loss": 2.9277,
      "step": 220062
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9606528282165527,
      "learning_rate": 2.9767085043087335e-06,
      "loss": 3.0146,
      "step": 220063
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0789878368377686,
      "learning_rate": 2.97613372030584e-06,
      "loss": 3.0087,
      "step": 220064
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.643688201904297,
      "learning_rate": 2.975558991525273e-06,
      "loss": 2.8032,
      "step": 220065
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6846213340759277,
      "learning_rate": 2.974984317967166e-06,
      "loss": 2.7754,
      "step": 220066
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.00074577331543,
      "learning_rate": 2.9744096996316193e-06,
      "loss": 2.8327,
      "step": 220067
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7700300216674805,
      "learning_rate": 2.9738351365187316e-06,
      "loss": 2.986,
      "step": 220068
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.722196578979492,
      "learning_rate": 2.973260628628604e-06,
      "loss": 2.7627,
      "step": 220069
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7720766067504883,
      "learning_rate": 2.972686175961403e-06,
      "loss": 2.9796,
      "step": 220070
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9748411178588867,
      "learning_rate": 2.9721117785171276e-06,
      "loss": 2.6951,
      "step": 220071
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.144349098205566,
      "learning_rate": 2.971537436295979e-06,
      "loss": 2.7081,
      "step": 220072
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0872480869293213,
      "learning_rate": 2.9709631492980223e-06,
      "loss": 3.1144,
      "step": 220073
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.544807195663452,
      "learning_rate": 2.9703889175233587e-06,
      "loss": 3.0584,
      "step": 220074
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7509572505950928,
      "learning_rate": 2.9698147409720872e-06,
      "loss": 2.9467,
      "step": 220075
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.600590467453003,
      "learning_rate": 2.9692406196444086e-06,
      "loss": 2.6071,
      "step": 220076
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.04056715965271,
      "learning_rate": 2.9686665535402886e-06,
      "loss": 3.1361,
      "step": 220077
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9421982765197754,
      "learning_rate": 2.968092542659928e-06,
      "loss": 2.8078,
      "step": 220078
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5596182346343994,
      "learning_rate": 2.967518587003359e-06,
      "loss": 2.8084,
      "step": 220079
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.47212553024292,
      "learning_rate": 2.9669446865707825e-06,
      "loss": 2.7204,
      "step": 220080
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9444823265075684,
      "learning_rate": 2.966370841362231e-06,
      "loss": 2.6897,
      "step": 220081
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8539011478424072,
      "learning_rate": 2.965797051377905e-06,
      "loss": 2.9655,
      "step": 220082
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8671395778656006,
      "learning_rate": 2.9652233166177707e-06,
      "loss": 3.0772,
      "step": 220083
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.844005823135376,
      "learning_rate": 2.9646496370820283e-06,
      "loss": 3.0616,
      "step": 220084
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.489375352859497,
      "learning_rate": 2.9640760127707775e-06,
      "loss": 3.1749,
      "step": 220085
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.829949378967285,
      "learning_rate": 2.9635024436840847e-06,
      "loss": 2.8402,
      "step": 220086
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.284778356552124,
      "learning_rate": 2.962928929822084e-06,
      "loss": 2.9376,
      "step": 220087
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.167466402053833,
      "learning_rate": 2.9623554711849405e-06,
      "loss": 2.8228,
      "step": 220088
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7676987648010254,
      "learning_rate": 2.9617820677726554e-06,
      "loss": 2.7693,
      "step": 220089
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.973551034927368,
      "learning_rate": 2.961208719585395e-06,
      "loss": 2.9698,
      "step": 220090
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6932895183563232,
      "learning_rate": 2.9606354266232257e-06,
      "loss": 2.8004,
      "step": 220091
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.678605794906616,
      "learning_rate": 2.960062188886314e-06,
      "loss": 3.036,
      "step": 220092
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.53509783744812,
      "learning_rate": 2.9594890063746934e-06,
      "loss": 2.8926,
      "step": 220093
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8361711502075195,
      "learning_rate": 2.9589158790885635e-06,
      "loss": 3.172,
      "step": 220094
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5108256340026855,
      "learning_rate": 2.9583428070279246e-06,
      "loss": 3.1011,
      "step": 220095
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1401171684265137,
      "learning_rate": 2.9577697901929763e-06,
      "loss": 2.9912,
      "step": 220096
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6455318927764893,
      "learning_rate": 2.9571968285837854e-06,
      "loss": 2.8131,
      "step": 220097
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.127628803253174,
      "learning_rate": 2.9566239222004516e-06,
      "loss": 2.9411,
      "step": 220098
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8014984130859375,
      "learning_rate": 2.956051071043075e-06,
      "loss": 2.7767,
      "step": 220099
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.4100522994995117,
      "learning_rate": 2.955478275111789e-06,
      "loss": 3.0651,
      "step": 220100
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9717555046081543,
      "learning_rate": 2.9549055344066595e-06,
      "loss": 2.9643,
      "step": 220101
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5947132110595703,
      "learning_rate": 2.954332848927854e-06,
      "loss": 2.8525,
      "step": 220102
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3159310817718506,
      "learning_rate": 2.9537602186754384e-06,
      "loss": 2.9637,
      "step": 220103
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.962808847427368,
      "learning_rate": 2.9531876436495126e-06,
      "loss": 3.0568,
      "step": 220104
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6960482597351074,
      "learning_rate": 2.9526151238502104e-06,
      "loss": 2.9729,
      "step": 220105
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6668126583099365,
      "learning_rate": 2.9520426592776313e-06,
      "loss": 2.722,
      "step": 220106
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0148115158081055,
      "learning_rate": 2.9514702499318756e-06,
      "loss": 2.8698,
      "step": 220107
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.525111198425293,
      "learning_rate": 2.9508978958130424e-06,
      "loss": 2.9807,
      "step": 220108
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6943295001983643,
      "learning_rate": 2.950325596921199e-06,
      "loss": 3.0055,
      "step": 220109
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.634759426116943,
      "learning_rate": 2.9497533532565453e-06,
      "loss": 2.9143,
      "step": 220110
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.081610918045044,
      "learning_rate": 2.9491811648191142e-06,
      "loss": 2.8103,
      "step": 220111
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1402525901794434,
      "learning_rate": 2.948609031609073e-06,
      "loss": 3.0778,
      "step": 220112
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1525564193725586,
      "learning_rate": 2.948036953626487e-06,
      "loss": 2.8428,
      "step": 220113
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2405104637145996,
      "learning_rate": 2.947464930871457e-06,
      "loss": 3.1744,
      "step": 220114
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.242396831512451,
      "learning_rate": 2.946892963344083e-06,
      "loss": 2.9164,
      "step": 220115
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9357190132141113,
      "learning_rate": 2.9463210510445314e-06,
      "loss": 2.8403,
      "step": 220116
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.307323694229126,
      "learning_rate": 2.9457491939728017e-06,
      "loss": 2.9069,
      "step": 220117
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0545690059661865,
      "learning_rate": 2.9451773921291276e-06,
      "loss": 3.0204,
      "step": 220118
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.97064471244812,
      "learning_rate": 2.9446056455135093e-06,
      "loss": 2.9098,
      "step": 220119
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.632404327392578,
      "learning_rate": 2.944033954126146e-06,
      "loss": 2.9804,
      "step": 220120
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.873985528945923,
      "learning_rate": 2.943462317967038e-06,
      "loss": 2.9726,
      "step": 220121
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.59977650642395,
      "learning_rate": 2.942890737036385e-06,
      "loss": 2.6439,
      "step": 220122
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.138296365737915,
      "learning_rate": 2.942319211334221e-06,
      "loss": 2.7159,
      "step": 220123
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8024370670318604,
      "learning_rate": 2.9417477408607114e-06,
      "loss": 3.0252,
      "step": 220124
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.585566997528076,
      "learning_rate": 2.941176325615957e-06,
      "loss": 3.0485,
      "step": 220125
    },
    {
      "epoch": 2.87,
      "grad_norm": 5.0121235847473145,
      "learning_rate": 2.940604965600024e-06,
      "loss": 2.9066,
      "step": 220126
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2187538146972656,
      "learning_rate": 2.940033660813046e-06,
      "loss": 3.1319,
      "step": 220127
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.472809314727783,
      "learning_rate": 2.939462411255122e-06,
      "loss": 3.1649,
      "step": 220128
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9422554969787598,
      "learning_rate": 2.93889121692632e-06,
      "loss": 3.0522,
      "step": 220129
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8749473094940186,
      "learning_rate": 2.938320077826839e-06,
      "loss": 2.9501,
      "step": 220130
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.13663387298584,
      "learning_rate": 2.9377489939567123e-06,
      "loss": 2.968,
      "step": 220131
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9857170581817627,
      "learning_rate": 2.9371779653160733e-06,
      "loss": 3.0873,
      "step": 220132
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.446161985397339,
      "learning_rate": 2.9366069919050217e-06,
      "loss": 2.7639,
      "step": 220133
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.9829792976379395,
      "learning_rate": 2.9360360737236243e-06,
      "loss": 3.227,
      "step": 220134
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.343799114227295,
      "learning_rate": 2.935465210772048e-06,
      "loss": 3.0544,
      "step": 220135
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.946307420730591,
      "learning_rate": 2.934894403050392e-06,
      "loss": 3.1674,
      "step": 220136
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7623496055603027,
      "learning_rate": 2.9343236505587563e-06,
      "loss": 3.0871,
      "step": 220137
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3929007053375244,
      "learning_rate": 2.9337529532972083e-06,
      "loss": 2.8527,
      "step": 220138
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6240503787994385,
      "learning_rate": 2.9331823112659137e-06,
      "loss": 3.0431,
      "step": 220139
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.309610605239868,
      "learning_rate": 2.932611724464906e-06,
      "loss": 2.8907,
      "step": 220140
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.859456777572632,
      "learning_rate": 2.932041192894352e-06,
      "loss": 2.8385,
      "step": 220141
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2854955196380615,
      "learning_rate": 2.9314707165543516e-06,
      "loss": 2.8095,
      "step": 220142
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8522987365722656,
      "learning_rate": 2.9309002954450045e-06,
      "loss": 3.0753,
      "step": 220143
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.3085832595825195,
      "learning_rate": 2.9303299295663772e-06,
      "loss": 3.2438,
      "step": 220144
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8018712997436523,
      "learning_rate": 2.9297596189186366e-06,
      "loss": 3.0924,
      "step": 220145
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7428455352783203,
      "learning_rate": 2.929189363501849e-06,
      "loss": 3.0889,
      "step": 220146
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8749430179595947,
      "learning_rate": 2.9286191633161482e-06,
      "loss": 2.6291,
      "step": 220147
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6487114429473877,
      "learning_rate": 2.9280490183616e-06,
      "loss": 3.3106,
      "step": 220148
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.510568618774414,
      "learning_rate": 2.9274789286383716e-06,
      "loss": 3.0347,
      "step": 220149
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.67303729057312,
      "learning_rate": 2.9269088941464958e-06,
      "loss": 2.8952,
      "step": 220150
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7762067317962646,
      "learning_rate": 2.9263389148861394e-06,
      "loss": 2.8345,
      "step": 220151
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7979443073272705,
      "learning_rate": 2.925768990857336e-06,
      "loss": 2.9858,
      "step": 220152
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7225255966186523,
      "learning_rate": 2.9251991220602846e-06,
      "loss": 2.8621,
      "step": 220153
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.86787486076355,
      "learning_rate": 2.924629308495052e-06,
      "loss": 3.0685,
      "step": 220154
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1721160411834717,
      "learning_rate": 2.9240595501617394e-06,
      "loss": 2.769,
      "step": 220155
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.100476264953613,
      "learning_rate": 2.923489847060412e-06,
      "loss": 3.1512,
      "step": 220156
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0558674335479736,
      "learning_rate": 2.92292019919127e-06,
      "loss": 3.0105,
      "step": 220157
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0276968479156494,
      "learning_rate": 2.9223506065543135e-06,
      "loss": 2.7027,
      "step": 220158
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6884000301361084,
      "learning_rate": 2.9217810691497423e-06,
      "loss": 2.9059,
      "step": 220159
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.910057544708252,
      "learning_rate": 2.92121158697759e-06,
      "loss": 2.943,
      "step": 220160
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.870464324951172,
      "learning_rate": 2.9206421600380225e-06,
      "loss": 2.9402,
      "step": 220161
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.520846128463745,
      "learning_rate": 2.9200727883310736e-06,
      "loss": 3.018,
      "step": 220162
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6907947063446045,
      "learning_rate": 2.91950347185691e-06,
      "loss": 2.8813,
      "step": 220163
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.260886192321777,
      "learning_rate": 2.918934210615631e-06,
      "loss": 2.8531,
      "step": 220164
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2902884483337402,
      "learning_rate": 2.9183650046073035e-06,
      "loss": 2.8872,
      "step": 220165
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.038672685623169,
      "learning_rate": 2.9177958538320946e-06,
      "loss": 3.1237,
      "step": 220166
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.180635452270508,
      "learning_rate": 2.91722675829007e-06,
      "loss": 2.9401,
      "step": 220167
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.867537260055542,
      "learning_rate": 2.91665771798133e-06,
      "loss": 2.909,
      "step": 220168
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.868335485458374,
      "learning_rate": 2.9160887329059747e-06,
      "loss": 3.0397,
      "step": 220169
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.062849760055542,
      "learning_rate": 2.9155198030641367e-06,
      "loss": 3.0945,
      "step": 220170
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2736117839813232,
      "learning_rate": 2.914950928455917e-06,
      "loss": 2.9163,
      "step": 220171
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8169214725494385,
      "learning_rate": 2.9143821090814145e-06,
      "loss": 3.1424,
      "step": 220172
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.495925188064575,
      "learning_rate": 2.913813344940763e-06,
      "loss": 2.8191,
      "step": 220173
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8135006427764893,
      "learning_rate": 2.9132446360339957e-06,
      "loss": 3.2511,
      "step": 220174
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1767733097076416,
      "learning_rate": 2.912675982361279e-06,
      "loss": 2.8142,
      "step": 220175
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.262594223022461,
      "learning_rate": 2.9121073839227126e-06,
      "loss": 2.7346,
      "step": 220176
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.876680374145508,
      "learning_rate": 2.911538840718397e-06,
      "loss": 2.9933,
      "step": 220177
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.215394973754883,
      "learning_rate": 2.9109703527483987e-06,
      "loss": 2.8253,
      "step": 220178
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0336782932281494,
      "learning_rate": 2.910401920012917e-06,
      "loss": 3.0548,
      "step": 220179
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.659597873687744,
      "learning_rate": 2.909833542511952e-06,
      "loss": 2.638,
      "step": 220180
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.587402582168579,
      "learning_rate": 2.9092652202457046e-06,
      "loss": 2.9197,
      "step": 220181
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.015188455581665,
      "learning_rate": 2.9086969532141736e-06,
      "loss": 2.9012,
      "step": 220182
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4862849712371826,
      "learning_rate": 2.908128741417559e-06,
      "loss": 2.7416,
      "step": 220183
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.004941701889038,
      "learning_rate": 2.907560584855928e-06,
      "loss": 2.9659,
      "step": 220184
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9916603565216064,
      "learning_rate": 2.906992483529413e-06,
      "loss": 2.893,
      "step": 220185
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4671201705932617,
      "learning_rate": 2.9064244374380484e-06,
      "loss": 2.7949,
      "step": 220186
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1993117332458496,
      "learning_rate": 2.905856446582e-06,
      "loss": 2.8879,
      "step": 220187
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.167149305343628,
      "learning_rate": 2.9052885109613676e-06,
      "loss": 3.1097,
      "step": 220188
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1556224822998047,
      "learning_rate": 2.9047206305762517e-06,
      "loss": 3.0484,
      "step": 220189
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3403663635253906,
      "learning_rate": 2.9041528054267516e-06,
      "loss": 3.0238,
      "step": 220190
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.939065933227539,
      "learning_rate": 2.903585035513001e-06,
      "loss": 2.8663,
      "step": 220191
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.12274169921875,
      "learning_rate": 2.903017320835066e-06,
      "loss": 2.799,
      "step": 220192
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6519346237182617,
      "learning_rate": 2.9024496613930805e-06,
      "loss": 3.099,
      "step": 220193
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3244636058807373,
      "learning_rate": 2.9018820571871106e-06,
      "loss": 3.0701,
      "step": 220194
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7614617347717285,
      "learning_rate": 2.9013145082172895e-06,
      "loss": 2.9182,
      "step": 220195
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1151986122131348,
      "learning_rate": 2.9007470144837173e-06,
      "loss": 3.0177,
      "step": 220196
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1955530643463135,
      "learning_rate": 2.9001795759865277e-06,
      "loss": 2.8061,
      "step": 220197
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0380144119262695,
      "learning_rate": 2.89961219272582e-06,
      "loss": 2.9652,
      "step": 220198
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4728891849517822,
      "learning_rate": 2.8990448647016607e-06,
      "loss": 3.0707,
      "step": 220199
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.316211462020874,
      "learning_rate": 2.898477591914183e-06,
      "loss": 2.999,
      "step": 220200
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.601757287979126,
      "learning_rate": 2.897910374363488e-06,
      "loss": 2.973,
      "step": 220201
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0698211193084717,
      "learning_rate": 2.897343212049641e-06,
      "loss": 2.7833,
      "step": 220202
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9513721466064453,
      "learning_rate": 2.896776104972842e-06,
      "loss": 3.1388,
      "step": 220203
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5413706302642822,
      "learning_rate": 2.8962090531330918e-06,
      "loss": 2.6781,
      "step": 220204
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1489856243133545,
      "learning_rate": 2.8956420565305894e-06,
      "loss": 3.1289,
      "step": 220205
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.047314405441284,
      "learning_rate": 2.8950751151653684e-06,
      "loss": 2.9927,
      "step": 220206
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1604013442993164,
      "learning_rate": 2.894508229037562e-06,
      "loss": 2.9614,
      "step": 220207
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6489417552948,
      "learning_rate": 2.89394139814727e-06,
      "loss": 2.6516,
      "step": 220208
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9314966201782227,
      "learning_rate": 2.8933746224946263e-06,
      "loss": 2.9781,
      "step": 220209
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.884767532348633,
      "learning_rate": 2.8928079020796966e-06,
      "loss": 2.8695,
      "step": 220210
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0213451385498047,
      "learning_rate": 2.8922412369025815e-06,
      "loss": 3.3795,
      "step": 220211
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.156904697418213,
      "learning_rate": 2.8916746269634473e-06,
      "loss": 2.8413,
      "step": 220212
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8347795009613037,
      "learning_rate": 2.89110807226236e-06,
      "loss": 3.2405,
      "step": 220213
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7593579292297363,
      "learning_rate": 2.8905415727993875e-06,
      "loss": 2.9868,
      "step": 220214
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.639773368835449,
      "learning_rate": 2.889975128574695e-06,
      "loss": 2.723,
      "step": 220215
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.090660572052002,
      "learning_rate": 2.8894087395883502e-06,
      "loss": 2.8329,
      "step": 220216
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8505802154541016,
      "learning_rate": 2.8888424058404856e-06,
      "loss": 2.831,
      "step": 220217
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.864278554916382,
      "learning_rate": 2.8882761273312016e-06,
      "loss": 3.0504,
      "step": 220218
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1292564868927,
      "learning_rate": 2.887709904060598e-06,
      "loss": 2.7792,
      "step": 220219
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.68780779838562,
      "learning_rate": 2.8871437360287408e-06,
      "loss": 2.7159,
      "step": 220220
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.364129066467285,
      "learning_rate": 2.8865776232357973e-06,
      "loss": 3.174,
      "step": 220221
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2385754585266113,
      "learning_rate": 2.8860115656818674e-06,
      "loss": 2.8986,
      "step": 220222
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7090470790863037,
      "learning_rate": 2.8854455633669835e-06,
      "loss": 3.0962,
      "step": 220223
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2401580810546875,
      "learning_rate": 2.8848796162913466e-06,
      "loss": 2.894,
      "step": 220224
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.011491298675537,
      "learning_rate": 2.8843137244549896e-06,
      "loss": 3.0526,
      "step": 220225
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7906734943389893,
      "learning_rate": 2.883747887858079e-06,
      "loss": 3.1032,
      "step": 220226
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.215986728668213,
      "learning_rate": 2.883182106500681e-06,
      "loss": 3.0459,
      "step": 220227
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.408740997314453,
      "learning_rate": 2.882616380382896e-06,
      "loss": 2.8692,
      "step": 220228
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.221954345703125,
      "learning_rate": 2.882050709504824e-06,
      "loss": 2.8842,
      "step": 220229
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4117424488067627,
      "learning_rate": 2.881485093866631e-06,
      "loss": 2.9149,
      "step": 220230
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.178879737854004,
      "learning_rate": 2.880919533468351e-06,
      "loss": 2.8173,
      "step": 220231
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.927497386932373,
      "learning_rate": 2.8803540283101166e-06,
      "loss": 3.0948,
      "step": 220232
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5750889778137207,
      "learning_rate": 2.8797885783920282e-06,
      "loss": 3.0163,
      "step": 220233
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4084017276763916,
      "learning_rate": 2.879223183714219e-06,
      "loss": 2.917,
      "step": 220234
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.823622941970825,
      "learning_rate": 2.878657844276755e-06,
      "loss": 2.9532,
      "step": 220235
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4925484657287598,
      "learning_rate": 2.87809256007977e-06,
      "loss": 2.8391,
      "step": 220236
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3721976280212402,
      "learning_rate": 2.8775273311233302e-06,
      "loss": 2.9336,
      "step": 220237
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.897223711013794,
      "learning_rate": 2.876962157407603e-06,
      "loss": 3.0527,
      "step": 220238
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7987382411956787,
      "learning_rate": 2.8763970389326207e-06,
      "loss": 3.0774,
      "step": 220239
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8449182510375977,
      "learning_rate": 2.8758319756985503e-06,
      "loss": 3.1254,
      "step": 220240
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6383228302001953,
      "learning_rate": 2.8752669677054585e-06,
      "loss": 2.97,
      "step": 220241
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.178032875061035,
      "learning_rate": 2.8747020149534784e-06,
      "loss": 2.8635,
      "step": 220242
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9594948291778564,
      "learning_rate": 2.8741371174426764e-06,
      "loss": 2.9091,
      "step": 220243
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.968327283859253,
      "learning_rate": 2.873572275173186e-06,
      "loss": 2.8541,
      "step": 220244
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.161317825317383,
      "learning_rate": 2.8730074881451072e-06,
      "loss": 2.8031,
      "step": 220245
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0097172260284424,
      "learning_rate": 2.8724427563585727e-06,
      "loss": 3.009,
      "step": 220246
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.769467353820801,
      "learning_rate": 2.8718780798136164e-06,
      "loss": 3.133,
      "step": 220247
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9063470363616943,
      "learning_rate": 2.871313458510438e-06,
      "loss": 2.9457,
      "step": 220248
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.901064157485962,
      "learning_rate": 2.8707488924490374e-06,
      "loss": 2.8266,
      "step": 220249
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1982359886169434,
      "learning_rate": 2.8701843816296145e-06,
      "loss": 3.1034,
      "step": 220250
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.779291868209839,
      "learning_rate": 2.8696199260522023e-06,
      "loss": 2.7175,
      "step": 220251
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.126638889312744,
      "learning_rate": 2.8690555257169678e-06,
      "loss": 2.9926,
      "step": 220252
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0500872135162354,
      "learning_rate": 2.8684911806239443e-06,
      "loss": 3.0669,
      "step": 220253
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.310004472732544,
      "learning_rate": 2.867926890773331e-06,
      "loss": 2.8996,
      "step": 220254
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.837718963623047,
      "learning_rate": 2.8673626561651286e-06,
      "loss": 3.1317,
      "step": 220255
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.800426483154297,
      "learning_rate": 2.866798476799503e-06,
      "loss": 2.966,
      "step": 220256
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2694833278656006,
      "learning_rate": 2.866234352676555e-06,
      "loss": 2.9404,
      "step": 220257
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0795764923095703,
      "learning_rate": 2.8656702837964175e-06,
      "loss": 2.8876,
      "step": 220258
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8543334007263184,
      "learning_rate": 2.865106270159123e-06,
      "loss": 3.0505,
      "step": 220259
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5306265354156494,
      "learning_rate": 2.8645423117648057e-06,
      "loss": 3.2499,
      "step": 220260
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.929838180541992,
      "learning_rate": 2.8639784086135988e-06,
      "loss": 3.0386,
      "step": 220261
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.078387498855591,
      "learning_rate": 2.863414560705568e-06,
      "loss": 2.8828,
      "step": 220262
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.95275616645813,
      "learning_rate": 2.8628507680408476e-06,
      "loss": 2.8336,
      "step": 220263
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9593558311462402,
      "learning_rate": 2.8622870306195367e-06,
      "loss": 3.0763,
      "step": 220264
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1225578784942627,
      "learning_rate": 2.8617233484417358e-06,
      "loss": 2.8905,
      "step": 220265
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2701704502105713,
      "learning_rate": 2.861159721507578e-06,
      "loss": 3.0091,
      "step": 220266
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3122360706329346,
      "learning_rate": 2.860596149817096e-06,
      "loss": 3.0204,
      "step": 220267
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.102581739425659,
      "learning_rate": 2.860032633370457e-06,
      "loss": 3.04,
      "step": 220268
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7961325645446777,
      "learning_rate": 2.8594691721677276e-06,
      "loss": 2.6909,
      "step": 220269
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8243613243103027,
      "learning_rate": 2.8589057662090408e-06,
      "loss": 2.9704,
      "step": 220270
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.834646224975586,
      "learning_rate": 2.8583424154944966e-06,
      "loss": 2.9776,
      "step": 220271
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1647887229919434,
      "learning_rate": 2.857779120024195e-06,
      "loss": 3.1324,
      "step": 220272
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7304024696350098,
      "learning_rate": 2.857215879798236e-06,
      "loss": 3.0172,
      "step": 220273
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.675888776779175,
      "learning_rate": 2.856652694816752e-06,
      "loss": 3.0827,
      "step": 220274
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.9022178649902344,
      "learning_rate": 2.8560895650797776e-06,
      "loss": 2.9524,
      "step": 220275
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.944183111190796,
      "learning_rate": 2.855526490587512e-06,
      "loss": 2.9195,
      "step": 220276
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.567159652709961,
      "learning_rate": 2.8549634713400214e-06,
      "loss": 2.7228,
      "step": 220277
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7872958183288574,
      "learning_rate": 2.854400507337373e-06,
      "loss": 2.9611,
      "step": 220278
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1900885105133057,
      "learning_rate": 2.8538375985797004e-06,
      "loss": 2.6163,
      "step": 220279
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.917840003967285,
      "learning_rate": 2.8532747450671024e-06,
      "loss": 3.1492,
      "step": 220280
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7799417972564697,
      "learning_rate": 2.8527119467997128e-06,
      "loss": 3.0049,
      "step": 220281
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.472529411315918,
      "learning_rate": 2.8521492037775983e-06,
      "loss": 2.6586,
      "step": 220282
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.258455753326416,
      "learning_rate": 2.851586516000892e-06,
      "loss": 2.9126,
      "step": 220283
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9361090660095215,
      "learning_rate": 2.851023883469661e-06,
      "loss": 2.8783,
      "step": 220284
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.412327289581299,
      "learning_rate": 2.8504613061840375e-06,
      "loss": 2.9441,
      "step": 220285
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0167949199676514,
      "learning_rate": 2.8498987841441555e-06,
      "loss": 2.9328,
      "step": 220286
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.23854398727417,
      "learning_rate": 2.849336317350048e-06,
      "loss": 2.7905,
      "step": 220287
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1581201553344727,
      "learning_rate": 2.848773905801882e-06,
      "loss": 3.0984,
      "step": 220288
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1820454597473145,
      "learning_rate": 2.8482115494997236e-06,
      "loss": 2.8566,
      "step": 220289
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.603994369506836,
      "learning_rate": 2.8476492484437063e-06,
      "loss": 2.73,
      "step": 220290
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.080543279647827,
      "learning_rate": 2.8470870026338965e-06,
      "loss": 2.9486,
      "step": 220291
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0266005992889404,
      "learning_rate": 2.846524812070461e-06,
      "loss": 3.0649,
      "step": 220292
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.83272123336792,
      "learning_rate": 2.8459626767534325e-06,
      "loss": 2.9598,
      "step": 220293
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.062835454940796,
      "learning_rate": 2.8454005966829452e-06,
      "loss": 2.8175,
      "step": 220294
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.589747190475464,
      "learning_rate": 2.8448385718591317e-06,
      "loss": 2.8773,
      "step": 220295
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.870539665222168,
      "learning_rate": 2.8442766022820586e-06,
      "loss": 2.9649,
      "step": 220296
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0495097637176514,
      "learning_rate": 2.8437146879518257e-06,
      "loss": 2.8666,
      "step": 220297
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.933209180831909,
      "learning_rate": 2.8431528288686002e-06,
      "loss": 2.9038,
      "step": 220298
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.775674343109131,
      "learning_rate": 2.8425910250324147e-06,
      "loss": 2.582,
      "step": 220299
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0860984325408936,
      "learning_rate": 2.8420292764434026e-06,
      "loss": 3.1186,
      "step": 220300
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.759686231613159,
      "learning_rate": 2.8414675831016643e-06,
      "loss": 2.764,
      "step": 220301
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9718246459960938,
      "learning_rate": 2.840905945007299e-06,
      "loss": 2.8226,
      "step": 220302
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7258729934692383,
      "learning_rate": 2.8403443621604407e-06,
      "loss": 2.9564,
      "step": 220303
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9564249515533447,
      "learning_rate": 2.839782834561155e-06,
      "loss": 2.7612,
      "step": 220304
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.804481267929077,
      "learning_rate": 2.839221362209576e-06,
      "loss": 2.9024,
      "step": 220305
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5855560302734375,
      "learning_rate": 2.83865994510577e-06,
      "loss": 2.853,
      "step": 220306
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2397987842559814,
      "learning_rate": 2.8380985832499035e-06,
      "loss": 2.7948,
      "step": 220307
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9213340282440186,
      "learning_rate": 2.83753727664201e-06,
      "loss": 2.992,
      "step": 220308
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.266616106033325,
      "learning_rate": 2.836976025282256e-06,
      "loss": 2.8376,
      "step": 220309
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9614005088806152,
      "learning_rate": 2.8364148291706746e-06,
      "loss": 2.7823,
      "step": 220310
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2409722805023193,
      "learning_rate": 2.8358536883074657e-06,
      "loss": 2.9164,
      "step": 220311
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9145126342773438,
      "learning_rate": 2.8352926026926627e-06,
      "loss": 2.8426,
      "step": 220312
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.953394651412964,
      "learning_rate": 2.834731572326365e-06,
      "loss": 2.7135,
      "step": 220313
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0681698322296143,
      "learning_rate": 2.834170597208707e-06,
      "loss": 2.9071,
      "step": 220314
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.9686851501464844,
      "learning_rate": 2.833609677339821e-06,
      "loss": 2.8996,
      "step": 220315
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.835355758666992,
      "learning_rate": 2.8330488127197403e-06,
      "loss": 3.0105,
      "step": 220316
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0628623962402344,
      "learning_rate": 2.8324880033486318e-06,
      "loss": 2.6222,
      "step": 220317
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1645395755767822,
      "learning_rate": 2.8319272492265623e-06,
      "loss": 2.9139,
      "step": 220318
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.799224853515625,
      "learning_rate": 2.831366550353631e-06,
      "loss": 2.9901,
      "step": 220319
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.129061460494995,
      "learning_rate": 2.8308059067299714e-06,
      "loss": 2.9266,
      "step": 220320
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0460216999053955,
      "learning_rate": 2.830245318355684e-06,
      "loss": 3.0894,
      "step": 220321
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.364154577255249,
      "learning_rate": 2.8296847852308346e-06,
      "loss": 2.9412,
      "step": 220322
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5398988723754883,
      "learning_rate": 2.8291243073555903e-06,
      "loss": 3.2611,
      "step": 220323
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.129225730895996,
      "learning_rate": 2.828563884729984e-06,
      "loss": 3.0262,
      "step": 220324
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7007055282592773,
      "learning_rate": 2.828003517354216e-06,
      "loss": 2.7524,
      "step": 220325
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8354437351226807,
      "learning_rate": 2.8274432052282527e-06,
      "loss": 2.6999,
      "step": 220326
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0511112213134766,
      "learning_rate": 2.826882948352327e-06,
      "loss": 2.8195,
      "step": 220327
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1852893829345703,
      "learning_rate": 2.8263227467264727e-06,
      "loss": 2.9108,
      "step": 220328
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.660585880279541,
      "learning_rate": 2.825762600350856e-06,
      "loss": 2.7725,
      "step": 220329
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9939115047454834,
      "learning_rate": 2.825202509225477e-06,
      "loss": 3.1228,
      "step": 220330
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1235835552215576,
      "learning_rate": 2.824642473350569e-06,
      "loss": 3.1085,
      "step": 220331
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.513270378112793,
      "learning_rate": 2.824082492726132e-06,
      "loss": 3.2263,
      "step": 220332
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8075313568115234,
      "learning_rate": 2.823522567352332e-06,
      "loss": 3.1105,
      "step": 220333
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.475064754486084,
      "learning_rate": 2.822962697229203e-06,
      "loss": 2.9516,
      "step": 220334
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6833648681640625,
      "learning_rate": 2.8224028823569443e-06,
      "loss": 2.8418,
      "step": 220335
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.921677589416504,
      "learning_rate": 2.8218431227355564e-06,
      "loss": 2.9753,
      "step": 220336
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.898498058319092,
      "learning_rate": 2.8212834183652387e-06,
      "loss": 3.1464,
      "step": 220337
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0010015964508057,
      "learning_rate": 2.820723769246058e-06,
      "loss": 2.7027,
      "step": 220338
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7728142738342285,
      "learning_rate": 2.8201641753781145e-06,
      "loss": 2.8725,
      "step": 220339
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.013741970062256,
      "learning_rate": 2.8196046367614745e-06,
      "loss": 2.89,
      "step": 220340
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.640644073486328,
      "learning_rate": 2.8190451533963045e-06,
      "loss": 2.8808,
      "step": 220341
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.908841133117676,
      "learning_rate": 2.818485725282671e-06,
      "loss": 3.1397,
      "step": 220342
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.095845937728882,
      "learning_rate": 2.8179263524207073e-06,
      "loss": 3.1405,
      "step": 220343
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6967101097106934,
      "learning_rate": 2.817367034810514e-06,
      "loss": 2.7402,
      "step": 220344
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.292912244796753,
      "learning_rate": 2.8168077724521565e-06,
      "loss": 3.0144,
      "step": 220345
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1676554679870605,
      "learning_rate": 2.816248565345769e-06,
      "loss": 2.9422,
      "step": 220346
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8488516807556152,
      "learning_rate": 2.815689413491451e-06,
      "loss": 2.965,
      "step": 220347
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0271730422973633,
      "learning_rate": 2.8151303168893027e-06,
      "loss": 2.8952,
      "step": 220348
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.847930431365967,
      "learning_rate": 2.814571275539457e-06,
      "loss": 2.9047,
      "step": 220349
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.709137439727783,
      "learning_rate": 2.81401228944198e-06,
      "loss": 3.0687,
      "step": 220350
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.080528497695923,
      "learning_rate": 2.813453358596973e-06,
      "loss": 2.8674,
      "step": 220351
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.227444887161255,
      "learning_rate": 2.8128944830045352e-06,
      "loss": 2.8905,
      "step": 220352
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.390138626098633,
      "learning_rate": 2.812335662664833e-06,
      "loss": 3.047,
      "step": 220353
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8260838985443115,
      "learning_rate": 2.8117768975779e-06,
      "loss": 2.9235,
      "step": 220354
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.675520896911621,
      "learning_rate": 2.8112181877439023e-06,
      "loss": 2.6974,
      "step": 220355
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1144702434539795,
      "learning_rate": 2.8106595331628735e-06,
      "loss": 2.8817,
      "step": 220356
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7136940956115723,
      "learning_rate": 2.81010093383498e-06,
      "loss": 2.9928,
      "step": 220357
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0040488243103027,
      "learning_rate": 2.8095423897602556e-06,
      "loss": 2.9491,
      "step": 220358
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.048266649246216,
      "learning_rate": 2.8089839009388993e-06,
      "loss": 2.9074,
      "step": 220359
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.556342363357544,
      "learning_rate": 2.8084254673709117e-06,
      "loss": 2.8857,
      "step": 220360
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0275931358337402,
      "learning_rate": 2.8078670890564926e-06,
      "loss": 2.9489,
      "step": 220361
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6252214908599854,
      "learning_rate": 2.807308765995675e-06,
      "loss": 3.0498,
      "step": 220362
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1746022701263428,
      "learning_rate": 2.806750498188626e-06,
      "loss": 3.1692,
      "step": 220363
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.06404972076416,
      "learning_rate": 2.806192285635378e-06,
      "loss": 2.8746,
      "step": 220364
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7855417728424072,
      "learning_rate": 2.805634128336065e-06,
      "loss": 2.7695,
      "step": 220365
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.111274003982544,
      "learning_rate": 2.805076026290787e-06,
      "loss": 2.8672,
      "step": 220366
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7476160526275635,
      "learning_rate": 2.8045179794996763e-06,
      "loss": 2.9762,
      "step": 220367
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0832948684692383,
      "learning_rate": 2.8039599879628338e-06,
      "loss": 3.0965,
      "step": 220368
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8370120525360107,
      "learning_rate": 2.803402051680326e-06,
      "loss": 2.88,
      "step": 220369
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5194990634918213,
      "learning_rate": 2.802844170652252e-06,
      "loss": 2.8755,
      "step": 220370
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2836198806762695,
      "learning_rate": 2.802286344878779e-06,
      "loss": 2.9956,
      "step": 220371
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.76828670501709,
      "learning_rate": 2.8017285743599406e-06,
      "loss": 2.9674,
      "step": 220372
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7956743240356445,
      "learning_rate": 2.801170859095869e-06,
      "loss": 2.9014,
      "step": 220373
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.922785758972168,
      "learning_rate": 2.8006131990866985e-06,
      "loss": 3.1557,
      "step": 220374
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7663686275482178,
      "learning_rate": 2.8000555943324953e-06,
      "loss": 2.829,
      "step": 220375
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.877589464187622,
      "learning_rate": 2.7994980448333594e-06,
      "loss": 3.0111,
      "step": 220376
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0126142501831055,
      "learning_rate": 2.7989405505894235e-06,
      "loss": 3.0541,
      "step": 220377
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0717201232910156,
      "learning_rate": 2.7983831116007547e-06,
      "loss": 2.9698,
      "step": 220378
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7664639949798584,
      "learning_rate": 2.7978257278674866e-06,
      "loss": 2.803,
      "step": 220379
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.75807785987854,
      "learning_rate": 2.7972683993897183e-06,
      "loss": 3.0962,
      "step": 220380
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8191025257110596,
      "learning_rate": 2.79671112616755e-06,
      "loss": 2.8696,
      "step": 220381
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1978507041931152,
      "learning_rate": 2.796153908201049e-06,
      "loss": 3.1008,
      "step": 220382
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8016927242279053,
      "learning_rate": 2.7955967454903803e-06,
      "loss": 2.9867,
      "step": 220383
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4946157932281494,
      "learning_rate": 2.795039638035612e-06,
      "loss": 2.742,
      "step": 220384
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4091389179229736,
      "learning_rate": 2.794482585836877e-06,
      "loss": 2.6168,
      "step": 220385
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3960089683532715,
      "learning_rate": 2.793925588894241e-06,
      "loss": 2.938,
      "step": 220386
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.821240186691284,
      "learning_rate": 2.7933686472078052e-06,
      "loss": 3.0492,
      "step": 220387
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.630491256713867,
      "learning_rate": 2.7928117607777356e-06,
      "loss": 3.1446,
      "step": 220388
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.107840061187744,
      "learning_rate": 2.792254929604065e-06,
      "loss": 2.7858,
      "step": 220389
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.492535352706909,
      "learning_rate": 2.7916981536869275e-06,
      "loss": 2.6998,
      "step": 220390
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4920690059661865,
      "learning_rate": 2.7911414330264225e-06,
      "loss": 3.1368,
      "step": 220391
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.977329730987549,
      "learning_rate": 2.7905847676226835e-06,
      "loss": 2.8868,
      "step": 220392
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4392004013061523,
      "learning_rate": 2.790028157475743e-06,
      "loss": 2.832,
      "step": 220393
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.909647226333618,
      "learning_rate": 2.789471602585769e-06,
      "loss": 2.8018,
      "step": 220394
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.865100622177124,
      "learning_rate": 2.7889151029528266e-06,
      "loss": 2.8874,
      "step": 220395
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.905263900756836,
      "learning_rate": 2.78835865857705e-06,
      "loss": 2.8253,
      "step": 220396
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0290236473083496,
      "learning_rate": 2.787802269458539e-06,
      "loss": 3.0412,
      "step": 220397
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.136896848678589,
      "learning_rate": 2.78724593559736e-06,
      "loss": 2.763,
      "step": 220398
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.767636775970459,
      "learning_rate": 2.786689656993646e-06,
      "loss": 2.7031,
      "step": 220399
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.054612874984741,
      "learning_rate": 2.7861334336475304e-06,
      "loss": 2.9994,
      "step": 220400
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.062856912612915,
      "learning_rate": 2.785577265559047e-06,
      "loss": 2.9869,
      "step": 220401
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.019942283630371,
      "learning_rate": 2.7850211527283286e-06,
      "loss": 2.9462,
      "step": 220402
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0490775108337402,
      "learning_rate": 2.784465095155508e-06,
      "loss": 2.6493,
      "step": 220403
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.253357172012329,
      "learning_rate": 2.7839090928406528e-06,
      "loss": 2.9861,
      "step": 220404
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8206539154052734,
      "learning_rate": 2.783353145783895e-06,
      "loss": 3.1838,
      "step": 220405
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3188157081604004,
      "learning_rate": 2.7827972539853026e-06,
      "loss": 2.9432,
      "step": 220406
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.461742401123047,
      "learning_rate": 2.7822414174450083e-06,
      "loss": 2.837,
      "step": 220407
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1518590450286865,
      "learning_rate": 2.7816856361631114e-06,
      "loss": 2.9522,
      "step": 220408
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3894729614257812,
      "learning_rate": 2.7811299101397124e-06,
      "loss": 2.8393,
      "step": 220409
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.84981632232666,
      "learning_rate": 2.7805742393748777e-06,
      "loss": 3.0005,
      "step": 220410
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6673548221588135,
      "learning_rate": 2.780018623868807e-06,
      "loss": 3.0605,
      "step": 220411
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.901402473449707,
      "learning_rate": 2.779463063621501e-06,
      "loss": 3.0296,
      "step": 220412
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0309534072875977,
      "learning_rate": 2.778907558633092e-06,
      "loss": 2.9096,
      "step": 220413
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0706045627593994,
      "learning_rate": 2.778352108903714e-06,
      "loss": 2.9322,
      "step": 220414
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.544328451156616,
      "learning_rate": 2.7777967144334336e-06,
      "loss": 2.7184,
      "step": 220415
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.970914363861084,
      "learning_rate": 2.777241375222383e-06,
      "loss": 2.9499,
      "step": 220416
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9371025562286377,
      "learning_rate": 2.7766860912706635e-06,
      "loss": 2.8741,
      "step": 220417
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.742875814437866,
      "learning_rate": 2.776130862578374e-06,
      "loss": 3.0994,
      "step": 220418
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3888912200927734,
      "learning_rate": 2.7755756891455816e-06,
      "loss": 3.0081,
      "step": 220419
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2644944190979004,
      "learning_rate": 2.7750205709724527e-06,
      "loss": 2.8479,
      "step": 220420
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8206727504730225,
      "learning_rate": 2.7744655080590205e-06,
      "loss": 2.7915,
      "step": 220421
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.256453275680542,
      "learning_rate": 2.773910500405452e-06,
      "loss": 3.0148,
      "step": 220422
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.899012565612793,
      "learning_rate": 2.7733555480118463e-06,
      "loss": 2.815,
      "step": 220423
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9427897930145264,
      "learning_rate": 2.7728006508782376e-06,
      "loss": 2.7007,
      "step": 220424
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3265295028686523,
      "learning_rate": 2.7722458090047916e-06,
      "loss": 2.8947,
      "step": 220425
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4032490253448486,
      "learning_rate": 2.771691022391609e-06,
      "loss": 3.0633,
      "step": 220426
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8244545459747314,
      "learning_rate": 2.7711362910387556e-06,
      "loss": 2.7979,
      "step": 220427
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7919723987579346,
      "learning_rate": 2.770581614946399e-06,
      "loss": 3.0336,
      "step": 220428
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1711652278900146,
      "learning_rate": 2.7700269941145714e-06,
      "loss": 2.9755,
      "step": 220429
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.910841941833496,
      "learning_rate": 2.7694724285434066e-06,
      "loss": 2.9957,
      "step": 220430
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0462920665740967,
      "learning_rate": 2.7689179182330046e-06,
      "loss": 2.8978,
      "step": 220431
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8436031341552734,
      "learning_rate": 2.7683634631834984e-06,
      "loss": 2.745,
      "step": 220432
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.659212589263916,
      "learning_rate": 2.7678090633949214e-06,
      "loss": 3.0661,
      "step": 220433
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.087730407714844,
      "learning_rate": 2.7672547188674397e-06,
      "loss": 3.1155,
      "step": 220434
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.964674472808838,
      "learning_rate": 2.766700429601154e-06,
      "loss": 2.923,
      "step": 220435
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7559688091278076,
      "learning_rate": 2.7661461955961304e-06,
      "loss": 3.0435,
      "step": 220436
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.4522604942321777,
      "learning_rate": 2.7655920168525023e-06,
      "loss": 2.9197,
      "step": 220437
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.735867500305176,
      "learning_rate": 2.7650378933703697e-06,
      "loss": 2.9168,
      "step": 220438
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8295328617095947,
      "learning_rate": 2.764483825149799e-06,
      "loss": 3.1331,
      "step": 220439
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.632916212081909,
      "learning_rate": 2.763929812190924e-06,
      "loss": 2.645,
      "step": 220440
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6666903495788574,
      "learning_rate": 2.7633758544938765e-06,
      "loss": 3.0468,
      "step": 220441
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.312230110168457,
      "learning_rate": 2.7628219520586914e-06,
      "loss": 2.9317,
      "step": 220442
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.222365140914917,
      "learning_rate": 2.7622681048855345e-06,
      "loss": 2.927,
      "step": 220443
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8165159225463867,
      "learning_rate": 2.7617143129744724e-06,
      "loss": 2.7945,
      "step": 220444
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3563873767852783,
      "learning_rate": 2.761160576325605e-06,
      "loss": 3.0246,
      "step": 220445
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.187584161758423,
      "learning_rate": 2.7606068949390657e-06,
      "loss": 2.8542,
      "step": 220446
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9862849712371826,
      "learning_rate": 2.7600532688149545e-06,
      "loss": 3.0191,
      "step": 220447
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9713644981384277,
      "learning_rate": 2.7594996979533378e-06,
      "loss": 2.8275,
      "step": 220448
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0894110202789307,
      "learning_rate": 2.758946182354349e-06,
      "loss": 2.8503,
      "step": 220449
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9183402061462402,
      "learning_rate": 2.7583927220180877e-06,
      "loss": 2.9685,
      "step": 220450
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.625535011291504,
      "learning_rate": 2.7578393169446213e-06,
      "loss": 3.0224,
      "step": 220451
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.967205762863159,
      "learning_rate": 2.7572859671341153e-06,
      "loss": 2.8845,
      "step": 220452
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.911956310272217,
      "learning_rate": 2.756732672586637e-06,
      "loss": 2.9272,
      "step": 220453
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.320042610168457,
      "learning_rate": 2.756179433302286e-06,
      "loss": 2.8458,
      "step": 220454
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.685746669769287,
      "learning_rate": 2.755626249281162e-06,
      "loss": 2.8584,
      "step": 220455
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8330698013305664,
      "learning_rate": 2.755073120523399e-06,
      "loss": 2.9922,
      "step": 220456
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.516608715057373,
      "learning_rate": 2.754520047029063e-06,
      "loss": 3.114,
      "step": 220457
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1531155109405518,
      "learning_rate": 2.7539670287982875e-06,
      "loss": 3.0502,
      "step": 220458
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7553577423095703,
      "learning_rate": 2.753414065831172e-06,
      "loss": 2.9577,
      "step": 220459
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9267847537994385,
      "learning_rate": 2.7528611581277837e-06,
      "loss": 2.9945,
      "step": 220460
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1644644737243652,
      "learning_rate": 2.752308305688222e-06,
      "loss": 2.7943,
      "step": 220461
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1232967376708984,
      "learning_rate": 2.7517555085126875e-06,
      "loss": 3.3435,
      "step": 220462
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9160537719726562,
      "learning_rate": 2.751202766601146e-06,
      "loss": 3.2554,
      "step": 220463
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.89111065864563,
      "learning_rate": 2.7506500799537977e-06,
      "loss": 2.7653,
      "step": 220464
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.23400354385376,
      "learning_rate": 2.7500974485707094e-06,
      "loss": 2.9521,
      "step": 220465
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.9379355907440186,
      "learning_rate": 2.7495448724520143e-06,
      "loss": 2.5522,
      "step": 220466
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.557152032852173,
      "learning_rate": 2.7489923515977454e-06,
      "loss": 2.89,
      "step": 220467
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.064099073410034,
      "learning_rate": 2.7484398860081026e-06,
      "loss": 2.8987,
      "step": 220468
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.278903007507324,
      "learning_rate": 2.747887475683086e-06,
      "loss": 3.01,
      "step": 220469
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.008671760559082,
      "learning_rate": 2.747335120622862e-06,
      "loss": 3.0765,
      "step": 220470
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.171701192855835,
      "learning_rate": 2.746782820827531e-06,
      "loss": 2.8399,
      "step": 220471
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.865055561065674,
      "learning_rate": 2.746230576297159e-06,
      "loss": 2.6929,
      "step": 220472
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8945388793945312,
      "learning_rate": 2.745678387031913e-06,
      "loss": 3.1878,
      "step": 220473
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9347569942474365,
      "learning_rate": 2.7451262530318264e-06,
      "loss": 2.9684,
      "step": 220474
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.632430076599121,
      "learning_rate": 2.7445741742970317e-06,
      "loss": 2.7787,
      "step": 220475
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.926816701889038,
      "learning_rate": 2.7440221508276626e-06,
      "loss": 2.8065,
      "step": 220476
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.970590114593506,
      "learning_rate": 2.7434701826237526e-06,
      "loss": 3.0077,
      "step": 220477
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6742184162139893,
      "learning_rate": 2.742918269685468e-06,
      "loss": 3.0439,
      "step": 220478
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.791384696960449,
      "learning_rate": 2.7423664120128752e-06,
      "loss": 2.8344,
      "step": 220479
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.947758197784424,
      "learning_rate": 2.7418146096060746e-06,
      "loss": 2.636,
      "step": 220480
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.549269199371338,
      "learning_rate": 2.741262862465232e-06,
      "loss": 3.0546,
      "step": 220481
    },
    {
      "epoch": 2.87,
      "grad_norm": 7.621844291687012,
      "learning_rate": 2.7407111705903484e-06,
      "loss": 2.8779,
      "step": 220482
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5234906673431396,
      "learning_rate": 2.7401595339815897e-06,
      "loss": 2.9952,
      "step": 220483
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8185505867004395,
      "learning_rate": 2.739607952639056e-06,
      "loss": 3.0459,
      "step": 220484
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8986856937408447,
      "learning_rate": 2.739056426562847e-06,
      "loss": 2.8445,
      "step": 220485
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0336852073669434,
      "learning_rate": 2.7385049557530292e-06,
      "loss": 2.9716,
      "step": 220486
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9512157440185547,
      "learning_rate": 2.7379535402097697e-06,
      "loss": 3.111,
      "step": 220487
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9029767513275146,
      "learning_rate": 2.737402179933135e-06,
      "loss": 2.993,
      "step": 220488
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.701559066772461,
      "learning_rate": 2.736850874923191e-06,
      "loss": 2.863,
      "step": 220489
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8921704292297363,
      "learning_rate": 2.7362996251801052e-06,
      "loss": 2.8402,
      "step": 220490
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3086605072021484,
      "learning_rate": 2.7357484307039433e-06,
      "loss": 2.8529,
      "step": 220491
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.029446125030518,
      "learning_rate": 2.7351972914948393e-06,
      "loss": 2.898,
      "step": 220492
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.784773349761963,
      "learning_rate": 2.7346462075528597e-06,
      "loss": 3.3161,
      "step": 220493
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2704145908355713,
      "learning_rate": 2.734095178878104e-06,
      "loss": 2.8757,
      "step": 220494
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.811830997467041,
      "learning_rate": 2.733544205470706e-06,
      "loss": 2.8158,
      "step": 220495
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4824092388153076,
      "learning_rate": 2.732993287330765e-06,
      "loss": 2.6969,
      "step": 220496
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.180738687515259,
      "learning_rate": 2.7324424244583478e-06,
      "loss": 2.9725,
      "step": 220497
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5617733001708984,
      "learning_rate": 2.7318916168535874e-06,
      "loss": 2.8866,
      "step": 220498
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.136052131652832,
      "learning_rate": 2.7313408645165847e-06,
      "loss": 2.8686,
      "step": 220499
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.098370313644409,
      "learning_rate": 2.730790167447405e-06,
      "loss": 3.1286,
      "step": 220500
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1649928092956543,
      "learning_rate": 2.7302395256462496e-06,
      "loss": 2.8825,
      "step": 220501
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6068294048309326,
      "learning_rate": 2.729688939113117e-06,
      "loss": 2.8017,
      "step": 220502
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8247361183166504,
      "learning_rate": 2.7291384078481416e-06,
      "loss": 2.9111,
      "step": 220503
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5864651203155518,
      "learning_rate": 2.7285879318514227e-06,
      "loss": 2.7974,
      "step": 220504
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.348444938659668,
      "learning_rate": 2.7280375111230936e-06,
      "loss": 2.8315,
      "step": 220505
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.152305841445923,
      "learning_rate": 2.727487145663221e-06,
      "loss": 2.8926,
      "step": 220506
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1265640258789062,
      "learning_rate": 2.726936835471938e-06,
      "loss": 2.8693,
      "step": 220507
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.027332305908203,
      "learning_rate": 2.7263865805493113e-06,
      "loss": 3.0258,
      "step": 220508
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5316011905670166,
      "learning_rate": 2.725836380895474e-06,
      "loss": 3.1941,
      "step": 220509
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0696606636047363,
      "learning_rate": 2.7252862365104932e-06,
      "loss": 3.0331,
      "step": 220510
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6455631256103516,
      "learning_rate": 2.7247361473945017e-06,
      "loss": 2.5633,
      "step": 220511
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.769824504852295,
      "learning_rate": 2.7241861135475996e-06,
      "loss": 2.5935,
      "step": 220512
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.69228196144104,
      "learning_rate": 2.7236361349698867e-06,
      "loss": 3.0637,
      "step": 220513
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.748863458633423,
      "learning_rate": 2.7230862116614627e-06,
      "loss": 3.0767,
      "step": 220514
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.3795838356018066,
      "learning_rate": 2.7225363436224278e-06,
      "loss": 3.0087,
      "step": 220515
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.288914680480957,
      "learning_rate": 2.7219865308528487e-06,
      "loss": 3.0605,
      "step": 220516
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.589442729949951,
      "learning_rate": 2.7214367733529254e-06,
      "loss": 2.8719,
      "step": 220517
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4638798236846924,
      "learning_rate": 2.720887071122624e-06,
      "loss": 2.7856,
      "step": 220518
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0470619201660156,
      "learning_rate": 2.720337424162178e-06,
      "loss": 3.1303,
      "step": 220519
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.913025140762329,
      "learning_rate": 2.7197878324716204e-06,
      "loss": 3.2093,
      "step": 220520
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8084473609924316,
      "learning_rate": 2.719238296051085e-06,
      "loss": 3.0315,
      "step": 220521
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1801676750183105,
      "learning_rate": 2.7186888149006046e-06,
      "loss": 3.0092,
      "step": 220522
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1356232166290283,
      "learning_rate": 2.7181393890203795e-06,
      "loss": 2.9035,
      "step": 220523
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.167198657989502,
      "learning_rate": 2.7175900184104426e-06,
      "loss": 2.8308,
      "step": 220524
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8832039833068848,
      "learning_rate": 2.7170407030709275e-06,
      "loss": 2.8463,
      "step": 220525
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.99937105178833,
      "learning_rate": 2.7164914430019e-06,
      "loss": 2.7895,
      "step": 220526
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7604594230651855,
      "learning_rate": 2.715942238203528e-06,
      "loss": 2.9562,
      "step": 220527
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8319289684295654,
      "learning_rate": 2.7153930886758436e-06,
      "loss": 3.0327,
      "step": 220528
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.354436874389648,
      "learning_rate": 2.7148439944189806e-06,
      "loss": 2.7882,
      "step": 220529
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6143219470977783,
      "learning_rate": 2.7142949554330384e-06,
      "loss": 2.7427,
      "step": 220530
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7757058143615723,
      "learning_rate": 2.713745971718151e-06,
      "loss": 2.8062,
      "step": 220531
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7287003993988037,
      "learning_rate": 2.713197043274351e-06,
      "loss": 3.0241,
      "step": 220532
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5986528396606445,
      "learning_rate": 2.712648170101839e-06,
      "loss": 3.0445,
      "step": 220533
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.538940191268921,
      "learning_rate": 2.712099352200581e-06,
      "loss": 2.8587,
      "step": 220534
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.62829852104187,
      "learning_rate": 2.7115505895708102e-06,
      "loss": 3.0453,
      "step": 220535
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0964362621307373,
      "learning_rate": 2.7110018822125267e-06,
      "loss": 2.984,
      "step": 220536
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8933565616607666,
      "learning_rate": 2.7104532301259306e-06,
      "loss": 2.6754,
      "step": 220537
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.046010732650757,
      "learning_rate": 2.709904633311022e-06,
      "loss": 3.2064,
      "step": 220538
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5454981327056885,
      "learning_rate": 2.709356091768e-06,
      "loss": 3.0217,
      "step": 220539
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9925057888031006,
      "learning_rate": 2.708807605496899e-06,
      "loss": 2.9476,
      "step": 220540
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.200634479522705,
      "learning_rate": 2.708259174497851e-06,
      "loss": 3.1376,
      "step": 220541
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.269529104232788,
      "learning_rate": 2.7077107987709236e-06,
      "loss": 2.7641,
      "step": 220542
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.144033908843994,
      "learning_rate": 2.7071624783162827e-06,
      "loss": 2.8826,
      "step": 220543
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0149829387664795,
      "learning_rate": 2.706614213133962e-06,
      "loss": 2.9415,
      "step": 220544
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7285637855529785,
      "learning_rate": 2.7060660032241277e-06,
      "loss": 2.925,
      "step": 220545
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.197901487350464,
      "learning_rate": 2.7055178485868135e-06,
      "loss": 2.9256,
      "step": 220546
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.386821746826172,
      "learning_rate": 2.7049697492221854e-06,
      "loss": 2.9119,
      "step": 220547
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0625216960906982,
      "learning_rate": 2.7044217051302776e-06,
      "loss": 2.8948,
      "step": 220548
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.471810817718506,
      "learning_rate": 2.7038737163112554e-06,
      "loss": 2.9339,
      "step": 220549
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7195980548858643,
      "learning_rate": 2.7033257827651864e-06,
      "loss": 2.8718,
      "step": 220550
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.115579605102539,
      "learning_rate": 2.7027779044922037e-06,
      "loss": 2.8364,
      "step": 220551
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6065688133239746,
      "learning_rate": 2.7022300814923403e-06,
      "loss": 2.9133,
      "step": 220552
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.785127639770508,
      "learning_rate": 2.701682313765796e-06,
      "loss": 2.9088,
      "step": 220553
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.877565383911133,
      "learning_rate": 2.701134601312571e-06,
      "loss": 2.8991,
      "step": 220554
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.86141300201416,
      "learning_rate": 2.7005869441328653e-06,
      "loss": 3.2164,
      "step": 220555
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.758516550064087,
      "learning_rate": 2.7000393422266785e-06,
      "loss": 3.0164,
      "step": 220556
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7558786869049072,
      "learning_rate": 2.6994917955942107e-06,
      "loss": 2.7833,
      "step": 220557
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.804215431213379,
      "learning_rate": 2.6989443042354954e-06,
      "loss": 2.8289,
      "step": 220558
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.880671262741089,
      "learning_rate": 2.6983968681506985e-06,
      "loss": 2.7483,
      "step": 220559
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9102866649627686,
      "learning_rate": 2.6978494873398206e-06,
      "loss": 3.2428,
      "step": 220560
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.08681058883667,
      "learning_rate": 2.697302161803061e-06,
      "loss": 2.7999,
      "step": 220561
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0391674041748047,
      "learning_rate": 2.6967548915404868e-06,
      "loss": 3.0267,
      "step": 220562
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.193403959274292,
      "learning_rate": 2.6962076765521977e-06,
      "loss": 2.8689,
      "step": 220563
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.302492380142212,
      "learning_rate": 2.695660516838294e-06,
      "loss": 3.0521,
      "step": 220564
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9468836784362793,
      "learning_rate": 2.695113412398875e-06,
      "loss": 2.7258,
      "step": 220565
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.037933111190796,
      "learning_rate": 2.6945663632340408e-06,
      "loss": 2.9648,
      "step": 220566
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.503350257873535,
      "learning_rate": 2.694019369343892e-06,
      "loss": 3.0977,
      "step": 220567
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5814225673675537,
      "learning_rate": 2.693472430728527e-06,
      "loss": 2.8633,
      "step": 220568
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9594886302948,
      "learning_rate": 2.6929255473880805e-06,
      "loss": 3.0016,
      "step": 220569
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.3842735290527344,
      "learning_rate": 2.692378719322652e-06,
      "loss": 3.0265,
      "step": 220570
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7558200359344482,
      "learning_rate": 2.6918319465322745e-06,
      "loss": 2.8662,
      "step": 220571
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.982654571533203,
      "learning_rate": 2.6912852290171482e-06,
      "loss": 3.0113,
      "step": 220572
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8901207447052,
      "learning_rate": 2.6907385667773063e-06,
      "loss": 3.2481,
      "step": 220573
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.670109748840332,
      "learning_rate": 2.6901919598128486e-06,
      "loss": 2.7257,
      "step": 220574
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8235232830047607,
      "learning_rate": 2.6896454081239082e-06,
      "loss": 3.1251,
      "step": 220575
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7389674186706543,
      "learning_rate": 2.6890989117105854e-06,
      "loss": 2.8467,
      "step": 220576
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.869481325149536,
      "learning_rate": 2.6885524705729468e-06,
      "loss": 3.0789,
      "step": 220577
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.022091388702393,
      "learning_rate": 2.6880060847111252e-06,
      "loss": 2.8362,
      "step": 220578
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9393787384033203,
      "learning_rate": 2.6874597541252207e-06,
      "loss": 2.9817,
      "step": 220579
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.719437837600708,
      "learning_rate": 2.6869134788153336e-06,
      "loss": 2.9968,
      "step": 220580
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5746514797210693,
      "learning_rate": 2.686367258781563e-06,
      "loss": 2.9746,
      "step": 220581
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.040961503982544,
      "learning_rate": 2.68582109402401e-06,
      "loss": 3.0928,
      "step": 220582
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.884766101837158,
      "learning_rate": 2.6852749845427735e-06,
      "loss": 3.1483,
      "step": 220583
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7277028560638428,
      "learning_rate": 2.684728930337954e-06,
      "loss": 2.9445,
      "step": 220584
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.075819730758667,
      "learning_rate": 2.6841829314096506e-06,
      "loss": 2.7911,
      "step": 220585
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5982749462127686,
      "learning_rate": 2.6836369877579645e-06,
      "loss": 2.9211,
      "step": 220586
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6637167930603027,
      "learning_rate": 2.683091099383028e-06,
      "loss": 2.6962,
      "step": 220587
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8980019092559814,
      "learning_rate": 2.682545266284908e-06,
      "loss": 2.9535,
      "step": 220588
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6112756729125977,
      "learning_rate": 2.681999488463704e-06,
      "loss": 3.1117,
      "step": 220589
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.336230993270874,
      "learning_rate": 2.681453765919517e-06,
      "loss": 2.7638,
      "step": 220590
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4299798011779785,
      "learning_rate": 2.6809080986524788e-06,
      "loss": 3.011,
      "step": 220591
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.757078170776367,
      "learning_rate": 2.680362486662657e-06,
      "loss": 3.0914,
      "step": 220592
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.9002645015716553,
      "learning_rate": 2.679816929950218e-06,
      "loss": 3.0807,
      "step": 220593
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.884753942489624,
      "learning_rate": 2.6792714285151617e-06,
      "loss": 2.7084,
      "step": 220594
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.919875144958496,
      "learning_rate": 2.6787259823576545e-06,
      "loss": 2.9135,
      "step": 220595
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1668038368225098,
      "learning_rate": 2.6781805914777965e-06,
      "loss": 2.9717,
      "step": 220596
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.71417236328125,
      "learning_rate": 2.6776352558756543e-06,
      "loss": 3.0774,
      "step": 220597
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.964951515197754,
      "learning_rate": 2.6770899755513608e-06,
      "loss": 2.9229,
      "step": 220598
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6229052543640137,
      "learning_rate": 2.6765447505050163e-06,
      "loss": 2.7128,
      "step": 220599
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7218918800354004,
      "learning_rate": 2.675999580736721e-06,
      "loss": 2.7634,
      "step": 220600
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7783539295196533,
      "learning_rate": 2.6754544662465406e-06,
      "loss": 2.8467,
      "step": 220601
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1075949668884277,
      "learning_rate": 2.6749094070346423e-06,
      "loss": 3.0642,
      "step": 220602
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7392237186431885,
      "learning_rate": 2.674364403101059e-06,
      "loss": 3.0987,
      "step": 220603
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.260371685028076,
      "learning_rate": 2.673819454445958e-06,
      "loss": 3.0175,
      "step": 220604
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.035552978515625,
      "learning_rate": 2.673274561069372e-06,
      "loss": 2.9539,
      "step": 220605
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0716288089752197,
      "learning_rate": 2.6727297229714675e-06,
      "loss": 2.8974,
      "step": 220606
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.045837879180908,
      "learning_rate": 2.6721849401522775e-06,
      "loss": 2.9933,
      "step": 220607
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.937232255935669,
      "learning_rate": 2.6716402126119696e-06,
      "loss": 3.0622,
      "step": 220608
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.159841775894165,
      "learning_rate": 2.6710955403506094e-06,
      "loss": 2.8218,
      "step": 220609
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8939249515533447,
      "learning_rate": 2.6705509233682974e-06,
      "loss": 2.8291,
      "step": 220610
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1696293354034424,
      "learning_rate": 2.6700063616651334e-06,
      "loss": 2.9988,
      "step": 220611
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7010533809661865,
      "learning_rate": 2.669461855241284e-06,
      "loss": 2.8213,
      "step": 220612
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2792468070983887,
      "learning_rate": 2.6689174040967155e-06,
      "loss": 2.6808,
      "step": 220613
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5373735427856445,
      "learning_rate": 2.668373008231661e-06,
      "loss": 3.1756,
      "step": 220614
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2369821071624756,
      "learning_rate": 2.667828667646155e-06,
      "loss": 2.8857,
      "step": 220615
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1335558891296387,
      "learning_rate": 2.6672843823402957e-06,
      "loss": 2.7929,
      "step": 220616
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6441447734832764,
      "learning_rate": 2.6667401523142172e-06,
      "loss": 2.6324,
      "step": 220617
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.108269214630127,
      "learning_rate": 2.6661959775680196e-06,
      "loss": 2.8159,
      "step": 220618
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.88142728805542,
      "learning_rate": 2.6656518581017695e-06,
      "loss": 2.8576,
      "step": 220619
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.212542772293091,
      "learning_rate": 2.6651077939155997e-06,
      "loss": 2.9882,
      "step": 220620
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.655092716217041,
      "learning_rate": 2.664563785009577e-06,
      "loss": 2.8835,
      "step": 220621
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.063859939575195,
      "learning_rate": 2.6640198313838345e-06,
      "loss": 2.7669,
      "step": 220622
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.185593605041504,
      "learning_rate": 2.6634759330384725e-06,
      "loss": 2.9627,
      "step": 220623
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.810027837753296,
      "learning_rate": 2.6629320899735905e-06,
      "loss": 3.0361,
      "step": 220624
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.248610496520996,
      "learning_rate": 2.662388302189256e-06,
      "loss": 2.9799,
      "step": 220625
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.156987190246582,
      "learning_rate": 2.6618445696856006e-06,
      "loss": 3.1012,
      "step": 220626
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8339521884918213,
      "learning_rate": 2.6613008924627253e-06,
      "loss": 2.6591,
      "step": 220627
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.850895881652832,
      "learning_rate": 2.6607572705207302e-06,
      "loss": 2.877,
      "step": 220628
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9541115760803223,
      "learning_rate": 2.660213703859715e-06,
      "loss": 2.9254,
      "step": 220629
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9533207416534424,
      "learning_rate": 2.6596701924797794e-06,
      "loss": 2.8971,
      "step": 220630
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.114511489868164,
      "learning_rate": 2.659126736381023e-06,
      "loss": 2.9417,
      "step": 220631
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8815383911132812,
      "learning_rate": 2.6585833355635467e-06,
      "loss": 3.0515,
      "step": 220632
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9564175605773926,
      "learning_rate": 2.6580399900274496e-06,
      "loss": 2.9835,
      "step": 220633
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.895311117172241,
      "learning_rate": 2.657496699772832e-06,
      "loss": 3.1024,
      "step": 220634
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1769590377807617,
      "learning_rate": 2.656953464799794e-06,
      "loss": 2.9948,
      "step": 220635
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7950305938720703,
      "learning_rate": 2.6564102851084677e-06,
      "loss": 3.019,
      "step": 220636
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.660522222518921,
      "learning_rate": 2.6558671606989213e-06,
      "loss": 3.2324,
      "step": 220637
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.18172550201416,
      "learning_rate": 2.655324091571254e-06,
      "loss": 2.8908,
      "step": 220638
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7995407581329346,
      "learning_rate": 2.6547810777255653e-06,
      "loss": 2.9385,
      "step": 220639
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.74145245552063,
      "learning_rate": 2.6542381191619555e-06,
      "loss": 3.1665,
      "step": 220640
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.87968111038208,
      "learning_rate": 2.653695215880558e-06,
      "loss": 3.072,
      "step": 220641
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.787335157394409,
      "learning_rate": 2.6531523678814394e-06,
      "loss": 3.1487,
      "step": 220642
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.67099928855896,
      "learning_rate": 2.6526095751647326e-06,
      "loss": 2.8262,
      "step": 220643
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.731497049331665,
      "learning_rate": 2.6520668377305044e-06,
      "loss": 2.9932,
      "step": 220644
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7654452323913574,
      "learning_rate": 2.6515241555788547e-06,
      "loss": 3.095,
      "step": 220645
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8784372806549072,
      "learning_rate": 2.65098152870995e-06,
      "loss": 3.1129,
      "step": 220646
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.379523277282715,
      "learning_rate": 2.6504389571237903e-06,
      "loss": 2.9905,
      "step": 220647
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.333895444869995,
      "learning_rate": 2.6498964408205425e-06,
      "loss": 2.9018,
      "step": 220648
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.725346565246582,
      "learning_rate": 2.649353979800306e-06,
      "loss": 2.7166,
      "step": 220649
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3881380558013916,
      "learning_rate": 2.648811574063148e-06,
      "loss": 2.7331,
      "step": 220650
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4802372455596924,
      "learning_rate": 2.648269223609201e-06,
      "loss": 2.9339,
      "step": 220651
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3574204444885254,
      "learning_rate": 2.6477269284385318e-06,
      "loss": 2.7895,
      "step": 220652
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0427234172821045,
      "learning_rate": 2.647184688551274e-06,
      "loss": 2.8846,
      "step": 220653
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.948312759399414,
      "learning_rate": 2.646642503947527e-06,
      "loss": 2.9749,
      "step": 220654
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1440963745117188,
      "learning_rate": 2.646100374627391e-06,
      "loss": 2.9015,
      "step": 220655
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.717406749725342,
      "learning_rate": 2.645558300590933e-06,
      "loss": 2.89,
      "step": 220656
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.74993896484375,
      "learning_rate": 2.645016281838286e-06,
      "loss": 3.118,
      "step": 220657
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.758692979812622,
      "learning_rate": 2.6444743183695495e-06,
      "loss": 2.8367,
      "step": 220658
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9326298236846924,
      "learning_rate": 2.6439324101848237e-06,
      "loss": 2.9949,
      "step": 220659
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.77423095703125,
      "learning_rate": 2.6433905572842083e-06,
      "loss": 2.8747,
      "step": 220660
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8411097526550293,
      "learning_rate": 2.6428487596677706e-06,
      "loss": 3.1064,
      "step": 220661
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4205541610717773,
      "learning_rate": 2.642307017335643e-06,
      "loss": 2.7753,
      "step": 220662
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0231375694274902,
      "learning_rate": 2.6417653302879595e-06,
      "loss": 3.0403,
      "step": 220663
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0919876098632812,
      "learning_rate": 2.6412236985247526e-06,
      "loss": 2.9807,
      "step": 220664
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.470628023147583,
      "learning_rate": 2.6406821220461562e-06,
      "loss": 2.6013,
      "step": 220665
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2166907787323,
      "learning_rate": 2.640140600852303e-06,
      "loss": 2.8889,
      "step": 220666
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.8419411182403564,
      "learning_rate": 2.639599134943227e-06,
      "loss": 3.0171,
      "step": 220667
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.510044574737549,
      "learning_rate": 2.63905772431906e-06,
      "loss": 3.0599,
      "step": 220668
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.025599956512451,
      "learning_rate": 2.638516368979937e-06,
      "loss": 2.9542,
      "step": 220669
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5308523178100586,
      "learning_rate": 2.6379750689258906e-06,
      "loss": 2.9597,
      "step": 220670
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.841546058654785,
      "learning_rate": 2.6374338241570867e-06,
      "loss": 3.0447,
      "step": 220671
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6764891147613525,
      "learning_rate": 2.636892634673593e-06,
      "loss": 2.8228,
      "step": 220672
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.852965831756592,
      "learning_rate": 2.6363515004755087e-06,
      "loss": 3.0597,
      "step": 220673
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7763309478759766,
      "learning_rate": 2.6358104215629337e-06,
      "loss": 2.8594,
      "step": 220674
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9699172973632812,
      "learning_rate": 2.6352693979360017e-06,
      "loss": 3.0102,
      "step": 220675
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3874542713165283,
      "learning_rate": 2.634728429594746e-06,
      "loss": 3.2232,
      "step": 220676
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2224531173706055,
      "learning_rate": 2.6341875165393323e-06,
      "loss": 3.0813,
      "step": 220677
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.098320722579956,
      "learning_rate": 2.6336466587697947e-06,
      "loss": 3.0508,
      "step": 220678
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0142531394958496,
      "learning_rate": 2.633105856286366e-06,
      "loss": 2.6554,
      "step": 220679
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.470127582550049,
      "learning_rate": 2.63256510908898e-06,
      "loss": 2.8828,
      "step": 220680
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1051013469696045,
      "learning_rate": 2.632024417177836e-06,
      "loss": 2.9244,
      "step": 220681
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.505075216293335,
      "learning_rate": 2.631483780553001e-06,
      "loss": 3.0449,
      "step": 220682
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4827733039855957,
      "learning_rate": 2.630943199214608e-06,
      "loss": 3.2969,
      "step": 220683
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8245904445648193,
      "learning_rate": 2.6304026731626906e-06,
      "loss": 2.9262,
      "step": 220684
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0597925186157227,
      "learning_rate": 2.629862202397448e-06,
      "loss": 3.0867,
      "step": 220685
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.809126138687134,
      "learning_rate": 2.6293217869189143e-06,
      "loss": 2.9622,
      "step": 220686
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.740321397781372,
      "learning_rate": 2.628781426727189e-06,
      "loss": 2.9568,
      "step": 220687
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.16256046295166,
      "learning_rate": 2.628241121822372e-06,
      "loss": 3.0281,
      "step": 220688
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.787301540374756,
      "learning_rate": 2.62770087220463e-06,
      "loss": 3.0011,
      "step": 220689
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2667040824890137,
      "learning_rate": 2.627160677873963e-06,
      "loss": 2.8824,
      "step": 220690
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.048309564590454,
      "learning_rate": 2.6266205388305706e-06,
      "loss": 2.8595,
      "step": 220691
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.024061441421509,
      "learning_rate": 2.626080455074453e-06,
      "loss": 2.7773,
      "step": 220692
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7393081188201904,
      "learning_rate": 2.625540426605777e-06,
      "loss": 2.8185,
      "step": 220693
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.100717782974243,
      "learning_rate": 2.6250004534246084e-06,
      "loss": 2.7247,
      "step": 220694
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6872894763946533,
      "learning_rate": 2.6244605355311143e-06,
      "loss": 3.1298,
      "step": 220695
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.6591479778289795,
      "learning_rate": 2.623920672925295e-06,
      "loss": 2.964,
      "step": 220696
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9503612518310547,
      "learning_rate": 2.62338086560735e-06,
      "loss": 3.2662,
      "step": 220697
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1757349967956543,
      "learning_rate": 2.622841113577279e-06,
      "loss": 2.6825,
      "step": 220698
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.134883403778076,
      "learning_rate": 2.622301416835282e-06,
      "loss": 2.7177,
      "step": 220699
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2477149963378906,
      "learning_rate": 2.621761775381359e-06,
      "loss": 3.142,
      "step": 220700
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.762786388397217,
      "learning_rate": 2.6212221892157104e-06,
      "loss": 2.8736,
      "step": 220701
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.939929723739624,
      "learning_rate": 2.620682658338369e-06,
      "loss": 2.837,
      "step": 220702
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0998268127441406,
      "learning_rate": 2.620143182749501e-06,
      "loss": 2.9143,
      "step": 220703
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.7407917976379395,
      "learning_rate": 2.619603762449107e-06,
      "loss": 2.7204,
      "step": 220704
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.430098533630371,
      "learning_rate": 2.6190643974373536e-06,
      "loss": 3.0896,
      "step": 220705
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.80902361869812,
      "learning_rate": 2.6185250877143406e-06,
      "loss": 2.8244,
      "step": 220706
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.817298412322998,
      "learning_rate": 2.6179858332801675e-06,
      "loss": 2.9957,
      "step": 220707
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.136000156402588,
      "learning_rate": 2.617446634134901e-06,
      "loss": 2.9574,
      "step": 220708
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.93772554397583,
      "learning_rate": 2.616907490278675e-06,
      "loss": 2.9838,
      "step": 220709
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.074888229370117,
      "learning_rate": 2.6163684017115882e-06,
      "loss": 3.0162,
      "step": 220710
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6892096996307373,
      "learning_rate": 2.6158293684337082e-06,
      "loss": 2.7922,
      "step": 220711
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.818368434906006,
      "learning_rate": 2.615290390445168e-06,
      "loss": 3.0539,
      "step": 220712
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1134414672851562,
      "learning_rate": 2.614751467746068e-06,
      "loss": 2.7922,
      "step": 220713
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.662694215774536,
      "learning_rate": 2.614212600336474e-06,
      "loss": 2.7023,
      "step": 220714
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.60317325592041,
      "learning_rate": 2.6136737882165527e-06,
      "loss": 3.0343,
      "step": 220715
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.695594310760498,
      "learning_rate": 2.6131350313863374e-06,
      "loss": 2.9782,
      "step": 220716
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.865157127380371,
      "learning_rate": 2.6125963298459618e-06,
      "loss": 2.8862,
      "step": 220717
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8021140098571777,
      "learning_rate": 2.6120576835955254e-06,
      "loss": 2.7279,
      "step": 220718
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.949188709259033,
      "learning_rate": 2.611519092635095e-06,
      "loss": 2.7438,
      "step": 220719
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3126869201660156,
      "learning_rate": 2.6109805569648034e-06,
      "loss": 2.7726,
      "step": 220720
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8922119140625,
      "learning_rate": 2.6104420765847845e-06,
      "loss": 2.7538,
      "step": 220721
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.702056646347046,
      "learning_rate": 2.6099036514950713e-06,
      "loss": 2.8377,
      "step": 220722
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7849764823913574,
      "learning_rate": 2.6093652816957966e-06,
      "loss": 3.0047,
      "step": 220723
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.696962833404541,
      "learning_rate": 2.608826967187028e-06,
      "loss": 2.8824,
      "step": 220724
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4977834224700928,
      "learning_rate": 2.6082887079688976e-06,
      "loss": 2.9709,
      "step": 220725
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.020554304122925,
      "learning_rate": 2.6077505040415057e-06,
      "loss": 2.7265,
      "step": 220726
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7634856700897217,
      "learning_rate": 2.607212355404986e-06,
      "loss": 2.7904,
      "step": 220727
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6394379138946533,
      "learning_rate": 2.6066742620593385e-06,
      "loss": 2.8111,
      "step": 220728
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2831616401672363,
      "learning_rate": 2.6061362240047625e-06,
      "loss": 2.9336,
      "step": 220729
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.651348352432251,
      "learning_rate": 2.6055982412412913e-06,
      "loss": 2.9478,
      "step": 220730
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.932964324951172,
      "learning_rate": 2.6050603137690917e-06,
      "loss": 2.7567,
      "step": 220731
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.106034755706787,
      "learning_rate": 2.6045224415881972e-06,
      "loss": 2.7509,
      "step": 220732
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.522257089614868,
      "learning_rate": 2.6039846246987404e-06,
      "loss": 2.8106,
      "step": 220733
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.765556573867798,
      "learning_rate": 2.603446863100822e-06,
      "loss": 2.8089,
      "step": 220734
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.68094539642334,
      "learning_rate": 2.6029091567945413e-06,
      "loss": 2.9124,
      "step": 220735
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.4653074741363525,
      "learning_rate": 2.6023715057799654e-06,
      "loss": 2.9141,
      "step": 220736
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1350491046905518,
      "learning_rate": 2.6018339100572605e-06,
      "loss": 2.9022,
      "step": 220737
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.006098508834839,
      "learning_rate": 2.60129636962646e-06,
      "loss": 2.7505,
      "step": 220738
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8056352138519287,
      "learning_rate": 2.600758884487697e-06,
      "loss": 3.02,
      "step": 220739
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9147801399230957,
      "learning_rate": 2.6002214546410716e-06,
      "loss": 3.2017,
      "step": 220740
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9312620162963867,
      "learning_rate": 2.599684080086684e-06,
      "loss": 2.9277,
      "step": 220741
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.118360996246338,
      "learning_rate": 2.599146760824633e-06,
      "loss": 3.0766,
      "step": 220742
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8521437644958496,
      "learning_rate": 2.5986094968549863e-06,
      "loss": 2.8661,
      "step": 220743
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.006263017654419,
      "learning_rate": 2.5980722881779104e-06,
      "loss": 2.9341,
      "step": 220744
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.133100986480713,
      "learning_rate": 2.597535134793438e-06,
      "loss": 2.9036,
      "step": 220745
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.5525455474853516,
      "learning_rate": 2.596998036701703e-06,
      "loss": 3.1122,
      "step": 220746
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.513468027114868,
      "learning_rate": 2.596460993902805e-06,
      "loss": 2.8932,
      "step": 220747
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.431501865386963,
      "learning_rate": 2.5959240063968435e-06,
      "loss": 2.7693,
      "step": 220748
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2527499198913574,
      "learning_rate": 2.595387074183919e-06,
      "loss": 2.9973,
      "step": 220749
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3481225967407227,
      "learning_rate": 2.5948501972640977e-06,
      "loss": 3.0458,
      "step": 220750
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.849864959716797,
      "learning_rate": 2.5943133756375467e-06,
      "loss": 2.7817,
      "step": 220751
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1387131214141846,
      "learning_rate": 2.593776609304299e-06,
      "loss": 2.9878,
      "step": 220752
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.350579023361206,
      "learning_rate": 2.5932398982644874e-06,
      "loss": 2.9521,
      "step": 220753
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8530027866363525,
      "learning_rate": 2.5927032425182125e-06,
      "loss": 2.914,
      "step": 220754
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.830784559249878,
      "learning_rate": 2.5921666420655407e-06,
      "loss": 3.0254,
      "step": 220755
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.730241298675537,
      "learning_rate": 2.5916300969066384e-06,
      "loss": 2.9167,
      "step": 220756
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.5582756996154785,
      "learning_rate": 2.5910936070415388e-06,
      "loss": 2.6871,
      "step": 220757
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9797489643096924,
      "learning_rate": 2.590557172470409e-06,
      "loss": 2.9455,
      "step": 220758
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.024069309234619,
      "learning_rate": 2.590020793193248e-06,
      "loss": 2.894,
      "step": 220759
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.909846544265747,
      "learning_rate": 2.5894844692102567e-06,
      "loss": 2.9418,
      "step": 220760
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0358057022094727,
      "learning_rate": 2.588948200521468e-06,
      "loss": 2.5706,
      "step": 220761
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.517075538635254,
      "learning_rate": 2.588411987127048e-06,
      "loss": 2.9364,
      "step": 220762
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.178882598876953,
      "learning_rate": 2.5878758290270306e-06,
      "loss": 2.8481,
      "step": 220763
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.760806083679199,
      "learning_rate": 2.5873397262215488e-06,
      "loss": 2.7195,
      "step": 220764
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1604466438293457,
      "learning_rate": 2.5868036787107025e-06,
      "loss": 2.8768,
      "step": 220765
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7139856815338135,
      "learning_rate": 2.586267686494559e-06,
      "loss": 3.0083,
      "step": 220766
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.336977481842041,
      "learning_rate": 2.58573174957325e-06,
      "loss": 2.8047,
      "step": 220767
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.155320644378662,
      "learning_rate": 2.5851958679468763e-06,
      "loss": 2.802,
      "step": 220768
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.135328531265259,
      "learning_rate": 2.584660041615505e-06,
      "loss": 3.0115,
      "step": 220769
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7372899055480957,
      "learning_rate": 2.584124270579302e-06,
      "loss": 2.6368,
      "step": 220770
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1346967220306396,
      "learning_rate": 2.583588554838301e-06,
      "loss": 2.8954,
      "step": 220771
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2384910583496094,
      "learning_rate": 2.5830528943926344e-06,
      "loss": 2.8516,
      "step": 220772
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1190624237060547,
      "learning_rate": 2.5825172892423694e-06,
      "loss": 2.8815,
      "step": 220773
    },
    {
      "epoch": 2.87,
      "grad_norm": 4.868344783782959,
      "learning_rate": 2.5819817393876395e-06,
      "loss": 2.8168,
      "step": 220774
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.1534905433654785,
      "learning_rate": 2.5814462448285443e-06,
      "loss": 2.7107,
      "step": 220775
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2416064739227295,
      "learning_rate": 2.580910805565184e-06,
      "loss": 2.9772,
      "step": 220776
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3723363876342773,
      "learning_rate": 2.5803754215975915e-06,
      "loss": 2.8286,
      "step": 220777
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.56339693069458,
      "learning_rate": 2.5798400929259667e-06,
      "loss": 3.103,
      "step": 220778
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8380398750305176,
      "learning_rate": 2.579304819550343e-06,
      "loss": 2.9768,
      "step": 220779
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.4585132598876953,
      "learning_rate": 2.578769601470887e-06,
      "loss": 3.1252,
      "step": 220780
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9602906703948975,
      "learning_rate": 2.5782344386875984e-06,
      "loss": 2.9475,
      "step": 220781
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.7197906970977783,
      "learning_rate": 2.577699331200678e-06,
      "loss": 2.911,
      "step": 220782
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9691245555877686,
      "learning_rate": 2.5771642790101244e-06,
      "loss": 2.8447,
      "step": 220783
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.656614065170288,
      "learning_rate": 2.576629282116138e-06,
      "loss": 3.0859,
      "step": 220784
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.422454833984375,
      "learning_rate": 2.5760943405187196e-06,
      "loss": 2.7242,
      "step": 220785
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.3090147972106934,
      "learning_rate": 2.575559454218068e-06,
      "loss": 3.1158,
      "step": 220786
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.441466808319092,
      "learning_rate": 2.5750246232141837e-06,
      "loss": 3.0091,
      "step": 220787
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.85213303565979,
      "learning_rate": 2.5744898475072662e-06,
      "loss": 2.9012,
      "step": 220788
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.8197429180145264,
      "learning_rate": 2.573955127097349e-06,
      "loss": 2.7239,
      "step": 220789
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2562122344970703,
      "learning_rate": 2.573420461984532e-06,
      "loss": 3.2295,
      "step": 220790
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.6799428462982178,
      "learning_rate": 2.5728858521689487e-06,
      "loss": 2.7753,
      "step": 220791
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.9787092208862305,
      "learning_rate": 2.5723512976506657e-06,
      "loss": 2.7292,
      "step": 220792
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.333235502243042,
      "learning_rate": 2.5718167984298153e-06,
      "loss": 2.9684,
      "step": 220793
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.2623276710510254,
      "learning_rate": 2.5712823545064656e-06,
      "loss": 2.9002,
      "step": 220794
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.664025068283081,
      "learning_rate": 2.5707479658807483e-06,
      "loss": 2.8567,
      "step": 220795
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.895406484603882,
      "learning_rate": 2.5702136325527312e-06,
      "loss": 2.9978,
      "step": 220796
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.438983917236328,
      "learning_rate": 2.5696793545225135e-06,
      "loss": 2.7825,
      "step": 220797
    },
    {
      "epoch": 2.87,
      "grad_norm": 2.519174575805664,
      "learning_rate": 2.5691451317901958e-06,
      "loss": 2.7697,
      "step": 220798
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.075779438018799,
      "learning_rate": 2.5686109643559104e-06,
      "loss": 2.9154,
      "step": 220799
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.0691938400268555,
      "learning_rate": 2.568076852219725e-06,
      "loss": 2.7946,
      "step": 220800
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.9250400066375732,
      "learning_rate": 2.5675427953817716e-06,
      "loss": 3.0045,
      "step": 220801
    },
    {
      "epoch": 2.87,
      "grad_norm": 3.9339287281036377,
      "learning_rate": 2.567008793842118e-06,
      "loss": 3.1102,
      "step": 220802
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.732208013534546,
      "learning_rate": 2.5664748476008634e-06,
      "loss": 3.002,
      "step": 220803
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4436988830566406,
      "learning_rate": 2.565940956658108e-06,
      "loss": 2.8725,
      "step": 220804
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7585864067077637,
      "learning_rate": 2.5654071210139514e-06,
      "loss": 3.1112,
      "step": 220805
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.395009994506836,
      "learning_rate": 2.564873340668527e-06,
      "loss": 2.9176,
      "step": 220806
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3196170330047607,
      "learning_rate": 2.564339615621902e-06,
      "loss": 3.0125,
      "step": 220807
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1523427963256836,
      "learning_rate": 2.5638059458741755e-06,
      "loss": 2.9397,
      "step": 220808
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9972734451293945,
      "learning_rate": 2.563272331425448e-06,
      "loss": 3.0165,
      "step": 220809
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9013867378234863,
      "learning_rate": 2.562738772275852e-06,
      "loss": 2.9884,
      "step": 220810
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.159055709838867,
      "learning_rate": 2.5622052684254213e-06,
      "loss": 2.9514,
      "step": 220811
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.378261566162109,
      "learning_rate": 2.561671819874289e-06,
      "loss": 2.8315,
      "step": 220812
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.405837059020996,
      "learning_rate": 2.561138426622589e-06,
      "loss": 2.7358,
      "step": 220813
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2719507217407227,
      "learning_rate": 2.5606050886703532e-06,
      "loss": 2.931,
      "step": 220814
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.186863422393799,
      "learning_rate": 2.5600718060177494e-06,
      "loss": 2.889,
      "step": 220815
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.093568325042725,
      "learning_rate": 2.5595385786648105e-06,
      "loss": 2.775,
      "step": 220816
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9386415481567383,
      "learning_rate": 2.5590054066116695e-06,
      "loss": 2.8386,
      "step": 220817
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9752116203308105,
      "learning_rate": 2.5584722898584597e-06,
      "loss": 2.94,
      "step": 220818
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9976959228515625,
      "learning_rate": 2.5579392284052147e-06,
      "loss": 2.7917,
      "step": 220819
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.865220069885254,
      "learning_rate": 2.5574062222520675e-06,
      "loss": 2.8829,
      "step": 220820
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0430617332458496,
      "learning_rate": 2.556873271399118e-06,
      "loss": 2.8447,
      "step": 220821
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5344367027282715,
      "learning_rate": 2.5563403758464663e-06,
      "loss": 3.0454,
      "step": 220822
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.405725955963135,
      "learning_rate": 2.555807535594179e-06,
      "loss": 2.572,
      "step": 220823
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.231841564178467,
      "learning_rate": 2.5552747506423885e-06,
      "loss": 2.9814,
      "step": 220824
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.49076247215271,
      "learning_rate": 2.5547420209912295e-06,
      "loss": 3.0204,
      "step": 220825
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7578883171081543,
      "learning_rate": 2.554209346640734e-06,
      "loss": 2.9801,
      "step": 220826
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.027359962463379,
      "learning_rate": 2.553676727591003e-06,
      "loss": 2.6428,
      "step": 220827
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.782238721847534,
      "learning_rate": 2.553144163842169e-06,
      "loss": 2.9157,
      "step": 220828
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9100096225738525,
      "learning_rate": 2.552611655394332e-06,
      "loss": 3.2315,
      "step": 220829
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1168599128723145,
      "learning_rate": 2.5520792022475588e-06,
      "loss": 2.9127,
      "step": 220830
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.316072463989258,
      "learning_rate": 2.551546804402016e-06,
      "loss": 2.8321,
      "step": 220831
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2118420600891113,
      "learning_rate": 2.5510144618577035e-06,
      "loss": 2.8892,
      "step": 220832
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.747248649597168,
      "learning_rate": 2.5504821746147874e-06,
      "loss": 2.8869,
      "step": 220833
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2934062480926514,
      "learning_rate": 2.5499499426733684e-06,
      "loss": 2.8779,
      "step": 220834
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.854109525680542,
      "learning_rate": 2.5494177660335124e-06,
      "loss": 2.862,
      "step": 220835
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4621846675872803,
      "learning_rate": 2.54888564469532e-06,
      "loss": 2.7446,
      "step": 220836
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9095351696014404,
      "learning_rate": 2.548353578658924e-06,
      "loss": 2.9037,
      "step": 220837
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6637191772460938,
      "learning_rate": 2.5478215679243907e-06,
      "loss": 2.7265,
      "step": 220838
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8643414974212646,
      "learning_rate": 2.547289612491854e-06,
      "loss": 2.9492,
      "step": 220839
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2624571323394775,
      "learning_rate": 2.54675771236138e-06,
      "loss": 2.9823,
      "step": 220840
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.625582218170166,
      "learning_rate": 2.5462258675330694e-06,
      "loss": 2.8262,
      "step": 220841
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.490025997161865,
      "learning_rate": 2.5456940780070545e-06,
      "loss": 3.0179,
      "step": 220842
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.276697158813477,
      "learning_rate": 2.5451623437834022e-06,
      "loss": 3.062,
      "step": 220843
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.794182300567627,
      "learning_rate": 2.5446306648622126e-06,
      "loss": 2.8299,
      "step": 220844
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4108309745788574,
      "learning_rate": 2.5440990412435856e-06,
      "loss": 2.9899,
      "step": 220845
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.9060745239257812,
      "learning_rate": 2.543567472927621e-06,
      "loss": 2.7844,
      "step": 220846
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.571477174758911,
      "learning_rate": 2.5430359599144523e-06,
      "loss": 3.0222,
      "step": 220847
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.226146936416626,
      "learning_rate": 2.5425045022041123e-06,
      "loss": 2.9995,
      "step": 220848
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3779215812683105,
      "learning_rate": 2.541973099796768e-06,
      "loss": 2.8347,
      "step": 220849
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1969757080078125,
      "learning_rate": 2.5414417526924526e-06,
      "loss": 3.0805,
      "step": 220850
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3962597846984863,
      "learning_rate": 2.5409104608913323e-06,
      "loss": 3.0395,
      "step": 220851
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.066087484359741,
      "learning_rate": 2.5403792243934407e-06,
      "loss": 2.8661,
      "step": 220852
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.950524091720581,
      "learning_rate": 2.539848043198911e-06,
      "loss": 3.0869,
      "step": 220853
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2208311557769775,
      "learning_rate": 2.539316917307843e-06,
      "loss": 2.7459,
      "step": 220854
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3462302684783936,
      "learning_rate": 2.5387858467203705e-06,
      "loss": 3.0334,
      "step": 220855
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.304750919342041,
      "learning_rate": 2.5382548314364927e-06,
      "loss": 2.9414,
      "step": 220856
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.982384204864502,
      "learning_rate": 2.5377238714564095e-06,
      "loss": 2.5916,
      "step": 220857
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7644524574279785,
      "learning_rate": 2.5371929667801548e-06,
      "loss": 2.9352,
      "step": 220858
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2372448444366455,
      "learning_rate": 2.5366621174078615e-06,
      "loss": 2.7715,
      "step": 220859
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9352426528930664,
      "learning_rate": 2.5361313233395962e-06,
      "loss": 2.9636,
      "step": 220860
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2965831756591797,
      "learning_rate": 2.5356005845755258e-06,
      "loss": 3.034,
      "step": 220861
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.093156337738037,
      "learning_rate": 2.5350699011156827e-06,
      "loss": 2.8995,
      "step": 220862
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7064034938812256,
      "learning_rate": 2.5345392729601675e-06,
      "loss": 2.8513,
      "step": 220863
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.727511167526245,
      "learning_rate": 2.5340087001091135e-06,
      "loss": 2.9241,
      "step": 220864
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1479482650756836,
      "learning_rate": 2.5334781825625872e-06,
      "loss": 2.8343,
      "step": 220865
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8855173587799072,
      "learning_rate": 2.532947720320688e-06,
      "loss": 2.8641,
      "step": 220866
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.617079019546509,
      "learning_rate": 2.5324173133835836e-06,
      "loss": 3.0143,
      "step": 220867
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7312450408935547,
      "learning_rate": 2.5318869617512728e-06,
      "loss": 3.0433,
      "step": 220868
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8593642711639404,
      "learning_rate": 2.5313566654239225e-06,
      "loss": 3.1579,
      "step": 220869
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.759709596633911,
      "learning_rate": 2.5308264244015996e-06,
      "loss": 3.0698,
      "step": 220870
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.94096040725708,
      "learning_rate": 2.5302962386844037e-06,
      "loss": 2.9538,
      "step": 220871
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.16084623336792,
      "learning_rate": 2.5297661082724352e-06,
      "loss": 2.9033,
      "step": 220872
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8878965377807617,
      "learning_rate": 2.5292360331658266e-06,
      "loss": 2.7895,
      "step": 220873
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7084412574768066,
      "learning_rate": 2.5287060133646453e-06,
      "loss": 3.0254,
      "step": 220874
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.936215877532959,
      "learning_rate": 2.5281760488689573e-06,
      "loss": 2.9325,
      "step": 220875
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7916195392608643,
      "learning_rate": 2.5276461396789294e-06,
      "loss": 3.0521,
      "step": 220876
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6911494731903076,
      "learning_rate": 2.5271162857946283e-06,
      "loss": 2.8099,
      "step": 220877
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.111097812652588,
      "learning_rate": 2.52658648721612e-06,
      "loss": 3.0364,
      "step": 220878
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7047529220581055,
      "learning_rate": 2.5260567439435385e-06,
      "loss": 2.5965,
      "step": 220879
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.998788833618164,
      "learning_rate": 2.5255270559770167e-06,
      "loss": 2.8833,
      "step": 220880
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3529105186462402,
      "learning_rate": 2.5249974233165883e-06,
      "loss": 3.1223,
      "step": 220881
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.143739700317383,
      "learning_rate": 2.5244678459623856e-06,
      "loss": 3.0311,
      "step": 220882
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.950028419494629,
      "learning_rate": 2.5239383239145096e-06,
      "loss": 2.9452,
      "step": 220883
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8843793869018555,
      "learning_rate": 2.523408857173026e-06,
      "loss": 3.0017,
      "step": 220884
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.4067935943603516,
      "learning_rate": 2.5228794457380684e-06,
      "loss": 2.8563,
      "step": 220885
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7254104614257812,
      "learning_rate": 2.522350089609704e-06,
      "loss": 3.0024,
      "step": 220886
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0617353916168213,
      "learning_rate": 2.521820788788065e-06,
      "loss": 2.8744,
      "step": 220887
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.351801633834839,
      "learning_rate": 2.521291543273252e-06,
      "loss": 2.7722,
      "step": 220888
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.85144305229187,
      "learning_rate": 2.520762353065331e-06,
      "loss": 2.8375,
      "step": 220889
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9157285690307617,
      "learning_rate": 2.5202332181644024e-06,
      "loss": 3.2067,
      "step": 220890
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5080909729003906,
      "learning_rate": 2.5197041385705997e-06,
      "loss": 3.0036,
      "step": 220891
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.921067953109741,
      "learning_rate": 2.519175114283989e-06,
      "loss": 3.0478,
      "step": 220892
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2031211853027344,
      "learning_rate": 2.51864614530467e-06,
      "loss": 2.8834,
      "step": 220893
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.956484079360962,
      "learning_rate": 2.518117231632744e-06,
      "loss": 3.1504,
      "step": 220894
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6397511959075928,
      "learning_rate": 2.517588373268342e-06,
      "loss": 2.8863,
      "step": 220895
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.175898313522339,
      "learning_rate": 2.5170595702115326e-06,
      "loss": 3.047,
      "step": 220896
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.77679443359375,
      "learning_rate": 2.516530822462415e-06,
      "loss": 2.8244,
      "step": 220897
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.384666681289673,
      "learning_rate": 2.5160021300210886e-06,
      "loss": 2.8455,
      "step": 220898
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.908860445022583,
      "learning_rate": 2.515473492887654e-06,
      "loss": 2.9528,
      "step": 220899
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7889130115509033,
      "learning_rate": 2.514944911062178e-06,
      "loss": 2.8354,
      "step": 220900
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9693095684051514,
      "learning_rate": 2.514416384544826e-06,
      "loss": 2.9686,
      "step": 220901
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.909212589263916,
      "learning_rate": 2.513887913335633e-06,
      "loss": 2.6774,
      "step": 220902
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.178682804107666,
      "learning_rate": 2.5133594974347637e-06,
      "loss": 2.8249,
      "step": 220903
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.864593505859375,
      "learning_rate": 2.5128311368422525e-06,
      "loss": 2.9461,
      "step": 220904
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1346335411071777,
      "learning_rate": 2.5123028315581997e-06,
      "loss": 2.9972,
      "step": 220905
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7814791202545166,
      "learning_rate": 2.5117745815827373e-06,
      "loss": 2.927,
      "step": 220906
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0771610736846924,
      "learning_rate": 2.511246386915966e-06,
      "loss": 2.9376,
      "step": 220907
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.776665210723877,
      "learning_rate": 2.5107182475579522e-06,
      "loss": 2.9761,
      "step": 220908
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.097774028778076,
      "learning_rate": 2.5101901635088293e-06,
      "loss": 2.9288,
      "step": 220909
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.736224412918091,
      "learning_rate": 2.509662134768697e-06,
      "loss": 2.7782,
      "step": 220910
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0321998596191406,
      "learning_rate": 2.509134161337589e-06,
      "loss": 3.1257,
      "step": 220911
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.117523431777954,
      "learning_rate": 2.508606243215672e-06,
      "loss": 2.7989,
      "step": 220912
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5081992149353027,
      "learning_rate": 2.508078380403011e-06,
      "loss": 3.0416,
      "step": 220913
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.101675271987915,
      "learning_rate": 2.507550572899708e-06,
      "loss": 2.706,
      "step": 220914
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.543011426925659,
      "learning_rate": 2.5070228207058953e-06,
      "loss": 3.0388,
      "step": 220915
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.624478340148926,
      "learning_rate": 2.506495123821639e-06,
      "loss": 2.7804,
      "step": 220916
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5712225437164307,
      "learning_rate": 2.5059674822470065e-06,
      "loss": 3.0997,
      "step": 220917
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7108333110809326,
      "learning_rate": 2.505439895982164e-06,
      "loss": 2.8655,
      "step": 220918
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6203866004943848,
      "learning_rate": 2.504912365027145e-06,
      "loss": 2.6691,
      "step": 220919
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.736199378967285,
      "learning_rate": 2.504384889382083e-06,
      "loss": 2.8574,
      "step": 220920
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.338007688522339,
      "learning_rate": 2.5038574690471103e-06,
      "loss": 2.823,
      "step": 220921
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2903759479522705,
      "learning_rate": 2.503330104022261e-06,
      "loss": 2.8523,
      "step": 220922
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.59256649017334,
      "learning_rate": 2.5028027943076345e-06,
      "loss": 3.1515,
      "step": 220923
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1852355003356934,
      "learning_rate": 2.502275539903398e-06,
      "loss": 2.816,
      "step": 220924
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0097594261169434,
      "learning_rate": 2.501748340809584e-06,
      "loss": 2.9833,
      "step": 220925
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4769365787506104,
      "learning_rate": 2.501221197026293e-06,
      "loss": 2.9862,
      "step": 220926
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8919472694396973,
      "learning_rate": 2.500694108553658e-06,
      "loss": 3.0597,
      "step": 220927
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.303138494491577,
      "learning_rate": 2.500167075391746e-06,
      "loss": 2.9216,
      "step": 220928
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7695119380950928,
      "learning_rate": 2.4996400975406893e-06,
      "loss": 2.9739,
      "step": 220929
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.684265613555908,
      "learning_rate": 2.4991131750005556e-06,
      "loss": 3.0795,
      "step": 220930
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.021857738494873,
      "learning_rate": 2.4985863077714442e-06,
      "loss": 2.8127,
      "step": 220931
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7928097248077393,
      "learning_rate": 2.498059495853488e-06,
      "loss": 2.8461,
      "step": 220932
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.279872179031372,
      "learning_rate": 2.4975327392467548e-06,
      "loss": 2.9133,
      "step": 220933
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.312349796295166,
      "learning_rate": 2.49700603795131e-06,
      "loss": 2.8146,
      "step": 220934
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9893171787261963,
      "learning_rate": 2.496479391967321e-06,
      "loss": 3.1123,
      "step": 220935
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.207199811935425,
      "learning_rate": 2.4959528012948535e-06,
      "loss": 3.0674,
      "step": 220936
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1724941730499268,
      "learning_rate": 2.495426265933975e-06,
      "loss": 2.786,
      "step": 220937
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1797895431518555,
      "learning_rate": 2.4948997858848517e-06,
      "loss": 2.8863,
      "step": 220938
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8567023277282715,
      "learning_rate": 2.4943733611474837e-06,
      "loss": 2.9079,
      "step": 220939
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.091221332550049,
      "learning_rate": 2.4938469917220707e-06,
      "loss": 2.6824,
      "step": 220940
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0386834144592285,
      "learning_rate": 2.493320677608679e-06,
      "loss": 2.8991,
      "step": 220941
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9567275047302246,
      "learning_rate": 2.4927944188073757e-06,
      "loss": 3.0634,
      "step": 220942
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3894965648651123,
      "learning_rate": 2.4922682153182606e-06,
      "loss": 3.0715,
      "step": 220943
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.110098361968994,
      "learning_rate": 2.4917420671415e-06,
      "loss": 2.964,
      "step": 220944
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1221115589141846,
      "learning_rate": 2.4912159742770942e-06,
      "loss": 2.8965,
      "step": 220945
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8526508808135986,
      "learning_rate": 2.4906899367252098e-06,
      "loss": 2.924,
      "step": 220946
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7419188022613525,
      "learning_rate": 2.490163954485913e-06,
      "loss": 2.9014,
      "step": 220947
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1455419063568115,
      "learning_rate": 2.489638027559304e-06,
      "loss": 3.2325,
      "step": 220948
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.755709648132324,
      "learning_rate": 2.489112155945483e-06,
      "loss": 3.1206,
      "step": 220949
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.759012222290039,
      "learning_rate": 2.4885863396445826e-06,
      "loss": 2.8374,
      "step": 220950
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7760684490203857,
      "learning_rate": 2.4880605786566368e-06,
      "loss": 2.9545,
      "step": 220951
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.20404314994812,
      "learning_rate": 2.487534872981778e-06,
      "loss": 3.0581,
      "step": 220952
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.389604091644287,
      "learning_rate": 2.4870092226201398e-06,
      "loss": 3.0978,
      "step": 220953
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.783189535140991,
      "learning_rate": 2.486483627571756e-06,
      "loss": 2.7103,
      "step": 220954
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.416917324066162,
      "learning_rate": 2.485958087836726e-06,
      "loss": 3.0139,
      "step": 220955
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6831676959991455,
      "learning_rate": 2.485432603415216e-06,
      "loss": 2.8402,
      "step": 220956
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1314356327056885,
      "learning_rate": 2.48490717430726e-06,
      "loss": 2.9366,
      "step": 220957
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0059938430786133,
      "learning_rate": 2.484381800512958e-06,
      "loss": 3.0002,
      "step": 220958
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7600677013397217,
      "learning_rate": 2.4838564820324756e-06,
      "loss": 2.8427,
      "step": 220959
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.0953803062438965,
      "learning_rate": 2.4833312188658137e-06,
      "loss": 2.8722,
      "step": 220960
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.960111618041992,
      "learning_rate": 2.482806011013139e-06,
      "loss": 2.7457,
      "step": 220961
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6553547382354736,
      "learning_rate": 2.482280858474517e-06,
      "loss": 2.824,
      "step": 220962
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.29905104637146,
      "learning_rate": 2.4817557612500484e-06,
      "loss": 3.1068,
      "step": 220963
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.106348514556885,
      "learning_rate": 2.4812307193398663e-06,
      "loss": 2.7687,
      "step": 220964
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5977859497070312,
      "learning_rate": 2.480705732744037e-06,
      "loss": 3.1201,
      "step": 220965
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1364500522613525,
      "learning_rate": 2.480180801462628e-06,
      "loss": 3.1378,
      "step": 220966
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.394552230834961,
      "learning_rate": 2.4796559254957716e-06,
      "loss": 3.0444,
      "step": 220967
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.061147928237915,
      "learning_rate": 2.4791311048436015e-06,
      "loss": 2.856,
      "step": 220968
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9886698722839355,
      "learning_rate": 2.4786063395061506e-06,
      "loss": 3.066,
      "step": 220969
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1307554244995117,
      "learning_rate": 2.4780816294835527e-06,
      "loss": 2.8007,
      "step": 220970
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5455875396728516,
      "learning_rate": 2.477556974775874e-06,
      "loss": 3.0918,
      "step": 220971
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.751401662826538,
      "learning_rate": 2.4770323753832477e-06,
      "loss": 2.8032,
      "step": 220972
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.058768272399902,
      "learning_rate": 2.476507831305774e-06,
      "loss": 2.8742,
      "step": 220973
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9259586334228516,
      "learning_rate": 2.4759833425435195e-06,
      "loss": 2.8222,
      "step": 220974
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7776896953582764,
      "learning_rate": 2.4754589090965835e-06,
      "loss": 2.8259,
      "step": 220975
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.04781174659729,
      "learning_rate": 2.474934530965067e-06,
      "loss": 2.983,
      "step": 220976
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0504610538482666,
      "learning_rate": 2.4744102081491025e-06,
      "loss": 2.9412,
      "step": 220977
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5742735862731934,
      "learning_rate": 2.47388594064879e-06,
      "loss": 2.839,
      "step": 220978
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0990967750549316,
      "learning_rate": 2.4733617284641296e-06,
      "loss": 3.0736,
      "step": 220979
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.620837688446045,
      "learning_rate": 2.472837571595321e-06,
      "loss": 2.7985,
      "step": 220980
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9657139778137207,
      "learning_rate": 2.472313470042431e-06,
      "loss": 2.9584,
      "step": 220981
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0426058769226074,
      "learning_rate": 2.4717894238055593e-06,
      "loss": 2.9472,
      "step": 220982
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.488400459289551,
      "learning_rate": 2.471265432884806e-06,
      "loss": 2.9925,
      "step": 220983
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9733195304870605,
      "learning_rate": 2.4707414972802374e-06,
      "loss": 2.913,
      "step": 220984
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8213918209075928,
      "learning_rate": 2.4702176169919874e-06,
      "loss": 3.1473,
      "step": 220985
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.029316425323486,
      "learning_rate": 2.469693792020122e-06,
      "loss": 2.7382,
      "step": 220986
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.392862558364868,
      "learning_rate": 2.4691700223647746e-06,
      "loss": 2.9919,
      "step": 220987
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1822550296783447,
      "learning_rate": 2.468646308026012e-06,
      "loss": 2.8254,
      "step": 220988
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.4391491413116455,
      "learning_rate": 2.4681226490039675e-06,
      "loss": 2.6846,
      "step": 220989
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.925745964050293,
      "learning_rate": 2.467599045298707e-06,
      "loss": 2.8671,
      "step": 220990
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0863959789276123,
      "learning_rate": 2.467075496910298e-06,
      "loss": 3.0469,
      "step": 220991
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0933871269226074,
      "learning_rate": 2.46655200383894e-06,
      "loss": 2.8797,
      "step": 220992
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7244582176208496,
      "learning_rate": 2.466028566084599e-06,
      "loss": 2.6875,
      "step": 220993
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.731203079223633,
      "learning_rate": 2.465505183647476e-06,
      "loss": 2.8978,
      "step": 220994
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7706515789031982,
      "learning_rate": 2.464981856527637e-06,
      "loss": 3.0251,
      "step": 220995
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.006908655166626,
      "learning_rate": 2.4644585847251817e-06,
      "loss": 2.5472,
      "step": 220996
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.807861089706421,
      "learning_rate": 2.4639353682401773e-06,
      "loss": 2.9418,
      "step": 220997
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3223214149475098,
      "learning_rate": 2.463412207072757e-06,
      "loss": 2.7541,
      "step": 220998
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.772857904434204,
      "learning_rate": 2.462889101222987e-06,
      "loss": 2.7799,
      "step": 220999
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6894984245300293,
      "learning_rate": 2.4623660506910004e-06,
      "loss": 3.0789,
      "step": 221000
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2801005840301514,
      "learning_rate": 2.461843055476864e-06,
      "loss": 2.8747,
      "step": 221001
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2635233402252197,
      "learning_rate": 2.4613201155807117e-06,
      "loss": 3.0227,
      "step": 221002
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.837981939315796,
      "learning_rate": 2.4607972310026093e-06,
      "loss": 2.8074,
      "step": 221003
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.597856044769287,
      "learning_rate": 2.4602744017426566e-06,
      "loss": 3.0211,
      "step": 221004
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6826887130737305,
      "learning_rate": 2.459751627800921e-06,
      "loss": 2.6923,
      "step": 221005
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.944216728210449,
      "learning_rate": 2.4592289091775684e-06,
      "loss": 2.8157,
      "step": 221006
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.19492769241333,
      "learning_rate": 2.458706245872666e-06,
      "loss": 2.6899,
      "step": 221007
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6528453826904297,
      "learning_rate": 2.4581836378862797e-06,
      "loss": 2.6633,
      "step": 221008
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.312119960784912,
      "learning_rate": 2.4576610852185764e-06,
      "loss": 2.7327,
      "step": 221009
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3339383602142334,
      "learning_rate": 2.4571385878695895e-06,
      "loss": 2.8068,
      "step": 221010
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.642122745513916,
      "learning_rate": 2.4566161458394185e-06,
      "loss": 3.0575,
      "step": 221011
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.660947322845459,
      "learning_rate": 2.456093759128197e-06,
      "loss": 2.8035,
      "step": 221012
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.928107976913452,
      "learning_rate": 2.4555714277359916e-06,
      "loss": 3.0026,
      "step": 221013
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3574321269989014,
      "learning_rate": 2.4550491516629357e-06,
      "loss": 3.0318,
      "step": 221014
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.805631160736084,
      "learning_rate": 2.4545269309090955e-06,
      "loss": 2.8886,
      "step": 221015
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1454014778137207,
      "learning_rate": 2.4540047654745375e-06,
      "loss": 2.9299,
      "step": 221016
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.355290174484253,
      "learning_rate": 2.453482655359429e-06,
      "loss": 2.6439,
      "step": 221017
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8518478870391846,
      "learning_rate": 2.452960600563836e-06,
      "loss": 2.8661,
      "step": 221018
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6832847595214844,
      "learning_rate": 2.4524386010878582e-06,
      "loss": 2.9382,
      "step": 221019
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.958527088165283,
      "learning_rate": 2.451916656931596e-06,
      "loss": 2.9691,
      "step": 221020
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0320932865142822,
      "learning_rate": 2.4513947680951163e-06,
      "loss": 3.0501,
      "step": 221021
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4597909450531006,
      "learning_rate": 2.4508729345785514e-06,
      "loss": 2.9722,
      "step": 221022
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.951968193054199,
      "learning_rate": 2.450351156381969e-06,
      "loss": 3.0239,
      "step": 221023
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1097655296325684,
      "learning_rate": 2.4498294335055014e-06,
      "loss": 3.0108,
      "step": 221024
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5832624435424805,
      "learning_rate": 2.4493077659492157e-06,
      "loss": 2.8223,
      "step": 221025
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9829483032226562,
      "learning_rate": 2.4487861537132447e-06,
      "loss": 2.7708,
      "step": 221026
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.034219980239868,
      "learning_rate": 2.4482645967976554e-06,
      "loss": 3.0558,
      "step": 221027
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.142108678817749,
      "learning_rate": 2.4477430952025146e-06,
      "loss": 2.968,
      "step": 221028
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8187637329101562,
      "learning_rate": 2.4472216489279883e-06,
      "loss": 3.0422,
      "step": 221029
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.9859578609466553,
      "learning_rate": 2.44670025797411e-06,
      "loss": 2.8062,
      "step": 221030
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5359902381896973,
      "learning_rate": 2.4461789223410468e-06,
      "loss": 2.7797,
      "step": 221031
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6109020709991455,
      "learning_rate": 2.445657642028831e-06,
      "loss": 2.8543,
      "step": 221032
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.66454815864563,
      "learning_rate": 2.4451364170375964e-06,
      "loss": 2.9642,
      "step": 221033
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.73758602142334,
      "learning_rate": 2.4446152473674096e-06,
      "loss": 2.8604,
      "step": 221034
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.181590557098389,
      "learning_rate": 2.4440941330184037e-06,
      "loss": 2.7073,
      "step": 221035
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.045931577682495,
      "learning_rate": 2.4435730739906455e-06,
      "loss": 2.9791,
      "step": 221036
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.944481611251831,
      "learning_rate": 2.443052070284235e-06,
      "loss": 2.9745,
      "step": 221037
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8745620250701904,
      "learning_rate": 2.442531121899305e-06,
      "loss": 3.0932,
      "step": 221038
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.058509349822998,
      "learning_rate": 2.4420102288359223e-06,
      "loss": 2.7282,
      "step": 221039
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.696533441543579,
      "learning_rate": 2.4414893910941536e-06,
      "loss": 2.8278,
      "step": 221040
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8000171184539795,
      "learning_rate": 2.4409686086741654e-06,
      "loss": 2.7653,
      "step": 221041
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.22581148147583,
      "learning_rate": 2.4404478815759908e-06,
      "loss": 2.9181,
      "step": 221042
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.979918956756592,
      "learning_rate": 2.439927209799797e-06,
      "loss": 2.9287,
      "step": 221043
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7817232608795166,
      "learning_rate": 2.439406593345583e-06,
      "loss": 2.919,
      "step": 221044
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.795839548110962,
      "learning_rate": 2.4388860322135495e-06,
      "loss": 2.8858,
      "step": 221045
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.695650339126587,
      "learning_rate": 2.4383655264036962e-06,
      "loss": 2.9974,
      "step": 221046
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0237090587615967,
      "learning_rate": 2.4378450759162227e-06,
      "loss": 2.7308,
      "step": 221047
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6878137588500977,
      "learning_rate": 2.437324680751096e-06,
      "loss": 2.9232,
      "step": 221048
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.772777557373047,
      "learning_rate": 2.4368043409085493e-06,
      "loss": 3.0098,
      "step": 221049
    },
    {
      "epoch": 2.88,
      "grad_norm": 5.2464070320129395,
      "learning_rate": 2.4362840563886154e-06,
      "loss": 2.7275,
      "step": 221050
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9287357330322266,
      "learning_rate": 2.4357638271913615e-06,
      "loss": 3.0912,
      "step": 221051
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7862467765808105,
      "learning_rate": 2.4352436533169206e-06,
      "loss": 3.0432,
      "step": 221052
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.173943281173706,
      "learning_rate": 2.4347235347654258e-06,
      "loss": 2.8979,
      "step": 221053
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7104718685150146,
      "learning_rate": 2.4342034715368773e-06,
      "loss": 2.9561,
      "step": 221054
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.892632246017456,
      "learning_rate": 2.433683463631475e-06,
      "loss": 3.0122,
      "step": 221055
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4332127571105957,
      "learning_rate": 2.433163511049252e-06,
      "loss": 2.8496,
      "step": 221056
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.267275333404541,
      "learning_rate": 2.4326436137903083e-06,
      "loss": 2.7801,
      "step": 221057
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1258416175842285,
      "learning_rate": 2.432123771854744e-06,
      "loss": 2.9225,
      "step": 221058
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0353477001190186,
      "learning_rate": 2.4316039852426916e-06,
      "loss": 2.9986,
      "step": 221059
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.744521379470825,
      "learning_rate": 2.4310842539541854e-06,
      "loss": 2.647,
      "step": 221060
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.873208999633789,
      "learning_rate": 2.430564577989391e-06,
      "loss": 3.0882,
      "step": 221061
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8567256927490234,
      "learning_rate": 2.430044957348376e-06,
      "loss": 2.4474,
      "step": 221062
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5004963874816895,
      "learning_rate": 2.42952539203124e-06,
      "loss": 2.9078,
      "step": 221063
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7627804279327393,
      "learning_rate": 2.4290058820380156e-06,
      "loss": 3.0193,
      "step": 221064
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6361939907073975,
      "learning_rate": 2.428486427368903e-06,
      "loss": 2.8386,
      "step": 221065
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4753353595733643,
      "learning_rate": 2.427967028023936e-06,
      "loss": 3.0169,
      "step": 221066
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.733623743057251,
      "learning_rate": 2.4274476840032475e-06,
      "loss": 2.875,
      "step": 221067
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.9956820011138916,
      "learning_rate": 2.426928395306871e-06,
      "loss": 2.8427,
      "step": 221068
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6327357292175293,
      "learning_rate": 2.4264091619350056e-06,
      "loss": 2.7032,
      "step": 221069
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1310677528381348,
      "learning_rate": 2.425889983887652e-06,
      "loss": 2.961,
      "step": 221070
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3117659091949463,
      "learning_rate": 2.4253708611649438e-06,
      "loss": 2.9009,
      "step": 221071
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.069891929626465,
      "learning_rate": 2.4248517937669797e-06,
      "loss": 2.9374,
      "step": 221072
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.154512405395508,
      "learning_rate": 2.4243327816938606e-06,
      "loss": 3.0047,
      "step": 221073
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.795081615447998,
      "learning_rate": 2.4238138249456527e-06,
      "loss": 2.7186,
      "step": 221074
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8428921699523926,
      "learning_rate": 2.42329492352249e-06,
      "loss": 3.0313,
      "step": 221075
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0375843048095703,
      "learning_rate": 2.422776077424471e-06,
      "loss": 2.8689,
      "step": 221076
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1211578845977783,
      "learning_rate": 2.422257286651663e-06,
      "loss": 2.8607,
      "step": 221077
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2321438789367676,
      "learning_rate": 2.421738551204133e-06,
      "loss": 3.0437,
      "step": 221078
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.034865379333496,
      "learning_rate": 2.421219871082081e-06,
      "loss": 2.9461,
      "step": 221079
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.742143154144287,
      "learning_rate": 2.4207012462855056e-06,
      "loss": 3.0982,
      "step": 221080
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.015502452850342,
      "learning_rate": 2.4201826768145416e-06,
      "loss": 3.1168,
      "step": 221081
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.461296319961548,
      "learning_rate": 2.4196641626692883e-06,
      "loss": 2.9561,
      "step": 221082
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3145763874053955,
      "learning_rate": 2.4191457038498452e-06,
      "loss": 2.918,
      "step": 221083
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.5494866371154785,
      "learning_rate": 2.4186273003562794e-06,
      "loss": 3.0939,
      "step": 221084
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7934558391571045,
      "learning_rate": 2.418108952188724e-06,
      "loss": 2.857,
      "step": 221085
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.160721302032471,
      "learning_rate": 2.417590659347246e-06,
      "loss": 2.9024,
      "step": 221086
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5487098693847656,
      "learning_rate": 2.417072421831978e-06,
      "loss": 2.83,
      "step": 221087
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0308282375335693,
      "learning_rate": 2.416554239642987e-06,
      "loss": 2.8602,
      "step": 221088
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8320930004119873,
      "learning_rate": 2.416036112780373e-06,
      "loss": 2.8548,
      "step": 221089
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.058025598526001,
      "learning_rate": 2.4155180412442353e-06,
      "loss": 2.9747,
      "step": 221090
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7036502361297607,
      "learning_rate": 2.4150000250346747e-06,
      "loss": 3.0522,
      "step": 221091
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5783026218414307,
      "learning_rate": 2.414482064151757e-06,
      "loss": 2.7301,
      "step": 221092
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6728262901306152,
      "learning_rate": 2.413964158595616e-06,
      "loss": 2.9806,
      "step": 221093
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.013486385345459,
      "learning_rate": 2.4134463083663514e-06,
      "loss": 3.0802,
      "step": 221094
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8557443618774414,
      "learning_rate": 2.412928513464063e-06,
      "loss": 3.1625,
      "step": 221095
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.40702486038208,
      "learning_rate": 2.4124107738887845e-06,
      "loss": 2.919,
      "step": 221096
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0380046367645264,
      "learning_rate": 2.4118930896406817e-06,
      "loss": 2.8591,
      "step": 221097
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.720214366912842,
      "learning_rate": 2.411375460719822e-06,
      "loss": 2.9749,
      "step": 221098
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9297025203704834,
      "learning_rate": 2.410857887126305e-06,
      "loss": 3.0299,
      "step": 221099
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0083818435668945,
      "learning_rate": 2.410340368860231e-06,
      "loss": 2.9415,
      "step": 221100
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9468801021575928,
      "learning_rate": 2.4098229059216656e-06,
      "loss": 2.9861,
      "step": 221101
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9294614791870117,
      "learning_rate": 2.409305498310743e-06,
      "loss": 2.8121,
      "step": 221102
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.346149206161499,
      "learning_rate": 2.4087881460275626e-06,
      "loss": 2.8456,
      "step": 221103
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.808627128601074,
      "learning_rate": 2.4082708490721913e-06,
      "loss": 2.8505,
      "step": 221104
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5864076614379883,
      "learning_rate": 2.407753607444762e-06,
      "loss": 3.0551,
      "step": 221105
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.108067750930786,
      "learning_rate": 2.4072364211453423e-06,
      "loss": 2.8316,
      "step": 221106
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.196589946746826,
      "learning_rate": 2.406719290173997e-06,
      "loss": 2.6369,
      "step": 221107
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.897810697555542,
      "learning_rate": 2.4062022145308945e-06,
      "loss": 2.6842,
      "step": 221108
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8793346881866455,
      "learning_rate": 2.4056851942161005e-06,
      "loss": 3.1891,
      "step": 221109
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.221963882446289,
      "learning_rate": 2.4051682292296816e-06,
      "loss": 2.7083,
      "step": 221110
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0538721084594727,
      "learning_rate": 2.404651319571804e-06,
      "loss": 3.2449,
      "step": 221111
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.948822021484375,
      "learning_rate": 2.4041344652424687e-06,
      "loss": 2.9134,
      "step": 221112
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.136545181274414,
      "learning_rate": 2.4036176662418415e-06,
      "loss": 2.9049,
      "step": 221113
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0208024978637695,
      "learning_rate": 2.4031009225700228e-06,
      "loss": 3.0152,
      "step": 221114
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.646089792251587,
      "learning_rate": 2.4025842342270453e-06,
      "loss": 2.7661,
      "step": 221115
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.229567289352417,
      "learning_rate": 2.4020676012130757e-06,
      "loss": 3.0768,
      "step": 221116
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.787989854812622,
      "learning_rate": 2.401551023528181e-06,
      "loss": 2.9953,
      "step": 221117
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2533457279205322,
      "learning_rate": 2.4010345011724276e-06,
      "loss": 2.8861,
      "step": 221118
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2211358547210693,
      "learning_rate": 2.4005180341459486e-06,
      "loss": 2.9699,
      "step": 221119
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.598426580429077,
      "learning_rate": 2.4000016224488437e-06,
      "loss": 2.9513,
      "step": 221120
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8426027297973633,
      "learning_rate": 2.3994852660811803e-06,
      "loss": 2.5658,
      "step": 221121
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4066624641418457,
      "learning_rate": 2.398968965043091e-06,
      "loss": 3.0968,
      "step": 221122
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3484103679656982,
      "learning_rate": 2.3984527193346425e-06,
      "loss": 2.8484,
      "step": 221123
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4538049697875977,
      "learning_rate": 2.397936528955935e-06,
      "loss": 3.042,
      "step": 221124
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5451903343200684,
      "learning_rate": 2.397420393907068e-06,
      "loss": 2.8972,
      "step": 221125
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6048872470855713,
      "learning_rate": 2.3969043141881417e-06,
      "loss": 2.5568,
      "step": 221126
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.131357431411743,
      "learning_rate": 2.396388289799256e-06,
      "loss": 2.9688,
      "step": 221127
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1887166500091553,
      "learning_rate": 2.3958723207404772e-06,
      "loss": 2.6761,
      "step": 221128
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.737480401992798,
      "learning_rate": 2.3953564070119393e-06,
      "loss": 2.9017,
      "step": 221129
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.660877227783203,
      "learning_rate": 2.394840548613741e-06,
      "loss": 2.8597,
      "step": 221130
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7858498096466064,
      "learning_rate": 2.394324745545917e-06,
      "loss": 2.94,
      "step": 221131
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.585155963897705,
      "learning_rate": 2.393808997808633e-06,
      "loss": 2.7198,
      "step": 221132
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3234152793884277,
      "learning_rate": 2.3932933054019554e-06,
      "loss": 3.0417,
      "step": 221133
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.83799147605896,
      "learning_rate": 2.3927776683259845e-06,
      "loss": 3.0814,
      "step": 221134
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.774989366531372,
      "learning_rate": 2.3922620865807875e-06,
      "loss": 2.9212,
      "step": 221135
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.11447811126709,
      "learning_rate": 2.39174656016653e-06,
      "loss": 2.9973,
      "step": 221136
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4375367164611816,
      "learning_rate": 2.3912310890832452e-06,
      "loss": 2.9028,
      "step": 221137
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.056339740753174,
      "learning_rate": 2.390715673331034e-06,
      "loss": 2.8671,
      "step": 221138
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9171440601348877,
      "learning_rate": 2.3902003129100288e-06,
      "loss": 2.9914,
      "step": 221139
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7867441177368164,
      "learning_rate": 2.389685007820297e-06,
      "loss": 2.6392,
      "step": 221140
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5989251136779785,
      "learning_rate": 2.389169758061904e-06,
      "loss": 2.7901,
      "step": 221141
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.340237855911255,
      "learning_rate": 2.388654563635051e-06,
      "loss": 3.0512,
      "step": 221142
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.006171941757202,
      "learning_rate": 2.3881394245397035e-06,
      "loss": 2.9339,
      "step": 221143
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3212766647338867,
      "learning_rate": 2.387624340776062e-06,
      "loss": 3.111,
      "step": 221144
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0993871688842773,
      "learning_rate": 2.38710931234416e-06,
      "loss": 2.9594,
      "step": 221145
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8491015434265137,
      "learning_rate": 2.3865943392440967e-06,
      "loss": 3.0763,
      "step": 221146
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5111658573150635,
      "learning_rate": 2.386079421476006e-06,
      "loss": 2.8601,
      "step": 221147
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5633325576782227,
      "learning_rate": 2.385564559039954e-06,
      "loss": 3.0606,
      "step": 221148
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6699676513671875,
      "learning_rate": 2.385049751936041e-06,
      "loss": 2.9443,
      "step": 221149
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5565154552459717,
      "learning_rate": 2.384535000164367e-06,
      "loss": 2.8649,
      "step": 221150
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.397280693054199,
      "learning_rate": 2.3840203037250314e-06,
      "loss": 2.9493,
      "step": 221151
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7149198055267334,
      "learning_rate": 2.3835056626181347e-06,
      "loss": 2.9286,
      "step": 221152
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7212953567504883,
      "learning_rate": 2.3829910768437097e-06,
      "loss": 2.7759,
      "step": 221153
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0662143230438232,
      "learning_rate": 2.3824765464019903e-06,
      "loss": 3.0414,
      "step": 221154
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0130529403686523,
      "learning_rate": 2.381962071292909e-06,
      "loss": 2.8582,
      "step": 221155
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6883022785186768,
      "learning_rate": 2.381447651516666e-06,
      "loss": 3.1723,
      "step": 221156
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5745692253112793,
      "learning_rate": 2.3809332870733277e-06,
      "loss": 3.1462,
      "step": 221157
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.373638391494751,
      "learning_rate": 2.3804189779629945e-06,
      "loss": 2.9288,
      "step": 221158
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2544140815734863,
      "learning_rate": 2.3799047241857662e-06,
      "loss": 2.9494,
      "step": 221159
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.153529167175293,
      "learning_rate": 2.3793905257417424e-06,
      "loss": 2.949,
      "step": 221160
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0508694648742676,
      "learning_rate": 2.37887638263099e-06,
      "loss": 2.744,
      "step": 221161
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7514984607696533,
      "learning_rate": 2.3783622948536417e-06,
      "loss": 2.843,
      "step": 221162
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.442592144012451,
      "learning_rate": 2.3778482624097317e-06,
      "loss": 2.7519,
      "step": 221163
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.69447922706604,
      "learning_rate": 2.377334285299426e-06,
      "loss": 2.9993,
      "step": 221164
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.024509906768799,
      "learning_rate": 2.376820363522791e-06,
      "loss": 3.011,
      "step": 221165
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9557652473449707,
      "learning_rate": 2.376306497079927e-06,
      "loss": 2.8672,
      "step": 221166
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.038363456726074,
      "learning_rate": 2.375792685970934e-06,
      "loss": 2.8859,
      "step": 221167
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6926369667053223,
      "learning_rate": 2.3752789301958784e-06,
      "loss": 2.9033,
      "step": 221168
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4808526039123535,
      "learning_rate": 2.37476522975486e-06,
      "loss": 2.8543,
      "step": 221169
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3490498065948486,
      "learning_rate": 2.374251584648046e-06,
      "loss": 2.6882,
      "step": 221170
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2921266555786133,
      "learning_rate": 2.373737994875402e-06,
      "loss": 3.0169,
      "step": 221171
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6915950775146484,
      "learning_rate": 2.3732244604371954e-06,
      "loss": 2.5662,
      "step": 221172
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6966969966888428,
      "learning_rate": 2.372710981333359e-06,
      "loss": 3.0241,
      "step": 221173
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0173332691192627,
      "learning_rate": 2.3721975575640594e-06,
      "loss": 3.1259,
      "step": 221174
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.851545572280884,
      "learning_rate": 2.3716841891293638e-06,
      "loss": 2.9218,
      "step": 221175
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.695465087890625,
      "learning_rate": 2.371170876029438e-06,
      "loss": 2.8517,
      "step": 221176
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.01462459564209,
      "learning_rate": 2.3706576182643156e-06,
      "loss": 2.7505,
      "step": 221177
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.438292980194092,
      "learning_rate": 2.3701444158340965e-06,
      "loss": 2.8202,
      "step": 221178
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0556488037109375,
      "learning_rate": 2.369631268738881e-06,
      "loss": 3.0084,
      "step": 221179
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.119633197784424,
      "learning_rate": 2.3691181769788014e-06,
      "loss": 3.0311,
      "step": 221180
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8873071670532227,
      "learning_rate": 2.368605140553892e-06,
      "loss": 2.871,
      "step": 221181
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.837808609008789,
      "learning_rate": 2.3680921594642853e-06,
      "loss": 2.9165,
      "step": 221182
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.871508836746216,
      "learning_rate": 2.3675792337100486e-06,
      "loss": 2.8739,
      "step": 221183
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2703466415405273,
      "learning_rate": 2.3670663632913143e-06,
      "loss": 2.9308,
      "step": 221184
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1038620471954346,
      "learning_rate": 2.3665535482081833e-06,
      "loss": 3.0572,
      "step": 221185
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.827613353729248,
      "learning_rate": 2.366040788460688e-06,
      "loss": 3.1414,
      "step": 221186
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9479143619537354,
      "learning_rate": 2.365528084048995e-06,
      "loss": 2.9305,
      "step": 221187
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.127653121948242,
      "learning_rate": 2.365015434973139e-06,
      "loss": 3.2465,
      "step": 221188
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.218252420425415,
      "learning_rate": 2.3645028412332513e-06,
      "loss": 2.9683,
      "step": 221189
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7604246139526367,
      "learning_rate": 2.3639903028294328e-06,
      "loss": 2.7324,
      "step": 221190
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.97273588180542,
      "learning_rate": 2.3634778197617833e-06,
      "loss": 2.8231,
      "step": 221191
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6475560665130615,
      "learning_rate": 2.3629653920303694e-06,
      "loss": 2.9274,
      "step": 221192
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4957990646362305,
      "learning_rate": 2.362453019635291e-06,
      "loss": 3.0328,
      "step": 221193
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.729283332824707,
      "learning_rate": 2.361940702576648e-06,
      "loss": 2.7069,
      "step": 221194
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9750468730926514,
      "learning_rate": 2.3614284408545404e-06,
      "loss": 2.9329,
      "step": 221195
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.38694167137146,
      "learning_rate": 2.360916234469068e-06,
      "loss": 3.0455,
      "step": 221196
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6450257301330566,
      "learning_rate": 2.3604040834203308e-06,
      "loss": 2.9661,
      "step": 221197
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5553536415100098,
      "learning_rate": 2.359891987708362e-06,
      "loss": 3.0492,
      "step": 221198
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5550148487091064,
      "learning_rate": 2.3593799473333617e-06,
      "loss": 2.9598,
      "step": 221199
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.330262660980225,
      "learning_rate": 2.35886796229533e-06,
      "loss": 2.8592,
      "step": 221200
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7032663822174072,
      "learning_rate": 2.358356032594433e-06,
      "loss": 2.9491,
      "step": 221201
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0345640182495117,
      "learning_rate": 2.3578441582307373e-06,
      "loss": 2.6193,
      "step": 221202
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.975391149520874,
      "learning_rate": 2.357332339204343e-06,
      "loss": 2.9343,
      "step": 221203
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8863165378570557,
      "learning_rate": 2.356820575515317e-06,
      "loss": 2.9901,
      "step": 221204
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2781243324279785,
      "learning_rate": 2.356308867163792e-06,
      "loss": 3.1201,
      "step": 221205
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7220747470855713,
      "learning_rate": 2.355797214149835e-06,
      "loss": 2.821,
      "step": 221206
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3139359951019287,
      "learning_rate": 2.3552856164735457e-06,
      "loss": 2.9839,
      "step": 221207
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.622452974319458,
      "learning_rate": 2.3547740741350574e-06,
      "loss": 3.1116,
      "step": 221208
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.667628765106201,
      "learning_rate": 2.354262587134437e-06,
      "loss": 3.0386,
      "step": 221209
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.791114091873169,
      "learning_rate": 2.353751155471717e-06,
      "loss": 2.9697,
      "step": 221210
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.897281169891357,
      "learning_rate": 2.3532397791471315e-06,
      "loss": 2.9841,
      "step": 221211
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.31345272064209,
      "learning_rate": 2.3527284581606466e-06,
      "loss": 2.7192,
      "step": 221212
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.9056320190429688,
      "learning_rate": 2.352217192512429e-06,
      "loss": 3.1369,
      "step": 221213
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7808992862701416,
      "learning_rate": 2.3517059822025118e-06,
      "loss": 2.9817,
      "step": 221214
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3537349700927734,
      "learning_rate": 2.3511948272310956e-06,
      "loss": 2.8011,
      "step": 221215
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.600687265396118,
      "learning_rate": 2.3506837275981795e-06,
      "loss": 2.9965,
      "step": 221216
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.543184757232666,
      "learning_rate": 2.350172683303897e-06,
      "loss": 3.0194,
      "step": 221217
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2424416542053223,
      "learning_rate": 2.3496616943483147e-06,
      "loss": 3.0734,
      "step": 221218
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.810260057449341,
      "learning_rate": 2.3491507607315663e-06,
      "loss": 2.9963,
      "step": 221219
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0369620323181152,
      "learning_rate": 2.3486398824537177e-06,
      "loss": 2.9344,
      "step": 221220
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2638535499572754,
      "learning_rate": 2.3481290595149025e-06,
      "loss": 2.782,
      "step": 221221
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.733208179473877,
      "learning_rate": 2.3476182919151545e-06,
      "loss": 2.8638,
      "step": 221222
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.754561185836792,
      "learning_rate": 2.347107579654639e-06,
      "loss": 2.8215,
      "step": 221223
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1603856086730957,
      "learning_rate": 2.34659692273339e-06,
      "loss": 2.9963,
      "step": 221224
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8671371936798096,
      "learning_rate": 2.346086321151508e-06,
      "loss": 3.15,
      "step": 221225
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6258163452148438,
      "learning_rate": 2.345575774909125e-06,
      "loss": 2.7424,
      "step": 221226
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8671867847442627,
      "learning_rate": 2.3450652840063422e-06,
      "loss": 2.8463,
      "step": 221227
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.3975021839141846,
      "learning_rate": 2.3445548484432253e-06,
      "loss": 2.9614,
      "step": 221228
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.712710380554199,
      "learning_rate": 2.3440444682198744e-06,
      "loss": 2.9751,
      "step": 221229
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.902158737182617,
      "learning_rate": 2.3435341433363563e-06,
      "loss": 2.7973,
      "step": 221230
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7168469429016113,
      "learning_rate": 2.343023873792804e-06,
      "loss": 2.992,
      "step": 221231
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7238028049468994,
      "learning_rate": 2.3425136595892848e-06,
      "loss": 2.9598,
      "step": 221232
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.78074312210083,
      "learning_rate": 2.342003500725964e-06,
      "loss": 3.0368,
      "step": 221233
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.561162233352661,
      "learning_rate": 2.341493397202843e-06,
      "loss": 2.9435,
      "step": 221234
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.512953042984009,
      "learning_rate": 2.3409833490200867e-06,
      "loss": 2.7543,
      "step": 221235
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6695547103881836,
      "learning_rate": 2.34047335617773e-06,
      "loss": 2.8792,
      "step": 221236
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9541633129119873,
      "learning_rate": 2.3399634186759053e-06,
      "loss": 2.9831,
      "step": 221237
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.112760543823242,
      "learning_rate": 2.3394535365147125e-06,
      "loss": 3.3457,
      "step": 221238
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.751431941986084,
      "learning_rate": 2.3389437096942187e-06,
      "loss": 2.953,
      "step": 221239
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.063415765762329,
      "learning_rate": 2.338433938214557e-06,
      "loss": 2.7607,
      "step": 221240
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0254359245300293,
      "learning_rate": 2.33792422207576e-06,
      "loss": 2.9143,
      "step": 221241
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3699185848236084,
      "learning_rate": 2.337414561277995e-06,
      "loss": 3.0152,
      "step": 221242
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.020024299621582,
      "learning_rate": 2.3369049558213284e-06,
      "loss": 2.9159,
      "step": 221243
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.332953929901123,
      "learning_rate": 2.336395405705793e-06,
      "loss": 2.8678,
      "step": 221244
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.817545175552368,
      "learning_rate": 2.335885910931623e-06,
      "loss": 2.9356,
      "step": 221245
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7018394470214844,
      "learning_rate": 2.3353764714987842e-06,
      "loss": 2.82,
      "step": 221246
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6202924251556396,
      "learning_rate": 2.3348670874074106e-06,
      "loss": 3.0751,
      "step": 221247
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.190809488296509,
      "learning_rate": 2.3343577586576346e-06,
      "loss": 3.1084,
      "step": 221248
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8776350021362305,
      "learning_rate": 2.3338484852494898e-06,
      "loss": 2.8342,
      "step": 221249
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.757087230682373,
      "learning_rate": 2.33333926718311e-06,
      "loss": 3.0158,
      "step": 221250
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.978463888168335,
      "learning_rate": 2.332830104458594e-06,
      "loss": 2.9142,
      "step": 221251
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6449134349823,
      "learning_rate": 2.3323209970760093e-06,
      "loss": 2.9799,
      "step": 221252
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5465290546417236,
      "learning_rate": 2.331811945035489e-06,
      "loss": 2.957,
      "step": 221253
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.957101345062256,
      "learning_rate": 2.331302948337066e-06,
      "loss": 2.9112,
      "step": 221254
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0076844692230225,
      "learning_rate": 2.330794006980907e-06,
      "loss": 3.0286,
      "step": 221255
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.633551597595215,
      "learning_rate": 2.330285120967046e-06,
      "loss": 2.6826,
      "step": 221256
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.01043963432312,
      "learning_rate": 2.3297762902956487e-06,
      "loss": 2.7476,
      "step": 221257
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8651161193847656,
      "learning_rate": 2.329267514966715e-06,
      "loss": 2.9561,
      "step": 221258
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8290281295776367,
      "learning_rate": 2.328758794980412e-06,
      "loss": 2.8884,
      "step": 221259
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.433821678161621,
      "learning_rate": 2.328250130336806e-06,
      "loss": 2.7154,
      "step": 221260
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.776076555252075,
      "learning_rate": 2.327741521035997e-06,
      "loss": 2.8757,
      "step": 221261
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1094603538513184,
      "learning_rate": 2.327232967078052e-06,
      "loss": 2.8644,
      "step": 221262
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1342849731445312,
      "learning_rate": 2.3267244684631368e-06,
      "loss": 2.9281,
      "step": 221263
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1481895446777344,
      "learning_rate": 2.3262160251912853e-06,
      "loss": 2.9827,
      "step": 221264
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2853167057037354,
      "learning_rate": 2.3257076372626303e-06,
      "loss": 3.123,
      "step": 221265
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.829998016357422,
      "learning_rate": 2.3251993046772056e-06,
      "loss": 2.9927,
      "step": 221266
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6389076709747314,
      "learning_rate": 2.3246910274351773e-06,
      "loss": 2.744,
      "step": 221267
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6767497062683105,
      "learning_rate": 2.3241828055365783e-06,
      "loss": 2.8031,
      "step": 221268
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.063645362854004,
      "learning_rate": 2.3236746389815765e-06,
      "loss": 2.986,
      "step": 221269
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.406517744064331,
      "learning_rate": 2.323166527770204e-06,
      "loss": 3.0388,
      "step": 221270
    },
    {
      "epoch": 2.88,
      "grad_norm": 5.261387825012207,
      "learning_rate": 2.322658471902561e-06,
      "loss": 2.9546,
      "step": 221271
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8988609313964844,
      "learning_rate": 2.3221504713787474e-06,
      "loss": 2.7883,
      "step": 221272
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.573049306869507,
      "learning_rate": 2.3216425261988967e-06,
      "loss": 2.8964,
      "step": 221273
    },
    {
      "epoch": 2.88,
      "grad_norm": 5.071329593658447,
      "learning_rate": 2.3211346363630425e-06,
      "loss": 2.8847,
      "step": 221274
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.829061985015869,
      "learning_rate": 2.320626801871317e-06,
      "loss": 3.1813,
      "step": 221275
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9541609287261963,
      "learning_rate": 2.3201190227238208e-06,
      "loss": 3.0528,
      "step": 221276
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.220628261566162,
      "learning_rate": 2.3196112989206205e-06,
      "loss": 3.1997,
      "step": 221277
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2521650791168213,
      "learning_rate": 2.319103630461816e-06,
      "loss": 3.1615,
      "step": 221278
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6170272827148438,
      "learning_rate": 2.3185960173475406e-06,
      "loss": 2.7425,
      "step": 221279
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5108323097229004,
      "learning_rate": 2.318088459577827e-06,
      "loss": 2.8676,
      "step": 221280
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.582301378250122,
      "learning_rate": 2.3175809571528093e-06,
      "loss": 3.0145,
      "step": 221281
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.017848491668701,
      "learning_rate": 2.317073510072587e-06,
      "loss": 3.037,
      "step": 221282
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7723228931427,
      "learning_rate": 2.316566118337193e-06,
      "loss": 2.9137,
      "step": 221283
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.275006055831909,
      "learning_rate": 2.316058781946828e-06,
      "loss": 3.0082,
      "step": 221284
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7140390872955322,
      "learning_rate": 2.3155515009014914e-06,
      "loss": 3.0748,
      "step": 221285
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7162113189697266,
      "learning_rate": 2.3150442752013165e-06,
      "loss": 2.8238,
      "step": 221286
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.402778387069702,
      "learning_rate": 2.3145371048464036e-06,
      "loss": 3.2157,
      "step": 221287
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0833232402801514,
      "learning_rate": 2.3140299898368186e-06,
      "loss": 2.8474,
      "step": 221288
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0410943031311035,
      "learning_rate": 2.3135229301726955e-06,
      "loss": 2.8297,
      "step": 221289
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8975813388824463,
      "learning_rate": 2.3130159258541004e-06,
      "loss": 2.9045,
      "step": 221290
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.320603370666504,
      "learning_rate": 2.3125089768811335e-06,
      "loss": 2.9266,
      "step": 221291
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0098915100097656,
      "learning_rate": 2.3120020832538944e-06,
      "loss": 2.823,
      "step": 221292
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.932422161102295,
      "learning_rate": 2.311495244972483e-06,
      "loss": 3.0338,
      "step": 221293
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.860952138900757,
      "learning_rate": 2.310988462036967e-06,
      "loss": 3.0342,
      "step": 221294
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1545000076293945,
      "learning_rate": 2.3104817344474445e-06,
      "loss": 2.5865,
      "step": 221295
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.727781295776367,
      "learning_rate": 2.30997506220405e-06,
      "loss": 2.9092,
      "step": 221296
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3259012699127197,
      "learning_rate": 2.3094684453068502e-06,
      "loss": 3.0716,
      "step": 221297
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.776353359222412,
      "learning_rate": 2.308961883755911e-06,
      "loss": 2.7788,
      "step": 221298
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6059911251068115,
      "learning_rate": 2.3084553775513657e-06,
      "loss": 3.0714,
      "step": 221299
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.707212448120117,
      "learning_rate": 2.3079489266933483e-06,
      "loss": 2.8918,
      "step": 221300
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7185070514678955,
      "learning_rate": 2.307442531181858e-06,
      "loss": 3.1676,
      "step": 221301
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.014821767807007,
      "learning_rate": 2.306936191017028e-06,
      "loss": 2.767,
      "step": 221302
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6664462089538574,
      "learning_rate": 2.3064299061989586e-06,
      "loss": 2.9377,
      "step": 221303
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4566214084625244,
      "learning_rate": 2.30592367672775e-06,
      "loss": 2.924,
      "step": 221304
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2014851570129395,
      "learning_rate": 2.3054175026035014e-06,
      "loss": 2.8959,
      "step": 221305
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1323554515838623,
      "learning_rate": 2.30491138382628e-06,
      "loss": 2.707,
      "step": 221306
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.681722402572632,
      "learning_rate": 2.3044053203961854e-06,
      "loss": 2.85,
      "step": 221307
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8494980335235596,
      "learning_rate": 2.303899312313351e-06,
      "loss": 2.9398,
      "step": 221308
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2969818115234375,
      "learning_rate": 2.30339335957781e-06,
      "loss": 2.9769,
      "step": 221309
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9831771850585938,
      "learning_rate": 2.3028874621897287e-06,
      "loss": 2.8007,
      "step": 221310
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8502936363220215,
      "learning_rate": 2.3023816201491074e-06,
      "loss": 2.9688,
      "step": 221311
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.903303623199463,
      "learning_rate": 2.301875833456146e-06,
      "loss": 2.869,
      "step": 221312
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.94065523147583,
      "learning_rate": 2.3013701021108443e-06,
      "loss": 3.0177,
      "step": 221313
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6740329265594482,
      "learning_rate": 2.300864426113369e-06,
      "loss": 2.9125,
      "step": 221314
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8617186546325684,
      "learning_rate": 2.3003588054637535e-06,
      "loss": 3.02,
      "step": 221315
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0326802730560303,
      "learning_rate": 2.2998532401621306e-06,
      "loss": 2.7627,
      "step": 221316
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0543508529663086,
      "learning_rate": 2.2993477302086004e-06,
      "loss": 2.829,
      "step": 221317
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0573787689208984,
      "learning_rate": 2.29884227560323e-06,
      "loss": 2.7795,
      "step": 221318
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1405444145202637,
      "learning_rate": 2.2983368763461184e-06,
      "loss": 2.8476,
      "step": 221319
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0940587520599365,
      "learning_rate": 2.2978315324373666e-06,
      "loss": 2.7373,
      "step": 221320
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.6024603843688965,
      "learning_rate": 2.2973262438770734e-06,
      "loss": 2.8801,
      "step": 221321
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3781471252441406,
      "learning_rate": 2.2968210106653397e-06,
      "loss": 2.9685,
      "step": 221322
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.772315740585327,
      "learning_rate": 2.296315832802198e-06,
      "loss": 2.9158,
      "step": 221323
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9305038452148438,
      "learning_rate": 2.2958107102878487e-06,
      "loss": 2.8093,
      "step": 221324
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8619112968444824,
      "learning_rate": 2.2953056431222918e-06,
      "loss": 2.9058,
      "step": 221325
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.757427453994751,
      "learning_rate": 2.2948006313056932e-06,
      "loss": 2.9053,
      "step": 221326
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.411013603210449,
      "learning_rate": 2.2942956748380535e-06,
      "loss": 3.2232,
      "step": 221327
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7764413356781006,
      "learning_rate": 2.2937907737195725e-06,
      "loss": 2.5833,
      "step": 221328
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.276142120361328,
      "learning_rate": 2.2932859279502503e-06,
      "loss": 3.1153,
      "step": 221329
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.344550609588623,
      "learning_rate": 2.292781137530286e-06,
      "loss": 2.9382,
      "step": 221330
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.463742971420288,
      "learning_rate": 2.2922764024596808e-06,
      "loss": 2.9353,
      "step": 221331
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1892049312591553,
      "learning_rate": 2.291771722738567e-06,
      "loss": 2.7974,
      "step": 221332
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7577109336853027,
      "learning_rate": 2.291267098367011e-06,
      "loss": 2.956,
      "step": 221333
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8690178394317627,
      "learning_rate": 2.290762529345147e-06,
      "loss": 2.933,
      "step": 221334
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.971060037612915,
      "learning_rate": 2.290258015673041e-06,
      "loss": 2.9405,
      "step": 221335
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1577775478363037,
      "learning_rate": 2.2897535573508263e-06,
      "loss": 3.0696,
      "step": 221336
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.024083137512207,
      "learning_rate": 2.2892491543785363e-06,
      "loss": 2.5796,
      "step": 221337
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1820220947265625,
      "learning_rate": 2.288744806756304e-06,
      "loss": 2.8252,
      "step": 221338
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7251856327056885,
      "learning_rate": 2.2882405144841964e-06,
      "loss": 2.9049,
      "step": 221339
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5520427227020264,
      "learning_rate": 2.2877362775623464e-06,
      "loss": 2.9973,
      "step": 221340
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0669026374816895,
      "learning_rate": 2.287232095990821e-06,
      "loss": 2.9184,
      "step": 221341
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7104129791259766,
      "learning_rate": 2.2867279697697194e-06,
      "loss": 3.0644,
      "step": 221342
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0382087230682373,
      "learning_rate": 2.2862238988991423e-06,
      "loss": 2.8074,
      "step": 221343
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.313844680786133,
      "learning_rate": 2.285719883379156e-06,
      "loss": 2.9158,
      "step": 221344
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9089512825012207,
      "learning_rate": 2.28521592320986e-06,
      "loss": 2.7839,
      "step": 221345
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8431146144866943,
      "learning_rate": 2.284712018391388e-06,
      "loss": 2.7841,
      "step": 221346
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.8258743286132812,
      "learning_rate": 2.284208168923807e-06,
      "loss": 2.7762,
      "step": 221347
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1806764602661133,
      "learning_rate": 2.283704374807216e-06,
      "loss": 2.881,
      "step": 221348
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.515570640563965,
      "learning_rate": 2.2832006360417153e-06,
      "loss": 2.8406,
      "step": 221349
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9967613220214844,
      "learning_rate": 2.282696952627372e-06,
      "loss": 2.9766,
      "step": 221350
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.904364585876465,
      "learning_rate": 2.2821933245642856e-06,
      "loss": 2.8252,
      "step": 221351
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.395843505859375,
      "learning_rate": 2.2816897518525555e-06,
      "loss": 2.8184,
      "step": 221352
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.129971981048584,
      "learning_rate": 2.2811862344922825e-06,
      "loss": 3.0179,
      "step": 221353
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.772671937942505,
      "learning_rate": 2.2806827724835662e-06,
      "loss": 2.7364,
      "step": 221354
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1904900074005127,
      "learning_rate": 2.2801793658265066e-06,
      "loss": 2.7991,
      "step": 221355
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.015916347503662,
      "learning_rate": 2.27967601452117e-06,
      "loss": 3.0675,
      "step": 221356
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7172040939331055,
      "learning_rate": 2.2791727185676566e-06,
      "loss": 2.9009,
      "step": 221357
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4491326808929443,
      "learning_rate": 2.2786694779660665e-06,
      "loss": 2.7903,
      "step": 221358
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.580972909927368,
      "learning_rate": 2.2781662927164655e-06,
      "loss": 2.8474,
      "step": 221359
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7068097591400146,
      "learning_rate": 2.2776631628190214e-06,
      "loss": 2.8475,
      "step": 221360
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9355905055999756,
      "learning_rate": 2.277160088273766e-06,
      "loss": 2.8891,
      "step": 221361
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8799359798431396,
      "learning_rate": 2.2766570690807673e-06,
      "loss": 2.8588,
      "step": 221362
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5348615646362305,
      "learning_rate": 2.276154105240191e-06,
      "loss": 3.088,
      "step": 221363
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6792991161346436,
      "learning_rate": 2.275651196752104e-06,
      "loss": 2.7649,
      "step": 221364
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.705780506134033,
      "learning_rate": 2.275148343616573e-06,
      "loss": 3.0023,
      "step": 221365
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.222486972808838,
      "learning_rate": 2.274645545833731e-06,
      "loss": 2.7242,
      "step": 221366
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.112910270690918,
      "learning_rate": 2.2741428034036446e-06,
      "loss": 2.9239,
      "step": 221367
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2703189849853516,
      "learning_rate": 2.273640116326414e-06,
      "loss": 2.8638,
      "step": 221368
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.438166618347168,
      "learning_rate": 2.2731374846021387e-06,
      "loss": 2.6888,
      "step": 221369
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.868095636367798,
      "learning_rate": 2.272634908230886e-06,
      "loss": 2.8669,
      "step": 221370
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.212306499481201,
      "learning_rate": 2.2721323872127883e-06,
      "loss": 3.2202,
      "step": 221371
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.170713424682617,
      "learning_rate": 2.271629921547913e-06,
      "loss": 2.8277,
      "step": 221372
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.913393259048462,
      "learning_rate": 2.2711275112363926e-06,
      "loss": 2.8653,
      "step": 221373
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4914441108703613,
      "learning_rate": 2.2706251562782606e-06,
      "loss": 3.1635,
      "step": 221374
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7538323402404785,
      "learning_rate": 2.270122856673651e-06,
      "loss": 2.7845,
      "step": 221375
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6594624519348145,
      "learning_rate": 2.269620612422629e-06,
      "loss": 2.9964,
      "step": 221376
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9596784114837646,
      "learning_rate": 2.2691184235252956e-06,
      "loss": 2.8825,
      "step": 221377
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.908848524093628,
      "learning_rate": 2.268616289981784e-06,
      "loss": 2.9167,
      "step": 221378
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1010963916778564,
      "learning_rate": 2.2681142117921604e-06,
      "loss": 3.0157,
      "step": 221379
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1782753467559814,
      "learning_rate": 2.2676121889564915e-06,
      "loss": 3.0367,
      "step": 221380
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7313220500946045,
      "learning_rate": 2.26711022147491e-06,
      "loss": 2.9495,
      "step": 221381
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6580514907836914,
      "learning_rate": 2.266608309347451e-06,
      "loss": 2.7924,
      "step": 221382
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.171818971633911,
      "learning_rate": 2.2661064525743124e-06,
      "loss": 2.8481,
      "step": 221383
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4932913780212402,
      "learning_rate": 2.2656046511554615e-06,
      "loss": 2.8799,
      "step": 221384
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7562694549560547,
      "learning_rate": 2.265102905091132e-06,
      "loss": 2.9852,
      "step": 221385
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1257879734039307,
      "learning_rate": 2.2646012143812896e-06,
      "loss": 2.917,
      "step": 221386
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3253955841064453,
      "learning_rate": 2.2640995790260686e-06,
      "loss": 3.0168,
      "step": 221387
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.120314598083496,
      "learning_rate": 2.263597999025568e-06,
      "loss": 2.8686,
      "step": 221388
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7870774269104004,
      "learning_rate": 2.263096474379922e-06,
      "loss": 2.9808,
      "step": 221389
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6954684257507324,
      "learning_rate": 2.2625950050891626e-06,
      "loss": 2.6912,
      "step": 221390
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2361183166503906,
      "learning_rate": 2.2620935911534244e-06,
      "loss": 2.81,
      "step": 221391
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3910138607025146,
      "learning_rate": 2.2615922325727733e-06,
      "loss": 3.0155,
      "step": 221392
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9782495498657227,
      "learning_rate": 2.2610909293473088e-06,
      "loss": 2.549,
      "step": 221393
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.550135374069214,
      "learning_rate": 2.2605896814771317e-06,
      "loss": 2.7061,
      "step": 221394
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0934011936187744,
      "learning_rate": 2.2600884889623416e-06,
      "loss": 3.0962,
      "step": 221395
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2445714473724365,
      "learning_rate": 2.259587351803005e-06,
      "loss": 2.8792,
      "step": 221396
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.914748430252075,
      "learning_rate": 2.2590862699992216e-06,
      "loss": 2.9339,
      "step": 221397
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.867837905883789,
      "learning_rate": 2.258585243551125e-06,
      "loss": 2.9746,
      "step": 221398
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8455097675323486,
      "learning_rate": 2.258084272458749e-06,
      "loss": 3.0092,
      "step": 221399
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0973713397979736,
      "learning_rate": 2.2575833567222256e-06,
      "loss": 2.8917,
      "step": 221400
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9291954040527344,
      "learning_rate": 2.257082496341656e-06,
      "loss": 2.9876,
      "step": 221401
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.956181526184082,
      "learning_rate": 2.256581691317072e-06,
      "loss": 2.8116,
      "step": 221402
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0006210803985596,
      "learning_rate": 2.2560809416486414e-06,
      "loss": 2.8954,
      "step": 221403
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.6767830848693848,
      "learning_rate": 2.2555802473364306e-06,
      "loss": 2.7266,
      "step": 221404
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.625831365585327,
      "learning_rate": 2.255079608380539e-06,
      "loss": 2.9655,
      "step": 221405
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.773488998413086,
      "learning_rate": 2.254579024781e-06,
      "loss": 3.1157,
      "step": 221406
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.306713104248047,
      "learning_rate": 2.2540784965380142e-06,
      "loss": 2.9626,
      "step": 221407
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0656416416168213,
      "learning_rate": 2.253578023651548e-06,
      "loss": 2.6892,
      "step": 221408
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0137155055999756,
      "learning_rate": 2.2530776061218338e-06,
      "loss": 2.9783,
      "step": 221409
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9968698024749756,
      "learning_rate": 2.2525772439488387e-06,
      "loss": 2.9821,
      "step": 221410
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3648979663848877,
      "learning_rate": 2.2520769371327297e-06,
      "loss": 3.0378,
      "step": 221411
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.988027811050415,
      "learning_rate": 2.2515766856735727e-06,
      "loss": 2.744,
      "step": 221412
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8827319145202637,
      "learning_rate": 2.2510764895715017e-06,
      "loss": 3.1298,
      "step": 221413
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.790969133377075,
      "learning_rate": 2.250576348826516e-06,
      "loss": 2.7831,
      "step": 221414
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.288524866104126,
      "learning_rate": 2.250076263438816e-06,
      "loss": 2.9408,
      "step": 221415
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.357311725616455,
      "learning_rate": 2.2495762334084343e-06,
      "loss": 3.1126,
      "step": 221416
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.156942129135132,
      "learning_rate": 2.249076258735505e-06,
      "loss": 3.144,
      "step": 221417
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1117055416107178,
      "learning_rate": 2.2485763394200273e-06,
      "loss": 2.9485,
      "step": 221418
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.090226411819458,
      "learning_rate": 2.2480764754622018e-06,
      "loss": 3.0943,
      "step": 221419
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6480414867401123,
      "learning_rate": 2.247576666862061e-06,
      "loss": 2.8146,
      "step": 221420
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5878472328186035,
      "learning_rate": 2.2470769136197385e-06,
      "loss": 2.6822,
      "step": 221421
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.665677309036255,
      "learning_rate": 2.246577215735301e-06,
      "loss": 3.0609,
      "step": 221422
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1275413036346436,
      "learning_rate": 2.246077573208849e-06,
      "loss": 2.9597,
      "step": 221423
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0356717109680176,
      "learning_rate": 2.2455779860404476e-06,
      "loss": 3.2046,
      "step": 221424
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2744433879852295,
      "learning_rate": 2.2450784542302315e-06,
      "loss": 2.7342,
      "step": 221425
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.973146677017212,
      "learning_rate": 2.2445789777782665e-06,
      "loss": 2.9973,
      "step": 221426
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.646799325942993,
      "learning_rate": 2.2440795566846857e-06,
      "loss": 3.1094,
      "step": 221427
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.427785873413086,
      "learning_rate": 2.2435801909495234e-06,
      "loss": 2.7158,
      "step": 221428
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0101306438446045,
      "learning_rate": 2.2430808805729118e-06,
      "loss": 2.8239,
      "step": 221429
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.191570281982422,
      "learning_rate": 2.2425816255549176e-06,
      "loss": 3.0227,
      "step": 221430
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.082374095916748,
      "learning_rate": 2.2420824258956414e-06,
      "loss": 2.7712,
      "step": 221431
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.462308406829834,
      "learning_rate": 2.241583281595183e-06,
      "loss": 2.966,
      "step": 221432
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.336855888366699,
      "learning_rate": 2.241084192653675e-06,
      "loss": 3.0221,
      "step": 221433
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.206947088241577,
      "learning_rate": 2.2405851590711176e-06,
      "loss": 2.7647,
      "step": 221434
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.108898401260376,
      "learning_rate": 2.2400861808477112e-06,
      "loss": 3.0168,
      "step": 221435
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9815125465393066,
      "learning_rate": 2.239587257983455e-06,
      "loss": 2.9567,
      "step": 221436
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.16789174079895,
      "learning_rate": 2.239088390478483e-06,
      "loss": 3.1084,
      "step": 221437
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.027254819869995,
      "learning_rate": 2.2385895783328943e-06,
      "loss": 2.8319,
      "step": 221438
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8034753799438477,
      "learning_rate": 2.2380908215467562e-06,
      "loss": 2.7519,
      "step": 221439
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9193735122680664,
      "learning_rate": 2.2375921201202018e-06,
      "loss": 2.728,
      "step": 221440
    },
    {
      "epoch": 2.88,
      "grad_norm": 4.085467338562012,
      "learning_rate": 2.2370934740532974e-06,
      "loss": 2.9754,
      "step": 221441
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9608333110809326,
      "learning_rate": 2.23659488334611e-06,
      "loss": 2.8504,
      "step": 221442
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.175067186355591,
      "learning_rate": 2.2360963479987726e-06,
      "loss": 2.8122,
      "step": 221443
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.527911901473999,
      "learning_rate": 2.235597868011385e-06,
      "loss": 2.6682,
      "step": 221444
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1001510620117188,
      "learning_rate": 2.235099443384014e-06,
      "loss": 2.7328,
      "step": 221445
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8239080905914307,
      "learning_rate": 2.2346010741167597e-06,
      "loss": 2.7847,
      "step": 221446
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0584328174591064,
      "learning_rate": 2.2341027602096883e-06,
      "loss": 2.9537,
      "step": 221447
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8854291439056396,
      "learning_rate": 2.2336045016629335e-06,
      "loss": 2.9358,
      "step": 221448
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.237772226333618,
      "learning_rate": 2.2331062984765946e-06,
      "loss": 3.0428,
      "step": 221449
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.654573440551758,
      "learning_rate": 2.2326081506507053e-06,
      "loss": 2.6876,
      "step": 221450
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.750168800354004,
      "learning_rate": 2.232110058185399e-06,
      "loss": 3.1521,
      "step": 221451
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.731415033340454,
      "learning_rate": 2.2316120210808085e-06,
      "loss": 2.6774,
      "step": 221452
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8293330669403076,
      "learning_rate": 2.2311140393369344e-06,
      "loss": 2.9356,
      "step": 221453
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.809563636779785,
      "learning_rate": 2.2306161129539424e-06,
      "loss": 3.0204,
      "step": 221454
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5286409854888916,
      "learning_rate": 2.2301182419318663e-06,
      "loss": 2.7555,
      "step": 221455
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.851198673248291,
      "learning_rate": 2.2296204262708727e-06,
      "loss": 2.8173,
      "step": 221456
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.867990732192993,
      "learning_rate": 2.229122665970995e-06,
      "loss": 2.9855,
      "step": 221457
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.321786403656006,
      "learning_rate": 2.2286249610323325e-06,
      "loss": 2.8354,
      "step": 221458
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.298565626144409,
      "learning_rate": 2.2281273114550193e-06,
      "loss": 2.8628,
      "step": 221459
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2622854709625244,
      "learning_rate": 2.227629717239088e-06,
      "loss": 2.859,
      "step": 221460
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1939570903778076,
      "learning_rate": 2.227132178384672e-06,
      "loss": 2.78,
      "step": 221461
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.223562240600586,
      "learning_rate": 2.2266346948918713e-06,
      "loss": 2.8612,
      "step": 221462
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0793228149414062,
      "learning_rate": 2.2261372667607525e-06,
      "loss": 2.7045,
      "step": 221463
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0609664916992188,
      "learning_rate": 2.2256398939913823e-06,
      "loss": 2.8445,
      "step": 221464
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.113752841949463,
      "learning_rate": 2.225142576583927e-06,
      "loss": 2.9757,
      "step": 221465
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3096303939819336,
      "learning_rate": 2.2246453145384204e-06,
      "loss": 2.7972,
      "step": 221466
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2205088138580322,
      "learning_rate": 2.2241481078549617e-06,
      "loss": 2.854,
      "step": 221467
    },
    {
      "epoch": 2.88,
      "grad_norm": 5.7207746505737305,
      "learning_rate": 2.2236509565336845e-06,
      "loss": 2.7968,
      "step": 221468
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.825878858566284,
      "learning_rate": 2.2231538605746226e-06,
      "loss": 3.1418,
      "step": 221469
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.784766435623169,
      "learning_rate": 2.2226568199779415e-06,
      "loss": 3.3105,
      "step": 221470
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2105467319488525,
      "learning_rate": 2.222159834743642e-06,
      "loss": 3.0111,
      "step": 221471
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.509242534637451,
      "learning_rate": 2.2216629048718903e-06,
      "loss": 2.9946,
      "step": 221472
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0221104621887207,
      "learning_rate": 2.221166030362753e-06,
      "loss": 2.9561,
      "step": 221473
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.162604808807373,
      "learning_rate": 2.220669211216297e-06,
      "loss": 2.9291,
      "step": 221474
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6374237537384033,
      "learning_rate": 2.2201724474326556e-06,
      "loss": 2.8138,
      "step": 221475
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.046701669692993,
      "learning_rate": 2.2196757390118948e-06,
      "loss": 3.0135,
      "step": 221476
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.79636287689209,
      "learning_rate": 2.219179085954148e-06,
      "loss": 2.8111,
      "step": 221477
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.842322826385498,
      "learning_rate": 2.2186824882594488e-06,
      "loss": 2.9933,
      "step": 221478
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.348021984100342,
      "learning_rate": 2.2181859459279305e-06,
      "loss": 3.2346,
      "step": 221479
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4106760025024414,
      "learning_rate": 2.2176894589596596e-06,
      "loss": 3.0166,
      "step": 221480
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4088335037231445,
      "learning_rate": 2.2171930273547356e-06,
      "loss": 3.0688,
      "step": 221481
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.93650484085083,
      "learning_rate": 2.216696651113292e-06,
      "loss": 3.0446,
      "step": 221482
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.693913698196411,
      "learning_rate": 2.2162003302353627e-06,
      "loss": 2.9519,
      "step": 221483
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7911477088928223,
      "learning_rate": 2.21570406472108e-06,
      "loss": 2.9822,
      "step": 221484
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.454221248626709,
      "learning_rate": 2.215207854570478e-06,
      "loss": 2.9529,
      "step": 221485
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5218801498413086,
      "learning_rate": 2.2147116997837222e-06,
      "loss": 3.0555,
      "step": 221486
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9340076446533203,
      "learning_rate": 2.214215600360847e-06,
      "loss": 2.9241,
      "step": 221487
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5101115703582764,
      "learning_rate": 2.2137195563020184e-06,
      "loss": 3.1765,
      "step": 221488
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.436492443084717,
      "learning_rate": 2.213223567607236e-06,
      "loss": 2.9925,
      "step": 221489
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0738699436187744,
      "learning_rate": 2.2127276342766677e-06,
      "loss": 3.0278,
      "step": 221490
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.938615083694458,
      "learning_rate": 2.2122317563103454e-06,
      "loss": 3.0176,
      "step": 221491
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.925791025161743,
      "learning_rate": 2.211735933708403e-06,
      "loss": 2.8506,
      "step": 221492
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4733505249023438,
      "learning_rate": 2.2112401664709066e-06,
      "loss": 2.9292,
      "step": 221493
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8073184490203857,
      "learning_rate": 2.2107444545979904e-06,
      "loss": 2.9318,
      "step": 221494
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5109810829162598,
      "learning_rate": 2.21024879808972e-06,
      "loss": 2.6231,
      "step": 221495
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.4086074829101562,
      "learning_rate": 2.2097531969461625e-06,
      "loss": 2.8497,
      "step": 221496
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5677151679992676,
      "learning_rate": 2.2092576511674177e-06,
      "loss": 2.9814,
      "step": 221497
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9180335998535156,
      "learning_rate": 2.2087621607536186e-06,
      "loss": 2.9031,
      "step": 221498
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9861490726470947,
      "learning_rate": 2.2082667257048326e-06,
      "loss": 2.9797,
      "step": 221499
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.891695737838745,
      "learning_rate": 2.2077713460211588e-06,
      "loss": 2.9315,
      "step": 221500
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9882140159606934,
      "learning_rate": 2.207276021702664e-06,
      "loss": 2.6524,
      "step": 221501
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.982512950897217,
      "learning_rate": 2.2067807527494817e-06,
      "loss": 2.8708,
      "step": 221502
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2114415168762207,
      "learning_rate": 2.2062855391616453e-06,
      "loss": 2.8306,
      "step": 221503
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3891677856445312,
      "learning_rate": 2.2057903809392875e-06,
      "loss": 2.9708,
      "step": 221504
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.5732476711273193,
      "learning_rate": 2.2052952780825085e-06,
      "loss": 2.9026,
      "step": 221505
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1146552562713623,
      "learning_rate": 2.204800230591375e-06,
      "loss": 2.7606,
      "step": 221506
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3789680004119873,
      "learning_rate": 2.2043052384660197e-06,
      "loss": 2.8778,
      "step": 221507
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3344504833221436,
      "learning_rate": 2.2038103017064767e-06,
      "loss": 2.8968,
      "step": 221508
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2336018085479736,
      "learning_rate": 2.203315420312879e-06,
      "loss": 2.9118,
      "step": 221509
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8999056816101074,
      "learning_rate": 2.202820594285293e-06,
      "loss": 2.7814,
      "step": 221510
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2725393772125244,
      "learning_rate": 2.2023258236238184e-06,
      "loss": 2.9292,
      "step": 221511
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.765716791152954,
      "learning_rate": 2.201831108328589e-06,
      "loss": 2.6809,
      "step": 221512
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2581334114074707,
      "learning_rate": 2.2013364483996374e-06,
      "loss": 2.9365,
      "step": 221513
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9303171634674072,
      "learning_rate": 2.2008418438370977e-06,
      "loss": 2.6854,
      "step": 221514
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.26285719871521,
      "learning_rate": 2.200347294641003e-06,
      "loss": 2.6237,
      "step": 221515
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0715761184692383,
      "learning_rate": 2.199852800811519e-06,
      "loss": 2.7841,
      "step": 221516
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1895556449890137,
      "learning_rate": 2.19935836234868e-06,
      "loss": 3.0668,
      "step": 221517
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.105811834335327,
      "learning_rate": 2.1988639792526184e-06,
      "loss": 2.8213,
      "step": 221518
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.066511392593384,
      "learning_rate": 2.1983696515234017e-06,
      "loss": 2.7801,
      "step": 221519
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7096126079559326,
      "learning_rate": 2.1978753791611293e-06,
      "loss": 2.9654,
      "step": 221520
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6109812259674072,
      "learning_rate": 2.197381162165901e-06,
      "loss": 2.9123,
      "step": 221521
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2845613956451416,
      "learning_rate": 2.1968870005377837e-06,
      "loss": 2.8747,
      "step": 221522
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7285871505737305,
      "learning_rate": 2.1963928942769104e-06,
      "loss": 2.9812,
      "step": 221523
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.872246742248535,
      "learning_rate": 2.195898843383348e-06,
      "loss": 2.8811,
      "step": 221524
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.470618963241577,
      "learning_rate": 2.1954048478571627e-06,
      "loss": 2.6934,
      "step": 221525
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2932748794555664,
      "learning_rate": 2.194910907698488e-06,
      "loss": 2.9238,
      "step": 221526
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.4313859939575195,
      "learning_rate": 2.194417022907391e-06,
      "loss": 3.1652,
      "step": 221527
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2691168785095215,
      "learning_rate": 2.193923193484004e-06,
      "loss": 2.7878,
      "step": 221528
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.196272850036621,
      "learning_rate": 2.193429419428361e-06,
      "loss": 3.187,
      "step": 221529
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6043167114257812,
      "learning_rate": 2.1929357007405945e-06,
      "loss": 3.0144,
      "step": 221530
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.175919771194458,
      "learning_rate": 2.192442037420772e-06,
      "loss": 2.9656,
      "step": 221531
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.408712863922119,
      "learning_rate": 2.1919484294689928e-06,
      "loss": 2.7689,
      "step": 221532
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9084553718566895,
      "learning_rate": 2.1914548768853903e-06,
      "loss": 2.8439,
      "step": 221533
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.131122350692749,
      "learning_rate": 2.190961379669964e-06,
      "loss": 3.0925,
      "step": 221534
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.943962574005127,
      "learning_rate": 2.1904679378228816e-06,
      "loss": 2.727,
      "step": 221535
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8501996994018555,
      "learning_rate": 2.1899745513442426e-06,
      "loss": 2.7793,
      "step": 221536
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3673713207244873,
      "learning_rate": 2.1894812202340462e-06,
      "loss": 2.981,
      "step": 221537
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.7230823040008545,
      "learning_rate": 2.188987944492493e-06,
      "loss": 2.7944,
      "step": 221538
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.532719373703003,
      "learning_rate": 2.188494724119616e-06,
      "loss": 2.8635,
      "step": 221539
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.762751340866089,
      "learning_rate": 2.1880015591155487e-06,
      "loss": 2.7843,
      "step": 221540
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2975730895996094,
      "learning_rate": 2.187508449480291e-06,
      "loss": 3.0589,
      "step": 221541
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.846803903579712,
      "learning_rate": 2.1870153952140423e-06,
      "loss": 3.0902,
      "step": 221542
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1688027381896973,
      "learning_rate": 2.1865223963168366e-06,
      "loss": 2.8641,
      "step": 221543
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.5597422122955322,
      "learning_rate": 2.186029452788773e-06,
      "loss": 3.098,
      "step": 221544
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.6975016593933105,
      "learning_rate": 2.185536564629986e-06,
      "loss": 2.9089,
      "step": 221545
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.778017520904541,
      "learning_rate": 2.1850437318404745e-06,
      "loss": 2.8891,
      "step": 221546
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2383222579956055,
      "learning_rate": 2.1845509544204053e-06,
      "loss": 2.8071,
      "step": 221547
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.821540594100952,
      "learning_rate": 2.1840582323698786e-06,
      "loss": 3.0864,
      "step": 221548
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.9508748054504395,
      "learning_rate": 2.183565565688927e-06,
      "loss": 3.0964,
      "step": 221549
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8392395973205566,
      "learning_rate": 2.1830729543776846e-06,
      "loss": 3.0122,
      "step": 221550
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0535576343536377,
      "learning_rate": 2.1825803984362176e-06,
      "loss": 2.6981,
      "step": 221551
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0410642623901367,
      "learning_rate": 2.182087897864626e-06,
      "loss": 3.0126,
      "step": 221552
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.8911514282226562,
      "learning_rate": 2.1815954526630096e-06,
      "loss": 2.7507,
      "step": 221553
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.0678467750549316,
      "learning_rate": 2.1811030628314684e-06,
      "loss": 3.0982,
      "step": 221554
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1073174476623535,
      "learning_rate": 2.180610728370069e-06,
      "loss": 3.0461,
      "step": 221555
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.945502281188965,
      "learning_rate": 2.180118449278945e-06,
      "loss": 3.0366,
      "step": 221556
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.2255985736846924,
      "learning_rate": 2.1796262255581287e-06,
      "loss": 3.0313,
      "step": 221557
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3120172023773193,
      "learning_rate": 2.179134057207754e-06,
      "loss": 2.9193,
      "step": 221558
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.999997854232788,
      "learning_rate": 2.178641944227888e-06,
      "loss": 3.0068,
      "step": 221559
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.994199514389038,
      "learning_rate": 2.17814988661863e-06,
      "loss": 2.9117,
      "step": 221560
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3634159564971924,
      "learning_rate": 2.177657884380113e-06,
      "loss": 2.8477,
      "step": 221561
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.932922124862671,
      "learning_rate": 2.1771659375123707e-06,
      "loss": 3.1676,
      "step": 221562
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.035309314727783,
      "learning_rate": 2.176674046015503e-06,
      "loss": 2.938,
      "step": 221563
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.275141477584839,
      "learning_rate": 2.1761822098896097e-06,
      "loss": 2.9375,
      "step": 221564
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.3434531688690186,
      "learning_rate": 2.1756904291348244e-06,
      "loss": 2.8971,
      "step": 221565
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.7940313816070557,
      "learning_rate": 2.1751987037511463e-06,
      "loss": 2.7956,
      "step": 221566
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1613526344299316,
      "learning_rate": 2.1747070337387763e-06,
      "loss": 2.8134,
      "step": 221567
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1812548637390137,
      "learning_rate": 2.1742154190977133e-06,
      "loss": 3.0224,
      "step": 221568
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.304346799850464,
      "learning_rate": 2.1737238598280915e-06,
      "loss": 2.864,
      "step": 221569
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.136828899383545,
      "learning_rate": 2.1732323559300104e-06,
      "loss": 2.8283,
      "step": 221570
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1249146461486816,
      "learning_rate": 2.172740907403536e-06,
      "loss": 3.0869,
      "step": 221571
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7655107975006104,
      "learning_rate": 2.1722495142487363e-06,
      "loss": 3.0316,
      "step": 221572
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.043308973312378,
      "learning_rate": 2.1717581764658097e-06,
      "loss": 3.0054,
      "step": 221573
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.722606658935547,
      "learning_rate": 2.171266894054724e-06,
      "loss": 2.6986,
      "step": 221574
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5930933952331543,
      "learning_rate": 2.170775667015645e-06,
      "loss": 2.914,
      "step": 221575
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.107315540313721,
      "learning_rate": 2.1702844953486065e-06,
      "loss": 2.8776,
      "step": 221576
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9772496223449707,
      "learning_rate": 2.1697933790537745e-06,
      "loss": 2.8747,
      "step": 221577
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4081099033355713,
      "learning_rate": 2.1693023181311832e-06,
      "loss": 2.7546,
      "step": 221578
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9176931381225586,
      "learning_rate": 2.1688113125809315e-06,
      "loss": 3.0085,
      "step": 221579
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8140416145324707,
      "learning_rate": 2.168320362403153e-06,
      "loss": 2.8364,
      "step": 221580
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2460973262786865,
      "learning_rate": 2.1678294675978813e-06,
      "loss": 2.9516,
      "step": 221581
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.159432888031006,
      "learning_rate": 2.1673386281652162e-06,
      "loss": 2.8992,
      "step": 221582
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7936220169067383,
      "learning_rate": 2.1668478441052905e-06,
      "loss": 2.8576,
      "step": 221583
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9300425052642822,
      "learning_rate": 2.166357115418138e-06,
      "loss": 2.9478,
      "step": 221584
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2086386680603027,
      "learning_rate": 2.165866442103925e-06,
      "loss": 3.0011,
      "step": 221585
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.075066566467285,
      "learning_rate": 2.165375824162685e-06,
      "loss": 2.9685,
      "step": 221586
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8917622566223145,
      "learning_rate": 2.1648852615945176e-06,
      "loss": 2.6964,
      "step": 221587
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7584800720214844,
      "learning_rate": 2.1643947543995232e-06,
      "loss": 3.0262,
      "step": 221588
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8100922107696533,
      "learning_rate": 2.1639043025778013e-06,
      "loss": 2.845,
      "step": 221589
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9173803329467773,
      "learning_rate": 2.1634139061293853e-06,
      "loss": 3.1559,
      "step": 221590
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.236182928085327,
      "learning_rate": 2.162923565054475e-06,
      "loss": 2.8067,
      "step": 221591
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2192118167877197,
      "learning_rate": 2.162433279353071e-06,
      "loss": 3.0365,
      "step": 221592
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.776449680328369,
      "learning_rate": 2.161943049025272e-06,
      "loss": 2.8327,
      "step": 221593
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1319141387939453,
      "learning_rate": 2.161452874071212e-06,
      "loss": 2.8387,
      "step": 221594
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0617315769195557,
      "learning_rate": 2.160962754490958e-06,
      "loss": 2.9772,
      "step": 221595
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.893021821975708,
      "learning_rate": 2.160472690284609e-06,
      "loss": 3.0492,
      "step": 221596
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.237501621246338,
      "learning_rate": 2.1599826814522324e-06,
      "loss": 3.0779,
      "step": 221597
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.202763319015503,
      "learning_rate": 2.159492727993961e-06,
      "loss": 3.0307,
      "step": 221598
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3042571544647217,
      "learning_rate": 2.1590028299098616e-06,
      "loss": 2.9762,
      "step": 221599
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9351820945739746,
      "learning_rate": 2.1585129872000005e-06,
      "loss": 2.8165,
      "step": 221600
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6104767322540283,
      "learning_rate": 2.1580231998645114e-06,
      "loss": 2.6438,
      "step": 221601
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.58137845993042,
      "learning_rate": 2.1575334679034606e-06,
      "loss": 2.8689,
      "step": 221602
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.860595703125,
      "learning_rate": 2.157043791316948e-06,
      "loss": 2.7987,
      "step": 221603
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6509339809417725,
      "learning_rate": 2.1565541701050404e-06,
      "loss": 2.8048,
      "step": 221604
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.318476676940918,
      "learning_rate": 2.156064604267904e-06,
      "loss": 3.0103,
      "step": 221605
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.054826736450195,
      "learning_rate": 2.1555750938055393e-06,
      "loss": 2.8887,
      "step": 221606
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7003087997436523,
      "learning_rate": 2.1550856387181125e-06,
      "loss": 2.8276,
      "step": 221607
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.055368423461914,
      "learning_rate": 2.1545962390056236e-06,
      "loss": 3.0052,
      "step": 221608
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.029717206954956,
      "learning_rate": 2.154106894668239e-06,
      "loss": 2.9905,
      "step": 221609
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.845503807067871,
      "learning_rate": 2.1536176057060263e-06,
      "loss": 2.77,
      "step": 221610
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4049243927001953,
      "learning_rate": 2.153128372119117e-06,
      "loss": 3.1292,
      "step": 221611
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.214320421218872,
      "learning_rate": 2.152639193907513e-06,
      "loss": 3.0748,
      "step": 221612
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.958263874053955,
      "learning_rate": 2.152150071071379e-06,
      "loss": 2.9459,
      "step": 221613
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5943286418914795,
      "learning_rate": 2.151661003610783e-06,
      "loss": 2.7702,
      "step": 221614
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.441451072692871,
      "learning_rate": 2.151171991525824e-06,
      "loss": 3.0948,
      "step": 221615
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7745420932769775,
      "learning_rate": 2.1506830348165695e-06,
      "loss": 2.7264,
      "step": 221616
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8751461505889893,
      "learning_rate": 2.1501941334831183e-06,
      "loss": 2.8656,
      "step": 221617
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.3503706455230713,
      "learning_rate": 2.1497052875255714e-06,
      "loss": 2.9506,
      "step": 221618
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5286307334899902,
      "learning_rate": 2.1492164969440286e-06,
      "loss": 2.9086,
      "step": 221619
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.976125955581665,
      "learning_rate": 2.1487277617385556e-06,
      "loss": 3.1528,
      "step": 221620
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.885883092880249,
      "learning_rate": 2.1482390819092867e-06,
      "loss": 3.0139,
      "step": 221621
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.867048501968384,
      "learning_rate": 2.1477504574562545e-06,
      "loss": 2.8817,
      "step": 221622
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.476867198944092,
      "learning_rate": 2.147261888379559e-06,
      "loss": 3.1139,
      "step": 221623
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2223875522613525,
      "learning_rate": 2.146773374679367e-06,
      "loss": 3.2093,
      "step": 221624
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3051323890686035,
      "learning_rate": 2.1462849163556785e-06,
      "loss": 3.0049,
      "step": 221625
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7907772064208984,
      "learning_rate": 2.1457965134085932e-06,
      "loss": 2.8711,
      "step": 221626
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.802586317062378,
      "learning_rate": 2.1453081658382775e-06,
      "loss": 2.7053,
      "step": 221627
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.953472137451172,
      "learning_rate": 2.144819873644732e-06,
      "loss": 2.9743,
      "step": 221628
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.866934299468994,
      "learning_rate": 2.1443316368280894e-06,
      "loss": 3.0084,
      "step": 221629
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.82883620262146,
      "learning_rate": 2.143843455388483e-06,
      "loss": 3.1295,
      "step": 221630
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8294503688812256,
      "learning_rate": 2.143355329325913e-06,
      "loss": 3.0106,
      "step": 221631
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9543166160583496,
      "learning_rate": 2.1428672586405126e-06,
      "loss": 3.2347,
      "step": 221632
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0247201919555664,
      "learning_rate": 2.142379243332415e-06,
      "loss": 3.0338,
      "step": 221633
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3545939922332764,
      "learning_rate": 2.14189128340162e-06,
      "loss": 2.913,
      "step": 221634
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.276716947555542,
      "learning_rate": 2.1414033788483278e-06,
      "loss": 2.9354,
      "step": 221635
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.083397626876831,
      "learning_rate": 2.140915529672538e-06,
      "loss": 3.0243,
      "step": 221636
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9125638008117676,
      "learning_rate": 2.1404277358743504e-06,
      "loss": 3.1928,
      "step": 221637
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2273380756378174,
      "learning_rate": 2.1399399974539323e-06,
      "loss": 2.8664,
      "step": 221638
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.108042001724243,
      "learning_rate": 2.139452314411283e-06,
      "loss": 2.9952,
      "step": 221639
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.09527850151062,
      "learning_rate": 2.138964686746536e-06,
      "loss": 2.704,
      "step": 221640
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0040557384490967,
      "learning_rate": 2.1384771144597912e-06,
      "loss": 2.9286,
      "step": 221641
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.629176139831543,
      "learning_rate": 2.1379895975511486e-06,
      "loss": 3.0897,
      "step": 221642
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6299026012420654,
      "learning_rate": 2.1375021360206413e-06,
      "loss": 2.9403,
      "step": 221643
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.213644504547119,
      "learning_rate": 2.1370147298684027e-06,
      "loss": 2.7488,
      "step": 221644
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.13710355758667,
      "learning_rate": 2.1365273790944993e-06,
      "loss": 2.8128,
      "step": 221645
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8540446758270264,
      "learning_rate": 2.136040083699064e-06,
      "loss": 3.0108,
      "step": 221646
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8615612983703613,
      "learning_rate": 2.1355528436821644e-06,
      "loss": 2.9404,
      "step": 221647
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8169591426849365,
      "learning_rate": 2.1350656590438663e-06,
      "loss": 2.8603,
      "step": 221648
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.723966121673584,
      "learning_rate": 2.134578529784303e-06,
      "loss": 3.0564,
      "step": 221649
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5089428424835205,
      "learning_rate": 2.1340914559035415e-06,
      "loss": 2.9398,
      "step": 221650
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.371152877807617,
      "learning_rate": 2.1336044374016483e-06,
      "loss": 3.0534,
      "step": 221651
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2944839000701904,
      "learning_rate": 2.133117474278756e-06,
      "loss": 2.866,
      "step": 221652
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.065978765487671,
      "learning_rate": 2.132630566534965e-06,
      "loss": 3.0121,
      "step": 221653
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6402969360351562,
      "learning_rate": 2.132143714170309e-06,
      "loss": 3.0268,
      "step": 221654
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7964494228363037,
      "learning_rate": 2.1316569171849206e-06,
      "loss": 3.0483,
      "step": 221655
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.086292028427124,
      "learning_rate": 2.1311701755789e-06,
      "loss": 2.7999,
      "step": 221656
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.801166534423828,
      "learning_rate": 2.1306834893522805e-06,
      "loss": 2.9491,
      "step": 221657
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5874791145324707,
      "learning_rate": 2.1301968585051952e-06,
      "loss": 2.9344,
      "step": 221658
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.670225143432617,
      "learning_rate": 2.1297102830377445e-06,
      "loss": 2.8104,
      "step": 221659
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.030763626098633,
      "learning_rate": 2.1292237629500274e-06,
      "loss": 3.1396,
      "step": 221660
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.931142568588257,
      "learning_rate": 2.1287372982420782e-06,
      "loss": 3.0034,
      "step": 221661
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9176104068756104,
      "learning_rate": 2.128250888914029e-06,
      "loss": 2.9706,
      "step": 221662
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8959801197052,
      "learning_rate": 2.127764534965948e-06,
      "loss": 2.6523,
      "step": 221663
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8369407653808594,
      "learning_rate": 2.1272782363979333e-06,
      "loss": 2.7567,
      "step": 221664
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.22959041595459,
      "learning_rate": 2.1267919932100864e-06,
      "loss": 2.7797,
      "step": 221665
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0633745193481445,
      "learning_rate": 2.1263058054025063e-06,
      "loss": 2.9021,
      "step": 221666
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6783294677734375,
      "learning_rate": 2.1258196729752596e-06,
      "loss": 2.8322,
      "step": 221667
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6425161361694336,
      "learning_rate": 2.1253335959284466e-06,
      "loss": 2.8271,
      "step": 221668
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.608924627304077,
      "learning_rate": 2.1248475742621672e-06,
      "loss": 2.9271,
      "step": 221669
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8244919776916504,
      "learning_rate": 2.124361607976488e-06,
      "loss": 2.9762,
      "step": 221670
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1089816093444824,
      "learning_rate": 2.1238756970715086e-06,
      "loss": 2.9128,
      "step": 221671
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9634666442871094,
      "learning_rate": 2.1233898415473626e-06,
      "loss": 2.9703,
      "step": 221672
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.880841016769409,
      "learning_rate": 2.1229040414040497e-06,
      "loss": 2.8846,
      "step": 221673
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.632293224334717,
      "learning_rate": 2.122418296641737e-06,
      "loss": 2.9571,
      "step": 221674
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8340020179748535,
      "learning_rate": 2.121932607260457e-06,
      "loss": 3.2372,
      "step": 221675
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9296510219573975,
      "learning_rate": 2.121446973260377e-06,
      "loss": 2.8744,
      "step": 221676
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2487242221832275,
      "learning_rate": 2.1209613946415292e-06,
      "loss": 3.1584,
      "step": 221677
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8709359169006348,
      "learning_rate": 2.120475871404015e-06,
      "loss": 2.9563,
      "step": 221678
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6102936267852783,
      "learning_rate": 2.119990403547933e-06,
      "loss": 3.0923,
      "step": 221679
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3661279678344727,
      "learning_rate": 2.1195049910733507e-06,
      "loss": 2.7561,
      "step": 221680
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.057762861251831,
      "learning_rate": 2.119019633980401e-06,
      "loss": 2.8864,
      "step": 221681
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.598519802093506,
      "learning_rate": 2.1185343322691173e-06,
      "loss": 2.5187,
      "step": 221682
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7264292240142822,
      "learning_rate": 2.1180490859396327e-06,
      "loss": 2.9501,
      "step": 221683
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.514207601547241,
      "learning_rate": 2.117563894992047e-06,
      "loss": 3.0545,
      "step": 221684
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.737438678741455,
      "learning_rate": 2.117078759426394e-06,
      "loss": 2.8798,
      "step": 221685
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.183042049407959,
      "learning_rate": 2.1165936792428395e-06,
      "loss": 2.8093,
      "step": 221686
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9124393463134766,
      "learning_rate": 2.116108654441384e-06,
      "loss": 2.9356,
      "step": 221687
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.670667886734009,
      "learning_rate": 2.1156236850222274e-06,
      "loss": 3.1094,
      "step": 221688
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5247702598571777,
      "learning_rate": 2.115138770985336e-06,
      "loss": 2.7897,
      "step": 221689
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0183401107788086,
      "learning_rate": 2.1146539123309103e-06,
      "loss": 3.1011,
      "step": 221690
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7801103591918945,
      "learning_rate": 2.114169109058983e-06,
      "loss": 3.143,
      "step": 221691
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.136495590209961,
      "learning_rate": 2.113684361169654e-06,
      "loss": 2.9787,
      "step": 221692
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.610868215560913,
      "learning_rate": 2.1131996686629905e-06,
      "loss": 3.0879,
      "step": 221693
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3792307376861572,
      "learning_rate": 2.112715031539125e-06,
      "loss": 3.2368,
      "step": 221694
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.94185209274292,
      "learning_rate": 2.1122304497981245e-06,
      "loss": 2.857,
      "step": 221695
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8963279724121094,
      "learning_rate": 2.1117459234401224e-06,
      "loss": 2.9264,
      "step": 221696
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6300206184387207,
      "learning_rate": 2.1112614524651184e-06,
      "loss": 2.9497,
      "step": 221697
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7619361877441406,
      "learning_rate": 2.110777036873279e-06,
      "loss": 2.8345,
      "step": 221698
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.744667053222656,
      "learning_rate": 2.1102926766646376e-06,
      "loss": 2.5009,
      "step": 221699
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.223193407058716,
      "learning_rate": 2.109808371839361e-06,
      "loss": 2.9092,
      "step": 221700
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9344279766082764,
      "learning_rate": 2.109324122397449e-06,
      "loss": 3.0305,
      "step": 221701
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7376394271850586,
      "learning_rate": 2.1088399283390677e-06,
      "loss": 2.9071,
      "step": 221702
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.013287544250488,
      "learning_rate": 2.1083557896642513e-06,
      "loss": 2.8506,
      "step": 221703
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1561331748962402,
      "learning_rate": 2.1078717063731653e-06,
      "loss": 2.9712,
      "step": 221704
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.190218210220337,
      "learning_rate": 2.1073876784657774e-06,
      "loss": 2.8806,
      "step": 221705
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.809300184249878,
      "learning_rate": 2.10690370594232e-06,
      "loss": 2.9464,
      "step": 221706
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8436191082000732,
      "learning_rate": 2.10641978880276e-06,
      "loss": 2.8641,
      "step": 221707
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8176074028015137,
      "learning_rate": 2.105935927047264e-06,
      "loss": 2.884,
      "step": 221708
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6128618717193604,
      "learning_rate": 2.105452120675899e-06,
      "loss": 2.7321,
      "step": 221709
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3628129959106445,
      "learning_rate": 2.104968369688764e-06,
      "loss": 2.8853,
      "step": 221710
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.312326192855835,
      "learning_rate": 2.1044846740858934e-06,
      "loss": 2.7892,
      "step": 221711
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6540188789367676,
      "learning_rate": 2.1040010338674527e-06,
      "loss": 3.0843,
      "step": 221712
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.110504627227783,
      "learning_rate": 2.1035174490335094e-06,
      "loss": 2.9558,
      "step": 221713
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6176843643188477,
      "learning_rate": 2.1030339195841295e-06,
      "loss": 3.1208,
      "step": 221714
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5524771213531494,
      "learning_rate": 2.102550445519413e-06,
      "loss": 2.8495,
      "step": 221715
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6502487659454346,
      "learning_rate": 2.1020670268394933e-06,
      "loss": 3.1082,
      "step": 221716
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.159560441970825,
      "learning_rate": 2.1015836635443708e-06,
      "loss": 3.0418,
      "step": 221717
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4197325706481934,
      "learning_rate": 2.101100355634244e-06,
      "loss": 2.8853,
      "step": 221718
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4299750328063965,
      "learning_rate": 2.100617103109081e-06,
      "loss": 2.8824,
      "step": 221719
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.716212034225464,
      "learning_rate": 2.1001339059690816e-06,
      "loss": 2.8702,
      "step": 221720
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5216948986053467,
      "learning_rate": 2.099650764214278e-06,
      "loss": 2.9363,
      "step": 221721
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4587833881378174,
      "learning_rate": 2.0991676778447706e-06,
      "loss": 2.6775,
      "step": 221722
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.930050849914551,
      "learning_rate": 2.09868464686066e-06,
      "loss": 2.7645,
      "step": 221723
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0983386039733887,
      "learning_rate": 2.0982016712620452e-06,
      "loss": 2.9169,
      "step": 221724
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.972165584564209,
      "learning_rate": 2.09771875104896e-06,
      "loss": 3.1644,
      "step": 221725
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9770917892456055,
      "learning_rate": 2.0972358862215376e-06,
      "loss": 3.1114,
      "step": 221726
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.08196759223938,
      "learning_rate": 2.096753076779878e-06,
      "loss": 2.8691,
      "step": 221727
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8970279693603516,
      "learning_rate": 2.0962703227240474e-06,
      "loss": 2.9046,
      "step": 221728
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7003612518310547,
      "learning_rate": 2.095787624054146e-06,
      "loss": 3.129,
      "step": 221729
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.837531328201294,
      "learning_rate": 2.0953049807702405e-06,
      "loss": 2.7135,
      "step": 221730
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.903447151184082,
      "learning_rate": 2.094822392872497e-06,
      "loss": 2.9513,
      "step": 221731
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9329240322113037,
      "learning_rate": 2.0943398603609163e-06,
      "loss": 2.8885,
      "step": 221732
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6480026245117188,
      "learning_rate": 2.0938573832355976e-06,
      "loss": 3.0576,
      "step": 221733
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7304463386535645,
      "learning_rate": 2.0933749614966746e-06,
      "loss": 2.935,
      "step": 221734
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8162360191345215,
      "learning_rate": 2.092892595144213e-06,
      "loss": 3.0828,
      "step": 221735
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9799232482910156,
      "learning_rate": 2.092410284178314e-06,
      "loss": 3.0012,
      "step": 221736
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3038110733032227,
      "learning_rate": 2.091928028599077e-06,
      "loss": 3.0196,
      "step": 221737
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.580143451690674,
      "learning_rate": 2.091445828406535e-06,
      "loss": 2.681,
      "step": 221738
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.937575101852417,
      "learning_rate": 2.0909636836008547e-06,
      "loss": 2.7837,
      "step": 221739
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9155685901641846,
      "learning_rate": 2.0904815941820364e-06,
      "loss": 2.808,
      "step": 221740
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.611260414123535,
      "learning_rate": 2.0899995601502793e-06,
      "loss": 2.8696,
      "step": 221741
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.684976816177368,
      "learning_rate": 2.0895175815055844e-06,
      "loss": 2.8777,
      "step": 221742
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5588154792785645,
      "learning_rate": 2.0890356582480504e-06,
      "loss": 2.8025,
      "step": 221743
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.845966100692749,
      "learning_rate": 2.0885537903778115e-06,
      "loss": 2.9165,
      "step": 221744
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3214614391326904,
      "learning_rate": 2.0880719778949672e-06,
      "loss": 2.8433,
      "step": 221745
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.2440967559814453,
      "learning_rate": 2.0875902207995176e-06,
      "loss": 3.1232,
      "step": 221746
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.592545509338379,
      "learning_rate": 2.0871085190916625e-06,
      "loss": 2.8998,
      "step": 221747
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.272783041000366,
      "learning_rate": 2.0866268727713685e-06,
      "loss": 3.0021,
      "step": 221748
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.804431915283203,
      "learning_rate": 2.086145281838869e-06,
      "loss": 2.6823,
      "step": 221749
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6127002239227295,
      "learning_rate": 2.0856637462941307e-06,
      "loss": 2.8838,
      "step": 221750
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7667365074157715,
      "learning_rate": 2.0851822661373195e-06,
      "loss": 2.5957,
      "step": 221751
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9354705810546875,
      "learning_rate": 2.0847008413684696e-06,
      "loss": 2.77,
      "step": 221752
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8618581295013428,
      "learning_rate": 2.0842194719877136e-06,
      "loss": 2.8627,
      "step": 221753
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8278677463531494,
      "learning_rate": 2.0837381579950853e-06,
      "loss": 2.8366,
      "step": 221754
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1419830322265625,
      "learning_rate": 2.083256899390784e-06,
      "loss": 2.8017,
      "step": 221755
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.051368236541748,
      "learning_rate": 2.0827756961747765e-06,
      "loss": 3.1391,
      "step": 221756
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2062206268310547,
      "learning_rate": 2.08229454834723e-06,
      "loss": 3.0139,
      "step": 221757
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9451162815093994,
      "learning_rate": 2.0818134559081768e-06,
      "loss": 3.1558,
      "step": 221758
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7091009616851807,
      "learning_rate": 2.081332418857784e-06,
      "loss": 2.8574,
      "step": 221759
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8977856636047363,
      "learning_rate": 2.0808514371960515e-06,
      "loss": 3.0841,
      "step": 221760
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.502324342727661,
      "learning_rate": 2.080370510923146e-06,
      "loss": 3.0241,
      "step": 221761
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7932727336883545,
      "learning_rate": 2.0798896400391006e-06,
      "loss": 2.9984,
      "step": 221762
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.017531156539917,
      "learning_rate": 2.0794088245440486e-06,
      "loss": 2.7909,
      "step": 221763
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9378883838653564,
      "learning_rate": 2.078928064438057e-06,
      "loss": 2.868,
      "step": 221764
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5196921825408936,
      "learning_rate": 2.0784473597211916e-06,
      "loss": 2.8003,
      "step": 221765
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.094175100326538,
      "learning_rate": 2.077966710393586e-06,
      "loss": 2.901,
      "step": 221766
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6216354370117188,
      "learning_rate": 2.077486116455307e-06,
      "loss": 2.9992,
      "step": 221767
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.259586811065674,
      "learning_rate": 2.077005577906421e-06,
      "loss": 2.8366,
      "step": 221768
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8876073360443115,
      "learning_rate": 2.0765250947470946e-06,
      "loss": 2.6485,
      "step": 221769
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1567087173461914,
      "learning_rate": 2.076044666977328e-06,
      "loss": 2.9883,
      "step": 221770
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6810364723205566,
      "learning_rate": 2.075564294597254e-06,
      "loss": 2.7863,
      "step": 221771
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1996915340423584,
      "learning_rate": 2.075083977606973e-06,
      "loss": 2.5639,
      "step": 221772
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.513892650604248,
      "learning_rate": 2.074603716006551e-06,
      "loss": 2.7948,
      "step": 221773
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.69124174118042,
      "learning_rate": 2.074123509796055e-06,
      "loss": 2.839,
      "step": 221774
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7229363918304443,
      "learning_rate": 2.073643358975652e-06,
      "loss": 2.8842,
      "step": 221775
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5109434127807617,
      "learning_rate": 2.073163263545341e-06,
      "loss": 3.0503,
      "step": 221776
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.917372465133667,
      "learning_rate": 2.0726832235052893e-06,
      "loss": 2.7896,
      "step": 221777
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1203765869140625,
      "learning_rate": 2.0722032388554964e-06,
      "loss": 2.7813,
      "step": 221778
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.949978828430176,
      "learning_rate": 2.0717233095961627e-06,
      "loss": 3.0062,
      "step": 221779
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.866941452026367,
      "learning_rate": 2.0712434357272878e-06,
      "loss": 2.916,
      "step": 221780
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.013269901275635,
      "learning_rate": 2.070763617249005e-06,
      "loss": 3.1485,
      "step": 221781
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.806642532348633,
      "learning_rate": 2.070283854161381e-06,
      "loss": 2.9307,
      "step": 221782
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.085541248321533,
      "learning_rate": 2.0698041464645155e-06,
      "loss": 3.0737,
      "step": 221783
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9241409301757812,
      "learning_rate": 2.0693244941585086e-06,
      "loss": 2.6454,
      "step": 221784
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4588632583618164,
      "learning_rate": 2.068844897243427e-06,
      "loss": 2.9758,
      "step": 221785
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.937343120574951,
      "learning_rate": 2.0683653557193704e-06,
      "loss": 2.9525,
      "step": 221786
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0447378158569336,
      "learning_rate": 2.0678858695864386e-06,
      "loss": 3.0206,
      "step": 221787
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.019848108291626,
      "learning_rate": 2.0674064388446987e-06,
      "loss": 2.9959,
      "step": 221788
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8119144439697266,
      "learning_rate": 2.0669270634942836e-06,
      "loss": 2.9462,
      "step": 221789
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6206042766571045,
      "learning_rate": 2.0664477435351934e-06,
      "loss": 2.9789,
      "step": 221790
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9721107482910156,
      "learning_rate": 2.065968478967628e-06,
      "loss": 2.9741,
      "step": 221791
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5795235633850098,
      "learning_rate": 2.065489269791587e-06,
      "loss": 3.1877,
      "step": 221792
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2391140460968018,
      "learning_rate": 2.065010116007204e-06,
      "loss": 2.9828,
      "step": 221793
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.88559889793396,
      "learning_rate": 2.0645310176145792e-06,
      "loss": 2.9835,
      "step": 221794
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6517229080200195,
      "learning_rate": 2.0640519746137786e-06,
      "loss": 3.1094,
      "step": 221795
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.802727699279785,
      "learning_rate": 2.063572987004869e-06,
      "loss": 2.9176,
      "step": 221796
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2508668899536133,
      "learning_rate": 2.0630940547879838e-06,
      "loss": 3.1876,
      "step": 221797
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.183298110961914,
      "learning_rate": 2.0626151779631894e-06,
      "loss": 2.8176,
      "step": 221798
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1353518962860107,
      "learning_rate": 2.062136356530586e-06,
      "loss": 3.1514,
      "step": 221799
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.466360092163086,
      "learning_rate": 2.06165759049024e-06,
      "loss": 3.1426,
      "step": 221800
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.872601270675659,
      "learning_rate": 2.0611788798422845e-06,
      "loss": 3.0548,
      "step": 221801
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7796034812927246,
      "learning_rate": 2.0607002245867533e-06,
      "loss": 3.0284,
      "step": 221802
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.981658935546875,
      "learning_rate": 2.060221624723779e-06,
      "loss": 2.9736,
      "step": 221803
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.281198501586914,
      "learning_rate": 2.0597430802533954e-06,
      "loss": 2.8425,
      "step": 221804
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7016544342041016,
      "learning_rate": 2.059264591175769e-06,
      "loss": 2.9939,
      "step": 221805
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1318764686584473,
      "learning_rate": 2.0587861574909324e-06,
      "loss": 3.0387,
      "step": 221806
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1345503330230713,
      "learning_rate": 2.0583077791989865e-06,
      "loss": 3.0567,
      "step": 221807
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.899860143661499,
      "learning_rate": 2.057829456300064e-06,
      "loss": 2.8496,
      "step": 221808
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.094466209411621,
      "learning_rate": 2.0573511887941984e-06,
      "loss": 3.1045,
      "step": 221809
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.028158187866211,
      "learning_rate": 2.056872976681456e-06,
      "loss": 3.0612,
      "step": 221810
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.050325632095337,
      "learning_rate": 2.056394819962004e-06,
      "loss": 2.8317,
      "step": 221811
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.672182083129883,
      "learning_rate": 2.055916718635875e-06,
      "loss": 3.2083,
      "step": 221812
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.952436685562134,
      "learning_rate": 2.0554386727032025e-06,
      "loss": 3.0365,
      "step": 221813
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7471604347229004,
      "learning_rate": 2.0549606821640196e-06,
      "loss": 3.162,
      "step": 221814
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5173327922821045,
      "learning_rate": 2.0544827470184265e-06,
      "loss": 3.0987,
      "step": 221815
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7873051166534424,
      "learning_rate": 2.0540048672665567e-06,
      "loss": 3.0574,
      "step": 221816
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.765071392059326,
      "learning_rate": 2.053527042908476e-06,
      "loss": 2.8543,
      "step": 221817
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.897987127304077,
      "learning_rate": 2.053049273944285e-06,
      "loss": 2.8574,
      "step": 221818
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2865278720855713,
      "learning_rate": 2.0525715603740165e-06,
      "loss": 2.7364,
      "step": 221819
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5469141006469727,
      "learning_rate": 2.0520939021978376e-06,
      "loss": 2.9489,
      "step": 221820
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.677455186843872,
      "learning_rate": 2.0516162994157482e-06,
      "loss": 2.8964,
      "step": 221821
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.4164695739746094,
      "learning_rate": 2.0511387520279145e-06,
      "loss": 2.8098,
      "step": 221822
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.993863821029663,
      "learning_rate": 2.050661260034403e-06,
      "loss": 2.821,
      "step": 221823
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.26570463180542,
      "learning_rate": 2.050183823435314e-06,
      "loss": 3.0467,
      "step": 221824
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0411243438720703,
      "learning_rate": 2.049706442230681e-06,
      "loss": 2.9503,
      "step": 221825
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.8072006702423096,
      "learning_rate": 2.04922911642067e-06,
      "loss": 2.7347,
      "step": 221826
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7146100997924805,
      "learning_rate": 2.048751846005314e-06,
      "loss": 3.3158,
      "step": 221827
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.246089935302734,
      "learning_rate": 2.0482746309847144e-06,
      "loss": 2.8811,
      "step": 221828
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.390810489654541,
      "learning_rate": 2.0477974713589695e-06,
      "loss": 3.019,
      "step": 221829
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.990355968475342,
      "learning_rate": 2.04732036712818e-06,
      "loss": 3.0474,
      "step": 221830
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.03959321975708,
      "learning_rate": 2.0468433182923795e-06,
      "loss": 2.9758,
      "step": 221831
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.648386240005493,
      "learning_rate": 2.046366324851734e-06,
      "loss": 2.8771,
      "step": 221832
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8288767337799072,
      "learning_rate": 2.0458893868062432e-06,
      "loss": 2.8008,
      "step": 221833
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.66619610786438,
      "learning_rate": 2.0454125041561077e-06,
      "loss": 2.9263,
      "step": 221834
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7123525142669678,
      "learning_rate": 2.044935676901294e-06,
      "loss": 3.0551,
      "step": 221835
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2006142139434814,
      "learning_rate": 2.0444589050420014e-06,
      "loss": 2.9328,
      "step": 221836
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4717042446136475,
      "learning_rate": 2.0439821885782635e-06,
      "loss": 2.8147,
      "step": 221837
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9181575775146484,
      "learning_rate": 2.0435055275101474e-06,
      "loss": 2.8026,
      "step": 221838
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6679444313049316,
      "learning_rate": 2.043028921837786e-06,
      "loss": 3.1538,
      "step": 221839
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.635826587677002,
      "learning_rate": 2.0425523715612456e-06,
      "loss": 2.9693,
      "step": 221840
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.258833646774292,
      "learning_rate": 2.0420758766805936e-06,
      "loss": 3.1608,
      "step": 221841
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.741640567779541,
      "learning_rate": 2.0415994371959954e-06,
      "loss": 3.0728,
      "step": 221842
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2293283939361572,
      "learning_rate": 2.041123053107452e-06,
      "loss": 3.0735,
      "step": 221843
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9953982830047607,
      "learning_rate": 2.0406467244150958e-06,
      "loss": 2.9298,
      "step": 221844
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5473597049713135,
      "learning_rate": 2.040170451118994e-06,
      "loss": 3.1021,
      "step": 221845
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.252042055130005,
      "learning_rate": 2.03969423321928e-06,
      "loss": 2.8065,
      "step": 221846
    },
    {
      "epoch": 2.89,
      "grad_norm": 7.164913177490234,
      "learning_rate": 2.039218070715987e-06,
      "loss": 2.8669,
      "step": 221847
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.08339262008667,
      "learning_rate": 2.0387419636092474e-06,
      "loss": 2.8291,
      "step": 221848
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4561197757720947,
      "learning_rate": 2.038265911899095e-06,
      "loss": 2.9121,
      "step": 221849
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6735951900482178,
      "learning_rate": 2.0377899155856968e-06,
      "loss": 2.8782,
      "step": 221850
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2808456420898438,
      "learning_rate": 2.0373139746690858e-06,
      "loss": 3.0014,
      "step": 221851
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0161983966827393,
      "learning_rate": 2.0368380891493286e-06,
      "loss": 3.0072,
      "step": 221852
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8891103267669678,
      "learning_rate": 2.0363622590265914e-06,
      "loss": 3.1558,
      "step": 221853
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1706786155700684,
      "learning_rate": 2.035886484300908e-06,
      "loss": 2.9461,
      "step": 221854
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1154541969299316,
      "learning_rate": 2.0354107649723783e-06,
      "loss": 2.8567,
      "step": 221855
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7802066802978516,
      "learning_rate": 2.0349351010410688e-06,
      "loss": 2.8014,
      "step": 221856
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.67223858833313,
      "learning_rate": 2.0344594925071125e-06,
      "loss": 2.6889,
      "step": 221857
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2473552227020264,
      "learning_rate": 2.0339839393705424e-06,
      "loss": 3.2239,
      "step": 221858
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1307597160339355,
      "learning_rate": 2.033508441631493e-06,
      "loss": 3.1872,
      "step": 221859
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.709606170654297,
      "learning_rate": 2.0330329992900963e-06,
      "loss": 2.9582,
      "step": 221860
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.705064058303833,
      "learning_rate": 2.0325576123463194e-06,
      "loss": 2.7338,
      "step": 221861
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3211381435394287,
      "learning_rate": 2.032082280800329e-06,
      "loss": 3.1803,
      "step": 221862
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6679558753967285,
      "learning_rate": 2.0316070046521914e-06,
      "loss": 2.976,
      "step": 221863
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.70637583732605,
      "learning_rate": 2.0311317839020068e-06,
      "loss": 2.946,
      "step": 221864
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.270263195037842,
      "learning_rate": 2.030656618549875e-06,
      "loss": 2.9885,
      "step": 221865
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.4819929599761963,
      "learning_rate": 2.030181508595863e-06,
      "loss": 2.5919,
      "step": 221866
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.857828378677368,
      "learning_rate": 2.02970645404007e-06,
      "loss": 2.9148,
      "step": 221867
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.10722017288208,
      "learning_rate": 2.029231454882563e-06,
      "loss": 2.9001,
      "step": 221868
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9811360836029053,
      "learning_rate": 2.0287565111234418e-06,
      "loss": 3.0365,
      "step": 221869
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.751702308654785,
      "learning_rate": 2.0282816227628063e-06,
      "loss": 2.9747,
      "step": 221870
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8082661628723145,
      "learning_rate": 2.0278067898007564e-06,
      "loss": 3.0521,
      "step": 221871
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4158668518066406,
      "learning_rate": 2.027332012237359e-06,
      "loss": 2.7312,
      "step": 221872
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.989076852798462,
      "learning_rate": 2.026857290072681e-06,
      "loss": 2.9261,
      "step": 221873
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.237335681915283,
      "learning_rate": 2.0263826233068546e-06,
      "loss": 2.868,
      "step": 221874
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.948514938354492,
      "learning_rate": 2.0259080119399473e-06,
      "loss": 3.1622,
      "step": 221875
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.839444398880005,
      "learning_rate": 2.0254334559720254e-06,
      "loss": 2.9564,
      "step": 221876
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.956434965133667,
      "learning_rate": 2.024958955403222e-06,
      "loss": 2.875,
      "step": 221877
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9022862911224365,
      "learning_rate": 2.024484510233604e-06,
      "loss": 2.9718,
      "step": 221878
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7426412105560303,
      "learning_rate": 2.024010120463271e-06,
      "loss": 3.2029,
      "step": 221879
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.64034104347229,
      "learning_rate": 2.0235357860922565e-06,
      "loss": 2.7653,
      "step": 221880
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7297489643096924,
      "learning_rate": 2.0230615071207267e-06,
      "loss": 2.8821,
      "step": 221881
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.833961009979248,
      "learning_rate": 2.0225872835487156e-06,
      "loss": 2.9296,
      "step": 221882
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4580698013305664,
      "learning_rate": 2.0221131153763557e-06,
      "loss": 2.9618,
      "step": 221883
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.501478433609009,
      "learning_rate": 2.021639002603681e-06,
      "loss": 2.9063,
      "step": 221884
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3355815410614014,
      "learning_rate": 2.021164945230824e-06,
      "loss": 2.6416,
      "step": 221885
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7320995330810547,
      "learning_rate": 2.0206909432578523e-06,
      "loss": 2.7477,
      "step": 221886
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.443582057952881,
      "learning_rate": 2.0202169966848647e-06,
      "loss": 3.1097,
      "step": 221887
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5850484371185303,
      "learning_rate": 2.019743105511962e-06,
      "loss": 3.0119,
      "step": 221888
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6276257038116455,
      "learning_rate": 2.0192692697391765e-06,
      "loss": 2.8784,
      "step": 221889
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0239267349243164,
      "learning_rate": 2.0187954893666425e-06,
      "loss": 2.6915,
      "step": 221890
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6287841796875,
      "learning_rate": 2.0183217643944594e-06,
      "loss": 2.9671,
      "step": 221891
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.114243507385254,
      "learning_rate": 2.0178480948226606e-06,
      "loss": 2.8961,
      "step": 221892
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.920646905899048,
      "learning_rate": 2.0173744806514125e-06,
      "loss": 2.7338,
      "step": 221893
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.217078685760498,
      "learning_rate": 2.016900921880715e-06,
      "loss": 2.8406,
      "step": 221894
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.491960287094116,
      "learning_rate": 2.0164274185107355e-06,
      "loss": 2.9974,
      "step": 221895
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6309001445770264,
      "learning_rate": 2.0159539705415064e-06,
      "loss": 2.8513,
      "step": 221896
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.857750654220581,
      "learning_rate": 2.015480577973161e-06,
      "loss": 2.9099,
      "step": 221897
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7884328365325928,
      "learning_rate": 2.0150072408057327e-06,
      "loss": 2.8253,
      "step": 221898
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7344746589660645,
      "learning_rate": 2.014533959039355e-06,
      "loss": 2.9748,
      "step": 221899
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1195452213287354,
      "learning_rate": 2.0140607326740944e-06,
      "loss": 2.9969,
      "step": 221900
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.339203119277954,
      "learning_rate": 2.0135875617100505e-06,
      "loss": 2.8072,
      "step": 221901
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1213109493255615,
      "learning_rate": 2.0131144461472905e-06,
      "loss": 3.0369,
      "step": 221902
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2931578159332275,
      "learning_rate": 2.012641385985947e-06,
      "loss": 2.9,
      "step": 221903
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5023279190063477,
      "learning_rate": 2.0121683812260536e-06,
      "loss": 2.8534,
      "step": 221904
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8725063800811768,
      "learning_rate": 2.0116954318677435e-06,
      "loss": 2.7685,
      "step": 221905
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7271840572357178,
      "learning_rate": 2.0112225379110502e-06,
      "loss": 2.6632,
      "step": 221906
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0345160961151123,
      "learning_rate": 2.0107496993561067e-06,
      "loss": 2.8599,
      "step": 221907
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.990225076675415,
      "learning_rate": 2.0102769162030132e-06,
      "loss": 3.0163,
      "step": 221908
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0503451824188232,
      "learning_rate": 2.0098041884518355e-06,
      "loss": 3.0836,
      "step": 221909
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.918004274368286,
      "learning_rate": 2.0093315161026413e-06,
      "loss": 2.717,
      "step": 221910
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.736276626586914,
      "learning_rate": 2.0088588991555297e-06,
      "loss": 3.2193,
      "step": 221911
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.974998712539673,
      "learning_rate": 2.0083863376106015e-06,
      "loss": 3.1985,
      "step": 221912
    },
    {
      "epoch": 2.89,
      "grad_norm": 6.197139739990234,
      "learning_rate": 2.0079138314679554e-06,
      "loss": 3.0946,
      "step": 221913
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.904764175415039,
      "learning_rate": 2.007441380727659e-06,
      "loss": 3.081,
      "step": 221914
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4453284740448,
      "learning_rate": 2.006968985389812e-06,
      "loss": 2.7873,
      "step": 221915
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.708158016204834,
      "learning_rate": 2.0064966454544473e-06,
      "loss": 2.9574,
      "step": 221916
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8098833560943604,
      "learning_rate": 2.006024360921765e-06,
      "loss": 2.8556,
      "step": 221917
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6874337196350098,
      "learning_rate": 2.0055521317917323e-06,
      "loss": 2.8034,
      "step": 221918
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.836491584777832,
      "learning_rate": 2.005079958064515e-06,
      "loss": 2.7778,
      "step": 221919
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0384371280670166,
      "learning_rate": 2.0046078397401797e-06,
      "loss": 2.8949,
      "step": 221920
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.476376533508301,
      "learning_rate": 2.004135776818827e-06,
      "loss": 2.9222,
      "step": 221921
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.661322832107544,
      "learning_rate": 2.003663769300523e-06,
      "loss": 2.8592,
      "step": 221922
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1741106510162354,
      "learning_rate": 2.0031918171853677e-06,
      "loss": 2.8975,
      "step": 221923
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.122664213180542,
      "learning_rate": 2.002719920473428e-06,
      "loss": 2.6373,
      "step": 221924
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.140017032623291,
      "learning_rate": 2.002248079164803e-06,
      "loss": 2.7581,
      "step": 221925
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.804935932159424,
      "learning_rate": 2.001776293259594e-06,
      "loss": 2.9978,
      "step": 221926
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6459767818450928,
      "learning_rate": 2.0013045627578993e-06,
      "loss": 2.8077,
      "step": 221927
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.985987901687622,
      "learning_rate": 2.0008328876597535e-06,
      "loss": 2.8463,
      "step": 221928
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4352169036865234,
      "learning_rate": 2.000361267965289e-06,
      "loss": 2.8972,
      "step": 221929
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.439640522003174,
      "learning_rate": 1.9998897036746064e-06,
      "loss": 2.9039,
      "step": 221930
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.028820514678955,
      "learning_rate": 1.9994181947877385e-06,
      "loss": 2.824,
      "step": 221931
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7525320053100586,
      "learning_rate": 1.998946741304819e-06,
      "loss": 2.8888,
      "step": 221932
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.18440580368042,
      "learning_rate": 1.998475343225914e-06,
      "loss": 3.1275,
      "step": 221933
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0266411304473877,
      "learning_rate": 1.998004000551123e-06,
      "loss": 3.0713,
      "step": 221934
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.785285234451294,
      "learning_rate": 1.9975327132805474e-06,
      "loss": 2.8252,
      "step": 221935
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4445884227752686,
      "learning_rate": 1.9970614814142193e-06,
      "loss": 2.8628,
      "step": 221936
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5854074954986572,
      "learning_rate": 1.9965903049522726e-06,
      "loss": 3.0007,
      "step": 221937
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3842952251434326,
      "learning_rate": 1.996119183894773e-06,
      "loss": 2.8592,
      "step": 221938
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.841625690460205,
      "learning_rate": 1.995648118241855e-06,
      "loss": 3.0244,
      "step": 221939
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.117818593978882,
      "learning_rate": 1.9951771079935506e-06,
      "loss": 3.0559,
      "step": 221940
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8350372314453125,
      "learning_rate": 1.994706153149994e-06,
      "loss": 2.8218,
      "step": 221941
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5468292236328125,
      "learning_rate": 1.994235253711218e-06,
      "loss": 2.8066,
      "step": 221942
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6847426891326904,
      "learning_rate": 1.9937644096773564e-06,
      "loss": 2.7707,
      "step": 221943
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.688480854034424,
      "learning_rate": 1.993293621048442e-06,
      "loss": 2.6961,
      "step": 221944
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.669088125228882,
      "learning_rate": 1.992822887824641e-06,
      "loss": 2.8359,
      "step": 221945
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.072051763534546,
      "learning_rate": 1.9923522100059875e-06,
      "loss": 3.026,
      "step": 221946
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8672335147857666,
      "learning_rate": 1.991881587592581e-06,
      "loss": 2.8652,
      "step": 221947
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8513002395629883,
      "learning_rate": 1.991411020584488e-06,
      "loss": 2.9548,
      "step": 221948
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2420785427093506,
      "learning_rate": 1.990940508981842e-06,
      "loss": 2.7533,
      "step": 221949
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.135685920715332,
      "learning_rate": 1.9904700527847096e-06,
      "loss": 2.8276,
      "step": 221950
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.642977714538574,
      "learning_rate": 1.9899996519931573e-06,
      "loss": 2.9645,
      "step": 221951
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7118589878082275,
      "learning_rate": 1.989529306607285e-06,
      "loss": 3.0637,
      "step": 221952
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.430692195892334,
      "learning_rate": 1.989059016627226e-06,
      "loss": 2.8728,
      "step": 221953
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.898892402648926,
      "learning_rate": 1.98858878205298e-06,
      "loss": 2.9588,
      "step": 221954
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.009927749633789,
      "learning_rate": 1.9881186028846806e-06,
      "loss": 3.1057,
      "step": 221955
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9798848628997803,
      "learning_rate": 1.987648479122428e-06,
      "loss": 2.775,
      "step": 221956
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9588351249694824,
      "learning_rate": 1.9871784107663213e-06,
      "loss": 2.988,
      "step": 221957
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.108924627304077,
      "learning_rate": 1.9867083978163943e-06,
      "loss": 2.9879,
      "step": 221958
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.026190757751465,
      "learning_rate": 1.9862384402727804e-06,
      "loss": 2.7399,
      "step": 221959
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2616121768951416,
      "learning_rate": 1.9857685381355457e-06,
      "loss": 2.9841,
      "step": 221960
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.345147132873535,
      "learning_rate": 1.9852986914047575e-06,
      "loss": 2.8018,
      "step": 221961
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.618907928466797,
      "learning_rate": 1.9848289000805484e-06,
      "loss": 3.0825,
      "step": 221962
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8729755878448486,
      "learning_rate": 1.9843591641629853e-06,
      "loss": 2.5718,
      "step": 221963
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7085535526275635,
      "learning_rate": 1.9838894836521678e-06,
      "loss": 2.9227,
      "step": 221964
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6371490955352783,
      "learning_rate": 1.983419858548163e-06,
      "loss": 2.9523,
      "step": 221965
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.16426420211792,
      "learning_rate": 1.9829502888510373e-06,
      "loss": 3.0046,
      "step": 221966
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.852229118347168,
      "learning_rate": 1.9824807745609573e-06,
      "loss": 3.0182,
      "step": 221967
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9849088191986084,
      "learning_rate": 1.9820113156779227e-06,
      "loss": 3.0099,
      "step": 221968
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.603015661239624,
      "learning_rate": 1.981541912202067e-06,
      "loss": 2.9459,
      "step": 221969
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0335896015167236,
      "learning_rate": 1.981072564133457e-06,
      "loss": 3.1423,
      "step": 221970
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.9986414909362793,
      "learning_rate": 1.9806032714722254e-06,
      "loss": 3.0301,
      "step": 221971
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.126934766769409,
      "learning_rate": 1.9801340342183726e-06,
      "loss": 2.8933,
      "step": 221972
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.201093912124634,
      "learning_rate": 1.979664852372098e-06,
      "loss": 2.8878,
      "step": 221973
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.75947904586792,
      "learning_rate": 1.979195725933369e-06,
      "loss": 2.9633,
      "step": 221974
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.782845973968506,
      "learning_rate": 1.978726654902385e-06,
      "loss": 2.968,
      "step": 221975
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5819387435913086,
      "learning_rate": 1.978257639279146e-06,
      "loss": 2.9631,
      "step": 221976
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.921226978302002,
      "learning_rate": 1.9777886790637854e-06,
      "loss": 2.6734,
      "step": 221977
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6109020709991455,
      "learning_rate": 1.977319774256403e-06,
      "loss": 3.078,
      "step": 221978
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.086679935455322,
      "learning_rate": 1.9768509248570317e-06,
      "loss": 2.8656,
      "step": 221979
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.543205738067627,
      "learning_rate": 1.9763821308658058e-06,
      "loss": 2.8943,
      "step": 221980
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8433737754821777,
      "learning_rate": 1.9759133922827907e-06,
      "loss": 2.8717,
      "step": 221981
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7528793811798096,
      "learning_rate": 1.975444709108087e-06,
      "loss": 2.875,
      "step": 221982
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1102027893066406,
      "learning_rate": 1.9749760813417283e-06,
      "loss": 3.151,
      "step": 221983
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2348215579986572,
      "learning_rate": 1.9745075089839135e-06,
      "loss": 3.0168,
      "step": 221984
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.262895345687866,
      "learning_rate": 1.9740389920346433e-06,
      "loss": 3.1265,
      "step": 221985
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.514120101928711,
      "learning_rate": 1.973570530493984e-06,
      "loss": 3.1466,
      "step": 221986
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9230711460113525,
      "learning_rate": 1.9731021243621026e-06,
      "loss": 2.8939,
      "step": 221987
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.2593841552734375,
      "learning_rate": 1.972633773639065e-06,
      "loss": 2.9373,
      "step": 221988
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.902271270751953,
      "learning_rate": 1.972165478324905e-06,
      "loss": 3.1556,
      "step": 221989
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.169332027435303,
      "learning_rate": 1.971697238419756e-06,
      "loss": 2.9677,
      "step": 221990
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.436220169067383,
      "learning_rate": 1.971229053923684e-06,
      "loss": 3.0655,
      "step": 221991
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7552003860473633,
      "learning_rate": 1.9707609248367893e-06,
      "loss": 2.8348,
      "step": 221992
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6067867279052734,
      "learning_rate": 1.970292851159172e-06,
      "loss": 2.857,
      "step": 221993
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9908676147460938,
      "learning_rate": 1.9698248328908983e-06,
      "loss": 2.9254,
      "step": 221994
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.837261199951172,
      "learning_rate": 1.969356870032035e-06,
      "loss": 2.6883,
      "step": 221995
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.325681686401367,
      "learning_rate": 1.9688889625827154e-06,
      "loss": 2.8459,
      "step": 221996
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1947314739227295,
      "learning_rate": 1.968421110543006e-06,
      "loss": 3.0123,
      "step": 221997
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5880484580993652,
      "learning_rate": 1.9679533139130066e-06,
      "loss": 3.1422,
      "step": 221998
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5350208282470703,
      "learning_rate": 1.967485572692784e-06,
      "loss": 3.0633,
      "step": 221999
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.536965847015381,
      "learning_rate": 1.967017886882405e-06,
      "loss": 2.9259,
      "step": 222000
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7408173084259033,
      "learning_rate": 1.9665502564820023e-06,
      "loss": 2.8931,
      "step": 222001
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.748347282409668,
      "learning_rate": 1.966082681491643e-06,
      "loss": 2.862,
      "step": 222002
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.966331958770752,
      "learning_rate": 1.9656151619113936e-06,
      "loss": 3.1159,
      "step": 222003
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.343120574951172,
      "learning_rate": 1.9651476977413873e-06,
      "loss": 3.0257,
      "step": 222004
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.901062488555908,
      "learning_rate": 1.9646802889816905e-06,
      "loss": 2.8907,
      "step": 222005
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8487448692321777,
      "learning_rate": 1.96421293563237e-06,
      "loss": 2.9493,
      "step": 222006
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8743975162506104,
      "learning_rate": 1.963745637693526e-06,
      "loss": 2.7412,
      "step": 222007
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.203453540802002,
      "learning_rate": 1.963278395165291e-06,
      "loss": 2.9619,
      "step": 222008
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.935206174850464,
      "learning_rate": 1.962811208047632e-06,
      "loss": 2.8159,
      "step": 222009
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.760138750076294,
      "learning_rate": 1.9623440763407826e-06,
      "loss": 2.9956,
      "step": 222010
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0692086219787598,
      "learning_rate": 1.961877000044709e-06,
      "loss": 2.9624,
      "step": 222011
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.834350109100342,
      "learning_rate": 1.9614099791595783e-06,
      "loss": 3.095,
      "step": 222012
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.395573139190674,
      "learning_rate": 1.9609430136854565e-06,
      "loss": 2.9658,
      "step": 222013
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9868125915527344,
      "learning_rate": 1.960476103622377e-06,
      "loss": 2.9124,
      "step": 222014
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.608949899673462,
      "learning_rate": 1.9600092489705066e-06,
      "loss": 3.1269,
      "step": 222015
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7296478748321533,
      "learning_rate": 1.9595424497298783e-06,
      "loss": 2.9717,
      "step": 222016
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0520660877227783,
      "learning_rate": 1.9590757059005924e-06,
      "loss": 2.9483,
      "step": 222017
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.859649419784546,
      "learning_rate": 1.958609017482782e-06,
      "loss": 2.8901,
      "step": 222018
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8776047229766846,
      "learning_rate": 1.9581423844764467e-06,
      "loss": 2.884,
      "step": 222019
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.312593936920166,
      "learning_rate": 1.9576758068817533e-06,
      "loss": 2.7655,
      "step": 222020
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.807248592376709,
      "learning_rate": 1.957209284698735e-06,
      "loss": 2.9984,
      "step": 222021
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.80238676071167,
      "learning_rate": 1.9567428179274926e-06,
      "loss": 2.6545,
      "step": 222022
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2005183696746826,
      "learning_rate": 1.956276406568125e-06,
      "loss": 2.9966,
      "step": 222023
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.076051712036133,
      "learning_rate": 1.9558100506206988e-06,
      "loss": 2.9259,
      "step": 222024
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0844812393188477,
      "learning_rate": 1.9553437500853473e-06,
      "loss": 3.1542,
      "step": 222025
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5886006355285645,
      "learning_rate": 1.9548775049621047e-06,
      "loss": 3.0995,
      "step": 222026
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1691348552703857,
      "learning_rate": 1.95441131525107e-06,
      "loss": 2.9181,
      "step": 222027
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5822336673736572,
      "learning_rate": 1.9539451809523433e-06,
      "loss": 3.0717,
      "step": 222028
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5525264739990234,
      "learning_rate": 1.9534791020659913e-06,
      "loss": 3.1415,
      "step": 222029
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.907846450805664,
      "learning_rate": 1.9530130785921137e-06,
      "loss": 3.088,
      "step": 222030
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.51611065864563,
      "learning_rate": 1.952547110530811e-06,
      "loss": 3.0373,
      "step": 222031
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.71144437789917,
      "learning_rate": 1.9520811978821494e-06,
      "loss": 2.7599,
      "step": 222032
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.44277024269104,
      "learning_rate": 1.951615340646229e-06,
      "loss": 2.799,
      "step": 222033
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.010716438293457,
      "learning_rate": 1.951149538823116e-06,
      "loss": 2.7973,
      "step": 222034
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.948584794998169,
      "learning_rate": 1.9506837924129102e-06,
      "loss": 3.0457,
      "step": 222035
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6411831378936768,
      "learning_rate": 1.9502181014157126e-06,
      "loss": 2.8475,
      "step": 222036
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.243983268737793,
      "learning_rate": 1.9497524658315888e-06,
      "loss": 3.1938,
      "step": 222037
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8251259326934814,
      "learning_rate": 1.949286885660639e-06,
      "loss": 3.1228,
      "step": 222038
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9056434631347656,
      "learning_rate": 1.94882136090293e-06,
      "loss": 2.9236,
      "step": 222039
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.579378604888916,
      "learning_rate": 1.948355891558562e-06,
      "loss": 3.026,
      "step": 222040
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8460891246795654,
      "learning_rate": 1.9478904776276337e-06,
      "loss": 3.058,
      "step": 222041
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0313737392425537,
      "learning_rate": 1.947425119110213e-06,
      "loss": 2.8899,
      "step": 222042
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.708092212677002,
      "learning_rate": 1.9469598160063994e-06,
      "loss": 3.175,
      "step": 222043
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.802373170852661,
      "learning_rate": 1.9464945683162593e-06,
      "loss": 3.2312,
      "step": 222044
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5952510833740234,
      "learning_rate": 1.9460293760398926e-06,
      "loss": 2.816,
      "step": 222045
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2803287506103516,
      "learning_rate": 1.9455642391773997e-06,
      "loss": 2.8814,
      "step": 222046
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8108513355255127,
      "learning_rate": 1.9450991577288134e-06,
      "loss": 2.8497,
      "step": 222047
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7703213691711426,
      "learning_rate": 1.9446341316943004e-06,
      "loss": 2.7451,
      "step": 222048
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.662555694580078,
      "learning_rate": 1.944169161073894e-06,
      "loss": 2.8048,
      "step": 222049
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.3065900802612305,
      "learning_rate": 1.9437042458676944e-06,
      "loss": 2.7969,
      "step": 222050
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9540412425994873,
      "learning_rate": 1.9432393860758012e-06,
      "loss": 2.9259,
      "step": 222051
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.777517557144165,
      "learning_rate": 1.9427745816982477e-06,
      "loss": 3.0524,
      "step": 222052
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.982529640197754,
      "learning_rate": 1.9423098327352006e-06,
      "loss": 2.9467,
      "step": 222053
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.923081398010254,
      "learning_rate": 1.9418451391866595e-06,
      "loss": 2.8247,
      "step": 222054
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6757473945617676,
      "learning_rate": 1.941380501052792e-06,
      "loss": 2.994,
      "step": 222055
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.656797170639038,
      "learning_rate": 1.940915918333663e-06,
      "loss": 2.7576,
      "step": 222056
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.149034261703491,
      "learning_rate": 1.940451391029307e-06,
      "loss": 3.0564,
      "step": 222057
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3808088302612305,
      "learning_rate": 1.9399869191398576e-06,
      "loss": 3.1689,
      "step": 222058
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.677170991897583,
      "learning_rate": 1.9395225026653803e-06,
      "loss": 2.8822,
      "step": 222059
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.46958327293396,
      "learning_rate": 1.9390581416059757e-06,
      "loss": 2.5979,
      "step": 222060
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4833474159240723,
      "learning_rate": 1.938593835961744e-06,
      "loss": 2.8158,
      "step": 222061
    },
    {
      "epoch": 2.89,
      "grad_norm": 5.943853855133057,
      "learning_rate": 1.9381295857327506e-06,
      "loss": 2.808,
      "step": 222062
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.827512502670288,
      "learning_rate": 1.937665390919063e-06,
      "loss": 2.8028,
      "step": 222063
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5037999153137207,
      "learning_rate": 1.9372012515208146e-06,
      "loss": 2.8487,
      "step": 222064
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.395374059677124,
      "learning_rate": 1.936737167538038e-06,
      "loss": 2.9694,
      "step": 222065
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.042922019958496,
      "learning_rate": 1.9362731389708676e-06,
      "loss": 2.8765,
      "step": 222066
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0314974784851074,
      "learning_rate": 1.9358091658193687e-06,
      "loss": 2.7681,
      "step": 222067
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.877939224243164,
      "learning_rate": 1.9353452480836417e-06,
      "loss": 2.9735,
      "step": 222068
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1156368255615234,
      "learning_rate": 1.9348813857637535e-06,
      "loss": 3.3086,
      "step": 222069
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5653088092803955,
      "learning_rate": 1.9344175788598038e-06,
      "loss": 2.9521,
      "step": 222070
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.069732904434204,
      "learning_rate": 1.9339538273718593e-06,
      "loss": 2.8948,
      "step": 222071
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.947396993637085,
      "learning_rate": 1.9334901313000196e-06,
      "loss": 2.9199,
      "step": 222072
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6613755226135254,
      "learning_rate": 1.9330264906443516e-06,
      "loss": 2.8678,
      "step": 222073
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.044461250305176,
      "learning_rate": 1.9325629054049884e-06,
      "loss": 2.8374,
      "step": 222074
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8702054023742676,
      "learning_rate": 1.932099375581997e-06,
      "loss": 2.7982,
      "step": 222075
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.566387891769409,
      "learning_rate": 1.9316359011754434e-06,
      "loss": 2.8339,
      "step": 222076
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.068767547607422,
      "learning_rate": 1.931172482185428e-06,
      "loss": 2.9557,
      "step": 222077
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0127623081207275,
      "learning_rate": 1.930709118612017e-06,
      "loss": 2.8642,
      "step": 222078
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4796602725982666,
      "learning_rate": 1.930245810455311e-06,
      "loss": 3.0315,
      "step": 222079
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.096285581588745,
      "learning_rate": 1.929782557715409e-06,
      "loss": 2.8669,
      "step": 222080
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2678794860839844,
      "learning_rate": 1.9293193603924115e-06,
      "loss": 2.7645,
      "step": 222081
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7744922637939453,
      "learning_rate": 1.928856218486352e-06,
      "loss": 2.823,
      "step": 222082
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2294211387634277,
      "learning_rate": 1.9283931319973633e-06,
      "loss": 2.6406,
      "step": 222083
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.407621145248413,
      "learning_rate": 1.927930100925479e-06,
      "loss": 2.8662,
      "step": 222084
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.280181646347046,
      "learning_rate": 1.927467125270832e-06,
      "loss": 3.0417,
      "step": 222085
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.310039758682251,
      "learning_rate": 1.927004205033522e-06,
      "loss": 2.9508,
      "step": 222086
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.461935520172119,
      "learning_rate": 1.9265413402135833e-06,
      "loss": 2.9864,
      "step": 222087
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.836266279220581,
      "learning_rate": 1.9260785308111483e-06,
      "loss": 2.9562,
      "step": 222088
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.8824379444122314,
      "learning_rate": 1.9256157768262504e-06,
      "loss": 3.1865,
      "step": 222089
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.985502243041992,
      "learning_rate": 1.925153078259023e-06,
      "loss": 3.1394,
      "step": 222090
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.210843324661255,
      "learning_rate": 1.924690435109533e-06,
      "loss": 2.9509,
      "step": 222091
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8568625450134277,
      "learning_rate": 1.9242278473778792e-06,
      "loss": 3.1719,
      "step": 222092
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.202507734298706,
      "learning_rate": 1.923765315064163e-06,
      "loss": 2.8628,
      "step": 222093
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6241893768310547,
      "learning_rate": 1.9233028381683836e-06,
      "loss": 2.9643,
      "step": 222094
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.034488201141357,
      "learning_rate": 1.9228404166907407e-06,
      "loss": 3.1645,
      "step": 222095
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.340115547180176,
      "learning_rate": 1.9223780506312346e-06,
      "loss": 2.7714,
      "step": 222096
    },
    {
      "epoch": 2.89,
      "grad_norm": 6.390406131744385,
      "learning_rate": 1.9219157399899988e-06,
      "loss": 2.927,
      "step": 222097
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.34578800201416,
      "learning_rate": 1.9214534847670993e-06,
      "loss": 2.7951,
      "step": 222098
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8002376556396484,
      "learning_rate": 1.9209912849626695e-06,
      "loss": 2.8756,
      "step": 222099
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.684257984161377,
      "learning_rate": 1.92052914057671e-06,
      "loss": 3.0338,
      "step": 222100
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6223032474517822,
      "learning_rate": 1.920067051609353e-06,
      "loss": 2.7767,
      "step": 222101
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.766584634780884,
      "learning_rate": 1.9196050180606658e-06,
      "loss": 2.9934,
      "step": 222102
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0733566284179688,
      "learning_rate": 1.9191430399307816e-06,
      "loss": 3.034,
      "step": 222103
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9174442291259766,
      "learning_rate": 1.918681117219767e-06,
      "loss": 2.7467,
      "step": 222104
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0977089405059814,
      "learning_rate": 1.9182192499276882e-06,
      "loss": 2.7473,
      "step": 222105
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.249821901321411,
      "learning_rate": 1.9177574380546125e-06,
      "loss": 2.8292,
      "step": 222106
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.722867727279663,
      "learning_rate": 1.9172956816006723e-06,
      "loss": 2.9336,
      "step": 222107
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.815110683441162,
      "learning_rate": 1.916833980565935e-06,
      "loss": 2.8293,
      "step": 222108
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.906496047973633,
      "learning_rate": 1.916372334950467e-06,
      "loss": 3.0179,
      "step": 222109
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9001991748809814,
      "learning_rate": 1.9159107447544007e-06,
      "loss": 2.8561,
      "step": 222110
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.905681610107422,
      "learning_rate": 1.915449209977771e-06,
      "loss": 2.959,
      "step": 222111
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.112560749053955,
      "learning_rate": 1.914987730620676e-06,
      "loss": 2.8488,
      "step": 222112
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9116735458374023,
      "learning_rate": 1.9145263066832506e-06,
      "loss": 2.7378,
      "step": 222113
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8192226886749268,
      "learning_rate": 1.914064938165494e-06,
      "loss": 2.8268,
      "step": 222114
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.928863525390625,
      "learning_rate": 1.913603625067572e-06,
      "loss": 2.8893,
      "step": 222115
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6689469814300537,
      "learning_rate": 1.913142367389553e-06,
      "loss": 2.9682,
      "step": 222116
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.115365982055664,
      "learning_rate": 1.912681165131469e-06,
      "loss": 3.0318,
      "step": 222117
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6282408237457275,
      "learning_rate": 1.9122200182934534e-06,
      "loss": 2.65,
      "step": 222118
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.049160003662109,
      "learning_rate": 1.9117589268756063e-06,
      "loss": 2.9303,
      "step": 222119
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7710726261138916,
      "learning_rate": 1.9112978908779607e-06,
      "loss": 2.8295,
      "step": 222120
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.827524185180664,
      "learning_rate": 1.9108369103006504e-06,
      "loss": 2.843,
      "step": 222121
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1401848793029785,
      "learning_rate": 1.910375985143742e-06,
      "loss": 2.8485,
      "step": 222122
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.947812557220459,
      "learning_rate": 1.9099151154073343e-06,
      "loss": 2.7681,
      "step": 222123
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2358808517456055,
      "learning_rate": 1.909454301091462e-06,
      "loss": 3.1919,
      "step": 222124
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8230724334716797,
      "learning_rate": 1.908993542196291e-06,
      "loss": 3.2598,
      "step": 222125
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9880971908569336,
      "learning_rate": 1.908532838721821e-06,
      "loss": 2.9509,
      "step": 222126
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5925405025482178,
      "learning_rate": 1.9080721906682193e-06,
      "loss": 2.9982,
      "step": 222127
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8939876556396484,
      "learning_rate": 1.9076115980355187e-06,
      "loss": 2.8798,
      "step": 222128
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9147677421569824,
      "learning_rate": 1.9071510608238194e-06,
      "loss": 2.9769,
      "step": 222129
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7549214363098145,
      "learning_rate": 1.9066905790331877e-06,
      "loss": 2.7073,
      "step": 222130
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1290218830108643,
      "learning_rate": 1.9062301526637568e-06,
      "loss": 2.788,
      "step": 222131
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.028998613357544,
      "learning_rate": 1.9057697817155603e-06,
      "loss": 2.8614,
      "step": 222132
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.724087953567505,
      "learning_rate": 1.9053094661887315e-06,
      "loss": 2.8234,
      "step": 222133
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1728272438049316,
      "learning_rate": 1.9048492060833365e-06,
      "loss": 3.0322,
      "step": 222134
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.988128662109375,
      "learning_rate": 1.9043890013994755e-06,
      "loss": 2.9708,
      "step": 222135
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.539003610610962,
      "learning_rate": 1.9039288521371488e-06,
      "loss": 2.9456,
      "step": 222136
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.029996395111084,
      "learning_rate": 1.9034687582965558e-06,
      "loss": 3.1076,
      "step": 222137
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.687307834625244,
      "learning_rate": 1.90300871987773e-06,
      "loss": 2.9876,
      "step": 222138
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4210355281829834,
      "learning_rate": 1.9025487368807713e-06,
      "loss": 3.1501,
      "step": 222139
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3697900772094727,
      "learning_rate": 1.902088809305713e-06,
      "loss": 3.3269,
      "step": 222140
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.2126970291137695,
      "learning_rate": 1.901628937152755e-06,
      "loss": 2.7885,
      "step": 222141
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6028130054473877,
      "learning_rate": 1.9011691204218638e-06,
      "loss": 3.0939,
      "step": 222142
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.832756519317627,
      "learning_rate": 1.9007093591131727e-06,
      "loss": 3.0399,
      "step": 222143
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0213632583618164,
      "learning_rate": 1.9002496532267487e-06,
      "loss": 2.8725,
      "step": 222144
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3158884048461914,
      "learning_rate": 1.8997900027627243e-06,
      "loss": 2.8328,
      "step": 222145
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7170627117156982,
      "learning_rate": 1.8993304077211336e-06,
      "loss": 2.6815,
      "step": 222146
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9894378185272217,
      "learning_rate": 1.8988708681021426e-06,
      "loss": 2.8387,
      "step": 222147
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3765084743499756,
      "learning_rate": 1.898411383905718e-06,
      "loss": 2.8766,
      "step": 222148
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7051072120666504,
      "learning_rate": 1.8979519551320266e-06,
      "loss": 2.9046,
      "step": 222149
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.861480474472046,
      "learning_rate": 1.8974925817811348e-06,
      "loss": 3.0089,
      "step": 222150
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9728198051452637,
      "learning_rate": 1.8970332638531094e-06,
      "loss": 3.0804,
      "step": 222151
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5652801990509033,
      "learning_rate": 1.8965740013480835e-06,
      "loss": 3.0608,
      "step": 222152
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6528537273406982,
      "learning_rate": 1.8961147942660903e-06,
      "loss": 2.8915,
      "step": 222153
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9225282669067383,
      "learning_rate": 1.89565564260723e-06,
      "loss": 2.9275,
      "step": 222154
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6084375381469727,
      "learning_rate": 1.8951965463716358e-06,
      "loss": 2.8841,
      "step": 222155
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6209821701049805,
      "learning_rate": 1.8947375055593073e-06,
      "loss": 2.9926,
      "step": 222156
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.469850540161133,
      "learning_rate": 1.8942785201703782e-06,
      "loss": 3.232,
      "step": 222157
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0125694274902344,
      "learning_rate": 1.893819590204948e-06,
      "loss": 2.8216,
      "step": 222158
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6028642654418945,
      "learning_rate": 1.8933607156630837e-06,
      "loss": 3.1057,
      "step": 222159
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9682157039642334,
      "learning_rate": 1.8929018965448516e-06,
      "loss": 3.0822,
      "step": 222160
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.88301682472229,
      "learning_rate": 1.8924431328503853e-06,
      "loss": 2.781,
      "step": 222161
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7691569328308105,
      "learning_rate": 1.8919844245797177e-06,
      "loss": 3.1231,
      "step": 222162
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2694473266601562,
      "learning_rate": 1.891525771732949e-06,
      "loss": 3.0641,
      "step": 222163
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0744423866271973,
      "learning_rate": 1.8910671743101791e-06,
      "loss": 2.931,
      "step": 222164
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.809830665588379,
      "learning_rate": 1.8906086323115077e-06,
      "loss": 2.9013,
      "step": 222165
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.4040517807006836,
      "learning_rate": 1.8901501457369682e-06,
      "loss": 2.8424,
      "step": 222166
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.932135581970215,
      "learning_rate": 1.889691714586694e-06,
      "loss": 2.8641,
      "step": 222167
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.814042568206787,
      "learning_rate": 1.8892333388607518e-06,
      "loss": 2.9443,
      "step": 222168
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.690863847732544,
      "learning_rate": 1.888775018559241e-06,
      "loss": 2.7868,
      "step": 222169
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8690109252929688,
      "learning_rate": 1.8883167536822286e-06,
      "loss": 2.8548,
      "step": 222170
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8815248012542725,
      "learning_rate": 1.8878585442298144e-06,
      "loss": 2.9753,
      "step": 222171
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.421154737472534,
      "learning_rate": 1.887400390202065e-06,
      "loss": 3.0348,
      "step": 222172
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.281893014907837,
      "learning_rate": 1.8869422915990474e-06,
      "loss": 2.778,
      "step": 222173
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.577442169189453,
      "learning_rate": 1.8864842484208942e-06,
      "loss": 2.8531,
      "step": 222174
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.740421772003174,
      "learning_rate": 1.8860262606676724e-06,
      "loss": 2.9733,
      "step": 222175
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.940980911254883,
      "learning_rate": 1.8855683283394818e-06,
      "loss": 2.9601,
      "step": 222176
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.712082862854004,
      "learning_rate": 1.8851104514363891e-06,
      "loss": 3.0321,
      "step": 222177
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4412245750427246,
      "learning_rate": 1.8846526299584607e-06,
      "loss": 2.9666,
      "step": 222178
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0718398094177246,
      "learning_rate": 1.8841948639058302e-06,
      "loss": 2.851,
      "step": 222179
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6161372661590576,
      "learning_rate": 1.8837371532785305e-06,
      "loss": 3.1776,
      "step": 222180
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5345489978790283,
      "learning_rate": 1.883279498076662e-06,
      "loss": 2.8339,
      "step": 222181
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.163294792175293,
      "learning_rate": 1.8828218983003573e-06,
      "loss": 2.9284,
      "step": 222182
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.742403507232666,
      "learning_rate": 1.8823643539496503e-06,
      "loss": 2.8584,
      "step": 222183
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0654592514038086,
      "learning_rate": 1.8819068650246071e-06,
      "loss": 2.596,
      "step": 222184
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9603278636932373,
      "learning_rate": 1.8814494315253613e-06,
      "loss": 2.9811,
      "step": 222185
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1129109859466553,
      "learning_rate": 1.880992053452013e-06,
      "loss": 3.0447,
      "step": 222186
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2134740352630615,
      "learning_rate": 1.8805347308045615e-06,
      "loss": 2.9983,
      "step": 222187
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6601510047912598,
      "learning_rate": 1.8800774635832073e-06,
      "loss": 2.9758,
      "step": 222188
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.909362316131592,
      "learning_rate": 1.87962025178795e-06,
      "loss": 2.8593,
      "step": 222189
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.494629383087158,
      "learning_rate": 1.8791630954188896e-06,
      "loss": 2.8642,
      "step": 222190
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4980618953704834,
      "learning_rate": 1.878705994476093e-06,
      "loss": 2.8691,
      "step": 222191
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6632893085479736,
      "learning_rate": 1.8782489489597264e-06,
      "loss": 2.7663,
      "step": 222192
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.63505220413208,
      "learning_rate": 1.8777919588697899e-06,
      "loss": 2.9076,
      "step": 222193
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.825186014175415,
      "learning_rate": 1.8773350242064166e-06,
      "loss": 2.8509,
      "step": 222194
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2342100143432617,
      "learning_rate": 1.8768781449696402e-06,
      "loss": 3.012,
      "step": 222195
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7614877223968506,
      "learning_rate": 1.8764213211596268e-06,
      "loss": 2.9684,
      "step": 222196
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9765985012054443,
      "learning_rate": 1.8759645527763767e-06,
      "loss": 2.9097,
      "step": 222197
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7536709308624268,
      "learning_rate": 1.875507839820023e-06,
      "loss": 2.7517,
      "step": 222198
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.493208885192871,
      "learning_rate": 1.8750511822906322e-06,
      "loss": 3.0282,
      "step": 222199
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8012354373931885,
      "learning_rate": 1.8745945801883378e-06,
      "loss": 2.5963,
      "step": 222200
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8701090812683105,
      "learning_rate": 1.8741380335131395e-06,
      "loss": 2.6625,
      "step": 222201
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.972256898880005,
      "learning_rate": 1.873681542265204e-06,
      "loss": 3.0136,
      "step": 222202
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.759122133255005,
      "learning_rate": 1.8732251064445313e-06,
      "loss": 3.0045,
      "step": 222203
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.874281167984009,
      "learning_rate": 1.872768726051288e-06,
      "loss": 2.8699,
      "step": 222204
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6518983840942383,
      "learning_rate": 1.8723124010855074e-06,
      "loss": 2.9294,
      "step": 222205
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5057497024536133,
      "learning_rate": 1.871856131547289e-06,
      "loss": 2.9547,
      "step": 222206
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.632626533508301,
      "learning_rate": 1.8713999174367334e-06,
      "loss": 3.0208,
      "step": 222207
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8024580478668213,
      "learning_rate": 1.8709437587539066e-06,
      "loss": 3.1536,
      "step": 222208
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1319127082824707,
      "learning_rate": 1.870487655498909e-06,
      "loss": 3.0354,
      "step": 222209
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.892305612564087,
      "learning_rate": 1.870031607671807e-06,
      "loss": 2.841,
      "step": 222210
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4540774822235107,
      "learning_rate": 1.8695756152727004e-06,
      "loss": 2.9165,
      "step": 222211
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1153974533081055,
      "learning_rate": 1.869119678301656e-06,
      "loss": 3.0692,
      "step": 222212
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.045832395553589,
      "learning_rate": 1.8686637967587736e-06,
      "loss": 2.9924,
      "step": 222213
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.271358966827393,
      "learning_rate": 1.8682079706441534e-06,
      "loss": 2.933,
      "step": 222214
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3294320106506348,
      "learning_rate": 1.8677521999578283e-06,
      "loss": 2.8731,
      "step": 222215
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.869159698486328,
      "learning_rate": 1.8672964846999318e-06,
      "loss": 2.8763,
      "step": 222216
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2385077476501465,
      "learning_rate": 1.8668408248705302e-06,
      "loss": 2.7455,
      "step": 222217
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.652482509613037,
      "learning_rate": 1.8663852204696906e-06,
      "loss": 2.8675,
      "step": 222218
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.895202875137329,
      "learning_rate": 1.8659296714975458e-06,
      "loss": 3.0334,
      "step": 222219
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.251875877380371,
      "learning_rate": 1.8654741779541627e-06,
      "loss": 2.7677,
      "step": 222220
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8416049480438232,
      "learning_rate": 1.8650187398395744e-06,
      "loss": 2.9294,
      "step": 222221
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0501139163970947,
      "learning_rate": 1.8645633571539475e-06,
      "loss": 3.1761,
      "step": 222222
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2165658473968506,
      "learning_rate": 1.8641080298973154e-06,
      "loss": 2.7316,
      "step": 222223
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7498276233673096,
      "learning_rate": 1.8636527580697447e-06,
      "loss": 2.7698,
      "step": 222224
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6697700023651123,
      "learning_rate": 1.8631975416713686e-06,
      "loss": 3.1409,
      "step": 222225
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0516388416290283,
      "learning_rate": 1.8627423807022868e-06,
      "loss": 2.9777,
      "step": 222226
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.165598154067993,
      "learning_rate": 1.8622872751624995e-06,
      "loss": 2.8929,
      "step": 222227
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9791042804718018,
      "learning_rate": 1.8618322250521734e-06,
      "loss": 2.7335,
      "step": 222228
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9227073192596436,
      "learning_rate": 1.8613772303713414e-06,
      "loss": 2.8338,
      "step": 222229
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6812868118286133,
      "learning_rate": 1.8609222911201039e-06,
      "loss": 2.9291,
      "step": 222230
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.514045476913452,
      "learning_rate": 1.860467407298527e-06,
      "loss": 2.9415,
      "step": 222231
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9137423038482666,
      "learning_rate": 1.8600125789067776e-06,
      "loss": 2.8663,
      "step": 222232
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.329681396484375,
      "learning_rate": 1.8595578059448556e-06,
      "loss": 2.9999,
      "step": 222233
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3496816158294678,
      "learning_rate": 1.8591030884128611e-06,
      "loss": 3.1482,
      "step": 222234
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7036526203155518,
      "learning_rate": 1.8586484263108602e-06,
      "loss": 2.9146,
      "step": 222235
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7236313819885254,
      "learning_rate": 1.85819381963902e-06,
      "loss": 2.9836,
      "step": 222236
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6420910358428955,
      "learning_rate": 1.857739268397307e-06,
      "loss": 2.8492,
      "step": 222237
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.898385524749756,
      "learning_rate": 1.8572847725859209e-06,
      "loss": 2.9846,
      "step": 222238
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9073078632354736,
      "learning_rate": 1.8568303322048616e-06,
      "loss": 3.1946,
      "step": 222239
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.308603286743164,
      "learning_rate": 1.856375947254296e-06,
      "loss": 2.7766,
      "step": 222240
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9292774200439453,
      "learning_rate": 1.8559216177341906e-06,
      "loss": 2.7545,
      "step": 222241
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.229998826980591,
      "learning_rate": 1.8554673436447455e-06,
      "loss": 3.2117,
      "step": 222242
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2443461418151855,
      "learning_rate": 1.85501312498596e-06,
      "loss": 3.2234,
      "step": 222243
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7993929386138916,
      "learning_rate": 1.8545589617579682e-06,
      "loss": 2.9093,
      "step": 222244
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8217933177948,
      "learning_rate": 1.8541048539608694e-06,
      "loss": 2.8206,
      "step": 222245
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.802821159362793,
      "learning_rate": 1.853650801594697e-06,
      "loss": 2.7675,
      "step": 222246
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6073992252349854,
      "learning_rate": 1.8531968046595513e-06,
      "loss": 2.9639,
      "step": 222247
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2305898666381836,
      "learning_rate": 1.8527428631555318e-06,
      "loss": 2.9142,
      "step": 222248
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9771010875701904,
      "learning_rate": 1.8522889770827053e-06,
      "loss": 3.0173,
      "step": 222249
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8669931888580322,
      "learning_rate": 1.851835146441205e-06,
      "loss": 3.0521,
      "step": 222250
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7890772819519043,
      "learning_rate": 1.851381371231031e-06,
      "loss": 3.0027,
      "step": 222251
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1645264625549316,
      "learning_rate": 1.8509276514523497e-06,
      "loss": 3.1472,
      "step": 222252
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.041245698928833,
      "learning_rate": 1.8504739871051944e-06,
      "loss": 2.9861,
      "step": 222253
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.12088680267334,
      "learning_rate": 1.850020378189665e-06,
      "loss": 2.88,
      "step": 222254
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.52229905128479,
      "learning_rate": 1.849566824705828e-06,
      "loss": 2.8176,
      "step": 222255
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6608822345733643,
      "learning_rate": 1.849113326653784e-06,
      "loss": 2.8653,
      "step": 222256
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.175090789794922,
      "learning_rate": 1.8486598840336653e-06,
      "loss": 2.7438,
      "step": 222257
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6721577644348145,
      "learning_rate": 1.848206496845439e-06,
      "loss": 2.861,
      "step": 222258
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7729461193084717,
      "learning_rate": 1.8477531650893052e-06,
      "loss": 2.7412,
      "step": 222259
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6444034576416016,
      "learning_rate": 1.8472998887652967e-06,
      "loss": 2.897,
      "step": 222260
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0502219200134277,
      "learning_rate": 1.8468466678734805e-06,
      "loss": 2.8854,
      "step": 222261
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4171833992004395,
      "learning_rate": 1.8463935024139898e-06,
      "loss": 2.9465,
      "step": 222262
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.922287940979004,
      "learning_rate": 1.845940392386891e-06,
      "loss": 3.0861,
      "step": 222263
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.312652111053467,
      "learning_rate": 1.8454873377922507e-06,
      "loss": 3.0316,
      "step": 222264
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.240293502807617,
      "learning_rate": 1.845034338630136e-06,
      "loss": 3.1197,
      "step": 222265
    },
    {
      "epoch": 2.89,
      "grad_norm": 4.524205207824707,
      "learning_rate": 1.8445813949006793e-06,
      "loss": 2.868,
      "step": 222266
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0033771991729736,
      "learning_rate": 1.844128506603948e-06,
      "loss": 2.8199,
      "step": 222267
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.883230209350586,
      "learning_rate": 1.8436756737400082e-06,
      "loss": 2.963,
      "step": 222268
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.738445997238159,
      "learning_rate": 1.8432228963089602e-06,
      "loss": 2.9491,
      "step": 222269
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8321821689605713,
      "learning_rate": 1.8427701743109036e-06,
      "loss": 2.9304,
      "step": 222270
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8456008434295654,
      "learning_rate": 1.8423175077458718e-06,
      "loss": 2.9699,
      "step": 222271
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2757210731506348,
      "learning_rate": 1.8418648966139981e-06,
      "loss": 2.8307,
      "step": 222272
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3117966651916504,
      "learning_rate": 1.8414123409153493e-06,
      "loss": 2.9091,
      "step": 222273
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.109757661819458,
      "learning_rate": 1.8409598406499915e-06,
      "loss": 2.9609,
      "step": 222274
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7023823261260986,
      "learning_rate": 1.8405073958180916e-06,
      "loss": 2.9908,
      "step": 222275
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.738948106765747,
      "learning_rate": 1.8400550064196163e-06,
      "loss": 2.9831,
      "step": 222276
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.715557098388672,
      "learning_rate": 1.8396026724546986e-06,
      "loss": 2.9097,
      "step": 222277
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.03625750541687,
      "learning_rate": 1.8391503939234388e-06,
      "loss": 2.7433,
      "step": 222278
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8976433277130127,
      "learning_rate": 1.838698170825903e-06,
      "loss": 2.9088,
      "step": 222279
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6936910152435303,
      "learning_rate": 1.8382460031621915e-06,
      "loss": 2.732,
      "step": 222280
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.6306748390197754,
      "learning_rate": 1.8377938909323707e-06,
      "loss": 3.0874,
      "step": 222281
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8422141075134277,
      "learning_rate": 1.837341834136541e-06,
      "loss": 2.8603,
      "step": 222282
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.737187147140503,
      "learning_rate": 1.8368898327747682e-06,
      "loss": 2.8956,
      "step": 222283
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.656343698501587,
      "learning_rate": 1.836437886847153e-06,
      "loss": 3.0005,
      "step": 222284
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0418739318847656,
      "learning_rate": 1.8359859963537615e-06,
      "loss": 2.8226,
      "step": 222285
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.197040319442749,
      "learning_rate": 1.835534161294694e-06,
      "loss": 2.6932,
      "step": 222286
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1764347553253174,
      "learning_rate": 1.83508238167005e-06,
      "loss": 2.8922,
      "step": 222287
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.313426971435547,
      "learning_rate": 1.8346306574798631e-06,
      "loss": 2.7208,
      "step": 222288
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.706089735031128,
      "learning_rate": 1.8341789887242663e-06,
      "loss": 2.891,
      "step": 222289
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8699278831481934,
      "learning_rate": 1.8337273754033266e-06,
      "loss": 2.891,
      "step": 222290
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8709466457366943,
      "learning_rate": 1.8332758175171104e-06,
      "loss": 2.8749,
      "step": 222291
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9510903358459473,
      "learning_rate": 1.8328243150657173e-06,
      "loss": 3.1001,
      "step": 222292
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8844053745269775,
      "learning_rate": 1.8323728680492478e-06,
      "loss": 2.9059,
      "step": 222293
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.77083420753479,
      "learning_rate": 1.8319214764677347e-06,
      "loss": 2.9499,
      "step": 222294
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.10371470451355,
      "learning_rate": 1.8314701403213116e-06,
      "loss": 3.0913,
      "step": 222295
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.066967010498047,
      "learning_rate": 1.831018859610045e-06,
      "loss": 2.9451,
      "step": 222296
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.707113742828369,
      "learning_rate": 1.8305676343340346e-06,
      "loss": 2.8026,
      "step": 222297
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9482219219207764,
      "learning_rate": 1.8301164644933474e-06,
      "loss": 3.2092,
      "step": 222298
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.04899525642395,
      "learning_rate": 1.829665350088083e-06,
      "loss": 2.9653,
      "step": 222299
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.047136068344116,
      "learning_rate": 1.829214291118275e-06,
      "loss": 2.8813,
      "step": 222300
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.256615400314331,
      "learning_rate": 1.8287632875840564e-06,
      "loss": 2.8668,
      "step": 222301
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9359092712402344,
      "learning_rate": 1.8283123394854938e-06,
      "loss": 2.9951,
      "step": 222302
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.704221487045288,
      "learning_rate": 1.8278614468227204e-06,
      "loss": 2.7382,
      "step": 222303
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.593721866607666,
      "learning_rate": 1.8274106095957364e-06,
      "loss": 2.9006,
      "step": 222304
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.089466094970703,
      "learning_rate": 1.8269598278046748e-06,
      "loss": 2.724,
      "step": 222305
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.912142038345337,
      "learning_rate": 1.8265091014496024e-06,
      "loss": 3.2541,
      "step": 222306
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9421775341033936,
      "learning_rate": 1.826058430530619e-06,
      "loss": 3.0717,
      "step": 222307
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9540302753448486,
      "learning_rate": 1.8256078150477915e-06,
      "loss": 3.0583,
      "step": 222308
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8548312187194824,
      "learning_rate": 1.8251572550012195e-06,
      "loss": 3.0302,
      "step": 222309
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.045750379562378,
      "learning_rate": 1.8247067503909697e-06,
      "loss": 2.9934,
      "step": 222310
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9295287132263184,
      "learning_rate": 1.8242563012171418e-06,
      "loss": 3.0896,
      "step": 222311
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.9667131900787354,
      "learning_rate": 1.823805907479836e-06,
      "loss": 3.0376,
      "step": 222312
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0034472942352295,
      "learning_rate": 1.8233555691790857e-06,
      "loss": 2.742,
      "step": 222313
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.681887626647949,
      "learning_rate": 1.8229052863149907e-06,
      "loss": 2.7721,
      "step": 222314
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.2620160579681396,
      "learning_rate": 1.8224550588876841e-06,
      "loss": 2.7809,
      "step": 222315
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.936642646789551,
      "learning_rate": 1.8220048868971659e-06,
      "loss": 2.882,
      "step": 222316
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3694937229156494,
      "learning_rate": 1.8215547703436028e-06,
      "loss": 2.8826,
      "step": 222317
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.7908856868743896,
      "learning_rate": 1.8211047092270282e-06,
      "loss": 2.9098,
      "step": 222318
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.966352701187134,
      "learning_rate": 1.8206547035475417e-06,
      "loss": 2.8562,
      "step": 222319
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.653390884399414,
      "learning_rate": 1.82020475330521e-06,
      "loss": 2.7852,
      "step": 222320
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.959611177444458,
      "learning_rate": 1.8197548585001332e-06,
      "loss": 2.7739,
      "step": 222321
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.9292168617248535,
      "learning_rate": 1.819305019132411e-06,
      "loss": 2.9562,
      "step": 222322
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.7825634479522705,
      "learning_rate": 1.81885523520211e-06,
      "loss": 2.9031,
      "step": 222323
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.5002388954162598,
      "learning_rate": 1.8184055067092972e-06,
      "loss": 3.0943,
      "step": 222324
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.657808542251587,
      "learning_rate": 1.8179558336540723e-06,
      "loss": 2.6462,
      "step": 222325
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.547074317932129,
      "learning_rate": 1.8175062160365017e-06,
      "loss": 3.0196,
      "step": 222326
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.4362664222717285,
      "learning_rate": 1.817056653856719e-06,
      "loss": 2.9753,
      "step": 222327
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.6233999729156494,
      "learning_rate": 1.816607147114757e-06,
      "loss": 2.9329,
      "step": 222328
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.059920072555542,
      "learning_rate": 1.816157695810716e-06,
      "loss": 2.676,
      "step": 222329
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.5193030834198,
      "learning_rate": 1.8157082999446958e-06,
      "loss": 3.0516,
      "step": 222330
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.895298480987549,
      "learning_rate": 1.8152589595167633e-06,
      "loss": 2.8567,
      "step": 222331
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0268847942352295,
      "learning_rate": 1.8148096745269846e-06,
      "loss": 3.0466,
      "step": 222332
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8792200088500977,
      "learning_rate": 1.8143604449754601e-06,
      "loss": 2.9405,
      "step": 222333
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.1274056434631348,
      "learning_rate": 1.8139112708622894e-06,
      "loss": 2.8751,
      "step": 222334
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.753746271133423,
      "learning_rate": 1.8134621521875392e-06,
      "loss": 2.8824,
      "step": 222335
    },
    {
      "epoch": 2.89,
      "grad_norm": 2.8823602199554443,
      "learning_rate": 1.8130130889512761e-06,
      "loss": 2.9718,
      "step": 222336
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.0479774475097656,
      "learning_rate": 1.8125640811536002e-06,
      "loss": 2.922,
      "step": 222337
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.413142204284668,
      "learning_rate": 1.8121151287946444e-06,
      "loss": 2.9674,
      "step": 222338
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3332602977752686,
      "learning_rate": 1.811666231874409e-06,
      "loss": 2.8229,
      "step": 222339
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.221196174621582,
      "learning_rate": 1.811217390392994e-06,
      "loss": 2.8969,
      "step": 222340
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7998714447021484,
      "learning_rate": 1.8107686043505655e-06,
      "loss": 3.0695,
      "step": 222341
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.207514762878418,
      "learning_rate": 1.8103198737470903e-06,
      "loss": 2.8925,
      "step": 222342
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2957139015197754,
      "learning_rate": 1.8098711985827019e-06,
      "loss": 2.6694,
      "step": 222343
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.668186902999878,
      "learning_rate": 1.8094225788575333e-06,
      "loss": 2.8827,
      "step": 222344
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.301074743270874,
      "learning_rate": 1.8089740145715847e-06,
      "loss": 2.9895,
      "step": 222345
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.731813430786133,
      "learning_rate": 1.8085255057249893e-06,
      "loss": 2.7424,
      "step": 222346
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9407873153686523,
      "learning_rate": 1.8080770523178134e-06,
      "loss": 2.7585,
      "step": 222347
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0454061031341553,
      "learning_rate": 1.8076286543501573e-06,
      "loss": 2.9313,
      "step": 222348
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.380872964859009,
      "learning_rate": 1.807180311822054e-06,
      "loss": 2.6724,
      "step": 222349
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8649778366088867,
      "learning_rate": 1.8067320247336702e-06,
      "loss": 3.1057,
      "step": 222350
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.915503978729248,
      "learning_rate": 1.806283793085006e-06,
      "loss": 2.9986,
      "step": 222351
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.723050832748413,
      "learning_rate": 1.8058356168762278e-06,
      "loss": 2.8711,
      "step": 222352
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.740142345428467,
      "learning_rate": 1.8053874961073357e-06,
      "loss": 3.1517,
      "step": 222353
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.2680840492248535,
      "learning_rate": 1.8049394307784626e-06,
      "loss": 3.123,
      "step": 222354
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1169931888580322,
      "learning_rate": 1.8044914208896754e-06,
      "loss": 2.9576,
      "step": 222355
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.92862606048584,
      "learning_rate": 1.8040434664410741e-06,
      "loss": 2.8689,
      "step": 222356
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0169098377227783,
      "learning_rate": 1.803595567432725e-06,
      "loss": 2.9448,
      "step": 222357
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9213943481445312,
      "learning_rate": 1.8031477238647285e-06,
      "loss": 2.9048,
      "step": 222358
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7602460384368896,
      "learning_rate": 1.8026999357371175e-06,
      "loss": 2.8662,
      "step": 222359
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.978490114212036,
      "learning_rate": 1.8022522030500586e-06,
      "loss": 3.1139,
      "step": 222360
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.709235191345215,
      "learning_rate": 1.8018045258035518e-06,
      "loss": 2.8228,
      "step": 222361
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.939091920852661,
      "learning_rate": 1.8013569039977638e-06,
      "loss": 3.0017,
      "step": 222362
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6351404190063477,
      "learning_rate": 1.8009093376326943e-06,
      "loss": 2.6719,
      "step": 222363
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8840243816375732,
      "learning_rate": 1.800461826708477e-06,
      "loss": 2.8546,
      "step": 222364
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7974510192871094,
      "learning_rate": 1.800014371225178e-06,
      "loss": 2.8845,
      "step": 222365
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.58589506149292,
      "learning_rate": 1.7995669711828975e-06,
      "loss": 2.9265,
      "step": 222366
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.542754888534546,
      "learning_rate": 1.7991196265816689e-06,
      "loss": 2.7071,
      "step": 222367
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.861332416534424,
      "learning_rate": 1.7986723374216582e-06,
      "loss": 3.1259,
      "step": 222368
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.631214141845703,
      "learning_rate": 1.7982251037028661e-06,
      "loss": 2.8966,
      "step": 222369
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.096062183380127,
      "learning_rate": 1.7977779254254588e-06,
      "loss": 2.8524,
      "step": 222370
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0556325912475586,
      "learning_rate": 1.797330802589436e-06,
      "loss": 2.9097,
      "step": 222371
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.836057662963867,
      "learning_rate": 1.7968837351949316e-06,
      "loss": 2.8852,
      "step": 222372
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.062268018722534,
      "learning_rate": 1.796436723242012e-06,
      "loss": 2.8357,
      "step": 222373
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.726285934448242,
      "learning_rate": 1.7959897667307766e-06,
      "loss": 2.7186,
      "step": 222374
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.802739143371582,
      "learning_rate": 1.7955428656612926e-06,
      "loss": 3.1419,
      "step": 222375
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7596824169158936,
      "learning_rate": 1.795096020033626e-06,
      "loss": 2.7395,
      "step": 222376
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.003270387649536,
      "learning_rate": 1.7946492298478775e-06,
      "loss": 3.028,
      "step": 222377
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.057824611663818,
      "learning_rate": 1.79420249510418e-06,
      "loss": 2.8946,
      "step": 222378
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7588071823120117,
      "learning_rate": 1.7937558158025334e-06,
      "loss": 2.837,
      "step": 222379
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1394357681274414,
      "learning_rate": 1.7933091919430709e-06,
      "loss": 2.9379,
      "step": 222380
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5645480155944824,
      "learning_rate": 1.7928626235258258e-06,
      "loss": 2.9501,
      "step": 222381
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.191258668899536,
      "learning_rate": 1.7924161105509648e-06,
      "loss": 2.8614,
      "step": 222382
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.547778606414795,
      "learning_rate": 1.7919696530184879e-06,
      "loss": 2.7279,
      "step": 222383
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6718435287475586,
      "learning_rate": 1.7915232509285283e-06,
      "loss": 2.8976,
      "step": 222384
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.06650447845459,
      "learning_rate": 1.7910769042811524e-06,
      "loss": 2.9355,
      "step": 222385
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0927364826202393,
      "learning_rate": 1.790630613076427e-06,
      "loss": 2.9175,
      "step": 222386
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6793594360351562,
      "learning_rate": 1.7901843773144853e-06,
      "loss": 2.8005,
      "step": 222387
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9109268188476562,
      "learning_rate": 1.7897381969953605e-06,
      "loss": 2.8765,
      "step": 222388
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.012746572494507,
      "learning_rate": 1.7892920721191195e-06,
      "loss": 2.745,
      "step": 222389
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0334224700927734,
      "learning_rate": 1.7888460026859287e-06,
      "loss": 2.5741,
      "step": 222390
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1895060539245605,
      "learning_rate": 1.7883999886958211e-06,
      "loss": 3.0727,
      "step": 222391
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.613410711288452,
      "learning_rate": 1.7879540301488637e-06,
      "loss": 2.9042,
      "step": 222392
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.015438079833984,
      "learning_rate": 1.7875081270451231e-06,
      "loss": 2.7738,
      "step": 222393
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.9438154697418213,
      "learning_rate": 1.7870622793847656e-06,
      "loss": 2.7749,
      "step": 222394
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2541682720184326,
      "learning_rate": 1.7866164871677913e-06,
      "loss": 2.8938,
      "step": 222395
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.531398057937622,
      "learning_rate": 1.7861707503943001e-06,
      "loss": 2.9548,
      "step": 222396
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.976322889328003,
      "learning_rate": 1.7857250690644254e-06,
      "loss": 3.1178,
      "step": 222397
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.035706043243408,
      "learning_rate": 1.7852794431782002e-06,
      "loss": 2.9274,
      "step": 222398
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.690153121948242,
      "learning_rate": 1.7848338727357247e-06,
      "loss": 2.9133,
      "step": 222399
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1227216720581055,
      "learning_rate": 1.7843883577370654e-06,
      "loss": 2.8775,
      "step": 222400
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7465109825134277,
      "learning_rate": 1.7839428981823223e-06,
      "loss": 3.0651,
      "step": 222401
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6221137046813965,
      "learning_rate": 1.7834974940715952e-06,
      "loss": 2.9166,
      "step": 222402
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6277735233306885,
      "learning_rate": 1.7830521454049174e-06,
      "loss": 2.778,
      "step": 222403
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.814953327178955,
      "learning_rate": 1.782606852182422e-06,
      "loss": 3.0043,
      "step": 222404
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.706512928009033,
      "learning_rate": 1.7821616144041429e-06,
      "loss": 2.9433,
      "step": 222405
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.058479070663452,
      "learning_rate": 1.7817164320702126e-06,
      "loss": 2.8805,
      "step": 222406
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6901845932006836,
      "learning_rate": 1.781271305180665e-06,
      "loss": 2.9001,
      "step": 222407
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7508773803710938,
      "learning_rate": 1.7808262337356328e-06,
      "loss": 2.9967,
      "step": 222408
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5069849491119385,
      "learning_rate": 1.7803812177351828e-06,
      "loss": 2.8947,
      "step": 222409
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.013413429260254,
      "learning_rate": 1.7799362571793484e-06,
      "loss": 2.8718,
      "step": 222410
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.045177459716797,
      "learning_rate": 1.7794913520682963e-06,
      "loss": 3.0748,
      "step": 222411
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.832367420196533,
      "learning_rate": 1.7790465024020261e-06,
      "loss": 3.2548,
      "step": 222412
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1671576499938965,
      "learning_rate": 1.7786017081806713e-06,
      "loss": 2.7851,
      "step": 222413
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.16652774810791,
      "learning_rate": 1.7781569694043319e-06,
      "loss": 3.1563,
      "step": 222414
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.131044387817383,
      "learning_rate": 1.7777122860730407e-06,
      "loss": 2.8455,
      "step": 222415
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.917900323867798,
      "learning_rate": 1.7772676581868984e-06,
      "loss": 2.8006,
      "step": 222416
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.062380790710449,
      "learning_rate": 1.7768230857460042e-06,
      "loss": 3.041,
      "step": 222417
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.221989870071411,
      "learning_rate": 1.776378568750425e-06,
      "loss": 3.0789,
      "step": 222418
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4610483646392822,
      "learning_rate": 1.7759341072002275e-06,
      "loss": 2.8288,
      "step": 222419
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3582940101623535,
      "learning_rate": 1.7754897010955449e-06,
      "loss": 3.0271,
      "step": 222420
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.131199359893799,
      "learning_rate": 1.7750453504364103e-06,
      "loss": 2.7968,
      "step": 222421
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7250754833221436,
      "learning_rate": 1.7746010552229239e-06,
      "loss": 2.7772,
      "step": 222422
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.058046579360962,
      "learning_rate": 1.7741568154551856e-06,
      "loss": 2.9902,
      "step": 222423
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1449854373931885,
      "learning_rate": 1.7737126311332284e-06,
      "loss": 3.2017,
      "step": 222424
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7459845542907715,
      "learning_rate": 1.7732685022571858e-06,
      "loss": 2.9823,
      "step": 222425
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5957770347595215,
      "learning_rate": 1.7728244288271244e-06,
      "loss": 3.0387,
      "step": 222426
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3028197288513184,
      "learning_rate": 1.7723804108431438e-06,
      "loss": 2.7854,
      "step": 222427
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.995222330093384,
      "learning_rate": 1.7719364483052779e-06,
      "loss": 2.8826,
      "step": 222428
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.162546396255493,
      "learning_rate": 1.771492541213626e-06,
      "loss": 3.0078,
      "step": 222429
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.017836570739746,
      "learning_rate": 1.7710486895683218e-06,
      "loss": 2.6774,
      "step": 222430
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8584163188934326,
      "learning_rate": 1.7706048933693651e-06,
      "loss": 2.908,
      "step": 222431
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.416330814361572,
      "learning_rate": 1.7701611526169224e-06,
      "loss": 3.1502,
      "step": 222432
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.526956558227539,
      "learning_rate": 1.769717467311027e-06,
      "loss": 3.0529,
      "step": 222433
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2925448417663574,
      "learning_rate": 1.7692738374517458e-06,
      "loss": 2.7606,
      "step": 222434
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0038793087005615,
      "learning_rate": 1.7688302630392116e-06,
      "loss": 3.1183,
      "step": 222435
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3714489936828613,
      "learning_rate": 1.7683867440734912e-06,
      "loss": 2.8617,
      "step": 222436
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9326305389404297,
      "learning_rate": 1.7679432805546178e-06,
      "loss": 2.8985,
      "step": 222437
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.020427942276001,
      "learning_rate": 1.7674998724827584e-06,
      "loss": 3.1516,
      "step": 222438
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0612235069274902,
      "learning_rate": 1.7670565198579456e-06,
      "loss": 2.8209,
      "step": 222439
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7987029552459717,
      "learning_rate": 1.7666132226802133e-06,
      "loss": 2.9344,
      "step": 222440
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8566064834594727,
      "learning_rate": 1.766169980949761e-06,
      "loss": 3.2865,
      "step": 222441
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.882962942123413,
      "learning_rate": 1.7657267946665555e-06,
      "loss": 2.8457,
      "step": 222442
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.923065662384033,
      "learning_rate": 1.7652836638307633e-06,
      "loss": 3.1939,
      "step": 222443
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.979142427444458,
      "learning_rate": 1.764840588442451e-06,
      "loss": 2.9037,
      "step": 222444
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7953319549560547,
      "learning_rate": 1.764397568501652e-06,
      "loss": 3.1443,
      "step": 222445
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9595956802368164,
      "learning_rate": 1.7639546040084661e-06,
      "loss": 3.1386,
      "step": 222446
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9606547355651855,
      "learning_rate": 1.7635116949630267e-06,
      "loss": 2.8186,
      "step": 222447
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3104491233825684,
      "learning_rate": 1.763068841365367e-06,
      "loss": 2.8852,
      "step": 222448
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8184163570404053,
      "learning_rate": 1.762626043215587e-06,
      "loss": 3.1606,
      "step": 222449
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1249184608459473,
      "learning_rate": 1.762183300513753e-06,
      "loss": 2.6282,
      "step": 222450
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5880684852600098,
      "learning_rate": 1.7617406132599655e-06,
      "loss": 2.9877,
      "step": 222451
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.496066093444824,
      "learning_rate": 1.7612979814542904e-06,
      "loss": 2.6574,
      "step": 222452
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8222193717956543,
      "learning_rate": 1.7608554050968615e-06,
      "loss": 2.946,
      "step": 222453
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.347069501876831,
      "learning_rate": 1.7604128841876786e-06,
      "loss": 3.1003,
      "step": 222454
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4458038806915283,
      "learning_rate": 1.759970418726875e-06,
      "loss": 2.9164,
      "step": 222455
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.174652338027954,
      "learning_rate": 1.759528008714517e-06,
      "loss": 2.996,
      "step": 222456
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2646117210388184,
      "learning_rate": 1.7590856541507048e-06,
      "loss": 2.8971,
      "step": 222457
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3245227336883545,
      "learning_rate": 1.758643355035505e-06,
      "loss": 2.9972,
      "step": 222458
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7437126636505127,
      "learning_rate": 1.7582011113689841e-06,
      "loss": 3.0318,
      "step": 222459
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.602980375289917,
      "learning_rate": 1.7577589231512423e-06,
      "loss": 2.7811,
      "step": 222460
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.870676040649414,
      "learning_rate": 1.7573167903823793e-06,
      "loss": 2.8611,
      "step": 222461
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.137742519378662,
      "learning_rate": 1.7568747130624616e-06,
      "loss": 2.9465,
      "step": 222462
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.399027347564697,
      "learning_rate": 1.7564326911915895e-06,
      "loss": 2.9542,
      "step": 222463
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.178384780883789,
      "learning_rate": 1.7559907247697958e-06,
      "loss": 2.8119,
      "step": 222464
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.558847427368164,
      "learning_rate": 1.755548813797214e-06,
      "loss": 3.0655,
      "step": 222465
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3566901683807373,
      "learning_rate": 1.7551069582738775e-06,
      "loss": 2.8742,
      "step": 222466
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0086069107055664,
      "learning_rate": 1.7546651581999194e-06,
      "loss": 2.9873,
      "step": 222467
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6802992820739746,
      "learning_rate": 1.754223413575373e-06,
      "loss": 2.898,
      "step": 222468
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.55769419670105,
      "learning_rate": 1.7537817244003715e-06,
      "loss": 3.0185,
      "step": 222469
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.079162359237671,
      "learning_rate": 1.7533400906749817e-06,
      "loss": 2.99,
      "step": 222470
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.648895263671875,
      "learning_rate": 1.75289851239927e-06,
      "loss": 2.5778,
      "step": 222471
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.073608875274658,
      "learning_rate": 1.7524569895733031e-06,
      "loss": 3.0238,
      "step": 222472
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.678974151611328,
      "learning_rate": 1.7520155221972144e-06,
      "loss": 2.782,
      "step": 222473
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.436574697494507,
      "learning_rate": 1.7515741102710035e-06,
      "loss": 2.7317,
      "step": 222474
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.353818655014038,
      "learning_rate": 1.7511327537948705e-06,
      "loss": 2.8892,
      "step": 222475
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.522263765335083,
      "learning_rate": 1.750691452768782e-06,
      "loss": 2.958,
      "step": 222476
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.152191638946533,
      "learning_rate": 1.7502502071929047e-06,
      "loss": 2.6831,
      "step": 222477
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.162231922149658,
      "learning_rate": 1.7498090170672717e-06,
      "loss": 2.8304,
      "step": 222478
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5936148166656494,
      "learning_rate": 1.7493678823919832e-06,
      "loss": 3.1945,
      "step": 222479
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7014806270599365,
      "learning_rate": 1.7489268031671054e-06,
      "loss": 2.7941,
      "step": 222480
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0973618030548096,
      "learning_rate": 1.7484857793927387e-06,
      "loss": 3.0178,
      "step": 222481
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.217534065246582,
      "learning_rate": 1.7480448110689494e-06,
      "loss": 2.6161,
      "step": 222482
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.759974956512451,
      "learning_rate": 1.7476038981958374e-06,
      "loss": 2.9428,
      "step": 222483
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8680648803710938,
      "learning_rate": 1.7471630407734694e-06,
      "loss": 3.0092,
      "step": 222484
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8673291206359863,
      "learning_rate": 1.7467222388019453e-06,
      "loss": 2.848,
      "step": 222485
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.875617027282715,
      "learning_rate": 1.7462814922813317e-06,
      "loss": 2.8298,
      "step": 222486
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1207730770111084,
      "learning_rate": 1.7458408012116953e-06,
      "loss": 2.6402,
      "step": 222487
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.042071342468262,
      "learning_rate": 1.7454001655931692e-06,
      "loss": 2.795,
      "step": 222488
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4624555110931396,
      "learning_rate": 1.744959585425787e-06,
      "loss": 3.0794,
      "step": 222489
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8674473762512207,
      "learning_rate": 1.744519060709648e-06,
      "loss": 3.0215,
      "step": 222490
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.234292984008789,
      "learning_rate": 1.7440785914448197e-06,
      "loss": 2.9224,
      "step": 222491
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8932371139526367,
      "learning_rate": 1.7436381776314012e-06,
      "loss": 2.6068,
      "step": 222492
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1097662448883057,
      "learning_rate": 1.7431978192694928e-06,
      "loss": 2.7469,
      "step": 222493
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.723507881164551,
      "learning_rate": 1.7427575163591611e-06,
      "loss": 2.86,
      "step": 222494
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.949845314025879,
      "learning_rate": 1.7423172689004394e-06,
      "loss": 2.8875,
      "step": 222495
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.266590118408203,
      "learning_rate": 1.7418770768934608e-06,
      "loss": 3.3097,
      "step": 222496
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.10280704498291,
      "learning_rate": 1.7414369403383254e-06,
      "loss": 2.7329,
      "step": 222497
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.981651544570923,
      "learning_rate": 1.7409968592350665e-06,
      "loss": 2.9382,
      "step": 222498
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.821859359741211,
      "learning_rate": 1.740556833583784e-06,
      "loss": 3.0709,
      "step": 222499
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.719573974609375,
      "learning_rate": 1.7401168633845775e-06,
      "loss": 2.9432,
      "step": 222500
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9037253856658936,
      "learning_rate": 1.739676948637514e-06,
      "loss": 3.0451,
      "step": 222501
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.97441029548645,
      "learning_rate": 1.73923708934266e-06,
      "loss": 2.797,
      "step": 222502
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.17317533493042,
      "learning_rate": 1.7387972855001153e-06,
      "loss": 2.9295,
      "step": 222503
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.990943670272827,
      "learning_rate": 1.7383575371099468e-06,
      "loss": 2.962,
      "step": 222504
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.657543897628784,
      "learning_rate": 1.7379178441722542e-06,
      "loss": 2.9638,
      "step": 222505
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.74250864982605,
      "learning_rate": 1.7374782066871373e-06,
      "loss": 3.2107,
      "step": 222506
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.768700122833252,
      "learning_rate": 1.7370386246546298e-06,
      "loss": 3.1779,
      "step": 222507
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8869447708129883,
      "learning_rate": 1.7365990980748313e-06,
      "loss": 2.8067,
      "step": 222508
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.484511375427246,
      "learning_rate": 1.7361596269478418e-06,
      "loss": 3.0146,
      "step": 222509
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1237213611602783,
      "learning_rate": 1.735720211273728e-06,
      "loss": 3.0253,
      "step": 222510
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.038135051727295,
      "learning_rate": 1.7352808510525896e-06,
      "loss": 3.1729,
      "step": 222511
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.465874195098877,
      "learning_rate": 1.73484154628446e-06,
      "loss": 2.8357,
      "step": 222512
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8945484161376953,
      "learning_rate": 1.7344022969694725e-06,
      "loss": 2.9752,
      "step": 222513
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0262203216552734,
      "learning_rate": 1.733963103107694e-06,
      "loss": 3.0982,
      "step": 222514
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1974034309387207,
      "learning_rate": 1.7335239646991904e-06,
      "loss": 3.07,
      "step": 222515
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9564363956451416,
      "learning_rate": 1.7330848817440624e-06,
      "loss": 2.7992,
      "step": 222516
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5594849586486816,
      "learning_rate": 1.732645854242376e-06,
      "loss": 3.1395,
      "step": 222517
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9597673416137695,
      "learning_rate": 1.7322068821942315e-06,
      "loss": 2.9013,
      "step": 222518
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6375415325164795,
      "learning_rate": 1.7317679655996951e-06,
      "loss": 3.0294,
      "step": 222519
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2599830627441406,
      "learning_rate": 1.7313291044588674e-06,
      "loss": 2.9543,
      "step": 222520
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.750035285949707,
      "learning_rate": 1.730890298771781e-06,
      "loss": 3.0047,
      "step": 222521
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.626894474029541,
      "learning_rate": 1.7304515485385696e-06,
      "loss": 3.0432,
      "step": 222522
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.219633102416992,
      "learning_rate": 1.7300128537593327e-06,
      "loss": 3.1805,
      "step": 222523
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.784403085708618,
      "learning_rate": 1.7295742144340707e-06,
      "loss": 2.7718,
      "step": 222524
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1270787715911865,
      "learning_rate": 1.7291356305629167e-06,
      "loss": 2.8292,
      "step": 222525
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0640132427215576,
      "learning_rate": 1.7286971021459706e-06,
      "loss": 3.1521,
      "step": 222526
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.220536231994629,
      "learning_rate": 1.7282586291832655e-06,
      "loss": 2.9186,
      "step": 222527
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.575352907180786,
      "learning_rate": 1.727820211674935e-06,
      "loss": 3.1203,
      "step": 222528
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.239579677581787,
      "learning_rate": 1.7273818496210122e-06,
      "loss": 2.8379,
      "step": 222529
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3626129627227783,
      "learning_rate": 1.7269435430215972e-06,
      "loss": 2.7275,
      "step": 222530
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4405555725097656,
      "learning_rate": 1.7265052918767563e-06,
      "loss": 2.9025,
      "step": 222531
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3845763206481934,
      "learning_rate": 1.726067096186623e-06,
      "loss": 2.9177,
      "step": 222532
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.791917085647583,
      "learning_rate": 1.7256289559512304e-06,
      "loss": 3.1032,
      "step": 222533
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8665895462036133,
      "learning_rate": 1.7251908711706787e-06,
      "loss": 2.7794,
      "step": 222534
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1840403079986572,
      "learning_rate": 1.724752841845034e-06,
      "loss": 2.9308,
      "step": 222535
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.81587553024292,
      "learning_rate": 1.7243148679743968e-06,
      "loss": 2.8864,
      "step": 222536
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.981174945831299,
      "learning_rate": 1.7238769495588667e-06,
      "loss": 2.8431,
      "step": 222537
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.303981304168701,
      "learning_rate": 1.7234390865984438e-06,
      "loss": 2.9825,
      "step": 222538
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9972167015075684,
      "learning_rate": 1.7230012790932945e-06,
      "loss": 2.9063,
      "step": 222539
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6883273124694824,
      "learning_rate": 1.7225635270434856e-06,
      "loss": 2.8997,
      "step": 222540
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8331527709960938,
      "learning_rate": 1.7221258304490504e-06,
      "loss": 2.9102,
      "step": 222541
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.574066162109375,
      "learning_rate": 1.721688189310122e-06,
      "loss": 2.877,
      "step": 222542
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0720911026000977,
      "learning_rate": 1.7212506036267337e-06,
      "loss": 2.7731,
      "step": 222543
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7187459468841553,
      "learning_rate": 1.7208130733990188e-06,
      "loss": 2.9174,
      "step": 222544
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0622124671936035,
      "learning_rate": 1.720375598627044e-06,
      "loss": 2.9993,
      "step": 222545
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9669244289398193,
      "learning_rate": 1.7199381793108756e-06,
      "loss": 3.0225,
      "step": 222546
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2464585304260254,
      "learning_rate": 1.7195008154505808e-06,
      "loss": 2.8855,
      "step": 222547
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0634100437164307,
      "learning_rate": 1.719063507046292e-06,
      "loss": 2.9405,
      "step": 222548
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7905869483947754,
      "learning_rate": 1.7186262540980433e-06,
      "loss": 3.2146,
      "step": 222549
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.762535810470581,
      "learning_rate": 1.7181890566059341e-06,
      "loss": 2.9211,
      "step": 222550
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8287179470062256,
      "learning_rate": 1.7177519145700313e-06,
      "loss": 2.8902,
      "step": 222551
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.15854024887085,
      "learning_rate": 1.717314827990468e-06,
      "loss": 2.8497,
      "step": 222552
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.057595729827881,
      "learning_rate": 1.7168777968672442e-06,
      "loss": 2.9242,
      "step": 222553
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.713308095932007,
      "learning_rate": 1.7164408212004933e-06,
      "loss": 3.1171,
      "step": 222554
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6889212131500244,
      "learning_rate": 1.7160039009903148e-06,
      "loss": 2.7158,
      "step": 222555
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7932145595550537,
      "learning_rate": 1.7155670362367423e-06,
      "loss": 3.0911,
      "step": 222556
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1213247776031494,
      "learning_rate": 1.7151302269398426e-06,
      "loss": 2.8524,
      "step": 222557
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0537259578704834,
      "learning_rate": 1.714693473099782e-06,
      "loss": 2.9543,
      "step": 222558
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.698064088821411,
      "learning_rate": 1.7142567747165604e-06,
      "loss": 2.9509,
      "step": 222559
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.916682243347168,
      "learning_rate": 1.713820131790311e-06,
      "loss": 3.1202,
      "step": 222560
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.828361988067627,
      "learning_rate": 1.7133835443210674e-06,
      "loss": 2.8003,
      "step": 222561
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.147407054901123,
      "learning_rate": 1.712947012308963e-06,
      "loss": 2.94,
      "step": 222562
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.114469051361084,
      "learning_rate": 1.7125105357540303e-06,
      "loss": 2.8672,
      "step": 222563
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.4764564037323,
      "learning_rate": 1.7120741146563699e-06,
      "loss": 2.7824,
      "step": 222564
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.132551431655884,
      "learning_rate": 1.7116377490160814e-06,
      "loss": 2.9055,
      "step": 222565
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0125844478607178,
      "learning_rate": 1.7112014388332318e-06,
      "loss": 2.9878,
      "step": 222566
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5744712352752686,
      "learning_rate": 1.7107651841078873e-06,
      "loss": 2.8558,
      "step": 222567
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2916722297668457,
      "learning_rate": 1.7103289848401481e-06,
      "loss": 2.9649,
      "step": 222568
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.988116979598999,
      "learning_rate": 1.7098928410300806e-06,
      "loss": 2.89,
      "step": 222569
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1716501712799072,
      "learning_rate": 1.7094567526777847e-06,
      "loss": 2.9142,
      "step": 222570
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.397017478942871,
      "learning_rate": 1.7090207197833271e-06,
      "loss": 3.1954,
      "step": 222571
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1115481853485107,
      "learning_rate": 1.708584742346808e-06,
      "loss": 3.0372,
      "step": 222572
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.846932888031006,
      "learning_rate": 1.7081488203682936e-06,
      "loss": 3.0211,
      "step": 222573
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.934082269668579,
      "learning_rate": 1.7077129538478506e-06,
      "loss": 3.0712,
      "step": 222574
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8790946006774902,
      "learning_rate": 1.707277142785579e-06,
      "loss": 2.9676,
      "step": 222575
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.806689739227295,
      "learning_rate": 1.7068413871815456e-06,
      "loss": 3.0692,
      "step": 222576
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3244872093200684,
      "learning_rate": 1.70640568703585e-06,
      "loss": 2.7727,
      "step": 222577
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2862071990966797,
      "learning_rate": 1.705970042348559e-06,
      "loss": 2.7738,
      "step": 222578
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.924311876296997,
      "learning_rate": 1.7055344531197723e-06,
      "loss": 2.806,
      "step": 222579
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4473276138305664,
      "learning_rate": 1.7050989193495567e-06,
      "loss": 2.8795,
      "step": 222580
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5726864337921143,
      "learning_rate": 1.7046634410379788e-06,
      "loss": 2.9832,
      "step": 222581
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6648764610290527,
      "learning_rate": 1.7042280181851387e-06,
      "loss": 2.9081,
      "step": 222582
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8953609466552734,
      "learning_rate": 1.7037926507911359e-06,
      "loss": 2.882,
      "step": 222583
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2654457092285156,
      "learning_rate": 1.703357338856004e-06,
      "loss": 2.9333,
      "step": 222584
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.452420234680176,
      "learning_rate": 1.702922082379843e-06,
      "loss": 2.9461,
      "step": 222585
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.064633369445801,
      "learning_rate": 1.7024868813627856e-06,
      "loss": 2.8705,
      "step": 222586
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8627476692199707,
      "learning_rate": 1.7020517358047992e-06,
      "loss": 3.0172,
      "step": 222587
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1135406494140625,
      "learning_rate": 1.7016166457060831e-06,
      "loss": 2.9382,
      "step": 222588
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4008429050445557,
      "learning_rate": 1.7011816110666376e-06,
      "loss": 2.9584,
      "step": 222589
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.364675521850586,
      "learning_rate": 1.7007466318865959e-06,
      "loss": 2.7913,
      "step": 222590
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.123962879180908,
      "learning_rate": 1.700311708165991e-06,
      "loss": 2.9296,
      "step": 222591
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.718419313430786,
      "learning_rate": 1.6998768399049566e-06,
      "loss": 2.9932,
      "step": 222592
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6552560329437256,
      "learning_rate": 1.699442027103526e-06,
      "loss": 2.8651,
      "step": 222593
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8033218383789062,
      "learning_rate": 1.6990072697618318e-06,
      "loss": 2.8117,
      "step": 222594
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1388556957244873,
      "learning_rate": 1.6985725678798746e-06,
      "loss": 3.0862,
      "step": 222595
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.047213077545166,
      "learning_rate": 1.6981379214578206e-06,
      "loss": 2.9327,
      "step": 222596
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9928765296936035,
      "learning_rate": 1.69770333049567e-06,
      "loss": 2.9158,
      "step": 222597
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.829834222793579,
      "learning_rate": 1.6972687949935893e-06,
      "loss": 2.7991,
      "step": 222598
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9568252563476562,
      "learning_rate": 1.6968343149516116e-06,
      "loss": 2.9325,
      "step": 222599
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.127652645111084,
      "learning_rate": 1.6963998903698372e-06,
      "loss": 2.8257,
      "step": 222600
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7127304077148438,
      "learning_rate": 1.6959655212482991e-06,
      "loss": 3.0086,
      "step": 222601
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9158172607421875,
      "learning_rate": 1.6955312075871308e-06,
      "loss": 2.9745,
      "step": 222602
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.151785135269165,
      "learning_rate": 1.6950969493863653e-06,
      "loss": 2.7124,
      "step": 222603
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0104074478149414,
      "learning_rate": 1.694662746646136e-06,
      "loss": 2.8431,
      "step": 222604
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9683737754821777,
      "learning_rate": 1.6942285993665094e-06,
      "loss": 2.9532,
      "step": 222605
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.151601552963257,
      "learning_rate": 1.6937945075475522e-06,
      "loss": 3.0591,
      "step": 222606
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.715564727783203,
      "learning_rate": 1.6933604711893312e-06,
      "loss": 2.9077,
      "step": 222607
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6367921829223633,
      "learning_rate": 1.6929264902919793e-06,
      "loss": 3.0912,
      "step": 222608
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.958035945892334,
      "learning_rate": 1.6924925648555298e-06,
      "loss": 2.9597,
      "step": 222609
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7503209114074707,
      "learning_rate": 1.6920586948800496e-06,
      "loss": 2.7369,
      "step": 222610
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7213134765625,
      "learning_rate": 1.6916248803656718e-06,
      "loss": 3.013,
      "step": 222611
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0701916217803955,
      "learning_rate": 1.691191121312463e-06,
      "loss": 2.5605,
      "step": 222612
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9984447956085205,
      "learning_rate": 1.6907574177204563e-06,
      "loss": 3.0666,
      "step": 222613
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4528727531433105,
      "learning_rate": 1.6903237695898186e-06,
      "loss": 2.9112,
      "step": 222614
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.658342123031616,
      "learning_rate": 1.6898901769205165e-06,
      "loss": 3.1206,
      "step": 222615
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.553673267364502,
      "learning_rate": 1.6894566397127497e-06,
      "loss": 3.0512,
      "step": 222616
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.243269920349121,
      "learning_rate": 1.689023157966518e-06,
      "loss": 3.0508,
      "step": 222617
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.4985005855560303,
      "learning_rate": 1.6885897316819218e-06,
      "loss": 2.7434,
      "step": 222618
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.973299264907837,
      "learning_rate": 1.6881563608590941e-06,
      "loss": 2.9207,
      "step": 222619
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6968133449554443,
      "learning_rate": 1.6877230454980018e-06,
      "loss": 2.7515,
      "step": 222620
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.000140905380249,
      "learning_rate": 1.6872897855988443e-06,
      "loss": 2.9285,
      "step": 222621
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8082168102264404,
      "learning_rate": 1.686856581161622e-06,
      "loss": 2.8234,
      "step": 222622
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7440876960754395,
      "learning_rate": 1.6864234321864679e-06,
      "loss": 3.058,
      "step": 222623
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.135374069213867,
      "learning_rate": 1.6859903386734153e-06,
      "loss": 2.7811,
      "step": 222624
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.753788471221924,
      "learning_rate": 1.6855573006225975e-06,
      "loss": 2.8938,
      "step": 222625
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5096631050109863,
      "learning_rate": 1.6851243180340146e-06,
      "loss": 2.853,
      "step": 222626
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2642476558685303,
      "learning_rate": 1.6846913909078663e-06,
      "loss": 3.2314,
      "step": 222627
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.71014404296875,
      "learning_rate": 1.6842585192441194e-06,
      "loss": 2.9624,
      "step": 222628
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3641505241394043,
      "learning_rate": 1.683825703042907e-06,
      "loss": 2.9126,
      "step": 222629
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.078199625015259,
      "learning_rate": 1.6833929423042958e-06,
      "loss": 2.7696,
      "step": 222630
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2804319858551025,
      "learning_rate": 1.6829602370283857e-06,
      "loss": 3.0754,
      "step": 222631
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5490150451660156,
      "learning_rate": 1.6825275872152434e-06,
      "loss": 2.8072,
      "step": 222632
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5363686084747314,
      "learning_rate": 1.6820949928649684e-06,
      "loss": 3.1478,
      "step": 222633
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.62634015083313,
      "learning_rate": 1.6816624539775946e-06,
      "loss": 2.8104,
      "step": 222634
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1699256896972656,
      "learning_rate": 1.681229970553255e-06,
      "loss": 2.7422,
      "step": 222635
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8380284309387207,
      "learning_rate": 1.680797542591983e-06,
      "loss": 3.0765,
      "step": 222636
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5278172492980957,
      "learning_rate": 1.680365170093878e-06,
      "loss": 2.8851,
      "step": 222637
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0716092586517334,
      "learning_rate": 1.6799328530590406e-06,
      "loss": 2.8402,
      "step": 222638
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8491621017456055,
      "learning_rate": 1.679500591487537e-06,
      "loss": 2.8215,
      "step": 222639
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8016655445098877,
      "learning_rate": 1.679068385379434e-06,
      "loss": 3.0593,
      "step": 222640
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3109958171844482,
      "learning_rate": 1.6786362347348314e-06,
      "loss": 3.0721,
      "step": 222641
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7938661575317383,
      "learning_rate": 1.678204139553796e-06,
      "loss": 2.9223,
      "step": 222642
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5933916568756104,
      "learning_rate": 1.6777720998364274e-06,
      "loss": 2.89,
      "step": 222643
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9751124382019043,
      "learning_rate": 1.6773401155827593e-06,
      "loss": 2.8219,
      "step": 222644
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8476974964141846,
      "learning_rate": 1.6769081867929245e-06,
      "loss": 3.1948,
      "step": 222645
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.69628643989563,
      "learning_rate": 1.6764763134669901e-06,
      "loss": 2.9331,
      "step": 222646
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.77984619140625,
      "learning_rate": 1.6760444956050224e-06,
      "loss": 2.8544,
      "step": 222647
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.962568521499634,
      "learning_rate": 1.6756127332071212e-06,
      "loss": 2.8317,
      "step": 222648
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0246682167053223,
      "learning_rate": 1.6751810262733535e-06,
      "loss": 2.7631,
      "step": 222649
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8824965953826904,
      "learning_rate": 1.6747493748037856e-06,
      "loss": 2.7974,
      "step": 222650
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6106367111206055,
      "learning_rate": 1.6743177787985173e-06,
      "loss": 2.999,
      "step": 222651
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.9128243923187256,
      "learning_rate": 1.673886238257649e-06,
      "loss": 2.8847,
      "step": 222652
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.108159065246582,
      "learning_rate": 1.6734547531812136e-06,
      "loss": 3.0701,
      "step": 222653
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.925987720489502,
      "learning_rate": 1.6730233235693113e-06,
      "loss": 2.9271,
      "step": 222654
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.138468027114868,
      "learning_rate": 1.6725919494220085e-06,
      "loss": 2.8221,
      "step": 222655
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5641067028045654,
      "learning_rate": 1.6721606307394386e-06,
      "loss": 3.1143,
      "step": 222656
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8178184032440186,
      "learning_rate": 1.6717293675216348e-06,
      "loss": 3.0338,
      "step": 222657
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0163185596466064,
      "learning_rate": 1.671298159768697e-06,
      "loss": 3.0752,
      "step": 222658
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.72023606300354,
      "learning_rate": 1.6708670074806919e-06,
      "loss": 2.9224,
      "step": 222659
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.9071571826934814,
      "learning_rate": 1.6704359106576859e-06,
      "loss": 2.9056,
      "step": 222660
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3583109378814697,
      "learning_rate": 1.6700048692997792e-06,
      "loss": 3.1066,
      "step": 222661
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0816545486450195,
      "learning_rate": 1.6695738834070382e-06,
      "loss": 3.1015,
      "step": 222662
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.058661937713623,
      "learning_rate": 1.6691429529795963e-06,
      "loss": 2.7159,
      "step": 222663
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.894256114959717,
      "learning_rate": 1.6687120780174535e-06,
      "loss": 2.6909,
      "step": 222664
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.101811408996582,
      "learning_rate": 1.6682812585207761e-06,
      "loss": 2.9787,
      "step": 222665
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9680235385894775,
      "learning_rate": 1.667850494489531e-06,
      "loss": 3.0965,
      "step": 222666
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9275145530700684,
      "learning_rate": 1.667419785923918e-06,
      "loss": 2.8327,
      "step": 222667
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9267983436584473,
      "learning_rate": 1.6669891328239372e-06,
      "loss": 2.5847,
      "step": 222668
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.628164529800415,
      "learning_rate": 1.6665585351897215e-06,
      "loss": 2.838,
      "step": 222669
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.00776743888855,
      "learning_rate": 1.6661279930212711e-06,
      "loss": 2.9122,
      "step": 222670
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7185957431793213,
      "learning_rate": 1.665697506318786e-06,
      "loss": 3.0034,
      "step": 222671
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.7251060009002686,
      "learning_rate": 1.6652670750822328e-06,
      "loss": 3.1054,
      "step": 222672
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1354520320892334,
      "learning_rate": 1.6648366993117445e-06,
      "loss": 2.8932,
      "step": 222673
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9947516918182373,
      "learning_rate": 1.664406379007388e-06,
      "loss": 2.8916,
      "step": 222674
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.724027156829834,
      "learning_rate": 1.663976114169263e-06,
      "loss": 2.7917,
      "step": 222675
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.025487184524536,
      "learning_rate": 1.6635459047974365e-06,
      "loss": 2.864,
      "step": 222676
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.933785915374756,
      "learning_rate": 1.6631157508920078e-06,
      "loss": 2.9612,
      "step": 222677
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8454065322875977,
      "learning_rate": 1.6626856524530108e-06,
      "loss": 2.816,
      "step": 222678
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7044320106506348,
      "learning_rate": 1.6622556094805783e-06,
      "loss": 2.84,
      "step": 222679
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.277442455291748,
      "learning_rate": 1.6618256219747107e-06,
      "loss": 2.7921,
      "step": 222680
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5537972450256348,
      "learning_rate": 1.6613956899356074e-06,
      "loss": 2.9353,
      "step": 222681
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9659276008605957,
      "learning_rate": 1.6609658133632353e-06,
      "loss": 2.7608,
      "step": 222682
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8241159915924072,
      "learning_rate": 1.6605359922577277e-06,
      "loss": 2.7791,
      "step": 222683
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3967783451080322,
      "learning_rate": 1.6601062266191844e-06,
      "loss": 2.9461,
      "step": 222684
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0223779678344727,
      "learning_rate": 1.6596765164476388e-06,
      "loss": 3.0598,
      "step": 222685
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.392756700515747,
      "learning_rate": 1.659246861743191e-06,
      "loss": 3.0593,
      "step": 222686
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2505455017089844,
      "learning_rate": 1.658817262505907e-06,
      "loss": 3.0933,
      "step": 222687
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.129066228866577,
      "learning_rate": 1.6583877187358874e-06,
      "loss": 2.9763,
      "step": 222688
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.9521162509918213,
      "learning_rate": 1.6579582304332317e-06,
      "loss": 2.8508,
      "step": 222689
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2513577938079834,
      "learning_rate": 1.6575287975979735e-06,
      "loss": 2.8927,
      "step": 222690
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.385030746459961,
      "learning_rate": 1.6570994202302123e-06,
      "loss": 2.8736,
      "step": 222691
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8797755241394043,
      "learning_rate": 1.6566700983300152e-06,
      "loss": 3.1113,
      "step": 222692
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5900955200195312,
      "learning_rate": 1.6562408318975152e-06,
      "loss": 2.8761,
      "step": 222693
    },
    {
      "epoch": 2.9,
      "grad_norm": 5.143344879150391,
      "learning_rate": 1.6558116209327121e-06,
      "loss": 2.8103,
      "step": 222694
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.061221122741699,
      "learning_rate": 1.6553824654357395e-06,
      "loss": 2.959,
      "step": 222695
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6097571849823,
      "learning_rate": 1.6549533654066639e-06,
      "loss": 2.7471,
      "step": 222696
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1289403438568115,
      "learning_rate": 1.6545243208455516e-06,
      "loss": 2.8966,
      "step": 222697
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8729300498962402,
      "learning_rate": 1.6540953317525363e-06,
      "loss": 2.8958,
      "step": 222698
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7278542518615723,
      "learning_rate": 1.6536663981276176e-06,
      "loss": 2.8697,
      "step": 222699
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.603877067565918,
      "learning_rate": 1.6532375199708958e-06,
      "loss": 2.7384,
      "step": 222700
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9121057987213135,
      "learning_rate": 1.6528086972825039e-06,
      "loss": 3.0125,
      "step": 222701
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.596278667449951,
      "learning_rate": 1.6523799300625084e-06,
      "loss": 2.8107,
      "step": 222702
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6054632663726807,
      "learning_rate": 1.6519512183109097e-06,
      "loss": 2.9423,
      "step": 222703
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9801313877105713,
      "learning_rate": 1.6515225620278738e-06,
      "loss": 2.9718,
      "step": 222704
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.295461893081665,
      "learning_rate": 1.6510939612134677e-06,
      "loss": 2.9254,
      "step": 222705
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.829012632369995,
      "learning_rate": 1.6506654158677246e-06,
      "loss": 2.8398,
      "step": 222706
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8357715606689453,
      "learning_rate": 1.6502369259907777e-06,
      "loss": 2.9285,
      "step": 222707
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6831791400909424,
      "learning_rate": 1.6498084915826605e-06,
      "loss": 2.8941,
      "step": 222708
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.070053815841675,
      "learning_rate": 1.6493801126435059e-06,
      "loss": 2.7881,
      "step": 222709
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.794099807739258,
      "learning_rate": 1.6489517891733473e-06,
      "loss": 2.6057,
      "step": 222710
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.4609181880950928,
      "learning_rate": 1.6485235211722514e-06,
      "loss": 2.9766,
      "step": 222711
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.622591257095337,
      "learning_rate": 1.6480953086403847e-06,
      "loss": 2.653,
      "step": 222712
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.922703981399536,
      "learning_rate": 1.647667151577714e-06,
      "loss": 3.0018,
      "step": 222713
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.98876690864563,
      "learning_rate": 1.6472390499844058e-06,
      "loss": 2.7765,
      "step": 222714
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.044500350952148,
      "learning_rate": 1.6468110038604932e-06,
      "loss": 2.8807,
      "step": 222715
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.84592866897583,
      "learning_rate": 1.6463830132061095e-06,
      "loss": 3.2021,
      "step": 222716
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3651657104492188,
      "learning_rate": 1.645955078021255e-06,
      "loss": 2.8924,
      "step": 222717
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8376224040985107,
      "learning_rate": 1.6455271983060626e-06,
      "loss": 3.0582,
      "step": 222718
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.440894842147827,
      "learning_rate": 1.6450993740605988e-06,
      "loss": 2.9975,
      "step": 222719
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0286753177642822,
      "learning_rate": 1.6446716052849308e-06,
      "loss": 2.7162,
      "step": 222720
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.214630603790283,
      "learning_rate": 1.6442438919791577e-06,
      "loss": 3.0902,
      "step": 222721
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4111924171447754,
      "learning_rate": 1.643816234143347e-06,
      "loss": 2.8293,
      "step": 222722
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.042996406555176,
      "learning_rate": 1.6433886317775646e-06,
      "loss": 2.9265,
      "step": 222723
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.267357110977173,
      "learning_rate": 1.642961084881944e-06,
      "loss": 2.9256,
      "step": 222724
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8807568550109863,
      "learning_rate": 1.6425335934565187e-06,
      "loss": 3.0684,
      "step": 222725
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9282479286193848,
      "learning_rate": 1.6421061575013884e-06,
      "loss": 2.8916,
      "step": 222726
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7768161296844482,
      "learning_rate": 1.6416787770165863e-06,
      "loss": 3.0373,
      "step": 222727
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9415509700775146,
      "learning_rate": 1.6412514520022456e-06,
      "loss": 2.9877,
      "step": 222728
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.299302339553833,
      "learning_rate": 1.6408241824584334e-06,
      "loss": 2.956,
      "step": 222729
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7030553817749023,
      "learning_rate": 1.6403969683852492e-06,
      "loss": 2.9402,
      "step": 222730
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.079189777374268,
      "learning_rate": 1.639969809782693e-06,
      "loss": 2.979,
      "step": 222731
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.769561290740967,
      "learning_rate": 1.6395427066509648e-06,
      "loss": 2.9607,
      "step": 222732
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.911853313446045,
      "learning_rate": 1.6391156589900311e-06,
      "loss": 2.8171,
      "step": 222733
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6082265377044678,
      "learning_rate": 1.6386886668000254e-06,
      "loss": 2.8791,
      "step": 222734
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.23768949508667,
      "learning_rate": 1.6382617300810142e-06,
      "loss": 3.0794,
      "step": 222735
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.012410879135132,
      "learning_rate": 1.6378348488330971e-06,
      "loss": 2.9439,
      "step": 222736
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.917870044708252,
      "learning_rate": 1.637408023056308e-06,
      "loss": 3.0423,
      "step": 222737
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6531898975372314,
      "learning_rate": 1.636981252750813e-06,
      "loss": 2.7547,
      "step": 222738
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.188451051712036,
      "learning_rate": 1.636554537916579e-06,
      "loss": 3.0499,
      "step": 222739
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.006974935531616,
      "learning_rate": 1.6361278785537724e-06,
      "loss": 3.0339,
      "step": 222740
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4967854022979736,
      "learning_rate": 1.6357012746624266e-06,
      "loss": 2.7704,
      "step": 222741
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7360854148864746,
      "learning_rate": 1.6352747262426413e-06,
      "loss": 2.9413,
      "step": 222742
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4743340015411377,
      "learning_rate": 1.6348482332944834e-06,
      "loss": 3.0947,
      "step": 222743
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8835060596466064,
      "learning_rate": 1.6344217958180527e-06,
      "loss": 3.0294,
      "step": 222744
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.162698268890381,
      "learning_rate": 1.6339954138134158e-06,
      "loss": 3.1519,
      "step": 222745
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.880247116088867,
      "learning_rate": 1.6335690872806395e-06,
      "loss": 2.7071,
      "step": 222746
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.699030876159668,
      "learning_rate": 1.6331428162198234e-06,
      "loss": 2.7374,
      "step": 222747
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8602209091186523,
      "learning_rate": 1.6327166006310344e-06,
      "loss": 2.9023,
      "step": 222748
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.665436029434204,
      "learning_rate": 1.6322904405143388e-06,
      "loss": 3.0451,
      "step": 222749
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7768943309783936,
      "learning_rate": 1.6318643358698701e-06,
      "loss": 2.9421,
      "step": 222750
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.73517107963562,
      "learning_rate": 1.6314382866976616e-06,
      "loss": 2.8148,
      "step": 222751
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.108656883239746,
      "learning_rate": 1.6310122929977799e-06,
      "loss": 2.8061,
      "step": 222752
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1107044219970703,
      "learning_rate": 1.6305863547703245e-06,
      "loss": 2.4891,
      "step": 222753
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5098953247070312,
      "learning_rate": 1.630160472015396e-06,
      "loss": 2.9477,
      "step": 222754
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2285828590393066,
      "learning_rate": 1.6297346447330273e-06,
      "loss": 2.9691,
      "step": 222755
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7280731201171875,
      "learning_rate": 1.6293088729233517e-06,
      "loss": 3.0241,
      "step": 222756
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0542755126953125,
      "learning_rate": 1.6288831565864025e-06,
      "loss": 2.7843,
      "step": 222757
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.678061008453369,
      "learning_rate": 1.6284574957222796e-06,
      "loss": 2.8811,
      "step": 222758
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9457178115844727,
      "learning_rate": 1.6280318903310497e-06,
      "loss": 2.9162,
      "step": 222759
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7079691886901855,
      "learning_rate": 1.6276063404128125e-06,
      "loss": 2.971,
      "step": 222760
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2096288204193115,
      "learning_rate": 1.627180845967635e-06,
      "loss": 2.9879,
      "step": 222761
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.000272035598755,
      "learning_rate": 1.6267554069956168e-06,
      "loss": 2.7485,
      "step": 222762
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9475138187408447,
      "learning_rate": 1.6263300234967913e-06,
      "loss": 2.8671,
      "step": 222763
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.448190450668335,
      "learning_rate": 1.6259046954712584e-06,
      "loss": 2.9199,
      "step": 222764
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.565173864364624,
      "learning_rate": 1.6254794229191182e-06,
      "loss": 2.8366,
      "step": 222765
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.7508442401885986,
      "learning_rate": 1.6250542058404038e-06,
      "loss": 2.9047,
      "step": 222766
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8254878520965576,
      "learning_rate": 1.6246290442352483e-06,
      "loss": 2.9368,
      "step": 222767
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4580342769622803,
      "learning_rate": 1.6242039381037187e-06,
      "loss": 2.8571,
      "step": 222768
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.116787910461426,
      "learning_rate": 1.6237788874458813e-06,
      "loss": 2.7657,
      "step": 222769
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.60082745552063,
      "learning_rate": 1.6233538922618029e-06,
      "loss": 2.7668,
      "step": 222770
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.623720407485962,
      "learning_rate": 1.62292895255155e-06,
      "loss": 2.8061,
      "step": 222771
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5796496868133545,
      "learning_rate": 1.6225040683152558e-06,
      "loss": 2.9401,
      "step": 222772
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0015082359313965,
      "learning_rate": 1.6220792395529536e-06,
      "loss": 2.9167,
      "step": 222773
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.729111909866333,
      "learning_rate": 1.6216544662647769e-06,
      "loss": 2.9603,
      "step": 222774
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8143389225006104,
      "learning_rate": 1.6212297484507252e-06,
      "loss": 2.9622,
      "step": 222775
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.785338878631592,
      "learning_rate": 1.6208050861109656e-06,
      "loss": 2.8264,
      "step": 222776
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5447793006896973,
      "learning_rate": 1.6203804792454644e-06,
      "loss": 2.8194,
      "step": 222777
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.495053291320801,
      "learning_rate": 1.6199559278544217e-06,
      "loss": 2.9789,
      "step": 222778
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.380408763885498,
      "learning_rate": 1.619531431937837e-06,
      "loss": 3.2785,
      "step": 222779
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.919543981552124,
      "learning_rate": 1.619106991495811e-06,
      "loss": 2.8753,
      "step": 222780
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3465750217437744,
      "learning_rate": 1.618682606528443e-06,
      "loss": 3.0984,
      "step": 222781
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.125269651412964,
      "learning_rate": 1.6182582770357998e-06,
      "loss": 2.6471,
      "step": 222782
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7893917560577393,
      "learning_rate": 1.617834003017915e-06,
      "loss": 2.8956,
      "step": 222783
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4037537574768066,
      "learning_rate": 1.6174097844749545e-06,
      "loss": 3.0204,
      "step": 222784
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0228004455566406,
      "learning_rate": 1.6169856214069188e-06,
      "loss": 3.0462,
      "step": 222785
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8431620597839355,
      "learning_rate": 1.616561513813941e-06,
      "loss": 3.0181,
      "step": 222786
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.687944173812866,
      "learning_rate": 1.6161374616960543e-06,
      "loss": 2.9256,
      "step": 222787
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.075343370437622,
      "learning_rate": 1.6157134650533587e-06,
      "loss": 2.9537,
      "step": 222788
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6857571601867676,
      "learning_rate": 1.6152895238859541e-06,
      "loss": 2.9867,
      "step": 222789
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8259851932525635,
      "learning_rate": 1.6148656381939073e-06,
      "loss": 3.0096,
      "step": 222790
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6072185039520264,
      "learning_rate": 1.6144418079772514e-06,
      "loss": 2.618,
      "step": 222791
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9766182899475098,
      "learning_rate": 1.6140180332361198e-06,
      "loss": 2.8238,
      "step": 222792
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7061476707458496,
      "learning_rate": 1.6135943139705788e-06,
      "loss": 2.9448,
      "step": 222793
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0270540714263916,
      "learning_rate": 1.6131706501806951e-06,
      "loss": 3.175,
      "step": 222794
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9653468132019043,
      "learning_rate": 1.6127470418665688e-06,
      "loss": 2.6353,
      "step": 222795
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2543625831604004,
      "learning_rate": 1.6123234890282665e-06,
      "loss": 2.9485,
      "step": 222796
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.800123929977417,
      "learning_rate": 1.6118999916658548e-06,
      "loss": 2.7895,
      "step": 222797
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.920097827911377,
      "learning_rate": 1.6114765497794335e-06,
      "loss": 2.883,
      "step": 222798
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.9951705932617188,
      "learning_rate": 1.6110531633690694e-06,
      "loss": 2.8651,
      "step": 222799
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9208407402038574,
      "learning_rate": 1.6106298324348287e-06,
      "loss": 2.8134,
      "step": 222800
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.697801113128662,
      "learning_rate": 1.6102065569768119e-06,
      "loss": 2.9627,
      "step": 222801
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.95052433013916,
      "learning_rate": 1.6097833369950851e-06,
      "loss": 3.0786,
      "step": 222802
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.667952299118042,
      "learning_rate": 1.6093601724897487e-06,
      "loss": 2.8758,
      "step": 222803
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.404451370239258,
      "learning_rate": 1.608937063460869e-06,
      "loss": 3.0079,
      "step": 222804
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8761889934539795,
      "learning_rate": 1.6085140099085125e-06,
      "loss": 2.6659,
      "step": 222805
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0042412281036377,
      "learning_rate": 1.608091011832746e-06,
      "loss": 2.6488,
      "step": 222806
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2037699222564697,
      "learning_rate": 1.607668069233703e-06,
      "loss": 3.0646,
      "step": 222807
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0000483989715576,
      "learning_rate": 1.607245182111383e-06,
      "loss": 2.9444,
      "step": 222808
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.832569122314453,
      "learning_rate": 1.6068223504659528e-06,
      "loss": 3.0197,
      "step": 222809
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.666707992553711,
      "learning_rate": 1.6063995742974122e-06,
      "loss": 2.9691,
      "step": 222810
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6726362705230713,
      "learning_rate": 1.6059768536058947e-06,
      "loss": 2.9039,
      "step": 222811
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6937811374664307,
      "learning_rate": 1.6055541883914668e-06,
      "loss": 2.9565,
      "step": 222812
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.657987594604492,
      "learning_rate": 1.6051315786541952e-06,
      "loss": 2.845,
      "step": 222813
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2312326431274414,
      "learning_rate": 1.6047090243941462e-06,
      "loss": 2.9656,
      "step": 222814
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.040665864944458,
      "learning_rate": 1.6042865256114201e-06,
      "loss": 3.203,
      "step": 222815
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.232250928878784,
      "learning_rate": 1.6038640823060834e-06,
      "loss": 3.1484,
      "step": 222816
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5269079208374023,
      "learning_rate": 1.6034416944782359e-06,
      "loss": 2.8879,
      "step": 222817
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.178258180618286,
      "learning_rate": 1.6030193621279442e-06,
      "loss": 2.882,
      "step": 222818
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2343406677246094,
      "learning_rate": 1.602597085255275e-06,
      "loss": 2.8341,
      "step": 222819
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.563538074493408,
      "learning_rate": 1.602174863860295e-06,
      "loss": 2.994,
      "step": 222820
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.551732063293457,
      "learning_rate": 1.6017526979431372e-06,
      "loss": 3.1213,
      "step": 222821
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9396777153015137,
      "learning_rate": 1.6013305875038018e-06,
      "loss": 3.1271,
      "step": 222822
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4229471683502197,
      "learning_rate": 1.6009085325424554e-06,
      "loss": 2.8214,
      "step": 222823
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1116318702697754,
      "learning_rate": 1.600486533059131e-06,
      "loss": 3.1663,
      "step": 222824
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7745492458343506,
      "learning_rate": 1.6000645890538953e-06,
      "loss": 2.8416,
      "step": 222825
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.690952777862549,
      "learning_rate": 1.5996427005268486e-06,
      "loss": 3.1018,
      "step": 222826
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.066422462463379,
      "learning_rate": 1.5992208674780572e-06,
      "loss": 2.9733,
      "step": 222827
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0015382766723633,
      "learning_rate": 1.5987990899075876e-06,
      "loss": 2.8103,
      "step": 222828
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8913090229034424,
      "learning_rate": 1.5983773678155398e-06,
      "loss": 2.908,
      "step": 222829
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0216569900512695,
      "learning_rate": 1.5979557012020139e-06,
      "loss": 2.7778,
      "step": 222830
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.993797540664673,
      "learning_rate": 1.597534090067043e-06,
      "loss": 2.8831,
      "step": 222831
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.956537961959839,
      "learning_rate": 1.597112534410694e-06,
      "loss": 2.9776,
      "step": 222832
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.352109432220459,
      "learning_rate": 1.5966910342330996e-06,
      "loss": 3.049,
      "step": 222833
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6806511878967285,
      "learning_rate": 1.596269589534327e-06,
      "loss": 2.8538,
      "step": 222834
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4496326446533203,
      "learning_rate": 1.5958482003144423e-06,
      "loss": 2.7632,
      "step": 222835
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.963179349899292,
      "learning_rate": 1.5954268665735125e-06,
      "loss": 2.6574,
      "step": 222836
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0252857208251953,
      "learning_rate": 1.5950055883116043e-06,
      "loss": 2.6809,
      "step": 222837
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.779158592224121,
      "learning_rate": 1.5945843655288172e-06,
      "loss": 2.7345,
      "step": 222838
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.074496030807495,
      "learning_rate": 1.5941631982252845e-06,
      "loss": 2.9352,
      "step": 222839
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1149981021881104,
      "learning_rate": 1.5937420864009732e-06,
      "loss": 2.9067,
      "step": 222840
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8243825435638428,
      "learning_rate": 1.5933210300560496e-06,
      "loss": 2.9447,
      "step": 222841
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0700347423553467,
      "learning_rate": 1.5929000291905469e-06,
      "loss": 3.1567,
      "step": 222842
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7766737937927246,
      "learning_rate": 1.5924790838045653e-06,
      "loss": 2.676,
      "step": 222843
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.919719696044922,
      "learning_rate": 1.592058193898138e-06,
      "loss": 3.0182,
      "step": 222844
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.219350337982178,
      "learning_rate": 1.5916373594714315e-06,
      "loss": 2.8646,
      "step": 222845
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6443681716918945,
      "learning_rate": 1.5912165805244459e-06,
      "loss": 2.8362,
      "step": 222846
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.450059413909912,
      "learning_rate": 1.5907958570573143e-06,
      "loss": 2.9216,
      "step": 222847
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0493946075439453,
      "learning_rate": 1.5903751890700367e-06,
      "loss": 2.9066,
      "step": 222848
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9626903533935547,
      "learning_rate": 1.5899545765627797e-06,
      "loss": 2.5877,
      "step": 222849
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6393985748291016,
      "learning_rate": 1.5895340195355432e-06,
      "loss": 2.9725,
      "step": 222850
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.35321307182312,
      "learning_rate": 1.589113517988494e-06,
      "loss": 2.9433,
      "step": 222851
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9236159324645996,
      "learning_rate": 1.5886930719216317e-06,
      "loss": 2.7017,
      "step": 222852
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.847334384918213,
      "learning_rate": 1.5882726813350898e-06,
      "loss": 2.8979,
      "step": 222853
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9598240852355957,
      "learning_rate": 1.5878523462289018e-06,
      "loss": 2.9133,
      "step": 222854
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.810061454772949,
      "learning_rate": 1.587432066603167e-06,
      "loss": 3.0901,
      "step": 222855
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.497069835662842,
      "learning_rate": 1.5870118424579526e-06,
      "loss": 2.998,
      "step": 222856
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.23853874206543,
      "learning_rate": 1.5865916737933581e-06,
      "loss": 2.9161,
      "step": 222857
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.490445613861084,
      "learning_rate": 1.5861715606094505e-06,
      "loss": 3.2182,
      "step": 222858
    },
    {
      "epoch": 2.9,
      "grad_norm": 6.009108543395996,
      "learning_rate": 1.5857515029062962e-06,
      "loss": 2.962,
      "step": 222859
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5134148597717285,
      "learning_rate": 1.585331500683995e-06,
      "loss": 2.8831,
      "step": 222860
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9881324768066406,
      "learning_rate": 1.5849115539426138e-06,
      "loss": 2.9861,
      "step": 222861
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.874835729598999,
      "learning_rate": 1.584491662682219e-06,
      "loss": 2.8778,
      "step": 222862
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.341270685195923,
      "learning_rate": 1.5840718269029107e-06,
      "loss": 2.881,
      "step": 222863
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.970518112182617,
      "learning_rate": 1.5836520466047554e-06,
      "loss": 2.9125,
      "step": 222864
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3920960426330566,
      "learning_rate": 1.5832323217878196e-06,
      "loss": 2.8754,
      "step": 222865
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.043004035949707,
      "learning_rate": 1.5828126524522034e-06,
      "loss": 2.882,
      "step": 222866
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.004804849624634,
      "learning_rate": 1.5823930385979733e-06,
      "loss": 3.055,
      "step": 222867
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.7761588096618652,
      "learning_rate": 1.5819734802252292e-06,
      "loss": 2.998,
      "step": 222868
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.467918634414673,
      "learning_rate": 1.5815539773340046e-06,
      "loss": 3.1612,
      "step": 222869
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.551917314529419,
      "learning_rate": 1.5811345299243994e-06,
      "loss": 2.5919,
      "step": 222870
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.857990264892578,
      "learning_rate": 1.5807151379965132e-06,
      "loss": 2.9004,
      "step": 222871
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.080127239227295,
      "learning_rate": 1.580295801550413e-06,
      "loss": 2.6194,
      "step": 222872
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0591914653778076,
      "learning_rate": 1.5798765205861318e-06,
      "loss": 3.0245,
      "step": 222873
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9430861473083496,
      "learning_rate": 1.5794572951038031e-06,
      "loss": 2.7076,
      "step": 222874
    },
    {
      "epoch": 2.9,
      "grad_norm": 5.158109188079834,
      "learning_rate": 1.5790381251034934e-06,
      "loss": 2.9066,
      "step": 222875
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.537836790084839,
      "learning_rate": 1.5786190105852692e-06,
      "loss": 2.6385,
      "step": 222876
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3723721504211426,
      "learning_rate": 1.5781999515492305e-06,
      "loss": 2.8893,
      "step": 222877
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.622633218765259,
      "learning_rate": 1.5777809479954107e-06,
      "loss": 2.7101,
      "step": 222878
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.657546043395996,
      "learning_rate": 1.5773619999239095e-06,
      "loss": 2.9158,
      "step": 222879
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6767120361328125,
      "learning_rate": 1.5769431073348271e-06,
      "loss": 2.7514,
      "step": 222880
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9992897510528564,
      "learning_rate": 1.5765242702282299e-06,
      "loss": 3.1121,
      "step": 222881
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.874682903289795,
      "learning_rate": 1.5761054886041846e-06,
      "loss": 2.8024,
      "step": 222882
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5683765411376953,
      "learning_rate": 1.5756867624627579e-06,
      "loss": 2.8121,
      "step": 222883
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.759436130523682,
      "learning_rate": 1.5752680918040828e-06,
      "loss": 3.0371,
      "step": 222884
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.531248092651367,
      "learning_rate": 1.5748494766281595e-06,
      "loss": 2.963,
      "step": 222885
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.897181749343872,
      "learning_rate": 1.574430916935121e-06,
      "loss": 2.8863,
      "step": 222886
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.018430709838867,
      "learning_rate": 1.574012412725001e-06,
      "loss": 2.9784,
      "step": 222887
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6747868061065674,
      "learning_rate": 1.5735939639979322e-06,
      "loss": 3.153,
      "step": 222888
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3094000816345215,
      "learning_rate": 1.5731755707539816e-06,
      "loss": 2.9282,
      "step": 222889
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.486355781555176,
      "learning_rate": 1.5727572329931826e-06,
      "loss": 2.9959,
      "step": 222890
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.848541259765625,
      "learning_rate": 1.572338950715668e-06,
      "loss": 3.077,
      "step": 222891
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0887725353240967,
      "learning_rate": 1.5719207239214715e-06,
      "loss": 2.9667,
      "step": 222892
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.4448227882385254,
      "learning_rate": 1.5715025526106928e-06,
      "loss": 3.3687,
      "step": 222893
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9845833778381348,
      "learning_rate": 1.5710844367833985e-06,
      "loss": 2.8903,
      "step": 222894
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7385947704315186,
      "learning_rate": 1.5706663764396555e-06,
      "loss": 2.8877,
      "step": 222895
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4743902683258057,
      "learning_rate": 1.5702483715795965e-06,
      "loss": 2.7624,
      "step": 222896
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.762333393096924,
      "learning_rate": 1.5698304222032553e-06,
      "loss": 2.8522,
      "step": 222897
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6333541870117188,
      "learning_rate": 1.5694125283106983e-06,
      "loss": 3.0262,
      "step": 222898
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1989924907684326,
      "learning_rate": 1.5689946899020255e-06,
      "loss": 2.9187,
      "step": 222899
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.044501543045044,
      "learning_rate": 1.5685769069773036e-06,
      "loss": 3.1132,
      "step": 222900
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.704141139984131,
      "learning_rate": 1.5681591795366321e-06,
      "loss": 2.7878,
      "step": 222901
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8022613525390625,
      "learning_rate": 1.5677415075800782e-06,
      "loss": 2.9322,
      "step": 222902
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.833619594573975,
      "learning_rate": 1.5673238911076746e-06,
      "loss": 2.781,
      "step": 222903
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.363603115081787,
      "learning_rate": 1.5669063301195884e-06,
      "loss": 3.0833,
      "step": 222904
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5924694538116455,
      "learning_rate": 1.566488824615819e-06,
      "loss": 2.83,
      "step": 222905
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0133042335510254,
      "learning_rate": 1.566071374596467e-06,
      "loss": 2.8516,
      "step": 222906
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.737318277359009,
      "learning_rate": 1.5656539800616318e-06,
      "loss": 2.8268,
      "step": 222907
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5462710857391357,
      "learning_rate": 1.5652366410113804e-06,
      "loss": 2.805,
      "step": 222908
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6304359436035156,
      "learning_rate": 1.564819357445779e-06,
      "loss": 2.9199,
      "step": 222909
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7703962326049805,
      "learning_rate": 1.5644021293649278e-06,
      "loss": 2.7086,
      "step": 222910
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8801345825195312,
      "learning_rate": 1.56398495676886e-06,
      "loss": 2.7502,
      "step": 222911
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.424665689468384,
      "learning_rate": 1.5635678396576756e-06,
      "loss": 2.8114,
      "step": 222912
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.031748056411743,
      "learning_rate": 1.5631507780314746e-06,
      "loss": 3.0446,
      "step": 222913
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7727596759796143,
      "learning_rate": 1.5627337718903232e-06,
      "loss": 2.8036,
      "step": 222914
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.084054470062256,
      "learning_rate": 1.5623168212342885e-06,
      "loss": 2.9292,
      "step": 222915
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9623217582702637,
      "learning_rate": 1.5618999260634368e-06,
      "loss": 2.7617,
      "step": 222916
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3312861919403076,
      "learning_rate": 1.5614830863778682e-06,
      "loss": 3.047,
      "step": 222917
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0235002040863037,
      "learning_rate": 1.5610663021776492e-06,
      "loss": 2.9961,
      "step": 222918
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2667946815490723,
      "learning_rate": 1.5606495734628799e-06,
      "loss": 2.9691,
      "step": 222919
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.209770441055298,
      "learning_rate": 1.5602329002336267e-06,
      "loss": 3.0569,
      "step": 222920
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.936701536178589,
      "learning_rate": 1.5598162824899229e-06,
      "loss": 2.8616,
      "step": 222921
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.290560007095337,
      "learning_rate": 1.559399720231902e-06,
      "loss": 2.6062,
      "step": 222922
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.362222194671631,
      "learning_rate": 1.5589832134596304e-06,
      "loss": 2.7369,
      "step": 222923
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5278706550598145,
      "learning_rate": 1.5585667621731745e-06,
      "loss": 2.7827,
      "step": 222924
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2925631999969482,
      "learning_rate": 1.558150366372568e-06,
      "loss": 2.9413,
      "step": 222925
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.131918430328369,
      "learning_rate": 1.5577340260580106e-06,
      "loss": 2.6594,
      "step": 222926
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4416134357452393,
      "learning_rate": 1.557317741229469e-06,
      "loss": 2.883,
      "step": 222927
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.587244749069214,
      "learning_rate": 1.5569015118870431e-06,
      "loss": 3.2716,
      "step": 222928
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3065667152404785,
      "learning_rate": 1.5564853380308329e-06,
      "loss": 2.6046,
      "step": 222929
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3417422771453857,
      "learning_rate": 1.5560692196609047e-06,
      "loss": 2.9788,
      "step": 222930
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.868256092071533,
      "learning_rate": 1.5556531567773256e-06,
      "loss": 3.0167,
      "step": 222931
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.790520668029785,
      "learning_rate": 1.5552371493802286e-06,
      "loss": 3.0962,
      "step": 222932
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9274072647094727,
      "learning_rate": 1.5548211974696133e-06,
      "loss": 2.9256,
      "step": 222933
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3951218128204346,
      "learning_rate": 1.5544053010455803e-06,
      "loss": 2.6875,
      "step": 222934
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.402033805847168,
      "learning_rate": 1.5539894601082292e-06,
      "loss": 2.8078,
      "step": 222935
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.903700590133667,
      "learning_rate": 1.5535736746576267e-06,
      "loss": 2.9743,
      "step": 222936
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.911022901535034,
      "learning_rate": 1.5531579446938391e-06,
      "loss": 3.0519,
      "step": 222937
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7893364429473877,
      "learning_rate": 1.5527422702169666e-06,
      "loss": 2.9513,
      "step": 222938
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0970513820648193,
      "learning_rate": 1.552326651227076e-06,
      "loss": 2.9877,
      "step": 222939
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0743319988250732,
      "learning_rate": 1.5519110877242335e-06,
      "loss": 2.8646,
      "step": 222940
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2733688354492188,
      "learning_rate": 1.5514955797085393e-06,
      "loss": 2.9575,
      "step": 222941
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8220596313476562,
      "learning_rate": 1.5510801271800266e-06,
      "loss": 2.945,
      "step": 222942
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.827367067337036,
      "learning_rate": 1.5506647301388287e-06,
      "loss": 2.9883,
      "step": 222943
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6013948917388916,
      "learning_rate": 1.5502493885849787e-06,
      "loss": 3.1542,
      "step": 222944
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5044217109680176,
      "learning_rate": 1.5498341025185767e-06,
      "loss": 2.6816,
      "step": 222945
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.987668514251709,
      "learning_rate": 1.5494188719396893e-06,
      "loss": 2.9218,
      "step": 222946
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0084259510040283,
      "learning_rate": 1.5490036968484165e-06,
      "loss": 2.7106,
      "step": 222947
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.935173988342285,
      "learning_rate": 1.5485885772447915e-06,
      "loss": 3.1306,
      "step": 222948
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.712369918823242,
      "learning_rate": 1.5481735131289142e-06,
      "loss": 2.8016,
      "step": 222949
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.616727590560913,
      "learning_rate": 1.5477585045008845e-06,
      "loss": 2.8617,
      "step": 222950
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.874143600463867,
      "learning_rate": 1.547343551360769e-06,
      "loss": 2.9757,
      "step": 222951
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1787710189819336,
      "learning_rate": 1.5469286537086346e-06,
      "loss": 2.7623,
      "step": 222952
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5915849208831787,
      "learning_rate": 1.5465138115445475e-06,
      "loss": 2.745,
      "step": 222953
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6410458087921143,
      "learning_rate": 1.5460990248685744e-06,
      "loss": 2.8088,
      "step": 222954
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3654773235321045,
      "learning_rate": 1.5456842936808489e-06,
      "loss": 2.8164,
      "step": 222955
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9133224487304688,
      "learning_rate": 1.545269617981404e-06,
      "loss": 2.7809,
      "step": 222956
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.041839599609375,
      "learning_rate": 1.5448549977703396e-06,
      "loss": 2.9153,
      "step": 222957
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8669445514678955,
      "learning_rate": 1.544440433047689e-06,
      "loss": 2.7886,
      "step": 222958
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.002084732055664,
      "learning_rate": 1.5440259238135854e-06,
      "loss": 2.8898,
      "step": 222959
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8587818145751953,
      "learning_rate": 1.5436114700680958e-06,
      "loss": 2.8599,
      "step": 222960
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.770822286605835,
      "learning_rate": 1.543197071811253e-06,
      "loss": 2.768,
      "step": 222961
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3819119930267334,
      "learning_rate": 1.5427827290431572e-06,
      "loss": 2.9872,
      "step": 222962
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0973827838897705,
      "learning_rate": 1.5423684417639416e-06,
      "loss": 2.9685,
      "step": 222963
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3170058727264404,
      "learning_rate": 1.5419542099735727e-06,
      "loss": 3.1792,
      "step": 222964
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.396868944168091,
      "learning_rate": 1.5415400336722172e-06,
      "loss": 3.0124,
      "step": 222965
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3174171447753906,
      "learning_rate": 1.5411259128599084e-06,
      "loss": 2.7244,
      "step": 222966
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6349406242370605,
      "learning_rate": 1.5407118475367796e-06,
      "loss": 2.9608,
      "step": 222967
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.602370023727417,
      "learning_rate": 1.5402978377028309e-06,
      "loss": 2.9967,
      "step": 222968
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1526808738708496,
      "learning_rate": 1.539883883358195e-06,
      "loss": 2.8634,
      "step": 222969
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.127288818359375,
      "learning_rate": 1.5394699845029057e-06,
      "loss": 2.8254,
      "step": 222970
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.841420888900757,
      "learning_rate": 1.5390561411370627e-06,
      "loss": 3.0405,
      "step": 222971
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3135223388671875,
      "learning_rate": 1.538642353260766e-06,
      "loss": 3.1449,
      "step": 222972
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7905139923095703,
      "learning_rate": 1.538228620874049e-06,
      "loss": 3.0037,
      "step": 222973
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.759925603866577,
      "learning_rate": 1.5378149439770116e-06,
      "loss": 2.9029,
      "step": 222974
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7044425010681152,
      "learning_rate": 1.5374013225697536e-06,
      "loss": 2.9806,
      "step": 222975
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.001744031906128,
      "learning_rate": 1.536987756652308e-06,
      "loss": 2.9139,
      "step": 222976
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.123983383178711,
      "learning_rate": 1.5365742462247754e-06,
      "loss": 3.1194,
      "step": 222977
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1189351081848145,
      "learning_rate": 1.5361607912871888e-06,
      "loss": 2.8912,
      "step": 222978
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2265610694885254,
      "learning_rate": 1.5357473918397146e-06,
      "loss": 2.7972,
      "step": 222979
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.881833076477051,
      "learning_rate": 1.5353340478823528e-06,
      "loss": 2.9408,
      "step": 222980
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0858349800109863,
      "learning_rate": 1.5349207594152369e-06,
      "loss": 2.6724,
      "step": 222981
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.95824933052063,
      "learning_rate": 1.5345075264383665e-06,
      "loss": 2.9891,
      "step": 222982
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.741816282272339,
      "learning_rate": 1.5340943489519086e-06,
      "loss": 2.9635,
      "step": 222983
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.025961875915527,
      "learning_rate": 1.533681226955863e-06,
      "loss": 3.0365,
      "step": 222984
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.4209911823272705,
      "learning_rate": 1.5332681604503627e-06,
      "loss": 2.9445,
      "step": 222985
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8168036937713623,
      "learning_rate": 1.5328551494354412e-06,
      "loss": 3.0029,
      "step": 222986
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.990429401397705,
      "learning_rate": 1.532442193911232e-06,
      "loss": 2.8602,
      "step": 222987
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8945088386535645,
      "learning_rate": 1.5320292938777345e-06,
      "loss": 2.9978,
      "step": 222988
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.93788743019104,
      "learning_rate": 1.5316164493350824e-06,
      "loss": 2.9008,
      "step": 222989
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.814418315887451,
      "learning_rate": 1.5312036602833423e-06,
      "loss": 2.7469,
      "step": 222990
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.532595634460449,
      "learning_rate": 1.5307909267225804e-06,
      "loss": 3.0173,
      "step": 222991
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.036503791809082,
      "learning_rate": 1.5303782486528636e-06,
      "loss": 2.9453,
      "step": 222992
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7203638553619385,
      "learning_rate": 1.5299656260743254e-06,
      "loss": 2.9396,
      "step": 222993
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9462106227874756,
      "learning_rate": 1.5295530589869652e-06,
      "loss": 3.0299,
      "step": 222994
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.751575469970703,
      "learning_rate": 1.5291405473909168e-06,
      "loss": 2.9313,
      "step": 222995
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.843751907348633,
      "learning_rate": 1.5287280912862133e-06,
      "loss": 3.0139,
      "step": 222996
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2278640270233154,
      "learning_rate": 1.5283156906729543e-06,
      "loss": 2.937,
      "step": 222997
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.778203248977661,
      "learning_rate": 1.5279033455512401e-06,
      "loss": 2.9441,
      "step": 222998
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.93560791015625,
      "learning_rate": 1.527491055921104e-06,
      "loss": 2.9178,
      "step": 222999
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6898181438446045,
      "learning_rate": 1.5270788217826456e-06,
      "loss": 2.7648,
      "step": 223000
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6871337890625,
      "learning_rate": 1.526666643135932e-06,
      "loss": 3.3871,
      "step": 223001
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0272552967071533,
      "learning_rate": 1.5262545199810628e-06,
      "loss": 3.0396,
      "step": 223002
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.641261339187622,
      "learning_rate": 1.5258424523180712e-06,
      "loss": 2.647,
      "step": 223003
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0103485584259033,
      "learning_rate": 1.5254304401470573e-06,
      "loss": 2.9685,
      "step": 223004
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.001368522644043,
      "learning_rate": 1.5250184834681213e-06,
      "loss": 2.7698,
      "step": 223005
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1027896404266357,
      "learning_rate": 1.5246065822813292e-06,
      "loss": 3.0646,
      "step": 223006
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.082937240600586,
      "learning_rate": 1.524194736586748e-06,
      "loss": 2.9042,
      "step": 223007
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.7241194248199463,
      "learning_rate": 1.523782946384411e-06,
      "loss": 2.9078,
      "step": 223008
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0530457496643066,
      "learning_rate": 1.5233712116744512e-06,
      "loss": 2.7789,
      "step": 223009
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9639828205108643,
      "learning_rate": 1.5229595324569355e-06,
      "loss": 2.8865,
      "step": 223010
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.897526741027832,
      "learning_rate": 1.5225479087319637e-06,
      "loss": 2.8954,
      "step": 223011
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.235767364501953,
      "learning_rate": 1.522136340499569e-06,
      "loss": 2.7157,
      "step": 223012
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.799403429031372,
      "learning_rate": 1.5217248277598181e-06,
      "loss": 2.7704,
      "step": 223013
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5629658699035645,
      "learning_rate": 1.5213133705128443e-06,
      "loss": 3.0042,
      "step": 223014
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8882577419281006,
      "learning_rate": 1.520901968758681e-06,
      "loss": 2.6296,
      "step": 223015
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.752253532409668,
      "learning_rate": 1.5204906224973944e-06,
      "loss": 2.9651,
      "step": 223016
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.091954231262207,
      "learning_rate": 1.5200793317291183e-06,
      "loss": 2.9895,
      "step": 223017
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3148696422576904,
      "learning_rate": 1.5196680964538855e-06,
      "loss": 2.9794,
      "step": 223018
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.460364818572998,
      "learning_rate": 1.5192569166717628e-06,
      "loss": 2.9524,
      "step": 223019
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5692458152770996,
      "learning_rate": 1.51884579238285e-06,
      "loss": 3.0114,
      "step": 223020
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5319888591766357,
      "learning_rate": 1.5184347235872473e-06,
      "loss": 2.8922,
      "step": 223021
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.904778480529785,
      "learning_rate": 1.5180237102849545e-06,
      "loss": 2.7693,
      "step": 223022
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9400594234466553,
      "learning_rate": 1.5176127524761384e-06,
      "loss": 3.0529,
      "step": 223023
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.7019054889678955,
      "learning_rate": 1.5172018501607986e-06,
      "loss": 2.9625,
      "step": 223024
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0637588500976562,
      "learning_rate": 1.5167910033390685e-06,
      "loss": 2.8303,
      "step": 223025
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.828376770019531,
      "learning_rate": 1.5163802120109814e-06,
      "loss": 2.9687,
      "step": 223026
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0274782180786133,
      "learning_rate": 1.5159694761766373e-06,
      "loss": 3.0169,
      "step": 223027
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8253681659698486,
      "learning_rate": 1.5155587958361027e-06,
      "loss": 2.7991,
      "step": 223028
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3615262508392334,
      "learning_rate": 1.5151481709894776e-06,
      "loss": 2.6473,
      "step": 223029
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.407968997955322,
      "learning_rate": 1.5147376016368284e-06,
      "loss": 2.9069,
      "step": 223030
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3947694301605225,
      "learning_rate": 1.514327087778222e-06,
      "loss": 2.7411,
      "step": 223031
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2763795852661133,
      "learning_rate": 1.513916629413725e-06,
      "loss": 2.8895,
      "step": 223032
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9437623023986816,
      "learning_rate": 1.5135062265434372e-06,
      "loss": 2.8921,
      "step": 223033
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.7778053283691406,
      "learning_rate": 1.5130958791673919e-06,
      "loss": 2.9367,
      "step": 223034
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8199596405029297,
      "learning_rate": 1.5126855872857225e-06,
      "loss": 3.0695,
      "step": 223035
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1411118507385254,
      "learning_rate": 1.5122753508984619e-06,
      "loss": 2.6669,
      "step": 223036
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9157016277313232,
      "learning_rate": 1.5118651700057439e-06,
      "loss": 3.0197,
      "step": 223037
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.2985341548919678,
      "learning_rate": 1.5114550446075679e-06,
      "loss": 2.7235,
      "step": 223038
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9544904232025146,
      "learning_rate": 1.5110449747040342e-06,
      "loss": 2.9084,
      "step": 223039
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8033194541931152,
      "learning_rate": 1.5106349602952428e-06,
      "loss": 2.8058,
      "step": 223040
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.4802005290985107,
      "learning_rate": 1.5102250013812933e-06,
      "loss": 2.835,
      "step": 223041
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.902961492538452,
      "learning_rate": 1.5098150979622192e-06,
      "loss": 2.854,
      "step": 223042
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.618901491165161,
      "learning_rate": 1.509405250038054e-06,
      "loss": 3.098,
      "step": 223043
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.6821651458740234,
      "learning_rate": 1.5089954576089635e-06,
      "loss": 2.9171,
      "step": 223044
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.451230525970459,
      "learning_rate": 1.5085857206749818e-06,
      "loss": 2.7499,
      "step": 223045
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.265048503875732,
      "learning_rate": 1.5081760392361753e-06,
      "loss": 2.8924,
      "step": 223046
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0971717834472656,
      "learning_rate": 1.507766413292677e-06,
      "loss": 2.9454,
      "step": 223047
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0764870643615723,
      "learning_rate": 1.507356842844487e-06,
      "loss": 2.9479,
      "step": 223048
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.010676860809326,
      "learning_rate": 1.5069473278917054e-06,
      "loss": 2.914,
      "step": 223049
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.644591808319092,
      "learning_rate": 1.506537868434432e-06,
      "loss": 2.8667,
      "step": 223050
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.106247901916504,
      "learning_rate": 1.5061284644727333e-06,
      "loss": 3.1685,
      "step": 223051
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.788332462310791,
      "learning_rate": 1.5057191160066428e-06,
      "loss": 3.0003,
      "step": 223052
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.97198486328125,
      "learning_rate": 1.5053098230362936e-06,
      "loss": 3.0468,
      "step": 223053
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.190981388092041,
      "learning_rate": 1.5049005855617523e-06,
      "loss": 2.9668,
      "step": 223054
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3172969818115234,
      "learning_rate": 1.5044914035830858e-06,
      "loss": 2.9178,
      "step": 223055
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1521542072296143,
      "learning_rate": 1.5040822771003602e-06,
      "loss": 2.7532,
      "step": 223056
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8163001537323,
      "learning_rate": 1.5036732061136425e-06,
      "loss": 2.9381,
      "step": 223057
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.669011354446411,
      "learning_rate": 1.5032641906230658e-06,
      "loss": 2.7518,
      "step": 223058
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.709920883178711,
      "learning_rate": 1.5028552306286635e-06,
      "loss": 2.7973,
      "step": 223059
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.871586561203003,
      "learning_rate": 1.502446326130502e-06,
      "loss": 3.0195,
      "step": 223060
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1932268142700195,
      "learning_rate": 1.502037477128648e-06,
      "loss": 2.8607,
      "step": 223061
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1370322704315186,
      "learning_rate": 1.501628683623235e-06,
      "loss": 2.908,
      "step": 223062
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.688455820083618,
      "learning_rate": 1.5012199456142625e-06,
      "loss": 2.8943,
      "step": 223063
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0020062923431396,
      "learning_rate": 1.5008112631018976e-06,
      "loss": 2.9972,
      "step": 223064
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.598243474960327,
      "learning_rate": 1.5004026360861065e-06,
      "loss": 2.8936,
      "step": 223065
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.210493326187134,
      "learning_rate": 1.4999940645670893e-06,
      "loss": 2.8874,
      "step": 223066
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.349687099456787,
      "learning_rate": 1.4995855485448127e-06,
      "loss": 2.9107,
      "step": 223067
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9435901641845703,
      "learning_rate": 1.499177088019443e-06,
      "loss": 2.862,
      "step": 223068
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.211289644241333,
      "learning_rate": 1.4987686829909473e-06,
      "loss": 2.9887,
      "step": 223069
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.7279770374298096,
      "learning_rate": 1.498360333459525e-06,
      "loss": 2.7942,
      "step": 223070
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.958568572998047,
      "learning_rate": 1.4979520394251433e-06,
      "loss": 2.906,
      "step": 223071
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0326783657073975,
      "learning_rate": 1.497543800887968e-06,
      "loss": 3.2419,
      "step": 223072
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5578367710113525,
      "learning_rate": 1.4971356178480332e-06,
      "loss": 2.8626,
      "step": 223073
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.845385789871216,
      "learning_rate": 1.496727490305405e-06,
      "loss": 3.0457,
      "step": 223074
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1674180030822754,
      "learning_rate": 1.4963194182601502e-06,
      "loss": 2.9707,
      "step": 223075
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.338139533996582,
      "learning_rate": 1.4959114017123686e-06,
      "loss": 3.0041,
      "step": 223076
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.382735252380371,
      "learning_rate": 1.4955034406621268e-06,
      "loss": 2.5871,
      "step": 223077
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.8869051933288574,
      "learning_rate": 1.495095535109525e-06,
      "loss": 2.7373,
      "step": 223078
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.5849971771240234,
      "learning_rate": 1.4946876850546296e-06,
      "loss": 2.9857,
      "step": 223079
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.39109206199646,
      "learning_rate": 1.4942798904975073e-06,
      "loss": 3.0646,
      "step": 223080
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9260690212249756,
      "learning_rate": 1.4938721514381912e-06,
      "loss": 2.9476,
      "step": 223081
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.953927993774414,
      "learning_rate": 1.4934644678768482e-06,
      "loss": 2.8246,
      "step": 223082
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.861988067626953,
      "learning_rate": 1.493056839813478e-06,
      "loss": 2.9738,
      "step": 223083
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.492868185043335,
      "learning_rate": 1.4926492672481804e-06,
      "loss": 2.6909,
      "step": 223084
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8357951641082764,
      "learning_rate": 1.4922417501810557e-06,
      "loss": 2.857,
      "step": 223085
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.6791558265686035,
      "learning_rate": 1.491834288612137e-06,
      "loss": 2.767,
      "step": 223086
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.829629421234131,
      "learning_rate": 1.4914268825415244e-06,
      "loss": 3.1084,
      "step": 223087
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.792396068572998,
      "learning_rate": 1.4910195319692842e-06,
      "loss": 3.0567,
      "step": 223088
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.8398587703704834,
      "learning_rate": 1.4906122368955164e-06,
      "loss": 2.8972,
      "step": 223089
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0761592388153076,
      "learning_rate": 1.4902049973202546e-06,
      "loss": 2.7072,
      "step": 223090
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.1971020698547363,
      "learning_rate": 1.4897978132436316e-06,
      "loss": 2.678,
      "step": 223091
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.5428009033203125,
      "learning_rate": 1.489390684665681e-06,
      "loss": 2.9137,
      "step": 223092
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0667829513549805,
      "learning_rate": 1.4889836115864695e-06,
      "loss": 2.8817,
      "step": 223093
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.773437738418579,
      "learning_rate": 1.4885765940060968e-06,
      "loss": 2.8586,
      "step": 223094
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0131468772888184,
      "learning_rate": 1.4881696319246295e-06,
      "loss": 2.8366,
      "step": 223095
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.713505983352661,
      "learning_rate": 1.4877627253421676e-06,
      "loss": 3.1467,
      "step": 223096
    },
    {
      "epoch": 2.9,
      "grad_norm": 4.202837944030762,
      "learning_rate": 1.4873558742587444e-06,
      "loss": 2.9641,
      "step": 223097
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.0605852603912354,
      "learning_rate": 1.4869490786744598e-06,
      "loss": 2.7732,
      "step": 223098
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.578899621963501,
      "learning_rate": 1.4865423385893805e-06,
      "loss": 2.9764,
      "step": 223099
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.9608964920043945,
      "learning_rate": 1.4861356540036062e-06,
      "loss": 2.885,
      "step": 223100
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3612558841705322,
      "learning_rate": 1.4857290249171705e-06,
      "loss": 2.999,
      "step": 223101
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.3923304080963135,
      "learning_rate": 1.4853224513302065e-06,
      "loss": 2.6117,
      "step": 223102
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.483039379119873,
      "learning_rate": 1.4849159332427474e-06,
      "loss": 3.0282,
      "step": 223103
    },
    {
      "epoch": 2.9,
      "grad_norm": 3.076064348220825,
      "learning_rate": 1.48450947065486e-06,
      "loss": 2.8681,
      "step": 223104
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.815190553665161,
      "learning_rate": 1.484103063566644e-06,
      "loss": 2.8986,
      "step": 223105
    },
    {
      "epoch": 2.9,
      "grad_norm": 2.814281463623047,
      "learning_rate": 1.4836967119781663e-06,
      "loss": 3.2034,
      "step": 223106
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1266634464263916,
      "learning_rate": 1.4832904158894931e-06,
      "loss": 2.6655,
      "step": 223107
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7991878986358643,
      "learning_rate": 1.4828841753007248e-06,
      "loss": 2.9836,
      "step": 223108
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.379404067993164,
      "learning_rate": 1.4824779902119276e-06,
      "loss": 2.926,
      "step": 223109
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8234479427337646,
      "learning_rate": 1.4820718606231684e-06,
      "loss": 3.2506,
      "step": 223110
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.427642345428467,
      "learning_rate": 1.481665786534514e-06,
      "loss": 2.9013,
      "step": 223111
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3761796951293945,
      "learning_rate": 1.4812597679460636e-06,
      "loss": 2.7726,
      "step": 223112
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.101735830307007,
      "learning_rate": 1.4808538048578845e-06,
      "loss": 2.7711,
      "step": 223113
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9482102394104004,
      "learning_rate": 1.4804478972700428e-06,
      "loss": 2.8194,
      "step": 223114
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.203890085220337,
      "learning_rate": 1.480042045182639e-06,
      "loss": 2.6558,
      "step": 223115
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.077108383178711,
      "learning_rate": 1.4796362485957058e-06,
      "loss": 2.9703,
      "step": 223116
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7559421062469482,
      "learning_rate": 1.4792305075093435e-06,
      "loss": 2.7963,
      "step": 223117
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.64530873298645,
      "learning_rate": 1.478824821923652e-06,
      "loss": 2.9687,
      "step": 223118
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1249914169311523,
      "learning_rate": 1.4784191918386646e-06,
      "loss": 2.8905,
      "step": 223119
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9183833599090576,
      "learning_rate": 1.478013617254481e-06,
      "loss": 2.799,
      "step": 223120
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3155441284179688,
      "learning_rate": 1.477608098171168e-06,
      "loss": 3.2714,
      "step": 223121
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.937178134918213,
      "learning_rate": 1.4772026345888256e-06,
      "loss": 3.2683,
      "step": 223122
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.561957597732544,
      "learning_rate": 1.4767972265074535e-06,
      "loss": 3.0394,
      "step": 223123
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.181408643722534,
      "learning_rate": 1.4763918739272184e-06,
      "loss": 3.0174,
      "step": 223124
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.566473960876465,
      "learning_rate": 1.4759865768481537e-06,
      "loss": 2.8518,
      "step": 223125
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.350741147994995,
      "learning_rate": 1.475581335270326e-06,
      "loss": 2.7168,
      "step": 223126
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.119593381881714,
      "learning_rate": 1.4751761491938353e-06,
      "loss": 2.9468,
      "step": 223127
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8109211921691895,
      "learning_rate": 1.4747710186187145e-06,
      "loss": 2.8111,
      "step": 223128
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6392385959625244,
      "learning_rate": 1.474365943545097e-06,
      "loss": 2.8905,
      "step": 223129
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.409729480743408,
      "learning_rate": 1.4739609239730499e-06,
      "loss": 2.792,
      "step": 223130
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3142685890197754,
      "learning_rate": 1.4735559599025727e-06,
      "loss": 3.0567,
      "step": 223131
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3962130546569824,
      "learning_rate": 1.473151051333832e-06,
      "loss": 3.0056,
      "step": 223132
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2577195167541504,
      "learning_rate": 1.4727461982668609e-06,
      "loss": 2.8895,
      "step": 223133
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.093953847885132,
      "learning_rate": 1.4723414007017597e-06,
      "loss": 2.762,
      "step": 223134
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4926908016204834,
      "learning_rate": 1.4719366586385617e-06,
      "loss": 2.9725,
      "step": 223135
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.860086679458618,
      "learning_rate": 1.4715319720774e-06,
      "loss": 2.7245,
      "step": 223136
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7936289310455322,
      "learning_rate": 1.4711273410182745e-06,
      "loss": 2.8395,
      "step": 223137
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.520962953567505,
      "learning_rate": 1.4707227654613185e-06,
      "loss": 2.7864,
      "step": 223138
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.823753595352173,
      "learning_rate": 1.4703182454065654e-06,
      "loss": 3.0877,
      "step": 223139
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6509382724761963,
      "learning_rate": 1.4699137808541484e-06,
      "loss": 2.886,
      "step": 223140
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8712990283966064,
      "learning_rate": 1.4695093718041007e-06,
      "loss": 2.7866,
      "step": 223141
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.079648971557617,
      "learning_rate": 1.469105018256489e-06,
      "loss": 2.8875,
      "step": 223142
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.021263360977173,
      "learning_rate": 1.4687007202114464e-06,
      "loss": 2.8683,
      "step": 223143
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.452960729598999,
      "learning_rate": 1.468296477668973e-06,
      "loss": 2.6056,
      "step": 223144
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5610620975494385,
      "learning_rate": 1.467892290629169e-06,
      "loss": 2.896,
      "step": 223145
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.9680991172790527,
      "learning_rate": 1.4674881590921672e-06,
      "loss": 2.8672,
      "step": 223146
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2153022289276123,
      "learning_rate": 1.4670840830579678e-06,
      "loss": 2.9217,
      "step": 223147
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.4202585220336914,
      "learning_rate": 1.4666800625266706e-06,
      "loss": 3.0305,
      "step": 223148
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6558492183685303,
      "learning_rate": 1.4662760974983424e-06,
      "loss": 2.8864,
      "step": 223149
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.276158094406128,
      "learning_rate": 1.465872187973083e-06,
      "loss": 2.854,
      "step": 223150
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1515889167785645,
      "learning_rate": 1.4654683339509588e-06,
      "loss": 3.0111,
      "step": 223151
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.463464260101318,
      "learning_rate": 1.4650645354320035e-06,
      "loss": 3.0273,
      "step": 223152
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.724346160888672,
      "learning_rate": 1.4646607924163834e-06,
      "loss": 2.9231,
      "step": 223153
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8073809146881104,
      "learning_rate": 1.4642571049040653e-06,
      "loss": 2.7065,
      "step": 223154
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.3827595710754395,
      "learning_rate": 1.4638534728952156e-06,
      "loss": 3.0396,
      "step": 223155
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8180599212646484,
      "learning_rate": 1.4634498963898344e-06,
      "loss": 3.0543,
      "step": 223156
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.82975435256958,
      "learning_rate": 1.463046375388055e-06,
      "loss": 2.9345,
      "step": 223157
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4349279403686523,
      "learning_rate": 1.462642909889944e-06,
      "loss": 2.5913,
      "step": 223158
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.949855327606201,
      "learning_rate": 1.4622394998955677e-06,
      "loss": 3.0621,
      "step": 223159
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0082569122314453,
      "learning_rate": 1.46183614540496e-06,
      "loss": 3.1262,
      "step": 223160
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7079129219055176,
      "learning_rate": 1.4614328464182535e-06,
      "loss": 3.1503,
      "step": 223161
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.281820058822632,
      "learning_rate": 1.4610296029354817e-06,
      "loss": 2.812,
      "step": 223162
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.43837571144104,
      "learning_rate": 1.460626414956778e-06,
      "loss": 2.878,
      "step": 223163
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9944233894348145,
      "learning_rate": 1.4602232824821758e-06,
      "loss": 3.0187,
      "step": 223164
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.776344060897827,
      "learning_rate": 1.4598202055117414e-06,
      "loss": 3.0753,
      "step": 223165
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0938925743103027,
      "learning_rate": 1.4594171840455416e-06,
      "loss": 2.7372,
      "step": 223166
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9273979663848877,
      "learning_rate": 1.459014218083676e-06,
      "loss": 2.654,
      "step": 223167
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.925952434539795,
      "learning_rate": 1.4586113076262118e-06,
      "loss": 3.1284,
      "step": 223168
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.249952793121338,
      "learning_rate": 1.4582084526732485e-06,
      "loss": 3.1879,
      "step": 223169
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.371701240539551,
      "learning_rate": 1.4578056532248528e-06,
      "loss": 2.9458,
      "step": 223170
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7476022243499756,
      "learning_rate": 1.457402909281058e-06,
      "loss": 3.1287,
      "step": 223171
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2090563774108887,
      "learning_rate": 1.457000220841964e-06,
      "loss": 2.9193,
      "step": 223172
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.937004327774048,
      "learning_rate": 1.4565975879076708e-06,
      "loss": 2.8971,
      "step": 223173
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.296869993209839,
      "learning_rate": 1.4561950104782115e-06,
      "loss": 2.8889,
      "step": 223174
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5891995429992676,
      "learning_rate": 1.4557924885536866e-06,
      "loss": 3.0393,
      "step": 223175
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0491771697998047,
      "learning_rate": 1.455390022134195e-06,
      "loss": 3.1444,
      "step": 223176
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.602362632751465,
      "learning_rate": 1.4549876112197378e-06,
      "loss": 2.9545,
      "step": 223177
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9297165870666504,
      "learning_rate": 1.4545852558104477e-06,
      "loss": 2.8395,
      "step": 223178
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.033064365386963,
      "learning_rate": 1.4541829559063912e-06,
      "loss": 2.8722,
      "step": 223179
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2884199619293213,
      "learning_rate": 1.453780711507635e-06,
      "loss": 2.8814,
      "step": 223180
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.866436719894409,
      "learning_rate": 1.453378522614279e-06,
      "loss": 2.9972,
      "step": 223181
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7635226249694824,
      "learning_rate": 1.4529763892263568e-06,
      "loss": 2.9311,
      "step": 223182
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4681639671325684,
      "learning_rate": 1.452574311343968e-06,
      "loss": 2.9408,
      "step": 223183
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9513304233551025,
      "learning_rate": 1.4521722889671461e-06,
      "loss": 2.76,
      "step": 223184
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.611599922180176,
      "learning_rate": 1.4517703220960242e-06,
      "loss": 3.0563,
      "step": 223185
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.722322940826416,
      "learning_rate": 1.451368410730669e-06,
      "loss": 2.9131,
      "step": 223186
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9518165588378906,
      "learning_rate": 1.4509665548711136e-06,
      "loss": 3.1198,
      "step": 223187
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.663336753845215,
      "learning_rate": 1.4505647545174914e-06,
      "loss": 3.0652,
      "step": 223188
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.145474910736084,
      "learning_rate": 1.4501630096698358e-06,
      "loss": 2.9267,
      "step": 223189
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.952653408050537,
      "learning_rate": 1.4497613203282131e-06,
      "loss": 2.9551,
      "step": 223190
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8007724285125732,
      "learning_rate": 1.4493596864927236e-06,
      "loss": 3.1684,
      "step": 223191
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.003178596496582,
      "learning_rate": 1.4489581081634338e-06,
      "loss": 2.9154,
      "step": 223192
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.845614433288574,
      "learning_rate": 1.4485565853404102e-06,
      "loss": 2.8912,
      "step": 223193
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.263197422027588,
      "learning_rate": 1.4481551180237527e-06,
      "loss": 2.7718,
      "step": 223194
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9387173652648926,
      "learning_rate": 1.447753706213528e-06,
      "loss": 3.124,
      "step": 223195
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.654667615890503,
      "learning_rate": 1.4473523499097694e-06,
      "loss": 3.0147,
      "step": 223196
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.906733274459839,
      "learning_rate": 1.4469510491125769e-06,
      "loss": 2.8554,
      "step": 223197
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.050868034362793,
      "learning_rate": 1.4465498038220503e-06,
      "loss": 2.7457,
      "step": 223198
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1949503421783447,
      "learning_rate": 1.4461486140382562e-06,
      "loss": 3.0295,
      "step": 223199
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9508612155914307,
      "learning_rate": 1.445747479761261e-06,
      "loss": 3.0364,
      "step": 223200
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8126220703125,
      "learning_rate": 1.445346400991132e-06,
      "loss": 2.6313,
      "step": 223201
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.112480640411377,
      "learning_rate": 1.4449453777279351e-06,
      "loss": 2.7275,
      "step": 223202
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3360047340393066,
      "learning_rate": 1.4445444099717708e-06,
      "loss": 3.0168,
      "step": 223203
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1395256519317627,
      "learning_rate": 1.4441434977226718e-06,
      "loss": 2.8963,
      "step": 223204
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0693156719207764,
      "learning_rate": 1.4437426409807718e-06,
      "loss": 3.0196,
      "step": 223205
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5320324897766113,
      "learning_rate": 1.443341839746104e-06,
      "loss": 2.9506,
      "step": 223206
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.936431646347046,
      "learning_rate": 1.4429410940188014e-06,
      "loss": 3.0578,
      "step": 223207
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8995449542999268,
      "learning_rate": 1.442540403798831e-06,
      "loss": 3.0722,
      "step": 223208
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.678926944732666,
      "learning_rate": 1.442139769086359e-06,
      "loss": 2.9418,
      "step": 223209
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.671549081802368,
      "learning_rate": 1.441739189881419e-06,
      "loss": 2.8273,
      "step": 223210
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8436927795410156,
      "learning_rate": 1.4413386661840776e-06,
      "loss": 2.8176,
      "step": 223211
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.643709182739258,
      "learning_rate": 1.4409381979944345e-06,
      "loss": 2.7476,
      "step": 223212
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7671587467193604,
      "learning_rate": 1.4405377853125899e-06,
      "loss": 2.9691,
      "step": 223213
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7509679794311523,
      "learning_rate": 1.440137428138577e-06,
      "loss": 2.8457,
      "step": 223214
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1348509788513184,
      "learning_rate": 1.4397371264724622e-06,
      "loss": 2.8539,
      "step": 223215
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2254855632781982,
      "learning_rate": 1.4393368803143124e-06,
      "loss": 2.7931,
      "step": 223216
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.303556442260742,
      "learning_rate": 1.4389366896642607e-06,
      "loss": 2.9138,
      "step": 223217
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5770092010498047,
      "learning_rate": 1.4385365545223403e-06,
      "loss": 2.7071,
      "step": 223218
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.4358582496643066,
      "learning_rate": 1.4381364748886182e-06,
      "loss": 2.79,
      "step": 223219
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.340425729751587,
      "learning_rate": 1.437736450763194e-06,
      "loss": 2.8444,
      "step": 223220
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8569531440734863,
      "learning_rate": 1.4373364821461341e-06,
      "loss": 2.6994,
      "step": 223221
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.458076238632202,
      "learning_rate": 1.4369365690375057e-06,
      "loss": 2.8119,
      "step": 223222
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8846640586853027,
      "learning_rate": 1.436536711437375e-06,
      "loss": 3.1337,
      "step": 223223
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5686850547790527,
      "learning_rate": 1.436136909345842e-06,
      "loss": 2.8697,
      "step": 223224
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.838754892349243,
      "learning_rate": 1.4357371627629732e-06,
      "loss": 2.6995,
      "step": 223225
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2798211574554443,
      "learning_rate": 1.4353374716888355e-06,
      "loss": 2.8176,
      "step": 223226
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.92275333404541,
      "learning_rate": 1.434937836123462e-06,
      "loss": 3.0754,
      "step": 223227
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3521299362182617,
      "learning_rate": 1.4345382560670194e-06,
      "loss": 2.9842,
      "step": 223228
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.684584617614746,
      "learning_rate": 1.4341387315195075e-06,
      "loss": 3.018,
      "step": 223229
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.711915969848633,
      "learning_rate": 1.4337392624810262e-06,
      "loss": 2.8409,
      "step": 223230
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9735190868377686,
      "learning_rate": 1.4333398489516756e-06,
      "loss": 2.7442,
      "step": 223231
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.026474952697754,
      "learning_rate": 1.432940490931489e-06,
      "loss": 3.0385,
      "step": 223232
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1343910694122314,
      "learning_rate": 1.432541188420533e-06,
      "loss": 2.8726,
      "step": 223233
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.334306716918945,
      "learning_rate": 1.4321419414189074e-06,
      "loss": 3.1288,
      "step": 223234
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5678627490997314,
      "learning_rate": 1.431742749926712e-06,
      "loss": 3.1085,
      "step": 223235
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7894232273101807,
      "learning_rate": 1.4313436139439804e-06,
      "loss": 3.0459,
      "step": 223236
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.800335645675659,
      "learning_rate": 1.4309445334707792e-06,
      "loss": 2.9901,
      "step": 223237
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3519186973571777,
      "learning_rate": 1.4305455085072082e-06,
      "loss": 2.8589,
      "step": 223238
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7723636627197266,
      "learning_rate": 1.430146539053334e-06,
      "loss": 2.8641,
      "step": 223239
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.604947328567505,
      "learning_rate": 1.4297476251092566e-06,
      "loss": 3.0993,
      "step": 223240
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.064664840698242,
      "learning_rate": 1.4293487666749759e-06,
      "loss": 3.0842,
      "step": 223241
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.752875804901123,
      "learning_rate": 1.4289499637506585e-06,
      "loss": 2.8999,
      "step": 223242
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.404512882232666,
      "learning_rate": 1.4285512163363378e-06,
      "loss": 3.0848,
      "step": 223243
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.781792163848877,
      "learning_rate": 1.42815252443208e-06,
      "loss": 2.8683,
      "step": 223244
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0175845623016357,
      "learning_rate": 1.427753888037919e-06,
      "loss": 2.8472,
      "step": 223245
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5067951679229736,
      "learning_rate": 1.4273553071540211e-06,
      "loss": 2.8973,
      "step": 223246
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9165239334106445,
      "learning_rate": 1.4269567817803862e-06,
      "loss": 3.0502,
      "step": 223247
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.062481164932251,
      "learning_rate": 1.4265583119171475e-06,
      "loss": 2.9427,
      "step": 223248
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.580714464187622,
      "learning_rate": 1.4261598975643386e-06,
      "loss": 2.9366,
      "step": 223249
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9104769229888916,
      "learning_rate": 1.4257615387220255e-06,
      "loss": 2.8291,
      "step": 223250
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.347282648086548,
      "learning_rate": 1.4253632353903088e-06,
      "loss": 2.7915,
      "step": 223251
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.972111225128174,
      "learning_rate": 1.424964987569255e-06,
      "loss": 3.0691,
      "step": 223252
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8257806301116943,
      "learning_rate": 1.4245667952589302e-06,
      "loss": 2.942,
      "step": 223253
    },
    {
      "epoch": 2.91,
      "grad_norm": 6.192998886108398,
      "learning_rate": 1.4241686584594348e-06,
      "loss": 3.2144,
      "step": 223254
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.757936954498291,
      "learning_rate": 1.423770577170802e-06,
      "loss": 3.0533,
      "step": 223255
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9147634506225586,
      "learning_rate": 1.4233725513931316e-06,
      "loss": 3.0895,
      "step": 223256
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8530843257904053,
      "learning_rate": 1.4229745811264904e-06,
      "loss": 2.8931,
      "step": 223257
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.55259370803833,
      "learning_rate": 1.4225766663709782e-06,
      "loss": 2.9532,
      "step": 223258
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8423547744750977,
      "learning_rate": 1.422178807126595e-06,
      "loss": 3.1485,
      "step": 223259
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.185317039489746,
      "learning_rate": 1.4217810033935073e-06,
      "loss": 2.6064,
      "step": 223260
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3047590255737305,
      "learning_rate": 1.4213832551717152e-06,
      "loss": 3.0735,
      "step": 223261
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1851184368133545,
      "learning_rate": 1.420985562461352e-06,
      "loss": 2.941,
      "step": 223262
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.705148458480835,
      "learning_rate": 1.4205879252624508e-06,
      "loss": 3.0903,
      "step": 223263
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.203537702560425,
      "learning_rate": 1.4201903435751116e-06,
      "loss": 2.8521,
      "step": 223264
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.872615098953247,
      "learning_rate": 1.4197928173993678e-06,
      "loss": 3.0683,
      "step": 223265
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8150994777679443,
      "learning_rate": 1.4193953467353525e-06,
      "loss": 3.0752,
      "step": 223266
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4289743900299072,
      "learning_rate": 1.4189979315830658e-06,
      "loss": 2.9076,
      "step": 223267
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9376413822174072,
      "learning_rate": 1.418600571942674e-06,
      "loss": 3.0828,
      "step": 223268
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7284469604492188,
      "learning_rate": 1.4182032678141775e-06,
      "loss": 2.8062,
      "step": 223269
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.175151348114014,
      "learning_rate": 1.4178060191976758e-06,
      "loss": 2.8384,
      "step": 223270
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9785382747650146,
      "learning_rate": 1.417408826093236e-06,
      "loss": 2.8889,
      "step": 223271
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.114262342453003,
      "learning_rate": 1.4170116885009243e-06,
      "loss": 2.7517,
      "step": 223272
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.127505302429199,
      "learning_rate": 1.4166146064208407e-06,
      "loss": 2.7562,
      "step": 223273
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.7023816108703613,
      "learning_rate": 1.4162175798530518e-06,
      "loss": 2.8055,
      "step": 223274
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.012148380279541,
      "learning_rate": 1.4158206087976243e-06,
      "loss": 2.9509,
      "step": 223275
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2109293937683105,
      "learning_rate": 1.4154236932546248e-06,
      "loss": 2.8197,
      "step": 223276
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6883938312530518,
      "learning_rate": 1.4150268332241199e-06,
      "loss": 2.9659,
      "step": 223277
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5989134311676025,
      "learning_rate": 1.4146300287062095e-06,
      "loss": 3.0625,
      "step": 223278
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.699099063873291,
      "learning_rate": 1.414233279700927e-06,
      "loss": 2.816,
      "step": 223279
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.9498205184936523,
      "learning_rate": 1.4138365862084388e-06,
      "loss": 2.8551,
      "step": 223280
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8183717727661133,
      "learning_rate": 1.4134399482287118e-06,
      "loss": 2.9069,
      "step": 223281
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.593482255935669,
      "learning_rate": 1.413043365761879e-06,
      "loss": 2.941,
      "step": 223282
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6469221115112305,
      "learning_rate": 1.4126468388079736e-06,
      "loss": 2.7299,
      "step": 223283
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.670956611633301,
      "learning_rate": 1.4122503673670959e-06,
      "loss": 2.7576,
      "step": 223284
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.593242883682251,
      "learning_rate": 1.4118539514393457e-06,
      "loss": 2.7375,
      "step": 223285
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.315072536468506,
      "learning_rate": 1.4114575910247562e-06,
      "loss": 3.0728,
      "step": 223286
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6571204662323,
      "learning_rate": 1.411061286123394e-06,
      "loss": 3.051,
      "step": 223287
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8098175525665283,
      "learning_rate": 1.4106650367353922e-06,
      "loss": 2.8924,
      "step": 223288
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.938896656036377,
      "learning_rate": 1.4102688428607512e-06,
      "loss": 2.7786,
      "step": 223289
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0332958698272705,
      "learning_rate": 1.4098727044995706e-06,
      "loss": 2.8649,
      "step": 223290
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6198158264160156,
      "learning_rate": 1.4094766216519505e-06,
      "loss": 3.0728,
      "step": 223291
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.024784803390503,
      "learning_rate": 1.4090805943179572e-06,
      "loss": 2.7202,
      "step": 223292
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6941566467285156,
      "learning_rate": 1.4086846224976245e-06,
      "loss": 2.8174,
      "step": 223293
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8729968070983887,
      "learning_rate": 1.4082887061910852e-06,
      "loss": 2.7113,
      "step": 223294
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.181220769882202,
      "learning_rate": 1.4078928453983396e-06,
      "loss": 2.7891,
      "step": 223295
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9728760719299316,
      "learning_rate": 1.4074970401195207e-06,
      "loss": 2.93,
      "step": 223296
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.767521858215332,
      "learning_rate": 1.4071012903546951e-06,
      "loss": 2.7147,
      "step": 223297
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.457810163497925,
      "learning_rate": 1.4067055961039297e-06,
      "loss": 2.6599,
      "step": 223298
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.201978921890259,
      "learning_rate": 1.4063099573672576e-06,
      "loss": 3.0967,
      "step": 223299
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2410809993743896,
      "learning_rate": 1.4059143741448454e-06,
      "loss": 2.7597,
      "step": 223300
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8442609310150146,
      "learning_rate": 1.4055188464366596e-06,
      "loss": 2.9997,
      "step": 223301
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.398890495300293,
      "learning_rate": 1.4051233742428335e-06,
      "loss": 2.8788,
      "step": 223302
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.033377170562744,
      "learning_rate": 1.404727957563434e-06,
      "loss": 2.7999,
      "step": 223303
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7275452613830566,
      "learning_rate": 1.4043325963985607e-06,
      "loss": 2.8622,
      "step": 223304
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6869313716888428,
      "learning_rate": 1.403937290748247e-06,
      "loss": 2.9715,
      "step": 223305
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.471142053604126,
      "learning_rate": 1.4035420406125596e-06,
      "loss": 2.9121,
      "step": 223306
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7679150104522705,
      "learning_rate": 1.4031468459915983e-06,
      "loss": 2.8489,
      "step": 223307
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8994100093841553,
      "learning_rate": 1.4027517068854299e-06,
      "loss": 3.0821,
      "step": 223308
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7249393463134766,
      "learning_rate": 1.4023566232941208e-06,
      "loss": 2.8887,
      "step": 223309
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3395659923553467,
      "learning_rate": 1.4019615952177377e-06,
      "loss": 2.9183,
      "step": 223310
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.573232412338257,
      "learning_rate": 1.4015666226564138e-06,
      "loss": 2.8578,
      "step": 223311
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1301372051239014,
      "learning_rate": 1.4011717056101157e-06,
      "loss": 2.6464,
      "step": 223312
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6712839603424072,
      "learning_rate": 1.4007768440790102e-06,
      "loss": 2.7191,
      "step": 223313
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.345275640487671,
      "learning_rate": 1.4003820380631302e-06,
      "loss": 2.8712,
      "step": 223314
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.228402853012085,
      "learning_rate": 1.399987287562576e-06,
      "loss": 3.0279,
      "step": 223315
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1794145107269287,
      "learning_rate": 1.399592592577381e-06,
      "loss": 2.6332,
      "step": 223316
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.671578884124756,
      "learning_rate": 1.3991979531076447e-06,
      "loss": 2.8289,
      "step": 223317
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.935917615890503,
      "learning_rate": 1.3988033691534007e-06,
      "loss": 2.8376,
      "step": 223318
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.458710193634033,
      "learning_rate": 1.398408840714782e-06,
      "loss": 2.8929,
      "step": 223319
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6454100608825684,
      "learning_rate": 1.3980143677918554e-06,
      "loss": 2.7048,
      "step": 223320
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3990747928619385,
      "learning_rate": 1.397619950384654e-06,
      "loss": 2.9596,
      "step": 223321
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.980386734008789,
      "learning_rate": 1.3972255884932781e-06,
      "loss": 2.8202,
      "step": 223322
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7422516345977783,
      "learning_rate": 1.396831282117794e-06,
      "loss": 2.6407,
      "step": 223323
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.766002655029297,
      "learning_rate": 1.3964370312582685e-06,
      "loss": 3.0591,
      "step": 223324
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.530996561050415,
      "learning_rate": 1.396042835914768e-06,
      "loss": 2.9597,
      "step": 223325
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.388657569885254,
      "learning_rate": 1.3956486960873925e-06,
      "loss": 2.9659,
      "step": 223326
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.085197687149048,
      "learning_rate": 1.3952546117762087e-06,
      "loss": 2.9167,
      "step": 223327
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1571106910705566,
      "learning_rate": 1.3948605829812831e-06,
      "loss": 3.2496,
      "step": 223328
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.90380859375,
      "learning_rate": 1.3944666097027157e-06,
      "loss": 2.7585,
      "step": 223329
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.164088487625122,
      "learning_rate": 1.3940726919405065e-06,
      "loss": 2.8584,
      "step": 223330
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.668682336807251,
      "learning_rate": 1.3936788296947888e-06,
      "loss": 2.98,
      "step": 223331
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.363978862762451,
      "learning_rate": 1.393285022965629e-06,
      "loss": 2.9253,
      "step": 223332
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.826551914215088,
      "learning_rate": 1.3928912717530938e-06,
      "loss": 2.9559,
      "step": 223333
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.874924898147583,
      "learning_rate": 1.3924975760572833e-06,
      "loss": 2.8234,
      "step": 223334
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.338362693786621,
      "learning_rate": 1.3921039358781972e-06,
      "loss": 3.0947,
      "step": 223335
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8354010581970215,
      "learning_rate": 1.391710351215969e-06,
      "loss": 3.0677,
      "step": 223336
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1178245544433594,
      "learning_rate": 1.3913168220706982e-06,
      "loss": 2.9163,
      "step": 223337
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.121888160705566,
      "learning_rate": 1.390923348442352e-06,
      "loss": 2.9422,
      "step": 223338
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.263385772705078,
      "learning_rate": 1.3905299303311302e-06,
      "loss": 3.0429,
      "step": 223339
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3191583156585693,
      "learning_rate": 1.3901365677369992e-06,
      "loss": 3.0474,
      "step": 223340
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.828123092651367,
      "learning_rate": 1.3897432606601256e-06,
      "loss": 2.9399,
      "step": 223341
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.455854654312134,
      "learning_rate": 1.3893500091004761e-06,
      "loss": 3.1953,
      "step": 223342
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.897123336791992,
      "learning_rate": 1.3889568130582508e-06,
      "loss": 2.9399,
      "step": 223343
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.102851390838623,
      "learning_rate": 1.3885636725333826e-06,
      "loss": 2.7304,
      "step": 223344
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.121676445007324,
      "learning_rate": 1.3881705875260719e-06,
      "loss": 2.9543,
      "step": 223345
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.60674786567688,
      "learning_rate": 1.3877775580363182e-06,
      "loss": 2.9569,
      "step": 223346
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.719001054763794,
      "learning_rate": 1.3873845840642216e-06,
      "loss": 3.0538,
      "step": 223347
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.765023708343506,
      "learning_rate": 1.386991665609849e-06,
      "loss": 2.9619,
      "step": 223348
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.077810764312744,
      "learning_rate": 1.3865988026732666e-06,
      "loss": 2.988,
      "step": 223349
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8123838901519775,
      "learning_rate": 1.3862059952545412e-06,
      "loss": 2.8385,
      "step": 223350
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9440290927886963,
      "learning_rate": 1.3858132433537728e-06,
      "loss": 2.929,
      "step": 223351
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.321017265319824,
      "learning_rate": 1.3854205469709944e-06,
      "loss": 3.0766,
      "step": 223352
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8764452934265137,
      "learning_rate": 1.3850279061063396e-06,
      "loss": 2.6641,
      "step": 223353
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.954118490219116,
      "learning_rate": 1.3846353207598083e-06,
      "loss": 2.8979,
      "step": 223354
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8462917804718018,
      "learning_rate": 1.3842427909315334e-06,
      "loss": 2.9803,
      "step": 223355
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4088621139526367,
      "learning_rate": 1.3838503166215487e-06,
      "loss": 2.7572,
      "step": 223356
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.858959674835205,
      "learning_rate": 1.3834578978299538e-06,
      "loss": 3.1676,
      "step": 223357
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.804515838623047,
      "learning_rate": 1.3830655345567821e-06,
      "loss": 2.9958,
      "step": 223358
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8326172828674316,
      "learning_rate": 1.3826732268022001e-06,
      "loss": 3.0561,
      "step": 223359
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9668805599212646,
      "learning_rate": 1.3822809745661413e-06,
      "loss": 3.1111,
      "step": 223360
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.995981454849243,
      "learning_rate": 1.3818887778488053e-06,
      "loss": 2.7503,
      "step": 223361
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9829816818237305,
      "learning_rate": 1.3814966366501923e-06,
      "loss": 2.953,
      "step": 223362
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.916022539138794,
      "learning_rate": 1.3811045509704022e-06,
      "loss": 2.8214,
      "step": 223363
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5758144855499268,
      "learning_rate": 1.3807125208095016e-06,
      "loss": 3.1252,
      "step": 223364
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.760319232940674,
      "learning_rate": 1.380320546167557e-06,
      "loss": 2.9739,
      "step": 223365
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.014120101928711,
      "learning_rate": 1.3799286270446686e-06,
      "loss": 3.0557,
      "step": 223366
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.827723979949951,
      "learning_rate": 1.3795367634408694e-06,
      "loss": 2.74,
      "step": 223367
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.634185314178467,
      "learning_rate": 1.3791449553562595e-06,
      "loss": 2.8873,
      "step": 223368
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1864583492279053,
      "learning_rate": 1.378753202790872e-06,
      "loss": 2.9718,
      "step": 223369
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.660331964492798,
      "learning_rate": 1.3783615057448406e-06,
      "loss": 2.7879,
      "step": 223370
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9111244678497314,
      "learning_rate": 1.3779698642182314e-06,
      "loss": 2.8774,
      "step": 223371
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.3548054695129395,
      "learning_rate": 1.3775782782110445e-06,
      "loss": 3.016,
      "step": 223372
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8776142597198486,
      "learning_rate": 1.3771867477234466e-06,
      "loss": 2.8901,
      "step": 223373
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4332451820373535,
      "learning_rate": 1.3767952727554377e-06,
      "loss": 3.0027,
      "step": 223374
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2421419620513916,
      "learning_rate": 1.3764038533071176e-06,
      "loss": 3.0753,
      "step": 223375
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9128568172454834,
      "learning_rate": 1.3760124893785862e-06,
      "loss": 2.838,
      "step": 223376
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0709972381591797,
      "learning_rate": 1.375621180969877e-06,
      "loss": 2.7927,
      "step": 223377
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.7605860233306885,
      "learning_rate": 1.3752299280810898e-06,
      "loss": 2.9136,
      "step": 223378
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.148930788040161,
      "learning_rate": 1.3748387307122578e-06,
      "loss": 2.9085,
      "step": 223379
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1629281044006348,
      "learning_rate": 1.374447588863481e-06,
      "loss": 2.7482,
      "step": 223380
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9757494926452637,
      "learning_rate": 1.3740565025348592e-06,
      "loss": 2.7716,
      "step": 223381
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8287930488586426,
      "learning_rate": 1.3736654717264261e-06,
      "loss": 2.9832,
      "step": 223382
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8281242847442627,
      "learning_rate": 1.3732744964382481e-06,
      "loss": 2.9486,
      "step": 223383
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9685723781585693,
      "learning_rate": 1.3728835766704248e-06,
      "loss": 2.9565,
      "step": 223384
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9792892932891846,
      "learning_rate": 1.3724927124230233e-06,
      "loss": 2.798,
      "step": 223385
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4386816024780273,
      "learning_rate": 1.37210190369611e-06,
      "loss": 2.9315,
      "step": 223386
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2151970863342285,
      "learning_rate": 1.3717111504897849e-06,
      "loss": 2.8427,
      "step": 223387
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1407101154327393,
      "learning_rate": 1.3713204528040477e-06,
      "loss": 2.7736,
      "step": 223388
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6038942337036133,
      "learning_rate": 1.3709298106390654e-06,
      "loss": 2.8967,
      "step": 223389
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8535208702087402,
      "learning_rate": 1.3705392239948376e-06,
      "loss": 2.951,
      "step": 223390
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.153714656829834,
      "learning_rate": 1.3701486928714644e-06,
      "loss": 2.9245,
      "step": 223391
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.449362277984619,
      "learning_rate": 1.3697582172690458e-06,
      "loss": 3.1175,
      "step": 223392
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8652241230010986,
      "learning_rate": 1.3693677971875815e-06,
      "loss": 2.6928,
      "step": 223393
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.564493417739868,
      "learning_rate": 1.3689774326272053e-06,
      "loss": 2.8894,
      "step": 223394
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.664403200149536,
      "learning_rate": 1.368587123587983e-06,
      "loss": 2.8937,
      "step": 223395
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1080169677734375,
      "learning_rate": 1.3681968700699818e-06,
      "loss": 2.6715,
      "step": 223396
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3264660835266113,
      "learning_rate": 1.3678066720732683e-06,
      "loss": 2.8164,
      "step": 223397
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.067197322845459,
      "learning_rate": 1.3674165295979089e-06,
      "loss": 2.9106,
      "step": 223398
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6787078380584717,
      "learning_rate": 1.3670264426439703e-06,
      "loss": 2.8906,
      "step": 223399
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5120697021484375,
      "learning_rate": 1.3666364112115524e-06,
      "loss": 2.9709,
      "step": 223400
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.812426805496216,
      "learning_rate": 1.3662464353007218e-06,
      "loss": 2.794,
      "step": 223401
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9876434803009033,
      "learning_rate": 1.3658565149115452e-06,
      "loss": 2.8993,
      "step": 223402
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8914997577667236,
      "learning_rate": 1.3654666500440892e-06,
      "loss": 2.8702,
      "step": 223403
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2556259632110596,
      "learning_rate": 1.3650768406984203e-06,
      "loss": 2.8827,
      "step": 223404
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8547022342681885,
      "learning_rate": 1.3646870868746384e-06,
      "loss": 3.045,
      "step": 223405
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8571665287017822,
      "learning_rate": 1.364297388572777e-06,
      "loss": 2.6119,
      "step": 223406
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.299333333969116,
      "learning_rate": 1.363907745792936e-06,
      "loss": 2.8957,
      "step": 223407
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8547966480255127,
      "learning_rate": 1.3635181585351818e-06,
      "loss": 3.334,
      "step": 223408
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8402259349823,
      "learning_rate": 1.363128626799581e-06,
      "loss": 3.1078,
      "step": 223409
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.638519763946533,
      "learning_rate": 1.3627391505862339e-06,
      "loss": 2.9955,
      "step": 223410
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.908726215362549,
      "learning_rate": 1.3623497298951403e-06,
      "loss": 3.1516,
      "step": 223411
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1236491203308105,
      "learning_rate": 1.3619603647264665e-06,
      "loss": 2.796,
      "step": 223412
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.769934892654419,
      "learning_rate": 1.3615710550802462e-06,
      "loss": 2.921,
      "step": 223413
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4908761978149414,
      "learning_rate": 1.3611818009565457e-06,
      "loss": 3.0225,
      "step": 223414
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9394941329956055,
      "learning_rate": 1.3607926023554316e-06,
      "loss": 2.7662,
      "step": 223415
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9502596855163574,
      "learning_rate": 1.3604034592769708e-06,
      "loss": 2.5957,
      "step": 223416
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8343944549560547,
      "learning_rate": 1.360014371721263e-06,
      "loss": 2.8748,
      "step": 223417
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.26558780670166,
      "learning_rate": 1.3596253396883417e-06,
      "loss": 2.9612,
      "step": 223418
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6573705673217773,
      "learning_rate": 1.3592363631783397e-06,
      "loss": 3.0607,
      "step": 223419
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1739959716796875,
      "learning_rate": 1.358847442191291e-06,
      "loss": 3.0285,
      "step": 223420
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6079299449920654,
      "learning_rate": 1.3584585767272283e-06,
      "loss": 3.0577,
      "step": 223421
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6557931900024414,
      "learning_rate": 1.358069766786285e-06,
      "loss": 3.0038,
      "step": 223422
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.7687182426452637,
      "learning_rate": 1.3576810123685278e-06,
      "loss": 2.9492,
      "step": 223423
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7338263988494873,
      "learning_rate": 1.3572923134740232e-06,
      "loss": 3.0206,
      "step": 223424
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.903895378112793,
      "learning_rate": 1.3569036701028047e-06,
      "loss": 2.7304,
      "step": 223425
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.04190731048584,
      "learning_rate": 1.3565150822550053e-06,
      "loss": 2.9064,
      "step": 223426
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.16375994682312,
      "learning_rate": 1.356126549930625e-06,
      "loss": 3.0048,
      "step": 223427
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.314032793045044,
      "learning_rate": 1.3557380731298306e-06,
      "loss": 3.0554,
      "step": 223428
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9943201541900635,
      "learning_rate": 1.3553496518525886e-06,
      "loss": 2.901,
      "step": 223429
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6754753589630127,
      "learning_rate": 1.3549612860990655e-06,
      "loss": 2.6744,
      "step": 223430
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6703667640686035,
      "learning_rate": 1.3545729758692615e-06,
      "loss": 2.9285,
      "step": 223431
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.883979320526123,
      "learning_rate": 1.3541847211633095e-06,
      "loss": 3.1153,
      "step": 223432
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5118377208709717,
      "learning_rate": 1.353796521981243e-06,
      "loss": 3.0464,
      "step": 223433
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9060416221618652,
      "learning_rate": 1.3534083783231286e-06,
      "loss": 2.7829,
      "step": 223434
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6788792610168457,
      "learning_rate": 1.3530202901890664e-06,
      "loss": 2.7296,
      "step": 223435
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0075130462646484,
      "learning_rate": 1.3526322575791226e-06,
      "loss": 2.7719,
      "step": 223436
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1509857177734375,
      "learning_rate": 1.3522442804933309e-06,
      "loss": 2.8796,
      "step": 223437
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1357815265655518,
      "learning_rate": 1.3518563589318576e-06,
      "loss": 3.0904,
      "step": 223438
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7371938228607178,
      "learning_rate": 1.3514684928946361e-06,
      "loss": 3.0317,
      "step": 223439
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9071602821350098,
      "learning_rate": 1.3510806823818665e-06,
      "loss": 2.8767,
      "step": 223440
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.787834882736206,
      "learning_rate": 1.3506929273935486e-06,
      "loss": 2.8152,
      "step": 223441
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9462599754333496,
      "learning_rate": 1.3503052279297822e-06,
      "loss": 3.0066,
      "step": 223442
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.435225486755371,
      "learning_rate": 1.3499175839906007e-06,
      "loss": 2.8911,
      "step": 223443
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9921326637268066,
      "learning_rate": 1.3495299955761374e-06,
      "loss": 2.966,
      "step": 223444
    },
    {
      "epoch": 2.91,
      "grad_norm": 5.00026273727417,
      "learning_rate": 1.3491424626864256e-06,
      "loss": 2.8964,
      "step": 223445
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9370670318603516,
      "learning_rate": 1.348754985321565e-06,
      "loss": 2.6973,
      "step": 223446
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.67722225189209,
      "learning_rate": 1.348367563481556e-06,
      "loss": 2.941,
      "step": 223447
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7723615169525146,
      "learning_rate": 1.347980197166565e-06,
      "loss": 2.8506,
      "step": 223448
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.764373540878296,
      "learning_rate": 1.347592886376625e-06,
      "loss": 2.8312,
      "step": 223449
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.368892192840576,
      "learning_rate": 1.3472056311118028e-06,
      "loss": 2.6705,
      "step": 223450
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6059811115264893,
      "learning_rate": 1.3468184313721653e-06,
      "loss": 3.1209,
      "step": 223451
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8766074180603027,
      "learning_rate": 1.3464312871577788e-06,
      "loss": 2.9023,
      "step": 223452
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.77880859375,
      "learning_rate": 1.346044198468743e-06,
      "loss": 3.088,
      "step": 223453
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.20869517326355,
      "learning_rate": 1.3456571653051252e-06,
      "loss": 3.1114,
      "step": 223454
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6650688648223877,
      "learning_rate": 1.345270187666958e-06,
      "loss": 3.0176,
      "step": 223455
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4033203125,
      "learning_rate": 1.344883265554375e-06,
      "loss": 2.7503,
      "step": 223456
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.360715866088867,
      "learning_rate": 1.3444963989673763e-06,
      "loss": 2.8375,
      "step": 223457
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8995165824890137,
      "learning_rate": 1.344109587906128e-06,
      "loss": 2.8913,
      "step": 223458
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.08058500289917,
      "learning_rate": 1.3437228323705972e-06,
      "loss": 2.8288,
      "step": 223459
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.234837055206299,
      "learning_rate": 1.343336132360917e-06,
      "loss": 2.8248,
      "step": 223460
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8126132488250732,
      "learning_rate": 1.342949487877154e-06,
      "loss": 3.1127,
      "step": 223461
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8852884769439697,
      "learning_rate": 1.3425628989194083e-06,
      "loss": 3.0762,
      "step": 223462
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.132103443145752,
      "learning_rate": 1.3421763654876794e-06,
      "loss": 3.033,
      "step": 223463
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7074501514434814,
      "learning_rate": 1.3417898875820676e-06,
      "loss": 2.9638,
      "step": 223464
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8581011295318604,
      "learning_rate": 1.341403465202673e-06,
      "loss": 2.8911,
      "step": 223465
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9919488430023193,
      "learning_rate": 1.3410170983495615e-06,
      "loss": 2.8098,
      "step": 223466
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8215482234954834,
      "learning_rate": 1.3406307870227673e-06,
      "loss": 2.7606,
      "step": 223467
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.249722719192505,
      "learning_rate": 1.3402445312223897e-06,
      "loss": 2.9483,
      "step": 223468
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8269200325012207,
      "learning_rate": 1.3398583309484955e-06,
      "loss": 3.1487,
      "step": 223469
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.264935255050659,
      "learning_rate": 1.3394721862011849e-06,
      "loss": 2.7486,
      "step": 223470
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8308265209198,
      "learning_rate": 1.3390860969804906e-06,
      "loss": 2.9109,
      "step": 223471
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.805826425552368,
      "learning_rate": 1.338700063286513e-06,
      "loss": 2.8015,
      "step": 223472
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.847445249557495,
      "learning_rate": 1.338314085119252e-06,
      "loss": 3.0194,
      "step": 223473
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.194068908691406,
      "learning_rate": 1.3379281624789074e-06,
      "loss": 2.8309,
      "step": 223474
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8061578273773193,
      "learning_rate": 1.3375422953654457e-06,
      "loss": 2.7957,
      "step": 223475
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1013736724853516,
      "learning_rate": 1.3371564837789672e-06,
      "loss": 2.8047,
      "step": 223476
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9655704498291016,
      "learning_rate": 1.3367707277195717e-06,
      "loss": 2.8616,
      "step": 223477
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3476266860961914,
      "learning_rate": 1.3363850271872921e-06,
      "loss": 2.7647,
      "step": 223478
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.078892469406128,
      "learning_rate": 1.3359993821821958e-06,
      "loss": 2.8687,
      "step": 223479
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.649527072906494,
      "learning_rate": 1.3356137927044153e-06,
      "loss": 2.96,
      "step": 223480
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.990168333053589,
      "learning_rate": 1.335228258753951e-06,
      "loss": 2.8207,
      "step": 223481
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.86531925201416,
      "learning_rate": 1.3348427803309359e-06,
      "loss": 2.9836,
      "step": 223482
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9400784969329834,
      "learning_rate": 1.3344573574353702e-06,
      "loss": 3.0417,
      "step": 223483
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3986656665802,
      "learning_rate": 1.3340719900674201e-06,
      "loss": 2.883,
      "step": 223484
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5612428188323975,
      "learning_rate": 1.3336866782270527e-06,
      "loss": 3.1513,
      "step": 223485
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6588950157165527,
      "learning_rate": 1.3333014219144345e-06,
      "loss": 2.961,
      "step": 223486
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.817612886428833,
      "learning_rate": 1.3329162211295652e-06,
      "loss": 2.9266,
      "step": 223487
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7711591720581055,
      "learning_rate": 1.3325310758725449e-06,
      "loss": 2.8651,
      "step": 223488
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5456292629241943,
      "learning_rate": 1.3321459861434402e-06,
      "loss": 2.9003,
      "step": 223489
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4248368740081787,
      "learning_rate": 1.331760951942351e-06,
      "loss": 2.9495,
      "step": 223490
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.175534963607788,
      "learning_rate": 1.3313759732693108e-06,
      "loss": 2.8459,
      "step": 223491
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9569451808929443,
      "learning_rate": 1.330991050124386e-06,
      "loss": 2.921,
      "step": 223492
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.201894760131836,
      "learning_rate": 1.3306061825077096e-06,
      "loss": 2.7952,
      "step": 223493
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4090962409973145,
      "learning_rate": 1.3302213704192822e-06,
      "loss": 2.7833,
      "step": 223494
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9994635581970215,
      "learning_rate": 1.3298366138592032e-06,
      "loss": 2.812,
      "step": 223495
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.953123092651367,
      "learning_rate": 1.3294519128275728e-06,
      "loss": 3.2076,
      "step": 223496
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0189826488494873,
      "learning_rate": 1.3290672673243908e-06,
      "loss": 2.9274,
      "step": 223497
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2397429943084717,
      "learning_rate": 1.3286826773498238e-06,
      "loss": 2.8671,
      "step": 223498
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0428972244262695,
      "learning_rate": 1.328298142903872e-06,
      "loss": 2.8675,
      "step": 223499
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1416523456573486,
      "learning_rate": 1.3279136639866017e-06,
      "loss": 2.8998,
      "step": 223500
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.254420042037964,
      "learning_rate": 1.3275292405981463e-06,
      "loss": 2.8022,
      "step": 223501
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1352975368499756,
      "learning_rate": 1.327144872738506e-06,
      "loss": 3.0194,
      "step": 223502
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.7229299545288086,
      "learning_rate": 1.3267605604078136e-06,
      "loss": 2.8136,
      "step": 223503
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5338900089263916,
      "learning_rate": 1.3263763036061025e-06,
      "loss": 2.9306,
      "step": 223504
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4590446949005127,
      "learning_rate": 1.3259921023334729e-06,
      "loss": 2.6851,
      "step": 223505
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.880744457244873,
      "learning_rate": 1.325607956589958e-06,
      "loss": 2.7386,
      "step": 223506
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8926162719726562,
      "learning_rate": 1.3252238663756576e-06,
      "loss": 2.8543,
      "step": 223507
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9929845333099365,
      "learning_rate": 1.3248398316906384e-06,
      "loss": 2.9289,
      "step": 223508
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5738365650177,
      "learning_rate": 1.3244558525349669e-06,
      "loss": 2.8147,
      "step": 223509
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0927608013153076,
      "learning_rate": 1.324071928908743e-06,
      "loss": 2.9299,
      "step": 223510
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2593891620635986,
      "learning_rate": 1.3236880608120003e-06,
      "loss": 2.8633,
      "step": 223511
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9962093830108643,
      "learning_rate": 1.3233042482448053e-06,
      "loss": 3.0402,
      "step": 223512
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.738323211669922,
      "learning_rate": 1.3229204912072577e-06,
      "loss": 2.8943,
      "step": 223513
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2805259227752686,
      "learning_rate": 1.322536789699391e-06,
      "loss": 3.0718,
      "step": 223514
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.663567304611206,
      "learning_rate": 1.3221531437213383e-06,
      "loss": 3.103,
      "step": 223515
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.634220600128174,
      "learning_rate": 1.3217695532731e-06,
      "loss": 2.9014,
      "step": 223516
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9619054794311523,
      "learning_rate": 1.3213860183548086e-06,
      "loss": 2.8107,
      "step": 223517
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8080132007598877,
      "learning_rate": 1.321002538966498e-06,
      "loss": 2.9615,
      "step": 223518
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.801074981689453,
      "learning_rate": 1.3206191151082679e-06,
      "loss": 2.9823,
      "step": 223519
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.632918357849121,
      "learning_rate": 1.3202357467801516e-06,
      "loss": 2.7443,
      "step": 223520
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8175570964813232,
      "learning_rate": 1.3198524339822493e-06,
      "loss": 2.7161,
      "step": 223521
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2616536617279053,
      "learning_rate": 1.3194691767146271e-06,
      "loss": 2.8794,
      "step": 223522
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.686800003051758,
      "learning_rate": 1.3190859749773519e-06,
      "loss": 2.9466,
      "step": 223523
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.558218240737915,
      "learning_rate": 1.3187028287704905e-06,
      "loss": 3.0277,
      "step": 223524
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.929448127746582,
      "learning_rate": 1.3183197380941424e-06,
      "loss": 2.7215,
      "step": 223525
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1795151233673096,
      "learning_rate": 1.3179367029483411e-06,
      "loss": 2.634,
      "step": 223526
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.765045404434204,
      "learning_rate": 1.3175537233331868e-06,
      "loss": 2.833,
      "step": 223527
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.690899610519409,
      "learning_rate": 1.3171707992487123e-06,
      "loss": 3.04,
      "step": 223528
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8512799739837646,
      "learning_rate": 1.316787930695018e-06,
      "loss": 3.0763,
      "step": 223529
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6318817138671875,
      "learning_rate": 1.3164051176721702e-06,
      "loss": 3.1337,
      "step": 223530
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4497623443603516,
      "learning_rate": 1.316022360180269e-06,
      "loss": 2.9533,
      "step": 223531
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.722862720489502,
      "learning_rate": 1.315639658219314e-06,
      "loss": 2.6904,
      "step": 223532
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1380093097686768,
      "learning_rate": 1.3152570117894723e-06,
      "loss": 2.9287,
      "step": 223533
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8052055835723877,
      "learning_rate": 1.3148744208907103e-06,
      "loss": 2.9859,
      "step": 223534
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2899062633514404,
      "learning_rate": 1.3144918855231944e-06,
      "loss": 2.8365,
      "step": 223535
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8875575065612793,
      "learning_rate": 1.314109405686925e-06,
      "loss": 2.7904,
      "step": 223536
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9092252254486084,
      "learning_rate": 1.3137269813820016e-06,
      "loss": 2.8587,
      "step": 223537
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9873154163360596,
      "learning_rate": 1.3133446126085246e-06,
      "loss": 2.9412,
      "step": 223538
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9899113178253174,
      "learning_rate": 1.3129622993664935e-06,
      "loss": 2.7465,
      "step": 223539
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.230748176574707,
      "learning_rate": 1.3125800416560417e-06,
      "loss": 2.9112,
      "step": 223540
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.867764472961426,
      "learning_rate": 1.3121978394772025e-06,
      "loss": 2.993,
      "step": 223541
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2297372817993164,
      "learning_rate": 1.3118156928301093e-06,
      "loss": 2.8538,
      "step": 223542
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0192642211914062,
      "learning_rate": 1.311433601714762e-06,
      "loss": 2.9921,
      "step": 223543
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.918797731399536,
      "learning_rate": 1.311051566131227e-06,
      "loss": 2.7038,
      "step": 223544
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.58158016204834,
      "learning_rate": 1.3106695860796378e-06,
      "loss": 2.9318,
      "step": 223545
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8278956413269043,
      "learning_rate": 1.3102876615600277e-06,
      "loss": 2.8213,
      "step": 223546
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.755988359451294,
      "learning_rate": 1.3099057925724632e-06,
      "loss": 3.0741,
      "step": 223547
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9820139408111572,
      "learning_rate": 1.3095239791170443e-06,
      "loss": 2.9139,
      "step": 223548
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.91745662689209,
      "learning_rate": 1.3091422211938041e-06,
      "loss": 2.9813,
      "step": 223549
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9237232208251953,
      "learning_rate": 1.3087605188028428e-06,
      "loss": 2.9143,
      "step": 223550
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.780818223953247,
      "learning_rate": 1.3083788719442267e-06,
      "loss": 2.6493,
      "step": 223551
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.84785795211792,
      "learning_rate": 1.3079972806180229e-06,
      "loss": 2.9685,
      "step": 223552
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.466351509094238,
      "learning_rate": 1.3076157448242642e-06,
      "loss": 2.7743,
      "step": 223553
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.984534978866577,
      "learning_rate": 1.3072342645631174e-06,
      "loss": 2.7873,
      "step": 223554
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.786975383758545,
      "learning_rate": 1.306852839834549e-06,
      "loss": 3.1499,
      "step": 223555
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9980547428131104,
      "learning_rate": 1.3064714706386924e-06,
      "loss": 2.8013,
      "step": 223556
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.789731025695801,
      "learning_rate": 1.3060901569755812e-06,
      "loss": 2.9875,
      "step": 223557
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0178749561309814,
      "learning_rate": 1.3057088988453147e-06,
      "loss": 2.9982,
      "step": 223558
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1433959007263184,
      "learning_rate": 1.3053276962479932e-06,
      "loss": 2.9087,
      "step": 223559
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9326343536376953,
      "learning_rate": 1.3049465491836164e-06,
      "loss": 3.1563,
      "step": 223560
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9894843101501465,
      "learning_rate": 1.3045654576522845e-06,
      "loss": 2.968,
      "step": 223561
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3661673069000244,
      "learning_rate": 1.3041844216540643e-06,
      "loss": 3.0511,
      "step": 223562
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.067021608352661,
      "learning_rate": 1.3038034411890552e-06,
      "loss": 2.9269,
      "step": 223563
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.990285634994507,
      "learning_rate": 1.303422516257291e-06,
      "loss": 3.0193,
      "step": 223564
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1381750106811523,
      "learning_rate": 1.3030416468588712e-06,
      "loss": 2.8572,
      "step": 223565
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.829535484313965,
      "learning_rate": 1.3026608329938626e-06,
      "loss": 2.8056,
      "step": 223566
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.042728424072266,
      "learning_rate": 1.3022800746623318e-06,
      "loss": 2.7279,
      "step": 223567
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.399477243423462,
      "learning_rate": 1.3018993718643123e-06,
      "loss": 2.9194,
      "step": 223568
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6554877758026123,
      "learning_rate": 1.301518724599937e-06,
      "loss": 3.0129,
      "step": 223569
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7347450256347656,
      "learning_rate": 1.3011381328692394e-06,
      "loss": 2.9202,
      "step": 223570
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8228611946105957,
      "learning_rate": 1.300757596672286e-06,
      "loss": 3.0866,
      "step": 223571
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.185826063156128,
      "learning_rate": 1.3003771160091768e-06,
      "loss": 3.097,
      "step": 223572
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.741905689239502,
      "learning_rate": 1.2999966908799454e-06,
      "loss": 2.9901,
      "step": 223573
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.663611888885498,
      "learning_rate": 1.2996163212847244e-06,
      "loss": 2.9466,
      "step": 223574
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.952667713165283,
      "learning_rate": 1.2992360072235143e-06,
      "loss": 2.8885,
      "step": 223575
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7501275539398193,
      "learning_rate": 1.2988557486964146e-06,
      "loss": 2.9147,
      "step": 223576
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3570010662078857,
      "learning_rate": 1.2984755457034923e-06,
      "loss": 2.9511,
      "step": 223577
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.341130495071411,
      "learning_rate": 1.2980953982448138e-06,
      "loss": 2.8355,
      "step": 223578
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9452579021453857,
      "learning_rate": 1.2977153063204793e-06,
      "loss": 2.9626,
      "step": 223579
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.624403476715088,
      "learning_rate": 1.2973352699305218e-06,
      "loss": 2.8568,
      "step": 223580
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.8407723903656006,
      "learning_rate": 1.2969552890750411e-06,
      "loss": 2.8057,
      "step": 223581
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.547736167907715,
      "learning_rate": 1.2965753637541042e-06,
      "loss": 2.8673,
      "step": 223582
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8460934162139893,
      "learning_rate": 1.2961954939677443e-06,
      "loss": 3.017,
      "step": 223583
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2556679248809814,
      "learning_rate": 1.2958156797160945e-06,
      "loss": 3.2178,
      "step": 223584
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9881863594055176,
      "learning_rate": 1.295435920999155e-06,
      "loss": 2.7695,
      "step": 223585
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5780131816864014,
      "learning_rate": 1.2950562178170588e-06,
      "loss": 3.086,
      "step": 223586
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.096226215362549,
      "learning_rate": 1.294676570169806e-06,
      "loss": 2.9218,
      "step": 223587
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8677990436553955,
      "learning_rate": 1.294296978057563e-06,
      "loss": 2.9287,
      "step": 223588
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3212552070617676,
      "learning_rate": 1.29391744148033e-06,
      "loss": 2.7906,
      "step": 223589
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8189377784729004,
      "learning_rate": 1.2935379604382068e-06,
      "loss": 2.9905,
      "step": 223590
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4419972896575928,
      "learning_rate": 1.2931585349312269e-06,
      "loss": 2.8874,
      "step": 223591
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.009139060974121,
      "learning_rate": 1.292779164959523e-06,
      "loss": 2.9693,
      "step": 223592
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.965658664703369,
      "learning_rate": 1.292399850523096e-06,
      "loss": 3.0947,
      "step": 223593
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5142147541046143,
      "learning_rate": 1.2920205916220783e-06,
      "loss": 3.0212,
      "step": 223594
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.228541374206543,
      "learning_rate": 1.2916413882565034e-06,
      "loss": 2.9591,
      "step": 223595
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.646822690963745,
      "learning_rate": 1.2912622404264384e-06,
      "loss": 2.8195,
      "step": 223596
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.007256507873535,
      "learning_rate": 1.2908831481319826e-06,
      "loss": 2.4943,
      "step": 223597
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1822774410247803,
      "learning_rate": 1.2905041113731696e-06,
      "loss": 2.6986,
      "step": 223598
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1043882369995117,
      "learning_rate": 1.2901251301500992e-06,
      "loss": 3.0796,
      "step": 223599
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.937931537628174,
      "learning_rate": 1.289746204462838e-06,
      "loss": 2.86,
      "step": 223600
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.024679183959961,
      "learning_rate": 1.289367334311453e-06,
      "loss": 2.8619,
      "step": 223601
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0686638355255127,
      "learning_rate": 1.2889885196960104e-06,
      "loss": 2.9198,
      "step": 223602
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1182186603546143,
      "learning_rate": 1.2886097606165767e-06,
      "loss": 2.6858,
      "step": 223603
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.4187378883361816,
      "learning_rate": 1.2882310570732524e-06,
      "loss": 2.9823,
      "step": 223604
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6137852668762207,
      "learning_rate": 1.2878524090660703e-06,
      "loss": 2.8528,
      "step": 223605
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.233214855194092,
      "learning_rate": 1.287473816595097e-06,
      "loss": 2.9866,
      "step": 223606
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8361334800720215,
      "learning_rate": 1.2870952796604327e-06,
      "loss": 2.8731,
      "step": 223607
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1404519081115723,
      "learning_rate": 1.286716798262144e-06,
      "loss": 3.0251,
      "step": 223608
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.720895290374756,
      "learning_rate": 1.2863383724002973e-06,
      "loss": 2.8916,
      "step": 223609
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7740836143493652,
      "learning_rate": 1.2859600020749595e-06,
      "loss": 3.1027,
      "step": 223610
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.741312265396118,
      "learning_rate": 1.2855816872861967e-06,
      "loss": 2.947,
      "step": 223611
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6373982429504395,
      "learning_rate": 1.285203428034076e-06,
      "loss": 3.058,
      "step": 223612
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8756394386291504,
      "learning_rate": 1.2848252243186642e-06,
      "loss": 2.7203,
      "step": 223613
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3515214920043945,
      "learning_rate": 1.2844470761400605e-06,
      "loss": 2.7901,
      "step": 223614
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.320585012435913,
      "learning_rate": 1.2840689834982986e-06,
      "loss": 3.0232,
      "step": 223615
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8256499767303467,
      "learning_rate": 1.2836909463935119e-06,
      "loss": 3.0053,
      "step": 223616
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.077543020248413,
      "learning_rate": 1.2833129648256668e-06,
      "loss": 2.9469,
      "step": 223617
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6387410163879395,
      "learning_rate": 1.28293503879493e-06,
      "loss": 2.8525,
      "step": 223618
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0307130813598633,
      "learning_rate": 1.2825571683013013e-06,
      "loss": 2.9136,
      "step": 223619
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.934570789337158,
      "learning_rate": 1.2821793533449143e-06,
      "loss": 3.0122,
      "step": 223620
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4805245399475098,
      "learning_rate": 1.281801593925802e-06,
      "loss": 2.9446,
      "step": 223621
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.000609874725342,
      "learning_rate": 1.2814238900440643e-06,
      "loss": 2.8722,
      "step": 223622
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8842012882232666,
      "learning_rate": 1.2810462416997013e-06,
      "loss": 2.7438,
      "step": 223623
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.916560173034668,
      "learning_rate": 1.2806686488928797e-06,
      "loss": 3.0988,
      "step": 223624
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.391416549682617,
      "learning_rate": 1.2802911116235658e-06,
      "loss": 2.931,
      "step": 223625
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.016724109649658,
      "learning_rate": 1.2799136298919266e-06,
      "loss": 2.8304,
      "step": 223626
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.870098114013672,
      "learning_rate": 1.279536203697995e-06,
      "loss": 2.7292,
      "step": 223627
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5006539821624756,
      "learning_rate": 1.2791588330418045e-06,
      "loss": 3.0359,
      "step": 223628
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7785956859588623,
      "learning_rate": 1.2787815179234551e-06,
      "loss": 2.7803,
      "step": 223629
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2275567054748535,
      "learning_rate": 1.2784042583430465e-06,
      "loss": 2.7328,
      "step": 223630
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0168092250823975,
      "learning_rate": 1.2780270543006122e-06,
      "loss": 2.85,
      "step": 223631
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7636027336120605,
      "learning_rate": 1.2776499057962185e-06,
      "loss": 2.9366,
      "step": 223632
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.237602472305298,
      "learning_rate": 1.2772728128299657e-06,
      "loss": 2.8238,
      "step": 223633
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2015621662139893,
      "learning_rate": 1.2768957754019206e-06,
      "loss": 3.1428,
      "step": 223634
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1825737953186035,
      "learning_rate": 1.2765187935121158e-06,
      "loss": 3.0247,
      "step": 223635
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.52477765083313,
      "learning_rate": 1.2761418671606515e-06,
      "loss": 2.8607,
      "step": 223636
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6135785579681396,
      "learning_rate": 1.2757649963475613e-06,
      "loss": 2.7754,
      "step": 223637
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.281886100769043,
      "learning_rate": 1.2753881810729781e-06,
      "loss": 3.1382,
      "step": 223638
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7889857292175293,
      "learning_rate": 1.2750114213369356e-06,
      "loss": 2.9333,
      "step": 223639
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0495574474334717,
      "learning_rate": 1.2746347171395333e-06,
      "loss": 3.1045,
      "step": 223640
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7287068367004395,
      "learning_rate": 1.2742580684807712e-06,
      "loss": 3.1127,
      "step": 223641
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1333580017089844,
      "learning_rate": 1.273881475360783e-06,
      "loss": 2.9145,
      "step": 223642
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.489067316055298,
      "learning_rate": 1.2735049377796015e-06,
      "loss": 2.7831,
      "step": 223643
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2520759105682373,
      "learning_rate": 1.2731284557373266e-06,
      "loss": 3.0294,
      "step": 223644
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3128833770751953,
      "learning_rate": 1.2727520292340253e-06,
      "loss": 3.3315,
      "step": 223645
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.162721633911133,
      "learning_rate": 1.2723756582697642e-06,
      "loss": 2.7746,
      "step": 223646
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7931771278381348,
      "learning_rate": 1.2719993428445763e-06,
      "loss": 2.8148,
      "step": 223647
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4822278022766113,
      "learning_rate": 1.271623082958595e-06,
      "loss": 3.0637,
      "step": 223648
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0171797275543213,
      "learning_rate": 1.2712468786118202e-06,
      "loss": 2.9234,
      "step": 223649
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.779268264770508,
      "learning_rate": 1.2708707298043853e-06,
      "loss": 2.9459,
      "step": 223650
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.241081953048706,
      "learning_rate": 1.2704946365363567e-06,
      "loss": 2.8951,
      "step": 223651
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.362175941467285,
      "learning_rate": 1.2701185988077677e-06,
      "loss": 2.6749,
      "step": 223652
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.564863920211792,
      "learning_rate": 1.269742616618652e-06,
      "loss": 3.0429,
      "step": 223653
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.293818712234497,
      "learning_rate": 1.2693666899691756e-06,
      "loss": 3.0342,
      "step": 223654
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1219849586486816,
      "learning_rate": 1.2689908188593723e-06,
      "loss": 2.9603,
      "step": 223655
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9218220710754395,
      "learning_rate": 1.268615003289275e-06,
      "loss": 3.0105,
      "step": 223656
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.022226572036743,
      "learning_rate": 1.268239243259017e-06,
      "loss": 2.8637,
      "step": 223657
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8458549976348877,
      "learning_rate": 1.2678635387685986e-06,
      "loss": 2.8646,
      "step": 223658
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2405309677124023,
      "learning_rate": 1.2674878898181196e-06,
      "loss": 3.1301,
      "step": 223659
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8801398277282715,
      "learning_rate": 1.2671122964076795e-06,
      "loss": 2.9371,
      "step": 223660
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.9883289337158203,
      "learning_rate": 1.2667367585373122e-06,
      "loss": 2.8923,
      "step": 223661
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.913003444671631,
      "learning_rate": 1.266361276207084e-06,
      "loss": 2.9779,
      "step": 223662
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1176230907440186,
      "learning_rate": 1.265985849417095e-06,
      "loss": 2.8603,
      "step": 223663
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8408257961273193,
      "learning_rate": 1.265610478167378e-06,
      "loss": 3.0868,
      "step": 223664
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2221744060516357,
      "learning_rate": 1.265235162458067e-06,
      "loss": 3.0334,
      "step": 223665
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.959251642227173,
      "learning_rate": 1.2648599022891614e-06,
      "loss": 2.9015,
      "step": 223666
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.20882511138916,
      "learning_rate": 1.264484697660728e-06,
      "loss": 2.8439,
      "step": 223667
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.670490026473999,
      "learning_rate": 1.2641095485729003e-06,
      "loss": 2.8447,
      "step": 223668
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.083592176437378,
      "learning_rate": 1.263734455025711e-06,
      "loss": 2.9475,
      "step": 223669
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.9124679565429688,
      "learning_rate": 1.2633594170192273e-06,
      "loss": 2.8167,
      "step": 223670
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.005343437194824,
      "learning_rate": 1.2629844345535488e-06,
      "loss": 2.7769,
      "step": 223671
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0840418338775635,
      "learning_rate": 1.2626095076286758e-06,
      "loss": 2.7954,
      "step": 223672
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6833837032318115,
      "learning_rate": 1.262234636244741e-06,
      "loss": 2.8975,
      "step": 223673
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8396100997924805,
      "learning_rate": 1.2618598204018116e-06,
      "loss": 3.0784,
      "step": 223674
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.9220566749572754,
      "learning_rate": 1.2614850600999204e-06,
      "loss": 3.0384,
      "step": 223675
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7482757568359375,
      "learning_rate": 1.2611103553391677e-06,
      "loss": 2.9052,
      "step": 223676
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.128793716430664,
      "learning_rate": 1.2607357061196199e-06,
      "loss": 3.0118,
      "step": 223677
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.067683219909668,
      "learning_rate": 1.2603611124413105e-06,
      "loss": 2.9263,
      "step": 223678
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.009307622909546,
      "learning_rate": 1.2599865743043724e-06,
      "loss": 2.6699,
      "step": 223679
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.785618782043457,
      "learning_rate": 1.2596120917088392e-06,
      "loss": 2.9676,
      "step": 223680
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8685059547424316,
      "learning_rate": 1.2592376646547774e-06,
      "loss": 3.1024,
      "step": 223681
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.785156011581421,
      "learning_rate": 1.2588632931422537e-06,
      "loss": 2.7656,
      "step": 223682
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.9667720794677734,
      "learning_rate": 1.258488977171368e-06,
      "loss": 2.8041,
      "step": 223683
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.4527506828308105,
      "learning_rate": 1.2581147167421534e-06,
      "loss": 3.016,
      "step": 223684
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.079772472381592,
      "learning_rate": 1.257740511854677e-06,
      "loss": 2.9322,
      "step": 223685
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0344460010528564,
      "learning_rate": 1.257366362509038e-06,
      "loss": 3.1124,
      "step": 223686
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4110872745513916,
      "learning_rate": 1.2569922687053035e-06,
      "loss": 2.9466,
      "step": 223687
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.970015525817871,
      "learning_rate": 1.2566182304435401e-06,
      "loss": 2.9805,
      "step": 223688
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.854196548461914,
      "learning_rate": 1.2562442477238145e-06,
      "loss": 2.7283,
      "step": 223689
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.139967918395996,
      "learning_rate": 1.2558703205461595e-06,
      "loss": 2.7262,
      "step": 223690
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.229013204574585,
      "learning_rate": 1.255496448910709e-06,
      "loss": 2.8017,
      "step": 223691
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3046438694000244,
      "learning_rate": 1.2551226328174624e-06,
      "loss": 2.9325,
      "step": 223692
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.348952293395996,
      "learning_rate": 1.2547488722665533e-06,
      "loss": 3.1571,
      "step": 223693
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0748958587646484,
      "learning_rate": 1.2543751672580482e-06,
      "loss": 2.6462,
      "step": 223694
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9512999057769775,
      "learning_rate": 1.2540015177919803e-06,
      "loss": 2.57,
      "step": 223695
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.899313449859619,
      "learning_rate": 1.2536279238684166e-06,
      "loss": 3.0195,
      "step": 223696
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2813971042633057,
      "learning_rate": 1.2532543854874234e-06,
      "loss": 3.0966,
      "step": 223697
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.7006192207336426,
      "learning_rate": 1.2528809026491337e-06,
      "loss": 2.9423,
      "step": 223698
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.659187078475952,
      "learning_rate": 1.252507475353548e-06,
      "loss": 3.0212,
      "step": 223699
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9800236225128174,
      "learning_rate": 1.252134103600766e-06,
      "loss": 3.122,
      "step": 223700
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.977759838104248,
      "learning_rate": 1.2517607873908542e-06,
      "loss": 2.9558,
      "step": 223701
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6706464290618896,
      "learning_rate": 1.2513875267238793e-06,
      "loss": 2.8365,
      "step": 223702
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5737087726593018,
      "learning_rate": 1.251014321599908e-06,
      "loss": 2.8782,
      "step": 223703
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.4489855766296387,
      "learning_rate": 1.2506411720190068e-06,
      "loss": 2.7471,
      "step": 223704
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.201449394226074,
      "learning_rate": 1.2502680779812756e-06,
      "loss": 2.9013,
      "step": 223705
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.766388416290283,
      "learning_rate": 1.249895039486748e-06,
      "loss": 2.877,
      "step": 223706
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1237094402313232,
      "learning_rate": 1.2495220565354902e-06,
      "loss": 2.7313,
      "step": 223707
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1800196170806885,
      "learning_rate": 1.249149129127569e-06,
      "loss": 2.9645,
      "step": 223708
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.734762668609619,
      "learning_rate": 1.2487762572631176e-06,
      "loss": 2.7423,
      "step": 223709
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9399430751800537,
      "learning_rate": 1.248403440942136e-06,
      "loss": 2.8289,
      "step": 223710
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1323416233062744,
      "learning_rate": 1.248030680164691e-06,
      "loss": 2.8196,
      "step": 223711
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.602144956588745,
      "learning_rate": 1.2476579749309156e-06,
      "loss": 2.5934,
      "step": 223712
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.341530799865723,
      "learning_rate": 1.2472853252408432e-06,
      "loss": 2.7708,
      "step": 223713
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6839494705200195,
      "learning_rate": 1.246912731094507e-06,
      "loss": 3.2369,
      "step": 223714
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9506397247314453,
      "learning_rate": 1.246540192492007e-06,
      "loss": 3.1278,
      "step": 223715
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.629628896713257,
      "learning_rate": 1.2461677094334432e-06,
      "loss": 2.7491,
      "step": 223716
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1396634578704834,
      "learning_rate": 1.2457952819188488e-06,
      "loss": 2.7936,
      "step": 223717
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.814317464828491,
      "learning_rate": 1.2454229099482904e-06,
      "loss": 3.1823,
      "step": 223718
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.097054958343506,
      "learning_rate": 1.245050593521868e-06,
      "loss": 2.9404,
      "step": 223719
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.812692165374756,
      "learning_rate": 1.2446783326395815e-06,
      "loss": 3.2816,
      "step": 223720
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8610804080963135,
      "learning_rate": 1.2443061273015975e-06,
      "loss": 2.8411,
      "step": 223721
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3177247047424316,
      "learning_rate": 1.243933977507916e-06,
      "loss": 2.9819,
      "step": 223722
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.596611499786377,
      "learning_rate": 1.243561883258637e-06,
      "loss": 2.5423,
      "step": 223723
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.133352756500244,
      "learning_rate": 1.2431898445538268e-06,
      "loss": 3.1784,
      "step": 223724
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5476224422454834,
      "learning_rate": 1.242817861393519e-06,
      "loss": 2.9089,
      "step": 223725
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7036521434783936,
      "learning_rate": 1.2424459337778137e-06,
      "loss": 2.9935,
      "step": 223726
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.890918016433716,
      "learning_rate": 1.2420740617068104e-06,
      "loss": 3.2132,
      "step": 223727
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8625810146331787,
      "learning_rate": 1.2417022451805092e-06,
      "loss": 3.0551,
      "step": 223728
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6342806816101074,
      "learning_rate": 1.2413304841990436e-06,
      "loss": 3.0076,
      "step": 223729
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0212409496307373,
      "learning_rate": 1.2409587787624464e-06,
      "loss": 2.9971,
      "step": 223730
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.7565677165985107,
      "learning_rate": 1.2405871288707847e-06,
      "loss": 2.9213,
      "step": 223731
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7913496494293213,
      "learning_rate": 1.240215534524125e-06,
      "loss": 2.9508,
      "step": 223732
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.750147819519043,
      "learning_rate": 1.239843995722567e-06,
      "loss": 2.6965,
      "step": 223733
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0587692260742188,
      "learning_rate": 1.2394725124661442e-06,
      "loss": 2.8085,
      "step": 223734
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9491732120513916,
      "learning_rate": 1.2391010847549565e-06,
      "loss": 2.8814,
      "step": 223735
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.5339744091033936,
      "learning_rate": 1.2387297125890704e-06,
      "loss": 3.0464,
      "step": 223736
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.5127837657928467,
      "learning_rate": 1.2383583959685528e-06,
      "loss": 2.8538,
      "step": 223737
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.220595836639404,
      "learning_rate": 1.2379871348934368e-06,
      "loss": 3.105,
      "step": 223738
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.83480167388916,
      "learning_rate": 1.2376159293638221e-06,
      "loss": 2.8935,
      "step": 223739
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.155418634414673,
      "learning_rate": 1.2372447793797758e-06,
      "loss": 2.9501,
      "step": 223740
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7220988273620605,
      "learning_rate": 1.236873684941364e-06,
      "loss": 2.9187,
      "step": 223741
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.123842477798462,
      "learning_rate": 1.2365026460486538e-06,
      "loss": 2.7872,
      "step": 223742
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3006391525268555,
      "learning_rate": 1.2361316627017448e-06,
      "loss": 2.9413,
      "step": 223743
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4495956897735596,
      "learning_rate": 1.2357607349006704e-06,
      "loss": 3.1038,
      "step": 223744
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.454437255859375,
      "learning_rate": 1.2353898626454973e-06,
      "loss": 2.7884,
      "step": 223745
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7535927295684814,
      "learning_rate": 1.2350190459362918e-06,
      "loss": 2.954,
      "step": 223746
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.750316858291626,
      "learning_rate": 1.2346482847731543e-06,
      "loss": 3.047,
      "step": 223747
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8957126140594482,
      "learning_rate": 1.2342775791561178e-06,
      "loss": 3.003,
      "step": 223748
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1999759674072266,
      "learning_rate": 1.233906929085282e-06,
      "loss": 3.104,
      "step": 223749
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8934338092803955,
      "learning_rate": 1.233536334560714e-06,
      "loss": 2.9028,
      "step": 223750
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6596689224243164,
      "learning_rate": 1.233165795582447e-06,
      "loss": 2.8731,
      "step": 223751
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0583057403564453,
      "learning_rate": 1.2327953121505807e-06,
      "loss": 2.9819,
      "step": 223752
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.673652172088623,
      "learning_rate": 1.2324248842651818e-06,
      "loss": 2.6038,
      "step": 223753
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8675546646118164,
      "learning_rate": 1.232054511926317e-06,
      "loss": 2.9225,
      "step": 223754
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0776314735412598,
      "learning_rate": 1.2316841951340529e-06,
      "loss": 2.8533,
      "step": 223755
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.499396324157715,
      "learning_rate": 1.2313139338884558e-06,
      "loss": 3.1993,
      "step": 223756
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7182857990264893,
      "learning_rate": 1.2309437281895928e-06,
      "loss": 3.0434,
      "step": 223757
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4559590816497803,
      "learning_rate": 1.2305735780375303e-06,
      "loss": 2.7315,
      "step": 223758
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.328749179840088,
      "learning_rate": 1.230203483432368e-06,
      "loss": 3.031,
      "step": 223759
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.653740167617798,
      "learning_rate": 1.2298334443741398e-06,
      "loss": 2.7561,
      "step": 223760
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7593703269958496,
      "learning_rate": 1.2294634608629117e-06,
      "loss": 3.311,
      "step": 223761
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.480361223220825,
      "learning_rate": 1.229093532898784e-06,
      "loss": 2.9293,
      "step": 223762
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2025790214538574,
      "learning_rate": 1.2287236604817896e-06,
      "loss": 2.8763,
      "step": 223763
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.903345823287964,
      "learning_rate": 1.2283538436120289e-06,
      "loss": 2.735,
      "step": 223764
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.882891893386841,
      "learning_rate": 1.2279840822895682e-06,
      "loss": 2.89,
      "step": 223765
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.7818944454193115,
      "learning_rate": 1.2276143765144408e-06,
      "loss": 2.7503,
      "step": 223766
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.403541326522827,
      "learning_rate": 1.2272447262867468e-06,
      "loss": 2.9076,
      "step": 223767
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.977055788040161,
      "learning_rate": 1.226875131606586e-06,
      "loss": 2.93,
      "step": 223768
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.889411211013794,
      "learning_rate": 1.2265055924739253e-06,
      "loss": 3.0694,
      "step": 223769
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.116288185119629,
      "learning_rate": 1.226136108888931e-06,
      "loss": 2.94,
      "step": 223770
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0764548778533936,
      "learning_rate": 1.2257666808516364e-06,
      "loss": 2.7571,
      "step": 223771
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.734678268432617,
      "learning_rate": 1.2253973083621082e-06,
      "loss": 2.7676,
      "step": 223772
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.743985652923584,
      "learning_rate": 1.225027991420413e-06,
      "loss": 2.9446,
      "step": 223773
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.44698166847229,
      "learning_rate": 1.2246587300266507e-06,
      "loss": 3.0929,
      "step": 223774
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1773900985717773,
      "learning_rate": 1.2242895241808214e-06,
      "loss": 2.6708,
      "step": 223775
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2026126384735107,
      "learning_rate": 1.2239203738830583e-06,
      "loss": 2.941,
      "step": 223776
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7072720527648926,
      "learning_rate": 1.2235512791333946e-06,
      "loss": 2.8939,
      "step": 223777
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.166561126708984,
      "learning_rate": 1.2231822399319635e-06,
      "loss": 2.978,
      "step": 223778
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9598021507263184,
      "learning_rate": 1.2228132562787318e-06,
      "loss": 2.9023,
      "step": 223779
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7884867191314697,
      "learning_rate": 1.2224443281738328e-06,
      "loss": 2.879,
      "step": 223780
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0637896060943604,
      "learning_rate": 1.222075455617333e-06,
      "loss": 2.8003,
      "step": 223781
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2955994606018066,
      "learning_rate": 1.2217066386092655e-06,
      "loss": 2.7408,
      "step": 223782
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.299110174179077,
      "learning_rate": 1.2213378771497306e-06,
      "loss": 3.02,
      "step": 223783
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1038219928741455,
      "learning_rate": 1.2209691712387948e-06,
      "loss": 2.9727,
      "step": 223784
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8526930809020996,
      "learning_rate": 1.2206005208765245e-06,
      "loss": 2.9932,
      "step": 223785
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0420451164245605,
      "learning_rate": 1.2202319260629866e-06,
      "loss": 2.6433,
      "step": 223786
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.305802583694458,
      "learning_rate": 1.2198633867982143e-06,
      "loss": 2.9166,
      "step": 223787
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.016042470932007,
      "learning_rate": 1.2194949030823409e-06,
      "loss": 2.7198,
      "step": 223788
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6950528621673584,
      "learning_rate": 1.2191264749153995e-06,
      "loss": 2.8928,
      "step": 223789
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7293949127197266,
      "learning_rate": 1.218758102297457e-06,
      "loss": 2.7427,
      "step": 223790
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.956279993057251,
      "learning_rate": 1.2183897852286129e-06,
      "loss": 3.1301,
      "step": 223791
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6291940212249756,
      "learning_rate": 1.2180215237089008e-06,
      "loss": 3.0869,
      "step": 223792
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.451181650161743,
      "learning_rate": 1.2176533177383541e-06,
      "loss": 2.9422,
      "step": 223793
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.360724925994873,
      "learning_rate": 1.2172851673171391e-06,
      "loss": 2.9513,
      "step": 223794
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.884922981262207,
      "learning_rate": 1.2169170724452226e-06,
      "loss": 2.98,
      "step": 223795
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.745398759841919,
      "learning_rate": 1.216549033122771e-06,
      "loss": 2.8997,
      "step": 223796
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8113250732421875,
      "learning_rate": 1.2161810493497849e-06,
      "loss": 2.7508,
      "step": 223797
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7554759979248047,
      "learning_rate": 1.21581312112633e-06,
      "loss": 2.8241,
      "step": 223798
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.26377534866333,
      "learning_rate": 1.2154452484525069e-06,
      "loss": 2.831,
      "step": 223799
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.022894859313965,
      "learning_rate": 1.2150774313283818e-06,
      "loss": 3.1107,
      "step": 223800
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6679320335388184,
      "learning_rate": 1.2147096697539883e-06,
      "loss": 3.0015,
      "step": 223801
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7197577953338623,
      "learning_rate": 1.2143419637294593e-06,
      "loss": 2.7593,
      "step": 223802
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.279890298843384,
      "learning_rate": 1.2139743132547952e-06,
      "loss": 2.8155,
      "step": 223803
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7119293212890625,
      "learning_rate": 1.2136067183301291e-06,
      "loss": 2.9314,
      "step": 223804
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9900615215301514,
      "learning_rate": 1.2132391789554607e-06,
      "loss": 2.8781,
      "step": 223805
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.866546392440796,
      "learning_rate": 1.2128716951308903e-06,
      "loss": 2.8998,
      "step": 223806
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.016425609588623,
      "learning_rate": 1.2125042668564844e-06,
      "loss": 2.8484,
      "step": 223807
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0747838020324707,
      "learning_rate": 1.2121368941323428e-06,
      "loss": 2.8621,
      "step": 223808
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6062264442443848,
      "learning_rate": 1.2117695769584656e-06,
      "loss": 3.0961,
      "step": 223809
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.4754395484924316,
      "learning_rate": 1.211402315334986e-06,
      "loss": 2.8935,
      "step": 223810
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0178215503692627,
      "learning_rate": 1.2110351092619374e-06,
      "loss": 3.1302,
      "step": 223811
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8309898376464844,
      "learning_rate": 1.2106679587394197e-06,
      "loss": 2.764,
      "step": 223812
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8955090045928955,
      "learning_rate": 1.2103008637674327e-06,
      "loss": 2.8857,
      "step": 223813
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.086461067199707,
      "learning_rate": 1.2099338243461431e-06,
      "loss": 2.8069,
      "step": 223814
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.662536144256592,
      "learning_rate": 1.2095668404755178e-06,
      "loss": 3.0081,
      "step": 223815
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.976224899291992,
      "learning_rate": 1.2091999121557227e-06,
      "loss": 3.2218,
      "step": 223816
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.2608280181884766,
      "learning_rate": 1.2088330393867585e-06,
      "loss": 2.7414,
      "step": 223817
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.0173287391662598,
      "learning_rate": 1.2084662221686913e-06,
      "loss": 3.0098,
      "step": 223818
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.780661106109619,
      "learning_rate": 1.2080994605016215e-06,
      "loss": 2.875,
      "step": 223819
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1412665843963623,
      "learning_rate": 1.2077327543856153e-06,
      "loss": 2.8796,
      "step": 223820
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.6438145637512207,
      "learning_rate": 1.2073661038207394e-06,
      "loss": 2.8451,
      "step": 223821
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9519944190979004,
      "learning_rate": 1.2069995088070606e-06,
      "loss": 2.8036,
      "step": 223822
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.755002737045288,
      "learning_rate": 1.206632969344612e-06,
      "loss": 2.8193,
      "step": 223823
    },
    {
      "epoch": 2.91,
      "grad_norm": 5.91571569442749,
      "learning_rate": 1.2062664854334935e-06,
      "loss": 2.7986,
      "step": 223824
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.766221523284912,
      "learning_rate": 1.205900057073772e-06,
      "loss": 2.7477,
      "step": 223825
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.059678554534912,
      "learning_rate": 1.2055336842655139e-06,
      "loss": 3.0153,
      "step": 223826
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7375240325927734,
      "learning_rate": 1.2051673670087859e-06,
      "loss": 2.7694,
      "step": 223827
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.575687408447266,
      "learning_rate": 1.2048011053036878e-06,
      "loss": 2.8824,
      "step": 223828
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.096395969390869,
      "learning_rate": 1.2044348991502195e-06,
      "loss": 2.8366,
      "step": 223829
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1056673526763916,
      "learning_rate": 1.2040687485485146e-06,
      "loss": 3.1361,
      "step": 223830
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.496598482131958,
      "learning_rate": 1.2037026534986062e-06,
      "loss": 2.9903,
      "step": 223831
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.156966209411621,
      "learning_rate": 1.2033366140005608e-06,
      "loss": 3.0476,
      "step": 223832
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.1682522296905518,
      "learning_rate": 1.202970630054445e-06,
      "loss": 2.9699,
      "step": 223833
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.719731569290161,
      "learning_rate": 1.202604701660359e-06,
      "loss": 3.0954,
      "step": 223834
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.588916540145874,
      "learning_rate": 1.202238828818336e-06,
      "loss": 2.826,
      "step": 223835
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.052574634552002,
      "learning_rate": 1.2018730115284758e-06,
      "loss": 2.7244,
      "step": 223836
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.31484317779541,
      "learning_rate": 1.2015072497908118e-06,
      "loss": 2.7794,
      "step": 223837
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.327312707901001,
      "learning_rate": 1.201141543605444e-06,
      "loss": 2.7509,
      "step": 223838
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.906057596206665,
      "learning_rate": 1.2007758929724055e-06,
      "loss": 2.7174,
      "step": 223839
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.948706865310669,
      "learning_rate": 1.2004102978917963e-06,
      "loss": 2.9262,
      "step": 223840
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.825681447982788,
      "learning_rate": 1.2000447583636498e-06,
      "loss": 3.0314,
      "step": 223841
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3898603916168213,
      "learning_rate": 1.1996792743880657e-06,
      "loss": 2.7597,
      "step": 223842
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.914365768432617,
      "learning_rate": 1.1993138459651109e-06,
      "loss": 3.0041,
      "step": 223843
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3932697772979736,
      "learning_rate": 1.1989484730948519e-06,
      "loss": 3.1125,
      "step": 223844
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7497432231903076,
      "learning_rate": 1.1985831557773218e-06,
      "loss": 3.2617,
      "step": 223845
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8701913356781006,
      "learning_rate": 1.1982178940126207e-06,
      "loss": 2.5959,
      "step": 223846
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.510056495666504,
      "learning_rate": 1.1978526878008154e-06,
      "loss": 2.7633,
      "step": 223847
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.242295265197754,
      "learning_rate": 1.1974875371419722e-06,
      "loss": 2.8414,
      "step": 223848
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.227616548538208,
      "learning_rate": 1.197122442036158e-06,
      "loss": 2.8914,
      "step": 223849
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8477470874786377,
      "learning_rate": 1.1967574024834392e-06,
      "loss": 2.5881,
      "step": 223850
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.807826519012451,
      "learning_rate": 1.1963924184838825e-06,
      "loss": 3.0479,
      "step": 223851
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7661120891571045,
      "learning_rate": 1.1960274900375543e-06,
      "loss": 2.9066,
      "step": 223852
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.784064531326294,
      "learning_rate": 1.1956626171445217e-06,
      "loss": 2.909,
      "step": 223853
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.310980319976807,
      "learning_rate": 1.1952977998048841e-06,
      "loss": 3.1421,
      "step": 223854
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.402580499649048,
      "learning_rate": 1.1949330380186417e-06,
      "loss": 2.9773,
      "step": 223855
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.7623913288116455,
      "learning_rate": 1.1945683317859277e-06,
      "loss": 3.3878,
      "step": 223856
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.636727809906006,
      "learning_rate": 1.1942036811067758e-06,
      "loss": 3.052,
      "step": 223857
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.851219654083252,
      "learning_rate": 1.1938390859812518e-06,
      "loss": 2.9691,
      "step": 223858
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4546995162963867,
      "learning_rate": 1.1934745464094564e-06,
      "loss": 2.961,
      "step": 223859
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.167353868484497,
      "learning_rate": 1.193110062391389e-06,
      "loss": 3.0191,
      "step": 223860
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3200557231903076,
      "learning_rate": 1.1927456339272168e-06,
      "loss": 3.0606,
      "step": 223861
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.9527230262756348,
      "learning_rate": 1.1923812610169058e-06,
      "loss": 3.0226,
      "step": 223862
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.6819231510162354,
      "learning_rate": 1.1920169436606232e-06,
      "loss": 2.8914,
      "step": 223863
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.829742670059204,
      "learning_rate": 1.1916526818583683e-06,
      "loss": 2.773,
      "step": 223864
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.209489345550537,
      "learning_rate": 1.1912884756102082e-06,
      "loss": 2.6834,
      "step": 223865
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.747079610824585,
      "learning_rate": 1.1909243249162093e-06,
      "loss": 2.8827,
      "step": 223866
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.4046061038970947,
      "learning_rate": 1.1905602297765049e-06,
      "loss": 2.8356,
      "step": 223867
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.53800892829895,
      "learning_rate": 1.190196190191095e-06,
      "loss": 2.9103,
      "step": 223868
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3807289600372314,
      "learning_rate": 1.1898322061600462e-06,
      "loss": 2.9322,
      "step": 223869
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.001063346862793,
      "learning_rate": 1.1894682776834919e-06,
      "loss": 2.9377,
      "step": 223870
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.653120517730713,
      "learning_rate": 1.1891044047614319e-06,
      "loss": 2.8947,
      "step": 223871
    },
    {
      "epoch": 2.91,
      "grad_norm": 3.3943536281585693,
      "learning_rate": 1.188740587393966e-06,
      "loss": 2.5862,
      "step": 223872
    },
    {
      "epoch": 2.91,
      "grad_norm": 2.8254947662353516,
      "learning_rate": 1.1883768255811277e-06,
      "loss": 2.8979,
      "step": 223873
    },
    {
      "epoch": 2.91,
      "grad_norm": 4.142644882202148,
      "learning_rate": 1.1880131193230503e-06,
      "loss": 3.0568,
      "step": 223874
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6842286586761475,
      "learning_rate": 1.1876494686197335e-06,
      "loss": 2.9164,
      "step": 223875
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2428109645843506,
      "learning_rate": 1.1872858734712776e-06,
      "loss": 2.8795,
      "step": 223876
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7459332942962646,
      "learning_rate": 1.186922333877749e-06,
      "loss": 2.99,
      "step": 223877
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2151975631713867,
      "learning_rate": 1.1865588498392143e-06,
      "loss": 2.8115,
      "step": 223878
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7077724933624268,
      "learning_rate": 1.18619542135574e-06,
      "loss": 2.7683,
      "step": 223879
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.572965621948242,
      "learning_rate": 1.185832048427393e-06,
      "loss": 2.8645,
      "step": 223880
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.045241832733154,
      "learning_rate": 1.1854687310542399e-06,
      "loss": 2.9712,
      "step": 223881
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.722599506378174,
      "learning_rate": 1.185105469236347e-06,
      "loss": 2.7991,
      "step": 223882
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9368484020233154,
      "learning_rate": 1.1847422629737813e-06,
      "loss": 3.0115,
      "step": 223883
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1660752296447754,
      "learning_rate": 1.184379112266609e-06,
      "loss": 3.0904,
      "step": 223884
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0747265815734863,
      "learning_rate": 1.1840160171149305e-06,
      "loss": 3.0925,
      "step": 223885
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0955936908721924,
      "learning_rate": 1.1836529775187454e-06,
      "loss": 2.7726,
      "step": 223886
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.152007818222046,
      "learning_rate": 1.183289993478187e-06,
      "loss": 3.2176,
      "step": 223887
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5527751445770264,
      "learning_rate": 1.1829270649932555e-06,
      "loss": 2.9374,
      "step": 223888
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8629682064056396,
      "learning_rate": 1.182564192064117e-06,
      "loss": 2.791,
      "step": 223889
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6287455558776855,
      "learning_rate": 1.1822013746907388e-06,
      "loss": 3.0488,
      "step": 223890
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6291885375976562,
      "learning_rate": 1.1818386128732204e-06,
      "loss": 2.8571,
      "step": 223891
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8164567947387695,
      "learning_rate": 1.1814759066116619e-06,
      "loss": 3.0148,
      "step": 223892
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3472490310668945,
      "learning_rate": 1.1811132559061297e-06,
      "loss": 3.1383,
      "step": 223893
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3990559577941895,
      "learning_rate": 1.1807506607566242e-06,
      "loss": 2.7988,
      "step": 223894
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7943685054779053,
      "learning_rate": 1.1803881211632783e-06,
      "loss": 3.0109,
      "step": 223895
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0045299530029297,
      "learning_rate": 1.1800256371261584e-06,
      "loss": 2.7051,
      "step": 223896
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.433772325515747,
      "learning_rate": 1.179663208645265e-06,
      "loss": 2.8814,
      "step": 223897
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4669878482818604,
      "learning_rate": 1.1793008357207645e-06,
      "loss": 2.71,
      "step": 223898
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6863012313842773,
      "learning_rate": 1.1789385183526234e-06,
      "loss": 2.7762,
      "step": 223899
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.368107795715332,
      "learning_rate": 1.178576256540975e-06,
      "loss": 2.894,
      "step": 223900
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.8854053020477295,
      "learning_rate": 1.1782140502859195e-06,
      "loss": 2.9869,
      "step": 223901
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.640475034713745,
      "learning_rate": 1.1778518995874232e-06,
      "loss": 3.1957,
      "step": 223902
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6662650108337402,
      "learning_rate": 1.1774898044456194e-06,
      "loss": 3.1389,
      "step": 223903
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.889699697494507,
      "learning_rate": 1.1771277648605415e-06,
      "loss": 3.0443,
      "step": 223904
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0611608028411865,
      "learning_rate": 1.1767657808322893e-06,
      "loss": 2.9245,
      "step": 223905
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.199402093887329,
      "learning_rate": 1.17640385236093e-06,
      "loss": 2.6979,
      "step": 223906
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.019770383834839,
      "learning_rate": 1.176041979446496e-06,
      "loss": 3.1023,
      "step": 223907
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.237391948699951,
      "learning_rate": 1.1756801620890876e-06,
      "loss": 3.1193,
      "step": 223908
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.64965558052063,
      "learning_rate": 1.1753184002887716e-06,
      "loss": 2.8935,
      "step": 223909
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0629005432128906,
      "learning_rate": 1.1749566940455812e-06,
      "loss": 3.066,
      "step": 223910
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.142991065979004,
      "learning_rate": 1.1745950433596164e-06,
      "loss": 2.8032,
      "step": 223911
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.354541301727295,
      "learning_rate": 1.1742334482309434e-06,
      "loss": 2.8016,
      "step": 223912
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.987821578979492,
      "learning_rate": 1.1738719086596293e-06,
      "loss": 2.8777,
      "step": 223913
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0094494819641113,
      "learning_rate": 1.1735104246457072e-06,
      "loss": 2.9556,
      "step": 223914
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8718760013580322,
      "learning_rate": 1.1731489961893103e-06,
      "loss": 2.8702,
      "step": 223915
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5952341556549072,
      "learning_rate": 1.1727876232904054e-06,
      "loss": 3.1414,
      "step": 223916
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.215087413787842,
      "learning_rate": 1.172426305949159e-06,
      "loss": 2.8976,
      "step": 223917
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8230462074279785,
      "learning_rate": 1.1720650441656043e-06,
      "loss": 2.8966,
      "step": 223918
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.430758237838745,
      "learning_rate": 1.171703837939808e-06,
      "loss": 2.9456,
      "step": 223919
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.6917386054992676,
      "learning_rate": 1.1713426872718034e-06,
      "loss": 2.7426,
      "step": 223920
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.997474193572998,
      "learning_rate": 1.1709815921617238e-06,
      "loss": 2.9449,
      "step": 223921
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.809324026107788,
      "learning_rate": 1.170620552609569e-06,
      "loss": 3.0901,
      "step": 223922
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0586705207824707,
      "learning_rate": 1.1702595686154392e-06,
      "loss": 2.8813,
      "step": 223923
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.93001389503479,
      "learning_rate": 1.1698986401794009e-06,
      "loss": 2.7497,
      "step": 223924
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.621849536895752,
      "learning_rate": 1.1695377673015205e-06,
      "loss": 3.0854,
      "step": 223925
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.177567720413208,
      "learning_rate": 1.1691769499818648e-06,
      "loss": 2.7876,
      "step": 223926
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3815839290618896,
      "learning_rate": 1.1688161882205005e-06,
      "loss": 2.8718,
      "step": 223927
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1945457458496094,
      "learning_rate": 1.168455482017494e-06,
      "loss": 2.8428,
      "step": 223928
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.559784173965454,
      "learning_rate": 1.168094831372912e-06,
      "loss": 3.0703,
      "step": 223929
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7878174781799316,
      "learning_rate": 1.1677342362868214e-06,
      "loss": 2.767,
      "step": 223930
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.774359941482544,
      "learning_rate": 1.1673736967593217e-06,
      "loss": 2.8332,
      "step": 223931
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.629803419113159,
      "learning_rate": 1.1670132127904131e-06,
      "loss": 3.1681,
      "step": 223932
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.219503402709961,
      "learning_rate": 1.1666527843801954e-06,
      "loss": 3.1519,
      "step": 223933
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.542311191558838,
      "learning_rate": 1.1662924115287354e-06,
      "loss": 2.9496,
      "step": 223934
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.955284595489502,
      "learning_rate": 1.1659320942361328e-06,
      "loss": 2.9886,
      "step": 223935
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.761500835418701,
      "learning_rate": 1.1655718325023877e-06,
      "loss": 3.0475,
      "step": 223936
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.067164421081543,
      "learning_rate": 1.1652116263276335e-06,
      "loss": 2.7644,
      "step": 223937
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.69705867767334,
      "learning_rate": 1.1648514757119031e-06,
      "loss": 2.9572,
      "step": 223938
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.734466314315796,
      "learning_rate": 1.1644913806552303e-06,
      "loss": 2.7447,
      "step": 223939
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8772823810577393,
      "learning_rate": 1.1641313411577814e-06,
      "loss": 3.0021,
      "step": 223940
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.033902883529663,
      "learning_rate": 1.163771357219523e-06,
      "loss": 2.8857,
      "step": 223941
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.686330556869507,
      "learning_rate": 1.163411428840555e-06,
      "loss": 2.6784,
      "step": 223942
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1087121963500977,
      "learning_rate": 1.1630515560209774e-06,
      "loss": 3.0425,
      "step": 223943
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.146991729736328,
      "learning_rate": 1.1626917387607904e-06,
      "loss": 2.9035,
      "step": 223944
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2247631549835205,
      "learning_rate": 1.162331977060127e-06,
      "loss": 2.9345,
      "step": 223945
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.839493989944458,
      "learning_rate": 1.1619722709190204e-06,
      "loss": 2.8446,
      "step": 223946
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.641690492630005,
      "learning_rate": 1.1616126203375376e-06,
      "loss": 3.0942,
      "step": 223947
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.733604907989502,
      "learning_rate": 1.161253025315778e-06,
      "loss": 2.7909,
      "step": 223948
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.044236660003662,
      "learning_rate": 1.1608934858537755e-06,
      "loss": 2.8484,
      "step": 223949
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.052053213119507,
      "learning_rate": 1.1605340019515963e-06,
      "loss": 2.9105,
      "step": 223950
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.262906312942505,
      "learning_rate": 1.1601745736093071e-06,
      "loss": 2.9866,
      "step": 223951
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.724609136581421,
      "learning_rate": 1.1598152008269745e-06,
      "loss": 3.1565,
      "step": 223952
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9497861862182617,
      "learning_rate": 1.1594558836046985e-06,
      "loss": 2.8914,
      "step": 223953
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6838605403900146,
      "learning_rate": 1.1590966219425124e-06,
      "loss": 2.8968,
      "step": 223954
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.954805374145508,
      "learning_rate": 1.1587374158404827e-06,
      "loss": 2.8329,
      "step": 223955
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.9932210445404053,
      "learning_rate": 1.1583782652987094e-06,
      "loss": 2.9263,
      "step": 223956
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.338805675506592,
      "learning_rate": 1.1580191703171925e-06,
      "loss": 2.6572,
      "step": 223957
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1175155639648438,
      "learning_rate": 1.1576601308960653e-06,
      "loss": 2.7432,
      "step": 223958
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0869219303131104,
      "learning_rate": 1.157301147035361e-06,
      "loss": 2.9602,
      "step": 223959
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.80777645111084,
      "learning_rate": 1.1569422187351796e-06,
      "loss": 2.7542,
      "step": 223960
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8145973682403564,
      "learning_rate": 1.156583345995521e-06,
      "loss": 2.7665,
      "step": 223961
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6381688117980957,
      "learning_rate": 1.1562245288165517e-06,
      "loss": 2.9597,
      "step": 223962
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.848688840866089,
      "learning_rate": 1.1558657671982386e-06,
      "loss": 3.0237,
      "step": 223963
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7458486557006836,
      "learning_rate": 1.1555070611407147e-06,
      "loss": 2.96,
      "step": 223964
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.496675491333008,
      "learning_rate": 1.1551484106439801e-06,
      "loss": 2.9771,
      "step": 223965
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.990522623062134,
      "learning_rate": 1.1547898157082014e-06,
      "loss": 3.0568,
      "step": 223966
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5086028575897217,
      "learning_rate": 1.1544312763333452e-06,
      "loss": 2.6433,
      "step": 223967
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2327568531036377,
      "learning_rate": 1.1540727925195447e-06,
      "loss": 3.0982,
      "step": 223968
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5863864421844482,
      "learning_rate": 1.1537143642668335e-06,
      "loss": 2.9822,
      "step": 223969
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.300006151199341,
      "learning_rate": 1.153355991575311e-06,
      "loss": 2.9469,
      "step": 223970
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.863516092300415,
      "learning_rate": 1.1529976744449776e-06,
      "loss": 3.1394,
      "step": 223971
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9031033515930176,
      "learning_rate": 1.1526394128759663e-06,
      "loss": 3.152,
      "step": 223972
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7725346088409424,
      "learning_rate": 1.152281206868344e-06,
      "loss": 2.9797,
      "step": 223973
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.933087110519409,
      "learning_rate": 1.1519230564221438e-06,
      "loss": 2.9388,
      "step": 223974
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5489814281463623,
      "learning_rate": 1.151564961537399e-06,
      "loss": 2.7803,
      "step": 223975
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9368200302124023,
      "learning_rate": 1.151206922214276e-06,
      "loss": 2.9272,
      "step": 223976
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.473590612411499,
      "learning_rate": 1.150848938452742e-06,
      "loss": 2.8474,
      "step": 223977
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.334446907043457,
      "learning_rate": 1.1504910102529296e-06,
      "loss": 2.8514,
      "step": 223978
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6327128410339355,
      "learning_rate": 1.1501331376148725e-06,
      "loss": 2.9169,
      "step": 223979
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.8094706535339355,
      "learning_rate": 1.1497753205386706e-06,
      "loss": 2.852,
      "step": 223980
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.832882881164551,
      "learning_rate": 1.1494175590243238e-06,
      "loss": 2.788,
      "step": 223981
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.087773323059082,
      "learning_rate": 1.1490598530719653e-06,
      "loss": 2.992,
      "step": 223982
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.423642158508301,
      "learning_rate": 1.1487022026816284e-06,
      "loss": 2.8782,
      "step": 223983
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0046095848083496,
      "learning_rate": 1.148344607853413e-06,
      "loss": 2.9364,
      "step": 223984
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7572925090789795,
      "learning_rate": 1.1479870685873195e-06,
      "loss": 2.8047,
      "step": 223985
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.005204677581787,
      "learning_rate": 1.1476295848835138e-06,
      "loss": 2.7143,
      "step": 223986
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5593607425689697,
      "learning_rate": 1.147272156741963e-06,
      "loss": 2.8308,
      "step": 223987
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.045691967010498,
      "learning_rate": 1.1469147841628002e-06,
      "loss": 3.0149,
      "step": 223988
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.486368417739868,
      "learning_rate": 1.1465574671460255e-06,
      "loss": 3.0411,
      "step": 223989
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5843966007232666,
      "learning_rate": 1.1462002056918052e-06,
      "loss": 3.195,
      "step": 223990
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7786989212036133,
      "learning_rate": 1.1458429998001062e-06,
      "loss": 2.8635,
      "step": 223991
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6347148418426514,
      "learning_rate": 1.1454858494710616e-06,
      "loss": 2.9497,
      "step": 223992
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.927879571914673,
      "learning_rate": 1.1451287547046717e-06,
      "loss": 2.9001,
      "step": 223993
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.79317569732666,
      "learning_rate": 1.1447717155011026e-06,
      "loss": 2.6447,
      "step": 223994
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6915295124053955,
      "learning_rate": 1.1444147318603214e-06,
      "loss": 2.7959,
      "step": 223995
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.068000078201294,
      "learning_rate": 1.1440578037824277e-06,
      "loss": 2.6435,
      "step": 223996
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.490942001342773,
      "learning_rate": 1.1437009312675216e-06,
      "loss": 2.9704,
      "step": 223997
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2879509925842285,
      "learning_rate": 1.1433441143156363e-06,
      "loss": 2.9407,
      "step": 223998
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.849618673324585,
      "learning_rate": 1.1429873529268385e-06,
      "loss": 2.9407,
      "step": 223999
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.673987865447998,
      "learning_rate": 1.1426306471011948e-06,
      "loss": 2.8685,
      "step": 224000
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2558417320251465,
      "learning_rate": 1.1422739968387718e-06,
      "loss": 3.0034,
      "step": 224001
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.646867036819458,
      "learning_rate": 1.1419174021396692e-06,
      "loss": 2.8606,
      "step": 224002
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7107350826263428,
      "learning_rate": 1.1415608630039207e-06,
      "loss": 2.7499,
      "step": 224003
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.589183807373047,
      "learning_rate": 1.1412043794315596e-06,
      "loss": 3.0898,
      "step": 224004
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.720961332321167,
      "learning_rate": 1.140847951422752e-06,
      "loss": 3.1792,
      "step": 224005
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.075181007385254,
      "learning_rate": 1.1404915789774649e-06,
      "loss": 3.0802,
      "step": 224006
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6863722801208496,
      "learning_rate": 1.1401352620957983e-06,
      "loss": 3.1088,
      "step": 224007
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.705307483673096,
      "learning_rate": 1.1397790007778184e-06,
      "loss": 2.8529,
      "step": 224008
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3900632858276367,
      "learning_rate": 1.1394227950235922e-06,
      "loss": 2.8795,
      "step": 224009
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.781313180923462,
      "learning_rate": 1.1390666448332197e-06,
      "loss": 2.7532,
      "step": 224010
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.479344606399536,
      "learning_rate": 1.138710550206734e-06,
      "loss": 2.7331,
      "step": 224011
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1054039001464844,
      "learning_rate": 1.1383545111441682e-06,
      "loss": 3.012,
      "step": 224012
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9593451023101807,
      "learning_rate": 1.137998527645656e-06,
      "loss": 3.034,
      "step": 224013
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9363224506378174,
      "learning_rate": 1.1376425997112305e-06,
      "loss": 2.8212,
      "step": 224014
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.8447179794311523,
      "learning_rate": 1.1372867273409248e-06,
      "loss": 3.0328,
      "step": 224015
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9440667629241943,
      "learning_rate": 1.1369309105348723e-06,
      "loss": 2.8004,
      "step": 224016
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8729450702667236,
      "learning_rate": 1.1365751492931062e-06,
      "loss": 2.8083,
      "step": 224017
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.607102870941162,
      "learning_rate": 1.1362194436156601e-06,
      "loss": 3.0473,
      "step": 224018
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.796553134918213,
      "learning_rate": 1.135863793502667e-06,
      "loss": 3.0665,
      "step": 224019
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9282519817352295,
      "learning_rate": 1.135508198954127e-06,
      "loss": 3.0175,
      "step": 224020
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.912074089050293,
      "learning_rate": 1.1351526599701732e-06,
      "loss": 2.9298,
      "step": 224021
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.404024600982666,
      "learning_rate": 1.1347971765508057e-06,
      "loss": 2.9168,
      "step": 224022
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.547663688659668,
      "learning_rate": 1.1344417486961244e-06,
      "loss": 2.9314,
      "step": 224023
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2174062728881836,
      "learning_rate": 1.134086376406196e-06,
      "loss": 3.1582,
      "step": 224024
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6007676124572754,
      "learning_rate": 1.1337310596810867e-06,
      "loss": 2.8493,
      "step": 224025
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1460072994232178,
      "learning_rate": 1.1333757985208635e-06,
      "loss": 2.8343,
      "step": 224026
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8361575603485107,
      "learning_rate": 1.133020592925593e-06,
      "loss": 2.8435,
      "step": 224027
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.67749285697937,
      "learning_rate": 1.132665442895342e-06,
      "loss": 2.8248,
      "step": 224028
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3750200271606445,
      "learning_rate": 1.1323103484301432e-06,
      "loss": 3.1128,
      "step": 224029
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.6153626441955566,
      "learning_rate": 1.131955309530097e-06,
      "loss": 3.0589,
      "step": 224030
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0578854084014893,
      "learning_rate": 1.13160032619527e-06,
      "loss": 3.0443,
      "step": 224031
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.116987466812134,
      "learning_rate": 1.1312453984256953e-06,
      "loss": 2.7388,
      "step": 224032
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.056903600692749,
      "learning_rate": 1.1308905262214728e-06,
      "loss": 2.8344,
      "step": 224033
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.377138614654541,
      "learning_rate": 1.1305357095827028e-06,
      "loss": 2.9366,
      "step": 224034
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.261189937591553,
      "learning_rate": 1.1301809485093515e-06,
      "loss": 3.3042,
      "step": 224035
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7965545654296875,
      "learning_rate": 1.1298262430015526e-06,
      "loss": 2.9741,
      "step": 224036
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3385396003723145,
      "learning_rate": 1.1294715930594055e-06,
      "loss": 2.8535,
      "step": 224037
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5202107429504395,
      "learning_rate": 1.1291169986828775e-06,
      "loss": 2.8961,
      "step": 224038
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9240880012512207,
      "learning_rate": 1.1287624598721013e-06,
      "loss": 2.7344,
      "step": 224039
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.690063953399658,
      "learning_rate": 1.1284079766271436e-06,
      "loss": 2.8772,
      "step": 224040
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.844888687133789,
      "learning_rate": 1.1280535489480714e-06,
      "loss": 2.8382,
      "step": 224041
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.247541666030884,
      "learning_rate": 1.1276991768348842e-06,
      "loss": 3.0903,
      "step": 224042
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.090942144393921,
      "learning_rate": 1.1273448602877488e-06,
      "loss": 2.9869,
      "step": 224043
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.194533824920654,
      "learning_rate": 1.1269905993066652e-06,
      "loss": 2.8164,
      "step": 224044
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.450467586517334,
      "learning_rate": 1.1266363938917e-06,
      "loss": 2.8805,
      "step": 224045
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.498883008956909,
      "learning_rate": 1.1262822440429531e-06,
      "loss": 2.9713,
      "step": 224046
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.092132806777954,
      "learning_rate": 1.1259281497604577e-06,
      "loss": 2.8485,
      "step": 224047
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9169647693634033,
      "learning_rate": 1.1255741110443139e-06,
      "loss": 2.7655,
      "step": 224048
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6293511390686035,
      "learning_rate": 1.125220127894555e-06,
      "loss": 3.0245,
      "step": 224049
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.435009717941284,
      "learning_rate": 1.1248662003112474e-06,
      "loss": 2.927,
      "step": 224050
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2450077533721924,
      "learning_rate": 1.1245123282944912e-06,
      "loss": 2.7974,
      "step": 224051
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0067949295043945,
      "learning_rate": 1.1241585118443196e-06,
      "loss": 2.8656,
      "step": 224052
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.701422929763794,
      "learning_rate": 1.1238047509608327e-06,
      "loss": 3.0664,
      "step": 224053
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1135053634643555,
      "learning_rate": 1.1234510456440304e-06,
      "loss": 3.0846,
      "step": 224054
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.652900218963623,
      "learning_rate": 1.1230973958940459e-06,
      "loss": 2.8487,
      "step": 224055
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1429355144500732,
      "learning_rate": 1.1227438017109125e-06,
      "loss": 2.8998,
      "step": 224056
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.530127763748169,
      "learning_rate": 1.1223902630946968e-06,
      "loss": 3.0649,
      "step": 224057
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1330573558807373,
      "learning_rate": 1.1220367800454656e-06,
      "loss": 2.8973,
      "step": 224058
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3402099609375,
      "learning_rate": 1.1216833525633184e-06,
      "loss": 2.9526,
      "step": 224059
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1357696056365967,
      "learning_rate": 1.1213299806482556e-06,
      "loss": 2.9555,
      "step": 224060
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8273489475250244,
      "learning_rate": 1.1209766643004104e-06,
      "loss": 2.9131,
      "step": 224061
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2748193740844727,
      "learning_rate": 1.1206234035197826e-06,
      "loss": 2.5906,
      "step": 224062
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0175118446350098,
      "learning_rate": 1.1202701983065054e-06,
      "loss": 2.9575,
      "step": 224063
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3600881099700928,
      "learning_rate": 1.1199170486606123e-06,
      "loss": 3.0442,
      "step": 224064
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4881560802459717,
      "learning_rate": 1.1195639545821367e-06,
      "loss": 2.8785,
      "step": 224065
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0967419147491455,
      "learning_rate": 1.1192109160712116e-06,
      "loss": 3.1177,
      "step": 224066
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5835371017456055,
      "learning_rate": 1.1188579331278702e-06,
      "loss": 3.0354,
      "step": 224067
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.559168815612793,
      "learning_rate": 1.118505005752146e-06,
      "loss": 3.1179,
      "step": 224068
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.83872127532959,
      "learning_rate": 1.118152133944139e-06,
      "loss": 3.0002,
      "step": 224069
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1225838661193848,
      "learning_rate": 1.1177993177039158e-06,
      "loss": 2.9426,
      "step": 224070
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.178673505783081,
      "learning_rate": 1.1174465570315428e-06,
      "loss": 2.8183,
      "step": 224071
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.665446758270264,
      "learning_rate": 1.1170938519270533e-06,
      "loss": 2.8311,
      "step": 224072
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5470545291900635,
      "learning_rate": 1.1167412023905809e-06,
      "loss": 2.8736,
      "step": 224073
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.6907753944396973,
      "learning_rate": 1.116388608422092e-06,
      "loss": 2.9391,
      "step": 224074
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2291078567504883,
      "learning_rate": 1.116036070021753e-06,
      "loss": 3.0625,
      "step": 224075
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.507878303527832,
      "learning_rate": 1.115683587189564e-06,
      "loss": 2.9251,
      "step": 224076
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.687304973602295,
      "learning_rate": 1.1153311599256253e-06,
      "loss": 3.0166,
      "step": 224077
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7166409492492676,
      "learning_rate": 1.11497878822997e-06,
      "loss": 3.1145,
      "step": 224078
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4176185131073,
      "learning_rate": 1.1146264721026976e-06,
      "loss": 2.7168,
      "step": 224079
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3526196479797363,
      "learning_rate": 1.114274211543842e-06,
      "loss": 3.0956,
      "step": 224080
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5448217391967773,
      "learning_rate": 1.1139220065535026e-06,
      "loss": 2.8964,
      "step": 224081
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.752688407897949,
      "learning_rate": 1.113569857131713e-06,
      "loss": 3.1543,
      "step": 224082
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1211657524108887,
      "learning_rate": 1.1132177632785732e-06,
      "loss": 3.0852,
      "step": 224083
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.841949939727783,
      "learning_rate": 1.1128657249941163e-06,
      "loss": 2.9969,
      "step": 224084
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8325700759887695,
      "learning_rate": 1.1125137422784092e-06,
      "loss": 2.8644,
      "step": 224085
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8643314838409424,
      "learning_rate": 1.1121618151315514e-06,
      "loss": 2.9377,
      "step": 224086
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.820690870285034,
      "learning_rate": 1.1118099435535765e-06,
      "loss": 2.8609,
      "step": 224087
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3773956298828125,
      "learning_rate": 1.1114581275445178e-06,
      "loss": 2.8182,
      "step": 224088
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.943349838256836,
      "learning_rate": 1.1111063671045416e-06,
      "loss": 2.9129,
      "step": 224089
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.637633800506592,
      "learning_rate": 1.110754662233615e-06,
      "loss": 3.1559,
      "step": 224090
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7617568969726562,
      "learning_rate": 1.1104030129318708e-06,
      "loss": 3.0894,
      "step": 224091
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.485671281814575,
      "learning_rate": 1.1100514191993092e-06,
      "loss": 2.9302,
      "step": 224092
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.946568489074707,
      "learning_rate": 1.1096998810360303e-06,
      "loss": 3.0164,
      "step": 224093
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0478568077087402,
      "learning_rate": 1.1093483984421335e-06,
      "loss": 2.91,
      "step": 224094
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7308502197265625,
      "learning_rate": 1.1089969714176194e-06,
      "loss": 2.7915,
      "step": 224095
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.320956230163574,
      "learning_rate": 1.108645599962621e-06,
      "loss": 2.892,
      "step": 224096
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1304333209991455,
      "learning_rate": 1.108294284077138e-06,
      "loss": 2.8438,
      "step": 224097
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.767448663711548,
      "learning_rate": 1.1079430237612708e-06,
      "loss": 2.9526,
      "step": 224098
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7335939407348633,
      "learning_rate": 1.1075918190150857e-06,
      "loss": 2.9765,
      "step": 224099
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.092114210128784,
      "learning_rate": 1.107240669838616e-06,
      "loss": 2.8564,
      "step": 224100
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.78006911277771,
      "learning_rate": 1.1068895762319952e-06,
      "loss": 3.1621,
      "step": 224101
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.855945348739624,
      "learning_rate": 1.106538538195223e-06,
      "loss": 3.1048,
      "step": 224102
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9844114780426025,
      "learning_rate": 1.1061875557283994e-06,
      "loss": 2.9972,
      "step": 224103
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1921050548553467,
      "learning_rate": 1.1058366288315578e-06,
      "loss": 2.8303,
      "step": 224104
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7119064331054688,
      "learning_rate": 1.1054857575047981e-06,
      "loss": 2.7716,
      "step": 224105
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.933370351791382,
      "learning_rate": 1.1051349417481537e-06,
      "loss": 2.9775,
      "step": 224106
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1591477394104004,
      "learning_rate": 1.1047841815617242e-06,
      "loss": 2.7138,
      "step": 224107
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.901090383529663,
      "learning_rate": 1.1044334769455432e-06,
      "loss": 3.0466,
      "step": 224108
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6663198471069336,
      "learning_rate": 1.1040828278997106e-06,
      "loss": 2.9538,
      "step": 224109
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5841352939605713,
      "learning_rate": 1.1037322344242595e-06,
      "loss": 3.0489,
      "step": 224110
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1081345081329346,
      "learning_rate": 1.1033816965192566e-06,
      "loss": 2.8522,
      "step": 224111
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.1107330322265625,
      "learning_rate": 1.1030312141847686e-06,
      "loss": 2.8951,
      "step": 224112
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.066680908203125,
      "learning_rate": 1.1026807874208955e-06,
      "loss": 3.0882,
      "step": 224113
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.113863468170166,
      "learning_rate": 1.1023304162276702e-06,
      "loss": 2.7567,
      "step": 224114
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9175381660461426,
      "learning_rate": 1.1019801006051597e-06,
      "loss": 2.9557,
      "step": 224115
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.325409412384033,
      "learning_rate": 1.1016298405534308e-06,
      "loss": 2.8124,
      "step": 224116
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.150003433227539,
      "learning_rate": 1.1012796360725496e-06,
      "loss": 3.0489,
      "step": 224117
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1649200916290283,
      "learning_rate": 1.100929487162583e-06,
      "loss": 2.8317,
      "step": 224118
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.885488986968994,
      "learning_rate": 1.1005793938236307e-06,
      "loss": 2.8268,
      "step": 224119
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2088029384613037,
      "learning_rate": 1.1002293560556929e-06,
      "loss": 3.0475,
      "step": 224120
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.752384901046753,
      "learning_rate": 1.0998793738588362e-06,
      "loss": 2.9289,
      "step": 224121
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7912333011627197,
      "learning_rate": 1.0995294472331939e-06,
      "loss": 2.8669,
      "step": 224122
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3082118034362793,
      "learning_rate": 1.099179576178766e-06,
      "loss": 2.9527,
      "step": 224123
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2294697761535645,
      "learning_rate": 1.0988297606956852e-06,
      "loss": 2.952,
      "step": 224124
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.975658416748047,
      "learning_rate": 1.0984800007839189e-06,
      "loss": 2.8673,
      "step": 224125
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6968955993652344,
      "learning_rate": 1.0981302964436334e-06,
      "loss": 2.8709,
      "step": 224126
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1305863857269287,
      "learning_rate": 1.0977806476748285e-06,
      "loss": 2.9707,
      "step": 224127
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.474709987640381,
      "learning_rate": 1.097431054477571e-06,
      "loss": 2.8153,
      "step": 224128
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.370431661605835,
      "learning_rate": 1.097081516851961e-06,
      "loss": 3.0166,
      "step": 224129
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.640460729598999,
      "learning_rate": 1.0967320347980647e-06,
      "loss": 2.9107,
      "step": 224130
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.361335277557373,
      "learning_rate": 1.096382608315882e-06,
      "loss": 2.9067,
      "step": 224131
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.811715841293335,
      "learning_rate": 1.096033237405547e-06,
      "loss": 3.052,
      "step": 224132
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.755364179611206,
      "learning_rate": 1.0956839220671253e-06,
      "loss": 2.8263,
      "step": 224133
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.187880754470825,
      "learning_rate": 1.0953346623006508e-06,
      "loss": 3.0387,
      "step": 224134
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.746220588684082,
      "learning_rate": 1.0949854581061568e-06,
      "loss": 2.7115,
      "step": 224135
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6009929180145264,
      "learning_rate": 1.0946363094837763e-06,
      "loss": 2.9245,
      "step": 224136
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0660760402679443,
      "learning_rate": 1.0942872164335426e-06,
      "loss": 3.0216,
      "step": 224137
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.931952714920044,
      "learning_rate": 1.0939381789555224e-06,
      "loss": 2.8892,
      "step": 224138
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5426836013793945,
      "learning_rate": 1.0935891970497823e-06,
      "loss": 2.8913,
      "step": 224139
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8733720779418945,
      "learning_rate": 1.093240270716389e-06,
      "loss": 3.0353,
      "step": 224140
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8422343730926514,
      "learning_rate": 1.0928913999554089e-06,
      "loss": 2.8059,
      "step": 224141
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.526622772216797,
      "learning_rate": 1.092542584766909e-06,
      "loss": 2.9492,
      "step": 224142
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7434723377227783,
      "learning_rate": 1.092193825150922e-06,
      "loss": 2.839,
      "step": 224143
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.275628089904785,
      "learning_rate": 1.0918451211075486e-06,
      "loss": 2.7794,
      "step": 224144
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6862218379974365,
      "learning_rate": 1.0914964726368546e-06,
      "loss": 2.7608,
      "step": 224145
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.137824058532715,
      "learning_rate": 1.091147879738874e-06,
      "loss": 2.9088,
      "step": 224146
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8150317668914795,
      "learning_rate": 1.0907993424137063e-06,
      "loss": 2.9706,
      "step": 224147
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.183251142501831,
      "learning_rate": 1.090450860661385e-06,
      "loss": 2.7961,
      "step": 224148
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.312868595123291,
      "learning_rate": 1.0901024344820097e-06,
      "loss": 3.0065,
      "step": 224149
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.048038959503174,
      "learning_rate": 1.0897540638756142e-06,
      "loss": 3.0164,
      "step": 224150
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.424437999725342,
      "learning_rate": 1.089405748842298e-06,
      "loss": 2.8185,
      "step": 224151
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7857871055603027,
      "learning_rate": 1.0890574893820948e-06,
      "loss": 2.8244,
      "step": 224152
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2811920642852783,
      "learning_rate": 1.0887092854950708e-06,
      "loss": 2.7562,
      "step": 224153
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7899701595306396,
      "learning_rate": 1.0883611371812928e-06,
      "loss": 2.8426,
      "step": 224154
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0936782360076904,
      "learning_rate": 1.0880130444408274e-06,
      "loss": 3.0728,
      "step": 224155
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.919243335723877,
      "learning_rate": 1.0876650072737413e-06,
      "loss": 2.7561,
      "step": 224156
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.374405860900879,
      "learning_rate": 1.0873170256801344e-06,
      "loss": 2.9864,
      "step": 224157
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6685421466827393,
      "learning_rate": 1.0869690996600068e-06,
      "loss": 2.8715,
      "step": 224158
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9968042373657227,
      "learning_rate": 1.086621229213458e-06,
      "loss": 2.949,
      "step": 224159
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8713245391845703,
      "learning_rate": 1.0862734143405549e-06,
      "loss": 2.7994,
      "step": 224160
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.106226682662964,
      "learning_rate": 1.0859256550413308e-06,
      "loss": 2.9949,
      "step": 224161
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2726080417633057,
      "learning_rate": 1.0855779513159191e-06,
      "loss": 2.9843,
      "step": 224162
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.753657817840576,
      "learning_rate": 1.0852303031643194e-06,
      "loss": 3.1453,
      "step": 224163
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1392226219177246,
      "learning_rate": 1.084882710586632e-06,
      "loss": 3.076,
      "step": 224164
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.102871894836426,
      "learning_rate": 1.08453517358289e-06,
      "loss": 2.8785,
      "step": 224165
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.196906328201294,
      "learning_rate": 1.08418769215316e-06,
      "loss": 3.0962,
      "step": 224166
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.200289249420166,
      "learning_rate": 1.083840266297542e-06,
      "loss": 2.9487,
      "step": 224167
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.788975954055786,
      "learning_rate": 1.0834928960161027e-06,
      "loss": 2.9349,
      "step": 224168
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.114760160446167,
      "learning_rate": 1.0831455813088419e-06,
      "loss": 2.9576,
      "step": 224169
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5700974464416504,
      "learning_rate": 1.0827983221758928e-06,
      "loss": 2.8625,
      "step": 224170
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7169857025146484,
      "learning_rate": 1.082451118617289e-06,
      "loss": 2.893,
      "step": 224171
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.611320972442627,
      "learning_rate": 1.082103970633097e-06,
      "loss": 3.1238,
      "step": 224172
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0494561195373535,
      "learning_rate": 1.0817568782233832e-06,
      "loss": 2.9947,
      "step": 224173
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.6536026000976562,
      "learning_rate": 1.0814098413882144e-06,
      "loss": 2.9916,
      "step": 224174
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.325129270553589,
      "learning_rate": 1.0810628601276573e-06,
      "loss": 2.8853,
      "step": 224175
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1598992347717285,
      "learning_rate": 1.0807159344417782e-06,
      "loss": 3.0916,
      "step": 224176
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0151610374450684,
      "learning_rate": 1.0803690643306106e-06,
      "loss": 3.0285,
      "step": 224177
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.526402711868286,
      "learning_rate": 1.080022249794288e-06,
      "loss": 2.7977,
      "step": 224178
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0870954990386963,
      "learning_rate": 1.0796754908328098e-06,
      "loss": 2.8152,
      "step": 224179
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.494776725769043,
      "learning_rate": 1.0793287874462431e-06,
      "loss": 2.9561,
      "step": 224180
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8391594886779785,
      "learning_rate": 1.0789821396346876e-06,
      "loss": 2.973,
      "step": 224181
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.791903495788574,
      "learning_rate": 1.07863554739821e-06,
      "loss": 3.2312,
      "step": 224182
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.600297451019287,
      "learning_rate": 1.0782890107368436e-06,
      "loss": 2.8106,
      "step": 224183
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.638592004776001,
      "learning_rate": 1.077942529650655e-06,
      "loss": 2.7898,
      "step": 224184
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.427430152893066,
      "learning_rate": 1.0775961041397108e-06,
      "loss": 2.9223,
      "step": 224185
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7535722255706787,
      "learning_rate": 1.0772497342041108e-06,
      "loss": 2.7616,
      "step": 224186
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9918224811553955,
      "learning_rate": 1.0769034198438886e-06,
      "loss": 2.6872,
      "step": 224187
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.35371732711792,
      "learning_rate": 1.0765571610591105e-06,
      "loss": 2.7486,
      "step": 224188
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5249953269958496,
      "learning_rate": 1.0762109578498434e-06,
      "loss": 3.0552,
      "step": 224189
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8517813682556152,
      "learning_rate": 1.0758648102161537e-06,
      "loss": 2.9014,
      "step": 224190
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8466427326202393,
      "learning_rate": 1.0755187181580748e-06,
      "loss": 2.9142,
      "step": 224191
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8959035873413086,
      "learning_rate": 1.07517268167574e-06,
      "loss": 2.8675,
      "step": 224192
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0231995582580566,
      "learning_rate": 1.074826700769149e-06,
      "loss": 2.8697,
      "step": 224193
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3473408222198486,
      "learning_rate": 1.074480775438402e-06,
      "loss": 2.9164,
      "step": 224194
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8023264408111572,
      "learning_rate": 1.0741349056835324e-06,
      "loss": 3.0228,
      "step": 224195
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3106603622436523,
      "learning_rate": 1.07378909150464e-06,
      "loss": 2.7273,
      "step": 224196
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0333991050720215,
      "learning_rate": 1.0734433329017579e-06,
      "loss": 2.9822,
      "step": 224197
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7725186347961426,
      "learning_rate": 1.073097629874986e-06,
      "loss": 2.9351,
      "step": 224198
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8547632694244385,
      "learning_rate": 1.072751982424358e-06,
      "loss": 2.6849,
      "step": 224199
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0264294147491455,
      "learning_rate": 1.0724063905499404e-06,
      "loss": 2.9783,
      "step": 224200
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8478269577026367,
      "learning_rate": 1.0720608542518328e-06,
      "loss": 2.968,
      "step": 224201
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7625060081481934,
      "learning_rate": 1.0717153735300355e-06,
      "loss": 2.8805,
      "step": 224202
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9152257442474365,
      "learning_rate": 1.0713699483846816e-06,
      "loss": 3.1695,
      "step": 224203
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2069437503814697,
      "learning_rate": 1.0710245788157713e-06,
      "loss": 3.2624,
      "step": 224204
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.822594404220581,
      "learning_rate": 1.0706792648234375e-06,
      "loss": 2.8671,
      "step": 224205
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.068300485610962,
      "learning_rate": 1.0703340064076804e-06,
      "loss": 2.7888,
      "step": 224206
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0417985916137695,
      "learning_rate": 1.0699888035686e-06,
      "loss": 3.1841,
      "step": 224207
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.982728958129883,
      "learning_rate": 1.0696436563062294e-06,
      "loss": 2.8781,
      "step": 224208
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1136951446533203,
      "learning_rate": 1.0692985646207019e-06,
      "loss": 2.9162,
      "step": 224209
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7026546001434326,
      "learning_rate": 1.0689535285119844e-06,
      "loss": 3.0799,
      "step": 224210
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.897246837615967,
      "learning_rate": 1.0686085479802098e-06,
      "loss": 3.1795,
      "step": 224211
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.705919027328491,
      "learning_rate": 1.068263623025445e-06,
      "loss": 2.9102,
      "step": 224212
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9795401096343994,
      "learning_rate": 1.067918753647723e-06,
      "loss": 2.8144,
      "step": 224213
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.880815029144287,
      "learning_rate": 1.0675739398470774e-06,
      "loss": 2.895,
      "step": 224214
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.362425804138184,
      "learning_rate": 1.0672291816236744e-06,
      "loss": 3.0483,
      "step": 224215
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.631507396697998,
      "learning_rate": 1.066884478977481e-06,
      "loss": 2.8921,
      "step": 224216
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.794862747192383,
      "learning_rate": 1.0665398319085972e-06,
      "loss": 2.8777,
      "step": 224217
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.231572389602661,
      "learning_rate": 1.0661952404170893e-06,
      "loss": 2.9826,
      "step": 224218
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.93170166015625,
      "learning_rate": 1.065850704503024e-06,
      "loss": 2.9007,
      "step": 224219
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8155012130737305,
      "learning_rate": 1.0655062241664347e-06,
      "loss": 2.7423,
      "step": 224220
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.722607374191284,
      "learning_rate": 1.0651617994074546e-06,
      "loss": 2.9188,
      "step": 224221
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0564725399017334,
      "learning_rate": 1.0648174302260838e-06,
      "loss": 2.9347,
      "step": 224222
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2804322242736816,
      "learning_rate": 1.0644731166223886e-06,
      "loss": 3.1432,
      "step": 224223
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9549784660339355,
      "learning_rate": 1.0641288585964691e-06,
      "loss": 2.868,
      "step": 224224
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9282045364379883,
      "learning_rate": 1.063784656148392e-06,
      "loss": 2.8524,
      "step": 224225
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.576087713241577,
      "learning_rate": 1.0634405092781573e-06,
      "loss": 2.7038,
      "step": 224226
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0173425674438477,
      "learning_rate": 1.0630964179858981e-06,
      "loss": 2.9022,
      "step": 224227
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.891273260116577,
      "learning_rate": 1.0627523822716478e-06,
      "loss": 2.842,
      "step": 224228
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9060614109039307,
      "learning_rate": 1.062408402135473e-06,
      "loss": 2.8255,
      "step": 224229
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.681273937225342,
      "learning_rate": 1.06206447757744e-06,
      "loss": 2.8487,
      "step": 224230
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.558720588684082,
      "learning_rate": 1.061720608597616e-06,
      "loss": 2.8775,
      "step": 224231
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7991538047790527,
      "learning_rate": 1.0613767951960338e-06,
      "loss": 2.7911,
      "step": 224232
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9752559661865234,
      "learning_rate": 1.0610330373727938e-06,
      "loss": 2.9207,
      "step": 224233
    },
    {
      "epoch": 2.92,
      "grad_norm": 6.571829319000244,
      "learning_rate": 1.0606893351279621e-06,
      "loss": 2.7406,
      "step": 224234
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9765591621398926,
      "learning_rate": 1.0603456884616058e-06,
      "loss": 2.9841,
      "step": 224235
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5192670822143555,
      "learning_rate": 1.0600020973737578e-06,
      "loss": 3.0076,
      "step": 224236
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.45404314994812,
      "learning_rate": 1.0596585618645182e-06,
      "loss": 3.1137,
      "step": 224237
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7444849014282227,
      "learning_rate": 1.059315081933887e-06,
      "loss": 3.1019,
      "step": 224238
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.943767786026001,
      "learning_rate": 1.0589716575819973e-06,
      "loss": 3.219,
      "step": 224239
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6495628356933594,
      "learning_rate": 1.0586282888088826e-06,
      "loss": 3.0977,
      "step": 224240
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1456637382507324,
      "learning_rate": 1.0582849756146428e-06,
      "loss": 2.9763,
      "step": 224241
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7139103412628174,
      "learning_rate": 1.0579417179992777e-06,
      "loss": 3.1849,
      "step": 224242
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8132588863372803,
      "learning_rate": 1.0575985159628874e-06,
      "loss": 2.9786,
      "step": 224243
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5462658405303955,
      "learning_rate": 1.0572553695055385e-06,
      "loss": 2.8768,
      "step": 224244
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9860565662384033,
      "learning_rate": 1.0569122786272976e-06,
      "loss": 2.8686,
      "step": 224245
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.966296434402466,
      "learning_rate": 1.056569243328198e-06,
      "loss": 2.9462,
      "step": 224246
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1429622173309326,
      "learning_rate": 1.0562262636083397e-06,
      "loss": 2.9872,
      "step": 224247
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8741798400878906,
      "learning_rate": 1.0558833394677891e-06,
      "loss": 3.0229,
      "step": 224248
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.201517105102539,
      "learning_rate": 1.0555404709065795e-06,
      "loss": 2.9314,
      "step": 224249
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0680720806121826,
      "learning_rate": 1.0551976579247778e-06,
      "loss": 2.9139,
      "step": 224250
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8827064037323,
      "learning_rate": 1.0548549005224505e-06,
      "loss": 2.8015,
      "step": 224251
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3376638889312744,
      "learning_rate": 1.054512198699664e-06,
      "loss": 2.9345,
      "step": 224252
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0064470767974854,
      "learning_rate": 1.0541695524565187e-06,
      "loss": 2.9993,
      "step": 224253
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0863680839538574,
      "learning_rate": 1.0538269617930474e-06,
      "loss": 3.0926,
      "step": 224254
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5950145721435547,
      "learning_rate": 1.0534844267092834e-06,
      "loss": 3.007,
      "step": 224255
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2408533096313477,
      "learning_rate": 1.053141947205327e-06,
      "loss": 2.9756,
      "step": 224256
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7327470779418945,
      "learning_rate": 1.0527995232812448e-06,
      "loss": 2.8231,
      "step": 224257
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0148370265960693,
      "learning_rate": 1.0524571549370697e-06,
      "loss": 3.0074,
      "step": 224258
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7044200897216797,
      "learning_rate": 1.052114842172902e-06,
      "loss": 3.1085,
      "step": 224259
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4699950218200684,
      "learning_rate": 1.0517725849888082e-06,
      "loss": 3.211,
      "step": 224260
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.38300895690918,
      "learning_rate": 1.0514303833848215e-06,
      "loss": 3.1151,
      "step": 224261
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.92073655128479,
      "learning_rate": 1.0510882373609754e-06,
      "loss": 2.9183,
      "step": 224262
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6982367038726807,
      "learning_rate": 1.050746146917436e-06,
      "loss": 2.8163,
      "step": 224263
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8892974853515625,
      "learning_rate": 1.0504041120541705e-06,
      "loss": 3.0376,
      "step": 224264
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.085519313812256,
      "learning_rate": 1.0500621327712787e-06,
      "loss": 2.8425,
      "step": 224265
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3741846084594727,
      "learning_rate": 1.0497202090688272e-06,
      "loss": 2.9534,
      "step": 224266
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.741438627243042,
      "learning_rate": 1.0493783409468825e-06,
      "loss": 3.0014,
      "step": 224267
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.832289934158325,
      "learning_rate": 1.0490365284055113e-06,
      "loss": 2.7991,
      "step": 224268
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.013535261154175,
      "learning_rate": 1.048694771444747e-06,
      "loss": 2.6662,
      "step": 224269
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.237787961959839,
      "learning_rate": 1.0483530700646892e-06,
      "loss": 2.9598,
      "step": 224270
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9989373683929443,
      "learning_rate": 1.0480114242653715e-06,
      "loss": 2.7606,
      "step": 224271
    },
    {
      "epoch": 2.92,
      "grad_norm": 5.700235843658447,
      "learning_rate": 1.0476698340468604e-06,
      "loss": 2.8746,
      "step": 224272
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0297889709472656,
      "learning_rate": 1.0473282994092558e-06,
      "loss": 2.8912,
      "step": 224273
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8286094665527344,
      "learning_rate": 1.0469868203525912e-06,
      "loss": 2.7096,
      "step": 224274
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7373554706573486,
      "learning_rate": 1.0466453968769328e-06,
      "loss": 2.8281,
      "step": 224275
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2423646450042725,
      "learning_rate": 1.0463040289823476e-06,
      "loss": 2.7775,
      "step": 224276
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.581876039505005,
      "learning_rate": 1.0459627166689022e-06,
      "loss": 3.0427,
      "step": 224277
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1650588512420654,
      "learning_rate": 1.0456214599366298e-06,
      "loss": 2.7221,
      "step": 224278
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.174623727798462,
      "learning_rate": 1.0452802587856302e-06,
      "loss": 2.7406,
      "step": 224279
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9676122665405273,
      "learning_rate": 1.0449391132159702e-06,
      "loss": 3.0721,
      "step": 224280
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0717387199401855,
      "learning_rate": 1.0445980232277162e-06,
      "loss": 2.9623,
      "step": 224281
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.862741470336914,
      "learning_rate": 1.0442569888208685e-06,
      "loss": 3.0803,
      "step": 224282
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.527146339416504,
      "learning_rate": 1.0439160099955602e-06,
      "loss": 3.0277,
      "step": 224283
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.084101676940918,
      "learning_rate": 1.0435750867518244e-06,
      "loss": 2.7114,
      "step": 224284
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5571494102478027,
      "learning_rate": 1.0432342190897613e-06,
      "loss": 3.1351,
      "step": 224285
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8248119354248047,
      "learning_rate": 1.0428934070093708e-06,
      "loss": 2.9333,
      "step": 224286
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.062619209289551,
      "learning_rate": 1.0425526505107862e-06,
      "loss": 2.9161,
      "step": 224287
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.339470386505127,
      "learning_rate": 1.042211949593974e-06,
      "loss": 2.865,
      "step": 224288
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.663119077682495,
      "learning_rate": 1.0418713042591008e-06,
      "loss": 3.0105,
      "step": 224289
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3042683601379395,
      "learning_rate": 1.041530714506167e-06,
      "loss": 2.6436,
      "step": 224290
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4181630611419678,
      "learning_rate": 1.0411901803352718e-06,
      "loss": 2.8104,
      "step": 224291
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.251166582107544,
      "learning_rate": 1.0408497017464823e-06,
      "loss": 3.0634,
      "step": 224292
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.030630111694336,
      "learning_rate": 1.0405092787397984e-06,
      "loss": 3.1854,
      "step": 224293
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.833068370819092,
      "learning_rate": 1.0401689113153533e-06,
      "loss": 3.0964,
      "step": 224294
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5617122650146484,
      "learning_rate": 1.0398285994731804e-06,
      "loss": 3.0714,
      "step": 224295
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8721323013305664,
      "learning_rate": 1.0394883432133127e-06,
      "loss": 2.9923,
      "step": 224296
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1591131687164307,
      "learning_rate": 1.039148142535884e-06,
      "loss": 2.8232,
      "step": 224297
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.192728281021118,
      "learning_rate": 1.0388079974409268e-06,
      "loss": 2.9495,
      "step": 224298
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9680333137512207,
      "learning_rate": 1.0384679079284752e-06,
      "loss": 3.0015,
      "step": 224299
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.338765859603882,
      "learning_rate": 1.0381278739986288e-06,
      "loss": 2.8694,
      "step": 224300
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7897348403930664,
      "learning_rate": 1.037787895651454e-06,
      "loss": 2.9345,
      "step": 224301
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.8250114917755127,
      "learning_rate": 1.0374479728869845e-06,
      "loss": 2.9416,
      "step": 224302
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1210620403289795,
      "learning_rate": 1.0371081057052865e-06,
      "loss": 2.7453,
      "step": 224303
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9804294109344482,
      "learning_rate": 1.036768294106427e-06,
      "loss": 2.7441,
      "step": 224304
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6742141246795654,
      "learning_rate": 1.0364285380905058e-06,
      "loss": 3.1657,
      "step": 224305
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.6355624198913574,
      "learning_rate": 1.0360888376575226e-06,
      "loss": 3.0833,
      "step": 224306
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.8452036380767822,
      "learning_rate": 1.0357491928075778e-06,
      "loss": 2.8463,
      "step": 224307
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.817253589630127,
      "learning_rate": 1.0354096035407379e-06,
      "loss": 2.8138,
      "step": 224308
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0987913608551025,
      "learning_rate": 1.035070069857069e-06,
      "loss": 2.9083,
      "step": 224309
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4022867679595947,
      "learning_rate": 1.0347305917566384e-06,
      "loss": 2.9979,
      "step": 224310
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8805348873138428,
      "learning_rate": 1.0343911692394457e-06,
      "loss": 2.695,
      "step": 224311
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3123109340667725,
      "learning_rate": 1.0340518023056576e-06,
      "loss": 2.9573,
      "step": 224312
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.867093086242676,
      "learning_rate": 1.0337124909552407e-06,
      "loss": 2.8078,
      "step": 224313
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3945157527923584,
      "learning_rate": 1.0333732351883284e-06,
      "loss": 2.8555,
      "step": 224314
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6799817085266113,
      "learning_rate": 1.0330340350049537e-06,
      "loss": 3.1978,
      "step": 224315
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.5100483894348145,
      "learning_rate": 1.0326948904051502e-06,
      "loss": 2.793,
      "step": 224316
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.210503339767456,
      "learning_rate": 1.0323558013890508e-06,
      "loss": 2.8874,
      "step": 224317
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9836792945861816,
      "learning_rate": 1.032016767956656e-06,
      "loss": 2.8956,
      "step": 224318
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3486242294311523,
      "learning_rate": 1.031677790108032e-06,
      "loss": 2.9511,
      "step": 224319
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9951822757720947,
      "learning_rate": 1.0313388678433122e-06,
      "loss": 2.7647,
      "step": 224320
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8090460300445557,
      "learning_rate": 1.0310000011624963e-06,
      "loss": 2.9181,
      "step": 224321
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.464995384216309,
      "learning_rate": 1.0306611900656514e-06,
      "loss": 2.8598,
      "step": 224322
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.097447872161865,
      "learning_rate": 1.0303224345528438e-06,
      "loss": 2.8208,
      "step": 224323
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.328280210494995,
      "learning_rate": 1.0299837346241402e-06,
      "loss": 2.8896,
      "step": 224324
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.867492437362671,
      "learning_rate": 1.0296450902796072e-06,
      "loss": 3.0893,
      "step": 224325
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.248091220855713,
      "learning_rate": 1.0293065015193115e-06,
      "loss": 3.0269,
      "step": 224326
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6697065830230713,
      "learning_rate": 1.0289679683433195e-06,
      "loss": 3.1189,
      "step": 224327
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.103808879852295,
      "learning_rate": 1.028629490751698e-06,
      "loss": 2.85,
      "step": 224328
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2653980255126953,
      "learning_rate": 1.02829106874448e-06,
      "loss": 2.9209,
      "step": 224329
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.192436456680298,
      "learning_rate": 1.0279527023217327e-06,
      "loss": 2.9025,
      "step": 224330
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.977731943130493,
      "learning_rate": 1.0276143914835556e-06,
      "loss": 2.9838,
      "step": 224331
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.986764430999756,
      "learning_rate": 1.0272761362299819e-06,
      "loss": 3.1405,
      "step": 224332
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.494123697280884,
      "learning_rate": 1.0269379365610787e-06,
      "loss": 2.8175,
      "step": 224333
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2490899562835693,
      "learning_rate": 1.026599792476912e-06,
      "loss": 3.1816,
      "step": 224334
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.507709503173828,
      "learning_rate": 1.026261703977549e-06,
      "loss": 3.0952,
      "step": 224335
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3489019870758057,
      "learning_rate": 1.0259236710630226e-06,
      "loss": 2.7335,
      "step": 224336
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.798309087753296,
      "learning_rate": 1.0255856937334328e-06,
      "loss": 3.0617,
      "step": 224337
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.594444751739502,
      "learning_rate": 1.0252477719888463e-06,
      "loss": 2.5674,
      "step": 224338
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6707751750946045,
      "learning_rate": 1.0249099058292965e-06,
      "loss": 3.1095,
      "step": 224339
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6336028575897217,
      "learning_rate": 1.024572095254883e-06,
      "loss": 2.736,
      "step": 224340
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.352376699447632,
      "learning_rate": 1.0242343402656062e-06,
      "loss": 2.9421,
      "step": 224341
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3549108505249023,
      "learning_rate": 1.0238966408615989e-06,
      "loss": 2.8702,
      "step": 224342
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8813564777374268,
      "learning_rate": 1.0235589970428614e-06,
      "loss": 2.9223,
      "step": 224343
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9748475551605225,
      "learning_rate": 1.023221408809527e-06,
      "loss": 2.8372,
      "step": 224344
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.321218967437744,
      "learning_rate": 1.0228838761615954e-06,
      "loss": 2.965,
      "step": 224345
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6131591796875,
      "learning_rate": 1.0225463990991666e-06,
      "loss": 3.1879,
      "step": 224346
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1286792755126953,
      "learning_rate": 1.0222089776222742e-06,
      "loss": 3.1221,
      "step": 224347
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.715005397796631,
      "learning_rate": 1.0218716117310178e-06,
      "loss": 2.9488,
      "step": 224348
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9743285179138184,
      "learning_rate": 1.0215343014253974e-06,
      "loss": 3.1506,
      "step": 224349
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.376337766647339,
      "learning_rate": 1.0211970467055464e-06,
      "loss": 2.772,
      "step": 224350
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3244781494140625,
      "learning_rate": 1.020859847571498e-06,
      "loss": 3.155,
      "step": 224351
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3217551708221436,
      "learning_rate": 1.0205227040233188e-06,
      "loss": 2.8824,
      "step": 224352
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.125509262084961,
      "learning_rate": 1.0201856160610754e-06,
      "loss": 2.6901,
      "step": 224353
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.727187156677246,
      "learning_rate": 1.0198485836848015e-06,
      "loss": 2.8884,
      "step": 224354
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.697287082672119,
      "learning_rate": 1.0195116068945963e-06,
      "loss": 2.9611,
      "step": 224355
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.030749797821045,
      "learning_rate": 1.0191746856904937e-06,
      "loss": 3.0113,
      "step": 224356
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.817558765411377,
      "learning_rate": 1.0188378200725933e-06,
      "loss": 3.2115,
      "step": 224357
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7838165760040283,
      "learning_rate": 1.0185010100409286e-06,
      "loss": 2.8706,
      "step": 224358
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2203705310821533,
      "learning_rate": 1.0181642555955661e-06,
      "loss": 3.0694,
      "step": 224359
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0104334354400635,
      "learning_rate": 1.017827556736539e-06,
      "loss": 2.7725,
      "step": 224360
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.116941213607788,
      "learning_rate": 1.017490913463981e-06,
      "loss": 2.8235,
      "step": 224361
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.4397497177124023,
      "learning_rate": 1.0171543257778913e-06,
      "loss": 2.7364,
      "step": 224362
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.690157413482666,
      "learning_rate": 1.0168177936783705e-06,
      "loss": 3.0155,
      "step": 224363
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.548098087310791,
      "learning_rate": 1.0164813171654518e-06,
      "loss": 2.8965,
      "step": 224364
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.508039712905884,
      "learning_rate": 1.0161448962392348e-06,
      "loss": 3.0251,
      "step": 224365
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.720069169998169,
      "learning_rate": 1.0158085308997532e-06,
      "loss": 2.841,
      "step": 224366
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2693045139312744,
      "learning_rate": 1.0154722211470733e-06,
      "loss": 2.6472,
      "step": 224367
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.215818405151367,
      "learning_rate": 1.0151359669812619e-06,
      "loss": 3.2961,
      "step": 224368
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.757526397705078,
      "learning_rate": 1.0147997684023856e-06,
      "loss": 2.5681,
      "step": 224369
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.6751437187194824,
      "learning_rate": 1.0144636254104777e-06,
      "loss": 2.7459,
      "step": 224370
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1768057346343994,
      "learning_rate": 1.0141275380056712e-06,
      "loss": 2.9429,
      "step": 224371
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7833144664764404,
      "learning_rate": 1.0137915061879331e-06,
      "loss": 2.7679,
      "step": 224372
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.647221803665161,
      "learning_rate": 1.0134555299573965e-06,
      "loss": 3.216,
      "step": 224373
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0143685340881348,
      "learning_rate": 1.013119609314128e-06,
      "loss": 3.1001,
      "step": 224374
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2397220134735107,
      "learning_rate": 1.0127837442581278e-06,
      "loss": 2.811,
      "step": 224375
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8821847438812256,
      "learning_rate": 1.0124479347894953e-06,
      "loss": 3.0596,
      "step": 224376
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.11382794380188,
      "learning_rate": 1.0121121809082977e-06,
      "loss": 2.8531,
      "step": 224377
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7762610912323,
      "learning_rate": 1.0117764826146014e-06,
      "loss": 2.9498,
      "step": 224378
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.004042387008667,
      "learning_rate": 1.0114408399084394e-06,
      "loss": 2.7087,
      "step": 224379
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7249317169189453,
      "learning_rate": 1.0111052527899454e-06,
      "loss": 3.0913,
      "step": 224380
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4868993759155273,
      "learning_rate": 1.0107697212590859e-06,
      "loss": 2.9248,
      "step": 224381
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3075010776519775,
      "learning_rate": 1.0104342453159608e-06,
      "loss": 2.7584,
      "step": 224382
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.950169324874878,
      "learning_rate": 1.01009882496067e-06,
      "loss": 2.9526,
      "step": 224383
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.193358421325684,
      "learning_rate": 1.0097634601932136e-06,
      "loss": 2.9488,
      "step": 224384
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.9267337322235107,
      "learning_rate": 1.0094281510137247e-06,
      "loss": 3.0043,
      "step": 224385
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.743863821029663,
      "learning_rate": 1.0090928974222035e-06,
      "loss": 3.1069,
      "step": 224386
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2905938625335693,
      "learning_rate": 1.0087576994187495e-06,
      "loss": 2.9054,
      "step": 224387
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2960455417633057,
      "learning_rate": 1.0084225570033965e-06,
      "loss": 3.1003,
      "step": 224388
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.861520290374756,
      "learning_rate": 1.008087470176211e-06,
      "loss": 2.8611,
      "step": 224389
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0235397815704346,
      "learning_rate": 1.0077524389372925e-06,
      "loss": 3.002,
      "step": 224390
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7599494457244873,
      "learning_rate": 1.007417463286675e-06,
      "loss": 2.8164,
      "step": 224391
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.755382776260376,
      "learning_rate": 1.0070825432243913e-06,
      "loss": 2.8316,
      "step": 224392
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.90659499168396,
      "learning_rate": 1.0067476787505745e-06,
      "loss": 2.8894,
      "step": 224393
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.331327438354492,
      "learning_rate": 1.006412869865225e-06,
      "loss": 2.9521,
      "step": 224394
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.045830249786377,
      "learning_rate": 1.0060781165684428e-06,
      "loss": 3.1661,
      "step": 224395
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6609528064727783,
      "learning_rate": 1.005743418860261e-06,
      "loss": 3.0246,
      "step": 224396
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.870058298110962,
      "learning_rate": 1.005408776740746e-06,
      "loss": 2.9401,
      "step": 224397
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.375877857208252,
      "learning_rate": 1.0050741902099645e-06,
      "loss": 3.1745,
      "step": 224398
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.888580799102783,
      "learning_rate": 1.0047396592680168e-06,
      "loss": 2.8383,
      "step": 224399
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.946751832962036,
      "learning_rate": 1.0044051839149024e-06,
      "loss": 2.8752,
      "step": 224400
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.692436933517456,
      "learning_rate": 1.004070764150755e-06,
      "loss": 2.7239,
      "step": 224401
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6167566776275635,
      "learning_rate": 1.003736399975541e-06,
      "loss": 2.8938,
      "step": 224402
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.756869316101074,
      "learning_rate": 1.0034020913894269e-06,
      "loss": 2.8721,
      "step": 224403
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.894576072692871,
      "learning_rate": 1.0030678383923795e-06,
      "loss": 2.728,
      "step": 224404
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0914812088012695,
      "learning_rate": 1.0027336409845322e-06,
      "loss": 2.7251,
      "step": 224405
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2238667011260986,
      "learning_rate": 1.002399499165918e-06,
      "loss": 2.8765,
      "step": 224406
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.772613525390625,
      "learning_rate": 1.0020654129366035e-06,
      "loss": 2.8026,
      "step": 224407
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5660040378570557,
      "learning_rate": 1.0017313822966223e-06,
      "loss": 2.8232,
      "step": 224408
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.488109588623047,
      "learning_rate": 1.0013974072461072e-06,
      "loss": 2.8426,
      "step": 224409
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1324262619018555,
      "learning_rate": 1.0010634877850255e-06,
      "loss": 3.1131,
      "step": 224410
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7808918952941895,
      "learning_rate": 1.0007296239135432e-06,
      "loss": 3.2486,
      "step": 224411
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4017093181610107,
      "learning_rate": 1.0003958156316271e-06,
      "loss": 2.7589,
      "step": 224412
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.640652656555176,
      "learning_rate": 1.0000620629394108e-06,
      "loss": 2.7211,
      "step": 224413
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3010635375976562,
      "learning_rate": 9.99728365836927e-07,
      "loss": 3.1852,
      "step": 224414
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9640371799468994,
      "learning_rate": 9.993947243242095e-07,
      "loss": 3.0337,
      "step": 224415
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9536774158477783,
      "learning_rate": 9.99061138401358e-07,
      "loss": 2.9665,
      "step": 224416
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8550643920898438,
      "learning_rate": 9.987276080684393e-07,
      "loss": 2.8036,
      "step": 224417
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.661693811416626,
      "learning_rate": 9.983941333254864e-07,
      "loss": 2.9641,
      "step": 224418
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.995182991027832,
      "learning_rate": 9.980607141725993e-07,
      "loss": 2.9193,
      "step": 224419
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.920433521270752,
      "learning_rate": 9.977273506098115e-07,
      "loss": 2.7764,
      "step": 224420
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.395559310913086,
      "learning_rate": 9.973940426371564e-07,
      "loss": 2.702,
      "step": 224421
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.549844741821289,
      "learning_rate": 9.970607902547668e-07,
      "loss": 2.9982,
      "step": 224422
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.676377058029175,
      "learning_rate": 9.967275934626428e-07,
      "loss": 2.7413,
      "step": 224423
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5125951766967773,
      "learning_rate": 9.963944522608846e-07,
      "loss": 2.9187,
      "step": 224424
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9167466163635254,
      "learning_rate": 9.960613666495588e-07,
      "loss": 2.9627,
      "step": 224425
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.785325288772583,
      "learning_rate": 9.95728336628665e-07,
      "loss": 3.0914,
      "step": 224426
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2592592239379883,
      "learning_rate": 9.953953621983367e-07,
      "loss": 2.9471,
      "step": 224427
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6802775859832764,
      "learning_rate": 9.950624433586073e-07,
      "loss": 2.7776,
      "step": 224428
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7788424491882324,
      "learning_rate": 9.947295801095433e-07,
      "loss": 2.7114,
      "step": 224429
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.62504506111145,
      "learning_rate": 9.943967724511781e-07,
      "loss": 2.9006,
      "step": 224430
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1263065338134766,
      "learning_rate": 9.940640203836448e-07,
      "loss": 3.0531,
      "step": 224431
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.648801803588867,
      "learning_rate": 9.937313239069433e-07,
      "loss": 2.8973,
      "step": 224432
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.664367198944092,
      "learning_rate": 9.933986830211405e-07,
      "loss": 3.0674,
      "step": 224433
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2845563888549805,
      "learning_rate": 9.930660977263028e-07,
      "loss": 2.4825,
      "step": 224434
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4473845958709717,
      "learning_rate": 9.927335680224968e-07,
      "loss": 2.8456,
      "step": 224435
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.861905097961426,
      "learning_rate": 9.924010939097893e-07,
      "loss": 2.9482,
      "step": 224436
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9329617023468018,
      "learning_rate": 9.920686753882468e-07,
      "loss": 2.8728,
      "step": 224437
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5098183155059814,
      "learning_rate": 9.917363124579358e-07,
      "loss": 2.8976,
      "step": 224438
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8002705574035645,
      "learning_rate": 9.9140400511889e-07,
      "loss": 3.0209,
      "step": 224439
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.798095464706421,
      "learning_rate": 9.910717533711754e-07,
      "loss": 2.7652,
      "step": 224440
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.823254108428955,
      "learning_rate": 9.907395572148924e-07,
      "loss": 2.8963,
      "step": 224441
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0013253688812256,
      "learning_rate": 9.90407416650041e-07,
      "loss": 3.3049,
      "step": 224442
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7634835243225098,
      "learning_rate": 9.90075331676754e-07,
      "loss": 2.6143,
      "step": 224443
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.747328042984009,
      "learning_rate": 9.89743302295032e-07,
      "loss": 2.6226,
      "step": 224444
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.002351760864258,
      "learning_rate": 9.89411328504941e-07,
      "loss": 2.9104,
      "step": 224445
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1312832832336426,
      "learning_rate": 9.890794103065813e-07,
      "loss": 3.0275,
      "step": 224446
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.823063850402832,
      "learning_rate": 9.887475477000195e-07,
      "loss": 2.9301,
      "step": 224447
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.333528757095337,
      "learning_rate": 9.884157406852554e-07,
      "loss": 2.7897,
      "step": 224448
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.962977409362793,
      "learning_rate": 9.880839892624226e-07,
      "loss": 3.0536,
      "step": 224449
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9163875579833984,
      "learning_rate": 9.877522934315541e-07,
      "loss": 2.8353,
      "step": 224450
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.151113510131836,
      "learning_rate": 9.874206531926832e-07,
      "loss": 3.0823,
      "step": 224451
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.921191453933716,
      "learning_rate": 9.870890685458765e-07,
      "loss": 2.9582,
      "step": 224452
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9144251346588135,
      "learning_rate": 9.867575394912675e-07,
      "loss": 2.9948,
      "step": 224453
    },
    {
      "epoch": 2.92,
      "grad_norm": 5.175509452819824,
      "learning_rate": 9.864260660288225e-07,
      "loss": 2.672,
      "step": 224454
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.245105266571045,
      "learning_rate": 9.86094648158642e-07,
      "loss": 2.6945,
      "step": 224455
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.113574504852295,
      "learning_rate": 9.857632858808252e-07,
      "loss": 2.9581,
      "step": 224456
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0419838428497314,
      "learning_rate": 9.854319791953724e-07,
      "loss": 2.8338,
      "step": 224457
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6446480751037598,
      "learning_rate": 9.851007281023836e-07,
      "loss": 2.7401,
      "step": 224458
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8802223205566406,
      "learning_rate": 9.847695326019256e-07,
      "loss": 2.9102,
      "step": 224459
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.826528549194336,
      "learning_rate": 9.84438392693998e-07,
      "loss": 3.0612,
      "step": 224460
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2473652362823486,
      "learning_rate": 9.841073083787343e-07,
      "loss": 2.6265,
      "step": 224461
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.312009572982788,
      "learning_rate": 9.837762796561677e-07,
      "loss": 2.9443,
      "step": 224462
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.617924928665161,
      "learning_rate": 9.834453065263647e-07,
      "loss": 2.8153,
      "step": 224463
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1595540046691895,
      "learning_rate": 9.831143889893921e-07,
      "loss": 3.1421,
      "step": 224464
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.524125576019287,
      "learning_rate": 9.827835270452832e-07,
      "loss": 2.836,
      "step": 224465
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9918768405914307,
      "learning_rate": 9.82452720694138e-07,
      "loss": 3.1995,
      "step": 224466
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1723709106445312,
      "learning_rate": 9.821219699359895e-07,
      "loss": 2.7593,
      "step": 224467
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2115938663482666,
      "learning_rate": 9.817912747709046e-07,
      "loss": 3.061,
      "step": 224468
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.333874225616455,
      "learning_rate": 9.814606351989495e-07,
      "loss": 2.6257,
      "step": 224469
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.681615114212036,
      "learning_rate": 9.811300512201914e-07,
      "loss": 2.9189,
      "step": 224470
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9076380729675293,
      "learning_rate": 9.807995228346965e-07,
      "loss": 2.8764,
      "step": 224471
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4971847534179688,
      "learning_rate": 9.804690500424983e-07,
      "loss": 2.8154,
      "step": 224472
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7962801456451416,
      "learning_rate": 9.801386328436633e-07,
      "loss": 2.8997,
      "step": 224473
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3027029037475586,
      "learning_rate": 9.798082712382915e-07,
      "loss": 2.6538,
      "step": 224474
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.928327798843384,
      "learning_rate": 9.794779652264163e-07,
      "loss": 2.8054,
      "step": 224475
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.685443639755249,
      "learning_rate": 9.791477148081039e-07,
      "loss": 2.9324,
      "step": 224476
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8766939640045166,
      "learning_rate": 9.78817519983388e-07,
      "loss": 2.9778,
      "step": 224477
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1167447566986084,
      "learning_rate": 9.784873807523685e-07,
      "loss": 3.0661,
      "step": 224478
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.065613031387329,
      "learning_rate": 9.781572971150785e-07,
      "loss": 3.1111,
      "step": 224479
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.768285036087036,
      "learning_rate": 9.778272690716182e-07,
      "loss": 2.982,
      "step": 224480
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1989805698394775,
      "learning_rate": 9.774972966219874e-07,
      "loss": 2.7189,
      "step": 224481
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8933513164520264,
      "learning_rate": 9.771673797663193e-07,
      "loss": 3.0103,
      "step": 224482
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7627029418945312,
      "learning_rate": 9.768375185046472e-07,
      "loss": 2.8002,
      "step": 224483
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3036112785339355,
      "learning_rate": 9.765077128370046e-07,
      "loss": 3.0292,
      "step": 224484
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7190120220184326,
      "learning_rate": 9.76177962763458e-07,
      "loss": 3.1459,
      "step": 224485
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3162238597869873,
      "learning_rate": 9.758482682841074e-07,
      "loss": 2.7775,
      "step": 224486
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0812110900878906,
      "learning_rate": 9.75518629398986e-07,
      "loss": 3.1498,
      "step": 224487
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.818178415298462,
      "learning_rate": 9.751890461081603e-07,
      "loss": 2.8635,
      "step": 224488
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7027530670166016,
      "learning_rate": 9.74859518411697e-07,
      "loss": 2.6747,
      "step": 224489
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.176414728164673,
      "learning_rate": 9.745300463096628e-07,
      "loss": 3.1161,
      "step": 224490
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6716158390045166,
      "learning_rate": 9.74200629802091e-07,
      "loss": 2.8317,
      "step": 224491
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2806098461151123,
      "learning_rate": 9.738712688890482e-07,
      "loss": 2.9368,
      "step": 224492
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5582334995269775,
      "learning_rate": 9.735419635706344e-07,
      "loss": 3.0356,
      "step": 224493
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1124298572540283,
      "learning_rate": 9.732127138468825e-07,
      "loss": 2.9867,
      "step": 224494
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9310033321380615,
      "learning_rate": 9.728835197178264e-07,
      "loss": 2.829,
      "step": 224495
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8798599243164062,
      "learning_rate": 9.725543811835656e-07,
      "loss": 2.9284,
      "step": 224496
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.509211540222168,
      "learning_rate": 9.72225298244167e-07,
      "loss": 2.8673,
      "step": 224497
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.982510566711426,
      "learning_rate": 9.718962708996635e-07,
      "loss": 2.7913,
      "step": 224498
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8302812576293945,
      "learning_rate": 9.715672991501222e-07,
      "loss": 3.198,
      "step": 224499
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8947947025299072,
      "learning_rate": 9.712383829956428e-07,
      "loss": 3.0376,
      "step": 224500
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0909407138824463,
      "learning_rate": 9.70909522436225e-07,
      "loss": 3.1292,
      "step": 224501
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7258152961730957,
      "learning_rate": 9.705807174719692e-07,
      "loss": 2.8131,
      "step": 224502
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.390097141265869,
      "learning_rate": 9.702519681029087e-07,
      "loss": 2.7893,
      "step": 224503
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.025428533554077,
      "learning_rate": 9.699232743291762e-07,
      "loss": 3.1642,
      "step": 224504
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1476078033447266,
      "learning_rate": 9.695946361507056e-07,
      "loss": 2.9104,
      "step": 224505
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.845604658126831,
      "learning_rate": 9.692660535676967e-07,
      "loss": 2.8942,
      "step": 224506
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.418353796005249,
      "learning_rate": 9.689375265801158e-07,
      "loss": 3.1371,
      "step": 224507
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0034916400909424,
      "learning_rate": 9.686090551880632e-07,
      "loss": 2.8381,
      "step": 224508
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.645735740661621,
      "learning_rate": 9.68280639391572e-07,
      "loss": 2.9331,
      "step": 224509
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.953974485397339,
      "learning_rate": 9.679522791907424e-07,
      "loss": 2.9572,
      "step": 224510
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8096070289611816,
      "learning_rate": 9.676239745856074e-07,
      "loss": 3.1562,
      "step": 224511
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.155895233154297,
      "learning_rate": 9.672957255762336e-07,
      "loss": 2.8736,
      "step": 224512
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0676140785217285,
      "learning_rate": 9.66967532162688e-07,
      "loss": 2.7345,
      "step": 224513
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2748260498046875,
      "learning_rate": 9.666393943450367e-07,
      "loss": 2.7657,
      "step": 224514
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0045082569122314,
      "learning_rate": 9.663113121233135e-07,
      "loss": 2.8546,
      "step": 224515
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6459248065948486,
      "learning_rate": 9.659832854976179e-07,
      "loss": 3.0549,
      "step": 224516
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8266773223876953,
      "learning_rate": 9.656553144679502e-07,
      "loss": 2.9656,
      "step": 224517
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.060570240020752,
      "learning_rate": 9.653273990344434e-07,
      "loss": 2.8766,
      "step": 224518
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.4804415702819824,
      "learning_rate": 9.64999539197131e-07,
      "loss": 3.0,
      "step": 224519
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.263786554336548,
      "learning_rate": 9.646717349560796e-07,
      "loss": 2.934,
      "step": 224520
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.190593719482422,
      "learning_rate": 9.643439863113222e-07,
      "loss": 2.7524,
      "step": 224521
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.997553825378418,
      "learning_rate": 9.64016293262926e-07,
      "loss": 2.982,
      "step": 224522
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.322861433029175,
      "learning_rate": 9.636886558109902e-07,
      "loss": 2.8499,
      "step": 224523
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.960510730743408,
      "learning_rate": 9.633610739555486e-07,
      "loss": 2.8893,
      "step": 224524
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.371046781539917,
      "learning_rate": 9.630335476966344e-07,
      "loss": 2.8642,
      "step": 224525
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.805901050567627,
      "learning_rate": 9.627060770343476e-07,
      "loss": 3.1199,
      "step": 224526
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9621946811676025,
      "learning_rate": 9.623786619687546e-07,
      "loss": 2.8422,
      "step": 224527
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.258080244064331,
      "learning_rate": 9.620513024998888e-07,
      "loss": 2.8957,
      "step": 224528
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.768425941467285,
      "learning_rate": 9.617239986278169e-07,
      "loss": 2.9591,
      "step": 224529
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.181387424468994,
      "learning_rate": 9.613967503526388e-07,
      "loss": 2.9968,
      "step": 224530
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1974737644195557,
      "learning_rate": 9.61069557674321e-07,
      "loss": 3.0187,
      "step": 224531
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.166365385055542,
      "learning_rate": 9.607424205930303e-07,
      "loss": 2.945,
      "step": 224532
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0273749828338623,
      "learning_rate": 9.604153391087667e-07,
      "loss": 2.9012,
      "step": 224533
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5562634468078613,
      "learning_rate": 9.600883132216298e-07,
      "loss": 2.8313,
      "step": 224534
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.199554443359375,
      "learning_rate": 9.597613429316198e-07,
      "loss": 2.8632,
      "step": 224535
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1375815868377686,
      "learning_rate": 9.594344282388699e-07,
      "loss": 2.8177,
      "step": 224536
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2118160724639893,
      "learning_rate": 9.591075691433803e-07,
      "loss": 3.0698,
      "step": 224537
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9033701419830322,
      "learning_rate": 9.587807656452507e-07,
      "loss": 2.8925,
      "step": 224538
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.851943016052246,
      "learning_rate": 9.584540177445144e-07,
      "loss": 3.1239,
      "step": 224539
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.095188856124878,
      "learning_rate": 9.581273254412715e-07,
      "loss": 3.0959,
      "step": 224540
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.821275234222412,
      "learning_rate": 9.578006887355549e-07,
      "loss": 3.0836,
      "step": 224541
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7157936096191406,
      "learning_rate": 9.574741076273985e-07,
      "loss": 2.9352,
      "step": 224542
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.065822124481201,
      "learning_rate": 9.571475821169017e-07,
      "loss": 2.887,
      "step": 224543
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2761950492858887,
      "learning_rate": 9.568211122041314e-07,
      "loss": 2.9743,
      "step": 224544
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.860687017440796,
      "learning_rate": 9.564946978891207e-07,
      "loss": 2.9204,
      "step": 224545
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.261312961578369,
      "learning_rate": 9.561683391719698e-07,
      "loss": 2.7888,
      "step": 224546
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2222187519073486,
      "learning_rate": 9.558420360526787e-07,
      "loss": 2.5913,
      "step": 224547
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5203473567962646,
      "learning_rate": 9.555157885313803e-07,
      "loss": 2.9875,
      "step": 224548
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.683892250061035,
      "learning_rate": 9.551895966080415e-07,
      "loss": 3.0308,
      "step": 224549
    },
    {
      "epoch": 2.92,
      "grad_norm": 5.435775279998779,
      "learning_rate": 9.548634602828287e-07,
      "loss": 2.8191,
      "step": 224550
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1120853424072266,
      "learning_rate": 9.545373795557088e-07,
      "loss": 2.8274,
      "step": 224551
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7255699634552,
      "learning_rate": 9.542113544268148e-07,
      "loss": 2.8416,
      "step": 224552
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.164970874786377,
      "learning_rate": 9.538853848961804e-07,
      "loss": 2.7813,
      "step": 224553
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8829915523529053,
      "learning_rate": 9.535594709638384e-07,
      "loss": 2.7317,
      "step": 224554
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9369924068450928,
      "learning_rate": 9.53233612629889e-07,
      "loss": 2.8361,
      "step": 224555
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.185886859893799,
      "learning_rate": 9.529078098943654e-07,
      "loss": 2.8041,
      "step": 224556
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0390288829803467,
      "learning_rate": 9.525820627573677e-07,
      "loss": 2.8276,
      "step": 224557
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.4584925174713135,
      "learning_rate": 9.522563712188958e-07,
      "loss": 2.6147,
      "step": 224558
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.324312448501587,
      "learning_rate": 9.519307352790828e-07,
      "loss": 2.8983,
      "step": 224559
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0917465686798096,
      "learning_rate": 9.516051549379289e-07,
      "loss": 3.1884,
      "step": 224560
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5233724117279053,
      "learning_rate": 9.512796301955339e-07,
      "loss": 2.5891,
      "step": 224561
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.300255298614502,
      "learning_rate": 9.509541610518978e-07,
      "loss": 2.7975,
      "step": 224562
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1047213077545166,
      "learning_rate": 9.506287475071539e-07,
      "loss": 2.7993,
      "step": 224563
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.491330146789551,
      "learning_rate": 9.503033895613354e-07,
      "loss": 2.8311,
      "step": 224564
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3439598083496094,
      "learning_rate": 9.499780872145091e-07,
      "loss": 2.971,
      "step": 224565
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5811548233032227,
      "learning_rate": 9.496528404667081e-07,
      "loss": 2.9851,
      "step": 224566
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.371572494506836,
      "learning_rate": 9.493276493180324e-07,
      "loss": 2.8422,
      "step": 224567
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1813559532165527,
      "learning_rate": 9.49002513768482e-07,
      "loss": 2.6983,
      "step": 224568
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1352522373199463,
      "learning_rate": 9.486774338181902e-07,
      "loss": 2.9783,
      "step": 224569
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9681556224823,
      "learning_rate": 9.483524094671902e-07,
      "loss": 2.7655,
      "step": 224570
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0034139156341553,
      "learning_rate": 9.480274407155153e-07,
      "loss": 2.7469,
      "step": 224571
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.348616361618042,
      "learning_rate": 9.477025275632655e-07,
      "loss": 2.838,
      "step": 224572
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2553353309631348,
      "learning_rate": 9.47377670010474e-07,
      "loss": 2.755,
      "step": 224573
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1098949909210205,
      "learning_rate": 9.470528680572076e-07,
      "loss": 3.0324,
      "step": 224574
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.626377582550049,
      "learning_rate": 9.467281217035328e-07,
      "loss": 2.7031,
      "step": 224575
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0982558727264404,
      "learning_rate": 9.464034309494828e-07,
      "loss": 2.8157,
      "step": 224576
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.5615220069885254,
      "learning_rate": 9.46078795795191e-07,
      "loss": 3.1757,
      "step": 224577
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8037421703338623,
      "learning_rate": 9.45754216240624e-07,
      "loss": 3.0158,
      "step": 224578
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7068474292755127,
      "learning_rate": 9.45429692285915e-07,
      "loss": 2.8513,
      "step": 224579
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.257700204849243,
      "learning_rate": 9.451052239310642e-07,
      "loss": 3.0341,
      "step": 224580
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.218228340148926,
      "learning_rate": 9.447808111762046e-07,
      "loss": 2.8244,
      "step": 224581
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.5915441513061523,
      "learning_rate": 9.444564540213029e-07,
      "loss": 2.9036,
      "step": 224582
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.342457294464111,
      "learning_rate": 9.441321524665258e-07,
      "loss": 3.0679,
      "step": 224583
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.58562970161438,
      "learning_rate": 9.438079065118398e-07,
      "loss": 3.0479,
      "step": 224584
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1608688831329346,
      "learning_rate": 9.434837161573783e-07,
      "loss": 3.0344,
      "step": 224585
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.540769100189209,
      "learning_rate": 9.431595814031411e-07,
      "loss": 2.8914,
      "step": 224586
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9265506267547607,
      "learning_rate": 9.428355022492285e-07,
      "loss": 2.7218,
      "step": 224587
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.96150541305542,
      "learning_rate": 9.425114786956733e-07,
      "loss": 3.0251,
      "step": 224588
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.821906328201294,
      "learning_rate": 9.421875107425759e-07,
      "loss": 2.66,
      "step": 224589
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.757352828979492,
      "learning_rate": 9.418635983899691e-07,
      "loss": 2.9827,
      "step": 224590
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1032180786132812,
      "learning_rate": 9.415397416378867e-07,
      "loss": 2.9166,
      "step": 224591
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.073429584503174,
      "learning_rate": 9.412159404864284e-07,
      "loss": 2.8611,
      "step": 224592
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1045594215393066,
      "learning_rate": 9.408921949356607e-07,
      "loss": 2.7591,
      "step": 224593
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6238389015197754,
      "learning_rate": 9.405685049856171e-07,
      "loss": 3.0869,
      "step": 224594
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9593405723571777,
      "learning_rate": 9.402448706363641e-07,
      "loss": 3.0881,
      "step": 224595
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2792611122131348,
      "learning_rate": 9.399212918879684e-07,
      "loss": 2.7607,
      "step": 224596
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2494850158691406,
      "learning_rate": 9.395977687404966e-07,
      "loss": 2.8356,
      "step": 224597
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9867570400238037,
      "learning_rate": 9.392743011939485e-07,
      "loss": 3.0764,
      "step": 224598
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8680617809295654,
      "learning_rate": 9.38950889248491e-07,
      "loss": 2.8378,
      "step": 224599
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.784667730331421,
      "learning_rate": 9.386275329040904e-07,
      "loss": 2.7096,
      "step": 224600
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9722073078155518,
      "learning_rate": 9.383042321608802e-07,
      "loss": 3.0899,
      "step": 224601
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.544948101043701,
      "learning_rate": 9.379809870188604e-07,
      "loss": 2.9705,
      "step": 224602
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.659362316131592,
      "learning_rate": 9.376577974781308e-07,
      "loss": 2.9229,
      "step": 224603
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6948788166046143,
      "learning_rate": 9.373346635387247e-07,
      "loss": 3.0013,
      "step": 224604
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0657575130462646,
      "learning_rate": 9.370115852007088e-07,
      "loss": 2.8654,
      "step": 224605
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.6956000328063965,
      "learning_rate": 9.366885624641496e-07,
      "loss": 2.8698,
      "step": 224606
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8675243854522705,
      "learning_rate": 9.363655953291138e-07,
      "loss": 3.1047,
      "step": 224607
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.764954090118408,
      "learning_rate": 9.360426837956347e-07,
      "loss": 2.9537,
      "step": 224608
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.549570322036743,
      "learning_rate": 9.357198278638123e-07,
      "loss": 3.0693,
      "step": 224609
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9697020053863525,
      "learning_rate": 9.353970275336798e-07,
      "loss": 3.124,
      "step": 224610
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0748538970947266,
      "learning_rate": 9.350742828053037e-07,
      "loss": 3.1361,
      "step": 224611
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0651254653930664,
      "learning_rate": 9.347515936787175e-07,
      "loss": 2.7122,
      "step": 224612
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1039764881134033,
      "learning_rate": 9.344289601540545e-07,
      "loss": 2.845,
      "step": 224613
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.989074230194092,
      "learning_rate": 9.341063822312811e-07,
      "loss": 2.8515,
      "step": 224614
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9326322078704834,
      "learning_rate": 9.337838599105307e-07,
      "loss": 2.9525,
      "step": 224615
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.827200174331665,
      "learning_rate": 9.334613931918034e-07,
      "loss": 2.9614,
      "step": 224616
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7872233390808105,
      "learning_rate": 9.331389820752322e-07,
      "loss": 2.9002,
      "step": 224617
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3480777740478516,
      "learning_rate": 9.328166265608173e-07,
      "loss": 2.9089,
      "step": 224618
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.2868943214416504,
      "learning_rate": 9.324943266486251e-07,
      "loss": 2.8242,
      "step": 224619
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.3434066772460938,
      "learning_rate": 9.321720823387557e-07,
      "loss": 2.717,
      "step": 224620
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.149724006652832,
      "learning_rate": 9.318498936312091e-07,
      "loss": 2.9056,
      "step": 224621
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.843919515609741,
      "learning_rate": 9.315277605260852e-07,
      "loss": 2.6841,
      "step": 224622
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0477466583251953,
      "learning_rate": 9.312056830234505e-07,
      "loss": 2.8718,
      "step": 224623
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.419577121734619,
      "learning_rate": 9.308836611233383e-07,
      "loss": 2.9958,
      "step": 224624
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8843421936035156,
      "learning_rate": 9.305616948258154e-07,
      "loss": 2.8899,
      "step": 224625
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.8746652603149414,
      "learning_rate": 9.302397841309483e-07,
      "loss": 3.002,
      "step": 224626
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.912867307662964,
      "learning_rate": 9.299179290388036e-07,
      "loss": 3.0827,
      "step": 224627
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.7193655967712402,
      "learning_rate": 9.29596129549448e-07,
      "loss": 2.7814,
      "step": 224628
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1682093143463135,
      "learning_rate": 9.292743856629148e-07,
      "loss": 2.9753,
      "step": 224629
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.6999285221099854,
      "learning_rate": 9.289526973792371e-07,
      "loss": 2.906,
      "step": 224630
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.9303739070892334,
      "learning_rate": 9.286310646985484e-07,
      "loss": 2.6829,
      "step": 224631
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.833138942718506,
      "learning_rate": 9.283094876208819e-07,
      "loss": 2.7441,
      "step": 224632
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.430896759033203,
      "learning_rate": 9.279879661462708e-07,
      "loss": 3.2269,
      "step": 224633
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.581929922103882,
      "learning_rate": 9.276665002747819e-07,
      "loss": 3.0667,
      "step": 224634
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.1423182487487793,
      "learning_rate": 9.273450900065149e-07,
      "loss": 2.851,
      "step": 224635
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.90472674369812,
      "learning_rate": 9.2702373534147e-07,
      "loss": 2.9051,
      "step": 224636
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.371340036392212,
      "learning_rate": 9.26702436279747e-07,
      "loss": 2.7049,
      "step": 224637
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.7945168018341064,
      "learning_rate": 9.263811928214126e-07,
      "loss": 2.7655,
      "step": 224638
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.0547070503234863,
      "learning_rate": 9.260600049664668e-07,
      "loss": 3.0444,
      "step": 224639
    },
    {
      "epoch": 2.92,
      "grad_norm": 3.155341386795044,
      "learning_rate": 9.257388727150427e-07,
      "loss": 2.9543,
      "step": 224640
    },
    {
      "epoch": 2.92,
      "grad_norm": 2.728900909423828,
      "learning_rate": 9.254177960671738e-07,
      "loss": 3.0584,
      "step": 224641
    },
    {
      "epoch": 2.92,
      "grad_norm": 4.334042072296143,
      "learning_rate": 9.250967750229265e-07,
      "loss": 2.8482,
      "step": 224642
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8430585861206055,
      "learning_rate": 9.247758095823343e-07,
      "loss": 3.0588,
      "step": 224643
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1232757568359375,
      "learning_rate": 9.244548997454637e-07,
      "loss": 2.9293,
      "step": 224644
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.507871389389038,
      "learning_rate": 9.241340455123814e-07,
      "loss": 2.7927,
      "step": 224645
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8515682220458984,
      "learning_rate": 9.238132468831539e-07,
      "loss": 2.7909,
      "step": 224646
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0777885913848877,
      "learning_rate": 9.234925038578145e-07,
      "loss": 2.9214,
      "step": 224647
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5540566444396973,
      "learning_rate": 9.231718164364632e-07,
      "loss": 2.857,
      "step": 224648
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.012986421585083,
      "learning_rate": 9.228511846191666e-07,
      "loss": 2.9013,
      "step": 224649
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1778175830841064,
      "learning_rate": 9.225306084059248e-07,
      "loss": 2.861,
      "step": 224650
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.54646372795105,
      "learning_rate": 9.222100877968375e-07,
      "loss": 2.7738,
      "step": 224651
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8565280437469482,
      "learning_rate": 9.218896227919381e-07,
      "loss": 2.9648,
      "step": 224652
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.246943950653076,
      "learning_rate": 9.215692133913266e-07,
      "loss": 3.1508,
      "step": 224653
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.750119209289551,
      "learning_rate": 9.212488595950363e-07,
      "loss": 3.1321,
      "step": 224654
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6303625106811523,
      "learning_rate": 9.209285614031337e-07,
      "loss": 2.8779,
      "step": 224655
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.31563138961792,
      "learning_rate": 9.206083188156854e-07,
      "loss": 2.7693,
      "step": 224656
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8966405391693115,
      "learning_rate": 9.202881318327249e-07,
      "loss": 3.1147,
      "step": 224657
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.14052152633667,
      "learning_rate": 9.199680004543187e-07,
      "loss": 2.9444,
      "step": 224658
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0342190265655518,
      "learning_rate": 9.196479246805666e-07,
      "loss": 3.0747,
      "step": 224659
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.767799139022827,
      "learning_rate": 9.193279045114688e-07,
      "loss": 2.835,
      "step": 224660
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0369417667388916,
      "learning_rate": 9.19007939947125e-07,
      "loss": 2.777,
      "step": 224661
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9721126556396484,
      "learning_rate": 9.186880309876022e-07,
      "loss": 2.9164,
      "step": 224662
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1820156574249268,
      "learning_rate": 9.183681776329332e-07,
      "loss": 2.8607,
      "step": 224663
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.562685966491699,
      "learning_rate": 9.180483798831517e-07,
      "loss": 2.8829,
      "step": 224664
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.7313692569732666,
      "learning_rate": 9.177286377383907e-07,
      "loss": 3.0838,
      "step": 224665
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5026156902313232,
      "learning_rate": 9.174089511986505e-07,
      "loss": 3.1306,
      "step": 224666
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.136728286743164,
      "learning_rate": 9.170893202639973e-07,
      "loss": 3.0263,
      "step": 224667
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.888875722885132,
      "learning_rate": 9.167697449345312e-07,
      "loss": 3.0711,
      "step": 224668
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3055419921875,
      "learning_rate": 9.164502252102523e-07,
      "loss": 2.9182,
      "step": 224669
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.975111484527588,
      "learning_rate": 9.161307610912938e-07,
      "loss": 2.9869,
      "step": 224670
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.31827974319458,
      "learning_rate": 9.158113525776222e-07,
      "loss": 2.7349,
      "step": 224671
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.91127610206604,
      "learning_rate": 9.154919996694044e-07,
      "loss": 2.9504,
      "step": 224672
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8103957176208496,
      "learning_rate": 9.151727023665733e-07,
      "loss": 2.932,
      "step": 224673
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.361344814300537,
      "learning_rate": 9.148534606692959e-07,
      "loss": 3.0675,
      "step": 224674
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.870258331298828,
      "learning_rate": 9.145342745776052e-07,
      "loss": 2.8863,
      "step": 224675
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1604466438293457,
      "learning_rate": 9.142151440915347e-07,
      "loss": 2.9355,
      "step": 224676
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0849223136901855,
      "learning_rate": 9.138960692111508e-07,
      "loss": 3.0369,
      "step": 224677
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.701491594314575,
      "learning_rate": 9.135770499365203e-07,
      "loss": 2.9549,
      "step": 224678
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5536935329437256,
      "learning_rate": 9.132580862677096e-07,
      "loss": 2.9663,
      "step": 224679
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0730104446411133,
      "learning_rate": 9.129391782047857e-07,
      "loss": 2.6404,
      "step": 224680
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8086860179901123,
      "learning_rate": 9.126203257477483e-07,
      "loss": 2.9803,
      "step": 224681
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.219515323638916,
      "learning_rate": 9.123015288967306e-07,
      "loss": 2.65,
      "step": 224682
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2588469982147217,
      "learning_rate": 9.119827876517661e-07,
      "loss": 2.8833,
      "step": 224683
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1094744205474854,
      "learning_rate": 9.116641020128878e-07,
      "loss": 2.83,
      "step": 224684
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6359150409698486,
      "learning_rate": 9.11345471980196e-07,
      "loss": 2.8624,
      "step": 224685
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1070590019226074,
      "learning_rate": 9.110268975537239e-07,
      "loss": 2.8038,
      "step": 224686
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.287274122238159,
      "learning_rate": 9.107083787335379e-07,
      "loss": 3.0832,
      "step": 224687
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.863792896270752,
      "learning_rate": 9.103899155197047e-07,
      "loss": 2.8022,
      "step": 224688
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.749168634414673,
      "learning_rate": 9.100715079122578e-07,
      "loss": 2.8682,
      "step": 224689
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.091827392578125,
      "learning_rate": 9.09753155911297e-07,
      "loss": 2.9951,
      "step": 224690
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.9007184505462646,
      "learning_rate": 9.094348595168222e-07,
      "loss": 2.696,
      "step": 224691
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9573354721069336,
      "learning_rate": 9.091166187289667e-07,
      "loss": 2.8403,
      "step": 224692
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5699071884155273,
      "learning_rate": 9.087984335477305e-07,
      "loss": 2.7823,
      "step": 224693
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.743011951446533,
      "learning_rate": 9.084803039732136e-07,
      "loss": 3.17,
      "step": 224694
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.736954689025879,
      "learning_rate": 9.081622300054492e-07,
      "loss": 2.7551,
      "step": 224695
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.708387851715088,
      "learning_rate": 9.07844211644504e-07,
      "loss": 3.0562,
      "step": 224696
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.997410297393799,
      "learning_rate": 9.075262488904111e-07,
      "loss": 2.7606,
      "step": 224697
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0238261222839355,
      "learning_rate": 9.072083417432707e-07,
      "loss": 2.6126,
      "step": 224698
    },
    {
      "epoch": 2.93,
      "grad_norm": 5.233264923095703,
      "learning_rate": 9.068904902031493e-07,
      "loss": 3.1555,
      "step": 224699
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.858299970626831,
      "learning_rate": 9.065726942700801e-07,
      "loss": 2.6948,
      "step": 224700
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5137691497802734,
      "learning_rate": 9.062549539440966e-07,
      "loss": 2.8269,
      "step": 224701
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9542267322540283,
      "learning_rate": 9.059372692252987e-07,
      "loss": 3.0687,
      "step": 224702
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.868061065673828,
      "learning_rate": 9.056196401137194e-07,
      "loss": 2.8723,
      "step": 224703
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.187119245529175,
      "learning_rate": 9.053020666094591e-07,
      "loss": 2.8594,
      "step": 224704
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0454463958740234,
      "learning_rate": 9.049845487125173e-07,
      "loss": 2.8347,
      "step": 224705
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7211668491363525,
      "learning_rate": 9.046670864230277e-07,
      "loss": 2.8686,
      "step": 224706
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8786520957946777,
      "learning_rate": 9.043496797409567e-07,
      "loss": 2.7619,
      "step": 224707
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.883824110031128,
      "learning_rate": 9.040323286664375e-07,
      "loss": 2.7923,
      "step": 224708
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6185970306396484,
      "learning_rate": 9.037150331995036e-07,
      "loss": 2.7492,
      "step": 224709
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.15399169921875,
      "learning_rate": 9.033977933402214e-07,
      "loss": 2.8224,
      "step": 224710
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0047173500061035,
      "learning_rate": 9.030806090886245e-07,
      "loss": 2.7302,
      "step": 224711
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6334211826324463,
      "learning_rate": 9.027634804448125e-07,
      "loss": 3.0661,
      "step": 224712
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.53460431098938,
      "learning_rate": 9.024464074088189e-07,
      "loss": 2.8221,
      "step": 224713
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5535430908203125,
      "learning_rate": 9.021293899807103e-07,
      "loss": 2.6731,
      "step": 224714
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9734649658203125,
      "learning_rate": 9.018124281605199e-07,
      "loss": 3.1227,
      "step": 224715
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2357611656188965,
      "learning_rate": 9.014955219483477e-07,
      "loss": 2.8224,
      "step": 224716
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.734687328338623,
      "learning_rate": 9.011786713442271e-07,
      "loss": 3.012,
      "step": 224717
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0853095054626465,
      "learning_rate": 9.008618763482245e-07,
      "loss": 2.9356,
      "step": 224718
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.629445791244507,
      "learning_rate": 9.005451369603733e-07,
      "loss": 2.9769,
      "step": 224719
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.098619222640991,
      "learning_rate": 9.002284531808069e-07,
      "loss": 2.8264,
      "step": 224720
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.464033842086792,
      "learning_rate": 8.999118250094917e-07,
      "loss": 3.1752,
      "step": 224721
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.741455554962158,
      "learning_rate": 8.995952524465278e-07,
      "loss": 2.6613,
      "step": 224722
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.077519655227661,
      "learning_rate": 8.99278735492015e-07,
      "loss": 2.8461,
      "step": 224723
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5805978775024414,
      "learning_rate": 8.989622741459201e-07,
      "loss": 2.8974,
      "step": 224724
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.493433713912964,
      "learning_rate": 8.986458684084097e-07,
      "loss": 3.0164,
      "step": 224725
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.757002592086792,
      "learning_rate": 8.983295182794503e-07,
      "loss": 2.9149,
      "step": 224726
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6882591247558594,
      "learning_rate": 8.98013223759142e-07,
      "loss": 2.9211,
      "step": 224727
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1672980785369873,
      "learning_rate": 8.976969848475513e-07,
      "loss": 3.0559,
      "step": 224728
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.094761848449707,
      "learning_rate": 8.973808015447114e-07,
      "loss": 2.9092,
      "step": 224729
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.582839250564575,
      "learning_rate": 8.970646738506893e-07,
      "loss": 2.8603,
      "step": 224730
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5835044384002686,
      "learning_rate": 8.967486017655845e-07,
      "loss": 2.7465,
      "step": 224731
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9776129722595215,
      "learning_rate": 8.964325852893972e-07,
      "loss": 3.0619,
      "step": 224732
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9172885417938232,
      "learning_rate": 8.96116624422194e-07,
      "loss": 3.068,
      "step": 224733
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9848713874816895,
      "learning_rate": 8.958007191640748e-07,
      "loss": 3.1081,
      "step": 224734
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.4792959690093994,
      "learning_rate": 8.954848695150397e-07,
      "loss": 2.9532,
      "step": 224735
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8859071731567383,
      "learning_rate": 8.951690754751883e-07,
      "loss": 2.9957,
      "step": 224736
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9981789588928223,
      "learning_rate": 8.948533370445876e-07,
      "loss": 2.8507,
      "step": 224737
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8112735748291016,
      "learning_rate": 8.945376542232708e-07,
      "loss": 2.7241,
      "step": 224738
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6327450275421143,
      "learning_rate": 8.942220270113376e-07,
      "loss": 2.8872,
      "step": 224739
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9666261672973633,
      "learning_rate": 8.939064554087882e-07,
      "loss": 2.8402,
      "step": 224740
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.784409523010254,
      "learning_rate": 8.935909394156892e-07,
      "loss": 2.8774,
      "step": 224741
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0924861431121826,
      "learning_rate": 8.932754790321405e-07,
      "loss": 2.8806,
      "step": 224742
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.042247772216797,
      "learning_rate": 8.929600742581755e-07,
      "loss": 2.9824,
      "step": 224743
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.0165324211120605,
      "learning_rate": 8.926447250938273e-07,
      "loss": 3.1296,
      "step": 224744
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.77203369140625,
      "learning_rate": 8.923294315392293e-07,
      "loss": 2.7793,
      "step": 224745
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.745175838470459,
      "learning_rate": 8.920141935943481e-07,
      "loss": 2.8507,
      "step": 224746
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.619089365005493,
      "learning_rate": 8.91699011259317e-07,
      "loss": 2.9633,
      "step": 224747
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7665607929229736,
      "learning_rate": 8.913838845341692e-07,
      "loss": 2.9298,
      "step": 224748
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6868715286254883,
      "learning_rate": 8.910688134189381e-07,
      "loss": 2.8867,
      "step": 224749
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5838911533355713,
      "learning_rate": 8.907537979137236e-07,
      "loss": 2.8967,
      "step": 224750
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.96750807762146,
      "learning_rate": 8.90438838018559e-07,
      "loss": 2.8387,
      "step": 224751
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.4994654655456543,
      "learning_rate": 8.901239337334775e-07,
      "loss": 3.1609,
      "step": 224752
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0335230827331543,
      "learning_rate": 8.898090850586126e-07,
      "loss": 2.853,
      "step": 224753
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6365349292755127,
      "learning_rate": 8.89494291993964e-07,
      "loss": 2.9571,
      "step": 224754
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.3540656566619873,
      "learning_rate": 8.891795545395986e-07,
      "loss": 2.8413,
      "step": 224755
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.660423994064331,
      "learning_rate": 8.888648726955827e-07,
      "loss": 2.8592,
      "step": 224756
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4504199028015137,
      "learning_rate": 8.885502464619831e-07,
      "loss": 3.0908,
      "step": 224757
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.416349411010742,
      "learning_rate": 8.882356758387998e-07,
      "loss": 2.7474,
      "step": 224758
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9200682640075684,
      "learning_rate": 8.879211608261993e-07,
      "loss": 2.9622,
      "step": 224759
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.194052219390869,
      "learning_rate": 8.876067014241484e-07,
      "loss": 2.8955,
      "step": 224760
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.576934576034546,
      "learning_rate": 8.872922976327468e-07,
      "loss": 2.8168,
      "step": 224761
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0184450149536133,
      "learning_rate": 8.869779494520613e-07,
      "loss": 2.7574,
      "step": 224762
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8480336666107178,
      "learning_rate": 8.86663656882125e-07,
      "loss": 2.7692,
      "step": 224763
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8947272300720215,
      "learning_rate": 8.863494199229715e-07,
      "loss": 2.728,
      "step": 224764
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7436118125915527,
      "learning_rate": 8.860352385747338e-07,
      "loss": 2.8165,
      "step": 224765
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.017940044403076,
      "learning_rate": 8.85721112837412e-07,
      "loss": 2.7995,
      "step": 224766
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.143618583679199,
      "learning_rate": 8.854070427110727e-07,
      "loss": 3.0765,
      "step": 224767
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.602296829223633,
      "learning_rate": 8.850930281957825e-07,
      "loss": 2.9491,
      "step": 224768
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.093897819519043,
      "learning_rate": 8.847790692916079e-07,
      "loss": 2.8683,
      "step": 224769
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.813368797302246,
      "learning_rate": 8.844651659985824e-07,
      "loss": 2.9563,
      "step": 224770
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.963306188583374,
      "learning_rate": 8.841513183168058e-07,
      "loss": 2.9591,
      "step": 224771
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7621843814849854,
      "learning_rate": 8.838375262463115e-07,
      "loss": 2.6002,
      "step": 224772
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4740145206451416,
      "learning_rate": 8.835237897871661e-07,
      "loss": 3.1014,
      "step": 224773
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.991722822189331,
      "learning_rate": 8.832101089393695e-07,
      "loss": 3.2177,
      "step": 224774
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.883281946182251,
      "learning_rate": 8.828964837030883e-07,
      "loss": 3.0177,
      "step": 224775
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.785304069519043,
      "learning_rate": 8.825829140782892e-07,
      "loss": 3.0133,
      "step": 224776
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0041956901550293,
      "learning_rate": 8.822694000650721e-07,
      "loss": 2.8272,
      "step": 224777
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8780221939086914,
      "learning_rate": 8.819559416634703e-07,
      "loss": 2.7663,
      "step": 224778
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.034815549850464,
      "learning_rate": 8.816425388735837e-07,
      "loss": 3.0122,
      "step": 224779
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.965134620666504,
      "learning_rate": 8.813291916954456e-07,
      "loss": 3.1917,
      "step": 224780
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.468023777008057,
      "learning_rate": 8.810159001291229e-07,
      "loss": 3.1551,
      "step": 224781
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.165257215499878,
      "learning_rate": 8.807026641746484e-07,
      "loss": 2.9425,
      "step": 224782
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.23337459564209,
      "learning_rate": 8.803894838320891e-07,
      "loss": 2.8261,
      "step": 224783
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7188024520874023,
      "learning_rate": 8.800763591015114e-07,
      "loss": 2.81,
      "step": 224784
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.7745511531829834,
      "learning_rate": 8.797632899830154e-07,
      "loss": 2.937,
      "step": 224785
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.532166004180908,
      "learning_rate": 8.794502764765676e-07,
      "loss": 2.9311,
      "step": 224786
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9359540939331055,
      "learning_rate": 8.791373185823014e-07,
      "loss": 2.9013,
      "step": 224787
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.256765604019165,
      "learning_rate": 8.7882441630025e-07,
      "loss": 3.0018,
      "step": 224788
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8001792430877686,
      "learning_rate": 8.785115696304468e-07,
      "loss": 2.6946,
      "step": 224789
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.8858327865600586,
      "learning_rate": 8.781987785730249e-07,
      "loss": 2.9692,
      "step": 224790
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0482380390167236,
      "learning_rate": 8.778860431279511e-07,
      "loss": 2.8052,
      "step": 224791
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.648773670196533,
      "learning_rate": 8.775733632953252e-07,
      "loss": 3.0409,
      "step": 224792
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.141247510910034,
      "learning_rate": 8.77260739075214e-07,
      "loss": 2.9978,
      "step": 224793
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.155622720718384,
      "learning_rate": 8.769481704676839e-07,
      "loss": 2.9658,
      "step": 224794
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.800571918487549,
      "learning_rate": 8.766356574727351e-07,
      "loss": 2.8828,
      "step": 224795
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.020916223526001,
      "learning_rate": 8.763232000905007e-07,
      "loss": 3.1104,
      "step": 224796
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8756086826324463,
      "learning_rate": 8.760107983210141e-07,
      "loss": 2.7603,
      "step": 224797
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0829098224639893,
      "learning_rate": 8.756984521642752e-07,
      "loss": 2.6945,
      "step": 224798
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8920397758483887,
      "learning_rate": 8.753861616204172e-07,
      "loss": 2.8898,
      "step": 224799
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.713897228240967,
      "learning_rate": 8.750739266894736e-07,
      "loss": 2.5597,
      "step": 224800
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6731112003326416,
      "learning_rate": 8.747617473715107e-07,
      "loss": 3.001,
      "step": 224801
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7043933868408203,
      "learning_rate": 8.744496236665622e-07,
      "loss": 2.9177,
      "step": 224802
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.983466625213623,
      "learning_rate": 8.741375555746944e-07,
      "loss": 2.6564,
      "step": 224803
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2141530513763428,
      "learning_rate": 8.738255430959739e-07,
      "loss": 2.8008,
      "step": 224804
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8996291160583496,
      "learning_rate": 8.735135862304676e-07,
      "loss": 3.0912,
      "step": 224805
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.015826940536499,
      "learning_rate": 8.732016849782086e-07,
      "loss": 2.8796,
      "step": 224806
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.164653778076172,
      "learning_rate": 8.728898393392636e-07,
      "loss": 3.0339,
      "step": 224807
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4635982513427734,
      "learning_rate": 8.725780493136991e-07,
      "loss": 3.0806,
      "step": 224808
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.945064067840576,
      "learning_rate": 8.722663149015819e-07,
      "loss": 2.9113,
      "step": 224809
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6580443382263184,
      "learning_rate": 8.719546361029783e-07,
      "loss": 2.6076,
      "step": 224810
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.039777994155884,
      "learning_rate": 8.716430129178886e-07,
      "loss": 2.8811,
      "step": 224811
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.008898973464966,
      "learning_rate": 8.713314453464126e-07,
      "loss": 2.9275,
      "step": 224812
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.073944568634033,
      "learning_rate": 8.710199333885837e-07,
      "loss": 2.9506,
      "step": 224813
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.97161602973938,
      "learning_rate": 8.707084770445017e-07,
      "loss": 3.2256,
      "step": 224814
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.233970880508423,
      "learning_rate": 8.703970763141998e-07,
      "loss": 2.9954,
      "step": 224815
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.912611246109009,
      "learning_rate": 8.70085731197745e-07,
      "loss": 2.8179,
      "step": 224816
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5682098865509033,
      "learning_rate": 8.697744416951702e-07,
      "loss": 3.0185,
      "step": 224817
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.496584415435791,
      "learning_rate": 8.694632078065755e-07,
      "loss": 3.055,
      "step": 224818
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1596546173095703,
      "learning_rate": 8.69152029531961e-07,
      "loss": 2.8211,
      "step": 224819
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9217920303344727,
      "learning_rate": 8.688409068714264e-07,
      "loss": 2.909,
      "step": 224820
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.792593240737915,
      "learning_rate": 8.685298398250384e-07,
      "loss": 2.9975,
      "step": 224821
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.984553337097168,
      "learning_rate": 8.682188283928304e-07,
      "loss": 2.8729,
      "step": 224822
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.993687152862549,
      "learning_rate": 8.679078725748689e-07,
      "loss": 2.7532,
      "step": 224823
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.259779691696167,
      "learning_rate": 8.675969723712206e-07,
      "loss": 2.8826,
      "step": 224824
    },
    {
      "epoch": 2.93,
      "grad_norm": 5.346301078796387,
      "learning_rate": 8.672861277819188e-07,
      "loss": 2.7856,
      "step": 224825
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.009190797805786,
      "learning_rate": 8.669753388070299e-07,
      "loss": 2.8913,
      "step": 224826
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.8881492614746094,
      "learning_rate": 8.666646054466208e-07,
      "loss": 2.8604,
      "step": 224827
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6959595680236816,
      "learning_rate": 8.663539277007581e-07,
      "loss": 2.8705,
      "step": 224828
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.938772678375244,
      "learning_rate": 8.660433055694415e-07,
      "loss": 3.0512,
      "step": 224829
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1646177768707275,
      "learning_rate": 8.657327390528379e-07,
      "loss": 2.8862,
      "step": 224830
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3888254165649414,
      "learning_rate": 8.654222281509138e-07,
      "loss": 2.5734,
      "step": 224831
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.981504201889038,
      "learning_rate": 8.651117728637358e-07,
      "loss": 3.0315,
      "step": 224832
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.652038335800171,
      "learning_rate": 8.648013731914039e-07,
      "loss": 3.1653,
      "step": 224833
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.193993330001831,
      "learning_rate": 8.64491029133918e-07,
      "loss": 3.0037,
      "step": 224834
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7002947330474854,
      "learning_rate": 8.641807406914114e-07,
      "loss": 2.7568,
      "step": 224835
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.613443374633789,
      "learning_rate": 8.638705078638841e-07,
      "loss": 2.9459,
      "step": 224836
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7237062454223633,
      "learning_rate": 8.635603306514027e-07,
      "loss": 2.7772,
      "step": 224837
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.118567943572998,
      "learning_rate": 8.632502090540339e-07,
      "loss": 2.7883,
      "step": 224838
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.119612216949463,
      "learning_rate": 8.629401430718441e-07,
      "loss": 2.7501,
      "step": 224839
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1931538581848145,
      "learning_rate": 8.626301327048668e-07,
      "loss": 2.8869,
      "step": 224840
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3777546882629395,
      "learning_rate": 8.623201779532019e-07,
      "loss": 2.9677,
      "step": 224841
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.651416540145874,
      "learning_rate": 8.620102788168492e-07,
      "loss": 2.8755,
      "step": 224842
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7516133785247803,
      "learning_rate": 8.617004352959089e-07,
      "loss": 2.9026,
      "step": 224843
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.146367311477661,
      "learning_rate": 8.613906473904142e-07,
      "loss": 2.8997,
      "step": 224844
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3938255310058594,
      "learning_rate": 8.610809151004317e-07,
      "loss": 2.605,
      "step": 224845
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.482994318008423,
      "learning_rate": 8.607712384260279e-07,
      "loss": 2.881,
      "step": 224846
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.660102367401123,
      "learning_rate": 8.604616173672696e-07,
      "loss": 3.1644,
      "step": 224847
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7251763343811035,
      "learning_rate": 8.6015205192419e-07,
      "loss": 2.8477,
      "step": 224848
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8658218383789062,
      "learning_rate": 8.598425420968225e-07,
      "loss": 2.9165,
      "step": 224849
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1547324657440186,
      "learning_rate": 8.595330878853001e-07,
      "loss": 2.9707,
      "step": 224850
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5673775672912598,
      "learning_rate": 8.592236892896231e-07,
      "loss": 2.9014,
      "step": 224851
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.761561632156372,
      "learning_rate": 8.58914346309858e-07,
      "loss": 2.9597,
      "step": 224852
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8455331325531006,
      "learning_rate": 8.586050589460714e-07,
      "loss": 2.7292,
      "step": 224853
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.107391357421875,
      "learning_rate": 8.582958271982965e-07,
      "loss": 2.9323,
      "step": 224854
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3715851306915283,
      "learning_rate": 8.579866510666334e-07,
      "loss": 2.8687,
      "step": 224855
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.9147753715515137,
      "learning_rate": 8.576775305511152e-07,
      "loss": 2.6846,
      "step": 224856
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7900888919830322,
      "learning_rate": 8.573684656518087e-07,
      "loss": 3.0434,
      "step": 224857
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.727926254272461,
      "learning_rate": 8.570594563687472e-07,
      "loss": 3.118,
      "step": 224858
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3154244422912598,
      "learning_rate": 8.567505027020304e-07,
      "loss": 2.7963,
      "step": 224859
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.026639223098755,
      "learning_rate": 8.564416046516587e-07,
      "loss": 2.9858,
      "step": 224860
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.539874315261841,
      "learning_rate": 8.561327622177316e-07,
      "loss": 2.9637,
      "step": 224861
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.88835072517395,
      "learning_rate": 8.55823975400316e-07,
      "loss": 2.9762,
      "step": 224862
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.159867525100708,
      "learning_rate": 8.555152441994117e-07,
      "loss": 2.9432,
      "step": 224863
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.752448320388794,
      "learning_rate": 8.552065686151522e-07,
      "loss": 2.8274,
      "step": 224864
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6624720096588135,
      "learning_rate": 8.548979486475371e-07,
      "loss": 2.8406,
      "step": 224865
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.844313144683838,
      "learning_rate": 8.545893842966333e-07,
      "loss": 2.8063,
      "step": 224866
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9902706146240234,
      "learning_rate": 8.542808755625407e-07,
      "loss": 3.0998,
      "step": 224867
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.141253709793091,
      "learning_rate": 8.539724224452593e-07,
      "loss": 2.8998,
      "step": 224868
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.995042085647583,
      "learning_rate": 8.536640249448556e-07,
      "loss": 2.8789,
      "step": 224869
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0038986206054688,
      "learning_rate": 8.53355683061463e-07,
      "loss": 3.0332,
      "step": 224870
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8012237548828125,
      "learning_rate": 8.530473967950147e-07,
      "loss": 2.9602,
      "step": 224871
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.609065532684326,
      "learning_rate": 8.527391661456773e-07,
      "loss": 2.7357,
      "step": 224872
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7957956790924072,
      "learning_rate": 8.524309911134175e-07,
      "loss": 2.9371,
      "step": 224873
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2331604957580566,
      "learning_rate": 8.521228716983685e-07,
      "loss": 2.7686,
      "step": 224874
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4535346031188965,
      "learning_rate": 8.518148079005305e-07,
      "loss": 3.166,
      "step": 224875
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8519628047943115,
      "learning_rate": 8.515067997200365e-07,
      "loss": 3.0449,
      "step": 224876
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9239091873168945,
      "learning_rate": 8.511988471568532e-07,
      "loss": 2.6846,
      "step": 224877
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.983374834060669,
      "learning_rate": 8.508909502110806e-07,
      "loss": 2.9746,
      "step": 224878
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4946086406707764,
      "learning_rate": 8.505831088827852e-07,
      "loss": 3.2608,
      "step": 224879
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.593754529953003,
      "learning_rate": 8.502753231720005e-07,
      "loss": 2.4288,
      "step": 224880
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.313314914703369,
      "learning_rate": 8.499675930788264e-07,
      "loss": 2.8948,
      "step": 224881
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5266048908233643,
      "learning_rate": 8.496599186032627e-07,
      "loss": 2.9173,
      "step": 224882
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.794267177581787,
      "learning_rate": 8.493522997454094e-07,
      "loss": 2.7536,
      "step": 224883
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9112493991851807,
      "learning_rate": 8.490447365053e-07,
      "loss": 2.6658,
      "step": 224884
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6515066623687744,
      "learning_rate": 8.487372288830008e-07,
      "loss": 2.991,
      "step": 224885
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.775744915008545,
      "learning_rate": 8.484297768785786e-07,
      "loss": 2.8463,
      "step": 224886
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.089902639389038,
      "learning_rate": 8.481223804920667e-07,
      "loss": 2.8082,
      "step": 224887
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6506175994873047,
      "learning_rate": 8.478150397235317e-07,
      "loss": 2.8837,
      "step": 224888
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.595426082611084,
      "learning_rate": 8.475077545730402e-07,
      "loss": 2.9542,
      "step": 224889
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8778622150421143,
      "learning_rate": 8.472005250406589e-07,
      "loss": 2.9541,
      "step": 224890
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.712291955947876,
      "learning_rate": 8.468933511264208e-07,
      "loss": 3.0621,
      "step": 224891
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7817957401275635,
      "learning_rate": 8.46586232830393e-07,
      "loss": 2.7905,
      "step": 224892
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.490687131881714,
      "learning_rate": 8.462791701526084e-07,
      "loss": 2.9797,
      "step": 224893
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.747823476791382,
      "learning_rate": 8.45972163093167e-07,
      "loss": 2.8547,
      "step": 224894
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.819244861602783,
      "learning_rate": 8.456652116521023e-07,
      "loss": 3.0359,
      "step": 224895
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9168920516967773,
      "learning_rate": 8.453583158294808e-07,
      "loss": 2.7621,
      "step": 224896
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1890246868133545,
      "learning_rate": 8.45051475625369e-07,
      "loss": 3.0971,
      "step": 224897
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.37734055519104,
      "learning_rate": 8.447446910398003e-07,
      "loss": 2.9303,
      "step": 224898
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8809897899627686,
      "learning_rate": 8.444379620728081e-07,
      "loss": 2.7474,
      "step": 224899
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3629677295684814,
      "learning_rate": 8.441312887245255e-07,
      "loss": 2.9173,
      "step": 224900
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.95755934715271,
      "learning_rate": 8.438246709949525e-07,
      "loss": 2.7186,
      "step": 224901
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.906810998916626,
      "learning_rate": 8.435181088841559e-07,
      "loss": 2.8436,
      "step": 224902
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.722825288772583,
      "learning_rate": 8.432116023921687e-07,
      "loss": 2.6479,
      "step": 224903
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.384280204772949,
      "learning_rate": 8.429051515191243e-07,
      "loss": 2.8807,
      "step": 224904
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8769142627716064,
      "learning_rate": 8.425987562649894e-07,
      "loss": 2.8134,
      "step": 224905
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8370473384857178,
      "learning_rate": 8.422924166298972e-07,
      "loss": 2.7692,
      "step": 224906
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8134028911590576,
      "learning_rate": 8.419861326138478e-07,
      "loss": 2.6362,
      "step": 224907
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.617929220199585,
      "learning_rate": 8.416799042169075e-07,
      "loss": 2.9285,
      "step": 224908
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.327757835388184,
      "learning_rate": 8.413737314391766e-07,
      "loss": 3.0395,
      "step": 224909
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2638678550720215,
      "learning_rate": 8.41067614280655e-07,
      "loss": 2.884,
      "step": 224910
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0336978435516357,
      "learning_rate": 8.407615527414424e-07,
      "loss": 2.8789,
      "step": 224911
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6018807888031006,
      "learning_rate": 8.404555468215723e-07,
      "loss": 2.7915,
      "step": 224912
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.913820505142212,
      "learning_rate": 8.401495965211447e-07,
      "loss": 3.0493,
      "step": 224913
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.322866439819336,
      "learning_rate": 8.39843701840126e-07,
      "loss": 2.724,
      "step": 224914
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.7998178005218506,
      "learning_rate": 8.395378627786497e-07,
      "loss": 2.7769,
      "step": 224915
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5965189933776855,
      "learning_rate": 8.392320793367491e-07,
      "loss": 3.0405,
      "step": 224916
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7121851444244385,
      "learning_rate": 8.389263515144906e-07,
      "loss": 2.892,
      "step": 224917
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.228919267654419,
      "learning_rate": 8.386206793119077e-07,
      "loss": 3.0129,
      "step": 224918
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1523265838623047,
      "learning_rate": 8.383150627291002e-07,
      "loss": 2.9063,
      "step": 224919
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8163673877716064,
      "learning_rate": 8.380095017660682e-07,
      "loss": 2.9665,
      "step": 224920
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.3655872344970703,
      "learning_rate": 8.377039964229115e-07,
      "loss": 2.546,
      "step": 224921
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.12115216255188,
      "learning_rate": 8.373985466996635e-07,
      "loss": 2.9449,
      "step": 224922
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5806288719177246,
      "learning_rate": 8.370931525963909e-07,
      "loss": 2.7853,
      "step": 224923
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3939082622528076,
      "learning_rate": 8.367878141131601e-07,
      "loss": 2.7972,
      "step": 224924
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7273125648498535,
      "learning_rate": 8.364825312500045e-07,
      "loss": 3.0338,
      "step": 224925
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.130197286605835,
      "learning_rate": 8.361773040070241e-07,
      "loss": 2.8885,
      "step": 224926
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8531038761138916,
      "learning_rate": 8.358721323842188e-07,
      "loss": 3.0288,
      "step": 224927
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3416030406951904,
      "learning_rate": 8.355670163816552e-07,
      "loss": 2.6994,
      "step": 224928
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9031779766082764,
      "learning_rate": 8.352619559994334e-07,
      "loss": 3.0243,
      "step": 224929
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.918630361557007,
      "learning_rate": 8.349569512375865e-07,
      "loss": 2.8635,
      "step": 224930
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5691287517547607,
      "learning_rate": 8.346520020961478e-07,
      "loss": 2.9078,
      "step": 224931
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.126802682876587,
      "learning_rate": 8.343471085752173e-07,
      "loss": 2.9696,
      "step": 224932
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5679197311401367,
      "learning_rate": 8.340422706748284e-07,
      "loss": 2.9063,
      "step": 224933
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.973440647125244,
      "learning_rate": 8.337374883950143e-07,
      "loss": 2.9674,
      "step": 224934
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9107277393341064,
      "learning_rate": 8.334327617358749e-07,
      "loss": 3.032,
      "step": 224935
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7706453800201416,
      "learning_rate": 8.331280906974436e-07,
      "loss": 3.2692,
      "step": 224936
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.8454861640930176,
      "learning_rate": 8.328234752797869e-07,
      "loss": 2.8298,
      "step": 224937
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6092915534973145,
      "learning_rate": 8.325189154829381e-07,
      "loss": 2.9806,
      "step": 224938
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.679184675216675,
      "learning_rate": 8.32214411306964e-07,
      "loss": 3.0105,
      "step": 224939
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.200130462646484,
      "learning_rate": 8.319099627519644e-07,
      "loss": 2.8986,
      "step": 224940
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.378749132156372,
      "learning_rate": 8.316055698179392e-07,
      "loss": 2.6549,
      "step": 224941
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0292763710021973,
      "learning_rate": 8.313012325049551e-07,
      "loss": 2.979,
      "step": 224942
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.7724833488464355,
      "learning_rate": 8.30996950813112e-07,
      "loss": 2.883,
      "step": 224943
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.634951114654541,
      "learning_rate": 8.3069272474241e-07,
      "loss": 2.9084,
      "step": 224944
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7148969173431396,
      "learning_rate": 8.303885542929156e-07,
      "loss": 2.9723,
      "step": 224945
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.9912309646606445,
      "learning_rate": 8.300844394647288e-07,
      "loss": 3.0325,
      "step": 224946
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0090651512145996,
      "learning_rate": 8.297803802578495e-07,
      "loss": 2.9394,
      "step": 224947
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1351194381713867,
      "learning_rate": 8.294763766723777e-07,
      "loss": 2.7029,
      "step": 224948
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.721665143966675,
      "learning_rate": 8.291724287083801e-07,
      "loss": 2.805,
      "step": 224949
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.7182116508483887,
      "learning_rate": 8.288685363658232e-07,
      "loss": 3.0142,
      "step": 224950
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3852860927581787,
      "learning_rate": 8.285646996448736e-07,
      "loss": 2.8818,
      "step": 224951
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.290825366973877,
      "learning_rate": 8.282609185455313e-07,
      "loss": 2.6875,
      "step": 224952
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.170973777770996,
      "learning_rate": 8.27957193067863e-07,
      "loss": 3.112,
      "step": 224953
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.07552433013916,
      "learning_rate": 8.276535232119019e-07,
      "loss": 2.6655,
      "step": 224954
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9024574756622314,
      "learning_rate": 8.273499089777813e-07,
      "loss": 3.1738,
      "step": 224955
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8909084796905518,
      "learning_rate": 8.270463503654678e-07,
      "loss": 2.935,
      "step": 224956
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6834771633148193,
      "learning_rate": 8.267428473750615e-07,
      "loss": 2.9989,
      "step": 224957
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.919833183288574,
      "learning_rate": 8.264394000065954e-07,
      "loss": 2.9166,
      "step": 224958
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.930319309234619,
      "learning_rate": 8.261360082601365e-07,
      "loss": 2.9212,
      "step": 224959
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.019073486328125,
      "learning_rate": 8.258326721357511e-07,
      "loss": 2.9991,
      "step": 224960
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0606164932250977,
      "learning_rate": 8.255293916335392e-07,
      "loss": 2.9381,
      "step": 224961
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.035557270050049,
      "learning_rate": 8.252261667534677e-07,
      "loss": 2.809,
      "step": 224962
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0950520038604736,
      "learning_rate": 8.249229974956362e-07,
      "loss": 3.0891,
      "step": 224963
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.832632541656494,
      "learning_rate": 8.246198838600781e-07,
      "loss": 2.9902,
      "step": 224964
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.07326078414917,
      "learning_rate": 8.243168258468935e-07,
      "loss": 3.1183,
      "step": 224965
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.935422658920288,
      "learning_rate": 8.240138234561155e-07,
      "loss": 3.1679,
      "step": 224966
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.605008125305176,
      "learning_rate": 8.237108766878109e-07,
      "loss": 2.7631,
      "step": 224967
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.013808012008667,
      "learning_rate": 8.234079855420128e-07,
      "loss": 2.9131,
      "step": 224968
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.089318037033081,
      "learning_rate": 8.23105150018788e-07,
      "loss": 3.064,
      "step": 224969
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.934081792831421,
      "learning_rate": 8.228023701182029e-07,
      "loss": 2.9821,
      "step": 224970
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.804278612136841,
      "learning_rate": 8.22499645840291e-07,
      "loss": 2.9989,
      "step": 224971
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.821465253829956,
      "learning_rate": 8.221969771851522e-07,
      "loss": 2.7707,
      "step": 224972
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.823594570159912,
      "learning_rate": 8.218943641528197e-07,
      "loss": 2.7876,
      "step": 224973
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.759622097015381,
      "learning_rate": 8.215918067432936e-07,
      "loss": 2.7992,
      "step": 224974
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9551994800567627,
      "learning_rate": 8.212893049567071e-07,
      "loss": 2.9583,
      "step": 224975
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.845301866531372,
      "learning_rate": 8.209868587930935e-07,
      "loss": 2.87,
      "step": 224976
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6857786178588867,
      "learning_rate": 8.206844682525193e-07,
      "loss": 2.8266,
      "step": 224977
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.747170925140381,
      "learning_rate": 8.20382133335018e-07,
      "loss": 2.6798,
      "step": 224978
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7421863079071045,
      "learning_rate": 8.200798540406562e-07,
      "loss": 2.9098,
      "step": 224979
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6823768615722656,
      "learning_rate": 8.19777630369467e-07,
      "loss": 2.5738,
      "step": 224980
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6732122898101807,
      "learning_rate": 8.194754623215505e-07,
      "loss": 2.7847,
      "step": 224981
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3289546966552734,
      "learning_rate": 8.191733498969399e-07,
      "loss": 3.1508,
      "step": 224982
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6212339401245117,
      "learning_rate": 8.18871293095702e-07,
      "loss": 2.7662,
      "step": 224983
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1881096363067627,
      "learning_rate": 8.185692919178366e-07,
      "loss": 2.6704,
      "step": 224984
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.847902297973633,
      "learning_rate": 8.182673463635103e-07,
      "loss": 2.9246,
      "step": 224985
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1925594806671143,
      "learning_rate": 8.179654564326898e-07,
      "loss": 2.8229,
      "step": 224986
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1786375045776367,
      "learning_rate": 8.176636221254418e-07,
      "loss": 2.7903,
      "step": 224987
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.863513708114624,
      "learning_rate": 8.173618434418328e-07,
      "loss": 2.7352,
      "step": 224988
    },
    {
      "epoch": 2.93,
      "grad_norm": 5.701101303100586,
      "learning_rate": 8.170601203819627e-07,
      "loss": 3.0743,
      "step": 224989
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7122485637664795,
      "learning_rate": 8.167584529457983e-07,
      "loss": 2.8814,
      "step": 224990
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.951326608657837,
      "learning_rate": 8.164568411334727e-07,
      "loss": 2.7153,
      "step": 224991
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6612725257873535,
      "learning_rate": 8.161552849450192e-07,
      "loss": 3.2137,
      "step": 224992
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1275179386138916,
      "learning_rate": 8.158537843805046e-07,
      "loss": 3.0635,
      "step": 224993
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9184467792510986,
      "learning_rate": 8.155523394399288e-07,
      "loss": 3.1264,
      "step": 224994
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.96626615524292,
      "learning_rate": 8.15250950123425e-07,
      "loss": 3.0618,
      "step": 224995
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.066643476486206,
      "learning_rate": 8.149496164309932e-07,
      "loss": 2.8158,
      "step": 224996
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5048561096191406,
      "learning_rate": 8.146483383627334e-07,
      "loss": 3.1024,
      "step": 224997
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6159615516662598,
      "learning_rate": 8.143471159186787e-07,
      "loss": 3.091,
      "step": 224998
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1177549362182617,
      "learning_rate": 8.140459490988627e-07,
      "loss": 3.3112,
      "step": 224999
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.405029296875,
      "learning_rate": 8.137448379033518e-07,
      "loss": 3.1011,
      "step": 225000
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8226120471954346,
      "learning_rate": 8.134437823322459e-07,
      "loss": 2.787,
      "step": 225001
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.83124041557312,
      "learning_rate": 8.131427823855452e-07,
      "loss": 2.8356,
      "step": 225002
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3199565410614014,
      "learning_rate": 8.128418380633162e-07,
      "loss": 2.9782,
      "step": 225003
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.159029722213745,
      "learning_rate": 8.125409493656588e-07,
      "loss": 2.8269,
      "step": 225004
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.092130184173584,
      "learning_rate": 8.122401162925729e-07,
      "loss": 2.9816,
      "step": 225005
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3910725116729736,
      "learning_rate": 8.119393388441586e-07,
      "loss": 2.8606,
      "step": 225006
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0835587978363037,
      "learning_rate": 8.116386170204492e-07,
      "loss": 2.8968,
      "step": 225007
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2761924266815186,
      "learning_rate": 8.113379508215112e-07,
      "loss": 2.9549,
      "step": 225008
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7578635215759277,
      "learning_rate": 8.11037340247378e-07,
      "loss": 3.089,
      "step": 225009
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3311727046966553,
      "learning_rate": 8.107367852981494e-07,
      "loss": 2.8668,
      "step": 225010
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.944087266921997,
      "learning_rate": 8.104362859737923e-07,
      "loss": 2.7929,
      "step": 225011
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2858433723449707,
      "learning_rate": 8.101358422744731e-07,
      "loss": 2.9675,
      "step": 225012
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.724111318588257,
      "learning_rate": 8.098354542001917e-07,
      "loss": 3.1126,
      "step": 225013
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.4664483070373535,
      "learning_rate": 8.095351217509816e-07,
      "loss": 2.8319,
      "step": 225014
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1355793476104736,
      "learning_rate": 8.092348449269759e-07,
      "loss": 3.0312,
      "step": 225015
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2330374717712402,
      "learning_rate": 8.089346237281413e-07,
      "loss": 2.9386,
      "step": 225016
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.8873605728149414,
      "learning_rate": 8.086344581545778e-07,
      "loss": 2.8689,
      "step": 225017
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.070615768432617,
      "learning_rate": 8.08334348206352e-07,
      "loss": 2.9349,
      "step": 225018
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1677944660186768,
      "learning_rate": 8.080342938834972e-07,
      "loss": 2.9662,
      "step": 225019
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.994030475616455,
      "learning_rate": 8.077342951860799e-07,
      "loss": 2.599,
      "step": 225020
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0579514503479004,
      "learning_rate": 8.074343521141335e-07,
      "loss": 2.7449,
      "step": 225021
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.924592971801758,
      "learning_rate": 8.07134464667758e-07,
      "loss": 2.7155,
      "step": 225022
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.89048171043396,
      "learning_rate": 8.068346328469532e-07,
      "loss": 2.9844,
      "step": 225023
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8725035190582275,
      "learning_rate": 8.065348566518193e-07,
      "loss": 2.6737,
      "step": 225024
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.064316272735596,
      "learning_rate": 8.062351360824226e-07,
      "loss": 2.8386,
      "step": 225025
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5221071243286133,
      "learning_rate": 8.059354711387634e-07,
      "loss": 2.8304,
      "step": 225026
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.310377359390259,
      "learning_rate": 8.056358618209413e-07,
      "loss": 2.9424,
      "step": 225027
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0825819969177246,
      "learning_rate": 8.053363081290232e-07,
      "loss": 3.0361,
      "step": 225028
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.325472831726074,
      "learning_rate": 8.050368100630089e-07,
      "loss": 3.0112,
      "step": 225029
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2824084758758545,
      "learning_rate": 8.047373676229985e-07,
      "loss": 2.998,
      "step": 225030
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.8133208751678467,
      "learning_rate": 8.044379808089919e-07,
      "loss": 3.0214,
      "step": 225031
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7413489818573,
      "learning_rate": 8.041386496211555e-07,
      "loss": 2.8214,
      "step": 225032
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.236888885498047,
      "learning_rate": 8.038393740594229e-07,
      "loss": 3.0392,
      "step": 225033
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.685506343841553,
      "learning_rate": 8.035401541239605e-07,
      "loss": 3.0644,
      "step": 225034
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.8825314044952393,
      "learning_rate": 8.032409898147352e-07,
      "loss": 2.8108,
      "step": 225035
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3689072132110596,
      "learning_rate": 8.029418811318133e-07,
      "loss": 2.8866,
      "step": 225036
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.013453722000122,
      "learning_rate": 8.02642828075295e-07,
      "loss": 2.9188,
      "step": 225037
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8841731548309326,
      "learning_rate": 8.023438306452135e-07,
      "loss": 3.1166,
      "step": 225038
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.099276542663574,
      "learning_rate": 8.020448888416353e-07,
      "loss": 3.0951,
      "step": 225039
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.69643235206604,
      "learning_rate": 8.017460026645939e-07,
      "loss": 3.1672,
      "step": 225040
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.087932586669922,
      "learning_rate": 8.014471721141558e-07,
      "loss": 2.9681,
      "step": 225041
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0093021392822266,
      "learning_rate": 8.011483971903875e-07,
      "loss": 2.8889,
      "step": 225042
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.803595781326294,
      "learning_rate": 8.008496778933227e-07,
      "loss": 2.9883,
      "step": 225043
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.633321523666382,
      "learning_rate": 8.005510142230276e-07,
      "loss": 2.8197,
      "step": 225044
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.112442970275879,
      "learning_rate": 8.002524061795356e-07,
      "loss": 2.9517,
      "step": 225045
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1471424102783203,
      "learning_rate": 7.999538537629469e-07,
      "loss": 2.8806,
      "step": 225046
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0348825454711914,
      "learning_rate": 7.996553569732944e-07,
      "loss": 2.9961,
      "step": 225047
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.975254774093628,
      "learning_rate": 7.99356915810645e-07,
      "loss": 2.9831,
      "step": 225048
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7649648189544678,
      "learning_rate": 7.990585302750318e-07,
      "loss": 3.0253,
      "step": 225049
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.951813220977783,
      "learning_rate": 7.987602003665217e-07,
      "loss": 3.202,
      "step": 225050
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.011930465698242,
      "learning_rate": 7.984619260851477e-07,
      "loss": 3.1797,
      "step": 225051
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0476901531219482,
      "learning_rate": 7.981637074310432e-07,
      "loss": 2.8722,
      "step": 225052
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.794358968734741,
      "learning_rate": 7.978655444041749e-07,
      "loss": 2.9551,
      "step": 225053
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.7031381130218506,
      "learning_rate": 7.975674370046093e-07,
      "loss": 2.7522,
      "step": 225054
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5752925872802734,
      "learning_rate": 7.972693852324464e-07,
      "loss": 2.8582,
      "step": 225055
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.849635124206543,
      "learning_rate": 7.969713890877194e-07,
      "loss": 2.8632,
      "step": 225056
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.037152051925659,
      "learning_rate": 7.966734485704951e-07,
      "loss": 2.5401,
      "step": 225057
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6883225440979004,
      "learning_rate": 7.963755636808066e-07,
      "loss": 2.7207,
      "step": 225058
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.827789068222046,
      "learning_rate": 7.960777344187208e-07,
      "loss": 2.8445,
      "step": 225059
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0364859104156494,
      "learning_rate": 7.957799607842708e-07,
      "loss": 2.8569,
      "step": 225060
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.232232093811035,
      "learning_rate": 7.954822427775565e-07,
      "loss": 3.0986,
      "step": 225061
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.132500410079956,
      "learning_rate": 7.951845803986113e-07,
      "loss": 2.9743,
      "step": 225062
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0160136222839355,
      "learning_rate": 7.948869736474684e-07,
      "loss": 2.6022,
      "step": 225063
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.646833658218384,
      "learning_rate": 7.945894225242278e-07,
      "loss": 2.8152,
      "step": 225064
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.117022752761841,
      "learning_rate": 7.942919270288894e-07,
      "loss": 2.8395,
      "step": 225065
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2228360176086426,
      "learning_rate": 7.939944871615866e-07,
      "loss": 2.984,
      "step": 225066
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8190367221832275,
      "learning_rate": 7.936971029222861e-07,
      "loss": 3.0169,
      "step": 225067
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7995734214782715,
      "learning_rate": 7.933997743111209e-07,
      "loss": 2.8735,
      "step": 225068
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1948964595794678,
      "learning_rate": 7.931025013280578e-07,
      "loss": 2.9995,
      "step": 225069
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7733142375946045,
      "learning_rate": 7.928052839732635e-07,
      "loss": 2.7311,
      "step": 225070
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6003291606903076,
      "learning_rate": 7.925081222467044e-07,
      "loss": 2.9508,
      "step": 225071
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5951950550079346,
      "learning_rate": 7.922110161484807e-07,
      "loss": 2.9626,
      "step": 225072
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.01944637298584,
      "learning_rate": 7.919139656786256e-07,
      "loss": 2.7338,
      "step": 225073
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0088512897491455,
      "learning_rate": 7.916169708372056e-07,
      "loss": 2.8382,
      "step": 225074
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2760963439941406,
      "learning_rate": 7.91320031624254e-07,
      "loss": 2.928,
      "step": 225075
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.852097511291504,
      "learning_rate": 7.910231480398377e-07,
      "loss": 2.8872,
      "step": 225076
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.946065664291382,
      "learning_rate": 7.907263200840564e-07,
      "loss": 3.1178,
      "step": 225077
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1706314086914062,
      "learning_rate": 7.9042954775691e-07,
      "loss": 2.7915,
      "step": 225078
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2727861404418945,
      "learning_rate": 7.901328310584653e-07,
      "loss": 2.9988,
      "step": 225079
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2481741905212402,
      "learning_rate": 7.89836169988789e-07,
      "loss": 2.982,
      "step": 225080
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6728880405426025,
      "learning_rate": 7.895395645479141e-07,
      "loss": 2.7389,
      "step": 225081
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0970230102539062,
      "learning_rate": 7.892430147359407e-07,
      "loss": 2.9926,
      "step": 225082
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5703556537628174,
      "learning_rate": 7.889465205528688e-07,
      "loss": 3.0467,
      "step": 225083
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.462125301361084,
      "learning_rate": 7.886500819987984e-07,
      "loss": 2.9359,
      "step": 225084
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7228729724884033,
      "learning_rate": 7.883536990737627e-07,
      "loss": 2.7683,
      "step": 225085
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.789328098297119,
      "learning_rate": 7.88057371777795e-07,
      "loss": 2.988,
      "step": 225086
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.18802547454834,
      "learning_rate": 7.877611001109951e-07,
      "loss": 2.8209,
      "step": 225087
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9434783458709717,
      "learning_rate": 7.874648840733965e-07,
      "loss": 3.1212,
      "step": 225088
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.978732109069824,
      "learning_rate": 7.871687236650659e-07,
      "loss": 2.8106,
      "step": 225089
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2737414836883545,
      "learning_rate": 7.868726188860364e-07,
      "loss": 2.8947,
      "step": 225090
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.361326217651367,
      "learning_rate": 7.865765697363746e-07,
      "loss": 2.919,
      "step": 225091
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.053471326828003,
      "learning_rate": 7.862805762161473e-07,
      "loss": 2.8156,
      "step": 225092
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9045369625091553,
      "learning_rate": 7.859846383253876e-07,
      "loss": 2.7756,
      "step": 225093
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8382468223571777,
      "learning_rate": 7.856887560641956e-07,
      "loss": 2.9715,
      "step": 225094
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1910548210144043,
      "learning_rate": 7.853929294325712e-07,
      "loss": 2.9445,
      "step": 225095
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.202613353729248,
      "learning_rate": 7.850971584305477e-07,
      "loss": 2.515,
      "step": 225096
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7672150135040283,
      "learning_rate": 7.848014430582583e-07,
      "loss": 2.7959,
      "step": 225097
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.060306072235107,
      "learning_rate": 7.845057833157365e-07,
      "loss": 2.9686,
      "step": 225098
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6810081005096436,
      "learning_rate": 7.842101792030153e-07,
      "loss": 2.7698,
      "step": 225099
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.237785816192627,
      "learning_rate": 7.839146307201615e-07,
      "loss": 2.9703,
      "step": 225100
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.259114980697632,
      "learning_rate": 7.836191378672085e-07,
      "loss": 2.7833,
      "step": 225101
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9957821369171143,
      "learning_rate": 7.833237006442227e-07,
      "loss": 2.7691,
      "step": 225102
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6131694316864014,
      "learning_rate": 7.83028319051271e-07,
      "loss": 2.8994,
      "step": 225103
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.841567277908325,
      "learning_rate": 7.827329930884196e-07,
      "loss": 3.0542,
      "step": 225104
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.819164752960205,
      "learning_rate": 7.824377227557022e-07,
      "loss": 3.1123,
      "step": 225105
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.586326837539673,
      "learning_rate": 7.821425080531518e-07,
      "loss": 2.8033,
      "step": 225106
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.046298027038574,
      "learning_rate": 7.818473489809018e-07,
      "loss": 3.0796,
      "step": 225107
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.522742748260498,
      "learning_rate": 7.815522455389189e-07,
      "loss": 2.8626,
      "step": 225108
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.677173376083374,
      "learning_rate": 7.81257197727303e-07,
      "loss": 2.8689,
      "step": 225109
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1281492710113525,
      "learning_rate": 7.809622055460874e-07,
      "loss": 2.9547,
      "step": 225110
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2144277095794678,
      "learning_rate": 7.806672689953386e-07,
      "loss": 3.2059,
      "step": 225111
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1627514362335205,
      "learning_rate": 7.803723880751233e-07,
      "loss": 3.1344,
      "step": 225112
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7924740314483643,
      "learning_rate": 7.800775627854749e-07,
      "loss": 3.0646,
      "step": 225113
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9439425468444824,
      "learning_rate": 7.7978279312646e-07,
      "loss": 3.0285,
      "step": 225114
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2035257816314697,
      "learning_rate": 7.79488079098145e-07,
      "loss": 3.0515,
      "step": 225115
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.678380012512207,
      "learning_rate": 7.791934207005634e-07,
      "loss": 2.9337,
      "step": 225116
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.774172306060791,
      "learning_rate": 7.788988179337818e-07,
      "loss": 3.1817,
      "step": 225117
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.075631618499756,
      "learning_rate": 7.786042707978335e-07,
      "loss": 2.9327,
      "step": 225118
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9985294342041016,
      "learning_rate": 7.783097792928184e-07,
      "loss": 3.2846,
      "step": 225119
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.871157646179199,
      "learning_rate": 7.780153434187364e-07,
      "loss": 2.9627,
      "step": 225120
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0390515327453613,
      "learning_rate": 7.777209631756876e-07,
      "loss": 2.7225,
      "step": 225121
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9586007595062256,
      "learning_rate": 7.774266385637051e-07,
      "loss": 3.1028,
      "step": 225122
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8914437294006348,
      "learning_rate": 7.771323695828557e-07,
      "loss": 3.0114,
      "step": 225123
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.028008460998535,
      "learning_rate": 7.768381562331394e-07,
      "loss": 3.138,
      "step": 225124
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7923104763031006,
      "learning_rate": 7.765439985147226e-07,
      "loss": 3.1184,
      "step": 225125
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.883627414703369,
      "learning_rate": 7.762498964275721e-07,
      "loss": 2.9178,
      "step": 225126
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.930539846420288,
      "learning_rate": 7.759558499717544e-07,
      "loss": 2.8818,
      "step": 225127
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.918645143508911,
      "learning_rate": 7.756618591473363e-07,
      "loss": 3.1317,
      "step": 225128
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.29144024848938,
      "learning_rate": 7.753679239543509e-07,
      "loss": 3.0495,
      "step": 225129
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6333093643188477,
      "learning_rate": 7.750740443928982e-07,
      "loss": 2.926,
      "step": 225130
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.282537937164307,
      "learning_rate": 7.747802204630116e-07,
      "loss": 2.8016,
      "step": 225131
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0831615924835205,
      "learning_rate": 7.744864521647242e-07,
      "loss": 2.9949,
      "step": 225132
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.721280336380005,
      "learning_rate": 7.741927394981362e-07,
      "loss": 3.0094,
      "step": 225133
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.677612543106079,
      "learning_rate": 7.738990824632474e-07,
      "loss": 2.9153,
      "step": 225134
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.233678102493286,
      "learning_rate": 7.736054810601577e-07,
      "loss": 3.0217,
      "step": 225135
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3392691612243652,
      "learning_rate": 7.733119352888672e-07,
      "loss": 2.7891,
      "step": 225136
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.677011013031006,
      "learning_rate": 7.730184451495092e-07,
      "loss": 2.6397,
      "step": 225137
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.310105800628662,
      "learning_rate": 7.727250106420835e-07,
      "loss": 2.8846,
      "step": 225138
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0343177318573,
      "learning_rate": 7.724316317666568e-07,
      "loss": 2.881,
      "step": 225139
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9430317878723145,
      "learning_rate": 7.721383085232624e-07,
      "loss": 3.0093,
      "step": 225140
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.116049289703369,
      "learning_rate": 7.718450409120002e-07,
      "loss": 3.0438,
      "step": 225141
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0146617889404297,
      "learning_rate": 7.715518289329037e-07,
      "loss": 2.8359,
      "step": 225142
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7937114238739014,
      "learning_rate": 7.712586725860059e-07,
      "loss": 2.8967,
      "step": 225143
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7907187938690186,
      "learning_rate": 7.709655718713737e-07,
      "loss": 2.7778,
      "step": 225144
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.798384189605713,
      "learning_rate": 7.706725267891068e-07,
      "loss": 2.8784,
      "step": 225145
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7917685508728027,
      "learning_rate": 7.703795373391719e-07,
      "loss": 2.8354,
      "step": 225146
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.932995557785034,
      "learning_rate": 7.700866035217024e-07,
      "loss": 2.6964,
      "step": 225147
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6672637462615967,
      "learning_rate": 7.697937253366982e-07,
      "loss": 2.841,
      "step": 225148
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.852792739868164,
      "learning_rate": 7.695009027842591e-07,
      "loss": 3.0956,
      "step": 225149
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0156242847442627,
      "learning_rate": 7.692081358644187e-07,
      "loss": 2.8524,
      "step": 225150
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.140200614929199,
      "learning_rate": 7.6891542457721e-07,
      "loss": 2.8157,
      "step": 225151
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3065240383148193,
      "learning_rate": 7.686227689227331e-07,
      "loss": 2.9989,
      "step": 225152
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2081458568573,
      "learning_rate": 7.683301689010213e-07,
      "loss": 2.7885,
      "step": 225153
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1361758708953857,
      "learning_rate": 7.680376245120745e-07,
      "loss": 2.8695,
      "step": 225154
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.736827850341797,
      "learning_rate": 7.677451357560593e-07,
      "loss": 3.1277,
      "step": 225155
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.308771848678589,
      "learning_rate": 7.674527026329424e-07,
      "loss": 2.7974,
      "step": 225156
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2426865100860596,
      "learning_rate": 7.671603251428237e-07,
      "loss": 3.0241,
      "step": 225157
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.042011022567749,
      "learning_rate": 7.668680032857033e-07,
      "loss": 3.1319,
      "step": 225158
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6094439029693604,
      "learning_rate": 7.66575737061681e-07,
      "loss": 2.8923,
      "step": 225159
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8047335147857666,
      "learning_rate": 7.662835264707901e-07,
      "loss": 2.885,
      "step": 225160
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.763169288635254,
      "learning_rate": 7.659913715131305e-07,
      "loss": 2.8928,
      "step": 225161
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.925358533859253,
      "learning_rate": 7.656992721887024e-07,
      "loss": 2.9576,
      "step": 225162
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7429916858673096,
      "learning_rate": 7.654072284976054e-07,
      "loss": 2.8037,
      "step": 225163
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.186434030532837,
      "learning_rate": 7.651152404398064e-07,
      "loss": 2.8301,
      "step": 225164
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.261460304260254,
      "learning_rate": 7.648233080154719e-07,
      "loss": 2.8419,
      "step": 225165
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.723461866378784,
      "learning_rate": 7.64531431224602e-07,
      "loss": 2.8058,
      "step": 225166
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.448434591293335,
      "learning_rate": 7.642396100672299e-07,
      "loss": 3.0298,
      "step": 225167
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.518308401107788,
      "learning_rate": 7.639478445434555e-07,
      "loss": 3.0209,
      "step": 225168
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9090189933776855,
      "learning_rate": 7.636561346533121e-07,
      "loss": 2.8884,
      "step": 225169
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0247466564178467,
      "learning_rate": 7.633644803968331e-07,
      "loss": 2.897,
      "step": 225170
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.743745803833008,
      "learning_rate": 7.630728817741183e-07,
      "loss": 2.8684,
      "step": 225171
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7286293506622314,
      "learning_rate": 7.627813387851678e-07,
      "loss": 2.8833,
      "step": 225172
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9927704334259033,
      "learning_rate": 7.624898514300814e-07,
      "loss": 2.9998,
      "step": 225173
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.146317958831787,
      "learning_rate": 7.621984197088926e-07,
      "loss": 3.0832,
      "step": 225174
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.867682456970215,
      "learning_rate": 7.619070436216679e-07,
      "loss": 2.8579,
      "step": 225175
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.863025665283203,
      "learning_rate": 7.616157231684406e-07,
      "loss": 3.1166,
      "step": 225176
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5138399600982666,
      "learning_rate": 7.613244583493106e-07,
      "loss": 2.9298,
      "step": 225177
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7558743953704834,
      "learning_rate": 7.610332491642446e-07,
      "loss": 2.891,
      "step": 225178
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.92075514793396,
      "learning_rate": 7.607420956134092e-07,
      "loss": 2.9386,
      "step": 225179
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0521769523620605,
      "learning_rate": 7.60450997696771e-07,
      "loss": 3.1757,
      "step": 225180
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.615931510925293,
      "learning_rate": 7.601599554143966e-07,
      "loss": 2.8527,
      "step": 225181
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7193033695220947,
      "learning_rate": 7.598689687663861e-07,
      "loss": 3.0882,
      "step": 225182
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.798626184463501,
      "learning_rate": 7.595780377527394e-07,
      "loss": 2.9384,
      "step": 225183
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9530112743377686,
      "learning_rate": 7.592871623735563e-07,
      "loss": 3.0398,
      "step": 225184
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7862095832824707,
      "learning_rate": 7.589963426288703e-07,
      "loss": 2.5643,
      "step": 225185
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4541311264038086,
      "learning_rate": 7.587055785187146e-07,
      "loss": 2.8963,
      "step": 225186
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6983401775360107,
      "learning_rate": 7.584148700431891e-07,
      "loss": 2.6702,
      "step": 225187
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.815429210662842,
      "learning_rate": 7.581242172022939e-07,
      "loss": 3.1413,
      "step": 225188
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.9133856296539307,
      "learning_rate": 7.578336199961288e-07,
      "loss": 2.7816,
      "step": 225189
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.032022476196289,
      "learning_rate": 7.575430784247271e-07,
      "loss": 3.178,
      "step": 225190
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.132676839828491,
      "learning_rate": 7.572525924881556e-07,
      "loss": 2.8727,
      "step": 225191
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8920347690582275,
      "learning_rate": 7.569621621864807e-07,
      "loss": 3.1419,
      "step": 225192
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.850029468536377,
      "learning_rate": 7.566717875197026e-07,
      "loss": 3.0236,
      "step": 225193
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9730684757232666,
      "learning_rate": 7.56381468487921e-07,
      "loss": 3.174,
      "step": 225194
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.576557159423828,
      "learning_rate": 7.560912050911694e-07,
      "loss": 2.7124,
      "step": 225195
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8614308834075928,
      "learning_rate": 7.558009973295143e-07,
      "loss": 3.0045,
      "step": 225196
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6889734268188477,
      "learning_rate": 7.55510845202989e-07,
      "loss": 2.9083,
      "step": 225197
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1189844608306885,
      "learning_rate": 7.552207487116935e-07,
      "loss": 2.8337,
      "step": 225198
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.5846686363220215,
      "learning_rate": 7.549307078556277e-07,
      "loss": 2.8331,
      "step": 225199
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6928837299346924,
      "learning_rate": 7.546407226348584e-07,
      "loss": 3.1124,
      "step": 225200
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.05277943611145,
      "learning_rate": 7.54350793049452e-07,
      "loss": 2.8837,
      "step": 225201
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.904627561569214,
      "learning_rate": 7.54060919099475e-07,
      "loss": 2.7593,
      "step": 225202
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.616321086883545,
      "learning_rate": 7.537711007849612e-07,
      "loss": 3.0221,
      "step": 225203
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9671385288238525,
      "learning_rate": 7.534813381059768e-07,
      "loss": 2.8214,
      "step": 225204
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5327208042144775,
      "learning_rate": 7.531916310625885e-07,
      "loss": 3.0538,
      "step": 225205
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.7486634254455566,
      "learning_rate": 7.529019796547964e-07,
      "loss": 2.8388,
      "step": 225206
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4704976081848145,
      "learning_rate": 7.526123838827003e-07,
      "loss": 2.877,
      "step": 225207
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5854382514953613,
      "learning_rate": 7.523228437463668e-07,
      "loss": 2.8863,
      "step": 225208
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9489803314208984,
      "learning_rate": 7.520333592457961e-07,
      "loss": 2.9798,
      "step": 225209
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9865880012512207,
      "learning_rate": 7.517439303810547e-07,
      "loss": 2.7796,
      "step": 225210
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.221613883972168,
      "learning_rate": 7.514545571522423e-07,
      "loss": 2.9212,
      "step": 225211
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.305084466934204,
      "learning_rate": 7.511652395593925e-07,
      "loss": 3.0087,
      "step": 225212
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.151594638824463,
      "learning_rate": 7.508759776025053e-07,
      "loss": 2.8206,
      "step": 225213
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9288384914398193,
      "learning_rate": 7.505867712817138e-07,
      "loss": 2.7854,
      "step": 225214
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7802188396453857,
      "learning_rate": 7.502976205970179e-07,
      "loss": 2.9044,
      "step": 225215
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9048495292663574,
      "learning_rate": 7.500085255485177e-07,
      "loss": 2.8248,
      "step": 225216
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6862599849700928,
      "learning_rate": 7.497194861362466e-07,
      "loss": 2.9883,
      "step": 225217
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9895782470703125,
      "learning_rate": 7.494305023602043e-07,
      "loss": 2.7859,
      "step": 225218
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4619975090026855,
      "learning_rate": 7.491415742205243e-07,
      "loss": 2.9028,
      "step": 225219
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.916743278503418,
      "learning_rate": 7.488527017172064e-07,
      "loss": 3.1294,
      "step": 225220
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9507973194122314,
      "learning_rate": 7.485638848503505e-07,
      "loss": 3.1867,
      "step": 225221
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0879557132720947,
      "learning_rate": 7.482751236199902e-07,
      "loss": 3.0236,
      "step": 225222
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5467209815979004,
      "learning_rate": 7.479864180261585e-07,
      "loss": 2.8011,
      "step": 225223
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.00248646736145,
      "learning_rate": 7.476977680689222e-07,
      "loss": 2.82,
      "step": 225224
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1070923805236816,
      "learning_rate": 7.474091737483145e-07,
      "loss": 2.9648,
      "step": 225225
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1375932693481445,
      "learning_rate": 7.471206350644687e-07,
      "loss": 2.8312,
      "step": 225226
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.673076868057251,
      "learning_rate": 7.468321520173182e-07,
      "loss": 3.0507,
      "step": 225227
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.062657594680786,
      "learning_rate": 7.465437246070294e-07,
      "loss": 2.8593,
      "step": 225228
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3344194889068604,
      "learning_rate": 7.462553528336024e-07,
      "loss": 2.7688,
      "step": 225229
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4110236167907715,
      "learning_rate": 7.459670366970705e-07,
      "loss": 3.0386,
      "step": 225230
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.005995273590088,
      "learning_rate": 7.456787761975336e-07,
      "loss": 2.7853,
      "step": 225231
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.081416130065918,
      "learning_rate": 7.45390571335025e-07,
      "loss": 2.7228,
      "step": 225232
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.052621603012085,
      "learning_rate": 7.451024221095781e-07,
      "loss": 2.8546,
      "step": 225233
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.230788230895996,
      "learning_rate": 7.448143285212594e-07,
      "loss": 2.9065,
      "step": 225234
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.511134147644043,
      "learning_rate": 7.445262905701688e-07,
      "loss": 3.0368,
      "step": 225235
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.081083297729492,
      "learning_rate": 7.442383082563064e-07,
      "loss": 2.6963,
      "step": 225236
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.104963302612305,
      "learning_rate": 7.439503815797054e-07,
      "loss": 2.9171,
      "step": 225237
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.3262627124786377,
      "learning_rate": 7.436625105404659e-07,
      "loss": 2.954,
      "step": 225238
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2067530155181885,
      "learning_rate": 7.433746951386544e-07,
      "loss": 3.0633,
      "step": 225239
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7527527809143066,
      "learning_rate": 7.430869353742708e-07,
      "loss": 2.8254,
      "step": 225240
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1339621543884277,
      "learning_rate": 7.427992312474151e-07,
      "loss": 2.6857,
      "step": 225241
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1066925525665283,
      "learning_rate": 7.425115827581207e-07,
      "loss": 3.0145,
      "step": 225242
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.528264284133911,
      "learning_rate": 7.422239899064208e-07,
      "loss": 2.763,
      "step": 225243
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.058659315109253,
      "learning_rate": 7.419364526924154e-07,
      "loss": 2.909,
      "step": 225244
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.961465835571289,
      "learning_rate": 7.416489711161377e-07,
      "loss": 2.907,
      "step": 225245
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4162027835845947,
      "learning_rate": 7.413615451776212e-07,
      "loss": 2.7931,
      "step": 225246
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1465890407562256,
      "learning_rate": 7.410741748769323e-07,
      "loss": 3.0929,
      "step": 225247
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3838274478912354,
      "learning_rate": 7.407868602141376e-07,
      "loss": 2.7348,
      "step": 225248
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.937747001647949,
      "learning_rate": 7.404996011892705e-07,
      "loss": 2.8222,
      "step": 225249
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.9806463718414307,
      "learning_rate": 7.402123978023977e-07,
      "loss": 3.0356,
      "step": 225250
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7561075687408447,
      "learning_rate": 7.399252500535524e-07,
      "loss": 2.979,
      "step": 225251
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1636710166931152,
      "learning_rate": 7.396381579428013e-07,
      "loss": 2.8062,
      "step": 225252
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0421507358551025,
      "learning_rate": 7.393511214702441e-07,
      "loss": 3.132,
      "step": 225253
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8616888523101807,
      "learning_rate": 7.390641406358477e-07,
      "loss": 2.9399,
      "step": 225254
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.610703706741333,
      "learning_rate": 7.38777215439712e-07,
      "loss": 2.9447,
      "step": 225255
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1243278980255127,
      "learning_rate": 7.384903458819036e-07,
      "loss": 2.9403,
      "step": 225256
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.983123779296875,
      "learning_rate": 7.382035319624558e-07,
      "loss": 2.9259,
      "step": 225257
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5777132511138916,
      "learning_rate": 7.379167736814351e-07,
      "loss": 3.048,
      "step": 225258
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.895405292510986,
      "learning_rate": 7.37630071038875e-07,
      "loss": 3.1642,
      "step": 225259
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.845773458480835,
      "learning_rate": 7.373434240348086e-07,
      "loss": 2.9699,
      "step": 225260
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.37190580368042,
      "learning_rate": 7.370568326693693e-07,
      "loss": 3.0631,
      "step": 225261
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3101541996002197,
      "learning_rate": 7.367702969425238e-07,
      "loss": 2.7661,
      "step": 225262
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.85212779045105,
      "learning_rate": 7.364838168543719e-07,
      "loss": 2.7892,
      "step": 225263
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.139044761657715,
      "learning_rate": 7.361973924049469e-07,
      "loss": 3.1267,
      "step": 225264
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8348615169525146,
      "learning_rate": 7.359110235943488e-07,
      "loss": 2.7807,
      "step": 225265
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.414517879486084,
      "learning_rate": 7.356247104225443e-07,
      "loss": 2.7971,
      "step": 225266
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5361669063568115,
      "learning_rate": 7.353384528896667e-07,
      "loss": 2.8402,
      "step": 225267
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.810734272003174,
      "learning_rate": 7.35052250995749e-07,
      "loss": 2.9744,
      "step": 225268
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5114243030548096,
      "learning_rate": 7.347661047408249e-07,
      "loss": 3.1647,
      "step": 225269
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5508296489715576,
      "learning_rate": 7.344800141249275e-07,
      "loss": 2.829,
      "step": 225270
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1498165130615234,
      "learning_rate": 7.341939791481566e-07,
      "loss": 2.7754,
      "step": 225271
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9225685596466064,
      "learning_rate": 7.339079998105791e-07,
      "loss": 3.0237,
      "step": 225272
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.71120023727417,
      "learning_rate": 7.336220761121947e-07,
      "loss": 2.7869,
      "step": 225273
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.00921630859375,
      "learning_rate": 7.333362080530703e-07,
      "loss": 2.8249,
      "step": 225274
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4041247367858887,
      "learning_rate": 7.330503956332723e-07,
      "loss": 2.8151,
      "step": 225275
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.050079584121704,
      "learning_rate": 7.327646388528341e-07,
      "loss": 3.1768,
      "step": 225276
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9639956951141357,
      "learning_rate": 7.324789377118556e-07,
      "loss": 3.0457,
      "step": 225277
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.148221015930176,
      "learning_rate": 7.321932922103369e-07,
      "loss": 2.7635,
      "step": 225278
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.80841326713562,
      "learning_rate": 7.319077023483777e-07,
      "loss": 3.011,
      "step": 225279
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7185251712799072,
      "learning_rate": 7.316221681259782e-07,
      "loss": 2.7887,
      "step": 225280
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9607744216918945,
      "learning_rate": 7.313366895432049e-07,
      "loss": 3.0678,
      "step": 225281
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4755916595458984,
      "learning_rate": 7.310512666001578e-07,
      "loss": 2.6916,
      "step": 225282
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8835537433624268,
      "learning_rate": 7.307658992968368e-07,
      "loss": 2.9206,
      "step": 225283
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9586753845214844,
      "learning_rate": 7.304805876333419e-07,
      "loss": 2.864,
      "step": 225284
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8385415077209473,
      "learning_rate": 7.301953316097064e-07,
      "loss": 2.7269,
      "step": 225285
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.360934257507324,
      "learning_rate": 7.299101312259304e-07,
      "loss": 3.0515,
      "step": 225286
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.801795482635498,
      "learning_rate": 7.296249864821468e-07,
      "loss": 2.8943,
      "step": 225287
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0273654460906982,
      "learning_rate": 7.29339897378356e-07,
      "loss": 3.0553,
      "step": 225288
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.694399833679199,
      "learning_rate": 7.290548639146576e-07,
      "loss": 2.8708,
      "step": 225289
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.649416446685791,
      "learning_rate": 7.287698860910519e-07,
      "loss": 2.8785,
      "step": 225290
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.986846446990967,
      "learning_rate": 7.284849639076384e-07,
      "loss": 2.7142,
      "step": 225291
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8615264892578125,
      "learning_rate": 7.282000973644175e-07,
      "loss": 2.9043,
      "step": 225292
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.273383378982544,
      "learning_rate": 7.27915286461489e-07,
      "loss": 2.8314,
      "step": 225293
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.6579396724700928,
      "learning_rate": 7.276305311988861e-07,
      "loss": 3.1316,
      "step": 225294
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.611274480819702,
      "learning_rate": 7.273458315766756e-07,
      "loss": 2.7877,
      "step": 225295
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1126763820648193,
      "learning_rate": 7.27061187594924e-07,
      "loss": 2.9412,
      "step": 225296
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7619338035583496,
      "learning_rate": 7.267765992536312e-07,
      "loss": 3.0091,
      "step": 225297
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4954373836517334,
      "learning_rate": 7.26492066552864e-07,
      "loss": 2.8825,
      "step": 225298
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.319448232650757,
      "learning_rate": 7.262075894927222e-07,
      "loss": 2.7501,
      "step": 225299
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8141322135925293,
      "learning_rate": 7.259231680732059e-07,
      "loss": 3.049,
      "step": 225300
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.976438283920288,
      "learning_rate": 7.256388022944148e-07,
      "loss": 2.7848,
      "step": 225301
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8631644248962402,
      "learning_rate": 7.253544921563492e-07,
      "loss": 2.8539,
      "step": 225302
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7139601707458496,
      "learning_rate": 7.250702376591089e-07,
      "loss": 2.9103,
      "step": 225303
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0228593349456787,
      "learning_rate": 7.247860388027271e-07,
      "loss": 2.9736,
      "step": 225304
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1750354766845703,
      "learning_rate": 7.245018955872372e-07,
      "loss": 3.0676,
      "step": 225305
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9796764850616455,
      "learning_rate": 7.242178080127392e-07,
      "loss": 2.8672,
      "step": 225306
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.088177442550659,
      "learning_rate": 7.239337760792329e-07,
      "loss": 2.9016,
      "step": 225307
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0134811401367188,
      "learning_rate": 7.236497997868185e-07,
      "loss": 2.976,
      "step": 225308
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.931027412414551,
      "learning_rate": 7.233658791354957e-07,
      "loss": 2.9341,
      "step": 225309
    },
    {
      "epoch": 2.93,
      "grad_norm": 6.166818141937256,
      "learning_rate": 7.230820141253646e-07,
      "loss": 2.8158,
      "step": 225310
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8468945026397705,
      "learning_rate": 7.227982047564917e-07,
      "loss": 3.2238,
      "step": 225311
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.380685329437256,
      "learning_rate": 7.225144510288439e-07,
      "loss": 2.9083,
      "step": 225312
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.821434497833252,
      "learning_rate": 7.222307529425875e-07,
      "loss": 2.7125,
      "step": 225313
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.330472946166992,
      "learning_rate": 7.219471104976893e-07,
      "loss": 2.8934,
      "step": 225314
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.492161273956299,
      "learning_rate": 7.216635236942159e-07,
      "loss": 3.0175,
      "step": 225315
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1786959171295166,
      "learning_rate": 7.213799925322672e-07,
      "loss": 2.8242,
      "step": 225316
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8550968170166016,
      "learning_rate": 7.210965170118433e-07,
      "loss": 3.1205,
      "step": 225317
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.477140188217163,
      "learning_rate": 7.208130971330107e-07,
      "loss": 3.0142,
      "step": 225318
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.285963535308838,
      "learning_rate": 7.205297328958359e-07,
      "loss": 2.8261,
      "step": 225319
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.00239634513855,
      "learning_rate": 7.202464243003525e-07,
      "loss": 3.0105,
      "step": 225320
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0745887756347656,
      "learning_rate": 7.199631713466602e-07,
      "loss": 2.9691,
      "step": 225321
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3432974815368652,
      "learning_rate": 7.196799740347259e-07,
      "loss": 2.9945,
      "step": 225322
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.253032922744751,
      "learning_rate": 7.193968323647159e-07,
      "loss": 2.8369,
      "step": 225323
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8950936794281006,
      "learning_rate": 7.191137463365637e-07,
      "loss": 2.9058,
      "step": 225324
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.886512041091919,
      "learning_rate": 7.188307159504026e-07,
      "loss": 2.9907,
      "step": 225325
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.78661584854126,
      "learning_rate": 7.185477412062657e-07,
      "loss": 2.8144,
      "step": 225326
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.167398452758789,
      "learning_rate": 7.182648221041865e-07,
      "loss": 3.0435,
      "step": 225327
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.4518473148345947,
      "learning_rate": 7.179819586442315e-07,
      "loss": 2.8617,
      "step": 225328
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.01716685295105,
      "learning_rate": 7.176991508264341e-07,
      "loss": 2.8284,
      "step": 225329
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0822057723999023,
      "learning_rate": 7.174163986508941e-07,
      "loss": 2.7162,
      "step": 225330
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.628873586654663,
      "learning_rate": 7.171337021176449e-07,
      "loss": 2.9691,
      "step": 225331
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.5895087718963623,
      "learning_rate": 7.168510612266865e-07,
      "loss": 2.9427,
      "step": 225332
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.722679853439331,
      "learning_rate": 7.16568475978152e-07,
      "loss": 2.9918,
      "step": 225333
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9179675579071045,
      "learning_rate": 7.162859463720416e-07,
      "loss": 3.0351,
      "step": 225334
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.13873028755188,
      "learning_rate": 7.160034724084552e-07,
      "loss": 3.0624,
      "step": 225335
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2649943828582764,
      "learning_rate": 7.157210540873593e-07,
      "loss": 2.9103,
      "step": 225336
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0446622371673584,
      "learning_rate": 7.154386914088872e-07,
      "loss": 2.9984,
      "step": 225337
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8949012756347656,
      "learning_rate": 7.151563843730723e-07,
      "loss": 2.9538,
      "step": 225338
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.451416492462158,
      "learning_rate": 7.148741329799812e-07,
      "loss": 3.1342,
      "step": 225339
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.4930286407470703,
      "learning_rate": 7.145919372295805e-07,
      "loss": 2.9889,
      "step": 225340
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1130475997924805,
      "learning_rate": 7.143097971220369e-07,
      "loss": 3.1354,
      "step": 225341
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.22452712059021,
      "learning_rate": 7.140277126573168e-07,
      "loss": 2.7902,
      "step": 225342
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.785823106765747,
      "learning_rate": 7.137456838355537e-07,
      "loss": 2.8742,
      "step": 225343
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3870716094970703,
      "learning_rate": 7.134637106567143e-07,
      "loss": 2.8933,
      "step": 225344
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0629189014434814,
      "learning_rate": 7.131817931208983e-07,
      "loss": 2.8408,
      "step": 225345
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.195539712905884,
      "learning_rate": 7.128999312281724e-07,
      "loss": 3.1586,
      "step": 225346
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.23177170753479,
      "learning_rate": 7.1261812497857e-07,
      "loss": 2.842,
      "step": 225347
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.547663450241089,
      "learning_rate": 7.123363743721244e-07,
      "loss": 3.1029,
      "step": 225348
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2651021480560303,
      "learning_rate": 7.120546794089021e-07,
      "loss": 3.1759,
      "step": 225349
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2119364738464355,
      "learning_rate": 7.117730400889365e-07,
      "loss": 3.004,
      "step": 225350
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.133527994155884,
      "learning_rate": 7.114914564123609e-07,
      "loss": 3.1976,
      "step": 225351
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.9084208011627197,
      "learning_rate": 7.112099283791417e-07,
      "loss": 2.8876,
      "step": 225352
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.05536150932312,
      "learning_rate": 7.109284559893457e-07,
      "loss": 2.8473,
      "step": 225353
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.462772846221924,
      "learning_rate": 7.106470392430397e-07,
      "loss": 2.9457,
      "step": 225354
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5328116416931152,
      "learning_rate": 7.1036567814029e-07,
      "loss": 3.1042,
      "step": 225355
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.11208438873291,
      "learning_rate": 7.100843726810967e-07,
      "loss": 3.0115,
      "step": 225356
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0555667877197266,
      "learning_rate": 7.098031228655932e-07,
      "loss": 3.0171,
      "step": 225357
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8314990997314453,
      "learning_rate": 7.095219286937459e-07,
      "loss": 2.5757,
      "step": 225358
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.061404228210449,
      "learning_rate": 7.092407901656882e-07,
      "loss": 3.0063,
      "step": 225359
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0643491744995117,
      "learning_rate": 7.089597072813868e-07,
      "loss": 3.1889,
      "step": 225360
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.8836538791656494,
      "learning_rate": 7.08678680040975e-07,
      "loss": 2.7855,
      "step": 225361
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7205331325531006,
      "learning_rate": 7.083977084444525e-07,
      "loss": 2.8636,
      "step": 225362
    },
    {
      "epoch": 2.93,
      "grad_norm": 4.666853904724121,
      "learning_rate": 7.081167924919195e-07,
      "loss": 2.8909,
      "step": 225363
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6491105556488037,
      "learning_rate": 7.07835932183376e-07,
      "loss": 2.7413,
      "step": 225364
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2689597606658936,
      "learning_rate": 7.075551275188884e-07,
      "loss": 2.9948,
      "step": 225365
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1938552856445312,
      "learning_rate": 7.072743784985236e-07,
      "loss": 3.0307,
      "step": 225366
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.850904703140259,
      "learning_rate": 7.069936851223146e-07,
      "loss": 2.9879,
      "step": 225367
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1272411346435547,
      "learning_rate": 7.067130473903282e-07,
      "loss": 2.8391,
      "step": 225368
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7855286598205566,
      "learning_rate": 7.06432465302631e-07,
      "loss": 3.0067,
      "step": 225369
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.791553258895874,
      "learning_rate": 7.061519388592562e-07,
      "loss": 3.0214,
      "step": 225370
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.497014284133911,
      "learning_rate": 7.058714680602373e-07,
      "loss": 2.7233,
      "step": 225371
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0582895278930664,
      "learning_rate": 7.055910529056408e-07,
      "loss": 2.9373,
      "step": 225372
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.4185678958892822,
      "learning_rate": 7.053106933955332e-07,
      "loss": 3.0294,
      "step": 225373
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.719921112060547,
      "learning_rate": 7.050303895299814e-07,
      "loss": 3.0245,
      "step": 225374
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.174508571624756,
      "learning_rate": 7.047501413089851e-07,
      "loss": 2.9687,
      "step": 225375
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1014509201049805,
      "learning_rate": 7.044699487326444e-07,
      "loss": 2.9426,
      "step": 225376
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.719329833984375,
      "learning_rate": 7.041898118009925e-07,
      "loss": 2.6745,
      "step": 225377
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.30918025970459,
      "learning_rate": 7.039097305140629e-07,
      "loss": 2.9441,
      "step": 225378
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7689504623413086,
      "learning_rate": 7.036297048719219e-07,
      "loss": 2.7633,
      "step": 225379
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1574296951293945,
      "learning_rate": 7.033497348746697e-07,
      "loss": 3.0001,
      "step": 225380
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.511586904525757,
      "learning_rate": 7.030698205222729e-07,
      "loss": 2.5518,
      "step": 225381
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8299665451049805,
      "learning_rate": 7.027899618148314e-07,
      "loss": 2.6812,
      "step": 225382
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.749941110610962,
      "learning_rate": 7.025101587524118e-07,
      "loss": 3.0176,
      "step": 225383
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2550995349884033,
      "learning_rate": 7.022304113350141e-07,
      "loss": 2.9006,
      "step": 225384
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2764668464660645,
      "learning_rate": 7.019507195627383e-07,
      "loss": 2.907,
      "step": 225385
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.841036558151245,
      "learning_rate": 7.016710834356176e-07,
      "loss": 2.7585,
      "step": 225386
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1114697456359863,
      "learning_rate": 7.013915029537187e-07,
      "loss": 3.0064,
      "step": 225387
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.771031618118286,
      "learning_rate": 7.011119781170749e-07,
      "loss": 2.8596,
      "step": 225388
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.5859062671661377,
      "learning_rate": 7.008325089257195e-07,
      "loss": 2.7855,
      "step": 225389
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8590054512023926,
      "learning_rate": 7.005530953797522e-07,
      "loss": 2.7086,
      "step": 225390
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1606836318969727,
      "learning_rate": 7.002737374792067e-07,
      "loss": 2.9382,
      "step": 225391
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.6472043991088867,
      "learning_rate": 6.99994435224116e-07,
      "loss": 2.7809,
      "step": 225392
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.8491272926330566,
      "learning_rate": 6.997151886145468e-07,
      "loss": 2.9357,
      "step": 225393
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1299221515655518,
      "learning_rate": 6.994359976505659e-07,
      "loss": 2.6674,
      "step": 225394
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.094031810760498,
      "learning_rate": 6.991568623322063e-07,
      "loss": 2.9375,
      "step": 225395
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.779240846633911,
      "learning_rate": 6.988777826595015e-07,
      "loss": 3.0413,
      "step": 225396
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.921922206878662,
      "learning_rate": 6.985987586325514e-07,
      "loss": 2.8965,
      "step": 225397
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.850728988647461,
      "learning_rate": 6.98319790251356e-07,
      "loss": 2.9855,
      "step": 225398
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.848811149597168,
      "learning_rate": 6.980408775160151e-07,
      "loss": 3.1023,
      "step": 225399
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.741103410720825,
      "learning_rate": 6.977620204265622e-07,
      "loss": 2.9019,
      "step": 225400
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.860161066055298,
      "learning_rate": 6.974832189830304e-07,
      "loss": 2.8,
      "step": 225401
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.0892531871795654,
      "learning_rate": 6.972044731854864e-07,
      "loss": 2.7109,
      "step": 225402
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.7517917156219482,
      "learning_rate": 6.969257830339637e-07,
      "loss": 2.9137,
      "step": 225403
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.563575506210327,
      "learning_rate": 6.966471485285619e-07,
      "loss": 2.8652,
      "step": 225404
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.9786577224731445,
      "learning_rate": 6.963685696692811e-07,
      "loss": 2.8764,
      "step": 225405
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.2595298290252686,
      "learning_rate": 6.960900464562214e-07,
      "loss": 2.7415,
      "step": 225406
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.1714909076690674,
      "learning_rate": 6.958115788893825e-07,
      "loss": 2.7561,
      "step": 225407
    },
    {
      "epoch": 2.93,
      "grad_norm": 2.653275728225708,
      "learning_rate": 6.955331669688646e-07,
      "loss": 2.7522,
      "step": 225408
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.3205628395080566,
      "learning_rate": 6.952548106946676e-07,
      "loss": 2.9858,
      "step": 225409
    },
    {
      "epoch": 2.93,
      "grad_norm": 3.221468687057495,
      "learning_rate": 6.949765100669246e-07,
      "loss": 2.8716,
      "step": 225410
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9428515434265137,
      "learning_rate": 6.946982650855692e-07,
      "loss": 2.9809,
      "step": 225411
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7239725589752197,
      "learning_rate": 6.944200757507678e-07,
      "loss": 2.9967,
      "step": 225412
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.081162452697754,
      "learning_rate": 6.941419420625205e-07,
      "loss": 2.8626,
      "step": 225413
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.152880907058716,
      "learning_rate": 6.938638640208604e-07,
      "loss": 2.7077,
      "step": 225414
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4357991218566895,
      "learning_rate": 6.935858416258544e-07,
      "loss": 2.9911,
      "step": 225415
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.905229330062866,
      "learning_rate": 6.933078748776022e-07,
      "loss": 2.9206,
      "step": 225416
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8312158584594727,
      "learning_rate": 6.930299637760706e-07,
      "loss": 2.695,
      "step": 225417
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4896814823150635,
      "learning_rate": 6.927521083213928e-07,
      "loss": 2.974,
      "step": 225418
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.4000768661499023,
      "learning_rate": 6.924743085135353e-07,
      "loss": 2.7593,
      "step": 225419
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.939164400100708,
      "learning_rate": 6.921965643526317e-07,
      "loss": 2.9382,
      "step": 225420
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5591719150543213,
      "learning_rate": 6.919188758386818e-07,
      "loss": 2.9301,
      "step": 225421
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7459447383880615,
      "learning_rate": 6.916412429717522e-07,
      "loss": 2.8797,
      "step": 225422
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.071606397628784,
      "learning_rate": 6.913636657519095e-07,
      "loss": 2.9804,
      "step": 225423
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1670937538146973,
      "learning_rate": 6.910861441792204e-07,
      "loss": 2.8759,
      "step": 225424
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8620731830596924,
      "learning_rate": 6.908086782536515e-07,
      "loss": 2.913,
      "step": 225425
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1723268032073975,
      "learning_rate": 6.905312679753361e-07,
      "loss": 2.9689,
      "step": 225426
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2019248008728027,
      "learning_rate": 6.902539133443074e-07,
      "loss": 2.7889,
      "step": 225427
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8604300022125244,
      "learning_rate": 6.899766143605989e-07,
      "loss": 3.0593,
      "step": 225428
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.058708906173706,
      "learning_rate": 6.896993710242771e-07,
      "loss": 2.9678,
      "step": 225429
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.998025894165039,
      "learning_rate": 6.894221833353752e-07,
      "loss": 2.8129,
      "step": 225430
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.231736183166504,
      "learning_rate": 6.891450512939933e-07,
      "loss": 2.7574,
      "step": 225431
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.086909770965576,
      "learning_rate": 6.888679749001314e-07,
      "loss": 3.1159,
      "step": 225432
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6390528678894043,
      "learning_rate": 6.885909541538559e-07,
      "loss": 2.8533,
      "step": 225433
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.956679105758667,
      "learning_rate": 6.883139890552336e-07,
      "loss": 2.9537,
      "step": 225434
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8457648754119873,
      "learning_rate": 6.880370796042978e-07,
      "loss": 2.8823,
      "step": 225435
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9327242374420166,
      "learning_rate": 6.877602258010817e-07,
      "loss": 2.8591,
      "step": 225436
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6908535957336426,
      "learning_rate": 6.874834276456853e-07,
      "loss": 2.8474,
      "step": 225437
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5600647926330566,
      "learning_rate": 6.872066851381419e-07,
      "loss": 2.982,
      "step": 225438
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3865201473236084,
      "learning_rate": 6.869299982784849e-07,
      "loss": 3.1294,
      "step": 225439
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.970093011856079,
      "learning_rate": 6.866533670667807e-07,
      "loss": 2.8461,
      "step": 225440
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2866859436035156,
      "learning_rate": 6.863767915030627e-07,
      "loss": 2.6885,
      "step": 225441
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7665627002716064,
      "learning_rate": 6.861002715874308e-07,
      "loss": 2.8487,
      "step": 225442
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.786501884460449,
      "learning_rate": 6.858238073198852e-07,
      "loss": 2.8711,
      "step": 225443
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7343478202819824,
      "learning_rate": 6.855473987004923e-07,
      "loss": 2.9094,
      "step": 225444
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0533878803253174,
      "learning_rate": 6.852710457293187e-07,
      "loss": 2.724,
      "step": 225445
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0588772296905518,
      "learning_rate": 6.849947484063645e-07,
      "loss": 3.0635,
      "step": 225446
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0988850593566895,
      "learning_rate": 6.847185067317629e-07,
      "loss": 3.0819,
      "step": 225447
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9008984565734863,
      "learning_rate": 6.844423207055138e-07,
      "loss": 2.8258,
      "step": 225448
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.822004556655884,
      "learning_rate": 6.841661903276508e-07,
      "loss": 2.7521,
      "step": 225449
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6662049293518066,
      "learning_rate": 6.838901155982734e-07,
      "loss": 2.9355,
      "step": 225450
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7900257110595703,
      "learning_rate": 6.836140965174153e-07,
      "loss": 2.8799,
      "step": 225451
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.323984384536743,
      "learning_rate": 6.833381330851096e-07,
      "loss": 2.9961,
      "step": 225452
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.547273874282837,
      "learning_rate": 6.830622253014229e-07,
      "loss": 3.0264,
      "step": 225453
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.489619731903076,
      "learning_rate": 6.827863731664218e-07,
      "loss": 2.8082,
      "step": 225454
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2585010528564453,
      "learning_rate": 6.825105766801064e-07,
      "loss": 2.8323,
      "step": 225455
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.618917465209961,
      "learning_rate": 6.8223483584261e-07,
      "loss": 2.8955,
      "step": 225456
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0902769565582275,
      "learning_rate": 6.819591506538991e-07,
      "loss": 2.8676,
      "step": 225457
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.8433854579925537,
      "learning_rate": 6.816835211140736e-07,
      "loss": 2.8825,
      "step": 225458
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.642641305923462,
      "learning_rate": 6.81407947223167e-07,
      "loss": 2.6113,
      "step": 225459
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.744767904281616,
      "learning_rate": 6.811324289812459e-07,
      "loss": 3.1215,
      "step": 225460
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.968935251235962,
      "learning_rate": 6.808569663883434e-07,
      "loss": 2.9583,
      "step": 225461
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9502501487731934,
      "learning_rate": 6.805815594445263e-07,
      "loss": 2.8353,
      "step": 225462
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.078800916671753,
      "learning_rate": 6.803062081498278e-07,
      "loss": 2.9515,
      "step": 225463
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8856301307678223,
      "learning_rate": 6.800309125043146e-07,
      "loss": 2.9633,
      "step": 225464
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.099247455596924,
      "learning_rate": 6.797556725080533e-07,
      "loss": 2.9364,
      "step": 225465
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7287542819976807,
      "learning_rate": 6.794804881610438e-07,
      "loss": 3.2712,
      "step": 225466
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.664299964904785,
      "learning_rate": 6.792053594634195e-07,
      "loss": 3.0169,
      "step": 225467
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.260441541671753,
      "learning_rate": 6.789302864151469e-07,
      "loss": 2.8249,
      "step": 225468
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1909003257751465,
      "learning_rate": 6.786552690162928e-07,
      "loss": 3.0456,
      "step": 225469
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.225836753845215,
      "learning_rate": 6.783803072669569e-07,
      "loss": 2.9317,
      "step": 225470
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7592360973358154,
      "learning_rate": 6.781054011671394e-07,
      "loss": 3.0025,
      "step": 225471
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3685953617095947,
      "learning_rate": 6.778305507169402e-07,
      "loss": 3.0203,
      "step": 225472
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1877732276916504,
      "learning_rate": 6.775557559163591e-07,
      "loss": 2.9208,
      "step": 225473
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7898659706115723,
      "learning_rate": 6.772810167654963e-07,
      "loss": 3.0397,
      "step": 225474
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.056826591491699,
      "learning_rate": 6.770063332643516e-07,
      "loss": 2.6733,
      "step": 225475
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0174176692962646,
      "learning_rate": 6.767317054129917e-07,
      "loss": 3.2567,
      "step": 225476
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1084883213043213,
      "learning_rate": 6.764571332115166e-07,
      "loss": 2.9405,
      "step": 225477
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8965160846710205,
      "learning_rate": 6.761826166598927e-07,
      "loss": 2.9543,
      "step": 225478
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3976306915283203,
      "learning_rate": 6.759081557582535e-07,
      "loss": 2.9714,
      "step": 225479
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.922506809234619,
      "learning_rate": 6.756337505065989e-07,
      "loss": 2.7945,
      "step": 225480
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2050650119781494,
      "learning_rate": 6.753594009049957e-07,
      "loss": 2.9267,
      "step": 225481
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.9231653213500977,
      "learning_rate": 6.750851069534768e-07,
      "loss": 2.9042,
      "step": 225482
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.855423927307129,
      "learning_rate": 6.748108686521425e-07,
      "loss": 2.7843,
      "step": 225483
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0725486278533936,
      "learning_rate": 6.745366860009927e-07,
      "loss": 3.1443,
      "step": 225484
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.1457414627075195,
      "learning_rate": 6.742625590000939e-07,
      "loss": 3.0081,
      "step": 225485
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0814473628997803,
      "learning_rate": 6.739884876495128e-07,
      "loss": 2.9561,
      "step": 225486
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.669996500015259,
      "learning_rate": 6.737144719492826e-07,
      "loss": 2.7112,
      "step": 225487
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.741631269454956,
      "learning_rate": 6.734405118994368e-07,
      "loss": 2.7768,
      "step": 225488
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.8315885066986084,
      "learning_rate": 6.731666075000752e-07,
      "loss": 2.9137,
      "step": 225489
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.771707534790039,
      "learning_rate": 6.728927587511979e-07,
      "loss": 3.1216,
      "step": 225490
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7317113876342773,
      "learning_rate": 6.726189656529046e-07,
      "loss": 2.6123,
      "step": 225491
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9428293704986572,
      "learning_rate": 6.723452282051955e-07,
      "loss": 2.8625,
      "step": 225492
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.137336015701294,
      "learning_rate": 6.720715464081705e-07,
      "loss": 2.9643,
      "step": 225493
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.316120624542236,
      "learning_rate": 6.717979202618628e-07,
      "loss": 2.8343,
      "step": 225494
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0802743434906006,
      "learning_rate": 6.715243497663059e-07,
      "loss": 2.9423,
      "step": 225495
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2213470935821533,
      "learning_rate": 6.712508349215662e-07,
      "loss": 2.9611,
      "step": 225496
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.346069097518921,
      "learning_rate": 6.709773757276771e-07,
      "loss": 2.9388,
      "step": 225497
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6095738410949707,
      "learning_rate": 6.707039721847385e-07,
      "loss": 2.8947,
      "step": 225498
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.080538272857666,
      "learning_rate": 6.704306242927505e-07,
      "loss": 2.8705,
      "step": 225499
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0118517875671387,
      "learning_rate": 6.701573320517462e-07,
      "loss": 2.6294,
      "step": 225500
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.9836931228637695,
      "learning_rate": 6.69884095461859e-07,
      "loss": 2.8819,
      "step": 225501
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.126911640167236,
      "learning_rate": 6.696109145230554e-07,
      "loss": 2.8906,
      "step": 225502
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9488930702209473,
      "learning_rate": 6.693377892354357e-07,
      "loss": 2.9073,
      "step": 225503
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.153353214263916,
      "learning_rate": 6.690647195990328e-07,
      "loss": 2.7407,
      "step": 225504
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.413928985595703,
      "learning_rate": 6.687917056139136e-07,
      "loss": 3.1029,
      "step": 225505
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4958040714263916,
      "learning_rate": 6.68518747280078e-07,
      "loss": 2.9184,
      "step": 225506
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.996046781539917,
      "learning_rate": 6.68245844597659e-07,
      "loss": 2.8858,
      "step": 225507
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.119384288787842,
      "learning_rate": 6.67972997566657e-07,
      "loss": 3.0094,
      "step": 225508
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.91990327835083,
      "learning_rate": 6.677002061871051e-07,
      "loss": 2.9031,
      "step": 225509
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.325468063354492,
      "learning_rate": 6.674274704591032e-07,
      "loss": 2.7336,
      "step": 225510
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.752298355102539,
      "learning_rate": 6.671547903826513e-07,
      "loss": 2.8847,
      "step": 225511
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.773587942123413,
      "learning_rate": 6.668821659578493e-07,
      "loss": 2.9838,
      "step": 225512
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1869425773620605,
      "learning_rate": 6.666095971847308e-07,
      "loss": 2.6763,
      "step": 225513
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1504757404327393,
      "learning_rate": 6.663370840632953e-07,
      "loss": 3.2641,
      "step": 225514
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3681793212890625,
      "learning_rate": 6.660646265936765e-07,
      "loss": 3.0674,
      "step": 225515
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6813085079193115,
      "learning_rate": 6.65792224775874e-07,
      "loss": 3.0088,
      "step": 225516
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.594367027282715,
      "learning_rate": 6.655198786099547e-07,
      "loss": 3.0416,
      "step": 225517
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1493306159973145,
      "learning_rate": 6.652475880959851e-07,
      "loss": 2.6815,
      "step": 225518
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.087174892425537,
      "learning_rate": 6.649753532339653e-07,
      "loss": 2.8182,
      "step": 225519
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6795809268951416,
      "learning_rate": 6.64703174023995e-07,
      "loss": 2.7516,
      "step": 225520
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9875948429107666,
      "learning_rate": 6.644310504661076e-07,
      "loss": 2.8563,
      "step": 225521
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7275593280792236,
      "learning_rate": 6.641589825603366e-07,
      "loss": 2.8974,
      "step": 225522
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7180745601654053,
      "learning_rate": 6.638869703067484e-07,
      "loss": 2.8904,
      "step": 225523
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6435751914978027,
      "learning_rate": 6.636150137054097e-07,
      "loss": 2.832,
      "step": 225524
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.133723258972168,
      "learning_rate": 6.633431127563538e-07,
      "loss": 2.8273,
      "step": 225525
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.326099395751953,
      "learning_rate": 6.63071267459614e-07,
      "loss": 2.9037,
      "step": 225526
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9358866214752197,
      "learning_rate": 6.627994778152901e-07,
      "loss": 2.8535,
      "step": 225527
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8687164783477783,
      "learning_rate": 6.625277438233823e-07,
      "loss": 2.9795,
      "step": 225528
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0030293464660645,
      "learning_rate": 6.62256065483957e-07,
      "loss": 2.9246,
      "step": 225529
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2391767501831055,
      "learning_rate": 6.619844427970811e-07,
      "loss": 2.8844,
      "step": 225530
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5434558391571045,
      "learning_rate": 6.617128757627876e-07,
      "loss": 2.7499,
      "step": 225531
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2336437702178955,
      "learning_rate": 6.614413643811434e-07,
      "loss": 3.0065,
      "step": 225532
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.595487356185913,
      "learning_rate": 6.611699086521815e-07,
      "loss": 2.9546,
      "step": 225533
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.114856004714966,
      "learning_rate": 6.608985085759355e-07,
      "loss": 2.9724,
      "step": 225534
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.279071092605591,
      "learning_rate": 6.606271641525051e-07,
      "loss": 2.9832,
      "step": 225535
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.657200574874878,
      "learning_rate": 6.603558753818905e-07,
      "loss": 3.067,
      "step": 225536
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2147085666656494,
      "learning_rate": 6.600846422641914e-07,
      "loss": 2.8856,
      "step": 225537
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8878097534179688,
      "learning_rate": 6.598134647994413e-07,
      "loss": 3.0073,
      "step": 225538
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.314699649810791,
      "learning_rate": 6.595423429876401e-07,
      "loss": 2.9555,
      "step": 225539
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.146067142486572,
      "learning_rate": 6.59271276828921e-07,
      "loss": 2.5562,
      "step": 225540
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.567216634750366,
      "learning_rate": 6.590002663232841e-07,
      "loss": 2.7023,
      "step": 225541
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.572319746017456,
      "learning_rate": 6.587293114707626e-07,
      "loss": 3.0996,
      "step": 225542
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.9200637340545654,
      "learning_rate": 6.584584122714564e-07,
      "loss": 2.908,
      "step": 225543
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1580145359039307,
      "learning_rate": 6.58187568725399e-07,
      "loss": 3.0417,
      "step": 225544
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0245256423950195,
      "learning_rate": 6.579167808326235e-07,
      "loss": 3.0096,
      "step": 225545
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7706758975982666,
      "learning_rate": 6.576460485931967e-07,
      "loss": 3.0188,
      "step": 225546
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.627523183822632,
      "learning_rate": 6.57375372007185e-07,
      "loss": 3.0686,
      "step": 225547
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8611645698547363,
      "learning_rate": 6.571047510745886e-07,
      "loss": 2.8473,
      "step": 225548
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5304770469665527,
      "learning_rate": 6.56834185795474e-07,
      "loss": 2.9796,
      "step": 225549
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.423222780227661,
      "learning_rate": 6.565636761699411e-07,
      "loss": 3.0361,
      "step": 225550
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.984719753265381,
      "learning_rate": 6.5629322219799e-07,
      "loss": 2.8361,
      "step": 225551
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1389944553375244,
      "learning_rate": 6.560228238796872e-07,
      "loss": 2.9469,
      "step": 225552
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.024245262145996,
      "learning_rate": 6.55752481215066e-07,
      "loss": 2.9111,
      "step": 225553
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7790029048919678,
      "learning_rate": 6.554821942041932e-07,
      "loss": 2.8257,
      "step": 225554
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.126823663711548,
      "learning_rate": 6.552119628471353e-07,
      "loss": 2.9579,
      "step": 225555
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.825418472290039,
      "learning_rate": 6.549417871439255e-07,
      "loss": 2.9507,
      "step": 225556
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.8268513679504395,
      "learning_rate": 6.546716670945973e-07,
      "loss": 3.0554,
      "step": 225557
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.25994873046875,
      "learning_rate": 6.544016026992171e-07,
      "loss": 3.1226,
      "step": 225558
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7688918113708496,
      "learning_rate": 6.541315939578185e-07,
      "loss": 2.8726,
      "step": 225559
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5064406394958496,
      "learning_rate": 6.53861640870501e-07,
      "loss": 2.8169,
      "step": 225560
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3452935218811035,
      "learning_rate": 6.535917434372651e-07,
      "loss": 3.121,
      "step": 225561
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9080922603607178,
      "learning_rate": 6.53321901658177e-07,
      "loss": 2.7548,
      "step": 225562
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3153419494628906,
      "learning_rate": 6.530521155332702e-07,
      "loss": 2.9182,
      "step": 225563
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4793479442596436,
      "learning_rate": 6.527823850626446e-07,
      "loss": 3.1199,
      "step": 225564
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9782636165618896,
      "learning_rate": 6.525127102463002e-07,
      "loss": 2.8224,
      "step": 225565
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.00724458694458,
      "learning_rate": 6.522430910843035e-07,
      "loss": 2.986,
      "step": 225566
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.855116605758667,
      "learning_rate": 6.519735275766879e-07,
      "loss": 2.8487,
      "step": 225567
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9332733154296875,
      "learning_rate": 6.517040197235535e-07,
      "loss": 2.9952,
      "step": 225568
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9284000396728516,
      "learning_rate": 6.514345675249e-07,
      "loss": 2.9569,
      "step": 225569
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6603922843933105,
      "learning_rate": 6.511651709808274e-07,
      "loss": 3.0237,
      "step": 225570
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7498204708099365,
      "learning_rate": 6.508958300913025e-07,
      "loss": 3.1063,
      "step": 225571
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.821744203567505,
      "learning_rate": 6.506265448564585e-07,
      "loss": 3.0523,
      "step": 225572
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4840264320373535,
      "learning_rate": 6.503573152762953e-07,
      "loss": 2.7758,
      "step": 225573
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.069453239440918,
      "learning_rate": 6.500881413509129e-07,
      "loss": 2.9345,
      "step": 225574
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.621441125869751,
      "learning_rate": 6.49819023080278e-07,
      "loss": 3.1002,
      "step": 225575
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.161930799484253,
      "learning_rate": 6.495499604645571e-07,
      "loss": 2.8405,
      "step": 225576
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0908591747283936,
      "learning_rate": 6.492809535036836e-07,
      "loss": 3.147,
      "step": 225577
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1806557178497314,
      "learning_rate": 6.490120021977907e-07,
      "loss": 2.7896,
      "step": 225578
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.639981269836426,
      "learning_rate": 6.487431065468785e-07,
      "loss": 2.9638,
      "step": 225579
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8875179290771484,
      "learning_rate": 6.484742665510134e-07,
      "loss": 2.7728,
      "step": 225580
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7146220207214355,
      "learning_rate": 6.482054822102622e-07,
      "loss": 2.8078,
      "step": 225581
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1162235736846924,
      "learning_rate": 6.479367535246582e-07,
      "loss": 2.7979,
      "step": 225582
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.575998306274414,
      "learning_rate": 6.476680804942347e-07,
      "loss": 2.7204,
      "step": 225583
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3477299213409424,
      "learning_rate": 6.473994631190916e-07,
      "loss": 2.8251,
      "step": 225584
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.596122980117798,
      "learning_rate": 6.471309013992287e-07,
      "loss": 3.0918,
      "step": 225585
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.255594253540039,
      "learning_rate": 6.46862395334713e-07,
      "loss": 2.5388,
      "step": 225586
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.952587366104126,
      "learning_rate": 6.465939449256107e-07,
      "loss": 2.7341,
      "step": 225587
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.366032361984253,
      "learning_rate": 6.463255501719555e-07,
      "loss": 3.109,
      "step": 225588
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8348605632781982,
      "learning_rate": 6.460572110737805e-07,
      "loss": 2.8715,
      "step": 225589
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.879610538482666,
      "learning_rate": 6.457889276311523e-07,
      "loss": 2.865,
      "step": 225590
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6553561687469482,
      "learning_rate": 6.455206998441376e-07,
      "loss": 3.0471,
      "step": 225591
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.914062261581421,
      "learning_rate": 6.452525277127696e-07,
      "loss": 3.0789,
      "step": 225592
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.4768452644348145,
      "learning_rate": 6.449844112370816e-07,
      "loss": 3.0789,
      "step": 225593
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7980856895446777,
      "learning_rate": 6.447163504171737e-07,
      "loss": 3.0529,
      "step": 225594
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3085827827453613,
      "learning_rate": 6.444483452530458e-07,
      "loss": 3.1789,
      "step": 225595
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.28320574760437,
      "learning_rate": 6.441803957447645e-07,
      "loss": 2.9147,
      "step": 225596
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1314337253570557,
      "learning_rate": 6.439125018923963e-07,
      "loss": 2.8749,
      "step": 225597
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7161147594451904,
      "learning_rate": 6.436446636959747e-07,
      "loss": 2.9044,
      "step": 225598
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3277153968811035,
      "learning_rate": 6.433768811555329e-07,
      "loss": 2.9954,
      "step": 225599
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.045385837554932,
      "learning_rate": 6.431091542711375e-07,
      "loss": 2.8086,
      "step": 225600
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.785487413406372,
      "learning_rate": 6.428414830428552e-07,
      "loss": 2.9611,
      "step": 225601
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2392072677612305,
      "learning_rate": 6.425738674707193e-07,
      "loss": 3.1629,
      "step": 225602
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.576176404953003,
      "learning_rate": 6.423063075547629e-07,
      "loss": 2.8787,
      "step": 225603
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.822002649307251,
      "learning_rate": 6.42038803295053e-07,
      "loss": 2.8755,
      "step": 225604
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.869840621948242,
      "learning_rate": 6.417713546916559e-07,
      "loss": 2.9198,
      "step": 225605
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2380053997039795,
      "learning_rate": 6.41503961744605e-07,
      "loss": 2.6542,
      "step": 225606
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.038344383239746,
      "learning_rate": 6.412366244539335e-07,
      "loss": 3.0362,
      "step": 225607
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1004505157470703,
      "learning_rate": 6.409693428197082e-07,
      "loss": 2.9646,
      "step": 225608
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0189902782440186,
      "learning_rate": 6.407021168419624e-07,
      "loss": 2.9779,
      "step": 225609
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.217827320098877,
      "learning_rate": 6.404349465207958e-07,
      "loss": 2.8589,
      "step": 225610
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.046455144882202,
      "learning_rate": 6.401678318562087e-07,
      "loss": 2.8026,
      "step": 225611
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.879915475845337,
      "learning_rate": 6.399007728482674e-07,
      "loss": 2.8465,
      "step": 225612
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6270017623901367,
      "learning_rate": 6.396337694970056e-07,
      "loss": 2.7917,
      "step": 225613
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5440173149108887,
      "learning_rate": 6.393668218024894e-07,
      "loss": 3.0186,
      "step": 225614
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.607591152191162,
      "learning_rate": 6.390999297647858e-07,
      "loss": 2.872,
      "step": 225615
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.615905284881592,
      "learning_rate": 6.388330933839281e-07,
      "loss": 2.797,
      "step": 225616
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.27158260345459,
      "learning_rate": 6.385663126599494e-07,
      "loss": 2.8829,
      "step": 225617
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1222164630889893,
      "learning_rate": 6.382995875929165e-07,
      "loss": 2.8793,
      "step": 225618
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.992767810821533,
      "learning_rate": 6.380329181828625e-07,
      "loss": 2.9547,
      "step": 225619
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3912253379821777,
      "learning_rate": 6.377663044298542e-07,
      "loss": 2.7777,
      "step": 225620
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6438655853271484,
      "learning_rate": 6.374997463339249e-07,
      "loss": 2.9072,
      "step": 225621
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.76640248298645,
      "learning_rate": 6.37233243895141e-07,
      "loss": 2.8187,
      "step": 225622
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9059817790985107,
      "learning_rate": 6.369667971135695e-07,
      "loss": 2.7649,
      "step": 225623
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.728020429611206,
      "learning_rate": 6.3670040598921e-07,
      "loss": 2.8469,
      "step": 225624
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.715336799621582,
      "learning_rate": 6.364340705221627e-07,
      "loss": 2.8367,
      "step": 225625
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6920077800750732,
      "learning_rate": 6.361677907124274e-07,
      "loss": 3.0804,
      "step": 225626
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.921308755874634,
      "learning_rate": 6.359015665601041e-07,
      "loss": 2.9831,
      "step": 225627
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4308035373687744,
      "learning_rate": 6.356353980651929e-07,
      "loss": 2.9482,
      "step": 225628
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9220077991485596,
      "learning_rate": 6.353692852277603e-07,
      "loss": 3.0125,
      "step": 225629
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.902297019958496,
      "learning_rate": 6.351032280479063e-07,
      "loss": 3.217,
      "step": 225630
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4825515747070312,
      "learning_rate": 6.348372265255975e-07,
      "loss": 2.9402,
      "step": 225631
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.256520986557007,
      "learning_rate": 6.345712806609338e-07,
      "loss": 2.6414,
      "step": 225632
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.095184326171875,
      "learning_rate": 6.343053904539819e-07,
      "loss": 2.7522,
      "step": 225633
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.928570032119751,
      "learning_rate": 6.340395559047417e-07,
      "loss": 2.7855,
      "step": 225634
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8918261528015137,
      "learning_rate": 6.3377377701328e-07,
      "loss": 2.9062,
      "step": 225635
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.595646381378174,
      "learning_rate": 6.335080537796633e-07,
      "loss": 2.9105,
      "step": 225636
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.010465383529663,
      "learning_rate": 6.332423862039248e-07,
      "loss": 3.0006,
      "step": 225637
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5432798862457275,
      "learning_rate": 6.32976774286098e-07,
      "loss": 3.238,
      "step": 225638
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.415064811706543,
      "learning_rate": 6.327112180262827e-07,
      "loss": 2.8025,
      "step": 225639
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4299838542938232,
      "learning_rate": 6.32445717424479e-07,
      "loss": 2.8731,
      "step": 225640
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7683088779449463,
      "learning_rate": 6.321802724807534e-07,
      "loss": 2.9396,
      "step": 225641
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9356026649475098,
      "learning_rate": 6.319148831951393e-07,
      "loss": 2.9256,
      "step": 225642
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.860445499420166,
      "learning_rate": 6.316495495677365e-07,
      "loss": 2.8917,
      "step": 225643
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.170079708099365,
      "learning_rate": 6.31384271598545e-07,
      "loss": 2.6927,
      "step": 225644
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7187821865081787,
      "learning_rate": 6.311190492876317e-07,
      "loss": 2.9033,
      "step": 225645
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.897590160369873,
      "learning_rate": 6.308538826350629e-07,
      "loss": 3.0718,
      "step": 225646
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.139683485031128,
      "learning_rate": 6.305887716408386e-07,
      "loss": 2.8653,
      "step": 225647
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8913230895996094,
      "learning_rate": 6.30323716305059e-07,
      "loss": 2.8536,
      "step": 225648
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8916854858398438,
      "learning_rate": 6.30058716627757e-07,
      "loss": 3.0291,
      "step": 225649
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.619887590408325,
      "learning_rate": 6.297937726089663e-07,
      "loss": 2.8883,
      "step": 225650
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.688483715057373,
      "learning_rate": 6.295288842487534e-07,
      "loss": 2.8453,
      "step": 225651
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2284128665924072,
      "learning_rate": 6.292640515471514e-07,
      "loss": 2.9641,
      "step": 225652
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.521888494491577,
      "learning_rate": 6.289992745042271e-07,
      "loss": 2.9309,
      "step": 225653
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.429884910583496,
      "learning_rate": 6.287345531200139e-07,
      "loss": 2.6743,
      "step": 225654
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.9614195823669434,
      "learning_rate": 6.284698873946115e-07,
      "loss": 2.8183,
      "step": 225655
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.978224754333496,
      "learning_rate": 6.282052773279867e-07,
      "loss": 2.8769,
      "step": 225656
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9681146144866943,
      "learning_rate": 6.279407229202393e-07,
      "loss": 2.8818,
      "step": 225657
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0958995819091797,
      "learning_rate": 6.276762241714028e-07,
      "loss": 2.8932,
      "step": 225658
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0426111221313477,
      "learning_rate": 6.274117810815437e-07,
      "loss": 2.9226,
      "step": 225659
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.879868268966675,
      "learning_rate": 6.271473936506954e-07,
      "loss": 3.0271,
      "step": 225660
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.770627021789551,
      "learning_rate": 6.268830618789244e-07,
      "loss": 2.8375,
      "step": 225661
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.493952751159668,
      "learning_rate": 6.266187857662308e-07,
      "loss": 2.8453,
      "step": 225662
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8007586002349854,
      "learning_rate": 6.263545653127145e-07,
      "loss": 2.7854,
      "step": 225663
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9758121967315674,
      "learning_rate": 6.260904005184419e-07,
      "loss": 2.6867,
      "step": 225664
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8319709300994873,
      "learning_rate": 6.2582629138338e-07,
      "loss": 2.8938,
      "step": 225665
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9075632095336914,
      "learning_rate": 6.255622379076619e-07,
      "loss": 2.7443,
      "step": 225666
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.9167747497558594,
      "learning_rate": 6.252982400912876e-07,
      "loss": 2.9194,
      "step": 225667
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8265180587768555,
      "learning_rate": 6.25034297934357e-07,
      "loss": 2.9935,
      "step": 225668
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.762178659439087,
      "learning_rate": 6.247704114368368e-07,
      "loss": 3.0966,
      "step": 225669
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.690666675567627,
      "learning_rate": 6.245065805988603e-07,
      "loss": 2.8819,
      "step": 225670
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7901806831359863,
      "learning_rate": 6.242428054204274e-07,
      "loss": 2.8683,
      "step": 225671
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.915388822555542,
      "learning_rate": 6.239790859015714e-07,
      "loss": 2.9266,
      "step": 225672
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.021435499191284,
      "learning_rate": 6.237154220423923e-07,
      "loss": 2.9751,
      "step": 225673
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6751062870025635,
      "learning_rate": 6.234518138429234e-07,
      "loss": 2.8439,
      "step": 225674
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4038186073303223,
      "learning_rate": 6.23188261303198e-07,
      "loss": 2.7844,
      "step": 225675
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.949582099914551,
      "learning_rate": 6.229247644232494e-07,
      "loss": 2.9333,
      "step": 225676
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.976253032684326,
      "learning_rate": 6.226613232031774e-07,
      "loss": 2.8334,
      "step": 225677
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7557387351989746,
      "learning_rate": 6.223979376430155e-07,
      "loss": 3.1513,
      "step": 225678
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.955883502960205,
      "learning_rate": 6.221346077427636e-07,
      "loss": 2.9538,
      "step": 225679
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7567763328552246,
      "learning_rate": 6.218713335025216e-07,
      "loss": 2.9725,
      "step": 225680
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.094874858856201,
      "learning_rate": 6.216081149223562e-07,
      "loss": 2.8477,
      "step": 225681
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.8455638885498047,
      "learning_rate": 6.21344952002234e-07,
      "loss": 2.77,
      "step": 225682
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4440903663635254,
      "learning_rate": 6.210818447422883e-07,
      "loss": 2.9216,
      "step": 225683
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8241138458251953,
      "learning_rate": 6.20818793142519e-07,
      "loss": 3.3531,
      "step": 225684
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8854146003723145,
      "learning_rate": 6.205557972030262e-07,
      "loss": 2.923,
      "step": 225685
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.23146653175354,
      "learning_rate": 6.202928569237764e-07,
      "loss": 2.9956,
      "step": 225686
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5898685455322266,
      "learning_rate": 6.200299723049029e-07,
      "loss": 2.932,
      "step": 225687
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.340747833251953,
      "learning_rate": 6.197671433464058e-07,
      "loss": 2.8874,
      "step": 225688
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0547890663146973,
      "learning_rate": 6.195043700483182e-07,
      "loss": 3.06,
      "step": 225689
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.290419816970825,
      "learning_rate": 6.192416524107402e-07,
      "loss": 3.0095,
      "step": 225690
    },
    {
      "epoch": 2.94,
      "grad_norm": 5.09561014175415,
      "learning_rate": 6.18978990433705e-07,
      "loss": 3.0195,
      "step": 225691
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8688833713531494,
      "learning_rate": 6.18716384117246e-07,
      "loss": 2.8057,
      "step": 225692
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.617337703704834,
      "learning_rate": 6.184538334613964e-07,
      "loss": 3.1587,
      "step": 225693
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.910224199295044,
      "learning_rate": 6.181913384662563e-07,
      "loss": 2.9595,
      "step": 225694
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.261823654174805,
      "learning_rate": 6.179288991318254e-07,
      "loss": 3.0061,
      "step": 225695
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.855919599533081,
      "learning_rate": 6.176665154581706e-07,
      "loss": 2.9845,
      "step": 225696
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1051828861236572,
      "learning_rate": 6.174041874453583e-07,
      "loss": 3.155,
      "step": 225697
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7190442085266113,
      "learning_rate": 6.17141915093422e-07,
      "loss": 3.0156,
      "step": 225698
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.830416440963745,
      "learning_rate": 6.168796984023949e-07,
      "loss": 3.0229,
      "step": 225699
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.288785934448242,
      "learning_rate": 6.16617537372377e-07,
      "loss": 2.8355,
      "step": 225700
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7458438873291016,
      "learning_rate": 6.163554320033682e-07,
      "loss": 2.8081,
      "step": 225701
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8421034812927246,
      "learning_rate": 6.160933822954017e-07,
      "loss": 3.1309,
      "step": 225702
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0902504920959473,
      "learning_rate": 6.158313882485777e-07,
      "loss": 2.9492,
      "step": 225703
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.23398756980896,
      "learning_rate": 6.15569449862896e-07,
      "loss": 2.92,
      "step": 225704
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7950687408447266,
      "learning_rate": 6.153075671384566e-07,
      "loss": 2.9771,
      "step": 225705
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.291682720184326,
      "learning_rate": 6.150457400752928e-07,
      "loss": 2.8725,
      "step": 225706
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.993441104888916,
      "learning_rate": 6.147839686734379e-07,
      "loss": 3.0136,
      "step": 225707
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9987010955810547,
      "learning_rate": 6.145222529329252e-07,
      "loss": 3.0373,
      "step": 225708
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.9119532108306885,
      "learning_rate": 6.142605928538214e-07,
      "loss": 2.7424,
      "step": 225709
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.236945390701294,
      "learning_rate": 6.139989884361929e-07,
      "loss": 3.0866,
      "step": 225710
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.894174814224243,
      "learning_rate": 6.137374396800731e-07,
      "loss": 3.097,
      "step": 225711
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.515867233276367,
      "learning_rate": 6.134759465855288e-07,
      "loss": 2.9626,
      "step": 225712
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9710373878479004,
      "learning_rate": 6.132145091525598e-07,
      "loss": 2.9769,
      "step": 225713
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2595584392547607,
      "learning_rate": 6.129531273812661e-07,
      "loss": 3.0805,
      "step": 225714
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.74161696434021,
      "learning_rate": 6.126918012716475e-07,
      "loss": 2.8389,
      "step": 225715
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7847707271575928,
      "learning_rate": 6.124305308238042e-07,
      "loss": 2.6658,
      "step": 225716
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2001638412475586,
      "learning_rate": 6.121693160377694e-07,
      "loss": 2.7048,
      "step": 225717
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6372992992401123,
      "learning_rate": 6.119081569135765e-07,
      "loss": 2.7926,
      "step": 225718
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6859681606292725,
      "learning_rate": 6.116470534512585e-07,
      "loss": 2.8496,
      "step": 225719
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6555538177490234,
      "learning_rate": 6.113860056509156e-07,
      "loss": 2.8017,
      "step": 225720
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.304375410079956,
      "learning_rate": 6.111250135125479e-07,
      "loss": 2.9423,
      "step": 225721
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9530067443847656,
      "learning_rate": 6.108640770362216e-07,
      "loss": 2.7263,
      "step": 225722
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.141354560852051,
      "learning_rate": 6.106031962220037e-07,
      "loss": 2.7387,
      "step": 225723
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9649670124053955,
      "learning_rate": 6.103423710699273e-07,
      "loss": 2.9545,
      "step": 225724
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.377063751220703,
      "learning_rate": 6.100816015800259e-07,
      "loss": 3.0627,
      "step": 225725
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3241281509399414,
      "learning_rate": 6.098208877523658e-07,
      "loss": 3.033,
      "step": 225726
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.19366717338562,
      "learning_rate": 6.095602295869806e-07,
      "loss": 2.775,
      "step": 225727
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.084775924682617,
      "learning_rate": 6.0929962708397e-07,
      "loss": 3.0469,
      "step": 225728
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.147183418273926,
      "learning_rate": 6.090390802433009e-07,
      "loss": 3.1002,
      "step": 225729
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8056766986846924,
      "learning_rate": 6.087785890650731e-07,
      "loss": 2.8573,
      "step": 225730
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9001309871673584,
      "learning_rate": 6.0851815354932e-07,
      "loss": 3.0969,
      "step": 225731
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.158409595489502,
      "learning_rate": 6.082577736961081e-07,
      "loss": 2.9231,
      "step": 225732
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.928213596343994,
      "learning_rate": 6.079974495054707e-07,
      "loss": 2.9569,
      "step": 225733
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8661000728607178,
      "learning_rate": 6.077371809774744e-07,
      "loss": 2.8289,
      "step": 225734
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.69292950630188,
      "learning_rate": 6.074769681121194e-07,
      "loss": 3.0348,
      "step": 225735
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2062816619873047,
      "learning_rate": 6.072168109095055e-07,
      "loss": 2.6445,
      "step": 225736
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.080770254135132,
      "learning_rate": 6.069567093696326e-07,
      "loss": 2.7927,
      "step": 225737
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8778626918792725,
      "learning_rate": 6.066966634926341e-07,
      "loss": 2.8378,
      "step": 225738
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.680739164352417,
      "learning_rate": 6.064366732784431e-07,
      "loss": 2.9948,
      "step": 225739
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9546167850494385,
      "learning_rate": 6.061767387271932e-07,
      "loss": 2.7724,
      "step": 225740
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.853337049484253,
      "learning_rate": 6.059168598389175e-07,
      "loss": 2.9102,
      "step": 225741
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6925532817840576,
      "learning_rate": 6.056570366136493e-07,
      "loss": 2.8712,
      "step": 225742
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8526313304901123,
      "learning_rate": 6.053972690514219e-07,
      "loss": 2.8922,
      "step": 225743
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.686283826828003,
      "learning_rate": 6.05137557152302e-07,
      "loss": 2.7991,
      "step": 225744
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6501026153564453,
      "learning_rate": 6.048779009163562e-07,
      "loss": 2.9765,
      "step": 225745
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0233640670776367,
      "learning_rate": 6.046183003436178e-07,
      "loss": 2.7852,
      "step": 225746
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.951186418533325,
      "learning_rate": 6.0435875543412e-07,
      "loss": 2.8239,
      "step": 225747
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.4515836238861084,
      "learning_rate": 6.040992661879296e-07,
      "loss": 3.0052,
      "step": 225748
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.839566707611084,
      "learning_rate": 6.038398326050797e-07,
      "loss": 3.0268,
      "step": 225749
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.090038776397705,
      "learning_rate": 6.035804546856371e-07,
      "loss": 2.8006,
      "step": 225750
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8283298015594482,
      "learning_rate": 6.03321132429635e-07,
      "loss": 3.1112,
      "step": 225751
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2202208042144775,
      "learning_rate": 6.0306186583714e-07,
      "loss": 3.0309,
      "step": 225752
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1702773571014404,
      "learning_rate": 6.028026549081522e-07,
      "loss": 2.8735,
      "step": 225753
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.697093963623047,
      "learning_rate": 6.025434996428047e-07,
      "loss": 3.0232,
      "step": 225754
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9609854221343994,
      "learning_rate": 6.022844000410643e-07,
      "loss": 2.9615,
      "step": 225755
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.163430213928223,
      "learning_rate": 6.020253561029975e-07,
      "loss": 2.8782,
      "step": 225756
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6496615409851074,
      "learning_rate": 6.017663678287044e-07,
      "loss": 2.6849,
      "step": 225757
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.693373203277588,
      "learning_rate": 6.015074352181515e-07,
      "loss": 2.7788,
      "step": 225758
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.520869255065918,
      "learning_rate": 6.012485582714721e-07,
      "loss": 2.9567,
      "step": 225759
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1401453018188477,
      "learning_rate": 6.009897369886663e-07,
      "loss": 2.8199,
      "step": 225760
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3039023876190186,
      "learning_rate": 6.007309713697672e-07,
      "loss": 3.0754,
      "step": 225761
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.016329765319824,
      "learning_rate": 6.004722614148417e-07,
      "loss": 2.9347,
      "step": 225762
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.672011613845825,
      "learning_rate": 6.002136071239561e-07,
      "loss": 2.7204,
      "step": 225763
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0910685062408447,
      "learning_rate": 5.999550084971439e-07,
      "loss": 2.7311,
      "step": 225764
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1740145683288574,
      "learning_rate": 5.996964655344382e-07,
      "loss": 3.1057,
      "step": 225765
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6664414405822754,
      "learning_rate": 5.99437978235906e-07,
      "loss": 2.816,
      "step": 225766
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8625447750091553,
      "learning_rate": 5.991795466015803e-07,
      "loss": 2.7605,
      "step": 225767
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7332983016967773,
      "learning_rate": 5.989211706315278e-07,
      "loss": 2.9788,
      "step": 225768
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.209099769592285,
      "learning_rate": 5.98662850325815e-07,
      "loss": 3.3558,
      "step": 225769
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7833659648895264,
      "learning_rate": 5.984045856844088e-07,
      "loss": 2.8029,
      "step": 225770
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2220611572265625,
      "learning_rate": 5.981463767074424e-07,
      "loss": 3.0466,
      "step": 225771
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.859154224395752,
      "learning_rate": 5.978882233949489e-07,
      "loss": 2.7291,
      "step": 225772
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7460083961486816,
      "learning_rate": 5.976301257469285e-07,
      "loss": 2.9856,
      "step": 225773
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.569821357727051,
      "learning_rate": 5.97372083763481e-07,
      "loss": 2.7454,
      "step": 225774
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8407390117645264,
      "learning_rate": 5.971140974446065e-07,
      "loss": 2.6881,
      "step": 225775
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.740791082382202,
      "learning_rate": 5.968561667904049e-07,
      "loss": 2.6527,
      "step": 225776
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.833980083465576,
      "learning_rate": 5.965982918009093e-07,
      "loss": 2.6393,
      "step": 225777
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5293266773223877,
      "learning_rate": 5.9634047247612e-07,
      "loss": 2.739,
      "step": 225778
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5426201820373535,
      "learning_rate": 5.960827088161368e-07,
      "loss": 2.9372,
      "step": 225779
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.894540548324585,
      "learning_rate": 5.958250008210263e-07,
      "loss": 2.9682,
      "step": 225780
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2775678634643555,
      "learning_rate": 5.955673484907553e-07,
      "loss": 2.7022,
      "step": 225781
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1679184436798096,
      "learning_rate": 5.953097518254568e-07,
      "loss": 3.265,
      "step": 225782
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0095810890197754,
      "learning_rate": 5.95052210825131e-07,
      "loss": 2.9511,
      "step": 225783
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9182960987091064,
      "learning_rate": 5.947947254898111e-07,
      "loss": 2.7767,
      "step": 225784
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.823305130004883,
      "learning_rate": 5.945372958195971e-07,
      "loss": 2.9574,
      "step": 225785
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.694216728210449,
      "learning_rate": 5.942799218145222e-07,
      "loss": 2.9143,
      "step": 225786
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.170779228210449,
      "learning_rate": 5.940226034745865e-07,
      "loss": 2.9532,
      "step": 225787
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.487949848175049,
      "learning_rate": 5.937653407998899e-07,
      "loss": 2.9156,
      "step": 225788
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2960410118103027,
      "learning_rate": 5.935081337904657e-07,
      "loss": 2.7309,
      "step": 225789
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.815910339355469,
      "learning_rate": 5.932509824463805e-07,
      "loss": 2.8078,
      "step": 225790
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2056779861450195,
      "learning_rate": 5.929938867676342e-07,
      "loss": 2.743,
      "step": 225791
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.908900260925293,
      "learning_rate": 5.927368467542936e-07,
      "loss": 2.9094,
      "step": 225792
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9431416988372803,
      "learning_rate": 5.924798624064253e-07,
      "loss": 3.2409,
      "step": 225793
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.144914150238037,
      "learning_rate": 5.922229337240958e-07,
      "loss": 2.99,
      "step": 225794
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.956127166748047,
      "learning_rate": 5.919660607073051e-07,
      "loss": 2.884,
      "step": 225795
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.938102960586548,
      "learning_rate": 5.917092433561199e-07,
      "loss": 2.9647,
      "step": 225796
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.859757423400879,
      "learning_rate": 5.914524816705735e-07,
      "loss": 2.921,
      "step": 225797
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3878135681152344,
      "learning_rate": 5.911957756507324e-07,
      "loss": 2.9145,
      "step": 225798
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0982718467712402,
      "learning_rate": 5.909391252966301e-07,
      "loss": 2.7893,
      "step": 225799
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2431912422180176,
      "learning_rate": 5.906825306083662e-07,
      "loss": 2.9538,
      "step": 225800
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.166736602783203,
      "learning_rate": 5.904259915859078e-07,
      "loss": 2.9725,
      "step": 225801
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8501150608062744,
      "learning_rate": 5.901695082293545e-07,
      "loss": 3.0503,
      "step": 225802
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.317887544631958,
      "learning_rate": 5.899130805387397e-07,
      "loss": 3.1191,
      "step": 225803
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8572635650634766,
      "learning_rate": 5.896567085140968e-07,
      "loss": 3.1204,
      "step": 225804
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4724178314208984,
      "learning_rate": 5.894003921555257e-07,
      "loss": 2.8532,
      "step": 225805
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.788698196411133,
      "learning_rate": 5.89144131462993e-07,
      "loss": 3.2196,
      "step": 225806
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0988965034484863,
      "learning_rate": 5.888879264365987e-07,
      "loss": 2.8055,
      "step": 225807
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5874252319335938,
      "learning_rate": 5.886317770764093e-07,
      "loss": 2.8265,
      "step": 225808
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7899627685546875,
      "learning_rate": 5.883756833824249e-07,
      "loss": 3.0358,
      "step": 225809
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9458112716674805,
      "learning_rate": 5.881196453547121e-07,
      "loss": 2.627,
      "step": 225810
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.966592311859131,
      "learning_rate": 5.878636629933376e-07,
      "loss": 3.0367,
      "step": 225811
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.236873149871826,
      "learning_rate": 5.876077362983012e-07,
      "loss": 2.7588,
      "step": 225812
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.627760887145996,
      "learning_rate": 5.873518652697029e-07,
      "loss": 2.8425,
      "step": 225813
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8400766849517822,
      "learning_rate": 5.870960499075761e-07,
      "loss": 3.188,
      "step": 225814
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9898345470428467,
      "learning_rate": 5.86840290211954e-07,
      "loss": 2.7327,
      "step": 225815
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1539156436920166,
      "learning_rate": 5.8658458618287e-07,
      "loss": 2.9109,
      "step": 225816
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.974785089492798,
      "learning_rate": 5.863289378204239e-07,
      "loss": 3.0429,
      "step": 225817
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0046229362487793,
      "learning_rate": 5.860733451245825e-07,
      "loss": 3.0994,
      "step": 225818
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.174185276031494,
      "learning_rate": 5.858178080954789e-07,
      "loss": 2.9365,
      "step": 225819
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.466671466827393,
      "learning_rate": 5.855623267331133e-07,
      "loss": 3.0227,
      "step": 225820
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3153786659240723,
      "learning_rate": 5.853069010375522e-07,
      "loss": 2.8336,
      "step": 225821
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7567312717437744,
      "learning_rate": 5.850515310088288e-07,
      "loss": 2.8766,
      "step": 225822
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9806017875671387,
      "learning_rate": 5.8479621664701e-07,
      "loss": 2.8079,
      "step": 225823
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5945382118225098,
      "learning_rate": 5.845409579520954e-07,
      "loss": 2.9483,
      "step": 225824
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9958178997039795,
      "learning_rate": 5.842857549241853e-07,
      "loss": 2.8203,
      "step": 225825
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.38470721244812,
      "learning_rate": 5.840306075633128e-07,
      "loss": 2.8677,
      "step": 225826
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.935187816619873,
      "learning_rate": 5.837755158695445e-07,
      "loss": 2.8341,
      "step": 225827
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.870709180831909,
      "learning_rate": 5.835204798428472e-07,
      "loss": 3.1282,
      "step": 225828
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0168569087982178,
      "learning_rate": 5.832654994833874e-07,
      "loss": 2.8562,
      "step": 225829
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6724236011505127,
      "learning_rate": 5.830105747910985e-07,
      "loss": 2.8354,
      "step": 225830
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.370934009552002,
      "learning_rate": 5.827557057661137e-07,
      "loss": 2.8931,
      "step": 225831
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.162989854812622,
      "learning_rate": 5.825008924084329e-07,
      "loss": 2.7555,
      "step": 225832
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.901040554046631,
      "learning_rate": 5.822461347181229e-07,
      "loss": 2.7408,
      "step": 225833
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5866899490356445,
      "learning_rate": 5.819914326952168e-07,
      "loss": 2.6712,
      "step": 225834
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2143049240112305,
      "learning_rate": 5.817367863397482e-07,
      "loss": 2.9401,
      "step": 225835
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6976194381713867,
      "learning_rate": 5.814821956518168e-07,
      "loss": 2.9486,
      "step": 225836
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6875855922698975,
      "learning_rate": 5.812276606314559e-07,
      "loss": 3.1198,
      "step": 225837
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.139279365539551,
      "learning_rate": 5.809731812786655e-07,
      "loss": 2.8197,
      "step": 225838
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.891171455383301,
      "learning_rate": 5.807187575935457e-07,
      "loss": 2.6182,
      "step": 225839
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.100569486618042,
      "learning_rate": 5.804643895760963e-07,
      "loss": 2.9447,
      "step": 225840
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9640679359436035,
      "learning_rate": 5.802100772264173e-07,
      "loss": 2.7888,
      "step": 225841
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.817335367202759,
      "learning_rate": 5.799558205445088e-07,
      "loss": 2.9576,
      "step": 225842
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.548088312149048,
      "learning_rate": 5.797016195304371e-07,
      "loss": 3.1489,
      "step": 225843
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3286147117614746,
      "learning_rate": 5.794474741842692e-07,
      "loss": 2.7994,
      "step": 225844
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7437171936035156,
      "learning_rate": 5.791933845060382e-07,
      "loss": 2.8888,
      "step": 225845
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.286418914794922,
      "learning_rate": 5.789393504957774e-07,
      "loss": 3.0948,
      "step": 225846
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.14170503616333,
      "learning_rate": 5.786853721535201e-07,
      "loss": 2.9378,
      "step": 225847
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8834381103515625,
      "learning_rate": 5.784314494793662e-07,
      "loss": 3.1097,
      "step": 225848
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1879146099090576,
      "learning_rate": 5.781775824733159e-07,
      "loss": 2.8376,
      "step": 225849
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.645392656326294,
      "learning_rate": 5.77923771135469e-07,
      "loss": 2.5976,
      "step": 225850
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.917067766189575,
      "learning_rate": 5.776700154657921e-07,
      "loss": 3.0527,
      "step": 225851
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.021942377090454,
      "learning_rate": 5.774163154644185e-07,
      "loss": 2.9985,
      "step": 225852
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7719647884368896,
      "learning_rate": 5.771626711313148e-07,
      "loss": 2.7242,
      "step": 225853
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6695289611816406,
      "learning_rate": 5.769090824665812e-07,
      "loss": 3.068,
      "step": 225854
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.295746088027954,
      "learning_rate": 5.76655549470284e-07,
      "loss": 2.9305,
      "step": 225855
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0405795574188232,
      "learning_rate": 5.764020721424234e-07,
      "loss": 3.0468,
      "step": 225856
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2546777725219727,
      "learning_rate": 5.761486504830326e-07,
      "loss": 3.0812,
      "step": 225857
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.858712911605835,
      "learning_rate": 5.758952844922116e-07,
      "loss": 2.8134,
      "step": 225858
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.706275463104248,
      "learning_rate": 5.756419741699935e-07,
      "loss": 2.7443,
      "step": 225859
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.945255994796753,
      "learning_rate": 5.753887195163787e-07,
      "loss": 3.1056,
      "step": 225860
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.372175455093384,
      "learning_rate": 5.751355205315e-07,
      "loss": 2.9039,
      "step": 225861
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.68887996673584,
      "learning_rate": 5.748823772153244e-07,
      "loss": 3.1094,
      "step": 225862
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.293097734451294,
      "learning_rate": 5.746292895679516e-07,
      "loss": 2.9214,
      "step": 225863
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.74480938911438,
      "learning_rate": 5.743762575893817e-07,
      "loss": 2.9803,
      "step": 225864
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7609729766845703,
      "learning_rate": 5.741232812797147e-07,
      "loss": 3.0365,
      "step": 225865
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3920528888702393,
      "learning_rate": 5.738703606389172e-07,
      "loss": 2.9242,
      "step": 225866
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7013466358184814,
      "learning_rate": 5.736174956671558e-07,
      "loss": 2.8873,
      "step": 225867
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.780141830444336,
      "learning_rate": 5.733646863643637e-07,
      "loss": 2.9381,
      "step": 225868
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0299134254455566,
      "learning_rate": 5.731119327306744e-07,
      "loss": 3.0632,
      "step": 225869
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.733124256134033,
      "learning_rate": 5.728592347660544e-07,
      "loss": 2.8808,
      "step": 225870
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8987059593200684,
      "learning_rate": 5.726065924706369e-07,
      "loss": 2.8822,
      "step": 225871
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.521399736404419,
      "learning_rate": 5.723540058443888e-07,
      "loss": 2.7969,
      "step": 225872
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0550904273986816,
      "learning_rate": 5.721014748874098e-07,
      "loss": 2.9269,
      "step": 225873
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.493751287460327,
      "learning_rate": 5.718489995997e-07,
      "loss": 2.6384,
      "step": 225874
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8527088165283203,
      "learning_rate": 5.715965799813593e-07,
      "loss": 2.8492,
      "step": 225875
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9508233070373535,
      "learning_rate": 5.713442160324211e-07,
      "loss": 2.9437,
      "step": 225876
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.990694046020508,
      "learning_rate": 5.710919077529186e-07,
      "loss": 3.0251,
      "step": 225877
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7121050357818604,
      "learning_rate": 5.708396551428851e-07,
      "loss": 3.0358,
      "step": 225878
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.958874464035034,
      "learning_rate": 5.705874582023873e-07,
      "loss": 2.7202,
      "step": 225879
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9707062244415283,
      "learning_rate": 5.703353169314584e-07,
      "loss": 3.3597,
      "step": 225880
    },
    {
      "epoch": 2.94,
      "grad_norm": 5.491371154785156,
      "learning_rate": 5.700832313301984e-07,
      "loss": 2.9695,
      "step": 225881
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2864787578582764,
      "learning_rate": 5.698312013985739e-07,
      "loss": 2.8993,
      "step": 225882
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0203981399536133,
      "learning_rate": 5.69579227136685e-07,
      "loss": 2.7847,
      "step": 225883
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4963977336883545,
      "learning_rate": 5.693273085445648e-07,
      "loss": 2.9929,
      "step": 225884
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.43765926361084,
      "learning_rate": 5.690754456222469e-07,
      "loss": 2.8762,
      "step": 225885
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8831734657287598,
      "learning_rate": 5.688236383697975e-07,
      "loss": 2.8792,
      "step": 225886
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7504241466522217,
      "learning_rate": 5.685718867872835e-07,
      "loss": 3.0253,
      "step": 225887
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.023374319076538,
      "learning_rate": 5.683201908747048e-07,
      "loss": 3.1329,
      "step": 225888
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.548274517059326,
      "learning_rate": 5.680685506320948e-07,
      "loss": 2.8743,
      "step": 225889
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8236942291259766,
      "learning_rate": 5.678169660595533e-07,
      "loss": 2.7323,
      "step": 225890
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7548203468322754,
      "learning_rate": 5.67565437157147e-07,
      "loss": 3.1111,
      "step": 225891
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2697150707244873,
      "learning_rate": 5.673139639248426e-07,
      "loss": 2.7634,
      "step": 225892
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7419791221618652,
      "learning_rate": 5.670625463627065e-07,
      "loss": 2.7938,
      "step": 225893
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2943527698516846,
      "learning_rate": 5.66811184470839e-07,
      "loss": 2.9447,
      "step": 225894
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2017672061920166,
      "learning_rate": 5.665598782492398e-07,
      "loss": 2.9367,
      "step": 225895
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8765878677368164,
      "learning_rate": 5.663086276979756e-07,
      "loss": 2.9273,
      "step": 225896
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.760850667953491,
      "learning_rate": 5.66057432817113e-07,
      "loss": 3.0899,
      "step": 225897
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7744102478027344,
      "learning_rate": 5.658062936066187e-07,
      "loss": 2.9039,
      "step": 225898
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0030829906463623,
      "learning_rate": 5.655552100666261e-07,
      "loss": 2.9869,
      "step": 225899
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.228886842727661,
      "learning_rate": 5.653041821971682e-07,
      "loss": 3.2014,
      "step": 225900
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7679388523101807,
      "learning_rate": 5.650532099982453e-07,
      "loss": 2.9421,
      "step": 225901
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.730982542037964,
      "learning_rate": 5.648022934699236e-07,
      "loss": 2.7854,
      "step": 225902
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7883877754211426,
      "learning_rate": 5.645514326122702e-07,
      "loss": 2.9915,
      "step": 225903
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.166598081588745,
      "learning_rate": 5.643006274253181e-07,
      "loss": 2.9599,
      "step": 225904
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6491334438323975,
      "learning_rate": 5.64049877909134e-07,
      "loss": 2.8152,
      "step": 225905
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7421891689300537,
      "learning_rate": 5.63799184063718e-07,
      "loss": 2.8287,
      "step": 225906
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.937431812286377,
      "learning_rate": 5.635485458891364e-07,
      "loss": 3.0164,
      "step": 225907
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.685847282409668,
      "learning_rate": 5.632979633854562e-07,
      "loss": 2.9671,
      "step": 225908
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0506460666656494,
      "learning_rate": 5.630474365527105e-07,
      "loss": 2.8028,
      "step": 225909
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7219603061676025,
      "learning_rate": 5.627969653909659e-07,
      "loss": 2.9652,
      "step": 225910
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3997225761413574,
      "learning_rate": 5.625465499002224e-07,
      "loss": 2.6874,
      "step": 225911
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.844705820083618,
      "learning_rate": 5.6229619008058e-07,
      "loss": 2.9581,
      "step": 225912
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.048882007598877,
      "learning_rate": 5.620458859320387e-07,
      "loss": 2.7986,
      "step": 225913
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.88854718208313,
      "learning_rate": 5.617956374546651e-07,
      "loss": 2.9691,
      "step": 225914
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1458797454833984,
      "learning_rate": 5.615454446484924e-07,
      "loss": 3.0237,
      "step": 225915
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.037076234817505,
      "learning_rate": 5.612953075136206e-07,
      "loss": 2.9174,
      "step": 225916
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3890671730041504,
      "learning_rate": 5.610452260500498e-07,
      "loss": 2.9701,
      "step": 225917
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6586127281188965,
      "learning_rate": 5.607952002578131e-07,
      "loss": 3.0124,
      "step": 225918
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7810847759246826,
      "learning_rate": 5.605452301369772e-07,
      "loss": 2.9791,
      "step": 225919
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7707679271698,
      "learning_rate": 5.602953156875756e-07,
      "loss": 3.2345,
      "step": 225920
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.676485538482666,
      "learning_rate": 5.60045456909708e-07,
      "loss": 2.8596,
      "step": 225921
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.608163356781006,
      "learning_rate": 5.59795653803341e-07,
      "loss": 2.9004,
      "step": 225922
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7548956871032715,
      "learning_rate": 5.595459063685748e-07,
      "loss": 2.783,
      "step": 225923
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2072606086730957,
      "learning_rate": 5.592962146054425e-07,
      "loss": 2.9713,
      "step": 225924
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9928159713745117,
      "learning_rate": 5.590465785140108e-07,
      "loss": 2.8899,
      "step": 225925
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6739754676818848,
      "learning_rate": 5.587969980942797e-07,
      "loss": 3.0272,
      "step": 225926
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.671220302581787,
      "learning_rate": 5.585474733463157e-07,
      "loss": 2.9028,
      "step": 225927
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9885921478271484,
      "learning_rate": 5.582980042701857e-07,
      "loss": 2.9289,
      "step": 225928
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0212950706481934,
      "learning_rate": 5.580485908659227e-07,
      "loss": 2.997,
      "step": 225929
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5987534523010254,
      "learning_rate": 5.577992331335601e-07,
      "loss": 2.8454,
      "step": 225930
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.871868848800659,
      "learning_rate": 5.575499310731646e-07,
      "loss": 2.7921,
      "step": 225931
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.073432207107544,
      "learning_rate": 5.573006846847694e-07,
      "loss": 2.8373,
      "step": 225932
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5442304611206055,
      "learning_rate": 5.570514939684078e-07,
      "loss": 2.8503,
      "step": 225933
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.423098087310791,
      "learning_rate": 5.568023589241466e-07,
      "loss": 2.9313,
      "step": 225934
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7195451259613037,
      "learning_rate": 5.565532795520522e-07,
      "loss": 2.6873,
      "step": 225935
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.191457748413086,
      "learning_rate": 5.563042558521247e-07,
      "loss": 2.9097,
      "step": 225936
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9547948837280273,
      "learning_rate": 5.560552878244307e-07,
      "loss": 2.9594,
      "step": 225937
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.022470474243164,
      "learning_rate": 5.558063754690367e-07,
      "loss": 3.2273,
      "step": 225938
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2143807411193848,
      "learning_rate": 5.555575187859762e-07,
      "loss": 3.0398,
      "step": 225939
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9845423698425293,
      "learning_rate": 5.553087177752824e-07,
      "loss": 3.0352,
      "step": 225940
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.464138984680176,
      "learning_rate": 5.550599724369886e-07,
      "loss": 3.1181,
      "step": 225941
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.593698024749756,
      "learning_rate": 5.548112827711948e-07,
      "loss": 3.0134,
      "step": 225942
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0927889347076416,
      "learning_rate": 5.545626487778676e-07,
      "loss": 3.1178,
      "step": 225943
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.521214008331299,
      "learning_rate": 5.543140704571402e-07,
      "loss": 2.9242,
      "step": 225944
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.871729612350464,
      "learning_rate": 5.540655478090128e-07,
      "loss": 3.0652,
      "step": 225945
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.631316900253296,
      "learning_rate": 5.538170808335186e-07,
      "loss": 2.8675,
      "step": 225946
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6074163913726807,
      "learning_rate": 5.53568669530724e-07,
      "loss": 3.0099,
      "step": 225947
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.019749641418457,
      "learning_rate": 5.533203139006958e-07,
      "loss": 2.9076,
      "step": 225948
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.928030490875244,
      "learning_rate": 5.530720139434341e-07,
      "loss": 2.9698,
      "step": 225949
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.98348069190979,
      "learning_rate": 5.528237696590387e-07,
      "loss": 3.1733,
      "step": 225950
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.925353765487671,
      "learning_rate": 5.525755810474764e-07,
      "loss": 3.0931,
      "step": 225951
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8670437335968018,
      "learning_rate": 5.523274481088802e-07,
      "loss": 2.7823,
      "step": 225952
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3498120307922363,
      "learning_rate": 5.520793708432503e-07,
      "loss": 2.8908,
      "step": 225953
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5843937397003174,
      "learning_rate": 5.518313492506532e-07,
      "loss": 2.6619,
      "step": 225954
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2843921184539795,
      "learning_rate": 5.515833833311222e-07,
      "loss": 2.9691,
      "step": 225955
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.192462682723999,
      "learning_rate": 5.513354730846909e-07,
      "loss": 2.8608,
      "step": 225956
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0179836750030518,
      "learning_rate": 5.510876185114255e-07,
      "loss": 2.8702,
      "step": 225957
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.002147912979126,
      "learning_rate": 5.508398196113595e-07,
      "loss": 2.8385,
      "step": 225958
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.788985252380371,
      "learning_rate": 5.505920763845262e-07,
      "loss": 2.8223,
      "step": 225959
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.965761423110962,
      "learning_rate": 5.503443888310255e-07,
      "loss": 3.0444,
      "step": 225960
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2704148292541504,
      "learning_rate": 5.500967569508574e-07,
      "loss": 2.6679,
      "step": 225961
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.906996011734009,
      "learning_rate": 5.498491807440886e-07,
      "loss": 2.9134,
      "step": 225962
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6534647941589355,
      "learning_rate": 5.496016602107523e-07,
      "loss": 2.8448,
      "step": 225963
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3468034267425537,
      "learning_rate": 5.493541953508818e-07,
      "loss": 2.8383,
      "step": 225964
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9286649227142334,
      "learning_rate": 5.491067861645437e-07,
      "loss": 3.2256,
      "step": 225965
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8047969341278076,
      "learning_rate": 5.488594326517715e-07,
      "loss": 3.0391,
      "step": 225966
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9168038368225098,
      "learning_rate": 5.486121348126316e-07,
      "loss": 2.9856,
      "step": 225967
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2549469470977783,
      "learning_rate": 5.483648926471573e-07,
      "loss": 3.1012,
      "step": 225968
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.323474884033203,
      "learning_rate": 5.481177061553821e-07,
      "loss": 3.054,
      "step": 225969
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6871094703674316,
      "learning_rate": 5.478705753373724e-07,
      "loss": 3.0979,
      "step": 225970
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.592176675796509,
      "learning_rate": 5.476235001931617e-07,
      "loss": 2.7949,
      "step": 225971
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9903149604797363,
      "learning_rate": 5.473764807228165e-07,
      "loss": 3.0738,
      "step": 225972
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.396646738052368,
      "learning_rate": 5.471295169263701e-07,
      "loss": 2.916,
      "step": 225973
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8294692039489746,
      "learning_rate": 5.468826088038558e-07,
      "loss": 2.742,
      "step": 225974
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2815017700195312,
      "learning_rate": 5.46635756355307e-07,
      "loss": 2.8497,
      "step": 225975
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.678250312805176,
      "learning_rate": 5.463889595808236e-07,
      "loss": 2.6725,
      "step": 225976
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9840211868286133,
      "learning_rate": 5.461422184804054e-07,
      "loss": 2.7565,
      "step": 225977
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5345914363861084,
      "learning_rate": 5.458955330541193e-07,
      "loss": 3.0336,
      "step": 225978
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.059501886367798,
      "learning_rate": 5.456489033019984e-07,
      "loss": 2.9915,
      "step": 225979
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5647006034851074,
      "learning_rate": 5.454023292240761e-07,
      "loss": 3.0221,
      "step": 225980
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.299182653427124,
      "learning_rate": 5.451558108204191e-07,
      "loss": 2.9515,
      "step": 225981
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.948289632797241,
      "learning_rate": 5.449093480910937e-07,
      "loss": 3.0001,
      "step": 225982
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.733706474304199,
      "learning_rate": 5.446629410361003e-07,
      "loss": 3.1489,
      "step": 225983
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9818215370178223,
      "learning_rate": 5.444165896555386e-07,
      "loss": 2.7447,
      "step": 225984
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.095778703689575,
      "learning_rate": 5.441702939493753e-07,
      "loss": 2.9849,
      "step": 225985
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.78721022605896,
      "learning_rate": 5.439240539177437e-07,
      "loss": 2.9684,
      "step": 225986
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.967716693878174,
      "learning_rate": 5.436778695606437e-07,
      "loss": 3.0294,
      "step": 225987
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1215779781341553,
      "learning_rate": 5.434317408781086e-07,
      "loss": 2.7412,
      "step": 225988
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.19397234916687,
      "learning_rate": 5.431856678702052e-07,
      "loss": 2.8808,
      "step": 225989
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.478219985961914,
      "learning_rate": 5.429396505369998e-07,
      "loss": 2.8177,
      "step": 225990
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.714829683303833,
      "learning_rate": 5.426936888784927e-07,
      "loss": 3.0026,
      "step": 225991
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.384763717651367,
      "learning_rate": 5.424477828947504e-07,
      "loss": 2.8057,
      "step": 225992
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8505942821502686,
      "learning_rate": 5.422019325858395e-07,
      "loss": 2.9307,
      "step": 225993
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.830954074859619,
      "learning_rate": 5.419561379517601e-07,
      "loss": 3.0245,
      "step": 225994
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.658839702606201,
      "learning_rate": 5.417103989925786e-07,
      "loss": 2.8797,
      "step": 225995
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6209356784820557,
      "learning_rate": 5.414647157083618e-07,
      "loss": 2.7426,
      "step": 225996
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0650596618652344,
      "learning_rate": 5.41219088099143e-07,
      "loss": 2.8327,
      "step": 225997
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.003056049346924,
      "learning_rate": 5.409735161649553e-07,
      "loss": 2.8786,
      "step": 225998
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.950392723083496,
      "learning_rate": 5.407279999058655e-07,
      "loss": 3.1106,
      "step": 225999
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2041008472442627,
      "learning_rate": 5.40482539321907e-07,
      "loss": 2.8953,
      "step": 226000
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8368752002716064,
      "learning_rate": 5.402371344131128e-07,
      "loss": 3.0407,
      "step": 226001
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0325570106506348,
      "learning_rate": 5.399917851795499e-07,
      "loss": 2.9822,
      "step": 226002
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5418481826782227,
      "learning_rate": 5.397464916212846e-07,
      "loss": 2.9631,
      "step": 226003
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.31200909614563,
      "learning_rate": 5.395012537382836e-07,
      "loss": 2.9796,
      "step": 226004
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8717095851898193,
      "learning_rate": 5.39256071530647e-07,
      "loss": 3.0062,
      "step": 226005
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6503591537475586,
      "learning_rate": 5.390109449984414e-07,
      "loss": 3.0045,
      "step": 226006
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.80723237991333,
      "learning_rate": 5.387658741416667e-07,
      "loss": 2.6311,
      "step": 226007
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.169499158859253,
      "learning_rate": 5.385208589603895e-07,
      "loss": 2.9425,
      "step": 226008
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6532459259033203,
      "learning_rate": 5.382758994546765e-07,
      "loss": 2.6995,
      "step": 226009
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4065420627593994,
      "learning_rate": 5.380309956245276e-07,
      "loss": 2.8047,
      "step": 226010
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5629630088806152,
      "learning_rate": 5.377861474700429e-07,
      "loss": 2.8721,
      "step": 226011
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.872786283493042,
      "learning_rate": 5.375413549911889e-07,
      "loss": 2.7033,
      "step": 226012
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.612882375717163,
      "learning_rate": 5.372966181880988e-07,
      "loss": 3.0362,
      "step": 226013
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.608715534210205,
      "learning_rate": 5.370519370607396e-07,
      "loss": 3.1057,
      "step": 226014
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.011582374572754,
      "learning_rate": 5.368073116092442e-07,
      "loss": 2.9564,
      "step": 226015
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7380712032318115,
      "learning_rate": 5.365627418335794e-07,
      "loss": 3.1108,
      "step": 226016
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.666684865951538,
      "learning_rate": 5.363182277338119e-07,
      "loss": 2.9351,
      "step": 226017
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.709174871444702,
      "learning_rate": 5.360737693100081e-07,
      "loss": 2.9619,
      "step": 226018
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5344693660736084,
      "learning_rate": 5.358293665622015e-07,
      "loss": 2.7603,
      "step": 226019
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.456737756729126,
      "learning_rate": 5.355850194904254e-07,
      "loss": 3.0013,
      "step": 226020
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.237334728240967,
      "learning_rate": 5.353407280947464e-07,
      "loss": 3.0141,
      "step": 226021
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0009047985076904,
      "learning_rate": 5.350964923751977e-07,
      "loss": 2.9082,
      "step": 226022
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3929600715637207,
      "learning_rate": 5.34852312331846e-07,
      "loss": 3.0215,
      "step": 226023
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.360989809036255,
      "learning_rate": 5.346081879646913e-07,
      "loss": 2.9656,
      "step": 226024
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6337504386901855,
      "learning_rate": 5.343641192738335e-07,
      "loss": 2.7175,
      "step": 226025
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1811842918395996,
      "learning_rate": 5.341201062592393e-07,
      "loss": 2.9057,
      "step": 226026
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.811878204345703,
      "learning_rate": 5.338761489210752e-07,
      "loss": 2.7437,
      "step": 226027
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7323672771453857,
      "learning_rate": 5.336322472592747e-07,
      "loss": 2.9208,
      "step": 226028
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0561587810516357,
      "learning_rate": 5.333884012739376e-07,
      "loss": 3.0557,
      "step": 226029
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1613128185272217,
      "learning_rate": 5.331446109650639e-07,
      "loss": 2.9036,
      "step": 226030
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8251559734344482,
      "learning_rate": 5.329008763327536e-07,
      "loss": 3.1723,
      "step": 226031
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.50443172454834,
      "learning_rate": 5.326571973770399e-07,
      "loss": 3.0767,
      "step": 226032
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.073134183883667,
      "learning_rate": 5.324135740979563e-07,
      "loss": 2.9585,
      "step": 226033
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.485950469970703,
      "learning_rate": 5.321700064955359e-07,
      "loss": 3.1273,
      "step": 226034
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.873487710952759,
      "learning_rate": 5.319264945698787e-07,
      "loss": 2.948,
      "step": 226035
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5501930713653564,
      "learning_rate": 5.316830383209514e-07,
      "loss": 3.0333,
      "step": 226036
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8046271800994873,
      "learning_rate": 5.314396377488205e-07,
      "loss": 2.9254,
      "step": 226037
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7698471546173096,
      "learning_rate": 5.311962928535862e-07,
      "loss": 2.743,
      "step": 226038
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0617854595184326,
      "learning_rate": 5.309530036352483e-07,
      "loss": 2.8455,
      "step": 226039
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.60050106048584,
      "learning_rate": 5.307097700938401e-07,
      "loss": 2.9896,
      "step": 226040
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.169618606567383,
      "learning_rate": 5.304665922294282e-07,
      "loss": 2.8634,
      "step": 226041
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8487331867218018,
      "learning_rate": 5.302234700420794e-07,
      "loss": 3.0455,
      "step": 226042
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2948620319366455,
      "learning_rate": 5.299804035317934e-07,
      "loss": 2.7428,
      "step": 226043
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.852172374725342,
      "learning_rate": 5.29737392698637e-07,
      "loss": 2.8447,
      "step": 226044
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.349714517593384,
      "learning_rate": 5.294944375426769e-07,
      "loss": 2.7536,
      "step": 226045
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.180030584335327,
      "learning_rate": 5.292515380639129e-07,
      "loss": 2.9053,
      "step": 226046
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9138031005859375,
      "learning_rate": 5.290086942624117e-07,
      "loss": 3.0751,
      "step": 226047
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.910043478012085,
      "learning_rate": 5.287659061382399e-07,
      "loss": 2.6735,
      "step": 226048
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.521244525909424,
      "learning_rate": 5.285231736914309e-07,
      "loss": 3.0023,
      "step": 226049
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.932506561279297,
      "learning_rate": 5.282804969219845e-07,
      "loss": 2.6598,
      "step": 226050
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8921146392822266,
      "learning_rate": 5.280378758300008e-07,
      "loss": 2.9709,
      "step": 226051
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9632487297058105,
      "learning_rate": 5.277953104155131e-07,
      "loss": 2.7397,
      "step": 226052
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.201711416244507,
      "learning_rate": 5.275528006785879e-07,
      "loss": 3.2374,
      "step": 226053
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.4043843746185303,
      "learning_rate": 5.27310346619192e-07,
      "loss": 3.0816,
      "step": 226054
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.097367525100708,
      "learning_rate": 5.270679482374585e-07,
      "loss": 2.8386,
      "step": 226055
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3565549850463867,
      "learning_rate": 5.268256055333875e-07,
      "loss": 3.0503,
      "step": 226056
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.930243968963623,
      "learning_rate": 5.265833185070123e-07,
      "loss": 2.9732,
      "step": 226057
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3701422214508057,
      "learning_rate": 5.263410871584329e-07,
      "loss": 2.7759,
      "step": 226058
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.282055139541626,
      "learning_rate": 5.26098911487649e-07,
      "loss": 2.9303,
      "step": 226059
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.137486696243286,
      "learning_rate": 5.258567914946943e-07,
      "loss": 3.0847,
      "step": 226060
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6520819664001465,
      "learning_rate": 5.256147271796684e-07,
      "loss": 2.9326,
      "step": 226061
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5952305793762207,
      "learning_rate": 5.253727185425383e-07,
      "loss": 3.0269,
      "step": 226062
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.979072332382202,
      "learning_rate": 5.251307655834369e-07,
      "loss": 2.9709,
      "step": 226063
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1006357669830322,
      "learning_rate": 5.248888683023644e-07,
      "loss": 2.8039,
      "step": 226064
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6075243949890137,
      "learning_rate": 5.246470266993541e-07,
      "loss": 2.9569,
      "step": 226065
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.745603084564209,
      "learning_rate": 5.244052407744726e-07,
      "loss": 2.9618,
      "step": 226066
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8278489112854004,
      "learning_rate": 5.241635105277531e-07,
      "loss": 3.0506,
      "step": 226067
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1390161514282227,
      "learning_rate": 5.23921835959229e-07,
      "loss": 3.2118,
      "step": 226068
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.026956081390381,
      "learning_rate": 5.236802170689669e-07,
      "loss": 2.9396,
      "step": 226069
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0353946685791016,
      "learning_rate": 5.234386538570334e-07,
      "loss": 2.9357,
      "step": 226070
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7529821395874023,
      "learning_rate": 5.231971463233952e-07,
      "loss": 3.076,
      "step": 226071
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7674717903137207,
      "learning_rate": 5.229556944681856e-07,
      "loss": 2.7724,
      "step": 226072
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8304505348205566,
      "learning_rate": 5.227142982914045e-07,
      "loss": 3.1171,
      "step": 226073
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7035958766937256,
      "learning_rate": 5.224729577931186e-07,
      "loss": 3.0436,
      "step": 226074
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.875059127807617,
      "learning_rate": 5.222316729733277e-07,
      "loss": 2.7707,
      "step": 226075
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.490417957305908,
      "learning_rate": 5.21990443832132e-07,
      "loss": 2.9439,
      "step": 226076
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7373573780059814,
      "learning_rate": 5.217492703695314e-07,
      "loss": 2.9866,
      "step": 226077
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0128743648529053,
      "learning_rate": 5.215081525856257e-07,
      "loss": 2.7016,
      "step": 226078
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6647090911865234,
      "learning_rate": 5.212670904803818e-07,
      "loss": 2.9785,
      "step": 226079
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2985992431640625,
      "learning_rate": 5.210260840538994e-07,
      "loss": 2.9253,
      "step": 226080
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.687439203262329,
      "learning_rate": 5.207851333062119e-07,
      "loss": 3.2283,
      "step": 226081
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.6169941425323486,
      "learning_rate": 5.205442382373858e-07,
      "loss": 2.7007,
      "step": 226082
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.926969051361084,
      "learning_rate": 5.203033988474215e-07,
      "loss": 2.8442,
      "step": 226083
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.144001007080078,
      "learning_rate": 5.200626151364185e-07,
      "loss": 2.8539,
      "step": 226084
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.329482078552246,
      "learning_rate": 5.198218871043436e-07,
      "loss": 2.6762,
      "step": 226085
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8590853214263916,
      "learning_rate": 5.195812147512967e-07,
      "loss": 3.0642,
      "step": 226086
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.593597412109375,
      "learning_rate": 5.193405980773446e-07,
      "loss": 2.876,
      "step": 226087
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.011587381362915,
      "learning_rate": 5.191000370824872e-07,
      "loss": 2.9347,
      "step": 226088
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.827688217163086,
      "learning_rate": 5.188595317667577e-07,
      "loss": 3.1184,
      "step": 226089
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9838578701019287,
      "learning_rate": 5.186190821302227e-07,
      "loss": 2.8656,
      "step": 226090
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.598062038421631,
      "learning_rate": 5.183786881729491e-07,
      "loss": 2.8065,
      "step": 226091
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6460299491882324,
      "learning_rate": 5.181383498949699e-07,
      "loss": 2.951,
      "step": 226092
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.718310594558716,
      "learning_rate": 5.178980672963184e-07,
      "loss": 2.9989,
      "step": 226093
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.91910982131958,
      "learning_rate": 5.176578403770282e-07,
      "loss": 2.9842,
      "step": 226094
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.579406499862671,
      "learning_rate": 5.174176691371656e-07,
      "loss": 2.9146,
      "step": 226095
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8733115196228027,
      "learning_rate": 5.171775535767642e-07,
      "loss": 2.6467,
      "step": 226096
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.079847574234009,
      "learning_rate": 5.169374936958903e-07,
      "loss": 3.0116,
      "step": 226097
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7454357147216797,
      "learning_rate": 5.166974894945442e-07,
      "loss": 2.8203,
      "step": 226098
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0835483074188232,
      "learning_rate": 5.164575409728256e-07,
      "loss": 2.9832,
      "step": 226099
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.019643783569336,
      "learning_rate": 5.162176481307346e-07,
      "loss": 2.6971,
      "step": 226100
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.891700029373169,
      "learning_rate": 5.159778109683377e-07,
      "loss": 3.0298,
      "step": 226101
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.155200481414795,
      "learning_rate": 5.157380294856683e-07,
      "loss": 2.9315,
      "step": 226102
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1202807426452637,
      "learning_rate": 5.154983036827598e-07,
      "loss": 3.0468,
      "step": 226103
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0472195148468018,
      "learning_rate": 5.15258633559712e-07,
      "loss": 2.7301,
      "step": 226104
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.005411386489868,
      "learning_rate": 5.150190191164916e-07,
      "loss": 3.0608,
      "step": 226105
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.263566493988037,
      "learning_rate": 5.147794603532319e-07,
      "loss": 2.9229,
      "step": 226106
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.4408669471740723,
      "learning_rate": 5.145399572698661e-07,
      "loss": 2.7595,
      "step": 226107
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.5814433097839355,
      "learning_rate": 5.143005098665609e-07,
      "loss": 3.0534,
      "step": 226108
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.085933685302734,
      "learning_rate": 5.140611181432496e-07,
      "loss": 2.5579,
      "step": 226109
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0864474773406982,
      "learning_rate": 5.138217821000656e-07,
      "loss": 2.9835,
      "step": 226110
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9113359451293945,
      "learning_rate": 5.135825017369755e-07,
      "loss": 2.8674,
      "step": 226111
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8110244274139404,
      "learning_rate": 5.133432770541124e-07,
      "loss": 2.9405,
      "step": 226112
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.62560772895813,
      "learning_rate": 5.13104108051443e-07,
      "loss": 3.1748,
      "step": 226113
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.077378034591675,
      "learning_rate": 5.128649947290342e-07,
      "loss": 2.8415,
      "step": 226114
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.015928268432617,
      "learning_rate": 5.126259370869523e-07,
      "loss": 2.8126,
      "step": 226115
    },
    {
      "epoch": 2.94,
      "grad_norm": 5.289843559265137,
      "learning_rate": 5.123869351252307e-07,
      "loss": 2.8971,
      "step": 226116
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.791249990463257,
      "learning_rate": 5.121479888439029e-07,
      "loss": 2.7746,
      "step": 226117
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.997600555419922,
      "learning_rate": 5.119090982430019e-07,
      "loss": 2.9697,
      "step": 226118
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9881131649017334,
      "learning_rate": 5.116702633226277e-07,
      "loss": 2.612,
      "step": 226119
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.204106569290161,
      "learning_rate": 5.114314840827471e-07,
      "loss": 2.7685,
      "step": 226120
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9832780361175537,
      "learning_rate": 5.111927605234601e-07,
      "loss": 2.9758,
      "step": 226121
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3225677013397217,
      "learning_rate": 5.109540926447997e-07,
      "loss": 2.7733,
      "step": 226122
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8018195629119873,
      "learning_rate": 5.107154804467995e-07,
      "loss": 2.9602,
      "step": 226123
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.835862398147583,
      "learning_rate": 5.104769239295259e-07,
      "loss": 2.7682,
      "step": 226124
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7998688220977783,
      "learning_rate": 5.102384230930123e-07,
      "loss": 2.996,
      "step": 226125
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.05175518989563,
      "learning_rate": 5.09999977937292e-07,
      "loss": 2.9962,
      "step": 226126
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.304793357849121,
      "learning_rate": 5.097615884623984e-07,
      "loss": 2.9285,
      "step": 226127
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1702377796173096,
      "learning_rate": 5.095232546683981e-07,
      "loss": 2.8542,
      "step": 226128
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.427659273147583,
      "learning_rate": 5.092849765553242e-07,
      "loss": 3.0362,
      "step": 226129
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.041933536529541,
      "learning_rate": 5.090467541232435e-07,
      "loss": 3.0225,
      "step": 226130
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.2316019535064697,
      "learning_rate": 5.088085873721893e-07,
      "loss": 3.0266,
      "step": 226131
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.672760248184204,
      "learning_rate": 5.085704763021947e-07,
      "loss": 2.4121,
      "step": 226132
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.921844005584717,
      "learning_rate": 5.083324209132933e-07,
      "loss": 3.0667,
      "step": 226133
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9962639808654785,
      "learning_rate": 5.080944212055849e-07,
      "loss": 3.0256,
      "step": 226134
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6197259426116943,
      "learning_rate": 5.078564771790361e-07,
      "loss": 3.1172,
      "step": 226135
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7384979724884033,
      "learning_rate": 5.076185888337469e-07,
      "loss": 3.0197,
      "step": 226136
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.565925359725952,
      "learning_rate": 5.073807561697507e-07,
      "loss": 3.1269,
      "step": 226137
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3879776000976562,
      "learning_rate": 5.071429791870807e-07,
      "loss": 3.0389,
      "step": 226138
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5604918003082275,
      "learning_rate": 5.069052578857702e-07,
      "loss": 3.028,
      "step": 226139
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.672555685043335,
      "learning_rate": 5.066675922658858e-07,
      "loss": 3.1039,
      "step": 226140
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.7124505043029785,
      "learning_rate": 5.064299823274942e-07,
      "loss": 2.9936,
      "step": 226141
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.959080457687378,
      "learning_rate": 5.061924280705621e-07,
      "loss": 2.6516,
      "step": 226142
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.5832371711730957,
      "learning_rate": 5.059549294952226e-07,
      "loss": 3.1031,
      "step": 226143
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.386409044265747,
      "learning_rate": 5.057174866014758e-07,
      "loss": 2.8518,
      "step": 226144
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7426116466522217,
      "learning_rate": 5.054800993893549e-07,
      "loss": 2.9263,
      "step": 226145
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.034282684326172,
      "learning_rate": 5.052427678589266e-07,
      "loss": 3.0442,
      "step": 226146
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.421025276184082,
      "learning_rate": 5.050054920102243e-07,
      "loss": 3.0479,
      "step": 226147
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.830817699432373,
      "learning_rate": 5.04768271843281e-07,
      "loss": 2.8931,
      "step": 226148
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5063812732696533,
      "learning_rate": 5.045311073581637e-07,
      "loss": 2.8723,
      "step": 226149
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.651080846786499,
      "learning_rate": 5.042939985549055e-07,
      "loss": 3.215,
      "step": 226150
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.5590062141418457,
      "learning_rate": 5.04056945433573e-07,
      "loss": 3.1179,
      "step": 226151
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.583799362182617,
      "learning_rate": 5.038199479941663e-07,
      "loss": 3.0528,
      "step": 226152
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.81957745552063,
      "learning_rate": 5.035830062367519e-07,
      "loss": 2.9964,
      "step": 226153
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.403829574584961,
      "learning_rate": 5.033461201613964e-07,
      "loss": 3.124,
      "step": 226154
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.8838913440704346,
      "learning_rate": 5.031092897681332e-07,
      "loss": 2.8236,
      "step": 226155
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.798792839050293,
      "learning_rate": 5.028725150569624e-07,
      "loss": 3.0201,
      "step": 226156
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.7866711616516113,
      "learning_rate": 5.026357960279504e-07,
      "loss": 3.0978,
      "step": 226157
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9162044525146484,
      "learning_rate": 5.023991326811971e-07,
      "loss": 3.1529,
      "step": 226158
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.774946689605713,
      "learning_rate": 5.021625250166694e-07,
      "loss": 3.1085,
      "step": 226159
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.042132616043091,
      "learning_rate": 5.019259730344671e-07,
      "loss": 2.9547,
      "step": 226160
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.029590368270874,
      "learning_rate": 5.016894767345902e-07,
      "loss": 2.9371,
      "step": 226161
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.079372406005859,
      "learning_rate": 5.014530361171055e-07,
      "loss": 2.7074,
      "step": 226162
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.388953924179077,
      "learning_rate": 5.012166511820459e-07,
      "loss": 2.7398,
      "step": 226163
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6999399662017822,
      "learning_rate": 5.009803219294783e-07,
      "loss": 2.7886,
      "step": 226164
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1445159912109375,
      "learning_rate": 5.007440483594027e-07,
      "loss": 3.062,
      "step": 226165
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.4489376544952393,
      "learning_rate": 5.00507830471919e-07,
      "loss": 2.9028,
      "step": 226166
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1562013626098633,
      "learning_rate": 5.002716682670604e-07,
      "loss": 2.7192,
      "step": 226167
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.324730396270752,
      "learning_rate": 5.000355617448271e-07,
      "loss": 2.9288,
      "step": 226168
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.709965944290161,
      "learning_rate": 4.997995109053188e-07,
      "loss": 2.8954,
      "step": 226169
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.1885266304016113,
      "learning_rate": 4.995635157485023e-07,
      "loss": 2.9649,
      "step": 226170
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.6713624000549316,
      "learning_rate": 4.993275762745108e-07,
      "loss": 3.3473,
      "step": 226171
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.918241262435913,
      "learning_rate": 4.990916924833444e-07,
      "loss": 2.941,
      "step": 226172
    },
    {
      "epoch": 2.94,
      "grad_norm": 4.248615264892578,
      "learning_rate": 4.988558643750362e-07,
      "loss": 2.8761,
      "step": 226173
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.943830966949463,
      "learning_rate": 4.98620091949653e-07,
      "loss": 2.8905,
      "step": 226174
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.3756184577941895,
      "learning_rate": 4.98384375207228e-07,
      "loss": 2.822,
      "step": 226175
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.0196774005889893,
      "learning_rate": 4.981487141477947e-07,
      "loss": 2.7269,
      "step": 226176
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.901115655899048,
      "learning_rate": 4.979131087714194e-07,
      "loss": 2.8242,
      "step": 226177
    },
    {
      "epoch": 2.94,
      "grad_norm": 2.9660394191741943,
      "learning_rate": 4.976775590781356e-07,
      "loss": 3.1317,
      "step": 226178
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4137253761291504,
      "learning_rate": 4.9744206506801e-07,
      "loss": 2.7889,
      "step": 226179
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3221330642700195,
      "learning_rate": 4.972066267410424e-07,
      "loss": 3.0051,
      "step": 226180
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9619429111480713,
      "learning_rate": 4.969712440972995e-07,
      "loss": 2.7441,
      "step": 226181
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.379437208175659,
      "learning_rate": 4.967359171368146e-07,
      "loss": 2.5785,
      "step": 226182
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.233412742614746,
      "learning_rate": 4.965006458596543e-07,
      "loss": 2.8798,
      "step": 226183
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.755955457687378,
      "learning_rate": 4.962654302658187e-07,
      "loss": 3.0963,
      "step": 226184
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0018341541290283,
      "learning_rate": 4.960302703554075e-07,
      "loss": 2.7655,
      "step": 226185
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.84163236618042,
      "learning_rate": 4.957951661284543e-07,
      "loss": 2.9589,
      "step": 226186
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9189295768737793,
      "learning_rate": 4.955601175849589e-07,
      "loss": 2.763,
      "step": 226187
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.930022716522217,
      "learning_rate": 4.953251247249879e-07,
      "loss": 3.036,
      "step": 226188
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1493709087371826,
      "learning_rate": 4.95090187548608e-07,
      "loss": 3.0178,
      "step": 226189
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.664093017578125,
      "learning_rate": 4.948553060558191e-07,
      "loss": 2.9576,
      "step": 226190
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.581829071044922,
      "learning_rate": 4.946204802467213e-07,
      "loss": 3.0547,
      "step": 226191
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.664696216583252,
      "learning_rate": 4.943857101213144e-07,
      "loss": 2.843,
      "step": 226192
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0605125427246094,
      "learning_rate": 4.941509956796652e-07,
      "loss": 2.7807,
      "step": 226193
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.331270694732666,
      "learning_rate": 4.939163369217736e-07,
      "loss": 3.059,
      "step": 226194
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3312249183654785,
      "learning_rate": 4.936817338477394e-07,
      "loss": 3.0564,
      "step": 226195
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9527037143707275,
      "learning_rate": 4.934471864575962e-07,
      "loss": 2.6549,
      "step": 226196
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.338695764541626,
      "learning_rate": 4.932126947513771e-07,
      "loss": 2.941,
      "step": 226197
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6089694499969482,
      "learning_rate": 4.92978258729082e-07,
      "loss": 2.8898,
      "step": 226198
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.363555431365967,
      "learning_rate": 4.927438783908444e-07,
      "loss": 3.1936,
      "step": 226199
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8643620014190674,
      "learning_rate": 4.925095537366309e-07,
      "loss": 2.9486,
      "step": 226200
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.69368577003479,
      "learning_rate": 4.922752847665079e-07,
      "loss": 2.8688,
      "step": 226201
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2987232208251953,
      "learning_rate": 4.920410714805423e-07,
      "loss": 2.9297,
      "step": 226202
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.008704662322998,
      "learning_rate": 4.91806913878734e-07,
      "loss": 2.9296,
      "step": 226203
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.878671884536743,
      "learning_rate": 4.915728119611828e-07,
      "loss": 2.9685,
      "step": 226204
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.608447313308716,
      "learning_rate": 4.913387657278888e-07,
      "loss": 2.9103,
      "step": 226205
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.742478609085083,
      "learning_rate": 4.911047751789188e-07,
      "loss": 2.9588,
      "step": 226206
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8471462726593018,
      "learning_rate": 4.908708403143058e-07,
      "loss": 3.0729,
      "step": 226207
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.390887498855591,
      "learning_rate": 4.906369611340832e-07,
      "loss": 2.9411,
      "step": 226208
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2515532970428467,
      "learning_rate": 4.904031376383177e-07,
      "loss": 2.8899,
      "step": 226209
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.162860870361328,
      "learning_rate": 4.901693698270092e-07,
      "loss": 2.983,
      "step": 226210
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.210320234298706,
      "learning_rate": 4.899356577002577e-07,
      "loss": 3.0361,
      "step": 226211
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9870567321777344,
      "learning_rate": 4.897020012580632e-07,
      "loss": 3.0193,
      "step": 226212
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.676107883453369,
      "learning_rate": 4.894684005005256e-07,
      "loss": 3.022,
      "step": 226213
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9119606018066406,
      "learning_rate": 4.892348554276116e-07,
      "loss": 2.9865,
      "step": 226214
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.000211715698242,
      "learning_rate": 4.89001366039421e-07,
      "loss": 2.8234,
      "step": 226215
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9407057762145996,
      "learning_rate": 4.887679323359539e-07,
      "loss": 2.8262,
      "step": 226216
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.8691487312316895,
      "learning_rate": 4.885345543173102e-07,
      "loss": 2.8371,
      "step": 226217
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.793043375015259,
      "learning_rate": 4.8830123198349e-07,
      "loss": 3.0758,
      "step": 226218
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9904282093048096,
      "learning_rate": 4.880679653345265e-07,
      "loss": 3.0415,
      "step": 226219
    },
    {
      "epoch": 2.95,
      "grad_norm": 5.275142192840576,
      "learning_rate": 4.878347543705196e-07,
      "loss": 2.8544,
      "step": 226220
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9834651947021484,
      "learning_rate": 4.876015990914695e-07,
      "loss": 3.021,
      "step": 226221
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.911862373352051,
      "learning_rate": 4.873684994974092e-07,
      "loss": 2.9427,
      "step": 226222
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2488296031951904,
      "learning_rate": 4.871354555884388e-07,
      "loss": 2.9448,
      "step": 226223
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.167712926864624,
      "learning_rate": 4.86902467364525e-07,
      "loss": 3.0086,
      "step": 226224
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.825131416320801,
      "learning_rate": 4.866695348257676e-07,
      "loss": 2.9035,
      "step": 226225
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4825899600982666,
      "learning_rate": 4.864366579721668e-07,
      "loss": 2.7925,
      "step": 226226
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5571584701538086,
      "learning_rate": 4.862038368038223e-07,
      "loss": 3.1584,
      "step": 226227
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.094386339187622,
      "learning_rate": 4.859710713207343e-07,
      "loss": 3.1416,
      "step": 226228
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7881019115448,
      "learning_rate": 4.857383615229692e-07,
      "loss": 2.8399,
      "step": 226229
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.850449562072754,
      "learning_rate": 4.855057074105273e-07,
      "loss": 2.8937,
      "step": 226230
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7293241024017334,
      "learning_rate": 4.852731089835083e-07,
      "loss": 2.838,
      "step": 226231
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.000640869140625,
      "learning_rate": 4.850405662419454e-07,
      "loss": 2.711,
      "step": 226232
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9756596088409424,
      "learning_rate": 4.848080791858389e-07,
      "loss": 2.9209,
      "step": 226233
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2050013542175293,
      "learning_rate": 4.845756478152884e-07,
      "loss": 2.9405,
      "step": 226234
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.671034097671509,
      "learning_rate": 4.843432721302609e-07,
      "loss": 2.7576,
      "step": 226235
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.878086805343628,
      "learning_rate": 4.841109521308894e-07,
      "loss": 3.0961,
      "step": 226236
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.07835054397583,
      "learning_rate": 4.838786878171741e-07,
      "loss": 3.2038,
      "step": 226237
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1209726333618164,
      "learning_rate": 4.836464791891481e-07,
      "loss": 2.8329,
      "step": 226238
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6173224449157715,
      "learning_rate": 4.834143262468781e-07,
      "loss": 3.0199,
      "step": 226239
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7981553077697754,
      "learning_rate": 4.831822289903642e-07,
      "loss": 2.9729,
      "step": 226240
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.871596097946167,
      "learning_rate": 4.829501874197061e-07,
      "loss": 2.9088,
      "step": 226241
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.977757453918457,
      "learning_rate": 4.827182015349373e-07,
      "loss": 3.037,
      "step": 226242
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4418540000915527,
      "learning_rate": 4.824862713360578e-07,
      "loss": 2.5293,
      "step": 226243
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0366005897521973,
      "learning_rate": 4.82254396823134e-07,
      "loss": 2.8764,
      "step": 226244
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1318132877349854,
      "learning_rate": 4.820225779962328e-07,
      "loss": 2.9278,
      "step": 226245
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9638335704803467,
      "learning_rate": 4.81790814855354e-07,
      "loss": 2.7541,
      "step": 226246
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.179244041442871,
      "learning_rate": 4.815591074005975e-07,
      "loss": 3.0238,
      "step": 226247
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8068454265594482,
      "learning_rate": 4.813274556319635e-07,
      "loss": 2.8153,
      "step": 226248
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.857757806777954,
      "learning_rate": 4.810958595494851e-07,
      "loss": 2.9424,
      "step": 226249
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.738267660140991,
      "learning_rate": 4.808643191532291e-07,
      "loss": 2.875,
      "step": 226250
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6312310695648193,
      "learning_rate": 4.806328344432286e-07,
      "loss": 2.8236,
      "step": 226251
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2243692874908447,
      "learning_rate": 4.804014054195505e-07,
      "loss": 2.7351,
      "step": 226252
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.649606704711914,
      "learning_rate": 4.801700320822277e-07,
      "loss": 2.9701,
      "step": 226253
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.676419496536255,
      "learning_rate": 4.799387144312606e-07,
      "loss": 2.9505,
      "step": 226254
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8180766105651855,
      "learning_rate": 4.797074524667488e-07,
      "loss": 2.9866,
      "step": 226255
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.809316635131836,
      "learning_rate": 4.794762461887259e-07,
      "loss": 3.1755,
      "step": 226256
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2803287506103516,
      "learning_rate": 4.792450955971916e-07,
      "loss": 2.8557,
      "step": 226257
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3713912963867188,
      "learning_rate": 4.790140006922461e-07,
      "loss": 3.0446,
      "step": 226258
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.198139190673828,
      "learning_rate": 4.787829614738892e-07,
      "loss": 3.1209,
      "step": 226259
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6923813819885254,
      "learning_rate": 4.785519779421876e-07,
      "loss": 3.0535,
      "step": 226260
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8227109909057617,
      "learning_rate": 4.783210500971745e-07,
      "loss": 3.06,
      "step": 226261
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9292192459106445,
      "learning_rate": 4.780901779388835e-07,
      "loss": 3.0712,
      "step": 226262
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8717455863952637,
      "learning_rate": 4.778593614673809e-07,
      "loss": 2.934,
      "step": 226263
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.128861427307129,
      "learning_rate": 4.776286006827334e-07,
      "loss": 2.785,
      "step": 226264
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.318113088607788,
      "learning_rate": 4.773978955849079e-07,
      "loss": 3.2176,
      "step": 226265
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.560188055038452,
      "learning_rate": 4.771672461740039e-07,
      "loss": 2.9321,
      "step": 226266
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6169450283050537,
      "learning_rate": 4.769366524500217e-07,
      "loss": 3.1867,
      "step": 226267
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9040720462799072,
      "learning_rate": 4.767061144130613e-07,
      "loss": 2.9054,
      "step": 226268
    },
    {
      "epoch": 2.95,
      "grad_norm": 5.049900531768799,
      "learning_rate": 4.7647563206312246e-07,
      "loss": 2.6663,
      "step": 226269
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7298436164855957,
      "learning_rate": 4.762452054002386e-07,
      "loss": 3.1177,
      "step": 226270
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.931893825531006,
      "learning_rate": 4.760148344245096e-07,
      "loss": 2.8798,
      "step": 226271
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4396586418151855,
      "learning_rate": 4.757845191359355e-07,
      "loss": 3.0057,
      "step": 226272
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.728565216064453,
      "learning_rate": 4.755542595345496e-07,
      "loss": 2.7425,
      "step": 226273
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5149457454681396,
      "learning_rate": 4.7532405562045184e-07,
      "loss": 3.0541,
      "step": 226274
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8073251247406006,
      "learning_rate": 4.7509390739360884e-07,
      "loss": 2.9336,
      "step": 226275
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.769725799560547,
      "learning_rate": 4.748638148540873e-07,
      "loss": 2.9944,
      "step": 226276
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.161386489868164,
      "learning_rate": 4.746337780019871e-07,
      "loss": 2.7988,
      "step": 226277
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9836649894714355,
      "learning_rate": 4.7440379683727493e-07,
      "loss": 2.9931,
      "step": 226278
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5395092964172363,
      "learning_rate": 4.741738713600507e-07,
      "loss": 2.6243,
      "step": 226279
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.579296350479126,
      "learning_rate": 4.739440015703144e-07,
      "loss": 2.9622,
      "step": 226280
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8153090476989746,
      "learning_rate": 4.7371418746809944e-07,
      "loss": 2.6773,
      "step": 226281
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.399166107177734,
      "learning_rate": 4.734844290535056e-07,
      "loss": 2.7833,
      "step": 226282
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9792704582214355,
      "learning_rate": 4.7325472632656627e-07,
      "loss": 2.7761,
      "step": 226283
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.724165439605713,
      "learning_rate": 4.7302507928728137e-07,
      "loss": 2.7512,
      "step": 226284
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.10539174079895,
      "learning_rate": 4.7279548793568435e-07,
      "loss": 2.9418,
      "step": 226285
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.138672351837158,
      "learning_rate": 4.725659522719083e-07,
      "loss": 2.8542,
      "step": 226286
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.519989252090454,
      "learning_rate": 4.723364722958867e-07,
      "loss": 3.033,
      "step": 226287
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.047501802444458,
      "learning_rate": 4.721070480077194e-07,
      "loss": 2.9479,
      "step": 226288
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1824419498443604,
      "learning_rate": 4.71877679407473e-07,
      "loss": 2.9785,
      "step": 226289
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.82025408744812,
      "learning_rate": 4.716483664951143e-07,
      "loss": 3.148,
      "step": 226290
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.28767204284668,
      "learning_rate": 4.7141910927077644e-07,
      "loss": 2.8661,
      "step": 226291
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2361810207366943,
      "learning_rate": 4.711899077344261e-07,
      "loss": 2.9522,
      "step": 226292
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0990302562713623,
      "learning_rate": 4.7096076188612996e-07,
      "loss": 3.0367,
      "step": 226293
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4704532623291016,
      "learning_rate": 4.707316717259546e-07,
      "loss": 3.0986,
      "step": 226294
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1808197498321533,
      "learning_rate": 4.7050263725393335e-07,
      "loss": 2.8809,
      "step": 226295
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.760867118835449,
      "learning_rate": 4.7027365847006615e-07,
      "loss": 2.9726,
      "step": 226296
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.010971784591675,
      "learning_rate": 4.7004473537445295e-07,
      "loss": 3.0341,
      "step": 226297
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6967122554779053,
      "learning_rate": 4.6981586796709383e-07,
      "loss": 2.767,
      "step": 226298
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8798470497131348,
      "learning_rate": 4.6958705624805527e-07,
      "loss": 2.8366,
      "step": 226299
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.738415479660034,
      "learning_rate": 4.6935830021740397e-07,
      "loss": 2.7149,
      "step": 226300
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6995933055877686,
      "learning_rate": 4.691295998751399e-07,
      "loss": 2.9127,
      "step": 226301
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3084375858306885,
      "learning_rate": 4.6890095522129635e-07,
      "loss": 2.6547,
      "step": 226302
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9147260189056396,
      "learning_rate": 4.6867236625593993e-07,
      "loss": 2.8469,
      "step": 226303
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4849469661712646,
      "learning_rate": 4.684438329791373e-07,
      "loss": 3.0233,
      "step": 226304
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.663180351257324,
      "learning_rate": 4.682153553908885e-07,
      "loss": 2.8235,
      "step": 226305
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1985771656036377,
      "learning_rate": 4.6798693349126004e-07,
      "loss": 2.7866,
      "step": 226306
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8901944160461426,
      "learning_rate": 4.6775856728028526e-07,
      "loss": 2.9478,
      "step": 226307
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.814002752304077,
      "learning_rate": 4.675302567579975e-07,
      "loss": 3.0206,
      "step": 226308
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.078211545944214,
      "learning_rate": 4.673020019244633e-07,
      "loss": 2.7811,
      "step": 226309
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.136488199234009,
      "learning_rate": 4.6707380277971605e-07,
      "loss": 2.7151,
      "step": 226310
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2805063724517822,
      "learning_rate": 4.6684565932375574e-07,
      "loss": 2.9654,
      "step": 226311
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.231661796569824,
      "learning_rate": 4.6661757155671554e-07,
      "loss": 2.8168,
      "step": 226312
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.246178150177002,
      "learning_rate": 4.663895394785622e-07,
      "loss": 2.6083,
      "step": 226313
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.933910608291626,
      "learning_rate": 4.6616156308936223e-07,
      "loss": 2.7198,
      "step": 226314
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.190683364868164,
      "learning_rate": 4.659336423891491e-07,
      "loss": 2.8015,
      "step": 226315
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0870423316955566,
      "learning_rate": 4.657057773779893e-07,
      "loss": 3.0695,
      "step": 226316
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6527981758117676,
      "learning_rate": 4.654779680558829e-07,
      "loss": 2.9178,
      "step": 226317
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0253653526306152,
      "learning_rate": 4.6525021442289644e-07,
      "loss": 2.8966,
      "step": 226318
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.504734516143799,
      "learning_rate": 4.650225164790966e-07,
      "loss": 2.9499,
      "step": 226319
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.108467102050781,
      "learning_rate": 4.647948742245167e-07,
      "loss": 2.8925,
      "step": 226320
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.554567575454712,
      "learning_rate": 4.6456728765915663e-07,
      "loss": 2.8228,
      "step": 226321
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7029049396514893,
      "learning_rate": 4.643397567831164e-07,
      "loss": 2.6835,
      "step": 226322
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.385282039642334,
      "learning_rate": 4.641122815963627e-07,
      "loss": 2.7602,
      "step": 226323
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.877210855484009,
      "learning_rate": 4.6388486209902874e-07,
      "loss": 2.9298,
      "step": 226324
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.193183660507202,
      "learning_rate": 4.636574982910812e-07,
      "loss": 2.897,
      "step": 226325
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.622909069061279,
      "learning_rate": 4.634301901726201e-07,
      "loss": 2.8605,
      "step": 226326
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.709538698196411,
      "learning_rate": 4.6320293774364525e-07,
      "loss": 2.9221,
      "step": 226327
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1306779384613037,
      "learning_rate": 4.629757410042234e-07,
      "loss": 2.8843,
      "step": 226328
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.808617115020752,
      "learning_rate": 4.6274859995438785e-07,
      "loss": 2.8578,
      "step": 226329
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0450668334960938,
      "learning_rate": 4.625215145941719e-07,
      "loss": 2.8161,
      "step": 226330
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0139877796173096,
      "learning_rate": 4.6229448492364205e-07,
      "loss": 2.9469,
      "step": 226331
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9104273319244385,
      "learning_rate": 4.6206751094283177e-07,
      "loss": 2.7751,
      "step": 226332
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7591235637664795,
      "learning_rate": 4.618405926517743e-07,
      "loss": 2.7183,
      "step": 226333
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.150453805923462,
      "learning_rate": 4.6161373005046966e-07,
      "loss": 2.8309,
      "step": 226334
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.307160377502441,
      "learning_rate": 4.6138692313905103e-07,
      "loss": 2.9347,
      "step": 226335
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4180312156677246,
      "learning_rate": 4.6116017191751844e-07,
      "loss": 2.9081,
      "step": 226336
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0065832138061523,
      "learning_rate": 4.609334763859052e-07,
      "loss": 2.8327,
      "step": 226337
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6148838996887207,
      "learning_rate": 4.6070683654424457e-07,
      "loss": 2.8894,
      "step": 226338
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.9248151779174805,
      "learning_rate": 4.6048025239260323e-07,
      "loss": 2.8744,
      "step": 226339
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.774409770965576,
      "learning_rate": 4.602537239309811e-07,
      "loss": 2.7459,
      "step": 226340
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7394745349884033,
      "learning_rate": 4.6002725115947826e-07,
      "loss": 2.8271,
      "step": 226341
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.771206378936768,
      "learning_rate": 4.5980083407812787e-07,
      "loss": 3.1387,
      "step": 226342
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.808661937713623,
      "learning_rate": 4.595744726869299e-07,
      "loss": 2.9973,
      "step": 226343
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4906721115112305,
      "learning_rate": 4.5934816698595113e-07,
      "loss": 2.9775,
      "step": 226344
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9044528007507324,
      "learning_rate": 4.59121916975258e-07,
      "loss": 2.7817,
      "step": 226345
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.801063060760498,
      "learning_rate": 4.5889572265485065e-07,
      "loss": 2.9879,
      "step": 226346
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7397241592407227,
      "learning_rate": 4.586695840247956e-07,
      "loss": 3.0926,
      "step": 226347
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0645394325256348,
      "learning_rate": 4.584435010851262e-07,
      "loss": 2.9806,
      "step": 226348
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.678039312362671,
      "learning_rate": 4.582174738358757e-07,
      "loss": 2.9566,
      "step": 226349
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7717278003692627,
      "learning_rate": 4.579915022771108e-07,
      "loss": 2.9009,
      "step": 226350
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.580946445465088,
      "learning_rate": 4.5776558640886474e-07,
      "loss": 3.0233,
      "step": 226351
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.633344888687134,
      "learning_rate": 4.575397262311376e-07,
      "loss": 2.9032,
      "step": 226352
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7870962619781494,
      "learning_rate": 4.5731392174406243e-07,
      "loss": 2.9139,
      "step": 226353
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.093568801879883,
      "learning_rate": 4.570881729475728e-07,
      "loss": 2.828,
      "step": 226354
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6132824420928955,
      "learning_rate": 4.568624798418019e-07,
      "loss": 2.9445,
      "step": 226355
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0727388858795166,
      "learning_rate": 4.566368424267497e-07,
      "loss": 3.0177,
      "step": 226356
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.346703052520752,
      "learning_rate": 4.5641126070244947e-07,
      "loss": 2.4778,
      "step": 226357
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7656800746917725,
      "learning_rate": 4.5618573466896793e-07,
      "loss": 2.6978,
      "step": 226358
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.696373701095581,
      "learning_rate": 4.5596026432633826e-07,
      "loss": 2.964,
      "step": 226359
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9920003414154053,
      "learning_rate": 4.5573484967456055e-07,
      "loss": 2.9733,
      "step": 226360
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5521442890167236,
      "learning_rate": 4.5550949071376797e-07,
      "loss": 3.0587,
      "step": 226361
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4419140815734863,
      "learning_rate": 4.5528418744392725e-07,
      "loss": 2.9539,
      "step": 226362
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.434512138366699,
      "learning_rate": 4.55058939865105e-07,
      "loss": 3.0028,
      "step": 226363
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.122225761413574,
      "learning_rate": 4.548337479773345e-07,
      "loss": 2.7785,
      "step": 226364
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8443081378936768,
      "learning_rate": 4.5460861178068243e-07,
      "loss": 3.0169,
      "step": 226365
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.080461025238037,
      "learning_rate": 4.54383531275182e-07,
      "loss": 2.8557,
      "step": 226366
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.8463871479034424,
      "learning_rate": 4.5415850646083327e-07,
      "loss": 3.0996,
      "step": 226367
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.082908868789673,
      "learning_rate": 4.539335373377362e-07,
      "loss": 2.9366,
      "step": 226368
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8489363193511963,
      "learning_rate": 4.537086239058907e-07,
      "loss": 2.9732,
      "step": 226369
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3210132122039795,
      "learning_rate": 4.534837661653634e-07,
      "loss": 3.1144,
      "step": 226370
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.358663558959961,
      "learning_rate": 4.532589641161877e-07,
      "loss": 3.128,
      "step": 226371
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7651124000549316,
      "learning_rate": 4.5303421775839674e-07,
      "loss": 3.1299,
      "step": 226372
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.200834035873413,
      "learning_rate": 4.528095270920573e-07,
      "loss": 2.9253,
      "step": 226373
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.45698618888855,
      "learning_rate": 4.5258489211720263e-07,
      "loss": 2.9955,
      "step": 226374
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8715667724609375,
      "learning_rate": 4.5236031283383266e-07,
      "loss": 2.9547,
      "step": 226375
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2753357887268066,
      "learning_rate": 4.521357892420474e-07,
      "loss": 2.7305,
      "step": 226376
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1772799491882324,
      "learning_rate": 4.5191132134184683e-07,
      "loss": 2.8412,
      "step": 226377
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5056848526000977,
      "learning_rate": 4.516869091332975e-07,
      "loss": 2.9251,
      "step": 226378
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7797646522521973,
      "learning_rate": 4.514625526164328e-07,
      "loss": 2.6742,
      "step": 226379
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7689549922943115,
      "learning_rate": 4.512382517913193e-07,
      "loss": 2.8708,
      "step": 226380
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.102418899536133,
      "learning_rate": 4.510140066579571e-07,
      "loss": 2.9807,
      "step": 226381
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.117934703826904,
      "learning_rate": 4.5078981721641263e-07,
      "loss": 2.7472,
      "step": 226382
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8605306148529053,
      "learning_rate": 4.505656834667193e-07,
      "loss": 2.8606,
      "step": 226383
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5831308364868164,
      "learning_rate": 4.503416054089104e-07,
      "loss": 2.9389,
      "step": 226384
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3625001907348633,
      "learning_rate": 4.5011758304301925e-07,
      "loss": 2.9922,
      "step": 226385
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7629058361053467,
      "learning_rate": 4.498936163691458e-07,
      "loss": 2.9835,
      "step": 226386
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.362759828567505,
      "learning_rate": 4.4966970538725665e-07,
      "loss": 2.9583,
      "step": 226387
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2093210220336914,
      "learning_rate": 4.494458500974518e-07,
      "loss": 3.0683,
      "step": 226388
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6400809288024902,
      "learning_rate": 4.4922205049976455e-07,
      "loss": 3.2335,
      "step": 226389
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8444814682006836,
      "learning_rate": 4.489983065941949e-07,
      "loss": 2.8623,
      "step": 226390
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.464914083480835,
      "learning_rate": 4.4877461838080943e-07,
      "loss": 2.9317,
      "step": 226391
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1577870845794678,
      "learning_rate": 4.4855098585964146e-07,
      "loss": 2.8855,
      "step": 226392
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2624168395996094,
      "learning_rate": 4.483274090307576e-07,
      "loss": 2.6889,
      "step": 226393
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5233211517333984,
      "learning_rate": 4.4810388789419115e-07,
      "loss": 2.9772,
      "step": 226394
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.090291976928711,
      "learning_rate": 4.4788042244997546e-07,
      "loss": 3.0071,
      "step": 226395
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0864579677581787,
      "learning_rate": 4.476570126981438e-07,
      "loss": 3.0699,
      "step": 226396
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8788647651672363,
      "learning_rate": 4.474336586387628e-07,
      "loss": 2.8624,
      "step": 226397
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.124737501144409,
      "learning_rate": 4.4721036027183244e-07,
      "loss": 2.7882,
      "step": 226398
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8825387954711914,
      "learning_rate": 4.4698711759745266e-07,
      "loss": 2.9585,
      "step": 226399
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6050031185150146,
      "learning_rate": 4.467639306156234e-07,
      "loss": 3.0223,
      "step": 226400
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.416349172592163,
      "learning_rate": 4.465407993263781e-07,
      "loss": 2.8789,
      "step": 226401
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8427019119262695,
      "learning_rate": 4.4631772372978325e-07,
      "loss": 2.8011,
      "step": 226402
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0845460891723633,
      "learning_rate": 4.460947038258722e-07,
      "loss": 2.8581,
      "step": 226403
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.917606830596924,
      "learning_rate": 4.458717396146783e-07,
      "loss": 2.9936,
      "step": 226404
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9307773113250732,
      "learning_rate": 4.45648831096268e-07,
      "loss": 3.0274,
      "step": 226405
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.502004623413086,
      "learning_rate": 4.4542597827064154e-07,
      "loss": 2.6909,
      "step": 226406
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.081904888153076,
      "learning_rate": 4.4520318113789865e-07,
      "loss": 2.9397,
      "step": 226407
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.297534942626953,
      "learning_rate": 4.4498043969803944e-07,
      "loss": 2.8898,
      "step": 226408
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.804278612136841,
      "learning_rate": 4.4475775395109716e-07,
      "loss": 2.8452,
      "step": 226409
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9222559928894043,
      "learning_rate": 4.445351238971384e-07,
      "loss": 2.9999,
      "step": 226410
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.4217727184295654,
      "learning_rate": 4.4431254953619655e-07,
      "loss": 2.6952,
      "step": 226411
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7809886932373047,
      "learning_rate": 4.4409003086830486e-07,
      "loss": 2.7172,
      "step": 226412
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.841404438018799,
      "learning_rate": 4.4386756789349665e-07,
      "loss": 3.0262,
      "step": 226413
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.399606704711914,
      "learning_rate": 4.436451606118718e-07,
      "loss": 2.9364,
      "step": 226414
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5168702602386475,
      "learning_rate": 4.434228090233971e-07,
      "loss": 2.9094,
      "step": 226415
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9619219303131104,
      "learning_rate": 4.432005131281391e-07,
      "loss": 2.9277,
      "step": 226416
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.173870801925659,
      "learning_rate": 4.429782729261644e-07,
      "loss": 2.8324,
      "step": 226417
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.813117504119873,
      "learning_rate": 4.42756088417473e-07,
      "loss": 2.9129,
      "step": 226418
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.754293441772461,
      "learning_rate": 4.425339596021316e-07,
      "loss": 2.8463,
      "step": 226419
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.81644868850708,
      "learning_rate": 4.4231188648020665e-07,
      "loss": 2.758,
      "step": 226420
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.829540252685547,
      "learning_rate": 4.42089869051665e-07,
      "loss": 2.7502,
      "step": 226421
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9502546787261963,
      "learning_rate": 4.4186790731663977e-07,
      "loss": 2.847,
      "step": 226422
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8050696849823,
      "learning_rate": 4.4164600127509776e-07,
      "loss": 2.923,
      "step": 226423
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.244901180267334,
      "learning_rate": 4.4142415092710547e-07,
      "loss": 2.893,
      "step": 226424
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7493367195129395,
      "learning_rate": 4.412023562727296e-07,
      "loss": 2.8902,
      "step": 226425
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7542731761932373,
      "learning_rate": 4.409806173119701e-07,
      "loss": 2.9134,
      "step": 226426
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7692759037017822,
      "learning_rate": 4.407589340448936e-07,
      "loss": 2.9814,
      "step": 226427
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4686546325683594,
      "learning_rate": 4.405373064715334e-07,
      "loss": 3.0799,
      "step": 226428
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8635993003845215,
      "learning_rate": 4.403157345919228e-07,
      "loss": 3.0633,
      "step": 226429
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.177459478378296,
      "learning_rate": 4.4009421840612844e-07,
      "loss": 2.799,
      "step": 226430
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.163503408432007,
      "learning_rate": 4.3987275791418363e-07,
      "loss": 2.9351,
      "step": 226431
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3006343841552734,
      "learning_rate": 4.3965135311608833e-07,
      "loss": 3.0131,
      "step": 226432
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3022334575653076,
      "learning_rate": 4.394300040119425e-07,
      "loss": 2.8398,
      "step": 226433
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.984855890274048,
      "learning_rate": 4.3920871060174613e-07,
      "loss": 2.8053,
      "step": 226434
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.671006441116333,
      "learning_rate": 4.389874728855658e-07,
      "loss": 2.8182,
      "step": 226435
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.240245819091797,
      "learning_rate": 4.3876629086343483e-07,
      "loss": 2.7903,
      "step": 226436
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.157665252685547,
      "learning_rate": 4.3854516453538655e-07,
      "loss": 2.7025,
      "step": 226437
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.226199150085449,
      "learning_rate": 4.383240939014543e-07,
      "loss": 2.7889,
      "step": 226438
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.724008083343506,
      "learning_rate": 4.381030789617046e-07,
      "loss": 2.8765,
      "step": 226439
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.929422378540039,
      "learning_rate": 4.378821197161708e-07,
      "loss": 3.1426,
      "step": 226440
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4156038761138916,
      "learning_rate": 4.3766121616488627e-07,
      "loss": 2.7947,
      "step": 226441
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.17193865776062,
      "learning_rate": 4.374403683078842e-07,
      "loss": 2.8855,
      "step": 226442
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.9010837078094482,
      "learning_rate": 4.372195761452313e-07,
      "loss": 2.9105,
      "step": 226443
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.088224411010742,
      "learning_rate": 4.3699883967696083e-07,
      "loss": 2.7945,
      "step": 226444
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7019741535186768,
      "learning_rate": 4.3677815890307277e-07,
      "loss": 2.9546,
      "step": 226445
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8546640872955322,
      "learning_rate": 4.365575338236671e-07,
      "loss": 2.9422,
      "step": 226446
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6845147609710693,
      "learning_rate": 4.363369644387771e-07,
      "loss": 2.5947,
      "step": 226447
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9365856647491455,
      "learning_rate": 4.3611645074840274e-07,
      "loss": 2.9785,
      "step": 226448
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.606213331222534,
      "learning_rate": 4.358959927526107e-07,
      "loss": 2.9986,
      "step": 226449
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7869794368743896,
      "learning_rate": 4.356755904514342e-07,
      "loss": 3.1321,
      "step": 226450
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7666938304901123,
      "learning_rate": 4.354552438449399e-07,
      "loss": 2.8983,
      "step": 226451
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7152762413024902,
      "learning_rate": 4.3523495293312784e-07,
      "loss": 3.1036,
      "step": 226452
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2921764850616455,
      "learning_rate": 4.3501471771606454e-07,
      "loss": 2.9499,
      "step": 226453
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6843435764312744,
      "learning_rate": 4.347945381938167e-07,
      "loss": 2.7342,
      "step": 226454
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.91536545753479,
      "learning_rate": 4.34574414366351e-07,
      "loss": 2.8947,
      "step": 226455
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.969167947769165,
      "learning_rate": 4.343543462337673e-07,
      "loss": 2.8438,
      "step": 226456
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6530532836914062,
      "learning_rate": 4.34134333796099e-07,
      "loss": 2.8993,
      "step": 226457
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.133645534515381,
      "learning_rate": 4.3391437705337927e-07,
      "loss": 2.9157,
      "step": 226458
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.708301544189453,
      "learning_rate": 4.3369447600567485e-07,
      "loss": 3.1437,
      "step": 226459
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.558988332748413,
      "learning_rate": 4.334746306529524e-07,
      "loss": 2.8961,
      "step": 226460
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0774221420288086,
      "learning_rate": 4.332548409953451e-07,
      "loss": 2.7672,
      "step": 226461
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2051706314086914,
      "learning_rate": 4.330351070328197e-07,
      "loss": 2.9142,
      "step": 226462
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.351867198944092,
      "learning_rate": 4.3281542876544284e-07,
      "loss": 2.8435,
      "step": 226463
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9858899116516113,
      "learning_rate": 4.3259580619328104e-07,
      "loss": 2.6562,
      "step": 226464
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.173511028289795,
      "learning_rate": 4.323762393163677e-07,
      "loss": 2.944,
      "step": 226465
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6838090419769287,
      "learning_rate": 4.321567281347027e-07,
      "loss": 2.659,
      "step": 226466
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5668604373931885,
      "learning_rate": 4.319372726483528e-07,
      "loss": 2.8702,
      "step": 226467
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2833378314971924,
      "learning_rate": 4.3171787285738444e-07,
      "loss": 2.9976,
      "step": 226468
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2004406452178955,
      "learning_rate": 4.314985287617978e-07,
      "loss": 2.9544,
      "step": 226469
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.775064706802368,
      "learning_rate": 4.312792403616594e-07,
      "loss": 3.0506,
      "step": 226470
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4844112396240234,
      "learning_rate": 4.3106000765700256e-07,
      "loss": 2.9711,
      "step": 226471
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2385687828063965,
      "learning_rate": 4.3084083064786057e-07,
      "loss": 2.8508,
      "step": 226472
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0431792736053467,
      "learning_rate": 4.3062170933426676e-07,
      "loss": 3.032,
      "step": 226473
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.686387777328491,
      "learning_rate": 4.3040264371628774e-07,
      "loss": 3.0239,
      "step": 226474
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1666603088378906,
      "learning_rate": 4.301836337939568e-07,
      "loss": 2.7526,
      "step": 226475
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.232942581176758,
      "learning_rate": 4.299646795673073e-07,
      "loss": 2.7199,
      "step": 226476
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.288789987564087,
      "learning_rate": 4.297457810363725e-07,
      "loss": 3.0304,
      "step": 226477
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6503968238830566,
      "learning_rate": 4.29526938201219e-07,
      "loss": 2.8728,
      "step": 226478
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.777294635772705,
      "learning_rate": 4.293081510618468e-07,
      "loss": 3.073,
      "step": 226479
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.654365062713623,
      "learning_rate": 4.2908941961835586e-07,
      "loss": 3.1301,
      "step": 226480
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.886740207672119,
      "learning_rate": 4.2887074387071286e-07,
      "loss": 2.7904,
      "step": 226481
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8386001586914062,
      "learning_rate": 4.286521238190177e-07,
      "loss": 2.7896,
      "step": 226482
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6159701347351074,
      "learning_rate": 4.284335594633037e-07,
      "loss": 3.0281,
      "step": 226483
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1261165142059326,
      "learning_rate": 4.282150508035709e-07,
      "loss": 2.9066,
      "step": 226484
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6164698600769043,
      "learning_rate": 4.2799659783991915e-07,
      "loss": 2.9593,
      "step": 226485
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6227784156799316,
      "learning_rate": 4.2777820057234844e-07,
      "loss": 2.8662,
      "step": 226486
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6054623126983643,
      "learning_rate": 4.2755985900089215e-07,
      "loss": 2.9037,
      "step": 226487
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0746774673461914,
      "learning_rate": 4.273415731256169e-07,
      "loss": 3.1282,
      "step": 226488
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.806060791015625,
      "learning_rate": 4.271233429465559e-07,
      "loss": 2.9862,
      "step": 226489
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8428242206573486,
      "learning_rate": 4.2690516846374254e-07,
      "loss": 2.7672,
      "step": 226490
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9025325775146484,
      "learning_rate": 4.266870496772101e-07,
      "loss": 3.1555,
      "step": 226491
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7672667503356934,
      "learning_rate": 4.2646898658702523e-07,
      "loss": 3.0813,
      "step": 226492
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.809307813644409,
      "learning_rate": 4.2625097919322113e-07,
      "loss": 2.898,
      "step": 226493
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.401015043258667,
      "learning_rate": 4.2603302749583126e-07,
      "loss": 2.9019,
      "step": 226494
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9295144081115723,
      "learning_rate": 4.258151314948888e-07,
      "loss": 2.8605,
      "step": 226495
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.91819429397583,
      "learning_rate": 4.2559729119042705e-07,
      "loss": 2.5843,
      "step": 226496
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0088417530059814,
      "learning_rate": 4.2537950658254603e-07,
      "loss": 2.7766,
      "step": 226497
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.850675106048584,
      "learning_rate": 4.251617776712124e-07,
      "loss": 3.0592,
      "step": 226498
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8725008964538574,
      "learning_rate": 4.2494410445649274e-07,
      "loss": 3.0553,
      "step": 226499
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4375617504119873,
      "learning_rate": 4.2472648693845367e-07,
      "loss": 3.0522,
      "step": 226500
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8555097579956055,
      "learning_rate": 4.245089251170619e-07,
      "loss": 2.8056,
      "step": 226501
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2443675994873047,
      "learning_rate": 4.2429141899245067e-07,
      "loss": 2.757,
      "step": 226502
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8489348888397217,
      "learning_rate": 4.2407396856461994e-07,
      "loss": 2.6969,
      "step": 226503
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9457387924194336,
      "learning_rate": 4.238565738336031e-07,
      "loss": 2.7627,
      "step": 226504
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9793004989624023,
      "learning_rate": 4.2363923479943327e-07,
      "loss": 2.826,
      "step": 226505
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3196396827697754,
      "learning_rate": 4.234219514621773e-07,
      "loss": 2.7691,
      "step": 226506
    },
    {
      "epoch": 2.95,
      "grad_norm": 5.51895809173584,
      "learning_rate": 4.232047238218683e-07,
      "loss": 2.9035,
      "step": 226507
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6126832962036133,
      "learning_rate": 4.2298755187853973e-07,
      "loss": 3.0495,
      "step": 226508
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1137592792510986,
      "learning_rate": 4.2277043563222477e-07,
      "loss": 2.6719,
      "step": 226509
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7709970474243164,
      "learning_rate": 4.225533750829568e-07,
      "loss": 2.6762,
      "step": 226510
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9737260341644287,
      "learning_rate": 4.2233637023080247e-07,
      "loss": 3.0561,
      "step": 226511
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0636589527130127,
      "learning_rate": 4.22119421075795e-07,
      "loss": 2.8983,
      "step": 226512
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.780331611633301,
      "learning_rate": 4.219025276179677e-07,
      "loss": 3.056,
      "step": 226513
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5911569595336914,
      "learning_rate": 4.21685689857354e-07,
      "loss": 2.8615,
      "step": 226514
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7613799571990967,
      "learning_rate": 4.214689077940203e-07,
      "loss": 3.029,
      "step": 226515
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8401002883911133,
      "learning_rate": 4.212521814280001e-07,
      "loss": 3.0182,
      "step": 226516
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.065441370010376,
      "learning_rate": 4.2103551075929334e-07,
      "loss": 2.6486,
      "step": 226517
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7050862312316895,
      "learning_rate": 4.208188957879999e-07,
      "loss": 2.9024,
      "step": 226518
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1623826026916504,
      "learning_rate": 4.206023365141198e-07,
      "loss": 2.9029,
      "step": 226519
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2990128993988037,
      "learning_rate": 4.2038583293768637e-07,
      "loss": 3.1788,
      "step": 226520
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6499545574188232,
      "learning_rate": 4.2016938505879947e-07,
      "loss": 2.966,
      "step": 226521
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9442920684814453,
      "learning_rate": 4.199529928774259e-07,
      "loss": 3.0195,
      "step": 226522
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.514190196990967,
      "learning_rate": 4.1973665639363216e-07,
      "loss": 2.8492,
      "step": 226523
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6722238063812256,
      "learning_rate": 4.1952037560748496e-07,
      "loss": 3.0192,
      "step": 226524
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0535671710968018,
      "learning_rate": 4.1930415051898426e-07,
      "loss": 2.8232,
      "step": 226525
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4903266429901123,
      "learning_rate": 4.1908798112822995e-07,
      "loss": 3.1581,
      "step": 226526
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.875558853149414,
      "learning_rate": 4.1887186743518873e-07,
      "loss": 2.7488,
      "step": 226527
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.899437189102173,
      "learning_rate": 4.1865580943992727e-07,
      "loss": 2.8736,
      "step": 226528
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.024892568588257,
      "learning_rate": 4.1843980714251215e-07,
      "loss": 2.7099,
      "step": 226529
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.562687635421753,
      "learning_rate": 4.182238605429433e-07,
      "loss": 2.9838,
      "step": 226530
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.327545166015625,
      "learning_rate": 4.180079696413208e-07,
      "loss": 3.0616,
      "step": 226531
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6617026329040527,
      "learning_rate": 4.1779213443761115e-07,
      "loss": 3.0571,
      "step": 226532
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.022406816482544,
      "learning_rate": 4.1757635493191446e-07,
      "loss": 2.8854,
      "step": 226533
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9538233280181885,
      "learning_rate": 4.1736063112419725e-07,
      "loss": 2.9554,
      "step": 226534
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.729124069213867,
      "learning_rate": 4.171449630145929e-07,
      "loss": 2.7539,
      "step": 226535
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.581390380859375,
      "learning_rate": 4.1692935060306796e-07,
      "loss": 2.6758,
      "step": 226536
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1244919300079346,
      "learning_rate": 4.167137938897225e-07,
      "loss": 2.9183,
      "step": 226537
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2090485095977783,
      "learning_rate": 4.164982928745564e-07,
      "loss": 2.8908,
      "step": 226538
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.387268543243408,
      "learning_rate": 4.16282847557603e-07,
      "loss": 2.8741,
      "step": 226539
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.924753427505493,
      "learning_rate": 4.160674579389289e-07,
      "loss": 2.5424,
      "step": 226540
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.907153367996216,
      "learning_rate": 4.158521240185675e-07,
      "loss": 2.8579,
      "step": 226541
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.8366308212280273,
      "learning_rate": 4.156368457965187e-07,
      "loss": 2.853,
      "step": 226542
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.586909055709839,
      "learning_rate": 4.154216232729157e-07,
      "loss": 2.9264,
      "step": 226543
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.185333728790283,
      "learning_rate": 4.15206456447692e-07,
      "loss": 3.0555,
      "step": 226544
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3409383296966553,
      "learning_rate": 4.1499134532098075e-07,
      "loss": 3.016,
      "step": 226545
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.4139766693115234,
      "learning_rate": 4.147762898927487e-07,
      "loss": 2.794,
      "step": 226546
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0516397953033447,
      "learning_rate": 4.1456129016306236e-07,
      "loss": 2.6335,
      "step": 226547
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7393805980682373,
      "learning_rate": 4.1434634613195517e-07,
      "loss": 2.9879,
      "step": 226548
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.622055768966675,
      "learning_rate": 4.1413145779949365e-07,
      "loss": 2.7986,
      "step": 226549
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.951151132583618,
      "learning_rate": 4.139166251657111e-07,
      "loss": 2.7529,
      "step": 226550
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.938668966293335,
      "learning_rate": 4.137018482306076e-07,
      "loss": 2.9887,
      "step": 226551
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.847188711166382,
      "learning_rate": 4.13487126994283e-07,
      "loss": 2.8882,
      "step": 226552
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0858829021453857,
      "learning_rate": 4.132724614567373e-07,
      "loss": 2.9743,
      "step": 226553
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.898085355758667,
      "learning_rate": 4.130578516180039e-07,
      "loss": 2.8641,
      "step": 226554
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.908419132232666,
      "learning_rate": 4.128432974781493e-07,
      "loss": 3.0287,
      "step": 226555
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.352764844894409,
      "learning_rate": 4.126287990372068e-07,
      "loss": 2.8922,
      "step": 226556
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.58341908454895,
      "learning_rate": 4.124143562952098e-07,
      "loss": 3.1595,
      "step": 226557
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.677445411682129,
      "learning_rate": 4.1219996925219156e-07,
      "loss": 2.8694,
      "step": 226558
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.9933860301971436,
      "learning_rate": 4.119856379081854e-07,
      "loss": 2.8057,
      "step": 226559
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.210186243057251,
      "learning_rate": 4.117713622632912e-07,
      "loss": 2.9436,
      "step": 226560
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.185807704925537,
      "learning_rate": 4.115571423174757e-07,
      "loss": 2.7887,
      "step": 226561
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3714382648468018,
      "learning_rate": 4.1134297807080553e-07,
      "loss": 2.9077,
      "step": 226562
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8732571601867676,
      "learning_rate": 4.111288695233139e-07,
      "loss": 2.8044,
      "step": 226563
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.367579221725464,
      "learning_rate": 4.1091481667506754e-07,
      "loss": 2.9987,
      "step": 226564
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.117509126663208,
      "learning_rate": 4.107008195260997e-07,
      "loss": 2.9384,
      "step": 226565
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.906221628189087,
      "learning_rate": 4.1048687807641034e-07,
      "loss": 2.7517,
      "step": 226566
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.112581253051758,
      "learning_rate": 4.1027299232606616e-07,
      "loss": 2.7124,
      "step": 226567
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.300633192062378,
      "learning_rate": 4.100591622751004e-07,
      "loss": 2.878,
      "step": 226568
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6790714263916016,
      "learning_rate": 4.09845387923613e-07,
      "loss": 2.9031,
      "step": 226569
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2773540019989014,
      "learning_rate": 4.0963166927153734e-07,
      "loss": 2.7577,
      "step": 226570
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2527947425842285,
      "learning_rate": 4.094180063189734e-07,
      "loss": 2.8827,
      "step": 226571
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2755868434906006,
      "learning_rate": 4.092043990659544e-07,
      "loss": 2.9151,
      "step": 226572
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8002617359161377,
      "learning_rate": 4.0899084751254696e-07,
      "loss": 3.0475,
      "step": 226573
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.882160186767578,
      "learning_rate": 4.0877735165871784e-07,
      "loss": 2.8075,
      "step": 226574
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.551543951034546,
      "learning_rate": 4.0856391150460024e-07,
      "loss": 2.771,
      "step": 226575
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.371278285980225,
      "learning_rate": 4.083505270501608e-07,
      "loss": 2.928,
      "step": 226576
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1078131198883057,
      "learning_rate": 4.081371982954662e-07,
      "loss": 2.942,
      "step": 226577
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7411293983459473,
      "learning_rate": 4.0792392524054975e-07,
      "loss": 2.8718,
      "step": 226578
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.926656723022461,
      "learning_rate": 4.07710707885478e-07,
      "loss": 2.8903,
      "step": 226579
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6735570430755615,
      "learning_rate": 4.07497546230251e-07,
      "loss": 3.2208,
      "step": 226580
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.983104944229126,
      "learning_rate": 4.0728444027490205e-07,
      "loss": 2.7558,
      "step": 226581
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9794352054595947,
      "learning_rate": 4.0707139001953106e-07,
      "loss": 2.8203,
      "step": 226582
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6464297771453857,
      "learning_rate": 4.0685839546413804e-07,
      "loss": 2.9077,
      "step": 226583
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.755469799041748,
      "learning_rate": 4.0664545660875627e-07,
      "loss": 3.0438,
      "step": 226584
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.089858293533325,
      "learning_rate": 4.0643257345341905e-07,
      "loss": 2.7479,
      "step": 226585
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.016761064529419,
      "learning_rate": 4.062197459981931e-07,
      "loss": 2.8233,
      "step": 226586
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.861321449279785,
      "learning_rate": 4.060069742431116e-07,
      "loss": 3.0515,
      "step": 226587
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8827717304229736,
      "learning_rate": 4.057942581882079e-07,
      "loss": 2.9046,
      "step": 226588
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2076220512390137,
      "learning_rate": 4.055815978335153e-07,
      "loss": 2.8489,
      "step": 226589
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1357388496398926,
      "learning_rate": 4.0536899317910044e-07,
      "loss": 2.8986,
      "step": 226590
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0328688621520996,
      "learning_rate": 4.051564442249633e-07,
      "loss": 3.1481,
      "step": 226591
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0058414936065674,
      "learning_rate": 4.049439509711705e-07,
      "loss": 2.6427,
      "step": 226592
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4053914546966553,
      "learning_rate": 4.0473151341775536e-07,
      "loss": 2.7013,
      "step": 226593
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.742267608642578,
      "learning_rate": 4.045191315647511e-07,
      "loss": 2.9785,
      "step": 226594
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.581929922103882,
      "learning_rate": 4.0430680541219117e-07,
      "loss": 3.0294,
      "step": 226595
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4442687034606934,
      "learning_rate": 4.040945349601421e-07,
      "loss": 2.9334,
      "step": 226596
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6394712924957275,
      "learning_rate": 4.0388232020860387e-07,
      "loss": 3.0432,
      "step": 226597
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.9877326488494873,
      "learning_rate": 4.0367016115767647e-07,
      "loss": 2.8391,
      "step": 226598
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.81261944770813,
      "learning_rate": 4.0345805780732656e-07,
      "loss": 3.0415,
      "step": 226599
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.984736442565918,
      "learning_rate": 4.03246010157654e-07,
      "loss": 2.833,
      "step": 226600
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7949678897857666,
      "learning_rate": 4.030340182086589e-07,
      "loss": 3.0738,
      "step": 226601
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9534988403320312,
      "learning_rate": 4.0282208196040776e-07,
      "loss": 2.809,
      "step": 226602
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.981576919555664,
      "learning_rate": 4.026102014129007e-07,
      "loss": 3.0949,
      "step": 226603
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2160964012145996,
      "learning_rate": 4.0239837656623754e-07,
      "loss": 2.9646,
      "step": 226604
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0521628856658936,
      "learning_rate": 4.021866074204183e-07,
      "loss": 3.0169,
      "step": 226605
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.758897066116333,
      "learning_rate": 4.0197489397547634e-07,
      "loss": 2.8296,
      "step": 226606
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6189234256744385,
      "learning_rate": 4.0176323623144493e-07,
      "loss": 2.8373,
      "step": 226607
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.808506727218628,
      "learning_rate": 4.0155163418842395e-07,
      "loss": 2.7176,
      "step": 226608
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6078364849090576,
      "learning_rate": 4.0134008784638017e-07,
      "loss": 2.9125,
      "step": 226609
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.121627330780029,
      "learning_rate": 4.0112859720538017e-07,
      "loss": 2.8101,
      "step": 226610
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6271650791168213,
      "learning_rate": 4.009171622654905e-07,
      "loss": 2.9559,
      "step": 226611
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.468109130859375,
      "learning_rate": 4.007057830267113e-07,
      "loss": 3.0859,
      "step": 226612
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.751035451889038,
      "learning_rate": 4.0049445948907575e-07,
      "loss": 3.0283,
      "step": 226613
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6128132343292236,
      "learning_rate": 4.0028319165268385e-07,
      "loss": 2.9284,
      "step": 226614
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9459917545318604,
      "learning_rate": 4.0007197951750223e-07,
      "loss": 3.0202,
      "step": 226615
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9649691581726074,
      "learning_rate": 3.998608230836309e-07,
      "loss": 2.8662,
      "step": 226616
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7350258827209473,
      "learning_rate": 3.9964972235103645e-07,
      "loss": 2.8428,
      "step": 226617
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.338547468185425,
      "learning_rate": 3.9943867731985214e-07,
      "loss": 3.1847,
      "step": 226618
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4948625564575195,
      "learning_rate": 3.9922768799004465e-07,
      "loss": 3.1518,
      "step": 226619
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.294527769088745,
      "learning_rate": 3.990167543616807e-07,
      "loss": 3.0434,
      "step": 226620
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5736939907073975,
      "learning_rate": 3.988058764347934e-07,
      "loss": 3.1171,
      "step": 226621
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7945923805236816,
      "learning_rate": 3.985950542094163e-07,
      "loss": 2.6758,
      "step": 226622
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.39284348487854,
      "learning_rate": 3.983842876855825e-07,
      "loss": 2.8796,
      "step": 226623
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.571486234664917,
      "learning_rate": 3.9817357686339204e-07,
      "loss": 2.8781,
      "step": 226624
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.4626011848449707,
      "learning_rate": 3.9796292174277823e-07,
      "loss": 2.9019,
      "step": 226625
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.779010057449341,
      "learning_rate": 3.9775232232387435e-07,
      "loss": 2.7324,
      "step": 226626
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.697164297103882,
      "learning_rate": 3.9754177860668035e-07,
      "loss": 2.9334,
      "step": 226627
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9611165523529053,
      "learning_rate": 3.9733129059122957e-07,
      "loss": 2.8769,
      "step": 226628
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4297757148742676,
      "learning_rate": 3.9712085827758867e-07,
      "loss": 2.8677,
      "step": 226629
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9682962894439697,
      "learning_rate": 3.9691048166575754e-07,
      "loss": 3.0368,
      "step": 226630
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.736690044403076,
      "learning_rate": 3.967001607558029e-07,
      "loss": 3.0852,
      "step": 226631
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.181170701980591,
      "learning_rate": 3.96489895547758e-07,
      "loss": 2.9056,
      "step": 226632
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.266977310180664,
      "learning_rate": 3.9627968604168945e-07,
      "loss": 2.7277,
      "step": 226633
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1084344387054443,
      "learning_rate": 3.96069532237564e-07,
      "loss": 2.9111,
      "step": 226634
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.764514923095703,
      "learning_rate": 3.958594341354815e-07,
      "loss": 2.739,
      "step": 226635
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5836400985717773,
      "learning_rate": 3.9564939173547525e-07,
      "loss": 2.8379,
      "step": 226636
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.573070526123047,
      "learning_rate": 3.9543940503754533e-07,
      "loss": 2.9029,
      "step": 226637
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.4697015285491943,
      "learning_rate": 3.952294740417916e-07,
      "loss": 2.8133,
      "step": 226638
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6735827922821045,
      "learning_rate": 3.950195987482141e-07,
      "loss": 3.0805,
      "step": 226639
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.368180990219116,
      "learning_rate": 3.9480977915684606e-07,
      "loss": 2.978,
      "step": 226640
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.028015375137329,
      "learning_rate": 3.946000152677209e-07,
      "loss": 2.9873,
      "step": 226641
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.726947069168091,
      "learning_rate": 3.943903070809384e-07,
      "loss": 2.984,
      "step": 226642
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5884034633636475,
      "learning_rate": 3.941806545964654e-07,
      "loss": 2.8459,
      "step": 226643
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.765641689300537,
      "learning_rate": 3.9397105781436844e-07,
      "loss": 3.1111,
      "step": 226644
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6594736576080322,
      "learning_rate": 3.937615167346808e-07,
      "loss": 2.9589,
      "step": 226645
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7991251945495605,
      "learning_rate": 3.9355203135746917e-07,
      "loss": 3.041,
      "step": 226646
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6484122276306152,
      "learning_rate": 3.9334260168273345e-07,
      "loss": 2.6014,
      "step": 226647
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1564102172851562,
      "learning_rate": 3.9313322771057364e-07,
      "loss": 2.9009,
      "step": 226648
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8827807903289795,
      "learning_rate": 3.929239094409231e-07,
      "loss": 2.786,
      "step": 226649
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.256561517715454,
      "learning_rate": 3.9271464687391507e-07,
      "loss": 3.0079,
      "step": 226650
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0678672790527344,
      "learning_rate": 3.9250544000958284e-07,
      "loss": 2.9838,
      "step": 226651
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.054356336593628,
      "learning_rate": 3.9229628884789313e-07,
      "loss": 2.9208,
      "step": 226652
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6810686588287354,
      "learning_rate": 3.9208719338894577e-07,
      "loss": 2.9517,
      "step": 226653
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7295968532562256,
      "learning_rate": 3.9187815363277417e-07,
      "loss": 2.9869,
      "step": 226654
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9099740982055664,
      "learning_rate": 3.9166916957941163e-07,
      "loss": 2.9124,
      "step": 226655
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.729283094406128,
      "learning_rate": 3.914602412288914e-07,
      "loss": 2.8393,
      "step": 226656
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.874114513397217,
      "learning_rate": 3.912513685812468e-07,
      "loss": 2.9395,
      "step": 226657
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.49164080619812,
      "learning_rate": 3.910425516365112e-07,
      "loss": 3.0579,
      "step": 226658
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.918647527694702,
      "learning_rate": 3.908337903947512e-07,
      "loss": 3.1132,
      "step": 226659
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5988032817840576,
      "learning_rate": 3.9062508485599996e-07,
      "loss": 2.7479,
      "step": 226660
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4621024131774902,
      "learning_rate": 3.9041643502029095e-07,
      "loss": 2.864,
      "step": 226661
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9528274536132812,
      "learning_rate": 3.9020784088762414e-07,
      "loss": 2.6367,
      "step": 226662
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.242208957672119,
      "learning_rate": 3.899993024580994e-07,
      "loss": 2.8878,
      "step": 226663
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9816839694976807,
      "learning_rate": 3.897908197317168e-07,
      "loss": 2.9632,
      "step": 226664
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7956666946411133,
      "learning_rate": 3.8958239270854287e-07,
      "loss": 2.8292,
      "step": 226665
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1090941429138184,
      "learning_rate": 3.8937402138861096e-07,
      "loss": 2.8077,
      "step": 226666
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1534197330474854,
      "learning_rate": 3.8916570577192105e-07,
      "loss": 2.6182,
      "step": 226667
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1941752433776855,
      "learning_rate": 3.8895744585853984e-07,
      "loss": 2.8804,
      "step": 226668
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7770333290100098,
      "learning_rate": 3.8874924164853383e-07,
      "loss": 2.7915,
      "step": 226669
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.071133852005005,
      "learning_rate": 3.8854109314190306e-07,
      "loss": 3.0658,
      "step": 226670
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.999079704284668,
      "learning_rate": 3.8833300033868084e-07,
      "loss": 3.0435,
      "step": 226671
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7780256271362305,
      "learning_rate": 3.881249632389338e-07,
      "loss": 2.9721,
      "step": 226672
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.80295467376709,
      "learning_rate": 3.8791698184269524e-07,
      "loss": 3.08,
      "step": 226673
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.891698122024536,
      "learning_rate": 3.877090561500318e-07,
      "loss": 2.6967,
      "step": 226674
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9148669242858887,
      "learning_rate": 3.8750118616091006e-07,
      "loss": 2.8511,
      "step": 226675
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8222174644470215,
      "learning_rate": 3.8729337187539677e-07,
      "loss": 2.9734,
      "step": 226676
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.35674786567688,
      "learning_rate": 3.8708561329355846e-07,
      "loss": 2.8414,
      "step": 226677
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6191558837890625,
      "learning_rate": 3.868779104154285e-07,
      "loss": 3.0467,
      "step": 226678
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7430930137634277,
      "learning_rate": 3.866702632410068e-07,
      "loss": 2.9602,
      "step": 226679
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.78739857673645,
      "learning_rate": 3.8646267177036004e-07,
      "loss": 2.7549,
      "step": 226680
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3889553546905518,
      "learning_rate": 3.8625513600355484e-07,
      "loss": 2.7483,
      "step": 226681
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.858574390411377,
      "learning_rate": 3.860476559405912e-07,
      "loss": 3.1726,
      "step": 226682
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.804518222808838,
      "learning_rate": 3.8584023158150237e-07,
      "loss": 2.7513,
      "step": 226683
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2238874435424805,
      "learning_rate": 3.85632862926355e-07,
      "loss": 2.8231,
      "step": 226684
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.122330904006958,
      "learning_rate": 3.854255499751491e-07,
      "loss": 3.0125,
      "step": 226685
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.948838233947754,
      "learning_rate": 3.8521829272795123e-07,
      "loss": 2.8435,
      "step": 226686
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.067000150680542,
      "learning_rate": 3.850110911848281e-07,
      "loss": 2.9845,
      "step": 226687
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.791731595993042,
      "learning_rate": 3.848039453457796e-07,
      "loss": 2.7696,
      "step": 226688
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.220993757247925,
      "learning_rate": 3.8459685521083917e-07,
      "loss": 2.8671,
      "step": 226689
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.304243564605713,
      "learning_rate": 3.843898207800733e-07,
      "loss": 2.6735,
      "step": 226690
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.824583053588867,
      "learning_rate": 3.8418284205348206e-07,
      "loss": 2.6153,
      "step": 226691
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.043067693710327,
      "learning_rate": 3.83975919031132e-07,
      "loss": 2.7792,
      "step": 226692
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.294113874435425,
      "learning_rate": 3.837690517130565e-07,
      "loss": 2.7324,
      "step": 226693
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9703729152679443,
      "learning_rate": 3.8356224009928883e-07,
      "loss": 2.9316,
      "step": 226694
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.397554874420166,
      "learning_rate": 3.8335548418989556e-07,
      "loss": 2.8292,
      "step": 226695
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.445302724838257,
      "learning_rate": 3.8314878398487683e-07,
      "loss": 2.8167,
      "step": 226696
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7879977226257324,
      "learning_rate": 3.8294213948429907e-07,
      "loss": 2.9847,
      "step": 226697
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0983667373657227,
      "learning_rate": 3.8273555068819574e-07,
      "loss": 2.8979,
      "step": 226698
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.747514009475708,
      "learning_rate": 3.8252901759656674e-07,
      "loss": 2.8398,
      "step": 226699
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.808167934417725,
      "learning_rate": 3.823225402094787e-07,
      "loss": 2.8674,
      "step": 226700
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8324990272521973,
      "learning_rate": 3.821161185269983e-07,
      "loss": 2.8819,
      "step": 226701
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7411789894104004,
      "learning_rate": 3.819097525491255e-07,
      "loss": 3.0108,
      "step": 226702
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6974127292633057,
      "learning_rate": 3.817034422759269e-07,
      "loss": 2.8937,
      "step": 226703
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8504157066345215,
      "learning_rate": 3.8149718770740246e-07,
      "loss": 2.6704,
      "step": 226704
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4921371936798096,
      "learning_rate": 3.8129098884365215e-07,
      "loss": 2.969,
      "step": 226705
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9961211681365967,
      "learning_rate": 3.810848456846427e-07,
      "loss": 2.6925,
      "step": 226706
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.389376640319824,
      "learning_rate": 3.8087875823044066e-07,
      "loss": 3.0968,
      "step": 226707
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1801962852478027,
      "learning_rate": 3.8067272648111267e-07,
      "loss": 2.9051,
      "step": 226708
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2008137702941895,
      "learning_rate": 3.8046675043665875e-07,
      "loss": 2.7048,
      "step": 226709
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9510929584503174,
      "learning_rate": 3.8026083009711215e-07,
      "loss": 2.6487,
      "step": 226710
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.127992868423462,
      "learning_rate": 3.800549654625729e-07,
      "loss": 2.9185,
      "step": 226711
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7991156578063965,
      "learning_rate": 3.798491565330075e-07,
      "loss": 2.9006,
      "step": 226712
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.206367015838623,
      "learning_rate": 3.7964340330848277e-07,
      "loss": 3.0303,
      "step": 226713
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9003241062164307,
      "learning_rate": 3.794377057890652e-07,
      "loss": 3.0633,
      "step": 226714
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.974133253097534,
      "learning_rate": 3.7923206397475483e-07,
      "loss": 2.8488,
      "step": 226715
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.07877254486084,
      "learning_rate": 3.7902647786558493e-07,
      "loss": 2.9056,
      "step": 226716
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9065065383911133,
      "learning_rate": 3.7882094746162215e-07,
      "loss": 2.8781,
      "step": 226717
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9452474117279053,
      "learning_rate": 3.7861547276286654e-07,
      "loss": 3.0594,
      "step": 226718
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5973267555236816,
      "learning_rate": 3.7841005376941793e-07,
      "loss": 2.9547,
      "step": 226719
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.05975604057312,
      "learning_rate": 3.7820469048127634e-07,
      "loss": 3.1015,
      "step": 226720
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.162271738052368,
      "learning_rate": 3.779993828984751e-07,
      "loss": 3.0117,
      "step": 226721
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.784273147583008,
      "learning_rate": 3.777941310210475e-07,
      "loss": 2.8942,
      "step": 226722
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.89984393119812,
      "learning_rate": 3.7758893484906014e-07,
      "loss": 3.189,
      "step": 226723
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4597089290618896,
      "learning_rate": 3.773837943825131e-07,
      "loss": 2.688,
      "step": 226724
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6205830574035645,
      "learning_rate": 3.771787096214729e-07,
      "loss": 2.9138,
      "step": 226725
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5169358253479004,
      "learning_rate": 3.769736805659729e-07,
      "loss": 2.7761,
      "step": 226726
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.874138355255127,
      "learning_rate": 3.7676870721607967e-07,
      "loss": 2.832,
      "step": 226727
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8439321517944336,
      "learning_rate": 3.765637895717599e-07,
      "loss": 2.8905,
      "step": 226728
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7968811988830566,
      "learning_rate": 3.763589276331136e-07,
      "loss": 2.9623,
      "step": 226729
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.343110084533691,
      "learning_rate": 3.76154121400174e-07,
      "loss": 2.8587,
      "step": 226730
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0649876594543457,
      "learning_rate": 3.7594937087294106e-07,
      "loss": 2.9396,
      "step": 226731
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5615200996398926,
      "learning_rate": 3.757446760514482e-07,
      "loss": 2.9702,
      "step": 226732
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.058521270751953,
      "learning_rate": 3.755400369357953e-07,
      "loss": 3.1478,
      "step": 226733
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2574119567871094,
      "learning_rate": 3.7533545352598226e-07,
      "loss": 2.9383,
      "step": 226734
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.393031597137451,
      "learning_rate": 3.7513092582204253e-07,
      "loss": 2.6005,
      "step": 226735
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.201873302459717,
      "learning_rate": 3.7492645382404265e-07,
      "loss": 2.8154,
      "step": 226736
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7806191444396973,
      "learning_rate": 3.747220375319826e-07,
      "loss": 2.7284,
      "step": 226737
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.725935935974121,
      "learning_rate": 3.7451767694589576e-07,
      "loss": 2.9165,
      "step": 226738
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.86415433883667,
      "learning_rate": 3.74313372065882e-07,
      "loss": 2.9757,
      "step": 226739
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6457910537719727,
      "learning_rate": 3.7410912289190796e-07,
      "loss": 3.051,
      "step": 226740
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9778122901916504,
      "learning_rate": 3.739049294240737e-07,
      "loss": 3.1341,
      "step": 226741
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.55542254447937,
      "learning_rate": 3.737007916623791e-07,
      "loss": 2.7928,
      "step": 226742
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.047128200531006,
      "learning_rate": 3.734967096068575e-07,
      "loss": 3.2884,
      "step": 226743
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.348066568374634,
      "learning_rate": 3.7329268325754224e-07,
      "loss": 2.8702,
      "step": 226744
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.967543601989746,
      "learning_rate": 3.730887126144999e-07,
      "loss": 3.0529,
      "step": 226745
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0644214153289795,
      "learning_rate": 3.728847976777638e-07,
      "loss": 2.9059,
      "step": 226746
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.260904312133789,
      "learning_rate": 3.726809384473672e-07,
      "loss": 3.1273,
      "step": 226747
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.142000675201416,
      "learning_rate": 3.724771349233435e-07,
      "loss": 2.8967,
      "step": 226748
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.154494047164917,
      "learning_rate": 3.7227338710572595e-07,
      "loss": 2.9445,
      "step": 226749
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.924569845199585,
      "learning_rate": 3.720696949945812e-07,
      "loss": 3.0057,
      "step": 226750
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.818821668624878,
      "learning_rate": 3.7186605858990916e-07,
      "loss": 3.0115,
      "step": 226751
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8236243724823,
      "learning_rate": 3.716624778917432e-07,
      "loss": 2.7117,
      "step": 226752
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.228969097137451,
      "learning_rate": 3.7145895290018323e-07,
      "loss": 3.2274,
      "step": 226753
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6177635192871094,
      "learning_rate": 3.71255483615196e-07,
      "loss": 2.8658,
      "step": 226754
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8828182220458984,
      "learning_rate": 3.71052070036848e-07,
      "loss": 3.0524,
      "step": 226755
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1648168563842773,
      "learning_rate": 3.7084871216517264e-07,
      "loss": 3.0151,
      "step": 226756
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.882751941680908,
      "learning_rate": 3.706454100002365e-07,
      "loss": 3.1544,
      "step": 226757
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.396611213684082,
      "learning_rate": 3.704421635420396e-07,
      "loss": 2.8946,
      "step": 226758
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2898147106170654,
      "learning_rate": 3.702389727906152e-07,
      "loss": 2.6722,
      "step": 226759
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.471996307373047,
      "learning_rate": 3.7003583774602997e-07,
      "loss": 3.1791,
      "step": 226760
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6157126426696777,
      "learning_rate": 3.698327584083172e-07,
      "loss": 2.8829,
      "step": 226761
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.051126718521118,
      "learning_rate": 3.696297347775101e-07,
      "loss": 2.9104,
      "step": 226762
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.673633337020874,
      "learning_rate": 3.6942676685364215e-07,
      "loss": 3.0116,
      "step": 226763
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.795644998550415,
      "learning_rate": 3.692238546367465e-07,
      "loss": 3.0161,
      "step": 226764
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.4069113731384277,
      "learning_rate": 3.690209981268899e-07,
      "loss": 3.0752,
      "step": 226765
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.881840467453003,
      "learning_rate": 3.688181973240722e-07,
      "loss": 2.985,
      "step": 226766
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.8703274726867676,
      "learning_rate": 3.686154522283602e-07,
      "loss": 2.8905,
      "step": 226767
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.211557626724243,
      "learning_rate": 3.684127628397537e-07,
      "loss": 3.0794,
      "step": 226768
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6676645278930664,
      "learning_rate": 3.682101291583195e-07,
      "loss": 3.0817,
      "step": 226769
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.93816614151001,
      "learning_rate": 3.680075511841241e-07,
      "loss": 2.9324,
      "step": 226770
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.23421573638916,
      "learning_rate": 3.6780502891713416e-07,
      "loss": 2.7074,
      "step": 226771
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6138741970062256,
      "learning_rate": 3.6760256235744966e-07,
      "loss": 2.9349,
      "step": 226772
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3711154460906982,
      "learning_rate": 3.6740015150507064e-07,
      "loss": 2.8953,
      "step": 226773
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8556976318359375,
      "learning_rate": 3.6719779636006365e-07,
      "loss": 2.99,
      "step": 226774
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8109190464019775,
      "learning_rate": 3.669954969224287e-07,
      "loss": 2.76,
      "step": 226775
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9919350147247314,
      "learning_rate": 3.667932531922324e-07,
      "loss": 3.0741,
      "step": 226776
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8356640338897705,
      "learning_rate": 3.665910651695081e-07,
      "loss": 3.0385,
      "step": 226777
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1274423599243164,
      "learning_rate": 3.6638893285428903e-07,
      "loss": 2.82,
      "step": 226778
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.608391523361206,
      "learning_rate": 3.661868562466086e-07,
      "loss": 2.9847,
      "step": 226779
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.117926836013794,
      "learning_rate": 3.659848353465333e-07,
      "loss": 2.8628,
      "step": 226780
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2390193939208984,
      "learning_rate": 3.6578287015406324e-07,
      "loss": 2.8114,
      "step": 226781
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7948293685913086,
      "learning_rate": 3.655809606692317e-07,
      "loss": 3.0041,
      "step": 226782
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.812068223953247,
      "learning_rate": 3.6537910689210526e-07,
      "loss": 3.01,
      "step": 226783
    },
    {
      "epoch": 2.95,
      "grad_norm": 5.2854743003845215,
      "learning_rate": 3.6517730882271725e-07,
      "loss": 2.9354,
      "step": 226784
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8523988723754883,
      "learning_rate": 3.6497556646110095e-07,
      "loss": 2.8872,
      "step": 226785
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9668455123901367,
      "learning_rate": 3.647738798072897e-07,
      "loss": 2.9497,
      "step": 226786
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8989651203155518,
      "learning_rate": 3.6457224886131677e-07,
      "loss": 2.862,
      "step": 226787
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6632487773895264,
      "learning_rate": 3.643706736232488e-07,
      "loss": 3.0009,
      "step": 226788
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.4587440490722656,
      "learning_rate": 3.6416915409308576e-07,
      "loss": 2.9411,
      "step": 226789
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.06449031829834,
      "learning_rate": 3.6396769027086105e-07,
      "loss": 3.177,
      "step": 226790
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.945795774459839,
      "learning_rate": 3.6376628215664116e-07,
      "loss": 3.0175,
      "step": 226791
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.392317771911621,
      "learning_rate": 3.635649297504595e-07,
      "loss": 2.9867,
      "step": 226792
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.092393398284912,
      "learning_rate": 3.6336363305234927e-07,
      "loss": 2.9796,
      "step": 226793
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7069194316864014,
      "learning_rate": 3.6316239206237717e-07,
      "loss": 3.0293,
      "step": 226794
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2332570552825928,
      "learning_rate": 3.629612067805099e-07,
      "loss": 3.0734,
      "step": 226795
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8927793502807617,
      "learning_rate": 3.62760077206814e-07,
      "loss": 2.5671,
      "step": 226796
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9282398223876953,
      "learning_rate": 3.625590033413561e-07,
      "loss": 2.7615,
      "step": 226797
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2613892555236816,
      "learning_rate": 3.623579851841363e-07,
      "loss": 2.8417,
      "step": 226798
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.812119245529175,
      "learning_rate": 3.6215702273522106e-07,
      "loss": 3.1433,
      "step": 226799
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0960536003112793,
      "learning_rate": 3.619561159946438e-07,
      "loss": 3.0037,
      "step": 226800
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.8194420337677,
      "learning_rate": 3.617552649624378e-07,
      "loss": 2.9707,
      "step": 226801
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9128172397613525,
      "learning_rate": 3.6155446963863634e-07,
      "loss": 2.7554,
      "step": 226802
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.483919858932495,
      "learning_rate": 3.6135373002327273e-07,
      "loss": 3.1246,
      "step": 226803
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.216533660888672,
      "learning_rate": 3.611530461163803e-07,
      "loss": 3.1148,
      "step": 226804
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.507714033126831,
      "learning_rate": 3.6095241791802563e-07,
      "loss": 2.643,
      "step": 226805
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2215867042541504,
      "learning_rate": 3.607518454282088e-07,
      "loss": 3.1002,
      "step": 226806
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.626850128173828,
      "learning_rate": 3.605513286469963e-07,
      "loss": 2.9976,
      "step": 226807
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.703216314315796,
      "learning_rate": 3.603508675744216e-07,
      "loss": 3.2184,
      "step": 226808
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.727776050567627,
      "learning_rate": 3.601504622104845e-07,
      "loss": 2.9065,
      "step": 226809
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.906852960586548,
      "learning_rate": 3.599501125552851e-07,
      "loss": 2.8797,
      "step": 226810
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.37212872505188,
      "learning_rate": 3.5974981860878995e-07,
      "loss": 2.8501,
      "step": 226811
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.208690643310547,
      "learning_rate": 3.5954958037109903e-07,
      "loss": 2.9705,
      "step": 226812
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0167882442474365,
      "learning_rate": 3.593493978422457e-07,
      "loss": 2.6267,
      "step": 226813
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.050981044769287,
      "learning_rate": 3.591492710221966e-07,
      "loss": 2.9481,
      "step": 226814
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.692763328552246,
      "learning_rate": 3.589491999110516e-07,
      "loss": 2.9719,
      "step": 226815
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.791238307952881,
      "learning_rate": 3.5874918450884415e-07,
      "loss": 3.0203,
      "step": 226816
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9966535568237305,
      "learning_rate": 3.585492248156074e-07,
      "loss": 2.8704,
      "step": 226817
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5974910259246826,
      "learning_rate": 3.583493208313748e-07,
      "loss": 2.7488,
      "step": 226818
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9511070251464844,
      "learning_rate": 3.581494725561795e-07,
      "loss": 2.9603,
      "step": 226819
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.688199758529663,
      "learning_rate": 3.57949679990055e-07,
      "loss": 2.6826,
      "step": 226820
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.230958938598633,
      "learning_rate": 3.577499431330344e-07,
      "loss": 2.8767,
      "step": 226821
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2479054927825928,
      "learning_rate": 3.575502619851511e-07,
      "loss": 2.8467,
      "step": 226822
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3243186473846436,
      "learning_rate": 3.5735063654647176e-07,
      "loss": 2.8325,
      "step": 226823
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.013484001159668,
      "learning_rate": 3.5715106681702966e-07,
      "loss": 2.8775,
      "step": 226824
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.145967721939087,
      "learning_rate": 3.5695155279685805e-07,
      "loss": 2.8269,
      "step": 226825
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.931539535522461,
      "learning_rate": 3.56752094485957e-07,
      "loss": 2.9123,
      "step": 226826
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4655849933624268,
      "learning_rate": 3.5655269188439305e-07,
      "loss": 2.8138,
      "step": 226827
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7127628326416016,
      "learning_rate": 3.563533449922329e-07,
      "loss": 3.0674,
      "step": 226828
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.484002113342285,
      "learning_rate": 3.5615405380944317e-07,
      "loss": 2.7846,
      "step": 226829
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6412272453308105,
      "learning_rate": 3.559548183361238e-07,
      "loss": 3.0145,
      "step": 226830
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7311718463897705,
      "learning_rate": 3.5575563857227484e-07,
      "loss": 3.0248,
      "step": 226831
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7818145751953125,
      "learning_rate": 3.5555651451796287e-07,
      "loss": 2.982,
      "step": 226832
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5787956714630127,
      "learning_rate": 3.553574461731878e-07,
      "loss": 3.0322,
      "step": 226833
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.964935541152954,
      "learning_rate": 3.551584335380497e-07,
      "loss": 3.0145,
      "step": 226834
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3700320720672607,
      "learning_rate": 3.549594766124819e-07,
      "loss": 2.9163,
      "step": 226835
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.895575761795044,
      "learning_rate": 3.547605753966509e-07,
      "loss": 3.1429,
      "step": 226836
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0465521812438965,
      "learning_rate": 3.545617298904901e-07,
      "loss": 2.8253,
      "step": 226837
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5892157554626465,
      "learning_rate": 3.5436294009406616e-07,
      "loss": 2.9212,
      "step": 226838
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.769385576248169,
      "learning_rate": 3.541642060074457e-07,
      "loss": 3.2778,
      "step": 226839
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1930370330810547,
      "learning_rate": 3.539655276306286e-07,
      "loss": 2.9638,
      "step": 226840
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.7049899101257324,
      "learning_rate": 3.537669049636482e-07,
      "loss": 2.9325,
      "step": 226841
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7234716415405273,
      "learning_rate": 3.5356833800657124e-07,
      "loss": 3.1412,
      "step": 226842
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5307579040527344,
      "learning_rate": 3.5336982675943094e-07,
      "loss": 2.764,
      "step": 226843
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.014357328414917,
      "learning_rate": 3.531713712222606e-07,
      "loss": 3.0773,
      "step": 226844
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6590638160705566,
      "learning_rate": 3.529729713950935e-07,
      "loss": 2.9813,
      "step": 226845
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8313465118408203,
      "learning_rate": 3.527746272779297e-07,
      "loss": 2.852,
      "step": 226846
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9938008785247803,
      "learning_rate": 3.525763388708691e-07,
      "loss": 3.087,
      "step": 226847
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1255850791931152,
      "learning_rate": 3.523781061739117e-07,
      "loss": 2.9256,
      "step": 226848
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3733882904052734,
      "learning_rate": 3.521799291871241e-07,
      "loss": 2.9815,
      "step": 226849
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.289947986602783,
      "learning_rate": 3.5198180791050636e-07,
      "loss": 2.8439,
      "step": 226850
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2946393489837646,
      "learning_rate": 3.51783742344125e-07,
      "loss": 2.9902,
      "step": 226851
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.374805450439453,
      "learning_rate": 3.5158573248798004e-07,
      "loss": 3.0687,
      "step": 226852
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.798475742340088,
      "learning_rate": 3.5138777834213817e-07,
      "loss": 2.9172,
      "step": 226853
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1221511363983154,
      "learning_rate": 3.511898799066326e-07,
      "loss": 2.7285,
      "step": 226854
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5309324264526367,
      "learning_rate": 3.509920371814967e-07,
      "loss": 2.8973,
      "step": 226855
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3086326122283936,
      "learning_rate": 3.507942501667638e-07,
      "loss": 2.9876,
      "step": 226856
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3257522583007812,
      "learning_rate": 3.505965188625004e-07,
      "loss": 3.0641,
      "step": 226857
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1617095470428467,
      "learning_rate": 3.503988432687066e-07,
      "loss": 2.9043,
      "step": 226858
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1162796020507812,
      "learning_rate": 3.5020122338541565e-07,
      "loss": 2.8847,
      "step": 226859
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8377349376678467,
      "learning_rate": 3.5000365921269423e-07,
      "loss": 2.8725,
      "step": 226860
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.081482172012329,
      "learning_rate": 3.498061507505423e-07,
      "loss": 3.056,
      "step": 226861
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.5628535747528076,
      "learning_rate": 3.4960869799902645e-07,
      "loss": 3.1105,
      "step": 226862
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1001949310302734,
      "learning_rate": 3.4941130095818004e-07,
      "loss": 3.0122,
      "step": 226863
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.76615309715271,
      "learning_rate": 3.4921395962803634e-07,
      "loss": 2.8677,
      "step": 226864
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4194295406341553,
      "learning_rate": 3.4901667400862866e-07,
      "loss": 2.977,
      "step": 226865
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7516796588897705,
      "learning_rate": 3.488194440999903e-07,
      "loss": 2.8919,
      "step": 226866
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.40799880027771,
      "learning_rate": 3.486222699021546e-07,
      "loss": 2.7234,
      "step": 226867
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.148087501525879,
      "learning_rate": 3.484251514151881e-07,
      "loss": 2.8831,
      "step": 226868
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0410640239715576,
      "learning_rate": 3.482280886390909e-07,
      "loss": 2.8415,
      "step": 226869
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.145662546157837,
      "learning_rate": 3.4803108157389625e-07,
      "loss": 2.9694,
      "step": 226870
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0019800662994385,
      "learning_rate": 3.4783413021970405e-07,
      "loss": 2.6785,
      "step": 226871
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0057363510131836,
      "learning_rate": 3.476372345764478e-07,
      "loss": 2.9692,
      "step": 226872
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6614909172058105,
      "learning_rate": 3.4744039464426056e-07,
      "loss": 2.9292,
      "step": 226873
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.162365674972534,
      "learning_rate": 3.472436104231424e-07,
      "loss": 2.9717,
      "step": 226874
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6199655532836914,
      "learning_rate": 3.470468819131267e-07,
      "loss": 2.8039,
      "step": 226875
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.980618476867676,
      "learning_rate": 3.4685020911424667e-07,
      "loss": 2.7247,
      "step": 226876
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.042663097381592,
      "learning_rate": 3.4665359202653564e-07,
      "loss": 3.1194,
      "step": 226877
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.538317918777466,
      "learning_rate": 3.4645703065006027e-07,
      "loss": 2.8034,
      "step": 226878
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3748958110809326,
      "learning_rate": 3.462605249848205e-07,
      "loss": 2.8824,
      "step": 226879
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3251285552978516,
      "learning_rate": 3.4606407503084966e-07,
      "loss": 2.8399,
      "step": 226880
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.085479736328125,
      "learning_rate": 3.4586768078824766e-07,
      "loss": 2.7724,
      "step": 226881
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1837267875671387,
      "learning_rate": 3.456713422569479e-07,
      "loss": 3.0158,
      "step": 226882
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5845043659210205,
      "learning_rate": 3.4547505943708363e-07,
      "loss": 2.7686,
      "step": 226883
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.070836305618286,
      "learning_rate": 3.4527883232865485e-07,
      "loss": 3.0918,
      "step": 226884
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0117809772491455,
      "learning_rate": 3.4508266093166147e-07,
      "loss": 2.7392,
      "step": 226885
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4957032203674316,
      "learning_rate": 3.448865452462035e-07,
      "loss": 2.7839,
      "step": 226886
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.100193500518799,
      "learning_rate": 3.446904852722809e-07,
      "loss": 2.8176,
      "step": 226887
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2132980823516846,
      "learning_rate": 3.4449448100992705e-07,
      "loss": 2.8286,
      "step": 226888
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.757750988006592,
      "learning_rate": 3.442985324592085e-07,
      "loss": 3.2228,
      "step": 226889
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.545220375061035,
      "learning_rate": 3.4410263962012516e-07,
      "loss": 3.0679,
      "step": 226890
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8867290019989014,
      "learning_rate": 3.4390680249274385e-07,
      "loss": 3.004,
      "step": 226891
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.3741843700408936,
      "learning_rate": 3.437110210770644e-07,
      "loss": 2.7591,
      "step": 226892
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.35160756111145,
      "learning_rate": 3.4351529537315346e-07,
      "loss": 2.8637,
      "step": 226893
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6908493041992188,
      "learning_rate": 3.433196253810444e-07,
      "loss": 2.9056,
      "step": 226894
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.4509973526000977,
      "learning_rate": 3.4312401110077046e-07,
      "loss": 2.9597,
      "step": 226895
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.53039813041687,
      "learning_rate": 3.42928452532365e-07,
      "loss": 2.7937,
      "step": 226896
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9438579082489014,
      "learning_rate": 3.427329496758613e-07,
      "loss": 2.8648,
      "step": 226897
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6759471893310547,
      "learning_rate": 3.425375025312926e-07,
      "loss": 3.0213,
      "step": 226898
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.240692377090454,
      "learning_rate": 3.423421110987257e-07,
      "loss": 2.9013,
      "step": 226899
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1468405723571777,
      "learning_rate": 3.421467753781604e-07,
      "loss": 3.026,
      "step": 226900
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.077820301055908,
      "learning_rate": 3.419514953696301e-07,
      "loss": 3.0179,
      "step": 226901
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.386197805404663,
      "learning_rate": 3.4175627107320137e-07,
      "loss": 2.8858,
      "step": 226902
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6748297214508057,
      "learning_rate": 3.4156110248890755e-07,
      "loss": 3.0026,
      "step": 226903
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.554077625274658,
      "learning_rate": 3.4136598961678197e-07,
      "loss": 2.9022,
      "step": 226904
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.122382879257202,
      "learning_rate": 3.411709324568246e-07,
      "loss": 2.9912,
      "step": 226905
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0369338989257812,
      "learning_rate": 3.4097593100910204e-07,
      "loss": 3.0577,
      "step": 226906
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8617208003997803,
      "learning_rate": 3.4078098527368094e-07,
      "loss": 3.1997,
      "step": 226907
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.533019781112671,
      "learning_rate": 3.405860952505279e-07,
      "loss": 2.8081,
      "step": 226908
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9845855236053467,
      "learning_rate": 3.40391260939743e-07,
      "loss": 2.9107,
      "step": 226909
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.89801287651062,
      "learning_rate": 3.4019648234132615e-07,
      "loss": 2.6516,
      "step": 226910
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.210545063018799,
      "learning_rate": 3.400017594553106e-07,
      "loss": 2.8332,
      "step": 226911
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.575087070465088,
      "learning_rate": 3.398070922817631e-07,
      "loss": 2.9633,
      "step": 226912
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.5132381916046143,
      "learning_rate": 3.396124808206835e-07,
      "loss": 3.2422,
      "step": 226913
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2297887802124023,
      "learning_rate": 3.394179250721385e-07,
      "loss": 3.1851,
      "step": 226914
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.726511001586914,
      "learning_rate": 3.392234250361614e-07,
      "loss": 2.8927,
      "step": 226915
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.042647123336792,
      "learning_rate": 3.3902898071275217e-07,
      "loss": 2.7679,
      "step": 226916
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8196189403533936,
      "learning_rate": 3.3883459210201083e-07,
      "loss": 2.8991,
      "step": 226917
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.084836959838867,
      "learning_rate": 3.386402592039039e-07,
      "loss": 3.0084,
      "step": 226918
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.2929084300994873,
      "learning_rate": 3.3844598201849814e-07,
      "loss": 2.784,
      "step": 226919
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.868006706237793,
      "learning_rate": 3.382517605458601e-07,
      "loss": 3.1216,
      "step": 226920
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.7728376388549805,
      "learning_rate": 3.3805759478598983e-07,
      "loss": 2.8814,
      "step": 226921
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8689072132110596,
      "learning_rate": 3.3786348473892054e-07,
      "loss": 2.8033,
      "step": 226922
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.085875988006592,
      "learning_rate": 3.3766943040471894e-07,
      "loss": 2.7863,
      "step": 226923
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9714698791503906,
      "learning_rate": 3.37475431783385e-07,
      "loss": 2.9242,
      "step": 226924
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.031691551208496,
      "learning_rate": 3.37281488874952e-07,
      "loss": 3.0744,
      "step": 226925
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.8392858505249023,
      "learning_rate": 3.3708760167948654e-07,
      "loss": 2.9433,
      "step": 226926
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.6678919792175293,
      "learning_rate": 3.36893770197022e-07,
      "loss": 2.8591,
      "step": 226927
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6141843795776367,
      "learning_rate": 3.366999944275917e-07,
      "loss": 2.8508,
      "step": 226928
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.740051746368408,
      "learning_rate": 3.365062743712288e-07,
      "loss": 2.9648,
      "step": 226929
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.6185076236724854,
      "learning_rate": 3.363126100279667e-07,
      "loss": 2.8296,
      "step": 226930
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.751607656478882,
      "learning_rate": 3.361190013978055e-07,
      "loss": 2.7334,
      "step": 226931
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9603042602539062,
      "learning_rate": 3.359254484808449e-07,
      "loss": 2.9143,
      "step": 226932
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0434861183166504,
      "learning_rate": 3.3573195127708506e-07,
      "loss": 2.6264,
      "step": 226933
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.354058742523193,
      "learning_rate": 3.355385097865926e-07,
      "loss": 2.8289,
      "step": 226934
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.292694568634033,
      "learning_rate": 3.3534512400936743e-07,
      "loss": 2.8655,
      "step": 226935
    },
    {
      "epoch": 2.95,
      "grad_norm": 4.303088188171387,
      "learning_rate": 3.351517939454429e-07,
      "loss": 2.9526,
      "step": 226936
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.921020030975342,
      "learning_rate": 3.3495851959488564e-07,
      "loss": 3.154,
      "step": 226937
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.783076763153076,
      "learning_rate": 3.347653009576956e-07,
      "loss": 3.0565,
      "step": 226938
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.245609760284424,
      "learning_rate": 3.3457213803393943e-07,
      "loss": 3.1161,
      "step": 226939
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.899832248687744,
      "learning_rate": 3.343790308236505e-07,
      "loss": 2.6827,
      "step": 226940
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.057291030883789,
      "learning_rate": 3.34185979326862e-07,
      "loss": 2.9135,
      "step": 226941
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.0269782543182373,
      "learning_rate": 3.33992983543574e-07,
      "loss": 3.0491,
      "step": 226942
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.9704902172088623,
      "learning_rate": 3.3380004347388633e-07,
      "loss": 3.1637,
      "step": 226943
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.3059911727905273,
      "learning_rate": 3.336071591177991e-07,
      "loss": 2.931,
      "step": 226944
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.1651804447174072,
      "learning_rate": 3.3341433047534563e-07,
      "loss": 3.1363,
      "step": 226945
    },
    {
      "epoch": 2.95,
      "grad_norm": 3.9692556858062744,
      "learning_rate": 3.332215575465591e-07,
      "loss": 2.8886,
      "step": 226946
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.821054220199585,
      "learning_rate": 3.3302884033147294e-07,
      "loss": 2.9766,
      "step": 226947
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.575758218765259,
      "learning_rate": 3.3283617883015366e-07,
      "loss": 2.8201,
      "step": 226948
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.803621530532837,
      "learning_rate": 3.3264357304260135e-07,
      "loss": 2.9653,
      "step": 226949
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.206477165222168,
      "learning_rate": 3.324510229688826e-07,
      "loss": 2.6027,
      "step": 226950
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.708653450012207,
      "learning_rate": 3.3225852860903066e-07,
      "loss": 2.8769,
      "step": 226951
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6478843688964844,
      "learning_rate": 3.3206608996304563e-07,
      "loss": 3.0325,
      "step": 226952
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7659711837768555,
      "learning_rate": 3.31873707030994e-07,
      "loss": 2.9977,
      "step": 226953
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9026124477386475,
      "learning_rate": 3.316813798129092e-07,
      "loss": 3.2705,
      "step": 226954
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7285351753234863,
      "learning_rate": 3.3148910830879116e-07,
      "loss": 2.8833,
      "step": 226955
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9454843997955322,
      "learning_rate": 3.312968925187398e-07,
      "loss": 2.9325,
      "step": 226956
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.964301109313965,
      "learning_rate": 3.3110473244275514e-07,
      "loss": 2.9291,
      "step": 226957
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1444687843322754,
      "learning_rate": 3.3091262808083717e-07,
      "loss": 2.8845,
      "step": 226958
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.775775671005249,
      "learning_rate": 3.3072057943308585e-07,
      "loss": 2.7977,
      "step": 226959
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2682154178619385,
      "learning_rate": 3.305285864995344e-07,
      "loss": 2.7724,
      "step": 226960
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.021016836166382,
      "learning_rate": 3.303366492801496e-07,
      "loss": 3.045,
      "step": 226961
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.71863055229187,
      "learning_rate": 3.301447677750313e-07,
      "loss": 2.8033,
      "step": 226962
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.878469228744507,
      "learning_rate": 3.2995294198421283e-07,
      "loss": 2.9416,
      "step": 226963
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.72104549407959,
      "learning_rate": 3.2976117190769424e-07,
      "loss": 2.9467,
      "step": 226964
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.627595901489258,
      "learning_rate": 3.295694575455421e-07,
      "loss": 2.8771,
      "step": 226965
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.098965883255005,
      "learning_rate": 3.293777988977564e-07,
      "loss": 2.6782,
      "step": 226966
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.809000015258789,
      "learning_rate": 3.291861959644371e-07,
      "loss": 2.8357,
      "step": 226967
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2566118240356445,
      "learning_rate": 3.2899464874555084e-07,
      "loss": 3.0421,
      "step": 226968
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0473556518554688,
      "learning_rate": 3.2880315724113095e-07,
      "loss": 3.1305,
      "step": 226969
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0910890102386475,
      "learning_rate": 3.2861172145127737e-07,
      "loss": 2.7891,
      "step": 226970
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.920391321182251,
      "learning_rate": 3.284203413760234e-07,
      "loss": 2.904,
      "step": 226971
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6672770977020264,
      "learning_rate": 3.2822901701533564e-07,
      "loss": 2.7964,
      "step": 226972
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.858168601989746,
      "learning_rate": 3.2803774836928085e-07,
      "loss": 2.8832,
      "step": 226973
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.105436325073242,
      "learning_rate": 3.278465354378923e-07,
      "loss": 2.8615,
      "step": 226974
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.301086664199829,
      "learning_rate": 3.2765537822123656e-07,
      "loss": 3.0555,
      "step": 226975
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7217376232147217,
      "learning_rate": 3.2746427671934696e-07,
      "loss": 3.0287,
      "step": 226976
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4696521759033203,
      "learning_rate": 3.2727323093219015e-07,
      "loss": 2.976,
      "step": 226977
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0317928791046143,
      "learning_rate": 3.2708224085989944e-07,
      "loss": 3.0826,
      "step": 226978
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.064805030822754,
      "learning_rate": 3.2689130650240813e-07,
      "loss": 2.8637,
      "step": 226979
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2698888778686523,
      "learning_rate": 3.267004278598495e-07,
      "loss": 2.8893,
      "step": 226980
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.900707960128784,
      "learning_rate": 3.265096049321903e-07,
      "loss": 3.0229,
      "step": 226981
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8331105709075928,
      "learning_rate": 3.26318837719497e-07,
      "loss": 2.9613,
      "step": 226982
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7830119132995605,
      "learning_rate": 3.2612812622180294e-07,
      "loss": 2.8269,
      "step": 226983
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7608585357666016,
      "learning_rate": 3.2593747043910823e-07,
      "loss": 2.8763,
      "step": 226984
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.254429817199707,
      "learning_rate": 3.257468703715127e-07,
      "loss": 2.8555,
      "step": 226985
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8409922122955322,
      "learning_rate": 3.255563260190164e-07,
      "loss": 2.7915,
      "step": 226986
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9899635314941406,
      "learning_rate": 3.2536583738161925e-07,
      "loss": 2.9403,
      "step": 226987
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7843964099884033,
      "learning_rate": 3.2517540445942126e-07,
      "loss": 2.7377,
      "step": 226988
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.273454427719116,
      "learning_rate": 3.249850272524557e-07,
      "loss": 2.9253,
      "step": 226989
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8491570949554443,
      "learning_rate": 3.247947057606892e-07,
      "loss": 2.9756,
      "step": 226990
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0861871242523193,
      "learning_rate": 3.2460443998422183e-07,
      "loss": 3.1365,
      "step": 226991
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.763444662094116,
      "learning_rate": 3.2441422992305343e-07,
      "loss": 2.5946,
      "step": 226992
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.502349615097046,
      "learning_rate": 3.242240755772507e-07,
      "loss": 3.0823,
      "step": 226993
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.05202317237854,
      "learning_rate": 3.2403397694681363e-07,
      "loss": 2.8596,
      "step": 226994
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5308711528778076,
      "learning_rate": 3.2384393403180884e-07,
      "loss": 2.6511,
      "step": 226995
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4074015617370605,
      "learning_rate": 3.236539468322696e-07,
      "loss": 2.9726,
      "step": 226996
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.117922782897949,
      "learning_rate": 3.23464015348196e-07,
      "loss": 3.0123,
      "step": 226997
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.579463005065918,
      "learning_rate": 3.2327413957965454e-07,
      "loss": 3.0256,
      "step": 226998
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1736702919006348,
      "learning_rate": 3.230843195266453e-07,
      "loss": 2.9118,
      "step": 226999
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1245970726013184,
      "learning_rate": 3.228945551892681e-07,
      "loss": 3.0109,
      "step": 227000
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7580432891845703,
      "learning_rate": 3.2270484656752303e-07,
      "loss": 2.9254,
      "step": 227001
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.788564443588257,
      "learning_rate": 3.225151936614434e-07,
      "loss": 3.2546,
      "step": 227002
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4243245124816895,
      "learning_rate": 3.223255964710625e-07,
      "loss": 3.0126,
      "step": 227003
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.695939540863037,
      "learning_rate": 3.221360549964136e-07,
      "loss": 2.7736,
      "step": 227004
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7359769344329834,
      "learning_rate": 3.2194656923753005e-07,
      "loss": 2.8927,
      "step": 227005
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.553351402282715,
      "learning_rate": 3.2175713919444514e-07,
      "loss": 3.0297,
      "step": 227006
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7660458087921143,
      "learning_rate": 3.2156776486722545e-07,
      "loss": 3.1641,
      "step": 227007
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.625004529953003,
      "learning_rate": 3.2137844625587104e-07,
      "loss": 3.0311,
      "step": 227008
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5825514793395996,
      "learning_rate": 3.211891833604152e-07,
      "loss": 3.0161,
      "step": 227009
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0653271675109863,
      "learning_rate": 3.2099997618092454e-07,
      "loss": 2.7516,
      "step": 227010
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3244569301605225,
      "learning_rate": 3.2081082471743235e-07,
      "loss": 2.8907,
      "step": 227011
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8517873287200928,
      "learning_rate": 3.2062172896993864e-07,
      "loss": 2.8819,
      "step": 227012
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6642978191375732,
      "learning_rate": 3.2043268893847675e-07,
      "loss": 2.7215,
      "step": 227013
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.114471912384033,
      "learning_rate": 3.202437046231465e-07,
      "loss": 2.8311,
      "step": 227014
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.145946502685547,
      "learning_rate": 3.2005477602391473e-07,
      "loss": 3.1878,
      "step": 227015
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.4608914852142334,
      "learning_rate": 3.198659031408479e-07,
      "loss": 3.0879,
      "step": 227016
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3893208503723145,
      "learning_rate": 3.1967708597397944e-07,
      "loss": 2.7807,
      "step": 227017
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2457587718963623,
      "learning_rate": 3.194883245233426e-07,
      "loss": 2.9361,
      "step": 227018
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.767683506011963,
      "learning_rate": 3.192996187889707e-07,
      "loss": 2.7853,
      "step": 227019
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.070152759552002,
      "learning_rate": 3.1911096877089704e-07,
      "loss": 2.7504,
      "step": 227020
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.073917865753174,
      "learning_rate": 3.189223744691216e-07,
      "loss": 2.8334,
      "step": 227021
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.044694423675537,
      "learning_rate": 3.1873383588374435e-07,
      "loss": 2.9224,
      "step": 227022
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0749857425689697,
      "learning_rate": 3.1854535301476525e-07,
      "loss": 3.0606,
      "step": 227023
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7212281227111816,
      "learning_rate": 3.183569258622509e-07,
      "loss": 2.9744,
      "step": 227024
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.6550850868225098,
      "learning_rate": 3.1816855442620135e-07,
      "loss": 2.7495,
      "step": 227025
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.125096082687378,
      "learning_rate": 3.179802387066499e-07,
      "loss": 2.9718,
      "step": 227026
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1897428035736084,
      "learning_rate": 3.177919787036298e-07,
      "loss": 2.8783,
      "step": 227027
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.558905839920044,
      "learning_rate": 3.1760377441720774e-07,
      "loss": 2.8967,
      "step": 227028
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.859285831451416,
      "learning_rate": 3.174156258473837e-07,
      "loss": 2.9894,
      "step": 227029
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.361090660095215,
      "learning_rate": 3.1722753299422423e-07,
      "loss": 2.9721,
      "step": 227030
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5522027015686035,
      "learning_rate": 3.170394958577627e-07,
      "loss": 2.9922,
      "step": 227031
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.297732353210449,
      "learning_rate": 3.1685151443799906e-07,
      "loss": 2.8512,
      "step": 227032
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1126742362976074,
      "learning_rate": 3.16663588735e-07,
      "loss": 3.0538,
      "step": 227033
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1387453079223633,
      "learning_rate": 3.1647571874876544e-07,
      "loss": 2.8918,
      "step": 227034
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.384615898132324,
      "learning_rate": 3.1628790447939535e-07,
      "loss": 2.6036,
      "step": 227035
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0366530418395996,
      "learning_rate": 3.161001459268564e-07,
      "loss": 2.7686,
      "step": 227036
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.834805965423584,
      "learning_rate": 3.1591244309121523e-07,
      "loss": 2.9626,
      "step": 227037
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7311384677886963,
      "learning_rate": 3.157247959725051e-07,
      "loss": 2.7406,
      "step": 227038
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.531964063644409,
      "learning_rate": 3.155372045707594e-07,
      "loss": 2.7321,
      "step": 227039
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4812912940979004,
      "learning_rate": 3.1534966888601133e-07,
      "loss": 2.9649,
      "step": 227040
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.8174517154693604,
      "learning_rate": 3.1516218891829427e-07,
      "loss": 3.2568,
      "step": 227041
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.906233549118042,
      "learning_rate": 3.1497476466767477e-07,
      "loss": 2.7981,
      "step": 227042
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2739720344543457,
      "learning_rate": 3.147873961341196e-07,
      "loss": 2.7092,
      "step": 227043
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.096400260925293,
      "learning_rate": 3.146000833176954e-07,
      "loss": 3.0595,
      "step": 227044
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9923532009124756,
      "learning_rate": 3.1441282621846865e-07,
      "loss": 2.6852,
      "step": 227045
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7871172428131104,
      "learning_rate": 3.142256248364394e-07,
      "loss": 2.8623,
      "step": 227046
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.78853702545166,
      "learning_rate": 3.140384791716744e-07,
      "loss": 2.7387,
      "step": 227047
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.980710744857788,
      "learning_rate": 3.138513892241734e-07,
      "loss": 2.7964,
      "step": 227048
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.0970611572265625,
      "learning_rate": 3.136643549939699e-07,
      "loss": 2.6013,
      "step": 227049
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.018491744995117,
      "learning_rate": 3.1347737648113047e-07,
      "loss": 2.8769,
      "step": 227050
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.921255350112915,
      "learning_rate": 3.132904536856884e-07,
      "loss": 3.0356,
      "step": 227051
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.010565757751465,
      "learning_rate": 3.1310358660761035e-07,
      "loss": 3.0771,
      "step": 227052
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.600966691970825,
      "learning_rate": 3.129167752470296e-07,
      "loss": 2.6305,
      "step": 227053
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.985443353652954,
      "learning_rate": 3.127300196039129e-07,
      "loss": 2.838,
      "step": 227054
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1644251346588135,
      "learning_rate": 3.125433196783267e-07,
      "loss": 2.9701,
      "step": 227055
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8901023864746094,
      "learning_rate": 3.123566754703044e-07,
      "loss": 2.8467,
      "step": 227056
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.4936981201171875,
      "learning_rate": 3.1217008697984604e-07,
      "loss": 3.1597,
      "step": 227057
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9865264892578125,
      "learning_rate": 3.1198355420701813e-07,
      "loss": 2.9583,
      "step": 227058
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.943838357925415,
      "learning_rate": 3.117970771518874e-07,
      "loss": 3.0134,
      "step": 227059
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1725261211395264,
      "learning_rate": 3.1161065581438715e-07,
      "loss": 2.9525,
      "step": 227060
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.103274345397949,
      "learning_rate": 3.11424290194684e-07,
      "loss": 2.8877,
      "step": 227061
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1007442474365234,
      "learning_rate": 3.112379802927112e-07,
      "loss": 2.7939,
      "step": 227062
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9827871322631836,
      "learning_rate": 3.1105172610853545e-07,
      "loss": 2.8903,
      "step": 227063
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5402326583862305,
      "learning_rate": 3.1086552764215675e-07,
      "loss": 2.9714,
      "step": 227064
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2622482776641846,
      "learning_rate": 3.1067938489370834e-07,
      "loss": 2.7676,
      "step": 227065
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9241247177124023,
      "learning_rate": 3.1049329786312363e-07,
      "loss": 2.9462,
      "step": 227066
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9981424808502197,
      "learning_rate": 3.103072665504691e-07,
      "loss": 2.864,
      "step": 227067
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.145481586456299,
      "learning_rate": 3.101212909558115e-07,
      "loss": 2.6796,
      "step": 227068
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.011900901794434,
      "learning_rate": 3.099353710791508e-07,
      "loss": 2.6591,
      "step": 227069
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.490161418914795,
      "learning_rate": 3.0974950692052024e-07,
      "loss": 3.0315,
      "step": 227070
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7924182415008545,
      "learning_rate": 3.095636984799865e-07,
      "loss": 2.7577,
      "step": 227071
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0500879287719727,
      "learning_rate": 3.0937794575754957e-07,
      "loss": 2.8584,
      "step": 227072
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0397212505340576,
      "learning_rate": 3.0919224875324277e-07,
      "loss": 3.0762,
      "step": 227073
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.991330623626709,
      "learning_rate": 3.090066074671327e-07,
      "loss": 2.838,
      "step": 227074
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.240490913391113,
      "learning_rate": 3.088210218992193e-07,
      "loss": 2.7609,
      "step": 227075
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0940308570861816,
      "learning_rate": 3.0863549204956926e-07,
      "loss": 3.0107,
      "step": 227076
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8271920680999756,
      "learning_rate": 3.084500179182159e-07,
      "loss": 2.9904,
      "step": 227077
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1217453479766846,
      "learning_rate": 3.082645995051258e-07,
      "loss": 2.7817,
      "step": 227078
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1027727127075195,
      "learning_rate": 3.0807923681043236e-07,
      "loss": 2.8175,
      "step": 227079
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3442819118499756,
      "learning_rate": 3.078939298341021e-07,
      "loss": 2.7279,
      "step": 227080
    },
    {
      "epoch": 2.96,
      "grad_norm": 5.114685535430908,
      "learning_rate": 3.0770867857620175e-07,
      "loss": 2.9077,
      "step": 227081
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7066636085510254,
      "learning_rate": 3.0752348303676454e-07,
      "loss": 2.8222,
      "step": 227082
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.848275661468506,
      "learning_rate": 3.0733834321579056e-07,
      "loss": 2.7345,
      "step": 227083
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3050856590270996,
      "learning_rate": 3.071532591133796e-07,
      "loss": 2.9647,
      "step": 227084
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6938371658325195,
      "learning_rate": 3.069682307294985e-07,
      "loss": 2.6183,
      "step": 227085
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9673309326171875,
      "learning_rate": 3.067832580641805e-07,
      "loss": 3.1795,
      "step": 227086
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0483338832855225,
      "learning_rate": 3.065983411175255e-07,
      "loss": 3.1666,
      "step": 227087
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.998262882232666,
      "learning_rate": 3.064134798895335e-07,
      "loss": 2.7191,
      "step": 227088
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7122080326080322,
      "learning_rate": 3.062286743802378e-07,
      "loss": 2.8146,
      "step": 227089
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8667783737182617,
      "learning_rate": 3.0604392458963843e-07,
      "loss": 2.8587,
      "step": 227090
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.844667673110962,
      "learning_rate": 3.0585923051783537e-07,
      "loss": 2.8188,
      "step": 227091
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0033035278320312,
      "learning_rate": 3.056745921648285e-07,
      "loss": 2.7982,
      "step": 227092
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0254478454589844,
      "learning_rate": 3.0549000953065116e-07,
      "loss": 3.1771,
      "step": 227093
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.092527151107788,
      "learning_rate": 3.053054826153367e-07,
      "loss": 3.034,
      "step": 227094
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7650704383850098,
      "learning_rate": 3.0512101141891844e-07,
      "loss": 3.0251,
      "step": 227095
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.702775478363037,
      "learning_rate": 3.049365959414296e-07,
      "loss": 2.9353,
      "step": 227096
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1189403533935547,
      "learning_rate": 3.0475223618290357e-07,
      "loss": 2.8208,
      "step": 227097
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0455124378204346,
      "learning_rate": 3.045679321434069e-07,
      "loss": 2.8921,
      "step": 227098
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.741788864135742,
      "learning_rate": 3.0438368382293967e-07,
      "loss": 2.9639,
      "step": 227099
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8458352088928223,
      "learning_rate": 3.041994912215351e-07,
      "loss": 3.0036,
      "step": 227100
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7640392780303955,
      "learning_rate": 3.0401535433925986e-07,
      "loss": 2.9645,
      "step": 227101
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0652101039886475,
      "learning_rate": 3.0383127317611387e-07,
      "loss": 2.7017,
      "step": 227102
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0008292198181152,
      "learning_rate": 3.0364724773213056e-07,
      "loss": 2.8904,
      "step": 227103
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.942770481109619,
      "learning_rate": 3.0346327800734317e-07,
      "loss": 3.041,
      "step": 227104
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.733243942260742,
      "learning_rate": 3.032793640018516e-07,
      "loss": 3.1406,
      "step": 227105
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.522627830505371,
      "learning_rate": 3.0309550571558927e-07,
      "loss": 3.0918,
      "step": 227106
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9718756675720215,
      "learning_rate": 3.0291170314865607e-07,
      "loss": 2.9801,
      "step": 227107
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.565995454788208,
      "learning_rate": 3.027279563010854e-07,
      "loss": 2.9587,
      "step": 227108
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.741065263748169,
      "learning_rate": 3.025442651728771e-07,
      "loss": 3.0712,
      "step": 227109
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7512435913085938,
      "learning_rate": 3.023606297640646e-07,
      "loss": 2.9599,
      "step": 227110
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.478208065032959,
      "learning_rate": 3.0217705007471446e-07,
      "loss": 3.1455,
      "step": 227111
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.763028860092163,
      "learning_rate": 3.0199352610486003e-07,
      "loss": 2.9161,
      "step": 227112
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6487128734588623,
      "learning_rate": 3.018100578545346e-07,
      "loss": 3.0436,
      "step": 227113
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9268345832824707,
      "learning_rate": 3.0162664532373815e-07,
      "loss": 2.8978,
      "step": 227114
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.111884593963623,
      "learning_rate": 3.01443288512504e-07,
      "loss": 2.9234,
      "step": 227115
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0415499210357666,
      "learning_rate": 3.0125998742093206e-07,
      "loss": 2.8117,
      "step": 227116
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2025246620178223,
      "learning_rate": 3.0107674204898903e-07,
      "loss": 2.6949,
      "step": 227117
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7431674003601074,
      "learning_rate": 3.008935523967415e-07,
      "loss": 2.8004,
      "step": 227118
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7827847003936768,
      "learning_rate": 3.0071041846422284e-07,
      "loss": 2.8748,
      "step": 227119
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8975670337677,
      "learning_rate": 3.00527340251433e-07,
      "loss": 2.9779,
      "step": 227120
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.38639497756958,
      "learning_rate": 3.003443177584386e-07,
      "loss": 2.6689,
      "step": 227121
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1465232372283936,
      "learning_rate": 3.0016135098527294e-07,
      "loss": 2.9883,
      "step": 227122
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.771139621734619,
      "learning_rate": 2.9997843993196935e-07,
      "loss": 2.9871,
      "step": 227123
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.063530683517456,
      "learning_rate": 2.997955845985611e-07,
      "loss": 2.7018,
      "step": 227124
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9882068634033203,
      "learning_rate": 2.996127849850816e-07,
      "loss": 2.8046,
      "step": 227125
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1378140449523926,
      "learning_rate": 2.9943004109156396e-07,
      "loss": 3.0798,
      "step": 227126
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.658005714416504,
      "learning_rate": 2.992473529180084e-07,
      "loss": 3.0372,
      "step": 227127
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2762997150421143,
      "learning_rate": 2.9906472046451467e-07,
      "loss": 3.1456,
      "step": 227128
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.379411458969116,
      "learning_rate": 2.9888214373104955e-07,
      "loss": 2.9455,
      "step": 227129
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8709332942962646,
      "learning_rate": 2.98699622717713e-07,
      "loss": 2.8247,
      "step": 227130
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.469078779220581,
      "learning_rate": 2.9851715742447157e-07,
      "loss": 3.0144,
      "step": 227131
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1965816020965576,
      "learning_rate": 2.983347478514253e-07,
      "loss": 2.8842,
      "step": 227132
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7779343128204346,
      "learning_rate": 2.9815239399857413e-07,
      "loss": 2.7981,
      "step": 227133
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5633482933044434,
      "learning_rate": 2.979700958659181e-07,
      "loss": 2.8245,
      "step": 227134
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.960221529006958,
      "learning_rate": 2.9778785345355715e-07,
      "loss": 2.7889,
      "step": 227135
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3093552589416504,
      "learning_rate": 2.9760566676149123e-07,
      "loss": 2.8977,
      "step": 227136
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.044846534729004,
      "learning_rate": 2.9742353578975365e-07,
      "loss": 3.056,
      "step": 227137
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7576100826263428,
      "learning_rate": 2.972414605383777e-07,
      "loss": 2.8323,
      "step": 227138
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.693654775619507,
      "learning_rate": 2.970594410074301e-07,
      "loss": 2.7673,
      "step": 227139
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.976341485977173,
      "learning_rate": 2.9687747719687735e-07,
      "loss": 2.8459,
      "step": 227140
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.026975154876709,
      "learning_rate": 2.9669556910681956e-07,
      "loss": 2.8803,
      "step": 227141
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.789980411529541,
      "learning_rate": 2.9651371673725666e-07,
      "loss": 2.8091,
      "step": 227142
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.951603651046753,
      "learning_rate": 2.963319200882219e-07,
      "loss": 2.8859,
      "step": 227143
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5938425064086914,
      "learning_rate": 2.961501791597487e-07,
      "loss": 3.0264,
      "step": 227144
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0550546646118164,
      "learning_rate": 2.959684939519036e-07,
      "loss": 2.9516,
      "step": 227145
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.422214984893799,
      "learning_rate": 2.957868644646866e-07,
      "loss": 2.7724,
      "step": 227146
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0672383308410645,
      "learning_rate": 2.95605290698131e-07,
      "loss": 3.3575,
      "step": 227147
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.004408836364746,
      "learning_rate": 2.954237726522701e-07,
      "loss": 3.0837,
      "step": 227148
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7621536254882812,
      "learning_rate": 2.952423103271706e-07,
      "loss": 2.8213,
      "step": 227149
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.812422275543213,
      "learning_rate": 2.9506090372283244e-07,
      "loss": 2.6905,
      "step": 227150
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.285916805267334,
      "learning_rate": 2.9487955283928886e-07,
      "loss": 2.5229,
      "step": 227151
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7339553833007812,
      "learning_rate": 2.9469825767660657e-07,
      "loss": 3.0487,
      "step": 227152
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4988064765930176,
      "learning_rate": 2.9451701823478557e-07,
      "loss": 2.8783,
      "step": 227153
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1434407234191895,
      "learning_rate": 2.943358345138924e-07,
      "loss": 2.9285,
      "step": 227154
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9465227127075195,
      "learning_rate": 2.941547065138938e-07,
      "loss": 2.8306,
      "step": 227155
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.549532890319824,
      "learning_rate": 2.9397363423488973e-07,
      "loss": 3.0063,
      "step": 227156
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.53393292427063,
      "learning_rate": 2.937926176769134e-07,
      "loss": 2.7528,
      "step": 227157
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0911877155303955,
      "learning_rate": 2.936116568399649e-07,
      "loss": 2.9623,
      "step": 227158
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.833357334136963,
      "learning_rate": 2.9343075172407746e-07,
      "loss": 2.9462,
      "step": 227159
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.637885093688965,
      "learning_rate": 2.9324990232931775e-07,
      "loss": 2.9515,
      "step": 227160
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1106650829315186,
      "learning_rate": 2.930691086556858e-07,
      "loss": 2.8794,
      "step": 227161
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.817932367324829,
      "learning_rate": 2.928883707032481e-07,
      "loss": 2.7296,
      "step": 227162
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.251075267791748,
      "learning_rate": 2.927076884720048e-07,
      "loss": 2.9497,
      "step": 227163
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.132741451263428,
      "learning_rate": 2.9252706196202235e-07,
      "loss": 2.66,
      "step": 227164
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1899302005767822,
      "learning_rate": 2.923464911733009e-07,
      "loss": 2.8908,
      "step": 227165
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.167060613632202,
      "learning_rate": 2.92165976105907e-07,
      "loss": 2.7949,
      "step": 227166
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.669529914855957,
      "learning_rate": 2.919855167598073e-07,
      "loss": 3.0143,
      "step": 227167
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0853662490844727,
      "learning_rate": 2.918051131351351e-07,
      "loss": 3.0899,
      "step": 227168
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.6883976459503174,
      "learning_rate": 2.9162476523185705e-07,
      "loss": 2.8912,
      "step": 227169
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.055706739425659,
      "learning_rate": 2.9144447305003984e-07,
      "loss": 3.2342,
      "step": 227170
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3128955364227295,
      "learning_rate": 2.9126423658968336e-07,
      "loss": 2.9129,
      "step": 227171
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0665669441223145,
      "learning_rate": 2.9108405585082096e-07,
      "loss": 3.0915,
      "step": 227172
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.134580373764038,
      "learning_rate": 2.909039308335193e-07,
      "loss": 2.9734,
      "step": 227173
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7085132598876953,
      "learning_rate": 2.907238615378116e-07,
      "loss": 2.9458,
      "step": 227174
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.093417167663574,
      "learning_rate": 2.905438479636979e-07,
      "loss": 2.8828,
      "step": 227175
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.946561813354492,
      "learning_rate": 2.9036389011124485e-07,
      "loss": 2.9162,
      "step": 227176
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2162063121795654,
      "learning_rate": 2.9018398798045237e-07,
      "loss": 2.8424,
      "step": 227177
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3188416957855225,
      "learning_rate": 2.9000414157138716e-07,
      "loss": 3.1256,
      "step": 227178
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7469074726104736,
      "learning_rate": 2.8982435088404923e-07,
      "loss": 2.8852,
      "step": 227179
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.954432964324951,
      "learning_rate": 2.8964461591850506e-07,
      "loss": 2.966,
      "step": 227180
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2579667568206787,
      "learning_rate": 2.894649366747881e-07,
      "loss": 3.0464,
      "step": 227181
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.003620624542236,
      "learning_rate": 2.8928531315289824e-07,
      "loss": 2.7904,
      "step": 227182
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1218695640563965,
      "learning_rate": 2.891057453528689e-07,
      "loss": 2.8851,
      "step": 227183
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.774139404296875,
      "learning_rate": 2.889262332747999e-07,
      "loss": 2.9253,
      "step": 227184
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0335609912872314,
      "learning_rate": 2.88746776918658e-07,
      "loss": 2.9738,
      "step": 227185
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7979629039764404,
      "learning_rate": 2.8856737628447645e-07,
      "loss": 2.9533,
      "step": 227186
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.680288791656494,
      "learning_rate": 2.883880313723219e-07,
      "loss": 2.6261,
      "step": 227187
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.016179323196411,
      "learning_rate": 2.8820874218219435e-07,
      "loss": 3.0939,
      "step": 227188
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.150423049926758,
      "learning_rate": 2.880295087141604e-07,
      "loss": 2.9531,
      "step": 227189
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.570122003555298,
      "learning_rate": 2.878503309682534e-07,
      "loss": 2.8092,
      "step": 227190
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9409210681915283,
      "learning_rate": 2.8767120894447326e-07,
      "loss": 2.8388,
      "step": 227191
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5318188667297363,
      "learning_rate": 2.8749214264288665e-07,
      "loss": 2.8586,
      "step": 227192
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.848823070526123,
      "learning_rate": 2.873131320634936e-07,
      "loss": 3.0168,
      "step": 227193
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8552017211914062,
      "learning_rate": 2.871341772063607e-07,
      "loss": 2.9144,
      "step": 227194
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.601693868637085,
      "learning_rate": 2.869552780715212e-07,
      "loss": 2.8357,
      "step": 227195
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.120568037033081,
      "learning_rate": 2.8677643465897513e-07,
      "loss": 2.9036,
      "step": 227196
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8279340267181396,
      "learning_rate": 2.865976469687892e-07,
      "loss": 2.6287,
      "step": 227197
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1281540393829346,
      "learning_rate": 2.8641891500096326e-07,
      "loss": 2.8028,
      "step": 227198
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8542978763580322,
      "learning_rate": 2.86240238755564e-07,
      "loss": 2.8324,
      "step": 227199
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.725192070007324,
      "learning_rate": 2.8606161823259144e-07,
      "loss": 2.7136,
      "step": 227200
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1361100673675537,
      "learning_rate": 2.8588305343211214e-07,
      "loss": 2.9056,
      "step": 227201
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5111515522003174,
      "learning_rate": 2.8570454435412615e-07,
      "loss": 2.9436,
      "step": 227202
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.626581907272339,
      "learning_rate": 2.8552609099870007e-07,
      "loss": 2.6817,
      "step": 227203
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.789551019668579,
      "learning_rate": 2.8534769336586714e-07,
      "loss": 2.8205,
      "step": 227204
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.580030918121338,
      "learning_rate": 2.8516935145562747e-07,
      "loss": 2.9687,
      "step": 227205
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0734975337982178,
      "learning_rate": 2.849910652680476e-07,
      "loss": 2.9658,
      "step": 227206
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1721019744873047,
      "learning_rate": 2.8481283480312755e-07,
      "loss": 3.0669,
      "step": 227207
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1932294368743896,
      "learning_rate": 2.8463466006093395e-07,
      "loss": 2.9487,
      "step": 227208
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.033468723297119,
      "learning_rate": 2.8445654104146676e-07,
      "loss": 2.9521,
      "step": 227209
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.4845526218414307,
      "learning_rate": 2.842784777447926e-07,
      "loss": 2.9307,
      "step": 227210
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9558069705963135,
      "learning_rate": 2.841004701709115e-07,
      "loss": 2.7792,
      "step": 227211
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.603215217590332,
      "learning_rate": 2.839225183198901e-07,
      "loss": 3.0668,
      "step": 227212
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.69767165184021,
      "learning_rate": 2.8374462219172835e-07,
      "loss": 2.617,
      "step": 227213
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3104052543640137,
      "learning_rate": 2.835667817864928e-07,
      "loss": 3.0252,
      "step": 227214
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5294015407562256,
      "learning_rate": 2.8338899710421694e-07,
      "loss": 2.9833,
      "step": 227215
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.280081033706665,
      "learning_rate": 2.8321126814486725e-07,
      "loss": 2.7918,
      "step": 227216
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8458211421966553,
      "learning_rate": 2.830335949085771e-07,
      "loss": 2.8519,
      "step": 227217
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8225321769714355,
      "learning_rate": 2.828559773952799e-07,
      "loss": 2.7693,
      "step": 227218
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.890550136566162,
      "learning_rate": 2.826784156050754e-07,
      "loss": 2.8017,
      "step": 227219
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1020214557647705,
      "learning_rate": 2.8250090953799706e-07,
      "loss": 2.9861,
      "step": 227220
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0840792655944824,
      "learning_rate": 2.823234591940449e-07,
      "loss": 2.7711,
      "step": 227221
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9713826179504395,
      "learning_rate": 2.821460645732854e-07,
      "loss": 3.095,
      "step": 227222
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.493529796600342,
      "learning_rate": 2.819687256757186e-07,
      "loss": 2.9371,
      "step": 227223
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9073939323425293,
      "learning_rate": 2.817914425013779e-07,
      "loss": 2.7025,
      "step": 227224
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.366680860519409,
      "learning_rate": 2.816142150502965e-07,
      "loss": 3.0274,
      "step": 227225
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.555849552154541,
      "learning_rate": 2.814370433225743e-07,
      "loss": 2.8904,
      "step": 227226
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.291378974914551,
      "learning_rate": 2.8125992731814485e-07,
      "loss": 2.9966,
      "step": 227227
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6736817359924316,
      "learning_rate": 2.8108286703710793e-07,
      "loss": 2.8918,
      "step": 227228
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.072272539138794,
      "learning_rate": 2.8090586247946355e-07,
      "loss": 2.8332,
      "step": 227229
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0278725624084473,
      "learning_rate": 2.807289136452784e-07,
      "loss": 2.9837,
      "step": 227230
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5659139156341553,
      "learning_rate": 2.805520205345524e-07,
      "loss": 2.476,
      "step": 227231
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9632298946380615,
      "learning_rate": 2.803751831473189e-07,
      "loss": 2.8885,
      "step": 227232
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.321267604827881,
      "learning_rate": 2.801984014836112e-07,
      "loss": 2.8898,
      "step": 227233
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2681281566619873,
      "learning_rate": 2.8002167554349586e-07,
      "loss": 2.857,
      "step": 227234
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.186201572418213,
      "learning_rate": 2.79845005326973e-07,
      "loss": 2.8958,
      "step": 227235
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8542988300323486,
      "learning_rate": 2.796683908340758e-07,
      "loss": 3.0221,
      "step": 227236
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1679575443267822,
      "learning_rate": 2.794918320648709e-07,
      "loss": 3.3374,
      "step": 227237
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.861572504043579,
      "learning_rate": 2.793153290193584e-07,
      "loss": 2.9055,
      "step": 227238
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.413336992263794,
      "learning_rate": 2.7913888169757146e-07,
      "loss": 2.9387,
      "step": 227239
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.725189685821533,
      "learning_rate": 2.789624900995435e-07,
      "loss": 2.6928,
      "step": 227240
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9497885704040527,
      "learning_rate": 2.787861542253078e-07,
      "loss": 2.8631,
      "step": 227241
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4746782779693604,
      "learning_rate": 2.7860987407493097e-07,
      "loss": 3.0736,
      "step": 227242
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9365899562835693,
      "learning_rate": 2.7843364964841297e-07,
      "loss": 3.0758,
      "step": 227243
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.804914712905884,
      "learning_rate": 2.7825748094575387e-07,
      "loss": 2.9399,
      "step": 227244
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.794370412826538,
      "learning_rate": 2.780813679670868e-07,
      "loss": 3.0251,
      "step": 227245
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9171504974365234,
      "learning_rate": 2.7790531071234524e-07,
      "loss": 3.0251,
      "step": 227246
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.188874244689941,
      "learning_rate": 2.7772930918159573e-07,
      "loss": 2.9664,
      "step": 227247
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0404727458953857,
      "learning_rate": 2.7755336337487165e-07,
      "loss": 2.9701,
      "step": 227248
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.818854808807373,
      "learning_rate": 2.773774732922396e-07,
      "loss": 2.8609,
      "step": 227249
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9702749252319336,
      "learning_rate": 2.772016389336662e-07,
      "loss": 2.9512,
      "step": 227250
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8905327320098877,
      "learning_rate": 2.7702586029921815e-07,
      "loss": 2.8998,
      "step": 227251
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.988804817199707,
      "learning_rate": 2.76850137388962e-07,
      "loss": 3.0041,
      "step": 227252
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8727424144744873,
      "learning_rate": 2.7667447020286446e-07,
      "loss": 2.7929,
      "step": 227253
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6033449172973633,
      "learning_rate": 2.764988587409922e-07,
      "loss": 2.7683,
      "step": 227254
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6862542629241943,
      "learning_rate": 2.763233030034118e-07,
      "loss": 2.8538,
      "step": 227255
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.837926149368286,
      "learning_rate": 2.761478029900899e-07,
      "loss": 2.7917,
      "step": 227256
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.954213857650757,
      "learning_rate": 2.759723587010931e-07,
      "loss": 2.9147,
      "step": 227257
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0188846588134766,
      "learning_rate": 2.757969701364549e-07,
      "loss": 3.1554,
      "step": 227258
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.724048376083374,
      "learning_rate": 2.7562163729620836e-07,
      "loss": 2.8338,
      "step": 227259
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6948752403259277,
      "learning_rate": 2.754463601803536e-07,
      "loss": 2.588,
      "step": 227260
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0028910636901855,
      "learning_rate": 2.7527113878899055e-07,
      "loss": 2.9581,
      "step": 227261
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6697659492492676,
      "learning_rate": 2.7509597312208585e-07,
      "loss": 2.9123,
      "step": 227262
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.785634756088257,
      "learning_rate": 2.749208631797062e-07,
      "loss": 3.0971,
      "step": 227263
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.656787395477295,
      "learning_rate": 2.7474580896188483e-07,
      "loss": 3.057,
      "step": 227264
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7241597175598145,
      "learning_rate": 2.7457081046865505e-07,
      "loss": 2.9159,
      "step": 227265
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.788809061050415,
      "learning_rate": 2.743958677000169e-07,
      "loss": 2.8674,
      "step": 227266
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2217228412628174,
      "learning_rate": 2.742209806560369e-07,
      "loss": 2.9291,
      "step": 227267
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.546334743499756,
      "learning_rate": 2.740461493367152e-07,
      "loss": 2.8068,
      "step": 227268
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.484419107437134,
      "learning_rate": 2.7387137374211834e-07,
      "loss": 2.8356,
      "step": 227269
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8408453464508057,
      "learning_rate": 2.7369665387227956e-07,
      "loss": 3.0847,
      "step": 227270
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6643643379211426,
      "learning_rate": 2.7352198972723225e-07,
      "loss": 3.3828,
      "step": 227271
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.870763063430786,
      "learning_rate": 2.733473813069764e-07,
      "loss": 3.2084,
      "step": 227272
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.753406286239624,
      "learning_rate": 2.7317282861154534e-07,
      "loss": 2.8577,
      "step": 227273
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.235109806060791,
      "learning_rate": 2.729983316410056e-07,
      "loss": 3.0021,
      "step": 227274
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5776569843292236,
      "learning_rate": 2.7282389039539054e-07,
      "loss": 2.9125,
      "step": 227275
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.817448854446411,
      "learning_rate": 2.7264950487470016e-07,
      "loss": 3.0419,
      "step": 227276
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.869314193725586,
      "learning_rate": 2.7247517507896775e-07,
      "loss": 2.8113,
      "step": 227277
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.292210578918457,
      "learning_rate": 2.7230090100825996e-07,
      "loss": 2.9721,
      "step": 227278
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.464669704437256,
      "learning_rate": 2.721266826625768e-07,
      "loss": 2.9469,
      "step": 227279
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.846745252609253,
      "learning_rate": 2.7195252004195146e-07,
      "loss": 3.0509,
      "step": 227280
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.374744176864624,
      "learning_rate": 2.7177841314645065e-07,
      "loss": 3.1821,
      "step": 227281
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5328404903411865,
      "learning_rate": 2.7160436197607435e-07,
      "loss": 2.8765,
      "step": 227282
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8304507732391357,
      "learning_rate": 2.714303665308892e-07,
      "loss": 3.0238,
      "step": 227283
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8662643432617188,
      "learning_rate": 2.712564268108619e-07,
      "loss": 2.8,
      "step": 227284
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6322882175445557,
      "learning_rate": 2.7108254281609234e-07,
      "loss": 2.9883,
      "step": 227285
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.862609624862671,
      "learning_rate": 2.709087145465805e-07,
      "loss": 3.0239,
      "step": 227286
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9635872840881348,
      "learning_rate": 2.707349420023597e-07,
      "loss": 3.1195,
      "step": 227287
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.810964822769165,
      "learning_rate": 2.705612251834632e-07,
      "loss": 2.8286,
      "step": 227288
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.489278793334961,
      "learning_rate": 2.703875640899578e-07,
      "loss": 2.8629,
      "step": 227289
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.836477756500244,
      "learning_rate": 2.7021395872180997e-07,
      "loss": 2.6941,
      "step": 227290
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0964646339416504,
      "learning_rate": 2.700404090790864e-07,
      "loss": 3.2534,
      "step": 227291
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5592832565307617,
      "learning_rate": 2.6986691516185376e-07,
      "loss": 2.8187,
      "step": 227292
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.452249050140381,
      "learning_rate": 2.696934769700787e-07,
      "loss": 2.7145,
      "step": 227293
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8382906913757324,
      "learning_rate": 2.695200945038278e-07,
      "loss": 2.673,
      "step": 227294
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9418699741363525,
      "learning_rate": 2.693467677631345e-07,
      "loss": 3.1816,
      "step": 227295
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4991912841796875,
      "learning_rate": 2.691734967480319e-07,
      "loss": 2.7746,
      "step": 227296
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.7756247520446777,
      "learning_rate": 2.6900028145855346e-07,
      "loss": 2.8769,
      "step": 227297
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3345816135406494,
      "learning_rate": 2.6882712189473243e-07,
      "loss": 2.9894,
      "step": 227298
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0012011528015137,
      "learning_rate": 2.686540180565688e-07,
      "loss": 2.8881,
      "step": 227299
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3434031009674072,
      "learning_rate": 2.684809699441626e-07,
      "loss": 2.6046,
      "step": 227300
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.267621040344238,
      "learning_rate": 2.6830797755744707e-07,
      "loss": 3.0768,
      "step": 227301
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9447391033172607,
      "learning_rate": 2.6813504089655544e-07,
      "loss": 2.8727,
      "step": 227302
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6293370723724365,
      "learning_rate": 2.679621599614545e-07,
      "loss": 2.8503,
      "step": 227303
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9215705394744873,
      "learning_rate": 2.6778933475221085e-07,
      "loss": 2.8111,
      "step": 227304
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5875444412231445,
      "learning_rate": 2.676165652688245e-07,
      "loss": 3.0745,
      "step": 227305
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1442434787750244,
      "learning_rate": 2.6744385151136195e-07,
      "loss": 3.1338,
      "step": 227306
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.937842845916748,
      "learning_rate": 2.672711934798233e-07,
      "loss": 2.6705,
      "step": 227307
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0027997493743896,
      "learning_rate": 2.670985911742751e-07,
      "loss": 2.8391,
      "step": 227308
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.761481523513794,
      "learning_rate": 2.6692604459471747e-07,
      "loss": 2.8207,
      "step": 227309
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6862406730651855,
      "learning_rate": 2.667535537412169e-07,
      "loss": 3.1533,
      "step": 227310
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.941384792327881,
      "learning_rate": 2.665811186137734e-07,
      "loss": 2.8833,
      "step": 227311
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0364320278167725,
      "learning_rate": 2.6640873921242034e-07,
      "loss": 2.9309,
      "step": 227312
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.171631097793579,
      "learning_rate": 2.66236415537191e-07,
      "loss": 2.8497,
      "step": 227313
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.253173351287842,
      "learning_rate": 2.66064147588152e-07,
      "loss": 2.9474,
      "step": 227314
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9347102642059326,
      "learning_rate": 2.6589193536530327e-07,
      "loss": 2.9436,
      "step": 227315
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9951889514923096,
      "learning_rate": 2.6571977886867825e-07,
      "loss": 2.7329,
      "step": 227316
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1448988914489746,
      "learning_rate": 2.655476780983101e-07,
      "loss": 2.8701,
      "step": 227317
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.77787184715271,
      "learning_rate": 2.6537563305423224e-07,
      "loss": 2.9312,
      "step": 227318
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0093026161193848,
      "learning_rate": 2.6520364373647794e-07,
      "loss": 2.8685,
      "step": 227319
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8428947925567627,
      "learning_rate": 2.6503171014511384e-07,
      "loss": 2.8428,
      "step": 227320
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8518972396850586,
      "learning_rate": 2.648598322801065e-07,
      "loss": 3.0376,
      "step": 227321
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.102025270462036,
      "learning_rate": 2.6468801014152275e-07,
      "loss": 2.8017,
      "step": 227322
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2982969284057617,
      "learning_rate": 2.645162437293957e-07,
      "loss": 2.9096,
      "step": 227323
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7206273078918457,
      "learning_rate": 2.643445330437588e-07,
      "loss": 2.841,
      "step": 227324
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3938398361206055,
      "learning_rate": 2.6417287808461195e-07,
      "loss": 2.7303,
      "step": 227325
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0921480655670166,
      "learning_rate": 2.640012788520218e-07,
      "loss": 2.7495,
      "step": 227326
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0756914615631104,
      "learning_rate": 2.638297353460217e-07,
      "loss": 2.94,
      "step": 227327
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.687051773071289,
      "learning_rate": 2.636582475666449e-07,
      "loss": 2.9179,
      "step": 227328
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.993818759918213,
      "learning_rate": 2.6348681551389137e-07,
      "loss": 3.1911,
      "step": 227329
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.049323320388794,
      "learning_rate": 2.6331543918782784e-07,
      "loss": 3.1368,
      "step": 227330
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.077967882156372,
      "learning_rate": 2.631441185884875e-07,
      "loss": 2.7966,
      "step": 227331
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.892911672592163,
      "learning_rate": 2.629728537158704e-07,
      "loss": 3.0975,
      "step": 227332
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0685181617736816,
      "learning_rate": 2.6280164457000985e-07,
      "loss": 2.8436,
      "step": 227333
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.895702362060547,
      "learning_rate": 2.6263049115097245e-07,
      "loss": 2.7414,
      "step": 227334
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1100094318389893,
      "learning_rate": 2.6245939345875824e-07,
      "loss": 3.0094,
      "step": 227335
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0199062824249268,
      "learning_rate": 2.6228835149340045e-07,
      "loss": 3.0721,
      "step": 227336
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5812766551971436,
      "learning_rate": 2.6211736525496575e-07,
      "loss": 2.8133,
      "step": 227337
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.897437810897827,
      "learning_rate": 2.6194643474345413e-07,
      "loss": 3.0148,
      "step": 227338
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.856841802597046,
      "learning_rate": 2.617755599588989e-07,
      "loss": 2.7113,
      "step": 227339
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9761359691619873,
      "learning_rate": 2.616047409013333e-07,
      "loss": 3.1444,
      "step": 227340
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.929704189300537,
      "learning_rate": 2.614339775707908e-07,
      "loss": 3.0326,
      "step": 227341
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6367909908294678,
      "learning_rate": 2.6126326996730453e-07,
      "loss": 3.1098,
      "step": 227342
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5868639945983887,
      "learning_rate": 2.610926180909412e-07,
      "loss": 3.204,
      "step": 227343
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.9607744216918945,
      "learning_rate": 2.6092202194166745e-07,
      "loss": 2.9743,
      "step": 227344
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.741586685180664,
      "learning_rate": 2.607514815195499e-07,
      "loss": 3.018,
      "step": 227345
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2032856941223145,
      "learning_rate": 2.6058099682462197e-07,
      "loss": 2.8369,
      "step": 227346
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8319692611694336,
      "learning_rate": 2.6041056785691685e-07,
      "loss": 3.0708,
      "step": 227347
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0434463024139404,
      "learning_rate": 2.6024019461643455e-07,
      "loss": 2.7331,
      "step": 227348
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.022376298904419,
      "learning_rate": 2.6006987710324167e-07,
      "loss": 2.782,
      "step": 227349
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.8967957496643066,
      "learning_rate": 2.5989961531737157e-07,
      "loss": 3.0073,
      "step": 227350
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.854119300842285,
      "learning_rate": 2.5972940925885754e-07,
      "loss": 2.7596,
      "step": 227351
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.15838360786438,
      "learning_rate": 2.595592589276996e-07,
      "loss": 2.9209,
      "step": 227352
    },
    {
      "epoch": 2.96,
      "grad_norm": 7.199189186096191,
      "learning_rate": 2.59389164323931e-07,
      "loss": 2.8699,
      "step": 227353
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.2814860343933105,
      "learning_rate": 2.5921912544761834e-07,
      "loss": 2.8186,
      "step": 227354
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.985015392303467,
      "learning_rate": 2.5904914229876175e-07,
      "loss": 2.6874,
      "step": 227355
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9590373039245605,
      "learning_rate": 2.5887921487742767e-07,
      "loss": 2.9989,
      "step": 227356
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0354323387145996,
      "learning_rate": 2.5870934318361627e-07,
      "loss": 2.7805,
      "step": 227357
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.919705867767334,
      "learning_rate": 2.585395272173607e-07,
      "loss": 2.741,
      "step": 227358
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8802943229675293,
      "learning_rate": 2.5836976697872767e-07,
      "loss": 2.9629,
      "step": 227359
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.942732095718384,
      "learning_rate": 2.582000624676839e-07,
      "loss": 2.8759,
      "step": 227360
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1480765342712402,
      "learning_rate": 2.580304136843292e-07,
      "loss": 2.9512,
      "step": 227361
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8577983379364014,
      "learning_rate": 2.5786082062866364e-07,
      "loss": 2.9416,
      "step": 227362
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9185826778411865,
      "learning_rate": 2.5769128330068724e-07,
      "loss": 2.8192,
      "step": 227363
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1784627437591553,
      "learning_rate": 2.5752180170049986e-07,
      "loss": 2.9224,
      "step": 227364
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.029984712600708,
      "learning_rate": 2.5735237582806825e-07,
      "loss": 3.0755,
      "step": 227365
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.992940902709961,
      "learning_rate": 2.5718300568345895e-07,
      "loss": 2.9652,
      "step": 227366
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.913571834564209,
      "learning_rate": 2.5701369126670536e-07,
      "loss": 2.957,
      "step": 227367
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8149631023406982,
      "learning_rate": 2.568444325778407e-07,
      "loss": 2.8293,
      "step": 227368
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0585150718688965,
      "learning_rate": 2.5667522961689833e-07,
      "loss": 2.7352,
      "step": 227369
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3304808139801025,
      "learning_rate": 2.565060823838783e-07,
      "loss": 3.0045,
      "step": 227370
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.970215082168579,
      "learning_rate": 2.5633699087881375e-07,
      "loss": 2.8041,
      "step": 227371
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1398820877075195,
      "learning_rate": 2.561679551017715e-07,
      "loss": 2.7011,
      "step": 227372
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.950613021850586,
      "learning_rate": 2.5599897505278466e-07,
      "loss": 2.609,
      "step": 227373
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.210165500640869,
      "learning_rate": 2.5583005073182006e-07,
      "loss": 2.8752,
      "step": 227374
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0712170600891113,
      "learning_rate": 2.5566118213897757e-07,
      "loss": 3.1738,
      "step": 227375
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.499032974243164,
      "learning_rate": 2.5549236927429053e-07,
      "loss": 2.6597,
      "step": 227376
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.408353567123413,
      "learning_rate": 2.5532361213772556e-07,
      "loss": 2.8351,
      "step": 227377
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.651442050933838,
      "learning_rate": 2.5515491072938264e-07,
      "loss": 2.8248,
      "step": 227378
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.480769157409668,
      "learning_rate": 2.549862650492618e-07,
      "loss": 2.6565,
      "step": 227379
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.413688898086548,
      "learning_rate": 2.548176750973963e-07,
      "loss": 3.1449,
      "step": 227380
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7415878772735596,
      "learning_rate": 2.546491408737861e-07,
      "loss": 2.8623,
      "step": 227381
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6876230239868164,
      "learning_rate": 2.5448066237853114e-07,
      "loss": 3.0117,
      "step": 227382
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7942659854888916,
      "learning_rate": 2.543122396116315e-07,
      "loss": 2.9581,
      "step": 227383
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1246328353881836,
      "learning_rate": 2.5414387257308713e-07,
      "loss": 2.7838,
      "step": 227384
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0101544857025146,
      "learning_rate": 2.539755612629646e-07,
      "loss": 2.8595,
      "step": 227385
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8337366580963135,
      "learning_rate": 2.538073056812973e-07,
      "loss": 3.0576,
      "step": 227386
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6021838188171387,
      "learning_rate": 2.536391058280851e-07,
      "loss": 3.1226,
      "step": 227387
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.788820743560791,
      "learning_rate": 2.5347096170339474e-07,
      "loss": 2.8364,
      "step": 227388
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7544522285461426,
      "learning_rate": 2.533028733072262e-07,
      "loss": 2.9685,
      "step": 227389
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.960895538330078,
      "learning_rate": 2.5313484063964606e-07,
      "loss": 2.849,
      "step": 227390
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2300684452056885,
      "learning_rate": 2.5296686370065435e-07,
      "loss": 3.0032,
      "step": 227391
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.772122859954834,
      "learning_rate": 2.5279894249031764e-07,
      "loss": 2.952,
      "step": 227392
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7561757564544678,
      "learning_rate": 2.526310770086026e-07,
      "loss": 2.7975,
      "step": 227393
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.141547679901123,
      "learning_rate": 2.5246326725560926e-07,
      "loss": 2.8723,
      "step": 227394
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.6994152069091797,
      "learning_rate": 2.5229551323133754e-07,
      "loss": 3.0304,
      "step": 227395
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9657673835754395,
      "learning_rate": 2.521278149358208e-07,
      "loss": 2.8405,
      "step": 227396
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9125962257385254,
      "learning_rate": 2.5196017236909225e-07,
      "loss": 2.9124,
      "step": 227397
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.100278854370117,
      "learning_rate": 2.5179258553118533e-07,
      "loss": 2.8735,
      "step": 227398
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2774646282196045,
      "learning_rate": 2.516250544221332e-07,
      "loss": 3.0759,
      "step": 227399
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.572765827178955,
      "learning_rate": 2.51457579041936e-07,
      "loss": 2.7507,
      "step": 227400
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9994871616363525,
      "learning_rate": 2.5129015939069354e-07,
      "loss": 2.8593,
      "step": 227401
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.259246587753296,
      "learning_rate": 2.5112279546837255e-07,
      "loss": 2.7609,
      "step": 227402
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.7733101844787598,
      "learning_rate": 2.5095548727500635e-07,
      "loss": 3.0476,
      "step": 227403
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.138468027114868,
      "learning_rate": 2.5078823481066155e-07,
      "loss": 3.004,
      "step": 227404
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3098020553588867,
      "learning_rate": 2.506210380753715e-07,
      "loss": 2.7737,
      "step": 227405
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6555497646331787,
      "learning_rate": 2.5045389706910277e-07,
      "loss": 3.1136,
      "step": 227406
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.971985340118408,
      "learning_rate": 2.5028681179198875e-07,
      "loss": 2.7642,
      "step": 227407
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5835559368133545,
      "learning_rate": 2.5011978224396266e-07,
      "loss": 2.7262,
      "step": 227408
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.706538438796997,
      "learning_rate": 2.4995280842512457e-07,
      "loss": 2.7159,
      "step": 227409
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.558717727661133,
      "learning_rate": 2.4978589033547437e-07,
      "loss": 2.8699,
      "step": 227410
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8574631214141846,
      "learning_rate": 2.496190279750454e-07,
      "loss": 3.0151,
      "step": 227411
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.720902919769287,
      "learning_rate": 2.494522213438377e-07,
      "loss": 2.8974,
      "step": 227412
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9829189777374268,
      "learning_rate": 2.4928547044195115e-07,
      "loss": 2.8745,
      "step": 227413
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9434971809387207,
      "learning_rate": 2.4911877526938575e-07,
      "loss": 2.8409,
      "step": 227414
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.586700916290283,
      "learning_rate": 2.489521358261415e-07,
      "loss": 3.0732,
      "step": 227415
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.981954574584961,
      "learning_rate": 2.48785552112285e-07,
      "loss": 2.9878,
      "step": 227416
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.835383892059326,
      "learning_rate": 2.4861902412784964e-07,
      "loss": 2.7932,
      "step": 227417
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6681551933288574,
      "learning_rate": 2.484525518728353e-07,
      "loss": 2.9889,
      "step": 227418
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5482540130615234,
      "learning_rate": 2.4828613534730867e-07,
      "loss": 2.9279,
      "step": 227419
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2577123641967773,
      "learning_rate": 2.4811977455126974e-07,
      "loss": 2.7859,
      "step": 227420
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9761099815368652,
      "learning_rate": 2.479534694847518e-07,
      "loss": 2.8448,
      "step": 227421
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5513253211975098,
      "learning_rate": 2.477872201478215e-07,
      "loss": 2.8516,
      "step": 227422
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.096238374710083,
      "learning_rate": 2.4762102654047876e-07,
      "loss": 3.0705,
      "step": 227423
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5011603832244873,
      "learning_rate": 2.4745488866279026e-07,
      "loss": 2.96,
      "step": 227424
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.711167097091675,
      "learning_rate": 2.472888065147227e-07,
      "loss": 2.7127,
      "step": 227425
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8708107471466064,
      "learning_rate": 2.4712278009634267e-07,
      "loss": 3.0199,
      "step": 227426
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.199685573577881,
      "learning_rate": 2.4695680940768347e-07,
      "loss": 2.8957,
      "step": 227427
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.998913049697876,
      "learning_rate": 2.467908944487784e-07,
      "loss": 2.9986,
      "step": 227428
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6906075477600098,
      "learning_rate": 2.466250352196608e-07,
      "loss": 2.9996,
      "step": 227429
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8968420028686523,
      "learning_rate": 2.4645923172033065e-07,
      "loss": 2.7154,
      "step": 227430
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.016085624694824,
      "learning_rate": 2.4629348395088787e-07,
      "loss": 2.9784,
      "step": 227431
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.333573579788208,
      "learning_rate": 2.4612779191126587e-07,
      "loss": 2.9436,
      "step": 227432
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3292362689971924,
      "learning_rate": 2.459621556015645e-07,
      "loss": 2.6547,
      "step": 227433
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1453857421875,
      "learning_rate": 2.4579657502181714e-07,
      "loss": 2.8611,
      "step": 227434
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.869565486907959,
      "learning_rate": 2.4563105017202377e-07,
      "loss": 2.8199,
      "step": 227435
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6355748176574707,
      "learning_rate": 2.4546558105221767e-07,
      "loss": 3.1225,
      "step": 227436
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.799680709838867,
      "learning_rate": 2.453001676624322e-07,
      "loss": 3.174,
      "step": 227437
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.124030590057373,
      "learning_rate": 2.451348100027006e-07,
      "loss": 2.8654,
      "step": 227438
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1435704231262207,
      "learning_rate": 2.449695080730896e-07,
      "loss": 2.9839,
      "step": 227439
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7324113845825195,
      "learning_rate": 2.4480426187356574e-07,
      "loss": 2.8375,
      "step": 227440
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6196017265319824,
      "learning_rate": 2.446390714041957e-07,
      "loss": 3.0928,
      "step": 227441
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.139737367630005,
      "learning_rate": 2.444739366650128e-07,
      "loss": 2.9983,
      "step": 227442
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.18753719329834,
      "learning_rate": 2.4430885765605034e-07,
      "loss": 2.9152,
      "step": 227443
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3401544094085693,
      "learning_rate": 2.4414383437730834e-07,
      "loss": 3.0043,
      "step": 227444
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.432274580001831,
      "learning_rate": 2.4397886682885335e-07,
      "loss": 3.0438,
      "step": 227445
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5324761867523193,
      "learning_rate": 2.4381395501068544e-07,
      "loss": 3.0261,
      "step": 227446
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8946590423583984,
      "learning_rate": 2.436490989228712e-07,
      "loss": 3.0739,
      "step": 227447
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.746124505996704,
      "learning_rate": 2.434842985654106e-07,
      "loss": 2.7035,
      "step": 227448
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3961639404296875,
      "learning_rate": 2.43319553938337e-07,
      "loss": 2.9287,
      "step": 227449
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.202481985092163,
      "learning_rate": 2.431548650416837e-07,
      "loss": 3.1061,
      "step": 227450
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9414007663726807,
      "learning_rate": 2.4299023187551726e-07,
      "loss": 2.8814,
      "step": 227451
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.69265079498291,
      "learning_rate": 2.428256544398044e-07,
      "loss": 2.8159,
      "step": 227452
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7149746417999268,
      "learning_rate": 2.42661132734645e-07,
      "loss": 2.6774,
      "step": 227453
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4099385738372803,
      "learning_rate": 2.424966667600059e-07,
      "loss": 2.9212,
      "step": 227454
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.128986120223999,
      "learning_rate": 2.4233225651595355e-07,
      "loss": 3.1576,
      "step": 227455
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9584240913391113,
      "learning_rate": 2.42167902002488e-07,
      "loss": 2.8585,
      "step": 227456
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.570833444595337,
      "learning_rate": 2.4200360321967595e-07,
      "loss": 2.7806,
      "step": 227457
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.956968069076538,
      "learning_rate": 2.418393601675506e-07,
      "loss": 3.0257,
      "step": 227458
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8899433612823486,
      "learning_rate": 2.4167517284611194e-07,
      "loss": 2.8874,
      "step": 227459
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.113814115524292,
      "learning_rate": 2.415110412553933e-07,
      "loss": 3.0509,
      "step": 227460
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.099801778793335,
      "learning_rate": 2.413469653954614e-07,
      "loss": 2.9935,
      "step": 227461
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.029890298843384,
      "learning_rate": 2.411829452663161e-07,
      "loss": 2.8783,
      "step": 227462
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.355405807495117,
      "learning_rate": 2.4101898086799074e-07,
      "loss": 2.9957,
      "step": 227463
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.357548475265503,
      "learning_rate": 2.408550722005187e-07,
      "loss": 2.8465,
      "step": 227464
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7985386848449707,
      "learning_rate": 2.406912192638999e-07,
      "loss": 2.9522,
      "step": 227465
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8447229862213135,
      "learning_rate": 2.4052742205823427e-07,
      "loss": 2.9128,
      "step": 227466
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5843563079833984,
      "learning_rate": 2.403636805834885e-07,
      "loss": 2.9668,
      "step": 227467
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.513862133026123,
      "learning_rate": 2.4019999483972927e-07,
      "loss": 2.9379,
      "step": 227468
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.209016799926758,
      "learning_rate": 2.4003636482698983e-07,
      "loss": 2.9569,
      "step": 227469
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.562993049621582,
      "learning_rate": 2.398727905452369e-07,
      "loss": 2.8757,
      "step": 227470
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.62739372253418,
      "learning_rate": 2.3970927199460363e-07,
      "loss": 2.9942,
      "step": 227471
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.7917656898498535,
      "learning_rate": 2.395458091750568e-07,
      "loss": 2.7569,
      "step": 227472
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9703023433685303,
      "learning_rate": 2.393824020866297e-07,
      "loss": 2.8491,
      "step": 227473
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.8896965980529785,
      "learning_rate": 2.3921905072932236e-07,
      "loss": 2.8995,
      "step": 227474
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8505594730377197,
      "learning_rate": 2.390557551032679e-07,
      "loss": 3.0145,
      "step": 227475
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2625415325164795,
      "learning_rate": 2.388925152083665e-07,
      "loss": 3.0417,
      "step": 227476
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1763057708740234,
      "learning_rate": 2.387293310447513e-07,
      "loss": 2.9796,
      "step": 227477
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.720977306365967,
      "learning_rate": 2.3856620261242243e-07,
      "loss": 3.0762,
      "step": 227478
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1122119426727295,
      "learning_rate": 2.3840312991137978e-07,
      "loss": 2.8444,
      "step": 227479
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7682695388793945,
      "learning_rate": 2.3824011294169e-07,
      "loss": 2.8472,
      "step": 227480
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7745416164398193,
      "learning_rate": 2.3807715170338636e-07,
      "loss": 2.8555,
      "step": 227481
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.062609672546387,
      "learning_rate": 2.379142461964356e-07,
      "loss": 2.9201,
      "step": 227482
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7081596851348877,
      "learning_rate": 2.3775139642097096e-07,
      "loss": 3.0729,
      "step": 227483
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7545297145843506,
      "learning_rate": 2.3758860237692578e-07,
      "loss": 2.972,
      "step": 227484
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.875959634780884,
      "learning_rate": 2.374258640643667e-07,
      "loss": 2.9326,
      "step": 227485
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8132903575897217,
      "learning_rate": 2.3726318148336032e-07,
      "loss": 2.7649,
      "step": 227486
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2694363594055176,
      "learning_rate": 2.3710055463390664e-07,
      "loss": 2.9594,
      "step": 227487
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1037518978118896,
      "learning_rate": 2.369379835160057e-07,
      "loss": 2.9366,
      "step": 227488
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.972886323928833,
      "learning_rate": 2.3677546812972403e-07,
      "loss": 2.9841,
      "step": 227489
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5547897815704346,
      "learning_rate": 2.3661300847509501e-07,
      "loss": 2.5913,
      "step": 227490
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.597635269165039,
      "learning_rate": 2.3645060455211862e-07,
      "loss": 3.1181,
      "step": 227491
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.89361310005188,
      "learning_rate": 2.3628825636086145e-07,
      "loss": 2.9739,
      "step": 227492
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.6902191638946533,
      "learning_rate": 2.3612596390135685e-07,
      "loss": 2.9607,
      "step": 227493
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8919005393981934,
      "learning_rate": 2.3596372717357147e-07,
      "loss": 3.0904,
      "step": 227494
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9274709224700928,
      "learning_rate": 2.3580154617760526e-07,
      "loss": 2.8646,
      "step": 227495
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.563361883163452,
      "learning_rate": 2.3563942091345822e-07,
      "loss": 2.8712,
      "step": 227496
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.58538556098938,
      "learning_rate": 2.3547735138116365e-07,
      "loss": 2.6779,
      "step": 227497
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.733201742172241,
      "learning_rate": 2.3531533758075482e-07,
      "loss": 3.2195,
      "step": 227498
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.7427217960357666,
      "learning_rate": 2.3515337951223178e-07,
      "loss": 2.884,
      "step": 227499
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.681783676147461,
      "learning_rate": 2.3499147717569443e-07,
      "loss": 3.029,
      "step": 227500
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.9249119758605957,
      "learning_rate": 2.3482963057110948e-07,
      "loss": 3.1205,
      "step": 227501
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7879488468170166,
      "learning_rate": 2.3466783969854354e-07,
      "loss": 3.1214,
      "step": 227502
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.694033622741699,
      "learning_rate": 2.3450610455799656e-07,
      "loss": 2.984,
      "step": 227503
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0287716388702393,
      "learning_rate": 2.343444251495019e-07,
      "loss": 3.0638,
      "step": 227504
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0038983821868896,
      "learning_rate": 2.3418280147312618e-07,
      "loss": 3.0655,
      "step": 227505
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9636166095733643,
      "learning_rate": 2.3402123352886937e-07,
      "loss": 3.0594,
      "step": 227506
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7562766075134277,
      "learning_rate": 2.3385972131676478e-07,
      "loss": 2.812,
      "step": 227507
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.092827320098877,
      "learning_rate": 2.3369826483681242e-07,
      "loss": 2.5349,
      "step": 227508
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0813653469085693,
      "learning_rate": 2.335368640891122e-07,
      "loss": 2.8723,
      "step": 227509
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5037689208984375,
      "learning_rate": 2.3337551907363084e-07,
      "loss": 2.6851,
      "step": 227510
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.009136199951172,
      "learning_rate": 2.3321422979043492e-07,
      "loss": 3.0777,
      "step": 227511
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9515931606292725,
      "learning_rate": 2.3305299623955775e-07,
      "loss": 2.9343,
      "step": 227512
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.051262378692627,
      "learning_rate": 2.3289181842099935e-07,
      "loss": 2.6855,
      "step": 227513
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6785728931427,
      "learning_rate": 2.3273069633482633e-07,
      "loss": 2.858,
      "step": 227514
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8982903957366943,
      "learning_rate": 2.3256962998100537e-07,
      "loss": 2.8259,
      "step": 227515
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3914408683776855,
      "learning_rate": 2.324086193596364e-07,
      "loss": 3.0011,
      "step": 227516
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9297776222229004,
      "learning_rate": 2.322476644707194e-07,
      "loss": 3.0449,
      "step": 227517
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.566995859146118,
      "learning_rate": 2.320867653142877e-07,
      "loss": 2.9443,
      "step": 227518
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.823467254638672,
      "learning_rate": 2.3192592189037463e-07,
      "loss": 3.1648,
      "step": 227519
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.108231544494629,
      "learning_rate": 2.3176513419898013e-07,
      "loss": 2.9476,
      "step": 227520
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9356653690338135,
      "learning_rate": 2.3160440224017086e-07,
      "loss": 3.0608,
      "step": 227521
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8950345516204834,
      "learning_rate": 2.314437260139801e-07,
      "loss": 3.0718,
      "step": 227522
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.524048089981079,
      "learning_rate": 2.3128310552040786e-07,
      "loss": 2.8975,
      "step": 227523
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6812472343444824,
      "learning_rate": 2.3112254075952076e-07,
      "loss": 2.5728,
      "step": 227524
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.673961639404297,
      "learning_rate": 2.3096203173128548e-07,
      "loss": 3.1595,
      "step": 227525
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.717789888381958,
      "learning_rate": 2.3080157843580193e-07,
      "loss": 2.8594,
      "step": 227526
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9894680976867676,
      "learning_rate": 2.3064118087307016e-07,
      "loss": 2.7532,
      "step": 227527
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.580305576324463,
      "learning_rate": 2.304808390431234e-07,
      "loss": 3.0414,
      "step": 227528
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8577880859375,
      "learning_rate": 2.3032055294596174e-07,
      "loss": 2.8729,
      "step": 227529
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0097460746765137,
      "learning_rate": 2.3016032258165172e-07,
      "loss": 2.859,
      "step": 227530
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4322521686553955,
      "learning_rate": 2.300001479502267e-07,
      "loss": 3.0251,
      "step": 227531
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5529632568359375,
      "learning_rate": 2.2984002905171994e-07,
      "loss": 2.8528,
      "step": 227532
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2178637981414795,
      "learning_rate": 2.2967996588609817e-07,
      "loss": 2.8425,
      "step": 227533
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.541006088256836,
      "learning_rate": 2.2951995845349457e-07,
      "loss": 3.1588,
      "step": 227534
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9125237464904785,
      "learning_rate": 2.2936000675384257e-07,
      "loss": 2.8511,
      "step": 227535
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.942606210708618,
      "learning_rate": 2.292001107872088e-07,
      "loss": 2.9484,
      "step": 227536
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.404916763305664,
      "learning_rate": 2.290402705536265e-07,
      "loss": 3.1364,
      "step": 227537
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1738500595092773,
      "learning_rate": 2.2888048605316234e-07,
      "loss": 2.6188,
      "step": 227538
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5699002742767334,
      "learning_rate": 2.2872075728574968e-07,
      "loss": 2.9533,
      "step": 227539
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9398319721221924,
      "learning_rate": 2.2856108425152177e-07,
      "loss": 3.0128,
      "step": 227540
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0340218544006348,
      "learning_rate": 2.2840146695044525e-07,
      "loss": 2.8188,
      "step": 227541
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.671999931335449,
      "learning_rate": 2.282419053825535e-07,
      "loss": 2.9042,
      "step": 227542
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.845447063446045,
      "learning_rate": 2.2808239954791306e-07,
      "loss": 3.0046,
      "step": 227543
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7855687141418457,
      "learning_rate": 2.27922949446524e-07,
      "loss": 2.8006,
      "step": 227544
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.459530830383301,
      "learning_rate": 2.2776355507841958e-07,
      "loss": 3.0628,
      "step": 227545
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.749459981918335,
      "learning_rate": 2.2760421644363313e-07,
      "loss": 2.8332,
      "step": 227546
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.88481068611145,
      "learning_rate": 2.2744493354219794e-07,
      "loss": 2.8948,
      "step": 227547
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8646180629730225,
      "learning_rate": 2.2728570637414732e-07,
      "loss": 3.2174,
      "step": 227548
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.280658006668091,
      "learning_rate": 2.271265349394813e-07,
      "loss": 2.9239,
      "step": 227549
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9914562702178955,
      "learning_rate": 2.2696741923826646e-07,
      "loss": 3.0023,
      "step": 227550
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7241411209106445,
      "learning_rate": 2.2680835927050278e-07,
      "loss": 3.0071,
      "step": 227551
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7485532760620117,
      "learning_rate": 2.2664935503625693e-07,
      "loss": 2.8508,
      "step": 227552
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.2265119552612305,
      "learning_rate": 2.2649040653552885e-07,
      "loss": 2.8234,
      "step": 227553
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.515650987625122,
      "learning_rate": 2.263315137683519e-07,
      "loss": 3.1853,
      "step": 227554
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.307605504989624,
      "learning_rate": 2.2617267673472605e-07,
      "loss": 3.1676,
      "step": 227555
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.865973949432373,
      "learning_rate": 2.2601389543475125e-07,
      "loss": 2.6955,
      "step": 227556
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.155575752258301,
      "learning_rate": 2.2585516986842745e-07,
      "loss": 3.1632,
      "step": 227557
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.069061040878296,
      "learning_rate": 2.256965000357547e-07,
      "loss": 3.0858,
      "step": 227558
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4044485092163086,
      "learning_rate": 2.2553788593679956e-07,
      "loss": 3.1176,
      "step": 227559
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0933313369750977,
      "learning_rate": 2.2537932757156207e-07,
      "loss": 2.8951,
      "step": 227560
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9470620155334473,
      "learning_rate": 2.2522082494007555e-07,
      "loss": 2.888,
      "step": 227561
    },
    {
      "epoch": 2.96,
      "grad_norm": 5.174045085906982,
      "learning_rate": 2.2506237804237325e-07,
      "loss": 3.051,
      "step": 227562
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7552430629730225,
      "learning_rate": 2.2490398687852184e-07,
      "loss": 2.9181,
      "step": 227563
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9226109981536865,
      "learning_rate": 2.247456514485213e-07,
      "loss": 2.9882,
      "step": 227564
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9510908126831055,
      "learning_rate": 2.2458737175237164e-07,
      "loss": 3.1852,
      "step": 227565
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.038459300994873,
      "learning_rate": 2.2442914779013942e-07,
      "loss": 3.0161,
      "step": 227566
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9171316623687744,
      "learning_rate": 2.24270979561858e-07,
      "loss": 2.8839,
      "step": 227567
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9500343799591064,
      "learning_rate": 2.241128670675274e-07,
      "loss": 2.9161,
      "step": 227568
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8779332637786865,
      "learning_rate": 2.239548103071809e-07,
      "loss": 2.8492,
      "step": 227569
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0019516944885254,
      "learning_rate": 2.2379680928088507e-07,
      "loss": 2.7738,
      "step": 227570
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0910637378692627,
      "learning_rate": 2.2363886398863996e-07,
      "loss": 3.2548,
      "step": 227571
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5297584533691406,
      "learning_rate": 2.2348097443047885e-07,
      "loss": 2.8083,
      "step": 227572
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0807697772979736,
      "learning_rate": 2.233231406064351e-07,
      "loss": 2.9107,
      "step": 227573
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.938063621520996,
      "learning_rate": 2.2316536251650864e-07,
      "loss": 2.8566,
      "step": 227574
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.845994710922241,
      "learning_rate": 2.230076401607994e-07,
      "loss": 2.8903,
      "step": 227575
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8498408794403076,
      "learning_rate": 2.2284997353924084e-07,
      "loss": 2.7566,
      "step": 227576
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9749412536621094,
      "learning_rate": 2.226923626519328e-07,
      "loss": 2.5724,
      "step": 227577
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.295413017272949,
      "learning_rate": 2.2253480749890861e-07,
      "loss": 2.7346,
      "step": 227578
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.7401716709136963,
      "learning_rate": 2.22377308080135e-07,
      "loss": 3.3125,
      "step": 227579
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6059134006500244,
      "learning_rate": 2.2221986439571182e-07,
      "loss": 2.8748,
      "step": 227580
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.904038667678833,
      "learning_rate": 2.2206247644563912e-07,
      "loss": 2.6194,
      "step": 227581
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.526890993118286,
      "learning_rate": 2.219051442299169e-07,
      "loss": 2.9502,
      "step": 227582
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.953634023666382,
      "learning_rate": 2.2174786774861174e-07,
      "loss": 3.1797,
      "step": 227583
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1211283206939697,
      "learning_rate": 2.21590647001757e-07,
      "loss": 2.8242,
      "step": 227584
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0890820026397705,
      "learning_rate": 2.2143348198935263e-07,
      "loss": 3.2054,
      "step": 227585
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.769552707672119,
      "learning_rate": 2.2127637271143195e-07,
      "loss": 3.0117,
      "step": 227586
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.6118409633636475,
      "learning_rate": 2.2111931916806158e-07,
      "loss": 3.2703,
      "step": 227587
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1554195880889893,
      "learning_rate": 2.2096232135920821e-07,
      "loss": 2.9134,
      "step": 227588
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1559531688690186,
      "learning_rate": 2.2080537928497176e-07,
      "loss": 3.0594,
      "step": 227589
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4595420360565186,
      "learning_rate": 2.2064849294531893e-07,
      "loss": 2.7057,
      "step": 227590
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7094290256500244,
      "learning_rate": 2.2049166234034965e-07,
      "loss": 2.9909,
      "step": 227591
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.5339407920837402,
      "learning_rate": 2.2033488746999728e-07,
      "loss": 2.8095,
      "step": 227592
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7569661140441895,
      "learning_rate": 2.2017816833436176e-07,
      "loss": 2.6408,
      "step": 227593
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6549110412597656,
      "learning_rate": 2.2002150493347638e-07,
      "loss": 2.6643,
      "step": 227594
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.631617307662964,
      "learning_rate": 2.1986489726734114e-07,
      "loss": 2.6137,
      "step": 227595
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.739368200302124,
      "learning_rate": 2.1970834533595604e-07,
      "loss": 2.8698,
      "step": 227596
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.760089874267578,
      "learning_rate": 2.1955184913942103e-07,
      "loss": 3.0737,
      "step": 227597
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8583245277404785,
      "learning_rate": 2.1939540867773609e-07,
      "loss": 2.9811,
      "step": 227598
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0102224349975586,
      "learning_rate": 2.192390239509012e-07,
      "loss": 2.8892,
      "step": 227599
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0410959720611572,
      "learning_rate": 2.190826949589497e-07,
      "loss": 2.9422,
      "step": 227600
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.693305492401123,
      "learning_rate": 2.189264217019815e-07,
      "loss": 2.9296,
      "step": 227601
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1346523761749268,
      "learning_rate": 2.1877020417992996e-07,
      "loss": 3.1407,
      "step": 227602
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.830260992050171,
      "learning_rate": 2.1861404239289504e-07,
      "loss": 3.0736,
      "step": 227603
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.500667095184326,
      "learning_rate": 2.1845793634087673e-07,
      "loss": 3.1107,
      "step": 227604
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.742685556411743,
      "learning_rate": 2.183018860239083e-07,
      "loss": 3.0808,
      "step": 227605
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.153019666671753,
      "learning_rate": 2.181458914419898e-07,
      "loss": 2.8604,
      "step": 227606
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8411383628845215,
      "learning_rate": 2.179899525951878e-07,
      "loss": 2.9617,
      "step": 227607
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0015745162963867,
      "learning_rate": 2.1783406948353567e-07,
      "loss": 2.8558,
      "step": 227608
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7775022983551025,
      "learning_rate": 2.1767824210703334e-07,
      "loss": 2.8884,
      "step": 227609
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.715869188308716,
      "learning_rate": 2.1752247046571414e-07,
      "loss": 3.2267,
      "step": 227610
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8389968872070312,
      "learning_rate": 2.173667545596447e-07,
      "loss": 2.8787,
      "step": 227611
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8109796047210693,
      "learning_rate": 2.172110943887917e-07,
      "loss": 2.8305,
      "step": 227612
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.886091709136963,
      "learning_rate": 2.1705548995325507e-07,
      "loss": 2.9011,
      "step": 227613
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.340757369995117,
      "learning_rate": 2.1689994125300148e-07,
      "loss": 2.9114,
      "step": 227614
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.549191474914551,
      "learning_rate": 2.1674444828809757e-07,
      "loss": 3.2787,
      "step": 227615
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6317169666290283,
      "learning_rate": 2.165890110585433e-07,
      "loss": 2.7579,
      "step": 227616
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.5051934719085693,
      "learning_rate": 2.1643362956440536e-07,
      "loss": 2.7417,
      "step": 227617
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6673901081085205,
      "learning_rate": 2.1627830380568367e-07,
      "loss": 2.9158,
      "step": 227618
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.533810615539551,
      "learning_rate": 2.1612303378241158e-07,
      "loss": 2.9733,
      "step": 227619
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.833162307739258,
      "learning_rate": 2.1596781949462237e-07,
      "loss": 2.7694,
      "step": 227620
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0956642627716064,
      "learning_rate": 2.158126609423494e-07,
      "loss": 2.8274,
      "step": 227621
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.715653896331787,
      "learning_rate": 2.156575581255926e-07,
      "loss": 2.9667,
      "step": 227622
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7276175022125244,
      "learning_rate": 2.1550251104441861e-07,
      "loss": 3.1245,
      "step": 227623
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9795475006103516,
      "learning_rate": 2.1534751969886076e-07,
      "loss": 3.1189,
      "step": 227624
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.9355669021606445,
      "learning_rate": 2.1519258408888574e-07,
      "loss": 2.9523,
      "step": 227625
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9104225635528564,
      "learning_rate": 2.1503770421459342e-07,
      "loss": 2.8591,
      "step": 227626
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.106595516204834,
      "learning_rate": 2.1488288007598385e-07,
      "loss": 2.809,
      "step": 227627
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9536800384521484,
      "learning_rate": 2.1472811167309033e-07,
      "loss": 2.9427,
      "step": 227628
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.925713062286377,
      "learning_rate": 2.1457339900594616e-07,
      "loss": 3.1715,
      "step": 227629
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8543288707733154,
      "learning_rate": 2.1441874207455134e-07,
      "loss": 2.7598,
      "step": 227630
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.104200839996338,
      "learning_rate": 2.1426414087893917e-07,
      "loss": 2.9754,
      "step": 227631
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.457204818725586,
      "learning_rate": 2.1410959541917627e-07,
      "loss": 3.3151,
      "step": 227632
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.660130739212036,
      "learning_rate": 2.1395510569526264e-07,
      "loss": 2.8838,
      "step": 227633
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.782076358795166,
      "learning_rate": 2.1380067170726488e-07,
      "loss": 3.0054,
      "step": 227634
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0408761501312256,
      "learning_rate": 2.1364629345514972e-07,
      "loss": 2.9507,
      "step": 227635
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6604080200195312,
      "learning_rate": 2.1349197093898373e-07,
      "loss": 2.823,
      "step": 227636
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.874147653579712,
      "learning_rate": 2.1333770415880024e-07,
      "loss": 3.0648,
      "step": 227637
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2588493824005127,
      "learning_rate": 2.1318349311459927e-07,
      "loss": 3.0534,
      "step": 227638
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.299306869506836,
      "learning_rate": 2.1302933780641406e-07,
      "loss": 2.7296,
      "step": 227639
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.554999589920044,
      "learning_rate": 2.128752382343113e-07,
      "loss": 2.8821,
      "step": 227640
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0656983852386475,
      "learning_rate": 2.1272119439825764e-07,
      "loss": 2.9939,
      "step": 227641
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.764068365097046,
      "learning_rate": 2.12567206298353e-07,
      "loss": 2.8729,
      "step": 227642
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9662466049194336,
      "learning_rate": 2.1241327393459738e-07,
      "loss": 3.1016,
      "step": 227643
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1761903762817383,
      "learning_rate": 2.1225939730699082e-07,
      "loss": 3.0514,
      "step": 227644
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.518401622772217,
      "learning_rate": 2.121055764155999e-07,
      "loss": 2.9482,
      "step": 227645
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9908225536346436,
      "learning_rate": 2.119518112604246e-07,
      "loss": 2.9233,
      "step": 227646
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.4930858612060547,
      "learning_rate": 2.1179810184149825e-07,
      "loss": 2.7641,
      "step": 227647
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3604319095611572,
      "learning_rate": 2.1164444815888747e-07,
      "loss": 2.8057,
      "step": 227648
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.9245455265045166,
      "learning_rate": 2.1149085021255896e-07,
      "loss": 3.084,
      "step": 227649
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4872257709503174,
      "learning_rate": 2.113373080026126e-07,
      "loss": 2.6553,
      "step": 227650
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0803959369659424,
      "learning_rate": 2.1118382152898182e-07,
      "loss": 2.9635,
      "step": 227651
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.881706953048706,
      "learning_rate": 2.1103039079179985e-07,
      "loss": 2.7998,
      "step": 227652
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1449854373931885,
      "learning_rate": 2.1087701579103334e-07,
      "loss": 2.952,
      "step": 227653
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.62758207321167,
      "learning_rate": 2.1072369652671562e-07,
      "loss": 2.8917,
      "step": 227654
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.6926891803741455,
      "learning_rate": 2.1057043299887998e-07,
      "loss": 2.7712,
      "step": 227655
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0116689205169678,
      "learning_rate": 2.1041722520755978e-07,
      "loss": 2.914,
      "step": 227656
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7873740196228027,
      "learning_rate": 2.1026407315278826e-07,
      "loss": 2.9331,
      "step": 227657
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1751461029052734,
      "learning_rate": 2.1011097683459876e-07,
      "loss": 2.904,
      "step": 227658
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.214724540710449,
      "learning_rate": 2.0995793625299128e-07,
      "loss": 2.8108,
      "step": 227659
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7530548572540283,
      "learning_rate": 2.0980495140803245e-07,
      "loss": 2.946,
      "step": 227660
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.480727195739746,
      "learning_rate": 2.0965202229968892e-07,
      "loss": 2.9375,
      "step": 227661
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.282371997833252,
      "learning_rate": 2.0949914892806064e-07,
      "loss": 2.8566,
      "step": 227662
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9720377922058105,
      "learning_rate": 2.093463312931476e-07,
      "loss": 2.8307,
      "step": 227663
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6932058334350586,
      "learning_rate": 2.091935693949498e-07,
      "loss": 2.8424,
      "step": 227664
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9656283855438232,
      "learning_rate": 2.0904086323356717e-07,
      "loss": 2.9131,
      "step": 227665
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1009719371795654,
      "learning_rate": 2.0888821280893307e-07,
      "loss": 3.1285,
      "step": 227666
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.698850154876709,
      "learning_rate": 2.0873561812114748e-07,
      "loss": 2.9678,
      "step": 227667
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.631978750228882,
      "learning_rate": 2.0858307917024365e-07,
      "loss": 2.9045,
      "step": 227668
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0842692852020264,
      "learning_rate": 2.0843059595618827e-07,
      "loss": 2.9693,
      "step": 227669
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1054928302764893,
      "learning_rate": 2.08278168479048e-07,
      "loss": 2.9975,
      "step": 227670
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.829420566558838,
      "learning_rate": 2.081257967388561e-07,
      "loss": 3.0152,
      "step": 227671
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.227933883666992,
      "learning_rate": 2.079734807356459e-07,
      "loss": 2.8877,
      "step": 227672
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.6377768516540527,
      "learning_rate": 2.078212204694174e-07,
      "loss": 2.9182,
      "step": 227673
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0742199420928955,
      "learning_rate": 2.076690159402039e-07,
      "loss": 2.8688,
      "step": 227674
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9965407848358154,
      "learning_rate": 2.075168671480387e-07,
      "loss": 2.9955,
      "step": 227675
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9617819786071777,
      "learning_rate": 2.0736477409298847e-07,
      "loss": 2.7472,
      "step": 227676
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.023484706878662,
      "learning_rate": 2.072127367750198e-07,
      "loss": 2.5489,
      "step": 227677
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1302618980407715,
      "learning_rate": 2.070607551941994e-07,
      "loss": 2.9378,
      "step": 227678
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.110862731933594,
      "learning_rate": 2.0690882935056053e-07,
      "loss": 2.9553,
      "step": 227679
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.91579532623291,
      "learning_rate": 2.0675695924410318e-07,
      "loss": 2.6995,
      "step": 227680
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.651333808898926,
      "learning_rate": 2.066051448748607e-07,
      "loss": 2.9113,
      "step": 227681
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.3934810161590576,
      "learning_rate": 2.0645338624286634e-07,
      "loss": 3.1905,
      "step": 227682
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2582011222839355,
      "learning_rate": 2.0630168334815344e-07,
      "loss": 2.9632,
      "step": 227683
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9835848808288574,
      "learning_rate": 2.0615003619075531e-07,
      "loss": 2.8932,
      "step": 227684
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7902379035949707,
      "learning_rate": 2.0599844477070526e-07,
      "loss": 2.941,
      "step": 227685
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.088789939880371,
      "learning_rate": 2.0584690908800327e-07,
      "loss": 2.9043,
      "step": 227686
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9756131172180176,
      "learning_rate": 2.0569542914268266e-07,
      "loss": 2.9299,
      "step": 227687
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2550559043884277,
      "learning_rate": 2.0554400493481004e-07,
      "loss": 2.7339,
      "step": 227688
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8499085903167725,
      "learning_rate": 2.053926364643521e-07,
      "loss": 2.7725,
      "step": 227689
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.750737428665161,
      "learning_rate": 2.0524132373140878e-07,
      "loss": 3.0644,
      "step": 227690
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2650930881500244,
      "learning_rate": 2.0509006673594674e-07,
      "loss": 3.07,
      "step": 227691
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9171979427337646,
      "learning_rate": 2.0493886547803262e-07,
      "loss": 3.0632,
      "step": 227692
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.054973840713501,
      "learning_rate": 2.0478771995766642e-07,
      "loss": 2.9111,
      "step": 227693
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.17616868019104,
      "learning_rate": 2.046366301748814e-07,
      "loss": 3.1285,
      "step": 227694
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.9429476261138916,
      "learning_rate": 2.0448559612974426e-07,
      "loss": 2.8491,
      "step": 227695
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8557651042938232,
      "learning_rate": 2.043346178222216e-07,
      "loss": 2.745,
      "step": 227696
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7919600009918213,
      "learning_rate": 2.0418369525238009e-07,
      "loss": 3.2319,
      "step": 227697
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.728320598602295,
      "learning_rate": 2.0403282842025304e-07,
      "loss": 3.2055,
      "step": 227698
    },
    {
      "epoch": 2.96,
      "grad_norm": 4.291885852813721,
      "learning_rate": 2.0388201732584043e-07,
      "loss": 2.8687,
      "step": 227699
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.446943759918213,
      "learning_rate": 2.0373126196917556e-07,
      "loss": 2.8604,
      "step": 227700
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.2521753311157227,
      "learning_rate": 2.0358056235032505e-07,
      "loss": 2.9937,
      "step": 227701
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.494616746902466,
      "learning_rate": 2.034299184692889e-07,
      "loss": 2.9272,
      "step": 227702
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8596103191375732,
      "learning_rate": 2.0327933032606714e-07,
      "loss": 2.8763,
      "step": 227703
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.620736837387085,
      "learning_rate": 2.0312879792072635e-07,
      "loss": 2.8442,
      "step": 227704
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.4320735931396484,
      "learning_rate": 2.029783212532665e-07,
      "loss": 2.8939,
      "step": 227705
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.514155864715576,
      "learning_rate": 2.0282790032375429e-07,
      "loss": 2.992,
      "step": 227706
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.934823513031006,
      "learning_rate": 2.0267753513218965e-07,
      "loss": 2.8706,
      "step": 227707
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0328736305236816,
      "learning_rate": 2.0252722567860592e-07,
      "loss": 3.0198,
      "step": 227708
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.7023701667785645,
      "learning_rate": 2.0237697196303637e-07,
      "loss": 2.9908,
      "step": 227709
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.0001418590545654,
      "learning_rate": 2.0222677398551434e-07,
      "loss": 2.9657,
      "step": 227710
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.691985607147217,
      "learning_rate": 2.0207663174603983e-07,
      "loss": 2.9314,
      "step": 227711
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.1503560543060303,
      "learning_rate": 2.0192654524464614e-07,
      "loss": 2.9915,
      "step": 227712
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.8036653995513916,
      "learning_rate": 2.0177651448139988e-07,
      "loss": 3.0574,
      "step": 227713
    },
    {
      "epoch": 2.96,
      "grad_norm": 3.579785108566284,
      "learning_rate": 2.0162653945630102e-07,
      "loss": 3.1429,
      "step": 227714
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.819064140319824,
      "learning_rate": 2.0147662016934962e-07,
      "loss": 3.157,
      "step": 227715
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8618602752685547,
      "learning_rate": 2.0132675662064558e-07,
      "loss": 2.9021,
      "step": 227716
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.95560359954834,
      "learning_rate": 2.0117694881015557e-07,
      "loss": 2.9399,
      "step": 227717
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2551283836364746,
      "learning_rate": 2.010271967379129e-07,
      "loss": 2.8869,
      "step": 227718
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7510266304016113,
      "learning_rate": 2.008775004039509e-07,
      "loss": 2.886,
      "step": 227719
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6679961681365967,
      "learning_rate": 2.0072785980833615e-07,
      "loss": 2.8961,
      "step": 227720
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7862069606781006,
      "learning_rate": 2.005782749510354e-07,
      "loss": 3.0118,
      "step": 227721
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.097872495651245,
      "learning_rate": 2.0042874583214852e-07,
      "loss": 2.8438,
      "step": 227722
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6435625553131104,
      "learning_rate": 2.002792724516089e-07,
      "loss": 2.856,
      "step": 227723
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.753434896469116,
      "learning_rate": 2.001298548095165e-07,
      "loss": 2.8567,
      "step": 227724
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.762049436569214,
      "learning_rate": 1.9998049290590458e-07,
      "loss": 2.8785,
      "step": 227725
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8045032024383545,
      "learning_rate": 1.9983118674073984e-07,
      "loss": 2.9735,
      "step": 227726
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7965927124023438,
      "learning_rate": 1.996819363141222e-07,
      "loss": 2.8753,
      "step": 227727
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6361470222473145,
      "learning_rate": 1.995327416260184e-07,
      "loss": 2.8798,
      "step": 227728
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0165228843688965,
      "learning_rate": 1.993836026764617e-07,
      "loss": 2.8224,
      "step": 227729
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2265303134918213,
      "learning_rate": 1.9923451946551872e-07,
      "loss": 2.6259,
      "step": 227730
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.194899797439575,
      "learning_rate": 1.9908549199322278e-07,
      "loss": 2.7372,
      "step": 227731
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.103123903274536,
      "learning_rate": 1.9893652025954054e-07,
      "loss": 3.0397,
      "step": 227732
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9930946826934814,
      "learning_rate": 1.9878760426453865e-07,
      "loss": 2.9236,
      "step": 227733
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.4850361347198486,
      "learning_rate": 1.9863874400825043e-07,
      "loss": 3.0434,
      "step": 227734
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.712468385696411,
      "learning_rate": 1.9848993949070913e-07,
      "loss": 3.0237,
      "step": 227735
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.913726329803467,
      "learning_rate": 1.983411907119148e-07,
      "loss": 3.0163,
      "step": 227736
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.034878969192505,
      "learning_rate": 1.9819249767190071e-07,
      "loss": 2.7761,
      "step": 227737
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.618210792541504,
      "learning_rate": 1.980438603707002e-07,
      "loss": 3.0687,
      "step": 227738
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.002142906188965,
      "learning_rate": 1.9789527880834656e-07,
      "loss": 3.0397,
      "step": 227739
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.298515796661377,
      "learning_rate": 1.9774675298487308e-07,
      "loss": 2.9952,
      "step": 227740
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.967491388320923,
      "learning_rate": 1.975982829002798e-07,
      "loss": 2.9776,
      "step": 227741
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0837385654449463,
      "learning_rate": 1.974498685546333e-07,
      "loss": 3.1718,
      "step": 227742
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.365359306335449,
      "learning_rate": 1.973015099479336e-07,
      "loss": 2.9508,
      "step": 227743
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2852537631988525,
      "learning_rate": 1.9715320708021397e-07,
      "loss": 2.9772,
      "step": 227744
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.086789846420288,
      "learning_rate": 1.9700495995150778e-07,
      "loss": 2.939,
      "step": 227745
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.737027406692505,
      "learning_rate": 1.96856768561815e-07,
      "loss": 2.7279,
      "step": 227746
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8003761768341064,
      "learning_rate": 1.967086329112022e-07,
      "loss": 2.6126,
      "step": 227747
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2474145889282227,
      "learning_rate": 1.9656055299966945e-07,
      "loss": 2.7973,
      "step": 227748
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1970181465148926,
      "learning_rate": 1.9641252882728332e-07,
      "loss": 2.9455,
      "step": 227749
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.422044277191162,
      "learning_rate": 1.9626456039401051e-07,
      "loss": 2.9346,
      "step": 227750
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7657082080841064,
      "learning_rate": 1.9611664769991764e-07,
      "loss": 3.071,
      "step": 227751
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7090277671813965,
      "learning_rate": 1.95968790745038e-07,
      "loss": 2.9827,
      "step": 227752
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0812995433807373,
      "learning_rate": 1.9582098952940494e-07,
      "loss": 3.0815,
      "step": 227753
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.354276657104492,
      "learning_rate": 1.9567324405298513e-07,
      "loss": 2.9117,
      "step": 227754
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8450496196746826,
      "learning_rate": 1.9552555431587846e-07,
      "loss": 2.9067,
      "step": 227755
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.00076961517334,
      "learning_rate": 1.9537792031808498e-07,
      "loss": 2.6256,
      "step": 227756
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7919580936431885,
      "learning_rate": 1.9523034205960463e-07,
      "loss": 3.0165,
      "step": 227757
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.946199417114258,
      "learning_rate": 1.950828195405041e-07,
      "loss": 2.7536,
      "step": 227758
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7699670791625977,
      "learning_rate": 1.949353527607833e-07,
      "loss": 3.0931,
      "step": 227759
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9034435749053955,
      "learning_rate": 1.9478794172050894e-07,
      "loss": 2.9118,
      "step": 227760
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8280367851257324,
      "learning_rate": 1.9464058641968093e-07,
      "loss": 2.8733,
      "step": 227761
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3037023544311523,
      "learning_rate": 1.9449328685829936e-07,
      "loss": 2.9858,
      "step": 227762
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9306371212005615,
      "learning_rate": 1.9434604303646406e-07,
      "loss": 3.0032,
      "step": 227763
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2163822650909424,
      "learning_rate": 1.9419885495410848e-07,
      "loss": 2.9362,
      "step": 227764
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7594730854034424,
      "learning_rate": 1.9405172261136582e-07,
      "loss": 2.9719,
      "step": 227765
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0683507919311523,
      "learning_rate": 1.9390464600816948e-07,
      "loss": 2.7381,
      "step": 227766
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.500507354736328,
      "learning_rate": 1.9375762514458603e-07,
      "loss": 2.7943,
      "step": 227767
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.537262439727783,
      "learning_rate": 1.9361066002064885e-07,
      "loss": 2.8005,
      "step": 227768
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3522207736968994,
      "learning_rate": 1.934637506363912e-07,
      "loss": 3.0168,
      "step": 227769
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7187001705169678,
      "learning_rate": 1.9331689699181306e-07,
      "loss": 2.8832,
      "step": 227770
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.870335102081299,
      "learning_rate": 1.9317009908698112e-07,
      "loss": 2.8266,
      "step": 227771
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.64153790473938,
      "learning_rate": 1.93023356921862e-07,
      "loss": 2.9135,
      "step": 227772
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.938941240310669,
      "learning_rate": 1.9287667049652233e-07,
      "loss": 2.9172,
      "step": 227773
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.009723663330078,
      "learning_rate": 1.9273003981099544e-07,
      "loss": 2.9341,
      "step": 227774
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.742814064025879,
      "learning_rate": 1.925834648653146e-07,
      "loss": 2.8492,
      "step": 227775
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8831675052642822,
      "learning_rate": 1.9243694565947987e-07,
      "loss": 3.0127,
      "step": 227776
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3330459594726562,
      "learning_rate": 1.922904821935578e-07,
      "loss": 2.9457,
      "step": 227777
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7661423683166504,
      "learning_rate": 1.9214407446751513e-07,
      "loss": 3.0344,
      "step": 227778
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.163174629211426,
      "learning_rate": 1.9199772248141842e-07,
      "loss": 2.6543,
      "step": 227779
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5611374378204346,
      "learning_rate": 1.9185142623526771e-07,
      "loss": 2.9093,
      "step": 227780
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.564120292663574,
      "learning_rate": 1.9170518572916293e-07,
      "loss": 3.0207,
      "step": 227781
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7432427406311035,
      "learning_rate": 1.9155900096303745e-07,
      "loss": 2.8753,
      "step": 227782
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9263665676116943,
      "learning_rate": 1.9141287193695786e-07,
      "loss": 2.9852,
      "step": 227783
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8224925994873047,
      "learning_rate": 1.9126679865099082e-07,
      "loss": 2.876,
      "step": 227784
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.449512243270874,
      "learning_rate": 1.9112078110510298e-07,
      "loss": 2.7467,
      "step": 227785
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1859710216522217,
      "learning_rate": 1.9097481929932769e-07,
      "loss": 2.8927,
      "step": 227786
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5017740726470947,
      "learning_rate": 1.908289132337315e-07,
      "loss": 3.1768,
      "step": 227787
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2027902603149414,
      "learning_rate": 1.9068306290831448e-07,
      "loss": 2.8703,
      "step": 227788
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8657021522521973,
      "learning_rate": 1.905372683231099e-07,
      "loss": 2.7228,
      "step": 227789
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.857562303543091,
      "learning_rate": 1.9039152947815107e-07,
      "loss": 2.8504,
      "step": 227790
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.303291082382202,
      "learning_rate": 1.9024584637343797e-07,
      "loss": 2.8628,
      "step": 227791
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9300179481506348,
      "learning_rate": 1.9010021900903727e-07,
      "loss": 2.9249,
      "step": 227792
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6977977752685547,
      "learning_rate": 1.8995464738494892e-07,
      "loss": 2.9306,
      "step": 227793
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.668337345123291,
      "learning_rate": 1.8980913150120626e-07,
      "loss": 3.005,
      "step": 227794
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.12291955947876,
      "learning_rate": 1.8966367135784256e-07,
      "loss": 3.0534,
      "step": 227795
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.396605968475342,
      "learning_rate": 1.8951826695489113e-07,
      "loss": 2.687,
      "step": 227796
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.59114670753479,
      "learning_rate": 1.8937291829235202e-07,
      "loss": 2.8966,
      "step": 227797
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.574021339416504,
      "learning_rate": 1.8922762537025848e-07,
      "loss": 2.9291,
      "step": 227798
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7348060607910156,
      "learning_rate": 1.8908238818867715e-07,
      "loss": 2.8249,
      "step": 227799
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.966330051422119,
      "learning_rate": 1.8893720674757473e-07,
      "loss": 2.8057,
      "step": 227800
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.138179302215576,
      "learning_rate": 1.887920810470178e-07,
      "loss": 2.6997,
      "step": 227801
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.90997576713562,
      "learning_rate": 1.8864701108700643e-07,
      "loss": 2.8897,
      "step": 227802
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1402182579040527,
      "learning_rate": 1.8850199686760715e-07,
      "loss": 2.8906,
      "step": 227803
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.378042221069336,
      "learning_rate": 1.8835703838882e-07,
      "loss": 2.8813,
      "step": 227804
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1473283767700195,
      "learning_rate": 1.8821213565067827e-07,
      "loss": 2.7402,
      "step": 227805
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.056591749191284,
      "learning_rate": 1.880672886532153e-07,
      "loss": 2.9083,
      "step": 227806
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.495274782180786,
      "learning_rate": 1.8792249739643107e-07,
      "loss": 2.8454,
      "step": 227807
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7893056869506836,
      "learning_rate": 1.8777776188039218e-07,
      "loss": 2.9709,
      "step": 227808
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.551323890686035,
      "learning_rate": 1.8763308210509863e-07,
      "loss": 2.7386,
      "step": 227809
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.918372631072998,
      "learning_rate": 1.8748845807058377e-07,
      "loss": 2.9684,
      "step": 227810
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.847703695297241,
      "learning_rate": 1.8734388977684757e-07,
      "loss": 3.0933,
      "step": 227811
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8486106395721436,
      "learning_rate": 1.8719937722398992e-07,
      "loss": 2.6515,
      "step": 227812
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2803075313568115,
      "learning_rate": 1.8705492041194426e-07,
      "loss": 2.9818,
      "step": 227813
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2545621395111084,
      "learning_rate": 1.8691051934081048e-07,
      "loss": 3.0523,
      "step": 227814
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.559272050857544,
      "learning_rate": 1.867661740105886e-07,
      "loss": 2.9976,
      "step": 227815
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.492114782333374,
      "learning_rate": 1.8662188442131188e-07,
      "loss": 3.0907,
      "step": 227816
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.590916872024536,
      "learning_rate": 1.8647765057298037e-07,
      "loss": 2.9475,
      "step": 227817
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.67653489112854,
      "learning_rate": 1.8633347246566065e-07,
      "loss": 3.2688,
      "step": 227818
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5707759857177734,
      "learning_rate": 1.8618935009935276e-07,
      "loss": 3.0104,
      "step": 227819
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6664443016052246,
      "learning_rate": 1.8604528347408997e-07,
      "loss": 3.0639,
      "step": 227820
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9323556423187256,
      "learning_rate": 1.859012725899056e-07,
      "loss": 2.9823,
      "step": 227821
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.847146511077881,
      "learning_rate": 1.8575731744683299e-07,
      "loss": 2.6329,
      "step": 227822
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.740736961364746,
      "learning_rate": 1.8561341804487206e-07,
      "loss": 2.8459,
      "step": 227823
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8755102157592773,
      "learning_rate": 1.854695743840562e-07,
      "loss": 2.5768,
      "step": 227824
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5886693000793457,
      "learning_rate": 1.8532578646445195e-07,
      "loss": 2.7265,
      "step": 227825
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.311473846435547,
      "learning_rate": 1.8518205428602607e-07,
      "loss": 2.9138,
      "step": 227826
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.875930070877075,
      "learning_rate": 1.8503837784884513e-07,
      "loss": 2.9681,
      "step": 227827
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5763447284698486,
      "learning_rate": 1.8489475715294245e-07,
      "loss": 2.8175,
      "step": 227828
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.7229576110839844,
      "learning_rate": 1.8475119219828471e-07,
      "loss": 2.6269,
      "step": 227829
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.915484666824341,
      "learning_rate": 1.8460768298497186e-07,
      "loss": 2.7129,
      "step": 227830
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5137436389923096,
      "learning_rate": 1.8446422951300388e-07,
      "loss": 3.0236,
      "step": 227831
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9496524333953857,
      "learning_rate": 1.8432083178238077e-07,
      "loss": 2.9199,
      "step": 227832
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6184303760528564,
      "learning_rate": 1.8417748979316917e-07,
      "loss": 2.9561,
      "step": 227833
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3118479251861572,
      "learning_rate": 1.84034203545369e-07,
      "loss": 3.0724,
      "step": 227834
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9900310039520264,
      "learning_rate": 1.8389097303901367e-07,
      "loss": 2.8981,
      "step": 227835
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1959245204925537,
      "learning_rate": 1.8374779827416974e-07,
      "loss": 2.9016,
      "step": 227836
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1764893531799316,
      "learning_rate": 1.836046792507706e-07,
      "loss": 2.8217,
      "step": 227837
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.400341510772705,
      "learning_rate": 1.8346161596894948e-07,
      "loss": 2.9948,
      "step": 227838
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7284700870513916,
      "learning_rate": 1.8331860842863976e-07,
      "loss": 2.9903,
      "step": 227839
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6941275596618652,
      "learning_rate": 1.831756566299414e-07,
      "loss": 2.9212,
      "step": 227840
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.177703619003296,
      "learning_rate": 1.83032760572821e-07,
      "loss": 3.0943,
      "step": 227841
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.030654430389404,
      "learning_rate": 1.8288992025734528e-07,
      "loss": 2.9557,
      "step": 227842
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.801670789718628,
      "learning_rate": 1.827471356835475e-07,
      "loss": 3.1781,
      "step": 227843
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0721728801727295,
      "learning_rate": 1.8260440685142762e-07,
      "loss": 2.8935,
      "step": 227844
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.155042886734009,
      "learning_rate": 1.8246173376101902e-07,
      "loss": 3.1006,
      "step": 227845
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.173489809036255,
      "learning_rate": 1.8231911641235497e-07,
      "loss": 2.8426,
      "step": 227846
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.668592929840088,
      "learning_rate": 1.8217655480543548e-07,
      "loss": 3.019,
      "step": 227847
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.229954481124878,
      "learning_rate": 1.8203404894032713e-07,
      "loss": 2.7156,
      "step": 227848
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1491594314575195,
      "learning_rate": 1.8189159881702998e-07,
      "loss": 2.9105,
      "step": 227849
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.791348695755005,
      "learning_rate": 1.817492044356106e-07,
      "loss": 3.0496,
      "step": 227850
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.051544427871704,
      "learning_rate": 1.8160686579600237e-07,
      "loss": 3.1041,
      "step": 227851
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.893507719039917,
      "learning_rate": 1.8146458289833854e-07,
      "loss": 2.7073,
      "step": 227852
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0970230102539062,
      "learning_rate": 1.813223557425858e-07,
      "loss": 3.2653,
      "step": 227853
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.893181800842285,
      "learning_rate": 1.8118018432881077e-07,
      "loss": 2.8093,
      "step": 227854
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1329128742218018,
      "learning_rate": 1.810380686569801e-07,
      "loss": 2.8574,
      "step": 227855
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.159740924835205,
      "learning_rate": 1.8089600872716048e-07,
      "loss": 2.7355,
      "step": 227856
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.06921124458313,
      "learning_rate": 1.8075400453938515e-07,
      "loss": 2.9767,
      "step": 227857
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3045995235443115,
      "learning_rate": 1.8061205609365415e-07,
      "loss": 2.7652,
      "step": 227858
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.156480312347412,
      "learning_rate": 1.8047016339000077e-07,
      "loss": 2.8115,
      "step": 227859
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.164106607437134,
      "learning_rate": 1.803283264284916e-07,
      "loss": 2.8513,
      "step": 227860
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.197065353393555,
      "learning_rate": 1.8018654520909338e-07,
      "loss": 2.8215,
      "step": 227861
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8491623401641846,
      "learning_rate": 1.8004481973183938e-07,
      "loss": 3.081,
      "step": 227862
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1861329078674316,
      "learning_rate": 1.7990314999679624e-07,
      "loss": 2.8564,
      "step": 227863
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6697449684143066,
      "learning_rate": 1.7976153600396393e-07,
      "loss": 2.8403,
      "step": 227864
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0143539905548096,
      "learning_rate": 1.7961997775337579e-07,
      "loss": 2.9292,
      "step": 227865
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1178698539733887,
      "learning_rate": 1.794784752450651e-07,
      "loss": 2.8999,
      "step": 227866
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.05644154548645,
      "learning_rate": 1.7933702847903187e-07,
      "loss": 2.6676,
      "step": 227867
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.724153995513916,
      "learning_rate": 1.7919563745534272e-07,
      "loss": 2.9872,
      "step": 227868
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.910482406616211,
      "learning_rate": 1.7905430217399763e-07,
      "loss": 2.7648,
      "step": 227869
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.840538740158081,
      "learning_rate": 1.7891302263499663e-07,
      "loss": 2.7815,
      "step": 227870
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8064019680023193,
      "learning_rate": 1.787717988384063e-07,
      "loss": 2.967,
      "step": 227871
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7892255783081055,
      "learning_rate": 1.7863063078425999e-07,
      "loss": 2.9966,
      "step": 227872
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.090057849884033,
      "learning_rate": 1.7848951847255765e-07,
      "loss": 2.9858,
      "step": 227873
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.081307888031006,
      "learning_rate": 1.7834846190333264e-07,
      "loss": 2.8663,
      "step": 227874
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.427468776702881,
      "learning_rate": 1.7820746107661821e-07,
      "loss": 3.0078,
      "step": 227875
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.070215702056885,
      "learning_rate": 1.780665159924144e-07,
      "loss": 2.7861,
      "step": 227876
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.212766170501709,
      "learning_rate": 1.779256266507878e-07,
      "loss": 2.8754,
      "step": 227877
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2620961666107178,
      "learning_rate": 1.7778479305173843e-07,
      "loss": 3.0575,
      "step": 227878
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.230799198150635,
      "learning_rate": 1.776440151952996e-07,
      "loss": 2.6985,
      "step": 227879
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.540078639984131,
      "learning_rate": 1.7750329308150458e-07,
      "loss": 3.0936,
      "step": 227880
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2782299518585205,
      "learning_rate": 1.773626267103534e-07,
      "loss": 3.0485,
      "step": 227881
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.654602289199829,
      "learning_rate": 1.7722201608187937e-07,
      "loss": 2.9912,
      "step": 227882
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1262192726135254,
      "learning_rate": 1.770814611961491e-07,
      "loss": 2.8615,
      "step": 227883
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.01767635345459,
      "learning_rate": 1.7694096205312923e-07,
      "loss": 2.8849,
      "step": 227884
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.843385219573975,
      "learning_rate": 1.7680051865291978e-07,
      "loss": 2.9396,
      "step": 227885
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1776957511901855,
      "learning_rate": 1.7666013099545407e-07,
      "loss": 2.6926,
      "step": 227886
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9054768085479736,
      "learning_rate": 1.7651979908083202e-07,
      "loss": 3.0298,
      "step": 227887
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.379848003387451,
      "learning_rate": 1.7637952290905366e-07,
      "loss": 2.4588,
      "step": 227888
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8643720149993896,
      "learning_rate": 1.7623930248015228e-07,
      "loss": 2.907,
      "step": 227889
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9440488815307617,
      "learning_rate": 1.7609913779412787e-07,
      "loss": 2.759,
      "step": 227890
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9611191749572754,
      "learning_rate": 1.7595902885104708e-07,
      "loss": 3.0428,
      "step": 227891
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1245529651641846,
      "learning_rate": 1.7581897565090986e-07,
      "loss": 3.0518,
      "step": 227892
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.994910955429077,
      "learning_rate": 1.7567897819374954e-07,
      "loss": 2.8239,
      "step": 227893
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.662792682647705,
      "learning_rate": 1.7553903647956612e-07,
      "loss": 2.7393,
      "step": 227894
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9600021839141846,
      "learning_rate": 1.7539915050842624e-07,
      "loss": 2.9231,
      "step": 227895
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3659112453460693,
      "learning_rate": 1.7525932028032986e-07,
      "loss": 2.8995,
      "step": 227896
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0310184955596924,
      "learning_rate": 1.751195457953436e-07,
      "loss": 3.0088,
      "step": 227897
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0694806575775146,
      "learning_rate": 1.7497982705343415e-07,
      "loss": 2.9383,
      "step": 227898
    },
    {
      "epoch": 2.97,
      "grad_norm": 5.005450248718262,
      "learning_rate": 1.7484016405466818e-07,
      "loss": 2.9912,
      "step": 227899
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.714433193206787,
      "learning_rate": 1.747005567990456e-07,
      "loss": 2.7116,
      "step": 227900
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.712416172027588,
      "learning_rate": 1.7456100528659978e-07,
      "loss": 3.1133,
      "step": 227901
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9865660667419434,
      "learning_rate": 1.744215095173973e-07,
      "loss": 3.085,
      "step": 227902
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6777422428131104,
      "learning_rate": 1.7428206949140488e-07,
      "loss": 2.7777,
      "step": 227903
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0332398414611816,
      "learning_rate": 1.7414268520868911e-07,
      "loss": 3.1088,
      "step": 227904
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.964386463165283,
      "learning_rate": 1.7400335666925002e-07,
      "loss": 2.8775,
      "step": 227905
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.146304130554199,
      "learning_rate": 1.7386408387312088e-07,
      "loss": 3.2435,
      "step": 227906
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0991902351379395,
      "learning_rate": 1.7372486682030174e-07,
      "loss": 3.0415,
      "step": 227907
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.863449811935425,
      "learning_rate": 1.7358570551089247e-07,
      "loss": 3.1646,
      "step": 227908
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9700300693511963,
      "learning_rate": 1.7344659994485975e-07,
      "loss": 2.9705,
      "step": 227909
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.975637197494507,
      "learning_rate": 1.7330755012223695e-07,
      "loss": 3.0965,
      "step": 227910
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5561656951904297,
      "learning_rate": 1.7316855604305734e-07,
      "loss": 2.9527,
      "step": 227911
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.433117389678955,
      "learning_rate": 1.7302961770735424e-07,
      "loss": 2.685,
      "step": 227912
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6230320930480957,
      "learning_rate": 1.7289073511512763e-07,
      "loss": 3.0059,
      "step": 227913
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.504115581512451,
      "learning_rate": 1.7275190826644413e-07,
      "loss": 2.7572,
      "step": 227914
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.432060480117798,
      "learning_rate": 1.7261313716127045e-07,
      "loss": 2.8656,
      "step": 227915
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.794151782989502,
      "learning_rate": 1.724744217997065e-07,
      "loss": 3.1593,
      "step": 227916
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.153543472290039,
      "learning_rate": 1.7233576218171895e-07,
      "loss": 3.0012,
      "step": 227917
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.252638339996338,
      "learning_rate": 1.7219715830734115e-07,
      "loss": 3.0299,
      "step": 227918
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.875866413116455,
      "learning_rate": 1.7205861017663968e-07,
      "loss": 2.9651,
      "step": 227919
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2279741764068604,
      "learning_rate": 1.7192011778958126e-07,
      "loss": 3.1008,
      "step": 227920
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.404871702194214,
      "learning_rate": 1.7178168114626579e-07,
      "loss": 2.9589,
      "step": 227921
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.731752634048462,
      "learning_rate": 1.7164330024662664e-07,
      "loss": 2.8459,
      "step": 227922
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9680471420288086,
      "learning_rate": 1.7150497509076378e-07,
      "loss": 2.9823,
      "step": 227923
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8818278312683105,
      "learning_rate": 1.7136670567867717e-07,
      "loss": 2.825,
      "step": 227924
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.62119197845459,
      "learning_rate": 1.7122849201040012e-07,
      "loss": 2.9351,
      "step": 227925
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.341716766357422,
      "learning_rate": 1.7109033408593266e-07,
      "loss": 2.6903,
      "step": 227926
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6996009349823,
      "learning_rate": 1.7095223190534134e-07,
      "loss": 2.9411,
      "step": 227927
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.004384756088257,
      "learning_rate": 1.7081418546862624e-07,
      "loss": 2.9318,
      "step": 227928
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.745081663131714,
      "learning_rate": 1.7067619477578731e-07,
      "loss": 2.9136,
      "step": 227929
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.05739688873291,
      "learning_rate": 1.705382598269245e-07,
      "loss": 2.9935,
      "step": 227930
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.125473976135254,
      "learning_rate": 1.7040038062197115e-07,
      "loss": 2.7506,
      "step": 227931
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1319234371185303,
      "learning_rate": 1.702625571610272e-07,
      "loss": 2.6881,
      "step": 227932
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.231152057647705,
      "learning_rate": 1.701247894440927e-07,
      "loss": 2.6864,
      "step": 227933
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8755133152008057,
      "learning_rate": 1.699870774712009e-07,
      "loss": 2.6038,
      "step": 227934
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.114485263824463,
      "learning_rate": 1.698494212423518e-07,
      "loss": 2.895,
      "step": 227935
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2005414962768555,
      "learning_rate": 1.697118207575787e-07,
      "loss": 3.0357,
      "step": 227936
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.492652177810669,
      "learning_rate": 1.6957427601694828e-07,
      "loss": 2.9591,
      "step": 227937
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8264338970184326,
      "learning_rate": 1.6943678702042718e-07,
      "loss": 2.9399,
      "step": 227938
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.854065179824829,
      "learning_rate": 1.692993537680487e-07,
      "loss": 2.8471,
      "step": 227939
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9877922534942627,
      "learning_rate": 1.6916197625991278e-07,
      "loss": 2.9545,
      "step": 227940
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7587201595306396,
      "learning_rate": 1.690246544959528e-07,
      "loss": 2.9509,
      "step": 227941
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0606236457824707,
      "learning_rate": 1.688873884762354e-07,
      "loss": 2.7449,
      "step": 227942
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1489367485046387,
      "learning_rate": 1.6875017820076053e-07,
      "loss": 2.9767,
      "step": 227943
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1079864501953125,
      "learning_rate": 1.6861302366959485e-07,
      "loss": 2.7445,
      "step": 227944
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.372870922088623,
      "learning_rate": 1.6847592488277162e-07,
      "loss": 2.8613,
      "step": 227945
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.864034414291382,
      "learning_rate": 1.6833888184025756e-07,
      "loss": 3.1332,
      "step": 227946
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.743668794631958,
      "learning_rate": 1.682018945421193e-07,
      "loss": 3.0181,
      "step": 227947
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0265212059020996,
      "learning_rate": 1.680649629883568e-07,
      "loss": 2.846,
      "step": 227948
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9540388584136963,
      "learning_rate": 1.679280871790034e-07,
      "loss": 2.8812,
      "step": 227949
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2774245738983154,
      "learning_rate": 1.677912671141257e-07,
      "loss": 2.9415,
      "step": 227950
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9302151203155518,
      "learning_rate": 1.676545027936904e-07,
      "loss": 2.8277,
      "step": 227951
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8319497108459473,
      "learning_rate": 1.6751779421776412e-07,
      "loss": 2.9346,
      "step": 227952
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.597660541534424,
      "learning_rate": 1.6738114138634683e-07,
      "loss": 3.1468,
      "step": 227953
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7518062591552734,
      "learning_rate": 1.6724454429947186e-07,
      "loss": 2.9391,
      "step": 227954
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.61545991897583,
      "learning_rate": 1.6710800295717253e-07,
      "loss": 2.6976,
      "step": 227955
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8206353187561035,
      "learning_rate": 1.669715173594488e-07,
      "loss": 3.0096,
      "step": 227956
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.361668586730957,
      "learning_rate": 1.668350875063673e-07,
      "loss": 3.154,
      "step": 227957
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.092928647994995,
      "learning_rate": 1.6669871339792805e-07,
      "loss": 3.0027,
      "step": 227958
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8236281871795654,
      "learning_rate": 1.6656239503416434e-07,
      "loss": 2.8914,
      "step": 227959
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5887835025787354,
      "learning_rate": 1.6642613241507617e-07,
      "loss": 2.9509,
      "step": 227960
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0173158645629883,
      "learning_rate": 1.6628992554073017e-07,
      "loss": 2.9048,
      "step": 227961
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.256394863128662,
      "learning_rate": 1.66153774411093e-07,
      "loss": 3.1012,
      "step": 227962
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.401594400405884,
      "learning_rate": 1.660176790262646e-07,
      "loss": 2.8589,
      "step": 227963
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.9968817234039307,
      "learning_rate": 1.65881639386245e-07,
      "loss": 2.8881,
      "step": 227964
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.696357250213623,
      "learning_rate": 1.6574565549100083e-07,
      "loss": 2.7594,
      "step": 227965
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5686898231506348,
      "learning_rate": 1.6560972734063205e-07,
      "loss": 2.8851,
      "step": 227966
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4250969886779785,
      "learning_rate": 1.6547385493513866e-07,
      "loss": 3.0496,
      "step": 227967
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.9901952743530273,
      "learning_rate": 1.6533803827455395e-07,
      "loss": 2.9507,
      "step": 227968
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8169684410095215,
      "learning_rate": 1.6520227735887793e-07,
      "loss": 2.9835,
      "step": 227969
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8588669300079346,
      "learning_rate": 1.650665721881439e-07,
      "loss": 2.6428,
      "step": 227970
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7976889610290527,
      "learning_rate": 1.6493092276238518e-07,
      "loss": 3.1017,
      "step": 227971
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9168078899383545,
      "learning_rate": 1.6479532908163507e-07,
      "loss": 3.0083,
      "step": 227972
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2618794441223145,
      "learning_rate": 1.6465979114589357e-07,
      "loss": 2.8365,
      "step": 227973
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9257009029388428,
      "learning_rate": 1.6452430895522728e-07,
      "loss": 2.7187,
      "step": 227974
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.96097993850708,
      "learning_rate": 1.6438888250960292e-07,
      "loss": 2.927,
      "step": 227975
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5382466316223145,
      "learning_rate": 1.642535118091204e-07,
      "loss": 3.1166,
      "step": 227976
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.777992010116577,
      "learning_rate": 1.6411819685374638e-07,
      "loss": 2.7771,
      "step": 227977
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.691758155822754,
      "learning_rate": 1.6398293764351423e-07,
      "loss": 2.695,
      "step": 227978
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3345589637756348,
      "learning_rate": 1.638477341784572e-07,
      "loss": 3.0934,
      "step": 227979
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.711470603942871,
      "learning_rate": 1.637125864585753e-07,
      "loss": 2.8244,
      "step": 227980
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8876001834869385,
      "learning_rate": 1.635774944839685e-07,
      "loss": 2.9983,
      "step": 227981
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0527381896972656,
      "learning_rate": 1.6344245825457015e-07,
      "loss": 2.9465,
      "step": 227982
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.722222089767456,
      "learning_rate": 1.6330747777048015e-07,
      "loss": 2.8871,
      "step": 227983
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9512503147125244,
      "learning_rate": 1.631725530316652e-07,
      "loss": 3.161,
      "step": 227984
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1164698600769043,
      "learning_rate": 1.6303768403819195e-07,
      "loss": 3.0216,
      "step": 227985
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.83601713180542,
      "learning_rate": 1.6290287079009366e-07,
      "loss": 3.0737,
      "step": 227986
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7822413444519043,
      "learning_rate": 1.6276811328733709e-07,
      "loss": 2.8008,
      "step": 227987
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4187114238739014,
      "learning_rate": 1.6263341152998876e-07,
      "loss": 2.764,
      "step": 227988
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.584862232208252,
      "learning_rate": 1.6249876551808206e-07,
      "loss": 2.988,
      "step": 227989
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.72011661529541,
      "learning_rate": 1.6236417525158362e-07,
      "loss": 2.7548,
      "step": 227990
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8030552864074707,
      "learning_rate": 1.622296407305934e-07,
      "loss": 2.8476,
      "step": 227991
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.008005142211914,
      "learning_rate": 1.620951619551114e-07,
      "loss": 2.8611,
      "step": 227992
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9348464012145996,
      "learning_rate": 1.6196073892513762e-07,
      "loss": 3.0721,
      "step": 227993
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.618124485015869,
      "learning_rate": 1.6182637164073863e-07,
      "loss": 3.1629,
      "step": 227994
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9348762035369873,
      "learning_rate": 1.616920601019145e-07,
      "loss": 2.9118,
      "step": 227995
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1565775871276855,
      "learning_rate": 1.6155780430866516e-07,
      "loss": 2.9678,
      "step": 227996
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9579503536224365,
      "learning_rate": 1.6142360426105727e-07,
      "loss": 2.9045,
      "step": 227997
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.619719982147217,
      "learning_rate": 1.612894599590908e-07,
      "loss": 2.5747,
      "step": 227998
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.944742679595947,
      "learning_rate": 1.6115537140283242e-07,
      "loss": 2.8779,
      "step": 227999
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8486533164978027,
      "learning_rate": 1.6102133859224875e-07,
      "loss": 2.8864,
      "step": 228000
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8803162574768066,
      "learning_rate": 1.6088736152737313e-07,
      "loss": 3.0914,
      "step": 228001
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9258711338043213,
      "learning_rate": 1.6075344020827218e-07,
      "loss": 2.9619,
      "step": 228002
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0042128562927246,
      "learning_rate": 1.606195746349459e-07,
      "loss": 2.912,
      "step": 228003
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8825888633728027,
      "learning_rate": 1.6048576480742758e-07,
      "loss": 2.8879,
      "step": 228004
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.979950428009033,
      "learning_rate": 1.6035201072571725e-07,
      "loss": 3.06,
      "step": 228005
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.7935900688171387,
      "learning_rate": 1.6021831238988146e-07,
      "loss": 2.921,
      "step": 228006
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.074888229370117,
      "learning_rate": 1.60084669799887e-07,
      "loss": 2.8729,
      "step": 228007
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7809317111968994,
      "learning_rate": 1.599510829558337e-07,
      "loss": 2.8577,
      "step": 228008
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.147777795791626,
      "learning_rate": 1.59817551857655e-07,
      "loss": 3.0186,
      "step": 228009
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.396014928817749,
      "learning_rate": 1.596840765054508e-07,
      "loss": 2.9395,
      "step": 228010
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.211329936981201,
      "learning_rate": 1.595506568992544e-07,
      "loss": 2.5527,
      "step": 228011
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1900548934936523,
      "learning_rate": 1.594172930389992e-07,
      "loss": 2.9617,
      "step": 228012
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.848874092102051,
      "learning_rate": 1.5928398492481842e-07,
      "loss": 2.9236,
      "step": 228013
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.630173921585083,
      "learning_rate": 1.5915073255664546e-07,
      "loss": 3.1414,
      "step": 228014
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.827991247177124,
      "learning_rate": 1.5901753593458022e-07,
      "loss": 2.7712,
      "step": 228015
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.215949535369873,
      "learning_rate": 1.5888439505858941e-07,
      "loss": 2.8495,
      "step": 228016
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.125035047531128,
      "learning_rate": 1.5875130992873962e-07,
      "loss": 2.9202,
      "step": 228017
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.320967674255371,
      "learning_rate": 1.586182805450309e-07,
      "loss": 2.8315,
      "step": 228018
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6955626010894775,
      "learning_rate": 1.584853069074965e-07,
      "loss": 3.1565,
      "step": 228019
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8598883152008057,
      "learning_rate": 1.5835238901616975e-07,
      "loss": 3.0646,
      "step": 228020
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.310506820678711,
      "learning_rate": 1.5821952687105067e-07,
      "loss": 2.7984,
      "step": 228021
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.9035301208496094,
      "learning_rate": 1.5808672047217253e-07,
      "loss": 2.9702,
      "step": 228022
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6530203819274902,
      "learning_rate": 1.5795396981956865e-07,
      "loss": 2.969,
      "step": 228023
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8194541931152344,
      "learning_rate": 1.5782127491327234e-07,
      "loss": 3.1157,
      "step": 228024
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.572641372680664,
      "learning_rate": 1.576886357532836e-07,
      "loss": 2.7874,
      "step": 228025
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.317903995513916,
      "learning_rate": 1.5755605233966907e-07,
      "loss": 2.902,
      "step": 228026
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.959853172302246,
      "learning_rate": 1.574235246723954e-07,
      "loss": 2.8488,
      "step": 228027
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7180862426757812,
      "learning_rate": 1.572910527515292e-07,
      "loss": 2.8091,
      "step": 228028
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.250723361968994,
      "learning_rate": 1.571586365770705e-07,
      "loss": 2.9459,
      "step": 228029
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.144226551055908,
      "learning_rate": 1.5702627614905261e-07,
      "loss": 2.9264,
      "step": 228030
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3543715476989746,
      "learning_rate": 1.5689397146750883e-07,
      "loss": 2.8302,
      "step": 228031
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.057004690170288,
      "learning_rate": 1.5676172253247242e-07,
      "loss": 2.8697,
      "step": 228032
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.60274600982666,
      "learning_rate": 1.5662952934394345e-07,
      "loss": 2.9619,
      "step": 228033
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.10914945602417,
      "learning_rate": 1.5649739190195522e-07,
      "loss": 2.9005,
      "step": 228034
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.394578456878662,
      "learning_rate": 1.5636531020654096e-07,
      "loss": 2.9603,
      "step": 228035
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.205766201019287,
      "learning_rate": 1.5623328425773406e-07,
      "loss": 3.2535,
      "step": 228036
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8970260620117188,
      "learning_rate": 1.561013140555345e-07,
      "loss": 2.8073,
      "step": 228037
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.245474338531494,
      "learning_rate": 1.5596939959994227e-07,
      "loss": 2.919,
      "step": 228038
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.726649284362793,
      "learning_rate": 1.5583754089105727e-07,
      "loss": 3.0037,
      "step": 228039
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.403294086456299,
      "learning_rate": 1.5570573792884622e-07,
      "loss": 3.0491,
      "step": 228040
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7023637294769287,
      "learning_rate": 1.5557399071334243e-07,
      "loss": 3.0941,
      "step": 228041
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8947036266326904,
      "learning_rate": 1.5544229924457917e-07,
      "loss": 2.9493,
      "step": 228042
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.792762517929077,
      "learning_rate": 1.5531066352258982e-07,
      "loss": 2.9281,
      "step": 228043
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.307396650314331,
      "learning_rate": 1.551790835473743e-07,
      "loss": 2.9893,
      "step": 228044
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.885908842086792,
      "learning_rate": 1.5504755931896594e-07,
      "loss": 2.6854,
      "step": 228045
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9946155548095703,
      "learning_rate": 1.549160908374314e-07,
      "loss": 3.0944,
      "step": 228046
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8486783504486084,
      "learning_rate": 1.54784678102704e-07,
      "loss": 2.8198,
      "step": 228047
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.993945837020874,
      "learning_rate": 1.5465332111491701e-07,
      "loss": 2.9019,
      "step": 228048
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8998525142669678,
      "learning_rate": 1.545220198740038e-07,
      "loss": 2.9019,
      "step": 228049
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.158576250076294,
      "learning_rate": 1.54390774380031e-07,
      "loss": 2.7717,
      "step": 228050
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.249556064605713,
      "learning_rate": 1.542595846330319e-07,
      "loss": 2.9669,
      "step": 228051
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.779900312423706,
      "learning_rate": 1.541284506330065e-07,
      "loss": 2.9525,
      "step": 228052
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8797895908355713,
      "learning_rate": 1.539973723799881e-07,
      "loss": 2.9872,
      "step": 228053
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.954878330230713,
      "learning_rate": 1.5386634987397673e-07,
      "loss": 2.872,
      "step": 228054
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.928057909011841,
      "learning_rate": 1.5373538311507226e-07,
      "loss": 3.09,
      "step": 228055
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.483832597732544,
      "learning_rate": 1.5360447210320815e-07,
      "loss": 2.8069,
      "step": 228056
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3388659954071045,
      "learning_rate": 1.5347361683845094e-07,
      "loss": 2.9762,
      "step": 228057
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8777236938476562,
      "learning_rate": 1.5334281732083397e-07,
      "loss": 3.0687,
      "step": 228058
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.301149845123291,
      "learning_rate": 1.5321207355039056e-07,
      "loss": 3.1485,
      "step": 228059
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7739782333374023,
      "learning_rate": 1.530813855270874e-07,
      "loss": 2.8174,
      "step": 228060
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2001733779907227,
      "learning_rate": 1.5295075325102434e-07,
      "loss": 2.8132,
      "step": 228061
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.857712507247925,
      "learning_rate": 1.5282017672216819e-07,
      "loss": 3.069,
      "step": 228062
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.233659267425537,
      "learning_rate": 1.5268965594055215e-07,
      "loss": 2.846,
      "step": 228063
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1238017082214355,
      "learning_rate": 1.5255919090620962e-07,
      "loss": 2.8706,
      "step": 228064
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9181830883026123,
      "learning_rate": 1.5242878161920713e-07,
      "loss": 2.9553,
      "step": 228065
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7370057106018066,
      "learning_rate": 1.5229842807947813e-07,
      "loss": 2.9813,
      "step": 228066
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.815274477005005,
      "learning_rate": 1.521681302871225e-07,
      "loss": 2.7029,
      "step": 228067
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.820082426071167,
      "learning_rate": 1.5203788824214024e-07,
      "loss": 3.041,
      "step": 228068
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.931879758834839,
      "learning_rate": 1.519077019445647e-07,
      "loss": 2.9503,
      "step": 228069
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.466362953186035,
      "learning_rate": 1.5177757139436253e-07,
      "loss": 2.7375,
      "step": 228070
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.114022970199585,
      "learning_rate": 1.5164749659163367e-07,
      "loss": 2.9419,
      "step": 228071
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3089847564697266,
      "learning_rate": 1.515174775363781e-07,
      "loss": 2.7192,
      "step": 228072
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0471889972686768,
      "learning_rate": 1.5138751422862916e-07,
      "loss": 2.827,
      "step": 228073
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.848301887512207,
      "learning_rate": 1.5125760666838683e-07,
      "loss": 2.7656,
      "step": 228074
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.846301317214966,
      "learning_rate": 1.511277548556844e-07,
      "loss": 3.0314,
      "step": 228075
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.572523593902588,
      "learning_rate": 1.509979587905219e-07,
      "loss": 3.1297,
      "step": 228076
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1262829303741455,
      "learning_rate": 1.5086821847299923e-07,
      "loss": 3.0262,
      "step": 228077
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.950563669204712,
      "learning_rate": 1.507385339030498e-07,
      "loss": 2.8382,
      "step": 228078
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.942561626434326,
      "learning_rate": 1.5060890508074018e-07,
      "loss": 2.9789,
      "step": 228079
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.79483699798584,
      "learning_rate": 1.5047933200610374e-07,
      "loss": 3.2556,
      "step": 228080
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6466565132141113,
      "learning_rate": 1.5034981467917373e-07,
      "loss": 2.979,
      "step": 228081
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7906062602996826,
      "learning_rate": 1.5022035309991687e-07,
      "loss": 3.0789,
      "step": 228082
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.888015031814575,
      "learning_rate": 1.5009094726839976e-07,
      "loss": 3.0629,
      "step": 228083
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3453099727630615,
      "learning_rate": 1.4996159718465572e-07,
      "loss": 2.7698,
      "step": 228084
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.408475160598755,
      "learning_rate": 1.4983230284868474e-07,
      "loss": 2.8749,
      "step": 228085
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9456870555877686,
      "learning_rate": 1.4970306426052014e-07,
      "loss": 2.8289,
      "step": 228086
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.9280974864959717,
      "learning_rate": 1.495738814201952e-07,
      "loss": 2.8151,
      "step": 228087
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.098806619644165,
      "learning_rate": 1.4944475432774329e-07,
      "loss": 2.996,
      "step": 228088
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.9952595233917236,
      "learning_rate": 1.4931568298313102e-07,
      "loss": 2.8867,
      "step": 228089
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.981060266494751,
      "learning_rate": 1.4918666738642504e-07,
      "loss": 2.9361,
      "step": 228090
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6074485778808594,
      "learning_rate": 1.4905770753765867e-07,
      "loss": 2.7848,
      "step": 228091
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0508627891540527,
      "learning_rate": 1.4892880343683188e-07,
      "loss": 2.9921,
      "step": 228092
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9633049964904785,
      "learning_rate": 1.4879995508401133e-07,
      "loss": 3.1362,
      "step": 228093
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0787642002105713,
      "learning_rate": 1.4867116247916365e-07,
      "loss": 2.7465,
      "step": 228094
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0569746494293213,
      "learning_rate": 1.4854242562232221e-07,
      "loss": 2.7697,
      "step": 228095
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.186856508255005,
      "learning_rate": 1.4841374451355358e-07,
      "loss": 2.6792,
      "step": 228096
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6702005863189697,
      "learning_rate": 1.4828511915285778e-07,
      "loss": 2.9128,
      "step": 228097
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0888679027557373,
      "learning_rate": 1.481565495402348e-07,
      "loss": 2.9456,
      "step": 228098
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9382522106170654,
      "learning_rate": 1.4802803567575127e-07,
      "loss": 2.9733,
      "step": 228099
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6257412433624268,
      "learning_rate": 1.4789957755937388e-07,
      "loss": 2.887,
      "step": 228100
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.667480230331421,
      "learning_rate": 1.4777117519120253e-07,
      "loss": 3.0146,
      "step": 228101
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.180138349533081,
      "learning_rate": 1.476428285712039e-07,
      "loss": 3.158,
      "step": 228102
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.971440076828003,
      "learning_rate": 1.4751453769941135e-07,
      "loss": 3.0872,
      "step": 228103
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3071887493133545,
      "learning_rate": 1.4738630257585816e-07,
      "loss": 2.7918,
      "step": 228104
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6686322689056396,
      "learning_rate": 1.4725812320057762e-07,
      "loss": 2.9966,
      "step": 228105
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9176690578460693,
      "learning_rate": 1.4712999957356975e-07,
      "loss": 3.0836,
      "step": 228106
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.89347243309021,
      "learning_rate": 1.4700193169486784e-07,
      "loss": 2.7893,
      "step": 228107
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7400262355804443,
      "learning_rate": 1.4687391956450523e-07,
      "loss": 2.9969,
      "step": 228108
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1267359256744385,
      "learning_rate": 1.467459631825152e-07,
      "loss": 2.8862,
      "step": 228109
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9227516651153564,
      "learning_rate": 1.4661806254886444e-07,
      "loss": 2.9867,
      "step": 228110
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0902180671691895,
      "learning_rate": 1.4649021766365288e-07,
      "loss": 2.8531,
      "step": 228111
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9676883220672607,
      "learning_rate": 1.463624285268472e-07,
      "loss": 2.7625,
      "step": 228112
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.352141857147217,
      "learning_rate": 1.4623469513851404e-07,
      "loss": 2.9542,
      "step": 228113
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4585583209991455,
      "learning_rate": 1.4610701749865339e-07,
      "loss": 3.004,
      "step": 228114
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1660659313201904,
      "learning_rate": 1.4597939560729855e-07,
      "loss": 2.9831,
      "step": 228115
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3945581912994385,
      "learning_rate": 1.4585182946444952e-07,
      "loss": 2.8343,
      "step": 228116
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.932257652282715,
      "learning_rate": 1.457243190701396e-07,
      "loss": 2.8376,
      "step": 228117
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.841042995452881,
      "learning_rate": 1.455968644244021e-07,
      "loss": 2.8637,
      "step": 228118
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.371720790863037,
      "learning_rate": 1.4546946552727034e-07,
      "loss": 2.6689,
      "step": 228119
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.47930383682251,
      "learning_rate": 1.4534212237874432e-07,
      "loss": 3.0652,
      "step": 228120
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.701172113418579,
      "learning_rate": 1.4521483497889065e-07,
      "loss": 2.9106,
      "step": 228121
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3804845809936523,
      "learning_rate": 1.45087603327676e-07,
      "loss": 3.3702,
      "step": 228122
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.945849895477295,
      "learning_rate": 1.449604274251337e-07,
      "loss": 2.7751,
      "step": 228123
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7989747524261475,
      "learning_rate": 1.4483330727133037e-07,
      "loss": 3.0164,
      "step": 228124
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5294015407562256,
      "learning_rate": 1.4470624286626597e-07,
      "loss": 2.8165,
      "step": 228125
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6979594230651855,
      "learning_rate": 1.4457923420994055e-07,
      "loss": 2.9811,
      "step": 228126
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7004568576812744,
      "learning_rate": 1.4445228130242071e-07,
      "loss": 2.996,
      "step": 228127
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9388816356658936,
      "learning_rate": 1.4432538414367313e-07,
      "loss": 3.0216,
      "step": 228128
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.596287488937378,
      "learning_rate": 1.441985427337977e-07,
      "loss": 2.9604,
      "step": 228129
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2818057537078857,
      "learning_rate": 1.4407175707272788e-07,
      "loss": 2.9664,
      "step": 228130
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9235806465148926,
      "learning_rate": 1.4394502716056354e-07,
      "loss": 2.8258,
      "step": 228131
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.982168197631836,
      "learning_rate": 1.438183529973047e-07,
      "loss": 3.0002,
      "step": 228132
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.845106601715088,
      "learning_rate": 1.4369173458295137e-07,
      "loss": 2.8944,
      "step": 228133
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.645667791366577,
      "learning_rate": 1.435651719175701e-07,
      "loss": 2.9548,
      "step": 228134
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1298434734344482,
      "learning_rate": 1.4343866500116096e-07,
      "loss": 2.7522,
      "step": 228135
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8876473903656006,
      "learning_rate": 1.4331221383372393e-07,
      "loss": 2.955,
      "step": 228136
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.070817470550537,
      "learning_rate": 1.431858184153256e-07,
      "loss": 3.0336,
      "step": 228137
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0546154975891113,
      "learning_rate": 1.430594787459327e-07,
      "loss": 2.9876,
      "step": 228138
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0033745765686035,
      "learning_rate": 1.4293319482564514e-07,
      "loss": 2.908,
      "step": 228139
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9051873683929443,
      "learning_rate": 1.428069666544296e-07,
      "loss": 2.9782,
      "step": 228140
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.7453534603118896,
      "learning_rate": 1.4268079423235268e-07,
      "loss": 3.0655,
      "step": 228141
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.173304796218872,
      "learning_rate": 1.425546775593811e-07,
      "loss": 3.2553,
      "step": 228142
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5920395851135254,
      "learning_rate": 1.4242861663558146e-07,
      "loss": 2.9104,
      "step": 228143
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.069389581680298,
      "learning_rate": 1.423026114609538e-07,
      "loss": 2.7139,
      "step": 228144
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1195662021636963,
      "learning_rate": 1.4217666203553135e-07,
      "loss": 2.9704,
      "step": 228145
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.717916965484619,
      "learning_rate": 1.4205076835934747e-07,
      "loss": 2.6636,
      "step": 228146
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7231833934783936,
      "learning_rate": 1.4192493043240217e-07,
      "loss": 2.8388,
      "step": 228147
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7554550170898438,
      "learning_rate": 1.41799148254762e-07,
      "loss": 2.9909,
      "step": 228148
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7679176330566406,
      "learning_rate": 1.4167342182639374e-07,
      "loss": 2.8116,
      "step": 228149
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8217179775238037,
      "learning_rate": 1.4154775114736395e-07,
      "loss": 2.9909,
      "step": 228150
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0655574798583984,
      "learning_rate": 1.4142213621767262e-07,
      "loss": 2.6188,
      "step": 228151
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.582345485687256,
      "learning_rate": 1.412965770373531e-07,
      "loss": 3.0932,
      "step": 228152
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.040250062942505,
      "learning_rate": 1.4117107360643865e-07,
      "loss": 2.9704,
      "step": 228153
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.153242349624634,
      "learning_rate": 1.410456259249293e-07,
      "loss": 2.7532,
      "step": 228154
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.133435010910034,
      "learning_rate": 1.4092023399285834e-07,
      "loss": 2.8417,
      "step": 228155
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.875483751296997,
      "learning_rate": 1.407948978102591e-07,
      "loss": 3.1484,
      "step": 228156
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8445332050323486,
      "learning_rate": 1.4066961737713156e-07,
      "loss": 2.9783,
      "step": 228157
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0601003170013428,
      "learning_rate": 1.4054439269354233e-07,
      "loss": 2.7983,
      "step": 228158
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0336246490478516,
      "learning_rate": 1.4041922375945813e-07,
      "loss": 2.8618,
      "step": 228159
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8712334632873535,
      "learning_rate": 1.4029411057494556e-07,
      "loss": 2.7669,
      "step": 228160
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7117505073547363,
      "learning_rate": 1.401690531400046e-07,
      "loss": 2.7696,
      "step": 228161
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5596208572387695,
      "learning_rate": 1.4004405145466858e-07,
      "loss": 3.0074,
      "step": 228162
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8046631813049316,
      "learning_rate": 1.399191055189708e-07,
      "loss": 2.8956,
      "step": 228163
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.441443681716919,
      "learning_rate": 1.3979421533291124e-07,
      "loss": 2.8537,
      "step": 228164
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7997655868530273,
      "learning_rate": 1.3966938089652324e-07,
      "loss": 2.7347,
      "step": 228165
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.083484649658203,
      "learning_rate": 1.395446022098068e-07,
      "loss": 3.0077,
      "step": 228166
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4397072792053223,
      "learning_rate": 1.394198792728618e-07,
      "loss": 2.8356,
      "step": 228167
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7286672592163086,
      "learning_rate": 1.3929521208562167e-07,
      "loss": 2.6856,
      "step": 228168
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.716850996017456,
      "learning_rate": 1.39170600648153e-07,
      "loss": 2.6419,
      "step": 228169
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9259958267211914,
      "learning_rate": 1.390460449604891e-07,
      "loss": 3.0021,
      "step": 228170
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8493778705596924,
      "learning_rate": 1.3892154502263e-07,
      "loss": 2.859,
      "step": 228171
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3489041328430176,
      "learning_rate": 1.3879710083457562e-07,
      "loss": 3.0606,
      "step": 228172
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5224812030792236,
      "learning_rate": 1.3867271239642598e-07,
      "loss": 2.873,
      "step": 228173
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.022287368774414,
      "learning_rate": 1.3854837970811438e-07,
      "loss": 2.6611,
      "step": 228174
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0456702709198,
      "learning_rate": 1.3842410276974082e-07,
      "loss": 3.0329,
      "step": 228175
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.98071551322937,
      "learning_rate": 1.3829988158127192e-07,
      "loss": 2.8531,
      "step": 228176
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2838780879974365,
      "learning_rate": 1.3817571614277435e-07,
      "loss": 2.8475,
      "step": 228177
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5893945693969727,
      "learning_rate": 1.3805160645421475e-07,
      "loss": 2.7521,
      "step": 228178
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.456716537475586,
      "learning_rate": 1.379275525156931e-07,
      "loss": 2.9526,
      "step": 228179
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1010167598724365,
      "learning_rate": 1.3780355432717604e-07,
      "loss": 3.3818,
      "step": 228180
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7035434246063232,
      "learning_rate": 1.3767961188869693e-07,
      "loss": 3.1248,
      "step": 228181
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.950803518295288,
      "learning_rate": 1.37555725200289e-07,
      "loss": 2.8286,
      "step": 228182
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8949790000915527,
      "learning_rate": 1.3743189426195235e-07,
      "loss": 2.8741,
      "step": 228183
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8101837635040283,
      "learning_rate": 1.373081190737535e-07,
      "loss": 3.0705,
      "step": 228184
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.000488758087158,
      "learning_rate": 1.371843996356592e-07,
      "loss": 3.0624,
      "step": 228185
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5131328105926514,
      "learning_rate": 1.3706073594773604e-07,
      "loss": 2.9673,
      "step": 228186
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.94140362739563,
      "learning_rate": 1.3693712801001732e-07,
      "loss": 3.0051,
      "step": 228187
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2941253185272217,
      "learning_rate": 1.3681357582246978e-07,
      "loss": 2.8303,
      "step": 228188
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0087337493896484,
      "learning_rate": 1.3669007938515997e-07,
      "loss": 3.0683,
      "step": 228189
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6513662338256836,
      "learning_rate": 1.3656663869812124e-07,
      "loss": 3.0649,
      "step": 228190
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0758683681488037,
      "learning_rate": 1.3644325376132027e-07,
      "loss": 2.7903,
      "step": 228191
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.826564073562622,
      "learning_rate": 1.3631992457482367e-07,
      "loss": 2.8606,
      "step": 228192
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6225647926330566,
      "learning_rate": 1.3619665113866474e-07,
      "loss": 2.9154,
      "step": 228193
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.747454881668091,
      "learning_rate": 1.360734334528435e-07,
      "loss": 2.8741,
      "step": 228194
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.110265016555786,
      "learning_rate": 1.3595027151735994e-07,
      "loss": 3.0214,
      "step": 228195
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4196460247039795,
      "learning_rate": 1.3582716533228065e-07,
      "loss": 2.9102,
      "step": 228196
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.515267848968506,
      "learning_rate": 1.3570411489760568e-07,
      "loss": 2.998,
      "step": 228197
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6330273151397705,
      "learning_rate": 1.3558112021340163e-07,
      "loss": 3.0059,
      "step": 228198
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0353899002075195,
      "learning_rate": 1.3545818127960183e-07,
      "loss": 3.09,
      "step": 228199
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8653132915496826,
      "learning_rate": 1.3533529809630628e-07,
      "loss": 3.0405,
      "step": 228200
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9047250747680664,
      "learning_rate": 1.3521247066351494e-07,
      "loss": 2.9346,
      "step": 228201
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.818742275238037,
      "learning_rate": 1.3508969898122779e-07,
      "loss": 2.9302,
      "step": 228202
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.095313310623169,
      "learning_rate": 1.3496698304951147e-07,
      "loss": 3.1088,
      "step": 228203
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.068992853164673,
      "learning_rate": 1.34844322868366e-07,
      "loss": 2.8981,
      "step": 228204
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9965693950653076,
      "learning_rate": 1.3472171843779132e-07,
      "loss": 2.964,
      "step": 228205
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6399805545806885,
      "learning_rate": 1.3459916975782082e-07,
      "loss": 2.8401,
      "step": 228206
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8426482677459717,
      "learning_rate": 1.3447667682852103e-07,
      "loss": 2.9266,
      "step": 228207
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.079989433288574,
      "learning_rate": 1.34354239649892e-07,
      "loss": 2.8527,
      "step": 228208
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1339974403381348,
      "learning_rate": 1.3423185822190042e-07,
      "loss": 2.9263,
      "step": 228209
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.479893684387207,
      "learning_rate": 1.3410953254464618e-07,
      "loss": 2.7534,
      "step": 228210
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1545865535736084,
      "learning_rate": 1.339872626181293e-07,
      "loss": 2.9111,
      "step": 228211
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.477649450302124,
      "learning_rate": 1.338650484423498e-07,
      "loss": 3.0562,
      "step": 228212
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.478567123413086,
      "learning_rate": 1.3374289001734094e-07,
      "loss": 3.0597,
      "step": 228213
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9744017124176025,
      "learning_rate": 1.336207873431361e-07,
      "loss": 3.2302,
      "step": 228214
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2489943504333496,
      "learning_rate": 1.334987404197685e-07,
      "loss": 2.7967,
      "step": 228215
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.217477321624756,
      "learning_rate": 1.3337674924720488e-07,
      "loss": 2.8854,
      "step": 228216
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.911506175994873,
      "learning_rate": 1.3325481382554515e-07,
      "loss": 3.0343,
      "step": 228217
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4946558475494385,
      "learning_rate": 1.3313293415475602e-07,
      "loss": 2.8474,
      "step": 228218
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.590134382247925,
      "learning_rate": 1.3301111023487078e-07,
      "loss": 2.8672,
      "step": 228219
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.892033338546753,
      "learning_rate": 1.328893420659227e-07,
      "loss": 2.8009,
      "step": 228220
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0231642723083496,
      "learning_rate": 1.3276762964794518e-07,
      "loss": 2.94,
      "step": 228221
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.259855270385742,
      "learning_rate": 1.3264597298093815e-07,
      "loss": 2.9602,
      "step": 228222
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.459958076477051,
      "learning_rate": 1.3252437206493492e-07,
      "loss": 3.0064,
      "step": 228223
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7426016330718994,
      "learning_rate": 1.3240282689993553e-07,
      "loss": 2.8403,
      "step": 228224
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.241227388381958,
      "learning_rate": 1.3228133748600656e-07,
      "loss": 3.0079,
      "step": 228225
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.498070240020752,
      "learning_rate": 1.32159903823148e-07,
      "loss": 2.8462,
      "step": 228226
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4310829639434814,
      "learning_rate": 1.320385259113932e-07,
      "loss": 3.0759,
      "step": 228227
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.883077383041382,
      "learning_rate": 1.319172037507088e-07,
      "loss": 3.0242,
      "step": 228228
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8429763317108154,
      "learning_rate": 1.3179593734119475e-07,
      "loss": 2.7114,
      "step": 228229
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.051225185394287,
      "learning_rate": 1.3167472668285107e-07,
      "loss": 2.7965,
      "step": 228230
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7907116413116455,
      "learning_rate": 1.315535717756444e-07,
      "loss": 2.6809,
      "step": 228231
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0507874488830566,
      "learning_rate": 1.3143247261967472e-07,
      "loss": 3.1445,
      "step": 228232
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8180453777313232,
      "learning_rate": 1.3131142921494197e-07,
      "loss": 3.1882,
      "step": 228233
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.881526470184326,
      "learning_rate": 1.311904415614462e-07,
      "loss": 3.0131,
      "step": 228234
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.276402235031128,
      "learning_rate": 1.310695096592207e-07,
      "loss": 3.0125,
      "step": 228235
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.7748358249664307,
      "learning_rate": 1.3094863350826546e-07,
      "loss": 2.9756,
      "step": 228236
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.716547966003418,
      "learning_rate": 1.3082781310864708e-07,
      "loss": 2.784,
      "step": 228237
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.846987247467041,
      "learning_rate": 1.3070704846039893e-07,
      "loss": 3.1403,
      "step": 228238
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.478642702102661,
      "learning_rate": 1.3058633956345432e-07,
      "loss": 3.0564,
      "step": 228239
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.246673583984375,
      "learning_rate": 1.3046568641794651e-07,
      "loss": 2.9558,
      "step": 228240
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8031539916992188,
      "learning_rate": 1.303450890238089e-07,
      "loss": 2.8763,
      "step": 228241
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6084353923797607,
      "learning_rate": 1.3022454738114141e-07,
      "loss": 2.9908,
      "step": 228242
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.002955436706543,
      "learning_rate": 1.301040614898774e-07,
      "loss": 2.7524,
      "step": 228243
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2190308570861816,
      "learning_rate": 1.299836313501168e-07,
      "loss": 2.8911,
      "step": 228244
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.874598503112793,
      "learning_rate": 1.2986325696185962e-07,
      "loss": 3.0223,
      "step": 228245
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.930103063583374,
      "learning_rate": 1.2974293832510584e-07,
      "loss": 2.857,
      "step": 228246
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2253079414367676,
      "learning_rate": 1.296226754398888e-07,
      "loss": 2.9057,
      "step": 228247
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.122330665588379,
      "learning_rate": 1.295024683062418e-07,
      "loss": 2.7846,
      "step": 228248
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.715331554412842,
      "learning_rate": 1.293823169241648e-07,
      "loss": 2.8514,
      "step": 228249
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1948299407958984,
      "learning_rate": 1.2926222129372444e-07,
      "loss": 2.9465,
      "step": 228250
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.109158039093018,
      "learning_rate": 1.2914218141492073e-07,
      "loss": 2.9648,
      "step": 228251
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4618632793426514,
      "learning_rate": 1.2902219728775364e-07,
      "loss": 2.984,
      "step": 228252
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.113595485687256,
      "learning_rate": 1.2890226891225651e-07,
      "loss": 2.905,
      "step": 228253
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.183215618133545,
      "learning_rate": 1.2878239628846266e-07,
      "loss": 2.8392,
      "step": 228254
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.442138433456421,
      "learning_rate": 1.2866257941637204e-07,
      "loss": 2.9681,
      "step": 228255
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.048006057739258,
      "learning_rate": 1.285428182960513e-07,
      "loss": 2.7459,
      "step": 228256
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.940068244934082,
      "learning_rate": 1.284231129274671e-07,
      "loss": 2.9824,
      "step": 228257
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.823946237564087,
      "learning_rate": 1.283034633106861e-07,
      "loss": 2.8741,
      "step": 228258
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1757454872131348,
      "learning_rate": 1.2818386944570824e-07,
      "loss": 3.1402,
      "step": 228259
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6774165630340576,
      "learning_rate": 1.2806433133256688e-07,
      "loss": 2.7141,
      "step": 228260
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.983881950378418,
      "learning_rate": 1.279448489712953e-07,
      "loss": 2.7693,
      "step": 228261
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2376580238342285,
      "learning_rate": 1.2782542236186022e-07,
      "loss": 2.8424,
      "step": 228262
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8435044288635254,
      "learning_rate": 1.2770605150436153e-07,
      "loss": 2.3411,
      "step": 228263
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.032836437225342,
      "learning_rate": 1.2758673639876594e-07,
      "loss": 3.1537,
      "step": 228264
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.789935827255249,
      "learning_rate": 1.2746747704510672e-07,
      "loss": 2.9872,
      "step": 228265
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2197084426879883,
      "learning_rate": 1.2734827344341724e-07,
      "loss": 2.9694,
      "step": 228266
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7286603450775146,
      "learning_rate": 1.2722912559369747e-07,
      "loss": 2.9843,
      "step": 228267
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5498104095458984,
      "learning_rate": 1.2711003349601402e-07,
      "loss": 3.1302,
      "step": 228268
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8711092472076416,
      "learning_rate": 1.2699099715036687e-07,
      "loss": 3.2166,
      "step": 228269
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.251688003540039,
      "learning_rate": 1.2687201655675606e-07,
      "loss": 3.0671,
      "step": 228270
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.717695951461792,
      "learning_rate": 1.2675309171521486e-07,
      "loss": 2.8882,
      "step": 228271
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.052165985107422,
      "learning_rate": 1.266342226257766e-07,
      "loss": 2.8328,
      "step": 228272
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.194704294204712,
      "learning_rate": 1.265154092884413e-07,
      "loss": 2.8194,
      "step": 228273
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.118255615234375,
      "learning_rate": 1.2639665170327552e-07,
      "loss": 2.8927,
      "step": 228274
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8606176376342773,
      "learning_rate": 1.2627794987024598e-07,
      "loss": 3.1251,
      "step": 228275
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.004246473312378,
      "learning_rate": 1.261593037894193e-07,
      "loss": 2.8749,
      "step": 228276
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.140594005584717,
      "learning_rate": 1.2604071346079548e-07,
      "loss": 2.8953,
      "step": 228277
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1826138496398926,
      "learning_rate": 1.2592217888440782e-07,
      "loss": 2.9611,
      "step": 228278
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.150367259979248,
      "learning_rate": 1.258037000602896e-07,
      "loss": 3.0486,
      "step": 228279
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8591701984405518,
      "learning_rate": 1.256852769884076e-07,
      "loss": 2.8479,
      "step": 228280
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.038787364959717,
      "learning_rate": 1.2556690966882832e-07,
      "loss": 2.8982,
      "step": 228281
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6650187969207764,
      "learning_rate": 1.2544859810158514e-07,
      "loss": 2.9967,
      "step": 228282
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1448171138763428,
      "learning_rate": 1.2533034228667803e-07,
      "loss": 2.8875,
      "step": 228283
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.162848472595215,
      "learning_rate": 1.2521214222410703e-07,
      "loss": 2.8972,
      "step": 228284
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8904876708984375,
      "learning_rate": 1.2509399791393871e-07,
      "loss": 2.8667,
      "step": 228285
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8724236488342285,
      "learning_rate": 1.2497590935620637e-07,
      "loss": 2.9558,
      "step": 228286
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.754920482635498,
      "learning_rate": 1.2485787655084344e-07,
      "loss": 2.9766,
      "step": 228287
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.155423879623413,
      "learning_rate": 1.2473989949798313e-07,
      "loss": 2.9428,
      "step": 228288
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.847259998321533,
      "learning_rate": 1.246219781975588e-07,
      "loss": 2.7782,
      "step": 228289
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.760589122772217,
      "learning_rate": 1.245041126496371e-07,
      "loss": 3.2046,
      "step": 228290
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.061701774597168,
      "learning_rate": 1.2438630285421803e-07,
      "loss": 3.1027,
      "step": 228291
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.561546564102173,
      "learning_rate": 1.2426854881136816e-07,
      "loss": 3.0919,
      "step": 228292
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.190819501876831,
      "learning_rate": 1.2415085052105422e-07,
      "loss": 2.9104,
      "step": 228293
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9886059761047363,
      "learning_rate": 1.240332079833095e-07,
      "loss": 2.8899,
      "step": 228294
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.668485403060913,
      "learning_rate": 1.2391562119820064e-07,
      "loss": 3.0264,
      "step": 228295
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.027601480484009,
      "learning_rate": 1.237980901656943e-07,
      "loss": 3.0557,
      "step": 228296
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8781089782714844,
      "learning_rate": 1.236806148858571e-07,
      "loss": 2.7551,
      "step": 228297
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.297673463821411,
      "learning_rate": 1.2356319535865577e-07,
      "loss": 2.7481,
      "step": 228298
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.683281421661377,
      "learning_rate": 1.2344583158415688e-07,
      "loss": 2.7466,
      "step": 228299
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.025749683380127,
      "learning_rate": 1.2332852356239377e-07,
      "loss": 2.8239,
      "step": 228300
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0588464736938477,
      "learning_rate": 1.2321127129333308e-07,
      "loss": 2.9801,
      "step": 228301
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.194253444671631,
      "learning_rate": 1.2309407477704148e-07,
      "loss": 3.2948,
      "step": 228302
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8595199584960938,
      "learning_rate": 1.2297693401351894e-07,
      "loss": 2.9026,
      "step": 228303
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.19088077545166,
      "learning_rate": 1.2285984900279877e-07,
      "loss": 3.0485,
      "step": 228304
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6939775943756104,
      "learning_rate": 1.227428197449143e-07,
      "loss": 2.8772,
      "step": 228305
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.362910747528076,
      "learning_rate": 1.226258462398655e-07,
      "loss": 2.9495,
      "step": 228306
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0774991512298584,
      "learning_rate": 1.2250892848765236e-07,
      "loss": 2.8169,
      "step": 228307
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.46738600730896,
      "learning_rate": 1.2239206648837485e-07,
      "loss": 2.8677,
      "step": 228308
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2347402572631836,
      "learning_rate": 1.2227526024196634e-07,
      "loss": 2.8899,
      "step": 228309
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.032240152359009,
      "learning_rate": 1.221585097484934e-07,
      "loss": 3.0656,
      "step": 228310
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.571875810623169,
      "learning_rate": 1.2204181500798937e-07,
      "loss": 2.6868,
      "step": 228311
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9170315265655518,
      "learning_rate": 1.2192517602045426e-07,
      "loss": 2.9241,
      "step": 228312
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.605057716369629,
      "learning_rate": 1.2180859278588805e-07,
      "loss": 3.076,
      "step": 228313
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.121570110321045,
      "learning_rate": 1.216920653043574e-07,
      "loss": 2.669,
      "step": 228314
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.390239715576172,
      "learning_rate": 1.2157559357586221e-07,
      "loss": 2.8623,
      "step": 228315
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.273568868637085,
      "learning_rate": 1.2145917760043588e-07,
      "loss": 2.812,
      "step": 228316
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.757485866546631,
      "learning_rate": 1.213428173781117e-07,
      "loss": 2.8908,
      "step": 228317
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.716313123703003,
      "learning_rate": 1.2122651290885633e-07,
      "loss": 2.9454,
      "step": 228318
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.858898878097534,
      "learning_rate": 1.211102641927364e-07,
      "loss": 3.0191,
      "step": 228319
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1932804584503174,
      "learning_rate": 1.2099407122975191e-07,
      "loss": 2.8432,
      "step": 228320
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.880237340927124,
      "learning_rate": 1.2087793401993617e-07,
      "loss": 3.0575,
      "step": 228321
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.782994270324707,
      "learning_rate": 1.207618525633225e-07,
      "loss": 2.9049,
      "step": 228322
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.100135087966919,
      "learning_rate": 1.2064582685991088e-07,
      "loss": 2.8265,
      "step": 228323
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2435221672058105,
      "learning_rate": 1.205298569097346e-07,
      "loss": 2.9125,
      "step": 228324
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0345418453216553,
      "learning_rate": 1.20413942712827e-07,
      "loss": 2.8782,
      "step": 228325
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3167316913604736,
      "learning_rate": 1.2029808426915477e-07,
      "loss": 3.0157,
      "step": 228326
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.8565940856933594,
      "learning_rate": 1.2018228157881782e-07,
      "loss": 2.8736,
      "step": 228327
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7635228633880615,
      "learning_rate": 1.2006653464178285e-07,
      "loss": 2.852,
      "step": 228328
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9826340675354004,
      "learning_rate": 1.1995084345811645e-07,
      "loss": 2.6681,
      "step": 228329
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.758704423904419,
      "learning_rate": 1.1983520802778535e-07,
      "loss": 2.8807,
      "step": 228330
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6086835861206055,
      "learning_rate": 1.1971962835082283e-07,
      "loss": 3.2344,
      "step": 228331
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.802471876144409,
      "learning_rate": 1.196041044272955e-07,
      "loss": 3.0434,
      "step": 228332
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3079428672790527,
      "learning_rate": 1.1948863625717008e-07,
      "loss": 3.065,
      "step": 228333
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.373971939086914,
      "learning_rate": 1.1937322384051318e-07,
      "loss": 3.1243,
      "step": 228334
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1120924949645996,
      "learning_rate": 1.1925786717732477e-07,
      "loss": 3.0535,
      "step": 228335
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.897303342819214,
      "learning_rate": 1.1914256626760488e-07,
      "loss": 2.788,
      "step": 228336
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6458346843719482,
      "learning_rate": 1.190273211114201e-07,
      "loss": 3.0603,
      "step": 228337
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9347453117370605,
      "learning_rate": 1.1891213170873715e-07,
      "loss": 2.9725,
      "step": 228338
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8396594524383545,
      "learning_rate": 1.1879699805965593e-07,
      "loss": 2.8476,
      "step": 228339
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.528475522994995,
      "learning_rate": 1.1868192016410982e-07,
      "loss": 2.8451,
      "step": 228340
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.04845929145813,
      "learning_rate": 1.1856689802219877e-07,
      "loss": 2.9732,
      "step": 228341
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.4342477321624756,
      "learning_rate": 1.1845193163388944e-07,
      "loss": 2.8517,
      "step": 228342
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2828614711761475,
      "learning_rate": 1.1833702099921516e-07,
      "loss": 2.8389,
      "step": 228343
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.649705171585083,
      "learning_rate": 1.1822216611820922e-07,
      "loss": 2.9013,
      "step": 228344
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8284194469451904,
      "learning_rate": 1.1810736699087164e-07,
      "loss": 3.0094,
      "step": 228345
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0119118690490723,
      "learning_rate": 1.1799262361723571e-07,
      "loss": 2.9441,
      "step": 228346
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.938236951828003,
      "learning_rate": 1.1787793599733476e-07,
      "loss": 2.9025,
      "step": 228347
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.646876573562622,
      "learning_rate": 1.1776330413120205e-07,
      "loss": 3.2995,
      "step": 228348
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3447301387786865,
      "learning_rate": 1.1764872801883762e-07,
      "loss": 3.1862,
      "step": 228349
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.151995897293091,
      "learning_rate": 1.1753420766024147e-07,
      "loss": 2.8623,
      "step": 228350
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1795661449432373,
      "learning_rate": 1.1741974305544688e-07,
      "loss": 2.7999,
      "step": 228351
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.862501621246338,
      "learning_rate": 1.1730533420448717e-07,
      "loss": 2.9814,
      "step": 228352
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2276172637939453,
      "learning_rate": 1.1719098110739567e-07,
      "loss": 2.8546,
      "step": 228353
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.247555732727051,
      "learning_rate": 1.1707668376420564e-07,
      "loss": 2.8836,
      "step": 228354
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8785386085510254,
      "learning_rate": 1.1696244217488382e-07,
      "loss": 2.8496,
      "step": 228355
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3539538383483887,
      "learning_rate": 1.1684825633946349e-07,
      "loss": 2.9517,
      "step": 228356
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.783867835998535,
      "learning_rate": 1.1673412625801126e-07,
      "loss": 2.8648,
      "step": 228357
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9557201862335205,
      "learning_rate": 1.1662005193049384e-07,
      "loss": 3.0487,
      "step": 228358
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.109978199005127,
      "learning_rate": 1.1650603335697784e-07,
      "loss": 2.8432,
      "step": 228359
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.791841506958008,
      "learning_rate": 1.1639207053742994e-07,
      "loss": 2.7465,
      "step": 228360
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.074798107147217,
      "learning_rate": 1.1627816347195007e-07,
      "loss": 2.5578,
      "step": 228361
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3255715370178223,
      "learning_rate": 1.1616431216050492e-07,
      "loss": 2.7419,
      "step": 228362
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.942560911178589,
      "learning_rate": 1.160505166030945e-07,
      "loss": 2.8741,
      "step": 228363
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.06730318069458,
      "learning_rate": 1.1593677679978542e-07,
      "loss": 2.7375,
      "step": 228364
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5765528678894043,
      "learning_rate": 1.1582309275061097e-07,
      "loss": 2.7062,
      "step": 228365
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7198374271392822,
      "learning_rate": 1.1570946445553786e-07,
      "loss": 2.8149,
      "step": 228366
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.710312604904175,
      "learning_rate": 1.155958919146327e-07,
      "loss": 2.8599,
      "step": 228367
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.288379669189453,
      "learning_rate": 1.1548237512789549e-07,
      "loss": 2.983,
      "step": 228368
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.373133420944214,
      "learning_rate": 1.1536891409532622e-07,
      "loss": 3.0098,
      "step": 228369
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.566234588623047,
      "learning_rate": 1.1525550881699153e-07,
      "loss": 2.9723,
      "step": 228370
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6716716289520264,
      "learning_rate": 1.1514215929289139e-07,
      "loss": 3.1499,
      "step": 228371
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.129572629928589,
      "learning_rate": 1.1502886552305913e-07,
      "loss": 2.9629,
      "step": 228372
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9101758003234863,
      "learning_rate": 1.1491562750746141e-07,
      "loss": 3.0089,
      "step": 228373
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7533669471740723,
      "learning_rate": 1.148024452461982e-07,
      "loss": 3.1628,
      "step": 228374
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1061017513275146,
      "learning_rate": 1.1468931873923615e-07,
      "loss": 2.8522,
      "step": 228375
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1848111152648926,
      "learning_rate": 1.1457624798664189e-07,
      "loss": 2.8416,
      "step": 228376
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8034164905548096,
      "learning_rate": 1.1446323298838212e-07,
      "loss": 2.7562,
      "step": 228377
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7107620239257812,
      "learning_rate": 1.1435027374452343e-07,
      "loss": 2.7463,
      "step": 228378
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.081105947494507,
      "learning_rate": 1.1423737025506585e-07,
      "loss": 2.7852,
      "step": 228379
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.842546224594116,
      "learning_rate": 1.1412452252000937e-07,
      "loss": 3.0589,
      "step": 228380
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9357402324676514,
      "learning_rate": 1.1401173053942059e-07,
      "loss": 3.0843,
      "step": 228381
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9465622901916504,
      "learning_rate": 1.1389899431329953e-07,
      "loss": 2.9941,
      "step": 228382
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.046787738800049,
      "learning_rate": 1.1378631384164616e-07,
      "loss": 3.0421,
      "step": 228383
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7151730060577393,
      "learning_rate": 1.1367368912449382e-07,
      "loss": 2.8401,
      "step": 228384
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9574129581451416,
      "learning_rate": 1.1356112016190911e-07,
      "loss": 2.7916,
      "step": 228385
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7893075942993164,
      "learning_rate": 1.134486069538254e-07,
      "loss": 2.7528,
      "step": 228386
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.6348471641540527,
      "learning_rate": 1.1333614950034264e-07,
      "loss": 2.8211,
      "step": 228387
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.6035454273223877,
      "learning_rate": 1.1322374780146082e-07,
      "loss": 2.7897,
      "step": 228388
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9556891918182373,
      "learning_rate": 1.1311140185717992e-07,
      "loss": 2.9616,
      "step": 228389
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3490142822265625,
      "learning_rate": 1.1299911166753328e-07,
      "loss": 2.8896,
      "step": 228390
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.054988384246826,
      "learning_rate": 1.1288687723252088e-07,
      "loss": 2.9545,
      "step": 228391
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8488426208496094,
      "learning_rate": 1.1277469855220933e-07,
      "loss": 3.0299,
      "step": 228392
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.08483624458313,
      "learning_rate": 1.1266257562659863e-07,
      "loss": 3.0562,
      "step": 228393
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3584651947021484,
      "learning_rate": 1.125505084556888e-07,
      "loss": 2.755,
      "step": 228394
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.883624315261841,
      "learning_rate": 1.1243849703951313e-07,
      "loss": 2.985,
      "step": 228395
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0165719985961914,
      "learning_rate": 1.1232654137810493e-07,
      "loss": 2.7994,
      "step": 228396
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.839376926422119,
      "learning_rate": 1.122146414714642e-07,
      "loss": 2.565,
      "step": 228397
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7818188667297363,
      "learning_rate": 1.1210279731962424e-07,
      "loss": 2.9261,
      "step": 228398
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.17793345451355,
      "learning_rate": 1.1199100892261836e-07,
      "loss": 2.8966,
      "step": 228399
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7600159645080566,
      "learning_rate": 1.1187927628044657e-07,
      "loss": 2.9183,
      "step": 228400
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2576305866241455,
      "learning_rate": 1.1176759939314218e-07,
      "loss": 2.8201,
      "step": 228401
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.910288095474243,
      "learning_rate": 1.1165597826073846e-07,
      "loss": 2.8768,
      "step": 228402
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7974677085876465,
      "learning_rate": 1.1154441288320215e-07,
      "loss": 3.0931,
      "step": 228403
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.816129446029663,
      "learning_rate": 1.1143290326063314e-07,
      "loss": 2.6784,
      "step": 228404
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.779818058013916,
      "learning_rate": 1.1132144939296484e-07,
      "loss": 2.8766,
      "step": 228405
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.929842472076416,
      "learning_rate": 1.1121005128029714e-07,
      "loss": 2.9708,
      "step": 228406
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.17514705657959,
      "learning_rate": 1.1109870892259676e-07,
      "loss": 2.8877,
      "step": 228407
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.87923002243042,
      "learning_rate": 1.10987422319897e-07,
      "loss": 2.978,
      "step": 228408
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4131340980529785,
      "learning_rate": 1.1087619147223115e-07,
      "loss": 3.0168,
      "step": 228409
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.424910306930542,
      "learning_rate": 1.1076501637963254e-07,
      "loss": 2.6291,
      "step": 228410
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.05556583404541,
      "learning_rate": 1.1065389704210115e-07,
      "loss": 2.9483,
      "step": 228411
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.794405460357666,
      "learning_rate": 1.1054283345963699e-07,
      "loss": 2.912,
      "step": 228412
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9492733478546143,
      "learning_rate": 1.1043182563230669e-07,
      "loss": 2.9037,
      "step": 228413
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4462642669677734,
      "learning_rate": 1.103208735600769e-07,
      "loss": 2.9101,
      "step": 228414
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.9582912921905518,
      "learning_rate": 1.1020997724304758e-07,
      "loss": 2.9218,
      "step": 228415
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0379691123962402,
      "learning_rate": 1.1009913668115211e-07,
      "loss": 2.9361,
      "step": 228416
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.231928586959839,
      "learning_rate": 1.0998835187445709e-07,
      "loss": 2.7073,
      "step": 228417
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.579829692840576,
      "learning_rate": 1.0987762282299584e-07,
      "loss": 2.9839,
      "step": 228418
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.224463939666748,
      "learning_rate": 1.0976694952673504e-07,
      "loss": 3.1292,
      "step": 228419
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.853771924972534,
      "learning_rate": 1.0965633198574131e-07,
      "loss": 3.0298,
      "step": 228420
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0017189979553223,
      "learning_rate": 1.0954577020004796e-07,
      "loss": 2.9185,
      "step": 228421
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7616841793060303,
      "learning_rate": 1.0943526416962167e-07,
      "loss": 2.9897,
      "step": 228422
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8295373916625977,
      "learning_rate": 1.0932481389452907e-07,
      "loss": 2.8194,
      "step": 228423
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9587361812591553,
      "learning_rate": 1.0921441937477016e-07,
      "loss": 2.9698,
      "step": 228424
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5535471439361572,
      "learning_rate": 1.0910408061037824e-07,
      "loss": 2.9895,
      "step": 228425
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2518532276153564,
      "learning_rate": 1.089937976013533e-07,
      "loss": 2.7986,
      "step": 228426
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.864849328994751,
      "learning_rate": 1.0888357034772865e-07,
      "loss": 2.9174,
      "step": 228427
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8790462017059326,
      "learning_rate": 1.0877339884950432e-07,
      "loss": 2.8692,
      "step": 228428
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2981882095336914,
      "learning_rate": 1.0866328310674688e-07,
      "loss": 2.9412,
      "step": 228429
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4316515922546387,
      "learning_rate": 1.0855322311945636e-07,
      "loss": 2.9375,
      "step": 228430
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.356280565261841,
      "learning_rate": 1.0844321888763274e-07,
      "loss": 2.9727,
      "step": 228431
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9422109127044678,
      "learning_rate": 1.0833327041130935e-07,
      "loss": 3.0026,
      "step": 228432
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5973031520843506,
      "learning_rate": 1.0822337769048616e-07,
      "loss": 2.8017,
      "step": 228433
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.54962420463562,
      "learning_rate": 1.0811354072522982e-07,
      "loss": 2.6295,
      "step": 228434
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.538792371749878,
      "learning_rate": 1.080037595155403e-07,
      "loss": 2.8233,
      "step": 228435
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1377694606781006,
      "learning_rate": 1.0789403406141761e-07,
      "loss": 2.9802,
      "step": 228436
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3419125080108643,
      "learning_rate": 1.0778436436292837e-07,
      "loss": 2.8522,
      "step": 228437
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8777875900268555,
      "learning_rate": 1.0767475042003925e-07,
      "loss": 2.9549,
      "step": 228438
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8300538063049316,
      "learning_rate": 1.075651922327836e-07,
      "loss": 2.9568,
      "step": 228439
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2650537490844727,
      "learning_rate": 1.0745568980122799e-07,
      "loss": 2.7512,
      "step": 228440
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9254813194274902,
      "learning_rate": 1.0734624312530581e-07,
      "loss": 2.9515,
      "step": 228441
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.655864715576172,
      "learning_rate": 1.0723685220511702e-07,
      "loss": 2.7313,
      "step": 228442
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.639925718307495,
      "learning_rate": 1.0712751704066158e-07,
      "loss": 3.0447,
      "step": 228443
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9476473331451416,
      "learning_rate": 1.070182376319395e-07,
      "loss": 3.0911,
      "step": 228444
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.0398480892181396,
      "learning_rate": 1.0690901397901741e-07,
      "loss": 3.248,
      "step": 228445
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.2458980083465576,
      "learning_rate": 1.0679984608182868e-07,
      "loss": 3.0511,
      "step": 228446
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.258521318435669,
      "learning_rate": 1.0669073394047322e-07,
      "loss": 2.8843,
      "step": 228447
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.3360822200775146,
      "learning_rate": 1.0658167755495107e-07,
      "loss": 2.9871,
      "step": 228448
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5249722003936768,
      "learning_rate": 1.0647267692526218e-07,
      "loss": 3.0379,
      "step": 228449
    },
    {
      "epoch": 2.97,
      "grad_norm": 5.832726955413818,
      "learning_rate": 1.0636373205147319e-07,
      "loss": 2.9493,
      "step": 228450
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.529489517211914,
      "learning_rate": 1.062548429335508e-07,
      "loss": 2.8639,
      "step": 228451
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.878405809402466,
      "learning_rate": 1.061460095715283e-07,
      "loss": 3.0643,
      "step": 228452
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.920186996459961,
      "learning_rate": 1.0603723196543901e-07,
      "loss": 3.0918,
      "step": 228453
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.950655460357666,
      "learning_rate": 1.0592851011528292e-07,
      "loss": 2.7587,
      "step": 228454
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.938401699066162,
      "learning_rate": 1.0581984402112664e-07,
      "loss": 2.8318,
      "step": 228455
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.5402095317840576,
      "learning_rate": 1.0571123368293688e-07,
      "loss": 3.0981,
      "step": 228456
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8774683475494385,
      "learning_rate": 1.0560267910074693e-07,
      "loss": 2.753,
      "step": 228457
    },
    {
      "epoch": 2.97,
      "grad_norm": 4.180442810058594,
      "learning_rate": 1.0549418027462342e-07,
      "loss": 2.9541,
      "step": 228458
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.993687391281128,
      "learning_rate": 1.0538573720453303e-07,
      "loss": 3.1506,
      "step": 228459
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.592916488647461,
      "learning_rate": 1.0527734989047576e-07,
      "loss": 2.7601,
      "step": 228460
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7516815662384033,
      "learning_rate": 1.0516901833255153e-07,
      "loss": 2.9185,
      "step": 228461
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.5952298641204834,
      "learning_rate": 1.0506074253072704e-07,
      "loss": 2.9171,
      "step": 228462
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7987842559814453,
      "learning_rate": 1.049525224850356e-07,
      "loss": 3.0198,
      "step": 228463
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1676883697509766,
      "learning_rate": 1.048443581954772e-07,
      "loss": 3.0037,
      "step": 228464
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.168017864227295,
      "learning_rate": 1.0473624966211846e-07,
      "loss": 3.1072,
      "step": 228465
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9636330604553223,
      "learning_rate": 1.0462819688492608e-07,
      "loss": 2.6483,
      "step": 228466
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7419276237487793,
      "learning_rate": 1.0452019986396665e-07,
      "loss": 2.8207,
      "step": 228467
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.181070327758789,
      "learning_rate": 1.0441225859920687e-07,
      "loss": 2.936,
      "step": 228468
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.118175506591797,
      "learning_rate": 1.0430437309071337e-07,
      "loss": 3.0286,
      "step": 228469
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.816390037536621,
      "learning_rate": 1.0419654333851946e-07,
      "loss": 2.894,
      "step": 228470
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9281585216522217,
      "learning_rate": 1.040887693425585e-07,
      "loss": 3.1267,
      "step": 228471
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1609137058258057,
      "learning_rate": 1.0398105110296373e-07,
      "loss": 2.9037,
      "step": 228472
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.537095785140991,
      "learning_rate": 1.0387338861966854e-07,
      "loss": 2.9883,
      "step": 228473
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.716212511062622,
      "learning_rate": 1.0376578189273954e-07,
      "loss": 2.9056,
      "step": 228474
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.1419684886932373,
      "learning_rate": 1.0365823092214342e-07,
      "loss": 2.926,
      "step": 228475
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.9413890838623047,
      "learning_rate": 1.0355073570798012e-07,
      "loss": 3.0846,
      "step": 228476
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.8708887100219727,
      "learning_rate": 1.034432962502163e-07,
      "loss": 3.0262,
      "step": 228477
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.7586591243743896,
      "learning_rate": 1.033359125488853e-07,
      "loss": 2.9819,
      "step": 228478
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.4431817531585693,
      "learning_rate": 1.0322858460398709e-07,
      "loss": 3.1562,
      "step": 228479
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.976673126220703,
      "learning_rate": 1.0312131241558829e-07,
      "loss": 2.9786,
      "step": 228480
    },
    {
      "epoch": 2.97,
      "grad_norm": 2.725524425506592,
      "learning_rate": 1.0301409598365562e-07,
      "loss": 2.7681,
      "step": 228481
    },
    {
      "epoch": 2.97,
      "grad_norm": 3.495427370071411,
      "learning_rate": 1.0290693530825567e-07,
      "loss": 3.0155,
      "step": 228482
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0205750465393066,
      "learning_rate": 1.0279983038938844e-07,
      "loss": 2.8804,
      "step": 228483
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5598912239074707,
      "learning_rate": 1.0269278122705394e-07,
      "loss": 2.8599,
      "step": 228484
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1977293491363525,
      "learning_rate": 1.0258578782128546e-07,
      "loss": 3.0583,
      "step": 228485
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9419665336608887,
      "learning_rate": 1.0247885017211633e-07,
      "loss": 2.7052,
      "step": 228486
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8098909854888916,
      "learning_rate": 1.0237196827954652e-07,
      "loss": 2.7685,
      "step": 228487
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1942131519317627,
      "learning_rate": 1.0226514214364267e-07,
      "loss": 2.7762,
      "step": 228488
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2545998096466064,
      "learning_rate": 1.0215837176433817e-07,
      "loss": 3.0892,
      "step": 228489
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7434241771698,
      "learning_rate": 1.0205165714173292e-07,
      "loss": 3.0142,
      "step": 228490
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.803774356842041,
      "learning_rate": 1.0194499827582692e-07,
      "loss": 3.085,
      "step": 228491
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.95833158493042,
      "learning_rate": 1.0183839516662017e-07,
      "loss": 2.9963,
      "step": 228492
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.072437286376953,
      "learning_rate": 1.0173184781414601e-07,
      "loss": 2.7012,
      "step": 228493
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5637929439544678,
      "learning_rate": 1.016253562184044e-07,
      "loss": 2.9311,
      "step": 228494
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.748872756958008,
      "learning_rate": 1.0151892037942866e-07,
      "loss": 2.7991,
      "step": 228495
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.193535327911377,
      "learning_rate": 1.014125402972521e-07,
      "loss": 2.905,
      "step": 228496
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.148263692855835,
      "learning_rate": 1.0130621597190802e-07,
      "loss": 2.7404,
      "step": 228497
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3457345962524414,
      "learning_rate": 1.0119994740336312e-07,
      "loss": 2.8706,
      "step": 228498
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8571717739105225,
      "learning_rate": 1.0109373459168402e-07,
      "loss": 3.1512,
      "step": 228499
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.112501859664917,
      "learning_rate": 1.0098757753683739e-07,
      "loss": 2.7324,
      "step": 228500
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8940837383270264,
      "learning_rate": 1.0088147623892317e-07,
      "loss": 3.065,
      "step": 228501
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.223599433898926,
      "learning_rate": 1.0077543069787475e-07,
      "loss": 3.0154,
      "step": 228502
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.205294132232666,
      "learning_rate": 1.0066944091379203e-07,
      "loss": 2.6848,
      "step": 228503
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7972474098205566,
      "learning_rate": 1.0056350688664172e-07,
      "loss": 2.7267,
      "step": 228504
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8491578102111816,
      "learning_rate": 1.0045762861645712e-07,
      "loss": 2.9302,
      "step": 228505
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.846325397491455,
      "learning_rate": 1.0035180610323822e-07,
      "loss": 2.8305,
      "step": 228506
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9447450637817383,
      "learning_rate": 1.0024603934705167e-07,
      "loss": 3.0274,
      "step": 228507
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.686847686767578,
      "learning_rate": 1.0014032834786412e-07,
      "loss": 2.5756,
      "step": 228508
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.248714447021484,
      "learning_rate": 1.000346731057422e-07,
      "loss": 3.0545,
      "step": 228509
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.637981414794922,
      "learning_rate": 9.992907362068591e-08,
      "loss": 3.0771,
      "step": 228510
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7904415130615234,
      "learning_rate": 9.982352989269527e-08,
      "loss": 2.9452,
      "step": 228511
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8815064430236816,
      "learning_rate": 9.971804192183686e-08,
      "loss": 2.9417,
      "step": 228512
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.593898296356201,
      "learning_rate": 9.961260970807738e-08,
      "loss": 2.8022,
      "step": 228513
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.111021041870117,
      "learning_rate": 9.950723325148346e-08,
      "loss": 2.9263,
      "step": 228514
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3019280433654785,
      "learning_rate": 9.940191255202179e-08,
      "loss": 2.8929,
      "step": 228515
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0389719009399414,
      "learning_rate": 9.929664760975898e-08,
      "loss": 3.0062,
      "step": 228516
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8659276962280273,
      "learning_rate": 9.919143842472832e-08,
      "loss": 2.9644,
      "step": 228517
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9553592205047607,
      "learning_rate": 9.908628499686322e-08,
      "loss": 2.7919,
      "step": 228518
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.942173719406128,
      "learning_rate": 9.89811873262969e-08,
      "loss": 3.0286,
      "step": 228519
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.637212038040161,
      "learning_rate": 9.887614541292944e-08,
      "loss": 2.9504,
      "step": 228520
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.210702896118164,
      "learning_rate": 9.877115925689405e-08,
      "loss": 2.9206,
      "step": 228521
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7972612380981445,
      "learning_rate": 9.866622885812414e-08,
      "loss": 3.1903,
      "step": 228522
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7773971557617188,
      "learning_rate": 9.856135421668632e-08,
      "loss": 3.1289,
      "step": 228523
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.729311943054199,
      "learning_rate": 9.845653533258058e-08,
      "loss": 3.0685,
      "step": 228524
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3838491439819336,
      "learning_rate": 9.835177220584023e-08,
      "loss": 2.8658,
      "step": 228525
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.203984022140503,
      "learning_rate": 9.824706483649858e-08,
      "loss": 2.8753,
      "step": 228526
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.737910747528076,
      "learning_rate": 9.814241322452232e-08,
      "loss": 2.8414,
      "step": 228527
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4518227577209473,
      "learning_rate": 9.803781736997807e-08,
      "loss": 2.8297,
      "step": 228528
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.083521842956543,
      "learning_rate": 9.793327727283251e-08,
      "loss": 2.8702,
      "step": 228529
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4725093841552734,
      "learning_rate": 9.782879293318558e-08,
      "loss": 2.9951,
      "step": 228530
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5324575901031494,
      "learning_rate": 9.772436435100395e-08,
      "loss": 2.8664,
      "step": 228531
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.669600248336792,
      "learning_rate": 9.761999152632095e-08,
      "loss": 3.0362,
      "step": 228532
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4677913188934326,
      "learning_rate": 9.751567445913654e-08,
      "loss": 3.022,
      "step": 228533
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7151832580566406,
      "learning_rate": 9.741141314948408e-08,
      "loss": 3.0199,
      "step": 228534
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.101961135864258,
      "learning_rate": 9.730720759739685e-08,
      "loss": 3.0742,
      "step": 228535
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.78644061088562,
      "learning_rate": 9.720305780287485e-08,
      "loss": 3.0737,
      "step": 228536
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.990076780319214,
      "learning_rate": 9.709896376595139e-08,
      "loss": 2.6321,
      "step": 228537
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.093332529067993,
      "learning_rate": 9.699492548662646e-08,
      "loss": 3.1021,
      "step": 228538
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.820042610168457,
      "learning_rate": 9.689094296493339e-08,
      "loss": 2.988,
      "step": 228539
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8006834983825684,
      "learning_rate": 9.678701620090545e-08,
      "loss": 2.9383,
      "step": 228540
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2587850093841553,
      "learning_rate": 9.668314519454268e-08,
      "loss": 2.8399,
      "step": 228541
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.610776901245117,
      "learning_rate": 9.657932994584505e-08,
      "loss": 2.8956,
      "step": 228542
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.02744722366333,
      "learning_rate": 9.647557045487919e-08,
      "loss": 3.3276,
      "step": 228543
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.433689594268799,
      "learning_rate": 9.637186672161179e-08,
      "loss": 2.9509,
      "step": 228544
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.02705717086792,
      "learning_rate": 9.626821874610946e-08,
      "loss": 2.9593,
      "step": 228545
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.717682361602783,
      "learning_rate": 9.61646265283722e-08,
      "loss": 2.6895,
      "step": 228546
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.831935167312622,
      "learning_rate": 9.606109006840001e-08,
      "loss": 2.42,
      "step": 228547
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1754446029663086,
      "learning_rate": 9.595760936625951e-08,
      "loss": 2.9414,
      "step": 228548
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2245936393737793,
      "learning_rate": 9.585418442191739e-08,
      "loss": 2.9025,
      "step": 228549
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.7282721996307373,
      "learning_rate": 9.575081523544026e-08,
      "loss": 3.004,
      "step": 228550
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1139702796936035,
      "learning_rate": 9.564750180679482e-08,
      "loss": 2.9403,
      "step": 228551
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.681952714920044,
      "learning_rate": 9.554424413608097e-08,
      "loss": 2.8573,
      "step": 228552
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4722814559936523,
      "learning_rate": 9.544104222323213e-08,
      "loss": 2.9258,
      "step": 228553
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7523744106292725,
      "learning_rate": 9.533789606828157e-08,
      "loss": 2.7989,
      "step": 228554
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.948906421661377,
      "learning_rate": 9.523480567129594e-08,
      "loss": 3.0058,
      "step": 228555
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9397733211517334,
      "learning_rate": 9.513177103227521e-08,
      "loss": 3.0521,
      "step": 228556
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.988738775253296,
      "learning_rate": 9.502879215121939e-08,
      "loss": 2.7628,
      "step": 228557
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9039151668548584,
      "learning_rate": 9.492586902812848e-08,
      "loss": 2.7982,
      "step": 228558
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5734567642211914,
      "learning_rate": 9.482300166310242e-08,
      "loss": 2.911,
      "step": 228559
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2695202827453613,
      "learning_rate": 9.472019005607457e-08,
      "loss": 2.9413,
      "step": 228560
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.402284860610962,
      "learning_rate": 9.461743420711154e-08,
      "loss": 2.8487,
      "step": 228561
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4685027599334717,
      "learning_rate": 9.451473411621335e-08,
      "loss": 3.0005,
      "step": 228562
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.071153163909912,
      "learning_rate": 9.44120897834133e-08,
      "loss": 2.9738,
      "step": 228563
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.075011730194092,
      "learning_rate": 9.43095012087447e-08,
      "loss": 2.7093,
      "step": 228564
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6418864727020264,
      "learning_rate": 9.420696839217424e-08,
      "loss": 2.7717,
      "step": 228565
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.621422052383423,
      "learning_rate": 9.410449133376852e-08,
      "loss": 3.1949,
      "step": 228566
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0870957374572754,
      "learning_rate": 9.400207003352755e-08,
      "loss": 2.8971,
      "step": 228567
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.640061378479004,
      "learning_rate": 9.389970449145135e-08,
      "loss": 3.0304,
      "step": 228568
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6100587844848633,
      "learning_rate": 9.37973947076065e-08,
      "loss": 2.7917,
      "step": 228569
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.939174175262451,
      "learning_rate": 9.369514068195971e-08,
      "loss": 3.2581,
      "step": 228570
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7778468132019043,
      "learning_rate": 9.35929424145776e-08,
      "loss": 3.0367,
      "step": 228571
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7608251571655273,
      "learning_rate": 9.349079990546015e-08,
      "loss": 3.0255,
      "step": 228572
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.094757318496704,
      "learning_rate": 9.338871315460738e-08,
      "loss": 2.9111,
      "step": 228573
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2526674270629883,
      "learning_rate": 9.32866821620859e-08,
      "loss": 2.9024,
      "step": 228574
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9844133853912354,
      "learning_rate": 9.318470692786239e-08,
      "loss": 3.2092,
      "step": 228575
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1958024501800537,
      "learning_rate": 9.308278745197017e-08,
      "loss": 2.9155,
      "step": 228576
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9789986610412598,
      "learning_rate": 9.298092373444255e-08,
      "loss": 3.052,
      "step": 228577
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9890198707580566,
      "learning_rate": 9.287911577527952e-08,
      "loss": 3.0529,
      "step": 228578
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3842051029205322,
      "learning_rate": 9.277736357451437e-08,
      "loss": 3.0902,
      "step": 228579
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3195860385894775,
      "learning_rate": 9.267566713218044e-08,
      "loss": 2.8238,
      "step": 228580
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1020455360412598,
      "learning_rate": 9.257402644827772e-08,
      "loss": 2.9939,
      "step": 228581
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2027902603149414,
      "learning_rate": 9.24724415228062e-08,
      "loss": 2.6514,
      "step": 228582
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7108681201934814,
      "learning_rate": 9.237091235583249e-08,
      "loss": 2.6377,
      "step": 228583
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7051992416381836,
      "learning_rate": 9.226943894732331e-08,
      "loss": 3.014,
      "step": 228584
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8174362182617188,
      "learning_rate": 9.216802129734524e-08,
      "loss": 2.8602,
      "step": 228585
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8548946380615234,
      "learning_rate": 9.206665940586499e-08,
      "loss": 3.0326,
      "step": 228586
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4687156677246094,
      "learning_rate": 9.196535327294918e-08,
      "loss": 2.832,
      "step": 228587
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1684975624084473,
      "learning_rate": 9.18641028985978e-08,
      "loss": 2.8527,
      "step": 228588
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7449111938476562,
      "learning_rate": 9.176290828284416e-08,
      "loss": 3.1103,
      "step": 228589
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.18271541595459,
      "learning_rate": 9.166176942565495e-08,
      "loss": 2.7784,
      "step": 228590
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.587646722793579,
      "learning_rate": 9.15606863271301e-08,
      "loss": 2.9432,
      "step": 228591
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.998439311981201,
      "learning_rate": 9.145965898720298e-08,
      "loss": 3.1495,
      "step": 228592
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.816910743713379,
      "learning_rate": 9.135868740597352e-08,
      "loss": 3.0439,
      "step": 228593
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.951268434524536,
      "learning_rate": 9.125777158340841e-08,
      "loss": 2.9277,
      "step": 228594
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4894535541534424,
      "learning_rate": 9.115691151954097e-08,
      "loss": 3.0294,
      "step": 228595
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5199429988861084,
      "learning_rate": 9.105610721437117e-08,
      "loss": 2.9365,
      "step": 228596
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.938349485397339,
      "learning_rate": 9.095535866796567e-08,
      "loss": 2.8768,
      "step": 228597
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6087725162506104,
      "learning_rate": 9.08546658802911e-08,
      "loss": 2.8898,
      "step": 228598
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0272223949432373,
      "learning_rate": 9.075402885138084e-08,
      "loss": 2.7663,
      "step": 228599
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5181002616882324,
      "learning_rate": 9.065344758126813e-08,
      "loss": 3.0206,
      "step": 228600
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.050368070602417,
      "learning_rate": 9.055292206995302e-08,
      "loss": 2.8864,
      "step": 228601
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.754178524017334,
      "learning_rate": 9.045245231750209e-08,
      "loss": 2.6512,
      "step": 228602
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.119813919067383,
      "learning_rate": 9.035203832388205e-08,
      "loss": 2.7355,
      "step": 228603
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1530821323394775,
      "learning_rate": 9.02516800890929e-08,
      "loss": 3.0586,
      "step": 228604
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1021206378936768,
      "learning_rate": 9.015137761323454e-08,
      "loss": 2.9641,
      "step": 228605
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.255866050720215,
      "learning_rate": 9.005113089624039e-08,
      "loss": 2.9205,
      "step": 228606
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7812087535858154,
      "learning_rate": 8.995093993821034e-08,
      "loss": 2.926,
      "step": 228607
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.658947706222534,
      "learning_rate": 8.98508047390778e-08,
      "loss": 3.0949,
      "step": 228608
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3345892429351807,
      "learning_rate": 8.975072529890937e-08,
      "loss": 2.7785,
      "step": 228609
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.906452178955078,
      "learning_rate": 8.965070161773835e-08,
      "loss": 3.072,
      "step": 228610
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.03598690032959,
      "learning_rate": 8.955073369556476e-08,
      "loss": 3.1259,
      "step": 228611
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4715371131896973,
      "learning_rate": 8.945082153238858e-08,
      "loss": 2.9396,
      "step": 228612
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6676459312438965,
      "learning_rate": 8.935096512824314e-08,
      "loss": 3.0349,
      "step": 228613
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2384238243103027,
      "learning_rate": 8.925116448316172e-08,
      "loss": 2.9452,
      "step": 228614
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.01560640335083,
      "learning_rate": 8.915141959711103e-08,
      "loss": 2.7799,
      "step": 228615
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.876610517501831,
      "learning_rate": 8.905173047019099e-08,
      "loss": 2.8787,
      "step": 228616
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8521695137023926,
      "learning_rate": 8.895209710236828e-08,
      "loss": 3.0652,
      "step": 228617
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.978123903274536,
      "learning_rate": 8.885251949367622e-08,
      "loss": 3.0801,
      "step": 228618
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.008216142654419,
      "learning_rate": 8.875299764411481e-08,
      "loss": 2.9795,
      "step": 228619
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.712236166000366,
      "learning_rate": 8.865353155371735e-08,
      "loss": 2.9117,
      "step": 228620
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9411394596099854,
      "learning_rate": 8.855412122248385e-08,
      "loss": 2.9397,
      "step": 228621
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.199756622314453,
      "learning_rate": 8.84547666504809e-08,
      "loss": 2.8531,
      "step": 228622
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.968428373336792,
      "learning_rate": 8.835546783767522e-08,
      "loss": 2.854,
      "step": 228623
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7369821071624756,
      "learning_rate": 8.825622478413342e-08,
      "loss": 2.7464,
      "step": 228624
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.254976749420166,
      "learning_rate": 8.815703748982217e-08,
      "loss": 2.926,
      "step": 228625
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.008239984512329,
      "learning_rate": 8.80579059547748e-08,
      "loss": 3.0215,
      "step": 228626
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8041045665740967,
      "learning_rate": 8.795883017902461e-08,
      "loss": 3.0133,
      "step": 228627
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1162610054016113,
      "learning_rate": 8.785981016260491e-08,
      "loss": 2.7883,
      "step": 228628
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1071348190307617,
      "learning_rate": 8.776084590551568e-08,
      "loss": 2.8253,
      "step": 228629
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7061843872070312,
      "learning_rate": 8.766193740772365e-08,
      "loss": 2.8464,
      "step": 228630
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8501434326171875,
      "learning_rate": 8.756308466936202e-08,
      "loss": 2.9926,
      "step": 228631
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.542613983154297,
      "learning_rate": 8.746428769033088e-08,
      "loss": 3.0048,
      "step": 228632
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6806254386901855,
      "learning_rate": 8.736554647073013e-08,
      "loss": 2.8906,
      "step": 228633
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.301372528076172,
      "learning_rate": 8.726686101052648e-08,
      "loss": 2.8883,
      "step": 228634
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.882791757583618,
      "learning_rate": 8.716823130978656e-08,
      "loss": 2.9967,
      "step": 228635
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6744704246520996,
      "learning_rate": 8.706965736847704e-08,
      "loss": 2.9554,
      "step": 228636
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1179938316345215,
      "learning_rate": 8.697113918666453e-08,
      "loss": 2.716,
      "step": 228637
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.448084592819214,
      "learning_rate": 8.687267676434906e-08,
      "loss": 3.3196,
      "step": 228638
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.84472918510437,
      "learning_rate": 8.677427010153059e-08,
      "loss": 2.8729,
      "step": 228639
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.068648099899292,
      "learning_rate": 8.667591919824246e-08,
      "loss": 2.9278,
      "step": 228640
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5173661708831787,
      "learning_rate": 8.657762405451796e-08,
      "loss": 2.8407,
      "step": 228641
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.823099374771118,
      "learning_rate": 8.647938467035709e-08,
      "loss": 2.7286,
      "step": 228642
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2063255310058594,
      "learning_rate": 8.638120104575986e-08,
      "loss": 2.8539,
      "step": 228643
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8240623474121094,
      "learning_rate": 8.628307318079286e-08,
      "loss": 2.9837,
      "step": 228644
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3786942958831787,
      "learning_rate": 8.618500107542281e-08,
      "loss": 3.2014,
      "step": 228645
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1670405864715576,
      "learning_rate": 8.608698472971631e-08,
      "loss": 2.9336,
      "step": 228646
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7044994831085205,
      "learning_rate": 8.598902414364006e-08,
      "loss": 2.7639,
      "step": 228647
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0083260536193848,
      "learning_rate": 8.589111931726067e-08,
      "loss": 2.8543,
      "step": 228648
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9891459941864014,
      "learning_rate": 8.579327025054483e-08,
      "loss": 2.8969,
      "step": 228649
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2718074321746826,
      "learning_rate": 8.569547694359247e-08,
      "loss": 2.8286,
      "step": 228650
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2194690704345703,
      "learning_rate": 8.559773939633696e-08,
      "loss": 2.9823,
      "step": 228651
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.879956007003784,
      "learning_rate": 8.550005760884493e-08,
      "loss": 3.1248,
      "step": 228652
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.496349573135376,
      "learning_rate": 8.540243158108307e-08,
      "loss": 2.7925,
      "step": 228653
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7402503490448,
      "learning_rate": 8.530486131315129e-08,
      "loss": 2.8927,
      "step": 228654
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.435364007949829,
      "learning_rate": 8.520734680498298e-08,
      "loss": 2.6796,
      "step": 228655
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.9801313877105713,
      "learning_rate": 8.510988805667806e-08,
      "loss": 3.0339,
      "step": 228656
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9100801944732666,
      "learning_rate": 8.501248506820324e-08,
      "loss": 2.9524,
      "step": 228657
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8365721702575684,
      "learning_rate": 8.49151378395585e-08,
      "loss": 2.9301,
      "step": 228658
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6915059089660645,
      "learning_rate": 8.481784637081047e-08,
      "loss": 2.8059,
      "step": 228659
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.80067777633667,
      "learning_rate": 8.472061066195912e-08,
      "loss": 2.9714,
      "step": 228660
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8506057262420654,
      "learning_rate": 8.462343071300448e-08,
      "loss": 2.9756,
      "step": 228661
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.992866039276123,
      "learning_rate": 8.452630652397985e-08,
      "loss": 2.9311,
      "step": 228662
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.270928382873535,
      "learning_rate": 8.442923809491852e-08,
      "loss": 2.9082,
      "step": 228663
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.47880482673645,
      "learning_rate": 8.433222542582052e-08,
      "loss": 2.6904,
      "step": 228664
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9159393310546875,
      "learning_rate": 8.423526851668583e-08,
      "loss": 3.0863,
      "step": 228665
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.539444923400879,
      "learning_rate": 8.413836736758105e-08,
      "loss": 2.7982,
      "step": 228666
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.258328914642334,
      "learning_rate": 8.404152197847291e-08,
      "loss": 3.2985,
      "step": 228667
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9104905128479004,
      "learning_rate": 8.3944732349428e-08,
      "loss": 2.9635,
      "step": 228668
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4460597038269043,
      "learning_rate": 8.384799848041302e-08,
      "loss": 3.3054,
      "step": 228669
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.895972490310669,
      "learning_rate": 8.375132037149457e-08,
      "loss": 2.953,
      "step": 228670
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.447909355163574,
      "learning_rate": 8.365469802267266e-08,
      "loss": 3.116,
      "step": 228671
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1168036460876465,
      "learning_rate": 8.355813143391399e-08,
      "loss": 2.9062,
      "step": 228672
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.8930373191833496,
      "learning_rate": 8.346162060531847e-08,
      "loss": 2.7947,
      "step": 228673
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3037493228912354,
      "learning_rate": 8.33651655368861e-08,
      "loss": 2.8367,
      "step": 228674
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1387922763824463,
      "learning_rate": 8.326876622858358e-08,
      "loss": 2.7716,
      "step": 228675
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7944159507751465,
      "learning_rate": 8.317242268047752e-08,
      "loss": 3.0532,
      "step": 228676
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.7608048915863037,
      "learning_rate": 8.307613489256792e-08,
      "loss": 2.9943,
      "step": 228677
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6386258602142334,
      "learning_rate": 8.297990286488809e-08,
      "loss": 2.9359,
      "step": 228678
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0446760654449463,
      "learning_rate": 8.288372659743803e-08,
      "loss": 2.8859,
      "step": 228679
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2369816303253174,
      "learning_rate": 8.278760609021773e-08,
      "loss": 3.0972,
      "step": 228680
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0006322860717773,
      "learning_rate": 8.269154134329381e-08,
      "loss": 2.8474,
      "step": 228681
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6401474475860596,
      "learning_rate": 8.259553235663297e-08,
      "loss": 3.0003,
      "step": 228682
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1488094329833984,
      "learning_rate": 8.24995791303018e-08,
      "loss": 3.0985,
      "step": 228683
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.4182257652282715,
      "learning_rate": 8.240368166430034e-08,
      "loss": 2.943,
      "step": 228684
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.797253131866455,
      "learning_rate": 8.230783995862856e-08,
      "loss": 2.9489,
      "step": 228685
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.678584575653076,
      "learning_rate": 8.221205401331976e-08,
      "loss": 3.0005,
      "step": 228686
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.132899284362793,
      "learning_rate": 8.211632382840727e-08,
      "loss": 2.8,
      "step": 228687
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.968961715698242,
      "learning_rate": 8.202064940385778e-08,
      "loss": 2.94,
      "step": 228688
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.924664258956909,
      "learning_rate": 8.192503073973788e-08,
      "loss": 2.8771,
      "step": 228689
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8986566066741943,
      "learning_rate": 8.18294678360476e-08,
      "loss": 2.7413,
      "step": 228690
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2507119178771973,
      "learning_rate": 8.173396069282023e-08,
      "loss": 3.0607,
      "step": 228691
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.870340585708618,
      "learning_rate": 8.163850931005577e-08,
      "loss": 2.9511,
      "step": 228692
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.590196132659912,
      "learning_rate": 8.154311368775424e-08,
      "loss": 3.1042,
      "step": 228693
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9584708213806152,
      "learning_rate": 8.144777382598222e-08,
      "loss": 2.7112,
      "step": 228694
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6570441722869873,
      "learning_rate": 8.135248972470642e-08,
      "loss": 2.8513,
      "step": 228695
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0834641456604004,
      "learning_rate": 8.125726138399347e-08,
      "loss": 2.8683,
      "step": 228696
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.062603950500488,
      "learning_rate": 8.116208880381003e-08,
      "loss": 2.9385,
      "step": 228697
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.209326267242432,
      "learning_rate": 8.106697198422274e-08,
      "loss": 2.7956,
      "step": 228698
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.205474853515625,
      "learning_rate": 8.097191092523158e-08,
      "loss": 2.94,
      "step": 228699
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.16431188583374,
      "learning_rate": 8.087690562680327e-08,
      "loss": 3.0128,
      "step": 228700
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0326008796691895,
      "learning_rate": 8.078195608903771e-08,
      "loss": 3.0061,
      "step": 228701
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7878408432006836,
      "learning_rate": 8.068706231193489e-08,
      "loss": 2.9685,
      "step": 228702
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9391396045684814,
      "learning_rate": 8.059222429546153e-08,
      "loss": 2.9816,
      "step": 228703
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0969436168670654,
      "learning_rate": 8.049744203968422e-08,
      "loss": 3.0183,
      "step": 228704
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.828606128692627,
      "learning_rate": 8.040271554456968e-08,
      "loss": 2.874,
      "step": 228705
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8848321437835693,
      "learning_rate": 8.030804481021779e-08,
      "loss": 3.0517,
      "step": 228706
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.894669771194458,
      "learning_rate": 8.021342983656198e-08,
      "loss": 3.0005,
      "step": 228707
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8406052589416504,
      "learning_rate": 8.011887062366885e-08,
      "loss": 2.8516,
      "step": 228708
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.306833505630493,
      "learning_rate": 8.002436717153837e-08,
      "loss": 3.0713,
      "step": 228709
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.717487335205078,
      "learning_rate": 7.992991948020389e-08,
      "loss": 3.1168,
      "step": 228710
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2412543296813965,
      "learning_rate": 7.983552754966537e-08,
      "loss": 2.871,
      "step": 228711
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0314388275146484,
      "learning_rate": 7.974119137995615e-08,
      "loss": 2.9919,
      "step": 228712
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7759571075439453,
      "learning_rate": 7.964691097107623e-08,
      "loss": 2.8961,
      "step": 228713
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6744766235351562,
      "learning_rate": 7.955268632305889e-08,
      "loss": 3.0571,
      "step": 228714
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.966614246368408,
      "learning_rate": 7.945851743590415e-08,
      "loss": 3.1365,
      "step": 228715
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7945148944854736,
      "learning_rate": 7.93644043096453e-08,
      "loss": 3.123,
      "step": 228716
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.9468629360198975,
      "learning_rate": 7.927034694428235e-08,
      "loss": 3.3031,
      "step": 228717
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6832501888275146,
      "learning_rate": 7.917634533984862e-08,
      "loss": 2.8296,
      "step": 228718
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.623093605041504,
      "learning_rate": 7.90823994963774e-08,
      "loss": 3.0729,
      "step": 228719
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.745640754699707,
      "learning_rate": 7.898850941383539e-08,
      "loss": 2.9356,
      "step": 228720
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.279839277267456,
      "learning_rate": 7.88946750922892e-08,
      "loss": 2.9651,
      "step": 228721
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.981450319290161,
      "learning_rate": 7.880089653173883e-08,
      "loss": 2.8241,
      "step": 228722
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8425705432891846,
      "learning_rate": 7.870717373218428e-08,
      "loss": 3.1552,
      "step": 228723
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6575100421905518,
      "learning_rate": 7.861350669369215e-08,
      "loss": 2.8272,
      "step": 228724
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5970208644866943,
      "learning_rate": 7.851989541622917e-08,
      "loss": 2.8143,
      "step": 228725
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.079143762588501,
      "learning_rate": 7.84263398998286e-08,
      "loss": 2.9576,
      "step": 228726
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.484837055206299,
      "learning_rate": 7.833284014449049e-08,
      "loss": 2.9478,
      "step": 228727
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3595218658447266,
      "learning_rate": 7.82393961502814e-08,
      "loss": 2.898,
      "step": 228728
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.807706594467163,
      "learning_rate": 7.814600791716807e-08,
      "loss": 2.9662,
      "step": 228729
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.443498373031616,
      "learning_rate": 7.805267544521709e-08,
      "loss": 3.0343,
      "step": 228730
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1072328090667725,
      "learning_rate": 7.795939873439516e-08,
      "loss": 2.7995,
      "step": 228731
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.9636383056640625,
      "learning_rate": 7.786617778473559e-08,
      "loss": 3.0752,
      "step": 228732
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9344985485076904,
      "learning_rate": 7.777301259627167e-08,
      "loss": 2.8681,
      "step": 228733
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7189884185791016,
      "learning_rate": 7.767990316900341e-08,
      "loss": 2.9592,
      "step": 228734
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9236278533935547,
      "learning_rate": 7.758684950296413e-08,
      "loss": 2.8073,
      "step": 228735
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.655548095703125,
      "learning_rate": 7.74938515981871e-08,
      "loss": 2.8794,
      "step": 228736
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1087656021118164,
      "learning_rate": 7.740090945463906e-08,
      "loss": 2.9364,
      "step": 228737
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3106212615966797,
      "learning_rate": 7.730802307235329e-08,
      "loss": 2.9495,
      "step": 228738
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.152602672576904,
      "learning_rate": 7.72151924513964e-08,
      "loss": 2.8857,
      "step": 228739
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6604671478271484,
      "learning_rate": 7.712241759170179e-08,
      "loss": 2.8484,
      "step": 228740
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.65507435798645,
      "learning_rate": 7.702969849336937e-08,
      "loss": 3.1628,
      "step": 228741
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.070049524307251,
      "learning_rate": 7.693703515636585e-08,
      "loss": 2.9386,
      "step": 228742
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0438780784606934,
      "learning_rate": 7.68444275806912e-08,
      "loss": 2.7931,
      "step": 228743
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7256224155426025,
      "learning_rate": 7.675187576644537e-08,
      "loss": 2.9238,
      "step": 228744
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.9378390312194824,
      "learning_rate": 7.665937971356173e-08,
      "loss": 2.7736,
      "step": 228745
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.502983808517456,
      "learning_rate": 7.65669394220736e-08,
      "loss": 3.2042,
      "step": 228746
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.017861843109131,
      "learning_rate": 7.647455489204757e-08,
      "loss": 2.6746,
      "step": 228747
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5623176097869873,
      "learning_rate": 7.638222612345035e-08,
      "loss": 2.702,
      "step": 228748
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9102466106414795,
      "learning_rate": 7.628995311631524e-08,
      "loss": 3.0676,
      "step": 228749
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.907015562057495,
      "learning_rate": 7.619773587064227e-08,
      "loss": 2.9168,
      "step": 228750
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.291165351867676,
      "learning_rate": 7.610557438649801e-08,
      "loss": 2.8473,
      "step": 228751
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.102483034133911,
      "learning_rate": 7.601346866384917e-08,
      "loss": 2.6551,
      "step": 228752
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.883005142211914,
      "learning_rate": 7.592141870272906e-08,
      "loss": 3.0105,
      "step": 228753
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9389514923095703,
      "learning_rate": 7.582942450317098e-08,
      "loss": 2.9505,
      "step": 228754
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3134827613830566,
      "learning_rate": 7.573748606514163e-08,
      "loss": 2.8866,
      "step": 228755
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7628908157348633,
      "learning_rate": 7.564560338874093e-08,
      "loss": 2.5648,
      "step": 228756
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.7791807651519775,
      "learning_rate": 7.555377647390227e-08,
      "loss": 2.6688,
      "step": 228757
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.988880157470703,
      "learning_rate": 7.546200532072555e-08,
      "loss": 2.9863,
      "step": 228758
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.748732328414917,
      "learning_rate": 7.537028992914418e-08,
      "loss": 3.0732,
      "step": 228759
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2189176082611084,
      "learning_rate": 7.527863029922477e-08,
      "loss": 2.8859,
      "step": 228760
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7336881160736084,
      "learning_rate": 7.51870264309673e-08,
      "loss": 2.9217,
      "step": 228761
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.29388427734375,
      "learning_rate": 7.50954783244051e-08,
      "loss": 2.8278,
      "step": 228762
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7452259063720703,
      "learning_rate": 7.500398597950485e-08,
      "loss": 2.9519,
      "step": 228763
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1142940521240234,
      "learning_rate": 7.491254939636648e-08,
      "loss": 2.7371,
      "step": 228764
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3105764389038086,
      "learning_rate": 7.482116857495668e-08,
      "loss": 2.8348,
      "step": 228765
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.408488750457764,
      "learning_rate": 7.472984351527544e-08,
      "loss": 2.8958,
      "step": 228766
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.992133378982544,
      "learning_rate": 7.463857421738939e-08,
      "loss": 2.9329,
      "step": 228767
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4538512229919434,
      "learning_rate": 7.454736068129852e-08,
      "loss": 2.9042,
      "step": 228768
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.781118869781494,
      "learning_rate": 7.445620290696952e-08,
      "loss": 2.9919,
      "step": 228769
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.078199625015259,
      "learning_rate": 7.436510089450231e-08,
      "loss": 3.0864,
      "step": 228770
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.719867706298828,
      "learning_rate": 7.427405464383029e-08,
      "loss": 2.9749,
      "step": 228771
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.208590507507324,
      "learning_rate": 7.418306415505337e-08,
      "loss": 3.1354,
      "step": 228772
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.986147165298462,
      "learning_rate": 7.409212942813825e-08,
      "loss": 2.7564,
      "step": 228773
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.096979856491089,
      "learning_rate": 7.400125046308491e-08,
      "loss": 2.9775,
      "step": 228774
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7478463649749756,
      "learning_rate": 7.391042725995999e-08,
      "loss": 3.053,
      "step": 228775
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7122111320495605,
      "learning_rate": 7.381965981876348e-08,
      "loss": 2.6678,
      "step": 228776
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.7470035552978516,
      "learning_rate": 7.372894813949538e-08,
      "loss": 2.6883,
      "step": 228777
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6587438583374023,
      "learning_rate": 7.363829222215567e-08,
      "loss": 3.0628,
      "step": 228778
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.987311601638794,
      "learning_rate": 7.354769206681099e-08,
      "loss": 2.9322,
      "step": 228779
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.685962200164795,
      "learning_rate": 7.345714767346134e-08,
      "loss": 2.8947,
      "step": 228780
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.833928108215332,
      "learning_rate": 7.33666590421067e-08,
      "loss": 3.1455,
      "step": 228781
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.539273977279663,
      "learning_rate": 7.32762261727804e-08,
      "loss": 2.8723,
      "step": 228782
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5817694664001465,
      "learning_rate": 7.318584906551572e-08,
      "loss": 3.0558,
      "step": 228783
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.637269973754883,
      "learning_rate": 7.309552772027938e-08,
      "loss": 3.2188,
      "step": 228784
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.165334701538086,
      "learning_rate": 7.300526213710467e-08,
      "loss": 2.7861,
      "step": 228785
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.742624282836914,
      "learning_rate": 7.291505231605821e-08,
      "loss": 2.9133,
      "step": 228786
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.432302713394165,
      "learning_rate": 7.282489825710669e-08,
      "loss": 2.8275,
      "step": 228787
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8683032989501953,
      "learning_rate": 7.273479996025012e-08,
      "loss": 3.0692,
      "step": 228788
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.007550001144409,
      "learning_rate": 7.264475742555509e-08,
      "loss": 2.789,
      "step": 228789
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.75573992729187,
      "learning_rate": 7.255477065302162e-08,
      "loss": 2.6771,
      "step": 228790
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.217501640319824,
      "learning_rate": 7.246483964264971e-08,
      "loss": 3.028,
      "step": 228791
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1205945014953613,
      "learning_rate": 7.237496439447265e-08,
      "loss": 2.9943,
      "step": 228792
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.639258861541748,
      "learning_rate": 7.228514490852378e-08,
      "loss": 2.911,
      "step": 228793
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5953378677368164,
      "learning_rate": 7.219538118476975e-08,
      "loss": 2.8088,
      "step": 228794
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8929293155670166,
      "learning_rate": 7.21056732232772e-08,
      "loss": 2.9293,
      "step": 228795
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.453418016433716,
      "learning_rate": 7.201602102401283e-08,
      "loss": 2.94,
      "step": 228796
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2212843894958496,
      "learning_rate": 7.192642458704323e-08,
      "loss": 3.104,
      "step": 228797
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.839653730392456,
      "learning_rate": 7.183688391236842e-08,
      "loss": 3.1835,
      "step": 228798
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.83292555809021,
      "learning_rate": 7.174739899998839e-08,
      "loss": 2.9506,
      "step": 228799
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6346354484558105,
      "learning_rate": 7.165796984993644e-08,
      "loss": 2.8714,
      "step": 228800
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.038755416870117,
      "learning_rate": 7.156859646221258e-08,
      "loss": 3.0718,
      "step": 228801
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.335090398788452,
      "learning_rate": 7.147927883685012e-08,
      "loss": 3.0635,
      "step": 228802
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3458540439605713,
      "learning_rate": 7.139001697388236e-08,
      "loss": 2.9009,
      "step": 228803
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.317934513092041,
      "learning_rate": 7.13008108733093e-08,
      "loss": 2.8536,
      "step": 228804
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8405346870422363,
      "learning_rate": 7.121166053509763e-08,
      "loss": 2.7783,
      "step": 228805
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.035024881362915,
      "learning_rate": 7.112256595934728e-08,
      "loss": 2.6824,
      "step": 228806
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.685450792312622,
      "learning_rate": 7.103352714602495e-08,
      "loss": 3.0256,
      "step": 228807
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4750165939331055,
      "learning_rate": 7.094454409516392e-08,
      "loss": 2.9283,
      "step": 228808
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.935983896255493,
      "learning_rate": 7.085561680676421e-08,
      "loss": 3.0544,
      "step": 228809
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.978306531906128,
      "learning_rate": 7.076674528085912e-08,
      "loss": 2.9833,
      "step": 228810
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.39799427986145,
      "learning_rate": 7.067792951748197e-08,
      "loss": 2.7928,
      "step": 228811
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.257152795791626,
      "learning_rate": 7.058916951659943e-08,
      "loss": 2.9518,
      "step": 228812
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7272839546203613,
      "learning_rate": 7.050046527827813e-08,
      "loss": 3.2011,
      "step": 228813
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3283889293670654,
      "learning_rate": 7.041181680248476e-08,
      "loss": 2.7737,
      "step": 228814
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8635261058807373,
      "learning_rate": 7.032322408928592e-08,
      "loss": 3.1276,
      "step": 228815
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.494433641433716,
      "learning_rate": 7.023468713864833e-08,
      "loss": 2.7759,
      "step": 228816
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0861711502075195,
      "learning_rate": 7.014620595063858e-08,
      "loss": 2.9632,
      "step": 228817
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.347460985183716,
      "learning_rate": 7.005778052525669e-08,
      "loss": 2.575,
      "step": 228818
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7642416954040527,
      "learning_rate": 6.996941086250263e-08,
      "loss": 2.8583,
      "step": 228819
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.748377561569214,
      "learning_rate": 6.988109696240973e-08,
      "loss": 2.9171,
      "step": 228820
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2446439266204834,
      "learning_rate": 6.9792838824978e-08,
      "loss": 2.8763,
      "step": 228821
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6963839530944824,
      "learning_rate": 6.970463645024071e-08,
      "loss": 2.8087,
      "step": 228822
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.652125120162964,
      "learning_rate": 6.96164898381979e-08,
      "loss": 2.7427,
      "step": 228823
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5582284927368164,
      "learning_rate": 6.952839898888286e-08,
      "loss": 2.8464,
      "step": 228824
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4177258014678955,
      "learning_rate": 6.944036390229557e-08,
      "loss": 2.9114,
      "step": 228825
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.9993112087249756,
      "learning_rate": 6.935238457846936e-08,
      "loss": 2.866,
      "step": 228826
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.334872245788574,
      "learning_rate": 6.926446101740423e-08,
      "loss": 3.0609,
      "step": 228827
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.769026756286621,
      "learning_rate": 6.917659321913349e-08,
      "loss": 3.0951,
      "step": 228828
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0943877696990967,
      "learning_rate": 6.908878118365713e-08,
      "loss": 3.0144,
      "step": 228829
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.165198564529419,
      "learning_rate": 6.900102491100845e-08,
      "loss": 2.9298,
      "step": 228830
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.85123348236084,
      "learning_rate": 6.891332440118747e-08,
      "loss": 2.6893,
      "step": 228831
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4046664237976074,
      "learning_rate": 6.882567965422747e-08,
      "loss": 2.9654,
      "step": 228832
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.736546754837036,
      "learning_rate": 6.873809067009517e-08,
      "loss": 2.8048,
      "step": 228833
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.855557918548584,
      "learning_rate": 6.865055744889048e-08,
      "loss": 2.7292,
      "step": 228834
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.144205331802368,
      "learning_rate": 6.856307999058008e-08,
      "loss": 2.7854,
      "step": 228835
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8790154457092285,
      "learning_rate": 6.8475658295164e-08,
      "loss": 2.8957,
      "step": 228836
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6712305545806885,
      "learning_rate": 6.838829236270882e-08,
      "loss": 2.9195,
      "step": 228837
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4346654415130615,
      "learning_rate": 6.830098219318125e-08,
      "loss": 2.8518,
      "step": 228838
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.060805320739746,
      "learning_rate": 6.82137277866146e-08,
      "loss": 2.8099,
      "step": 228839
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0018177032470703,
      "learning_rate": 6.812652914304217e-08,
      "loss": 2.7984,
      "step": 228840
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.8192334175109863,
      "learning_rate": 6.803938626246397e-08,
      "loss": 3.0773,
      "step": 228841
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.086214065551758,
      "learning_rate": 6.795229914487998e-08,
      "loss": 3.0619,
      "step": 228842
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8648183345794678,
      "learning_rate": 6.786526779032353e-08,
      "loss": 3.3323,
      "step": 228843
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6931376457214355,
      "learning_rate": 6.777829219882791e-08,
      "loss": 2.6427,
      "step": 228844
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8224635124206543,
      "learning_rate": 6.769137237042643e-08,
      "loss": 3.0639,
      "step": 228845
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.537472724914551,
      "learning_rate": 6.760450830505249e-08,
      "loss": 2.758,
      "step": 228846
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.026801109313965,
      "learning_rate": 6.751770000280599e-08,
      "loss": 2.9244,
      "step": 228847
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0906693935394287,
      "learning_rate": 6.743094746365363e-08,
      "loss": 3.0058,
      "step": 228848
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.465237617492676,
      "learning_rate": 6.734425068762872e-08,
      "loss": 2.7699,
      "step": 228849
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.387458324432373,
      "learning_rate": 6.725760967473126e-08,
      "loss": 2.8361,
      "step": 228850
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.795494794845581,
      "learning_rate": 6.717102442502787e-08,
      "loss": 3.2573,
      "step": 228851
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9584598541259766,
      "learning_rate": 6.708449493845192e-08,
      "loss": 3.0025,
      "step": 228852
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9546608924865723,
      "learning_rate": 6.699802121510334e-08,
      "loss": 2.667,
      "step": 228853
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.978090524673462,
      "learning_rate": 6.691160325494882e-08,
      "loss": 2.9136,
      "step": 228854
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8777055740356445,
      "learning_rate": 6.682524105802167e-08,
      "loss": 3.0903,
      "step": 228855
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0484864711761475,
      "learning_rate": 6.67389346243219e-08,
      "loss": 2.9279,
      "step": 228856
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6485323905944824,
      "learning_rate": 6.66526839539161e-08,
      "loss": 3.047,
      "step": 228857
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.446530818939209,
      "learning_rate": 6.656648904673767e-08,
      "loss": 2.8558,
      "step": 228858
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.84445858001709,
      "learning_rate": 6.648034990285322e-08,
      "loss": 2.9118,
      "step": 228859
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.700235605239868,
      "learning_rate": 6.639426652229607e-08,
      "loss": 2.6952,
      "step": 228860
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.297454357147217,
      "learning_rate": 6.63082389050329e-08,
      "loss": 3.0778,
      "step": 228861
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8723554611206055,
      "learning_rate": 6.622226705109701e-08,
      "loss": 2.9501,
      "step": 228862
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.060365676879883,
      "learning_rate": 6.613635096055503e-08,
      "loss": 3.0765,
      "step": 228863
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2471063137054443,
      "learning_rate": 6.605049063334034e-08,
      "loss": 2.9074,
      "step": 228864
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6135993003845215,
      "learning_rate": 6.596468606951955e-08,
      "loss": 3.0196,
      "step": 228865
    },
    {
      "epoch": 2.98,
      "grad_norm": 5.423892974853516,
      "learning_rate": 6.587893726909265e-08,
      "loss": 2.9327,
      "step": 228866
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1150810718536377,
      "learning_rate": 6.579324423209298e-08,
      "loss": 2.9699,
      "step": 228867
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1231722831726074,
      "learning_rate": 6.57076069585205e-08,
      "loss": 2.8749,
      "step": 228868
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.458287239074707,
      "learning_rate": 6.562202544840855e-08,
      "loss": 3.1742,
      "step": 228869
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.835552453994751,
      "learning_rate": 6.55364997017238e-08,
      "loss": 2.7658,
      "step": 228870
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6319305896759033,
      "learning_rate": 6.545102971853288e-08,
      "loss": 2.9181,
      "step": 228871
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0768537521362305,
      "learning_rate": 6.536561549883579e-08,
      "loss": 2.9521,
      "step": 228872
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.103290319442749,
      "learning_rate": 6.528025704266582e-08,
      "loss": 2.9673,
      "step": 228873
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.27421498298645,
      "learning_rate": 6.519495435002297e-08,
      "loss": 3.1491,
      "step": 228874
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0056326389312744,
      "learning_rate": 6.510970742090727e-08,
      "loss": 2.9915,
      "step": 228875
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.086442232131958,
      "learning_rate": 6.502451625535198e-08,
      "loss": 2.8418,
      "step": 228876
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.383497714996338,
      "learning_rate": 6.493938085335715e-08,
      "loss": 2.9564,
      "step": 228877
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8296523094177246,
      "learning_rate": 6.485430121495605e-08,
      "loss": 2.8203,
      "step": 228878
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.788954734802246,
      "learning_rate": 6.4769277340182e-08,
      "loss": 2.9244,
      "step": 228879
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.279445171356201,
      "learning_rate": 6.468430922900169e-08,
      "loss": 2.9618,
      "step": 228880
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7539000511169434,
      "learning_rate": 6.459939688148175e-08,
      "loss": 3.0302,
      "step": 228881
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.7291769981384277,
      "learning_rate": 6.451454029758884e-08,
      "loss": 3.0501,
      "step": 228882
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.103625535964966,
      "learning_rate": 6.44297394773896e-08,
      "loss": 3.0194,
      "step": 228883
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.049341917037964,
      "learning_rate": 6.434499442085073e-08,
      "loss": 2.8296,
      "step": 228884
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.376788377761841,
      "learning_rate": 6.426030512803881e-08,
      "loss": 2.9613,
      "step": 228885
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0226621627807617,
      "learning_rate": 6.417567159892056e-08,
      "loss": 3.1231,
      "step": 228886
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.674962282180786,
      "learning_rate": 6.409109383356259e-08,
      "loss": 2.6356,
      "step": 228887
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.9178547859191895,
      "learning_rate": 6.400657183193158e-08,
      "loss": 2.5481,
      "step": 228888
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.122913360595703,
      "learning_rate": 6.392210559406086e-08,
      "loss": 2.914,
      "step": 228889
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.801678419113159,
      "learning_rate": 6.383769511995041e-08,
      "loss": 3.1789,
      "step": 228890
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.496612787246704,
      "learning_rate": 6.375334040966684e-08,
      "loss": 2.9229,
      "step": 228891
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.945122718811035,
      "learning_rate": 6.366904146317686e-08,
      "loss": 2.816,
      "step": 228892
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.715592384338379,
      "learning_rate": 6.358479828051377e-08,
      "loss": 2.8999,
      "step": 228893
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2136824131011963,
      "learning_rate": 6.350061086171087e-08,
      "loss": 3.1794,
      "step": 228894
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.060060501098633,
      "learning_rate": 6.341647920673488e-08,
      "loss": 3.0066,
      "step": 228895
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5974044799804688,
      "learning_rate": 6.333240331565237e-08,
      "loss": 2.9932,
      "step": 228896
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8303301334381104,
      "learning_rate": 6.324838318846337e-08,
      "loss": 2.7632,
      "step": 228897
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7199535369873047,
      "learning_rate": 6.316441882516787e-08,
      "loss": 3.1523,
      "step": 228898
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7649056911468506,
      "learning_rate": 6.308051022579919e-08,
      "loss": 2.9478,
      "step": 228899
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8578991889953613,
      "learning_rate": 6.299665739035731e-08,
      "loss": 2.8034,
      "step": 228900
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.854978322982788,
      "learning_rate": 6.291286031884224e-08,
      "loss": 2.8709,
      "step": 228901
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.004956007003784,
      "learning_rate": 6.282911901132059e-08,
      "loss": 3.0902,
      "step": 228902
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8350372314453125,
      "learning_rate": 6.274543346779237e-08,
      "loss": 3.0297,
      "step": 228903
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.073453426361084,
      "learning_rate": 6.266180368825757e-08,
      "loss": 3.017,
      "step": 228904
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8746888637542725,
      "learning_rate": 6.257822967271619e-08,
      "loss": 2.8909,
      "step": 228905
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6899726390838623,
      "learning_rate": 6.249471142120155e-08,
      "loss": 3.141,
      "step": 228906
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7630040645599365,
      "learning_rate": 6.241124893374694e-08,
      "loss": 2.9267,
      "step": 228907
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1803908348083496,
      "learning_rate": 6.232784221035236e-08,
      "loss": 3.0155,
      "step": 228908
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.827075719833374,
      "learning_rate": 6.224449125101782e-08,
      "loss": 2.866,
      "step": 228909
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1187806129455566,
      "learning_rate": 6.216119605577664e-08,
      "loss": 3.043,
      "step": 228910
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1877052783966064,
      "learning_rate": 6.207795662466208e-08,
      "loss": 2.8918,
      "step": 228911
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8346691131591797,
      "learning_rate": 6.199477295764088e-08,
      "loss": 2.7352,
      "step": 228912
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.101977586746216,
      "learning_rate": 6.191164505477963e-08,
      "loss": 3.0901,
      "step": 228913
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.411147117614746,
      "learning_rate": 6.182857291607835e-08,
      "loss": 2.8393,
      "step": 228914
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.578659772872925,
      "learning_rate": 6.174555654150371e-08,
      "loss": 3.032,
      "step": 228915
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.765406847000122,
      "learning_rate": 6.166259593115563e-08,
      "loss": 2.9596,
      "step": 228916
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.138622999191284,
      "learning_rate": 6.157969108496752e-08,
      "loss": 2.8762,
      "step": 228917
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0763564109802246,
      "learning_rate": 6.149684200300598e-08,
      "loss": 2.8128,
      "step": 228918
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3224027156829834,
      "learning_rate": 6.141404868530431e-08,
      "loss": 2.7718,
      "step": 228919
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.951871871948242,
      "learning_rate": 6.13313111318292e-08,
      "loss": 2.8184,
      "step": 228920
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2056357860565186,
      "learning_rate": 6.124862934258067e-08,
      "loss": 2.9499,
      "step": 228921
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.867462158203125,
      "learning_rate": 6.116600331765864e-08,
      "loss": 3.0531,
      "step": 228922
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6131179332733154,
      "learning_rate": 6.108343305699648e-08,
      "loss": 2.8907,
      "step": 228923
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3609211444854736,
      "learning_rate": 6.100091856066081e-08,
      "loss": 2.7736,
      "step": 228924
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.9986939430236816,
      "learning_rate": 6.091845982861832e-08,
      "loss": 2.7519,
      "step": 228925
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0617992877960205,
      "learning_rate": 6.083605686093562e-08,
      "loss": 2.8117,
      "step": 228926
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8226802349090576,
      "learning_rate": 6.075370965761272e-08,
      "loss": 2.8378,
      "step": 228927
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7052218914031982,
      "learning_rate": 6.067141821864963e-08,
      "loss": 2.8634,
      "step": 228928
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.79890513420105,
      "learning_rate": 6.058918254404632e-08,
      "loss": 3.0596,
      "step": 228929
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.115957260131836,
      "learning_rate": 6.050700263386942e-08,
      "loss": 2.9387,
      "step": 228930
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6542367935180664,
      "learning_rate": 6.042487848808564e-08,
      "loss": 2.8437,
      "step": 228931
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.753547191619873,
      "learning_rate": 6.034281010676157e-08,
      "loss": 2.9556,
      "step": 228932
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.347853422164917,
      "learning_rate": 6.02607974898639e-08,
      "loss": 2.9165,
      "step": 228933
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1340062618255615,
      "learning_rate": 6.017884063742595e-08,
      "loss": 3.0268,
      "step": 228934
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.90537166595459,
      "learning_rate": 6.009693954944772e-08,
      "loss": 3.0422,
      "step": 228935
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2365334033966064,
      "learning_rate": 6.001509422596251e-08,
      "loss": 3.0259,
      "step": 228936
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1272337436676025,
      "learning_rate": 5.993330466700364e-08,
      "loss": 2.7824,
      "step": 228937
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.529263734817505,
      "learning_rate": 5.985157087253779e-08,
      "loss": 3.0627,
      "step": 228938
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.146113634109497,
      "learning_rate": 5.976989284263156e-08,
      "loss": 2.6101,
      "step": 228939
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8307533264160156,
      "learning_rate": 5.968827057725167e-08,
      "loss": 2.8531,
      "step": 228940
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5534651279449463,
      "learning_rate": 5.960670407646473e-08,
      "loss": 3.0538,
      "step": 228941
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.739086151123047,
      "learning_rate": 5.952519334023742e-08,
      "loss": 2.8842,
      "step": 228942
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.0632853507995605,
      "learning_rate": 5.9443738368636364e-08,
      "loss": 2.8423,
      "step": 228943
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.371840476989746,
      "learning_rate": 5.9362339161628244e-08,
      "loss": 2.8463,
      "step": 228944
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.273376226425171,
      "learning_rate": 5.928099571924638e-08,
      "loss": 3.0156,
      "step": 228945
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4140026569366455,
      "learning_rate": 5.9199708041490764e-08,
      "loss": 3.1246,
      "step": 228946
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9523580074310303,
      "learning_rate": 5.911847612839471e-08,
      "loss": 2.7926,
      "step": 228947
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.415698766708374,
      "learning_rate": 5.903729997999152e-08,
      "loss": 2.9201,
      "step": 228948
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.813389778137207,
      "learning_rate": 5.8956179596247875e-08,
      "loss": 2.8962,
      "step": 228949
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9443371295928955,
      "learning_rate": 5.887511497723041e-08,
      "loss": 2.6686,
      "step": 228950
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4391746520996094,
      "learning_rate": 5.87941061229058e-08,
      "loss": 3.1639,
      "step": 228951
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8047831058502197,
      "learning_rate": 5.8713153033340676e-08,
      "loss": 2.9171,
      "step": 228952
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1623363494873047,
      "learning_rate": 5.863225570850172e-08,
      "loss": 2.8987,
      "step": 228953
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.252506971359253,
      "learning_rate": 5.8551414148455546e-08,
      "loss": 3.0214,
      "step": 228954
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7890501022338867,
      "learning_rate": 5.847062835316885e-08,
      "loss": 2.5379,
      "step": 228955
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.719675302505493,
      "learning_rate": 5.838989832264163e-08,
      "loss": 2.9855,
      "step": 228956
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.573143720626831,
      "learning_rate": 5.83092240569738e-08,
      "loss": 2.9934,
      "step": 228957
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5990514755249023,
      "learning_rate": 5.8228605556098764e-08,
      "loss": 2.8303,
      "step": 228958
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.439868688583374,
      "learning_rate": 5.814804282004981e-08,
      "loss": 3.0437,
      "step": 228959
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6881473064422607,
      "learning_rate": 5.806753584889356e-08,
      "loss": 3.0413,
      "step": 228960
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1732630729675293,
      "learning_rate": 5.798708464256341e-08,
      "loss": 2.8127,
      "step": 228961
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9233477115631104,
      "learning_rate": 5.7906689201125955e-08,
      "loss": 2.8482,
      "step": 228962
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.567936420440674,
      "learning_rate": 5.7826349524614514e-08,
      "loss": 2.8678,
      "step": 228963
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.160291910171509,
      "learning_rate": 5.7746065612995776e-08,
      "loss": 3.0098,
      "step": 228964
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2654099464416504,
      "learning_rate": 5.766583746626974e-08,
      "loss": 2.9703,
      "step": 228965
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3290388584136963,
      "learning_rate": 5.758566508453632e-08,
      "loss": 3.0054,
      "step": 228966
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7506155967712402,
      "learning_rate": 5.750554846772892e-08,
      "loss": 3.0432,
      "step": 228967
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.7331039905548096,
      "learning_rate": 5.7425487615914144e-08,
      "loss": 2.6964,
      "step": 228968
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.809548854827881,
      "learning_rate": 5.734548252905868e-08,
      "loss": 2.924,
      "step": 228969
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.428734064102173,
      "learning_rate": 5.726553320722915e-08,
      "loss": 2.636,
      "step": 228970
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0333399772644043,
      "learning_rate": 5.718563965042555e-08,
      "loss": 3.0879,
      "step": 228971
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.405168056488037,
      "learning_rate": 5.7105801858614576e-08,
      "loss": 2.8925,
      "step": 228972
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5373706817626953,
      "learning_rate": 5.702601983186283e-08,
      "loss": 3.0973,
      "step": 228973
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6263489723205566,
      "learning_rate": 5.694629357020364e-08,
      "loss": 3.168,
      "step": 228974
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.51865816116333,
      "learning_rate": 5.6866623073570375e-08,
      "loss": 3.1071,
      "step": 228975
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8112576007843018,
      "learning_rate": 5.678700834206295e-08,
      "loss": 3.0962,
      "step": 228976
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7284557819366455,
      "learning_rate": 5.670744937564808e-08,
      "loss": 2.8459,
      "step": 228977
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6300885677337646,
      "learning_rate": 5.6627946174359064e-08,
      "loss": 2.9847,
      "step": 228978
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.900667428970337,
      "learning_rate": 5.654849873819589e-08,
      "loss": 2.8786,
      "step": 228979
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6396942138671875,
      "learning_rate": 5.646910706719188e-08,
      "loss": 2.8813,
      "step": 228980
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0151169300079346,
      "learning_rate": 5.638977116134702e-08,
      "loss": 2.7412,
      "step": 228981
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6950507164001465,
      "learning_rate": 5.631049102069462e-08,
      "loss": 2.9509,
      "step": 228982
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3822519779205322,
      "learning_rate": 5.623126664520139e-08,
      "loss": 3.0515,
      "step": 228983
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7410736083984375,
      "learning_rate": 5.6152098034933925e-08,
      "loss": 2.9697,
      "step": 228984
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.358354330062866,
      "learning_rate": 5.607298518992553e-08,
      "loss": 2.9981,
      "step": 228985
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8359272480010986,
      "learning_rate": 5.5993928110109606e-08,
      "loss": 2.724,
      "step": 228986
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.945578098297119,
      "learning_rate": 5.5914926795552764e-08,
      "loss": 3.0135,
      "step": 228987
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8899590969085693,
      "learning_rate": 5.58359812462883e-08,
      "loss": 3.0068,
      "step": 228988
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8627278804779053,
      "learning_rate": 5.575709146228291e-08,
      "loss": 2.9966,
      "step": 228989
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4850614070892334,
      "learning_rate": 5.567825744356991e-08,
      "loss": 2.6954,
      "step": 228990
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.719456434249878,
      "learning_rate": 5.559947919018259e-08,
      "loss": 3.1467,
      "step": 228991
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6782002449035645,
      "learning_rate": 5.5520756702120975e-08,
      "loss": 2.7612,
      "step": 228992
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0023586750030518,
      "learning_rate": 5.544208997938504e-08,
      "loss": 2.8548,
      "step": 228993
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0566587448120117,
      "learning_rate": 5.536347902200811e-08,
      "loss": 2.8106,
      "step": 228994
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.994692325592041,
      "learning_rate": 5.5284923830023474e-08,
      "loss": 2.7897,
      "step": 228995
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5890796184539795,
      "learning_rate": 5.520642440339784e-08,
      "loss": 2.8014,
      "step": 228996
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3482840061187744,
      "learning_rate": 5.512798074219782e-08,
      "loss": 2.7544,
      "step": 228997
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.402644395828247,
      "learning_rate": 5.50495928463901e-08,
      "loss": 3.025,
      "step": 228998
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.820835828781128,
      "learning_rate": 5.4971260716007995e-08,
      "loss": 2.7333,
      "step": 228999
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6052234172821045,
      "learning_rate": 5.48929843510848e-08,
      "loss": 2.8435,
      "step": 229000
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.016751766204834,
      "learning_rate": 5.481476375158722e-08,
      "loss": 2.7817,
      "step": 229001
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4033942222595215,
      "learning_rate": 5.4736598917581865e-08,
      "loss": 2.8822,
      "step": 229002
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.446689605712891,
      "learning_rate": 5.465848984906873e-08,
      "loss": 2.8637,
      "step": 229003
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.883127450942993,
      "learning_rate": 5.4580436546047826e-08,
      "loss": 2.9883,
      "step": 229004
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6081347465515137,
      "learning_rate": 5.4502439008552444e-08,
      "loss": 2.7434,
      "step": 229005
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6805429458618164,
      "learning_rate": 5.4424497236582596e-08,
      "loss": 3.0595,
      "step": 229006
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2398488521575928,
      "learning_rate": 5.434661123017159e-08,
      "loss": 3.147,
      "step": 229007
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.744126081466675,
      "learning_rate": 5.4268780989286106e-08,
      "loss": 2.9919,
      "step": 229008
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.330016613006592,
      "learning_rate": 5.4191006513992775e-08,
      "loss": 2.9037,
      "step": 229009
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9016778469085693,
      "learning_rate": 5.4113287804291584e-08,
      "loss": 2.7276,
      "step": 229010
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.076218605041504,
      "learning_rate": 5.403562486018254e-08,
      "loss": 2.7394,
      "step": 229011
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.141326904296875,
      "learning_rate": 5.395801768169894e-08,
      "loss": 2.9529,
      "step": 229012
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8844635486602783,
      "learning_rate": 5.388046626884079e-08,
      "loss": 2.8702,
      "step": 229013
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.065091848373413,
      "learning_rate": 5.38029706216414e-08,
      "loss": 2.9945,
      "step": 229014
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7279579639434814,
      "learning_rate": 5.372553074010078e-08,
      "loss": 2.9463,
      "step": 229015
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1582696437835693,
      "learning_rate": 5.3648146624218904e-08,
      "loss": 2.9481,
      "step": 229016
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2884366512298584,
      "learning_rate": 5.3570818274029093e-08,
      "loss": 2.9831,
      "step": 229017
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.046428918838501,
      "learning_rate": 5.3493545689531346e-08,
      "loss": 3.0968,
      "step": 229018
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.914520740509033,
      "learning_rate": 5.3416328870758974e-08,
      "loss": 2.8492,
      "step": 229019
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1871657371520996,
      "learning_rate": 5.3339167817745277e-08,
      "loss": 2.7106,
      "step": 229020
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0023624897003174,
      "learning_rate": 5.326206253045695e-08,
      "loss": 2.93,
      "step": 229021
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7254228591918945,
      "learning_rate": 5.31850130089273e-08,
      "loss": 2.9879,
      "step": 229022
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5439085960388184,
      "learning_rate": 5.310801925315633e-08,
      "loss": 2.9272,
      "step": 229023
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.835444450378418,
      "learning_rate": 5.3031081263210654e-08,
      "loss": 3.0631,
      "step": 229024
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6498048305511475,
      "learning_rate": 5.295419903902365e-08,
      "loss": 2.7949,
      "step": 229025
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9567062854766846,
      "learning_rate": 5.2877372580695246e-08,
      "loss": 2.9319,
      "step": 229026
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8155763149261475,
      "learning_rate": 5.280060188819213e-08,
      "loss": 2.9534,
      "step": 229027
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.212205171585083,
      "learning_rate": 5.272388696151431e-08,
      "loss": 3.0782,
      "step": 229028
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.780017852783203,
      "learning_rate": 5.264722780072839e-08,
      "loss": 2.8668,
      "step": 229029
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.915482521057129,
      "learning_rate": 5.257062440576776e-08,
      "loss": 2.8445,
      "step": 229030
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.696423292160034,
      "learning_rate": 5.249407677673234e-08,
      "loss": 2.8735,
      "step": 229031
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4220736026763916,
      "learning_rate": 5.2417584913588825e-08,
      "loss": 2.9434,
      "step": 229032
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.8221688270568848,
      "learning_rate": 5.2341148816370526e-08,
      "loss": 2.8581,
      "step": 229033
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3437728881835938,
      "learning_rate": 5.226476848511074e-08,
      "loss": 3.2923,
      "step": 229034
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.746737480163574,
      "learning_rate": 5.218844391974286e-08,
      "loss": 3.0299,
      "step": 229035
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.863438129425049,
      "learning_rate": 5.2112175120366806e-08,
      "loss": 2.7537,
      "step": 229036
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9943315982818604,
      "learning_rate": 5.203596208698257e-08,
      "loss": 2.9145,
      "step": 229037
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.095189094543457,
      "learning_rate": 5.195980481955686e-08,
      "loss": 2.9204,
      "step": 229038
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.40614914894104,
      "learning_rate": 5.1883703318122974e-08,
      "loss": 2.9723,
      "step": 229039
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.920145273208618,
      "learning_rate": 5.1807657582714215e-08,
      "loss": 2.9436,
      "step": 229040
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0145301818847656,
      "learning_rate": 5.173166761336389e-08,
      "loss": 3.1446,
      "step": 229041
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.0671844482421875,
      "learning_rate": 5.1655733410038704e-08,
      "loss": 2.995,
      "step": 229042
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.861536979675293,
      "learning_rate": 5.157985497277195e-08,
      "loss": 2.8586,
      "step": 229043
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9840049743652344,
      "learning_rate": 5.150403230159694e-08,
      "loss": 2.8344,
      "step": 229044
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.477015495300293,
      "learning_rate": 5.142826539648037e-08,
      "loss": 2.888,
      "step": 229045
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.841675281524658,
      "learning_rate": 5.135255425748885e-08,
      "loss": 2.9924,
      "step": 229046
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6840157508850098,
      "learning_rate": 5.127689888458908e-08,
      "loss": 2.9328,
      "step": 229047
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5628321170806885,
      "learning_rate": 5.1201299277814355e-08,
      "loss": 2.9222,
      "step": 229048
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.97103214263916,
      "learning_rate": 5.1125755437231296e-08,
      "loss": 2.9411,
      "step": 229049
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.842062473297119,
      "learning_rate": 5.105026736277329e-08,
      "loss": 2.8982,
      "step": 229050
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.967616081237793,
      "learning_rate": 5.097483505447364e-08,
      "loss": 2.9685,
      "step": 229051
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1476452350616455,
      "learning_rate": 5.0899458512365656e-08,
      "loss": 2.9981,
      "step": 229052
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7242469787597656,
      "learning_rate": 5.082413773648264e-08,
      "loss": 2.7421,
      "step": 229053
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.91843843460083,
      "learning_rate": 5.074887272679129e-08,
      "loss": 3.1219,
      "step": 229054
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8157799243927,
      "learning_rate": 5.067366348332491e-08,
      "loss": 2.9802,
      "step": 229055
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.235623836517334,
      "learning_rate": 5.0598510006116813e-08,
      "loss": 3.0659,
      "step": 229056
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.902096748352051,
      "learning_rate": 5.052341229516699e-08,
      "loss": 3.1184,
      "step": 229057
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7050414085388184,
      "learning_rate": 5.044837035047544e-08,
      "loss": 2.9975,
      "step": 229058
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.805636405944824,
      "learning_rate": 5.0373384172075485e-08,
      "loss": 2.8999,
      "step": 229059
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.153667688369751,
      "learning_rate": 5.0298453759967106e-08,
      "loss": 2.8365,
      "step": 229060
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.179621934890747,
      "learning_rate": 5.022357911415031e-08,
      "loss": 3.0573,
      "step": 229061
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.956402063369751,
      "learning_rate": 5.014876023469172e-08,
      "loss": 2.9924,
      "step": 229062
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4462168216705322,
      "learning_rate": 5.007399712155802e-08,
      "loss": 3.059,
      "step": 229063
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.329274892807007,
      "learning_rate": 4.999928977478251e-08,
      "loss": 3.087,
      "step": 229064
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7291479110717773,
      "learning_rate": 4.9924638194365205e-08,
      "loss": 2.7542,
      "step": 229065
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.212998628616333,
      "learning_rate": 4.9850042380339406e-08,
      "loss": 3.054,
      "step": 229066
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9016623497009277,
      "learning_rate": 4.977550233270511e-08,
      "loss": 2.8722,
      "step": 229067
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.194304943084717,
      "learning_rate": 4.9701018051462316e-08,
      "loss": 2.8608,
      "step": 229068
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0344419479370117,
      "learning_rate": 4.962658953664434e-08,
      "loss": 3.1881,
      "step": 229069
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.690627098083496,
      "learning_rate": 4.955221678828447e-08,
      "loss": 2.8317,
      "step": 229070
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7649779319763184,
      "learning_rate": 4.9477899806382723e-08,
      "loss": 2.8296,
      "step": 229071
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0397939682006836,
      "learning_rate": 4.9403638590905795e-08,
      "loss": 2.9905,
      "step": 229072
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.251729726791382,
      "learning_rate": 4.932943314195359e-08,
      "loss": 2.8624,
      "step": 229073
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9295220375061035,
      "learning_rate": 4.9255283459459504e-08,
      "loss": 2.849,
      "step": 229074
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5839507579803467,
      "learning_rate": 4.9181189543490154e-08,
      "loss": 3.2578,
      "step": 229075
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.236231803894043,
      "learning_rate": 4.910715139401222e-08,
      "loss": 3.1781,
      "step": 229076
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5125138759613037,
      "learning_rate": 4.903316901109233e-08,
      "loss": 2.8774,
      "step": 229077
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8739240169525146,
      "learning_rate": 4.8959242394697176e-08,
      "loss": 2.7393,
      "step": 229078
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4671459197998047,
      "learning_rate": 4.8885371544893357e-08,
      "loss": 2.9579,
      "step": 229079
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9826762676239014,
      "learning_rate": 4.8811556461647585e-08,
      "loss": 2.9326,
      "step": 229080
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9531989097595215,
      "learning_rate": 4.873779714499315e-08,
      "loss": 2.9961,
      "step": 229081
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.72414493560791,
      "learning_rate": 4.8664093594930066e-08,
      "loss": 2.9005,
      "step": 229082
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9148199558258057,
      "learning_rate": 4.859044581149163e-08,
      "loss": 2.7712,
      "step": 229083
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1695048809051514,
      "learning_rate": 4.851685379467785e-08,
      "loss": 2.9468,
      "step": 229084
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1381945610046387,
      "learning_rate": 4.844331754452202e-08,
      "loss": 2.9884,
      "step": 229085
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4159979820251465,
      "learning_rate": 4.8369837060990846e-08,
      "loss": 2.8814,
      "step": 229086
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.4768996238708496,
      "learning_rate": 4.829641234415094e-08,
      "loss": 3.0122,
      "step": 229087
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9587583541870117,
      "learning_rate": 4.822304339400229e-08,
      "loss": 2.6875,
      "step": 229088
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.938694953918457,
      "learning_rate": 4.814973021054491e-08,
      "loss": 3.1914,
      "step": 229089
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1499216556549072,
      "learning_rate": 4.80764727937788e-08,
      "loss": 2.8439,
      "step": 229090
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7920498847961426,
      "learning_rate": 4.8003271143737256e-08,
      "loss": 3.0209,
      "step": 229091
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6940126419067383,
      "learning_rate": 4.793012526045359e-08,
      "loss": 2.9969,
      "step": 229092
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.729246139526367,
      "learning_rate": 4.78570351439278e-08,
      "loss": 3.0468,
      "step": 229093
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3477401733398438,
      "learning_rate": 4.778400079415989e-08,
      "loss": 2.6631,
      "step": 229094
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1138508319854736,
      "learning_rate": 4.771102221118317e-08,
      "loss": 2.9139,
      "step": 229095
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.666851758956909,
      "learning_rate": 4.7638099394964324e-08,
      "loss": 2.999,
      "step": 229096
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.777003049850464,
      "learning_rate": 4.756523234556997e-08,
      "loss": 2.9187,
      "step": 229097
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9767532348632812,
      "learning_rate": 4.7492421063000105e-08,
      "loss": 3.2335,
      "step": 229098
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.810497760772705,
      "learning_rate": 4.741966554725474e-08,
      "loss": 3.0316,
      "step": 229099
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8562159538269043,
      "learning_rate": 4.7346965798367165e-08,
      "loss": 2.9152,
      "step": 229100
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.929126262664795,
      "learning_rate": 4.727432181633739e-08,
      "loss": 2.8445,
      "step": 229101
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.435121536254883,
      "learning_rate": 4.7201733601165414e-08,
      "loss": 2.8019,
      "step": 229102
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0831069946289062,
      "learning_rate": 4.712920115291785e-08,
      "loss": 2.8851,
      "step": 229103
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8448925018310547,
      "learning_rate": 4.705672447152808e-08,
      "loss": 3.0332,
      "step": 229104
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2429141998291016,
      "learning_rate": 4.698430355706273e-08,
      "loss": 2.9077,
      "step": 229105
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1367807388305664,
      "learning_rate": 4.691193840955509e-08,
      "loss": 3.1254,
      "step": 229106
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.865262746810913,
      "learning_rate": 4.683962902897187e-08,
      "loss": 2.805,
      "step": 229107
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8288373947143555,
      "learning_rate": 4.676737541531306e-08,
      "loss": 3.0524,
      "step": 229108
    },
    {
      "epoch": 2.98,
      "grad_norm": 5.624073505401611,
      "learning_rate": 4.6695177568678574e-08,
      "loss": 3.0847,
      "step": 229109
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3023829460144043,
      "learning_rate": 4.662303548896851e-08,
      "loss": 3.2316,
      "step": 229110
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1181509494781494,
      "learning_rate": 4.655094917628277e-08,
      "loss": 2.9185,
      "step": 229111
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7000374794006348,
      "learning_rate": 4.6478918630621365e-08,
      "loss": 3.0842,
      "step": 229112
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5542194843292236,
      "learning_rate": 4.6406943851950984e-08,
      "loss": 3.1408,
      "step": 229113
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.525547742843628,
      "learning_rate": 4.633502484033824e-08,
      "loss": 2.6297,
      "step": 229114
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.438584327697754,
      "learning_rate": 4.6263161595749834e-08,
      "loss": 2.7524,
      "step": 229115
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.966845989227295,
      "learning_rate": 4.619135411821906e-08,
      "loss": 3.096,
      "step": 229116
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.947805881500244,
      "learning_rate": 4.6119602407779234e-08,
      "loss": 2.8975,
      "step": 229117
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0144588947296143,
      "learning_rate": 4.604790646443035e-08,
      "loss": 2.8193,
      "step": 229118
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.848491907119751,
      "learning_rate": 4.597626628820572e-08,
      "loss": 2.6306,
      "step": 229119
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6838629245758057,
      "learning_rate": 4.5904681879072035e-08,
      "loss": 2.9637,
      "step": 229120
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.598001718521118,
      "learning_rate": 4.58331532370626e-08,
      "loss": 2.5031,
      "step": 229121
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6608009338378906,
      "learning_rate": 4.5761680362177425e-08,
      "loss": 2.8607,
      "step": 229122
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.585977792739868,
      "learning_rate": 4.5690263254483103e-08,
      "loss": 3.0686,
      "step": 229123
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.957141399383545,
      "learning_rate": 4.561890191394635e-08,
      "loss": 2.7157,
      "step": 229124
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1948494911193848,
      "learning_rate": 4.554759634056715e-08,
      "loss": 2.9555,
      "step": 229125
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.58430814743042,
      "learning_rate": 4.5476346534412126e-08,
      "loss": 2.869,
      "step": 229126
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.801231622695923,
      "learning_rate": 4.540515249544796e-08,
      "loss": 2.7715,
      "step": 229127
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9062535762786865,
      "learning_rate": 4.533401422374128e-08,
      "loss": 2.9919,
      "step": 229128
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8179402351379395,
      "learning_rate": 4.526293171922546e-08,
      "loss": 3.0011,
      "step": 229129
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9687340259552,
      "learning_rate": 4.5191904981967117e-08,
      "loss": 2.844,
      "step": 229130
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.93949556350708,
      "learning_rate": 4.5120934011966256e-08,
      "loss": 2.4643,
      "step": 229131
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6829276084899902,
      "learning_rate": 4.505001880925618e-08,
      "loss": 2.8234,
      "step": 229132
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.955392599105835,
      "learning_rate": 4.4979159373836896e-08,
      "loss": 2.7787,
      "step": 229133
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.068783760070801,
      "learning_rate": 4.490835570570839e-08,
      "loss": 2.9479,
      "step": 229134
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8445255756378174,
      "learning_rate": 4.483760780487067e-08,
      "loss": 2.9905,
      "step": 229135
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.025303840637207,
      "learning_rate": 4.4766915671390345e-08,
      "loss": 2.654,
      "step": 229136
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.439424753189087,
      "learning_rate": 4.469627930523412e-08,
      "loss": 2.8286,
      "step": 229137
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.155763626098633,
      "learning_rate": 4.462569870643529e-08,
      "loss": 2.6573,
      "step": 229138
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3630714416503906,
      "learning_rate": 4.4555173875027164e-08,
      "loss": 2.9815,
      "step": 229139
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.6366758346557617,
      "learning_rate": 4.448470481097644e-08,
      "loss": 2.7937,
      "step": 229140
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.409587860107422,
      "learning_rate": 4.4414291514316414e-08,
      "loss": 2.9117,
      "step": 229141
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2919270992279053,
      "learning_rate": 4.43439339850471e-08,
      "loss": 2.5518,
      "step": 229142
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.012413501739502,
      "learning_rate": 4.42736322232351e-08,
      "loss": 2.8431,
      "step": 229143
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.199108362197876,
      "learning_rate": 4.420338622881381e-08,
      "loss": 2.6537,
      "step": 229144
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.732527017593384,
      "learning_rate": 4.413319600184983e-08,
      "loss": 2.9141,
      "step": 229145
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7856109142303467,
      "learning_rate": 4.4063061542343184e-08,
      "loss": 3.2004,
      "step": 229146
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0421502590179443,
      "learning_rate": 4.3992982850327154e-08,
      "loss": 3.0758,
      "step": 229147
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.474670886993408,
      "learning_rate": 4.3922959925768444e-08,
      "loss": 3.1973,
      "step": 229148
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3987741470336914,
      "learning_rate": 4.3852992768733665e-08,
      "loss": 2.5915,
      "step": 229149
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.939000129699707,
      "learning_rate": 4.378308137918951e-08,
      "loss": 2.9561,
      "step": 229150
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.822523832321167,
      "learning_rate": 4.371322575716929e-08,
      "loss": 3.2207,
      "step": 229151
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.073849678039551,
      "learning_rate": 4.364342590270631e-08,
      "loss": 2.9157,
      "step": 229152
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9162518978118896,
      "learning_rate": 4.357368181576726e-08,
      "loss": 2.7659,
      "step": 229153
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9128386974334717,
      "learning_rate": 4.3503993496385446e-08,
      "loss": 3.1327,
      "step": 229154
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.39620041847229,
      "learning_rate": 4.343436094459418e-08,
      "loss": 3.0856,
      "step": 229155
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.330211639404297,
      "learning_rate": 4.336478416039346e-08,
      "loss": 2.6676,
      "step": 229156
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0257339477539062,
      "learning_rate": 4.3295263143783286e-08,
      "loss": 2.7805,
      "step": 229157
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4814348220825195,
      "learning_rate": 4.322579789479697e-08,
      "loss": 2.9382,
      "step": 229158
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.269546031951904,
      "learning_rate": 4.3156388413434496e-08,
      "loss": 2.8981,
      "step": 229159
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7110157012939453,
      "learning_rate": 4.308703469969588e-08,
      "loss": 2.9321,
      "step": 229160
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.752054452896118,
      "learning_rate": 4.3017736753614415e-08,
      "loss": 2.9277,
      "step": 229161
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0584325790405273,
      "learning_rate": 4.2948494575223423e-08,
      "loss": 3.0384,
      "step": 229162
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.859893560409546,
      "learning_rate": 4.287930816448959e-08,
      "loss": 2.7332,
      "step": 229163
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1091625690460205,
      "learning_rate": 4.281017752144622e-08,
      "loss": 2.7923,
      "step": 229164
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.692365884780884,
      "learning_rate": 4.274110264609332e-08,
      "loss": 2.7435,
      "step": 229165
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7222909927368164,
      "learning_rate": 4.267208353846419e-08,
      "loss": 2.9369,
      "step": 229166
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0265517234802246,
      "learning_rate": 4.2603120198592135e-08,
      "loss": 2.8098,
      "step": 229167
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2443277835845947,
      "learning_rate": 4.253421262644385e-08,
      "loss": 2.9108,
      "step": 229168
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.758301019668579,
      "learning_rate": 4.246536082201934e-08,
      "loss": 2.9799,
      "step": 229169
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8773345947265625,
      "learning_rate": 4.239656478541853e-08,
      "loss": 2.8031,
      "step": 229170
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8714442253112793,
      "learning_rate": 4.232782451657479e-08,
      "loss": 3.0016,
      "step": 229171
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.8707659244537354,
      "learning_rate": 4.225914001552144e-08,
      "loss": 2.7878,
      "step": 229172
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8420095443725586,
      "learning_rate": 4.219051128225848e-08,
      "loss": 2.6789,
      "step": 229173
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0465970039367676,
      "learning_rate": 4.2121938316819206e-08,
      "loss": 3.0476,
      "step": 229174
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.3727900981903076,
      "learning_rate": 4.205342111923693e-08,
      "loss": 3.3465,
      "step": 229175
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7926993370056152,
      "learning_rate": 4.1984959689478346e-08,
      "loss": 2.9035,
      "step": 229176
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.772468328475952,
      "learning_rate": 4.191655402761007e-08,
      "loss": 2.8461,
      "step": 229177
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4108216762542725,
      "learning_rate": 4.1848204133565486e-08,
      "loss": 2.8869,
      "step": 229178
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.730053186416626,
      "learning_rate": 4.177991000741121e-08,
      "loss": 3.0115,
      "step": 229179
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.789534330368042,
      "learning_rate": 4.1711671649180544e-08,
      "loss": 2.8082,
      "step": 229180
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.206493616104126,
      "learning_rate": 4.164348905884019e-08,
      "loss": 2.9396,
      "step": 229181
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.099024534225464,
      "learning_rate": 4.1575362236423436e-08,
      "loss": 2.9528,
      "step": 229182
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1668550968170166,
      "learning_rate": 4.15072911819303e-08,
      "loss": 2.9997,
      "step": 229183
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8497486114501953,
      "learning_rate": 4.1439275895394085e-08,
      "loss": 2.7406,
      "step": 229184
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.596113681793213,
      "learning_rate": 4.137131637681479e-08,
      "loss": 2.9677,
      "step": 229185
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.939366340637207,
      "learning_rate": 4.1303412626192415e-08,
      "loss": 3.0927,
      "step": 229186
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.572021484375,
      "learning_rate": 4.1235564643593566e-08,
      "loss": 2.9966,
      "step": 229187
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.055665969848633,
      "learning_rate": 4.116777242895164e-08,
      "loss": 2.9198,
      "step": 229188
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8712539672851562,
      "learning_rate": 4.1100035982333244e-08,
      "loss": 2.8711,
      "step": 229189
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8481712341308594,
      "learning_rate": 4.103235530373838e-08,
      "loss": 2.6996,
      "step": 229190
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7078335285186768,
      "learning_rate": 4.0964730393167055e-08,
      "loss": 2.8215,
      "step": 229191
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.540699005126953,
      "learning_rate": 4.089716125065257e-08,
      "loss": 2.8892,
      "step": 229192
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0987815856933594,
      "learning_rate": 4.0829647876194914e-08,
      "loss": 3.0338,
      "step": 229193
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.768747091293335,
      "learning_rate": 4.0762190269827413e-08,
      "loss": 2.815,
      "step": 229194
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.831392288208008,
      "learning_rate": 4.0694788431516746e-08,
      "loss": 2.9274,
      "step": 229195
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7686235904693604,
      "learning_rate": 4.0627442361296225e-08,
      "loss": 3.1393,
      "step": 229196
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1183297634124756,
      "learning_rate": 4.0560152059199155e-08,
      "loss": 2.9046,
      "step": 229197
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.389056921005249,
      "learning_rate": 4.0492917525225544e-08,
      "loss": 2.8264,
      "step": 229198
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.746112823486328,
      "learning_rate": 4.042573875940869e-08,
      "loss": 3.0897,
      "step": 229199
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0608785152435303,
      "learning_rate": 4.0358615761715286e-08,
      "loss": 3.0878,
      "step": 229200
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0897021293640137,
      "learning_rate": 4.029154853217864e-08,
      "loss": 2.811,
      "step": 229201
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.5499372482299805,
      "learning_rate": 4.0224537070832064e-08,
      "loss": 2.6339,
      "step": 229202
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2047715187072754,
      "learning_rate": 4.0157581377642245e-08,
      "loss": 2.9037,
      "step": 229203
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.899232864379883,
      "learning_rate": 4.00906814526758e-08,
      "loss": 3.0419,
      "step": 229204
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.999894857406616,
      "learning_rate": 4.002383729589942e-08,
      "loss": 2.9049,
      "step": 229205
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.340780258178711,
      "learning_rate": 3.995704890734641e-08,
      "loss": 2.7795,
      "step": 229206
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.89803409576416,
      "learning_rate": 3.9890316287050086e-08,
      "loss": 3.0186,
      "step": 229207
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.572603702545166,
      "learning_rate": 3.982363943497713e-08,
      "loss": 2.8191,
      "step": 229208
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2551276683807373,
      "learning_rate": 3.975701835116085e-08,
      "loss": 3.019,
      "step": 229209
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9438068866729736,
      "learning_rate": 3.969045303563456e-08,
      "loss": 3.1021,
      "step": 229210
    },
    {
      "epoch": 2.98,
      "grad_norm": 5.431274890899658,
      "learning_rate": 3.962394348836495e-08,
      "loss": 3.0016,
      "step": 229211
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6962838172912598,
      "learning_rate": 3.955748970941863e-08,
      "loss": 2.8124,
      "step": 229212
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1695451736450195,
      "learning_rate": 3.9491091698762303e-08,
      "loss": 2.8825,
      "step": 229213
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7771496772766113,
      "learning_rate": 3.942474945642926e-08,
      "loss": 2.8268,
      "step": 229214
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0963385105133057,
      "learning_rate": 3.935846298245282e-08,
      "loss": 2.9694,
      "step": 229215
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1915335655212402,
      "learning_rate": 3.929223227679967e-08,
      "loss": 3.2225,
      "step": 229216
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2730135917663574,
      "learning_rate": 3.9226057339503125e-08,
      "loss": 2.8377,
      "step": 229217
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8679311275482178,
      "learning_rate": 3.915993817059648e-08,
      "loss": 2.8335,
      "step": 229218
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.060002565383911,
      "learning_rate": 3.909387477004644e-08,
      "loss": 2.8897,
      "step": 229219
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.101102590560913,
      "learning_rate": 3.90278671378863e-08,
      "loss": 3.0591,
      "step": 229220
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.322167158126831,
      "learning_rate": 3.8961915274182683e-08,
      "loss": 3.0435,
      "step": 229221
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8091719150543213,
      "learning_rate": 3.889601917883567e-08,
      "loss": 2.903,
      "step": 229222
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.674165964126587,
      "learning_rate": 3.8830178851978476e-08,
      "loss": 3.0826,
      "step": 229223
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.8971619606018066,
      "learning_rate": 3.876439429351119e-08,
      "loss": 3.0716,
      "step": 229224
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.495243549346924,
      "learning_rate": 3.8698665503533734e-08,
      "loss": 2.9176,
      "step": 229225
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9198272228240967,
      "learning_rate": 3.863299248201279e-08,
      "loss": 2.7257,
      "step": 229226
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.8229494094848633,
      "learning_rate": 3.856737522898168e-08,
      "loss": 2.9365,
      "step": 229227
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.893841028213501,
      "learning_rate": 3.850181374444039e-08,
      "loss": 2.816,
      "step": 229228
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.192915201187134,
      "learning_rate": 3.843630802838893e-08,
      "loss": 2.9707,
      "step": 229229
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9561591148376465,
      "learning_rate": 3.83708580808606e-08,
      "loss": 2.99,
      "step": 229230
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.5397887229919434,
      "learning_rate": 3.830546390188871e-08,
      "loss": 2.8807,
      "step": 229231
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.3660993576049805,
      "learning_rate": 3.824012549140665e-08,
      "loss": 2.8453,
      "step": 229232
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7601699829101562,
      "learning_rate": 3.817484284951433e-08,
      "loss": 2.8364,
      "step": 229233
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1360361576080322,
      "learning_rate": 3.8109615976178454e-08,
      "loss": 2.9435,
      "step": 229234
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.0256943702697754,
      "learning_rate": 3.804444487143232e-08,
      "loss": 3.0094,
      "step": 229235
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.6870276927948,
      "learning_rate": 3.797932953524263e-08,
      "loss": 2.8902,
      "step": 229236
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.810497999191284,
      "learning_rate": 3.791426996767599e-08,
      "loss": 2.9393,
      "step": 229237
    },
    {
      "epoch": 2.98,
      "grad_norm": 4.54355525970459,
      "learning_rate": 3.7849266168732407e-08,
      "loss": 3.3627,
      "step": 229238
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2909865379333496,
      "learning_rate": 3.778431813841187e-08,
      "loss": 2.8399,
      "step": 229239
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4004063606262207,
      "learning_rate": 3.771942587671439e-08,
      "loss": 3.0238,
      "step": 229240
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.1206395626068115,
      "learning_rate": 3.765458938370658e-08,
      "loss": 2.9467,
      "step": 229241
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.809414863586426,
      "learning_rate": 3.7589808659321816e-08,
      "loss": 3.1911,
      "step": 229242
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.2752692699432373,
      "learning_rate": 3.7525083703626726e-08,
      "loss": 3.1134,
      "step": 229243
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.681246042251587,
      "learning_rate": 3.746041451658799e-08,
      "loss": 3.0514,
      "step": 229244
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.7523581981658936,
      "learning_rate": 3.7395801098272225e-08,
      "loss": 2.6514,
      "step": 229245
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.9417009353637695,
      "learning_rate": 3.733124344867944e-08,
      "loss": 2.6871,
      "step": 229246
    },
    {
      "epoch": 2.98,
      "grad_norm": 2.858415365219116,
      "learning_rate": 3.726674156780962e-08,
      "loss": 3.0585,
      "step": 229247
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.478363513946533,
      "learning_rate": 3.7202295455629474e-08,
      "loss": 3.2231,
      "step": 229248
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.026716709136963,
      "learning_rate": 3.713790511223891e-08,
      "loss": 3.0284,
      "step": 229249
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.4806175231933594,
      "learning_rate": 3.7073570537604625e-08,
      "loss": 2.9465,
      "step": 229250
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.135878801345825,
      "learning_rate": 3.700929173172662e-08,
      "loss": 2.8797,
      "step": 229251
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0891873836517334,
      "learning_rate": 3.69450686946382e-08,
      "loss": 2.6034,
      "step": 229252
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.994870662689209,
      "learning_rate": 3.688090142633937e-08,
      "loss": 2.839,
      "step": 229253
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9675538539886475,
      "learning_rate": 3.681678992683013e-08,
      "loss": 2.8407,
      "step": 229254
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.730976104736328,
      "learning_rate": 3.675273419617708e-08,
      "loss": 2.6941,
      "step": 229255
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.807877779006958,
      "learning_rate": 3.668873423431362e-08,
      "loss": 2.763,
      "step": 229256
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.4683640003204346,
      "learning_rate": 3.6624790041339666e-08,
      "loss": 2.7498,
      "step": 229257
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2140774726867676,
      "learning_rate": 3.656090161718861e-08,
      "loss": 2.8053,
      "step": 229258
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.878770351409912,
      "learning_rate": 3.649706896189375e-08,
      "loss": 2.5979,
      "step": 229259
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2020621299743652,
      "learning_rate": 3.6433292075488397e-08,
      "loss": 3.0175,
      "step": 229260
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.385406732559204,
      "learning_rate": 3.636957095797255e-08,
      "loss": 3.0975,
      "step": 229261
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.398646354675293,
      "learning_rate": 3.630590560934621e-08,
      "loss": 2.8045,
      "step": 229262
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1808972358703613,
      "learning_rate": 3.624229602964268e-08,
      "loss": 2.8623,
      "step": 229263
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6942250728607178,
      "learning_rate": 3.6178742218895277e-08,
      "loss": 2.8774,
      "step": 229264
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5902559757232666,
      "learning_rate": 3.611524417703737e-08,
      "loss": 3.242,
      "step": 229265
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.196734666824341,
      "learning_rate": 3.605180190413559e-08,
      "loss": 2.886,
      "step": 229266
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3480732440948486,
      "learning_rate": 3.5988415400223234e-08,
      "loss": 3.0866,
      "step": 229267
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8681232929229736,
      "learning_rate": 3.5925084665267e-08,
      "loss": 2.9016,
      "step": 229268
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5915184020996094,
      "learning_rate": 3.586180969926689e-08,
      "loss": 3.1205,
      "step": 229269
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8878369331359863,
      "learning_rate": 3.57985905022895e-08,
      "loss": 2.5215,
      "step": 229270
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.172081470489502,
      "learning_rate": 3.573542707433485e-08,
      "loss": 3.1975,
      "step": 229271
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4584643840789795,
      "learning_rate": 3.5672319415369634e-08,
      "loss": 2.7132,
      "step": 229272
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3920159339904785,
      "learning_rate": 3.560926752546045e-08,
      "loss": 3.0367,
      "step": 229273
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.943650722503662,
      "learning_rate": 3.5546271404574e-08,
      "loss": 2.8621,
      "step": 229274
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.290087938308716,
      "learning_rate": 3.5483331052743594e-08,
      "loss": 2.8664,
      "step": 229275
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.071265935897827,
      "learning_rate": 3.542044646996922e-08,
      "loss": 2.7897,
      "step": 229276
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.725599765777588,
      "learning_rate": 3.5357617656284196e-08,
      "loss": 2.9244,
      "step": 229277
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.846912145614624,
      "learning_rate": 3.5294844611688525e-08,
      "loss": 2.9635,
      "step": 229278
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.704547643661499,
      "learning_rate": 3.52321273362155e-08,
      "loss": 3.0747,
      "step": 229279
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.613128185272217,
      "learning_rate": 3.516946582983182e-08,
      "loss": 3.1909,
      "step": 229280
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0134005546569824,
      "learning_rate": 3.51068600925708e-08,
      "loss": 2.9618,
      "step": 229281
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8269834518432617,
      "learning_rate": 3.5044310124465733e-08,
      "loss": 3.0682,
      "step": 229282
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.449471950531006,
      "learning_rate": 3.4981815925483325e-08,
      "loss": 2.9505,
      "step": 229283
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.92610239982605,
      "learning_rate": 3.4919377495656874e-08,
      "loss": 2.8863,
      "step": 229284
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.955744743347168,
      "learning_rate": 3.4856994835019694e-08,
      "loss": 2.9328,
      "step": 229285
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0758795738220215,
      "learning_rate": 3.479466794357177e-08,
      "loss": 3.115,
      "step": 229286
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.586055040359497,
      "learning_rate": 3.4732396821313126e-08,
      "loss": 2.8927,
      "step": 229287
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.737779140472412,
      "learning_rate": 3.467018146824374e-08,
      "loss": 2.7256,
      "step": 229288
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9245898723602295,
      "learning_rate": 3.460802188439693e-08,
      "loss": 2.8303,
      "step": 229289
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.170043468475342,
      "learning_rate": 3.4545918069772695e-08,
      "loss": 2.9285,
      "step": 229290
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.638636827468872,
      "learning_rate": 3.4483870024404334e-08,
      "loss": 3.0602,
      "step": 229291
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8082692623138428,
      "learning_rate": 3.442187774829186e-08,
      "loss": 3.1732,
      "step": 229292
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.819540023803711,
      "learning_rate": 3.435994124143526e-08,
      "loss": 2.7591,
      "step": 229293
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.829005241394043,
      "learning_rate": 3.4298060503834546e-08,
      "loss": 3.0194,
      "step": 229294
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.665860652923584,
      "learning_rate": 3.423623553555632e-08,
      "loss": 2.9217,
      "step": 229295
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.726487398147583,
      "learning_rate": 3.417446633653398e-08,
      "loss": 3.0174,
      "step": 229296
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0197246074676514,
      "learning_rate": 3.411275290683413e-08,
      "loss": 2.9908,
      "step": 229297
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.814206600189209,
      "learning_rate": 3.405109524649008e-08,
      "loss": 3.1746,
      "step": 229298
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.745292901992798,
      "learning_rate": 3.398949335543522e-08,
      "loss": 2.8996,
      "step": 229299
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.88175368309021,
      "learning_rate": 3.392794723373615e-08,
      "loss": 2.7779,
      "step": 229300
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8880574703216553,
      "learning_rate": 3.386645688139289e-08,
      "loss": 2.8199,
      "step": 229301
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1336066722869873,
      "learning_rate": 3.3805022298438736e-08,
      "loss": 2.8459,
      "step": 229302
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8256165981292725,
      "learning_rate": 3.374364348484038e-08,
      "loss": 3.0231,
      "step": 229303
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.883841037750244,
      "learning_rate": 3.3682320440631125e-08,
      "loss": 2.744,
      "step": 229304
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.922499418258667,
      "learning_rate": 3.362105316581098e-08,
      "loss": 3.0107,
      "step": 229305
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2665319442749023,
      "learning_rate": 3.355984166044656e-08,
      "loss": 2.7475,
      "step": 229306
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.891890287399292,
      "learning_rate": 3.3498685924471244e-08,
      "loss": 2.6121,
      "step": 229307
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.491772174835205,
      "learning_rate": 3.343758595791834e-08,
      "loss": 3.124,
      "step": 229308
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2540109157562256,
      "learning_rate": 3.3376541760821164e-08,
      "loss": 2.7169,
      "step": 229309
    },
    {
      "epoch": 2.99,
      "grad_norm": 5.029388427734375,
      "learning_rate": 3.3315553333213006e-08,
      "loss": 2.8311,
      "step": 229310
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.825666666030884,
      "learning_rate": 3.325462067506057e-08,
      "loss": 3.0677,
      "step": 229311
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.833073139190674,
      "learning_rate": 3.3193743786363855e-08,
      "loss": 2.9322,
      "step": 229312
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9172439575195312,
      "learning_rate": 3.3132922667189474e-08,
      "loss": 2.7727,
      "step": 229313
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.1822662353515625,
      "learning_rate": 3.307215731750412e-08,
      "loss": 3.0527,
      "step": 229314
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.457519054412842,
      "learning_rate": 3.3011447737307794e-08,
      "loss": 2.8718,
      "step": 229315
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8654274940490723,
      "learning_rate": 3.2950793926667106e-08,
      "loss": 2.8288,
      "step": 229316
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5828845500946045,
      "learning_rate": 3.289019588554875e-08,
      "loss": 2.9758,
      "step": 229317
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0310356616973877,
      "learning_rate": 3.282965361398604e-08,
      "loss": 2.8029,
      "step": 229318
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.558196544647217,
      "learning_rate": 3.2769167111978964e-08,
      "loss": 2.8542,
      "step": 229319
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1029133796691895,
      "learning_rate": 3.270873637956084e-08,
      "loss": 3.008,
      "step": 229320
    },
    {
      "epoch": 2.99,
      "grad_norm": 5.027568340301514,
      "learning_rate": 3.2648361416698356e-08,
      "loss": 3.042,
      "step": 229321
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0106570720672607,
      "learning_rate": 3.2588042223458125e-08,
      "loss": 3.2082,
      "step": 229322
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9856550693511963,
      "learning_rate": 3.252777879980684e-08,
      "loss": 2.9265,
      "step": 229323
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0989019870758057,
      "learning_rate": 3.2467571145744495e-08,
      "loss": 2.8241,
      "step": 229324
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1730000972747803,
      "learning_rate": 3.240741926133772e-08,
      "loss": 3.1161,
      "step": 229325
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.829274892807007,
      "learning_rate": 3.23473231465865e-08,
      "loss": 2.7937,
      "step": 229326
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4745090007781982,
      "learning_rate": 3.228728280145754e-08,
      "loss": 3.1992,
      "step": 229327
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4434590339660645,
      "learning_rate": 3.222729822598413e-08,
      "loss": 2.9526,
      "step": 229328
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.479864120483398,
      "learning_rate": 3.216736942019959e-08,
      "loss": 2.7467,
      "step": 229329
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2264761924743652,
      "learning_rate": 3.2107496384070616e-08,
      "loss": 2.8456,
      "step": 229330
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3454246520996094,
      "learning_rate": 3.204767911766382e-08,
      "loss": 2.792,
      "step": 229331
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.085911750793457,
      "learning_rate": 3.198791762094588e-08,
      "loss": 2.8184,
      "step": 229332
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8052005767822266,
      "learning_rate": 3.192821189395012e-08,
      "loss": 3.0698,
      "step": 229333
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0867977142333984,
      "learning_rate": 3.186856193667653e-08,
      "loss": 2.7465,
      "step": 229334
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6152877807617188,
      "learning_rate": 3.180896774915842e-08,
      "loss": 2.7724,
      "step": 229335
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2326626777648926,
      "learning_rate": 3.174942933136249e-08,
      "loss": 2.9512,
      "step": 229336
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.947662353515625,
      "learning_rate": 3.168994668332203e-08,
      "loss": 2.9131,
      "step": 229337
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.799814224243164,
      "learning_rate": 3.163051980507036e-08,
      "loss": 2.8948,
      "step": 229338
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7284164428710938,
      "learning_rate": 3.1571148696607486e-08,
      "loss": 2.8676,
      "step": 229339
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.230059862136841,
      "learning_rate": 3.1511833357933393e-08,
      "loss": 2.6531,
      "step": 229340
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0703938007354736,
      "learning_rate": 3.145257378908139e-08,
      "loss": 2.8718,
      "step": 229341
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1939613819122314,
      "learning_rate": 3.139336999001818e-08,
      "loss": 2.9181,
      "step": 229342
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.287519931793213,
      "learning_rate": 3.133422196077706e-08,
      "loss": 3.1478,
      "step": 229343
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.646212577819824,
      "learning_rate": 3.127512970139134e-08,
      "loss": 3.0461,
      "step": 229344
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.648411512374878,
      "learning_rate": 3.121609321182772e-08,
      "loss": 2.9771,
      "step": 229345
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.539665937423706,
      "learning_rate": 3.1157112492152806e-08,
      "loss": 2.6676,
      "step": 229346
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.843090295791626,
      "learning_rate": 3.109818754233329e-08,
      "loss": 2.8642,
      "step": 229347
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.674445629119873,
      "learning_rate": 3.103931836240248e-08,
      "loss": 2.8644,
      "step": 229348
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.607841968536377,
      "learning_rate": 3.0980504952360375e-08,
      "loss": 2.9123,
      "step": 229349
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.853510618209839,
      "learning_rate": 3.092174731220698e-08,
      "loss": 2.8537,
      "step": 229350
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0073153972625732,
      "learning_rate": 3.0863045442008904e-08,
      "loss": 2.7065,
      "step": 229351
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.68523907661438,
      "learning_rate": 3.080439934169954e-08,
      "loss": 2.7893,
      "step": 229352
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.762468099594116,
      "learning_rate": 3.074580901134549e-08,
      "loss": 2.7355,
      "step": 229353
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0987977981567383,
      "learning_rate": 3.068727445091346e-08,
      "loss": 2.9725,
      "step": 229354
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.820866346359253,
      "learning_rate": 3.062879566047005e-08,
      "loss": 3.0313,
      "step": 229355
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.828016519546509,
      "learning_rate": 3.057037263994866e-08,
      "loss": 2.8932,
      "step": 229356
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5483052730560303,
      "learning_rate": 3.05120053894492e-08,
      "loss": 2.9381,
      "step": 229357
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.378943920135498,
      "learning_rate": 3.045369390893837e-08,
      "loss": 3.0943,
      "step": 229358
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7140419483184814,
      "learning_rate": 3.039543819841617e-08,
      "loss": 2.9456,
      "step": 229359
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9126040935516357,
      "learning_rate": 3.033723825788259e-08,
      "loss": 2.8884,
      "step": 229360
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.231492280960083,
      "learning_rate": 3.027909408740426e-08,
      "loss": 2.737,
      "step": 229361
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8505256175994873,
      "learning_rate": 3.022100568694785e-08,
      "loss": 2.741,
      "step": 229362
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8309879302978516,
      "learning_rate": 3.0162973056546694e-08,
      "loss": 3.061,
      "step": 229363
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.978639602661133,
      "learning_rate": 3.010499619620077e-08,
      "loss": 3.1252,
      "step": 229364
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5904219150543213,
      "learning_rate": 3.004707510591009e-08,
      "loss": 3.1156,
      "step": 229365
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.830610752105713,
      "learning_rate": 2.9989209785707956e-08,
      "loss": 2.6941,
      "step": 229366
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9446399211883545,
      "learning_rate": 2.993140023556107e-08,
      "loss": 2.794,
      "step": 229367
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.149718284606934,
      "learning_rate": 2.9873646455536026e-08,
      "loss": 2.971,
      "step": 229368
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8638973236083984,
      "learning_rate": 2.9815948445632845e-08,
      "loss": 3.1026,
      "step": 229369
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7261815071105957,
      "learning_rate": 2.9758306205818206e-08,
      "loss": 2.8381,
      "step": 229370
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2029802799224854,
      "learning_rate": 2.970071973615873e-08,
      "loss": 2.7267,
      "step": 229371
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0818703174591064,
      "learning_rate": 2.9643189036654415e-08,
      "loss": 2.7273,
      "step": 229372
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1063008308410645,
      "learning_rate": 2.958571410727195e-08,
      "loss": 2.9439,
      "step": 229373
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.664358377456665,
      "learning_rate": 2.952829494804465e-08,
      "loss": 2.8775,
      "step": 229374
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.696300029754639,
      "learning_rate": 2.9470931559005817e-08,
      "loss": 3.034,
      "step": 229375
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.922750949859619,
      "learning_rate": 2.9413623940155452e-08,
      "loss": 3.0439,
      "step": 229376
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6047191619873047,
      "learning_rate": 2.9356372091493552e-08,
      "loss": 3.0113,
      "step": 229377
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.913353443145752,
      "learning_rate": 2.9299176013053426e-08,
      "loss": 3.0724,
      "step": 229378
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9710869789123535,
      "learning_rate": 2.9242035704801768e-08,
      "loss": 3.0289,
      "step": 229379
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.781275749206543,
      "learning_rate": 2.9184951166805193e-08,
      "loss": 2.7168,
      "step": 229380
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9203362464904785,
      "learning_rate": 2.912792239903039e-08,
      "loss": 2.9311,
      "step": 229381
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0989139080047607,
      "learning_rate": 2.907094940147736e-08,
      "loss": 2.902,
      "step": 229382
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9288501739501953,
      "learning_rate": 2.9014032174212722e-08,
      "loss": 3.0356,
      "step": 229383
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.754713296890259,
      "learning_rate": 2.895717071720316e-08,
      "loss": 2.6388,
      "step": 229384
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.770427942276001,
      "learning_rate": 2.8900365030481987e-08,
      "loss": 2.9171,
      "step": 229385
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.843996286392212,
      "learning_rate": 2.88436151140492e-08,
      "loss": 2.764,
      "step": 229386
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.166628360748291,
      "learning_rate": 2.878692096793811e-08,
      "loss": 2.6633,
      "step": 229387
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0244288444519043,
      "learning_rate": 2.87302825920821e-08,
      "loss": 2.8838,
      "step": 229388
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.486417531967163,
      "learning_rate": 2.8673699986614394e-08,
      "loss": 2.8309,
      "step": 229389
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.867851972579956,
      "learning_rate": 2.861717315143508e-08,
      "loss": 2.8953,
      "step": 229390
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.934375286102295,
      "learning_rate": 2.8560702086610764e-08,
      "loss": 2.7728,
      "step": 229391
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.916598320007324,
      "learning_rate": 2.850428679214145e-08,
      "loss": 2.9366,
      "step": 229392
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9932732582092285,
      "learning_rate": 2.8447927268027136e-08,
      "loss": 2.6818,
      "step": 229393
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9111123085021973,
      "learning_rate": 2.8391623514267824e-08,
      "loss": 3.0832,
      "step": 229394
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.80104398727417,
      "learning_rate": 2.8335375530930127e-08,
      "loss": 2.9152,
      "step": 229395
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9622552394866943,
      "learning_rate": 2.8279183317980735e-08,
      "loss": 2.8596,
      "step": 229396
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7593133449554443,
      "learning_rate": 2.8223046875419654e-08,
      "loss": 2.9532,
      "step": 229397
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.939053773880005,
      "learning_rate": 2.8166966203280184e-08,
      "loss": 3.028,
      "step": 229398
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3905189037323,
      "learning_rate": 2.8110941301595637e-08,
      "loss": 3.1138,
      "step": 229399
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6031410694122314,
      "learning_rate": 2.8054972170299395e-08,
      "loss": 2.8261,
      "step": 229400
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.886592149734497,
      "learning_rate": 2.7999058809491382e-08,
      "loss": 2.9568,
      "step": 229401
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.700566053390503,
      "learning_rate": 2.7943201219104982e-08,
      "loss": 2.8428,
      "step": 229402
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7950439453125,
      "learning_rate": 2.788739939920681e-08,
      "loss": 2.9086,
      "step": 229403
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.492802619934082,
      "learning_rate": 2.7831653349796866e-08,
      "loss": 2.8312,
      "step": 229404
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.990602731704712,
      "learning_rate": 2.7775963070841844e-08,
      "loss": 3.013,
      "step": 229405
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.805147409439087,
      "learning_rate": 2.7720328562408355e-08,
      "loss": 2.9998,
      "step": 229406
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.624326229095459,
      "learning_rate": 2.76647498244964e-08,
      "loss": 2.7428,
      "step": 229407
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.471212148666382,
      "learning_rate": 2.7609226857072675e-08,
      "loss": 3.1346,
      "step": 229408
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.91964054107666,
      "learning_rate": 2.755375966020379e-08,
      "loss": 3.1271,
      "step": 229409
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8343727588653564,
      "learning_rate": 2.7498348233889743e-08,
      "loss": 2.9196,
      "step": 229410
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6757171154022217,
      "learning_rate": 2.7442992578097234e-08,
      "loss": 2.9681,
      "step": 229411
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.5489726066589355,
      "learning_rate": 2.7387692692859565e-08,
      "loss": 3.1059,
      "step": 229412
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.76252818107605,
      "learning_rate": 2.733244857821004e-08,
      "loss": 2.6026,
      "step": 229413
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.857326030731201,
      "learning_rate": 2.727726023414867e-08,
      "loss": 2.9839,
      "step": 229414
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7413206100463867,
      "learning_rate": 2.7222127660642134e-08,
      "loss": 2.954,
      "step": 229415
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.875605344772339,
      "learning_rate": 2.7167050857790362e-08,
      "loss": 2.7783,
      "step": 229416
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2063710689544678,
      "learning_rate": 2.711202982549343e-08,
      "loss": 2.7728,
      "step": 229417
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.130450963973999,
      "learning_rate": 2.705706456385126e-08,
      "loss": 2.9201,
      "step": 229418
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2348709106445312,
      "learning_rate": 2.700215507286385e-08,
      "loss": 2.7419,
      "step": 229419
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9616501331329346,
      "learning_rate": 2.694730135246459e-08,
      "loss": 2.9558,
      "step": 229420
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.968064308166504,
      "learning_rate": 2.6892503402753395e-08,
      "loss": 2.861,
      "step": 229421
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3990206718444824,
      "learning_rate": 2.6837761223696963e-08,
      "loss": 3.0132,
      "step": 229422
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.318817615509033,
      "learning_rate": 2.67830748153286e-08,
      "loss": 2.9961,
      "step": 229423
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.832646131515503,
      "learning_rate": 2.6728444177614993e-08,
      "loss": 3.0658,
      "step": 229424
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.829345703125,
      "learning_rate": 2.6673869310622763e-08,
      "loss": 3.0072,
      "step": 229425
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6533279418945312,
      "learning_rate": 2.6619350214318603e-08,
      "loss": 2.5164,
      "step": 229426
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.221304416656494,
      "learning_rate": 2.6564886888735814e-08,
      "loss": 2.8909,
      "step": 229427
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.873206853866577,
      "learning_rate": 2.6510479333874402e-08,
      "loss": 3.2136,
      "step": 229428
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.72748064994812,
      "learning_rate": 2.645612754976767e-08,
      "loss": 2.9179,
      "step": 229429
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.703422784805298,
      "learning_rate": 2.6401831536382313e-08,
      "loss": 2.7976,
      "step": 229430
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5527122020721436,
      "learning_rate": 2.6347591293751636e-08,
      "loss": 2.989,
      "step": 229431
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9811885356903076,
      "learning_rate": 2.629340682187564e-08,
      "loss": 2.9545,
      "step": 229432
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2046287059783936,
      "learning_rate": 2.6239278120787633e-08,
      "loss": 2.6722,
      "step": 229433
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.523210287094116,
      "learning_rate": 2.6185205190487613e-08,
      "loss": 3.0484,
      "step": 229434
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6571011543273926,
      "learning_rate": 2.613118803097558e-08,
      "loss": 2.9509,
      "step": 229435
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.7855217456817627,
      "learning_rate": 2.6077226642251537e-08,
      "loss": 2.9057,
      "step": 229436
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9079947471618652,
      "learning_rate": 2.6023321024382092e-08,
      "loss": 2.8661,
      "step": 229437
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.975466728210449,
      "learning_rate": 2.596947117730064e-08,
      "loss": 3.1613,
      "step": 229438
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.994372606277466,
      "learning_rate": 2.5915677101073783e-08,
      "loss": 3.0788,
      "step": 229439
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0070438385009766,
      "learning_rate": 2.586193879570153e-08,
      "loss": 3.028,
      "step": 229440
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3999359607696533,
      "learning_rate": 2.5808256261150572e-08,
      "loss": 2.9731,
      "step": 229441
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.711812734603882,
      "learning_rate": 2.5754629497487522e-08,
      "loss": 3.005,
      "step": 229442
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0421204566955566,
      "learning_rate": 2.5701058504679073e-08,
      "loss": 2.7818,
      "step": 229443
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.987870454788208,
      "learning_rate": 2.5647543282791838e-08,
      "loss": 2.6467,
      "step": 229444
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1664247512817383,
      "learning_rate": 2.5594083831759204e-08,
      "loss": 2.7153,
      "step": 229445
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4186763763427734,
      "learning_rate": 2.5540680151647787e-08,
      "loss": 2.7678,
      "step": 229446
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5156679153442383,
      "learning_rate": 2.548733224245758e-08,
      "loss": 3.0196,
      "step": 229447
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8367323875427246,
      "learning_rate": 2.543404010418859e-08,
      "loss": 2.6894,
      "step": 229448
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.890867233276367,
      "learning_rate": 2.5380803736840817e-08,
      "loss": 3.0174,
      "step": 229449
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.821841239929199,
      "learning_rate": 2.5327623140480867e-08,
      "loss": 3.0514,
      "step": 229450
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.949213981628418,
      "learning_rate": 2.5274498315042134e-08,
      "loss": 2.8564,
      "step": 229451
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5025546550750732,
      "learning_rate": 2.5221429260557925e-08,
      "loss": 2.7013,
      "step": 229452
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7067203521728516,
      "learning_rate": 2.516841597706154e-08,
      "loss": 3.0218,
      "step": 229453
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8037898540496826,
      "learning_rate": 2.5115458464519678e-08,
      "loss": 2.811,
      "step": 229454
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.477447032928467,
      "learning_rate": 2.506255672299895e-08,
      "loss": 2.6763,
      "step": 229455
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8960046768188477,
      "learning_rate": 2.5009710752499356e-08,
      "loss": 3.0547,
      "step": 229456
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8936829566955566,
      "learning_rate": 2.495692055298759e-08,
      "loss": 2.6813,
      "step": 229457
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.968008279800415,
      "learning_rate": 2.490418612449696e-08,
      "loss": 3.0863,
      "step": 229458
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1225194931030273,
      "learning_rate": 2.485150746706077e-08,
      "loss": 2.8242,
      "step": 229459
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.339345693588257,
      "learning_rate": 2.4798884580645716e-08,
      "loss": 2.6075,
      "step": 229460
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7815182209014893,
      "learning_rate": 2.4746317465285105e-08,
      "loss": 3.0881,
      "step": 229461
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7675442695617676,
      "learning_rate": 2.469380612097893e-08,
      "loss": 2.9282,
      "step": 229462
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.014117956161499,
      "learning_rate": 2.4641350547760508e-08,
      "loss": 3.0911,
      "step": 229463
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1745574474334717,
      "learning_rate": 2.4588950745629832e-08,
      "loss": 2.9977,
      "step": 229464
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9491677284240723,
      "learning_rate": 2.4536606714586905e-08,
      "loss": 2.8962,
      "step": 229465
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1641464233398438,
      "learning_rate": 2.4484318454631724e-08,
      "loss": 2.8851,
      "step": 229466
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.806492567062378,
      "learning_rate": 2.4432085965797598e-08,
      "loss": 2.8999,
      "step": 229467
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.199003219604492,
      "learning_rate": 2.4379909248084527e-08,
      "loss": 3.2369,
      "step": 229468
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1886701583862305,
      "learning_rate": 2.4327788301492513e-08,
      "loss": 2.9432,
      "step": 229469
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.213812828063965,
      "learning_rate": 2.4275723126054857e-08,
      "loss": 2.7701,
      "step": 229470
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.188611030578613,
      "learning_rate": 2.422371372173826e-08,
      "loss": 2.6614,
      "step": 229471
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9933362007141113,
      "learning_rate": 2.4171760088609327e-08,
      "loss": 2.9734,
      "step": 229472
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3510539531707764,
      "learning_rate": 2.4119862226634755e-08,
      "loss": 3.0564,
      "step": 229473
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.813371419906616,
      "learning_rate": 2.4068020135847853e-08,
      "loss": 2.7886,
      "step": 229474
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9131124019622803,
      "learning_rate": 2.401623381624862e-08,
      "loss": 2.7177,
      "step": 229475
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2659947872161865,
      "learning_rate": 2.3964503267837053e-08,
      "loss": 3.0729,
      "step": 229476
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9365193843841553,
      "learning_rate": 2.3912828490646464e-08,
      "loss": 2.8809,
      "step": 229477
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0001413822174072,
      "learning_rate": 2.3861209484676846e-08,
      "loss": 3.0449,
      "step": 229478
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9269213676452637,
      "learning_rate": 2.3809646249928204e-08,
      "loss": 3.2171,
      "step": 229479
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8269283771514893,
      "learning_rate": 2.3758138786400537e-08,
      "loss": 2.7954,
      "step": 229480
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6659929752349854,
      "learning_rate": 2.3706687094127153e-08,
      "loss": 2.9277,
      "step": 229481
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.632755756378174,
      "learning_rate": 2.365529117310805e-08,
      "loss": 2.7284,
      "step": 229482
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.008171319961548,
      "learning_rate": 2.3603951023376533e-08,
      "loss": 3.0223,
      "step": 229483
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5080273151397705,
      "learning_rate": 2.35526666448993e-08,
      "loss": 2.8094,
      "step": 229484
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.628185749053955,
      "learning_rate": 2.3501438037709653e-08,
      "loss": 3.0057,
      "step": 229485
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5868418216705322,
      "learning_rate": 2.3450265201807595e-08,
      "loss": 3.0013,
      "step": 229486
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.62589430809021,
      "learning_rate": 2.3399148137193123e-08,
      "loss": 2.7993,
      "step": 229487
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.956474781036377,
      "learning_rate": 2.3348086843932856e-08,
      "loss": 2.832,
      "step": 229488
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.18109393119812,
      "learning_rate": 2.3297081321960177e-08,
      "loss": 2.9147,
      "step": 229489
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9720356464385986,
      "learning_rate": 2.3246131571341696e-08,
      "loss": 3.1144,
      "step": 229490
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.574820041656494,
      "learning_rate": 2.3195237592044114e-08,
      "loss": 2.9014,
      "step": 229491
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.994910478591919,
      "learning_rate": 2.3144399384100732e-08,
      "loss": 2.9781,
      "step": 229492
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.068622350692749,
      "learning_rate": 2.3093616947544856e-08,
      "loss": 3.0328,
      "step": 229493
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.768455743789673,
      "learning_rate": 2.304289028230988e-08,
      "loss": 2.826,
      "step": 229494
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2558271884918213,
      "learning_rate": 2.2992219388495713e-08,
      "loss": 2.8401,
      "step": 229495
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9407174587249756,
      "learning_rate": 2.2941604266069057e-08,
      "loss": 2.8408,
      "step": 229496
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.32187819480896,
      "learning_rate": 2.2891044914996605e-08,
      "loss": 3.023,
      "step": 229497
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1033225059509277,
      "learning_rate": 2.284054133537827e-08,
      "loss": 2.9529,
      "step": 229498
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.617128849029541,
      "learning_rate": 2.2790093527147445e-08,
      "loss": 2.9158,
      "step": 229499
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.977952480316162,
      "learning_rate": 2.2739701490370744e-08,
      "loss": 2.8696,
      "step": 229500
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.057466745376587,
      "learning_rate": 2.2689365225014854e-08,
      "loss": 2.9935,
      "step": 229501
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.270672559738159,
      "learning_rate": 2.263908473107978e-08,
      "loss": 2.9606,
      "step": 229502
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7446823120117188,
      "learning_rate": 2.2588860008632136e-08,
      "loss": 3.0277,
      "step": 229503
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.874042510986328,
      "learning_rate": 2.2538691057638615e-08,
      "loss": 3.1343,
      "step": 229504
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7673609256744385,
      "learning_rate": 2.2488577878099212e-08,
      "loss": 2.8226,
      "step": 229505
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3175392150878906,
      "learning_rate": 2.2438520470080544e-08,
      "loss": 2.9298,
      "step": 229506
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8999338150024414,
      "learning_rate": 2.2388518833516e-08,
      "loss": 2.8132,
      "step": 229507
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5140793323516846,
      "learning_rate": 2.233857296847219e-08,
      "loss": 3.0223,
      "step": 229508
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9345974922180176,
      "learning_rate": 2.228868287491581e-08,
      "loss": 3.1593,
      "step": 229509
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0851004123687744,
      "learning_rate": 2.2238848552913468e-08,
      "loss": 3.0053,
      "step": 229510
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8792824745178223,
      "learning_rate": 2.2189070002398556e-08,
      "loss": 2.8759,
      "step": 229511
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.101422071456909,
      "learning_rate": 2.2139347223437688e-08,
      "loss": 2.9926,
      "step": 229512
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.857057809829712,
      "learning_rate": 2.2089680216030858e-08,
      "loss": 2.9743,
      "step": 229513
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.461519241333008,
      "learning_rate": 2.2040068980178072e-08,
      "loss": 2.8163,
      "step": 229514
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6785197257995605,
      "learning_rate": 2.1990513515912633e-08,
      "loss": 3.0645,
      "step": 229515
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7240874767303467,
      "learning_rate": 2.1941013823201237e-08,
      "loss": 2.873,
      "step": 229516
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.155014753341675,
      "learning_rate": 2.1891569902043883e-08,
      "loss": 2.8547,
      "step": 229517
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9620871543884277,
      "learning_rate": 2.1842181752507183e-08,
      "loss": 2.7551,
      "step": 229518
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.818453311920166,
      "learning_rate": 2.1792849374591136e-08,
      "loss": 2.6548,
      "step": 229519
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8918371200561523,
      "learning_rate": 2.1743572768262442e-08,
      "loss": 2.8273,
      "step": 229520
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.030475378036499,
      "learning_rate": 2.16943519335544e-08,
      "loss": 2.8733,
      "step": 229521
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.7096590995788574,
      "learning_rate": 2.164518687050032e-08,
      "loss": 2.7646,
      "step": 229522
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.28666090965271,
      "learning_rate": 2.1596077579066896e-08,
      "loss": 2.9295,
      "step": 229523
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1012821197509766,
      "learning_rate": 2.1547024059254126e-08,
      "loss": 3.0351,
      "step": 229524
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.385021209716797,
      "learning_rate": 2.1498026311128625e-08,
      "loss": 2.932,
      "step": 229525
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7644426822662354,
      "learning_rate": 2.1449084334657085e-08,
      "loss": 2.9042,
      "step": 229526
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9317214488983154,
      "learning_rate": 2.1400198129872816e-08,
      "loss": 2.8302,
      "step": 229527
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1827149391174316,
      "learning_rate": 2.1351367696775812e-08,
      "loss": 2.8997,
      "step": 229528
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.622283458709717,
      "learning_rate": 2.130259303536608e-08,
      "loss": 2.6566,
      "step": 229529
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6294217109680176,
      "learning_rate": 2.1253874145643612e-08,
      "loss": 2.9524,
      "step": 229530
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2123169898986816,
      "learning_rate": 2.120521102767503e-08,
      "loss": 2.9707,
      "step": 229531
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1143476963043213,
      "learning_rate": 2.1156603681393713e-08,
      "loss": 2.9087,
      "step": 229532
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.245415449142456,
      "learning_rate": 2.1108052106832973e-08,
      "loss": 2.7949,
      "step": 229533
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7799932956695557,
      "learning_rate": 2.1059556304026114e-08,
      "loss": 2.5956,
      "step": 229534
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.887233257293701,
      "learning_rate": 2.101111627297314e-08,
      "loss": 2.7177,
      "step": 229535
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2424161434173584,
      "learning_rate": 2.0962732013674044e-08,
      "loss": 2.9571,
      "step": 229536
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.211148262023926,
      "learning_rate": 2.091440352612883e-08,
      "loss": 2.6232,
      "step": 229537
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.298534393310547,
      "learning_rate": 2.086613081037081e-08,
      "loss": 2.9622,
      "step": 229538
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.499363899230957,
      "learning_rate": 2.0817913866366663e-08,
      "loss": 3.033,
      "step": 229539
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.371417760848999,
      "learning_rate": 2.0769752694183017e-08,
      "loss": 2.8531,
      "step": 229540
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1031579971313477,
      "learning_rate": 2.0721647293786557e-08,
      "loss": 3.1515,
      "step": 229541
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7090508937835693,
      "learning_rate": 2.0673597665210596e-08,
      "loss": 2.9915,
      "step": 229542
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3219122886657715,
      "learning_rate": 2.0625603808455128e-08,
      "loss": 2.9194,
      "step": 229543
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5433382987976074,
      "learning_rate": 2.0577665723520154e-08,
      "loss": 2.9592,
      "step": 229544
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.862034559249878,
      "learning_rate": 2.0529783410438983e-08,
      "loss": 2.9523,
      "step": 229545
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.926929235458374,
      "learning_rate": 2.0481956869211612e-08,
      "loss": 2.8456,
      "step": 229546
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5223586559295654,
      "learning_rate": 2.043418609980474e-08,
      "loss": 2.9148,
      "step": 229547
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.54585599899292,
      "learning_rate": 2.0386471102284975e-08,
      "loss": 2.9899,
      "step": 229548
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.87229585647583,
      "learning_rate": 2.033881187661901e-08,
      "loss": 3.0458,
      "step": 229549
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0807859897613525,
      "learning_rate": 2.0291208422840154e-08,
      "loss": 2.9562,
      "step": 229550
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3015122413635254,
      "learning_rate": 2.0243660740981716e-08,
      "loss": 3.1844,
      "step": 229551
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.188748359680176,
      "learning_rate": 2.019616883097708e-08,
      "loss": 2.7964,
      "step": 229552
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.219863176345825,
      "learning_rate": 2.0148732692926163e-08,
      "loss": 2.9026,
      "step": 229553
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2254273891448975,
      "learning_rate": 2.0101352326762354e-08,
      "loss": 2.89,
      "step": 229554
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.682656764984131,
      "learning_rate": 2.0054027732518964e-08,
      "loss": 2.9971,
      "step": 229555
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7328457832336426,
      "learning_rate": 2.0006758910229292e-08,
      "loss": 2.678,
      "step": 229556
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.17887020111084,
      "learning_rate": 1.995954585986004e-08,
      "loss": 2.8198,
      "step": 229557
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.50473952293396,
      "learning_rate": 1.9912388581477813e-08,
      "loss": 2.9718,
      "step": 229558
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.331686496734619,
      "learning_rate": 1.9865287075016002e-08,
      "loss": 3.0243,
      "step": 229559
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8898608684539795,
      "learning_rate": 1.981824134054122e-08,
      "loss": 2.8756,
      "step": 229560
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.973417282104492,
      "learning_rate": 1.9771251378053465e-08,
      "loss": 2.9767,
      "step": 229561
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2062032222747803,
      "learning_rate": 1.9724317187552742e-08,
      "loss": 3.11,
      "step": 229562
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.234513282775879,
      "learning_rate": 1.9677438769039046e-08,
      "loss": 2.7081,
      "step": 229563
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.796200752258301,
      "learning_rate": 1.9630616122545685e-08,
      "loss": 2.6336,
      "step": 229564
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.882354259490967,
      "learning_rate": 1.958384924803935e-08,
      "loss": 2.9351,
      "step": 229565
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7997019290924072,
      "learning_rate": 1.9537138145586662e-08,
      "loss": 3.0334,
      "step": 229566
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.01673698425293,
      "learning_rate": 1.9490482815121e-08,
      "loss": 2.9967,
      "step": 229567
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8895061016082764,
      "learning_rate": 1.9443883256742288e-08,
      "loss": 2.8801,
      "step": 229568
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.238532781600952,
      "learning_rate": 1.939733947038391e-08,
      "loss": 2.9147,
      "step": 229569
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.358828067779541,
      "learning_rate": 1.9350851456079174e-08,
      "loss": 2.8362,
      "step": 229570
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7695536613464355,
      "learning_rate": 1.930441921386139e-08,
      "loss": 2.9925,
      "step": 229571
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5147082805633545,
      "learning_rate": 1.9258042743697243e-08,
      "loss": 2.9721,
      "step": 229572
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.773266315460205,
      "learning_rate": 1.9211722045620047e-08,
      "loss": 2.873,
      "step": 229573
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.905933380126953,
      "learning_rate": 1.91654571196298e-08,
      "loss": 2.6767,
      "step": 229574
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.556401014328003,
      "learning_rate": 1.911924796575981e-08,
      "loss": 2.7418,
      "step": 229575
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.0547590255737305,
      "learning_rate": 1.9073094583976766e-08,
      "loss": 2.8544,
      "step": 229576
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.950160503387451,
      "learning_rate": 1.902699697431398e-08,
      "loss": 2.9968,
      "step": 229577
    },
    {
      "epoch": 2.99,
      "grad_norm": 7.870997428894043,
      "learning_rate": 1.8980955136771448e-08,
      "loss": 2.9412,
      "step": 229578
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9161388874053955,
      "learning_rate": 1.8934969071382478e-08,
      "loss": 2.7038,
      "step": 229579
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7824673652648926,
      "learning_rate": 1.8889038778113764e-08,
      "loss": 2.8456,
      "step": 229580
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.825050115585327,
      "learning_rate": 1.8843164257031918e-08,
      "loss": 2.8509,
      "step": 229581
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.757909059524536,
      "learning_rate": 1.8797345508070327e-08,
      "loss": 2.8641,
      "step": 229582
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.886645793914795,
      "learning_rate": 1.8751582531295607e-08,
      "loss": 2.8694,
      "step": 229583
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.843165874481201,
      "learning_rate": 1.8705875326707753e-08,
      "loss": 2.7883,
      "step": 229584
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5329337120056152,
      "learning_rate": 1.866022389427346e-08,
      "loss": 2.8853,
      "step": 229585
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.034947156906128,
      "learning_rate": 1.861462823405935e-08,
      "loss": 3.2683,
      "step": 229586
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.261966705322266,
      "learning_rate": 1.85690883460321e-08,
      "loss": 2.8828,
      "step": 229587
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.622750759124756,
      "learning_rate": 1.852360423022503e-08,
      "loss": 2.5492,
      "step": 229588
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1241462230682373,
      "learning_rate": 1.8478175886638135e-08,
      "loss": 2.6604,
      "step": 229589
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6016488075256348,
      "learning_rate": 1.8432803315271417e-08,
      "loss": 2.9416,
      "step": 229590
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.762193441390991,
      "learning_rate": 1.8387486516124873e-08,
      "loss": 3.0839,
      "step": 229591
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.470306873321533,
      "learning_rate": 1.8342225489265115e-08,
      "loss": 2.9788,
      "step": 229592
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0952870845794678,
      "learning_rate": 1.8297020234625537e-08,
      "loss": 3.0649,
      "step": 229593
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.742093563079834,
      "learning_rate": 1.8251870752272746e-08,
      "loss": 3.0833,
      "step": 229594
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.16713809967041,
      "learning_rate": 1.8206777042173437e-08,
      "loss": 2.9542,
      "step": 229595
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8120675086975098,
      "learning_rate": 1.816173910432761e-08,
      "loss": 2.7483,
      "step": 229596
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7448716163635254,
      "learning_rate": 1.811675693880188e-08,
      "loss": 3.1691,
      "step": 229597
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.283104419708252,
      "learning_rate": 1.8071830545562936e-08,
      "loss": 2.716,
      "step": 229598
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6055500507354736,
      "learning_rate": 1.8026959924610786e-08,
      "loss": 2.8899,
      "step": 229599
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.525386095046997,
      "learning_rate": 1.7982145076012034e-08,
      "loss": 2.9977,
      "step": 229600
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2580671310424805,
      "learning_rate": 1.7937385999700072e-08,
      "loss": 3.0201,
      "step": 229601
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.680696487426758,
      "learning_rate": 1.7892682695708206e-08,
      "loss": 2.9686,
      "step": 229602
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.62681245803833,
      "learning_rate": 1.784803516406974e-08,
      "loss": 2.9654,
      "step": 229603
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.519017457962036,
      "learning_rate": 1.7803443404784677e-08,
      "loss": 2.6511,
      "step": 229604
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6681761741638184,
      "learning_rate": 1.775890741781971e-08,
      "loss": 3.0899,
      "step": 229605
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.375213146209717,
      "learning_rate": 1.7714427203241456e-08,
      "loss": 2.9248,
      "step": 229606
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.823961019515991,
      "learning_rate": 1.76700027610166e-08,
      "loss": 2.9742,
      "step": 229607
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.664093494415283,
      "learning_rate": 1.7625634091178452e-08,
      "loss": 2.8967,
      "step": 229608
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.523524284362793,
      "learning_rate": 1.7581321193760322e-08,
      "loss": 2.8408,
      "step": 229609
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.4635109901428223,
      "learning_rate": 1.7537064068695594e-08,
      "loss": 2.9008,
      "step": 229610
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8058836460113525,
      "learning_rate": 1.749286271605088e-08,
      "loss": 2.9673,
      "step": 229611
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3739874362945557,
      "learning_rate": 1.7448717135792876e-08,
      "loss": 3.0193,
      "step": 229612
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8948049545288086,
      "learning_rate": 1.740462732795489e-08,
      "loss": 2.8976,
      "step": 229613
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4645745754241943,
      "learning_rate": 1.7360593292570224e-08,
      "loss": 3.2303,
      "step": 229614
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7082326412200928,
      "learning_rate": 1.7316615029605574e-08,
      "loss": 3.0035,
      "step": 229615
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.025979995727539,
      "learning_rate": 1.7272692539094245e-08,
      "loss": 3.0671,
      "step": 229616
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9492666721343994,
      "learning_rate": 1.722882582103624e-08,
      "loss": 2.8543,
      "step": 229617
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.780992031097412,
      "learning_rate": 1.718501487543156e-08,
      "loss": 3.0303,
      "step": 229618
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1139752864837646,
      "learning_rate": 1.7141259702280196e-08,
      "loss": 2.9984,
      "step": 229619
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1540756225585938,
      "learning_rate": 1.7097560301615466e-08,
      "loss": 2.6265,
      "step": 229620
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0893537998199463,
      "learning_rate": 1.705391667343736e-08,
      "loss": 3.0881,
      "step": 229621
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.666984796524048,
      "learning_rate": 1.701032881774589e-08,
      "loss": 2.7354,
      "step": 229622
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.280231237411499,
      "learning_rate": 1.6966796734574352e-08,
      "loss": 3.042,
      "step": 229623
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2891557216644287,
      "learning_rate": 1.692332042388944e-08,
      "loss": 2.9024,
      "step": 229624
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.224012851715088,
      "learning_rate": 1.687989988572447e-08,
      "loss": 3.003,
      "step": 229625
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8000595569610596,
      "learning_rate": 1.6836535120112737e-08,
      "loss": 3.0202,
      "step": 229626
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0326240062713623,
      "learning_rate": 1.6793226126987636e-08,
      "loss": 2.8695,
      "step": 229627
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7328550815582275,
      "learning_rate": 1.6749972906415776e-08,
      "loss": 3.0357,
      "step": 229628
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.98512864112854,
      "learning_rate": 1.6706775458430467e-08,
      "loss": 3.0779,
      "step": 229629
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9336740970611572,
      "learning_rate": 1.666363378296509e-08,
      "loss": 2.7709,
      "step": 229630
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9047954082489014,
      "learning_rate": 1.662054788005296e-08,
      "loss": 2.9683,
      "step": 229631
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.250828742980957,
      "learning_rate": 1.6577517749760682e-08,
      "loss": 3.1528,
      "step": 229632
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.980639696121216,
      "learning_rate": 1.653454339202165e-08,
      "loss": 2.9562,
      "step": 229633
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9278626441955566,
      "learning_rate": 1.6491624806869165e-08,
      "loss": 3.0436,
      "step": 229634
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0994441509246826,
      "learning_rate": 1.6448761994303228e-08,
      "loss": 2.8252,
      "step": 229635
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.357354164123535,
      "learning_rate": 1.6405954954357147e-08,
      "loss": 2.9398,
      "step": 229636
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4957735538482666,
      "learning_rate": 1.6363203687030922e-08,
      "loss": 2.6737,
      "step": 229637
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7672030925750732,
      "learning_rate": 1.6320508192324554e-08,
      "loss": 2.8847,
      "step": 229638
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5083794593811035,
      "learning_rate": 1.6277868470238043e-08,
      "loss": 2.9809,
      "step": 229639
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.069970607757568,
      "learning_rate": 1.6235284520771384e-08,
      "loss": 2.7256,
      "step": 229640
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6858160495758057,
      "learning_rate": 1.619275634395789e-08,
      "loss": 2.6922,
      "step": 229641
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1037583351135254,
      "learning_rate": 1.6150283939830866e-08,
      "loss": 2.9201,
      "step": 229642
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8724215030670166,
      "learning_rate": 1.6107867308323697e-08,
      "loss": 3.1026,
      "step": 229643
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.24308443069458,
      "learning_rate": 1.6065506449502996e-08,
      "loss": 2.9626,
      "step": 229644
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.977066993713379,
      "learning_rate": 1.6023201363368765e-08,
      "loss": 2.9592,
      "step": 229645
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.223194122314453,
      "learning_rate": 1.5980952049887697e-08,
      "loss": 2.7307,
      "step": 229646
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.209573984146118,
      "learning_rate": 1.5938758509126405e-08,
      "loss": 2.7923,
      "step": 229647
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.96163272857666,
      "learning_rate": 1.589662074105158e-08,
      "loss": 2.9505,
      "step": 229648
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.658693313598633,
      "learning_rate": 1.5854538745696533e-08,
      "loss": 2.628,
      "step": 229649
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5837271213531494,
      "learning_rate": 1.5812512523027953e-08,
      "loss": 3.1767,
      "step": 229650
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0822324752807617,
      "learning_rate": 1.577054207311246e-08,
      "loss": 2.9872,
      "step": 229651
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.683788776397705,
      "learning_rate": 1.5728627395916737e-08,
      "loss": 2.8204,
      "step": 229652
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7157785892486572,
      "learning_rate": 1.5686768491440792e-08,
      "loss": 2.892,
      "step": 229653
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.527594566345215,
      "learning_rate": 1.564496535975124e-08,
      "loss": 3.1767,
      "step": 229654
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0135934352874756,
      "learning_rate": 1.5603218000781458e-08,
      "loss": 2.7677,
      "step": 229655
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.821746349334717,
      "learning_rate": 1.556152641459807e-08,
      "loss": 2.5227,
      "step": 229656
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0727572441101074,
      "learning_rate": 1.551989060116776e-08,
      "loss": 2.9709,
      "step": 229657
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.72031569480896,
      "learning_rate": 1.5478310560523842e-08,
      "loss": 2.9454,
      "step": 229658
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3934688568115234,
      "learning_rate": 1.5436786292666314e-08,
      "loss": 3.0197,
      "step": 229659
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.942591905593872,
      "learning_rate": 1.5395317797595176e-08,
      "loss": 2.8845,
      "step": 229660
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.829507350921631,
      "learning_rate": 1.5353905075310423e-08,
      "loss": 2.9065,
      "step": 229661
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2237093448638916,
      "learning_rate": 1.5312548125878676e-08,
      "loss": 2.9302,
      "step": 229662
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9048855304718018,
      "learning_rate": 1.527124694923332e-08,
      "loss": 2.8409,
      "step": 229663
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0835041999816895,
      "learning_rate": 1.5230001545407656e-08,
      "loss": 2.793,
      "step": 229664
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0751218795776367,
      "learning_rate": 1.518881191440169e-08,
      "loss": 3.0184,
      "step": 229665
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2058403491973877,
      "learning_rate": 1.5147678056248723e-08,
      "loss": 2.8182,
      "step": 229666
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5594851970672607,
      "learning_rate": 1.5106599970948764e-08,
      "loss": 2.782,
      "step": 229667
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.741619348526001,
      "learning_rate": 1.5065577658501805e-08,
      "loss": 2.8086,
      "step": 229668
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9087774753570557,
      "learning_rate": 1.5024611118941153e-08,
      "loss": 3.0378,
      "step": 229669
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8803908824920654,
      "learning_rate": 1.49837003522002e-08,
      "loss": 2.7799,
      "step": 229670
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4420876502990723,
      "learning_rate": 1.4942845358378863e-08,
      "loss": 2.9666,
      "step": 229671
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.087369441986084,
      "learning_rate": 1.4902046137410527e-08,
      "loss": 2.9028,
      "step": 229672
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.325472593307495,
      "learning_rate": 1.486130268936181e-08,
      "loss": 2.7063,
      "step": 229673
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7343082427978516,
      "learning_rate": 1.48206150141994e-08,
      "loss": 2.8806,
      "step": 229674
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6244606971740723,
      "learning_rate": 1.47799831119233e-08,
      "loss": 2.9916,
      "step": 229675
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.181353807449341,
      "learning_rate": 1.4739406982600121e-08,
      "loss": 3.095,
      "step": 229676
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2719547748565674,
      "learning_rate": 1.4698886626163253e-08,
      "loss": 2.958,
      "step": 229677
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.119901657104492,
      "learning_rate": 1.4658422042679307e-08,
      "loss": 2.7723,
      "step": 229678
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3940036296844482,
      "learning_rate": 1.4618013232148285e-08,
      "loss": 2.8413,
      "step": 229679
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.574690103530884,
      "learning_rate": 1.4577660194536878e-08,
      "loss": 2.779,
      "step": 229680
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.744168519973755,
      "learning_rate": 1.4537362929878394e-08,
      "loss": 3.1066,
      "step": 229681
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.584651470184326,
      "learning_rate": 1.4497121438206139e-08,
      "loss": 2.792,
      "step": 229682
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.861584186553955,
      "learning_rate": 1.4456935719453499e-08,
      "loss": 2.8186,
      "step": 229683
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.516448497772217,
      "learning_rate": 1.4416805773720396e-08,
      "loss": 2.8145,
      "step": 229684
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.781954288482666,
      "learning_rate": 1.4376731600940217e-08,
      "loss": 2.9455,
      "step": 229685
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4658889770507812,
      "learning_rate": 1.4336713201179572e-08,
      "loss": 2.95,
      "step": 229686
    },
    {
      "epoch": 2.99,
      "grad_norm": 5.399930953979492,
      "learning_rate": 1.4296750574405158e-08,
      "loss": 2.9875,
      "step": 229687
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5315253734588623,
      "learning_rate": 1.4256843720616974e-08,
      "loss": 3.0792,
      "step": 229688
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3839216232299805,
      "learning_rate": 1.4216992639848324e-08,
      "loss": 2.801,
      "step": 229689
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5044968128204346,
      "learning_rate": 1.4177197332132518e-08,
      "loss": 2.8343,
      "step": 229690
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.022581100463867,
      "learning_rate": 1.413745779740294e-08,
      "loss": 2.9152,
      "step": 229691
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.087867259979248,
      "learning_rate": 1.4097774035726206e-08,
      "loss": 3.2165,
      "step": 229692
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8450822830200195,
      "learning_rate": 1.4058146047102314e-08,
      "loss": 2.9928,
      "step": 229693
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.900019407272339,
      "learning_rate": 1.4018573831497959e-08,
      "loss": 3.09,
      "step": 229694
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6340134143829346,
      "learning_rate": 1.3979057388979753e-08,
      "loss": 2.9645,
      "step": 229695
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1312313079833984,
      "learning_rate": 1.3939596719481083e-08,
      "loss": 3.0081,
      "step": 229696
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.657982587814331,
      "learning_rate": 1.390019182310187e-08,
      "loss": 2.7303,
      "step": 229697
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.581923723220825,
      "learning_rate": 1.3860842699775499e-08,
      "loss": 2.854,
      "step": 229698
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.81752872467041,
      "learning_rate": 1.3821549349535277e-08,
      "loss": 2.6916,
      "step": 229699
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.514404058456421,
      "learning_rate": 1.3782311772414512e-08,
      "loss": 2.6483,
      "step": 229700
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6231489181518555,
      "learning_rate": 1.3743129968379896e-08,
      "loss": 2.8137,
      "step": 229701
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.222926139831543,
      "learning_rate": 1.370400393743143e-08,
      "loss": 2.9388,
      "step": 229702
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.703646659851074,
      "learning_rate": 1.3664933679635726e-08,
      "loss": 2.9335,
      "step": 229703
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.201342582702637,
      "learning_rate": 1.3625919194926172e-08,
      "loss": 2.939,
      "step": 229704
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8966376781463623,
      "learning_rate": 1.358696048336938e-08,
      "loss": 2.9312,
      "step": 229705
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.880354404449463,
      "learning_rate": 1.3548057544932046e-08,
      "loss": 3.1299,
      "step": 229706
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6594817638397217,
      "learning_rate": 1.3509210379647472e-08,
      "loss": 2.9473,
      "step": 229707
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1199939250946045,
      "learning_rate": 1.3470418987515663e-08,
      "loss": 2.9124,
      "step": 229708
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.201573610305786,
      "learning_rate": 1.3431683368536616e-08,
      "loss": 2.9379,
      "step": 229709
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.987078905105591,
      "learning_rate": 1.339300352274364e-08,
      "loss": 2.7725,
      "step": 229710
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.214585304260254,
      "learning_rate": 1.3354379450103425e-08,
      "loss": 2.9715,
      "step": 229711
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7767930030822754,
      "learning_rate": 1.331581115064928e-08,
      "loss": 3.0383,
      "step": 229712
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.86252760887146,
      "learning_rate": 1.3277298624381204e-08,
      "loss": 2.9467,
      "step": 229713
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.246748208999634,
      "learning_rate": 1.3238841871332507e-08,
      "loss": 3.1591,
      "step": 229714
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0272328853607178,
      "learning_rate": 1.320044089143657e-08,
      "loss": 2.7621,
      "step": 229715
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.895084857940674,
      "learning_rate": 1.3162095684793316e-08,
      "loss": 2.901,
      "step": 229716
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2240514755249023,
      "learning_rate": 1.3123806251336132e-08,
      "loss": 2.9101,
      "step": 229717
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.126955032348633,
      "learning_rate": 1.3085572591098326e-08,
      "loss": 2.8668,
      "step": 229718
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.823854923248291,
      "learning_rate": 1.3047394704113201e-08,
      "loss": 2.9355,
      "step": 229719
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8247673511505127,
      "learning_rate": 1.3009272590347453e-08,
      "loss": 2.8478,
      "step": 229720
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.612679958343506,
      "learning_rate": 1.2971206249867694e-08,
      "loss": 3.1406,
      "step": 229721
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.360443353652954,
      "learning_rate": 1.2933195682574005e-08,
      "loss": 2.9671,
      "step": 229722
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.096168041229248,
      "learning_rate": 1.2895240888599611e-08,
      "loss": 2.9601,
      "step": 229723
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8725037574768066,
      "learning_rate": 1.2857341867844595e-08,
      "loss": 3.0854,
      "step": 229724
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.374554395675659,
      "learning_rate": 1.2819498620375568e-08,
      "loss": 3.0786,
      "step": 229725
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.161841869354248,
      "learning_rate": 1.278171114619253e-08,
      "loss": 3.0441,
      "step": 229726
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.726186752319336,
      "learning_rate": 1.2743979445295482e-08,
      "loss": 3.0762,
      "step": 229727
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1613950729370117,
      "learning_rate": 1.2706303517684424e-08,
      "loss": 2.6621,
      "step": 229728
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6708340644836426,
      "learning_rate": 1.2668683363392663e-08,
      "loss": 3.1138,
      "step": 229729
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.700871229171753,
      "learning_rate": 1.263111898238689e-08,
      "loss": 2.8295,
      "step": 229730
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.107783317565918,
      "learning_rate": 1.259361037473372e-08,
      "loss": 2.9289,
      "step": 229731
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.749143600463867,
      "learning_rate": 1.255615754036654e-08,
      "loss": 2.891,
      "step": 229732
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.570587635040283,
      "learning_rate": 1.2518760479318657e-08,
      "loss": 3.0381,
      "step": 229733
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7281577587127686,
      "learning_rate": 1.2481419191623376e-08,
      "loss": 3.0205,
      "step": 229734
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8647682666778564,
      "learning_rate": 1.2444133677280699e-08,
      "loss": 3.0241,
      "step": 229735
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.169672966003418,
      "learning_rate": 1.2406903936257317e-08,
      "loss": 3.0788,
      "step": 229736
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2645606994628906,
      "learning_rate": 1.2369729968619846e-08,
      "loss": 2.9603,
      "step": 229737
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7016024589538574,
      "learning_rate": 1.2332611774334977e-08,
      "loss": 2.9009,
      "step": 229738
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3173036575317383,
      "learning_rate": 1.2295549353436018e-08,
      "loss": 2.9318,
      "step": 229739
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9550516605377197,
      "learning_rate": 1.2258542705889662e-08,
      "loss": 2.8869,
      "step": 229740
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.716384172439575,
      "learning_rate": 1.2221591831729216e-08,
      "loss": 3.1131,
      "step": 229741
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7296195030212402,
      "learning_rate": 1.2184696730954679e-08,
      "loss": 3.0647,
      "step": 229742
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1544854640960693,
      "learning_rate": 1.2147857403599359e-08,
      "loss": 2.9375,
      "step": 229743
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6752610206604004,
      "learning_rate": 1.2111073849629949e-08,
      "loss": 2.9951,
      "step": 229744
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.108006477355957,
      "learning_rate": 1.2074346069079754e-08,
      "loss": 2.9914,
      "step": 229745
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8347299098968506,
      "learning_rate": 1.2037674061948777e-08,
      "loss": 2.7681,
      "step": 229746
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.681980609893799,
      "learning_rate": 1.2001057828237016e-08,
      "loss": 2.8172,
      "step": 229747
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.016920566558838,
      "learning_rate": 1.1964497367977777e-08,
      "loss": 2.7477,
      "step": 229748
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0003225803375244,
      "learning_rate": 1.1927992681137754e-08,
      "loss": 2.901,
      "step": 229749
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.062511444091797,
      "learning_rate": 1.1891543767750256e-08,
      "loss": 2.7489,
      "step": 229750
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.450200319290161,
      "learning_rate": 1.1855150627815279e-08,
      "loss": 2.7387,
      "step": 229751
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.226155996322632,
      "learning_rate": 1.1818813261332827e-08,
      "loss": 2.9734,
      "step": 229752
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1766068935394287,
      "learning_rate": 1.1782531668302897e-08,
      "loss": 2.8142,
      "step": 229753
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.499516725540161,
      "learning_rate": 1.1746305848758796e-08,
      "loss": 3.1994,
      "step": 229754
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3200197219848633,
      "learning_rate": 1.1710135802700527e-08,
      "loss": 2.6913,
      "step": 229755
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7288670539855957,
      "learning_rate": 1.1674021530128086e-08,
      "loss": 2.8738,
      "step": 229756
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4292681217193604,
      "learning_rate": 1.1637963031041476e-08,
      "loss": 2.8307,
      "step": 229757
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6430130004882812,
      "learning_rate": 1.1601960305474001e-08,
      "loss": 2.7594,
      "step": 229758
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8601744174957275,
      "learning_rate": 1.1566013353392356e-08,
      "loss": 2.8834,
      "step": 229759
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3902430534362793,
      "learning_rate": 1.1530122174796542e-08,
      "loss": 2.7625,
      "step": 229760
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7053165435791016,
      "learning_rate": 1.1494286769753169e-08,
      "loss": 3.0717,
      "step": 229761
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.4921538829803467,
      "learning_rate": 1.1458507138228933e-08,
      "loss": 2.6763,
      "step": 229762
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.934521436691284,
      "learning_rate": 1.1422783280257142e-08,
      "loss": 2.8,
      "step": 229763
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.953847646713257,
      "learning_rate": 1.1387115195804485e-08,
      "loss": 2.857,
      "step": 229764
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8019440174102783,
      "learning_rate": 1.1351502884904273e-08,
      "loss": 3.3629,
      "step": 229765
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1231420040130615,
      "learning_rate": 1.1315946347556504e-08,
      "loss": 2.8089,
      "step": 229766
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.896469831466675,
      "learning_rate": 1.1280445583761178e-08,
      "loss": 2.968,
      "step": 229767
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.07535457611084,
      "learning_rate": 1.1245000593518295e-08,
      "loss": 2.965,
      "step": 229768
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7057676315307617,
      "learning_rate": 1.120961137686116e-08,
      "loss": 2.922,
      "step": 229769
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3316354751586914,
      "learning_rate": 1.1174277933789778e-08,
      "loss": 2.7765,
      "step": 229770
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.818798542022705,
      "learning_rate": 1.1139000264304144e-08,
      "loss": 2.7575,
      "step": 229771
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2017669677734375,
      "learning_rate": 1.1103778368404258e-08,
      "loss": 2.9029,
      "step": 229772
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8129994869232178,
      "learning_rate": 1.1068612246090125e-08,
      "loss": 2.7966,
      "step": 229773
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9160819053649902,
      "learning_rate": 1.1033501897395048e-08,
      "loss": 2.7884,
      "step": 229774
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6153182983398438,
      "learning_rate": 1.0998447322319026e-08,
      "loss": 3.0252,
      "step": 229775
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.003749370574951,
      "learning_rate": 1.0963448520862062e-08,
      "loss": 2.7032,
      "step": 229776
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8959598541259766,
      "learning_rate": 1.0928505493024154e-08,
      "loss": 2.9485,
      "step": 229777
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.289676666259766,
      "learning_rate": 1.0893618238805301e-08,
      "loss": 2.9896,
      "step": 229778
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.025954246520996,
      "learning_rate": 1.0858786758238814e-08,
      "loss": 3.0113,
      "step": 229779
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.880044937133789,
      "learning_rate": 1.0824011051291382e-08,
      "loss": 3.1459,
      "step": 229780
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6351606845855713,
      "learning_rate": 1.0789291118029619e-08,
      "loss": 2.8796,
      "step": 229781
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.884935140609741,
      "learning_rate": 1.0754626958420221e-08,
      "loss": 2.7269,
      "step": 229782
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9687469005584717,
      "learning_rate": 1.0720018572463184e-08,
      "loss": 2.6966,
      "step": 229783
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9943339824676514,
      "learning_rate": 1.0685465960158512e-08,
      "loss": 2.9898,
      "step": 229784
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0948410034179688,
      "learning_rate": 1.0650969121539509e-08,
      "loss": 2.76,
      "step": 229785
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.354395627975464,
      "learning_rate": 1.0616528056639484e-08,
      "loss": 2.7197,
      "step": 229786
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.865365505218506,
      "learning_rate": 1.0582142765391821e-08,
      "loss": 3.1636,
      "step": 229787
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.703242301940918,
      "learning_rate": 1.0547813247829828e-08,
      "loss": 3.2128,
      "step": 229788
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5478858947753906,
      "learning_rate": 1.0513539503986812e-08,
      "loss": 3.0824,
      "step": 229789
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.883659839630127,
      "learning_rate": 1.0479321533862771e-08,
      "loss": 3.0375,
      "step": 229790
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7461659908294678,
      "learning_rate": 1.0445159337457709e-08,
      "loss": 2.6595,
      "step": 229791
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.963409185409546,
      "learning_rate": 1.0411052914738315e-08,
      "loss": 2.8991,
      "step": 229792
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.591651678085327,
      "learning_rate": 1.0377002265771207e-08,
      "loss": 2.6839,
      "step": 229793
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6273083686828613,
      "learning_rate": 1.034300739055638e-08,
      "loss": 2.6982,
      "step": 229794
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7871317863464355,
      "learning_rate": 1.0309068289027223e-08,
      "loss": 2.6507,
      "step": 229795
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.967735767364502,
      "learning_rate": 1.0275184961283656e-08,
      "loss": 2.7992,
      "step": 229796
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0244359970092773,
      "learning_rate": 1.0241357407292372e-08,
      "loss": 3.0596,
      "step": 229797
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.219804525375366,
      "learning_rate": 1.0207585627053372e-08,
      "loss": 2.7139,
      "step": 229798
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.863511323928833,
      "learning_rate": 1.0173869620566655e-08,
      "loss": 3.2375,
      "step": 229799
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9404056072235107,
      "learning_rate": 1.0140209387865527e-08,
      "loss": 2.8125,
      "step": 229800
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.320237398147583,
      "learning_rate": 1.010660492894999e-08,
      "loss": 2.6962,
      "step": 229801
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2926599979400635,
      "learning_rate": 1.0073056243820043e-08,
      "loss": 2.8637,
      "step": 229802
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7449419498443604,
      "learning_rate": 1.0039563332475686e-08,
      "loss": 3.1115,
      "step": 229803
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8618550300598145,
      "learning_rate": 1.000612619491692e-08,
      "loss": 2.8991,
      "step": 229804
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0153603553771973,
      "learning_rate": 9.972744831143741e-09,
      "loss": 3.1075,
      "step": 229805
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.973581552505493,
      "learning_rate": 9.939419241222768e-09,
      "loss": 2.7095,
      "step": 229806
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7929790019989014,
      "learning_rate": 9.906149425087385e-09,
      "loss": 2.9244,
      "step": 229807
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6981379985809326,
      "learning_rate": 9.872935382770898e-09,
      "loss": 2.7463,
      "step": 229808
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.386108875274658,
      "learning_rate": 9.839777114306612e-09,
      "loss": 2.8827,
      "step": 229809
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.144758462905884,
      "learning_rate": 9.806674619661225e-09,
      "loss": 3.0936,
      "step": 229810
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.654613733291626,
      "learning_rate": 9.773627898834735e-09,
      "loss": 2.9142,
      "step": 229811
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.984607458114624,
      "learning_rate": 9.740636951893754e-09,
      "loss": 2.9473,
      "step": 229812
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.889169692993164,
      "learning_rate": 9.707701778771671e-09,
      "loss": 2.9581,
      "step": 229813
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.563201665878296,
      "learning_rate": 9.674822379535097e-09,
      "loss": 2.9254,
      "step": 229814
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6360256671905518,
      "learning_rate": 9.641998754150726e-09,
      "loss": 3.1081,
      "step": 229815
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.974984884262085,
      "learning_rate": 9.609230902618559e-09,
      "loss": 2.8518,
      "step": 229816
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7283499240875244,
      "learning_rate": 9.576518825005207e-09,
      "loss": 3.2688,
      "step": 229817
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.706437110900879,
      "learning_rate": 9.543862521244061e-09,
      "loss": 2.9349,
      "step": 229818
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9552440643310547,
      "learning_rate": 9.511261991368424e-09,
      "loss": 2.9072,
      "step": 229819
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.636552095413208,
      "learning_rate": 9.478717235378297e-09,
      "loss": 2.7604,
      "step": 229820
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4393389225006104,
      "learning_rate": 9.446228253306987e-09,
      "loss": 2.817,
      "step": 229821
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.01420259475708,
      "learning_rate": 9.413795045154493e-09,
      "loss": 2.9761,
      "step": 229822
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.544520139694214,
      "learning_rate": 9.381417610887509e-09,
      "loss": 3.0078,
      "step": 229823
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1863255500793457,
      "learning_rate": 9.349095950572649e-09,
      "loss": 2.9845,
      "step": 229824
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0224456787109375,
      "learning_rate": 9.316830064143299e-09,
      "loss": 2.8751,
      "step": 229825
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0799899101257324,
      "learning_rate": 9.284619951666073e-09,
      "loss": 3.073,
      "step": 229826
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.953761339187622,
      "learning_rate": 9.252465613140969e-09,
      "loss": 2.8346,
      "step": 229827
    },
    {
      "epoch": 2.99,
      "grad_norm": 5.0615081787109375,
      "learning_rate": 9.220367048534682e-09,
      "loss": 3.2357,
      "step": 229828
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2953908443450928,
      "learning_rate": 9.188324257880519e-09,
      "loss": 2.7751,
      "step": 229829
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.01173734664917,
      "learning_rate": 9.15633724117848e-09,
      "loss": 3.0611,
      "step": 229830
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.593651294708252,
      "learning_rate": 9.12440599846187e-09,
      "loss": 2.8944,
      "step": 229831
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.024322509765625,
      "learning_rate": 9.092530529664076e-09,
      "loss": 2.9738,
      "step": 229832
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8909614086151123,
      "learning_rate": 9.060710834885022e-09,
      "loss": 2.9643,
      "step": 229833
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.878342628479004,
      "learning_rate": 9.028946914058089e-09,
      "loss": 2.7033,
      "step": 229834
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.401973009109497,
      "learning_rate": 8.997238767216585e-09,
      "loss": 2.9256,
      "step": 229835
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8033361434936523,
      "learning_rate": 8.96558639439382e-09,
      "loss": 2.8474,
      "step": 229836
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0404765605926514,
      "learning_rate": 8.933989795523177e-09,
      "loss": 2.7925,
      "step": 229837
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.688720703125,
      "learning_rate": 8.902448970704578e-09,
      "loss": 3.0842,
      "step": 229838
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.24324893951416,
      "learning_rate": 8.870963919838103e-09,
      "loss": 2.7226,
      "step": 229839
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6513259410858154,
      "learning_rate": 8.839534643023671e-09,
      "loss": 3.2027,
      "step": 229840
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4250757694244385,
      "learning_rate": 8.808161140227976e-09,
      "loss": 3.005,
      "step": 229841
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7194788455963135,
      "learning_rate": 8.776843411451018e-09,
      "loss": 2.8872,
      "step": 229842
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.750394105911255,
      "learning_rate": 8.745581456692797e-09,
      "loss": 2.8172,
      "step": 229843
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8591458797454834,
      "learning_rate": 8.714375275986618e-09,
      "loss": 3.0811,
      "step": 229844
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5647618770599365,
      "learning_rate": 8.683224869299176e-09,
      "loss": 2.7019,
      "step": 229845
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6872267723083496,
      "learning_rate": 8.652130236697086e-09,
      "loss": 2.914,
      "step": 229846
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.135349750518799,
      "learning_rate": 8.621091378113732e-09,
      "loss": 2.805,
      "step": 229847
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6905345916748047,
      "learning_rate": 8.590108293615727e-09,
      "loss": 2.8938,
      "step": 229848
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.957954406738281,
      "learning_rate": 8.559180983169767e-09,
      "loss": 2.9042,
      "step": 229849
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9862115383148193,
      "learning_rate": 8.528309446809156e-09,
      "loss": 2.9777,
      "step": 229850
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.949424982070923,
      "learning_rate": 8.497493684533897e-09,
      "loss": 3.0563,
      "step": 229851
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.811920642852783,
      "learning_rate": 8.466733696310679e-09,
      "loss": 2.8398,
      "step": 229852
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.028735637664795,
      "learning_rate": 8.436029482206119e-09,
      "loss": 2.9856,
      "step": 229853
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.603524923324585,
      "learning_rate": 8.40538104218691e-09,
      "loss": 3.0514,
      "step": 229854
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.384840250015259,
      "learning_rate": 8.37478837625305e-09,
      "loss": 2.8643,
      "step": 229855
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.194514513015747,
      "learning_rate": 8.344251484437847e-09,
      "loss": 2.855,
      "step": 229856
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.921222686767578,
      "learning_rate": 8.313770366707994e-09,
      "loss": 2.7144,
      "step": 229857
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1232306957244873,
      "learning_rate": 8.283345023130105e-09,
      "loss": 3.0288,
      "step": 229858
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3090178966522217,
      "learning_rate": 8.252975453670873e-09,
      "loss": 2.8666,
      "step": 229859
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.878844738006592,
      "learning_rate": 8.222661658330298e-09,
      "loss": 2.932,
      "step": 229860
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.965024471282959,
      "learning_rate": 8.192403637141686e-09,
      "loss": 3.049,
      "step": 229861
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.354642391204834,
      "learning_rate": 8.162201390071732e-09,
      "loss": 3.0685,
      "step": 229862
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.239600896835327,
      "learning_rate": 8.132054917153741e-09,
      "loss": 3.0119,
      "step": 229863
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.115164279937744,
      "learning_rate": 8.101964218387713e-09,
      "loss": 3.034,
      "step": 229864
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.003363132476807,
      "learning_rate": 8.07192929377365e-09,
      "loss": 2.858,
      "step": 229865
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.30678129196167,
      "learning_rate": 8.041950143311548e-09,
      "loss": 3.0924,
      "step": 229866
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.097912311553955,
      "learning_rate": 8.012026767034718e-09,
      "loss": 2.838,
      "step": 229867
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.7825491428375244,
      "learning_rate": 7.982159164943159e-09,
      "loss": 2.9748,
      "step": 229868
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9545929431915283,
      "learning_rate": 7.952347337003561e-09,
      "loss": 3.0472,
      "step": 229869
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.200960159301758,
      "learning_rate": 7.922591283249235e-09,
      "loss": 2.9158,
      "step": 229870
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.296065807342529,
      "learning_rate": 7.892891003713487e-09,
      "loss": 2.8498,
      "step": 229871
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.520789623260498,
      "learning_rate": 7.8632464983297e-09,
      "loss": 2.9937,
      "step": 229872
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.581958293914795,
      "learning_rate": 7.833657767197798e-09,
      "loss": 2.7382,
      "step": 229873
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0091404914855957,
      "learning_rate": 7.80412481021786e-09,
      "loss": 2.8337,
      "step": 229874
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9116384983062744,
      "learning_rate": 7.774647627489804e-09,
      "loss": 2.7542,
      "step": 229875
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0044050216674805,
      "learning_rate": 7.745226218980328e-09,
      "loss": 3.119,
      "step": 229876
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.668684244155884,
      "learning_rate": 7.715860584689426e-09,
      "loss": 2.933,
      "step": 229877
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7819936275482178,
      "learning_rate": 7.686550724617102e-09,
      "loss": 2.8319,
      "step": 229878
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.499335765838623,
      "learning_rate": 7.657296638763355e-09,
      "loss": 2.89,
      "step": 229879
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.76273512840271,
      "learning_rate": 7.628098327161492e-09,
      "loss": 3.1308,
      "step": 229880
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.667205333709717,
      "learning_rate": 7.598955789811512e-09,
      "loss": 3.1013,
      "step": 229881
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9254863262176514,
      "learning_rate": 7.569869026713415e-09,
      "loss": 2.7646,
      "step": 229882
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0037734508514404,
      "learning_rate": 7.540838037867203e-09,
      "loss": 2.848,
      "step": 229883
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.763122081756592,
      "learning_rate": 7.511862823272874e-09,
      "loss": 2.9316,
      "step": 229884
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8668675422668457,
      "learning_rate": 7.482943382963736e-09,
      "loss": 2.8749,
      "step": 229885
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7098968029022217,
      "learning_rate": 7.45407971690648e-09,
      "loss": 2.8595,
      "step": 229886
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.35042667388916,
      "learning_rate": 7.425271825134416e-09,
      "loss": 2.8487,
      "step": 229887
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.35129451751709,
      "learning_rate": 7.396519707647541e-09,
      "loss": 2.7911,
      "step": 229888
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5964150428771973,
      "learning_rate": 7.3678233644458575e-09,
      "loss": 3.1917,
      "step": 229889
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.373516082763672,
      "learning_rate": 7.339182795562671e-09,
      "loss": 2.9581,
      "step": 229890
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.675049066543579,
      "learning_rate": 7.310598000931367e-09,
      "loss": 3.2047,
      "step": 229891
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8673617839813232,
      "learning_rate": 7.282068980618561e-09,
      "loss": 2.8824,
      "step": 229892
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9655327796936035,
      "learning_rate": 7.253595734624251e-09,
      "loss": 2.5964,
      "step": 229893
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.068047523498535,
      "learning_rate": 7.225178262948439e-09,
      "loss": 2.9056,
      "step": 229894
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.090778827667236,
      "learning_rate": 7.196816565591124e-09,
      "loss": 2.8936,
      "step": 229895
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7158992290496826,
      "learning_rate": 7.1685106425523055e-09,
      "loss": 2.9649,
      "step": 229896
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9690723419189453,
      "learning_rate": 7.140260493865291e-09,
      "loss": 2.9627,
      "step": 229897
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.858865737915039,
      "learning_rate": 7.112066119463466e-09,
      "loss": 2.9545,
      "step": 229898
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8720500469207764,
      "learning_rate": 7.083927519446752e-09,
      "loss": 2.8611,
      "step": 229899
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8355698585510254,
      "learning_rate": 7.055844693748536e-09,
      "loss": 3.0311,
      "step": 229900
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1180155277252197,
      "learning_rate": 7.027817642435429e-09,
      "loss": 2.845,
      "step": 229901
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.7569580078125,
      "learning_rate": 6.99984636544082e-09,
      "loss": 2.9634,
      "step": 229902
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.851163148880005,
      "learning_rate": 6.9719308628313206e-09,
      "loss": 2.7197,
      "step": 229903
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.792576789855957,
      "learning_rate": 6.944071134573625e-09,
      "loss": 3.036,
      "step": 229904
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7449421882629395,
      "learning_rate": 6.9162671807010405e-09,
      "loss": 3.0037,
      "step": 229905
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6427550315856934,
      "learning_rate": 6.888519001213566e-09,
      "loss": 2.9267,
      "step": 229906
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2906861305236816,
      "learning_rate": 6.860826596077895e-09,
      "loss": 2.888,
      "step": 229907
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8026437759399414,
      "learning_rate": 6.8331899653606414e-09,
      "loss": 3.0427,
      "step": 229908
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0735256671905518,
      "learning_rate": 6.805609109028498e-09,
      "loss": 3.0357,
      "step": 229909
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.804755926132202,
      "learning_rate": 6.7780840270814655e-09,
      "loss": 2.986,
      "step": 229910
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.696779489517212,
      "learning_rate": 6.750614719552849e-09,
      "loss": 3.1001,
      "step": 229911
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6581919193267822,
      "learning_rate": 6.723201186409344e-09,
      "loss": 3.0052,
      "step": 229912
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.395272970199585,
      "learning_rate": 6.695843427684255e-09,
      "loss": 2.8818,
      "step": 229913
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.891465187072754,
      "learning_rate": 6.6685414434108904e-09,
      "loss": 2.9246,
      "step": 229914
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0320487022399902,
      "learning_rate": 6.641295233522637e-09,
      "loss": 2.9224,
      "step": 229915
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9572741985321045,
      "learning_rate": 6.614104798086106e-09,
      "loss": 3.1658,
      "step": 229916
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.529222011566162,
      "learning_rate": 6.586970137067992e-09,
      "loss": 2.7404,
      "step": 229917
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5309090614318848,
      "learning_rate": 6.5598912504682965e-09,
      "loss": 2.7566,
      "step": 229918
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0218472480773926,
      "learning_rate": 6.532868138353631e-09,
      "loss": 3.0359,
      "step": 229919
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0626325607299805,
      "learning_rate": 6.505900800657382e-09,
      "loss": 2.6507,
      "step": 229920
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5490236282348633,
      "learning_rate": 6.478989237446164e-09,
      "loss": 2.9884,
      "step": 229921
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.678722620010376,
      "learning_rate": 6.452133448653362e-09,
      "loss": 2.7994,
      "step": 229922
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5595202445983887,
      "learning_rate": 6.425333434345592e-09,
      "loss": 2.9591,
      "step": 229923
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.933155059814453,
      "learning_rate": 6.398589194522852e-09,
      "loss": 3.072,
      "step": 229924
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2350990772247314,
      "learning_rate": 6.3719007291518355e-09,
      "loss": 2.7864,
      "step": 229925
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3795623779296875,
      "learning_rate": 6.345268038265849e-09,
      "loss": 2.9238,
      "step": 229926
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1498749256134033,
      "learning_rate": 6.318691121864894e-09,
      "loss": 2.7985,
      "step": 229927
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8916728496551514,
      "learning_rate": 6.292169979948969e-09,
      "loss": 3.1483,
      "step": 229928
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.4633891582489014,
      "learning_rate": 6.265704612518075e-09,
      "loss": 3.1768,
      "step": 229929
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0915772914886475,
      "learning_rate": 6.23929501957221e-09,
      "loss": 2.9224,
      "step": 229930
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.795567512512207,
      "learning_rate": 6.212941201144683e-09,
      "loss": 3.1238,
      "step": 229931
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7815136909484863,
      "learning_rate": 6.186643157235493e-09,
      "loss": 2.9212,
      "step": 229932
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8330538272857666,
      "learning_rate": 6.160400887844641e-09,
      "loss": 3.0718,
      "step": 229933
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.77097749710083,
      "learning_rate": 6.1342143929388184e-09,
      "loss": 2.824,
      "step": 229934
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8291866779327393,
      "learning_rate": 6.108083672584641e-09,
      "loss": 2.9307,
      "step": 229935
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9135842323303223,
      "learning_rate": 6.082008726748799e-09,
      "loss": 2.9281,
      "step": 229936
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.87850284576416,
      "learning_rate": 6.055989555464602e-09,
      "loss": 2.7945,
      "step": 229937
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.5184905529022217,
      "learning_rate": 6.030026158698742e-09,
      "loss": 2.9249,
      "step": 229938
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7220165729522705,
      "learning_rate": 6.004118536451219e-09,
      "loss": 2.7068,
      "step": 229939
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.956846237182617,
      "learning_rate": 5.978266688788647e-09,
      "loss": 2.8156,
      "step": 229940
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.8315255641937256,
      "learning_rate": 5.952470615644411e-09,
      "loss": 3.1387,
      "step": 229941
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.544174909591675,
      "learning_rate": 5.926730317085127e-09,
      "loss": 2.7192,
      "step": 229942
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7199008464813232,
      "learning_rate": 5.901045793077486e-09,
      "loss": 3.0037,
      "step": 229943
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0540435314178467,
      "learning_rate": 5.875417043621489e-09,
      "loss": 3.0663,
      "step": 229944
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.590480327606201,
      "learning_rate": 5.849844068750442e-09,
      "loss": 2.99,
      "step": 229945
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.273798942565918,
      "learning_rate": 5.824326868464346e-09,
      "loss": 2.994,
      "step": 229946
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6509275436401367,
      "learning_rate": 5.798865442729894e-09,
      "loss": 3.0614,
      "step": 229947
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.85880970954895,
      "learning_rate": 5.7734597916137e-09,
      "loss": 2.786,
      "step": 229948
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1275365352630615,
      "learning_rate": 5.748109915082455e-09,
      "loss": 2.9983,
      "step": 229949
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.759281873703003,
      "learning_rate": 5.722815813136161e-09,
      "loss": 2.8192,
      "step": 229950
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.655762195587158,
      "learning_rate": 5.697577485774818e-09,
      "loss": 3.3907,
      "step": 229951
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.062892436981201,
      "learning_rate": 5.672394933031732e-09,
      "loss": 2.8933,
      "step": 229952
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.793577194213867,
      "learning_rate": 5.647268154873596e-09,
      "loss": 2.9698,
      "step": 229953
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.14487361907959,
      "learning_rate": 5.622197151367025e-09,
      "loss": 3.104,
      "step": 229954
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9805307388305664,
      "learning_rate": 5.597181922445404e-09,
      "loss": 2.8138,
      "step": 229955
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2063961029052734,
      "learning_rate": 5.572222468175347e-09,
      "loss": 2.7912,
      "step": 229956
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.090484857559204,
      "learning_rate": 5.547318788523547e-09,
      "loss": 3.0938,
      "step": 229957
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.820012331008911,
      "learning_rate": 5.5224708834900045e-09,
      "loss": 3.0426,
      "step": 229958
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3701493740081787,
      "learning_rate": 5.497678753108026e-09,
      "loss": 2.8476,
      "step": 229959
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.359171152114868,
      "learning_rate": 5.472942397344305e-09,
      "loss": 2.7344,
      "step": 229960
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3167011737823486,
      "learning_rate": 5.448261816232147e-09,
      "loss": 3.1497,
      "step": 229961
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7986862659454346,
      "learning_rate": 5.423637009771553e-09,
      "loss": 2.7526,
      "step": 229962
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1754586696624756,
      "learning_rate": 5.3990679779958304e-09,
      "loss": 2.5695,
      "step": 229963
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9279727935791016,
      "learning_rate": 5.374554720838364e-09,
      "loss": 2.9771,
      "step": 229964
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.811927556991577,
      "learning_rate": 5.3500972383657695e-09,
      "loss": 2.933,
      "step": 229965
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2115416526794434,
      "learning_rate": 5.325695530578045e-09,
      "loss": 2.8694,
      "step": 229966
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2257890701293945,
      "learning_rate": 5.301349597441884e-09,
      "loss": 2.8978,
      "step": 229967
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.232478380203247,
      "learning_rate": 5.2770594389572875e-09,
      "loss": 2.8544,
      "step": 229968
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.86700701713562,
      "learning_rate": 5.252825055190868e-09,
      "loss": 3.0208,
      "step": 229969
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.850520372390747,
      "learning_rate": 5.2286464461093194e-09,
      "loss": 2.9013,
      "step": 229970
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.803806781768799,
      "learning_rate": 5.204523611712641e-09,
      "loss": 2.748,
      "step": 229971
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.940291404724121,
      "learning_rate": 5.180456552000833e-09,
      "loss": 2.8082,
      "step": 229972
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0220069885253906,
      "learning_rate": 5.156445267007203e-09,
      "loss": 2.8151,
      "step": 229973
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6950550079345703,
      "learning_rate": 5.132489756698444e-09,
      "loss": 2.8403,
      "step": 229974
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.00553297996521,
      "learning_rate": 5.108590021107861e-09,
      "loss": 3.0187,
      "step": 229975
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.929201364517212,
      "learning_rate": 5.084746060235456e-09,
      "loss": 2.8941,
      "step": 229976
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.351550817489624,
      "learning_rate": 5.060957874081228e-09,
      "loss": 3.0776,
      "step": 229977
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.4295191764831543,
      "learning_rate": 5.037225462645178e-09,
      "loss": 3.1238,
      "step": 229978
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.967639684677124,
      "learning_rate": 5.013548825927305e-09,
      "loss": 3.1721,
      "step": 229979
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0270140171051025,
      "learning_rate": 4.9899279639276094e-09,
      "loss": 2.9066,
      "step": 229980
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.6420650482177734,
      "learning_rate": 4.966362876679397e-09,
      "loss": 2.9344,
      "step": 229981
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.77840256690979,
      "learning_rate": 4.94285356418267e-09,
      "loss": 2.9712,
      "step": 229982
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.086595296859741,
      "learning_rate": 4.9194000264374255e-09,
      "loss": 2.7703,
      "step": 229983
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.783456802368164,
      "learning_rate": 4.896002263410359e-09,
      "loss": 2.7427,
      "step": 229984
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7364754676818848,
      "learning_rate": 4.872660275134776e-09,
      "loss": 2.7449,
      "step": 229985
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.901878595352173,
      "learning_rate": 4.849374061643984e-09,
      "loss": 2.5708,
      "step": 229986
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.674138069152832,
      "learning_rate": 4.826143622904677e-09,
      "loss": 3.192,
      "step": 229987
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.653470993041992,
      "learning_rate": 4.802968958916853e-09,
      "loss": 2.6337,
      "step": 229988
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.53997802734375,
      "learning_rate": 4.77985006971382e-09,
      "loss": 3.0561,
      "step": 229989
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.1957247257232666,
      "learning_rate": 4.756786955295577e-09,
      "loss": 3.0137,
      "step": 229990
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.051805019378662,
      "learning_rate": 4.733779615628819e-09,
      "loss": 2.9536,
      "step": 229991
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7230517864227295,
      "learning_rate": 4.710828050746851e-09,
      "loss": 2.7686,
      "step": 229992
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.484821081161499,
      "learning_rate": 4.687932260682981e-09,
      "loss": 2.9841,
      "step": 229993
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2350544929504395,
      "learning_rate": 4.665092245370594e-09,
      "loss": 2.97,
      "step": 229994
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.2611780166625977,
      "learning_rate": 4.642308004876305e-09,
      "loss": 2.9549,
      "step": 229995
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9683754444122314,
      "learning_rate": 4.619579539166807e-09,
      "loss": 3.1062,
      "step": 229996
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.0221784114837646,
      "learning_rate": 4.596906848275406e-09,
      "loss": 2.8429,
      "step": 229997
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.854205369949341,
      "learning_rate": 4.574289932202102e-09,
      "loss": 3.0468,
      "step": 229998
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.800738573074341,
      "learning_rate": 4.551728790946896e-09,
      "loss": 3.0281,
      "step": 229999
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.7490830421447754,
      "learning_rate": 4.52922342447648e-09,
      "loss": 3.1228,
      "step": 230000
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3890879154205322,
      "learning_rate": 4.506773832824162e-09,
      "loss": 2.968,
      "step": 230001
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5765528678894043,
      "learning_rate": 4.484380016023248e-09,
      "loss": 3.084,
      "step": 230002
    },
    {
      "epoch": 2.99,
      "grad_norm": 4.353407382965088,
      "learning_rate": 4.462041974040431e-09,
      "loss": 3.1999,
      "step": 230003
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.642144203186035,
      "learning_rate": 4.4397597069090185e-09,
      "loss": 2.9682,
      "step": 230004
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8332772254943848,
      "learning_rate": 4.417533214595703e-09,
      "loss": 3.1405,
      "step": 230005
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.864668846130371,
      "learning_rate": 4.395362497133792e-09,
      "loss": 2.9163,
      "step": 230006
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.858611822128296,
      "learning_rate": 4.373247554489978e-09,
      "loss": 3.2136,
      "step": 230007
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.9504659175872803,
      "learning_rate": 4.351188386730875e-09,
      "loss": 2.8604,
      "step": 230008
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.7593846321105957,
      "learning_rate": 4.329184993823176e-09,
      "loss": 2.9464,
      "step": 230009
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.772202968597412,
      "learning_rate": 4.3072373757335745e-09,
      "loss": 3.0304,
      "step": 230010
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.6018712520599365,
      "learning_rate": 4.28534553256199e-09,
      "loss": 2.9717,
      "step": 230011
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.5697591304779053,
      "learning_rate": 4.263509464208503e-09,
      "loss": 3.0533,
      "step": 230012
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.8176121711730957,
      "learning_rate": 4.241729170739727e-09,
      "loss": 2.9645,
      "step": 230013
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.3040053844451904,
      "learning_rate": 4.220004652155662e-09,
      "loss": 2.7885,
      "step": 230014
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.029200553894043,
      "learning_rate": 4.198335908456307e-09,
      "loss": 2.6741,
      "step": 230015
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.9974443912506104,
      "learning_rate": 4.176722939641663e-09,
      "loss": 3.0113,
      "step": 230016
    },
    {
      "epoch": 2.99,
      "grad_norm": 2.901108980178833,
      "learning_rate": 4.1551657456784236e-09,
      "loss": 2.9228,
      "step": 230017
    },
    {
      "epoch": 2.99,
      "grad_norm": 3.362396240234375,
      "learning_rate": 4.133664326633201e-09,
      "loss": 2.7947,
      "step": 230018
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.668999433517456,
      "learning_rate": 4.11221868247269e-09,
      "loss": 2.7378,
      "step": 230019
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0168182849884033,
      "learning_rate": 4.090828813230196e-09,
      "loss": 3.0669,
      "step": 230020
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1779208183288574,
      "learning_rate": 4.069494718872412e-09,
      "loss": 2.6859,
      "step": 230021
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.776811122894287,
      "learning_rate": 4.048216399432646e-09,
      "loss": 2.8118,
      "step": 230022
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2964088916778564,
      "learning_rate": 4.026993854877591e-09,
      "loss": 2.9627,
      "step": 230023
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6237173080444336,
      "learning_rate": 4.005827085273861e-09,
      "loss": 3.0776,
      "step": 230024
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.416034698486328,
      "learning_rate": 3.98471609055484e-09,
      "loss": 3.1134,
      "step": 230025
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3894801139831543,
      "learning_rate": 3.963660870753838e-09,
      "loss": 2.736,
      "step": 230026
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.471924066543579,
      "learning_rate": 3.942661425904159e-09,
      "loss": 2.8239,
      "step": 230027
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7428648471832275,
      "learning_rate": 3.921717755972498e-09,
      "loss": 2.8329,
      "step": 230028
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5696067810058594,
      "learning_rate": 3.900829860992161e-09,
      "loss": 2.914,
      "step": 230029
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1172354221343994,
      "learning_rate": 3.879997740929841e-09,
      "loss": 2.8755,
      "step": 230030
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.032393217086792,
      "learning_rate": 3.8592213958188455e-09,
      "loss": 3.1144,
      "step": 230031
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.774996042251587,
      "learning_rate": 3.838500825659174e-09,
      "loss": 2.998,
      "step": 230032
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2981112003326416,
      "learning_rate": 3.81783603041752e-09,
      "loss": 3.1594,
      "step": 230033
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4346630573272705,
      "learning_rate": 3.797227010160497e-09,
      "loss": 2.9549,
      "step": 230034
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6209218502044678,
      "learning_rate": 3.776673764854799e-09,
      "loss": 2.725,
      "step": 230035
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1579864025115967,
      "learning_rate": 3.756176294500423e-09,
      "loss": 2.7847,
      "step": 230036
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3884217739105225,
      "learning_rate": 3.73573459913068e-09,
      "loss": 3.0371,
      "step": 230037
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.508483648300171,
      "learning_rate": 3.71534867871226e-09,
      "loss": 2.8297,
      "step": 230038
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2696948051452637,
      "learning_rate": 3.695018533245164e-09,
      "loss": 2.8699,
      "step": 230039
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7579474449157715,
      "learning_rate": 3.674744162796006e-09,
      "loss": 3.1749,
      "step": 230040
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.954263925552368,
      "learning_rate": 3.6545255672981722e-09,
      "loss": 2.9395,
      "step": 230041
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.9441540241241455,
      "learning_rate": 3.634362746784969e-09,
      "loss": 2.6961,
      "step": 230042
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.54815936088562,
      "learning_rate": 3.6142557012897034e-09,
      "loss": 3.0514,
      "step": 230043
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.973351001739502,
      "learning_rate": 3.594204430745762e-09,
      "loss": 2.949,
      "step": 230044
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1807801723480225,
      "learning_rate": 3.574208935219758e-09,
      "loss": 3.0027,
      "step": 230045
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.4290668964385986,
      "learning_rate": 3.5542692146783846e-09,
      "loss": 2.8919,
      "step": 230046
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.551011800765991,
      "learning_rate": 3.534385269154949e-09,
      "loss": 3.4126,
      "step": 230047
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.274970769882202,
      "learning_rate": 3.514557098616144e-09,
      "loss": 2.7741,
      "step": 230048
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0487465858459473,
      "learning_rate": 3.494784703095277e-09,
      "loss": 3.069,
      "step": 230049
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4912116527557373,
      "learning_rate": 3.475068082592347e-09,
      "loss": 3.0273,
      "step": 230050
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.795790195465088,
      "learning_rate": 3.455407237107355e-09,
      "loss": 2.7709,
      "step": 230051
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.131376028060913,
      "learning_rate": 3.4358021666403e-09,
      "loss": 3.1239,
      "step": 230052
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7871363162994385,
      "learning_rate": 3.4162528712244898e-09,
      "loss": 3.0474,
      "step": 230053
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0200393199920654,
      "learning_rate": 3.39675935079331e-09,
      "loss": 2.8956,
      "step": 230054
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8552801609039307,
      "learning_rate": 3.3773216054133745e-09,
      "loss": 2.7355,
      "step": 230055
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6268458366394043,
      "learning_rate": 3.3579396350846833e-09,
      "loss": 3.0114,
      "step": 230056
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.792585611343384,
      "learning_rate": 3.3386134397739294e-09,
      "loss": 2.7181,
      "step": 230057
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3774774074554443,
      "learning_rate": 3.3193430194811134e-09,
      "loss": 2.8142,
      "step": 230058
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.912701368331909,
      "learning_rate": 3.300128374272848e-09,
      "loss": 2.9008,
      "step": 230059
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1144707202911377,
      "learning_rate": 3.280969504115827e-09,
      "loss": 3.2039,
      "step": 230060
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6776363849639893,
      "learning_rate": 3.2618664089767432e-09,
      "loss": 3.0898,
      "step": 230061
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.934582471847534,
      "learning_rate": 3.2428190889222106e-09,
      "loss": 3.0004,
      "step": 230062
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2465457916259766,
      "learning_rate": 3.2238275439189222e-09,
      "loss": 2.9349,
      "step": 230063
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9500539302825928,
      "learning_rate": 3.2048917739668777e-09,
      "loss": 2.9994,
      "step": 230064
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.703296184539795,
      "learning_rate": 3.1860117790993843e-09,
      "loss": 2.8538,
      "step": 230065
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.418384552001953,
      "learning_rate": 3.167187559316442e-09,
      "loss": 3.0016,
      "step": 230066
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.3194665908813477,
      "learning_rate": 3.148419114584744e-09,
      "loss": 2.8146,
      "step": 230067
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7803022861480713,
      "learning_rate": 3.1297064449375964e-09,
      "loss": 2.8832,
      "step": 230068
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.772326946258545,
      "learning_rate": 3.111049550375e-09,
      "loss": 2.9021,
      "step": 230069
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9535624980926514,
      "learning_rate": 3.0924484308969544e-09,
      "loss": 2.8086,
      "step": 230070
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.938175916671753,
      "learning_rate": 3.07390308650346e-09,
      "loss": 2.955,
      "step": 230071
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.8043711185455322,
      "learning_rate": 3.055413517194516e-09,
      "loss": 2.9407,
      "step": 230072
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1447081565856934,
      "learning_rate": 3.036979722970123e-09,
      "loss": 2.9087,
      "step": 230073
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8106114864349365,
      "learning_rate": 3.0186017038635878e-09,
      "loss": 3.0935,
      "step": 230074
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.818880319595337,
      "learning_rate": 3.0002794598416036e-09,
      "loss": 2.9665,
      "step": 230075
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.163125991821289,
      "learning_rate": 2.9820129909707833e-09,
      "loss": 2.8362,
      "step": 230076
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.757788896560669,
      "learning_rate": 2.9638022971512077e-09,
      "loss": 2.9223,
      "step": 230077
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3732218742370605,
      "learning_rate": 2.945647378482796e-09,
      "loss": 2.7457,
      "step": 230078
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8358728885650635,
      "learning_rate": 2.9275482348989355e-09,
      "loss": 2.827,
      "step": 230079
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6382548809051514,
      "learning_rate": 2.909504866466239e-09,
      "loss": 2.7805,
      "step": 230080
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6799936294555664,
      "learning_rate": 2.8915172731180937e-09,
      "loss": 2.7703,
      "step": 230081
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9061989784240723,
      "learning_rate": 2.8735854549211125e-09,
      "loss": 3.2174,
      "step": 230082
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.089214324951172,
      "learning_rate": 2.855709411841989e-09,
      "loss": 2.9941,
      "step": 230083
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.060903787612915,
      "learning_rate": 2.8378891439140295e-09,
      "loss": 2.7692,
      "step": 230084
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.776475191116333,
      "learning_rate": 2.8201246511039278e-09,
      "loss": 2.8363,
      "step": 230085
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7280216217041016,
      "learning_rate": 2.8024159334449903e-09,
      "loss": 2.8641,
      "step": 230086
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3644015789031982,
      "learning_rate": 2.7847629909039104e-09,
      "loss": 2.8142,
      "step": 230087
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.559234142303467,
      "learning_rate": 2.7671658235473017e-09,
      "loss": 2.6506,
      "step": 230088
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8373873233795166,
      "learning_rate": 2.7496244313085505e-09,
      "loss": 2.8383,
      "step": 230089
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7005674839019775,
      "learning_rate": 2.732138814220963e-09,
      "loss": 2.726,
      "step": 230090
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8469126224517822,
      "learning_rate": 2.7147089722845407e-09,
      "loss": 2.7481,
      "step": 230091
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.767413377761841,
      "learning_rate": 2.697334905532589e-09,
      "loss": 3.0726,
      "step": 230092
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.8167498111724854,
      "learning_rate": 2.6800166139318012e-09,
      "loss": 2.6921,
      "step": 230093
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.946866989135742,
      "learning_rate": 2.6627540974821782e-09,
      "loss": 2.6822,
      "step": 230094
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.4232640266418457,
      "learning_rate": 2.645547356217026e-09,
      "loss": 2.99,
      "step": 230095
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.706672430038452,
      "learning_rate": 2.628396390103038e-09,
      "loss": 2.9684,
      "step": 230096
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.168016195297241,
      "learning_rate": 2.611301199173521e-09,
      "loss": 2.9531,
      "step": 230097
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.498605251312256,
      "learning_rate": 2.5942617834284752e-09,
      "loss": 3.0105,
      "step": 230098
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.867370128631592,
      "learning_rate": 2.5772781428679e-09,
      "loss": 2.798,
      "step": 230099
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1806039810180664,
      "learning_rate": 2.5603502774917962e-09,
      "loss": 3.026,
      "step": 230100
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.949735641479492,
      "learning_rate": 2.5434781872668563e-09,
      "loss": 2.8192,
      "step": 230101
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.648758888244629,
      "learning_rate": 2.5266618722596944e-09,
      "loss": 2.9017,
      "step": 230102
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.309091329574585,
      "learning_rate": 2.50990133247031e-09,
      "loss": 2.8594,
      "step": 230103
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.7740519046783447,
      "learning_rate": 2.4931965678320897e-09,
      "loss": 2.7938,
      "step": 230104
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9514658451080322,
      "learning_rate": 2.4765475784116474e-09,
      "loss": 2.9672,
      "step": 230105
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.993501663208008,
      "learning_rate": 2.4599543642089824e-09,
      "loss": 2.9232,
      "step": 230106
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1121959686279297,
      "learning_rate": 2.443416925190789e-09,
      "loss": 3.07,
      "step": 230107
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6056888103485107,
      "learning_rate": 2.4269352613903726e-09,
      "loss": 3.1538,
      "step": 230108
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.749746799468994,
      "learning_rate": 2.410509372807734e-09,
      "loss": 2.757,
      "step": 230109
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.046578884124756,
      "learning_rate": 2.3941392594095665e-09,
      "loss": 3.1496,
      "step": 230110
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8771817684173584,
      "learning_rate": 2.3778249212624835e-09,
      "loss": 2.9422,
      "step": 230111
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.964388608932495,
      "learning_rate": 2.3615663583331778e-09,
      "loss": 2.9277,
      "step": 230112
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.8301546573638916,
      "learning_rate": 2.34536357062165e-09,
      "loss": 2.9536,
      "step": 230113
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3672640323638916,
      "learning_rate": 2.3292165581279e-09,
      "loss": 3.1989,
      "step": 230114
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.504509449005127,
      "learning_rate": 2.3131253208519275e-09,
      "loss": 2.7586,
      "step": 230115
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.444347858428955,
      "learning_rate": 2.297089858827039e-09,
      "loss": 2.9407,
      "step": 230116
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8684988021850586,
      "learning_rate": 2.2811101720532355e-09,
      "loss": 2.7652,
      "step": 230117
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6670515537261963,
      "learning_rate": 2.265186260497209e-09,
      "loss": 2.8964,
      "step": 230118
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0487301349639893,
      "learning_rate": 2.2493181241922677e-09,
      "loss": 2.8416,
      "step": 230119
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3936421871185303,
      "learning_rate": 2.2335057631384103e-09,
      "loss": 2.9754,
      "step": 230120
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5879881381988525,
      "learning_rate": 2.2177491773023305e-09,
      "loss": 2.8645,
      "step": 230121
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6661057472229004,
      "learning_rate": 2.2020483667506417e-09,
      "loss": 3.0396,
      "step": 230122
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2726476192474365,
      "learning_rate": 2.1864033314500372e-09,
      "loss": 3.2919,
      "step": 230123
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4971370697021484,
      "learning_rate": 2.1708140714005173e-09,
      "loss": 2.619,
      "step": 230124
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.669264793395996,
      "learning_rate": 2.155280586602082e-09,
      "loss": 2.8904,
      "step": 230125
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.857722043991089,
      "learning_rate": 2.1398028770547304e-09,
      "loss": 2.8826,
      "step": 230126
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7298614978790283,
      "learning_rate": 2.1243809427917702e-09,
      "loss": 2.8533,
      "step": 230127
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9311161041259766,
      "learning_rate": 2.109014783779894e-09,
      "loss": 2.9846,
      "step": 230128
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.917691469192505,
      "learning_rate": 2.0937044000524094e-09,
      "loss": 2.7506,
      "step": 230129
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.12842059135437,
      "learning_rate": 2.0784497916093155e-09,
      "loss": 3.1234,
      "step": 230130
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5132904052734375,
      "learning_rate": 2.063250958417306e-09,
      "loss": 2.8954,
      "step": 230131
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.519066333770752,
      "learning_rate": 2.048107900509688e-09,
      "loss": 2.7604,
      "step": 230132
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.656656265258789,
      "learning_rate": 2.0330206179197673e-09,
      "loss": 2.7996,
      "step": 230133
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.073857545852661,
      "learning_rate": 2.017989110580931e-09,
      "loss": 2.7921,
      "step": 230134
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8236916065216064,
      "learning_rate": 2.0030133785264855e-09,
      "loss": 2.8527,
      "step": 230135
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9890952110290527,
      "learning_rate": 1.9880934217897383e-09,
      "loss": 3.0524,
      "step": 230136
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0156631469726562,
      "learning_rate": 1.973229240304075e-09,
      "loss": 3.0407,
      "step": 230137
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0082976818084717,
      "learning_rate": 1.9584208341361096e-09,
      "loss": 3.1038,
      "step": 230138
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8772037029266357,
      "learning_rate": 1.943668203285842e-09,
      "loss": 3.0964,
      "step": 230139
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.77068829536438,
      "learning_rate": 1.9289713477199654e-09,
      "loss": 3.3051,
      "step": 230140
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1185142993927,
      "learning_rate": 1.9143302674717863e-09,
      "loss": 3.0608,
      "step": 230141
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2903993129730225,
      "learning_rate": 1.8997449625079985e-09,
      "loss": 2.7049,
      "step": 230142
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.454906940460205,
      "learning_rate": 1.8852154328619084e-09,
      "loss": 3.1044,
      "step": 230143
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7475476264953613,
      "learning_rate": 1.870741678533516e-09,
      "loss": 2.9492,
      "step": 230144
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1009044647216797,
      "learning_rate": 1.8563236995228214e-09,
      "loss": 2.7948,
      "step": 230145
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1152641773223877,
      "learning_rate": 1.8419614958298245e-09,
      "loss": 2.9985,
      "step": 230146
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.998507499694824,
      "learning_rate": 1.8276550674545254e-09,
      "loss": 2.9346,
      "step": 230147
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9884581565856934,
      "learning_rate": 1.813404414396924e-09,
      "loss": 2.7212,
      "step": 230148
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.871729850769043,
      "learning_rate": 1.7992095366570203e-09,
      "loss": 2.9579,
      "step": 230149
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.736480474472046,
      "learning_rate": 1.785070434268121e-09,
      "loss": 3.1482,
      "step": 230150
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6284501552581787,
      "learning_rate": 1.7709871071969195e-09,
      "loss": 3.0981,
      "step": 230151
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.790492057800293,
      "learning_rate": 1.7569595554434156e-09,
      "loss": 2.7839,
      "step": 230152
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2529280185699463,
      "learning_rate": 1.7429877790409164e-09,
      "loss": 2.8221,
      "step": 230153
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2250659465789795,
      "learning_rate": 1.7290717779894215e-09,
      "loss": 2.8015,
      "step": 230154
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9286773204803467,
      "learning_rate": 1.715211552288931e-09,
      "loss": 2.6441,
      "step": 230155
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8479576110839844,
      "learning_rate": 1.7014071019061382e-09,
      "loss": 2.8465,
      "step": 230156
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.034242868423462,
      "learning_rate": 1.6876584268743498e-09,
      "loss": 2.7802,
      "step": 230157
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.285409927368164,
      "learning_rate": 1.673965527193566e-09,
      "loss": 3.0757,
      "step": 230158
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.928598403930664,
      "learning_rate": 1.6603284028637864e-09,
      "loss": 2.9596,
      "step": 230159
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.634281873703003,
      "learning_rate": 1.6467470538850113e-09,
      "loss": 2.9379,
      "step": 230160
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4314377307891846,
      "learning_rate": 1.6332214802905474e-09,
      "loss": 2.9104,
      "step": 230161
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4779932498931885,
      "learning_rate": 1.6197516820137813e-09,
      "loss": 3.0879,
      "step": 230162
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.300003528594971,
      "learning_rate": 1.6063376591213261e-09,
      "loss": 2.6742,
      "step": 230163
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4211552143096924,
      "learning_rate": 1.5929794115798755e-09,
      "loss": 2.8775,
      "step": 230164
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.5632405281066895,
      "learning_rate": 1.579676939422736e-09,
      "loss": 3.0356,
      "step": 230165
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2850379943847656,
      "learning_rate": 1.5664302426499076e-09,
      "loss": 2.9697,
      "step": 230166
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.6799263954162598,
      "learning_rate": 1.553239321194777e-09,
      "loss": 2.8992,
      "step": 230167
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8679141998291016,
      "learning_rate": 1.540104175157264e-09,
      "loss": 2.9148,
      "step": 230168
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0136570930480957,
      "learning_rate": 1.5270248045040624e-09,
      "loss": 2.9109,
      "step": 230169
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.737391233444214,
      "learning_rate": 1.514001209201865e-09,
      "loss": 3.0489,
      "step": 230170
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0876429080963135,
      "learning_rate": 1.5010333892839788e-09,
      "loss": 2.9583,
      "step": 230171
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7443389892578125,
      "learning_rate": 1.4881213447504038e-09,
      "loss": 3.0125,
      "step": 230172
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.302629232406616,
      "learning_rate": 1.4752650756011398e-09,
      "loss": 2.6829,
      "step": 230173
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8959574699401855,
      "learning_rate": 1.4624645818694936e-09,
      "loss": 2.9225,
      "step": 230174
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.014377117156982,
      "learning_rate": 1.4497198634888518e-09,
      "loss": 2.9354,
      "step": 230175
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.002704381942749,
      "learning_rate": 1.4370309205258279e-09,
      "loss": 2.9272,
      "step": 230176
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.051877737045288,
      "learning_rate": 1.4243977529471151e-09,
      "loss": 2.867,
      "step": 230177
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.5339012145996094,
      "learning_rate": 1.41182036078602e-09,
      "loss": 3.1994,
      "step": 230178
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.17586612701416,
      "learning_rate": 1.3992987440092362e-09,
      "loss": 2.8453,
      "step": 230179
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8378026485443115,
      "learning_rate": 1.3868329026500701e-09,
      "loss": 3.0698,
      "step": 230180
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8717620372772217,
      "learning_rate": 1.3744228366752153e-09,
      "loss": 2.9382,
      "step": 230181
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1821444034576416,
      "learning_rate": 1.362068546117978e-09,
      "loss": 3.1185,
      "step": 230182
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.6770572662353516,
      "learning_rate": 1.3497700309783587e-09,
      "loss": 2.6471,
      "step": 230183
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.897307872772217,
      "learning_rate": 1.3375272912230506e-09,
      "loss": 3.1636,
      "step": 230184
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0220067501068115,
      "learning_rate": 1.32534032688536e-09,
      "loss": 2.8215,
      "step": 230185
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.852616548538208,
      "learning_rate": 1.3132091379985943e-09,
      "loss": 3.0347,
      "step": 230186
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.708652496337891,
      "learning_rate": 1.3011337244961395e-09,
      "loss": 3.0577,
      "step": 230187
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.5872602462768555,
      "learning_rate": 1.2891140864446093e-09,
      "loss": 2.9179,
      "step": 230188
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.629951238632202,
      "learning_rate": 1.2771502238106968e-09,
      "loss": 3.0053,
      "step": 230189
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1325628757476807,
      "learning_rate": 1.2652421365944022e-09,
      "loss": 2.9687,
      "step": 230190
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8774733543395996,
      "learning_rate": 1.2533898247957252e-09,
      "loss": 2.7711,
      "step": 230191
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8688879013061523,
      "learning_rate": 1.2415932884146662e-09,
      "loss": 2.9572,
      "step": 230192
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.926558494567871,
      "learning_rate": 1.2298525274845317e-09,
      "loss": 3.2341,
      "step": 230193
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0688183307647705,
      "learning_rate": 1.2181675420053217e-09,
      "loss": 2.7306,
      "step": 230194
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.9383275508880615,
      "learning_rate": 1.2065383319437293e-09,
      "loss": 2.707,
      "step": 230195
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.756828784942627,
      "learning_rate": 1.1949648973330616e-09,
      "loss": 2.8269,
      "step": 230196
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.776878833770752,
      "learning_rate": 1.1834472381400118e-09,
      "loss": 2.9212,
      "step": 230197
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.366666793823242,
      "learning_rate": 1.171985354431193e-09,
      "loss": 2.8646,
      "step": 230198
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.169755458831787,
      "learning_rate": 1.1605792461399922e-09,
      "loss": 2.9798,
      "step": 230199
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.865694999694824,
      "learning_rate": 1.1492289132997158e-09,
      "loss": 2.852,
      "step": 230200
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.9105546474456787,
      "learning_rate": 1.137934355877057e-09,
      "loss": 2.83,
      "step": 230201
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.431079626083374,
      "learning_rate": 1.1266955739386295e-09,
      "loss": 2.8219,
      "step": 230202
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8791322708129883,
      "learning_rate": 1.1155125674511268e-09,
      "loss": 2.9274,
      "step": 230203
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.179734945297241,
      "learning_rate": 1.1043853364145483e-09,
      "loss": 2.8759,
      "step": 230204
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.926783561706543,
      "learning_rate": 1.093313880862201e-09,
      "loss": 3.1478,
      "step": 230205
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.19469952583313,
      "learning_rate": 1.0822982007274716e-09,
      "loss": 2.9948,
      "step": 230206
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.78464674949646,
      "learning_rate": 1.0713382960769733e-09,
      "loss": 2.9151,
      "step": 230207
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6961824893951416,
      "learning_rate": 1.0604341668773996e-09,
      "loss": 3.0222,
      "step": 230208
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2079200744628906,
      "learning_rate": 1.0495858131620572e-09,
      "loss": 2.7459,
      "step": 230209
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1083250045776367,
      "learning_rate": 1.038793234897639e-09,
      "loss": 2.9704,
      "step": 230210
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.661288261413574,
      "learning_rate": 1.0280564321174522e-09,
      "loss": 3.0572,
      "step": 230211
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.502284288406372,
      "learning_rate": 1.0173754048214967e-09,
      "loss": 3.1487,
      "step": 230212
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2204017639160156,
      "learning_rate": 1.0067501529764654e-09,
      "loss": 2.8089,
      "step": 230213
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.6843602657318115,
      "learning_rate": 9.961806765823588e-10,
      "loss": 2.8023,
      "step": 230214
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.617204189300537,
      "learning_rate": 9.8566697570579e-10,
      "loss": 3.2689,
      "step": 230215
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8840153217315674,
      "learning_rate": 9.752090503134523e-10,
      "loss": 2.8277,
      "step": 230216
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7383220195770264,
      "learning_rate": 9.648069003720393e-10,
      "loss": 2.8897,
      "step": 230217
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1555051803588867,
      "learning_rate": 9.544605259148574e-10,
      "loss": 2.8621,
      "step": 230218
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3933346271514893,
      "learning_rate": 9.441699269752135e-10,
      "loss": 3.048,
      "step": 230219
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.815121650695801,
      "learning_rate": 9.339351035198006e-10,
      "loss": 2.9911,
      "step": 230220
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.7720885276794434,
      "learning_rate": 9.237560555153122e-10,
      "loss": 2.759,
      "step": 230221
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6399800777435303,
      "learning_rate": 9.136327830283618e-10,
      "loss": 3.0453,
      "step": 230222
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.974976062774658,
      "learning_rate": 9.035652860256426e-10,
      "loss": 2.9837,
      "step": 230223
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0575854778289795,
      "learning_rate": 8.935535645404612e-10,
      "loss": 2.6496,
      "step": 230224
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1405770778656006,
      "learning_rate": 8.835976185395111e-10,
      "loss": 3.0095,
      "step": 230225
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0549662113189697,
      "learning_rate": 8.73697448022792e-10,
      "loss": 2.9373,
      "step": 230226
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.775989294052124,
      "learning_rate": 8.63853053023611e-10,
      "loss": 2.9271,
      "step": 230227
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.078601360321045,
      "learning_rate": 8.54064433508661e-10,
      "loss": 2.9273,
      "step": 230228
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9923954010009766,
      "learning_rate": 8.44331589511249e-10,
      "loss": 3.0073,
      "step": 230229
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8101885318756104,
      "learning_rate": 8.346545210313749e-10,
      "loss": 2.8098,
      "step": 230230
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9317498207092285,
      "learning_rate": 8.25033228035732e-10,
      "loss": 2.9694,
      "step": 230231
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.332820415496826,
      "learning_rate": 8.154677105576269e-10,
      "loss": 2.9679,
      "step": 230232
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.875316858291626,
      "learning_rate": 8.059579685970596e-10,
      "loss": 2.978,
      "step": 230233
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.4899544715881348,
      "learning_rate": 7.965040021540303e-10,
      "loss": 2.8931,
      "step": 230234
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7146506309509277,
      "learning_rate": 7.871058111952322e-10,
      "loss": 2.8695,
      "step": 230235
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.828864812850952,
      "learning_rate": 7.777633957872786e-10,
      "loss": 3.0531,
      "step": 230236
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1323869228363037,
      "learning_rate": 7.68476755896863e-10,
      "loss": 3.0292,
      "step": 230237
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9566149711608887,
      "learning_rate": 7.592458914906784e-10,
      "loss": 3.0762,
      "step": 230238
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7826688289642334,
      "learning_rate": 7.500708026353386e-10,
      "loss": 3.1811,
      "step": 230239
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.9981818199157715,
      "learning_rate": 7.409514892975365e-10,
      "loss": 2.6835,
      "step": 230240
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8292019367218018,
      "learning_rate": 7.318879514772724e-10,
      "loss": 2.674,
      "step": 230241
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2086057662963867,
      "learning_rate": 7.228801892078529e-10,
      "loss": 2.9879,
      "step": 230242
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.678273916244507,
      "learning_rate": 7.139282024559712e-10,
      "loss": 3.0461,
      "step": 230243
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.827542304992676,
      "learning_rate": 7.050319912216273e-10,
      "loss": 2.9638,
      "step": 230244
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8585364818573,
      "learning_rate": 6.961915555381281e-10,
      "loss": 3.328,
      "step": 230245
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9289979934692383,
      "learning_rate": 6.874068953721668e-10,
      "loss": 2.8795,
      "step": 230246
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.063305616378784,
      "learning_rate": 6.7867801075705e-10,
      "loss": 3.0171,
      "step": 230247
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.788019895553589,
      "learning_rate": 6.700049016594711e-10,
      "loss": 2.8211,
      "step": 230248
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.74343204498291,
      "learning_rate": 6.613875681127367e-10,
      "loss": 2.802,
      "step": 230249
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.790778636932373,
      "learning_rate": 6.528260100835403e-10,
      "loss": 2.9903,
      "step": 230250
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.339782238006592,
      "learning_rate": 6.443202276384951e-10,
      "loss": 2.8626,
      "step": 230251
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1904501914978027,
      "learning_rate": 6.358702207109878e-10,
      "loss": 3.0784,
      "step": 230252
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1917247772216797,
      "learning_rate": 6.274759893343251e-10,
      "loss": 2.9997,
      "step": 230253
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.156855583190918,
      "learning_rate": 6.191375334752003e-10,
      "loss": 2.9427,
      "step": 230254
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3000643253326416,
      "learning_rate": 6.108548532002267e-10,
      "loss": 2.8935,
      "step": 230255
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.9479451179504395,
      "learning_rate": 6.026279484760977e-10,
      "loss": 2.9001,
      "step": 230256
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.166757345199585,
      "learning_rate": 5.944568192695065e-10,
      "loss": 3.0126,
      "step": 230257
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0532379150390625,
      "learning_rate": 5.863414656470667e-10,
      "loss": 2.7611,
      "step": 230258
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.863884687423706,
      "learning_rate": 5.782818875754713e-10,
      "loss": 2.9798,
      "step": 230259
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6773276329040527,
      "learning_rate": 5.702780850547207e-10,
      "loss": 2.8806,
      "step": 230260
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5854690074920654,
      "learning_rate": 5.623300580848145e-10,
      "loss": 3.1271,
      "step": 230261
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.62608003616333,
      "learning_rate": 5.544378066657529e-10,
      "loss": 3.0406,
      "step": 230262
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0194263458251953,
      "learning_rate": 5.466013308308425e-10,
      "loss": 3.0086,
      "step": 230263
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.569899797439575,
      "learning_rate": 5.388206305134701e-10,
      "loss": 2.9315,
      "step": 230264
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.02787446975708,
      "learning_rate": 5.310957058135557e-10,
      "loss": 3.0453,
      "step": 230265
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1532046794891357,
      "learning_rate": 5.234265566311791e-10,
      "loss": 2.8249,
      "step": 230266
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7794547080993652,
      "learning_rate": 5.158131830329537e-10,
      "loss": 2.9307,
      "step": 230267
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.946276903152466,
      "learning_rate": 5.082555850188796e-10,
      "loss": 2.82,
      "step": 230268
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.370016098022461,
      "learning_rate": 5.007537625556501e-10,
      "loss": 3.0395,
      "step": 230269
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.407614231109619,
      "learning_rate": 4.933077156765719e-10,
      "loss": 2.8986,
      "step": 230270
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7489635944366455,
      "learning_rate": 4.859174443483382e-10,
      "loss": 2.9564,
      "step": 230271
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1795570850372314,
      "learning_rate": 4.785829486042558e-10,
      "loss": 2.8702,
      "step": 230272
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0413074493408203,
      "learning_rate": 4.713042284443247e-10,
      "loss": 3.0318,
      "step": 230273
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.003962516784668,
      "learning_rate": 4.6408128383523814e-10,
      "loss": 2.8891,
      "step": 230274
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.933335065841675,
      "learning_rate": 4.5691411481030283e-10,
      "loss": 2.9524,
      "step": 230275
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9837112426757812,
      "learning_rate": 4.498027213695188e-10,
      "loss": 2.5914,
      "step": 230276
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8873291015625,
      "learning_rate": 4.42747103512886e-10,
      "loss": 3.2443,
      "step": 230277
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.925743341445923,
      "learning_rate": 4.357472612070978e-10,
      "loss": 2.7799,
      "step": 230278
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.729229211807251,
      "learning_rate": 4.2880319451876754e-10,
      "loss": 2.8436,
      "step": 230279
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0889925956726074,
      "learning_rate": 4.2191490341458855e-10,
      "loss": 2.8322,
      "step": 230280
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0574231147766113,
      "learning_rate": 4.1508238786125414e-10,
      "loss": 2.8326,
      "step": 230281
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0275983810424805,
      "learning_rate": 4.083056479253777e-10,
      "loss": 2.945,
      "step": 230282
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.236506223678589,
      "learning_rate": 4.0158468357365246e-10,
      "loss": 2.7694,
      "step": 230283
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.21071720123291,
      "learning_rate": 3.9491949480607856e-10,
      "loss": 2.8994,
      "step": 230284
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5939080715179443,
      "learning_rate": 3.883100816226559e-10,
      "loss": 2.9034,
      "step": 230285
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.210754632949829,
      "learning_rate": 3.8175644402338447e-10,
      "loss": 3.1075,
      "step": 230286
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.699047565460205,
      "learning_rate": 3.752585820082643e-10,
      "loss": 2.9293,
      "step": 230287
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2411961555480957,
      "learning_rate": 3.688164956106021e-10,
      "loss": 2.9714,
      "step": 230288
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.890568971633911,
      "learning_rate": 3.624301847970912e-10,
      "loss": 2.9061,
      "step": 230289
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7366316318511963,
      "learning_rate": 3.560996495677315e-10,
      "loss": 2.905,
      "step": 230290
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.621324062347412,
      "learning_rate": 3.498248899558298e-10,
      "loss": 3.0448,
      "step": 230291
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.200287342071533,
      "learning_rate": 3.4360590592807935e-10,
      "loss": 2.7433,
      "step": 230292
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0773444175720215,
      "learning_rate": 3.3744269751778686e-10,
      "loss": 3.1509,
      "step": 230293
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3046343326568604,
      "learning_rate": 3.3133526469164564e-10,
      "loss": 2.9305,
      "step": 230294
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.691340684890747,
      "learning_rate": 3.2528360748296233e-10,
      "loss": 3.091,
      "step": 230295
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1230533123016357,
      "learning_rate": 3.192877258584303e-10,
      "loss": 2.905,
      "step": 230296
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.993638277053833,
      "learning_rate": 3.1334761985135627e-10,
      "loss": 2.8913,
      "step": 230297
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1936991214752197,
      "learning_rate": 3.0746328942843346e-10,
      "loss": 2.8771,
      "step": 230298
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.77067756652832,
      "learning_rate": 3.016347346229686e-10,
      "loss": 2.7069,
      "step": 230299
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.791584014892578,
      "learning_rate": 2.9586195543496173e-10,
      "loss": 2.97,
      "step": 230300
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.813370704650879,
      "learning_rate": 2.9014495183110606e-10,
      "loss": 2.9068,
      "step": 230301
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0483767986297607,
      "learning_rate": 2.844837238447084e-10,
      "loss": 2.9521,
      "step": 230302
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.483194351196289,
      "learning_rate": 2.788782714757687e-10,
      "loss": 3.0844,
      "step": 230303
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1995763778686523,
      "learning_rate": 2.733285947242869e-10,
      "loss": 2.9461,
      "step": 230304
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9117205142974854,
      "learning_rate": 2.678346935902631e-10,
      "loss": 2.8979,
      "step": 230305
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.087533950805664,
      "learning_rate": 2.6239656804039053e-10,
      "loss": 3.0538,
      "step": 230306
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.366335868835449,
      "learning_rate": 2.5701421810797594e-10,
      "loss": 2.9132,
      "step": 230307
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7780020236968994,
      "learning_rate": 2.51687643826326e-10,
      "loss": 2.6339,
      "step": 230308
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6376256942749023,
      "learning_rate": 2.464168451288273e-10,
      "loss": 2.926,
      "step": 230309
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2008068561553955,
      "learning_rate": 2.412018220487866e-10,
      "loss": 2.8992,
      "step": 230310
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.405684471130371,
      "learning_rate": 2.360425746195105e-10,
      "loss": 2.8739,
      "step": 230311
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.821028232574463,
      "learning_rate": 2.3093910277438565e-10,
      "loss": 2.7969,
      "step": 230312
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.397034168243408,
      "learning_rate": 2.2589140654671878e-10,
      "loss": 2.753,
      "step": 230313
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.6379623413085938,
      "learning_rate": 2.2089948596981655e-10,
      "loss": 2.8033,
      "step": 230314
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.606475830078125,
      "learning_rate": 2.1596334097706557e-10,
      "loss": 3.0462,
      "step": 230315
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5623557567596436,
      "learning_rate": 2.1108297163507926e-10,
      "loss": 2.9667,
      "step": 230316
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8596465587615967,
      "learning_rate": 2.0625837791055089e-10,
      "loss": 2.6986,
      "step": 230317
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.226656913757324,
      "learning_rate": 2.0148955980348047e-10,
      "loss": 3.0518,
      "step": 230318
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9428293704986572,
      "learning_rate": 1.967765173471747e-10,
      "loss": 2.8654,
      "step": 230319
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8034486770629883,
      "learning_rate": 1.921192504750202e-10,
      "loss": 2.9347,
      "step": 230320
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2571206092834473,
      "learning_rate": 1.8751775925363032e-10,
      "loss": 2.9284,
      "step": 230321
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1122875213623047,
      "learning_rate": 1.829720436830051e-10,
      "loss": 3.1378,
      "step": 230322
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9195032119750977,
      "learning_rate": 1.7848210369653115e-10,
      "loss": 2.7383,
      "step": 230323
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0076982975006104,
      "learning_rate": 1.7404793936082183e-10,
      "loss": 2.9049,
      "step": 230324
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.372164011001587,
      "learning_rate": 1.6966955067587719e-10,
      "loss": 2.9916,
      "step": 230325
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.203400135040283,
      "learning_rate": 1.6534693760839047e-10,
      "loss": 2.815,
      "step": 230326
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.85341477394104,
      "learning_rate": 1.610801001583617e-10,
      "loss": 2.894,
      "step": 230327
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.728938579559326,
      "learning_rate": 1.5686903835909758e-10,
      "loss": 3.0814,
      "step": 230328
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0647709369659424,
      "learning_rate": 1.5271375217729143e-10,
      "loss": 2.6936,
      "step": 230329
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0585358142852783,
      "learning_rate": 1.486142416462499e-10,
      "loss": 2.7711,
      "step": 230330
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.9234397411346436,
      "learning_rate": 1.4457050673266635e-10,
      "loss": 3.1027,
      "step": 230331
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.315650224685669,
      "learning_rate": 1.4058254746984743e-10,
      "loss": 2.8208,
      "step": 230332
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.196218252182007,
      "learning_rate": 1.3665036382448646e-10,
      "loss": 2.8752,
      "step": 230333
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0744223594665527,
      "learning_rate": 1.3277395582989013e-10,
      "loss": 3.0071,
      "step": 230334
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.3786213397979736,
      "learning_rate": 1.2895332348605848e-10,
      "loss": 3.001,
      "step": 230335
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.330563545227051,
      "learning_rate": 1.2518846675968476e-10,
      "loss": 3.0782,
      "step": 230336
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.871664524078369,
      "learning_rate": 1.2147938568407568e-10,
      "loss": 2.604,
      "step": 230337
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.820564031600952,
      "learning_rate": 1.1782608025923123e-10,
      "loss": 3.0566,
      "step": 230338
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.072741985321045,
      "learning_rate": 1.1422855045184475e-10,
      "loss": 2.8482,
      "step": 230339
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.2279915809631348,
      "learning_rate": 1.1068679629522293e-10,
      "loss": 2.9827,
      "step": 230340
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.202367067337036,
      "learning_rate": 1.0720081778936573e-10,
      "loss": 2.9159,
      "step": 230341
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8578639030456543,
      "learning_rate": 1.0377061490096649e-10,
      "loss": 2.9736,
      "step": 230342
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8494651317596436,
      "learning_rate": 1.003961876966386e-10,
      "loss": 2.907,
      "step": 230343
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.609980344772339,
      "learning_rate": 9.707753610976865e-11,
      "loss": 3.1665,
      "step": 230344
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6941356658935547,
      "learning_rate": 9.381466017366335e-11,
      "loss": 2.9554,
      "step": 230345
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1144604682922363,
      "learning_rate": 9.060755988832269e-11,
      "loss": 2.9848,
      "step": 230346
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6913352012634277,
      "learning_rate": 8.745623525374668e-11,
      "loss": 2.9035,
      "step": 230347
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.996687889099121,
      "learning_rate": 8.436068623662861e-11,
      "loss": 2.9041,
      "step": 230348
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.109039545059204,
      "learning_rate": 8.132091290358189e-11,
      "loss": 3.0323,
      "step": 230349
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.5681722164154053,
      "learning_rate": 7.833691518799313e-11,
      "loss": 3.1002,
      "step": 230350
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.55143141746521,
      "learning_rate": 7.540869312316899e-11,
      "loss": 2.9456,
      "step": 230351
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9298958778381348,
      "learning_rate": 7.25362467424162e-11,
      "loss": 2.9124,
      "step": 230352
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0891926288604736,
      "learning_rate": 6.971957597912137e-11,
      "loss": 2.806,
      "step": 230353
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.939058780670166,
      "learning_rate": 6.695868086659118e-11,
      "loss": 2.997,
      "step": 230354
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.914064884185791,
      "learning_rate": 6.425356140482562e-11,
      "loss": 3.0805,
      "step": 230355
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5913145542144775,
      "learning_rate": 6.160421759382473e-11,
      "loss": 3.0784,
      "step": 230356
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7909464836120605,
      "learning_rate": 5.901064946689516e-11,
      "loss": 2.9863,
      "step": 230357
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.475764513015747,
      "learning_rate": 5.647285695742354e-11,
      "loss": 2.806,
      "step": 230358
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6902480125427246,
      "learning_rate": 5.399084009871657e-11,
      "loss": 2.9526,
      "step": 230359
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.796771764755249,
      "learning_rate": 5.1564598924080937e-11,
      "loss": 2.7482,
      "step": 230360
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8597664833068848,
      "learning_rate": 4.919413336690325e-11,
      "loss": 2.742,
      "step": 230361
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.14319109916687,
      "learning_rate": 4.687944349379691e-11,
      "loss": 2.8049,
      "step": 230362
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.35093092918396,
      "learning_rate": 4.4620529238148515e-11,
      "loss": 2.9157,
      "step": 230363
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.7674882411956787,
      "learning_rate": 4.2417390666571463e-11,
      "loss": 2.9746,
      "step": 230364
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1632180213928223,
      "learning_rate": 4.027002774575905e-11,
      "loss": 2.989,
      "step": 230365
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.570526123046875,
      "learning_rate": 3.817844047571128e-11,
      "loss": 2.8395,
      "step": 230366
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1725480556488037,
      "learning_rate": 3.614262885642816e-11,
      "loss": 2.8165,
      "step": 230367
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.784795045852661,
      "learning_rate": 3.416259288790968e-11,
      "loss": 2.9117,
      "step": 230368
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.66579008102417,
      "learning_rate": 3.2238332603462536e-11,
      "loss": 2.8502,
      "step": 230369
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7737627029418945,
      "learning_rate": 3.036984796978004e-11,
      "loss": 2.9279,
      "step": 230370
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.035747528076172,
      "learning_rate": 2.8557138986862183e-11,
      "loss": 2.8807,
      "step": 230371
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.567340135574341,
      "learning_rate": 2.6800205654708972e-11,
      "loss": 2.767,
      "step": 230372
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.953943967819214,
      "learning_rate": 2.5099047973320406e-11,
      "loss": 2.9221,
      "step": 230373
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9712929725646973,
      "learning_rate": 2.3453665976003177e-11,
      "loss": 2.8814,
      "step": 230374
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.937732219696045,
      "learning_rate": 2.18640595961439e-11,
      "loss": 2.9158,
      "step": 230375
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0620322227478027,
      "learning_rate": 2.033022890035596e-11,
      "loss": 3.2035,
      "step": 230376
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0188844203948975,
      "learning_rate": 1.8852173888639356e-11,
      "loss": 3.0346,
      "step": 230377
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.6074249744415283,
      "learning_rate": 1.7429894494380702e-11,
      "loss": 3.3074,
      "step": 230378
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.582047939300537,
      "learning_rate": 1.6063390784193386e-11,
      "loss": 3.0031,
      "step": 230379
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.513345956802368,
      "learning_rate": 1.4752662724770715e-11,
      "loss": 2.8088,
      "step": 230380
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.814512014389038,
      "learning_rate": 1.3497710316112687e-11,
      "loss": 2.7678,
      "step": 230381
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8841490745544434,
      "learning_rate": 1.2298533591525994e-11,
      "loss": 2.8242,
      "step": 230382
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.466636896133423,
      "learning_rate": 1.1155132517703946e-11,
      "loss": 2.9985,
      "step": 230383
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1731858253479004,
      "learning_rate": 1.0067507094646543e-11,
      "loss": 2.7077,
      "step": 230384
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7056171894073486,
      "learning_rate": 9.035657355660475e-12,
      "loss": 3.0992,
      "step": 230385
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.597740650177002,
      "learning_rate": 8.05958326743905e-12,
      "loss": 3.0034,
      "step": 230386
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0925161838531494,
      "learning_rate": 7.139284829982273e-12,
      "loss": 2.8498,
      "step": 230387
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.725440502166748,
      "learning_rate": 6.274762043290138e-12,
      "loss": 2.8493,
      "step": 230388
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.885136127471924,
      "learning_rate": 5.466014940669339e-12,
      "loss": 2.9157,
      "step": 230389
    },
    {
      "epoch": 3.0,
      "grad_norm": 4.951696872711182,
      "learning_rate": 4.713043522119875e-12,
      "loss": 2.749,
      "step": 230390
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.00618577003479,
      "learning_rate": 4.015847721028365e-12,
      "loss": 3.0559,
      "step": 230391
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.943103790283203,
      "learning_rate": 3.3744276040081897e-12,
      "loss": 2.8516,
      "step": 230392
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1738150119781494,
      "learning_rate": 2.788783137752659e-12,
      "loss": 2.9861,
      "step": 230393
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.7637922763824463,
      "learning_rate": 2.258914355568464e-12,
      "loss": 2.8657,
      "step": 230394
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.9272944927215576,
      "learning_rate": 1.7848212241489135e-12,
      "loss": 2.7891,
      "step": 230395
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.0851657390594482,
      "learning_rate": 1.3665037434940074e-12,
      "loss": 3.012,
      "step": 230396
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8792765140533447,
      "learning_rate": 1.0039619469104366e-12,
      "loss": 3.0396,
      "step": 230397
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.6704812049865723,
      "learning_rate": 6.971958010915102e-13,
      "loss": 3.1724,
      "step": 230398
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.3846943378448486,
      "learning_rate": 4.462053060372284e-13,
      "loss": 2.8984,
      "step": 230399
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.930975914001465,
      "learning_rate": 2.5099049505428183e-13,
      "loss": 2.6637,
      "step": 230400
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.8738067150115967,
      "learning_rate": 1.1155133483597978e-13,
      "loss": 2.7687,
      "step": 230401
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.928095817565918,
      "learning_rate": 2.788782538232226e-14,
      "loss": 3.0078,
      "step": 230402
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.78537917137146,
      "learning_rate": 0.0,
      "loss": 3.0122,
      "step": 230403
    },
    {
      "epoch": 3.0,
      "step": 230403,
      "total_flos": 4.722526376342323e+16,
      "train_loss": 3.0161496390128235,
      "train_runtime": 29870.0005,
      "train_samples_per_second": 493.663,
      "train_steps_per_second": 7.714
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 230403,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 5000,
  "total_flos": 4.722526376342323e+16,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}
